From af911c4e1026f20dbe4413a1588621980b324f76 Mon Sep 17 00:00:00 2001
From: SreeHarshaNelaturu
Date: Tue, 10 Feb 2026 01:16:08 +0100
Subject: [PATCH 1/4] bump global-mmlu-lite and hfopenllm_v2 to v0.2.0

---
 .../40094cf6-b187-475d-8f14-abb71d998c2b.json |    108 -
 data/formatted                                | 283714 ----------------
 [diffstat truncated: hundreds of result JSON files under data/ are
  regenerated for v0.2.0; the old records (mostly 105 or 353 lines each)
  are deleted and replaced by new ones (mostly 132 or 451 lines each)]
.../ca15d972-9075-42df-884b-5d069f6ff425.json | 132 + .../37dad0cc-36d1-4a4c-8d9c-0f5246889a0c.json | 105 - .../905909a5-abef-46bf-9392-c97873e229df.json | 132 + .../3bc34460-661d-404b-bb1c-5b2fe395b897.json | 105 - .../95bd05cf-8f59-409d-a99e-d249bad6c561.json | 132 + .../76b12246-33f6-4992-a0ab-38704dcf6345.json | 132 + .../cf208ef7-8a9b-4633-8161-dae0825c380e.json | 105 - .../e4415806-0ec0-465a-b28f-9c8741436fb4.json | 132 + .../98e62ab5-d35a-42dd-904b-bed9c50f3745.json | 132 + .../21f72176-cf3b-43ae-aa6e-51d9fe5a6e90.json | 105 - .../8fb3596e-224e-492b-bdb6-a95a16656eb0.json | 132 + .../154203c4-d86e-4c36-806b-c45c5cc568ce.json | 132 + .../e097ccca-ab91-4f16-bbfa-ca97c91fdb77.json | 105 - .../94aca944-b0a9-46ec-bdab-53bb5cbe3b78.json | 105 - .../e42c01f7-2869-4103-bbfd-81aa5a15c140.json | 132 + .../323d2f94-5e04-4627-9f74-129217f53eea.json | 132 + .../f709afd7-3220-41b0-909a-74d9086c7dd9.json | 105 - .../6bcc284b-8973-47d5-b5b1-1abb7a3242ee.json | 132 + .../691cace3-5316-4f5b-8693-67efb24a0a06.json | 132 + .../d387b3dc-9e76-44a6-9a9f-132a4fd762b4.json | 132 + .../f6f515d3-f5e9-4362-be51-bb8fc05527e6.json | 132 + .../2e1e215f-b622-439f-a13f-531441e25ae3.json | 132 + .../d50d66a9-a0c4-4b82-922c-9d012f1b50a1.json | 132 + .../2029aa96-40b2-4af8-a7fa-8ae968b20502.json | 105 - .../ea7292a8-3f07-47be-b8ae-7d352ed1ecb6.json | 132 + .../3b9d5166-4144-4222-a39d-3d1d3956a6e8.json | 105 - .../4eedd6d4-279f-4660-8d71-708a27bb53e0.json | 132 + .../9c0f67d1-f95d-4ca0-a234-2e09ac788f55.json | 132 + .../8b347bb4-9f6d-4c82-bd5d-2fb5f7c8f881.json | 105 - .../e5c0fbc9-f424-4b04-839a-8335adaf89cc.json | 132 + .../690a5844-000e-4949-bbf9-8bd1ff2cb1bd.json | 105 - .../d91107fa-eb8d-4d01-90a2-fc9831f337b2.json | 132 + .../926999bf-1ba6-4321-82b2-fcced4336739.json | 132 + .../e1b7c18a-bff1-44a3-b589-95bcb0f88e36.json | 105 - .../57d481bf-0db9-4208-afda-dcd20df13964.json | 132 + .../adff7af4-9bae-420a-9751-9f68ab81bf99.json | 105 - .../8cd60e42-3429-4938-b43e-9c951a57ca9f.json | 105 - .../eb417e47-fe63-4dc5-b3e5-28782f3782da.json | 132 + .../b0f516dd-7185-4906-87a5-3c6f019894d0.json | 132 + .../ec13c105-c846-4420-91af-d42e98b7a818.json | 105 - .../1e562944-a205-4ef7-aff1-3776595d131c.json | 132 + .../236f7bdd-be50-4287-82b7-6efddc9dd3f4.json | 105 - .../09b81183-8ff2-44d5-a515-63cddc3e55c6.json | 105 - .../6ccaf08d-1b0a-4ca9-941e-a71e2dce5cb4.json | 132 + .../2064938d-9f05-4740-a4d4-2a2da0eac21d.json | 132 + .../db57503c-bfe7-4691-983e-68af941e8b1e.json | 105 - .../43240184-8245-43ff-a971-678523918fe0.json | 132 + .../9a9fb17d-49ae-4a82-95c8-c8b55923d72f.json | 105 - .../b3b854b6-700c-4297-b335-6acc3c385f84.json | 132 + .../df60b16b-184c-43d9-ac79-8627f09d265b.json | 105 - .../1761caca-524f-4d59-81dd-631e3e24e0e5.json | 105 - .../a9d79c6a-f99a-4b60-8e37-ee2cdfe75f30.json | 132 + .../06409b6c-9d26-4bee-af75-16e6edb87a93.json | 105 - .../09ba6e80-5ab4-4c8c-b7ad-c1497413c207.json | 105 - .../88e1dd78-d3bc-401b-88e9-d963bac181db.json | 132 + .../a41bd607-f319-4063-a6e4-813f43e40568.json | 132 + .../7751b65d-2bba-465c-9a1e-5ae51d94fcf6.json | 105 - .../8629aef1-c673-4b17-a9cc-b361a53bdaa7.json | 132 + .../532c927a-dc0c-4e65-8ab0-7b9ddd889d89.json | 132 + .../e69e4e90-8177-44f5-8497-0a45ca9155ea.json | 105 - .../843f9927-9865-4066-9cc0-f0522d3b914f.json | 132 + .../de0dbc50-5d26-4005-967c-3dcbde3a1282.json | 105 - .../df720663-5e82-4de7-9a19-88287bb5f56a.json | 105 - .../eeecb2cb-e286-443f-84aa-d825702a4ad8.json | 132 + .../36ab4f5a-b2cf-4d01-8283-9eaf2c90928f.json | 132 + 
.../c4e810f1-ffb3-4ece-b445-64e339761530.json | 132 + .../025725b6-0034-48c0-a720-5fc210e5e24b.json | 132 + .../7bdd8928-c336-494e-9c87-de9ecc2749b8.json | 132 + .../ff7369dc-3ff2-424b-80b0-e06a141b54f3.json | 132 + .../23b6bf8e-c79a-4620-9e15-2742f45130af.json | 105 - .../a6dc7253-75fd-4897-be85-8ac89fc11f8e.json | 132 + .../296ceacc-542a-4000-bf9b-ae59b33a53ce.json | 132 + .../f8842523-53de-4197-9cf4-979780cbe127.json | 105 - .../0bec0f9a-863b-42f5-96eb-7263eb1c8a61.json | 105 - .../13870577-7579-48b4-9c92-202318ca6ecc.json | 132 + .../617dbd41-3ca3-46d8-8fd2-491d6be39554.json | 105 - .../6ebd2806-2623-4773-93bd-1036ff01cb8c.json | 132 + .../99d6a44b-d556-4674-8ade-a5b30cf99255.json | 132 + .../605118a3-316a-46b5-9719-f596e361a2a8.json | 132 + .../c645a252-366a-4890-a16b-bf687bfbb593.json | 105 - .../271d2829-fbd4-438e-9f09-59539af68c8b.json | 132 + .../294c1745-38cb-4b1e-aae6-e2878ab9065a.json | 105 - .../107bc549-75c1-4272-b567-f8ab9f6cd675.json | 132 + .../dfb451e9-c1c1-45a1-8082-155763366129.json | 132 + .../e8bdfeef-9795-4b00-adec-6ac41c6718f7.json | 105 - .../f28b57ba-103a-41bb-93b0-7b25fd155351.json | 105 - .../817eb9e1-bd7d-4033-b0ea-bc7df58dc087.json | 105 - .../b2d80977-d079-42ec-b057-5aac530b9d70.json | 132 + .../16b33b80-3b4b-4edb-b89f-3d93dca8969c.json | 132 + .../2013b3a9-3644-4f66-9941-b5d2ba6e7b81.json | 105 - .../3565fba3-e63d-49f8-9e8f-deef83531eb9.json | 105 - .../63c94e0a-4572-4b8a-bfe0-7f88bb847d7f.json | 132 + .../538f2b43-328c-456d-8a40-ff2b37924453.json | 132 + .../fb7a68e6-716e-48c6-96c0-d227735f9a7c.json | 132 + .../0b9358f8-1e27-448f-9932-1f2c6feac036.json | 105 - .../3593d4b8-5602-4cca-935f-a76e342f060a.json | 132 + .../72d503fc-b221-498e-811a-a806769175d6.json | 132 + .../d1fa6abf-be2b-4ea6-bcbe-066ac37aa54f.json | 105 - .../ad7d9698-d9e6-4f2d-9767-987835626c8c.json | 132 + .../f611991b-11c1-4232-bc63-8cf2942605ae.json | 105 - .../27d9d5c2-39d8-45e5-9614-a343144f05d8.json | 105 - .../98899942-fcf0-41de-8587-44d7429bea47.json | 132 + .../060feab1-4ce6-44a9-8ae2-c06468dd4dc9.json | 105 - .../bb51eb59-88f6-49c2-814a-11b2c80313d0.json | 132 + .../d8563f36-e299-4186-a5dc-9dae51824e1f.json | 132 + .../f7455f30-e04e-4bc6-9d71-e33272d4577c.json | 105 - .../420cf07c-f043-49db-a62d-91e0c21aff2f.json | 105 - .../43bc0528-7bc5-4eac-8848-c9995079450f.json | 132 + .../7da8cc7e-791f-420d-9004-b29ddf54e381.json | 105 - .../ce19893b-a7e1-4f8e-96f2-eb9cee2afeac.json | 132 + .../24629e14-d197-4a5b-adff-7840af652f22.json | 132 + .../42960491-549f-42bb-9669-5231ca0c436b.json | 105 - .../9c3ea35c-2cf7-4c31-8b83-c69df3cd9448.json | 132 + .../46548403-6eb5-4f7a-874c-1327420f4cab.json | 132 + .../0bd9c061-b7ee-4bc2-9deb-ea7eea012c49.json | 132 + .../aa2fe858-111c-45e8-b0d4-0048d7fc7ef7.json | 132 + .../46c6ab7f-33a0-4e72-9a63-b24da3f9c4d6.json | 105 - .../ad03cae6-b126-4157-a225-9576e4d651d0.json | 132 + .../0d57b65d-3dd4-4185-b8cf-531105e94b5e.json | 132 + .../1ff4251b-d01a-4ced-8868-776210e1ecb6.json | 105 - .../c3c5cb61-3c4f-4796-9d3c-493618db0f91.json | 105 - .../f8882044-6e71-4788-b2ee-f51f85e67ecc.json | 132 + .../3c8f96c5-af91-4f41-a0b4-6e1b7d55d8ad.json | 132 + .../48e6f9aa-5034-4653-8832-b0a16bf01079.json | 105 - .../00efca13-0d04-4700-a90f-bd621a971555.json | 105 - .../e26743b9-4caf-46f8-bd5a-7e4445c850b1.json | 132 + .../f357f4eb-1837-4ab2-ad4b-9cc8a9054517.json | 105 - .../febd4016-3a30-4b26-93e5-f7b556781b9b.json | 132 + .../206c756e-1edc-491f-9f86-7e00c7ab7085.json | 105 - .../ae82125e-94ac-48ca-8240-807e4b7ef9a0.json | 132 + 
.../5321fa0b-b010-4e1d-9f20-a97b56f4f937.json | 132 + .../7d591ed9-5802-43a3-bb38-ec45b69adb08.json | 105 - .../d25a4602-ea50-4a53-952c-112ba250123b.json | 132 + .../fde79985-6832-4315-8650-fdcf9ad68087.json | 105 - .../232e3fc4-5cd2-4515-9e15-acd7d56bc34d.json | 132 + .../aef8fd41-ac51-4fb5-b8ae-78ebca9b4215.json | 105 - .../975f54fe-a581-4ce1-b0c1-7becb7605f09.json | 132 + .../b5cdb9c2-d81a-4e0b-817a-3e101d122e7a.json | 105 - .../3eac4497-66af-4fc6-bf89-459631e4a418.json | 105 - .../92ae4461-48bc-47fe-a3ad-ea4c3452d395.json | 132 + .../638e1cc0-9baf-4555-a278-4b21c46af86f.json | 132 + .../9d58433f-a74c-4345-bd47-a8f2c4e2361e.json | 105 - .../cef4161a-4e1c-4a92-bca8-b07f957a13b1.json | 132 + .../e8109e5c-6276-4935-bfa0-fc969f118d3b.json | 105 - .../715b556b-2bc0-4864-b4b1-b7413a5d45bc.json | 132 + .../9d6b36c5-c0ec-4ab1-a12b-47efc34ebfc8.json | 105 - .../5e307ea5-70da-476a-8d9e-1d488385565f.json | 105 - .../7552ad5c-5d1f-478b-a931-036083b2954e.json | 132 + .../343b7db1-8f96-4998-a6fb-5eb0aa1b6b21.json | 105 - .../7bb3ae9f-9bb3-4bf2-9d97-d7f4f30697ac.json | 132 + .../821d67e5-da8d-4383-8825-3bfa72a91fc9.json | 132 + .../bfa11262-d7bd-44b3-8b8b-81013f1e0c24.json | 105 - .../902849f8-dc58-4e01-ba30-ff95412272d3.json | 105 - .../c5bddcba-4a40-4fbb-93e8-aebd06a70a66.json | 132 + .../4c5cace1-70ce-48f3-aad1-d141924c24de.json | 105 - .../dc35237c-606d-4609-927a-566bea767312.json | 132 + .../3924d1af-e167-4186-a34b-d9b4b8c26d59.json | 132 + .../e42051f2-90f2-4fbe-a4bd-623482abf10f.json | 105 - .../e70423b6-5a7d-4745-b5a3-968f363a3b7a.json | 105 - .../f733c4cc-90fc-4b31-bed3-c57dba6d4b6a.json | 132 + .../08f933a0-b096-4271-890e-0df7e20d1d20.json | 132 + .../2a7b8fa7-5c16-414b-968e-ec7b06e8143c.json | 105 - .../8434e448-ed77-45f2-9c31-39128912f842.json | 132 + .../dfa1b391-4b18-4ac0-a397-a983070647a7.json | 105 - .../96d31674-0011-4621-9131-31b5f6ede223.json | 105 - .../d801037b-1eb0-4058-9096-429e5237e015.json | 132 + .../d8663966-a5f5-40e6-a327-1255f7c3395f.json | 105 - .../e0c46f18-598e-402f-8955-68e71fab67cd.json | 132 + .../4b987cb5-cf7c-4866-8cf0-9926f78c2de9.json | 132 + .../a1fadf30-c543-4b73-bf28-0cb9cb2fc91f.json | 105 - .../57b69bd0-73f6-42e0-bd9e-984bb1e6a553.json | 105 - .../ec658058-1075-4918-9dc9-fc79d0dcf897.json | 132 + .../93597efa-6da8-4074-8049-6ec66f499cbf.json | 105 - .../b68baa86-3e1a-4888-98ba-2ecede79b4a7.json | 132 + .../00a5dc4a-6ffb-4e6a-9547-416ff29e0ded.json | 105 - .../0b11c8ab-2cfa-425d-9d81-d999f94401db.json | 132 + .../13cf92c4-fbeb-445a-85d6-bf71ce2e68c9.json | 105 - .../a3e48db8-3679-4f19-853d-82a73ef49400.json | 132 + .../14a173b6-4d56-4d22-a888-57ea46d72e67.json | 105 - .../7dbf35b2-80c1-4181-80f9-850ea51cead2.json | 132 + .../231f47db-1662-4313-9ff4-f32883f5615c.json | 132 + .../f46cc7cb-27e8-4723-9ecf-cbeef9789b25.json | 105 - .../c79df898-14c6-4f00-9f65-0d01cd34ed61.json | 132 + .../de200bef-71a2-4efb-bc34-02f69385b636.json | 105 - .../2c52917f-c396-410d-bc78-c93c433797fc.json | 132 + .../7ed1ff6a-fe4d-4f78-bbc6-c5e64a7fbfc1.json | 105 - .../0f1d2925-4e1c-495b-94be-f3515fbd53d7.json | 132 + .../82d38084-32b1-4224-810c-b66dd337b3fe.json | 105 - .../5cbb1972-9895-4689-9f6f-7e0037829a78.json | 132 + .../972e0d76-63bb-431b-9d9b-68dd6b738447.json | 105 - .../6bc42e37-1f31-47cb-97e4-9d0b28b53691.json | 132 + .../7337bc31-54b6-43b9-bb26-63f2273ffc7e.json | 105 - .../a1573b95-59e6-4ae0-bc12-6ef6fee90b76.json | 132 + .../c2e14e90-6c18-4a9f-9d68-a9d98960dd32.json | 105 - .../78c61b39-3c76-4af9-8d5e-fcd67d6c8779.json | 132 + 
.../972d45c5-acd1-4e54-8310-9ff56c5fb061.json | 105 - .../2faf738f-64f4-4e14-8011-9e00a4e2dd6a.json | 105 - .../e4c06400-da86-4448-b421-23476f50bdb3.json | 132 + .../15b28d99-e02a-4021-899b-adef87dfe96a.json | 105 - .../48f4c2a7-e819-4789-92ea-e02c5e92d3e4.json | 132 + .../b643171e-adaa-4f6e-8860-542950810578.json | 105 - .../cd9cbbac-f1ca-4193-88cc-e5968cc1bb62.json | 132 + .../a26204c0-90c5-44fd-8814-d69c6e4f4585.json | 105 - .../ab3685ab-1795-4a0e-8ee4-4f509616d1b8.json | 132 + .../9018f443-a63f-4e07-b10b-272f66d1eb0d.json | 132 + .../bc45fc30-c472-471a-b0c8-f68b9397d844.json | 105 - .../548d1536-b941-43a9-a60b-ae5448b70933.json | 132 + .../dff1ec0f-99a6-493d-9f2c-a6a523455b7e.json | 105 - .../99853109-17d9-46fa-a502-e4c977c1fb8f.json | 132 + .../a6385d82-407e-44b2-9148-9cbf8f353557.json | 105 - .../17fb5411-3dc6-44b7-971b-8a080ed93de0.json | 105 - .../e171a0a0-f46d-404f-84e8-539155284e17.json | 132 + .../670b89a5-2a83-480e-a33b-6903609a10dc.json | 105 - .../eadd93e5-5770-4d4a-a1b2-6e732a82ce34.json | 132 + .../151cb8c4-0a7d-4886-80ea-560902e1f932.json | 132 + .../e660922f-847b-4993-91a4-b96809ff1e85.json | 105 - .../1acb97c4-a9d2-4ec8-9486-77eb6857646c.json | 132 + .../41d18fa1-d19e-47cf-8fec-b04725ff097f.json | 105 - .../122a997d-f452-4511-96f3-f31ecb5d8d7b.json | 105 - .../1d803ac5-3ca6-4cb0-bcd1-779eaea1562d.json | 132 + .../81562e50-23c5-4ef1-b98c-b40625f3b8c6.json | 132 + .../c0d7514b-6809-49d7-9193-38e9c9ad03be.json | 105 - .../923f6446-f9fb-47ae-b585-ac131d75c107.json | 105 - .../95fa292a-ee64-4844-9646-ce3cc7f730d2.json | 132 + .../4d14c584-b5a1-41cd-9605-78088dfebd7f.json | 132 + .../da330322-f144-44bb-833a-7b92c11f3888.json | 105 - .../10014f98-cae2-435b-b6e7-17064bb079a5.json | 105 - .../1415d3d9-d7f8-48ef-8a2f-aa675c4c14db.json | 132 + .../4b0ab369-e72f-4229-b449-3a21ee9d2c95.json | 132 + .../c6d4f510-abc8-4524-99b0-e6d98c6e9aa9.json | 105 - .../478b6c1f-3329-4c9b-9d90-59b8b551c1af.json | 132 + .../b4d7f827-d1cb-46c6-9eea-248867fdc07f.json | 105 - .../212f8dd2-3c61-45bd-a3de-2326334feb73.json | 132 + .../d1d2f75d-ddd8-42cb-9de8-1f327479eb9b.json | 105 - .../9251282e-f72f-406e-a2cf-e7063516f624.json | 132 + .../9df1e491-fa9d-41c7-ae46-8cc70a47a60f.json | 105 - .../6c070a2b-9f5e-46cd-b8ba-b6220509b85d.json | 105 - .../91a3c739-7e16-4d21-8879-bb2fd4d4c6ad.json | 132 + .../4496da44-d4bd-40a8-8f91-56b2cb2fa766.json | 105 - .../aaa78d8f-6050-4b5d-bb67-da6c9d1ee065.json | 132 + .../1f0430fe-24ff-4ef6-8577-ee5bfa74f18b.json | 132 + .../69c6593c-6e84-498f-8d68-62c1809a4606.json | 105 - .../b1c0f775-987a-4da5-9451-09bf295b16ba.json | 105 - .../f374772b-2685-41e2-a455-9002e48e3739.json | 132 + .../6db801f8-5253-47c0-b87e-6779bff42f6b.json | 132 + .../c589d3d6-9d8b-45e3-a6c6-60f25d44349b.json | 105 - .../0d704671-c0b6-4296-85b5-eaf972d6be6a.json | 132 + .../1e76e5ee-1728-4756-8f13-d68ce1ca3a5e.json | 105 - .../7e31545f-0865-4843-914b-a71f8a84314f.json | 132 + .../a44985f9-2255-421b-93b9-fcb5761e17b8.json | 105 - .../431c7130-5a19-4a71-8a92-fea9726769ac.json | 132 + .../ad59cc80-784d-41bf-9a3e-9d9f286667d2.json | 105 - .../0b72d3c8-aaff-4eca-854d-07d132e9aa25.json | 105 - .../ca850c4a-14d0-4145-9977-0d33e6e3e362.json | 132 + .../021eca20-1a26-4eba-9006-fb005e91696d.json | 105 - .../7389caa3-6d8f-43e3-b3f2-d9320e56f621.json | 132 + .../1e822b0f-0d80-4613-983b-ebd2e6fbfcd6.json | 132 + .../8662faaa-8964-468a-991b-43b2f0449d48.json | 105 - .../1206f592-e6f7-4e7d-83cd-cbe82b37ec58.json | 132 + .../56cad8c7-566f-46e5-9692-3c11f4408921.json | 105 - 
.../e4085c6a-bc16-4328-a724-4b9838b55faa.json | 132 + .../f86fb81b-29b8-425f-8129-ea054108a214.json | 105 - .../3c5ff9bc-b33a-4557-9c76-ccc041de985c.json | 105 - .../b929b955-1fbb-43d0-add1-4d58fdc4097c.json | 132 + .../64e0c863-f33c-44d7-b244-e5288e5018fb.json | 105 - .../df723a0f-9a32-42f3-9421-780159f7d821.json | 132 + .../09f59d70-2948-4eb6-a14e-2550c97b5542.json | 105 - .../c1046d2c-0b5b-4ab7-b173-8d5b5ecbc07d.json | 132 + .../57d9c59d-8cd8-4253-a076-8b16becc740e.json | 105 - .../60c02070-7554-4764-8a02-841ca75a0d5c.json | 132 + .../5fb209a6-3d82-4017-8e44-3615d7c50218.json | 105 - .../d243f226-149b-4824-837e-e80ab68bae9d.json | 132 + .../2ccd9994-1d9c-40c4-85d0-c74af7544b6d.json | 105 - .../4f9361d0-2ad9-44da-a1d9-876d43451ae6.json | 132 + .../1f1f5c3d-4ee4-4ed8-adeb-9e83942a7e32.json | 105 - .../6c6e9ebc-f83d-48d5-b69f-be43d4167a0e.json | 132 + .../7cd2c0da-15b8-4ad6-8cad-feb68631c079.json | 132 + .../f9c4db8f-b56e-41cd-9c87-ba2d4b36520a.json | 105 - .../36b84cf2-d221-4e9a-b728-37dc2bf7e1d6.json | 132 + .../d1ae295e-1364-442c-a3e4-ac2ad9884a78.json | 105 - .../1fd0d1db-1d75-4b10-bae8-33023c2c7466.json | 132 + .../86c29317-7d5f-42c2-a156-615d3c4a259d.json | 105 - .../70a5a5fb-9dd6-4b1c-a7ac-11155d5ef837.json | 105 - .../c6c02512-6c91-4818-a084-c48915fd83de.json | 132 + .../047ed340-ddb8-40ca-b1ee-10f12b182e43.json | 105 - .../326affa2-9ea4-4fc9-b60f-d2abeb7493c3.json | 132 + .../94b65c53-7e0c-4506-bd19-82d23709d269.json | 105 - .../b3a190d1-5b86-4439-a21e-1f118239db82.json | 132 + .../1c779874-5568-462e-9e6e-0e3fd42d023e.json | 105 - .../b37a7db5-b26f-4a82-b27c-6c3a2ba72fda.json | 132 + .../05a59445-b816-4982-9b1a-1c2394ffbaa9.json | 132 + .../f562a3e4-6afe-4c1d-a597-6265af34f925.json | 105 - .../cdbbfad9-85e8-4c8b-b70c-708c08a62798.json | 105 - .../ff952579-e92d-4af8-9497-f49fed5efba0.json | 132 + .../9cf15d33-3624-4161-bdad-069b09ab2290.json | 105 - .../b541ede0-6de9-4557-8280-43567fd3dd96.json | 132 + .../658df4b3-084f-479f-b507-3a4247683651.json | 105 - .../8514f601-0bb2-4639-90cc-29e96088e7de.json | 132 + .../4e72cc33-538b-4fa7-8038-89794fed6511.json | 105 - .../57e6d0cf-943a-4b83-a1f4-4f03b5066523.json | 132 + .../891bb442-c054-4941-9bd1-8352139f143e.json | 105 - .../ec205127-21c0-4edf-bb3a-ec8ccac4fcdb.json | 132 + .../14b260e6-4300-43ec-b7af-587a2f5b03fb.json | 132 + .../ac94a989-668a-49e6-9975-9169d7394574.json | 105 - .../53de1fc9-7097-4103-b731-588a7bf39f80.json | 132 + .../6961b682-04e5-45af-bd2b-8ad6546503e7.json | 105 - .../1a1031c5-3ec2-4d12-93eb-e0a3b0448ed4.json | 132 + .../eb0f4662-54f5-48ca-b871-726e34bbf540.json | 105 - .../51b62d59-f39c-49ca-af0a-73df6440e29d.json | 132 + .../e4e00595-e1ed-42c9-a518-ff104253cad9.json | 105 - .../3a7a5a89-0ab8-47cd-95c6-14a6186e05b9.json | 105 - .../622a0ae1-0eb5-49f0-bc44-d396c7233e27.json | 132 + .../71291a41-283e-42ca-b192-7b759e3c3712.json | 132 + .../f78ac837-d5f4-48f1-8a9e-1549b0020160.json | 105 - .../2ae9cee5-8f3c-4303-802f-481a03edaf9f.json | 105 - .../7e504fef-b304-4c1a-856d-06e56a8869d7.json | 132 + .../654b55d0-940c-43bd-9478-0bd67bb7b0d8.json | 105 - .../f8258f5e-8826-4fe1-b9d3-61708e79d4ab.json | 132 + .../099ce031-1e11-4a07-bac1-03bef9b915d6.json | 132 + .../c23f1072-c7be-4eab-b866-16c6429071e4.json | 105 - .../75ff25fd-e5f7-4380-b192-cbc8a8ee95aa.json | 132 + .../c02ad005-8e12-46d9-8bb3-090f62c6a946.json | 105 - .../cbc43c7a-d8ac-4b03-a383-703f7fa51757.json | 132 + .../e1d1dd0d-ef8e-44e1-aca1-f10c53f5aa84.json | 105 - .../02c4e0de-4a4e-44b7-bc4c-44c92ade94ec.json | 105 - 
.../72d7f252-1bff-40ad-9ec8-1ac2a2e02a8e.json | 132 + .../4e38a2db-c67e-4f2a-84a0-f9afa7d32bd5.json | 105 - .../5eb10878-11e6-43ad-9bb5-658a3495129c.json | 132 + .../23b29cd4-cfd0-49f1-8959-c3aa8be9722f.json | 132 + .../77255cfb-3e18-4a3b-98a8-b0072aacb669.json | 105 - .../03db2532-f8e0-41e9-ac0c-ff2913f4b12a.json | 132 + .../be9afede-e624-43e6-99dd-52e0d2b413ac.json | 105 - .../273f0d50-aa4e-4469-8360-2ce0a2e1a850.json | 132 + .../9632892a-a6b2-4f17-827e-bfef9a712985.json | 105 - .../79a48e79-d59b-4f86-a8f4-3af174a9ee0b.json | 132 + .../a690910a-388f-4a51-98a2-fc1e1bb327e2.json | 105 - .../8c8eafcc-bb0f-4483-93ff-1379158a5d10.json | 105 - .../9da9a0e6-257a-41f6-b3a3-e3279a4924db.json | 132 + .../6c009b93-145d-4630-bda1-fb24bf764e7a.json | 105 - .../dfed058c-48b2-4e1e-9a29-624771e3e9dd.json | 132 + .../1b4ccc58-920c-4089-b8ca-af3c71c5c3be.json | 105 - .../bcb53a8a-1670-400c-aab6-bd8ed2ebcdf4.json | 132 + .../4d278257-d64b-4da7-bcd6-0d3fbee80dd8.json | 105 - .../8438a108-0d5d-48b6-b73a-981d13329daa.json | 132 + .../3650d718-e20a-4310-a248-3897f7713e93.json | 105 - .../88616292-1e38-4481-af30-6b60e28fb097.json | 132 + .../44094907-0b09-4706-a117-116a7e10a6e5.json | 132 + .../6e224cd8-7f12-42a0-968e-311450d24e58.json | 105 - .../1f17dbf3-f498-41cb-8ec0-5dabb2d9655e.json | 105 - .../d19e8078-87e9-4760-9b91-6b5f478820e1.json | 132 + .../896464f1-01bc-4370-8d90-3368323b2908.json | 132 + .../c5829ba8-e45c-4242-b308-9455f832cb58.json | 105 - .../9889f0b9-9051-485c-bd44-32b1e56b865c.json | 132 + .../6563ce79-6df4-4c78-89e2-064f1250d898.json | 132 + .../b1778755-e6e6-47e2-925d-44d786c4ff62.json | 132 + .../3ae923b8-e9f4-472e-8d5e-54fa5f42ce01.json | 132 + .../40831e23-0a9e-4bdc-a365-9399b6b82ff9.json | 132 + .../4a60fa82-34dc-4b0c-9102-65adac5039e4.json | 132 + .../75ff2c43-dd19-48ae-9ba3-f99cdbadda1c.json | 132 + .../d7962833-660a-4b9b-9836-8a2f3251f38e.json | 132 + .../ad8ecabf-a868-496e-892b-582efb54fa6a.json | 132 + .../49f25d3d-80c9-4723-8fa9-1501d44d70aa.json | 132 + .../70ea520c-3e0c-4412-9dbe-40a00801335c.json | 132 + .../8e7f8bad-812b-4f6c-8dea-1cf44584c300.json | 132 + .../3b39a8f0-c5ba-4f74-9d27-bf5b389e038c.json | 132 + .../702a14d5-a7fd-4926-ab26-e4c3b7f5eda7.json | 132 + .../20e5d087-7b20-4a39-81da-7334354b61f0.json | 132 + .../4c5a769c-0472-402c-8e97-d24e5b302bac.json | 132 + .../96166735-ed03-4931-81c9-d3daed1913d9.json | 132 + .../06d9b1e3-d054-4fa5-bf1f-9d6149e5111c.json | 132 + .../776fd8d8-9846-4359-97d4-2340425d1315.json | 132 + .../197ae1c5-c9b1-4912-91a3-8ccacddc1be6.json | 132 + .../1fffd3d9-1c6b-4965-84e6-980bb0a13af3.json | 132 + .../57e8aaf0-f10b-4024-9f93-7b7f13f3ab10.json | 132 + .../304d5bee-df2d-40fc-b4a0-e3d99178f4bd.json | 132 + .../6126d30d-e2dd-4b8b-9cb3-acdc76084bbb.json | 132 + .../fc7284d9-a73f-4562-a781-5cb87247183f.json | 132 + .../26ab447c-a850-4197-983a-a0dca4532029.json | 132 + .../ee9e2131-aa99-49e1-9814-f0664614354b.json | 132 + .../23c472f7-f060-4a69-8f72-12490675825a.json | 132 + .../04172bef-c06b-4c08-b2af-9e1fe4d97664.json | 132 + .../3436355a-d2fe-411f-a764-4cb8284deb4c.json | 132 + .../265655c0-2ead-4dd7-8c7e-4bee69d51bce.json | 132 + .../645cae82-9e7b-4d1b-b944-e3783089c1c1.json | 132 + .../ab658117-7c6b-428f-8f60-bf88a1d8a5bc.json | 132 + .../03c4b5ce-3b22-4d9f-bf60-b626b52a114b.json | 132 + .../ce7e3a31-c65b-4521-b685-fcbd067c75d9.json | 132 + .../adb53e2c-5dee-4840-8eae-e0186c6e103f.json | 132 + .../ba89563d-f53a-4bf0-91e1-92ac950523d8.json | 132 + .../3fc0ad8d-4bb2-401a-9baf-b94b39b7e1aa.json | 132 + 
.../ed816bcb-bbe9-48ae-a6ac-3603779a985f.json | 132 + .../f347ed24-066a-4cba-8478-f03628cb2b5b.json | 132 + .../ffddfea0-d17e-44e7-8931-a9601e9cb26b.json | 132 + .../ec351fa1-78c2-48c6-83f0-7c2a9b2f0731.json | 132 + .../a0038c34-130b-49dc-a93f-94706a3dad50.json | 132 + .../cbd5ea42-1e5b-4984-bdcf-e60fbfb9d692.json | 132 + .../b902e2b2-a0b3-4467-b076-b98717c40d74.json | 132 + .../4c749665-59ff-49df-a193-0262f66e6003.json | 132 + .../c99899c6-95e1-4dea-ac12-f8df49728a3b.json | 132 + .../13deca9f-073e-444b-bf79-35e816f7c312.json | 132 + .../c8adc0a5-f4bf-4f88-984c-aba506eae6a9.json | 132 + .../b146daaf-ce1f-4520-bc19-21ce8679b220.json | 132 + .../45e1d037-1ed0-472c-a311-c651fde270fc.json | 132 + .../4d7428e8-41a2-4834-900e-e43b05f4d131.json | 105 - .../3f4ce54a-01f3-4c23-a4ba-22d47e0344dc.json | 132 + .../9e8f395c-f481-4a64-86ee-053961b17c42.json | 105 - .../470d52be-9dbd-4714-b004-f65cc82d245f.json | 132 + .../913d1072-8ea3-4e0d-9d72-d30ae186dc7d.json | 105 - .../55baee54-fb05-49a1-962d-145a93de91a8.json | 105 - .../c836fd05-1969-439c-91e1-fd0cab816f6c.json | 132 + .../14774c6b-eb03-4abc-92df-1e7a196ca8a4.json | 132 + .../601e250a-5c2f-4947-9ea3-0f903b2823ec.json | 105 - .../5293ae0c-8022-44d4-b2f5-4f5390dff93e.json | 132 + .../8ab1619c-6edf-457e-9834-0e9dc127d6a4.json | 105 - .../5f6d2c1e-1c66-4b1c-beed-a730d93d997f.json | 105 - .../9020f91f-a8f0-447d-af68-247aa81a25c6.json | 132 + .../0cd6837a-8c3f-4529-9ea0-8755e1725467.json | 132 + .../6621f47a-13c7-421c-b054-cc9116a04e4e.json | 105 - .../7cb17011-cf77-4e86-b67f-84e6ff4b8086.json | 132 + .../086831f9-c677-428b-a997-4da58733633c.json | 132 + .../d71893b8-b82c-490b-a700-b579d64e0610.json | 132 + .../9893689f-c27d-4148-a27f-cd07b07e98b7.json | 132 + .../90f2df23-a9ec-44be-ade5-89b59cb7368a.json | 132 + .../afd545da-390a-478a-b0f5-ea819f088f27.json | 132 + .../ce776f68-856f-4aee-b7e4-e55d15e8d714.json | 132 + .../9b015729-524c-44f3-9c2c-c42981d7a61e.json | 132 + .../56a54ffc-4692-496c-95df-8e4ad19d4d95.json | 132 + .../464673ee-0238-40b4-9c15-1a1551b9f65c.json | 105 - .../4b105969-2ce5-4c62-89ef-efd392c2ca89.json | 132 + .../31af79b1-48c1-4399-9d16-8582c92996ee.json | 132 + .../59a67f29-cb7d-497c-b7bb-1764a665ae33.json | 132 + .../08fcda98-72e9-4338-b2a2-6db924a47288.json | 105 - .../fe57367c-74b7-483e-af54-4f404cbea75b.json | 132 + .../4282c191-344e-4326-a80e-49b712687e7c.json | 105 - .../fda2277b-1513-416e-b586-ed05920a0bb4.json | 132 + .../58fe6545-2f0c-44de-a29b-2da839b141a4.json | 105 - .../b3dde216-f80a-4664-aadc-b5f5dd3e5895.json | 132 + .../07ed6241-fd1a-46eb-91fd-92a4a8f6bd15.json | 132 + .../fe896cef-7667-482d-b7f1-5361fc66ccce.json | 105 - .../64802b86-879e-4072-b5ad-aab17d7251f0.json | 105 - .../ba76c356-cd6a-4636-8ab1-18bb9df69881.json | 132 + .../936cbaa1-e55b-46b8-9610-a5a8faaf4434.json | 105 - .../c6ae54a1-2821-48d1-b689-bbb85aaa70a6.json | 132 + .../6f296f0e-80ca-49b7-94e7-cb45b795c715.json | 132 + .../b5509e11-820a-4ad4-8c6a-0294762502a8.json | 132 + .../90d73665-8d83-4e74-ab7d-29b1d3b6181b.json | 132 + .../98f5e59e-0bdb-405b-a18e-3addd8920951.json | 105 - .../72387647-cbac-4b72-9c22-db7029a39457.json | 132 + .../6219ec01-4b6a-4acd-aee1-96c3e8e48643.json | 132 + .../5c323d7c-25cd-4718-8a1f-54d986cadaf2.json | 132 + .../adfab21a-941b-4efc-8b63-fdfb3074ba9b.json | 132 + .../350d00a4-7501-4130-a069-323530bc9729.json | 132 + .../66f84aee-5d79-4fec-9fff-799ac874d165.json | 105 - .../ea809d28-178e-4a0b-ab5a-34739077c5ff.json | 132 + .../243d5ccd-58f3-4da5-8718-553f3f456490.json | 132 + 
.../5420d88b-bc26-4d04-9812-ffce8a3564e6.json | 105 - .../a45537a7-76a6-4855-b83b-abe965f13460.json | 132 + .../dbd87f5e-e5ba-447b-8416-b6413c3dab09.json | 105 - .../9be911b6-b9f4-47b1-849d-62eb20c9e944.json | 132 + .../acb8e4cc-41b2-47ef-b819-d480189c618c.json | 105 - .../33d7d5f0-cbee-4a26-b5e8-48bdd12492cf.json | 132 + .../4775e169-e3a7-41b6-bf1e-a7e8e0edb4fc.json | 105 - .../4355fbdd-ac72-4f26-8e07-b7e8d774d238.json | 132 + .../4bffc633-e20c-4874-b7db-d1b7dabb8070.json | 132 + .../2d5c844d-d950-4254-bac2-0a986659c541.json | 132 + .../3d2603e3-d556-48e8-ba94-555faf9f1807.json | 105 - .../b3412f38-d0bc-47c9-a750-14bdbf4e65d8.json | 105 - .../f6e74b3c-9ee4-40c3-bf92-35d965503a04.json | 132 + .../4e30bf00-f6b7-4c28-8cf8-dc64427fb958.json | 105 - .../8f1d2600-7347-48b8-9759-11570598459d.json | 132 + .../cd653bfd-2c06-4224-aeeb-bf591995a69e.json | 132 + .../0787e240-a1f4-444a-b3dd-7ef1a1d394b4.json | 105 - .../cdf1fcc7-429d-44bd-b76c-d26ee743f6fe.json | 132 + .../02fee4d1-8899-4a93-b6f1-a1a8d251cedd.json | 105 - .../4828bd36-5453-4383-8985-08d04a7ebecd.json | 132 + .../4c2baa59-c2f1-4779-9d21-1f69c0821968.json | 132 + .../555c1079-c4d0-4b9e-9d2d-769e7ba32429.json | 132 + .../58a4a1c6-0ee4-4524-9ca1-b40870f1d600.json | 132 + .../eea2a38a-4f1b-48d0-894c-09974894f264.json | 132 + .../3d8063ab-0ad5-43e4-83ff-90b46dee766f.json | 132 + .../97f7c73d-6d69-4c04-9cff-4914253003b0.json | 105 - .../da5e0284-7c44-42d4-a110-a23880de277f.json | 132 + .../e2a2d764-ba6b-450d-8f94-abf2af95e793.json | 105 - .../a172b1d1-6d6e-4cd9-9a85-78cb4f71661e.json | 105 - .../bef017bb-47b1-48e4-93c4-3b222a16af7a.json | 132 + .../401c83b0-b7d2-4987-9e46-f127fdbb595f.json | 132 + .../7fa474fb-4aa1-4855-9759-a28056c7a5e7.json | 105 - .../c6fde59b-73ed-4179-a907-076be068b262.json | 132 + .../f7e7c296-74f4-49fa-946d-142341749355.json | 105 - .../4b1e267f-90c4-403a-a7cd-5c006153408b.json | 105 - .../90997fea-6c67-493e-bd8e-5327cfb33ea4.json | 132 + .../08957d63-7462-44ff-9dd8-060a5801a31b.json | 132 + .../86f0a81b-69da-4f36-a6b0-8a36f79d5c1c.json | 105 - .../a434f569-e7d6-4464-afa8-6104be43fa06.json | 132 + .../13e12b5c-d3bb-4634-967d-e5741e623be1.json | 105 - .../e32ed251-e817-409f-b4c3-8f168f1ff822.json | 132 + .../1d9a65a3-d2bb-48a7-8a00-8e4a79c36db2.json | 132 + .../745591e3-3c6a-473a-9e51-4bffe1c86fa7.json | 105 - .../608398da-ae2a-4be2-aaf9-6ec8899aa63d.json | 132 + .../61739e6e-92b0-4577-acd2-8c58ffc612a4.json | 105 - .../80e04641-be7d-4351-a4f6-1318981ef834.json | 132 + .../917081cc-ee33-4c1f-85b0-9256ef57f6b3.json | 105 - .../60fa19b9-bf1d-4f39-b421-cb59379f5206.json | 105 - .../e74222c6-636c-4075-8d4d-30c73fa70fda.json | 132 + .../684962b9-d734-4a10-a0cb-45bc4d957c2c.json | 105 - .../aed80361-9304-44a0-934a-52976d7f1bf3.json | 132 + .../709bd280-b03e-4908-808f-34566bc968f4.json | 132 + .../af87bb98-cc36-4c8d-9694-7e7428a899ac.json | 105 - .../5dc300f1-e908-4d71-addc-2717e3702b12.json | 105 - .../66c495b3-4b09-42ad-b742-4d753c3bde7a.json | 132 + .../6005fc02-9f02-436a-a535-ec68a3c6dbc6.json | 105 - .../e24f7be6-3051-4990-8b93-121aec5402eb.json | 132 + .../0321571b-4246-4490-bd6c-7b106eb8e15a.json | 132 + .../4a4ce0f8-c41f-469e-b7c7-a4e3d857377e.json | 105 - .../54dbf947-ab18-40dd-9cd7-a496289b2e72.json | 132 + .../59b40f56-c27f-4b15-9288-b7033e2e4f26.json | 105 - .../d841e204-ed6a-439d-8408-d5cfb3b38dae.json | 132 + .../96b57891-83e3-4948-ad48-64a2a370e166.json | 132 + .../30301818-6dad-45f9-acfb-a68ccc7c0609.json | 132 + .../771366a5-e227-4ff8-b60f-744020994bec.json | 105 - 
.../4e44fd55-9538-4065-8763-5d1c3d00be5d.json | 105 - .../50743107-30de-4c5d-bf83-cc003af8a5db.json | 132 + .../625ee1b3-e0a1-4a86-83a4-6e66b380f864.json | 132 + .../a4fe370d-1722-4fdf-bf75-8416baeaba19.json | 105 - .../7eba2aef-5c97-4526-92a8-d62bd5b59b6f.json | 105 - .../89fda762-1989-4850-837c-f79ef538c58c.json | 132 + .../1de1f906-0e36-4f79-b159-16ef8ee33ab3.json | 132 + .../f6b84bde-67aa-4c50-a46e-1f80605037de.json | 105 - .../d8588222-9e4b-47c1-9f86-92f47c9c8e38.json | 132 + .../db8614eb-2b53-460c-a80b-dceb47a9703f.json | 105 - .../15e6e6e6-39fa-424f-ba12-5f209cd4b2cc.json | 132 + .../a3e19823-43ac-44ac-9dee-960a98139fa8.json | 105 - .../04631aa2-f1fd-4aea-ba88-53b474c71fe8.json | 105 - .../81225b85-1523-49c1-b770-897112d2e6ae.json | 132 + .../254deaf7-a253-4d41-a10d-1143f86b288c.json | 132 + .../4f5fadb6-5fad-4b82-a027-1d4f497dc476.json | 105 - .../8e1f811e-3e86-4440-a5dd-bf607aa02ad6.json | 105 - .../ba0b66f5-724a-4a6b-ac20-a36d530a8b4b.json | 132 + .../a4c9a905-1a7c-406a-ab38-6a5e71ed0bf5.json | 105 - .../eed0b3b4-e277-49ee-aed5-f3599b2d5653.json | 132 + .../96a21b6e-ed47-40fb-85cd-15924330e60d.json | 132 + .../e8b992b8-9f0a-4bfb-ab53-3b07ca1ca117.json | 105 - .../daa704a9-2eed-4549-a847-3606c9e8a733.json | 105 - .../f41f5471-6384-4510-85d2-41f236082583.json | 132 + .../2728eccc-525f-4350-901b-dbc352c78014.json | 132 + .../3a6cfbae-80c1-4ec6-9c14-1ddeeb6e7138.json | 105 - .../3e7ae935-46c3-427c-8713-41c659c1828a.json | 132 + .../f177b7f7-7143-4f72-9f9d-54fe2bc9797b.json | 105 - .../66782676-c942-4aff-b754-b96cd96cf1f9.json | 132 + .../cdbebbea-4749-472b-8cec-5da5ffa96d65.json | 105 - .../3143a635-10da-4cb5-9c2f-eae2988d9e60.json | 105 - .../941a9e27-2ac4-4dab-a6d0-cb9319c79a27.json | 132 + .../a6d3b7b1-8834-4b74-8849-6d80381c46f5.json | 105 - .../caf93f75-530e-4f4d-9cc0-2cf9b0a7f2ff.json | 132 + .../7f53cef7-fba6-4802-93a2-b54f82a32d74.json | 105 - .../d3ca0458-ee97-4a4c-a6a9-066880ffefb5.json | 132 + .../615bf89b-9357-46f4-82ed-f49b0021da01.json | 132 + .../bc7bf4d0-45e9-4b37-8e5f-edc92fb1bd66.json | 105 - .../06398630-23ad-4000-8ea2-fcca230568d7.json | 132 + .../fbd83964-530c-4d0e-a305-9f8451affb23.json | 105 - .../10d76569-edca-47db-abf2-1d0fd73df198.json | 105 - .../bdfa30f8-da0f-418f-adaf-caafda4c81a5.json | 132 + .../431f8459-3c12-4260-a158-c58ec910590d.json | 105 - .../bcd8c141-d286-4567-bb06-934e546a5c7c.json | 105 - .../bd5e550c-5355-4e01-bafc-2ca89899253a.json | 132 + .../f842ad5b-24f0-419b-9d65-5a6ff1f5e04b.json | 132 + .../3a09590f-28f3-4161-8a93-d42cec62aa90.json | 132 + .../9cc77018-d090-4202-bcf5-d0031097b84e.json | 105 - .../0b365c44-3cc2-4149-8614-7de6b6c2581d.json | 105 - .../0f6b76ca-c4b8-40b2-a3af-2ea1c3650933.json | 132 + .../dc90b971-313a-4a76-b042-350adf37a43c.json | 105 - .../f276ad54-4e3b-4718-ae1f-0479565e4565.json | 132 + .../a4a38b96-036f-40db-8a0b-024a36f004f5.json | 105 - .../dec20396-6555-4773-bf02-2cd1fcedda89.json | 132 + .../558a0ed7-a667-421e-bbab-094b46274239.json | 105 - .../eebc33e1-0016-4adf-815a-72653a34c01b.json | 132 + .../803c3898-c1a6-4832-ac3a-a86139489810.json | 132 + .../ee856df0-01ea-4f06-9323-951144c9e82f.json | 105 - .../4ea0436d-6ec9-40db-af56-2f7f1b0317df.json | 105 - .../bfaa3d3e-66fd-4477-85af-4b83f13ff05b.json | 132 + .../99debdd2-1dea-4eb6-be5c-c144656cfe20.json | 132 + .../d5dd0be3-e7a7-4636-b513-3c1d5532807f.json | 105 - .../ad67bb88-7f74-4eb4-b771-0b3b60be4416.json | 132 + .../b4b57280-49db-4a07-929f-dbe2f222250c.json | 105 - .../6233aac6-0ce3-4f3c-8ee0-87d2482d3ea2.json | 105 - 
.../af2f579d-1e8a-47d8-8e44-a599bee83e37.json | 132 + .../51d4724b-c85c-4ad4-a4bd-9be93cd99a2a.json | 105 - .../763c840e-ea73-453e-8e54-5f4fd6fda9cd.json | 132 + .../4fb40ac4-a637-4b9a-b69d-ba551c0f0938.json | 132 + .../86e8ff02-0dd2-4023-ab18-359d24a8a4fd.json | 105 - .../285688d5-c7ad-437b-a54c-9e6108d85267.json | 105 - .../ffc4ef41-4a28-4816-be54-8ffd8e153073.json | 132 + .../85ce2909-a5f9-413a-8719-cd0a66874535.json | 105 - .../f75fe902-f1c7-4e6c-87d6-128688db8d94.json | 132 + .../8a7df636-f1bb-4a74-bb7f-8a412edf6bd1.json | 105 - .../dbd3098b-4532-441b-a81c-072c52579be6.json | 132 + .../438e4aa3-5e02-446e-bd3a-07ef724d24ff.json | 132 + .../79336acd-d465-4938-af7f-f7a688f46fd4.json | 105 - .../027fdc55-61eb-416c-b6ad-4408912d151b.json | 132 + .../ed000ee0-4193-46c4-8114-2ea3dbfec9f7.json | 105 - .../37a4895d-def5-494d-9b62-d8c97ba9350b.json | 132 + .../89f92d24-19c1-4021-819d-9c7ed717046c.json | 105 - .../0d53c27e-962c-428f-b540-35ab027883a8.json | 132 + .../24fa44cb-86d9-4e67-be8f-42f7fc574d52.json | 105 - .../6f7b2d91-24d6-442c-93a5-9afc88e9a308.json | 132 + .../b13652e3-43f1-4670-94f7-1a0bbf622f33.json | 105 - .../21793520-7d1a-4040-bb96-fa7fe98ae580.json | 132 + .../8201723e-92fb-4207-afa8-df7db794c889.json | 105 - .../59d53c40-5b16-4a70-a693-5fb554cf7614.json | 132 + .../e166fa17-c285-466e-ab2e-1eb106ebd271.json | 105 - .../983323f2-7caa-42cb-8838-8ea041303a70.json | 105 - .../b28a569c-6bdf-4547-a2ce-c3e224764be3.json | 132 + .../2de129c8-2259-4367-a619-85d9e8f61e06.json | 132 + .../a79378f7-01b3-4bf0-8b76-2e670d2a7366.json | 105 - .../1e7531fc-9f12-4c7c-8bf5-44511c37c23b.json | 105 - .../c242030f-fb2b-42dc-a5d1-687273b17282.json | 132 + .../3b3fdb16-b6e1-40c8-9ac0-02f1f2207eb7.json | 132 + .../64c0088b-f9e7-4a9a-b449-3e1b514370ff.json | 105 - .../d652c8f6-d5b4-482f-91c7-5eb9529765c1.json | 105 - .../ef6e8e0d-7ba4-45ea-aaf7-617f68f2e97c.json | 132 + .../7c72e837-92fd-4f3b-9c4f-205ffc93ac70.json | 105 - .../f8c131a4-1fee-4694-8753-88853418ef4b.json | 132 + .../169fe3b3-527a-408f-9442-5bc3616cc320.json | 105 - .../27dec9ff-fb18-43dd-949f-7c0587a5858f.json | 132 + .../060df34d-ab67-43e1-bd56-ebaceb77abd3.json | 132 + .../fd4405cf-9849-4606-a01c-a20459198853.json | 105 - .../060f29d1-8b1d-4651-808d-b1419bd76cd9.json | 105 - .../a6357673-3daa-4593-8593-2b65a7d5477e.json | 132 + .../07981f28-b019-42f8-b14b-44ab73ebaa0a.json | 105 - .../121d4877-1955-48db-a23a-6b0ad0623b9e.json | 132 + .../1f1eab02-219e-4ad8-af50-e103541e1c9d.json | 132 + .../4e72d3b7-4ebb-470d-8f86-66d6cb28095f.json | 105 - .../471aac2a-5c4b-4b1b-a56b-490fafc444d8.json | 105 - .../b4cccfb3-1c17-48a3-a211-a26c44de757f.json | 132 + .../05e97a86-681d-42a2-8a47-beade25d8fc9.json | 132 + .../f44f513c-0814-4f3b-94a4-9e28318da40e.json | 105 - .../6c0899b4-f066-45f6-827d-11c535ef0634.json | 132 + .../a4beba0f-b860-4d7d-b1c3-0f569ba59171.json | 105 - .../cd4408c3-d966-4195-bcf2-5bc80eca1501.json | 105 - .../f9660557-b9f6-4ecc-b260-c245f0e62b5b.json | 132 + .../64c75370-981d-43ae-9823-d4fb0696d468.json | 105 - .../89168032-5840-4c2c-821e-b3d717ade46f.json | 132 + .../10d0aa63-67d9-4dba-9bdc-db7ab3b4547d.json | 132 + .../404afbae-0393-48e6-874c-e1cb28e9a1eb.json | 105 - .../6f66ae5b-8cb6-4263-98a4-4a1eddfaca10.json | 132 + .../d53a7070-911a-4a5e-ba0c-766c4f39b3f5.json | 105 - .../25368664-1f32-4d69-9afc-91d58efd01e2.json | 105 - .../5e715199-7030-47b4-89c6-83ba0968c07c.json | 132 + .../3fca39e8-443d-47da-a858-83a68c18eec9.json | 132 + .../dcadbfb3-fbeb-4108-bc27-7ccfc7ba1e3a.json | 105 - 
.../41c47381-66d5-4d3a-8bfb-4269cb882385.json | 105 - .../b7518bd2-d3af-49e6-823a-f8d507e8e60f.json | 132 + .../0c21359f-8f0b-44a8-813e-a5f612f13658.json | 105 - .../fa399f16-1652-430c-be19-afaf5ab96be1.json | 132 + .../aa396cb3-10aa-4777-a185-fcb38ffc5ec3.json | 105 - .../cbe5032b-122c-4a0b-a099-50e998a4bc77.json | 132 + .../a863e655-ee86-4f39-ae1a-0a65992f7eb4.json | 105 - .../fd8c3209-dcc0-4d27-a3aa-d0f76ef86f8d.json | 132 + .../1a18d49c-ad7b-4823-abbc-7191e9d659cd.json | 132 + .../6a81c514-57b9-4a45-9a1a-0378e7554d04.json | 105 - .../9e2c614e-1104-43a6-9e8f-b7851562e01a.json | 132 + .../e9371530-675d-48d1-9145-7ea15c893833.json | 105 - .../3fefac8e-d5aa-4998-ab60-6e3dcc49f77f.json | 105 - .../7d4b83ab-9c9d-46e5-8cbf-b8afcf781230.json | 132 + .../a42b5d7e-be7f-4cde-aaf0-001e2cf05a44.json | 132 + .../f20fd926-d690-4fe2-80a4-3e79dc37f03f.json | 105 - .../21f6688c-be52-4352-9c95-d37c0a5f6c94.json | 132 + .../8fedde0a-96fe-4a6f-9e0f-87832cfd418e.json | 105 - .../a656eacf-8134-446c-8417-e1c3c54fe941.json | 105 - .../e92ba586-7bee-4a9b-b388-e35efde3d36f.json | 132 + .../0d276bd3-a338-4383-88b0-9e653ae01387.json | 105 - .../45ed0bb3-efbf-4a32-9735-d814aa08790a.json | 132 + .../150d0730-e194-4d2b-96e1-54f914b5fe28.json | 105 - .../eff28375-89a7-4970-9342-428b07d0c6f4.json | 132 + .../23877e30-b8fb-45ea-a803-47df757ea909.json | 132 + .../b23913b9-f774-4927-be16-874d8e146218.json | 105 - .../8bc25d04-9cc5-4551-a9c5-ce185c7ad974.json | 132 + .../dd12d7df-9b32-4d2a-ae9a-40304cf4bfd7.json | 105 - .../9574abe0-00e3-4e38-bda0-b217f002a480.json | 105 - .../d2d4b5a5-109d-4d26-a166-3d97b341584e.json | 132 + .../ac404d92-7a06-4758-ab1d-fcf840c2b995.json | 132 + .../95ea7fbf-d3f2-4fc1-ba17-05549f6e4d25.json | 132 + .../c101e272-24d2-44db-9b0f-2ed4d17cec41.json | 132 + .../d020a655-1cc0-49e9-9db1-f8b871babd5c.json | 105 - .../2cb789c7-dddf-42b2-8fdf-4cbd5132946c.json | 132 + .../a414aefd-ce24-49a9-b431-0c6014ebfbd8.json | 132 + .../44737b7e-4942-4496-a818-fddce66da4d6.json | 105 - .../91fcb6a3-d351-48c8-87e8-e2a06642e925.json | 132 + .../3cd90efa-ddf0-43c4-884c-84337ded14b2.json | 132 + .../c66c21e9-a332-40f9-ae87-bdd78a25d753.json | 132 + .../0b4def91-29df-45d9-8dd4-c4097ec47ba3.json | 132 + .../2cbf258c-369e-4b1c-863f-43cf97c3a7a4.json | 132 + .../8372889e-f9cd-4cf7-aec0-8e18d5c627e3.json | 132 + .../ce4cc270-57da-4d08-9130-62508b409cb2.json | 132 + .../fa64b745-6b4b-4fee-b77e-d744e54a17d6.json | 105 - .../401f6afc-9a2a-4bfe-87b2-daa6df848424.json | 105 - .../4cfedb8f-0e47-4008-9bc5-fb15e4afa607.json | 132 + .../de3c949d-bab5-4430-bdd1-48e1b7860934.json | 132 + .../011e53cd-409f-479b-9c3d-bfce75a1277b.json | 132 + .../1ff40e45-5be4-4625-9f66-5599a829903d.json | 132 + .../fed97d94-2949-4383-8f25-fa79bd413508.json | 132 + .../f4820bc8-7dfd-4439-af95-21b6cc9367ac.json | 132 + .../36e576bb-de50-49ec-a91f-f134c11bbe38.json | 132 + .../0edd388b-7a1b-4334-9b72-52d84653ff67.json | 132 + .../b3199674-328e-41a0-9aa4-bf39aec735bc.json | 132 + .../52db4d79-7040-4525-934e-0f33e4acec63.json | 132 + .../ee34821e-9182-433f-a8b0-745711e23738.json | 132 + .../10ef0990-5356-432f-b24c-dd107188ec5f.json | 132 + .../47de680d-33b1-4441-92da-4b97a5fc513f.json | 132 + .../96ac0351-2ade-4d76-bcf9-bc0f633f8694.json | 132 + .../31aae266-c14b-451f-8bab-62ee7d5d382e.json | 132 + .../f6edb102-e867-46d1-afdc-3c45166bd510.json | 132 + .../8b7756cc-9af3-4f98-84ac-7fef4c1bdaa0.json | 132 + .../dcf33a22-5e57-4476-a2cb-ebd60407a920.json | 132 + .../15659480-be0b-41c8-a463-873be444b194.json | 132 + 
.../0444c1bf-a3d3-4d23-bc6c-0a98c4dc1e9d.json | 132 + .../8fdc62c0-215c-4502-8f56-188455fe2d9e.json | 105 - .../93aa3a13-5069-410f-a1df-6944e0231e0e.json | 132 + .../ea928079-f00f-41b1-a628-c1539b41e63d.json | 105 - .../427ea7d0-c1f1-4cfe-b6a7-555262a7a317.json | 132 + .../c6dbe372-7a3c-487c-87c0-fb324c39f8c9.json | 132 + .../ab59c1cb-ac90-4fe1-b782-2e038734366e.json | 105 - .../cf8d99c8-8790-4bdf-bfc2-1a6d1fe35916.json | 132 + .../5b5d42d7-8012-46f1-826f-32d839806048.json | 132 + .../5e1bf2cb-55c4-4806-89af-cb9953c7c1b1.json | 132 + .../b5fa19ff-9b05-4d71-9d79-54f8dfe4a8ab.json | 105 - .../21ee4b33-9829-4cca-9603-c30fd4a1f7ff.json | 132 + .../a713dba7-110a-40a0-9d89-d48567d423af.json | 105 - .../c6c14a8b-0e9f-4b97-b9f3-27c7250fb8f2.json | 132 + .../f3024d7f-f25f-4220-973a-b0e19ecb5e1d.json | 105 - .../4756be0b-fd98-467f-a256-73aabba09c97.json | 105 - .../6586fa94-9f43-4814-8c8a-8ed244ac94e7.json | 132 + .../df7d7db2-867e-47f0-9abf-d71b79e97630.json | 132 + .../e2502e7e-3a10-49f3-b5c6-b20496fed998.json | 132 + .../51cde18f-09b0-4b66-a962-811ee49e192f.json | 132 + .../43d2e788-e186-485d-8c34-10bdfd7a6b65.json | 105 - .../4ea48b42-8026-4799-b35d-46757fd2753f.json | 132 + .../52e9b4ae-9119-4f26-87e4-6532d1148ecd.json | 132 + .../4bda68c0-cc09-4945-961b-48776b7b5fc8.json | 132 + .../b14fcc84-7caf-4aa8-b728-8a1287a5c04a.json | 105 - .../18ea0ad0-a216-4906-a96c-c8b040398dbd.json | 132 + .../eb307f58-db7e-44b3-bf03-7264a39bed69.json | 105 - .../1e2321f6-93bd-4acf-9f5b-c82807a40233.json | 132 + .../13032961-52a1-43cf-b69d-1802c43e1bcc.json | 132 + .../74d2724e-9d5d-4142-9cff-3fd40c931882.json | 105 - .../9d444061-2c29-499a-8906-77ef58aba34d.json | 132 + .../f7ca7fb6-b02c-4c27-afef-662bb62cd054.json | 105 - .../1c3dfe6a-28e7-4125-a802-1898336b1beb.json | 105 - .../1ffdf6b0-b3a3-432a-a0e4-69b4d447bb76.json | 132 + .../8ce733ea-e6e9-4f9b-ab28-f93202507265.json | 132 + .../ac67a9d9-0f5a-4891-a9e5-2a924fbf4f72.json | 105 - .../0e88aa91-609c-4d2d-9296-25b06eeb0342.json | 132 + .../aeac3ed0-e93b-4fb2-bdd5-1fd06ccd3338.json | 105 - .../2c99d2a7-7a5f-4357-ad92-745d8a718ee3.json | 105 - .../3e235ea0-3f04-4d99-9db2-7cafcbdbac6f.json | 132 + .../5e31a55c-f222-4192-b031-27bb40ba56fa.json | 132 + .../11fd4b70-4ea7-4bee-8caf-8921d4c89f24.json | 132 + .../6f36320a-dcfd-4e93-87b2-53763dde5c57.json | 105 - .../8e721067-898d-45ca-b4f5-9f523c4ce3d3.json | 132 + .../716552b2-6343-4339-b9f5-a573fa47c384.json | 105 - .../be5d5480-ce4c-4ade-8c6a-c08cd2826909.json | 132 + .../49532386-7e9b-4719-9c24-5d463dea6cfc.json | 105 - .../54dec074-29f8-4863-be37-2c08f6f2c3cb.json | 132 + .../88a15025-556b-469d-be77-c773f2c61038.json | 132 + .../8d0e995d-2859-461b-8be7-60d2b2690d6b.json | 105 - .../09b5771f-9ee2-4f4f-9fa9-e0280c33b00f.json | 105 - .../b4f4596b-17e5-40bf-ae60-0b17492ba9f8.json | 132 + .../97ce858e-a64f-4881-b6d0-0a2c0814336d.json | 132 + .../f4512664-c531-4b13-b76e-e96c2b03febf.json | 105 - .../1becd83e-e9b8-49c1-a137-80c5a8dbdf0d.json | 132 + .../ca2df1c9-79b2-453b-9cd1-b607e48f5dd7.json | 105 - .../1e2759fa-3e87-447b-b0ca-5a4e2e293589.json | 105 - .../337bb321-9c6e-4751-9c9b-d8ba0120dd07.json | 132 + .../19143059-07d5-44b2-b599-193147f6196a.json | 105 - .../cfa95cc9-5bb1-4921-97c7-078f2f929a2f.json | 132 + .../6d5ba3c4-a0c2-40cd-9766-68d36d21c5b6.json | 132 + .../c68859dd-6db0-4bdc-a031-92ac7d1d2585.json | 105 - .../1fb0056b-4f66-404b-89ac-a58185747ce2.json | 105 - .../6cc4404a-f3e1-47b9-b56b-34e4269e1261.json | 132 + .../8d820e43-ff42-4247-9ad0-4ed8e70672b4.json | 132 + 
.../ce4ee4fe-8a38-467b-b189-b25311c23c4e.json | 105 - .../d858ce8e-6a4b-46b1-8d51-03ebc2d8aaec.json | 132 + .../9813dd88-ff70-4d9e-86c5-9b73444275c5.json | 132 + .../a65af628-f518-4da7-afc5-7cba4234415b.json | 105 - .../ac677432-e7d1-4439-9c05-426059c285ef.json | 132 + .../018f270f-3cfe-403c-a236-483038a0b04e.json | 132 + .../718a40ea-26b1-4cf4-9584-57be798640ae.json | 132 + .../207a28a9-ae24-4a31-be95-96296b2e466d.json | 132 + .../72efedb8-d456-41ed-b1ae-4887cb6c18f8.json | 132 + .../ac91fb37-5742-4a3d-b93a-86c63b90cad5.json | 132 + .../387000a4-7ef5-46c6-9b5e-9bfe7c2cfc18.json | 105 - .../c71d025d-e954-4420-b397-e07c3644d1f4.json | 132 + .../968c3759-de5f-4255-ba95-cafc7a3c70a7.json | 132 + .../5e23b2f7-33f7-4e49-b73a-a02b8650ee0d.json | 132 + .../1b6c64f6-acf8-4cff-bcae-6e8b3725c6f1.json | 132 + .../7908f572-8886-4add-ae84-b4ec0ec17c26.json | 132 + .../9e04ec5c-2208-4569-9b63-4768ed4262b9.json | 132 + .../ee2c8beb-6566-4b19-91d0-8e48c12a3fdf.json | 132 + .../c7579616-0c21-443a-a149-0c51a0ae92ac.json | 132 + .../cfaafe4c-50a1-4cde-b092-fdbaeea86fb3.json | 105 - .../ef7a1429-db2f-433b-a606-339a9d868e7a.json | 132 + .../f531e13c-79ed-45da-a246-857fd2c884c1.json | 132 + .../0f525d93-663a-442c-9a51-1ad3a5054172.json | 132 + .../15af21e1-3193-47fa-a3fc-1f087216d4d9.json | 132 + .../67b270d9-3422-4770-9957-7bde65acca0a.json | 132 + .../33a06134-e58d-4bc7-8421-c5ae2f0dcd1f.json | 105 - .../e2d38bcc-9133-4051-82d0-4e4fd66e00f8.json | 132 + .../4ff256af-73c7-4a5a-96da-19546a786c59.json | 132 + .../225cbeef-1d0d-40fc-949d-4ba6696fb690.json | 132 + .../24fcd662-5abb-4bf8-b8df-1c21b048cd92.json | 132 + .../48433dc8-40ff-4e36-8c6a-ced33bc22e4f.json | 105 - .../7badcb45-7826-4fd1-b964-c697fbda76cc.json | 132 + .../bfb532f1-3319-46ff-80ae-0ca783a18bb6.json | 132 + .../ea304515-b41f-4e96-a0ec-78c897ebf9a4.json | 132 + .../1fe79ea5-1922-4a5e-8857-1c832353b0a6.json | 132 + .../8f0a6518-d153-43ec-b426-02136a2bc367.json | 105 - .../67915bce-0b54-4996-90f6-cec6def9bbba.json | 105 - .../9098d70f-cbcd-4f6c-bcba-0b1da743396e.json | 132 + .../898e5e91-c4c0-4494-baad-37c2bfd1931b.json | 105 - .../df4ed9e0-30bc-4a3f-b7a2-8955cbb38d31.json | 132 + .../f68957d5-20a1-438f-9931-6a787aaed467.json | 132 + .../416e0c04-9119-4230-ba71-b0f47e2d4997.json | 132 + .../e49441f3-99a5-4cdb-bff1-79cc21711bab.json | 105 - .../83e46bac-5266-4f65-a4dd-76240b297adc.json | 105 - .../d57780e2-154e-437d-ac2f-0007e1f9140e.json | 132 + .../027d464b-1375-4de7-aa57-e1473d16ba89.json | 132 + .../77cc280c-b794-4a9a-addc-e2eb0a1af896.json | 105 - .../22cbbb6d-1014-42af-96cf-1636fcb40679.json | 105 - .../a81f20fa-57e8-498c-a162-6d8a9be09ee6.json | 132 + .../8aa85bd2-eab2-491b-95a3-ac6321cbe298.json | 105 - .../d72ddbff-8ff7-446f-a74a-10a46bce6e3e.json | 132 + .../f681d612-f574-4641-b34e-95b6de97f9e8.json | 132 + .../cae1adaf-e424-4dcd-943b-5bbb708aca57.json | 132 + .../969ac825-92f2-448c-899a-226e69dee377.json | 132 + .../ba9ead4a-3d47-4a51-bc39-dbf72d7ff3af.json | 105 - .../2918f03e-3fd5-4183-be8d-2911e0204e8d.json | 105 - .../e108ad28-c155-4162-852c-0f588a136bdc.json | 132 + .../93cfeba9-7d31-45b4-a6e2-99a5f318f5b3.json | 132 + .../95abd2ea-1fb7-4ef8-b186-bfe67148e486.json | 105 - .../4a68c55f-ac3d-4173-a1cc-8bb97a2b8466.json | 105 - .../c1b16b84-9392-48f3-b483-0a9786925506.json | 132 + .../b0c6e08d-b426-49d5-8a66-ee3d70131b62.json | 132 + .../6a6651a3-b34e-404d-ac25-42c151fb9ba3.json | 132 + .../940d1360-047b-4c12-a7e5-cd002675c69c.json | 105 - .../da63b789-5571-4ed8-976e-146d385b18e2.json | 132 + 
.../7c100a09-f34e-4bd7-b201-3779ee5a769d.json | 105 -
.../87b900e7-3bab-4e60-b0ef-349667cb2656.json | 132 +
.../4c54b609-0af6-4116-b62f-1c8a4d68f06b.json | 105 -
.../c9fd4740-4990-4174-b782-9b63c34d6407.json | 132 +
.../2582a049-e940-408b-b2d9-7a7bdf470e49.json | 132 +
.../2cc4a013-ff0c-44b0-b2e1-66e103606e12.json | 105 -
.../99310118-d2ec-4647-85db-fcc22aee9161.json | 132 +
.../a152be8c-a542-4a73-8164-a43e1f04c595.json | 105 -
.../bedd12e4-da18-4ca6-ba51-6d13e1c80bae.json | 132 +
.../ebfb99cd-9672-4c30-9540-46e4035a0d43.json | 105 -
.../6767e14a-bbfa-4a0d-8120-1f48a565474e.json | 132 +
.../990d6877-4045-49ef-ae23-f5a6302185d6.json | 105 -
.../70260aac-1bbf-4913-9dcc-58633d055314.json | 132 +
.../c14766b4-5339-4c6e-87d9-fc2bb953e176.json | 105 -
.../0f9eeb32-85fb-4778-8618-436aa4f891ad.json | 105 -
.../fba6e1a2-c197-4731-91ea-f6d059ba8b16.json | 132 +
.../22e74d0c-70d6-43c5-be4d-62842d93fedf.json | 132 +
.../3c08189e-294e-4682-a7e0-e73a8d498fb2.json | 105 -
.../43ce0bee-e8ee-417d-be0d-841d6e26b330.json | 105 -
.../f7c33065-1da1-4da4-81c7-f2c9307b6e9b.json | 132 +
.../2e482de2-60ca-4758-9de8-4482e42a5b7a.json | 105 -
.../ecdb4661-426a-46be-aefc-7e04483cebc0.json | 132 +
.../236976b3-af46-45ac-a8a5-f5897e3468a1.json | 132 +
.../d7d1e48d-86af-4f65-803b-30fff69c78b5.json | 105 -
.../fd175296-a5f6-4914-80e9-b8b75bc659de.json | 132 +
.../d910bbaa-d55c-4b00-9320-856a8a6713c0.json | 132 +
.../99a5f123-5d2e-469b-884e-c9a64c6bc197.json | 132 +
.../d0ce5c14-28fa-4fde-901e-6670db6943de.json | 105 -
.../467a9428-e85d-489d-be59-91842b389732.json | 105 -
.../ed17a715-f0ae-461c-9618-ac952c450ec5.json | 132 +
.../3dd2a474-9ea8-4e26-8986-5bcc67c78c39.json | 132 +
.../5553fa1d-6bf9-469d-b870-590dd4965209.json | 105 -
.../b39e14a6-c05f-4e88-b2d4-63a199aa61a1.json | 132 +
.../39893637-552a-48d8-9b83-433415eb26c3.json | 132 +
.../38876858-0585-4edb-a4af-e4c71530429c.json | 105 -
.../f9549713-f487-4e26-bfeb-ec6d394b7014.json | 132 +
.../02579c41-f117-4412-9c00-ee7db3e9ab97.json | 132 +
.../211449c7-9b14-4d20-a599-58718e9c5e4b.json | 105 -
.../17b3cc41-69ac-48a2-9371-a5d1368dfeb9.json | 105 -
.../bfa1d761-00aa-4438-a5de-972d934c63d5.json | 132 +
.../20a84d88-05c2-4e02-8c84-2afa84cc659f.json | 132 +
.../95b94fcb-7aba-4473-b88f-36dddcd646c1.json | 105 -
.../0fa6785d-8db5-40f9-b259-3368ffb547d4.json | 105 -
.../84eedce3-3a93-4630-b914-aa281fd2efda.json | 132 +
.../7a295af9-fb47-484f-8748-af3ee245d2c5.json | 105 -
.../b3b7b62f-ac82-4ef9-9634-afb81645ec19.json | 132 +
.../1c020e50-fe68-40c9-a36a-7bec201f409a.json | 105 -
.../283c5166-b9c5-4d20-9653-0cd0346d87c1.json | 132 +
.../478b54cd-6410-41e5-8a53-4e46bcd9d7af.json | 132 +
.../7230c1f3-d7f6-4a96-8308-b2d5895a0a0a.json | 105 -
.../de2ae7a9-93eb-4149-b3ff-b5b7dfba29c4.json | 132 +
.../ef5aa9db-804b-4a53-9c22-9c99f6c69eeb.json | 132 +
.../553fd36d-08dd-46a3-ab04-77b9039e7921.json | 132 +
.../b0f68843-2f49-4d2a-91ab-ad8d07791125.json | 105 -
.../e2bae853-cc0f-456a-a635-98d5f87ac47c.json | 132 +
.../e8313b88-13ee-4926-90f8-696b0604c7b9.json | 105 -
.../d6c5f196-c97b-4a0a-81b0-59143ec4b10e.json | 132 +
.../5d92e02f-b590-4b6b-8c64-30690f79e916.json | 132 +
.../ebc2a3b7-30e9-4608-a8c0-ea90a308c0e5.json | 105 -
.../172f121a-3843-4b01-94e1-a95001909bb8.json | 105 -
.../e10f38df-b5d5-47c6-924f-563c6f8a6616.json | 132 +
.../27257dc9-750c-4673-8865-986434bc5c0e.json | 132 +
.../fd23ba4a-a0ce-474b-9aa4-b5295d872028.json | 105 -
.../cee1293c-54fb-4275-b5a9-0215e5f9a4c0.json | 105 -
.../e599f3f8-e5eb-4bfe-a102-efc5a967434d.json | 132 +
.../6d7f1ac9-66c8-4700-87a9-0e413fc8878e.json | 105 -
.../8e56f2dd-49d0-4eff-beea-53d01cd96f0e.json | 132 +
.../f1a2b5d0-2c8a-4bbc-8bc5-0484485c2dad.json | 132 +
.../2c12ee67-0c77-4cb2-9e88-1c731ed55c3f.json | 132 +
.../567f8f54-225f-4d9b-be06-f24091adc1e6.json | 132 +
.../ebb59730-9522-4c45-8f42-c0d941fd728c.json | 132 +
.../2c44fa8c-ebd3-4ea6-8578-61da38965c09.json | 132 +
.../3ef26b8c-6bfb-457b-a160-a65c3cc8b0c6.json | 132 +
.../0ab721ba-fbda-44ca-a349-1d3abfaabe62.json | 132 +
.../2fea1128-4f0c-40d8-be87-72c42c0648fb.json | 132 +
.../db9dc9d2-4aa2-43d0-9f2e-15fbd05af62c.json | 132 +
.../28399fd0-840c-49d3-8179-407ed83d3bfc.json | 132 +
.../d7108c13-e14a-4366-9a39-204f853b1bee.json | 132 +
.../56152d05-9273-4701-8c0a-723e2cab618d.json | 132 +
.../55d2f23d-cb6c-42d2-8b57-837451d3c6df.json | 132 +
.../7479ae87-e795-4e20-848a-291614176def.json | 132 +
.../04ceb40e-bde8-487b-9d29-dc8f681af9be.json | 132 +
.../e26b00b0-d9df-4ce2-a649-b19f8957b8ce.json | 132 +
.../9954194c-69b5-4eb4-8b32-859845548cb0.json | 132 +
.../2afbc279-242a-4276-85f0-facd29c2d89b.json | 132 +
.../ba307ad4-3647-4785-9bf1-cd4dacf3c71f.json | 132 +
.../d03c73ca-7364-4517-aea4-f0ac564c49df.json | 132 +
.../1dd4b82a-ca80-4c9c-8800-f97ab2b9cbe7.json | 132 +
.../f2363099-c39a-4874-bf77-ccc0fa087680.json | 132 +
.../596eeee8-3600-4f8a-8888-978b610eb2ca.json | 132 +
.../595ddba1-c450-4b69-85b7-0e3118c8c6c7.json | 132 +
.../64890314-bba0-4fb2-8c21-38b413cff4c8.json | 132 +
.../470b8b0d-fbaf-408c-a28e-57d1b294f8a8.json | 132 +
.../00a1579e-8636-4eca-9a63-c0b067a5f3dc.json | 132 +
.../a52cc4c9-6d60-4083-ac77-591e247d86c9.json | 132 +
.../ac5c321a-d35a-4e0f-a1be-bcc0b7109f91.json | 132 +
.../c4d11b01-ae5b-4198-b102-07160f100a41.json | 132 +
.../19405ead-2263-4613-8053-43beeafb4bfc.json | 132 +
.../6c698a60-a813-4be7-b55f-b684029b492d.json | 132 +
.../b67c4a44-7787-45e2-b88c-5d7e8e496fa3.json | 132 +
.../a20a529e-c52e-41b7-a8ee-909167048bfb.json | 132 +
.../2735e6f4-839f-4ab1-8ede-3447891b1b26.json | 132 +
.../e74e7e7f-8550-4cba-97cd-2626c82d6b29.json | 132 +
.../14f4c00d-8915-413d-8e85-79f395127682.json | 132 +
.../9119b586-d3b2-4ce0-a243-d584e2087184.json | 132 +
.../629f3f1a-f8ee-4d1b-b604-7bbd35c6517b.json | 132 +
.../a6ac828c-904b-413a-a5fa-a5ed06a28143.json | 132 +
.../251a3ef9-c7ae-4d79-8a60-4bc021a3f001.json | 132 +
.../243b045a-8442-41fd-a483-e4e25b771048.json | 105 -
.../962b48a3-23d7-4104-b34d-4e5c2af31d58.json | 132 +
.../2f3f0dcb-a62d-44bd-b86d-c1f403d5b833.json | 105 -
.../e4b0be31-6f9a-4a57-b433-e561da9bd827.json | 132 +
.../9a31f208-b7d8-4baa-b96e-99926ecb35af.json | 132 +
.../318afc06-f294-4253-b1c9-173a7f56083b.json | 105 -
.../8d933df1-60cb-471d-bfc3-b11c93150203.json | 132 +
.../35315c3a-ec06-433a-b3fa-ae7a4a59b7ea.json | 132 +
.../f5e5662e-803e-4f1f-82e7-14a2a189ed6d.json | 105 -
.../0cf3db2f-9b23-4602-ac92-265bafd36410.json | 105 -
.../3530db9a-0d61-4cf8-9fff-b15f6488c845.json | 132 +
.../7d9901e0-eafe-4d49-a5bb-fab059708bcb.json | 132 +
.../93f56942-30d8-4a0f-af8d-901fb264436c.json | 105 -
.../ee7f9025-bb2c-4902-b8e2-bfac2b63d2fd.json | 132 +
.../4414a96e-0664-4531-9c0f-3eb4a062fbe2.json | 105 -
.../6157f79e-2673-4ad6-99d7-e5cf5e4e1db2.json | 132 +
.../0aa7572c-1aa6-4997-a2a2-3b557fbde639.json | 132 +
.../6f5df760-2d3e-47b1-b55e-4031a5f11d41.json | 132 +
.../ac676b03-c3ce-4ff1-83fc-5c8db82f1497.json | 132 +
.../2229cdf8-3ecb-4f11-8824-9c3bfbf6f968.json | 132 +
.../e841483e-042b-4a2a-8dbc-9ed7529f7618.json | 105 -
.../95ebc5b8-a541-4fca-9e7c-692720e73362.json | 132 +
.../ebcd5d63-5c91-41d1-b9e2-0bafe7170000.json | 105 -
.../09a2508d-a171-493f-9ff2-e7f375815c91.json | 132 +
.../cf030461-1234-48ce-a025-ba0f52cdf191.json | 105 -
.../12a4a921-5859-4fd6-9d64-677a7d8ef696.json | 132 +
.../ad87ba77-99a9-463f-aea3-1d29fc0317b0.json | 105 -
.../6fed7e5b-9692-40f7-913e-fc3b57b8c72a.json | 105 -
.../b79f12d0-cdfc-4c9d-a88b-40612dcbf64d.json | 132 +
.../2bb06e2f-9aee-4ac4-b9a6-fe537c2c9890.json | 105 -
.../d162cf7c-3ef4-420f-aab4-789a98b1195a.json | 132 +
.../7e49018e-5e2d-4cdb-be5b-2ac04ec84bf5.json | 132 +
.../a73461e6-a1f4-43c9-9a0f-f03c9be46276.json | 105 -
.../24677f2a-ea89-4289-bcb6-13699de9782f.json | 132 +
.../2f5caa38-56e9-4740-baca-22fb02e57150.json | 105 -
.../3e09df3c-2224-4a29-8e55-18a485db2b25.json | 132 +
.../9836e2c7-30df-421d-bf02-d4434f97d990.json | 105 -
.../9a263094-fb31-43b9-9307-6ae5f64f82c0.json | 105 -
.../cc0bd236-8fc4-43d3-a18f-4b2afb112946.json | 132 +
.../5afd4c0f-b61d-452f-8c48-d298780d91d5.json | 132 +
.../6ce53368-e6b5-45a1-a997-ca5468f27c13.json | 105 -
.../dbdae48e-5023-453f-b15f-cf779068e030.json | 105 -
.../eac52141-4fd8-4e21-9c78-920ab8933e5a.json | 132 +
.../31f784e4-bded-48d8-b7a6-7936b5d21d9e.json | 105 -
.../8449837f-64ac-4293-b1f8-210e62779202.json | 132 +
.../684a3a6e-c74d-456f-b80e-c099b8c9973c.json | 105 -
.../ab8a665c-8234-484f-a8a9-8ee79d73edff.json | 132 +
.../a954242f-41a6-49d7-a71d-3bfe940cdb92.json | 132 +
.../f2f250f7-8cb0-4076-b2f0-7cf8ee911532.json | 105 -
.../2a4428d4-a6c9-427c-ba67-72f08b590b8e.json | 105 -
.../6d1c518f-3f42-49eb-9208-b30e27e7e87e.json | 132 +
.../87931db7-42a4-48df-b5a5-8bd934061dbe.json | 132 +
.../dfdcfbfa-c023-40bf-b5e3-632b45f28aab.json | 105 -
.../54088dbc-04cc-4b35-b4e1-e495b7cfd47f.json | 132 +
.../f91abb9a-6690-4fec-b1a7-f519dfe66d24.json | 105 -
.../7129efad-8ab2-4f7a-b6ed-055989b3e131.json | 132 +
.../cfc6f85f-e4b6-4164-b7eb-4efb888e1ba5.json | 132 +
.../0f053a45-cd79-4e51-9b4c-ae5c51006c17.json | 132 +
.../d8002b35-1454-4635-a31e-b419c7000b53.json | 132 +
.../4c08530e-d529-49a1-a3fe-2351c422981a.json | 132 +
.../d16879dc-7ed7-49c4-aca6-4c9cd3b3a350.json | 132 +
.../70656b13-e0a2-4ef4-af43-0d9995d57af6.json | 132 +
.../6544f1ca-02a6-4e58-98f0-e19cc6082682.json | 132 +
.../5cd3796f-fb31-49c1-a974-019c5c5b20ae.json | 132 +
.../49eff9ad-90c9-43b1-a1f5-cf371ac4b39b.json | 132 +
.../59720f7e-7e09-483f-8332-8dc7aa19ae78.json | 132 +
.../877421ae-8135-485f-805e-489ed70dc886.json | 105 -
.../a3a89e4a-0589-4776-a1da-227552482e94.json | 132 +
.../b3c04d1f-80e3-4d86-9779-c5e4bbce6f35.json | 132 +
.../bc2d14fe-000a-40ce-a57c-c00fe584a7e4.json | 105 -
.../3c196d70-44ad-419c-8c4c-80fc7f184687.json | 105 -
.../448fda35-bfdc-42ae-90f9-d44383e0a454.json | 132 +
.../0d97542e-82b6-4f27-9822-62b67e7690c2.json | 132 +
.../80a7b60b-77f7-4dbf-96c8-071c56179fec.json | 105 -
.../2725bd69-839d-4427-8e05-0e289fff70de.json | 132 +
.../adb71488-adb8-4848-bf1d-aecd04cb6718.json | 132 +
.../c7736577-c4c3-4233-9308-a4bb9b2dbb89.json | 132 +
.../76fe52f4-9fa5-4ccb-8c92-7bd9eb9886ee.json | 132 +
.../1d92e45f-c5a5-4dd6-a61f-8e0f7246117a.json | 132 +
.../5e1513f1-4375-4380-85fa-b96a419c013b.json | 132 +
.../a18a259d-1795-4848-94fd-3b9c3abfb9da.json | 105 -
.../fadbf3b2-283a-4f8e-9acf-463d75924b97.json | 132 +
.../1904c811-34ae-4f52-9978-622bc6dd6f2e.json | 105 -
.../c04ffe5b-c313-4249-83bb-bbe07ad6fc69.json | 132 +
.../a9aa164e-386b-4987-9f49-2dde64ade45c.json | 132 +
.../e454276c-3113-49f8-9397-9c1ad5e7bcc5.json | 105 -
.../61173be4-9a87-4dfa-812d-b414b4d2bccb.json | 105 -
.../e4c1b3ef-e1db-4eca-b818-f3b1680cc5f0.json | 132 +
.../1ab95edc-ea3c-4d3f-9f59-dc7f7468adb9.json | 132 +
.../2d6ff76b-9d81-45a7-8768-6a240b5395ab.json | 105 -
.../3dc51dce-222f-455b-b61a-04904c7fc855.json | 105 -
.../80a81bbc-6edf-48b9-afb7-e4e0a03753d8.json | 132 +
.../16a8882c-12f5-46d0-8e1f-88b22aa8f08c.json | 105 -
.../afb24bf8-3c47-4278-9b84-19b05017745b.json | 132 +
.../4f8cda4d-959b-41ab-a79d-d2b35968eb89.json | 132 +
.../7fe4c32b-0bbd-49c0-9e4f-43306457aae8.json | 105 -
.../2818aa8c-5c73-4de9-bcbe-fd8f68e8bc6b.json | 132 +
.../6a683ead-0f3e-449b-9ae1-8afc9f1ab33d.json | 132 +
.../99b31db9-55f8-41c2-9eb9-f21511deccf0.json | 105 -
.../1bce579e-9fac-46a9-92ef-48080832abbb.json | 105 -
.../38cb02a8-862d-40e1-922a-e65f537df87e.json | 132 +
.../49fef1c9-bf18-465c-acdb-b8f17e93dbad.json | 105 -
.../f816e2a7-2629-4abe-9ed0-3d1299e95194.json | 132 +
.../286fae5b-544a-4033-9092-d633fc80f47b.json | 132 +
.../ae9ceba0-8e8a-431f-a762-7bb6c55b4757.json | 105 -
.../93477bf6-ea00-418b-8a2f-975a9554263e.json | 132 +
.../f18c51de-f5eb-4986-8c44-35bd71db5e8b.json | 105 -
.../3d7c6576-f99c-4bb3-94fa-4f713e2898f6.json | 132 +
.../d1e9a242-941f-4461-b75b-7043c2c01ef7.json | 132 +
.../4ffdc303-b5e4-45f0-839c-432f04dc5d57.json | 105 -
.../e39661af-ad93-41d7-8892-1230064f1a1c.json | 132 +
.../595b61b2-5220-48f6-91a0-3aa0d37c63d8.json | 132 +
.../628026b2-efc1-4592-a85b-f5d2ea1dc1dd.json | 105 -
.../3173263e-2a42-4e8d-956e-8175ef464e76.json | 132 +
.../d04d6474-5784-4492-8347-a2bc03eca6ba.json | 105 -
.../6a41fcba-f13d-4839-8a91-ff3f18de5114.json | 105 -
.../f77f8291-1573-4fb6-a984-1cc099c09621.json | 132 +
.../c4681e14-513c-4e5e-af8c-88ca11849176.json | 132 +
.../d5135349-0757-469d-8ad3-80ef56d1f7de.json | 105 -
.../0c220edd-2563-4fec-99a4-ef8c210ca5ce.json | 132 +
.../2609af14-3cff-4b19-9741-e1caca56f58a.json | 105 -
.../3edef2ec-9fad-45ba-8fde-4af5c4f24d69.json | 105 -
.../bd7ef5a7-aa75-4eb4-8860-aec63f8bf9d1.json | 132 +
.../65ba6556-712c-42cc-817b-ad8c2014dc4c.json | 105 -
.../85c20522-03c0-4dac-a1c8-2945e4bf0e0e.json | 132 +
.../f180fddd-077f-43f9-b2d9-38c5f33be44d.json | 132 +
.../f3a7f01c-2893-4887-a210-d126d9135edf.json | 105 -
.../ef384329-8406-4767-ac1a-3eba3131f726.json | 132 +
.../2ddeae27-77d3-413c-a6e1-9de0f3980c4e.json | 132 +
.../ba1129fd-f158-47ad-b194-7cff794b9ef2.json | 105 -
.../19bba814-812c-49c2-acf1-9d056fd7d62d.json | 105 -
.../38b2dbbe-be86-4ef0-a39b-89841f662141.json | 132 +
.../999a8091-22bd-4c08-bee1-772202e7edde.json | 132 +
.../fda91d98-d259-430c-929b-78852cab64ec.json | 132 +
.../535bfa4f-ab63-4832-9f17-7b245ff2b2af.json | 132 +
.../681a6cc5-5519-4b13-8b50-93adcab4a3f7.json | 132 +
.../141dd12c-6901-4a96-a051-f35647ddcc73.json | 132 +
.../5b095779-aacc-41f3-9a3f-83f64a1c0d4c.json | 132 +
.../7a88c95a-b253-4f36-8fde-1b0158bbf0b6.json | 132 +
.../7938a00e-4e11-4223-a900-fa53df168ab7.json | 132 +
.../8f966b4e-1baf-445f-9f10-4ba6b47aaf9b.json | 132 +
.../247ee47c-e441-4020-97e3-14e3ed8d22c9.json | 105 -
.../a334d998-21a5-4108-96e3-9935507a9f8f.json | 132 +
.../941e27c6-81da-4ce1-b1c8-544c1426cd11.json | 132 +
.../e4e4d8f4-7e49-4b08-8a08-97e4e2c28616.json | 105 -
.../b34ca7d7-6049-4f4f-a2e3-db736009fa4d.json | 105 -
.../e409a374-685b-482d-82e4-2436dca37309.json | 132 +
.../84713625-97b6-4fad-982d-41b5c500d73a.json | 132 +
.../b7edd9ab-a018-4b2f-9b01-b56cbe98abda.json | 132 +
.../ec896115-21ef-4337-9fdd-32a04c574a05.json | 132 +
.../a2b990cd-e692-44fc-8b39-ac91eab85cef.json | 105 -
.../d8e5f49b-7bf3-41d4-a91e-c566219609f6.json | 132 +
.../24684939-5eb8-40b1-99dd-1ebe693680fc.json | 105 -
.../ce1a92a3-6bec-410f-ab42-c567c5d23856.json | 132 +
.../0a125470-b50f-4ca0-90dc-1f6b69c3ccd4.json | 132 +
.../f6a36220-0b31-4b0d-9262-7e0e508e64db.json | 105 -
.../0e288116-902d-4fef-9020-a3a4dc80e698.json | 105 -
.../aeee0165-ac7e-4da6-8102-ba60f43587de.json | 132 +
.../9d135662-43d6-4b05-90cb-5d2c856b0b89.json | 105 -
.../b47b8666-2556-45df-ba5b-9a5e94186784.json | 132 +
.../0bde5d57-39be-4497-a2a8-d08d3c8d65f4.json | 132 +
.../7636a893-1404-4257-9778-653f3cfb601b.json | 105 -
.../86599961-3ec2-4837-89a4-809f1dd7226c.json | 132 +
.../cde00174-ac52-42da-9641-0866739232e4.json | 105 -
.../cef8e01a-071d-4ee4-997b-44679ef5b56e.json | 105 -
.../dc3ca25e-41b2-4206-afaa-7d2d10fd27a7.json | 132 +
.../26787f2b-8f30-4cc8-b39e-447b8c53aa85.json | 105 -
.../cd77d407-3be3-4b84-8a73-34a15744de93.json | 132 +
.../1cd20db5-0225-4724-b1f9-7c32eae456e1.json | 132 +
.../e94a0550-93fa-448a-a4a4-187fd1b7d24e.json | 105 -
.../af17be77-0ae3-4b90-ba85-a4886450cd43.json | 105 -
.../dfc45dc3-51e6-454b-aee9-ea6b0714f0ca.json | 132 +
.../01e4cd19-4f1f-4c30-b80f-e1d287d5d7c2.json | 105 -
.../3da2a408-672c-47b8-be32-61f56a15e9f3.json | 132 +
.../94700c3c-f18d-4f96-a794-65bcf483fca9.json | 132 +
.../6f3481d4-076f-45bd-8564-d485109c7a63.json | 132 +
.../9f5ca3b2-747a-4fd0-b382-bf7ef503ba25.json | 132 +
.../f1932041-263a-4841-9c8b-c6cc9fa50c21.json | 132 +
.../141239bb-c7e3-4c38-b289-12cd59f592d2.json | 105 -
.../691bef38-bc9e-4f8d-b774-9d7c62eec72b.json | 132 +
.../5795f693-9ebc-47c6-9d2c-185dd0d32044.json | 132 +
.../349ae5f5-55d0-4486-a6dc-2b5644fac045.json | 105 -
.../eb83f474-0d3d-488c-bc0f-93e5d1dfb2f3.json | 132 +
.../3c942d2f-0b53-498e-ab05-71d5075cb974.json | 105 -
.../f93b2053-11c4-4868-860f-90fbfe8288fc.json | 132 +
.../8984fe95-9fd3-48ff-aa5f-18df63ecd6bb.json | 132 +
.../fb66b283-bfd6-4437-95b7-d74a0d8d2814.json | 105 -
.../a0f6f5de-578c-4290-85b5-c51aed985074.json | 132 +
.../8ccc76ff-25c9-4706-b6a8-31b49f8be813.json | 132 +
.../1d91cdce-0bdb-4567-9296-6225db3aa0bc.json | 105 -
.../924f8b31-506d-4df2-8a7b-d0cd66d55f6d.json | 132 +
.../8e7dfd9f-350d-406c-811d-453f1744dd53.json | 132 +
.../d1875dfd-05ab-4a49-8c7f-02cddf35a695.json | 105 -
.../62b12d95-1da2-407c-8552-8c5e951c5c85.json | 105 -
.../b713d1d2-351f-43a1-b77d-27723e1d4267.json | 132 +
.../322a9442-174f-4223-b839-6f8f9664d5e5.json | 132 +
.../56f36430-4bb1-425d-ac4b-30d85237667c.json | 105 -
.../b12e71d1-c435-4172-a28f-38e26791dadb.json | 132 +
.../ad33b0e8-39c8-4118-81bd-bc86b482f122.json | 132 +
.../53cb44c7-f7bc-40fa-88e7-511b9dfab004.json | 105 -
.../db8a7864-293b-45e9-995b-5301071c902d.json | 132 +
.../31e3beea-28dc-4b47-a5e9-5fafc89226db.json | 132 +
.../49315a95-394f-4508-8e6c-7c1d5547c257.json | 132 +
.../c604f0fb-517d-45db-9e1c-6c911bce43e7.json | 105 -
.../375d3a94-97af-47ef-82af-afd7581663d4.json | 132 +
.../fba2ce2f-6c30-4af9-ae3a-d23f39f3f963.json | 105 -
.../77cfe896-4aa1-4bcd-a39a-f437c3f7e738.json | 132 +
.../ff308837-dc35-4257-a4cd-de463feb733e.json | 105 -
.../3d69ec7d-9999-4e16-8dc9-99fad35e156e.json | 132 +
.../d8145a39-f1d0-4b6e-958b-a96585eeec9f.json | 105 -
.../c005ab13-1d42-4e28-802e-12438aab35a4.json | 105 -
.../d2a7459b-8a12-4529-b978-c7237979f16b.json | 132 +
.../38dd1b21-b357-4daf-94b3-c4a28809e56c.json | 105 -
.../e7a228ad-69de-471a-9f31-6bdc7221999c.json | 132 +
.../9196ae39-adb0-4d53-8399-0ccd4d628065.json | 132 +
.../f3623b9f-3e3f-4b7b-a9f5-f0a15bf26f48.json | 105 -
.../b78ef40e-91b1-401d-9576-1ac2f600b32a.json | 105 -
.../ea318f99-a1ab-41ed-ae5d-39c62ac40e1b.json | 132 +
.../05f69fd6-a77e-478d-ad86-3e83e615e892.json | 132 +
.../6613aff7-8f26-4b74-b08b-37fbd7990e42.json | 105 -
.../5b8e9508-befb-4674-bd84-9c722a0864ce.json | 132 +
.../86023703-88e2-4219-b38b-4c871e2ee381.json | 105 -
.../8beb3730-23e8-4b89-933d-2d3f1a1d1365.json | 132 +
.../07417712-1933-4920-8964-67ba74bf6d01.json | 132 +
.../eca9180f-20d5-4bcd-9a74-e2f69c4ea4ad.json | 105 -
.../65d32305-4f23-4041-a107-8625822c1322.json | 105 -
.../ae4cc05d-a65a-4f18-a99c-f133603686d1.json | 132 +
.../195acbac-1db7-47ed-907f-98e312fc8921.json | 105 -
.../54df4d3e-0ef0-4e30-aa46-b47a4589a34c.json | 132 +
.../349bccfd-1816-4845-a1b9-2d9f4936adea.json | 105 -
.../a717d466-9157-4991-8459-f39847d914a2.json | 132 +
.../15a8789b-27de-49d1-b3e5-9b1fc9b5694e.json | 132 +
.../c2e26b8a-3a12-4cb8-888e-96affc8cbac9.json | 105 -
.../921562fe-cc21-4ff3-93de-a62e1d4bf7e7.json | 132 +
.../a70222dc-0589-4f09-ac8c-3ff4fa72328f.json | 105 -
.../19eb8f3a-ca9d-4da4-8e7e-96eebfd33576.json | 105 -
.../863969d9-e567-43cc-a0a9-7f80eaba374a.json | 132 +
.../2987fa45-363e-4a07-8e9f-db01586a135b.json | 132 +
.../e44eddb9-9764-4bc9-be85-ec7995846da0.json | 105 -
.../3488de21-d9a6-49e8-ba8f-d9beee9bdabe.json | 132 +
.../0cacf042-6b62-4b67-8821-97cd703788d0.json | 132 +
.../ea823c15-3c92-4a67-a4fd-7826a9dd9e41.json | 105 -
.../9f0dfceb-1332-447a-bf6f-6c6c40686a6f.json | 132 +
.../bfffc240-22ab-4cc0-97c8-466ddf472ac4.json | 105 -
.../c1308f95-6d55-4ff6-b14e-1bd09b467d99.json | 132 +
.../fee7966f-3e1b-43d9-b129-b0c23aac53b5.json | 105 -
.../05dc0500-be97-456f-9d12-12192626ea39.json | 105 -
.../4ab16120-8d39-4dea-aa76-5c249506848d.json | 132 +
.../7cdfef58-c871-4158-b97d-ed843f7d667b.json | 105 -
.../f9647ea0-6464-4aa0-b1ea-a994a7bcca3c.json | 132 +
.../713b1c64-9637-4d83-aee9-f81988fec0b5.json | 105 -
.../c5ef47ab-2e73-43d6-b9ea-1ee7e50d9df8.json | 132 +
.../9ef7a4a0-b751-45ff-ab1f-d50687a3f4c3.json | 132 +
.../c315527d-ea14-42a8-a002-4bb67c085fc0.json | 105 -
.../7ed9dcc6-7915-4a7e-a190-07e067d2fd79.json | 105 -
.../8b303795-557b-4fa1-bbc6-d36bd77ee739.json | 132 +
.../3dd99496-1274-439f-b7c2-1fd731745753.json | 105 -
.../7fec288e-0b0d-45c0-b0e6-17b905cd7ea3.json | 132 +
.../5a09783b-82da-43ae-a607-2cfea550d931.json | 132 +
.../dc89616f-c86d-41d0-9945-12703dc8f905.json | 105 -
.../2d22ab53-547d-41bb-8700-12bc5b16c97d.json | 105 -
.../6c2d191a-a2d1-459c-b2e2-5766bec62ce7.json | 132 +
.../09bbb732-62d8-4cec-972a-273b728df1f4.json | 105 -
.../121cb5fc-2fa2-4718-b325-c40014802e40.json | 132 +
.../8bbfa040-b16e-4116-ad3e-b3e4e58a7de6.json | 132 +
.../a0dde1eb-a763-4568-8122-1b280dedb2ce.json | 105 -
.../c56d7463-dad2-4c9c-8823-a4b6faa5aeb9.json | 105 -
.../c8891914-c9fb-4b4d-9592-826f04520e7b.json | 132 +
.../e77ffcb3-c7d8-4700-b4ea-fe4e5ba94223.json | 132 +
.../f626897d-5003-40fa-8020-c100748a847f.json | 105 -
.../da237415-f34e-4cbb-9a94-3ff621f3df8d.json | 132 +
.../35c401bd-ed12-475e-afbc-e664243d90d5.json | 105 -
.../479f3bfa-d614-46a9-88c7-9891852b0d8c.json | 132 +
.../a70e7642-3cc7-4719-bc22-68182baa3857.json | 105 -
.../f5f0c7da-fb03-4023-81a7-801b0729a19d.json | 132 +
.../40f51424-2922-498d-bbbc-d500667a8554.json | 132 +
.../ee38e1c3-7a6b-4357-94ac-b309da33d14b.json | 105 -
.../4f25d177-6bcf-4864-87a4-1beb21a7373d.json | 132 +
.../cfb61ec3-ab7e-4697-892e-a8dd62518f39.json | 105 -
.../b160ab1f-be6b-4dfa-8fa9-36fc65a64782.json | 132 +
.../c4a79914-b049-436b-9de6-640cc3e119ee.json | 105 -
.../9b753075-a150-4bc3-9425-2371010daf8b.json | 105 -
.../d497a7e3-11c2-4e0c-8788-091caabede56.json | 132 +
.../2b50b73e-9734-4502-b088-8d4936291aaa.json | 105 -
.../4a55bcf2-e1c1-4fce-8f79-472dae869b26.json | 132 +
.../156424f1-2a1e-4e61-b081-bb066ee3958d.json | 105 -
.../5b00dd5e-0ad3-4ea0-aa0d-2327d610e6a6.json | 132 +
.../1c80d383-1ccb-4f32-a63d-dd3954fe5f6b.json | 132 +
.../e3d7453d-0ba6-4980-be81-827122149bb6.json | 105 -
.../75065074-7ef6-41ac-be7c-496cc458640a.json | 132 +
.../814ce716-6f61-4980-a8f6-7918c7b0eea5.json | 105 -
.../49a0287b-48d7-44db-bf20-a084919d332f.json | 132 +
.../ccb33ad4-98f5-4980-a442-1a1772fab792.json | 105 -
.../7b2861ee-58f9-4ac9-99ee-2ec663e1b157.json | 132 +
.../c50c07fc-b529-43c9-9f3d-0f1ff174b905.json | 105 -
.../51f419c6-1107-41c9-896b-fadbbde4f5e9.json | 105 -
.../628542f9-fac6-42a7-8ec5-5cd93f977a7e.json | 132 +
.../1ac547e3-1b29-462a-aa08-1e9ef9e3f409.json | 105 -
.../5b0924ae-cf52-4245-a687-91e4b1742c16.json | 132 +
.../459c2b98-c3af-4334-a4bc-13334efe49b8.json | 132 +
.../dc6a9e35-c130-4edc-93bc-5f0b6ac0e05d.json | 105 -
.../7533defe-b19d-4571-a403-c443ec03a31b.json | 105 -
.../b2780aa3-d299-4180-8441-dd54e94255cb.json | 132 +
.../dea8c833-7deb-43f8-9b15-acbadf4fc749.json | 105 -
.../f55d398d-0555-4e89-a37c-def04741a0dd.json | 132 +
.../63caf8f8-9e55-4ef6-ae76-ee7184a50675.json | 132 +
.../fe0b75bf-2035-4ffe-8cbf-d5f4c66907aa.json | 105 -
.../37f890b7-5487-46ea-b61e-d91b5349d078.json | 105 -
.../f82ccde3-bd3b-499c-8b8c-182822392cea.json | 132 +
.../8a52fb4a-d6ae-4c8d-aed0-2137e0a83ea1.json | 132 +
.../b7cbc2fb-2c52-4c13-9266-52103421f2ee.json | 132 +
.../cfac443e-5c66-45e3-bf7a-7c596d01d4ff.json | 105 -
.../3efa12a5-4525-4ee9-80bd-99c4b8d2ccb2.json | 105 -
.../f4474361-e897-4dbb-a89e-5451a4724474.json | 132 +
.../773228d8-7e03-4ba8-87c1-f59ac5aad425.json | 105 -
.../de257b5e-4629-4f8a-b08d-d2ca372593e2.json | 132 +
.../a37aada3-104a-488a-898f-245ff257de46.json | 132 +
.../8ab4e441-2efb-4510-87ea-43f3fbcc67ac.json | 105 -
.../d9d655d1-d94c-483a-a3a2-ca196e1391d1.json | 132 +
.../6f2d122b-f7fe-448a-ac8b-864314e94692.json | 105 -
.../77bf7126-0cb9-43ef-8d23-5f1395f91642.json | 132 +
.../73f410be-3084-4994-8406-f8ac70880626.json | 132 +
.../db82138b-f915-4451-aa85-8bc4c7fdd225.json | 105 -
.../24caad7a-15fa-4820-91cc-0f544a34d173.json | 132 +
.../70e3145f-d67b-403d-af2a-1b06b2ba0f24.json | 105 -
.../3336c8fa-fcef-4513-946d-9254f537e418.json | 105 -
.../e087b221-f813-4688-8d98-17980f98ac5b.json | 132 +
.../8ab3ce59-d0cd-4764-98c7-c4df81bc3c23.json | 105 -
.../f4d03bff-3b34-497f-a17f-0379bc562f11.json | 132 +
.../2ca21612-ea90-41f3-b618-3ea81c09c3ae.json | 132 +
.../66743ed1-93ab-41f7-9002-0080e7f74722.json | 105 -
.../79d7d2a1-dcb6-40a7-b29c-7213ebd261df.json | 105 -
.../d4dc2088-9911-4966-afe9-022df89dd522.json | 132 +
.../a3ef4bc2-c560-4a62-8227-2bd30120b537.json | 105 -
.../ad03a075-8f24-46f6-ae04-5a04eb7061c1.json | 132 +
.../2d1da226-e65c-48a0-aabb-46b1cf670a82.json | 132 +
.../82826944-e4a1-47bd-b240-c70e21acfc51.json | 105 -
.../7fb3a035-2b83-4a58-818f-16fe6d9a8ab3.json | 132 +
.../87018726-9f81-47b1-883e-609afea7fb37.json | 132 +
.../292b9333-96c7-4fc7-bf35-78bbce9f10d3.json | 132 +
.../30942374-a112-4035-a4f2-e30bff57f9ce.json | 105 -
.../b44224c3-ed2c-4120-9e2a-e6286358a4da.json | 132 +
.../e8a8cf1f-5bcf-45ae-b590-fb04de06b77f.json | 105 -
.../f7a2c9af-c55c-4307-bfef-1ca709525d82.json | 132 +
.../84018db9-2b85-4b6f-beff-b4930b230399.json | 105 -
.../d9655f35-edfd-4c53-b359-559870e8019e.json | 132 +
.../3ed52eaf-6b73-46ab-8ae7-3afe120fe437.json | 105 -
.../afdd962d-652a-4395-92f7-c16dc874a779.json | 132 +
.../2594e917-3ebd-428b-8f36-cb0da668695d.json | 132 +
.../6d983237-925e-4197-a592-17cca9219bda.json | 105 -
.../25a672ed-3e0e-416f-abf4-a935e63171c6.json | 105 -
.../91a86644-ad96-4c66-8691-1c0b531b572c.json | 132 +
.../331f56ce-5e45-46d8-9143-3f66be20b699.json | 132 +
.../9f15293c-5668-4895-b4d0-4062cac344e7.json | 105 -
.../6138ebe0-8483-4cfb-8d95-b334bb09e831.json | 132 +
.../ae69fb3f-19a1-4b00-9309-8685e107aeba.json | 105 -
.../4d16dd47-42d1-4ea6-8f1b-dc50648bceab.json | 132 +
.../b0659361-fb53-40db-81a7-2a72771bbd1a.json | 105 -
.../8f0da98a-cf9f-4cbb-8d4a-8c12d737580c.json | 105 -
.../a6b0f2bf-08da-472f-b858-8be967a44cdc.json | 132 +
.../4bff88c0-89fb-4d07-a83d-251c7aaeace4.json | 105 -
.../57c7553d-f3e5-4a31-8c16-66aae570d8ec.json | 132 +
.../58c31bdd-f86f-4fbb-8549-191bb9f46f02.json | 132 +
.../2b97259b-d7a5-4934-b350-7b1322964899.json | 105 -
.../dd25c1dd-0edf-44ca-b18c-633dbd47368f.json | 132 +
.../2a030613-b5f7-4393-ac39-d2d072c913dc.json | 132 +
.../e9fa96ff-d790-4948-9071-dd1376701fc1.json | 105 -
.../70d25d8c-96e9-45e4-b0d1-684a89278064.json | 105 -
.../f8c73290-c400-4f1f-a00a-516592497b0d.json | 132 +
.../13e6cad7-a063-4530-bec9-e70e4e98ccc0.json | 105 -
.../b31908fc-5e7e-45d6-835f-4e86a05b23fb.json | 132 +
.../35443539-9756-466b-a36f-66adc5f68ddb.json | 105 -
.../4320cb98-7f9f-4510-bb88-448ce231bae8.json | 132 +
.../1a2b4a76-0feb-4404-a1ef-0408c75f2ca7.json | 105 -
.../28b986d1-2e67-4462-9165-6cb8f260b6c6.json | 132 +
.../a9d4b6a9-33af-42a3-be29-d3214a171433.json | 105 -
.../fe1e21cb-7934-4022-a74a-777172310021.json | 132 +
.../90871638-b828-484d-8822-95ffceb20909.json | 132 +
.../04a98dfb-8e96-444c-8df4-ed7cf72a26ea.json | 132 +
.../5c04fa63-11be-42d8-8133-4e79e08e42ad.json | 105 -
.../4fd82b3e-4b13-4e21-9253-6492f8b1feaa.json | 105 -
.../8c5c22af-f230-4d34-b80d-f42ef27e1675.json | 132 +
.../4311b63a-282b-4c16-8609-a1d4ab93ace9.json | 105 -
.../f3466a90-541b-4a08-a9c6-d5a79b2299b0.json | 132 +
.../2755da2c-8347-4bbd-80ee-c58e77a26f5e.json | 105 -
.../ef9ee5ae-d92b-4143-af1b-d62a7c3c7fd4.json | 132 +
.../859af708-ac37-4749-bc06-73d92338d1f5.json | 132 +
.../e274380d-e0f7-47c3-afc3-e603e6cecf9e.json | 132 +
.../19810be8-ea81-4db5-9854-1830b05a5732.json | 132 +
.../96baee1a-7ea7-454f-ac8b-fe1bead3cd93.json | 105 -
.../1258c282-3672-4b42-9d4d-117568e17bf5.json | 132 +
.../9b9f6e01-238e-4893-b398-4e1c83c44dfa.json | 132 +
.../d6107bde-875e-40f6-8471-3a3507758910.json | 105 -
.../b267621b-dbba-4c4a-bb9f-fa85734d0f59.json | 132 +
.../7cfcae3d-b623-4cf0-9ac8-529db46d05e6.json | 105 -
.../a7e4e787-8e95-48a0-9d50-53ba9f05cd1c.json | 132 +
.../3d39dcab-55df-4ad3-bdc8-03ae684e4390.json | 132 +
.../1b499881-9edb-4626-a919-977393d6bef1.json | 132 +
.../e2cdcc99-a1b6-43ee-9cda-2e7ccbd0ad8d.json | 105 -
.../84b8970c-6c29-4ee1-93b8-c97e4a7c4950.json | 132 +
.../2e070663-2622-4a8e-bd39-7f0ef9df399e.json | 132 +
.../c14a0d32-1d27-4596-90d4-10a793aef9a2.json | 105 -
.../047fa91e-2dc7-4881-8254-3dfbd4a2ff1b.json | 132 +
.../6d73016e-078e-4ffe-b2ae-5b829d1456df.json | 132 +
.../e3417d3e-7883-45a7-a631-9e5d105788c4.json | 105 -
.../0b68b5bd-d22c-4194-9ddf-f22e9181f84d.json | 132 +
.../03d51d90-fd15-42b7-ad5f-c7326cc642a7.json | 132 +
.../42e3c9e4-bf1a-43ae-87e7-056f735abe03.json | 105 -
.../d3e5c939-c53a-49d6-80cd-34420dbb176a.json | 132 +
.../ab321358-26f9-4577-a5fb-1f5d4b8784b4.json | 132 +
.../daec0873-964e-459e-a1a1-49da96cd17cf.json | 105 -
.../a43aae68-f12c-4a6d-b846-c498cf35f6cd.json | 132 +
.../6986e9f0-d008-4418-b3cb-1e870cf57e02.json | 105 -
.../b84615c0-43c4-49ec-83fe-5d3f8e6026af.json | 132 +
.../7e687d24-9e12-4ecf-b283-e222efb9473a.json | 132 +
.../4aea143c-28fd-48bb-b911-37ac3fe58220.json | 132 +
.../984029c7-f957-4555-8460-dfecd99f44a1.json | 105 -
.../34a8daec-bfff-4cf4-9011-0542b30c1d10.json | 132 +
.../3e919d7b-53db-41fb-ac93-224e2768b9c6.json | 132 +
.../50496313-dc6c-4456-8a8c-15cd8ddbb480.json | 105 -
.../66becca1-d92b-409f-ab56-44d05cac66fd.json | 132 +
.../6293b269-7c4c-44da-bd85-e51954c173a1.json | 132 +
.../d9ae7c35-ac71-4703-9cfe-bf5fb5aa688e.json | 105 -
.../add3b058-e7bc-4b7b-bb98-0d7039979072.json | 132 +
.../3e1ebb01-6fbb-498c-af58-022f50247ec9.json | 105 -
.../db0b6b3f-e5a9-4367-ab87-e58d5c6ccd81.json | 132 +
.../54b055d0-80ae-4bba-b729-bd77b3ec7502.json | 132 +
.../1c7bb42e-aa1c-4522-a4b0-bcc460876125.json | 105 -
.../5c22d0b3-5082-4c6e-865c-71da03cf9378.json | 132 +
.../f8e5ee9f-519d-4ed8-bd2a-88897075f401.json | 132 +
.../2f749e28-b845-45ab-a628-8f9b6a9029d9.json | 105 -
.../b74c3215-7bd5-42d1-9193-f4c9c6a8bec2.json | 132 +
.../27df1e06-463b-4519-87eb-a1666ad3f98c.json | 132 +
.../6dd0eebe-ef61-431d-bf7c-c170475bed5f.json | 105 -
.../14d1ea99-ae05-42cd-9f2f-de1a98d9846d.json | 105 -
.../883755e2-69eb-459b-ae7f-5548914aa65e.json | 105 -
.../9d975b05-7bee-462d-a33a-afa0d5af94d4.json | 132 +
.../9ef9135a-473e-43a5-a460-fd3ec50226f9.json | 132 +
.../c57cae01-328e-447b-8945-e3cd2c4b8a7b.json | 132 +
.../494c86cf-7f37-49d8-8160-b81859552c87.json | 132 +
.../9744dd76-a8cd-4400-92a7-f10b375710ae.json | 105 -
.../6de5e76e-4297-4bcd-b06e-f63fa28da0e0.json | 132 +
.../52ff136b-084f-4ca3-a48e-83fb0bbd8ebc.json | 105 -
.../9b10cd14-82f3-4b36-a4be-5092127d68c3.json | 132 +
.../1f3e04ab-9f97-4eda-9d40-669eda073ac3.json | 105 -
.../bbd94181-0523-4543-80a7-056b041e03b7.json | 132 +
.../e10d8573-e201-460e-a931-49a1b13ceeea.json | 132 +
.../c921186d-6e97-46d6-b968-894159271620.json | 105 -
.../e2ca9477-2414-4b8a-8d22-68f9ced54ae5.json | 132 +
.../831246b8-5433-48e6-ba11-8a4239373106.json | 132 +
.../8277994c-8bf5-4ece-9f34-4fe9a4310bbf.json | 132 +
.../9fb4e863-fd72-4b60-bc20-e32e64ce99e8.json | 105 -
.../5aabc7c5-eb3a-42e0-8b40-0a08004f6e1a.json | 132 +
.../9ed2a831-aa5a-4e81-b8b5-397bc8b55835.json | 105 -
.../cbb73c83-ad94-4973-9bf5-a5e7ca4d1653.json | 132 +
.../3ed06a16-d5fe-43d3-a369-f4ed29fb3a5d.json | 132 +
.../f338f8b3-d2fa-46e6-b2a1-b83303521b3f.json | 105 -
.../fc817789-2f44-4d2b-b40e-2422fe33d104.json | 132 +
.../5e1c8723-7c43-4d8f-8c7c-386c2eb6b9cf.json | 132 +
.../7a336f2b-3b33-4fde-bce6-2d1e884a1b26.json | 105 -
.../b6740747-19ac-4a9c-892f-6556013ddc8b.json | 132 +
.../3263ab46-09ae-4c24-9332-b6874d0d0330.json | 132 +
.../f2295cf4-86e0-4c73-8f3d-21c6e5ccd9d9.json | 105 -
.../a8706a7e-5693-4768-a955-a448549d2e77.json | 132 +
.../3c932329-0440-4799-886f-10bc4a5aeb09.json | 132 +
.../c0ca7adb-6221-415f-8ed6-0de6439db168.json | 105 -
.../b1e42d9d-827d-4109-8d1b-182694033b21.json | 132 +
.../0c6f0d92-3ee0-48d7-b3fc-70149911a51d.json | 132 +
.../73b07681-8e10-414e-8922-650908f9cf6a.json | 132 +
.../7629f304-5235-485b-a7f6-f5a7f91fd35c.json | 105 -
.../81749833-4f2a-4883-a789-c465c11b33b6.json | 105 -
.../8b1549f8-0602-4538-842c-abe9dca7baff.json | 132 +
.../393c9602-bd87-48d7-ad95-6baf85ed3341.json | 105 -
.../ad395ad4-0f9f-4b49-83c9-b89fa6b6dd89.json | 132 +
.../14c01681-fbef-49c4-b737-a7baaa02d393.json | 132 +
.../64574dc3-4982-49c3-8526-09ebd5781175.json | 105 -
.../3ad495c0-da8e-4776-8d05-bc7dce1fe120.json | 132 +
.../6ba8109e-8906-420f-a780-d0bef4015e1a.json | 105 -
.../0762ca9e-f0d4-408e-9992-e91a10e0e65f.json | 132 +
.../ec6c1d05-cea7-445c-bed3-9eee1e1ff03d.json | 132 +
.../faa623a7-1bf8-4da6-b381-7701f0446b70.json | 105 -
.../1fc39812-77fb-4d0c-b9fb-706e94c40afe.json | 132 +
.../fdc3c502-53ad-4bf7-85ce-51eaed72754b.json | 132 +
.../3f74c1c7-f349-4193-95cf-b0033112fea0.json | 132 +
.../36a803da-83ab-4c49-8855-9344aaa7a68b.json | 132 +
.../8ccda2e0-9801-41b0-8491-eb36615860f2.json | 105 -
.../9a90826f-9062-48aa-b047-d24f4e0d85ef.json | 105 -
.../df986996-249e-49f9-b074-91e8dcdf62e2.json | 132 +
.../549f9869-4b59-469b-b9fd-ea26114405a1.json | 105 -
.../90f007e9-e323-4a82-b276-ac1b928030ca.json | 132 +
.../2b627f93-5cc7-4a5e-b682-d129396362e5.json | 132 +
.../91e22241-7b65-44b9-a437-34b56400af7a.json | 105 -
.../2fde07ac-d218-4cc6-947e-8ceb87eedbee.json | 132 +
.../cab9a80e-94a6-4e7b-8980-1fa4482bac8a.json | 105 -
.../2a141bfe-4632-4058-a232-1f2c5540c41f.json | 132 +
.../5eddb8a8-7281-4ae2-a4bc-f174598727e3.json | 105 -
.../d20e8883-4cde-45dc-9d60-10284a2a5cdb.json | 105 -
.../fa2d74a5-e8f6-4a1c-9310-a9b16c2e59d1.json | 132 +
.../861d8edd-2acf-4593-9768-8f77488ce8a4.json | 105 -
.../c7c0ceff-9273-4cc3-8f8e-bd93181590ba.json | 132 +
.../398e665d-af8e-420c-95ce-5f9f4a4988af.json | 105 -
.../c439478a-1734-4038-aa8b-bb2d12ec022d.json | 132 +
.../4a36f73a-9495-4ea2-863c-220b8ca6bf99.json | 132 +
.../faa9d3b9-343a-4a9e-82c5-6bc81bc87b9c.json | 132 +
.../a55bf380-d567-4228-b30c-57e9df31e844.json | 132 +
.../dfd92311-4f3d-4355-8ccf-a59f29914b8f.json | 132 +
.../d98e190e-5b5f-46eb-b701-e32d2dbef3a0.json | 132 +
.../32edb764-2a42-4efe-ac86-9eda81942b84.json | 132 +
.../36855ebd-2030-4d5d-9c42-ca049244e694.json | 132 +
.../76f26fef-fa87-4cf5-a317-ea4b743e7432.json | 105 -
.../9651a0a1-4004-42f3-ad8f-2aebb38ec967.json | 132 +
.../a41d111c-dd5d-4f77-b52d-9a2dc9f31e50.json | 105 -
.../93930443-dc12-422f-9920-470917ef8d7d.json | 105 -
.../a59e55dc-e2b5-43be-8469-49eee0e98d55.json | 132 +
.../7f08546a-3f05-4612-879c-3f293daeabd4.json | 105 -
.../a956e306-f184-4dbc-ac7a-3793ae735801.json | 132 +
.../b5d64806-0d01-4c99-9ba6-6aff88c894bd.json | 105 -
.../c05cc6ce-12fd-491d-b41b-57cc14b6d34a.json | 132 +
.../0d1c7e5e-4ddf-447b-9581-c62cedc2fedc.json | 105 -
.../415875b7-fe10-47e7-aca0-029c2f51c067.json | 132 +
.../c505ee64-3d3b-48e2-9c8a-f59609a758e9.json | 132 +
.../00003185-c291-40c5-bba1-f87eae0afc08.json | 132 +
.../63522d1e-d4bf-4071-a086-5ef016243ec1.json | 105 -
.../328f61d7-677b-4a06-b464-0da42153f9ae.json | 132 +
.../f9c7c5b5-6274-4971-a81a-6f88ec07ca93.json | 105 -
.../369f84c6-022e-46ed-8cfc-2e0b4a8e175a.json | 105 -
.../9cb5b8fd-062c-4161-9301-640980d21b9f.json | 132 +
.../09284b75-a2f9-40ea-8135-7aa61c626fa2.json | 132 +
.../98275290-dbd0-462e-9028-4daa65cd5ce3.json | 105 -
.../e2502331-6ac3-43bc-8218-259b44333283.json | 132 +
.../8dde454d-aa48-4ee1-b5c6-f3353087d492.json | 132 +
.../9064bdc6-b84b-4022-9d7a-63b1b76fc1bc.json | 105 -
.../662c8ed2-2407-4606-ac1e-ec7ade185d2d.json | 132 +
.../332aef8c-7c62-463e-ba3c-07ae0205d457.json | 132 +
.../b50b5452-b824-4fd6-b0e4-cdaea09139a2.json | 105 -
.../cfdfcf21-e445-430e-a295-946cb8c3fce9.json | 132 +
.../db8c1ba2-4029-45c5-b8a6-5343356266eb.json | 105 -
.../a5606b92-aa2d-44e3-a92c-47d0b38fef9c.json | 132 +
.../465d473c-ef28-4725-8cac-02f2a031b22c.json | 132 +
.../9e6c7958-689f-4437-b81a-c055d53ca33e.json | 105 -
.../2c636544-8676-4eee-8bcd-d623be0275be.json | 132 +
.../4c2150fc-f473-4bdc-8823-960778ccbc75.json | 105 -
.../511ac4a5-6fc8-4338-845d-859d73d57678.json | 105 -
.../8b332fac-1cfa-498b-853a-52ec5492ddc7.json | 132 +
.../2bf1b38b-e90b-4fa8-b19e-47d93ff9ab4e.json | 132 +
.../ee17e3a4-2036-4e57-9ada-51fe6d23ffac.json | 105 -
.../69bb0243-75b2-4858-ba6b-5e70cfb516a7.json | 132 +
.../4bb7e325-8741-4c09-81f6-9efdb30ef5a5.json | 132 +
.../da01b31f-dde8-45dd-b793-c8258a09ddee.json | 105 -
.../87878b74-22ce-4554-914c-03e486d13de3.json | 132 +
.../5030f8d4-f216-4f78-84f1-dd03b0324bb0.json | 132 +
.../7763650a-8a37-41f2-aadd-b1db7b41d0b3.json | 105 -
.../c5e244fd-e85e-4fbb-9703-b8e733fb91bf.json | 132 +
.../e3f05df1-a653-41a0-983a-4a7d86b85c60.json | 105 -
.../21472871-fe74-447a-894c-80d77ae4ad0a.json | 105 -
.../38261a01-62df-42b2-9b1d-f924598e70ef.json | 132 +
.../5736f0b5-3903-4774-a84a-c3db260d36e4.json | 132 +
.../6e2f01c1-ba87-4687-9db1-a0c0004bdfe1.json | 105 -
.../1169b5fd-9418-4986-940a-276d163431c0.json | 105 -
.../70134d58-972e-49c9-8cde-4ba2691d3dc3.json | 132 +
.../30b98827-5afb-4bfe-b765-9c81cb4580f4.json | 105 -
.../d4bb1440-2064-4752-bcb3-c9cec234fd1b.json | 132 +
.../d9e6059e-d20b-4465-b7ba-2ee3a72562b6.json | 132 +
.../f86649f8-8962-4496-8cd8-fed702a7e63b.json | 105 -
.../b4e467a7-3f2d-438a-8c42-1f7da1aafd20.json | 105 -
.../f8b02d65-c8a0-43eb-b48e-d1e1f7f363d6.json | 132 +
.../7bf23db0-877c-4700-95c8-e35dee5e57b4.json | 132 +
.../f720d81c-04e1-4f8a-b452-ae52cc7d9fb2.json | 105 -
.../07f8351e-c7c6-463f-9e91-ee1d3bb2b35c.json | 132 +
.../e82f1a2e-f679-47b8-9fbb-a53116e2195b.json | 105 -
.../5115cea0-d3bf-486b-9609-36698e845653.json | 105 -
.../8535ffae-f39d-46ed-89bb-a1656885db91.json | 132 +
.../57934f76-c8bd-4264-a3b4-14234dda0719.json | 105 -
.../5e832121-9a67-44d9-973d-fffdb1b37975.json | 132 +
.../03cb237a-0519-449c-b9c7-d9fbb4d119cd.json | 105 -
.../92d3f67d-a026-49e3-a440-68c10fb358ae.json | 132 +
.../9d0baaef-bd31-4a96-bb2a-e92b62b748d2.json | 132 +
.../b1527426-9cc0-4eb5-af52-30e36e0e04fd.json | 105 -
.../0cf37c9e-9218-4366-8065-befea0d2b749.json | 105 -
.../489e8e84-5e30-46fa-a421-f52308f051e7.json | 132 +
.../a208f807-c930-4e81-8ebd-dcbb4db76442.json | 132 +
.../cb136400-7d0e-4194-9a45-1646ff8cac95.json | 105 -
.../4956539d-a255-4c56-877f-257e463fa3e4.json | 132 +
.../ff057dd9-0102-485d-88d7-7e50145b5f7e.json | 105 -
.../3451eb65-020c-4e34-9128-7410e6b293cd.json | 132 +
.../e95c6f08-ab57-49a2-a83b-6a77b5ab69d9.json | 105 -
.../7f508bd9-7f95-453d-9e96-747ce91a64b3.json | 105 -
.../b5cd0061-e4dd-4049-a51e-b16490e69120.json | 132 +
.../c4686af6-0b7b-4df3-9152-14a3ef087b7f.json | 132 +
.../f814a3bd-b82e-4769-9ef7-a4670420bca0.json | 105 -
.../155885ca-11e7-4cd2-b26c-53e001e2a6f9.json | 132 +
.../5d5bda4e-8994-4cef-9772-d4bd435e9644.json | 105 -
.../d9ca5411-def6-43b3-a522-595131d8e5e6.json | 132 +
.../df1e7d22-c300-4466-92b7-770078a1dc09.json | 105 -
.../d8cc8e9e-b672-4b26-a454-f97cd7a08648.json | 105 -
.../e54553ab-0897-4cb5-9213-5bb72758d2b5.json | 132 +
.../1200ed26-8450-4788-a1bf-20f2c9b9b2c0.json | 105 -
.../eed48cdc-18db-4c03-84bf-d2d50e3328b0.json | 132 +
.../a76090d4-a0fb-45c8-b28c-fa225ec3d11c.json | 105 -
.../d7952aef-37e2-4c15-a1a4-598690773bbb.json | 132 +
.../14e1dd44-92f1-4d97-be67-fa98c9802ff1.json | 105 -
.../5e1e1376-bb22-4fc9-a1d6-3f2fe7d302b9.json | 132 +
.../cfdae559-f3f1-4a78-b4cc-fbfb8bb37b16.json | 132 +
.../e3e0180f-bbd8-491a-a41b-54801e9f71de.json | 105 -
.../a12208ce-e9e1-4476-8054-0d565efad92c.json | 132 +
.../b759686f-082e-44b6-9cf8-44a48f66c136.json | 105 -
.../8449b01f-c489-4008-97d4-aa3f0394cda4.json | 105 -
.../f46e1eeb-8b8b-4d47-9510-445109b5518b.json | 132 +
.../7dc4970f-ce35-4ffa-9052-2ab40abb1e55.json | 132 +
.../854baf47-af97-46dd-acfe-a3710976fd57.json | 105 -
.../53556d59-3b32-44bc-9932-c52f05939b57.json | 105 -
.../823e886a-1431-4078-81a3-4b941983461d.json | 132 +
.../583609f0-de5b-43cd-a667-bb2c36679fd2.json | 132 +
.../a5e13aa9-bf5f-4201-bc93-504521141f43.json | 105 -
.../07f036d7-af59-49a8-8346-8a9a9dd21439.json | 105 -
.../2d2cea8b-167e-4d63-b01c-537f372672f9.json | 132 +
.../10923a84-a611-4830-b84c-0e91c0628541.json | 105 -
.../f584f596-3a17-404a-81a2-3033ad38cad6.json | 132 +
.../e806f2f4-0a10-49f6-a67e-dc1dd0a59ede.json | 105 -
.../ebb0930f-92be-4e1b-a2a6-779f69d2151c.json | 132 +
.../2329f6f2-228a-400b-9b2d-4ad6dd278b79.json | 105 -
.../b8926567-e208-442e-8ba8-c6dd4ecc5c4a.json | 132 +
.../4bf6efe1-81fc-48f6-96ba-8df9ffbef2f2.json | 132 +
.../537a91f9-b1f3-49bf-bef7-a9ef8578c284.json | 105 -
.../05ffcb7a-2694-4276-bf45-73e1110bc494.json | 132 +
.../cd884e16-7e4d-4d17-8bad-5819604e0384.json | 105 -
.../9da5b03b-0207-4e98-a5bf-5a658225e78f.json | 105 -
.../dc3b944b-a57a-44ab-87ac-8e1882b7bcce.json | 132 +
.../154f70b4-d77c-4d1b-b85c-bc81fe8162bd.json | 132 +
.../a1593642-8d60-4680-90aa-8c3789d536d6.json | 105 -
.../2a4293ca-2434-4752-a08f-163257e0fde4.json | 105 -
.../998316d2-389a-4ce0-b0b0-0430c1361de7.json | 132 +
.../a66efce1-f6d2-4fad-964b-cc4e80012145.json | 105 -
.../ce803cde-6e23-433c-a4d2-38c5cb5ba14b.json | 132 +
.../2519485b-47cd-497c-a349-9e69db0266f3.json | 132 +
.../cb550de6-4cd6-411e-9426-dc12421404ad.json | 105 -
.../56d86e26-4ee6-4652-9b7b-a538238a24d4.json | 132 +
.../c2c87be8-4137-4bcc-8cbe-4589d193e94d.json | 105 -
.../416b89e4-5e8a-4131-9403-e8967a4127b8.json | 132 +
.../dd01becb-c2c0-4593-ac1e-db2ff11aa17b.json | 105 -
.../347a90e8-d8b7-4266-8242-ceac865796a0.json | 132 +
.../a4bd1768-2382-47fe-a8bd-6e42bda06d2f.json | 105 -
.../389f7ab8-b30e-4d0c-b9a4-625e74a1f73f.json | 132 +
.../efd2a4d7-afcd-4653-ad4f-7d4f7206be95.json | 105 -
.../6ae33b7f-53a1-45c5-8b0b-d462188c3f9d.json | 132 +
.../695d7b01-14e6-40e4-b398-541e87a812c8.json | 105 -
.../d96fb0b2-7cba-4cc4-a5f4-b8a451754857.json | 132 +
.../f3f888bb-5e99-4521-83b2-4e182f492220.json | 105 -
.../f8d362f6-eafc-4d11-bc40-d169d69d3a95.json | 132 +
.../4bacd3dd-44c2-42d8-98c0-3eeb920dc0f0.json | 132 +
.../5cd3794f-990f-4965-9fbc-7faf3216e808.json | 105 -
.../688f9751-e261-41c6-a7a4-2dc33a702e09.json | 105 -
.../de073f45-0d14-4f8a-9d3b-d4fd961186b8.json | 132 +
.../abf448a9-decf-432d-8883-6e1492a7c040.json | 105 -
.../fd88d234-b3f9-4f48-896c-af58f1a69880.json | 132 +
.../18f686ca-453d-4a0c-9f1a-e2f4ba53399c.json | 105 -
.../273745b1-3761-463e-b9ab-7860968064eb.json | 132 +
.../101d84d3-e741-4eb2-bd8a-db6c12022fe2.json | 132 +
.../258aae52-b934-4ba1-bdb0-e15bd8277234.json | 105 -
.../9c82deca-1998-4506-b038-c5dd592324d8.json | 132 +
.../cce8480a-353b-4f9b-8f6f-b2f1e9ae601a.json | 105 -
.../1bf65062-4526-407d-ba4f-866b045dbf3b.json | 105 -
.../da620a94-4c0d-4c50-9619-10e12001fb5d.json | 132 +
.../51dade8f-34e7-4237-8691-22655249bf76.json | 132 +
.../dc7243af-efa9-4169-8d31-36ef75dfe2e3.json | 105 -
.../5d7ffac9-a734-44ef-aa1e-43ddbe68fd6a.json | 105 -
.../cdd59385-0a54-4ca1-b24d-9316a70f2875.json | 132 +
.../514a3103-e8a1-49e8-b9da-a85963f5b3dd.json | 132 +
.../660f8ede-1b7f-4438-8a97-51db77058725.json | 105 -
.../135ade7c-f0d1-495a-a5b5-c95712cf0c0f.json | 105 -
.../daafaafa-1e00-4433-95f3-91c169598ebd.json | 132 +
.../09aab7d9-93ac-4aff-840a-d4ccfb0b469d.json | 105 -
.../50e53ad5-8693-44c1-b5c7-45b91d7e0ae4.json | 132 +
.../bda5d02f-7973-41a3-8f8e-4e33a12b74e0.json | 132 +
.../d730a2be-1cd8-4851-9ecf-55139af1e8f7.json | 105 -
.../99ff5ca5-4409-4d9c-9ec0-4cf392afeff2.json | 132 +
.../e15ed4e3-d33f-4dad-98da-e1dad098a6a1.json | 105 -
.../362f5875-4dbc-4e68-90ce-789f692bb533.json | 132 +
.../9c10e944-3955-4478-9d07-f79769d6b884.json | 105 -
.../85a94072-ac79-4c14-abaa-9a6424a03ab5.json | 105 -
.../fdb5faf6-2cdd-42bb-b154-d6e93b2348bf.json | 132 +
.../279b82ae-62b2-4703-85f2-1e79e42366f0.json | 105 -
.../93f829b8-b8d9-4389-a210-2a38c3a30edb.json | 132 +
.../2c83813a-8254-4765-9367-efb9ad8c5e6c.json | 105 -
.../6ec3554d-377b-4bf6-88ef-8a4c9e70f485.json | 132 +
.../482fbdd6-6f39-4971-ac65-1e5e181b667f.json | 105 -
.../70d749cf-2e92-4847-86de-7964fc8eb990.json | 132 +
.../0b1758f7-4aee-40a2-b33e-f519107b6687.json | 105 -
.../623f2b04-6cd7-4ea0-8844-badb0ff6c9c6.json | 132 +
.../b206b1c9-3469-4b77-b85a-dcd3c6394c67.json | 105 -
.../e1aca741-2765-4e47-b6a1-49f3d9532432.json | 132 +
.../4f42366e-e6aa-4974-9a40-5781e350616d.json | 132 +
.../52d4b2fe-cbd1-431f-b0e7-04ebfbe852ca.json | 105 -
.../4ec2231d-c012-4ad3-830c-8ff86c977202.json | 132 +
.../b1b0aac0-2921-44ab-ac1b-873b715e9b52.json | 105 -
.../1d2e5513-bd0c-4795-8487-f5266c6e368f.json | 132 +
.../977a0388-5c46-42ab-bb93-91f036963f8c.json | 105 -
.../104172b7-86f5-410a-a454-63e1cfbeb87f.json | 132 +
.../52438151-a1c8-440c-a9be-3670b18c1ef6.json | 105 -
.../993cc036-0e33-4d0e-b1b3-f97a9645f4c5.json | 105 -
.../d28e04ac-7d18-43fb-80b8-82c0662fec79.json | 132 +
.../20bb3819-9d85-4d84-99ba-65e33965f0c5.json | 132 +
.../53a6fd3e-37c5-4abc-b387-0ef9f4225760.json | 105 -
.../3a4bdf58-0137-4d85-b567-59b3fed3dad5.json | 132 +
.../420f358d-c7a0-4bb5-9d0a-6c44e1f2a354.json | 105 -
.../04f843ba-947c-4732-979c-2aeae7d34e5a.json | 132 +
.../e7007251-609e-4c81-86cf-d6fb79c896c2.json | 105 -
.../173a31d3-7d12-4ab1-a963-005a81aee767.json | 132 +
.../7117b360-ef16-4da9-9226-b66b6aac9703.json | 105 -
.../8f41a438-e9b7-43c6-b0b2-447a71ac360f.json | 105 -
.../d0555736-b614-43ca-91d7-8264e3566872.json | 132 +
.../4b7b13b7-4aee-4462-87e6-aa6c15068236.json | 132 +
.../f119b2b5-2303-4772-9ae0-ce8f573f86c3.json | 105 -
.../4b1f9ce5-bb12-42e3-b0e0-afaa784b0c4c.json | 132 +
.../5d7a3d90-8017-4415-a1da-eb70f6145fe4.json | 105 -
.../2ee4584d-b18c-44dd-af63-22c28b92e107.json | 105 -
.../acbcd5a5-bcd8-4209-b35f-425feada7e8b.json | 132 +
.../cb9a415f-1a02-46ad-a731-bf825ddd78ae.json | 132 +
.../92cde6db-47f4-43c6-9ad5-643c35faa226.json | 132 +
.../5e88a037-f9bd-4b39-944f-f0781bb7884f.json | 132 +
.../ff284b60-0c7c-4825-af77-5922831cb3b8.json | 105 -
.../9ff82d83-2a89-48d8-8ad0-91637a77bc76.json | 105 -
.../d4b08f5d-5add-49f4-b8db-c1a12e0a5313.json | 132 +
.../ac5adf39-f0a4-439b-9873-9141e0a554b1.json | 132 +
.../62965c92-cdf4-4a3b-b035-990abaab615c.json | 132 +
.../83fa529b-8c61-4017-92a8-ec0f46eb7bba.json | 105 -
.../3866ece8-d70a-4061-9e86-0798ecd98bd6.json | 132 +
.../1ce9038a-7f1f-4b79-9fbc-9e78660094b3.json | 105 -
.../ff484d0e-bb14-4a80-ae29-2351b03cf278.json | 132 +
.../06ac1718-fe71-4e05-a47f-1200e067336c.json | 132 +
.../af3374c8-5a23-4a87-990b-123803107ed8.json | 105 -
.../31fd60ef-db8f-4785-b486-7a06f1cdf981.json | 105 -
.../4ddb1616-7889-45ef-96de-823fee338e1d.json | 132 +
.../487dd91b-5bc4-4355-90d3-c82ecc789ab3.json | 132 +
.../983cf552-1ab1-49ba-aab0-1e644e9a7acb.json | 105 -
.../a74e86d9-8b94-4f60-8f0c-73cc4b04d905.json | 132 +
.../e8f1d0e1-4086-4645-983b-b9470a22b522.json | 105 -
.../98406fba-a2e4-4afd-a121-e33a723d2eb6.json | 105 -
.../9a9239ab-9e0e-449b-bd1b-6ec280fad505.json | 132 +
.../2c710cd5-75a6-46b7-8356-212da7bf864d.json | 132 +
.../377d5240-73b5-48d0-bbdc-0960ad1d9069.json | 132 +
.../3a0633f1-070a-416d-a7ab-f41dd44f577d.json | 105 -
.../9f31a6da-c5bd-4143-b2f9-715c0e9f7b74.json | 132 +
.../104a0157-c614-44cf-b6cc-9f15dab4b187.json | 132 +
.../bb379093-c169-44bd-ac86-edb8ab8fc225.json | 132 +
.../e29001c0-17c0-4deb-8ca2-ce9ad06d8cb3.json | 132 +
.../43d87bf5-2620-4f8e-a8b6-f86fc157d987.json | 132 +
.../735d9d75-d9d1-4553-b7cf-f8e7c2e65218.json | 132 +
.../0c6dcc87-343c-4973-a589-3e3393829184.json | 132 +
.../7c1d1657-e9ae-433f-be9d-523431bfc7ae.json | 132 +
.../0b2d9a65-c028-4f4b-a280-dc0c35ac9516.json | 132 +
.../21216e0b-dc97-4502-ba3d-d47ad1ac73b2.json | 105 -
.../dd1936aa-9b21-466d-b74a-807fafd9f24a.json | 105 -
.../e87e1d3f-1476-499d-a9f3-b6463b429262.json | 132 +
.../2304646d-a399-40c0-8577-0bab9ad2ff3c.json | 105 -
.../246e8450-3c53-4bde-99bb-5663f751e88e.json | 132 +
.../496b9e45-2f64-456e-b35e-12a94c5643b1.json | 132 +
.../9d6d36b1-f8ad-4cc8-b904-c7e3b0a923e4.json | 105 -
.../05890047-a95a-433e-b6b6-fb037592cdd1.json | 132 +
.../f7f3caa2-0468-4dfb-a817-bb5cdc977911.json | 105 -
.../4a30580c-1d25-49d4-984d-2d28ef3a5656.json | 132 +
.../696d7966-d140-4f43-91df-54f02247b34f.json | 132 +
.../fdf10ab8-e3f9-49e6-8fd0-ed116868c217.json | 132 +
.../9ac16d1f-d894-414d-8a14-110e971d0ba6.json | 132 +
.../2eb01e0e-8f7b-4956-9a2d-b32ecaa936f6.json | 132 +
.../3b221b0e-6158-471f-bcd2-b09514f28bd7.json | 132 +
.../c8af8428-aab6-4d19-b185-2b437c0334fa.json | 132 +
.../c617d12b-c37f-47ef-9704-e19774c67aeb.json | 132 +
.../577f31e2-1808-45e2-a528-5933019cfa85.json | 132 +
.../7bd7f5c8-be9e-473e-be18-03ad22a195ee.json | 132 +
.../bdcf5d38-55d2-4f55-8bd1-7f4cd94f758c.json | 105 -
.../5036a549-5583-4775-935a-1a12b6de3e7d.json | 132 +
.../7875e792-80dd-4fa8-9743-b8ef42a4cdb7.json | 105 -
.../5c0ffff9-542c-424e-88e9-89584e686e12.json | 132 +
.../9354b915-68cd-47ca-a1e8-7481a8b33c49.json | 105 -
.../5c6a045d-2c90-4938-9185-9c1a0f82903a.json | 132 +
.../02480176-2058-4e71-a970-9698be8d235e.json | 132 +
.../4be1e5b4-254c-4287-907d-cc845042de37.json | 132 +
.../6523a08c-7a43-4784-9650-e1d5144fcfcf.json | 105 -
.../21b51852-5cad-414e-92d5-31878f025d67.json | 132 +
.../9eb07d4a-1f01-4696-9137-d477ffca43be.json | 132 +
.../4236485b-aa92-4bc4-a652-17ed3231ecf4.json | 132 +
.../55a6c2c7-d29e-43a2-abd6-435117967a5d.json | 105 -
.../9c0d6b71-8c6a-4294-961c-972a002b847f.json | 132 +
.../101d8dec-2e39-47d1-b76d-d91d6562feff.json | 105 -
.../d1e906d5-8f0d-49c2-88c3-cf71774de600.json | 132 +
.../32feb55a-fde5-4bbd-b93e-abffc1a7e573.json | 105 -
.../798e4f83-6262-4d5b-a854-6ff114167209.json | 132 +
.../29d6834e-38f7-472f-86be-79a8fce03989.json | 105 -
.../dd2603d5-e99e-4778-95d0-159c788626cf.json | 132 +
.../41c71990-e79d-447f-b082-63c96fd67a1f.json | 132 +
.../8282705f-6b69-40c2-825d-8e0c72756083.json | 105 -
.../57a36976-0868-462e-ab57-3addef7ea2f9.json | 105 -
.../b9e25948-2871-4b6c-933b-8a731e48e81b.json | 132 +
.../7c70df74-2bc2-40e0-b0f4-77be1a7e044c.json | 132 +
.../e418f7d1-8fd6-44ea-bc33-62fb525589f1.json | 105 -
.../c12a519e-9d34-4671-8e98-c69178e08ec0.json | 105 -
.../ea71bdd5-3aa1-4d26-9256-5aeb2f79fa8c.json | 132 +
.../b0e9c0ca-cd56-42c8-96ed-477884bfd9f9.json | 132 +
.../ed5f857e-6799-4729-a2e5-afbea4b89ecd.json | 105 -
.../5db5f87b-9bb0-4d29-b578-72bb896f3359.json | 105 -
.../7395fcde-49dd-47f4-a8ea-463eda40f5e3.json | 132 +
.../1465ebc9-f2c3-46df-b5e1-37e7a027fde8.json | 105 -
.../a130087f-566f-4405-b662-1102f1664c49.json | 132 +
.../3be58cf3-4761-4459-9f3c-eabf812a3c19.json | 132 +
.../875156be-2ff9-4ec4-8085-27f22fb19259.json | 105 -
.../dbdd71ad-db5b-4b4b-8856-68b55adbe127.json | 132 +
.../1b0bd686-fd26-441f-b280-97b10bb1449c.json | 105 -
.../da159a16-48a0-45e3-ad4d-bdc9e8b5288c.json | 132 +
.../224b4cbc-e36c-4f68-9918-edbdaf947191.json | 105 -
.../77d5f51e-5ad2-42a6-a32c-060cd844b949.json | 132 +
.../22ea218a-e3be-4e05-9a94-af716bb3a624.json | 105 -
.../724cc582-cc83-474b-9606-70dbc22f3581.json | 132 +
.../8a1b2aae-d717-4b49-8ed2-a7ee2cee1940.json | 132 +
.../a469604f-f755-46e0-8b1c-db4a365dec34.json | 105 -
.../0dfb062d-a6ec-42a6-a9f9-6f6424bbdf0c.json | 132 +
.../fa793cb5-5522-4777-8d6f-e4719a51f767.json | 105 -
.../ab2512fa-2335-4817-9a76-3259690bbc67.json | 132 +
.../de5f2ab9-f1d2-49bc-9771-41b9da1bdfa3.json | 105 -
.../fe7f1442-b7db-42d5-bc83-b8afd1d0c802.json | 132 +
.../068a06f4-3fdc-495f-b7e4-0effebe24e42.json | 105 -
.../0e14484a-69d7-423e-bf6c-33d0992f408c.json | 132 +
.../173028b9-03e3-44d7-a7e9-2c0c5c6f4b4e.json | 105 -
.../881eaa2c-af5f-4e84-8807-d0835c10ebd2.json | 132 +
.../6b6b273e-9cf0-405e-b1e4-5fdbd2ae16d9.json | 105 -
.../ef8a7079-9d13-42b7-ab2d-b72df5ae5d95.json | 132 +
.../a9c4a482-6b02-4cf6-a7d5-3e16334df634.json | 105 -
.../db8d3fc4-58f4-4f07-8c27-c73a4a4719fb.json | 132 +
.../0c44a429-e705-4794-b702-1a731e52df90.json | 132 +
.../57fd3fdc-dfdd-44ee-8c30-dc5ce4a0df8d.json | 105 -
.../92b3d2c1-61f4-432a-82a7-43b4367f7ef0.json | 132 +
.../cd0ccaff-e1b3-4c11-a8a0-37137d0386e2.json | 105 -
.../5703e81d-055c-459b-8202-80ec382a8d5b.json | 132 +
.../f336c7ee-2275-4045-a227-1a7abbaebf63.json | 105 -
.../5bb52ed5-e59a-4e60-a6eb-9e9322d95ccc.json | 105 -
.../f6260b6e-52a2-4142-93ba-5393807fa0d4.json | 132 +
.../83b84506-4826-48de-a6fe-2af6ae5d425a.json | 132 +
.../9bfe838e-a568-4933-b03d-3e9ae6d2026d.json | 105 -
.../58bacacb-2936-4685-b0ba-dc8f47f3232a.json | 105 -
.../7483e260-9853-4d3f-aa10-187796d96de9.json | 132 +
.../5bedfdac-2976-4a21-9ae2-a5b5b06e1e14.json | 105 -
.../f9925806-4252-44e8-b67e-917737572bd4.json | 132 +
.../70470e6c-8d66-4249-b762-a5a2e3589a53.json | 132 +
.../a5d0fc39-cac5-409f-8375-636ef97fba8c.json | 105 -
.../14eb1867-80a0-47f9-9b2a-f0a05f683fb4.json | 105 -
.../d3abfe3c-ebfe-4dfd-b0db-93c14d32c585.json | 132 +
.../a35b06bc-d759-421a-94cf-f408a98e9273.json | 132 +
.../ff4c64ec-f44b-4bec-9534-bafa632a0e3f.json | 105 -
.../bbac659c-7cf8-41d4-98d4-ded4c471bd98.json | 132 +
.../fc5613f1-09bc-4b82-89f4-4ee671cad5bf.json | 105 -
.../0c73f3a0-0a92-4b1c-abfa-6eb77138dacd.json | 132 +
.../af8665b4-d9be-4243-9c8d-0b43e7abd540.json | 105 -
.../290206b5-0e46-4f92-a2bd-f2c53ef3d147.json | 105 -
.../a7ab6f16-717f-4567-8057-a4a18e1a1e77.json | 132 +
.../2abe2c9d-032d-469e-852b-114eca5e84f8.json | 132 +
.../78813c35-3eaa-4ae6-9099-bf79efb8b0df.json | 105 -
.../1358fee5-3874-4997-b1f0-6e93c6c5e9c0.json | 105 -
.../2e8a83dc-c760-4f42-a361-e02cf3a65427.json | 132 +
.../57a4ddc6-0447-4840-94bc-5bb136025aab.json | 105 -
.../743dfe64-e7cd-493e-817d-8d5fcdc2ea24.json | 132 +
.../4e37c90b-65a8-4b71-bfc2-d63541fb8962.json | 132 +
.../6fbb6156-196d-4523-900e-35316100d3b9.json | 105 -
.../2e34d74e-1b69-4daf-8bee-77e5357fd439.json | 132 +
.../38e620aa-c577-4b14-bebd-e98ebcbe48b2.json | 105 -
.../0646e2f7-d2e6-42d3-8f09-f8daee302709.json | 132 +
.../15b92d44-3d68-4c6a-bddd-5676ebda2e10.json | 105 -
.../c66b1ff8-9c04-4f9c-b83e-088f31f79590.json | 132 +
.../1bd2affc-9970-4149-b52b-51549b1f0029.json | 132 +
.../464bae3d-bd06-4264-a939-59ab8e562ca6.json | 105 -
.../a6eedf29-9ec8-4b03-a8f5-c9c4e2bda688.json | 105 -
.../f0479d74-4684-4b41-a63b-16d7fe0e3290.json | 132 +
.../1ab70352-9bda-47c8-8bdf-90860934cfc7.json | 105 -
.../95deb890-a15d-4c71-8151-ed45c3dfb87f.json | 132 +
.../1c07fc4c-a773-4e03-bb14-7144e7815c01.json | 132 +
.../7c39d06a-dafe-40a7-b5a1-dca14dcadff2.json | 105 -
.../10823e50-9478-4a8a-83cf-5169a0bc1f1f.json | 105 -
.../e7e8388e-db3c-4881-b67c-5177c60562b9.json | 132 +
.../2b0eb3f5-d35e-41ea-ba69-18c0b8a3e1e1.json | 105 -
.../c4923208-2a47-45f2-a74a-4483e4b99bee.json | 132 +
.../3e1be4f3-478f-4061-9856-f1beb0a749de.json | 105 -
.../b5f06a78-5b57-45a5-93be-4f3c1b36f208.json | 132 +
.../7beef3ca-6423-4a81-836d-0e4cdc4af973.json | 105 -
.../835f19d3-515c-4bc4-ab96-5cb5bece45dc.json | 132 +
.../3344d19c-c79b-48b3-be5b-f5f27d6920ce.json | 105 -
.../7dd96382-6fc1-4a39-924b-d9034b5b0839.json | 132 +
.../77a666a2-a9b2-43cc-8e64-67172f4ab6c8.json | 132 +
.../bf9c0bfa-98e5-45b2-8819-0911af81d78f.json | 105 -
.../35f89ab6-c6c9-41cd-9296-af4921490c3f.json | 105 -
.../e3eae267-46ab-4433-a8f3-2a2f8448299b.json | 132 +
.../0368a3ba-e461-45d1-a037-3b9160a8efbb.json | 105 -
.../e31308c4-8eb2-4a72-8127-18049d58b814.json | 132 +
.../c7098a7a-e865-4ecd-b511-abeb2c0872bd.json | 132 +
.../e5d250e7-8d0a-48b5-aaad-3d1da02eab00.json | 105 -
.../0392cccb-0a1c-486e-876a-1404f14a1080.json | 105 -
.../b3a8c734-e63a-47f7-af2c-a3b6518802fa.json | 132 +
.../35937965-2791-4f75-8954-5a2280381c91.json | 132 +
.../588b0fce-37cd-41f1-8eaa-50383cdc0f00.json | 105 -
.../4ab806fe-738d-4f5b-89e4-004134d2f7fe.json | 132 +
.../926fb6ed-0750-4d04-8e3c-da470e236db2.json | 105 -
.../a937e27e-b757-4de7-b679-01ac29d8bb22.json | 132 +
.../e64503c5-d9ce-4544-8caf-0fec97a2b592.json | 105 -
.../0ba8bca5-3a61-499a-8e2d-ca84f52ef654.json | 105 -
.../1d906aab-33a6-4ffe-8a63-694482d83d09.json | 132 +
.../380a44ec-387a-4f34-92c2-18fc7a8d5ce0.json | 105 -
.../9e101298-6482-4ae8-83e4-b948ba8fa550.json | 132 +
.../3818710d-80a9-4e7d-90e3-f06afffb71ac.json | 132 +
.../3f44a1c0-b70a-4712-a0c1-bdf3318b270c.json | 105 -
.../a18ec0c4-6f3f-4904-b69c-e40770df169e.json | 132 +
.../f83b7584-0e52-4658-ae15-f295064b9111.json | 105 -
.../51368b21-1b48-4c07-9b09-8cae0786200b.json | 105 -
.../529c2bd4-6b8e-4e3c-8737-c0b794444d13.json | 132 +
.../52b41117-c308-4e8c-9c61-ce8e4faf778f.json | 105 -
.../9e994362-a1d1-48f7-9db1-dd9d532b9f35.json | 132 +
.../8ae81cea-b179-4025-916a-9bc73755de82.json | 105 -
.../cf35b7db-f675-4362-8916-36b0582b64f4.json | 132 +
.../79ee7e34-36cd-4024-8978-86c1b059ae5f.json | 132 +
.../bf31323b-bfb5-464a-b343-0605dafb5a60.json | 105 -
.../9ec4fb99-ed4d-416e-9342-0c036aadd35d.json | 132 +
.../e31561ff-779a-4ebe-b6fe-686b2895c53b.json | 105 -
.../8788e4fa-04c5-4f7c-bb4e-523287901f71.json | 132 +
.../c383684a-2f70-46e9-ab55-4d68903613b3.json | 105 -
.../015f91ef-9318-44d6-acb2-17628000c273.json | 105 -
.../18097bf4-5149-40e9-9850-558c3f143ed8.json | 132 +
.../8e7be46e-af57-4e88-9df5-3161110dfa66.json | 105 -
.../b5942721-5c30-4c49-a6e1-fb5419539652.json | 132 +
.../6b8fca40-f44b-45a0-bd5b-04b2fa2067a2.json | 105 -
.../76d27de3-0309-4e4b-8d0d-0e402bde0a31.json | 132 +
.../5c0553ff-4910-45a9-aa8d-3a76af098403.json | 132 +
.../839ff423-8c5c-4fab-aecf-b535ee06af36.json | 105 -
.../9330c290-ee47-4a7d-9b8f-62903402e0e3.json | 105 -
.../fd97d1d9-a1b5-429d-b73d-1ea92ae1d61c.json | 132 +
.../09670c05-9463-479f-89e3-5029fd5d7ee7.json | 105 -
.../f77aa103-5a09-409c-ad72-7992b6049f94.json | 132 +
.../0afdaa1d-c1e7-4283-a2b3-f459c09df4a9.json | 132 +
.../c6a9173a-bacc-40bd-9572-239f9901e065.json | 105 -
.../044ed79b-0c54-4a7a-94ba-a3f999adeb0d.json | 132 +
.../c0035841-a312-493e-9c44-a75133e894d1.json | 105 -
.../ac6b884d-62ea-4ff5-8eee-cfce08869030.json | 132 +
.../f5876dc1-b769-431f-84fe-365d2457902e.json | 105 -
.../8ffa696e-adef-4808-ba0e-bb04921a433d.json | 132 +
.../077f7956-8c9b-47ef-8c4d-40455bbb0027.json | 105 -
.../8a2cfa62-5f13-447e-8d0f-2503e4962ac5.json | 132 +
.../4f24fc46-3686-41fa-bf25-a0e39b252cc9.json | 132 +
.../f0c306f0-683e-4582-81b7-f0a2c372060f.json | 105 -
.../0af9353e-10d5-42e3-8bc9-4c736720ff30.json | 105 -
.../b1375cb4-b0d5-4cb4-ad43-394ebd1a481f.json | 132 +
.../4ce062da-acfc-4684-95c2-679cbe5a697b.json | 132 +
.../e7c5d8ef-d480-4ab9-b698-409e5ea76cf8.json | 105 -
.../3d785765-befa-4e53-8672-769f7bb87dcd.json | 132 +
.../bd038a6c-1241-401d-962d-e033434ba735.json | 105 -
.../ab0d3a24-19db-4d00-892e-bcb7c0f2f30f.json | 132 +
.../eb1d6ce5-3b0c-477d-9ca6-2f3ff8bc4e30.json | 105 -
.../31f0b186-1805-42ff-86cf-d8455a66d538.json | 132 +
.../ed6b3e7e-d294-420d-b9b9-460a52cd0239.json | 132 +
.../91dec0c0-9854-4790-a0a5-e17d19636f17.json | 132 +
.../599616fb-26c1-47e3-a98b-9ad922a95c08.json | 132 +
.../93503cc0-80aa-44b5-9155-c81cd44a9ac9.json | 105 -
.../aeee4365-c34d-46b9-8c98-29976010bb62.json | 132 +
.../1ec68708-94c9-4561-bb99-7f211d7a9950.json | 132 +
.../0b53e7b4-0e91-40a2-911b-cd0d415e9fad.json | 132 +
.../91bcd646-fe3d-458b-a426-a6a8863d69a0.json | 132 +
.../1315f2ad-2e39-4cab-b09a-c74d0779f895.json | 105 -
.../2e0458cc-e092-4770-bd80-00dff169d754.json | 132 +
.../d56ef415-0edf-4fde-8277-ae44b4bb4ed2.json | 132 +
.../ec8a8e25-f985-40a8-80ff-0c7d7595029d.json | 105 -
.../89d117f3-7a67-4e30-82b2-b42efaf44024.json | 105 -
.../a0a1beb8-ee9a-4e88-b939-6e0104ed76a7.json | 132 +
.../98ea850e-7019-4728-a558-8b1819ec47c2.json | 105 -
.../f9b7c3ee-ea8b-42f0-a55a-6171d4e3d0ea.json | 132 +
.../2c8c6c6a-ce95-4d11-a33a-d547859fee11.json | 132 +
.../47858744-3378-4ed4-9101-8acbc3a53cda.json | 132 +
.../2aaeaaa7-89ed-4666-b0a5-8c1320ec4ec5.json | 132 +
.../23ae6a72-5a1f-4961-8662-feb4d8ad8a26.json | 132 +
.../4a3e8df4-8e21-4c7c-aec8-afe353831c3d.json | 105 -
.../312ec315-6175-4f99-8741-97d97eb26b47.json | 132 +
.../7869bbe3-fd17-4e6d-9546-94d3df5e83ef.json | 132 +
.../68c9fb85-f90e-442f-aa96-458dabe30b39.json | 132 +
.../21d5973e-d827-4bd6-b050-346da350a0aa.json | 105 -
.../6891d1dd-0e1a-42e8-9206-64a4c71854f9.json | 132 +
.../c62eb6b3-2a3d-45bd-acdf-bad717e51766.json | 132 +
.../f21e98c1-5535-4cb4-a9f0-541e49aff795.json | 105 -
.../55d4a6ae-44e5-4a1b-9509-299fbc6c3a36.json | 132 +
.../227e3e19-29d6-414f-b538-9f6f89d47677.json | 132 +
.../e922ac2c-e8d0-48f2-99fc-da70c925136c.json | 132 +
.../0f1c48a7-2a20-40c8-88e8-bdfdc3cdad40.json | 105 -
.../59f93c1c-3712-4ee2-a3d2-999e5acc2ee5.json | 132 +
.../2178eb24-2558-44db-aff1-7903c2e0f657.json | 105 -
.../a98dcf1e-6abb-402b-9e0c-da7c23b74bde.json | 132 +
.../22c87268-7e49-42b4-9bbb-16a4b305c595.json | 105 -
.../a889f561-0d8a-4345-9131-0a897ec215ac.json | 132 +
.../6402facc-6258-43a4-a0fd-78e21765c504.json | 132 +
.../872cc338-765c-4291-8b50-77b4bce719fd.json | 105 -
.../29fbd2e0-e08a-48f4-905e-d2aa54886915.json | 132 +
.../8e834483-df6f-4d58-8257-f0cd1d8e3aa1.json | 105 -
.../313e0379-d3ea-4f5a-8e06-4b0a94317487.json | 132 +
.../f326fbd0-5f92-4324-a587-1f08cf7da208.json | 132 +
.../d61310e9-5267-4a87-8e24-ae25172cd64e.json | 132 +
.../60953e5e-523d-43c0-ad00-f746308030b1.json | 132 +
.../5afd8861-d7cb-45cd-af1b-6db966cb56e0.json | 132 +
.../c3972df1-4414-4c71-b473-fb9459cf085b.json | 132 +
.../b89d54b7-2329-4608-b9f6-07017e63f1cd.json | 132 +
.../f1eed2d5-89ca-4757-a5f9-9a90e811f075.json | 105 -
.../50389350-af23-41ba-af46-5ffe338ff9d2.json | 132 +
.../818cb0a4-7458-4cee-aca8-7cc72db341f8.json | 105 -
.../96454d40-4535-4439-87be-0ea7b55cd88a.json | 105 -
.../b8f8f045-2306-43ad-8fa0-6a8bdb494db6.json | 132 +
.../7cd59011-75d7-4497-956c-322d5d609c5f.json | 132 +
.../be032e7e-39b5-4153-81b9-c29115b231b4.json | 105 -
.../0a24d7b1-44eb-4f5b-ae2f-ddee372facd5.json | 105 -
.../1313d865-9c5b-45d2-ad64-629c65f07f2c.json | 132 +
.../0efc2583-bf21-4b60-96cc-716928768eb1.json | 132 +
.../be0a2737-19a0-4401-998a-a03663467133.json | 132 +
.../1c795b39-a382-4315-8b6b-626423b9ccfe.json | 105 -
.../71720e07-2de0-4402-bdfd-102150c61765.json | 132 +
.../38c84c69-5cdb-4f24-820d-4b39c5b118ff.json | 132 +
.../77871404-f2e3-46f9-8c48-808fb89442cc.json | 105 -
.../37534f85-e1ae-482b-89d0-480c4bbc50e7.json | 105 -
.../de9d274d-f213-4037-9711-3e9d3dbbcc96.json | 132 +
.../51e5f1f2-a43a-4ade-9207-1b15d172ba08.json | 105 -
.../92381da4-b9d1-43c4-a5c9-59f375017e11.json | 132 +
.../28f9e91f-b32f-4b8f-ae18-126c7bbe6e7d.json | 105 -
.../44ab6a50-027d-47df-a518-5aa944eb2a61.json | 132 +
.../2a1947d7-74e0-43d0-931d-b2862348e90a.json | 132 +
.../befea823-7dc5-4e69-81e3-e75c4ff117ac.json | 105 -
.../3677b71c-387d-4182-b15d-c3525bc7bc36.json | 132 +
.../f2dcc214-e25c-4c73-97f0-4e47304df09b.json | 105 -
.../6b125a8e-5b53-48ca-8875-926249879f39.json | 132 +
.../b9b23a78-beea-4c4b-8bb8-d5a18a05ffce.json | 105 -
.../13bb7db2-9d89-4dce-950a-14ccfb3492aa.json | 105 -
.../af851d4b-69d4-49a9-a160-a180146c3963.json | 132 +
.../7aa6ce37-c0e4-48ce-b9db-f158ac47d366.json | 132 +
.../1bce093e-27c0-41ad-aad6-b656f6773ed5.json | 132 +
.../5c6cffab-ef72-4e12-808c-c26ee8ec6999.json | 132 +
.../ff136a9d-7e29-4a44-86be-c69bc115102e.json | 105 -
.../63bc0215-741c-48ab-8ce3-d4c036c74a42.json | 105 -
.../e288a874-f750-4a90-be07-616094c220cf.json | 132 +
.../0607da8d-3f4e-468a-91a6-b975261a87c0.json | 132 +
.../5515e597-5f9f-46eb-8d3f-0482bdd69715.json | 105 -
.../be2cc2fd-c8e7-4421-b8c8-d3b937272d0d.json | 132 +
.../15ffe64e-72fd-4e65-8632-babf137a386d.json | 132 +
.../ce1c0d4f-f5a3-49e7-ab77-65ff51bbd0ca.json | 132 +
.../b5afab38-13ba-4abd-9d04-a433c41061c5.json | 132 +
.../ed3b441b-272c-4bc4-8839-aa6055a6ccbc.json | 105 -
.../2d57a30c-8a0e-4f18-bb2d-6bf4536bbc86.json | 105 -
.../a862c2a5-f66b-4d09-ac57-6cbe565f9f35.json | 132 +
.../9bff68b3-82a4-49b5-90a7-3c0038ddc35a.json | 105 -
.../d8254f6c-8110-44d3-800e-101fc731d779.json | 132 +
.../ccbcd5a7-2b98-4d90-ace1-3ad5971a5f18.json | 132 +
.../cf34d222-197f-4d3d-9786-fb5c019f2552.json | 105 -
.../c208b19b-4ecf-4fad-b931-54f65d4b711b.json | 132 +
.../debaf4a0-c734-47ea-bea0-2ddc65dc397d.json | 132 +
.../0eeb5962-ccc0-407b-92e6-7cf17c00941f.json | 132 +
.../4b60e863-482c-4f91-8cd1-6c993d3c5988.json | 132 +
.../a8086735-c7a7-48b5-9219-829e288040f5.json | 105 -
.../f5f0bc72-427d-4703-aab1-1bb1bea73895.json | 132 +
.../271dbfc3-d9cf-4cb7-b1c0-175f016ed32b.json | 105 -
.../aae7f543-7b5b-435f-a506-e3ab901a8c5a.json | 132 +
.../3c6d1b1b-465a-4b97-83ed-d2ebd27a905e.json | 105 -
.../6e6ff4c3-3cfd-4790-80c4-544d9cbe47e2.json | 132 +
.../3ee76278-89d4-44fb-a449-717534b00161.json | 132 +
.../4eed8b1b-591d-403b-96f4-c6db11e8b234.json | 105 -
.../a43e1d8d-8a9e-445b-9023-fc6d4a41fcfc.json | 105 -
.../fa2854d3-9e2f-4f79-ac8c-e1cb5a638745.json | 132 +
.../9ddaa721-bf3a-416a-9be8-291188793cc9.json | 132 +
.../d1c3467e-6189-4d6f-bedb-8c51fa8bfde6.json | 105 -
.../1bb3c61f-2f72-4486-87ef-1e6d5ce58478.json | 105 -
.../d659077d-7261-4c69-862c-d61be21662a2.json | 132 +
.../e87ba227-c55e-4666-949d-b45913f8336b.json | 132 +
.../077f683a-af6f-4a71-b599-b9b269546b7c.json | 132 +
.../26810cc0-541f-4ca5-b76e-f1a63baa61f6.json | 105 -
.../54808b08-d10d-4a06-ab60-8d99039311b8.json | 132 +
.../138e6fdb-7092-4ee6-be82-7bb86c1fc759.json | 132 +
.../fc5be34b-0fad-4fce-9df1-851e4fd3119d.json | 105 -
.../1b27423f-62cc-4189-a293-5af84ef1f2c8.json | 132 +
.../848ac6f9-2bb5-48fe-821a-83f28da91f92.json | 105 -
.../f5468512-d2c7-4486-9d31-bef61225af52.json | 132 +
.../fc4971f4-983d-40f9-810a-16ed998c1dad.json | 105 -
.../0e0ec1a9-76aa-4d7e-9c0e-946d6b000a6a.json | 132 +
.../54093f2d-15c3-465e-b876-5e4027deeb19.json | 105 -
.../07b87b98-0d61-4479-937f-7447565b4631.json | 132 +
.../aad7ed5c-d51d-46d7-af15-9c0447a02036.json | 105 -
.../08cc58ae-b1dc-489c-ba25-338bb11db2ee.json | 105 -
.../85b11b91-d686-49e9-8db0-971dd7cafb75.json | 132 +
.../21bac032-a092-4afa-8d29-ebdefb3a0650.json | 132 +
.../29e3a687-429f-4f33-ae5f-48db85127364.json | 132 +
.../2a6af60c-eb46-46ae-8140-d050b48069ae.json | 105 -
.../d98493a6-f237-4565-8508-9e4cc3188d2d.json | 132 +
.../2def6fbd-7488-4e9f-a822-2405d4f7a315.json | 132 +
.../f9eef8a7-1f23-46f1-b57a-062ffd1b81a1.json | 105 -
.../819143d4-9538-48b9-b7af-128bc15c518a.json | 132 +
.../b57a86fa-8994-4004-a79d-d6da64e64b4d.json | 105 -
.../c29d47af-a9de-4edb-acac-6763c0d44ca3.json | 132 +
.../22bf3fb7-9235-4a57-b8fd-c85b12047b0e.json | 132 +
.../2bea7014-460d-470b-918f-468b58d70fd6.json | 132 +
.../3927a5dd-002b-441a-b769-ba68547cd5f3.json | 132 +
.../1cf0506b-dbdd-4f7e-abf5-d812763a722e.json | 105 -
.../476fc734-dedd-4192-aa59-eb2f9dabf16b.json | 132 +
.../54a29a68-c69a-4b49-a87a-cb93c459146a.json | 105 -
.../817e2fbe-0866-489f-b987-391228a68c53.json | 132 +
.../74342d21-8eac-494c-95b9-4df1e828473b.json | 105 -
.../f25f5eb1-ff22-4be3-a639-a9d25207078f.json | 132 +
.../972dfbcf-a5d0-4f9f-a39c-089c30ac91ab.json | 105 -
.../f71d1c31-184b-46be-a288-bdc92f0ebe09.json | 132 +
.../0d9547b3-7bef-4815-9c44-7d714fe81bbb.json | 132 +
.../5146b3c9-9fdb-4a4e-a687-4bcf44b92309.json | 105 -
.../22dbc5a2-0ff6-4566-9bfd-e5ce314be597.json | 132 +
.../697ad115-9040-42e4-b94b-529ab27011ee.json | 105 -
.../afedb249-f1a5-42d6-b6c0-54b2cc303f64.json | 132 +
.../f4cbe998-8c9f-47c1-a267-5831a40e4cf2.json | 105 -
.../61b1bf5e-6aa4-4e90-af2c-dcf5fc9903f2.json | 132 +
.../a4e4a936-5203-4a9d-a698-417cc9da866f.json | 105 -
.../302fa968-5d2d-4750-a1e6-c87534c1eafa.json | 105 -
.../c0adc04c-1e02-4891-a5a1-1fab0ddf18ca.json | 132 +
.../cc57e6f0-ab55-4ab9-983c-63d74632d016.json | 132 +
.../d891d79a-1ec2-44e3-83cd-c28739aecd6e.json | 105 -
.../0d3c5fdb-c4a5-4436-b9d4-f0f42cb4db96.json | 132 +
.../9f32b229-a2d5-409b-98d2-65681616aff4.json | 105 -
.../7a93ddc1-8694-4b16-8183-1b7f46dfba92.json | 105 -
.../a6ec2934-e9fd-481d-8f00-932603bc6e0a.json | 132 +
.../e2553c93-60df-4126-9e64-ecd4a5003389.json | 132 +
.../a06dc6ef-5d16-402a-a855-b7feec423aa5.json | 105 -
.../e7c2fb42-e82a-4dac-9cc3-a9f41ab54e0f.json | 132 +
.../a807ee8c-509e-4b6d-a414-df24444d8a0a.json | 132 +
.../e25fa684-c237-4bce-8498-7bdfaac970a9.json | 105 -
.../2199024b-7944-4950-8335-32a536efad02.json | 132 +
.../df3de449-9abc-4f0a-ba6e-caa48720893a.json | 105 -
.../97919c86-6161-4548-95b9-d44263a29f8a.json | 132 +
.../fec678b9-c51b-4945-8d4f-f06af6528227.json | 105 -
.../11262698-480b-425b-b013-f362fae2f254.json | 105 -
.../c40c1a46-2e30-4cf1-bcf3-a316a793fbcd.json | 132 +
.../c1294268-b5f5-4d64-b91a-147f58a21a47.json | 132 +
.../ccffe03b-c166-48de-8516-8253b2c2f96e.json | 105 -
.../2b029e6d-a0b8-4b6c-b62d-144b8dc4f739.json | 132 +
.../b926ca6c-60c9-4353-9671-0453b46d0222.json | 132 +
.../44db30b4-2010-4f96-a39e-9ccc8568374f.json | 132 +
.../07af3512-a045-435e-a965-8daa0836905d.json | 105 -
.../2210d673-d417-46be-aeca-de48cd846e01.json | 132 +
.../0c5c315f-63c4-427e-a307-1422a197895c.json | 105 -
.../892d27cc-dfb3-40c7-ae0f-a7cd06784808.json | 132 +
.../49b3f293-721d-4d44-9748-88d1ce275050.json | 132 +
.../49e095af-ed90-4e64-b476-4fc62d6e6997.json | 105 -
.../70fb41fe-46af-49e3-8270-5882e12f710f.json | 132 +
.../d8d05a10-8889-40aa-b56f-365e0a12052c.json | 105 -
.../13e2489f-9d96-4f68-8e22-c937604c2145.json | 132 +
.../0c386ea0-4706-4a6f-994c-b6ee21dbce92.json | 132 +
.../a8d5a193-6c87-4b5b-8ea3-b3ab78e73104.json | 132 +
.../4018f4bd-492a-4814-9a7a-1f0c376f2d2e.json | 132 +
.../568072cb-118d-41af-bfe8-fa14cb4c7348.json | 132 +
.../a6d08766-8c36-41bf-8bbc-acdfdc3f8e23.json | 132 +
.../2504fed5-c8a1-4ffc-8ce5-9559aa8c4325.json | 132 +
.../3d3598fa-4b23-4ec6-a010-fb20232a5121.json | 105 -
.../359dde31-d9dc-4c22-b829-77df652dcc73.json | 132 +
.../34a79823-b993-402a-89a7-538e126ee02a.json | 132 +
.../d9785857-b164-4d38-8d03-0e03e2d0fbf5.json | 105 -
.../eb8adbdf-2cfb-4e9e-8f75-ce2734907725.json | 105 -
.../f392c5c3-9bee-4111-9a22-6a1b706fd2ad.json | 132 +
.../73bbdd22-4e5f-496b-b39f-290d8e0d2aa4.json | 132 +
.../ad99531d-4d52-4175-8ebd-cb172b4577de.json | 105 -
.../2e3eca4b-4c15-4b3b-8c44-3a23312a0797.json | 105 -
.../72a66eae-9c94-40e3-b3c9-211303e5cba8.json | 132 +
.../aa425d3e-e363-46bf-a5fb-cbf524657e85.json | 105 -
.../ef7390b5-599b-4354-805b-9486e4ce34fa.json | 132 +
.../22ae39ae-883c-43a7-abbe-3213b9035b58.json | 105 -
.../57f964c3-0504-4b60-9539-ce0e369816ea.json | 132 +
.../4e6c0336-5d94-4417-a194-92a4d6f38481.json | 132 +
.../be74b2d6-28b9-4227-b0ec-fbad4b7dada6.json | 105 -
.../35512aeb-611a-46a8-849e-442fc3fcc23a.json | 105 -
.../fe38dea8-92f4-4fb2-afdf-c5932d7c9e27.json | 132 +
.../5ced7497-5a05-40d2-80cb-cae63ca62022.json | 132 +
.../52a66aaa-193a-48ca-b693-4dcab811eaa3.json | 132 +
.../e0e4bcef-cb73-436b-9353-b18ade293e8b.json | 132 +
.../f105fe57-632a-4e3b-bbcb-f063f2e10874.json | 105 -
.../1ae45791-7e47-4083-bd72-4530fa26893c.json | 132 +
.../b5db7846-f777-4fa8-86e9-f09fdee1dfee.json | 105 -
.../b2731f04-a9bd-4e36-a545-85be5b66f5a7.json | 132 +
.../24fbb409-3b1a-4ed2-8866-547a7f02c5dc.json | 105 -
.../ed6de552-d04b-4d51-8456-610e2cb41d85.json | 132 +
.../3e08a589-d2b3-487b-900e-85725522a2e4.json | 132 +
.../b2717503-d081-40ee-b1ed-fcadaf239049.json | 132 +
.../9915eb01-5c45-42b6-82a3-ad782411642f.json | 132 +
.../e4b13fb1-11c0-4696-856f-de393fe2f8b2.json | 105 -
.../190eb7ca-46db-4e1d-8b71-9bb20af74ede.json | 132 +
.../d1b47391-f36e-4819-8093-5aff774dff94.json | 105 -
.../86b9077d-9ec3-411d-84c5-326ba97742c1.json | 132 +
.../18bfa50c-20be-4027-8ee7-f6cd1411c882.json | 132 +
.../eb1a099a-48c7-412b-b62f-143537c41f06.json | 132 +
.../e530a4b7-c2f6-4bad-bab5-2895e950ed63.json | 132 +
.../52ad7152-feea-46a6-b2d8-20e1a70514ce.json | 132 +
.../a61162a6-ef3e-46f4-8aa2-241547fadea2.json | 132 +
.../9f208aef-8544-47c8-bb1f-a3841aff208b.json | 132 +
.../da237ab6-df39-460f-9efc-e1649e1ac202.json | 132 +
.../c81b3193-9d01-4590-8b72-da97aa3c9dc4.json | 132 +
.../1a9ffe50-69ae-48bc-b636-89431391eb37.json | 132 +
.../b0c67359-1da0-4f55-aa1c-f54f88038bd7.json | 132 +
.../c700798b-583a-41be-94dd-382669bb495f.json | 132 +
.../3c0b9735-2ef1-4f27-b94a-f246eb57b73c.json | 132 +
.../e8c9501b-c985-4b78-a902-a1a030c72e60.json | 132 +
.../df978fce-3373-4073-8c44-d6a83df1d9d1.json | 132 +
.../e46ee8d9-81af-4259-8fef-3d3113fb6168.json | 132 +
.../aa6ab404-89ef-4336-b811-7c8064e26107.json | 132 +
.../a14e6c79-4a78-4c02-a7ca-35e783f32be1.json | 132 +
.../b0332107-4b84-4c0a-b488-187fb3d534ae.json | 105 -
.../787cc582-61da-4afd-bfac-431377809fd9.json | 105 -
.../ba1fb85b-bbc0-46ac-95d7-e61b91f65c2b.json | 132 +
.../5b614673-6566-4b82-bf7c-13268ebb1577.json | 105 -
.../f6312fc7-c7a8-45dc-a57c-91f56b4ca28a.json | 132 +
.../28439ab5-0e5f-4dae-a98a-e0c1b743a8b0.json | 105 -
.../335f5c32-f3f0-4a16-8c9d-8f07b2aae54a.json | 132 +
.../035c5e35-0ebe-4e91-a598-8d01688462a3.json | 105 -
.../b7c7a907-7ecc-4d5b-bc6f-8b8d82954b21.json | 132 +
.../112f01a2-f0fb-4257-86bf-61c9a184eb92.json | 132 +
.../2d9410d6-7162-4811-bf7d-9de2c2b48fd2.json | 132 +
.../16ff8fa3-4676-473c-99ad-908ddb59d8ed.json | 132 +
.../9b153ac9-f95b-419b-b7f9-beccd769ddad.json | 132 +
.../8a5df3c2-eb71-4e12-b013-fb43685f2916.json | 132 +
.../8ddec5bb-ab90-4c98-8482-a412e7735246.json | 105 -
.../35fa3213-5c08-4b19-ae76-237fdd25444e.json | 132 +
.../ab4f785b-779f-423b-9905-31a3b66dfeff.json | 105 -
.../242ce55f-1471-435e-bcd7-d28b5fc87fc4.json | 132 +
.../f9d2286c-ed89-4c23-b6a2-c623373331cd.json | 105 -
.../8c4ff628-41b6-4769-a33e-b1dbffa913cf.json | 105 -
.../95f509f2-5e67-404a-968d-f7488d684e32.json | 132 +
.../5f9a01b0-632a-4ee4-aedc-279002c7496c.json | 105 -
.../bcbcdfe9-0663-417c-9a29-60906e63db8f.json | 132 +
.../2d0a414f-1cf2-4ae3-951b-ed69d1ef883f.json | 105 -
.../d95a7493-2f99-4c10-8067-711c7388af7d.json | 132 +
.../06eb233f-5182-4b9e-be3f-21c928eef397.json | 105 -
.../789848a0-6d8a-4583-93c3-a72df74d0071.json | 132 +
.../14af87df-0fc5-46e1-9d0b-c25c8b6a7ce7.json | 132 +
.../e897d1fc-2c71-4c61-971b-eeddfae1b75c.json | 105 -
.../18a12670-8785-44ef-a365-78ce797b8ba5.json | 105 -
.../379f559f-9bfa-444f-b477-562c25b4c299.json | 132 +
.../effb6a3d-c98f-4c3a-be77-902c61cda21b.json | 132 +
.../6c1c1405-afa4-412d-ba1f-49dc1cac4509.json | 132 +
.../6f4ed7c2-c775-4fd2-8600-4cea523f53e4.json | 132 +
.../5fd5206b-186a-43b9-a4f4-07e75aa0293a.json | 132 +
.../b707ecbf-0658-4226-803d-53456d16d54b.json | 132 +
.../dca1ee57-5e86-4532-a2f3-ac6a619ca576.json | 132 +
.../1233476a-7839-4a22-a7ca-1d0f237d8888.json | 132 +
.../5c4bdeca-5ef8-4002-8f82-67d49b5ff722.json | 132 +
.../18f5fd6c-2b79-4d48-b7e9-18845db16271.json | 132 +
.../a9039374-fa5a-4b8b-800f-5f4651cf812d.json | 132 +
.../3f9704b4-bf25-40da-b6dc-b927c3569f40.json | 132 +
.../a8f858d8-a792-409f-b79d-948a19e2aa87.json | 132 +
.../e582afbb-99f3-4b43-8ee7-b786680124a9.json | 105 -
.../5c34a168-b8cf-436b-a3b7-a2d1feadffb9.json | 132 +
.../77092cfe-9820-45e8-94c5-31d27f1daa7c.json | 132 +
.../cab8fed8-de68-4fa5-b4fc-d9483fc56571.json | 132 +
.../a8103350-b208-4856-8e7b-8ea8918ba0d1.json | 132 +
.../e849c03c-c569-4059-8fc5-6a98cf391342.json | 132 +
.../f1d8bffa-61fc-47d5-85cf-48cebcb31af5.json | 132 +
.../97bdb352-2e9d-4cc5-8b70-55348ef3a217.json | 132 +
.../78053a33-24c8-4e9f-8791-f127f21eec1c.json | 132 +
.../03082966-87ba-4560-a784-5d8677003500.json | 132 +
.../97f26b20-db66-4a30-ba2a-c18a31081271.json | 132 +
.../85f9ccda-8c47-4fa1-9d47-e9da4730b077.json | 132 +
.../2a57d6f4-643b-4b30-8d67-03032d454887.json | 132 +
.../d333f360-c1c3-4916-8480-4a1fc490875a.json | 132 + .../37a41261-a7b0-44b2-916f-770cdfa0ad39.json | 132 + .../c46cd6cc-b56d-44c5-a03c-b49381ba3462.json | 132 + .../612b6226-c25d-42e0-bcd7-be7faa844530.json | 132 + .../2fc7a4d6-88e0-4f11-9110-dc53942870a4.json | 132 + .../34665752-58d8-48ee-81a6-f1a068c23026.json | 132 + .../cc0767b5-4aaa-4418-8f68-72a721323e9c.json | 132 + .../ea507a41-1654-4515-94cc-ce2e38800c61.json | 132 + .../c44e773f-4cca-4780-bdd4-f486e65c18e0.json | 132 + .../f8a46bda-d53b-484e-8832-7939f7d0762d.json | 132 + .../c3968a2d-4a9a-4f62-8bea-a3b4b6dcd378.json | 132 + .../d6a9abee-29ee-44e0-802c-c3e4354ebbac.json | 105 - .../da18242c-d6bb-4a0a-a2f9-2e42099f4e8a.json | 132 + .../87231cbd-d911-434d-991b-1eb373cdde4f.json | 105 - .../ac078124-85d9-4715-bf7c-1428b1063732.json | 132 + .../9c1dcd75-8491-4890-ac6f-000868099a3e.json | 132 + .../e80773ef-5ca2-43de-ba99-a7a997aab7f0.json | 105 - .../7850fc57-49c7-4124-b7c6-e1e7bb2bc726.json | 132 + .../8f38374e-f373-4639-9278-24441ebd0325.json | 132 + .../c007938e-3427-4896-8493-1500abdfbd2b.json | 132 + .../f3f55015-88c7-41ae-b588-9a1eedd56fc2.json | 105 - .../df81dc0d-6c72-49e9-862b-02e9b6642cb6.json | 132 + .../f904e587-76ac-4583-9235-fcdd20d9a626.json | 105 - .../46c96d8e-568c-48f8-a74b-9dd4b4195037.json | 132 + .../de30a84d-c8cc-4f3c-9eb4-3f58754dc46b.json | 105 - .../1f4f7181-8a81-49f4-9e81-925d5d69a37c.json | 132 + .../45c46c5d-cf81-42d4-bf9e-61aca49b2959.json | 105 - .../3ea343b6-93f6-4c61-a164-3db95d13cbdf.json | 132 + .../68382b86-8a68-428e-8338-144a76b8c293.json | 105 - .../a9ea8bb5-05fc-4da3-8e00-f53ab8ea6af5.json | 132 + .../c0fe65df-7e51-48ad-bf40-fd163804cad1.json | 105 - .../0ea74ce5-43c9-43eb-92bc-3d928062d9e0.json | 132 + .../d67c4d9a-d5cc-4b26-a439-44c87a299ee8.json | 105 - .../6896faa7-7204-4091-8f4e-9cc0b53d673a.json | 132 + .../70577ab1-a0ef-41f3-8d6a-00b0b873ee39.json | 105 - .../6021f954-951a-47e1-980d-ce729f9f39b4.json | 105 - .../88064453-fd8c-4bd9-adf1-39f43972bec1.json | 132 + .../a18ade45-acba-4059-b969-445e529a82e2.json | 132 + .../e027a39b-1213-42aa-b66f-b1853c644532.json | 105 - .../4264c0fc-9f40-4c27-b877-63a751678a1c.json | 105 - .../6c0e4132-71e7-44af-95fc-83b0a6be2a82.json | 132 + .../46564b0a-1489-4c98-9e7b-20daf58c2f87.json | 105 - .../5d9ab422-4f4f-460d-bd39-51266b43d7e5.json | 132 + .../c3a0b587-b379-4013-a5ce-26fdc9dcc44d.json | 105 - .../cda03c45-0782-40cc-a17d-67d808657b83.json | 132 + .../50f5451b-41c4-4ba5-8bee-ee8a2deb7e79.json | 132 + .../ee8952db-9f0a-4892-bff9-4d2ca1b66364.json | 105 - .../7a5fdffa-146b-43fd-a979-728c37ae599f.json | 105 - .../cf758994-6e94-434d-bf68-74cca188b5e8.json | 132 + .../611f9549-0788-44e9-8125-18df06cd80d6.json | 132 + .../4ad4a260-770a-4cce-9ba7-546cfa4cde58.json | 105 - .../59cf23ba-027d-4bac-a0e1-526376396b4d.json | 132 + .../1f02bbd3-ddaf-4db6-b7f8-31bad8ffac66.json | 132 + .../1e737e28-d926-43e8-9e4c-e39fa91d7977.json | 132 + .../43ef8eee-5d8a-47e7-ac71-1a898421370a.json | 132 + .../750b35ad-fdf6-4243-91e7-aee90f84fa5b.json | 105 - .../856a1f50-7ffb-4eb1-be4a-8aaa3cd6ee66.json | 105 - .../d8d03c71-942f-4aff-8a5e-5c265c639b44.json | 132 + .../96262938-1146-4993-92a1-a2ddb2519f8a.json | 132 + .../292d7cfb-3e3c-47d8-8cca-33507f9ff081.json | 132 + .../e6d8d952-5a3d-4a97-860c-8275b10c6516.json | 105 - .../3f29c10f-57ef-435b-85df-2cae30ae72fa.json | 132 + .../d7f022fe-86cb-4e4e-a672-62c2dc8cffd3.json | 132 + .../baa35c90-c494-4dff-af28-cb549e40bed8.json | 132 + .../2fdc3186-6791-4550-ac4f-a1a5a5a1d514.json | 132 + 
.../f687df8b-42b5-4d94-b741-1b516d9221b2.json | 132 + .../c3a8a952-6869-4eee-a59f-4ae33ac72986.json | 132 + .../a7a74117-71e4-49b2-bd65-add82c9165d8.json | 132 + .../04ee694c-0c89-4f25-b10f-315a24743ba2.json | 132 + .../47fd4acb-acc3-4f12-8af5-c425d3754c38.json | 132 + .../e19577f5-d1ba-45ad-8500-d18ae2b14440.json | 132 + .../0e9ed58c-1a3e-49b4-8013-994642a95920.json | 105 - .../e86443cd-453b-4ca0-8e7e-054764fe4bb9.json | 132 + .../24cd9977-f3fb-4619-aea1-59e1a36b2a5e.json | 132 + .../6bf4063b-44aa-4809-a400-5406abe5eb2e.json | 105 - .../1401f0d9-6f4c-41d2-819f-eb9487c5c1e6.json | 132 + .../2f1e6f4e-86e6-47a4-96e6-3bc2b330cd3a.json | 105 - .../4b1f2aab-ef92-4231-9bdd-96918b26914c.json | 132 + .../4956e127-14a1-405e-a0e0-76fe94ea727b.json | 132 + .../90fb6e40-88f7-4ce2-ae99-308d87e69718.json | 132 + .../cdad0f08-1c60-4493-bed0-9733894b367a.json | 132 + .../e0b9044d-1b87-44f7-b59b-88d790f429e5.json | 105 - .../8e83b4f7-736f-4e03-8256-2a1fc421b04f.json | 132 + .../a3b08cd3-6ead-4db0-92ed-212c6b0e45ee.json | 105 - .../962b4977-63f0-4a87-a36e-f3e592b74761.json | 105 - .../f0d6639d-8485-4bcd-b069-046a747dfbfa.json | 132 + .../ba0fe822-7a57-4ccb-a97e-e852a59d9ae1.json | 105 - .../d1fe36ba-04f8-4110-8c39-81d393c4cbfc.json | 132 + .../5a8ab5fb-ec1e-490c-b643-e3b9d49f5d34.json | 132 + .../84695a6b-dc11-448c-bbeb-b3cc05cde7ba.json | 105 - .../aed1ac03-5364-477e-ab8f-68b599170128.json | 105 - .../de944f89-d2d4-4b01-b4b5-e7cbd1d8d1ae.json | 132 + .../19b4d65c-39c7-4b81-bb71-f166ab4f9490.json | 105 - .../db96601a-2f7f-438f-915b-55fee0e0d1d1.json | 132 + .../27912f7d-7033-4b7c-b93a-af1673ce4a9b.json | 132 + .../b5707c22-a2a2-4787-a902-b72945ebccd9.json | 105 - .../da58a484-4a45-4a70-a651-031ada8023d5.json | 132 + .../ddd32642-ed7a-41b8-974a-f85b7f04d0db.json | 105 - .../e8bd221d-8a89-4e3c-8815-0bff27574053.json | 132 + .../ffc21c2a-59fb-4ad8-88a4-930879b6eba0.json | 132 + .../1e506afa-0d08-45d6-9242-b06104aa67e8.json | 132 + .../7d66bb93-cb2f-4be6-b133-1f0325be58e1.json | 132 + .../e1462a5a-d120-4c0f-ba13-fbecb18619a0.json | 105 - .../936f3c5f-7817-4118-96c8-e4061d4560fb.json | 132 + .../7d36ceed-2a1b-4b20-88ae-0a609cc161e9.json | 132 + .../a28de361-e90d-44f7-b609-e4d64ae1be6f.json | 105 - .../77cace56-503f-4531-a4eb-0178a68cc283.json | 132 + .../9e49b710-2413-42f3-8943-bc9dbf68cb3c.json | 132 + .../108ead60-3cee-43e7-925a-619bace5b65f.json | 105 - .../9a5b3564-97df-4661-a171-37322386ac4d.json | 132 + .../0fc0450d-cdf1-44b5-a809-202d1dd6b5e3.json | 132 + .../7f06c78c-f95e-4e50-aa57-da0579adcdae.json | 132 + .../fbedd898-b839-49c1-bd6d-3a8744d4138a.json | 105 - .../06e55e47-9995-4fa2-877a-c728e9f9f1a1.json | 132 + .../39af1e0a-d1e3-4372-bc18-d07f3dff09f0.json | 132 + .../7a6d7a66-5772-4793-9597-ef0225b63f30.json | 105 - .../f32d59d6-8ab9-4b7d-ad9d-f62ce6d559bd.json | 132 + .../7ddc3aef-c6c5-4d04-8473-3b3bba219d7f.json | 132 + .../ce80ac07-22d2-4883-ac6c-40b080e00b81.json | 132 + .../cbece170-f872-485f-a6c2-5db17ced73bc.json | 132 + .../ad130d6f-6a5e-447a-a1ee-bfa2d93e5336.json | 105 - .../c1fd751b-c6c3-4350-9618-f4b4840e1b69.json | 132 + .../bfd28b91-3a72-4417-b52b-804d2cbae12f.json | 132 + .../32c26cbc-3697-47a6-bd12-18187df9dda9.json | 132 + .../e9546f28-0f6b-449e-a2b3-c6ab262103cc.json | 105 - .../02280b9f-bc01-4e44-9d09-1e4ae8c0438b.json | 132 + .../350b0559-6331-4b8b-82e2-0463baea9d8a.json | 105 - .../831b6f81-1552-4a7b-acac-eb927001e440.json | 105 - .../a57d2d49-5ccf-48f5-8035-b1d480c80f40.json | 132 + .../2c4626c7-3016-4641-9862-0ba4f7f7936c.json | 105 - 
.../6b5a3c69-f8dd-4952-96fc-b6e4dec1ed9d.json | 132 + .../a4c3ddcb-482c-47fb-9290-3c0678b38fb4.json | 105 - .../fe0665dd-b976-4d90-b16b-6c2acfef15ff.json | 132 + .../8c6bdc44-fd29-45e7-b161-2c8e07ef2935.json | 132 + .../e7c70ff9-59ad-4d09-8af0-ef9cf16d1dfa.json | 132 + .../26c4c993-ae49-42a0-be0a-f157be9f7d58.json | 132 + .../19adf124-c120-4e97-80cf-49c40a66eb81.json | 132 + .../66bc5d38-8d25-4934-bce8-41ce4ea0e385.json | 132 + .../541eafe5-807e-44b0-b652-a0752210fc71.json | 132 + .../845a2484-9f17-4c0e-b06b-6250992298bc.json | 132 + .../e62b6b26-5f3c-42c9-9541-bb8b23caee66.json | 132 + .../21ba6052-9614-454e-999d-ef4f0f693c6c.json | 105 - .../745bd077-3a0f-4c06-8d19-d7c160512446.json | 105 - .../705ae322-fed9-4a98-a79e-e0b289065ba9.json | 105 - .../7888b813-8ef1-4367-8168-edd1bd3c7888.json | 105 - .../19ff3120-2171-48b3-8db6-1c76bb57cf47.json | 105 - .../ee2c5dd9-09db-45fa-8e67-961993d30672.json | 105 - .../1a2d8396-4ff1-4386-a76b-d4863c7736c5.json | 105 - .../4f7f368f-0646-4c16-80de-69d9c5e28193.json | 105 - .../dcd14b21-f2fd-4c10-bf83-b6bb946f2789.json | 105 - .../3171e54f-4c6f-40cf-ba6c-ef23b803ca33.json | 105 - .../62faed28-8f0f-4ff8-894f-b4b5b754b4cf.json | 105 - .../62b4c918-b33b-40cf-888b-42b116a9e04d.json | 105 - .../3bf71784-e6f1-405b-ad23-e74a91df7051.json | 105 - .../2121d736-eec6-4a86-bae0-cd032f9eb603.json | 105 - .../2a633e8b-b35a-4a26-83bb-b471bab18ed2.json | 105 - .../46d2afd2-b620-4474-ac6c-4f6bdef93d1c.json | 105 - .../4429613e-2db7-4061-931f-eaa70d202b71.json | 105 - .../782219f0-25f7-465b-9f86-5e48c9d4703e.json | 105 - .../7abe4912-4e21-4774-8011-482603f7bcc0.json | 105 - .../63a1000f-1de8-42ef-a905-70b78bf46417.json | 105 - .../6966d397-d336-455a-a156-c2e6430c813f.json | 105 - .../15af5216-fc3d-4102-bbed-eb5b7d0ecf48.json | 105 - .../7b125482-fd80-4f71-b398-9421333ee736.json | 105 - .../36ebe0b7-51ae-4ea5-ba42-c9fd0d717259.json | 105 - .../8412921a-ad8c-4106-a3a1-9259d2ddb074.json | 105 - .../5b6ef372-86e5-4fc1-85ba-5a76517bb10f.json | 105 - .../d6700ad3-d858-4420-96b1-d690984ebcaa.json | 105 - .../7c4a43f8-be43-44d7-a514-f02b70ec367c.json | 105 - .../5b1e2a5e-cd92-4ad4-b12d-0540461f9f5e.json | 105 - .../f269bb45-d627-49b9-953b-5c8591433aa7.json | 105 - .../c1db0f86-a3d9-4aa4-9fe3-0442fc63ad25.json | 105 - .../df6199fa-3797-4b88-b5fc-e429f513932b.json | 105 - .../b76ac8f6-7355-4bbf-ad8f-d8fc967120a1.json | 105 - .../efd5d269-fc83-43f0-9054-dc3bdf40f180.json | 105 - .../8359ce66-d904-4092-92be-5e2dbb372677.json | 105 - .../6c2287bb-69b0-4b23-ba15-ff4a600e4aa7.json | 105 - .../a4f5037a-381b-4726-b90d-ba559058772c.json | 105 - .../852ffa19-285b-4037-ac60-63f24cafcecb.json | 105 - .../4fba9290-886e-490d-aaeb-068f8c679006.json | 105 - .../44823eb6-717b-4508-a745-7821545dd3c2.json | 105 - .../e2621a1f-af39-48fe-a56b-18e9b396a476.json | 105 - .../6a7ae44e-93f6-4371-b3a6-585a099aa7c7.json | 105 - .../b9f3e9d1-e1f9-44cd-9067-c949adfbe553.json | 105 - .../f4505219-fc0d-4f7b-ad71-3c9fef064c28.json | 105 - .../49eccc70-6321-451b-87e9-29907cfb53a0.json | 105 - .../4857c00b-e4fb-417a-8b63-a5b7e9298b40.json | 105 - .../c5330fb2-e914-4170-81f8-77a317ba557c.json | 105 - .../4b7dd9db-5e94-4885-96f8-189af8d97c09.json | 105 - .../78e7f7ee-3677-499a-aa36-2e8bf0902bf0.json | 105 - .../d65793ba-f363-4665-9ff5-1ac08e819d55.json | 105 - .../c142222c-836d-493f-a9f8-857426e0573c.json | 105 - .../6669c8b8-91d6-4f14-8cfb-a6422352850d.json | 105 - .../78ec8596-ee15-4e94-8bc8-77c6bdffc541.json | 105 - .../f9cac378-3bdb-4c66-8193-502773c5c5eb.json | 105 - 
.../04f0529b-474c-42d2-99a8-e3bdd5c18eaf.json | 105 - .../041f6e95-b7d1-44c6-a995-0c8257e188aa.json | 105 - .../b36b915f-3c4a-40e8-ab78-8442dbe116e1.json | 105 - .../3ba36700-5019-4525-bf5e-6a87cce7ecc5.json | 105 - .../9e315ba7-3eea-4934-822e-461e64bf8551.json | 105 - .../777b5587-70b2-472f-a6e4-820d653669cd.json | 105 - .../a99dbb21-4f7d-4ac0-b403-2f8bf7aa92b1.json | 105 - .../a0730f18-1058-44b4-b6b6-0881ae2e6338.json | 105 - .../6e852e78-e666-413e-ac29-ad374bbc74f2.json | 105 - .../07f4a9dc-16d7-4b75-922f-09f8e9ebed7d.json | 105 - .../0cbb4771-926d-4cf6-a78b-a5f4ac4d5902.json | 105 - .../301f71c8-fc1f-42e8-9029-f9d03574872b.json | 105 - .../65e2f2b2-cb5b-40f3-b23a-8c0d185de219.json | 105 - .../062a1dcd-2553-4657-8f89-a481ff62a193.json | 105 - .../82b47608-08b5-4368-bead-aa117736c06d.json | 105 - .../747310d0-7c30-4261-b2e8-a783d8753e9a.json | 105 - .../a7b6a07a-70fc-4d34-9a92-265b848d22d7.json | 105 - .../99139c71-a4f2-45d7-95b8-a8b7720681aa.json | 105 - .../6407040d-023d-476a-ac79-ef85e104eace.json | 105 - .../64f71756-0a54-4a42-a96a-7056071c7dd0.json | 105 - .../8c18d418-a0a4-435a-b31f-7d879c793b4c.json | 105 - .../75e153a7-d699-4822-90b6-9d7da259e124.json | 105 - .../836cc2ab-edbc-45fa-af8c-034d0239635b.json | 105 - .../f270e1bd-7e75-4c6c-a701-9def96275025.json | 105 - .../02ec1b4f-f1e0-4c46-bff2-1475e95cff80.json | 105 - .../9da4a976-09a2-4f1c-a15e-d498a2adfdd4.json | 105 - .../c3a945da-be07-4132-b558-f20202530b4d.json | 105 - .../723afa16-d986-421c-a6ec-d1b00cb9d765.json | 105 - .../03e5cd5c-adc0-49d8-9e51-3e315d0bffd6.json | 105 - .../6992c085-939e-48b0-8c8f-53d6ca9737de.json | 105 - .../59e7ed2b-8385-4c83-b357-6dfa52e429cc.json | 105 - .../495ed31f-9cbc-4f6f-b4be-2b9ee8f5011c.json | 105 - .../6c5809dc-67b3-4567-8d1f-4a8104a11507.json | 105 - .../44c78761-2672-49c4-85f4-9b0d575dd914.json | 105 - .../b33d4765-4633-4c2b-a118-1ed82b0c842b.json | 105 - .../8d200434-ef84-403e-9fb6-86c15c4ccfed.json | 105 - .../3a666f3f-f2ea-4fed-b2fe-750b759eae7a.json | 105 - .../7fbad2de-a9da-4962-ae18-47298811ba5b.json | 105 - .../1fad00cf-e472-42dc-8b87-a0501cb051ab.json | 105 - .../c68fad94-ce6a-4053-b991-2c1e660fe7d9.json | 105 - .../a6a3ee79-a93b-4220-ac09-1c5d2f70cdf8.json | 105 - .../e3471a51-fad2-44cf-bd0c-ad1250d22f83.json | 105 - .../5a3a76e9-f93d-435c-898c-b76bc5dc0cda.json | 105 - .../fc83f198-e606-4c3d-aede-cb646b080b3b.json | 105 - .../e0452e02-8cf3-4da6-83f6-844f1de6fac2.json | 105 - .../0792bedd-3891-4622-983b-886c126ace68.json | 105 - .../31e52020-32b2-4271-89b5-31dfde730404.json | 105 - .../06074d49-defe-4303-9899-18f074a06935.json | 105 - .../1ef0a501-863d-49dc-9bda-5151fb161b41.json | 105 - .../15177605-2eea-4d8a-8462-7b64f7d29071.json | 105 - .../09996570-4086-46c5-900e-887c3d5d5826.json | 105 - .../8a24b990-24f1-46f6-a4f9-4ecaa39b4ec7.json | 105 - .../ac310031-4080-4124-a858-e1293532b222.json | 105 - .../75a8a0dd-e64d-4462-b8be-8006f6710653.json | 105 - .../8469a871-39e1-4b21-bb7c-fa21026a01ba.json | 105 - .../046380aa-08bf-4d95-a4cc-bbfaf30eb56b.json | 105 - .../fa8ee240-a7ac-4edc-9ac7-beabf38af0fa.json | 105 - .../6d30ee72-d0ea-496d-8375-892968c8602e.json | 105 - .../903b0e99-e50a-4afa-8085-1fd01872c048.json | 105 - .../225277d4-e1b9-4992-8e2d-678ac6157b06.json | 105 - .../4991436d-59fd-4f66-b588-9103beeeba5f.json | 105 - .../6118242a-de0a-4734-979d-86f2cc6fc65c.json | 105 - .../a6b71abf-7ee1-438b-8218-98803bca8de8.json | 105 - .../f7fb8d6b-9773-42e7-a426-a35a401f689a.json | 105 - .../eb7694ce-6fe4-4bb0-bcab-266ccc71f78a.json | 105 - 
.../0f231e27-deec-4b10-a995-d493ecf8400f.json | 105 - .../5a28540f-3a94-478c-84c0-5be8db86328a.json | 105 - .../f12c6b15-107a-41ed-98fa-40b0af5be42e.json | 105 - .../cf6aeb1a-4814-41ad-96f5-b59caafb902f.json | 105 - .../479d9f2a-82f6-42de-b8d6-92405f60638c.json | 105 - .../a4063b77-fc24-4c9d-bf08-cb28fc6e8259.json | 105 - .../c9a159fb-9e6b-49b3-8f2b-a2d2d3ca8f19.json | 105 - .../974e902e-0959-42d0-98f8-288e1a6ce887.json | 105 - .../eb6e6d30-b349-447c-83d3-fe7760e83037.json | 105 - .../eb958d5c-aa2e-4640-bef7-c8b10a892847.json | 105 - .../17c5c728-e03d-45e9-aaae-816c4e90b14f.json | 105 - .../79d3d942-8d5f-4aca-8759-8d70b8cfc5f3.json | 105 - .../92bff089-baed-4f1f-852b-f274a7920a1a.json | 105 - .../c4b27a1b-28dd-4a79-839c-ad8673034937.json | 105 - .../46a21741-1860-4498-8284-c94fccad1ed0.json | 105 - .../d540acde-9601-4119-8ae2-f7cdf82f43f7.json | 105 - .../c723fc6f-2656-4084-81d0-4cbaf0587049.json | 105 - .../526f6468-b7a8-47a7-9ed4-c2aa7cc63ca1.json | 105 - .../56232cf6-7ee7-45ed-b139-ea20e148b5fa.json | 105 - .../51ff4f00-1d21-4f98-b5a3-7a72c4b2a5b1.json | 105 - .../eee0ebda-6ff8-45bd-ac4e-15aeb724d0d1.json | 105 - .../b3e7af18-231e-4839-809c-bc5bfe7b4182.json | 105 - .../757269fe-8662-4eaa-8e76-5c2f88d8fbb0.json | 105 - .../dffd1a4a-a056-43c2-bda3-0cfa21406656.json | 105 - .../b5ecb480-16e6-4dfb-be77-ad8ef4e90aa3.json | 105 - .../682a38c6-2fb8-4c42-b6ad-69fbe65be484.json | 105 - .../cf14f098-cd46-4ca0-acec-02012eb78ea3.json | 105 - .../f1b6c510-02fe-4ffd-96da-4cfcfb04eb8c.json | 105 - .../ee23e137-57d2-49aa-b267-27bd48457d46.json | 105 - .../ae68a60d-a2df-45f1-b446-1400901cb6ff.json | 105 - .../6c31df3b-e408-4a6c-b475-78f174630cad.json | 105 - .../2b841a46-6210-4092-875f-ca3ae36f3d25.json | 105 - .../250897a9-7d48-4323-813d-fa48befe2cbe.json | 105 - .../154b7a41-e1bf-4827-a6a7-279ea170ab7e.json | 105 - .../1fa2ab02-9a1c-4e7e-95b8-27e78af0ba73.json | 105 - .../8b769df2-18f5-4712-a02b-962d3e2bb7c7.json | 105 - .../3272e904-21d5-4116-abde-0e74fe48b9d5.json | 105 - .../5f54ee4a-42e8-4dd0-88bc-915d2f1971a0.json | 105 - .../762f6ff3-4823-4de8-8351-045e1d1d383b.json | 105 - .../65f44cf9-f619-4f43-a03f-09e22386d319.json | 105 - .../f592bc27-c97c-4b14-abcf-30782d8c0056.json | 105 - .../6910eff9-74bc-46b0-8f8c-20642bef4a12.json | 105 - .../ebd5da9f-60d5-492e-916b-5e123442316c.json | 105 - .../eaf601d2-f285-4b0c-b3ab-5d029b8fe20f.json | 105 - .../c0182d01-454b-4194-be7a-81b9a9672d07.json | 105 - .../a954be32-0c84-4ffe-9c4f-7f895c77e197.json | 105 - .../7b8f75d1-ef18-4fb4-abbb-efd6147fe74c.json | 105 - .../1326f0c0-9355-47ff-813b-0729370e1487.json | 105 - .../788241ad-d975-498e-80ef-b0d04bd8db85.json | 105 - .../e0115d6b-3b2c-4047-b64c-1e7afb5edd55.json | 105 - .../7c828833-fd36-4a84-8530-d3c1769ca822.json | 105 - .../29389e2b-7898-4f9f-ba8c-8fe4dad80295.json | 105 - .../9afcb068-65e2-4d4c-b7ee-071eb4dbac73.json | 105 - .../b8cd9221-dd4e-4f49-b03e-f11bdd5773e4.json | 105 - .../1e3f60f2-814a-4979-87bd-f5f94d5b09cc.json | 105 - .../102378fc-7b98-4088-a6f5-3039e7b638d5.json | 105 - .../c6aa0ed8-3b79-4d73-8587-762e9469f4ce.json | 105 - .../cdf3b683-29d9-45b4-b6a6-1f67927ef953.json | 105 - .../6eb76673-0633-440b-8849-8fcf8cf00954.json | 105 - .../aafb84cd-5950-4b93-98d1-9e50fd294b65.json | 105 - .../fc683e1a-327f-4a69-bd51-9022c587159b.json | 105 - .../196e965c-4570-43aa-ba0d-13972796bda9.json | 105 - .../fe474496-4efa-4ef7-844d-32b17abda7c8.json | 105 - .../c8110747-f2dd-46d0-b2b3-706d70e1d714.json | 105 - .../9982c576-75fd-47f6-8fe9-52b56fc58d3f.json | 105 - 
.../b02dabaf-2aac-468d-b0cc-c7194c2094fd.json | 105 - .../9dd61039-27d0-42f3-9b03-65b0a59465d4.json | 105 - .../43062e28-5532-4e31-ac49-fbd794c7f664.json | 105 - .../89ce1911-289d-40bb-be48-f9a4d8d73ac2.json | 105 - .../bed92e1c-8f11-4f70-826e-569aa55baa09.json | 105 - .../d0ae041c-8b56-4ce1-841b-96622a724894.json | 105 - .../743c517a-ad0f-495d-b9d0-cdca01335933.json | 105 - .../5e82cb32-8291-497b-ac56-16b50947d1bf.json | 105 - .../8fddcebe-58d2-4d40-8147-f02feabc0d9c.json | 105 - .../648e69e2-54de-43c4-93ac-f8422fa4b9c1.json | 105 - .../72a11594-1d83-4e12-b82f-137b6749f5ab.json | 105 - .../1ff6b76b-7241-4f06-9db5-4594d3ff7a3f.json | 105 - .../20a6e090-2c78-4eb9-870e-9abbcbada6f9.json | 105 - .../a846978d-de78-48e8-a738-54c732e50c28.json | 105 - .../4977e0d5-1446-41ba-b00b-e8236c896d2e.json | 105 - .../8713e6fb-8843-43f2-af3b-57a59d326670.json | 105 - .../2d99163e-9ebd-49d9-ad13-ee1f780d277c.json | 105 - .../6dc5b101-c681-4010-941a-3983cb9eff53.json | 105 - .../a059e151-6f32-48ff-900b-4e232aef3cc0.json | 105 - .../64c02fd8-386d-4b4c-bc00-d243cfcae7f1.json | 105 - .../7c6f4fa2-6847-4f57-8a8f-31673bd8b1e7.json | 105 - .../ea18a046-87bb-42d9-a1b2-d01fe875c970.json | 105 - .../8012de5a-8cb0-4039-895f-70c20e9237ee.json | 105 - .../a0802c61-1314-4a46-9b61-7a89246bac42.json | 105 - .../071d7565-90e5-43e8-a158-ab333beacdcf.json | 105 - .../7621e05b-1b5e-43e5-a65c-322334575e68.json | 105 - .../f6223009-028e-4063-90ce-e008a3b5b284.json | 105 - .../f75e2bca-e300-4b3c-a5aa-f6aae03e7330.json | 105 - .../07e72fc4-9c37-4a81-a788-8619035c66d3.json | 105 - .../43b106fe-ff02-4cfe-956f-cfc9e272de78.json | 105 - .../ce9658b7-b457-4fb3-8fce-4173b5d93f2d.json | 105 - .../eed9909e-db3e-4d6a-8caa-3f208ace941d.json | 105 - .../f8aa8470-6803-458e-8207-b217969dd6f3.json | 105 - .../c464e6b4-aa76-4b42-ab9b-71f193ec2a57.json | 105 - .../90fe60dc-76dd-4e90-99b4-c16d026afcb5.json | 105 - .../856c2575-700c-4b00-8883-bcde8841e262.json | 105 - .../b20c1304-d782-4d41-9c15-0091f9c914e4.json | 105 - .../c5d4bbfe-68a9-4808-ab2e-e92dd88ba06a.json | 105 - .../5cf588ed-fde6-4ee1-833e-a6743cc1834c.json | 105 - .../97a591f9-2052-43b3-851d-ac73c793a000.json | 105 - .../89ca3fb4-eb53-422c-a4dd-029bd1fc7c37.json | 105 - .../4fcdfdff-87be-47b0-93bb-b4bc0bb2499d.json | 105 - .../a55039b6-922f-4732-9feb-fa757f627ebd.json | 105 - .../ddfae432-5d3c-4c7e-bc7f-087cddea014f.json | 105 - .../fdc183ed-50d6-40c3-8e7b-02a37fc42a00.json | 105 - .../1835078d-7897-4517-9d7b-86a2285dfa27.json | 105 - .../ad6edd05-e83f-4da3-b200-c1d972548e8b.json | 105 - .../6d4ac88f-7a02-4f78-9990-6736972f43f7.json | 105 - .../ed12a458-8c3b-4e08-a218-e94b4fdd89d8.json | 105 - .../29058700-6465-476d-b1c9-2bb89d70c52b.json | 105 - .../2047ae80-fdc6-4e94-90e6-b3cac52d8c45.json | 105 - .../1de35d6f-c62f-48fd-b921-41e85b55434a.json | 105 - .../6a676239-eed6-44dc-b395-1b2453d5b0ba.json | 105 - .../e0545222-4bd1-490a-a315-5b9ce9742310.json | 105 - .../441375d9-0375-4a15-9d50-267395d3ab13.json | 105 - .../9ecdd8a3-247b-46b2-ae3b-5798685329ef.json | 105 - .../c76d318b-eba5-4407-be86-a92051791f00.json | 105 - .../b97b327c-1730-4bfe-b5fe-00dbfcd0d372.json | 105 - .../d5487f61-9be7-4ffc-af6d-be9f925dd4ba.json | 105 - .../ab78a98d-0cad-4215-8f37-f3093066a98d.json | 105 - .../2f2577b8-28e3-4fa1-8e65-66e59499b9cd.json | 105 - .../4f6bda51-89d3-4005-9133-db6d871ae87d.json | 105 - .../0c7e0639-a082-47f1-bf32-0c45ce573f0a.json | 105 - .../4f85534a-0b12-42c4-a0d3-06d4d8337e0c.json | 105 - .../f5b253b5-4c42-49f8-9f3f-d85a5b2502c0.json | 105 - 
.../2dd14fef-53f5-491d-a5e1-7e19f6043049.json | 105 - .../7e4c528f-bb42-40e7-b849-86732d2f2a18.json | 105 - .../8b61e7aa-3ba3-4e25-b1bf-9718970a111a.json | 105 - .../d912a685-7187-4b56-a7a8-881ed678ae2f.json | 105 - .../500a7a12-9c94-4ed8-b2b4-33473141c3c7.json | 105 - .../336aaa71-3f35-48f3-bede-cb9ab3324cfc.json | 105 - .../7a495a80-f712-477b-bd5c-0cf7a07e8ef2.json | 105 - .../614f3e27-e150-4edb-9438-06d0b0f38ca3.json | 105 - .../457f0bc3-68e1-4ecb-a983-5f504b1246cd.json | 105 - .../78544e05-7eed-465d-9199-35b25e1bebfe.json | 105 - .../de6fe2ab-47de-4616-a0b9-b2cb6f44b16b.json | 105 - .../3518e992-9548-4025-a641-99a2cf3833e4.json | 105 - .../0c556e08-bb71-406c-88b8-d45fc4cc43f0.json | 105 - .../a200d34f-8ed0-4f1d-93e2-cff38b1811f9.json | 105 - .../b5b02465-0d3f-4ccc-a104-174fcf53dc9a.json | 105 - .../956640e9-97a3-4641-9ed0-a63831a8ee58.json | 105 - .../ba80d36c-7688-40e8-8182-251c6b9e6b19.json | 105 - .../18c67de4-1518-44b6-b92f-b490e9d55877.json | 105 - .../1393cab1-31aa-470c-bca1-53f99d7ea1e8.json | 105 - .../da7928ec-55b8-4d4b-9b9e-b40c5de7136b.json | 105 - .../5135513f-f255-412b-ab16-f0d613e4525e.json | 105 - .../95c86ae6-dcb7-4ed7-a82d-ce0b374cca0e.json | 105 - .../4a4c258b-2b03-4fad-a5e0-b623a25fb735.json | 105 - .../2b3928ad-ab69-4e63-aa3c-e64dea7b5e6c.json | 105 - .../636ed71e-3d86-4d5d-8b8d-3019f26261fc.json | 105 - .../a1e6f539-f5d7-4f57-b0da-4df7e5a86240.json | 105 - .../06a2a807-3dbc-42c4-adec-4d6caa01cf74.json | 105 - .../88727af1-7672-4ab5-9cc4-f56d286f3967.json | 105 - .../619fde94-d095-4f5c-b36d-19a38b6a8109.json | 105 - .../d75b9105-a60d-49d9-8606-7b23ff5d3d1a.json | 105 - .../40933520-61e0-4cbe-b6b2-b4d19063a1b9.json | 105 - .../46a36382-df06-4dc1-93ae-6ae61343a969.json | 105 - .../269f307e-3af1-47a2-92ec-00a59b4725ac.json | 105 - .../244417b6-88a2-483f-adba-c1d944c9cc29.json | 105 - .../1bf5eb2a-c0e2-4bfc-9ae1-ec5737974cbe.json | 105 - .../41186ba2-77da-496c-afd0-c0f11ea05c9b.json | 105 - .../407adfd5-6a1f-420a-a5de-2e37740d7025.json | 105 - .../744cef52-b155-4bb0-9411-2eb47938b5d6.json | 105 - .../f269f0cb-4f9b-4f29-84c2-a4f31ff08290.json | 105 - .../678a08d8-3089-4d97-879d-c5485344de05.json | 105 - .../9c8db160-fc92-473f-a766-fb00fc099f6e.json | 105 - .../fd05a73b-5b6a-460e-85d5-547710ab6bac.json | 105 - .../b4c9ec76-b126-4715-b3cf-c0d8a8a61d44.json | 105 - .../7be8016c-2454-4228-b10d-badba12e845b.json | 105 - .../131132b7-5b2a-421f-aa02-360ef9b7f206.json | 105 - .../49243e70-a24d-4e0c-b4c6-4275be1db944.json | 105 - .../7e6a55fb-da39-4b16-a59b-70635e636c02.json | 105 - .../bfaeefb1-93c9-470b-9376-9c67a1d20862.json | 105 - .../ee7b9254-5e4a-46a0-a8b3-2ecc1708e6ab.json | 105 - .../33cc8f90-d019-49d9-8220-d66260659435.json | 105 - .../a9fe98a7-e143-4100-99cd-adea90917c4c.json | 105 - .../56ae78dc-3cae-43b0-afc9-e6fac3c6556a.json | 105 - .../39ce157b-e374-4963-8b40-6393835574f5.json | 105 - .../c57286a9-ee0c-48e7-814e-8f2aa8e9688a.json | 105 - .../672e66ed-80e2-4b45-b52c-d9265f8efac8.json | 105 - .../af89079b-b84e-48f1-876a-ebf2d933d91e.json | 105 - .../e7394d5d-4253-4a53-8a0a-73b0a41e62a4.json | 105 - .../865ffa1b-af08-416e-8de0-a16091d4ec79.json | 105 - .../e949a47b-85f9-4072-8302-8bfef92579d9.json | 105 - .../744d1978-7aa3-44b6-91a0-664383a66f8b.json | 105 - .../139f2e38-0b98-4bfe-82b0-99a6e6b51e7f.json | 105 - .../8348f83b-0739-411f-8b87-bd9d5e871ab3.json | 105 - .../4dcf1412-4182-40bd-bd1a-2246e29f18e9.json | 105 - .../f43b9387-56a9-4c21-850c-5cfda84fc8b5.json | 105 - .../497c8c15-1b77-4468-b33d-efa190c28e78.json | 105 - 
.../80cadd5b-ebbd-4f2f-912b-5d944650e2b1.json | 105 - .../1dc11c68-ce65-4a5b-9f75-4cdf1775bfc6.json | 105 - .../f435a5b0-cc12-4603-b7b0-4625dc547ed2.json | 105 - .../daf38e27-1149-44a8-84f2-93f842f4740a.json | 105 - .../4a5bb50c-017d-421d-8ea1-21a8316db0f4.json | 105 - .../20de3a0f-fad0-4832-863e-2b2049037c4f.json | 105 - .../0f460b31-7249-4e2d-a614-d1230e95f3cf.json | 105 - .../1879a765-f4ab-4bad-9525-47f428b43220.json | 105 - .../9ec2ac0c-21e8-4c9c-ba5f-69ad284400bb.json | 105 - .../85b10038-d136-4be7-8e04-7298ddb4f7d2.json | 105 - .../c1f39d51-d7a2-4fee-ba35-ef4e0d429b29.json | 105 - .../4fcee29d-6351-4875-995d-81834fd878c3.json | 105 - .../4b0c69d9-1801-4a54-9554-d8dcff88f9a3.json | 105 - .../5d3c9637-0558-4a2e-9950-8e7017d013f8.json | 105 - .../c04e8c21-3ae1-457a-9609-682341323a88.json | 105 - .../d38f0e3a-e89e-4af6-95b2-8230b6a84ec3.json | 105 - .../77e70ef3-fef2-4b75-9221-b165ec29f31e.json | 105 - .../6731c6b8-0b23-4fc2-b284-01025ce30887.json | 105 - .../4cb8eae2-bc55-4adb-a4eb-1fc9eb29d891.json | 105 - .../b197728d-b390-45a8-8adc-ed8567b628da.json | 105 - .../09deb823-536f-4afc-95bf-ebb0a8eb2e00.json | 105 - .../30f8faa5-777f-47bc-b128-f31b950079a3.json | 105 - .../93187c79-f1a4-45f9-9d95-a254a185f7a4.json | 105 - .../69318100-73ee-47f4-96b2-6e7b310fbcd1.json | 105 - .../5aab957b-f25b-4208-9bf8-2d16887245bc.json | 105 - .../b2c0f0f2-3c1d-4b2a-a82d-24001cbfd3d7.json | 105 - .../26034d5d-5d52-40d8-aa9b-e90dbd255903.json | 105 - .../606d699f-c7ac-4e5b-b5a3-5bd43f0a3ff6.json | 105 - .../6c40f966-753b-4301-8c9b-f7b4905c0b68.json | 105 - .../cfb071af-7283-4155-8ce1-40f751dd46ec.json | 105 - .../84ad6756-cb9d-4303-8e7a-395c1dc7c222.json | 105 - .../b481d1bd-e678-4b78-aecb-d43a561dd969.json | 105 - .../c42196be-c20b-413d-8870-f10759058098.json | 105 - .../8d8663a1-12f6-4e88-af3d-784ff86e8c59.json | 105 - .../a0621e6d-4178-49c9-aa2b-f56930884b82.json | 105 - .../dd0260dd-59f7-4b3d-8f9c-60b297c07a1b.json | 105 - .../746630a6-de1d-4976-9168-d8ff06980904.json | 105 - .../7f577380-2691-4906-af13-8ca3011e6316.json | 105 - .../9b6c775b-ef08-4e57-8441-52d7887615b1.json | 105 - .../7288fa97-efd7-45d5-8769-e0071e9b5488.json | 105 - .../b664e033-1424-431e-af8d-09a11b449286.json | 105 - .../8fb7a2aa-3f43-4aaf-b2c0-1770704fcf81.json | 105 - .../15a4291f-4918-43a6-b242-90db88fe4a3d.json | 105 - .../9c7dab43-b26d-4cb4-a73c-95bb1e01ffe8.json | 105 - .../661b1590-f312-447b-a494-1d37ffd93cae.json | 105 - .../83387977-a8cd-4cdd-abc7-301006380458.json | 105 - .../7f53fb66-2c19-434a-acec-7cdcf9fce04d.json | 105 - .../4f8db3ee-409a-4bac-ab0a-ee3493d1e842.json | 105 - .../6732a278-0613-40fd-bdbc-88a586631279.json | 105 - .../cc53c4f9-3c1b-4b21-9aac-ea22dced76c3.json | 105 - .../7d2c1ffb-d1e7-4c88-af08-74642ddd8741.json | 105 - .../96bbc2c8-bb74-408d-8625-e6bf66b63cd0.json | 105 - .../72853b4d-cc12-478f-b6f4-977b8fbabfa0.json | 105 - .../25674b98-92b5-4e2d-97ab-084eabb13db2.json | 105 - .../67fd0572-cf55-412d-8ec6-0cb168d3ed08.json | 105 - .../69d04754-3779-4408-9aa9-68c9ba65de7a.json | 105 - .../91c0e116-7dc0-4931-ac61-b98bac2af3e0.json | 105 - .../172e7bfa-b430-4e14-a15a-a54ec5c9133e.json | 105 - .../5849d742-02eb-4370-8c97-efc5eec4f1ed.json | 105 - .../1812829e-2c91-410e-9e2e-cc758b652e9b.json | 105 - .../593d3d30-f2e8-4ad3-b0ab-4bfed63a0ab5.json | 105 - .../45a72c39-9cdb-4fb6-aaf0-d50cc89dfd70.json | 105 - .../ee2b789c-951d-426e-87e3-232c07d65ade.json | 105 - .../2316b408-c94b-471e-b64b-c1f8f345868e.json | 105 - .../49d47f6d-0d11-4b07-b42e-b94310c97d3e.json | 105 - 
.../0ec990b0-b908-44f5-9fb7-5ee603737bc7.json | 105 - .../34c33a97-ae07-42e9-8025-9076e2bce3bb.json | 105 - .../bd4ff159-0bf9-4fe1-8cc8-9f3d7bb47bbc.json | 105 - .../4aa966fc-ee99-430c-8688-99565f5e6fcc.json | 105 - .../e908901d-c122-4458-9d4e-9a7d1242211c.json | 105 - .../e9350de5-cae6-46bc-a83f-0e6e65eae4e3.json | 105 - .../769eabf2-4c12-4a48-8ec2-7dacf50a28f0.json | 105 - .../8c4531a4-4418-4090-9c82-f60bcf8d9935.json | 105 - .../a5c9246f-a7b5-4183-9a64-93151b536945.json | 105 - .../1faf58ba-28e7-45a1-bc2c-d0aa707a49aa.json | 105 - .../b347eea5-e676-478e-b0ee-d53abf2c8697.json | 105 - .../ba005ac7-761f-4cd7-91ed-34b88028240f.json | 105 - .../35e56ec7-deae-4674-abfc-3c45f5dec040.json | 105 - .../af7f201f-3af3-4ffb-9416-c83235851cb6.json | 105 - .../8ae7c857-be7e-463e-86c2-6b165920a45c.json | 105 - .../c4f888d2-c08c-43c4-a1f9-79edf519c893.json | 105 - .../d42a520c-15dd-4497-a26a-b6f77b3257e6.json | 105 - .../c9393ea7-3269-435f-9159-95638b9c691e.json | 105 - .../08e49740-3cdd-47b2-9b95-b96d8a13dd79.json | 105 - .../249b0b65-5c71-4c5d-9802-28df0ead0cdf.json | 105 - .../fe084d09-ee80-4c7f-93a7-3ee0f9081177.json | 105 - .../078cedea-7b3a-4c77-b932-3d42f0c841fe.json | 105 - .../dedc34ed-fd8f-4b29-b898-3c9830993247.json | 105 - .../05f391f3-68ac-422a-b7e8-01eba1729a0b.json | 105 - .../c443492e-3b5f-4394-9fbb-761dba338638.json | 105 - .../19b72caf-a841-4928-98c3-c505694724c3.json | 105 - .../36b2821f-5fa6-4384-9ddc-6cbc5b52321c.json | 105 - .../80d3a785-dde1-44fa-b6e1-93722849fdb1.json | 105 - .../6bfc8cf9-e615-4447-bc6e-ff96752dc5fb.json | 105 - .../feefc068-9257-4d0f-ac55-acd08ededeca.json | 105 - .../25d6c4bd-6540-43cb-a682-77d4fa4eb64e.json | 105 - .../6e342711-8d2d-42ed-a019-11be429e10d8.json | 105 - .../1af605c0-ec58-4651-a57a-2fd7d0cd5a67.json | 105 - .../fd65e319-bc38-457b-9913-9a2214e69823.json | 105 - .../56032f8a-b733-4b1f-acbc-78d0d1ddf2a5.json | 105 - .../be0058b1-23b2-40b7-b336-ab40bf82c997.json | 105 - .../f47334f2-f0ab-48f5-814e-f3ede36802d9.json | 105 - .../fd91f8aa-a521-4e9b-824a-aa21adade569.json | 105 - .../95d33475-a71b-41d6-a08d-3da30e631897.json | 105 - .../bf9d8219-66b9-4c77-8c6d-2983e60dc2cb.json | 105 - .../34b9dd9e-dc03-4354-b016-3b1463a902f9.json | 105 - .../cf3f376a-92ec-4678-a57a-cee2e40032a5.json | 105 - .../99d27765-a9c5-4f50-8bd1-c3ce67683621.json | 105 - .../f2eaeee8-a75b-4d0f-9dcd-2a11c3de926b.json | 105 - .../0a70cdb4-5ccc-40e2-bf99-3af619b8b7f6.json | 105 - .../76e3f2a5-7545-4270-800d-6413e39608ad.json | 105 - .../2e6c1c46-01af-493a-a2ce-266d13b53000.json | 105 - .../dea423e8-cdbd-4895-80af-f53dbb5caa1c.json | 105 - .../997fc8c5-fc91-4e9e-a2b7-bdda77e4f4a7.json | 105 - .../649483fb-4b54-4824-82eb-e78e55e53912.json | 105 - .../0d99e863-596f-43b7-932e-a4a27435e63d.json | 105 - .../399b43e8-3c07-4f3d-8b3e-50b8acd96e78.json | 105 - .../d621c163-5ca6-4e54-8913-d931e4a2c6b9.json | 105 - .../170aa8c2-6b80-44d3-9d22-c1a5f7fa2ad4.json | 105 - .../2f89ceb3-8bc1-48f0-a4cb-3dc1b8acad87.json | 105 - .../bbd39707-6062-461a-8e09-c8b8bc3451f7.json | 105 - .../9b2011ae-9d22-42be-a10b-6ce6e8ff1be4.json | 105 - .../b6578885-9721-4349-ad55-5a80fd054c85.json | 105 - .../dfacdde9-fd5d-496f-8038-aa0439c0c991.json | 105 - .../0e66b7a6-bd6f-48f7-95e2-c117e0ea468f.json | 105 - .../845f96b7-62dc-4ebc-aa62-fcc6263e437f.json | 105 - .../0bc5145c-90d0-4a8b-89c6-0b03aa9d0ee1.json | 105 - .../9a022bdc-d1b8-4f2e-a1af-6cd3bad6bded.json | 105 - .../dc22ad83-0752-4f5e-97ac-733ef6c6cf53.json | 105 - .../3ebdda73-1c41-4a98-b3cf-ac5d482c8b5c.json | 105 - 
.../1fe21571-0375-43c3-8071-1aaaf0223baa.json | 105 - .../0ced7574-bfc4-4958-a6f5-0944f9ac411a.json | 105 - .../3e3344d2-6911-4d5f-85d6-6593cbed3b49.json | 105 - .../ff90ed4a-6dcf-4b9b-9d3a-19f933e2c0c8.json | 105 - .../3a5b1794-12f1-4004-bdb2-309cc950c757.json | 105 - .../26aea3e6-571c-4751-8b0f-40a86a144973.json | 105 - .../2fcdb8f8-5ec6-494a-b690-fa96febdb02a.json | 105 - .../08bfcf7b-e051-4c64-b1ee-0044cfa166f0.json | 105 - .../7afe076b-7f6a-42c1-9c43-652ea3ca94a9.json | 105 - .../eacd8987-9631-4199-97ef-2cdc41879e8b.json | 105 - .../4edb337d-b56c-4009-9199-22223d4ff9f8.json | 105 - .../c332cc18-e556-4b23-a45d-df26c250faa2.json | 105 - .../4f7b356a-1484-458c-8bc1-2640e039ab70.json | 105 - .../ca077d1a-a122-4040-b7d9-924773ce67ca.json | 105 - .../796ed438-2be4-45e6-9de9-c98ddd51f3d4.json | 105 - .../51f579c0-b5b4-4e01-9c19-b68fb6a21210.json | 105 - .../91ec838e-699a-4c68-aa42-a9f0b3b6b0c2.json | 105 - .../07e926c9-d8bb-41da-b41e-8fddc9fb99d8.json | 105 - .../976e132a-8352-43fd-abdf-0fc4a04e9429.json | 105 - .../1ae05e9f-d432-4e7f-a662-4b4a118333d9.json | 105 - .../23ec1efe-a9a1-41cb-9695-4be0ceb3c199.json | 105 - .../3f1ffcf0-10bb-46b2-ae30-3eb958e943a1.json | 105 - .../91240596-5842-4441-b976-01ed7545bd1f.json | 105 - .../5842364a-2721-4882-90f3-97eba7c3b93a.json | 105 - .../cbd0163f-fbea-4f40-a26b-a0508ec02061.json | 105 - .../f2571e64-be03-4482-b5b4-d120444b0586.json | 105 - .../be4ee67a-59d7-4098-992e-5f75cd53cdbc.json | 105 - .../41393c10-c1e5-4ccd-bcb1-df5392cb8ec6.json | 105 - .../e68bc90b-1274-4e28-b280-65e6ceba53f8.json | 105 - .../dc7af75a-f45a-449a-b6ba-cc033d7de79f.json | 105 - .../5242491e-deb4-41ae-8d70-5b0d8ffb7bc7.json | 105 - .../9df5ab5a-16cf-478f-87f0-1b8717e1e330.json | 105 - .../dd84656a-3b61-4241-a2eb-a5f52ff58ed2.json | 105 - .../ba7b8cb4-608a-4bf0-b107-51e721f88dee.json | 105 - .../9e453ef2-bae1-4a06-8778-d9c0dfae33e8.json | 105 - .../b3b73406-3b25-4a23-9e13-53fafdd66552.json | 105 - .../dceb35c6-30bb-483c-aa62-8273b409311b.json | 105 - .../100a253a-3409-4145-8a9d-0bf821e3ce91.json | 105 - .../174b2a17-c4fa-4021-868b-9c23a99603c9.json | 105 - .../3cce1e77-5dfc-44d2-b0c2-f7220d989e9d.json | 105 - .../8127e367-fbd2-475d-a4f0-b8895dec6741.json | 105 - .../c68a024d-fa21-4584-bde5-42121e919af7.json | 105 - .../ce1feb87-4f78-4ff1-a548-b3409591166f.json | 105 - .../96b75db5-4e23-4179-bbf7-801f35d31af7.json | 105 - .../16e0de9b-9717-4451-babc-8df8748c4efe.json | 105 - .../8eecc1a5-d42e-423c-9155-daf66a414361.json | 105 - .../93e0bcb6-be72-4e9c-adbc-c8fce3240b0d.json | 105 - .../6aaa1633-f780-42d4-b43e-5a4d31cf7aae.json | 105 - .../6be09829-08e5-4d45-a091-5451f6c74d51.json | 105 - .../cea3e14d-a43d-4e32-b8fc-d8ae995190d8.json | 105 - .../f06fc349-e84e-4ec7-a9c9-8819896c2beb.json | 105 - .../86591e86-5bfb-4e8e-b910-bf6b5011562c.json | 105 - .../f2b35397-f539-4129-8e1f-f9dae9c9431b.json | 105 - .../50ae9dc0-efcc-43cb-8704-6dfb9270656a.json | 105 - .../a6c5b80d-e685-405a-8444-1be1ed763d2e.json | 105 - .../052e63b2-028b-4a4a-ae2b-51514e982239.json | 105 - .../f205507c-48ef-4a40-a0e8-39f5f7bf2cdb.json | 105 - .../a9434630-a7cd-4dc1-b542-e76402344166.json | 105 - .../f639d7e3-ffb9-4dc5-ab20-993522afa5b4.json | 105 - .../37c4d6b3-9964-45d3-a6ed-8b84229ed304.json | 105 - .../50c37538-a425-4b30-a9e0-9a60f6b2492f.json | 105 - .../58ac7b57-e498-4de0-95aa-475c9c56aaf6.json | 105 - .../6cefa467-dae0-4b8b-bd5c-3343f1bfe111.json | 105 - .../7f57b41f-d8e8-46a0-ad1f-2638e287bce7.json | 105 - .../fa88bc37-eb6b-4d69-8983-7a489ab09665.json | 105 - 
.../9332e745-f594-40a9-af22-98709efc179d.json | 105 - .../65c35557-ec37-49c3-b7f6-11ce837500f0.json | 105 - .../aece90fe-f0eb-4c34-afd0-7a4fc36dc385.json | 105 - .../fc41cf78-6547-4fe6-83aa-ef5edd99a392.json | 105 - .../57a9ff0c-795f-45c4-b0c7-ad0c7400c88d.json | 105 - .../7c73720a-03d8-4d90-9557-cd579c7c3e86.json | 105 - .../b4dbcb3f-11dd-4bce-9d45-869ae7c8f9b1.json | 105 - .../b9dae1c0-8088-4ffb-9e91-0f6579b3147e.json | 105 - .../336dbfac-133a-46c8-87c9-40f1ad12a714.json | 105 - .../4b528bc8-e94a-4437-8c1c-bcd823bf5f45.json | 105 - .../f585e5fe-c3b5-4134-97ed-67b57d74adb8.json | 105 - .../5f69b85b-d66c-400b-8d40-58b96233ec3c.json | 105 - .../a3ff3d30-5dec-4ec3-87b9-004d570b005a.json | 105 - .../b79e1f6d-698d-4bde-b35f-3f31e09c9d6a.json | 105 - .../20854e9f-ba11-492c-8d81-08e13ca1ec35.json | 105 - .../e7862d19-b3d4-47f6-b174-b53015229a42.json | 105 - .../9a4e6a55-e39e-4da6-b4bb-670cbd75d5c6.json | 105 - .../626a924c-618b-4047-bed3-9ff67b6e47ae.json | 105 - .../0fac57c3-7bea-48fc-bb38-b679ab835d91.json | 105 - .../5e0690cd-21e6-4778-8af9-7d9f623f5f52.json | 105 - .../6dc1a4e7-6ce6-4337-a242-420fe4139538.json | 105 - .../676745af-1929-4875-9a78-d57354883d75.json | 105 - .../7b134cb3-7794-4984-9240-b889e2a3b6b4.json | 105 - .../76b52fe1-c232-47d9-8052-077a945364cd.json | 105 - .../1dc524b8-18d6-4bc0-9146-713ef8abd983.json | 105 - .../5b8044df-ce6a-4a5e-9aed-d657188fa114.json | 105 - .../4ff2e991-ee62-467e-9fec-cdf334ca7fca.json | 105 - .../2451252e-2cf6-4394-9009-544630696c75.json | 105 - .../9d6eb7bc-965e-4de8-bccf-0590ad55ce6d.json | 105 - .../b790e9c5-2412-4aa0-a975-37b8662a82cf.json | 105 - .../ec773b66-24fd-4b6f-ac9c-ebcd355e4be7.json | 132 + .../6921281e-5756-4f0d-a37c-3b05ff6b2703.json | 105 - .../a70b8356-94ce-4f0d-b44a-2215076eed5e.json | 132 + .../b182807d-587e-4702-bf30-dab11983b8db.json | 132 + .../c1f0944a-c44c-42e9-90ba-a847509cbd66.json | 132 + .../64bb8530-7071-402e-ba9b-1d15ecbe275c.json | 132 + .../81bd1edf-be5b-4ae6-a2cc-723aaa040eb9.json | 105 - .../4f1fc265-f8b7-47e6-a9e6-cfa61b89ad4a.json | 132 + .../1420df5c-690e-4b01-b99c-c21c793689ae.json | 132 + .../35674acb-a68c-4ac1-9aac-ac9cb44801e6.json | 105 - .../aa9d0b0e-cb3f-452e-bc85-f7cf172d2b8b.json | 132 + .../dfabd777-8620-40e3-b19c-a9227f57b638.json | 132 + .../08fe3877-ab04-426a-9e27-72ec4ff8ffc3.json | 132 + .../5d7caae7-0242-4a5d-b3be-c677b958d130.json | 105 - .../4b264bb0-bd7e-4b15-9591-50b5a521f100.json | 132 + .../d13f5416-1d95-431b-8f01-b969066ec960.json | 105 - .../17df660f-6a91-476f-a7e8-7169eef1c24d.json | 105 - .../a8cfe336-0c3e-401c-a1e9-d951e64918ec.json | 132 + .../5e66c653-41b1-46de-b677-ffd8426ba5ec.json | 132 + .../7ff78ffd-c934-4a17-b30d-2d8267f3e25a.json | 105 - .../6308f97d-aecd-467a-91f0-5a1650ccc22a.json | 105 - .../9f0f0914-1f7a-468e-8a2e-7ae122fd064d.json | 132 + .../af176c4c-b06f-44ac-bcba-1331d9148958.json | 105 - .../cc64a143-4f1e-42ee-ade1-fafc4b316336.json | 132 + .../a580b690-0829-43b9-8d52-6dd226208901.json | 105 - .../cf322e64-2682-4a9a-a48f-c4ec47b852f2.json | 132 + .../30b32261-b24a-49e3-ba57-172dc1d03ba0.json | 132 + .../af1bb542-77cb-47e2-89f1-16cc91e89452.json | 105 - .../0681c01d-23f3-4b8b-9516-a5cc41761fc4.json | 132 + .../9d3d89f9-e792-4b33-91d1-41f84ca1cc68.json | 105 - .../7693ed8a-f76d-482b-92c1-f11810e522ca.json | 132 + .../f8dc0128-c606-490a-b965-59d5377dd778.json | 132 + .../844547f7-658f-41dd-ab4c-dc0569030e59.json | 132 + .../75c291b5-6d60-4bde-8621-f865196a6ecc.json | 132 + .../36d54b12-594f-47fe-9637-a9b740416c5c.json | 132 + 
.../57733383-9573-463d-a467-068d2685014c.json | 132 + .../eda1ac9a-98e1-496f-bdeb-1e256b52c14a.json | 132 + .../00b8bfda-c6b1-4e1f-b68c-bff7335e2dff.json | 132 + .../0a3b9ad6-b853-471d-a292-413b30273034.json | 132 + .../d61c3ace-e353-4c0b-9472-c9a1928809cc.json | 132 + .../2293a19a-b650-436d-9448-1b641e63d407.json | 132 + .../340dfc7b-9af0-4545-9d7b-6950ea69bd57.json | 105 - .../c15b977c-c781-4b17-ac9f-25c77602c875.json | 132 + .../42c191be-c0ae-4170-8b6f-565053ae7d9c.json | 132 + .../ea9cc238-75d0-45e7-b10e-e214516ca36e.json | 105 - .../a8a69b0c-02c9-437d-975d-69f1ddc6959a.json | 105 - .../f5cb910d-6e5b-404a-a751-d5cb90668150.json | 132 + .../988da677-c00d-4e7c-847e-6ca553e0124b.json | 105 - .../de806e4c-dbf8-48cc-a0d8-033a61dfc777.json | 132 + .../59150b73-b05a-451e-ba3f-696d04effe05.json | 132 + .../ac45b8ec-454f-4a91-9418-a3dc70535119.json | 105 - .../84926b81-360a-480c-b240-f154ec7fe0ba.json | 132 + .../1b8abf32-6b66-4e9b-9b82-e1978d07a483.json | 105 - .../8e6edb04-302b-4dfc-b38f-94b437c921a8.json | 132 + .../8eaa7d3f-0217-4ed3-9367-9e0f9c0926fe.json | 105 - .../db92c564-1cf9-43db-9e25-1f450c7b1e7f.json | 132 + .../e3796243-cbba-4ec2-ad7c-89547ad24342.json | 132 + .../effba194-3b2a-4847-9708-e3cb62a7c964.json | 105 - .../1479be90-df8f-4e1d-b9db-03e84000187a.json | 132 + .../d2e6c48c-1c18-45a6-ba1a-b335325c980c.json | 132 + .../f843e45a-f66b-4091-a964-75583c2d7fc5.json | 132 + .../275fb96e-4779-479b-937b-f5db6aa530ea.json | 105 - .../cbc3cd41-e187-4c4f-b207-37bceab423a4.json | 132 + .../003c05a1-abb7-41d3-a264-efc6923b64ef.json | 105 - .../0f124566-5e94-4233-9a3f-5ff9cfdf160c.json | 132 + .../36176ae9-e852-4604-9961-b7f02e4c3e55.json | 105 - .../98fabba8-7d70-4a1f-b03c-37e1a9ac94e8.json | 132 + .../91522dad-529b-477c-8372-793f631e14b7.json | 132 + .../ed27cd90-e73f-4432-aed9-dd36f29cba1a.json | 105 - .../7e4b1f44-73f9-4a6d-9d66-91c60e69e3d2.json | 105 - .../cec22734-493c-4d11-ba86-6c7ae2005124.json | 132 + .../1b3bfb2a-8290-4af0-bdac-24397a5b6f86.json | 105 - .../704a6e19-0d86-42a5-b8f5-05a5856e9c29.json | 132 + .../af52a422-e959-4662-98e8-c94fa83bee3e.json | 105 - .../bc54349d-59e0-4ae4-94f9-3f5ae98261f4.json | 132 + .../d20d533a-758b-477c-b4eb-073adaed640e.json | 132 + .../df7621bc-5af2-45c5-b8e4-ebc158dad966.json | 105 - .../195b1c31-c766-479c-a445-39a6150404fc.json | 105 - .../f7c9ad0d-3fea-4bec-8ac3-46f01a3449fb.json | 132 + .../141507b5-67df-4c38-9eeb-b9d3cf98b08f.json | 105 - .../9db1f823-e068-4a39-a5cc-b9c588099427.json | 132 + .../23818b45-bf5f-48a2-982f-1e2a0d35aac8.json | 132 + .../54a836bc-8048-4c2b-a65a-937acc2fa414.json | 105 - .../6a0f5973-6377-4707-a0e3-414ca1f22b32.json | 105 - .../de6eda66-b8f5-4b23-89e1-44bbac600953.json | 132 + .../632974c2-57e2-41f9-8c00-671e07e7594b.json | 132 + .../f0aae363-f838-48c8-bf9e-b8e9f0e84a24.json | 105 - .../80aa0629-7ea1-4f69-b302-c0502abcbbab.json | 105 - .../e86dcf4f-6282-4aa6-b645-00f93a2e9077.json | 132 + .../818e21b8-da78-4649-a71a-ba71c89d1fe7.json | 105 - .../b20be5c9-9720-4076-b587-728549dd19af.json | 132 + .../30b74d3f-7247-4c93-9c94-dc8beba14b70.json | 105 - .../5e193803-39d1-4f12-8726-ebbe5f71563c.json | 132 + .../61131a6c-f412-42bf-814b-7d711a840d44.json | 132 + .../fc6d4451-0a9c-4d53-8d22-179ff7059d61.json | 105 - .../535e72b1-17e0-40e3-9d66-d31f8ec70413.json | 132 + .../ba46f82b-2129-43db-ae21-09e6576dc4e6.json | 105 - .../b98b76ea-b068-46ec-b929-4ca1037eaf99.json | 105 - .../ea15479e-24a8-4924-a754-a8567c511e61.json | 132 + .../5799f285-c61f-43a8-a6a6-053808cf4e8f.json | 132 + 
.../d5a47313-b2f5-4833-9539-b8f56e4a5fda.json | 105 - .../36feef44-3d3b-4102-8606-ee6420bddcff.json | 132 + .../95228f47-8fb1-443c-8ad4-0021504e34e0.json | 105 - .../b5790fec-6c12-42a3-853c-488658bf949d.json | 105 - .../fd55f19a-2c22-4f29-82e0-15b02f25b9a9.json | 132 + .../18e5decd-c95e-43d2-9ba2-007ba32e216f.json | 132 + .../e5d9bded-a8e4-4133-84b9-6eac517a4226.json | 105 - .../718ef6de-5926-4a4c-bade-9a162ce8e730.json | 105 - .../85a4996e-8c44-4e4f-9478-19a8c5513617.json | 132 + .../5d818d86-2caf-4b29-9c15-8fa27217de22.json | 105 - .../db6d57c8-df0b-407e-b937-67c55b513a5f.json | 132 + .../89ac933d-0a7c-40e6-8fa7-35bb6205e44b.json | 132 + .../a935c0d1-6623-45c6-a100-96c8b5a3a2fb.json | 105 - .../0a9be33a-792e-413c-b60d-3e97a060fa78.json | 105 - .../c79e690f-3e09-4fac-9412-937a3b7ef352.json | 132 + .../ce74b7e3-8505-4c79-a7de-12d1e6b47155.json | 132 + .../3c562d8a-2df9-4d3f-9699-bfaee4a1ce2b.json | 132 + .../4bc3f55b-0638-4fc2-b1d9-04780707acef.json | 105 - .../152b0cbe-e27b-4438-8326-e67f4e70e600.json | 132 + .../c733c91f-79a9-49e5-9398-3a424ee1940a.json | 132 + .../32d7b6c6-de5c-4864-a446-97dccce378c5.json | 132 + .../7b22d02b-5bfd-4243-9ad9-c858d0af55a6.json | 132 + .../99650529-55d9-42b0-b812-761a30277e5e.json | 132 + .../81abbc2a-791b-4a39-bb46-97edfa14b9c0.json | 132 + .../c658e535-7098-40fc-bea0-f5734d8f4ca9.json | 132 + .../9e0656e9-9b82-4f6d-b00a-c09cf9cbc105.json | 132 + .../07c36058-e0e8-48ea-85f3-0a2cb2fe3443.json | 132 + .../c41d8925-b56b-458e-b1a9-27dbbcaee149.json | 132 + .../9136feb4-5c3e-48b3-bc70-c7816b8b189b.json | 132 + .../c395ef02-9a50-4696-aad2-bcb32ba05f67.json | 132 + .../93f47969-556a-4fd4-b7bb-4d1c861a8d71.json | 132 + .../349ae559-6c1f-4b2f-954c-e83cba1e603a.json | 132 + .../3e43c3f6-645b-4ab3-b684-b23eb67bc5d9.json | 132 + .../500c8cd4-fe4e-44f3-86b7-b0efd387ab92.json | 132 + .../340a3ebb-bc06-404f-84e7-aeccc016fd32.json | 132 + .../a6426f88-d7cc-4e6a-a2b5-76e59a52a6de.json | 132 + .../bdd05c8f-b895-4c91-9a9f-a608a4259cbd.json | 132 + .../0e1e45d4-2747-480d-9b1f-2b200e250271.json | 132 + .../50289a8b-4522-4dca-b6dc-aa42193deefa.json | 105 - .../00f3f9ca-ae7d-4e62-9e7e-6bd202dbed59.json | 132 + .../c9e57ab2-c2a4-4935-b976-4bf24647b777.json | 132 + .../f125c8d1-57f3-4b79-ace4-2104b008a507.json | 105 - .../61e517f7-e2db-48bd-8f4e-f62b5859b62e.json | 105 - .../c22436a2-ec60-4220-82b3-123618165eb2.json | 132 + .../1f990438-dd84-44d2-99f9-a10035ecd652.json | 132 + .../2db948db-a9e5-41cf-9567-2f9198d80900.json | 105 - .../52ab1e94-4e6f-4876-932b-a45a033dec1b.json | 105 - .../f4564f5e-3595-466e-8201-0e2a4c50ff0d.json | 132 + .../040def3a-702d-4868-b429-39697ca36207.json | 132 + .../98455065-72e1-4dad-bce1-1c3ceddf5433.json | 105 - .../9e24fd65-56ec-4160-b299-b34d702a3231.json | 132 + .../b35eaca2-0f77-4171-bbcf-23a191b055f2.json | 105 - .../216bf9f8-9521-4311-a40b-8a847271265c.json | 132 + .../dc2688b9-9dff-4a2e-b3d8-3bdc82634d20.json | 105 - .../45f8c4fb-3591-44df-a4f0-57093b9bae23.json | 132 + .../723d2f60-f12a-4abb-9061-807fd38e7d51.json | 105 - .../d17275ef-8a32-4fcb-94f4-fb24299ba50e.json | 132 + .../61b79e7d-0f50-4cfe-825c-ed5b23d943f3.json | 132 + .../ce55aca1-80bd-4711-ad05-d812d206bd14.json | 105 - .../113c3507-b738-4b06-ada8-da93b19c6ae2.json | 132 + .../8ae47af1-5ae6-4cb9-ac94-8d70fda5126d.json | 105 - .../420f8334-c420-4b8f-8853-fea8f4f5ac6d.json | 105 - .../8835d5c1-8350-4d42-a753-82b94dffda3b.json | 132 + .../dc3bbda7-5007-44c7-b1ba-af0c82d100ee.json | 132 + .../0d24ee06-a6b4-4be7-b3ef-c4f53b4fc414.json | 132 + 
 .../<uuid>.json                               | 132 +
 .../<uuid>.json                               | 105 -
 [diffstat continues: several hundred additional .../<uuid>.json entries of the same two forms, "| 132 +" for each newly added 132-line file and "| 105 -" for each removed 105-line file]
.../fd90b65b-7b6f-4ca2-93e3-59486c0ee070.json | 105 - .../210bea5c-35de-4bd6-93db-871704add0d6.json | 105 - .../f79ac32e-ab83-40c3-9c18-35623f5ae1d4.json | 132 + .../95dd235d-6930-48fd-8594-5acb0110be29.json | 105 - .../cec76b15-1069-4d37-b8bc-74dde28101f6.json | 132 + .../67e351c8-6cca-4982-86e9-e774786c6862.json | 105 - .../e4ac0d0c-65ea-4b43-bb4b-7371c6cd5d61.json | 132 + .../93b5850f-74d0-45cd-977e-5bf6e4dc5d8d.json | 105 - .../f8d629bf-df0b-4c6a-8c18-17dda002b089.json | 132 + .../6739d8e3-f4bd-4fd5-98f3-887f5ed3f9c0.json | 132 + .../e9ebbcbf-81d5-494b-95a1-4e79feb42c40.json | 105 - .../a51722f4-29f4-47a5-acba-4c8b5355551b.json | 132 + .../dbeb9a8a-53c5-472b-a4b1-1aa0582f8486.json | 105 - .../06d0a21f-f6e4-4ca9-a679-8c4502aaaad1.json | 132 + .../f4d3a112-d529-48f8-a99e-85e9eb02e0c1.json | 105 - .../04a4dcc9-3784-4aea-9faf-9db49c2e4c43.json | 132 + .../4b4a9630-c942-445e-b396-4a988d489aa7.json | 105 - .../e4668365-d3dd-4996-9bb1-5b4e6f510264.json | 132 + .../4d743678-e14d-4866-b1bf-0d660787847b.json | 132 + .../592bd629-d0bf-48b0-83c6-abfa3731fd14.json | 105 - .../43f7613d-bd9f-480d-a2ed-dcabf3169944.json | 105 - .../720b1476-876c-47d1-bf46-d037389b4b2f.json | 132 + .../4e4f3b2d-5b17-486a-a2ab-c2e89194c765.json | 132 + .../be1ab009-3aa6-43da-8b8e-11e5287a0370.json | 105 - .../3986b43c-2752-4a8f-b1e1-c3657734f84b.json | 105 - .../b738668e-3ac1-4a36-ad71-ad7d2a5256ae.json | 132 + .../38ba0438-f5ed-434e-af2e-fed71988f7b9.json | 105 - .../623f1b73-1505-4527-b41c-dcb2b711226d.json | 132 + .../53f03454-9587-4208-bc01-21de62f59195.json | 132 + .../fc23ef4f-2ef1-4a3e-b029-9d646145e135.json | 105 - .../d4bba57d-2a3c-4945-ae47-7830840d0259.json | 105 - .../fb38d8b4-6320-4b8d-bf3d-e3d22bb0ed83.json | 132 + .../767b5c7e-6319-487f-906c-2abca794f884.json | 105 - .../b127a923-3bf2-4cad-9225-d738efe800e3.json | 132 + .../a94ae52a-7936-4750-83f5-4740f23adf15.json | 132 + .../d37e87e2-53c3-42fa-b78d-04d2819b14d3.json | 105 - .../95e689c6-cd19-4114-b3b5-1672ab849214.json | 132 + .../a651c814-41e2-4951-bb8f-df799cc6e470.json | 105 - .../28245528-26e8-48a8-9cc8-68d7a6389bde.json | 105 - .../890a8414-bccf-4a66-8013-6c270d017965.json | 132 + .../0f8ce410-cf3b-4f78-81b9-a0a1fe91b963.json | 132 + .../8ad974e6-8d4c-45bf-86d0-f701cfc323d5.json | 105 - .../121096cf-356b-4069-a0a3-8cf6aad52b81.json | 132 + .../e3e4a9b3-ce68-4999-966e-2ef2baf99266.json | 105 - .../cdb8a900-75f3-4e6b-9d35-5a6791e8acd1.json | 105 - .../fb0bcadf-32a0-4320-909f-2c38ba7d9372.json | 132 + .../ab941c52-cf33-4b8e-87af-4a73930cf72a.json | 132 + .../c60869f0-7009-48c9-be41-339335e5ee4e.json | 105 - .../08c242fd-0258-4817-970a-668584ed9385.json | 132 + .../cc85ba7f-bbc0-43e7-a678-949fd5be8498.json | 105 - .../2171af9a-be5e-4daf-8e67-a5239ccec7bd.json | 132 + .../6df8e489-865f-4692-a673-6abbf2159d1d.json | 105 - .../706f75a1-2f6b-47dd-809e-a830e739b574.json | 132 + .../c36d07f4-b263-4849-86f9-d3fea355c068.json | 105 - .../a9cd0399-4670-4f5c-8c64-c82dac97cd8c.json | 132 + .../67cfd12d-0551-406d-bd1d-8ced75c69478.json | 132 + .../cf6b0824-45c4-4b47-bf23-e5df5673b74e.json | 105 - .../0064f2f6-672e-478c-9184-e7fd32ad06b8.json | 105 - .../0a31d2f0-196b-4508-861a-1ba7bd28ea23.json | 132 + .../4381d7ab-d19f-4fa0-a69a-978af28df8fa.json | 105 - .../57576999-2749-441a-91d6-5a976e83a658.json | 132 + .../4e616fc6-8baa-4c9a-9098-b8d108911ad2.json | 105 - .../e44792e6-0329-4784-832b-3043478e70a4.json | 132 + .../8b3789d6-51be-472a-95d3-2ae7c34ad140.json | 132 + .../9c7ee100-754e-4665-8527-452021a2243b.json | 105 - 
.../0563ee22-d981-45cb-83f8-7dbdb2734d10.json | 105 - .../3f4765f2-551b-485f-9020-0cf17a36a887.json | 132 + .../6375a845-5d86-4dcf-bfd2-e836daa4ca11.json | 132 + .../746ffa2c-cc95-4d69-9e46-0e8f4febd440.json | 105 - .../65a74446-6964-4f5f-8ea6-aeb1b09595ae.json | 132 + .../f9e1901a-854d-4437-8d49-a6c47799f687.json | 105 - .../8919b3ad-529c-4391-bec3-65b81dad97c3.json | 105 - .../dcba5998-3b84-4753-a4fa-2558ffe3e69b.json | 132 + .../0af6b3c0-6638-4bd8-bdd9-349e2b9ca71c.json | 132 + .../3030519e-f137-4091-9394-26a0779f0ad9.json | 105 - .../4e332594-d0b9-4913-9950-208abe4faab7.json | 132 + .../ac41e588-0664-44f5-9fa9-eafd6508078b.json | 105 - .../5ad2ad73-47ed-465d-b4c0-b358e6b6435f.json | 132 + .../eb68e0e3-1e39-4779-bc99-4e1825d9c602.json | 105 - .../0d2e1c3f-8ee6-44b0-912a-452e2a5a6da7.json | 105 - .../c9f716ef-0aa6-445f-8fc9-b102f3a0ea2a.json | 132 + .../5d5ae047-72d1-4083-8e28-dcce7337ed25.json | 105 - .../a2e32a77-867c-4921-ada4-c7b169efbebe.json | 132 + .../e6b62da0-ad6d-431c-8a0e-185c6eddf3da.json | 105 - .../f76f759f-d05d-4eb6-a2b9-3b1dfbe840f0.json | 132 + .../81c8704c-7124-42d1-b320-77e31e35898b.json | 105 - .../ece0bd6b-4eec-485c-942b-e23f3295c2f8.json | 132 + .../6705072a-5a46-49ae-925f-1cf7da1ea288.json | 105 - .../ada110bb-0988-4c19-9798-74577dde5ce9.json | 132 + .../d3dcd3f0-2f43-4b82-ba29-77a69a9b3e8f.json | 105 - .../ed4f994d-d196-40bd-8f8f-f6a7f07c3c90.json | 132 + .../57395f9a-0534-453e-80fc-96e9dc5cd9c3.json | 132 + .../e2aa230d-452e-42f0-a780-af255c62120e.json | 105 - .../86e94a19-e497-4539-802b-597ce0e0ced0.json | 105 - .../f8f70702-9ab4-4e1a-a11d-090627d58f02.json | 132 + .../320c581d-f667-4dab-a32c-bb9f2621e84d.json | 105 - .../3cab8bda-bdf6-4345-b89e-18d34a8f6361.json | 132 + .../0757cecd-bc5f-4095-90ee-25920ae6670c.json | 105 - .../0955fc17-8878-401a-9ec3-149528ee51e1.json | 132 + .../c63bf49a-e7d4-4853-8684-9cc03eaa7840.json | 132 + .../f58f0ecc-a059-448d-a2f9-e36b601e2154.json | 105 - .../2436838e-2b6a-4c1e-b8c2-ec505d9a4c34.json | 105 - .../65e6a3b6-4291-4591-bc0b-576930061c68.json | 132 + .../11486e0e-a9e3-43b0-b26e-299a86555d16.json | 105 - .../1ddf9e02-4066-440e-a777-fcd3f96bc4b3.json | 132 + .../75037d12-da94-4c55-8de5-a7cef098d4b0.json | 105 - .../f9f96bb2-edbc-4112-97aa-a7420dea32a1.json | 132 + .../3a24b30f-7698-4ecb-ac26-3537a0b38616.json | 132 + .../9f0aa20f-8687-4c21-b222-39a322f90842.json | 105 - .../91c2897a-3ae3-402b-aadf-26d0b8d746c5.json | 105 - .../d4030df6-2be6-4f46-9c9b-ce3037b9a004.json | 132 + .../4a60fea6-e0e8-497e-9b29-439e7641e77b.json | 105 - .../ec234403-f43d-46a0-84a4-ab47673226b3.json | 132 + .../805379f4-784f-4602-92e8-180df4da9fc3.json | 132 + .../d9c819c2-a3f6-481e-bd71-47912aef9847.json | 105 - .../6e20f902-8752-466c-b8d4-34787fb90fce.json | 105 - .../9f3920aa-9400-46f1-bcfa-969f69b3335c.json | 132 + .../26cbf444-ab93-409a-b85d-e2bd267eae5e.json | 132 + .../d25510e4-6549-4f64-8ec4-37ac8671050c.json | 105 - .../58e279d4-da0f-4e2c-a74d-c51caeaad884.json | 105 - .../7c2b17a8-1de2-4441-a281-fe3fd043f831.json | 132 + .../64c07a98-4f3f-49f7-99de-9963dcfedeba.json | 105 - .../94c5756c-cbde-46e2-90d2-207678373061.json | 132 + .../abebffbf-48b5-4452-8c7a-bb1175a7e979.json | 105 - .../e0048124-89bf-4327-88a8-00aa51ee29af.json | 132 + .../1810feae-7a27-4c17-8174-3cd8a143b21f.json | 105 - .../9d776307-43af-43bb-ab64-52fb7f331cfe.json | 132 + .../1fc6ca13-157c-4502-8724-be153afb6347.json | 105 - .../d8d41981-a7c8-48e9-a63c-86520a0f23d5.json | 132 + .../1355985c-fbcb-4eac-8435-417d6034f2f0.json | 132 + 
.../c20f5702-24fc-443a-875e-495401776eeb.json | 105 - .../24e11e0c-fb61-46c1-a05e-c533eb392195.json | 105 - .../44486b02-7bdd-4f59-8d4e-5c8deeb1fd60.json | 132 + .../15deaa33-87a2-442e-9618-13f5ab6c299e.json | 105 - .../45ae3dc3-6dc0-4d10-99cb-a7f330110906.json | 132 + .../6b54763a-6329-47fb-bf50-296604251b47.json | 132 + .../bd4cc259-d535-437a-afc5-d74a60154b07.json | 105 - .../96a26bf3-b4b2-465f-8ce6-a2ef943c001a.json | 132 + .../aadb6262-4f31-4681-983c-0d19e8bbc5cd.json | 105 - .../41e3ecda-8988-456c-b413-19770e2f05c7.json | 105 - .../655b047f-c3a8-4c9c-b864-81d318b2f506.json | 132 + .../c57d95da-1b6f-4ce7-8c42-f1129fc1e55e.json | 105 - .../f62fed77-e166-422d-b5ce-c50b7bccbf4c.json | 132 + .../7ffdabf3-0a8e-4316-b6bd-85b10a81db53.json | 132 + .../9a6b85d5-bb26-4832-915e-8b1ac90b0793.json | 105 - .../2c93c987-b32d-4a02-8df4-949cc45b8eb2.json | 132 + .../4bcdbab0-7220-40bb-832f-01003f59da0f.json | 105 - .../02e7c1d6-9db1-4de8-b13e-afd752b3669a.json | 132 + .../8f16aed2-8b31-48cc-b874-8d437f26f3db.json | 105 - .../262a66ee-04e4-49d5-8bb2-efe0a93801ad.json | 105 - .../580a3045-338a-47b2-8ed7-54c993d5aa90.json | 132 + .../e71d3be5-ea9d-4426-aa58-5806b7541aa6.json | 132 + .../fd2a2a9c-639f-4348-9861-00271ed070b2.json | 105 - .../1174683a-9488-4c6b-be6b-e5a96328a96f.json | 132 + .../53c89eb1-49ab-4e5f-b1ad-d8e80045a292.json | 105 - .../3789b37f-daf0-4c21-82b8-309cbf00312e.json | 132 + .../c0cc1ad5-9e53-45ac-becb-f8ce3e5ac631.json | 105 - .../798c2f08-e10b-4115-bdd5-0d6053d03b60.json | 105 - .../8586cdc1-dd4e-4112-a59c-f6bc2766701b.json | 132 + .../388ef85a-db27-4851-9e6e-2002a75bc6c7.json | 105 - .../946a7b16-dfa6-42ad-97c1-955bf8a40dae.json | 132 + .../cd0c4096-93ee-4a04-83b0-44063770e81b.json | 105 - .../d9a6cc31-57c4-4480-a019-25a34b31fcc8.json | 132 + .../279bd5fa-0ab1-411b-871b-bd9ff23853f6.json | 132 + .../69f3e2b2-8918-41a8-abc6-c84c3d674f94.json | 105 - .../60d775f1-47a9-45ae-9b2f-75b95c9d96cd.json | 105 - .../c26fae10-e65a-49ac-a2da-2dbf024fd10d.json | 132 + .../6d37b2b4-630e-4471-b7a8-50f8a58902fe.json | 132 + .../d3af54be-9d9a-4a4a-b03e-3468a801795e.json | 105 - .../de687865-4297-4130-bcfe-0c5116c9b0d1.json | 132 + .../e7cf15b2-0347-48a8-bf84-08e27b3688fd.json | 105 - .../8befbe9f-3ab2-4bc8-bd16-5badd2291d5d.json | 105 - .../ee1acad1-5dc4-4d8b-8aca-544af5dc2392.json | 132 + .../52e3f1b1-5a1c-4cca-a36f-9f60284e1883.json | 132 + .../e14cedfb-79a9-446a-ba16-64f378a47b4a.json | 105 - .../2d54c67e-fad5-4a61-b3ae-0393f16dc1ba.json | 132 + .../84a37d06-2668-4143-8e2f-5a08651f2dfb.json | 105 - .../5120e433-f5c7-45fa-be56-566101556271.json | 132 + .../72c9dcd4-ab00-4f36-a1e6-43e241c8b967.json | 105 - .../7f4b4668-c3a0-4575-957d-ba321d55f420.json | 132 + .../9819f2bd-8108-4fc5-9208-ce295d860435.json | 105 - .../2ce2b8e4-0cd4-4001-8790-ad5e26e3e45c.json | 105 - .../9245b74d-4b9d-4158-a402-0c3742097eba.json | 132 + .../29a5fcd3-9c22-424c-ab17-70cfe187aea1.json | 132 + .../b6ca35e1-8680-49e8-a6dd-963214be7411.json | 105 - .../155b7412-cc16-45c3-9261-acc9322a0dcc.json | 105 - .../af71bfa0-1077-4c96-a4c1-0aa28dc789bf.json | 132 + .../258ebe6d-191d-4804-b5e1-5cd6ce93ba88.json | 132 + .../94668ddb-d2fb-44e2-8ed7-10179d145366.json | 105 - .../4765f197-82ed-44b3-9a7c-7cbabc6ecd8e.json | 132 + .../828a6bd0-a205-4327-bc77-2e8a84c0b69e.json | 105 - .../8c0a66fb-c87d-489d-b071-b4a599562ead.json | 105 - .../a5d66f97-1f4b-43da-a83a-4a262e297fd9.json | 132 + .../5d29cf73-65d6-4965-a504-4caf07108cc8.json | 132 + .../ef8025de-fe9f-4a79-97f6-c26c18ab049a.json | 105 - 
.../15ec04ae-30d3-4ffb-9b0c-54ba63410e3d.json | 132 + .../a31fbd82-2e21-40e7-a73a-c6351c80bae7.json | 105 - .../15ec7997-1333-43c6-869a-ce4589af56d1.json | 105 - .../2ed96c70-390b-44de-aa08-9883a2f33ff3.json | 132 + .../67c95889-8a67-40fd-99e2-62e767c16416.json | 132 + .../86411dbb-e28b-4e9d-856e-fcc001252fbe.json | 105 - .../804f4be8-a8a9-473f-a898-d71b742a62eb.json | 105 - .../a518f39d-e073-493d-9a4f-9af53fc71abf.json | 132 + .../24f0d9bc-d743-4f46-b5a6-e855e39a1daf.json | 132 + .../736ee66e-bd19-4275-afaf-73c2112c2fbd.json | 105 - .../3d27f6d9-05a0-44bd-a225-6e6a0bf4a35b.json | 132 + .../da5a3c32-371f-44e5-89a7-c9ba6e98664e.json | 105 - .../ad28e7b8-69e6-4fb9-bec4-62c67fae6d58.json | 132 + .../af3bd92d-45f5-4a48-89aa-b8c956209d5a.json | 105 - .../0da639d4-181c-4ee1-808c-3de8003c2471.json | 132 + .../c98928d3-0d7f-429c-927c-bf8fa432101a.json | 105 - .../480bd62c-bc67-4379-bce0-b28a5d6bdf4f.json | 132 + .../787d8040-25c8-4893-b140-cf041260d767.json | 105 - .../6aad7ade-7bd0-4515-b4ac-2299c58da098.json | 105 - .../dd94c18e-b2c3-4135-aa2d-5eb0248315d0.json | 132 + .../81914fd7-1410-4b80-9be9-6ebfbb664613.json | 105 - .../a2ae2953-e341-49be-8469-32bd41d780d7.json | 132 + .../23bdd694-f250-46dd-9b8b-526fda47bc9e.json | 132 + .../3e4b8dcc-9270-4b14-952f-c6b96ee8ce57.json | 105 - .../cfe4ab09-c772-4617-88b6-77e49553605b.json | 105 - .../d600a69d-1952-4e30-abe8-1769ab63ac29.json | 132 + .../a369ff4f-7fe9-4764-be74-83563dbaf635.json | 105 - .../afc031d4-852e-4ead-9098-6ce30112b459.json | 132 + .../cb33e29f-e5e1-4bf5-9e20-86d9c3486d2d.json | 132 + .../f3815ff9-c1bd-4706-a770-4c0b0e8c5d13.json | 105 - .../a4b93124-1151-4f69-8a5e-6b916e8cf11f.json | 132 + .../f4db95ae-8e3d-45ed-9c53-3b30fde0cb3e.json | 105 - .../0b27b829-6588-4f7b-80fe-6e6767287a38.json | 105 - .../efe11d8f-65e6-4ba6-8148-fdd43c9346be.json | 132 + .../923da7be-2ec8-46b2-8187-fe08eb86d5a0.json | 132 + .../a0c16d3d-e3f2-4c50-975a-70b69824b3d5.json | 105 - .../0218b7de-bbd7-4196-8fec-3f6fb790a3c1.json | 105 - .../1652b9fe-640a-48f9-b7a5-20ae28fb5985.json | 132 + .../572463ed-f6b9-460d-9c38-0e0ee5327511.json | 132 + .../68435a43-944b-4c66-979b-eb48f7a8e77a.json | 105 - .../0dc95982-e5b0-4011-9e5b-48af7e3048f0.json | 105 - .../5f6bbbfd-16a8-4ea8-b9d9-b436a882700a.json | 132 + .../012eeeed-c556-460d-82f6-34bdc31da5cf.json | 105 - .../32322361-f18d-480d-9475-cd11a45bc4bc.json | 132 + .../37e59290-b4ea-4a44-bfb0-cdbe781c4d7f.json | 105 - .../f62d1aee-2d9e-466e-85e2-002fae5d2504.json | 132 + .../6d3a64df-5ebb-4cd8-bd6c-de799d185fe1.json | 105 - .../af389bf1-da63-49a9-9e49-32613d8d05b8.json | 132 + .../a637936e-646b-4c21-964a-61e253fd3705.json | 105 - .../ea13ae62-d050-4cc4-9cbe-99eedfc206e2.json | 132 + .../1e697620-36a7-459c-b88c-405febb57c3a.json | 132 + .../a56c62cc-c318-4de4-b6c7-0fa10229a127.json | 105 - .../532723e8-a9b7-4f72-a015-c2bd9363b5d8.json | 132 + .../bcab8546-ea69-4207-b69b-ab982b603e55.json | 105 - .../6097086b-8c8b-493e-af1a-71146a2ed566.json | 105 - .../be096a57-7d81-4999-919a-ed8a243012b2.json | 132 + .../7166192e-42b0-4990-8218-88bb38fd1bdb.json | 105 - .../cadeb016-e158-4a49-921c-efe0e4eb0cb2.json | 132 + .../3a4f8c97-9f30-44b8-8f79-7f19f90a08d1.json | 105 - .../c606d7b9-3ea3-49d4-9ecc-9610ed4b4eac.json | 132 + .../04a5eed3-7eea-4d9f-acc6-5a96ec987e2b.json | 132 + .../19490f78-486d-4325-b31e-af8555c32ea9.json | 105 - .../61e7c49e-abb9-4e38-ba3f-1018db104d83.json | 105 - .../a1c60d74-dabe-423d-9e40-3dd8112d7d8e.json | 132 + .../243e6b7b-a34f-44cd-b027-176f877ff8e7.json | 105 - 
.../29c7bc9b-6833-497b-a553-2941026efea5.json | 132 + .../09a60955-978e-4136-bdde-d5459e37ad2c.json | 132 + .../85ba493b-05f1-4853-a0ff-44570a7c2a82.json | 105 - .../501744a2-070a-4378-9232-f7ccd9b2a67e.json | 132 + .../56837896-11a6-458b-a17e-9540ab5ae66a.json | 105 - .../369efdc6-6529-477c-b5f0-d229c8102491.json | 132 + .../db0c4182-7391-40e7-ad6e-5374c8eb28e1.json | 105 - .../265e3cbb-484f-4cf7-8994-050f414ecf37.json | 105 - .../906645f3-2041-4380-8118-ac26b92297ba.json | 132 + .../472b725a-2bd5-440a-9768-ba8db6fe6b34.json | 105 - .../57fe8deb-02dc-43a8-8a92-14bdaf61dd67.json | 132 + .../60c18178-ff40-4e9d-9683-077cc2fa254e.json | 105 - .../95f2fa22-3da9-4876-ace3-50763f2b2453.json | 132 + .../6b3c3872-cd4d-4827-8651-6baa9d2423e7.json | 105 - .../b2f9e38f-c2a1-4e5f-a7ce-4e33a05b503b.json | 132 + .../9f30c4d4-4a3c-459e-8444-e143ef75f84e.json | 105 - .../b3173a2a-8309-498d-961b-0167d5d5dea6.json | 132 + .../0d59dd75-c999-4a7e-919a-fd084202fc9c.json | 132 + .../80bbd567-b13e-4ed4-ba85-9098639a3642.json | 105 - .../639e91d9-ebbf-4ba2-bce3-6953e7c91e32.json | 132 + .../e574e35a-56cb-471d-b4f1-df0858f5ce66.json | 105 - .../56a5fb9b-a4b7-4290-9ec9-6864b3efaa82.json | 132 + .../ec314c97-9bc0-4e14-9d57-d6204e699428.json | 105 - .../980887dd-2948-4e5f-b22c-3cc03057f493.json | 105 - .../d03fb481-be0b-4dfb-bb4d-54067e058e99.json | 132 + .../6d544c96-53c9-43d1-9cb1-6077d7235fff.json | 105 - .../d8fc3475-83e9-4790-a472-72b442087562.json | 132 + .../57efd335-4873-4e01-bfc3-0d704b3d482a.json | 132 + .../fd3c9666-09bf-4562-b49d-eea905469761.json | 105 - .../25fdcc8a-0e7d-4148-8508-2631ea6deb05.json | 132 + .../978d4a27-17c7-4f87-b3e5-27b00ffa4d80.json | 105 - .../9a2d7235-84cf-43f6-8855-68d0bf85e6e3.json | 105 - .../f5f63d06-7e51-4b91-8814-ecbda604fe6b.json | 132 + .../5326c33b-6b8a-472a-9058-a9e9fe83b599.json | 132 + .../7c388cc5-fb2f-48ba-967c-a931fcb25a42.json | 105 - .../28674053-e1b6-4f0a-a90e-5dd5082ec164.json | 132 + .../348c8f2b-807f-464b-832e-0049f8329b86.json | 105 - .../fab7388c-87ed-4108-ba4d-e1621925f264.json | 105 - .../fd27bfa7-11b3-46d3-915c-373ddf5a9865.json | 132 + .../91f190ba-39c8-47af-8351-73d1f382dd99.json | 132 + .../a9c1b649-8850-43d1-b5db-feefd0b8d0b4.json | 105 - .../974b1542-8716-4ea3-b097-f9893c9c9656.json | 105 - .../b637b55c-dd05-4060-bf33-e63e9de7fac9.json | 132 + .../bcacef79-d7c0-46e7-9194-43541c2f01fc.json | 132 + .../e8dfd77c-e2c8-42ef-b341-5476411d038d.json | 105 - .../16d14b95-fe8b-4e1f-94e1-65d966ba24d6.json | 105 - .../77a358c7-59fa-4b22-a190-dfca86c5166b.json | 132 + .../00b1b367-c4eb-4048-b80d-a8253e7c2048.json | 105 - .../ad4c8922-7079-4383-8f42-d3de6326a1e1.json | 132 + .../7f89eded-e5fc-4b3b-9afd-dcd71b7b44d5.json | 132 + .../85bd08bf-bdc3-42fb-b8f9-3d83e32921bc.json | 105 - .../07cb94ab-0aea-4ce2-89b0-4378cb892c7e.json | 132 + .../c9df2e30-5e2d-42cc-8597-dc354602350a.json | 105 - .../5fb04756-c7bb-4772-b209-0d9a300bbf7d.json | 132 + .../90830134-43d5-4d0c-9a93-4be2c1c7dba8.json | 105 - .../0c02d1b6-2d31-4c54-b881-588cbfb0c686.json | 132 + .../b63d1462-f84b-4d20-86d6-1a54cf4eb81f.json | 105 - .../a32e4d22-8096-4537-a68a-98ff9171ac8c.json | 132 + .../f6dced28-f64c-4995-88b1-ac9a82903de2.json | 105 - .../4e45b666-fa7e-4a38-8b6b-65846876c8d9.json | 132 + .../956d92e9-51fb-4770-8687-6003f9594345.json | 105 - .../5c61d4f5-25a0-4ffe-a9d2-2a33d8bbd717.json | 105 - .../d9cb1d13-2af5-4385-aa78-5c053e00e6c6.json | 132 + .../44d85302-1af8-48ef-aebe-a9512c5bc387.json | 105 - .../6afaec07-ebb8-4f3f-af48-c679f38f4917.json | 132 + 
.../a05ce252-928c-4482-95f7-f4c0fc2c7c10.json | 105 - .../bf8370c9-baed-4034-ac38-c6f796baca15.json | 132 + .../23127691-ff90-433f-97d2-322e1191d821.json | 105 - .../d397c078-6fe3-44a8-859c-a0f7c551dc3a.json | 132 + .../91d65b2a-a96a-467b-9e5c-9efa28d7fd96.json | 105 - .../ed61cd6a-bbf0-45f2-9536-a7a262d5d6fb.json | 132 + .../6be795f4-0784-44bf-8926-e3060ec37dcf.json | 132 + .../c63fc798-cf74-4767-ba95-6353b6761bcc.json | 105 - .../5ba1e4d3-29d4-4337-bd10-9e1a5df29af4.json | 105 - .../d4d808f5-3b79-43b5-8076-d3f785083789.json | 132 + .../370f5923-91d7-40d2-bd06-bf2b657b8ef2.json | 132 + .../a98ec95c-4af0-4b55-adbc-06e5ceecd00f.json | 105 - .../4dd614dc-b68b-456c-ac55-f2221a479caa.json | 105 - .../5334e5e4-d243-4c20-912c-d0ded74d6ea5.json | 132 + .../2a71c7d7-8ae6-45e7-ab7f-54f7d31dd131.json | 105 - .../7306f2cd-4fd2-4dd4-b06b-8c9aa558388b.json | 132 + .../68cc19eb-423b-4d6d-a3bf-eac6f666bc4b.json | 132 + .../e8c5d934-c9b6-460c-bd45-c4a3e2d26bed.json | 105 - .../59aa26a8-93b3-43fc-8c38-ef67cd8efd80.json | 132 + .../dad898e1-ee18-4864-b432-462d17ac8006.json | 105 - .../1373c279-13b7-46d3-94a4-7b47c9319f88.json | 105 - .../220cd306-0613-4c8f-9848-4af812a1d37f.json | 132 + .../22c3022f-d538-4a4d-8d4b-05e915506451.json | 105 - .../39a6a40c-3fa0-41ba-9d13-da9381263d4a.json | 132 + .../4d037b71-5d03-41a1-bf23-c0aea0cdcbbb.json | 132 + .../4d1ddf64-4626-4877-a0fa-84e06f6cf977.json | 105 - .../16baf620-7dcc-49f3-a787-b431e11ad4f6.json | 132 + .../2014c198-5e12-41ef-8f65-7321d0423573.json | 105 - .../4745add2-7bcb-4c05-8b12-6bd30856890b.json | 132 + .../725e5a72-548f-46d0-b268-12209e5cb085.json | 105 - .../8bdd1aba-81e4-44d1-acfd-6efeaf391ac8.json | 105 - .../f68b122d-4dec-4d5c-ac22-198da3d3e96b.json | 132 + .../2e20f780-ceab-4d1d-a1ab-35f4f0ac44aa.json | 132 + .../6e5584a8-5b8e-48ce-8b80-2d39a74a9b0d.json | 105 - .../67f972e1-4ebd-4b78-b740-fdc03ac88aac.json | 105 - .../f21bcd75-fc9f-4266-8976-3227b18b6b32.json | 132 + .../7c1a81ec-1cb7-4858-8f1f-23b3ee49b73f.json | 132 + .../d461545f-ebcb-49e2-94ce-a6591e31a94a.json | 105 - .../1cbfd1ad-237d-4cd3-8b5d-3135c194fcc0.json | 132 + .../267e641c-7fbd-40d3-a9b7-eb3621240b2a.json | 105 - .../0bd6a333-afc0-43a4-9d14-fa44c2364184.json | 105 - .../ef5c1813-a74d-4b3d-9911-c27a46c1c84e.json | 132 + .../38a5c599-a098-42f4-a7cb-acee487e382a.json | 105 - .../df50857d-c90e-4ec8-a9b6-96a6d2f894b1.json | 132 + .../774d54fb-a445-4ed9-b79a-9c1346537e98.json | 132 + .../cd7e14cb-b1f1-47d8-81a9-960da8ac4e05.json | 105 - .../41b46842-dffa-4791-8225-99d676f563c9.json | 105 - .../420b8be3-3560-48e8-8ab3-bb55338a9069.json | 132 + .../6b63598f-4891-4b71-99ca-bc56b780d829.json | 105 - .../c118b75c-597f-48a7-a4eb-675af72c9930.json | 132 + .../e75534d3-b994-4e88-9274-7b62f61916cf.json | 132 + .../ea79ca75-c55b-457a-b952-528a22567dbb.json | 105 - .../770a1ff1-057f-49a7-9402-c6dd881ac03d.json | 132 + .../f7d63a4b-070d-4581-acce-cd356a3dea47.json | 105 - .../6cc9790d-9b02-437e-8ac7-be4152f5b17d.json | 132 + .../85502cb7-db11-43ce-a3cf-f9329ecec2e1.json | 105 - .../264f5b42-a3ac-4af1-8145-c5763b8e7fa6.json | 132 + .../5f36e182-fa70-41d9-9cc6-12367035fc76.json | 105 - .../549db368-437a-4982-ba5b-5c4d7bf203ae.json | 132 + .../79c255e5-8a6b-4afd-a03e-e35cbcbcc712.json | 105 - .../0091eabc-3888-4e1a-a29d-8c4e98b599f2.json | 105 - .../0d098a19-7e8f-4a52-8466-729be91388d8.json | 132 + .../1a1eaa84-9926-4c4b-b254-96cd667c25ac.json | 105 - .../83335f65-25a4-4bec-a901-587567ed0e99.json | 132 + .../02fb24c3-927f-4c21-bd47-b883521162a3.json | 132 + 
.../59703023-61e1-4df0-8542-703d5a318756.json | 105 - .../2a6507c7-44c1-4416-9ff1-36abd6af3b73.json | 132 + .../fea528ae-4015-4adf-bce0-f9775554cc5f.json | 105 - .../1950fba0-3a1b-4cbe-8fa5-9947ed8e4bad.json | 105 - .../327a146a-8cfd-4480-8342-46afde530677.json | 132 + .../0700fb7a-e722-432f-a64d-c040bba4deee.json | 132 + .../1430e550-80ca-4f84-952f-b5b10fbca711.json | 105 - .../131d3a7e-43dd-4189-8466-6562703b3bdd.json | 132 + .../c0898ca4-21a7-4d83-ad2e-1aa61bd370fa.json | 105 - .../8f6d7008-b8de-4a76-94aa-bbecc93ef3f7.json | 132 + .../f4b76351-e472-47a9-8011-6bf2e7e33a71.json | 105 - .../03c03447-1bf3-4721-8f9e-5ef041ab5d7d.json | 105 - .../aadb0ce5-a1aa-4b0d-bec4-8bb0e8e54a1d.json | 132 + .../a73250f1-399a-4afa-bf83-4036dce78ef3.json | 132 + .../fe231e36-6cc2-412c-b86e-0ba6ba9cc430.json | 105 - .../1095577f-7b50-4854-9c7c-5beb59206e60.json | 105 - .../f68bf680-9626-4952-b95e-12a18fd60820.json | 132 + .../2597a3df-0f30-43d1-b1b3-7a0baac07675.json | 105 - .../d6a78a5c-4a2e-4370-88f2-d8627a94f1ea.json | 132 + .../050afa51-be7c-4cad-ae8b-bd63384df297.json | 105 - .../7b5eab2e-fba3-47d5-9839-02249c2568c5.json | 132 + .../0d7698b6-de52-4781-831f-a3ca8b23dd72.json | 105 - .../2acee2c3-4322-4152-8151-c1d571475b7c.json | 132 + .../67ffb2de-0410-44a2-aad7-4a32e2c49c7d.json | 132 + .../cf85253f-0ecd-4943-a508-eab1e562a497.json | 105 - .../2923aeb3-982f-400d-9588-707583c75a1d.json | 132 + .../e64e5fe0-c726-4b9d-9d7b-952e7c7508ab.json | 105 - .../0e1cd676-f95b-4562-8c5d-e932f148dc23.json | 105 - .../b6a622da-5ce8-4ea5-a82a-f3a2a299ddf2.json | 132 + .../3a8a175f-5173-491b-9acf-87fe781f16df.json | 105 - .../7b06ac17-bfc6-43d5-99e6-d2b7a31290fb.json | 132 + .../b1f4196a-0050-4107-a97b-4e1bd6ece17b.json | 105 - .../fd481b93-55b2-4831-9be9-1b1b2886fda3.json | 132 + .../03761253-711d-428d-a3bd-89974a50b490.json | 105 - .../f159748f-234e-4962-b582-cd5805448f33.json | 132 + .../044d53dd-d134-4959-a70c-46f11cc0b300.json | 132 + .../e5cd6a8b-88ed-4a0d-8584-889a4fde72a7.json | 105 - .../0755b7f9-bdd7-4e2a-92da-6650934db265.json | 105 - .../f05501fd-7c06-46d5-bc20-a9d0cc5c2e0f.json | 132 + .../2bcc7f9a-9c36-487e-8522-bfbe1910b857.json | 105 - .../5c44a2f2-23e3-4c9f-9b7c-9012ca8b15e9.json | 132 + .../80e5134b-0733-41cc-8b4f-ef32fbe57066.json | 132 + .../98f97092-7c95-46dd-94c7-4030f153d197.json | 105 - .../61123e41-7b2a-40da-9f7f-b830c27d7f12.json | 132 + .../e49c9cc8-96ff-4a3c-b7b4-ea5562f41449.json | 105 - .../98772920-a700-4fda-88fd-53c16ac4b1a1.json | 105 - .../b93c31d7-54c3-47b9-a267-3f8fdb796805.json | 132 + .../b3eaa4c5-7abc-4e2d-9c11-c70ecb8a843b.json | 132 + .../d647b482-3d3b-4ed4-b8b5-d57eedf87db9.json | 105 - .../3b06f75e-3d22-4428-8d4f-2e704b96961e.json | 132 + .../f197c7ce-c30a-49ad-bd6c-9571d3b25637.json | 105 - .../80c756a7-9d47-4b49-bf42-bbada0909163.json | 105 - .../dfda4aab-f8d4-49ee-b141-78539b69007c.json | 132 + .../690f3c19-c148-458d-b4c5-87761d72b851.json | 132 + .../c9933c3d-98ab-4486-bd42-7c90f5ed3bd2.json | 105 - .../3f3eeca1-d401-436e-b7e6-5fa82c099270.json | 105 - .../b6a18246-776d-463f-80d5-140df74e9704.json | 132 + .../9831abdc-ad08-48c0-8384-86240e7350b5.json | 132 + .../ab7e0f6c-bca9-4f83-a4a0-5014c46e0512.json | 105 - .../6391f921-4de7-4e83-8bb2-8d0ef0b58d8f.json | 105 - .../96a572e5-4751-46ce-9202-deb223ef4dfe.json | 132 + .../f4320b1e-ea4f-4aea-8dab-cdb221ce53e5.json | 132 + .../8376c0bf-f9c3-4529-b13c-c57106182d15.json | 132 + .../97a80145-e621-4603-8ff8-2cc4bd74190a.json | 132 + .../f8461982-37ad-4975-8445-996bdc9e59ce.json | 105 - 
.../739c83a9-8ff7-48df-af0c-494891df487b.json | 105 - .../99a7881c-cca0-43d6-96f5-ce5292ed60a0.json | 132 + .../60ca8f7e-1c20-4adb-bb84-892bad3c0d63.json | 132 + .../cc65b968-d766-4825-85cd-c36872eb1986.json | 105 - .../4a0f8dc7-9446-4dda-bf49-8cca4851746c.json | 132 + .../70097d1f-8c48-49ab-b285-eebe2c85628e.json | 105 - .../6eb3a040-8234-4d31-8274-6987b0e4e3b4.json | 132 + .../e108df0b-a1ce-4c07-b683-6d3b33fd3988.json | 105 - .../16053077-38fd-4136-81a5-fea0d4cd927a.json | 132 + .../77bd2442-4004-48cb-ba45-eeb1ffec2a39.json | 105 - .../25abb99f-536e-4638-8611-a1db5dee931d.json | 132 + .../70f2cb5c-feb3-44ac-9346-7ff60137e1c7.json | 105 - .../aaf0e5bd-b033-455e-bb23-b12b6f7c4520.json | 132 + .../b3a46478-c5f4-4c74-9bf0-d1ba616ae24c.json | 132 + .../169fb05f-5201-47b8-a06e-7d01e574c689.json | 132 + .../db076309-32e5-4d46-9786-ff14f8daf5d2.json | 132 + .../cde914dc-7d57-425f-9787-e4b8d36d61cf.json | 132 + .../5d793ce3-a7fd-4ee3-b32c-c9da63ec0566.json | 132 + .../8c645c9f-02f6-44a5-b295-d6364ed49464.json | 132 + .../97bb5519-e2d3-44d5-abf4-b5263c2b3245.json | 132 + .../bd3d78d3-3ff1-4a92-a316-e4e30787a331.json | 132 + .../d8951ed7-f4ef-49ce-891e-8d8509e9cf93.json | 132 + .../e1772d6c-fd26-43a7-82b3-7997d8a6809f.json | 132 + .../febaf893-6aaf-4c87-89fc-cc865ebf2859.json | 132 + .../0ad591f4-c846-4fd1-8536-a169e0a7e4ab.json | 132 + .../0a318ebd-7bbb-456b-a6e4-9b480a858b5e.json | 132 + .../e1cfdc32-3c5e-4f4b-a205-f416c96cf5e6.json | 132 + .../85426280-8138-46d0-a111-b59b0d7c86c8.json | 132 + .../32bbd26e-05e7-4a0f-a491-8f54cea9f3d3.json | 132 + .../86ed6833-ae85-4a8e-b840-b0c9540083ce.json | 132 + .../2f751ac3-5ca5-4d0d-9ad4-48155e51468a.json | 132 + .../9677e68d-afda-4917-825c-83318219ff59.json | 132 + .../23cd57c2-bf7f-440a-ab3e-edfdede5e8cd.json | 132 + .../bd67084e-d9ca-43c4-ab6e-3fbe8a1fb782.json | 105 - .../bec23315-f98a-4211-81a0-c49f395e66c9.json | 132 + .../1ac5faef-7fa0-4b58-a6ba-0c444a2023a8.json | 132 + .../39327803-11e7-4b28-8750-81feb027e8f3.json | 132 + .../f521cb33-487e-4636-9039-fe1af3e090f2.json | 105 - .../386bc585-73ed-443e-b8ce-8723c533e41b.json | 105 - .../ce2b6874-0fc8-4364-a526-7b25b101e1e3.json | 132 + .../09585af5-dd80-4418-8f58-c6ae718a1eee.json | 105 - .../9f9ebc90-31f9-45c1-b9c2-07b727b12f3d.json | 132 + .../d189a2fc-71f5-4bc9-a0b1-7e744a19921f.json | 132 + .../f7207c82-5fc7-447a-b532-42bdb77ecfb4.json | 105 - .../1eb697fe-9dd4-4a41-aa47-33456df39e2d.json | 132 + .../592dcd83-1adb-4193-add2-fb0ae66ea7ee.json | 105 - .../2c82f973-c6cb-4aa2-9121-51bb0343aae4.json | 105 - .../5f10df7b-cd2c-44ca-b13a-2852483c71f8.json | 132 + .../3abbb4b6-8050-44fd-b066-0f061ce2f4d7.json | 132 + .../42b63cfd-3b06-4363-bf78-40c40da10299.json | 105 - .../5f47e65d-293f-469e-a18f-5627ca1adf44.json | 132 + .../cd4acb74-9433-435c-b0e9-9750fa52e3c0.json | 105 - .../b753c1aa-8a0c-4600-99ec-8eb51ab50da7.json | 132 + .../e9a9ec78-4ada-4ce4-ad92-c27332279f84.json | 105 - .../15c21655-9af8-4bee-9884-b047683e9adf.json | 132 + .../b4e42076-bbff-4179-897d-b45a0e959020.json | 105 - .../4017ff46-f389-4024-be9c-4360b0b6e64c.json | 105 - .../f642de95-218a-4db0-807f-1bb97618b4f6.json | 132 + .../01443b06-9ad3-41f5-ae0d-bc84086e0a0d.json | 132 + .../6bef1092-ece2-4aeb-8dbe-0e1a02c95f2f.json | 105 - .../1ee8c377-2236-4225-942f-ef8ce5770741.json | 132 + .../872cddea-7a06-4b80-9243-423bf49c222c.json | 105 - .../4ee9aa78-d9eb-4a1c-91c4-f29f093b95d3.json | 132 + .../9dfd4a1b-fa18-4d54-a7bd-a519f87b532b.json | 105 - .../184a8906-d998-4e03-bf6f-f66ca904a7b7.json | 105 - 
.../419c6631-805f-43ba-9db8-5296f8d221ec.json | 132 + .../11f14586-5f0c-4e0b-b41e-f3e0f298b781.json | 105 - .../3fc1822f-4a43-4a3b-90d7-fc163491c90a.json | 132 + .../3b9966ca-8157-4f32-b276-9d36dd1045e1.json | 105 - .../76b4037b-c5d0-435f-966a-bd88b1665dad.json | 132 + .../414c1eec-86bc-4d86-a014-2ea586eebfb1.json | 105 - .../757b85e7-84c8-429f-aeb4-870852fa8959.json | 132 + .../acab4982-1205-4362-803e-306b1e2371bf.json | 132 + .../c9e8c1d4-c031-4f90-a14b-30633e75f2c3.json | 105 - .../0e549b5d-c1d9-443d-9a80-8dd34dadd22e.json | 132 + .../4532b233-abbc-4fbd-ba77-801eb1398361.json | 105 - .../d3d4eccc-8792-40e5-91cf-22885f4cbaf5.json | 132 + .../d5916658-91c3-418f-9cd6-c49dcc8927a3.json | 105 - .../708aded5-6252-44e3-bf0d-08bf3e7f32e0.json | 132 + .../a29cab83-e937-4a2a-a9fd-986fd1c67e03.json | 105 - .../2aae97a9-6d0a-438d-9f74-e7a30e85face.json | 105 - .../ce6d31f2-f38e-4af3-85a3-d2f6c80f71f1.json | 132 + .../060fe548-f690-4492-9c0f-ada0210b0386.json | 105 - .../5efcc291-ca9a-4ca9-b2ed-dab37dce5f5a.json | 132 + .../47320824-8064-40d4-a08c-810faafbba77.json | 132 + .../7d709f22-c4e8-4903-b924-a86728dcf26b.json | 105 - .../8baeef58-0ba6-4723-8f23-7a4c386f2cad.json | 132 + .../c45c03dd-efbe-4c86-a07d-e7831210e017.json | 105 - .../0387ca63-1e31-4eaa-ac7c-35d417548c54.json | 132 + .../3b51b346-a23c-4add-9623-86c9591eddd0.json | 105 - .../35557106-88b1-4f6a-bf33-17ea6744f208.json | 105 - .../733983fe-4b9c-47e6-963d-c57829b6f1af.json | 132 + .../80c4859d-8016-4650-939f-100ba2e6d808.json | 132 + .../89b55a5a-8f83-4a87-906a-32c1e84b8220.json | 105 - .../21724d3a-cc6c-43eb-9d69-46d8d91c97f8.json | 132 + .../ec8e412e-96e8-43ae-98e1-f605228f3f6d.json | 105 - .../29b19ca6-ec5f-4ef1-9721-cb2199661873.json | 105 - .../d781945e-e9df-4136-90cd-632f0bed6246.json | 132 + .../12f38eb7-57be-45c6-a53a-9d4859413e94.json | 105 - .../8f146bb5-dd4d-49ce-ac60-76f66321feb8.json | 132 + .../22ae576f-6bec-450f-812f-4315779be0a1.json | 105 - .../89bfba6d-c622-445e-b0b9-512aadcea7cf.json | 132 + .../76c364c1-1e67-4536-8f23-85f84f0cd554.json | 105 - .../9c27f2e6-ebbe-4fac-bc51-74455d3a6512.json | 132 + .../455ef1e0-bdf2-49bf-a53d-2c9e3d00d5f3.json | 132 + .../fe0cfe19-b019-459e-a71d-46d55612a95e.json | 105 - .../aae9e150-7992-4241-91af-0c55d03d709f.json | 105 - .../e04a76a6-ac22-43b2-bbf9-196a08de2949.json | 132 + .../2fcb74f0-add1-4d46-8a0f-8578a616dbed.json | 132 + .../b56c681a-592f-491a-aa0a-030848356563.json | 105 - .../51530638-ef76-43ce-9396-8a0d07988712.json | 132 + .../9ae740a8-6d7c-438c-942f-11ac0f6cbe79.json | 105 - .../74d99e4d-0e6f-4804-aa52-0dc76d37fac3.json | 132 + .../c68ca8a7-07d8-4295-a535-a573fc3893b7.json | 105 - .../80e8b9f0-b507-4927-9d24-1c793e3783cc.json | 132 + .../ccf2d437-d3e3-4a53-9249-e6df2fd04f49.json | 105 - .../7b037520-a5e9-4b58-80f3-f0ecc5957c67.json | 132 + .../85b8aede-7eb3-4997-9529-2f7d4603fb9e.json | 105 - .../10b88d05-62d2-4603-9d04-b0854e39ed40.json | 132 + .../6837502d-0f08-48d8-b85e-70f3e07a2531.json | 105 - .../4b693f41-d811-4b64-892c-d840eee5ace4.json | 132 + .../f345f9cb-7233-4f4e-8e8b-a0b607502d1d.json | 105 - .../08a646ba-9b4a-483e-8adf-f4e203a9be5d.json | 105 - .../90d86c8c-3aa6-42ba-a94f-75c961e65c41.json | 132 + .../7f969b69-cb14-4291-a15f-60f2b56e23ad.json | 105 - .../8318ae52-6ae3-45ce-82db-73f8cb5ad7c7.json | 132 + .../b20a1d13-2f14-42e4-bdde-49f053cef325.json | 132 + .../f34988e6-20f5-4d77-9233-70d5bc6193fb.json | 105 - .../51521dfb-d4b5-45df-ac2a-54190aed0b9f.json | 132 + .../8ea866ce-c4a8-4981-b221-ee7b2dc898cd.json | 105 - 
.../643a510c-b9f4-4222-a1b0-09d7d5434de8.json | 105 - .../997a1ceb-185a-4e6c-8383-eb5a6f976771.json | 132 + .../22101998-c3d3-414f-9ed1-99330cdbe3b2.json | 132 + .../ad0aa0da-dac4-42a9-ae62-ebe03aa40643.json | 105 - .../a2408953-a7eb-449c-b80c-3620915d44d0.json | 132 + .../f28fc4d7-d3eb-4915-967a-db97667e85bc.json | 105 - .../03e7b19a-c31a-4bd4-8560-3b8ac4c7c80c.json | 105 - .../d65e5b08-7d3c-4c0d-85fa-496db65a235c.json | 132 + .../4c7575d2-d538-4767-8d7e-d905b11f84f9.json | 105 - .../ce2c9614-46d2-481d-ac25-3cc71a93bd5e.json | 132 + .../4148a653-5fda-41c2-bf7e-1c03d385b7a1.json | 105 - .../e9ba998d-8147-4046-afae-9ee7d544e98d.json | 132 + .../8143abf5-bd1d-4cdd-b555-5135f04945c3.json | 105 - .../c44f1012-1123-42c8-b110-5735dc756fd5.json | 132 + .../5088f6a6-2acf-4d10-8b78-0d5bd4126ab5.json | 132 + .../a452af19-e167-45ca-99d2-5def2e4ad774.json | 105 - .../3ab8b78b-a9f9-428c-9469-afaa4158a0a6.json | 105 - .../b4d96088-5cc0-4ebc-8b8b-8c7e9f90420b.json | 132 + .../2a78f22b-d898-4f92-a2a5-c2930c16916c.json | 105 - .../529dba11-53af-4045-ae46-04e1b9838d4a.json | 132 + .../391f6d6c-418f-44be-910a-fb90b5712649.json | 132 + .../46f57920-759b-4d1a-b2f5-fe66aa740170.json | 105 - .../2ccccb4b-7260-4a1a-9426-117e359c7c5c.json | 132 + .../10be7d08-18a9-43a6-80ea-81d704600eab.json | 105 - .../63d646bf-14d2-4cc7-ab82-efd1645cc1ba.json | 105 - .../84afecec-453d-491c-9f5a-de31d8fba43e.json | 132 + .../dba3a3a4-cd23-44c9-823f-0bd88cf6465b.json | 132 + .../1179bcce-558e-40ad-8537-c74c59557975.json | 132 + .../6b0275ea-f2eb-4a37-922c-d1f734c1a6d3.json | 105 - .../ae2afa83-4607-43ea-be11-86cc57f3b848.json | 105 - .../fe0a5c17-6c8d-4f06-a58e-47648ef9ecec.json | 132 + .../81cf8cbd-33bc-44ab-930a-65242e1ae7b2.json | 132 + .../173bb053-e817-4551-b169-c3f71163650a.json | 132 + .../25b7d35b-8b5f-44ac-afae-e0f71ba8a0ff.json | 105 - .../b7e6a86f-340c-48ed-a828-2e80a13aa515.json | 132 + .../bd221eee-7aa8-4d6f-a6be-89ee5568e729.json | 132 + .../8727a325-a515-4456-ba34-65c30f84644a.json | 132 + .../9ef977af-b10c-4434-bf4c-9783903e75a9.json | 105 - .../3e4011fa-d480-4c16-9371-2025bc834358.json | 132 + .../867499a7-589b-4564-b04d-a004b7c0abb4.json | 132 + .../52f1fb51-fc7e-4cc2-918a-7c7226ae2ce5.json | 132 + .../5f4a8fb6-b22d-4eb2-aaef-da05ca45fbeb.json | 132 + .../3278855d-7bd1-4e7e-b27b-b1393006e7e7.json | 132 + .../5193ab4d-1627-43b5-bfb7-89e08ea1f810.json | 132 + .../598faeda-48fb-43a8-aaa9-849d5dfcea79.json | 132 + .../326fc05a-78e9-4e36-933c-aa0219661e0d.json | 105 - .../d1afa2fb-1256-4dd3-b13b-802917bf481b.json | 132 + .../397c9bc3-0af5-453c-9b68-5360783dfbf7.json | 132 + .../6cb03909-9850-4519-9e67-f2d875652e02.json | 105 - .../9bb39652-c79a-42bf-b6d8-c4ed6174a4c7.json | 132 + .../51a11592-e099-4059-9e97-f8924e1c2437.json | 105 - .../7e793244-b746-4aa4-a401-dcf5884f61a4.json | 132 + .../26a8da03-debd-41e3-8ee1-2827d76b26ca.json | 132 + .../e214c326-dd84-4915-bba1-faaafbb026b2.json | 132 + .../98a5ea0a-6e45-48f8-8219-32099b9fa9d0.json | 132 + .../af2f11cf-8efa-4c71-a0b2-74f953b8e61b.json | 105 - .../40d7d17d-2d41-4d23-83c1-ab5f3320e36e.json | 132 + .../6619dec7-71cf-4be6-90e2-815e8dd4e56f.json | 105 - .../ce4ddb86-646e-4c59-8a03-3687dbb77021.json | 105 - .../d881a83a-9ba8-4919-8b89-45f5a7220621.json | 132 + .../08efd69e-6ff6-48a1-b260-ddbb4a942d12.json | 105 - .../d6c966a1-7927-424a-9886-b98688d27e6f.json | 132 + .../737cda34-7dea-4c68-b6a3-5b10066f9241.json | 105 - .../c09fe163-a7f7-4b6b-b407-ee8d698b2ee8.json | 132 + .../b3979c7f-0596-4a24-b264-73a17ba19821.json | 132 + 
.../012fb237-8082-40d9-882e-0dd7bc9c74cb.json | 105 - .../f6156893-92e7-4c4f-bff4-8b6d774ecbd8.json | 132 + .../869daca0-a700-464d-a551-290ed454421e.json | 105 - .../8b1c19e0-8b47-46ae-8bf3-f84c7d3a9c0e.json | 132 + .../6221102e-4e8c-46dd-8c03-fa9e92b7e4ea.json | 132 + .../8822f27f-90ec-41a8-b71a-611f7c5ad590.json | 105 - .../329e5e91-10ba-4795-ae86-dda95e698b4f.json | 132 + .../fa3c7a13-b37e-40b3-b814-b1ae421081ba.json | 105 - .../2ede8e21-33e9-45ac-9c60-9a4bd7e8e3cb.json | 105 - .../3fe89b13-135d-4790-871d-74e7a28ea2e9.json | 132 + .../4b807741-f1b9-4964-9bc9-bb93f9b34217.json | 132 + .../85ac95fd-cb36-4158-818d-69c45f83dae9.json | 105 - .../c52a8a4d-be91-4a0d-8cd5-8473a42f0978.json | 132 + .../fd2e3c0b-8b35-463c-a001-444ed6e6dd9a.json | 105 - .../f6e157c4-0ce9-41c9-b885-9222d894ff0c.json | 132 + .../36137543-78a7-42a6-ad41-a4121797eec4.json | 105 - .../fe52a94a-5324-4b59-accc-dfd1f9d4aead.json | 132 + .../1241f5e3-54eb-429e-b109-a5e163e39eda.json | 132 + .../89742249-c51e-48e9-8bf1-7aad55e222c1.json | 105 - .../8ccc7c8c-1d14-45bb-9a6b-f8f69e506139.json | 132 + .../c8a287fc-db9e-4088-aafe-0562aa305011.json | 105 - .../5531b59e-24c0-41af-ab6b-d6a5e38b0a98.json | 132 + .../63e82cb3-2f6f-4617-abb7-ae093bc27830.json | 132 + .../0feb74e6-40d4-472d-9233-27faa2d3f802.json | 132 + .../e74dd005-c9b5-45c9-b7f5-455c3110e09b.json | 132 + .../d094bf6f-9952-45c7-995e-d7eda07f4668.json | 132 + .../0e5f3393-8a6a-4f2f-948a-a37ae4d8fdeb.json | 132 + .../f91982ac-0cab-415a-8503-e090d195bd05.json | 132 + .../fb1af66e-7828-495b-8277-5cff77c3070e.json | 132 + .../ac84c157-4d11-43c1-8731-b1e5cfa91668.json | 132 + .../bbc812dd-9a9c-4f99-b813-50361025eea3.json | 132 + .../fc818799-49d5-4fca-b131-ebe8d5d831f1.json | 132 + .../33349989-8573-4d71-ae0f-99691fdaffc3.json | 132 + .../91551de5-d8ac-4c0d-b9b4-3627db947f0e.json | 132 + .../c2d2c1f4-aaab-45f1-b3f6-5b4ea56b696e.json | 132 + .../36821a8b-af18-4631-b4b0-7e4b37bb194b.json | 132 + .../e402d129-f4f1-4b95-b079-4f30936119aa.json | 132 + .../814e1ea7-a639-4b05-9208-0bf537ea5479.json | 132 + .../35a50d36-31d0-454b-a13c-80ca26945f94.json | 132 + .../b32f3852-47ce-4ca5-98a0-5e2f166a11e9.json | 105 - .../7e511f3b-7d8e-44c4-ad3f-7f6e66231109.json | 105 - .../87347017-4ff1-4bd3-a1d7-8f3999061209.json | 132 + .../317a27cd-9458-4157-a304-0c1e3739d0fb.json | 105 - .../976184ed-c4ed-4898-83c7-521a8a8309ac.json | 132 + .../6fb1242d-bf20-43e6-acfe-77a88c020eee.json | 105 - .../fa52f072-7725-4a4e-b728-042e5897a1bd.json | 132 + .../6374dcee-301c-4f28-9316-82ed8e693089.json | 132 + .../cb14b942-7c2f-489f-bede-d25279ea39ac.json | 105 - .../b7c95cb4-f32f-466e-a28c-32afd9ec5578.json | 132 + .../c6b7d02d-4d2d-43fa-95a8-aa188f38120a.json | 105 - .../653cb458-4616-4325-b377-a79ee4a5d9c6.json | 105 - .../bddd742b-f7c9-44aa-ad2f-83f51a4625be.json | 132 + .../099af0ee-c06b-4435-8f97-27681f3eddff.json | 132 + .../6f16b360-346a-4299-8f60-fafc0bb8ebcd.json | 105 - .../fa826f3a-8688-4518-8d44-68189abb47ba.json | 132 + .../10d29dc0-3486-40df-9933-1ce8f0fabaa2.json | 132 + .../741ff375-3392-461e-a9b0-e0dab4e6e9f8.json | 132 + .../c3d709de-118d-40c2-ab89-040efedd7fdb.json | 132 + .../9be3dd27-93fa-49e9-a628-5a77a8a3bb9a.json | 132 + .../0f29b1ac-1943-463a-8a79-a4c0ace371cb.json | 105 - .../be850d1b-bf75-4c34-830f-8881792ac842.json | 132 + .../6b644b97-4fc3-4826-9ea9-68be1dc8e947.json | 132 + .../861d41f1-6d33-4e07-96ea-2c39a36c4b63.json | 132 + .../7501b038-4847-45bc-8b92-6800d7a58c1e.json | 132 + .../db48206d-700b-45f3-b597-8752110113b5.json | 132 + 
.../b52b76e4-9dec-4336-88b1-d98b95b95d2a.json | 132 + .../ba9ec2ea-2bce-4999-9e48-e1d0795b31d0.json | 132 + .../724221ce-d7b2-43cb-8e16-72ac529a7b60.json | 132 + .../552f3814-d071-4d00-a895-b739dffdcb2d.json | 132 + .../d3819133-bae8-493d-9a86-aee67da5d115.json | 132 + .../5c3a022f-7221-4b4f-ab67-d5b69c558434.json | 132 + .../c161b868-746f-4d88-9f41-eb8283a7b87a.json | 132 + .../f79a76fc-09ff-48c8-b0e7-5f18e0750e6d.json | 132 + .../334bc38a-becd-405b-8982-dfaf5de35c4b.json | 105 - .../39f4d1ab-fd42-4746-b949-9666ce32f9d1.json | 132 + .../8348f316-9109-4229-9fee-edc02431befa.json | 132 + .../eaa1adca-5379-4aab-bf39-8641df58a530.json | 105 - .../6b2346c6-5fbf-4195-b3bb-66bbd446ca53.json | 132 + .../844c959f-6859-4220-bdd8-99e6af53808b.json | 105 - .../2bb16fd8-516f-42d6-91e1-2f3f4024f0d4.json | 105 - .../8645ffc1-6487-4205-b8b0-e980e094ac6c.json | 132 + .../183cd87c-2415-4428-9ad1-9d41c0dcdc41.json | 105 - .../2c6d1e57-7673-4a86-808e-6ff6a7146a11.json | 132 + .../5b0377fc-5df1-4ed0-bad4-ab13bc42677c.json | 105 - .../64ab8b1a-62be-4561-8f0c-e42f1fe37178.json | 132 + .../3eb22885-eb7c-4c85-b79f-cd47ffacd551.json | 132 + .../b8e50988-f2c5-4508-a5c5-2813d94f7ebd.json | 105 - .../26c5c07e-8482-44b4-8f11-a602e79fb730.json | 105 - .../8956d608-c627-469b-943d-bfad6c7382af.json | 132 + .../81d006e2-3be1-4941-bf85-74f1b313c7d7.json | 105 - .../9ff060c8-d4fa-4880-a0cd-9581f5c2f574.json | 132 + .../e3d6b3d7-a231-40c1-bac9-0b7fcb478bca.json | 132 + .../f453cb41-346c-48b4-a660-64f13ec69fe4.json | 105 - .../20acb302-3a74-4425-af4c-a1d719b90a88.json | 132 + .../869339ec-939c-4222-b178-533c3ca5b0d1.json | 105 - .../4c3005e9-fffd-491b-8ce1-58204986b787.json | 105 - .../a8613588-687d-4291-ae5a-57688501cffd.json | 132 + .../83dd67cb-5508-4aa5-9435-d5585b7f3d52.json | 132 + .../977449d7-d8f0-4e32-b56c-8950006a09a4.json | 105 - .../03d675d8-ee8d-47de-8bf3-ef386bd8a88f.json | 105 - .../26d981bb-f2e5-4195-8d6f-594bb0b26f4a.json | 132 + .../df06c977-b54c-4668-837f-eb583ef24d29.json | 132 + .../31a8ac03-f58b-46e3-9f17-53311b1fd506.json | 132 + .../3e4a7141-7a82-421a-a107-bbac3cbafc9b.json | 132 + .../e0f596ba-89ee-4fa7-b5dc-698c2a5fda95.json | 105 - .../9a3069f2-81ed-484a-b6e6-a45a259e9a43.json | 132 + .../df26db97-8e5e-409e-937d-45951c81a8cd.json | 105 - .../c0a3d0c3-c541-4606-a925-4100b062284f.json | 132 + .../20685a4b-686f-4cd4-b49d-3067a005256d.json | 132 + .../8b330a87-7689-45ae-a005-0349e09f07ac.json | 105 - .../27575e22-2e66-4177-aa8f-ab4ebd4743ea.json | 105 - .../85a91293-cd51-4f79-8b98-2f4bc67d78c1.json | 132 + .../d2e3a6c2-4e67-4150-b9a8-fec979fb1658.json | 132 + .../f4fde074-8a05-42ec-884c-447b4bfaba39.json | 105 - .../c4d686f2-2af1-4271-9556-09380f07ba5f.json | 132 + .../93167303-b38e-43f0-a552-72c26ccb4339.json | 132 + .../b52a176f-f369-4791-a7e3-88a72709c868.json | 132 + .../b6310012-17f1-4ee0-abd0-0079a9299350.json | 132 + .../7aa1c718-9ac6-426b-be50-5c7f37849b90.json | 105 - .../f581e832-0f77-496e-bcd3-6cfec51ef594.json | 132 + .../47b47c89-b13b-4099-98b2-854feae05f63.json | 132 + .../a720e9bc-e8dd-4b7a-8d22-7b9f4b42ebe0.json | 105 - .../8d51ae58-7b20-4fa4-b234-2abb9cdeaad4.json | 132 + .../dbadece3-665b-423b-b2d9-e74d7c676133.json | 105 - .../4d4d5679-8ec6-49b8-a5d7-2a76497b44b7.json | 132 + .../0bdb6574-69e2-4858-b7aa-a90a5fadf741.json | 132 + .../fa1a92bb-ad25-4be2-a35f-7fdebbeeeba8.json | 132 + .../d62ea0a1-cc9d-41b7-8d60-479b8e2262b5.json | 132 + .../912446e3-efdf-4ed0-80bd-261c6c87a3d0.json | 132 + .../5e86dc31-ae3e-4ef7-858e-41e29b3a8031.json | 132 + 
.../80680e5e-ab83-4a59-aeec-9d4166509c47.json | 132 + .../c5bc9c92-8469-4174-aafd-67bb61aaccf2.json | 132 + .../1d67b792-178b-4baa-a108-2362f658bd4e.json | 132 + .../eb0c87b0-4795-4029-82c1-57ce37ba8259.json | 132 + .../dc9b2300-7ab0-4e92-9d23-15fe9ca52994.json | 132 + .../e005624d-c822-4be1-9477-873642aae228.json | 132 + .../4e9eef3d-b851-41de-a3b2-88950f1d426f.json | 105 - .../e9756d91-b9e2-4dd0-bf08-c6154c7d1f2e.json | 132 + .../2a0bcf8c-cf70-4d13-a713-67054bc98412.json | 105 - .../704598c3-c5d6-4ce0-bab3-0fa98118e16a.json | 132 + .../f24a1f02-da21-49f0-91b9-65df4fd770db.json | 105 - .../fafc9463-d725-4827-8bc1-5cd9e83814b6.json | 132 + .../109820e0-ee00-449c-9ae5-58a7bf1da5f8.json | 132 + .../2632f42e-cbe3-4c55-b434-f4a239aeffa4.json | 105 - .../0f7f339a-5523-4551-ba77-4fe34779d017.json | 105 - .../37f29d5b-d803-4195-9ce0-75e45e32c160.json | 132 + .../43546f48-8c46-4481-b1e5-f4b1ad2535be.json | 132 + .../764c4dcb-caea-418c-b206-ee401ea0d979.json | 105 - .../51cd189c-82a8-4475-8df5-9a855394274a.json | 105 - .../ec81e0ff-9cb4-4d43-9f78-1d5f4edc9103.json | 132 + .../02fe0385-223e-4578-b3fb-d6819f783861.json | 105 - .../9290c86f-40b0-4520-b8aa-3460de62c396.json | 132 + .../3ec5106d-86be-48a8-bb3d-6574b6971641.json | 105 - .../a4bf576e-9556-4956-8dcb-4d8906d45db0.json | 132 + .../320a5c00-3307-4bc3-9f47-9befb88e461c.json | 132 + .../cd68d6d9-a5c7-4f32-b372-0e954af830ad.json | 105 - .../81b5a281-9dc6-4ae5-8079-d0e308a20c8e.json | 105 - .../844d1556-6bc6-467e-a145-f92646770727.json | 132 + .../78923f4b-c2e7-4472-8398-10a0a8453ec5.json | 132 + .../9693b68f-ac5c-4111-804c-0505ec8bf06d.json | 105 - .../17abe1bf-2e97-409e-88e3-4f661861a195.json | 132 + .../5064ebea-3ec3-4344-867f-e33f8937d096.json | 105 - .../062e407e-7820-459f-83da-b670f8adff9d.json | 105 - .../756978e5-1dfe-433e-ba88-339004a50ea7.json | 132 + .../a889ae3a-5d86-4454-bfb9-332c4b61b836.json | 132 + .../b1669ad9-450f-4a93-8094-26f427beb49f.json | 105 - .../2c5e1086-03b7-4cdd-801e-03fb26183076.json | 132 + .../afb014ed-a2e6-46b9-9ee9-a6a1f52e43cf.json | 105 - .../c9632855-db4e-40bb-b140-2ff524d31fd2.json | 105 - .../d9578847-b732-4c75-b246-9cdf03674fe0.json | 132 + .../4c6f83fe-7896-4cf3-9434-b5f8d499f5ba.json | 132 + .../9db3b6b0-7cc8-48b6-85f5-1662cad07fae.json | 105 - .../619037af-d528-4579-b7e3-58628468d8fb.json | 132 + .../f86cf126-4fb3-4419-82bf-e5c0168e25cb.json | 105 - .../4ed1f68a-6bc9-4621-beb1-3d274247cdb6.json | 105 - .../5113b737-8d9f-4321-9a67-91f1aabb40a1.json | 132 + .../50627b31-a8d4-401a-8449-5f33cfb17893.json | 105 - .../641ac372-2e5a-4b44-b22e-a17600a6a868.json | 132 + .../51fc3a16-67c2-448b-9854-07ab8adc4dea.json | 105 - .../7cbb0b08-871d-48fc-bf3e-86267f5ef19d.json | 132 + .../84de36db-b427-40c4-80f6-2114c8ad4e4f.json | 105 - .../c82e887c-c8ab-4221-aa0b-e8b7a86e7c46.json | 132 + .../05666c00-3b8c-48f3-9e36-bc9a116bb0c6.json | 105 - .../50c65a83-9d08-4155-ad2c-5a2f8ffc8743.json | 132 + .../99d97aef-bb6b-471b-8ed7-f6f92f75842c.json | 132 + .../cbf68d01-b993-4bcd-b174-23e3b6e28d3a.json | 105 - .../4800a6d0-8458-405a-95ca-6d0690a8f769.json | 105 - .../b98504a0-f1d6-4872-b748-2ca8199c5328.json | 132 + .../5a159667-7460-4a97-884e-6a96df59873b.json | 132 + .../95c9ef47-8194-4c00-bbea-a65a7715f9f3.json | 105 - .../16a2eceb-073d-4dc3-87a7-a15c641c5ebb.json | 132 + .../b88d6df2-5642-4837-bf04-4d804a4ba3c4.json | 105 - .../679f1499-572e-4f60-9b2d-4c8199d71107.json | 105 - .../e8e2d04b-21db-43dc-8b8f-7fa3bba87abc.json | 132 + .../8da1b04b-c3a8-4554-bcb5-0e08dcfd7483.json | 105 - 
 .../acbb93b3-f8fc-479d-9610-392efd7d4ecc.json | 132 +
 .../6d0589bd-1f05-44ee-afa5-3657b960d7c9.json | 132 +
 .../134663d8-05a8-4336-90e2-68e7cba5f1df.json | 132 +
 .../8ab597da-85ec-45d5-b5e2-f51ca8a2f3c9.json | 105 -
 .../3bfced28-b06e-46ab-a6aa-171b0c424337.json | 132 +
 .../b6a83b82-6b05-4437-a076-e2a3982f6169.json | 132 +
 .../f4c341cb-6489-49a1-9532-6b78c2238b2a.json | 105 -
 .../f621201b-f571-4487-9f1e-b767675c659d.json | 132 +
 .../710fdb79-fba4-42da-8e26-45b4caf75207.json | 132 +
 .../7e72df4d-7a54-4e11-b4a2-44224db285ec.json | 105 -
 .../35fa7a5e-8866-4ce3-9899-8737e908f34f.json | 132 +
 .../2b24b69b-15dc-4666-83f3-c77db545bdbd.json | 132 +
 .../0d00d849-2147-4fc1-9e5f-d42a95be6ca5.json | 132 +
 .../7385392b-79e9-4764-9326-d7bc1586b918.json | 105 -
 .../f45135b0-3c26-44b5-9922-a6c0817a172d.json | 132 +
 .../67eb0d6c-9086-4c80-8506-c3e1489f2673.json | 132 +
 .../ac6f2c5a-32b7-4553-acaa-e329f1916c85.json | 105 -
 .../79d3dc85-08f6-475c-ac2c-1ff32f5a089f.json | 132 +
 .../ff57f4fa-eb78-4ef4-9d92-9f160a1b936a.json | 105 -
 .../4e9b3fa2-d3d2-4e4c-a1fa-c812f481f64a.json | 132 +
 .../a7e4718c-c4cf-4c0f-b67f-fd12fa54e4ad.json | 105 -
 .../6e62a8a0-0bdf-4b6c-93de-593423dadd3a.json | 132 +
 .../89568570-298f-4dc5-9b7b-c9ce84d4010e.json | 105 -
 .../871131c1-295d-40a0-a396-09d24b880064.json | 132 +
 .../d78a23ac-c3f1-4ad5-bbd2-ea37faea455f.json | 105 -
 .../44eefbb2-22d4-4dff-889d-a87fc40b2eea.json | 132 +
 .../7312a4c6-85e2-4cb3-9c3e-1dfc039d1c3a.json | 105 -
 .../cd1de470-a174-4c08-9efe-a06d493dc4b2.json | 132 +
 .../c948d98a-af63-43d6-a7c9-9ee61654a239.json | 105 -
 .../fdb55a14-0697-4775-8358-fed202498b4f.json | 132 +
 .../c069a224-638a-4cad-a9ad-e4f8579e8c15.json | 132 +
 .../10e5c103-f25f-45bb-bfe6-a22876cffe87.json | 132 +
 .../8c7e09ef-ac37-4765-9f1e-a1b17ff4b084.json | 105 -
 .../a9ecca9a-c5d4-45b2-a403-e74a98a46322.json | 132 +
 .../c87fbaff-133e-4312-87bf-d2fa397d66c4.json | 105 -
 .../630d8a60-03b7-4550-82f4-e879b2e01c6c.json | 132 +
 .../69409961-b60d-4616-8a8e-8d0a9c6c966f.json | 105 -
 .../206b5a96-ae07-41fd-822f-436d49c57dcb.json | 132 +
 .../2989b505-bfe2-4ca6-9445-af450ad9bee3.json | 105 -
 .../5a607a63-42bc-4f2b-af2f-4126234516d0.json | 105 -
 .../702d2120-5301-4e03-bb0f-1f8ab19e522a.json | 132 +
 .../5fd04483-684e-4991-adea-ca5496e05208.json | 105 -
 .../61e39700-c237-49fc-baef-3fa573b3b0c6.json | 132 +
 .../8892ab84-750d-494f-9f87-ad28e73cf364.json | 132 +
 .../bb3ccfe9-1ae3-49ec-9305-9150edaf8527.json | 105 -
 .../538a2eb7-34e4-4e78-a382-60a13710096e.json | 132 +
 .../83a86bdd-4605-44a5-8168-ce88242c4ee6.json | 105 -
 .../79cd4642-8b10-416b-8a24-e3e3dc99b28f.json | 105 -
 .../a041629e-8ed8-4a6c-95ee-98e759501e19.json | 132 +
 .../09f05984-5815-4b3d-bc73-83ea1e5ecc27.json | 132 +
 .../1d97c368-3e12-43d4-afb2-e3977bf7cf35.json | 105 -
 .../6535524e-f8cf-4f2f-9d89-9ba70aedac91.json | 132 +
 .../08ea4f9d-0e3c-4a8b-85e6-075290d30ba4.json | 132 +
 .../51411c24-49a4-48a7-9079-1f8c06e5318f.json | 105 -
 .../631f0a1f-a6f5-46f6-9aa0-31ac9764c086.json | 132 +
 .../3c870b5c-ab3f-4a21-836a-655d0e30efb9.json | 105 -
 .../b771f6db-7516-4423-9010-3467db0e26e3.json | 132 +
 .../cf580dfb-2924-4c4b-9352-394275b959bd.json | 132 +
 .../5623efdd-2f43-49d3-9e89-21432db474f4.json | 105 -
 .../ba549fe6-7718-4abf-a610-7e0f48611483.json | 132 +
 .../b92440b1-78a9-4288-a432-f057f2b04a2f.json | 132 +
 .../838f3932-edf2-4f72-9238-981d1aadc771.json | 132 +
 .../b5009142-e716-45b2-877e-9259a3a705da.json | 105 -
 .../61e933b2-5cd1-4f08-8a9e-5b06ef54b6d5.json | 132 +
 .../0b307c78-94c7-418f-bc47-5106b81c30de.json | 132 +
 .../b21f94af-3dfd-42f6-a380-3c5faebc90d8.json | 105 -
 .../18783694-3e7b-4d06-9378-5a3fa4a7a0a2.json | 132 +
 .../dab922e5-1b46-4a90-b75c-1b26cd6cc6d3.json | 132 +
 .../ec976588-9788-45e0-ae89-4682e3c8799a.json | 105 -
 .../8cfa1f00-3b26-4d75-9b0a-0dea65e2e352.json | 132 +
 .../b227d987-1bec-4124-955a-d81e2e2a52f6.json | 105 -
 .../f74d26e6-9dfb-4e81-8522-8309b27760cf.json | 132 +
 .../2022bcf3-a057-4b0a-aa33-6cf074ffc714.json | 132 +
 .../5a0ae810-10a3-4497-a81c-a88d2106a5ba.json | 105 -
 .../a6e79d12-42f6-47ad-95fa-ba03fa4d3a06.json | 132 +
 .../108befbc-f9a6-4d5f-9bcf-30fe7cebe35b.json | 105 -
 .../24d850fe-1817-4041-8767-085f4bd2bac3.json | 132 +
 .../610a3be1-1032-4079-ba37-d6c2c5f9fd55.json | 132 +
 .../df2fd3a3-33d0-4ee8-be73-e8d3e00e8184.json | 105 -
 .../857bb10e-1b43-4714-a758-0cef5816ba02.json | 132 +
 .../0851ad0a-7f87-48c8-943a-198ad2ef8ea3.json | 105 -
 .../cbe8101a-f057-4151-9391-dbd883f4c09e.json | 105 -
 .../fae2328b-af2f-49ff-a817-9406cf40c3d0.json | 105 -
 .../2e3e8be1-725f-4662-a8b1-da4437018e31.json | 105 -
 .../c97c2d67-79d5-4813-8569-64eaefe66f89.json | 105 -
 .../1d33cf05-9690-41ba-9288-5f39e5b3c17d.json | 105 -
 .../19a6e24f-819e-480f-a15f-90273a0a06c5.json | 105 -
 .../ae5f1f84-091a-4f80-ae40-92ada7e04f94.json | 105 -
 .../3c1f129b-4f54-4187-876b-c93942179125.json | 105 -
 .../03c78dad-b50d-4f80-91f8-bd8fbb87235d.json | 105 -
 .../26596bba-b99d-417f-87be-91de8fa528d3.json | 105 -
 .../a0de28f1-8186-4eef-b5b4-ce6da71d8271.json | 105 -
 .../99c4e277-7a0f-4c0c-ac19-25fe6b706a4a.json | 105 -
 .../f852dab4-9c5a-4fb9-99c2-951e7d2300d0.json | 105 -
 .../904e3917-3bfd-4c83-8088-6b5ac596e7ea.json | 105 -
 .../ec9c46a6-a0e9-4174-8ebe-ce33d5eeb27d.json | 105 -
 .../9ca4809e-2bf0-477e-b960-64718561583b.json | 105 -
 .../7f8d4c8c-4877-4b2f-a0fe-7817894daa79.json | 105 -
 .../e2668c3c-a862-4564-acee-3c3ce439f74f.json | 105 -
 .../979ef5b7-12cb-4e4d-81c7-9e6fcb1d6cef.json | 105 -
 .../01484796-f32b-43fe-b865-517b1a5c0b10.json | 105 -
 .../c256cede-47bb-487d-9de2-ae7352faa165.json | 105 -
 .../42a3e3b7-b8e3-4470-b1a6-4a3daa146484.json | 105 -
 .../0670ba93-c3d6-4a74-94e4-4a77311d4984.json | 105 -
 .../a4b935d4-1664-44e4-ad82-639755c2b909.json | 105 -
 .../20b46645-a1dd-4974-9ad1-444f8ca78481.json | 105 -
 .../01a0a741-5f78-4c31-a743-8e42ba73a22d.json | 105 -
 .../83f9e48d-919e-42ec-8ea4-cc933a1b98f5.json | 105 -
 .../e2d5ee61-4d0a-4925-b3bf-016b8ff6b1b9.json | 105 -
 .../5e116cf4-1be5-44aa-b266-494b1e4127d3.json | 105 -
 .../a3b69c21-b6bf-4bf9-9097-ebb26c586829.json | 105 -
 .../d827463a-19cd-4bf2-8823-399b22b57387.json | 105 -
 .../efad116f-dfc7-4a63-95b1-c61655cd7f0c.json | 105 -
 .../5af2dce8-b12c-474c-b9e2-b5a38687772d.json | 105 -
 .../f2b1fc61-a1c4-431c-b507-7d222ac3aedc.json | 105 -
 .../62d01464-4163-432c-a017-bedf41cba649.json | 105 -
 .../a9771320-cc89-43fc-b398-7797505bc4e2.json | 105 -
 .../c380c4b0-7804-4b59-a7e4-700f0a7122b3.json | 105 -
 .../5723e611-e7e0-47c0-a5ac-162f22690d70.json | 105 -
 .../07d16051-fe48-46e6-a47c-806e9f95a92b.json | 105 -
 .../7a91746e-e622-4eef-aef8-5f0ba04f03c9.json | 105 -
 .../0da22342-b4ef-4dd2-b4f5-327710986701.json | 105 -
 .../f8e00446-f253-4ff3-a9ff-ef182cf9e147.json | 105 -
 .../455764e4-7b66-4189-b2e8-907047a92d45.json | 105 -
 .../40bc60f8-aa35-460b-a7af-b4cccd138c80.json | 105 -
 .../74f0ecd4-e04a-4775-9551-fc0e9fa40314.json | 105 -
 .../a4da2ab3-adb3-405f-9bb7-2164d740d424.json | 105 -
 .../bee65c80-73f2-46e5-9532-8f92b38c4fc5.json | 105 -
 .../1c5ce85b-84f3-4ac4-8a98-9d80659bff18.json | 105 -
 .../ca297bdd-d804-4c43-bb6e-0b7e230974e2.json | 105 -
 .../6424a285-b3dc-4221-b3ba-5e7922185269.json | 105 -
 .../490df557-2f50-434a-a28d-a78a234da9fa.json | 105 -
 .../bbc78d6d-09e3-410a-9bf9-a6dcdbef346e.json | 105 -
 .../f1e005a2-b949-4518-b7e5-3fd7af3fcf0f.json | 105 -
 .../39a6c969-d938-4e4c-9adc-f71f1d30143d.json | 105 -
 .../cf0ca830-4bb6-4317-97ae-380f54518d9f.json | 105 -
 .../32c712e0-4f63-4188-b4c8-5f37b6101e3f.json | 105 -
 .../cd4698d8-e9d0-4a00-855a-6e0b9cfc31d8.json | 105 -
 .../88e9cdd1-ad46-4ad0-9e9b-d872cdb63257.json | 105 -
 .../60d939fa-9ae2-4226-a955-d586c27fea68.json | 105 -
 .../1bfd3789-e95b-487c-9c8a-516c017f6558.json | 105 -
 .../85ff1b65-eade-4d70-a278-99605f324e5a.json | 105 -
 .../5938f7d8-dddb-4989-81c6-e57e177e52c9.json | 105 -
 .../ab812077-8d2b-40f8-bc49-65fffd7f6f26.json | 105 -
 .../610f3053-b2a9-45a8-ac09-af3edcb8c826.json | 105 -
 .../14560449-0481-4346-aab2-ff75fdab691b.json | 105 -
 .../807ed760-775e-4082-90ea-7b524038bebf.json | 105 -
 .../392ea212-afd9-44a3-a6bb-2bba8f124492.json | 105 -
 .../536229bc-b1fb-4078-826c-074b09c362b9.json | 105 -
 .../b77a4371-97d7-43a0-892f-a0c01c2b8528.json | 105 -
 .../de05ec0d-805d-4aa5-8ec3-1dc7446e6c1a.json | 105 -
 .../2790feab-6850-4d51-a3a1-78ada0c56d03.json | 105 -
 .../42a38b08-6eb7-449d-99c5-cb0b2b76dd06.json | 105 -
 .../9ce9031b-76fd-4c33-b209-3011643d9266.json | 105 -
 .../5ea20ab3-9d05-43f1-a276-7acbd2229fe8.json | 105 -
 .../febdde9e-8e67-458b-be79-6a9c91a7237a.json | 105 -
 .../3d70d2d7-1510-45de-93dc-1ba93cb0f24a.json | 105 -
 .../52e6e50e-4621-491f-9e46-8d6d398c4344.json | 105 -
 .../66846c9d-e2bc-416d-95b4-fed31d1b781b.json | 105 -
 .../52eb695b-3d17-4abe-a386-7927348e5dd5.json | 105 -
 .../22a01298-038f-4069-b847-43409d2d4baa.json | 105 -
 .../636b3b4a-dc1f-4008-83ba-0d83fdcd5acb.json | 105 -
 .../a3abb802-acd8-49c7-bcff-3b79a4023d96.json | 105 -
 .../10d1f626-64f0-4f43-9597-1221cf94c948.json | 105 -
 .../d7410909-8a7c-4afb-9cab-2537f837a9a1.json | 105 -
 .../be7d90fa-86be-4f3b-a3ef-2e1475b7bd64.json | 105 -
 .../fb698ce2-d422-46eb-aa98-17fb7645461a.json | 105 -
 .../69037dce-5276-4e26-aa05-0a7bd2c4739b.json | 105 -
 .../d27e73c5-654c-48c6-ad60-652a60bda72c.json | 105 -
 .../98616cce-563a-4977-b5c0-bf8df3102303.json | 105 -
 .../8c8a47f2-c8cf-4ea8-b0ee-0180aeb1b9f0.json | 105 -
 .../5be7b084-b018-457a-a5d6-c9e3e9d3f70e.json | 105 -
 .../11a0fc6d-5370-456e-8c01-5d7ed19e4b59.json | 105 -
 .../3b05e3fd-4bf0-42a3-8dc5-13292ece8c77.json | 105 -
 .../26dd2a1f-27ae-4311-9b80-f5a8f0fa456a.json | 105 -
 .../e5843711-00cb-4167-a47d-4874af0c3ba2.json | 105 -
 .../670580f3-ca8a-473d-a3df-8c01952bda00.json | 105 -
 .../00332c0d-d698-4ecd-9c2d-5f56921709d5.json | 105 -
 .../2b993039-8980-4578-a9e2-a22a39385664.json | 105 -
 .../72cf7999-e4cb-4987-a694-cdcfae37bb02.json | 105 -
 .../0c22748e-74ad-4bac-a714-c64a19a88af7.json | 105 -
 .../4293bc9f-4968-4af9-acd2-0ada64be43d4.json | 105 -
 .../5f6f312f-3131-417d-b12e-3e30bb998d27.json | 105 -
 .../ec1bea6a-91e2-41c9-ab54-af84bf1a1d15.json | 105 -
 .../02d060d9-d545-445b-8d22-4ae117b8f324.json | 105 -
 .../13881952-9fe3-4308-93d5-912e59465d6e.json | 105 -
 .../9fb11511-0c66-495a-b634-da6bb0934706.json | 105 -
 .../5f540be5-6932-41f4-b588-b88f8cfb89c7.json | 105 -
 .../629b8df0-6ce3-4230-baf7-45b3944bf0d5.json | 105 -
 .../0338e807-8f8e-41d9-b4ac-d80239340678.json | 105 -
 .../c96743a9-b5ca-40ab-a86a-ed1c7ab8ddfd.json | 105 -
 .../0f52efcb-1b9b-4df1-820b-a8c0698481a7.json | 105 -
 .../82d77852-64e4-4dd0-a636-785958786fd2.json | 105 -
 .../2084dde6-b1e3-457b-9854-ace18cc5d943.json | 105 -
 .../267ac6ef-168e-489b-a7cc-0ff448b0acbf.json | 105 -
 .../0c540f58-808b-42fc-b4b9-346367742f70.json | 105 -
 .../478f0d4e-41e5-41c7-b9da-07db69c1d561.json | 105 -
 .../d997330d-6679-4d63-839c-677694ea4abc.json | 105 -
 .../31a37662-052e-440c-a475-66543b6c52b1.json | 105 -
 .../c819ae59-5f32-4bba-a835-84fa9497de6b.json | 105 -
 .../ced5680b-ff4a-42be-a609-6fc2541d6109.json | 105 -
 .../f58be76c-043d-4ad9-81df-9a94d380808c.json | 105 -
 .../80e08062-397f-40d4-b6b2-a3e03d9cc320.json | 105 -
 .../22c931f2-cf99-46b1-b4f8-50db5a172a66.json | 105 -
 .../e216df49-368d-457f-9153-e33741b7b847.json | 105 -
 .../8b86e8c3-eb04-41a8-91e3-3eef396aca4f.json | 105 -
 .../9112c2ec-cf0e-4d2c-9261-14ebb8706d69.json | 105 -
 .../66d7e97b-0a79-4d39-8d6b-cf083239aa93.json | 105 -
 .../7e8b2abe-68e5-445b-ae22-5b827e53b72d.json | 105 -
 .../8cf1e62b-f646-4082-9d10-8cf376154d40.json | 105 -
 .../0acfe83d-3876-4c08-9b26-931450d24bfd.json | 105 -
 .../ed373700-5ff1-4a84-8746-12ec4c278e00.json | 105 -
 .../3378460d-d044-4c7e-ba9f-48cc94f0bc3f.json | 105 -
 .../16b6df0d-8e1b-4bec-b3f9-060273a4ad15.json | 105 -
 .../53ae919d-c56b-415f-87c0-c6273730357b.json | 105 -
 .../83da2d8f-542c-4d21-88f9-b83f4e960579.json | 105 -
 .../ed950058-9f6b-4ed6-9d41-0d2674dc19d1.json | 105 -
 .../67010272-067a-4dd4-a31d-9da58d72118e.json | 105 -
 .../9aa57eda-6d6a-449e-801d-96e16499ddd6.json | 105 -
 .../bedae6ba-9f3b-435b-bb7f-cadb7a684804.json | 105 -
 .../8a3df59d-9f38-4682-a760-5fa7903cab99.json | 105 -
 .../62ef54cd-d97d-473e-9dd2-42fe185e4d04.json | 105 -
 .../b81cbefe-7c08-4bc2-979f-10caf20fa9fa.json | 105 -
 .../78ecc0f4-dcd5-4c25-a598-ef95114f5868.json | 105 -
 .../f8448236-89b9-4a9c-949b-9bb45db5e400.json | 105 -
 .../3b2b7ebc-be82-4d7d-8bc8-e718513d164c.json | 105 -
 .../ca49f981-e4eb-4235-b472-de832ffedd72.json | 105 -
 .../ca856917-9100-41ea-9900-91d12be1de44.json | 105 -
 .../b1f9e472-38c5-409f-b112-3006bca90b94.json | 105 -
 .../4733fd17-2d7a-44cd-83bf-1201a3173495.json | 105 -
 .../9d44d069-44b1-414a-93c1-91b46ceabe66.json | 105 -
 .../615e5bca-6f64-4bf9-a131-eefd7ec32c08.json | 105 -
 .../82f2d97c-e8d2-47a4-a56b-af781b98ba0b.json | 105 -
 .../e73d5aee-ad0f-4bec-8230-2087669444bb.json | 105 -
 .../99589a08-8f1e-437e-b6f0-e33a9dab5806.json | 105 -
 .../35eb03f0-f11e-40d8-a830-7ce2cfde2956.json | 105 -
 .../01b841ba-ecb1-4025-91b7-fb2c443ef85c.json | 105 -
 .../1cbff8d9-a857-4816-8427-0450871021d6.json | 105 -
 .../10cc1ce1-986e-44f5-b14e-a7b44d9de68d.json | 105 -
 .../e831c8bd-5bdd-4f00-9c91-ab4b29dfc66c.json | 105 -
 .../6043c193-a533-4194-8cf5-9ed83d095f0d.json | 105 -
 .../4b512748-f6d0-4ed0-8ece-5b853a174329.json | 105 -
 .../d3e57fb7-44cb-408a-9ed6-6387b1f0a543.json | 105 -
 .../dae3d027-e262-462c-9930-cfee221cef58.json | 105 -
 .../f3922129-7e69-495d-925b-c3c8a1b70c5a.json | 105 -
 .../deb8be23-8976-4dfb-b038-70a4b77de9f6.json | 105 -
 .../11c52cd6-75e0-4800-9b98-fbc4aa81260d.json | 105 -
 .../dd17eeb9-c1d1-4f98-986e-aad15a592891.json | 105 -
 .../8254ed33-9ce6-484d-9171-5402156a1933.json | 105 -
 .../848752ff-c92d-4ce2-94e8-5b8c8b765b77.json | 105 -
 .../980cf18c-0163-414c-8ed0-dff894a328ee.json | 105 -
 .../99397e12-f601-478c-af40-c8f428b923a8.json | 105 -
 .../00ccf406-3e59-44cb-af59-6dcd391678ff.json | 105 -
 .../6e4a0c11-2349-4846-9d9b-ccf6ef9ea43a.json | 105 -
 .../f81acd72-b38a-424a-878b-833d094518da.json | 105 -
 .../f4686eff-f1d7-49e0-85be-2a6c7f125e29.json | 105 -
 .../a3d85774-ddac-436f-9c64-a751d2924bb5.json | 105 -
 .../2346a7eb-2148-49f3-b960-363ba6b776d4.json | 105 -
 .../e701f5dc-d604-4bbb-8e92-37d69781ae5f.json | 105 -
 .../8c67c634-82f0-4bb8-bd70-e98902649d96.json | 105 -
 .../d8a359e5-2899-4d3f-9fb4-3120f61951f4.json | 105 -
 .../af47ca72-b9b5-4cf3-84a7-e2f4602e6eaa.json | 105 -
 .../99c4b14f-8ea6-4f6e-af65-1e2ee58eeca9.json | 105 -
 .../e48bd1d8-1082-4b79-8145-87d7f013fb82.json | 105 -
 .../b9300d76-c854-48a2-a900-b661c1fae7bf.json | 105 -
 .../7e5f7bc1-1f9a-497a-a903-7d612bb923ca.json | 105 -
 .../6ab36d53-da10-4f80-bd1b-dc037a020362.json | 105 -
 .../e067537a-a621-483f-b1cf-ee78f57a39da.json | 105 -
 .../e3e717a5-a987-4e94-a528-9aafadb6774f.json | 105 -
 .../50db2b1d-e0b5-43b1-86e2-5fa55fb3a960.json | 105 -
 .../c2593003-ca2a-4699-8473-a07683e7cd85.json | 105 -
 .../3d49db5c-bcd1-4d2f-9616-c551a53bdebe.json | 105 -
 .../2a86c8f6-2aed-4e0c-ad8a-e9ff5065a1e4.json | 105 -
 .../960fabe4-5395-4d3f-9680-65fe0b8655ac.json | 105 -
 .../77d10b46-e3cf-42a0-b215-f9f8ff5ef60d.json | 105 -
 .../b57cd648-1503-4bbf-81d7-4ca72ac9ff27.json | 105 -
 .../7abaa7f8-8378-496c-b5f8-ac9046eeccc8.json | 105 -
 .../85379044-198d-4fb5-82c8-50857f8d65d0.json | 105 -
 .../6d6e86f6-f1b7-42ef-9581-b0542e6e12ef.json | 105 -
 .../49768a60-0b77-4945-a048-013a6fb719ca.json | 105 -
 .../489b8b24-4295-41b3-b286-14f79972fe93.json | 105 -
 .../ce4e7736-51d8-431a-9bef-ac2bcb3ff0fe.json | 105 -
 .../d5f3ca22-b682-47c6-a7ba-93b401cb8c8f.json | 105 -
 .../b482d6e6-8520-4a77-a729-ebe2e9635a6c.json | 105 -
 .../1e6ea564-30ff-4db3-8bb6-070da34e3fb5.json | 105 -
 .../3b02898e-b47f-4d53-9bd4-575d47df29af.json | 105 -
 .../fe095b66-350c-4236-ab1b-e2e19af73486.json | 105 -
 .../0130c0ac-a790-492d-aac2-55e999b724ef.json | 105 -
 .../dbfe2c89-a7c8-4fe5-95a1-cf1a58b6f55c.json | 105 -
 .../4f7c69a5-70e5-4f7b-9520-9fa9e642df57.json | 105 -
 .../b8ce63dd-5c8a-4bba-b381-147efcdcc161.json | 105 -
 .../c8b29113-7815-4cf3-be36-76e3e87d6068.json | 105 -
 .../c3977d28-b18d-4e86-bc69-1aa08422585c.json | 105 -
 .../4ef7907b-270f-45dc-8f18-88c62c1c8bfe.json | 105 -
 .../aba2e376-936d-4960-a82b-da09d2266826.json | 105 -
 .../ed1798c0-348f-4294-b546-8a7892225d33.json | 105 -
 .../6ac51916-9278-46b6-9b0f-059745f3d845.json | 105 -
 .../83fd7abf-00b0-4242-b8c3-87ef9c40dfcf.json | 105 -
 .../9235cd92-5335-498e-881f-21938da4ed61.json | 105 -
 .../27e6623c-49b2-4763-ac6f-b35f1f9002a8.json | 105 -
 .../da7be2d8-96ff-4902-9628-c1781391c68e.json | 105 -
 .../fffe8411-9f9c-48ce-adb5-8d483022bffe.json | 105 -
 .../d0e4c608-0c64-4cf4-aee6-714475d500db.json | 105 -
 .../19c08486-99c5-4f53-a6cc-69cb58e0808a.json | 105 -
 .../f45610c5-ead3-4670-9639-aa30fb145829.json | 105 -
 .../34a1eda3-2a02-4522-955a-7ed3f1ee97d6.json | 105 -
 .../08fdfb9e-7998-4483-bb1a-4ea7f0e2980e.json | 105 -
 .../37a5a439-e2ac-46ec-af94-b60f127157de.json | 105 -
 .../6d191a68-8817-468a-850b-01f5ba76e05f.json | 105 -
 .../e98879cc-d7fd-4e97-ab86-0ca28265abeb.json | 105 -
 .../b36e0fba-9fa1-4e74-9d26-b4889343f113.json | 105 -
 .../e0889500-8f6e-496c-b275-ac110458c56d.json | 105 -
 .../8638b115-f092-42f1-949d-162321fe5833.json | 105 -
 .../a20052ae-dfa0-4df7-a9a6-f182dbef513d.json | 105 -
 .../8d2c510b-a092-4e5d-b468-6e58501cad8a.json | 105 -
 .../8f4349ad-76e7-4ce5-9121-fef2e376b4bc.json | 105 -
 .../99c5044d-1308-4f30-9413-bc2672545f76.json | 105 -
 .../e81db661-b05a-4d95-8be4-d663317d3d13.json | 105 -
 .../d8a0873b-58e8-449a-aedd-7117e9931546.json | 105 -
 .../9383604e-dd29-4c51-87eb-68f19ff929ec.json | 105 -
 .../ef25dd23-7cc0-46ad-898d-31bfb5205aad.json | 105 -
 .../b31d5098-4324-4307-aa50-2413ceba5481.json | 105 -
 .../88532e60-eff6-404b-8e74-fd8836a99ff9.json | 105 -
 .../9bd6ca33-d62a-4327-a11e-f36188f0256a.json | 105 -
 .../ddacf85a-a333-4cf9-b0f2-b9a5d5831b8c.json | 105 -
 .../d3d2f0cc-2775-4a01-b8ae-5206cafcb2bb.json | 105 -
 .../3ccecc91-6528-4592-8ca3-722a62bfa102.json | 105 -
 .../29843ea0-0ab4-44e1-8206-10a1135cce8a.json | 105 -
 .../8ce06258-4909-4e46-a326-85052d28c5ff.json | 105 -
 .../0f2ddff5-6077-4166-8fe4-ade89d3a6003.json | 105 -
 .../c3448f16-33c4-42c8-bde3-b503786cba7f.json | 105 -
 .../1193d16a-5ba8-4a6c-b13d-116bb7731a71.json | 105 -
 .../c5ef57d2-a521-4b09-9aa1-0c06c9888cda.json | 105 -
 .../60150622-5b73-4b2c-a8f2-7c2e84cd3d0e.json | 105 -
 .../1650ab9b-4e64-48f1-9551-fb58758cb2f6.json | 105 -
 .../6f4c4594-6f73-44e3-b531-f7651b523e8f.json | 105 -
 .../382ce872-f5a6-4753-9cca-ba06ddcbb4b6.json | 105 -
 .../8683a084-2521-469c-8575-9b2595c112dd.json | 105 -
 .../e1c4e454-79c8-448d-ab33-629900a35779.json | 105 -
 .../aa8f6d7a-bf7a-4e00-932f-b31c9cf0705e.json | 105 -
 .../08843042-f5ed-4dbb-befe-82c48e370664.json | 105 -
 .../8c25e90b-944b-4c23-a7ed-43c9609c6bf7.json | 105 -
 .../4b3c0c63-4718-4fce-bd70-a31b3b60dfad.json | 105 -
 .../e1d82962-59c9-44e7-9243-ea62f6639d1e.json | 105 -
 .../71e3ab93-9667-4e99-b0a1-e25b701b13fd.json | 105 -
 .../5567fc86-d3f8-4ef7-94d8-12fc28eeb9b4.json | 105 -
 .../6c3a0d11-d421-4420-9df7-359164a85893.json | 105 -
 .../13c07664-1ff1-48a4-a43d-877fc05bd19d.json | 105 -
 .../06985382-8aec-4aa3-85ff-774da25ed2d3.json | 105 -
 .../5edf6193-a8d6-41d3-b2fd-20f7ce537770.json | 105 -
 .../5ae4b63d-a84b-4468-aefe-8b5c7b88323e.json | 105 -
 .../b7c71bb9-0f3b-4d2f-8902-5fefac1629c5.json | 105 -
 .../55f777f4-460f-4b83-a309-7e9e9113fd55.json | 105 -
 .../cd2f97bc-3f4d-43f2-b100-09eec8d122a6.json | 105 -
 .../458dd163-075e-48ca-bb3b-650912f55696.json | 105 -
 .../2c35754b-3763-4098-8686-39694028e0d9.json | 105 -
 .../18072fb3-a27a-4ad7-93ef-a3770637a0dc.json | 105 -
 .../38be33eb-3dfb-4987-a2f0-14ceb9d834f7.json | 105 -
 .../1007d3aa-f8ca-420c-b974-a0f552c649ac.json | 105 -
 .../ba3564f4-f48f-4548-ae15-b5f78c4b44f4.json | 105 -
 .../e68ae3f7-3f46-43bb-8e14-0523af96998e.json | 105 -
 .../ea57e277-5694-4981-ac47-d2fa633847ca.json | 105 -
 .../eb2ee4fb-cc98-4937-a385-19a5e783d1a7.json | 105 -
 .../15617903-e280-4c61-a326-5f615b46b3a8.json | 105 -
 .../f8515d35-c7e8-440b-a61f-16f5acfdc003.json | 105 -
 .../869f9850-417b-43d7-bb40-61375a8bb09c.json | 105 -
 .../417b2c35-090e-42c3-8a92-04f7258702a3.json | 105 -
 .../6f966179-a456-4914-807d-45ab507e0388.json | 105 -
 .../455bd496-7a32-45c9-a792-3982781fdc16.json | 105 -
 .../6301252b-2353-438a-9e60-c6a572adfc5f.json | 105 -
 .../54da4a97-6e12-4bb0-9138-dacd981b04bf.json | 105 -
 .../d07eada4-e73c-4dd6-8538-e3a9cd471f34.json | 105 -
 .../9f796e5e-6c31-46e0-b839-e21d33a403c4.json | 105 -
 .../4ec306d4-3f34-4330-9898-fb5ccb9a3483.json | 105 -
 .../82c24fd7-de74-4dc8-bd22-5761243ed826.json | 105 -
 .../8577766f-d696-489d-8194-31b48c17941a.json | 105 -
 .../de2d2321-b6ed-4791-9114-757afc963876.json | 105 -
 .../f9aad6f2-ba24-47de-a613-b4011a2c52d1.json | 105 -
 .../30324407-0848-48ae-bbd7-80676d9467db.json | 105 -
 .../006cafcb-452f-4df0-b42c-058719eb63e4.json | 105 -
 .../5683ed15-2699-4f0c-8e74-a65ff2d4dd49.json | 105 -
 .../1a363aad-a1e7-404e-8c4a-4132f4fbab2b.json | 105 -
 .../5ad18861-1b4d-456d-9e1c-e945c1f71530.json | 105 -
 .../8a7c4b5a-85c7-4fc6-af4c-e9cde5d32d8b.json | 105 -
 .../21684c0e-c9b7-4375-bf05-cf63e9bd19b4.json | 105 -
 .../ab5ef6c9-76de-470e-b524-497036db94d4.json | 105 -
 .../d46307f8-774b-4871-a32a-6c5a9cc6b1b8.json | 105 -
 .../dc25bda9-966c-44f8-991b-ad891d59befe.json | 105 -
 .../f020ec4e-f026-4034-a219-1aacfcbb16b0.json | 105 -
 .../086ca0cf-79a3-4b94-980d-9384f1848562.json | 105 -
 .../4a623195-2073-4637-b748-696012109846.json | 105 -
 .../4bc80120-a5e2-4824-b278-c2de7140a2bf.json | 105 -
 .../aaceb35d-4106-4d6c-b895-446b87394f3b.json | 105 -
 .../4e4260dc-81e0-4e2f-a7ce-dd6a0f7e0796.json | 105 -
 .../61523c37-faee-4708-be49-4c7e31d760e6.json | 105 -
 .../da59bcfb-1f9a-41e5-9a8c-14f672dce595.json | 105 -
 .../ebac2d72-ef36-43a7-83de-e28ae3eb4b22.json | 105 -
 .../ee1e13fe-2ec6-4ce8-8d32-1fe011b12ef8.json | 105 -
 .../7d031f11-6623-40c0-96bd-b3f0c135600b.json | 105 -
 .../5b0421b6-04ff-422c-a13e-9649306959d4.json | 105 -
 .../6ee91c1c-b44e-44a9-b4b2-4e3cbeb594d3.json | 105 -
 .../60766e3b-e153-4ee8-8615-1c1e68b7cd75.json | 105 -
 .../b43702d0-eef7-42d8-87b9-c1cbd0edb417.json | 105 -
 .../9cb855b6-e141-492a-99fb-98858d76f66c.json | 105 -
 .../76edae8d-f4d3-41b2-8a24-cc676feed31c.json | 105 -
 .../f150ea9d-0e4a-49c7-aa12-a703ca011755.json | 105 -
 .../8c23bcaf-2753-4f60-85ec-e92a48b8bba3.json | 105 -
 .../bf24dc90-551e-4e0d-8525-9b3b8c4ccfe1.json | 105 -
 .../7a6d897c-0efe-4c18-808c-25f6b9a78b5d.json | 105 -
 .../0ef3d0a9-a3e9-4b33-bece-bd7eec82514d.json | 105 -
 .../485d4a25-810a-4022-828b-15c255fa2004.json | 105 -
 .../d24cf761-7c11-4f9b-9e41-ca24ac1225b9.json | 105 -
 .../f1af1d33-fb95-462d-830c-5330d6481b6a.json | 105 -
 .../d59a73eb-0aee-49f8-abce-6500f1fae79d.json | 105 -
 .../4c2bc39c-2d04-4afd-a94d-bc8f59e75755.json | 105 -
 .../eed01a32-3282-40c9-9a6c-9a0eae79fc8e.json | 105 -
 .../ea26b157-81d0-4aa2-a6df-d1d391ab2a3b.json | 105 -
 .../8d0fa497-cdaa-4206-ae80-babed3089d43.json | 105 -
 .../0f4eaf10-0a2d-48e7-9c22-e1c771da16a0.json | 105 -
 .../4cd18600-a389-4a22-88f8-0e35739665bb.json | 105 -
 .../e89bbd89-f8fa-4156-94d8-6f390a383557.json | 105 -
 .../f7aec62a-004e-4034-b4d9-152452bb519a.json | 105 -
 .../dbec72eb-bef2-4985-9ac6-bf5c6dabc25c.json | 105 -
 .../fa439482-ca9c-49c3-9732-1147c3965c56.json | 105 -
 .../0aecb893-2b9b-4cfd-bf97-b9887b0aa539.json | 105 -
 .../d985b9ab-a760-4a50-973e-6985e778b97d.json | 105 -
 .../7c975279-f21e-418b-bc0b-739a933b91dc.json | 105 -
 .../d7da3f99-b538-4b33-a3dc-b2e4a96d3f89.json | 105 -
 .../55eeee3c-b812-4359-ab5f-4e3fa976648f.json | 105 -
 .../d79e4774-159d-4b47-8cc0-64d7844e7bfc.json | 105 -
 .../d987e61a-c7cc-4072-9e2c-faa6304eab65.json | 105 -
 .../73270182-a54d-4fc5-834a-89283677c1af.json | 105 -
 .../8df04772-fc5c-4dfb-8366-f9844bf52a0e.json | 105 -
 .../650f54ba-4d43-4e31-92cd-16c7c1913b34.json | 105 -
 .../854d263a-00cc-488a-83eb-c69bb74da5b5.json | 105 -
 .../0a6a3c2b-c0f5-44c7-9ac2-e278a303197e.json | 105 -
 .../10047fc1-254f-406c-807c-3274d9780550.json | 105 -
 .../ca04e634-81e6-49fb-bdc4-2ff0ef04b75f.json | 105 -
 .../000fcba9-c157-48de-b672-f583f4cd3881.json | 105 -
 .../31381b9d-77fe-491d-891c-de4fd37fa1cd.json | 105 -
 .../07a29c73-e3f4-4f01-b105-ac1ef2fdff43.json | 105 -
 .../c4fa1166-5255-4b95-8c7b-e1f93265f126.json | 105 -
 .../92b8ecb7-80a2-4b77-bf20-8d87a36209c0.json | 105 -
 .../eb10ecab-2be4-4b75-9b85-d2f2786fd095.json | 105 -
 .../653ff1ac-158e-4d36-a813-22ebef4a76ce.json | 105 -
 .../63a32ad0-b871-437c-991a-342de8c13345.json | 105 -
 .../46fa0a20-2810-4f0b-befe-afc3fc774734.json | 105 -
 .../12e0e194-ef37-4da5-9354-e82f983fadb2.json | 105 -
 .../9b7181ec-81f6-438a-8af6-a219f356f430.json | 105 -
 .../3e8ba765-d24b-4ffe-a816-21ea02b7ba14.json | 105 -
 .../48f5e083-9fa3-4753-a734-578ac3e15e1f.json | 105 -
 .../f7439085-a0c9-4d5b-bd4f-bf1841d5ce02.json | 105 -
 .../6a173156-75b3-47f4-9f88-ecace0ee6942.json | 105 -
 .../ac20706b-0370-47de-bc6b-ae188f8a9259.json | 105 -
 .../f2fbc411-4a4b-4727-9fdc-eda481f4f10c.json | 105 -
 .../30482674-45a3-4400-84e0-eef215540eb5.json | 105 -
 .../498c4d5e-0500-42da-9c75-e8da578516f8.json | 105 -
 .../de82dcd9-adae-4b28-8248-156e324e036d.json | 105 -
 .../df6327cf-82e1-437f-9c9a-c31205452717.json | 105 -
 .../ecee6e6a-15a1-4455-9724-34ca14477064.json | 105 -
 .../cf2de222-77bf-456c-acb3-c3aa33367a9d.json | 105 -
 .../be5505d7-06ae-4ab5-ba7f-6ff4732b3180.json | 105 -
 .../23b559eb-4493-462f-bb37-5e232b3336bc.json | 105 -
 .../20b49499-5df3-450c-a20d-dc421b937e91.json | 105 -
 .../2bff16e4-f0ed-4957-8b20-4ae269642088.json | 105 -
 .../61a5624d-ef42-4fdd-a0b1-08fdc2d07615.json | 105 -
 .../198e5d81-0dcd-4dc0-9919-139ce0aa2dd5.json | 105 -
 .../f202b553-56e6-4a27-b2fa-0f98feabe11e.json | 105 -
 .../a4111230-4313-4f75-bcd3-c598e436987b.json | 105 -
 .../5468fbdc-63e7-4e9d-8370-2f3f0e83e559.json | 105 -
 .../0806c872-f913-493a-ada4-7db88a93b840.json | 105 -
 .../9ffc9dbb-065b-47ae-a985-541ee7f7126d.json | 105 -
 .../03587c1e-14e3-434f-9582-448914832c95.json | 105 -
 .../8bb5540b-b19d-4641-9dea-36ea43b07250.json | 105 -
 .../73c50ab1-bdf8-4fbc-b7e6-d4a8e8bb8a4e.json | 105 -
 .../7407c2ed-23f5-4c92-b987-2c3a91147d98.json | 105 -
 .../41e4d24f-9790-40f5-a915-ee4155d5cbc6.json | 105 -
 .../211ac2a5-5bd1-4347-8eb8-fa1bd4b1a5ad.json | 105 -
 .../9eae434a-fb2a-45b9-a592-f39a9c469f07.json | 105 -
 .../0ab7f323-1be5-4fc7-a5d8-d4f77f802da3.json | 105 -
 .../0906fee9-0edd-494f-bf01-a34711f17596.json | 105 -
 .../88d174f6-6d30-4859-bbf0-6f5446ce1b9d.json | 105 -
 .../49e5e4e4-6905-4b9e-9f53-b7ac598b5102.json | 105 -
 .../e30c2825-6d36-454c-8787-e5cbdfcbcfdf.json | 105 -
 .../85a2710f-feaf-4dc2-aafa-04c33abf6425.json | 105 -
 .../66d98c7d-7fd1-41bc-9229-855f9d02412d.json | 105 -
 .../22ae03c6-dd4f-4263-a005-624dae701da3.json | 105 -
 .../13b8357d-225e-4ba0-bf34-45479a562532.json | 105 -
 .../37aa2a50-974f-4cb0-81e3-f160f08c8a0e.json | 105 -
 .../90ab1587-99b9-48e1-b3f3-8aaf07313eaa.json | 105 -
 .../ebfb14c0-d725-4650-9d04-ed4f7ebaf676.json | 105 -
 .../fcb13fe4-e314-4cdd-ae6e-82531ad6a829.json | 105 -
 .../8eaee9b3-78b0-4523-9151-695c27c5cfa7.json | 105 -
 .../36ebe051-2bac-46cb-b990-33025df0ccac.json | 105 -
 .../32e1b138-c236-48e3-8152-d3715127d309.json | 105 -
 .../18ae9d71-15e0-4d11-86c0-9cac4dbaa3f3.json | 105 -
 .../d43699f9-e6e5-428b-ab52-9d7114443608.json | 105 -
 .../7c88458f-e9a0-4e90-b5ed-dbdb6fd49b9d.json | 105 -
 .../155f55e9-34e3-4753-a783-31df44e791e0.json | 105 -
 .../94d286c8-8356-4bdd-ac91-2ce517b6b974.json | 105 -
 .../85ccad14-a4eb-41c8-b1b7-f2d0215c358a.json | 105 -
 .../23dca426-d0d9-43d0-86ff-50e01cc292d0.json | 105 -
 .../bba22496-6f3a-4ddb-8a69-5995e72aa15f.json | 105 -
 .../7a0c1d3a-26f5-44d0-8ca1-8ce6db39cb99.json | 105 -
 .../70acb3cd-fea6-481a-8bf4-fa72e953c110.json | 105 -
 .../36fbd2e7-97fa-4ba4-aad2-47bfc225771d.json | 105 -
 .../88d33049-cd88-4b4a-94ba-d0c35a635cfc.json | 105 -
 .../58e87619-6244-45b9-8a1f-b2f8f0d0cd31.json | 105 -
 .../b4b6a8d2-be7f-4b8f-b280-3e62015a61d3.json | 105 -
 .../19aba348-6bdd-425a-bd7b-505aa2658f6c.json | 105 -
 .../dddadaa0-6808-4b34-a6e2-29663460c3e0.json | 105 -
 .../75f6ae05-a987-455d-8167-fc345d55c370.json | 105 -
 .../7ba5e7cb-3050-4838-8762-4b31a5c9d912.json | 105 -
 .../3c843cd0-ce71-4feb-9452-65fc7534518e.json | 105 -
 .../ce85152e-fdde-406a-9818-0eb945ff1d6a.json | 105 -
 .../43a51d6d-e038-4476-a63b-2f4260d736d4.json | 105 -
 .../e26ea6fd-723d-45de-b0f1-5bcbae1eb992.json | 105 -
 .../fd31a5f1-986e-4040-b04b-3018161e6e66.json | 105 -
 .../eef221de-8dc3-410a-943d-900c810948ae.json | 105 -
 .../07190707-16fb-47fc-9813-4f2408a04bdb.json | 105 -
 .../b8b5b30e-d259-49ae-8155-7f63ddae88c8.json | 105 -
 .../56f52103-ea5e-4228-ac7b-3c6929fe5b76.json | 105 -
 .../09ec0c0c-d403-4f23-99a4-61196c70734d.json | 105 -
 .../b94f468b-7c0e-491e-8404-de1bad7ff0f0.json | 105 -
 .../a9af8b88-8f00-4662-8ca4-d042030885ae.json | 105 -
 .../fffd0da2-d4b0-4a11-9fd4-c0dfa0c70431.json | 105 -
 .../144ff584-3230-42e5-acae-35518b10a1e9.json | 105 -
 .../e011ff58-ea5c-4857-a76d-503c4188886f.json | 105 -
 .../c17cced5-be98-49c5-a919-c15b641ba2e7.json | 105 -
 .../43f5a551-7257-4595-9b0c-60799ade231b.json | 105 -
 .../aa3467df-1a74-47af-b635-0318df88dd58.json | 105 -
 .../a9c38a44-a973-4bfd-a1f1-aa094d5e37fd.json | 105 -
 .../e5a71267-56c7-418a-bfcc-b4b5ed10496e.json | 105 -
 .../12a56879-c48c-4422-bc6f-fad813c94414.json | 105 -
 .../d52d6e93-b291-4f21-aca7-2c8d48313dec.json | 105 -
 .../c5a71d25-35f7-453e-9551-7881046fdeff.json | 105 -
 .../1ef7ee4e-ab54-4e5a-b27f-4d6aeffd3f54.json | 105 -
 .../3d3862a4-79df-488c-8d17-dc332fa3abce.json | 105 -
 .../71e87ce8-88f2-4858-b65f-9225f59cc3f9.json | 105 -
 .../73f2659d-ff95-403f-99e0-09de7c807c3c.json | 105 -
 .../46728c83-957a-4eb7-8a04-0fee4efe50d1.json | 105 -
 .../3a05547d-850b-42b5-978d-0aff574cb5ca.json | 105 -
 .../f37d1682-5df9-45dc-92ae-6bf587a03e9b.json | 105 -
 .../8fb0f696-49a8-4611-ad82-3b7e19d5d867.json | 105 -
 .../5623295c-0170-4832-b3e9-df00c660c59b.json | 105 -
 .../6c3ed9db-730c-48cb-95f9-662467957403.json | 105 -
 .../c9d6f048-95b8-44ea-9d17-9d9f2d4854b4.json | 105 -
 .../98402d5d-95a6-4f48-9745-8653b298b48e.json | 105 -
 .../31c103fc-22ab-44a0-aeaf-769a9ff803df.json | 105 -
 .../8277cf4f-865b-4b3e-afcb-b906064dfc20.json | 105 -
 .../8b9ec467-1555-415c-b1ee-23be18ded9e5.json | 105 -
 .../f99bad90-e7b2-4205-9f51-93f96e90188c.json | 105 -
 .../2ff7d218-348b-4069-808f-6b32e7a77a5b.json | 105 -
 .../c86ed5b4-8793-424a-a5a2-9a54689cb388.json | 105 -
 .../e0e6bdbd-91c2-4d45-be73-03890ed13709.json | 105 -
 .../0032ea65-98dc-48a9-90e7-835e389acecd.json | 105 -
 .../bae27b4d-4046-45f1-b798-8356fa962df4.json | 105 -
 .../97c9b209-b2ed-439f-9b01-cad25e205fa9.json | 105 -
 .../4c0ac526-821a-49eb-9eee-152d594ed25b.json | 105 -
 .../219c6f49-3d48-4e1b-8105-fdf323b2fc3c.json | 105 -
 .../0ed99007-3e31-4c48-abe5-0cd94b95dcf4.json | 105 -
 .../6c3d4b07-14c5-4218-862f-2aca386f5144.json | 105 -
 .../171a1779-0f17-4514-96ae-e4f9acea86b4.json | 105 -
 .../578905fb-a4a6-4dcd-9b09-ff5289568b91.json | 105 -
 .../3a0252c3-ced9-4cb4-94ef-d3800ac15ff9.json | 105 -
 .../020f77a1-1051-4f85-8037-ed4f8b12474a.json | 105 -
 .../5ab1b41f-ee87-475c-b48b-e154c580d560.json | 105 -
 .../914b588e-6da8-4a08-9313-ac7004fd8b97.json | 105 -
 .../6b1d1057-0091-4e44-822f-f7c1e5dc3ce9.json | 105 -
 .../5516c5d6-29c9-46dc-ae29-61876fb488c2.json | 105 -
 .../fbdcf318-d1b5-4ed6-b13d-efb14dfaf09f.json | 105 -
 .../99fd40d7-8d26-4088-ba03-1c1d7ed11ca0.json | 105 -
 .../74260e1f-8b2d-40ac-ac96-f268d65fa838.json | 105 -
 .../2fb27531-96ee-48d2-9416-43ef790d7196.json | 105 -
 .../75da6225-cc30-480c-b33e-359648932d9d.json | 105 -
 .../2f104869-3a3b-4d25-987b-77dba089b817.json | 105 -
 .../55eb0438-f0bd-4f9d-8bff-577d0245a57c.json | 105 -
 .../d5fb7571-bafd-424a-87f5-2d14ac7bd8d2.json | 105 -
 .../af3522f6-e26f-491f-8ccc-df064e5d3010.json | 105 -
 .../2ecc5d1d-edb7-4713-9bde-f83ab4736690.json | 105 -
 .../14deb011-b6ce-47c7-b855-c7ebcc291121.json | 105 -
 .../0744b5c6-e109-4ccb-acc9-955106ef5562.json | 105 -
 .../cfbdbc52-d846-48e7-bad4-f6240f1d2551.json | 105 -
 .../a530f116-e413-4d73-8d1f-2f44fcc0c6a9.json | 105 -
 .../286860d2-7f43-4488-9d43-9058fe59b248.json | 105 -
 .../f73009ad-891e-41e7-a6bc-a271894f5511.json | 105 -
 .../b88f3d13-a8ed-4e23-86ec-1531c3151f0f.json | 105 -
 .../f673b2f9-8b77-42a3-9066-29f21a1ca0f8.json | 105 -
 .../55ae7ee9-2c50-45d6-ac0e-7c07bbad9a00.json | 105 -
 .../f96ce5a9-7cc2-4380-9285-09052b906411.json | 105 -
 .../ab796471-db79-40a2-8147-72ed7099b355.json | 105 -
 .../3a820ba4-bdd8-4caf-a90a-d7e9fee52997.json | 105 -
 .../32f38aeb-615c-4785-a674-bd8a50eb1057.json | 105 -
 .../2695c341-eabe-4809-9b87-9e771e1ee9d6.json | 105 -
 .../4734bf79-d464-43b4-8df3-1937f7c37796.json | 105 -
 .../abebe996-35e4-4fa6-a16c-0b33481d7357.json | 105 -
 .../1bfc4a7a-2ac8-4454-bbee-0db62608ce5a.json | 105 -
 .../4146ffb5-ac76-43b7-acdc-8c181f2c60d2.json | 105 -
 .../d86e291c-cc26-475c-9ccd-e3ee68e8bee2.json | 105 -
 .../19f198e5-37b8-4d62-8cbe-849f6875d39e.json | 105 -
 .../a13b4873-22c0-461a-b4ba-41246ede0dfa.json | 105 -
 .../f7406d3e-dbfa-4f12-946e-f4e58c728fa8.json | 105 -
 .../6fc094c0-ca29-4594-b086-2dae90195e8d.json | 105 -
 .../42ea4b8d-98af-4c57-8b55-cef38c473fd5.json | 105 -
 .../3a63b21d-0aaa-45d5-ae12-6d6c9777edbe.json | 105 -
 .../78e423de-2f66-4c53-8d07-8401802973ca.json | 105 -
 .../e57e6483-7e4c-4a64-8c58-890aafb38f37.json | 105 -
 .../17d4fced-6a93-4e5e-8349-25dae16596f8.json | 105 -
 .../29dae40d-4786-4fbc-92fa-3415b0c35488.json | 105 -
 .../cad93026-baf2-47ef-a554-4d0ba0d5a946.json | 105 -
 .../ddcf1dc2-5281-4d14-b870-7ed2fa44c8d0.json | 105 -
 .../e9556ee4-63e8-4e0b-88df-62cc6c62c65a.json | 105 -
 .../deed0e49-b9fd-4623-bb90-3e885bec9bb0.json | 105 -
 .../469379ff-5526-44f4-be9b-8bf6185b917e.json | 105 -
 .../3b0e49aa-931b-4625-8e59-fed02b31372e.json | 105 -
 .../89bafcc1-b175-45ec-b365-45938c1e8f33.json | 105 -
 .../2176e0d8-e0a5-4118-b15f-b272dc643d89.json | 105 -
 .../76bbd348-21b9-4253-8085-d8c4eb0932f6.json | 105 -
 .../5077856e-f85c-4395-8be9-e3e9bf3655cb.json | 105 -
 .../eddb5bfc-d5ae-44bc-8ffd-b1d318b0e3d2.json | 105 -
 .../4ccfc9fe-c222-490e-badd-bfeecc9ede91.json | 105 -
 .../501bff5b-2809-4af7-9600-d6471167b701.json | 105 -
 .../2bde390d-b448-4ac2-addd-215d722aa66b.json | 105 -
 .../45cd6db1-064f-45d9-89f2-d931b4f82326.json | 105 -
 .../cdabdd54-6101-471c-9bd8-446953be986b.json | 132 +
 .../29f2c6ef-0685-43f9-800b-4f10ddc3ddf7.json | 105 -
 .../6d6aa9c5-cb3f-4c30-bd1a-ba951c9ad0e8.json | 105 -
 .../4ab23cde-aadb-424d-a88e-e7029a2f5c57.json | 105 -
 .../118ee97a-cc78-4b4d-99c4-58d37b4a48ba.json | 105 -
 .../07d85f99-840b-403a-bace-99712f3469b7.json | 105 -
 .../6d501ffa-e205-4522-9af5-7036463a5b05.json | 105 -
 .../da5d131c-5ae9-462e-87b1-92ead75eddb9.json | 105 -
 .../f9ce1ec0-e727-474b-acb7-1ba49311e355.json | 105 -
 .../4180c069-33e8-4109-9d35-dde82549ba26.json | 105 -
 .../720029f0-41d5-4161-878e-4218f230455c.json | 105 -
 .../8029cb75-8d3b-411d-b0eb-74539b8ecb2f.json | 132 +
 .../0fd25475-5202-4cd1-b399-bfb8e113d85b.json | 105 -
 .../3c97155d-c086-42aa-af12-14316fcf723c.json | 105 -
 .../38fae832-3d96-457d-851b-7fcded3f7796.json | 105 -
 .../a60477a1-b815-4c82-a9e9-f017cb7b5ec9.json | 105 -
 .../9d85345f-d46b-4431-b5fb-5cca99d92f21.json | 105 -
 .../ca0a3f22-099f-4207-acfe-4b70aa00171e.json | 105 -
 .../4d0a565c-14b2-4ce9-97c0-4d114548fe48.json | 105 -
 .../79b4a850-85b6-45aa-8cc1-5210230a38aa.json | 105 -
 .../69433e39-158a-46df-a987-ac2a6b3af2af.json | 105 -
 .../56593987-babd-4a30-9a20-f83e7d233809.json | 105 -
 .../99b96f53-5ac6-4001-abc6-2a4e43f09028.json | 105 -
 .../cae2d4a1-4632-420f-be40-594f4c001d4d.json | 105 -
 .../a4763c48-f2ab-4f3e-bc1f-a7f4a9f33cf8.json | 105 -
 .../e54de9df-52e5-43d2-92c3-9d5207c0e335.json | 105 -
 .../582f87ef-50c5-4a5b-9d76-bc71f97bd2fb.json | 105 -
 .../46494bad-fb41-4fa3-b568-be4e6a22ae5b.json | 105 -
 .../4f9c7197-1eb6-45eb-851e-46707017fe7f.json | 105 -
 .../65d10996-2c5b-4e11-9a07-319c2446a237.json | 132 +
 .../c13a5d55-44f7-43fc-a633-9af7677a26fb.json | 105 -
 .../ef21d739-b122-4ab8-a8ff-a7cfecad5c8e.json | 132 +
 .../3970f988-26f6-4810-839a-e5f4fcd6618a.json | 105 -
 .../0c2670d3-1fb5-4825-860f-dc84dbd7bb99.json | 105 -
 .../45f3b963-497b-4d89-ac66-9ff0ba8dadf8.json | 132 +
 .../1b921ad2-9ed3-46d5-ab65-f125ce97b35f.json | 105 -
 .../4173435b-d907-4ac5-a8bd-dfa2759f3fb6.json | 132 +
 .../0bcfeb34-8944-4f16-83d8-6fe851c39af6.json | 105 -
 .../b4a79f30-3a04-4f78-861e-1571316a0642.json | 132 +
 .../0c861cdd-1ddb-43a1-991b-300887e1da1b.json | 105 -
 .../53426038-df38-45ba-b621-34231c9cad7f.json | 132 +
 .../97e50198-ba06-4c17-81d3-59270b71a89d.json | 105 -
 .../fa758fe5-21ec-45cc-941f-5cb5ca0612b1.json | 132 +
 .../d2a92a62-3bd0-4cb2-897b-742ea0d5203f.json | 132 +
 .../f7c1a443-006b-4ade-9b0f-895392e52b7c.json | 105 -
 .../8b752519-63d4-4638-b56e-1c45c7f4694e.json | 132 +
 .../f4c62b5d-fc1d-4421-9be8-e7e4af642284.json | 105 -
 .../8da71b7c-7b73-453f-998b-84e70b54e471.json | 132 +
 .../ae57c3e7-4042-43eb-baa2-b033d1b4867c.json | 105 -
 .../2b7b1216-3ea7-48f1-89f6-e5d84fef2b32.json | 132 +
 .../42448d73-f9e0-4eb2-bd6a-74614d08d55c.json | 105 -
 .../1d02fe1c-f31d-4d38-a8c3-dc427e25cb80.json | 105 -
 .../37e19712-3197-42da-a8f2-ae1f36c2b06c.json | 132 +
 .../ed579ba1-fcd3-4279-ac93-d0340a771e43.json | 105 -
 .../b37d3d27-5ba0-44d6-bd19-1196a98b75b4.json | 105 -
 .../1c2a87ca-9f1a-4d32-b1da-743927b722b0.json | 105 -
 .../44749932-f3e3-45ad-bb4b-135a6d656e3b.json | 105 -
 .../5f1b91c8-28d0-4274-8979-32416003fafb.json | 105 -
 .../d101111a-31bd-4eec-9a53-52543f6d5fd5.json | 105 -
 .../056e62d9-ab3e-4bf3-8693-47a5aea7f84f.json | 105 -
 .../bd98b886-a899-4022-aee4-09ea0e491fe3.json | 105 -
 .../7d9a3955-232c-4a93-b879-bd065bab4768.json | 105 -
 .../12b2a13d-2b38-47e6-a6d2-3d5a30bff5ae.json | 105 -
 .../56fa06dd-fd07-4613-9ac5-81c739cb6a64.json | 105 -
 .../ef628438-c2ff-4939-8bf1-09f1df25fd15.json | 105 -
 .../c3578998-b9dc-4b42-a8cb-0bdf05cffc9f.json | 105 -
 .../9ed49666-aee1-43d0-8c7c-98c178860f0c.json | 105 -
 .../29c3f781-f49c-4afc-bbc4-a47aebc91f71.json | 105 -
 .../ae8b39a7-7fca-441f-bae3-8db76879cefe.json | 105 -
 .../aec0af15-927b-48bd-a889-d4715aff4c42.json | 105 -
 .../058de011-1e80-4a6d-803f-8ba7f927cd7f.json | 105 -
 .../2c53181b-8681-46ad-b739-396b1ecb163c.json | 105 -
 .../0367a9de-960b-4c1d-8e63-8dea06197bfa.json | 105 -
 .../f7f557cf-4c63-444a-8c8f-515796b9b127.json | 105 -
 .../83ec9172-5769-4737-a766-0ca2006dd3e4.json | 105 -
 .../9e7ef237-2e59-429d-9784-45de952f60af.json | 105 -
 .../08f1ef63-efc7-449c-92cf-6f180b9d2712.json | 105 -
 .../60823e05-59e3-4c4c-a23e-8ef495aa39be.json | 105 -
 .../5bc6e404-5798-4d19-88d1-5a8153947227.json | 105 -
 .../5832ef9b-bd14-46ba-b04d-049280bc5267.json | 105 -
 .../92363115-37f2-4d2f-8178-61fc98c8f337.json | 105 -
 .../cee9b876-96b3-4429-af70-6a5b45747a3b.json | 105 -
 .../29135c1b-e6a0-428a-ba4f-459e9b652d25.json | 105 -
 .../377bc688-a18e-4abb-91f7-d78a934e1649.json | 105 -
 .../cf300641-1ec3-4ee7-b38d-b274ebc23ff2.json | 105 -
 .../5db77608-f892-4ac4-93c4-03f177696484.json | 105 -
 .../c8de0acd-7cce-45c0-9032-2b717f3917b8.json | 105 -
 .../bc007572-56ff-449a-9e3d-5ab770c3ae44.json | 105 -
 .../69724e46-4038-4d3a-a8ff-e84a56bba9e8.json | 105 -
 .../c13c2fd7-e271-4935-a3a6-4161cb8e4ea2.json | 105 -
 .../75810fb9-99b5-4707-80a8-8974bbb0844d.json | 105 -
 .../68315e0a-603c-4784-a567-e342a6185c07.json | 105 -
 .../8a641aee-1604-4910-8164-9e6d5c0652b1.json | 105 -
 .../4e6cb7a6-f01d-4e25-be2f-bda77af2eaf6.json | 105 -
 .../e1003371-d503-469d-ae41-e813d097ea43.json | 105 -
 .../6f89f55f-a259-419a-b6ad-9b01b2dae9d8.json | 105 -
 .../958ad3b8-9b65-4165-9d3c-a49e25802fd3.json | 105 -
 .../36476eb7-a89a-45e1-b423-7755edfd5be1.json | 105 -
 .../0e625490-b7b1-4b64-aa1e-222c4e21d7a5.json | 105 -
 .../4a0bc836-88b7-4d6e-9f0d-321ff75b1733.json | 105 -
 .../05488c6f-dfd4-4481-a3d4-15a918b115d3.json | 105 -
 .../d11d7e47-f9e0-4502-9e71-0654819c3cd4.json | 105 -
 .../70337ca5-7810-4e52-8382-0c2568a6ab70.json | 105 -
 .../bda90ce2-cb80-4942-8492-28329d7f5aeb.json | 105 -
 .../da866c81-296f-463c-962b-6b871d6fb633.json | 105 -
 .../c78d1aaf-9975-45d6-9a8d-eed76f7e0a0f.json | 105 -
 .../b88d579f-6bc7-4aee-a117-28786cba3300.json | 105 -
 .../0bc55439-f6a1-4588-858a-082907876d6e.json | 105 -
 .../e38ef3e4-585f-46de-beb4-c794d767b579.json | 105 -
 .../5481936f-d52a-486b-871e-d2e48c1b0278.json | 105 -
 .../f3ee4f04-22f1-4ddb-afb2-27b8f641042b.json | 105 -
 .../260f2500-c920-4e3f-901b-10efc03f0390.json | 105 -
 .../d64a8825-610a-4128-8c68-55150a76ed88.json | 105 -
 .../c5a2a30d-99b0-4658-97f5-4c9be5576073.json | 105 -
 .../ec051c9b-9399-4c8d-8710-6a182a234890.json | 105 -
 .../1bd4d2fe-cd83-4a79-b102-40be8ebb6245.json | 105 -
 .../e671d26c-1d8a-4d22-b360-dc3e449886b8.json | 105 -
 .../a3c07d22-20d1-4878-80d5-04b949580829.json | 105 -
 .../8b4f2ab4-dcd7-4c5d-9bd0-6d7e1580c123.json | 105 -
 .../8c4e85ce-7b8f-479c-a1dc-114c7e5ba4f1.json | 105 -
 .../b3466ac6-df1f-4440-9d7b-7991cac7d733.json | 105 -
 .../f5971ede-de93-4729-8a03-b9ec3abea21e.json | 105 -
 .../c6ae6691-64ec-443d-8d76-af614c8cc7f9.json | 132 +
 .../80567722-8c6b-41b9-8103-3bdaedfdb8ee.json | 132 +
 .../20192dc4-ea3a-4413-8457-18a592fa0c64.json | 132 +
 .../8c878c05-86f7-4d61-81d7-9bb286516581.json | 132 +
 .../fa753be0-4a98-4ec3-9cc9-3bf7b380ad17.json | 132 +
 .../a7a2af83-7047-4601-bfdd-ac25abf3890d.json | 105 -
 .../88c03059-5add-46ea-b423-4cf8496c5763.json | 105 -
 .../8e84f2de-117a-4526-9d58-86a63011a07f.json | 105 -
 .../91c5f088-38fd-4ea7-bf95-3d6a69653cca.json | 105 -
 .../b90749f4-0542-42b6-a708-4e14bc586ad1.json | 105 -
 .../ec19309c-9bbe-4d42-894d-3638dbe5dfac.json | 105 -
 .../d58bf1bb-e269-4741-a9f1-be242443ad4a.json | 105 -
 .../07ee76dd-a928-469b-912e-cfd2e0a26ef9.json | 105 -
 .../bf679659-f55f-43c8-86b5-ed7805e8c3ee.json | 105 -
 .../ace18207-a255-447d-9aba-8afdee092164.json | 105 -
 .../bcbdde44-0736-4162-9faf-cd9d8e89d360.json | 105 -
 .../f017d759-59fe-42a3-947d-a4b787f084d7.json | 105 -
 .../c8cfc527-9a58-45e7-a8e0-39caacd8bd58.json | 105 -
 .../3bdd8e19-fd61-4d1e-96b1-cdadd4c2d67f.json | 105 -
 .../c6080b92-d05a-4bda-ad07-e1b59a427844.json | 105 -
 .../49cd8aff-0c7a-4245-831a-f4fc64383b48.json | 105 -
 .../0516b46b-a957-413f-aadc-58f4339dc60a.json | 132 +
 .../97200dd7-7ed0-4a7b-ace9-31c173f017f1.json | 132 +
 .../758f8332-ffa8-4059-ac6f-400f9367bb23.json | 132 +
 .../cc99f18f-e75c-4cd1-a466-ac8c54877bd2.json | 105 -
 .../b1103662-055c-471e-ace8-dd75f607491d.json | 132 +
 .../27b0d675-498f-4351-b92f-7c0d1a3c83bd.json | 132 +
 .../3f1f88d4-2908-4f28-b8d3-4f9ded18ba0e.json | 132 +
 .../3883b0d3-e442-42d3-adc6-ed959c902dd3.json | 132 +
 .../7320b12a-7511-441d-9d56-f7e713af4470.json | 105 -
 .../a18b3d46-7e65-4cb3-b7e5-12b86f34a572.json | 105 -
 .../da172cdb-1388-42f5-97b1-ae8e15291631.json | 132 +
 .../7c94dbfa-4b3a-43fd-9f2c-b3d63d8ef700.json | 132 +
 .../e075cb71-eaae-46e0-917b-bf84482f76c9.json | 105 -
 .../7cdd1de0-767d-4527-a024-c67166bb8b20.json | 132 +
 .../d4702278-54c4-42e8-a901-dfe5c7f2004a.json | 132 +
 .../149f8ee5-4376-4fcc-8f87-7412a3083570.json | 132 +
 .../83b6f014-f8a0-4e69-ae60-cc3a7aeaaf1c.json | 105 -
 .../de82b746-c5d7-450a-bc2b-1b2859d91d6b.json | 132 +
 .../d2a916a6-288a-4761-a3fd-ca674edb67c1.json | 132 +
 .../cda497f9-c7f9-48d6-944b-0167476e5e5c.json | 132 +
 .../4fb7a806-1176-474e-a039-b388f050cd45.json | 105 -
 .../393f8623-7f38-4aaa-a460-cbdcb74c2d04.json | 105 -
 .../5ba7e296-cdd3-40e8-b56f-cc44ef0c3dcb.json | 105 -
 .../0444a153-1852-4a0d-959e-750c933777bd.json | 105 -
 .../b798f31f-5fab-4f21-8689-fe832afb873b.json | 105 -
 .../3ba2b06b-b44a-4ad6-bf38-f1602995c2f9.json | 105 -
 .../3008b476-f005-4672-a953-c86b29ba3ef2.json | 105 -
 .../9a3f7863-0041-4473-b3f0-ad25f0d9310f.json | 105 -
 .../830423e1-ec14-4477-8c82-8516bb8e954f.json | 105 -
 .../5cd26359-d15a-4d0b-92f1-c31101e7b993.json | 105 -
 .../b1f439ee-711a-41b8-b63d-dd28cb63266e.json | 105 -
 .../b8d954d0-a820-4927-a7f8-b0083cf9db9c.json | 105 -
 .../c6411eb6-8304-49e6-ac7b-5300deb27c55.json | 105 -
 .../c7fcd944-78ab-422d-b0ef-8dc394266473.json | 105 -
 .../c6e0aa8c-8765-4e2f-a6b2-cdeb885d29a4.json | 105 -
 .../21944667-04e0-46dc-9896-eef32c26fa6b.json | 105 -
 .../56d07a1f-1f1f-4559-b57d-bee3bf884860.json | 105 -
 .../062d38c7-07e6-4f71-a7a3-e40a187b6f77.json | 105 -
 .../c2274449-ebc7-4e53-94bf-82e1f6810f6b.json | 105 -
 .../07ac72af-fa7e-4fe2-8a67-e893edbbd206.json | 105 -
 .../189f08b4-7e58-4820-9ff7-bcea4530e3dd.json | 105 -
 .../e8d645e6-8ec4-4c0c-8cf2-8aa7e126e1f1.json | 105 -
 .../01c4d932-bdcf-4840-83cb-e441585d70e2.json | 105 -
 .../b0e6d5e1-3f41-4dfc-8845-b6d028820816.json | 105 -
 .../66cc8076-71be-43fc-9efb-edd8ad19a6b6.json | 105 -
 .../01613adc-1206-4695-ae19-31f2b7ee0d9d.json | 105 -
 .../cc6d8d11-2273-41fa-95eb-5d1f7d4a2311.json | 105 -
 .../c5e7d08d-4430-43f6-a293-5381b2f13ca6.json | 105 -
 .../7a9d4b20-e704-4f50-a09b-ccb67d417824.json | 105 -
 .../34f35618-3ecf-4704-ab7a-ec9e8a5d08c1.json | 105 -
 .../ce2ee38f-cb48-403f-894d-f2824d00a388.json | 105 -
 .../4729a245-9e2d-4f65-bf14-67db4bb2590f.json | 105 -
 .../3bccbf0f-e578-426d-93bc-84364f7d8017.json | 105 -
 .../ac1010e3-b3d8-4b61-ba79-0dcedb68619d.json | 105 -
 .../12f4db59-10fe-47d0-86df-343ea8978249.json | 105 -
 .../b0ae93c7-b251-42df-a67f-ca8b8a865937.json | 105 -
 .../893da954-ca56-42ab-914d-44fbc4a6f1ff.json | 105 -
 .../f05d6512-16ca-4f44-a31f-392f8f71da74.json | 105 -
 .../3f48c9eb-dbfa-4035-96a6-d4f516fa1e80.json | 105 -
 .../9cd84a08-1f21-42ad-b8c0-eeb2df93ee29.json | 105 -
 .../d0dbcd95-252f-46e0-9699-81b293cb7db5.json | 105 -
 .../d7e88fea-5c3d-4b9c-85a9-a0cf35a97ea0.json | 105 -
 .../d9804b0c-37db-492f-a1ba-851137e697f0.json | 105 -
 .../1c4e9e6a-7bb8-410f-9a3b-f88ea0ed474c.json | 105 -
 .../58269430-efba-4d04-a69e-8ef666f2afee.json | 105 -
 .../7c9aa35b-3d8e-4b3f-8ae7-35698a1f1c70.json | 105 -
 .../034c23f5-6c03-4cee-b6b2-7263426cf975.json | 105 -
 .../811cf797-62a1-4fda-960c-ee51f3e24a03.json | 105 -
 .../04a1b79b-a5af-420d-829b-0750341490cf.json | 105 -
 .../ff710b55-0a89-4582-8caa-867efb88cf98.json | 105 -
 .../7ef5c287-cf98-429f-80c3-d71743612a73.json | 105 -
 .../a65136c6-b3d7-4107-8d3a-0ce84b77965b.json | 105 -
 .../810fc203-f10a-49ad-8a6f-58cbd70f2205.json | 105 -
 .../abeddace-67d6-484a-b410-95d92819dfe5.json | 105 -
 .../b6fa1ae6-3df8-437d-a844-3fa022c12370.json | 105 -
 .../44381c62-a310-4f01-bd66-9d1434638cf4.json | 105 -
 .../1a3acc9e-b2cd-4f80-8fcc-b227eee29f26.json | 105 -
 .../51b35f7f-f6f7-44ca-9816-b3d812112131.json | 105 -
 .../6b30f50f-9a89-4a11-bcf9-4f38c46c1f18.json | 105 -
 .../b08cfbfa-906a-4dd0-b258-a7a56a6dcda4.json | 105 -
 .../4384c278-c869-4591-84fd-a8b2843fe42d.json | 105 -
 .../f1822f64-0594-4f16-98f4-29932c604187.json | 105 -
 .../27e58a27-f4e9-4c7a-93f2-c3b15cab8f9f.json | 105 -
 .../3381e897-35f3-45f4-ac05-3ca47441b772.json | 105 -
 .../0da50308-a631-4466-b2e4-2793412b31db.json | 105 -
 .../95ba0175-5578-47fe-aec9-93ccf4f9f9af.json | 105 -
 .../5db2ec95-d423-4987-aaa7-b5919d1a2cc8.json | 105 -
 .../03b30ba7-efc3-467e-bdde-c6a18437929b.json | 105 -
 .../a6e65aeb-f0d3-48ca-8f6e-933d0ea2113b.json | 105 -
 .../80a9277b-5768-4da0-96c6-3289a7b8a9bc.json | 105 -
 .../b9b08e55-0c5d-427d-914b-e4cfb4de96b8.json | 105 -
 .../5b3de7db-009e-46c9-bf34-fe5912c39b81.json | 105 -
 .../0cee26b2-c3b3-40be-bc15-3fdaf7b4b38c.json | 105 -
 .../c3eae55f-ce07-4ea2-b9d4-92e0909a8b06.json | 105 -
 .../178418ad-2d0a-40cd-a057-105bbe69f937.json | 105 -
 .../012b188f-db69-4529-bfe3-db34c77e7dc0.json | 105 -
 .../e5582319-d8e6-4223-97bb-a64a2cc03853.json | 105 -
 .../d66604f0-15b3-4ac3-b0e9-083ab6906da0.json | 105 -
 .../5ea20d83-ceee-4c52-911a-e25e9cfecf0e.json | 105 -
 .../2e86d526-de04-4339-8495-e88c5a9f3f18.json | 105 -
 .../99cfc94d-3cde-4e42-924a-5c4a4c7f217a.json | 105 -
 .../ed825fd6-f749-449f-a1d6-c3ad7a82e354.json | 105 -
 .../1cb58f83-841d-474a-9c7b-adece8cab805.json | 105 -
 .../2043110d-2b63-4133-9c53-b39b5b7869b6.json | 105 -
 .../45e38c7d-5f31-404b-8fcc-9f3cad239cd1.json | 105 -
 .../b83d5033-b513-4472-84c1-1b757c533137.json | 105 -
 .../db2dee58-3a9c-4789-800d-ed7207c6699c.json | 105 -
 .../b4ed9f85-c1bb-4a52-8ba6-69f4e0f8e442.json | 105 -
 .../9f84023e-a23c-4d2c-afb3-f93629f97a6f.json | 105 -
 .../9f8c4246-9770-4790-8db0-095e722d89e9.json | 105 -
 .../10a4d2dc-4779-4b0f-92fa-010a6a51fe9f.json | 105 -
 .../b111507d-92e8-4af1-882a-9434d6825f51.json | 105 -
 .../e1bd9218-4bfb-4df1-a2bf-4a10937240dc.json | 105 -
 .../97b61e29-2157-4167-b5bd-94919ecdcacc.json | 105 -
 .../9cee29c1-b8dc-4a2c-b117-d5912b890824.json | 105 -
 .../7d2d135a-ab81-49fa-8c17-07f9bd54399d.json | 105 -
 .../7bbc4787-9899-4d90-90c6-dec88bc7dd52.json | 105 -
 .../729b4f81-32da-41d2-8fa4-d18553b37b83.json | 105 -
 .../ce626634-c5a4-422d-8b03-1a28108809ce.json | 105 -
 .../7b07e583-36df-47df-8439-224eca2e5761.json | 105 -
 .../e2f4255d-12ff-4c88-996d-bac6b51aaa33.json | 105 -
 .../dae2a1a6-a608-4b64-a77a-e4aed87e7d7f.json | 105 -
 .../80934f3c-8d0b-49be-9f42-e187b4729cff.json | 105 -
 .../626bfec9-65d1-4250-8d07-d9c8a008b554.json | 105 -
 .../f24b2adb-f12d-4dd8-984b-8ab43e15720f.json | 105 -
 .../66d1a6cf-41da-4226-a06c-fc99641e754a.json | 105 -
 .../53ec68aa-e4fc-430f-8ccf-f5886f1b9d4b.json | 105 -
 .../b2ee17e1-3d66-4622-8ea9-3bf8747371a5.json | 105 -
 .../5adde1ed-2d8f-4aa6-96f9-042df5358747.json | 105 -
 .../9902ef50-5208-4053-bb90-e08c98211b3f.json | 105 -
 .../cebdb6d6-a12c-47f6-b912-4b8e98763c48.json | 105 -
 .../ecfdb6a4-36d7-4252-9677-10655b3855e5.json | 105 -
 .../6c54d5e2-7fca-4fa3-9d04-0f44d0651018.json | 105 -
 .../4d45347d-4491-4d7b-9abe-02c42974f520.json | 105 -
 .../83a71a32-796a-4fec-9513-2f4b5e032749.json | 105 -
 .../99c28dc3-f614-430a-99d7-31c2218c4d7f.json | 105 -
 .../b56c6c01-a226-4090-9332-330535d79e24.json | 132 +
 .../0ddc8e10-9cc5-48eb-b5b0-a2c2f071862b.json | 132 +
 .../d0cfd22e-6bad-4784-a172-76892d44f70b.json | 105 -
 .../2917c469-7e22-497e-8d62-9b9972266658.json | 132 +
 .../ef779e6f-1c12-4237-aa45-e6315ed01d92.json | 105 -
 .../2424d85c-e092-4e7c-bf4f-ae014d08a159.json | 132 +
 .../fb55e940-f03d-4d79-9363-ec17eebf9596.json | 105 -
 .../90278363-1d8f-47ca-a7dc-c51c6b511dc9.json | 132 +
 .../ddc775e5-a4cc-49bd-ace3-113f325134c0.json | 105 -
 .../3c3197ee-675d-4bb7-874d-28104d2a3cae.json | 132 +
 .../eb5a8679-bfdd-40f2-9a32-55c04a65ae7e.json | 132 +
 .../1f2c9c0c-7e71-4886-9980-300a7ae5c55e.json | 105 -
 .../d770f88d-b110-4f27-85e9-e52217c11798.json | 132 +
 .../364328ce-5de7-401f-ad84-0c76e3c1dc91.json | 132 +
 .../3758a033-b197-403b-ab9e-7457856f3ebc.json | 105 -
 .../f7dcfdbb-ff12-4692-9702-712de3d0b7ba.json | 132 +
 .../d641aa88-9981-4a25-90d5-fcc4564ede52.json | 132 +
 .../15f66094-73f1-4302-adad-69522872682d.json | 105 -
 .../8915e742-df2e-41bc-b83f-3e111edfd257.json | 132 +
 .../a85d1dbd-465b-42c8-baf5-0e7a7ca00725.json | 105 -
 .../e29a5e35-8677-4e53-83fd-85e919b4366a.json | 132 +
 .../e5c55d38-dc04-42b4-9aca-ae7be436ebe0.json | 132 +
 .../ee88881e-cdeb-4a55-b784-6b41b983d7aa.json | 105 -
 .../504baceb-6684-430d-a532-b7b5b0b061fe.json | 132 +
 .../2e1de889-2df9-4c81-b5ce-c00c602704b7.json | 105 -
 .../31fcd34a-af1e-4eab-bd9a-5ec17eb572d2.json | 132 +
 .../01ab0a3e-393a-497a-9b32-8af790b7581a.json | 132 +
 .../541967a6-b856-4dc9-958a-9335197fba99.json | 132 +
 .../a6032673-fee4-4c8c-97fa-167729f495d6.json | 105 -
 .../ee31c801-67cb-46a3-9e39-02e842c0473f.json | 132 +
 .../65fabe8b-05af-461e-b804-fcff3492da34.json | 132 +
 .../7e1a7121-2c9f-4196-bbdd-48aea257f384.json | 132 +
 .../dd32609c-316e-4511-8791-fcae33a1a506.json | 132 +
 .../0ba6add2-4495-4261-baab-224c0b6c683f.json | 105 -
 .../d95d7058-49eb-47d7-b790-3a253291d22b.json | 132 +
 .../37cbc3d6-1198-4e23-b86c-1fd979eacd9a.json | 132 +
 .../76d0d338-e502-4638-adad-c4c4df00c26f.json | 132 +
 .../d7eb4408-6857-4df1-b92b-9dd4712a4f23.json | 105 -
 .../b0867447-6dd9-453c-af09-da0db5651e65.json | 105 -
 .../f47375bd-547a-4d0b-8c96-bbe2bc1ac445.json | 132 +
 .../6b1ed68c-3099-4bd7-892b-cdc36c90ccfe.json | 132 +
 .../b18517f1-db51-43a8-812f-75aeccae508f.json | 105 -
 .../0e59c8ca-cde0-4482-ab03-3309bcb8737c.json | 132 +
 .../12efcd4e-13cc-46e5-964a-35d4be69a01e.json | 105 -
 .../d7e900e2-0574-44cd-a68a-0dd2715cf48c.json | 132 +
 .../fd626c3f-566d-4193-9a85-e7c9a89e671c.json | 132 +
 .../196b04ae-fd53-400f-9f08-19edd4959f6e.json | 132 +
 .../3ad89b65-5719-4e54-aadf-c10d3f27857a.json | 105 -
 .../4653087e-b528-47c1-86eb-0166538229bc.json | 105 -
 .../57177299-076a-4506-89a7-ce54af08df4f.json | 132 +
 .../d3bdf36f-7f89-4b5a-b6cb-847b49200b5b.json | 132 +
 .../605f3f59-204e-4332-8b4e-9da04871ca1b.json | 105 -
 .../92619b9e-dacf-4d0a-9f8b-6e131af74fa4.json | 132 +
 .../0bfec228-5bfb-4662-8be5-ad910b5bc3bd.json | 105 -
 .../cbb408ea-ced6-4f47-9066-d4ff6d604b1e.json | 132 +
 .../05fe5948-c228-46f5-ac96-3c234bc5b3ce.json | 105 -
 .../6999bb02-29fd-4c59-886f-184362afa06e.json | 132 +
 .../913d1d8e-0b02-4ce5-9b7c-403143a8c880.json | 132 +
 .../d4b40160-579a-4e66-96a2-8441e5c02694.json | 105 -
 .../82c87bc0-29cf-4150-92f5-c80fb0028ea6.json | 132 +
 .../a18834ad-6143-4ce2-9842-471817a60a39.json | 132 +
 .../9bf2a7e3-e744-4ac0-853a-f5cec8ef9c57.json | 105 -
 .../be900bcf-8ec9-484f-81db-0e83975c1ecd.json | 132 +
 .../d226ccf6-674b-44c6-8b11-d782b59a961a.json | 132 +
 .../d769592a-faa3-4269-abac-373679f42c62.json | 105 -
 .../d8839a1a-8d07-4e0b-bd44-2668c84f750c.json | 132 +
 .../e90b04db-2eb3-483a-ab0e-ea8aef821d84.json | 132 +
 .../900921ae-fbb2-4488-ab19-18987c1d008d.json | 132 +
 .../0da0a7cd-c075-4bc0-8e88-8acc7212e5c3.json | 132 +
 .../cf47622f-c921-4610-adef-bed2a4670249.json | 105 -
 .../b50a49cd-2909-4dbe-9c9f-c150abb99845.json | 132 +
 .../bbcae028-046e-4e87-b991-5d7b92c42cc2.json | 105 -
 .../13831d81-a9dd-43c7-bce1-240aad42fbc6.json | 132 +
 .../e6fe5591-f6aa-40c6-897f-f90084682109.json | 105 -
 .../56ea7cb3-3a1e-477a-bac8-26a0fde6297a.json | 132 +
 .../b70a3980-7b0b-4bb1-878f-c2d49f9df09e.json | 105 -
 .../8ce19b33-4f2b-4b8d-80bd-1ed399a5e9dd.json | 132 +
 .../18ab167d-b72e-4fa9-94a8-09edc641c73f.json | 132 +
 .../5e55c7ee-90f6-40a4-83ca-4a3acdad40f2.json | 105 -
 .../7df237ea-29c0-4d0a-9092-c41df4c13aca.json | 132 +
 .../e0d9dbcc-8df2-4207-b849-2c4984340605.json | 105 -
 .../aacaba19-8c17-4d20-b27b-672810272ed4.json | 105 -
 .../e5dc8caa-2d86-4ff0-af8d-22d85c8faeb0.json | 132 +
 .../01591bb6-9daf-40fb-b802-0a007f4cc388.json | 132 +
 .../91e89f4c-d05b-476a-a8d9-0186ef8d1418.json | 105 -
 .../672c6991-3c7b-48c3-9e95-389175e7cd6b.json | 105 -
 .../f6c32abf-bbae-4827-9ce2-29ce20c9463e.json | 132 +
 .../049eb195-7ca8-42a7-bf2a-e072b7929958.json | 105 -
 .../74a6605d-3557-4458-bef5-cc9420434e68.json | 132 +
 .../ae71ec28-7e22-42c4-8549-4334dff8a811.json | 105 -
 .../dbe6e126-d35c-4634-a544-adf374ed5d00.json | 132 +
 .../d68681c1-01e4-4af0-9a81-e0aaed0ae865.json | 132 +
 .../de9620b8-7112-436f-8941-fae2c5e7f9e0.json | 132 +
 .../cafee7ac-deb6-4c4b-af8f-81548648cb14.json | 132 +
 .../3e3cb617-6f19-4731-b31a-b1f4d88237d5.json | 132 +
 .../5256f7b6-f830-4733-a092-01470607558d.json | 105 -
 .../3c2c2c14-d065-4d6c-8c98-44ba8f2ca461.json | 132 +
 .../7c8605a5-2f0d-4cc7-b840-d77cb5fdf849.json | 105 -
 .../8909f916-401b-4457-ab8f-2691696049c6.json | 132 +
 .../ae191508-7dad-4cac-ad4a-af95d7a15b5d.json | 132 +
 .../507f5047-fac3-415f-b9fa-aae4311fa837.json | 132 +
 .../0ee8716c-74f0-41b4-94a2-efc715150293.json | 132 +
 .../fcf491f4-cf57-4c95-9de1-4702ab5d54c7.json | 132 +
 .../4fd20259-c7c7-4da5-9013-ae2feb2175b1.json | 132 +
 .../a7c8c345-cade-48fd-93c0-0f344044d2b5.json | 132 +
 .../7a8e3986-7688-4a26-a74c-a9bb47cd3e8d.json | 132 +
 .../7a2ffb4d-1135-42a1-b28b-3b4e4d014979.json | 132 +
 .../25468720-93d7-4f10-a534-30c4976657e8.json | 132 +
 .../5ba1d617-9d9a-4c3b-b9cc-3224ace129b3.json | 132 +
 .../27b2b46f-1323-4ddd-9f65-d8fcd9cd6508.json | 132 +
 .../65917125-bb7c-4d64-ba5f-b5e4f67ec332.json | 132 +
 .../30bf22d8-b93a-4775-8073-30e14e15e35d.json | 132 +
 .../ff510365-a13d-4e44-9709-59a56e864991.json | 132 +
 .../6d1eebc4-228b-43f3-b31c-3d5b1591ae2d.json | 132 +
 .../09ba1be1-4b42-4eba-810f-a0aed64aafc0.json | 105 -
 .../f1e8cdbb-14b7-4959-a053-fb1b37629aff.json | 132 +
 .../4145d1a0-8d6a-4d64-8a45-a89cf343ac46.json | 132 +
 .../d6966190-e254-4902-8472-cac59bfbdbe0.json | 132 +
 .../5fdb5437-f413-451d-9800-42036cda7686.json | 132 +
 .../02606fe0-ca08-4102-9670-8a18a9cc6f81.json | 105 -
 .../347577a4-2768-4472-ba48-9b174ad89724.json | 132 +
 .../33af440e-837d-4454-9340-af0d3ee74f77.json | 132 +
 .../f9da5237-3903-4bbf-a0bc-0bcf3152f45a.json | 105 -
 .../1a1f4709-8d05-4905-8105-0c3606d5ef5b.json | 132 +
 .../28421948-089b-4487-bb71-a06e5ce74402.json | 132 +
 .../3fa0c783-9226-4fc8-b3a0-6e960684f43d.json | 132 +
 .../743b7fe2-f998-408c-98b1-af02d9c1ee2a.json | 132 +
 .../0039c88b-a881-4ce0-9a0a-a10f1a8cbc70.json | 132 +
 .../e7337143-6ec7-4467-b6f5-907492705cc9.json | 105 -
 .../87c7fbd9-7648-4d0d-ac9e-8ba85860e335.json | 132 +
 .../6ca3ab87-c05e-46b5-879d-4fc8bf75417b.json | 132 +
 .../525f1b9f-88a2-459d-bb4a-7c01a0107968.json | 132 +
 .../503f79be-7f05-4464-ac9f-0f284f1e7965.json | 132 +
 .../86ec7d95-6f6d-4ca6-97d5-7a910f42a06d.json | 132 +
 .../894b90c6-c701-47d8-b930-4e271e28962f.json | 105 -
 .../d472ba79-6592-4f8a-a99c-ec3f71468d3e.json | 132 +
 .../3644fc16-b0fa-42d7-b17a-eb8f8332193f.json | 105 -
 .../6ddc052c-6bda-4d8e-ad97-20d881c8cfb7.json | 132 +
 .../76d1aed8-80fe-4b4f-bd81-ea0d6bf085c4.json | 132 +
 .../435e3ce7-479f-4624-978e-25d755dee811.json | 105 -
 .../d2845d6e-65dd-4448-901d-d554b3e741f3.json | 132 +
 .../f7dd203f-24d8-4875-878a-12ed99e20cd3.json | 132 +
 .../287ae246-bee5-4fae-b78f-203491aa8df2.json | 132 +
 .../9ee493f7-e031-4593-beae-65be17678e00.json | 132 +
 .../86b10c6f-41c6-4d0a-ae59-f90e204e466c.json | 132 +
 .../043e3533-7d5c-4d45-bcd8-0dbcc8ca4819.json | 132 +
 .../1b3269fb-4b16-42b6-80c0-3d54bc2b4fed.json | 132 +
 .../ee625c29-62c4-49da-9790-e7e67233157d.json | 132 +
 .../02b16bf2-62bb-401e-9726-2135d8d610be.json | 132 +
 .../db10c6f9-2962-46cc-aa4e-4c99c4b494d1.json | 132 +
 .../aa37bda0-2e0a-4361-a5b4-468154d8ac72.json | 132 +
 .../d9a6565c-5a0b-4893-b6e0-1fc52ec55bf5.json | 132 +
 .../becf9805-83a9-4137-a938-81a61a10e4f0.json | 132 +
 .../6e848120-bc31-4628-af05-30707a6dcc41.json | 132 +
 .../864af855-71b0-4b11-ae3f-56294a7d0db9.json | 132 +
 .../285bd390-1dd9-4db2-af45-68dea557da3c.json | 132 +
 .../459e2375-1a15-4129-bee0-dc8852d531e2.json | 132 +
 .../7b4c7d92-f581-4057-bec9-e3a8c6a5386e.json | 132 +
 .../5f68a07f-4442-4453-92c3-b615323da96b.json | 105 -
 .../7ceab841-f9a3-455b-9314-243d8fc3cd11.json | 132 +
 .../b372e098-0e1c-410a-8f5a-1bd9a910aa6b.json | 105 -
 .../c1e2fb45-22d8-4eb4-8971-ce89c3048b9e.json | 132 +
 .../68cb2ca1-1648-41a2-92b7-969bccdca4ee.json | 132 +
 .../5f285d61-5e4b-4c5c-8960-c10313d76ae3.json | 132 +
 .../86a45185-8753-4cd0-818f-63a62f03423f.json | 105 -
 .../3af19898-8590-4aec-b324-46c7fbf596d3.json | 132 +
 .../5063eae6-e8f3-41c6-ab11-cfcc4a0a0cf3.json | 105 -
 .../c2ee0925-6e4a-4d3b-80be-b8b98156e3db.json | 105 -
 .../e8472266-6d03-439f-bd6b-e3ac5ef2cf09.json | 132 +
 .../3f578b45-48f9-4022-991c-32a71706aba3.json | 132 +
 .../9b1f077d-5893-417c-ac87-1d0beb39b750.json | 105 -
 .../b4630d14-950d-4dbf-8897-74d46dd51130.json | 105 -
 .../ef8c22a7-3898-422e-88e2-1a8c14ab5bf2.json | 132 +
 .../4a73436e-e2b7-4c03-b4b2-80d0ed8e389a.json | 105 -
 .../81630ea2-d496-4872-92b7-e476badaf50d.json | 132 +
 .../9436d04a-9c81-47ad-a7b8-496e14058627.json | 132 +
 .../bd8025f1-66d4-4644-af1b-ca5366a32964.json | 105 -
 .../95281cbf-6f27-4e17-b21f-9a0604d5629b.json | 105 -
 .../f1e6e54e-cb97-4980-8957-2190ee5c4c34.json | 132 +
 .../30914dd3-c857-4aaf-b6b9-d1c7e4917e89.json | 132 +
 .../514b1b8c-d80a-4851-afec-e04968b2e733.json | 105 -
 .../1c389a32-68b3-47c0-a6b8-2c2291293002.json | 132 +
 .../6736897b-390a-4c19-8a04-9b606c1705b1.json | 105 -
 .../5b934386-a0e9-437d-bf9e-a51074415a1e.json | 105 -
 .../e759a217-6571-446d-9bf9-d1512793f307.json | 132 +
 .../753f3b21-7365-4117-b2a0-a91f03ec3d39.json | 132 +
 .../907047d7-1767-4009-8e04-02f5dc366355.json | 105 -
 .../297ef102-67c1-4e9c-b418-fed026bb1f8a.json | 132 +
 .../43da500e-cdc7-4b70-a0eb-6ae3371670d9.json | 105 -
 .../9fbf73d7-7d67-4d6c-a5b9-efc627cd1b2b.json | 132 +
 .../b1446577-f13f-434a-a0b4-916091395d4a.json | 132 +
 .../fc8946aa-8b04-482c-8c05-d026d2af07be.json | 132 +
 .../fabe3784-948c-4618-9cf0-c76a3ddd3820.json | 132 +
 .../736dcf09-6a19-4e88-a790-7a7ee74d8717.json | 132 +
 .../75b4c750-1570-4825-a04a-965c06861fd4.json | 132 +
 .../b7f8b678-2aea-4d41-ba21-2083fc472574.json | 132 +
 .../a8010630-58de-448c-af08-70b8ffec431b.json | 132 +
 .../4a0c2ce5-a4b4-4d35-b65d-bbc6e36a649b.json | 132 +
 .../748c7e5a-697b-4763-a43e-e3b6a6f2951b.json | 105 -
 .../1132251a-59c7-402e-9957-f9288864508f.json | 132 +
 .../e2fac049-8f9f-4b71-bcd3-5746b7d90150.json | 132 +
 .../d891a1e1-ad65-498f-9ee8-59523c1bfd19.json | 132 +
 .../9dd3103f-6c4f-4077-ac27-3a9b0f4a5882.json | 132 +
 .../ca031f70-5785-46d1-8a58-b279d8340776.json | 132 +
 .../18457711-92b8-4c27-a89a-928fecdf5724.json | 132 +
 .../3398aeb8-08a8-4be9-a24c-efeabcaa2139.json | 132 +
 .../707bc006-4318-41bc-b91b-aa43ca7cba6f.json | 132 +
 .../7bfda919-13be-4b68-8655-99fe6a4605a2.json | 132 +
 .../c5913e2b-c8c7-4e8f-a1c3-f2f764c8478d.json | 105 -
 .../ebdb6805-f14e-4fb9-b1c8-acd258b93385.json | 105 -
 .../f844e739-5f0d-4db4-ba66-bd33b1290571.json | 132 +
 .../0cde6639-6a89-4682-bb3e-a2a24a1bc8ab.json | 132 +
 .../5b522625-39ed-4faa-a3f6-1cec01baf906.json | 105 -
 .../4207b373-ef5c-48f8-a463-814b81a44410.json | 105 -
 .../87652005-4404-4c45-bd4f-5f63c44adf63.json | 132 +
 .../2d36210e-e2ca-41a8-9434-c29168849a28.json | 105 -
 .../a7e0bc2d-784d-4719-ac08-d8fa0c29d178.json | 132 +
 .../053f6333-9722-4c3e-a5bb-246b273225de.json | 105 -
 .../e8ba93e6-6f90-4169-8403-381b7f9e26ab.json | 132 +
 .../e551e936-41fa-4fda-84e9-dec9f5694c5d.json | 105 -
 .../ea86b542-3d06-4e71-b49d-17cdd362b465.json | 132 +
 .../15615d2c-46a1-47c7-a273-697e97bdf9f2.json | 132 +
 .../28396f73-b949-4db0-b685-77fc5901770b.json | 105 -
 .../0081cd67-9178-4443-aebf-721b75c0fc77.json | 105 -
 .../a2b8da3f-c99e-4dba-b4a2-23739281eaf2.json | 132 +
 .../76f3fa3a-1629-4cdd-b457-3a108784b427.json | 132 +
 .../fb148468-c189-4fe5-b803-7532af8dec1d.json | 105 -
 .../16b4d316-db1d-4282-a5c0-b8ffe4af817c.json | 105 -
 .../c9e979e1-4433-4a38-8fd4-c14895e74f44.json | 132 +
 .../3f2effba-1ab8-476d-b228-ed9491e83adf.json | 132 +
 .../5981cb70-62a7-4e42-bf12-081c67c1b792.json | 105 -
 .../426bdea2-83f2-4915-9e82-ba4c8c8f4224.json | 105 -
 .../a5f0fb1b-27a7-495f-a010-3307afdb8949.json | 132 +
 .../22f2aa1d-fff1-430a-9c20-3b32859d9665.json | 132 +
 .../d2b0785d-a169-4773-a3fc-95b536fe3cc2.json | 105 -
 .../4c84cbc4-1a4d-45d9-909b-92d2b4e961b6.json | 105 -
 .../daff0e6f-d29f-4861-855f-902a0cd9a469.json | 132 +
 .../0f5cb926-b691-4d57-87f5-290235fd250a.json | 132 +
 .../5d37ba65-09f6-4762-836e-4634c06ac9f7.json | 105 -
 .../5009ba04-1a8d-4e91-bd32-659fe67c4d26.json | 105 -
 .../d9e813da-2966-4901-99f9-c7627c64fc52.json | 132 +
 .../4cb98a5b-3eb7-4fa8-adfd-17add38d3332.json | 132 +
 .../f7b617fa-7095-4eef-88bb-4fd73c23d5dc.json | 105 -
 .../59e5fcd0-e46f-4346-b695-bee4dab9cfc4.json | 105 -
 .../f7494fd4-d248-46a6-a46d-f9d8db560aae.json | 132 +
 .../4b8533d1-7770-435f-ba76-a5c658aabd8f.json | 132 +
 .../e94f28ff-ae6c-4109-96a2-9dbe07621e03.json | 105 -
 .../309c7906-0010-4f17-848f-185062d96a26.json | 132 +
 .../f18ab2ab-098b-4e46-8f8d-433b52cdb81b.json | 132 +
 .../b4a70c71-dfac-4888-937e-d5220b491b0e.json | 132 +
 .../b879a534-6b24-4873-a0e4-e18453540121.json | 132 +
 .../c67ae8f2-596b-4dab-8c4f-768b2f0608b4.json | 132 +
 .../7766c638-b4dc-4b2d-8c14-becdb1b709ef.json | 132 +
 .../dd211bef-3940-4d78-8f7b-a67da81d605b.json | 132 +
 .../87e20b7a-85c8-4845-94b0-ace1e18814cb.json | 132 +
 .../9ab01db6-3154-4c5b-b6a2-35479538d332.json | 132 +
 .../9d35316a-011d-4e45-ae57-317b53de621f.json | 132 +
 .../c9e7fec0-b244-4ca1-a117-a52fdd4671a5.json | 132 +
 .../ddd234e4-0665-4b36-943f-e99f0a293f50.json | 105 -
 .../0659cb01-0d52-42cb-9e3a-2d8cac01692e.json | 132 +
 .../af9ae4eb-2fdf-414a-8585-4f0f894a6a49.json | 105 -
 .../98490bb1-70f0-4e7a-8fd6-698ec9fcbd5a.json | 132 +
 .../d7964788-36a6-4b86-add6-cd8a1a42eb7c.json | 105 -
 .../6e0f7e7e-8927-436e-95a7-5a7c626ca241.json | 132 +
 .../9c5b3f4d-6e0b-482b-b142-dd7b387cae22.json | 132 +
 .../04840708-a4cc-407c-8b2a-876b382920a1.json | 132 +
 .../83b0844c-70fe-4b63-8ed2-4147390518ee.json | 132 +
 .../9cf10c60-bee1-4f4f-9e03-c3c10287bded.json | 132 +
 .../8e92dd9e-a68c-46ef-9b03-955c06a21437.json | 132 +
 .../dd1139d8-2b44-4516-b24a-1219826f5482.json | 132 +
 .../e37e86f7-b67b-4f0a-b1bd-92f30842b303.json | 132 +
 .../5128233e-41be-4e26-9ec2-2b7926c66b7c.json | 105 -
 .../bc3b55d5-35ca-48b5-832e-8544e145b1b1.json | 132 +
 .../5757cd3d-c64e-4743-8200-5e610e24bf95.json | 132 +
 .../928f9cd0-ce0f-43f7-aa5f-be9cbf4d91cd.json | 105 -
 .../ae8cd3ad-ce7b-41f4-8e4a-f11002af2e58.json | 132 +
 .../bee54048-ebb2-4051-a18f-aa85b0f2ce27.json | 132 +
 .../2f98c85b-5a2e-467e-9626-b1bdefe7bdd7.json | 132 +
 .../eb608d79-545a-4cc2-8d28-e539a3af7f17.json | 105 -
 .../2c530a3b-888e-4a61-b97b-ea875b30ec9c.json | 132 +
 .../4c9fb322-735e-4644-8121-088d00f78c5f.json | 132 +
 .../e7e7733f-682b-4e68-8f07-85f3ba7a7ae1.json | 132 +
 .../e9a4e1e2-bd55-4c3d-99eb-8fafd8f6ec44.json | 132 +
 .../42ed92b3-63bc-4fa1-bc16-c19bfb73368f.json | 132 +
 .../67582e10-cebf-4938-bfca-2eb6883e2c39.json | 105 -
 .../915ae579-786a-4eb2-a1bb-107a12c9c40d.json | 132 +
 .../f5fa6816-051d-4d86-bef5-ba9731b8bd9a.json | 105 -
 .../027ad81a-1271-4c25-9966-02370f6ee49d.json | 105 -
 .../3489ffea-a607-4f3d-a0c2-bd17147f244f.json | 132 +
 .../7b5ba8a8-16c3-4169-b97d-13dd5d4f8395.json | 132 +
 .../e557a750-53b2-4181-a19c-dfdeee11ee61.json | 105 -
 .../6411c44a-b2b3-4fe3-8ba4-9422a0a0b31e.json | 132 +
 .../fe344f84-7428-45af-940f-736275bc4d50.json | 132 +
 .../60956ea2-8b0b-4e4b-801a-d0689f9d46f4.json | 132 +
 .../1ad54bdc-419a-4dd9-9fbb-d7b7ee7038d1.json | 132 +
 .../bedab076-13e7-468a-b8e8-dddb57d78583.json | 105 -
 .../1d5c35ef-ec57-42a3-8459-6db62627c6d2.json | 105 -
 .../2ab375f0-2477-48a5-a5d9-0b5d0d7d0a84.json | 132 +
 .../e0525a52-d38c-4b2f-b59b-048b4bf71cb2.json | 132 +
 .../e9511b0a-1083-4a0d-a9e0-97efcfc0891e.json | 105 -
 .../01bc964f-552b-4cda-9ed0-cf720f0c8de4.json | 132 +
 .../b32d34eb-14b5-410a-8772-041d40ca73b8.json | 105 -
 .../c9e95c55-978e-485b-8a77-ab2e668e3254.json | 132 +
 .../c71c606b-ccb7-48e9-a6c8-b72205ec6c06.json | 132 +
 .../ee687c56-a9b4-4205-866b-b3067c066992.json | 105 -
 .../ae1801cb-d112-4d1a-895d-c6743779846a.json | 132 +
 .../008e3601-dfc4-4bc1-bf8b-f5cef43ae098.json | 132 +
 .../379b315d-96fb-4edb-b2d6-3dc113a10c17.json | 132 +
 .../8cd36aa1-6f87-4d4d-a1bf-adc87e0a26c6.json | 132 +
 .../7e0f008e-4327-4ee0-a810-b5564b651233.json | 105 -
 .../f76ce244-29f7-44f0-9850-7291f8e4cbf1.json | 132 +
 .../506871f1-0c87-4e8c-a270-eed7b5da2599.json | 132 +
 .../5b7a80ce-0fb2-4fb8-9381-184d7a434706.json | 105 -
 .../1ff70031-dbe8-467a-9dbd-9fd789b9841b.json | 105 -
 .../c20264fd-b1f9-4e0f-9f6e-1d58f1c18cda.json | 132 +
 .../59f14dca-923a-41f1-b443-cc3551063f45.json | 132 +
 .../633a786a-fe99-4a6e-b402-888e36e8b6c9.json | 105 -
 .../a1ba054f-b0a1-4827-b7ea-3988aa4cf1f1.json | 132 +
 .../a26b4b3f-aad1-4d2f-a97a-bf24850a3092.json | 105 -
 .../08e924b1-121c-4ff7-bf1d-06b9cb90c7c0.json | 105 -
 .../51d8f53f-ad7e-4dae-9e2a-0895729ff790.json | 132 +
 .../421119ea-0da8-4b26-a335-f2e720618c44.json | 132 +
 .../d0680660-92e5-471b-a4c9-2658e7c59dd0.json | 105 -
 .../8584e2c5-dd32-4cd0-9089-1b4e17a1ffac.json | 105 -
 .../b0e6bfb2-a8d4-4b1d-859a-aa821f646e57.json | 132 +
 .../4ba1027b-f0c1-4ed9-aa30-35c4e01e564d.json | 105 -
 .../7c4c2ccf-7d7b-4d24-802e-20c182290d07.json | 132 +
 .../5fdd0c8f-3393-4b59-8cc1-511c524c493a.json | 105 -
 .../95212a55-f382-4869-9e11-cfa201ba865b.json | 132 +
 .../a7da2118-063c-489f-bb31-40f1b7beeefe.json | 132 +
 .../e1c94d59-dfa4-49cf-9052-9ce6e713a0be.json | 105 -
 .../9a75ae18-8f9a-40a5-8a7b-0c38df34e9dd.json | 132 +
 .../ab9c685d-7b97-4bf4-bc0e-ffd5666e35d9.json | 105 -
 .../89f9149f-1f6d-4389-819a-d958b0ecc6b8.json | 105 -
 .../a85d4a1f-fbd9-4d21-9700-9e55e30c1391.json | 132 +
 .../2fd1c45e-209c-43da-ae85-d60887513a96.json | 132 +
 .../ae6e9c29-eb12-4dd5-bdbc-e84b499cf40f.json | 105 -
 .../91e0e6aa-b933-4a02-a28d-8d69e698c60a.json | 132 +
 .../2366b5e1-0a56-4d6e-83e6-12f12eca3ec4.json | 105 -
 .../6f3f3d06-2937-4c55-9b95-a62ae5253571.json | 132 +
 .../9b3ffdd3-ac18-4084-9e83-1bfc61db0ec2.json | 132 +
 .../f5e52953-2dfc-4661-81cd-ed96d7a52482.json | 105 -
 .../3f6ec864-adf4-422f-85c1-19ef2417489a.json | 105 -
 .../60077cbd-87af-4a00-a359-9235acb011ed.json | 132 +
 .../577936a8-b450-4233-b633-064565b3d1a4.json | 132 +
 .../470b9413-2cc8-4bf4-9e7c-0b8e99929568.json | 132 +
 .../f4c299f0-d957-4784-8512-23f72a26a095.json | 105 -
 .../3cbf9c73-0dc8-402e-bc94-c6d52b9f1af7.json | 132 +
 .../ab7ee3ac-4d47-4ec6-a2af-8a6f7eb96684.json | 105 -
 .../3fccb1d0-5ae1-427a-adae-37004ecbacaa.json | 132 +
 .../4ae25fa0-54af-4f47-853f-c97cd7b312d3.json | 105 -
 .../6463183f-4043-4b96-b4d1-0bd41b4d6876.json | 132 +
 .../0b102423-1a06-4e5b-a287-710695658b63.json | 132 +
 .../4207b47d-711c-4af8-9c70-becb270973eb.json | 105 -
 .../4fefa5ae-d421-4883-b734-d6cc8bd8f4d6.json | 105 -
 .../b7e4ffd8-2a5a-4364-844a-a308dd7c899c.json | 132 +
 .../3fa2e3ef-a375-4ca5-9f85-7cb986313d53.json | 132 +
 .../84b63639-3343-4568-9fa7-d353ccb5b465.json | 105 -
 .../abd48d9d-0443-40be-a23a-68922771e14f.json | 132 +
 .../436ff0a4-9907-4e56-a5f2-c97f1b13f81a.json | 132 +
 .../8b50fd5a-9f95-4213-98e2-ee66e1602cdf.json | 105 -
 .../7a654100-b206-4011-828e-fb386df27d0c.json | 132 +
 .../9f85efe5-9fe1-4ad3-9438-da4dbf886f9d.json | 105 -
 .../2f0e262c-a099-41f4-89f1-8b251708a960.json | 132 +
 .../d070a397-6bd5-4407-b030-aecdc31eb47c.json | 105 -
 .../7bf3e9ca-7d6f-4d43-b8fe-aceb8d60c7c6.json | 132 +
 .../a0cdb8e9-7920-41eb-864d-9995c3168277.json | 105 -
 .../8703dbdd-12ef-457b-8cda-f570c8f5c890.json | 132 +
 .../c373de55-1c2e-4cd5-a0e9-ec462f80010f.json | 105 -
 .../d3dccfbc-ccc3-4d7c-abe3-4669c8efca3b.json | 105 -
 .../d77f3e8f-1eea-478e-babd-ba873d2d427c.json | 132 +
 .../783a4385-c802-4bb3-9a21-90629d16efc7.json | 132 +
 .../93164a9c-187c-45eb-94e0-12910b6ebd9d.json | 105 -
 .../92e8e4af-bdfd-4fb3-8b25-b7b88470c56c.json | 105 -
 .../bb4ff51e-ce3a-42f5-871e-3e5e8977bc42.json | 132 +
 .../8098c6f4-c2a4-44d9-92b5-72dfccd83395.json | 105 -
 .../e80d25b5-3f4b-45a7-9472-09f98db03bf0.json | 132 +
 .../68285cd4-9573-4fa7-af6f-321c7b4c8171.json | 105 -
 .../7fed0b1d-0d79-4784-8fd6-42f8611b1751.json | 132 +
 .../9ae53763-119d-40af-bdf2-97dd34eaf9e3.json | 105 -
 .../be534cd3-8245-4370-ba6c-9687b431ee8d.json | 132 +
 .../db2c4148-d7be-4f13-a449-095b78bda7c2.json | 105 -
 .../e98967b7-3aff-4baa-92eb-eff86bf09797.json | 132 +
 .../8736a22a-f980-4a01-953d-217f27050129.json | 132 +
 .../91ac4c22-3f2a-48fd-aad8-5c26a5f07ea6.json | 105 -
 .../75a2b5c9-7c73-4bb4-8e99-af4a3a27589d.json | 132 +
 .../c5a9d4e0-a43b-4249-abbb-f544bdb2d806.json | 105 -
 .../0e0ebdc7-a5bd-4314-9bd7-fc8a11541a4e.json | 132 +
 .../5b9a91bc-bdca-468e-b8eb-b0e97fd97148.json | 105 -
 .../aeda694a-795c-4a42-8b40-d406b7223627.json | 105 -
 .../f8579305-003b-4727-b904-bad4f363a616.json | 132 +
 .../3103f36a-4a88-4a39-8261-0b597f8d6db4.json | 132 +
 .../3e26804b-13fa-4115-a000-d6be3339e7b1.json | 105 -
 .../d1f24979-eced-4dca-a5a1-4e4bfee28779.json | 105 -
 .../eda9de3b-ae53-4102-b203-eddadbc50464.json | 132 +
 .../171ae287-000a-491e-9ecb-ac7d29217e9e.json | 105 -
 .../b7a0e530-08f8-4c6a-9258-733b59096812.json | 105 -
 .../b7de4fa8-d97d-400f-bc3f-ecb1963a03ed.json | 132 +
 .../fa6ecaf9-457e-4135-ad25-4790ebc27737.json | 132 +
 .../ebaa99c4-ff66-421d-8ba7-dae2c5fa274c.json | 132 +
 .../f14d0513-676d-45e3-97c4-bf386f61b856.json | 105 -
 .../4d673b5a-3237-433f-9e08-f614fe10edc4.json | 105 -
 .../e388c707-8b35-49a4-94eb-f32e983fe33e.json | 132 +
 .../e19c2b24-4deb-45b4-a0a9-2d055bc90446.json | 105 -
 .../f6273192-31cf-4ee1-af45-c2f62de05330.json | 132 +
 .../105650e6-d9cf-4106-9d55-6f3c08f2f1cf.json | 132 +
 .../449f6b1a-5264-4c7b-82d6-60e61841b7d6.json | 105 -
 .../2e22170f-839d-482d-bc8a-ed345aa900af.json | 105 -
 .../a1d23749-40c0-4ccb-a104-bf0de63bc2bd.json | 132 +
 .../4e4b4cf9-48d5-4ff6-92c0-1e9d7b874b6b.json | 132 +
 .../75f9224b-df09-4693-8b04-c00e17785250.json | 105 -
 .../3c4713a3-3973-4a04-9c4a-a6782251734e.json | 132 +
 .../4bd52ced-e009-4805-8d0a-ce37b25f103c.json | 105 -
 .../14e2e5a7-d43c-4a02-9af6-6c378778d7fc.json | 105 -
 .../de70c700-a007-4e87-a3db-941ee285eb1f.json | 132 +
 .../a1324a7f-1911-4fa9-8d83-be891f752a61.json | 132 +
 .../d2d7e55e-87a3-4390-a1e4-47a2d0c62bd2.json | 105 -
 .../9c4af0df-f538-4755-8cd0-eec6b2b26524.json | 132 +
 .../a13cf03f-cf1a-49a8-ba6c-d6e3b27036fa.json | 105 -
 .../dab94fc0-5bea-4875-a802-8ef793bc7fc7.json | 105 -
 .../fde650a6-a5d1-4edc-bd64-8be806663263.json | 132 +
 .../96dd1a08-b166-4d8e-ac31-5e948adf931b.json | 132 +
 .../e16d5502-1721-424f-a149-9a6233a2183a.json | 105 -
 .../3086045f-e22d-4aca-9459-fc64454a2fb2.json | 105 -
 .../3b90b9db-a68e-4ee9-bd4d-a18cec357753.json | 132 +
 .../444a6ace-77d4-4d93-b80b-ff5c7e2f6888.json | 132 +
 .../7e11a778-fccf-4a91-81cf-c06f1a5c77c4.json | 132 +
 .../e5d126d7-e0bf-43dc-95c0-184ea1d586ea.json | 132 +
 .../d05b129c-6b9e-4e6b-80fc-af65db620c5d.json | 132 +
 .../d9792fac-29c1-45b2-b649-cdebb6830e2f.json | 132 +
 .../fcc2f06a-e6c8-4c28-bf22-4ee582392912.json | 132 +
 .../c6e13327-90b3-440d-9367-dbcec54dd6cc.json | 132 +
 .../30b02429-350c-4d86-aded-ba8597bec4d5.json | 132 +
 .../7d1ee802-106e-4313-ba1d-72d5a0676c88.json | 132 +
 .../1b3af020-f65e-44b8-a9a2-ad60fa686427.json | 132 +
 .../6e40871d-bc23-4f1c-a005-f5b8eb096f84.json | 132 +
 .../96ae17c1-69ef-46c6-bb15-c1b576ba8131.json | 105 -
 .../1ab33ed2-ea3b-4c6f-a2ac-2465ddd844f4.json | 132 +
 .../6fd7bb75-6648-4bfe-a232-f9efe4b7c45e.json | 105 -
 .../0a8b6c55-da69-4f4d-98cc-9d3f5b82d9e2.json | 105 -
 .../ec601f5d-bf19-4407-ac41-6b9272d94735.json | 132 +
 .../87e53761-e8b7-4032-ae7a-c3a91704d115.json | 132 +
 .../f69621cf-6e46-4805-b8f2-d7a7cba3a0e4.json | 105 -
 .../59492d86-4b85-4865-84e9-84ab4ace630c.json | 132 +
 .../feb0d715-d1bc-4b0e-8585-a0646c07244b.json | 105 -
 .../cc082df2-259c-44c1-abe4-ef349056a2a9.json | 132 +
 .../3f069053-b24e-4242-9302-d46b82e511aa.json | 132 +
 .../62cd9bcb-a74c-40b9-be84-a0077235ae3c.json | 132 +
 .../b4cd25f1-87d5-4173-a4d3-928444f6cb37.json | 132 +
 .../ddd4716e-d8ae-46a1-8fb4-c27e2da40e6e.json | 132 +
 .../e791a3d6-928e-43c9-96ee-156901e8b18b.json | 105 -
 .../bf683545-a6df-4deb-9a91-ea6b8eae8be7.json | 105 -
 .../6b5b21c7-9284-4117-a63c-65628604e1a5.json | 105 -
 .../f822093a-2bdc-4284-8af2-8048d09afeb2.json | 105 -
 .../1f140f2a-c9cb-49fb-8bcd-e59f699fd12a.json | 105 -
 .../dc615b98-9255-4a6e-afe2-c79d59362520.json | 105 -
 .../cff09938-5918-4825-b974-194019b48165.json | 105 -
 .../6ebf0016-f747-4ccd-82fa-db427733b2f9.json | 105 -
 .../0da6366b-b997-411e-ac76-c25b061e13f8.json | 105 -
 .../9084d476-dee7-4447-9955-e0f066bd35ba.json | 105 -
 .../685fc779-4f8b-4110-82da-5a49697153a0.json | 105 -
 .../ce90bca7-f999-44ef-9b72-1fdb4ac68eb0.json | 105 -
.../a21cd9f0-6006-4587-bcd1-f1d42dfce7ba.json | 105 - .../43c1b559-e9e8-477e-95d9-1c28ac5d265c.json | 105 - .../e28a8f11-68f6-464f-b1b8-21938cb41aa3.json | 105 - .../cfaa9b4e-8588-45a5-9b9d-4268a71b128b.json | 105 - .../152e8d2f-8470-45b2-8318-9b6c44438978.json | 105 - .../3c70b5d5-784d-41fb-8ca7-eabd6a96a195.json | 105 - .../15499118-2a47-4a6f-8c86-158a87a9350f.json | 105 - .../f68c55dc-0d74-4c75-ac57-62f23cce01b5.json | 105 - .../39a68088-0a01-482d-81b3-c6a84d98d0ca.json | 105 - .../435a8268-cf26-4c78-8789-758dd32759b1.json | 105 - .../a18409fa-1372-401e-8ae5-f25eaa6386d2.json | 105 - .../a661e335-7ed5-43b9-aa3b-1e027cebdb75.json | 105 - .../ba4e0ed2-201a-4007-afbe-65e8276d853c.json | 105 - .../03196258-8cc8-4c57-badf-9085ede8d658.json | 105 - .../07a71559-e618-4ba7-8721-bc6834f1c727.json | 105 - .../fd270937-c889-4a2b-aada-341a44c80d46.json | 105 - .../af890cb6-9d90-41b0-a7a1-c87f3584b93c.json | 105 - .../1e5b62a3-018b-429a-b2b4-325545ee99dc.json | 132 + .../53b78e02-9491-4f3b-a03b-7c015dde640a.json | 105 - .../51cd5c94-7c87-4758-aadc-46acf20ab4b0.json | 105 - .../958d410e-ce43-44c0-8a56-685c0a618408.json | 132 + .../57c53f20-aa32-49fd-926a-f26c9d0759d4.json | 132 + .../6b3c8f0b-25ed-4ae3-be89-a91815091de0.json | 105 - .../2305b9e7-1c2b-42d7-b306-802e32d53e0f.json | 105 - .../76def522-6fe1-458f-bfbf-99b50ece3367.json | 132 + .../c2d66fd5-6c95-4b8e-b87f-c8f0ae00271a.json | 105 - .../c467bc88-6769-48ac-abd4-867ee38bbe57.json | 132 + .../2ee1a517-ef52-469e-ac5d-f14e3d72c87c.json | 105 - .../801681eb-66f4-46e0-bb2b-7ba4b46679af.json | 132 + .../b7b3fcb7-bbc7-4f39-9daa-7a54362d5d68.json | 105 - .../cdd0ea1c-b17a-4816-953c-1d7164c64114.json | 132 + .../b2060893-1f7d-4e7a-a458-3623147ac118.json | 132 + .../62126b06-5bd2-451f-a76c-7c227690f149.json | 105 - .../cf8aac35-679a-4ebb-bca8-6e0f2d42e71b.json | 132 + .../34bfe887-5a3a-4626-997e-c35d3a0ec341.json | 132 + .../9b5b23bc-44bb-4d47-91a2-18e23571743d.json | 105 - .../69a65ae3-71fe-4e33-be2d-20bc0c25969a.json | 105 - .../b81acc47-6fd5-4f89-8c70-f8f14b677e04.json | 132 + .../30b977a8-7882-49be-8621-9ee3fce270ec.json | 132 + .../d86238d3-3a4e-467a-8ce1-e6a4a903aa3b.json | 105 - .../3367fd79-713c-4691-80cd-4abb6b2818ef.json | 132 + .../d8d1a5b1-cc9a-4af9-b95f-db78f7edf70e.json | 105 - .../9468fda5-a233-4d19-9a99-602e694f4a02.json | 105 - .../add899b8-f3e6-4d87-8846-8254f4dfbd5f.json | 132 + .../49f92222-f6cd-47e5-968d-10dc4345dd90.json | 105 - .../53829ec0-f233-4b61-a672-6a467823caaa.json | 132 + .../525f2e27-bd77-49e9-85db-61efddbdd186.json | 105 - .../e2b41200-bff2-4835-a0ea-27ff56937570.json | 132 + .../3d33f26d-72be-451e-bcf0-501e0bc2f1db.json | 132 + .../9924f2bd-abe5-431c-aa06-be24952ca363.json | 105 - .../3b4c05fc-2ccf-46db-8d64-045508f6614b.json | 132 + .../a108864f-40d6-492b-8440-1cbb5d87a5fe.json | 105 - .../af83a91c-3b07-48c6-9726-5bd77347f810.json | 132 + .../bebbfd98-fdba-413d-9e7d-06af8bd4d5a7.json | 105 - .../48759b07-9aea-42bd-8d73-9c4208d2789f.json | 132 + .../773c97e1-0e43-46ae-a134-8a08ca9b5094.json | 105 - .../68820679-55f4-494d-91a0-0db1bccb8983.json | 132 + .../029774ac-a63d-4acc-a37c-4194e4afdecc.json | 132 + .../036c4f96-2d08-40a1-968d-293e0b3a1ed0.json | 105 - .../146df856-e2c8-41eb-b860-ceb78c126e55.json | 132 + .../d3ba7ff3-e0d7-48e3-b63d-9648a193679f.json | 105 - .../74c6bea7-ad16-4f08-a2b7-9c894b9ce207.json | 132 + .../beae9826-35b2-4758-a20a-10c8402daa42.json | 105 - .../69cb8c68-5847-48f0-b2bd-0756ec761837.json | 105 - .../b5e97b2d-d8a2-485a-8b0a-71590e4a376e.json | 132 + 
.../12a231e0-deed-4d2b-9904-79a8b543d200.json | 105 - .../e79d0a8c-caec-4dec-b119-3229ffa69a73.json | 132 + .../1dad9bda-fbc8-499b-aab0-29be59b6921d.json | 105 - .../2c760893-b52a-40a9-9420-fb193a62a5c3.json | 132 + .../cf3f79fc-1fe2-4b55-a808-5664cc1f1809.json | 105 - .../ef9b84e0-68b0-4caa-9980-96ea5e7f440b.json | 132 + .../e45a0914-baee-4fd4-a231-3495b18db9a9.json | 105 - .../fb48aff8-3f6b-4934-9fb8-d72bf8614d6f.json | 132 + .../3e875ab6-6065-4400-8038-0fe6437f44d5.json | 105 - .../9450acd9-16b6-49a2-9b73-cf1161b96df3.json | 132 + .../0d50ec2d-5dd4-487e-80cb-9533246a9876.json | 132 + .../702f1485-2941-4e27-9c96-11cee2449df8.json | 105 - .../f6e6827d-fbf8-49cd-bdad-e8c7ea87550a.json | 132 + .../f801b633-5767-4b74-a0db-e474c9349820.json | 105 - .../02201ae1-ec65-496c-bfdb-0dec8aa5308d.json | 105 - .../c5e48fd8-0eea-46a9-8790-1745923561d3.json | 132 + .../870c7739-8886-47df-8e20-09bfae03b9c5.json | 132 + .../bc38a266-c3bd-4ecf-8149-6b26bb32803b.json | 105 - .../65d0aca2-06ae-4a09-9fb2-2bb54939a554.json | 105 - .../d8eb5fd1-f1d4-481d-85af-88a11d7b6f6f.json | 132 + .../6625b2e0-1f65-4dc5-9913-ceb0e82e6439.json | 132 + .../e10e45b8-0d37-4905-9ebf-acc7922b7ea3.json | 105 - .../24e7df20-e046-48f7-909e-502d0c70216a.json | 132 + .../69093327-3726-469d-9750-b9fa39423310.json | 105 - .../7920f562-9e7f-4a64-85f4-584b13af44de.json | 132 + .../e3746ac6-3ee4-4d95-b800-509bed07aec3.json | 105 - .../2f2f821b-037b-4f3f-87f6-16703c0dc61a.json | 105 - .../c6620817-69fe-40e2-bb0a-1e9c739ab65d.json | 132 + .../520e2d66-4143-493b-8533-64f86c6d676e.json | 132 + .../7836190d-33df-45c2-b020-8ccec01de1f3.json | 105 - .../2ff28335-81a0-4d61-b221-a7edb877da4a.json | 105 - .../993bdfd2-3a88-4de3-9ed9-9b7b63c0f4f5.json | 132 + .../332f06db-35f1-4759-b3f8-973b1fe6fb9e.json | 105 - .../4e1be694-cc4d-4943-a8e4-74913cfb2ebe.json | 132 + .../1cc45753-aeed-4804-a6da-413437dbb940.json | 105 - .../42c174d1-6211-4438-bb9a-24f3cf386a6d.json | 132 + .../625bf39b-a118-4ec6-82d0-5405cf70ba53.json | 132 + .../fad200e0-05bb-42d7-b7f3-caba938ca09d.json | 105 - .../42a8b694-ef8f-47d2-8da3-e4db453641b3.json | 105 - .../e09cb198-d259-42ea-a356-6efe61b1e12b.json | 132 + .../5838b130-c2e6-400c-80b7-6822efb5db2c.json | 132 + .../83024ec4-e4a4-4dd3-adf4-654c90c3a271.json | 105 - .../52b51638-64cd-4b19-8fc7-c223d50bc549.json | 132 + .../8dff3ec1-066f-4f5f-ac57-879d693ee3fb.json | 105 - .../28b3178b-c963-4267-9649-3f7fc10fba3c.json | 132 + .../2ed76213-e562-4b36-bf46-93f09df88ee9.json | 105 - .../748298a2-5042-4636-ac7e-051c28916f3a.json | 132 + .../d8fefd3b-78e6-472e-854c-15f40ace7878.json | 105 - .../03bcd4e6-1620-424a-9200-c0cf4b73bbd2.json | 132 + .../828bcb36-3902-4157-9323-a5dcf592a795.json | 105 - .../c7fba530-63cc-4ece-a171-4a2919aa8057.json | 132 + .../cd34091b-2639-476c-8419-e6c327cfabc7.json | 105 - .../c25c1046-a8d5-4f4b-9a72-c4591cfb4023.json | 132 + .../c3800a5c-310b-41cb-9b07-cfc1f1b13256.json | 132 + .../f98bc033-55c9-45c1-a101-3881507bb733.json | 105 - .../e8e2b99f-cf83-4776-9117-aa2b5d9c8068.json | 132 + .../2da19e45-117f-446b-b956-b35a20bb7411.json | 132 + .../72eccc9b-df63-4b2f-8975-a1c89940802c.json | 105 - .../9e982a33-19cb-4381-8560-884bc8946a2b.json | 132 + .../e30fead2-6516-480f-abd8-6ad0713cb053.json | 105 - .../9130a862-cfd7-47ce-a92a-f60438739491.json | 132 + .../858d3717-fcb2-45d9-8eaa-1b00ae0ca918.json | 132 + .../9c801b4e-228b-42a8-a7f7-ea2bf125d716.json | 105 - .../342c7c0f-92f0-4296-8e0a-519724133bb5.json | 105 - .../5f1f137b-cb2f-4ee6-8bc9-5e0b94939f35.json | 132 + 
.../6feca911-7a6e-43a2-b59d-7cb48070fe8e.json | 132 + .../8afa4f43-96fb-46b1-84e8-bf98928aa484.json | 105 - .../71d5525f-c257-4b88-b84d-d75b3a8328fc.json | 105 - .../d3ad9813-273e-47de-be16-312cc67ac64f.json | 132 + .../317205ee-2cc6-4523-9662-be6508314b08.json | 132 + .../6c0d909f-ee4f-4e1a-8db9-abf1920359ed.json | 105 - .../3b5fe65a-50a1-4036-b81a-86117356cab9.json | 132 + .../ba821a1c-3b8e-4952-9f7b-b1f18923c4e7.json | 105 - .../812ac262-97f4-485e-93de-f8d420b8658e.json | 132 + .../985ac874-e7eb-4431-81c2-a79f3865c696.json | 105 - .../39cd7eb0-781e-47b6-8eaa-c72e702f778f.json | 132 + .../cc9fb769-3d0b-4e53-9942-d4f99203a629.json | 105 - .../596f4d11-f091-42c3-9f1e-b95e0ba6dbd9.json | 105 - .../9411a8a4-306e-43da-96d7-c93eb3aac398.json | 132 + .../314cfcd7-674a-49d2-adf5-6d45c30e2382.json | 105 - .../c93feb32-0526-44ac-b3ed-95f08c37cc9f.json | 132 + .../1a3b0f7a-afb6-4002-9321-23a86f000c5c.json | 132 + .../494df3f9-7ce9-4f81-99c4-e6100d6e4187.json | 105 - .../2de21869-2851-43f8-b5c3-a4b9e0e6e3ac.json | 105 - .../8d29363d-3096-4c54-a40e-acf4a7318a04.json | 132 + .../8cea452d-63b8-4e82-9511-64c94f8e140d.json | 132 + .../95eb37c8-2a58-45e3-bd86-2c305e3cb5dd.json | 105 - .../5e5b5424-1d48-4a5e-8775-52c75609c338.json | 132 + .../6ae028c9-19d9-447b-93c1-c4548aef84f9.json | 105 - .../73787033-ed1d-4d2e-b7b2-e886ef6f1036.json | 132 + .../81c7a3df-7e92-4efa-a323-51ea3e0a4fa6.json | 105 - .../54c9403f-2525-45c0-a585-9ff598f95f6b.json | 132 + .../5f35c42b-2d34-42bc-b94e-127a678cad2c.json | 105 - .../77d0d88d-7ca8-4f3e-8b79-295f53140635.json | 132 + .../e8602fbb-422c-464e-87f4-79c6e1a4afcf.json | 105 - .../28bf3b2a-6c0c-4994-aaf5-80b67d82a955.json | 105 - .../727f27e3-2a3f-4572-8db5-87e498c4b6ca.json | 132 + .../8789e9aa-5cfb-4eca-9795-540c5a9b4bb4.json | 105 - .../b6e0cc97-27cf-4082-a908-95d5c39014b8.json | 132 + .../1c3ea099-8b3b-4184-9f30-e7cdeea8f24e.json | 105 - .../3b77ec51-fd47-4bc7-9e96-ed46202fef7c.json | 132 + .../041d45dd-c371-4e9c-9cda-a63e3d7a1b2d.json | 105 - .../b24cdd3f-3e44-4ebe-b2b4-209ee0bbfbd3.json | 132 + .../bc221748-c03b-4fee-9147-8f63b0017f0c.json | 105 - .../e47a3cab-dfef-47f6-9377-9ee32489bab6.json | 132 + .../1e4481fe-458b-4c23-8a6c-55439fb8b4fd.json | 132 + .../e93eff52-c6e1-474e-8089-f672000fe1e5.json | 105 - .../5a5746dd-0270-4151-b774-8eaa6860d5e0.json | 105 - .../6421e9dc-e7ca-4e1c-9f4f-1d1ac409c4d1.json | 132 + .../55f43b53-6ed9-4c16-bf75-c968999a6f36.json | 132 + .../aaa9cd01-cca9-489c-91e0-79ff026eb258.json | 105 - .../5e499da1-f8c1-4830-828c-7d4013ea0243.json | 105 - .../6ce93e70-04b1-46b8-b3e3-7eb0df35e1c1.json | 132 + .../95096a89-2baf-4b14-bc6e-1f30e920c086.json | 132 + .../fcd2c5e3-ebfd-4c1c-ac8a-d28ec08f1bf2.json | 105 - .../4c2ab1ed-8177-4518-ae3d-754f9711369d.json | 105 - .../f1651632-2787-47cf-b471-89d1b89a6b01.json | 132 + .../133d7669-db7f-47b6-b838-51b9577a9e68.json | 105 - .../e1fb2ac9-8f60-4dc1-9e0d-99fcb91a53a9.json | 132 + .../b402d383-b80e-4cd9-b2ec-a1e435f67ac5.json | 105 - .../d3accbc1-d698-4357-ab08-0b98fb49b4ed.json | 132 + .../5388a25a-5780-4ae1-999f-172b558a7b52.json | 132 + .../a0048817-4f45-4bca-ac1a-b7e0c25bd7ab.json | 105 - .../034fa9fa-4103-428d-a50e-b117ef5e0726.json | 105 - .../9e4143ff-d461-4fdb-8bc7-86f959f69e68.json | 132 + .../5d843bd7-b34b-41d4-92ff-c25a709b4930.json | 132 + .../fbbd671a-3005-448a-bc15-718ba23bcf72.json | 105 - .../2c28dcd3-af20-41ab-9234-a8296ecc98c0.json | 105 - .../87975b2f-298b-4297-8f4d-e5bb1bf5d113.json | 132 + .../41bb8174-f3d6-4862-b892-dbc9f6e2e696.json | 132 + 
.../9bed5ccb-35c0-40e1-89b8-617656787052.json | 105 - .../259a0166-2ee3-409a-85ce-963d90d05ae7.json | 105 - .../683ad2cd-5e39-4088-b98b-94d89dda7b88.json | 132 + .../08ffd7ab-ccca-4258-be6d-cbc151cc43aa.json | 132 + .../0df26c01-7fae-4254-8e97-e03c6078d861.json | 105 - .../4b6efad4-c697-4f0a-8d24-75dc49d8ec06.json | 132 + .../fed6b773-040e-409b-884e-a97a1abfedc0.json | 105 - .../4986c30a-85b0-4263-9be4-d69c9b067e0c.json | 132 + .../ff079687-4519-4f0b-bb1e-2b447cb2b4c9.json | 105 - .../36735132-1510-42cf-a68a-c46507f52edb.json | 105 - .../47b5a878-1a4a-425f-ae6f-ac286f681cca.json | 132 + .../992a6862-46b9-415e-858f-2eff8709ca81.json | 132 + .../9989efbb-bd01-4c7c-bf30-67fa81698906.json | 105 - .../0a5ce684-675e-4fbe-b141-df12903228f9.json | 105 - .../c6391381-c973-4068-b72c-af08762d9e5c.json | 132 + .../0f6e18e6-1b0f-43f4-a9af-6632f6ce63cc.json | 132 + .../8b5493df-86fd-495a-8dce-9c5398795fc9.json | 105 - .../56d9ee92-6774-4c9b-9861-c5f0a9945e7c.json | 132 + .../a3d0b6ec-e2be-4ca5-b083-df3c7ea0b385.json | 105 - .../d3e753cc-37fc-4d77-8b2d-da90a7843d60.json | 132 + .../eb08ef6f-6631-47c4-8f52-bf9454ad34b6.json | 132 + .../2207b154-c5d4-4e5a-ade0-271e62d6345f.json | 132 + .../5ed0019b-dc1e-4dd8-82e5-2d4cdb28beb9.json | 105 - .../f4161154-7777-4261-9275-a3002a1305d8.json | 132 + .../7f1c6c88-823f-4597-9794-bf05c076d4d3.json | 105 - .../8523812d-1db6-4a9d-b06b-ac904191789d.json | 132 + .../4e78f82e-aa31-414c-9c59-9c8e318fff17.json | 105 - .../6cd9ea81-618d-444e-a892-d4f9819daa67.json | 132 + .../2217326d-377a-4503-8180-206c12c87436.json | 132 + .../715be726-e0e3-4589-91cf-85e41dbcbf8a.json | 105 - .../3bbb10fc-e3b9-4c6a-ac35-ee5de9ecd330.json | 132 + .../c4041b70-acce-4088-a3b9-299d4424e240.json | 105 - .../01124f11-b739-422b-97f7-062074b8d0fb.json | 132 + .../a7b425bc-9160-44ed-abf1-18c3b84cede2.json | 105 - .../0c883e9c-4cec-4c65-aa10-96e0d0de2e1f.json | 105 - .../7cc4c93b-7c43-4bed-84a3-fa1cd9130abb.json | 132 + .../74d10ea5-3d08-4bb2-9246-5e053eb20fea.json | 105 - .../bf3aa551-f9c6-4203-b2d4-55cf9e6e2872.json | 132 + .../2eae8905-5338-4a78-86e7-d354d06efa23.json | 132 + .../487e1883-01c6-4714-9447-67837c78655b.json | 105 - .../79bccc27-27a0-4194-9c46-5e89b0f21b9e.json | 105 - .../9dcc4121-e046-49c7-969e-7255b0c32d3d.json | 132 + .../6eeb591b-aed2-4cdd-85bb-75011c9c5760.json | 105 - .../dd7d4acd-549a-467b-b461-0eba5b019122.json | 132 + .../159969cc-32c5-4f6f-b586-8e6d44180b44.json | 132 + .../689d38cd-898e-43ec-92e8-238cefac6776.json | 105 - .../2edb276e-86c5-4bde-a696-4f68fb659b4e.json | 105 - .../b80e559d-e519-4678-8abc-ee5591b81fac.json | 132 + .../380cd349-5309-40b8-b549-ac6d6d42331a.json | 105 - .../90c137c9-939d-4e77-9fcc-9e33551a6121.json | 132 + .../6d4dfc45-b7ff-47a2-bcf0-f12641365cbf.json | 105 - .../f25d6fef-d337-4cf7-ba05-ca6ff5eccd52.json | 132 + .../9c414577-7f2d-487a-9f2b-7675e0532ac1.json | 105 - .../c6f92306-dcdc-4549-bfc2-feb62a3a6ef6.json | 132 + .../96c64d23-d23d-486c-83a4-4c0ab4f09d60.json | 132 + .../adb6f7d5-db2f-49b1-aab4-1fd3dfcb7e34.json | 105 - .../243abf0b-0f88-4b4f-ab51-6c8aebaf19be.json | 132 + .../785e4cde-ec97-4e36-8ee3-3fb4c2543901.json | 105 - .../438fb728-d6ad-4c28-a43c-ff82d522cd50.json | 132 + .../7b9f72e6-0280-46ba-8645-ab8dcb9ddf4d.json | 105 - .../120d9ddf-0e6e-4fb9-9250-019d1fbfdc28.json | 105 - .../94b45b8d-b754-4fb4-843d-b7ffeafc4f1b.json | 132 + .../5618fc82-d455-4261-8e34-1190d70fd3f3.json | 132 + .../343e0d36-5470-4865-aeeb-a9963b38f90a.json | 105 - .../395f6339-3fca-4f4d-befc-2d231008efdd.json | 132 + 
.../10d2454a-ae69-43b6-962a-77102645ed56.json | 105 - .../b22696ac-7074-44f2-b72f-c59ca0a41ce6.json | 132 + .../6856f8b6-a719-4f69-be71-4df582015f28.json | 132 + .../6ed13eae-92ee-4fa7-9ed8-d9f21d6de48c.json | 105 - .../9b63b3ad-568f-4f15-9cc6-36049ac89727.json | 105 - .../f2c0ea2b-76ae-4469-832e-84c0b79fa283.json | 132 + .../5619e3cb-eb3e-4420-a156-6f7b2a5d372d.json | 132 + .../d721cfe0-eb01-42fe-955a-bfd219c38917.json | 105 - .../9d5e329f-491a-4608-bcac-1ee63046b34a.json | 132 + .../dc3aed7d-01e0-46cc-85f6-2a06cf6b6edc.json | 105 - .../80953f08-6530-4bab-a375-cc542081aabb.json | 132 + .../ccce28fd-d3ae-427c-b848-f08b2cf85692.json | 105 - .../0b8691a8-f394-4da3-a67b-faa1af9b42c9.json | 132 + .../9dd4aa3f-98aa-4e51-bd21-c999b3990a64.json | 105 - .../26c88cb2-7c81-4b0c-8493-baa9d8f7b1a0.json | 105 - .../fb541a2b-d9bd-4aa2-8b83-da62a3b77731.json | 132 + .../b731eb88-e0ed-4edb-bed3-2d82bbce43bb.json | 105 - .../c20d1c62-d3e0-4e30-b0d3-4c62a6585d23.json | 132 + .../7735d88c-bdaa-4a12-9a99-a2dc5ec2ec66.json | 105 - .../8a10eeb6-7178-4c78-8940-68fad78e389b.json | 132 + .../5c534761-19b5-4111-b1f5-c2fc3e121b24.json | 105 - .../f0bb774c-a842-4261-b817-b169ce65a493.json | 132 + .../59afe234-3a7f-49bb-873c-df6cf793e5e5.json | 132 + .../8a89468f-fe2f-4bc9-be99-c9619c605efc.json | 105 - .../4074081a-66a6-42e4-994f-72541f90888b.json | 132 + .../f75e27a8-00e8-4473-b7ed-3fffa131ee0a.json | 105 - .../6a618ec8-c029-49ec-9ea5-da52b5231280.json | 132 + .../edc8f510-c961-4c1f-9757-e80c4247f275.json | 132 + .../aaa5d1e6-5aca-4471-87ea-7195610a6c1d.json | 132 + .../e8596a17-9e5d-4ac5-9968-44d302628c31.json | 105 - .../89b45e8b-9979-4c7f-8aa6-c6ab7009cab0.json | 132 + .../d96ef95b-ca39-4e33-9f6b-a4faa71e5009.json | 105 - .../21b53896-3b7b-470a-a49f-4b2cb4e6adef.json | 105 - .../41000c74-8b29-4369-996f-cf3a2fd09f63.json | 132 + .../a1765846-74e1-440a-8851-12a571444059.json | 132 + .../cdc5671a-e164-43b9-864c-808a9464e618.json | 105 - .../37276848-95fe-4403-896d-bf9fafbff04e.json | 105 - .../9c6b594f-387a-42a3-9e40-3b26363e6071.json | 132 + .../2b910401-457a-45dd-920a-559f4595897b.json | 132 + .../90b7be49-53a0-4d7f-8995-cbc52fe3a70f.json | 132 + .../5e8854ba-7147-4fdd-a568-1ea58e79e7d8.json | 132 + .../df6e0cfb-d720-428a-a5ad-b1529faa07c0.json | 132 + .../a88a6e6f-2253-4b67-9527-55ab6153e40f.json | 132 + .../00c66a37-b46b-47e8-a098-ce12433c1135.json | 132 + .../6ad5483c-13dc-4e79-a719-66af383d195a.json | 132 + .../df470b21-0d55-4d28-af25-75908799a0cc.json | 105 - .../9fa6813a-7acb-4c08-9912-6dc0d356a7e2.json | 132 + .../d22507ab-2601-4bf0-a8d8-b456102c85af.json | 105 - .../07236482-8709-4aa8-8e63-762b2f591b2a.json | 105 - .../3880e3bf-6ff0-4eef-a519-2649014254e1.json | 132 + .../e77efb9d-b1fc-4833-8e7f-8da683019018.json | 132 + .../2bcc02df-8d27-412a-8b58-c331df98e4d4.json | 132 + .../f50a6538-057e-4e57-af79-ba3a5b7121cb.json | 105 - .../622531d5-03f8-42cf-974e-94291aa1e515.json | 132 + .../f71c4189-288e-4c6d-978c-d793ca57fedf.json | 105 - .../b772f20f-afbd-496c-9f94-e5fd30d54466.json | 132 + .../c6dd1b78-b487-4197-8a66-c364487ff6fb.json | 105 - .../002ba3ef-6ac7-4bdf-bd7d-42ef16aa7cc9.json | 105 - .../169d5ad3-ae4a-42de-b951-f264d85bf623.json | 132 + .../ac65fabb-07d5-457d-844e-19aecf2b18e0.json | 105 - .../e84c3b50-4ea9-4f41-be11-50c6aa3d4656.json | 132 + .../594780dc-d969-4a6b-b90b-1cc32f40c452.json | 132 + .../8a0828ef-56a0-4c2b-bc61-f955c56b7700.json | 105 - .../4ff7c238-d69c-4b92-83d0-69cacdfa0fe6.json | 132 + .../94536d01-2de8-4305-83aa-2673a226ab64.json | 105 - 
.../bb576dc9-eede-48d6-b438-732da91a4d29.json | 132 + .../e2ac8e52-8326-496a-b904-ca0e48190b3b.json | 105 - .../0fb2fe17-b55d-4802-ad48-bd4d711e1e0f.json | 132 + .../8043bcfd-1a4c-45c5-aca4-f23f02bd5562.json | 105 - .../03d59002-dc98-467f-b2a9-605ef8d9b763.json | 132 + .../972cdfdc-1c7f-4900-8acf-d5eed0ccc968.json | 105 - .../5cc40900-fe74-469a-99c0-74e998b0e316.json | 105 - .../8a7034fd-7027-4a87-9cac-c95b745935d0.json | 132 + .../717f745f-1eae-4277-8a31-dbed140ef3e8.json | 132 + .../2dc78735-c0c3-4dd7-8e97-52c92785e623.json | 132 + .../e9ab98ff-5cf0-4437-9cf3-c77ecb546c84.json | 132 + .../6303d73e-4129-472a-a6fd-c64cb3de7204.json | 132 + .../8a689e8f-19cc-45b7-80be-ce861a549af7.json | 132 + .../84881315-55a4-4f05-a115-cf82f850090d.json | 132 + .../970dc71c-42be-4d50-86ac-f7301ec969ca.json | 132 + .../c02e1fcf-a837-4b8a-a42d-63837c56128d.json | 132 + .../37280340-5b9a-47d9-aa37-9299d9025518.json | 132 + .../79832ae5-0a80-4e46-8175-4baa240dc4d9.json | 105 - .../46e7ad9b-b774-46b9-933c-913d1b307f7a.json | 132 + .../94c21b1f-ce8d-4488-a1d1-2769d34f29ec.json | 105 - .../80407172-765a-4aa9-b189-a322150b1a7b.json | 105 - .../c154d3f5-39dc-43c0-85ea-2e43b08494b4.json | 132 + .../71114773-e285-4666-ae7f-5fd7c9084104.json | 105 - .../abd830e4-2b7f-4895-8262-75926edbafd9.json | 132 + .../2c945021-72e3-4e7a-9c6f-81efb27b2206.json | 132 + .../87fc8696-17f1-4a86-8d0d-f5b124144384.json | 105 - .../23f056f6-67dd-41fd-b1af-a1cf9abf784c.json | 105 - .../5f0ea694-7f73-45fa-b54f-49fc06d1a6d9.json | 132 + .../34aec318-6db4-4df6-9d6a-ad15e353f36a.json | 105 - .../6c73f6ae-8ffd-4948-8071-33eab07437a6.json | 132 + .../8c05d496-c21f-4a70-b312-1c1ba37d877a.json | 105 - .../fbf71df3-b9c3-4f9c-b538-e4ccf097e81c.json | 132 + .../e3dcfd94-ca04-4cd3-ada5-e701a8b776da.json | 132 + .../e703fed7-cf06-4caa-b78f-3e398b437671.json | 105 - .../8dd67de7-0d3b-4359-b390-d90c609dea5a.json | 105 - .../9278bcf2-bfab-437f-bd64-7496b24fb8cf.json | 132 + .../633aa068-5613-41d8-a194-aebc9ce1586f.json | 132 + .../a723f173-af0e-4172-a43c-278ccbacac18.json | 105 - .../d3c1a922-a453-4c7b-b33b-52934e7bf72b.json | 132 + .../3a27b2a6-5eea-450b-91c7-1dc006229985.json | 132 + .../b1430f51-cd48-4feb-8d94-c2a9a60f00bc.json | 105 - .../395e37ae-005d-47c0-9cf5-919460e34350.json | 132 + .../b03b7c7a-f263-4712-bcf4-2e32ca4bd237.json | 132 + .../452ab810-6921-4922-9446-f2a5c081dc61.json | 132 + .../8ce4dea8-d674-4b95-b025-0c6ab60f6544.json | 105 - .../1abba5a0-f1a3-4f39-a81c-f4cd641d33ac.json | 132 + .../3b12518e-ef16-4a72-89bb-071802ca636c.json | 105 - .../0d354980-9f24-4b79-afb7-a7e6f52e8131.json | 105 - .../b2eefd3a-795c-4dc0-a10e-924bece05ea5.json | 132 + .../008cc919-f156-4a2e-af4b-eed015ca91f6.json | 132 + .../5ce1b22c-7daa-4714-a774-d7d509fa869f.json | 105 - .../6a1519e9-062b-454f-97cb-e57454f74e9a.json | 105 - .../9d56082f-5e46-4d7a-8f06-cb44fc983b3f.json | 132 + .../79a8057c-0791-42d6-adef-924a9cff0917.json | 105 - .../7ea26e73-a501-40bf-8f01-81ab8e850a91.json | 132 + .../41acaa59-3232-4c6c-be64-0acb38019405.json | 105 - .../e3343130-cf4f-4e5c-b2d3-5dda13d575b9.json | 132 + .../ba1965f8-b59f-4d71-920c-e3b401ca0534.json | 132 + .../e0eaf433-d842-47c2-b47f-9e0ddd95df72.json | 105 - .../0faf87d0-2b35-4256-acd9-4fe57f574d06.json | 105 - .../6dc87410-a39e-41b1-8759-68c1556c8419.json | 132 + .../01448351-5f76-4329-9bfd-4124e29de920.json | 105 - .../c4ebe788-fb60-453b-914b-56bf87dd6374.json | 132 + .../45a44cc8-a550-4d2f-b0f4-37b4aac6a2b5.json | 132 + .../ee5ad026-8df4-41c0-9158-3759d4a3ef02.json | 105 - 
.../10593c13-3b30-4605-8063-c6a6526fc9d9.json | 132 + .../7d8850c3-61b2-41c3-a01b-8e23511558f6.json | 105 - .../00f8547d-4bb9-4510-a29c-c37376c274c8.json | 105 - .../12b8f4d7-2ae8-492c-8756-f7cb21a58c76.json | 132 + .../96d9b675-c299-4138-a381-fb4de36287e5.json | 132 + .../f24694aa-cfe7-4a58-9f9e-f02c3e51d198.json | 105 - .../17fffa9b-8ed4-44c7-87ea-7ee2c1f28e6a.json | 132 + .../fa0776bd-e95e-4d54-9004-82dff09307b8.json | 105 - .../06bc6426-310b-40ac-bbeb-0460215b8981.json | 105 - .../8999a5f3-f421-4663-835e-7626cebd2282.json | 132 + .../1f235238-05e0-4c76-b136-0bf0cf470ba2.json | 105 - .../951e1a4f-ed6c-49ca-b648-6086989e333f.json | 132 + .../17167e2a-1f42-4ea9-a947-8749259738a8.json | 105 - .../2acc0666-e0ff-4760-a74a-227a02775344.json | 132 + .../1ca04810-a377-4390-944a-1a4ec91a7962.json | 105 - .../3196c71d-0e0a-4d29-8bca-c31ba3d99dfd.json | 132 + .../4d801ab4-0c2d-445a-beb6-4de824618e75.json | 105 - .../e858aa6c-c424-447e-b512-7dcf794f9f0f.json | 132 + .../8773eac5-205e-4264-981b-58f1a25f872a.json | 132 + .../cc8e5b55-5b48-40c3-9e30-3c1740bc7da2.json | 105 - .../c26ae286-a9b8-499f-b886-4b75be0cf2da.json | 132 + .../ff5bb366-3692-441c-8e8f-8c23c5143aae.json | 105 - .../14b789c6-8b7f-4292-8ced-279e7ee856a5.json | 105 - .../d3a61998-2d41-4349-bd15-ce29143cc910.json | 132 + .../56b66428-2751-4c62-b98c-6c60e58c45ca.json | 132 + .../5d22f1b7-c062-4c46-8da1-4c895fcf8b9c.json | 105 - .../735058a7-c22e-42a7-94f5-d7e2459848b3.json | 105 - .../9b2ec4af-4a7c-4cf7-8b7d-79b6cc219880.json | 132 + .../00637ba6-99e5-4940-94ab-a620ff248ca1.json | 105 - .../5855a920-428f-4699-becc-73d4422f706f.json | 132 + .../f1004f08-7f46-4eb1-8f60-66893fca7180.json | 132 + .../97db158a-3035-45d3-8d92-a08c9e605493.json | 132 + .../0d81b928-2a24-4eb4-93d5-224e3c505532.json | 132 + .../bf4cc7ee-cad4-42af-8638-6b371577ec68.json | 132 + .../5b574dda-0d85-47aa-9ebc-7f8581d402ca.json | 132 + .../6043830f-8a9d-4a03-9de5-4805724a9ae8.json | 132 + .../90a36ffd-8eeb-44e8-9b7b-dbd56238d0a6.json | 105 - .../fc5c5eff-8314-4cb2-8ba4-b562096cfe1f.json | 105 - .../7bc9676d-6186-4b2d-8b4b-4a3786f3ed40.json | 105 - .../9d5fdb25-0d6a-4d5c-bcfb-0903504e620a.json | 132 + .../217819b0-2c4b-4c26-823b-1ea14f893e01.json | 132 + .../c1a0b34a-d3b5-42b9-b779-b31b9678faed.json | 105 - .../0f844855-fb46-4b53-82c2-f36e5721c385.json | 132 + .../46d47e9a-6378-4eb5-a43d-f8e6a7c51674.json | 105 - .../59aaa7ed-27d4-4765-b115-90570ad86c77.json | 132 + .../5a2e7119-5fe6-4d3c-8706-01e22ef5b121.json | 105 - .../4478c5ff-3b51-4be2-abce-3fb6a951b6e7.json | 132 + .../655920b7-5687-4555-8890-ab1d08f3f00d.json | 105 - .../52ed2d5b-d9be-4f3f-b193-8d4cca4ded62.json | 105 - .../9202146d-5889-49fd-9025-e03153ba9093.json | 132 + .../94257d3e-2b1e-47a1-bbd1-7fc696a574b3.json | 132 + .../c45cc504-88b0-4110-9650-47f4d328f769.json | 105 - .../2245cf71-fb8d-44ca-b58d-06608312ee8c.json | 132 + .../302e9f42-b9fa-4e2b-acda-70c391f9b6bc.json | 105 - .../123331fd-a4fb-4dc6-a30e-17f230618df9.json | 105 - .../9a823fde-7802-4876-b72c-d8f73cd17236.json | 132 + .../cc17acb9-0f4e-46a9-a250-eb79a0fedc3f.json | 105 - .../ede99239-ef8f-49eb-a48b-0ec2553c99e5.json | 132 + .../4a307570-994f-491c-87a7-ad90b7965b8b.json | 132 + .../bb7b828c-07a0-4530-8c2e-8e4b6370cbb4.json | 105 - .../eb448d78-6417-4533-8458-99c1869a74ae.json | 132 + .../f524ebb6-64cb-43e3-8cff-6305ef122890.json | 105 - .../dd44686d-13da-4c88-81d3-6d01676baa4e.json | 105 - .../e1b8e4ad-4327-46b9-b957-fbd02e57c87e.json | 132 + .../4092651d-1d14-408d-922d-6189858aab36.json | 105 - 
.../aab6b224-b948-4fb1-84b7-0dbe5c46d527.json | 132 + .../2e5cd1de-6109-4f76-b722-abbd4b207f4d.json | 132 + .../701a4aa4-b057-42d8-8b89-dd59950d1981.json | 105 - .../767d1296-4971-478f-8d78-1d63d162ae5b.json | 132 + .../9f586b02-3514-46f7-b1df-4e78f286893e.json | 105 - .../eab74e3b-de61-4fa9-87c2-56e69b70349a.json | 132 + .../3219d563-3bfb-4618-8cb3-e9b198d5b11f.json | 132 + .../40662202-f976-4dc0-acf2-f4794bb5d744.json | 105 - .../011f32a0-458f-4bea-8192-b18a19ddd0c7.json | 105 - .../233fd27c-561e-4c9e-a917-cbc5b08c055a.json | 132 + .../a875e8f7-a4e6-4c17-abbc-b8d4b73b7501.json | 132 + .../aab84d55-c491-402c-9ed0-59347573fea9.json | 105 - .../4b68ba49-6681-4add-9197-2cd711701e15.json | 132 + .../c27064c4-93d1-41a1-a61f-cde7a991b047.json | 105 - .../37822fb0-4ada-4413-aa77-6938678994d9.json | 105 - .../5679ca73-3d5f-4bc7-bea2-5e9e713db0cc.json | 132 + .../a6c631f6-890c-4199-abee-18b012bc48df.json | 132 + .../133866e4-6e3a-4d88-95f3-d7e1bd414988.json | 105 - .../1edc3610-40fc-467d-8410-26d4b6adebce.json | 132 + .../42c773ba-8fb4-4b3c-8ac7-0688519bb55c.json | 132 + .../cb8d28e5-d423-4a62-8b73-7542fb990d8e.json | 105 - .../1a371df5-447f-4fd8-8fe8-dbf9a1dc079a.json | 132 + .../e4d90e2b-f510-4941-8e10-be027693c3d4.json | 105 - .../821a21a0-6fd7-438a-933d-5e31b2dd2adc.json | 132 + .../f56f3dda-a774-45d7-b949-b5e04174a413.json | 105 - .../556eef3e-7c58-446d-acc5-26af0413d2bc.json | 105 - .../781a4cc6-a69d-4106-81aa-06e114f7c897.json | 132 + .../86234365-2d3e-4d49-96e8-8f034990c902.json | 105 - .../e49c98b4-46f4-406e-9eeb-7072bf72b9a3.json | 132 + .../3b7524a8-d17b-4788-93f2-11076df464a7.json | 132 + .../6188a57f-4bc3-42a5-ad18-c59774e40407.json | 132 + .../28689805-7c4c-438e-8431-f4a6aceb5e94.json | 132 + .../7c156689-9668-4ded-bacc-c88a03ad1526.json | 132 + .../7e43f187-1959-4dfe-802f-094ba88f3b0d.json | 132 + .../a6170173-ef17-4cfa-a76e-8e51cb8cb970.json | 132 + .../e998d52b-dd94-4ef2-9cfc-5034ded0105a.json | 132 + .../a3ac60bd-8fb3-47d9-b378-1f0c4d74fed2.json | 132 + .../0f69217c-74ed-4398-8d1b-53d1a43be890.json | 132 + .../60e8f886-62fa-444a-8193-273905cbd4e8.json | 105 - .../b973adcc-769c-4009-87c5-5f5af02a5d3a.json | 132 + .../16052a72-b235-47df-ac4c-fe54e49b9131.json | 105 - .../4b30f11e-a2b9-40e9-b080-9d7484a5d048.json | 132 + .../befdae09-4caa-4996-a3ac-fe36310aaf01.json | 132 + .../65e47b2d-982b-4fa8-b5bf-e002cf3cc293.json | 105 - .../8cd7fc1b-2873-4154-9de7-c0b8e5f4f5e9.json | 132 + .../611c449e-3d86-4dea-94a8-a2b7719fa1ae.json | 105 - .../7f6e5858-f5d4-41cf-9bb7-c3c82a55c392.json | 132 + .../7b8bf84f-4101-41a1-b6ff-9cadbb5f84a3.json | 132 + .../1f3a733d-a6d3-453b-9763-61992cd514b0.json | 132 + .../d0eed3c1-2226-48c5-a314-e429f66c5053.json | 132 + .../957f02f1-45c7-4cce-b5aa-86bb5e485ad3.json | 132 + .../a1f5e06b-17f7-41d1-ab9d-c0e4b22d10cf.json | 105 - .../55a01e8e-318a-4609-a862-bab4d62b3e7a.json | 132 + .../cbdcd76f-be8f-42fe-89ed-d1d09d9d785f.json | 132 + .../c7b6515e-6f96-468b-8bc0-15212c31e790.json | 132 + .../f27f3a1d-c19a-42b2-8b49-64ecfe5d3405.json | 132 + .../994aa481-627a-4bed-8719-9e874373cbc6.json | 132 + .../9f5cd849-20b1-4e8d-9deb-f286dcfd9d6e.json | 132 + .../c4dd34f2-7acc-4a94-a9aa-3c6aeeae8a8c.json | 132 + .../e908b473-a015-4156-8e88-d67153479cb9.json | 132 + .../173af77d-7a51-4d5a-8fd3-366aaa5d78a0.json | 132 + .../0bb65f09-323d-485f-886e-5a35c8bcd342.json | 132 + .../86b4c877-ef2d-4563-93a2-92d7e77eab5c.json | 132 + .../be2ee3f6-37ee-4895-821a-3d3c7eb04eac.json | 132 + .../caf5de06-ab13-45e4-ac51-d4e40796952e.json | 105 - 
.../e574af17-dd3b-4c09-8689-ea598d44e562.json | 132 + .../83958185-047a-4356-918d-2f45f273c08a.json | 132 + .../929abd2b-3f19-4df3-81ab-406751d52919.json | 105 - .../d04c6e84-0b63-4de1-9278-aa37c9d2c8e3.json | 132 + .../14421b7b-6f4d-4b4f-91e1-27a9c0919498.json | 105 - .../a218e260-7f56-4676-af58-254bd84d0327.json | 132 + .../2a3e824e-8fb2-41ac-b548-30ea18ecdceb.json | 105 - .../f21fb2c8-4abe-40de-ab2c-9d23e95ee281.json | 132 + .../ce364468-f5ef-4a29-8026-89e455fa4350.json | 105 - .../da5774b2-8a6f-4f2d-8267-beb25490b06a.json | 132 + .../18284816-2f69-41c5-8cf3-5209ed77cb7d.json | 105 - .../274705bd-8eb6-4863-8998-f5d67c4ac827.json | 132 + .../5b95cc2f-3378-45e7-9f56-6bb7e1ce4826.json | 132 + .../e6649e50-54ba-4788-a3b4-5aa3d6e8aed8.json | 105 - .../6918d1a3-e547-46b7-9062-274057c1f513.json | 132 + .../b577bd26-a9f9-4a50-bd2b-f47bc5222748.json | 105 - .../599deb3c-49f9-4c0b-af8d-78f9e166820b.json | 132 + .../f9dca394-e108-48f3-a45d-a282f7b39098.json | 105 - .../b4ea3f14-3787-434b-8f26-20ff640c0146.json | 132 + .../fd0e4ea3-ed10-487d-85d7-df5669bc8edc.json | 105 - .../2f6a8cce-672f-4634-99ed-9d42df9cd26c.json | 105 - .../6952c527-ca23-494a-910c-1c027e4a5a29.json | 132 + .../3f12e79c-dd1b-428d-9094-10a047205e3e.json | 132 + .../c2ffce0d-069d-48bb-989c-6fb18bdd9059.json | 105 - .../d508da29-0288-4a0a-b727-fc5355515c5e.json | 132 + .../48cf5a8a-70c6-4c55-8959-32d773d6dbcf.json | 132 + .../4bb7d331-f305-4c08-a073-87ba7b2cbde2.json | 132 + .../94639454-c525-4e6f-af27-d92d45a9ac40.json | 132 + .../9fa81bb7-7abc-4764-9465-d61217590da5.json | 132 + .../9a683492-4057-4de4-a30a-aa66becffb13.json | 132 + .../b917df45-62f2-4c3b-943a-ad6c98ef8bc1.json | 132 + .../ba658bc7-b89d-4fb7-a794-f48bd3715a49.json | 132 + .../93f79cdc-ffd7-4299-9876-c0c7bed55ae5.json | 132 + .../5a91b0bf-b043-41d2-960d-5f0e78abc400.json | 132 + .../263f56e5-b578-475a-9bc4-b5ffc142f9e2.json | 132 + .../941a914d-0ca4-4896-9dfb-929c08c8651b.json | 105 - .../9219ff66-73ba-45d8-99a0-23d23b3555ba.json | 132 + .../d12855a1-81cb-4fab-b36e-dbee6c6d69a9.json | 105 - .../74cb7205-e6c9-4faf-a84e-c15daa2ba62b.json | 105 - .../b2328396-e9b2-464d-94e4-f03db19144ea.json | 132 + .../3f895edf-8f54-48ff-a731-666144af0fda.json | 132 + .../6e20bb3a-728d-40ef-b6ca-91b0dde02da4.json | 105 - .../79022531-2599-4c19-93e0-ecdbde7bf736.json | 105 - .../b48b8e16-a555-466b-8b1c-246137223311.json | 132 + .../588d2387-29de-41bc-8233-674081948787.json | 105 - .../5fdcb98f-4c50-4cdb-bd99-dd32efc6d6f3.json | 132 + .../5fdd75fd-6e57-4ba4-8b6a-58998ff88bd9.json | 105 - .../d49c5e72-0dd0-4663-a310-9cd9bf1f5150.json | 132 + .../0176903f-e6ca-4f21-b98a-00bc443bf244.json | 132 + .../11f32afc-95c1-4531-ae45-5a0974d36b3a.json | 132 + .../fe959cc1-17bd-4e87-b9b7-84d3adddbedb.json | 105 - .../70657dd7-63cf-40f4-92a0-1097fc1ce9ae.json | 132 + .../b89b30bb-fbaa-4ac6-8535-9f31cf87eb55.json | 105 - .../53cf325b-6f32-4791-8f95-8b982ea03b23.json | 132 + .../dae1ceb0-97b1-4285-b9db-912d7b4b01c7.json | 105 - .../78dd5568-0d0d-4cc5-ad1a-bfba857c827e.json | 105 - .../8c50491b-6ed4-4f38-9d3f-d5168600cf4f.json | 132 + .../248541b3-aeae-429d-93ae-06cc3bc82cd8.json | 105 - .../7adf79de-a51d-4b87-989a-c218ec6d99e3.json | 132 + .../92358e5a-5e73-4747-9e92-e5ac003b97f7.json | 132 + .../e0c03300-a08f-409e-9f39-f00d5e9e126f.json | 105 - .../f1636512-b98f-4fe4-adf3-abd556dd0ab9.json | 132 + .../faa7be96-1419-48be-9b95-e97689296de0.json | 105 - .../8125700c-d9e7-4d6e-9b78-049331dd571b.json | 105 - .../9333afdd-4866-412b-b11b-dfb118a06db9.json | 132 + 
.../319484e0-12aa-4212-b55f-d19efdd2f719.json | 105 - .../840c0e19-6d75-47a2-b64b-f9c51cb1dcff.json | 132 + .../071b49f2-8e23-47b1-9858-78d676d9905e.json | 132 + .../182d68d5-9b03-41bc-850c-1f571c36e630.json | 105 - .../d3821f53-87aa-470a-a403-c8e3cd100ae1.json | 132 + .../389dbaba-c9cd-4e6b-afb3-f2ee3951faa0.json | 132 + .../6ef15d50-74b7-4e09-856c-05343841e24b.json | 105 - .../5f78f39a-42cc-4cf6-bb27-e2160765bf24.json | 132 + .../b6e3d811-bf9d-474e-b82d-358a44e0dfc9.json | 132 + .../bef1cbad-4f75-4dde-b467-6145f72a87f4.json | 132 + .../654bebe0-b461-427e-a4cf-06386e9272d8.json | 132 + .../37ef4e34-58f8-463a-950f-48b3a6833d54.json | 132 + .../20687086-8aab-40f1-aec6-03917f4f9bf5.json | 132 + .../49334550-08eb-49a2-9cea-f90f22533ab1.json | 105 - .../53a0a998-a0a6-4800-80bf-bfd83123f2f6.json | 132 + .../4ee8df1c-e8ff-4a56-816c-0c2258a226e7.json | 132 + .../d4b778ea-ae70-437f-a295-772abc659027.json | 105 - .../42c8d84d-c8b8-42c6-8f49-4e971df173d7.json | 132 + .../77b57dea-22e1-48a6-b8ae-9e474f08ad5f.json | 132 + .../f0a224c2-037a-4229-bb00-5d76d3974078.json | 105 - .../9065a7df-dab7-4e3b-bbc5-01f2908c37b3.json | 105 - .../a9ed5d04-57d2-4566-91df-b798be939fdb.json | 132 + .../bad4ec47-fe84-4518-b072-6955938f0c86.json | 132 + .../2a71923c-8697-4b62-94fa-4c16874df7a7.json | 105 - .../497e585c-059a-4e18-9a8f-bdaa066f59ea.json | 132 + .../65a2c055-9bb5-458d-8a65-89b363b47a3a.json | 105 - .../e24b2a4e-83e4-4a79-bc41-03a54af00595.json | 132 + .../15e39361-585b-4870-b91a-64dce4fb37ec.json | 132 + .../563e2894-10bf-43e1-af67-5cd97d52f033.json | 105 - .../234f5f98-a5fc-417a-8463-186bf600993a.json | 105 - .../96efd11b-e9f2-4bf1-90f9-561714137edf.json | 132 + .../98e9936d-d376-4c72-80a6-0a28cf722ac4.json | 132 + .../7ada9c83-7851-4da2-b9d1-d744b174b777.json | 132 + .../a6ed72b7-14f1-464c-a7f5-590791982696.json | 132 + .../bc357a38-215b-4885-9e0e-6f2b6f0bf1cc.json | 105 - .../79e3f38d-ae2b-44a7-be0d-024adad6bcd6.json | 132 + .../81546997-4dda-45ea-81fb-23db1b3b5cd7.json | 105 - .../de11a0bf-47ea-444f-bf89-45e9208cfd1a.json | 105 - .../ef13bdea-cf73-4ead-b6d7-73a155fa9a79.json | 132 + .../2663884f-941c-4e16-8029-b38e3a543733.json | 132 + .../e6926be5-561b-453b-8d5f-e64f380c4a51.json | 105 - .../ca7af645-4796-4b31-ae7d-2cbebe5a369b.json | 132 + .../f95e098c-d320-4db1-887d-8c3252bbaf77.json | 132 + .../2bbf6dc9-8dd5-4dee-908e-d4a8fc03bc84.json | 132 + .../24473e8a-2631-44b5-9cc2-81f0669d8032.json | 105 - .../5f4edfdb-a62c-4410-83a3-1ceb15d2e7b0.json | 132 + .../aadfae06-73b6-4306-b056-0a733b9bd8f4.json | 132 + .../c2034822-689f-4e8b-9575-b63081584aec.json | 105 - .../cfecbfbc-46c3-4dd3-8bd9-afe4cd386973.json | 132 + .../162b8329-ad84-463b-bda7-7383edda04d8.json | 105 - .../97640dd1-d415-4b56-818c-cdcede3c52fd.json | 132 + .../b1097c42-10fe-4892-8e85-60385ecf35bf.json | 105 - .../b750c460-ef70-4abf-b77d-118a82039598.json | 132 + .../c50f0ef7-18e4-4f03-8262-ee1519c59b7f.json | 105 - .../f4c20519-9e33-4698-a17a-07e5fe7d2707.json | 132 + .../0f204733-55b4-4c06-bd12-dbc2e2593abd.json | 132 + .../aa2b9fb3-77ca-4a48-b3dd-77879220a6b8.json | 105 - .../0bb226ed-fe88-4678-9b50-f77883ceb708.json | 132 + .../6103d107-0eb8-4b0e-8947-d5c7e7cb62f6.json | 105 - .../fb297e45-9e14-4853-8384-75c187b28a9b.json | 132 + .../4f6eba27-2ab4-4b33-9568-814d15fbd6b9.json | 132 + .../bd904778-1ad9-48fe-a12e-4b62ce46bd0b.json | 105 - .../92d4d9ca-d19f-45c5-b506-5b1039100c92.json | 105 - .../c3bc3d69-a987-4dd0-b6a5-e0ecc50034fb.json | 132 + .../5d02ba78-cf8b-44ee-a1b3-e51ecf437d89.json | 132 + 
.../dd7005a5-281d-42e9-9916-663b1641718f.json | 105 - .../480b1187-5f66-4414-84b1-4c6ce1ebf137.json | 105 - .../4a43fa67-2438-4c2a-b17b-9d2f221e5a86.json | 132 + .../2c044767-1169-48c6-9e37-e9d1e35f4cfe.json | 132 + .../5919f71f-8d7b-4cce-a7ce-01680c08acf2.json | 105 - .../b3b9b1a5-4495-4649-9943-58986d94fcb1.json | 105 - .../bad67b35-d9ef-417a-955b-9c33e87cb927.json | 132 + .../60eaa315-f489-405d-a67d-7f1312e90cab.json | 132 + .../e7577048-db59-4629-aeb0-f50b72cbb827.json | 105 - .../50de312a-293d-41a4-8bee-4feb0c148b90.json | 132 + .../e4b943ea-3e97-490b-af6d-ad7dc0fdf012.json | 105 - .../56f24cac-394c-4439-8f2e-8270e7519bda.json | 132 + .../57084771-cc66-485c-99ca-470556e14c1b.json | 105 - .../8efa1423-0a39-4674-a94d-3d92448010d6.json | 132 + .../95f82b68-6135-4d7d-a2f8-b589d4041776.json | 105 - .../350b3491-cba8-46b4-a07f-3d1277270530.json | 132 + .../8fe84e89-c582-44d0-b961-d6ed4d889193.json | 105 - .../0741ead7-24f3-49b0-9967-f726df84f78a.json | 132 + .../1ea4d10e-e099-4967-8c43-e84acaeb40be.json | 132 + .../6c78d9f7-a61e-4f65-ac57-61597f735541.json | 132 + .../e9bcfb1f-c688-4e7a-918a-e697adaf7aa5.json | 132 + .../153cfe7f-c27a-40b8-b8d2-54351f26f583.json | 132 + .../b58372cd-5d55-4f42-a5da-2970e55b44b0.json | 132 + .../34a028ac-2002-480c-a1af-5b945ffe872e.json | 132 + .../065ffc51-154c-4a93-a342-0dd476fda473.json | 132 + .../ebc74f4f-157d-4ee4-8b99-9fb5b685afd5.json | 132 + .../91004d26-7b8b-4c0a-bd8c-8880654dc93a.json | 132 + .../5eb1aa92-a031-40d4-ad64-552075dae68a.json | 132 + .../3ebc147d-58f2-4605-a011-a71c591fac0e.json | 132 + .../01795776-e909-46d3-8b6c-0989334e3d0e.json | 132 + .../00dffa94-31f9-4b5c-b032-03dd20fc2e8d.json | 132 + .../736249d0-cea9-46c6-9677-ecae4b410af4.json | 132 + .../ef602cfe-3453-4189-b583-292cf05421d1.json | 132 + .../559af2c1-deca-4c35-b83a-004c22ac958a.json | 132 + .../8d66d895-626a-477f-91b6-2195f35aacb3.json | 132 + .../004df803-70da-4e59-b3ad-f210c790f29e.json | 132 + .../bb2972ca-e673-4be5-bc7e-2689adeac3a9.json | 132 + .../eacf2411-a0ea-41fd-8363-e565fce0f26f.json | 132 + .../4eefe3cd-ff42-4d4c-89c6-c3e48d8c85e9.json | 132 + .../f19dab38-48ed-438e-8a62-86e4d111f6c8.json | 132 + .../ff4b6d28-62e2-4671-8df9-690ce7f13f0b.json | 132 + .../9c05a7e4-f495-41d0-a7f0-1959e7434ba2.json | 132 + .../404e3d61-26d3-4f95-9847-064f0c7c6970.json | 132 + .../0b4574f2-1b71-427f-9923-17db449be191.json | 132 + .../775b88cd-98e8-4d93-acca-e294f68f2da2.json | 132 + .../89464568-47cb-4659-af37-8b061d3f0c8c.json | 132 + .../9fad9d73-acbf-4ffc-886c-551c1fe1ed45.json | 132 + .../c1882335-0df5-4df2-bfa1-c16126c328fb.json | 132 + .../291471ed-3b7c-4bd4-91bb-c27cd74ec460.json | 132 + .../53565fe4-0368-477b-9916-ac9a4b8a9c7b.json | 132 + .../f6cb5e9d-c4c9-44a2-9adf-7fa5639d84d9.json | 132 + .../e51fee25-7648-49d9-a8da-b8dbc68a722b.json | 132 + .../6acdc96b-cfde-439f-b6b3-a66257b3fcde.json | 132 + .../850da8de-ca13-4f15-bb9f-68b910355cfd.json | 132 + .../542fbb7a-d4eb-4cbf-b63a-4305cb108361.json | 132 + .../1dbb8206-6a86-4e2c-8ee0-d80fed014a69.json | 132 + .../3191b3a3-761a-42b4-bd31-b8dc22a4c722.json | 105 - .../6341de3c-8d4c-4af8-8f0d-c81e948bacd6.json | 132 + .../e6cb6a87-6db8-4aee-bede-ce8a60dc8f4a.json | 132 + .../ee7d14c9-aa49-49df-99fc-057e7dae251f.json | 105 - .../5113439d-1394-46f2-a38e-34b54e94a9e6.json | 132 + .../7aa22e01-efb1-46f3-aad6-cc1fcb2c3783.json | 105 - .../a03d88aa-7ccd-4f8a-9a1e-c9469d3ae559.json | 132 + .../c9e9de59-9ec8-4ca9-8869-f77cac14f3ed.json | 105 - .../1cfb40a7-7373-417c-aa1c-f6ab63ecb3b8.json | 132 + 
.../1e66ee5b-d3e7-4e2e-8a6f-d098938d4afd.json | 105 - .../446ac93f-d47c-4207-bf32-0cd94e88a931.json | 132 + .../aabf8b57-c3fd-494b-b8e3-7ff1bdb0a15b.json | 105 - .../7e4ba4f8-2768-4e7b-a11d-75ad22a47c45.json | 132 + .../dd216882-a64e-4a0e-8fdc-ff5f99639566.json | 105 - .../9fa1bbeb-ec5c-4d53-b2f3-eefa660bee5e.json | 105 - .../ca77f821-4722-45b1-b731-7d774232acb4.json | 132 + .../3498b101-b86e-4968-abca-a3d3d42a4e5b.json | 105 - .../f32d2a11-edd3-4662-aed7-88c6820b2c2e.json | 132 + .../4aaff24b-0364-4cc9-9680-5f5c6d04128b.json | 105 - .../71c56883-dd14-4f16-b839-5ce607a4aadb.json | 132 + .../639004c2-81a5-410d-bd61-e3e263f55335.json | 132 + .../834e5703-00f3-47d6-817f-cf039c53d915.json | 105 - .../5f232a99-07c9-4df7-9d3b-837966ea6de5.json | 132 + .../70c377ab-41b4-4c30-ade6-65cc52ab916a.json | 105 - .../482e34ee-8974-46c6-b3f4-4cc9872ef562.json | 132 + .../822b7413-b84e-4df0-8aca-cc0e95283a86.json | 105 - .../13743252-3ba3-406d-8e95-5a4cd3ac3772.json | 132 + .../680f5fa0-fb15-4687-a40b-7807af2e0fe5.json | 105 - .../ff25cb66-ed6f-421a-a038-1feb24666645.json | 132 + .../843f0d9a-04e8-4cea-bb18-94651a814d1f.json | 132 + .../f9798139-bc7d-49e7-bc42-bcd0ee808c68.json | 105 - .../d0e259de-1261-4d31-a1d4-4689112deca0.json | 105 - .../fa3ccf4a-9b26-4a76-a974-3a776adec7c2.json | 132 + .../ac56cc08-585f-4930-959d-7cbad08c34b0.json | 105 - .../ef4ac8ab-4ff5-4fce-94b6-443b1ef7964f.json | 132 + .../468bbea7-6dee-4a1a-84b3-e44b0f3ab95a.json | 132 + .../7698fd4d-b2d8-4ba9-98be-d96f9c666b2f.json | 105 - .../bd8fdfa5-bda1-402b-9010-94bf78b0127b.json | 132 + .../cccb45b5-c5cb-43c0-be27-bacbb4db5c5b.json | 105 - .../1c441afa-b8ac-4ff9-b881-e75f8765dd8e.json | 105 - .../a0b34b40-3e68-463f-a7fa-3c58c15aa16d.json | 132 + .../ca7ae45f-833a-4ce2-9fb7-27601e9434c8.json | 105 - .../dbf4fbac-cd99-426d-b725-600e60af00d2.json | 132 + .../22aad948-bcc7-4f8f-bb42-a839e3d1be96.json | 105 - .../f793c471-1638-476a-a050-455a32368e29.json | 132 + .../1d9c1beb-f84b-4eb7-9c1e-ce5a70afabfb.json | 132 + .../21f9d0a5-3ed3-40de-a233-a45f68d669e0.json | 105 - .../552dc523-3082-4980-a533-ad5d48f1260a.json | 105 - .../99396d97-d875-4cd9-a8a1-a9aec5c43bfc.json | 132 + .../78db2373-3fcf-468b-8c87-21db03b2fdda.json | 105 - .../82a44b46-156f-4232-92e4-6a08d7a4f197.json | 132 + .../3b40defd-5a2e-4d6e-838f-dbbbf12236fb.json | 132 + .../96179bdf-3e1a-47ee-9fc2-ac0b23307556.json | 105 - .../3280f4cf-dbb7-46ad-a64c-d4e3c4a58e50.json | 105 - .../dde41cd5-e6d1-43a9-9593-1a5751bc5f44.json | 132 + .../1cffcbeb-ef81-4efe-b883-0a8540a799e7.json | 132 + .../94960f86-3898-4add-8590-8abeff66a987.json | 105 - .../033ef96e-3d2d-49a4-bbff-8bc815a1b40e.json | 132 + .../40a09314-bb43-41ff-a36a-b39064c37add.json | 105 - .../bfe654b8-cb79-4845-bf14-85012207ce90.json | 132 + .../f73b09b4-020d-49fd-8ede-6a690088be94.json | 105 - .../3f2549af-9bc5-4ad1-a429-79bbb91c929f.json | 105 - .../5c4efc23-9591-447b-aecc-4c82797d7d01.json | 132 + .../a5fe3fab-95d9-41ac-a95f-66205e489dae.json | 132 + .../ab816ab5-9edb-49d1-8f89-c3dc36a8a0de.json | 105 - .../9285700f-106e-481d-88bc-5d59b5d57377.json | 105 - .../c0bf8ffb-444a-43a3-9514-76aa92c5f5b7.json | 132 + .../3d556d9f-036b-4368-bb4a-18ad6b444bdf.json | 132 + .../97793808-7d23-4ec7-b1dd-0c7b1dea1c3c.json | 105 - .../689a346d-191e-4ec1-93b5-6f64c1a293ff.json | 105 - .../92905e27-1033-4423-b87d-23236f9be964.json | 132 + .../17326bb0-42c2-469a-ac19-6a4b75d9e6e2.json | 132 + .../1d12c40a-a9b5-483b-aaac-07e323de73a9.json | 105 - .../11574f56-6c34-48e4-8fb5-c58d42f07330.json | 132 + 
.../b814d738-b9f3-42df-8774-0708d456c2ea.json | 105 - .../7c2e9776-92e4-457b-ae08-32c3e351b8e1.json | 105 - .../8f728c51-15f9-422d-bbdb-4d976961ab9d.json | 132 + .../65f19ffe-7428-41e5-a52d-02fad8e595c0.json | 105 - .../8d6e4b5e-ad17-4390-bc6b-ab6581a62442.json | 132 + .../07d2cbaf-fa54-4d0b-bdb7-4179b5f3bebe.json | 105 - .../5e33bf05-6c67-4ecc-982d-7590e9953145.json | 132 + .../eab26e25-e8bd-4c19-8f14-a933506372c6.json | 105 - .../f55ae879-bd95-409c-a8a3-9a57cd615a31.json | 132 + .../3e78ef29-f546-41b0-af2b-f3ae4154e396.json | 105 - .../b8426ac9-14f1-4e07-9c7e-b50cb2c7a1e3.json | 132 + .../2f177d4b-50fb-4a87-a157-84d1094d3971.json | 105 - .../51fd90b0-0d5a-4199-ba5b-ff29eeeab06b.json | 132 + .../b1070a2a-7694-472d-84a4-f20f4cfe1b88.json | 105 - .../c46e4fa1-afae-4b68-a13e-034b5cd2b779.json | 132 + .../42cc06ed-20fc-4e84-836f-3d7243ec336d.json | 132 + .../c85bdaec-43e5-4507-a615-89549901e392.json | 105 - .../21d1f676-4a7d-4305-b248-4a72d7ce0121.json | 105 - .../aaa53387-af33-4454-95f0-3af85f4778c0.json | 132 + .../465bca6d-b32a-4d34-9916-fc8b3166faa0.json | 132 + .../9a9cb5f7-e95a-46c5-90ed-42152fc0a617.json | 105 - .../bf138f3d-09d9-4dea-aa43-5efc804bc775.json | 132 + .../7bb9a15a-ece4-4fb7-b0ae-dc8cf69efb6b.json | 105 - .../cb4e944c-66f6-49f2-b1e0-d90454e34315.json | 132 + .../b2b6bc49-bda1-4a3e-a071-ec0a0bdc1313.json | 132 + .../2c918f65-3565-41f6-a9c2-d042608bc592.json | 105 - .../933f3d40-8726-418f-be2f-1f9686e9ab02.json | 132 + .../af1bf15c-7c5f-46fa-ba3a-821b521e86f4.json | 132 + .../f4866eb3-28b0-416b-92c7-764d38905686.json | 105 - .../43df4336-1eb8-4df7-8309-1199aafc07b1.json | 132 + .../44ae222d-407c-4c8b-9b67-75440631f848.json | 132 + .../b302d40a-64bd-4cdd-b5fb-3a9c1dbf1406.json | 105 - .../a87db0fe-3727-4ff1-875f-9edd3109f3a2.json | 132 + .../fa7a31f9-9c10-4f5f-a06f-e628363a726a.json | 105 - .../0c73e33a-7f6f-4925-970b-db289069d5ca.json | 132 + .../11243917-73a3-484e-ac8b-40065c65ea8c.json | 105 - .../02bc7f5c-dc2f-4d8c-adcb-a89a34ff5549.json | 132 + .../590c031c-2aa6-48e6-9b3f-68b1a585dd39.json | 132 + .../970c9fb8-c217-444b-a025-f4d9acdd679d.json | 132 + .../07a08dd7-822b-49ac-859b-d2fc75b9c88d.json | 132 + .../0c0e9250-b75a-4549-9fb2-2b5c9ac2ef49.json | 132 + .../2ae306b1-5409-4418-b5e4-50feff9dafe7.json | 132 + .../44bf5d75-afb2-48fa-a0fa-96d283b0ae94.json | 132 + .../e3860bb2-b2e4-4fdf-91cb-3343ad6440d7.json | 132 + .../6369fceb-148f-4491-9488-420182a9838f.json | 132 + .../045c814e-a30f-4b6b-b4f4-382dee4063b7.json | 132 + .../59d2b375-5696-47d0-9c96-1a826c08bea0.json | 132 + .../ff601b4f-24a1-4376-8c5e-5bda2ea88f65.json | 132 + .../8c043ba8-f7dd-4cc8-a3b1-7201042b8dc8.json | 132 + .../ce27dff4-9ca7-47cb-bc18-b5dd167c72a2.json | 132 + .../ed7c36f0-5b1a-45ef-be66-f9880cad099d.json | 105 - .../8f2bad2c-5c31-433a-bbf0-f1a8f0a80c3a.json | 105 - .../d69ecbfa-5036-48b8-8fed-f9162e2857f5.json | 132 + .../6cdf831f-3ccd-4d78-a94f-269ace42fc1c.json | 105 - .../b5924329-c182-482a-bee8-22fcb348281d.json | 132 + .../2cd4d3ec-2800-4223-ab50-6f9f4a1e1a57.json | 105 - .../a6a6b6f2-ac28-4c4a-806e-8abe8c7f9190.json | 132 + .../59ebeb48-88c4-4c63-92bb-888752ea9dad.json | 105 - .../b904301c-d0c0-41a4-b92e-92b2d7c9c13a.json | 132 + .../282fa475-0ac8-4230-8020-9dbb7fda03da.json | 105 - .../b5de0218-91dc-487a-be90-70f8bcb64803.json | 132 + .../3870f65b-3429-45c2-846f-6af30155a78b.json | 132 + .../fee6fbc3-c115-4668-8b5b-35b307c15fe8.json | 105 - .../cb6879a2-41b6-40b6-bb20-723aa0b213e1.json | 105 - .../d6c33a51-be09-4cb5-9942-4348668d3e5e.json | 132 + 
.../1ccd36ee-445a-4861-8835-d602973148fc.json | 132 + .../96412e92-8a74-429b-8014-30a526521356.json | 105 - .../4c7ef4ee-3a7e-4f15-8a4a-c5853b1c6a47.json | 132 + .../f2475574-fc9d-4cd1-94fb-ddd8bb89fa95.json | 105 - .../6a69202c-1c68-43e4-bd45-bbc2ff2db743.json | 132 + .../01f536ff-7613-4b09-b793-1f51bf32f705.json | 105 - .../a053d6a3-05d4-4d0b-a9b8-7865cf7ac612.json | 132 + .../727047f6-974d-4980-a8cd-672728885485.json | 105 - .../f76d3d30-4fce-48a9-a26b-7d714fff1d29.json | 132 + .../3964e579-bb1f-46be-8740-ba8097d8f7ef.json | 105 - .../eb38a092-1b56-4348-8188-baa2243f7046.json | 132 + .../1c4cfb94-fc66-4fe2-9879-78683abe654f.json | 132 + .../2deef730-c37b-46ca-82b7-de38ae724fd4.json | 132 + .../6dd14f37-6493-4f9d-a5a8-6ad62aa4ca04.json | 105 - .../fe623f86-5397-4818-aa3f-75c2f6632bec.json | 105 - .../04686df9-9ef7-4df9-bb1e-a4c113a6e32e.json | 105 - .../13a92beb-a8a4-4853-b2f5-1b09d3e2a64a.json | 132 + .../36cf5b59-5369-4baf-80c1-3a47678eb5cb.json | 132 + .../87569202-e422-423b-a2a6-96f94dbaf99c.json | 105 - .../ca946b2a-4345-42b9-aefd-0907b91759d7.json | 105 - .../fced3ef1-fb69-47fe-bf68-3efe72db3142.json | 132 + .../3b1941a4-b8ca-49f4-9c09-18beb1b470e4.json | 105 - .../7a83d75a-332e-476a-b0f7-986b2ec9cc5d.json | 132 + .../61d5c969-6aff-49b7-8fa3-bcf0ff0b661d.json | 105 - .../6f413d72-cd9f-435c-b13e-9cec14edeb5c.json | 132 + .../a7822bbf-bc23-437d-8e5b-32fb06d3a9ec.json | 132 + .../c44ac25e-9139-477d-abcd-442b3a0dc2cf.json | 105 - .../0b19508c-4996-4fb7-b0e0-9fa952854fa3.json | 132 + .../447c22c1-8929-420f-b59b-01ab32a22281.json | 132 + .../55d876b7-159e-4c76-848b-1480b4c2f4a2.json | 105 - .../ab3dbe43-658e-4c8a-a399-b3d070d467ba.json | 132 + .../ee5c87a4-aa06-4728-a9bf-2fc35284b987.json | 132 + .../089f10dc-8be6-4595-a0b3-7d5bb4fc13fa.json | 105 - .../6a1a58f6-e399-4ac3-a516-f02a37b6ff68.json | 132 + .../9e2bfd77-b73e-436f-ad50-ccfd379cd3f2.json | 132 + .../d59c7d7c-99a9-4de5-9a69-60b934eafa1b.json | 105 - .../100cf60a-c43c-4b3a-a667-a45cffdd562a.json | 132 + .../ce5dfe15-432b-42ac-9ef1-569ab4e640a6.json | 105 - .../2088fca7-11d7-47de-808d-d47da0caad0f.json | 132 + .../da94039c-b214-4ad0-a312-a38cea28498b.json | 105 - .../bf0b3560-9d38-406a-ad30-5fd157f0fe43.json | 132 + .../9ce12fbc-00f7-4cc8-bd9d-67ead83a0801.json | 132 + .../14501de3-dac0-44af-8c17-7abcd9bbba8b.json | 132 + .../c9db8ce4-6f0d-4c13-8484-6fca9e9c3798.json | 132 + .../8c6c06be-bbc6-4307-ba5b-336dc2bb466f.json | 132 + .../1326ff61-d0b4-46eb-9bcf-f978166e622b.json | 132 + .../4c9e829f-7a99-4d61-8730-7457215a4fd6.json | 132 + .../574e1e63-46f3-44a4-8d04-ad1709a7e1dd.json | 105 - .../afc24d42-6d25-4036-8f22-fcf944b481b7.json | 132 + .../6f6db681-991e-408b-8d4e-71fff9e1c974.json | 132 + .../9024dcc9-fbd0-4ab0-9142-cbf741e7ae54.json | 105 - .../2228ade6-6243-423f-857e-66f5584a1511.json | 105 - .../f3fa76bf-f11c-4dee-9b9f-00f1ec793dac.json | 132 + .../77b457d9-4957-4f0d-a8d3-e005ae382239.json | 132 + .../c5c34d42-c043-4d60-80bf-5cb522e9d915.json | 105 - .../11474a7a-73a6-4a3f-8bcb-bef783e12a2b.json | 132 + .../727869c4-3498-482a-a04e-c6a779c0e558.json | 105 - .../23cc1e7f-0994-43a5-8403-5361a2976285.json | 132 + .../998d2bbc-2722-4fb8-9a6a-230c146e2e37.json | 105 - .../88c257d3-d5c1-4e1f-bbc8-9fc6bd65e15e.json | 132 + .../b8198c8b-533a-4f7c-9025-1ccd7a4aba76.json | 105 - .../ec4c2032-8fc0-448a-a7c4-ee9b35b642db.json | 132 + .../0d1c0e64-8a5a-4797-9234-91a4f1726171.json | 105 - .../3c7ac4de-1456-4afb-b7ac-07beb6cb4d39.json | 132 + .../0ef8de5e-4e2f-4d74-9267-e953375dbdf4.json | 105 - 
.../a06ad94f-13ee-466c-b25f-87cd87012678.json | 132 + .../4634b7d7-110e-422c-af60-80cd9df06dac.json | 105 - .../9e1ca6d0-d2b2-48c5-acc2-ad299ce02e1f.json | 132 + .../7dcd6e37-3685-4b08-b983-b2a711aeaf73.json | 132 + .../86d3bb20-09a5-4ec0-a473-14a3e3c5a402.json | 105 - .../b1ae6801-0139-41d3-85dc-102ad5cc4c6a.json | 132 + .../3ac95acf-830a-48ca-a144-42b610558062.json | 105 - .../4cc037a2-d952-4566-a575-015f8e3a5925.json | 132 + .../a1eaadae-8601-4c18-ab0c-4f6d80d3307b.json | 132 + .../40e452df-8f0a-4473-a3d1-41f9c288c12f.json | 132 + .../216020ac-276b-436e-815b-d6968eb83770.json | 132 + .../1bb4aeac-a5e1-4fd7-9e70-64fdcfc600cd.json | 132 + .../b6f50cef-72b3-414c-a33a-a2c8b2af18c0.json | 105 - .../25739611-f690-41b4-87de-9f4ea8b3d815.json | 132 + .../b8c27fdd-5b35-41ab-8a35-b5a48f27cceb.json | 132 + .../92999dc0-7075-44ee-be68-1ec32ab5645d.json | 105 - .../fa237949-c3ac-482a-8a54-5a2019f24016.json | 132 + .../b60dd828-a3e7-46a8-b4c2-322aeca42faf.json | 132 + .../5de9f914-333f-4181-a93f-79257a3daf54.json | 132 + .../cf71c265-ef73-4410-a2bc-ce9702cfbcee.json | 105 - .../22bab713-09d7-471a-b077-cb8c336ba151.json | 105 - .../e2d23da4-226a-4a02-8390-e8edaea4b65b.json | 132 + .../c64c7470-dcf9-46f8-b789-cab7e902739d.json | 132 + .../f1107803-5a3b-4fcc-b948-ff622b5f26da.json | 105 - .../f6d727a3-19dc-4173-a88f-2c47449896aa.json | 132 + .../490d14c8-2cb0-4328-9f41-6074b28d6fdc.json | 132 + .../51caac64-fee1-4c7f-b474-1b1e0f71212c.json | 105 - .../013a9bf9-7b9e-4084-b7a2-bb77ad0c18e1.json | 105 - .../9351b079-7ef5-42ec-bb83-f0d8ec7de479.json | 132 + .../852d5adb-f422-4102-8114-082ab0b3c07d.json | 132 + .../c64e98cd-c022-4834-a3e0-3949416d1fb1.json | 132 + .../f101bd15-ac61-49d4-beac-c89bc889b34b.json | 132 + .../11caf1c1-e2a0-4abb-bb0e-d06853a06e4d.json | 132 + .../f0b57a60-8402-4430-93f3-b846a94113f2.json | 132 + .../4e1ce0d3-f454-480b-a4f7-7aa827eaaf1a.json | 105 - .../50aa8077-4493-47a9-9cec-014c56343ecf.json | 132 + .../5e70d00b-c822-4ad6-afe8-3756a7038c57.json | 132 + .../741838df-e2a3-4c54-84d3-fe491444071b.json | 105 - .../1e11a625-87e1-49d0-94a6-8f9ec1f75fc3.json | 105 - .../8162ba41-e630-470f-a297-72fb9f2110fd.json | 132 + .../60dd9d02-476f-459d-a41c-f89f82116dc3.json | 132 + .../a060e2b0-d1ae-48b7-b8f9-c51fadc3e152.json | 105 - .../1b0d1ae7-322b-46d2-bc33-160f578499b1.json | 105 - .../73e89f21-5799-4835-a0e0-a6664c0483da.json | 132 + .../7aa3aa0e-3b5e-4c0c-a697-2e87859c44f2.json | 105 - .../7f355ad4-9156-486d-8cf4-723117da3bb8.json | 132 + .../2420519c-81f1-43b3-9b76-af141d2574f4.json | 105 - .../4ccc6026-b639-488d-867f-d98ea49cf1b6.json | 132 + .../3cf2e68e-4de0-436e-935e-86935e11f72f.json | 132 + .../ed988bd0-76b0-4ab6-9c9e-5a5e0aefb936.json | 105 - .../766e6e63-5779-49cd-9e8c-2bc475c1356a.json | 105 - .../e9e4ae5d-0dd1-463c-9f15-47cb21efb409.json | 132 + .../69491efc-0287-4288-bdf0-bcc57c53b94e.json | 105 - .../c57eb23a-5998-4ab9-9a98-39b1338f5ba6.json | 132 + .../705a1ff4-2e40-4827-af54-099870fac588.json | 105 - .../94fb625d-f58c-4f2e-8268-1dc4472c1cce.json | 132 + .../1d6f8802-e9aa-471c-8fbc-1cd807357ab5.json | 105 - .../4481ddef-2bef-4284-b56d-21054f5a9a97.json | 132 + .../80048c4b-e97b-45c7-aa04-70ce69481a97.json | 132 + .../cfdece82-631e-48b7-8232-91a8d9ccf65c.json | 105 - .../2b84722f-58fc-421d-ae1a-9e21ac0b4080.json | 105 - .../d21a2557-2348-4087-b2a6-6e1c0101bccc.json | 132 + .../0e9837cb-4dda-4058-a89e-4127b5980eed.json | 105 - .../76290d4b-5526-400b-8ca4-24d220f7c02d.json | 132 + .../3a146535-09b3-4246-8bd8-0e984e0905b1.json | 132 + 
.../9878c419-fff8-402a-a315-70864e5ae60c.json | 105 - .../6683f95c-f97f-4117-b3c5-c1ed9587289e.json | 132 + .../d0907791-99ed-4c01-8df4-80ab6ecc906f.json | 105 - .../8f186e60-a090-4b9e-9910-23054617fe57.json | 105 - .../bbe74b2b-9e13-4c13-92c8-618078667248.json | 132 + .../61876ce3-acc4-4619-b0c2-78ac4dff48ea.json | 132 + .../ebf9067a-9836-4152-aa62-3ecbbc2459dc.json | 105 - .../b304baee-c9de-4982-801d-2b9e7f1a7334.json | 132 + .../e8fe4b10-f6f3-4036-a3d9-77b8d28822ae.json | 105 - .../6f27e746-1bdd-4cec-a955-c27f2f9900ef.json | 132 + .../e2514850-3847-4fe7-abd8-240762ba507a.json | 105 - .../30637c5d-1bc0-49dc-8afd-335a9a66f196.json | 132 + .../7896d77a-e4c3-431b-9490-26d88664385b.json | 105 - .../169e29b6-50d8-456d-aa20-3fe2f3b19a1e.json | 132 + .../cbda0920-b298-4db2-806d-65b7d6550b30.json | 105 - .../427d32f7-190b-4005-b02c-6a8ce089dbbf.json | 132 + .../e523d43e-a198-4db5-9d91-c4959b136953.json | 105 - .../383b2f80-774b-4f76-998a-9d3d20a265db.json | 105 - .../de7551a8-63b1-4de3-899f-9d98cb985005.json | 132 + .../2b84e1be-81f6-474e-be5b-c5f4e60167fe.json | 105 - .../eff6f456-906d-4320-8e6f-667fbbf0574a.json | 132 + .../55c0df8c-8dba-4508-8fe3-6ee726fa8a44.json | 105 - .../6cbd9a3a-7e06-4eee-af9e-6db4ff35c36a.json | 132 + .../2dff318a-f64f-407b-acd3-2b1020d3f5cd.json | 105 - .../7e3d3803-c8d4-4025-8d12-c4c29c49c059.json | 132 + .../39b85f29-d449-40d6-bb0e-cb4790a47cc7.json | 105 - .../a43a6ca9-3543-44bc-8511-ee5c45552070.json | 132 + .../83f6fdec-9592-45a1-acdf-0ebbb400c8a4.json | 132 + .../9ff57503-4fc4-4d21-8899-d691c912bff9.json | 105 - .../6d2370ea-55ab-4ae7-a11a-c1556e988349.json | 105 - .../6e2d4174-303f-437b-9abb-26667b1dd04c.json | 132 + .../53f0c477-6f06-427a-be34-5b0131cbf9e1.json | 105 - .../955e93d0-bec1-483c-b3f0-258e13d5cb16.json | 132 + .../3065ca79-c5e9-4875-9f81-4231e971d818.json | 132 + .../3b5ca740-a1e5-4043-ad56-c772bbdd1b38.json | 105 - .../fc7e485f-a416-420b-b43c-e45e502c4a8f.json | 132 + .../53e882c6-6eb5-4202-a8d0-3a313556c9f4.json | 132 + .../a1609dba-826b-4246-9230-35bd68268fe4.json | 105 - .../ba715669-c0ed-471f-80a6-b67453fb4930.json | 132 + .../316cab27-5cac-4d26-90ae-05d1fc3bd14a.json | 132 + .../8d69f711-74c9-4c1e-87dc-9b46f70674bb.json | 105 - .../c3b6efec-5428-499f-8e6b-e3b2b87a0d15.json | 105 - .../d2b0a35a-ea72-42f4-9f71-fffa1480bc22.json | 132 + .../bf3eabff-fbf7-421c-9e04-548accc7678c.json | 132 + .../d8cef007-51ab-4793-9a74-d9f29d6c0f27.json | 105 - .../b7eeedd8-33ef-46b3-a3fb-6ac87247bc4e.json | 132 + .../ba5c73b3-4785-44ef-8bfb-cfbbbdc16a91.json | 105 - .../9a0e6d99-4f86-4ce8-9b5a-f7b6c0fbd710.json | 105 - .../b1c41abe-e7f6-4229-b776-8ed0b5f91bd4.json | 132 + .../5b769770-3b63-4863-a723-95212e2be40e.json | 132 + .../e78a3888-33c7-4264-a01e-b0661504322f.json | 105 - .../f2264b41-efa5-4278-91fd-2f454aa91c61.json | 132 + .../f6729e0a-559f-4087-af75-37634bf0af62.json | 105 - .../1229310f-22aa-4ef9-b354-71fa249569f7.json | 105 - .../5c3484b4-6faa-47fd-a1a2-881898450f79.json | 132 + .../326b95f8-9eae-4064-a261-077a957e233c.json | 132 + .../1cfb7d70-b903-48ae-bdb2-31c838bdabc8.json | 105 - .../c1c7336e-b8bf-4a69-a586-c1a224ba8a65.json | 132 + .../89e55482-b762-4f5d-a021-211048719bdc.json | 132 + .../81018e12-63f8-4ad8-87c4-181a13202497.json | 132 + .../d8d52ed0-2eb6-4be3-9e4e-346a6b19ceca.json | 105 - .../25ec2dbd-465f-40a9-80f0-e4001e621303.json | 105 - .../5b09e8cb-aaf1-48fd-a2f4-11a8d4bc9a4d.json | 132 + .../36d52065-1de2-4661-bf23-85276a8ede2f.json | 105 - .../8b344f21-9038-4b15-aba8-308aa62e4b39.json | 132 + 
.../68ca8f7c-88c2-4ede-bcb7-d4ae23429d8f.json | 132 + .../df557f25-5505-49dd-a0cb-88fff601c6e2.json | 132 + .../a50bf387-bf34-490f-979a-b6217a85a1bd.json | 132 + .../89264aa0-3bed-41d3-b171-2a5434cc990f.json | 132 + .../9d750c83-0b27-437b-ae33-dd21a3313a04.json | 105 - .../a3272caf-a292-4dc7-8932-636a4099ca6b.json | 132 + .../b29dbad1-7c1c-4ed2-8f44-45d54fed4880.json | 105 - .../00398bb3-0c84-4b3b-bcf1-61e84313b3e3.json | 105 - .../c4ade77e-628f-457d-bbe1-3e5a0cb19d04.json | 132 + .../00620da3-d3ee-442a-a319-248906d959c0.json | 105 - .../b030646c-5f5c-43ab-bbc4-405f82992265.json | 132 + .../399e516c-d8c8-4511-a746-76c81f72b36a.json | 132 + .../bd8e4424-7903-43e7-8105-269de734582e.json | 132 + .../9126e939-3a87-4774-9606-084c5b56e933.json | 132 + .../a3ba5a65-b137-42ad-868b-9aa5c24afd07.json | 105 - .../be2ef197-738e-422d-9a88-cafd124584b7.json | 132 + .../e115938d-d343-4c03-8f3b-4d86768b2e49.json | 105 - .../ee22e6c5-8529-4987-86d0-4abf3b525f90.json | 132 + .../50f0ddc2-fccd-447c-ab50-a086ccb4cd3a.json | 132 + .../82346a60-f31e-45ba-9fae-bd738321f390.json | 105 - .../83294141-a70f-40da-b3f8-21b367098cce.json | 132 + .../303ae3d2-fdf5-404d-83ca-8e6071e13e6b.json | 132 + .../33146dbb-8233-4f3d-9fd9-68cbacc3f293.json | 105 - .../1b13d76d-259f-41f2-baba-ce96ef0cb937.json | 132 + .../d90cef97-1e73-4068-bcb5-260a3f2586fe.json | 105 - .../761f0cc0-c202-490d-93b4-447244f1e40a.json | 105 - .../b644a420-0a70-4b3d-9a5a-ff91911c857b.json | 132 + .../33aaa60f-eb69-4d36-917c-6862121a223e.json | 132 + .../1f1da15c-3a82-4dfb-9b73-4381c70eb1ef.json | 105 - .../a1d2e571-6de0-4bd7-bdcf-8b3921b450f6.json | 132 + .../ad93274e-3ca0-40cb-9f65-e6e6c66a8008.json | 132 + .../982455a4-fb4f-4eed-96a0-c46d9eb11937.json | 105 - .../b8043d04-c3ab-4d6a-97eb-44b195a52710.json | 132 + .../22f8bb3f-4794-46b1-828e-75711a1233bd.json | 105 - .../c6bff6da-382f-4423-ba3a-d987839132e0.json | 132 + .../f3574ad1-a6d7-47fb-86e7-69c256452dea.json | 132 + .../d67aa278-fcc9-4404-a87a-4be9e1bdaa1a.json | 105 - .../f2e47267-6c40-4d70-8420-295c95b318f3.json | 132 + .../395f246e-34c6-40e6-bfeb-b047aa12cf90.json | 132 + .../b906411a-6663-4c9f-9fe6-4d60e99e4e41.json | 105 - .../3a91f8bb-c132-45b3-b8b4-d2ecc9f03f3a.json | 132 + .../82a3253a-7a6e-4d75-8ea2-114b4dee6d16.json | 105 - .../97c92043-9bed-460a-8d7b-70ab3584c75b.json | 132 + .../0a933130-dca9-435c-a529-16065b540aab.json | 105 - .../ab2ce171-bfcf-49ea-a341-2a52b2bd803a.json | 132 + .../3fd95536-ec61-4470-9082-14a116d20e80.json | 105 - .../f9bbd9cc-dc6a-466f-b777-eaea4a15b874.json | 132 + .../176727e5-31dc-462a-8210-4735543c32f2.json | 105 - .../cd0aefa3-b0c9-4683-872f-f9f9d285e6c3.json | 132 + .../2dec3c49-01f0-4940-aa45-e7a6b2648e8f.json | 105 - .../c42db2ab-dbc4-48e4-9c16-7b8a5f8492c3.json | 132 + .../1b32c387-97a7-42ff-892c-d3bacebbf050.json | 132 + .../cbea057c-b0f9-48ac-a075-eb28ebbaf358.json | 132 + .../0b1bb876-9dc7-47d5-855a-f028fb7f2df6.json | 132 + .../e6a0cf8f-323d-40c0-90c2-0e2071321df0.json | 105 - .../a86678ad-344c-430f-80c7-02d634b0cd5b.json | 132 + .../827f3236-74fa-432b-8177-8785ac25ad76.json | 132 + .../00de0fac-e1a7-449a-969d-624cbe9adab1.json | 105 - .../7f694687-77e5-41d2-923b-f2d5f231729b.json | 132 + .../b93d3a57-2535-4150-a2db-71a50569e6ae.json | 105 - .../daa9d03e-63b0-4c08-ae72-e11041200ac7.json | 132 + .../1539822f-acc4-4dae-9e61-133da97ebcbe.json | 132 + .../2e06f258-9d91-4734-aacc-f417fddad77c.json | 105 - .../be3635bb-83de-4cbf-8e0f-3a84ee78bd67.json | 105 - .../eec80fda-ce2f-4ef4-94d3-9e7b90f7f2e5.json | 132 + 
.../19cd2513-03e8-4d78-b222-566fe3928d2b.json | 105 - .../448cac5f-a7d3-41fb-9b49-666758037eb4.json | 132 + .../5d7c5ac1-84c3-4fd1-ac51-4c00ed8c59c7.json | 132 + .../7e1741cc-f9ea-4940-9b6b-d7a515cfce31.json | 132 + .../97bab408-a5f5-4363-b530-dc4a6966c859.json | 105 - .../2c1cab05-b63f-49ca-a709-b5a4e859ba82.json | 105 - .../ec4d21be-b1a6-47a9-84a4-1a25249c1768.json | 132 + .../c6b03539-04b3-4ef2-909d-8036a7ea2ae1.json | 132 + .../f1980c69-8c24-4fcd-ace1-797195026c7b.json | 105 - .../9add85f6-b577-449e-8a2f-ae77a2588bc7.json | 105 - .../f156ac38-056e-4ef1-bdbe-e83c299a683b.json | 132 + .../11d3c8db-300c-4e02-b729-7adba6844ad2.json | 132 + .../d2451e41-e4b0-4945-9ace-1b046b11528b.json | 105 - .../54a93ff0-bff3-4252-ba4a-e99f06b46896.json | 105 - .../fc75a820-fc0b-4e50-9304-61f0e93795c0.json | 132 + .../8965f266-28f1-43f2-b03c-acc4a9478b7c.json | 105 - .../bb66896f-799c-4e17-8b54-af5e795699fa.json | 132 + .../30a1a786-7478-401f-85ae-57037ada3d32.json | 132 + .../681b02e4-7b57-42b7-9550-59c664511b01.json | 105 - .../05430b16-07b6-41a1-ade9-6211cdf8ccf1.json | 132 + .../c04bef75-d3cc-463e-ac24-a2b18d3611af.json | 105 - .../09bc4d5a-f104-4a36-999c-11e2532eef1e.json | 132 + .../468d60fa-5c01-41bd-a791-e0e86c2d02bf.json | 105 - .../a92cfff6-6caf-4bf1-913a-9d7dd2d8d449.json | 132 + .../8972e92c-ebbe-4dc4-8a8c-6f7a42ab5c11.json | 132 + .../9f1f8a2e-3a63-4b8e-85e9-141477fddcc3.json | 105 - .../e33fb04e-ac99-423f-ac8c-5268e527bf34.json | 105 - .../e4f39815-9704-4d0a-8d9b-39359367adcc.json | 132 + .../b8b84752-c112-47be-8a86-35ca0e578301.json | 105 - .../f40df456-eb9a-46f8-8fb0-b6ad2748f3c2.json | 132 + .../16777b0f-3063-45eb-be07-294d13f975ac.json | 105 - .../398996d9-299b-4120-a757-e2fe14e779ee.json | 132 + .../4398633e-77b0-4b61-ae85-29b0e5aad38b.json | 132 + .../bc990db1-c6d9-4113-9946-466bfd5cf9cc.json | 105 - .../1bc60148-512f-4830-b541-f30535cf74bf.json | 132 + .../a9dfb20a-13e0-4419-a747-7c001b2e9435.json | 132 + .../bf253a63-4685-4e51-8a0d-5209306926c8.json | 105 - .../388e3559-a3b6-4738-9843-9bdd048bae09.json | 132 + .../994a6930-42d5-463a-9e7c-0a3070144211.json | 132 + .../143dc973-1063-45d6-9747-9f24a9ae5657.json | 105 - .../cce46320-9794-443a-831a-92e2a21515b0.json | 132 + .../3c9eb291-6171-4d40-aa5f-58d39738fdcb.json | 105 - .../988f4cc0-ebfb-43a9-8a7f-3dd1f1c1e342.json | 132 + .../3c675148-5d09-4778-baad-9295ef8cfc79.json | 132 + .../c19f2ddd-7710-4844-9f1f-c0cd1c7e3e41.json | 105 - .../620b80ba-81ab-4504-9f42-4965014f3cd1.json | 132 + .../b6c68fc1-c2c1-4cdf-91ef-2007becd7ade.json | 132 + .../19279c18-c2f7-4f75-a9c5-a121b2d4bcff.json | 132 + .../7966789d-8ace-4b39-9093-96bbb8e641d8.json | 132 + .../105021c8-c214-4a6a-ac3b-747c4c48886e.json | 105 - .../5e1d849d-0342-4de9-a7d8-dd5cd5960fac.json | 132 + .../a17563e3-0369-4042-8006-2ec781653f63.json | 132 + .../68369110-e371-4112-ae0a-14f7fe9fc40f.json | 132 + .../2a6925d3-992f-4c4f-a57b-3eb41062743b.json | 132 + .../28290ea9-9ce5-4605-ac5b-aa2d606994d8.json | 132 + .../5ea3a084-bc30-4390-97a2-1933c5422790.json | 105 - .../eb2ed6eb-4789-400d-aea5-841547a20cd7.json | 132 + .../873218a0-7ddb-4287-88ce-8c8214e85c85.json | 132 + .../a9888e61-bd14-4769-b620-cda908c8ba3e.json | 105 - .../99d6ac02-a8f8-409f-ad9d-ce5fd7ed6fe0.json | 105 - .../e4c32b92-46b4-431a-83f2-11499f587534.json | 132 + .../71a54215-e97a-4ee6-928c-344bd690b020.json | 105 - .../a05681a0-07e4-4206-ae89-dee4e9706467.json | 132 + .../2fe15418-16bc-4f60-bad2-7329a3670507.json | 105 - .../b078f823-d603-4030-81a2-a3ca1a1117f9.json | 132 + 
.../26625158-6720-47c7-8c28-46ca7b4b947e.json | 132 + .../f6bcff0a-559b-44c1-9c70-259446b3ebe5.json | 105 - .../4deeeff7-f62d-4c42-b32a-98bdd773a758.json | 105 - .../5e3e8dec-f14b-4b7a-ace1-1e1728395e84.json | 132 + .../35b4378e-52cd-4ae1-985b-c8e2c00dc61a.json | 132 + .../8ec55b3f-e425-4ee9-98d5-dac775977514.json | 105 - .../4d99a55e-39c0-41c7-9ef0-494f739ceaec.json | 132 + .../c583cff2-2944-4afb-b32e-c0f49bc0d3b7.json | 105 - .../a6996896-1464-4b55-a784-28deb06150c8.json | 105 - .../f3c7bacd-e231-45fd-b503-ee4d34caf4e8.json | 132 + .../1bb87d8f-2d66-42b2-a744-1a7cbc2c17dc.json | 132 + .../406f36fc-1243-4342-80c6-95b96fcc003f.json | 105 - .../87bcbd57-2d0e-4d77-9f1e-3ec0199c8452.json | 105 - .../ae10fd26-e648-4fa0-ae24-dfaaf4ff510d.json | 132 + .../0af58746-0492-4ba7-8a17-c0a5c43d0700.json | 132 + .../d7125235-7b17-4a90-9125-c993646cd7c8.json | 105 - .../88fff9f5-7aa7-463a-87e0-5fd2f5bacf09.json | 132 + .../d758e9a9-c316-4de5-bdb7-d0ec7401fa12.json | 105 - .../bc79527d-ae58-4b17-afd8-df931562dbf3.json | 132 + .../d1445003-91ea-4b2b-ab38-a47a6392620e.json | 105 - .../3e7423d5-ad7e-48e2-bd25-a4946d443c24.json | 132 + .../4d9c2e04-caef-43f5-9ce1-40517341ff40.json | 105 - .../5d53b35f-6bff-493c-805d-b45517ae0e2b.json | 105 - .../7979fd6a-a886-41cc-987b-356b7c452bff.json | 132 + .../2be6bc34-1e61-426f-b963-6e096b5418fb.json | 132 + .../a099778d-4c47-472e-872d-8fffcdf2764f.json | 105 - .../c4f69339-be6b-4bb4-8faf-a1f40e73d4b0.json | 132 + .../5d25872d-eacd-4e5c-b9cc-9ee014147730.json | 105 - .../c845eb10-a028-4cc2-8f64-25d75480c0d5.json | 132 + .../377e7223-4876-49b6-8057-b1831d7f129b.json | 132 + .../4ddb9ed6-0599-482e-b12e-bcb01975cc85.json | 132 + .../9d5af106-be69-4b62-99c1-fcfb6091d080.json | 132 + .../2f2d7a55-2838-446d-9487-a6cfa0c03356.json | 132 + .../f5005cc2-cec4-4a1c-be09-a670d996d15b.json | 105 - .../425372c0-e096-4bdf-8f6c-eb2d5b36bb07.json | 105 - .../65d20d45-f63b-4b09-b66d-5f53297c0c20.json | 132 + .../42a767cf-7d29-486d-b83e-fcfa51f048c1.json | 105 - .../4712953f-0777-4b97-8f13-f7309f19f0dc.json | 132 + .../2419f2a3-03df-4521-9baa-346e3cb53181.json | 105 - .../84382308-04b5-439f-b486-b26d20da605a.json | 132 + .../a88e7110-2a58-4f47-801f-2a49037eaed6.json | 105 - .../e82be06f-14ed-45e8-a273-d28c50f5212b.json | 132 + .../5815ba55-40fc-4f8e-ae0b-b329c42fd503.json | 132 + .../cd2f94a5-595a-469e-b34e-a5f9abb82e6b.json | 105 - .../c19ed336-aadf-4af3-a0e5-1c1946a17ce4.json | 105 - .../e58eceb3-b501-4924-9d0d-98d7da3c16c5.json | 132 + .../5a88455c-7699-4c49-8a12-76cda15d878c.json | 132 + .../d6cadac8-17a9-430f-94b3-6eb0c7ecc146.json | 105 - .../0bdeac20-0505-459e-b417-ea4cb2f95cec.json | 105 - .../122b4c1e-6e6c-4db5-8991-b091361c3ecf.json | 132 + .../6abeb0e4-32ee-4dbb-9902-b19cc96a2aa7.json | 132 + .../b1ad6a57-8cad-4cca-8dd6-00ebd35089ab.json | 105 - .../249af8cd-717b-4ee9-8ac7-740f16708675.json | 105 - .../679f214f-e03f-47a9-8a11-91adbf1c4880.json | 132 + .../338737c7-29cf-44d8-be92-6749167b7c03.json | 105 - .../680e77b8-9c64-4c52-aa83-55236039cef1.json | 132 + .../aa12336f-556c-4222-a10c-529eb74a793b.json | 105 - .../c24c471c-14b3-462e-8b81-6548b27e5ffc.json | 132 + .../79fad1b7-c458-4f89-9d7a-d58f70ba6c90.json | 105 - .../efa7fa62-2e8b-403c-b345-eef876b48dbd.json | 132 + .../40bae762-65bd-4b4c-b422-ffd0fd3790a9.json | 132 + .../5e4e3c08-71cd-4241-bfe9-bc242f0cc32a.json | 105 - .../596957cc-719c-44c7-8284-06a9ba0d1a30.json | 132 + .../cb38b3bb-6188-430f-b863-9bf86cc877f9.json | 105 - .../706bbc09-f867-4327-bc4d-b5ede41ebd93.json | 132 + 
.../938af657-ca9b-4400-84e1-002065f92f84.json | 105 - .../8962e9be-75bf-4f57-8ce2-b29523740851.json | 132 + .../014f4838-22ff-4802-a887-4d2de01a9256.json | 132 + .../5c6eac9c-0ec6-4364-a86b-dcd894d69f0b.json | 132 + .../09b81cf2-3b79-448c-ab8e-87e378c804bb.json | 132 + .../28b9977a-db3d-4f38-b1f7-bd0cdcab5504.json | 132 + .../845ea162-cfa1-47f4-8914-d81d9bf1bb7d.json | 132 + .../706737c7-cd1a-4958-9ffc-2655f0b50178.json | 132 + .../5acd58cd-8dfb-4fb7-8832-6bc151e0b1a1.json | 132 + .../d374a68d-b985-47c2-b087-500bffa93c80.json | 132 + .../23fbceb0-b646-4945-b17f-66dde24a0e43.json | 132 + .../73d9e204-e829-4159-b340-6d9581c6f0e1.json | 132 + .../a6979dda-fba6-4104-b153-3b0a89de8585.json | 132 + .../d22c83a1-9c1c-43df-b033-c6cb75cb389d.json | 105 - .../62e04968-0c5c-4aad-a434-d9d24bccbdb8.json | 132 + .../bae4064e-b10f-4082-876d-e4168ca1a8cc.json | 132 + .../0040b48c-0f54-4c9b-97ee-1ca833c68e36.json | 132 + .../4fbaf39a-86a1-4b79-aeeb-e14c2de64666.json | 105 - .../6050e969-bcde-4594-8e53-05fa74c7287d.json | 132 + .../3aaee358-bf3e-4d91-91bf-bd42e0a7c61e.json | 132 + .../ef5f4fb2-f409-49dc-b3f0-f3e19585cd8a.json | 132 + .../4048fa60-7427-4f7e-9939-e270aa5e8b51.json | 132 + .../f5c9baea-f2cf-414a-937a-6a43f55a1c1d.json | 132 + .../1da70796-d40b-4f2a-8ce3-b304f414a6d5.json | 132 + .../de476f79-2539-4f9e-a1d2-901c6c4342d4.json | 132 + .../80aee542-c894-46b6-a6ed-9f3400aefa9e.json | 132 + .../5c9d4eaf-0985-4f9e-8007-08b4081bb19d.json | 132 + .../4b019824-8454-4ce8-aa49-d122a2491f9c.json | 132 + .../0dfcd13c-f057-4aec-82ad-b5cf2b266502.json | 132 + .../927589bf-f6a0-4155-a24b-120231bbf029.json | 132 + .../1a2740cb-c541-434e-89a1-7a9fd2c4cabd.json | 132 + .../0110d1c9-755e-4f09-888b-0c9c1a263639.json | 132 + .../cda65781-494c-45bd-8c32-7b1fe987f31c.json | 132 + .../2fd7de02-f8d9-45c1-9bb5-db5134bd4862.json | 132 + .../acf07f51-5acd-4375-bafa-7a1a244db3c6.json | 132 + .../ff985193-ba26-45d3-97be-b7d3b17ab4d7.json | 132 + .../21dbea2c-5cb1-431c-a496-af9b932b3440.json | 132 + .../1143955c-c32c-4b41-8484-2c77e72f4946.json | 132 + .../94824ceb-08c3-415c-8003-b70a0d9af09d.json | 132 + .../bf2903cb-b954-4870-98c3-116a96aa49fb.json | 132 + .../b089c439-a38c-438d-bdad-1c68a1265d95.json | 132 + .../c988815b-50e5-47e4-a418-bbbcdf1eb4a0.json | 132 + .../fa11d66c-7ebc-4b81-83b7-d35a4ff23d3f.json | 132 + .../1c81787b-594e-4bb6-aee1-7f193a628b16.json | 132 + .../fd9ce37e-d43d-4ec2-94ec-0eb42e3cc685.json | 132 + .../0625f09a-3e02-410b-963b-49b83dfc5c8f.json | 132 + .../50c1399e-b409-4dff-b4d6-9be01dbb02c7.json | 132 + .../402bdb4a-b258-40a4-ac9f-de74026c02f3.json | 132 + .../65dcf458-db0f-45cd-a8a4-e16108e51161.json | 132 + .../f1346b1a-0e66-4d80-bfad-ccbe0a8e2abf.json | 132 + .../11e7b55a-d872-474a-98a6-fc82ce5a863e.json | 132 + .../19688633-fa6c-412a-8dbc-c16fc49b3276.json | 132 + .../7d67eb9c-a4d8-4b86-8c24-928ebbe58de7.json | 132 + .../447f880c-643f-4041-8cdb-87697d798085.json | 132 + .../653d459e-f8b7-48bc-a9db-779e515532cf.json | 132 + .../4e56faf6-dbde-4059-b502-32c76bdbed2d.json | 132 + .../f161df97-3cc6-48d3-bfc5-d3f01108ecbb.json | 132 + .../7d08412d-e987-497f-a6ec-ce0affe0f80f.json | 132 + .../f042f897-cfe8-4d8c-b75b-bbfca44505ea.json | 132 + .../f24ab334-c022-4e34-a930-3fed6ee18793.json | 132 + .../2bd3c620-780f-452d-92d7-d01a04539939.json | 132 + .../234042bd-237f-4cc5-8c5d-1eacd2e8bfaa.json | 132 + .../d8e0a32e-f307-4056-b450-47a12a0a7b15.json | 132 + .../9dc3c4f5-8974-4496-8a6e-daa4fe3e3c2a.json | 132 + .../037787fb-9c61-4c56-a7fc-704c04b519f7.json | 132 + 
.../5df3dd8f-4921-4916-8163-8651b796e478.json | 132 +
.../50463593-3a53-4b3f-9621-d05670309b7e.json | 132 +
.../d7fef356-36c7-488f-8f49-997682a2c01a.json | 132 +
.../42e7abc6-eaa2-4971-90ee-e4d9dbb97ddb.json | 132 +
.../b1cf06a6-d270-41ae-bb9b-443bdc5446f3.json | 132 +
.../e40ea476-bcc5-4d3b-bf8e-e5048d9cbe42.json | 132 +
.../731a5f85-a59e-40af-870c-00e519ca0e7e.json | 132 +
.../38d93ae8-90ec-473c-8570-33d52c46770b.json | 132 +
.../9072fd28-040b-44df-bd58-6e3f59398189.json | 132 +
.../14827e00-09c5-4ebd-93cb-8e026ac73d20.json | 132 +
.../11e76d74-b8e0-408f-b429-566faa5d60a2.json | 132 +
.../944c84d8-231d-47ef-85f4-23c0286a4a02.json | 132 +
.../47c8da1d-8ce3-4d19-b8b8-6b5e68e2e8ab.json | 132 +
.../ca54a8d4-153b-4169-b6ee-133461a9bedd.json | 132 +
.../652359ec-14f2-4f94-a694-b7dc98819bfc.json | 132 +
.../b34f3335-c7a3-431f-b2c8-6f0731a81378.json | 132 +
.../077306f9-5d40-40dc-9df4-b5ca559af5c7.json | 132 +
.../e0f0fe87-8ed3-4398-8683-65aa042d01d9.json | 132 +
.../2d968d3e-a3df-4bdf-86a4-034087c0d7fc.json | 132 +
.../db476911-87fb-433f-b164-4435718dab46.json | 132 +
.../75a967f6-a8ab-435f-999b-4889e8217dce.json | 132 +
.../e072997b-2f79-4d25-b8dc-ebf15ac311e1.json | 132 +
.../6d681a29-0d1a-4054-8250-5246993509f8.json | 132 +
.../e25f6fa3-238e-4bc3-b6ce-cdc2bc728d9c.json | 105 -
.../2a6af4ce-e45c-4721-a23c-03071a5e774f.json | 132 +
.../5ae5ddff-714d-4a20-b1d3-3eeb95fd858c.json | 132 +
.../60052d34-f6a7-4204-baea-532f5ba29880.json | 132 +
.../e1ddd882-f8a1-48d0-bb2a-878f43095895.json | 132 +
.../d2c3edec-38d8-48e3-9f6d-e26a63442af8.json | 132 +
.../dcfafe94-dacb-4e7a-9365-8bb39ecb79ec.json | 132 +
.../8ca0e602-bf6b-4d15-95c2-a0d47e78ded0.json | 132 +
.../fc262523-dcde-4b45-80ba-2922e66d42c4.json | 132 +
.../f8d745da-9867-4348-bace-d8052c3b4025.json | 132 +
.../3d410f0f-6b24-4e86-a353-6142c51b1ecc.json | 132 +
.../a5490bf2-6d11-4474-b6e5-07a79d30f431.json | 105 -
.../46329fc3-974f-4d04-be9e-ba85b3816efc.json | 132 +
.../78799fe1-5fbd-4023-9462-8d826dac41d5.json | 105 -
.../35068575-06a3-4541-bdf3-120bd6db2867.json | 105 -
.../b964d0a4-7c44-4ea2-894e-3e1ca30321e0.json | 132 +
.../7b2767f8-9266-486e-8e49-6177930bc258.json | 2377 -
.../96a7ea61-8869-4dd0-9164-756519a26ac0.json | 1634 -
scripts/global-mmlu-lite/__init__.py | 1 +
scripts/global-mmlu-lite/adapter.py | 211 +
scripts/hfopenllm_v2/adapter.py | 103 +-
uv.lock | 210 +-
9207 files changed, 614920 insertions(+), 778322 deletions(-)
delete mode 100644 data/ai2_arc/anthropic/claude-sonnet-4-0/40094cf6-b187-475d-8f14-abb71d998c2b.json
delete mode 100644 data/formatted
delete mode 100644 data/global-mmlu-lite/Alibaba/Qwen-3-235B-A22B-Instruct/d1b63dce-9740-4347-b7b2-01099fa8b9e7.json
delete mode 100644 data/global-mmlu-lite/Anthropic/Claude-3.5-Haiku/b34d5c62-d44a-44ce-9d14-f97445a407a3.json
delete mode 100644 data/global-mmlu-lite/Anthropic/Claude-3.7-Sonnet/462fd172-5786-45a9-a938-361fef294d8b.json
delete mode 100644 data/global-mmlu-lite/Anthropic/Claude-Opus-4.1/562a23d0-d80a-4564-a68b-6b478817fa0e.json
delete mode 100644 data/global-mmlu-lite/Anthropic/Claude-Sonnet-4/0e7e63be-9a07-48fd-a525-7378f6d0477f.json
delete mode 100644 data/global-mmlu-lite/Cohere/Aya-Expanse-32B/33c55930-eba4-45d1-a214-bfb0338812b3.json
delete mode 100644 data/global-mmlu-lite/Cohere/Command-A/ba5f478e-7484-4c7d-9691-1c4da2aa39a1.json
delete mode 100644 data/global-mmlu-lite/DeepSeek/DeepSeek-R1/a00e87b5-bb92-4ff5-aea7-b4e8357663c2.json
delete mode 100644 data/global-mmlu-lite/DeepSeek/Deepseek-V3.1/a1dabd04-29d3-4170-88f7-ee899b26c24a.json
delete mode 100644 data/global-mmlu-lite/Google/Gemini-2.5-Flash-Preview/ea9d0ff1-0801-4de7-a99a-febdcde420fa.json
delete mode 100644 data/global-mmlu-lite/Google/Gemini-2.5-Flash/32612d44-2a0e-44b6-9f23-bdbf8bccb714.json
delete mode 100644 data/global-mmlu-lite/Google/Gemini-2.5-Pro/4d20140d-a955-4927-9140-49fe597519c6.json
delete mode 100644 data/global-mmlu-lite/Google/Gemini-3-Pro-Preview/658d3005-8fe2-4560-acb9-e2e271b72cea.json
delete mode 100644 data/global-mmlu-lite/Google/Gemma-3-27B/21e8fec0-ea47-4375-9c99-c5a3811296e9.json
delete mode 100644 data/global-mmlu-lite/Google/Gemma-3-4B/dd08c6cc-919b-414d-a97e-025a7b485987.json
delete mode 100644 data/global-mmlu-lite/IBM/Granite-4.0-Small/75bb331f-e492-4dfd-9f1b-b83cad2f04d9.json
delete mode 100644 data/global-mmlu-lite/Mistral-AI/Mistral-Medium-3/b08417e3-22f1-40e7-a621-f25531972052.json
delete mode 100644 data/global-mmlu-lite/Mistral-AI/Mistral-Small-3.1/f5efe093-1cec-4e7f-8413-05039461ed27.json
delete mode 100644 data/global-mmlu-lite/OpenAI/GPT-4.1/ab0d8833-09d3-4d42-b1f4-e0d3e410ea7f.json
delete mode 100644 data/global-mmlu-lite/OpenAI/GPT-5/744ce6aa-57ad-4f39-ac32-6ccce3fb727c.json
delete mode 100644 data/global-mmlu-lite/OpenAI/o3-mini/c38e906d-d904-4515-8312-76c1082343c3.json
delete mode 100644 data/global-mmlu-lite/OpenAI/o4-mini/16f3cc58-7107-4443-b872-c8515feb67ef.json
create mode 100644 data/global-mmlu-lite/alibaba/qwen3-235b-a22b-instruct-2507/2d0a09db-e97e-4ef7-9987-ef7c933ad721.json
create mode 100644 data/global-mmlu-lite/anthropic/claude-3-5-haiku-20241022/7af30210-b021-49d5-932c-75a9a42a2d08.json
create mode 100644 data/global-mmlu-lite/anthropic/claude-3-7-sonnet-20250219/4291c294-8155-4664-aec4-272445cc8862.json
create mode 100644 data/global-mmlu-lite/anthropic/claude-opus-4-1-20250805/911db593-5c95-41e9-9264-b130be6a9fb1.json
create mode 100644 data/global-mmlu-lite/anthropic/claude-sonnet-4-20250514/51465d80-23e2-4328-8845-70b373408d65.json
create mode 100644 data/global-mmlu-lite/cohere/command-a-03-2025/12a16399-1aff-4173-9677-58d0d9e23ea2.json
create mode 100644 data/global-mmlu-lite/deepseek/deepseek-r1-0528/aeaab8dd-70cd-484c-a550-18ce9f1dbad7.json
create mode 100644 data/global-mmlu-lite/deepseek/deepseek-v3.1/803ce795-80f9-49a1-92a5-8d81c8d0ff4b.json
create mode 100644 data/global-mmlu-lite/google/gemini-2.5-flash-preview-05-20/3796f2e5-ee3f-4598-911f-92e8efac92c3.json
create mode 100644 data/global-mmlu-lite/google/gemini-2.5-flash/b225eef0-9698-4340-bc6d-cece877c8863.json
create mode 100644 data/global-mmlu-lite/google/gemini-2.5-pro/531fe0ba-1f29-4409-abdb-daad56918fcc.json
create mode 100644 data/global-mmlu-lite/google/gemini-3-pro-preview/9193adbe-0c95-4b5e-a179-4c14e749a75c.json
create mode 100644 data/global-mmlu-lite/google/gemma-3-27b-it/7a0bdc36-cff9-4a01-aa5c-750882aeccd4.json
create mode 100644 data/global-mmlu-lite/google/gemma-3-4b-it/548f8e83-3e97-4cb4-83ac-8da11d8f3e5f.json
create mode 100644 data/global-mmlu-lite/mistralai/mistral-medium-3/1e2e51d0-42e8-4564-a42c-31819f89f459.json
create mode 100644 data/global-mmlu-lite/mistralai/mistral-small-2503/85822e81-7478-4f63-b7f3-89a78e75c6d9.json
create mode 100644 data/global-mmlu-lite/openai/gpt-4.1-2025-04-14/c4b48f92-4f10-4831-86a9-3ede0512bf7b.json
create mode 100644 data/global-mmlu-lite/openai/gpt-5-2025-08-07/07d59b47-f0d1-48a4-87fb-b931bbb21ac7.json
create mode 100644 data/global-mmlu-lite/openai/o3-mini-2025-01-31/bfc75f7b-ebc2-4833-acb2-a9b48bd02d79.json
create mode 100644 data/global-mmlu-lite/unknown/aya-expanse-32b/1b07b6c2-9eb1-4e90-9e29-91c2b8258b4d.json
create mode 100644 data/global-mmlu-lite/unknown/granite-4.0-h-small/96569c98-0d02-4b32-b915-87b707102913.json
create mode 100644 data/global-mmlu-lite/unknown/o4-mini-2025-04-16/f1955ca9-b8ef-4565-9c4c-2e7aaba6a9db.json
delete mode 100644 data/global-mmlu-lite/xAI/Grok-3-Mini/d3dd93e4-0cfe-4141-a835-3921fb80ed27.json
delete mode 100644 data/global-mmlu-lite/xAI/Grok-4/96c76d71-942b-452b-919b-ad13bd1614d6.json
create mode 100644 data/global-mmlu-lite/xai/grok-3-mini/143d53e6-b34e-4fa8-af3f-8019cef29abb.json
create mode 100644 data/global-mmlu-lite/xai/grok-4-0709/bcfa473c-1686-42af-8d07-4c8b92c3d864.json
delete mode 100644 data/hellaswag/eleutherai/pythia-1b-v0/a796664d-51fa-49b0-ae93-b446171f5521.json
create mode 100644 data/hfopenllm_v2/0-hero/Matter-0.2-7B-DPO/0d7928c3-c769-474e-8249-7a5c70c4c559.json
delete mode 100644 data/hfopenllm_v2/0-hero/Matter-0.2-7B-DPO/40e80d5e-db72-46b7-bd14-b7d005df4be8.json
delete mode 100644 data/hfopenllm_v2/01-ai/Yi-1.5-34B-32K/0d91a153-1b6b-4891-8722-a5c7e372ba64.json
create mode 100644 data/hfopenllm_v2/01-ai/Yi-1.5-34B-32K/f63536ed-752b-4538-9b92-2514a617a4bf.json
delete mode 100644 data/hfopenllm_v2/01-ai/Yi-1.5-34B-Chat-16K/2192007d-1f6e-4f74-b518-7448ef3a896e.json
create mode 100644 data/hfopenllm_v2/01-ai/Yi-1.5-34B-Chat-16K/8ff13de2-ea43-4392-992f-ba70b6023e96.json
create mode 100644 data/hfopenllm_v2/01-ai/Yi-1.5-34B-Chat/02bac8a7-bd09-4e73-979a-7dbaa7a8ed75.json
delete mode 100644 data/hfopenllm_v2/01-ai/Yi-1.5-34B-Chat/e335874b-9b3e-4966-a7e0-22e9d16f8324.json
create mode 100644 data/hfopenllm_v2/01-ai/Yi-1.5-34B/74e4406d-b2b6-4c3f-b059-f52cccf1fff4.json
delete mode 100644 data/hfopenllm_v2/01-ai/Yi-1.5-34B/8409c158-ef12-4e6c-8a1d-7be2084b3446.json
delete mode 100644 data/hfopenllm_v2/01-ai/Yi-1.5-6B-Chat/3452e57f-3023-4e2e-ad84-b09e409fe334.json
create mode 100644 data/hfopenllm_v2/01-ai/Yi-1.5-6B-Chat/ec8a6d6c-b8ea-48a3-9af6-d357e0057ec1.json
create mode 100644 data/hfopenllm_v2/01-ai/Yi-1.5-6B/05307b41-d832-4533-99bd-c8608bf8e64c.json
delete mode 100644 data/hfopenllm_v2/01-ai/Yi-1.5-6B/1a1f1263-96b6-4e32-a2c8-6c0d6b47dff9.json
create mode 100644 data/hfopenllm_v2/01-ai/Yi-1.5-9B-32K/c09bd9b0-6f85-4120-94a9-b628c68bccb7.json
delete mode 100644 data/hfopenllm_v2/01-ai/Yi-1.5-9B-32K/df9d9d44-daa1-4e61-9b46-192380043889.json
delete mode 100644 data/hfopenllm_v2/01-ai/Yi-1.5-9B-Chat-16K/090c9691-4b7e-4a98-b9a2-644e21797be4.json
create mode 100644 data/hfopenllm_v2/01-ai/Yi-1.5-9B-Chat-16K/9f971385-1146-4436-91a6-0e52d4db1f07.json
create mode 100644 data/hfopenllm_v2/01-ai/Yi-1.5-9B-Chat/80ed14ca-b4cd-4ceb-8fdb-24705e47bd0e.json
delete mode 100644 data/hfopenllm_v2/01-ai/Yi-1.5-9B-Chat/9256c32b-d956-418f-97da-ea78e3ad9e48.json
delete mode 100644 data/hfopenllm_v2/01-ai/Yi-1.5-9B/904d1f91-3153-49d5-afd3-9921bfc086f1.json
create mode 100644 data/hfopenllm_v2/01-ai/Yi-1.5-9B/db88e3f5-58a9-4783-9093-a6df96483342.json
create mode 100644 data/hfopenllm_v2/01-ai/Yi-34B-200K/8cd90f8a-d8dc-469b-95b9-260fcef804d2.json
delete mode 100644 data/hfopenllm_v2/01-ai/Yi-34B-200K/fb2ebd9a-f5b8-42a2-9b58-e6f0e7d9b98a.json
delete mode 100644 data/hfopenllm_v2/01-ai/Yi-34B-Chat/5d9b9217-874b-426d-8af4-5105a3b1b3ad.json
create mode 100644 data/hfopenllm_v2/01-ai/Yi-34B-Chat/b2c82703-2b5c-407d-b84f-a8f8261ac894.json
delete mode 100644 data/hfopenllm_v2/01-ai/Yi-34B/3ebcbf3d-cb2d-4332-bb8a-1db104033391.json
create mode 100644
data/hfopenllm_v2/01-ai/Yi-34B/55462e67-5eca-4e9d-9095-51fcf12de5fa.json create mode 100644 data/hfopenllm_v2/01-ai/Yi-6B-200K/25a119f0-5eaa-4fa9-8cd4-e0f437ada456.json delete mode 100644 data/hfopenllm_v2/01-ai/Yi-6B-200K/6b720e8b-aab8-4ba4-9bce-e7a1de3cfb86.json delete mode 100644 data/hfopenllm_v2/01-ai/Yi-6B-Chat/1120c801-7736-4d9d-b23d-08eeedb34186.json create mode 100644 data/hfopenllm_v2/01-ai/Yi-6B-Chat/efc036b6-d8de-4393-87a1-d4f86fb44d91.json delete mode 100644 data/hfopenllm_v2/01-ai/Yi-6B/297419fa-855c-4eae-ad7c-3cf4a0262450.json create mode 100644 data/hfopenllm_v2/01-ai/Yi-6B/a5144406-eb85-43b2-a49d-be6b06d6b04a.json delete mode 100644 data/hfopenllm_v2/01-ai/Yi-9B-200K/4299df04-495a-4687-b143-96b1b562d5e8.json create mode 100644 data/hfopenllm_v2/01-ai/Yi-9B-200K/900184ad-656d-416b-956f-5f6e3a991d1b.json delete mode 100644 data/hfopenllm_v2/01-ai/Yi-9B/0ec59add-f9a9-4dbd-8a83-c6aec0b8ad21.json create mode 100644 data/hfopenllm_v2/01-ai/Yi-9B/7a58954a-5d7d-4640-99fd-773249640237.json create mode 100644 data/hfopenllm_v2/01-ai/Yi-Coder-9B-Chat/4ea3146c-b912-424a-b0a9-7c37348348c8.json delete mode 100644 data/hfopenllm_v2/01-ai/Yi-Coder-9B-Chat/ef0cc3a5-0d62-4a45-b0c7-28a6f7dfdac4.json delete mode 100644 data/hfopenllm_v2/1-800-LLMs/Qwen-2.5-14B-Hindi-Custom-Instruct/a48b0864-76b7-4860-a448-942a8d74f68e.json create mode 100644 data/hfopenllm_v2/1-800-LLMs/Qwen-2.5-14B-Hindi-Custom-Instruct/b0276278-6d86-49c0-a246-cd9110ac1deb.json create mode 100644 data/hfopenllm_v2/1-800-LLMs/Qwen-2.5-14B-Hindi/04216f67-1385-43bf-b7de-5bae7a60f379.json create mode 100644 data/hfopenllm_v2/1024m/PHI-4-Hindi/fbf7b76b-7ced-4217-8e14-1d02184e271c.json create mode 100644 data/hfopenllm_v2/1024m/QWEN-14B-B100/74ac8aba-6dfb-464c-81b5-d02a9192b9cc.json create mode 100644 data/hfopenllm_v2/152334H/miqu-1-70b-sf/295938e1-ade2-4d36-beca-3cbe506b5b90.json delete mode 100644 data/hfopenllm_v2/152334H/miqu-1-70b-sf/f57d7b8d-85d5-4e0b-8dec-31e2931487dd.json delete mode 100644 data/hfopenllm_v2/1TuanPham/T-VisStar-7B-v0.1/1347cd1b-2ebc-4223-900f-7c2479e228a3.json create mode 100644 data/hfopenllm_v2/1TuanPham/T-VisStar-7B-v0.1/f331782f-ea09-41bd-8c6a-e964c88d7e09.json delete mode 100644 data/hfopenllm_v2/1TuanPham/T-VisStar-v0.1/b2926dd6-628c-4274-b0e8-1efc64269bb2.json create mode 100644 data/hfopenllm_v2/1TuanPham/T-VisStar-v0.1/e4e3d79a-1de9-43be-a029-0be4f60e472b.json delete mode 100644 data/hfopenllm_v2/3rd-Degree-Burn/L-3.1-Science-Writer-8B/0c4fd071-b5c9-4bf1-a1d5-d658be1a3258.json create mode 100644 data/hfopenllm_v2/3rd-Degree-Burn/L-3.1-Science-Writer-8B/6914ac28-b543-4f36-81f1-f7491c018e3b.json create mode 100644 data/hfopenllm_v2/3rd-Degree-Burn/Llama-3.1-8B-Squareroot-v1/b7378f41-46ab-41af-94cc-e7fb10738658.json create mode 100644 data/hfopenllm_v2/3rd-Degree-Burn/Llama-3.1-8B-Squareroot/acedae59-6192-4ac4-a354-d520ecd6ba36.json create mode 100644 data/hfopenllm_v2/3rd-Degree-Burn/Llama-Squared-8B/ff105961-761d-4261-8a44-20acf2e7f440.json delete mode 100644 data/hfopenllm_v2/4season/final_model_test_v2/74973e37-cd82-4e8a-816a-02b035fabff4.json create mode 100644 data/hfopenllm_v2/4season/final_model_test_v2/fa0901f6-514e-44ae-84dc-0b793f26169e.json delete mode 100644 data/hfopenllm_v2/AALF/FuseChat-Llama-3.1-8B-Instruct-preview/3766e8a0-99ad-4733-a01b-ced446b15eda.json create mode 100644 data/hfopenllm_v2/AALF/FuseChat-Llama-3.1-8B-Instruct-preview/d2dff5df-343b-40f3-85de-14eb72dab050.json delete mode 100644 
data/hfopenllm_v2/AALF/FuseChat-Llama-3.1-8B-SFT-preview/342ac912-805f-4166-b8f4-10f0503fa892.json create mode 100644 data/hfopenllm_v2/AALF/FuseChat-Llama-3.1-8B-SFT-preview/8fa3010f-b7a1-4fc1-9156-ba70453add86.json create mode 100644 data/hfopenllm_v2/AALF/gemma-2-27b-it-SimPO-37K-100steps/58034f99-3b01-46d6-aea9-90c75d073bb0.json create mode 100644 data/hfopenllm_v2/AALF/gemma-2-27b-it-SimPO-37K/e6c08c9c-6d01-45c7-8a24-219b756b8632.json create mode 100644 data/hfopenllm_v2/AELLM/gemma-2-aeria-infinity-9b/cd97ad01-1d20-4cbd-a9bb-2acf3d9fdcc7.json create mode 100644 data/hfopenllm_v2/AELLM/gemma-2-lyco-infinity-9b/95f44ef8-e5ba-4bdc-97a7-2c5a678b07be.json create mode 100644 data/hfopenllm_v2/AGI-0/Art-v0-3B/082f25f0-994c-438a-8086-b1e439aca466.json delete mode 100644 data/hfopenllm_v2/AGI-0/Art-v0-3B/162b6d5f-f983-4989-9603-f6baea26b633.json create mode 100644 data/hfopenllm_v2/AGI-0/Artificium-llama3.1-8B-001/31423cbd-08cd-4079-b1c5-ba412acf1b51.json create mode 100644 data/hfopenllm_v2/AGI-0/smartllama3.1-8B-001/2669bd86-da65-4d87-8464-bfa8c741ce0b.json delete mode 100644 data/hfopenllm_v2/AI-MO/NuminaMath-7B-CoT/9ac2ba3c-9a21-46b2-a21c-4909cfae6315.json create mode 100644 data/hfopenllm_v2/AI-MO/NuminaMath-7B-CoT/ab2c19ff-5671-446f-b09e-731e2ae515ca.json delete mode 100644 data/hfopenllm_v2/AI-MO/NuminaMath-7B-TIR/0ffa78d4-fe45-4639-bcd1-eb19ab168a35.json create mode 100644 data/hfopenllm_v2/AI-MO/NuminaMath-7B-TIR/36250dc3-cb51-43be-8ab0-6788eb5bda7c.json delete mode 100644 data/hfopenllm_v2/AI-Sweden-Models/Llama-3-8B-instruct/1d68bd2e-de6e-4327-a8f1-33322eba537e.json create mode 100644 data/hfopenllm_v2/AI-Sweden-Models/Llama-3-8B-instruct/cd616d6a-151f-4aaa-93b5-9c4a758f95b5.json create mode 100644 data/hfopenllm_v2/AI-Sweden-Models/gpt-sw3-40b/9cb09cae-9b1b-43b1-afbf-f44b0a44053c.json create mode 100644 data/hfopenllm_v2/AI4free/Dhanishtha/038c32da-add5-4299-ac17-df6ef3fdea58.json delete mode 100644 data/hfopenllm_v2/AI4free/Dhanishtha/a554a3eb-943c-4135-966b-929129ef025d.json create mode 100644 data/hfopenllm_v2/AI4free/t2/25eb4bdf-beb4-4ad2-a5e9-3a2f31c46cb5.json delete mode 100644 data/hfopenllm_v2/AI4free/t2/332ccdb5-faf5-47c6-afeb-a91d2148adf0.json delete mode 100644 data/hfopenllm_v2/AIDC-AI/Marco-o1/17f7398f-675d-4b38-b233-64fc106737c3.json create mode 100644 data/hfopenllm_v2/AIDC-AI/Marco-o1/77655d60-872f-468a-acc6-d584ef5bf46a.json create mode 100644 data/hfopenllm_v2/Aashraf995/Creative-7B-nerd/4de378c8-ccf6-4f0b-8287-3d138a8645b9.json delete mode 100644 data/hfopenllm_v2/Aashraf995/Creative-7B-nerd/7ea9f4db-5b52-40a5-904e-785e43302934.json create mode 100644 data/hfopenllm_v2/Aashraf995/Gemma-Evo-10B/8039cadf-6644-44e7-8452-90e9c8069e28.json create mode 100644 data/hfopenllm_v2/Aashraf995/Qwen-Evo-7B/8914d89d-c873-4704-998e-dc807e96030b.json create mode 100644 data/hfopenllm_v2/Aashraf995/QwenStock-14B/c2e9fc29-db07-4b49-a98a-084158831ac4.json create mode 100644 data/hfopenllm_v2/AbacusResearch/Jallabi-34B/58724539-6fc5-40d9-ba43-87410959894d.json delete mode 100644 data/hfopenllm_v2/AbacusResearch/Jallabi-34B/76397277-901a-4ad0-9dae-0351ca875ec6.json delete mode 100644 data/hfopenllm_v2/Ahdoot/StructuredThinker-v0.3-MoreStructure/81a5aafb-2cf7-490d-b619-ce638fcc8b38.json create mode 100644 data/hfopenllm_v2/Ahdoot/StructuredThinker-v0.3-MoreStructure/b13324cf-f6f5-4bf1-9cf3-c196120c4bcf.json delete mode 100644 data/hfopenllm_v2/Ahdoot/Test_StealthThinker/43c907eb-3e43-47ff-b38d-f912ba6ef46c.json create mode 100644 
data/hfopenllm_v2/Ahdoot/Test_StealthThinker/782b2df0-d1b3-414c-a4bd-59052a4441a9.json delete mode 100644 data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V0-Coder/48732edf-8baf-438e-8a5c-763eee6c0c18.json create mode 100644 data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V0-Coder/b508e41e-0f1c-49ce-8b80-5e7ec82b8f15.json create mode 100644 data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V0/2824e8d4-2749-4b18-a3a1-b987ed215ac6.json delete mode 100644 data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V0/38f169f0-e939-4b12-8f78-b2a27fb90de0.json create mode 100644 data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V1.1/53176984-ba93-4a64-b81e-21f6e0f65bcd.json delete mode 100644 data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V1.1/e8c63728-a1f5-432f-bf9f-204b0f4041aa.json create mode 100644 data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V1/53252698-7d17-4f2a-9106-3b744ae7a985.json delete mode 100644 data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V1/b613ecbe-7b2b-4b03-ab2c-163f9988a8fc.json delete mode 100644 data/hfopenllm_v2/Alepach/notHumpback-M0/1a4477f7-c414-41ab-bbcb-593f4a86031a.json create mode 100644 data/hfopenllm_v2/Alepach/notHumpback-M0/6dd0f3a2-27ee-48f1-9d97-ef6954d298c8.json delete mode 100644 data/hfopenllm_v2/Alepach/notHumpback-M1-v2/27c6c36d-6bd5-439b-bdc8-1bd0f8f4c9ea.json create mode 100644 data/hfopenllm_v2/Alepach/notHumpback-M1-v2/35f11d5e-88c4-4a95-8d06-a40bee648b00.json delete mode 100644 data/hfopenllm_v2/Alepach/notHumpback-M1/030f17b0-036f-4021-90da-6c1d38da659d.json create mode 100644 data/hfopenllm_v2/Alepach/notHumpback-M1/ba1193c0-42b8-487d-b9fd-ddbc1fd15359.json delete mode 100644 data/hfopenllm_v2/Alibaba-NLP/gte-Qwen2-7B-instruct/39ea9329-5ed7-46ea-bcc4-30679a63b405.json create mode 100644 data/hfopenllm_v2/Alibaba-NLP/gte-Qwen2-7B-instruct/95733620-e1e7-4442-b9c3-a699165df5e7.json create mode 100644 data/hfopenllm_v2/Alsebay/Qwen2.5-7B-test-novelist/cacfce0d-f5f1-4101-8065-f5f02eaab1fb.json create mode 100644 data/hfopenllm_v2/Amaorynho/BBAI2006/72be5537-198a-43e9-9840-a803083158d3.json delete mode 100644 data/hfopenllm_v2/Amaorynho/BBAI2006/ef37c096-a089-4d3e-9fad-c0f959a18bb3.json delete mode 100644 data/hfopenllm_v2/Amaorynho/BBAI270V4/183313de-d526-42a9-a35d-a4e71466e546.json create mode 100644 data/hfopenllm_v2/Amaorynho/BBAI270V4/2e9a3443-970d-4f37-a356-277a11c81754.json create mode 100644 data/hfopenllm_v2/Amaorynho/BBAIIFEV1/1188402f-aa1c-4306-b031-c92ff0a5dd64.json delete mode 100644 data/hfopenllm_v2/Amaorynho/BBAIIFEV1/7c0342a3-5bd4-47b0-b238-d5dcb0f6236e.json delete mode 100644 data/hfopenllm_v2/Amaorynho/BBAI_375/ad4b6e40-883c-47c5-ba33-6c112c2c6b09.json create mode 100644 data/hfopenllm_v2/Amaorynho/BBAI_375/ee2f567a-6403-46d5-9a6b-bd029f81d660.json delete mode 100644 data/hfopenllm_v2/Amu/t1-1.5B/3e967795-680c-4bfc-906b-eadb969cf2bd.json create mode 100644 data/hfopenllm_v2/Amu/t1-1.5B/d809fdff-f5ff-44f5-afc7-7e8af9ce2f93.json create mode 100644 data/hfopenllm_v2/Amu/t1-3B/87d66efc-173f-4c14-b76c-d8b7e00d575d.json delete mode 100644 data/hfopenllm_v2/Amu/t1-3B/c0b7e3e6-4160-4482-af4f-038ae79c7578.json create mode 100644 data/hfopenllm_v2/ArliAI/ArliAI-RPMax-12B-v1.1/47f62378-c3cc-408f-a0d1-71eb3f522f57.json delete mode 100644 data/hfopenllm_v2/ArliAI/ArliAI-RPMax-12B-v1.1/63fc1679-8504-41a0-98d5-2d23aad57b81.json create mode 100644 data/hfopenllm_v2/ArliAI/Llama-3.1-8B-ArliAI-RPMax-v1.1/dba8c12c-388d-4f8b-8ce8-83acfc4920c7.json delete mode 100644 
data/hfopenllm_v2/Arthur-LAGACHERIE/Precis-1B-Instruct/d93c70b5-cb3b-4647-aa47-15c2401f5ebf.json create mode 100644 data/hfopenllm_v2/Arthur-LAGACHERIE/Precis-1B-Instruct/e4087285-1d1a-465e-ac88-91310e939710.json create mode 100644 data/hfopenllm_v2/Artples/L-MChat-7b/09f189d9-74fd-47bb-b5fb-7994cba56ae2.json delete mode 100644 data/hfopenllm_v2/Artples/L-MChat-7b/7aeaf034-1c02-4da7-b7b4-9a27ce759601.json delete mode 100644 data/hfopenllm_v2/Artples/L-MChat-Small/0e5a84e3-b90f-4c20-ad58-4d1cf3517f28.json create mode 100644 data/hfopenllm_v2/Artples/L-MChat-Small/5754c262-6ddf-4f54-9722-22ff20a8d76f.json create mode 100644 data/hfopenllm_v2/Aryanne/QwentileSwap/cc1bd811-ec88-4514-8b47-4140ded4f03d.json create mode 100644 data/hfopenllm_v2/Aryanne/SHBA/3f08155d-8551-4472-86fe-7988cd6df78b.json delete mode 100644 data/hfopenllm_v2/Aryanne/SHBA/a1c56b87-d8d4-4570-9c33-b84dd066d92f.json create mode 100644 data/hfopenllm_v2/Aryanne/SuperHeart/339e12fb-b4a4-4a4b-bb40-899b4ad833f9.json delete mode 100644 data/hfopenllm_v2/Aryanne/SuperHeart/c6fae489-9bf8-40e5-a602-1c6ce9000537.json create mode 100644 data/hfopenllm_v2/AtAndDev/Qwen2.5-1.5B-continuous-learnt/4fd60e9c-5c90-492a-b24d-7ca6d1e91eae.json create mode 100644 data/hfopenllm_v2/AtAndDev/Qwen2.5-1.5B-continuous-learnt/7f8d935e-3782-4769-8bd0-ee8a0ce91cd6.json delete mode 100644 data/hfopenllm_v2/Ateron/Glowing-Forest-12B/13716fd0-049a-4e9a-90ca-af9db59c1703.json create mode 100644 data/hfopenllm_v2/Ateron/Glowing-Forest-12B/6fa07e60-9f82-4abc-aa45-4dfc0bcf9b8d.json create mode 100644 data/hfopenllm_v2/Ateron/Lotus-Magpic/99a0022b-3fe7-4612-9cbb-cf082c1f6b70.json delete mode 100644 data/hfopenllm_v2/Ateron/Lotus-Magpic/bedab846-a6b2-4c51-9690-27deb7a76fe7.json delete mode 100644 data/hfopenllm_v2/Ateron/Way_of_MagPicaro/0a5e585d-1a90-4849-9df5-670a56b9f161.json create mode 100644 data/hfopenllm_v2/Ateron/Way_of_MagPicaro/b1153714-d6fe-4ff9-ab8c-85b677d57f8f.json delete mode 100644 data/hfopenllm_v2/AuraIndustries/Aura-4B/5fe88e89-1055-4357-9394-004dd4635e58.json create mode 100644 data/hfopenllm_v2/AuraIndustries/Aura-4B/c3d39b6c-02af-410d-8a5c-224495b04572.json create mode 100644 data/hfopenllm_v2/AuraIndustries/Aura-8B/0426fcba-3db4-492d-b622-e34ab8d3fc8f.json delete mode 100644 data/hfopenllm_v2/AuraIndustries/Aura-8B/39e029ad-b385-4b26-9a02-b40c90cd8ad8.json delete mode 100644 data/hfopenllm_v2/AuraIndustries/Aura-MoE-2x4B-v2/3402882b-af4e-4509-9d57-32efa5d8c495.json create mode 100644 data/hfopenllm_v2/AuraIndustries/Aura-MoE-2x4B-v2/aa099cfe-ac9a-42dd-8357-f4d8115133ca.json delete mode 100644 data/hfopenllm_v2/AuraIndustries/Aura-MoE-2x4B/8239ffac-3fca-4eab-86d4-78bab22dc420.json create mode 100644 data/hfopenllm_v2/AuraIndustries/Aura-MoE-2x4B/ccbc8a5e-9a97-452a-b023-cc996ffe31f1.json create mode 100644 data/hfopenllm_v2/Aurel9/testmerge-7b/b359a7a3-cf2c-4952-b308-333672dadcec.json delete mode 100644 data/hfopenllm_v2/Aurel9/testmerge-7b/eb45737a-74bc-482d-9d7f-d2bd1d876c77.json create mode 100644 data/hfopenllm_v2/Ayush-Singh/Llama1B-sft-2/0864d5cf-d6fe-42bc-9059-9f2e5ff06b60.json delete mode 100644 data/hfopenllm_v2/Ayush-Singh/Llama1B-sft-2/678cad7f-854b-4dc3-91cc-2d1774ef7faf.json delete mode 100644 data/hfopenllm_v2/Azure99/Blossom-V6-14B/24ce59a5-c351-4ed8-8944-8ec5db739da8.json create mode 100644 data/hfopenllm_v2/Azure99/Blossom-V6-14B/e6ef2559-8a63-43e3-a60b-0d2b7256ad3d.json delete mode 100644 data/hfopenllm_v2/Azure99/Blossom-V6-7B/35949fb3-8c01-45cf-b4db-bbe983b15ac6.json create mode 100644 
data/hfopenllm_v2/Azure99/Blossom-V6-7B/45d019ab-b23c-4fc3-baf5-d57576e9945c.json delete mode 100644 data/hfopenllm_v2/Azure99/blossom-v5-32b/6adfe39d-f2c2-4101-8f0f-7496d55397cd.json create mode 100644 data/hfopenllm_v2/Azure99/blossom-v5-32b/e3cd7c32-e5a1-4cd6-a9dc-95364a8abe75.json create mode 100644 data/hfopenllm_v2/Azure99/blossom-v5-llama3-8b/9be442e8-4b77-43e0-a981-887338e59b78.json create mode 100644 data/hfopenllm_v2/Azure99/blossom-v5.1-34b/a07b6326-f393-490e-b696-d8b45f593d4b.json delete mode 100644 data/hfopenllm_v2/Azure99/blossom-v5.1-34b/d2342413-1b55-4da5-a6e5-da6274f309ad.json delete mode 100644 data/hfopenllm_v2/Azure99/blossom-v5.1-9b/8eb55323-b0d7-4419-aec6-03de8bcd472e.json create mode 100644 data/hfopenllm_v2/Azure99/blossom-v5.1-9b/b66ed91a-98d5-407c-9896-9c2e2a31e9da.json create mode 100644 data/hfopenllm_v2/BAAI/Gemma2-9B-IT-Simpo-Infinity-Preference/9c70921d-956b-4727-9201-1addbd01bb8b.json create mode 100644 data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0613-Llama3-70B/4ba6d51e-314a-4db4-9552-568a4093e01a.json delete mode 100644 data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0613-Llama3-70B/69cea95c-c167-42f4-a233-f7739f86f6a7.json create mode 100644 data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0613-Mistral-7B/835f5056-56bf-4a6c-886f-fbe6f263ac07.json delete mode 100644 data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0613-Mistral-7B/9d9ac91a-f339-41a4-ae91-3dba41b06382.json delete mode 100644 data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Llama3-70B/73eb53bc-a090-4415-8fdc-a767a2e00188.json create mode 100644 data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Llama3-70B/c2a63afa-9d25-41dc-b25f-848f5a640501.json delete mode 100644 data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Llama3-8B/00d87824-732a-4746-8d9f-ce7b1f45c0ae.json create mode 100644 data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Llama3-8B/f64f9d24-e448-4bb6-89c3-edb66499bac9.json create mode 100644 data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Mistral-7B/2de14bfb-844a-4711-815e-8f63487a78fd.json delete mode 100644 data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Mistral-7B/be3423f2-98f0-414a-b0c3-efd0d60d4cb3.json delete mode 100644 data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Qwen2-7B/2390d668-3273-4f58-a0fd-b13b9d9b1651.json create mode 100644 data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Qwen2-7B/f953e0e2-ddca-42a2-a0f6-752a137bc6b5.json delete mode 100644 data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Yi-1.5-9B/8a2d5e9c-7d41-4638-8b8c-58d08fc0912b.json create mode 100644 data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Yi-1.5-9B/98187b98-0cc8-4756-9cb7-c53deb998f90.json create mode 100644 data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-0729-Llama3_1-8B/8c79c60d-ebf4-4409-be4f-928a54cedd1d.json delete mode 100644 data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-0729-Llama3_1-8B/eace7f56-b853-436d-a744-bfdb9e227993.json delete mode 100644 data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-0729-mistral-7B/25477dff-04c5-4cb8-9ad9-3a13448a2a7d.json create mode 100644 data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-0729-mistral-7B/5d5cebeb-faf0-4fdf-8749-6307080e82f2.json delete mode 100644 data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-Gen-Llama3_1-70B/b04b4e4d-2f15-446b-974f-21f72fd80fe0.json create mode 100644 data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-Gen-Llama3_1-70B/e926ce8f-45bb-4f3d-b579-ecadb3df6468.json create mode 100644 data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-Gen-Llama3_1-8B/070609d6-5f41-4712-9ad7-e215b1a6bb81.json delete mode 100644 
data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-Gen-Llama3_1-8B/84f2027c-3e68-489e-902b-2fec6ec8f850.json delete mode 100644 data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-Gen-mistral-7B/51daf5e7-1d4e-4753-b24b-79273e6f9370.json create mode 100644 data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-Gen-mistral-7B/8d2909c7-37f2-4198-a1e2-4bf2ebc1444d.json create mode 100644 data/hfopenllm_v2/BAAI/OPI-Llama-3.1-8B-Instruct/53587959-25f9-43aa-a34b-f274d8bc93af.json delete mode 100644 data/hfopenllm_v2/BAAI/OPI-Llama-3.1-8B-Instruct/567f27f3-3f64-4054-aa67-684c29e4d71a.json create mode 100644 data/hfopenllm_v2/BEE-spoke-data/Meta-Llama-3-8Bee/2a7f80ed-d404-4c81-b000-b65c83069121.json create mode 100644 data/hfopenllm_v2/BEE-spoke-data/smol_llama-101M-GQA/f0983645-4adb-4ddb-bf2f-33480cb7f421.json create mode 100644 data/hfopenllm_v2/BEE-spoke-data/smol_llama-220M-GQA-fineweb_edu/161dadfe-4983-4f56-8a7d-9b97f1c5a3c7.json create mode 100644 data/hfopenllm_v2/BEE-spoke-data/smol_llama-220M-GQA/694a02f9-4729-4d0b-97ce-80adaef29be2.json create mode 100644 data/hfopenllm_v2/BEE-spoke-data/smol_llama-220M-openhermes/0521f51d-22c1-4821-8f04-23c533411668.json delete mode 100644 data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024/112be4bf-bfac-470f-bde8-c1e4d7282667.json create mode 100644 data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024/8fdea71b-5e68-4a78-aefc-8a00650464c4.json delete mode 100644 data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-e16-d32-flan/cdf0ce69-4697-4f16-a769-80691cc08b27.json create mode 100644 data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-e16-d32-flan/e2ba5674-9251-4a4e-9eb8-046c834da400.json create mode 100644 data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-e16-d32-instruct_2e/4caafdb2-3065-40d4-b5a7-9deb41e1d8a7.json delete mode 100644 data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-e16-d32-instruct_2e/7b1574ca-4106-42c0-9336-27df4f0851aa.json create mode 100644 data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-instruct-orpo/886e0b8b-b2dc-434f-a299-50f668006241.json delete mode 100644 data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-instruct-orpo/e91b6749-3103-4cfa-bf16-86126ee2086e.json delete mode 100644 data/hfopenllm_v2/BSC-LT/salamandra-7b-instruct/2eb60f3a-53f4-478a-8292-aa5e210a8cdf.json create mode 100644 data/hfopenllm_v2/BSC-LT/salamandra-7b-instruct/7a6a9443-f331-4dfa-acf9-6aa30049bade.json delete mode 100644 data/hfopenllm_v2/BSC-LT/salamandra-7b/36d2d3af-60aa-4624-b414-e249d06b6ee1.json create mode 100644 data/hfopenllm_v2/BSC-LT/salamandra-7b/6d523da4-ec4a-405b-a25d-afc7b1b5aefd.json create mode 100644 data/hfopenllm_v2/Ba2han/Llama-Phi-3_DoRA/cfecfce3-090d-4c2e-826c-03c0c5337e98.json create mode 100644 data/hfopenllm_v2/Baptiste-HUVELLE-10/LeTriomphant2.2_ECE_iLAB/5aa124dc-4abd-4c5f-b40a-a8d81af922eb.json delete mode 100644 data/hfopenllm_v2/Baptiste-HUVELLE-10/LeTriomphant2.2_ECE_iLAB/b1632b15-fa00-4476-b3f4-05aba95df664.json delete mode 100644 data/hfopenllm_v2/BenevolenceMessiah/Qwen2.5-72B-2x-Instruct-TIES-v1.0/ad8e3029-612c-434e-a92b-f5c481476e25.json create mode 100644 data/hfopenllm_v2/BenevolenceMessiah/Qwen2.5-72B-2x-Instruct-TIES-v1.0/ec91b122-c8f5-4dfb-94fd-336ef78c3e14.json create mode 100644 data/hfopenllm_v2/BenevolenceMessiah/Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0/114f246a-6049-40bf-ad86-9a822d13cf74.json delete mode 100644 data/hfopenllm_v2/BenevolenceMessiah/Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0/129ba653-ec88-46f2-8828-77e320b922c6.json delete mode 100644 
data/hfopenllm_v2/BlackBeenie/Bloslain-8B-v0.2/160fb625-9c1c-40c1-ab93-7d9f7a2220d2.json create mode 100644 data/hfopenllm_v2/BlackBeenie/Bloslain-8B-v0.2/82d28a3a-44f2-463f-a1b8-7e9079ec47b7.json delete mode 100644 data/hfopenllm_v2/BlackBeenie/Llama-3.1-8B-OpenO1-SFT-v0.1/b298e0fc-f4fb-4464-beb8-45f8b5f35653.json create mode 100644 data/hfopenllm_v2/BlackBeenie/Llama-3.1-8B-OpenO1-SFT-v0.1/ed3c1349-a154-4866-890f-2b115ffaf127.json create mode 100644 data/hfopenllm_v2/BlackBeenie/Llama-3.1-8B-pythonic-passthrough-merge/47942c55-5ddb-4fda-9c5b-34676ae2046a.json create mode 100644 data/hfopenllm_v2/BlackBeenie/Neos-Gemma-2-9b/d860210b-4c8a-4d15-ad3a-4e39905f91ed.json create mode 100644 data/hfopenllm_v2/BlackBeenie/Neos-Llama-3.1-8B/d137f429-2b65-4ee9-9d66-3f619b270fad.json create mode 100644 data/hfopenllm_v2/BlackBeenie/Neos-Llama-3.1-base/1da10dfe-b0a3-4cb8-aaa3-e16d48f3aab4.json create mode 100644 data/hfopenllm_v2/BlackBeenie/Neos-Phi-3-14B-v0.1/6156a0d2-4c32-40b2-9624-ef0c7a6a95bb.json create mode 100644 data/hfopenllm_v2/BlackBeenie/llama-3-luminous-merged/676342d2-f37a-4b6a-967d-3ac750243470.json create mode 100644 data/hfopenllm_v2/BlackBeenie/llama-3.1-8B-Galore-openassistant-guanaco/950b7108-0192-4875-b4e9-c3e43ab71e08.json create mode 100644 data/hfopenllm_v2/Bllossom/llama-3.2-Korean-Bllossom-AICA-5B/85672df5-2f35-43be-8648-9937c66872dc.json create mode 100644 data/hfopenllm_v2/BoltMonkey/DreadMix/051c5642-3b23-4879-9d10-639d1b3127d7.json delete mode 100644 data/hfopenllm_v2/BoltMonkey/DreadMix/e6b5e728-28a4-444a-8b6b-89d29b7b5225.json create mode 100644 data/hfopenllm_v2/BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/2acf0d12-7e0c-46dc-a079-ebc48a8818d3.json create mode 100644 data/hfopenllm_v2/BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/8ce42090-006e-4e08-8d3f-5b1eb0b8da0b.json delete mode 100644 data/hfopenllm_v2/BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/d9e3bd73-cd7e-46d4-9e62-0cfac178f62a.json delete mode 100644 data/hfopenllm_v2/BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/f83a5d67-b967-47c8-b76e-b58c445a3634.json delete mode 100644 data/hfopenllm_v2/BoltMonkey/SuperNeuralDreadDevil-8b/2ad0eebb-31e3-4f28-aba6-073f33d5cbed.json create mode 100644 data/hfopenllm_v2/BoltMonkey/SuperNeuralDreadDevil-8b/703df6c3-dae4-437f-9379-f8c264797adc.json create mode 100644 data/hfopenllm_v2/BrainWave-ML/llama3.2-3B-maths-orpo/1e349ad3-d29b-4a4b-97e7-b82055e41b07.json create mode 100644 data/hfopenllm_v2/BramVanroy/GEITje-7B-ultra/8f677a76-932c-4c35-9708-4b723226aa19.json delete mode 100644 data/hfopenllm_v2/BramVanroy/GEITje-7B-ultra/efcc28d3-ca6a-4100-afd2-75f9925354ba.json create mode 100644 data/hfopenllm_v2/BramVanroy/fietje-2-chat/ebfe625f-ff1f-45f9-826c-9351ea4134e1.json delete mode 100644 data/hfopenllm_v2/BramVanroy/fietje-2-chat/faf20d1a-5a92-49b2-be69-903cafb9460a.json delete mode 100644 data/hfopenllm_v2/BramVanroy/fietje-2-instruct/03e122da-30cc-4c2e-9b44-8261c3f2a934.json create mode 100644 data/hfopenllm_v2/BramVanroy/fietje-2-instruct/66e6a757-ac22-47f3-82ce-81af45e1d3cf.json create mode 100644 data/hfopenllm_v2/BramVanroy/fietje-2/1cd840c7-d432-495c-a3df-af1fa6264259.json delete mode 100644 data/hfopenllm_v2/BramVanroy/fietje-2/3712e2c3-0ed1-4dc9-95fc-4be0bec18675.json create mode 100644 data/hfopenllm_v2/CYFRAGOVPL/Llama-PLLuM-8B-base/066f520f-9a64-4564-abfc-6435732c3585.json create mode 100644 data/hfopenllm_v2/CYFRAGOVPL/Llama-PLLuM-8B-chat/aced5181-040a-48c0-bc5f-78d0de3afae8.json 
delete mode 100644 data/hfopenllm_v2/CYFRAGOVPL/Llama-PLLuM-8B-chat/cb833a8b-81d7-41a6-bff2-9d0927703113.json
delete mode 100644 data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-base/76833817-781e-4292-9fe8-5e8a1da7f962.json
create mode 100644 data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-base/a4889a38-84d2-4ae1-b8a9-297b4400602d.json
delete mode 100644 data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-chat/6e325f0f-b5db-4773-8179-7e949bd3f5f2.json
create mode 100644 data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-chat/d540505a-c67b-4b72-a53a-c03aa6f8d3e7.json
create mode 100644 data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-nc-base/9859afee-02ca-4c48-acc8-acfd20c37e4e.json
delete mode 100644 data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-nc-base/e9b90a3b-09c6-4d3b-9aa3-6279ea3cccb5.json
create mode 100644 data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-nc-chat/e222d12b-c796-4890-a584-cd689bae7ea6.json
delete mode 100644 data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-nc-chat/fd19dada-5945-45d5-8a84-122404b8dd57.json
delete mode 100644 data/hfopenllm_v2/CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct-2412/41809335-e00c-4911-bc08-6edd71891585.json
create mode 100644 data/hfopenllm_v2/CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct-2412/c16850f8-0b80-4455-8f38-8ec453cd1d41.json
create mode 100644 data/hfopenllm_v2/CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct/0d400b0f-cc82-4c86-b600-93a31b133f9d.json
delete mode 100644 data/hfopenllm_v2/CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct/8c56b973-d5cb-48b6-a43e-ad50769b1f40.json
create mode 100644 data/hfopenllm_v2/Casual-Autopsy/L3-Umbral-Mind-RP-v2.0-8B/90f6f8f1-02fc-425a-8499-e9b43ae8ac59.json
delete mode 100644 data/hfopenllm_v2/Casual-Autopsy/L3-Umbral-Mind-RP-v2.0-8B/da5c1edf-bd74-48a3-ad76-a4bd89539b7f.json
create mode 100644 data/hfopenllm_v2/CausalLM/14B/6704d6bc-6d38-4c59-87a4-81d3eacde3b1.json
delete mode 100644 data/hfopenllm_v2/CausalLM/14B/c4376867-854d-44fa-9215-b9c1af7612a4.json
delete mode 100644 data/hfopenllm_v2/CausalLM/34b-beta/cc482ca4-031a-4c22-90c2-68322184125b.json
create mode 100644 data/hfopenllm_v2/CausalLM/34b-beta/e8ad6ce4-7efc-499e-a2c9-9e0df898fbb9.json
create mode 100644 data/hfopenllm_v2/CausalLM/preview-1-hf/5e9c1273-536d-4280-8fff-9931f46dc968.json
delete mode 100644 data/hfopenllm_v2/CausalLM/preview-1-hf/e9fcf09c-14e2-4226-b1e5-b5752ac1a753.json
create mode 100644 data/hfopenllm_v2/Changgil/K2S3-14b-v0.2/460ca160-ac34-4091-ba2d-986b53532b55.json
delete mode 100644 data/hfopenllm_v2/Changgil/K2S3-14b-v0.2/4dfe2d3c-7fc3-4b57-8acd-02b0808ccdb1.json
delete mode 100644 data/hfopenllm_v2/Changgil/K2S3-v0.1/225bc36b-4bfb-4818-8601-903e7f9decb3.json
create mode 100644 data/hfopenllm_v2/Changgil/K2S3-v0.1/ef9d2fab-07a2-44e2-aae2-ede5a2ff31d9.json
delete mode 100644 data/hfopenllm_v2/ClaudioItaly/Albacus/0be5437b-2489-4107-8c38-d0cd198a2d8c.json
create mode 100644 data/hfopenllm_v2/ClaudioItaly/Albacus/a29a69d3-d64e-4463-aa52-0a9d6d012c98.json
create mode 100644 data/hfopenllm_v2/ClaudioItaly/Book-Gut12B/4539c16e-1ac6-47f4-88eb-a09842497330.json
delete mode 100644 data/hfopenllm_v2/ClaudioItaly/Book-Gut12B/b2bdf337-9065-4a67-aa1a-5ba8751d5438.json
create mode 100644 data/hfopenllm_v2/ClaudioItaly/Evolutionstory-7B-v2.2/2ff33c55-1236-4c57-8809-2d3076e43cc7.json
delete mode 100644 data/hfopenllm_v2/ClaudioItaly/Evolutionstory-7B-v2.2/e06c19ce-9247-473b-b5db-8686fee5e785.json
create mode 100644 data/hfopenllm_v2/ClaudioItaly/intelligence-cod-rag-7b-v3/281ba822-49a2-4746-bc04-8de046439508.json
delete mode 100644 data/hfopenllm_v2/ClaudioItaly/intelligence-cod-rag-7b-v3/51559a6d-1262-41e2-8092-008dc8f53974.json
create mode 100644 data/hfopenllm_v2/CohereForAI/aya-23-35B/0606d916-95ea-4318-af0c-3942329071c6.json
delete mode 100644 data/hfopenllm_v2/CohereForAI/aya-23-35B/9c77aa3f-080c-4dd6-8a9d-50d18657de35.json
create mode 100644 data/hfopenllm_v2/CohereForAI/aya-23-8B/005159f0-da68-480d-972c-c160d145a682.json
delete mode 100644 data/hfopenllm_v2/CohereForAI/aya-23-8B/2ff655cd-9123-4577-832b-3f0b04f7d466.json
create mode 100644 data/hfopenllm_v2/CohereForAI/aya-expanse-32b/2f6abb5d-52b3-44b0-b960-115793485fb1.json
delete mode 100644 data/hfopenllm_v2/CohereForAI/aya-expanse-32b/ebbe9a61-6dff-467a-b77c-7c125a043832.json
delete mode 100644 data/hfopenllm_v2/CohereForAI/aya-expanse-8b/3d54299c-ae39-45f4-b31c-c0667dcbe9f4.json
create mode 100644 data/hfopenllm_v2/CohereForAI/aya-expanse-8b/6ffacad9-1a4d-472e-bbbf-0d64d068dd0d.json
create mode 100644 data/hfopenllm_v2/CohereForAI/c4ai-command-r-plus-08-2024/26eadaf8-bfb8-4aad-a8a4-90699b6f0fcd.json
delete mode 100644 data/hfopenllm_v2/CohereForAI/c4ai-command-r-plus-08-2024/f1ef3dda-1b62-4ec9-9c88-a8e60b8a8f6d.json
delete mode 100644 data/hfopenllm_v2/CohereForAI/c4ai-command-r-plus/c5326cd1-8e73-4f84-8efb-49b3be5c50e7.json
create mode 100644 data/hfopenllm_v2/CohereForAI/c4ai-command-r-plus/d4536913-5708-45e4-a024-45ae37fdae13.json
create mode 100644 data/hfopenllm_v2/CohereForAI/c4ai-command-r-v01/848860aa-7de3-4fae-afca-ac11224b96c5.json
delete mode 100644 data/hfopenllm_v2/CohereForAI/c4ai-command-r-v01/cd24b18c-faff-44e1-87d6-735bcb9ab465.json
create mode 100644 data/hfopenllm_v2/CohereForAI/c4ai-command-r7b-12-2024/0241a8e3-d6e5-4ba5-afb9-862bde2ba851.json
delete mode 100644 data/hfopenllm_v2/CohereForAI/c4ai-command-r7b-12-2024/85fa7edb-df5c-4baa-a0f1-c520db55c08c.json
create mode 100644 data/hfopenllm_v2/Columbia-NLP/LION-Gemma-2b-dpo-v1.0/20b69120-d476-4e34-b3c6-8cef11d6ee78.json
create mode 100644 data/hfopenllm_v2/Columbia-NLP/LION-Gemma-2b-dpo-v1.0/696bbbfc-49dd-444e-a90b-76821845a726.json
delete mode 100644 data/hfopenllm_v2/Columbia-NLP/LION-Gemma-2b-dpo-v1.0/76f198aa-0aa5-4c98-8d86-20410582d3a5.json
delete mode 100644 data/hfopenllm_v2/Columbia-NLP/LION-Gemma-2b-dpo-v1.0/f39ad9a4-b02a-415e-b83a-53d705b6bea2.json
create mode 100644 data/hfopenllm_v2/Columbia-NLP/LION-Gemma-2b-odpo-v1.0/e6d974d3-467e-4fe7-bd84-79fc7c72cde2.json
delete mode 100644 data/hfopenllm_v2/Columbia-NLP/LION-Gemma-2b-sft-v1.0/0cb84d3d-4f5d-4afc-9c49-de567f2ffbcb.json
create mode 100644 data/hfopenllm_v2/Columbia-NLP/LION-Gemma-2b-sft-v1.0/b26ba2b7-1365-4b1c-a1be-35d588e02d36.json
create mode 100644 data/hfopenllm_v2/Columbia-NLP/LION-LLaMA-3-8b-dpo-v1.0/64bd755d-ba4b-4559-ad8e-f56c697b1ae6.json
delete mode 100644 data/hfopenllm_v2/Columbia-NLP/LION-LLaMA-3-8b-dpo-v1.0/bf83f2be-f684-4ba7-b244-c5cb10f8f0b1.json
create mode 100644 data/hfopenllm_v2/Columbia-NLP/LION-LLaMA-3-8b-odpo-v1.0/c4e572cb-1d12-4baf-a4d8-a55422692207.json
delete mode 100644 data/hfopenllm_v2/Columbia-NLP/LION-LLaMA-3-8b-sft-v1.0/23c9a71d-3504-497d-a0e2-6a5e299346e5.json
create mode 100644 data/hfopenllm_v2/Columbia-NLP/LION-LLaMA-3-8b-sft-v1.0/c6123e10-b1f9-49dc-888b-083881e6ef09.json
create mode 100644 data/hfopenllm_v2/CombinHorizon/Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES/e1647f10-fec5-463d-b8e5-6b2b880bd687.json
create mode 100644 data/hfopenllm_v2/CombinHorizon/Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES/6d5fa235-8d69-456e-9f23-0f702760baf4.json
delete mode 100644 data/hfopenllm_v2/CombinHorizon/YiSM-blossom5.1-34B-SLERP/91ec4ba1-6948-48e8-8db0-a335b982c560.json
create mode 100644 data/hfopenllm_v2/CombinHorizon/YiSM-blossom5.1-34B-SLERP/e8709a6a-a2b8-4b09-9342-d1aeae89de1f.json
create mode 100644 data/hfopenllm_v2/CombinHorizon/huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES/603e95c9-7e7f-4892-93f7-92f92b256865.json
create mode 100644 data/hfopenllm_v2/CombinHorizon/huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES/3e2fd38a-186e-49aa-915c-7eb3cde50562.json
create mode 100644 data/hfopenllm_v2/CombinHorizon/zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES/16d55e66-9015-4d72-81e4-3f14c42b0368.json
delete mode 100644 data/hfopenllm_v2/ContactDoctor/Bio-Medical-3B-CoT-012025/4ad50c15-9b6d-40c8-b8ce-74253ecfe258.json
create mode 100644 data/hfopenllm_v2/ContactDoctor/Bio-Medical-3B-CoT-012025/696644b9-bd40-4047-bb85-0cb19510a96c.json
create mode 100644 data/hfopenllm_v2/ContactDoctor/Bio-Medical-Llama-3-8B/cbae8c39-0aec-4859-98bc-3b2d065833ad.json
create mode 100644 data/hfopenllm_v2/CoolSpring/Qwen2-0.5B-Abyme-merge2/15fb3cc7-1ba5-4ba5-ba02-8e8a9d2029d0.json
create mode 100644 data/hfopenllm_v2/CoolSpring/Qwen2-0.5B-Abyme-merge3/357f6051-b880-48bb-8e68-e4b0a7a0cbcc.json
create mode 100644 data/hfopenllm_v2/CoolSpring/Qwen2-0.5B-Abyme/a50a542b-668e-47b1-a37e-805a58eea3d1.json
create mode 100644 data/hfopenllm_v2/Corianas/Neural-Mistral-7B/00f7bd51-0b31-446d-be8c-1e0dc0d82e54.json
create mode 100644 data/hfopenllm_v2/Corianas/Quokka_2.7b/26782941-b918-44c5-a7f6-5f770e47c3d6.json
delete mode 100644 data/hfopenllm_v2/Corianas/Quokka_2.7b/54015982-408c-469b-86da-6642f5708180.json
create mode 100644 data/hfopenllm_v2/Corianas/llama-3-reactor/5547ddaf-8fbb-4259-8b88-e946fc3d2404.json
delete mode 100644 data/hfopenllm_v2/CortexLM/btlm-7b-base-v0.2/aded7428-1283-4ed8-b068-cc1a5ea92dca.json
create mode 100644 data/hfopenllm_v2/CortexLM/btlm-7b-base-v0.2/bee5ea59-b97a-4783-b763-b6bd432d4558.json
create mode 100644 data/hfopenllm_v2/Cran-May/SCE-2-24B/8150333f-8e79-4230-af8b-7ddb1d5eeb21.json
delete mode 100644 data/hfopenllm_v2/Cran-May/SCE-2-24B/f4ff02eb-7763-41bc-8a86-adbb051603af.json
delete mode 100644 data/hfopenllm_v2/Cran-May/SCE-3-24B/2d7b9092-a9ad-4f47-b186-db1e1ce7cd6c.json
create mode 100644 data/hfopenllm_v2/Cran-May/SCE-3-24B/be8510a9-ecd4-4ac7-9930-3200cacb7b50.json
create mode 100644 data/hfopenllm_v2/Cran-May/T.E-8.1/887e4574-f876-4e75-afb8-e543bcb30020.json
delete mode 100644 data/hfopenllm_v2/Cran-May/T.E-8.1/9c9e0887-5561-4789-9521-a3a78e7cfd99.json
delete mode 100644 data/hfopenllm_v2/Cran-May/merge_model_20250308_2/c457473c-6c40-4930-94b8-993d3b1e8937.json
create mode 100644 data/hfopenllm_v2/Cran-May/merge_model_20250308_2/fd21d8bd-28cf-4b91-8075-c38a61f5f32a.json
delete mode 100644 data/hfopenllm_v2/Cran-May/merge_model_20250308_3/5448dbb6-9874-4734-8252-369c7b0189d7.json
create mode 100644 data/hfopenllm_v2/Cran-May/merge_model_20250308_3/c0f05e38-6592-478a-9c46-26567f24ff85.json
create mode 100644 data/hfopenllm_v2/Cran-May/merge_model_20250308_4/06cc2913-8e05-44bf-a128-9a7c4aeff536.json
delete mode 100644 data/hfopenllm_v2/Cran-May/merge_model_20250308_4/45531924-35ad-4baf-9994-5d5fa3bafd02.json
delete mode 100644 data/hfopenllm_v2/Cran-May/tempmotacilla-cinerea-0308/5e5e70f4-c597-415c-ab74-17aaf55b7b28.json
create mode 100644 data/hfopenllm_v2/Cran-May/tempmotacilla-cinerea-0308/86368d5b-0509-4b52-b988-58bcf7e1043e.json
create mode 100644 data/hfopenllm_v2/CreitinGameplays/Llama-3.1-8B-R1-v0.1/77b89fe6-464b-4017-a77f-8750e2668a82.json
create mode 100644 data/hfopenllm_v2/CultriX/Qwen2.5-14B-Broca/d2e47d86-23dd-4c95-a7fb-99518615d09f.json
create mode 100644 data/hfopenllm_v2/CultriX/Qwen2.5-14B-BrocaV9/0a09891e-ac97-4c3a-8364-7106a851f1a8.json
create mode 100644 data/hfopenllm_v2/CultriX/Qwen2.5-14B-Brocav3/eb41fe62-ac46-4630-bb2d-6b907f271737.json
create mode 100644 data/hfopenllm_v2/CultriX/Qwen2.5-14B-Brocav6/d540a6c8-e9ec-4413-b9d2-dee68533c377.json
create mode 100644 data/hfopenllm_v2/CultriX/Qwen2.5-14B-Brocav7/5b1f413a-05c4-43be-bdbc-9de5728e8d0a.json
create mode 100644 data/hfopenllm_v2/CultriX/Qwen2.5-14B-Emerged/6701738c-27e4-4bbd-b614-fbc297c3164f.json
create mode 100644 data/hfopenllm_v2/CultriX/Qwen2.5-14B-Emergedv3/7f4563b4-0b25-49e7-ac1c-afaa28b0eda2.json
create mode 100644 data/hfopenllm_v2/CultriX/Qwen2.5-14B-FinalMerge/32b6e4af-69ba-49b7-9367-dfafe3e390e8.json
create mode 100644 data/hfopenllm_v2/CultriX/Qwen2.5-14B-Hyper/e16deaf7-da55-40ba-ac18-860fa3f14d34.json
create mode 100644 data/hfopenllm_v2/CultriX/Qwen2.5-14B-HyperMarck-dl/8a7a5886-0618-4615-9cdf-46f5d19a29fe.json
create mode 100644 data/hfopenllm_v2/CultriX/Qwen2.5-14B-Hyperionv3/66d18e5b-9ebc-4ab6-94fb-6d5c23c58672.json
create mode 100644 data/hfopenllm_v2/CultriX/Qwen2.5-14B-Hyperionv4/a36aaaf6-2478-4b98-ad0c-2b06ddb8c308.json
create mode 100644 data/hfopenllm_v2/CultriX/Qwen2.5-14B-Hyperionv5/4a6237a7-019c-4310-971e-84b08d1b5067.json
create mode 100644 data/hfopenllm_v2/CultriX/Qwen2.5-14B-MegaMerge-pt2/996e781e-5939-41ac-b347-95c99037c34a.json
create mode 100644 data/hfopenllm_v2/CultriX/Qwen2.5-14B-MergeStock/e880fa0e-ae49-4398-91bd-eadf8695425f.json
create mode 100644 data/hfopenllm_v2/CultriX/Qwen2.5-14B-ReasoningMerge/da04ff51-fbeb-41a8-ae5e-8ddf5925b792.json
create mode 100644 data/hfopenllm_v2/CultriX/Qwen2.5-14B-Ultimav2/6d709396-1ae1-4e5c-a03c-13c1e9425202.json
create mode 100644 data/hfopenllm_v2/CultriX/Qwen2.5-14B-Unity/5b616df9-e15a-4f84-98b4-c2cb532c1b95.json
create mode 100644 data/hfopenllm_v2/CultriX/Qwen2.5-14B-Wernicke-SFT/0f6552d9-3cbe-447e-909b-068e5ceed4c9.json
delete mode 100644 data/hfopenllm_v2/CultriX/Qwen2.5-14B-Wernicke-SFT/84bc884e-29be-40b5-bfe2-6147bec90a78.json
create mode 100644 data/hfopenllm_v2/CultriX/Qwen2.5-14B-Wernicke-SLERP/2861aae0-d2ec-48f5-bd20-9e7bcaf8dabd.json
create mode 100644 data/hfopenllm_v2/CultriX/Qwen2.5-14B-Wernicke/51a64f37-256c-4fe7-b28c-6117520f04ec.json
create mode 100644 data/hfopenllm_v2/CultriX/Qwen2.5-14B-Wernickev3/03ce9c1d-38e8-4a6c-b293-57428a9d7c0e.json
create mode 100644 data/hfopenllm_v2/CultriX/Qwen2.5-14B-partialmergept1/3b0f5dea-db9b-4657-9807-6b3e56d38823.json
create mode 100644 data/hfopenllm_v2/CultriX/Qwenfinity-2.5-14B/2d19e9ff-e331-4171-ae90-47e44f3f8885.json
create mode 100644 data/hfopenllm_v2/CultriX/Qwestion-14B/6bfb8b24-1abd-405b-b01d-7d7111705dbb.json
delete mode 100644 data/hfopenllm_v2/CultriX/Qwestion-14B/c6ad96f2-fcb9-47c5-8106-936436b6ad1b.json
create mode 100644 data/hfopenllm_v2/CultriX/SeQwence-14B-EvolMerge/c83e6b6c-c8be-4d97-9c65-2d883f88f37f.json
create mode 100644 data/hfopenllm_v2/CultriX/SeQwence-14B-EvolMergev1/72569796-1b11-48cc-ada7-e8c09522dd54.json
create mode 100644 data/hfopenllm_v2/CultriX/SeQwence-14B-v5/58403e30-bd2b-4f4c-ad41-daa890c77d40.json
create mode 100644 data/hfopenllm_v2/CultriX/SeQwence-14B/eb8e1f1d-c6b3-407c-b172-d240553d2f89.json
create mode 100644 data/hfopenllm_v2/CultriX/SeQwence-14Bv1/356d75a0-6520-46c1-afa9-7dbb2596a5c1.json
create mode 100644 data/hfopenllm_v2/CultriX/SeQwence-14Bv2/78681e0c-5fe2-4920-af7b-99345cea3efe.json
create mode 100644 data/hfopenllm_v2/CultriX/SeQwence-14Bv3/ba0ee5b4-070a-461d-a3d2-cd4036387cc9.json
create mode 100644 data/hfopenllm_v2/DRXD1000/Atlas-7B/17d0d377-bca4-411c-be11-6c5cfce07798.json
delete mode 100644 data/hfopenllm_v2/DRXD1000/Atlas-7B/1f223500-a1d6-471f-b3cf-2575ab5a52c8.json
delete mode 100644 data/hfopenllm_v2/DRXD1000/Phoenix-7B/bff80553-e91f-470e-923c-7f8103d37fca.json
create mode 100644 data/hfopenllm_v2/DRXD1000/Phoenix-7B/d01a56a1-1eb9-4ccf-8c09-348b6ba5480b.json
create mode 100644 data/hfopenllm_v2/DUAL-GPO/zephyr-7b-ipo-0k-15k-i1/389821ff-d8e2-4d1d-8fb2-57a689867ac5.json
delete mode 100644 data/hfopenllm_v2/DUAL-GPO/zephyr-7b-ipo-0k-15k-i1/a4cd4144-75d5-4c48-a936-96d70f052a66.json
delete mode 100644 data/hfopenllm_v2/DZgas/GIGABATEMAN-7B/180be3a9-1d8e-4705-bda4-032bc66768c6.json
create mode 100644 data/hfopenllm_v2/DZgas/GIGABATEMAN-7B/7913f782-29b0-48bd-bc62-37da9a5ac7d9.json
delete mode 100644 data/hfopenllm_v2/Daemontatox/AetherDrake-SFT/843cbaa0-5d9d-47a8-ae69-fe38a5812136.json
create mode 100644 data/hfopenllm_v2/Daemontatox/AetherDrake-SFT/b0930974-999e-4372-9d21-b9790e0bad4c.json
delete mode 100644 data/hfopenllm_v2/Daemontatox/AetherSett/791a8f9f-5c85-42e5-a06d-270118b0c7c2.json
create mode 100644 data/hfopenllm_v2/Daemontatox/AetherSett/8265f577-f504-4a56-9cf0-42c34766559a.json
create mode 100644 data/hfopenllm_v2/Daemontatox/AetherTOT/82044cd2-1a46-406e-bc68-397ce41b29ea.json
delete mode 100644 data/hfopenllm_v2/Daemontatox/AetherTOT/8ac4547d-2b57-4227-a63d-05da4f3ccbc7.json
create mode 100644 data/hfopenllm_v2/Daemontatox/AetherTOT/de09e323-8cf1-4aa9-9537-e8ad30a8c297.json
delete mode 100644 data/hfopenllm_v2/Daemontatox/AetherTOT/fa9282c6-7820-49dd-9893-9559c5a984a9.json
delete mode 100644 data/hfopenllm_v2/Daemontatox/AetherUncensored/574d79eb-94ae-4b79-8763-77267d300670.json
create mode 100644 data/hfopenllm_v2/Daemontatox/AetherUncensored/bfe543b4-ec38-488e-ae04-125cd358b61f.json
delete mode 100644 data/hfopenllm_v2/Daemontatox/Cogito-MIS/822268e0-8f66-4bb3-9d01-52c684ca281f.json
create mode 100644 data/hfopenllm_v2/Daemontatox/Cogito-MIS/be36d8ae-b81c-4b4e-aa2f-5999c7582237.json
create mode 100644 data/hfopenllm_v2/Daemontatox/CogitoDistil/342b435f-89e9-48ad-ab0f-2c1f52f4571a.json
delete mode 100644 data/hfopenllm_v2/Daemontatox/CogitoDistil/f39e1ca4-2a0f-4650-886b-4160760daee5.json
delete mode 100644 data/hfopenllm_v2/Daemontatox/CogitoZ/5e08074c-32bd-4ce6-a09f-7b5832cba288.json
create mode 100644 data/hfopenllm_v2/Daemontatox/CogitoZ/b0c8737d-d838-4da1-909b-b218e22119dc.json
delete mode 100644 data/hfopenllm_v2/Daemontatox/CogitoZ14/024f23d8-66b0-4a7b-be01-fd68f0ab295e.json
create mode 100644 data/hfopenllm_v2/Daemontatox/CogitoZ14/4cd40f28-842f-44d5-9eb2-86238077fc55.json
create mode 100644 data/hfopenllm_v2/Daemontatox/DocumentCogito/0758051c-2d75-402e-af0e-769096cbb17c.json
delete mode 100644 data/hfopenllm_v2/Daemontatox/DocumentCogito/6d2a742b-adde-4b6d-90d4-ebefbb2b61be.json
delete mode 100644 data/hfopenllm_v2/Daemontatox/DocumentCogito/9a638bb6-f16f-496b-a974-d97dbb6cd626.json
create mode 100644 data/hfopenllm_v2/Daemontatox/DocumentCogito/c93f610b-fb97-4ad1-b8af-fc41c6d8da33.json
create mode 100644 data/hfopenllm_v2/Daemontatox/Llama3.3-70B-CogniLink/b8467118-d895-41fa-81c7-89892e1844d5.json
create mode 100644 data/hfopenllm_v2/Daemontatox/Llama_cot/30d867bb-63c6-48d1-8d43-6c24f4cf44ba.json
delete mode 100644 data/hfopenllm_v2/Daemontatox/MawaredT1/1e87d1ea-59df-4c1a-96da-31e12e27dab2.json
create mode 100644 data/hfopenllm_v2/Daemontatox/MawaredT1/89b92cda-c5b6-45ed-a534-361c9d34794a.json
create mode 100644 data/hfopenllm_v2/Daemontatox/Mini_QwQ/48cdf76a-886d-41ec-8580-00ed4232b601.json
delete mode 100644 data/hfopenllm_v2/Daemontatox/Mini_QwQ/7d5c59eb-c6fb-414a-9e4e-44d1d56f7401.json
create mode 100644 data/hfopenllm_v2/Daemontatox/NemoR/116272d4-d25d-49cb-80cb-ff26a0fb3cf4.json
delete mode 100644 data/hfopenllm_v2/Daemontatox/NemoR/a2da90e0-5f59-4c89-b819-316d2cc318be.json
delete mode 100644 data/hfopenllm_v2/Daemontatox/PathFinderAI2.0/274ab6b9-5fd7-41df-9076-b16c52947640.json
create mode 100644 data/hfopenllm_v2/Daemontatox/PathFinderAI2.0/bb103828-70fe-4767-9302-6750d839129e.json
create mode 100644 data/hfopenllm_v2/Daemontatox/PathFinderAi3.0/7b58ab54-239b-4e49-93f1-c3940df61474.json
delete mode 100644 data/hfopenllm_v2/Daemontatox/PathFinderAi3.0/ba3924c6-f913-4094-a56a-1699f07f103c.json
delete mode 100644 data/hfopenllm_v2/Daemontatox/PathfinderAI/445f2c79-2c47-465c-ace7-73b3fa491454.json
create mode 100644 data/hfopenllm_v2/Daemontatox/PathfinderAI/559067a2-816c-4091-893e-b1c7860171ec.json
delete mode 100644 data/hfopenllm_v2/Daemontatox/PathfinderAI/c07f2943-f3f4-46be-993e-be56dadcb561.json
create mode 100644 data/hfopenllm_v2/Daemontatox/PathfinderAI/ec502619-880b-4b7c-acfe-c43cf6514e3f.json
create mode 100644 data/hfopenllm_v2/Daemontatox/Phi-4-COT/6941a5dd-2a70-4846-a5f6-b16ef2d56a03.json
delete mode 100644 data/hfopenllm_v2/Daemontatox/PixelParse_AI/29459932-a7a5-458f-9778-e236cc4ea985.json
create mode 100644 data/hfopenllm_v2/Daemontatox/PixelParse_AI/636e2f93-3242-491c-9df5-003aa1dacecf.json
create mode 100644 data/hfopenllm_v2/Daemontatox/RA2.0/1f4efa23-816d-49be-8659-feb003f4b3ef.json
delete mode 100644 data/hfopenllm_v2/Daemontatox/RA2.0/3baf9882-5625-47eb-a88b-b172dfc9a330.json
delete mode 100644 data/hfopenllm_v2/Daemontatox/RA_Reasoner/ab74d5ca-6c80-44de-96e9-af61861090b6.json
create mode 100644 data/hfopenllm_v2/Daemontatox/RA_Reasoner/d05be1e4-bcac-4b4a-bbde-8b17a5a71243.json
delete mode 100644 data/hfopenllm_v2/Daemontatox/RA_Reasoner2.0/5cf9872a-6d67-4b42-bfe4-abad05bdd9cf.json
create mode 100644 data/hfopenllm_v2/Daemontatox/RA_Reasoner2.0/9ab53055-86f5-4a88-976f-015dd9c9e832.json
delete mode 100644 data/hfopenllm_v2/Daemontatox/ReasonTest/39d481bf-ea86-42a7-a6f1-ce38ce9dce30.json
create mode 100644 data/hfopenllm_v2/Daemontatox/ReasonTest/ba34083a-9b13-46d9-8f36-aa3ddd586711.json
create mode 100644 data/hfopenllm_v2/Daemontatox/Research_PathfinderAI/6a39d734-ad73-4c4a-9583-3563e336d4b3.json
delete mode 100644 data/hfopenllm_v2/Daemontatox/Research_PathfinderAI/900e5686-083d-460c-918f-06a39936810c.json
create mode 100644 data/hfopenllm_v2/Daemontatox/SphinX/2af71e88-4931-4359-b92a-c64fa33df802.json
create mode 100644 data/hfopenllm_v2/Daemontatox/Sphinx2.0/bf9336a7-a7c4-420a-9dd0-68d8e0c815c4.json
create mode 100644 data/hfopenllm_v2/Daemontatox/TinySphinx/2de872b2-10c7-44dd-91c3-f20205207da6.json
create mode 100644 data/hfopenllm_v2/Daemontatox/TinySphinx2.0/5cabed09-d8ea-46c2-bb78-012dac954d6b.json
delete mode 100644 data/hfopenllm_v2/Daemontatox/Zirel-7B-Math/460de6c8-d706-420b-9c0a-a108ddb11e5f.json
create mode 100644 data/hfopenllm_v2/Daemontatox/Zirel-7B-Math/8236db6a-ff8a-4237-af5a-03bb258f8e59.json
create mode 100644 data/hfopenllm_v2/Daemontatox/Zirel_1.5/1a7b078e-bc1f-400f-a0cd-f7b535548f23.json
delete mode 100644 data/hfopenllm_v2/Daemontatox/Zirel_1.5/661e2393-2560-4d25-a6f3-f0d680052e8e.json
delete mode 100644 data/hfopenllm_v2/Daemontatox/mini-Cogito-R1/faac8ed1-1042-42dc-9762-3f90161fb34f.json
create mode 100644 data/hfopenllm_v2/Daemontatox/mini-Cogito-R1/fdaf561c-567c-416d-a74a-ac3c07c5be5b.json
create mode 100644 data/hfopenllm_v2/Daemontatox/mini_Pathfinder/58900b3b-303b-49c8-b807-7b8d06601568.json
delete mode 100644 data/hfopenllm_v2/Daemontatox/mini_Pathfinder/a9afd0b3-8189-47e0-9e33-d60540679e20.json
delete mode 100644 data/hfopenllm_v2/Dampfinchen/Llama-3.1-8B-Ultra-Instruct/2a0d23aa-47ae-4974-ac64-5371097a1b0f.json
create mode 100644 data/hfopenllm_v2/Dampfinchen/Llama-3.1-8B-Ultra-Instruct/7ac5a45a-7b41-4f63-8556-8737638a00ea.json
create mode 100644 data/hfopenllm_v2/Danielbrdz/Barcenas-10b/3cb55475-30c8-43c8-8d7d-394450fdc117.json
delete mode 100644 data/hfopenllm_v2/Danielbrdz/Barcenas-10b/acdaefdc-b28c-4081-bf72-517d6c70595e.json
create mode 100644 data/hfopenllm_v2/Danielbrdz/Barcenas-14b-Phi-3-medium-ORPO/f5e140ff-0c0e-4769-8116-63cf50255773.json
create mode 100644 data/hfopenllm_v2/Danielbrdz/Barcenas-14b-phi-4-v2/df85ec6e-1325-40ce-8087-d960a1d767dd.json
create mode 100644 data/hfopenllm_v2/Danielbrdz/Barcenas-14b-phi-4/a7bd3fff-f01e-46ca-af85-5b4ac6ae7320.json
create mode 100644 data/hfopenllm_v2/Danielbrdz/Barcenas-3b-GRPO/11842dd9-0572-41ef-aaa0-8d19f3420efc.json
delete mode 100644 data/hfopenllm_v2/Danielbrdz/Barcenas-3b-GRPO/88a3b40a-3ba2-4f13-bd8c-110872d807c7.json
create mode 100644 data/hfopenllm_v2/Danielbrdz/Barcenas-Llama3-8b-ORPO/01abccec-1cea-4060-89be-289987d0a2ce.json
create mode 100644 data/hfopenllm_v2/Danielbrdz/Barcenas-R1-Qwen-1.5b/dce8226c-57bd-4255-b813-8a70494f0a1a.json
create mode 100644 data/hfopenllm_v2/Dans-DiscountModels/12b-mn-dans-reasoning-test-2/7f80e69c-eec6-49ac-a088-6248ee25f736.json
delete mode 100644 data/hfopenllm_v2/Dans-DiscountModels/12b-mn-dans-reasoning-test-2/bd21f54f-6b0c-4db9-bb46-7a4c60f960ae.json
delete mode 100644 data/hfopenllm_v2/Dans-DiscountModels/12b-mn-dans-reasoning-test-3/c9dedad4-65d4-479e-b465-912cd8885e32.json
create mode 100644 data/hfopenllm_v2/Dans-DiscountModels/12b-mn-dans-reasoning-test-3/e0267a2c-dfc5-456e-864d-b5b0ad1fa508.json
delete mode 100644 data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-CoreCurriculum-12b-ChatML/6b61018c-249d-482b-a787-06f1e6514f29.json
create mode 100644 data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-CoreCurriculum-12b-ChatML/e6ad37be-28f4-43b4-9df1-b7b47d31232e.json
create mode 100644 data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.0/5514368a-1f7d-4cd0-b7f7-d116b753f975.json
delete mode 100644 data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.0/9873b58d-1ffd-44a7-bb93-15038986419a.json
delete mode 100644 data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.1/71656625-cd85-49a6-a8df-abc0b9c0ae5d.json
create mode 100644 data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.1/c0e29cf8-897f-4e07-abb4-71c801d34301.json
create mode 100644 data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.2.0/68310379-65b2-482d-892b-f76547bce2b0.json
delete mode 100644 data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.2.0/d47dc284-0ed6-4853-8a54-b87b4b529150.json
delete mode 100644 data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML/60db255b-d34c-4f33-91a4-279a9ccc6791.json
create mode 100644 data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML/a034c4ec-d4cd-439b-8dbd-e67685ea7616.json
create mode 100644 data/hfopenllm_v2/Dans-DiscountModels/Mistral-7b-v0.3-Test-E0.7/e4b761d3-bb84-4433-b9fb-4c92ecae6279.json
create mode 100644 data/hfopenllm_v2/Dans-DiscountModels/mistral-7b-test-merged/38d78d30-be6d-476c-a3aa-d9a40f570a56.json
create mode 100644 data/hfopenllm_v2/Darkknight535/OpenCrystal-12B-L3/36e60f6c-60f7-4b17-88fe-82810e195fc7.json
delete mode 100644 data/hfopenllm_v2/Darkknight535/OpenCrystal-12B-L3/8edb0a0d-994b-4b97-b9a7-7f46ba0e7365.json
create mode 100644 data/hfopenllm_v2/DavidAU/DeepHermes-3-Llama-3-8B-Preview-16.5B-Brainstorm/a6c647e8-ed24-4150-8563-dd9b20e21498.json
create mode 100644 data/hfopenllm_v2/DavidAU/DeepSeek-BlackRoot-R1-Distill-Llama-3.1-8B/b5a366ac-d736-4447-a2f1-98d0b84ba3bd.json
create mode 100644 data/hfopenllm_v2/DavidAU/DeepSeek-Grand-Horror-SMB-R1-Distill-Llama-3.1-16B/5d098dc6-8124-4d26-86ec-d54e6e09c3a6.json
create mode 100644 data/hfopenllm_v2/DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Deep-Thinker-Uncensored-24B/1137cbc4-d80b-4e21-bfeb-feab41dc80b2.json
create mode 100644 data/hfopenllm_v2/DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Mad-Scientist-24B/097bbfbc-0ccd-4fd4-9e0c-9c192cba9e8b.json
create mode 100644 data/hfopenllm_v2/DavidAU/DeepSeek-R1-Distill-Qwen-25.5B-Brainstorm/db8c6169-bfc1-48bb-be53-fa93c673f051.json
create mode 100644 data/hfopenllm_v2/DavidAU/DeepSeek-V2-Grand-Horror-SMB-R1-Distill-Llama-3.1-Uncensored-16.5B/41437fc9-6d48-4317-a8de-ab4e63b2cf46.json
create mode 100644 data/hfopenllm_v2/DavidAU/DeepThought-MOE-8X3B-R1-Llama-3.2-Reasoning-18B/e075f4fe-95e0-48f4-94c4-f6ebd3f4edaa.json
create mode 100644 data/hfopenllm_v2/DavidAU/Gemma-The-Writer-9B/3349d66c-e12b-49c1-a406-e0e77b697458.json
create mode 100644 data/hfopenllm_v2/DavidAU/Gemma-The-Writer-DEADLINE-10B/7aa0ff6b-11a9-4554-a27f-e477a0ff77c7.json
create mode 100644 data/hfopenllm_v2/DavidAU/Gemma-The-Writer-J.GutenBerg-10B/ac749485-df6d-485e-8fa7-63bdfd744167.json
create mode 100644 data/hfopenllm_v2/DavidAU/Gemma-The-Writer-Mighty-Sword-9B/54363a4b-312b-4035-a1c3-b5321311cec4.json
create mode 100644 data/hfopenllm_v2/DavidAU/Gemma-The-Writer-N-Restless-Quill-10B-Uncensored/aa9e2b9e-cd25-4492-9801-eba7d40b4365.json
delete mode 100644 data/hfopenllm_v2/DavidAU/L3-DARKEST-PLANET-16.5B/2c317db5-86fa-41fd-8f1e-3cf08ba91cde.json
create mode 100644 data/hfopenllm_v2/DavidAU/L3-DARKEST-PLANET-16.5B/c6b484b8-f6f3-4516-aff5-c2f6438c9047.json
create mode 100644 data/hfopenllm_v2/DavidAU/L3-Dark-Planet-8B/c6c760c9-a345-4e25-b333-b403bf6db389.json
delete mode 100644 data/hfopenllm_v2/DavidAU/L3-Dark-Planet-8B/f5c2a2cc-392e-4337-aad9-72d65ba87aab.json
create mode 100644 data/hfopenllm_v2/DavidAU/L3-Jamet-12.2B-MK.V-Blackroot-Instruct/65b2aa58-2c04-48f2-9ea3-c8fd97cb9dde.json
delete mode 100644 data/hfopenllm_v2/DavidAU/L3-Jamet-12.2B-MK.V-Blackroot-Instruct/85a1ef3f-7d68-4324-876d-b52cfa71317d.json
create mode 100644 data/hfopenllm_v2/DavidAU/L3-Lumimaid-12.2B-v0.1-OAS-Instruct/92903344-0dde-4f5a-a7d2-749a1ffe9cd3.json
delete mode 100644 data/hfopenllm_v2/DavidAU/L3-Lumimaid-12.2B-v0.1-OAS-Instruct/a8fe768d-f988-4fba-be80-2f5cc22dfd9d.json
create mode 100644 data/hfopenllm_v2/DavidAU/L3-SMB-Instruct-12.2B-F32/59ddd478-c1cd-4bd8-80c3-fdebe762414a.json
delete mode 100644 data/hfopenllm_v2/DavidAU/L3-SMB-Instruct-12.2B-F32/970cfd49-b72c-4cf5-af05-1ecfc57c94d8.json
create mode 100644 data/hfopenllm_v2/DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B/02f63fc6-9376-4fb5-b067-63493238cc27.json
delete mode 100644 data/hfopenllm_v2/DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B/9dbf220a-cbe9-40da-814f-951205c3abbe.json
delete mode 100644 data/hfopenllm_v2/DavidAU/L3-Stheno-v3.2-12.2B-Instruct/51566db6-56e4-40bd-a248-6c968f2b83e8.json
create mode 100644 data/hfopenllm_v2/DavidAU/L3-Stheno-v3.2-12.2B-Instruct/dd7597fd-27f5-4e77-a44f-b01d0db82719.json
delete mode 100644 data/hfopenllm_v2/DavidAU/L3.1-Dark-Planet-SpinFire-Uncensored-8B/0982d599-57c7-4eeb-bd47-844879bb79a5.json
create mode 100644 data/hfopenllm_v2/DavidAU/L3.1-Dark-Planet-SpinFire-Uncensored-8B/20cd0d60-eb0d-41bd-b37f-910a03dd7f82.json
delete mode 100644 data/hfopenllm_v2/DavidAU/L3.1-MOE-2X8B-Deepseek-DeepHermes-e32-uncensored-abliterated-13.7B/a7df9a84-fa29-4c8e-8413-4542b5eafb63.json
create mode 100644 data/hfopenllm_v2/DavidAU/L3.1-MOE-2X8B-Deepseek-DeepHermes-e32-uncensored-abliterated-13.7B/c4e9d045-3769-4828-a2ca-7fa508873089.json
create mode 100644 data/hfopenllm_v2/DavidAU/Qwen2.5-MOE-2X1.5B-DeepSeek-Uncensored-Censored-4B/0a0501ec-4ecd-47c1-914b-d473f795cef2.json
create mode 100644 data/hfopenllm_v2/DavidAU/Qwen2.5-MOE-2X7B-DeepSeek-Abliterated-Censored-19B/beca755f-203f-4bc8-b5cf-f9a9e3f8bd8f.json
create mode 100644 data/hfopenllm_v2/DavidAU/Qwen2.5-MOE-6x1.5B-DeepSeek-Reasoning-e32/79e1e1c6-cbe0-43a9-a593-8e2119baaf77.json
delete mode 100644 data/hfopenllm_v2/Davidsv/SUONG-1/097e6cbe-88cd-4d61-bb4c-0b8ddb537abe.json
create mode 100644 data/hfopenllm_v2/Davidsv/SUONG-1/def80b44-3d9a-46ba-bf5f-ffc81e50af2e.json
create mode 100644 data/hfopenllm_v2/DavieLion/Llama-3.2-1B-SPIN-iter0/5e1aa809-ef20-445e-a05b-eccd585d5991.json
create mode 100644 data/hfopenllm_v2/DavieLion/Llama-3.2-1B-SPIN-iter0/7c2be651-ca56-4285-afc7-1bfe1c8ce11e.json
create mode 100644 data/hfopenllm_v2/DavieLion/Llama-3.2-1B-SPIN-iter1/cfe4ea72-ddb9-49b5-9599-99f215e112e5.json
create mode 100644 data/hfopenllm_v2/DavieLion/Llama-3.2-1B-SPIN-iter2/81d63d8e-88dd-4b16-b9b8-d07604878f8f.json
create mode 100644 data/hfopenllm_v2/DavieLion/Llama-3.2-1B-SPIN-iter3/81f8208b-f7e7-4685-bb84-321d9e097470.json
create mode 100644 data/hfopenllm_v2/DavieLion/Llama-3.2-1B-SPIN-iter3/a0c9a434-9b8c-47c5-b511-9daac7901686.json
delete mode 100644 data/hfopenllm_v2/DavieLion/Lllma-3.2-1B/274ed35b-4abe-4f20-bd18-7e386a7fdaa5.json
create mode 100644 data/hfopenllm_v2/DavieLion/Lllma-3.2-1B/28b60eae-1b38-4404-8db1-3fb2997583f4.json
create mode 100644 data/hfopenllm_v2/DebateLabKIT/Llama-3.1-Argunaut-1-8B-SFT/746862a2-a90c-4612-91d0-f989b9eed1a5.json
delete mode 100644 data/hfopenllm_v2/DebateLabKIT/Llama-3.1-Argunaut-1-8B-SFT/ea40f65f-60a8-4efa-aa8d-e2a64ef5999f.json
delete mode 100644 data/hfopenllm_v2/Deci/DeciLM-7B-instruct/1b3a2041-d14f-44d1-9efd-dbeceaa67ee6.json
create mode 100644 data/hfopenllm_v2/Deci/DeciLM-7B-instruct/715ee057-9c9a-4e04-991c-7040b1eef65b.json
create mode 100644 data/hfopenllm_v2/Deci/DeciLM-7B/4dc1d103-3458-4b8c-9e63-b98effd69667.json
delete mode 100644 data/hfopenllm_v2/Deci/DeciLM-7B/f9d2408b-03dd-4cf8-851e-51a15ff13be9.json
create mode 100644 data/hfopenllm_v2/DeepAutoAI/Explore_Llama-3.1-8B-Inst/070ff2a5-9a5d-48cf-8517-1ad9b6642d59.json
create mode 100644 data/hfopenllm_v2/DeepAutoAI/Explore_Llama-3.2-1B-Inst/8406a5b8-a87d-489b-b75b-00e9f675f09f.json
create mode 100644 data/hfopenllm_v2/DeepAutoAI/Explore_Llama-3.2-1B-Inst_v0/11e8f9b6-32ab-4b83-a601-e5644c0b2c39.json
create mode 100644 data/hfopenllm_v2/DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1.1/6b542f5a-ea62-45ce-8e98-436a4d058877.json
create mode 100644 data/hfopenllm_v2/DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1/9b280640-bfee-4730-acc3-386a54b2434c.json
create mode 100644 data/hfopenllm_v2/DeepAutoAI/causal_gpt2/eff5171b-6119-4013-8aa8-8a4f0215b045.json
create mode 100644 data/hfopenllm_v2/DeepAutoAI/d2nwg_Llama-3.1-8B-Instruct-v0.0/471c5fed-f155-4521-9d9c-b5370ca91bec.json
delete mode 100644 data/hfopenllm_v2/DeepAutoAI/d2nwg_Llama-3.1-8B-Instruct-v0.0/d5d73b84-4436-47bf-967e-c9be94898189.json
create mode 100644 data/hfopenllm_v2/DeepAutoAI/d2nwg_causal_gpt2/690be099-3ace-484f-b01f-2fe6b324d12a.json
create mode 100644 data/hfopenllm_v2/DeepAutoAI/d2nwg_causal_gpt2_v1/71fbd15f-5eec-40d9-84e8-07323f3ffac6.json
create mode 100644 data/hfopenllm_v2/DeepAutoAI/ldm_soup_Llama-3.1-8B-Inst/eb93dd3e-3d13-4234-bb66-f6177648aa2b.json
create mode 100644 data/hfopenllm_v2/DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.0/f7ec1ed7-cc30-4879-8ab1-4909011553d5.json
delete mode 100644 data/hfopenllm_v2/DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.0/fb8eb882-26a9-4008-9226-90d44d38b54f.json
create mode 100644 data/hfopenllm_v2/DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.1/3e100704-dbd3-4d05-b325-5bb4bc90e51c.json
delete mode 100644 data/hfopenllm_v2/DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.1/a7ba1534-464f-45ba-834f-5f501b155c20.json
create mode 100644 data/hfopenllm_v2/DeepMount00/Lexora-Lite-3B/12f003ef-1098-4d3f-aed7-7343034157bc.json
delete mode 100644 data/hfopenllm_v2/DeepMount00/Lexora-Lite-3B/5eb28bbd-8428-4385-b078-13e8a868e9f0.json
create mode 100644 data/hfopenllm_v2/DeepMount00/Lexora-Lite-3B_v2/9de2e564-3a30-4f1c-80da-6432a245a64f.json
delete mode 100644 data/hfopenllm_v2/DeepMount00/Lexora-Lite-3B_v2/bf38278f-6375-41a6-9744-04fb4a32ed72.json
delete mode 100644 data/hfopenllm_v2/DeepMount00/Lexora-Medium-7B/cc8f594a-e2f7-49e3-8654-57f1b397797f.json
create mode 100644 data/hfopenllm_v2/DeepMount00/Lexora-Medium-7B/dd5aaa3f-b24b-4a5b-852b-b80f4a6bf366.json
create mode 100644 data/hfopenllm_v2/DeepMount00/Llama-3-8b-Ita/8d8b9fd2-43f6-4edc-8340-44d20824a7e7.json
create mode 100644 data/hfopenllm_v2/DeepMount00/Llama-3.1-8b-ITA/7fe45c20-a2c0-4acf-9425-651a1ec3b0d0.json
create mode 100644 data/hfopenllm_v2/DeepMount00/Llama-3.1-8b-ITA/baf93ef6-56f3-4809-93f6-32dcf4730388.json
create mode 100644 data/hfopenllm_v2/DeepMount00/Llama-3.1-Distilled/f6df14bd-207c-4fea-b789-c9f9aef749b3.json
create mode 100644 data/hfopenllm_v2/DeepMount00/Qwen2-1.5B-Ita/97766a7f-cf5b-46ae-b51e-5c5702ae000b.json
create mode 100644 data/hfopenllm_v2/DeepMount00/Qwen2-1.5B-Ita_v2/d5cd2a1b-3def-4b33-a8fe-4b02e090db27.json
create mode 100644 data/hfopenllm_v2/DeepMount00/Qwen2-1.5B-Ita_v3/275d4bf0-566c-4b50-86b9-38c7f45df143.json
create mode 100644 data/hfopenllm_v2/DeepMount00/Qwen2-1.5B-Ita_v5/aa504db9-81f3-424f-b7d9-683ebe31f5d8.json
create mode 100644 data/hfopenllm_v2/DeepMount00/Qwen2-1.5B-Ita_v6/2cc209b7-ef10-435d-a840-b904ab741491.json
create mode 100644 data/hfopenllm_v2/DeepMount00/Qwen2.5-7B-Instruct-MathCoder/9b9390ac-fd65-4a58-9834-5352aa340cdc.json
delete mode 100644 data/hfopenllm_v2/DeepMount00/Qwen2.5-7B-Instruct-MathCoder/ea1a36fb-66c0-4b1a-bdac-7ec2602a7c65.json
delete mode 100644 data/hfopenllm_v2/DeepMount00/mergekit-ties-okvgjfz/34350829-d42d-4e67-b23f-171044428c1f.json
create mode 100644 data/hfopenllm_v2/DeepMount00/mergekit-ties-okvgjfz/4efe5cd4-6b8a-4951-a63a-4c7dc390bbec.json
create mode 100644 data/hfopenllm_v2/Delta-Vector/Baldur-8B/4bc5a0db-1c88-4c61-9343-1d340305ecc5.json
delete mode 100644 data/hfopenllm_v2/Delta-Vector/Baldur-8B/6267c5c6-abd3-4eb0-94ca-5c569414e7a9.json
delete mode 100644 data/hfopenllm_v2/Delta-Vector/Control-8B-V1.1/20796a87-8691-44b9-9b60-85ad3c7f4b7b.json
create mode 100644 data/hfopenllm_v2/Delta-Vector/Control-8B-V1.1/74527f51-dcec-4b82-8ba8-075c933404f5.json
delete mode 100644 data/hfopenllm_v2/Delta-Vector/Control-8B/26dc4843-56a7-45b5-a61a-386e260574a2.json
create mode 100644 data/hfopenllm_v2/Delta-Vector/Control-8B/ac31bc90-3854-4d38-925d-ef8dc7e75d24.json
create mode 100644 data/hfopenllm_v2/Delta-Vector/Darkens-8B/88583cff-1adc-4b1b-8e68-07f0074d0ae2.json
delete mode 100644 data/hfopenllm_v2/Delta-Vector/Darkens-8B/a1689935-8ccb-49a8-8c2a-8dbf32b7ac02.json
delete mode 100644 data/hfopenllm_v2/Delta-Vector/Henbane-7b-attempt2/73f9a017-15ac-42e6-9600-69b411de4086.json
create mode 100644 data/hfopenllm_v2/Delta-Vector/Henbane-7b-attempt2/fadbac9e-7224-41d1-abfa-7039cbcba9f6.json
create mode 100644 data/hfopenllm_v2/Delta-Vector/Odin-9B/1fb90540-0fa0-44ca-ad67-1e3503f6b729.json
delete mode 100644 data/hfopenllm_v2/Delta-Vector/Odin-9B/586d4e20-c1f4-466a-8488-07ac18ad6253.json
create mode 100644 data/hfopenllm_v2/Delta-Vector/Tor-8B/047784e2-c1ee-40d9-a60d-e43504825801.json
delete mode 100644 data/hfopenllm_v2/Delta-Vector/Tor-8B/ce7e8e58-e323-4704-b6f3-7fa6c5c3b7f2.json
create mode 100644 data/hfopenllm_v2/DevQuasar/DevQuasar-R1-Uncensored-Llama-8B/ee60453d-2d51-46f7-8a18-c651d590f0e7.json
create mode 100644 data/hfopenllm_v2/Dongwei/DeepSeek-R1-Distill-Qwen-7B-GRPO/b0ac4b11-f7b4-4753-baae-310a92f08259.json
create mode 100644 data/hfopenllm_v2/DoppelReflEx/L3-8B-R1-WolfCore-V1.5-test/324db8b3-38c7-4a2c-82e8-7bebfa38e760.json
delete mode 100644 data/hfopenllm_v2/DoppelReflEx/L3-8B-R1-WolfCore-V1.5-test/3c4058cd-238b-4b01-870d-8693f5ce1b8f.json
create mode 100644 data/hfopenllm_v2/DoppelReflEx/L3-8B-R1-WolfCore/54dd9033-61b9-4f26-9cde-e04c7136524b.json
delete mode 100644 data/hfopenllm_v2/DoppelReflEx/L3-8B-R1-WolfCore/6d8d63c0-ad69-4224-8250-b1664f6abbcf.json
delete mode 100644 data/hfopenllm_v2/DoppelReflEx/L3-8B-WolfCore/c6771d5c-acaf-4b17-96b4-abf3b75bc68f.json
create mode 100644 data/hfopenllm_v2/DoppelReflEx/L3-8B-WolfCore/d0973d6c-373c-41cd-9e62-52470c044dac.json
create mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-FoxFrame-test/da15da67-b316-4c2e-86a5-c1f88eece9cb.json
delete mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-FoxFrame-test/ef5bb4eb-0875-4cc5-8e27-b59ffbd2e477.json
create mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-FoxFrame2-test/b0c34174-bfd0-4556-a3bf-92ec0ddf5ec4.json
delete mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-FoxFrame2-test/e46698de-8b2d-4b3c-b482-8cc8a3665eac.json
delete mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-FoxFrame3-test/35351894-ea9d-456b-ab9a-c98686948e6b.json
create mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-FoxFrame3-test/bce7b15d-1670-46db-bdff-24fb38bc3fd9.json
create mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-Kakigori/15e5e02f-27b9-4063-b601-42c2b17180f9.json
delete mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-Kakigori/2f19082b-8377-4f63-8c5f-1aa25071a240.json
create mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame-Experiment-2/51b0c546-0dde-4668-a8b8-3b9753a31aa0.json
delete mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame-Experiment-2/630c100f-c88d-42a7-9614-bd9a958eab2b.json
delete mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame-Experiment-3/37292ca7-9e82-4c80-bc6e-bc7e1be7a95e.json
create mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame-Experiment-3/45842b1c-cf68-44a7-928f-2da454cdd13f.json
create mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame-Experiment-4/c15cdefd-dbe3-432e-aab0-3c43540cd320.json
delete mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame-Experiment-4/ecc18f9c-c495-4ae6-8fd8-b2f84fb453ac.json
create mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame/1f489afa-a01d-40f3-836a-9e386c502d1d.json
delete mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame/3d68e2fb-06cc-43b9-830b-f1cd02f12166.json
create mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame/94bcc87e-eb06-4321-9b72-2f99168cf92a.json
delete mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame/a04a8775-8b4d-4608-9692-47af9f7ed5a7.json
delete mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-GreenSnake/9b9eb072-4120-4a6a-a565-27136e617f10.json
create mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-GreenSnake/c0bc9811-4d7c-412f-a12b-3e6eab2e5a6f.json
delete mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Nocturne/6a21892f-1d11-4c59-8894-8800822b2e72.json
create mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Nocturne/b5a8b278-69e9-41ba-89ee-8fd6b2d90a1c.json
create mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi-v2-Experiment/a3ad7f0f-64bd-42a1-bc7d-d7d4cbbd80fd.json
delete mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi-v2-Experiment/db8eedcc-1dcf-47af-9c2b-a72da97146ca.json
delete mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi-v3-Experiment/8198ab16-4a8b-4da9-8e8a-d1e3beb02839.json
create mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi-v3-Experiment/f07c3a4a-2a8e-45c4-a726-be95726df2db.json
delete mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi-v4-Experiment/e4e71999-6f83-4745-8a9d-66e711e39ac3.json
create mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi-v4-Experiment/f36d56b8-cd77-4d69-a51d-39025bcfcdfd.json
create mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi/65acabdc-ea5f-426c-820b-2b79f2b20b44.json
delete mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi/f1bfef73-3586-4f9d-80ca-71b0fb00aadd.json
create mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-1/96b00cfa-1383-4b36-a043-17eb39678ffc.json
delete mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-1/aa2478d9-59bd-458b-abee-5669aa6280df.json
create mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-2/3b8a796e-6bde-4506-8335-bd3cc72482e1.json
delete mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-2/66bd7a21-6f85-49b5-bc01-3f52ed8d1c64.json
delete mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-3/1a3eefa6-7b3d-4541-93b0-8fe86f6bf038.json
create mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-3/a93e99e2-ca13-4cdc-9904-7ae5cc82c623.json
create mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-4/65d9e237-2757-459e-94e7-e382213e4eeb.json
delete mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-4/d7303703-f33e-430b-813d-998c95dbdb67.json
delete mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake/8aa34df4-8347-4f2d-98a0-7ec58bd62e43.json
create mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake/c3f44524-4c75-4cd0-9f5d-79c8b08f6f77.json
delete mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-Unleashed-Twilight/00f0fe96-4a06-46e7-88d8-368b86bcdb06.json
create mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-Unleashed-Twilight/2e7d3674-d0b0-4b87-8bd8-8202114b7665.json
create mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-WolFrame/30d21295-beb1-4179-8c6f-7bac79b29474.json
delete mode 100644 data/hfopenllm_v2/DoppelReflEx/MN-12B-WolFrame/3bb96e7a-6c09-4b9e-8f2b-0b525c2ebeb3.json
create mode 100644 data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-test/e2fc95de-b9d9-4043-b55c-aa2819d4f52f.json
delete mode 100644 data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-test/e6031abf-1ae2-431c-8247-3124fff41d17.json
delete mode 100644 data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-v1b-test/2917ef74-c8cb-4255-8bda-76280fbe7c64.json
create mode 100644 data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-v1b-test/7fbd7f97-baf9-4acd-ba0c-90ffbf0c47a5.json
delete mode 100644 data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-v1c-test/23a21492-0897-44b4-a046-cf93fa8c2a64.json
create mode 100644 data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-v1c-test/336effcd-d8fc-4477-846f-70fc40bdc111.json
create mode 100644 data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-v1d-test/28f87820-d587-498e-b713-7c0af0cdc324.json
delete mode 100644 data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-v1d-test/af67712e-7436-4703-ac22-9878dd8e190a.json
delete mode 100644 data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B/2ec36e2e-0fba-4c6a-b9d0-fe57e7d708ef.json
create mode 100644 data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B/f1b671ab-ebb3-43ec-86fa-832982d04cc1.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Again-8B-Model_Stock/327cde83-d107-4455-bc03-7e03026c52e6.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Again-8B-Model_Stock/cd2de45f-874a-4d63-bb6d-0afe5e687964.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Alita99-8B-LINEAR/570c991f-06bc-45d1-8409-d779a07df9a6.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Alita99-8B-LINEAR/7497b8fb-9a7d-46dc-868e-1a2bbcdc7860.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/AnotherTest/81ec7c1a-8874-44c3-b482-8a8ecfb2ae72.json
create mode 100644 data/hfopenllm_v2/DreadPoor/AnotherTest/92c8afbe-7735-40c8-af0e-29da687c2070.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Aspire-8B-model_stock/28bd44a9-d916-4a0b-b0ae-c6a4cb5d727d.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Aspire-8B-model_stock/bca052ac-6556-49d8-94e3-f4bda560a5d3.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Aspire_1.3-8B_model-stock/5f74fe6e-8575-4cea-959b-e6ba03c7e273.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Aspire_1.3-8B_model-stock/917a9361-af08-4e12-a93a-01321629b31f.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Aspire_V2-8B-Model_Stock/677221cd-f218-4982-8363-d969913d7a22.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Aspire_V2-8B-Model_Stock/b0f696f5-ed70-4293-999d-a9121192c137.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Aspire_V2.1-8B-Model_Stock/18751a6f-062c-4915-bbe0-ae222cf9ae0b.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Aspire_V2.1-8B-Model_Stock/292e77cb-e6e6-4d10-9956-1e09369e9669.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Aspire_V2_ALT-8B-Model_Stock/398ebe04-638f-4a11-b99d-6778ff3ff97b.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Aspire_V2_ALT-8B-Model_Stock/62414bde-98c1-4cae-af6d-18d3b0ecd50a.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Aspire_V2_ALT_ROW-8B-Model_Stock/3258c5c6-d12d-4e09-8404-22b6aaf82e87.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Aspire_V2_ALT_ROW-8B-Model_Stock/b4f197f2-3456-4221-b222-10dfbbb50f56.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Aspire_V3-8B-Model_Stock/0a2fa86a-f9b3-4a49-b215-4cd3ee9b4c22.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Aspire_V3-8B-Model_Stock/3cc8c02f-87a8-428a-8991-a0d52500d927.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Aspire_V4-8B-Model_Stock/1561ec50-1cb9-47ce-9db1-09efe9c3fc61.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Aspire_V4-8B-Model_Stock/692e0ff5-0607-4aae-8996-45bbbc4d2288.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Aspire_V4_ALT-8B-Model_Stock/496525ff-394a-4b7b-9d93-f5b38d2a1ee3.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Aspire_V4_ALT-8B-Model_Stock/7b634b21-8d89-4656-89d7-3590fc8a883a.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Asymmetric_Linearity-8B-Model_Stock/37071760-d24c-43cc-9965-d8c7873c0ee8.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Asymmetric_Linearity-8B-Model_Stock/ad58e69a-0917-4375-9e83-5db2ad50d0ca.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Aurora_faustus-8B-LINEAR/91a71a49-5dd4-43b1-9e1c-fd9492236712.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Aurora_faustus-8B-LINEAR/c8b72a17-837a-45ed-b285-bf472a4f6d45.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Aurora_faustus-8B-LORABLATED/05707286-d03b-4cb2-9a0f-48245c867cc7.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Aurora_faustus-8B-LORABLATED/d1d48abb-6dcf-4905-958f-c3a3e75feac6.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Aurora_faustus-8B-LORABLATED_ALT/2b644863-f52f-487a-85d1-3fc3ce973d90.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Aurora_faustus-8B-LORABLATED_ALT/68282f29-f56f-420b-bd1e-9cc54783c1a5.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Autumn_Dawn-8B-LINEAR/4f1d1b68-311f-4409-bf5b-41629a889da3.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Autumn_Dawn-8B-LINEAR/cd1c84dc-6c6e-4789-add7-0e3ca783b0ea.json
create mode 100644 data/hfopenllm_v2/DreadPoor/BaeZel-8B-LINEAR/22a9d3b8-ac45-4433-8926-5d28681af922.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/BaeZel-8B-LINEAR/f3af4295-9508-4a3e-ba5a-6336a560fd6c.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/BaeZel-8B-Model_Stock/31395ff6-82da-4585-85d6-459fcac9408f.json
create mode 100644 data/hfopenllm_v2/DreadPoor/BaeZel-8B-Model_Stock/57c4b9eb-dffd-4623-a2d5-b2374d3c9109.json
create mode 100644 data/hfopenllm_v2/DreadPoor/BaeZel_V2-8B-Model_Stock/24adbd8c-df3a-4b58-94e6-61a3dfa6828e.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/BaeZel_V2-8B-Model_Stock/cdacd0e9-fa22-4053-b16d-d3bac8541829.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/BaeZel_V2_ALT-8B-Model_Stock/08ac7c80-0f13-43c9-a538-683eb6927b59.json
create mode 100644 data/hfopenllm_v2/DreadPoor/BaeZel_V2_ALT-8B-Model_Stock/6ed62f64-c2be-4bca-b17d-bd0184a3d498.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/BaeZel_V3-8B-Model_Stock/91ec0c61-73ca-463f-b3be-3386293e4fc0.json
create mode 100644 data/hfopenllm_v2/DreadPoor/BaeZel_V3-8B-Model_Stock/db9e4d03-03a8-4a10-8739-16bbcfbb06d4.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Blunt_Edge-8B-SLERP/35807c64-beed-4022-a4ba-1284c5f6124f.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Blunt_Edge-8B-SLERP/7b0fc4fe-51c8-4f01-b07b-5bca05b40859.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/BulkUp/3c2e7750-3257-4012-8b43-44387707170c.json
create mode 100644 data/hfopenllm_v2/DreadPoor/BulkUp/6f286418-d8e3-4c11-8941-cfe5a18b1037.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Cadence-8B-LINEAR/8be55d6b-7fe0-41cf-86a6-66327dd88003.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Cadence-8B-LINEAR/b0a83b1f-3af2-45e8-9d88-d7302a529112.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Caelid-8B-Model_Stock/0462fce1-51b4-48d8-8278-a90048ffd637.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Caelid-8B-Model_Stock/8b15f9a3-6f39-4210-b48f-4dc5569114e2.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Casuar-9B-Model_Stock/7c5c8fd8-2fbb-41f3-88f3-92a544200204.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Casuar-9B-Model_Stock/e02f597c-c368-4223-ac90-c99d82c90634.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Condensed_Milk-8B-Model_Stock/32e63ffc-c64e-4562-ba99-14873f5bac2e.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Condensed_Milk-8B-Model_Stock/58573d8e-602a-4088-8dec-a738b7e55e9c.json
create mode 100644 data/hfopenllm_v2/DreadPoor/CoolerCoder-8B-LINEAR/6af4faad-05c2-488b-9685-e11ae4e1cbf0.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/CoolerCoder-8B-LINEAR/b3bc4e42-5850-45bd-a0a1-ff6779c04fce.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Damasteel-8B-LINEAR/8aa7701b-7019-44a0-851f-cfc9108fdfbd.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Damasteel-8B-LINEAR/b0a2ef10-8705-4eae-892d-51f3633dcd87.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Dearly_Beloved-8B-TIES/3d46ee0f-8ec0-4723-ac8d-fe88db7053c1.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Dearly_Beloved-8B-TIES/a2f95fad-5ab5-47d0-b9aa-33358c673caf.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Decayed-8B-LINEAR/5658866d-fd86-4203-b14f-84f9a4784028.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Decayed-8B-LINEAR/aef73a77-9df7-4d4f-89ef-50905d326198.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Derivative-8B-Model_Stock/9ef7e716-8638-46ac-a455-f601c1cfddc1.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Derivative-8B-Model_Stock/e9ffdfb6-6f91-4bac-89d2-40b1eb43f3ee.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Derivative_V2-8B-Model_Stock/3320dceb-b5ef-4267-81d3-b6fe2a415eee.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Derivative_V2-8B-Model_Stock/8ff39438-907c-465f-ac7a-5a25cfd8d824.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Derivative_V2_ALT-8B-Model_Stock/83d831c5-a74f-4699-9961-664a7a51b7b8.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Derivative_V2_ALT-8B-Model_Stock/ac19b0a8-1955-4bab-b7ae-451a84dc09c6.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Derivative_V3-8B-Model_Stock/54f51897-7b47-4e95-9c1a-58ecd64caa96.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Derivative_V3-8B-Model_Stock/83fb88ec-f640-4c1e-b71c-53a123fc4c2e.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Elusive_Dragon_Heart-8B-LINEAR/3811cc34-45cb-4932-b862-39bf042331e0.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Elusive_Dragon_Heart-8B-LINEAR/fbc53f61-cb3b-4f85-a724-fc07c6912c22.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Emu_Eggs-9B-Model_Stock/5b2a16a1-7a2a-40b7-add6-b99378b6af00.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Emu_Eggs-9B-Model_Stock/9343177e-5432-47c7-9fb6-90f2dc9125e5.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Eunoia_Vespera-8B-LINEAR/1dc2a5bb-40b6-401e-8f1c-6110cb4c0f0d.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Eunoia_Vespera-8B-LINEAR/5a835cef-3db8-40c9-8ae3-022d0719c89e.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Fu_sion_HA-8B-SLERP/5d6eb91b-518c-41ae-9e52-bb741b005601.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Fu_sion_HA-8B-SLERP/742e0a1c-7496-4076-bdbf-ada0a8e528c2.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/HOT_STINKING_GARBAGE/70471d77-adb1-49df-ab72-8f43f379ab23.json
create mode 100644 data/hfopenllm_v2/DreadPoor/HOT_STINKING_GARBAGE/f0664035-3256-444c-b848-ef603e0d46b5.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/H_the_eighth-8B-LINEAR/2bbec710-ce13-4fa3-861b-fce8eee26b3b.json
create mode 100644 data/hfopenllm_v2/DreadPoor/H_the_eighth-8B-LINEAR/9159aaa6-8663-491f-901a-74da4c343d20.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Happy_New_Year-8B-Model_Stock/170808e4-7506-44c9-8bb7-5dd92037a347.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Happy_New_Year-8B-Model_Stock/5179b145-9fdb-4ab5-8cca-87966ecf6519.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Heart_Stolen-8B-Model_Stock/86b9c040-4c5e-413d-ac23-1603c499b5de.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Heart_Stolen-8B-Model_Stock/da872193-1d25-4e8e-bc22-9138a9d121ba.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Heart_Stolen-ALT-8B-Model_Stock/141d8908-50cb-4457-a0f0-93d55d1c705b.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Heart_Stolen-ALT-8B-Model_Stock/967fdd26-1f8a-40d6-8f7d-ca731c7ef2e3.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Here_We_Go_Again-8B-SLERP/1c21cfd2-2b01-44d3-8daa-41493a743a75.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Here_We_Go_Again-8B-SLERP/dd615b4c-189e-4361-bcf4-879fd59b28a2.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Howdy-8B-LINEAR/0aeee3e8-00ce-4f95-bbd9-307d93a194a4.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Howdy-8B-LINEAR/88df4a25-089c-4f21-b403-a1f5dad112b3.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Incidental-8B-Model_Stock/102ed90e-cbe3-4219-b9c6-cec82c78941f.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Incidental-8B-Model_Stock/8c583b51-4349-48af-98d9-8eaaf43d60b6.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Irina-8B-model_stock/34aab556-5e97-4ea2-9ada-d17dc3624be2.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Irina-8B-model_stock/60aebc6f-b3ee-4b32-8b89-4359c990fb23.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Kindling-8B-Model_Stock/8ee9ad54-c6ca-4afc-931b-ffe1fd1d5971.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Kindling-8B-Model_Stock/fbd9d5e3-15f7-45ce-92fb-368b3bfcc526.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/L3.1-BaeZel-8B-Della/6c7dfbaf-648e-4c4a-907f-8639ab1c7312.json
create mode 100644 data/hfopenllm_v2/DreadPoor/L3.1-BaeZel-8B-Della/b177e329-ce6b-4bc6-aeac-1c01306e6b1f.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Laughing_Stock-8B-Model_Stock/7f371c11-e8f0-4233-b359-aac39c0a1110.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Laughing_Stock-8B-Model_Stock/cf1b2ab2-d18b-44c1-b0ed-476dba32c034.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Lava_Lamp-8B-SLERP/26d89e91-7f52-4913-a4e0-3275cca1d8d7.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Lava_Lamp-8B-SLERP/9f758d4e-d121-4688-8ece-8dc67a499811.json
create mode 100644 data/hfopenllm_v2/DreadPoor/LemonP-8B-Model_Stock/903b8c71-d54d-4ce4-9845-71eb8ca8733a.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/LemonP-8B-Model_Stock/f13fb9a9-f53c-4c7e-9e29-fabb010a617b.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Lydia_of_Whiterun-8B-LINEAR/9bdc17bf-7b81-49c8-81f5-c6dfa31b449b.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Lydia_of_Whiterun-8B-LINEAR/cee29aba-b6c1-42a2-88d0-a92080b3c083.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Matryoshka-8B-LINEAR/28109e00-87c1-4809-a4fc-dddebba52621.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Matryoshka-8B-LINEAR/2f8ce822-9278-49e5-878a-69439e794623.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Mercury_In_Retrograde-8b-Model-Stock/6a21381b-426d-4a5d-ad6d-2aeb57ed14c5.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Mercury_In_Retrograde-8b-Model-Stock/eff11f37-ec26-4866-8109-0ee6dcac7fec.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Minthy-8B-Model_Stock/03a8091c-473e-4fbe-af70-35f791a23a0f.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Minthy-8B-Model_Stock/394ac507-8bdb-4d06-bf6e-87911443ec2b.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Minthy_ALT-8B-Model_Stock/709e429f-0a98-4ae6-b10f-f0546ef2d9b5.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Minthy_ALT-8B-Model_Stock/ed75e9ed-841b-4783-a201-bc72651afd0a.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Minthy_V2-8B-Model_Stock/38cd418c-9770-49d2-8b30-ac47e445cee3.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Minthy_V2-8B-Model_Stock/3f8011c6-6826-4788-b848-ec6938eefa7f.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Minus_Penus-8B-Model_Stock/395b9855-e394-46c9-b95a-75203399aed4.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Minus_Penus-8B-Model_Stock/d49b6a48-ae81-467d-87c5-b17f9ca306f8.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Morphing-8B-Model_Stock/39b7e250-9f71-4833-941e-85692a48b6e6.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Not_Even_My_Final_Form-8B-Model_Stock/bc85d435-a537-4ed0-bf4e-02d9c30b5fa3.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Not_Even_My_Final_Form-8B-Model_Stock/c0d102a2-ff8c-45ac-a825-31472b98b871.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Nother_One-8B-Model_Stock/464f363d-ab94-4cac-8846-fbcf25be3dec.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Nother_One-8B-Model_Stock/7c5674a8-6a1c-483e-be9c-b0a6d00d3ac4.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Noxis-8B-LINEAR/8778fbef-d0f0-4a47-8adb-8e8f594d9195.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Noxis-8B-LINEAR/d34b899e-b067-4c9c-9fa2-439f8b2d589d.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Nullsworn-12B-LINEAR/3f92cd91-57b4-46eb-864b-2e4870b920fc.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Nullsworn-12B-LINEAR/8c7b2332-510b-42d3-bcbb-e177c35d27d5.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Nwah-8B-Model_Stock/34dec14e-846a-4037-8dbd-f1d1599d5adf.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Nwah-8B-Model_Stock/685f107f-e431-4dba-a117-8d6f1dd2c296.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/ONeil-model_stock-8B/7f5fa4e0-e28c-46df-acbd-22e7b010a407.json
create mode 100644 data/hfopenllm_v2/DreadPoor/ONeil-model_stock-8B/e1570804-85b6-4518-a099-5f21ab27d12c.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Oh_Boy-8B-LINEAR/393ad85d-6b8b-466d-99e0-6a89bf0ce66e.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Oh_Boy-8B-LINEAR/a779ebec-76ab-4a1e-aa4f-d1a6adfe2d5c.json
create mode 100644 data/hfopenllm_v2/DreadPoor/OrangeJ-8B-Model_Stock/1ed7f6ed-d04d-4cfc-a36a-1ef0f72d4814.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/OrangeJ-8B-Model_Stock/d436f2a4-ebd5-4712-871a-0616f491bda4.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Promissum_Mane-8B-LINEAR-lorablated/827c075e-78a2-4e4b-a561-b95728cdf2b2.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Promissum_Mane-8B-LINEAR-lorablated/c901a9ee-069a-4e3e-ac52-3017d67d8800.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Promissum_Mane-8B-LINEAR/08317b59-ff74-43c8-bea5-2a266c38816e.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Promissum_Mane-8B-LINEAR/d44a7888-1463-4492-9359-f8287a8f7f01.json
create mode 100644 data/hfopenllm_v2/DreadPoor/RPMash-8B-Model_Stock/4106d4d3-344a-4c1f-b9ce-a3140d435013.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/RPMash-8B-Model_Stock/aa8e7299-0c36-4f27-b8c9-e9a5e4da8c97.json
create mode 100644 data/hfopenllm_v2/DreadPoor/RPMash_V3-8B-Model_Stock/2b308fad-8494-4056-8b84-82733cd2710a.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/RPMash_V3-8B-Model_Stock/c7e0c75d-f0c1-4a44-b540-607e99c69e92.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Rusted_Gold-8B-LINEAR/70f7842f-1111-4c6a-914d-35e48537d1fc.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Rusted_Gold-8B-LINEAR/93c867d0-4f10-440c-838c-91d1633fe584.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Rusted_Platinum-8B-LINEAR/1a4a69c5-4acc-4ad9-adb2-bd9cf0fa2875.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Rusted_Platinum-8B-LINEAR/4b9a1e5a-dc99-44d9-b4f4-6bef1eb285ca.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Rusted_Platinum-8B-Model_Stock/151226ba-9744-45bc-b923-30df57f7aa3e.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Rusted_Platinum-8B-Model_Stock/219e3183-8d9c-4188-a550-72d7f20ff1ec.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Sellen-8B-model_stock/45e281e8-f28c-40a5-92e4-c16b627adb32.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Sellen-8B-model_stock/98363657-0793-4eb3-94de-28961afc92ea.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Something-8B-Model_Stock/1d1bf908-44fb-4b87-b52d-845a1cdafc08.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Something-8B-Model_Stock/a32b4ded-6bff-441e-afbd-736e6d8cce5c.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Spring_Dusk-8B-SCE/326bcf4a-02e9-4218-8bf2-55a94a79435e.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Spring_Dusk-8B-SCE/e9124a70-037d-41ed-becb-953382a3f43a.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Summer_Dawn-8B-SCE/145facc2-ab11-4c68-b841-762e0ad9bd5a.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Summer_Dawn-8B-SCE/7d7eefa4-193a-4158-a903-9a8484b36e9a.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Summer_Dusk-8B-TIES/a2cad434-61a0-40be-8740-6c6a8e3cea25.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Summer_Dusk-8B-TIES/d3e6aae6-9284-4309-8d8c-02c9e797a58b.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Summer_Rain-8B-SCE/6ee8537c-90e8-4455-83ca-c8c375a5ead7.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Summer_Rain-8B-SCE/9f4730ec-a162-455c-83ef-c8fa9ebd036c.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Summer_Rain-8B-TIES/1704c33f-e00e-4fbb-be4c-3d1fe85d635f.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Summer_Rain-8B-TIES/6efbfb38-57e5-46c7-b765-f7d0356afb97.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Sun-8B-Model_Stock/13b16b8d-533f-4323-a75a-e16df96b8351.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Sun-8B-Model_Stock/f4d418d9-1089-452d-9c7f-4cc4712e6ac7.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Sweetened_Condensed_Milk-8B-Model_Stock/1c9b325b-92b3-499a-a3ea-026269c63c88.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Sweetened_Condensed_Milk-8B-Model_Stock/d0461daa-d106-44ce-9d9c-03a6fef37b45.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/TEST02-Ignore/414bb880-e2b2-43fb-ad9b-f51d7c4b7ad4.json
create mode 100644 data/hfopenllm_v2/DreadPoor/TEST02-Ignore/c546ccde-cef3-4de2-a49f-24517d76dde5.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/TEST03-ignore/ceba83fe-89b2-4b8a-ba7d-ed1ad9acb070.json
create mode 100644 data/hfopenllm_v2/DreadPoor/TEST03-ignore/e85d3ccf-f48d-4e5c-b893-771a107773d4.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/TEST06-ignore/15dbba84-b177-4bcd-8874-0153152f0015.json
create mode 100644 data/hfopenllm_v2/DreadPoor/TEST06-ignore/b8d22ade-874e-4ff3-9fcd-dbe14220d48b.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/TEST07-ignore/39b77252-2729-429b-b220-3b19ca0b6a6c.json
create mode 100644 data/hfopenllm_v2/DreadPoor/TEST07-ignore/97e8e7e2-74a4-42a5-a0b1-250e47d3c3e6.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/TEST08-ignore/79b7bdb6-82a7-466f-8d9a-b26211f4ee73.json
create mode 100644 data/hfopenllm_v2/DreadPoor/TEST08-ignore/b2d56bb6-a726-4e47-8bc6-c016a51aac5c.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Trinas_Nectar-8B-model_stock/3366f6d8-41bc-4c2c-a72c-bc0fd7dc8dd2.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Trinas_Nectar-8B-model_stock/922fec6c-cfec-47cf-a374-5676635a5b40.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/UNTESTED-VENN_1.2-8B-Model_Stock/5945660f-40e1-4c49-8f28-581f06b51e59.json
create mode 100644 data/hfopenllm_v2/DreadPoor/UNTESTED-VENN_1.2-8B-Model_Stock/7ba52efb-3890-4691-8740-9f051f1f645e.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/VENN_1.2-8B-Model_Stock/0adfce8d-0070-4375-be96-a34466851101.json
create mode 100644 data/hfopenllm_v2/DreadPoor/VENN_1.2-8B-Model_Stock/7b192b49-057e-418a-b47d-44b0ec82a6b6.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/WIP-Acacia-8B-Model_Stock/d28bdd9d-53bb-498f-84cb-7d482f41d005.json
create mode 100644 data/hfopenllm_v2/DreadPoor/WIP-Acacia-8B-Model_Stock/f2120d53-bef6-44d6-84a6-a6f8e3537188.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/WIP_Damascus-8B-TIES/38e5b086-4a73-4ffa-9b32-eb80405fecb5.json
create mode 100644 data/hfopenllm_v2/DreadPoor/WIP_Damascus-8B-TIES/f5408aa9-85c8-46e5-b225-0480b2e18e97.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Wannabe-8B-Model_Stock/c1918f55-286c-4b29-ac53-2ee8f9d36d9e.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Wannabe-8B-Model_Stock/fafc0425-a4f0-4c5b-8328-5dfca7d6402f.json
create mode 100644 data/hfopenllm_v2/DreadPoor/What_A_Thrill-8B-Model_Stock/52659d37-67f8-45b8-88e4-11917dc90488.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/What_A_Thrill-8B-Model_Stock/b9fadd79-8220-4023-b92a-c38b07a90e8f.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Winter-8B-SCE/556ae77c-effe-44ab-ac4a-1ad7cbd7c363.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Winter-8B-SCE/b351842a-aa2a-494a-8159-c732f071c7c6.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Winter_Dawn-8B-TIES/048fc971-3baf-4740-a132-2f9476d01b7a.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Winter_Dawn-8B-TIES/21947721-9f9a-4cc2-aa88-e1853f488167.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Winter_Dusk-8B-TIES/abd28d25-01e0-474d-be35-08d816d281f5.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Winter_Dusk-8B-TIES/cdc03c25-5bfb-4185-8e29-40e1af2ef253.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Winter_Night-8B-Model_Stock/17f49724-6553-4baa-b354-45ffd0f2c844.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Winter_Night-8B-Model_Stock/49d98c73-75d8-4629-8cc2-a03592b0f551.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Yafune-8B-Model_Stock/3e60d982-d7d5-432b-962e-b7734cc90534.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Yafune-8B-Model_Stock/edaf2deb-16a3-4109-84e0-e65498e09d1f.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Yearn_V3-8B-Model_Stock/763eec85-4395-43b6-aa79-9ecb024eb7af.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Yearn_V3-8B-Model_Stock/79a0fdf3-b432-4598-be62-f9eb57fa5a43.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/ZEUS-8B-V17-Abliterated_ALT/538f74e4-2587-43d7-a3fb-7826f3995ad9.json
create mode 100644 data/hfopenllm_v2/DreadPoor/ZEUS-8B-V17-Abliterated_ALT/662566e0-2af3-40d6-90de-9b361bcae355.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Zelus-8B-Model_Stock/2a1d9c9c-b3e4-49d8-96cb-720e53184db6.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Zelus-8B-Model_Stock/d81c0035-a0b1-426c-9080-8ccbf745642b.json
create mode 100644 data/hfopenllm_v2/DreadPoor/Zelus_V2-8B-Model_Stock/100bc243-158c-4e5c-918b-1439bf26fee8.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/Zelus_V2-8B-Model_Stock/b385729e-27f8-4bf2-b2c6-674504fcd75b.json
create mode 100644 data/hfopenllm_v2/DreadPoor/felix_dies-mistral-7B-model_stock/45e32080-1464-40e0-a232-310fdda967eb.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/hakuchido-8B-MODEL_STOCK/a9d24835-302c-445b-b1fd-89d41e3e7878.json
create mode 100644 data/hfopenllm_v2/DreadPoor/hakuchido-8B-MODEL_STOCK/e89b279f-d548-4aa8-b5e5-0bffdd98b840.json
create mode 100644 data/hfopenllm_v2/DreadPoor/ichor-8B-Model_Stock/777a53f9-891c-4f9e-99a8-bb1988f61f19.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/ichor-8B-Model_Stock/b1b0d419-e025-488a-a367-6769edfdf8ff.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/ichor_1.1-8B-Model_Stock/64afccfe-af45-4c26-878a-eb01b56f3524.json
create mode 100644 data/hfopenllm_v2/DreadPoor/ichor_1.1-8B-Model_Stock/f15846b1-8eaa-411b-88f7-25064161af4e.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/inexpertus-8B-Model_Stock/1f0112d0-46b4-4a2c-9ccc-4872ccbae7a5.json
create mode 100644 data/hfopenllm_v2/DreadPoor/inexpertus-8B-Model_Stock/e803fc85-fb98-4db8-aab0-a63100dcd5fc.json
create mode 100644 data/hfopenllm_v2/DreadPoor/inexpertus_1.1-8B-LINEAR/50620749-5ecf-41eb-a131-611675560e07.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/inexpertus_1.1-8B-LINEAR/86f45b60-19d1-41fa-8538-3d22ea28a98f.json
create mode 100644 data/hfopenllm_v2/DreadPoor/inexpertus_1.2-8B-LINEAR/2d40a551-6440-4d71-87e4-639d486c1c5e.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/inexpertus_1.2-8B-LINEAR/c2465654-27c4-4cad-94fa-3b0bff1fd242.json
create mode 100644 data/hfopenllm_v2/DreadPoor/mergekit-nuslerp-nqzkedi/22235942-2e3e-4ef4-b7a0-5800f507571a.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/mergekit-nuslerp-nqzkedi/c1bff8a8-6159-4fe6-a9bd-846846d0e633.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/remember_to_breathe-8b-Model-Stock/76309e63-a135-45cf-9f06-b091215726d0.json
create mode 100644 data/hfopenllm_v2/DreadPoor/remember_to_breathe-8b-Model-Stock/ac06867d-3a34-42f6-9e2e-226cf86748f6.json
create mode 100644 data/hfopenllm_v2/DreadPoor/test/394f1fc8-dc2c-4ff9-9ad0-7b3a8a8ddeb3.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/test/a4f14e1c-4c16-4fb8-9753-f05a6c5f2836.json
create mode 100644 data/hfopenllm_v2/DreadPoor/test_ALT/03e52d4f-78d7-453c-9685-844dd1636904.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/test_ALT/1ca8f31a-4df9-4eb5-8ded-506d80246cdd.json
create mode 100644 data/hfopenllm_v2/DreadPoor/tests_pending-do_not_use_yet/3ce136d5-be81-4b8c-a7dc-4e1346935d35.json
delete mode 100644 data/hfopenllm_v2/DreadPoor/tests_pending-do_not_use_yet/de113d87-7875-4f5c-89eb-48a59797b19b.json
delete mode 100644 data/hfopenllm_v2/ECE-ILAB-PRYMMAL/ILAB-Merging-3B-V2/cbdf2130-1b6a-43ae-a503-4fc7acf14a76.json
create mode 100644 data/hfopenllm_v2/ECE-ILAB-PRYMMAL/ILAB-Merging-3B-V2/fb35accf-0c5d-4f72-8d73-ba366a41a76d.json
create mode 100644 data/hfopenllm_v2/EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2/75e5ca5d-cce1-4463-b398-553399ce6833.json
create mode 100644 data/hfopenllm_v2/EVA-UNIT-01/EVA-Qwen2.5-72B-v0.2/c426bae7-b98d-4343-b419-ac8206196a95.json
delete mode 100644 data/hfopenllm_v2/Edgerunners/meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16/1e2cd0e7-ce74-4eac-86fb-64412d1d2094.json
create mode 100644 data/hfopenllm_v2/Edgerunners/meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16/b17de9f2-6f94-49f6-b908-fa983e8f8f9b.json
create mode 100644 data/hfopenllm_v2/EleutherAI/gpt-j-6b/58ba7ca1-8cca-4668-836b-824491d9cf01.json
create mode 100644 data/hfopenllm_v2/EleutherAI/gpt-neo-1.3B/23da100a-13b9-42a7-ba79-234be551d0e4.json
create mode 100644 data/hfopenllm_v2/EleutherAI/gpt-neo-125m/2d0c12b9-cff8-4366-a3ce-7772e4c098c9.json
create mode 100644 data/hfopenllm_v2/EleutherAI/gpt-neo-2.7B/4b87eea2-169c-411e-9d15-caf6b7826590.json
create mode 100644 data/hfopenllm_v2/EleutherAI/gpt-neox-20b/62a3cce2-4ff5-4dc9-beab-a06001fd82d9.json
create mode 100644 data/hfopenllm_v2/EleutherAI/pythia-1.4b/0e5961e1-af27-4eee-8b9b-c82ee4ab61b1.json
delete mode 100644 data/hfopenllm_v2/EleutherAI/pythia-1.4b/e268be37-589d-41f2-af98-a85bb412eb44.json
delete mode 100644 data/hfopenllm_v2/EleutherAI/pythia-12b/4df16bb2-996f-473f-9096-a8a8e152ca9b.json
create mode 100644 data/hfopenllm_v2/EleutherAI/pythia-12b/b62352d4-e3b0-4b4d-8d68-e2d973d820c1.json
create mode 100644 data/hfopenllm_v2/EleutherAI/pythia-160m/7fadc486-767e-45ef-979d-74ecb858cb99.json
delete mode 100644 data/hfopenllm_v2/EleutherAI/pythia-160m/d59ad4b0-e58e-48d6-90eb-93398c46251a.json
delete mode 100644 data/hfopenllm_v2/EleutherAI/pythia-1b/a21cc55c-e9df-46ef-beed-b67a1750ddb7.json
create mode 100644 data/hfopenllm_v2/EleutherAI/pythia-1b/d0628e6f-a6f3-42eb-b9fc-e880ae8c0688.json
create mode 100644 data/hfopenllm_v2/EleutherAI/pythia-2.8b/0999a066-1151-4445-b130-00d8fe4a516e.json
delete mode 100644 data/hfopenllm_v2/EleutherAI/pythia-2.8b/0afcbde6-b822-4264-8733-bc255ea73314.json
create mode 100644 data/hfopenllm_v2/EleutherAI/pythia-410m/1efc09d8-6a5c-4d48-b76e-2e04ef97b676.json
delete mode 100644 data/hfopenllm_v2/EleutherAI/pythia-410m/c9db5f06-9aac-4678-bfe0-65773ece4558.json
create mode 100644 data/hfopenllm_v2/EleutherAI/pythia-6.9b/1a59412f-fe78-4ecf-8951-8f2996dd374f.json
delete mode 100644 data/hfopenllm_v2/EleutherAI/pythia-6.9b/6ae207e3-2596-4b28-b058-d47d07465192.json
create mode 100644 data/hfopenllm_v2/Enno-Ai/EnnoAi-Pro-French-Llama-3-8B-v0.4/b5403311-2069-488d-af98-27da14496c15.json
create mode 100644 data/hfopenllm_v2/Enno-Ai/EnnoAi-Pro-Llama-3-8B-v0.3/6c10c176-b2b6-4216-91c0-1444944612f7.json
create mode 100644 data/hfopenllm_v2/Enno-Ai/EnnoAi-Pro-Llama-3-8B/80ebd92e-d9b6-46ce-b77e-973c3f3f6051.json
create mode 100644 data/hfopenllm_v2/Enno-Ai/EnnoAi-Pro-Llama-3.1-8B-v0.9/0418e36f-17ea-46a2-bfeb-91cc0ff719bf.json
create mode 100644 data/hfopenllm_v2/EnnoAi/EnnoAi-7B-French-Instruct-202502/4f5ba3fc-694a-45b1-ae9d-2c7d33e41519.json
delete mode 100644
data/hfopenllm_v2/EnnoAi/EnnoAi-7B-French-Instruct-202502/75939d35-c0ca-4256-b667-fe6042ca5979.json create mode 100644 data/hfopenllm_v2/EnnoAi/EnnoAi-Pro-Llama-3.1-8B-v1.0/8b0d1556-bbd5-49e3-b881-32224bc1aa9a.json create mode 100644 data/hfopenllm_v2/Epiculous/Azure_Dusk-v0.2/524e634f-280c-4f3a-9f1f-bdda19fad740.json delete mode 100644 data/hfopenllm_v2/Epiculous/Azure_Dusk-v0.2/79790560-846a-48fb-b37a-462162eb0e97.json delete mode 100644 data/hfopenllm_v2/Epiculous/Crimson_Dawn-v0.2/91b7917e-a908-4281-9a4d-a2c1e7558105.json create mode 100644 data/hfopenllm_v2/Epiculous/Crimson_Dawn-v0.2/cb82e92b-f207-4fbd-9bfe-43184769cdbd.json create mode 100644 data/hfopenllm_v2/Epiculous/NovaSpark/0b674103-4e55-41f4-accb-b7be73671801.json delete mode 100644 data/hfopenllm_v2/Epiculous/NovaSpark/9270e697-84b1-46c5-afcc-481065f2be8f.json delete mode 100644 data/hfopenllm_v2/Epiculous/Violet_Twilight-v0.2/83990950-a34c-463f-9a1a-d9371910da6f.json create mode 100644 data/hfopenllm_v2/Epiculous/Violet_Twilight-v0.2/fa0290e0-723f-4502-90b6-c77007fffc1f.json create mode 100644 data/hfopenllm_v2/EpistemeAI/Alpaca-Llama3.1-8B/c3827ecd-d02a-4464-a098-110f4fb54516.json create mode 100644 data/hfopenllm_v2/EpistemeAI/Athena-gemma-2-2b-it-Philos/af9700fe-20c0-4b7c-9f3a-c4d78fab7911.json create mode 100644 data/hfopenllm_v2/EpistemeAI/Athena-gemma-2-2b-it/959a4e4d-211c-4e45-94f1-f8f877e0b36f.json create mode 100644 data/hfopenllm_v2/EpistemeAI/Athene-codegemma-2-7b-it-alpaca-v1.3/96a8b3c0-d6bc-41fe-8967-0d798669aa8e.json delete mode 100644 data/hfopenllm_v2/EpistemeAI/DeepPhi-3.5-mini-instruct/b367fb18-f302-41ec-a5f9-7d47766ca6f3.json create mode 100644 data/hfopenllm_v2/EpistemeAI/DeepPhi-3.5-mini-instruct/ed5d2ca8-d551-493d-8877-348204ef91cc.json create mode 100644 data/hfopenllm_v2/EpistemeAI/DeepThinkers-Phi4/04e20a14-8346-4801-8515-189861c857cb.json delete mode 100644 data/hfopenllm_v2/EpistemeAI/FineLlama3.1-8B-Instruct/a99828d9-a521-4b46-bd81-e791fae7bcf8.json create mode 100644 data/hfopenllm_v2/EpistemeAI/FineLlama3.1-8B-Instruct/eec2da56-ba0a-418f-afe1-8a46882b9839.json create mode 100644 data/hfopenllm_v2/EpistemeAI/Fireball-12B-v1.13a-philosophers/321cf68b-9220-4ada-89da-061341a20a9d.json create mode 100644 data/hfopenllm_v2/EpistemeAI/Fireball-12B/86fda025-2345-4a40-9094-223b96b21f13.json delete mode 100644 data/hfopenllm_v2/EpistemeAI/Fireball-12B/bdb69cfa-cce7-4813-babb-b6f987be90de.json create mode 100644 data/hfopenllm_v2/EpistemeAI/Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200/3c734233-9868-4ba6-83c0-2b63f2ce8980.json delete mode 100644 data/hfopenllm_v2/EpistemeAI/Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200/627a984d-8a4b-4a10-ac9e-05ccdbcc1835.json create mode 100644 data/hfopenllm_v2/EpistemeAI/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math-KTO-beta/7f5eca48-0ab9-4ef2-85c2-a7f1fe713afe.json create mode 100644 data/hfopenllm_v2/EpistemeAI/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R2/f5e0e809-08b8-43dd-a44d-875f365610c3.json create mode 100644 data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto/8d267135-a7e6-4ec5-ae09-66478804bb66.json delete mode 100644 data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto/b8b22223-7ef6-4fec-9928-68de2ce516e6.json create mode 100644 data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/4940ed0e-2c1e-4408-9806-49ceed30a69e.json create mode 100644 
data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/5f6f7b7c-ef6a-4468-aae5-d7dfc25c5659.json delete mode 100644 data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/7268e623-7dc3-4a79-b410-3f2efdbb6b1b.json delete mode 100644 data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/ba8d6727-fe89-4bab-95a2-5f70d77034dc.json delete mode 100644 data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds/1ad587be-8544-4c37-bb8c-e21ad685039c.json create mode 100644 data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds/5244ee3c-7d65-434a-acfe-cdb277ff5264.json delete mode 100644 data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code/5f40e687-560e-4846-bbc1-4c2300680d4b.json create mode 100644 data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code/eba4644f-d455-4a23-a16f-8ecb038ffe7f.json delete mode 100644 data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K/839b6ee8-2f25-4b53-abec-a0a9dd198f04.json create mode 100644 data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K/fb270319-7010-4946-b60c-409aebe41aaa.json delete mode 100644 data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT/6f29d957-8b65-4ee7-96dd-da2477023403.json create mode 100644 data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT/d57bd77a-11cc-497c-b0bb-31c1ffa63dc2.json create mode 100644 data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto/0220984e-fe8c-4e72-bc3e-92b949ffe769.json delete mode 100644 data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto/c39007d8-b4b8-485a-88af-39d18a6007c3.json create mode 100644 data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Math/16482634-ec03-463a-9deb-2230ee955800.json delete mode 100644 data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Math/506bb9ca-e322-4ee3-b2d6-96e334a99473.json create mode 100644 data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO/4c1db32d-96fc-4a66-b083-530a3e75ad6d.json delete mode 100644 data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO/e351aba3-7a05-400b-abbf-d09c1fe333e3.json delete mode 100644 data/hfopenllm_v2/EpistemeAI/Fireball-Mistral-Nemo-Base-2407-v1-DPO2/6a0cc28d-d7bc-454d-ab7c-93c823256f30.json create mode 100644 data/hfopenllm_v2/EpistemeAI/Fireball-Mistral-Nemo-Base-2407-v1-DPO2/c0c5c846-395a-47ac-9e8e-e598939f317d.json create mode 100644 data/hfopenllm_v2/EpistemeAI/Fireball-R1-Llama-3.1-8B-Medical-COT/6b3f6b59-a8eb-48c2-acbc-92e8f34b2dd6.json create mode 100644 data/hfopenllm_v2/EpistemeAI/Fireball-R1-Llama-3.1-8B/d017e3bf-2abe-4b84-810e-e0eaf973adc3.json create mode 100644 data/hfopenllm_v2/EpistemeAI/Fireball-R1.1-Llama-3.1-8B/62a3ecb8-f6d1-429c-807f-5545b2a5897f.json create mode 100644 data/hfopenllm_v2/EpistemeAI/Llama-3.2-3B-Agent007-Coder/748557ce-1a49-4b3a-9c38-9007dc04aafb.json create mode 100644 data/hfopenllm_v2/EpistemeAI/Mistral-Nemo-Instruct-12B-Philosophy-Math/95d43d01-a75e-4af4-a2cc-b60f832071d3.json delete mode 100644 data/hfopenllm_v2/EpistemeAI/Mistral-Nemo-Instruct-12B-Philosophy-Math/ee2ab45a-4a93-4942-8510-aef93b39b7e3.json create mode 100644 
data/hfopenllm_v2/EpistemeAI/OpenReasoner-Llama-3.2-3B-rs1.0/4dc7c889-7839-4047-b48c-33be5b688e72.json delete mode 100644 data/hfopenllm_v2/EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy/644cdea0-49f2-43b9-b94d-55d31c0e0d54.json create mode 100644 data/hfopenllm_v2/EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy/751851c8-9a7f-4135-a106-eab4efbd0734.json create mode 100644 data/hfopenllm_v2/EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic/2930e30c-9f2e-4248-ae3b-ed7ffbd12f8c.json delete mode 100644 data/hfopenllm_v2/EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic/e2422bfe-8569-4181-8ec1-955086bbb8bb.json delete mode 100644 data/hfopenllm_v2/EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent/98c2fc89-acc4-4740-9d24-c9e9c2cd9ad7.json create mode 100644 data/hfopenllm_v2/EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent/c1acc460-aeb8-4a99-8ca5-376ab60fb74a.json create mode 100644 data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT-V2-ORPO/33b8b64f-7da5-45aa-bf80-7145ef704229.json create mode 100644 data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT/2662d257-49e2-430d-b44f-b0b347c61271.json delete mode 100644 data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.2/3e1fd9a0-a037-4278-baaa-b444d3723557.json create mode 100644 data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.2/870b639b-ee7a-4b13-872b-52657539c836.json create mode 100644 data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.3/6ff20678-a335-4fa8-8126-9f96ce247f34.json delete mode 100644 data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.3/9c141030-9c3f-4e80-8b97-9297f3d81df6.json create mode 100644 data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO/19c4ea89-896a-4577-a386-c2470eaf743f.json delete mode 100644 data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO/d09af70f-bb55-40e8-88f2-a78f20c90b8e.json delete mode 100644 data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1/099d3be6-bd40-416f-90a1-582f66049c54.json create mode 100644 data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1/22eb2479-16ff-4a56-b9e4-e8835da7ca0e.json delete mode 100644 data/hfopenllm_v2/EpistemeAI/ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math/03d616a2-9a52-4014-8ecf-94dc93a5b4d2.json create mode 100644 data/hfopenllm_v2/EpistemeAI/ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math/aca3f1fd-9c46-47f6-81c6-dc56a702c1de.json create mode 100644 data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-0/071ca686-5950-4af4-80f2-969b1008e370.json delete mode 100644 data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-0/9835468b-c049-4562-8633-864d29c7bb75.json create mode 100644 data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-Instruct-r01-Reflect/78977c34-33f8-4037-86e0-dfce1d01c3f8.json delete mode 100644 data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-Instruct-r01-Reflect/b3efb02e-5312-48cf-b9e9-e90d3d5d9a7d.json create mode 100644 data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-R01/480e4294-c8d9-4088-9b8c-7a239d57f683.json delete mode 100644 data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-R01/5b06f64a-5c31-457e-a414-00e35888a6b2.json delete mode 100644 data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2/83b3c488-c210-4ce7-8f7f-75d0d04d5b02.json create mode 100644 
data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2/be9b21e8-90ce-451a-bcaf-2ebc7c72bc34.json delete mode 100644 data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2A/512a09c1-6c1c-4120-a659-91809607393a.json create mode 100644 data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2A/b0054dd8-e62c-4d0c-9b18-090851c3a7e2.json create mode 100644 data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2B/985e479b-658a-4548-9b5e-c9c04b8838c1.json delete mode 100644 data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2B/f92ef151-aa21-4240-8de6-1ff04bec55d9.json delete mode 100644 data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2C/88cb3df4-7cbb-440a-87d4-9b2a89f3572c.json create mode 100644 data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2C/d0ef8af4-156d-456d-9e33-b2cdb3f8c04e.json create mode 100644 data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-T1-V1/5050c787-2f95-4a17-a4b0-c094860627b5.json delete mode 100644 data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-T1-V1/ec3846e6-d111-4c77-93fb-8d1d8106271a.json create mode 100644 data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-T1_1/bb5c8274-4324-47f2-94c5-d0c831ce0de7.json delete mode 100644 data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-T1_1/ce5a0509-e68c-40f4-8b7b-c56ba90c0e10.json create mode 100644 data/hfopenllm_v2/EpistemeAI2/Athene-codegemma-2-7b-it-alpaca-v1.2/8113a26a-5941-4f3d-872a-bdde5456ad97.json create mode 100644 data/hfopenllm_v2/EpistemeAI2/Fireball-12B-v1.2/5b60047b-2e85-4a47-a31f-4c07f4bd2c30.json delete mode 100644 data/hfopenllm_v2/EpistemeAI2/Fireball-12B-v1.2/de86ca37-ffcb-41df-a0d1-68cb545ec1de.json create mode 100644 data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1-8B-Philos/88d79858-3a35-43eb-8da6-95b80b5deef6.json create mode 100644 data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1.01-8B-Philos/63266a49-01ea-40f1-83ef-778f391aff2b.json create mode 100644 data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1.03-8B-Philos/f0da069a-833f-489a-a923-c79542a3a9a6.json create mode 100644 data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1.04-8B-Philos/205b9da8-d561-41ec-946e-1d2f9a43e437.json create mode 100644 data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo/2ea4da56-4b95-4222-a4e2-f57c73e0ee4e.json delete mode 100644 data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo/7e03e547-5324-4c5d-b364-413014fad7eb.json create mode 100644 data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math/c086f693-cef1-4212-9c17-669b210f4caa.json create mode 100644 data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-C-R1-KTO-Reflection/290995f2-9982-4f29-ac74-dc646905206c.json create mode 100644 data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R1/c60e65e6-d771-4c53-80d0-c1e09aa39377.json create mode 100644 data/hfopenllm_v2/EpistemeAI2/Fireball-Llama-3.1-8B-Philos-Reflection/fcff202d-3b4f-4ba9-b3f6-1122d8abcac1.json create mode 100644 data/hfopenllm_v2/EpistemeAI2/Fireball-MathMistral-Nemo-Base-2407-v2dpo/5f0fa37a-e829-402b-b2ab-c68ffa248b6e.json delete mode 100644 data/hfopenllm_v2/EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math/0115907a-a473-4f12-8f0b-5dafd729fc44.json create mode 100644 data/hfopenllm_v2/EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math/a0b4a345-3530-4da2-8403-87259bbd1405.json create mode 100644 data/hfopenllm_v2/EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT/3548f0ea-f3ab-4a0e-9c77-5ae62014ed44.json delete mode 100644 
data/hfopenllm_v2/EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT/63b6d34d-1a59-40b6-b663-1d81544867f2.json create mode 100644 data/hfopenllm_v2/EpistemeAI2/Fireball-Phi-3-medium-4k-inst-Philos/707270e3-334b-4eba-84c0-2795ae53d79a.json create mode 100644 data/hfopenllm_v2/Eric111/CatunaMayo-DPO/c827bee3-a181-42bc-9387-ca132d59c8ba.json delete mode 100644 data/hfopenllm_v2/Eric111/CatunaMayo-DPO/ef63850d-6acf-4d04-ac01-7ac407bf3b89.json delete mode 100644 data/hfopenllm_v2/Eric111/CatunaMayo/9c2ab331-44f5-4306-a57c-5ddb0154ba63.json create mode 100644 data/hfopenllm_v2/Eric111/CatunaMayo/d3e8949b-f6f8-459f-891b-f4900ff806cd.json create mode 100644 data/hfopenllm_v2/Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties-v2/35d5f5e3-74eb-4eea-9f78-b7b8969830a2.json delete mode 100644 data/hfopenllm_v2/Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties-v2/80ff60c0-820c-425d-8b32-44fc61128c9f.json create mode 100644 data/hfopenllm_v2/Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties/4cf4479a-622a-4bc2-86f2-aa526216f24c.json delete mode 100644 data/hfopenllm_v2/Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties/d3b94b8e-8612-4928-bdba-81226af143b2.json create mode 100644 data/hfopenllm_v2/Etherll/Herplete-LLM-Llama-3.1-8b-Ties/6ed27890-3e61-4c7d-8c94-a78c0b34ba32.json create mode 100644 data/hfopenllm_v2/Etherll/Herplete-LLM-Llama-3.1-8b/87b5e360-7867-4edd-b45e-e7bb92a91b69.json create mode 100644 data/hfopenllm_v2/Etherll/Herplete-LLM-Llama-3.1-8b/d93116b8-28ff-41ea-8273-56f7ae11cf18.json create mode 100644 data/hfopenllm_v2/Etherll/Qwen2.5-7B-della-test/ba8c2c17-64f6-4cdb-b3b9-8977ce1bdbe2.json create mode 100644 data/hfopenllm_v2/Etherll/Qwen2.5-Coder-7B-Instruct-Ties/5e5602cc-b4de-4247-aa6d-940817fc849b.json delete mode 100644 data/hfopenllm_v2/Etherll/Qwen2.5-Coder-7B-Instruct-Ties/ea9f32e5-431d-4573-9ac9-25ebfa9c2c9e.json create mode 100644 data/hfopenllm_v2/Etherll/Replete-LLM-V3-Llama-3.1-8b/cc5f27f5-36d8-49bb-9c9d-7879598bfe71.json delete mode 100644 data/hfopenllm_v2/Etherll/SuperHermes/a641d61c-aa42-4bce-afc0-ba7639f0a24e.json create mode 100644 data/hfopenllm_v2/Etherll/SuperHermes/aec03bd9-808a-4c3f-bbde-40bcac5775fb.json create mode 100644 data/hfopenllm_v2/Eurdem/Defne-llama3.1-8B/b4ae6f0b-8a6b-4c60-8eb2-3e202877bcf5.json delete mode 100644 data/hfopenllm_v2/FINGU-AI/Chocolatine-Fusion-14B/5d5a7561-8a41-48ea-ae1c-e986ac666f19.json create mode 100644 data/hfopenllm_v2/FINGU-AI/Chocolatine-Fusion-14B/c68deb4d-73a8-40ab-b4e5-1773b7ec4ed8.json create mode 100644 data/hfopenllm_v2/FINGU-AI/L3-8B/a93c5674-599b-429c-a322-3c6bc7248f45.json delete mode 100644 data/hfopenllm_v2/FINGU-AI/L3-8B/f2a0c2ff-40a4-4a75-93ca-b611c4314dd5.json create mode 100644 data/hfopenllm_v2/FINGU-AI/Phi-4-RRStock/5e6374a6-56bd-4bd9-b04b-30ec9cf234bc.json delete mode 100644 data/hfopenllm_v2/FINGU-AI/Q-Small-3B/11d9d5ea-29f2-412e-af48-858626ebeec5.json create mode 100644 data/hfopenllm_v2/FINGU-AI/Q-Small-3B/c3d2fc86-a5c4-4e92-bcf9-26096ca32ad4.json create mode 100644 data/hfopenllm_v2/FINGU-AI/QwQ-Buddy-32B-Alpha/1b49cb06-3ee1-4945-aaed-12c868d9e45e.json delete mode 100644 data/hfopenllm_v2/FINGU-AI/QwQ-Buddy-32B-Alpha/32836e5d-d413-4e40-8c9c-4cb8c3daa23a.json create mode 100644 data/hfopenllm_v2/FINGU-AI/RomboUltima-32B/65853bb5-ff3e-4880-8c32-ce9aabcadd7b.json delete mode 100644 data/hfopenllm_v2/FINGU-AI/RomboUltima-32B/65c5a05d-0b24-4767-88ff-24984fa0f988.json create mode 100644 data/hfopenllm_v2/FINGU-AI/Ultimos-32B/7fecc176-debf-4bf7-b3f3-479d05678a1e.json delete mode 100644 
data/hfopenllm_v2/FINGU-AI/Ultimos-32B/fa69d78a-e112-45ff-80c3-b4eb30d83ed9.json create mode 100644 data/hfopenllm_v2/FallenMerick/Chewy-Lemon-Cookie-11B/3c965626-a264-40db-93e1-cd7659d0662e.json delete mode 100644 data/hfopenllm_v2/FallenMerick/Chewy-Lemon-Cookie-11B/f4f2289c-5b3c-4040-9e34-ac20352f45d7.json delete mode 100644 data/hfopenllm_v2/Felladrin/Llama-160M-Chat-v1/0885ef86-d7ef-4261-8ccd-f0391c42ffe4.json create mode 100644 data/hfopenllm_v2/Felladrin/Llama-160M-Chat-v1/50fa6f0c-d689-4380-b619-253209b5badc.json delete mode 100644 data/hfopenllm_v2/Felladrin/Minueza-32M-UltraChat/44324409-5cb3-438a-9751-9ee868b35233.json create mode 100644 data/hfopenllm_v2/Felladrin/Minueza-32M-UltraChat/adb25c88-6113-4307-bbf0-d377f757bc18.json create mode 100644 data/hfopenllm_v2/FlofloB/100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/b9ac5e03-c878-4e46-a89c-1906f3b91dce.json delete mode 100644 data/hfopenllm_v2/FlofloB/100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/d37d499c-74cc-4fbb-9a3c-80776ebf2b82.json create mode 100644 data/hfopenllm_v2/FlofloB/10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/d6a6badf-4472-44b5-af9e-4282e4406a8e.json delete mode 100644 data/hfopenllm_v2/FlofloB/10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/fc62bbce-e2e4-4b41-b632-a09eb8b0a4d6.json delete mode 100644 data/hfopenllm_v2/FlofloB/10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/157d1e12-ced4-4b48-a651-5671a2b85ee6.json create mode 100644 data/hfopenllm_v2/FlofloB/10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/92e62d3a-3091-4538-b6da-ba705e11687a.json create mode 100644 data/hfopenllm_v2/FlofloB/40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/04f5fdc6-f1cd-4b2d-947a-86fee67b3b62.json delete mode 100644 data/hfopenllm_v2/FlofloB/40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/5450695c-a1fd-431f-9201-19d858e48867.json create mode 100644 data/hfopenllm_v2/FlofloB/83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/5013ccfc-6bc5-4862-898c-1ca781f92572.json delete mode 100644 data/hfopenllm_v2/FlofloB/83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/d780dd37-3e71-400a-93be-f9512ad77d3e.json create mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1000k_fineweb/38fff98c-72b1-453c-a2cf-cf077dd19d10.json delete mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1000k_fineweb/4ba295dd-43f3-45d6-8abe-58cd6fb11eee.json create mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed/42911928-ef64-474b-828a-02ce3383773e.json delete mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed/7d967a13-3d40-4a9c-ac1d-956c2b2b6b98.json create mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_selected/7989d7d3-c5e9-43c6-80a1-6de51533f9bf.json delete mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_selected/93f69ae3-c779-4f6b-8ac9-9bd8478e7eb2.json delete mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1200k_fineweb/3b102085-a3f6-4da6-abdf-f906f0b37f3c.json create mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1200k_fineweb/5b9acd52-7eb6-4099-98be-ecd6cae07835.json create mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed/666bef5a-2d62-4743-bff1-07365716ab19.json 
delete mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed/c8e1bfa5-d1dc-4bcb-9b91-397302006b1d.json create mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_selected/85de411c-2308-4824-bd6e-3327eeb6fe3e.json delete mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_selected/d4dabe47-4bc9-46fe-8c2d-206d5ed8874a.json delete mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1400k_fineweb/c5cb1709-7ba4-438c-8af7-d96cb4ab4ad0.json create mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1400k_fineweb/df28c4c2-d6a4-4ab0-a1ac-faf00a93de99.json create mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed/6fb37ad0-b41b-4ad7-91a2-79bbb835d445.json delete mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed/75cbe3a2-cbfa-482b-8c35-b74caf046df8.json delete mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_selected/062fa044-0fd4-49ea-988d-f477c7930496.json create mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_selected/c41df02e-5aff-4de6-a1c4-d45b5585e29d.json create mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed/aa587b4a-9c19-4231-ba72-9b66446460f9.json delete mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed/af001f63-a060-49ec-9bd3-f06b2ad96dc8.json delete mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_selected/556e1124-135e-473f-9e62-852f095b3118.json create mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_selected/be14e75e-4fb1-41aa-b168-1ec23eb305e0.json create mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_400k_fineweb/73be4a2b-28c9-4208-8107-3734fea25008.json delete mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_400k_fineweb/982d6727-aa6c-41fe-abe7-47811ad3c9da.json create mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed/0bf2fa4e-3bcb-46ff-a068-f4c796123c6d.json delete mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed/7b8f532b-c3a5-48fe-9d3f-e9c8b6f6897d.json delete mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_selected/1ce9e40f-5613-4d95-b451-a34f3feb961e.json create mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_selected/9f8fc05a-8658-4ed3-994a-965e6882d242.json delete mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_600k_fineweb/bf6d3042-aa42-45b5-8bb1-49a8c5e2fd50.json create mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_600k_fineweb/ced11f6e-490d-42e9-8f3e-00e22cfc2910.json delete mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed/4446e0a4-abdc-48a4-83f7-cc3d4aeede78.json create mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed/70ba788b-fe8c-4667-a859-0fb122de22b9.json delete mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_selected/52f63809-1390-4a66-8ae2-8f150425d2d9.json create mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_selected/e93f2d5f-7ffc-44b8-b2dc-d07b73de44ab.json create mode 100644 
data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_800k_fineweb/15cacfe0-bdfb-4b87-a813-bfa70ff71984.json delete mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_800k_fineweb/6b7b5025-01c0-470b-8856-b628b11f4e6c.json delete mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed/b85e5d55-dbdd-4383-ac86-75c83648c522.json create mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed/cff00e2a-41e3-40d2-aab3-4bb3bd7d0d0e.json delete mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_selected/dcddcf2f-f3fe-4f45-8c42-e95b1ac99d88.json create mode 100644 data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_selected/e1eab0cf-2c6d-44b2-8aaf-a75347741529.json delete mode 100644 data/hfopenllm_v2/FlofloB/smollm2_pretrained_200k_fineweb/3d10ce78-6474-48c0-8eb3-c5b7146d3e06.json create mode 100644 data/hfopenllm_v2/FlofloB/smollm2_pretrained_200k_fineweb/ed221db8-cf81-4257-8785-db9381eec5b7.json delete mode 100644 data/hfopenllm_v2/FlofloB/test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/41e2bd81-2369-416a-9287-021872efd931.json create mode 100644 data/hfopenllm_v2/FlofloB/test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/b314468b-401a-4318-b022-c966bf3366aa.json create mode 100644 data/hfopenllm_v2/FuJhen/ft-openhermes-25-mistral-7b-irca-dpo-pairs/a0dbb2eb-66c7-48a3-a85c-725b49141edf.json delete mode 100644 data/hfopenllm_v2/FuJhen/ft-openhermes-25-mistral-7b-irca-dpo-pairs/bfaec047-518f-42a0-93a1-c6bda3589c26.json delete mode 100644 data/hfopenllm_v2/FuJhen/mistral-instruct-7B-DPO/5f79d177-3ca8-4c95-83bb-2abb0e803e72.json create mode 100644 data/hfopenllm_v2/FuJhen/mistral-instruct-7B-DPO/812a36ec-4928-40a9-9aa8-ee39d7bb02f5.json create mode 100644 data/hfopenllm_v2/FuJhen/mistral_7b_v0.1_structedData_e2e/77af2424-0a23-49f3-97b0-316d04a33547.json create mode 100644 data/hfopenllm_v2/FuJhen/mistral_7b_v0.1_structedData_viggo/6f422676-2d7e-40ed-a5e3-4afc25564cfc.json delete mode 100644 data/hfopenllm_v2/FuseAI/FuseChat-7B-v2.0/26ca0085-db25-4664-823a-f56e08081dc4.json create mode 100644 data/hfopenllm_v2/FuseAI/FuseChat-7B-v2.0/43923dd6-838a-4259-a938-7766dfd9c07e.json create mode 100644 data/hfopenllm_v2/FuseAI/FuseChat-Llama-3.1-8B-Instruct/dba94a49-02b0-4e92-bd6c-c6bfc9be3cfb.json delete mode 100644 data/hfopenllm_v2/FuseAI/FuseChat-Llama-3.1-8B-Instruct/fdc9ea4d-acf8-4f2c-b727-482f464eb925.json create mode 100644 data/hfopenllm_v2/FuseAI/FuseChat-Llama-3.2-3B-Instruct/16a782dc-0795-4281-aad6-4f664a0940ab.json delete mode 100644 data/hfopenllm_v2/FuseAI/FuseChat-Llama-3.2-3B-Instruct/e39160a3-8332-467d-900f-52bb7d1446c1.json delete mode 100644 data/hfopenllm_v2/FuseAI/FuseChat-Qwen-2.5-7B-Instruct/1bae6b5e-47b0-4fe2-847a-8aec0a36342e.json create mode 100644 data/hfopenllm_v2/FuseAI/FuseChat-Qwen-2.5-7B-Instruct/5d24d4ad-9f37-4634-ba23-74fbc74fd298.json create mode 100644 data/hfopenllm_v2/GalrionSoftworks/MN-LooseCannon-12B-v1/043cd315-fcb7-4871-ae79-dee3fdefaef0.json delete mode 100644 data/hfopenllm_v2/GalrionSoftworks/MN-LooseCannon-12B-v1/eb76e049-3a5d-4786-9724-800b719a6113.json create mode 100644 data/hfopenllm_v2/GalrionSoftworks/MagnusIntellectus-12B-v1/3c377d7e-14bc-4c82-9ada-7560552abbe4.json delete mode 100644 data/hfopenllm_v2/GalrionSoftworks/MagnusIntellectus-12B-v1/99a948ab-cc5b-4f3a-aae0-684cbfb6ffb3.json create mode 100644 
data/hfopenllm_v2/GenVRadmin/AryaBhatta-GemmaOrca-2-Merged/43bb650b-8bb7-41b4-866a-cb2dad1499d6.json create mode 100644 data/hfopenllm_v2/GenVRadmin/AryaBhatta-GemmaOrca-Merged/bdf8f907-37ca-41ca-9a4e-f4dd446f895f.json create mode 100644 data/hfopenllm_v2/GenVRadmin/AryaBhatta-GemmaUltra-Merged/14a1872c-7afd-4cd4-ad87-853e4fc0847e.json create mode 100644 data/hfopenllm_v2/GenVRadmin/llama38bGenZ_Vikas-Merged/887e4ca9-ed48-4b33-b933-f8534a8d0377.json delete mode 100644 data/hfopenllm_v2/GoToCompany/gemma2-9b-cpt-sahabatai-v1-instruct/68ff0a5c-9e76-410b-a4e3-4b7de0e7fe35.json create mode 100644 data/hfopenllm_v2/GoToCompany/gemma2-9b-cpt-sahabatai-v1-instruct/c585488d-4043-482f-b1fa-4a61e96f7f0f.json delete mode 100644 data/hfopenllm_v2/GoToCompany/llama3-8b-cpt-sahabatai-v1-instruct/aa363693-a300-4545-b7f3-05492646c202.json create mode 100644 data/hfopenllm_v2/GoToCompany/llama3-8b-cpt-sahabatai-v1-instruct/d64541f6-19ef-4f04-a991-93efec6fe24f.json delete mode 100644 data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/1b9a4b84-1766-49ca-bd11-17a2340b9736.json create mode 100644 data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/1c13e194-8bee-4456-a249-f71e7e34b0eb.json create mode 100644 data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/1d3db737-20e7-4da1-a311-e60de0b41c93.json delete mode 100644 data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/235adbd2-8128-4428-af57-8d8e310ba56f.json create mode 100644 data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1/7b73d50e-358b-4961-8b58-63765ce5a82a.json delete mode 100644 data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1/a82acc9c-4093-4e0d-a862-7d6eb3cb7146.json create mode 100644 data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2/81dfd69c-cf01-4114-8157-fd09af6f490c.json delete mode 100644 data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2/baae7cee-8b76-456f-96dc-5ac900a9a36e.json delete mode 100644 data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3/9363a90d-6ec7-4de2-af17-a3e3e25de7d9.json create mode 100644 data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3/f38240ab-35e4-431e-b4d5-b1b0e1d57c5f.json create mode 100644 data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-14B-Instruct-abliterated-v4/01863b4f-9550-49c3-ad83-74c0bb535eb9.json delete mode 100644 data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-14B-Instruct-abliterated-v4/af440c67-78de-4053-98d8-8cded9657860.json delete mode 100644 data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/9c443687-99df-4cd9-8e19-d40cd83b30bc.json create mode 100644 data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/edd25437-38bc-443c-9da3-bc041270447e.json create mode 100644 data/hfopenllm_v2/Goekdeniz-Guelmez/j.o.s.i.e.v4o-1.5b-dpo-stage1-v1/31836d43-5022-488f-ba9e-379195809069.json delete mode 100644 data/hfopenllm_v2/Goekdeniz-Guelmez/j.o.s.i.e.v4o-1.5b-dpo-stage1-v1/b6bf7c36-006c-4256-a315-1de70e2540c3.json create mode 100644 data/hfopenllm_v2/Goekdeniz-Guelmez/josie-3b-v6.0/2a5a3ed6-7137-49e2-a141-497ceba88757.json delete mode 100644 data/hfopenllm_v2/Goekdeniz-Guelmez/josie-3b-v6.0/89947a58-5e39-468e-bbbc-2f3556a1c8f1.json create mode 100644 
data/hfopenllm_v2/Goekdeniz-Guelmez/josie-7b-v6.0-step2000/0b1c6aa6-b94e-4400-9b0d-c39aa1bcd808.json create mode 100644 data/hfopenllm_v2/Goekdeniz-Guelmez/josie-7b-v6.0-step2000/69423132-adc9-4b97-b799-15f37de1d7e5.json delete mode 100644 data/hfopenllm_v2/Goekdeniz-Guelmez/josie-7b-v6.0-step2000/7c2cc003-fab3-4fc9-a6b6-fb7075261e50.json delete mode 100644 data/hfopenllm_v2/Goekdeniz-Guelmez/josie-7b-v6.0-step2000/90d4e4e1-2185-4d21-8730-f1a4bf413157.json create mode 100644 data/hfopenllm_v2/Goekdeniz-Guelmez/josie-7b-v6.0/54d5bf0f-7c4c-40b1-bca6-5484ef8e2a04.json delete mode 100644 data/hfopenllm_v2/Goekdeniz-Guelmez/josie-7b-v6.0/aa158f5d-94a5-4f40-8a65-87fe9605abc1.json create mode 100644 data/hfopenllm_v2/GreenNode/GreenNode-small-9B-it/cfe8f9c7-e9bf-4a17-afa0-d5b8f46d24e7.json delete mode 100644 data/hfopenllm_v2/GreenNode/GreenNode-small-9B-it/d13def83-5ff8-4cde-aef5-b3c268c40c16.json delete mode 100644 data/hfopenllm_v2/GritLM/GritLM-7B-KTO/6d7f26d7-2336-4def-9d17-09d30a89e02d.json create mode 100644 data/hfopenllm_v2/GritLM/GritLM-7B-KTO/7fbc0323-1c78-46b6-a08a-6e5870c64e53.json create mode 100644 data/hfopenllm_v2/GritLM/GritLM-8x7B-KTO/1c769f0d-b99d-4b82-a529-f5264f7b3349.json delete mode 100644 data/hfopenllm_v2/GritLM/GritLM-8x7B-KTO/de98eb82-0606-46b8-bbfb-d054a0f6ef2c.json create mode 100644 data/hfopenllm_v2/Groq/Llama-3-Groq-8B-Tool-Use/a9365685-e299-48e2-931a-c63e123a9e00.json create mode 100644 data/hfopenllm_v2/Gryphe/Pantheon-RP-1.0-8b-Llama-3/bdf2d61a-daa1-4b1f-9245-43ff263540fb.json create mode 100644 data/hfopenllm_v2/Gryphe/Pantheon-RP-1.5-12b-Nemo/f0b4eef9-dab2-48e2-87f8-ad83ec33ec23.json delete mode 100644 data/hfopenllm_v2/Gryphe/Pantheon-RP-1.5-12b-Nemo/f9ed0b0f-6fa9-4450-97fe-204f6dc8d88a.json create mode 100644 data/hfopenllm_v2/Gryphe/Pantheon-RP-1.6-12b-Nemo-KTO/29e10491-8c34-4b7a-a0bd-77f6ca0dc54c.json delete mode 100644 data/hfopenllm_v2/Gryphe/Pantheon-RP-1.6-12b-Nemo-KTO/a2445d2d-b8a2-44e4-9c74-7401e7afde75.json delete mode 100644 data/hfopenllm_v2/Gryphe/Pantheon-RP-1.6-12b-Nemo/9a2ca2e5-a2e9-460f-b4dc-a6293ca13003.json create mode 100644 data/hfopenllm_v2/Gryphe/Pantheon-RP-1.6-12b-Nemo/c588d86a-80c4-46d1-93e0-b7fa8491f3b3.json create mode 100644 data/hfopenllm_v2/Gryphe/Pantheon-RP-Pure-1.6.2-22b-Small/0b11eb9a-61c8-4af1-8335-24bef2597e5d.json delete mode 100644 data/hfopenllm_v2/Gryphe/Pantheon-RP-Pure-1.6.2-22b-Small/f5f73aa0-2223-49c0-a2ad-df38ee33355b.json delete mode 100644 data/hfopenllm_v2/GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall/5aa1bdc6-4b8f-411f-9150-41217a94ec5e.json create mode 100644 data/hfopenllm_v2/GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall/7d31e5fd-700a-42a8-bea8-8989e8c52603.json delete mode 100644 data/hfopenllm_v2/GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall/9ddf874c-16a9-4f66-a3c5-140f10bc4787.json create mode 100644 data/hfopenllm_v2/GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall/f993880a-3c7c-4af9-a3ce-3c27207b9a3c.json create mode 100644 data/hfopenllm_v2/Gunulhona/Gemma-Ko-Merge-PEFT/2fae7e4a-8c28-4be8-9391-ca79077e32c2.json create mode 100644 data/hfopenllm_v2/Gunulhona/Gemma-Ko-Merge-PEFT/436e651e-6f04-44ff-ab3d-db8ed0d639bd.json create mode 100644 data/hfopenllm_v2/Gunulhona/Gemma-Ko-Merge/9fbccac2-c840-494e-a24d-a6f0c9a07b88.json create mode 100644 data/hfopenllm_v2/HPAI-BSC/Llama3-Aloe-8B-Alpha/a4ee6a33-df51-4a4e-a13d-45488a094fd7.json create mode 100644 data/hfopenllm_v2/HPAI-BSC/Llama3.1-Aloe-Beta-8B/a3923f10-e64c-4556-9616-4fe7072eff60.json create mode 100644 
data/hfopenllm_v2/HPAI-BSC/Qwen2.5-Aloe-Beta-7B/ca15d972-9075-42df-884b-5d069f6ff425.json delete mode 100644 data/hfopenllm_v2/HarbingerX/Zeitgeist-3b-V1.2/37dad0cc-36d1-4a4c-8d9c-0f5246889a0c.json create mode 100644 data/hfopenllm_v2/HarbingerX/Zeitgeist-3b-V1.2/905909a5-abef-46bf-9392-c97873e229df.json delete mode 100644 data/hfopenllm_v2/HarbingerX/Zeitgeist-3b-V1/3bc34460-661d-404b-bb1c-5b2fe395b897.json create mode 100644 data/hfopenllm_v2/HarbingerX/Zeitgeist-3b-V1/95bd05cf-8f59-409d-a99e-d249bad6c561.json create mode 100644 data/hfopenllm_v2/Hastagaras/L3.2-JametMini-3B-MK.III/76b12246-33f6-4992-a0ab-38704dcf6345.json delete mode 100644 data/hfopenllm_v2/Hastagaras/L3.2-JametMini-3B-MK.III/cf208ef7-8a9b-4633-8161-dae0825c380e.json create mode 100644 data/hfopenllm_v2/Hastagaras/Llama-3.1-Jamet-8B-MK.I/e4415806-0ec0-465a-b28f-9c8741436fb4.json create mode 100644 data/hfopenllm_v2/Hastagaras/Zabuza-8B-Llama-3.1/98e62ab5-d35a-42dd-904b-bed9c50f3745.json delete mode 100644 data/hfopenllm_v2/HelpingAI/Cipher-20B/21f72176-cf3b-43ae-aa6e-51d9fe5a6e90.json create mode 100644 data/hfopenllm_v2/HelpingAI/Cipher-20B/8fb3596e-224e-492b-bdb6-a95a16656eb0.json create mode 100644 data/hfopenllm_v2/HelpingAI/Dhanishtha-Large/154203c4-d86e-4c36-806b-c45c5cc568ce.json delete mode 100644 data/hfopenllm_v2/HelpingAI/Dhanishtha-Large/e097ccca-ab91-4f16-bbfa-ca97c91fdb77.json delete mode 100644 data/hfopenllm_v2/HelpingAI/Priya-10B/94aca944-b0a9-46ec-bdab-53bb5cbe3b78.json create mode 100644 data/hfopenllm_v2/HelpingAI/Priya-10B/e42c01f7-2869-4103-bbfd-81aa5a15c140.json create mode 100644 data/hfopenllm_v2/HelpingAI/Priya-3B/323d2f94-5e04-4627-9f74-129217f53eea.json delete mode 100644 data/hfopenllm_v2/HelpingAI/Priya-3B/f709afd7-3220-41b0-909a-74d9086c7dd9.json create mode 100644 data/hfopenllm_v2/HeraiHench/DeepSeek-R1-Qwen-Coder-8B/6bcc284b-8973-47d5-b5b1-1abb7a3242ee.json create mode 100644 data/hfopenllm_v2/HeraiHench/Double-Down-Qwen-Math-7B/691cace3-5316-4f5b-8693-67efb24a0a06.json create mode 100644 data/hfopenllm_v2/HeraiHench/Marge-Qwen-Math-7B/d387b3dc-9e76-44a6-9a9f-132a4fd762b4.json create mode 100644 data/hfopenllm_v2/HeraiHench/Phi-4-slerp-ReasoningRP-14B/f6f515d3-f5e9-4362-be51-bb8fc05527e6.json create mode 100644 data/hfopenllm_v2/HiroseKoichi/Llama-Salad-4x8B-V3/2e1e215f-b622-439f-a13f-531441e25ae3.json create mode 100644 data/hfopenllm_v2/HoangHa/Pensez-Llama3.1-8B/d50d66a9-a0c4-4b82-922c-9d012f1b50a1.json delete mode 100644 data/hfopenllm_v2/HuggingFaceH4/zephyr-7b-alpha/2029aa96-40b2-4af8-a7fa-8ae968b20502.json create mode 100644 data/hfopenllm_v2/HuggingFaceH4/zephyr-7b-alpha/ea7292a8-3f07-47be-b8ae-7d352ed1ecb6.json delete mode 100644 data/hfopenllm_v2/HuggingFaceH4/zephyr-7b-beta/3b9d5166-4144-4222-a39d-3d1d3956a6e8.json create mode 100644 data/hfopenllm_v2/HuggingFaceH4/zephyr-7b-beta/4eedd6d4-279f-4660-8d71-708a27bb53e0.json create mode 100644 data/hfopenllm_v2/HuggingFaceH4/zephyr-7b-gemma-v0.1/9c0f67d1-f95d-4ca0-a234-2e09ac788f55.json delete mode 100644 data/hfopenllm_v2/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1/8b347bb4-9f6d-4c82-bd5d-2fb5f7c8f881.json create mode 100644 data/hfopenllm_v2/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1/e5c0fbc9-f424-4b04-839a-8335adaf89cc.json delete mode 100644 data/hfopenllm_v2/HuggingFaceTB/SmolLM-1.7B-Instruct/690a5844-000e-4949-bbf9-8bd1ff2cb1bd.json create mode 100644 data/hfopenllm_v2/HuggingFaceTB/SmolLM-1.7B-Instruct/d91107fa-eb8d-4d01-90a2-fc9831f337b2.json create mode 100644 
data/hfopenllm_v2/HuggingFaceTB/SmolLM-1.7B/926999bf-1ba6-4321-82b2-fcced4336739.json delete mode 100644 data/hfopenllm_v2/HuggingFaceTB/SmolLM-1.7B/e1b7c18a-bff1-44a3-b589-95bcb0f88e36.json create mode 100644 data/hfopenllm_v2/HuggingFaceTB/SmolLM-135M-Instruct/57d481bf-0db9-4208-afda-dcd20df13964.json delete mode 100644 data/hfopenllm_v2/HuggingFaceTB/SmolLM-135M-Instruct/adff7af4-9bae-420a-9751-9f68ab81bf99.json delete mode 100644 data/hfopenllm_v2/HuggingFaceTB/SmolLM-135M/8cd60e42-3429-4938-b43e-9c951a57ca9f.json create mode 100644 data/hfopenllm_v2/HuggingFaceTB/SmolLM-135M/eb417e47-fe63-4dc5-b3e5-28782f3782da.json create mode 100644 data/hfopenllm_v2/HuggingFaceTB/SmolLM-360M-Instruct/b0f516dd-7185-4906-87a5-3c6f019894d0.json delete mode 100644 data/hfopenllm_v2/HuggingFaceTB/SmolLM-360M-Instruct/ec13c105-c846-4420-91af-d42e98b7a818.json create mode 100644 data/hfopenllm_v2/HuggingFaceTB/SmolLM-360M/1e562944-a205-4ef7-aff1-3776595d131c.json delete mode 100644 data/hfopenllm_v2/HuggingFaceTB/SmolLM-360M/236f7bdd-be50-4287-82b7-6efddc9dd3f4.json delete mode 100644 data/hfopenllm_v2/HuggingFaceTB/SmolLM2-1.7B-Instruct/09b81183-8ff2-44d5-a515-63cddc3e55c6.json create mode 100644 data/hfopenllm_v2/HuggingFaceTB/SmolLM2-1.7B-Instruct/6ccaf08d-1b0a-4ca9-941e-a71e2dce5cb4.json create mode 100644 data/hfopenllm_v2/HuggingFaceTB/SmolLM2-1.7B/2064938d-9f05-4740-a4d4-2a2da0eac21d.json delete mode 100644 data/hfopenllm_v2/HuggingFaceTB/SmolLM2-1.7B/db57503c-bfe7-4691-983e-68af941e8b1e.json create mode 100644 data/hfopenllm_v2/HuggingFaceTB/SmolLM2-135M-Instruct/43240184-8245-43ff-a971-678523918fe0.json delete mode 100644 data/hfopenllm_v2/HuggingFaceTB/SmolLM2-135M-Instruct/9a9fb17d-49ae-4a82-95c8-c8b55923d72f.json create mode 100644 data/hfopenllm_v2/HuggingFaceTB/SmolLM2-135M-Instruct/b3b854b6-700c-4297-b335-6acc3c385f84.json delete mode 100644 data/hfopenllm_v2/HuggingFaceTB/SmolLM2-135M-Instruct/df60b16b-184c-43d9-ac79-8627f09d265b.json delete mode 100644 data/hfopenllm_v2/HuggingFaceTB/SmolLM2-135M/1761caca-524f-4d59-81dd-631e3e24e0e5.json create mode 100644 data/hfopenllm_v2/HuggingFaceTB/SmolLM2-135M/a9d79c6a-f99a-4b60-8e37-ee2cdfe75f30.json delete mode 100644 data/hfopenllm_v2/HuggingFaceTB/SmolLM2-360M-Instruct/06409b6c-9d26-4bee-af75-16e6edb87a93.json delete mode 100644 data/hfopenllm_v2/HuggingFaceTB/SmolLM2-360M-Instruct/09ba6e80-5ab4-4c8c-b7ad-c1497413c207.json create mode 100644 data/hfopenllm_v2/HuggingFaceTB/SmolLM2-360M-Instruct/88e1dd78-d3bc-401b-88e9-d963bac181db.json create mode 100644 data/hfopenllm_v2/HuggingFaceTB/SmolLM2-360M-Instruct/a41bd607-f319-4063-a6e4-813f43e40568.json delete mode 100644 data/hfopenllm_v2/HuggingFaceTB/SmolLM2-360M/7751b65d-2bba-465c-9a1e-5ae51d94fcf6.json create mode 100644 data/hfopenllm_v2/HuggingFaceTB/SmolLM2-360M/8629aef1-c673-4b17-a9cc-b361a53bdaa7.json create mode 100644 data/hfopenllm_v2/HumanLLMs/Humanish-LLama3-8B-Instruct/532c927a-dc0c-4e65-8ab0-7b9ddd889d89.json delete mode 100644 data/hfopenllm_v2/HumanLLMs/Humanish-LLama3-8B-Instruct/e69e4e90-8177-44f5-8497-0a45ca9155ea.json create mode 100644 data/hfopenllm_v2/HumanLLMs/Humanish-Mistral-Nemo-Instruct-2407/843f9927-9865-4066-9cc0-f0522d3b914f.json delete mode 100644 data/hfopenllm_v2/HumanLLMs/Humanish-Mistral-Nemo-Instruct-2407/de0dbc50-5d26-4005-967c-3dcbde3a1282.json delete mode 100644 data/hfopenllm_v2/HumanLLMs/Humanish-Qwen2.5-7B-Instruct/df720663-5e82-4de7-9a19-88287bb5f56a.json create mode 100644 
data/hfopenllm_v2/HumanLLMs/Humanish-Qwen2.5-7B-Instruct/eeecb2cb-e286-443f-84aa-d825702a4ad8.json create mode 100644 data/hfopenllm_v2/IDEA-CCNL/Ziya-LLaMA-13B-v1/36ab4f5a-b2cf-4d01-8283-9eaf2c90928f.json create mode 100644 data/hfopenllm_v2/INSAIT-Institute/BgGPT-Gemma-2-27B-IT-v1.0/c4e810f1-ffb3-4ece-b445-64e339761530.json create mode 100644 data/hfopenllm_v2/IlyaGusev/gemma-2-2b-it-abliterated/025725b6-0034-48c0-a720-5fc210e5e24b.json create mode 100644 data/hfopenllm_v2/IlyaGusev/gemma-2-9b-it-abliterated/7bdd8928-c336-494e-9c87-de9ecc2749b8.json create mode 100644 data/hfopenllm_v2/Infinirc/Infinirc-Llama3-8B-2G-Release-v1.0/ff7369dc-3ff2-424b-80b0-e06a141b54f3.json delete mode 100644 data/hfopenllm_v2/Intel/neural-chat-7b-v3-1/23b6bf8e-c79a-4620-9e15-2742f45130af.json create mode 100644 data/hfopenllm_v2/Intel/neural-chat-7b-v3-1/a6dc7253-75fd-4897-be85-8ac89fc11f8e.json create mode 100644 data/hfopenllm_v2/Intel/neural-chat-7b-v3-2/296ceacc-542a-4000-bf9b-ae59b33a53ce.json delete mode 100644 data/hfopenllm_v2/Intel/neural-chat-7b-v3-2/f8842523-53de-4197-9cf4-979780cbe127.json delete mode 100644 data/hfopenllm_v2/Intel/neural-chat-7b-v3-3/0bec0f9a-863b-42f5-96eb-7263eb1c8a61.json create mode 100644 data/hfopenllm_v2/Intel/neural-chat-7b-v3-3/13870577-7579-48b4-9c92-202318ca6ecc.json delete mode 100644 data/hfopenllm_v2/Intel/neural-chat-7b-v3/617dbd41-3ca3-46d8-8fd2-491d6be39554.json create mode 100644 data/hfopenllm_v2/Intel/neural-chat-7b-v3/6ebd2806-2623-4773-93bd-1036ff01cb8c.json create mode 100644 data/hfopenllm_v2/IntervitensInc/internlm2_5-20b-llamafied/99d6a44b-d556-4674-8ade-a5b30cf99255.json create mode 100644 data/hfopenllm_v2/Invalid-Null/PeiYangMe-0.5/605118a3-316a-46b5-9719-f596e361a2a8.json delete mode 100644 data/hfopenllm_v2/Invalid-Null/PeiYangMe-0.5/c645a252-366a-4890-a16b-bf687bfbb593.json create mode 100644 data/hfopenllm_v2/Invalid-Null/PeiYangMe-0.7/271d2829-fbd4-438e-9f09-59539af68c8b.json delete mode 100644 data/hfopenllm_v2/Invalid-Null/PeiYangMe-0.7/294c1745-38cb-4b1e-aae6-e2878ab9065a.json create mode 100644 data/hfopenllm_v2/Isaak-Carter/JOSIEv4o-8b-stage1-v4/107bc549-75c1-4272-b567-f8ab9f6cd675.json create mode 100644 data/hfopenllm_v2/Isaak-Carter/JOSIEv4o-8b-stage1-v4/dfb451e9-c1c1-45a1-8082-155763366129.json delete mode 100644 data/hfopenllm_v2/Isaak-Carter/JOSIEv4o-8b-stage1-v4/e8bdfeef-9795-4b00-adec-6ac41c6718f7.json delete mode 100644 data/hfopenllm_v2/Isaak-Carter/JOSIEv4o-8b-stage1-v4/f28b57ba-103a-41bb-93b0-7b25fd155351.json delete mode 100644 data/hfopenllm_v2/Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/817eb9e1-bd7d-4033-b0ea-bc7df58dc087.json create mode 100644 data/hfopenllm_v2/Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/b2d80977-d079-42ec-b057-5aac530b9d70.json create mode 100644 data/hfopenllm_v2/Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated/16b33b80-3b4b-4edb-b89f-3d93dca8969c.json delete mode 100644 data/hfopenllm_v2/Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated/2013b3a9-3644-4f66-9941-b5d2ba6e7b81.json delete mode 100644 data/hfopenllm_v2/J-LAB/Thynk_orpo/3565fba3-e63d-49f8-9e8f-deef83531eb9.json create mode 100644 data/hfopenllm_v2/J-LAB/Thynk_orpo/63c94e0a-4572-4b8a-bfe0-7f88bb847d7f.json create mode 100644 data/hfopenllm_v2/JackFram/llama-160m/538f2b43-328c-456d-8a40-ff2b37924453.json create mode 100644 data/hfopenllm_v2/JackFram/llama-68m/fb7a68e6-716e-48c6-96c0-d227735f9a7c.json delete mode 100644 data/hfopenllm_v2/Jacoby746/Casual-Magnum-34B/0b9358f8-1e27-448f-9932-1f2c6feac036.json 
create mode 100644 data/hfopenllm_v2/Jacoby746/Casual-Magnum-34B/3593d4b8-5602-4cca-935f-a76e342f060a.json
create mode 100644 data/hfopenllm_v2/Jacoby746/Inf-Silent-Kunoichi-v0.1-2x7B/72d503fc-b221-498e-811a-a806769175d6.json
delete mode 100644 data/hfopenllm_v2/Jacoby746/Inf-Silent-Kunoichi-v0.1-2x7B/d1fa6abf-be2b-4ea6-bcbe-066ac37aa54f.json
create mode 100644 data/hfopenllm_v2/Jacoby746/Inf-Silent-Kunoichi-v0.2-2x7B/ad7d9698-d9e6-4f2d-9767-987835626c8c.json
delete mode 100644 data/hfopenllm_v2/Jacoby746/Inf-Silent-Kunoichi-v0.2-2x7B/f611991b-11c1-4232-bc63-8cf2942605ae.json
delete mode 100644 data/hfopenllm_v2/Jacoby746/Proto-Athena-4x7B/27d9d5c2-39d8-45e5-9614-a343144f05d8.json
create mode 100644 data/hfopenllm_v2/Jacoby746/Proto-Athena-4x7B/98899942-fcf0-41de-8587-44d7429bea47.json
delete mode 100644 data/hfopenllm_v2/Jacoby746/Proto-Athena-v0.2-4x7B/060feab1-4ce6-44a9-8ae2-c06468dd4dc9.json
create mode 100644 data/hfopenllm_v2/Jacoby746/Proto-Athena-v0.2-4x7B/bb51eb59-88f6-49c2-814a-11b2c80313d0.json
create mode 100644 data/hfopenllm_v2/Jacoby746/Proto-Harpy-Blazing-Light-v0.1-2x7B/d8563f36-e299-4186-a5dc-9dae51824e1f.json
delete mode 100644 data/hfopenllm_v2/Jacoby746/Proto-Harpy-Blazing-Light-v0.1-2x7B/f7455f30-e04e-4bc6-9d71-e33272d4577c.json
delete mode 100644 data/hfopenllm_v2/Jacoby746/Proto-Harpy-Spark-v0.1-7B/420cf07c-f043-49db-a62d-91e0c21aff2f.json
create mode 100644 data/hfopenllm_v2/Jacoby746/Proto-Harpy-Spark-v0.1-7B/43bc0528-7bc5-4eac-8848-c9995079450f.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen-0.5B-DPO-1epoch/7da8cc7e-791f-420d-9004-b29ddf54e381.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen-0.5B-DPO-1epoch/ce19893b-a7e1-4f8e-96f2-eb9cee2afeac.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen-0.5B-DPO-5epoch/24629e14-d197-4a5b-adff-7840af652f22.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen-0.5B-DPO-5epoch/42960491-549f-42bb-9669-5231ca0c436b.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen-0.5B-IRPO-1epoch/9c3ea35c-2cf7-4c31-8b83-c69df3cd9448.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen-0.5B-IRPO-5epoch/46548403-6eb5-4f7a-874c-1327420f4cab.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen-0.5B-eDPO-1epoch/0bd9c061-b7ee-4bc2-9deb-ea7eea012c49.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen-0.5B-eDPO-5epoch/aa2fe858-111c-45e8-b0d4-0048d7fc7ef7.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1/46c6ab7f-33a0-4e72-9a63-b24da3f9c4d6.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1/ad03cae6-b126-4157-a225-9576e4d651d0.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1/0d57b65d-3dd4-4185-b8cf-531105e94b5e.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1/1ff4251b-d01a-4ced-8868-776210e1ecb6.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1/c3c5cb61-3c4f-4796-9d3c-493618db0f91.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1/f8882044-6e71-4788-b2ee-f51f85e67ecc.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT/3c8f96c5-af91-4f41-a0b4-6e1b7d55d8ad.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT/48e6f9aa-5034-4653-8832-b0a16bf01079.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4-2ep/00efca13-0d04-4700-a90f-bd621a971555.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4-2ep/e26743b9-4caf-46f8-bd5a-7e4445c850b1.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4-3ep/f357f4eb-1837-4ab2-ad4b-9cc8a9054517.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4-3ep/febd4016-3a30-4b26-93e5-f7b556781b9b.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4-5ep/206c756e-1edc-491f-9f86-7e00c7ab7085.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4-5ep/ae82125e-94ac-48ca-8240-807e4b7ef9a0.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4/5321fa0b-b010-4e1d-9f20-a97b56f4f937.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4/7d591ed9-5802-43a3-bb38-ec45b69adb08.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5-2ep/d25a4602-ea50-4a53-952c-112ba250123b.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5-2ep/fde79985-6832-4315-8650-fdcf9ad68087.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5-3ep/232e3fc4-5cd2-4515-9e15-acd7d56bc34d.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5-3ep/aef8fd41-ac51-4fb5-b8ae-78ebca9b4215.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5-5ep/975f54fe-a581-4ce1-b0c1-7becb7605f09.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5-5ep/b5cdb9c2-d81a-4e0b-817a-3e101d122e7a.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5/3eac4497-66af-4fc6-bf89-459631e4a418.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5/92ae4461-48bc-47fe-a3ad-ea4c3452d395.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4-2ep/638e1cc0-9baf-4555-a278-4b21c46af86f.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4-2ep/9d58433f-a74c-4345-bd47-a8f2c4e2361e.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4-3ep/cef4161a-4e1c-4a92-bca8-b07f957a13b1.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4-3ep/e8109e5c-6276-4935-bfa0-fc969f118d3b.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4-5ep/715b556b-2bc0-4864-b4b1-b7413a5d45bc.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4-5ep/9d6b36c5-c0ec-4ab1-a12b-47efc34ebfc8.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4/5e307ea5-70da-476a-8d9e-1d488385565f.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4/7552ad5c-5d1f-478b-a931-036083b2954e.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_3e-7-3ep_0alp_5lam/343b7db1-8f96-4998-a6fb-5eb0aa1b6b21.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_3e-7-3ep_0alp_5lam/7bb3ae9f-9bb3-4bf2-9d97-d7f4f30697ac.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-1ep_0alp_5lam/821d67e5-da8d-4383-8825-3bfa72a91fc9.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-1ep_0alp_5lam/bfa11262-d7bd-44b3-8b8b-81013f1e0c24.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-2ep_0alp_5lam/902849f8-dc58-4e01-ba30-ff95412272d3.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-2ep_0alp_5lam/c5bddcba-4a40-4fbb-93e8-aebd06a70a66.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-3ep_0alp_5lam/4c5cace1-70ce-48f3-aad1-d141924c24de.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-3ep_0alp_5lam/dc35237c-606d-4609-927a-566bea767312.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-1ep_0alp_5lam/3924d1af-e167-4186-a34b-d9b4b8c26d59.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-1ep_0alp_5lam/e42051f2-90f2-4fbe-a4bd-623482abf10f.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-2ep_0alp_5lam/e70423b6-5a7d-4745-b5a3-968f363a3b7a.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-2ep_0alp_5lam/f733c4cc-90fc-4b31-bed3-c57dba6d4b6a.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-3ep_0alp_5lam/08f933a0-b096-4271-890e-0df7e20d1d20.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-3ep_0alp_5lam/2a7b8fa7-5c16-414b-968e-ec7b06e8143c.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-1ep_0alp_0lam/8434e448-ed77-45f2-9c31-39128912f842.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-1ep_0alp_0lam/dfa1b391-4b18-4ac0-a397-a983070647a7.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-2ep_0alp_0lam/96d31674-0011-4621-9131-31b5f6ede223.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-2ep_0alp_0lam/d801037b-1eb0-4058-9096-429e5237e015.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-3ep_0alp_0lam/d8663966-a5f5-40e6-a327-1255f7c3395f.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-3ep_0alp_0lam/e0c46f18-598e-402f-8955-68e71fab67cd.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-2ep_0alp_0lam/4b987cb5-cf7c-4866-8cf0-9926f78c2de9.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-2ep_0alp_0lam/a1fadf30-c543-4b73-bf28-0cb9cb2fc91f.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-3ep_0alp_0lam/57b69bd0-73f6-42e0-bd9e-984bb1e6a553.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-3ep_0alp_0lam/ec658058-1075-4918-9dc9-fc79d0dcf897.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-1ep_0alp_0lam/93597efa-6da8-4074-8049-6ec66f499cbf.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-1ep_0alp_0lam/b68baa86-3e1a-4888-98ba-2ecede79b4a7.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-2ep_0alp_0lam/00a5dc4a-6ffb-4e6a-9547-416ff29e0ded.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-2ep_0alp_0lam/0b11c8ab-2cfa-425d-9d81-d999f94401db.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-3ep_0alp_0lam/13cf92c4-fbeb-445a-85d6-bf71ce2e68c9.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-3ep_0alp_0lam/a3e48db8-3679-4f19-853d-82a73ef49400.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-1ep_0alp_0lam/14a173b6-4d56-4d22-a888-57ea46d72e67.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-1ep_0alp_0lam/7dbf35b2-80c1-4181-80f9-850ea51cead2.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-2ep_0alp_0lam/231f47db-1662-4313-9ff4-f32883f5615c.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-2ep_0alp_0lam/f46cc7cb-27e8-4723-9ecf-cbeef9789b25.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-3ep_0alp_0lam/c79df898-14c6-4f00-9f65-0d01cd34ed61.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-3ep_0alp_0lam/de200bef-71a2-4efb-bc34-02f69385b636.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-7-3ep_0alp_0lam/2c52917f-c396-410d-bc78-c93c433797fc.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-7-3ep_0alp_0lam/7ed1ff6a-fe4d-4f78-bbc6-c5e64a7fbfc1.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-1ep_0alp_0lam/0f1d2925-4e1c-495b-94be-f3515fbd53d7.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-1ep_0alp_0lam/82d38084-32b1-4224-810c-b66dd337b3fe.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-2ep_0alp_0lam/5cbb1972-9895-4689-9f6f-7e0037829a78.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-2ep_0alp_0lam/972e0d76-63bb-431b-9d9b-68dd6b738447.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-3ep_0alp_0lam/6bc42e37-1f31-47cb-97e4-9d0b28b53691.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-3ep_0alp_0lam/7337bc31-54b6-43b9-bb26-63f2273ffc7e.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_1ep_0alp_0lam/a1573b95-59e6-4ae0-bc12-6ef6fee90b76.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_1ep_0alp_0lam/c2e14e90-6c18-4a9f-9d68-a9d98960dd32.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_2ep_0alp_0lam/78c61b39-3c76-4af9-8d5e-fcd67d6c8779.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_2ep_0alp_0lam/972d45c5-acd1-4e54-8310-9ff56c5fb061.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_3ep_0alp_0lam/2faf738f-64f4-4e14-8011-9e00a4e2dd6a.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_3ep_0alp_0lam/e4c06400-da86-4448-b421-23476f50bdb3.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_1ep_0alp_0lam/15b28d99-e02a-4021-899b-adef87dfe96a.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_1ep_0alp_0lam/48f4c2a7-e819-4789-92ea-e02c5e92d3e4.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_2ep_0alp_0lam/b643171e-adaa-4f6e-8860-542950810578.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_2ep_0alp_0lam/cd9cbbac-f1ca-4193-88cc-e5968cc1bb62.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_3ep_0alp_0lam/a26204c0-90c5-44fd-8814-d69c6e4f4585.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_3ep_0alp_0lam/ab3685ab-1795-4a0e-8ee4-4f509616d1b8.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-1ep_1alp_0lam/9018f443-a63f-4e07-b10b-272f66d1eb0d.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-1ep_1alp_0lam/bc45fc30-c472-471a-b0c8-f68b9397d844.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-2ep_1alp_0lam/548d1536-b941-43a9-a60b-ae5448b70933.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-2ep_1alp_0lam/dff1ec0f-99a6-493d-9f2c-a6a523455b7e.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-3ep_1alp_0lam/99853109-17d9-46fa-a502-e4c977c1fb8f.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-3ep_1alp_0lam/a6385d82-407e-44b2-9148-9cbf8f353557.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_3e-7-3ep_1alp_0lam/17fb5411-3dc6-44b7-971b-8a080ed93de0.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_3e-7-3ep_1alp_0lam/e171a0a0-f46d-404f-84e8-539155284e17.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-1ep_1alp_0lam/670b89a5-2a83-480e-a33b-6903609a10dc.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-1ep_1alp_0lam/eadd93e5-5770-4d4a-a1b2-6e732a82ce34.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-2ep_1alp_0lam/151cb8c4-0a7d-4886-80ea-560902e1f932.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-2ep_1alp_0lam/e660922f-847b-4993-91a4-b96809ff1e85.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-3ep_1alp_0lam/1acb97c4-a9d2-4ec8-9486-77eb6857646c.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-3ep_1alp_0lam/41d18fa1-d19e-47cf-8fec-b04725ff097f.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-1ep_1alp_0lam/122a997d-f452-4511-96f3-f31ecb5d8d7b.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-1ep_1alp_0lam/1d803ac5-3ca6-4cb0-bcd1-779eaea1562d.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-2ep_1alp_0lam/81562e50-23c5-4ef1-b98c-b40625f3b8c6.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-2ep_1alp_0lam/c0d7514b-6809-49d7-9193-38e9c9ad03be.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-3ep_1alp_0lam/923f6446-f9fb-47ae-b585-ac131d75c107.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-3ep_1alp_0lam/95fa292a-ee64-4844-9646-ce3cc7f730d2.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-1ep_0alp_0lam/4d14c584-b5a1-41cd-9605-78088dfebd7f.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-1ep_0alp_0lam/da330322-f144-44bb-833a-7b92c11f3888.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-2ep_0alp_0lam/10014f98-cae2-435b-b6e7-17064bb079a5.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-2ep_0alp_0lam/1415d3d9-d7f8-48ef-8a2f-aa675c4c14db.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-3ep_0alp_0lam/4b0ab369-e72f-4229-b449-3a21ee9d2c95.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-3ep_0alp_0lam/c6d4f510-abc8-4524-99b0-e6d98c6e9aa9.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6-3ep_0alp_0lam/478b6c1f-3329-4c9b-9d90-59b8b551c1af.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6-3ep_0alp_0lam/b4d7f827-d1cb-46c6-9eea-248867fdc07f.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_1ep_0alp_0lam/212f8dd2-3c61-45bd-a3de-2326334feb73.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_1ep_0alp_0lam/d1d2f75d-ddd8-42cb-9de8-1f327479eb9b.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_2ep_0alp_0lam/9251282e-f72f-406e-a2cf-e7063516f624.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_2ep_0alp_0lam/9df1e491-fa9d-41c7-ae46-8cc70a47a60f.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6-3ep_0alp_0lam/6c070a2b-9f5e-46cd-b8ba-b6220509b85d.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6-3ep_0alp_0lam/91a3c739-7e16-4d21-8879-bb2fd4d4c6ad.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_1ep_0alp_0lam/4496da44-d4bd-40a8-8f91-56b2cb2fa766.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_1ep_0alp_0lam/aaa78d8f-6050-4b5d-bb67-da6c9d1ee065.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_2ep_0alp_0lam/1f0430fe-24ff-4ef6-8577-ee5bfa74f18b.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_2ep_0alp_0lam/69c6593c-6e84-498f-8d68-62c1809a4606.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-1ep_0alp_0lam/b1c0f775-987a-4da5-9451-09bf295b16ba.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-1ep_0alp_0lam/f374772b-2685-41e2-a455-9002e48e3739.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-2ep_0alp_0lam/6db801f8-5253-47c0-b87e-6779bff42f6b.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-2ep_0alp_0lam/c589d3d6-9d8b-45e3-a6c6-60f25d44349b.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-3ep_0alp_0lam/0d704671-c0b6-4296-85b5-eaf972d6be6a.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-3ep_0alp_0lam/1e76e5ee-1728-4756-8f13-d68ce1ca3a5e.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-1ep_0alp_0lam/7e31545f-0865-4843-914b-a71f8a84314f.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-1ep_0alp_0lam/a44985f9-2255-421b-93b9-fcb5761e17b8.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-2ep_0alp_0lam/431c7130-5a19-4a71-8a92-fea9726769ac.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-2ep_0alp_0lam/ad59cc80-784d-41bf-9a3e-9d9f286667d2.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-3ep_0alp_0lam/0b72d3c8-aaff-4eca-854d-07d132e9aa25.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-3ep_0alp_0lam/ca850c4a-14d0-4145-9977-0d33e6e3e362.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7-3ep_0alp_0lam/021eca20-1a26-4eba-9006-fb005e91696d.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7-3ep_0alp_0lam/7389caa3-6d8f-43e3-b3f2-d9320e56f621.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_1ep_0alp_0lam/1e822b0f-0d80-4613-983b-ebd2e6fbfcd6.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_1ep_0alp_0lam/8662faaa-8964-468a-991b-43b2f0449d48.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_2ep_0alp_0lam/1206f592-e6f7-4e7d-83cd-cbe82b37ec58.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_2ep_0alp_0lam/56cad8c7-566f-46e5-9692-3c11f4408921.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7-3ep_0alp_0lam/e4085c6a-bc16-4328-a724-4b9838b55faa.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7-3ep_0alp_0lam/f86fb81b-29b8-425f-8129-ea054108a214.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_1ep_0alp_0lam/3c5ff9bc-b33a-4557-9c76-ccc041de985c.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_1ep_0alp_0lam/b929b955-1fbb-43d0-add1-4d58fdc4097c.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_2ep_0alp_0lam/64e0c863-f33c-44d7-b244-e5288e5018fb.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_2ep_0alp_0lam/df723a0f-9a32-42f3-9421-780159f7d821.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep/09f59d70-2948-4eb6-a14e-2550c97b5542.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep/c1046d2c-0b5b-4ab7-b173-8d5b5ecbc07d.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-3ep/57d9c59d-8cd8-4253-a076-8b16becc740e.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-3ep/60c02070-7554-4764-8a02-841ca75a0d5c.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam/5fb209a6-3d82-4017-8e44-3615d7c50218.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam/d243f226-149b-4824-837e-e80ab68bae9d.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_1ep/2ccd9994-1d9c-40c4-85d0-c74af7544b6d.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_1ep/4f9361d0-2ad9-44da-a1d9-876d43451ae6.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_2ep/1f1f5c3d-4ee4-4ed8-adeb-9e83942a7e32.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_2ep/6c6e9ebc-f83d-48d5-b69f-be43d4167a0e.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam/7cd2c0da-15b8-4ad6-8cad-feb68631c079.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam/f9c4db8f-b56e-41cd-9c87-ba2d4b36520a.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_1ep/36b84cf2-d221-4e9a-b728-37dc2bf7e1d6.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_1ep/d1ae295e-1364-442c-a3e4-ac2ad9884a78.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_2ep/1fd0d1db-1d75-4b10-bae8-33023c2c7466.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_2ep/86c29317-7d5f-42c2-a156-615d3c4a259d.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep/70a5a5fb-9dd6-4b1c-a7ac-11155d5ef837.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep/c6c02512-6c91-4818-a084-c48915fd83de.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5/047ed340-ddb8-40ca-b1ee-10f12b182e43.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5/326affa2-9ea4-4fc9-b60f-d2abeb7493c3.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5-2ep/94b65c53-7e0c-4506-bd19-82d23709d269.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5-2ep/b3a190d1-5b86-4439-a21e-1f118239db82.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5-3ep/1c779874-5568-462e-9e6e-0e3fd42d023e.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5-3ep/b37a7db5-b26f-4a82-b27c-6c3a2ba72fda.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5-5ep/05a59445-b816-4982-9b1a-1c2394ffbaa9.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5-5ep/f562a3e4-6afe-4c1d-a597-6265af34f925.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5/cdbbfad9-85e8-4c8b-b70c-708c08a62798.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5/ff952579-e92d-4af8-9497-f49fed5efba0.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5-2ep/9cf15d33-3624-4161-bdad-069b09ab2290.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5-2ep/b541ede0-6de9-4557-8280-43567fd3dd96.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5-3ep/658df4b3-084f-479f-b507-3a4247683651.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5-3ep/8514f601-0bb2-4639-90cc-29e96088e7de.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5-5ep/4e72cc33-538b-4fa7-8038-89794fed6511.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5-5ep/57e6d0cf-943a-4b83-a1f4-4f03b5066523.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5/891bb442-c054-4941-9bd1-8352139f143e.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5/ec205127-21c0-4edf-bb3a-ec8ccac4fcdb.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-DPO-1epoch_v1/14b260e6-4300-43ec-b7af-587a2f5b03fb.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-DPO-1epoch_v1/ac94a989-668a-49e6-9975-9169d7394574.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-MDPO-1epoch_v1/53de1fc9-7097-4103-b731-588a7bf39f80.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-MDPO-1epoch_v1/6961b682-04e5-45af-bd2b-8ad6546503e7.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT/1a1031c5-3ec2-4d12-93eb-e0a3b0448ed4.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT/eb0f4662-54f5-48ca-b871-726e34bbf540.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_1e-6-3ep_0alp_5lam/51b62d59-f39c-49ca-af0a-73df6440e29d.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_1e-6-3ep_0alp_5lam/e4e00595-e1ed-42c9-a518-ff104253cad9.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_1e-7-3ep_0alp_5lam/3a7a5a89-0ab8-47cd-95c6-14a6186e05b9.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_1e-7-3ep_0alp_5lam/622a0ae1-0eb5-49f0-bc44-d396c7233e27.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-6-1ep_0alp_5lam/71291a41-283e-42ca-b192-7b759e3c3712.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-6-1ep_0alp_5lam/f78ac837-d5f4-48f1-8a9e-1549b0020160.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-6-2ep_0alp_5lam/2ae9cee5-8f3c-4303-802f-481a03edaf9f.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-6-2ep_0alp_5lam/7e504fef-b304-4c1a-856d-06e56a8869d7.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-6-3ep_0alp_5lam/654b55d0-940c-43bd-9478-0bd67bb7b0d8.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-6-3ep_0alp_5lam/f8258f5e-8826-4fe1-b9d3-61708e79d4ab.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-7-1ep_0alp_5lam/099ce031-1e11-4a07-bac1-03bef9b915d6.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-7-1ep_0alp_5lam/c23f1072-c7be-4eab-b866-16c6429071e4.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-7-2ep_0alp_5lam/75ff25fd-e5f7-4380-b192-cbc8a8ee95aa.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-7-2ep_0alp_5lam/c02ad005-8e12-46d9-8bb3-090f62c6a946.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-7-3ep_0alp_5lam/cbc43c7a-d8ac-4b03-a383-703f7fa51757.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-7-3ep_0alp_5lam/e1d1dd0d-ef8e-44e1-aca1-f10c53f5aa84.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_5e-7-1ep_0alp_5lam/02c4e0de-4a4e-44b7-bc4c-44c92ade94ec.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_5e-7-1ep_0alp_5lam/72d7f252-1bff-40ad-9ec8-1ac2a2e02a8e.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_5e-7-2ep_0alp_5lam/4e38a2db-c67e-4f2a-84a0-f9afa7d32bd5.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_5e-7-2ep_0alp_5lam/5eb10878-11e6-43ad-9bb5-658a3495129c.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_5e-7-3ep_0alp_5lam/23b29cd4-cfd0-49f1-8959-c3aa8be9722f.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_5e-7-3ep_0alp_5lam/77255cfb-3e18-4a3b-98a8-b0072aacb669.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_1e-6-3ep_0alp_0lam/03db2532-f8e0-41e9-ac0c-ff2913f4b12a.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_1e-6-3ep_0alp_0lam/be9afede-e624-43e6-99dd-52e0d2b413ac.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_1e-7-3ep_0alp_0lam/273f0d50-aa4e-4469-8360-2ce0a2e1a850.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_1e-7-3ep_0alp_0lam/9632892a-a6b2-4f17-827e-bfef9a712985.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-6-1ep_0alp_0lam/79a48e79-d59b-4f86-a8f4-3af174a9ee0b.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-6-1ep_0alp_0lam/a690910a-388f-4a51-98a2-fc1e1bb327e2.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-6-2ep_0alp_0lam/8c8eafcc-bb0f-4483-93ff-1379158a5d10.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-6-2ep_0alp_0lam/9da9a0e6-257a-41f6-b3a3-e3279a4924db.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-6-3ep_0alp_0lam/6c009b93-145d-4630-bda1-fb24bf764e7a.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-6-3ep_0alp_0lam/dfed058c-48b2-4e1e-9a29-624771e3e9dd.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-7-1ep_0alp_0lam/1b4ccc58-920c-4089-b8ca-af3c71c5c3be.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-7-1ep_0alp_0lam/bcb53a8a-1670-400c-aab6-bd8ed2ebcdf4.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-7-2ep_0alp_0lam/4d278257-d64b-4da7-bcd6-0d3fbee80dd8.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-7-2ep_0alp_0lam/8438a108-0d5d-48b6-b73a-981d13329daa.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-7-3ep_0alp_0lam/3650d718-e20a-4310-a248-3897f7713e93.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-7-3ep_0alp_0lam/88616292-1e38-4481-af30-6b60e28fb097.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_5e-7-1ep_0alp_0lam/44094907-0b09-4706-a117-116a7e10a6e5.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_5e-7-1ep_0alp_0lam/6e224cd8-7f12-42a0-968e-311450d24e58.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_5e-7-2ep_0alp_0lam/1f17dbf3-f498-41cb-8ec0-5dabb2d9655e.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_5e-7-2ep_0alp_0lam/d19e8078-87e9-4760-9b91-6b5f478820e1.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_5e-7-3ep_0alp_0lam/896464f1-01bc-4370-8d90-3368323b2908.json
delete mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_5e-7-3ep_0alp_0lam/c5829ba8-e45c-4242-b308-9455f832cb58.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-IPO_5e-7-1ep_0alp_0lam/9889f0b9-9051-485c-bd44-32b1e56b865c.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-IPO_5e-7-3ep_0alp_0lam/6563ce79-6df4-4c78-89e2-064f1250d898.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_1e-6-3ep_1alp_0lam/b1778755-e6e6-47e2-925d-44d786c4ff62.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_1e-7-3ep_1alp_0lam/3ae923b8-e9f4-472e-8d5e-54fa5f42ce01.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_3e-6-1ep_1alp_0lam/40831e23-0a9e-4bdc-a365-9399b6b82ff9.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_3e-6-2ep_1alp_0lam/4a60fa82-34dc-4b0c-9102-65adac5039e4.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_3e-6-3ep_1alp_0lam/75ff2c43-dd19-48ae-9ba3-f99cdbadda1c.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_3e-7-1ep_1alp_0lam/d7962833-660a-4b9b-9836-8a2f3251f38e.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_3e-7-3ep_1alp_0lam/ad8ecabf-a868-496e-892b-582efb54fa6a.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_5e-7-1ep_1alp_0lam/49f25d3d-80c9-4723-8fa9-1501d44d70aa.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_5e-7-2ep_1alp_0lam/70ea520c-3e0c-4412-9dbe-40a00801335c.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_5e-7-3ep_1alp_0lam/8e7f8bad-812b-4f6c-8dea-1cf44584c300.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.1_3e-6-3ep_0alp_0lam/3b39a8f0-c5ba-4f74-9d27-bf5b389e038c.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.1_5e-7-3ep_0alp_0lam/702a14d5-a7fd-4926-ab26-e4c3b7f5eda7.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.3_3e-6-3ep_0alp_0lam/20e5d087-7b20-4a39-81da-7334354b61f0.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.3_5e-7-3ep_0alp_0lam/4c5a769c-0472-402c-8e97-d24e5b302bac.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.5_1e-5-3ep_0alp_0lam/96166735-ed03-4931-81c9-d3daed1913d9.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-1ep_0alp_0lam/06d9b1e3-d054-4fa5-bf1f-9d6149e5111c.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-2ep_0alp_0lam/776fd8d8-9846-4359-97d4-2340425d1315.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-3ep_0alp_0lam/197ae1c5-c9b1-4912-91a3-8ccacddc1be6.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.5_4e-6-3ep_0alp_0lam/1fffd3d9-1c6b-4965-84e6-980bb0a13af3.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.5_6e-6-3ep_0alp_0lam/57e8aaf0-f10b-4024-9f93-7b7f13f3ab10.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.5_7e-6-3ep_0alp_0lam/304d5bee-df2d-40fc-b4a0-e3d99178f4bd.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.5_7e-7-3ep_0alp_0lam/6126d30d-e2dd-4b8b-9cb3-acdc76084bbb.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.7_3e-6-3ep_0alp_0lam/fc7284d9-a73f-4562-a781-5cb87247183f.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.7_5e-7-3ep_0alp_0lam/26ab447c-a850-4197-983a-a0dca4532029.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.9_5e-7-3ep_0alp_0lam/ee9e2131-aa99-49e1-9814-f0664614354b.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-VDPO_3e-6-1ep_3vpo_const/23c472f7-f060-4a69-8f72-12490675825a.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_0alp_0lam/04172bef-c06b-4c08-b2af-9e1fe4d97664.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_10vpo_const/3436355a-d2fe-411f-a764-4cb8284deb4c.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_1vpo_const/265655c0-2ead-4dd7-8c7e-4bee69d51bce.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_3vpo_const/645cae82-9e7b-4d1b-b944-e3783089c1c1.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_0alp_0lam/ab658117-7c6b-428f-8f60-bf88a1d8a5bc.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_1vpo_const/03c4b5ce-3b22-4d9f-bf60-b626b52a114b.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_3vpo_const/ce7e3a31-c65b-4521-b685-fcbd067c75d9.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_0alp_0lam/adb53e2c-5dee-4840-8eae-e0186c6e103f.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_10vpo_const/ba89563d-f53a-4bf0-91e1-92ac950523d8.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_1vpo_const/3fc0ad8d-4bb2-401a-9baf-b94b39b7e1aa.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_30vpo_const/ed816bcb-bbe9-48ae-a6ac-3603779a985f.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_3vpo_const/f347ed24-066a-4cba-8478-f03628cb2b5b.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_0alp_0lam/ffddfea0-d17e-44e7-8931-a9601e9cb26b.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_10vpo_const/ec351fa1-78c2-48c6-83f0-7c2a9b2f0731.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_1vpo_const/a0038c34-130b-49dc-a93f-94706a3dad50.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_30vpo_const/cbd5ea42-1e5b-4984-bdcf-e60fbfb9d692.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_3vpo_const/b902e2b2-a0b3-4467-b076-b98717c40d74.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.1/4c749665-59ff-49df-a193-0262f66e6003.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.3/c99899c6-95e1-4dea-ac12-f8df49728a3b.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-rDPO_3e-6-1ep_0vpo_const_0.1/13deca9f-073e-444b-bf79-35e816f7c312.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.1/c8adc0a5-f4bf-4f88-984c-aba506eae6a9.json
create mode 100644 data/hfopenllm_v2/JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.3/b146daaf-ce1f-4520-bc19-21ce8679b220.json
create mode 100644 data/hfopenllm_v2/Jimmy19991222/Llama-3-Instruct-8B-SimPO-v0.2/45e1d037-1ed0-472c-a311-c651fde270fc.json
delete mode 100644 data/hfopenllm_v2/Jimmy19991222/Llama-3-Instruct-8B-SimPO-v0.2/4d7428e8-41a2-4834-900e-e43b05f4d131.json
create mode 100644 data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun/3f4ce54a-01f3-4c23-a4ba-22d47e0344dc.json
delete mode 100644 data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun/9e8f395c-f481-4a64-86ee-053961b17c42.json
create mode 100644 data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log/470d52be-9dbd-4714-b004-f65cc82d245f.json
delete mode 100644 data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log/913d1072-8ea3-4e0d-9d72-d30ae186dc7d.json
delete mode 100644 data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log/55baee54-fb05-49a1-962d-145a93de91a8.json
create mode 100644 data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log/c836fd05-1969-439c-91e1-fd0cab816f6c.json
create mode 100644 data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bleu-beta0.1-no-length-scale-gamma0.4/14774c6b-eb03-4abc-92df-1e7a196ca8a4.json
delete mode 100644 data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bleu-beta0.1-no-length-scale-gamma0.4/601e250a-5c2f-4947-9ea3-0f903b2823ec.json
create mode 100644 data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun/5293ae0c-8022-44d4-b2f5-4f5390dff93e.json
delete mode 100644 data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun/8ab1619c-6edf-457e-9834-0e9dc127d6a4.json
delete mode 100644 data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log/5f6d2c1e-1c66-4b1c-beed-a730d93d997f.json
create mode 100644 data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log/9020f91f-a8f0-447d-af68-247aa81a25c6.json
create mode 100644 data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log/0cd6837a-8c3f-4529-9ea0-8755e1725467.json
delete mode 100644 data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log/6621f47a-13c7-421c-b054-cc9116a04e4e.json
create mode 100644 data/hfopenllm_v2/Joseph717171/Hermes-3-Llama-3.1-8B_TIES_with_Base_Embeds_Initialized_to_Special_Instruct_Toks_dtypeF32/7cb17011-cf77-4e86-b67f-84e6ff4b8086.json
create mode 100644 data/hfopenllm_v2/Joseph717171/Llama-3.1-SuperNova-8B-Lite_TIES_with_Base/086831f9-c677-428b-a997-4da58733633c.json
create mode 100644 data/hfopenllm_v2/Josephgflowers/Cinder-Phi-2-V1-F16-gguf/d71893b8-b82c-490b-a700-b579d64e0610.json
create mode 100644 data/hfopenllm_v2/Josephgflowers/Differential-Attention-Liquid-Metal-Tinyllama/9893689f-c27d-4148-a27f-cd07b07e98b7.json
create mode 100644 data/hfopenllm_v2/Josephgflowers/TinyLlama-Cinder-Agent-v1/90f2df23-a9ec-44be-ade5-89b59cb7368a.json
create mode 100644 data/hfopenllm_v2/Josephgflowers/TinyLlama-v1.1-Cinders-World/afd545da-390a-478a-b0f5-ea819f088f27.json
create mode 100644 data/hfopenllm_v2/Josephgflowers/TinyLlama_v1.1_math_code-world-test-1/ce776f68-856f-4aee-b7e4-e55d15e8d714.json
create mode 100644 data/hfopenllm_v2/Josephgflowers/Tinyllama-STEM-Cinder-Agent-v1/9b015729-524c-44f3-9c2c-c42981d7a61e.json
create mode 100644 data/hfopenllm_v2/Josephgflowers/Tinyllama-r1/56a54ffc-4692-496c-95df-8e4ad19d4d95.json
delete mode 100644 data/hfopenllm_v2/JungZoona/T3Q-Qwen2.5-14B-Instruct-1M-e3/464673ee-0238-40b4-9c15-1a1551b9f65c.json
create mode 100644 data/hfopenllm_v2/JungZoona/T3Q-Qwen2.5-14B-Instruct-1M-e3/4b105969-2ce5-4c62-89ef-efd392c2ca89.json
create mode 100644 data/hfopenllm_v2/JungZoona/T3Q-qwen2.5-14b-v1.0-e3/31af79b1-48c1-4399-9d16-8582c92996ee.json
create mode 100644 data/hfopenllm_v2/Junhoee/Qwen-Megumin/59a67f29-cb7d-497c-b7bb-1764a665ae33.json
delete mode 100644 data/hfopenllm_v2/KSU-HW-SEC/Llama3-70b-SVA-FT-1415/08fcda98-72e9-4338-b2a2-6db924a47288.json
create mode 100644 data/hfopenllm_v2/KSU-HW-SEC/Llama3-70b-SVA-FT-1415/fe57367c-74b7-483e-af54-4f404cbea75b.json
delete mode 100644 data/hfopenllm_v2/KSU-HW-SEC/Llama3-70b-SVA-FT-500/4282c191-344e-4326-a80e-49b712687e7c.json
create mode 100644 data/hfopenllm_v2/KSU-HW-SEC/Llama3-70b-SVA-FT-500/fda2277b-1513-416e-b586-ed05920a0bb4.json
delete mode 100644 data/hfopenllm_v2/KSU-HW-SEC/Llama3-70b-SVA-FT-final/58fe6545-2f0c-44de-a29b-2da839b141a4.json
create mode 100644 data/hfopenllm_v2/KSU-HW-SEC/Llama3-70b-SVA-FT-final/b3dde216-f80a-4664-aadc-b5f5dd3e5895.json
create mode 100644 data/hfopenllm_v2/KSU-HW-SEC/Llama3.1-70b-SVA-FT-1000step/07ed6241-fd1a-46eb-91fd-92a4a8f6bd15.json
delete mode 100644 data/hfopenllm_v2/KSU-HW-SEC/Llama3.1-70b-SVA-FT-1000step/fe896cef-7667-482d-b7f1-5361fc66ccce.json
delete mode 100644 data/hfopenllm_v2/Khetterman/DarkAtom-12B-v3/64802b86-879e-4072-b5ad-aab17d7251f0.json
create mode 100644 data/hfopenllm_v2/Khetterman/DarkAtom-12B-v3/ba76c356-cd6a-4636-8ab1-18bb9df69881.json
delete mode 100644 data/hfopenllm_v2/Khetterman/Kosmos-8B-v1/936cbaa1-e55b-46b8-9610-a5a8faaf4434.json
create mode 100644 data/hfopenllm_v2/Khetterman/Kosmos-8B-v1/c6ae54a1-2821-48d1-b689-bbb85aaa70a6.json
create mode 100644 data/hfopenllm_v2/Kimargin/GPT-NEO-1.3B-wiki/6f296f0e-80ca-49b7-94e7-cb45b795c715.json
create mode 100644 data/hfopenllm_v2/KingNish/Qwen2.5-0.5b-Test-ft/b5509e11-820a-4ad4-8c6a-0294762502a8.json
create mode 100644 data/hfopenllm_v2/KingNish/Reasoning-0.5b/90d73665-8d83-4e74-ab7d-29b1d3b6181b.json
delete mode 100644 data/hfopenllm_v2/KingNish/Reasoning-0.5b/98f5e59e-0bdb-405b-a18e-3addd8920951.json
create mode 100644 data/hfopenllm_v2/KingNish/Reasoning-Llama-3b-v0.1/72387647-cbac-4b72-9c22-db7029a39457.json
create mode 100644 data/hfopenllm_v2/KingNish/qwen-1b-continued-v2.1/6219ec01-4b6a-4acd-aee1-96c3e8e48643.json
create mode 100644 data/hfopenllm_v2/KingNish/qwen-1b-continued-v2.2/5c323d7c-25cd-4718-8a1f-54d986cadaf2.json
create mode 100644 data/hfopenllm_v2/KingNish/qwen-1b-continued-v2/adfab21a-941b-4efc-8b63-fdfb3074ba9b.json
create mode 100644 data/hfopenllm_v2/KingNish/qwen-1b-continued/350d00a4-7501-4130-a069-323530bc9729.json
delete mode 100644 data/hfopenllm_v2/Kquant03/CognitiveFusion2-4x7B-BF16/66f84aee-5d79-4fec-9fff-799ac874d165.json
create mode 100644 data/hfopenllm_v2/Kquant03/CognitiveFusion2-4x7B-BF16/ea809d28-178e-4a0b-ab5a-34739077c5ff.json
create mode 100644 data/hfopenllm_v2/Kquant03/L3-Pneuma-8B/243d5ccd-58f3-4da5-8718-553f3f456490.json
delete mode 100644 data/hfopenllm_v2/Kquant03/L3-Pneuma-8B/5420d88b-bc26-4d04-9812-ffce8a3564e6.json
create mode 100644 data/hfopenllm_v2/Krystalan/DRT-o1-14B/a45537a7-76a6-4855-b83b-abe965f13460.json
delete mode 100644 data/hfopenllm_v2/Krystalan/DRT-o1-14B/dbd87f5e-e5ba-447b-8416-b6413c3dab09.json
create mode 100644 data/hfopenllm_v2/Krystalan/DRT-o1-7B/9be911b6-b9f4-47b1-849d-62eb20c9e944.json
delete mode 100644 data/hfopenllm_v2/Krystalan/DRT-o1-7B/acb8e4cc-41b2-47ef-b819-d480189c618c.json
create mode 100644 data/hfopenllm_v2/Kukedlc/NeuralExperiment-7b-MagicCoder-v7.5/33d7d5f0-cbee-4a26-b5e8-48bdd12492cf.json
delete mode 100644 data/hfopenllm_v2/Kukedlc/NeuralExperiment-7b-MagicCoder-v7.5/4775e169-e3a7-41b6-bf1e-a7e8e0edb4fc.json
create mode 100644 data/hfopenllm_v2/Kukedlc/NeuralLLaMa-3-8b-DT-v0.1/4355fbdd-ac72-4f26-8e07-b7e8d774d238.json
create mode 100644 data/hfopenllm_v2/Kukedlc/NeuralLLaMa-3-8b-ORPO-v0.3/4bffc633-e20c-4874-b7db-d1b7dabb8070.json
create mode 100644 data/hfopenllm_v2/Kukedlc/NeuralSynthesis-7B-v0.1/2d5c844d-d950-4254-bac2-0a986659c541.json
delete mode 100644 data/hfopenllm_v2/Kukedlc/NeuralSynthesis-7B-v0.1/3d2603e3-d556-48e8-ba94-555faf9f1807.json
delete mode 100644 data/hfopenllm_v2/Kukedlc/NeuralSynthesis-7B-v0.3/b3412f38-d0bc-47c9-a750-14bdbf4e65d8.json
create mode 100644 data/hfopenllm_v2/Kukedlc/NeuralSynthesis-7B-v0.3/f6e74b3c-9ee4-40c3-bf92-35d965503a04.json
delete mode 100644 data/hfopenllm_v2/Kukedlc/NeuralSynthesis-7b-v0.4-slerp/4e30bf00-f6b7-4c28-8cf8-dc64427fb958.json
create mode 100644 data/hfopenllm_v2/Kukedlc/NeuralSynthesis-7b-v0.4-slerp/8f1d2600-7347-48b8-9759-11570598459d.json
create mode 100644 data/hfopenllm_v2/Kukedlc/Qwen-2.5-7b-Spanish-o1-CoT/cd653bfd-2c06-4224-aeeb-bf591995a69e.json
delete mode 100644 data/hfopenllm_v2/Kumar955/Hemanth-llm/0787e240-a1f4-444a-b3dd-7ef1a1d394b4.json
create mode 100644 data/hfopenllm_v2/Kumar955/Hemanth-llm/cdf1fcc7-429d-44bd-b76c-d26ee743f6fe.json
delete mode 100644 data/hfopenllm_v2/L-RAGE/3_PRYMMAL-ECE-7B-SLERP-V1/02fee4d1-8899-4a93-b6f1-a1a8d251cedd.json
create mode 100644 data/hfopenllm_v2/L-RAGE/3_PRYMMAL-ECE-7B-SLERP-V1/4828bd36-5453-4383-8985-08d04a7ebecd.json
create mode 100644 data/hfopenllm_v2/LEESM/llama-2-7b-hf-lora-oki100p/4c2baa59-c2f1-4779-9d21-1f69c0821968.json
create mode 100644 data/hfopenllm_v2/LEESM/llama-2-7b-hf-lora-oki10p/555c1079-c4d0-4b9e-9d2d-769e7ba32429.json
create mode 100644 data/hfopenllm_v2/LEESM/llama-3-8b-bnb-4b-kowiki231101/58a4a1c6-0ee4-4524-9ca1-b40870f1d600.json
create mode 100644 data/hfopenllm_v2/LEESM/llama-3-Korean-Bllossom-8B-trexlab-oki10p/eea2a38a-4f1b-48d0-894c-09974894f264.json
create mode 100644 data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/3d8063ab-0ad5-43e4-83ff-90b46dee766f.json
delete mode 100644 data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/97f7c73d-6d69-4c04-9cff-4914253003b0.json
create mode 100644 data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct/da5e0284-7c44-42d4-a110-a23880de277f.json
delete mode 100644 data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct/e2a2d764-ba6b-450d-8f94-abf2af95e793.json
delete mode 100644 data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.5-32B-Instruct/a172b1d1-6d6e-4cd9-9a85-78cb4f71661e.json
create mode 100644 data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.5-32B-Instruct/bef017bb-47b1-48e4-93c4-3b222a16af7a.json
create mode 100644 data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct/401c83b0-b7d2-4987-9e46-f127fdbb595f.json
delete mode 100644 data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct/7fa474fb-4aa1-4855-9759-a28056c7a5e7.json
create mode 100644 data/hfopenllm_v2/LLM360/K2-Chat/c6fde59b-73ed-4179-a907-076be068b262.json
delete mode 100644 data/hfopenllm_v2/LLM360/K2-Chat/f7e7c296-74f4-49fa-946d-142341749355.json
delete mode 100644 data/hfopenllm_v2/LLM360/K2/4b1e267f-90c4-403a-a7cd-5c006153408b.json
create mode 100644 data/hfopenllm_v2/LLM360/K2/90997fea-6c67-493e-bd8e-5327cfb33ea4.json
create mode 100644 data/hfopenllm_v2/LLM4Binary/llm4decompile-1.3b-v2/08957d63-7462-44ff-9dd8-060a5801a31b.json
delete mode 100644 data/hfopenllm_v2/LLM4Binary/llm4decompile-1.3b-v2/86f0a81b-69da-4f36-a6b0-8a36f79d5c1c.json
create mode 100644 data/hfopenllm_v2/Lambent/qwen2.5-reinstruct-alternate-lumen-14B/a434f569-e7d6-4464-afa8-6104be43fa06.json
delete mode 100644 data/hfopenllm_v2/Langboat/Mengzi3-8B-Chat/13e12b5c-d3bb-4634-967d-e5741e623be1.json
create mode 100644 data/hfopenllm_v2/Langboat/Mengzi3-8B-Chat/e32ed251-e817-409f-b4c3-8f168f1ff822.json
create mode 100644 data/hfopenllm_v2/Lawnakk/BBA100/1d9a65a3-d2bb-48a7-8a00-8e4a79c36db2.json
delete mode 100644 data/hfopenllm_v2/Lawnakk/BBA100/745591e3-3c6a-473a-9e51-4bffe1c86fa7.json
create mode 100644 data/hfopenllm_v2/Lawnakk/BBALAW1.0/608398da-ae2a-4be2-aaf9-6ec8899aa63d.json
delete mode 100644 data/hfopenllm_v2/Lawnakk/BBALAW1.0/61739e6e-92b0-4577-acd2-8c58ffc612a4.json
create mode 100644 data/hfopenllm_v2/Lawnakk/BBALAW1.2/80e04641-be7d-4351-a4f6-1318981ef834.json
delete mode 100644 data/hfopenllm_v2/Lawnakk/BBALAW1.2/917081cc-ee33-4c1f-85b0-9256ef57f6b3.json
delete mode 100644 data/hfopenllm_v2/Lawnakk/BBALAW1.3/60fa19b9-bf1d-4f39-b421-cb59379f5206.json
create mode 100644 data/hfopenllm_v2/Lawnakk/BBALAW1.3/e74222c6-636c-4075-8d4d-30c73fa70fda.json
delete mode 100644 data/hfopenllm_v2/Lawnakk/BBALAW1.6/684962b9-d734-4a10-a0cb-45bc4d957c2c.json
create mode 100644 data/hfopenllm_v2/Lawnakk/BBALAW1.6/aed80361-9304-44a0-934a-52976d7f1bf3.json
create mode 100644 data/hfopenllm_v2/Lawnakk/BBALAW1.61/709bd280-b03e-4908-808f-34566bc968f4.json
delete mode 100644 data/hfopenllm_v2/Lawnakk/BBALAW1.61/af87bb98-cc36-4c8d-9694-7e7428a899ac.json
delete mode 100644 data/hfopenllm_v2/Lawnakk/BBALAW1.62/5dc300f1-e908-4d71-addc-2717e3702b12.json
create mode 100644 data/hfopenllm_v2/Lawnakk/BBALAW1.62/66c495b3-4b09-42ad-b742-4d753c3bde7a.json
delete mode 100644 data/hfopenllm_v2/Lawnakk/BBALAW1.63/6005fc02-9f02-436a-a535-ec68a3c6dbc6.json
create mode 100644 data/hfopenllm_v2/Lawnakk/BBALAW1.63/e24f7be6-3051-4990-8b93-121aec5402eb.json
create mode 100644 data/hfopenllm_v2/Lawnakk/BBALAW1.64/0321571b-4246-4490-bd6c-7b106eb8e15a.json
delete mode 100644 data/hfopenllm_v2/Lawnakk/BBALAW1.64/4a4ce0f8-c41f-469e-b7c7-a4e3d857377e.json
create mode 100644 data/hfopenllm_v2/Lawnakk/BBALAW1/54dbf947-ab18-40dd-9cd7-a496289b2e72.json
delete mode 100644 data/hfopenllm_v2/Lawnakk/BBALAW1/59b40f56-c27f-4b15-9288-b7033e2e4f26.json
create mode 100644 data/hfopenllm_v2/LenguajeNaturalAI/leniachat-gemma-2b-v0/d841e204-ed6a-439d-8408-d5cfb3b38dae.json
create mode 100644 data/hfopenllm_v2/LenguajeNaturalAI/leniachat-qwen2-1.5B-v0/96b57891-83e3-4948-ad48-64a2a370e166.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/CheckPoint_A/30301818-6dad-45f9-acfb-a68ccc7c0609.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/CheckPoint_A/771366a5-e227-4ff8-b60f-744020994bec.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/CheckPoint_B/4e44fd55-9538-4065-8763-5d1c3d00be5d.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/CheckPoint_B/50743107-30de-4c5d-bf83-cc003af8a5db.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/CheckPoint_C/625ee1b3-e0a1-4a86-83a4-6e66b380f864.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/CheckPoint_C/a4fe370d-1722-4fdf-bf75-8416baeaba19.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/CheckPoint_R1/7eba2aef-5c97-4526-92a8-d62bd5b59b6f.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/CheckPoint_R1/89fda762-1989-4850-837c-f79ef538c58c.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/LCARS_AI_001/1de1f906-0e36-4f79-b159-16ef8ee33ab3.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/LCARS_AI_001/f6b84bde-67aa-4c50-a46e-1f80605037de.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/LCARS_AI_1x4_003_SuperAI/d8588222-9e4b-47c1-9f86-92f47c9c8e38.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/LCARS_AI_1x4_003_SuperAI/db8614eb-2b53-460c-a80b-dceb47a9703f.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/LCARS_AI_StarTrek_Computer/15e6e6e6-39fa-424f-ba12-5f209cd4b2cc.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/LCARS_AI_StarTrek_Computer/a3e19823-43ac-44ac-9dee-960a98139fa8.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/LCARS_TOP_SCORE/04631aa2-f1fd-4aea-ba88-53b474c71fe8.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/LCARS_TOP_SCORE/81225b85-1523-49c1-b770-897112d2e6ae.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/Mixtral_AI_SwahiliTron_7b/254deaf7-a253-4d41-a10d-1143f86b288c.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/Mixtral_AI_SwahiliTron_7b/4f5fadb6-5fad-4b82-a027-1d4f497dc476.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWebAI_Human_AGI/8e1f811e-3e86-4440-a5dd-bf607aa02ad6.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWebAI_Human_AGI/ba0b66f5-724a-4a6b-ac20-a36d530a8b4b.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWebAI_Human_AGI_001/a4c9a905-1a7c-406a-ab38-6a5e71ed0bf5.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWebAI_Human_AGI_001/eed0b3b4-e277-49ee-aed5-f3599b2d5653.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_CyberTron_Ultra_7b/96a21b6e-ed47-40fb-85cd-15924330e60d.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_CyberTron_Ultra_7b/e8b992b8-9f0a-4bfb-ab53-3b07ca1ca117.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAGI_001_M2/daa704a9-2eed-4549-a847-3606c9e8a733.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAGI_001_M2/f41f5471-6384-4510-85d2-41f236082583.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAGI_002/2728eccc-525f-4350-901b-dbc352c78014.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAGI_002/3a6cfbae-80c1-4ec6-9c14-1ddeeb6e7138.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_001/3e7ae935-46c3-427c-8713-41c659c1828a.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_001/f177b7f7-7143-4f72-9f9d-54fe2bc9797b.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_006/66782676-c942-4aff-b754-b96cd96cf1f9.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_006/cdbebbea-4749-472b-8cec-5da5ffa96d65.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_007/3143a635-10da-4cb5-9c2f-eae2988d9e60.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_007/941a9e27-2ac4-4dab-a6d0-cb9319c79a27.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_009_CHAT/a6d3b7b1-8834-4b74-8849-6d80381c46f5.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_009_CHAT/caf93f75-530e-4f4d-9cc0-2cf9b0a7f2ff.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_010_CHAT/7f53cef7-fba6-4802-93a2-b54f82a32d74.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_010_CHAT/d3ca0458-ee97-4a4c-a6a9-066880ffefb5.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT/615bf89b-9357-46f4-82ed-f49b0021da01.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT/bc7bf4d0-45e9-4b37-8e5f-edc92fb1bd66.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML/06398630-23ad-4000-8ea2-fcca230568d7.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML/fbd83964-530c-4d0e-a305-9f8451affb23.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML_r1/10d76569-edca-47db-abf2-1d0fd73df198.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML_r1/bdfa30f8-da0f-418f-adaf-caafda4c81a5.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/431f8459-3c12-4260-a158-c58ec910590d.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/bcd8c141-d286-4567-bb06-934e546a5c7c.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/bd5e550c-5355-4e01-bafc-2ca89899253a.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/f842ad5b-24f0-419b-9d65-5a6ff1f5e04b.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_MX/3a09590f-28f3-4161-8a93-d42cec62aa90.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_MX/9cc77018-d090-4202-bcf5-d0031097b84e.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/0b365c44-3cc2-4149-8614-7de6b6c2581d.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/0f6b76ca-c4b8-40b2-a3af-2ea1c3650933.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/dc90b971-313a-4a76-b042-350adf37a43c.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/f276ad54-4e3b-4718-ae1f-0479565e4565.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_RP/a4a38b96-036f-40db-8a0b-024a36f004f5.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_RP/dec20396-6555-4773-bf02-2cd1fcedda89.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_TextVision/558a0ed7-a667-421e-bbab-094b46274239.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_TextVision/eebc33e1-0016-4adf-815a-72653a34c01b.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_HumanAI_M1/803c3898-c1a6-4832-ac3a-a86139489810.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_HumanAI_M1/ee856df0-01ea-4f06-9323-951144c9e82f.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_HumanAI_M2/4ea0436d-6ec9-40db-af56-2f7f1b0317df.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_HumanAI_M2/bfaa3d3e-66fd-4477-85af-4b83f13ff05b.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_HumanAI_M3/99debdd2-1dea-4eb6-be5c-c144656cfe20.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/SpydazWeb_HumanAI_M3/d5dd0be3-e7a7-4636-b513-3c1d5532807f.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_12/ad67bb88-7f74-4eb4-b771-0b3b60be4416.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_12/b4b57280-49db-4a07-929f-dbe2f222250c.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_14/6233aac6-0ce3-4f3c-8ee0-87d2482d3ea2.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_14/af2f579d-1e8a-47d8-8e44-a599bee83e37.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_001/51d4724b-c85c-4ad4-a4bd-9be93cd99a2a.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_001/763c840e-ea73-453e-8e54-5f4fd6fda9cd.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_002/4fb40ac4-a637-4b9a-b69d-ba551c0f0938.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_002/86e8ff02-0dd2-4023-ab18-359d24a8a4fd.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_MUSR/285688d5-c7ad-437b-a54c-9e6108d85267.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_MUSR/ffc4ef41-4a28-4816-be54-8ffd8e153073.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_MasterCoder/85ce2909-a5f9-413a-8719-cd0a66874535.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_MasterCoder/f75fe902-f1c7-4e6c-87d6-128688db8d94.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_001/8a7df636-f1bb-4a74-bb7f-8a412edf6bd1.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_001/dbd3098b-4532-441b-a81c-072c52579be6.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_003/438e4aa3-5e02-446e-bd3a-07ef724d24ff.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_003/79336acd-d465-4938-af7f-f7a688f46fd4.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_AdvancedStudent/027fdc55-61eb-416c-b6ad-4408912d151b.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_AdvancedStudent/ed000ee0-4193-46c4-8114-2ea3dbfec9f7.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Student/37a4895d-def5-494d-9b62-d8c97ba9350b.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Student/89f92d24-19c1-4021-819d-9c7ed717046c.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Teacher/0d53c27e-962c-428f-b540-35ab027883a8.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Teacher/24fa44cb-86d9-4e67-be8f-42f7fc574d52.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_001/6f7b2d91-24d6-442c-93a5-9afc88e9a308.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_001/b13652e3-43f1-4670-94f7-1a0bbf622f33.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_002/21793520-7d1a-4040-bb96-fa7fe98ae580.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_002/8201723e-92fb-4207-afa8-df7db794c889.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Coder/59d53c40-5b16-4a70-a693-5fb554cf7614.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Coder/e166fa17-c285-466e-ab2e-1eb106ebd271.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Math/983323f2-7caa-42cb-8838-8ea041303a70.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Math/b28a569c-6bdf-4547-a2ce-c3e224764be3.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_MathMaster/2de129c8-2259-4367-a619-85d9e8f61e06.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_MathMaster/a79378f7-01b3-4bf0-8b76-2e670d2a7366.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Student_Coder/1e7531fc-9f12-4c7c-8bf5-44511c37c23b.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Student_Coder/c242030f-fb2b-42dc-a5d1-687273b17282.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Teacher_Coder/3b3fdb16-b6e1-40c8-9ac0-02f1f2207eb7.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Teacher_Coder/64c0088b-f9e7-4a9a-b449-3e1b514370ff.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Top_Student/d652c8f6-d5b4-482f-91c7-5eb9529765c1.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Top_Student/ef6e8e0d-7ba4-45ea-aaf7-617f68f2e97c.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_X1/7c72e837-92fd-4f3b-9c4f-205ffc93ac70.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_X1/f8c131a4-1fee-4694-8753-88853418ef4b.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_X2/169fe3b3-527a-408f-9442-5bc3616cc320.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_X2/27dec9ff-fb18-43dd-949f-7c0587a5858f.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_RP_R1/060df34d-ab67-43e1-bd56-ebaceb77abd3.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_RP_R1/fd4405cf-9849-4606-a01c-a20459198853.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_BIBLE_002/060f29d1-8b1d-4651-808d-b1419bd76cd9.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_BIBLE_002/a6357673-3daa-4593-8593-2b65a7d5477e.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_ChatML_002/07981f28-b019-42f8-b14b-44ab73ebaa0a.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_ChatML_002/121d4877-1955-48db-a23a-6b0ad0623b9e.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_ChatQA/1f1eab02-219e-4ad8-af50-e103541e1c9d.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_ChatQA/4e72d3b7-4ebb-470d-8f86-66d6cb28095f.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_ChatQA_003/471aac2a-5c4b-4b1b-a56b-490fafc444d8.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_ChatQA_003/b4cccfb3-1c17-48a3-a211-a26c44de757f.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_TEMP_/05e97a86-681d-42a2-8a47-beade25d8fc9.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_TEMP_/f44f513c-0814-4f3b-94a4-9e28318da40e.json
create mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_Top_Teacher_/6c0899b4-f066-45f6-827d-11c535ef0634.json
delete mode 100644 data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_Top_Teacher_/a4beba0f-b860-4d7d-b1c3-0f569ba59171.json
delete mode 100644 data/hfopenllm_v2/LightningRodLabs/Flashlight-v1.0/cd4408c3-d966-4195-bcf2-5bc80eca1501.json
create mode 100644 data/hfopenllm_v2/LightningRodLabs/Flashlight-v1.0/f9660557-b9f6-4ecc-b260-c245f0e62b5b.json
delete mode 100644 data/hfopenllm_v2/LightningRodLabs/Flashlight-v1.1/64c75370-981d-43ae-9823-d4fb0696d468.json
create mode 100644 data/hfopenllm_v2/LightningRodLabs/Flashlight-v1.1/89168032-5840-4c2c-821e-b3d717ade46f.json
create mode 100644 data/hfopenllm_v2/LightningRodLabs/Flashlight-v1.2/10d0aa63-67d9-4dba-9bdc-db7ab3b4547d.json
delete mode 100644 data/hfopenllm_v2/LightningRodLabs/Flashlight-v1.2/404afbae-0393-48e6-874c-e1cb28e9a1eb.json
create mode 100644 data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-2B-SLERP-V1/6f66ae5b-8cb6-4263-98a4-4a1eddfaca10.json
delete mode 100644 data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-2B-SLERP-V1/d53a7070-911a-4a5e-ba0c-766c4f39b3f5.json
delete mode 100644 data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-2B-SLERP-V2/25368664-1f32-4d69-9afc-91d58efd01e2.json
create mode 100644 data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-2B-SLERP-V2/5e715199-7030-47b4-89c6-83ba0968c07c.json
create mode 100644 data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP-V1/3fca39e8-443d-47da-a858-83a68c18eec9.json
delete mode 100644 data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP-V1/dcadbfb3-fbeb-4108-bc27-7ccfc7ba1e3a.json
delete mode 100644 data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP-V2/41c47381-66d5-4d3a-8bfb-4269cb882385.json
create mode 100644 data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP-V2/b7518bd2-d3af-49e6-823a-f8d507e8e60f.json
delete mode 100644 data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP-V3/0c21359f-8f0b-44a8-813e-a5f612f13658.json
create mode 100644 data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP-V3/fa399f16-1652-430c-be19-afaf5ab96be1.json
delete mode 100644 data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP/aa396cb3-10aa-4777-a185-fcb38ffc5ec3.json
create mode 100644 data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP/cbe5032b-122c-4a0b-a099-50e998a4bc77.json
delete mode 100644 data/hfopenllm_v2/Lil-R/PRYMMAL-ECE-1B-SLERP-V1/a863e655-ee86-4f39-ae1a-0a65992f7eb4.json
create mode 100644 data/hfopenllm_v2/Lil-R/PRYMMAL-ECE-1B-SLERP-V1/fd8c3209-dcc0-4d27-a3aa-d0f76ef86f8d.json
create mode 100644 data/hfopenllm_v2/Lil-R/PRYMMAL-ECE-7B-SLERP-V8/1a18d49c-ad7b-4823-abbc-7191e9d659cd.json
delete mode 100644 data/hfopenllm_v2/Lil-R/PRYMMAL-ECE-7B-SLERP-V8/6a81c514-57b9-4a45-9a1a-0378e7554d04.json
create mode 100644 data/hfopenllm_v2/LilRg/10PRYMMAL-3B-slerp/9e2c614e-1104-43a6-9e8f-b7851562e01a.json
delete mode 100644 data/hfopenllm_v2/LilRg/10PRYMMAL-3B-slerp/e9371530-675d-48d1-9145-7ea15c893833.json
delete mode 100644 data/hfopenllm_v2/LilRg/ECE-1B-merge-PRYMMAL/3fefac8e-d5aa-4998-ab60-6e3dcc49f77f.json
create mode 100644 data/hfopenllm_v2/LilRg/ECE-1B-merge-PRYMMAL/7d4b83ab-9c9d-46e5-8cbf-b8afcf781230.json
create mode 100644 data/hfopenllm_v2/LilRg/ECE_Finetunning/a42b5d7e-be7f-4cde-aaf0-001e2cf05a44.json
delete mode 100644 data/hfopenllm_v2/LilRg/ECE_Finetunning/f20fd926-d690-4fe2-80a4-3e79dc37f03f.json
create mode 100644 data/hfopenllm_v2/LilRg/PRYMMAL-6B-slerp/21f6688c-be52-4352-9c95-d37c0a5f6c94.json
delete mode 100644 data/hfopenllm_v2/LilRg/PRYMMAL-6B-slerp/8fedde0a-96fe-4a6f-9e0f-87832cfd418e.json
delete mode 100644 data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V3/a656eacf-8134-446c-8417-e1c3c54fe941.json
create mode 100644 data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V3/e92ba586-7bee-4a9b-b388-e35efde3d36f.json
delete mode 100644 data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V4/0d276bd3-a338-4383-88b0-9e653ae01387.json
create mode 100644 data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V4/45ed0bb3-efbf-4a32-9735-d814aa08790a.json
delete mode 100644 data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V5/150d0730-e194-4d2b-96e1-54f914b5fe28.json
create mode 100644 data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V5/eff28375-89a7-4970-9342-428b07d0c6f4.json
create mode 100644 data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V6/23877e30-b8fb-45ea-a803-47df757ea909.json
delete mode 100644 data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V6/b23913b9-f774-4927-be16-874d8e146218.json
create mode 100644 data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V7/8bc25d04-9cc5-4551-a9c5-ce185c7ad974.json
delete mode 100644 data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V7/dd12d7df-9b32-4d2a-ae9a-40304cf4bfd7.json
delete mode 100644 data/hfopenllm_v2/LilRg/PRYMMAL-slerp-Merge/9574abe0-00e3-4e38-bda0-b217f002a480.json
create mode 100644 data/hfopenllm_v2/LilRg/PRYMMAL-slerp-Merge/d2d4b5a5-109d-4d26-a166-3d97b341584e.json
create mode 100644 data/hfopenllm_v2/LimYeri/CodeMind-Llama3-8B-unsloth_v2-merged/ac404d92-7a06-4758-ab1d-fcf840c2b995.json
create mode 100644 data/hfopenllm_v2/LimYeri/CodeMind-Llama3-8B-unsloth_v3-merged/95ea7fbf-d3f2-4fc1-ba17-05549f6e4d25.json
create mode 100644 data/hfopenllm_v2/LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged/c101e272-24d2-44db-9b0f-2ed4d17cec41.json
delete mode 100644 data/hfopenllm_v2/LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged/d020a655-1cc0-49e9-9db1-f8b871babd5c.json
create mode 100644 data/hfopenllm_v2/LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-merged/2cb789c7-dddf-42b2-8fdf-4cbd5132946c.json
create mode 100644 data/hfopenllm_v2/LimYeri/CodeMind-Llama3.1-8B-unsloth-merged/a414aefd-ce24-49a9-b431-0c6014ebfbd8.json
delete mode 100644 data/hfopenllm_v2/Locutusque/CollectiveLM-Falcon-3-7B/44737b7e-4942-4496-a818-fddce66da4d6.json
create mode 100644 data/hfopenllm_v2/Locutusque/CollectiveLM-Falcon-3-7B/91fcb6a3-d351-48c8-87e8-e2a06642e925.json
create mode 100644 data/hfopenllm_v2/Locutusque/Hercules-6.0-Llama-3.1-8B/3cd90efa-ddf0-43c4-884c-84337ded14b2.json
create mode 100644 data/hfopenllm_v2/Locutusque/Hercules-6.1-Llama-3.1-8B/c66c21e9-a332-40f9-ae87-bdd78a25d753.json
create mode 100644 data/hfopenllm_v2/Locutusque/Llama-3-NeuralHercules-5.0-8B/0b4def91-29df-45d9-8dd4-c4097ec47ba3.json
create mode 100644 data/hfopenllm_v2/Locutusque/Llama-3-Yggdrasil-2.0-8B/2cbf258c-369e-4b1c-863f-43cf97c3a7a4.json
create mode 100644 data/hfopenllm_v2/Locutusque/TinyMistral-248M-v2.5/8372889e-f9cd-4cf7-aec0-8e18d5c627e3.json
create mode 100644 data/hfopenllm_v2/Luni/StarDust-12b-v1/ce4cc270-57da-4d08-9130-62508b409cb2.json
delete mode 100644 data/hfopenllm_v2/Luni/StarDust-12b-v1/fa64b745-6b4b-4fee-b77e-d744e54a17d6.json
delete mode 100644 data/hfopenllm_v2/Luni/StarDust-12b-v2/401f6afc-9a2a-4bfe-87b2-daa6df848424.json
create mode 100644 data/hfopenllm_v2/Luni/StarDust-12b-v2/4cfedb8f-0e47-4008-9bc5-fb15e4afa607.json
create mode 100644 data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v3/de3c949d-bab5-4430-bdd1-48e1b7860934.json
create mode 100644 data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v4/011e53cd-409f-479b-9c3d-bfce75a1277b.json
create mode 100644 data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v5/1ff40e45-5be4-4625-9f66-5599a829903d.json
create mode 100644 data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6-cpt/fed97d94-2949-4383-8f25-fa79bd413508.json
create mode 100644 data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6/f4820bc8-7dfd-4439-af95-21b6cc9367ac.json
create mode 100644 data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7-rebase/36e576bb-de50-49ec-a91f-f134c11bbe38.json
create mode 100644 data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7/0edd388b-7a1b-4334-9b72-52d84653ff67.json
create mode 100644 data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.5/b3199674-328e-41a0-9aa4-bf39aec735bc.json create mode 100644 data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.6/52db4d79-7040-4525-934e-0f33e4acec63.json create mode 100644 data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.7/ee34821e-9182-433f-a8b0-745711e23738.json create mode 100644 data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.8/10ef0990-5356-432f-b24c-dd107188ec5f.json create mode 100644 data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.9/47de680d-33b1-4441-92da-4b97a5fc513f.json create mode 100644 data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8/96ac0351-2ade-4d76-bcf9-bc0f633f8694.json create mode 100644 data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9-stock/31aae266-c14b-451f-8bab-62ee7d5d382e.json create mode 100644 data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.1/f6edb102-e867-46d1-afdc-3c45166bd510.json create mode 100644 data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.2/8b7756cc-9af3-4f98-84ac-7fef4c1bdaa0.json create mode 100644 data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9/dcf33a22-5e57-4476-a2cb-ebd60407a920.json create mode 100644 data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-OriginalFusion/15659480-be0b-41c8-a463-873be444b194.json create mode 100644 data/hfopenllm_v2/Lyte/Llama-3.1-8B-Instruct-Reasoner-1o1_v0.3/0444c1bf-a3d3-4d23-bc6c-0a98c4dc1e9d.json delete mode 100644 data/hfopenllm_v2/Lyte/Llama-3.1-8B-Instruct-Reasoner-1o1_v0.3/8fdc62c0-215c-4502-8f56-188455fe2d9e.json create mode 100644 data/hfopenllm_v2/Lyte/Llama-3.2-1B-Instruct-COT-RL-Expriement1-EP04/93aa3a13-5069-410f-a1df-6944e0231e0e.json delete mode 100644 data/hfopenllm_v2/Lyte/Llama-3.2-1B-Instruct-COT-RL-Expriement1-EP04/ea928079-f00f-41b1-a628-c1539b41e63d.json create mode 100644 data/hfopenllm_v2/Lyte/Llama-3.2-3B-Overthinker/427ea7d0-c1f1-4cfe-b6a7-555262a7a317.json create mode 100644 data/hfopenllm_v2/M4-ai/TinyMistral-248M-v3/c6dbe372-7a3c-487c-87c0-fb324c39f8c9.json delete mode 100644 data/hfopenllm_v2/MEscriva/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/ab59c1cb-ac90-4fe1-b782-2e038734366e.json create mode 100644 data/hfopenllm_v2/MEscriva/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/cf8d99c8-8790-4bdf-bfc2-1a6d1fe35916.json create mode 100644 data/hfopenllm_v2/MLP-KTLim/llama-3-Korean-Bllossom-8B/5b5d42d7-8012-46f1-826f-32d839806048.json create mode 100644 data/hfopenllm_v2/MTSAIR/Cotype-Nano/5e1bf2cb-55c4-4806-89af-cb9953c7c1b1.json delete mode 100644 data/hfopenllm_v2/MTSAIR/Cotype-Nano/b5fa19ff-9b05-4d71-9d79-54f8dfe4a8ab.json create mode 100644 data/hfopenllm_v2/MTSAIR/MultiVerse_70B/21ee4b33-9829-4cca-9603-c30fd4a1f7ff.json delete mode 100644 data/hfopenllm_v2/MTSAIR/MultiVerse_70B/a713dba7-110a-40a0-9d89-d48567d423af.json create mode 100644 data/hfopenllm_v2/Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.1/c6c14a8b-0e9f-4b97-b9f3-27c7250fb8f2.json delete mode 100644 data/hfopenllm_v2/Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.1/f3024d7f-f25f-4220-973a-b0e19ecb5e1d.json delete mode 100644 data/hfopenllm_v2/Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.3/4756be0b-fd98-467f-a256-73aabba09c97.json create mode 100644 data/hfopenllm_v2/Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.3/6586fa94-9f43-4814-8c8a-8ed244ac94e7.json create mode 100644 data/hfopenllm_v2/Magpie-Align/Llama-3-8B-Magpie-Align-v0.1/df7d7db2-867e-47f0-9abf-d71b79e97630.json create mode 100644 data/hfopenllm_v2/Magpie-Align/Llama-3-8B-Magpie-Align-v0.1/e2502e7e-3a10-49f3-b5c6-b20496fed998.json 
create mode 100644 data/hfopenllm_v2/Magpie-Align/Llama-3-8B-Magpie-Align-v0.3/51cde18f-09b0-4b66-a962-811ee49e192f.json delete mode 100644 data/hfopenllm_v2/Magpie-Align/Llama-3.1-8B-Magpie-Align-SFT-v0.1/43d2e788-e186-485d-8c34-10bdfd7a6b65.json create mode 100644 data/hfopenllm_v2/Magpie-Align/Llama-3.1-8B-Magpie-Align-SFT-v0.1/4ea48b42-8026-4799-b35d-46757fd2753f.json create mode 100644 data/hfopenllm_v2/Magpie-Align/Llama-3.1-8B-Magpie-Align-v0.1/52e9b4ae-9119-4f26-87e4-6532d1148ecd.json create mode 100644 data/hfopenllm_v2/Magpie-Align/MagpieLM-8B-Chat-v0.1/4bda68c0-cc09-4945-961b-48776b7b5fc8.json delete mode 100644 data/hfopenllm_v2/Magpie-Align/MagpieLM-8B-Chat-v0.1/b14fcc84-7caf-4aa8-b728-8a1287a5c04a.json create mode 100644 data/hfopenllm_v2/Magpie-Align/MagpieLM-8B-SFT-v0.1/18ea0ad0-a216-4906-a96c-c8b040398dbd.json delete mode 100644 data/hfopenllm_v2/Magpie-Align/MagpieLM-8B-SFT-v0.1/eb307f58-db7e-44b3-bf03-7264a39bed69.json create mode 100644 data/hfopenllm_v2/MagusCorp/grpo_lora_enem_llama3_7b/1e2321f6-93bd-4acf-9f5b-c82807a40233.json create mode 100644 data/hfopenllm_v2/ManoloPueblo/ContentCuisine_1-7B-slerp/13032961-52a1-43cf-b69d-1802c43e1bcc.json delete mode 100644 data/hfopenllm_v2/ManoloPueblo/ContentCuisine_1-7B-slerp/74d2724e-9d5d-4142-9cff-3fd40c931882.json create mode 100644 data/hfopenllm_v2/ManoloPueblo/LLM_MERGE_CC2/9d444061-2c29-499a-8906-77ef58aba34d.json delete mode 100644 data/hfopenllm_v2/ManoloPueblo/LLM_MERGE_CC2/f7ca7fb6-b02c-4c27-afef-662bb62cd054.json delete mode 100644 data/hfopenllm_v2/ManoloPueblo/LLM_MERGE_CC3/1c3dfe6a-28e7-4125-a802-1898336b1beb.json create mode 100644 data/hfopenllm_v2/ManoloPueblo/LLM_MERGE_CC3/1ffdf6b0-b3a3-432a-a0e4-69b4d447bb76.json create mode 100644 data/hfopenllm_v2/MarinaraSpaghetti/NemoReRemix-12B/8ce733ea-e6e9-4f9b-ab28-f93202507265.json delete mode 100644 data/hfopenllm_v2/MarinaraSpaghetti/NemoReRemix-12B/ac67a9d9-0f5a-4891-a9e5-2a924fbf4f72.json create mode 100644 data/hfopenllm_v2/MarinaraSpaghetti/Nemomix-v4.0-12B/0e88aa91-609c-4d2d-9296-25b06eeb0342.json delete mode 100644 data/hfopenllm_v2/MarinaraSpaghetti/Nemomix-v4.0-12B/aeac3ed0-e93b-4fb2-bdd5-1fd06ccd3338.json delete mode 100644 data/hfopenllm_v2/Marsouuu/MiniMathExpert-2_61B-ECE-PRYMMAL-Martial/2c99d2a7-7a5f-4357-ad92-745d8a718ee3.json create mode 100644 data/hfopenllm_v2/Marsouuu/MiniMathExpert-2_61B-ECE-PRYMMAL-Martial/3e235ea0-3f04-4d99-9db2-7cafcbdbac6f.json create mode 100644 data/hfopenllm_v2/Marsouuu/MiniQwenMathExpert-ECE-PRYMMAL-Martial/5e31a55c-f222-4192-b031-27bb40ba56fa.json create mode 100644 data/hfopenllm_v2/Marsouuu/MistralBase-4x7B-MoE-ECE-PRYMMAL-Martial/11fd4b70-4ea7-4bee-8caf-8921d4c89f24.json delete mode 100644 data/hfopenllm_v2/Marsouuu/general3B-ECE-PRYMMAL-Martial/6f36320a-dcfd-4e93-87b2-53763dde5c57.json create mode 100644 data/hfopenllm_v2/Marsouuu/general3B-ECE-PRYMMAL-Martial/8e721067-898d-45ca-b4f5-9f523c4ce3d3.json delete mode 100644 data/hfopenllm_v2/Marsouuu/general3Bv2-ECE-PRYMMAL-Martial/716552b2-6343-4339-b9f5-a573fa47c384.json create mode 100644 data/hfopenllm_v2/Marsouuu/general3Bv2-ECE-PRYMMAL-Martial/be5d5480-ce4c-4ade-8c6a-c08cd2826909.json delete mode 100644 data/hfopenllm_v2/Marsouuu/lareneg1_78B-ECE-PRYMMAL-Martial/49532386-7e9b-4719-9c24-5d463dea6cfc.json create mode 100644 data/hfopenllm_v2/Marsouuu/lareneg1_78B-ECE-PRYMMAL-Martial/54dec074-29f8-4863-be37-2c08f6f2c3cb.json create mode 100644 data/hfopenllm_v2/Marsouuu/lareneg3B-ECE-PRYMMAL-Martial/88a15025-556b-469d-be77-c773f2c61038.json delete mode 100644 
data/hfopenllm_v2/Marsouuu/lareneg3B-ECE-PRYMMAL-Martial/8d0e995d-2859-461b-8be7-60d2b2690d6b.json delete mode 100644 data/hfopenllm_v2/Marsouuu/lareneg3Bv2-ECE-PRYMMAL-Martial/09b5771f-9ee2-4f4f-9fa9-e0280c33b00f.json create mode 100644 data/hfopenllm_v2/Marsouuu/lareneg3Bv2-ECE-PRYMMAL-Martial/b4f4596b-17e5-40bf-ae60-0b17492ba9f8.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/Calme-4x7B-MoE-v0.1/97ce858e-a64f-4881-b6d0-0a2c0814336d.json delete mode 100644 data/hfopenllm_v2/MaziyarPanahi/Calme-4x7B-MoE-v0.1/f4512664-c531-4b13-b76e-e96c2b03febf.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/Calme-4x7B-MoE-v0.2/1becd83e-e9b8-49c1-a137-80c5a8dbdf0d.json delete mode 100644 data/hfopenllm_v2/MaziyarPanahi/Calme-4x7B-MoE-v0.2/ca2df1c9-79b2-453b-9cd1-b607e48f5dd7.json delete mode 100644 data/hfopenllm_v2/MaziyarPanahi/Llama-3-70B-Instruct-v0.1/1e2759fa-3e87-447b-b0ca-5a4e2e293589.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/Llama-3-70B-Instruct-v0.1/337bb321-9c6e-4751-9c9b-d8ba0120dd07.json delete mode 100644 data/hfopenllm_v2/MaziyarPanahi/Llama-3-8B-Instruct-v0.10/19143059-07d5-44b2-b599-193147f6196a.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/Llama-3-8B-Instruct-v0.10/cfa95cc9-5bb1-4921-97c7-078f2f929a2f.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/Llama-3-8B-Instruct-v0.8/6d5ba3c4-a0c2-40cd-9766-68d36d21c5b6.json delete mode 100644 data/hfopenllm_v2/MaziyarPanahi/Llama-3-8B-Instruct-v0.8/c68859dd-6db0-4bdc-a031-92ac7d1d2585.json delete mode 100644 data/hfopenllm_v2/MaziyarPanahi/Llama-3-8B-Instruct-v0.9/1fb0056b-4f66-404b-89ac-a58185747ce2.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/Llama-3-8B-Instruct-v0.9/6cc4404a-f3e1-47b9-b56b-34e4269e1261.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/Qwen1.5-MoE-A2.7B-Wikihow/8d820e43-ff42-4247-9ad0-4ed8e70672b4.json delete mode 100644 data/hfopenllm_v2/MaziyarPanahi/Qwen2-7B-Instruct-v0.1/ce4ee4fe-8a38-467b-b189-b25311c23c4e.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/Qwen2-7B-Instruct-v0.1/d858ce8e-6a4b-46b1-8d51-03ebc2d8aaec.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/Qwen2-7B-Instruct-v0.8/9813dd88-ff70-4d9e-86c5-9b73444275c5.json delete mode 100644 data/hfopenllm_v2/MaziyarPanahi/Qwen2-7B-Instruct-v0.8/a65af628-f518-4da7-afc5-7cba4234415b.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-2.1-llama3.1-70b/ac677432-e7d1-4439-9c05-426059c285ef.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-2.1-phi3-4b/018f270f-3cfe-403c-a236-483038a0b04e.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-2.1-phi3.5-4b/718a40ea-26b1-4cf4-9584-57be798640ae.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-2.1-qwen2-72b/207a28a9-ae24-4a31-be95-96296b2e466d.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-2.1-qwen2-7b/72efedb8-d456-41ed-b1ae-4887cb6c18f8.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-2.1-qwen2.5-72b/ac91fb37-5742-4a3d-b93a-86c63b90cad5.json delete mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-2.1-rys-78b/387000a4-7ef5-46c6-9b5e-9bfe7c2cfc18.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-2.1-rys-78b/c71d025d-e954-4420-b397-e07c3644d1f4.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-2.2-llama3-70b/968c3759-de5f-4255-ba95-cafc7a3c70a7.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-2.2-llama3.1-70b/5e23b2f7-33f7-4e49-b73a-a02b8650ee0d.json create mode 100644 
data/hfopenllm_v2/MaziyarPanahi/calme-2.2-phi3-4b/1b6c64f6-acf8-4cff-bcae-6e8b3725c6f1.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-2.2-qwen2-72b/7908f572-8886-4add-ae84-b4ec0ec17c26.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-2.2-qwen2-7b/9e04ec5c-2208-4569-9b63-4768ed4262b9.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-2.2-qwen2.5-72b/ee2c8beb-6566-4b19-91d0-8e48c12a3fdf.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-2.2-rys-78b/c7579616-0c21-443a-a149-0c51a0ae92ac.json delete mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-2.2-rys-78b/cfaafe4c-50a1-4cde-b092-fdbaeea86fb3.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-2.3-llama3-70b/ef7a1429-db2f-433b-a606-339a9d868e7a.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-2.3-llama3.1-70b/f531e13c-79ed-45da-a246-857fd2c884c1.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-2.3-phi3-4b/0f525d93-663a-442c-9a51-1ad3a5054172.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-2.3-qwen2-72b/15af21e1-3193-47fa-a3fc-1f087216d4d9.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-2.3-qwen2-7b/67b270d9-3422-4770-9957-7bde65acca0a.json delete mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-2.3-rys-78b/33a06134-e58d-4bc7-8421-c5ae2f0dcd1f.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-2.3-rys-78b/e2d38bcc-9133-4051-82d0-4e4fd66e00f8.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-2.4-llama3-70b/4ff256af-73c7-4a5a-96da-19546a786c59.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-2.4-qwen2-7b/225cbeef-1d0d-40fc-949d-4ba6696fb690.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-2.4-rys-78b/24fcd662-5abb-4bf8-b8df-1c21b048cd92.json delete mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-2.4-rys-78b/48433dc8-40ff-4e36-8c6a-ced33bc22e4f.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-2.5-qwen2-7b/7badcb45-7826-4fd1-b964-c697fbda76cc.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-2.6-qwen2-7b/bfb532f1-3319-46ff-80ae-0ca783a18bb6.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-2.7-qwen2-7b/ea304515-b41f-4e96-a0ec-78c897ebf9a4.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-3.1-baguette-3b/1fe79ea5-1922-4a5e-8857-1c832353b0a6.json delete mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-3.1-baguette-3b/8f0a6518-d153-43ec-b426-02136a2bc367.json delete mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-3.1-instruct-3b/67915bce-0b54-4996-90f6-cec6def9bbba.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-3.1-instruct-3b/9098d70f-cbcd-4f6c-bcba-0b1da743396e.json delete mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-3.1-instruct-78b/898e5e91-c4c0-4494-baad-37c2bfd1931b.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-3.1-instruct-78b/df4ed9e0-30bc-4a3f-b7a2-8955cbb38d31.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-3.1-llamaloi-3b/f68957d5-20a1-438f-9931-6a787aaed467.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-3.2-baguette-3b/416e0c04-9119-4230-ba71-b0f47e2d4997.json delete mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-3.2-baguette-3b/e49441f3-99a5-4cdb-bff1-79cc21711bab.json delete mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-3.2-instruct-3b/83e46bac-5266-4f65-a4dd-76240b297adc.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-3.2-instruct-3b/d57780e2-154e-437d-ac2f-0007e1f9140e.json create mode 100644 
data/hfopenllm_v2/MaziyarPanahi/calme-3.2-instruct-78b/027d464b-1375-4de7-aa57-e1473d16ba89.json delete mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-3.2-instruct-78b/77cc280c-b794-4a9a-addc-e2eb0a1af896.json delete mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-3.3-baguette-3b/22cbbb6d-1014-42af-96cf-1636fcb40679.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-3.3-baguette-3b/a81f20fa-57e8-498c-a162-6d8a9be09ee6.json delete mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-3.3-instruct-3b/8aa85bd2-eab2-491b-95a3-ac6321cbe298.json create mode 100644 data/hfopenllm_v2/MaziyarPanahi/calme-3.3-instruct-3b/d72ddbff-8ff7-446f-a74a-10a46bce6e3e.json create mode 100644 data/hfopenllm_v2/Minami-su/Amara-o1-7B-Qwen/f681d612-f574-4641-b34e-95b6de97f9e8.json create mode 100644 data/hfopenllm_v2/Minami-su/Amara-o2-7B-Qwen/cae1adaf-e424-4dcd-943b-5bbb708aca57.json create mode 100644 data/hfopenllm_v2/Minami-su/test-7B-00/969ac825-92f2-448c-899a-226e69dee377.json delete mode 100644 data/hfopenllm_v2/Minami-su/test-7B-00/ba9ead4a-3d47-4a51-bc39-dbf72d7ff3af.json delete mode 100644 data/hfopenllm_v2/Minami-su/test-7B-01/2918f03e-3fd5-4183-be8d-2911e0204e8d.json create mode 100644 data/hfopenllm_v2/Minami-su/test-7B-01/e108ad28-c155-4162-852c-0f588a136bdc.json create mode 100644 data/hfopenllm_v2/Minami-su/test-v2-7B-00/93cfeba9-7d31-45b4-a6e2-99a5f318f5b3.json delete mode 100644 data/hfopenllm_v2/Minami-su/test-v2-7B-00/95abd2ea-1fb7-4ef8-b186-bfe67148e486.json delete mode 100644 data/hfopenllm_v2/ModelCloud/Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1/4a68c55f-ac3d-4173-a1cc-8bb97a2b8466.json create mode 100644 data/hfopenllm_v2/ModelCloud/Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1/c1b16b84-9392-48f3-b483-0a9786925506.json create mode 100644 data/hfopenllm_v2/ModelSpace/GemmaX2-28-9B-v0.1/b0c6e08d-b426-49d5-8a66-ee3d70131b62.json create mode 100644 data/hfopenllm_v2/MoonRide/Llama-3.2-3B-Khelavaster/6a6651a3-b34e-404d-ac25-42c151fb9ba3.json delete mode 100644 data/hfopenllm_v2/Mostafa8Mehrabi/llama-3.2-1b-Insomnia-ChatBot-merged/940d1360-047b-4c12-a7e5-cd002675c69c.json create mode 100644 data/hfopenllm_v2/Mostafa8Mehrabi/llama-3.2-1b-Insomnia-ChatBot-merged/da63b789-5571-4ed8-976e-146d385b18e2.json delete mode 100644 data/hfopenllm_v2/MrRobotoAI/MrRoboto-ProLong-8b-v4i/7c100a09-f34e-4bd7-b201-3779ee5a769d.json create mode 100644 data/hfopenllm_v2/MrRobotoAI/MrRoboto-ProLong-8b-v4i/87b900e7-3bab-4e60-b0ef-349667cb2656.json delete mode 100644 data/hfopenllm_v2/MrRobotoAI/MrRoboto-ProLongBASE-pt8-unaligned-8b/4c54b609-0af6-4116-b62f-1c8a4d68f06b.json create mode 100644 data/hfopenllm_v2/MrRobotoAI/MrRoboto-ProLongBASE-pt8-unaligned-8b/c9fd4740-4990-4174-b782-9b63c34d6407.json create mode 100644 data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1211-3B/2582a049-e940-408b-b2d9-7a7bdf470e49.json delete mode 100644 data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1211-3B/2cc4a013-ff0c-44b0-b2e1-66e103606e12.json create mode 100644 data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct-V2/99310118-d2ec-4647-85db-fcc22aee9161.json delete mode 100644 data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct-V2/a152be8c-a542-4a73-8164-a43e1f04c595.json create mode 100644 data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct/bedd12e4-da18-4ca6-ba51-6d13e1c80bae.json delete mode 100644 data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct/ebfb99cd-9672-4c30-9540-46e4035a0d43.json create mode 100644 
data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1222-3B-Instruct/6767e14a-bbfa-4a0d-8120-1f48a565474e.json delete mode 100644 data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1222-3B-Instruct/990d6877-4045-49ef-ae23-f5a6302185d6.json create mode 100644 data/hfopenllm_v2/MultivexAI/Phi-3.5-Mini-Instruct-MultiVex-v0.25-GGUF/70260aac-1bbf-4913-9dcc-58633d055314.json delete mode 100644 data/hfopenllm_v2/MultivexAI/Phi-3.5-Mini-Instruct-MultiVex-v0.25-GGUF/c14766b4-5339-4c6e-87d9-fc2bb953e176.json delete mode 100644 data/hfopenllm_v2/Mxode/NanoLM-0.3B-Instruct-v1.1/0f9eeb32-85fb-4778-8618-436aa4f891ad.json create mode 100644 data/hfopenllm_v2/Mxode/NanoLM-0.3B-Instruct-v1.1/fba6e1a2-c197-4731-91ea-f6d059ba8b16.json create mode 100644 data/hfopenllm_v2/Mxode/NanoLM-0.3B-Instruct-v1/22e74d0c-70d6-43c5-be4d-62842d93fedf.json delete mode 100644 data/hfopenllm_v2/Mxode/NanoLM-0.3B-Instruct-v1/3c08189e-294e-4682-a7e0-e73a8d498fb2.json delete mode 100644 data/hfopenllm_v2/Mxode/NanoLM-0.3B-Instruct-v2/43ce0bee-e8ee-417d-be0d-841d6e26b330.json create mode 100644 data/hfopenllm_v2/Mxode/NanoLM-0.3B-Instruct-v2/f7c33065-1da1-4da4-81c7-f2c9307b6e9b.json delete mode 100644 data/hfopenllm_v2/Mxode/NanoLM-1B-Instruct-v1.1/2e482de2-60ca-4758-9de8-4482e42a5b7a.json create mode 100644 data/hfopenllm_v2/Mxode/NanoLM-1B-Instruct-v1.1/ecdb4661-426a-46be-aefc-7e04483cebc0.json create mode 100644 data/hfopenllm_v2/Mxode/NanoLM-1B-Instruct-v2/236976b3-af46-45ac-a8a5-f5897e3468a1.json delete mode 100644 data/hfopenllm_v2/Mxode/NanoLM-1B-Instruct-v2/d7d1e48d-86af-4f65-803b-30fff69c78b5.json create mode 100644 data/hfopenllm_v2/NAPS-ai/naps-gemma-2-27b-v-0.1.0/fd175296-a5f6-4914-80e9-b8b75bc659de.json create mode 100644 data/hfopenllm_v2/NAPS-ai/naps-gemma-2-27b-v0.1.0/d910bbaa-d55c-4b00-9320-856a8a6713c0.json create mode 100644 data/hfopenllm_v2/NAPS-ai/naps-llama-3_1-8b-instruct-v0.3/99a5f123-5d2e-469b-884e-c9a64c6bc197.json delete mode 100644 data/hfopenllm_v2/NAPS-ai/naps-llama-3_1-8b-instruct-v0.3/d0ce5c14-28fa-4fde-901e-6670db6943de.json delete mode 100644 data/hfopenllm_v2/NAPS-ai/naps-llama-3_1-8b-instruct-v0.4/467a9428-e85d-489d-be59-91842b389732.json create mode 100644 data/hfopenllm_v2/NAPS-ai/naps-llama-3_1-8b-instruct-v0.4/ed17a715-f0ae-461c-9618-ac952c450ec5.json create mode 100644 data/hfopenllm_v2/NAPS-ai/naps-llama-3_1-instruct-v0.5.0/3dd2a474-9ea8-4e26-8986-5bcc67c78c39.json delete mode 100644 data/hfopenllm_v2/NAPS-ai/naps-llama-3_1-instruct-v0.5.0/5553fa1d-6bf9-469d-b870-590dd4965209.json create mode 100644 data/hfopenllm_v2/NAPS-ai/naps-llama-3_1_instruct-v0.6.0/b39e14a6-c05f-4e88-b2d4-63a199aa61a1.json create mode 100644 data/hfopenllm_v2/NAPS-ai/naps-llama3.1-70B-v0.2-fp16/39893637-552a-48d8-9b83-433415eb26c3.json delete mode 100644 data/hfopenllm_v2/NCSOFT/Llama-VARCO-8B-Instruct/38876858-0585-4edb-a4af-e4c71530429c.json create mode 100644 data/hfopenllm_v2/NCSOFT/Llama-VARCO-8B-Instruct/f9549713-f487-4e26-bfeb-ec6d394b7014.json create mode 100644 data/hfopenllm_v2/NJS26/NJS_777/02579c41-f117-4412-9c00-ee7db3e9ab97.json delete mode 100644 data/hfopenllm_v2/NJS26/NJS_777/211449c7-9b14-4d20-a599-58718e9c5e4b.json delete mode 100644 data/hfopenllm_v2/NLPark/AnFeng_v3.1-Avocet/17b3cc41-69ac-48a2-9371-a5d1368dfeb9.json create mode 100644 data/hfopenllm_v2/NLPark/AnFeng_v3.1-Avocet/bfa1d761-00aa-4438-a5de-972d934c63d5.json create mode 100644 data/hfopenllm_v2/NLPark/B-and-W_Flycatcher-3AD1E/20a84d88-05c2-4e02-8c84-2afa84cc659f.json delete mode 100644 
data/hfopenllm_v2/NLPark/B-and-W_Flycatcher-3AD1E/95b94fcb-7aba-4473-b88f-36dddcd646c1.json delete mode 100644 data/hfopenllm_v2/NLPark/Shi-Ci-Robin-Test_3AD80/0fa6785d-8db5-40f9-b259-3368ffb547d4.json create mode 100644 data/hfopenllm_v2/NLPark/Shi-Ci-Robin-Test_3AD80/84eedce3-3a93-4630-b914-aa281fd2efda.json delete mode 100644 data/hfopenllm_v2/NTQAI/NxMobileLM-1.5B-SFT/7a295af9-fb47-484f-8748-af3ee245d2c5.json create mode 100644 data/hfopenllm_v2/NTQAI/NxMobileLM-1.5B-SFT/b3b7b62f-ac82-4ef9-9634-afb81645ec19.json delete mode 100644 data/hfopenllm_v2/NTQAI/Nxcode-CQ-7B-orpo/1c020e50-fe68-40c9-a36a-7bec201f409a.json create mode 100644 data/hfopenllm_v2/NTQAI/Nxcode-CQ-7B-orpo/283c5166-b9c5-4d20-9653-0cd0346d87c1.json create mode 100644 data/hfopenllm_v2/NYTK/PULI-GPTrio/478b54cd-6410-41e5-8a53-4e46bcd9d7af.json delete mode 100644 data/hfopenllm_v2/NYTK/PULI-LlumiX-32K/7230c1f3-d7f6-4a96-8308-b2d5895a0a0a.json create mode 100644 data/hfopenllm_v2/NYTK/PULI-LlumiX-32K/de2ae7a9-93eb-4149-b3ff-b5b7dfba29c4.json create mode 100644 data/hfopenllm_v2/Naveenpoliasetty/llama3-8B-V2/ef5aa9db-804b-4a53-9c22-9c99f6c69eeb.json create mode 100644 data/hfopenllm_v2/NbAiLab/nb-llama-3.1-8B-Instruct/553fd36d-08dd-46a3-ab04-77b9039e7921.json delete mode 100644 data/hfopenllm_v2/NbAiLab/nb-llama-3.1-8B-Instruct/b0f68843-2f49-4d2a-91ab-ad8d07791125.json create mode 100644 data/hfopenllm_v2/NbAiLab/nb-llama-3.1-8B-sft/e2bae853-cc0f-456a-a635-98d5f87ac47c.json delete mode 100644 data/hfopenllm_v2/NbAiLab/nb-llama-3.1-8B-sft/e8313b88-13ee-4926-90f8-696b0604c7b9.json create mode 100644 data/hfopenllm_v2/Nekochu/Llama-3.1-8B-German-ORPO/d6c5f196-c97b-4a0a-81b0-59143ec4b10e.json create mode 100644 data/hfopenllm_v2/Nekochu/Llama-3.1-8B-french-DPO/5d92e02f-b590-4b6b-8c64-30690f79e916.json delete mode 100644 data/hfopenllm_v2/Nekochu/Llama-3.1-8B-french-DPO/ebc2a3b7-30e9-4608-a8c0-ea90a308c0e5.json delete mode 100644 data/hfopenllm_v2/Nekochu/Luminia-13B-v3/172f121a-3843-4b01-94e1-a95001909bb8.json create mode 100644 data/hfopenllm_v2/Nekochu/Luminia-13B-v3/e10f38df-b5d5-47c6-924f-563c6f8a6616.json create mode 100644 data/hfopenllm_v2/Nekochu/Luminia-8B-RP/27257dc9-750c-4673-8865-986434bc5c0e.json delete mode 100644 data/hfopenllm_v2/Nekochu/Luminia-8B-RP/fd23ba4a-a0ce-474b-9aa4-b5295d872028.json delete mode 100644 data/hfopenllm_v2/NeverSleep/Lumimaid-v0.2-12B/cee1293c-54fb-4275-b5a9-0215e5f9a4c0.json create mode 100644 data/hfopenllm_v2/NeverSleep/Lumimaid-v0.2-12B/e599f3f8-e5eb-4bfe-a102-efc5a967434d.json delete mode 100644 data/hfopenllm_v2/NeverSleep/Lumimaid-v0.2-8B/6d7f1ac9-66c8-4700-87a9-0e413fc8878e.json create mode 100644 data/hfopenllm_v2/NeverSleep/Lumimaid-v0.2-8B/8e56f2dd-49d0-4eff-beea-53d01cd96f0e.json create mode 100644 data/hfopenllm_v2/Nexesenex/Dolphin3.0-Llama3.1-1B-abliterated/f1a2b5d0-2c8a-4bbc-8bc5-0484485c2dad.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DeepDive_3_Prev_v1.0/2c12ee67-0c77-4cb2-9e88-1c731ed55c3f.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DeepDive_3_R1_Prev_v1.0/567f8f54-225f-4d9b-be06-f24091adc1e6.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DobHerWild_R1_v1.1R/ebb59730-9522-4c45-8f42-c0d941fd728c.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DoberWild_v2.01/2c44fa8c-ebd3-4ea6-8578-61da38965c09.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DoberWild_v2.02/3ef26b8c-6bfb-457b-a160-a65c3cc8b0c6.json create mode 100644 
data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DoberWild_v2.03/0ab721ba-fbda-44ca-a349-1d3abfaabe62.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DodoWild_v2.01/2fea1128-4f0c-40d8-be87-72c42c0648fb.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DodoWild_v2.02/db9dc9d2-4aa2-43d0-9f2e-15fbd05af62c.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DodoWild_v2.03/28399fd0-840c-49d3-8179-407ed83d3bfc.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DodoWild_v2.10/d7108c13-e14a-4366-9a39-204f853b1bee.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.01/56152d05-9273-4701-8c0a-723e2cab618d.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.03/55d2f23d-cb6c-42d2-8b57-837451d3c6df.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Dolermed_V1.01/7479ae87-e795-4e20-848a-291614176def.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Dolerstormed_V1.04/04ceb40e-bde8-487b-9d29-dc8f681af9be.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Hermedash_R1_V1.04/e26b00b0-d9df-4ce2-a649-b19f8957b8ce.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.01/9954194c-69b5-4eb4-8b32-859845548cb0.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.03/2afbc279-242a-4276-85f0-facd29c2d89b.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Hermedive_V1.01/ba307ad4-3647-4785-9bf1-cd4dacf3c71f.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Mediver_V1.01/d03c73ca-7364-4517-aea4-f0ac564c49df.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Medusa_v1.01/1dd4b82a-ca80-4c9c-8800-f97ab2b9cbe7.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Smarteaz_0.2_R1/f2363099-c39a-4874-bf77-ccc0fa087680.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Smarteaz_V1.01/596eeee8-3600-4f8a-8888-978b610eb2ca.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Stormeder_v1.04/595ddba1-c450-4b69-85b7-0e3118c8c6c7.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Typhoon_v1.03/64890314-bba0-4fb2-8c21-38b413cff4c8.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_AquaSyn_0.1/470b8b0d-fbaf-408c-a28e-57d1b294f8a8.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_AquaSyn_0.11/00a1579e-8636-4eca-9a63-c0b067a5f3dc.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_Dolto_0.1/a52cc4c9-6d60-4083-ac77-591e247d86c9.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_Odyssea_V1.01/ac5c321a-d35a-4e0f-a1be-bcc0b7109f91.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_Odyssea_V1/c4d11b01-ae5b-4198-b102-07160f100a41.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_OpenTree_R1_0.1/19405ead-2263-4613-8053-43beeafb4bfc.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_OrcaSun_V1/6c698a60-a813-4be7-b55f-b684029b492d.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_RandomLego_RP_R1_0.1/b67c4a44-7787-45e2-b88c-5d7e8e496fa3.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_SunOrca_V1/a20a529e-c52e-41b7-a8ee-909167048bfb.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_Sydonia_0.1/2735e6f4-839f-4ab1-8ede-3447891b1b26.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_Syneridol_0.2/e74e7e7f-8550-4cba-97cd-2626c82d6b29.json create mode 100644 
data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_Synopsys_0.1/14f4c00d-8915-413d-8e85-79f395127682.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_Synopsys_0.11/9119b586-d3b2-4ce0-a243-d584e2087184.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.2_3b_Kermes_v1/629f3f1a-f8ee-4d1b-b604-7bbd35c6517b.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.2_3b_Kermes_v2.1/a6ac828c-904b-413a-a5fa-a5ed06a28143.json create mode 100644 data/hfopenllm_v2/Nexesenex/Llama_3.2_3b_Kermes_v2/251a3ef9-c7ae-4d79-8a60-4bc021a3f001.json delete mode 100644 data/hfopenllm_v2/Nexesenex/Nemotron_W_4b_Halo_0.1/243b045a-8442-41fd-a483-e4e25b771048.json create mode 100644 data/hfopenllm_v2/Nexesenex/Nemotron_W_4b_Halo_0.1/962b48a3-23d7-4104-b34d-4e5c2af31d58.json delete mode 100644 data/hfopenllm_v2/Nexesenex/Nemotron_W_4b_MagLight_0.1/2f3f0dcb-a62d-44bd-b86d-c1f403d5b833.json create mode 100644 data/hfopenllm_v2/Nexesenex/Nemotron_W_4b_MagLight_0.1/e4b0be31-6f9a-4a57-b433-e561da9bd827.json create mode 100644 data/hfopenllm_v2/Nexesenex/Qwen_2.5_3b_Smarteaz_0.01a/9a31f208-b7d8-4baa-b96e-99926ecb35af.json delete mode 100644 data/hfopenllm_v2/Nexesenex/pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL/318afc06-f294-4253-b1c9-173a7f56083b.json create mode 100644 data/hfopenllm_v2/Nexesenex/pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL/8d933df1-60cb-471d-bfc3-b11c93150203.json create mode 100644 data/hfopenllm_v2/Nexusflow/NexusRaven-V2-13B/35315c3a-ec06-433a-b3fa-ae7a4a59b7ea.json delete mode 100644 data/hfopenllm_v2/Nexusflow/NexusRaven-V2-13B/f5e5662e-803e-4f1f-82e7-14a2a189ed6d.json delete mode 100644 data/hfopenllm_v2/NikolaSigmoid/AceMath-1.5B-Instruct-1epoch/0cf3db2f-9b23-4602-ac92-265bafd36410.json create mode 100644 data/hfopenllm_v2/NikolaSigmoid/AceMath-1.5B-Instruct-1epoch/3530db9a-0d61-4cf8-9fff-b15f6488c845.json create mode 100644 data/hfopenllm_v2/NikolaSigmoid/AceMath-1.5B-Instruct-dolphin-r1-200/7d9901e0-eafe-4d49-a5bb-fab059708bcb.json delete mode 100644 data/hfopenllm_v2/NikolaSigmoid/AceMath-1.5B-Instruct-dolphin-r1-200/93f56942-30d8-4a0f-af8d-901fb264436c.json create mode 100644 data/hfopenllm_v2/NikolaSigmoid/DeepSeek-R1-Distill-Qwen-1.5B-500/ee7f9025-bb2c-4902-b8e2-bfac2b63d2fd.json delete mode 100644 data/hfopenllm_v2/NikolaSigmoid/acemath-200/4414a96e-0664-4531-9c0f-3eb4a062fbe2.json create mode 100644 data/hfopenllm_v2/NikolaSigmoid/acemath-200/6157f79e-2673-4ad6-99d7-e5cf5e4e1db2.json create mode 100644 data/hfopenllm_v2/NikolaSigmoid/phi-4-14b/0aa7572c-1aa6-4997-a2a2-3b557fbde639.json create mode 100644 data/hfopenllm_v2/NikolaSigmoid/phi-4-1steps/6f5df760-2d3e-47b1-b55e-4031a5f11d41.json create mode 100644 data/hfopenllm_v2/NikolaSigmoid/phi-4-300steps/ac676b03-c3ce-4ff1-83fc-5c8db82f1497.json create mode 100644 data/hfopenllm_v2/Nitral-AI/Captain-Eris-BMO_Violent-GRPO-v0.420/2229cdf8-3ecb-4f11-8824-9c3bfbf6f968.json delete mode 100644 data/hfopenllm_v2/Nitral-AI/Captain-Eris-BMO_Violent-GRPO-v0.420/e841483e-042b-4a2a-8dbc-9ed7529f7618.json create mode 100644 data/hfopenllm_v2/Nitral-AI/Captain-Eris_BMO-Violent-12B/95ebc5b8-a541-4fca-9e7c-692720e73362.json delete mode 100644 data/hfopenllm_v2/Nitral-AI/Captain-Eris_BMO-Violent-12B/ebcd5d63-5c91-41d1-b9e2-0bafe7170000.json create mode 100644 data/hfopenllm_v2/Nitral-AI/Captain-Eris_Violet-GRPO-v0.420/09a2508d-a171-493f-9ff2-e7f375815c91.json delete mode 100644 data/hfopenllm_v2/Nitral-AI/Captain-Eris_Violet-GRPO-v0.420/cf030461-1234-48ce-a025-ba0f52cdf191.json create mode 100644 
data/hfopenllm_v2/Nitral-AI/Captain-Eris_Violet-V0.420-12B/12a4a921-5859-4fd6-9d64-677a7d8ef696.json delete mode 100644 data/hfopenllm_v2/Nitral-AI/Captain-Eris_Violet-V0.420-12B/ad87ba77-99a9-463f-aea3-1d29fc0317b0.json delete mode 100644 data/hfopenllm_v2/Nitral-AI/Captain_BMO-12B/6fed7e5b-9692-40f7-913e-fc3b57b8c72a.json create mode 100644 data/hfopenllm_v2/Nitral-AI/Captain_BMO-12B/b79f12d0-cdfc-4c9d-a88b-40612dcbf64d.json delete mode 100644 data/hfopenllm_v2/Nitral-AI/Hathor_Stable-v0.2-L3-8B/2bb06e2f-9aee-4ac4-b9a6-fe537c2c9890.json create mode 100644 data/hfopenllm_v2/Nitral-AI/Hathor_Stable-v0.2-L3-8B/d162cf7c-3ef4-420f-aab4-789a98b1195a.json create mode 100644 data/hfopenllm_v2/Nitral-AI/Hathor_Tahsin-L3-8B-v0.85/7e49018e-5e2d-4cdb-be5b-2ac04ec84bf5.json delete mode 100644 data/hfopenllm_v2/Nitral-AI/Hathor_Tahsin-L3-8B-v0.85/a73461e6-a1f4-43c9-9a0f-f03c9be46276.json create mode 100644 data/hfopenllm_v2/Nitral-AI/Nera_Noctis-12B/24677f2a-ea89-4289-bcb6-13699de9782f.json delete mode 100644 data/hfopenllm_v2/Nitral-AI/Nera_Noctis-12B/2f5caa38-56e9-4740-baca-22fb02e57150.json create mode 100644 data/hfopenllm_v2/Nohobby/MS-Schisandra-22B-v0.1/3e09df3c-2224-4a29-8e55-18a485db2b25.json delete mode 100644 data/hfopenllm_v2/Nohobby/MS-Schisandra-22B-v0.1/9836e2c7-30df-421d-bf02-d4434f97d990.json delete mode 100644 data/hfopenllm_v2/Nohobby/MS-Schisandra-22B-v0.2/9a263094-fb31-43b9-9307-6ae5f64f82c0.json create mode 100644 data/hfopenllm_v2/Nohobby/MS-Schisandra-22B-v0.2/cc0bd236-8fc4-43d3-a18f-4b2afb112946.json create mode 100644 data/hfopenllm_v2/Norquinal/Alpha/5afd4c0f-b61d-452f-8c48-d298780d91d5.json delete mode 100644 data/hfopenllm_v2/Norquinal/Alpha/6ce53368-e6b5-45a1-a997-ca5468f27c13.json delete mode 100644 data/hfopenllm_v2/Norquinal/Bravo/dbdae48e-5023-453f-b15f-cf779068e030.json create mode 100644 data/hfopenllm_v2/Norquinal/Bravo/eac52141-4fd8-4e21-9c78-920ab8933e5a.json delete mode 100644 data/hfopenllm_v2/Norquinal/Charlie/31f784e4-bded-48d8-b7a6-7936b5d21d9e.json create mode 100644 data/hfopenllm_v2/Norquinal/Charlie/8449837f-64ac-4293-b1f8-210e62779202.json delete mode 100644 data/hfopenllm_v2/Norquinal/Delta/684a3a6e-c74d-456f-b80e-c099b8c9973c.json create mode 100644 data/hfopenllm_v2/Norquinal/Delta/ab8a665c-8234-484f-a8a9-8ee79d73edff.json create mode 100644 data/hfopenllm_v2/Norquinal/Echo/a954242f-41a6-49d7-a71d-3bfe940cdb92.json delete mode 100644 data/hfopenllm_v2/Norquinal/Echo/f2f250f7-8cb0-4076-b2f0-7cf8ee911532.json delete mode 100644 data/hfopenllm_v2/Norquinal/Foxtrot/2a4428d4-a6c9-427c-ba67-72f08b590b8e.json create mode 100644 data/hfopenllm_v2/Norquinal/Foxtrot/6d1c518f-3f42-49eb-9208-b30e27e7e87e.json create mode 100644 data/hfopenllm_v2/Norquinal/Golf/87931db7-42a4-48df-b5a5-8bd934061dbe.json delete mode 100644 data/hfopenllm_v2/Norquinal/Golf/dfdcfbfa-c023-40bf-b5e3-632b45f28aab.json create mode 100644 data/hfopenllm_v2/Norquinal/Hotel/54088dbc-04cc-4b35-b4e1-e495b7cfd47f.json delete mode 100644 data/hfopenllm_v2/Norquinal/Hotel/f91abb9a-6690-4fec-b1a7-f519dfe66d24.json create mode 100644 data/hfopenllm_v2/NotASI/FineTome-Llama3.2-1B-0929/7129efad-8ab2-4f7a-b6ed-055989b3e131.json create mode 100644 data/hfopenllm_v2/NotASI/FineTome-Llama3.2-3B-1002/cfc6f85f-e4b6-4164-b7eb-4efb888e1ba5.json create mode 100644 data/hfopenllm_v2/NotASI/FineTome-v1.5-Llama3.2-1B-1007/0f053a45-cd79-4e51-9b4c-ae5c51006c17.json create mode 100644 data/hfopenllm_v2/NotASI/FineTome-v1.5-Llama3.2-3B-1007/d8002b35-1454-4635-a31e-b419c7000b53.json create mode 100644 
data/hfopenllm_v2/NousResearch/DeepHermes-3-Mistral-24B-Preview/4c08530e-d529-49a1-a3fe-2351c422981a.json create mode 100644 data/hfopenllm_v2/NousResearch/Hermes-2-Pro-Llama-3-8B/d16879dc-7ed7-49c4-aca6-4c9cd3b3a350.json create mode 100644 data/hfopenllm_v2/NousResearch/Hermes-2-Pro-Mistral-7B/70656b13-e0a2-4ef4-af43-0d9995d57af6.json create mode 100644 data/hfopenllm_v2/NousResearch/Hermes-2-Theta-Llama-3-8B/6544f1ca-02a6-4e58-98f0-e19cc6082682.json create mode 100644 data/hfopenllm_v2/NousResearch/Hermes-3-Llama-3.1-70B/5cd3796f-fb31-49c1-a974-019c5c5b20ae.json create mode 100644 data/hfopenllm_v2/NousResearch/Hermes-3-Llama-3.1-8B/49eff9ad-90c9-43b1-a1f5-cf371ac4b39b.json create mode 100644 data/hfopenllm_v2/NousResearch/Hermes-3-Llama-3.2-3B/59720f7e-7e09-483f-8332-8dc7aa19ae78.json delete mode 100644 data/hfopenllm_v2/NousResearch/Nous-Hermes-2-Mistral-7B-DPO/877421ae-8135-485f-805e-489ed70dc886.json create mode 100644 data/hfopenllm_v2/NousResearch/Nous-Hermes-2-Mistral-7B-DPO/a3a89e4a-0589-4776-a1da-227552482e94.json create mode 100644 data/hfopenllm_v2/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO/b3c04d1f-80e3-4d86-9779-c5e4bbce6f35.json delete mode 100644 data/hfopenllm_v2/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO/bc2d14fe-000a-40ce-a57c-c00fe584a7e4.json delete mode 100644 data/hfopenllm_v2/NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT/3c196d70-44ad-419c-8c4c-80fc7f184687.json create mode 100644 data/hfopenllm_v2/NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT/448fda35-bfdc-42ae-90f9-d44383e0a454.json create mode 100644 data/hfopenllm_v2/NousResearch/Nous-Hermes-2-SOLAR-10.7B/0d97542e-82b6-4f27-9822-62b67e7690c2.json delete mode 100644 data/hfopenllm_v2/NousResearch/Nous-Hermes-2-SOLAR-10.7B/80a7b60b-77f7-4dbf-96c8-071c56179fec.json create mode 100644 data/hfopenllm_v2/NousResearch/Nous-Hermes-llama-2-7b/2725bd69-839d-4427-8e05-0e289fff70de.json create mode 100644 data/hfopenllm_v2/NousResearch/Yarn-Llama-2-13b-128k/adb71488-adb8-4848-bf1d-aecd04cb6718.json create mode 100644 data/hfopenllm_v2/NousResearch/Yarn-Llama-2-7b-128k/c7736577-c4c3-4233-9308-a4bb9b2dbb89.json create mode 100644 data/hfopenllm_v2/NousResearch/Yarn-Llama-2-7b-64k/76fe52f4-9fa5-4ccb-8c92-7bd9eb9886ee.json create mode 100644 data/hfopenllm_v2/NousResearch/Yarn-Mistral-7b-128k/1d92e45f-c5a5-4dd6-a61f-8e0f7246117a.json create mode 100644 data/hfopenllm_v2/NousResearch/Yarn-Mistral-7b-64k/5e1513f1-4375-4380-85fa-b96a419c013b.json delete mode 100644 data/hfopenllm_v2/NousResearch/Yarn-Solar-10b-32k/a18a259d-1795-4848-94fd-3b9c3abfb9da.json create mode 100644 data/hfopenllm_v2/NousResearch/Yarn-Solar-10b-32k/fadbf3b2-283a-4f8e-9acf-463d75924b97.json delete mode 100644 data/hfopenllm_v2/NousResearch/Yarn-Solar-10b-64k/1904c811-34ae-4f52-9978-622bc6dd6f2e.json create mode 100644 data/hfopenllm_v2/NousResearch/Yarn-Solar-10b-64k/c04ffe5b-c313-4249-83bb-bbe07ad6fc69.json create mode 100644 data/hfopenllm_v2/Novaciano/ASTAROTH-3.2-1B/a9aa164e-386b-4987-9f49-2dde64ade45c.json delete mode 100644 data/hfopenllm_v2/Novaciano/ASTAROTH-3.2-1B/e454276c-3113-49f8-9397-9c1ad5e7bcc5.json delete mode 100644 data/hfopenllm_v2/Novaciano/BLAST_PROCESSING-3.2-1B/61173be4-9a87-4dfa-812d-b414b4d2bccb.json create mode 100644 data/hfopenllm_v2/Novaciano/BLAST_PROCESSING-3.2-1B/e4c1b3ef-e1db-4eca-b818-f3b1680cc5f0.json create mode 100644 data/hfopenllm_v2/Novaciano/Cerberus-3.2-1B/1ab95edc-ea3c-4d3f-9f59-dc7f7468adb9.json delete mode 100644 data/hfopenllm_v2/Novaciano/Cerberus-3.2-1B/2d6ff76b-9d81-45a7-8768-6a240b5395ab.json delete mode 
100644 data/hfopenllm_v2/Novaciano/Cultist-3.2-1B/3dc51dce-222f-455b-b61a-04904c7fc855.json create mode 100644 data/hfopenllm_v2/Novaciano/Cultist-3.2-1B/80a81bbc-6edf-48b9-afb7-e4e0a03753d8.json delete mode 100644 data/hfopenllm_v2/Novaciano/FuseChat-3.2-1B-GRPO_Creative_RP/16a8882c-12f5-46d0-8e1f-88b22aa8f08c.json create mode 100644 data/hfopenllm_v2/Novaciano/FuseChat-3.2-1B-GRPO_Creative_RP/afb24bf8-3c47-4278-9b84-19b05017745b.json create mode 100644 data/hfopenllm_v2/Novaciano/Fusetrix-3.2-1B-GRPO_RP_Creative/4f8cda4d-959b-41ab-a79d-d2b35968eb89.json delete mode 100644 data/hfopenllm_v2/Novaciano/Fusetrix-3.2-1B-GRPO_RP_Creative/7fe4c32b-0bbd-49c0-9e4f-43306457aae8.json create mode 100644 data/hfopenllm_v2/Novaciano/Fusetrix-Dolphin-3.2-1B-GRPO_Creative_RP/2818aa8c-5c73-4de9-bcbe-fd8f68e8bc6b.json create mode 100644 data/hfopenllm_v2/Novaciano/HarmfulProject-3.2-1B/6a683ead-0f3e-449b-9ae1-8afc9f1ab33d.json delete mode 100644 data/hfopenllm_v2/Novaciano/HarmfulProject-3.2-1B/99b31db9-55f8-41c2-9eb9-f21511deccf0.json delete mode 100644 data/hfopenllm_v2/Novaciano/LEWD-Mental-Cultist-3.2-1B/1bce579e-9fac-46a9-92ef-48080832abbb.json create mode 100644 data/hfopenllm_v2/Novaciano/LEWD-Mental-Cultist-3.2-1B/38cb02a8-862d-40e1-922a-e65f537df87e.json delete mode 100644 data/hfopenllm_v2/Novaciano/La_Mejor_Mezcla-3.2-1B/49fef1c9-bf18-465c-acdb-b8f17e93dbad.json create mode 100644 data/hfopenllm_v2/Novaciano/La_Mejor_Mezcla-3.2-1B/f816e2a7-2629-4abe-9ed0-3d1299e95194.json create mode 100644 data/hfopenllm_v2/Novaciano/Sigil-Of-Satan-3.2-1B/286fae5b-544a-4033-9092-d633fc80f47b.json delete mode 100644 data/hfopenllm_v2/Novaciano/Sigil-Of-Satan-3.2-1B/ae9ceba0-8e8a-431f-a762-7bb6c55b4757.json create mode 100644 data/hfopenllm_v2/NucleusAI/nucleus-22B-token-500B/93477bf6-ea00-418b-8a2f-975a9554263e.json delete mode 100644 data/hfopenllm_v2/NucleusAI/nucleus-22B-token-500B/f18c51de-f5eb-4986-8c44-35bd71db5e8b.json create mode 100644 data/hfopenllm_v2/NyxKrage/Microsoft_Phi-4/3d7c6576-f99c-4bb3-94fa-4f713e2898f6.json create mode 100644 data/hfopenllm_v2/OEvortex/Emotional-llama-8B/d1e9a242-941f-4461-b75b-7043c2c01ef7.json delete mode 100644 data/hfopenllm_v2/OEvortex/HelpingAI-15B/4ffdc303-b5e4-45f0-839c-432f04dc5d57.json create mode 100644 data/hfopenllm_v2/OEvortex/HelpingAI-15B/e39661af-ad93-41d7-8892-1230064f1a1c.json create mode 100644 data/hfopenllm_v2/OEvortex/HelpingAI-3B-reloaded/595b61b2-5220-48f6-91a0-3aa0d37c63d8.json delete mode 100644 data/hfopenllm_v2/OEvortex/HelpingAI-3B-reloaded/628026b2-efc1-4592-a85b-f5d2ea1dc1dd.json create mode 100644 data/hfopenllm_v2/OEvortex/HelpingAI2-9B/3173263e-2a42-4e8d-956e-8175ef464e76.json delete mode 100644 data/hfopenllm_v2/OEvortex/HelpingAI2-9B/d04d6474-5784-4492-8347-a2bc03eca6ba.json delete mode 100644 data/hfopenllm_v2/OEvortex/HelpingAI2.5-10B/6a41fcba-f13d-4839-8a91-ff3f18de5114.json create mode 100644 data/hfopenllm_v2/OEvortex/HelpingAI2.5-10B/f77f8291-1573-4fb6-a984-1cc099c09621.json create mode 100644 data/hfopenllm_v2/OliveiraJLT/Sagui-7B-Instruct-v0.1/c4681e14-513c-4e5e-af8c-88ca11849176.json delete mode 100644 data/hfopenllm_v2/OliveiraJLT/Sagui-7B-Instruct-v0.1/d5135349-0757-469d-8ad3-80ef56d1f7de.json create mode 100644 data/hfopenllm_v2/Omkar1102/code-yi/0c220edd-2563-4fec-99a4-ef8c210ca5ce.json delete mode 100644 data/hfopenllm_v2/Omkar1102/code-yi/2609af14-3cff-4b19-9741-e1caca56f58a.json delete mode 100644 data/hfopenllm_v2/Omkar1102/code-yi/3edef2ec-9fad-45ba-8fde-4af5c4f24d69.json create mode 100644 
data/hfopenllm_v2/Omkar1102/code-yi/bd7ef5a7-aa75-4eb4-8860-aec63f8bf9d1.json delete mode 100644 data/hfopenllm_v2/OmnicromsBrain/NeuralStar_FusionWriter_4x7b/65ba6556-712c-42cc-817b-ad8c2014dc4c.json create mode 100644 data/hfopenllm_v2/OmnicromsBrain/NeuralStar_FusionWriter_4x7b/85c20522-03c0-4dac-a1c8-2945e4bf0e0e.json create mode 100644 data/hfopenllm_v2/OnlyCheeini/greesychat-turbo/f180fddd-077f-43f9-b2d9-38c5f33be44d.json delete mode 100644 data/hfopenllm_v2/OnlyCheeini/greesychat-turbo/f3a7f01c-2893-4887-a210-d126d9135edf.json create mode 100644 data/hfopenllm_v2/Open-Orca/Mistral-7B-OpenOrca/ef384329-8406-4767-ac1a-3eba3131f726.json create mode 100644 data/hfopenllm_v2/OpenAssistant/oasst-sft-1-pythia-12b/2ddeae27-77d3-413c-a6e1-9de0f3980c4e.json delete mode 100644 data/hfopenllm_v2/OpenAssistant/oasst-sft-1-pythia-12b/ba1129fd-f158-47ad-b194-7cff794b9ef2.json delete mode 100644 data/hfopenllm_v2/OpenBuddy/openbuddy-falcon3-10b-v24.2-131k/19bba814-812c-49c2-acf1-9d056fd7d62d.json create mode 100644 data/hfopenllm_v2/OpenBuddy/openbuddy-falcon3-10b-v24.2-131k/38b2dbbe-be86-4ef0-a39b-89841f662141.json create mode 100644 data/hfopenllm_v2/OpenBuddy/openbuddy-llama3-70b-v21.2-32k/999a8091-22bd-4c08-bee1-772202e7edde.json create mode 100644 data/hfopenllm_v2/OpenBuddy/openbuddy-llama3-8b-v21.1-8k/fda91d98-d259-430c-929b-78852cab64ec.json create mode 100644 data/hfopenllm_v2/OpenBuddy/openbuddy-llama3-8b-v21.2-32k/535bfa4f-ab63-4832-9f17-7b245ff2b2af.json create mode 100644 data/hfopenllm_v2/OpenBuddy/openbuddy-llama3.1-70b-v22.1-131k/681a6cc5-5519-4b13-8b50-93adcab4a3f7.json create mode 100644 data/hfopenllm_v2/OpenBuddy/openbuddy-llama3.1-8b-v22.2-131k/141dd12c-6901-4a96-a051-f35647ddcc73.json create mode 100644 data/hfopenllm_v2/OpenBuddy/openbuddy-llama3.1-8b-v22.3-131k/5b095779-aacc-41f3-9a3f-83f64a1c0d4c.json create mode 100644 data/hfopenllm_v2/OpenBuddy/openbuddy-llama3.2-1b-v23.1-131k/7a88c95a-b253-4f36-8fde-1b0158bbf0b6.json create mode 100644 data/hfopenllm_v2/OpenBuddy/openbuddy-llama3.2-3b-v23.2-131k/7938a00e-4e11-4223-a900-fa53df168ab7.json create mode 100644 data/hfopenllm_v2/OpenBuddy/openbuddy-llama3.3-70b-v24.1-131k/8f966b4e-1baf-445f-9f10-4ba6b47aaf9b.json delete mode 100644 data/hfopenllm_v2/OpenBuddy/openbuddy-mixtral-7bx8-v18.1-32k/247ee47c-e441-4020-97e3-14e3ed8d22c9.json create mode 100644 data/hfopenllm_v2/OpenBuddy/openbuddy-mixtral-7bx8-v18.1-32k/a334d998-21a5-4108-96e3-9935507a9f8f.json create mode 100644 data/hfopenllm_v2/OpenBuddy/openbuddy-nemotron-70b-v23.1-131k/941e27c6-81da-4ce1-b1c8-544c1426cd11.json delete mode 100644 data/hfopenllm_v2/OpenBuddy/openbuddy-nemotron-70b-v23.1-131k/e4e4d8f4-7e49-4b08-8a08-97e4e2c28616.json delete mode 100644 data/hfopenllm_v2/OpenBuddy/openbuddy-nemotron-70b-v23.2-131k/b34ca7d7-6049-4f4f-a2e3-db736009fa4d.json create mode 100644 data/hfopenllm_v2/OpenBuddy/openbuddy-nemotron-70b-v23.2-131k/e409a374-685b-482d-82e4-2436dca37309.json create mode 100644 data/hfopenllm_v2/OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.1-200k/84713625-97b6-4fad-982d-41b5c500d73a.json create mode 100644 data/hfopenllm_v2/OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.3-200k/b7edd9ab-a018-4b2f-9b01-b56cbe98abda.json create mode 100644 data/hfopenllm_v2/OpenBuddy/openbuddy-qwen2.5llamaify-7b-v23.1-200k/ec896115-21ef-4337-9fdd-32a04c574a05.json delete mode 100644 data/hfopenllm_v2/OpenBuddy/openbuddy-qwq-32b-v24.1-200k/a2b990cd-e692-44fc-8b39-ac91eab85cef.json create mode 100644 
 data/hfopenllm_v2/OpenBuddy/openbuddy-qwq-32b-v24.1-200k/d8e5f49b-7bf3-41d4-a91e-c566219609f6.json
 delete mode 100644 data/hfopenllm_v2/OpenBuddy/openbuddy-qwq-32b-v24.2-200k/24684939-5eb8-40b1-99dd-1ebe693680fc.json
 create mode 100644 data/hfopenllm_v2/OpenBuddy/openbuddy-qwq-32b-v24.2-200k/ce1a92a3-6bec-410f-ab42-c567c5d23856.json
 create mode 100644 data/hfopenllm_v2/OpenBuddy/openbuddy-yi1.5-34b-v21.3-32k/0a125470-b50f-4ca0-90dc-1f6b69c3ccd4.json
 delete mode 100644 data/hfopenllm_v2/OpenBuddy/openbuddy-yi1.5-34b-v21.3-32k/f6a36220-0b31-4b0d-9262-7e0e508e64db.json
 delete mode 100644 data/hfopenllm_v2/OpenBuddy/openbuddy-zero-14b-v22.3-32k/0e288116-902d-4fef-9020-a3a4dc80e698.json
 create mode 100644 data/hfopenllm_v2/OpenBuddy/openbuddy-zero-14b-v22.3-32k/aeee0165-ac7e-4da6-8102-ba60f43587de.json
 delete mode 100644 data/hfopenllm_v2/OpenBuddy/openbuddy-zero-3b-v21.2-32k/9d135662-43d6-4b05-90cb-5d2c856b0b89.json
 create mode 100644 data/hfopenllm_v2/OpenBuddy/openbuddy-zero-3b-v21.2-32k/b47b8666-2556-45df-ba5b-9a5e94186784.json
 create mode 100644 data/hfopenllm_v2/OpenBuddy/openbuddy-zero-56b-v21.2-32k/0bde5d57-39be-4497-a2a8-d08d3c8d65f4.json
 delete mode 100644 data/hfopenllm_v2/OpenBuddy/openbuddy-zero-56b-v21.2-32k/7636a893-1404-4257-9778-653f3cfb601b.json
 create mode 100644 data/hfopenllm_v2/OpenGenerativeAI/Bifrost-14B/86599961-3ec2-4837-89a4-809f1dd7226c.json
 delete mode 100644 data/hfopenllm_v2/OpenGenerativeAI/Bifrost-14B/cde00174-ac52-42da-9641-0866739232e4.json
 delete mode 100644 data/hfopenllm_v2/OpenGenerativeAI/Bifrost/cef8e01a-071d-4ee4-997b-44679ef5b56e.json
 create mode 100644 data/hfopenllm_v2/OpenGenerativeAI/Bifrost/dc3ca25e-41b2-4206-afaa-7d2d10fd27a7.json
 delete mode 100644 data/hfopenllm_v2/OpenLLM-France/Lucie-7B-Instruct-human-data/26787f2b-8f30-4cc8-b39e-447b8c53aa85.json
 create mode 100644 data/hfopenllm_v2/OpenLLM-France/Lucie-7B-Instruct-human-data/cd77d407-3be3-4b84-8a73-34a15744de93.json
 create mode 100644 data/hfopenllm_v2/OpenLLM-France/Lucie-7B-Instruct-v1.1/1cd20db5-0225-4724-b1f9-7c32eae456e1.json
 delete mode 100644 data/hfopenllm_v2/OpenLLM-France/Lucie-7B-Instruct-v1.1/e94a0550-93fa-448a-a4a4-187fd1b7d24e.json
 delete mode 100644 data/hfopenllm_v2/OpenLLM-France/Lucie-7B-Instruct/af17be77-0ae3-4b90-ba85-a4886450cd43.json
 create mode 100644 data/hfopenllm_v2/OpenLLM-France/Lucie-7B-Instruct/dfc45dc3-51e6-454b-aee9-ea6b0714f0ca.json
 delete mode 100644 data/hfopenllm_v2/OpenLLM-France/Lucie-7B/01e4cd19-4f1f-4c30-b80f-e1d287d5d7c2.json
 create mode 100644 data/hfopenllm_v2/OpenLLM-France/Lucie-7B/3da2a408-672c-47b8-be32-61f56a15e9f3.json
 create mode 100644 data/hfopenllm_v2/OpenLeecher/llama3-8b-lima/94700c3c-f18d-4f96-a794-65bcf483fca9.json
 create mode 100644 data/hfopenllm_v2/OpenScholar/Llama-3.1_OpenScholar-8B/6f3481d4-076f-45bd-8564-d485109c7a63.json
 create mode 100644 data/hfopenllm_v2/Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2/9f5ca3b2-747a-4fd0-b382-bf7ef503ba25.json
 create mode 100644 data/hfopenllm_v2/Orenguteng/Llama-3.1-8B-Lexi-Uncensored/f1932041-263a-4841-9c8b-c6cc9fa50c21.json
 delete mode 100644 data/hfopenllm_v2/Orion-zhen/Qwen2.5-7B-Instruct-Uncensored/141239bb-c7e3-4c38-b289-12cd59f592d2.json
 create mode 100644 data/hfopenllm_v2/Orion-zhen/Qwen2.5-7B-Instruct-Uncensored/691bef38-bc9e-4f8d-b774-9d7c62eec72b.json
 create mode 100644 data/hfopenllm_v2/Orion-zhen/phi-4-abliterated/5795f693-9ebc-47c6-9d2c-185dd0d32044.json
 delete mode 100644 data/hfopenllm_v2/P0x0/Astra-v1-12B/349ae5f5-55d0-4486-a6dc-2b5644fac045.json
 create mode 100644 data/hfopenllm_v2/P0x0/Astra-v1-12B/eb83f474-0d3d-488c-bc0f-93e5d1dfb2f3.json
 delete mode 100644 data/hfopenllm_v2/PJMixers-Dev/L3.2-Instruct-Thinking-v0.1-1B/3c942d2f-0b53-498e-ab05-71d5075cb974.json
 create mode 100644 data/hfopenllm_v2/PJMixers-Dev/L3.2-Instruct-Thinking-v0.1-1B/f93b2053-11c4-4868-860f-90fbfe8288fc.json
 create mode 100644 data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.1-Instruct-Interleaved-Zeroed-13B/8984fe95-9fd3-48ff-aa5f-18df63ecd6bb.json
 delete mode 100644 data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.1-Instruct-Interleaved-Zeroed-13B/fb66b283-bfd6-4437-95b7-d74a0d8d2814.json
 create mode 100644 data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.1-RomboTiesTest-8B/a0f6f5de-578c-4290-85b5-c51aed985074.json
 create mode 100644 data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.1-RomboTiesTest2-8B/8ccc76ff-25c9-4706-b6a8-31b49f8be813.json
 delete mode 100644 data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B/1d91cdce-0bdb-4567-9296-6225db3aa0bc.json
 create mode 100644 data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B/924f8b31-506d-4df2-8a7b-d0cd66d55f6d.json
 create mode 100644 data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B/8e7dfd9f-350d-406c-811d-453f1744dd53.json
 delete mode 100644 data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B/d1875dfd-05ab-4a49-8c7f-02cddf35a695.json
 delete mode 100644 data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B/62b12d95-1da2-407c-8552-8c5e951c5c85.json
 create mode 100644 data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B/b713d1d2-351f-43a1-b77d-27723e1d4267.json
 create mode 100644 data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMixBread-v0.1-3B/322a9442-174f-4223-b839-6f8f9664d5e5.json
 delete mode 100644 data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMixBread-v0.1-3B/56f36430-4bb1-425d-ac4b-30d85237667c.json
 create mode 100644 data/hfopenllm_v2/PJMixers-Dev/Qwen2.5-RomboTiesTest-7B/b12e71d1-c435-4172-a28f-38e26791dadb.json
 create mode 100644 data/hfopenllm_v2/PJMixers/LLaMa-3-CursedStock-v2.0-8B/ad33b0e8-39c8-4118-81bd-bc86b482f122.json
 delete mode 100644 data/hfopenllm_v2/Parissa3/test-model/53cb44c7-f7bc-40fa-88e7-511b9dfab004.json
 create mode 100644 data/hfopenllm_v2/Parissa3/test-model/db8a7864-293b-45e9-995b-5301071c902d.json
 create mode 100644 data/hfopenllm_v2/Pinkstack/PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B/31e3beea-28dc-4b47-a5e9-5fafc89226db.json
 create mode 100644 data/hfopenllm_v2/Pinkstack/SuperThoughts-CoT-14B-16k-o1-QwQ/49315a95-394f-4508-8e6c-7c1d5547c257.json
 delete mode 100644 data/hfopenllm_v2/Pinkstack/SuperThoughts-CoT-14B-16k-o1-QwQ/c604f0fb-517d-45db-9e1c-6c911bce43e7.json
 create mode 100644 data/hfopenllm_v2/Pinkstack/Superthoughts-lite-1.8B-experimental-o1/375d3a94-97af-47ef-82af-afd7581663d4.json
 delete mode 100644 data/hfopenllm_v2/Pinkstack/Superthoughts-lite-1.8B-experimental-o1/fba2ce2f-6c30-4af9-ae3a-d23f39f3f963.json
 create mode 100644 data/hfopenllm_v2/Pinkstack/Superthoughts-lite-v1/77cfe896-4aa1-4bcd-a39a-f437c3f7e738.json
 delete mode 100644 data/hfopenllm_v2/Pinkstack/Superthoughts-lite-v1/ff308837-dc35-4257-a4cd-de463feb733e.json
 create mode 100644 data/hfopenllm_v2/PocketDoc/Dans-Instruct-CoreCurriculum-12b/3d69ec7d-9999-4e16-8dc9-99fad35e156e.json
 delete mode 100644 data/hfopenllm_v2/PocketDoc/Dans-Instruct-CoreCurriculum-12b/d8145a39-f1d0-4b6e-958b-a96585eeec9f.json
 delete mode 100644 data/hfopenllm_v2/PocketDoc/Dans-PersonalityEngine-V1.1.0-12b/c005ab13-1d42-4e28-802e-12438aab35a4.json
 create mode 100644 data/hfopenllm_v2/PocketDoc/Dans-PersonalityEngine-V1.1.0-12b/d2a7459b-8a12-4529-b978-c7237979f16b.json
 delete mode 100644 data/hfopenllm_v2/PocketDoc/Dans-PersonalityEngine-V1.2.0-24b/38dd1b21-b357-4daf-94b3-c4a28809e56c.json
 create mode 100644 data/hfopenllm_v2/PocketDoc/Dans-PersonalityEngine-V1.2.0-24b/e7a228ad-69de-471a-9f31-6bdc7221999c.json
 create mode 100644 data/hfopenllm_v2/PocketDoc/Dans-PersonalityEngine-v1.0.0-8b/9196ae39-adb0-4d53-8399-0ccd4d628065.json
 delete mode 100644 data/hfopenllm_v2/PocketDoc/Dans-PersonalityEngine-v1.0.0-8b/f3623b9f-3e3f-4b7b-a9f5-f0a15bf26f48.json
 delete mode 100644 data/hfopenllm_v2/PocketDoc/Dans-SakuraKaze-V1.0.0-12b/b78ef40e-91b1-401d-9576-1ac2f600b32a.json
 create mode 100644 data/hfopenllm_v2/PocketDoc/Dans-SakuraKaze-V1.0.0-12b/ea318f99-a1ab-41ed-ae5d-39c62ac40e1b.json
 create mode 100644 data/hfopenllm_v2/PowerInfer/SmallThinker-3B-Preview/05f69fd6-a77e-478d-ad86-3e83e615e892.json
 delete mode 100644 data/hfopenllm_v2/PowerInfer/SmallThinker-3B-Preview/6613aff7-8f26-4b74-b08b-37fbd7990e42.json
 create mode 100644 data/hfopenllm_v2/PranavHarshan/LaMistral-V4/5b8e9508-befb-4674-bd84-9c722a0864ce.json
 delete mode 100644 data/hfopenllm_v2/PranavHarshan/MedNarra-X1/86023703-88e2-4219-b38b-4c871e2ee381.json
 create mode 100644 data/hfopenllm_v2/PranavHarshan/MedNarra-X1/8beb3730-23e8-4b89-933d-2d3f1a1d1365.json
 create mode 100644 data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Appended/07417712-1933-4920-8964-67ba74bf6d01.json
 delete mode 100644 data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Appended/eca9180f-20d5-4bcd-9a74-e2f69c4ea4ad.json
 delete mode 100644 data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Interleaved/65d32305-4f23-4041-a107-8625822c1322.json
 create mode 100644 data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Interleaved/ae4cc05d-a65a-4f18-a99c-f133603686d1.json
 delete mode 100644 data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_32K-PoSE/195acbac-1db7-47ed-907f-98e312fc8921.json
 create mode 100644 data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_32K-PoSE/54df4d3e-0ef0-4e30-aa46-b47a4589a34c.json
 delete mode 100644 data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Appended/349bccfd-1816-4845-a1b9-2d9f4936adea.json
 create mode 100644 data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Appended/a717d466-9157-4991-8459-f39847d914a2.json
 create mode 100644 data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Interleaved/15a8789b-27de-49d1-b3e5-9b1fc9b5694e.json
 delete mode 100644 data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Interleaved/c2e26b8a-3a12-4cb8-888e-96affc8cbac9.json
 create mode 100644 data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Appended/921562fe-cc21-4ff3-93de-a62e1d4bf7e7.json
 delete mode 100644 data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Appended/a70222dc-0589-4f09-ac8c-3ff4fa72328f.json
 delete mode 100644 data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Interleaved/19eb8f3a-ca9d-4da4-8e7e-96eebfd33576.json
 create mode 100644 data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Interleaved/863969d9-e567-43cc-a0a9-7f80eaba374a.json
 create mode 100644 data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_9.86B_44Layers-Appended/2987fa45-363e-4a07-8e9f-db01586a135b.json
 delete mode 100644 data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_9.86B_44Layers-Appended/e44eddb9-9764-4bc9-be85-ec7995846da0.json
 create mode 100644 data/hfopenllm_v2/Pretergeek/openchat-3.5-0106_Rebased_Mistral-7B-v0.2/3488de21-d9a6-49e8-ba8f-d9beee9bdabe.json
 create mode 100644 data/hfopenllm_v2/PrimeIntellect/INTELLECT-1-Instruct/0cacf042-6b62-4b67-8821-97cd703788d0.json
 delete mode 100644 data/hfopenllm_v2/PrimeIntellect/INTELLECT-1-Instruct/ea823c15-3c92-4a67-a4fd-7826a9dd9e41.json
 create mode 100644 data/hfopenllm_v2/PrimeIntellect/INTELLECT-1/9f0dfceb-1332-447a-bf6f-6c6c40686a6f.json
 delete mode 100644 data/hfopenllm_v2/PrimeIntellect/INTELLECT-1/bfffc240-22ab-4cc0-97c8-466ddf472ac4.json
 create mode 100644 data/hfopenllm_v2/PrimeIntellect/INTELLECT-1/c1308f95-6d55-4ff6-b14e-1bd09b467d99.json
 delete mode 100644 data/hfopenllm_v2/PrimeIntellect/INTELLECT-1/fee7966f-3e1b-43d9-b129-b0c23aac53b5.json
 delete mode 100644 data/hfopenllm_v2/PuxAI/LUA_model/05dc0500-be97-456f-9d12-12192626ea39.json
 create mode 100644 data/hfopenllm_v2/PuxAI/LUA_model/4ab16120-8d39-4dea-aa76-5c249506848d.json
 delete mode 100644 data/hfopenllm_v2/PygmalionAI/pygmalion-6b/7cdfef58-c871-4158-b97d-ed843f7d667b.json
 create mode 100644 data/hfopenllm_v2/PygmalionAI/pygmalion-6b/f9647ea0-6464-4aa0-b1ea-a994a7bcca3c.json
 delete mode 100644 data/hfopenllm_v2/Q-bert/MetaMath-1B/713b1c64-9637-4d83-aee9-f81988fec0b5.json
 create mode 100644 data/hfopenllm_v2/Q-bert/MetaMath-1B/c5ef47ab-2e73-43d6-b9ea-1ee7e50d9df8.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/1up-14b/9ef7a4a0-b751-45ff-ab1f-d50687a3f4c3.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/1up-14b/c315527d-ea14-42a8-a002-4bb67c085fc0.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Adamant-14B-sce/7ed9dcc6-7915-4a7e-a190-07e067d2fd79.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Adamant-14B-sce/8b303795-557b-4fa1-bbc6-d36bd77ee739.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Alice-14B/3dd99496-1274-439f-b7c2-1fd731745753.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Alice-14B/7fec288e-0b0d-45c0-b0e6-17b905cd7ea3.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Alien-CoT-14B-sce/5a09783b-82da-43ae-a607-2cfea550d931.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Alien-CoT-14B-sce/dc89616f-c86d-41d0-9945-12703dc8f905.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Aura-8B-Linear/2d22ab53-547d-41bb-8700-12bc5b16c97d.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Aura-8B-Linear/6c2d191a-a2d1-459c-b2e2-5766bec62ce7.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Casa-14b-sce/09bbb732-62d8-4cec-972a-273b728df1f4.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Casa-14b-sce/121cb5fc-2fa2-4718-b325-c40014802e40.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Casa-14b-sce/8bbfa040-b16e-4116-ad3e-b3e4e58a7de6.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Casa-14b-sce/a0dde1eb-a763-4568-8122-1b280dedb2ce.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Charlie-8B-Linear/c56d7463-dad2-4c9c-8823-a4b6faa5aeb9.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Charlie-8B-Linear/c8891914-c9fb-4b4d-9592-826f04520e7b.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Chromatic-8b-sce/e77ffcb3-c7d8-4700-b4ea-fe4e5ba94223.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Chromatic-8b-sce/f626897d-5003-40fa-8020-c100748a847f.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/CoT_Phi/da237415-f34e-4cbb-9a94-3ff621f3df8d.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Dyson-14b/35c401bd-ed12-475e-afbc-e664243d90d5.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Dyson-14b/479f3bfa-d614-46a9-88c7-9891852b0d8c.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Edu-14B-Linear/a70e7642-3cc7-4719-bc22-68182baa3857.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Edu-14B-Linear/f5f0c7da-fb03-4023-81a7-801b0729a19d.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Fugazi14b/40f51424-2922-498d-bbbc-d500667a8554.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Fugazi14b/ee38e1c3-7a6b-4357-94ac-b309da33d14b.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/GZA-14B-sce/4f25d177-6bcf-4864-87a4-1beb21a7373d.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/GZA-14B-sce/cfb61ec3-ab7e-4697-892e-a8dd62518f39.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Geedorah-14B/b160ab1f-be6b-4dfa-8fa9-36fc65a64782.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Geedorah-14B/c4a79914-b049-436b-9de6-640cc3e119ee.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/GivingTree-8b-sce/9b753075-a150-4bc3-9425-2371010daf8b.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/GivingTree-8b-sce/d497a7e3-11c2-4e0c-8788-091caabede56.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/GuiltySpark-14B-ties/2b50b73e-9734-4502-b088-8d4936291aaa.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/GuiltySpark-14B-ties/4a55bcf2-e1c1-4fce-8f79-472dae869b26.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Halo-14B-sce/156424f1-2a1e-4e61-b081-bb066ee3958d.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Halo-14B-sce/5b00dd5e-0ad3-4ea0-aa0d-2327d610e6a6.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Heretic1.5b/1c80d383-1ccb-4f32-a63d-dd3954fe5f6b.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Heretic1.5b/e3d7453d-0ba6-4980-be81-827122149bb6.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Hyde-14b-sce/75065074-7ef6-41ac-be7c-496cc458640a.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Hyde-14b-sce/814ce716-6f61-4980-a8f6-7918c7b0eea5.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Imagine-v0.5-16bit/49a0287b-48d7-44db-bf20-a084919d332f.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Imagine-v0.5-16bit/ccb33ad4-98f5-4980-a442-1a1772fab792.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Imbue-14b/7b2861ee-58f9-4ac9-99ee-2ec663e1b157.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Imbue-14b/c50c07fc-b529-43c9-9f3d-0f1ff174b905.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Insom/51f419c6-1107-41c9-896b-fadbbde4f5e9.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Insom/628542f9-fac6-42a7-8ec5-5cd93f977a7e.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/InspectorDeck-14B-sce/1ac547e3-1b29-462a-aa08-1e9ef9e3f409.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/InspectorDeck-14B-sce/5b0924ae-cf52-4245-a687-91e4b1742c16.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Jekyl-8b-sce/459c2b98-c3af-4334-a4bc-13334efe49b8.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Jekyl-8b-sce/dc6a9e35-c130-4edc-93bc-5f0b6ac0e05d.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Jigsaw-14B-Linear/7533defe-b19d-4571-a403-c443ec03a31b.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Jigsaw-14B-Linear/b2780aa3-d299-4180-8441-dd54e94255cb.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Katana-8b-sce/dea8c833-7deb-43f8-9b15-acbadf4fc749.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Katana-8b-sce/f55d398d-0555-4e89-a37c-def04741a0dd.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Knot-CoT-14B-sce/63caf8f8-9e55-4ef6-ae76-ee7184a50675.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Knot-CoT-14B-sce/fe0b75bf-2035-4ffe-8cbf-d5f4c66907aa.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Lineage-14B/37f890b7-5487-46ea-b61e-d91b5349d078.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Lineage-14B/f82ccde3-bd3b-499c-8b8c-182822392cea.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Lo-Phi-14b/8a52fb4a-d6ae-4c8d-aed0-2137e0a83ea1.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Loke-14B-sce/b7cbc2fb-2c52-4c13-9266-52103421f2ee.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Loke-14B-sce/cfac443e-5c66-45e3-bf7a-7c596d01d4ff.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/MFDOOM-14B/3efa12a5-4525-4ee9-80bd-99c4b8d2ccb2.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/MFDOOM-14B/f4474361-e897-4dbb-a89e-5451a4724474.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/MFGRIMM-14B/773228d8-7e03-4ba8-87c1-f59ac5aad425.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/MFGRIMM-14B/de257b5e-4629-4f8a-b08d-d2ca372593e2.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Math_Phi4_Reason/a37aada3-104a-488a-898f-245ff257de46.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Mithril-14B-sce/8ab4e441-2efb-4510-87ea-43f3fbcc67ac.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Mithril-14B-sce/d9d655d1-d94c-483a-a3a2-ca196e1391d1.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Mononoke-14B-sce/6f2d122b-f7fe-448a-ac8b-864314e94692.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Mononoke-14B-sce/77bf7126-0cb9-43ef-8d23-5f1395f91642.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Motion-8B-Linear/73f410be-3084-4994-8406-f8ac70880626.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Motion-8B-Linear/db82138b-f915-4451-aa85-8bc4c7fdd225.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Mouse-9B/24caad7a-15fa-4820-91cc-0f544a34d173.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Mouse-9B/70e3145f-d67b-403d-af2a-1b06b2ba0f24.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Nova-14b-sce/3336c8fa-fcef-4513-946d-9254f537e418.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Nova-14b-sce/e087b221-f813-4688-8d98-17980f98ac5b.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/NovaScotia-14b-stock/8ab3ce59-d0cd-4764-98c7-c4df81bc3c23.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/NovaScotia-14b-stock/f4d03bff-3b34-497f-a17f-0379bc562f11.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/ODB-14B-sce/2ca21612-ea90-41f3-b618-3ea81c09c3ae.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/ODB-14B-sce/66743ed1-93ab-41f7-9002-0080e7f74722.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/ODB-14B-sce/79d7d2a1-dcb6-40a7-b29c-7213ebd261df.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/ODB-14B-sce/d4dc2088-9911-4966-afe9-022df89dd522.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Oasis-14B-ties/a3ef4bc2-c560-4a62-8227-2bd30120b537.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Oasis-14B-ties/ad03a075-8f24-46f6-ae04-5a04eb7061c1.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Origami-14B-sce/2d1da226-e65c-48a0-aabb-46b1cf670a82.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Origami-14B-sce/82826944-e4a1-47bd-b240-c70e21acfc51.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Phi4.Turn.R1Distill.16bit/7fb3a035-2b83-4a58-818f-16fe6d9a8ab3.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Phi4.Turn.R1Distill_v1.5.1-Tensors/87018726-9f81-47b1-883e-609afea7fb37.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Phi4Basis-14B-sce/292b9333-96c7-4fc7-bf35-78bbce9f10d3.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Ponder-14B-linear/30942374-a112-4035-a4f2-e30bff57f9ce.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Ponder-14B-linear/b44224c3-ed2c-4120-9e2a-e6286358a4da.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/RZA-14B-sce/e8a8cf1f-5bcf-45ae-b590-fb04de06b77f.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/RZA-14B-sce/f7a2c9af-c55c-4307-bfef-1ca709525d82.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Rosemary-14b/84018db9-2b85-4b6f-beff-b4930b230399.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Rosemary-14b/d9655f35-edfd-4c53-b359-559870e8019e.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Rune-14b/3ed52eaf-6b73-46ab-8ae7-3afe120fe437.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Rune-14b/afdd962d-652a-4395-92f7-c16dc874a779.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/SZA-14B-sce/2594e917-3ebd-428b-8f36-cb0da668695d.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/SZA-14B-sce/6d983237-925e-4197-a592-17cca9219bda.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Sake-20b/25a672ed-3e0e-416f-abf4-a935e63171c6.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Sake-20b/91a86644-ad96-4c66-8691-1c0b531b572c.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Spok-14b-sce/331f56ce-5e45-46d8-9143-3f66be20b699.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Spok-14b-sce/9f15293c-5668-4895-b4d0-4062cac344e7.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Sumatra-20b/6138ebe0-8483-4cfb-8d95-b334bb09e831.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Sumatra-20b/ae69fb3f-19a1-4b00-9309-8685e107aeba.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/SuperNova14b/4d16dd47-42d1-4ea6-8f1b-dc50648bceab.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/SuperNova14b/b0659361-fb53-40db-81a7-2a72771bbd1a.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/TB0-8B-sce/8f0da98a-cf9f-4cbb-8d4a-8c12d737580c.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/TB0-8B-sce/a6b0f2bf-08da-472f-b858-8be967a44cdc.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/TBL-8B-sce/4bff88c0-89fb-4d07-a83d-251c7aaeace4.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/TBL-8B-sce/57c7553d-f3e5-4a31-8c16-66aae570d8ec.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/ThinkPhi1.1-Tensors/58c31bdd-f86f-4fbb-8549-191bb9f46f02.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Venti-20b/2b97259b-d7a5-4934-b350-7b1322964899.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Venti-20b/dd25c1dd-0edf-44ca-b18c-633dbd47368f.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Venti-Blend-sce/2a030613-b5f7-4393-ac39-d2d072c913dc.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Venti-Blend-sce/e9fa96ff-d790-4948-9071-dd1376701fc1.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Vine-14b-sce/70d25d8c-96e9-45e4-b0d1-684a89278064.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Vine-14b-sce/f8c73290-c400-4f1f-a00a-516592497b0d.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Wendy-14B/13e6cad7-a063-4530-bec9-e70e4e98ccc0.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Wendy-14B/b31908fc-5e7e-45d6-835f-4e86a05b23fb.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/Wu-14b-sce/35443539-9756-466b-a36f-66adc5f68ddb.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/Wu-14b-sce/4320cb98-7f9f-4510-bb88-448ce231bae8.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/bloom-14b-stock/1a2b4a76-0feb-4404-a1ef-0408c75f2ca7.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/bloom-14b-stock/28b986d1-2e67-4462-9165-6cb8f260b6c6.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/caramel-14B/a9d4b6a9-33af-42a3-be29-d3214a171433.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/caramel-14B/fe1e21cb-7934-4022-a74a-777172310021.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/graphite-14b-sce/90871638-b828-484d-8822-95ffceb20909.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/mocha-14B/04a98dfb-8e96-444c-8df4-ed7cf72a26ea.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/mocha-14B/5c04fa63-11be-42d8-8133-4e79e08e42ad.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/mosaic-14b-sce/4fd82b3e-4b13-4e21-9253-6492f8b1feaa.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/mosaic-14b-sce/8c5c22af-f230-4d34-b80d-f42ef27e1675.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/tesseract-14b-stock/4311b63a-282b-4c16-8609-a1d4ab93ace9.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/tesseract-14b-stock/f3466a90-541b-4a08-a9c6-d5a79b2299b0.json
 delete mode 100644 data/hfopenllm_v2/Quazim0t0/time-14b-stock/2755da2c-8347-4bbd-80ee-c58e77a26f5e.json
 create mode 100644 data/hfopenllm_v2/Quazim0t0/time-14b-stock/ef9ee5ae-d92b-4143-af1b-d62a7c3c7fd4.json
 create mode 100644 data/hfopenllm_v2/Qwen/QwQ-32B-Preview/859af708-ac37-4749-bc06-73d92338d1f5.json
 create mode 100644 data/hfopenllm_v2/Qwen/QwQ-32B/e274380d-e0f7-47c3-afc3-e603e6cecf9e.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen1.5-0.5B-Chat/19810be8-ea81-4db5-9854-1830b05a5732.json
 delete mode 100644 data/hfopenllm_v2/Qwen/Qwen1.5-0.5B-Chat/96baee1a-7ea7-454f-ac8b-fe1bead3cd93.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen1.5-0.5B/1258c282-3672-4b42-9d4d-117568e17bf5.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen1.5-1.8B-Chat/9b9f6e01-238e-4893-b398-4e1c83c44dfa.json
 delete mode 100644 data/hfopenllm_v2/Qwen/Qwen1.5-1.8B-Chat/d6107bde-875e-40f6-8471-3a3507758910.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen1.5-1.8B/b267621b-dbba-4c4a-bb9f-fa85734d0f59.json
 delete mode 100644 data/hfopenllm_v2/Qwen/Qwen1.5-110B-Chat/7cfcae3d-b623-4cf0-9ac8-529db46d05e6.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen1.5-110B-Chat/a7e4e787-8e95-48a0-9d50-53ba9f05cd1c.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen1.5-110B/3d39dcab-55df-4ad3-bdc8-03ae684e4390.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen1.5-14B-Chat/1b499881-9edb-4626-a919-977393d6bef1.json
 delete mode 100644 data/hfopenllm_v2/Qwen/Qwen1.5-14B-Chat/e2cdcc99-a1b6-43ee-9cda-2e7ccbd0ad8d.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen1.5-14B/84b8970c-6c29-4ee1-93b8-c97e4a7c4950.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen1.5-32B-Chat/2e070663-2622-4a8e-bd39-7f0ef9df399e.json
 delete mode 100644 data/hfopenllm_v2/Qwen/Qwen1.5-32B-Chat/c14a0d32-1d27-4596-90d4-10a793aef9a2.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen1.5-32B/047fa91e-2dc7-4881-8254-3dfbd4a2ff1b.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen1.5-4B-Chat/6d73016e-078e-4ffe-b2ae-5b829d1456df.json
 delete mode 100644 data/hfopenllm_v2/Qwen/Qwen1.5-4B-Chat/e3417d3e-7883-45a7-a631-9e5d105788c4.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen1.5-4B/0b68b5bd-d22c-4194-9ddf-f22e9181f84d.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen1.5-7B-Chat/03d51d90-fd15-42b7-ad5f-c7326cc642a7.json
 delete mode 100644 data/hfopenllm_v2/Qwen/Qwen1.5-7B-Chat/42e3c9e4-bf1a-43ae-87e7-056f735abe03.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen1.5-7B/d3e5c939-c53a-49d6-80cd-34420dbb176a.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen1.5-MoE-A2.7B-Chat/ab321358-26f9-4577-a5fb-1f5d4b8784b4.json
 delete mode 100644 data/hfopenllm_v2/Qwen/Qwen1.5-MoE-A2.7B-Chat/daec0873-964e-459e-a1a1-49da96cd17cf.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen1.5-MoE-A2.7B/a43aae68-f12c-4a6d-b846-c498cf35f6cd.json
 delete mode 100644 data/hfopenllm_v2/Qwen/Qwen2-0.5B-Instruct/6986e9f0-d008-4418-b3cb-1e870cf57e02.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2-0.5B-Instruct/b84615c0-43c4-49ec-83fe-5d3f8e6026af.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2-0.5B/7e687d24-9e12-4ecf-b283-e222efb9473a.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2-1.5B-Instruct/4aea143c-28fd-48bb-b911-37ac3fe58220.json
 delete mode 100644 data/hfopenllm_v2/Qwen/Qwen2-1.5B-Instruct/984029c7-f957-4555-8460-dfecd99f44a1.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2-1.5B/34a8daec-bfff-4cf4-9011-0542b30c1d10.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2-57B-A14B-Instruct/3e919d7b-53db-41fb-ac93-224e2768b9c6.json
 delete mode 100644 data/hfopenllm_v2/Qwen/Qwen2-57B-A14B-Instruct/50496313-dc6c-4456-8a8c-15cd8ddbb480.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2-57B-A14B/66becca1-d92b-409f-ab56-44d05cac66fd.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2-72B-Instruct/6293b269-7c4c-44da-bd85-e51954c173a1.json
 delete mode 100644 data/hfopenllm_v2/Qwen/Qwen2-72B-Instruct/d9ae7c35-ac71-4703-9cfe-bf5fb5aa688e.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2-72B/add3b058-e7bc-4b7b-bb98-0d7039979072.json
 delete mode 100644 data/hfopenllm_v2/Qwen/Qwen2-7B-Instruct/3e1ebb01-6fbb-498c-af58-022f50247ec9.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2-7B-Instruct/db0b6b3f-e5a9-4367-ab87-e58d5c6ccd81.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2-7B/54b055d0-80ae-4bba-b729-bd77b3ec7502.json
 delete mode 100644 data/hfopenllm_v2/Qwen/Qwen2-Math-72B-Instruct/1c7bb42e-aa1c-4522-a4b0-bcc460876125.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2-Math-72B-Instruct/5c22d0b3-5082-4c6e-865c-71da03cf9378.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2-Math-7B/f8e5ee9f-519d-4ed8-bd2a-88897075f401.json
 delete mode 100644 data/hfopenllm_v2/Qwen/Qwen2-VL-72B-Instruct/2f749e28-b845-45ab-a628-8f9b6a9029d9.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2-VL-72B-Instruct/b74c3215-7bd5-42d1-9193-f4c9c6a8bec2.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2-VL-7B-Instruct/27df1e06-463b-4519-87eb-a1666ad3f98c.json
 delete mode 100644 data/hfopenllm_v2/Qwen/Qwen2-VL-7B-Instruct/6dd0eebe-ef61-431d-bf7c-c170475bed5f.json
 delete mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-0.5B-Instruct/14d1ea99-ae05-42cd-9f2f-de1a98d9846d.json
 delete mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-0.5B-Instruct/883755e2-69eb-459b-ae7f-5548914aa65e.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-0.5B-Instruct/9d975b05-7bee-462d-a33a-afa0d5af94d4.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-0.5B-Instruct/9ef9135a-473e-43a5-a460-fd3ec50226f9.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-0.5B/c57cae01-328e-447b-8945-e3cd2c4b8a7b.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-1.5B-Instruct/494c86cf-7f37-49d8-8160-b81859552c87.json
 delete mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-1.5B-Instruct/9744dd76-a8cd-4400-92a7-f10b375710ae.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-1.5B/6de5e76e-4297-4bcd-b06e-f63fa28da0e0.json
 delete mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-14B-Instruct-1M/52ff136b-084f-4ca3-a48e-83fb0bbd8ebc.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-14B-Instruct-1M/9b10cd14-82f3-4b36-a4be-5092127d68c3.json
 delete mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-14B-Instruct/1f3e04ab-9f97-4eda-9d40-669eda073ac3.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-14B-Instruct/bbd94181-0523-4543-80a7-056b041e03b7.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-14B/e10d8573-e201-460e-a931-49a1b13ceeea.json
 delete mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-32B-Instruct/c921186d-6e97-46d6-b968-894159271620.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-32B-Instruct/e2ca9477-2414-4b8a-8d22-68f9ced54ae5.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-32B/831246b8-5433-48e6-ba11-8a4239373106.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-3B-Instruct/8277994c-8bf5-4ece-9f34-4fe9a4310bbf.json
 delete mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-3B-Instruct/9fb4e863-fd72-4b60-bc20-e32e64ce99e8.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-3B/5aabc7c5-eb3a-42e0-8b40-0a08004f6e1a.json
 delete mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-72B-Instruct/9ed2a831-aa5a-4e81-b8b5-397bc8b55835.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-72B-Instruct/cbb73c83-ad94-4973-9bf5-a5e7ca4d1653.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-72B/3ed06a16-d5fe-43d3-a369-f4ed29fb3a5d.json
 delete mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-7B-Instruct-1M/f338f8b3-d2fa-46e6-b2a1-b83303521b3f.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-7B-Instruct-1M/fc817789-2f44-4d2b-b40e-2422fe33d104.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-7B-Instruct/5e1c8723-7c43-4d8f-8c7c-386c2eb6b9cf.json
 delete mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-7B-Instruct/7a336f2b-3b33-4fde-bce6-2d1e884a1b26.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-7B/b6740747-19ac-4a9c-892f-6556013ddc8b.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-Coder-14B-Instruct/3263ab46-09ae-4c24-9332-b6874d0d0330.json
 delete mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-Coder-14B-Instruct/f2295cf4-86e0-4c73-8f3d-21c6e5ccd9d9.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-Coder-14B/a8706a7e-5693-4768-a955-a448549d2e77.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-Coder-32B-Instruct/3c932329-0440-4799-886f-10bc4a5aeb09.json
 delete mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-Coder-32B-Instruct/c0ca7adb-6221-415f-8ed6-0de6439db168.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-Coder-32B/b1e42d9d-827d-4109-8d1b-182694033b21.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-Coder-7B-Instruct/0c6f0d92-3ee0-48d7-b3fc-70149911a51d.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-Coder-7B-Instruct/73b07681-8e10-414e-8922-650908f9cf6a.json
 delete mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-Coder-7B-Instruct/7629f304-5235-485b-a7f6-f5a7f91fd35c.json
 delete mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-Coder-7B-Instruct/81749833-4f2a-4883-a789-c465c11b33b6.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-Coder-7B/8b1549f8-0602-4538-842c-abe9dca7baff.json
 delete mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-Math-1.5B-Instruct/393c9602-bd87-48d7-ad95-6baf85ed3341.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-Math-1.5B-Instruct/ad395ad4-0f9f-4b49-83c9-b89fa6b6dd89.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-Math-72B-Instruct/14c01681-fbef-49c4-b737-a7baaa02d393.json
 delete mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-Math-72B-Instruct/64574dc3-4982-49c3-8526-09ebd5781175.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-Math-7B-Instruct/3ad495c0-da8e-4776-8d05-bc7dce1fe120.json
 delete mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-Math-7B-Instruct/6ba8109e-8906-420f-a780-d0bef4015e1a.json
 create mode 100644 data/hfopenllm_v2/Qwen/Qwen2.5-Math-7B/0762ca9e-f0d4-408e-9992-e91a10e0e65f.json
 create mode 100644 data/hfopenllm_v2/RDson/WomboCombo-R1-Coder-14B-Preview/ec6c1d05-cea7-445c-bed3-9eee1e1ff03d.json
 delete mode 100644 data/hfopenllm_v2/RDson/WomboCombo-R1-Coder-14B-Preview/faa623a7-1bf8-4da6-b381-7701f0446b70.json
 create mode 100644 data/hfopenllm_v2/RESMPDEV/EVA-Qwen2.5-1.5B-FRFR/1fc39812-77fb-4d0c-b9fb-706e94c40afe.json
 create mode 100644 data/hfopenllm_v2/RESMPDEV/Qwen2-Wukong-0.5B/fdc3c502-53ad-4bf7-85ce-51eaed72754b.json
 create mode 100644 data/hfopenllm_v2/RLHFlow/ArmoRM-Llama3-8B-v0.1/3f74c1c7-f349-4193-95cf-b0033112fea0.json
 create mode 100644 data/hfopenllm_v2/RLHFlow/LLaMA3-iterative-DPO-final/36a803da-83ab-4c49-8855-9344aaa7a68b.json
 delete mode 100644 data/hfopenllm_v2/RLHFlow/LLaMA3-iterative-DPO-final/8ccda2e0-9801-41b0-8491-eb36615860f2.json
 delete mode 100644 data/hfopenllm_v2/RWKV/rwkv-raven-14b/9a90826f-9062-48aa-b047-d24f4e0d85ef.json
 create mode 100644 data/hfopenllm_v2/RWKV/rwkv-raven-14b/df986996-249e-49f9-b074-91e8dcdf62e2.json
 delete mode 100644 data/hfopenllm_v2/Rakuten/RakutenAI-2.0-mini-instruct/549f9869-4b59-469b-b9fd-ea26114405a1.json
 create mode 100644 data/hfopenllm_v2/Rakuten/RakutenAI-2.0-mini-instruct/90f007e9-e323-4a82-b276-ac1b928030ca.json
 create mode 100644 data/hfopenllm_v2/Rakuten/RakutenAI-7B-chat/2b627f93-5cc7-4a5e-b682-d129396362e5.json
 delete mode 100644 data/hfopenllm_v2/Rakuten/RakutenAI-7B-chat/91e22241-7b65-44b9-a437-34b56400af7a.json
 create mode 100644 data/hfopenllm_v2/Rakuten/RakutenAI-7B/2fde07ac-d218-4cc6-947e-8ceb87eedbee.json
 delete mode 100644 data/hfopenllm_v2/Rakuten/RakutenAI-7B/cab9a80e-94a6-4e7b-8980-1fa4482bac8a.json
 create mode 100644 data/hfopenllm_v2/Replete-AI/L3-Pneuma-8B/2a141bfe-4632-4058-a232-1f2c5540c41f.json
 delete mode 100644 data/hfopenllm_v2/Replete-AI/L3-Pneuma-8B/5eddb8a8-7281-4ae2-a4bc-f174598727e3.json
 delete mode 100644 data/hfopenllm_v2/Replete-AI/L3.1-Pneuma-8B/d20e8883-4cde-45dc-9d60-10284a2a5cdb.json
 create mode 100644 data/hfopenllm_v2/Replete-AI/L3.1-Pneuma-8B/fa2d74a5-e8f6-4a1c-9310-a9b16c2e59d1.json
 delete mode 100644 data/hfopenllm_v2/Replete-AI/Llama3-8B-Instruct-Replete-Adapted/861d8edd-2acf-4593-9768-8f77488ce8a4.json
 create mode 100644 data/hfopenllm_v2/Replete-AI/Llama3-8B-Instruct-Replete-Adapted/c7c0ceff-9273-4cc3-8f8e-bd93181590ba.json
 delete mode 100644 data/hfopenllm_v2/Replete-AI/Replete-Coder-Instruct-8b-Merged/398e665d-af8e-420c-95ce-5f9f4a4988af.json
 create mode 100644 data/hfopenllm_v2/Replete-AI/Replete-Coder-Instruct-8b-Merged/c439478a-1734-4038-aa8b-bb2d12ec022d.json
 create mode 100644 data/hfopenllm_v2/Replete-AI/Replete-Coder-Llama3-8B/4a36f73a-9495-4ea2-863c-220b8ca6bf99.json
 create mode 100644 data/hfopenllm_v2/Replete-AI/Replete-Coder-Qwen2-1.5b/faa9d3b9-343a-4a9e-82c5-6bc81bc87b9c.json
 create mode 100644 data/hfopenllm_v2/Replete-AI/Replete-LLM-Qwen2-7b/a55bf380-d567-4228-b30c-57e9df31e844.json
 create mode 100644 data/hfopenllm_v2/Replete-AI/Replete-LLM-Qwen2-7b/dfd92311-4f3d-4355-8ccf-a59f29914b8f.json
 create mode 100644 data/hfopenllm_v2/Replete-AI/Replete-LLM-Qwen2-7b_Beta-Preview/d98e190e-5b5f-46eb-b701-e32d2dbef3a0.json
 create mode 100644 data/hfopenllm_v2/Replete-AI/Replete-LLM-V2-Llama-3.1-8b/32edb764-2a42-4efe-ac86-9eda81942b84.json
 create mode 100644 data/hfopenllm_v2/RezVortex/JAJUKA-WEWILLNEVERFORGETYOU-3B/36855ebd-2030-4d5d-9c42-ca049244e694.json
 delete mode 100644 data/hfopenllm_v2/RezVortex/JAJUKA-WEWILLNEVERFORGETYOU-3B/76f26fef-fa87-4cf5-a317-ea4b743e7432.json
 create mode 100644 data/hfopenllm_v2/RezVortex/Jajuka-3b/9651a0a1-4004-42f3-ad8f-2aebb38ec967.json
 delete mode 100644 data/hfopenllm_v2/RezVortex/Jajuka-3b/a41d111c-dd5d-4f77-b52d-9a2dc9f31e50.json
 delete mode 100644 data/hfopenllm_v2/Ro-xe/FMixIA-7B-DARE-0/93930443-dc12-422f-9920-470917ef8d7d.json
 create mode 100644 data/hfopenllm_v2/Ro-xe/FMixIA-7B-DARE-0/a59e55dc-e2b5-43be-8469-49eee0e98d55.json
 delete mode 100644 data/hfopenllm_v2/Ro-xe/FMixIA-7B-SLERP-27/7f08546a-3f05-4612-879c-3f293daeabd4.json
 create mode 100644 data/hfopenllm_v2/Ro-xe/FMixIA-7B-SLERP-27/a956e306-f184-4dbc-ac7a-3793ae735801.json
 delete mode 100644 data/hfopenllm_v2/Ro-xe/FMixIA-7B-TIES-1/b5d64806-0d01-4c99-9ba6-6aff88c894bd.json
 create mode 100644 data/hfopenllm_v2/Ro-xe/FMixIA-7B-TIES-1/c05cc6ce-12fd-491d-b41b-57cc14b6d34a.json
 delete mode 100644 data/hfopenllm_v2/Ro-xe/FMixIA-FrankenMerge-9.5B-PT-9/0d1c7e5e-4ddf-447b-9581-c62cedc2fedc.json
 create mode 100644 data/hfopenllm_v2/Ro-xe/FMixIA-FrankenMerge-9.5B-PT-9/415875b7-fe10-47e7-aca0-029c2f51c067.json
 create mode 100644 data/hfopenllm_v2/Rombo-Org/Rombo-LLM-V2.5-Qwen-7b/c505ee64-3d3b-48e2-9c8a-f59609a758e9.json
 create mode 100644 data/hfopenllm_v2/RubielLabarta/LogoS-7Bx2-MoE-13B-v0.2/00003185-c291-40c5-bba1-f87eae0afc08.json
 delete mode 100644 data/hfopenllm_v2/RubielLabarta/LogoS-7Bx2-MoE-13B-v0.2/63522d1e-d4bf-4071-a086-5ef016243ec1.json
 create mode 100644 data/hfopenllm_v2/SaisExperiments/Evil-Alpaca-3B-L3.2/328f61d7-677b-4a06-b464-0da42153f9ae.json
 delete mode 100644 data/hfopenllm_v2/SaisExperiments/Evil-Alpaca-3B-L3.2/f9c7c5b5-6274-4971-a81a-6f88ec07ca93.json
 delete mode 100644 data/hfopenllm_v2/SaisExperiments/Gemma-2-2B-Opus-Instruct/369f84c6-022e-46ed-8cfc-2e0b4a8e175a.json
 create mode 100644 data/hfopenllm_v2/SaisExperiments/Gemma-2-2B-Opus-Instruct/9cb5b8fd-062c-4161-9301-640980d21b9f.json
 create mode 100644 data/hfopenllm_v2/SaisExperiments/Gemma-2-2B-Stheno-Filtered/09284b75-a2f9-40ea-8135-7aa61c626fa2.json
 delete mode 100644 data/hfopenllm_v2/SaisExperiments/Not-So-Small-Alpaca-24B/98275290-dbd0-462e-9028-4daa65cd5ce3.json
 create mode 100644 data/hfopenllm_v2/SaisExperiments/Not-So-Small-Alpaca-24B/e2502331-6ac3-43bc-8218-259b44333283.json
 create mode 100644 data/hfopenllm_v2/SaisExperiments/QwOwO-7B-V1/8dde454d-aa48-4ee1-b5c6-f3353087d492.json
 delete mode 100644 data/hfopenllm_v2/SaisExperiments/QwOwO-7B-V1/9064bdc6-b84b-4022-9d7a-63b1b76fc1bc.json
 create mode 100644 data/hfopenllm_v2/SaisExperiments/RightSheep-Llama3.2-3B/662c8ed2-2407-4606-ac1e-ec7ade185d2d.json
 create mode 100644 data/hfopenllm_v2/Sakalti/Anemoi-3B/332aef8c-7c62-463e-ba3c-07ae0205d457.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/Anemoi-3B/b50b5452-b824-4fd6-b0e4-cdaea09139a2.json
 create mode 100644 data/hfopenllm_v2/Sakalti/Euphrates-14B/cfdfcf21-e445-430e-a295-946cb8c3fce9.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/Euphrates-14B/db8c1ba2-4029-45c5-b8a6-5343356266eb.json
 create mode 100644 data/hfopenllm_v2/Sakalti/Llama3.2-3B-Uranus-1/a5606b92-aa2d-44e3-a92c-47d0b38fef9c.json
 create mode 100644 data/hfopenllm_v2/Sakalti/Magro-7B-v1.1/465d473c-ef28-4725-8cac-02f2a031b22c.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/Magro-7B-v1.1/9e6c7958-689f-4437-b81a-c055d53ca33e.json
 create mode 100644 data/hfopenllm_v2/Sakalti/Neptuno-3B/2c636544-8676-4eee-8bcd-d623be0275be.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/Neptuno-3B/4c2150fc-f473-4bdc-8823-960778ccbc75.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/Neptuno-Alpha/511ac4a5-6fc8-4338-845d-859d73d57678.json
 create mode 100644 data/hfopenllm_v2/Sakalti/Neptuno-Alpha/8b332fac-1cfa-498b-853a-52ec5492ddc7.json
 create mode 100644 data/hfopenllm_v2/Sakalti/Oxyge1-33B/2bf1b38b-e90b-4fa8-b19e-47d93ff9ab4e.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/Oxyge1-33B/ee17e3a4-2036-4e57-9ada-51fe6d23ffac.json
 create mode 100644 data/hfopenllm_v2/Sakalti/Phi3.5-Comets-3.8B/69bb0243-75b2-4858-ba6b-5e70cfb516a7.json
 create mode 100644 data/hfopenllm_v2/Sakalti/Qwen2.5-1B-Instruct/4bb7e325-8741-4c09-81f6-9efdb30ef5a5.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/Qwen2.5-1B-Instruct/da01b31f-dde8-45dd-b793-c8258a09ddee.json
 create mode 100644 data/hfopenllm_v2/Sakalti/QwenTest-7/87878b74-22ce-4554-914c-03e486d13de3.json
 create mode 100644 data/hfopenllm_v2/Sakalti/SJT-0.5B/5030f8d4-f216-4f78-84f1-dd03b0324bb0.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/SJT-0.5B/7763650a-8a37-41f2-aadd-b1db7b41d0b3.json
 create mode 100644 data/hfopenllm_v2/Sakalti/SJT-1.5B-Alpha-1.1/c5e244fd-e85e-4fbb-9703-b8e733fb91bf.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/SJT-1.5B-Alpha-1.1/e3f05df1-a653-41a0-983a-4a7d86b85c60.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/SJT-1.5B-Alpha/21472871-fe74-447a-894c-80d77ae4ad0a.json
 create mode 100644 data/hfopenllm_v2/Sakalti/SJT-1.5B-Alpha/38261a01-62df-42b2-9b1d-f924598e70ef.json
 create mode 100644 data/hfopenllm_v2/Sakalti/SJT-1.7B/5736f0b5-3903-4774-a84a-c3db260d36e4.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/SJT-1.7B/6e2f01c1-ba87-4687-9db1-a0c0004bdfe1.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/SJT-14B/1169b5fd-9418-4986-940a-276d163431c0.json
 create mode 100644 data/hfopenllm_v2/Sakalti/SJT-14B/70134d58-972e-49c9-8cde-4ba2691d3dc3.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/SJT-2.4B/30b98827-5afb-4bfe-b765-9c81cb4580f4.json
 create mode 100644 data/hfopenllm_v2/Sakalti/SJT-2.4B/d4bb1440-2064-4752-bcb3-c9cec234fd1b.json
 create mode 100644 data/hfopenllm_v2/Sakalti/SJT-24B-Alpha/d9e6059e-d20b-4465-b7ba-2ee3a72562b6.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/SJT-24B-Alpha/f86649f8-8962-4496-8cd8-fed702a7e63b.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/SJT-2B-V1.1/b4e467a7-3f2d-438a-8c42-1f7da1aafd20.json
 create mode 100644 data/hfopenllm_v2/Sakalti/SJT-2B-V1.1/f8b02d65-c8a0-43eb-b48e-d1e1f7f363d6.json
 create mode 100644 data/hfopenllm_v2/Sakalti/SJT-2B/7bf23db0-877c-4700-95c8-e35dee5e57b4.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/SJT-2B/f720d81c-04e1-4f8a-b452-ae52cc7d9fb2.json
 create mode 100644 data/hfopenllm_v2/Sakalti/SJT-3.7B/07f8351e-c7c6-463f-9e91-ee1d3bb2b35c.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/SJT-3.7B/e82f1a2e-f679-47b8-9fbb-a53116e2195b.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/SJT-4B/5115cea0-d3bf-486b-9609-36698e845653.json
 create mode 100644 data/hfopenllm_v2/Sakalti/SJT-4B/8535ffae-f39d-46ed-89bb-a1656885db91.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/SJT-7.5B/57934f76-c8bd-4264-a3b4-14234dda0719.json
 create mode 100644 data/hfopenllm_v2/Sakalti/SJT-7.5B/5e832121-9a67-44d9-973d-fffdb1b37975.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/SJT-7B-V1.1-Multilingal/03cb237a-0519-449c-b9c7-d9fbb4d119cd.json
 create mode 100644 data/hfopenllm_v2/Sakalti/SJT-7B-V1.1-Multilingal/92d3f67d-a026-49e3-a440-68c10fb358ae.json
 create mode 100644 data/hfopenllm_v2/Sakalti/SJT-7B-V1.1/9d0baaef-bd31-4a96-bb2a-e92b62b748d2.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/SJT-7B-V1.1/b1527426-9cc0-4eb5-af52-30e36e0e04fd.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/SJT-8B-V1.1/0cf37c9e-9218-4366-8065-befea0d2b749.json
 create mode 100644 data/hfopenllm_v2/Sakalti/SJT-8B-V1.1/489e8e84-5e30-46fa-a421-f52308f051e7.json
 create mode 100644 data/hfopenllm_v2/Sakalti/SJT-8B/a208f807-c930-4e81-8ebd-dcbb4db76442.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/SJT-8B/cb136400-7d0e-4194-9a45-1646ff8cac95.json
 create mode 100644 data/hfopenllm_v2/Sakalti/SJT-900M/4956539d-a255-4c56-877f-257e463fa3e4.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/SJT-900M/ff057dd9-0102-485d-88d7-7e50145b5f7e.json
 create mode 100644 data/hfopenllm_v2/Sakalti/SJT-Moe2x7.5B/3451eb65-020c-4e34-9128-7410e6b293cd.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/SJT-Moe2x7.5B/e95c6f08-ab57-49a2-a83b-6a77b5ab69d9.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/SJTPass-2/7f508bd9-7f95-453d-9e96-747ce91a64b3.json
 create mode 100644 data/hfopenllm_v2/Sakalti/SJTPass-2/b5cd0061-e4dd-4049-a51e-b16490e69120.json
 create mode 100644 data/hfopenllm_v2/Sakalti/SJTPass-4/c4686af6-0b7b-4df3-9152-14a3ef087b7f.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/SJTPass-4/f814a3bd-b82e-4769-9ef7-a4670420bca0.json
 create mode 100644 data/hfopenllm_v2/Sakalti/SJTPass-5/155885ca-11e7-4cd2-b26c-53e001e2a6f9.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/SJTPass-5/5d5bda4e-8994-4cef-9772-d4bd435e9644.json
 create mode 100644 data/hfopenllm_v2/Sakalti/Saba-Passthrough-2/d9ca5411-def6-43b3-a522-595131d8e5e6.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/Saba-Passthrough-2/df1e7d22-c300-4466-92b7-770078a1dc09.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/Saba1-1.8B/d8cc8e9e-b672-4b26-a454-f97cd7a08648.json
 create mode 100644 data/hfopenllm_v2/Sakalti/Saba1-1.8B/e54553ab-0897-4cb5-9213-5bb72758d2b5.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/Saba1-7B/1200ed26-8450-4788-a1bf-20f2c9b9b2c0.json
 create mode 100644 data/hfopenllm_v2/Sakalti/Saba1-7B/eed48cdc-18db-4c03-84bf-d2d50e3328b0.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/Saba1.5-1.5B/a76090d4-a0fb-45c8-b28c-fa225ec3d11c.json
 create mode 100644 data/hfopenllm_v2/Sakalti/Saba1.5-1.5B/d7952aef-37e2-4c15-a1a4-598690773bbb.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/Saba1.5-Pro-3B/14e1dd44-92f1-4d97-be67-fa98c9802ff1.json
 create mode 100644 data/hfopenllm_v2/Sakalti/Saba1.5-Pro-3B/5e1e1376-bb22-4fc9-a1d6-3f2fe7d302b9.json
 create mode 100644 data/hfopenllm_v2/Sakalti/Saba2-14B-Preview/cfdae559-f3f1-4a78-b4cc-fbfb8bb37b16.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/Saba2-14B-Preview/e3e0180f-bbd8-491a-a41b-54801e9f71de.json
 create mode 100644 data/hfopenllm_v2/Sakalti/Saba2-3B/a12208ce-e9e1-4476-8054-0d565efad92c.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/Saba2-3B/b759686f-082e-44b6-9cf8-44a48f66c136.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/Sailor-japanese/8449b01f-c489-4008-97d4-aa3f0394cda4.json
 create mode 100644 data/hfopenllm_v2/Sakalti/Sailor-japanese/f46e1eeb-8b8b-4d47-9510-445109b5518b.json
 create mode 100644 data/hfopenllm_v2/Sakalti/Saka-1.5B/7dc4970f-ce35-4ffa-9052-2ab40abb1e55.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/Saka-1.5B/854baf47-af97-46dd-acfe-a3710976fd57.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/Saka-14B/53556d59-3b32-44bc-9932-c52f05939b57.json
 create mode 100644 data/hfopenllm_v2/Sakalti/Saka-14B/823e886a-1431-4078-81a3-4b941983461d.json
 create mode 100644 data/hfopenllm_v2/Sakalti/Saka-24B/583609f0-de5b-43cd-a667-bb2c36679fd2.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/Saka-24B/a5e13aa9-bf5f-4201-bc93-504521141f43.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/Saka-7.2B/07f036d7-af59-49a8-8346-8a9a9dd21439.json
 create mode 100644 data/hfopenllm_v2/Sakalti/Saka-7.2B/2d2cea8b-167e-4d63-b01c-537f372672f9.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/Saka-7.6B/10923a84-a611-4830-b84c-0e91c0628541.json
 create mode 100644 data/hfopenllm_v2/Sakalti/Saka-7.6B/f584f596-3a17-404a-81a2-3033ad38cad6.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/SakaMoe-3x1.6B-Instruct/e806f2f4-0a10-49f6-a67e-dc1dd0a59ede.json
 create mode 100644 data/hfopenllm_v2/Sakalti/SakaMoe-3x1.6B-Instruct/ebb0930f-92be-4e1b-a2a6-779f69d2151c.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/SakalFusion-7B-Alpha/2329f6f2-228a-400b-9b2d-4ad6dd278b79.json
 create mode 100644 data/hfopenllm_v2/Sakalti/SakalFusion-7B-Alpha/b8926567-e208-442e-8ba8-c6dd4ecc5c4a.json
 create mode 100644 data/hfopenllm_v2/Sakalti/SakalFusion-7B-Beta/4bf6efe1-81fc-48f6-96ba-8df9ffbef2f2.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/SakalFusion-7B-Beta/537a91f9-b1f3-49bf-bef7-a9ef8578c284.json
 create mode 100644 data/hfopenllm_v2/Sakalti/Tara-3.8B-v1.1/05ffcb7a-2694-4276-bf45-73e1110bc494.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/Tara-3.8B-v1.1/cd884e16-7e4d-4d17-8bad-5819604e0384.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/light-1.1-3B/9da5b03b-0207-4e98-a5bf-5a658225e78f.json
 create mode 100644 data/hfopenllm_v2/Sakalti/light-1.1-3B/dc3b944b-a57a-44ab-87ac-8e1882b7bcce.json
 create mode 100644 data/hfopenllm_v2/Sakalti/light-3B/154f70b4-d77c-4d1b-b85c-bc81fe8162bd.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/light-3B/a1593642-8d60-4680-90aa-8c3789d536d6.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/light-3b-beta/2a4293ca-2434-4752-a08f-163257e0fde4.json
 create mode 100644 data/hfopenllm_v2/Sakalti/light-3b-beta/998316d2-389a-4ce0-b0b0-0430c1361de7.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/light-7b-beta/a66efce1-f6d2-4fad-964b-cc4e80012145.json
 create mode 100644 data/hfopenllm_v2/Sakalti/light-7b-beta/ce803cde-6e23-433c-a4d2-38c5cb5ba14b.json
 create mode 100644 data/hfopenllm_v2/Sakalti/llama-3-yanyuedao-8b-instruct/2519485b-47cd-497c-a349-9e69db0266f3.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/llama-3-yanyuedao-8b-instruct/cb550de6-4cd6-411e-9426-dc12421404ad.json
 create mode 100644 data/hfopenllm_v2/Sakalti/magro-7B/56d86e26-4ee6-4652-9b7b-a538238a24d4.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/magro-7B/c2c87be8-4137-4bcc-8cbe-4589d193e94d.json
 create mode 100644 data/hfopenllm_v2/Sakalti/mergekit-01/416b89e4-5e8a-4131-9403-e8967a4127b8.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/mergekit-01/dd01becb-c2c0-4593-ac1e-db2ff11aa17b.json
 create mode 100644 data/hfopenllm_v2/Sakalti/mergekit-della_linear-vmeykci/347a90e8-d8b7-4266-8242-ceac865796a0.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/mergekit-della_linear-vmeykci/a4bd1768-2382-47fe-a8bd-6e42bda06d2f.json
 create mode 100644 data/hfopenllm_v2/Sakalti/model-3/389f7ab8-b30e-4d0c-b9a4-625e74a1f73f.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/model-3/efd2a4d7-afcd-4653-ad4f-7d4f7206be95.json
 create mode 100644 data/hfopenllm_v2/Sakalti/qwen2.5-2.3B/6ae33b7f-53a1-45c5-8b0b-d462188c3f9d.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/tara-3.8B/695d7b01-14e6-40e4-b398-541e87a812c8.json
 create mode 100644 data/hfopenllm_v2/Sakalti/tara-3.8B/d96fb0b2-7cba-4cc4-a5f4-b8a451754857.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/ultiima-14B-v0.2/f3f888bb-5e99-4521-83b2-4e182f492220.json
 create mode 100644 data/hfopenllm_v2/Sakalti/ultiima-14B-v0.2/f8d362f6-eafc-4d11-bc40-d169d69d3a95.json
 create mode 100644 data/hfopenllm_v2/Sakalti/ultiima-14B-v0.3/4bacd3dd-44c2-42d8-98c0-3eeb920dc0f0.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/ultiima-14B-v0.3/5cd3794f-990f-4965-9fbc-7faf3216e808.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/ultiima-14B-v0.4/688f9751-e261-41c6-a7a4-2dc33a702e09.json
 create mode 100644 data/hfopenllm_v2/Sakalti/ultiima-14B-v0.4/de073f45-0d14-4f8a-9d3b-d4fd961186b8.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/ultiima-14B/abf448a9-decf-432d-8883-6e1492a7c040.json
 create mode 100644 data/hfopenllm_v2/Sakalti/ultiima-14B/fd88d234-b3f9-4f48-896c-af58f1a69880.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/ultiima-32B/18f686ca-453d-4a0c-9f1a-e2f4ba53399c.json
 create mode 100644 data/hfopenllm_v2/Sakalti/ultiima-32B/273745b1-3761-463e-b9ab-7860968064eb.json
 create mode 100644 data/hfopenllm_v2/Sakalti/ultiima-72B-v1.5/101d84d3-e741-4eb2-bd8a-db6c12022fe2.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/ultiima-72B-v1.5/258aae52-b934-4ba1-bdb0-e15bd8277234.json
 create mode 100644 data/hfopenllm_v2/Sakalti/ultiima-72B/9c82deca-1998-4506-b038-c5dd592324d8.json
 delete mode 100644 data/hfopenllm_v2/Sakalti/ultiima-72B/cce8480a-353b-4f9b-8f6f-b2f1e9ae601a.json
 delete mode 100644 data/hfopenllm_v2/Salesforce/LLaMA-3-8B-SFR-Iterative-DPO-R/1bf65062-4526-407d-ba4f-866b045dbf3b.json
 create mode 100644 data/hfopenllm_v2/Salesforce/LLaMA-3-8B-SFR-Iterative-DPO-R/da620a94-4c0d-4c50-9619-10e12001fb5d.json
 create mode 100644 data/hfopenllm_v2/SanjiWatsuki/Kunoichi-DPO-v2-7B/51dade8f-34e7-4237-8691-22655249bf76.json
 delete mode 100644 data/hfopenllm_v2/SanjiWatsuki/Kunoichi-DPO-v2-7B/dc7243af-efa9-4169-8d31-36ef75dfe2e3.json
 delete mode 100644 data/hfopenllm_v2/SanjiWatsuki/Silicon-Maid-7B/5d7ffac9-a734-44ef-aa1e-43ddbe68fd6a.json
 create mode 100644 data/hfopenllm_v2/SanjiWatsuki/Silicon-Maid-7B/cdd59385-0a54-4ca1-b24d-9316a70f2875.json
 create mode 100644 data/hfopenllm_v2/Sao10K/70B-L3.3-Cirrus-x1/514a3103-e8a1-49e8-b9da-a85963f5b3dd.json
 delete mode 100644 data/hfopenllm_v2/Sao10K/70B-L3.3-Cirrus-x1/660f8ede-1b7f-4438-8a97-51db77058725.json
 delete mode 100644 data/hfopenllm_v2/Sao10K/Fimbulvetr-11B-v2/135ade7c-f0d1-495a-a5b5-c95712cf0c0f.json
 create mode 100644 data/hfopenllm_v2/Sao10K/Fimbulvetr-11B-v2/daafaafa-1e00-4433-95f3-91c169598ebd.json
 delete mode 100644 data/hfopenllm_v2/Sao10K/L3-70B-Euryale-v2.1/09aab7d9-93ac-4aff-840a-d4ccfb0b469d.json
 create mode 100644 data/hfopenllm_v2/Sao10K/L3-70B-Euryale-v2.1/50e53ad5-8693-44c1-b5c7-45b91d7e0ae4.json
 create mode 100644 data/hfopenllm_v2/Sao10K/L3-70B-Euryale-v2.1/bda5d02f-7973-41a3-8f8e-4e33a12b74e0.json
 delete mode 100644 data/hfopenllm_v2/Sao10K/L3-70B-Euryale-v2.1/d730a2be-1cd8-4851-9ecf-55139af1e8f7.json
 create mode 100644 data/hfopenllm_v2/Sao10K/L3-8B-Lunaris-v1/99ff5ca5-4409-4d9c-9ec0-4cf392afeff2.json
 delete mode 100644 data/hfopenllm_v2/Sao10K/L3-8B-Lunaris-v1/e15ed4e3-d33f-4dad-98da-e1dad098a6a1.json
 create mode 100644 data/hfopenllm_v2/Sao10K/L3-8B-Niitama-v1/362f5875-4dbc-4e68-90ce-789f692bb533.json
 delete mode 100644 data/hfopenllm_v2/Sao10K/L3-8B-Niitama-v1/9c10e944-3955-4478-9d07-f79769d6b884.json
 delete mode 100644 data/hfopenllm_v2/Sao10K/L3-8B-Stheno-v3.2/85a94072-ac79-4c14-abaa-9a6424a03ab5.json
 create mode 100644 data/hfopenllm_v2/Sao10K/L3-8B-Stheno-v3.2/fdb5faf6-2cdd-42bb-b154-d6e93b2348bf.json
 delete mode 100644 data/hfopenllm_v2/Sao10K/L3-8B-Stheno-v3.3-32K/279b82ae-62b2-4703-85f2-1e79e42366f0.json
 create mode 100644 data/hfopenllm_v2/Sao10K/L3-8B-Stheno-v3.3-32K/93f829b8-b8d9-4389-a210-2a38c3a30edb.json
 delete mode 100644 data/hfopenllm_v2/Sao10K/MN-12B-Lyra-v3/2c83813a-8254-4765-9367-efb9ad8c5e6c.json
 create mode 100644 data/hfopenllm_v2/Sao10K/MN-12B-Lyra-v3/6ec3554d-377b-4bf6-88ef-8a4c9e70f485.json
 delete mode 100644 data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V1-32B/482fbdd6-6f39-4971-ac65-1e5e181b667f.json
 create mode 100644 data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V1-32B/70d749cf-2e92-4847-86de-7964fc8eb990.json
 delete mode 100644 data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V2-32B/0b1758f7-4aee-40a2-b33e-f519107b6687.json
 create mode 100644 data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V2-32B/623f2b04-6cd7-4ea0-8844-badb0ff6c9c6.json
 delete mode 100644 data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V3-32B/b206b1c9-3469-4b77-b85a-dcd3c6394c67.json
 create mode 100644 data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V3-32B/e1aca741-2765-4e47-b6a1-49f3d9532432.json
 create mode 100644 data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V4-32B/4f42366e-e6aa-4974-9a40-5781e350616d.json
 delete mode 100644 data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V4-32B/52d4b2fe-cbd1-431f-b0e7-04ebfbe852ca.json
 create mode 100644 data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V5-32B/4ec2231d-c012-4ad3-830c-8ff86c977202.json
 delete mode 100644 data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V5-32B/b1b0aac0-2921-44ab-ac1b-873b715e9b52.json
 create mode 100644 data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V6-32B/1d2e5513-bd0c-4795-8487-f5266c6e368f.json
 delete mode 100644 data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V6-32B/977a0388-5c46-42ab-bb93-91f036963f8c.json
 create mode 100644 data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V2-27B/104172b7-86f5-410a-a454-63e1cfbeb87f.json
 delete mode 100644 data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V2-27B/52438151-a1c8-440c-a9be-3670b18c1ef6.json
 delete mode 100644 data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V3-27B/993cc036-0e33-4d0e-b1b3-f97a9645f4c5.json
 create mode 100644 data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V3-27B/d28e04ac-7d18-43fb-80b8-82c0662fec79.json
 create mode 100644 data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Superb-22B/20bb3819-9d85-4d84-99ba-65e33965f0c5.json
 delete mode 100644 data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Superb-22B/53a6fd3e-37c5-4abc-b387-0ef9f4225760.json
 create mode 100644 data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Superb-27B/3a4bdf58-0137-4d85-b567-59b3fed3dad5.json
 delete mode 100644 data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Superb-27B/420f358d-c7a0-4bb5-9d0a-6c44e1f2a354.json
 create mode 100644 data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Superb-27B/04f843ba-947c-4732-979c-2aeae7d34e5a.json
 delete mode 100644 data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Superb-27B/e7007251-609e-4c81-86cf-d6fb79c896c2.json
 create mode 100644 data/hfopenllm_v2/SeaLLMs/SeaLLM-7B-v2.5/173a31d3-7d12-4ab1-a963-005a81aee767.json
 delete mode 100644 data/hfopenllm_v2/SeaLLMs/SeaLLM-7B-v2.5/7117b360-ef16-4da9-9226-b66b6aac9703.json
 delete mode 100644 data/hfopenllm_v2/SeaLLMs/SeaLLM-7B-v2/8f41a438-e9b7-43c6-b0b2-447a71ac360f.json
 create mode 100644 data/hfopenllm_v2/SeaLLMs/SeaLLM-7B-v2/d0555736-b614-43ca-91d7-8264e3566872.json
 create mode 100644 data/hfopenllm_v2/SeaLLMs/SeaLLMs-v3-7B-Chat/4b7b13b7-4aee-4462-87e6-aa6c15068236.json
 delete mode 100644 data/hfopenllm_v2/SeaLLMs/SeaLLMs-v3-7B-Chat/f119b2b5-2303-4772-9ae0-ce8f573f86c3.json
 create mode 100644 data/hfopenllm_v2/SenseLLM/ReflectionCoder-CL-34B/4b1f9ce5-bb12-42e3-b0e0-afaa784b0c4c.json
 delete mode 100644 data/hfopenllm_v2/SenseLLM/ReflectionCoder-CL-34B/5d7a3d90-8017-4415-a1da-eb70f6145fe4.json
 delete mode 100644 data/hfopenllm_v2/SenseLLM/ReflectionCoder-DS-33B/2ee4584d-b18c-44dd-af63-22c28b92e107.json
 create mode 100644 data/hfopenllm_v2/SenseLLM/ReflectionCoder-DS-33B/acbcd5a5-bcd8-4209-b35f-425feada7e8b.json
 create mode 100644 data/hfopenllm_v2/SentientAGI/Dobby-Mini-Leashed-Llama-3.1-8B/cb9a415f-1a02-46ad-a731-bf825ddd78ae.json
 create mode 100644 data/hfopenllm_v2/SentientAGI/Dobby-Mini-Unhinged-Llama-3.1-8B/92cde6db-47f4-43c6-9ad5-643c35faa226.json
 create mode 100644 data/hfopenllm_v2/SeppeV/SmolLM_pretrained_with_sft_trained_with_1pc_data_on_a_preference_dpo/5e88a037-f9bd-4b39-944f-f0781bb7884f.json
 delete mode 100644 data/hfopenllm_v2/SeppeV/SmolLM_pretrained_with_sft_trained_with_1pc_data_on_a_preference_dpo/ff284b60-0c7c-4825-af77-5922831cb3b8.json
 delete mode 100644 data/hfopenllm_v2/Sharathhebbar24/SSH_355M/9ff82d83-2a89-48d8-8ad0-91637a77bc76.json
 create mode 100644 data/hfopenllm_v2/Sharathhebbar24/SSH_355M/d4b08f5d-5add-49f4-b8db-c1a12e0a5313.json
 create mode 100644 data/hfopenllm_v2/Sharathhebbar24/chat_gpt2_dpo/ac5adf39-f0a4-439b-9873-9141e0a554b1.json
 create mode 100644 data/hfopenllm_v2/Shreyash2010/Uma-4x4B-Instruct-v0.1/62965c92-cdf4-4a3b-b035-990abaab615c.json
 delete mode 100644 data/hfopenllm_v2/Shreyash2010/Uma-4x4B-Instruct-v0.1/83fa529b-8c61-4017-92a8-ec0f46eb7bba.json
 create mode 100644 data/hfopenllm_v2/Sicarius-Prototyping/Brainy_LLAMA/3866ece8-d70a-4061-9e86-0798ecd98bd6.json
 delete mode 100644 data/hfopenllm_v2/Sicarius-Prototyping/Micropenis_1B/1ce9038a-7f1f-4b79-9fbc-9e78660094b3.json
 create mode 100644 data/hfopenllm_v2/Sicarius-Prototyping/Micropenis_1B/ff484d0e-bb14-4a80-ae29-2351b03cf278.json
 create mode 100644 data/hfopenllm_v2/Sicarius-Prototyping/bacon_and_food/06ac1718-fe71-4e05-a47f-1200e067336c.json
 delete mode 100644 data/hfopenllm_v2/Sicarius-Prototyping/bacon_and_food/af3374c8-5a23-4a87-990b-123803107ed8.json
 delete mode 100644 data/hfopenllm_v2/SicariusSicariiStuff/2B-ad/31fd60ef-db8f-4785-b486-7a06f1cdf981.json
 create mode 100644 data/hfopenllm_v2/SicariusSicariiStuff/2B-ad/4ddb1616-7889-45ef-96de-823fee338e1d.json
 create mode 100644 data/hfopenllm_v2/SicariusSicariiStuff/2B_or_not_2B/487dd91b-5bc4-4355-90d3-c82ecc789ab3.json
 delete mode 100644 data/hfopenllm_v2/SicariusSicariiStuff/2B_or_not_2B/983cf552-1ab1-49ba-aab0-1e644e9a7acb.json
 create mode 100644 data/hfopenllm_v2/SicariusSicariiStuff/Dusk_Rainbow/a74e86d9-8b94-4f60-8f0c-73cc4b04d905.json
 delete mode 100644 data/hfopenllm_v2/SicariusSicariiStuff/Dusk_Rainbow/e8f1d0e1-4086-4645-983b-b9470a22b522.json
 delete mode 100644 data/hfopenllm_v2/SicariusSicariiStuff/Eximius_Persona_5B/98406fba-a2e4-4afd-a121-e33a723d2eb6.json
 create mode 100644 data/hfopenllm_v2/SicariusSicariiStuff/Eximius_Persona_5B/9a9239ab-9e0e-449b-bd1b-6ec280fad505.json
 create mode 100644 data/hfopenllm_v2/SicariusSicariiStuff/Impish_LLAMA_3B/2c710cd5-75a6-46b7-8356-212da7bf864d.json
 create mode 100644 data/hfopenllm_v2/SicariusSicariiStuff/Impish_Mind_8B/377d5240-73b5-48d0-bbdc-0960ad1d9069.json
 delete mode 100644 data/hfopenllm_v2/SicariusSicariiStuff/Impish_Mind_8B/3a0633f1-070a-416d-a7ab-f41dd44f577d.json
 create mode 100644 data/hfopenllm_v2/SicariusSicariiStuff/Impish_QWEN_14B-1M/9f31a6da-c5bd-4143-b2f9-715c0e9f7b74.json
 create mode 100644 data/hfopenllm_v2/SicariusSicariiStuff/Impish_QWEN_7B-1M/104a0157-c614-44cf-b6cc-9f15dab4b187.json
 create mode 100644 data/hfopenllm_v2/SicariusSicariiStuff/LLAMA-3_8B_Unaligned_BETA/bb379093-c169-44bd-ac86-edb8ab8fc225.json
 create mode 100644 data/hfopenllm_v2/SicariusSicariiStuff/Phi-Line_14B/e29001c0-17c0-4deb-8ca2-ce9ad06d8cb3.json
 create mode 100644 data/hfopenllm_v2/SicariusSicariiStuff/Phi-lthy4/43d87bf5-2620-4f8e-a8b6-f86fc157d987.json
 create mode 100644 data/hfopenllm_v2/SicariusSicariiStuff/Qwen2.5-14B_Uncencored/735d9d75-d9d1-4553-b7cf-f8e7c2e65218.json
 create mode 100644 data/hfopenllm_v2/SicariusSicariiStuff/Qwen2.5-14B_Uncensored/0c6dcc87-343c-4973-a589-3e3393829184.json
 create mode 100644 data/hfopenllm_v2/SicariusSicariiStuff/Qwen2.5-14B_Uncensored_Instruct/7c1d1657-e9ae-433f-be9d-523431bfc7ae.json
 create mode 100644 data/hfopenllm_v2/SicariusSicariiStuff/Redemption_Wind_24B/0b2d9a65-c028-4f4b-a280-dc0c35ac9516.json
 delete mode 100644 data/hfopenllm_v2/SicariusSicariiStuff/Redemption_Wind_24B/21216e0b-dc97-4502-ba3d-d47ad1ac73b2.json
 delete mode 100644 data/hfopenllm_v2/SicariusSicariiStuff/Winged_Imp_8B/dd1936aa-9b21-466d-b74a-807fafd9f24a.json
 create mode 100644 data/hfopenllm_v2/SicariusSicariiStuff/Winged_Imp_8B/e87e1d3f-1476-499d-a9f3-b6463b429262.json
 delete mode 100644 data/hfopenllm_v2/SicariusSicariiStuff/Wingless_Imp_8B/2304646d-a399-40c0-8577-0bab9ad2ff3c.json
 create mode 100644 data/hfopenllm_v2/SicariusSicariiStuff/Wingless_Imp_8B/246e8450-3c53-4bde-99bb-5663f751e88e.json
 create mode 100644 data/hfopenllm_v2/SicariusSicariiStuff/Zion_Alpha/496b9e45-2f64-456e-b35e-12a94c5643b1.json
 delete mode 100644 data/hfopenllm_v2/SicariusSicariiStuff/Zion_Alpha/9d6d36b1-f8ad-4cc8-b904-c7e3b0a923e4.json
 create mode 100644 data/hfopenllm_v2/SicariusSicariiStuff/dn_ep02/05890047-a95a-433e-b6b6-fb037592cdd1.json
 delete mode 100644 data/hfopenllm_v2/SicariusSicariiStuff/dn_ep02/f7f3caa2-0468-4dfb-a817-bb5cdc977911.json
 create mode 100644 data/hfopenllm_v2/SkyOrbis/SKY-Ko-Llama3.1-8B-lora-epoch1/4a30580c-1d25-49d4-984d-2d28ef3a5656.json
 create mode 100644 data/hfopenllm_v2/SkyOrbis/SKY-Ko-Llama3.1-8B-lora/696d7966-d140-4f43-91df-54f02247b34f.json
 create mode 100644 data/hfopenllm_v2/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch3/fdf10ab8-e3f9-49e6-8fd0-ed116868c217.json
 create mode 100644 data/hfopenllm_v2/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch5/9ac16d1f-d894-414d-8a14-110e971d0ba6.json
 create mode 100644 data/hfopenllm_v2/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch3/2eb01e0e-8f7b-4956-9a2d-b32ecaa936f6.json
 create mode 100644 data/hfopenllm_v2/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch5/3b221b0e-6158-471f-bcd2-b09514f28bd7.json
 create mode 100644 data/hfopenllm_v2/SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch1/c8af8428-aab6-4d19-b185-2b437c0334fa.json
 create mode 100644 data/hfopenllm_v2/SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch2/c617d12b-c37f-47ef-9704-e19774c67aeb.json
 create mode 100644 data/hfopenllm_v2/SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch3/577f31e2-1808-45e2-a528-5933019cfa85.json
 create mode 100644 data/hfopenllm_v2/SkyOrbis/SKY-Ko-Qwen2.5-3B-Instruct/7bd7f5c8-be9e-473e-be18-03ad22a195ee.json
 delete mode 100644 data/hfopenllm_v2/SkyOrbis/SKY-Ko-Qwen2.5-3B-Instruct/bdcf5d38-55d2-4f55-8bd1-7f4cd94f758c.json
 create mode 100644 data/hfopenllm_v2/SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-15000/5036a549-5583-4775-935a-1a12b6de3e7d.json
 delete mode 100644 data/hfopenllm_v2/SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-15000/7875e792-80dd-4fa8-9743-b8ef42a4cdb7.json
 create mode 100644 data/hfopenllm_v2/SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-5000/5c0ffff9-542c-424e-88e9-89584e686e12.json
 delete mode 100644 data/hfopenllm_v2/SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-5000/9354b915-68cd-47ca-a1e8-7481a8b33c49.json
 create mode 100644 data/hfopenllm_v2/Skywork/Skywork-Reward-Gemma-2-27B-v0.2/5c6a045d-2c90-4938-9185-9c1a0f82903a.json
 create mode 100644 data/hfopenllm_v2/Skywork/Skywork-o1-Open-Llama-3.1-8B/02480176-2058-4e71-a970-9698be8d235e.json
 create mode 100644 data/hfopenllm_v2/Solshine/Brimful-merged-replete/4be1e5b4-254c-4287-907d-cc845042de37.json
 delete mode 100644 data/hfopenllm_v2/Solshine/Brimful-merged-replete/6523a08c-7a43-4784-9650-e1d5144fcfcf.json
 create mode 100644 data/hfopenllm_v2/Solshine/Llama-3-1-big-thoughtful-passthrough-merge-2/21b51852-5cad-414e-92d5-31878f025d67.json
 create mode 100644 data/hfopenllm_v2/Sorawiz/Gemma-9B-Base/9eb07d4a-1f01-4696-9137-d477ffca43be.json
 create mode 100644 data/hfopenllm_v2/Sorawiz/Gemma-Creative-9B-Base/4236485b-aa92-4bc4-a652-17ed3231ecf4.json
 delete mode 100644 data/hfopenllm_v2/Sourjayon/DeepSeek-R1-8b-Sify/55a6c2c7-d29e-43a2-abd6-435117967a5d.json
 create mode 100644 data/hfopenllm_v2/Sourjayon/DeepSeek-R1-8b-Sify/9c0d6b71-8c6a-4294-961c-972a002b847f.json
 delete mode 100644 data/hfopenllm_v2/Sourjayon/DeepSeek-R1-ForumNXT/101d8dec-2e39-47d1-b76d-d91d6562feff.json
 create mode 100644 data/hfopenllm_v2/Sourjayon/DeepSeek-R1-ForumNXT/d1e906d5-8f0d-49c2-88c3-cf71774de600.json
 delete mode 100644 data/hfopenllm_v2/SpaceYL/ECE_Poirot/32feb55a-fde5-4bbd-b93e-abffc1a7e573.json
 create mode 100644 data/hfopenllm_v2/SpaceYL/ECE_Poirot/798e4f83-6262-4d5b-a854-6ff114167209.json
 delete mode 100644 data/hfopenllm_v2/Spestly/Athena-1-3B/29d6834e-38f7-472f-86be-79a8fce03989.json
 create mode 100644 data/hfopenllm_v2/Spestly/Athena-1-3B/dd2603d5-e99e-4778-95d0-159c788626cf.json
 create mode 100644 data/hfopenllm_v2/Spestly/Atlas-Pro-1.5B-Preview/41c71990-e79d-447f-b082-63c96fd67a1f.json
 delete mode 100644 data/hfopenllm_v2/Spestly/Atlas-Pro-1.5B-Preview/8282705f-6b69-40c2-825d-8e0c72756083.json
 delete mode 100644 data/hfopenllm_v2/Spestly/Atlas-Pro-7B-Preview/57a36976-0868-462e-ab57-3addef7ea2f9.json
 create mode 100644 data/hfopenllm_v2/Spestly/Atlas-Pro-7B-Preview/b9e25948-2871-4b6c-933b-8a731e48e81b.json
 create mode 100644 data/hfopenllm_v2/Stark2008/GutenLaserPi/7c70df74-2bc2-40e0-b0f4-77be1a7e044c.json
 delete mode 100644 data/hfopenllm_v2/Stark2008/GutenLaserPi/e418f7d1-8fd6-44ea-bc33-62fb525589f1.json
 delete mode 100644 data/hfopenllm_v2/Stark2008/LayleleFlamPi/c12a519e-9d34-4671-8e98-c69178e08ec0.json
 create mode 100644 data/hfopenllm_v2/Stark2008/LayleleFlamPi/ea71bdd5-3aa1-4d26-9256-5aeb2f79fa8c.json
 create mode 100644 data/hfopenllm_v2/Stark2008/VisFlamCat/b0e9c0ca-cd56-42c8-96ed-477884bfd9f9.json
 delete mode 100644 data/hfopenllm_v2/Stark2008/VisFlamCat/ed5f857e-6799-4729-a2e5-afbea4b89ecd.json
 delete mode 100644 data/hfopenllm_v2/Steelskull/L3.3-MS-Nevoria-70b/5db5f87b-9bb0-4d29-b578-72bb896f3359.json
 create mode 100644 data/hfopenllm_v2/Steelskull/L3.3-MS-Nevoria-70b/7395fcde-49dd-47f4-a8ea-463eda40f5e3.json
 delete mode 100644 data/hfopenllm_v2/Steelskull/L3.3-Nevoria-R1-70b/1465ebc9-f2c3-46df-b5e1-37e7a027fde8.json
 create mode 100644 data/hfopenllm_v2/Steelskull/L3.3-Nevoria-R1-70b/a130087f-566f-4405-b662-1102f1664c49.json
 create mode 100644 data/hfopenllm_v2/StelleX/Qwen2.5_Math_7B_Cot/3be58cf3-4761-4459-9f3c-eabf812a3c19.json
 delete mode 100644 data/hfopenllm_v2/StelleX/Vorisatex-7B-preview/875156be-2ff9-4ec4-8085-27f22fb19259.json
 create mode 100644 data/hfopenllm_v2/StelleX/Vorisatex-7B-preview/dbdd71ad-db5b-4b4b-8856-68b55adbe127.json
 delete mode 100644 data/hfopenllm_v2/SultanR/SmolTulu-1.7b-Instruct/1b0bd686-fd26-441f-b280-97b10bb1449c.json
 create mode 100644 data/hfopenllm_v2/SultanR/SmolTulu-1.7b-Instruct/da159a16-48a0-45e3-ad4d-bdc9e8b5288c.json
 delete mode 100644 data/hfopenllm_v2/SultanR/SmolTulu-1.7b-Reinforced/224b4cbc-e36c-4f68-9918-edbdaf947191.json
 create mode 100644 data/hfopenllm_v2/SultanR/SmolTulu-1.7b-Reinforced/77d5f51e-5ad2-42a6-a32c-060cd844b949.json
 delete mode 100644 data/hfopenllm_v2/SultanR/SmolTulu-1.7b-it-v0/22ea218a-e3be-4e05-9a94-af716bb3a624.json
 create mode 100644 data/hfopenllm_v2/SultanR/SmolTulu-1.7b-it-v0/724cc582-cc83-474b-9606-70dbc22f3581.json
 create mode 100644 data/hfopenllm_v2/Supichi/BBA-123/8a1b2aae-d717-4b49-8ed2-a7ee2cee1940.json
 delete mode 100644 data/hfopenllm_v2/Supichi/BBA-123/a469604f-f755-46e0-8b1c-db4a365dec34.json
 create mode 100644 data/hfopenllm_v2/Supichi/BBA99/0dfb062d-a6ec-42a6-a9f9-6f6424bbdf0c.json
 delete mode 100644 data/hfopenllm_v2/Supichi/BBA99/fa793cb5-5522-4777-8d6f-e4719a51f767.json
 create mode 100644 data/hfopenllm_v2/Supichi/BBAIK29/ab2512fa-2335-4817-9a76-3259690bbc67.json
 delete mode 100644 data/hfopenllm_v2/Supichi/BBAIK29/de5f2ab9-f1d2-49bc-9771-41b9da1bdfa3.json
 create mode 100644 data/hfopenllm_v2/Supichi/BBAI_135_Gemma/fe7f1442-b7db-42d5-bc83-b8afd1d0c802.json
 delete mode 100644 data/hfopenllm_v2/Supichi/BBAI_250_Xia0_gZ/068a06f4-3fdc-495f-b7e4-0effebe24e42.json
 create mode 100644 data/hfopenllm_v2/Supichi/BBAI_250_Xia0_gZ/0e14484a-69d7-423e-bf6c-33d0992f408c.json
 delete mode 100644 data/hfopenllm_v2/Supichi/BBAI_275_Tsunami_gZ/173028b9-03e3-44d7-a7e9-2c0c5c6f4b4e.json
 create mode 100644 data/hfopenllm_v2/Supichi/BBAI_275_Tsunami_gZ/881eaa2c-af5f-4e84-8807-d0835c10ebd2.json
 delete mode 100644 data/hfopenllm_v2/Supichi/BBAI_525_Tsu_gZ_Xia0/6b6b273e-9cf0-405e-b1e4-5fdbd2ae16d9.json
 create mode 100644 data/hfopenllm_v2/Supichi/BBAI_525_Tsu_gZ_Xia0/ef8a7079-9d13-42b7-ab2d-b72df5ae5d95.json
 delete mode 100644 data/hfopenllm_v2/Supichi/BBAI_78B_Calme_3_1_Ties/a9c4a482-6b02-4cf6-a7d5-3e16334df634.json
 create mode 100644 data/hfopenllm_v2/Supichi/BBAI_78B_Calme_3_1_Ties/db8d3fc4-58f4-4f07-8c27-c73a4a4719fb.json
 create mode 100644 data/hfopenllm_v2/Supichi/BBAI_QWEEN_V000000_LUMEN_14B/0c44a429-e705-4794-b702-1a731e52df90.json
 delete mode 100644 data/hfopenllm_v2/Supichi/BBAI_QWEEN_V000000_LUMEN_14B/57fd3fdc-dfdd-44ee-8c30-dc5ce4a0df8d.json
 create mode 100644 data/hfopenllm_v2/Supichi/HF_TOKEN/92b3d2c1-61f4-432a-82a7-43b4367f7ef0.json
 delete mode 100644 data/hfopenllm_v2/Supichi/HF_TOKEN/cd0ccaff-e1b3-4c11-a8a0-37137d0386e2.json
 create mode 100644 data/hfopenllm_v2/Supichi/NJS26/5703e81d-055c-459b-8202-80ec382a8d5b.json
 delete mode 100644 data/hfopenllm_v2/Supichi/NJS26/f336c7ee-2275-4045-a227-1a7abbaebf63.json
 delete mode 100644 data/hfopenllm_v2/Svak/MN-12B-Inferor-v0.0/5bb52ed5-e59a-4e60-a6eb-9e9322d95ccc.json
 create mode 100644 data/hfopenllm_v2/Svak/MN-12B-Inferor-v0.0/f6260b6e-52a2-4142-93ba-5393807fa0d4.json
 create mode 100644 data/hfopenllm_v2/Svak/MN-12B-Inferor-v0.1/83b84506-4826-48de-a6fe-2af6ae5d425a.json
 delete mode 100644
data/hfopenllm_v2/Svak/MN-12B-Inferor-v0.1/9bfe838e-a568-4933-b03d-3e9ae6d2026d.json delete mode 100644 data/hfopenllm_v2/Syed-Hasan-8503/Phi-3-mini-4K-instruct-cpo-simpo/58bacacb-2936-4685-b0ba-dc8f47f3232a.json create mode 100644 data/hfopenllm_v2/Syed-Hasan-8503/Phi-3-mini-4K-instruct-cpo-simpo/7483e260-9853-4d3f-aa10-187796d96de9.json delete mode 100644 data/hfopenllm_v2/T145/KRONOS-8B-V1-P1/5bedfdac-2976-4a21-9ae2-a5b5b06e1e14.json create mode 100644 data/hfopenllm_v2/T145/KRONOS-8B-V1-P1/f9925806-4252-44e8-b67e-917737572bd4.json create mode 100644 data/hfopenllm_v2/T145/KRONOS-8B-V1-P2/70470e6c-8d66-4249-b762-a5a2e3589a53.json delete mode 100644 data/hfopenllm_v2/T145/KRONOS-8B-V1-P2/a5d0fc39-cac5-409f-8375-636ef97fba8c.json delete mode 100644 data/hfopenllm_v2/T145/KRONOS-8B-V1-P3/14eb1867-80a0-47f9-9b2a-f0a05f683fb4.json create mode 100644 data/hfopenllm_v2/T145/KRONOS-8B-V1-P3/d3abfe3c-ebfe-4dfd-b0db-93c14d32c585.json create mode 100644 data/hfopenllm_v2/T145/KRONOS-8B-V2/a35b06bc-d759-421a-94cf-f408a98e9273.json delete mode 100644 data/hfopenllm_v2/T145/KRONOS-8B-V2/ff4c64ec-f44b-4bec-9534-bafa632a0e3f.json create mode 100644 data/hfopenllm_v2/T145/KRONOS-8B-V3/bbac659c-7cf8-41d4-98d4-ded4c471bd98.json delete mode 100644 data/hfopenllm_v2/T145/KRONOS-8B-V3/fc5613f1-09bc-4b82-89f4-4ee671cad5bf.json create mode 100644 data/hfopenllm_v2/T145/KRONOS-8B-V4/0c73f3a0-0a92-4b1c-abfa-6eb77138dacd.json delete mode 100644 data/hfopenllm_v2/T145/KRONOS-8B-V4/af8665b4-d9be-4243-9c8d-0b43e7abd540.json delete mode 100644 data/hfopenllm_v2/T145/KRONOS-8B-V5/290206b5-0e46-4f92-a2bd-f2c53ef3d147.json create mode 100644 data/hfopenllm_v2/T145/KRONOS-8B-V5/a7ab6f16-717f-4567-8057-a4a18e1a1e77.json create mode 100644 data/hfopenllm_v2/T145/KRONOS-8B-V6/2abe2c9d-032d-469e-852b-114eca5e84f8.json delete mode 100644 data/hfopenllm_v2/T145/KRONOS-8B-V6/78813c35-3eaa-4ae6-9099-bf79efb8b0df.json delete mode 100644 data/hfopenllm_v2/T145/KRONOS-8B-V7/1358fee5-3874-4997-b1f0-6e93c6c5e9c0.json create mode 100644 data/hfopenllm_v2/T145/KRONOS-8B-V7/2e8a83dc-c760-4f42-a361-e02cf3a65427.json delete mode 100644 data/hfopenllm_v2/T145/KRONOS-8B-V8/57a4ddc6-0447-4840-94bc-5bb136025aab.json create mode 100644 data/hfopenllm_v2/T145/KRONOS-8B-V8/743dfe64-e7cd-493e-817d-8d5fcdc2ea24.json create mode 100644 data/hfopenllm_v2/T145/KRONOS-8B-V9/4e37c90b-65a8-4b71-bfc2-d63541fb8962.json delete mode 100644 data/hfopenllm_v2/T145/KRONOS-8B-V9/6fbb6156-196d-4523-900e-35316100d3b9.json create mode 100644 data/hfopenllm_v2/T145/Llama-3.1-8B-Instruct-Zeus/2e34d74e-1b69-4daf-8bee-77e5357fd439.json delete mode 100644 data/hfopenllm_v2/T145/Llama-3.1-8B-Instruct-Zeus/38e620aa-c577-4b14-bebd-e98ebcbe48b2.json create mode 100644 data/hfopenllm_v2/T145/Llama-3.1-8B-Zeus/0646e2f7-d2e6-42d3-8f09-f8daee302709.json delete mode 100644 data/hfopenllm_v2/T145/Meta-Llama-3.1-8B-Instruct-TIES/15b92d44-3d68-4c6a-bddd-5676ebda2e10.json create mode 100644 data/hfopenllm_v2/T145/Meta-Llama-3.1-8B-Instruct-TIES/c66b1ff8-9c04-4f9c-b83e-088f31f79590.json create mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V10/1bd2affc-9970-4149-b52b-51549b1f0029.json delete mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V10/464bae3d-bd06-4264-a939-59ab8e562ca6.json delete mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V11/a6eedf29-9ec8-4b03-a8f5-c9c4e2bda688.json create mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V11/f0479d74-4684-4b41-a63b-16d7fe0e3290.json delete mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V12/1ab70352-9bda-47c8-8bdf-90860934cfc7.json create mode 100644 
data/hfopenllm_v2/T145/ZEUS-8B-V12/95deb890-a15d-4c71-8151-ed45c3dfb87f.json create mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V13-abliterated/1c07fc4c-a773-4e03-bb14-7144e7815c01.json delete mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V13-abliterated/7c39d06a-dafe-40a7-b5a1-dca14dcadff2.json delete mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V13/10823e50-9478-4a8a-83cf-5169a0bc1f1f.json create mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V13/e7e8388e-db3c-4881-b67c-5177c60562b9.json delete mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V14/2b0eb3f5-d35e-41ea-ba69-18c0b8a3e1e1.json create mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V14/c4923208-2a47-45f2-a74a-4483e4b99bee.json delete mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V15/3e1be4f3-478f-4061-9856-f1beb0a749de.json create mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V15/b5f06a78-5b57-45a5-93be-4f3c1b36f208.json delete mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V16/7beef3ca-6423-4a81-836d-0e4cdc4af973.json create mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V16/835f19d3-515c-4bc4-ab96-5cb5bece45dc.json delete mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V17-abliterated-V2/3344d19c-c79b-48b3-be5b-f5f27d6920ce.json create mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V17-abliterated-V2/7dd96382-6fc1-4a39-924b-d9034b5b0839.json create mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V17-abliterated-V4/77a666a2-a9b2-43cc-8e64-67172f4ab6c8.json delete mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V17-abliterated-V4/bf9c0bfa-98e5-45b2-8819-0911af81d78f.json delete mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V17-abliterated/35f89ab6-c6c9-41cd-9296-af4921490c3f.json create mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V17-abliterated/e3eae267-46ab-4433-a8f3-2a2f8448299b.json delete mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V17/0368a3ba-e461-45d1-a037-3b9160a8efbb.json create mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V17/e31308c4-8eb2-4a72-8127-18049d58b814.json create mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V18/c7098a7a-e865-4ecd-b511-abeb2c0872bd.json delete mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V18/e5d250e7-8d0a-48b5-aaad-3d1da02eab00.json delete mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V19/0392cccb-0a1c-486e-876a-1404f14a1080.json create mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V19/b3a8c734-e63a-47f7-af2c-a3b6518802fa.json create mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V2-ORPO/35937965-2791-4f75-8954-5a2280381c91.json delete mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V2-ORPO/588b0fce-37cd-41f1-8eaa-50383cdc0f00.json create mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V2-abliterated/4ab806fe-738d-4f5b-89e4-004134d2f7fe.json delete mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V2-abliterated/926fb6ed-0750-4d04-8e3c-da470e236db2.json create mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V2/a937e27e-b757-4de7-b679-01ac29d8bb22.json delete mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V2/e64503c5-d9ce-4544-8caf-0fec97a2b592.json delete mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V20/0ba8bca5-3a61-499a-8e2d-ca84f52ef654.json create mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V20/1d906aab-33a6-4ffe-8a63-694482d83d09.json delete mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V21/380a44ec-387a-4f34-92c2-18fc7a8d5ce0.json create mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V21/9e101298-6482-4ae8-83e4-b948ba8fa550.json create mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V22/3818710d-80a9-4e7d-90e3-f06afffb71ac.json delete mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V22/3f44a1c0-b70a-4712-a0c1-bdf3318b270c.json create mode 100644 
data/hfopenllm_v2/T145/ZEUS-8B-V23/a18ec0c4-6f3f-4904-b69c-e40770df169e.json delete mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V23/f83b7584-0e52-4658-ae15-f295064b9111.json delete mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V24/51368b21-1b48-4c07-9b09-8cae0786200b.json create mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V24/529c2bd4-6b8e-4e3c-8737-c0b794444d13.json delete mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V25/52b41117-c308-4e8c-9c61-ce8e4faf778f.json create mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V25/9e994362-a1d1-48f7-9db1-dd9d532b9f35.json delete mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V26/8ae81cea-b179-4025-916a-9bc73755de82.json create mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V26/cf35b7db-f675-4362-8916-36b0582b64f4.json create mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V27/79ee7e34-36cd-4024-8978-86c1b059ae5f.json delete mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V27/bf31323b-bfb5-464a-b343-0605dafb5a60.json create mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V28/9ec4fb99-ed4d-416e-9342-0c036aadd35d.json delete mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V28/e31561ff-779a-4ebe-b6fe-686b2895c53b.json create mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V29/8788e4fa-04c5-4f7c-bb4e-523287901f71.json delete mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V29/c383684a-2f70-46e9-ab55-4d68903613b3.json delete mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V2L1/015f91ef-9318-44d6-acb2-17628000c273.json create mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V2L1/18097bf4-5149-40e9-9850-558c3f143ed8.json delete mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V2L2/8e7be46e-af57-4e88-9df5-3161110dfa66.json create mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V2L2/b5942721-5c30-4c49-a6e1-fb5419539652.json delete mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V3/6b8fca40-f44b-45a0-bd5b-04b2fa2067a2.json create mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V3/76d27de3-0309-4e4b-8d0d-0e402bde0a31.json create mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V30/5c0553ff-4910-45a9-aa8d-3a76af098403.json delete mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V30/839ff423-8c5c-4fab-aecf-b535ee06af36.json delete mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V4/9330c290-ee47-4a7d-9b8f-62903402e0e3.json create mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V4/fd97d1d9-a1b5-429d-b73d-1ea92ae1d61c.json delete mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V6/09670c05-9463-479f-89e3-5029fd5d7ee7.json create mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V6/f77aa103-5a09-409c-ad72-7992b6049f94.json create mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V7/0afdaa1d-c1e7-4283-a2b3-f459c09df4a9.json delete mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V7/c6a9173a-bacc-40bd-9572-239f9901e065.json create mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V8/044ed79b-0c54-4a7a-94ba-a3f999adeb0d.json delete mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V8/c0035841-a312-493e-9c44-a75133e894d1.json create mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V9/ac6b884d-62ea-4ff5-8eee-cfce08869030.json delete mode 100644 data/hfopenllm_v2/T145/ZEUS-8B-V9/f5876dc1-b769-431f-84fe-365d2457902e.json create mode 100644 data/hfopenllm_v2/T145/qwen-2.5-3B-merge-test/8ffa696e-adef-4808-ba0e-bb04921a433d.json delete mode 100644 data/hfopenllm_v2/THUDM/glm-4-9b-chat-1m-hf/077f7956-8c9b-47ef-8c4d-40455bbb0027.json create mode 100644 data/hfopenllm_v2/THUDM/glm-4-9b-chat-1m-hf/8a2cfa62-5f13-447e-8d0f-2503e4962ac5.json create mode 100644 data/hfopenllm_v2/THUDM/glm-4-9b-chat-1m/4f24fc46-3686-41fa-bf25-a0e39b252cc9.json delete mode 100644 
data/hfopenllm_v2/THUDM/glm-4-9b-chat-1m/f0c306f0-683e-4582-81b7-f0a2c372060f.json delete mode 100644 data/hfopenllm_v2/THUDM/glm-4-9b-chat-hf/0af9353e-10d5-42e3-8bc9-4c736720ff30.json create mode 100644 data/hfopenllm_v2/THUDM/glm-4-9b-chat-hf/b1375cb4-b0d5-4cb4-ad43-394ebd1a481f.json create mode 100644 data/hfopenllm_v2/THUDM/glm-4-9b-chat/4ce062da-acfc-4684-95c2-679cbe5a697b.json delete mode 100644 data/hfopenllm_v2/THUDM/glm-4-9b-chat/e7c5d8ef-d480-4ab9-b698-409e5ea76cf8.json create mode 100644 data/hfopenllm_v2/THUDM/glm-4-9b/3d785765-befa-4e53-8672-769f7bb87dcd.json delete mode 100644 data/hfopenllm_v2/THUDM/glm-4-9b/bd038a6c-1241-401d-962d-e033434ba735.json create mode 100644 data/hfopenllm_v2/TIGER-Lab/AceCodeRM-7B/ab0d3a24-19db-4d00-892e-bcb7c0f2f30f.json delete mode 100644 data/hfopenllm_v2/TIGER-Lab/AceCodeRM-7B/eb1d6ce5-3b0c-477d-9ca6-2f3ff8bc4e30.json create mode 100644 data/hfopenllm_v2/TIGER-Lab/AceCoder-Qwen2.5-7B-Ins-Rule/31f0b186-1805-42ff-86cf-d8455a66d538.json create mode 100644 data/hfopenllm_v2/TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Base-Rule/ed6b3e7e-d294-420d-b9b9-460a52cd0239.json create mode 100644 data/hfopenllm_v2/TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Ins-Rule/91dec0c0-9854-4790-a0a5-e17d19636f17.json create mode 100644 data/hfopenllm_v2/TIGER-Lab/MAmmoTH2-7B-Plus/599616fb-26c1-47e3-a98b-9ad922a95c08.json delete mode 100644 data/hfopenllm_v2/TIGER-Lab/MAmmoTH2-7B-Plus/93503cc0-80aa-44b5-9155-c81cd44a9ac9.json create mode 100644 data/hfopenllm_v2/TIGER-Lab/Qwen2.5-Math-7B-CFT/aeee4365-c34d-46b9-8c98-29976010bb62.json create mode 100644 data/hfopenllm_v2/TTTXXX01/Mistral-7B-Base-SimPO2-5e-7/1ec68708-94c9-4561-bb99-7f211d7a9950.json create mode 100644 data/hfopenllm_v2/Tarek07/Progenitor-V1.1-LLaMa-70B/0b53e7b4-0e91-40a2-911b-cd0d415e9fad.json create mode 100644 data/hfopenllm_v2/Tarek07/Thalassic-Alpha-LLaMa-70B/91bcd646-fe3d-458b-a426-a6a8863d69a0.json delete mode 100644 data/hfopenllm_v2/TeeZee/DoubleBagel-57B-v1.0/1315f2ad-2e39-4cab-b09a-c74d0779f895.json create mode 100644 data/hfopenllm_v2/TeeZee/DoubleBagel-57B-v1.0/2e0458cc-e092-4770-bd80-00dff169d754.json create mode 100644 data/hfopenllm_v2/Telugu-LLM-Labs/Indic-gemma-2b-finetuned-sft-Navarasa-2.0/d56ef415-0edf-4fde-8277-ae44b4bb4ed2.json delete mode 100644 data/hfopenllm_v2/Telugu-LLM-Labs/Indic-gemma-2b-finetuned-sft-Navarasa-2.0/ec8a8e25-f985-40a8-80ff-0c7d7595029d.json delete mode 100644 data/hfopenllm_v2/Telugu-LLM-Labs/Indic-gemma-7b-finetuned-sft-Navarasa-2.0/89d117f3-7a67-4e30-82b2-b42efaf44024.json create mode 100644 data/hfopenllm_v2/Telugu-LLM-Labs/Indic-gemma-7b-finetuned-sft-Navarasa-2.0/a0a1beb8-ee9a-4e88-b939-6e0104ed76a7.json delete mode 100644 data/hfopenllm_v2/TencentARC/LLaMA-Pro-8B-Instruct/98ea850e-7019-4728-a558-8b1819ec47c2.json create mode 100644 data/hfopenllm_v2/TencentARC/LLaMA-Pro-8B-Instruct/f9b7c3ee-ea8b-42f0-a55a-6171d4e3d0ea.json create mode 100644 data/hfopenllm_v2/TencentARC/LLaMA-Pro-8B/2c8c6c6a-ce95-4d11-a33a-d547859fee11.json create mode 100644 data/hfopenllm_v2/TencentARC/MetaMath-Mistral-Pro/47858744-3378-4ed4-9101-8acbc3a53cda.json create mode 100644 data/hfopenllm_v2/TencentARC/Mistral_Pro_8B_v0.1/2aaeaaa7-89ed-4666-b0a5-8c1320ec4ec5.json create mode 100644 data/hfopenllm_v2/TheDrummer/Cydonia-22B-v1.2/23ae6a72-5a1f-4961-8662-feb4d8ad8a26.json delete mode 100644 data/hfopenllm_v2/TheDrummer/Cydonia-22B-v1.2/4a3e8df4-8e21-4c7c-aec8-afe353831c3d.json create mode 100644 data/hfopenllm_v2/TheDrummer/Gemmasutra-9B-v1/312ec315-6175-4f99-8741-97d97eb26b47.json create mode 
100644 data/hfopenllm_v2/TheDrummer/Gemmasutra-Mini-2B-v1/7869bbe3-fd17-4e6d-9546-94d3df5e83ef.json create mode 100644 data/hfopenllm_v2/TheDrummer/Llama-3SOME-8B-v2/68c9fb85-f90e-442f-aa96-458dabe30b39.json delete mode 100644 data/hfopenllm_v2/TheDrummer/Ministrations-8B-v1/21d5973e-d827-4bd6-b050-346da350a0aa.json create mode 100644 data/hfopenllm_v2/TheDrummer/Ministrations-8B-v1/6891d1dd-0e1a-42e8-9206-64a4c71854f9.json create mode 100644 data/hfopenllm_v2/TheDrummer/Rocinante-12B-v1/c62eb6b3-2a3d-45bd-acdf-bad717e51766.json delete mode 100644 data/hfopenllm_v2/TheDrummer/Rocinante-12B-v1/f21e98c1-5535-4cb4-a9f0-541e49aff795.json create mode 100644 data/hfopenllm_v2/TheDrummer/Tiger-Gemma-9B-v1/55d4a6ae-44e5-4a1b-9509-299fbc6c3a36.json create mode 100644 data/hfopenllm_v2/TheDrummer/Tiger-Gemma-9B-v2/227e3e19-29d6-414f-b538-9f6f89d47677.json create mode 100644 data/hfopenllm_v2/TheDrummer/Tiger-Gemma-9B-v3/e922ac2c-e8d0-48f2-99fc-da70c925136c.json delete mode 100644 data/hfopenllm_v2/TheDrunkenSnail/Daughter-of-Rhodia-12B/0f1c48a7-2a20-40c8-88e8-bdfdc3cdad40.json create mode 100644 data/hfopenllm_v2/TheDrunkenSnail/Daughter-of-Rhodia-12B/59f93c1c-3712-4ee2-a3d2-999e5acc2ee5.json delete mode 100644 data/hfopenllm_v2/TheDrunkenSnail/Mother-of-Rhodia-12B/2178eb24-2558-44db-aff1-7903c2e0f657.json create mode 100644 data/hfopenllm_v2/TheDrunkenSnail/Mother-of-Rhodia-12B/a98dcf1e-6abb-402b-9e0c-da7c23b74bde.json delete mode 100644 data/hfopenllm_v2/TheDrunkenSnail/Son-of-Rhodia/22c87268-7e49-42b4-9bbb-16a4b305c595.json create mode 100644 data/hfopenllm_v2/TheDrunkenSnail/Son-of-Rhodia/a889f561-0d8a-4345-9131-0a897ec215ac.json create mode 100644 data/hfopenllm_v2/TheHierophant/Underground-Cognitive-V0.3-test/6402facc-6258-43a4-a0fd-78e21765c504.json delete mode 100644 data/hfopenllm_v2/TheHierophant/Underground-Cognitive-V0.3-test/872cc338-765c-4291-8b50-77b4bce719fd.json create mode 100644 data/hfopenllm_v2/TheTsar1209/nemo-carpmuscle-v0.1/29fbd2e0-e08a-48f4-905e-d2aa54886915.json delete mode 100644 data/hfopenllm_v2/TheTsar1209/nemo-carpmuscle-v0.1/8e834483-df6f-4d58-8257-f0cd1d8e3aa1.json create mode 100644 data/hfopenllm_v2/TheTsar1209/qwen-carpmuscle-r-v0.3/313e0379-d3ea-4f5a-8e06-4b0a94317487.json create mode 100644 data/hfopenllm_v2/TheTsar1209/qwen-carpmuscle-v0.1/f326fbd0-5f92-4324-a587-1f08cf7da208.json create mode 100644 data/hfopenllm_v2/TheTsar1209/qwen-carpmuscle-v0.2/d61310e9-5267-4a87-8e24-ae25172cd64e.json create mode 100644 data/hfopenllm_v2/TheTsar1209/qwen-carpmuscle-v0.3/60953e5e-523d-43c0-ad00-f746308030b1.json create mode 100644 data/hfopenllm_v2/TheTsar1209/qwen-carpmuscle-v0.4.1/5afd8861-d7cb-45cd-af1b-6db966cb56e0.json create mode 100644 data/hfopenllm_v2/TheTsar1209/qwen-carpmuscle-v0.4/c3972df1-4414-4c71-b473-fb9459cf085b.json create mode 100644 data/hfopenllm_v2/Tijmen2/cosmosage-v3/b89d54b7-2329-4608-b9f6-07017e63f1cd.json delete mode 100644 data/hfopenllm_v2/Tijmen2/cosmosage-v3/f1eed2d5-89ca-4757-a5f9-9a90e811f075.json create mode 100644 data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v0.1/50389350-af23-41ba-af46-5ffe338ff9d2.json delete mode 100644 data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v0.1/818cb0a4-7458-4cee-aca8-7cc72db341f8.json delete mode 100644 data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v0.5/96454d40-4535-4439-87be-0ea7b55cd88a.json create mode 100644 data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v0.5/b8f8f045-2306-43ad-8fa0-6a8bdb494db6.json create mode 100644 
data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v0.6/7cd59011-75d7-4497-956c-322d5d609c5f.json delete mode 100644 data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v0.6/be032e7e-39b5-4153-81b9-c29115b231b4.json delete mode 100644 data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v1.0/0a24d7b1-44eb-4f5b-ae2f-ddee372facd5.json create mode 100644 data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v1.0/1313d865-9c5b-45d2-ad64-629c65f07f2c.json create mode 100644 data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T/0efc2583-bf21-4b60-96cc-716928768eb1.json create mode 100644 data/hfopenllm_v2/TinyLlama/TinyLlama_v1.1/be0a2737-19a0-4401-998a-a03663467133.json delete mode 100644 data/hfopenllm_v2/ToastyPigeon/Sto-vo-kor-12B/1c795b39-a382-4315-8b6b-626423b9ccfe.json create mode 100644 data/hfopenllm_v2/ToastyPigeon/Sto-vo-kor-12B/71720e07-2de0-4402-bdfd-102150c61765.json create mode 100644 data/hfopenllm_v2/Trappu/Magnum-Picaro-0.7-v2-12b/38c84c69-5cdb-4f24-820d-4b39c5b118ff.json delete mode 100644 data/hfopenllm_v2/Trappu/Magnum-Picaro-0.7-v2-12b/77871404-f2e3-46f9-8c48-808fb89442cc.json delete mode 100644 data/hfopenllm_v2/Trappu/Nemo-Picaro-12B/37534f85-e1ae-482b-89d0-480c4bbc50e7.json create mode 100644 data/hfopenllm_v2/Trappu/Nemo-Picaro-12B/de9d274d-f213-4037-9711-3e9d3dbbcc96.json delete mode 100644 data/hfopenllm_v2/Tremontaine/L3-12B-Lunaris-v1/51e5f1f2-a43a-4ade-9207-1b15d172ba08.json create mode 100644 data/hfopenllm_v2/Tremontaine/L3-12B-Lunaris-v1/92381da4-b9d1-43c4-a5c9-59f375017e11.json delete mode 100644 data/hfopenllm_v2/Triangle104/Annunaki-12b/28f9e91f-b32f-4b8f-ae18-126c7bbe6e7d.json create mode 100644 data/hfopenllm_v2/Triangle104/Annunaki-12b/44ab6a50-027d-47df-a518-5aa944eb2a61.json create mode 100644 data/hfopenllm_v2/Triangle104/BigTalker-Lite-8B/2a1947d7-74e0-43d0-931d-b2862348e90a.json delete mode 100644 data/hfopenllm_v2/Triangle104/BigTalker-Lite-8B/befea823-7dc5-4e69-81e3-e75c4ff117ac.json create mode 100644 data/hfopenllm_v2/Triangle104/Chatty-Harry_V2.0/3677b71c-387d-4182-b15d-c3525bc7bc36.json delete mode 100644 data/hfopenllm_v2/Triangle104/Chatty-Harry_V2.0/f2dcc214-e25c-4c73-97f0-4e47304df09b.json create mode 100644 data/hfopenllm_v2/Triangle104/Chatty-Harry_V3.0/6b125a8e-5b53-48ca-8875-926249879f39.json delete mode 100644 data/hfopenllm_v2/Triangle104/Chatty-Harry_V3.0/b9b23a78-beea-4c4b-8bb8-d5a18a05ffce.json delete mode 100644 data/hfopenllm_v2/Triangle104/Chronos-Prism_V1.0/13bb7db2-9d89-4dce-950a-14ccfb3492aa.json create mode 100644 data/hfopenllm_v2/Triangle104/Chronos-Prism_V1.0/af851d4b-69d4-49a9-a160-a180146c3963.json create mode 100644 data/hfopenllm_v2/Triangle104/DS-Distilled-Hermes-Llama-3.1/7aa6ce37-c0e4-48ce-b9db-f158ac47d366.json create mode 100644 data/hfopenllm_v2/Triangle104/DS-Distilled-Hermes-Llama-3.1_TIES/1bce093e-27c0-41ad-aad6-b656f6773ed5.json create mode 100644 data/hfopenllm_v2/Triangle104/DS-R1-Distill-Q2.5-10B-Harmony/5c6cffab-ef72-4e12-808c-c26ee8ec6999.json delete mode 100644 data/hfopenllm_v2/Triangle104/DS-R1-Distill-Q2.5-10B-Harmony/ff136a9d-7e29-4a44-86be-c69bc115102e.json delete mode 100644 data/hfopenllm_v2/Triangle104/DS-R1-Distill-Q2.5-14B-Harmony_V0.1/63bc0215-741c-48ab-8ce3-d4c036c74a42.json create mode 100644 data/hfopenllm_v2/Triangle104/DS-R1-Distill-Q2.5-14B-Harmony_V0.1/e288a874-f750-4a90-be07-616094c220cf.json create mode 100644 data/hfopenllm_v2/Triangle104/DS-R1-Distill-Q2.5-7B-RP/0607da8d-3f4e-468a-91a6-b975261a87c0.json delete mode 100644 
data/hfopenllm_v2/Triangle104/DS-R1-Distill-Q2.5-7B-RP/5515e597-5f9f-46eb-8d3f-0482bdd69715.json create mode 100644 data/hfopenllm_v2/Triangle104/DS-R1-Llama-8B-Harmony/be2cc2fd-c8e7-4421-b8c8-d3b937272d0d.json create mode 100644 data/hfopenllm_v2/Triangle104/DSR1-Distill-Llama-Lit-8B/15ffe64e-72fd-4e65-8632-babf137a386d.json create mode 100644 data/hfopenllm_v2/Triangle104/DSR1-Distill-Qwen-7B-RP/ce1c0d4f-f5a3-49e7-ab77-65ff51bbd0ca.json create mode 100644 data/hfopenllm_v2/Triangle104/Dark-Chivalry_V1.0/b5afab38-13ba-4abd-9d04-a433c41061c5.json delete mode 100644 data/hfopenllm_v2/Triangle104/Dark-Chivalry_V1.0/ed3b441b-272c-4bc4-8839-aa6055a6ccbc.json delete mode 100644 data/hfopenllm_v2/Triangle104/Distilled-DarkPlanet-Allades-8B/2d57a30c-8a0e-4f18-bb2d-6bf4536bbc86.json create mode 100644 data/hfopenllm_v2/Triangle104/Distilled-DarkPlanet-Allades-8B/a862c2a5-f66b-4d09-ac57-6cbe565f9f35.json delete mode 100644 data/hfopenllm_v2/Triangle104/Distilled-DarkPlanet-Allades-8B_TIES/9bff68b3-82a4-49b5-90a7-3c0038ddc35a.json create mode 100644 data/hfopenllm_v2/Triangle104/Distilled-DarkPlanet-Allades-8B_TIES/d8254f6c-8110-44d3-800e-101fc731d779.json create mode 100644 data/hfopenllm_v2/Triangle104/Distilled-Whiskey-8b/ccbcd5a7-2b98-4d90-ace1-3ad5971a5f18.json delete mode 100644 data/hfopenllm_v2/Triangle104/Distilled-Whiskey-8b/cf34d222-197f-4d3d-9786-fb5c019f2552.json create mode 100644 data/hfopenllm_v2/Triangle104/Dolphin3-Llama3.2-Smart/c208b19b-4ecf-4fad-b931-54f65d4b711b.json create mode 100644 data/hfopenllm_v2/Triangle104/Gemmadevi-Stock-10B/debaf4a0-c734-47ea-bea0-2ddc65dc397d.json create mode 100644 data/hfopenllm_v2/Triangle104/Hermes-Llama-3.2-CoT-Summary/0eeb5962-ccc0-407b-92e6-7cf17c00941f.json create mode 100644 data/hfopenllm_v2/Triangle104/Hermes-Llama-3.2-CoT/4b60e863-482c-4f91-8cd1-6c993d3c5988.json delete mode 100644 data/hfopenllm_v2/Triangle104/Hermes3-L3.1-DirtyHarry-8B/a8086735-c7a7-48b5-9219-829e288040f5.json create mode 100644 data/hfopenllm_v2/Triangle104/Hermes3-L3.1-DirtyHarry-8B/f5f0bc72-427d-4703-aab1-1bb1bea73895.json delete mode 100644 data/hfopenllm_v2/Triangle104/Herodotos-14B/271dbfc3-d9cf-4cb7-b1c0-175f016ed32b.json create mode 100644 data/hfopenllm_v2/Triangle104/Herodotos-14B/aae7f543-7b5b-435f-a506-e3ab901a8c5a.json delete mode 100644 data/hfopenllm_v2/Triangle104/Herodotos-14B_V0.1/3c6d1b1b-465a-4b97-83ed-d2ebd27a905e.json create mode 100644 data/hfopenllm_v2/Triangle104/Herodotos-14B_V0.1/6e6ff4c3-3cfd-4790-80c4-544d9cbe47e2.json create mode 100644 data/hfopenllm_v2/Triangle104/L3.1-8B-Dusky-Ink/3ee76278-89d4-44fb-a449-717534b00161.json delete mode 100644 data/hfopenllm_v2/Triangle104/L3.1-8B-Dusky-Ink/4eed8b1b-591d-403b-96f4-c6db11e8b234.json delete mode 100644 data/hfopenllm_v2/Triangle104/L3.1-8B-Dusky-Ink_v0.r1/a43e1d8d-8a9e-445b-9023-fc6d4a41fcfc.json create mode 100644 data/hfopenllm_v2/Triangle104/L3.1-8B-Dusky-Ink_v0.r1/fa2854d3-9e2f-4f79-ac8c-e1cb5a638745.json create mode 100644 data/hfopenllm_v2/Triangle104/LThreePointOne-8B-HermesBlackroot/9ddaa721-bf3a-416a-9be8-291188793cc9.json delete mode 100644 data/hfopenllm_v2/Triangle104/LThreePointOne-8B-HermesBlackroot/d1c3467e-6189-4d6f-bedb-8c51fa8bfde6.json delete mode 100644 data/hfopenllm_v2/Triangle104/LThreePointOne-8B-HermesInk/1bb3c61f-2f72-4486-87ef-1e6d5ce58478.json create mode 100644 data/hfopenllm_v2/Triangle104/LThreePointOne-8B-HermesInk/d659077d-7261-4c69-862c-d61be21662a2.json create mode 100644 
data/hfopenllm_v2/Triangle104/Llama3.1-Allades-Lit-8b/e87ba227-c55e-4666-949d-b45913f8336b.json create mode 100644 data/hfopenllm_v2/Triangle104/Llama3.1-cc-Lit-8b/077f683a-af6f-4a71-b599-b9b269546b7c.json delete mode 100644 data/hfopenllm_v2/Triangle104/Minerva-1.5b/26810cc0-541f-4ca5-b76e-f1a63baa61f6.json create mode 100644 data/hfopenllm_v2/Triangle104/Minerva-1.5b/54808b08-d10d-4a06-ab60-8d99039311b8.json create mode 100644 data/hfopenllm_v2/Triangle104/Minerva-1.5b_V0.2/138e6fdb-7092-4ee6-be82-7bb86c1fc759.json delete mode 100644 data/hfopenllm_v2/Triangle104/Minerva-1.5b_V0.2/fc5be34b-0fad-4fce-9df1-851e4fd3119d.json create mode 100644 data/hfopenllm_v2/Triangle104/Minerva-10b/1b27423f-62cc-4189-a293-5af84ef1f2c8.json delete mode 100644 data/hfopenllm_v2/Triangle104/Minerva-10b/848ac6f9-2bb5-48fe-821a-83f28da91f92.json create mode 100644 data/hfopenllm_v2/Triangle104/Minerva-14b-V0.1/f5468512-d2c7-4486-9d31-bef61225af52.json delete mode 100644 data/hfopenllm_v2/Triangle104/Minerva-14b-V0.1/fc4971f4-983d-40f9-810a-16ed998c1dad.json create mode 100644 data/hfopenllm_v2/Triangle104/Minerva-14b/0e0ec1a9-76aa-4d7e-9c0e-946d6b000a6a.json delete mode 100644 data/hfopenllm_v2/Triangle104/Minerva-14b/54093f2d-15c3-465e-b876-5e4027deeb19.json create mode 100644 data/hfopenllm_v2/Triangle104/Minerva-7b/07b87b98-0d61-4479-937f-7447565b4631.json delete mode 100644 data/hfopenllm_v2/Triangle104/Minerva-7b/aad7ed5c-d51d-46d7-af15-9c0447a02036.json delete mode 100644 data/hfopenllm_v2/Triangle104/Minerva-8b/08cc58ae-b1dc-489c-ba25-338bb11db2ee.json create mode 100644 data/hfopenllm_v2/Triangle104/Minerva-8b/85b11b91-d686-49e9-8db0-971dd7cafb75.json create mode 100644 data/hfopenllm_v2/Triangle104/Mistral-Redemption-Arc/21bac032-a092-4afa-8d29-ebdefb3a0650.json create mode 100644 data/hfopenllm_v2/Triangle104/Mistral-Small-24b-Harmony/29e3a687-429f-4f33-ae5f-48db85127364.json delete mode 100644 data/hfopenllm_v2/Triangle104/Pans_Gutenbergum_V0.1/2a6af60c-eb46-46ae-8140-d050b48069ae.json create mode 100644 data/hfopenllm_v2/Triangle104/Pans_Gutenbergum_V0.1/d98493a6-f237-4565-8508-9e4cc3188d2d.json create mode 100644 data/hfopenllm_v2/Triangle104/Pans_Gutenbergum_V0.2/2def6fbd-7488-4e9f-a822-2405d4f7a315.json delete mode 100644 data/hfopenllm_v2/Triangle104/Pans_Gutenbergum_V0.2/f9eef8a7-1f23-46f1-b57a-062ffd1b81a1.json create mode 100644 data/hfopenllm_v2/Triangle104/Pantheon_ChatWaifu_V0.2/819143d4-9538-48b9-b7af-128bc15c518a.json delete mode 100644 data/hfopenllm_v2/Triangle104/Pantheon_ChatWaifu_V0.2/b57a86fa-8994-4004-a79d-d6da64e64b4d.json create mode 100644 data/hfopenllm_v2/Triangle104/Phi-4-AbliteratedRP/c29d47af-a9de-4edb-acac-6763c0d44ca3.json create mode 100644 data/hfopenllm_v2/Triangle104/Phi4-RP-o1-Ablit/22bf3fb7-9235-4a57-b8fd-c85b12047b0e.json create mode 100644 data/hfopenllm_v2/Triangle104/Phi4-RP-o1/2bea7014-460d-470b-918f-468b58d70fd6.json create mode 100644 data/hfopenllm_v2/Triangle104/Porpoise-R1-Llama3.2-3b/3927a5dd-002b-441a-b769-ba68547cd5f3.json delete mode 100644 data/hfopenllm_v2/Triangle104/Q2.5-14B-Instruct-1M-Harmony/1cf0506b-dbdd-4f7e-abf5-d812763a722e.json create mode 100644 data/hfopenllm_v2/Triangle104/Q2.5-14B-Instruct-1M-Harmony/476fc734-dedd-4192-aa59-eb2f9dabf16b.json delete mode 100644 data/hfopenllm_v2/Triangle104/Q2.5-AthensCOT/54a29a68-c69a-4b49-a87a-cb93c459146a.json create mode 100644 data/hfopenllm_v2/Triangle104/Q2.5-AthensCOT/817e2fbe-0866-489f-b987-391228a68c53.json delete mode 100644 
data/hfopenllm_v2/Triangle104/Q2.5-CodeR1-3B/74342d21-8eac-494c-95b9-4df1e828473b.json create mode 100644 data/hfopenllm_v2/Triangle104/Q2.5-CodeR1-3B/f25f5eb1-ff22-4be3-a639-a9d25207078f.json delete mode 100644 data/hfopenllm_v2/Triangle104/Q2.5-EVACOT-7b/972dfbcf-a5d0-4f9f-a39c-089c30ac91ab.json create mode 100644 data/hfopenllm_v2/Triangle104/Q2.5-EVACOT-7b/f71d1c31-184b-46be-a288-bdc92f0ebe09.json create mode 100644 data/hfopenllm_v2/Triangle104/Q2.5-EvaHumane-RP/0d9547b3-7bef-4815-9c44-7d714fe81bbb.json delete mode 100644 data/hfopenllm_v2/Triangle104/Q2.5-EvaHumane-RP/5146b3c9-9fdb-4a4e-a687-4bcf44b92309.json create mode 100644 data/hfopenllm_v2/Triangle104/Q2.5-Humane-RP/22dbc5a2-0ff6-4566-9bfd-e5ce314be597.json delete mode 100644 data/hfopenllm_v2/Triangle104/Q2.5-Humane-RP/697ad115-9040-42e4-b94b-529ab27011ee.json create mode 100644 data/hfopenllm_v2/Triangle104/Q2.5-Instruct-1M_Harmony/afedb249-f1a5-42d6-b6c0-54b2cc303f64.json delete mode 100644 data/hfopenllm_v2/Triangle104/Q2.5-Instruct-1M_Harmony/f4cbe998-8c9f-47c1-a267-5831a40e4cf2.json create mode 100644 data/hfopenllm_v2/Triangle104/Q2.5-R1-3B/61b1bf5e-6aa4-4e90-af2c-dcf5fc9903f2.json delete mode 100644 data/hfopenllm_v2/Triangle104/Q2.5-R1-3B/a4e4a936-5203-4a9d-a698-417cc9da866f.json delete mode 100644 data/hfopenllm_v2/Triangle104/Q2.5-R1-7B/302fa968-5d2d-4750-a1e6-c87534c1eafa.json create mode 100644 data/hfopenllm_v2/Triangle104/Q2.5-R1-7B/c0adc04c-1e02-4891-a5a1-1fab0ddf18ca.json create mode 100644 data/hfopenllm_v2/Triangle104/Robo-Gutenberg_V1.0/cc57e6f0-ab55-4ab9-983c-63d74632d016.json delete mode 100644 data/hfopenllm_v2/Triangle104/Robo-Gutenberg_V1.0/d891d79a-1ec2-44e3-83cd-c28739aecd6e.json create mode 100644 data/hfopenllm_v2/Triangle104/Rocinante-Prism_V2.0/0d3c5fdb-c4a5-4436-b9d4-f0f42cb4db96.json delete mode 100644 data/hfopenllm_v2/Triangle104/Rocinante-Prism_V2.0/9f32b229-a2d5-409b-98d2-65681616aff4.json delete mode 100644 data/hfopenllm_v2/Triangle104/Rocinante-Prism_V2.1/7a93ddc1-8694-4b16-8183-1b7f46dfba92.json create mode 100644 data/hfopenllm_v2/Triangle104/Rocinante-Prism_V2.1/a6ec2934-e9fd-481d-8f00-932603bc6e0a.json create mode 100644 data/hfopenllm_v2/Triangle104/RomboHermes3-R1-Llama3.2-3b/e2553c93-60df-4126-9e64-ecd4a5003389.json delete mode 100644 data/hfopenllm_v2/Triangle104/Rombos-Novasky-7B_V1c/a06dc6ef-5d16-402a-a855-b7feec423aa5.json create mode 100644 data/hfopenllm_v2/Triangle104/Rombos-Novasky-7B_V1c/e7c2fb42-e82a-4dac-9cc3-a9f41ab54e0f.json create mode 100644 data/hfopenllm_v2/Triangle104/Set-70b/a807ee8c-509e-4b6d-a414-df24444d8a0a.json delete mode 100644 data/hfopenllm_v2/Triangle104/Set-70b/e25fa684-c237-4bce-8498-7bdfaac970a9.json create mode 100644 data/hfopenllm_v2/Tsunami-th/Tsunami-0.5-7B-Instruct/2199024b-7944-4950-8335-32a536efad02.json delete mode 100644 data/hfopenllm_v2/Tsunami-th/Tsunami-0.5-7B-Instruct/df3de449-9abc-4f0a-ba6e-caa48720893a.json create mode 100644 data/hfopenllm_v2/Tsunami-th/Tsunami-0.5x-7B-Instruct/97919c86-6161-4548-95b9-d44263a29f8a.json delete mode 100644 data/hfopenllm_v2/Tsunami-th/Tsunami-0.5x-7B-Instruct/fec678b9-c51b-4945-8d4f-f06af6528227.json delete mode 100644 data/hfopenllm_v2/Tsunami-th/Tsunami-1.0-14B-Instruct/11262698-480b-425b-b013-f362fae2f254.json create mode 100644 data/hfopenllm_v2/Tsunami-th/Tsunami-1.0-14B-Instruct/c40c1a46-2e30-4cf1-bcf3-a316a793fbcd.json create mode 100644 data/hfopenllm_v2/Tsunami-th/Tsunami-1.0-7B-Instruct/c1294268-b5f5-4d64-b91a-147f58a21a47.json delete mode 100644 
data/hfopenllm_v2/Tsunami-th/Tsunami-1.0-7B-Instruct/ccffe03b-c166-48de-8516-8253b2c2f96e.json create mode 100644 data/hfopenllm_v2/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter1/2b029e6d-a0b8-4b6c-b62d-144b8dc4f739.json create mode 100644 data/hfopenllm_v2/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter2/b926ca6c-60c9-4353-9671-0453b46d0222.json create mode 100644 data/hfopenllm_v2/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3/44db30b4-2010-4f96-a39e-9ccc8568374f.json delete mode 100644 data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter1/07af3512-a045-435e-a965-8daa0836905d.json create mode 100644 data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter1/2210d673-d417-46be-aeca-de48cd846e01.json delete mode 100644 data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter2/0c5c315f-63c4-427e-a307-1422a197895c.json create mode 100644 data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter2/892d27cc-dfb3-40c7-ae0f-a7cd06784808.json create mode 100644 data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3/49b3f293-721d-4d44-9748-88d1ce275050.json delete mode 100644 data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3/49e095af-ed90-4e64-b476-4fc62d6e6997.json create mode 100644 data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3/70fb41fe-46af-49e3-8270-5882e12f710f.json delete mode 100644 data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3/d8d05a10-8889-40aa-b56f-365e0a12052c.json create mode 100644 data/hfopenllm_v2/UCLA-AGI/Mistral7B-PairRM-SPPO-Iter1/13e2489f-9d96-4f68-8e22-c937604c2145.json create mode 100644 data/hfopenllm_v2/UCLA-AGI/Mistral7B-PairRM-SPPO-Iter2/0c386ea0-4706-4a6f-994c-b6ee21dbce92.json create mode 100644 data/hfopenllm_v2/UCLA-AGI/Mistral7B-PairRM-SPPO-Iter3/a8d5a193-6c87-4b5b-8ea3-b3ab78e73104.json create mode 100644 data/hfopenllm_v2/UCLA-AGI/Mistral7B-PairRM-SPPO/4018f4bd-492a-4814-9a7a-1f0c376f2d2e.json create mode 100644 data/hfopenllm_v2/UKzExecution/LlamaExecutor-8B-3.0.5/568072cb-118d-41af-bfe8-fa14cb4c7348.json create mode 100644 data/hfopenllm_v2/Unbabel/TowerInstruct-Mistral-7B-v0.2/a6d08766-8c36-41bf-8bbc-acdfdc3f8e23.json create mode 100644 data/hfopenllm_v2/Undi95/MG-FinalMix-72B/2504fed5-c8a1-4ffc-8ce5-9559aa8c4325.json delete mode 100644 data/hfopenllm_v2/Undi95/MG-FinalMix-72B/3d3598fa-4b23-4ec6-a010-fb20232a5121.json create mode 100644 data/hfopenllm_v2/Undi95/Phi4-abliterated/359dde31-d9dc-4c22-b829-77df652dcc73.json create mode 100644 data/hfopenllm_v2/V3N0M/Jenna-Tiny-2.0/34a79823-b993-402a-89a7-538e126ee02a.json delete mode 100644 data/hfopenllm_v2/V3N0M/Jenna-Tiny-2.0/d9785857-b164-4d38-8d03-0e03e2d0fbf5.json delete mode 100644 data/hfopenllm_v2/VAGOsolutions/Llama-3-SauerkrautLM-70b-Instruct/eb8adbdf-2cfb-4e9e-8f75-ce2734907725.json create mode 100644 data/hfopenllm_v2/VAGOsolutions/Llama-3-SauerkrautLM-70b-Instruct/f392c5c3-9bee-4111-9a22-6a1b706fd2ad.json create mode 100644 data/hfopenllm_v2/VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct/73bbdd22-4e5f-496b-b39f-290d8e0d2aa4.json delete mode 100644 data/hfopenllm_v2/VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct/ad99531d-4d52-4175-8ebd-cb172b4577de.json delete mode 100644 data/hfopenllm_v2/VAGOsolutions/Llama-3.1-SauerkrautLM-70b-Instruct/2e3eca4b-4c15-4b3b-8c44-3a23312a0797.json create mode 100644 data/hfopenllm_v2/VAGOsolutions/Llama-3.1-SauerkrautLM-70b-Instruct/72a66eae-9c94-40e3-b3c9-211303e5cba8.json delete mode 100644 data/hfopenllm_v2/VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct/aa425d3e-e363-46bf-a5fb-cbf524657e85.json create mode 100644 
data/hfopenllm_v2/VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct/ef7390b5-599b-4354-805b-9486e4ce34fa.json delete mode 100644 data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-1.5b/22ae39ae-883c-43a7-abbe-3213b9035b58.json create mode 100644 data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-1.5b/57f964c3-0504-4b60-9539-ce0e369816ea.json create mode 100644 data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-7b-HerO/4e6c0336-5d94-4417-a194-92a4d6f38481.json delete mode 100644 data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-7b-HerO/be74b2d6-28b9-4227-b0ec-fbad4b7dada6.json delete mode 100644 data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-7b-LaserChat/35512aeb-611a-46a8-849e-442fc3fcc23a.json create mode 100644 data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-7b-LaserChat/fe38dea8-92f4-4fb2-afdf-c5932d7c9e27.json create mode 100644 data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-Gemma-2b/5ced7497-5a05-40d2-80cb-cae63ca62022.json create mode 100644 data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-Gemma-7b/52a66aaa-193a-48ca-b693-4dcab811eaa3.json create mode 100644 data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct/e0e4bcef-cb73-436b-9353-b18ade293e8b.json delete mode 100644 data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct/f105fe57-632a-4e3b-bbcb-f063f2e10874.json create mode 100644 data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-Nemo-12b-Instruct/1ae45791-7e47-4083-bd72-4530fa26893c.json delete mode 100644 data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-Nemo-12b-Instruct/b5db7846-f777-4fa8-86e9-f09fdee1dfee.json create mode 100644 data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-Phi-3-medium/b2731f04-a9bd-4e36-a545-85be5b66f5a7.json delete mode 100644 data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-SOLAR-Instruct/24fbb409-3b1a-4ed2-8866-547a7f02c5dc.json create mode 100644 data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-SOLAR-Instruct/ed6de552-d04b-4d51-8456-610e2cb41d85.json create mode 100644 data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-gemma-2-2b-it/3e08a589-d2b3-487b-900e-85725522a2e4.json create mode 100644 data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-gemma-2-9b-it/b2717503-d081-40ee-b1ed-fcadaf239049.json create mode 100644 data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-v2-14b-DPO/9915eb01-5c45-42b6-82a3-ad782411642f.json delete mode 100644 data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-v2-14b-DPO/e4b13fb1-11c0-4696-856f-de393fe2f8b2.json create mode 100644 data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-v2-14b-SFT/190eb7ca-46db-4e1d-8b71-9bb20af74ede.json delete mode 100644 data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-v2-14b-SFT/d1b47391-f36e-4819-8093-5aff774dff94.json create mode 100644 data/hfopenllm_v2/VIRNECT/llama-3-Korean-8B-r-v-0.1/86b9077d-9ec3-411d-84c5-326ba97742c1.json create mode 100644 data/hfopenllm_v2/VIRNECT/llama-3-Korean-8B/18bfa50c-20be-4027-8ee7-f6cd1411c882.json create mode 100644 data/hfopenllm_v2/VIRNECT/llama-3-Korean-8B/eb1a099a-48c7-412b-b62f-143537c41f06.json create mode 100644 data/hfopenllm_v2/ValiantLabs/Llama3-70B-Fireplace/e530a4b7-c2f6-4bad-bab5-2895e950ed63.json create mode 100644 data/hfopenllm_v2/ValiantLabs/Llama3-70B-ShiningValiant2/52ad7152-feea-46a6-b2d8-20e1a70514ce.json create mode 100644 data/hfopenllm_v2/ValiantLabs/Llama3.1-70B-ShiningValiant2/a61162a6-ef3e-46f4-8aa2-241547fadea2.json create mode 100644 data/hfopenllm_v2/ValiantLabs/Llama3.1-8B-Cobalt/9f208aef-8544-47c8-bb1f-a3841aff208b.json create mode 100644 data/hfopenllm_v2/ValiantLabs/Llama3.1-8B-Cobalt/da237ab6-df39-460f-9efc-e1649e1ac202.json create mode 100644 
data/hfopenllm_v2/ValiantLabs/Llama3.1-8B-Enigma/c81b3193-9d01-4590-8b72-da97aa3c9dc4.json create mode 100644 data/hfopenllm_v2/ValiantLabs/Llama3.1-8B-Esper2/1a9ffe50-69ae-48bc-b636-89431391eb37.json create mode 100644 data/hfopenllm_v2/ValiantLabs/Llama3.1-8B-Fireplace2/b0c67359-1da0-4f55-aa1c-f54f88038bd7.json create mode 100644 data/hfopenllm_v2/ValiantLabs/Llama3.1-8B-Fireplace2/c700798b-583a-41be-94dd-382669bb495f.json create mode 100644 data/hfopenllm_v2/ValiantLabs/Llama3.1-8B-ShiningValiant2/3c0b9735-2ef1-4f27-b94a-f246eb57b73c.json create mode 100644 data/hfopenllm_v2/ValiantLabs/Llama3.1-8B-ShiningValiant2/e8c9501b-c985-4b78-a902-a1a030c72e60.json create mode 100644 data/hfopenllm_v2/ValiantLabs/Llama3.2-3B-Enigma/df978fce-3373-4073-8c44-d6a83df1d9d1.json create mode 100644 data/hfopenllm_v2/ValiantLabs/Llama3.2-3B-Esper2/e46ee8d9-81af-4259-8fef-3d3113fb6168.json create mode 100644 data/hfopenllm_v2/ValiantLabs/Llama3.2-3B-ShiningValiant2/aa6ab404-89ef-4336-b811-7c8064e26107.json create mode 100644 data/hfopenllm_v2/Vikhrmodels/Vikhr-Llama3.1-8B-Instruct-R-21-09-24/a14e6c79-4a78-4c02-a7ca-35e783f32be1.json delete mode 100644 data/hfopenllm_v2/Vikhrmodels/Vikhr-Llama3.1-8B-Instruct-R-21-09-24/b0332107-4b84-4c0a-b488-187fb3d534ae.json delete mode 100644 data/hfopenllm_v2/Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24/787cc582-61da-4afd-bfac-431377809fd9.json create mode 100644 data/hfopenllm_v2/Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24/ba1fb85b-bbc0-46ac-95d7-e61b91f65c2b.json delete mode 100644 data/hfopenllm_v2/Weyaxi/Bagel-Hermes-2x34B/5b614673-6566-4b82-bf7c-13268ebb1577.json create mode 100644 data/hfopenllm_v2/Weyaxi/Bagel-Hermes-2x34B/f6312fc7-c7a8-45dc-a57c-91f56b4ca28a.json delete mode 100644 data/hfopenllm_v2/Weyaxi/Bagel-Hermes-34B-Slerp/28439ab5-0e5f-4dae-a98a-e0c1b743a8b0.json create mode 100644 data/hfopenllm_v2/Weyaxi/Bagel-Hermes-34B-Slerp/335f5c32-f3f0-4a16-8c9d-8f07b2aae54a.json delete mode 100644 data/hfopenllm_v2/Weyaxi/Einstein-v4-7B/035c5e35-0ebe-4e91-a598-8d01688462a3.json create mode 100644 data/hfopenllm_v2/Weyaxi/Einstein-v4-7B/b7c7a907-7ecc-4d5b-bc6f-8b8d82954b21.json create mode 100644 data/hfopenllm_v2/Weyaxi/Einstein-v6.1-Llama3-8B/112f01a2-f0fb-4257-86bf-61c9a184eb92.json create mode 100644 data/hfopenllm_v2/Weyaxi/Einstein-v6.1-developed-by-Weyaxi-Llama3-8B/2d9410d6-7162-4811-bf7d-9de2c2b48fd2.json create mode 100644 data/hfopenllm_v2/Weyaxi/Einstein-v7-Qwen2-7B/16ff8fa3-4676-473c-99ad-908ddb59d8ed.json create mode 100644 data/hfopenllm_v2/Weyaxi/Einstein-v8-Llama3.2-1B/9b153ac9-f95b-419b-b7f9-beccd769ddad.json create mode 100644 data/hfopenllm_v2/Weyaxi/SauerkrautLM-UNA-SOLAR-Instruct/8a5df3c2-eb71-4e12-b013-fb43685f2916.json delete mode 100644 data/hfopenllm_v2/Weyaxi/SauerkrautLM-UNA-SOLAR-Instruct/8ddec5bb-ab90-4c98-8482-a412e7735246.json create mode 100644 data/hfopenllm_v2/WizardLMTeam/WizardLM-13B-V1.0/35fa3213-5c08-4b19-ae76-237fdd25444e.json delete mode 100644 data/hfopenllm_v2/WizardLMTeam/WizardLM-13B-V1.0/ab4f785b-779f-423b-9905-31a3b66dfeff.json create mode 100644 data/hfopenllm_v2/WizardLMTeam/WizardLM-13B-V1.2/242ce55f-1471-435e-bcd7-d28b5fc87fc4.json delete mode 100644 data/hfopenllm_v2/WizardLMTeam/WizardLM-13B-V1.2/f9d2286c-ed89-4c23-b6a2-c623373331cd.json delete mode 100644 data/hfopenllm_v2/WizardLMTeam/WizardLM-70B-V1.0/8c4ff628-41b6-4769-a33e-b1dbffa913cf.json create mode 100644 data/hfopenllm_v2/WizardLMTeam/WizardLM-70B-V1.0/95f509f2-5e67-404a-968d-f7488d684e32.json delete mode 100644 
data/hfopenllm_v2/Wladastic/Mini-Think-Base-1B/5f9a01b0-632a-4ee4-aedc-279002c7496c.json create mode 100644 data/hfopenllm_v2/Wladastic/Mini-Think-Base-1B/bcbcdfe9-0663-417c-9a29-60906e63db8f.json delete mode 100644 data/hfopenllm_v2/Xclbr7/Arcanum-12b/2d0a414f-1cf2-4ae3-951b-ed69d1ef883f.json create mode 100644 data/hfopenllm_v2/Xclbr7/Arcanum-12b/d95a7493-2f99-4c10-8067-711c7388af7d.json delete mode 100644 data/hfopenllm_v2/Xclbr7/Hyena-12b/06eb233f-5182-4b9e-be3f-21c928eef397.json create mode 100644 data/hfopenllm_v2/Xclbr7/Hyena-12b/789848a0-6d8a-4583-93c3-a72df74d0071.json create mode 100644 data/hfopenllm_v2/Xclbr7/caliburn-12b/14af87df-0fc5-46e1-9d0b-c25c8b6a7ce7.json delete mode 100644 data/hfopenllm_v2/Xclbr7/caliburn-12b/e897d1fc-2c71-4c61-971b-eeddfae1b75c.json delete mode 100644 data/hfopenllm_v2/Xclbr7/caliburn-v2-12b/18a12670-8785-44ef-a365-78ce797b8ba5.json create mode 100644 data/hfopenllm_v2/Xclbr7/caliburn-v2-12b/379f559f-9bfa-444f-b477-562c25b4c299.json create mode 100644 data/hfopenllm_v2/Xiaojian9992024/Llama3.2-1B-THREADRIPPER-v0.2/effb6a3d-c98f-4c3a-be77-902c61cda21b.json create mode 100644 data/hfopenllm_v2/Xiaojian9992024/Llama3.2-1B-THREADRIPPER/6c1c1405-afa4-412d-ba1f-49dc1cac4509.json create mode 100644 data/hfopenllm_v2/Xiaojian9992024/Phi-4-Megatron-Empathetic/6f4ed7c2-c775-4fd2-8600-4cea523f53e4.json create mode 100644 data/hfopenllm_v2/Xiaojian9992024/Phi-4-mini-UNOFFICAL/5fd5206b-186a-43b9-a4f4-07e75aa0293a.json create mode 100644 data/hfopenllm_v2/Xiaojian9992024/Qwen2.5-7B-MS-Destroyer/b707ecbf-0658-4226-803d-53456d16d54b.json create mode 100644 data/hfopenllm_v2/Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview-v0.2/dca1ee57-5e86-4532-a2f3-ac6a619ca576.json create mode 100644 data/hfopenllm_v2/Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview/1233476a-7839-4a22-a7ca-1d0f237d8888.json create mode 100644 data/hfopenllm_v2/Xiaojian9992024/Qwen2.5-THREADRIPPER-Medium-Censored/5c4bdeca-5ef8-4002-8f82-67d49b5ff722.json create mode 100644 data/hfopenllm_v2/Xiaojian9992024/Qwen2.5-THREADRIPPER-Small-AnniversaryEdition/18f5fd6c-2b79-4d48-b7e9-18845db16271.json create mode 100644 data/hfopenllm_v2/Xiaojian9992024/Qwen2.5-THREADRIPPER-Small/a9039374-fa5a-4b8b-800f-5f4651cf812d.json create mode 100644 data/hfopenllm_v2/Xiaojian9992024/Qwen2.5-Ultra-1.5B-25.02-Exp/3f9704b4-bf25-40da-b6dc-b927c3569f40.json create mode 100644 data/hfopenllm_v2/Xiaojian9992024/Reflection-L3.2-JametMiniMix-3B/a8f858d8-a792-409f-b79d-948a19e2aa87.json delete mode 100644 data/hfopenllm_v2/Xiaojian9992024/Reflection-L3.2-JametMiniMix-3B/e582afbb-99f3-4b43-8ee7-b786680124a9.json create mode 100644 data/hfopenllm_v2/Xkev/Llama-3.2V-11B-cot/5c34a168-b8cf-436b-a3b7-a2d1feadffb9.json create mode 100644 data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-1M-YOYO-V3/77092cfe-9820-45e8-94c5-31d27f1daa7c.json create mode 100644 data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-0505/cab8fed8-de68-4fa5-b4fc-d9483fc56571.json create mode 100644 data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-0510-v2/a8103350-b208-4856-8e7b-8ea8918ba0d1.json create mode 100644 data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-0805/e849c03c-c569-4059-8fc5-6a98cf391342.json create mode 100644 data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-1005-v2/f1d8bffa-61fc-47d5-85cf-48cebcb31af5.json create mode 100644 data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-1005/97bdb352-2e9d-4cc5-8b70-55348ef3a217.json create mode 100644 data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-1010-v2/78053a33-24c8-4e9f-8791-f127f21eec1c.json create mode 100644 
data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-1010/03082966-87ba-4560-a784-5d8677003500.json
create mode 100644 data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-1010/97f26b20-db66-4a30-ba2a-c18a31081271.json
create mode 100644 data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-SCE/85f9ccda-8c47-4fa1-9d47-e9da4730b077.json
create mode 100644 data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-V4-p1/2a57d6f4-643b-4b30-8d67-03032d454887.json
create mode 100644 data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-V4-p2/d333f360-c1c3-4916-8480-4a1fc490875a.json
create mode 100644 data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-V4/37a41261-a7b0-44b2-916f-770cdfa0ad39.json
create mode 100644 data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-latest-V2/c46cd6cc-b56d-44c5-a03c-b49381ba3462.json
create mode 100644 data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-latest/612b6226-c25d-42e0-bcd7-be7faa844530.json
create mode 100644 data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-it-restore/2fc7a4d6-88e0-4f11-9110-dc53942870a4.json
create mode 100644 data/hfopenllm_v2/YOYO-AI/Qwen2.5-7B-it-restore/34665752-58d8-48ee-81a6-f1a068c23026.json
create mode 100644 data/hfopenllm_v2/YOYO-AI/Qwen2.5-Coder-14B-YOYO-1010/cc0767b5-4aaa-4418-8f68-72a721323e9c.json
create mode 100644 data/hfopenllm_v2/YOYO-AI/ZYH-LLM-Qwen2.5-14B-V2/ea507a41-1654-4515-94cc-ce2e38800c61.json
create mode 100644 data/hfopenllm_v2/YOYO-AI/ZYH-LLM-Qwen2.5-14B-V3/c44e773f-4cca-4780-bdd4-f486e65c18e0.json
create mode 100644 data/hfopenllm_v2/YOYO-AI/ZYH-LLM-Qwen2.5-14B-V4/f8a46bda-d53b-484e-8832-7939f7d0762d.json
create mode 100644 data/hfopenllm_v2/YOYO-AI/ZYH-LLM-Qwen2.5-14B/c3968a2d-4a9a-4f62-8bea-a3b4b6dcd378.json
delete mode 100644 data/hfopenllm_v2/Yash21/TinyYi-7B-Test/d6a9abee-29ee-44e0-802c-c3e4354ebbac.json
create mode 100644 data/hfopenllm_v2/Yash21/TinyYi-7B-Test/da18242c-d6bb-4a0a-a2f9-2e42099f4e8a.json
delete mode 100644 data/hfopenllm_v2/Youlln/1PARAMMYL-8B-ModelStock/87231cbd-d911-434d-991b-1eb373cdde4f.json
create mode 100644 data/hfopenllm_v2/Youlln/1PARAMMYL-8B-ModelStock/ac078124-85d9-4715-bf7c-1428b1063732.json
create mode 100644 data/hfopenllm_v2/Youlln/2PRYMMAL-Yi1.5-6B-SLERP/9c1dcd75-8491-4890-ac6f-000868099a3e.json
delete mode 100644 data/hfopenllm_v2/Youlln/2PRYMMAL-Yi1.5-6B-SLERP/e80773ef-5ca2-43de-ba99-a7a997aab7f0.json
create mode 100644 data/hfopenllm_v2/Youlln/3PRYMMAL-PHI3-3B-SLERP/7850fc57-49c7-4124-b7c6-e1e7bb2bc726.json
create mode 100644 data/hfopenllm_v2/Youlln/4PRYMMAL-GEMMA2-9B-SLERP/8f38374e-f373-4639-9278-24441ebd0325.json
create mode 100644 data/hfopenllm_v2/Youlln/ECE-MIRAGE-1-12B/c007938e-3427-4896-8493-1500abdfbd2b.json
delete mode 100644 data/hfopenllm_v2/Youlln/ECE-MIRAGE-1-12B/f3f55015-88c7-41ae-b588-9a1eedd56fc2.json
create mode 100644 data/hfopenllm_v2/Youlln/ECE-MIRAGE-1-15B/df81dc0d-6c72-49e9-862b-02e9b6642cb6.json
delete mode 100644 data/hfopenllm_v2/Youlln/ECE-MIRAGE-1-15B/f904e587-76ac-4583-9235-fcdd20d9a626.json
create mode 100644 data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-FT-V3-MUSR/46c96d8e-568c-48f8-a74b-9dd4b4195037.json
delete mode 100644 data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-FT-V3-MUSR/de30a84d-c8cc-4f3c-9eb4-3f58754dc46b.json
create mode 100644 data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-FT-V3/1f4f7181-8a81-49f4-9e81-925d5d69a37c.json
delete mode 100644 data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-FT-V3/45c46c5d-cf81-42d4-bf9e-61aca49b2959.json
create mode 100644 data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-FT-V4-MUSR/3ea343b6-93f6-4c61-a164-3db95d13cbdf.json
delete mode 100644 data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-FT-V4-MUSR/68382b86-8a68-428e-8338-144a76b8c293.json
create mode 100644 data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-SLERP-V2/a9ea8bb5-05fc-4da3-8e00-f53ab8ea6af5.json
delete mode 100644 data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-SLERP-V2/c0fe65df-7e51-48ad-bf40-fd163804cad1.json
create mode 100644 data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-SLERP-V3/0ea74ce5-43c9-43eb-92bc-3d928062d9e0.json
delete mode 100644 data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-SLERP-V3/d67c4d9a-d5cc-4b26-a439-44c87a299ee8.json
create mode 100644 data/hfopenllm_v2/Youlln/ECE-PRYMMAL-YL-1B-SLERP-V1/6896faa7-7204-4091-8f4e-9cc0b53d673a.json
delete mode 100644 data/hfopenllm_v2/Youlln/ECE-PRYMMAL-YL-1B-SLERP-V1/70577ab1-a0ef-41f3-8d6a-00b0b873ee39.json
delete mode 100644 data/hfopenllm_v2/Youlln/ECE-PRYMMAL-YL-1B-SLERP-V2/6021f954-951a-47e1-980d-ce729f9f39b4.json
create mode 100644 data/hfopenllm_v2/Youlln/ECE-PRYMMAL-YL-1B-SLERP-V2/88064453-fd8c-4bd9-adf1-39f43972bec1.json
create mode 100644 data/hfopenllm_v2/Youlln/ECE-PRYMMAL-YL-7B-SLERP-V4/a18ade45-acba-4059-b969-445e529a82e2.json
delete mode 100644 data/hfopenllm_v2/Youlln/ECE-PRYMMAL-YL-7B-SLERP-V4/e027a39b-1213-42aa-b66f-b1853c644532.json
delete mode 100644 data/hfopenllm_v2/Youlln/ECE-PRYMMAL0.5-FT/4264c0fc-9f40-4c27-b877-63a751678a1c.json
create mode 100644 data/hfopenllm_v2/Youlln/ECE-PRYMMAL0.5-FT/6c0e4132-71e7-44af-95fc-83b0a6be2a82.json
delete mode 100644 data/hfopenllm_v2/Youlln/ECE-PRYMMAL0.5B-Youri/46564b0a-1489-4c98-9e7b-20daf58c2f87.json
create mode 100644 data/hfopenllm_v2/Youlln/ECE-PRYMMAL0.5B-Youri/5d9ab422-4f4f-460d-bd39-51266b43d7e5.json
delete mode 100644 data/hfopenllm_v2/Youlln/ECE-PRYMMAL1B-FT-V1/c3a0b587-b379-4013-a5ce-26fdc9dcc44d.json
create mode 100644 data/hfopenllm_v2/Youlln/ECE-PRYMMAL1B-FT-V1/cda03c45-0782-40cc-a17d-67d808657b83.json
create mode 100644 data/hfopenllm_v2/Youlln/ECE-Qwen0.5B-FT-V2/50f5451b-41c4-4ba5-8bee-ee8a2deb7e79.json
delete mode 100644 data/hfopenllm_v2/Youlln/ECE-Qwen0.5B-FT-V2/ee8952db-9f0a-4892-bff9-4d2ca1b66364.json
delete mode 100644 data/hfopenllm_v2/Youlln/ECE.EIFFEIL.ia-0.5B-SLERP/7a5fdffa-146b-43fd-a979-728c37ae599f.json
create mode 100644 data/hfopenllm_v2/Youlln/ECE.EIFFEIL.ia-0.5B-SLERP/cf758994-6e94-434d-bf68-74cca188b5e8.json
create mode 100644 data/hfopenllm_v2/YoungPanda/qwenqwen/611f9549-0788-44e9-8125-18df06cd80d6.json
delete mode 100644 data/hfopenllm_v2/Yuma42/KangalKhan-RawRuby-7B/4ad4a260-770a-4cce-9ba7-546cfa4cde58.json
create mode 100644 data/hfopenllm_v2/Yuma42/KangalKhan-RawRuby-7B/59cf23ba-027d-4bac-a0e1-526376396b4d.json
create mode 100644 data/hfopenllm_v2/Yuma42/Llama3.1-IgneousIguana-8B/1f02bbd3-ddaf-4db6-b7f8-31bad8ffac66.json
create mode 100644 data/hfopenllm_v2/Yuma42/Llama3.1-SuperHawk-8B/1e737e28-d926-43e8-9e4c-e39fa91d7977.json
create mode 100644 data/hfopenllm_v2/Z1-Coder/Z1-Coder-7B/43ef8eee-5d8a-47e7-ac71-1a898421370a.json
delete mode 100644 data/hfopenllm_v2/Z1-Coder/Z1-Coder-7B/750b35ad-fdf6-4243-91e7-aee90f84fa5b.json
delete mode 100644 data/hfopenllm_v2/ZHLiu627/zephyr-7b-gemma-dpo-avg/856a1f50-7ffb-4eb1-be4a-8aaa3cd6ee66.json
create mode 100644 data/hfopenllm_v2/ZHLiu627/zephyr-7b-gemma-dpo-avg/d8d03c71-942f-4aff-8a5e-5c265c639b44.json
create mode 100644 data/hfopenllm_v2/ZHLiu627/zephyr-7b-gemma-rpo-avg/96262938-1146-4993-92a1-a2ddb2519f8a.json
create mode 100644 data/hfopenllm_v2/ZeroXClem/L3-Aspire-Heart-Matrix-8B/292d7cfb-3e3c-47d8-8cca-33507f9ff081.json
delete mode 100644 data/hfopenllm_v2/ZeroXClem/L3-Aspire-Heart-Matrix-8B/e6d8d952-5a3d-4a97-860c-8275b10c6516.json
create mode 100644 data/hfopenllm_v2/ZeroXClem/Llama-3.1-8B-AthenaSky-MegaMix/3f29c10f-57ef-435b-85df-2cae30ae72fa.json
create mode 100644 data/hfopenllm_v2/ZeroXClem/Llama-3.1-8B-RainbowLight-EtherealMix/d7f022fe-86cb-4e4e-a672-62c2dc8cffd3.json
create mode 100644 data/hfopenllm_v2/ZeroXClem/Llama-3.1-8B-SpecialTitanFusion/baa35c90-c494-4dff-af28-cb549e40bed8.json
create mode 100644 data/hfopenllm_v2/ZeroXClem/Llama-3.1-8B-SuperNova-EtherealHermes/2fdc3186-6791-4550-ac4f-a1a5a5a1d514.json
create mode 100644 data/hfopenllm_v2/ZeroXClem/Llama-3.1-8B-SuperTulu-LexiNova/f687df8b-42b5-4d94-b741-1b516d9221b2.json
create mode 100644 data/hfopenllm_v2/ZeroXClem/Qwen-2.5-Aether-SlerpFusion-7B/c3a8a952-6869-4eee-a59f-4ae33ac72986.json
create mode 100644 data/hfopenllm_v2/ZeroXClem/Qwen2.5-7B-CelestialHarmony-1M/a7a74117-71e4-49b2-bd65-add82c9165d8.json
create mode 100644 data/hfopenllm_v2/ZeroXClem/Qwen2.5-7B-HomerAnvita-NerdMix/04ee694c-0c89-4f25-b10f-315a24743ba2.json
create mode 100644 data/hfopenllm_v2/ZeroXClem/Qwen2.5-7B-HomerCreative-Mix/47fd4acb-acc3-4f12-8af5-c425d3754c38.json
create mode 100644 data/hfopenllm_v2/ZeroXClem/Qwen2.5-7B-Qandora-CySec/e19577f5-d1ba-45ad-8500-d18ae2b14440.json
delete mode 100644 data/hfopenllm_v2/ZeusLabs/L3-Aethora-15B-V2/0e9ed58c-1a3e-49b4-8013-994642a95920.json
create mode 100644 data/hfopenllm_v2/ZeusLabs/L3-Aethora-15B-V2/e86443cd-453b-4ca0-8e7e-054764fe4bb9.json
create mode 100644 data/hfopenllm_v2/ZhangShenao/SELM-Llama-3-8B-Instruct-iter-3/24cd9977-f3fb-4619-aea1-59e1a36b2a5e.json
delete mode 100644 data/hfopenllm_v2/ZhangShenao/SELM-Llama-3-8B-Instruct-iter-3/6bf4063b-44aa-4809-a400-5406abe5eb2e.json
create mode 100644 data/hfopenllm_v2/aaditya/Llama3-OpenBioLLM-70B/1401f0d9-6f4c-41d2-819f-eb9487c5c1e6.json
delete mode 100644 data/hfopenllm_v2/abacusai/Dracarys-72B-Instruct/2f1e6f4e-86e6-47a4-96e6-3bc2b330cd3a.json
create mode 100644 data/hfopenllm_v2/abacusai/Dracarys-72B-Instruct/4b1f2aab-ef92-4231-9bdd-96918b26914c.json
create mode 100644 data/hfopenllm_v2/abacusai/Liberated-Qwen1.5-14B/4956e127-14a1-405e-a0e0-76fe94ea727b.json
create mode 100644 data/hfopenllm_v2/abacusai/Llama-3-Smaug-8B/90fb6e40-88f7-4ce2-ae99-308d87e69718.json
create mode 100644 data/hfopenllm_v2/abacusai/Smaug-34B-v0.1/cdad0f08-1c60-4493-bed0-9733894b367a.json
delete mode 100644 data/hfopenllm_v2/abacusai/Smaug-34B-v0.1/e0b9044d-1b87-44f7-b59b-88d790f429e5.json
create mode 100644 data/hfopenllm_v2/abacusai/Smaug-72B-v0.1/8e83b4f7-736f-4e03-8256-2a1fc421b04f.json
delete mode 100644 data/hfopenllm_v2/abacusai/Smaug-72B-v0.1/a3b08cd3-6ead-4db0-92ed-212c6b0e45ee.json
delete mode 100644 data/hfopenllm_v2/abacusai/Smaug-Llama-3-70B-Instruct-32K/962b4977-63f0-4a87-a36e-f3e592b74761.json
create mode 100644 data/hfopenllm_v2/abacusai/Smaug-Llama-3-70B-Instruct-32K/f0d6639d-8485-4bcd-b069-046a747dfbfa.json
delete mode 100644 data/hfopenllm_v2/abacusai/Smaug-Mixtral-v0.1/ba0fe822-7a57-4ccb-a97e-e852a59d9ae1.json
create mode 100644 data/hfopenllm_v2/abacusai/Smaug-Mixtral-v0.1/d1fe36ba-04f8-4110-8c39-81d393c4cbfc.json
create mode 100644 data/hfopenllm_v2/abacusai/Smaug-Qwen2-72B-Instruct/5a8ab5fb-ec1e-490c-b643-e3b9d49f5d34.json
delete mode 100644 data/hfopenllm_v2/abacusai/Smaug-Qwen2-72B-Instruct/84695a6b-dc11-448c-bbeb-b3cc05cde7ba.json
delete mode 100644 data/hfopenllm_v2/abacusai/bigstral-12b-32k/aed1ac03-5364-477e-ab8f-68b599170128.json
create mode 100644 data/hfopenllm_v2/abacusai/bigstral-12b-32k/de944f89-d2d4-4b01-b4b5-e7cbd1d8d1ae.json
delete mode 100644 data/hfopenllm_v2/abacusai/bigyi-15b/19b4d65c-39c7-4b81-bb71-f166ab4f9490.json
create mode 100644 data/hfopenllm_v2/abacusai/bigyi-15b/db96601a-2f7f-438f-915b-55fee0e0d1d1.json
create mode 100644 data/hfopenllm_v2/abhishek/autotrain-0tmgq-5tpbg/27912f7d-7033-4b7c-b93a-af1673ce4a9b.json
delete mode 100644 data/hfopenllm_v2/abhishek/autotrain-0tmgq-5tpbg/b5707c22-a2a2-4787-a902-b72945ebccd9.json
create mode 100644 data/hfopenllm_v2/abhishek/autotrain-0tmgq-5tpbg/da58a484-4a45-4a70-a651-031ada8023d5.json
delete mode 100644 data/hfopenllm_v2/abhishek/autotrain-0tmgq-5tpbg/ddd32642-ed7a-41b8-974a-f85b7f04d0db.json
create mode 100644 data/hfopenllm_v2/abhishek/autotrain-llama3-70b-orpo-v1/e8bd221d-8a89-4e3c-8815-0bff27574053.json
create mode 100644 data/hfopenllm_v2/abhishek/autotrain-llama3-70b-orpo-v2/ffc21c2a-59fb-4ad8-88a4-930879b6eba0.json
create mode 100644 data/hfopenllm_v2/abhishek/autotrain-llama3-orpo-v2/1e506afa-0d08-45d6-9242-b06104aa67e8.json
create mode 100644 data/hfopenllm_v2/abhishek/autotrain-vr4a1-e5mms/7d66bb93-cb2f-4be6-b133-1f0325be58e1.json
delete mode 100644 data/hfopenllm_v2/abhishek/autotrain-vr4a1-e5mms/e1462a5a-d120-4c0f-ba13-fbecb18619a0.json
create mode 100644 data/hfopenllm_v2/abideen/MedPhi-4-14B-v1/936f3c5f-7817-4118-96c8-e4061d4560fb.json
create mode 100644 data/hfopenllm_v2/adamo1139/Yi-34B-200K-AEZAKMI-v2/7d36ceed-2a1b-4b20-88ae-0a609cc161e9.json
delete mode 100644 data/hfopenllm_v2/adamo1139/Yi-34B-200K-AEZAKMI-v2/a28de361-e90d-44f7-b609-e4d64ae1be6f.json
create mode 100644 data/hfopenllm_v2/adriszmar/QAIMath-Qwen2.5-7B-TIES/77cace56-503f-4531-a4eb-0178a68cc283.json
create mode 100644 data/hfopenllm_v2/adriszmar/QAIMath-Qwen2.5-7B-TIES/9e49b710-2413-42f3-8943-bc9dbf68cb3c.json
delete mode 100644 data/hfopenllm_v2/aevalone/distill_qw_test/108ead60-3cee-43e7-925a-619bace5b65f.json
create mode 100644 data/hfopenllm_v2/aevalone/distill_qw_test/9a5b3564-97df-4661-a171-37322386ac4d.json
create mode 100644 data/hfopenllm_v2/agentlans/Gemma2-9B-AdvancedFuse/0fc0450d-cdf1-44b5-a809-202d1dd6b5e3.json
create mode 100644 data/hfopenllm_v2/agentlans/Llama-3.2-1B-Instruct-CrashCourse12K/7f06c78c-f95e-4e50-aa57-da0579adcdae.json
delete mode 100644 data/hfopenllm_v2/agentlans/Llama-3.2-1B-Instruct-CrashCourse12K/fbedd898-b839-49c1-bd6d-3a8744d4138a.json
create mode 100644 data/hfopenllm_v2/agentlans/Llama3.1-8B-drill/06e55e47-9995-4fa2-877a-c728e9f9f1a1.json
create mode 100644 data/hfopenllm_v2/agentlans/Llama3.1-Daredevilish-Instruct/39af1e0a-d1e3-4372-bc18-d07f3dff09f0.json
delete mode 100644 data/hfopenllm_v2/agentlans/Llama3.1-Daredevilish-Instruct/7a6d7a66-5772-4793-9597-ef0225b63f30.json
create mode 100644 data/hfopenllm_v2/agentlans/Llama3.1-Daredevilish/f32d59d6-8ab9-4b7d-ad9d-f62ce6d559bd.json
create mode 100644 data/hfopenllm_v2/agentlans/Llama3.1-LexiHermes-SuperStorm/7ddc3aef-c6c5-4d04-8473-3b3bba219d7f.json
create mode 100644 data/hfopenllm_v2/agentlans/Llama3.1-SuperDeepFuse-CrashCourse12K/ce80ac07-22d2-4883-ac6c-40b080e00b81.json
create mode 100644 data/hfopenllm_v2/agentlans/Llama3.1-SuperDeepFuse/cbece170-f872-485f-a6c2-5db17ced73bc.json
delete mode 100644 data/hfopenllm_v2/agentlans/Qwen2.5-0.5B-Instruct-CrashCourse-dropout/ad130d6f-6a5e-447a-a1ee-bfa2d93e5336.json
create mode 100644 data/hfopenllm_v2/agentlans/Qwen2.5-0.5B-Instruct-CrashCourse-dropout/c1fd751b-c6c3-4350-9618-f4b4840e1b69.json
create mode 100644 data/hfopenllm_v2/ahmeda335/13_outOf_32_pruned_layers_llama3.1-8b/bfd28b91-3a72-4417-b52b-804d2cbae12f.json
create mode 100644 data/hfopenllm_v2/ai21labs/Jamba-v0.1/32c26cbc-3697-47a6-bd12-18187df9dda9.json
delete mode 100644 data/hfopenllm_v2/ai21labs/Jamba-v0.1/e9546f28-0f6b-449e-a2b3-c6ab262103cc.json
create mode 100644 data/hfopenllm_v2/ai4bharat/Airavata/02280b9f-bc01-4e44-9d09-1e4ae8c0438b.json
delete mode 100644 data/hfopenllm_v2/ai4bharat/Airavata/350b0559-6331-4b8b-82e2-0463baea9d8a.json
delete mode 100644 data/hfopenllm_v2/aixonlab/Aether-12b/831b6f81-1552-4a7b-acac-eb927001e440.json
create mode 100644 data/hfopenllm_v2/aixonlab/Aether-12b/a57d2d49-5ccf-48f5-8035-b1d480c80f40.json
delete mode 100644 data/hfopenllm_v2/aixonlab/Grey-12b/2c4626c7-3016-4641-9862-0ba4f7f7936c.json
create mode 100644 data/hfopenllm_v2/aixonlab/Grey-12b/6b5a3c69-f8dd-4952-96fc-b6e4dec1ed9d.json
delete mode 100644 data/hfopenllm_v2/aixonlab/Zara-14b-v1.2/a4c3ddcb-482c-47fb-9290-3c0678b38fb4.json
create mode 100644 data/hfopenllm_v2/aixonlab/Zara-14b-v1.2/fe0665dd-b976-4d90-b16b-6c2acfef15ff.json
create mode 100644 data/hfopenllm_v2/akhadangi/Llama3.2.1B.0.01-First/8c6bdc44-fd29-45e7-b161-2c8e07ef2935.json
create mode 100644 data/hfopenllm_v2/akhadangi/Llama3.2.1B.0.01-Last/e7c70ff9-59ad-4d09-8af0-ef9cf16d1dfa.json
create mode 100644 data/hfopenllm_v2/akhadangi/Llama3.2.1B.0.1-First/26c4c993-ae49-42a0-be0a-f157be9f7d58.json
create mode 100644 data/hfopenllm_v2/akhadangi/Llama3.2.1B.0.1-Last/19adf124-c120-4e97-80cf-49c40a66eb81.json
create mode 100644 data/hfopenllm_v2/akhadangi/Llama3.2.1B.BaseFiT/66bc5d38-8d25-4934-bce8-41ce4ea0e385.json
create mode 100644 data/hfopenllm_v2/akjindal53244/Llama-3.1-Storm-8B/541eafe5-807e-44b0-b652-a0752210fc71.json
create mode 100644 data/hfopenllm_v2/akjindal53244/Llama-3.1-Storm-8B/845a2484-9f17-4c0e-b06b-6250992298bc.json
create mode 100644 data/hfopenllm_v2/alcholjung/llama3_medical_tuned/e62b6b26-5f3c-42c9-9541-bb8b23caee66.json
delete mode 100644 data/hfopenllm_v2/alibaba/1-800-LLMs/Qwen-2.5-14B-Hindi/21ba6052-9614-454e-999d-ef4f0f693c6c.json
delete mode 100644 data/hfopenllm_v2/alibaba/1024m/QWEN-14B-B100/745bd077-3a0f-4c06-8d19-d7c160512446.json
delete mode 100644 data/hfopenllm_v2/alibaba/Aashraf995/Qwen-Evo-7B/705ae322-fed9-4a98-a79e-e0b289065ba9.json
delete mode 100644 data/hfopenllm_v2/alibaba/Aashraf995/QwenStock-14B/7888b813-8ef1-4367-8168-edd1bd3c7888.json
delete mode 100644 data/hfopenllm_v2/alibaba/Alsebay/Qwen2.5-7B-test-novelist/19ff3120-2171-48b3-8db6-1c76bb57cf47.json
delete mode 100644 data/hfopenllm_v2/alibaba/Aryanne/QwentileSwap/ee2c5dd9-09db-45fa-8e67-961993d30672.json
delete mode 100644 data/hfopenllm_v2/alibaba/AtAndDev/Qwen2.5-1.5B-continuous-learnt/1a2d8396-4ff1-4386-a76b-d4863c7736c5.json
delete mode 100644 data/hfopenllm_v2/alibaba/AtAndDev/Qwen2.5-1.5B-continuous-learnt/4f7f368f-0646-4c16-80de-69d9c5e28193.json
delete mode 100644 data/hfopenllm_v2/alibaba/CombinHorizon/Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES/dcd14b21-f2fd-4c10-bf83-b6bb946f2789.json
delete mode 100644 data/hfopenllm_v2/alibaba/CombinHorizon/Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES/3171e54f-4c6f-40cf-ba6c-ef23b803ca33.json
delete mode 100644 data/hfopenllm_v2/alibaba/CombinHorizon/huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES/62faed28-8f0f-4ff8-894f-b4b5b754b4cf.json
delete mode 100644 data/hfopenllm_v2/alibaba/CombinHorizon/huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES/62b4c918-b33b-40cf-888b-42b116a9e04d.json
delete mode 100644 data/hfopenllm_v2/alibaba/CombinHorizon/zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES/3bf71784-e6f1-405b-ad23-e74a91df7051.json
delete mode 100644 data/hfopenllm_v2/alibaba/CoolSpring/Qwen2-0.5B-Abyme-merge2/2121d736-eec6-4a86-bae0-cd032f9eb603.json
delete mode 100644 data/hfopenllm_v2/alibaba/CoolSpring/Qwen2-0.5B-Abyme-merge3/2a633e8b-b35a-4a26-83bb-b471bab18ed2.json
delete mode 100644 data/hfopenllm_v2/alibaba/CoolSpring/Qwen2-0.5B-Abyme/46d2afd2-b620-4474-ac6c-4f6bdef93d1c.json
delete mode 100644 data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Broca/4429613e-2db7-4061-931f-eaa70d202b71.json
delete mode 100644 data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-BrocaV9/782219f0-25f7-465b-9f86-5e48c9d4703e.json
delete mode 100644 data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Brocav3/7abe4912-4e21-4774-8011-482603f7bcc0.json
delete mode 100644 data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Brocav6/63a1000f-1de8-42ef-a905-70b78bf46417.json
delete mode 100644 data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Brocav7/6966d397-d336-455a-a156-c2e6430c813f.json
delete mode 100644 data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Emerged/15af5216-fc3d-4102-bbed-eb5b7d0ecf48.json
delete mode 100644 data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Emergedv3/7b125482-fd80-4f71-b398-9421333ee736.json
delete mode 100644 data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-FinalMerge/36ebe0b7-51ae-4ea5-ba42-c9fd0d717259.json
delete mode 100644 data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Hyper/8412921a-ad8c-4106-a3a1-9259d2ddb074.json
delete mode 100644 data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-HyperMarck-dl/5b6ef372-86e5-4fc1-85ba-5a76517bb10f.json
delete mode 100644 data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Hyperionv3/d6700ad3-d858-4420-96b1-d690984ebcaa.json
delete mode 100644 data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Hyperionv4/7c4a43f8-be43-44d7-a514-f02b70ec367c.json
delete mode 100644 data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Hyperionv5/5b1e2a5e-cd92-4ad4-b12d-0540461f9f5e.json
delete mode 100644 data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-MegaMerge-pt2/f269bb45-d627-49b9-953b-5c8591433aa7.json
delete mode 100644 data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-MergeStock/c1db0f86-a3d9-4aa4-9fe3-0442fc63ad25.json
delete mode 100644 data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-ReasoningMerge/df6199fa-3797-4b88-b5fc-e429f513932b.json
delete mode 100644 data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Ultimav2/b76ac8f6-7355-4bbf-ad8f-d8fc967120a1.json
delete mode 100644 data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Unity/efd5d269-fc83-43f0-9054-dc3bdf40f180.json
delete mode 100644 data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Wernicke-SLERP/8359ce66-d904-4092-92be-5e2dbb372677.json
delete mode 100644 data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Wernicke/6c2287bb-69b0-4b23-ba15-ff4a600e4aa7.json
delete mode 100644 data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Wernickev3/a4f5037a-381b-4726-b90d-ba559058772c.json
delete mode 100644 data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-partialmergept1/852ffa19-285b-4037-ac60-63f24cafcecb.json
delete mode 100644 data/hfopenllm_v2/alibaba/CultriX/Qwenfinity-2.5-14B/4fba9290-886e-490d-aaeb-068f8c679006.json
delete mode 100644 data/hfopenllm_v2/alibaba/CultriX/SeQwence-14B-EvolMerge/44823eb6-717b-4508-a745-7821545dd3c2.json
delete mode 100644 data/hfopenllm_v2/alibaba/CultriX/SeQwence-14B-EvolMergev1/e2621a1f-af39-48fe-a56b-18e9b396a476.json
delete mode 100644 data/hfopenllm_v2/alibaba/CultriX/SeQwence-14B-v5/6a7ae44e-93f6-4371-b3a6-585a099aa7c7.json
delete mode 100644 data/hfopenllm_v2/alibaba/CultriX/SeQwence-14B/b9f3e9d1-e1f9-44cd-9067-c949adfbe553.json
delete mode 100644 data/hfopenllm_v2/alibaba/CultriX/SeQwence-14Bv1/f4505219-fc0d-4f7b-ad71-3c9fef064c28.json
delete mode 100644 data/hfopenllm_v2/alibaba/CultriX/SeQwence-14Bv2/49eccc70-6321-451b-87e9-29907cfb53a0.json
delete mode 100644 data/hfopenllm_v2/alibaba/CultriX/SeQwence-14Bv3/4857c00b-e4fb-417a-8b63-a5b7e9298b40.json
delete mode 100644 data/hfopenllm_v2/alibaba/Danielbrdz/Barcenas-R1-Qwen-1.5b/c5330fb2-e914-4170-81f8-77a317ba557c.json
delete mode 100644 data/hfopenllm_v2/alibaba/DavidAU/DeepSeek-R1-Distill-Qwen-25.5B-Brainstorm/4b7dd9db-5e94-4885-96f8-189af8d97c09.json
delete mode 100644 data/hfopenllm_v2/alibaba/DavidAU/Qwen2.5-MOE-2X1.5B-DeepSeek-Uncensored-Censored-4B/78e7f7ee-3677-499a-aa36-2e8bf0902bf0.json
delete mode 100644 data/hfopenllm_v2/alibaba/DavidAU/Qwen2.5-MOE-2X7B-DeepSeek-Abliterated-Censored-19B/d65793ba-f363-4665-9ff5-1ac08e819d55.json
delete mode 100644 data/hfopenllm_v2/alibaba/DavidAU/Qwen2.5-MOE-6x1.5B-DeepSeek-Reasoning-e32/c142222c-836d-493f-a9f8-857426e0573c.json
delete mode 100644 data/hfopenllm_v2/alibaba/DeepMount00/Qwen2-1.5B-Ita/6669c8b8-91d6-4f14-8cfb-a6422352850d.json
delete mode 100644 data/hfopenllm_v2/alibaba/DeepMount00/Qwen2-1.5B-Ita_v2/78ec8596-ee15-4e94-8bc8-77c6bdffc541.json
delete mode 100644 data/hfopenllm_v2/alibaba/DeepMount00/Qwen2-1.5B-Ita_v3/f9cac378-3bdb-4c66-8193-502773c5c5eb.json
delete mode 100644 data/hfopenllm_v2/alibaba/DeepMount00/Qwen2-1.5B-Ita_v5/04f0529b-474c-42d2-99a8-e3bdd5c18eaf.json
delete mode 100644 data/hfopenllm_v2/alibaba/DeepMount00/Qwen2-1.5B-Ita_v6/041f6e95-b7d1-44c6-a995-0c8257e188aa.json
delete mode 100644 data/hfopenllm_v2/alibaba/Dongwei/DeepSeek-R1-Distill-Qwen-7B-GRPO/b36b915f-3c4a-40e8-ab78-8442dbe116e1.json
delete mode 100644 data/hfopenllm_v2/alibaba/EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2/3ba36700-5019-4525-bf5e-6a87cce7ecc5.json
delete mode 100644 data/hfopenllm_v2/alibaba/EVA-UNIT-01/EVA-Qwen2.5-72B-v0.2/9e315ba7-3eea-4934-822e-461e64bf8551.json
delete mode 100644 data/hfopenllm_v2/alibaba/Etherll/Qwen2.5-7B-della-test/777b5587-70b2-472f-a6e4-820d653669cd.json
delete mode 100644 data/hfopenllm_v2/alibaba/HPAI-BSC/Qwen2.5-Aloe-Beta-7B/a99dbb21-4f7d-4ac0-b403-2f8bf7aa92b1.json
delete mode 100644 data/hfopenllm_v2/alibaba/HeraiHench/DeepSeek-R1-Qwen-Coder-8B/a0730f18-1058-44b4-b6b6-0881ae2e6338.json
delete mode 100644 data/hfopenllm_v2/alibaba/HeraiHench/Double-Down-Qwen-Math-7B/6e852e78-e666-413e-ac29-ad374bbc74f2.json
delete mode 100644 data/hfopenllm_v2/alibaba/HeraiHench/Marge-Qwen-Math-7B/07f4a9dc-16d7-4b75-922f-09f8e9ebed7d.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen-0.5B-IRPO-1epoch/0cbb4771-926d-4cf6-a78b-a5f4ac4d5902.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen-0.5B-IRPO-5epoch/301f71c8-fc1f-42e8-9029-f9d03574872b.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen-0.5B-eDPO-1epoch/65e2f2b2-cb5b-40f3-b23a-8c0d185de219.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen-0.5B-eDPO-5epoch/062a1dcd-2553-4657-8f89-a481ff62a193.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IPO_5e-7-1ep_0alp_0lam/82b47608-08b5-4368-bead-aa117736c06d.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IPO_5e-7-3ep_0alp_0lam/747310d0-7c30-4261-b2e8-a783d8753e9a.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_1e-6-3ep_1alp_0lam/a7b6a07a-70fc-4d34-9a92-265b848d22d7.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_1e-7-3ep_1alp_0lam/99139c71-a4f2-45d7-95b8-a8b7720681aa.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_3e-6-1ep_1alp_0lam/6407040d-023d-476a-ac79-ef85e104eace.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_3e-6-2ep_1alp_0lam/64f71756-0a54-4a42-a96a-7056071c7dd0.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_3e-6-3ep_1alp_0lam/8c18d418-a0a4-435a-b31f-7d879c793b4c.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_3e-7-1ep_1alp_0lam/75e153a7-d699-4822-90b6-9d7da259e124.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_3e-7-3ep_1alp_0lam/836cc2ab-edbc-45fa-af8c-034d0239635b.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_5e-7-1ep_1alp_0lam/f270e1bd-7e75-4c6c-a701-9def96275025.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_5e-7-2ep_1alp_0lam/02ec1b4f-f1e0-4c46-bff2-1475e95cff80.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_5e-7-3ep_1alp_0lam/9da4a976-09a2-4f1c-a15e-d498a2adfdd4.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.1_3e-6-3ep_0alp_0lam/c3a945da-be07-4132-b558-f20202530b4d.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.1_5e-7-3ep_0alp_0lam/723afa16-d986-421c-a6ec-d1b00cb9d765.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.3_3e-6-3ep_0alp_0lam/03e5cd5c-adc0-49d8-9e51-3e315d0bffd6.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.3_5e-7-3ep_0alp_0lam/6992c085-939e-48b0-8c8f-53d6ca9737de.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_1e-5-3ep_0alp_0lam/59e7ed2b-8385-4c83-b357-6dfa52e429cc.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-1ep_0alp_0lam/495ed31f-9cbc-4f6f-b4be-2b9ee8f5011c.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-2ep_0alp_0lam/6c5809dc-67b3-4567-8d1f-4a8104a11507.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-3ep_0alp_0lam/44c78761-2672-49c4-85f4-9b0d575dd914.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_4e-6-3ep_0alp_0lam/b33d4765-4633-4c2b-a118-1ed82b0c842b.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_6e-6-3ep_0alp_0lam/8d200434-ef84-403e-9fb6-86c15c4ccfed.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_7e-6-3ep_0alp_0lam/3a666f3f-f2ea-4fed-b2fe-750b759eae7a.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_7e-7-3ep_0alp_0lam/7fbad2de-a9da-4962-ae18-47298811ba5b.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.7_3e-6-3ep_0alp_0lam/1fad00cf-e472-42dc-8b87-a0501cb051ab.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.7_5e-7-3ep_0alp_0lam/c68fad94-ce6a-4053-b991-2c1e660fe7d9.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.9_5e-7-3ep_0alp_0lam/a6a3ee79-a93b-4220-ac09-1c5d2f70cdf8.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_3e-6-1ep_3vpo_const/e3471a51-fad2-44cf-bd0c-ad1250d22f83.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_0alp_0lam/5a3a76e9-f93d-435c-898c-b76bc5dc0cda.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_10vpo_const/fc83f198-e606-4c3d-aede-cb646b080b3b.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_1vpo_const/e0452e02-8cf3-4da6-83f6-844f1de6fac2.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_3vpo_const/0792bedd-3891-4622-983b-886c126ace68.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_0alp_0lam/31e52020-32b2-4271-89b5-31dfde730404.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_1vpo_const/06074d49-defe-4303-9899-18f074a06935.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_3vpo_const/1ef0a501-863d-49dc-9bda-5151fb161b41.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_0alp_0lam/15177605-2eea-4d8a-8462-7b64f7d29071.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_10vpo_const/09996570-4086-46c5-900e-887c3d5d5826.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_1vpo_const/8a24b990-24f1-46f6-a4f9-4ecaa39b4ec7.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_30vpo_const/ac310031-4080-4124-a858-e1293532b222.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_3vpo_const/75a8a0dd-e64d-4462-b8be-8006f6710653.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_0alp_0lam/8469a871-39e1-4b21-bb7c-fa21026a01ba.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_10vpo_const/046380aa-08bf-4d95-a4cc-bbfaf30eb56b.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_1vpo_const/fa8ee240-a7ac-4edc-9ac7-beabf38af0fa.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_30vpo_const/6d30ee72-d0ea-496d-8375-892968c8602e.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_3vpo_const/903b0e99-e50a-4afa-8085-1fd01872c048.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.1/225277d4-e1b9-4992-8e2d-678ac6157b06.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.3/4991436d-59fd-4f66-b588-9103beeeba5f.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-rDPO_3e-6-1ep_0vpo_const_0.1/6118242a-de0a-4734-979d-86f2cc6fc65c.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.1/a6b71abf-7ee1-438b-8218-98803bca8de8.json
delete mode 100644 data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.3/f7fb8d6b-9773-42e7-a426-a35a401f689a.json
delete mode 100644 data/hfopenllm_v2/alibaba/JungZoona/T3Q-qwen2.5-14b-v1.0-e3/eb7694ce-6fe4-4bb0-bcab-266ccc71f78a.json
delete mode 100644 data/hfopenllm_v2/alibaba/Junhoee/Qwen-Megumin/0f231e27-deec-4b10-a995-d493ecf8400f.json
delete mode 100644 data/hfopenllm_v2/alibaba/KingNish/Qwen2.5-0.5b-Test-ft/5a28540f-3a94-478c-84c0-5be8db86328a.json
delete mode 100644 data/hfopenllm_v2/alibaba/KingNish/qwen-1b-continued-v2.1/f12c6b15-107a-41ed-98fa-40b0af5be42e.json
delete mode 100644 data/hfopenllm_v2/alibaba/KingNish/qwen-1b-continued-v2.2/cf6aeb1a-4814-41ad-96f5-b59caafb902f.json
delete mode 100644 data/hfopenllm_v2/alibaba/KingNish/qwen-1b-continued-v2/479d9f2a-82f6-42de-b8d6-92405f60638c.json
delete mode 100644 data/hfopenllm_v2/alibaba/KingNish/qwen-1b-continued/a4063b77-fc24-4c9d-bf08-cb28fc6e8259.json
delete mode 100644 data/hfopenllm_v2/alibaba/Kukedlc/Qwen-2.5-7b-Spanish-o1-CoT/c9a159fb-9e6b-49b3-8f2b-a2d2d3ca8f19.json
delete mode 100644 data/hfopenllm_v2/alibaba/Lambent/qwen2.5-reinstruct-alternate-lumen-14B/974e902e-0959-42d0-98f8-288e1a6ce887.json
delete mode 100644 data/hfopenllm_v2/alibaba/LenguajeNaturalAI/leniachat-qwen2-1.5B-v0/eb6e6d30-b349-447c-83d3-fe7760e83037.json
delete mode 100644 data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v3/eb958d5c-aa2e-4640-bef7-c8b10a892847.json
delete mode 100644 data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v4/17c5c728-e03d-45e9-aaae-816c4e90b14f.json
delete mode 100644 data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v5/79d3d942-8d5f-4aca-8759-8d70b8cfc5f3.json
delete mode 100644 data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6-cpt/92bff089-baed-4f1f-852b-f274a7920a1a.json
delete mode 100644 data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6/c4b27a1b-28dd-4a79-839c-ad8673034937.json
delete mode 100644 data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7-rebase/46a21741-1860-4498-8284-c94fccad1ed0.json
delete mode 100644 data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7/d540acde-9601-4119-8ae2-f7cdf82f43f7.json
delete mode 100644 data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.5/c723fc6f-2656-4084-81d0-4cbaf0587049.json
delete mode 100644 data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.6/526f6468-b7a8-47a7-9ed4-c2aa7cc63ca1.json
delete mode 100644 data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.7/56232cf6-7ee7-45ed-b139-ea20e148b5fa.json
delete mode 100644 data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.8/51ff4f00-1d21-4f98-b5a3-7a72c4b2a5b1.json
delete mode 100644 data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.9/eee0ebda-6ff8-45bd-ac4e-15aeb724d0d1.json
delete mode 100644 data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8/b3e7af18-231e-4839-809c-bc5bfe7b4182.json
delete mode 100644 data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9-stock/757269fe-8662-4eaa-8e76-5c2f88d8fbb0.json
delete mode 100644 data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.1/dffd1a4a-a056-43c2-bda3-0cfa21406656.json
delete mode 100644 data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.2/b5ecb480-16e6-4dfb-be77-ad8ef4e90aa3.json
delete mode 100644 data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9/682a38c6-2fb8-4c42-b6ad-69fbe65be484.json
delete mode 100644 data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-OriginalFusion/cf14f098-cd46-4ca0-acec-02012eb78ea3.json
delete mode 100644 data/hfopenllm_v2/alibaba/Marsouuu/MiniQwenMathExpert-ECE-PRYMMAL-Martial/f1b6c510-02fe-4ffd-96da-4cfcfb04eb8c.json
delete mode 100644 data/hfopenllm_v2/alibaba/MaziyarPanahi/Qwen1.5-MoE-A2.7B-Wikihow/ee23e137-57d2-49aa-b267-27bd48457d46.json
delete mode 100644 data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.1-qwen2-72b/ae68a60d-a2df-45f1-b446-1400901cb6ff.json
delete mode 100644 data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.1-qwen2-7b/6c31df3b-e408-4a6c-b475-78f174630cad.json
delete mode 100644 data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.1-qwen2.5-72b/2b841a46-6210-4092-875f-ca3ae36f3d25.json
delete mode 100644 data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.2-qwen2-72b/250897a9-7d48-4323-813d-fa48befe2cbe.json
delete mode 100644 data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.2-qwen2-7b/154b7a41-e1bf-4827-a6a7-279ea170ab7e.json
delete mode 100644 data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.2-qwen2.5-72b/1fa2ab02-9a1c-4e7e-95b8-27e78af0ba73.json
delete mode 100644 data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.3-qwen2-72b/8b769df2-18f5-4712-a02b-962d3e2bb7c7.json
delete mode 100644 data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.3-qwen2-7b/3272e904-21d5-4116-abde-0e74fe48b9d5.json
delete mode 100644 data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.4-qwen2-7b/5f54ee4a-42e8-4dd0-88bc-915d2f1971a0.json
delete mode 100644 data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.5-qwen2-7b/762f6ff3-4823-4de8-8351-045e1d1d383b.json
delete mode 100644 data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.6-qwen2-7b/65f44cf9-f619-4f43-a03f-09e22386d319.json
delete mode 100644 data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.7-qwen2-7b/f592bc27-c97c-4b14-abcf-30782d8c0056.json
delete mode 100644 data/hfopenllm_v2/alibaba/Minami-su/Amara-o1-7B-Qwen/6910eff9-74bc-46b0-8f8c-20642bef4a12.json
delete mode 100644 data/hfopenllm_v2/alibaba/Minami-su/Amara-o2-7B-Qwen/ebd5da9f-60d5-492e-916b-5e123442316c.json
delete mode 100644 data/hfopenllm_v2/alibaba/Nexesenex/Qwen_2.5_3b_Smarteaz_0.01a/eaf601d2-f285-4b0c-b3ab-5d029b8fe20f.json
delete mode 100644 data/hfopenllm_v2/alibaba/NikolaSigmoid/DeepSeek-R1-Distill-Qwen-1.5B-500/c0182d01-454b-4194-be7a-81b9a9672d07.json
delete mode 100644 data/hfopenllm_v2/alibaba/PJMixers-Dev/Qwen2.5-RomboTiesTest-7B/a954be32-0c84-4ffe-9c4f-7f895c77e197.json
delete mode 100644 data/hfopenllm_v2/alibaba/Pinkstack/PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B/7b8f75d1-ef18-4fb4-abbb-efd6147fe74c.json
delete mode 100644 data/hfopenllm_v2/alibaba/Qwen/QwQ-32B-Preview/1326f0c0-9355-47ff-813b-0729370e1487.json
delete mode 100644 data/hfopenllm_v2/alibaba/Qwen/QwQ-32B/788241ad-d975-498e-80ef-b0d04bd8db85.json
delete mode 100644 data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-0.5B/e0115d6b-3b2c-4047-b64c-1e7afb5edd55.json
delete mode 100644 data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-1.8B/7c828833-fd36-4a84-8530-d3c1769ca822.json
delete mode 100644 data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-110B/29389e2b-7898-4f9f-ba8c-8fe4dad80295.json
delete mode 100644 data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-14B/9afcb068-65e2-4d4c-b7ee-071eb4dbac73.json
delete mode 100644 data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-32B/b8cd9221-dd4e-4f49-b03e-f11bdd5773e4.json
delete mode 100644 data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-4B/1e3f60f2-814a-4979-87bd-f5f94d5b09cc.json
delete mode 100644 data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-7B/102378fc-7b98-4088-a6f5-3039e7b638d5.json
delete mode 100644 data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-MoE-A2.7B/c6aa0ed8-3b79-4d73-8587-762e9469f4ce.json
delete mode 100644 data/hfopenllm_v2/alibaba/Qwen/Qwen2-0.5B/cdf3b683-29d9-45b4-b6a6-1f67927ef953.json
delete mode 100644 data/hfopenllm_v2/alibaba/Qwen/Qwen2-1.5B/6eb76673-0633-440b-8849-8fcf8cf00954.json
delete mode 100644 data/hfopenllm_v2/alibaba/Qwen/Qwen2-57B-A14B/aafb84cd-5950-4b93-98d1-9e50fd294b65.json
delete mode 100644 data/hfopenllm_v2/alibaba/Qwen/Qwen2-72B/fc683e1a-327f-4a69-bd51-9022c587159b.json
delete mode 100644 data/hfopenllm_v2/alibaba/Qwen/Qwen2-7B/196e965c-4570-43aa-ba0d-13972796bda9.json
delete mode 100644 data/hfopenllm_v2/alibaba/Qwen/Qwen2-Math-7B/fe474496-4efa-4ef7-844d-32b17abda7c8.json
delete mode 100644 data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-0.5B/c8110747-f2dd-46d0-b2b3-706d70e1d714.json
delete mode 100644 data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-1.5B/9982c576-75fd-47f6-8fe9-52b56fc58d3f.json
delete mode 100644 data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-14B/b02dabaf-2aac-468d-b0cc-c7194c2094fd.json
delete mode 100644 data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-32B/9dd61039-27d0-42f3-9b03-65b0a59465d4.json
delete mode 100644 data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-3B/43062e28-5532-4e31-ac49-fbd794c7f664.json
delete mode 100644 data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-72B/89ce1911-289d-40bb-be48-f9a4d8d73ac2.json
delete mode 100644 data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-7B/bed92e1c-8f11-4f70-826e-569aa55baa09.json
delete mode 100644 data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-Coder-14B/d0ae041c-8b56-4ce1-841b-96622a724894.json
delete mode 100644 data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-Coder-32B/743c517a-ad0f-495d-b9d0-cdca01335933.json
delete mode 100644 data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-Coder-7B/5e82cb32-8291-497b-ac56-16b50947d1bf.json
delete mode 100644 data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-Math-7B/8fddcebe-58d2-4d40-8147-f02feabc0d9c.json
delete mode 100644 data/hfopenllm_v2/alibaba/RESMPDEV/EVA-Qwen2.5-1.5B-FRFR/648e69e2-54de-43c4-93ac-f8422fa4b9c1.json
delete mode 100644 data/hfopenllm_v2/alibaba/RESMPDEV/Qwen2-Wukong-0.5B/72a11594-1d83-4e12-b82f-137b6749f5ab.json
delete mode 100644 data/hfopenllm_v2/alibaba/Replete-AI/Replete-Coder-Qwen2-1.5b/1ff6b76b-7241-4f06-9db5-4594d3ff7a3f.json
delete mode 100644 data/hfopenllm_v2/alibaba/Replete-AI/Replete-LLM-Qwen2-7b/20a6e090-2c78-4eb9-870e-9abbcbada6f9.json
delete mode 100644 data/hfopenllm_v2/alibaba/Replete-AI/Replete-LLM-Qwen2-7b/a846978d-de78-48e8-a738-54c732e50c28.json
delete mode 100644 data/hfopenllm_v2/alibaba/Replete-AI/Replete-LLM-Qwen2-7b_Beta-Preview/4977e0d5-1446-41ba-b00b-e8236c896d2e.json
delete mode 100644 data/hfopenllm_v2/alibaba/Rombo-Org/Rombo-LLM-V2.5-Qwen-7b/8713e6fb-8843-43f2-af3b-57a59d326670.json
delete mode 100644 data/hfopenllm_v2/alibaba/Sakalti/QwenTest-7/2d99163e-9ebd-49d9-ad13-ee1f780d277c.json
delete mode 100644 data/hfopenllm_v2/alibaba/Sakalti/qwen2.5-2.3B/6dc5b101-c681-4010-941a-3983cb9eff53.json
delete mode 100644 data/hfopenllm_v2/alibaba/SicariusSicariiStuff/Impish_QWEN_14B-1M/a059e151-6f32-48ff-900b-4e232aef3cc0.json
delete mode 100644 data/hfopenllm_v2/alibaba/SicariusSicariiStuff/Impish_QWEN_7B-1M/64c02fd8-386d-4b4c-bc00-d243cfcae7f1.json
delete mode 100644 data/hfopenllm_v2/alibaba/SicariusSicariiStuff/Qwen2.5-14B_Uncencored/7c6f4fa2-6847-4f57-8a8f-31673bd8b1e7.json
delete mode 100644 data/hfopenllm_v2/alibaba/SicariusSicariiStuff/Qwen2.5-14B_Uncensored/ea18a046-87bb-42d9-a1b2-d01fe875c970.json
delete mode 100644 data/hfopenllm_v2/alibaba/SicariusSicariiStuff/Qwen2.5-14B_Uncensored_Instruct/8012de5a-8cb0-4039-895f-70c20e9237ee.json
delete mode 100644 data/hfopenllm_v2/alibaba/StelleX/Qwen2.5_Math_7B_Cot/a0802c61-1314-4a46-9b61-7a89246bac42.json
delete mode 100644 data/hfopenllm_v2/alibaba/T145/qwen-2.5-3B-merge-test/071d7565-90e5-43e8-a158-ab333beacdcf.json
delete mode 100644 data/hfopenllm_v2/alibaba/TIGER-Lab/AceCoder-Qwen2.5-7B-Ins-Rule/7621e05b-1b5e-43e5-a65c-322334575e68.json
delete mode 100644 data/hfopenllm_v2/alibaba/TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Base-Rule/f6223009-028e-4063-90ce-e008a3b5b284.json
delete mode 100644 data/hfopenllm_v2/alibaba/TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Ins-Rule/f75e2bca-e300-4b3c-a5aa-f6aae03e7330.json
delete mode 100644 data/hfopenllm_v2/alibaba/TIGER-Lab/Qwen2.5-Math-7B-CFT/07e72fc4-9c37-4a81-a788-8619035c66d3.json
delete mode 100644 data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-r-v0.3/43b106fe-ff02-4cfe-956f-cfc9e272de78.json
delete mode 100644 data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-v0.1/ce9658b7-b457-4fb3-8fce-4173b5d93f2d.json
delete mode 100644 data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-v0.2/eed9909e-db3e-4d6a-8caa-3f208ace941d.json
delete mode 100644 data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-v0.3/f8aa8470-6803-458e-8207-b217969dd6f3.json
delete mode 100644 data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-v0.4.1/c464e6b4-aa76-4b42-ab9b-71f193ec2a57.json
delete mode 100644 data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-v0.4/90fe60dc-76dd-4e90-99b4-c16d026afcb5.json
delete mode 100644 data/hfopenllm_v2/alibaba/Triangle104/DSR1-Distill-Qwen-7B-RP/856c2575-700c-4b00-8883-bcde8841e262.json
delete mode 100644 data/hfopenllm_v2/alibaba/Weyaxi/Einstein-v7-Qwen2-7B/b20c1304-d782-4d41-9c15-0091f9c914e4.json
delete mode 100644 data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-7B-MS-Destroyer/c5d4bbfe-68a9-4808-ab2e-e92dd88ba06a.json
delete mode 100644 data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview-v0.2/5cf588ed-fde6-4ee1-833e-a6743cc1834c.json
delete mode 100644 data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview/97a591f9-2052-43b3-851d-ac73c793a000.json
delete mode 100644 data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-THREADRIPPER-Medium-Censored/89ca3fb4-eb53-422c-a4dd-029bd1fc7c37.json
delete mode 100644 data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-THREADRIPPER-Small-AnniversaryEdition/4fcdfdff-87be-47b0-93bb-b4bc0bb2499d.json
delete mode 100644 data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-THREADRIPPER-Small/a55039b6-922f-4732-9feb-fa757f627ebd.json
delete mode 100644 data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-Ultra-1.5B-25.02-Exp/ddfae432-5d3c-4c7e-bc7f-087cddea014f.json
delete mode 100644 data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-1M-YOYO-V3/fdc183ed-50d6-40c3-8e7b-02a37fc42a00.json
delete mode 100644 data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-0505/1835078d-7897-4517-9d7b-86a2285dfa27.json
delete mode 100644 data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-0510-v2/ad6edd05-e83f-4da3-b200-c1d972548e8b.json
delete mode 100644 data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-0805/6d4ac88f-7a02-4f78-9990-6736972f43f7.json
delete mode 100644 data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-1005-v2/ed12a458-8c3b-4e08-a218-e94b4fdd89d8.json
delete mode 100644 data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-1005/29058700-6465-476d-b1c9-2bb89d70c52b.json
delete mode 100644 data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-1010-v2/2047ae80-fdc6-4e94-90e6-b3cac52d8c45.json
delete mode 100644 data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-1010/1de35d6f-c62f-48fd-b921-41e85b55434a.json
delete mode 100644 data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-1010/6a676239-eed6-44dc-b395-1b2453d5b0ba.json
delete mode 100644 data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-SCE/e0545222-4bd1-490a-a315-5b9ce9742310.json
delete mode 100644 data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-V4-p1/441375d9-0375-4a15-9d50-267395d3ab13.json
delete mode 100644 data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-V4-p2/9ecdd8a3-247b-46b2-ae3b-5798685329ef.json
delete mode 100644 data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-V4/c76d318b-eba5-4407-be86-a92051791f00.json
delete mode 100644 data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-latest-V2/b97b327c-1730-4bfe-b5fe-00dbfcd0d372.json
delete mode 100644 data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-latest/d5487f61-9be7-4ffc-af6d-be9f925dd4ba.json
delete mode 100644 data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-it-restore/ab78a98d-0cad-4215-8f37-f3093066a98d.json
delete mode 100644 data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-7B-it-restore/2f2577b8-28e3-4fa1-8e65-66e59499b9cd.json
delete mode 100644 data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-Coder-14B-YOYO-1010/4f6bda51-89d3-4005-9133-db6d871ae87d.json
delete mode 100644 data/hfopenllm_v2/alibaba/YOYO-AI/ZYH-LLM-Qwen2.5-14B-V2/0c7e0639-a082-47f1-bf32-0c45ce573f0a.json
delete mode 100644 data/hfopenllm_v2/alibaba/YOYO-AI/ZYH-LLM-Qwen2.5-14B-V3/4f85534a-0b12-42c4-a0d3-06d4d8337e0c.json
delete mode 100644 data/hfopenllm_v2/alibaba/YOYO-AI/ZYH-LLM-Qwen2.5-14B-V4/f5b253b5-4c42-49f8-9f3f-d85a5b2502c0.json
delete mode 100644 data/hfopenllm_v2/alibaba/YOYO-AI/ZYH-LLM-Qwen2.5-14B/2dd14fef-53f5-491d-a5e1-7e19f6043049.json
delete mode 100644 data/hfopenllm_v2/alibaba/YoungPanda/qwenqwen/7e4c528f-bb42-40e7-b849-86732d2f2a18.json
delete mode 100644 data/hfopenllm_v2/alibaba/ZeroXClem/Qwen-2.5-Aether-SlerpFusion-7B/8b61e7aa-3ba3-4e25-b1bf-9718970a111a.json
delete mode 100644 data/hfopenllm_v2/alibaba/ZeroXClem/Qwen2.5-7B-CelestialHarmony-1M/d912a685-7187-4b56-a7a8-881ed678ae2f.json
delete mode 100644 data/hfopenllm_v2/alibaba/ZeroXClem/Qwen2.5-7B-HomerAnvita-NerdMix/500a7a12-9c94-4ed8-b2b4-33473141c3c7.json
delete mode 100644 data/hfopenllm_v2/alibaba/ZeroXClem/Qwen2.5-7B-HomerCreative-Mix/336aaa71-3f35-48f3-bede-cb9ab3324cfc.json
delete mode 100644 data/hfopenllm_v2/alibaba/ZeroXClem/Qwen2.5-7B-Qandora-CySec/7a495a80-f712-477b-bd5c-0cf7a07e8ef2.json
delete mode 100644 data/hfopenllm_v2/alibaba/abacusai/Liberated-Qwen1.5-14B/614f3e27-e150-4edb-9438-06d0b0f38ca3.json
delete mode 100644 data/hfopenllm_v2/alibaba/adriszmar/QAIMath-Qwen2.5-7B-TIES/457f0bc3-68e1-4ecb-a983-5f504b1246cd.json
delete mode 100644 data/hfopenllm_v2/alibaba/adriszmar/QAIMath-Qwen2.5-7B-TIES/78544e05-7eed-465d-9199-35b25e1bebfe.json
delete mode 100644 data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-42B-AGI/de6fe2ab-47de-4616-a0b9-b2cb6f44b16b.json
delete mode 100644 data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-7B-task2/3518e992-9548-4025-a641-99a2cf3833e4.json
delete mode 100644 data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-7B-task3/0c556e08-bb71-406c-88b8-d45fc4cc43f0.json
delete mode 100644 data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-7B-task4/a200d34f-8ed0-4f1d-93e2-cff38b1811f9.json
delete mode 100644 data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-7B-task7/b5b02465-0d3f-4ccc-a104-174fcf53dc9a.json
delete mode 100644 data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-7B-task8/956640e9-97a3-4641-9ed0-a63831a8ee58.json
delete mode 100644 data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-slerp-14B/ba80d36c-7688-40e8-8182-251c6b9e6b19.json
delete mode 100644 data/hfopenllm_v2/alibaba/allknowingroger/QwenSlerp12-7B/18c67de4-1518-44b6-b92f-b490e9d55877.json
delete mode 100644 data/hfopenllm_v2/alibaba/allknowingroger/QwenSlerp4-14B/1393cab1-31aa-470c-bca1-53f99d7ea1e8.json
delete mode 100644 data/hfopenllm_v2/alibaba/allknowingroger/QwenSlerp5-14B/da7928ec-55b8-4d4b-9b9e-b40c5de7136b.json
delete mode 100644 data/hfopenllm_v2/alibaba/allknowingroger/QwenSlerp6-14B/5135513f-f255-412b-ab16-f0d613e4525e.json
delete mode 100644 data/hfopenllm_v2/alibaba/allknowingroger/QwenStock1-14B/95c86ae6-dcb7-4ed7-a82d-ce0b374cca0e.json
delete mode 100644 data/hfopenllm_v2/alibaba/allknowingroger/QwenStock2-14B/4a4c258b-2b03-4fad-a5e0-b623a25fb735.json
delete mode 100644 data/hfopenllm_v2/alibaba/allknowingroger/QwenStock3-14B/2b3928ad-ab69-4e63-aa3c-e64dea7b5e6c.json
delete mode 100644 data/hfopenllm_v2/alibaba/allknowingroger/Qwenslerp2-14B/636ed71e-3d86-4d5d-8b8d-3019f26261fc.json
delete mode 100644 data/hfopenllm_v2/alibaba/allknowingroger/Qwenslerp2-7B/a1e6f539-f5d7-4f57-b0da-4df7e5a86240.json
delete mode 100644 data/hfopenllm_v2/alibaba/allknowingroger/Qwenslerp3-14B/06a2a807-3dbc-42c4-adec-4d6caa01cf74.json
delete mode 100644 data/hfopenllm_v2/alibaba/allknowingroger/Qwenslerp3-7B/88727af1-7672-4ab5-9cc4-f56d286f3967.json
delete mode 100644 data/hfopenllm_v2/alibaba/allknowingroger/Rombos-LLM-V2.5-Qwen-42b/619fde94-d095-4f5c-b36d-19a38b6a8109.json
delete mode 100644 data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-1.5B-Blunt/d75b9105-a60d-49d9-8606-7b23ff5d3d1a.json
delete mode 100644 data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-1.5B-Reflective/40933520-61e0-4cbe-b6b2-b4d19063a1b9.json
delete mode 100644 data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-ABUB-ST/46a36382-df06-4dc1-93ae-6ae61343a969.json
delete mode 100644 data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt-Reflective/269f307e-3af1-47a2-92ec-00a59b4725ac.json
delete mode 100644 data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt/244417b6-88a2-483f-adba-c1d944c9cc29.json
delete mode 100644 data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Reflective/1bf5eb2a-c0e2-4bfc-9ae1-ec5737974cbe.json
delete mode 100644 data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored/41186ba2-77da-496c-afd0-c0f11ea05c9b.json
delete mode 100644 data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt/407adfd5-6a1f-420a-a5de-2e37740d7025.json
delete mode 100644 data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Reflective/744cef52-b155-4bb0-9411-2eb47938b5d6.json
delete mode 100644 data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B/f269f0cb-4f9b-4f29-84c2-a4f31ff08290.json
delete mode 100644 data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-7B-Blunt/678a08d8-3089-4d97-879d-c5485344de05.json
delete mode 100644 data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored/9c8db160-fc92-473f-a766-fb00fc099f6e.json
delete mode 100644 data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-7B-Reflective/fd05a73b-5b6a-460e-85d5-547710ab6bac.json
delete mode 100644 data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-7B/b4c9ec76-b126-4715-b3cf-c0d8a8a61d44.json
delete mode 100644 data/hfopenllm_v2/alibaba/braindao/Qwen2.5-14B/7be8016c-2454-4228-b10d-badba12e845b.json
delete mode 100644 data/hfopenllm_v2/alibaba/bunnycore/CyberCore-Qwen-2.1-7B/131132b7-5b2a-421f-aa02-360ef9b7f206.json
delete mode 100644 data/hfopenllm_v2/alibaba/bunnycore/DeepQwen-3B-LCoT-SCE/49243e70-a24d-4e0c-b4c6-4275be1db944.json
delete mode 100644 data/hfopenllm_v2/alibaba/bunnycore/DeepSeek-R1-Distill-Qwen-7B-RRP-Ex/7e6a55fb-da39-4b16-a59b-70635e636c02.json
delete mode 100644 data/hfopenllm_v2/alibaba/bunnycore/FwF-Qwen-7B-0.1/bfaeefb1-93c9-470b-9376-9c67a1d20862.json
delete mode 100644 data/hfopenllm_v2/alibaba/bunnycore/FwF-Qwen-7B-0.2/ee7b9254-5e4a-46a0-a8b3-2ecc1708e6ab.json
delete mode 100644 data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Deep-Sky-T1/33cc8f90-d019-49d9-8220-d66260659435.json
delete mode 100644 data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Deep-Stock-v1/a9fe98a7-e143-4100-99cd-adea90917c4c.json
delete mode 100644 data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Deep-Stock-v4/56ae78dc-3cae-43b0-afc9-e6fac3c6556a.json
delete mode 100644 data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Deep-Stock-v5/39ce157b-e374-4963-8b40-6393835574f5.json
delete mode 100644 data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Exp-Sce/c57286a9-ee0c-48e7-814e-8f2aa8e9688a.json
delete mode 100644 data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-R1-Stock/672e66ed-80e2-4b45-b52c-d9265f8efac8.json
delete mode 100644 data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Stock-Deep-Bespoke/af89079b-b84e-48f1-876a-ebf2d933d91e.json
delete mode 100644 data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7b-S1k/e7394d5d-4253-4a53-8a0a-73b0a41e62a4.json
delete mode 100644 data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-1.5B-Model-Stock/865ffa1b-af08-416e-8de0-a16091d4ec79.json
delete mode 100644 data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-Model-Stock-v2/e949a47b-85f9-4072-8302-8bfef92579d9.json
delete mode 100644 data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-Model-Stock-v3.1/744d1978-7aa3-44b6-91a0-664383a66f8b.json
delete mode 100644 data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-Model-Stock-v3.2/139f2e38-0b98-4bfe-82b0-99a6e6b51e7f.json
delete mode 100644 data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-Model-Stock-v4.1/8348f83b-0739-411f-8b87-bd9d5e871ab3.json
delete mode 100644 data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-Model-Stock/4dcf1412-4182-40bd-bd1a-2246e29f18e9.json
delete mode 100644 data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-RP-Mix/f43b9387-56a9-4c21-850c-5cfda84fc8b5.json
delete mode 100644 data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-RP-Thinker-V2/497c8c15-1b77-4468-b33d-efa190c28e78.json
delete mode 100644 data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-RP-Thinker/80cadd5b-ebbd-4f2f-912b-5d944650e2b1.json
delete mode 100644 data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-CyberRombos/1dc11c68-ce65-4a5b-9f75-4cdf1775bfc6.json
delete mode 100644 data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-Fuse-Exp/f435a5b0-cc12-4603-b7b0-4625dc547ed2.json
delete mode 100644 data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-MixStock-Sce-V0.3/daf38e27-1149-44a8-84f2-93f842f4740a.json
delete mode 100644 data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-MixStock-V0.1/4a5bb50c-017d-421d-8ea1-21a8316db0f4.json
delete mode 100644 data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-R1-Bespoke-Stock/20de3a0f-fad0-4832-863e-2b2049037c4f.json
delete mode 100644 data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-R1-Bespoke-Task/0f460b31-7249-4e2d-a614-d1230e95f3cf.json
delete mode 100644 data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-RRP-1M-Thinker/1879a765-f4ab-4bad-9525-47f428b43220.json
delete mode 100644 data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-RRP-1M/9ec2ac0c-21e8-4c9c-ba5f-69ad284400bb.json
delete mode 100644 data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-RRP-ID/85b10038-d136-4be7-8e04-7298ddb4f7d2.json
delete mode 100644 data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-Sky-R1-Mini/c1f39d51-d7a2-4fee-ba35-ef4e0d429b29.json
delete mode 100644 data/hfopenllm_v2/alibaba/bunnycore/QwenMosaic-7B/4fcee29d-6351-4875-995d-81834fd878c3.json
delete mode 100644 data/hfopenllm_v2/alibaba/cognitivecomputations/Dolphin3.0-Qwen2.5-0.5B/4b0c69d9-1801-4a54-9554-d8dcff88f9a3.json
delete mode 100644 data/hfopenllm_v2/alibaba/cognitivecomputations/dolphin-2.9.2-qwen2-72b/5d3c9637-0558-4a2e-9950-8e7017d013f8.json
delete mode 100644 data/hfopenllm_v2/alibaba/cognitivecomputations/dolphin-2.9.2-qwen2-7b/c04e8c21-3ae1-457a-9609-682341323a88.json
delete mode 100644 data/hfopenllm_v2/alibaba/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/d38f0e3a-e89e-4af6-95b2-8230b6a84ec3.json
delete mode 100644 data/hfopenllm_v2/alibaba/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/77e70ef3-fef2-4b75-9221-b165ec29f31e.json
delete mode 100644 data/hfopenllm_v2/alibaba/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/6731c6b8-0b23-4fc2-b284-01025ce30887.json
delete mode 100644 data/hfopenllm_v2/alibaba/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/4cb8eae2-bc55-4adb-a4eb-1fc9eb29d891.json
delete mode 100644 data/hfopenllm_v2/alibaba/dfurman/Qwen2-72B-Orpo-v0.1/b197728d-b390-45a8-8adc-ed8567b628da.json
delete mode 100644 data/hfopenllm_v2/alibaba/ehristoforu/QwenQwen2.5-7B-IT-Dare/09deb823-536f-4afc-95bf-ebb0a8eb2e00.json
delete mode 100644 data/hfopenllm_v2/alibaba/ehristoforu/QwenQwen2.5-7B-IT/30f8faa5-777f-47bc-b128-f31b950079a3.json
delete mode 100644 data/hfopenllm_v2/alibaba/ehristoforu/RQwen-v0.1/93187c79-f1a4-45f9-9d95-a254a185f7a4.json
delete mode 100644 data/hfopenllm_v2/alibaba/ehristoforu/RQwen-v0.2/69318100-73ee-47f4-96b2-6e7b310fbcd1.json
delete mode 100644 data/hfopenllm_v2/alibaba/ehristoforu/coolqwen-3b-it/5aab957b-f25b-4208-9bf8-2d16887245bc.json
delete mode 100644 data/hfopenllm_v2/alibaba/ehristoforu/frqwen2.5-from7b-duable4layers-it/b2c0f0f2-3c1d-4b2a-a82d-24001cbfd3d7.json
delete mode 100644 data/hfopenllm_v2/alibaba/ehristoforu/frqwen2.5-from7b-it/26034d5d-5d52-40d8-aa9b-e90dbd255903.json
delete mode 100644 data/hfopenllm_v2/alibaba/ehristoforu/qwen2.5-test-32b-it/606d699f-c7ac-4e5b-b5a3-5bd43f0a3ff6.json
delete mode 100644 data/hfopenllm_v2/alibaba/ehristoforu/qwen2.5-with-lora-think-3b-it/6c40f966-753b-4301-8c9b-f7b4905c0b68.json
delete mode 100644 data/hfopenllm_v2/alibaba/freewheelin/free-evo-qwen72b-v0.8-re/cfb071af-7283-4155-8ce1-40f751dd46ec.json
delete mode 100644 data/hfopenllm_v2/alibaba/godlikehhd/ifd_2500_qwen/84ad6756-cb9d-4303-8e7a-395c1dc7c222.json
delete mode 100644 data/hfopenllm_v2/alibaba/godlikehhd/ifd_new_correct_all_sample_2500_qwen/b481d1bd-e678-4b78-aecb-d43a561dd969.json
delete mode 100644 data/hfopenllm_v2/alibaba/godlikehhd/ifd_new_correct_sample_2500_qwen/c42196be-c20b-413d-8870-f10759058098.json
delete mode 100644 data/hfopenllm_v2/alibaba/godlikehhd/ifd_new_qwen_2500/8d8663a1-12f6-4e88-af3d-784ff86e8c59.json
delete mode 100644 data/hfopenllm_v2/alibaba/godlikehhd/qwen-2.5-1.5b-cherry/a0621e6d-4178-49c9-aa2b-f56930884b82.json
delete mode 100644 data/hfopenllm_v2/alibaba/godlikehhd/qwen_2.5-1.5b-cherry_new/dd0260dd-59f7-4b3d-8f9c-60b297c07a1b.json
delete mode 100644 data/hfopenllm_v2/alibaba/godlikehhd/qwen_full_data_alpaca/746630a6-de1d-4976-9168-d8ff06980904.json
delete mode 100644 data/hfopenllm_v2/alibaba/godlikehhd/qwen_ins_ans_2500/7f577380-2691-4906-af13-8ca3011e6316.json
delete mode 100644 data/hfopenllm_v2/alibaba/gz987/qwen2.5-7b-cabs-v0.1/9b6c775b-ef08-4e57-8441-52d7887615b1.json
delete mode 100644 data/hfopenllm_v2/alibaba/gz987/qwen2.5-7b-cabs-v0.2/7288fa97-efd7-45d5-8769-e0071e9b5488.json
delete mode 100644 data/hfopenllm_v2/alibaba/gz987/qwen2.5-7b-cabs-v0.3/b664e033-1424-431e-af8d-09a11b449286.json
delete mode 100644 data/hfopenllm_v2/alibaba/gz987/qwen2.5-7b-cabs-v0.4/8fb7a2aa-3f43-4aaf-b2c0-1770704fcf81.json
delete mode 100644 data/hfopenllm_v2/alibaba/hotmailuser/Deepseek-qwen-modelstock-2B/15a4291f-4918-43a6-b242-90db88fe4a3d.json
delete mode 100644 data/hfopenllm_v2/alibaba/hotmailuser/Qwen2.5-HomerSlerp-7B/9c7dab43-b26d-4cb4-a73c-95bb1e01ffe8.json
delete mode 100644 data/hfopenllm_v2/alibaba/hotmailuser/QwenModelStock-1.8B/661b1590-f312-447b-a494-1d37ffd93cae.json
delete mode 100644 data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp-14B/83387977-a8cd-4cdd-abc7-301006380458.json
delete mode 100644 data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp-3B/7f53fb66-2c19-434a-acec-7cdcf9fce04d.json
delete mode 100644 data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp-7B/4f8db3ee-409a-4bac-ab0a-ee3493d1e842.json
delete mode 100644 data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp2-14B/6732a278-0613-40fd-bdbc-88a586631279.json
delete mode 100644 data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp2-3B/cc53c4f9-3c1b-4b21-9aac-ea22dced76c3.json
delete mode 100644 data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp3-14B/7d2c1ffb-d1e7-4c88-af08-74642ddd8741.json
delete mode 100644 data/hfopenllm_v2/alibaba/hotmailuser/QwenSparse-7B/96bbc2c8-bb74-408d-8625-e6bf66b63cd0.json
delete mode 100644 data/hfopenllm_v2/alibaba/hotmailuser/QwenStock-0.5B/72853b4d-cc12-478f-b6f4-977b8fbabfa0.json
delete mode 100644 data/hfopenllm_v2/alibaba/hotmailuser/QwenStock-1.7B/25674b98-92b5-4e2d-97ab-084eabb13db2.json
delete mode 100644 data/hfopenllm_v2/alibaba/hotmailuser/QwenStock1-14B/67fd0572-cf55-412d-8ec6-0cb168d3ed08.json
delete mode 100644 data/hfopenllm_v2/alibaba/huihui-ai/DeepSeek-R1-Distill-Qwen-14B-abliterated-v2/69d04754-3779-4408-9aa9-68c9ba65de7a.json
delete mode 100644 data/hfopenllm_v2/alibaba/jayasuryajsk/Qwen2.5-3B-reasoner/91c0e116-7dc0-4931-ac61-b98bac2af3e0.json
delete mode 100644 data/hfopenllm_v2/alibaba/jeanmichela/o-distil-qwen/172e7bfa-b430-4e14-a15a-a54ec5c9133e.json
delete mode 100644 data/hfopenllm_v2/alibaba/jebish7/qwen2.5-0.5B-IHA-Hin/5849d742-02eb-4370-8c97-efc5eec4f1ed.json
delete mode 100644 data/hfopenllm_v2/alibaba/jeffmeloy/Qwen-7B-nerd-uncensored-v1.0/1812829e-2c91-410e-9e2e-cc758b652e9b.json
delete mode 100644 data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-minperplexity-2/593d3d30-f2e8-4ad3-b0ab-4bfed63a0ab5.json
delete mode 100644 data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v0.9/45a72c39-9cdb-4fb6-aaf0-d50cc89dfd70.json
delete mode 100644 data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.0/ee2b789c-951d-426e-87e3-232c07d65ade.json
delete mode 100644 data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.1/2316b408-c94b-471e-b64b-c1f8f345868e.json
delete mode 100644 data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.2/49d47f6d-0d11-4b07-b42e-b94310c97d3e.json
delete mode 100644 data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.3/0ec990b0-b908-44f5-9fb7-5ee603737bc7.json
delete mode 100644 data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.4/34c33a97-ae07-42e9-8025-9076e2bce3bb.json
delete mode 100644 data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.5/bd4ff159-0bf9-4fe1-8cc8-9f3d7bb47bbc.json
delete mode 100644 data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.7/4aa966fc-ee99-430c-8688-99565f5e6fcc.json
delete mode 100644 data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.8/e908901d-c122-4458-9d4e-9a7d1242211c.json
delete mode 100644 data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.0/e9350de5-cae6-46bc-a83f-0e6e65eae4e3.json
delete mode 100644 data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.1/769eabf2-4c12-4a48-8ec2-7dacf50a28f0.json
delete mode 100644 data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.2/8c4531a4-4418-4090-9c82-f60bcf8d9935.json
delete mode 100644 data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.3/a5c9246f-a7b5-4183-9a64-93151b536945.json
delete mode 100644 data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.4/1faf58ba-28e7-45a1-bc2c-d0aa707a49aa.json
delete mode 100644 data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.5/b347eea5-e676-478e-b0ee-d53abf2c8697.json
delete mode 100644 data/hfopenllm_v2/alibaba/jeffmeloy/jeffmeloy_Qwen2.5-7B-minperplexity-1/ba005ac7-761f-4cd7-91ed-34b88028240f.json
delete mode 100644 data/hfopenllm_v2/alibaba/kayfour/T3Q-Qwen2.5-7B-it-KOR-Safe/35e56ec7-deae-4674-abfc-3c45f5dec040.json
delete mode 100644 data/hfopenllm_v2/alibaba/kms7530/chemeng_qwen-math-7b_24_1_100_1/af7f201f-3af3-4ffb-9416-c83235851cb6.json
delete mode 100644 data/hfopenllm_v2/alibaba/kms7530/chemeng_qwen-math-7b_24_1_100_1_nonmath/8ae7c857-be7e-463e-86c2-6b165920a45c.json
delete mode 100644 data/hfopenllm_v2/alibaba/lkoenig/BBAI_212_QwenLawLo/c4f888d2-c08c-43c4-a1f9-79edf519c893.json
delete mode 100644 data/hfopenllm_v2/alibaba/lkoenig/BBAI_212_Qwencore/d42a520c-15dd-4497-a26a-b6f77b3257e6.json
delete mode 100644 data/hfopenllm_v2/alibaba/lkoenig/BBAI_230_Xiaqwen/c9393ea7-3269-435f-9159-95638b9c691e.json
delete mode 100644 data/hfopenllm_v2/alibaba/lkoenig/BBAI_375_QwenDyancabs/08e49740-3cdd-47b2-9b95-b96d8a13dd79.json
delete mode 100644 data/hfopenllm_v2/alibaba/lkoenig/BBAI_456_QwenKoen/249b0b65-5c71-4c5d-9802-28df0ead0cdf.json
delete mode 100644 data/hfopenllm_v2/alibaba/lkoenig/BBAI_7B_KoenQwenDyan/fe084d09-ee80-4c7f-93a7-3ee0f9081177.json
delete mode 100644 data/hfopenllm_v2/alibaba/lkoenig/BBAI_7B_Qwen2.5koen/078cedea-7b3a-4c77-b932-3d42f0c841fe.json
delete mode 100644 data/hfopenllm_v2/alibaba/lkoenig/BBAI_7B_QwenDyanKoenLo/dedc34ed-fd8f-4b29-b898-3c9830993247.json
delete mode 100644 data/hfopenllm_v2/alibaba/lkoenig/BBAI_7B_QwenDyancabsLAW/05f391f3-68ac-422a-b7e8-01eba1729a0b.json
delete mode 100644 data/hfopenllm_v2/alibaba/macadeliccc/Samantha-Qwen-2-7B/c443492e-3b5f-4394-9fbb-761dba338638.json
delete mode 100644 data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.1/19b72caf-a841-4928-98c3-c505694724c3.json
delete mode 100644 data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.3/36b2821f-5fa6-4384-9ddc-6cbc5b52321c.json
delete mode 100644 data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-MST/80d3a785-dde1-44fa-b6e1-93722849fdb1.json
delete mode 100644 data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-Preview/6bfc8cf9-e615-4447-bc6e-ff96752dc5fb.json
delete mode 100644 data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-RP-v1.4-1M/feefc068-9257-4d0f-ac55-acd08ededeca.json
delete mode 100644 data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-v1.1/25d6c4bd-6540-43cb-a682-77d4fa4eb64e.json
delete mode 100644 data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-v1.2/6e342711-8d2d-42ed-a019-11be429e10d8.json
delete mode 100644 data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-v1.3/1af605c0-ec58-4651-a57a-2fd7d0cd5a67.json
delete mode 100644 data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-v1.4/fd65e319-bc38-457b-9913-9a2214e69823.json
delete mode 100644 data/hfopenllm_v2/alibaba/marcuscedricridia/Qwen2.5-7B-Preview/56032f8a-b733-4b1f-acbc-78d0d1ddf2a5.json
delete mode 100644 data/hfopenllm_v2/alibaba/marcuscedricridia/Yell-Qwen2.5-7B-Preview-v1.1/be0058b1-23b2-40b7-b336-ab40bf82c997.json
delete mode 100644 data/hfopenllm_v2/alibaba/marcuscedricridia/Yell-Qwen2.5-7B-Preview/f47334f2-f0ab-48f5-814e-f3ede36802d9.json
delete mode 100644 data/hfopenllm_v2/alibaba/maywell/Qwen2-7B-Multilingual-RP/fd91f8aa-a521-4e9b-824a-aa21adade569.json
delete mode 100644 data/hfopenllm_v2/alibaba/mergekit-community/SuperQwen-2.5-1.5B/95d33475-a71b-41d6-a08d-3da30e631897.json
delete mode 100644 data/hfopenllm_v2/alibaba/mhl1/Qwen2.5-0.5B-cinstruct-stage1/bf9d8219-66b9-4c77-8c6d-2983e60dc2cb.json
delete mode 100644 data/hfopenllm_v2/alibaba/migtissera/Tess-v2.5.2-Qwen2-72B/34b9dd9e-dc03-4354-b016-3b1463a902f9.json
delete mode 100644 data/hfopenllm_v2/alibaba/minghaowu/Qwen1.5-1.8B-OpenHermes-2.5/cf3f376a-92ec-4678-a57a-cee2e40032a5.json
delete mode 100644 data/hfopenllm_v2/alibaba/mobiuslabsgmbh/DeepSeek-R1-ReDistill-Qwen-7B-v1.1/99d27765-a9c5-4f50-8bd1-c3ce67683621.json
delete mode 100644 data/hfopenllm_v2/alibaba/nbeerbower/Dumpling-Qwen2.5-1.5B/f2eaeee8-a75b-4d0f-9dcd-2a11c3de926b.json
delete mode 100644 data/hfopenllm_v2/alibaba/nbeerbower/Dumpling-Qwen2.5-14B/0a70cdb4-5ccc-40e2-bf99-3af619b8b7f6.json
delete mode 100644 data/hfopenllm_v2/alibaba/nbeerbower/Dumpling-Qwen2.5-7B-1k-r16/76e3f2a5-7545-4270-800d-6413e39608ad.json
delete mode 100644 data/hfopenllm_v2/alibaba/nbeerbower/Dumpling-Qwen2.5-7B-1k-r64-2e-5/2e6c1c46-01af-493a-a2ce-266d13b53000.json
delete mode 100644 data/hfopenllm_v2/alibaba/nbeerbower/EVA-abliterated-TIES-Qwen2.5-1.5B/dea423e8-cdbd-4895-80af-f53dbb5caa1c.json
delete mode 100644 data/hfopenllm_v2/alibaba/nbeerbower/EVA-abliterated-TIES-Qwen2.5-14B/997fc8c5-fc91-4e9e-a2b7-bdda77e4f4a7.json
delete mode 100644 data/hfopenllm_v2/alibaba/nbeerbower/Qwen2.5-Gutenberg-Doppel-14B/649483fb-4b54-4824-82eb-e78e55e53912.json
delete mode 100644 data/hfopenllm_v2/alibaba/netcat420/DeepSeek-R1-Distill-Qwen-MFANN-Slerp-7b/0d99e863-596f-43b7-932e-a4a27435e63d.json
delete mode 100644 data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-7B-nerd-uncensored-v0.9-MFANN/399b43e8-3c07-4f3d-8b3e-50b8acd96e78.json
delete mode 100644 data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-7b-MFANN-slerp/d621c163-5ca6-4e54-8913-d931e4a2c6b9.json
delete mode 100644 data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-7b-nerd-uncensored-MFANN-slerp/170aa8c2-6b80-44d3-9d22-c1a5f7fa2ad4.json
delete mode 100644 data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN-Slerp-Unretrained/2f89ceb3-8bc1-48f0-a4cb-3dc1b8acad87.json
delete mode 100644 data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN/bbd39707-6062-461a-8e09-c8b8bc3451f7.json
delete mode 100644 data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-DeepSeek-R1-MFANN-Slerp-7b/9b2011ae-9d22-42be-a10b-6ce6e8ff1be4.json
delete mode 100644 data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-MFANN-7b/b6578885-9721-4349-ad55-5a80fd054c85.json
delete mode 100644 data/hfopenllm_v2/alibaba/netcat420/qwen2.5-MFANN-7b-SLERP-V1.2/dfacdde9-fd5d-496f-8038-aa0439c0c991.json
delete mode 100644 data/hfopenllm_v2/alibaba/netcat420/qwen2.5-MFANN-7b-SLERPv1.1/0e66b7a6-bd6f-48f7-95e2-c117e0ea468f.json
delete mode 100644 data/hfopenllm_v2/alibaba/netcat420/qwen2.5-MFANN-7b-v1.1/845f96b7-62dc-4ebc-aa62-fcc6263e437f.json
delete mode 100644 data/hfopenllm_v2/alibaba/newsbang/Homer-v0.3-Qwen2.5-7B/0bc5145c-90d0-4a8b-89c6-0b03aa9d0ee1.json
delete mode 100644 data/hfopenllm_v2/alibaba/newsbang/Homer-v0.4-Qwen2.5-7B/9a022bdc-d1b8-4f2e-a1af-6cd3bad6bded.json
delete mode 100644 data/hfopenllm_v2/alibaba/newsbang/Homer-v0.5-Qwen2.5-7B/dc22ad83-0752-4f5e-97ac-733ef6c6cf53.json
delete mode 100644 data/hfopenllm_v2/alibaba/newsbang/Homer-v1.0-Qwen2.5-72B/3ebdda73-1c41-4a98-b3cf-ac5d482c8b5c.json
delete mode 100644 data/hfopenllm_v2/alibaba/newsbang/Homer-v1.0-Qwen2.5-7B/1fe21571-0375-43c3-8071-1aaaf0223baa.json
delete mode 100644 data/hfopenllm_v2/alibaba/nguyentd/FinancialAdvice-Qwen2.5-7B/0ced7574-bfc4-4958-a6f5-0944f9ac411a.json
delete mode 100644 data/hfopenllm_v2/alibaba/nisten/franqwenstein-35b/3e3344d2-6911-4d5f-85d6-6593cbed3b49.json
delete mode 100644 data/hfopenllm_v2/alibaba/nisten/franqwenstein-35b/ff90ed4a-6dcf-4b9b-9d3a-19f933e2c0c8.json
delete mode 100644 data/hfopenllm_v2/alibaba/nisten/tqwendo-36b/3a5b1794-12f1-4004-bdb2-309cc950c757.json
delete mode 100644 data/hfopenllm_v2/alibaba/prithivMLmods/Galactic-Qwen-14B-Exp1/26aea3e6-571c-4751-8b0f-40a86a144973.json
delete mode 100644 data/hfopenllm_v2/alibaba/prithivMLmods/Galactic-Qwen-14B-Exp2/2fcdb8f8-5ec6-494a-b690-fa96febdb02a.json
delete mode 100644 data/hfopenllm_v2/alibaba/prithivMLmods/Magellanic-Qwen-25B-R999/08bfcf7b-e051-4c64-b1ee-0044cfa166f0.json
delete mode 100644 data/hfopenllm_v2/alibaba/prithivMLmods/Qwen-7B-Distill-Reasoner/7afe076b-7f6a-42c1-9c43-652ea3ca94a9.json
delete mode 100644 data/hfopenllm_v2/alibaba/prithivMLmods/Qwen2.5-14B-DeepSeek-R1-1M/eacd8987-9631-4199-97ef-2cdc41879e8b.json
delete mode 100644 data/hfopenllm_v2/alibaba/prithivMLmods/Qwen2.5-7B-DeepSeek-R1-1M/4edb337d-b56c-4009-9199-22223d4ff9f8.json
delete mode 100644 data/hfopenllm_v2/alibaba/qingy2024/Qwen2.5-4B/c332cc18-e556-4b23-a45d-df26c250faa2.json
delete mode 100644 data/hfopenllm_v2/alibaba/rombodawg/Rombos-Coder-V2.5-Qwen-14b/4f7b356a-1484-458c-8bc1-2640e039ab70.json
delete mode 100644 data/hfopenllm_v2/alibaba/rombodawg/Rombos-Coder-V2.5-Qwen-7b/ca077d1a-a122-4040-b7d9-924773ce67ca.json
delete mode 100644 data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-0.5b/796ed438-2be4-45e6-9de9-c98ddd51f3d4.json
delete mode 100644 data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-1.5b/51f579c0-b5b4-4e01-9c19-b68fb6a21210.json
delete mode 100644 data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-14b/91ec838e-699a-4c68-aa42-a9f0b3b6b0c2.json
delete mode 100644 data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-32b/07e926c9-d8bb-41da-b41e-8fddc9fb99d8.json
delete mode 100644 data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-3b/976e132a-8352-43fd-abdf-0fc4a04e9429.json
delete mode 100644 data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-72b/1ae05e9f-d432-4e7f-a662-4b4a118333d9.json
delete mode 100644 data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-7b/23ec1efe-a9a1-41cb-9695-4be0ceb3c199.json
delete mode 100644 data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5.1-Qwen-3b/3f1ffcf0-10bb-46b2-ae30-3eb958e943a1.json
delete mode 100644 data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5.1-Qwen-3b/91240596-5842-4441-b976-01ed7545bd1f.json
delete mode 100644 data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.6-Qwen-14b/5842364a-2721-4882-90f3-97eba7c3b93a.json
delete mode 100644 data/hfopenllm_v2/alibaba/securin/Securin-LLM-V2.5-Qwen-1.5B/cbd0163f-fbea-4f40-a26b-a0508ec02061.json
delete mode 100644 data/hfopenllm_v2/alibaba/sethuiyer/Qwen2.5-7B-Anvita/f2571e64-be03-4482-b5b4-d120444b0586.json
delete mode 100644 data/hfopenllm_v2/alibaba/someon98/qwen-CoMa-0.5b/be4ee67a-59d7-4098-992e-5f75cd53cdbc.json
delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Lamarck-14B-v0.4-Qwenvergence/41393c10-c1e5-4ccd-bcb1-df5392cb8ec6.json
delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Qwen-14B-ProseStock-v4/e68bc90b-1274-4e28-b280-65e6ceba53f8.json
delete mode 100644
data/hfopenllm_v2/alibaba/sometimesanotion/Qwen-2.5-14B-Virmarckeoso/dc7af75a-f45a-449a-b6ba-cc033d7de79f.json delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso-v2/5242491e-deb4-41ae-8d70-5b0d8ffb7bc7.json delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-IF-Variant/9df5ab5a-16cf-478f-87f0-1b8717e1e330.json delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01/dd84656a-3b61-4241-a2eb-a5f52ff58ed2.json delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock/ba7b8cb4-608a-4bf0-b107-51e721f88dee.json delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3/9e453ef2-bae1-4a06-8778-d9c0dfae33e8.json delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso/b3b73406-3b25-4a23-9e13-53fafdd66552.json delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Prose/dceb35c6-30bb-483c-aa62-8273b409311b.json delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Reason/100a253a-3409-4145-8a9d-0bf821e3ce91.json delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-7B-Gordion-v0.1/174b2a17-c4fa-4021-868b-9c23a99603c9.json delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Qwentessential-14B-v1/3cce1e77-5dfc-44d2-b0c2-f7220d989e9d.json delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v013/8127e367-fbd2-475d-a4f0-b8895dec6741.json delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v1/c68a024d-fa21-4584-bde5-42121e919af7.json delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v2/ce1feb87-4f78-4ff1-a548-b3409591166f.json delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v3/96b75db5-4e23-4179-bbf7-801f35d31af7.json delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v5/16e0de9b-9717-4451-babc-8df8748c4efe.json delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v6-Prose/8eecc1a5-d42e-423c-9155-daf66a414361.json delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v6/93e0bcb6-be72-4e9c-adbc-c8fce3240b0d.json delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v7/6aaa1633-f780-42d4-b43e-5a4d31cf7aae.json delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v8/6be09829-08e5-4d45-a091-5451f6c74d51.json delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v9/cea3e14d-a43d-4e32-b8fc-d8ae995190d8.json delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-qv256/f06fc349-e84e-4ec7-a9c9-8819896c2beb.json delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v0.6-004-model_stock/86591e86-5bfb-4e8e-b910-bf6b5011562c.json delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v10/f2b35397-f539-4129-8e1f-f9dae9c9431b.json delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v11/50ae9dc0-efcc-43cb-8704-6dfb9270656a.json delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v12-Prose-DS/a6c5b80d-e685-405a-8444-1be1ed763d2e.json delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v12-Prose/052e63b2-028b-4a4a-ae2b-51514e982239.json delete mode 100644 
data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v13-Prose-DS/f205507c-48ef-4a40-a0e8-39f5f7bf2cdb.json delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v15-Prose-MS/a9434630-a7cd-4dc1-b542-e76402344166.json delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v2-Prose/f639d7e3-ffb9-4dc5-ab20-993522afa5b4.json delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v3-Prose/37c4d6b3-9964-45d3-a6ed-8b84229ed304.json delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v3-Reason/50c37538-a425-4b30-a9e0-9a60f6b2492f.json delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v3-Reason/58ac7b57-e498-4de0-95aa-475c9c56aaf6.json delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v3/6cefa467-dae0-4b8b-bd5c-3343f1bfe111.json delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v6-Prose-model_stock/7f57b41f-d8e8-46a0-ad1f-2638e287bce7.json delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v6-Prose/fa88bc37-eb6b-4d69-8983-7a489ab09665.json delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v8/9332e745-f594-40a9-af22-98709efc179d.json delete mode 100644 data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v9/65c35557-ec37-49c3-b7f6-11ce837500f0.json delete mode 100644 data/hfopenllm_v2/alibaba/sumink/Qwenftmodel/aece90fe-f0eb-4c34-afd0-7a4fc36dc385.json delete mode 100644 data/hfopenllm_v2/alibaba/sumink/Qwenmplus/fc41cf78-6547-4fe6-83aa-ef5edd99a392.json delete mode 100644 data/hfopenllm_v2/alibaba/sumink/Qwensci/57a9ff0c-795f-45c4-b0c7-ad0c7400c88d.json delete mode 100644 data/hfopenllm_v2/alibaba/sumink/bbhqwen/7c73720a-03d8-4d90-9557-cd579c7c3e86.json delete mode 100644 data/hfopenllm_v2/alibaba/sumink/bbhqwen2/b4dbcb3f-11dd-4bce-9d45-869ae7c8f9b1.json delete mode 100644 data/hfopenllm_v2/alibaba/sumink/bbhqwen3/b9dae1c0-8088-4ffb-9e91-0f6579b3147e.json delete mode 100644 data/hfopenllm_v2/alibaba/sumink/bbhqwen4/336dbfac-133a-46c8-87c9-40f1ad12a714.json delete mode 100644 data/hfopenllm_v2/alibaba/sumink/bbhqwen5/4b528bc8-e94a-4437-8c1c-bcd823bf5f45.json delete mode 100644 data/hfopenllm_v2/alibaba/sumink/bbhqwen6/f585e5fe-c3b5-4134-97ed-67b57d74adb8.json delete mode 100644 data/hfopenllm_v2/alibaba/synergetic/FrankenQwen2.5-14B/5f69b85b-d66c-400b-8d40-58b96233ec3c.json delete mode 100644 data/hfopenllm_v2/alibaba/tensopolis/qwen2.5-14b-tensopolis-v1/a3ff3d30-5dec-4ec3-87b9-004d570b005a.json delete mode 100644 data/hfopenllm_v2/alibaba/tensopolis/qwen2.5-3b-or1-tensopolis/b79e1f6d-698d-4bde-b35f-3f31e09c9d6a.json delete mode 100644 data/hfopenllm_v2/alibaba/tensopolis/qwen2.5-7b-tensopolis-v1/20854e9f-ba11-492c-8d81-08e13ca1ec35.json delete mode 100644 data/hfopenllm_v2/alibaba/tensopolis/qwen2.5-7b-tensopolis-v2/e7862d19-b3d4-47f6-b174-b53015229a42.json delete mode 100644 data/hfopenllm_v2/alibaba/theprint/ReWiz-Qwen-2.5-14B/9a4e6a55-e39e-4da6-b4bb-670cbd75d5c6.json delete mode 100644 data/hfopenllm_v2/alibaba/thomas-yanxin/XinYuan-Qwen2-1_5B/626a924c-618b-4047-bed3-9ff67b6e47ae.json delete mode 100644 data/hfopenllm_v2/alibaba/thomas-yanxin/XinYuan-Qwen2-7B-0917/0fac57c3-7bea-48fc-bb38-b679ab835d91.json delete mode 100644 data/hfopenllm_v2/alibaba/thomas-yanxin/XinYuan-Qwen2-7B/5e0690cd-21e6-4778-8af9-7d9f623f5f52.json delete mode 100644 
data/hfopenllm_v2/alibaba/thomas-yanxin/XinYuan-Qwen2.5-7B-0917/6dc1a4e7-6ce6-4337-a242-420fe4139538.json delete mode 100644 data/hfopenllm_v2/alibaba/v000000/Qwen2.5-14B-Gutenberg-1e-Delta/676745af-1929-4875-9a78-d57354883d75.json delete mode 100644 data/hfopenllm_v2/alibaba/v000000/Qwen2.5-Lumen-14B/7b134cb3-7794-4984-9240-b889e2a3b6b4.json delete mode 100644 data/hfopenllm_v2/alibaba/vonjack/Qwen2.5-Coder-0.5B-Merged/76b52fe1-c232-47d9-8052-077a945364cd.json delete mode 100644 data/hfopenllm_v2/alibaba/wave-on-discord/qwent-7b/1dc524b8-18d6-4bc0-9146-713ef8abd983.json delete mode 100644 data/hfopenllm_v2/alibaba/win10/EVA-Norns-Qwen2.5-v0.1/5b8044df-ce6a-4a5e-9aed-d657188fa114.json delete mode 100644 data/hfopenllm_v2/alibaba/win10/Norns-Qwen2.5-12B/4ff2e991-ee62-467e-9fec-cdf334ca7fca.json delete mode 100644 data/hfopenllm_v2/alibaba/win10/Norns-Qwen2.5-7B/2451252e-2cf6-4394-9009-544630696c75.json delete mode 100644 data/hfopenllm_v2/alibaba/x0000001/Deepseek-Lumen-R1-Qwen2.5-14B/9d6eb7bc-965e-4de8-bccf-0590ad55ce6d.json delete mode 100644 data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-70B-DPO/b790e9c5-2412-4aa0-a975-37b8662a82cf.json create mode 100644 data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-70B-DPO/ec773b66-24fd-4b6f-ac9c-ebcd355e4be7.json delete mode 100644 data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-70B-SFT/6921281e-5756-4f0d-a37c-3b05ff6b2703.json create mode 100644 data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-70B-SFT/a70b8356-94ce-4f0d-b44a-2215076eed5e.json create mode 100644 data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-70B/b182807d-587e-4702-bf30-dab11983b8db.json create mode 100644 data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-70B/c1f0944a-c44c-42e9-90ba-a847509cbd66.json create mode 100644 data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-8B-DPO/64bb8530-7071-402e-ba9b-1d15ecbe275c.json delete mode 100644 data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-8B-DPO/81bd1edf-be5b-4ae6-a2cc-723aaa040eb9.json create mode 100644 data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-8B-RM/4f1fc265-f8b7-47e6-a9e6-cfa61b89ad4a.json create mode 100644 data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-8B-SFT/1420df5c-690e-4b01-b99c-c21c793689ae.json delete mode 100644 data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-8B-SFT/35674acb-a68c-4ac1-9aac-ac9cb44801e6.json create mode 100644 data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-8B/aa9d0b0e-cb3f-452e-bc85-f7cf172d2b8b.json create mode 100644 data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-8B/dfabd777-8620-40e3-b19c-a9227f57b638.json create mode 100644 data/hfopenllm_v2/allenai/OLMo-1.7-7B-hf/08fe3877-ab04-426a-9e27-72ec4ff8ffc3.json delete mode 100644 data/hfopenllm_v2/allenai/OLMo-1.7-7B-hf/5d7caae7-0242-4a5d-b3be-c677b958d130.json create mode 100644 data/hfopenllm_v2/allenai/OLMo-1B-hf/4b264bb0-bd7e-4b15-9591-50b5a521f100.json delete mode 100644 data/hfopenllm_v2/allenai/OLMo-1B-hf/d13f5416-1d95-431b-8f01-b969066ec960.json delete mode 100644 data/hfopenllm_v2/allenai/OLMo-2-1124-7B-Instruct/17df660f-6a91-476f-a7e8-7169eef1c24d.json create mode 100644 data/hfopenllm_v2/allenai/OLMo-2-1124-7B-Instruct/a8cfe336-0c3e-401c-a1e9-d951e64918ec.json create mode 100644 data/hfopenllm_v2/allenai/OLMo-7B-Instruct-hf/5e66c653-41b1-46de-b677-ffd8426ba5ec.json delete mode 100644 data/hfopenllm_v2/allenai/OLMo-7B-Instruct-hf/7ff78ffd-c934-4a17-b30d-2d8267f3e25a.json delete mode 100644 data/hfopenllm_v2/allenai/OLMo-7B-hf/6308f97d-aecd-467a-91f0-5a1650ccc22a.json create mode 100644 data/hfopenllm_v2/allenai/OLMo-7B-hf/9f0f0914-1f7a-468e-8a2e-7ae122fd064d.json delete mode 100644 
data/hfopenllm_v2/allenai/OLMoE-1B-7B-0125-Instruct/af176c4c-b06f-44ac-bcba-1331d9148958.json create mode 100644 data/hfopenllm_v2/allenai/OLMoE-1B-7B-0125-Instruct/cc64a143-4f1e-42ee-ade1-fafc4b316336.json delete mode 100644 data/hfopenllm_v2/allenai/OLMoE-1B-7B-0924-Instruct/a580b690-0829-43b9-8d52-6dd226208901.json create mode 100644 data/hfopenllm_v2/allenai/OLMoE-1B-7B-0924-Instruct/cf322e64-2682-4a9a-a48f-c4ec47b852f2.json create mode 100644 data/hfopenllm_v2/allenai/OLMoE-1B-7B-0924/30b32261-b24a-49e3-ba57-172dc1d03ba0.json delete mode 100644 data/hfopenllm_v2/allenai/OLMoE-1B-7B-0924/af1bb542-77cb-47e2-89f1-16cc91e89452.json create mode 100644 data/hfopenllm_v2/allknowingroger/Chocolatine-24B/0681c01d-23f3-4b8b-9516-a5cc41761fc4.json delete mode 100644 data/hfopenllm_v2/allknowingroger/Chocolatine-24B/9d3d89f9-e792-4b33-91d1-41f84ca1cc68.json create mode 100644 data/hfopenllm_v2/allknowingroger/Gemma2Slerp1-2.6B/7693ed8a-f76d-482b-92c1-f11810e522ca.json create mode 100644 data/hfopenllm_v2/allknowingroger/Gemma2Slerp1-27B/f8dc0128-c606-490a-b965-59d5377dd778.json create mode 100644 data/hfopenllm_v2/allknowingroger/Gemma2Slerp2-2.6B/844547f7-658f-41dd-ab4c-dc0569030e59.json create mode 100644 data/hfopenllm_v2/allknowingroger/Gemma2Slerp2-27B/75c291b5-6d60-4bde-8621-f865196a6ecc.json create mode 100644 data/hfopenllm_v2/allknowingroger/Gemma2Slerp3-27B/36d54b12-594f-47fe-9637-a9b740416c5c.json create mode 100644 data/hfopenllm_v2/allknowingroger/Gemma2Slerp4-27B/57733383-9573-463d-a467-068d2685014c.json create mode 100644 data/hfopenllm_v2/allknowingroger/GemmaSlerp-9B/eda1ac9a-98e1-496f-bdeb-1e256b52c14a.json create mode 100644 data/hfopenllm_v2/allknowingroger/GemmaSlerp2-9B/00b8bfda-c6b1-4e1f-b68c-bff7335e2dff.json create mode 100644 data/hfopenllm_v2/allknowingroger/GemmaSlerp4-10B/0a3b9ad6-b853-471d-a292-413b30273034.json create mode 100644 data/hfopenllm_v2/allknowingroger/GemmaSlerp5-10B/d61c3ace-e353-4c0b-9472-c9a1928809cc.json create mode 100644 data/hfopenllm_v2/allknowingroger/GemmaStock1-27B/2293a19a-b650-436d-9448-1b641e63d407.json delete mode 100644 data/hfopenllm_v2/allknowingroger/HomerSlerp1-7B/340dfc7b-9af0-4545-9d7b-6950ea69bd57.json create mode 100644 data/hfopenllm_v2/allknowingroger/HomerSlerp1-7B/c15b977c-c781-4b17-ac9f-25c77602c875.json create mode 100644 data/hfopenllm_v2/allknowingroger/HomerSlerp2-7B/42c191be-c0ae-4170-8b6f-565053ae7d9c.json delete mode 100644 data/hfopenllm_v2/allknowingroger/HomerSlerp2-7B/ea9cc238-75d0-45e7-b10e-e214516ca36e.json delete mode 100644 data/hfopenllm_v2/allknowingroger/HomerSlerp3-7B/a8a69b0c-02c9-437d-975d-69f1ddc6959a.json create mode 100644 data/hfopenllm_v2/allknowingroger/HomerSlerp3-7B/f5cb910d-6e5b-404a-a751-d5cb90668150.json delete mode 100644 data/hfopenllm_v2/allknowingroger/HomerSlerp4-7B/988da677-c00d-4e7c-847e-6ca553e0124b.json create mode 100644 data/hfopenllm_v2/allknowingroger/HomerSlerp4-7B/de806e4c-dbf8-48cc-a0d8-033a61dfc777.json create mode 100644 data/hfopenllm_v2/allknowingroger/LimyQstar-7B-slerp/59150b73-b05a-451e-ba3f-696d04effe05.json delete mode 100644 data/hfopenllm_v2/allknowingroger/LimyQstar-7B-slerp/ac45b8ec-454f-4a91-9418-a3dc70535119.json create mode 100644 data/hfopenllm_v2/allknowingroger/Llama3.1-60B/84926b81-360a-480c-b240-f154ec7fe0ba.json delete mode 100644 data/hfopenllm_v2/allknowingroger/Marco-01-slerp1-7B/1b8abf32-6b66-4e9b-9b82-e1978d07a483.json create mode 100644 data/hfopenllm_v2/allknowingroger/Marco-01-slerp1-7B/8e6edb04-302b-4dfc-b38f-94b437c921a8.json delete mode 100644 
data/hfopenllm_v2/allknowingroger/Meme-7B-slerp/8eaa7d3f-0217-4ed3-9367-9e0f9c0926fe.json create mode 100644 data/hfopenllm_v2/allknowingroger/Meme-7B-slerp/db92c564-1cf9-43db-9e25-1f450c7b1e7f.json create mode 100644 data/hfopenllm_v2/allknowingroger/Ministral-8B-slerp/e3796243-cbba-4ec2-ad7c-89547ad24342.json delete mode 100644 data/hfopenllm_v2/allknowingroger/Ministral-8B-slerp/effba194-3b2a-4847-9708-e3cb62a7c964.json create mode 100644 data/hfopenllm_v2/allknowingroger/MistralPhi3-11B/1479be90-df8f-4e1d-b9db-03e84000187a.json create mode 100644 data/hfopenllm_v2/allknowingroger/Mistralmash1-7B-s/d2e6c48c-1c18-45a6-ba1a-b335325c980c.json create mode 100644 data/hfopenllm_v2/allknowingroger/Mistralmash2-7B-s/f843e45a-f66b-4091-a964-75583c2d7fc5.json delete mode 100644 data/hfopenllm_v2/allknowingroger/MixTAO-19B-pass/275fb96e-4779-479b-937b-f5db6aa530ea.json create mode 100644 data/hfopenllm_v2/allknowingroger/MixTAO-19B-pass/cbc3cd41-e187-4c4f-b207-37bceab423a4.json delete mode 100644 data/hfopenllm_v2/allknowingroger/MixTaoTruthful-13B-slerp/003c05a1-abb7-41d3-a264-efc6923b64ef.json create mode 100644 data/hfopenllm_v2/allknowingroger/MixTaoTruthful-13B-slerp/0f124566-5e94-4233-9a3f-5ff9cfdf160c.json delete mode 100644 data/hfopenllm_v2/allknowingroger/MultiCalm-7B-slerp/36176ae9-e852-4604-9961-b7f02e4c3e55.json create mode 100644 data/hfopenllm_v2/allknowingroger/MultiCalm-7B-slerp/98fabba8-7d70-4a1f-b03c-37e1a9ac94e8.json create mode 100644 data/hfopenllm_v2/allknowingroger/MultiMash-12B-slerp/91522dad-529b-477c-8372-793f631e14b7.json delete mode 100644 data/hfopenllm_v2/allknowingroger/MultiMash-12B-slerp/ed27cd90-e73f-4432-aed9-dd36f29cba1a.json delete mode 100644 data/hfopenllm_v2/allknowingroger/MultiMash10-13B-slerp/7e4b1f44-73f9-4a6d-9d66-91c60e69e3d2.json create mode 100644 data/hfopenllm_v2/allknowingroger/MultiMash10-13B-slerp/cec22734-493c-4d11-ba86-6c7ae2005124.json delete mode 100644 data/hfopenllm_v2/allknowingroger/MultiMash11-13B-slerp/1b3bfb2a-8290-4af0-bdac-24397a5b6f86.json create mode 100644 data/hfopenllm_v2/allknowingroger/MultiMash11-13B-slerp/704a6e19-0d86-42a5-b8f5-05a5856e9c29.json delete mode 100644 data/hfopenllm_v2/allknowingroger/MultiMash2-12B-slerp/af52a422-e959-4662-98e8-c94fa83bee3e.json create mode 100644 data/hfopenllm_v2/allknowingroger/MultiMash2-12B-slerp/bc54349d-59e0-4ae4-94f9-3f5ae98261f4.json create mode 100644 data/hfopenllm_v2/allknowingroger/MultiMash5-12B-slerp/d20d533a-758b-477c-b4eb-073adaed640e.json delete mode 100644 data/hfopenllm_v2/allknowingroger/MultiMash5-12B-slerp/df7621bc-5af2-45c5-b8e4-ebc158dad966.json delete mode 100644 data/hfopenllm_v2/allknowingroger/MultiMash6-12B-slerp/195b1c31-c766-479c-a445-39a6150404fc.json create mode 100644 data/hfopenllm_v2/allknowingroger/MultiMash6-12B-slerp/f7c9ad0d-3fea-4bec-8ac3-46f01a3449fb.json delete mode 100644 data/hfopenllm_v2/allknowingroger/MultiMash7-12B-slerp/141507b5-67df-4c38-9eeb-b9d3cf98b08f.json create mode 100644 data/hfopenllm_v2/allknowingroger/MultiMash7-12B-slerp/9db1f823-e068-4a39-a5cc-b9c588099427.json create mode 100644 data/hfopenllm_v2/allknowingroger/MultiMash8-13B-slerp/23818b45-bf5f-48a2-982f-1e2a0d35aac8.json delete mode 100644 data/hfopenllm_v2/allknowingroger/MultiMash8-13B-slerp/54a836bc-8048-4c2b-a65a-937acc2fa414.json delete mode 100644 data/hfopenllm_v2/allknowingroger/MultiMash9-13B-slerp/6a0f5973-6377-4707-a0e3-414ca1f22b32.json create mode 100644 data/hfopenllm_v2/allknowingroger/MultiMash9-13B-slerp/de6eda66-b8f5-4b23-89e1-44bbac600953.json create 
mode 100644 data/hfopenllm_v2/allknowingroger/MultiMerge-7B-slerp/632974c2-57e2-41f9-8c00-671e07e7594b.json delete mode 100644 data/hfopenllm_v2/allknowingroger/MultiMerge-7B-slerp/f0aae363-f838-48c8-bf9e-b8e9f0e84a24.json delete mode 100644 data/hfopenllm_v2/allknowingroger/Multimash3-12B-slerp/80aa0629-7ea1-4f69-b302-c0502abcbbab.json create mode 100644 data/hfopenllm_v2/allknowingroger/Multimash3-12B-slerp/e86dcf4f-6282-4aa6-b645-00f93a2e9077.json delete mode 100644 data/hfopenllm_v2/allknowingroger/Multimerge-19B-pass/818e21b8-da78-4649-a71a-ba71c89d1fe7.json create mode 100644 data/hfopenllm_v2/allknowingroger/Multimerge-19B-pass/b20be5c9-9720-4076-b587-728549dd19af.json delete mode 100644 data/hfopenllm_v2/allknowingroger/MultiverseEx26-7B-slerp/30b74d3f-7247-4c93-9c94-dc8beba14b70.json create mode 100644 data/hfopenllm_v2/allknowingroger/MultiverseEx26-7B-slerp/5e193803-39d1-4f12-8726-ebbe5f71563c.json create mode 100644 data/hfopenllm_v2/allknowingroger/NeuralWestSeverus-7B-slerp/61131a6c-f412-42bf-814b-7d711a840d44.json delete mode 100644 data/hfopenllm_v2/allknowingroger/NeuralWestSeverus-7B-slerp/fc6d4451-0a9c-4d53-8d22-179ff7059d61.json create mode 100644 data/hfopenllm_v2/allknowingroger/Neuralcoven-7B-slerp/535e72b1-17e0-40e3-9d66-d31f8ec70413.json delete mode 100644 data/hfopenllm_v2/allknowingroger/Neuralcoven-7B-slerp/ba46f82b-2129-43db-ae21-09e6576dc4e6.json delete mode 100644 data/hfopenllm_v2/allknowingroger/Neuralmultiverse-7B-slerp/b98b76ea-b068-46ec-b929-4ca1037eaf99.json create mode 100644 data/hfopenllm_v2/allknowingroger/Neuralmultiverse-7B-slerp/ea15479e-24a8-4924-a754-a8567c511e61.json create mode 100644 data/hfopenllm_v2/allknowingroger/Ph3della5-14B/5799f285-c61f-43a8-a6a6-053808cf4e8f.json delete mode 100644 data/hfopenllm_v2/allknowingroger/Ph3della5-14B/d5a47313-b2f5-4833-9539-b8f56e4a5fda.json create mode 100644 data/hfopenllm_v2/allknowingroger/Ph3merge-14B/36feef44-3d3b-4102-8606-ee6420bddcff.json delete mode 100644 data/hfopenllm_v2/allknowingroger/Ph3merge-14B/95228f47-8fb1-443c-8ad4-0021504e34e0.json delete mode 100644 data/hfopenllm_v2/allknowingroger/Ph3merge2-14B/b5790fec-6c12-42a3-853c-488658bf949d.json create mode 100644 data/hfopenllm_v2/allknowingroger/Ph3merge2-14B/fd55f19a-2c22-4f29-82e0-15b02f25b9a9.json create mode 100644 data/hfopenllm_v2/allknowingroger/Ph3merge3-14B/18e5decd-c95e-43d2-9ba2-007ba32e216f.json delete mode 100644 data/hfopenllm_v2/allknowingroger/Ph3merge3-14B/e5d9bded-a8e4-4133-84b9-6eac517a4226.json delete mode 100644 data/hfopenllm_v2/allknowingroger/Ph3task1-14B/718ef6de-5926-4a4c-bade-9a162ce8e730.json create mode 100644 data/hfopenllm_v2/allknowingroger/Ph3task1-14B/85a4996e-8c44-4e4f-9478-19a8c5513617.json delete mode 100644 data/hfopenllm_v2/allknowingroger/Ph3task2-14B/5d818d86-2caf-4b29-9c15-8fa27217de22.json create mode 100644 data/hfopenllm_v2/allknowingroger/Ph3task2-14B/db6d57c8-df0b-407e-b937-67c55b513a5f.json create mode 100644 data/hfopenllm_v2/allknowingroger/Ph3task3-14B/89ac933d-0a7c-40e6-8fa7-35bb6205e44b.json delete mode 100644 data/hfopenllm_v2/allknowingroger/Ph3task3-14B/a935c0d1-6623-45c6-a100-96c8b5a3a2fb.json delete mode 100644 data/hfopenllm_v2/allknowingroger/Ph3unsloth-3B-slerp/0a9be33a-792e-413c-b60d-3e97a060fa78.json create mode 100644 data/hfopenllm_v2/allknowingroger/Ph3unsloth-3B-slerp/c79e690f-3e09-4fac-9412-937a3b7ef352.json create mode 100644 data/hfopenllm_v2/allknowingroger/Phi3mash1-17B-pass/ce74b7e3-8505-4c79-a7de-12d1e6b47155.json create mode 100644 
data/hfopenllm_v2/allknowingroger/Quen2-65B/3c562d8a-2df9-4d3f-9699-bfaee4a1ce2b.json delete mode 100644 data/hfopenllm_v2/allknowingroger/Quen2-65B/4bc3f55b-0638-4fc2-b1d9-04780707acef.json create mode 100644 data/hfopenllm_v2/allknowingroger/Qwen2.5-42B-AGI/152b0cbe-e27b-4438-8326-e67f4e70e600.json create mode 100644 data/hfopenllm_v2/allknowingroger/Qwen2.5-7B-task2/c733c91f-79a9-49e5-9398-3a424ee1940a.json create mode 100644 data/hfopenllm_v2/allknowingroger/Qwen2.5-7B-task3/32d7b6c6-de5c-4864-a446-97dccce378c5.json create mode 100644 data/hfopenllm_v2/allknowingroger/Qwen2.5-7B-task4/7b22d02b-5bfd-4243-9ad9-c858d0af55a6.json create mode 100644 data/hfopenllm_v2/allknowingroger/Qwen2.5-7B-task7/99650529-55d9-42b0-b812-761a30277e5e.json create mode 100644 data/hfopenllm_v2/allknowingroger/Qwen2.5-7B-task8/81abbc2a-791b-4a39-bb46-97edfa14b9c0.json create mode 100644 data/hfopenllm_v2/allknowingroger/Qwen2.5-slerp-14B/c658e535-7098-40fc-bea0-f5734d8f4ca9.json create mode 100644 data/hfopenllm_v2/allknowingroger/QwenSlerp12-7B/9e0656e9-9b82-4f6d-b00a-c09cf9cbc105.json create mode 100644 data/hfopenllm_v2/allknowingroger/QwenSlerp4-14B/07c36058-e0e8-48ea-85f3-0a2cb2fe3443.json create mode 100644 data/hfopenllm_v2/allknowingroger/QwenSlerp5-14B/c41d8925-b56b-458e-b1a9-27dbbcaee149.json create mode 100644 data/hfopenllm_v2/allknowingroger/QwenSlerp6-14B/9136feb4-5c3e-48b3-bc70-c7816b8b189b.json create mode 100644 data/hfopenllm_v2/allknowingroger/QwenStock1-14B/c395ef02-9a50-4696-aad2-bcb32ba05f67.json create mode 100644 data/hfopenllm_v2/allknowingroger/QwenStock2-14B/93f47969-556a-4fd4-b7bb-4d1c861a8d71.json create mode 100644 data/hfopenllm_v2/allknowingroger/QwenStock3-14B/349ae559-6c1f-4b2f-954c-e83cba1e603a.json create mode 100644 data/hfopenllm_v2/allknowingroger/Qwenslerp2-14B/3e43c3f6-645b-4ab3-b684-b23eb67bc5d9.json create mode 100644 data/hfopenllm_v2/allknowingroger/Qwenslerp2-7B/500c8cd4-fe4e-44f3-86b7-b0efd387ab92.json create mode 100644 data/hfopenllm_v2/allknowingroger/Qwenslerp3-14B/340a3ebb-bc06-404f-84e7-aeccc016fd32.json create mode 100644 data/hfopenllm_v2/allknowingroger/Qwenslerp3-7B/a6426f88-d7cc-4e6a-a2b5-76e59a52a6de.json create mode 100644 data/hfopenllm_v2/allknowingroger/ROGERphi-7B-slerp/bdd05c8f-b895-4c91-9a9f-a608a4259cbd.json create mode 100644 data/hfopenllm_v2/allknowingroger/RogerMerge-7B-slerp/0e1e45d4-2747-480d-9b1f-2b200e250271.json delete mode 100644 data/hfopenllm_v2/allknowingroger/RogerMerge-7B-slerp/50289a8b-4522-4dca-b6dc-aa42193deefa.json create mode 100644 data/hfopenllm_v2/allknowingroger/Rombos-LLM-V2.5-Qwen-42b/00f3f9ca-ae7d-4e62-9e7e-6bd202dbed59.json create mode 100644 data/hfopenllm_v2/allknowingroger/Strangecoven-7B-slerp/c9e57ab2-c2a4-4935-b976-4bf24647b777.json delete mode 100644 data/hfopenllm_v2/allknowingroger/Strangecoven-7B-slerp/f125c8d1-57f3-4b79-ace4-2104b008a507.json delete mode 100644 data/hfopenllm_v2/allknowingroger/Weirdslerp2-25B/61e517f7-e2db-48bd-8f4e-f62b5859b62e.json create mode 100644 data/hfopenllm_v2/allknowingroger/Weirdslerp2-25B/c22436a2-ec60-4220-82b3-123618165eb2.json create mode 100644 data/hfopenllm_v2/allknowingroger/WestlakeMaziyar-7B-slerp/1f990438-dd84-44d2-99f9-a10035ecd652.json delete mode 100644 data/hfopenllm_v2/allknowingroger/WestlakeMaziyar-7B-slerp/2db948db-a9e5-41cf-9567-2f9198d80900.json delete mode 100644 data/hfopenllm_v2/allknowingroger/YamMaths-7B-slerp/52ab1e94-4e6f-4876-932b-a45a033dec1b.json create mode 100644 
data/hfopenllm_v2/allknowingroger/YamMaths-7B-slerp/f4564f5e-3595-466e-8201-0e2a4c50ff0d.json create mode 100644 data/hfopenllm_v2/allknowingroger/Yi-1.5-34B/040def3a-702d-4868-b429-39697ca36207.json delete mode 100644 data/hfopenllm_v2/allknowingroger/Yi-1.5-34B/98455065-72e1-4dad-bce1-1c3ceddf5433.json create mode 100644 data/hfopenllm_v2/allknowingroger/Yi-blossom-40B/9e24fd65-56ec-4160-b299-b34d702a3231.json delete mode 100644 data/hfopenllm_v2/allknowingroger/Yi-blossom-40B/b35eaca2-0f77-4171-bbcf-23a191b055f2.json create mode 100644 data/hfopenllm_v2/allknowingroger/Yibuddy-35B/216bf9f8-9521-4311-a40b-8a847271265c.json delete mode 100644 data/hfopenllm_v2/allknowingroger/Yibuddy-35B/dc2688b9-9dff-4a2e-b3d8-3bdc82634d20.json create mode 100644 data/hfopenllm_v2/allknowingroger/Yillama-40B/45f8c4fb-3591-44df-a4f0-57093b9bae23.json delete mode 100644 data/hfopenllm_v2/allknowingroger/Yislerp-34B/723d2f60-f12a-4abb-9061-807fd38e7d51.json create mode 100644 data/hfopenllm_v2/allknowingroger/Yislerp-34B/d17275ef-8a32-4fcb-94f4-fb24299ba50e.json create mode 100644 data/hfopenllm_v2/allknowingroger/Yislerp2-34B/61b79e7d-0f50-4cfe-825c-ed5b23d943f3.json delete mode 100644 data/hfopenllm_v2/allknowingroger/Yislerp2-34B/ce55aca1-80bd-4711-ad05-d812d206bd14.json create mode 100644 data/hfopenllm_v2/allknowingroger/Yunconglong-13B-slerp/113c3507-b738-4b06-ada8-da93b19c6ae2.json delete mode 100644 data/hfopenllm_v2/allknowingroger/Yunconglong-13B-slerp/8ae47af1-5ae6-4cb9-ac94-8d70fda5126d.json delete mode 100644 data/hfopenllm_v2/allknowingroger/limyClown-7B-slerp/420f8334-c420-4b8f-8853-fea8f4f5ac6d.json create mode 100644 data/hfopenllm_v2/allknowingroger/limyClown-7B-slerp/8835d5c1-8350-4d42-a753-82b94dffda3b.json create mode 100644 data/hfopenllm_v2/allknowingroger/llama3-Jallabi-40B-s/dc3bbda7-5007-44c7-b1ba-af0c82d100ee.json create mode 100644 data/hfopenllm_v2/allknowingroger/llama3AnFeng-40B/0d24ee06-a6b4-4be7-b3ef-c4f53b4fc414.json delete mode 100644 data/hfopenllm_v2/allura-org/L3.1-8b-RP-Ink/cb8c45ae-1be6-4ab0-9317-cfbfc8850dc4.json create mode 100644 data/hfopenllm_v2/allura-org/L3.1-8b-RP-Ink/f2415b7a-2cd7-4a05-834b-7da992e1da1a.json create mode 100644 data/hfopenllm_v2/allura-org/MN-12b-RP-Ink/01af237f-40d8-4841-a90d-13dce6db8634.json delete mode 100644 data/hfopenllm_v2/allura-org/MN-12b-RP-Ink/3dc6cdf9-e75d-4f9f-9b91-9592e70566f8.json delete mode 100644 data/hfopenllm_v2/allura-org/MS-Meadowlark-22B/7ea2cf22-114f-449c-a9cf-c4f379646cd3.json create mode 100644 data/hfopenllm_v2/allura-org/MS-Meadowlark-22B/d69bb392-fd38-4f57-b567-24566896167b.json create mode 100644 data/hfopenllm_v2/allura-org/Mistral-Small-24b-Sertraline-0304/63503943-1c1e-4dac-9c41-4933fbb44b70.json create mode 100644 data/hfopenllm_v2/allura-org/Mistral-Small-Sisyphus-24b-2503/80c5d343-41e6-45d7-8921-62586a3cd270.json create mode 100644 data/hfopenllm_v2/allura-org/MoE-Girl-1BA-7BT/2c27d7f6-60fd-49f3-8666-784f2a16031b.json delete mode 100644 data/hfopenllm_v2/allura-org/MoE-Girl-1BA-7BT/5b3176a0-7ded-409a-bc54-70e0ecf9b325.json delete mode 100644 data/hfopenllm_v2/allura-org/TQ2.5-14B-Aletheia-v1/b46bef60-b37b-4510-a92a-fb4c0cabb357.json create mode 100644 data/hfopenllm_v2/allura-org/TQ2.5-14B-Aletheia-v1/cbcc1e64-8455-4382-8999-654d1757bbd6.json create mode 100644 data/hfopenllm_v2/allura-org/TQ2.5-14B-Neon-v1/1bea4f6b-7a41-4907-baca-430c7ea179e9.json delete mode 100644 data/hfopenllm_v2/allura-org/TQ2.5-14B-Neon-v1/68bdab24-8324-4190-abd2-ad3ad5a7a853.json create mode 100644 
data/hfopenllm_v2/allura-org/Teleut-7b/298ce89b-966c-4f4e-9da5-3803a395188f.json delete mode 100644 data/hfopenllm_v2/allura-org/Teleut-7b/85ceb275-787a-4dbc-981a-513fd16606ea.json create mode 100644 data/hfopenllm_v2/aloobun/Meta-Llama-3-7B-28Layers/ea27a4d6-8c32-4b36-873d-1046ae6240e5.json delete mode 100644 data/hfopenllm_v2/aloobun/d-SmolLM2-360M/1ad7b4c4-8074-482e-9010-ce1552325e15.json create mode 100644 data/hfopenllm_v2/aloobun/d-SmolLM2-360M/73d5905d-7825-43ba-8051-7e1f5639b857.json create mode 100644 data/hfopenllm_v2/alpindale/WizardLM-2-8x22B/956b8589-a048-43be-9cfd-05658d3c57ca.json delete mode 100644 data/hfopenllm_v2/alpindale/WizardLM-2-8x22B/c2899c4e-5bc9-4b0b-8938-b9848b86fe37.json delete mode 100644 data/hfopenllm_v2/alpindale/magnum-72b-v1/186687f8-ed25-44c9-b634-36db1c734844.json create mode 100644 data/hfopenllm_v2/alpindale/magnum-72b-v1/36f597b4-8f53-4b40-9c0e-c9284743e456.json create mode 100644 data/hfopenllm_v2/altomek/YiSM-34B-0rn/7b67e526-7588-4c62-9293-55e77851c4c7.json delete mode 100644 data/hfopenllm_v2/altomek/YiSM-34B-0rn/a9c75810-f51d-4fd3-8c96-6afdbc0f278c.json create mode 100644 data/hfopenllm_v2/amazon/MegaBeam-Mistral-7B-300k/8bc96d6d-0cd7-49c4-8112-7d8fb1c45199.json create mode 100644 data/hfopenllm_v2/amd/AMD-Llama-135m/6751a200-0bd9-498e-a991-ebe22375633d.json create mode 100644 data/hfopenllm_v2/amd/AMD-Llama-135m/f41442e3-5aa7-4ca4-9e61-a5e13965a3e4.json create mode 100644 data/hfopenllm_v2/anakin87/gemma-2b-orpo/b105b62a-ce77-4387-b679-1adf2782b2f4.json delete mode 100644 data/hfopenllm_v2/anthracite-org/magnum-v1-72b/6d98f0fa-25c9-409b-b82e-b3c128bf47b6.json create mode 100644 data/hfopenllm_v2/anthracite-org/magnum-v1-72b/72180fd7-bf34-4758-b02f-7d11859700c7.json delete mode 100644 data/hfopenllm_v2/anthracite-org/magnum-v2-12b/72821a7d-cc27-4557-82d4-7e30286ea126.json create mode 100644 data/hfopenllm_v2/anthracite-org/magnum-v2-12b/ac5aaa9c-79ab-4082-b8c5-084fba3e122a.json create mode 100644 data/hfopenllm_v2/anthracite-org/magnum-v2-72b/2d266d7f-8edd-40fd-adfc-597a7742167b.json delete mode 100644 data/hfopenllm_v2/anthracite-org/magnum-v2-72b/31d80ab1-348f-4b5a-963e-f027adf32101.json create mode 100644 data/hfopenllm_v2/anthracite-org/magnum-v2.5-12b-kto/484ccbf2-87e2-423f-9de4-a4bd54291b54.json delete mode 100644 data/hfopenllm_v2/anthracite-org/magnum-v2.5-12b-kto/74e67572-01d9-4890-9c5a-27b5559cf752.json create mode 100644 data/hfopenllm_v2/anthracite-org/magnum-v3-27b-kto/4de79504-f9e8-4235-9aad-d38f0799e081.json delete mode 100644 data/hfopenllm_v2/anthracite-org/magnum-v3-27b-kto/9a74a1f1-0322-4f96-8e52-76bbde948fa9.json delete mode 100644 data/hfopenllm_v2/anthracite-org/magnum-v3-34b/8ace78d5-5390-49ec-935d-2c7faf7569ca.json create mode 100644 data/hfopenllm_v2/anthracite-org/magnum-v3-34b/b4bde9d8-f50c-448c-ada4-5bc05f302c04.json delete mode 100644 data/hfopenllm_v2/anthracite-org/magnum-v3-9b-chatml/42df1809-0021-4968-a18b-86cefc0125d7.json create mode 100644 data/hfopenllm_v2/anthracite-org/magnum-v3-9b-chatml/5da3240b-b5e3-4333-ba61-925343b56043.json create mode 100644 data/hfopenllm_v2/anthracite-org/magnum-v3-9b-customgemma2/d6727b7d-cdf3-48d5-8e30-484e86ad60b6.json create mode 100644 data/hfopenllm_v2/anthracite-org/magnum-v4-12b/15b86bbf-8d3b-474b-98f0-abb3972a7271.json delete mode 100644 data/hfopenllm_v2/anthracite-org/magnum-v4-12b/c7ba8947-fd38-4ba1-9169-6c9164123273.json delete mode 100644 data/hfopenllm_v2/anthracite-org/magnum-v4-22b/5e3f808c-964d-492d-a003-37594dd36f89.json create mode 100644 
data/hfopenllm_v2/anthracite-org/magnum-v4-22b/c0b339f6-4a46-46eb-b2d0-945176afe676.json delete mode 100644 data/hfopenllm_v2/anthracite-org/magnum-v4-27b/113ce0c6-c292-4924-adca-afdbcdd4c381.json create mode 100644 data/hfopenllm_v2/anthracite-org/magnum-v4-27b/79367289-6245-4bf0-99e9-42bc3ff7649c.json delete mode 100644 data/hfopenllm_v2/anthracite-org/magnum-v4-9b/55401aa6-ad61-42d6-9163-5d105a9091bf.json create mode 100644 data/hfopenllm_v2/anthracite-org/magnum-v4-9b/c3ec5505-1086-446a-9739-523810e93d13.json delete mode 100644 data/hfopenllm_v2/anthropic/xxx777xxxASD/L3.1-ClaudeMaid-4x8B/ae6d070b-71de-40c3-8f69-944ce2e33abb.json delete mode 100644 data/hfopenllm_v2/apple/DCLM-7B/3891ad0a-0acf-4d3e-a9e8-533633d9557a.json create mode 100644 data/hfopenllm_v2/apple/DCLM-7B/c6c5e462-d373-4536-afc3-b740fb7e300f.json delete mode 100644 data/hfopenllm_v2/appvoid/arco-2-instruct/95d1d5d9-b613-46b4-b0de-540641d8d81a.json create mode 100644 data/hfopenllm_v2/appvoid/arco-2-instruct/b7537abe-8177-4206-999f-5bb7e95c72c8.json delete mode 100644 data/hfopenllm_v2/appvoid/arco-2/a037593c-0f98-4b23-a139-12cfc435de3c.json create mode 100644 data/hfopenllm_v2/appvoid/arco-2/eb2f6159-e37e-46db-9419-6a66cb7e539e.json delete mode 100644 data/hfopenllm_v2/arcee-ai/Arcee-Blitz/01e8e033-1aa9-42e2-85d8-b7974d0c9e23.json create mode 100644 data/hfopenllm_v2/arcee-ai/Arcee-Blitz/0b2d0a06-2907-4258-be33-1591e18ac6a2.json create mode 100644 data/hfopenllm_v2/arcee-ai/Arcee-Maestro-7B-Preview/0284d867-45c4-4fe4-883c-8e3ea169d66c.json delete mode 100644 data/hfopenllm_v2/arcee-ai/Arcee-Maestro-7B-Preview/126f5eda-1529-450f-8557-dcd6a33b7bd4.json create mode 100644 data/hfopenllm_v2/arcee-ai/Arcee-Nova/1a2da513-104e-4074-b3b7-601ab11bf6d8.json delete mode 100644 data/hfopenllm_v2/arcee-ai/Arcee-Nova/9063608f-8d32-4e98-ad05-621f6239d0ba.json create mode 100644 data/hfopenllm_v2/arcee-ai/Arcee-Spark/189db16b-5e78-439f-9f79-6eec979c3a79.json delete mode 100644 data/hfopenllm_v2/arcee-ai/Arcee-Spark/1dde2278-39aa-43cf-8d94-5d4a0bb514ca.json delete mode 100644 data/hfopenllm_v2/arcee-ai/Arcee-Spark/84a51879-cd67-449b-ace0-f87cccd6ea8c.json create mode 100644 data/hfopenllm_v2/arcee-ai/Arcee-Spark/d751f1c5-5505-4c12-8d51-091538b49949.json create mode 100644 data/hfopenllm_v2/arcee-ai/Llama-3.1-SuperNova-Lite/b6f9144f-57a0-4c18-9e52-ffccf2d8ca9c.json create mode 100644 data/hfopenllm_v2/arcee-ai/Llama-Spark/67dc7fb2-1455-4f60-9dcb-59a8197741d7.json delete mode 100644 data/hfopenllm_v2/arcee-ai/SuperNova-Medius/7e0e8ab9-a90b-4f0e-8e0a-eeceac12a4a1.json create mode 100644 data/hfopenllm_v2/arcee-ai/SuperNova-Medius/7f4ab590-29fa-473a-b617-00135dd1d6ee.json delete mode 100644 data/hfopenllm_v2/arcee-ai/Virtuoso-Lite/62afba84-9929-4882-843e-3f7db7b030a3.json create mode 100644 data/hfopenllm_v2/arcee-ai/Virtuoso-Lite/d67db62e-e21d-43c8-8b4c-bfa353e47636.json delete mode 100644 data/hfopenllm_v2/arcee-ai/Virtuoso-Small-v2/325cf0a5-6a72-466a-8e1e-531f03db6083.json create mode 100644 data/hfopenllm_v2/arcee-ai/Virtuoso-Small-v2/85abff46-8ae5-4a75-9522-721793224363.json create mode 100644 data/hfopenllm_v2/arcee-ai/Virtuoso-Small/1736bbd8-4457-4d55-8c0b-0ae6e001ee62.json delete mode 100644 data/hfopenllm_v2/arcee-ai/Virtuoso-Small/cc51c0e0-4e5d-496c-bf02-8b5d8f474cd3.json create mode 100644 data/hfopenllm_v2/arcee-ai/raspberry-3B/4777e427-8d17-4e06-8cbf-0883c95bbfd8.json delete mode 100644 data/hfopenllm_v2/arcee-ai/raspberry-3B/cef8c893-a903-4e30-b7e1-5f2fe8f2ac82.json create mode 100644 
data/hfopenllm_v2/argilla-warehouse/Llama-3.1-8B-MagPie-Ultra/4df0b890-d4c5-408e-8994-88f7383e9235.json create mode 100644 data/hfopenllm_v2/argilla/notus-7b-v1/76a5a59d-f5fd-4fb0-849e-7db7772b555a.json delete mode 100644 data/hfopenllm_v2/argilla/notus-7b-v1/c06f66ea-d9e3-4902-b3fd-188110f9c1e4.json delete mode 100644 data/hfopenllm_v2/argilla/notux-8x7b-v1/60185907-11c2-454c-bfbc-3c5741651ab7.json create mode 100644 data/hfopenllm_v2/argilla/notux-8x7b-v1/6c8399d0-01ce-45cb-a20f-a49e4e760a1e.json create mode 100644 data/hfopenllm_v2/arisin/orca-platypus-13B-slerp/92c2c5ee-dfa2-4db3-8401-887d02cc21dd.json delete mode 100644 data/hfopenllm_v2/arisin/orca-platypus-13B-slerp/ecd45b21-21f7-49e2-b314-c7b678bdc8c1.json delete mode 100644 data/hfopenllm_v2/arshiaafshani/Arsh-V1/6f40503d-59ee-4cdc-a697-ef405d9644a7.json create mode 100644 data/hfopenllm_v2/arshiaafshani/Arsh-V1/b40ef568-f277-4d5c-87cd-53feaa71598b.json create mode 100644 data/hfopenllm_v2/asharsha30/LLAMA_Harsha_8_B_ORDP_10k/893d5149-c535-41c7-8a1a-26bb6b33e407.json create mode 100644 data/hfopenllm_v2/ashercn97/a1-v0.0.1/0b649ed5-5af4-4910-b853-2408e3b58f1f.json delete mode 100644 data/hfopenllm_v2/ashercn97/a1-v0.0.1/a9e3fe74-400c-444c-9b28-6f49c6671f96.json delete mode 100644 data/hfopenllm_v2/ashercn97/a1-v002/509c2895-70ae-4381-94ef-f6cdf9ee07ef.json create mode 100644 data/hfopenllm_v2/ashercn97/a1-v002/5c8edeba-5c65-4168-b67e-02143acbcafb.json create mode 100644 data/hfopenllm_v2/assskelad/smollm2-360M-sft_SmallThoughts/67e657ef-d602-4f58-b898-874a22f4a009.json delete mode 100644 data/hfopenllm_v2/assskelad/smollm2-360M-sft_SmallThoughts/ce2f5cc8-a187-454d-ba99-4446d29aab7c.json create mode 100644 data/hfopenllm_v2/athirdpath/Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit/53d2bf07-689a-4e69-a534-b288313c8481.json delete mode 100644 data/hfopenllm_v2/athirdpath/Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit/9255090f-6862-4ff1-ac91-fe0cd7613445.json delete mode 100644 data/hfopenllm_v2/automerger/YamshadowExperiment28-7B/1fa5dee9-c360-40d9-8e67-9b415cd36616.json create mode 100644 data/hfopenllm_v2/automerger/YamshadowExperiment28-7B/34d6a184-d4d5-4609-8305-c0e2ee1c585b.json create mode 100644 data/hfopenllm_v2/avemio/GRAG-NEMO-12B-ORPO-HESSIAN-AI/39b627ab-3e64-42f7-a88d-abe5764fcf4d.json delete mode 100644 data/hfopenllm_v2/avemio/GRAG-NEMO-12B-ORPO-HESSIAN-AI/45cc7b31-3f75-42f7-9b07-3cf704fd2b55.json create mode 100644 data/hfopenllm_v2/awnr/Mistral-7B-v0.1-signtensors-1-over-2/d8467b15-8a03-4cde-9fc5-5c08bdabb6c6.json create mode 100644 data/hfopenllm_v2/awnr/Mistral-7B-v0.1-signtensors-1-over-4/85bc5976-0d40-4416-bbf8-9b1dbf372343.json create mode 100644 data/hfopenllm_v2/awnr/Mistral-7B-v0.1-signtensors-3-over-8/8c7e8e64-672e-4c7e-a808-a49f1792d3a8.json create mode 100644 data/hfopenllm_v2/awnr/Mistral-7B-v0.1-signtensors-5-over-16/de8651eb-16d1-46ee-a1df-b8c72caaf205.json create mode 100644 data/hfopenllm_v2/awnr/Mistral-7B-v0.1-signtensors-7-over-16/6a744db8-814f-4e8e-b6e5-0d096267dfa5.json create mode 100644 data/hfopenllm_v2/aws-prototyping/MegaBeam-Mistral-7B-512k/028b7c37-770e-4356-a7c6-0cc74650d5fd.json create mode 100644 data/hfopenllm_v2/axolotl-ai-co/romulus-mistral-nemo-12b-simpo/3b399c64-922a-48ba-9a25-862102749647.json delete mode 100644 data/hfopenllm_v2/baconnier/Napoleon_24B_V0.0/88fb101e-35dd-40af-922f-9b66a2711249.json create mode 100644 data/hfopenllm_v2/baconnier/Napoleon_24B_V0.0/d5e46a11-3e81-457d-9d26-9fd17f96f076.json delete mode 100644 
data/hfopenllm_v2/baconnier/Napoleon_24B_V0.2/4857d2d0-1a4b-4544-8b1e-fb4b01618a3b.json create mode 100644 data/hfopenllm_v2/baconnier/Napoleon_24B_V0.2/b3abfbc1-911a-43b7-a338-efb25f746f9d.json delete mode 100644 data/hfopenllm_v2/baebee/7B-Cetacea/5985fed7-9c54-458d-8f64-533e248a38da.json create mode 100644 data/hfopenllm_v2/baebee/7B-Cetacea/6b471ee0-9444-45ff-92cf-da624aa59bf6.json create mode 100644 data/hfopenllm_v2/baebee/mergekit-model_stock-nzjnheg/b56bd924-0a63-4ca2-8f2f-97b581e47a36.json delete mode 100644 data/hfopenllm_v2/baebee/mergekit-model_stock-nzjnheg/e847afb0-c8ac-4cce-b0f9-1667c9fbef3c.json delete mode 100644 data/hfopenllm_v2/baebee/mergekit-ties-fnjenli/21b3d7d0-301d-431d-9cfc-a0ad1e326f03.json create mode 100644 data/hfopenllm_v2/baebee/mergekit-ties-fnjenli/bfe9098d-7207-4f8c-9a3f-549a29303b5f.json create mode 100644 data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.1v/7856172d-ec3e-4e71-befe-54952478e330.json delete mode 100644 data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.1v/ae256440-486f-43cf-b4a3-8d5c0ff196c9.json create mode 100644 data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.2v/a68aada5-61bd-4a4c-a8e1-b9a2ace349df.json delete mode 100644 data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.2v/d509b0d3-a043-4057-bf80-37ec5ceedeed.json delete mode 100644 data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.3v/8e2e1f2f-4715-4b8b-b641-d5e552500408.json create mode 100644 data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.3v/9d19c44f-4912-4c95-ab3f-2dddb055d932.json delete mode 100644 data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.4v/4072cc72-b6b4-4a5d-8f01-f9f8437ea569.json create mode 100644 data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.4v/6cef3550-27d7-4073-b4bb-0f19a2c5f553.json create mode 100644 data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.5v/08ab8f6a-9aaf-4ab4-ada3-eb4a75f46995.json delete mode 100644 data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.5v/fa2e9cff-4a7b-4efd-98ca-b8fd2cb33928.json create mode 100644 data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.6v/622f9379-6a30-43ba-a7a8-fbd08c484fa5.json delete mode 100644 data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.6v/a58c4863-e5a9-425d-ad3e-5924d6146718.json create mode 100644 data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_III_IV_V/24f728e6-de5e-44cc-8b6d-51e0065c1475.json delete mode 100644 data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_III_IV_V/c2e334b3-e82d-40bb-a6ed-9a941bf2352a.json delete mode 100644 data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_III_ex_V/6f31292a-b09f-4e2c-ae3c-b093c5ba06c6.json create mode 100644 data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_III_ex_V/c3b2bf18-d355-40fc-a862-376c1b988305.json delete mode 100644 data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_Neo/089a5215-70a4-4255-ac01-1b70d4e8a494.json create mode 100644 data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_Neo/79474be5-2587-4087-a2cc-1337e3b696dd.json create mode 100644 data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B/22ff2700-70c0-459e-96a2-0ce1710947bc.json delete mode 100644 data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B/49ec948c-c06d-4c01-be83-9f74ed15ea17.json create mode 100644 data/hfopenllm_v2/bamec66557/Mistral-Nemo-VICIOUS_MESH-12B-2407/7d3a47a3-83d3-4f51-ab72-6a2fa5b5ef80.json create mode 100644 data/hfopenllm_v2/bamec66557/NameLess-12B-prob/69dc0f8e-16d7-4907-9741-484eafa62b8c.json delete mode 100644 data/hfopenllm_v2/bamec66557/NameLess-12B-prob/81670e41-16d6-43a6-9af9-6924a52a8300.json delete mode 100644 
data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-0.1v/2d468a71-7364-40eb-8a98-1dbac956b3cf.json create mode 100644 data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-0.1v/e516abc1-9c3c-4921-a385-e2533d45fed3.json create mode 100644 data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-0.X.ver/8baa5832-cc07-4a31-a815-0e8151426ea6.json delete mode 100644 data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-0.X.ver/d0c92f20-72d0-431c-b8ba-881b3a6ae158.json delete mode 100644 data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-ALPHA/0053cf6a-0e1e-49c5-8d0a-b3d7254e22f3.json create mode 100644 data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-ALPHA/509fbca4-f405-4c27-85a9-1eea59025070.json delete mode 100644 data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-BETA/2f023511-2446-48f8-83e5-47225f15e905.json create mode 100644 data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-BETA/6f45ed56-6bec-4439-9adb-e79fcd74667c.json create mode 100644 data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-DELTA/512ff924-c1d3-4d75-a468-2bcdcda25cf6.json delete mode 100644 data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-DELTA/fcaf0de1-f4f5-4bfb-8276-29b3b1f5b5be.json delete mode 100644 data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-DIGAMMA/67e74757-9950-499e-9258-7ccd20b29835.json create mode 100644 data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-DIGAMMA/86b561ae-c4d3-4293-a884-bcab26df026d.json delete mode 100644 data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-EPSILON/38864e75-9bb0-4eaa-ba87-c631838a9ad1.json create mode 100644 data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-EPSILON/516d1972-9731-4234-a4b3-b96423ebba5c.json create mode 100644 data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-GAMMA/274f6e02-c81f-4f2e-9747-e5de5cee1933.json delete mode 100644 data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-GAMMA/4507a6c1-bfff-4e8d-92c6-7e923f74c4dc.json create mode 100644 data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-NEMO/61638b55-296b-40fd-a39f-cc2276d9f94a.json delete mode 100644 data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-NEMO/6a9c649c-fbcd-489a-bc01-083014932a45.json create mode 100644 data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-OMEGA/11c1b6fe-4815-415b-a4a8-d14073df6ee1.json delete mode 100644 data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-OMEGA/a630e843-ec9c-432b-986a-2b181c789507.json delete mode 100644 data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-UNION/20d0e946-e7cf-48a6-a81e-f73d774e0e2b.json create mode 100644 data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-UNION/88e2cb24-288e-4f37-8753-f0daa825051c.json create mode 100644 data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B/8a1a6c44-17fd-402e-a22e-e795a1f612e3.json delete mode 100644 data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B/f2ef86c9-e968-42e0-a0d0-1cf79f9c249b.json create mode 100644 data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B_Razor/1121af0b-61fe-424a-bc66-3164bcb1d833.json delete mode 100644 data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B_Razor/950f6bff-e0ec-4556-85b7-81444008d1d4.json create mode 100644 data/hfopenllm_v2/bamec66557/mergekit-model_stock-zdaysvi/35300d67-7ee1-4874-b351-87f46267cec9.json delete mode 100644 data/hfopenllm_v2/bamec66557/mergekit-model_stock-zdaysvi/8932da66-d29a-4453-9b61-bee48f1a28f1.json create mode 100644 data/hfopenllm_v2/bamec66557/mergekit-ties-sinbkow/6180b7b3-4b21-42aa-a62d-084a91568b43.json delete mode 100644 data/hfopenllm_v2/bamec66557/mergekit-ties-sinbkow/b8c00b3b-c35a-4511-965b-6096e9b116de.json create mode 100644 data/hfopenllm_v2/belztjti/dffghgjh/7414d344-0e67-424a-9e16-00de0487ce02.json delete mode 100644 
data/hfopenllm_v2/belztjti/dffghgjh/82b3c9ac-16bb-4fd0-8bed-af1ac598a424.json
 delete mode 100644 data/hfopenllm_v2/belztjti/dtfgv/655ea5ea-d94a-43eb-a4bf-182fd021d65a.json
 create mode 100644 data/hfopenllm_v2/belztjti/dtfgv/f5fcd407-080c-4cb7-a299-7a7f919c734d.json
 create mode 100644 data/hfopenllm_v2/benhaotang/phi4-qwq-sky-t1/efe03731-6021-4dcf-b7fe-24cbf2d60fac.json
 create mode 100644 data/hfopenllm_v2/beomi/gemma-mling-7b/6ffed624-cc22-4b62-a447-3c02b0e43ded.json
 create mode 100644 data/hfopenllm_v2/beowolx/CodeNinja-1.0-OpenChat-7B/ed867fa8-be8a-49b0-8c94-38085808b58b.json
 delete mode 100644 data/hfopenllm_v2/beowolx/CodeNinja-1.0-OpenChat-7B/fbe7d86c-8d1e-474a-bf85-35a139bdb08f.json
 create mode 100644 data/hfopenllm_v2/berkeley-nest/Starling-LM-7B-alpha/c8b9a56b-0933-4085-8d5f-a1d8294699db.json
 delete mode 100644 data/hfopenllm_v2/berkeley-nest/Starling-LM-7B-alpha/ddc116b6-5b9a-409f-a0ab-09e5630d1289.json
 create mode 100644 data/hfopenllm_v2/bfuzzy1/Gunny/9b178661-ed9a-427d-b93c-b905b8089ad8.json
 delete mode 100644 data/hfopenllm_v2/bfuzzy1/Gunny/e7d0c3d5-d962-49b5-a4b7-3cb7ac12735c.json
 create mode 100644 data/hfopenllm_v2/bfuzzy1/acheron-c/69588e07-7559-49c2-9423-19fd143e42f7.json
 delete mode 100644 data/hfopenllm_v2/bfuzzy1/acheron-c/71268c77-565a-401b-a51d-122060ed5945.json
 delete mode 100644 data/hfopenllm_v2/bfuzzy1/acheron-d/1c9ba45f-1f3b-42ad-a603-ea7039fee22e.json
 create mode 100644 data/hfopenllm_v2/bfuzzy1/acheron-d/317589da-d673-4f90-93e9-59983f2ef54b.json
 create mode 100644 data/hfopenllm_v2/bfuzzy1/acheron-m/efab322e-ea15-4fe7-9bfc-15246003e59c.json
 delete mode 100644 data/hfopenllm_v2/bfuzzy1/acheron-m/fdd707f8-df0b-4384-bc77-35f3fa8ec0a0.json
 create mode 100644 data/hfopenllm_v2/bfuzzy1/acheron-m1a-llama/b1eac68e-b292-414b-9594-c921f8e10818.json
 delete mode 100644 data/hfopenllm_v2/bfuzzy1/acheron/2b74949a-c0a3-4061-8cf4-4330850af288.json
 create mode 100644 data/hfopenllm_v2/bfuzzy1/acheron/b7d08c65-8219-4067-9504-99e438a86038.json
 delete mode 100644 data/hfopenllm_v2/bfuzzy1/llambses-1/3f04797b-fe6d-4cd5-a49e-b898a8db26a6.json
 create mode 100644 data/hfopenllm_v2/bfuzzy1/llambses-1/e9c5b479-0dce-4de3-84d6-90c7515337f1.json
 create mode 100644 data/hfopenllm_v2/bhuvneshsaini/merged_model/3c766465-29db-4b3d-b42f-a3222b38a096.json
 delete mode 100644 data/hfopenllm_v2/bhuvneshsaini/merged_model/44e6cddd-4ecc-499f-a6b7-d8ee0640c2f9.json
 delete mode 100644 data/hfopenllm_v2/bigcode/starcoder2-15b/09aa04cf-9369-453f-952a-2f6c74e4707a.json
 create mode 100644 data/hfopenllm_v2/bigcode/starcoder2-15b/e6c85677-61ed-475b-85a5-48b91ec76bcf.json
 delete mode 100644 data/hfopenllm_v2/bigcode/starcoder2-3b/7385c595-5b4f-4491-8e71-ece57ffffbd2.json
 create mode 100644 data/hfopenllm_v2/bigcode/starcoder2-3b/7b68fa5e-dbbf-4542-8767-6874aabf8f40.json
 delete mode 100644 data/hfopenllm_v2/bigcode/starcoder2-7b/53eac61a-064e-4786-bc94-962382d88f77.json
 create mode 100644 data/hfopenllm_v2/bigcode/starcoder2-7b/c103b7f4-a432-42d6-86ef-cb369e0c16ff.json
 delete mode 100644 data/hfopenllm_v2/bigscience/bloom-1b1/284ba4fb-cae4-46ac-a5dd-a36fb145da55.json
 create mode 100644 data/hfopenllm_v2/bigscience/bloom-1b1/643dda41-37d0-4c1e-b856-58b774612886.json
 delete mode 100644 data/hfopenllm_v2/bigscience/bloom-1b7/8adb8bb9-d057-45df-827a-cd8f014b4ff6.json
 create mode 100644 data/hfopenllm_v2/bigscience/bloom-1b7/ba2f284b-d7c6-4748-a8dc-4f80caa30c6c.json
 create mode 100644 data/hfopenllm_v2/bigscience/bloom-3b/16e30aa0-736a-4ef8-8ba6-78285b84546f.json
 delete mode 100644 data/hfopenllm_v2/bigscience/bloom-3b/88f90805-7410-4ec1-ad19-8e8a146f1ba3.json
 create mode 100644 data/hfopenllm_v2/bigscience/bloom-560m/73eb729d-adfd-4dee-9bde-04a31f5528f6.json
 delete mode 100644 data/hfopenllm_v2/bigscience/bloom-560m/82454b92-cca1-4ac8-a620-e1a8487a5b8e.json
 create mode 100644 data/hfopenllm_v2/bigscience/bloom-7b1/0daad2ae-92d0-4522-a067-20332f72c96f.json
 delete mode 100644 data/hfopenllm_v2/bigscience/bloom-7b1/d5fe1452-b6ee-4f1d-9eca-713b49a6a941.json
 delete mode 100644 data/hfopenllm_v2/bluuwhale/L3-SthenoMaid-8B-V1/44dd13bc-56f0-4dd1-90d0-bb411239109a.json
 create mode 100644 data/hfopenllm_v2/bluuwhale/L3-SthenoMaid-8B-V1/a3e3849f-a289-4132-b4a8-f67d67ad46a1.json
 delete mode 100644 data/hfopenllm_v2/bond005/meno-tiny-0.1/109acb38-3026-4573-b082-8277b9501f09.json
 create mode 100644 data/hfopenllm_v2/bond005/meno-tiny-0.1/59a9ed26-a67a-4e76-8858-520400c90766.json
 create mode 100644 data/hfopenllm_v2/bosonai/Higgs-Llama-3-70B/6c5c61b4-8037-4b28-8616-1aefa7963eb8.json
 create mode 100644 data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-1.5B-Blunt/e9f9b836-fbdf-4996-9b35-2c8145a7f01b.json
 create mode 100644 data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-1.5B-Reflective/5b3dae43-5d5c-4d19-bd47-5c0f68ecbb81.json
 create mode 100644 data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-14B-ABUB-ST/d5b31b1f-ace0-457f-bf8a-9041398b8344.json
 create mode 100644 data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt-Reflective/b34702cf-ffb8-4e75-9c9b-f5c52623d4c8.json
 create mode 100644 data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt/c701f1fd-166d-416b-8f78-edf17f2fecd4.json
 create mode 100644 data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Reflective/4217b403-e924-4f67-9b0e-ad1d4ed293a1.json
 create mode 100644 data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored/03816e41-5fb8-4815-ab9c-4108ab19a3bc.json
 create mode 100644 data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt/a763b10e-350a-4342-ade3-b782437ca3e2.json
 create mode 100644 data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-14B-Reflective/9e806fd2-edbf-40e2-a008-834cee537bb6.json
 create mode 100644 data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-14B/fbcf861c-62db-4079-bba6-becd4e231216.json
 create mode 100644 data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-7B-Blunt/22b591c0-3386-4bd5-860c-20c0c6001986.json
 create mode 100644 data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored/dfb9a9c4-114e-4188-9940-4d6df7e4815f.json
 create mode 100644 data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-7B-Reflective/38fd5f4d-0f3c-4dc2-b250-a9ee7090aac2.json
 create mode 100644 data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-7B/e53cbc94-fc9f-4d53-ae28-26bc8c2caef8.json
 create mode 100644 data/hfopenllm_v2/braindao/Qwen2.5-14B-Instruct/2165e69a-c50c-419a-932e-909f53b73b71.json
 delete mode 100644 data/hfopenllm_v2/braindao/Qwen2.5-14B-Instruct/cb442f90-a0e1-4588-900c-548b994a764d.json
 create mode 100644 data/hfopenllm_v2/braindao/Qwen2.5-14B/46430a07-15c8-4727-9102-2f471d4f1d3c.json
 create mode 100644 data/hfopenllm_v2/braindao/iq-code-evmind-0.5b/3c7f540a-c850-4e20-ad93-60e021d17133.json
 delete mode 100644 data/hfopenllm_v2/braindao/iq-code-evmind-0.5b/58f1b3d7-74a6-4ed0-b927-afaedfdda25f.json
 delete mode 100644 data/hfopenllm_v2/brgx53/3Bgeneral-ECE-PRYMMAL-Martial/06d2ac1d-d70c-4cda-997d-9d4d1ef50c5a.json
 create mode 100644 data/hfopenllm_v2/brgx53/3Bgeneral-ECE-PRYMMAL-Martial/c3ab4f38-6f7b-4589-ae4f-21ace05b8c44.json
 create mode 100644 data/hfopenllm_v2/brgx53/3Bgeneralv2-ECE-PRYMMAL-Martial/2708c0d6-03e7-4a17-b6b9-e16f3ddcf5bb.json
 delete mode 100644 data/hfopenllm_v2/brgx53/3Bgeneralv2-ECE-PRYMMAL-Martial/c7f6603c-dcca-49b9-94bd-0a1fbf707dd9.json
 create mode 100644 data/hfopenllm_v2/brgx53/3Blareneg-ECE-PRYMMAL-Martial/6427a5ef-8508-430d-970d-054fc485e754.json
 delete mode 100644 data/hfopenllm_v2/brgx53/3Blareneg-ECE-PRYMMAL-Martial/6fea29aa-174f-4e3f-be91-c79842126c2c.json
 create mode 100644 data/hfopenllm_v2/brgx53/3Blarenegv2-ECE-PRYMMAL-Martial/08984ad9-1e9b-4916-b214-af26dadfcc0b.json
 delete mode 100644 data/hfopenllm_v2/brgx53/3Blarenegv2-ECE-PRYMMAL-Martial/64e92286-72ea-4318-aaea-4e0be87a0067.json
 create mode 100644 data/hfopenllm_v2/brgx53/Barracuda-PRYMMAL-ECE-TW3/1dbb5d03-fdfa-4059-9d50-d037ada6b1ac.json
 delete mode 100644 data/hfopenllm_v2/brgx53/Barracuda-PRYMMAL-ECE-TW3/70a11b76-f8e4-4cfb-8ab6-791c7e9ba113.json
 create mode 100644 data/hfopenllm_v2/brgx53/LaConfiance-PRYMMAL-ECE-TW3/6bf42faa-c3e9-4069-bf93-ffd626062f0f.json
 delete mode 100644 data/hfopenllm_v2/brgx53/LaConfiance-PRYMMAL-ECE-TW3/f4766bd8-0130-4ed1-ae1c-8177a65d94a9.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Best-Mix-Llama-3.1-8B/9feccbdc-18eb-4077-b50b-986db0047fc8.json
 delete mode 100644 data/hfopenllm_v2/bunnycore/Blabbertron-1.0/195957fa-9d4e-49ec-afd9-17125ebcf62d.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Blabbertron-1.0/a074c33f-782a-409c-987b-7dd62c65ccc7.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Blabbertron-1.1/2f2c0dea-dcd4-4e54-9f40-9fda4b91bd40.json
 delete mode 100644 data/hfopenllm_v2/bunnycore/Blabbertron-1.1/9fbe416c-de18-4f83-812c-f48071a49917.json
 create mode 100644 data/hfopenllm_v2/bunnycore/CyberCore-Qwen-2.1-7B/84481fee-3727-427b-912a-30e2744df28a.json
 create mode 100644 data/hfopenllm_v2/bunnycore/DeepQwen-3B-LCoT-SCE/aaa801dc-1a47-4009-9ad4-7129a8d4e651.json
 create mode 100644 data/hfopenllm_v2/bunnycore/DeepSeek-R1-Distill-Qwen-7B-RRP-Ex/3ac92cbf-c85b-4e00-9ef9-4322f961591a.json
 create mode 100644 data/hfopenllm_v2/bunnycore/DeepThinker-7B-Sce-v1/162b511b-4684-4595-9261-a33f3a4117f9.json
 delete mode 100644 data/hfopenllm_v2/bunnycore/DeepThinker-7B-Sce-v1/814129ce-9101-4d9b-9e53-9161a010743f.json
 create mode 100644 data/hfopenllm_v2/bunnycore/DeepThinker-7B-Sce-v2/20d5d59a-028d-4e34-9414-d9edaf2e59b8.json
 delete mode 100644 data/hfopenllm_v2/bunnycore/DeepThinker-7B-Sce-v2/82cc30d2-9bb6-499f-b522-c66688e07c00.json
 create mode 100644 data/hfopenllm_v2/bunnycore/FuseCyberMix-Qwen-2.5-7B-Instruct/a21b53fb-783b-440b-9f3d-d8ada3bd18ea.json
 delete mode 100644 data/hfopenllm_v2/bunnycore/FuseCyberMix-Qwen-2.5-7B-Instruct/d851bc0d-5f11-40f6-982c-39809dffe946.json
 delete mode 100644 data/hfopenllm_v2/bunnycore/FuseQwQen-7B/06b6f8e3-f3c7-43a6-bb69-e1eb3bd10b7a.json
 create mode 100644 data/hfopenllm_v2/bunnycore/FuseQwQen-7B/0d2ab1e8-a2d7-45cf-b123-67bcab2d9dff.json
 create mode 100644 data/hfopenllm_v2/bunnycore/FwF-Qwen-7B-0.1/6b4a37c8-c7e6-4156-9d6d-8cba51b74d82.json
 create mode 100644 data/hfopenllm_v2/bunnycore/FwF-Qwen-7B-0.2/78582fec-2f69-4b37-8497-12ceb097b44b.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Gemma-2-2B-Smart/949bf65e-c2ae-4701-82f0-39d0c62a0e87.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Gemma2-9B-TitanFusion/8812151c-4301-4131-a414-d64d025e476e.json
 create mode 100644 data/hfopenllm_v2/bunnycore/HyperLlama-3.1-8B/2db1542f-a8da-4fb8-91a5-6dd1a942b55e.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Llama-3.1-8B-TitanFusion-Mix/9feeffb2-3763-4e43-933e-89100b76f7fa.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Llama-3.1-8B-TitanFusion-v3/721102b5-ed5e-4631-8600-a6adfff0c784.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Llama-3.2-3B-All-Mix/18c185f7-5ca4-46ff-81c2-6c538f096409.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Llama-3.2-3B-Bespoke-Thought/7ab5911c-e229-43e5-a798-095287d0a597.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Llama-3.2-3B-Booval/f800c4e5-e918-45bb-8a12-3ca2a64c6b23.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Llama-3.2-3B-Deep-Test/5fcf41bc-30dc-46a7-9cf2-4ce2c7a5850c.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Llama-3.2-3B-Deep-Test/d4b20ef4-734e-40a7-818e-f77e170d7437.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Llama-3.2-3B-Della/e0996c96-c9e5-4d39-8e6d-1455ef1f9544.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Llama-3.2-3B-Long-Think/3ad2b31e-ce2a-4cb4-9b85-79cdebd5d364.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Llama-3.2-3B-Mix-Skill/9aff874c-1953-4b97-9bff-9e6120b0bfa7.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Llama-3.2-3B-ProdigyPlus/45ae7f45-8c36-46c6-989d-bc672cdf8eff.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Llama-3.2-3B-ProdigyPlusPlus/7d36e44e-a329-4b96-a891-365ad900f718.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Llama-3.2-3B-RP-DeepThink/a8c26325-1eec-43a6-a8ad-3bcb2e378924.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Llama-3.2-3B-RRStock/bde1a879-6852-42ce-9217-f427af85a46a.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Llama-3.2-3B-ToxicKod/dd7a0377-f4d6-4390-b9f2-bf50b05ec0f7.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Llama-3.2-3b-RP-Toxic-Fuse/12cbf241-d6d4-4d25-ad3d-13a42d7adc74.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Maestro-S1k-7B-Sce/1f66fd7c-40ee-4249-8963-5c7bb93a3eaf.json
 delete mode 100644 data/hfopenllm_v2/bunnycore/Maestro-S1k-7B-Sce/cc0c2de6-5a8d-4229-bd92-a1ad0b95a6b0.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Phi-3.5-mini-TitanFusion-0.1/7076406b-7e0a-49c7-8150-2e6a243aa23b.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Phi-4-Model-Stock-v2/96c3fd80-a601-4629-a1ab-bf7f366a909a.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Phi-4-Model-Stock-v3/1302c9a5-d35c-400c-b9f3-d990243e5d59.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Phi-4-Model-Stock-v4/c7f48bbf-6583-4ddd-ae4d-671c43218dae.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Phi-4-Model-Stock/5f07e092-2eb0-44c2-b2ce-5f1b31a9ea99.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Phi-4-RP-v0/15701682-97ce-46cf-8010-a6bdeaf8c7aa.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Phi-4-RR-Shoup/c6eecf0b-fa16-484a-8eeb-d196203b3c3e.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Phi-4-RStock-v0.1/4337b1c1-cc00-4a15-8148-e8d0739561b9.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Phi-4-ReasoningRP/1151ee14-8fe9-4f97-808d-8103b353c2ec.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Phi-4-Sce-exp-v0.1/a2c18179-aca3-422c-b9f5-8345109cea13.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Phi-4-Stock-Ex/07495d34-1505-45a9-bb48-887af0da8a0c.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Phi-4-Stock-RP/567baf6d-99f9-46a5-8c40-c6899986f1ff.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Phi-4-Trim-Exp1/a337df3a-28ff-46c9-adae-4bc029937101.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Phi-Seek-4-Sce-V1/b201a849-44e9-4598-918b-ffa27c894ee9.json
 delete mode 100644 data/hfopenllm_v2/bunnycore/Qandora-2.5-7B-Creative/acd82774-f29a-4b19-b08c-693706bb4603.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Qandora-2.5-7B-Creative/dd87ebf3-3088-43b1-851c-a97d12a68ea8.json
 create mode 100644 data/hfopenllm_v2/bunnycore/QandoraExp-7B-Persona/1b3ef805-8b0c-44bf-b048-773a0dd94d0d.json
 delete mode 100644 data/hfopenllm_v2/bunnycore/QandoraExp-7B-Persona/4e9dc7ca-f4f2-4c1f-b532-628a8d9d515b.json
 create mode 100644 data/hfopenllm_v2/bunnycore/QandoraExp-7B-v2/220cb478-58c0-4028-b51a-ec5fe1050746.json
 delete mode 100644 data/hfopenllm_v2/bunnycore/QandoraExp-7B-v2/85bc0517-382e-4a4c-ac31-ee6de74d2c8f.json
 create mode 100644 data/hfopenllm_v2/bunnycore/QandoraExp-7B/17cb8ab1-e7ba-4daf-95d4-2cdbd2777434.json
 delete mode 100644 data/hfopenllm_v2/bunnycore/QandoraExp-7B/744f9f56-fbb4-450f-9427-35e6e49ca014.json
 create mode 100644 data/hfopenllm_v2/bunnycore/QwQen-3B-LCoT-R1/2b55023b-b8bc-42a2-aca8-dcaf39890232.json
 delete mode 100644 data/hfopenllm_v2/bunnycore/QwQen-3B-LCoT-R1/636c4294-b3d0-42fc-b437-e4a80f70b4d9.json
 create mode 100644 data/hfopenllm_v2/bunnycore/QwQen-3B-LCoT/31736569-5992-4b1d-9d66-27a6c1620506.json
 delete mode 100644 data/hfopenllm_v2/bunnycore/QwQen-3B-LCoT/bff23021-087b-4118-ba4d-219a97a1dedc.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Qwen-2.5-7B-Deep-Sky-T1/630b37b5-351c-403c-ac76-ccb68ffc5d53.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Qwen-2.5-7B-Deep-Stock-v1/69cdef01-30dc-4f75-97fa-9daeebcec72f.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Qwen-2.5-7B-Deep-Stock-v4/9aa1acb0-c791-4dea-aa1e-c912cea69466.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Qwen-2.5-7B-Deep-Stock-v5/0c1d66f3-8fd7-47f2-8538-a1aa8985aebf.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Qwen-2.5-7B-Exp-Sce/2872dcd9-421b-4346-812c-b27bb32c6e86.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Qwen-2.5-7B-R1-Stock/2f3e2fc0-f1e0-43cb-8a8c-6aadcc538646.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Qwen-2.5-7B-Stock-Deep-Bespoke/d0a76497-84b0-45b9-b748-04ffe9bc13a3.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Qwen-2.5-7b-S1k/185b6560-6790-417f-aeba-f7405fee808a.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Qwen2.5-1.5B-Model-Stock/30a8074e-df03-4866-9b8d-a5a7eece3c71.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Qwen2.5-3B-Model-Stock-v2/ac8874ae-d6d6-45d3-aabc-06a3852f68d0.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Qwen2.5-3B-Model-Stock-v3.1/bc98b048-18d4-438e-80c4-0cd851798da5.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Qwen2.5-3B-Model-Stock-v3.2/c88c011f-0a24-4e78-a104-035d25af2430.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Qwen2.5-3B-Model-Stock-v4.1/f9e3c31c-02c0-4f5e-ad4f-3be0801a0f41.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Qwen2.5-3B-Model-Stock/5484405a-2ec8-4515-af75-76a5dd348d3d.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Qwen2.5-3B-RP-Mix/7dc117b9-c2a2-44c1-8471-f3bc8a116e3e.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Qwen2.5-3B-RP-Thinker-V2/e2d314dd-b5b3-49b5-8e64-1e3464f4b963.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Qwen2.5-3B-RP-Thinker/7ecb453b-1ba7-44ec-abfd-1f8be4c817fd.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Qwen2.5-7B-CyberRombos/d0a70e95-fc72-41c6-ac42-09b8f379b566.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Qwen2.5-7B-Fuse-Exp/e2ef8ea6-b464-445e-81df-ef0779c1d0d4.json
 delete mode 100644 data/hfopenllm_v2/bunnycore/Qwen2.5-7B-Instruct-Fusion/6d88de9c-062d-4858-95ef-a05f6a29b6c3.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Qwen2.5-7B-Instruct-Fusion/f3d7cca2-141c-4b84-abc4-396ad2d59e3c.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Qwen2.5-7B-Instruct-Merge-Stock-v0.1/e3f48d7a-c8a3-4e75-99d6-7f2946696b12.json
 delete mode 100644 data/hfopenllm_v2/bunnycore/Qwen2.5-7B-Instruct-Merge-Stock-v0.1/fe31c10e-8231-49f4-afb3-e2588396c032.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Qwen2.5-7B-MixStock-Sce-V0.3/3feb9449-49a2-427f-a317-c21e6d1ca66c.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Qwen2.5-7B-MixStock-V0.1/6359e37e-0405-436b-903c-8f0e740dd6c7.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Qwen2.5-7B-R1-Bespoke-Stock/f5daed76-f6e5-4a7d-84d7-80537a046b83.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Qwen2.5-7B-R1-Bespoke-Task/03af2b1d-989f-4afc-ab13-8793093b9c50.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Qwen2.5-7B-RRP-1M-Thinker/5db7ec54-7feb-4c11-b2e0-042226ba1f94.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Qwen2.5-7B-RRP-1M/f1f5615d-8a78-43c9-b5c6-edc180252381.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Qwen2.5-7B-RRP-ID/9c89bf8f-4b8a-4c01-8685-fafc687c673e.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Qwen2.5-7B-Sky-R1-Mini/58b69c0f-826d-414f-915e-dd0b78d9298c.json
 create mode 100644 data/hfopenllm_v2/bunnycore/QwenMosaic-7B/101ea548-2ffe-4f47-b3b5-5fbe9a3854b4.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Smol-Llama-3.2-3B/259c4798-ff03-4f58-8fb4-59150710212b.json
 delete mode 100644 data/hfopenllm_v2/bunnycore/SmolLM2-1.7-Persona/5249691a-3672-4ccd-98dd-d9b937bca750.json
 create mode 100644 data/hfopenllm_v2/bunnycore/SmolLM2-1.7-Persona/f731caa1-f777-494a-8490-da0c815f0708.json
 delete mode 100644 data/hfopenllm_v2/bunnycore/SmolLM2-1.7B-roleplay-lora/ae109e51-8631-4e09-8839-8e9ed74da4c7.json
 create mode 100644 data/hfopenllm_v2/bunnycore/SmolLM2-1.7B-roleplay-lora/d4d25d38-b21a-490e-9ca9-556504ec00ea.json
 create mode 100644 data/hfopenllm_v2/bunnycore/Tulu-3.1-8B-SuperNova/75bb85a3-40bb-4630-95a0-50e40b008412.json
 delete mode 100644 data/hfopenllm_v2/bunnycore/Tulu-3.1-8B-SuperNova/cd979586-e334-4964-b06c-f33c66f09c0e.json
 create mode 100644 data/hfopenllm_v2/byroneverson/Mistral-Small-Instruct-2409-abliterated/bb44f3ef-eefa-48ef-a257-2eb345c89a00.json
 delete mode 100644 data/hfopenllm_v2/byroneverson/Mistral-Small-Instruct-2409-abliterated/ff0c627b-72b9-45d4-a385-49c8b0ae6b6e.json
 create mode 100644 data/hfopenllm_v2/byroneverson/Yi-1.5-9B-Chat-16K-abliterated/2dcf1771-3dbe-43ad-974c-54e2e2860bcc.json
 delete mode 100644 data/hfopenllm_v2/byroneverson/Yi-1.5-9B-Chat-16K-abliterated/dc783bb0-c784-4cf4-888b-36a3bfa37a84.json
 delete mode 100644 data/hfopenllm_v2/byroneverson/Yi-1.5-9B-Chat-abliterated/345560e2-c981-4aca-9388-4f3a5e95ace8.json
 create mode 100644 data/hfopenllm_v2/byroneverson/Yi-1.5-9B-Chat-abliterated/caa0c8df-5488-4bf9-a5b8-0fff831e6732.json
 delete mode 100644 data/hfopenllm_v2/c10x/Q-Pluse/2093ba5f-d2f8-45d2-bcf7-ff48810c47af.json
 create mode 100644 data/hfopenllm_v2/c10x/Q-Pluse/c6f8e581-e849-4e28-b3a6-1838ee522770.json
 create mode 100644 data/hfopenllm_v2/c10x/longthinker/f0c361a1-a3ac-4415-ab5d-069bdf27e7a3.json
 delete mode 100644 data/hfopenllm_v2/c10x/longthinker/fe7bd3bb-71a4-46dd-a86d-b5a24b685fa5.json
 create mode 100644 data/hfopenllm_v2/carsenk/flippa-v6/44129be7-f73d-4580-8375-e8ef324e73a8.json
 delete mode 100644 data/hfopenllm_v2/carsenk/flippa-v6/a4bcc6f3-b745-48f7-a394-90cd42363aae.json
 create mode 100644 data/hfopenllm_v2/carsenk/phi3.5_mini_exp_825_uncensored/2925ecde-a9a5-4369-b391-d23a8605d35c.json
 create mode 100644 data/hfopenllm_v2/cat-searcher/gemma-2-9b-it-sppo-iter-1-evol-1/8409e464-fd16-4b41-b533-2f6cae4fe894.json
 create mode 100644 data/hfopenllm_v2/cat-searcher/gemma-2-9b-it-sppo-iter-1/86f6c6eb-8b08-4e6c-a1bc-0d941a00f10b.json
 create mode 100644 data/hfopenllm_v2/cckm/tinymistral_950m/aa2e6df7-a0b0-42f7-8057-e2763fc34834.json
 create mode 100644 data/hfopenllm_v2/cgato/TheSalt-L3-8b-v0.3.2/2bf9a06e-f3bf-4b55-804b-e553a722e0de.json
 delete mode 100644 data/hfopenllm_v2/cgato/TheSalt-L3-8b-v0.3.2/aa805bcc-3847-40b5-86eb-397982106d18.json
 create mode 100644 data/hfopenllm_v2/chargoddard/prometheus-2-llama-3-8b/b380a675-39ea-4950-ad0a-d9771f09ddde.json
 create mode 100644 data/hfopenllm_v2/chujiezheng/Llama-3-Instruct-8B-SimPO-ExPO/482358eb-7d3b-4de0-b5d9-451308f104e2.json
 delete mode 100644 data/hfopenllm_v2/chujiezheng/Llama-3-Instruct-8B-SimPO-ExPO/bdf85c5c-6eaa-4df6-a393-66b71aa28952.json
 create mode 100644 data/hfopenllm_v2/chujiezheng/Mistral7B-PairRM-SPPO-ExPO/ef04a83d-7b89-43ec-ba33-30e1006422dc.json
 create mode 100644 data/hfopenllm_v2/cjvt/GaMS-1B/7b64cf2e-c7c6-4b48-8e51-ea2aa0914145.json
 delete mode 100644 data/hfopenllm_v2/cjvt/GaMS-1B/e9acbb25-2b96-4a2a-92ff-d2b68c0e49f8.json
 create mode 100644 data/hfopenllm_v2/cloudyu/Llama-3-70Bx2-MOE/52c8e3f4-1063-4d9c-80d9-fdd0a72fc98e.json
 create mode 100644 data/hfopenllm_v2/cloudyu/Llama-3.2-3Bx4/1f4a827d-31cd-42e6-871d-7c0cad010f58.json
 create mode 100644 data/hfopenllm_v2/cloudyu/Mixtral_11Bx2_MoE_19B/56d6d99c-fba1-42e7-aad4-631370b44da3.json
 delete mode 100644 data/hfopenllm_v2/cloudyu/Mixtral_11Bx2_MoE_19B/9be76c82-0f70-4b76-8476-7707d4da85bb.json
 create mode 100644 data/hfopenllm_v2/cloudyu/Mixtral_34Bx2_MoE_60B/006a0ac7-d6c3-42c1-b0cc-6a0bfe74f884.json
 delete mode 100644 data/hfopenllm_v2/cloudyu/Mixtral_34Bx2_MoE_60B/fdbef33b-dffb-4146-bc83-f8b03c842b2e.json
 create mode 100644 data/hfopenllm_v2/cloudyu/Mixtral_7Bx2_MoE/33a82686-6202-4a4d-ba34-bd4537105e5f.json
 delete mode 100644 data/hfopenllm_v2/cloudyu/Mixtral_7Bx2_MoE/b6c048f5-b01e-4e51-8a6c-c068dfd199ef.json
 create mode 100644 data/hfopenllm_v2/cloudyu/S1-Llama-3.2-3Bx4-MoE/38d45554-44bd-4b40-b7c9-c0b7ba44b862.json
 create mode 100644 data/hfopenllm_v2/cloudyu/Yi-34Bx2-MoE-60B-DPO/37d7e3ab-db9c-4ad7-81d1-933c030a6250.json
 delete mode 100644 data/hfopenllm_v2/cloudyu/Yi-34Bx2-MoE-60B-DPO/542d450b-8108-4abe-a2ae-5b9a577558d6.json
 create mode 100644 data/hfopenllm_v2/cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-ipo/9cc49b3c-4e51-4f67-92ea-4ac8a3cbed43.json
 create mode 100644 data/hfopenllm_v2/cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-sigmoid/b6bd8515-4c95-40ce-b2d5-af8873d261ab.json
 create mode 100644 data/hfopenllm_v2/cluebbers/Llama-3.1-8B-paraphrase-type-generation-etpc/d102e75d-3e20-482b-a243-bae3ec44e2bb.json
 create mode 100644 data/hfopenllm_v2/cognitivecomputations/Dolphin3.0-Llama3.1-8B/68920da1-af71-4ccd-88b9-554e3c72c4dc.json
 create mode 100644 data/hfopenllm_v2/cognitivecomputations/Dolphin3.0-Llama3.2-1B/c0eb144f-c726-4a80-bce9-384fb7a641a7.json
 create mode 100644 data/hfopenllm_v2/cognitivecomputations/Dolphin3.0-Qwen2.5-0.5B/0b26f82d-36f6-4fd0-a0fd-05e4a1368a6e.json
 create mode 100644 data/hfopenllm_v2/cognitivecomputations/Dolphin3.0-R1-Mistral-24B/8fe4360a-0924-4386-b4cd-89069f7ff55f.json
 create mode 100644 data/hfopenllm_v2/cognitivecomputations/dolphin-2.9-llama3-8b/eeeb082b-7112-4a08-a87a-b2c9ae37efff.json
 create mode 100644 data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.1-llama-3-70b/b8f933e9-867f-4934-9648-371d1e632116.json
 create mode 100644 data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.1-yi-1.5-34b/8d225023-4b7e-48cd-ae67-6d00b541f17d.json
 create mode 100644 data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.1-yi-1.5-9b/ee3b45e7-a5d6-4fa8-8abd-f6a77d5a6d5b.json
 create mode 100644 data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated/177ef040-da5c-4a65-adac-efdc555bd110.json
 create mode 100644 data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated/e9dc8337-eb35-4eb9-bca7-30ec1cd44092.json
 create mode 100644 data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.2-Phi-3-Medium/f4549a39-0b28-4e06-998a-774f5f02cfba.json
 create mode 100644 data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.2-qwen2-72b/a79af78a-adab-406f-995a-adb3893e1510.json
 create mode 100644 data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.2-qwen2-7b/4e8e457a-85eb-4afb-a9fe-8f8ce6eaf4d7.json
 create mode 100644 data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.3-Yi-1.5-34B-32k/eeb3a10a-d584-414a-90de-e018c47615c2.json
 create mode 100644 data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.3-mistral-7B-32k/e83dadb0-5092-48b8-b408-e6bb1ac8a0ba.json
 create mode 100644 data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.3-mistral-nemo-12b/cebc7767-fbc9-45a2-808b-51e1a4f0f35c.json
 create mode 100644 data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.4-gemma2-2b/b64b6416-b18b-47cc-a516-c613cd670b37.json
 create mode 100644 data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.4-llama3.1-8b/64e96d56-72a9-413f-8903-45821b98f71e.json
 create mode 100644 data/hfopenllm_v2/collaiborateorg/Collaiborator-MEDLLM-Llama-3-8B-v2/a3f44cfd-d1fc-4a3c-aa5b-a0f37fc4a192.json
 delete mode 100644 data/hfopenllm_v2/cpayne1303/cp2024-instruct/247e1c1e-ce27-4645-a2ae-4177f08ea4a5.json
 create mode 100644 data/hfopenllm_v2/cpayne1303/cp2024-instruct/79314f48-d92b-4992-b3c6-d31278c0867a.json
 delete mode 100644 data/hfopenllm_v2/cpayne1303/cp2024/2bfb7bea-a344-4249-8bdc-e6c483518df5.json
 create mode 100644 data/hfopenllm_v2/cpayne1303/cp2024/5a007612-c8e7-4f6b-baa9-a21af7e908c6.json
 create mode 100644 data/hfopenllm_v2/cpayne1303/llama-43m-beta/fdefdd3e-2d83-4430-bd95-e16a1935dff1.json
 create mode 100644 data/hfopenllm_v2/cpayne1303/llama-43m-beta/ffdd45bf-3409-4b92-909a-25a32ba27f82.json
 create mode 100644 data/hfopenllm_v2/cpayne1303/smallcp2024/a78ab8ac-2c2e-405a-95ee-0d1d27cf533b.json
 delete mode 100644 data/hfopenllm_v2/cpayne1303/smallcp2024/fcbede38-3a5b-4cd7-b144-cbf26cc05df9.json
 delete mode 100644 data/hfopenllm_v2/crestf411/MN-Slush/b32a7808-7a64-41a8-aad4-030efc512906.json
 create mode 100644 data/hfopenllm_v2/crestf411/MN-Slush/d9d49bf7-f6f0-4c25-9182-d815454940e3.json
 create mode 100644 data/hfopenllm_v2/cstr/llama3.1-8b-spaetzle-v90/deb48e93-0378-482f-8a5d-7ec350497e0b.json
 create mode 100644 data/hfopenllm_v2/cyberagent/calm3-22b-chat/302a9a47-8603-42d9-85fb-64c60e7c6f44.json
 delete mode 100644 data/hfopenllm_v2/cyberagent/calm3-22b-chat/b7ce290d-d082-4586-ac4b-516e8130ddc2.json
 create mode 100644 data/hfopenllm_v2/darkc0de/BuddyGlassNeverSleeps/28d52801-3998-421f-a37a-2b7b677d0eaa.json
 delete mode 100644 data/hfopenllm_v2/darkc0de/BuddyGlassNeverSleeps/675f6dfe-c623-4694-94cb-8705aab5521f.json
 create mode 100644 data/hfopenllm_v2/darkc0de/BuddyGlassUncensored2025.2/32b4e23b-9430-45a8-bfa2-eea2e89792c4.json
 delete mode 100644 data/hfopenllm_v2/darkc0de/BuddyGlassUncensored2025.2/ea8dfb5f-750d-4573-a2bb-dadafc3a73b7.json
 create mode 100644 data/hfopenllm_v2/darkc0de/BuddyGlass_v0.3_Xortron7MethedUpSwitchedUp/0336e168-e313-44cb-a030-42e6d20e92df.json
 delete mode 100644 data/hfopenllm_v2/darkc0de/BuddyGlass_v0.3_Xortron7MethedUpSwitchedUp/adf85459-eba0-48a8-ad54-1e17d1ea5b31.json
 create mode 100644 data/hfopenllm_v2/databricks/dbrx-base/11bd8b5b-2ea4-4ec5-8fe6-654aedb40fc9.json
 delete mode 100644 data/hfopenllm_v2/databricks/dbrx-base/17febb53-0735-4983-8049-85319818ab84.json
 delete mode 100644 data/hfopenllm_v2/databricks/dbrx-instruct/639e4921-9fa8-446d-b539-f03a7589b142.json
 create mode 100644 data/hfopenllm_v2/databricks/dbrx-instruct/6d97749c-3bfa-4c32-b581-a5e2b73303f3.json
 delete mode 100644 data/hfopenllm_v2/databricks/dolly-v1-6b/62299ec1-dd42-4751-a224-3bdda71d3cdf.json
 create mode 100644 data/hfopenllm_v2/databricks/dolly-v1-6b/ec58907d-b67c-467e-a3dd-b9f9c10138f0.json
 create mode 100644 data/hfopenllm_v2/databricks/dolly-v2-12b/a7f09a3d-025c-48fa-9358-863b9ae382b1.json
 delete mode 100644 data/hfopenllm_v2/databricks/dolly-v2-12b/c83e2bf0-5d4e-45c4-aff2-27aea2bc0fb6.json
 delete mode 100644 data/hfopenllm_v2/databricks/dolly-v2-3b/a8838707-f188-440e-801f-e780e0dd362a.json
 create mode 100644 data/hfopenllm_v2/databricks/dolly-v2-3b/bf2be2d5-58de-4550-b733-a5910bded48d.json
 create mode 100644 data/hfopenllm_v2/databricks/dolly-v2-7b/52b32c1f-6189-4850-b3f4-de442eb2ccb5.json
 delete mode 100644 data/hfopenllm_v2/databricks/dolly-v2-7b/68f999d7-2dc2-4b3c-ab02-6140387893c0.json
 delete mode 100644 data/hfopenllm_v2/davidkim205/Rhea-72b-v0.5/106de4e2-a8d3-40d3-bdbc-0b95930e9ba6.json
 create mode 100644 data/hfopenllm_v2/davidkim205/Rhea-72b-v0.5/87b44160-c3dd-452d-8c15-c4f758f8db7b.json
 create mode 100644 data/hfopenllm_v2/davidkim205/nox-solar-10.7b-v4/3e6814d3-54ea-493f-a9fc-85ae9eed1b05.json
 delete mode 100644 data/hfopenllm_v2/davidkim205/nox-solar-10.7b-v4/fcc755d0-6269-49e6-890b-4a14417601a1.json
 create mode 100644 data/hfopenllm_v2/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/35b7ff42-3825-4240-97bf-f8af7e8c23ff.json
 create mode 100644 data/hfopenllm_v2/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/c108173e-1582-4c99-9291-46986d7ba1cf.json
 create mode 100644 data/hfopenllm_v2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/6feb08b0-1c67-4fe2-a001-0b3b84529687.json
 create mode 100644 data/hfopenllm_v2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/d4ab3df2-109a-4eec-9742-dc3bb79d5a58.json
 create mode 100644 data/hfopenllm_v2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/53ec995e-bcfd-4a72-bd9a-45d14da3f219.json
 create mode 100644 data/hfopenllm_v2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/299a0397-89c7-4329-9599-9fc29a52db87.json
 create mode 100644 data/hfopenllm_v2/deepseek-ai/deepseek-llm-67b-chat/41adbc32-6cdf-49ba-980c-6eb6f722b40b.json
 delete mode 100644 data/hfopenllm_v2/deepseek-ai/deepseek-llm-67b-chat/eeea1c5c-bf81-4533-aace-ccb85153320f.json
 create mode 100644 data/hfopenllm_v2/deepseek-ai/deepseek-llm-7b-base/4236ece5-f2b2-44e7-9503-9731bff20155.json
 delete mode 100644 data/hfopenllm_v2/deepseek-ai/deepseek-llm-7b-base/e11d46c2-c121-4c74-94ae-e6ec9a5898af.json
 create mode 100644 data/hfopenllm_v2/deepseek-ai/deepseek-llm-7b-chat/b33d672c-4a96-4093-bc13-25c42303b918.json
 delete mode 100644 data/hfopenllm_v2/deepseek-ai/deepseek-llm-7b-chat/b9dd96f5-6ab0-4df4-9ee2-bd34c4c9fb05.json
 create mode 100644 data/hfopenllm_v2/deepseek-ai/deepseek-moe-16b-base/2b4f42fc-8b25-481c-98f7-911c52fdd242.json
 delete mode 100644 data/hfopenllm_v2/deepseek-ai/deepseek-moe-16b-base/32767af1-f01b-42ca-a8e2-6fecc5af4bfc.json
 create mode 100644 data/hfopenllm_v2/deepseek-ai/deepseek-moe-16b-chat/634b7a64-2bd3-48b8-b2f4-a93189801850.json
 delete mode 100644 data/hfopenllm_v2/deepseek-ai/deepseek-moe-16b-chat/81c514f2-5a06-4d50-8c00-dc8b97529f46.json
 delete mode 100644 data/hfopenllm_v2/dfurman/CalmeRys-78B-Orpo-v0.1/31d8cf18-7b35-438e-8dc6-cdba0f593348.json
 create mode 100644 data/hfopenllm_v2/dfurman/CalmeRys-78B-Orpo-v0.1/72a4bcc3-9dfc-4268-be4e-cda5837a3da2.json
 create mode 100644 data/hfopenllm_v2/dfurman/Llama-3-70B-Orpo-v0.1/78fa85f6-baff-4d95-ad3a-a0663f51b0a0.json
 create mode 100644 data/hfopenllm_v2/dfurman/Llama-3-8B-Orpo-v0.1/359231a5-6eb9-4f73-a6f1-d7fd7f35c7ed.json
 create mode 100644 data/hfopenllm_v2/dfurman/Llama-3-8B-Orpo-v0.1/79b81e37-f75e-4b18-b145-73c42625ced5.json
 create mode 100644 data/hfopenllm_v2/dfurman/Qwen2-72B-Orpo-v0.1/2d99af7a-f67c-4e74-9ba2-f1401dfdf9fb.json
 create mode 100644 data/hfopenllm_v2/dicta-il/dictalm2.0-instruct/315fa815-fab0-47c9-8185-00bc597c0176.json
 delete mode 100644 data/hfopenllm_v2/dicta-il/dictalm2.0-instruct/4fc01471-7a04-4f46-a973-42f5a3fd67be.json
 create mode 100644 data/hfopenllm_v2/dicta-il/dictalm2.0/0c1686db-b396-4ecf-86f1-e4e092491acd.json
 delete mode 100644 data/hfopenllm_v2/dicta-il/dictalm2.0/613c1922-270a-4e8b-ae9d-20fa25573258.json
 create mode 100644 data/hfopenllm_v2/distilbert/distilgpt2/57455fbc-b5a9-4a3b-9a30-7da0593fd778.json
 create mode 100644 data/hfopenllm_v2/divyanshukunwar/SASTRI_1_9B/a8f9d0e6-5a1a-4d09-ac78-47fd586384df.json
 delete mode 100644 data/hfopenllm_v2/divyanshukunwar/SASTRI_1_9B/f0ccf0c5-269f-46e1-a13e-b54f2903779b.json
 create mode 100644 data/hfopenllm_v2/djuna-test-lab/TEST-L3.2-ReWish-3B-ties-w-base/9d0d4eee-0b87-485c-843f-e32d08aa601b.json
 delete mode 100644 data/hfopenllm_v2/djuna-test-lab/TEST-L3.2-ReWish-3B-ties-w-base/f64d7325-38eb-4cd4-80b3-bd63d4acb72f.json
 delete mode 100644 data/hfopenllm_v2/djuna-test-lab/TEST-L3.2-ReWish-3B/6d57a63e-0fa7-442b-9156-5a8985e04762.json
 create mode 100644 data/hfopenllm_v2/djuna-test-lab/TEST-L3.2-ReWish-3B/e47c83ff-9a16-488b-8ccf-4a2fad2b14fc.json
 delete mode 100644 data/hfopenllm_v2/djuna/G2-BigGSHT-27B-2/69cc67cc-52f9-464a-ab04-b00bb3d8c459.json
 create mode 100644 data/hfopenllm_v2/djuna/G2-BigGSHT-27B-2/8c7e25df-884d-4940-8185-4c1b82fac8c5.json
 create mode 100644 data/hfopenllm_v2/djuna/G2-GSHT/83611d50-01d0-4642-a104-daf77f1a0fe8.json
 delete mode 100644 data/hfopenllm_v2/djuna/G2-GSHT/b012b4a9-52d9-4b75-b80d-819579572f05.json
 create mode 100644 data/hfopenllm_v2/djuna/Gemma-2-gemmama-9b/5cbdafba-6071-4da1-8b19-3de612e9ff18.json
 create mode 100644 data/hfopenllm_v2/djuna/L3.1-ForStHS/1c934cba-c94a-4aad-9645-84658e0b5588.json
 delete mode 100644 data/hfopenllm_v2/djuna/L3.1-ForStHS/2d9e083d-2c5e-4f42-ab27-6f0c150ee4db.json
 create mode 100644 data/hfopenllm_v2/djuna/L3.1-Promissum_Mane-8B-Della-1.5-calc/7aad3f6b-89d9-4c9e-9339-cf4111fc37c6.json
 delete mode 100644 data/hfopenllm_v2/djuna/L3.1-Promissum_Mane-8B-Della-1.5-calc/f738c507-0826-4d7a-a999-8a01274d8697.json
 create mode 100644 data/hfopenllm_v2/djuna/L3.1-Promissum_Mane-8B-Della-calc/38d4a8ca-4273-4e6a-8a39-3b5ff20ec461.json
 delete mode 100644 data/hfopenllm_v2/djuna/L3.1-Promissum_Mane-8B-Della-calc/54d2c316-3c41-4d13-879d-a23c071a6885.json
 create mode 100644 data/hfopenllm_v2/djuna/L3.1-Purosani-2-8B/3d65fbc2-bf91-479c-a687-e9ef702794fb.json
 delete mode 100644 data/hfopenllm_v2/djuna/L3.1-Purosani-2-8B/f1cc7f8d-72da-40ef-8cb1-f069cd0c052e.json
 delete mode 100644 data/hfopenllm_v2/djuna/L3.1-Suze-Vume-calc/3a48a9ec-61a5-45fd-903a-de2ef90ef13e.json
 create mode 100644 data/hfopenllm_v2/djuna/L3.1-Suze-Vume-calc/650cdbbb-e066-4581-8d61-77aa6a4c402c.json
 create mode 100644 data/hfopenllm_v2/djuna/MN-Chinofun-12B-2/05d566c5-1810-483c-8ce0-84635b9457dc.json
 delete mode 100644 data/hfopenllm_v2/djuna/MN-Chinofun-12B-2/7b384a2a-50c5-4c04-a9dd-5a9acefbd81f.json
 delete mode 100644 data/hfopenllm_v2/djuna/MN-Chinofun-12B-3/32a4d80a-9d28-47f4-b68f-36e95a400bf2.json
 create mode 100644 data/hfopenllm_v2/djuna/MN-Chinofun-12B-3/37e3456a-92ff-4122-a697-ffbdc1c79555.json
 delete mode 100644 data/hfopenllm_v2/djuna/MN-Chinofun-12B-4/4f09e60c-e68a-426c-ac7e-f5e6755e14be.json
 create mode 100644 data/hfopenllm_v2/djuna/MN-Chinofun-12B-4/70c908d4-f1bf-4553-9bf7-95eb593b4853.json
 delete mode 100644 data/hfopenllm_v2/djuna/MN-Chinofun/023756a1-66cc-423a-803b-0d8b0f368bd2.json
 create mode 100644 data/hfopenllm_v2/djuna/MN-Chinofun/2ccc9c20-5414-4286-abcd-ad2b20f8652d.json
 create mode 100644 data/hfopenllm_v2/djuna/Q2.5-Partron-7B/50f4560a-e172-42b9-b552-437aff158a38.json
 delete mode 100644 data/hfopenllm_v2/djuna/Q2.5-Partron-7B/b045b20a-cdbf-4495-89ae-b235ada2e9e0.json
 delete mode 100644 data/hfopenllm_v2/djuna/Q2.5-Veltha-14B-0.5/258520cb-360a-4629-be8e-e4ffca8a81b2.json
 create mode 100644 data/hfopenllm_v2/djuna/Q2.5-Veltha-14B-0.5/c6a3abac-8a34-4725-915b-c27c3d0bc484.json
 delete mode 100644 data/hfopenllm_v2/djuna/Q2.5-Veltha-14B/0a9560cd-d3e2-4d41-b83c-f321bcfc9c3c.json
 create mode 100644 data/hfopenllm_v2/djuna/Q2.5-Veltha-14B/a8ed68ea-6463-4ff9-9dcd-034080272dec.json
 create mode 100644 data/hfopenllm_v2/dnhkng/RYS-Llama-3-8B-Instruct/5799ce8b-c00d-49f6-96dc-f7dd057a268c.json
 delete mode 100644 data/hfopenllm_v2/dnhkng/RYS-Llama-3-8B-Instruct/85472ae2-d5f0-4896-811b-d4217241bcef.json
 create mode 100644 data/hfopenllm_v2/dnhkng/RYS-Llama-3-Huge-Instruct/0d261023-3e35-4160-98ca-241bbaee927e.json
 delete mode 100644 data/hfopenllm_v2/dnhkng/RYS-Llama-3-Huge-Instruct/0e8dfce1-b0d3-4ba5-a3be-ba6f52421841.json
 create mode 100644 data/hfopenllm_v2/dnhkng/RYS-Llama-3-Large-Instruct/f0454d3b-18b4-488a-94dd-fb24729996c7.json
 delete mode 100644 data/hfopenllm_v2/dnhkng/RYS-Llama-3-Large-Instruct/f9485436-6935-422f-9eb1-ee7faeb231d1.json
 delete mode 100644 data/hfopenllm_v2/dnhkng/RYS-Llama-3.1-8B-Instruct/62dab9bd-df83-4a0b-be94-0ddd981da6e4.json
 create mode 100644 data/hfopenllm_v2/dnhkng/RYS-Llama-3.1-8B-Instruct/6bafa7a7-3a2a-4141-9564-a762d1cdb1d0.json
 create mode 100644 data/hfopenllm_v2/dnhkng/RYS-Llama3.1-Large/37f20f86-40ba-4f63-b29d-efff6cb0e09b.json
 create mode 100644 data/hfopenllm_v2/dnhkng/RYS-Medium/bf0e7ce4-09e9-4879-993a-eb50b2a421d7.json
 delete mode 100644 data/hfopenllm_v2/dnhkng/RYS-Medium/ca1e127b-ded1-4015-85b9-be134c26644d.json
 delete mode 100644 data/hfopenllm_v2/dnhkng/RYS-Phi-3-medium-4k-instruct/94f92919-36fb-4aed-8c0c-2bee0cd1d301.json
 create mode 100644 data/hfopenllm_v2/dnhkng/RYS-Phi-3-medium-4k-instruct/bcbc29f7-ea03-4dbe-a83e-d4940b2c6bea.json
 delete mode 100644 data/hfopenllm_v2/dnhkng/RYS-XLarge-base/1b0bb4ca-9553-4ddd-bf35-cab66685668d.json
 create mode 100644 data/hfopenllm_v2/dnhkng/RYS-XLarge-base/cbea8d66-0370-4998-8e3a-06fef0a60f0c.json
 delete mode 100644 data/hfopenllm_v2/dnhkng/RYS-XLarge/a2a90b7e-f6db-408a-b5df-284d0b4a6353.json
 create mode 100644 data/hfopenllm_v2/dnhkng/RYS-XLarge/ca48b670-b82e-46cc-beb9-2fd0f11d3585.json
 delete mode 100644 data/hfopenllm_v2/dnhkng/RYS-XLarge2/6f344c50-fdf3-477e-9a76-558ed61fd509.json
 create mode 100644 data/hfopenllm_v2/dnhkng/RYS-XLarge2/d37f99f7-f9c3-48b6-84d3-7da5d77f5030.json
 create mode 100644 data/hfopenllm_v2/dreamgen/WizardLM-2-7B/503c8a24-4ced-4dca-b9df-5733ce89c2ca.json
 delete mode 100644 data/hfopenllm_v2/dreamgen/WizardLM-2-7B/5ed2650d-d76f-49d6-915b-ac551129913e.json
 create mode 100644 data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v1/5c5283a0-819f-4112-bb90-5277423d9c00.json
 delete mode 100644 data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v1/c402fb6f-6e91-4e33-b847-87371373a6eb.json
 delete mode 100644 data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v2/6475a1f1-0c12-4ab3-89fc-cc5aa1d8145e.json
 create mode 100644 data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v2/b636bc82-1625-49b1-beec-cadaf4e1b1a9.json
 create mode 100644 data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v3/00f481c1-0ef0-40bd-bd95-81dc9443a62c.json
 delete mode 100644 data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v3/5767ea0d-318c-4c65-9c96-890d27973302.json
 create mode 100644 data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v4/7ea22fef-2d79-49ae-bf72-9153a4e239c5.json
 delete mode 100644 data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v4/ad9e0902-3542-4994-ae42-4f3ef9f88ab1.json
 delete mode 100644 data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v5/01c33f76-994a-4a1c-951d-88b34e471498.json
 create mode 100644 data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v5/64f441df-1781-4d01-b73b-2156413ad403.json
 create mode 100644 data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v6/4e3676eb-8607-416e-986a-7098bc192820.json
 delete mode 100644 data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v6/65ce9e6f-cab9-4ccc-af89-de9be928529e.json
 create mode 100644 data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v7/2101369c-5042-48f3-a8f2-f9f56e7b6ae7.json
 delete mode 100644 data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v7/abadd81a-bd45-4eba-ae77-25190c751085.json
 create mode 100644 data/hfopenllm_v2/duyhv1411/Llama-3.2-1B-en-vi/c4b86264-3725-4742-91f0-3e01f8d965a4.json
 create mode 100644 data/hfopenllm_v2/duyhv1411/Llama-3.2-3B-en-vi/0308147c-dabb-46bb-8add-d332fcd5a800.json
 create mode 100644 data/hfopenllm_v2/dwikitheduck/gemma-2-2b-id-inst/a9977a0d-e199-488a-a26e-6269806fdb2b.json
 create mode 100644 data/hfopenllm_v2/dwikitheduck/gemma-2-2b-id-instruct/56b89ec8-90c5-4e1e-a458-1bb8b5b92be8.json
 delete mode 100644 data/hfopenllm_v2/dwikitheduck/gemma-2-2b-id-instruct/73418e8c-ce10-4ea4-97f6-6f87c2be05a2.json
 create mode 100644 data/hfopenllm_v2/dwikitheduck/gemma-2-2b-id/4185c376-91c6-435d-ae3b-47cd85151049.json
 create mode 100644 data/hfopenllm_v2/dwikitheduck/gen-inst-1/26e45f5d-1e3d-425f-ba4d-b444dcda7f74.json
 delete mode 100644 data/hfopenllm_v2/dwikitheduck/gen-inst-1/5117b75d-3060-4434-a40d-01c471563685.json
 create mode 100644 data/hfopenllm_v2/dwikitheduck/gen-try1-notemp/09be48ce-61f8-4ba9-b082-b9c475fa714d.json
 delete mode 100644 data/hfopenllm_v2/dwikitheduck/gen-try1-notemp/5bd29754-7f93-42fb-ba9b-7b3a4315bd17.json
 create mode 100644 data/hfopenllm_v2/dwikitheduck/gen-try1/27417bcb-fb2f-41d2-9dfa-9865a36f38d5.json
 delete mode 100644 data/hfopenllm_v2/dwikitheduck/gen-try1/8f00112d-767f-4ac5-ae1c-e37781cf7eec.json
 create mode 100644 data/hfopenllm_v2/dzakwan/dzakwan-MoE-4x7b-Beta/7b6fc3c2-a67d-450e-858c-fa87be122376.json
 delete mode 100644 data/hfopenllm_v2/dzakwan/dzakwan-MoE-4x7b-Beta/f4ceacae-0b81-44ac-8b9d-31d81e145bab.json
 delete mode 100644 data/hfopenllm_v2/ehristoforu/Falcon3-8B-Franken-Basestruct/1653400c-137e-4745-8676-eeaf39bbcc13.json
 create mode 100644 data/hfopenllm_v2/ehristoforu/Falcon3-8B-Franken-Basestruct/76b86418-5450-48c6-ae56-58a19016d055.json
 delete mode 100644 data/hfopenllm_v2/ehristoforu/Falcon3-MoE-2x7B-Insruct/6b208d1e-96f1-4b72-8d31-6c6e43c42111.json
 create mode 100644 data/hfopenllm_v2/ehristoforu/Falcon3-MoE-2x7B-Insruct/e06594e4-899a-4285-b130-f7b605e5a6b9.json
 create mode 100644 data/hfopenllm_v2/ehristoforu/Gemma2-9B-it-psy10k-mental_health/9efdc773-a5c7-4709-88c8-96a67d84a742.json
 create mode 100644 data/hfopenllm_v2/ehristoforu/Gemma2-9b-it-train6/1fcc2f96-afc9-403f-b82e-8e1804506582.json
 create mode 100644 data/hfopenllm_v2/ehristoforu/HappyLlama1/bee1e134-9a43-441a-b977-522c510dd1ce.json
 create mode 100644 data/hfopenllm_v2/ehristoforu/QwenQwen2.5-7B-IT-Dare/b70e1089-d136-4b2f-a253-f361bcf8cdcc.json
 create mode 100644 data/hfopenllm_v2/ehristoforu/QwenQwen2.5-7B-IT/8b7e9c34-a982-4f4d-b5dc-66a12578601f.json
 create mode 100644 data/hfopenllm_v2/ehristoforu/RQwen-v0.1/0ccc36d0-f546-46d1-91d3-15a40c7bf6c1.json
 create mode 100644 data/hfopenllm_v2/ehristoforu/RQwen-v0.2/066abe97-2c6c-4f3b-9e5e-e144f130258a.json
 create mode 100644 data/hfopenllm_v2/ehristoforu/SoRu-0009/a3af8f77-d915-4482-a2b6-c99744aada4b.json
 delete mode 100644 data/hfopenllm_v2/ehristoforu/SoRu-0009/d45e7b32-f09d-4185-ac78-d0eb7a4d3823.json
 create mode 100644 data/hfopenllm_v2/ehristoforu/coolqwen-3b-it/82cc8b37-e242-441e-ac74-1662bcc0a0e2.json
 create mode 100644 data/hfopenllm_v2/ehristoforu/della-70b-test-v1/1527c8bc-c1ec-45f4-9663-4cffbb808f94.json
 delete mode 100644 data/hfopenllm_v2/ehristoforu/della-70b-test-v1/d9f6c1e9-84be-4666-b64f-5da37cf98202.json
 create mode 100644 data/hfopenllm_v2/ehristoforu/falcon3-ultraset/337b8ce8-d697-47f6-94ac-7a420dd7d91b.json
 delete mode 100644 data/hfopenllm_v2/ehristoforu/falcon3-ultraset/e2291d7c-7627-484e-a0c1-1857c642be2b.json
 create mode 100644 data/hfopenllm_v2/ehristoforu/fd-lora-merged-16x32/3d6ed2bb-5be7-4838-abb7-49754f9c3bfe.json
 delete mode 100644 data/hfopenllm_v2/ehristoforu/fd-lora-merged-16x32/4d00474d-97e6-4384-82f7-956b2e7268e9.json
 create mode 100644 data/hfopenllm_v2/ehristoforu/fd-lora-merged-64x128/0a6c7056-1bce-479e-84b0-f4eeea0bd3cc.json
 delete mode 100644 data/hfopenllm_v2/ehristoforu/fd-lora-merged-64x128/6474672b-7728-4ab5-8fdf-749e996272a2.json
 delete mode 100644 data/hfopenllm_v2/ehristoforu/fp4-14b-it-v1/31618256-7ca8-4a3c-bfbf-4397bf2cf339.json
 create mode 100644 data/hfopenllm_v2/ehristoforu/fp4-14b-it-v1/3e236ad8-3828-407f-9076-743b465b8d15.json
 delete mode 100644 data/hfopenllm_v2/ehristoforu/fp4-14b-v1-fix/37d01a2d-f8ca-46a3-a4b7-3fa725b4023b.json
 create mode 100644 data/hfopenllm_v2/ehristoforu/fp4-14b-v1-fix/9e90dcdf-ce2a-4a7c-8b89-6af8b7c2bcfe.json
 create mode 100644 data/hfopenllm_v2/ehristoforu/fq2.5-7b-it-normalize_false/940d88e9-085b-4065-b8c8-92ebe685deb0.json
 delete mode 100644 data/hfopenllm_v2/ehristoforu/fq2.5-7b-it-normalize_false/a5004f95-0854-40d2-8a71-004875544499.json
 create mode 100644 data/hfopenllm_v2/ehristoforu/fq2.5-7b-it-normalize_true/7fdcd616-2c72-4c44-9646-9c32344bfa0b.json
 delete mode 100644 data/hfopenllm_v2/ehristoforu/fq2.5-7b-it-normalize_true/d0d8274c-7d05-4166-a510-487cb294135e.json
 create mode 100644 data/hfopenllm_v2/ehristoforu/frqwen2.5-from7b-duable4layers-it/9d358f55-810c-4ac1-adc7-83f95bd74c11.json
 create mode 100644 data/hfopenllm_v2/ehristoforu/frqwen2.5-from7b-it/9ba3fe31-772a-4cf7-aa13-3680b6ad51ba.json
 delete mode 100644 data/hfopenllm_v2/ehristoforu/mllama-3.1-8b-instruct/40016b83-0730-4e67-b7e9-3b1d29d9d1be.json
 create mode 100644 data/hfopenllm_v2/ehristoforu/mllama-3.1-8b-instruct/651a32b1-77fb-4acf-89bf-2d45b684944d.json
 create mode 100644 data/hfopenllm_v2/ehristoforu/mllama-3.1-8b-it/192c4037-753a-4790-80d0-33c4d277102d.json
 delete mode 100644 data/hfopenllm_v2/ehristoforu/moremerge-upscaled/5c465aeb-c6be-4a22-9cf0-3d9c2558ba39.json
 create mode 100644 data/hfopenllm_v2/ehristoforu/moremerge-upscaled/679d66bf-244e-4080-9a42-0a0c6cfdc965.json
 delete mode 100644 data/hfopenllm_v2/ehristoforu/moremerge/38cf2a56-ed33-4f7e-94aa-bf4f15a5a53c.json
 create mode 100644 data/hfopenllm_v2/ehristoforu/moremerge/73b0ca8a-fb16-43eb-a9af-a01219cf6196.json
 create mode 100644 data/hfopenllm_v2/ehristoforu/phi-4-25b/7f00ecbc-fcc8-43ae-867b-cb160e63a80c.json
 create mode 100644 data/hfopenllm_v2/ehristoforu/qwen2.5-test-32b-it/a8238bd4-3982-4e45-92e4-bab77e528e29.json
 create mode 100644 data/hfopenllm_v2/ehristoforu/qwen2.5-with-lora-think-3b-it/f87f9f08-e989-4e99-a254-a3650e7ab1b6.json
 delete mode 100644 data/hfopenllm_v2/ehristoforu/rmoe-v1/e58aecba-3254-426d-aac2-05a32c3cbdab.json
 create mode 100644 data/hfopenllm_v2/ehristoforu/rmoe-v1/f40496a9-fb14-4b2d-8070-84f55e6417f6.json
 delete mode 100644 data/hfopenllm_v2/ehristoforu/rufalcon3-3b-it/8f4336f8-1fdb-4a3d-8b9a-2e7c5e156f07.json
 create mode 100644 data/hfopenllm_v2/ehristoforu/rufalcon3-3b-it/cc52f59d-5669-44b0-b1af-e6fd0836e284.json
 create mode 100644 data/hfopenllm_v2/ehristoforu/ruphi-4b/67525a37-f658-40e8-89a1-de8bf6275a00.json
 create mode 100644 data/hfopenllm_v2/ehristoforu/testq-32b/3cb34886-7a93-42b9-a8fa-fab5f4bd8624.json
 delete mode 100644 data/hfopenllm_v2/ehristoforu/testq-32b/d5acc9ed-9fd1-411f-a85c-e790521e7fe4.json
 delete mode 100644 data/hfopenllm_v2/ehristoforu/tmoe-v2/0a84406f-a970-4a03-8d2f-c82a8bbd3872.json
 create mode 100644 data/hfopenllm_v2/ehristoforu/tmoe-v2/0dd1f9fc-cf54-47ff-8ccd-148b45f3c921.json
 delete mode 100644 data/hfopenllm_v2/ehristoforu/tmoe/0a160c2d-06ed-43c0-8705-bd76e47c093a.json
 create mode 100644 data/hfopenllm_v2/ehristoforu/tmoe/7a05616e-7335-419a-914d-00fb287fe663.json
 create mode 100644 data/hfopenllm_v2/ehristoforu/trd-7b-it/070a21b5-4cd3-41b7-9653-0d2d2e4f273d.json
 delete mode 100644 data/hfopenllm_v2/ehristoforu/trd-7b-it/3bd7f3c1-772a-45fa-9d71-a6e3dff3b54f.json
 create mode 100644 data/hfopenllm_v2/ehristoforu/ud-14b/5afc044a-3138-443f-89cf-74f1272cc632.json
 delete mode 100644 data/hfopenllm_v2/ehristoforu/ud-14b/7e7ffbef-c8d4-47ff-9ae6-7f0701e9e192.json
 delete mode 100644 data/hfopenllm_v2/elinas/Chronos-Gold-12B-1.0/4705d82c-514c-48a1-8f87-4d2b8f9aff6b.json
 create mode 100644 data/hfopenllm_v2/elinas/Chronos-Gold-12B-1.0/a6c1d914-647c-46b7-b0e1-712b8d506780.json
 create mode 100644 data/hfopenllm_v2/ell44ot/gemma-2b-def/43f35eac-0946-42f9-a128-eb8011c29588.json
 create mode 100644 data/hfopenllm_v2/euclaise/ReMask-3B/04c22be7-2cf4-4774-b479-863199c7c3a4.json
 delete mode 100644 data/hfopenllm_v2/euclaise/ReMask-3B/a905005d-85fa-44c9-848b-286f9100bab7.json
 delete mode 100644 data/hfopenllm_v2/eworojoshua/vas-01/f02ca364-4bf8-4f00-aecc-492ac1f0817a.json
 create mode 100644 data/hfopenllm_v2/eworojoshua/vas-01/fc3d436b-ec61-4458-a3c6-1df41057ea70.json
 delete mode 100644 data/hfopenllm_v2/ewre324/Thinker-Llama-3.2-3B-Instruct-Reasoning/8bdc63c5-2ed3-4738-8a5c-6b90ba969f99.json
 create mode 100644 data/hfopenllm_v2/ewre324/Thinker-Llama-3.2-3B-Instruct-Reasoning/e3ed157f-f306-40fb-b3a1-d3434236759e.json
 create mode 100644 data/hfopenllm_v2/ewre324/Thinker-Qwen2.5-0.5B-Instruct-Reasoning/8793b3e3-f409-499a-81f8-c250c8092841.json
 delete mode 100644 data/hfopenllm_v2/ewre324/Thinker-Qwen2.5-0.5B-Instruct-Reasoning/fe29c3e7-463b-45a1-8377-97e7c7f21874.json
 create mode 100644 data/hfopenllm_v2/ewre324/Thinker-SmolLM2-135M-Instruct-Reasoning/33572f63-15ba-4fbc-b1cf-56b978384d02.json
 delete mode 100644 data/hfopenllm_v2/ewre324/Thinker-SmolLM2-135M-Instruct-Reasoning/5a03703c-6934-437c-aaca-2acfdd4ca629.json
 create mode 100644 data/hfopenllm_v2/ewre324/ewre324-R1-SmolLM2-135M-Distill/44c636ba-8303-4d75-bcb5-46e3c07a991a.json
 delete mode 100644 data/hfopenllm_v2/ewre324/ewre324-R1-SmolLM2-135M-Distill/6429c440-4d89-4d31-919c-63cde25ba99f.json
 create mode 100644 data/hfopenllm_v2/experiment-llm/exp-3-q-r/0a002444-3e5a-4fc8-acc6-72210a4181a9.json
 delete mode 100644 data/hfopenllm_v2/experiment-llm/exp-3-q-r/7d72dcb1-bc5d-41bf-b333-c21e67b0acd2.json
 delete mode 100644 data/hfopenllm_v2/facebook/opt-1.3b/8675526d-af0b-4bf2-b143-123249371076.json
 create mode 100644 data/hfopenllm_v2/facebook/opt-1.3b/bbf936a5-3594-4d0a-b5af-7a01740d0c81.json
 create mode 100644 data/hfopenllm_v2/facebook/opt-30b/1164abea-4cc2-46a7-a44b-f024a2ce40b4.json
 delete mode 100644 data/hfopenllm_v2/facebook/opt-30b/1883ddb6-e4cc-4935-81ba-af30af1537e9.json
 create mode 100644 data/hfopenllm_v2/failspy/Llama-3-8B-Instruct-MopeyMule/bfd88bec-fcc2-4580-a5c7-4792a0300a5b.json
 delete mode 100644 data/hfopenllm_v2/failspy/Llama-3-8B-Instruct-MopeyMule/f5bfa461-15bf-4e32-8471-74f456c62fd9.json
 create mode 100644 data/hfopenllm_v2/failspy/Llama-3-8B-Instruct-abliterated/7f49e582-a01f-481f-8345-1c384fc8b567.json
 delete mode 100644 data/hfopenllm_v2/failspy/Llama-3-8B-Instruct-abliterated/8aa6c90e-a6ee-4dfe-8bf4-b5d256be9cd6.json
 create mode 100644 data/hfopenllm_v2/failspy/Meta-Llama-3-70B-Instruct-abliterated-v3.5/10937ed1-56e2-4aad-b717-5125bc8ac72a.json
 delete mode 100644 data/hfopenllm_v2/failspy/Meta-Llama-3-70B-Instruct-abliterated-v3.5/e0329607-d832-4252-ad71-81e8a8c4bb31.json
 delete mode 100644 data/hfopenllm_v2/failspy/Meta-Llama-3-8B-Instruct-abliterated-v3/c598dbff-4ab5-4405-b75d-13571ae3d862.json
 create mode 100644 data/hfopenllm_v2/failspy/Meta-Llama-3-8B-Instruct-abliterated-v3/f4622539-c0ac-4e9f-86d4-00e3c826d03b.json
 delete mode 100644 data/hfopenllm_v2/failspy/Phi-3-medium-4k-instruct-abliterated-v3/264bc4a6-f0ad-4eef-a519-6d97f8f6ab91.json
 create mode 100644 data/hfopenllm_v2/failspy/Phi-3-medium-4k-instruct-abliterated-v3/6b13b2b1-68cd-4aae-8f2b-2400f40760d7.json
 create mode 100644 data/hfopenllm_v2/failspy/llama-3-70B-Instruct-abliterated/5b02726c-ba3f-482b-9f10-87b8d69ffeb4.json
 delete mode 100644 data/hfopenllm_v2/failspy/llama-3-70B-Instruct-abliterated/f31f7ad3-9018-4891-be05-12787728904c.json
 create mode 100644 data/hfopenllm_v2/fblgit/TheBeagle-v2beta-32B-MGS/21d6f2dd-7bd6-42a9-b14e-c25777497890.json
 delete mode 100644 data/hfopenllm_v2/fblgit/TheBeagle-v2beta-32B-MGS/63bdc7e2-6518-4da4-81f4-74aab25f7a5e.json
 delete mode 100644 data/hfopenllm_v2/fblgit/TheBeagle-v2beta-32B-MGS/8338dd8a-88c2-42f8-9d67-13b852e3c0ea.json
 create mode 100644 data/hfopenllm_v2/fblgit/TheBeagle-v2beta-32B-MGS/d0bc11cb-56ff-4c77-9446-e76e550e0919.json
 delete mode 100644 data/hfopenllm_v2/fblgit/UNA-SimpleSmaug-34b-v1beta/f98b051e-0984-423d-89c0-352368168d75.json
 create mode 100644 data/hfopenllm_v2/fblgit/UNA-SimpleSmaug-34b-v1beta/ff78dc97-e9cf-4215-a607-3e80892af82c.json
 create mode 100644 data/hfopenllm_v2/fblgit/UNA-TheBeagle-7b-v1/0ff1c6ff-5404-4d61-b6c6-f6ef7ae9ca8b.json
 delete mode 100644 data/hfopenllm_v2/fblgit/UNA-TheBeagle-7b-v1/454be483-8a45-4bea-a370-5f5a74a924ea.json
 create mode 100644 data/hfopenllm_v2/fblgit/UNA-ThePitbull-21.4B-v2/48837141-2556-4658-87e0-bb88cfcd562a.json
 delete mode 100644 data/hfopenllm_v2/fblgit/UNA-ThePitbull-21.4B-v2/afdf8e40-d87a-4a9c-93a7-a65fe2ae732a.json
 delete mode 100644 data/hfopenllm_v2/fblgit/cybertron-v4-qw7B-MGS/60ac5509-346d-4717-a729-0413fce4b203.json
 create mode 100644 data/hfopenllm_v2/fblgit/cybertron-v4-qw7B-MGS/f2d6da5d-3685-43de-8ceb-5b798f88e24c.json
 delete mode 100644 data/hfopenllm_v2/fblgit/cybertron-v4-qw7B-UNAMGS/8c73c2a6-b2e9-419d-8c00-8a983790ba9b.json
 create mode 100644 data/hfopenllm_v2/fblgit/cybertron-v4-qw7B-UNAMGS/9ec02ccd-329a-4d62-9f04-87de6fda5011.json
 create mode 100644 data/hfopenllm_v2/fblgit/juanako-7b-UNA/781d0332-e332-4ff7-8585-9c2d8395a147.json
 delete mode 100644 data/hfopenllm_v2/fblgit/juanako-7b-UNA/f61e534a-06b4-4558-8ee6-227ad1e97699.json
 delete mode 100644 data/hfopenllm_v2/fblgit/miniclaus-qw1.5B-UNAMGS-GRPO/a1d14150-3b2e-489f-8d18-8894862e9ab0.json
 create mode 100644 data/hfopenllm_v2/fblgit/miniclaus-qw1.5B-UNAMGS-GRPO/d6dd460e-c352-4d31-8941-183c6eabd0a7.json
 delete mode 100644 data/hfopenllm_v2/fblgit/miniclaus-qw1.5B-UNAMGS/4b337805-4bd3-4106-bcde-adb7a6fbec23.json
 create mode 100644 data/hfopenllm_v2/fblgit/miniclaus-qw1.5B-UNAMGS/66bf6442-04ea-437b-88c4-e61afc6f7139.json
 create mode 100644 data/hfopenllm_v2/fblgit/pancho-v1-qw25-3B-UNAMGS/0d1911f5-a2e7-4511-a8d8-098cbf9207df.json
 delete mode 100644 data/hfopenllm_v2/fblgit/pancho-v1-qw25-3B-UNAMGS/701cb3af-8916-47ab-b118-1cd778a23e66.json
 delete mode 100644 data/hfopenllm_v2/fblgit/una-cybertron-7b-v2-bf16/8fc3e145-958b-4f25-bfab-4364bcdfeeb1.json
 create mode 100644 data/hfopenllm_v2/fblgit/una-cybertron-7b-v2-bf16/abc18648-ef96-4695-94d5-fa14be277431.json
 delete mode 100644 data/hfopenllm_v2/fhai50032/RolePlayLake-7B/af85e87f-1308-4968-850a-27382f36a63a.json
 create mode 100644 data/hfopenllm_v2/fhai50032/RolePlayLake-7B/ff1e7aaa-3f29-4192-a0e0-80fcd11ba055.json
 create mode 100644 data/hfopenllm_v2/fhai50032/Unaligned-Thinker-PHI-4/cc8ef5bd-957f-4308-9539-00a696182056.json
 create mode 100644 data/hfopenllm_v2/flammenai/Llama3.1-Flammades-70B/abc7652f-b88e-40ba-847c-c99dce9f2719.json
 create mode 100644 data/hfopenllm_v2/flammenai/Mahou-1.2a-llama3-8B/56e36294-e616-45a1-8dc9-2c14cf3ee8d0.json
 create mode 100644 data/hfopenllm_v2/flammenai/Mahou-1.2a-mistral-7B/4b81caad-92ed-4bd5-98bd-58582854b5d8.json
 create mode 100644 data/hfopenllm_v2/flammenai/Mahou-1.5-llama3.1-70B/2cef0040-6d4c-4c38-be40-5477911f3063.json
 create mode 100644 data/hfopenllm_v2/flammenai/Mahou-1.5-mistral-nemo-12B/4aeef94f-823e-4be5-b4f1-37463e052748.json
 delete mode 100644 data/hfopenllm_v2/flammenai/flammen15-gutenberg-DPO-v1-7B/1244b8d9-e832-4f2b-8ae5-52449f6ac38c.json
 create mode 100644 data/hfopenllm_v2/flammenai/flammen15-gutenberg-DPO-v1-7B/3d367147-373f-4543-be19-55a6429558a2.json
 delete mode 100644 data/hfopenllm_v2/fluently-lm/FluentlyLM-Prinum/950d2518-7245-4ed4-9b16-91f944aa8f15.json
 create mode 100644 data/hfopenllm_v2/fluently-lm/FluentlyLM-Prinum/cb93091a-6c46-438a-b111-cbf7e2fac420.json
 delete mode 100644 data/hfopenllm_v2/fluently-lm/Llama-TI-8B-Instruct/47960f3f-b39c-4641-8a94-fb70f9a6a53f.json
 create mode 100644 data/hfopenllm_v2/fluently-lm/Llama-TI-8B-Instruct/ea6048f1-8be4-4ec8-a5d5-35ff1523d74a.json
 create mode 100644 data/hfopenllm_v2/fluently-lm/Llama-TI-8B/f4dc1659-800f-49d2-a290-48e9d4b15581.json
 delete mode 100644 data/hfopenllm_v2/fluently-sets/FalconThink3-10B-IT/9329922e-7594-497d-bfab-9c8a18300dc9.json
 create mode 100644 data/hfopenllm_v2/fluently-sets/FalconThink3-10B-IT/d4d8a784-5bd5-4437-8e0d-75dcb967ae33.json
 create mode 100644 data/hfopenllm_v2/fluently-sets/reasoning-1-1k-demo/91017e73-f33a-49f5-ac87-f6e6a178d885.json
 delete mode 100644 data/hfopenllm_v2/fluently-sets/reasoning-1-1k-demo/c63fc7e4-87ae-4516-ad3d-df95693133d5.json
 delete mode 100644 data/hfopenllm_v2/formulae/mita-elite-sce-gen1.1-v1-7b-2-26-2025-exp/936751f5-4483-4986-9a8c-cb002feb8858.json
 create mode 100644 data/hfopenllm_v2/formulae/mita-elite-sce-gen1.1-v1-7b-2-26-2025-exp/b7a75bca-6afe-448a-8e5c-53ebd577c964.json
 delete mode 100644 data/hfopenllm_v2/formulae/mita-elite-v1.1-7b-2-25-2025/7352f47c-8b57-477f-8190-b08b5b23dfb5.json
 create mode 100644 data/hfopenllm_v2/formulae/mita-elite-v1.1-7b-2-25-2025/8cdced5c-23bc-4426-a0c9-b9bf82913683.json
 delete mode 100644 data/hfopenllm_v2/formulae/mita-elite-v1.1-gen2-7b-2-25-2025/106c33d2-84fb-4ea3-b2d3-78981834fdb0.json
 create mode 100644 data/hfopenllm_v2/formulae/mita-elite-v1.1-gen2-7b-2-25-2025/368784c8-6fc2-4340-8277-a6a9a9800a99.json
 delete mode 100644 data/hfopenllm_v2/formulae/mita-elite-v1.2-7b-2-26-2025/761560dc-3a0b-481f-8ec2-4d1ea97cfa6f.json
 create mode 100644 data/hfopenllm_v2/formulae/mita-elite-v1.2-7b-2-26-2025/f7ddf26b-4b4c-404b-b9d3-6ceaf78d39aa.json
 delete mode 100644 data/hfopenllm_v2/formulae/mita-gen3-7b-2-26-2025/0aa40e02-762d-4a80-932f-f967057c4f50.json
 create mode 100644 data/hfopenllm_v2/formulae/mita-gen3-7b-2-26-2025/f423b0d1-3536-4865-9615-f89b9d15b14c.json
 delete mode 100644 data/hfopenllm_v2/formulae/mita-gen3-v1.2-7b-2-26-2025/a28f8779-d2df-4371-b946-472b335f3ca3.json
 create mode 100644 data/hfopenllm_v2/formulae/mita-gen3-v1.2-7b-2-26-2025/c7e8333d-1d79-4cfa-9833-fa42f9fcbb4b.json
 create mode 100644 data/hfopenllm_v2/formulae/mita-math-v2.3-2-25-2025/b6149d15-3e0f-43d2-ae90-eca290a94edb.json
 delete mode 100644 data/hfopenllm_v2/formulae/mita-math-v2.3-2-25-2025/fa005333-c7b5-4494-a8cb-4edb1f7d00b9.json
 delete mode 100644 data/hfopenllm_v2/formulae/mita-v1-7b/9c629542-6fd0-4cd1-90c7-7f1e95a7a25e.json
 create mode 100644 data/hfopenllm_v2/formulae/mita-v1-7b/e21f5d83-6b71-488d-ad55-d23268fbd611.json
 delete mode 100644 data/hfopenllm_v2/formulae/mita-v1.1-7b-2-24-2025/332cbdd8-96b7-40d5-87c6-3610dcbcdc54.json
 create mode 100644 data/hfopenllm_v2/formulae/mita-v1.1-7b-2-24-2025/68e1a42e-4318-4b5a-a45b-2607b7c2fe05.json
 create mode 100644 data/hfopenllm_v2/formulae/mita-v1.2-7b-2-24-2025/12a03ffb-d66b-4d00-a43b-fd5be80e1b07.json
 delete mode 100644 data/hfopenllm_v2/formulae/mita-v1.2-7b-2-24-2025/a07149d4-66e5-4a0d-b4ae-b696027e821c.json
 create mode 100644 data/hfopenllm_v2/frameai/Loxa-4B/adbad8dc-7d13-44cc-a5c6-e8da1de27c37.json
 delete mode 100644 data/hfopenllm_v2/frameai/Loxa-4B/b8ac82ef-a231-43ee-aaf2-23b0830cfbc3.json
 create mode 100644 data/hfopenllm_v2/freewheelin/free-evo-qwen72b-v0.8-re/7fb595e5-abbc-43ff-8135-c4bb4a2ea593.json
 create mode 100644 data/hfopenllm_v2/freewheelin/free-solar-evo-v0.1/1bb09da7-1675-4e57-b46a-9791c888ce6f.json
 delete mode 100644 data/hfopenllm_v2/freewheelin/free-solar-evo-v0.1/c2438204-5b2b-41ce-aa95-27afad6f61a9.json
 create mode 100644 data/hfopenllm_v2/freewheelin/free-solar-evo-v0.11/3ed7dd5a-e431-480a-91a7-5ccd915057e4.json
 delete mode 100644 data/hfopenllm_v2/freewheelin/free-solar-evo-v0.11/d2180e09-02da-48d2-adf6-1710299b272e.json
 delete mode 100644 data/hfopenllm_v2/freewheelin/free-solar-evo-v0.13/6f6887bf-961c-4b6b-a285-a78459a46488.json
 create mode 100644 data/hfopenllm_v2/freewheelin/free-solar-evo-v0.13/9cab35b6-d6a7-475e-b715-e4493d07cd92.json
 create mode 100644 data/hfopenllm_v2/fulim/FineLlama-3.1-8B/ef7149ae-8d50-4890-89ae-fb561a86d130.json
 create mode 100644 data/hfopenllm_v2/gabrielmbmb/SmolLM-1.7B-Instruct-IFEval/3fa14e1f-82a5-4c04-9c76-2a3f6d56aa81.json
 delete mode 100644 data/hfopenllm_v2/gabrielmbmb/SmolLM-1.7B-Instruct-IFEval/6e3decae-f2a9-4f71-9511-76d28a675cc2.json
 delete mode 100644 data/hfopenllm_v2/gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA/3666aa17-279d-4f0b-a6c2-2c8198729df9.json
 create mode 100644 data/hfopenllm_v2/gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA/4418c7d1-72da-4ed3-9d5c-9d8520f6641c.json
 delete mode 100644 data/hfopenllm_v2/gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES/83a638be-6f3d-4d5b-b1de-6515634aebbd.json
 create mode 100644 data/hfopenllm_v2/gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES/8fe13380-a045-4d63-96f8-ec977540478c.json
 create mode 100644 data/hfopenllm_v2/gbueno86/Brinebreath-Llama-3.1-70B/6da42427-c7de-4830-b368-ca7757ee1d51.json
 create mode 100644 data/hfopenllm_v2/gbueno86/Meta-LLama-3-Cat-Smaug-LLama-70b/5faf24b3-38af-4f3f-8377-bba70d75f8df.json
 create mode 100644 data/hfopenllm_v2/ghost-x/ghost-8b-beta-1608/9a26214c-2601-49be-b1b1-03796b704059.json
 delete mode 100644 data/hfopenllm_v2/ghost-x/ghost-8b-beta-1608/b5fba89f-ec8f-4e71-ad19-32c7d85698fb.json
 create mode 100644 data/hfopenllm_v2/glaiveai/Reflection-Llama-3.1-70B/fa71ed09-45d4-4a5b-bfb1-a61a359a8f0c.json
 create mode 100644 data/hfopenllm_v2/gmonsoon/SahabatAI-Llama-11B-Test/25c5b304-46d3-4df3-9ac3-75ffa972849a.json
 delete mode 100644 data/hfopenllm_v2/gmonsoon/SahabatAI-MediChatIndo-8B-v1/61543864-320f-41ef-889d-7c0e95a229bd.json
 create mode 100644 data/hfopenllm_v2/gmonsoon/SahabatAI-MediChatIndo-8B-v1/88ed0272-39f8-4676-970a-525aee058991.json
 delete mode 100644 data/hfopenllm_v2/gmonsoon/SahabatAI-Rebase-8B-Test/a7daa424-7b22-4320-bddd-be350d54b08d.json
 create mode 100644 data/hfopenllm_v2/gmonsoon/SahabatAI-Rebase-8B-Test/d8eff5d0-061b-4b83-b96a-04f9ba47ea6c.json
 delete mode 100644 data/hfopenllm_v2/gmonsoon/StockSeaLLMs-7B-v1/ac53d663-0e5c-4a7e-8d9d-efcd70d39b10.json
 create mode 100644 data/hfopenllm_v2/gmonsoon/StockSeaLLMs-7B-v1/dcb90e75-8709-4729-8c00-e756e6a9a49d.json
 delete mode 100644 data/hfopenllm_v2/gmonsoon/gemma2-9b-sahabatai-v1-instruct-BaseTIES/6d500e75-5605-4268-88a1-dc4abc7c5a7f.json
 create mode 100644 data/hfopenllm_v2/gmonsoon/gemma2-9b-sahabatai-v1-instruct-BaseTIES/81dcf3ca-f5c2-40a1-8871-b0188d5e9ceb.json
 create mode 100644 data/hfopenllm_v2/godlikehhd/alpaca_data_full_2/0a0a4d32-c7a9-49c9-bba4-dae6b464a5b6.json
 delete mode 100644 data/hfopenllm_v2/godlikehhd/alpaca_data_full_2/3c550631-c27c-4743-98f3-3ab65c5fa906.json
 create mode 100644 data/hfopenllm_v2/godlikehhd/alpaca_data_full_3B/82a3a8ef-7e5f-48d0-a48e-41ea2c5b6452.json
 delete mode 100644 data/hfopenllm_v2/godlikehhd/alpaca_data_full_3B/d7d6baf0-00d3-4960-970c-949bb9919ac9.json
 delete mode 100644 data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_max_2600/017ca821-f6ea-43bc-bac1-28dd30c2341d.json
 create mode 100644 data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_max_2600/e635e798-fa85-4430-bf1e-9d5ad7fe9f22.json
 delete mode 100644 data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_max_2600_3B/41d72b83-3c55-460f-9d21-88866eed6b9a.json
 create mode 100644 data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_max_2600_3B/7ccaa29a-4f73-4794-83a2-b925d755d91e.json
 create mode 100644 data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_me_max_5200/ba8de8f6-c118-4bc3-ae8d-851e964684ed.json
 delete mode 100644 data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_me_max_5200/e2f13357-053c-42e5-8149-465b4f16d334.json
 create mode 100644 data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_min_2600/4011975a-e2a0-466a-9b34-923e1b4f8733.json
 delete mode 100644 data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_min_2600/5561b7bd-bd90-445c-b969-8d400e99e629.json
 create mode 100644 data/hfopenllm_v2/godlikehhd/alpaca_data_ins_ans_max_5200/8a172205-39c6-4dd1-86b2-11b234b37e3c.json
 delete mode 100644 data/hfopenllm_v2/godlikehhd/alpaca_data_ins_ans_max_5200/9c2cee8b-3f35-4a49-814e-ad316fcede7f.json
 create mode 100644 data/hfopenllm_v2/godlikehhd/alpaca_data_ins_max_5200/495b2e8e-e2d8-4158-bc6e-7568604d44e9.json
 delete mode 100644 data/hfopenllm_v2/godlikehhd/alpaca_data_ins_max_5200/cdd1de41-4e85-4872-be9f-e3af4e9221a9.json
 delete mode 100644 data/hfopenllm_v2/godlikehhd/alpaca_data_ins_min_2600/121f28df-65d6-4a48-aa77-4ee794034032.json
 create mode 100644 data/hfopenllm_v2/godlikehhd/alpaca_data_ins_min_2600/e6a97d0d-9dc3-43a5-a69f-8132e19f9c77.json
 create mode 100644 data/hfopenllm_v2/godlikehhd/alpaca_data_ins_min_5200/4aecfd45-f47b-4f02-a0ed-288cbef46a6f.json
 delete mode 100644 data/hfopenllm_v2/godlikehhd/alpaca_data_ins_min_5200/d976888b-5e17-4e5c-b557-0b48bf36d4f7.json
 create mode 100644 data/hfopenllm_v2/godlikehhd/alpaca_data_sampled_ifd_5200/a6f7bc45-c2b5-47d8-a062-60f20c3d7ea4.json
 delete mode 100644 data/hfopenllm_v2/godlikehhd/alpaca_data_sampled_ifd_5200/e7ca66f4-852b-4b5b-8781-d6272a43c559.json
 delete mode 100644 data/hfopenllm_v2/godlikehhd/alpaca_data_sampled_ifd_new_5200/906db90c-7ea4-4878-aa01-06fd1ad0d18a.json
 create mode 100644 data/hfopenllm_v2/godlikehhd/alpaca_data_sampled_ifd_new_5200/c85c79d6-28e0-4deb-ad84-901b725aeca8.json
 delete mode 100644 data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_0.1_2600/08195b61-5fe5-4cce-8da4-34b731289278.json
 create mode 100644 data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_0.1_2600/73271472-d06f-405b-af9d-2da7c17e1eb0.json
 delete mode 100644 data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_0.3_2600/40e4c93e-7a54-49c2-b513-33edd87f59b0.json
 create mode 100644 data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_0.3_2600/4e40bb43-c33d-4324-aa02-5bb7f88a5d1f.json
 delete mode 100644 data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_0.7_2600/988c6ec3-e967-4cec-993b-e060a5a18e97.json
 create mode 100644 data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_0.7_2600/9b36e4c0-0d13-4988-8145-b9254da2e76e.json
 create mode 100644 data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_2500/6a464798-0111-4c71-b156-72a5aba1da63.json
 delete mode 100644 data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_2500/b6fd288d-36d5-4499-bf2d-da1fdd1120c5.json
 create mode 100644 data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_2600_3B/78252135-f15b-427d-86de-c32cd3dbcd0f.json
 delete mode 100644 data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_2600_3B/92dc5ec0-5aea-45f5-9237-32b5a65e095b.json
 create mode 100644 data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_5200/c3b7bd57-9bc3-4d83-aad9-7d6315748c0a.json
 delete mode 100644 data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_5200/d877dbd4-b3da-44b5-974a-1267db396435.json
 create mode 100644 data/hfopenllm_v2/godlikehhd/ifd_2500_qwen/bce17582-e807-4b91-b0e7-0a890bf5eb24.json
 create mode 100644 data/hfopenllm_v2/godlikehhd/ifd_new_correct_all_sample_2500_qwen/f8371e81-f6d4-4441-bc6c-5d4a18da7d08.json
 create mode 100644 data/hfopenllm_v2/godlikehhd/ifd_new_correct_sample_2500_qwen/78407b2e-1f44-46f0-bc21-76bdc68f8d9c.json
 create mode 100644 data/hfopenllm_v2/godlikehhd/ifd_new_qwen_2500/bdb9e2d2-8d09-4994-a320-2f968bcb4898.json
 create mode 100644 data/hfopenllm_v2/godlikehhd/qwen-2.5-1.5b-cherry/c57d15c8-9581-4bb5-89e4-2fea1e3c584e.json
 create mode 100644 data/hfopenllm_v2/godlikehhd/qwen_2.5-1.5b-cherry_new/550d5665-7a8a-437e-b318-000690dd250f.json
 create mode 100644 data/hfopenllm_v2/godlikehhd/qwen_full_data_alpaca/a1922f33-32f5-4f99-8df6-e2080808d292.json
 create mode 100644 data/hfopenllm_v2/godlikehhd/qwen_ins_ans_2500/6ccc376b-24a4-42cc-8ea0-823ef14336db.json
 delete mode 100644 data/hfopenllm_v2/google/AALF/gemma-2-27b-it-SimPO-37K-100steps/214ebe7f-357a-435c-9bf5-451bdea1ca9a.json
 delete mode 100644 data/hfopenllm_v2/google/AALF/gemma-2-27b-it-SimPO-37K/878ec84b-a365-4887-b7fd-1dc738f6eda8.json
 delete mode 100644 data/hfopenllm_v2/google/AELLM/gemma-2-aeria-infinity-9b/93d08946-76b5-4547-8bf0-966c5cccd8c1.json
 delete mode 100644 data/hfopenllm_v2/google/AELLM/gemma-2-lyco-infinity-9b/fa16a47e-4009-487b-8252-1fef155ce6b4.json
 delete mode 100644 data/hfopenllm_v2/google/Aashraf995/Gemma-Evo-10B/15b910c7-6c36-4af8-af78-d48278dbc4db.json
 delete mode 100644 data/hfopenllm_v2/google/BAAI/Gemma2-9B-IT-Simpo-Infinity-Preference/0f948238-5ed2-41ee-a815-3ff20728de89.json
 delete mode 100644 data/hfopenllm_v2/google/BlackBeenie/Neos-Gemma-2-9b/ea9ebbaa-fb04-491d-adc2-0389cb5d1ef6.json
 delete mode 100644 data/hfopenllm_v2/google/Columbia-NLP/LION-Gemma-2b-odpo-v1.0/25418041-6fe1-4cd8-88cb-79456a65210c.json
 delete mode 100644 data/hfopenllm_v2/google/DavidAU/Gemma-The-Writer-9B/a639bba5-4d0e-4d0b-826a-3eb4d0ccebab.json
 delete mode 100644 data/hfopenllm_v2/google/DavidAU/Gemma-The-Writer-DEADLINE-10B/66d2e2a4-a75c-4fb9-af6a-3181f17281af.json
 delete mode 100644 data/hfopenllm_v2/google/DavidAU/Gemma-The-Writer-J.GutenBerg-10B/3d1cef14-ea09-45ca-a92c-a1fe7a05ce8b.json
 delete mode 100644 data/hfopenllm_v2/google/DavidAU/Gemma-The-Writer-Mighty-Sword-9B/a403d91c-4f30-4d05-9f00-24ce97cc91ac.json
 delete mode 100644 data/hfopenllm_v2/google/DavidAU/Gemma-The-Writer-N-Restless-Quill-10B-Uncensored/b708a2a6-d738-48a9-9c20-0838bdb19646.json
 delete mode 100644 data/hfopenllm_v2/google/EpistemeAI/Athena-gemma-2-2b-it-Philos/21096485-ff49-4481-a530-48746334fceb.json
 delete mode 100644 data/hfopenllm_v2/google/EpistemeAI/Athena-gemma-2-2b-it/a0ca047c-97c2-4ba1-84a7-ba0b00ba6d25.json
 delete mode 100644 data/hfopenllm_v2/google/EpistemeAI/Athene-codegemma-2-7b-it-alpaca-v1.3/c05e106e-203a-49e7-b656-22809ac16037.json
 delete mode 100644 data/hfopenllm_v2/google/EpistemeAI2/Athene-codegemma-2-7b-it-alpaca-v1.2/ea4bffba-6e14-4380-a060-2b4deb6d94c0.json
 delete mode 100644 data/hfopenllm_v2/google/GenVRadmin/AryaBhatta-GemmaOrca-2-Merged/d4bb122a-87b4-482e-8050-7c1716a4ed5b.json
 delete mode 100644 data/hfopenllm_v2/google/GenVRadmin/AryaBhatta-GemmaOrca-Merged/179d4baf-7da1-4a56-82e7-35ea45204e13.json
 delete mode 100644 data/hfopenllm_v2/google/GenVRadmin/AryaBhatta-GemmaUltra-Merged/4aca90c3-b0c0-4ec6-ba6b-0d5b09ef63fe.json
 delete mode 100644 data/hfopenllm_v2/google/Gunulhona/Gemma-Ko-Merge-PEFT/7891a95c-8d95-4181-96e8-cdc2f6ab538b.json
 delete mode 100644 data/hfopenllm_v2/google/Gunulhona/Gemma-Ko-Merge-PEFT/f9fb4008-db4e-4a84-b12b-050bdf35084f.json
 delete mode 100644 data/hfopenllm_v2/google/Gunulhona/Gemma-Ko-Merge/dccf426d-63bb-4298-958f-d1f4776f03b2.json
 delete mode 100644 data/hfopenllm_v2/google/HuggingFaceH4/zephyr-7b-gemma-v0.1/dcf4d2bb-ee8f-4083-baf6-8870731515fa.json
 delete mode 100644 data/hfopenllm_v2/google/INSAIT-Institute/BgGPT-Gemma-2-27B-IT-v1.0/51d4db96-4c38-464a-9e7f-0ade67699c8d.json
 delete mode 100644 data/hfopenllm_v2/google/IlyaGusev/gemma-2-2b-it-abliterated/e3ee4f00-1037-4da7-96e2-934b5ccefd15.json
 delete mode 100644 data/hfopenllm_v2/google/IlyaGusev/gemma-2-9b-it-abliterated/8a81c9e6-1c72-46f6-98c6-0d3b28ba5633.json
 delete mode 100644 data/hfopenllm_v2/google/LenguajeNaturalAI/leniachat-gemma-2b-v0/af954640-6806-4e4c-9c0b-b81215eadfc8.json
 delete mode 100644 data/hfopenllm_v2/google/ModelSpace/GemmaX2-28-9B-v0.1/6cb560eb-08f5-4430-8797-1116f1d2f56c.json
 delete mode 100644 data/hfopenllm_v2/google/NAPS-ai/naps-gemma-2-27b-v-0.1.0/8768f068-452f-4a54-bddb-9f6cffaf5a19.json
 delete mode 100644 data/hfopenllm_v2/google/NAPS-ai/naps-gemma-2-27b-v0.1.0/b004d154-392d-4f31-afbb-547b058996bd.json
 delete mode 100644 data/hfopenllm_v2/google/SaisExperiments/Gemma-2-2B-Stheno-Filtered/16070acb-e8bb-476a-b5aa-863a85cb0aee.json
 delete mode 100644 data/hfopenllm_v2/google/Skywork/Skywork-Reward-Gemma-2-27B-v0.2/140b0661-2961-46f3-8c75-cb75147e0acc.json
 delete mode 100644 data/hfopenllm_v2/google/Sorawiz/Gemma-9B-Base/246e4c1f-016c-411e-870e-9ade63713daa.json
 delete mode 100644 data/hfopenllm_v2/google/Sorawiz/Gemma-Creative-9B-Base/26229a4f-9f53-453f-9899-77808040f8cb.json
 delete mode 100644 data/hfopenllm_v2/google/Supichi/BBAI_135_Gemma/64cd00af-6782-431b-aac1-445e39d56717.json
 delete mode 100644 data/hfopenllm_v2/google/TheDrummer/Gemmasutra-9B-v1/3f7a68f4-e456-4ecf-8a5f-1f3698822a89.json
 delete mode 100644 data/hfopenllm_v2/google/TheDrummer/Gemmasutra-Mini-2B-v1/3c066bd3-ec6c-412d-86a1-759c228610b9.json
 delete mode 100644 data/hfopenllm_v2/google/TheDrummer/Tiger-Gemma-9B-v1/7b093f59-7a4e-4e72-b9a6-7d10870917ea.json
 delete mode 100644 data/hfopenllm_v2/google/TheDrummer/Tiger-Gemma-9B-v2/962205b9-009a-4201-b382-5143c80e78ce.json
 delete mode 100644 data/hfopenllm_v2/google/TheDrummer/Tiger-Gemma-9B-v3/6fbfd3ba-e28a-4e9d-be12-e04b6d50b9ee.json
 delete mode 100644 data/hfopenllm_v2/google/Triangle104/Gemmadevi-Stock-10B/153fd43a-fe54-4a99-98dd-5420f2bf8b66.json
 delete mode 100644 data/hfopenllm_v2/google/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter1/687769ed-44e9-4f3d-aee6-2dc4e98dd7ee.json
 delete mode 100644 data/hfopenllm_v2/google/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter2/fa584f01-69eb-4ecc-9f0d-049b6bfb05c8.json
 delete mode 100644 data/hfopenllm_v2/google/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3/f318d457-d295-4447-9222-0b0d92708b5d.json
 delete mode 100644 data/hfopenllm_v2/google/VAGOsolutions/SauerkrautLM-Gemma-2b/b002a274-9b4f-40ad-b0c7-e4efabbe431f.json
 delete mode 100644 data/hfopenllm_v2/google/VAGOsolutions/SauerkrautLM-Gemma-7b/e66f4326-2585-4581-b45f-d9a81fb1576c.json
 delete mode 100644 data/hfopenllm_v2/google/VAGOsolutions/SauerkrautLM-gemma-2-2b-it/b010858c-edb5-4e49-b5b6-72b06943ab2c.json
 delete mode 100644 data/hfopenllm_v2/google/VAGOsolutions/SauerkrautLM-gemma-2-9b-it/5395cbac-afe0-4936-b4eb-f554fcb5be75.json
 delete mode 100644
data/hfopenllm_v2/google/Youlln/4PRYMMAL-GEMMA2-9B-SLERP/06b75d54-4d17-4116-a4d5-0917eedb2dc4.json delete mode 100644 data/hfopenllm_v2/google/ZHLiu627/zephyr-7b-gemma-rpo-avg/6333359d-1cf7-4905-9a48-f8a8f7b46ed2.json delete mode 100644 data/hfopenllm_v2/google/agentlans/Gemma2-9B-AdvancedFuse/3bcdf1ca-ad29-45cf-ac97-6bc508981545.json delete mode 100644 data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp1-2.6B/e52ac657-26a3-499a-949f-bf2a0b620d8e.json delete mode 100644 data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp1-27B/42d79295-bdb0-411d-b1b0-5cff954e925c.json delete mode 100644 data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp2-2.6B/eeb46285-0c8d-43b7-9b6d-e86c24064fde.json delete mode 100644 data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp2-27B/1f2c33e8-2d7b-4bd5-81e8-1c9bcae0ae8f.json delete mode 100644 data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp3-27B/648810d4-4dd5-48c7-a4d7-b3d9d2f3f3f2.json delete mode 100644 data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp4-27B/f94f3bf1-cf85-4673-a5cf-368f250233e4.json delete mode 100644 data/hfopenllm_v2/google/allknowingroger/GemmaSlerp-9B/3aed9fd2-45bd-4568-8885-7fc2370bb26d.json delete mode 100644 data/hfopenllm_v2/google/allknowingroger/GemmaSlerp2-9B/99333370-c7d5-4763-b3a4-14adde0fab9e.json delete mode 100644 data/hfopenllm_v2/google/allknowingroger/GemmaSlerp4-10B/32e38c82-d412-4888-9d9d-f89aef0989fd.json delete mode 100644 data/hfopenllm_v2/google/allknowingroger/GemmaSlerp5-10B/e325b56f-4306-4e37-adc5-c09b300a8c30.json delete mode 100644 data/hfopenllm_v2/google/allknowingroger/GemmaStock1-27B/0b19d8bb-1952-4515-8d29-e55e1106e92b.json delete mode 100644 data/hfopenllm_v2/google/anakin87/gemma-2b-orpo/80531a18-00d3-4264-bf84-cd1d4d90df08.json delete mode 100644 data/hfopenllm_v2/google/anthracite-org/magnum-v3-9b-customgemma2/865b86aa-7b8d-4619-aa57-3c57cc4c7b51.json delete mode 100644 data/hfopenllm_v2/google/beomi/gemma-mling-7b/2568a2b7-e95c-4224-9850-5816466b50f2.json delete mode 100644 data/hfopenllm_v2/google/bunnycore/Gemma-2-2B-Smart/ebada07f-e700-4f38-aec0-f801959969e6.json delete mode 100644 data/hfopenllm_v2/google/bunnycore/Gemma2-9B-TitanFusion/95a2d032-e2a4-46df-84d2-6b7529d5bb01.json delete mode 100644 data/hfopenllm_v2/google/cat-searcher/gemma-2-9b-it-sppo-iter-1-evol-1/af7a7129-1b6a-4ff5-952f-075ae4f7c137.json delete mode 100644 data/hfopenllm_v2/google/cat-searcher/gemma-2-9b-it-sppo-iter-1/3c33f6b0-dc40-4a61-bbbe-063b9d8d30e3.json create mode 100644 data/hfopenllm_v2/google/codegemma-1.1-2b/6547b6f3-63dd-4516-b294-62c4246c3dc7.json delete mode 100644 data/hfopenllm_v2/google/cognitivecomputations/dolphin-2.9.4-gemma2-2b/29a10f53-dd38-437b-a7f3-9756035df640.json delete mode 100644 data/hfopenllm_v2/google/djuna/Gemma-2-gemmama-9b/b2f24392-29aa-4a24-b489-87ea9b85daea.json delete mode 100644 data/hfopenllm_v2/google/dwikitheduck/gemma-2-2b-id-inst/6d66b056-c83d-49b8-ac84-04396c0d97df.json delete mode 100644 data/hfopenllm_v2/google/dwikitheduck/gemma-2-2b-id/000b7f0b-9e2f-499a-9bab-b08767efb8ca.json delete mode 100644 data/hfopenllm_v2/google/ehristoforu/Gemma2-9B-it-psy10k-mental_health/25c93024-ce65-49d5-96da-00107bb37f77.json delete mode 100644 data/hfopenllm_v2/google/ehristoforu/Gemma2-9b-it-train6/e289e629-17dd-440e-8839-d5dcbe535fd6.json delete mode 100644 data/hfopenllm_v2/google/ell44ot/gemma-2b-def/9ba31c7b-13df-46f2-a164-1729563707e1.json delete mode 100644 data/hfopenllm_v2/google/flan-t5-base/69eb63bf-72dd-4995-a8ec-49fd304a8ee7.json create mode 100644 
data/hfopenllm_v2/google/flan-t5-base/a58bf2d3-d209-41b8-a795-ba7a16e4a28f.json create mode 100644 data/hfopenllm_v2/google/flan-t5-large/b15ad3b5-7ef2-439e-9acd-a85eab520d31.json delete mode 100644 data/hfopenllm_v2/google/flan-t5-large/eb2e1202-9292-4f5e-a366-abc84897c66d.json delete mode 100644 data/hfopenllm_v2/google/flan-t5-small/368a36c5-8211-4240-ac88-3fd5e5414310.json create mode 100644 data/hfopenllm_v2/google/flan-t5-small/64da2654-9fdb-4a08-ad16-cf8793a30ed8.json create mode 100644 data/hfopenllm_v2/google/flan-t5-xl/37080215-ee30-4e59-a407-b14695ac2a38.json delete mode 100644 data/hfopenllm_v2/google/flan-t5-xl/98a6a294-7b5d-4279-8aa6-6ed16248ce0b.json delete mode 100644 data/hfopenllm_v2/google/flan-t5-xl/ab0ac321-1c2b-4523-b48c-de47ff06e7a3.json create mode 100644 data/hfopenllm_v2/google/flan-t5-xl/b83a0ce7-bf13-4a98-81f3-04e5a44105f7.json create mode 100644 data/hfopenllm_v2/google/flan-t5-xxl/bb7bea21-5bc6-460d-98ff-b3ed02d5b215.json delete mode 100644 data/hfopenllm_v2/google/flan-t5-xxl/e15f4783-510e-4b92-a999-072caa425d4c.json delete mode 100644 data/hfopenllm_v2/google/flan-ul2/99941572-3e23-467c-97df-dfe1a2aa9805.json create mode 100644 data/hfopenllm_v2/google/flan-ul2/da9ddecc-43cf-4055-a19e-795b1ee98826.json create mode 100644 data/hfopenllm_v2/google/gemma-1.1-2b-it/a93ccb3f-f2d9-415d-8397-0c7fb765fada.json create mode 100644 data/hfopenllm_v2/google/gemma-1.1-7b-it/d0f86765-bdb4-4367-986b-28303bbe1844.json create mode 100644 data/hfopenllm_v2/google/gemma-2-27b-it/693bb191-ae83-49dc-9df1-2f68b1b5fe4a.json create mode 100644 data/hfopenllm_v2/google/gemma-2-27b/7b2c0b72-6421-4f33-8593-a4bbfd0c6d6b.json create mode 100644 data/hfopenllm_v2/google/gemma-2-2b-it/c4ee822f-fc8b-4523-95b6-7c3f12a334b3.json create mode 100644 data/hfopenllm_v2/google/gemma-2-2b-jpn-it/1810033a-185b-4c91-91d3-43b8f6c61443.json create mode 100644 data/hfopenllm_v2/google/gemma-2-2b-jpn-it/beb721ae-a35c-4f6b-a80f-aac4835d5f8d.json create mode 100644 data/hfopenllm_v2/google/gemma-2-2b/cf20e77a-340f-4d8d-b593-9645bdfc5877.json create mode 100644 data/hfopenllm_v2/google/gemma-2-2b/eec73e49-ac2b-42ed-a115-76e45007cd5d.json create mode 100644 data/hfopenllm_v2/google/gemma-2-9b-it/aa06d058-87f9-4fde-ad53-139b29a71448.json create mode 100644 data/hfopenllm_v2/google/gemma-2-9b/3f1d571a-fc42-411b-88ab-4700d5861367.json create mode 100644 data/hfopenllm_v2/google/gemma-2b-it/74a56080-aeb2-4cc6-a825-bbe4d9a5900a.json create mode 100644 data/hfopenllm_v2/google/gemma-2b/2eb433ba-5c93-4355-99dd-edcb65721603.json create mode 100644 data/hfopenllm_v2/google/gemma-7b-it/826fc3ab-6ff8-44fa-a745-a0b80bcb2db4.json create mode 100644 data/hfopenllm_v2/google/gemma-7b/6da54964-e3b5-4567-8ce4-7e0f279af84f.json delete mode 100644 data/hfopenllm_v2/google/google/codegemma-1.1-2b/9d92e421-c458-4ad3-b9bf-45c0ca1b90cf.json delete mode 100644 data/hfopenllm_v2/google/google/gemma-1.1-2b-it/5ed676b6-4aff-4d71-a91a-6d5d9feeb28f.json delete mode 100644 data/hfopenllm_v2/google/google/gemma-1.1-7b-it/6929c338-76a5-4386-9fa8-68e35a989a86.json delete mode 100644 data/hfopenllm_v2/google/google/gemma-2-27b-it/5bcf96ce-efd1-4f90-91a1-edd548de71ad.json delete mode 100644 data/hfopenllm_v2/google/google/gemma-2-27b/12f7d5a6-3f8b-49d8-9ca8-38774dbcca92.json delete mode 100644 data/hfopenllm_v2/google/google/gemma-2-2b-it/64daa9ea-cf1e-4787-90cf-ed72c5e23afd.json delete mode 100644 data/hfopenllm_v2/google/google/gemma-2-2b-jpn-it/251b93fa-6f12-41bc-85c8-ded52e1a0d2d.json delete mode 100644 
data/hfopenllm_v2/google/google/gemma-2-2b-jpn-it/a09fdbce-489c-4d14-a05f-7663121bece7.json delete mode 100644 data/hfopenllm_v2/google/google/gemma-2-2b/07e74f27-e0c3-448f-9a8c-a07ff8a73178.json delete mode 100644 data/hfopenllm_v2/google/google/gemma-2-2b/53fb75b1-2d9f-4af3-a358-18bf5d4a9032.json delete mode 100644 data/hfopenllm_v2/google/google/gemma-2-9b-it/e8cef406-d6cc-48bd-872f-3d5b74bcf092.json delete mode 100644 data/hfopenllm_v2/google/google/gemma-2-9b/2ac50111-a850-4bd2-8136-c373990742a5.json delete mode 100644 data/hfopenllm_v2/google/google/gemma-2b-it/50dffd1a-ddf5-40fd-a2c8-e5dd140af617.json delete mode 100644 data/hfopenllm_v2/google/google/gemma-2b/2dd86ebc-0253-4801-ac99-2bb3494ad29b.json delete mode 100644 data/hfopenllm_v2/google/google/gemma-7b-it/30146048-ee0f-431d-b3e7-8c066c820740.json delete mode 100644 data/hfopenllm_v2/google/google/gemma-7b/630e3cc0-fccc-41b3-b439-85a875dae401.json delete mode 100644 data/hfopenllm_v2/google/google/recurrentgemma-2b-it/a219b160-3dbd-4dcd-b39d-d12c6f9b1145.json delete mode 100644 data/hfopenllm_v2/google/google/recurrentgemma-2b/218a5d0f-5242-43c4-8166-81f5c09626bb.json delete mode 100644 data/hfopenllm_v2/google/google/recurrentgemma-9b-it/c7095b76-2d50-467b-a8d9-d7a277f1f14c.json delete mode 100644 data/hfopenllm_v2/google/google/recurrentgemma-9b/1ff3ab95-3007-4cbf-a146-5e8e4ae65404.json delete mode 100644 data/hfopenllm_v2/google/grimjim/Gigantes-v1-gemma2-9b-it/57072a5e-1f64-4ae2-9e2c-caecc1dc05f4.json delete mode 100644 data/hfopenllm_v2/google/grimjim/Gigantes-v2-gemma2-9b-it/47486923-2194-4b8e-930c-ca14bd5f8a26.json delete mode 100644 data/hfopenllm_v2/google/grimjim/Gigantes-v3-gemma2-9b-it/bb063d7a-65fa-416b-88e9-7bacdef1da3e.json delete mode 100644 data/hfopenllm_v2/google/grimjim/Magnolia-v1-Gemma2-8k-9B/2cf17692-b105-41df-9783-6c7728ab778f.json delete mode 100644 data/hfopenllm_v2/google/grimjim/Magnolia-v2-Gemma2-8k-9B/4d0574f4-4d91-4395-afff-133216eee509.json delete mode 100644 data/hfopenllm_v2/google/grimjim/Magnolia-v3-Gemma2-8k-9B/8fff2cec-a733-4505-bce9-8b605044181a.json delete mode 100644 data/hfopenllm_v2/google/grimjim/Magot-v1-Gemma2-8k-9B/9e63ff64-f862-40ad-b594-31063ec0d31e.json delete mode 100644 data/hfopenllm_v2/google/grimjim/Magot-v2-Gemma2-8k-9B/2d250aa8-f3c5-4f9f-9e5c-dde8f720db53.json delete mode 100644 data/hfopenllm_v2/google/hotmailuser/Gemma2Crono-27B/501e2a2c-e32c-455e-8e5f-f8bde053fddc.json delete mode 100644 data/hfopenllm_v2/google/hotmailuser/Gemma2SimPO-27B/433a8abf-8ff7-40bb-a4d0-654efdb6bf86.json delete mode 100644 data/hfopenllm_v2/google/hotmailuser/Gemma2atlas-27B/c9020f27-9175-4f12-a108-6cbff1c0cb22.json delete mode 100644 data/hfopenllm_v2/google/hotmailuser/Gemma2magnum-27b/0ad192a1-b33f-4362-a21d-ccc590986c5c.json delete mode 100644 data/hfopenllm_v2/google/ifable/gemma-2-Ifable-9B/42b3b64b-0e15-4a49-b542-da27ab7e2143.json delete mode 100644 data/hfopenllm_v2/google/jebish7/gemma-2-2b-it/86206a02-3ab9-4a86-a00c-2900e8cd2e18.json delete mode 100644 data/hfopenllm_v2/google/jebish7/gemma-2-9b-it/80a35d79-893b-439f-b100-a538a3c86974.json delete mode 100644 data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-9B/9ba72d50-4321-4383-8be9-286a56607624.json delete mode 100644 data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-Advanced-9B/7806d1aa-b9e2-45bc-b89d-76e6c48dd3a0.json delete mode 100644 data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-Remix-9B/29dfbb00-8760-46d8-bef8-d036870fb0c0.json delete mode 100644 
data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v2-9B/ca1b9625-0112-4ebf-b1c3-d2dd217d50b2.json delete mode 100644 data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v2a-9B/4fa1e172-f570-4a96-b53a-8ecf31854191.json delete mode 100644 data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v2f-9B/fd59fb1c-3681-44d2-9172-b10891ae9c55.json delete mode 100644 data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v3-Advanced-9B/778a10b0-c537-4592-9dbb-2b0de07ced4c.json delete mode 100644 data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v3b-9B/d048e6ad-cc57-4ebe-8376-262564e86f0c.json delete mode 100644 data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v3i-9B/53602c70-73d9-461b-b27a-24c6a1a538e5.json delete mode 100644 data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v3j-9B/d435bd27-1c26-429d-8ac5-8fd8c591a9aa.json delete mode 100644 data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v4-Advanced-9B/c0e95e3f-37a4-4b2f-a37b-37854546c241.json delete mode 100644 data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v4a-Advanced-9B/b84aedba-7b87-445d-87c2-b029cb0038c3.json delete mode 100644 data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v4b-9B/41f04f45-2f1d-42fd-87de-cc5e484cada2.json delete mode 100644 data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v4c-9B/9499ec24-5be2-478c-b13e-3102d1555668.json delete mode 100644 data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v4d-9B/7e6685d8-af21-4810-a9cc-edb296f4b937.json delete mode 100644 data/hfopenllm_v2/google/lkoenig/BBAI_200_Gemma/b71c5ede-010d-4ce4-9f12-552388e2d9eb.json delete mode 100644 data/hfopenllm_v2/google/monsterapi/gemma-2-2b-LoRA-MonsterInstruct/f5395aa2-334b-410c-a2ee-4d7381f1c9bc.json delete mode 100644 data/hfopenllm_v2/google/mt5-base/621fb00c-90a0-4295-9bd6-f5e102bc0bab.json create mode 100644 data/hfopenllm_v2/google/mt5-base/a7dde688-a0ae-4731-909f-0bef0c6eeba9.json delete mode 100644 data/hfopenllm_v2/google/mt5-small/0d958c7c-5cd9-459f-a0e9-235b5d41ae53.json create mode 100644 data/hfopenllm_v2/google/mt5-small/eb2a8a60-2240-4b08-9dc3-be0215aa7bfc.json delete mode 100644 data/hfopenllm_v2/google/mt5-xl/5abb3ce9-6ad4-4dfa-8bca-81ec6cb84426.json create mode 100644 data/hfopenllm_v2/google/mt5-xl/9b05919f-d7c1-4e04-9dd8-9ae70e0005e6.json delete mode 100644 data/hfopenllm_v2/google/mt5-xxl/38520cce-b3b6-4f22-a6a8-313f6181f5ea.json create mode 100644 data/hfopenllm_v2/google/mt5-xxl/6cd98538-74b6-4ac6-a3ac-9a311cfe47f6.json delete mode 100644 data/hfopenllm_v2/google/nbeerbower/Gemma2-Gutenberg-Doppel-9B/b6514bef-f106-45e0-8571-da3507b0e95b.json delete mode 100644 data/hfopenllm_v2/google/nbeerbower/gemma2-gutenberg-27B/b0a9fb09-2637-4b4c-9d78-7dc8d9c6aad2.json delete mode 100644 data/hfopenllm_v2/google/nbeerbower/gemma2-gutenberg-9B/14dc56ff-7f3b-430e-a4b3-6e4c9961fea3.json delete mode 100644 data/hfopenllm_v2/google/nhyha/N3N_gemma-2-9b-it_20241029_1532/cb85dee2-acee-48f8-85aa-1d5664179fd5.json delete mode 100644 data/hfopenllm_v2/google/nhyha/N3N_gemma-2-9b-it_20241110_2026/4c450b48-8477-45cb-9cfa-814c21dd39d7.json delete mode 100644 data/hfopenllm_v2/google/nidum/Nidum-Limitless-Gemma-2B/49e352c1-2319-4bc5-aa3f-1697739a05b8.json delete mode 100644 data/hfopenllm_v2/google/noname0202/gemma-2-2b-it-ties/42bed40b-ac71-42c8-b56b-47d1f930c736.json delete mode 100644 data/hfopenllm_v2/google/princeton-nlp/gemma-2-9b-it-SimPO/4285b38c-aba8-444b-9b0b-b265c7b1fef1.json delete mode 100644 data/hfopenllm_v2/google/qq8933/OpenLongCoT-Base-Gemma2-2B/c945b9b5-7b46-4300-adcc-2d6c94df0ac1.json delete mode 100644 
data/hfopenllm_v2/google/recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp/054a662a-e425-448c-9556-6998833e51ff.json delete mode 100644 data/hfopenllm_v2/google/recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp/0a685d8f-38c7-4521-9613-7b36ad1cac73.json delete mode 100644 data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.1/d31a41b0-6500-4e1b-8435-b9d3e9725c02.json delete mode 100644 data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.2/5826c93f-3642-44cf-b385-4a5ab5103086.json delete mode 100644 data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.2/6a15378c-36cc-4f5e-b184-5a19a6fbb192.json delete mode 100644 data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.3/47cfe707-ba31-4c9b-aa15-9ab8b566e206.json delete mode 100644 data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.3/8d3bd687-89f5-4d62-af46-93646aea4341.json delete mode 100644 data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.4/28eef1b7-a83e-49c9-8f11-ef9e4ae7e1ce.json delete mode 100644 data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.5/8fe5a1e8-1491-4e64-8aed-32e73f2dae6e.json create mode 100644 data/hfopenllm_v2/google/recurrentgemma-2b-it/b0ca2dec-387f-4b27-9adb-772af1899832.json create mode 100644 data/hfopenllm_v2/google/recurrentgemma-2b/53c4b397-b78e-4699-a01e-3535aa072225.json create mode 100644 data/hfopenllm_v2/google/recurrentgemma-9b-it/f5b251f0-741c-4ad5-ab04-19c5202854ea.json create mode 100644 data/hfopenllm_v2/google/recurrentgemma-9b/7b2ba13a-e01d-4442-9abe-d16df1a1668a.json delete mode 100644 data/hfopenllm_v2/google/sequelbox/gemma-2-9B-MOTH/4bdefb85-2413-43b7-8938-869ad0cff58f.json delete mode 100644 data/hfopenllm_v2/google/switch-base-8/43e22ce0-cdd7-424f-8a01-f9fea8b2a010.json create mode 100644 data/hfopenllm_v2/google/switch-base-8/bf79f87c-3f14-49e8-acba-725e709d5f11.json create mode 100644 data/hfopenllm_v2/google/umt5-base/3fbac7d4-cbbb-4b77-9db4-fd7e122cc90e.json delete mode 100644 data/hfopenllm_v2/google/umt5-base/659053b0-7694-41e7-916d-28406b3ed572.json delete mode 100644 data/hfopenllm_v2/google/wzhouad/gemma-2-9b-it-WPO-HB/70fe199f-6c81-4d99-a595-208b7abc321f.json delete mode 100644 data/hfopenllm_v2/google/ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18-merge/9c7a213f-e5f8-4cc2-9cbe-d61db2cf2bbe.json delete mode 100644 data/hfopenllm_v2/google/ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18/23800723-b5bd-4fc6-9d07-ca937c8680c6.json delete mode 100644 data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-17-18-24/7321bd04-6f20-427a-8219-0ff2e299cb01.json delete mode 100644 data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO-alpaca/3cc8621a-b38c-4735-af09-027989774289.json delete mode 100644 data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO/44b47789-f529-4bae-9e87-196abc325efc.json delete mode 100644 data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-17/5958a61d-bf39-4de4-bfe1-6a6db2f37f55.json delete mode 100644 data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-18-ORPO/c91ab7d1-b36e-45ca-8f1e-ad9ef0c38100.json delete mode 100644 data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-18/78f235b0-fa98-48e2-bb03-9f7e9f986004.json delete mode 100644 data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-24/4f0262d9-2a01-4127-bb40-1bbf437bbc07.json delete mode 100644 data/hfopenllm_v2/google/zake7749/gemma-2-9b-it-chinese-kyara/827af354-0efb-4a44-b62a-c8562fd0065b.json delete mode 100644 data/hfopenllm_v2/google/zelk12/Gemma-2-TM-9B/4d3c877e-3dea-44af-8133-d555355971f8.json delete mode 100644 
data/hfopenllm_v2/google/zelk12/MT-Gen1-gemma-2-9B/119f453d-714d-4324-aac5-8448bab91771.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT-Gen2-GI-gemma-2-9B/0cf7e394-67e2-4ca3-ab2e-00cd4165eaf8.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT-Gen2-gemma-2-9B/6f5cbf98-67b4-4651-acee-160fe2e36f59.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT-Gen3-gemma-2-9B/79319862-c5eb-40a1-9424-ecc3835c1c9e.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT-Gen4-gemma-2-9B/7442a4c1-e225-4cea-b107-2d975460e214.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT-Gen5-gemma-2-9B/4431b126-a8b8-4776-8dd5-448ec4fb0caf.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT-Gen6-gemma-2-9B/2dc22f82-e2fb-4690-b8e6-8c77b9bc9c45.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT-Gen6fix-gemma-2-9B/0c2ec793-573d-4fb5-abc3-4aef4a8e2e72.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT-Gen7-gemma-2-9B/29e65163-3e59-4bfe-a950-60092cb3171f.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT-Max-Merge_02012025163610-gemma-2-9B/bfeb5972-e865-4892-b01b-0c92fdab79e9.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT-Merge-gemma-2-9B/8025c7ed-3553-489f-8858-091d1ff81a15.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT-Merge1-gemma-2-9B/0e6d9dcd-e9b7-4638-ac0a-d0600fbb27d8.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT-Merge2-MU-gemma-2-MTg2MT1g2-9B/b149c82e-0099-46f6-a302-0eac4127f418.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT-Merge2-gemma-2-9B/75c81dae-2bb9-4d60-94e2-61141c31ccbd.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT-Merge3-gemma-2-9B/c2bad77e-c0d0-4a43-8853-9363cc618603.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT-Merge4-gemma-2-9B/7b515db9-e76c-495f-b4f8-a65b913f40e9.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT-Merge5-gemma-2-9B/f9e1d208-d1ab-4518-9b1b-1470af8bef12.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT-Merge6-gemma-2-9B/3c796c74-d79c-4c9f-a5ab-dee6c237bde1.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT-gemma-2-9B/061fc038-b3fd-4d5b-8ab7-7f3713ad9e55.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT1-Gen1-gemma-2-9B/b869eab0-f736-48ef-8870-b98636cc4da1.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT1-Gen2-gemma-2-9B/2871c1f6-4010-48e4-8020-1c5024474934.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT1-Gen3-gemma-2-9B/69b008dd-f8ad-49ce-9bca-fff2e2ce6b72.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT1-Gen4-gemma-2-9B/e10f8a93-7131-446d-b792-d179f522a262.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT1-Gen5-IF-gemma-2-S2DMv1-9B/182a7558-c9f7-43a6-a928-d5d97e082a91.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT1-Gen5-gemma-2-9B/46f2caf1-29e8-4173-b2b2-e54e905e71d9.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT1-Gen6-gemma-2-9B/fcf4087e-9d89-4e8a-a817-6c9092445208.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT1-Gen7-gemma-2-9B/5b8bdeea-19cf-41c0-890a-55ae1b740e75.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT1-Max-Merge_02012025163610-gemma-2-9B/01fcc284-cedc-48b7-bc21-b8ec6dd53d3c.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT1-gemma-2-9B/17cda965-9f4b-411c-977f-1fe3238f527f.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT2-Gen1-gemma-2-9B/e6c0f96c-6189-4ed1-bf68-e762249170e7.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT2-Gen2-gemma-2-9B/556a83e2-9b7c-432e-99d5-804da880dfc6.json delete mode 
100644 data/hfopenllm_v2/google/zelk12/MT2-Gen3-gemma-2-9B/1aa85069-5409-4c32-91d5-1f417be4e465.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT2-Gen4-gemma-2-9B/eb55e4d5-dde4-4349-b8aa-9297604cedf0.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT2-Gen5-gemma-2-9B/3f7eb2b4-8dfb-4bf5-a462-0c11ccbae935.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT2-Gen6-gemma-2-9B/35e1f76a-96d6-42af-a51b-b1b453536723.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT2-Gen7-gemma-2-9B/4b9e66cf-0ddb-4878-8800-2bc05dec750a.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT2-Max-Merge_02012025163610-gemma-2-9B/2144960d-f674-45bd-9509-3cf711dc697b.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT2-gemma-2-9B/0644b140-506f-4c7a-ba59-50ab48fad799.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT3-Gen1-gemma-2-9B/1964f25a-d5b2-467a-a30d-9338082bdcfb.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT3-Gen2-gemma-2-9B/55315256-9b4d-4dbd-bc53-7ec384e0fdca.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT3-Gen3-gemma-2-9B/71710546-99cb-4180-9454-1e77696fccf3.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT3-Gen4-gemma-2-9B/96b38b17-8c70-4ecf-beb5-8e6ed84942ac.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT3-Gen5-gemma-2-9B/53dc50c8-fa89-4d31-92d6-f8b02543e272.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT3-Gen5-gemma-2-9B_v1/95fe9cce-c93d-47e3-a053-defe922abefa.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT3-Gen6-gemma-2-9B/9f093c1a-eabc-4ee3-9e43-9ac0bc3afa08.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT3-Max-Merge_02012025163610-gemma-2-9B/42e21a24-7c3c-4e65-ad6e-0b18f6c048eb.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT3-gemma-2-9B/0b8f178b-9980-4250-bc82-66facb367eb8.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT4-Gen1-gemma-2-9B/6e5b6be6-cc1d-4a03-8e5e-eeede4ee4298.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT4-Gen2-gemma-2-9B/e7f0b28a-32c6-4faf-9cb4-c2ee4a075135.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT4-Gen3-gemma-2-9B/b84ca7e1-4746-449a-841f-fcfd71774104.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT4-Gen4-gemma-2-9B/b38dc953-12fb-41aa-a887-d9a30ff1799a.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT4-Gen5-gemma-2-9B/4a35f213-f9b7-40c5-b164-722f6b4ee933.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT4-Max-Merge_02012025163610-gemma-2-9B/ae4224f6-36e8-48e2-a0bf-a79299c365ad.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT4-gemma-2-9B/a312ee46-fd2f-4a0d-a778-7e235910a147.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT5-Gen1-gemma-2-9B/b311d3f4-6eda-4053-91d2-416c4d796c6d.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT5-Gen2-gemma-2-9B/d59d00da-e88f-4d1a-9c47-538020ae0114.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT5-Gen3-gemma-2-9B/1ff959c7-3477-40e5-8460-971337adc788.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT5-Gen4-gemma-2-9B/6cbd7c31-df0a-4920-9c23-be53f107698e.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT5-Gen5-gemma-2-9B/b4ca4df6-2631-4ba3-bb55-8eadec5dd348.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT5-Max-Merge_02012025163610-gemma-2-9B/6737b327-bd1c-4eee-a461-af685edcd7b5.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MT5-gemma-2-9B/dd306da8-60aa-4022-8d04-1942fd19bc0b.json delete mode 100644 data/hfopenllm_v2/google/zelk12/MTM-Merge-gemma-2-9B/e0354dac-3ad8-4342-92a9-be0182051cac.json 
delete mode 100644 data/hfopenllm_v2/google/zelk12/MTMaMe-Merge_02012025163610-gemma-2-9B/b1a8ede3-2f27-4825-a413-e1772743b7c6.json delete mode 100644 data/hfopenllm_v2/google/zelk12/Rv0.4DMv1t0.25-gemma-2-9B/522e1145-3f25-4b5d-9b6a-7ad0047b2da5.json delete mode 100644 data/hfopenllm_v2/google/zelk12/Rv0.4DMv1t0.25Tt0.25-gemma-2-9B/64790745-5edc-49d9-8111-822d54518b58.json delete mode 100644 data/hfopenllm_v2/google/zelk12/Rv0.4MT4g2-gemma-2-9B/7e232332-cf13-4127-be18-1311921931e6.json delete mode 100644 data/hfopenllm_v2/google/zelk12/T31122024203920-gemma-2-9B/f1312aef-339c-487a-b0fa-1bf4a77f0910.json delete mode 100644 data/hfopenllm_v2/google/zelk12/Test01012025155054t0.5_gemma-2/73f07833-1d35-484f-8fe3-57f4c27e1277.json delete mode 100644 data/hfopenllm_v2/google/zelk12/gemma-2-S2MTM-9B/e0eb1bbf-923b-4bee-8390-288c21607e0e.json delete mode 100644 data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.25/b9ce6ed3-132a-44ed-9efc-dbfcc83d6799.json delete mode 100644 data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.75/a2b9a953-31e2-4a6f-8005-993e1133246e.json delete mode 100644 data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1/6850eb56-9f2c-4d4f-a82a-29e24b81b8b3.json delete mode 100644 data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.2/7f429355-b60b-4298-8eb0-a072a80898d7.json delete mode 100644 data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Gutenberg-Doppel-9B-v0.1/774a3b0c-acae-4ad2-a2a6-42c30e1db7c0.json delete mode 100644 data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Ifable-9B-v0.1/e8502d8d-87bd-444c-b41b-7f8d4eb15b29.json delete mode 100644 data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-psy10k-mental_healt-9B-v0.1/735bed66-1e83-4647-b730-14f0d571d597.json create mode 100644 data/hfopenllm_v2/goulue5/merging_LLM/6efd0dbd-b8c1-4c66-bdf7-19055c16ca22.json delete mode 100644 data/hfopenllm_v2/goulue5/merging_LLM/a7fb7d77-93c3-41c8-a85a-692953dcd2c6.json create mode 100644 data/hfopenllm_v2/gradientai/Llama-3-8B-Instruct-Gradient-1048k/1388b8d4-c711-480c-8a06-a8b7bd8aa79c.json delete mode 100644 data/hfopenllm_v2/gradientai/Llama-3-8B-Instruct-Gradient-1048k/79d366fc-e21c-4e5e-bb94-8d221d9df715.json create mode 100644 data/hfopenllm_v2/grimjim/DeepSauerHuatuoSkywork-R1-o1-Llama-3.1-8B/03393ffd-1923-4767-ba14-d0e3e6751842.json create mode 100644 data/hfopenllm_v2/grimjim/Gigantes-v1-gemma2-9b-it/b7d049dc-127d-4075-8067-22adac9a58c3.json create mode 100644 data/hfopenllm_v2/grimjim/Gigantes-v2-gemma2-9b-it/89d79024-f4b8-4165-bd88-47f2b0010800.json create mode 100644 data/hfopenllm_v2/grimjim/Gigantes-v3-gemma2-9b-it/d2c0fb0d-6c0c-464a-b09f-6382a57b6afb.json create mode 100644 data/hfopenllm_v2/grimjim/HuatuoSkywork-o1-Llama-3.1-8B/a891b28a-2dcc-4b8e-ad20-1f23d663b44b.json create mode 100644 data/hfopenllm_v2/grimjim/Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge/55e274bb-1e2c-4402-b7ae-09ff7b1f9738.json delete mode 100644 data/hfopenllm_v2/grimjim/Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge/6b615d1d-7dab-4414-88a2-72fff1b5fce7.json delete mode 100644 data/hfopenllm_v2/grimjim/Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge/251c7560-4672-44a6-82df-2b8ce9a99a5e.json create mode 100644 data/hfopenllm_v2/grimjim/Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge/fe7a6940-fc4c-4345-84be-609c8155be57.json delete mode 100644 data/hfopenllm_v2/grimjim/Llama-3.1-8B-Instruct-abliterated_via_adapter/377105ce-c655-47fe-a565-71a4de8c3683.json create mode 100644 
data/hfopenllm_v2/grimjim/Llama-3.1-8B-Instruct-abliterated_via_adapter/77eb2b0f-e3e3-474c-bb02-dabde2998ef0.json delete mode 100644 data/hfopenllm_v2/grimjim/Llama-3.1-Bonsaikraft-8B-Instruct/5f15d683-bae4-4888-8d1c-352aac802fbe.json create mode 100644 data/hfopenllm_v2/grimjim/Llama-3.1-Bonsaikraft-8B-Instruct/94d744be-5d28-490a-ba9a-8440cb97dce9.json create mode 100644 data/hfopenllm_v2/grimjim/Llama-Nephilim-Metamorphosis-v2-8B/2765061e-7506-4eb6-b63f-312f6290665a.json create mode 100644 data/hfopenllm_v2/grimjim/Llama3.1-SuperNovaLite-HuatuoSkywork-o1-8B/167c937c-66c7-45a8-bbd9-97d98531bf7d.json create mode 100644 data/hfopenllm_v2/grimjim/Magnolia-v1-Gemma2-8k-9B/9587c35c-1def-46e7-8642-7acb0340be5e.json create mode 100644 data/hfopenllm_v2/grimjim/Magnolia-v2-12B/1c9594fe-03d6-4ec1-9da5-99960da0dcd4.json delete mode 100644 data/hfopenllm_v2/grimjim/Magnolia-v2-12B/2cf86f7c-a9a8-48d0-bc10-e8a1f654092c.json create mode 100644 data/hfopenllm_v2/grimjim/Magnolia-v2-Gemma2-8k-9B/8ed2c4eb-bc72-4dde-a559-1afd1698d37d.json delete mode 100644 data/hfopenllm_v2/grimjim/Magnolia-v3-12B/68faa5a3-82ae-462d-adad-505134024710.json create mode 100644 data/hfopenllm_v2/grimjim/Magnolia-v3-12B/a2f9536a-9266-4aee-be90-d04f4dcbe53c.json create mode 100644 data/hfopenllm_v2/grimjim/Magnolia-v3-Gemma2-8k-9B/7f116aaa-3880-4e53-948a-4b06e0d26cff.json create mode 100644 data/hfopenllm_v2/grimjim/Magnolia-v4-12B/7cbe4516-2be2-421b-95f4-c9500ad64ca5.json delete mode 100644 data/hfopenllm_v2/grimjim/Magnolia-v4-12B/a48116ed-d4bf-4f06-94aa-2ef8364bd8d2.json create mode 100644 data/hfopenllm_v2/grimjim/Magnolia-v5a-12B/07df565a-bc30-4a9d-b472-7a85f35938be.json delete mode 100644 data/hfopenllm_v2/grimjim/Magnolia-v5a-12B/ff64dcc7-9646-4c53-8b1e-68b62a025574.json create mode 100644 data/hfopenllm_v2/grimjim/Magot-v1-Gemma2-8k-9B/7545f7db-10bb-4d97-9b3f-4346f4f26bad.json create mode 100644 data/hfopenllm_v2/grimjim/Magot-v2-Gemma2-8k-9B/47384f10-ac6a-4629-92db-86f01a441f7f.json create mode 100644 data/hfopenllm_v2/grimjim/SauerHuatuoSkywork-o1-Llama-3.1-8B/3c9f022f-3e2b-48d6-acb9-07f066cfceb6.json create mode 100644 data/hfopenllm_v2/grimjim/llama-3-Nephilim-v1-8B/1d851cfb-8624-4516-8204-85569c60dc67.json create mode 100644 data/hfopenllm_v2/grimjim/llama-3-Nephilim-v2-8B/a7990990-7498-4b74-a0aa-9c266910698e.json create mode 100644 data/hfopenllm_v2/grimjim/llama-3-Nephilim-v2.1-8B/0b41d37e-0728-4575-9662-c150e2e29bd0.json create mode 100644 data/hfopenllm_v2/grimjim/llama-3-Nephilim-v3-8B/c565a7e9-bd1b-41a5-bff3-3a349553f4e8.json delete mode 100644 data/hfopenllm_v2/gupta-tanish/llama-7b-dpo-baseline/1b962cb9-8754-40ab-b41a-b7cdf1fa3de1.json create mode 100644 data/hfopenllm_v2/gupta-tanish/llama-7b-dpo-baseline/680a4507-755e-4014-877b-6032f0220270.json create mode 100644 data/hfopenllm_v2/gz987/qwen2.5-7b-cabs-v0.1/5ace8dc6-e348-4267-bb4a-f71a335d074e.json create mode 100644 data/hfopenllm_v2/gz987/qwen2.5-7b-cabs-v0.2/07549821-db51-4b77-980a-056131b5dd29.json create mode 100644 data/hfopenllm_v2/gz987/qwen2.5-7b-cabs-v0.3/ff12a0a1-a913-441b-955c-bcbd50056acf.json create mode 100644 data/hfopenllm_v2/gz987/qwen2.5-7b-cabs-v0.4/947cfc2b-b73c-40eb-9e57-be5278776711.json create mode 100644 data/hfopenllm_v2/h2oai/h2o-danube-1.8b-chat/53639078-c50a-4147-bab0-16993f1790b6.json delete mode 100644 data/hfopenllm_v2/h2oai/h2o-danube-1.8b-chat/ac8f78b5-a9e1-4e17-a1e7-8a7b8dc22a8d.json delete mode 100644 data/hfopenllm_v2/h2oai/h2o-danube3-4b-base/3878bb0d-753f-465a-a8c1-8408f8f5bfcf.json create mode 100644 
data/hfopenllm_v2/h2oai/h2o-danube3-4b-base/b2cf96e0-382e-4200-a4a4-d66e8a188878.json delete mode 100644 data/hfopenllm_v2/h2oai/h2o-danube3-4b-chat/d3df3cb7-5e79-49e5-9ed1-1e2771318915.json create mode 100644 data/hfopenllm_v2/h2oai/h2o-danube3-4b-chat/d4ed3eb6-f569-4d4b-8da5-50eaaf824128.json create mode 100644 data/hfopenllm_v2/h2oai/h2o-danube3-500m-chat/210f7063-e0d9-424d-94f4-3645e4e1b401.json delete mode 100644 data/hfopenllm_v2/h2oai/h2o-danube3-500m-chat/c917765b-a4b4-4e5d-9c11-eed791349daf.json create mode 100644 data/hfopenllm_v2/h2oai/h2o-danube3.1-4b-chat/4ecd26d8-8416-4dba-8d53-96f4013cfef0.json delete mode 100644 data/hfopenllm_v2/h2oai/h2o-danube3.1-4b-chat/5f5d83bd-91e9-416b-b40d-506f3861ed3f.json create mode 100644 data/hfopenllm_v2/haoranxu/ALMA-13B-R/15712b7d-e69f-4a4f-b13c-4e79ce859399.json delete mode 100644 data/hfopenllm_v2/haoranxu/ALMA-13B-R/9446f216-e3d6-4fca-ae00-937b4a76e5bf.json create mode 100644 data/hfopenllm_v2/haoranxu/Llama-3-Instruct-8B-CPO-SimPO/9148c375-7c08-4c1c-82ed-5f935b2a4f04.json delete mode 100644 data/hfopenllm_v2/haoranxu/Llama-3-Instruct-8B-CPO-SimPO/aa67ad0b-e469-4b49-a797-4542370a2e94.json delete mode 100644 data/hfopenllm_v2/haoranxu/Llama-3-Instruct-8B-SimPO/39aa4e41-376f-4ee6-8925-8bf746a871a0.json create mode 100644 data/hfopenllm_v2/haoranxu/Llama-3-Instruct-8B-SimPO/fb93274b-b7d8-483a-a95d-96340535febc.json create mode 100644 data/hfopenllm_v2/hatemmahmoud/qwen2.5-1.5b-sft-raft-grpo-hra-doc/0818b755-ec49-457c-8635-73f01816f30b.json delete mode 100644 data/hfopenllm_v2/hatemmahmoud/qwen2.5-1.5b-sft-raft-grpo-hra-doc/7d3c185f-4b4f-4bdd-bac9-f4ba2410f40c.json delete mode 100644 data/hfopenllm_v2/hon9kon9ize/CantoneseLLMChat-v0.5/6e87be06-ca0e-48a4-ae28-4a5794600117.json create mode 100644 data/hfopenllm_v2/hon9kon9ize/CantoneseLLMChat-v0.5/77962326-0160-49bd-9ef1-59b403b2bfce.json create mode 100644 data/hfopenllm_v2/hon9kon9ize/CantoneseLLMChat-v1.0-7B/272abbe5-8b61-442f-9860-d7411e7fec99.json delete mode 100644 data/hfopenllm_v2/hon9kon9ize/CantoneseLLMChat-v1.0-7B/cccf983e-e1b8-4f0f-b147-abccdea65548.json create mode 100644 data/hfopenllm_v2/hongbai12/li-0.4-pre/14d617a8-18c6-40a7-a4ba-19cf5fc5f4e3.json delete mode 100644 data/hfopenllm_v2/hongbai12/li-0.4-pre/ab7dcb4c-3884-428f-b342-38034dd51b56.json create mode 100644 data/hfopenllm_v2/hotmailuser/Deepseek-qwen-modelstock-2B/ef7b5e6d-b5b7-4c7b-9781-6f90eb1ff5dd.json create mode 100644 data/hfopenllm_v2/hotmailuser/Falcon3Slerp1-10B/1970e257-7c93-4342-9ff4-a96af21acc67.json delete mode 100644 data/hfopenllm_v2/hotmailuser/Falcon3Slerp1-10B/376d342c-669b-4c76-9e7b-d49566ac441d.json create mode 100644 data/hfopenllm_v2/hotmailuser/Falcon3Slerp2-10B/15d71696-4b21-41ff-a4c6-0aea92fb844a.json delete mode 100644 data/hfopenllm_v2/hotmailuser/Falcon3Slerp2-10B/bae0b772-8ae6-4fed-ae78-d6d83e560a95.json create mode 100644 data/hfopenllm_v2/hotmailuser/Falcon3Slerp4-10B/ccb85394-5252-48d4-8980-8b3a6c67ab1a.json delete mode 100644 data/hfopenllm_v2/hotmailuser/Falcon3Slerp4-10B/d5466af4-2bef-4ce8-a659-9e05a5e674b6.json delete mode 100644 data/hfopenllm_v2/hotmailuser/FalconSlerp-3B/2db7aa3c-4969-40c0-b8c6-1ff5c953ba23.json create mode 100644 data/hfopenllm_v2/hotmailuser/FalconSlerp-3B/ea9837ff-f4c7-4bb0-b2af-7ae26371baf0.json delete mode 100644 data/hfopenllm_v2/hotmailuser/FalconSlerp1-7B/5d01fa6d-4280-4926-b166-e98892ee60f4.json create mode 100644 data/hfopenllm_v2/hotmailuser/FalconSlerp1-7B/fe9012a7-d07f-48d4-b460-eca256078d8b.json create mode 100644 
data/hfopenllm_v2/hotmailuser/FalconSlerp2-7B/8e8d2071-8e7d-4dad-8536-4698b2d00316.json delete mode 100644 data/hfopenllm_v2/hotmailuser/FalconSlerp2-7B/fc8605ad-f7b9-4a73-afd3-85b996fc2549.json create mode 100644 data/hfopenllm_v2/hotmailuser/FalconSlerp3-10B/dbcb41be-9ed6-4244-ada8-77f363c3487e.json delete mode 100644 data/hfopenllm_v2/hotmailuser/FalconSlerp3-10B/f933fbc2-370e-4231-94a9-c833c2aa793d.json delete mode 100644 data/hfopenllm_v2/hotmailuser/FalconSlerp3-7B/017a681e-1bbb-4890-bfcc-f276954678e1.json create mode 100644 data/hfopenllm_v2/hotmailuser/FalconSlerp3-7B/e48e2d7e-6c14-4bb1-bd12-74d93a145ca3.json create mode 100644 data/hfopenllm_v2/hotmailuser/FalconSlerp4-7B/30c2d908-3eaf-408a-a2b5-301e0cd9e052.json delete mode 100644 data/hfopenllm_v2/hotmailuser/FalconSlerp4-7B/d6ac7c9f-212e-4000-b89e-d977122d2e2b.json delete mode 100644 data/hfopenllm_v2/hotmailuser/FalconSlerp6-7B/88a4587f-d3d4-4b08-b800-13a2daf4a660.json create mode 100644 data/hfopenllm_v2/hotmailuser/FalconSlerp6-7B/f7624d04-66d1-4c05-8c01-d015ecf8412c.json create mode 100644 data/hfopenllm_v2/hotmailuser/Gemma2Crono-27B/511e4aad-1e5a-4515-9433-46989fc3945b.json create mode 100644 data/hfopenllm_v2/hotmailuser/Gemma2SimPO-27B/863e71ec-03a4-47ed-8bc9-b064d5571162.json create mode 100644 data/hfopenllm_v2/hotmailuser/Gemma2atlas-27B/6a6dfcb4-192b-44ff-a34f-76b31bbf5ad3.json create mode 100644 data/hfopenllm_v2/hotmailuser/Gemma2magnum-27b/e0dbec0b-a154-448a-be23-ef9b764469ea.json create mode 100644 data/hfopenllm_v2/hotmailuser/Llama-Hermes-slerp-8B/ecd91300-b0cf-48ce-9e5c-253a7991f90e.json create mode 100644 data/hfopenllm_v2/hotmailuser/Llama-Hermes-slerp2-8B/e3df71f1-63e1-40f1-918d-07cb3ec939cf.json create mode 100644 data/hfopenllm_v2/hotmailuser/LlamaStock-8B/52066a23-9847-490e-90e3-57eee3c63276.json create mode 100644 data/hfopenllm_v2/hotmailuser/Mistral-modelstock-24B/91f15ba3-a062-4b01-8a61-6e51fdf5f8d4.json create mode 100644 data/hfopenllm_v2/hotmailuser/Mistral-modelstock2-24B/323630ee-fbe0-49a7-aa11-816fde38ba2d.json create mode 100644 data/hfopenllm_v2/hotmailuser/Phi4-Slerp4-14B/e5c8f97d-1873-4c9d-8bed-50dc592543db.json create mode 100644 data/hfopenllm_v2/hotmailuser/Qwen2.5-HomerSlerp-7B/7ee2803c-b8f8-4156-8472-bab4baab8863.json create mode 100644 data/hfopenllm_v2/hotmailuser/QwenModelStock-1.8B/78573f63-3073-4be4-93a7-0ea00b1383fd.json create mode 100644 data/hfopenllm_v2/hotmailuser/QwenSlerp-14B/42da7295-d78d-49a4-9279-8406063240c4.json create mode 100644 data/hfopenllm_v2/hotmailuser/QwenSlerp-3B/b61c5735-53ca-4dda-a223-79921eee7f3e.json create mode 100644 data/hfopenllm_v2/hotmailuser/QwenSlerp-7B/310124ef-e33f-49de-83eb-e665a5143aaa.json create mode 100644 data/hfopenllm_v2/hotmailuser/QwenSlerp2-14B/c9b056df-8bbe-4959-ab44-85813157c95c.json create mode 100644 data/hfopenllm_v2/hotmailuser/QwenSlerp2-3B/7a60385f-48dd-4926-8b66-3d42a1631db3.json create mode 100644 data/hfopenllm_v2/hotmailuser/QwenSlerp3-14B/da365c7b-74d0-4a9f-a8fd-cf4049ec4de6.json create mode 100644 data/hfopenllm_v2/hotmailuser/QwenSparse-7B/e2930715-b616-49a4-83bc-53e92fc3580f.json create mode 100644 data/hfopenllm_v2/hotmailuser/QwenStock-0.5B/543f45e0-a158-4fdb-bbb1-8deb38f4515b.json create mode 100644 data/hfopenllm_v2/hotmailuser/QwenStock-1.7B/b96a20e0-d044-4a66-8909-437aeaef569c.json create mode 100644 data/hfopenllm_v2/hotmailuser/QwenStock1-14B/408742ff-4b21-46dc-b4d6-4c78d652d228.json create mode 100644 data/hfopenllm_v2/hotmailuser/RombosBeagle-v2beta-MGS-32B/496a9fbe-376c-4546-bd90-b42f583924ce.json delete 
mode 100644 data/hfopenllm_v2/hotmailuser/RombosBeagle-v2beta-MGS-32B/c507c0ac-759a-4013-8dd0-7ab5a959ca65.json create mode 100644 data/hfopenllm_v2/huggyllama/llama-13b/f32c07b4-21a8-4cd2-91f8-f0f26d0b1b38.json create mode 100644 data/hfopenllm_v2/huggyllama/llama-65b/cc36cc37-0f41-42aa-8051-54cc135820ef.json create mode 100644 data/hfopenllm_v2/huggyllama/llama-7b/20d3dac4-9f8c-431c-b20f-364dd860e37f.json create mode 100644 data/hfopenllm_v2/huihui-ai/DeepSeek-R1-Distill-Qwen-14B-abliterated-v2/89022ea8-2a5b-4eba-8d7a-320ba13d30a4.json delete mode 100644 data/hfopenllm_v2/huihui-ai/QwQ-32B-Coder-Fusion-7030/5fb3b31d-8c2c-4d76-8532-1bff0f793f4b.json create mode 100644 data/hfopenllm_v2/huihui-ai/QwQ-32B-Coder-Fusion-7030/97bfd152-79c6-4c96-8d3e-588275339e41.json delete mode 100644 data/hfopenllm_v2/huihui-ai/QwQ-32B-Coder-Fusion-8020/461ee093-b573-4ce9-9168-c9852dc9745b.json create mode 100644 data/hfopenllm_v2/huihui-ai/QwQ-32B-Coder-Fusion-8020/93061947-2bcf-482e-ab22-38ef8ee33bcf.json delete mode 100644 data/hfopenllm_v2/huihui-ai/QwQ-32B-Coder-Fusion-9010/41d5fb44-855b-4ff1-8f5d-95b8a9f9a9af.json create mode 100644 data/hfopenllm_v2/huihui-ai/QwQ-32B-Coder-Fusion-9010/8f65748b-1251-49f8-bfed-d1e4a937d5ba.json create mode 100644 data/hfopenllm_v2/huihui-ai/Qwen2.5-14B-Instruct-abliterated-v2/4f278881-69d3-42b5-b72c-ff8627a6ef44.json delete mode 100644 data/hfopenllm_v2/huihui-ai/Qwen2.5-14B-Instruct-abliterated-v2/92cad41b-64b5-48db-b865-77d0ea2ef834.json delete mode 100644 data/hfopenllm_v2/huihui-ai/Qwen2.5-72B-Instruct-abliterated/b892c2f3-4aa6-4b19-80e5-1b0f5e0eda25.json create mode 100644 data/hfopenllm_v2/huihui-ai/Qwen2.5-72B-Instruct-abliterated/d88e85c5-73df-46cc-9234-f0556592ad5a.json delete mode 100644 data/hfopenllm_v2/huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2/15c4b42b-ee8f-4f0d-8d54-7d827133fe7f.json create mode 100644 data/hfopenllm_v2/huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2/44d2a20d-e867-4fa5-af3d-087f9c1b4067.json delete mode 100644 data/hfopenllm_v2/huihui-ai/Qwen2.5-7B-Instruct-abliterated/625501d4-5d1e-48e0-8690-e301c51f652d.json create mode 100644 data/hfopenllm_v2/huihui-ai/Qwen2.5-7B-Instruct-abliterated/e83b3e7e-dc34-4b06-bcfe-95b3ba28aab4.json create mode 100644 data/hfopenllm_v2/huu-ontocord/wide_3b_orpo_stage1.1-ss1-orpo3/44f2948c-4564-44cc-98d8-4f82a30e1f09.json delete mode 100644 data/hfopenllm_v2/huu-ontocord/wide_3b_orpo_stage1.1-ss1-orpo3/50854a36-b87e-421d-b8d5-7a46054ecc59.json create mode 100644 data/hfopenllm_v2/iFaz/llama31_8B_en_emo_v4/846cf1ff-62c3-44e7-b6dd-0135ec77451a.json create mode 100644 data/hfopenllm_v2/iFaz/llama32_1B_en_emo_v1/d2054469-b38b-4b1d-bd40-7324319f8eca.json create mode 100644 data/hfopenllm_v2/iFaz/llama32_3B_en_emo_1000_stp/ce60608d-5b52-49d4-bbce-4b20e8272cef.json create mode 100644 data/hfopenllm_v2/iFaz/llama32_3B_en_emo_2000_stp/f177bb70-fb7c-4b57-965d-acbcb4936bfa.json create mode 100644 data/hfopenllm_v2/iFaz/llama32_3B_en_emo_300_stp/a5b2ab3d-1f12-4a5a-a110-2514185568b6.json create mode 100644 data/hfopenllm_v2/iFaz/llama32_3B_en_emo_5000_stp/63b887a1-a0b9-46db-a563-b9bd67a0805a.json create mode 100644 data/hfopenllm_v2/iFaz/llama32_3B_en_emo_v2/92d122f7-f29d-49e3-99da-bf20edf377a2.json create mode 100644 data/hfopenllm_v2/iFaz/llama32_3B_en_emo_v3/a0b71344-f3a8-4ad0-87c5-6393148488b1.json delete mode 100644 data/hfopenllm_v2/iRyanBell/ARC1-II/19afc23f-5849-4147-b240-9bb7ddea4d58.json create mode 100644 data/hfopenllm_v2/iRyanBell/ARC1-II/821ff784-c48a-4623-9fb5-b77b7114b625.json delete mode 100644 
data/hfopenllm_v2/iRyanBell/ARC1/62f9b47d-2860-44b3-8abb-3d441f4bdeb4.json create mode 100644 data/hfopenllm_v2/iRyanBell/ARC1/ed251513-4807-4e31-bc8e-3ab0217ae4f3.json create mode 100644 data/hfopenllm_v2/ibivibiv/colossus_120b/e7fa3baa-07b4-4f10-aa9c-8424d8fea303.json delete mode 100644 data/hfopenllm_v2/ibivibiv/colossus_120b/f0bcf710-b1a8-4736-9fd3-6b0ea241155e.json create mode 100644 data/hfopenllm_v2/ibivibiv/multimaster-7b-v6/11dfd131-00bf-4561-a913-f1c0cb15bf9c.json delete mode 100644 data/hfopenllm_v2/ibivibiv/multimaster-7b-v6/7044a4d4-1c07-40ef-917c-d242b61d7877.json create mode 100644 data/hfopenllm_v2/ibm-granite/granite-3.0-1b-a400m-base/3ba34f38-2340-407f-a7b5-82749f8a0ee6.json delete mode 100644 data/hfopenllm_v2/ibm-granite/granite-3.0-1b-a400m-base/52e253ba-0291-4e78-b292-806cabe74697.json create mode 100644 data/hfopenllm_v2/ibm-granite/granite-3.0-1b-a400m-instruct/91b9649b-bdf6-4b15-a038-47edc2e79ef6.json delete mode 100644 data/hfopenllm_v2/ibm-granite/granite-3.0-1b-a400m-instruct/afc49838-c7fc-40ed-841f-74b0bc3dd36e.json delete mode 100644 data/hfopenllm_v2/ibm-granite/granite-3.0-2b-base/184f8ef6-7cb7-45f2-b983-70dc4503a968.json create mode 100644 data/hfopenllm_v2/ibm-granite/granite-3.0-2b-base/24670e63-32e1-4c5d-82fe-0d0c45a4e165.json create mode 100644 data/hfopenllm_v2/ibm-granite/granite-3.0-2b-instruct/198d1441-1d13-468a-a998-c8cf9f1e7a57.json delete mode 100644 data/hfopenllm_v2/ibm-granite/granite-3.0-2b-instruct/ec853cc1-7c48-4334-9ff6-d9669750570b.json create mode 100644 data/hfopenllm_v2/ibm-granite/granite-3.0-3b-a800m-base/e9eb1499-835c-4a70-b531-4be5a9718c34.json delete mode 100644 data/hfopenllm_v2/ibm-granite/granite-3.0-3b-a800m-base/f917bdff-4be5-440b-8e62-bb9f7b0dd0f5.json delete mode 100644 data/hfopenllm_v2/ibm-granite/granite-3.0-3b-a800m-instruct/7c92caf5-df83-4c8e-ab85-f99c7ac43f63.json create mode 100644 data/hfopenllm_v2/ibm-granite/granite-3.0-3b-a800m-instruct/b1fd95ad-767d-4c13-a936-00b08c74ca3d.json delete mode 100644 data/hfopenllm_v2/ibm-granite/granite-3.0-8b-base/b7b71327-323b-4b7c-92a1-426911bed479.json create mode 100644 data/hfopenllm_v2/ibm-granite/granite-3.0-8b-base/f87bd357-535e-4450-b01d-b41e1b7571e0.json create mode 100644 data/hfopenllm_v2/ibm-granite/granite-3.0-8b-instruct/300fd27e-4dce-441f-91da-f38bd14ffe5e.json delete mode 100644 data/hfopenllm_v2/ibm-granite/granite-3.0-8b-instruct/d4dc4d78-33a3-428c-9490-382dd0c19c08.json delete mode 100644 data/hfopenllm_v2/ibm-granite/granite-3.1-1b-a400m-base/17192714-a653-428d-a7c7-06dd41db77fa.json create mode 100644 data/hfopenllm_v2/ibm-granite/granite-3.1-1b-a400m-base/1fd9a2e5-856f-4303-8ac1-611311f3e7b5.json create mode 100644 data/hfopenllm_v2/ibm-granite/granite-3.1-1b-a400m-instruct/4c34d5c6-af1b-4519-8d08-67bd837e9b97.json delete mode 100644 data/hfopenllm_v2/ibm-granite/granite-3.1-1b-a400m-instruct/8167695b-db96-4687-91b8-0af55e67a606.json delete mode 100644 data/hfopenllm_v2/ibm-granite/granite-3.1-2b-base/971e6eba-61ff-42e6-9740-1895080ff94f.json create mode 100644 data/hfopenllm_v2/ibm-granite/granite-3.1-2b-base/ddc27df7-1c4c-4563-92b2-5a39380423a8.json create mode 100644 data/hfopenllm_v2/ibm-granite/granite-3.1-2b-instruct/3e606ef8-9caa-43d4-81d6-8eae9936ab4c.json delete mode 100644 data/hfopenllm_v2/ibm-granite/granite-3.1-2b-instruct/fcdf14a1-900f-4856-aac6-8ed47910f882.json delete mode 100644 data/hfopenllm_v2/ibm-granite/granite-3.1-3b-a800m-base/8930e3f9-e0b8-4fb7-91e2-ee34b17cf1eb.json create mode 100644 
data/hfopenllm_v2/ibm-granite/granite-3.1-3b-a800m-base/b9053559-3b90-4de0-981a-dbb49db38eb5.json
 delete mode 100644 data/hfopenllm_v2/ibm-granite/granite-3.1-3b-a800m-instruct/1e0c27fc-8111-4325-8e61-c24c2f8124f7.json
 create mode 100644 data/hfopenllm_v2/ibm-granite/granite-3.1-3b-a800m-instruct/cea89bc6-b1a1-4b67-a136-45e097563a5b.json
 delete mode 100644 data/hfopenllm_v2/ibm-granite/granite-3.1-8b-base/10cbee10-0344-4da0-a26a-4298fd8f4d11.json
 create mode 100644 data/hfopenllm_v2/ibm-granite/granite-3.1-8b-base/5eb16113-7d0d-47a0-91d8-ec7dab35efdd.json
 create mode 100644 data/hfopenllm_v2/ibm-granite/granite-3.1-8b-instruct/45aa6545-d20a-4dfb-a8a6-01f2fd34c9f5.json
 delete mode 100644 data/hfopenllm_v2/ibm-granite/granite-3.1-8b-instruct/6d6b2e81-8b90-4703-aafb-40de92b3ede3.json
 delete mode 100644 data/hfopenllm_v2/ibm-granite/granite-3.2-2b-instruct/39fd9dc4-88e4-4b52-8527-c1ea692d8ca1.json
 create mode 100644 data/hfopenllm_v2/ibm-granite/granite-3.2-2b-instruct/c94079d1-d8b1-4198-8129-8c5a11c310ca.json
 delete mode 100644 data/hfopenllm_v2/ibm-granite/granite-3.2-8b-instruct/982accb5-ea5c-45bc-8cdd-08edf5e543a1.json
 create mode 100644 data/hfopenllm_v2/ibm-granite/granite-3.2-8b-instruct/cb45306a-096c-4ed5-a028-6d720b26afe9.json
 delete mode 100644 data/hfopenllm_v2/ibm-granite/granite-7b-base/2d21a773-8f72-4b7d-ba94-80867127c54a.json
 create mode 100644 data/hfopenllm_v2/ibm-granite/granite-7b-base/f301908e-474b-4ba2-a873-610ca1b6c2bd.json
 create mode 100644 data/hfopenllm_v2/ibm-granite/granite-7b-instruct/06f5865d-a62a-48da-b33f-486fe29e3685.json
 delete mode 100644 data/hfopenllm_v2/ibm-granite/granite-7b-instruct/509f5b3a-6110-4757-a313-80181ecd3228.json
 create mode 100644 data/hfopenllm_v2/ibm/PowerLM-3b/4f952c51-91dc-446e-bda1-43ed66e1ca3e.json
 delete mode 100644 data/hfopenllm_v2/ibm/PowerLM-3b/f1eb3ba0-225e-49d5-9509-422702927c9f.json
 delete mode 100644 data/hfopenllm_v2/ibm/merlinite-7b/7fdbc273-200d-4085-8a03-8f56cde4f2fc.json
 create mode 100644 data/hfopenllm_v2/ibm/merlinite-7b/dcba3a6f-8f4f-49f6-af74-541de16be435.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.15-02.10-RP/20c0d1f9-24b8-4993-82f1-d9889c18c56a.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.15-02.10-RP/b5d39bcb-dab4-4880-9cb1-68dbd20a3ce5.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.16-02.10-RP/1e597e9b-4e75-4981-842b-dad6f1c15ed7.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.16-02.10-RP/824cb85d-e7a0-421a-994b-c0b178ab8e56.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.17-03.10-RP/18752dc4-76d1-40dc-9f43-62b8087b7a88.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.17-03.10-RP/2faf039c-9c8e-46db-8472-6b741c451bf1.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.27-06.11-RP/314c9c7e-0c13-4f6b-be25-d2a2cbc25e9b.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.27-06.11-RP/fa30c36e-20f1-41ee-a59d-0044f2b76dfb.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.29-06.11-RP/5391ae8f-41b0-41cb-9365-b5cb7649c8b7.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.29-06.11-RP/b07e3d05-409f-498a-a324-82c4a592d4dc.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.31-08.11-RP/1fc072c6-ad31-4151-8420-7402b565510d.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.31-08.11-RP/a95ab4cf-456f-4b3d-9bab-2b755649758d.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.32-10.11-RP/68e99fe4-634e-4462-b1db-d2d40814ff0b.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.32-10.11-RP/9840baa9-2ddf-4dd9-b3b0-3ec3075089bc.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.34b-14.11-RP/26ff113c-95ca-4716-83f7-4792b46be246.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.34b-14.11-RP/ed2a47c3-06c7-451b-94cd-8cd42be2ca9c.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.34n-14.11-RP/285e1d08-15a0-4d8b-a844-e4cad923ea9b.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.34n-14.11-RP/8c6aae5b-6a9b-47fb-908b-6b51159cc9b2.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.37-18.11-RP/0462269d-94a3-4991-9af5-e55592f344e5.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.37-18.11-RP/774c0461-5e81-436a-9347-7a4cc15ca019.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.38-19.11-RP/4d13aaf7-a18d-4bad-ab22-8e08c3f2e16a.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.38-19.11-RP/c47c4cd6-90b6-42df-a3b9-4fc8f1b3c980.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.39-19.11-RP/0fecafe4-f8f0-4f97-ab2d-589a3856e1af.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.39-19.11-RP/780c711f-774b-499e-881e-25dba76273a1.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.40-20.11-RP/4b5529b9-0800-4cd6-b720-a905ab5e6c9a.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.40-20.11-RP/5220bee5-74d3-4730-9fee-4ca488e1a37e.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.41-22.11-RP/43a30cf0-ccb5-46ce-b520-55ee110002c9.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.41-22.11-RP/84783e4d-5eed-474d-9463-a01a0890850e.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.50-16.01-RP/37602e25-bd23-462a-8566-38f3b0fee63d.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.50-16.01-RP/d9fe39c5-24a5-4240-bfc9-59860fcb3911.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.50.1-16.01-RP/2ddf850e-36dc-41b2-92da-e2b45d1544c6.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.50.1-16.01-RP/fde6323e-0bfe-4ec9-aa86-4371bbd1645a.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.51-16.01-RP/7a137ac4-8445-4c1a-9203-abc5f4131213.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.51-16.01-RP/b10a9284-fa5e-4a4e-8240-edc98cea6d9c.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.51.1-16.01-RP/2c51bd1d-ebe8-4de9-9749-5f42f7ba3d5a.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.51.1-16.01-RP/859a9706-f73b-4426-9c5a-052625d62f5b.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.52-16.01-RP/425e6f1e-50dd-444f-b0da-5a0c47d5bf06.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.52-16.01-RP/72412b78-cc3e-4652-9034-32c72aee5796.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.52.1-16.01-RP/6bfbd9d6-b376-4169-8e6a-2c3210040e97.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.52.1-16.01-RP/7e1fcf4e-9f64-4112-934c-4808f07d32b2.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.53-16.01-RP/6415adfc-35a9-480c-a740-dac02725c8f0.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.53-16.01-RP/d3666566-09dc-4d53-9996-2301c6fb2721.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.54-17.01-RP/36e5efb9-e3f0-4903-a9f1-3d51453bfdc4.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.54-17.01-RP/94d01e56-d7d5-4680-b577-ebcc0198ca0c.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.55-17.01-RP/a2de66f0-bbd1-40b9-95d3-74e0335b853b.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.55-17.01-RP/a6dba337-81d2-40c6-89c2-aee6de82282e.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.57-17.01-RP/8d99bf0e-7db0-46f5-96a0-7f977b8cf5f2.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.57-17.01-RP/e44b8d9a-f270-45c8-b126-6a8911c35436.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.60-18.01-RP/44d5e1ac-45d5-42aa-b9fa-f18112cf6676.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.60-18.01-RP/b5c42995-f1fe-4a7e-90c1-d8fb00cba116.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.60.1-18.01-RP/4246401d-9049-4c83-83d4-e2d9efa4dded.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.60.1-18.01-RP/8a14ed64-1408-469e-ab8d-05c897904d20.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.61-18.01-RP/1c166a10-c176-42c7-9421-750e170f5706.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.61-18.01-RP/26c4785a-0caf-4b01-be5d-1e421bfeb698.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.62-18.01-RP/0c5bb530-f59b-4097-8a79-9e4f524385a2.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.62-18.01-RP/cc9b9a25-18f9-4cc3-a756-3975a3a3be7d.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.62.1-24.01-RP/26ba869e-ae3b-44ef-a215-f94e4e4cb1fc.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.62.1-24.01-RP/b4edb7f5-a675-4627-af96-7ed0909da1e5.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.64-24.01-RP/461b6f40-6f19-48b1-857e-f0fb37f929f9.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.64-24.01-RP/d7313786-f553-454e-b2c8-62a0162c9339.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.64.1-24.01-RP/359daeb1-3546-473f-801b-c9942fd010aa.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.64.1-24.01-RP/e924270d-a655-4093-91b2-f73b7f12eefd.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.65-25.01-RP/af8905e0-e969-45bd-8e09-e7316fff0914.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.65-25.01-RP/fa5d2148-c45b-4266-a6a0-11b471273f75.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.66-25.01-RP/b619dad2-fcb2-45ab-b603-ae1da3916eb7.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.66-25.01-RP/e92a6d31-2277-4093-8fae-b3dfaa2d47dd.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.67-25.01-RP/47472cd9-36d3-4074-83d4-af53b9c23758.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.67-25.01-RP/cf0a4a2d-a104-43cf-ac01-66250e880ff0.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.68-25.01-RP/b922f4e1-1fd9-4a32-94ce-4784430cef51.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.68-25.01-RP/dd7cb16f-0752-4639-aa99-90b9be448295.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.69-25.01-RP/5bb2e77f-7709-4eb8-bd08-3c8da4a56310.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.69-25.01-RP/643da0d0-176a-40dd-b096-5aac8de827e9.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.7-29.09-RP/35937213-bb16-4935-9d92-9fa8fd61aac3.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.7-29.09-RP/9c6cf7a1-1a17-4070-9ce3-633461334f42.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.70-25.01-RP/04122d1b-929d-439c-bb8d-f08508f7a00e.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.70-25.01-RP/e109acd0-c7e3-4a9f-8e06-c428b95acc83.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.70.1-01.02-RP/03beb242-2628-4ea0-a2f3-c3ec43d379de.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.70.1-01.02-RP/ee088f70-5734-4951-8bc0-e0579a053fd2.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.73-01.02-RP/46d55b7b-1972-4cb0-97ca-e04d306282a7.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.73-01.02-RP/ba7bf09f-b7a1-4fd4-b262-4929a81da34a.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.74-02.02-RP/32730d82-cfac-481f-9a22-9cbe40646218.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.74-02.02-RP/7470c7d4-80fe-4e88-a695-c628f9ed3682.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.76-02.02-RP/701743bb-1ddf-4810-824a-38959d4a0e02.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.76-02.02-RP/a290a75f-753b-489d-87a2-ce0637c09f41.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.77-02.02-RP/0eebefc6-138f-4af5-a8b6-a35c798a38cb.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.77-02.02-RP/54032eb0-c4cd-4c76-be2e-f0c81bd26365.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.78-02.02-RP/73b59506-cc1d-413c-a28b-d25e0e6bf413.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.78-02.02-RP/ec943fa1-b138-46e8-b1ae-c9a476c73ed1.json
 delete mode 100644 data/hfopenllm_v2/icefog72/Ice0.80-03.02-RP/847b4e14-a07c-45ed-b2eb-ecea0f80147b.json
 create mode 100644 data/hfopenllm_v2/icefog72/Ice0.80-03.02-RP/bea2dcd6-4772-4aac-bcbc-4802cfb33495.json
 delete mode 100644 data/hfopenllm_v2/icefog72/IceCocoaRP-7b/5427828d-b53d-4e44-82ed-df6a9c0f9a47.json
 create mode 100644 data/hfopenllm_v2/icefog72/IceCocoaRP-7b/66275215-28e6-42bc-bc22-5d152682ce53.json
 create mode 100644 data/hfopenllm_v2/icefog72/IceCoffeeRP-7b/9015365c-400b-4fa3-85f2-a1033b030cf7.json
 delete mode 100644 data/hfopenllm_v2/icefog72/IceCoffeeRP-7b/bf5e2b11-79ce-49ed-947b-fb34110a3802.json
 delete mode 100644 data/hfopenllm_v2/icefog72/IceDrinkByFrankensteinV3RP/39325b65-ad12-44ef-a1bf-ffe9e870ced8.json
 create mode 100644 data/hfopenllm_v2/icefog72/IceDrinkByFrankensteinV3RP/55d52914-0904-4e6e-8b37-c22b06f5f2bf.json
 create mode 100644 data/hfopenllm_v2/icefog72/IceDrinkNameGoesHereRP-7b-Model_Stock/3677260a-2fd5-41bf-9010-f1b31cedacbc.json
 delete mode 100644 data/hfopenllm_v2/icefog72/IceDrinkNameGoesHereRP-7b-Model_Stock/b0aaf6e9-ffe3-4de9-b3f5-c33d52b59ed2.json
 delete mode 100644 data/hfopenllm_v2/icefog72/IceDrinkNameNotFoundRP-7b-Model_Stock/f0e6fa5e-20c2-407d-8301-70d86cb1a51f.json
 create mode 100644 data/hfopenllm_v2/icefog72/IceDrinkNameNotFoundRP-7b-Model_Stock/fc54f87a-2e4a-4f3f-b407-e268c4487d16.json
 create mode 100644 data/hfopenllm_v2/icefog72/IceDrunkCherryRP-7b/8d893736-1707-4c0b-860d-16c62ec26d78.json
 delete mode 100644 data/hfopenllm_v2/icefog72/IceDrunkCherryRP-7b/c0e3f4ee-52dc-45c3-844a-8cc4e4520f24.json
 delete mode 100644 data/hfopenllm_v2/icefog72/IceDrunkenCherryRP-7b/9d1e6b55-aa7c-4fea-8a77-92795c0ee60a.json
 create mode 100644 data/hfopenllm_v2/icefog72/IceDrunkenCherryRP-7b/d3d2728f-74bf-4196-a909-43797d8b628a.json
 delete mode 100644 data/hfopenllm_v2/icefog72/IceEspressoRPv2-7b/ade14c35-442b-4a0a-8345-99b7b58dc194.json
 create mode 100644 data/hfopenllm_v2/icefog72/IceEspressoRPv2-7b/ed241e67-8718-48be-a6e8-19e295a2b5cd.json
 create mode 100644 data/hfopenllm_v2/icefog72/IceLemonTeaRP-32k-7b/05aafad3-e07a-453b-a70b-f18fbd4eb218.json
 delete mode 100644 data/hfopenllm_v2/icefog72/IceLemonTeaRP-32k-7b/fd90b65b-7b6f-4ca2-93e3-59486c0ee070.json
 delete mode 100644 data/hfopenllm_v2/icefog72/IceMartiniRP-7b/210bea5c-35de-4bd6-93db-871704add0d6.json
 create mode 100644 data/hfopenllm_v2/icefog72/IceMartiniRP-7b/f79ac32e-ab83-40c3-9c18-35623f5ae1d4.json
 delete mode 100644 data/hfopenllm_v2/icefog72/IceNalyvkaRP-7b/95dd235d-6930-48fd-8594-5acb0110be29.json
 create mode 100644 data/hfopenllm_v2/icefog72/IceNalyvkaRP-7b/cec76b15-1069-4d37-b8bc-74dde28101f6.json
 delete mode 100644 data/hfopenllm_v2/icefog72/IceSakeRP-7b/67e351c8-6cca-4982-86e9-e774786c6862.json
 create mode 100644 data/hfopenllm_v2/icefog72/IceSakeRP-7b/e4ac0d0c-65ea-4b43-bb4b-7371c6cd5d61.json
 delete mode 100644 data/hfopenllm_v2/icefog72/IceSakeV4RP-7b/93b5850f-74d0-45cd-977e-5bf6e4dc5d8d.json
 create mode 100644 data/hfopenllm_v2/icefog72/IceSakeV4RP-7b/f8d629bf-df0b-4c6a-8c18-17dda002b089.json
 create mode 100644 data/hfopenllm_v2/icefog72/IceSakeV6RP-7b/6739d8e3-f4bd-4fd5-98f3-887f5ed3f9c0.json
 delete mode 100644 data/hfopenllm_v2/icefog72/IceSakeV6RP-7b/e9ebbcbf-81d5-494b-95a1-4e79feb42c40.json
 create mode 100644 data/hfopenllm_v2/icefog72/IceSakeV8RP-7b/a51722f4-29f4-47a5-acba-4c8b5355551b.json
 delete mode 100644 data/hfopenllm_v2/icefog72/IceSakeV8RP-7b/dbeb9a8a-53c5-472b-a4b1-1aa0582f8486.json
 create mode 100644 data/hfopenllm_v2/icefog72/IceTea21EnergyDrinkRPV13-DPOv3.5/06d0a21f-f6e4-4ca9-a679-8c4502aaaad1.json
 delete mode 100644 data/hfopenllm_v2/icefog72/IceTea21EnergyDrinkRPV13-DPOv3.5/f4d3a112-d529-48f8-a99e-85e9eb02e0c1.json
 create mode 100644 data/hfopenllm_v2/icefog72/IceTea21EnergyDrinkRPV13-DPOv3/04a4dcc9-3784-4aea-9faf-9db49c2e4c43.json
 delete mode 100644 data/hfopenllm_v2/icefog72/IceTea21EnergyDrinkRPV13-DPOv3/4b4a9630-c942-445e-b396-4a988d489aa7.json
 create mode 100644 data/hfopenllm_v2/ifable/gemma-2-Ifable-9B/e4668365-d3dd-4996-9bb1-5b4e6f510264.json
 create mode 100644 data/hfopenllm_v2/ilsp/Llama-Krikri-8B-Instruct/4d743678-e14d-4866-b1bf-0d660787847b.json
 delete mode 100644 data/hfopenllm_v2/ilsp/Llama-Krikri-8B-Instruct/592bd629-d0bf-48b0-83c6-abfa3731fd14.json
 delete mode 100644 data/hfopenllm_v2/inflatebot/MN-12B-Mag-Mell-R1/43f7613d-bd9f-480d-a2ed-dcabf3169944.json
 create mode 100644 data/hfopenllm_v2/inflatebot/MN-12B-Mag-Mell-R1/720b1476-876c-47d1-bf46-d037389b4b2f.json
 create mode 100644 data/hfopenllm_v2/informatiker/Qwen2-7B-Instruct-abliterated/4e4f3b2d-5b17-486a-a2ab-c2e89194c765.json
 delete mode 100644 data/hfopenllm_v2/informatiker/Qwen2-7B-Instruct-abliterated/be1ab009-3aa6-43da-8b8e-11e5287a0370.json
 delete mode 100644 data/hfopenllm_v2/insightfactory/Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model/3986b43c-2752-4a8f-b1e1-c3657734f84b.json
 create mode 100644 data/hfopenllm_v2/insightfactory/Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model/b738668e-3ac1-4a36-ad71-ad7d2a5256ae.json
 delete mode 100644 data/hfopenllm_v2/instruction-pretrain/InstructLM-500M/38ba0438-f5ed-434e-af2e-fed71988f7b9.json
 create mode 100644 data/hfopenllm_v2/instruction-pretrain/InstructLM-500M/623f1b73-1505-4527-b41c-dcb2b711226d.json
 create mode 100644 data/hfopenllm_v2/internlm/internlm2-1_8b/53f03454-9587-4208-bc01-21de62f59195.json
 delete mode 100644 data/hfopenllm_v2/internlm/internlm2-1_8b/fc23ef4f-2ef1-4a3e-b029-9d646145e135.json
 delete mode 100644 data/hfopenllm_v2/internlm/internlm2-7b/d4bba57d-2a3c-4945-ae47-7830840d0259.json
 create mode 100644 data/hfopenllm_v2/internlm/internlm2-7b/fb38d8b4-6320-4b8d-bf3d-e3d22bb0ed83.json
 delete mode 100644 data/hfopenllm_v2/internlm/internlm2-chat-1_8b/767b5c7e-6319-487f-906c-2abca794f884.json
 create mode 100644 data/hfopenllm_v2/internlm/internlm2-chat-1_8b/b127a923-3bf2-4cad-9225-d738efe800e3.json
 create mode 100644 data/hfopenllm_v2/internlm/internlm2_5-1_8b-chat/a94ae52a-7936-4750-83f5-4740f23adf15.json
 delete mode 100644 data/hfopenllm_v2/internlm/internlm2_5-1_8b-chat/d37e87e2-53c3-42fa-b78d-04d2819b14d3.json
 create mode 100644 data/hfopenllm_v2/internlm/internlm2_5-20b-chat/95e689c6-cd19-4114-b3b5-1672ab849214.json
 delete mode 100644 data/hfopenllm_v2/internlm/internlm2_5-20b-chat/a651c814-41e2-4951-bb8f-df799cc6e470.json
 delete mode 100644 data/hfopenllm_v2/internlm/internlm2_5-7b-chat/28245528-26e8-48a8-9cc8-68d7a6389bde.json
 create mode 100644 data/hfopenllm_v2/internlm/internlm2_5-7b-chat/890a8414-bccf-4a66-8013-6c270d017965.json
 create mode 100644 data/hfopenllm_v2/intervitens/mini-magnum-12b-v1.1/0f8ce410-cf3b-4f78-81b9-a0a1fe91b963.json
 delete mode 100644 data/hfopenllm_v2/intervitens/mini-magnum-12b-v1.1/8ad974e6-8d4c-45bf-86d0-f701cfc323d5.json
 create mode 100644 data/hfopenllm_v2/inumulaisk/eval_model/121096cf-356b-4069-a0a3-8cf6aad52b81.json
 delete mode 100644 data/hfopenllm_v2/inumulaisk/eval_model/e3e4a9b3-ce68-4999-966e-2ef2baf99266.json
 delete mode 100644 data/hfopenllm_v2/invalid-coder/Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp/cdb8a900-75f3-4e6b-9d35-5a6791e8acd1.json
 create mode 100644 data/hfopenllm_v2/invalid-coder/Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp/fb0bcadf-32a0-4320-909f-2c38ba7d9372.json
 create mode 100644 data/hfopenllm_v2/invisietch/EtherealRainbow-v0.2-8B/ab941c52-cf33-4b8e-87af-4a73930cf72a.json
 delete mode 100644 data/hfopenllm_v2/invisietch/EtherealRainbow-v0.2-8B/c60869f0-7009-48c9-be41-339335e5ee4e.json
 create mode 100644 data/hfopenllm_v2/invisietch/EtherealRainbow-v0.3-8B/08c242fd-0258-4817-970a-668584ed9385.json
 delete mode 100644 data/hfopenllm_v2/invisietch/EtherealRainbow-v0.3-8B/cc85ba7f-bbc0-43e7-a678-949fd5be8498.json
 create mode 100644 data/hfopenllm_v2/invisietch/MiS-Firefly-v0.2-22B/2171af9a-be5e-4daf-8e67-a5239ccec7bd.json
 delete mode 100644 data/hfopenllm_v2/invisietch/MiS-Firefly-v0.2-22B/6df8e489-865f-4692-a673-6abbf2159d1d.json
 create mode 100644 data/hfopenllm_v2/invisietch/Nimbus-Miqu-v0.1-70B/706f75a1-2f6b-47dd-809e-a830e739b574.json
 delete mode 100644 data/hfopenllm_v2/invisietch/Nimbus-Miqu-v0.1-70B/c36d07f4-b263-4849-86f9-d3fea355c068.json
 create mode 100644 data/hfopenllm_v2/irahulpandey/mistralai-7B-slerp-v0.1/a9cd0399-4670-4f5c-8c64-c82dac97cd8c.json
 create mode 100644 data/hfopenllm_v2/jaredjoss/pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model/67cfd12d-0551-406d-bd1d-8ced75c69478.json
 delete mode 100644 data/hfopenllm_v2/jaredjoss/pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model/cf6b0824-45c4-4b47-bf23-e5df5673b74e.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2-8B/0064f2f6-672e-478c-9184-e7fd32ad06b8.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2-8B/0a31d2f0-196b-4508-861a-1ba7bd28ea23.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2.1-8B/4381d7ab-d19f-4fa0-a69a-978af28df8fa.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2.1-8B/57576999-2749-441a-91d6-5a976e83a658.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2.2-8B/4e616fc6-8baa-4c9a-9098-b8d108911ad2.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2.2-8B/e44792e6-0329-4784-832b-3043478e70a4.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2.3-8B/8b3789d6-51be-472a-95d3-2ae7c34ad140.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2.3-8B/9c7ee100-754e-4665-8527-452021a2243b.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-Aurora_faustus-8B/0563ee22-d981-45cb-83f8-7dbdb2734d10.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-Aurora_faustus-8B/3f4765f2-551b-485f-9020-0cf17a36a887.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-8B/6375a845-5d86-4dcf-bfd2-e836daa4ca11.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-8B/746ffa2c-cc95-4d69-9e46-0e8f4febd440.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Franken-Immersive-v39-8B/65a74446-6964-4f5f-8ea6-aeb1b09595ae.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Franken-Immersive-v39-8B/f9e1901a-854d-4437-8d49-a6c47799f687.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Franken-v38-8B/8919b3ad-529c-4391-bec3-65b81dad97c3.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Franken-v38-8B/dcba5998-3b84-4753-a4fa-2558ffe3e69b.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Fusion-8B/0af6b3c0-6638-4bd8-bdd9-349e2b9ca71c.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Fusion-8B/3030519e-f137-4091-9394-26a0779f0ad9.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Fusion-8B/4e332594-d0b9-4913-9950-208abe4faab7.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Fusion-8B/ac41e588-0664-44f5-9fa9-eafd6508078b.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-8B/5ad2ad73-47ed-465d-b4c0-b358e6b6435f.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-8B/eb68e0e3-1e39-4779-bc99-4e1825d9c602.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-light-8B/0d2e1c3f-8ee6-44b0-912a-452e2a5a6da7.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-light-8B/c9f716ef-0aa6-445f-8fc9-b102f3a0ea2a.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v23-8B/5d5ae047-72d1-4083-8e28-dcce7337ed25.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v23-8B/a2e32a77-867c-4921-ada4-c7b169efbebe.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v24-8B/e6b62da0-ad6d-431c-8a0e-185c6eddf3da.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v24-8B/f76f759f-d05d-4eb6-a2b9-3b1dfbe840f0.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v25-8B/81c8704c-7124-42d1-b320-77e31e35898b.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v25-8B/ece0bd6b-4eec-485c-942b-e23f3295c2f8.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v26-8B/6705072a-5a46-49ae-925f-1cf7da1ea288.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v26-8B/ada110bb-0988-4c19-9798-74577dde5ce9.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v27-8B/d3dcd3f0-2f43-4b82-ba29-77a69a9b3e8f.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v27-8B/ed4f994d-d196-40bd-8f8f-f6a7f07c3c90.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v28-8B/57395f9a-0534-453e-80fc-96e9dc5cd9c3.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v28-8B/e2aa230d-452e-42f0-a780-af255c62120e.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v29-8B/86e94a19-e497-4539-802b-597ce0e0ced0.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v29-8B/f8f70702-9ab4-4e1a-a11d-090627d58f02.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v30-8B/320c581d-f667-4dab-a32c-bb9f2621e84d.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v30-8B/3cab8bda-bdf6-4345-b89e-18d34a8f6361.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v31-8B/0757cecd-bc5f-4095-90ee-25920ae6670c.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v31-8B/0955fc17-8878-401a-9ec3-149528ee51e1.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v32-8B/c63bf49a-e7d4-4853-8684-9cc03eaa7840.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v32-8B/f58f0ecc-a059-448d-a2f9-e36b601e2154.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v33-8B/2436838e-2b6a-4c1e-b8c2-ec505d9a4c34.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v33-8B/65e6a3b6-4291-4591-bc0b-576930061c68.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v34-8B/11486e0e-a9e3-43b0-b26e-299a86555d16.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v34-8B/1ddf9e02-4066-440e-a777-fcd3f96bc4b3.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-8B/75037d12-da94-4c55-8de5-a7cef098d4b0.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-8B/f9f96bb2-edbc-4112-97aa-a7420dea32a1.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-light-8B/3a24b30f-7698-4ecb-ac26-3537a0b38616.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-light-8B/9f0aa20f-8687-4c21-b222-39a322f90842.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v19-8B/91c2897a-3ae3-402b-aadf-26d0b8d746c5.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v19-8B/d4030df6-2be6-4f46-9c9b-ce3037b9a004.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v20-8B/4a60fea6-e0e8-497e-9b29-439e7641e77b.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v20-8B/ec234403-f43d-46a0-84a4-ab47673226b3.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v21-8B/805379f4-784f-4602-92e8-180df4da9fc3.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v21-8B/d9c819c2-a3f6-481e-bd71-47912aef9847.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v22-8B/6e20f902-8752-466c-b8d4-34787fb90fce.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v22-8B/9f3920aa-9400-46f1-bcfa-969f69b3335c.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-8B/26cbf444-ab93-409a-b85d-e2bd267eae5e.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-8B/d25510e4-6549-4f64-8ec4-37ac8671050c.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-alt-8B/58e279d4-da0f-4e2c-a74d-c51caeaad884.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-alt-8B/7c2b17a8-1de2-4441-a281-fe3fd043f831.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-light-8B/64c07a98-4f3f-49f7-99de-9963dcfedeba.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-light-8B/94c5756c-cbde-46e2-90d2-207678373061.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-light-alt-8B/abebffbf-48b5-4452-8c7a-bb1175a7e979.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-light-alt-8B/e0048124-89bf-4327-88a8-00aa51ee29af.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-ultra-light-8B/1810feae-7a27-4c17-8174-3cd8a143b21f.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-ultra-light-8B/9d776307-43af-43bb-ab64-52fb7f331cfe.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v13-8B/1fc6ca13-157c-4502-8724-be153afb6347.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v13-8B/d8d41981-a7c8-48e9-a63c-86520a0f23d5.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v14-8B/1355985c-fbcb-4eac-8435-417d6034f2f0.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v14-8B/c20f5702-24fc-443a-875e-495401776eeb.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v15-8B/24e11e0c-fb61-46c1-a05e-c533eb392195.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v15-8B/44486b02-7bdd-4f59-8d4e-5c8deeb1fd60.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v16-8B/15deaa33-87a2-442e-9618-13f5ab6c299e.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v16-8B/45ae3dc3-6dc0-4d10-99cb-a7f330110906.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v17-8B/6b54763a-6329-47fb-bf50-296604251b47.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v17-8B/bd4cc259-d535-437a-afc5-d74a60154b07.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v18-8B/96a26bf3-b4b2-465f-8ce6-a2ef943c001a.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v18-8B/aadb6262-4f31-4681-983c-0d19e8bbc5cd.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-immersive-sof-v44-8B/41e3ecda-8988-456c-b413-19770e2f05c7.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-immersive-sof-v44-8B/655b047f-c3a8-4c9c-b864-81d318b2f506.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v10-8B/c57d95da-1b6f-4ce7-8c42-f1129fc1e55e.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v10-8B/f62fed77-e166-422d-b5ce-c50b7bccbf4c.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v11-8B/7ffdabf3-0a8e-4316-b6bd-85b10a81db53.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v11-8B/9a6b85d5-bb26-4832-915e-8b1ac90b0793.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v12-8B/2c93c987-b32d-4a02-8df4-949cc45b8eb2.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v12-8B/4bcdbab0-7220-40bb-832f-01003f59da0f.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v2-8B/02e7c1d6-9db1-4de8-b13e-afd752b3669a.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v2-8B/8f16aed2-8b31-48cc-b874-8d437f26f3db.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v3-8B/262a66ee-04e4-49d5-8bb2-efe0a93801ad.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v3-8B/580a3045-338a-47b2-8ed7-54c993d5aa90.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v4-8B/e71d3be5-ea9d-4426-aa58-5806b7541aa6.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v4-8B/fd2a2a9c-639f-4348-9861-00271ed070b2.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v5-8B/1174683a-9488-4c6b-be6b-e5a96328a96f.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v5-8B/53c89eb1-49ab-4e5f-b1ad-d8e80045a292.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v6-8B/3789b37f-daf0-4c21-82b8-309cbf00312e.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v6-8B/c0cc1ad5-9e53-45ac-becb-f8ce3e5ac631.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v7-8B/798c2f08-e10b-4115-bdd5-0d6053d03b60.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v7-8B/8586cdc1-dd4e-4112-a59c-f6bc2766701b.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v8-8B/388ef85a-db27-4851-9e6e-2002a75bc6c7.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v8-8B/946a7b16-dfa6-42ad-97c1-955bf8a40dae.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v9-8B/cd0c4096-93ee-4a04-83b0-44063770e81b.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v9-8B/d9a6cc31-57c4-4480-a019-25a34b31fcc8.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v9-TitanFusion-Mix-8B/279bd5fa-0ab1-411b-871b-bd9ff23853f6.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v9-TitanFusion-Mix-8B/69f3e2b2-8918-41a8-abc6-c84c3d674f94.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-8b/60d775f1-47a9-45ae-9b2f-75b95c9d96cd.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-8b/c26fae10-e65a-49ac-a2da-2dbf024fd10d.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-VENN-8B/6d37b2b4-630e-4471-b7a8-50f8a58902fe.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-VENN-8B/d3af54be-9d9a-4a4a-b03e-3468a801795e.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-VENN-Asymmetric-8B/de687865-4297-4130-bcfe-0c5116c9b0d1.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-VENN-Asymmetric-8B/e7cf15b2-0347-48a8-bf84-08e27b3688fd.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-VENN-Aurora_faustus-8B/8befbe9f-3ab2-4bc8-bd16-5badd2291d5d.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-VENN-Aurora_faustus-8B/ee1acad1-5dc4-4d8b-8aca-544af5dc2392.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-VENN-8B/52e3f1b1-5a1c-4cca-a36f-9f60284e1883.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/Kosmos-VENN-8B/e14cedfb-79a9-446a-ba16-64f378a47b4a.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/PRP-Kosmos-EVAA-8B/2d54c67e-fad5-4a61-b3ae-0393f16dc1ba.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/PRP-Kosmos-EVAA-8B/84a37d06-2668-4143-8e2f-5a08651f2dfb.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/PRP-Kosmos-EVAA-light-8B/5120e433-f5c7-45fa-be56-566101556271.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/PRP-Kosmos-EVAA-light-8B/72c9dcd4-ab00-4f36-a1e6-43e241c8b967.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/TSN-Kosmos-EVAA-8B/7f4b4668-c3a0-4575-957d-ba321d55f420.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/TSN-Kosmos-EVAA-8B/9819f2bd-8108-4fc5-9208-ce295d860435.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/TSN-Kosmos-EVAA-v2-8B/2ce2b8e4-0cd4-4001-8790-ad5e26e3e45c.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/TSN-Kosmos-EVAA-v2-8B/9245b74d-4b9d-4158-a402-0c3742097eba.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bbb-1/29a5fcd3-9c22-424c-ab17-70cfe187aea1.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bbb-1/b6ca35e1-8680-49e8-a6dd-963214be7411.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bbb-2/155b7412-cc16-45c3-9261-acc9322a0dcc.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bbb-2/af71bfa0-1077-4c96-a4c1-0aa28dc789bf.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bbb-3/258ebe6d-191d-4804-b5e1-5cd6ce93ba88.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bbb-3/94668ddb-d2fb-44e2-8ed7-10179d145366.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bbb-4/4765f197-82ed-44b3-9a7c-7cbabc6ecd8e.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bbb-4/828a6bd0-a205-4327-bc77-2e8a84c0b69e.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bbb-5/8c0a66fb-c87d-489d-b071-b4a599562ead.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bbb-5/a5d66f97-1f4b-43da-a83a-4a262e297fd9.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bbb-6/5d29cf73-65d6-4965-a504-4caf07108cc8.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bbb-6/ef8025de-fe9f-4a79-97f6-c26c18ab049a.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bbb-7/15ec04ae-30d3-4ffb-9b0c-54ba63410e3d.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bbb-7/a31fbd82-2e21-40e7-a73a-c6351c80bae7.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-1/15ec7997-1333-43c6-869a-ce4589af56d1.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-1/2ed96c70-390b-44de-aa08-9883a2f33ff3.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-10/67c95889-8a67-40fd-99e2-62e767c16416.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-10/86411dbb-e28b-4e9d-856e-fcc001252fbe.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-11/804f4be8-a8a9-473f-a898-d71b742a62eb.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-11/a518f39d-e073-493d-9a4f-9af53fc71abf.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-12/24f0d9bc-d743-4f46-b5a6-e855e39a1daf.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-12/736ee66e-bd19-4275-afaf-73c2112c2fbd.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-13/3d27f6d9-05a0-44bd-a225-6e6a0bf4a35b.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-13/da5a3c32-371f-44e5-89a7-c9ba6e98664e.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-15/ad28e7b8-69e6-4fb9-bec4-62c67fae6d58.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-15/af3bd92d-45f5-4a48-89aa-b8c956209d5a.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-16/0da639d4-181c-4ee1-808c-3de8003c2471.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-16/c98928d3-0d7f-429c-927c-bf8fa432101a.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-17/480bd62c-bc67-4379-bce0-b28a5d6bdf4f.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-17/787d8040-25c8-4893-b140-cf041260d767.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-18/6aad7ade-7bd0-4515-b4ac-2299c58da098.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-18/dd94c18e-b2c3-4135-aa2d-5eb0248315d0.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-19/81914fd7-1410-4b80-9be9-6ebfbb664613.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-19/a2ae2953-e341-49be-8469-32bd41d780d7.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-2/23bdd694-f250-46dd-9b8b-526fda47bc9e.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-2/3e4b8dcc-9270-4b14-952f-c6b96ee8ce57.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-20/cfe4ab09-c772-4617-88b6-77e49553605b.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-20/d600a69d-1952-4e30-abe8-1769ab63ac29.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-21/a369ff4f-7fe9-4764-be74-83563dbaf635.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-21/afc031d4-852e-4ead-9098-6ce30112b459.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-22/cb33e29f-e5e1-4bf5-9e20-86d9c3486d2d.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-22/f3815ff9-c1bd-4706-a770-4c0b0e8c5d13.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-23/a4b93124-1151-4f69-8a5e-6b916e8cf11f.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-23/f4db95ae-8e3d-45ed-9c53-3b30fde0cb3e.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-24/0b27b829-6588-4f7b-80fe-6e6767287a38.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-24/efe11d8f-65e6-4ba6-8148-fdd43c9346be.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-25/923da7be-2ec8-46b2-8187-fe08eb86d5a0.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-25/a0c16d3d-e3f2-4c50-975a-70b69824b3d5.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-26/0218b7de-bbd7-4196-8fec-3f6fb790a3c1.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-26/1652b9fe-640a-48f9-b7a5-20ae28fb5985.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-27/572463ed-f6b9-460d-9c38-0e0ee5327511.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-27/68435a43-944b-4c66-979b-eb48f7a8e77a.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-28/0dc95982-e5b0-4011-9e5b-48af7e3048f0.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-28/5f6bbbfd-16a8-4ea8-b9d9-b436a882700a.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-29/012eeeed-c556-460d-82f6-34bdc31da5cf.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-29/32322361-f18d-480d-9475-cd11a45bc4bc.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-3/37e59290-b4ea-4a44-bfb0-cdbe781c4d7f.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-3/f62d1aee-2d9e-466e-85e2-002fae5d2504.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-30/6d3a64df-5ebb-4cd8-bd6c-de799d185fe1.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-30/af389bf1-da63-49a9-9e49-32613d8d05b8.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-31/a637936e-646b-4c21-964a-61e253fd3705.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-31/ea13ae62-d050-4cc4-9cbe-99eedfc206e2.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-32/1e697620-36a7-459c-b88c-405febb57c3a.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-32/a56c62cc-c318-4de4-b6c7-0fa10229a127.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-33/532723e8-a9b7-4f72-a015-c2bd9363b5d8.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-33/bcab8546-ea69-4207-b69b-ab982b603e55.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-34/6097086b-8c8b-493e-af1a-71146a2ed566.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-34/be096a57-7d81-4999-919a-ed8a243012b2.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-35/7166192e-42b0-4990-8218-88bb38fd1bdb.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-35/cadeb016-e158-4a49-921c-efe0e4eb0cb2.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-36/3a4f8c97-9f30-44b8-8f79-7f19f90a08d1.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-36/c606d7b9-3ea3-49d4-9ecc-9610ed4b4eac.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-37/04a5eed3-7eea-4d9f-acc6-5a96ec987e2b.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-37/19490f78-486d-4325-b31e-af8555c32ea9.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-38/61e7c49e-abb9-4e38-ba3f-1018db104d83.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-38/a1c60d74-dabe-423d-9e40-3dd8112d7d8e.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-39/243e6b7b-a34f-44cd-b027-176f877ff8e7.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-39/29c7bc9b-6833-497b-a553-2941026efea5.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-4/09a60955-978e-4136-bdde-d5459e37ad2c.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-4/85ba493b-05f1-4853-a0ff-44570a7c2a82.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-40/501744a2-070a-4378-9232-f7ccd9b2a67e.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-40/56837896-11a6-458b-a17e-9540ab5ae66a.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-41/369efdc6-6529-477c-b5f0-d229c8102491.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-41/db0c4182-7391-40e7-ad6e-5374c8eb28e1.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-42/265e3cbb-484f-4cf7-8994-050f414ecf37.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-42/906645f3-2041-4380-8118-ac26b92297ba.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-43/472b725a-2bd5-440a-9768-ba8db6fe6b34.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-43/57fe8deb-02dc-43a8-8a92-14bdaf61dd67.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-44/60c18178-ff40-4e9d-9683-077cc2fa254e.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-44/95f2fa22-3da9-4876-ace3-50763f2b2453.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-46/6b3c3872-cd4d-4827-8651-6baa9d2423e7.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-46/b2f9e38f-c2a1-4e5f-a7ce-4e33a05b503b.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-47/9f30c4d4-4a3c-459e-8444-e143ef75f84e.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-47/b3173a2a-8309-498d-961b-0167d5d5dea6.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-48/0d59dd75-c999-4a7e-919a-fd084202fc9c.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-48/80bbd567-b13e-4ed4-ba85-9098639a3642.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-49/639e91d9-ebbf-4ba2-bce3-6953e7c91e32.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-49/e574e35a-56cb-471d-b4f1-df0858f5ce66.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-5/56a5fb9b-a4b7-4290-9ec9-6864b3efaa82.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-5/ec314c97-9bc0-4e14-9d57-d6204e699428.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-50/980887dd-2948-4e5f-b22c-3cc03057f493.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-50/d03fb481-be0b-4dfb-bb4d-54067e058e99.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-51/6d544c96-53c9-43d1-9cb1-6077d7235fff.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-51/d8fc3475-83e9-4790-a472-72b442087562.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-52/57efd335-4873-4e01-bfc3-0d704b3d482a.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-52/fd3c9666-09bf-4562-b49d-eea905469761.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-53/25fdcc8a-0e7d-4148-8508-2631ea6deb05.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-53/978d4a27-17c7-4f87-b3e5-27b00ffa4d80.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-54/9a2d7235-84cf-43f6-8855-68d0bf85e6e3.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-54/f5f63d06-7e51-4b91-8814-ecbda604fe6b.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-55/5326c33b-6b8a-472a-9058-a9e9fe83b599.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-55/7c388cc5-fb2f-48ba-967c-a931fcb25a42.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-56/28674053-e1b6-4f0a-a90e-5dd5082ec164.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-56/348c8f2b-807f-464b-832e-0049f8329b86.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-57/fab7388c-87ed-4108-ba4d-e1621925f264.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-57/fd27bfa7-11b3-46d3-915c-373ddf5a9865.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-58/91f190ba-39c8-47af-8351-73d1f382dd99.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-58/a9c1b649-8850-43d1-b5db-feefd0b8d0b4.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-59/974b1542-8716-4ea3-b097-f9893c9c9656.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-59/b637b55c-dd05-4060-bf33-e63e9de7fac9.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-6/bcacef79-d7c0-46e7-9194-43541c2f01fc.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-6/e8dfd77c-e2c8-42ef-b341-5476411d038d.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-60/16d14b95-fe8b-4e1f-94e1-65d966ba24d6.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-60/77a358c7-59fa-4b22-a190-dfca86c5166b.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-61/00b1b367-c4eb-4048-b80d-a8253e7c2048.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-61/ad4c8922-7079-4383-8f42-d3de6326a1e1.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-62/7f89eded-e5fc-4b3b-9afd-dcd71b7b44d5.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-62/85bd08bf-bdc3-42fb-b8f9-3d83e32921bc.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-63/07cb94ab-0aea-4ce2-89b0-4378cb892c7e.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-63/c9df2e30-5e2d-42cc-8597-dc354602350a.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-64/5fb04756-c7bb-4772-b209-0d9a300bbf7d.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-64/90830134-43d5-4d0c-9a93-4be2c1c7dba8.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-7/0c02d1b6-2d31-4c54-b881-588cbfb0c686.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-7/b63d1462-f84b-4d20-86d6-1a54cf4eb81f.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-8/a32e4d22-8096-4537-a68a-98ff9171ac8c.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-8/f6dced28-f64c-4995-88b1-ac9a82903de2.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/bh-9/4e45b666-fa7e-4a38-8b6b-65846876c8d9.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/bh-9/956d92e9-51fb-4770-8687-6003f9594345.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/dp-6-8b/5c61d4f5-25a0-4ffe-a9d2-2a33d8bbd717.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/dp-6-8b/d9cb1d13-2af5-4385-aa78-5c053e00e6c6.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/dp-7-8b/44d85302-1af8-48ef-aebe-a9512c5bc387.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/dp-7-8b/6afaec07-ebb8-4f3f-af48-c679f38f4917.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/ek-6/a05ce252-928c-4482-95f7-f4c0fc2c7c10.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/ek-6/bf8370c9-baed-4034-ac38-c6f796baca15.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/ek-7/23127691-ff90-433f-97d2-322e1191d821.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/ek-7/d397c078-6fe3-44a8-859c-a0f7c551dc3a.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/f-1-8b/91d65b2a-a96a-467b-9e5c-9efa28d7fd96.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/f-1-8b/ed61cd6a-bbf0-45f2-9536-a7a262d5d6fb.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/f-2-8b/6be795f4-0784-44bf-8926-e3060ec37dcf.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/f-2-8b/c63fc798-cf74-4767-ba95-6353b6761bcc.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/f-3-8b/5ba1e4d3-29d4-4337-bd10-9e1a5df29af4.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/f-3-8b/d4d808f5-3b79-43b5-8076-d3f785083789.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/f-4-8b/370f5923-91d7-40d2-bd06-bf2b657b8ef2.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/f-4-8b/a98ec95c-4af0-4b55-adbc-06e5ceecd00f.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/f-5-8b/4dd614dc-b68b-456c-ac55-f2221a479caa.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/f-5-8b/5334e5e4-d243-4c20-912c-d0ded74d6ea5.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/f-6-8b/2a71c7d7-8ae6-45e7-ab7f-54f7d31dd131.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/f-6-8b/7306f2cd-4fd2-4dd4-b06b-8c9aa558388b.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/f-7-8b/68cc19eb-423b-4d6d-a3bf-eac6f666bc4b.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/f-7-8b/e8c5d934-c9b6-460c-bd45-c4a3e2d26bed.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/f-8-8b/59aa26a8-93b3-43fc-8c38-ef67cd8efd80.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/f-8-8b/dad898e1-ee18-4864-b432-462d17ac8006.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/f-9-8b/1373c279-13b7-46d3-94a4-7b47c9319f88.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/f-9-8b/220cd306-0613-4c8f-9848-4af812a1d37f.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/fct-14-8b/22c3022f-d538-4a4d-8d4b-05e915506451.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/fct-14-8b/39a6a40c-3fa0-41ba-9d13-da9381263d4a.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/fct-9-8b/4d037b71-5d03-41a1-bf23-c0aea0cdcbbb.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/fct-9-8b/4d1ddf64-4626-4877-a0fa-84e06f6cf977.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/fr-1-8b/16baf620-7dcc-49f3-a787-b431e11ad4f6.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/fr-1-8b/2014c198-5e12-41ef-8f65-7321d0423573.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/fr-10-8b/4745add2-7bcb-4c05-8b12-6bd30856890b.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/fr-10-8b/725e5a72-548f-46d0-b268-12209e5cb085.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/fr-3-8b/8bdd1aba-81e4-44d1-acfd-6efeaf391ac8.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/fr-3-8b/f68b122d-4dec-4d5c-ac22-198da3d3e96b.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/gamma-Kosmos-EVAA-8B/2e20f780-ceab-4d1d-a1ab-35f4f0ac44aa.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/gamma-Kosmos-EVAA-8B/6e5584a8-5b8e-48ce-8b80-2d39a74a9b0d.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/gamma-Kosmos-EVAA-v2-8B/67f972e1-4ebd-4b78-b740-fdc03ac88aac.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/gamma-Kosmos-EVAA-v2-8B/f21bcd75-fc9f-4266-8976-3227b18b6b32.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/gamma-Kosmos-EVAA-v3-8B/7c1a81ec-1cb7-4858-8f1f-23b3ee49b73f.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/gamma-Kosmos-EVAA-v3-8B/d461545f-ebcb-49e2-94ce-a6591e31a94a.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/knf-2-8b/1cbfd1ad-237d-4cd3-8b5d-3135c194fcc0.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/knf-2-8b/267e641c-7fbd-40d3-a9b7-eb3621240b2a.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/knfp-2-8b/0bd6a333-afc0-43a4-9d14-fa44c2364184.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/knfp-2-8b/ef5c1813-a74d-4b3d-9911-c27a46c1c84e.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/knfp-3-8b/38a5c599-a098-42f4-a7cb-acee487e382a.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/knfp-3-8b/df50857d-c90e-4ec8-a9b6-96a6d2f894b1.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/kstc-1-8b/774d54fb-a445-4ed9-b79a-9c1346537e98.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/kstc-1-8b/cd7e14cb-b1f1-47d8-81a9-960da8ac4e05.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/kstc-11-8b/41b46842-dffa-4791-8225-99d676f563c9.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/kstc-11-8b/420b8be3-3560-48e8-8ab3-bb55338a9069.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/kstc-4-8b/6b63598f-4891-4b71-99ca-bc56b780d829.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/kstc-4-8b/c118b75c-597f-48a7-a4eb-675af72c9930.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/kstc-5-8b/e75534d3-b994-4e88-9274-7b62f61916cf.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/kstc-5-8b/ea79ca75-c55b-457a-b952-528a22567dbb.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/kstc-6-8b/770a1ff1-057f-49a7-9402-c6dd881ac03d.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/kstc-6-8b/f7d63a4b-070d-4581-acce-cd356a3dea47.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/kstc-8-8b/6cc9790d-9b02-437e-8ac7-be4152f5b17d.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/kstc-8-8b/85502cb7-db11-43ce-a3cf-f9329ecec2e1.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/kstc-9-8b/264f5b42-a3ac-4af1-8145-c5763b8e7fa6.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/kstc-9-8b/5f36e182-fa70-41d9-9cc6-12367035fc76.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/slu-10/549db368-437a-4982-ba5b-5c4d7bf203ae.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/slu-10/79c255e5-8a6b-4afd-a03e-e35cbcbcc712.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/slu-11/0091eabc-3888-4e1a-a29d-8c4e98b599f2.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/slu-11/0d098a19-7e8f-4a52-8466-729be91388d8.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/slu-13/1a1eaa84-9926-4c4b-b254-96cd667c25ac.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/slu-13/83335f65-25a4-4bec-a901-587567ed0e99.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/slu-14/02fb24c3-927f-4c21-bd47-b883521162a3.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/slu-14/59703023-61e1-4df0-8542-703d5a318756.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/slu-17/2a6507c7-44c1-4416-9ff1-36abd6af3b73.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/slu-17/fea528ae-4015-4adf-bce0-f9775554cc5f.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/slu-2/1950fba0-3a1b-4cbe-8fa5-9947ed8e4bad.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/slu-2/327a146a-8cfd-4480-8342-46afde530677.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/slu-20/0700fb7a-e722-432f-a64d-c040bba4deee.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/slu-20/1430e550-80ca-4f84-952f-b5b10fbca711.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/slu-22/131d3a7e-43dd-4189-8466-6562703b3bdd.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/slu-22/c0898ca4-21a7-4d83-ad2e-1aa61bd370fa.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/slu-23/8f6d7008-b8de-4a76-94aa-bbecc93ef3f7.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/slu-23/f4b76351-e472-47a9-8011-6bf2e7e33a71.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/slu-25/03c03447-1bf3-4721-8f9e-5ef041ab5d7d.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/slu-25/aadb0ce5-a1aa-4b0d-bec4-8bb0e8e54a1d.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/slu-29/a73250f1-399a-4afa-bf83-4036dce78ef3.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/slu-29/fe231e36-6cc2-412c-b86e-0ba6ba9cc430.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/slu-32/1095577f-7b50-4854-9c7c-5beb59206e60.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/slu-32/f68bf680-9626-4952-b95e-12a18fd60820.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/slu-33/2597a3df-0f30-43d1-b1b3-7a0baac07675.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/slu-33/d6a78a5c-4a2e-4370-88f2-d8627a94f1ea.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/slu-34/050afa51-be7c-4cad-ae8b-bd63384df297.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/slu-34/7b5eab2e-fba3-47d5-9839-02249c2568c5.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/slu-35/0d7698b6-de52-4781-831f-a3ca8b23dd72.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/slu-35/2acee2c3-4322-4152-8151-c1d571475b7c.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/slu-36/67ffb2de-0410-44a2-aad7-4a32e2c49c7d.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/slu-36/cf85253f-0ecd-4943-a508-eab1e562a497.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/slu-37/2923aeb3-982f-400d-9588-707583c75a1d.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/slu-37/e64e5fe0-c726-4b9d-9d7b-952e7c7508ab.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/slu-6/0e1cd676-f95b-4562-8c5d-e932f148dc23.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/slu-6/b6a622da-5ce8-4ea5-a82a-f3a2a299ddf2.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/slu-mix-1/3a8a175f-5173-491b-9acf-87fe781f16df.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/slu-mix-1/7b06ac17-bfc6-43d5-99e6-d2b7a31290fb.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/sof-1/b1f4196a-0050-4107-a97b-4e1bd6ece17b.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/sof-1/fd481b93-55b2-4831-9be9-1b1b2886fda3.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/sof-10/03761253-711d-428d-a3bd-89974a50b490.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/sof-10/f159748f-234e-4962-b582-cd5805448f33.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/sof-3/044d53dd-d134-4959-a70c-46f11cc0b300.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/sof-3/e5cd6a8b-88ed-4a0d-8584-889a4fde72a7.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/sof-6/0755b7f9-bdd7-4e2a-92da-6650934db265.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/sof-6/f05501fd-7c06-46d5-bc20-a9d0cc5c2e0f.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/test-10/2bcc7f9a-9c36-487e-8522-bfbe1910b857.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/test-10/5c44a2f2-23e3-4c9f-9b7c-9012ca8b15e9.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/test-11/80e5134b-0733-41cc-8b4f-ef32fbe57066.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/test-11/98f97092-7c95-46dd-94c7-4030f153d197.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/test-12/61123e41-7b2a-40da-9f7f-b830c27d7f12.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/test-12/e49c9cc8-96ff-4a3c-b7b4-ea5562f41449.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/test-13/98772920-a700-4fda-88fd-53c16ac4b1a1.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/test-13/b93c31d7-54c3-47b9-a267-3f8fdb796805.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/test-14/b3eaa4c5-7abc-4e2d-9c11-c70ecb8a843b.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/test-14/d647b482-3d3b-4ed4-b8b5-d57eedf87db9.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/test-15/3b06f75e-3d22-4428-8d4f-2e704b96961e.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/test-15/f197c7ce-c30a-49ad-bd6c-9571d3b25637.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/test-16/80c756a7-9d47-4b49-bf42-bbada0909163.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/test-16/dfda4aab-f8d4-49ee-b141-78539b69007c.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/test-17/690f3c19-c148-458d-b4c5-87761d72b851.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/test-17/c9933c3d-98ab-4486-bd42-7c90f5ed3bd2.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/test-18/3f3eeca1-d401-436e-b7e6-5fa82c099270.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/test-18/b6a18246-776d-463f-80d5-140df74e9704.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/test-19/9831abdc-ad08-48c0-8384-86240e7350b5.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/test-19/ab7e0f6c-bca9-4f83-a4a0-5014c46e0512.json
 delete mode 100644 data/hfopenllm_v2/jaspionjader/test-20/6391f921-4de7-4e83-8bb2-8d0ef0b58d8f.json
 create mode 100644 data/hfopenllm_v2/jaspionjader/test-20/96a572e5-4751-46ce-9202-deb223ef4dfe.json
 create mode 100644 data/hfopenllm_v2/jayasuryajsk/Qwen2.5-3B-reasoner/f4320b1e-ea4f-4aea-8dab-cdb221ce53e5.json
 create mode 100644 data/hfopenllm_v2/jeanmichela/o-distil-qwen/8376c0bf-f9c3-4529-b13c-c57106182d15.json
 create mode 100644 data/hfopenllm_v2/jebcarter/psyonic-cetacean-20B/97a80145-e621-4603-8ff8-2cc4bd74190a.json
 delete mode 100644 data/hfopenllm_v2/jebcarter/psyonic-cetacean-20B/f8461982-37ad-4975-8445-996bdc9e59ce.json
 delete mode 100644 data/hfopenllm_v2/jebish7/Llama-3-Nanda-10B-Chat/739c83a9-8ff7-48df-af0c-494891df487b.json
 create mode 100644 data/hfopenllm_v2/jebish7/Llama-3-Nanda-10B-Chat/99a7881c-cca0-43d6-96f5-ce5292ed60a0.json
 create mode 100644 data/hfopenllm_v2/jebish7/Llama-3.1-8B-Instruct/60ca8f7e-1c20-4adb-bb84-892bad3c0d63.json
 delete mode 100644 data/hfopenllm_v2/jebish7/Llama-3.1-8B-Instruct/cc65b968-d766-4825-85cd-c36872eb1986.json
 create mode 100644 data/hfopenllm_v2/jebish7/Nemotron-4-Mini-Hindi-4B-Base/4a0f8dc7-9446-4dda-bf49-8cca4851746c.json
 delete mode 100644 data/hfopenllm_v2/jebish7/Nemotron-4-Mini-Hindi-4B-Base/70097d1f-8c48-49ab-b285-eebe2c85628e.json
 create mode 100644 data/hfopenllm_v2/jebish7/Nemotron-4-Mini-Hindi-4B-Instruct/6eb3a040-8234-4d31-8274-6987b0e4e3b4.json
 delete mode 100644 data/hfopenllm_v2/jebish7/Nemotron-4-Mini-Hindi-4B-Instruct/e108df0b-a1ce-4c07-b683-6d3b33fd3988.json
 create mode 100644 data/hfopenllm_v2/jebish7/Nemotron-Mini-4B-Instruct/16053077-38fd-4136-81a5-fea0d4cd927a.json
 delete mode 100644 data/hfopenllm_v2/jebish7/Nemotron-Mini-4B-Instruct/77bd2442-4004-48cb-ba45-eeb1ffec2a39.json
 create mode 100644 data/hfopenllm_v2/jebish7/aya-expanse-8b/25abb99f-536e-4638-8611-a1db5dee931d.json
 delete mode 100644 data/hfopenllm_v2/jebish7/aya-expanse-8b/70f2cb5c-feb3-44ac-9346-7ff60137e1c7.json
 create mode 100644 data/hfopenllm_v2/jebish7/gemma-2-2b-it/aaf0e5bd-b033-455e-bb23-b12b6f7c4520.json
 create mode 100644 data/hfopenllm_v2/jebish7/gemma-2-9b-it/b3a46478-c5f4-4c74-9bf0-d1ba616ae24c.json
 create mode 100644 data/hfopenllm_v2/jebish7/qwen2.5-0.5B-IHA-Hin/169fb05f-5201-47b8-a06e-7d01e574c689.json
 create mode 100644 data/hfopenllm_v2/jeffmeloy/Qwen-7B-nerd-uncensored-v1.0/db076309-32e5-4d46-9786-ff14f8daf5d2.json
 create mode 100644 data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-minperplexity-2/cde914dc-7d57-425f-9787-e4b8d36d61cf.json
 create mode 100644 data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v0.9/5d793ce3-a7fd-4ee3-b32c-c9da63ec0566.json
 create mode 100644 data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.0/8c645c9f-02f6-44a5-b295-d6364ed49464.json
 create mode 100644 data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.1/97bb5519-e2d3-44d5-abf4-b5263c2b3245.json
 create mode 100644 data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.2/bd3d78d3-3ff1-4a92-a316-e4e30787a331.json
 create mode 100644 data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.3/d8951ed7-f4ef-49ce-891e-8d8509e9cf93.json
 create mode 100644 data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.4/e1772d6c-fd26-43a7-82b3-7997d8a6809f.json
 create mode 100644 data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.5/febaf893-6aaf-4c87-89fc-cc865ebf2859.json
 create mode 100644 data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.7/0ad591f4-c846-4fd1-8536-a169e0a7e4ab.json
 create mode 100644 data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.8/0a318ebd-7bbb-456b-a6e4-9b480a858b5e.json
 create mode 100644 data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-olm-v1.0/e1cfdc32-3c5e-4f4b-a205-f416c96cf5e6.json
 create mode 100644 data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-olm-v1.1/85426280-8138-46d0-a111-b59b0d7c86c8.json
 create mode 100644 data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-olm-v1.2/32bbd26e-05e7-4a0f-a491-8f54cea9f3d3.json
 create mode 100644 data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-olm-v1.3/86ed6833-ae85-4a8e-b840-b0c9540083ce.json
 create mode 100644 data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-olm-v1.4/2f751ac3-5ca5-4d0d-9ad4-48155e51468a.json
 create mode 100644 data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-olm-v1.5/9677e68d-afda-4917-825c-83318219ff59.json
 create mode 100644 data/hfopenllm_v2/jeffmeloy/jeffmeloy_Qwen2.5-7B-minperplexity-1/23cd57c2-bf7f-440a-ab3e-edfdede5e8cd.json
 delete mode 100644 data/hfopenllm_v2/jeonsworld/CarbonVillain-en-10.7B-v4/bd67084e-d9ca-43c4-ab6e-3fbe8a1fb782.json
 create mode 100644 data/hfopenllm_v2/jeonsworld/CarbonVillain-en-10.7B-v4/bec23315-f98a-4211-81a0-c49f395e66c9.json
 create mode 100644 data/hfopenllm_v2/jiangxinyang-shanda/Homer-LLama3-8B/1ac5faef-7fa0-4b58-a6ba-0c444a2023a8.json
 create mode 100644 data/hfopenllm_v2/jieliu/Storm-7B/39327803-11e7-4b28-8750-81feb027e8f3.json
 delete mode 100644 data/hfopenllm_v2/jieliu/Storm-7B/f521cb33-487e-4636-9039-fe1af3e090f2.json
 delete mode 100644 data/hfopenllm_v2/jiviai/medX_v2/386bc585-73ed-443e-b8ce-8723c533e41b.json
 create mode 100644 data/hfopenllm_v2/jiviai/medX_v2/ce2b6874-0fc8-4364-a526-7b25b101e1e3.json
 delete mode 100644 data/hfopenllm_v2/jlzhou/Qwen2.5-3B-Infinity-Instruct-0625/09585af5-dd80-4418-8f58-c6ae718a1eee.json
 create mode 100644 data/hfopenllm_v2/jlzhou/Qwen2.5-3B-Infinity-Instruct-0625/9f9ebc90-31f9-45c1-b9c2-07b727b12f3d.json
 create mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.01/d189a2fc-71f5-4bc9-a0b1-7e744a19921f.json
 delete mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.01/f7207c82-5fc7-447a-b532-42bdb77ecfb4.json
 create mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.1/1eb697fe-9dd4-4a41-aa47-33456df39e2d.json
 delete mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.1/592dcd83-1adb-4193-add2-fb0ae66ea7ee.json
 delete mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.01/2c82f973-c6cb-4aa2-9121-51bb0343aae4.json
 create mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.01/5f10df7b-cd2c-44ca-b13a-2852483c71f8.json
 create mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.1/3abbb4b6-8050-44fd-b066-0f061ce2f4d7.json
 delete mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.1/42b63cfd-3b06-4363-bf78-40c40da10299.json
 create mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.01/5f47e65d-293f-469e-a18f-5627ca1adf44.json
 delete mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.01/cd4acb74-9433-435c-b0e9-9750fa52e3c0.json
 create mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.1/b753c1aa-8a0c-4600-99ec-8eb51ab50da7.json
 delete mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.1/e9a9ec78-4ada-4ce4-ad92-c27332279f84.json
 create mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.01/15c21655-9af8-4bee-9884-b047683e9adf.json
 delete mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.01/b4e42076-bbff-4179-897d-b45a0e959020.json
 delete mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.1/4017ff46-f389-4024-be9c-4360b0b6e64c.json
 create mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.1/f642de95-218a-4db0-807f-1bb97618b4f6.json
 create mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.01/01443b06-9ad3-41f5-ae0d-bc84086e0a0d.json
 delete mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.01/6bef1092-ece2-4aeb-8dbe-0e1a02c95f2f.json
 create mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.1/1ee8c377-2236-4225-942f-ef8ce5770741.json
 delete mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.1/872cddea-7a06-4b80-9243-423bf49c222c.json
 create mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.01/4ee9aa78-d9eb-4a1c-91c4-f29f093b95d3.json
 delete mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.01/9dfd4a1b-fa18-4d54-a7bd-a519f87b532b.json
 delete mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.1/184a8906-d998-4e03-bf6f-f66ca904a7b7.json
 create mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.1/419c6631-805f-43ba-9db8-5296f8d221ec.json
 delete mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.01/11f14586-5f0c-4e0b-b41e-f3e0f298b781.json
 create mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.01/3fc1822f-4a43-4a3b-90d7-fc163491c90a.json
 delete mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.1/3b9966ca-8157-4f32-b276-9d36dd1045e1.json
 create mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.1/76b4037b-c5d0-435f-966a-bd88b1665dad.json
 delete mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.01/414c1eec-86bc-4d86-a014-2ea586eebfb1.json
 create mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.01/757b85e7-84c8-429f-aeb4-870852fa8959.json
 create mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.1/acab4982-1205-4362-803e-306b1e2371bf.json
 delete mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.1/c9e8c1d4-c031-4f90-a14b-30633e75f2c3.json
 create mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.01/0e549b5d-c1d9-443d-9a80-8dd34dadd22e.json
 delete mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.01/4532b233-abbc-4fbd-ba77-801eb1398361.json
 create mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.1/d3d4eccc-8792-40e5-91cf-22885f4cbaf5.json
 delete mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.1/d5916658-91c3-418f-9cd6-c49dcc8927a3.json
 create mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.01/708aded5-6252-44e3-bf0d-08bf3e7f32e0.json
 delete mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.01/a29cab83-e937-4a2a-a9fd-986fd1c67e03.json
 delete mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.1/2aae97a9-6d0a-438d-9f74-e7a30e85face.json
 create mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.1/ce6d31f2-f38e-4af3-85a3-d2f6c80f71f1.json
 delete mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_linear/060fe548-f690-4492-9c0f-ada0210b0386.json
 create mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_linear/5efcc291-ca9a-4ca9-b2ed-dab37dce5f5a.json
 create mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.1/47320824-8064-40d4-a08c-810faafbba77.json
 delete mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.1/7d709f22-c4e8-4903-b924-a86728dcf26b.json
 create mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.3/8baeef58-0ba6-4723-8f23-7a4c386f2cad.json
 delete mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.3/c45c03dd-efbe-4c86-a07d-e7831210e017.json
 create mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.7/0387ca63-1e31-4eaa-ac7c-35d417548c54.json
 delete mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.7/3b51b346-a23c-4add-9623-86c9591eddd0.json
 delete mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.9/35557106-88b1-4f6a-bf33-17ea6744f208.json
 create mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.9/733983fe-4b9c-47e6-963d-c57829b6f1af.json
 create mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_linear/80c4859d-8016-4650-939f-100ba2e6d808.json
 delete mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_linear/89b55a5a-8f83-4a87-906a-32c1e84b8220.json
 create mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.1/21724d3a-cc6c-43eb-9d69-46d8d91c97f8.json
 delete mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.1/ec8e412e-96e8-43ae-98e1-f605228f3f6d.json
 delete mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.3/29b19ca6-ec5f-4ef1-9721-cb2199661873.json
 create mode 100644
data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.3/d781945e-e9df-4136-90cd-632f0bed6246.json delete mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.5/12f38eb7-57be-45c6-a53a-9d4859413e94.json create mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.5/8f146bb5-dd4d-49ce-ac60-76f66321feb8.json delete mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.7/22ae576f-6bec-450f-812f-4315779be0a1.json create mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.7/89bfba6d-c622-445e-b0b9-512aadcea7cf.json delete mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.9/76c364c1-1e67-4536-8f23-85f84f0cd554.json create mode 100644 data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.9/9c27f2e6-ebbe-4fac-bc51-74455d3a6512.json create mode 100644 data/hfopenllm_v2/jpacifico/Chocolatine-14B-Instruct-4k-DPO/455ef1e0-bdf2-49bf-a53d-2c9e3d00d5f3.json delete mode 100644 data/hfopenllm_v2/jpacifico/Chocolatine-14B-Instruct-4k-DPO/fe0cfe19-b019-459e-a71d-46d55612a95e.json delete mode 100644 data/hfopenllm_v2/jpacifico/Chocolatine-14B-Instruct-DPO-v1.2/aae9e150-7992-4241-91af-0c55d03d709f.json create mode 100644 data/hfopenllm_v2/jpacifico/Chocolatine-14B-Instruct-DPO-v1.2/e04a76a6-ac22-43b2-bbf9-196a08de2949.json create mode 100644 data/hfopenllm_v2/jpacifico/Chocolatine-14B-Instruct-DPO-v1.3/2fcb74f0-add1-4d46-8a0f-8578a616dbed.json delete mode 100644 data/hfopenllm_v2/jpacifico/Chocolatine-14B-Instruct-DPO-v1.3/b56c681a-592f-491a-aa0a-030848356563.json create mode 100644 data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-DPO-v2.0b1/51530638-ef76-43ce-9396-8a0d07988712.json delete mode 100644 data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-DPO-v2.0b1/9ae740a8-6d7c-438c-942f-11ac0f6cbe79.json create mode 100644 data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0.1/74d99e4d-0e6f-4804-aa52-0dc76d37fac3.json delete mode 100644 data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0.1/c68ca8a7-07d8-4295-a535-a573fc3893b7.json create mode 100644 data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0.3/80e8b9f0-b507-4927-9d24-1c793e3783cc.json delete mode 100644 data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0.3/ccf2d437-d3e3-4a53-9249-e6df2fd04f49.json create mode 100644 data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0/7b037520-a5e9-4b58-80f3-f0ecc5957c67.json delete mode 100644 data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0/85b8aede-7eb3-4997-9529-2f7d4603fb9e.json create mode 100644 data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0b2/10b88d05-62d2-4603-9d04-b0854e39ed40.json delete mode 100644 data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0b2/6837502d-0f08-48d8-b85e-70f3e07a2531.json create mode 100644 data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0b3/4b693f41-d811-4b64-892c-d840eee5ace4.json delete mode 100644 data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0b3/f345f9cb-7233-4f4e-8e8b-a0b607502d1d.json delete mode 100644 data/hfopenllm_v2/jpacifico/Chocolatine-3B-Instruct-DPO-Revised/08a646ba-9b4a-483e-8adf-f4e203a9be5d.json create mode 100644 data/hfopenllm_v2/jpacifico/Chocolatine-3B-Instruct-DPO-Revised/90d86c8c-3aa6-42ba-a94f-75c961e65c41.json delete mode 100644 data/hfopenllm_v2/jpacifico/Chocolatine-3B-Instruct-DPO-v1.0/7f969b69-cb14-4291-a15f-60f2b56e23ad.json create mode 100644 
data/hfopenllm_v2/jpacifico/Chocolatine-3B-Instruct-DPO-v1.0/8318ae52-6ae3-45ce-82db-73f8cb5ad7c7.json create mode 100644 data/hfopenllm_v2/jpacifico/Chocolatine-3B-Instruct-DPO-v1.2/b20a1d13-2f14-42e4-bdde-49f053cef325.json delete mode 100644 data/hfopenllm_v2/jpacifico/Chocolatine-3B-Instruct-DPO-v1.2/f34988e6-20f5-4d77-9233-70d5bc6193fb.json create mode 100644 data/hfopenllm_v2/jpacifico/Distilucie-7B-Math-Instruct-DPO-v0.1/51521dfb-d4b5-45df-ac2a-54190aed0b9f.json delete mode 100644 data/hfopenllm_v2/jpacifico/Distilucie-7B-Math-Instruct-DPO-v0.1/8ea866ce-c4a8-4981-b221-ee7b2dc898cd.json delete mode 100644 data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-DPO-v1.1.3/643a510c-b9f4-4222-a1b0-09d7d5434de8.json create mode 100644 data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-DPO-v1.1.3/997a1ceb-185a-4e6c-8383-eb5a6f976771.json create mode 100644 data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-DPO-v1.1/22101998-c3d3-414f-9ed1-99330cdbe3b2.json delete mode 100644 data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-DPO-v1.1/ad0aa0da-dac4-42a9-ae62-ebe03aa40643.json create mode 100644 data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.0/a2408953-a7eb-449c-b80c-3620915d44d0.json delete mode 100644 data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.0/f28fc4d7-d3eb-4915-967a-db97667e85bc.json delete mode 100644 data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.1/03e7b19a-c31a-4bd4-8560-3b8ac4c7c80c.json create mode 100644 data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.1/d65e5b08-7d3c-4c0d-85fa-496db65a235c.json delete mode 100644 data/hfopenllm_v2/jpacifico/Lucie-Boosted-7B-Instruct/4c7575d2-d538-4767-8d7e-d905b11f84f9.json create mode 100644 data/hfopenllm_v2/jpacifico/Lucie-Boosted-7B-Instruct/ce2c9614-46d2-481d-ac25-3cc71a93bd5e.json delete mode 100644 data/hfopenllm_v2/jsfs11/L3-8B-Stheno-slerp/4148a653-5fda-41c2-bf7e-1c03d385b7a1.json create mode 100644 data/hfopenllm_v2/jsfs11/L3-8B-Stheno-slerp/e9ba998d-8147-4046-afae-9ee7d544e98d.json delete mode 100644 data/hfopenllm_v2/jsfs11/MixtureofMerges-MoE-4x7b-v4/8143abf5-bd1d-4cdd-b555-5135f04945c3.json create mode 100644 data/hfopenllm_v2/jsfs11/MixtureofMerges-MoE-4x7b-v4/c44f1012-1123-42c8-b110-5735dc756fd5.json create mode 100644 data/hfopenllm_v2/jsfs11/MixtureofMerges-MoE-4x7b-v5/5088f6a6-2acf-4d10-8b78-0d5bd4126ab5.json delete mode 100644 data/hfopenllm_v2/jsfs11/MixtureofMerges-MoE-4x7b-v5/a452af19-e167-45ca-99d2-5def2e4ad774.json delete mode 100644 data/hfopenllm_v2/kaist-ai/janus-7b/3ab8b78b-a9f9-428c-9469-afaa4158a0a6.json create mode 100644 data/hfopenllm_v2/kaist-ai/janus-7b/b4d96088-5cc0-4ebc-8b8b-8c7e9f90420b.json delete mode 100644 data/hfopenllm_v2/kaist-ai/janus-dpo-7b/2a78f22b-d898-4f92-a2a5-c2930c16916c.json create mode 100644 data/hfopenllm_v2/kaist-ai/janus-dpo-7b/529dba11-53af-4045-ae46-04e1b9838d4a.json create mode 100644 data/hfopenllm_v2/kaist-ai/janus-rm-7b/391f6d6c-418f-44be-910a-fb90b5712649.json delete mode 100644 data/hfopenllm_v2/kaist-ai/janus-rm-7b/46f57920-759b-4d1a-b2f5-fe66aa740170.json create mode 100644 data/hfopenllm_v2/kaist-ai/mistral-orpo-capybara-7k/2ccccb4b-7260-4a1a-9426-117e359c7c5c.json delete mode 100644 data/hfopenllm_v2/kavonalds/BunderMaxx-0710/10be7d08-18a9-43a6-80ea-81d704600eab.json delete mode 100644 data/hfopenllm_v2/kavonalds/BunderMaxx-0710/63d646bf-14d2-4cc7-ab82-efd1645cc1ba.json create mode 100644 data/hfopenllm_v2/kavonalds/BunderMaxx-0710/84afecec-453d-491c-9f5a-de31d8fba43e.json create mode 100644 
data/hfopenllm_v2/kavonalds/BunderMaxx-0710/dba3a3a4-cd23-44c9-823f-0bd88cf6465b.json create mode 100644 data/hfopenllm_v2/kavonalds/BunderMaxx-1010/1179bcce-558e-40ad-8537-c74c59557975.json delete mode 100644 data/hfopenllm_v2/kavonalds/BunderMaxx-1010/6b0275ea-f2eb-4a37-922c-d1f734c1a6d3.json delete mode 100644 data/hfopenllm_v2/kavonalds/Lancer-1-1b-Instruct/ae2afa83-4607-43ea-be11-86cc57f3b848.json create mode 100644 data/hfopenllm_v2/kavonalds/Lancer-1-1b-Instruct/fe0a5c17-6c8d-4f06-a58e-47648ef9ecec.json create mode 100644 data/hfopenllm_v2/kayfour/T3Q-Qwen2.5-7B-it-KOR-Safe/81cf8cbd-33bc-44ab-930a-65242e1ae7b2.json create mode 100644 data/hfopenllm_v2/keeeeenw/MicroLlama/173bb053-e817-4551-b169-c3f71163650a.json delete mode 100644 data/hfopenllm_v2/kekmodel/StopCarbon-10.7B-v5/25b7d35b-8b5f-44ac-afae-e0f71ba8a0ff.json create mode 100644 data/hfopenllm_v2/kekmodel/StopCarbon-10.7B-v5/b7e6a86f-340c-48ed-a828-2e80a13aa515.json create mode 100644 data/hfopenllm_v2/kevin009/llamaRAGdrama/bd221eee-7aa8-4d6f-a6be-89ee5568e729.json create mode 100644 data/hfopenllm_v2/khoantap/cheap-moe-merge/8727a325-a515-4456-ba34-65c30f84644a.json delete mode 100644 data/hfopenllm_v2/khoantap/cheap-moe-merge/9ef977af-b10c-4434-bf4c-9783903e75a9.json create mode 100644 data/hfopenllm_v2/khoantap/llama-3-8b-stock-merge/3e4011fa-d480-4c16-9371-2025bc834358.json create mode 100644 data/hfopenllm_v2/khoantap/llama-breadcrumbs-ties-merge/867499a7-589b-4564-b04d-a004b7c0abb4.json create mode 100644 data/hfopenllm_v2/khoantap/llama-evolve-ties-best-merge/52f1fb51-fc7e-4cc2-918a-7c7226ae2ce5.json create mode 100644 data/hfopenllm_v2/khoantap/llama-linear-0.5-0.5-1-merge/5f4a8fb6-b22d-4eb2-aaef-da05ca45fbeb.json create mode 100644 data/hfopenllm_v2/khoantap/llama-linear-0.5-1-0.5-merge/3278855d-7bd1-4e7e-b27b-b1393006e7e7.json create mode 100644 data/hfopenllm_v2/khoantap/llama-linear-1-0.5-0.5-merge/5193ab4d-1627-43b5-bfb7-89e08ea1f810.json create mode 100644 data/hfopenllm_v2/khoantap/llama-slerp-merge/598faeda-48fb-43a8-aaa9-849d5dfcea79.json delete mode 100644 data/hfopenllm_v2/khoantap/moe-out-merge/326fc05a-78e9-4e36-933c-aa0219661e0d.json create mode 100644 data/hfopenllm_v2/khoantap/moe-out-merge/d1afa2fb-1256-4dd3-b13b-802917bf481b.json create mode 100644 data/hfopenllm_v2/khulaifi95/Llama-3.1-8B-Reason-Blend-888k/397c9bc3-0af5-453c-9b68-5360783dfbf7.json delete mode 100644 data/hfopenllm_v2/kms7530/chemeng_llama-3-8b-Instruct-bnb-4bit_24_1_100_1/6cb03909-9850-4519-9e67-f2d875652e02.json create mode 100644 data/hfopenllm_v2/kms7530/chemeng_llama-3-8b-Instruct-bnb-4bit_24_1_100_1/9bb39652-c79a-42bf-b6d8-c4ed6174a4c7.json delete mode 100644 data/hfopenllm_v2/kms7530/chemeng_phi-3-mini-4k-instruct-bnb-4bit_16_4_100_1_nonmath/51a11592-e099-4059-9e97-f8924e1c2437.json create mode 100644 data/hfopenllm_v2/kms7530/chemeng_phi-3-mini-4k-instruct-bnb-4bit_16_4_100_1_nonmath/7e793244-b746-4aa4-a401-dcf5884f61a4.json create mode 100644 data/hfopenllm_v2/kms7530/chemeng_qwen-math-7b_24_1_100_1/26a8da03-debd-41e3-8ee1-2827d76b26ca.json create mode 100644 data/hfopenllm_v2/kms7530/chemeng_qwen-math-7b_24_1_100_1_nonmath/e214c326-dd84-4915-bba1-faaafbb026b2.json create mode 100644 data/hfopenllm_v2/kno10/ende-chat-0.0.5/98a5ea0a-6e45-48f8-8219-32099b9fa9d0.json delete mode 100644 data/hfopenllm_v2/kno10/ende-chat-0.0.5/af2f11cf-8efa-4c71-a0b2-74f953b8e61b.json create mode 100644 data/hfopenllm_v2/kno10/ende-chat-0.0.7/40d7d17d-2d41-4d23-83c1-ab5f3320e36e.json delete mode 100644 
data/hfopenllm_v2/kno10/ende-chat-0.0.7/6619dec7-71cf-4be6-90e2-815e8dd4e56f.json delete mode 100644 data/hfopenllm_v2/kyutai/helium-1-preview-2b/ce4ddb86-646e-4c59-8a03-3687dbb77021.json create mode 100644 data/hfopenllm_v2/kyutai/helium-1-preview-2b/d881a83a-9ba8-4919-8b89-45f5a7220621.json delete mode 100644 data/hfopenllm_v2/kz919/QwQ-0.5B-Distilled-SFT/08efd69e-6ff6-48a1-b260-ddbb4a942d12.json create mode 100644 data/hfopenllm_v2/kz919/QwQ-0.5B-Distilled-SFT/d6c966a1-7927-424a-9886-b98688d27e6f.json delete mode 100644 data/hfopenllm_v2/ladydaina/ECE-FDF/737cda34-7dea-4c68-b6a3-5b10066f9241.json create mode 100644 data/hfopenllm_v2/ladydaina/ECE-FDF/c09fe163-a7f7-4b6b-b407-ee8d698b2ee8.json create mode 100644 data/hfopenllm_v2/laislemke/LLaMA-2-vicuna-7b-slerp/b3979c7f-0596-4a24-b264-73a17ba19821.json delete mode 100644 data/hfopenllm_v2/lalainy/ECE-PRYMMAL-0.5B-FT-V5-MUSR/012fb237-8082-40d9-882e-0dd7bc9c74cb.json create mode 100644 data/hfopenllm_v2/lalainy/ECE-PRYMMAL-0.5B-FT-V5-MUSR/f6156893-92e7-4c4f-bff4-8b6d774ecbd8.json delete mode 100644 data/hfopenllm_v2/lalainy/ECE-PRYMMAL-0.5B-SLERP-V4/869daca0-a700-464d-a551-290ed454421e.json create mode 100644 data/hfopenllm_v2/lalainy/ECE-PRYMMAL-0.5B-SLERP-V4/8b1c19e0-8b47-46ae-8bf3-f84c7d3a9c0e.json create mode 100644 data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-0.5B-SLERP-BIS-V1/6221102e-4e8c-46dd-8c03-fa9e92b7e4ea.json delete mode 100644 data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-0.5B-SLERP-BIS-V1/8822f27f-90ec-41a8-b71a-611f7c5ad590.json create mode 100644 data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-1B-SLERP-V3/329e5e91-10ba-4795-ae86-dda95e698b4f.json delete mode 100644 data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-1B-SLERP-V3/fa3c7a13-b37e-40b3-b814-b1ae421081ba.json delete mode 100644 data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-1B-SLERP-V4/2ede8e21-33e9-45ac-9c60-9a4bd7e8e3cb.json create mode 100644 data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-1B-SLERP-V4/3fe89b13-135d-4790-871d-74e7a28ea2e9.json create mode 100644 data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-6B-SLERP-V1/4b807741-f1b9-4964-9bc9-bb93f9b34217.json delete mode 100644 data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-6B-SLERP-V1/85ac95fd-cb36-4158-818d-69c45f83dae9.json create mode 100644 data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-6B-SLERP-V2/c52a8a4d-be91-4a0d-8cd5-8473a42f0978.json delete mode 100644 data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-6B-SLERP-V2/fd2e3c0b-8b35-463c-a001-444ed6e6dd9a.json create mode 100644 data/hfopenllm_v2/langgptai/Qwen-las-v0.1/f6e157c4-0ce9-41c9-b885-9222d894ff0c.json delete mode 100644 data/hfopenllm_v2/langgptai/qwen1.5-7b-chat-sa-v0.1/36137543-78a7-42a6-ad41-a4121797eec4.json create mode 100644 data/hfopenllm_v2/langgptai/qwen1.5-7b-chat-sa-v0.1/fe52a94a-5324-4b59-accc-dfd1f9d4aead.json create mode 100644 data/hfopenllm_v2/lars1234/Mistral-Small-24B-Instruct-2501-writer/1241f5e3-54eb-429e-b109-a5e163e39eda.json delete mode 100644 data/hfopenllm_v2/lars1234/Mistral-Small-24B-Instruct-2501-writer/89742249-c51e-48e9-8bf1-7aad55e222c1.json create mode 100644 data/hfopenllm_v2/leafspark/Llama-3.1-8B-MultiReflection-Instruct/8ccc7c8c-1d14-45bb-9a6b-f8f69e506139.json delete mode 100644 data/hfopenllm_v2/leafspark/Llama-3.1-8B-MultiReflection-Instruct/c8a287fc-db9e-4088-aafe-0562aa305011.json create mode 100644 data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-9B/5531b59e-24c0-41af-ab6b-d6a5e38b0a98.json create mode 100644 data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-Advanced-9B/63e82cb3-2f6f-4617-abb7-ae093bc27830.json create mode 100644 
data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-Remix-9B/0feb74e6-40d4-472d-9233-27faa2d3f802.json create mode 100644 data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v2-9B/e74dd005-c9b5-45c9-b7f5-455c3110e09b.json create mode 100644 data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v2a-9B/d094bf6f-9952-45c7-995e-d7eda07f4668.json create mode 100644 data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v2f-9B/0e5f3393-8a6a-4f2f-948a-a37ae4d8fdeb.json create mode 100644 data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v3-Advanced-9B/f91982ac-0cab-415a-8503-e090d195bd05.json create mode 100644 data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v3b-9B/fb1af66e-7828-495b-8277-5cff77c3070e.json create mode 100644 data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v3i-9B/ac84c157-4d11-43c1-8731-b1e5cfa91668.json create mode 100644 data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v3j-9B/bbc812dd-9a9c-4f99-b813-50361025eea3.json create mode 100644 data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v4-Advanced-9B/fc818799-49d5-4fca-b131-ebe8d5d831f1.json create mode 100644 data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v4a-Advanced-9B/33349989-8573-4d71-ae0f-99691fdaffc3.json create mode 100644 data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v4b-9B/91551de5-d8ac-4c0d-b9b4-3627db947f0e.json create mode 100644 data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v4c-9B/c2d2c1f4-aaab-45f1-b3f6-5b4ea56b696e.json create mode 100644 data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v4d-9B/36821a8b-af18-4631-b4b0-7e4b37bb194b.json create mode 100644 data/hfopenllm_v2/lemon07r/Llama-3-RedMagic4-8B/e402d129-f4f1-4b95-b079-4f30936119aa.json create mode 100644 data/hfopenllm_v2/lemon07r/llama-3-NeuralMahou-8b/814e1ea7-a639-4b05-9208-0bf537ea5479.json create mode 100644 data/hfopenllm_v2/lesubra/ECE-EIFFEL-3B/35a50d36-31d0-454b-a13c-80ca26945f94.json delete mode 100644 data/hfopenllm_v2/lesubra/ECE-EIFFEL-3B/b32f3852-47ce-4ca5-98a0-5e2f166a11e9.json delete mode 100644 data/hfopenllm_v2/lesubra/ECE-EIFFEL-3Bv2/7e511f3b-7d8e-44c4-ad3f-7f6e66231109.json create mode 100644 data/hfopenllm_v2/lesubra/ECE-EIFFEL-3Bv2/87347017-4ff1-4bd3-a1d7-8f3999061209.json delete mode 100644 data/hfopenllm_v2/lesubra/ECE-EIFFEL-3Bv3/317a27cd-9458-4157-a304-0c1e3739d0fb.json create mode 100644 data/hfopenllm_v2/lesubra/ECE-EIFFEL-3Bv3/976184ed-c4ed-4898-83c7-521a8a8309ac.json delete mode 100644 data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP-V1/6fb1242d-bf20-43e6-acfe-77a88c020eee.json create mode 100644 data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP-V1/fa52f072-7725-4a4e-b728-042e5897a1bd.json create mode 100644 data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP-V2/6374dcee-301c-4f28-9316-82ed8e693089.json delete mode 100644 data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP-V2/cb14b942-7c2f-489f-bede-d25279ea39ac.json create mode 100644 data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP_2-V1/b7c95cb4-f32f-466e-a28c-32afd9ec5578.json delete mode 100644 data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP_2-V1/c6b7d02d-4d2d-43fa-95a8-aa188f38120a.json delete mode 100644 data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP_2-V2/653cb458-4616-4325-b377-a79ee4a5d9c6.json create mode 100644 data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP_2-V2/bddd742b-f7c9-44aa-ad2f-83f51a4625be.json create mode 100644 data/hfopenllm_v2/lesubra/merge-test/099af0ee-c06b-4435-8f97-27681f3eddff.json delete mode 100644 data/hfopenllm_v2/lesubra/merge-test/6f16b360-346a-4299-8f60-fafc0bb8ebcd.json create mode 100644 data/hfopenllm_v2/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-full/fa826f3a-8688-4518-8d44-68189abb47ba.json create mode 100644 
data/hfopenllm_v2/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-half/10d29dc0-3486-40df-9933-1ce8f0fabaa2.json create mode 100644 data/hfopenllm_v2/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top25/741ff375-3392-461e-a9b0-e0dab4e6e9f8.json create mode 100644 data/hfopenllm_v2/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top75/c3d709de-118d-40c2-ab89-040efedd7fdb.json create mode 100644 data/hfopenllm_v2/lightblue/suzume-llama-3-8B-multilingual/9be3dd27-93fa-49e9-a628-5a77a8a3bb9a.json delete mode 100644 data/hfopenllm_v2/lkoenig/BBAI_145_/0f29b1ac-1943-463a-8a79-a4c0ace371cb.json create mode 100644 data/hfopenllm_v2/lkoenig/BBAI_145_/be850d1b-bf75-4c34-830f-8881792ac842.json create mode 100644 data/hfopenllm_v2/lkoenig/BBAI_200_Gemma/6b644b97-4fc3-4826-9ea9-68be1dc8e947.json create mode 100644 data/hfopenllm_v2/lkoenig/BBAI_212_QwenLawLo/861d41f1-6d33-4e07-96ea-2c39a36c4b63.json create mode 100644 data/hfopenllm_v2/lkoenig/BBAI_212_Qwencore/7501b038-4847-45bc-8b92-6800d7a58c1e.json create mode 100644 data/hfopenllm_v2/lkoenig/BBAI_230_Xiaqwen/db48206d-700b-45f3-b597-8752110113b5.json create mode 100644 data/hfopenllm_v2/lkoenig/BBAI_375_QwenDyancabs/b52b76e4-9dec-4336-88b1-d98b95b95d2a.json create mode 100644 data/hfopenllm_v2/lkoenig/BBAI_456_QwenKoen/ba9ec2ea-2bce-4999-9e48-e1d0795b31d0.json create mode 100644 data/hfopenllm_v2/lkoenig/BBAI_7B_KoenQwenDyan/724221ce-d7b2-43cb-8e16-72ac529a7b60.json create mode 100644 data/hfopenllm_v2/lkoenig/BBAI_7B_Qwen2.5koen/552f3814-d071-4d00-a895-b739dffdcb2d.json create mode 100644 data/hfopenllm_v2/lkoenig/BBAI_7B_QwenDyanKoenLo/d3819133-bae8-493d-9a86-aee67da5d115.json create mode 100644 data/hfopenllm_v2/lkoenig/BBAI_7B_QwenDyancabsLAW/5c3a022f-7221-4b4f-ab67-d5b69c558434.json create mode 100644 data/hfopenllm_v2/llmat/Mistral-v0.3-7B-ORPO/c161b868-746f-4d88-9f41-eb8283a7b87a.json create mode 100644 data/hfopenllm_v2/llmat/Mistral-v0.3-7B-ORPO/f79a76fc-09ff-48c8-b0e7-5f18e0750e6d.json delete mode 100644 data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-1B-SLERP-V5/334bc38a-becd-405b-8982-dfaf5de35c4b.json create mode 100644 data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-1B-SLERP-V5/39f4d1ab-fd42-4746-b949-9666ce32f9d1.json create mode 100644 data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-1B-SLERP-V6/8348f316-9109-4229-9fee-edc02431befa.json delete mode 100644 data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-1B-SLERP-V6/eaa1adca-5379-4aab-bf39-8641df58a530.json create mode 100644 data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-3B-SLERP-V1/6b2346c6-5fbf-4195-b3bb-66bbd446ca53.json delete mode 100644 data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-3B-SLERP-V1/844c959f-6859-4220-bdd8-99e6af53808b.json delete mode 100644 data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-3B-SLERP-V2/2bb16fd8-516f-42d6-91e1-2f3f4024f0d4.json create mode 100644 data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-3B-SLERP-V2/8645ffc1-6487-4205-b8b0-e980e094ac6c.json delete mode 100644 data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-3B-SLERP-V3/183cd87c-2415-4428-9ad1-9d41c0dcdc41.json create mode 100644 data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-3B-SLERP-V3/2c6d1e57-7673-4a86-808e-6ff6a7146a11.json delete mode 100644 data/hfopenllm_v2/lmsys/vicuna-13b-v1.3/5b0377fc-5df1-4ed0-bad4-ab13bc42677c.json create mode 100644 data/hfopenllm_v2/lmsys/vicuna-13b-v1.3/64ab8b1a-62be-4561-8f0c-e42f1fe37178.json create mode 100644 data/hfopenllm_v2/lmsys/vicuna-7b-v1.3/3eb22885-eb7c-4c85-b79f-cd47ffacd551.json delete mode 100644 data/hfopenllm_v2/lmsys/vicuna-7b-v1.3/b8e50988-f2c5-4508-a5c5-2813d94f7ebd.json delete mode 100644 
data/hfopenllm_v2/lmsys/vicuna-7b-v1.5/26c5c07e-8482-44b4-8f11-a602e79fb730.json create mode 100644 data/hfopenllm_v2/lmsys/vicuna-7b-v1.5/8956d608-c627-469b-943d-bfad6c7382af.json delete mode 100644 data/hfopenllm_v2/lodrick-the-lafted/llama-3.1-8b-instruct-ortho-v7/81d006e2-3be1-4941-bf85-74f1b313c7d7.json create mode 100644 data/hfopenllm_v2/lodrick-the-lafted/llama-3.1-8b-instruct-ortho-v7/9ff060c8-d4fa-4880-a0cd-9581f5c2f574.json create mode 100644 data/hfopenllm_v2/lordjia/Llama-3-Cantonese-8B-Instruct/e3d6b3d7-a231-40c1-bac9-0b7fcb478bca.json delete mode 100644 data/hfopenllm_v2/lordjia/Llama-3-Cantonese-8B-Instruct/f453cb41-346c-48b4-a660-64f13ec69fe4.json create mode 100644 data/hfopenllm_v2/lordjia/Qwen2-Cantonese-7B-Instruct/20acb302-3a74-4425-af4c-a1d719b90a88.json delete mode 100644 data/hfopenllm_v2/lordjia/Qwen2-Cantonese-7B-Instruct/869339ec-939c-4222-b178-533c3ca5b0d1.json delete mode 100644 data/hfopenllm_v2/lt-asset/nova-1.3b/4c3005e9-fffd-491b-8ce1-58204986b787.json create mode 100644 data/hfopenllm_v2/lt-asset/nova-1.3b/a8613588-687d-4291-ae5a-57688501cffd.json create mode 100644 data/hfopenllm_v2/lunahr/thea-3b-50r-u1/83dd67cb-5508-4aa5-9435-d5585b7f3d52.json delete mode 100644 data/hfopenllm_v2/lunahr/thea-3b-50r-u1/977449d7-d8f0-4e32-b56c-8950006a09a4.json delete mode 100644 data/hfopenllm_v2/lunahr/thea-v2-3b-50r/03d675d8-ee8d-47de-8bf3-ef386bd8a88f.json create mode 100644 data/hfopenllm_v2/lunahr/thea-v2-3b-50r/26d981bb-f2e5-4195-8d6f-594bb0b26f4a.json create mode 100644 data/hfopenllm_v2/m42-health/Llama3-Med42-70B/df06c977-b54c-4668-837f-eb583ef24d29.json create mode 100644 data/hfopenllm_v2/macadeliccc/Samantha-Qwen-2-7B/31a8ac03-f58b-46e3-9f17-53311b1fd506.json create mode 100644 data/hfopenllm_v2/macadeliccc/magistrate-3.2-3b-base/3e4a7141-7a82-421a-a107-bbac3cbafc9b.json delete mode 100644 data/hfopenllm_v2/macadeliccc/magistrate-3.2-3b-base/e0f596ba-89ee-4fa7-b5dc-698c2a5fda95.json create mode 100644 data/hfopenllm_v2/macadeliccc/magistrate-3.2-3b-it/9a3069f2-81ed-484a-b6e6-a45a259e9a43.json delete mode 100644 data/hfopenllm_v2/macadeliccc/magistrate-3.2-3b-it/df26db97-8e5e-409e-937d-45951c81a8cd.json create mode 100644 data/hfopenllm_v2/magnifi/Phi3_intent_v56_3_w_unknown_5_lr_0.002/c0a3d0c3-c541-4606-a925-4100b062284f.json create mode 100644 data/hfopenllm_v2/maldv/Awqward2.5-32B-Instruct/20685a4b-686f-4cd4-b49d-3067a005256d.json delete mode 100644 data/hfopenllm_v2/maldv/Awqward2.5-32B-Instruct/8b330a87-7689-45ae-a005-0349e09f07ac.json delete mode 100644 data/hfopenllm_v2/maldv/Lytta2.5-32B-Instruct/27575e22-2e66-4177-aa8f-ab4ebd4743ea.json create mode 100644 data/hfopenllm_v2/maldv/Lytta2.5-32B-Instruct/85a91293-cd51-4f79-8b98-2f4bc67d78c1.json create mode 100644 data/hfopenllm_v2/maldv/Qwentile2.5-32B-Instruct/d2e3a6c2-4e67-4150-b9a8-fec979fb1658.json delete mode 100644 data/hfopenllm_v2/maldv/Qwentile2.5-32B-Instruct/f4fde074-8a05-42ec-884c-447b4bfaba39.json create mode 100644 data/hfopenllm_v2/maldv/badger-kappa-llama-3-8b/c4d686f2-2af1-4271-9556-09380f07ba5f.json create mode 100644 data/hfopenllm_v2/maldv/badger-lambda-llama-3-8b/93167303-b38e-43f0-a552-72c26ccb4339.json create mode 100644 data/hfopenllm_v2/maldv/badger-mu-llama-3-8b/b52a176f-f369-4791-a7e3-88a72709c868.json create mode 100644 data/hfopenllm_v2/maldv/badger-writer-llama-3-8b/b6310012-17f1-4ee0-abd0-0079a9299350.json delete mode 100644 data/hfopenllm_v2/marcuscedricridia/Cheng-1/7aa1c718-9ac6-426b-be50-5c7f37849b90.json create mode 100644 
data/hfopenllm_v2/marcuscedricridia/Cheng-1/f581e832-0f77-496e-bcd3-6cfec51ef594.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/Cheng-2-v1.1/47b47c89-b13b-4099-98b2-854feae05f63.json delete mode 100644 data/hfopenllm_v2/marcuscedricridia/Cheng-2-v1.1/a720e9bc-e8dd-4b7a-8d22-7b9f4b42ebe0.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/Cheng-2/8d51ae58-7b20-4fa4-b234-2abb9cdeaad4.json delete mode 100644 data/hfopenllm_v2/marcuscedricridia/Cheng-2/dbadece3-665b-423b-b2d9-e74d7c676133.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.1/4d4d5679-8ec6-49b8-a5d7-2a76497b44b7.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.3/0bdb6574-69e2-4858-b7aa-a90a5fadf741.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/Hush-Qwen2.5-7B-MST/fa1a92bb-ad25-4be2-a35f-7fdebbeeeba8.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/Hush-Qwen2.5-7B-Preview/d62ea0a1-cc9d-41b7-8d60-479b8e2262b5.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/Hush-Qwen2.5-7B-RP-v1.4-1M/912446e3-efdf-4ed0-80bd-261c6c87a3d0.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/Hush-Qwen2.5-7B-v1.1/5e86dc31-ae3e-4ef7-858e-41e29b3a8031.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/Hush-Qwen2.5-7B-v1.2/80680e5e-ab83-4a59-aeec-9d4166509c47.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/Hush-Qwen2.5-7B-v1.3/c5bc9c92-8469-4174-aafd-67bb61aaccf2.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/Hush-Qwen2.5-7B-v1.4/1d67b792-178b-4baa-a108-2362f658bd4e.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/Qwen2.5-7B-Preview/eb0c87b0-4795-4029-82c1-57ce37ba8259.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/Yell-Qwen2.5-7B-Preview-v1.1/dc9b2300-7ab0-4e92-9d23-15fe9ca52994.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/Yell-Qwen2.5-7B-Preview/e005624d-c822-4be1-9477-873642aae228.json delete mode 100644 data/hfopenllm_v2/marcuscedricridia/absolute-o1-7b/4e9eef3d-b851-41de-a3b2-88950f1d426f.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/absolute-o1-7b/e9756d91-b9e2-4dd0-bf08-c6154c7d1f2e.json delete mode 100644 data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b-2-28-2025/2a0bcf8c-cf70-4d13-a713-67054bc98412.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b-2-28-2025/704598c3-c5d6-4ce0-bab3-0fa98118e16a.json delete mode 100644 data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b-v1.1/f24a1f02-da21-49f0-91b9-65df4fd770db.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b-v1.1/fafc9463-d725-4827-8bc1-5cd9e83814b6.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b-v1.2-normalize-false/109820e0-ee00-449c-9ae5-58a7bf1da5f8.json delete mode 100644 data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b-v1.2-normalize-false/2632f42e-cbe3-4c55-b434-f4a239aeffa4.json delete mode 100644 data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b/0f7f339a-5523-4551-ba77-4fe34779d017.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b/37f29d5b-d803-4195-9ce0-75e45e32c160.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/cursor-o1-7b/43546f48-8c46-4481-b1e5-f4b1ad2535be.json delete mode 100644 data/hfopenllm_v2/marcuscedricridia/cursor-o1-7b/764c4dcb-caea-418c-b206-ee401ea0d979.json delete mode 100644 data/hfopenllm_v2/marcuscedricridia/cursorr-o1.2-7b/51cd189c-82a8-4475-8df5-9a855394274a.json create mode 100644 
data/hfopenllm_v2/marcuscedricridia/cursorr-o1.2-7b/ec81e0ff-9cb4-4d43-9f78-1d5f4edc9103.json delete mode 100644 data/hfopenllm_v2/marcuscedricridia/etr1o-explicit-v1.1/02fe0385-223e-4578-b3fb-d6819f783861.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/etr1o-explicit-v1.1/9290c86f-40b0-4520-b8aa-3460de62c396.json delete mode 100644 data/hfopenllm_v2/marcuscedricridia/etr1o-explicit-v1.2/3ec5106d-86be-48a8-bb3d-6574b6971641.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/etr1o-explicit-v1.2/a4bf576e-9556-4956-8dcb-4d8906d45db0.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/etr1o-v1.1/320a5c00-3307-4bc3-9f47-9befb88e461c.json delete mode 100644 data/hfopenllm_v2/marcuscedricridia/etr1o-v1.1/cd68d6d9-a5c7-4f32-b372-0e954af830ad.json delete mode 100644 data/hfopenllm_v2/marcuscedricridia/etr1o-v1.2/81b5a281-9dc6-4ae5-8079-d0e308a20c8e.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/etr1o-v1.2/844d1556-6bc6-467e-a145-f92646770727.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/fan-o1-7b/78923f4b-c2e7-4472-8398-10a0a8453ec5.json delete mode 100644 data/hfopenllm_v2/marcuscedricridia/fan-o1-7b/9693b68f-ac5c-4111-804c-0505ec8bf06d.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/olmner-7b/17abe1bf-2e97-409e-88e3-4f661861a195.json delete mode 100644 data/hfopenllm_v2/marcuscedricridia/olmner-7b/5064ebea-3ec3-4344-867f-e33f8937d096.json delete mode 100644 data/hfopenllm_v2/marcuscedricridia/olmner-della-7b/062e407e-7820-459f-83da-b670f8adff9d.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/olmner-della-7b/756978e5-1dfe-433e-ba88-339004a50ea7.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/olmner-o1-7b/a889ae3a-5d86-4454-bfb9-332c4b61b836.json delete mode 100644 data/hfopenllm_v2/marcuscedricridia/olmner-o1-7b/b1669ad9-450f-4a93-8094-26f427beb49f.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/olmner-sbr-7b/2c5e1086-03b7-4cdd-801e-03fb26183076.json delete mode 100644 data/hfopenllm_v2/marcuscedricridia/olmner-sbr-7b/afb014ed-a2e6-46b9-9ee9-a6a1f52e43cf.json delete mode 100644 data/hfopenllm_v2/marcuscedricridia/post-cursa-o1/c9632855-db4e-40bb-b140-2ff524d31fd2.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/post-cursa-o1/d9578847-b732-4c75-b246-9cdf03674fe0.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.2/4c6f83fe-7896-4cf3-9434-b5f8d499f5ba.json delete mode 100644 data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.2/9db3b6b0-7cc8-48b6-85f5-1662cad07fae.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.3/619037af-d528-4579-b7e3-58628468d8fb.json delete mode 100644 data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.3/f86cf126-4fb3-4419-82bf-e5c0168e25cb.json delete mode 100644 data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.4/4ed1f68a-6bc9-4621-beb1-3d274247cdb6.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.4/5113b737-8d9f-4321-9a67-91f1aabb40a1.json delete mode 100644 data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.6/50627b31-a8d4-401a-8449-5f33cfb17893.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.6/641ac372-2e5a-4b44-b22e-a17600a6a868.json delete mode 100644 data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1/51fc3a16-67c2-448b-9854-07ab8adc4dea.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1/7cbb0b08-871d-48fc-bf3e-86267f5ef19d.json delete mode 100644 
data/hfopenllm_v2/marcuscedricridia/r1o-et/84de36db-b427-40c4-80f6-2114c8ad4e4f.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/r1o-et/c82e887c-c8ab-4221-aa0b-e8b7a86e7c46.json delete mode 100644 data/hfopenllm_v2/marcuscedricridia/sbr-o1-7b/05666c00-3b8c-48f3-9e36-bc9a116bb0c6.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/sbr-o1-7b/50c65a83-9d08-4155-ad2c-5a2f8ffc8743.json create mode 100644 data/hfopenllm_v2/marcuscedricridia/stray-r1o-et/99d97aef-bb6b-471b-8ed7-f6f92f75842c.json delete mode 100644 data/hfopenllm_v2/marcuscedricridia/stray-r1o-et/cbf68d01-b993-4bcd-b174-23e3b6e28d3a.json delete mode 100644 data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-EnhancedMUSREnsembleV3/4800a6d0-8458-405a-95ca-6d0690a8f769.json create mode 100644 data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-EnhancedMUSREnsembleV3/b98504a0-f1d6-4872-b748-2ca8199c5328.json create mode 100644 data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-MUSR-ENSEMBLE-V2Mathis/5a159667-7460-4a97-884e-6a96df59873b.json delete mode 100644 data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-MUSR-ENSEMBLE-V2Mathis/95c9ef47-8194-4c00-bbea-a65a7715f9f3.json create mode 100644 data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-ENSEMBLE-Mathis/16a2eceb-073d-4dc3-87a7-a15c641c5ebb.json delete mode 100644 data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-ENSEMBLE-Mathis/b88d6df2-5642-4837-bf04-4d804a4ba3c4.json delete mode 100644 data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-Mathis/679f1499-572e-4f60-9b2d-4c8199d71107.json create mode 100644 data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-Mathis/e8e2d04b-21db-43dc-8b8f-7fa3bba87abc.json delete mode 100644 data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/8da1b04b-c3a8-4554-bcb5-0e08dcfd7483.json create mode 100644 data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/acbb93b3-f8fc-479d-9610-392efd7d4ecc.json create mode 100644 data/hfopenllm_v2/mattshumer/Reflection-Llama-3.1-70B/6d0589bd-1f05-44ee-afa5-3657b960d7c9.json create mode 100644 data/hfopenllm_v2/mattshumer/ref_70_e3/134663d8-05a8-4336-90e2-68e7cba5f1df.json delete mode 100644 data/hfopenllm_v2/mattshumer/ref_70_e3/8ab597da-85ec-45d5-b5e2-f51ca8a2f3c9.json create mode 100644 data/hfopenllm_v2/maywell/Qwen2-7B-Multilingual-RP/3bfced28-b06e-46ab-a6aa-171b0c424337.json create mode 100644 data/hfopenllm_v2/meditsolutions/Llama-3.1-MedIT-SUN-8B/b6a83b82-6b05-4437-a076-e2a3982f6169.json delete mode 100644 data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-1B-Instruct/f4c341cb-6489-49a1-9532-6b78c2238b2a.json create mode 100644 data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-1B-Instruct/f621201b-f571-4487-9f1e-b767675c659d.json create mode 100644 data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-1B-chat/710fdb79-fba4-42da-8e26-45b4caf75207.json delete mode 100644 data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-1B-chat/7e72df4d-7a54-4e11-b4a2-44224db285ec.json create mode 100644 data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-26000/35fa7a5e-8866-4ce3-9899-8737e908f34f.json create mode 100644 data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-34800/2b24b69b-15dc-4666-83f3-c77db545bdbd.json create mode 100644 data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0/0d00d849-2147-4fc1-9e5f-d42a95be6ca5.json delete mode 100644 data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-2.5B-chat/7385392b-79e9-4764-9326-d7bc1586b918.json create mode 100644 
data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-2.5B-chat/f45135b0-3c26-44b5-9922-a6c0817a172d.json create mode 100644 data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-HDIC-1B-Instruct/67eb0d6c-9086-4c80-8506-c3e1489f2673.json delete mode 100644 data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-HDIC-1B-Instruct/ac6f2c5a-32b7-4553-acaa-e329f1916c85.json create mode 100644 data/hfopenllm_v2/meditsolutions/MSH-Lite-7B-v1-Bielik-v2.3-Instruct-Llama-Prune/79d3dc85-08f6-475c-ac2c-1ff32f5a089f.json delete mode 100644 data/hfopenllm_v2/meditsolutions/MSH-Lite-7B-v1-Bielik-v2.3-Instruct-Llama-Prune/ff57f4fa-eb78-4ef4-9d92-9f160a1b936a.json create mode 100644 data/hfopenllm_v2/meditsolutions/MSH-v1-Bielik-v2.3-Instruct-MedIT-merge/4e9b3fa2-d3d2-4e4c-a1fa-c812f481f64a.json delete mode 100644 data/hfopenllm_v2/meditsolutions/MSH-v1-Bielik-v2.3-Instruct-MedIT-merge/a7e4718c-c4cf-4c0f-b67f-fd12fa54e4ad.json create mode 100644 data/hfopenllm_v2/meditsolutions/MedIT-Mesh-3B-Instruct/6e62a8a0-0bdf-4b6c-93de-593423dadd3a.json delete mode 100644 data/hfopenllm_v2/meditsolutions/MedIT-Mesh-3B-Instruct/89568570-298f-4dc5-9b7b-c9ce84d4010e.json create mode 100644 data/hfopenllm_v2/meditsolutions/SmolLM2-MedIT-Upscale-2B/871131c1-295d-40a0-a396-09d24b880064.json delete mode 100644 data/hfopenllm_v2/meditsolutions/SmolLM2-MedIT-Upscale-2B/d78a23ac-c3f1-4ad5-bbd2-ea37faea455f.json create mode 100644 data/hfopenllm_v2/meetkai/functionary-small-v3.1/44eefbb2-22d4-4dff-889d-a87fc40b2eea.json delete mode 100644 data/hfopenllm_v2/meetkai/functionary-small-v3.1/7312a4c6-85e2-4cb3-9c3e-1dfc039d1c3a.json create mode 100644 data/hfopenllm_v2/meraGPT/mera-mix-4x7B/cd1de470-a174-4c08-9efe-a06d493dc4b2.json delete mode 100644 data/hfopenllm_v2/mergekit-community/JAJUKA-WEWILLNEVERFORGETYOU-3B/c948d98a-af63-43d6-a7c9-9ee61654a239.json create mode 100644 data/hfopenllm_v2/mergekit-community/JAJUKA-WEWILLNEVERFORGETYOU-3B/fdb55a14-0697-4775-8358-fed202498b4f.json create mode 100644 data/hfopenllm_v2/mergekit-community/SuperQwen-2.5-1.5B/c069a224-638a-4cad-a9ad-e4f8579e8c15.json create mode 100644 data/hfopenllm_v2/mergekit-community/VirtuosoSmall-InstructModelStock/10e5c103-f25f-45bb-bfe6-a22876cffe87.json delete mode 100644 data/hfopenllm_v2/mergekit-community/VirtuosoSmall-InstructModelStock/8c7e09ef-ac37-4765-9f1e-a1b17ff4b084.json create mode 100644 data/hfopenllm_v2/mergekit-community/diabolic6045_ELN-AOC-CAIN/a9ecca9a-c5d4-45b2-a403-e74a98a46322.json delete mode 100644 data/hfopenllm_v2/mergekit-community/diabolic6045_ELN-AOC-CAIN/c87fbaff-133e-4312-87bf-d2fa397d66c4.json create mode 100644 data/hfopenllm_v2/mergekit-community/mergekit-dare_ties-ajgjgea/630d8a60-03b7-4550-82f4-e879b2e01c6c.json delete mode 100644 data/hfopenllm_v2/mergekit-community/mergekit-dare_ties-ajgjgea/69409961-b60d-4616-8a8e-8d0a9c6c966f.json create mode 100644 data/hfopenllm_v2/mergekit-community/mergekit-della-zgowfmf/206b5a96-ae07-41fd-822f-436d49c57dcb.json delete mode 100644 data/hfopenllm_v2/mergekit-community/mergekit-della-zgowfmf/2989b505-bfe2-4ca6-9445-af450ad9bee3.json delete mode 100644 data/hfopenllm_v2/mergekit-community/mergekit-model_stock-azgztvm/5a607a63-42bc-4f2b-af2f-4126234516d0.json create mode 100644 data/hfopenllm_v2/mergekit-community/mergekit-model_stock-azgztvm/702d2120-5301-4e03-bb0f-1f8ab19e522a.json delete mode 100644 data/hfopenllm_v2/mergekit-community/mergekit-slerp-fmrazcr/5fd04483-684e-4991-adea-ca5496e05208.json create mode 100644 
data/hfopenllm_v2/mergekit-community/mergekit-slerp-fmrazcr/61e39700-c237-49fc-baef-3fa573b3b0c6.json create mode 100644 data/hfopenllm_v2/mergekit-community/mergekit-ties-rraxdhv/8892ab84-750d-494f-9f87-ad28e73cf364.json delete mode 100644 data/hfopenllm_v2/mergekit-community/mergekit-ties-rraxdhv/bb3ccfe9-1ae3-49ec-9305-9150edaf8527.json create mode 100644 data/hfopenllm_v2/mergekit-community/mergekit-ties-ykqemwr/538a2eb7-34e4-4e78-a382-60a13710096e.json delete mode 100644 data/hfopenllm_v2/mergekit-community/mergekit-ties-ykqemwr/83a86bdd-4605-44a5-8168-ce88242c4ee6.json delete mode 100644 data/hfopenllm_v2/mergekit-community/sexeh_time_testing/79cd4642-8b10-416b-8a24-e3e3dc99b28f.json create mode 100644 data/hfopenllm_v2/mergekit-community/sexeh_time_testing/a041629e-8ed8-4a6c-95ee-98e759501e19.json create mode 100644 data/hfopenllm_v2/meta-llama/Llama-2-13b-chat-hf/09f05984-5815-4b3d-bc73-83ea1e5ecc27.json delete mode 100644 data/hfopenllm_v2/meta-llama/Llama-2-13b-chat-hf/1d97c368-3e12-43d4-afb2-e3977bf7cf35.json create mode 100644 data/hfopenllm_v2/meta-llama/Llama-2-13b-hf/6535524e-f8cf-4f2f-9d89-9ba70aedac91.json create mode 100644 data/hfopenllm_v2/meta-llama/Llama-2-70b-chat-hf/08ea4f9d-0e3c-4a8b-85e6-075290d30ba4.json delete mode 100644 data/hfopenllm_v2/meta-llama/Llama-2-70b-chat-hf/51411c24-49a4-48a7-9079-1f8c06e5318f.json create mode 100644 data/hfopenllm_v2/meta-llama/Llama-2-70b-hf/631f0a1f-a6f5-46f6-9aa0-31ac9764c086.json delete mode 100644 data/hfopenllm_v2/meta-llama/Llama-2-7b-chat-hf/3c870b5c-ab3f-4a21-836a-655d0e30efb9.json create mode 100644 data/hfopenllm_v2/meta-llama/Llama-2-7b-chat-hf/b771f6db-7516-4423-9010-3467db0e26e3.json create mode 100644 data/hfopenllm_v2/meta-llama/Llama-2-7b-hf/cf580dfb-2924-4c4b-9352-394275b959bd.json delete mode 100644 data/hfopenllm_v2/meta-llama/Llama-3.1-70B-Instruct/5623efdd-2f43-49d3-9e89-21432db474f4.json create mode 100644 data/hfopenllm_v2/meta-llama/Llama-3.1-70B-Instruct/ba549fe6-7718-4abf-a610-7e0f48611483.json create mode 100644 data/hfopenllm_v2/meta-llama/Llama-3.1-70B/b92440b1-78a9-4288-a432-f057f2b04a2f.json create mode 100644 data/hfopenllm_v2/meta-llama/Llama-3.1-8B-Instruct/838f3932-edf2-4f72-9238-981d1aadc771.json delete mode 100644 data/hfopenllm_v2/meta-llama/Llama-3.1-8B-Instruct/b5009142-e716-45b2-877e-9259a3a705da.json create mode 100644 data/hfopenllm_v2/meta-llama/Llama-3.1-8B/61e933b2-5cd1-4f08-8a9e-5b06ef54b6d5.json create mode 100644 data/hfopenllm_v2/meta-llama/Llama-3.2-1B-Instruct/0b307c78-94c7-418f-bc47-5106b81c30de.json delete mode 100644 data/hfopenllm_v2/meta-llama/Llama-3.2-1B-Instruct/b21f94af-3dfd-42f6-a380-3c5faebc90d8.json create mode 100644 data/hfopenllm_v2/meta-llama/Llama-3.2-1B/18783694-3e7b-4d06-9378-5a3fa4a7a0a2.json create mode 100644 data/hfopenllm_v2/meta-llama/Llama-3.2-3B-Instruct/dab922e5-1b46-4a90-b75c-1b26cd6cc6d3.json delete mode 100644 data/hfopenllm_v2/meta-llama/Llama-3.2-3B-Instruct/ec976588-9788-45e0-ae89-4682e3c8799a.json create mode 100644 data/hfopenllm_v2/meta-llama/Llama-3.2-3B/8cfa1f00-3b26-4d75-9b0a-0dea65e2e352.json delete mode 100644 data/hfopenllm_v2/meta-llama/Llama-3.3-70B-Instruct/b227d987-1bec-4124-955a-d81e2e2a52f6.json create mode 100644 data/hfopenllm_v2/meta-llama/Llama-3.3-70B-Instruct/f74d26e6-9dfb-4e81-8522-8309b27760cf.json create mode 100644 data/hfopenllm_v2/meta-llama/Meta-Llama-3-70B-Instruct/2022bcf3-a057-4b0a-aa33-6cf074ffc714.json delete mode 100644 
data/hfopenllm_v2/meta-llama/Meta-Llama-3-70B-Instruct/5a0ae810-10a3-4497-a81c-a88d2106a5ba.json create mode 100644 data/hfopenllm_v2/meta-llama/Meta-Llama-3-70B/a6e79d12-42f6-47ad-95fa-ba03fa4d3a06.json delete mode 100644 data/hfopenllm_v2/meta-llama/Meta-Llama-3-8B-Instruct/108befbc-f9a6-4d5f-9bcf-30fe7cebe35b.json create mode 100644 data/hfopenllm_v2/meta-llama/Meta-Llama-3-8B-Instruct/24d850fe-1817-4041-8767-085f4bd2bac3.json create mode 100644 data/hfopenllm_v2/meta-llama/Meta-Llama-3-8B-Instruct/610a3be1-1032-4079-ba37-d6c2c5f9fd55.json delete mode 100644 data/hfopenllm_v2/meta-llama/Meta-Llama-3-8B-Instruct/df2fd3a3-33d0-4ee8-be73-e8d3e00e8184.json create mode 100644 data/hfopenllm_v2/meta-llama/Meta-Llama-3-8B/857bb10e-1b43-4714-a758-0cef5816ba02.json delete mode 100644 data/hfopenllm_v2/meta/3rd-Degree-Burn/Llama-3.1-8B-Squareroot-v1/0851ad0a-7f87-48c8-943a-198ad2ef8ea3.json delete mode 100644 data/hfopenllm_v2/meta/3rd-Degree-Burn/Llama-3.1-8B-Squareroot/cbe8101a-f057-4151-9391-dbd883f4c09e.json delete mode 100644 data/hfopenllm_v2/meta/3rd-Degree-Burn/Llama-Squared-8B/fae2328b-af2f-49ff-a817-9406cf40c3d0.json delete mode 100644 data/hfopenllm_v2/meta/AGI-0/Artificium-llama3.1-8B-001/2e3e8be1-725f-4662-a8b1-da4437018e31.json delete mode 100644 data/hfopenllm_v2/meta/AGI-0/smartllama3.1-8B-001/c97c2d67-79d5-4813-8569-64eaefe66f89.json delete mode 100644 data/hfopenllm_v2/meta/ArliAI/Llama-3.1-8B-ArliAI-RPMax-v1.1/1d33cf05-9690-41ba-9288-5f39e5b3c17d.json delete mode 100644 data/hfopenllm_v2/meta/Azure99/blossom-v5-llama3-8b/19a6e24f-819e-480f-a15f-90273a0a06c5.json delete mode 100644 data/hfopenllm_v2/meta/BEE-spoke-data/Meta-Llama-3-8Bee/ae5f1f84-091a-4f80-ae40-92ada7e04f94.json delete mode 100644 data/hfopenllm_v2/meta/BEE-spoke-data/smol_llama-101M-GQA/3c1f129b-4f54-4187-876b-c93942179125.json delete mode 100644 data/hfopenllm_v2/meta/BEE-spoke-data/smol_llama-220M-GQA-fineweb_edu/03c78dad-b50d-4f80-91f8-bd8fbb87235d.json delete mode 100644 data/hfopenllm_v2/meta/BEE-spoke-data/smol_llama-220M-GQA/26596bba-b99d-417f-87be-91de8fa528d3.json delete mode 100644 data/hfopenllm_v2/meta/BEE-spoke-data/smol_llama-220M-openhermes/a0de28f1-8186-4eef-b5b4-ce6da71d8271.json delete mode 100644 data/hfopenllm_v2/meta/Ba2han/Llama-Phi-3_DoRA/99c4e277-7a0f-4c0c-ac19-25fe6b706a4a.json delete mode 100644 data/hfopenllm_v2/meta/BlackBeenie/Llama-3.1-8B-pythonic-passthrough-merge/f852dab4-9c5a-4fb9-99c2-951e7d2300d0.json delete mode 100644 data/hfopenllm_v2/meta/BlackBeenie/Neos-Llama-3.1-8B/904e3917-3bfd-4c83-8088-6b5ac596e7ea.json delete mode 100644 data/hfopenllm_v2/meta/BlackBeenie/Neos-Llama-3.1-base/ec9c46a6-a0e9-4174-8ebe-ce33d5eeb27d.json delete mode 100644 data/hfopenllm_v2/meta/BlackBeenie/llama-3-luminous-merged/9ca4809e-2bf0-477e-b960-64718561583b.json delete mode 100644 data/hfopenllm_v2/meta/BlackBeenie/llama-3.1-8B-Galore-openassistant-guanaco/7f8d4c8c-4877-4b2f-a0fe-7817894daa79.json delete mode 100644 data/hfopenllm_v2/meta/Bllossom/llama-3.2-Korean-Bllossom-AICA-5B/e2668c3c-a862-4564-acee-3c3ce439f74f.json delete mode 100644 data/hfopenllm_v2/meta/BrainWave-ML/llama3.2-3B-maths-orpo/979ef5b7-12cb-4e4d-81c7-9e6fcb1d6cef.json delete mode 100644 data/hfopenllm_v2/meta/CYFRAGOVPL/Llama-PLLuM-8B-base/01484796-f32b-43fe-b865-517b1a5c0b10.json delete mode 100644 data/hfopenllm_v2/meta/Columbia-NLP/LION-LLaMA-3-8b-odpo-v1.0/c256cede-47bb-487d-9de2-ae7352faa165.json delete mode 100644 data/hfopenllm_v2/meta/ContactDoctor/Bio-Medical-Llama-3-8B/42a3e3b7-b8e3-4470-b1a6-4a3daa146484.json 
 delete mode 100644 data/hfopenllm_v2/meta/Corianas/llama-3-reactor/0670ba93-c3d6-4a74-94e4-4a77311d4984.json
 delete mode 100644 data/hfopenllm_v2/meta/CreitinGameplays/Llama-3.1-8B-R1-v0.1/a4b935d4-1664-44e4-ad82-639755c2b909.json
 delete mode 100644 data/hfopenllm_v2/meta/Daemontatox/Llama3.3-70B-CogniLink/20b46645-a1dd-4974-9ad1-444f8ca78481.json
 delete mode 100644 data/hfopenllm_v2/meta/Daemontatox/Llama_cot/01a0a741-5f78-4c31-a743-8e42ba73a22d.json
 delete mode 100644 data/hfopenllm_v2/meta/Danielbrdz/Barcenas-Llama3-8b-ORPO/83f9e48d-919e-42ec-8ea4-cc933a1b98f5.json
 delete mode 100644 data/hfopenllm_v2/meta/DavidAU/DeepHermes-3-Llama-3-8B-Preview-16.5B-Brainstorm/e2d5ee61-4d0a-4925-b3bf-016b8ff6b1b9.json
 delete mode 100644 data/hfopenllm_v2/meta/DavidAU/DeepSeek-BlackRoot-R1-Distill-Llama-3.1-8B/5e116cf4-1be5-44aa-b266-494b1e4127d3.json
 delete mode 100644 data/hfopenllm_v2/meta/DavidAU/DeepSeek-Grand-Horror-SMB-R1-Distill-Llama-3.1-16B/a3b69c21-b6bf-4bf9-9097-ebb26c586829.json
 delete mode 100644 data/hfopenllm_v2/meta/DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Deep-Thinker-Uncensored-24B/d827463a-19cd-4bf2-8823-399b22b57387.json
 delete mode 100644 data/hfopenllm_v2/meta/DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Mad-Scientist-24B/efad116f-dfc7-4a63-95b1-c61655cd7f0c.json
 delete mode 100644 data/hfopenllm_v2/meta/DavidAU/DeepSeek-V2-Grand-Horror-SMB-R1-Distill-Llama-3.1-Uncensored-16.5B/5af2dce8-b12c-474c-b9e2-b5a38687772d.json
 delete mode 100644 data/hfopenllm_v2/meta/DavidAU/DeepThought-MOE-8X3B-R1-Llama-3.2-Reasoning-18B/f2b1fc61-a1c4-431c-b507-7d222ac3aedc.json
 delete mode 100644 data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter0/62d01464-4163-432c-a017-bedf41cba649.json
 delete mode 100644 data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter0/a9771320-cc89-43fc-b398-7797505bc4e2.json
 delete mode 100644 data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter1/c380c4b0-7804-4b59-a7e4-700f0a7122b3.json
 delete mode 100644 data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter2/5723e611-e7e0-47c0-a5ac-162f22690d70.json
 delete mode 100644 data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter3/07d16051-fe48-46e6-a47c-806e9f95a92b.json
 delete mode 100644 data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter3/7a91746e-e622-4eef-aef8-5f0ba04f03c9.json
 delete mode 100644 data/hfopenllm_v2/meta/DeepAutoAI/Explore_Llama-3.1-8B-Inst/0da22342-b4ef-4dd2-b4f5-327710986701.json
 delete mode 100644 data/hfopenllm_v2/meta/DeepAutoAI/Explore_Llama-3.2-1B-Inst/f8e00446-f253-4ff3-a9ff-ef182cf9e147.json
 delete mode 100644 data/hfopenllm_v2/meta/DeepAutoAI/Explore_Llama-3.2-1B-Inst_v0/455764e4-7b66-4189-b2e8-907047a92d45.json
 delete mode 100644 data/hfopenllm_v2/meta/DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1.1/40bc60f8-aa35-460b-a7af-b4cccd138c80.json
 delete mode 100644 data/hfopenllm_v2/meta/DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1/74f0ecd4-e04a-4775-9551-fc0e9fa40314.json
 delete mode 100644 data/hfopenllm_v2/meta/DeepAutoAI/ldm_soup_Llama-3.1-8B-Inst/a4da2ab3-adb3-405f-9bb7-2164d740d424.json
 delete mode 100644 data/hfopenllm_v2/meta/DeepMount00/Llama-3-8b-Ita/bee65c80-73f2-46e5-9532-8f92b38c4fc5.json
 delete mode 100644 data/hfopenllm_v2/meta/DeepMount00/Llama-3.1-8b-ITA/1c5ce85b-84f3-4ac4-8a98-9d80659bff18.json
 delete mode 100644 data/hfopenllm_v2/meta/DeepMount00/Llama-3.1-8b-ITA/ca297bdd-d804-4c43-bb6e-0b7e230974e2.json
 delete mode 100644 data/hfopenllm_v2/meta/DeepMount00/Llama-3.1-Distilled/6424a285-b3dc-4221-b3ba-5e7922185269.json
 delete mode 100644 data/hfopenllm_v2/meta/DevQuasar/DevQuasar-R1-Uncensored-Llama-8B/490df557-2f50-434a-a28d-a78a234da9fa.json
 delete mode 100644 data/hfopenllm_v2/meta/Enno-Ai/EnnoAi-Pro-French-Llama-3-8B-v0.4/bbc78d6d-09e3-410a-9bf9-a6dcdbef346e.json
 delete mode 100644 data/hfopenllm_v2/meta/Enno-Ai/EnnoAi-Pro-Llama-3-8B-v0.3/f1e005a2-b949-4518-b7e5-3fd7af3fcf0f.json
 delete mode 100644 data/hfopenllm_v2/meta/Enno-Ai/EnnoAi-Pro-Llama-3-8B/39a6c969-d938-4e4c-9adc-f71f1d30143d.json
 delete mode 100644 data/hfopenllm_v2/meta/Enno-Ai/EnnoAi-Pro-Llama-3.1-8B-v0.9/cf0ca830-4bb6-4317-97ae-380f54518d9f.json
 delete mode 100644 data/hfopenllm_v2/meta/EnnoAi/EnnoAi-Pro-Llama-3.1-8B-v1.0/32c712e0-4f63-4188-b4c8-5f37b6101e3f.json
 delete mode 100644 data/hfopenllm_v2/meta/EpistemeAI/Alpaca-Llama3.1-8B/cd4698d8-e9d0-4a00-855a-6e0b9cfc31d8.json
 delete mode 100644 data/hfopenllm_v2/meta/EpistemeAI/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math-KTO-beta/88e9cdd1-ad46-4ad0-9e9b-d872cdb63257.json
 delete mode 100644 data/hfopenllm_v2/meta/EpistemeAI/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R2/60d939fa-9ae2-4226-a955-d586c27fea68.json
 delete mode 100644 data/hfopenllm_v2/meta/EpistemeAI/Fireball-R1-Llama-3.1-8B-Medical-COT/1bfd3789-e95b-487c-9c8a-516c017f6558.json
 delete mode 100644 data/hfopenllm_v2/meta/EpistemeAI/Fireball-R1-Llama-3.1-8B/85ff1b65-eade-4d70-a278-99605f324e5a.json
 delete mode 100644 data/hfopenllm_v2/meta/EpistemeAI/Fireball-R1.1-Llama-3.1-8B/5938f7d8-dddb-4989-81c6-e57e177e52c9.json
 delete mode 100644 data/hfopenllm_v2/meta/EpistemeAI/Llama-3.2-3B-Agent007-Coder/ab812077-8d2b-40f8-bc49-65fffd7f6f26.json
 delete mode 100644 data/hfopenllm_v2/meta/EpistemeAI/OpenReasoner-Llama-3.2-3B-rs1.0/610f3053-b2a9-45a8-ac09-af3edcb8c826.json
 delete mode 100644 data/hfopenllm_v2/meta/EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT-V2-ORPO/14560449-0481-4346-aab2-ff75fdab691b.json
 delete mode 100644 data/hfopenllm_v2/meta/EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT/807ed760-775e-4082-90ea-7b524038bebf.json
 delete mode 100644 data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1-8B-Philos/392ea212-afd9-44a3-a6bb-2bba8f124492.json
 delete mode 100644 data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.01-8B-Philos/536229bc-b1fb-4078-826c-074b09c362b9.json
 delete mode 100644 data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.03-8B-Philos/b77a4371-97d7-43a0-892f-a0c01c2b8528.json
 delete mode 100644 data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.04-8B-Philos/de05ec0d-805d-4aa5-8ec3-1dc7446e6c1a.json
 delete mode 100644 data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math/2790feab-6850-4d51-a3a1-78ada0c56d03.json
 delete mode 100644 data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-C-R1-KTO-Reflection/42a38b08-6eb7-449d-99c5-cb0b2b76dd06.json
 delete mode 100644 data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R1/9ce9031b-76fd-4c33-b209-3011643d9266.json
 delete mode 100644 data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Llama-3.1-8B-Philos-Reflection/5ea20ab3-9d05-43f1-a276-7acbd2229fe8.json
 delete mode 100644 data/hfopenllm_v2/meta/Etherll/Herplete-LLM-Llama-3.1-8b-Ties/febdde9e-8e67-458b-be79-6a9c91a7237a.json
 delete mode 100644 data/hfopenllm_v2/meta/Etherll/Herplete-LLM-Llama-3.1-8b/3d70d2d7-1510-45de-93dc-1ba93cb0f24a.json
 delete mode 100644 data/hfopenllm_v2/meta/Etherll/Herplete-LLM-Llama-3.1-8b/52e6e50e-4621-491f-9e46-8d6d398c4344.json
 delete mode 100644 data/hfopenllm_v2/meta/Etherll/Replete-LLM-V3-Llama-3.1-8b/66846c9d-e2bc-416d-95b4-fed31d1b781b.json
 delete mode 100644 data/hfopenllm_v2/meta/Eurdem/Defne-llama3.1-8B/52eb695b-3d17-4abe-a386-7927348e5dd5.json
 delete mode 100644 data/hfopenllm_v2/meta/GenVRadmin/llama38bGenZ_Vikas-Merged/22a01298-038f-4069-b847-43409d2d4baa.json
 delete mode 100644 data/hfopenllm_v2/meta/Groq/Llama-3-Groq-8B-Tool-Use/636b3b4a-dc1f-4008-83ba-0d83fdcd5acb.json
 delete mode 100644 data/hfopenllm_v2/meta/Gryphe/Pantheon-RP-1.0-8b-Llama-3/a3abb802-acd8-49c7-bcff-3b79a4023d96.json
 delete mode 100644 data/hfopenllm_v2/meta/HPAI-BSC/Llama3-Aloe-8B-Alpha/10d1f626-64f0-4f43-9597-1221cf94c948.json
 delete mode 100644 data/hfopenllm_v2/meta/HPAI-BSC/Llama3.1-Aloe-Beta-8B/d7410909-8a7c-4afb-9cab-2537f837a9a1.json
 delete mode 100644 data/hfopenllm_v2/meta/Hastagaras/Llama-3.1-Jamet-8B-MK.I/be7d90fa-86be-4f3b-a3ef-2e1475b7bd64.json
 delete mode 100644 data/hfopenllm_v2/meta/Hastagaras/Zabuza-8B-Llama-3.1/fb698ce2-d422-46eb-aa98-17fb7645461a.json
 delete mode 100644 data/hfopenllm_v2/meta/HiroseKoichi/Llama-Salad-4x8B-V3/69037dce-5276-4e26-aa05-0a7bd2c4739b.json
 delete mode 100644 data/hfopenllm_v2/meta/HoangHa/Pensez-Llama3.1-8B/d27e73c5-654c-48c6-ad60-652a60bda72c.json
 delete mode 100644 data/hfopenllm_v2/meta/IDEA-CCNL/Ziya-LLaMA-13B-v1/98616cce-563a-4977-b5c0-bf8df3102303.json
 delete mode 100644 data/hfopenllm_v2/meta/Infinirc/Infinirc-Llama3-8B-2G-Release-v1.0/8c8a47f2-c8cf-4ea8-b0ee-0180aeb1b9f0.json
 delete mode 100644 data/hfopenllm_v2/meta/IntervitensInc/internlm2_5-20b-llamafied/5be7b084-b018-457a-a5d6-c9e3e9d3f70e.json
 delete mode 100644 data/hfopenllm_v2/meta/JackFram/llama-160m/11a0fc6d-5370-456e-8c01-5d7ed19e4b59.json
 delete mode 100644 data/hfopenllm_v2/meta/JackFram/llama-68m/3b05e3fd-4bf0-42a3-8dc5-13292ece8c77.json
 delete mode 100644 data/hfopenllm_v2/meta/Joseph717171/Hermes-3-Llama-3.1-8B_TIES_with_Base_Embeds_Initialized_to_Special_Instruct_Toks_dtypeF32/26dd2a1f-27ae-4311-9b80-f5a8f0fa456a.json
 delete mode 100644 data/hfopenllm_v2/meta/Joseph717171/Llama-3.1-SuperNova-8B-Lite_TIES_with_Base/e5843711-00cb-4167-a47d-4874af0c3ba2.json
 delete mode 100644 data/hfopenllm_v2/meta/Josephgflowers/Differential-Attention-Liquid-Metal-Tinyllama/670580f3-ca8a-473d-a3df-8c01952bda00.json
 delete mode 100644 data/hfopenllm_v2/meta/Josephgflowers/TinyLlama-Cinder-Agent-v1/00332c0d-d698-4ecd-9c2d-5f56921709d5.json
 delete mode 100644 data/hfopenllm_v2/meta/Josephgflowers/TinyLlama-v1.1-Cinders-World/2b993039-8980-4578-a9e2-a22a39385664.json
 delete mode 100644 data/hfopenllm_v2/meta/Josephgflowers/TinyLlama_v1.1_math_code-world-test-1/72cf7999-e4cb-4987-a694-cdcfae37bb02.json
 delete mode 100644 data/hfopenllm_v2/meta/Josephgflowers/Tinyllama-STEM-Cinder-Agent-v1/0c22748e-74ad-4bac-a714-c64a19a88af7.json
 delete mode 100644 data/hfopenllm_v2/meta/Josephgflowers/Tinyllama-r1/4293bc9f-4968-4af9-acd2-0ada64be43d4.json
 delete mode 100644 data/hfopenllm_v2/meta/KingNish/Reasoning-Llama-3b-v0.1/5f6f312f-3131-417d-b12e-3e30bb998d27.json
 delete mode 100644 data/hfopenllm_v2/meta/Kukedlc/NeuralLLaMa-3-8b-DT-v0.1/ec1bea6a-91e2-41c9-ab54-af84bf1a1d15.json
 delete mode 100644 data/hfopenllm_v2/meta/Kukedlc/NeuralLLaMa-3-8b-ORPO-v0.3/02d060d9-d545-445b-8d22-4ae117b8f324.json
 delete mode 100644 data/hfopenllm_v2/meta/LEESM/llama-2-7b-hf-lora-oki100p/13881952-9fe3-4308-93d5-912e59465d6e.json
 delete mode 100644 data/hfopenllm_v2/meta/LEESM/llama-2-7b-hf-lora-oki10p/9fb11511-0c66-495a-b634-da6bb0934706.json
 delete mode 100644 data/hfopenllm_v2/meta/LEESM/llama-3-8b-bnb-4b-kowiki231101/5f540be5-6932-41f4-b588-b88f8cfb89c7.json
 delete mode 100644 data/hfopenllm_v2/meta/LEESM/llama-3-Korean-Bllossom-8B-trexlab-oki10p/629b8df0-6ce3-4230-baf7-45b3944bf0d5.json
 delete mode 100644 data/hfopenllm_v2/meta/LimYeri/CodeMind-Llama3-8B-unsloth_v2-merged/0338e807-8f8e-41d9-b4ac-d80239340678.json
 delete mode 100644 data/hfopenllm_v2/meta/LimYeri/CodeMind-Llama3-8B-unsloth_v3-merged/c96743a9-b5ca-40ab-a86a-ed1c7ab8ddfd.json
 delete mode 100644 data/hfopenllm_v2/meta/LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-merged/0f52efcb-1b9b-4df1-820b-a8c0698481a7.json
 delete mode 100644 data/hfopenllm_v2/meta/LimYeri/CodeMind-Llama3.1-8B-unsloth-merged/82d77852-64e4-4dd0-a636-785958786fd2.json
 delete mode 100644 data/hfopenllm_v2/meta/Locutusque/Hercules-6.0-Llama-3.1-8B/2084dde6-b1e3-457b-9854-ace18cc5d943.json
 delete mode 100644 data/hfopenllm_v2/meta/Locutusque/Hercules-6.1-Llama-3.1-8B/267ac6ef-168e-489b-a7cc-0ff448b0acbf.json
 delete mode 100644 data/hfopenllm_v2/meta/Locutusque/Llama-3-NeuralHercules-5.0-8B/0c540f58-808b-42fc-b4b9-346367742f70.json
 delete mode 100644 data/hfopenllm_v2/meta/Locutusque/Llama-3-Yggdrasil-2.0-8B/478f0d4e-41e5-41c7-b9da-07db69c1d561.json
 delete mode 100644 data/hfopenllm_v2/meta/Lyte/Llama-3.2-3B-Overthinker/d997330d-6679-4d63-839c-677694ea4abc.json
 delete mode 100644 data/hfopenllm_v2/meta/MLP-KTLim/llama-3-Korean-Bllossom-8B/31a37662-052e-440c-a475-66543b6c52b1.json
 delete mode 100644 data/hfopenllm_v2/meta/Magpie-Align/Llama-3-8B-Magpie-Align-v0.1/c819ae59-5f32-4bba-a835-84fa9497de6b.json
 delete mode 100644 data/hfopenllm_v2/meta/Magpie-Align/Llama-3-8B-Magpie-Align-v0.1/ced5680b-ff4a-42be-a609-6fc2541d6109.json
 delete mode 100644 data/hfopenllm_v2/meta/Magpie-Align/Llama-3-8B-Magpie-Align-v0.3/f58be76c-043d-4ad9-81df-9a94d380808c.json
 delete mode 100644 data/hfopenllm_v2/meta/Magpie-Align/Llama-3.1-8B-Magpie-Align-v0.1/80e08062-397f-40d4-b6b2-a3e03d9cc320.json
 delete mode 100644 data/hfopenllm_v2/meta/MagusCorp/grpo_lora_enem_llama3_7b/22c931f2-cf99-46b1-b4f8-50db5a172a66.json
 delete mode 100644 data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.1-llama3.1-70b/e216df49-368d-457f-9153-e33741b7b847.json
 delete mode 100644 data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.2-llama3-70b/8b86e8c3-eb04-41a8-91e3-3eef396aca4f.json
 delete mode 100644 data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.2-llama3.1-70b/9112c2ec-cf0e-4d2c-9261-14ebb8706d69.json
 delete mode 100644 data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.3-llama3-70b/66d7e97b-0a79-4d39-8d6b-cf083239aa93.json
 delete mode 100644 data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.3-llama3.1-70b/7e8b2abe-68e5-445b-ae22-5b827e53b72d.json
 delete mode 100644 data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.4-llama3-70b/8cf1e62b-f646-4082-9d10-8cf376154d40.json
 delete mode 100644 data/hfopenllm_v2/meta/MaziyarPanahi/calme-3.1-llamaloi-3b/0acfe83d-3876-4c08-9b26-931450d24bfd.json
 delete mode 100644 data/hfopenllm_v2/meta/MoonRide/Llama-3.2-3B-Khelavaster/ed373700-5ff1-4a84-8746-12ec4c278e00.json
 delete mode 100644 data/hfopenllm_v2/meta/NAPS-ai/naps-llama-3_1_instruct-v0.6.0/3378460d-d044-4c7e-ba9f-48cc94f0bc3f.json
 delete mode 100644 data/hfopenllm_v2/meta/NAPS-ai/naps-llama3.1-70B-v0.2-fp16/16b6df0d-8e1b-4bec-b3f9-060273a4ad15.json
 delete mode 100644 data/hfopenllm_v2/meta/Naveenpoliasetty/llama3-8B-V2/53ae919d-c56b-415f-87c0-c6273730357b.json
 delete mode 100644 data/hfopenllm_v2/meta/Nekochu/Llama-3.1-8B-German-ORPO/83da2d8f-542c-4d21-88f9-b83f4e960579.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Dolphin3.0-Llama3.1-1B-abliterated/ed950058-9f6b-4ed6-9d41-0d2674dc19d1.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DeepDive_3_Prev_v1.0/67010272-067a-4dd4-a31d-9da58d72118e.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DeepDive_3_R1_Prev_v1.0/9aa57eda-6d6a-449e-801d-96e16499ddd6.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DobHerWild_R1_v1.1R/bedae6ba-9f3b-435b-bb7f-cadb7a684804.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DoberWild_v2.01/8a3df59d-9f38-4682-a760-5fa7903cab99.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DoberWild_v2.02/62ef54cd-d97d-473e-9dd2-42fe185e4d04.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DoberWild_v2.03/b81cbefe-7c08-4bc2-979f-10caf20fa9fa.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DodoWild_v2.01/78ecc0f4-dcd5-4c25-a598-ef95114f5868.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DodoWild_v2.02/f8448236-89b9-4a9c-949b-9bb45db5e400.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DodoWild_v2.03/3b2b7ebc-be82-4d7d-8bc8-e718513d164c.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DodoWild_v2.10/ca49f981-e4eb-4235-b472-de832ffedd72.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.01/ca856917-9100-41ea-9900-91d12be1de44.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.03/b1f9e472-38c5-409f-b112-3006bca90b94.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Dolermed_V1.01/4733fd17-2d7a-44cd-83bf-1201a3173495.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Dolerstormed_V1.04/9d44d069-44b1-414a-93c1-91b46ceabe66.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Hermedash_R1_V1.04/615e5bca-6f64-4bf9-a131-eefd7ec32c08.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.01/82f2d97c-e8d2-47a4-a56b-af781b98ba0b.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.03/e73d5aee-ad0f-4bec-8230-2087669444bb.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Hermedive_V1.01/99589a08-8f1e-437e-b6f0-e33a9dab5806.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Mediver_V1.01/35eb03f0-f11e-40d8-a830-7ce2cfde2956.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Medusa_v1.01/01b841ba-ecb1-4025-91b7-fb2c443ef85c.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Smarteaz_0.2_R1/1cbff8d9-a857-4816-8427-0450871021d6.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Smarteaz_V1.01/10cc1ce1-986e-44f5-b14e-a7b44d9de68d.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Stormeder_v1.04/e831c8bd-5bdd-4f00-9c91-ab4b29dfc66c.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Typhoon_v1.03/6043c193-a533-4194-8cf5-9ed83d095f0d.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_AquaSyn_0.1/4b512748-f6d0-4ed0-8ece-5b853a174329.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_AquaSyn_0.11/d3e57fb7-44cb-408a-9ed6-6387b1f0a543.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Dolto_0.1/dae3d027-e262-462c-9930-cfee221cef58.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Odyssea_V1.01/f3922129-7e69-495d-925b-c3c8a1b70c5a.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Odyssea_V1/deb8be23-8976-4dfb-b038-70a4b77de9f6.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_OpenTree_R1_0.1/11c52cd6-75e0-4800-9b98-fbc4aa81260d.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_OrcaSun_V1/dd17eeb9-c1d1-4f98-986e-aad15a592891.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_RandomLego_RP_R1_0.1/8254ed33-9ce6-484d-9171-5402156a1933.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_SunOrca_V1/848752ff-c92d-4ce2-94e8-5b8c8b765b77.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Sydonia_0.1/980cf18c-0163-414c-8ed0-dff894a328ee.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Syneridol_0.2/99397e12-f601-478c-af40-c8f428b923a8.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Synopsys_0.1/00ccf406-3e59-44cb-af59-6dcd391678ff.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Synopsys_0.11/6e4a0c11-2349-4846-9d9b-ccf6ef9ea43a.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_3b_Kermes_v1/f81acd72-b38a-424a-878b-833d094518da.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_3b_Kermes_v2.1/f4686eff-f1d7-49e0-85be-2a6c7f125e29.json
 delete mode 100644 data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_3b_Kermes_v2/a3d85774-ddac-436f-9c64-a751d2924bb5.json
 delete mode 100644 data/hfopenllm_v2/meta/NotASI/FineTome-Llama3.2-1B-0929/2346a7eb-2148-49f3-b960-363ba6b776d4.json
 delete mode 100644 data/hfopenllm_v2/meta/NotASI/FineTome-Llama3.2-3B-1002/e701f5dc-d604-4bbb-8e92-37d69781ae5f.json
 delete mode 100644 data/hfopenllm_v2/meta/NotASI/FineTome-v1.5-Llama3.2-1B-1007/8c67c634-82f0-4bb8-bd70-e98902649d96.json
 delete mode 100644 data/hfopenllm_v2/meta/NotASI/FineTome-v1.5-Llama3.2-3B-1007/d8a359e5-2899-4d3f-9fb4-3120f61951f4.json
 delete mode 100644 data/hfopenllm_v2/meta/NousResearch/Hermes-2-Pro-Llama-3-8B/af47ca72-b9b5-4cf3-84a7-e2f4602e6eaa.json
 delete mode 100644 data/hfopenllm_v2/meta/NousResearch/Hermes-2-Theta-Llama-3-8B/99c4b14f-8ea6-4f6e-af65-1e2ee58eeca9.json
 delete mode 100644 data/hfopenllm_v2/meta/NousResearch/Hermes-3-Llama-3.1-70B/e48bd1d8-1082-4b79-8145-87d7f013fb82.json
 delete mode 100644 data/hfopenllm_v2/meta/NousResearch/Hermes-3-Llama-3.1-8B/b9300d76-c854-48a2-a900-b661c1fae7bf.json
 delete mode 100644 data/hfopenllm_v2/meta/NousResearch/Hermes-3-Llama-3.2-3B/7e5f7bc1-1f9a-497a-a903-7d612bb923ca.json
 delete mode 100644 data/hfopenllm_v2/meta/NousResearch/Nous-Hermes-llama-2-7b/6ab36d53-da10-4f80-bd1b-dc037a020362.json
 delete mode 100644 data/hfopenllm_v2/meta/NousResearch/Yarn-Llama-2-13b-128k/e067537a-a621-483f-b1cf-ee78f57a39da.json
 delete mode 100644 data/hfopenllm_v2/meta/NousResearch/Yarn-Llama-2-7b-128k/e3e717a5-a987-4e94-a528-9aafadb6774f.json
 delete mode 100644 data/hfopenllm_v2/meta/NousResearch/Yarn-Llama-2-7b-64k/50db2b1d-e0b5-43b1-86e2-5fa55fb3a960.json
 delete mode 100644 data/hfopenllm_v2/meta/OEvortex/Emotional-llama-8B/c2593003-ca2a-4699-8473-a07683e7cd85.json
 delete mode 100644 data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3-70b-v21.2-32k/3d49db5c-bcd1-4d2f-9616-c551a53bdebe.json
 delete mode 100644 data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3-8b-v21.1-8k/2a86c8f6-2aed-4e0c-ad8a-e9ff5065a1e4.json
 delete mode 100644 data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3-8b-v21.2-32k/960fabe4-5395-4d3f-9680-65fe0b8655ac.json
 delete mode 100644 data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.1-70b-v22.1-131k/77d10b46-e3cf-42a0-b215-f9f8ff5ef60d.json
 delete mode 100644 data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.1-8b-v22.2-131k/b57cd648-1503-4bbf-81d7-4ca72ac9ff27.json
 delete mode 100644 data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.1-8b-v22.3-131k/7abaa7f8-8378-496c-b5f8-ac9046eeccc8.json
 delete mode 100644 data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.2-1b-v23.1-131k/85379044-198d-4fb5-82c8-50857f8d65d0.json
 delete mode 100644 data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.2-3b-v23.2-131k/6d6e86f6-f1b7-42ef-9581-b0542e6e12ef.json
 delete mode 100644 data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.3-70b-v24.1-131k/49768a60-0b77-4945-a048-013a6fb719ca.json
 delete mode 100644 data/hfopenllm_v2/meta/OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.1-200k/489b8b24-4295-41b3-b286-14f79972fe93.json
 delete mode 100644 data/hfopenllm_v2/meta/OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.3-200k/ce4e7736-51d8-431a-9bef-ac2bcb3ff0fe.json
 delete mode 100644 data/hfopenllm_v2/meta/OpenBuddy/openbuddy-qwen2.5llamaify-7b-v23.1-200k/d5f3ca22-b682-47c6-a7ba-93b401cb8c8f.json
 delete mode 100644 data/hfopenllm_v2/meta/OpenLeecher/llama3-8b-lima/b482d6e6-8520-4a77-a729-ebe2e9635a6c.json
 delete mode 100644 data/hfopenllm_v2/meta/OpenScholar/Llama-3.1_OpenScholar-8B/1e6ea564-30ff-4db3-8bb6-070da34e3fb5.json
 delete mode 100644 data/hfopenllm_v2/meta/Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2/3b02898e-b47f-4d53-9bd4-575d47df29af.json
 delete mode 100644 data/hfopenllm_v2/meta/Orenguteng/Llama-3.1-8B-Lexi-Uncensored/fe095b66-350c-4236-ab1b-e2e19af73486.json
 delete mode 100644 data/hfopenllm_v2/meta/PJMixers-Dev/LLaMa-3.1-RomboTiesTest-8B/0130c0ac-a790-492d-aac2-55e999b724ef.json
 delete mode 100644 data/hfopenllm_v2/meta/PJMixers-Dev/LLaMa-3.1-RomboTiesTest2-8B/dbfe2c89-a7c8-4fe5-95a1-cf1a58b6f55c.json
 delete mode 100644 data/hfopenllm_v2/meta/PJMixers/LLaMa-3-CursedStock-v2.0-8B/4f7c69a5-70e5-4f7b-9520-9fa9e642df57.json
 delete mode 100644 data/hfopenllm_v2/meta/RLHFlow/ArmoRM-Llama3-8B-v0.1/b8ce63dd-5c8a-4bba-b381-147efcdcc161.json
 delete mode 100644 data/hfopenllm_v2/meta/Replete-AI/Replete-Coder-Llama3-8B/c8b29113-7815-4cf3-be36-76e3e87d6068.json
 delete mode 100644 data/hfopenllm_v2/meta/Replete-AI/Replete-LLM-V2-Llama-3.1-8b/c3977d28-b18d-4e86-bc69-1aa08422585c.json
 delete mode 100644 data/hfopenllm_v2/meta/SaisExperiments/RightSheep-Llama3.2-3B/4ef7907b-270f-45dc-8f18-88c62c1c8bfe.json
 delete mode 100644 data/hfopenllm_v2/meta/Sakalti/Llama3.2-3B-Uranus-1/aba2e376-936d-4960-a82b-da09d2266826.json
 delete mode 100644 data/hfopenllm_v2/meta/SentientAGI/Dobby-Mini-Leashed-Llama-3.1-8B/ed1798c0-348f-4294-b546-8a7892225d33.json
 delete mode 100644 data/hfopenllm_v2/meta/SentientAGI/Dobby-Mini-Unhinged-Llama-3.1-8B/6ac51916-9278-46b6-9b0f-059745f3d845.json
 delete mode 100644 data/hfopenllm_v2/meta/Sicarius-Prototyping/Brainy_LLAMA/83fd7abf-00b0-4242-b8c3-87ef9c40dfcf.json
 delete mode 100644 data/hfopenllm_v2/meta/SicariusSicariiStuff/Impish_LLAMA_3B/9235cd92-5335-498e-881f-21938da4ed61.json
 delete mode 100644 data/hfopenllm_v2/meta/SicariusSicariiStuff/LLAMA-3_8B_Unaligned_BETA/27e6623c-49b2-4763-ac6f-b35f1f9002a8.json
 delete mode 100644 data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.1-8B-lora-epoch1/da7be2d8-96ff-4902-9628-c1781391c68e.json
 delete mode 100644 data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.1-8B-lora/fffe8411-9f9c-48ce-adb5-8d483022bffe.json
 delete mode 100644 data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch3/d0e4c608-0c64-4cf4-aee6-714475d500db.json
 delete mode 100644 data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch5/19c08486-99c5-4f53-a6cc-69cb58e0808a.json
 delete mode 100644 data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch3/f45610c5-ead3-4670-9639-aa30fb145829.json
 delete mode 100644 data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch5/34a1eda3-2a02-4522-955a-7ed3f1ee97d6.json
 delete mode 100644 data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch1/08fdfb9e-7998-4483-bb1a-4ea7f0e2980e.json
 delete mode 100644 data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch2/37a5a439-e2ac-46ec-af94-b60f127157de.json
 delete mode 100644 data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch3/6d191a68-8817-468a-850b-01f5ba76e05f.json
 delete mode 100644 data/hfopenllm_v2/meta/Skywork/Skywork-o1-Open-Llama-3.1-8B/e98879cc-d7fd-4e97-ab86-0ca28265abeb.json
 delete mode 100644 data/hfopenllm_v2/meta/Solshine/Llama-3-1-big-thoughtful-passthrough-merge-2/b36e0fba-9fa1-4e74-9d26-b4889343f113.json
 delete mode 100644 data/hfopenllm_v2/meta/T145/Llama-3.1-8B-Zeus/e0889500-8f6e-496c-b275-ac110458c56d.json
 delete mode 100644 data/hfopenllm_v2/meta/Tarek07/Progenitor-V1.1-LLaMa-70B/8638b115-f092-42f1-949d-162321fe5833.json
 delete mode 100644 data/hfopenllm_v2/meta/Tarek07/Thalassic-Alpha-LLaMa-70B/a20052ae-dfa0-4df7-a9a6-f182dbef513d.json
 delete mode 100644 data/hfopenllm_v2/meta/TencentARC/LLaMA-Pro-8B/8d2c510b-a092-4e5d-b468-6e58501cad8a.json
 delete mode 100644 data/hfopenllm_v2/meta/TheDrummer/Llama-3SOME-8B-v2/8f4349ad-76e7-4ce5-9121-fef2e376b4bc.json
 delete mode 100644 data/hfopenllm_v2/meta/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T/99c5044d-1308-4f30-9413-bc2672545f76.json
 delete mode 100644 data/hfopenllm_v2/meta/TinyLlama/TinyLlama_v1.1/e81db661-b05a-4d95-8be4-d663317d3d13.json
 delete mode 100644 data/hfopenllm_v2/meta/Triangle104/DS-Distilled-Hermes-Llama-3.1/d8a0873b-58e8-449a-aedd-7117e9931546.json
 delete mode 100644 data/hfopenllm_v2/meta/Triangle104/DS-Distilled-Hermes-Llama-3.1_TIES/9383604e-dd29-4c51-87eb-68f19ff929ec.json
 delete mode 100644 data/hfopenllm_v2/meta/Triangle104/DS-R1-Llama-8B-Harmony/ef25dd23-7cc0-46ad-898d-31bfb5205aad.json
 delete mode 100644 data/hfopenllm_v2/meta/Triangle104/DSR1-Distill-Llama-Lit-8B/b31d5098-4324-4307-aa50-2413ceba5481.json
 delete mode 100644 data/hfopenllm_v2/meta/Triangle104/Dolphin3-Llama3.2-Smart/88532e60-eff6-404b-8e74-fd8836a99ff9.json
 delete mode 100644 data/hfopenllm_v2/meta/Triangle104/Hermes-Llama-3.2-CoT-Summary/9bd6ca33-d62a-4327-a11e-f36188f0256a.json
 delete mode 100644 data/hfopenllm_v2/meta/Triangle104/Hermes-Llama-3.2-CoT/ddacf85a-a333-4cf9-b0f2-b9a5d5831b8c.json
 delete mode 100644 data/hfopenllm_v2/meta/Triangle104/Llama3.1-Allades-Lit-8b/d3d2f0cc-2775-4a01-b8ae-5206cafcb2bb.json
 delete mode 100644 data/hfopenllm_v2/meta/Triangle104/Llama3.1-cc-Lit-8b/3ccecc91-6528-4592-8ca3-722a62bfa102.json
 delete mode 100644 data/hfopenllm_v2/meta/Triangle104/Porpoise-R1-Llama3.2-3b/29843ea0-0ab4-44e1-8206-10a1135cce8a.json
 delete mode 100644 data/hfopenllm_v2/meta/Triangle104/RomboHermes3-R1-Llama3.2-3b/8ce06258-4909-4e46-a326-85052d28c5ff.json
 delete mode 100644 data/hfopenllm_v2/meta/UKzExecution/LlamaExecutor-8B-3.0.5/0f2ddff5-6077-4166-8fe4-ade89d3a6003.json
 delete mode 100644 data/hfopenllm_v2/meta/VIRNECT/llama-3-Korean-8B-r-v-0.1/c3448f16-33c4-42c8-bde3-b503786cba7f.json
 delete mode 100644 data/hfopenllm_v2/meta/VIRNECT/llama-3-Korean-8B/1193d16a-5ba8-4a6c-b13d-116bb7731a71.json
 delete mode 100644 data/hfopenllm_v2/meta/VIRNECT/llama-3-Korean-8B/c5ef57d2-a521-4b09-9aa1-0c06c9888cda.json
 delete mode 100644 data/hfopenllm_v2/meta/ValiantLabs/Llama3-70B-Fireplace/60150622-5b73-4b2c-a8f2-7c2e84cd3d0e.json
 delete mode 100644 data/hfopenllm_v2/meta/ValiantLabs/Llama3-70B-ShiningValiant2/1650ab9b-4e64-48f1-9551-fb58758cb2f6.json
 delete mode 100644 data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-70B-ShiningValiant2/6f4c4594-6f73-44e3-b531-f7651b523e8f.json
 delete mode 100644 data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Cobalt/382ce872-f5a6-4753-9cca-ba06ddcbb4b6.json
 delete mode 100644 data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Cobalt/8683a084-2521-469c-8575-9b2595c112dd.json
 delete mode 100644 data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Enigma/e1c4e454-79c8-448d-ab33-629900a35779.json
 delete mode 100644 data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Esper2/aa8f6d7a-bf7a-4e00-932f-b31c9cf0705e.json
 delete mode 100644 data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Fireplace2/08843042-f5ed-4dbb-befe-82c48e370664.json
 delete mode 100644 data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Fireplace2/8c25e90b-944b-4c23-a7ed-43c9609c6bf7.json
 delete mode 100644 data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-ShiningValiant2/4b3c0c63-4718-4fce-bd70-a31b3b60dfad.json
 delete mode 100644 data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-ShiningValiant2/e1d82962-59c9-44e7-9243-ea62f6639d1e.json
 delete mode 100644 data/hfopenllm_v2/meta/ValiantLabs/Llama3.2-3B-Enigma/71e3ab93-9667-4e99-b0a1-e25b701b13fd.json
 delete mode 100644 data/hfopenllm_v2/meta/ValiantLabs/Llama3.2-3B-Esper2/5567fc86-d3f8-4ef7-94d8-12fc28eeb9b4.json
 delete mode 100644 data/hfopenllm_v2/meta/ValiantLabs/Llama3.2-3B-ShiningValiant2/6c3a0d11-d421-4420-9df7-359164a85893.json
 delete mode 100644 data/hfopenllm_v2/meta/Weyaxi/Einstein-v6.1-Llama3-8B/13c07664-1ff1-48a4-a43d-877fc05bd19d.json
 delete mode 100644 data/hfopenllm_v2/meta/Weyaxi/Einstein-v6.1-developed-by-Weyaxi-Llama3-8B/06985382-8aec-4aa3-85ff-774da25ed2d3.json
 delete mode 100644 data/hfopenllm_v2/meta/Weyaxi/Einstein-v8-Llama3.2-1B/5edf6193-a8d6-41d3-b2fd-20f7ce537770.json
 delete mode 100644 data/hfopenllm_v2/meta/Xiaojian9992024/Llama3.2-1B-THREADRIPPER-v0.2/5ae4b63d-a84b-4468-aefe-8b5c7b88323e.json
 delete mode 100644 data/hfopenllm_v2/meta/Xiaojian9992024/Llama3.2-1B-THREADRIPPER/b7c71bb9-0f3b-4d2f-8902-5fefac1629c5.json
 delete mode 100644 data/hfopenllm_v2/meta/Xkev/Llama-3.2V-11B-cot/55f777f4-460f-4b83-a309-7e9e9113fd55.json
 delete mode 100644 data/hfopenllm_v2/meta/Yuma42/Llama3.1-IgneousIguana-8B/cd2f97bc-3f4d-43f2-b100-09eec8d122a6.json
 delete mode 100644 data/hfopenllm_v2/meta/Yuma42/Llama3.1-SuperHawk-8B/458dd163-075e-48ca-bb3b-650912f55696.json
 delete mode 100644 data/hfopenllm_v2/meta/ZeroXClem/Llama-3.1-8B-AthenaSky-MegaMix/2c35754b-3763-4098-8686-39694028e0d9.json
 delete mode 100644 data/hfopenllm_v2/meta/ZeroXClem/Llama-3.1-8B-RainbowLight-EtherealMix/18072fb3-a27a-4ad7-93ef-a3770637a0dc.json
 delete mode 100644 data/hfopenllm_v2/meta/ZeroXClem/Llama-3.1-8B-SpecialTitanFusion/38be33eb-3dfb-4987-a2f0-14ceb9d834f7.json
 delete mode 100644 data/hfopenllm_v2/meta/ZeroXClem/Llama-3.1-8B-SuperNova-EtherealHermes/1007d3aa-f8ca-420c-b974-a0f552c649ac.json
 delete mode 100644 data/hfopenllm_v2/meta/ZeroXClem/Llama-3.1-8B-SuperTulu-LexiNova/ba3564f4-f48f-4548-ae15-b5f78c4b44f4.json
 delete mode 100644 data/hfopenllm_v2/meta/aaditya/Llama3-OpenBioLLM-70B/e68ae3f7-3f46-43bb-8e14-0523af96998e.json
 delete mode 100644 data/hfopenllm_v2/meta/abacusai/Llama-3-Smaug-8B/ea57e277-5694-4981-ac47-d2fa633847ca.json
 delete mode 100644 data/hfopenllm_v2/meta/abhishek/autotrain-llama3-70b-orpo-v1/eb2ee4fb-cc98-4937-a385-19a5e783d1a7.json
 delete mode 100644 data/hfopenllm_v2/meta/abhishek/autotrain-llama3-70b-orpo-v2/15617903-e280-4c61-a326-5f615b46b3a8.json
 delete mode 100644 data/hfopenllm_v2/meta/abhishek/autotrain-llama3-orpo-v2/f8515d35-c7e8-440b-a61f-16f5acfdc003.json
 delete mode 100644 data/hfopenllm_v2/meta/agentlans/Llama3.1-8B-drill/869f9850-417b-43d7-bb40-61375a8bb09c.json
 delete mode 100644 data/hfopenllm_v2/meta/agentlans/Llama3.1-Daredevilish/417b2c35-090e-42c3-8a92-04f7258702a3.json
 delete mode 100644 data/hfopenllm_v2/meta/agentlans/Llama3.1-LexiHermes-SuperStorm/6f966179-a456-4914-807d-45ab507e0388.json
 delete mode 100644 data/hfopenllm_v2/meta/agentlans/Llama3.1-SuperDeepFuse-CrashCourse12K/455bd496-7a32-45c9-a792-3982781fdc16.json
 delete mode 100644 data/hfopenllm_v2/meta/agentlans/Llama3.1-SuperDeepFuse/6301252b-2353-438a-9e60-c6a572adfc5f.json
 delete mode 100644 data/hfopenllm_v2/meta/ahmeda335/13_outOf_32_pruned_layers_llama3.1-8b/54da4a97-6e12-4bb0-9138-dacd981b04bf.json
 delete mode 100644 data/hfopenllm_v2/meta/akhadangi/Llama3.2.1B.0.01-First/d07eada4-e73c-4dd6-8538-e3a9cd471f34.json
 delete mode 100644 data/hfopenllm_v2/meta/akhadangi/Llama3.2.1B.0.01-Last/9f796e5e-6c31-46e0-b839-e21d33a403c4.json
 delete mode 100644 data/hfopenllm_v2/meta/akhadangi/Llama3.2.1B.0.1-First/4ec306d4-3f34-4330-9898-fb5ccb9a3483.json
 delete mode 100644 data/hfopenllm_v2/meta/akhadangi/Llama3.2.1B.0.1-Last/82c24fd7-de74-4dc8-bd22-5761243ed826.json
 delete mode 100644 data/hfopenllm_v2/meta/akhadangi/Llama3.2.1B.BaseFiT/8577766f-d696-489d-8194-31b48c17941a.json
 delete mode 100644 data/hfopenllm_v2/meta/akjindal53244/Llama-3.1-Storm-8B/de2d2321-b6ed-4791-9114-757afc963876.json
 delete mode 100644 data/hfopenllm_v2/meta/akjindal53244/Llama-3.1-Storm-8B/f9aad6f2-ba24-47de-a613-b4011a2c52d1.json
 delete mode 100644 data/hfopenllm_v2/meta/alcholjung/llama3_medical_tuned/30324407-0848-48ae-bbd7-80676d9467db.json
 delete mode 100644 data/hfopenllm_v2/meta/allenai/Llama-3.1-Tulu-3-70B/006cafcb-452f-4df0-b42c-058719eb63e4.json
 delete mode 100644 data/hfopenllm_v2/meta/allenai/Llama-3.1-Tulu-3-70B/5683ed15-2699-4f0c-8e74-a65ff2d4dd49.json
 delete mode 100644 data/hfopenllm_v2/meta/allenai/Llama-3.1-Tulu-3-8B-RM/1a363aad-a1e7-404e-8c4a-4132f4fbab2b.json
 delete mode 100644 data/hfopenllm_v2/meta/allenai/Llama-3.1-Tulu-3-8B/5ad18861-1b4d-456d-9e1c-e945c1f71530.json
 delete mode 100644 data/hfopenllm_v2/meta/allenai/Llama-3.1-Tulu-3-8B/8a7c4b5a-85c7-4fc6-af4c-e9cde5d32d8b.json
 delete mode 100644 data/hfopenllm_v2/meta/allknowingroger/Llama3.1-60B/21684c0e-c9b7-4375-bf05-cf63e9bd19b4.json
 delete mode 100644 data/hfopenllm_v2/meta/allknowingroger/Yillama-40B/ab5ef6c9-76de-470e-b524-497036db94d4.json
 delete mode 100644 data/hfopenllm_v2/meta/allknowingroger/llama3-Jallabi-40B-s/d46307f8-774b-4871-a32a-6c5a9cc6b1b8.json
 delete mode 100644 data/hfopenllm_v2/meta/allknowingroger/llama3AnFeng-40B/dc25bda9-966c-44f8-991b-ad891d59befe.json
 delete mode 100644 data/hfopenllm_v2/meta/aloobun/Meta-Llama-3-7B-28Layers/f020ec4e-f026-4034-a219-1aacfcbb16b0.json
 delete mode 100644 data/hfopenllm_v2/meta/amd/AMD-Llama-135m/086ca0cf-79a3-4b94-980d-9384f1848562.json
 delete mode 100644 data/hfopenllm_v2/meta/amd/AMD-Llama-135m/4a623195-2073-4637-b748-696012109846.json
 delete mode 100644 data/hfopenllm_v2/meta/arcee-ai/Llama-3.1-SuperNova-Lite/4bc80120-a5e2-4824-b278-c2de7140a2bf.json
 delete mode 100644 data/hfopenllm_v2/meta/arcee-ai/Llama-Spark/aaceb35d-4106-4d6c-b895-446b87394f3b.json
 delete mode 100644 data/hfopenllm_v2/meta/argilla-warehouse/Llama-3.1-8B-MagPie-Ultra/4e4260dc-81e0-4e2f-a7ce-dd6a0f7e0796.json
 delete mode 100644 data/hfopenllm_v2/meta/asharsha30/LLAMA_Harsha_8_B_ORDP_10k/61523c37-faee-4708-be49-4c7e31d760e6.json
 delete mode 100644 data/hfopenllm_v2/meta/bfuzzy1/acheron-m1a-llama/da59bcfb-1f9a-41e5-9a8c-14f672dce595.json
 delete mode 100644 data/hfopenllm_v2/meta/bosonai/Higgs-Llama-3-70B/ebac2d72-ef36-43a7-83de-e28ae3eb4b22.json
 delete mode 100644 data/hfopenllm_v2/meta/bunnycore/Best-Mix-Llama-3.1-8B/ee1e13fe-2ec6-4ce8-8d32-1fe011b12ef8.json
 delete mode 100644 data/hfopenllm_v2/meta/bunnycore/HyperLlama-3.1-8B/7d031f11-6623-40c0-96bd-b3f0c135600b.json
 delete mode 100644 data/hfopenllm_v2/meta/bunnycore/Llama-3.1-8B-TitanFusion-Mix/5b0421b6-04ff-422c-a13e-9649306959d4.json
 delete mode 100644 data/hfopenllm_v2/meta/bunnycore/Llama-3.1-8B-TitanFusion-v3/6ee91c1c-b44e-44a9-b4b2-4e3cbeb594d3.json
 delete mode 100644 data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-All-Mix/60766e3b-e153-4ee8-8615-1c1e68b7cd75.json
 delete mode 100644 data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Bespoke-Thought/b43702d0-eef7-42d8-87b9-c1cbd0edb417.json
 delete mode 100644 data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Booval/9cb855b6-e141-492a-99fb-98858d76f66c.json
 delete mode 100644 data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Deep-Test/76edae8d-f4d3-41b2-8a24-cc676feed31c.json
 delete mode 100644 data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Deep-Test/f150ea9d-0e4a-49c7-aa12-a703ca011755.json
 delete mode 100644 data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Della/8c23bcaf-2753-4f60-85ec-e92a48b8bba3.json
 delete mode 100644 data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Long-Think/bf24dc90-551e-4e0d-8525-9b3b8c4ccfe1.json
 delete mode 100644 data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Mix-Skill/7a6d897c-0efe-4c18-808c-25f6b9a78b5d.json
 delete mode 100644 data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-ProdigyPlus/0ef3d0a9-a3e9-4b33-bece-bd7eec82514d.json
 delete mode 100644 data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-ProdigyPlusPlus/485d4a25-810a-4022-828b-15c255fa2004.json
 delete mode 100644 data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-RP-DeepThink/d24cf761-7c11-4f9b-9e41-ca24ac1225b9.json
 delete mode 100644 data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-RRStock/f1af1d33-fb95-462d-830c-5330d6481b6a.json
 delete mode 100644 data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-ToxicKod/d59a73eb-0aee-49f8-abce-6500f1fae79d.json
 delete mode 100644 data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3b-RP-Toxic-Fuse/4c2bc39c-2d04-4afd-a94d-bc8f59e75755.json
 delete mode 100644 data/hfopenllm_v2/meta/bunnycore/Smol-Llama-3.2-3B/eed01a32-3282-40c9-9a6c-9a0eae79fc8e.json
 delete mode 100644 data/hfopenllm_v2/meta/chargoddard/prometheus-2-llama-3-8b/ea26b157-81d0-4aa2-a6df-d1d391ab2a3b.json
 delete mode 100644 data/hfopenllm_v2/meta/cloudyu/Llama-3-70Bx2-MOE/8d0fa497-cdaa-4206-ae80-babed3089d43.json
 delete mode 100644 data/hfopenllm_v2/meta/cloudyu/Llama-3.2-3Bx4/0f4eaf10-0a2d-48e7-9c22-e1c771da16a0.json
 delete mode 100644 data/hfopenllm_v2/meta/cloudyu/S1-Llama-3.2-3Bx4-MoE/4cd18600-a389-4a22-88f8-0e35739665bb.json
 delete mode 100644 data/hfopenllm_v2/meta/cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-ipo/e89bbd89-f8fa-4156-94d8-6f390a383557.json
 delete mode 100644 data/hfopenllm_v2/meta/cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-sigmoid/f7aec62a-004e-4034-b4d9-152452bb519a.json
 delete mode 100644 data/hfopenllm_v2/meta/cluebbers/Llama-3.1-8B-paraphrase-type-generation-etpc/dbec72eb-bef2-4985-9ac6-bf5c6dabc25c.json
 delete mode 100644 data/hfopenllm_v2/meta/cognitivecomputations/Dolphin3.0-Llama3.1-8B/fa439482-ca9c-49c3-9732-1147c3965c56.json
 delete mode 100644 data/hfopenllm_v2/meta/cognitivecomputations/Dolphin3.0-Llama3.2-1B/0aecb893-2b9b-4cfd-bf97-b9887b0aa539.json
 delete mode 100644 data/hfopenllm_v2/meta/cognitivecomputations/dolphin-2.9-llama3-8b/d985b9ab-a760-4a50-973e-6985e778b97d.json
 delete mode 100644 data/hfopenllm_v2/meta/cognitivecomputations/dolphin-2.9.1-llama-3-70b/7c975279-f21e-418b-bc0b-739a933b91dc.json
 delete mode 100644 data/hfopenllm_v2/meta/cognitivecomputations/dolphin-2.9.4-llama3.1-8b/d7da3f99-b538-4b33-a3dc-b2e4a96d3f89.json
 delete mode 100644 data/hfopenllm_v2/meta/collaiborateorg/Collaiborator-MEDLLM-Llama-3-8B-v2/55eeee3c-b812-4359-ab5f-4e3fa976648f.json
 delete mode 100644 data/hfopenllm_v2/meta/cpayne1303/llama-43m-beta/d79e4774-159d-4b47-8cc0-64d7844e7bfc.json
 delete mode 100644 data/hfopenllm_v2/meta/cpayne1303/llama-43m-beta/d987e61a-c7cc-4072-9e2c-faa6304eab65.json
 delete mode 100644 data/hfopenllm_v2/meta/cstr/llama3.1-8b-spaetzle-v90/73270182-a54d-4fc5-834a-89283677c1af.json
 delete mode 100644 data/hfopenllm_v2/meta/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/8df04772-fc5c-4dfb-8366-f9844bf52a0e.json
 delete mode 100644 data/hfopenllm_v2/meta/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/650f54ba-4d43-4e31-92cd-16c7c1913b34.json
 delete mode 100644 data/hfopenllm_v2/meta/dfurman/Llama-3-70B-Orpo-v0.1/854d263a-00cc-488a-83eb-c69bb74da5b5.json
 delete mode 100644 data/hfopenllm_v2/meta/dfurman/Llama-3-8B-Orpo-v0.1/0a6a3c2b-c0f5-44c7-9ac2-e278a303197e.json
 delete mode 100644 data/hfopenllm_v2/meta/dfurman/Llama-3-8B-Orpo-v0.1/10047fc1-254f-406c-807c-3274d9780550.json
 delete mode 100644 data/hfopenllm_v2/meta/dnhkng/RYS-Llama3.1-Large/ca04e634-81e6-49fb-bdc4-2ff0ef04b75f.json
 delete mode 100644 data/hfopenllm_v2/meta/duyhv1411/Llama-3.2-1B-en-vi/000fcba9-c157-48de-b672-f583f4cd3881.json
 delete mode 100644 data/hfopenllm_v2/meta/duyhv1411/Llama-3.2-3B-en-vi/31381b9d-77fe-491d-891c-de4fd37fa1cd.json
 delete mode 100644 data/hfopenllm_v2/meta/ehristoforu/HappyLlama1/07a29c73-e3f4-4f01-b105-ac1ef2fdff43.json
 delete mode 100644 data/hfopenllm_v2/meta/ehristoforu/mllama-3.1-8b-it/c4fa1166-5255-4b95-8c7b-e1f93265f126.json
 delete mode 100644 data/hfopenllm_v2/meta/flammenai/Llama3.1-Flammades-70B/92b8ecb7-80a2-4b77-bf20-8d87a36209c0.json
 delete mode 100644 data/hfopenllm_v2/meta/flammenai/Mahou-1.2a-llama3-8B/eb10ecab-2be4-4b75-9b85-d2f2786fd095.json
 delete mode 100644 data/hfopenllm_v2/meta/flammenai/Mahou-1.5-llama3.1-70B/653ff1ac-158e-4d36-a813-22ebef4a76ce.json
 delete mode 100644 data/hfopenllm_v2/meta/fluently-lm/Llama-TI-8B/63a32ad0-b871-437c-991a-342de8c13345.json
 delete mode 100644 data/hfopenllm_v2/meta/fulim/FineLlama-3.1-8B/46fa0a20-2810-4f0b-befe-afc3fc774734.json
 delete mode 100644 data/hfopenllm_v2/meta/gbueno86/Brinebreath-Llama-3.1-70B/12e0e194-ef37-4da5-9354-e82f983fadb2.json
 delete mode 100644 data/hfopenllm_v2/meta/gbueno86/Meta-LLama-3-Cat-Smaug-LLama-70b/9b7181ec-81f6-438a-8af6-a219f356f430.json
 delete mode 100644 data/hfopenllm_v2/meta/glaiveai/Reflection-Llama-3.1-70B/3e8ba765-d24b-4ffe-a816-21ea02b7ba14.json
 delete mode 100644 data/hfopenllm_v2/meta/gmonsoon/SahabatAI-Llama-11B-Test/48f5e083-9fa3-4753-a734-578ac3e15e1f.json
 delete mode 100644 data/hfopenllm_v2/meta/grimjim/DeepSauerHuatuoSkywork-R1-o1-Llama-3.1-8B/f7439085-a0c9-4d5b-bd4f-bf1841d5ce02.json
 delete mode 100644 data/hfopenllm_v2/meta/grimjim/HuatuoSkywork-o1-Llama-3.1-8B/6a173156-75b3-47f4-9f88-ecace0ee6942.json
 delete mode 100644 data/hfopenllm_v2/meta/grimjim/Llama-Nephilim-Metamorphosis-v2-8B/ac20706b-0370-47de-bc6b-ae188f8a9259.json
 delete mode 100644 data/hfopenllm_v2/meta/grimjim/Llama3.1-SuperNovaLite-HuatuoSkywork-o1-8B/f2fbc411-4a4b-4727-9fdc-eda481f4f10c.json
 delete mode 100644 data/hfopenllm_v2/meta/grimjim/SauerHuatuoSkywork-o1-Llama-3.1-8B/30482674-45a3-4400-84e0-eef215540eb5.json
 delete mode 100644 data/hfopenllm_v2/meta/grimjim/llama-3-Nephilim-v1-8B/498c4d5e-0500-42da-9c75-e8da578516f8.json
 delete mode 100644 data/hfopenllm_v2/meta/grimjim/llama-3-Nephilim-v2-8B/de82dcd9-adae-4b28-8248-156e324e036d.json
 delete mode 100644 data/hfopenllm_v2/meta/grimjim/llama-3-Nephilim-v2.1-8B/df6327cf-82e1-437f-9c9a-c31205452717.json
 delete mode 100644 data/hfopenllm_v2/meta/grimjim/llama-3-Nephilim-v3-8B/ecee6e6a-15a1-4455-9724-34ca14477064.json
 delete mode 100644 data/hfopenllm_v2/meta/hotmailuser/Llama-Hermes-slerp-8B/cf2de222-77bf-456c-acb3-c3aa33367a9d.json
 delete mode 100644 data/hfopenllm_v2/meta/hotmailuser/Llama-Hermes-slerp2-8B/be5505d7-06ae-4ab5-ba7f-6ff4732b3180.json
 delete mode 100644 data/hfopenllm_v2/meta/hotmailuser/LlamaStock-8B/23b559eb-4493-462f-bb37-5e232b3336bc.json
 delete mode 100644 data/hfopenllm_v2/meta/huggyllama/llama-13b/20b49499-5df3-450c-a20d-dc421b937e91.json
 delete mode 100644 data/hfopenllm_v2/meta/huggyllama/llama-65b/2bff16e4-f0ed-4957-8b20-4ae269642088.json
 delete mode 100644 data/hfopenllm_v2/meta/huggyllama/llama-7b/61a5624d-ef42-4fdd-a0b1-08fdc2d07615.json
 delete mode 100644 data/hfopenllm_v2/meta/iFaz/llama31_8B_en_emo_v4/198e5d81-0dcd-4dc0-9919-139ce0aa2dd5.json
 delete mode 100644 data/hfopenllm_v2/meta/iFaz/llama32_1B_en_emo_v1/f202b553-56e6-4a27-b2fa-0f98feabe11e.json
 delete mode 100644 data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_1000_stp/a4111230-4313-4f75-bcd3-c598e436987b.json
 delete mode 100644 data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_2000_stp/5468fbdc-63e7-4e9d-8370-2f3f0e83e559.json
 delete mode 100644 data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_300_stp/0806c872-f913-493a-ada4-7db88a93b840.json
 delete mode 100644 data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_5000_stp/9ffc9dbb-065b-47ae-a985-541ee7f7126d.json
 delete mode 100644 data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_v2/03587c1e-14e3-434f-9582-448914832c95.json
 delete mode 100644 data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_v3/8bb5540b-b19d-4641-9dea-36ea43b07250.json
 delete mode 100644 data/hfopenllm_v2/meta/jiangxinyang-shanda/Homer-LLama3-8B/73c50ab1-bdf8-4fbc-b7e6-d4a8e8bb8a4e.json
 delete mode 100644 data/hfopenllm_v2/meta/keeeeenw/MicroLlama/7407c2ed-23f5-4c92-b987-2c3a91147d98.json
 delete mode 100644 data/hfopenllm_v2/meta/kevin009/llamaRAGdrama/41e4d24f-9790-40f5-a915-ee4155d5cbc6.json
 delete mode 100644 data/hfopenllm_v2/meta/khoantap/llama-3-8b-stock-merge/211ac2a5-5bd1-4347-8eb8-fa1bd4b1a5ad.json
 delete mode 100644 data/hfopenllm_v2/meta/khoantap/llama-breadcrumbs-ties-merge/9eae434a-fb2a-45b9-a592-f39a9c469f07.json
 delete mode 100644 data/hfopenllm_v2/meta/khoantap/llama-evolve-ties-best-merge/0ab7f323-1be5-4fc7-a5d8-d4f77f802da3.json
 delete mode 100644 data/hfopenllm_v2/meta/khoantap/llama-linear-0.5-0.5-1-merge/0906fee9-0edd-494f-bf01-a34711f17596.json
 delete mode 100644 data/hfopenllm_v2/meta/khoantap/llama-linear-0.5-1-0.5-merge/88d174f6-6d30-4859-bbf0-6f5446ce1b9d.json
 delete mode 100644 data/hfopenllm_v2/meta/khoantap/llama-linear-1-0.5-0.5-merge/49e5e4e4-6905-4b9e-9f53-b7ac598b5102.json
 delete mode 100644 data/hfopenllm_v2/meta/khoantap/llama-slerp-merge/e30c2825-6d36-454c-8787-e5cbdfcbcfdf.json
 delete mode 100644 data/hfopenllm_v2/meta/khulaifi95/Llama-3.1-8B-Reason-Blend-888k/85a2710f-feaf-4dc2-aafa-04c33abf6425.json
 delete mode 100644 data/hfopenllm_v2/meta/laislemke/LLaMA-2-vicuna-7b-slerp/66d98c7d-7fd1-41bc-9229-855f9d02412d.json
 delete mode 100644 data/hfopenllm_v2/meta/lemon07r/Llama-3-RedMagic4-8B/22ae03c6-dd4f-4263-a005-624dae701da3.json
 delete mode 100644 data/hfopenllm_v2/meta/lemon07r/llama-3-NeuralMahou-8b/13b8357d-225e-4ba0-bf34-45479a562532.json
 delete mode 100644 data/hfopenllm_v2/meta/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-full/37aa2a50-974f-4cb0-81e3-f160f08c8a0e.json
 delete mode 100644 data/hfopenllm_v2/meta/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-half/90ab1587-99b9-48e1-b3f3-8aaf07313eaa.json
 delete mode 100644 data/hfopenllm_v2/meta/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top25/ebfb14c0-d725-4650-9d04-ed4f7ebaf676.json
 delete mode 100644 data/hfopenllm_v2/meta/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top75/fcb13fe4-e314-4cdd-ae6e-82531ad6a829.json
 delete mode 100644 data/hfopenllm_v2/meta/lightblue/suzume-llama-3-8B-multilingual/8eaee9b3-78b0-4523-9151-695c27c5cfa7.json
 delete mode 100644 data/hfopenllm_v2/meta/m42-health/Llama3-Med42-70B/36ebe051-2bac-46cb-b990-33025df0ccac.json
 delete mode 100644 data/hfopenllm_v2/meta/maldv/badger-kappa-llama-3-8b/32e1b138-c236-48e3-8152-d3715127d309.json
 delete mode 100644 data/hfopenllm_v2/meta/maldv/badger-lambda-llama-3-8b/18ae9d71-15e0-4d11-86c0-9cac4dbaa3f3.json
 delete mode 100644 data/hfopenllm_v2/meta/maldv/badger-mu-llama-3-8b/d43699f9-e6e5-428b-ab52-9d7114443608.json
 delete mode 100644 data/hfopenllm_v2/meta/maldv/badger-writer-llama-3-8b/7c88458f-e9a0-4e90-b5ed-dbdb6fd49b9d.json
 delete mode 100644 data/hfopenllm_v2/meta/mattshumer/Reflection-Llama-3.1-70B/155f55e9-34e3-4753-a783-31df44e791e0.json
 delete mode 100644 data/hfopenllm_v2/meta/meditsolutions/Llama-3.1-MedIT-SUN-8B/94d286c8-8356-4bdd-ac91-2ce517b6b974.json
 delete mode 100644 data/hfopenllm_v2/meta/meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-26000/85ccad14-a4eb-41c8-b1b7-f2d0215c358a.json
 delete mode 100644 data/hfopenllm_v2/meta/meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-34800/23dca426-d0d9-43d0-86ff-50e01cc292d0.json
 delete mode 100644 data/hfopenllm_v2/meta/meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0/bba22496-6f3a-4ddb-8a69-5995e72aa15f.json
 delete mode 100644 data/hfopenllm_v2/meta/meta-llama/Llama-2-13b-hf/7a0c1d3a-26f5-44d0-8ca1-8ce6db39cb99.json
 delete mode 100644 data/hfopenllm_v2/meta/meta-llama/Llama-2-70b-hf/70acb3cd-fea6-481a-8bf4-fa72e953c110.json
 delete mode 100644 data/hfopenllm_v2/meta/meta-llama/Llama-2-7b-hf/36fbd2e7-97fa-4ba4-aad2-47bfc225771d.json
 delete mode 100644 data/hfopenllm_v2/meta/meta-llama/Llama-3.1-70B/88d33049-cd88-4b4a-94ba-d0c35a635cfc.json
 delete mode 100644 data/hfopenllm_v2/meta/meta-llama/Llama-3.1-8B/58e87619-6244-45b9-8a1f-b2f8f0d0cd31.json
 delete mode 100644 data/hfopenllm_v2/meta/meta-llama/Llama-3.2-1B/b4b6a8d2-be7f-4b8f-b280-3e62015a61d3.json
 delete mode 100644 data/hfopenllm_v2/meta/meta-llama/Llama-3.2-3B/19aba348-6bdd-425a-bd7b-505aa2658f6c.json
 delete mode 100644 data/hfopenllm_v2/meta/meta-llama/Meta-Llama-3-70B/dddadaa0-6808-4b34-a6e2-29663460c3e0.json
 delete mode 100644 data/hfopenllm_v2/meta/meta-llama/Meta-Llama-3-8B/75f6ae05-a987-455d-8167-fc345d55c370.json
 delete mode 100644 data/hfopenllm_v2/meta/migtissera/Llama-3-70B-Synthia-v3.5/7ba5e7cb-3050-4838-8762-4b31a5c9d912.json
 delete mode 100644 data/hfopenllm_v2/meta/migtissera/Llama-3-8B-Synthia-v3.5/3c843cd0-ce71-4feb-9452-65fc7534518e.json
 delete mode 100644 data/hfopenllm_v2/meta/mindw96/DeepSeek-llama3.3-Bllossom-8B-DACON-LLM3/ce85152e-fdde-406a-9818-0eb945ff1d6a.json
 delete mode 100644 data/hfopenllm_v2/meta/mkurman/llama-3.2-MEDIT-3B-o1/43a51d6d-e038-4476-a63b-2f4260d736d4.json
 delete mode 100644 data/hfopenllm_v2/meta/mkxu/llama-3-8b-po1/e26ea6fd-723d-45de-b0f1-5bcbae1eb992.json
 delete mode 100644 data/hfopenllm_v2/meta/mlabonne/ChimeraLlama-3-8B-v2/fd31a5f1-986e-4040-b04b-3018161e6e66.json
 delete mode 100644 data/hfopenllm_v2/meta/mlabonne/ChimeraLlama-3-8B-v3/eef221de-8dc3-410a-943d-900c810948ae.json
 delete mode 100644 data/hfopenllm_v2/meta/mlabonne/Hermes-3-Llama-3.1-70B-lorablated/07190707-16fb-47fc-9813-4f2408a04bdb.json
 delete mode 100644 data/hfopenllm_v2/meta/mlabonne/OrpoLlama-3-8B/b8b5b30e-d259-49ae-8155-7f63ddae88c8.json
 delete mode 100644 data/hfopenllm_v2/meta/mmnga/Llama-3-70B-japanese-suzume-vector-v0.1/56f52103-ea5e-4228-ac7b-3c6929fe5b76.json
 delete mode 100644 data/hfopenllm_v2/meta/mobiuslabsgmbh/DeepSeek-R1-ReDistill-Llama3-8B-v1.1/09ec0c0c-d403-4f23-99a4-61196c70734d.json
 delete mode 100644 data/hfopenllm_v2/meta/mukaj/Llama-3.1-Hawkish-8B/b94f468b-7c0e-491e-8404-de1bad7ff0f0.json
 delete mode 100644 data/hfopenllm_v2/meta/nbeerbower/Llama-3.1-Nemotron-lorablated-70B/a9af8b88-8f00-4662-8ca4-d042030885ae.json
 delete mode 100644 data/hfopenllm_v2/meta/nbeerbower/Llama3.1-Gutenberg-Doppel-70B/fffd0da2-d4b0-4a11-9fd4-c0dfa0c70431.json
 delete mode 100644 data/hfopenllm_v2/meta/nbeerbower/llama-3-gutenberg-8B/144ff584-3230-42e5-acae-35518b10a1e9.json
 delete mode 100644 data/hfopenllm_v2/meta/nbeerbower/llama3.1-cc-8B/e011ff58-ea5c-4857-a76d-503c4188886f.json
 delete mode 100644 data/hfopenllm_v2/meta/nbeerbower/llama3.1-kartoffeldes-70B/c17cced5-be98-49c5-a919-c15b641ba2e7.json
 delete mode 100644 data/hfopenllm_v2/meta/necva/IE-cont-Llama3.1-8B/43f5a551-7257-4595-9b0c-60799ade231b.json
 delete mode 100644 data/hfopenllm_v2/meta/netcat420/Llama3.1-MFANN-8b/aa3467df-1a74-47af-b635-0318df88dd58.json
 delete mode 100644 data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V2/a9c38a44-a973-4bfd-a1f1-aa094d5e37fd.json
 delete mode 100644 data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V3/e5a71267-56c7-418a-bfcc-b4b5ed10496e.json
 delete mode 100644 data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-SLERP-V4/12a56879-c48c-4422-bc6f-fad813c94414.json
 delete mode 100644 data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-SLERP-V5/d52d6e93-b291-4f21-aca7-2c8d48313dec.json
 delete mode 100644 data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-Slerp-TIES/c5a71d25-35f7-453e-9551-7881046fdeff.json
 delete mode 100644 data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-Slerp-V3.2/1ef7ee4e-ab54-4e5a-b27f-4d6aeffd3f54.json
 delete mode 100644 data/hfopenllm_v2/meta/netcat420/MFANN-llama3.1-Abliterated-SLERP/3d3862a4-79df-488c-8d17-dc332fa3abce.json
 delete mode 100644 data/hfopenllm_v2/meta/netcat420/MFANN-llama3.1-abliterated-SLERP-v3.1/71e87ce8-88f2-4858-b65f-9225f59cc3f9.json
 delete mode 100644 data/hfopenllm_v2/meta/netcat420/MFANN-llama3.1-abliterated-SLERP-v3/73f2659d-ff95-403f-99e0-09de7c807c3c.json
 delete mode 100644 data/hfopenllm_v2/meta/netcat420/MFANN-llama3.1-abliterated-v2/46728c83-957a-4eb7-8a04-0fee4efe50d1.json
 delete mode 100644 data/hfopenllm_v2/meta/ngxson/MiniThinky-1B-Llama-3.2/3a05547d-850b-42b5-978d-0aff574cb5ca.json
 delete mode 100644 data/hfopenllm_v2/meta/ngxson/MiniThinky-v2-1B-Llama-3.2/f37d1682-5df9-45dc-92ae-6bf587a03e9b.json
 delete mode 100644 data/hfopenllm_v2/meta/noname0202/llama-math-1b-r16-0to512tokens-test/8fb0f696-49a8-4611-ad82-3b7e19d5d867.json
 delete mode 100644 data/hfopenllm_v2/meta/noname0202/llama-math-1b-r32-0to512tokens-test/5623295c-0170-4832-b3e9-df00c660c59b.json
 delete mode 100644 data/hfopenllm_v2/meta/noname0202/llama-math-1b-r32-test/6c3ed9db-730c-48cb-95f9-662467957403.json
 delete mode 100644 data/hfopenllm_v2/meta/noname0202/llama-math-1b-r8-512tokens-test/c9d6f048-95b8-44ea-9d17-9d9f2d4854b4.json
 delete mode 100644 data/hfopenllm_v2/meta/nvidia/Llama-3.1-Minitron-4B-Depth-Base/98402d5d-95a6-4f48-9745-8653b298b48e.json
 delete mode 100644 data/hfopenllm_v2/meta/nvidia/OpenMath2-Llama3.1-8B/31c103fc-22ab-44a0-aeaf-769a9ff803df.json
 delete mode 100644 data/hfopenllm_v2/meta/ontocord/Llama_3.2_1b-autoredteam_helpfulness-train/8277cf4f-865b-4b3e-afcb-b906064dfc20.json
 delete mode 100644 data/hfopenllm_v2/meta/oopere/Llama-FinSent-S/8b9ec467-1555-415c-b1ee-23be18ded9e5.json
 delete mode 100644 data/hfopenllm_v2/meta/oopere/Llama-FinSent-S/f99bad90-e7b2-4205-9f51-93f96e90188c.json
 delete mode 100644 data/hfopenllm_v2/meta/oopere/pruned10-llama-3.2-3B/2ff7d218-348b-4069-808f-6b32e7a77a5b.json
 delete mode 100644 data/hfopenllm_v2/meta/oopere/pruned20-llama-1b/c86ed5b4-8793-424a-a5a2-9a54689cb388.json
 delete mode 100644 data/hfopenllm_v2/meta/oopere/pruned20-llama-3.2-3b/e0e6bdbd-91c2-4d45-be73-03890ed13709.json
 delete mode 100644 data/hfopenllm_v2/meta/oopere/pruned40-llama-1b/0032ea65-98dc-48a9-90e7-835e389acecd.json
 delete mode 100644 data/hfopenllm_v2/meta/oopere/pruned40-llama-3.2-1B/bae27b4d-4046-45f1-b798-8356fa962df4.json
 delete mode 100644 data/hfopenllm_v2/meta/oopere/pruned40-llama-3.2-3b/97c9b209-b2ed-439f-9b01-cad25e205fa9.json
 delete mode 100644 data/hfopenllm_v2/meta/oopere/pruned60-llama-1b/4c0ac526-821a-49eb-9eee-152d594ed25b.json
 delete mode 100644 data/hfopenllm_v2/meta/oopere/pruned60-llama-3.2-3b/219c6f49-3d48-4e1b-8105-fdf323b2fc3c.json
 delete mode 100644 data/hfopenllm_v2/meta/orai-nlp/Llama-eus-8B/0ed99007-3e31-4c48-abe5-0cd94b95dcf4.json
 delete mode 100644 data/hfopenllm_v2/meta/princeton-nlp/Llama-3-8B-ProLong-512k-Base/6c3d4b07-14c5-4218-862f-2aca386f5144.json
 delete mode 100644 data/hfopenllm_v2/meta/princeton-nlp/Llama-3-8B-ProLong-64k-Base/171a1779-0f17-4514-96ae-e4f9acea86b4.json
 delete mode 100644 data/hfopenllm_v2/meta/princeton-nlp/Sheared-LLaMA-1.3B/578905fb-a4a6-4dcd-9b09-ff5289568b91.json
 delete mode 100644 data/hfopenllm_v2/meta/princeton-nlp/Sheared-LLaMA-2.7B/3a0252c3-ced9-4cb4-94ef-d3800ac15ff9.json
 delete mode 100644 data/hfopenllm_v2/meta/prithivMLmods/Deepthink-Llama-3-8B-Preview/020f77a1-1051-4f85-8037-ed4f8b12474a.json
 delete mode 100644 data/hfopenllm_v2/meta/prithivMLmods/Llama-3.2-3B-Math-Oct/5ab1b41f-ee87-475c-b48b-e154c580d560.json
 delete mode 100644 data/hfopenllm_v2/meta/prithivMLmods/Llama-3.2-6B-AlgoCode/914b588e-6da8-4a08-9313-ac7004fd8b97.json
 delete mode 100644 data/hfopenllm_v2/meta/prithivMLmods/Llama-8B-Distill-CoT/6b1d1057-0091-4e44-822f-f7c1e5dc3ce9.json
 delete mode 100644 data/hfopenllm_v2/meta/prithivMLmods/Llama-Deepsync-1B/5516c5d6-29c9-46dc-ae29-61876fb488c2.json
 delete mode 100644 data/hfopenllm_v2/meta/prithivMLmods/Llama-Deepsync-3B/fbdcf318-d1b5-4ed6-b13d-efb14dfaf09f.json
 delete mode 100644 data/hfopenllm_v2/meta/prithivMLmods/Llama-Express.1-Math/99fd40d7-8d26-4088-ba03-1c1d7ed11ca0.json
 delete mode 100644 data/hfopenllm_v2/meta/pszemraj/Llama-3-6.3b-v0.1/74260e1f-8b2d-40ac-ac96-f268d65fa838.json
 delete mode 100644 data/hfopenllm_v2/meta/qingy2019/LLaMa_3.2_3B_Catalysts/2fb27531-96ee-48d2-9416-43ef790d7196.json
 delete mode 100644 data/hfopenllm_v2/meta/qingy2019/OpenMath2-Llama3.1-8B/75da6225-cc30-480c-b33e-359648932d9d.json
 delete mode 100644 data/hfopenllm_v2/meta/refuelai/Llama-3-Refueled/2f104869-3a3b-4d25-987b-77dba089b817.json
 delete mode 100644 data/hfopenllm_v2/meta/riaz/FineLlama-3.1-8B/55eb0438-f0bd-4f9d-8bff-577d0245a57c.json
 delete mode 100644 data/hfopenllm_v2/meta/riaz/FineLlama-3.1-8B/d5fb7571-bafd-424a-87f5-2d14ac7bd8d2.json
 delete mode 100644 data/hfopenllm_v2/meta/rombodawg/rombos_Replete-Coder-Llama3-8B/af3522f6-e26f-491f-8ccc-df064e5d3010.json
 delete mode 100644 data/hfopenllm_v2/meta/sabersaleh/Llama2-7B-CPO/2ecc5d1d-edb7-4713-9bde-f83ab4736690.json
 delete mode 100644 data/hfopenllm_v2/meta/sabersaleh/Llama2-7B-IPO/14deb011-b6ce-47c7-b855-c7ebcc291121.json
 delete mode 100644 data/hfopenllm_v2/meta/sabersaleh/Llama2-7B-KTO/0744b5c6-e109-4ccb-acc9-955106ef5562.json
 delete mode 100644 data/hfopenllm_v2/meta/sabersaleh/Llama2-7B-SPO/cfbdbc52-d846-48e7-bad4-f6240f1d2551.json
 delete mode 100644 data/hfopenllm_v2/meta/sabersaleh/Llama2-7B-SimPO/a530f116-e413-4d73-8d1f-2f44fcc0c6a9.json
 delete mode 100644 data/hfopenllm_v2/meta/sabersaleh/Llama3/286860d2-7f43-4488-9d43-9058fe59b248.json
 delete mode 100644 data/hfopenllm_v2/meta/sabersalehk/Llama3-001-300/f73009ad-891e-41e7-a6bc-a271894f5511.json
 delete mode 100644 data/hfopenllm_v2/meta/sabersalehk/Llama3-SimPO/b88f3d13-a8ed-4e23-86ec-1531c3151f0f.json
 delete mode 100644 data/hfopenllm_v2/meta/sabersalehk/Llama3_001_200/f673b2f9-8b77-42a3-9066-29f21a1ca0f8.json
 delete mode 100644 data/hfopenllm_v2/meta/sabersalehk/Llama3_01_300/55ae7ee9-2c50-45d6-ac0e-7c07bbad9a00.json
 delete mode 100644 data/hfopenllm_v2/meta/sakhan10/quantized_open_llama_3b_v2/f96ce5a9-7cc2-4380-9285-09052b906411.json
 delete mode 100644 data/hfopenllm_v2/meta/sequelbox/Llama3.1-70B-PlumChat/ab796471-db79-40a2-8147-72ed7099b355.json
 delete mode 100644 data/hfopenllm_v2/meta/sequelbox/Llama3.1-8B-MOTH/3a820ba4-bdd8-4caf-a90a-d7e9fee52997.json
 delete mode 100644 data/hfopenllm_v2/meta/sequelbox/Llama3.1-8B-PlumChat/32f38aeb-615c-4785-a674-bd8a50eb1057.json
 delete mode 100644 data/hfopenllm_v2/meta/sequelbox/Llama3.1-8B-PlumCode/2695c341-eabe-4809-9b87-9e771e1ee9d6.json
 delete mode 100644 data/hfopenllm_v2/meta/sequelbox/Llama3.1-8B-PlumMath/4734bf79-d464-43b4-8df3-1937f7c37796.json
 delete mode 100644 data/hfopenllm_v2/meta/sethuiyer/LlamaZero-3.1-8B-Experimental-1208/abebe996-35e4-4fa6-a16c-0b33481d7357.json
 delete mode 100644 data/hfopenllm_v2/meta/skumar9/Llama-medx_v2/1bfc4a7a-2ac8-4454-bbee-0db62608ce5a.json
 delete mode 100644 data/hfopenllm_v2/meta/suayptalha/DeepSeek-R1-Distill-Llama-3B/4146ffb5-ac76-43b7-acdc-8c181f2c60d2.json
 delete mode 100644 data/hfopenllm_v2/meta/suayptalha/Komodo-Llama-3.2-3B-v2-fp16/d86e291c-cc26-475c-9ccd-e3ee68e8bee2.json
 delete mode 100644 data/hfopenllm_v2/meta/sumink/flflmillama/19f198e5-37b8-4d62-8cbe-849f6875d39e.json
 delete mode 100644 data/hfopenllm_v2/meta/sumink/llamaft/a13b4873-22c0-461a-b4ba-41246ede0dfa.json
 delete mode 100644 data/hfopenllm_v2/meta/sumink/llamamerge/f7406d3e-dbfa-4f12-946e-f4e58c728fa8.json
 delete mode 100644 data/hfopenllm_v2/meta/tenyx/Llama3-TenyxChat-70B/6fc094c0-ca29-4594-b086-2dae90195e8d.json
 delete mode 100644 data/hfopenllm_v2/meta/theprint/CleverBoi-Llama-3.1-8B-v2/42ea4b8d-98af-4c57-8b55-cef38c473fd5.json
 delete mode 100644 data/hfopenllm_v2/meta/theprint/Code-Llama-Bagel-8B/3a63b21d-0aaa-45d5-ae12-6d6c9777edbe.json
 delete mode 100644 data/hfopenllm_v2/meta/theprint/Llama-3.2-3B-VanRossum/78e423de-2f66-4c53-8d07-8401802973ca.json
 delete mode 100644 data/hfopenllm_v2/meta/theprint/ReWiz-Llama-3.1-8B-v2/e57e6483-7e4c-4a64-8c58-890aafb38f37.json
 delete mode 100644 data/hfopenllm_v2/meta/theprint/ReWiz-Llama-3.2-3B/17d4fced-6a93-4e5e-8349-25dae16596f8.json
 delete mode 100644 data/hfopenllm_v2/meta/togethercomputer/LLaMA-2-7B-32K/29dae40d-4786-4fbc-92fa-3415b0c35488.json
 delete mode 100644 data/hfopenllm_v2/meta/trthminh1112/autotrain-llama32-1b-finetune/cad93026-baf2-47ef-a554-4d0ba0d5a946.json
 delete mode 100644 data/hfopenllm_v2/meta/uukuguy/speechless-codellama-34b-v2.0/ddcf1dc2-5281-4d14-b870-7ed2fa44c8d0.json
 delete mode 100644 data/hfopenllm_v2/meta/uukuguy/speechless-llama2-hermes-orca-platypus-wizardlm-13b/e9556ee4-63e8-4e0b-88df-62cc6c62c65a.json
 delete mode 100644 data/hfopenllm_v2/meta/vhab10/llama-3-8b-merged-linear/deed0e49-b9fd-4623-bb90-3e885bec9bb0.json
 delete mode 100644 data/hfopenllm_v2/meta/vicgalle/Configurable-Hermes-2-Pro-Llama-3-8B/469379ff-5526-44f4-be9b-8bf6185b917e.json
 delete mode 100644 data/hfopenllm_v2/meta/vicgalle/Humanish-RP-Llama-3.1-8B/3b0e49aa-931b-4625-8e59-fed02b31372e.json
 delete mode 100644 data/hfopenllm_v2/meta/vicgalle/Roleplay-Llama-3-8B/89bafcc1-b175-45ec-b365-45938c1e8f33.json
 delete mode 100644 data/hfopenllm_v2/meta/viettelsecurity-ai/security-llama3.2-3b/2176e0d8-e0a5-4118-b15f-b272dc643d89.json
 delete mode 100644 data/hfopenllm_v2/meta/winglian/Llama-3-8b-64k-PoSE/76bbd348-21b9-4253-8085-d8c4eb0932f6.json
 delete mode 100644 data/hfopenllm_v2/meta/winglian/llama-3-8b-256k-PoSE/5077856e-f85c-4395-8be9-e3e9bf3655cb.json
 delete mode 100644 data/hfopenllm_v2/meta/xinchen9/Llama3.1_8B_Instruct_CoT/eddb5bfc-d5ae-44bc-8ffd-b1d318b0e3d2.json
 delete mode 100644 data/hfopenllm_v2/meta/xinchen9/Llama3.1_CoT/4ccfc9fe-c222-490e-badd-bfeecc9ede91.json
 delete mode 100644 data/hfopenllm_v2/meta/xinchen9/Llama3.1_CoT_V1/501bff5b-2809-4af7-9600-d6471167b701.json
 delete mode 100644 data/hfopenllm_v2/meta/yuvraj17/Llama3-8B-SuperNova-Spectrum-dare_ties/2bde390d-b448-4ac2-addd-215d722aa66b.json
 delete mode 100644 data/hfopenllm_v2/meta/yuvraj17/Llama3-8B-abliterated-Spectrum-slerp/45cd6db1-064f-45d9-89f2-d931b4f82326.json
 create mode 100644 data/hfopenllm_v2/mhl1/Qwen2.5-0.5B-cinstruct-stage1/cdabdd54-6101-471c-9bd8-446953be986b.json
 delete mode 100644 data/hfopenllm_v2/microsoft/1024m/PHI-4-Hindi/29f2c6ef-0685-43f9-800b-4f10ddc3ddf7.json
 delete mode 100644 data/hfopenllm_v2/microsoft/BlackBeenie/Neos-Phi-3-14B-v0.1/6d6aa9c5-cb3f-4c30-bd1a-ba951c9ad0e8.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Daemontatox/Phi-4-COT/4ab23cde-aadb-424d-a88e-e7029a2f5c57.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Daemontatox/SphinX/118ee97a-cc78-4b4d-99c4-58d37b4a48ba.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Daemontatox/Sphinx2.0/07d85f99-840b-403a-bace-99712f3469b7.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Daemontatox/TinySphinx/6d501ffa-e205-4522-9af5-7036463a5b05.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Daemontatox/TinySphinx2.0/da5d131c-5ae9-462e-87b1-92ead75eddb9.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Danielbrdz/Barcenas-14b-Phi-3-medium-ORPO/f9ce1ec0-e727-474b-acb7-1ba49311e355.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Danielbrdz/Barcenas-14b-phi-4-v2/4180c069-33e8-4109-9d35-dde82549ba26.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Danielbrdz/Barcenas-14b-phi-4/720029f0-41d5-4161-878e-4218f230455c.json
 create mode 100644 data/hfopenllm_v2/microsoft/DialoGPT-medium/8029cb75-8d3b-411d-b0eb-74539b8ecb2f.json
 delete mode 100644 data/hfopenllm_v2/microsoft/DreadPoor/Morphing-8B-Model_Stock/0fd25475-5202-4cd1-b399-bfb8e113d85b.json
 delete mode 100644 data/hfopenllm_v2/microsoft/EpistemeAI/DeepThinkers-Phi4/3c97155d-c086-42aa-af12-14316fcf723c.json
 delete mode 100644 data/hfopenllm_v2/microsoft/EpistemeAI/Fireball-12B-v1.13a-philosophers/38fae832-3d96-457d-851b-7fcded3f7796.json
 delete mode 100644 data/hfopenllm_v2/microsoft/EpistemeAI2/Fireball-Phi-3-medium-4k-inst-Philos/a60477a1-b815-4c82-a9e9-f017cb7b5ec9.json
 delete mode 100644 data/hfopenllm_v2/microsoft/FINGU-AI/Phi-4-RRStock/9d85345f-d46b-4431-b5fb-5cca99d92f21.json
 delete mode 100644 data/hfopenllm_v2/microsoft/HeraiHench/Phi-4-slerp-ReasoningRP-14B/ca0a3f22-099f-4207-acfe-4b70aa00171e.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Josephgflowers/Cinder-Phi-2-V1-F16-gguf/4d0a565c-14b2-4ce9-97c0-4d114548fe48.json
 delete mode 100644 data/hfopenllm_v2/microsoft/MaziyarPanahi/calme-2.1-phi3-4b/79b4a850-85b6-45aa-8cc1-5210230a38aa.json
 delete mode 100644 data/hfopenllm_v2/microsoft/MaziyarPanahi/calme-2.1-phi3.5-4b/69433e39-158a-46df-a987-ac2a6b3af2af.json
 delete mode 100644 data/hfopenllm_v2/microsoft/MaziyarPanahi/calme-2.2-phi3-4b/56593987-babd-4a30-9a20-f83e7d233809.json
 delete mode 100644 data/hfopenllm_v2/microsoft/MaziyarPanahi/calme-2.3-phi3-4b/99b96f53-5ac6-4001-abc6-2a4e43f09028.json
 delete mode 100644 data/hfopenllm_v2/microsoft/NikolaSigmoid/phi-4-14b/cae2d4a1-4632-420f-be40-594f4c001d4d.json
 delete mode 100644 data/hfopenllm_v2/microsoft/NikolaSigmoid/phi-4-1steps/a4763c48-f2ab-4f3e-bc1f-a7f4a9f33cf8.json
 delete mode 100644 data/hfopenllm_v2/microsoft/NikolaSigmoid/phi-4-300steps/e54de9df-52e5-43d2-92c3-9d5207c0e335.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Novaciano/Fusetrix-Dolphin-3.2-1B-GRPO_Creative_RP/582f87ef-50c5-4a5b-9d76-bc71f97bd2fb.json
 delete mode 100644 data/hfopenllm_v2/microsoft/NyxKrage/Microsoft_Phi-4/46494bad-fb41-4fa3-b568-be4e6a22ae5b.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Orca-2-13b/4f9c7197-1eb6-45eb-851e-46707017fe7f.json
 create mode 100644 data/hfopenllm_v2/microsoft/Orca-2-13b/65d10996-2c5b-4e11-9a07-319c2446a237.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Orca-2-7b/c13a5d55-44f7-43fc-a633-9af7677a26fb.json
 create mode 100644 data/hfopenllm_v2/microsoft/Orca-2-7b/ef21d739-b122-4ab8-a8ff-a7cfecad5c8e.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Orion-zhen/phi-4-abliterated/3970f988-26f6-4810-839a-e5f4fcd6618a.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Phi-3-medium-128k-instruct/0c2670d3-1fb5-4825-860f-dc84dbd7bb99.json
 create mode 100644 data/hfopenllm_v2/microsoft/Phi-3-medium-128k-instruct/45f3b963-497b-4d89-ac66-9ff0ba8dadf8.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Phi-3-medium-4k-instruct/1b921ad2-9ed3-46d5-ab65-f125ce97b35f.json
 create mode 100644 data/hfopenllm_v2/microsoft/Phi-3-medium-4k-instruct/4173435b-d907-4ac5-a8bd-dfa2759f3fb6.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Phi-3-mini-128k-instruct/0bcfeb34-8944-4f16-83d8-6fe851c39af6.json
 create mode 100644 data/hfopenllm_v2/microsoft/Phi-3-mini-128k-instruct/b4a79f30-3a04-4f78-861e-1571316a0642.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Phi-3-mini-4k-instruct/0c861cdd-1ddb-43a1-991b-300887e1da1b.json
 create mode 100644 data/hfopenllm_v2/microsoft/Phi-3-mini-4k-instruct/53426038-df38-45ba-b621-34231c9cad7f.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Phi-3-mini-4k-instruct/97e50198-ba06-4c17-81d3-59270b71a89d.json
 create mode 100644 data/hfopenllm_v2/microsoft/Phi-3-mini-4k-instruct/fa758fe5-21ec-45cc-941f-5cb5ca0612b1.json
 create mode 100644 data/hfopenllm_v2/microsoft/Phi-3-small-128k-instruct/d2a92a62-3bd0-4cb2-897b-742ea0d5203f.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Phi-3-small-128k-instruct/f7c1a443-006b-4ade-9b0f-895392e52b7c.json
 create mode 100644 data/hfopenllm_v2/microsoft/Phi-3-small-8k-instruct/8b752519-63d4-4638-b56e-1c45c7f4694e.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Phi-3-small-8k-instruct/f4c62b5d-fc1d-4421-9be8-e7e4af642284.json
 create mode 100644 data/hfopenllm_v2/microsoft/Phi-3.5-MoE-instruct/8da71b7c-7b73-453f-998b-84e70b54e471.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Phi-3.5-MoE-instruct/ae57c3e7-4042-43eb-baa2-b033d1b4867c.json
 create mode 100644 data/hfopenllm_v2/microsoft/Phi-3.5-mini-instruct/2b7b1216-3ea7-48f1-89f6-e5d84fef2b32.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Phi-3.5-mini-instruct/42448d73-f9e0-4eb2-bd6a-74614d08d55c.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Phi-4-mini-instruct/1d02fe1c-f31d-4d38-a8c3-dc427e25cb80.json
 create mode 100644 data/hfopenllm_v2/microsoft/Phi-4-mini-instruct/37e19712-3197-42da-a8f2-ae1f36c2b06c.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Quazim0t0/CoT_Phi/ed579ba1-fcd3-4279-ac93-d0340a771e43.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Quazim0t0/Lo-Phi-14b/b37d3d27-5ba0-44d6-bd19-1196a98b75b4.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Quazim0t0/Math_Phi4_Reason/1c2a87ca-9f1a-4d32-b1da-743927b722b0.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Quazim0t0/Phi4.Turn.R1Distill.16bit/44749932-f3e3-45ad-bb4b-135a6d656e3b.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Quazim0t0/Phi4.Turn.R1Distill_v1.5.1-Tensors/5f1b91c8-28d0-4274-8979-32416003fafb.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Quazim0t0/Phi4Basis-14B-sce/d101111a-31bd-4eec-9a53-52543f6d5fd5.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Quazim0t0/ThinkPhi1.1-Tensors/056e62d9-ab3e-4bf3-8693-47a5aea7f84f.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Quazim0t0/graphite-14b-sce/bd98b886-a899-4022-aee4-09ea0e491fe3.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Sakalti/Phi3.5-Comets-3.8B/7d9a3955-232c-4a93-b879-bd065bab4768.json
 delete mode 100644 data/hfopenllm_v2/microsoft/SicariusSicariiStuff/Phi-Line_14B/12b2a13d-2b38-47e6-a6d2-3d5a30bff5ae.json
 delete mode 100644 data/hfopenllm_v2/microsoft/SicariusSicariiStuff/Phi-lthy4/56fa06dd-fd07-4613-9ac5-81c739cb6a64.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Triangle104/Phi-4-AbliteratedRP/ef628438-c2ff-4939-8bf1-09f1df25fd15.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Triangle104/Phi4-RP-o1-Ablit/c3578998-b9dc-4b42-a8cb-0bdf05cffc9f.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Triangle104/Phi4-RP-o1/9ed49666-aee1-43d0-8c7c-98c178860f0c.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Undi95/Phi4-abliterated/29c3f781-f49c-4afc-bbc4-a47aebc91f71.json
 delete mode 100644 data/hfopenllm_v2/microsoft/VAGOsolutions/SauerkrautLM-Phi-3-medium/ae8b39a7-7fca-441f-bae3-8db76879cefe.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Xiaojian9992024/Phi-4-Megatron-Empathetic/aec0af15-927b-48bd-a889-d4715aff4c42.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Xiaojian9992024/Phi-4-mini-UNOFFICAL/058de011-1e80-4a6d-803f-8ba7f927cd7f.json
 delete mode 100644 data/hfopenllm_v2/microsoft/Youlln/3PRYMMAL-PHI3-3B-SLERP/2c53181b-8681-46ad-b739-396b1ecb163c.json
 delete mode 100644 data/hfopenllm_v2/microsoft/abideen/MedPhi-4-14B-v1/0367a9de-960b-4c1d-8e63-8dea06197bfa.json
 delete mode 100644 data/hfopenllm_v2/microsoft/allknowingroger/MistralPhi3-11B/f7f557cf-4c63-444a-8c8f-515796b9b127.json
 delete mode 100644 data/hfopenllm_v2/microsoft/allknowingroger/Phi3mash1-17B-pass/83ec9172-5769-4737-a766-0ca2006dd3e4.json
 delete mode 100644 data/hfopenllm_v2/microsoft/allknowingroger/ROGERphi-7B-slerp/9e7ef237-2e59-429d-9784-45de952f60af.json
 delete mode 100644 data/hfopenllm_v2/microsoft/benhaotang/phi4-qwq-sky-t1/08f1ef63-efc7-449c-92cf-6f180b9d2712.json
 delete mode 100644 data/hfopenllm_v2/microsoft/bunnycore/Phi-3.5-mini-TitanFusion-0.1/60823e05-59e3-4c4c-a23e-8ef495aa39be.json
 delete mode 100644 data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Model-Stock-v2/5bc6e404-5798-4d19-88d1-5a8153947227.json
 delete mode 100644 data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Model-Stock-v3/5832ef9b-bd14-46ba-b04d-049280bc5267.json
 delete mode 100644 data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Model-Stock-v4/92363115-37f2-4d2f-8178-61fc98c8f337.json
 delete mode 100644 data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Model-Stock/cee9b876-96b3-4429-af70-6a5b45747a3b.json
 delete mode 100644 data/hfopenllm_v2/microsoft/bunnycore/Phi-4-RP-v0/29135c1b-e6a0-428a-ba4f-459e9b652d25.json
 delete mode 100644 data/hfopenllm_v2/microsoft/bunnycore/Phi-4-RR-Shoup/377bc688-a18e-4abb-91f7-d78a934e1649.json
 delete mode 100644 data/hfopenllm_v2/microsoft/bunnycore/Phi-4-RStock-v0.1/cf300641-1ec3-4ee7-b38d-b274ebc23ff2.json
 delete mode 100644 data/hfopenllm_v2/microsoft/bunnycore/Phi-4-ReasoningRP/5db77608-f892-4ac4-93c4-03f177696484.json
 delete mode 100644 data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Sce-exp-v0.1/c8de0acd-7cce-45c0-9032-2b717f3917b8.json
 delete mode 100644 data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Stock-Ex/bc007572-56ff-449a-9e3d-5ab770c3ae44.json
 delete mode 100644 data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Stock-RP/69724e46-4038-4d3a-a8ff-e84a56bba9e8.json
 delete mode 100644 data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Trim-Exp1/c13c2fd7-e271-4935-a3a6-4161cb8e4ea2.json
 delete mode 100644 data/hfopenllm_v2/microsoft/bunnycore/Phi-Seek-4-Sce-V1/75810fb9-99b5-4707-80a8-8974bbb0844d.json
 delete mode 100644 data/hfopenllm_v2/microsoft/carsenk/phi3.5_mini_exp_825_uncensored/68315e0a-603c-4784-a567-e342a6185c07.json
 delete mode 100644 
data/hfopenllm_v2/microsoft/cognitivecomputations/Dolphin3.0-R1-Mistral-24B/8a641aee-1604-4910-8164-9e6d5c0652b1.json delete mode 100644 data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.1-yi-1.5-34b/4e6cb7a6-f01d-4e25-be2f-bda77af2eaf6.json delete mode 100644 data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.1-yi-1.5-9b/e1003371-d503-469d-ae41-e813d097ea43.json delete mode 100644 data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated/6f89f55f-a259-419a-b6ad-9b01b2dae9d8.json delete mode 100644 data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated/958ad3b8-9b65-4165-9d3c-a49e25802fd3.json delete mode 100644 data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.2-Phi-3-Medium/36476eb7-a89a-45e1-b423-7755edfd5be1.json delete mode 100644 data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.3-Yi-1.5-34B-32k/0e625490-b7b1-4b64-aa1e-222c4e21d7a5.json delete mode 100644 data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.3-mistral-7B-32k/4a0bc836-88b7-4d6e-9f0d-321ff75b1733.json delete mode 100644 data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.3-mistral-nemo-12b/05488c6f-dfd4-4481-a3d4-15a918b115d3.json delete mode 100644 data/hfopenllm_v2/microsoft/ehristoforu/phi-4-25b/d11d7e47-f9e0-4502-9e71-0654819c3cd4.json delete mode 100644 data/hfopenllm_v2/microsoft/ehristoforu/ruphi-4b/70337ca5-7810-4e52-8382-0c2568a6ab70.json delete mode 100644 data/hfopenllm_v2/microsoft/fhai50032/Unaligned-Thinker-PHI-4/bda90ce2-cb80-4942-8492-28329d7f5aeb.json delete mode 100644 data/hfopenllm_v2/microsoft/hotmailuser/Phi4-Slerp4-14B/da866c81-296f-463c-962b-6b871d6fb633.json delete mode 100644 data/hfopenllm_v2/microsoft/magnifi/Phi3_intent_v56_3_w_unknown_5_lr_0.002/c78d1aaf-9975-45d6-9a8d-eed76f7e0a0f.json delete mode 100644 data/hfopenllm_v2/microsoft/microsoft/phi-1/b88d579f-6bc7-4aee-a117-28786cba3300.json delete mode 100644 data/hfopenllm_v2/microsoft/microsoft/phi-1_5/0bc55439-f6a1-4588-858a-082907876d6e.json delete mode 100644 data/hfopenllm_v2/microsoft/microsoft/phi-2/e38ef3e4-585f-46de-beb4-c794d767b579.json delete mode 100644 data/hfopenllm_v2/microsoft/microsoft/phi-4/5481936f-d52a-486b-871e-d2e48c1b0278.json delete mode 100644 data/hfopenllm_v2/microsoft/microsoft/phi-4/f3ee4f04-22f1-4ddb-afb2-27b8f641042b.json delete mode 100644 data/hfopenllm_v2/microsoft/migtissera/Tess-v2.5-Phi-3-medium-128k-14B/260f2500-c920-4e3f-901b-10efc03f0390.json delete mode 100644 data/hfopenllm_v2/microsoft/mkurman/phi-4-MedIT-11B-exp-1/d64a8825-610a-4128-8c68-55150a76ed88.json delete mode 100644 data/hfopenllm_v2/microsoft/mkurman/phi4-MedIT-10B-o1/c5a2a30d-99b0-4658-97f5-4c9be5576073.json delete mode 100644 data/hfopenllm_v2/microsoft/mlabonne/phixtral-2x2_8/ec051c9b-9399-4c8d-8710-6a182a234890.json delete mode 100644 data/hfopenllm_v2/microsoft/mrm8488/phi-4-14B-grpo-gsm8k-3e/1bd4d2fe-cd83-4a79-b102-40be8ebb6245.json delete mode 100644 data/hfopenllm_v2/microsoft/mrm8488/phi-4-14B-grpo-limo/e671d26c-1d8a-4d22-b360-dc3e449886b8.json delete mode 100644 data/hfopenllm_v2/microsoft/netcat420/MFANN-abliterated-phi2-merge-unretrained/a3c07d22-20d1-4878-80d5-04b949580829.json delete mode 100644 data/hfopenllm_v2/microsoft/netcat420/MFANN-phigments-slerp-V2/8b4f2ab4-dcd7-4c5d-9bd0-6d7e1580c123.json delete mode 100644 data/hfopenllm_v2/microsoft/netcat420/MFANN-phigments-slerp-V3.2/8c4e85ce-7b8f-479c-a1dc-114c7e5ba4f1.json delete mode 100644 
data/hfopenllm_v2/microsoft/netcat420/MFANN-phigments-slerp-V3.3/b3466ac6-df1f-4440-9d7b-7991cac7d733.json delete mode 100644 data/hfopenllm_v2/microsoft/pankajmathur/orca_mini_phi-4/f5971ede-de93-4729-8a03-b9ec3abea21e.json create mode 100644 data/hfopenllm_v2/microsoft/phi-1/c6ae6691-64ec-443d-8d76-af614c8cc7f9.json create mode 100644 data/hfopenllm_v2/microsoft/phi-1_5/80567722-8c6b-41b9-8103-3bdaedfdb8ee.json create mode 100644 data/hfopenllm_v2/microsoft/phi-2/20192dc4-ea3a-4413-8457-18a592fa0c64.json create mode 100644 data/hfopenllm_v2/microsoft/phi-4/8c878c05-86f7-4d61-81d7-9bb286516581.json create mode 100644 data/hfopenllm_v2/microsoft/phi-4/fa753be0-4a98-4ec3-9cc9-3bf7b380ad17.json delete mode 100644 data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-Empathetic/a7a2af83-7047-4601-bfdd-ac25abf3890d.json delete mode 100644 data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-Math-IO/88c03059-5add-46ea-b423-4cf8496c5763.json delete mode 100644 data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-QwQ/8e84f2de-117a-4526-9d58-86a63011a07f.json delete mode 100644 data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-Super-1/91c5f088-38fd-4ea7-bf95-3d6a69653cca.json delete mode 100644 data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-Super-o1/b90749f4-0542-42b6-a708-4e14bc586ad1.json delete mode 100644 data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-Super/ec19309c-9bbe-4d42-894d-3638dbe5dfac.json delete mode 100644 data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-o1/d58bf1bb-e269-4741-a9f1-be242443ad4a.json delete mode 100644 data/hfopenllm_v2/microsoft/prithivMLmods/Phi4-Super/07ee76dd-a928-469b-912e-cfd2e0a26ef9.json delete mode 100644 data/hfopenllm_v2/microsoft/rhysjones/phi-2-orange-v2/bf679659-f55f-43c8-86b5-ed7805e8c3ee.json delete mode 100644 data/hfopenllm_v2/microsoft/suayptalha/Luminis-phi-4/ace18207-a255-447d-9aba-8afdee092164.json delete mode 100644 data/hfopenllm_v2/microsoft/tensopolis/phi-4-tensopolis-v1/bcbdde44-0736-4162-9faf-cd9d8e89d360.json delete mode 100644 data/hfopenllm_v2/microsoft/theprint/phi-3-mini-4k-python/f017d759-59fe-42a3-947d-a4b787f084d7.json delete mode 100644 data/hfopenllm_v2/microsoft/unsloth/phi-4-bnb-4bit/c8cfc527-9a58-45e7-a8e0-39caacd8bd58.json delete mode 100644 data/hfopenllm_v2/microsoft/unsloth/phi-4-unsloth-bnb-4bit/3bdd8e19-fd61-4d1e-96b1-cdadd4c2d67f.json delete mode 100644 data/hfopenllm_v2/microsoft/unsloth/phi-4/c6080b92-d05a-4bda-ad07-e1b59a427844.json delete mode 100644 data/hfopenllm_v2/microsoft/uukuguy/speechless-mistral-dolphin-orca-platypus-samantha-7b/49cd8aff-0c7a-4245-831a-f4fc64383b48.json create mode 100644 data/hfopenllm_v2/migtissera/Llama-3-70B-Synthia-v3.5/0516b46b-a957-413f-aadc-58f4339dc60a.json create mode 100644 data/hfopenllm_v2/migtissera/Llama-3-8B-Synthia-v3.5/97200dd7-7ed0-4a7b-ace9-31c173f017f1.json create mode 100644 data/hfopenllm_v2/migtissera/Tess-3-7B-SFT/758f8332-ffa8-4059-ac6f-400f9367bb23.json delete mode 100644 data/hfopenllm_v2/migtissera/Tess-3-7B-SFT/cc99f18f-e75c-4cd1-a466-ac8c54877bd2.json create mode 100644 data/hfopenllm_v2/migtissera/Tess-3-Mistral-Nemo-12B/b1103662-055c-471e-ace8-dd75f607491d.json create mode 100644 data/hfopenllm_v2/migtissera/Tess-v2.5-Phi-3-medium-128k-14B/27b0d675-498f-4351-b92f-7c0d1a3c83bd.json create mode 100644 data/hfopenllm_v2/migtissera/Tess-v2.5.2-Qwen2-72B/3f1f88d4-2908-4f28-b8d3-4f9ded18ba0e.json create mode 100644 data/hfopenllm_v2/migtissera/Trinity-2-Codestral-22B-v0.2/3883b0d3-e442-42d3-adc6-ed959c902dd3.json delete mode 100644 
data/hfopenllm_v2/migtissera/Trinity-2-Codestral-22B-v0.2/7320b12a-7511-441d-9d56-f7e713af4470.json delete mode 100644 data/hfopenllm_v2/migtissera/Trinity-2-Codestral-22B-v0.2/a18b3d46-7e65-4cb3-b7e5-12b86f34a572.json create mode 100644 data/hfopenllm_v2/migtissera/Trinity-2-Codestral-22B-v0.2/da172cdb-1388-42f5-97b1-ae8e15291631.json create mode 100644 data/hfopenllm_v2/migtissera/Trinity-2-Codestral-22B/7c94dbfa-4b3a-43fd-9f2c-b3d63d8ef700.json delete mode 100644 data/hfopenllm_v2/migtissera/Trinity-2-Codestral-22B/e075cb71-eaae-46e0-917b-bf84482f76c9.json create mode 100644 data/hfopenllm_v2/mindw96/DeepSeek-llama3.3-Bllossom-8B-DACON-LLM3/7cdd1de0-767d-4527-a024-c67166bb8b20.json create mode 100644 data/hfopenllm_v2/minghaowu/Qwen1.5-1.8B-OpenHermes-2.5/d4702278-54c4-42e8-a901-dfe5c7f2004a.json create mode 100644 data/hfopenllm_v2/ministral/Ministral-3b-instruct/149f8ee5-4376-4fcc-8f87-7412a3083570.json delete mode 100644 data/hfopenllm_v2/ministral/Ministral-3b-instruct/83b6f014-f8a0-4e69-ae60-cc3a7aeaaf1c.json create mode 100644 data/hfopenllm_v2/mistral-community/Mistral-7B-v0.2/de82b746-c5d7-450a-bc2b-1b2859d91d6b.json create mode 100644 data/hfopenllm_v2/mistral-community/Mixtral-8x22B-v0.1/d2a916a6-288a-4761-a3fd-ca674edb67c1.json create mode 100644 data/hfopenllm_v2/mistral-community/mixtral-8x22B-v0.3/cda497f9-c7f9-48d6-944b-0167476e5e5c.json delete mode 100644 data/hfopenllm_v2/mistral/Corianas/Neural-Mistral-7B/4fb7a806-1176-474e-a039-b388f050cd45.json delete mode 100644 data/hfopenllm_v2/mistral/Dans-DiscountModels/Mistral-7b-v0.3-Test-E0.7/393f8623-7f38-4aaa-a460-cbdcb74c2d04.json delete mode 100644 data/hfopenllm_v2/mistral/Dans-DiscountModels/mistral-7b-test-merged/5ba7e296-cdd3-40e8-b56f-cc44ef0c3dcb.json delete mode 100644 data/hfopenllm_v2/mistral/DreadPoor/felix_dies-mistral-7B-model_stock/0444a153-1852-4a0d-959e-750c933777bd.json delete mode 100644 data/hfopenllm_v2/mistral/EpistemeAI2/Fireball-MathMistral-Nemo-Base-2407-v2dpo/b798f31f-5fab-4f21-8689-fe832afb873b.json delete mode 100644 data/hfopenllm_v2/mistral/FuJhen/mistral_7b_v0.1_structedData_e2e/3ba2b06b-b44a-4ad6-bf38-f1602995c2f9.json delete mode 100644 data/hfopenllm_v2/mistral/FuJhen/mistral_7b_v0.1_structedData_viggo/3008b476-f005-4672-a953-c86b29ba3ef2.json delete mode 100644 data/hfopenllm_v2/mistral/Locutusque/TinyMistral-248M-v2.5/9a3f7863-0041-4473-b3f0-ad25f0d9310f.json delete mode 100644 data/hfopenllm_v2/mistral/M4-ai/TinyMistral-248M-v3/830423e1-ec14-4477-8c82-8516bb8e954f.json delete mode 100644 data/hfopenllm_v2/mistral/Marsouuu/MistralBase-4x7B-MoE-ECE-PRYMMAL-Martial/5cd26359-d15a-4d0b-92f1-c31101e7b993.json delete mode 100644 data/hfopenllm_v2/mistral/NousResearch/DeepHermes-3-Mistral-24B-Preview/b1f439ee-711a-41b8-b63d-dd28cb63266e.json delete mode 100644 data/hfopenllm_v2/mistral/NousResearch/Hermes-2-Pro-Mistral-7B/b8d954d0-a820-4927-a7f8-b0083cf9db9c.json delete mode 100644 data/hfopenllm_v2/mistral/NousResearch/Yarn-Mistral-7b-128k/c6411eb6-8304-49e6-ac7b-5300deb27c55.json delete mode 100644 data/hfopenllm_v2/mistral/NousResearch/Yarn-Mistral-7b-64k/c7fcd944-78ab-422d-b0ef-8dc394266473.json delete mode 100644 data/hfopenllm_v2/mistral/Open-Orca/Mistral-7B-OpenOrca/c6e0aa8c-8765-4e2f-a6b2-cdeb885d29a4.json delete mode 100644 data/hfopenllm_v2/mistral/PranavHarshan/LaMistral-V4/21944667-04e0-46dc-9896-eef32c26fa6b.json delete mode 100644 data/hfopenllm_v2/mistral/Pretergeek/openchat-3.5-0106_Rebased_Mistral-7B-v0.2/56d07a1f-1f1f-4559-b57d-bee3bf884860.json delete mode 100644 
data/hfopenllm_v2/mistral/TTTXXX01/Mistral-7B-Base-SimPO2-5e-7/062d38c7-07e6-4f71-a7a3-e40a187b6f77.json delete mode 100644 data/hfopenllm_v2/mistral/TencentARC/MetaMath-Mistral-Pro/c2274449-ebc7-4e53-94bf-82e1f6810f6b.json delete mode 100644 data/hfopenllm_v2/mistral/TencentARC/Mistral_Pro_8B_v0.1/07ac72af-fa7e-4fe2-8a67-e893edbbd206.json delete mode 100644 data/hfopenllm_v2/mistral/Triangle104/Mistral-Redemption-Arc/189f08b4-7e58-4820-9ff7-bcea4530e3dd.json delete mode 100644 data/hfopenllm_v2/mistral/Triangle104/Mistral-Small-24b-Harmony/e8d645e6-8ec4-4c0c-8cf2-8aa7e126e1f1.json delete mode 100644 data/hfopenllm_v2/mistral/UCLA-AGI/Mistral7B-PairRM-SPPO-Iter1/01c4d932-bdcf-4840-83cb-e441585d70e2.json delete mode 100644 data/hfopenllm_v2/mistral/UCLA-AGI/Mistral7B-PairRM-SPPO-Iter2/b0e6d5e1-3f41-4dfc-8845-b6d028820816.json delete mode 100644 data/hfopenllm_v2/mistral/UCLA-AGI/Mistral7B-PairRM-SPPO-Iter3/66cc8076-71be-43fc-9efb-edd8ad19a6b6.json delete mode 100644 data/hfopenllm_v2/mistral/UCLA-AGI/Mistral7B-PairRM-SPPO/01613adc-1206-4695-ae19-31f2b7ee0d9d.json delete mode 100644 data/hfopenllm_v2/mistral/Unbabel/TowerInstruct-Mistral-7B-v0.2/cc6d8d11-2273-41fa-95eb-5d1f7d4a2311.json delete mode 100644 data/hfopenllm_v2/mistral/allknowingroger/Mistralmash1-7B-s/c5e7d08d-4430-43f6-a293-5381b2f13ca6.json delete mode 100644 data/hfopenllm_v2/mistral/allknowingroger/Mistralmash2-7B-s/7a9d4b20-e704-4f50-a09b-ccb67d417824.json delete mode 100644 data/hfopenllm_v2/mistral/allura-org/Mistral-Small-24b-Sertraline-0304/34f35618-3ecf-4704-ab7a-ec9e8a5d08c1.json delete mode 100644 data/hfopenllm_v2/mistral/allura-org/Mistral-Small-Sisyphus-24b-2503/ce2ee38f-cb48-403f-894d-f2824d00a388.json delete mode 100644 data/hfopenllm_v2/mistral/amazon/MegaBeam-Mistral-7B-300k/4729a245-9e2d-4f65-bf14-67db4bb2590f.json delete mode 100644 data/hfopenllm_v2/mistral/awnr/Mistral-7B-v0.1-signtensors-1-over-2/3bccbf0f-e578-426d-93bc-84364f7d8017.json delete mode 100644 data/hfopenllm_v2/mistral/awnr/Mistral-7B-v0.1-signtensors-1-over-4/ac1010e3-b3d8-4b61-ba79-0dcedb68619d.json delete mode 100644 data/hfopenllm_v2/mistral/awnr/Mistral-7B-v0.1-signtensors-3-over-8/12f4db59-10fe-47d0-86df-343ea8978249.json delete mode 100644 data/hfopenllm_v2/mistral/awnr/Mistral-7B-v0.1-signtensors-5-over-16/b0ae93c7-b251-42df-a67f-ca8b8a865937.json delete mode 100644 data/hfopenllm_v2/mistral/awnr/Mistral-7B-v0.1-signtensors-7-over-16/893da954-ca56-42ab-914d-44fbc4a6f1ff.json delete mode 100644 data/hfopenllm_v2/mistral/aws-prototyping/MegaBeam-Mistral-7B-512k/f05d6512-16ca-4f44-a31f-392f8f71da74.json delete mode 100644 data/hfopenllm_v2/mistral/axolotl-ai-co/romulus-mistral-nemo-12b-simpo/3f48c9eb-dbfa-4035-96a6-d4f516fa1e80.json delete mode 100644 data/hfopenllm_v2/mistral/bamec66557/Mistral-Nemo-VICIOUS_MESH-12B-2407/9cd84a08-1f21-42ad-b8c0-eeb2df93ee29.json delete mode 100644 data/hfopenllm_v2/mistral/cckm/tinymistral_950m/d0dbcd95-252f-46e0-9699-81b293cb7db5.json delete mode 100644 data/hfopenllm_v2/mistral/chujiezheng/Mistral7B-PairRM-SPPO-ExPO/d7e88fea-5c3d-4b9c-85a9-a0cf35a97ea0.json delete mode 100644 data/hfopenllm_v2/mistral/flammenai/Mahou-1.2a-mistral-7B/d9804b0c-37db-492f-a1ba-851137e697f0.json delete mode 100644 data/hfopenllm_v2/mistral/flammenai/Mahou-1.5-mistral-nemo-12B/1c4e9e6a-7bb8-410f-9a3b-f88ea0ed474c.json delete mode 100644 data/hfopenllm_v2/mistral/hotmailuser/Mistral-modelstock-24B/58269430-efba-4d04-a69e-8ef666f2afee.json delete mode 100644 
data/hfopenllm_v2/mistral/hotmailuser/Mistral-modelstock2-24B/7c9aa35b-3d8e-4b3f-8ae7-35698a1f1c70.json delete mode 100644 data/hfopenllm_v2/mistral/irahulpandey/mistralai-7B-slerp-v0.1/034c23f5-6c03-4cee-b6b2-7263426cf975.json delete mode 100644 data/hfopenllm_v2/mistral/kaist-ai/mistral-orpo-capybara-7k/811cf797-62a1-4fda-960c-ee51f3e24a03.json delete mode 100644 data/hfopenllm_v2/mistral/llmat/Mistral-v0.3-7B-ORPO/04a1b79b-a5af-420d-829b-0750341490cf.json delete mode 100644 data/hfopenllm_v2/mistral/llmat/Mistral-v0.3-7B-ORPO/ff710b55-0a89-4582-8caa-867efb88cf98.json delete mode 100644 data/hfopenllm_v2/mistral/migtissera/Tess-3-Mistral-Nemo-12B/7ef5c287-cf98-429f-80c3-d71743612a73.json delete mode 100644 data/hfopenllm_v2/mistral/mistral-community/Mistral-7B-v0.2/a65136c6-b3d7-4107-8d3a-0ce84b77965b.json delete mode 100644 data/hfopenllm_v2/mistral/mistral-community/Mixtral-8x22B-v0.1/810fc203-f10a-49ad-8a6f-58cbd70f2205.json delete mode 100644 data/hfopenllm_v2/mistral/mistral-community/mixtral-8x22B-v0.3/abeddace-67d6-484a-b410-95d92819dfe5.json delete mode 100644 data/hfopenllm_v2/mistral/mistralai/Codestral-22B-v0.1/b6fa1ae6-3df8-437d-a844-3fa022c12370.json delete mode 100644 data/hfopenllm_v2/mistral/mistralai/Mistral-7B-v0.1/44381c62-a310-4f01-bd66-9d1434638cf4.json delete mode 100644 data/hfopenllm_v2/mistral/mistralai/Mistral-7B-v0.3/1a3acc9e-b2cd-4f80-8fcc-b227eee29f26.json delete mode 100644 data/hfopenllm_v2/mistral/mistralai/Mistral-Nemo-Base-2407/51b35f7f-f6f7-44ca-9816-b3d812112131.json delete mode 100644 data/hfopenllm_v2/mistral/mistralai/Mistral-Small-24B-Base-2501/6b30f50f-9a89-4a11-bcf9-4f38c46c1f18.json delete mode 100644 data/hfopenllm_v2/mistral/mistralai/Mixtral-8x22B-v0.1/b08cfbfa-906a-4dd0-b258-a7a56a6dcda4.json delete mode 100644 data/hfopenllm_v2/mistral/mistralai/Mixtral-8x7B-v0.1/4384c278-c869-4591-84fd-a8b2843fe42d.json delete mode 100644 data/hfopenllm_v2/mistral/mistralai/Mixtral-8x7B-v0.1/f1822f64-0594-4f16-98f4-29932c604187.json delete mode 100644 data/hfopenllm_v2/mistral/nazimali/Mistral-Nemo-Kurdish-Instruct/27e58a27-f4e9-4c7a-93f2-c3b15cab8f9f.json delete mode 100644 data/hfopenllm_v2/mistral/nazimali/Mistral-Nemo-Kurdish-Instruct/3381e897-35f3-45f4-ac05-3ca47441b772.json delete mode 100644 data/hfopenllm_v2/mistral/nazimali/Mistral-Nemo-Kurdish/0da50308-a631-4466-b2e4-2793412b31db.json delete mode 100644 data/hfopenllm_v2/mistral/nbeerbower/BigKartoffel-mistral-nemo-20B/95ba0175-5578-47fe-aec9-93ccf4f9f9af.json delete mode 100644 data/hfopenllm_v2/mistral/nbeerbower/DoppelKartoffel-Mistral-Nemo-23B/5db2ec95-d423-4987-aaa7-b5919d1a2cc8.json delete mode 100644 data/hfopenllm_v2/mistral/nbeerbower/DoublePotato-Mistral-Nemo-13B/03b30ba7-efc3-467e-bdde-c6a18437929b.json delete mode 100644 data/hfopenllm_v2/mistral/nbeerbower/Flammades-Mistral-Nemo-12B/a6e65aeb-f0d3-48ca-8f6e-933d0ea2113b.json delete mode 100644 data/hfopenllm_v2/mistral/nbeerbower/Gutensuppe-mistral-nemo-12B/80a9277b-5768-4da0-96c6-3289a7b8a9bc.json delete mode 100644 data/hfopenllm_v2/mistral/nbeerbower/Hermes2-Gutenberg2-Mistral-7B/b9b08e55-0c5d-427d-914b-e4cfb4de96b8.json delete mode 100644 data/hfopenllm_v2/mistral/nbeerbower/Lyra-Gutenberg-mistral-nemo-12B/5b3de7db-009e-46c9-bf34-fe5912c39b81.json delete mode 100644 data/hfopenllm_v2/mistral/nbeerbower/Mahou-1.5-mistral-nemo-12B-lorablated/0cee26b2-c3b3-40be-bc15-3fdaf7b4b38c.json delete mode 100644 data/hfopenllm_v2/mistral/nbeerbower/Mistral-Gutenberg-Doppel-7B-FFT/c3eae55f-ce07-4ea2-b9d4-92e0909a8b06.json delete mode 100644 
data/hfopenllm_v2/mistral/nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B-v2/178418ad-2d0a-40cd-a057-105bbe69f937.json delete mode 100644 data/hfopenllm_v2/mistral/nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B/012b188f-db69-4529-bfe3-db34c77e7dc0.json delete mode 100644 data/hfopenllm_v2/mistral/nbeerbower/Mistral-Nemo-Prism-12B-v2/e5582319-d8e6-4223-97bb-a64a2cc03853.json delete mode 100644 data/hfopenllm_v2/mistral/nbeerbower/Mistral-Nemo-Prism-12B-v7/d66604f0-15b3-4ac3-b0e9-083ab6906da0.json delete mode 100644 data/hfopenllm_v2/mistral/nbeerbower/Mistral-Nemo-Prism-12B/5ea20d83-ceee-4c52-911a-e25e9cfecf0e.json delete mode 100644 data/hfopenllm_v2/mistral/nbeerbower/Mistral-Small-Drummer-22B/2e86d526-de04-4339-8495-e88c5a9f3f18.json delete mode 100644 data/hfopenllm_v2/mistral/nbeerbower/Mistral-Small-Gutenberg-Doppel-22B/99cfc94d-3cde-4e42-924a-5c4a4c7f217a.json delete mode 100644 data/hfopenllm_v2/mistral/nbeerbower/Stella-mistral-nemo-12B-v2/ed825fd6-f749-449f-a1d6-c3ad7a82e354.json delete mode 100644 data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-bophades-12B/1cb58f83-841d-474a-9c7b-adece8cab805.json delete mode 100644 data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-bophades3-12B/2043110d-2b63-4133-9c53-b39b5b7869b6.json delete mode 100644 data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-cc-12B/45e38c7d-5f31-404b-8fcc-9f3cad239cd1.json delete mode 100644 data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutades-12B/b83d5033-b513-4472-84c1-1b757c533137.json delete mode 100644 data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutenberg-12B-v2/db2dee58-3a9c-4789-800d-ed7207c6699c.json delete mode 100644 data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutenberg-12B-v3/b4ed9f85-c1bb-4a52-8ba6-69f4e0f8e442.json delete mode 100644 data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutenberg-12B-v4/9f84023e-a23c-4d2c-afb3-f93629f97a6f.json delete mode 100644 data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutenberg-12B/9f8c4246-9770-4790-8db0-095e722d89e9.json delete mode 100644 data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutenberg2-12B-test/10a4d2dc-4779-4b0f-92fa-010a6a51fe9f.json delete mode 100644 data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-kartoffel-12B/b111507d-92e8-4af1-882a-9434d6825f51.json delete mode 100644 data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-narwhal-12B/e1bd9218-4bfb-4df1-a2bf-4a10937240dc.json delete mode 100644 data/hfopenllm_v2/mistral/nlpguy/Mistral-NeMo-Minitron-Upscale-v1/97b61e29-2157-4167-b5bd-94919ecdcacc.json delete mode 100644 data/hfopenllm_v2/mistral/nlpguy/Mistral-NeMo-Minitron-Upscale-v2/9cee29c1-b8dc-4a2c-b117-d5912b890824.json delete mode 100644 data/hfopenllm_v2/mistral/nlpguy/Mistral-NeMo-Minitron-Upscale-v3/7d2d135a-ab81-49fa-8c17-07f9bd54399d.json delete mode 100644 data/hfopenllm_v2/mistral/nvidia/Mistral-NeMo-Minitron-8B-Base/7bbc4787-9899-4d90-90c6-dec88bc7dd52.json delete mode 100644 data/hfopenllm_v2/mistral/pszemraj/Mistral-v0.3-6B/729b4f81-32da-41d2-8fa4-d18553b37b83.json delete mode 100644 data/hfopenllm_v2/mistral/shivam9980/mistral-7b-news-cnn-merged/ce626634-c5a4-422d-8b03-1a28108809ce.json delete mode 100644 data/hfopenllm_v2/mistral/shivank21/mistral_dpo_self/7b07e583-36df-47df-8439-224eca2e5761.json delete mode 100644 data/hfopenllm_v2/mistral/siqi00/Mistral-7B-DFT/e2f4255d-12ff-4c88-996d-bac6b51aaa33.json delete mode 100644 data/hfopenllm_v2/mistral/siqi00/Mistral-7B-DFT2/dae2a1a6-a608-4b64-a77a-e4aed87e7d7f.json delete mode 100644 
data/hfopenllm_v2/mistral/spmurrayzzz/Mistral-Syndicate-7B/80934f3c-8d0b-49be-9f42-e187b4729cff.json delete mode 100644 data/hfopenllm_v2/mistral/teknium/CollectiveCognition-v1.1-Mistral-7B/626bfec9-65d1-4250-8d07-d9c8a008b554.json delete mode 100644 data/hfopenllm_v2/mistral/teknium/OpenHermes-2-Mistral-7B/f24b2adb-f12d-4dd8-984b-8ab43e15720f.json delete mode 100644 data/hfopenllm_v2/mistral/teknium/OpenHermes-2.5-Mistral-7B/66d1a6cf-41da-4226-a06c-fc99641e754a.json delete mode 100644 data/hfopenllm_v2/mistral/tensopolis/mistral-small-2501-tensopolis-v1/53ec68aa-e4fc-430f-8ccf-f5886f1b9d4b.json delete mode 100644 data/hfopenllm_v2/mistral/tensopolis/mistral-small-r1-tensopolis/b2ee17e1-3d66-4622-8ea9-3bf8747371a5.json delete mode 100644 data/hfopenllm_v2/mistral/theprint/Conversely-Mistral-7B/5adde1ed-2d8f-4aa6-96f9-042df5358747.json delete mode 100644 data/hfopenllm_v2/mistral/tianyil1/MistralForCausalLM_Cal_DPO/9902ef50-5208-4053-bb90-e08c98211b3f.json delete mode 100644 data/hfopenllm_v2/mistral/uukuguy/speechless-code-mistral-7b-v1.0/cebdb6d6-a12c-47f6-b912-4b8e98763c48.json delete mode 100644 data/hfopenllm_v2/mistral/vicgalle/Merge-Mistral-Prometheus-7B/ecfdb6a4-36d7-4252-9677-10655b3855e5.json delete mode 100644 data/hfopenllm_v2/mistral/xinchen9/Mistral-7B-CoT/6c54d5e2-7fca-4fa3-9d04-0f44d0651018.json delete mode 100644 data/hfopenllm_v2/mistral/yam-peleg/Hebrew-Mistral-7B-200K/4d45347d-4491-4d7b-9abe-02c42974f520.json delete mode 100644 data/hfopenllm_v2/mistral/yam-peleg/Hebrew-Mistral-7B-200K/83a71a32-796a-4fec-9513-2f4b5e032749.json delete mode 100644 data/hfopenllm_v2/mistral/yam-peleg/Hebrew-Mistral-7B/99c28dc3-f614-430a-99d7-31c2218c4d7f.json create mode 100644 data/hfopenllm_v2/mistralai/Codestral-22B-v0.1/b56c6c01-a226-4090-9332-330535d79e24.json create mode 100644 data/hfopenllm_v2/mistralai/Ministral-8B-Instruct-2410/0ddc8e10-9cc5-48eb-b5b0-a2c2f071862b.json delete mode 100644 data/hfopenllm_v2/mistralai/Ministral-8B-Instruct-2410/d0cfd22e-6bad-4784-a172-76892d44f70b.json create mode 100644 data/hfopenllm_v2/mistralai/Mistral-7B-Instruct-v0.1/2917c469-7e22-497e-8d62-9b9972266658.json delete mode 100644 data/hfopenllm_v2/mistralai/Mistral-7B-Instruct-v0.1/ef779e6f-1c12-4237-aa45-e6315ed01d92.json create mode 100644 data/hfopenllm_v2/mistralai/Mistral-7B-Instruct-v0.2/2424d85c-e092-4e7c-bf4f-ae014d08a159.json delete mode 100644 data/hfopenllm_v2/mistralai/Mistral-7B-Instruct-v0.2/fb55e940-f03d-4d79-9363-ec17eebf9596.json create mode 100644 data/hfopenllm_v2/mistralai/Mistral-7B-Instruct-v0.3/90278363-1d8f-47ca-a7dc-c51c6b511dc9.json delete mode 100644 data/hfopenllm_v2/mistralai/Mistral-7B-Instruct-v0.3/ddc775e5-a4cc-49bd-ace3-113f325134c0.json create mode 100644 data/hfopenllm_v2/mistralai/Mistral-7B-v0.1/3c3197ee-675d-4bb7-874d-28104d2a3cae.json create mode 100644 data/hfopenllm_v2/mistralai/Mistral-7B-v0.3/eb5a8679-bfdd-40f2-9a32-55c04a65ae7e.json delete mode 100644 data/hfopenllm_v2/mistralai/Mistral-Large-Instruct-2411/1f2c9c0c-7e71-4886-9980-300a7ae5c55e.json create mode 100644 data/hfopenllm_v2/mistralai/Mistral-Large-Instruct-2411/d770f88d-b110-4f27-85e9-e52217c11798.json create mode 100644 data/hfopenllm_v2/mistralai/Mistral-Nemo-Base-2407/364328ce-5de7-401f-ad84-0c76e3c1dc91.json delete mode 100644 data/hfopenllm_v2/mistralai/Mistral-Nemo-Instruct-2407/3758a033-b197-403b-ab9e-7457856f3ebc.json create mode 100644 data/hfopenllm_v2/mistralai/Mistral-Nemo-Instruct-2407/f7dcfdbb-ff12-4692-9702-712de3d0b7ba.json create mode 100644 
data/hfopenllm_v2/mistralai/Mistral-Small-24B-Base-2501/d641aa88-9981-4a25-90d5-fcc4564ede52.json delete mode 100644 data/hfopenllm_v2/mistralai/Mistral-Small-Instruct-2409/15f66094-73f1-4302-adad-69522872682d.json create mode 100644 data/hfopenllm_v2/mistralai/Mistral-Small-Instruct-2409/8915e742-df2e-41bc-b83f-3e111edfd257.json delete mode 100644 data/hfopenllm_v2/mistralai/Mistral-Small-Instruct-2409/a85d1dbd-465b-42c8-baf5-0e7a7ca00725.json create mode 100644 data/hfopenllm_v2/mistralai/Mistral-Small-Instruct-2409/e29a5e35-8677-4e53-83fd-85e919b4366a.json create mode 100644 data/hfopenllm_v2/mistralai/Mixtral-8x22B-Instruct-v0.1/e5c55d38-dc04-42b4-9aca-ae7be436ebe0.json delete mode 100644 data/hfopenllm_v2/mistralai/Mixtral-8x22B-Instruct-v0.1/ee88881e-cdeb-4a55-b784-6b41b983d7aa.json create mode 100644 data/hfopenllm_v2/mistralai/Mixtral-8x22B-v0.1/504baceb-6684-430d-a532-b7b5b0b061fe.json delete mode 100644 data/hfopenllm_v2/mistralai/Mixtral-8x7B-Instruct-v0.1/2e1de889-2df9-4c81-b5ce-c00c602704b7.json create mode 100644 data/hfopenllm_v2/mistralai/Mixtral-8x7B-Instruct-v0.1/31fcd34a-af1e-4eab-bd9a-5ec17eb572d2.json create mode 100644 data/hfopenllm_v2/mistralai/Mixtral-8x7B-v0.1/01ab0a3e-393a-497a-9b32-8af790b7581a.json create mode 100644 data/hfopenllm_v2/mistralai/Mixtral-8x7B-v0.1/541967a6-b856-4dc9-958a-9335197fba99.json delete mode 100644 data/hfopenllm_v2/mixtao/MixTAO-7Bx2-MoE-v8.1/a6032673-fee4-4c8c-97fa-167729f495d6.json create mode 100644 data/hfopenllm_v2/mixtao/MixTAO-7Bx2-MoE-v8.1/ee31c801-67cb-46a3-9e39-02e842c0473f.json create mode 100644 data/hfopenllm_v2/mkurman/llama-3.2-MEDIT-3B-o1/65fabe8b-05af-461e-b804-fcff3492da34.json create mode 100644 data/hfopenllm_v2/mkurman/phi-4-MedIT-11B-exp-1/7e1a7121-2c9f-4196-bbdd-48aea257f384.json create mode 100644 data/hfopenllm_v2/mkurman/phi4-MedIT-10B-o1/dd32609c-316e-4511-8791-fcae33a1a506.json delete mode 100644 data/hfopenllm_v2/mkxu/llama-3-8b-instruct-fpo/0ba6add2-4495-4261-baab-224c0b6c683f.json create mode 100644 data/hfopenllm_v2/mkxu/llama-3-8b-instruct-fpo/d95d7058-49eb-47d7-b790-3a253291d22b.json create mode 100644 data/hfopenllm_v2/mkxu/llama-3-8b-po1/37cbc3d6-1198-4e23-b86c-1fd979eacd9a.json create mode 100644 data/hfopenllm_v2/mlabonne/AlphaMonarch-7B/76d0d338-e502-4638-adad-c4c4df00c26f.json delete mode 100644 data/hfopenllm_v2/mlabonne/AlphaMonarch-7B/d7eb4408-6857-4df1-b92b-9dd4712a4f23.json delete mode 100644 data/hfopenllm_v2/mlabonne/Beyonder-4x7B-v3/b0867447-6dd9-453c-af09-da0db5651e65.json create mode 100644 data/hfopenllm_v2/mlabonne/Beyonder-4x7B-v3/f47375bd-547a-4d0b-8c96-bbe2bc1ac445.json create mode 100644 data/hfopenllm_v2/mlabonne/BigQwen2.5-52B-Instruct/6b1ed68c-3099-4bd7-892b-cdc36c90ccfe.json delete mode 100644 data/hfopenllm_v2/mlabonne/BigQwen2.5-52B-Instruct/b18517f1-db51-43a8-812f-75aeccae508f.json create mode 100644 data/hfopenllm_v2/mlabonne/BigQwen2.5-Echo-47B-Instruct/0e59c8ca-cde0-4482-ab03-3309bcb8737c.json delete mode 100644 data/hfopenllm_v2/mlabonne/BigQwen2.5-Echo-47B-Instruct/12efcd4e-13cc-46e5-964a-35d4be69a01e.json create mode 100644 data/hfopenllm_v2/mlabonne/ChimeraLlama-3-8B-v2/d7e900e2-0574-44cd-a68a-0dd2715cf48c.json create mode 100644 data/hfopenllm_v2/mlabonne/ChimeraLlama-3-8B-v3/fd626c3f-566d-4193-9a85-e7c9a89e671c.json create mode 100644 data/hfopenllm_v2/mlabonne/Daredevil-8B-abliterated/196b04ae-fd53-400f-9f08-19edd4959f6e.json delete mode 100644 data/hfopenllm_v2/mlabonne/Daredevil-8B-abliterated/3ad89b65-5719-4e54-aadf-c10d3f27857a.json delete mode 100644 
data/hfopenllm_v2/mlabonne/Daredevil-8B/4653087e-b528-47c1-86eb-0166538229bc.json create mode 100644 data/hfopenllm_v2/mlabonne/Daredevil-8B/57177299-076a-4506-89a7-ce54af08df4f.json create mode 100644 data/hfopenllm_v2/mlabonne/Hermes-3-Llama-3.1-70B-lorablated/d3bdf36f-7f89-4b5a-b6cb-847b49200b5b.json delete mode 100644 data/hfopenllm_v2/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated/605f3f59-204e-4332-8b4e-9da04871ca1b.json create mode 100644 data/hfopenllm_v2/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated/92619b9e-dacf-4d0a-9f8b-6e131af74fa4.json delete mode 100644 data/hfopenllm_v2/mlabonne/NeuralBeagle14-7B/0bfec228-5bfb-4662-8be5-ad910b5bc3bd.json create mode 100644 data/hfopenllm_v2/mlabonne/NeuralBeagle14-7B/cbb408ea-ced6-4f47-9066-d4ff6d604b1e.json delete mode 100644 data/hfopenllm_v2/mlabonne/NeuralDaredevil-8B-abliterated/05fe5948-c228-46f5-ac96-3c234bc5b3ce.json create mode 100644 data/hfopenllm_v2/mlabonne/NeuralDaredevil-8B-abliterated/6999bb02-29fd-4c59-886f-184362afa06e.json create mode 100644 data/hfopenllm_v2/mlabonne/NeuralDaredevil-8B-abliterated/913d1d8e-0b02-4ce5-9b7c-403143a8c880.json delete mode 100644 data/hfopenllm_v2/mlabonne/NeuralDaredevil-8B-abliterated/d4b40160-579a-4e66-96a2-8441e5c02694.json create mode 100644 data/hfopenllm_v2/mlabonne/OrpoLlama-3-8B/82c87bc0-29cf-4150-92f5-c80fb0028ea6.json create mode 100644 data/hfopenllm_v2/mlabonne/phixtral-2x2_8/a18834ad-6143-4ce2-9842-471817a60a39.json delete mode 100644 data/hfopenllm_v2/mlx-community/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1-float32/9bf2a7e3-e744-4ac0-853a-f5cec8ef9c57.json create mode 100644 data/hfopenllm_v2/mlx-community/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1-float32/be900bcf-8ec9-484f-81db-0e83975c1ecd.json create mode 100644 data/hfopenllm_v2/mlx-community/Mistral-Small-24B-Instruct-2501-bf16/d226ccf6-674b-44c6-8b11-d782b59a961a.json delete mode 100644 data/hfopenllm_v2/mlx-community/Mistral-Small-24B-Instruct-2501-bf16/d769592a-faa3-4269-abac-373679f42c62.json create mode 100644 data/hfopenllm_v2/mmnga/Llama-3-70B-japanese-suzume-vector-v0.1/d8839a1a-8d07-4e0b-bd44-2668c84f750c.json create mode 100644 data/hfopenllm_v2/mobiuslabsgmbh/DeepSeek-R1-ReDistill-Llama3-8B-v1.1/e90b04db-2eb3-483a-ab0e-ea8aef821d84.json create mode 100644 data/hfopenllm_v2/mobiuslabsgmbh/DeepSeek-R1-ReDistill-Qwen-7B-v1.1/900921ae-fbb2-4488-ab19-18987c1d008d.json create mode 100644 data/hfopenllm_v2/moeru-ai/L3.1-Moe-2x8B-v0.2/0da0a7cd-c075-4bc0-8e88-8acc7212e5c3.json delete mode 100644 data/hfopenllm_v2/moeru-ai/L3.1-Moe-2x8B-v0.2/cf47622f-c921-4610-adef-bed2a4670249.json create mode 100644 data/hfopenllm_v2/moeru-ai/L3.1-Moe-4x8B-v0.1/b50a49cd-2909-4dbe-9c9f-c150abb99845.json delete mode 100644 data/hfopenllm_v2/moeru-ai/L3.1-Moe-4x8B-v0.1/bbcae028-046e-4e87-b991-5d7b92c42cc2.json create mode 100644 data/hfopenllm_v2/moeru-ai/L3.1-Moe-4x8B-v0.2/13831d81-a9dd-43c7-bce1-240aad42fbc6.json delete mode 100644 data/hfopenllm_v2/moeru-ai/L3.1-Moe-4x8B-v0.2/e6fe5591-f6aa-40c6-897f-f90084682109.json create mode 100644 data/hfopenllm_v2/monsterapi/Llama-3_1-8B-Instruct-orca-ORPO/56ea7cb3-3a1e-477a-bac8-26a0fde6297a.json delete mode 100644 data/hfopenllm_v2/monsterapi/Llama-3_1-8B-Instruct-orca-ORPO/b70a3980-7b0b-4bb1-878f-c2d49f9df09e.json create mode 100644 data/hfopenllm_v2/monsterapi/gemma-2-2b-LoRA-MonsterInstruct/8ce19b33-4f2b-4b8d-80bd-1ed399a5e9dd.json create mode 100644 data/hfopenllm_v2/mosaicml/mpt-7b/18ab167d-b72e-4fa9-94a8-09edc641c73f.json delete mode 100644 
data/hfopenllm_v2/mosaicml/mpt-7b/5e55c7ee-90f6-40a4-83ca-4a3acdad40f2.json create mode 100644 data/hfopenllm_v2/mosama/Qwen2.5-1.5B-Instruct-CoT-Reflection/7df237ea-29c0-4d0a-9092-c41df4c13aca.json delete mode 100644 data/hfopenllm_v2/mosama/Qwen2.5-1.5B-Instruct-CoT-Reflection/e0d9dbcc-8df2-4207-b849-2c4984340605.json delete mode 100644 data/hfopenllm_v2/mrdayl/OpenCogito/aacaba19-8c17-4d20-b27b-672810272ed4.json create mode 100644 data/hfopenllm_v2/mrdayl/OpenCogito/e5dc8caa-2d86-4ff0-af8d-22d85c8faeb0.json create mode 100644 data/hfopenllm_v2/mrdayl/OpenCognito-r1/01591bb6-9daf-40fb-b802-0a007f4cc388.json delete mode 100644 data/hfopenllm_v2/mrdayl/OpenCognito-r1/91e89f4c-d05b-476a-a8d9-0186ef8d1418.json delete mode 100644 data/hfopenllm_v2/mrdayl/OpenCognito-r2/672c6991-3c7b-48c3-9e95-389175e7cd6b.json create mode 100644 data/hfopenllm_v2/mrdayl/OpenCognito-r2/f6c32abf-bbae-4827-9ce2-29ce20c9463e.json delete mode 100644 data/hfopenllm_v2/mrdayl/OpenCognito/049eb195-7ca8-42a7-bf2a-e072b7929958.json create mode 100644 data/hfopenllm_v2/mrdayl/OpenCognito/74a6605d-3557-4458-bef5-cc9420434e68.json delete mode 100644 data/hfopenllm_v2/mrdayl/OpenThink/ae71ec28-7e22-42c4-8549-4334dff8a811.json create mode 100644 data/hfopenllm_v2/mrdayl/OpenThink/dbe6e126-d35c-4634-a544-adf374ed5d00.json create mode 100644 data/hfopenllm_v2/mrm8488/phi-4-14B-grpo-gsm8k-3e/d68681c1-01e4-4af0-9a81-e0aaed0ae865.json create mode 100644 data/hfopenllm_v2/mrm8488/phi-4-14B-grpo-limo/de9620b8-7112-436f-8941-fae2c5e7f9e0.json create mode 100644 data/hfopenllm_v2/mukaj/Llama-3.1-Hawkish-8B/cafee7ac-deb6-4c4b-af8f-81548648cb14.json create mode 100644 data/hfopenllm_v2/natong19/Mistral-Nemo-Instruct-2407-abliterated/3e3cb617-6f19-4731-b31a-b1f4d88237d5.json delete mode 100644 data/hfopenllm_v2/natong19/Mistral-Nemo-Instruct-2407-abliterated/5256f7b6-f830-4733-a092-01470607558d.json create mode 100644 data/hfopenllm_v2/natong19/Qwen2-7B-Instruct-abliterated/3c2c2c14-d065-4d6c-8c98-44ba8f2ca461.json delete mode 100644 data/hfopenllm_v2/natong19/Qwen2-7B-Instruct-abliterated/7c8605a5-2f0d-4cc7-b840-d77cb5fdf849.json create mode 100644 data/hfopenllm_v2/nazimali/Mistral-Nemo-Kurdish-Instruct/8909f916-401b-4457-ab8f-2691696049c6.json create mode 100644 data/hfopenllm_v2/nazimali/Mistral-Nemo-Kurdish-Instruct/ae191508-7dad-4cac-ad4a-af95d7a15b5d.json create mode 100644 data/hfopenllm_v2/nazimali/Mistral-Nemo-Kurdish/507f5047-fac3-415f-b9fa-aae4311fa837.json create mode 100644 data/hfopenllm_v2/nbeerbower/BigKartoffel-mistral-nemo-20B/0ee8716c-74f0-41b4-94a2-efc715150293.json create mode 100644 data/hfopenllm_v2/nbeerbower/DoppelKartoffel-Mistral-Nemo-23B/fcf491f4-cf57-4c95-9de1-4702ab5d54c7.json create mode 100644 data/hfopenllm_v2/nbeerbower/DoublePotato-Mistral-Nemo-13B/4fd20259-c7c7-4da5-9013-ae2feb2175b1.json create mode 100644 data/hfopenllm_v2/nbeerbower/Dumpling-Qwen2.5-1.5B/a7c8c345-cade-48fd-93c0-0f344044d2b5.json create mode 100644 data/hfopenllm_v2/nbeerbower/Dumpling-Qwen2.5-14B/7a8e3986-7688-4a26-a74c-a9bb47cd3e8d.json create mode 100644 data/hfopenllm_v2/nbeerbower/Dumpling-Qwen2.5-7B-1k-r16/7a2ffb4d-1135-42a1-b28b-3b4e4d014979.json create mode 100644 data/hfopenllm_v2/nbeerbower/Dumpling-Qwen2.5-7B-1k-r64-2e-5/25468720-93d7-4f10-a534-30c4976657e8.json create mode 100644 data/hfopenllm_v2/nbeerbower/EVA-abliterated-TIES-Qwen2.5-1.5B/5ba1d617-9d9a-4c3b-b9cc-3224ace129b3.json create mode 100644 data/hfopenllm_v2/nbeerbower/EVA-abliterated-TIES-Qwen2.5-14B/27b2b46f-1323-4ddd-9f65-d8fcd9cd6508.json create mode 
100644 data/hfopenllm_v2/nbeerbower/Flammades-Mistral-Nemo-12B/65917125-bb7c-4d64-ba5f-b5e4f67ec332.json create mode 100644 data/hfopenllm_v2/nbeerbower/Gemma2-Gutenberg-Doppel-9B/30bf22d8-b93a-4775-8073-30e14e15e35d.json create mode 100644 data/hfopenllm_v2/nbeerbower/Gutensuppe-mistral-nemo-12B/ff510365-a13d-4e44-9709-59a56e864991.json create mode 100644 data/hfopenllm_v2/nbeerbower/Hermes2-Gutenberg2-Mistral-7B/6d1eebc4-228b-43f3-b31c-3d5b1591ae2d.json delete mode 100644 data/hfopenllm_v2/nbeerbower/Kartoffel-Deepfry-12B/09ba1be1-4b42-4eba-810f-a0aed64aafc0.json create mode 100644 data/hfopenllm_v2/nbeerbower/Kartoffel-Deepfry-12B/f1e8cdbb-14b7-4959-a053-fb1b37629aff.json create mode 100644 data/hfopenllm_v2/nbeerbower/Llama-3.1-Nemotron-lorablated-70B/4145d1a0-8d6a-4d64-8a45-a89cf343ac46.json create mode 100644 data/hfopenllm_v2/nbeerbower/Llama3.1-Gutenberg-Doppel-70B/d6966190-e254-4902-8472-cac59bfbdbe0.json create mode 100644 data/hfopenllm_v2/nbeerbower/Lyra-Gutenberg-mistral-nemo-12B/5fdb5437-f413-451d-9800-42036cda7686.json delete mode 100644 data/hfopenllm_v2/nbeerbower/Lyra4-Gutenberg-12B/02606fe0-ca08-4102-9670-8a18a9cc6f81.json create mode 100644 data/hfopenllm_v2/nbeerbower/Lyra4-Gutenberg-12B/347577a4-2768-4472-ba48-9b174ad89724.json create mode 100644 data/hfopenllm_v2/nbeerbower/Lyra4-Gutenberg2-12B/33af440e-837d-4454-9340-af0d3ee74f77.json delete mode 100644 data/hfopenllm_v2/nbeerbower/Lyra4-Gutenberg2-12B/f9da5237-3903-4bbf-a0bc-0bcf3152f45a.json create mode 100644 data/hfopenllm_v2/nbeerbower/Mahou-1.5-mistral-nemo-12B-lorablated/1a1f4709-8d05-4905-8105-0c3606d5ef5b.json create mode 100644 data/hfopenllm_v2/nbeerbower/Mistral-Gutenberg-Doppel-7B-FFT/28421948-089b-4487-bb71-a06e5ce74402.json create mode 100644 data/hfopenllm_v2/nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B-v2/3fa0c783-9226-4fc8-b3a0-6e960684f43d.json create mode 100644 data/hfopenllm_v2/nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B/743b7fe2-f998-408c-98b1-af02d9c1ee2a.json create mode 100644 data/hfopenllm_v2/nbeerbower/Mistral-Nemo-Moderne-12B-FFT-experimental/0039c88b-a881-4ce0-9a0a-a10f1a8cbc70.json delete mode 100644 data/hfopenllm_v2/nbeerbower/Mistral-Nemo-Moderne-12B-FFT-experimental/e7337143-6ec7-4467-b6f5-907492705cc9.json create mode 100644 data/hfopenllm_v2/nbeerbower/Mistral-Nemo-Prism-12B-v2/87c7fbd9-7648-4d0d-ac9e-8ba85860e335.json create mode 100644 data/hfopenllm_v2/nbeerbower/Mistral-Nemo-Prism-12B-v7/6ca3ab87-c05e-46b5-879d-4fc8bf75417b.json create mode 100644 data/hfopenllm_v2/nbeerbower/Mistral-Nemo-Prism-12B/525f1b9f-88a2-459d-bb4a-7c01a0107968.json create mode 100644 data/hfopenllm_v2/nbeerbower/Mistral-Small-Drummer-22B/503f79be-7f05-4464-ac9f-0f284f1e7965.json create mode 100644 data/hfopenllm_v2/nbeerbower/Mistral-Small-Gutenberg-Doppel-22B/86ec7d95-6f6d-4ca6-97d5-7a910f42a06d.json delete mode 100644 data/hfopenllm_v2/nbeerbower/Nemo-Loony-12B-experimental/894b90c6-c701-47d8-b930-4e271e28962f.json create mode 100644 data/hfopenllm_v2/nbeerbower/Nemo-Loony-12B-experimental/d472ba79-6592-4f8a-a99c-ec3f71468d3e.json delete mode 100644 data/hfopenllm_v2/nbeerbower/Nemoties-ChatML-12B/3644fc16-b0fa-42d7-b17a-eb8f8332193f.json create mode 100644 data/hfopenllm_v2/nbeerbower/Nemoties-ChatML-12B/6ddc052c-6bda-4d8e-ad97-20d881c8cfb7.json create mode 100644 data/hfopenllm_v2/nbeerbower/Qwen2.5-Gutenberg-Doppel-14B/76d1aed8-80fe-4b4f-bd81-ea0d6bf085c4.json delete mode 100644 data/hfopenllm_v2/nbeerbower/SmolNemo-12B-FFT-experimental/435e3ce7-479f-4624-978e-25d755dee811.json create mode 
100644 data/hfopenllm_v2/nbeerbower/SmolNemo-12B-FFT-experimental/d2845d6e-65dd-4448-901d-d554b3e741f3.json create mode 100644 data/hfopenllm_v2/nbeerbower/Stella-mistral-nemo-12B-v2/f7dd203f-24d8-4875-878a-12ed99e20cd3.json create mode 100644 data/hfopenllm_v2/nbeerbower/gemma2-gutenberg-27B/287ae246-bee5-4fae-b78f-203491aa8df2.json create mode 100644 data/hfopenllm_v2/nbeerbower/gemma2-gutenberg-9B/9ee493f7-e031-4593-beae-65be17678e00.json create mode 100644 data/hfopenllm_v2/nbeerbower/llama-3-gutenberg-8B/86b10c6f-41c6-4d0a-ae59-f90e204e466c.json create mode 100644 data/hfopenllm_v2/nbeerbower/llama3.1-cc-8B/043e3533-7d5c-4d45-bcd8-0dbcc8ca4819.json create mode 100644 data/hfopenllm_v2/nbeerbower/llama3.1-kartoffeldes-70B/1b3269fb-4b16-42b6-80c0-3d54bc2b4fed.json create mode 100644 data/hfopenllm_v2/nbeerbower/mistral-nemo-bophades-12B/ee625c29-62c4-49da-9790-e7e67233157d.json create mode 100644 data/hfopenllm_v2/nbeerbower/mistral-nemo-bophades3-12B/02b16bf2-62bb-401e-9726-2135d8d610be.json create mode 100644 data/hfopenllm_v2/nbeerbower/mistral-nemo-cc-12B/db10c6f9-2962-46cc-aa4e-4c99c4b494d1.json create mode 100644 data/hfopenllm_v2/nbeerbower/mistral-nemo-gutades-12B/aa37bda0-2e0a-4361-a5b4-468154d8ac72.json create mode 100644 data/hfopenllm_v2/nbeerbower/mistral-nemo-gutenberg-12B-v2/d9a6565c-5a0b-4893-b6e0-1fc52ec55bf5.json create mode 100644 data/hfopenllm_v2/nbeerbower/mistral-nemo-gutenberg-12B-v3/becf9805-83a9-4137-a938-81a61a10e4f0.json create mode 100644 data/hfopenllm_v2/nbeerbower/mistral-nemo-gutenberg-12B-v4/6e848120-bc31-4628-af05-30707a6dcc41.json create mode 100644 data/hfopenllm_v2/nbeerbower/mistral-nemo-gutenberg-12B/864af855-71b0-4b11-ae3f-56294a7d0db9.json create mode 100644 data/hfopenllm_v2/nbeerbower/mistral-nemo-gutenberg2-12B-test/285bd390-1dd9-4db2-af45-68dea557da3c.json create mode 100644 data/hfopenllm_v2/nbeerbower/mistral-nemo-kartoffel-12B/459e2375-1a15-4129-bee0-dc8852d531e2.json create mode 100644 data/hfopenllm_v2/nbeerbower/mistral-nemo-narwhal-12B/7b4c7d92-f581-4057-bec9-e3a8c6a5386e.json delete mode 100644 data/hfopenllm_v2/nbeerbower/mistral-nemo-wissenschaft-12B/5f68a07f-4442-4453-92c3-b615323da96b.json create mode 100644 data/hfopenllm_v2/nbeerbower/mistral-nemo-wissenschaft-12B/7ceab841-f9a3-455b-9314-243d8fc3cd11.json delete mode 100644 data/hfopenllm_v2/nbrahme/IndusQ/b372e098-0e1c-410a-8f5a-1bd9a910aa6b.json create mode 100644 data/hfopenllm_v2/nbrahme/IndusQ/c1e2fb45-22d8-4eb4-8971-ce89c3048b9e.json create mode 100644 data/hfopenllm_v2/necva/IE-cont-Llama3.1-8B/68cb2ca1-1648-41a2-92b7-969bccdca4ee.json create mode 100644 data/hfopenllm_v2/necva/replica-IEPile/5f285d61-5e4b-4c5c-8960-c10313d76ae3.json delete mode 100644 data/hfopenllm_v2/necva/replica-IEPile/86a45185-8753-4cd0-818f-63a62f03423f.json create mode 100644 data/hfopenllm_v2/neopolita/jessi-v0.1-bf16-falcon3-7b-instruct/3af19898-8590-4aec-b324-46c7fbf596d3.json delete mode 100644 data/hfopenllm_v2/neopolita/jessi-v0.1-bf16-falcon3-7b-instruct/5063eae6-e8f3-41c6-ab11-cfcc4a0a0cf3.json delete mode 100644 data/hfopenllm_v2/neopolita/jessi-v0.1-falcon3-10b-instruct/c2ee0925-6e4a-4d3b-80be-b8b98156e3db.json create mode 100644 data/hfopenllm_v2/neopolita/jessi-v0.1-falcon3-10b-instruct/e8472266-6d03-439f-bd6b-e3ac5ef2cf09.json create mode 100644 data/hfopenllm_v2/neopolita/jessi-v0.1-qwen2.5-7b-instruct/3f578b45-48f9-4022-991c-32a71706aba3.json delete mode 100644 data/hfopenllm_v2/neopolita/jessi-v0.1-qwen2.5-7b-instruct/9b1f077d-5893-417c-ac87-1d0beb39b750.json delete mode 100644 
data/hfopenllm_v2/neopolita/jessi-v0.1-virtuoso-small/b4630d14-950d-4dbf-8897-74d46dd51130.json create mode 100644 data/hfopenllm_v2/neopolita/jessi-v0.1-virtuoso-small/ef8c22a7-3898-422e-88e2-1a8c14ab5bf2.json delete mode 100644 data/hfopenllm_v2/neopolita/jessi-v0.2-falcon3-10b-instruct/4a73436e-e2b7-4c03-b4b2-80d0ed8e389a.json create mode 100644 data/hfopenllm_v2/neopolita/jessi-v0.2-falcon3-10b-instruct/81630ea2-d496-4872-92b7-e476badaf50d.json create mode 100644 data/hfopenllm_v2/neopolita/jessi-v0.2-falcon3-7b-instruct/9436d04a-9c81-47ad-a7b8-496e14058627.json delete mode 100644 data/hfopenllm_v2/neopolita/jessi-v0.2-falcon3-7b-instruct/bd8025f1-66d4-4644-af1b-ca5366a32964.json delete mode 100644 data/hfopenllm_v2/neopolita/jessi-v0.3-falcon3-7b-instruct/95281cbf-6f27-4e17-b21f-9a0604d5629b.json create mode 100644 data/hfopenllm_v2/neopolita/jessi-v0.3-falcon3-7b-instruct/f1e6e54e-cb97-4980-8957-2190ee5c4c34.json create mode 100644 data/hfopenllm_v2/neopolita/jessi-v0.4-falcon3-7b-instruct/30914dd3-c857-4aaf-b6b9-d1c7e4917e89.json delete mode 100644 data/hfopenllm_v2/neopolita/jessi-v0.4-falcon3-7b-instruct/514b1b8c-d80a-4851-afec-e04968b2e733.json create mode 100644 data/hfopenllm_v2/neopolita/jessi-v0.5-falcon3-7b-instruct/1c389a32-68b3-47c0-a6b8-2c2291293002.json delete mode 100644 data/hfopenllm_v2/neopolita/jessi-v0.5-falcon3-7b-instruct/6736897b-390a-4c19-8a04-9b606c1705b1.json delete mode 100644 data/hfopenllm_v2/neopolita/jessi-v0.6-falcon3-7b-instruct/5b934386-a0e9-437d-bf9e-a51074415a1e.json create mode 100644 data/hfopenllm_v2/neopolita/jessi-v0.6-falcon3-7b-instruct/e759a217-6571-446d-9bf9-d1512793f307.json create mode 100644 data/hfopenllm_v2/neopolita/loki-v0.1-virtuoso/753f3b21-7365-4117-b2a0-a91f03ec3d39.json delete mode 100644 data/hfopenllm_v2/neopolita/loki-v0.1-virtuoso/907047d7-1767-4009-8e04-02f5dc366355.json create mode 100644 data/hfopenllm_v2/netcat420/DeepSeek-R1-Distill-Qwen-MFANN-Slerp-7b/297ef102-67c1-4e9c-b418-fed026bb1f8a.json delete mode 100644 data/hfopenllm_v2/netcat420/DeepSeek-R1-MFANN-TIES-unretrained-7b/43da500e-cdc7-4b70-a0eb-6ae3371670d9.json create mode 100644 data/hfopenllm_v2/netcat420/DeepSeek-R1-MFANN-TIES-unretrained-7b/9fbf73d7-7d67-4d6c-a5b9-efc627cd1b2b.json create mode 100644 data/hfopenllm_v2/netcat420/Llama3.1-MFANN-8b/b1446577-f13f-434a-a0b4-916091395d4a.json create mode 100644 data/hfopenllm_v2/netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V2/fc8946aa-8b04-482c-8c05-d026d2af07be.json create mode 100644 data/hfopenllm_v2/netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V3/fabe3784-948c-4618-9cf0-c76a3ddd3820.json create mode 100644 data/hfopenllm_v2/netcat420/MFANN-Llama3.1-Abliterated-SLERP-V4/736dcf09-6a19-4e88-a790-7a7ee74d8717.json create mode 100644 data/hfopenllm_v2/netcat420/MFANN-Llama3.1-Abliterated-SLERP-V5/75b4c750-1570-4825-a04a-965c06861fd4.json create mode 100644 data/hfopenllm_v2/netcat420/MFANN-Llama3.1-Abliterated-Slerp-TIES/b7f8b678-2aea-4d41-ba21-2083fc472574.json create mode 100644 data/hfopenllm_v2/netcat420/MFANN-Llama3.1-Abliterated-Slerp-V3.2/a8010630-58de-448c-af08-70b8ffec431b.json create mode 100644 data/hfopenllm_v2/netcat420/MFANN-SFT/4a0c2ce5-a4b4-4d35-b65d-bbc6e36a649b.json delete mode 100644 data/hfopenllm_v2/netcat420/MFANN-SFT/748c7e5a-697b-4763-a43e-e3b6a6f2951b.json create mode 100644 data/hfopenllm_v2/netcat420/MFANN-abliterated-phi2-merge-unretrained/1132251a-59c7-402e-9957-f9288864508f.json create mode 100644 
data/hfopenllm_v2/netcat420/MFANN-llama3.1-Abliterated-SLERP/e2fac049-8f9f-4b71-bcd3-5746b7d90150.json
create mode 100644 data/hfopenllm_v2/netcat420/MFANN-llama3.1-abliterated-SLERP-v3.1/d891a1e1-ad65-498f-9ee8-59523c1bfd19.json
create mode 100644 data/hfopenllm_v2/netcat420/MFANN-llama3.1-abliterated-SLERP-v3/9dd3103f-6c4f-4077-ac27-3a9b0f4a5882.json
create mode 100644 data/hfopenllm_v2/netcat420/MFANN-llama3.1-abliterated-v2/ca031f70-5785-46d1-8a58-b279d8340776.json
create mode 100644 data/hfopenllm_v2/netcat420/MFANN-phigments-slerp-V2/18457711-92b8-4c27-a89a-928fecdf5724.json
create mode 100644 data/hfopenllm_v2/netcat420/MFANN-phigments-slerp-V3.2/3398aeb8-08a8-4be9-a24c-efeabcaa2139.json
create mode 100644 data/hfopenllm_v2/netcat420/MFANN-phigments-slerp-V3.3/707bc006-4318-41bc-b91b-aa43ca7cba6f.json
create mode 100644 data/hfopenllm_v2/netcat420/MFANN3b/7bfda919-13be-4b68-8655-99fe6a4605a2.json
delete mode 100644 data/hfopenllm_v2/netcat420/MFANN3b/c5913e2b-c8c7-4e8f-a1c3-f2f764c8478d.json
delete mode 100644 data/hfopenllm_v2/netcat420/MFANN3bv0.15/ebdb6805-f14e-4fb9-b1c8-acd258b93385.json
create mode 100644 data/hfopenllm_v2/netcat420/MFANN3bv0.15/f844e739-5f0d-4db4-ba66-bd33b1290571.json
create mode 100644 data/hfopenllm_v2/netcat420/MFANN3bv0.18/0cde6639-6a89-4682-bb3e-a2a24a1bc8ab.json
delete mode 100644 data/hfopenllm_v2/netcat420/MFANN3bv0.18/5b522625-39ed-4faa-a3f6-1cec01baf906.json
delete mode 100644 data/hfopenllm_v2/netcat420/MFANN3bv0.19/4207b373-ef5c-48f8-a463-814b81a44410.json
create mode 100644 data/hfopenllm_v2/netcat420/MFANN3bv0.19/87652005-4404-4c45-bd4f-5f63c44adf63.json
delete mode 100644 data/hfopenllm_v2/netcat420/MFANN3bv0.20/2d36210e-e2ca-41a8-9434-c29168849a28.json
create mode 100644 data/hfopenllm_v2/netcat420/MFANN3bv0.20/a7e0bc2d-784d-4719-ac08-d8fa0c29d178.json
delete mode 100644 data/hfopenllm_v2/netcat420/MFANN3bv0.21/053f6333-9722-4c3e-a5bb-246b273225de.json
create mode 100644 data/hfopenllm_v2/netcat420/MFANN3bv0.21/e8ba93e6-6f90-4169-8403-381b7f9e26ab.json
delete mode 100644 data/hfopenllm_v2/netcat420/MFANN3bv0.22/e551e936-41fa-4fda-84e9-dec9f5694c5d.json
create mode 100644 data/hfopenllm_v2/netcat420/MFANN3bv0.22/ea86b542-3d06-4e71-b49d-17cdd362b465.json
create mode 100644 data/hfopenllm_v2/netcat420/MFANN3bv0.23/15615d2c-46a1-47c7-a273-697e97bdf9f2.json
delete mode 100644 data/hfopenllm_v2/netcat420/MFANN3bv0.23/28396f73-b949-4db0-b685-77fc5901770b.json
delete mode 100644 data/hfopenllm_v2/netcat420/MFANN3bv0.24/0081cd67-9178-4443-aebf-721b75c0fc77.json
create mode 100644 data/hfopenllm_v2/netcat420/MFANN3bv0.24/a2b8da3f-c99e-4dba-b4a2-23739281eaf2.json
create mode 100644 data/hfopenllm_v2/netcat420/MFANN3bv1.1/76f3fa3a-1629-4cdd-b457-3a108784b427.json
delete mode 100644 data/hfopenllm_v2/netcat420/MFANN3bv1.1/fb148468-c189-4fe5-b803-7532af8dec1d.json
delete mode 100644 data/hfopenllm_v2/netcat420/MFANN3bv1.2/16b4d316-db1d-4282-a5c0-b8ffe4af817c.json
create mode 100644 data/hfopenllm_v2/netcat420/MFANN3bv1.2/c9e979e1-4433-4a38-8fd4-c14895e74f44.json
create mode 100644 data/hfopenllm_v2/netcat420/MFANN3bv1.3/3f2effba-1ab8-476d-b228-ed9491e83adf.json
delete mode 100644 data/hfopenllm_v2/netcat420/MFANN3bv1.3/5981cb70-62a7-4e42-bf12-081c67c1b792.json
delete mode 100644 data/hfopenllm_v2/netcat420/MFANN3bv1.4/426bdea2-83f2-4915-9e82-ba4c8c8f4224.json
create mode 100644 data/hfopenllm_v2/netcat420/MFANN3bv1.4/a5f0fb1b-27a7-495f-a010-3307afdb8949.json
create mode 100644 data/hfopenllm_v2/netcat420/MFANNv0.19/22f2aa1d-fff1-430a-9c20-3b32859d9665.json
delete mode 100644 data/hfopenllm_v2/netcat420/MFANNv0.19/d2b0785d-a169-4773-a3fc-95b536fe3cc2.json
delete mode 100644 data/hfopenllm_v2/netcat420/MFANNv0.20/4c84cbc4-1a4d-45d9-909b-92d2b4e961b6.json
create mode 100644 data/hfopenllm_v2/netcat420/MFANNv0.20/daff0e6f-d29f-4861-855f-902a0cd9a469.json
create mode 100644 data/hfopenllm_v2/netcat420/MFANNv0.21/0f5cb926-b691-4d57-87f5-290235fd250a.json
delete mode 100644 data/hfopenllm_v2/netcat420/MFANNv0.21/5d37ba65-09f6-4762-836e-4634c06ac9f7.json
delete mode 100644 data/hfopenllm_v2/netcat420/MFANNv0.22.1/5009ba04-1a8d-4e91-bd32-659fe67c4d26.json
create mode 100644 data/hfopenllm_v2/netcat420/MFANNv0.22.1/d9e813da-2966-4901-99f9-c7627c64fc52.json
create mode 100644 data/hfopenllm_v2/netcat420/MFANNv0.23/4cb98a5b-3eb7-4fa8-adfd-17add38d3332.json
delete mode 100644 data/hfopenllm_v2/netcat420/MFANNv0.23/f7b617fa-7095-4eef-88bb-4fd73c23d5dc.json
delete mode 100644 data/hfopenllm_v2/netcat420/MFANNv0.24/59e5fcd0-e46f-4346-b695-bee4dab9cfc4.json
create mode 100644 data/hfopenllm_v2/netcat420/MFANNv0.24/f7494fd4-d248-46a6-a46d-f9d8db560aae.json
create mode 100644 data/hfopenllm_v2/netcat420/MFANNv0.25/4b8533d1-7770-435f-ba76-a5c658aabd8f.json
delete mode 100644 data/hfopenllm_v2/netcat420/MFANNv0.25/e94f28ff-ae6c-4109-96a2-9dbe07621e03.json
create mode 100644 data/hfopenllm_v2/netcat420/Qwen2.5-7B-nerd-uncensored-v0.9-MFANN/309c7906-0010-4f17-848f-185062d96a26.json
create mode 100644 data/hfopenllm_v2/netcat420/Qwen2.5-7b-MFANN-slerp/f18ab2ab-098b-4e46-8f8d-433b52cdb81b.json
create mode 100644 data/hfopenllm_v2/netcat420/Qwen2.5-7b-nerd-uncensored-MFANN-slerp/b4a70c71-dfac-4888-937e-d5220b491b0e.json
create mode 100644 data/hfopenllm_v2/netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN-Slerp-Unretrained/b879a534-6b24-4873-a0e4-e18453540121.json
create mode 100644 data/hfopenllm_v2/netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN/c67ae8f2-596b-4dab-8c4f-768b2f0608b4.json
create mode 100644 data/hfopenllm_v2/netcat420/Qwen2.5-DeepSeek-R1-MFANN-Slerp-7b/7766c638-b4dc-4b2d-8c14-becdb1b709ef.json
create mode 100644 data/hfopenllm_v2/netcat420/Qwen2.5-MFANN-7b/dd211bef-3940-4d78-8f7b-a67da81d605b.json
create mode 100644 data/hfopenllm_v2/netcat420/qwen2.5-MFANN-7b-SLERP-V1.2/87e20b7a-85c8-4845-94b0-ace1e18814cb.json
create mode 100644 data/hfopenllm_v2/netcat420/qwen2.5-MFANN-7b-SLERPv1.1/9ab01db6-3154-4c5b-b6a2-35479538d332.json
create mode 100644 data/hfopenllm_v2/netcat420/qwen2.5-MFANN-7b-v1.1/9d35316a-011d-4e45-ae57-317b53de621f.json
create mode 100644 data/hfopenllm_v2/netease-youdao/Confucius-o1-14B/c9e7fec0-b244-4ca1-a117-a52fdd4671a5.json
delete mode 100644 data/hfopenllm_v2/netease-youdao/Confucius-o1-14B/ddd234e4-0665-4b36-943f-e99f0a293f50.json
create mode 100644 data/hfopenllm_v2/newsbang/Homer-7B-v0.1/0659cb01-0d52-42cb-9e3a-2d8cac01692e.json
delete mode 100644 data/hfopenllm_v2/newsbang/Homer-7B-v0.1/af9ae4eb-2fdf-414a-8585-4f0f894a6a49.json
create mode 100644 data/hfopenllm_v2/newsbang/Homer-7B-v0.2/98490bb1-70f0-4e7a-8fd6-698ec9fcbd5a.json
delete mode 100644 data/hfopenllm_v2/newsbang/Homer-7B-v0.2/d7964788-36a6-4b86-add6-cd8a1a42eb7c.json
create mode 100644 data/hfopenllm_v2/newsbang/Homer-v0.3-Qwen2.5-7B/6e0f7e7e-8927-436e-95a7-5a7c626ca241.json
create mode 100644 data/hfopenllm_v2/newsbang/Homer-v0.4-Qwen2.5-7B/9c5b3f4d-6e0b-482b-b142-dd7b387cae22.json
create mode 100644 data/hfopenllm_v2/newsbang/Homer-v0.5-Qwen2.5-7B/04840708-a4cc-407c-8b2a-876b382920a1.json
create mode 100644 data/hfopenllm_v2/newsbang/Homer-v1.0-Qwen2.5-72B/83b0844c-70fe-4b63-8ed2-4147390518ee.json
create mode 100644 data/hfopenllm_v2/newsbang/Homer-v1.0-Qwen2.5-7B/9cf10c60-bee1-4f4f-9e03-c3c10287bded.json
create mode 100644 data/hfopenllm_v2/nguyentd/FinancialAdvice-Qwen2.5-7B/8e92dd9e-a68c-46ef-9b03-955c06a21437.json
create mode 100644 data/hfopenllm_v2/ngxson/MiniThinky-1B-Llama-3.2/dd1139d8-2b44-4516-b24a-1219826f5482.json
create mode 100644 data/hfopenllm_v2/ngxson/MiniThinky-v2-1B-Llama-3.2/e37e86f7-b67b-4f0a-b1bd-92f30842b303.json
delete mode 100644 data/hfopenllm_v2/nhyha/N3N_Delirium-v1_1030_0227/5128233e-41be-4e26-9ec2-2b7926c66b7c.json
create mode 100644 data/hfopenllm_v2/nhyha/N3N_Delirium-v1_1030_0227/bc3b55d5-35ca-48b5-832e-8544e145b1b1.json
create mode 100644 data/hfopenllm_v2/nhyha/N3N_Llama-3.1-8B-Instruct_1028_0216/5757cd3d-c64e-4743-8200-5e610e24bf95.json
delete mode 100644 data/hfopenllm_v2/nhyha/N3N_Llama-3.1-8B-Instruct_1028_0216/928f9cd0-ce0f-43f7-aa5f-be9cbf4d91cd.json
create mode 100644 data/hfopenllm_v2/nhyha/N3N_gemma-2-9b-it_20241029_1532/ae8cd3ad-ce7b-41f4-8e4a-f11002af2e58.json
create mode 100644 data/hfopenllm_v2/nhyha/N3N_gemma-2-9b-it_20241110_2026/bee54048-ebb2-4051-a18f-aa85b0f2ce27.json
create mode 100644 data/hfopenllm_v2/nhyha/merge_Qwen2.5-7B-Instruct_20241023_0314/2f98c85b-5a2e-467e-9626-b1bdefe7bdd7.json
delete mode 100644 data/hfopenllm_v2/nhyha/merge_Qwen2.5-7B-Instruct_20241023_0314/eb608d79-545a-4cc2-8d28-e539a3af7f17.json
create mode 100644 data/hfopenllm_v2/nidum/Nidum-Limitless-Gemma-2B/2c530a3b-888e-4a61-b97b-ea875b30ec9c.json
create mode 100644 data/hfopenllm_v2/nisten/franqwenstein-35b/4c9fb322-735e-4644-8121-088d00f78c5f.json
create mode 100644 data/hfopenllm_v2/nisten/franqwenstein-35b/e7e7733f-682b-4e68-8f07-85f3ba7a7ae1.json
create mode 100644 data/hfopenllm_v2/nisten/tqwendo-36b/e9a4e1e2-bd55-4c3d-99eb-8fafd8f6ec44.json
create mode 100644 data/hfopenllm_v2/nlpguy/Lion-Lamarck-v.1.0.8/42ed92b3-63bc-4fa1-bc16-c19bfb73368f.json
delete mode 100644 data/hfopenllm_v2/nlpguy/Lion-Lamarck-v.1.0.8/67582e10-cebf-4938-bfca-2eb6883e2c39.json
create mode 100644 data/hfopenllm_v2/nlpguy/Lion-Lamarck-v.1.0.9/915ae579-786a-4eb2-a1bb-107a12c9c40d.json
delete mode 100644 data/hfopenllm_v2/nlpguy/Lion-Lamarck-v.1.0.9/f5fa6816-051d-4d86-bef5-ba9731b8bd9a.json
delete mode 100644 data/hfopenllm_v2/nlpguy/Lion-Lamarck-v.1.1.0/027ad81a-1271-4c25-9966-02370f6ee49d.json
create mode 100644 data/hfopenllm_v2/nlpguy/Lion-Lamarck-v.1.1.0/3489ffea-a607-4f3d-a0c2-bd17147f244f.json
create mode 100644 data/hfopenllm_v2/nlpguy/Miisce-one/7b5ba8a8-16c3-4169-b97d-13dd5d4f8395.json
delete mode 100644 data/hfopenllm_v2/nlpguy/Miisce-one/e557a750-53b2-4181-a19c-dfdeee11ee61.json
create mode 100644 data/hfopenllm_v2/nlpguy/Mistral-NeMo-Minitron-Upscale-v1/6411c44a-b2b3-4fe3-8ba4-9422a0a0b31e.json
create mode 100644 data/hfopenllm_v2/nlpguy/Mistral-NeMo-Minitron-Upscale-v2/fe344f84-7428-45af-940f-736275bc4d50.json
create mode 100644 data/hfopenllm_v2/nlpguy/Mistral-NeMo-Minitron-Upscale-v3/60956ea2-8b0b-4e4b-801a-d0689f9d46f4.json
create mode 100644 data/hfopenllm_v2/nlpguy/StableProse/1ad54bdc-419a-4dd9-9fbb-d7b7ee7038d1.json
delete mode 100644 data/hfopenllm_v2/nlpguy/StableProse/bedab076-13e7-468a-b8e8-dddb57d78583.json
delete mode 100644 data/hfopenllm_v2/nlpguy/StarFusion-alpha1/1d5c35ef-ec57-42a3-8459-6db62627c6d2.json
create mode 100644 data/hfopenllm_v2/nlpguy/StarFusion-alpha1/2ab375f0-2477-48a5-a5d9-0b5d0d7d0a84.json
create mode 100644 data/hfopenllm_v2/noname0202/Llama-3.2-4x3B-Instruct/e0525a52-d38c-4b2f-b59b-048b4bf71cb2.json
delete mode 100644 data/hfopenllm_v2/noname0202/Llama-3.2-4x3B-Instruct/e9511b0a-1083-4a0d-a9e0-97efcfc0891e.json
create mode 100644 data/hfopenllm_v2/noname0202/gemma-2-2b-it-ties/01bc964f-552b-4cda-9ed0-cf720f0c8de4.json
delete mode 100644 data/hfopenllm_v2/noname0202/gemma-2-9b-sft-jp-en-zh-v1/b32d34eb-14b5-410a-8772-041d40ca73b8.json
create mode 100644 data/hfopenllm_v2/noname0202/gemma-2-9b-sft-jp-en-zh-v1/c9e95c55-978e-485b-8a77-ab2e668e3254.json
create mode 100644 data/hfopenllm_v2/noname0202/gemma-2-9b-sft-jp-en-zh-v2/c71c606b-ccb7-48e9-a6c8-b72205ec6c06.json
delete mode 100644 data/hfopenllm_v2/noname0202/gemma-2-9b-sft-jp-en-zh-v2/ee687c56-a9b4-4205-866b-b3067c066992.json
create mode 100644 data/hfopenllm_v2/noname0202/llama-math-1b-r16-0to512tokens-test/ae1801cb-d112-4d1a-895d-c6743779846a.json
create mode 100644 data/hfopenllm_v2/noname0202/llama-math-1b-r32-0to512tokens-test/008e3601-dfc4-4bc1-bf8b-f5cef43ae098.json
create mode 100644 data/hfopenllm_v2/noname0202/llama-math-1b-r32-test/379b315d-96fb-4edb-b2d6-3dc113a10c17.json
create mode 100644 data/hfopenllm_v2/noname0202/llama-math-1b-r8-512tokens-test/8cd36aa1-6f87-4d4d-a1bf-adc87e0a26c6.json
delete mode 100644 data/hfopenllm_v2/notbdq/Qwen2.5-14B-Instruct-1M-GRPO-Reasoning/7e0f008e-4327-4ee0-a810-b5564b651233.json
create mode 100644 data/hfopenllm_v2/notbdq/Qwen2.5-14B-Instruct-1M-GRPO-Reasoning/f76ce244-29f7-44f0-9850-7291f8e4cbf1.json
create mode 100644 data/hfopenllm_v2/nothingiisreal/L3.1-8B-Celeste-V1.5/506871f1-0c87-4e8c-a270-eed7b5da2599.json
delete mode 100644 data/hfopenllm_v2/nothingiisreal/L3.1-8B-Celeste-V1.5/5b7a80ce-0fb2-4fb8-9381-184d7a434706.json
delete mode 100644 data/hfopenllm_v2/nothingiisreal/MN-12B-Starcannon-v2/1ff70031-dbe8-467a-9dbd-9fd789b9841b.json
create mode 100644 data/hfopenllm_v2/nothingiisreal/MN-12B-Starcannon-v2/c20264fd-b1f9-4e0f-9f6e-1d58f1c18cda.json
create mode 100644 data/hfopenllm_v2/nothingiisreal/MN-12B-Starcannon-v3/59f14dca-923a-41f1-b443-cc3551063f45.json
delete mode 100644 data/hfopenllm_v2/nothingiisreal/MN-12B-Starcannon-v3/633a786a-fe99-4a6e-b402-888e36e8b6c9.json
create mode 100644 data/hfopenllm_v2/nvidia/AceInstruct-1.5B/a1ba054f-b0a1-4827-b7ea-3988aa4cf1f1.json
delete mode 100644 data/hfopenllm_v2/nvidia/AceInstruct-1.5B/a26b4b3f-aad1-4d2f-a97a-bf24850a3092.json
delete mode 100644 data/hfopenllm_v2/nvidia/AceInstruct-72B/08e924b1-121c-4ff7-bf1d-06b9cb90c7c0.json
create mode 100644 data/hfopenllm_v2/nvidia/AceInstruct-72B/51d8f53f-ad7e-4dae-9e2a-0895729ff790.json
create mode 100644 data/hfopenllm_v2/nvidia/AceInstruct-7B/421119ea-0da8-4b26-a335-f2e720618c44.json
delete mode 100644 data/hfopenllm_v2/nvidia/AceInstruct-7B/d0680660-92e5-471b-a4c9-2658e7c59dd0.json
delete mode 100644 data/hfopenllm_v2/nvidia/AceMath-1.5B-Instruct/8584e2c5-dd32-4cd0-9089-1b4e17a1ffac.json
create mode 100644 data/hfopenllm_v2/nvidia/AceMath-1.5B-Instruct/b0e6bfb2-a8d4-4b1d-859a-aa821f646e57.json
delete mode 100644 data/hfopenllm_v2/nvidia/AceMath-72B-Instruct/4ba1027b-f0c1-4ed9-aa30-35c4e01e564d.json
create mode 100644 data/hfopenllm_v2/nvidia/AceMath-72B-Instruct/7c4c2ccf-7d7b-4d24-802e-20c182290d07.json
delete mode 100644 data/hfopenllm_v2/nvidia/AceMath-72B-RM/5fdd0c8f-3393-4b59-8cc1-511c524c493a.json
create mode 100644 data/hfopenllm_v2/nvidia/AceMath-72B-RM/95212a55-f382-4869-9e11-cfa201ba865b.json
create mode 100644 data/hfopenllm_v2/nvidia/AceMath-7B-Instruct/a7da2118-063c-489f-bb31-40f1b7beeefe.json
delete mode 100644 data/hfopenllm_v2/nvidia/AceMath-7B-Instruct/e1c94d59-dfa4-49cf-9052-9ce6e713a0be.json
create mode 100644 data/hfopenllm_v2/nvidia/AceMath-7B-RM/9a75ae18-8f9a-40a5-8a7b-0c38df34e9dd.json
delete mode 100644 data/hfopenllm_v2/nvidia/AceMath-7B-RM/ab9c685d-7b97-4bf4-bc0e-ffd5666e35d9.json
delete mode 100644 data/hfopenllm_v2/nvidia/Hymba-1.5B-Base/89f9149f-1f6d-4389-819a-d958b0ecc6b8.json
create mode 100644 data/hfopenllm_v2/nvidia/Hymba-1.5B-Base/a85d4a1f-fbd9-4d21-9700-9e55e30c1391.json
create mode 100644 data/hfopenllm_v2/nvidia/Hymba-1.5B-Instruct/2fd1c45e-209c-43da-ae85-d60887513a96.json
delete mode 100644 data/hfopenllm_v2/nvidia/Hymba-1.5B-Instruct/ae6e9c29-eb12-4dd5-bdbc-e84b499cf40f.json
create mode 100644 data/hfopenllm_v2/nvidia/Llama-3.1-Minitron-4B-Depth-Base/91e0e6aa-b933-4a02-a28d-8d69e698c60a.json
delete mode 100644 data/hfopenllm_v2/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF/2366b5e1-0a56-4d6e-83e6-12f12eca3ec4.json
create mode 100644 data/hfopenllm_v2/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF/6f3f3d06-2937-4c55-9b95-a62ae5253571.json
create mode 100644 data/hfopenllm_v2/nvidia/Minitron-4B-Base/9b3ffdd3-ac18-4084-9e83-1bfc61db0ec2.json
delete mode 100644 data/hfopenllm_v2/nvidia/Minitron-4B-Base/f5e52953-2dfc-4661-81cd-ed96d7a52482.json
delete mode 100644 data/hfopenllm_v2/nvidia/Minitron-8B-Base/3f6ec864-adf4-422f-85c1-19ef2417489a.json
create mode 100644 data/hfopenllm_v2/nvidia/Minitron-8B-Base/60077cbd-87af-4a00-a359-9235acb011ed.json
create mode 100644 data/hfopenllm_v2/nvidia/Mistral-NeMo-Minitron-8B-Base/577936a8-b450-4233-b633-064565b3d1a4.json
create mode 100644 data/hfopenllm_v2/nvidia/Mistral-NeMo-Minitron-8B-Instruct/470b9413-2cc8-4bf4-9e7c-0b8e99929568.json
delete mode 100644 data/hfopenllm_v2/nvidia/Mistral-NeMo-Minitron-8B-Instruct/f4c299f0-d957-4784-8512-23f72a26a095.json
create mode 100644 data/hfopenllm_v2/nvidia/Nemotron-Mini-4B-Instruct/3cbf9c73-0dc8-402e-bc94-c6d52b9f1af7.json
delete mode 100644 data/hfopenllm_v2/nvidia/Nemotron-Mini-4B-Instruct/ab7ee3ac-4d47-4ec6-a2af-8a6f7eb96684.json
create mode 100644 data/hfopenllm_v2/nvidia/OpenMath2-Llama3.1-8B/3fccb1d0-5ae1-427a-adae-37004ecbacaa.json
delete mode 100644 data/hfopenllm_v2/nxmwxm/Beast-Soul-new/4ae25fa0-54af-4f47-853f-c97cd7b312d3.json
create mode 100644 data/hfopenllm_v2/nxmwxm/Beast-Soul-new/6463183f-4043-4b96-b4d1-0bd41b4d6876.json
create mode 100644 data/hfopenllm_v2/occiglot/occiglot-7b-es-en-instruct/0b102423-1a06-4e5b-a287-710695658b63.json
delete mode 100644 data/hfopenllm_v2/occiglot/occiglot-7b-es-en-instruct/4207b47d-711c-4af8-9c70-becb270973eb.json
delete mode 100644 data/hfopenllm_v2/odyssey-labs/Astral-1-10B/4fefa5ae-d421-4883-b734-d6cc8bd8f4d6.json
create mode 100644 data/hfopenllm_v2/odyssey-labs/Astral-1-10B/b7e4ffd8-2a5a-4364-844a-a308dd7c899c.json
create mode 100644 data/hfopenllm_v2/olabs-ai/reflection_model/3fa2e3ef-a375-4ca5-9f85-7cb986313d53.json
delete mode 100644 data/hfopenllm_v2/olabs-ai/reflection_model/84b63639-3343-4568-9fa7-d353ccb5b465.json
create mode 100644 data/hfopenllm_v2/ontocord/Llama_3.2_1b-autoredteam_helpfulness-train/abd48d9d-0443-40be-a23a-68922771e14f.json
create mode 100644 data/hfopenllm_v2/ontocord/RedPajama-3B-v1-AutoRedteam-Harmless-only/436ff0a4-9907-4e56-a5f2-c97f1b13f81a.json
delete mode 100644 data/hfopenllm_v2/ontocord/RedPajama-3B-v1-AutoRedteam-Harmless-only/8b50fd5a-9f95-4213-98e2-ee66e1602cdf.json
create mode 100644 data/hfopenllm_v2/ontocord/RedPajama-3B-v1-AutoRedteam/7a654100-b206-4011-828e-fb386df27d0c.json
delete mode 100644 data/hfopenllm_v2/ontocord/RedPajama-3B-v1-AutoRedteam/9f85efe5-9fe1-4ad3-9438-da4dbf886f9d.json
create mode 100644 data/hfopenllm_v2/ontocord/RedPajama3b_v1-autoredteam_helpfulness-train/2f0e262c-a099-41f4-89f1-8b251708a960.json
delete mode 100644 data/hfopenllm_v2/ontocord/RedPajama3b_v1-autoredteam_helpfulness-train/d070a397-6bd5-4407-b030-aecdc31eb47c.json
create mode 100644 data/hfopenllm_v2/ontocord/merged_0.2_expert_0.8-stack_2x/7bf3e9ca-7d6f-4d43-b8fe-aceb8d60c7c6.json
delete mode 100644 data/hfopenllm_v2/ontocord/merged_0.2_expert_0.8-stack_2x/a0cdb8e9-7920-41eb-864d-9995c3168277.json
create mode 100644 data/hfopenllm_v2/ontocord/merged_0.2_expert_0.8/8703dbdd-12ef-457b-8cda-f570c8f5c890.json
delete mode 100644 data/hfopenllm_v2/ontocord/merged_0.2_expert_0.8/c373de55-1c2e-4cd5-a0e9-ec462f80010f.json
delete mode 100644 data/hfopenllm_v2/ontocord/merged_0.5_expert_0.5/d3dccfbc-ccc3-4d7c-abe3-4669c8efca3b.json
create mode 100644 data/hfopenllm_v2/ontocord/merged_0.5_expert_0.5/d77f3e8f-1eea-478e-babd-ba873d2d427c.json
create mode 100644 data/hfopenllm_v2/ontocord/ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained-autoredteam_helpful-0.25_helpful/783a4385-c802-4bb3-9a21-90629d16efc7.json
delete mode 100644 data/hfopenllm_v2/ontocord/ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained-autoredteam_helpful-0.25_helpful/93164a9c-187c-45eb-94e0-12910b6ebd9d.json
delete mode 100644 data/hfopenllm_v2/ontocord/ontocord_wide_7b-stacked-stage1-instruct/92e8e4af-bdfd-4fb3-8b25-b7b88470c56c.json
create mode 100644 data/hfopenllm_v2/ontocord/ontocord_wide_7b-stacked-stage1-instruct/bb4ff51e-ce3a-42f5-871e-3e5e8977bc42.json
delete mode 100644 data/hfopenllm_v2/ontocord/ontocord_wide_7b-stacked-stage1/8098c6f4-c2a4-44d9-92b5-72dfccd83395.json
create mode 100644 data/hfopenllm_v2/ontocord/ontocord_wide_7b-stacked-stage1/e80d25b5-3f4b-45a7-9472-09f98db03bf0.json
delete mode 100644 data/hfopenllm_v2/ontocord/starcoder2-29b-ls/68285cd4-9573-4fa7-af6f-321c7b4c8171.json
create mode 100644 data/hfopenllm_v2/ontocord/starcoder2-29b-ls/7fed0b1d-0d79-4784-8fd6-42f8611b1751.json
delete mode 100644 data/hfopenllm_v2/ontocord/starcoder2_3b-AutoRedteam/9ae53763-119d-40af-bdf2-97dd34eaf9e3.json
create mode 100644 data/hfopenllm_v2/ontocord/starcoder2_3b-AutoRedteam/be534cd3-8245-4370-ba6c-9687b431ee8d.json
delete mode 100644 data/hfopenllm_v2/ontocord/wide_3b-merge_test/db2c4148-d7be-4f13-a449-095b78bda7c2.json
create mode 100644 data/hfopenllm_v2/ontocord/wide_3b-merge_test/e98967b7-3aff-4baa-92eb-eff86bf09797.json
create mode 100644 data/hfopenllm_v2/ontocord/wide_3b-stage1_shuf_sample1_jsonl-pretrained/8736a22a-f980-4a01-953d-217f27050129.json
delete mode 100644 data/hfopenllm_v2/ontocord/wide_3b-stage1_shuf_sample1_jsonl-pretrained/91ac4c22-3f2a-48fd-aad8-5c26a5f07ea6.json
create mode 100644 data/hfopenllm_v2/ontocord/wide_3b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge/75a2b5c9-7c73-4bb4-8e99-af4a3a27589d.json
delete mode 100644 data/hfopenllm_v2/ontocord/wide_3b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge/c5a9d4e0-a43b-4249-abbb-f544bdb2d806.json
create mode 100644 data/hfopenllm_v2/ontocord/wide_3b_sft_stag1.2-lyrical_news_software_howto_formattedtext-merge/0e0ebdc7-a5bd-4314-9bd7-fc8a11541a4e.json
delete mode 100644 data/hfopenllm_v2/ontocord/wide_3b_sft_stag1.2-lyrical_news_software_howto_formattedtext-merge/5b9a91bc-bdca-468e-b8eb-b0e97fd97148.json
delete mode 100644 data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-no_redteam_skg_poem.no_issue/aeda694a-795c-4a42-8b40-d406b7223627.json
create mode 100644 data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-no_redteam_skg_poem.no_issue/f8579305-003b-4727-b904-bad4f363a616.json
create mode 100644 data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr.no_issue/3103f36a-4a88-4a39-8261-0b597f8d6db4.json
delete mode 100644 data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr.no_issue/3e26804b-13fa-4115-a000-d6be3339e7b1.json
delete mode 100644 data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math.no_issue/d1f24979-eced-4dca-a5a1-4e4bfee28779.json
create mode 100644 data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math.no_issue/eda9de3b-ae53-4102-b203-eddadbc50464.json
delete mode 100644 data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue/171ae287-000a-491e-9ecb-ac7d29217e9e.json
delete mode 100644 data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue/b7a0e530-08f8-4c6a-9258-733b59096812.json
create mode 100644 data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue/b7de4fa8-d97d-400f-bc3f-ecb1963a03ed.json
create mode 100644 data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue/fa6ecaf9-457e-4135-ad25-4790ebc27737.json
create mode 100644 data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories_no_orig_instr.no_issue/ebaa99c4-ff66-421d-8ba7-dae2c5fa274c.json
delete mode 100644 data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories_no_orig_instr.no_issue/f14d0513-676d-45e3-97c4-bf386f61b856.json
delete mode 100644 data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_stories.no_issue/4d673b5a-3237-433f-9e08-f614fe10edc4.json
create mode 100644 data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_stories.no_issue/e388c707-8b35-49a4-94eb-f32e983fe33e.json
delete mode 100644 data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_math.no_issue/e19c2b24-4deb-45b4-a0a9-2d055bc90446.json
create mode 100644 data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_math.no_issue/f6273192-31cf-4ee1-af45-c2f62de05330.json
create mode 100644 data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_math.no_issue/105650e6-d9cf-4106-9d55-6f3c08f2f1cf.json
delete mode 100644 data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_math.no_issue/449f6b1a-5264-4c7b-82d6-60e61841b7d6.json
delete mode 100644 data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_r1_generics_intr_math_stories.no_issue/2e22170f-839d-482d-bc8a-ed345aa900af.json
create mode 100644 data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_r1_generics_intr_math_stories.no_issue/a1d23749-40c0-4ccb-a104-bf0de63bc2bd.json
create mode 100644 data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_fictional_lyrical/4e4b4cf9-48d5-4ff6-92c0-1e9d7b874b6b.json
delete mode 100644 data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_fictional_lyrical/75f9224b-df09-4693-8b04-c00e17785250.json
create mode 100644 data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_formatted_text/3c4713a3-3973-4a04-9c4a-a6782251734e.json
delete mode 100644 data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_formatted_text/4bd52ced-e009-4805-8d0a-ce37b25f103c.json
delete mode 100644 data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_how-to/14e2e5a7-d43c-4a02-9af6-6c378778d7fc.json
create mode 100644 data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_how-to/de70c700-a007-4e87-a3db-941ee285eb1f.json
create mode 100644 data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_math/a1324a7f-1911-4fa9-8d83-be891f752a61.json
delete mode 100644 data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_math/d2d7e55e-87a3-4390-a1e4-47a2d0c62bd2.json
create mode 100644 data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_news/9c4af0df-f538-4755-8cd0-eec6b2b26524.json
delete mode 100644 data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_news/a13cf03f-cf1a-49a8-ba6c-d6e3b27036fa.json
delete mode 100644 data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_software/dab94fc0-5bea-4875-a802-8ef793bc7fc7.json
create mode 100644 data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_software/fde650a6-a5d1-4edc-bd64-8be806663263.json
create mode 100644 data/hfopenllm_v2/ontocord/wide_6.6b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge-stacked/96dd1a08-b166-4d8e-ac31-5e948adf931b.json
delete mode 100644 data/hfopenllm_v2/ontocord/wide_6.6b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge-stacked/e16d5502-1721-424f-a149-9a6233a2183a.json
delete mode 100644 data/hfopenllm_v2/oobabooga/CodeBooga-34B-v0.1/3086045f-e22d-4aca-9459-fc64454a2fb2.json
create mode 100644 data/hfopenllm_v2/oobabooga/CodeBooga-34B-v0.1/3b90b9db-a68e-4ee9-bd4d-a18cec357753.json
create mode 100644 data/hfopenllm_v2/oopere/Llama-FinSent-S/444a6ace-77d4-4d93-b80b-ff5c7e2f6888.json
create mode 100644 data/hfopenllm_v2/oopere/Llama-FinSent-S/7e11a778-fccf-4a91-81cf-c06f1a5c77c4.json
create mode 100644 data/hfopenllm_v2/oopere/pruned10-llama-3.2-3B/e5d126d7-e0bf-43dc-95c0-184ea1d586ea.json
create mode 100644 data/hfopenllm_v2/oopere/pruned20-llama-1b/d05b129c-6b9e-4e6b-80fc-af65db620c5d.json
create mode 100644 data/hfopenllm_v2/oopere/pruned20-llama-3.2-3b/d9792fac-29c1-45b2-b649-cdebb6830e2f.json
create mode 100644 data/hfopenllm_v2/oopere/pruned40-llama-1b/fcc2f06a-e6c8-4c28-bf22-4ee582392912.json
create mode 100644 data/hfopenllm_v2/oopere/pruned40-llama-3.2-1B/c6e13327-90b3-440d-9367-dbcec54dd6cc.json
create mode 100644 data/hfopenllm_v2/oopere/pruned40-llama-3.2-3b/30b02429-350c-4d86-aded-ba8597bec4d5.json
create mode 100644 data/hfopenllm_v2/oopere/pruned60-llama-1b/7d1ee802-106e-4313-ba1d-72d5a0676c88.json
create mode 100644 data/hfopenllm_v2/oopere/pruned60-llama-3.2-3b/1b3af020-f65e-44b8-a9a2-ad60fa686427.json
create mode 100644 data/hfopenllm_v2/open-atlas/Atlas-Flash-1.5B-Preview/6e40871d-bc23-4f1c-a005-f5b8eb096f84.json
delete mode 100644 data/hfopenllm_v2/open-atlas/Atlas-Flash-1.5B-Preview/96ae17c1-69ef-46c6-bb15-c1b576ba8131.json
create mode 100644 data/hfopenllm_v2/open-atlas/Atlas-Flash-7B-Preview/1ab33ed2-ea3b-4c6f-a2ac-2465ddd844f4.json
delete mode 100644 data/hfopenllm_v2/open-atlas/Atlas-Flash-7B-Preview/6fd7bb75-6648-4bfe-a232-f9efe4b7c45e.json
delete mode 100644 data/hfopenllm_v2/open-neo/Kyro-n1-3B/0a8b6c55-da69-4f4d-98cc-9d3f5b82d9e2.json
create mode 100644 data/hfopenllm_v2/open-neo/Kyro-n1-3B/ec601f5d-bf19-4407-ac41-6b9272d94735.json
create mode 100644 data/hfopenllm_v2/open-neo/Kyro-n1-7B/87e53761-e8b7-4032-ae7a-c3a91704d115.json
delete mode 100644 data/hfopenllm_v2/open-neo/Kyro-n1-7B/f69621cf-6e46-4805-b8f2-d7a7cba3a0e4.json
create mode 100644 data/hfopenllm_v2/open-thoughts/OpenThinker-7B/59492d86-4b85-4865-84e9-84ab4ace630c.json
delete mode 100644 data/hfopenllm_v2/open-thoughts/OpenThinker-7B/feb0d715-d1bc-4b0e-8585-a0646c07244b.json
create mode 100644 data/hfopenllm_v2/openai-community/gpt2-large/cc082df2-259c-44c1-abe4-ef349056a2a9.json
create mode 100644 data/hfopenllm_v2/openai-community/gpt2-medium/3f069053-b24e-4242-9302-d46b82e511aa.json
create mode 100644 data/hfopenllm_v2/openai-community/gpt2-xl/62cd9bcb-a74c-40b9-be84-a0077235ae3c.json
create mode 100644 data/hfopenllm_v2/openai-community/gpt2/b4cd25f1-87d5-4173-a4d3-928444f6cb37.json
create mode 100644 data/hfopenllm_v2/openai-community/gpt2/ddd4716e-d8ae-46a1-8fb4-c27e2da40e6e.json
delete mode 100644 data/hfopenllm_v2/openai/AI-Sweden-Models/gpt-sw3-40b/e791a3d6-928e-43c9-96ee-156901e8b18b.json
delete mode 100644 data/hfopenllm_v2/openai/DeepAutoAI/causal_gpt2/bf683545-a6df-4deb-9a91-ea6b8eae8be7.json
delete mode 100644 data/hfopenllm_v2/openai/DeepAutoAI/d2nwg_causal_gpt2/6b5b21c7-9284-4117-a63c-65628604e1a5.json
delete mode 100644 data/hfopenllm_v2/openai/DeepAutoAI/d2nwg_causal_gpt2_v1/f822093a-2bdc-4284-8af2-8048d09afeb2.json
delete mode 100644 data/hfopenllm_v2/openai/EleutherAI/gpt-j-6b/1f140f2a-c9cb-49fb-8bcd-e59f699fd12a.json
delete mode 100644 data/hfopenllm_v2/openai/EleutherAI/gpt-neo-1.3B/dc615b98-9255-4a6e-afe2-c79d59362520.json
delete mode 100644 data/hfopenllm_v2/openai/EleutherAI/gpt-neo-125m/cff09938-5918-4825-b974-194019b48165.json
delete mode 100644 data/hfopenllm_v2/openai/EleutherAI/gpt-neo-2.7B/6ebf0016-f747-4ccd-82fa-db427733b2f9.json
delete mode 100644 data/hfopenllm_v2/openai/EleutherAI/gpt-neox-20b/0da6366b-b997-411e-ac76-c25b061e13f8.json
delete mode 100644 data/hfopenllm_v2/openai/Kimargin/GPT-NEO-1.3B-wiki/9084d476-dee7-4447-9955-e0f066bd35ba.json
delete mode 100644 data/hfopenllm_v2/openai/NYTK/PULI-GPTrio/685fc779-4f8b-4110-82da-5a49697153a0.json
delete mode 100644 data/hfopenllm_v2/openai/Sharathhebbar24/chat_gpt2_dpo/ce90bca7-f999-44ef-9b72-1fdb4ac68eb0.json
delete mode 100644 data/hfopenllm_v2/openai/distilbert/distilgpt2/a21cd9f0-6006-4587-bcd1-f1d42dfce7ba.json
delete mode 100644 data/hfopenllm_v2/openai/gpt2/43c1b559-e9e8-477e-95d9-1c28ac5d265c.json
delete mode 100644 data/hfopenllm_v2/openai/gpt2/e28a8f11-68f6-464f-b1b8-21938cb41aa3.json
delete mode 100644 data/hfopenllm_v2/openai/langgptai/Qwen-las-v0.1/cfaa9b4e-8588-45a5-9b9d-4268a71b128b.json
delete mode 100644 data/hfopenllm_v2/openai/meraGPT/mera-mix-4x7B/152e8d2f-8470-45b2-8318-9b6c44438978.json
delete mode 100644 data/hfopenllm_v2/openai/microsoft/DialoGPT-medium/3c70b5d5-784d-41fb-8ca7-eabd6a96a195.json
delete mode 100644 data/hfopenllm_v2/openai/openai-community/gpt2-large/15499118-2a47-4a6f-8c86-158a87a9350f.json
delete mode 100644 data/hfopenllm_v2/openai/openai-community/gpt2-medium/f68c55dc-0d74-4c75-ac57-62f23cce01b5.json
delete mode 100644 data/hfopenllm_v2/openai/openai-community/gpt2-xl/39a68088-0a01-482d-81b3-c6a84d98d0ca.json
delete mode 100644 data/hfopenllm_v2/openai/openai-community/gpt2/435a8268-cf26-4c78-8789-758dd32759b1.json
delete mode 100644 data/hfopenllm_v2/openai/openai-community/gpt2/a18409fa-1372-401e-8ae5-f25eaa6386d2.json
delete mode 100644 data/hfopenllm_v2/openai/postbot/gpt2-medium-emailgen/a661e335-7ed5-43b9-aa3b-1e027cebdb75.json
delete mode 100644 data/hfopenllm_v2/openai/sumink/ftgpt/ba4e0ed2-201a-4007-afbe-65e8276d853c.json
delete mode 100644 data/hfopenllm_v2/openai/togethercomputer/GPT-JT-6B-v1/03196258-8cc8-4c57-badf-9085ede8d658.json
delete mode 100644 data/hfopenllm_v2/openai/universalml/NepaliGPT-2.0/07a71559-e618-4ba7-8721-bc6834f1c727.json
delete mode 100644 data/hfopenllm_v2/openai/yuchenxie/ArlowGPT-3B-Multilingual/fd270937-c889-4a2b-aada-341a44c80d46.json
delete mode 100644 data/hfopenllm_v2/openai/yuchenxie/ArlowGPT-8B/af890cb6-9d90-41b0-a7a1-c87f3584b93c.json
create mode 100644 data/hfopenllm_v2/openbmb/MiniCPM-S-1B-sft-llama-format/1e5b62a3-018b-429a-b2b4-325545ee99dc.json
delete mode 100644 data/hfopenllm_v2/openbmb/MiniCPM-S-1B-sft-llama-format/53b78e02-9491-4f3b-a03b-7c015dde640a.json
delete mode 100644 data/hfopenllm_v2/openchat/openchat-3.5-0106/51cd5c94-7c87-4758-aadc-46acf20ab4b0.json
create mode 100644 data/hfopenllm_v2/openchat/openchat-3.5-0106/958d410e-ce43-44c0-8a56-685c0a618408.json
create mode 100644 data/hfopenllm_v2/openchat/openchat-3.5-1210/57c53f20-aa32-49fd-926a-f26c9d0759d4.json
delete mode 100644 data/hfopenllm_v2/openchat/openchat-3.5-1210/6b3c8f0b-25ed-4ae3-be89-a91815091de0.json
delete mode 100644 data/hfopenllm_v2/openchat/openchat-3.6-8b-20240522/2305b9e7-1c2b-42d7-b306-802e32d53e0f.json
create mode 100644 data/hfopenllm_v2/openchat/openchat-3.6-8b-20240522/76def522-6fe1-458f-bfbf-99b50ece3367.json
delete mode 100644 data/hfopenllm_v2/openchat/openchat_3.5/c2d66fd5-6c95-4b8e-b87f-c8f0ae00271a.json
create mode 100644 data/hfopenllm_v2/openchat/openchat_3.5/c467bc88-6769-48ac-abd4-867ee38bbe57.json
delete mode 100644 data/hfopenllm_v2/openchat/openchat_v3.2/2ee1a517-ef52-469e-ac5d-f14e3d72c87c.json
create mode 100644 data/hfopenllm_v2/openchat/openchat_v3.2/801681eb-66f4-46e0-bb2b-7ba4b46679af.json
delete mode 100644 data/hfopenllm_v2/openchat/openchat_v3.2_super/b7b3fcb7-bbc7-4f39-9daa-7a54362d5d68.json
create mode 100644 data/hfopenllm_v2/openchat/openchat_v3.2_super/cdd0ea1c-b17a-4816-953c-1d7164c64114.json
create mode 100644 data/hfopenllm_v2/orai-nlp/Llama-eus-8B/b2060893-1f7d-4e7a-a458-3623147ac118.json
delete mode 100644 data/hfopenllm_v2/oxyapi/oxy-1-small/62126b06-5bd2-451f-a76c-7c227690f149.json
create mode 100644 data/hfopenllm_v2/oxyapi/oxy-1-small/cf8aac35-679a-4ebb-bca8-6e0f2d42e71b.json
create mode 100644 data/hfopenllm_v2/ozone-ai/0x-lite/34bfe887-5a3a-4626-997e-c35d3a0ec341.json
delete mode 100644 data/hfopenllm_v2/ozone-ai/0x-lite/9b5b23bc-44bb-4d47-91a2-18e23571743d.json
delete mode 100644 data/hfopenllm_v2/ozone-research/Chirp-01/69a65ae3-71fe-4e33-be2d-20bc0c25969a.json
create mode 100644 data/hfopenllm_v2/ozone-research/Chirp-01/b81acc47-6fd5-4f89-8c70-f8f14b677e04.json
create mode 100644 data/hfopenllm_v2/paloalma/ECE-TW3-JRGL-V1/30b977a8-7882-49be-8621-9ee3fce270ec.json
delete mode 100644 data/hfopenllm_v2/paloalma/ECE-TW3-JRGL-V1/d86238d3-3a4e-467a-8ce1-e6a4a903aa3b.json
create mode 100644 data/hfopenllm_v2/paloalma/ECE-TW3-JRGL-V2/3367fd79-713c-4691-80cd-4abb6b2818ef.json
delete mode 100644 data/hfopenllm_v2/paloalma/ECE-TW3-JRGL-V2/d8d1a5b1-cc9a-4af9-b95f-db78f7edf70e.json
delete mode 100644 data/hfopenllm_v2/paloalma/ECE-TW3-JRGL-V5/9468fda5-a233-4d19-9a99-602e694f4a02.json
create mode 100644 data/hfopenllm_v2/paloalma/ECE-TW3-JRGL-V5/add899b8-f3e6-4d87-8846-8254f4dfbd5f.json
delete mode 100644 data/hfopenllm_v2/paloalma/Le_Triomphant-ECE-TW3/49f92222-f6cd-47e5-968d-10dc4345dd90.json
create mode 100644 data/hfopenllm_v2/paloalma/Le_Triomphant-ECE-TW3/53829ec0-f233-4b61-a672-6a467823caaa.json
delete mode 100644 data/hfopenllm_v2/paloalma/TW3-JRGL-v2/525f2e27-bd77-49e9-85db-61efddbdd186.json
create mode 100644 data/hfopenllm_v2/paloalma/TW3-JRGL-v2/e2b41200-bff2-4835-a0ea-27ff56937570.json
create mode 100644 data/hfopenllm_v2/pankajmathur/Al_Dente_v1_8b/3d33f26d-72be-451e-bcf0-501e0bc2f1db.json
delete mode 100644 data/hfopenllm_v2/pankajmathur/Al_Dente_v1_8b/9924f2bd-abe5-431c-aa06-be24952ca363.json
create mode 100644 data/hfopenllm_v2/pankajmathur/model_007_13b_v2/3b4c05fc-2ccf-46db-8d64-045508f6614b.json
delete mode 100644 data/hfopenllm_v2/pankajmathur/model_007_13b_v2/a108864f-40d6-492b-8440-1cbb5d87a5fe.json
create mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_3b/af83a91c-3b07-48c6-9726-5bd77347f810.json
delete mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_3b/bebbfd98-fdba-413d-9e7d-06af8bd4d5a7.json
create mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_7b/48759b07-9aea-42bd-8d73-9c4208d2789f.json
delete mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_7b/773c97e1-0e43-46ae-a134-8a08ca9b5094.json
create mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_phi-4/68820679-55f4-494d-91a0-0db1bccb8983.json
create mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v2_7b/029774ac-a63d-4acc-a37c-4194e4afdecc.json
delete mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v2_7b/036c4f96-2d08-40a1-968d-293e0b3a1ed0.json
create mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v3_13b/146df856-e2c8-41eb-b860-ceb78c126e55.json
delete mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v3_13b/d3ba7ff3-e0d7-48e3-b63d-9648a193679f.json
create mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v3_70b/74c6bea7-ad16-4f08-a2b7-9c894b9ce207.json
delete mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v3_70b/beae9826-35b2-4758-a20a-10c8402daa42.json
delete mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v3_7b/69cb8c68-5847-48f0-b2bd-0756ec761837.json
create mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v3_7b/b5e97b2d-d8a2-485a-8b0a-71590e4a376e.json
delete mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v5_8b/12a231e0-deed-4d2b-9904-79a8b543d200.json
create mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v5_8b/e79d0a8c-caec-4dec-b119-3229ffa69a73.json
delete mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v5_8b_dpo/1dad9bda-fbc8-499b-aab0-29be59b6921d.json
create mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v5_8b_dpo/2c760893-b52a-40a9-9420-fb193a62a5c3.json
delete mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v5_8b_orpo/cf3f79fc-1fe2-4b55-a808-5664cc1f1809.json
create mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v5_8b_orpo/ef9b84e0-68b0-4caa-9980-96ea5e7f440b.json
delete mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v6_8b/e45a0914-baee-4fd4-a231-3495b18db9a9.json
create mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v6_8b/fb48aff8-3f6b-4934-9fb8-d72bf8614d6f.json
delete mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v6_8b_dpo/3e875ab6-6065-4400-8038-0fe6437f44d5.json
create mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v6_8b_dpo/9450acd9-16b6-49a2-9b73-cf1161b96df3.json
create mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v7_72b/0d50ec2d-5dd4-487e-80cb-9533246a9876.json
delete mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v7_72b/702f1485-2941-4e27-9c96-11cee2449df8.json
create mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v7_7b/f6e6827d-fbf8-49cd-bdad-e8c7ea87550a.json
delete mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v7_7b/f801b633-5767-4b74-a0db-e474c9349820.json
delete mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v8_1_70b/02201ae1-ec65-496c-bfdb-0dec8aa5308d.json
create mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v8_1_70b/c5e48fd8-0eea-46a9-8790-1745923561d3.json
create mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v9_0_3B-Instruct/870c7739-8886-47df-8e20-09bfae03b9c5.json
delete mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v9_0_3B-Instruct/bc38a266-c3bd-4ecf-8149-6b26bb32803b.json
delete mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v9_1_1B-Instruct/65d0aca2-06ae-4a09-9fb2-2bb54939a554.json
create mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v9_1_1B-Instruct/d8eb5fd1-f1d4-481d-85af-88a11d7b6f6f.json
create mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v9_2_14B/6625b2e0-1f65-4dc5-9913-ceb0e82e6439.json
delete mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v9_2_14B/e10e45b8-0d37-4905-9ebf-acc7922b7ea3.json
create mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v9_2_70b/24e7df20-e046-48f7-909e-502d0c70216a.json
delete mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v9_2_70b/69093327-3726-469d-9750-b9fa39423310.json
create mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v9_4_70B/7920f562-9e7f-4a64-85f4-584b13af44de.json
delete mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v9_4_70B/e3746ac6-3ee4-4d95-b800-509bed07aec3.json
delete mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v9_5_1B-Instruct/2f2f821b-037b-4f3f-87f6-16703c0dc61a.json
create mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v9_5_1B-Instruct/c6620817-69fe-40e2-bb0a-1e9c739ab65d.json
create mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v9_5_1B-Instruct_preview/520e2d66-4143-493b-8533-64f86c6d676e.json
delete mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v9_5_1B-Instruct_preview/7836190d-33df-45c2-b020-8ccec01de1f3.json
delete mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v9_5_3B-Instruct/2ff28335-81a0-4d61-b221-a7edb877da4a.json
create mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v9_5_3B-Instruct/993bdfd2-3a88-4de3-9ed9-9b7b63c0f4f5.json
delete mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v9_6_1B-Instruct/332f06db-35f1-4759-b3f8-973b1fe6fb9e.json
create mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v9_6_1B-Instruct/4e1be694-cc4d-4943-a8e4-74913cfb2ebe.json
delete mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v9_6_3B-Instruct/1cc45753-aeed-4804-a6da-413437dbb940.json
create mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v9_6_3B-Instruct/42c174d1-6211-4438-bb9a-24f3cf386a6d.json
create mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v9_7_1B-Instruct/625bf39b-a118-4ec6-82d0-5405cf70ba53.json
delete mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v9_7_1B-Instruct/fad200e0-05bb-42d7-b7f3-caba938ca09d.json
delete mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v9_7_3B-Instruct/42a8b694-ef8f-47d2-8da3-e4db453641b3.json
create mode 100644 data/hfopenllm_v2/pankajmathur/orca_mini_v9_7_3B-Instruct/e09cb198-d259-42ea-a356-6efe61b1e12b.json
create mode 100644 data/hfopenllm_v2/paulml/ECE-ILAB-Q1/5838b130-c2e6-400c-80b7-6822efb5db2c.json
delete mode 100644 data/hfopenllm_v2/paulml/ECE-ILAB-Q1/83024ec4-e4a4-4dd3-adf4-654c90c3a271.json
create mode 100644 data/hfopenllm_v2/pints-ai/1.5-Pints-16K-v0.1/52b51638-64cd-4b19-8fc7-c223d50bc549.json
delete mode 100644 data/hfopenllm_v2/pints-ai/1.5-Pints-16K-v0.1/8dff3ec1-066f-4f5f-ac57-879d693ee3fb.json
create mode 100644 data/hfopenllm_v2/pints-ai/1.5-Pints-2K-v0.1/28b3178b-c963-4267-9649-3f7fc10fba3c.json
delete mode 100644 data/hfopenllm_v2/pints-ai/1.5-Pints-2K-v0.1/2ed76213-e562-4b36-bf46-93f09df88ee9.json
create mode 100644 data/hfopenllm_v2/piotr25691/thea-3b-25r/748298a2-5042-4636-ac7e-051c28916f3a.json
delete mode 100644 data/hfopenllm_v2/piotr25691/thea-3b-25r/d8fefd3b-78e6-472e-854c-15f40ace7878.json
create mode 100644 data/hfopenllm_v2/piotr25691/thea-c-3b-25r/03bcd4e6-1620-424a-9200-c0cf4b73bbd2.json
delete mode 100644 data/hfopenllm_v2/piotr25691/thea-c-3b-25r/828bcb36-3902-4157-9323-a5dcf592a795.json
create mode 100644 data/hfopenllm_v2/piotr25691/thea-rp-3b-25r/c7fba530-63cc-4ece-a171-4a2919aa8057.json
delete mode 100644 data/hfopenllm_v2/piotr25691/thea-rp-3b-25r/cd34091b-2639-476c-8419-e6c327cfabc7.json
create mode 100644 data/hfopenllm_v2/postbot/gpt2-medium-emailgen/c25c1046-a8d5-4f4b-9a72-c4591cfb4023.json
create mode 100644 data/hfopenllm_v2/prince-canuma/Ministral-8B-Instruct-2410-HF/c3800a5c-310b-41cb-9b07-cfc1f1b13256.json
delete mode 100644 data/hfopenllm_v2/prince-canuma/Ministral-8B-Instruct-2410-HF/f98bc033-55c9-45c1-a101-3881507bb733.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-512k-Base/e8e2b99f-cf83-4776-9117-aa2b5d9c8068.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-512k-Instruct/2da19e45-117f-446b-b956-b35a20bb7411.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-512k-Instruct/72eccc9b-df63-4b2f-8975-a1c89940802c.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-512k-Instruct/9e982a33-19cb-4381-8560-884bc8946a2b.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-512k-Instruct/e30fead2-6516-480f-abd8-6ad0713cb053.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-64k-Base/9130a862-cfd7-47ce-a92a-f60438739491.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-64k-Instruct/858d3717-fcb2-45d9-8eaa-1b00ae0ca918.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-64k-Instruct/9c801b4e-228b-42a8-a7f7-ea2bf125d716.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-CPO/342c7c0f-92f0-4296-8e0a-519724133bb5.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-CPO/5f1f137b-cb2f-4ee6-8bc9-5e0b94939f35.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-DPO/6feca911-7a6e-43a2-b59d-7cb48070fe8e.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-DPO/8afa4f43-96fb-46b1-84e8-bf98928aa484.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-IPO/71d5525f-c257-4b88-b84d-d75b3a8328fc.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-IPO/d3ad9813-273e-47de-be16-312cc67ac64f.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-KTO/317205ee-2cc6-4523-9662-be6508314b08.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-KTO/6c0d909f-ee4f-4e1a-8db9-abf1920359ed.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-ORPO/3b5fe65a-50a1-4036-b81a-86117356cab9.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-ORPO/ba821a1c-3b8e-4952-9f7b-b1f18923c4e7.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-RDPO/812ac262-97f4-485e-93de-f8d420b8658e.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-RDPO/985ac874-e7eb-4431-81c2-a79f3865c696.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-RRHF/39cd7eb0-781e-47b6-8eaa-c72e702f778f.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-RRHF/cc9fb769-3d0b-4e53-9942-d4f99203a629.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-SLiC-HF/596f4d11-f091-42c3-9f1e-b95e0ba6dbd9.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-SLiC-HF/9411a8a4-306e-43da-96d7-c93eb3aac398.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-SimPO/314cfcd7-674a-49d2-adf5-6d45c30e2382.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-SimPO/c93feb32-0526-44ac-b3ed-95f08c37cc9f.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT/1a3b0f7a-afb6-4002-9321-23a86f000c5c.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT/494df3f9-7ce9-4f81-99c4-e6100d6e4187.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-CPO-v0.2/2de21869-2851-43f8-b5c3-a4b9e0e6e3ac.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-CPO-v0.2/8d29363d-3096-4c54-a40e-acf4a7318a04.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-CPO/8cea452d-63b8-4e82-9511-64c94f8e140d.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-CPO/95eb37c8-2a58-45e3-bd86-2c305e3cb5dd.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-DPO-v0.2/5e5b5424-1d48-4a5e-8775-52c75609c338.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-DPO-v0.2/6ae028c9-19d9-447b-93c1-c4548aef84f9.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-DPO/73787033-ed1d-4d2e-b7b2-e886ef6f1036.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-DPO/81c7a3df-7e92-4efa-a323-51ea3e0a4fa6.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-KTO-v0.2/54c9403f-2525-45c0-a585-9ff598f95f6b.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-KTO-v0.2/5f35c42b-2d34-42bc-b94e-127a678cad2c.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-KTO/77d0d88d-7ca8-4f3e-8b79-295f53140635.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-KTO/e8602fbb-422c-464e-87f4-79c6e1a4afcf.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-ORPO-v0.2/28bf3b2a-6c0c-4994-aaf5-80b67d82a955.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-ORPO-v0.2/727f27e3-2a3f-4572-8db5-87e498c4b6ca.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-ORPO/8789e9aa-5cfb-4eca-9795-540c5a9b4bb4.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-ORPO/b6e0cc97-27cf-4082-a908-95d5c39014b8.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RDPO-v0.2/1c3ea099-8b3b-4184-9f30-e7cdeea8f24e.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RDPO-v0.2/3b77ec51-fd47-4bc7-9e96-ed46202fef7c.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RDPO/041d45dd-c371-4e9c-9cda-a63e3d7a1b2d.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RDPO/b24cdd3f-3e44-4ebe-b2b4-209ee0bbfbd3.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RRHF-v0.2/bc221748-c03b-4fee-9147-8f63b0017f0c.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RRHF-v0.2/e47a3cab-dfef-47f6-9377-9ee32489bab6.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RRHF/1e4481fe-458b-4c23-8a6c-55439fb8b4fd.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RRHF/e93eff52-c6e1-474e-8089-f672000fe1e5.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SLiC-HF-v0.2/5a5746dd-0270-4151-b774-8eaa6860d5e0.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SLiC-HF-v0.2/6421e9dc-e7ca-4e1c-9f4f-1d1ac409c4d1.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SLiC-HF/55f43b53-6ed9-4c16-bf75-c968999a6f36.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SLiC-HF/aaa9cd01-cca9-489c-91e0-79ff026eb258.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SimPO-v0.2/5e499da1-f8c1-4830-828c-7d4013ea0243.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SimPO-v0.2/6ce93e70-04b1-46b8-b3e3-7eb0df35e1c1.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SimPO/95096a89-2baf-4b14-bc6e-1f30e920c086.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SimPO/fcd2c5e3-ebfd-4c1c-ac8a-d28ec08f1bf2.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-CPO/4c2ab1ed-8177-4518-ae3d-754f9711369d.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-CPO/f1651632-2787-47cf-b471-89d1b89a6b01.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-DPO/133d7669-db7f-47b6-b838-51b9577a9e68.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-DPO/e1fb2ac9-8f60-4dc1-9e0d-99fcb91a53a9.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-IPO/b402d383-b80e-4cd9-b2ec-a1e435f67ac5.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-IPO/d3accbc1-d698-4357-ab08-0b98fb49b4ed.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-KTO/5388a25a-5780-4ae1-999f-172b558a7b52.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-KTO/a0048817-4f45-4bca-ac1a-b7e0c25bd7ab.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-RDPO/034fa9fa-4103-428d-a50e-b117ef5e0726.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-RDPO/9e4143ff-d461-4fdb-8bc7-86f959f69e68.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-RRHF/5d843bd7-b34b-41d4-92ff-c25a709b4930.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-RRHF/fbbd671a-3005-448a-bc15-718ba23bcf72.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-SLiC-HF/2c28dcd3-af20-41ab-9234-a8296ecc98c0.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-SLiC-HF/87975b2f-298b-4297-8f4d-e5bb1bf5d113.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-SimPO/41bb8174-f3d6-4862-b892-dbc9f6e2e696.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-SimPO/9bed5ccb-35c0-40e1-89b8-617656787052.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-CPO/259a0166-2ee3-409a-85ce-963d90d05ae7.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-CPO/683ad2cd-5e39-4088-b98b-94d89dda7b88.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-DPO/08ffd7ab-ccca-4258-be6d-cbc151cc43aa.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-DPO/0df26c01-7fae-4254-8e97-e03c6078d861.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-IPO/4b6efad4-c697-4f0a-8d24-75dc49d8ec06.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-IPO/fed6b773-040e-409b-884e-a97a1abfedc0.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-KTO/4986c30a-85b0-4263-9be4-d69c9b067e0c.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-KTO/ff079687-4519-4f0b-bb1e-2b447cb2b4c9.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-ORPO/36735132-1510-42cf-a68a-c46507f52edb.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-ORPO/47b5a878-1a4a-425f-ae6f-ac286f681cca.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-RDPO/992a6862-46b9-415e-858f-2eff8709ca81.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-RDPO/9989efbb-bd01-4c7c-bf30-67fa81698906.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-RRHF/0a5ce684-675e-4fbe-b141-df12903228f9.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-RRHF/c6391381-c973-4068-b72c-af08762d9e5c.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-SLiC-HF/0f6e18e6-1b0f-43f4-a9af-6632f6ce63cc.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-SLiC-HF/8b5493df-86fd-495a-8dce-9c5398795fc9.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-SimPO/56d9ee92-6774-4c9b-9861-c5f0a9945e7c.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-SimPO/a3d0b6ec-e2be-4ca5-b083-df3c7ea0b385.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Sheared-LLaMA-1.3B/d3e753cc-37fc-4d77-8b2d-da90a7843d60.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/Sheared-LLaMA-2.7B/eb08ef6f-6631-47c4-8f52-bf9454ad34b6.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/gemma-2-9b-it-DPO/2207b154-c5d4-4e5a-ade0-271e62d6345f.json
delete mode 100644 data/hfopenllm_v2/princeton-nlp/gemma-2-9b-it-DPO/5ed0019b-dc1e-4dd8-82e5-2d4cdb28beb9.json
create mode 100644 data/hfopenllm_v2/princeton-nlp/gemma-2-9b-it-SimPO/f4161154-7777-4261-9275-a3002a1305d8.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Bellatrix-1.5B-xElite/7f1c6c88-823f-4597-9794-bf05c076d4d3.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Bellatrix-1.5B-xElite/8523812d-1db6-4a9d-b06b-ac904191789d.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Bellatrix-Tiny-1.5B-R1/4e78f82e-aa31-414c-9c59-9c8e318fff17.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Bellatrix-Tiny-1.5B-R1/6cd9ea81-618d-444e-a892-d4f9819daa67.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Bellatrix-Tiny-1B-v2/2217326d-377a-4503-8180-206c12c87436.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Bellatrix-Tiny-1B-v2/715be726-e0e3-4589-91cf-85e41dbcbf8a.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Blaze-14B-xElite/3bbb10fc-e3b9-4c6a-ac35-ee5de9ecd330.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Blaze-14B-xElite/c4041b70-acce-4088-a3b9-299d4424e240.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/COCO-7B-Instruct-1M/01124f11-b739-422b-97f7-062074b8d0fb.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/COCO-7B-Instruct-1M/a7b425bc-9160-44ed-abf1-18c3b84cede2.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite-1M/0c883e9c-4cec-4c65-aa10-96e0d0de2e1f.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite-1M/7cc4c93b-7c43-4bed-84a3-fa1cd9130abb.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite-Stock/74d10ea5-3d08-4bb2-9246-5e053eb20fea.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite-Stock/bf3aa551-f9c6-4203-b2d4-55cf9e6e2872.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite/2eae8905-5338-4a78-86e7-d354d06efa23.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite/487e1883-01c6-4714-9447-67837c78655b.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite/79bccc27-27a0-4194-9c46-5e89b0f21b9e.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite/9dcc4121-e046-49c7-969e-7255b0c32d3d.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite2-R1/6eeb591b-aed2-4cdd-85bb-75011c9c5760.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite2-R1/dd7d4acd-549a-467b-b461-0eba5b019122.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite2/159969cc-32c5-4f6f-b586-8e6d44180b44.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite2/689d38cd-898e-43ec-92e8-238cefac6776.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite3/2edb276e-86c5-4bde-a696-4f68fb659b4e.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite3/b80e559d-e519-4678-8abc-ee5591b81fac.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite4/380cd349-5309-40b8-b549-ac6d6d42331a.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite4/90c137c9-939d-4e77-9fcc-9e33551a6121.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Merge/6d4dfc45-b7ff-47a2-bcf0-f12641365cbf.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Merge/f25d6fef-d337-4cf7-ba05-ca6ff5eccd52.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Calcium-Opus-20B-v1/9c414577-7f2d-487a-9f2b-7675e0532ac1.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Calcium-Opus-20B-v1/c6f92306-dcdc-4549-bfc2-feb62a3a6ef6.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Codepy-Deepthink-3B/96c64d23-d23d-486c-83a4-4c0ab4f09d60.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Codepy-Deepthink-3B/adb6f7d5-db2f-49b1-aab4-1fd3dfcb7e34.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Coma-II-14B/243abf0b-0f88-4b4f-ab51-6c8aebaf19be.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Coma-II-14B/785e4cde-ec97-4e36-8ee3-3fb4c2543901.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Condor-Opus-14B-Exp/438fb728-d6ad-4c28-a43c-ff82d522cd50.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Condor-Opus-14B-Exp/7b9f72e6-0280-46ba-8645-ab8dcb9ddf4d.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Cygnus-II-14B/120d9ddf-0e6e-4fb9-9250-019d1fbfdc28.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Cygnus-II-14B/94b45b8d-b754-4fb4-843d-b7ffeafc4f1b.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Deepthink-Llama-3-8B-Preview/5618fc82-d455-4261-8e34-1190d70fd3f3.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Deepthink-Reasoning-14B/343e0d36-5470-4865-aeeb-a9963b38f90a.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Deepthink-Reasoning-14B/395f6339-3fca-4f4d-befc-2d231008efdd.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Deepthink-Reasoning-7B/10d2454a-ae69-43b6-962a-77102645ed56.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Deepthink-Reasoning-7B/b22696ac-7074-44f2-b72f-c59ca0a41ce6.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Dinobot-Opus-14B-Exp/6856f8b6-a719-4f69-be71-4df582015f28.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Dinobot-Opus-14B-Exp/6ed13eae-92ee-4fa7-9ed8-d9f21d6de48c.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Elita-0.1-Distilled-R1-abliterated/9b63b3ad-568f-4f15-9cc6-36049ac89727.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Elita-0.1-Distilled-R1-abliterated/f2c0ea2b-76ae-4469-832e-84c0b79fa283.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Elita-1/5619e3cb-eb3e-4420-a156-6f7b2a5d372d.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Elita-1/d721cfe0-eb01-42fe-955a-bfd219c38917.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Epimetheus-14B-Axo/9d5e329f-491a-4608-bcac-1ee63046b34a.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Epimetheus-14B-Axo/dc3aed7d-01e0-46cc-85f6-2a06cf6b6edc.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Equuleus-Opus-14B-Exp/80953f08-6530-4bab-a375-cc542081aabb.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Equuleus-Opus-14B-Exp/ccce28fd-d3ae-427c-b848-f08b2cf85692.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Eridanus-Opus-14B-r999/0b8691a8-f394-4da3-a67b-faa1af9b42c9.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Eridanus-Opus-14B-r999/9dd4aa3f-98aa-4e51-bd21-c999b3990a64.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Evac-Opus-14B-Exp/26c88cb2-7c81-4b0c-8493-baa9d8f7b1a0.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Evac-Opus-14B-Exp/fb541a2b-d9bd-4aa2-8b83-da62a3b77731.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/FastThink-0.5B-Tiny/b731eb88-e0ed-4edb-bed3-2d82bbce43bb.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/FastThink-0.5B-Tiny/c20d1c62-d3e0-4e30-b0d3-4c62a6585d23.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/GWQ-9B-Preview/7735d88c-bdaa-4a12-9a99-a2dc5ec2ec66.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/GWQ-9B-Preview/8a10eeb6-7178-4c78-8940-68fad78e389b.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/GWQ-9B-Preview2/5c534761-19b5-4111-b1f5-c2fc3e121b24.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/GWQ-9B-Preview2/f0bb774c-a842-4261-b817-b169ce65a493.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/GWQ2b/59afe234-3a7f-49bb-873c-df6cf793e5e5.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/GWQ2b/8a89468f-fe2f-4bc9-be99-c9619c605efc.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Gaea-Opus-14B-Exp/4074081a-66a6-42e4-994f-72541f90888b.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Gaea-Opus-14B-Exp/f75e27a8-00e8-4473-b7ed-3fffa131ee0a.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Galactic-Qwen-14B-Exp1/6a618ec8-c029-49ec-9ea5-da52b5231280.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Galactic-Qwen-14B-Exp2/edc8f510-c961-4c1f-9757-e80c4247f275.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Gauss-Opus-14B-R999/aaa5d1e6-5aca-4471-87ea-7195610a6c1d.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Gauss-Opus-14B-R999/e8596a17-9e5d-4ac5-9968-44d302628c31.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Jolt-v0.1/89b45e8b-9979-4c7f-8aa6-c6ab7009cab0.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Jolt-v0.1/d96ef95b-ca39-4e33-9f6b-a4faa71e5009.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Lacerta-Opus-14B-Elite8/21b53896-3b7b-470a-a49f-4b2cb4e6adef.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Lacerta-Opus-14B-Elite8/41000c74-8b29-4369-996f-cf3a2fd09f63.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Llama-3.1-5B-Instruct/a1765846-74e1-440a-8851-12a571444059.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Llama-3.1-5B-Instruct/cdc5671a-e164-43b9-864c-808a9464e618.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Llama-3.1-8B-Open-SFT/37276848-95fe-4403-896d-bf9fafbff04e.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Llama-3.1-8B-Open-SFT/9c6b594f-387a-42a3-9e40-3b26363e6071.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Llama-3.2-3B-Math-Oct/2b910401-457a-45dd-920a-559f4595897b.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Llama-3.2-6B-AlgoCode/90b7be49-53a0-4d7f-8995-cbc52fe3a70f.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Llama-8B-Distill-CoT/5e8854ba-7147-4fdd-a568-1ea58e79e7d8.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Llama-Deepsync-1B/df6e0cfb-d720-428a-a5ad-b1529faa07c0.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Llama-Deepsync-3B/a88a6e6f-2253-4b67-9527-55ab6153e40f.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Llama-Express.1-Math/00c66a37-b46b-47e8-a098-ce12433c1135.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/LwQ-10B-Instruct/6ad5483c-13dc-4e79-a719-66af383d195a.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/LwQ-10B-Instruct/df470b21-0d55-4d28-af25-75908799a0cc.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/LwQ-Reasoner-10B/9fa6813a-7acb-4c08-9912-6dc0d356a7e2.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/LwQ-Reasoner-10B/d22507ab-2601-4bf0-a8d8-b456102c85af.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Magellanic-Opus-14B-Exp/07236482-8709-4aa8-8e63-762b2f591b2a.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Magellanic-Opus-14B-Exp/3880e3bf-6ff0-4eef-a519-2649014254e1.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Magellanic-Qwen-25B-R999/e77efb9d-b1fc-4833-8e7f-8da683019018.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Megatron-Corpus-14B-Exp.v2/2bcc02df-8d27-412a-8b58-c331df98e4d4.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Megatron-Corpus-14B-Exp.v2/f50a6538-057e-4e57-af79-ba3a5b7121cb.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Megatron-Corpus-14B-Exp/622531d5-03f8-42cf-974e-94291aa1e515.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Megatron-Corpus-14B-Exp/f71c4189-288e-4c6d-978c-d793ca57fedf.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-2.0/b772f20f-afbd-496c-9f94-e5fd30d54466.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-2.0/c6dd1b78-b487-4197-8a66-c364487ff6fb.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-2.1/002ba3ef-6ac7-4bdf-bd7d-42ef16aa7cc9.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-2.1/169d5ad3-ae4a-42de-b951-f264d85bf623.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-Exp/ac65fabb-07d5-457d-844e-19aecf2b18e0.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-Exp/e84c3b50-4ea9-4f41-be11-50c6aa3d4656.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-Stock/594780dc-d969-4a6b-b90b-1cc32f40c452.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-Stock/8a0828ef-56a0-4c2b-bc61-f955c56b7700.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Megatron-Opus-7B-Exp/4ff7c238-d69c-4b92-83d0-69cacdfa0fe6.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Megatron-Opus-7B-Exp/94536d01-2de8-4305-83aa-2673a226ab64.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Messier-Opus-14B-Elite7/bb576dc9-eede-48d6-b438-732da91a4d29.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Messier-Opus-14B-Elite7/e2ac8e52-8326-496a-b904-ca0e48190b3b.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Omni-Reasoner-Merged/0fb2fe17-b55d-4802-ad48-bd4d711e1e0f.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Omni-Reasoner-Merged/8043bcfd-1a4c-45c5-aca4-f23f02bd5562.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Omni-Reasoner3-Merged/03d59002-dc98-467f-b2a9-605ef8d9b763.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Omni-Reasoner3-Merged/972cdfdc-1c7f-4900-8acf-d5eed0ccc968.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Pegasus-Opus-14B-Exp/5cc40900-fe74-469a-99c0-74e998b0e316.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Pegasus-Opus-14B-Exp/8a7034fd-7027-4a87-9cac-c95b745935d0.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Phi-4-Empathetic/717f745f-1eae-4277-8a31-dbed140ef3e8.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Phi-4-Math-IO/2dc78735-c0c3-4dd7-8e97-52c92785e623.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Phi-4-QwQ/e9ab98ff-5cf0-4437-9cf3-c77ecb546c84.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Phi-4-Super-1/6303d73e-4129-472a-a6fd-c64cb3de7204.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Phi-4-Super-o1/8a689e8f-19cc-45b7-80be-ce861a549af7.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Phi-4-Super/84881315-55a4-4f05-a115-cf82f850090d.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Phi-4-o1/970dc71c-42be-4d50-86ac-f7301ec969ca.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Phi4-Super/c02e1fcf-a837-4b8a-a42d-63837c56128d.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Porpoise-Opus-14B-Exp/37280340-5b9a-47d9-aa37-9299d9025518.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Porpoise-Opus-14B-Exp/79832ae5-0a80-4e46-8175-4baa240dc4d9.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Primal-Opus-14B-Optimus-v1/46e7ad9b-b774-46b9-933c-913d1b307f7a.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Primal-Opus-14B-Optimus-v1/94c21b1f-ce8d-4488-a1d1-2769d34f29ec.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Primal-Opus-14B-Optimus-v2/80407172-765a-4aa9-b189-a322150b1a7b.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Primal-Opus-14B-Optimus-v2/c154d3f5-39dc-43c0-85ea-2e43b08494b4.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/QwQ-LCoT-14B-Conversational/71114773-e285-4666-ae7f-5fd7c9084104.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/QwQ-LCoT-14B-Conversational/abd830e4-2b7f-4895-8262-75926edbafd9.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/QwQ-LCoT-3B-Instruct/2c945021-72e3-4e7a-9c6f-81efb27b2206.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/QwQ-LCoT-3B-Instruct/87fc8696-17f1-4a86-8d0d-f5b124144384.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/QwQ-LCoT-7B-Instruct/23f056f6-67dd-41fd-b1af-a1cf9abf784c.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/QwQ-LCoT-7B-Instruct/5f0ea694-7f73-45fa-b54f-49fc06d1a6d9.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/QwQ-LCoT1-Merged/34aec318-6db4-4df6-9d6a-ad15e353f36a.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/QwQ-LCoT1-Merged/6c73f6ae-8ffd-4948-8071-33eab07437a6.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/QwQ-LCoT2-7B-Instruct/8c05d496-c21f-4a70-b312-1c1ba37d877a.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/QwQ-LCoT2-7B-Instruct/fbf71df3-b9c3-4f9c-b538-e4ccf097e81c.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/QwQ-MathOct-7B/e3dcfd94-ca04-4cd3-ada5-e701a8b776da.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/QwQ-MathOct-7B/e703fed7-cf06-4caa-b78f-3e398b437671.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/QwQ-R1-Distill-1.5B-CoT/8dd67de7-0d3b-4359-b390-d90c609dea5a.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/QwQ-R1-Distill-1.5B-CoT/9278bcf2-bfab-437f-bd64-7496b24fb8cf.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/QwQ-R1-Distill-7B-CoT/633aa068-5613-41d8-a194-aebc9ce1586f.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/QwQ-R1-Distill-7B-CoT/a723f173-af0e-4172-a43c-278ccbacac18.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Qwen-7B-Distill-Reasoner/d3c1a922-a453-4c7b-b33b-52934e7bf72b.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Qwen2.5-1.5B-DeepSeek-R1-Instruct/3a27b2a6-5eea-450b-91c7-1dc006229985.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Qwen2.5-1.5B-DeepSeek-R1-Instruct/b1430f51-cd48-4feb-8d94-c2a9a60f00bc.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Qwen2.5-14B-DeepSeek-R1-1M/395e37ae-005d-47c0-9cf5-919460e34350.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Qwen2.5-7B-DeepSeek-R1-1M/b03b7c7a-f263-4712-bcf4-2e32ca4bd237.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/SmolLM2-CoT-360M/452ab810-6921-4922-9446-f2a5c081dc61.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/SmolLM2-CoT-360M/8ce4dea8-d674-4b95-b025-0c6ab60f6544.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Elite5/1abba5a0-f1a3-4f39-a81c-f4cd641d33ac.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Elite5/3b12518e-ef16-4a72-89bb-071802ca636c.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Elite6/0d354980-9f24-4b79-afb7-a7e6f52e8131.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Elite6/b2eefd3a-795c-4dc0-a10e-924bece05ea5.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm1/008cc919-f156-4a2e-af4b-eed015ca91f6.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm1/5ce1b22c-7daa-4714-a774-d7d509fa869f.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm2/6a1519e9-062b-454f-97cb-e57454f74e9a.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm2/9d56082f-5e46-4d7a-8f06-cb44fc983b3f.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm4/79a8057c-0791-42d6-adef-924a9cff0917.json
create mode 100644 data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm4/7ea26e73-a501-40bf-8f01-81ab8e850a91.json
delete mode 100644 data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm5/41acaa59-3232-4c6c-be64-0acb38019405.json
create mode 100644
data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm5/e3343130-cf4f-4e5c-b2d3-5dda13d575b9.json create mode 100644 data/hfopenllm_v2/prithivMLmods/Sqweeks-7B-Instruct/ba1965f8-b59f-4d71-920c-e3b401ca0534.json delete mode 100644 data/hfopenllm_v2/prithivMLmods/Sqweeks-7B-Instruct/e0eaf433-d842-47c2-b47f-9e0ddd95df72.json delete mode 100644 data/hfopenllm_v2/prithivMLmods/Tadpole-Opus-14B-Exp/0faf87d0-2b35-4256-acd9-4fe57f574d06.json create mode 100644 data/hfopenllm_v2/prithivMLmods/Tadpole-Opus-14B-Exp/6dc87410-a39e-41b1-8759-68c1556c8419.json delete mode 100644 data/hfopenllm_v2/prithivMLmods/Taurus-Opus-7B/01448351-5f76-4329-9bfd-4124e29de920.json create mode 100644 data/hfopenllm_v2/prithivMLmods/Taurus-Opus-7B/c4ebe788-fb60-453b-914b-56bf87dd6374.json create mode 100644 data/hfopenllm_v2/prithivMLmods/Triangulum-10B/45a44cc8-a550-4d2f-b0f4-37b4aac6a2b5.json delete mode 100644 data/hfopenllm_v2/prithivMLmods/Triangulum-10B/ee5ad026-8df4-41c0-9158-3759d4a3ef02.json create mode 100644 data/hfopenllm_v2/prithivMLmods/Triangulum-5B/10593c13-3b30-4605-8063-c6a6526fc9d9.json delete mode 100644 data/hfopenllm_v2/prithivMLmods/Triangulum-5B/7d8850c3-61b2-41c3-a01b-8e23511558f6.json delete mode 100644 data/hfopenllm_v2/prithivMLmods/Triangulum-v2-10B/00f8547d-4bb9-4510-a29c-c37376c274c8.json create mode 100644 data/hfopenllm_v2/prithivMLmods/Triangulum-v2-10B/12b8f4d7-2ae8-492c-8756-f7cb21a58c76.json create mode 100644 data/hfopenllm_v2/prithivMLmods/Tucana-Opus-14B-r999/96d9b675-c299-4138-a381-fb4de36287e5.json delete mode 100644 data/hfopenllm_v2/prithivMLmods/Tucana-Opus-14B-r999/f24694aa-cfe7-4a58-9f9e-f02c3e51d198.json create mode 100644 data/hfopenllm_v2/prithivMLmods/Tulu-MathLingo-8B/17fffa9b-8ed4-44c7-87ea-7ee2c1f28e6a.json delete mode 100644 data/hfopenllm_v2/prithivMLmods/Tulu-MathLingo-8B/fa0776bd-e95e-4d54-9004-82dff09307b8.json delete mode 100644 data/hfopenllm_v2/prithivMLmods/Viper-Coder-7B-Elite14/06bc6426-310b-40ac-bbeb-0460215b8981.json create mode 100644 data/hfopenllm_v2/prithivMLmods/Viper-Coder-7B-Elite14/8999a5f3-f421-4663-835e-7626cebd2282.json delete mode 100644 data/hfopenllm_v2/prithivMLmods/Viper-Coder-Hybrid-v1.2/1f235238-05e0-4c76-b136-0bf0cf470ba2.json create mode 100644 data/hfopenllm_v2/prithivMLmods/Viper-Coder-Hybrid-v1.2/951e1a4f-ed6c-49ca-b648-6086989e333f.json delete mode 100644 data/hfopenllm_v2/prithivMLmods/Viper-Coder-Hybrid-v1.3/17167e2a-1f42-4ea9-a947-8749259738a8.json create mode 100644 data/hfopenllm_v2/prithivMLmods/Viper-Coder-Hybrid-v1.3/2acc0666-e0ff-4760-a74a-227a02775344.json delete mode 100644 data/hfopenllm_v2/prithivMLmods/Viper-Coder-HybridMini-v1.3/1ca04810-a377-4390-944a-1a4ec91a7962.json create mode 100644 data/hfopenllm_v2/prithivMLmods/Viper-Coder-HybridMini-v1.3/3196c71d-0e0a-4d29-8bca-c31ba3d99dfd.json delete mode 100644 data/hfopenllm_v2/prithivMLmods/Viper-Coder-v0.1/4d801ab4-0c2d-445a-beb6-4de824618e75.json create mode 100644 data/hfopenllm_v2/prithivMLmods/Viper-Coder-v0.1/e858aa6c-c424-447e-b512-7dcf794f9f0f.json create mode 100644 data/hfopenllm_v2/prithivMLmods/Viper-Coder-v1.1/8773eac5-205e-4264-981b-58f1a25f872a.json delete mode 100644 data/hfopenllm_v2/prithivMLmods/Viper-Coder-v1.1/cc8e5b55-5b48-40c3-9e30-3c1740bc7da2.json create mode 100644 data/hfopenllm_v2/prithivMLmods/Viper-Coder-v1.6-r999/c26ae286-a9b8-499f-b886-4b75be0cf2da.json delete mode 100644 data/hfopenllm_v2/prithivMLmods/Viper-Coder-v1.6-r999/ff5bb366-3692-441c-8e8f-8c23c5143aae.json delete mode 100644 
data/hfopenllm_v2/prithivMLmods/Viper-Coder-v1.7-Vsm6/14b789c6-8b7f-4292-8ced-279e7ee856a5.json create mode 100644 data/hfopenllm_v2/prithivMLmods/Viper-Coder-v1.7-Vsm6/d3a61998-2d41-4349-bd15-ce29143cc910.json create mode 100644 data/hfopenllm_v2/prithivMLmods/Viper-OneCoder-UIGEN/56b66428-2751-4c62-b98c-6c60e58c45ca.json delete mode 100644 data/hfopenllm_v2/prithivMLmods/Viper-OneCoder-UIGEN/5d22f1b7-c062-4c46-8da1-4c895fcf8b9c.json delete mode 100644 data/hfopenllm_v2/prithivMLmods/Volans-Opus-14B-Exp/735058a7-c22e-42a7-94f5-d7e2459848b3.json create mode 100644 data/hfopenllm_v2/prithivMLmods/Volans-Opus-14B-Exp/9b2ec4af-4a7c-4cf7-8b7d-79b6cc219880.json delete mode 100644 data/hfopenllm_v2/prithivMLmods/WebMind-7B-v0.1/00637ba6-99e5-4940-94ab-a620ff248ca1.json create mode 100644 data/hfopenllm_v2/prithivMLmods/WebMind-7B-v0.1/5855a920-428f-4699-becc-73d4422f706f.json create mode 100644 data/hfopenllm_v2/pszemraj/Llama-3-6.3b-v0.1/f1004f08-7f46-4eb1-8f60-66893fca7180.json create mode 100644 data/hfopenllm_v2/pszemraj/Mistral-v0.3-6B/97db158a-3035-45d3-8d92-a08c9e605493.json create mode 100644 data/hfopenllm_v2/qingy2019/LLaMa_3.2_3B_Catalysts/0d81b928-2a24-4eb4-93d5-224e3c505532.json create mode 100644 data/hfopenllm_v2/qingy2019/OpenMath2-Llama3.1-8B/bf4cc7ee-cad4-42af-8638-6b371577ec68.json create mode 100644 data/hfopenllm_v2/qingy2019/Oracle-14B/5b574dda-0d85-47aa-9ebc-7f8581d402ca.json create mode 100644 data/hfopenllm_v2/qingy2019/Oracle-14B/6043830f-8a9d-4a03-9de5-4805724a9ae8.json delete mode 100644 data/hfopenllm_v2/qingy2019/Oracle-14B/90a36ffd-8eeb-44e8-9b7b-dbd56238d0a6.json delete mode 100644 data/hfopenllm_v2/qingy2019/Oracle-14B/fc5c5eff-8314-4cb2-8ba4-b562096cfe1f.json delete mode 100644 data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct-Alpha/7bc9676d-6186-4b2d-8b4b-4a3786f3ed40.json create mode 100644 data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct-Alpha/9d5fdb25-0d6a-4d5c-bcfb-0903504e620a.json create mode 100644 data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct-Pro/217819b0-2c4b-4c26-823b-1ea14f893e01.json delete mode 100644 data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct-Pro/c1a0b34a-d3b5-42b9-b779-b31b9678faed.json create mode 100644 data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct/0f844855-fb46-4b53-82c2-f36e5721c385.json delete mode 100644 data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct/46d47e9a-6378-4eb5-a43d-f8e6a7c51674.json create mode 100644 data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct/59aaa7ed-27d4-4765-b115-90570ad86c77.json delete mode 100644 data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct/5a2e7119-5fe6-4d3c-8706-01e22ef5b121.json create mode 100644 data/hfopenllm_v2/qingy2019/Qwen2.5-Ultimate-14B-Instruct/4478c5ff-3b51-4be2-abce-3fb6a951b6e7.json delete mode 100644 data/hfopenllm_v2/qingy2019/Qwen2.5-Ultimate-14B-Instruct/655920b7-5687-4555-8890-ab1d08f3f00d.json delete mode 100644 data/hfopenllm_v2/qingy2024/Benchmaxx-Llama-3.2-1B-Instruct/52ed2d5b-d9be-4f3f-b193-8d4cca4ded62.json create mode 100644 data/hfopenllm_v2/qingy2024/Benchmaxx-Llama-3.2-1B-Instruct/9202146d-5889-49fd-9025-e03153ba9093.json create mode 100644 data/hfopenllm_v2/qingy2024/Eyas-17B-Instruct/94257d3e-2b1e-47a1-bbd1-7fc696a574b3.json delete mode 100644 data/hfopenllm_v2/qingy2024/Eyas-17B-Instruct/c45cc504-88b0-4110-9650-47f4d328f769.json create mode 100644 data/hfopenllm_v2/qingy2024/Falcon3-2x10B-MoE-Instruct/2245cf71-fb8d-44ca-b58d-06608312ee8c.json delete mode 100644 
data/hfopenllm_v2/qingy2024/Falcon3-2x10B-MoE-Instruct/302e9f42-b9fa-4e2b-acda-70c391f9b6bc.json delete mode 100644 data/hfopenllm_v2/qingy2024/Fusion-14B-Instruct/123331fd-a4fb-4dc6-a30e-17f230618df9.json create mode 100644 data/hfopenllm_v2/qingy2024/Fusion-14B-Instruct/9a823fde-7802-4876-b72c-d8f73cd17236.json delete mode 100644 data/hfopenllm_v2/qingy2024/Fusion2-14B-Instruct/cc17acb9-0f4e-46a9-a250-eb79a0fedc3f.json create mode 100644 data/hfopenllm_v2/qingy2024/Fusion2-14B-Instruct/ede99239-ef8f-49eb-a48b-0ec2553c99e5.json create mode 100644 data/hfopenllm_v2/qingy2024/Fusion4-14B-Instruct/4a307570-994f-491c-87a7-ad90b7965b8b.json delete mode 100644 data/hfopenllm_v2/qingy2024/Fusion4-14B-Instruct/bb7b828c-07a0-4530-8c2e-8e4b6370cbb4.json create mode 100644 data/hfopenllm_v2/qingy2024/OwO-14B-Instruct/eb448d78-6417-4533-8458-99c1869a74ae.json delete mode 100644 data/hfopenllm_v2/qingy2024/OwO-14B-Instruct/f524ebb6-64cb-43e3-8cff-6305ef122890.json delete mode 100644 data/hfopenllm_v2/qingy2024/QwEnlarge-16B-Instruct/dd44686d-13da-4c88-81d3-6d01676baa4e.json create mode 100644 data/hfopenllm_v2/qingy2024/QwEnlarge-16B-Instruct/e1b8e4ad-4327-46b9-b957-fbd02e57c87e.json delete mode 100644 data/hfopenllm_v2/qingy2024/QwQ-14B-Math-v0.2/4092651d-1d14-408d-922d-6189858aab36.json create mode 100644 data/hfopenllm_v2/qingy2024/QwQ-14B-Math-v0.2/aab6b224-b948-4fb1-84b7-0dbe5c46d527.json create mode 100644 data/hfopenllm_v2/qingy2024/Qwarkstar-4B-Instruct-Preview/2e5cd1de-6109-4f76-b722-abbd4b207f4d.json delete mode 100644 data/hfopenllm_v2/qingy2024/Qwarkstar-4B-Instruct-Preview/701a4aa4-b057-42d8-8b89-dd59950d1981.json create mode 100644 data/hfopenllm_v2/qingy2024/Qwarkstar-4B/767d1296-4971-478f-8d78-1d63d162ae5b.json delete mode 100644 data/hfopenllm_v2/qingy2024/Qwarkstar-4B/9f586b02-3514-46f7-b1df-4e78f286893e.json create mode 100644 data/hfopenllm_v2/qingy2024/Qwen2.5-4B/eab74e3b-de61-4fa9-87c2-56e69b70349a.json create mode 100644 data/hfopenllm_v2/qingy2024/Qwen2.5-Coder-Draft-1.5B-Instruct/3219d563-3bfb-4618-8cb3-e9b198d5b11f.json delete mode 100644 data/hfopenllm_v2/qingy2024/Qwen2.5-Coder-Draft-1.5B-Instruct/40662202-f976-4dc0-acf2-f4794bb5d744.json delete mode 100644 data/hfopenllm_v2/qingy2024/Qwen2.5-Math-14B-Instruct-Alpha/011f32a0-458f-4bea-8192-b18a19ddd0c7.json create mode 100644 data/hfopenllm_v2/qingy2024/Qwen2.5-Math-14B-Instruct-Alpha/233fd27c-561e-4c9e-a917-cbc5b08c055a.json create mode 100644 data/hfopenllm_v2/qingy2024/Qwen2.5-Math-14B-Instruct-Preview/a875e8f7-a4e6-4c17-abbc-b8d4b73b7501.json delete mode 100644 data/hfopenllm_v2/qingy2024/Qwen2.5-Math-14B-Instruct-Preview/aab84d55-c491-402c-9ed0-59347573fea9.json create mode 100644 data/hfopenllm_v2/qingy2024/Qwen2.6-14B-Instruct/4b68ba49-6681-4add-9197-2cd711701e15.json delete mode 100644 data/hfopenllm_v2/qingy2024/Qwen2.6-14B-Instruct/c27064c4-93d1-41a1-a61f-cde7a991b047.json delete mode 100644 data/hfopenllm_v2/qingy2024/Qwen2.6-Math-14B-Instruct/37822fb0-4ada-4413-aa77-6938678994d9.json create mode 100644 data/hfopenllm_v2/qingy2024/Qwen2.6-Math-14B-Instruct/5679ca73-3d5f-4bc7-bea2-5e9e713db0cc.json create mode 100644 data/hfopenllm_v2/qq8933/OpenLongCoT-Base-Gemma2-2B/a6c631f6-890c-4199-abee-18b012bc48df.json delete mode 100644 data/hfopenllm_v2/raphgg/test-2.5-72B/133866e4-6e3a-4d88-95f3-d7e1bd414988.json create mode 100644 data/hfopenllm_v2/raphgg/test-2.5-72B/1edc3610-40fc-467d-8410-26d4b6adebce.json create mode 100644 
data/hfopenllm_v2/rasyosef/Mistral-NeMo-Minitron-8B-Chat/42c773ba-8fb4-4b3c-8ac7-0688519bb55c.json delete mode 100644 data/hfopenllm_v2/rasyosef/Mistral-NeMo-Minitron-8B-Chat/cb8d28e5-d423-4a62-8b73-7542fb990d8e.json create mode 100644 data/hfopenllm_v2/rasyosef/Phi-1_5-Instruct-v0.1/1a371df5-447f-4fd8-8fe8-dbf9a1dc079a.json delete mode 100644 data/hfopenllm_v2/rasyosef/Phi-1_5-Instruct-v0.1/e4d90e2b-f510-4941-8e10-be027693c3d4.json create mode 100644 data/hfopenllm_v2/rasyosef/phi-2-instruct-apo/821a21a0-6fd7-438a-933d-5e31b2dd2adc.json delete mode 100644 data/hfopenllm_v2/rasyosef/phi-2-instruct-apo/f56f3dda-a774-45d7-b949-b5e04174a413.json delete mode 100644 data/hfopenllm_v2/rasyosef/phi-2-instruct-v0.1/556eef3e-7c58-446d-acc5-26af0413d2bc.json create mode 100644 data/hfopenllm_v2/rasyosef/phi-2-instruct-v0.1/781a4cc6-a69d-4106-81aa-06e114f7c897.json delete mode 100644 data/hfopenllm_v2/realtreetune/rho-1b-sft-MATH/86234365-2d3e-4d49-96e8-8f034990c902.json create mode 100644 data/hfopenllm_v2/realtreetune/rho-1b-sft-MATH/e49c98b4-46f4-406e-9eeb-7072bf72b9a3.json create mode 100644 data/hfopenllm_v2/recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp/3b7524a8-d17b-4788-93f2-11076df464a7.json create mode 100644 data/hfopenllm_v2/recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp/6188a57f-4bc3-42a5-ad18-c59774e40407.json create mode 100644 data/hfopenllm_v2/recoilme/recoilme-gemma-2-9B-v0.1/28689805-7c4c-438e-8431-f4a6aceb5e94.json create mode 100644 data/hfopenllm_v2/recoilme/recoilme-gemma-2-9B-v0.2/7c156689-9668-4ded-bacc-c88a03ad1526.json create mode 100644 data/hfopenllm_v2/recoilme/recoilme-gemma-2-9B-v0.2/7e43f187-1959-4dfe-802f-094ba88f3b0d.json create mode 100644 data/hfopenllm_v2/recoilme/recoilme-gemma-2-9B-v0.3/a6170173-ef17-4cfa-a76e-8e51cb8cb970.json create mode 100644 data/hfopenllm_v2/recoilme/recoilme-gemma-2-9B-v0.3/e998d52b-dd94-4ef2-9cfc-5034ded0105a.json create mode 100644 data/hfopenllm_v2/recoilme/recoilme-gemma-2-9B-v0.4/a3ac60bd-8fb3-47d9-b378-1f0c4d74fed2.json create mode 100644 data/hfopenllm_v2/recoilme/recoilme-gemma-2-9B-v0.5/0f69217c-74ed-4398-8d1b-53d1a43be890.json delete mode 100644 data/hfopenllm_v2/redrix/AngelSlayer-12B-Unslop-Mell-RPMax-DARKNESS/60e8f886-62fa-444a-8193-273905cbd4e8.json create mode 100644 data/hfopenllm_v2/redrix/AngelSlayer-12B-Unslop-Mell-RPMax-DARKNESS/b973adcc-769c-4009-87c5-5f5af02a5d3a.json delete mode 100644 data/hfopenllm_v2/redrix/patricide-12B-Unslop-Mell/16052a72-b235-47df-ac4c-fe54e49b9131.json create mode 100644 data/hfopenllm_v2/redrix/patricide-12B-Unslop-Mell/4b30f11e-a2b9-40e9-b080-9d7484a5d048.json create mode 100644 data/hfopenllm_v2/refuelai/Llama-3-Refueled/befdae09-4caa-4996-a3ac-fe36310aaf01.json delete mode 100644 data/hfopenllm_v2/rhplus0831/maid-yuzu-v7/65e47b2d-982b-4fa8-b5bf-e002cf3cc293.json create mode 100644 data/hfopenllm_v2/rhplus0831/maid-yuzu-v7/8cd7fc1b-2873-4154-9de7-c0b8e5f4f5e9.json delete mode 100644 data/hfopenllm_v2/rhymes-ai/Aria/611c449e-3d86-4dea-94a8-a2b7719fa1ae.json create mode 100644 data/hfopenllm_v2/rhymes-ai/Aria/7f6e5858-f5d4-41cf-9bb7-c3c82a55c392.json create mode 100644 data/hfopenllm_v2/rhysjones/phi-2-orange-v2/7b8bf84f-4101-41a1-b6ff-9cadbb5f84a3.json create mode 100644 data/hfopenllm_v2/riaz/FineLlama-3.1-8B/1f3a733d-a6d3-453b-9763-61992cd514b0.json create mode 100644 data/hfopenllm_v2/riaz/FineLlama-3.1-8B/d0eed3c1-2226-48c5-a314-e429f66c5053.json create mode 100644 data/hfopenllm_v2/rmdhirr/Gluon-8B/957f02f1-45c7-4cce-b5aa-86bb5e485ad3.json delete mode 100644 
data/hfopenllm_v2/rmdhirr/Gluon-8B/a1f5e06b-17f7-41d1-ab9d-c0e4b22d10cf.json create mode 100644 data/hfopenllm_v2/rombodawg/Rombos-Coder-V2.5-Qwen-14b/55a01e8e-318a-4609-a862-bab4d62b3e7a.json create mode 100644 data/hfopenllm_v2/rombodawg/Rombos-Coder-V2.5-Qwen-7b/cbdcd76f-be8f-42fe-89ed-d1d09d9d785f.json create mode 100644 data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.5-Qwen-0.5b/c7b6515e-6f96-468b-8bc0-15212c31e790.json create mode 100644 data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.5-Qwen-1.5b/f27f3a1d-c19a-42b2-8b49-64ecfe5d3405.json create mode 100644 data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.5-Qwen-14b/994aa481-627a-4bed-8719-9e874373cbc6.json create mode 100644 data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.5-Qwen-32b/9f5cd849-20b1-4e8d-9deb-f286dcfd9d6e.json create mode 100644 data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.5-Qwen-3b/c4dd34f2-7acc-4a94-a9aa-3c6aeeae8a8c.json create mode 100644 data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.5-Qwen-72b/e908b473-a015-4156-8e88-d67153479cb9.json create mode 100644 data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.5-Qwen-7b/173af77d-7a51-4d5a-8fd3-366aaa5d78a0.json create mode 100644 data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.5.1-Qwen-3b/0bb65f09-323d-485f-886e-5a35c8bcd342.json create mode 100644 data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.5.1-Qwen-3b/86b4c877-ef2d-4563-93a2-92d7e77eab5c.json create mode 100644 data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.6-Nemotron-70b/be2ee3f6-37ee-4895-821a-3d3c7eb04eac.json delete mode 100644 data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.6-Nemotron-70b/caf5de06-ab13-45e4-ac51-d4e40796952e.json create mode 100644 data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.6-Qwen-14b/e574af17-dd3b-4c09-8689-ea598d44e562.json create mode 100644 data/hfopenllm_v2/rombodawg/rombos_Replete-Coder-Instruct-8b-Merged/83958185-047a-4356-918d-2f45f273c08a.json delete mode 100644 data/hfopenllm_v2/rombodawg/rombos_Replete-Coder-Instruct-8b-Merged/929abd2b-3f19-4df3-81ab-406751d52919.json create mode 100644 data/hfopenllm_v2/rombodawg/rombos_Replete-Coder-Llama3-8B/d04c6e84-0b63-4de1-9278-aa37c9d2c8e3.json delete mode 100644 data/hfopenllm_v2/rootxhacker/Apollo-70B/14421b7b-6f4d-4b4f-91e1-27a9c0919498.json create mode 100644 data/hfopenllm_v2/rootxhacker/Apollo-70B/a218e260-7f56-4676-af58-254bd84d0327.json delete mode 100644 data/hfopenllm_v2/rootxhacker/Apollo_v2-32B/2a3e824e-8fb2-41ac-b548-30ea18ecdceb.json create mode 100644 data/hfopenllm_v2/rootxhacker/Apollo_v2-32B/f21fb2c8-4abe-40de-ab2c-9d23e95ee281.json delete mode 100644 data/hfopenllm_v2/rootxhacker/apollo-7B/ce364468-f5ef-4a29-8026-89e455fa4350.json create mode 100644 data/hfopenllm_v2/rootxhacker/apollo-7B/da5774b2-8a6f-4f2d-8267-beb25490b06a.json delete mode 100644 data/hfopenllm_v2/rsh345/mistral-ft-optimized-1218-NeuralHermes-2.5-Mistral-7B/18284816-2f69-41c5-8cf3-5209ed77cb7d.json create mode 100644 data/hfopenllm_v2/rsh345/mistral-ft-optimized-1218-NeuralHermes-2.5-Mistral-7B/274705bd-8eb6-4863-8998-f5d67c4ac827.json create mode 100644 data/hfopenllm_v2/rubenroy/Geneva-12B-GCv2-5m/5b95cc2f-3378-45e7-9f56-6bb7e1ce4826.json delete mode 100644 data/hfopenllm_v2/rubenroy/Geneva-12B-GCv2-5m/e6649e50-54ba-4788-a3b4-5aa3d6e8aed8.json create mode 100644 data/hfopenllm_v2/rubenroy/Gilgamesh-72B/6918d1a3-e547-46b7-9062-274057c1f513.json delete mode 100644 data/hfopenllm_v2/rubenroy/Gilgamesh-72B/b577bd26-a9f9-4a50-bd2b-f47bc5222748.json create mode 100644 data/hfopenllm_v2/rubenroy/Zurich-14B-GCv2-5m/599deb3c-49f9-4c0b-af8d-78f9e166820b.json delete mode 100644 
data/hfopenllm_v2/rubenroy/Zurich-14B-GCv2-5m/f9dca394-e108-48f3-a45d-a282f7b39098.json create mode 100644 data/hfopenllm_v2/ruizhe1217/sft-s1-qwen-0.5b/b4ea3f14-3787-434b-8f26-20ff640c0146.json delete mode 100644 data/hfopenllm_v2/ruizhe1217/sft-s1-qwen-0.5b/fd0e4ea3-ed10-487d-85d7-df5669bc8edc.json delete mode 100644 data/hfopenllm_v2/rwitz/go-bruins-v2/2f6a8cce-672f-4634-99ed-9d42df9cd26c.json create mode 100644 data/hfopenllm_v2/rwitz/go-bruins-v2/6952c527-ca23-494a-910c-1c027e4a5a29.json create mode 100644 data/hfopenllm_v2/sabersaleh/Llama2-7B-CPO/3f12e79c-dd1b-428d-9094-10a047205e3e.json delete mode 100644 data/hfopenllm_v2/sabersaleh/Llama2-7B-DPO/c2ffce0d-069d-48bb-989c-6fb18bdd9059.json create mode 100644 data/hfopenllm_v2/sabersaleh/Llama2-7B-DPO/d508da29-0288-4a0a-b727-fc5355515c5e.json create mode 100644 data/hfopenllm_v2/sabersaleh/Llama2-7B-IPO/48cf5a8a-70c6-4c55-8959-32d773d6dbcf.json create mode 100644 data/hfopenllm_v2/sabersaleh/Llama2-7B-KTO/4bb7d331-f305-4c08-a073-87ba7b2cbde2.json create mode 100644 data/hfopenllm_v2/sabersaleh/Llama2-7B-SPO/94639454-c525-4e6f-af27-d92d45a9ac40.json create mode 100644 data/hfopenllm_v2/sabersaleh/Llama2-7B-SimPO/9fa81bb7-7abc-4764-9465-d61217590da5.json create mode 100644 data/hfopenllm_v2/sabersaleh/Llama3/9a683492-4057-4de4-a30a-aa66becffb13.json create mode 100644 data/hfopenllm_v2/sabersalehk/Llama3-001-300/b917df45-62f2-4c3b-943a-ad6c98ef8bc1.json create mode 100644 data/hfopenllm_v2/sabersalehk/Llama3-SimPO/ba658bc7-b89d-4fb7-a794-f48bd3715a49.json create mode 100644 data/hfopenllm_v2/sabersalehk/Llama3_001_200/93f79cdc-ffd7-4299-9876-c0c7bed55ae5.json create mode 100644 data/hfopenllm_v2/sabersalehk/Llama3_01_300/5a91b0bf-b043-41d2-960d-5f0e78abc400.json create mode 100644 data/hfopenllm_v2/saishf/Fimbulvetr-Kuro-Lotus-10.7B/263f56e5-b578-475a-9bc4-b5ffc142f9e2.json delete mode 100644 data/hfopenllm_v2/saishf/Fimbulvetr-Kuro-Lotus-10.7B/941a914d-0ca4-4896-9dfb-929c08c8651b.json create mode 100644 data/hfopenllm_v2/saishf/Neural-SOVLish-Devil-8B-L3/9219ff66-73ba-45d8-99a0-23d23b3555ba.json delete mode 100644 data/hfopenllm_v2/saishf/Neural-SOVLish-Devil-8B-L3/d12855a1-81cb-4fab-b36e-dbee6c6d69a9.json delete mode 100644 data/hfopenllm_v2/saishshinde15/TethysAI_Base_Reasoning/74cb7205-e6c9-4faf-a84e-c15daa2ba62b.json create mode 100644 data/hfopenllm_v2/saishshinde15/TethysAI_Base_Reasoning/b2328396-e9b2-464d-94e4-f03db19144ea.json create mode 100644 data/hfopenllm_v2/saishshinde15/TethysAI_Vortex/3f895edf-8f54-48ff-a731-666144af0fda.json delete mode 100644 data/hfopenllm_v2/saishshinde15/TethysAI_Vortex/6e20bb3a-728d-40ef-b6ca-91b0dde02da4.json delete mode 100644 data/hfopenllm_v2/saishshinde15/TethysAI_Vortex_Reasoning/79022531-2599-4c19-93e0-ecdbde7bf736.json create mode 100644 data/hfopenllm_v2/saishshinde15/TethysAI_Vortex_Reasoning/b48b8e16-a555-466b-8b1c-246137223311.json delete mode 100644 data/hfopenllm_v2/sakaltcommunity/novablast-preview/588d2387-29de-41bc-8233-674081948787.json create mode 100644 data/hfopenllm_v2/sakaltcommunity/novablast-preview/5fdcb98f-4c50-4cdb-bd99-dd32efc6d6f3.json delete mode 100644 data/hfopenllm_v2/sakaltcommunity/sakaltum-7b/5fdd75fd-6e57-4ba4-8b6a-58998ff88bd9.json create mode 100644 data/hfopenllm_v2/sakaltcommunity/sakaltum-7b/d49c5e72-0dd0-4663-a310-9cd9bf1f5150.json create mode 100644 data/hfopenllm_v2/sakhan10/quantized_open_llama_3b_v2/0176903f-e6ca-4f21-b98a-00bc443bf244.json create mode 100644 
data/hfopenllm_v2/saltlux/luxia-21.4b-alignment-v1.0/11f32afc-95c1-4531-ae45-5a0974d36b3a.json delete mode 100644 data/hfopenllm_v2/saltlux/luxia-21.4b-alignment-v1.0/fe959cc1-17bd-4e87-b9b7-84d3adddbedb.json create mode 100644 data/hfopenllm_v2/saltlux/luxia-21.4b-alignment-v1.2/70657dd7-63cf-40f4-92a0-1097fc1ce9ae.json delete mode 100644 data/hfopenllm_v2/saltlux/luxia-21.4b-alignment-v1.2/b89b30bb-fbaa-4ac6-8535-9f31cf87eb55.json create mode 100644 data/hfopenllm_v2/sam-paech/Darkest-muse-v1/53cf325b-6f32-4791-8f95-8b982ea03b23.json delete mode 100644 data/hfopenllm_v2/sam-paech/Darkest-muse-v1/dae1ceb0-97b1-4285-b9db-912d7b4b01c7.json delete mode 100644 data/hfopenllm_v2/sam-paech/Delirium-v1/78dd5568-0d0d-4cc5-ad1a-bfba857c827e.json create mode 100644 data/hfopenllm_v2/sam-paech/Delirium-v1/8c50491b-6ed4-4f38-9d3f-d5168600cf4f.json delete mode 100644 data/hfopenllm_v2/sam-paech/Quill-v1/248541b3-aeae-429d-93ae-06cc3bc82cd8.json create mode 100644 data/hfopenllm_v2/sam-paech/Quill-v1/7adf79de-a51d-4b87-989a-c218ec6d99e3.json create mode 100644 data/hfopenllm_v2/sarvamai/OpenHathi-7B-Hi-v0.1-Base/92358e5a-5e73-4747-9e92-e5ac003b97f7.json delete mode 100644 data/hfopenllm_v2/sarvamai/OpenHathi-7B-Hi-v0.1-Base/e0c03300-a08f-409e-9f39-f00d5e9e126f.json create mode 100644 data/hfopenllm_v2/schnapss/testmerge-7b/f1636512-b98f-4fe4-adf3-abd556dd0ab9.json delete mode 100644 data/hfopenllm_v2/schnapss/testmerge-7b/faa7be96-1419-48be-9b95-e97689296de0.json delete mode 100644 data/hfopenllm_v2/sci-m-wang/Mistral-7B-Instruct-sa-v0.1/8125700c-d9e7-4d6e-9b78-049331dd571b.json create mode 100644 data/hfopenllm_v2/sci-m-wang/Mistral-7B-Instruct-sa-v0.1/9333afdd-4866-412b-b11b-dfb118a06db9.json delete mode 100644 data/hfopenllm_v2/sci-m-wang/Phi-3-mini-4k-instruct-sa-v0.1/319484e0-12aa-4212-b55f-d19efdd2f719.json create mode 100644 data/hfopenllm_v2/sci-m-wang/Phi-3-mini-4k-instruct-sa-v0.1/840c0e19-6d75-47a2-b64b-f9c51cb1dcff.json create mode 100644 data/hfopenllm_v2/sci-m-wang/deepseek-llm-7b-chat-sa-v0.1/071b49f2-8e23-47b1-9858-78d676d9905e.json delete mode 100644 data/hfopenllm_v2/sci-m-wang/deepseek-llm-7b-chat-sa-v0.1/182d68d5-9b03-41bc-850c-1f571c36e630.json create mode 100644 data/hfopenllm_v2/securin/Securin-LLM-V2.5-Qwen-1.5B/d3821f53-87aa-470a-a403-c8e3cd100ae1.json create mode 100644 data/hfopenllm_v2/senseable/WestLake-7B-v2/389dbaba-c9cd-4e6b-afb3-f2ee3951faa0.json delete mode 100644 data/hfopenllm_v2/senseable/WestLake-7B-v2/6ef15d50-74b7-4e09-856c-05343841e24b.json create mode 100644 data/hfopenllm_v2/sequelbox/Llama3.1-70B-PlumChat/5f78f39a-42cc-4cf6-bb27-e2160765bf24.json create mode 100644 data/hfopenllm_v2/sequelbox/Llama3.1-8B-MOTH/b6e3d811-bf9d-474e-b82d-358a44e0dfc9.json create mode 100644 data/hfopenllm_v2/sequelbox/Llama3.1-8B-PlumChat/bef1cbad-4f75-4dde-b467-6145f72a87f4.json create mode 100644 data/hfopenllm_v2/sequelbox/Llama3.1-8B-PlumCode/654bebe0-b461-427e-a4cf-06386e9272d8.json create mode 100644 data/hfopenllm_v2/sequelbox/Llama3.1-8B-PlumMath/37ef4e34-58f8-463a-950f-48b3a6833d54.json create mode 100644 data/hfopenllm_v2/sequelbox/gemma-2-9B-MOTH/20687086-8aab-40f1-aec6-03917f4f9bf5.json delete mode 100644 data/hfopenllm_v2/sethuiyer/Llama-3.1-8B-Experimental-1206-Instruct/49334550-08eb-49a2-9cea-f90f22533ab1.json create mode 100644 data/hfopenllm_v2/sethuiyer/Llama-3.1-8B-Experimental-1206-Instruct/53a0a998-a0a6-4800-80bf-bfd83123f2f6.json create mode 100644 data/hfopenllm_v2/sethuiyer/Llama-3.1-8B-Experimental-1208-Instruct/4ee8df1c-e8ff-4a56-816c-0c2258a226e7.json 
delete mode 100644 data/hfopenllm_v2/sethuiyer/Llama-3.1-8B-Experimental-1208-Instruct/d4b778ea-ae70-437f-a295-772abc659027.json create mode 100644 data/hfopenllm_v2/sethuiyer/LlamaZero-3.1-8B-Experimental-1208/42c8d84d-c8b8-42c6-8f49-4e971df173d7.json create mode 100644 data/hfopenllm_v2/sethuiyer/Llamaverse-3.1-8B-Instruct/77b57dea-22e1-48a6-b8ae-9e474f08ad5f.json delete mode 100644 data/hfopenllm_v2/sethuiyer/Llamaverse-3.1-8B-Instruct/f0a224c2-037a-4229-bb00-5d76d3974078.json delete mode 100644 data/hfopenllm_v2/sethuiyer/Llamazing-3.1-8B-Instruct/9065a7df-dab7-4e3b-bbc5-01f2908c37b3.json create mode 100644 data/hfopenllm_v2/sethuiyer/Llamazing-3.1-8B-Instruct/a9ed5d04-57d2-4566-91df-b798be939fdb.json create mode 100644 data/hfopenllm_v2/sethuiyer/Qwen2.5-7B-Anvita/bad4ec47-fe84-4518-b072-6955938f0c86.json delete mode 100644 data/hfopenllm_v2/shadowml/BeagSake-7B/2a71923c-8697-4b62-94fa-4c16874df7a7.json create mode 100644 data/hfopenllm_v2/shadowml/BeagSake-7B/497e585c-059a-4e18-9a8f-bdaa066f59ea.json delete mode 100644 data/hfopenllm_v2/shadowml/Mixolar-4x7b/65a2c055-9bb5-458d-8a65-89b363b47a3a.json create mode 100644 data/hfopenllm_v2/shadowml/Mixolar-4x7b/e24b2a4e-83e4-4a79-bc41-03a54af00595.json create mode 100644 data/hfopenllm_v2/shastraai/Shastra-LLAMA2-Math-Commonsense-SFT/15e39361-585b-4870-b91a-64dce4fb37ec.json delete mode 100644 data/hfopenllm_v2/shastraai/Shastra-LLAMA2-Math-Commonsense-SFT/563e2894-10bf-43e1-af67-5cd97d52f033.json delete mode 100644 data/hfopenllm_v2/shivam9980/NEPALI-LLM/234f5f98-a5fc-417a-8463-186bf600993a.json create mode 100644 data/hfopenllm_v2/shivam9980/NEPALI-LLM/96efd11b-e9f2-4bf1-90f9-561714137edf.json create mode 100644 data/hfopenllm_v2/shivam9980/mistral-7b-news-cnn-merged/98e9936d-d376-4c72-80a6-0a28cf722ac4.json create mode 100644 data/hfopenllm_v2/shivank21/mistral_dpo_self/7ada9c83-7851-4da2-b9d1-d744b174b777.json create mode 100644 data/hfopenllm_v2/shuttleai/shuttle-3/a6ed72b7-14f1-464c-a7f5-590791982696.json delete mode 100644 data/hfopenllm_v2/shuttleai/shuttle-3/bc357a38-215b-4885-9e0e-6f2b6f0bf1cc.json create mode 100644 data/hfopenllm_v2/shyamieee/Padma-v7.0/79e3f38d-ae2b-44a7-be0d-024adad6bcd6.json delete mode 100644 data/hfopenllm_v2/shyamieee/Padma-v7.0/81546997-4dda-45ea-81fb-23db1b3b5cd7.json delete mode 100644 data/hfopenllm_v2/silma-ai/SILMA-9B-Instruct-v1.0/de11a0bf-47ea-444f-bf89-45e9208cfd1a.json create mode 100644 data/hfopenllm_v2/silma-ai/SILMA-9B-Instruct-v1.0/ef13bdea-cf73-4ead-b6d7-73a155fa9a79.json create mode 100644 data/hfopenllm_v2/silma-ai/SILMA-Kashif-2B-Instruct-v1.0/2663884f-941c-4e16-8029-b38e3a543733.json delete mode 100644 data/hfopenllm_v2/silma-ai/SILMA-Kashif-2B-Instruct-v1.0/e6926be5-561b-453b-8d5f-e64f380c4a51.json create mode 100644 data/hfopenllm_v2/siqi00/Mistral-7B-DFT/ca7af645-4796-4b31-ae7d-2cbebe5a369b.json create mode 100644 data/hfopenllm_v2/siqi00/Mistral-7B-DFT2/f95e098c-d320-4db1-887d-8c3252bbaf77.json create mode 100644 data/hfopenllm_v2/skumar9/Llama-medx_v2/2bbf6dc9-8dd5-4dee-908e-d4a8fc03bc84.json delete mode 100644 data/hfopenllm_v2/skymizer/Llama2-7b-sft-chat-custom-template-dpo/24473e8a-2631-44b5-9cc2-81f0669d8032.json create mode 100644 data/hfopenllm_v2/skymizer/Llama2-7b-sft-chat-custom-template-dpo/5f4edfdb-a62c-4410-83a3-1ceb15d2e7b0.json create mode 100644 data/hfopenllm_v2/someon98/qwen-CoMa-0.5b/aadfae06-73b6-4306-b056-0a733b9bd8f4.json delete mode 100644 data/hfopenllm_v2/sometimesanotion/ChocoTrio-14B-v1/c2034822-689f-4e8b-9575-b63081584aec.json create mode 100644 
data/hfopenllm_v2/sometimesanotion/ChocoTrio-14B-v1/cfecbfbc-46c3-4dd3-8bd9-afe4cd386973.json delete mode 100644 data/hfopenllm_v2/sometimesanotion/IF-reasoning-experiment-40/162b8329-ad84-463b-bda7-7383edda04d8.json create mode 100644 data/hfopenllm_v2/sometimesanotion/IF-reasoning-experiment-40/97640dd1-d415-4b56-818c-cdcede3c52fd.json delete mode 100644 data/hfopenllm_v2/sometimesanotion/IF-reasoning-experiment-80/b1097c42-10fe-4892-8e85-60385ecf35bf.json create mode 100644 data/hfopenllm_v2/sometimesanotion/IF-reasoning-experiment-80/b750c460-ef70-4abf-b77d-118a82039598.json delete mode 100644 data/hfopenllm_v2/sometimesanotion/KytheraMix-7B-v0.2/c50f0ef7-18e4-4f03-8262-ee1519c59b7f.json create mode 100644 data/hfopenllm_v2/sometimesanotion/KytheraMix-7B-v0.2/f4c20519-9e33-4698-a17a-07e5fe7d2707.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.1-experimental/0f204733-55b4-4c06-bd12-dbc2e2593abd.json delete mode 100644 data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.1-experimental/aa2b9fb3-77ca-4a48-b3dd-77879220a6b8.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.3/0bb226ed-fe88-4678-9b50-f77883ceb708.json delete mode 100644 data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.3/6103d107-0eb8-4b0e-8947-d5c7e7cb62f6.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.4-Qwenvergence/fb297e45-9e14-4853-8384-75c187b28a9b.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.6-002-model_stock/4f6eba27-2ab4-4b33-9568-814d15fbd6b9.json delete mode 100644 data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.6-002-model_stock/bd904778-1ad9-48fe-a12e-4b62ce46bd0b.json delete mode 100644 data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.6-model_stock/92d4d9ca-d19f-45c5-b506-5b1039100c92.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.6-model_stock/c3bc3d69-a987-4dd0-b6a5-e0ecc50034fb.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.6/5d02ba78-cf8b-44ee-a1b3-e51ecf437d89.json delete mode 100644 data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.6/dd7005a5-281d-42e9-9916-663b1641718f.json delete mode 100644 data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.7-Fusion/480b1187-5f66-4414-84b1-4c6ce1ebf137.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.7-Fusion/4a43fa67-2438-4c2a-b17b-9d2f221e5a86.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.7-rc1/2c044767-1169-48c6-9e37-e9d1e35f4cfe.json delete mode 100644 data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.7-rc1/5919f71f-8d7b-4cce-a7ce-01680c08acf2.json delete mode 100644 data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.7-rc4/b3b9b1a5-4495-4649-9943-58986d94fcb1.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.7-rc4/bad67b35-d9ef-417a-955b-9c33e87cb927.json create mode 100644 data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v1/60eaa315-f489-405d-a67d-7f1312e90cab.json delete mode 100644 data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v1/e7577048-db59-4629-aeb0-f50b72cbb827.json create mode 100644 data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v2-hi/50de312a-293d-41a4-8bee-4feb0c148b90.json delete mode 100644 data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v2-hi/e4b943ea-3e97-490b-af6d-ad7dc0fdf012.json create mode 100644 data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v2-lo/56f24cac-394c-4439-8f2e-8270e7519bda.json delete mode 100644 
data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v2-lo/57084771-cc66-485c-99ca-470556e14c1b.json create mode 100644 data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v2/8efa1423-0a39-4674-a94d-3d92448010d6.json delete mode 100644 data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v2/95f82b68-6135-4d7d-a2f8-b589d4041776.json create mode 100644 data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v3/350b3491-cba8-46b4-a07f-3d1277270530.json delete mode 100644 data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v3/8fe84e89-c582-44d0-b961-d6ed4d889193.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwen-14B-ProseStock-v4/0741ead7-24f3-49b0-9967-f726df84f78a.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwen-2.5-14B-Virmarckeoso/1ea4d10e-e099-4967-8c43-e84acaeb40be.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwen2.5-14B-Vimarckoso-v2/6c78d9f7-a61e-4f65-ac57-61597f735541.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-IF-Variant/e9bcfb1f-c688-4e7a-918a-e697adaf7aa5.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01/153cfe7f-c27a-40b8-b8d2-54351f26f583.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock/b58372cd-5d55-4f42-a5da-2970e55b44b0.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3/34a028ac-2002-480c-a1af-5b945ffe872e.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwen2.5-14B-Vimarckoso/065ffc51-154c-4a93-a342-0dd476fda473.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Prose/ebc74f4f-157d-4ee4-8b99-9fb5b685afd5.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Reason/91004d26-7b8b-4c0a-bd8c-8880654dc93a.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwen2.5-7B-Gordion-v0.1/5eb1aa92-a031-40d4-ad64-552075dae68a.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwentessential-14B-v1/3ebc147d-58f2-4605-a011-a71c591fac0e.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v013/01795776-e909-46d3-8b6c-0989334e3d0e.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v1/00dffa94-31f9-4b5c-b032-03dd20fc2e8d.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v2/736249d0-cea9-46c6-9677-ecae4b410af4.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v3/ef602cfe-3453-4189-b583-292cf05421d1.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v5/559af2c1-deca-4c35-b83a-004c22ac958a.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v6-Prose/8d66d895-626a-477f-91b6-2195f35aacb3.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v6/004df803-70da-4e59-b3ad-f210c790f29e.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v7/bb2972ca-e673-4be5-bc7e-2689adeac3a9.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v8/eacf2411-a0ea-41fd-8363-e565fce0f26f.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v9/4eefe3cd-ff42-4d4c-89c6-c3e48d8c85e9.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-qv256/f19dab38-48ed-438e-8a62-86e4d111f6c8.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v0.6-004-model_stock/ff4b6d28-62e2-4671-8df9-690ce7f13f0b.json create mode 100644 
data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v10/9c05a7e4-f495-41d0-a7f0-1959e7434ba2.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v11/404e3d61-26d3-4f95-9847-064f0c7c6970.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v12-Prose-DS/0b4574f2-1b71-427f-9923-17db449be191.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v12-Prose/775b88cd-98e8-4d93-acca-e294f68f2da2.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v13-Prose-DS/89464568-47cb-4659-af37-8b061d3f0c8c.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v15-Prose-MS/9fad9d73-acbf-4ffc-886c-551c1fe1ed45.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v2-Prose/c1882335-0df5-4df2-bfa1-c16126c328fb.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v3-Prose/291471ed-3b7c-4bd4-91bb-c27cd74ec460.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v3-Reason/53565fe4-0368-477b-9916-ac9a4b8a9c7b.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v3-Reason/f6cb5e9d-c4c9-44a2-9adf-7fa5639d84d9.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v3/e51fee25-7648-49d9-a8da-b8dbc68a722b.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v6-Prose-model_stock/6acdc96b-cfde-439f-b6b3-a66257b3fcde.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v6-Prose/850da8de-ca13-4f15-bb9f-68b910355cfd.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v8/542fbb7a-d4eb-4cbf-b63a-4305cb108361.json create mode 100644 data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v9/1dbb8206-6a86-4e2c-8ee0-d80fed014a69.json delete mode 100644 data/hfopenllm_v2/sometimesanotion/lamarck-14b-prose-model_stock/3191b3a3-761a-42b4-bd31-b8dc22a4c722.json create mode 100644 data/hfopenllm_v2/sometimesanotion/lamarck-14b-prose-model_stock/6341de3c-8d4c-4af8-8f0d-c81e948bacd6.json create mode 100644 data/hfopenllm_v2/sometimesanotion/lamarck-14b-reason-model_stock/e6cb6a87-6db8-4aee-bede-ce8a60dc8f4a.json delete mode 100644 data/hfopenllm_v2/sometimesanotion/lamarck-14b-reason-model_stock/ee7d14c9-aa49-49df-99fc-057e7dae251f.json create mode 100644 data/hfopenllm_v2/sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-161415/5113439d-1394-46f2-a38e-34b54e94a9e6.json delete mode 100644 data/hfopenllm_v2/sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-161415/7aa22e01-efb1-46f3-aad6-cc1fcb2c3783.json create mode 100644 data/hfopenllm_v2/sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-164205/a03d88aa-7ccd-4f8a-9a1e-c9469d3ae559.json delete mode 100644 data/hfopenllm_v2/sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-164205/c9e9de59-9ec8-4ca9-8869-f77cac14f3ed.json create mode 100644 data/hfopenllm_v2/sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-170522/1cfb40a7-7373-417c-aa1c-f6ab63ecb3b8.json delete mode 100644 data/hfopenllm_v2/sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-170522/1e66ee5b-d3e7-4e2e-8a6f-d098938d4afd.json create mode 100644 data/hfopenllm_v2/sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbc-213steps/446ac93f-d47c-4207-bf32-0cd94e88a931.json delete mode 100644 data/hfopenllm_v2/sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbc-213steps/aabf8b57-c3fd-494b-b8e3-7ff1bdb0a15b.json create mode 100644 data/hfopenllm_v2/sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbo-180steps/7e4ba4f8-2768-4e7b-a11d-75ad22a47c45.json delete mode 
100644 data/hfopenllm_v2/sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbo-180steps/dd216882-a64e-4a0e-8fdc-ff5f99639566.json delete mode 100644 data/hfopenllm_v2/sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbr-180steps/9fa1bbeb-ec5c-4d53-b2f3-eefa660bee5e.json create mode 100644 data/hfopenllm_v2/sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbr-180steps/ca77f821-4722-45b1-b731-7d774232acb4.json delete mode 100644 data/hfopenllm_v2/sophosympatheia/Midnight-Miqu-70B-v1.5/3498b101-b86e-4968-abca-a3d3d42a4e5b.json create mode 100644 data/hfopenllm_v2/sophosympatheia/Midnight-Miqu-70B-v1.5/f32d2a11-edd3-4662-aed7-88c6820b2c2e.json delete mode 100644 data/hfopenllm_v2/speakleash/Bielik-11B-v2.0-Instruct/4aaff24b-0364-4cc9-9680-5f5c6d04128b.json create mode 100644 data/hfopenllm_v2/speakleash/Bielik-11B-v2.0-Instruct/71c56883-dd14-4f16-b839-5ce607a4aadb.json create mode 100644 data/hfopenllm_v2/speakleash/Bielik-11B-v2.1-Instruct/639004c2-81a5-410d-bd61-e3e263f55335.json delete mode 100644 data/hfopenllm_v2/speakleash/Bielik-11B-v2.1-Instruct/834e5703-00f3-47d6-817f-cf039c53d915.json create mode 100644 data/hfopenllm_v2/speakleash/Bielik-11B-v2.2-Instruct/5f232a99-07c9-4df7-9d3b-837966ea6de5.json delete mode 100644 data/hfopenllm_v2/speakleash/Bielik-11B-v2.2-Instruct/70c377ab-41b4-4c30-ade6-65cc52ab916a.json create mode 100644 data/hfopenllm_v2/speakleash/Bielik-11B-v2.3-Instruct/482e34ee-8974-46c6-b3f4-4cc9872ef562.json delete mode 100644 data/hfopenllm_v2/speakleash/Bielik-11B-v2.3-Instruct/822b7413-b84e-4df0-8aca-cc0e95283a86.json create mode 100644 data/hfopenllm_v2/speakleash/Bielik-11B-v2/13743252-3ba3-406d-8e95-5a4cd3ac3772.json delete mode 100644 data/hfopenllm_v2/speakleash/Bielik-11B-v2/680f5fa0-fb15-4687-a40b-7807af2e0fe5.json create mode 100644 data/hfopenllm_v2/spmurrayzzz/Mistral-Syndicate-7B/ff25cb66-ed6f-421a-a038-1feb24666645.json create mode 100644 data/hfopenllm_v2/spow12/ChatWaifu_12B_v2.0/843f0d9a-04e8-4cea-bb18-94651a814d1f.json delete mode 100644 data/hfopenllm_v2/spow12/ChatWaifu_12B_v2.0/f9798139-bc7d-49e7-bc42-bcd0ee808c68.json delete mode 100644 data/hfopenllm_v2/spow12/ChatWaifu_22B_v2.0_preview/d0e259de-1261-4d31-a1d4-4689112deca0.json create mode 100644 data/hfopenllm_v2/spow12/ChatWaifu_22B_v2.0_preview/fa3ccf4a-9b26-4a76-a974-3a776adec7c2.json delete mode 100644 data/hfopenllm_v2/spow12/ChatWaifu_v1.4/ac56cc08-585f-4930-959d-7cbad08c34b0.json create mode 100644 data/hfopenllm_v2/spow12/ChatWaifu_v1.4/ef4ac8ab-4ff5-4fce-94b6-443b1ef7964f.json create mode 100644 data/hfopenllm_v2/spow12/ChatWaifu_v2.0_22B/468bbea7-6dee-4a1a-84b3-e44b0f3ab95a.json delete mode 100644 data/hfopenllm_v2/spow12/ChatWaifu_v2.0_22B/7698fd4d-b2d8-4ba9-98be-d96f9c666b2f.json create mode 100644 data/hfopenllm_v2/spow12/ChatWaifu_v2.0_22B/bd8fdfa5-bda1-402b-9010-94bf78b0127b.json delete mode 100644 data/hfopenllm_v2/spow12/ChatWaifu_v2.0_22B/cccb45b5-c5cb-43c0-be27-bacbb4db5c5b.json delete mode 100644 data/hfopenllm_v2/ssmits/Qwen2.5-95B-Instruct/1c441afa-b8ac-4ff9-b881-e75f8765dd8e.json create mode 100644 data/hfopenllm_v2/ssmits/Qwen2.5-95B-Instruct/a0b34b40-3e68-463f-a7fa-3c58c15aa16d.json delete mode 100644 data/hfopenllm_v2/stabilityai/StableBeluga2/ca7ae45f-833a-4ce2-9fb7-27601e9434c8.json create mode 100644 data/hfopenllm_v2/stabilityai/StableBeluga2/dbf4fbac-cd99-426d-b725-600e60af00d2.json delete mode 100644 data/hfopenllm_v2/stabilityai/stablelm-2-12b-chat/22aad948-bcc7-4f8f-bb42-a839e3d1be96.json create mode 100644 
data/hfopenllm_v2/stabilityai/stablelm-2-12b-chat/f793c471-1638-476a-a050-455a32368e29.json create mode 100644 data/hfopenllm_v2/stabilityai/stablelm-2-12b/1d9c1beb-f84b-4eb7-9c1e-ce5a70afabfb.json delete mode 100644 data/hfopenllm_v2/stabilityai/stablelm-2-12b/21f9d0a5-3ed3-40de-a233-a45f68d669e0.json delete mode 100644 data/hfopenllm_v2/stabilityai/stablelm-2-1_6b-chat/552dc523-3082-4980-a533-ad5d48f1260a.json create mode 100644 data/hfopenllm_v2/stabilityai/stablelm-2-1_6b-chat/99396d97-d875-4cd9-a8a1-a9aec5c43bfc.json delete mode 100644 data/hfopenllm_v2/stabilityai/stablelm-2-1_6b/78db2373-3fcf-468b-8c87-21db03b2fdda.json create mode 100644 data/hfopenllm_v2/stabilityai/stablelm-2-1_6b/82a44b46-156f-4232-92e4-6a08d7a4f197.json create mode 100644 data/hfopenllm_v2/stabilityai/stablelm-2-zephyr-1_6b/3b40defd-5a2e-4d6e-838f-dbbbf12236fb.json delete mode 100644 data/hfopenllm_v2/stabilityai/stablelm-2-zephyr-1_6b/96179bdf-3e1a-47ee-9fc2-ac0b23307556.json delete mode 100644 data/hfopenllm_v2/stabilityai/stablelm-3b-4e1t/3280f4cf-dbb7-46ad-a64c-d4e3c4a58e50.json create mode 100644 data/hfopenllm_v2/stabilityai/stablelm-3b-4e1t/dde41cd5-e6d1-43a9-9593-1a5751bc5f44.json create mode 100644 data/hfopenllm_v2/stabilityai/stablelm-zephyr-3b/1cffcbeb-ef81-4efe-b883-0a8540a799e7.json delete mode 100644 data/hfopenllm_v2/stabilityai/stablelm-zephyr-3b/94960f86-3898-4add-8590-8abeff66a987.json create mode 100644 data/hfopenllm_v2/sthenno-com/miscii-14b-0130/033ef96e-3d2d-49a4-bbff-8bc815a1b40e.json delete mode 100644 data/hfopenllm_v2/sthenno-com/miscii-14b-0130/40a09314-bb43-41ff-a36a-b39064c37add.json create mode 100644 data/hfopenllm_v2/sthenno-com/miscii-14b-0218/bfe654b8-cb79-4845-bf14-85012207ce90.json delete mode 100644 data/hfopenllm_v2/sthenno-com/miscii-14b-0218/f73b09b4-020d-49fd-8ede-6a690088be94.json delete mode 100644 data/hfopenllm_v2/sthenno-com/miscii-14b-1028/3f2549af-9bc5-4ad1-a429-79bbb91c929f.json create mode 100644 data/hfopenllm_v2/sthenno-com/miscii-14b-1028/5c4efc23-9591-447b-aecc-4c82797d7d01.json create mode 100644 data/hfopenllm_v2/sthenno-com/miscii-14b-1225/a5fe3fab-95d9-41ac-a95f-66205e489dae.json delete mode 100644 data/hfopenllm_v2/sthenno-com/miscii-14b-1225/ab816ab5-9edb-49d1-8f89-c3dc36a8a0de.json delete mode 100644 data/hfopenllm_v2/sthenno/tempesthenno-0120/9285700f-106e-481d-88bc-5d59b5d57377.json create mode 100644 data/hfopenllm_v2/sthenno/tempesthenno-0120/c0bf8ffb-444a-43a3-9514-76aa92c5f5b7.json create mode 100644 data/hfopenllm_v2/sthenno/tempesthenno-fusion-0309/3d556d9f-036b-4368-bb4a-18ad6b444bdf.json delete mode 100644 data/hfopenllm_v2/sthenno/tempesthenno-fusion-0309/97793808-7d23-4ec7-b1dd-0c7b1dea1c3c.json delete mode 100644 data/hfopenllm_v2/sthenno/tempesthenno-kto-0205-ckpt80/689a346d-191e-4ec1-93b5-6f64c1a293ff.json create mode 100644 data/hfopenllm_v2/sthenno/tempesthenno-kto-0205-ckpt80/92905e27-1033-4423-b87d-23236f9be964.json create mode 100644 data/hfopenllm_v2/sthenno/tempesthenno-nuslerp-001/17326bb0-42c2-469a-ac19-6a4b75d9e6e2.json delete mode 100644 data/hfopenllm_v2/sthenno/tempesthenno-nuslerp-001/1d12c40a-a9b5-483b-aaac-07e323de73a9.json create mode 100644 data/hfopenllm_v2/sthenno/tempesthenno-nuslerp-0124/11574f56-6c34-48e4-8fb5-c58d42f07330.json delete mode 100644 data/hfopenllm_v2/sthenno/tempesthenno-nuslerp-0124/b814d738-b9f3-42df-8774-0708d456c2ea.json delete mode 100644 data/hfopenllm_v2/sthenno/tempesthenno-ppo-ckpt40/7c2e9776-92e4-457b-ae08-32c3e351b8e1.json create mode 100644 
data/hfopenllm_v2/sthenno/tempesthenno-ppo-ckpt40/8f728c51-15f9-422d-bbdb-4d976961ab9d.json delete mode 100644 data/hfopenllm_v2/sthenno/tempesthenno-sft-0309-ckpt10/65f19ffe-7428-41e5-a52d-02fad8e595c0.json create mode 100644 data/hfopenllm_v2/sthenno/tempesthenno-sft-0309-ckpt10/8d6e4b5e-ad17-4390-bc6b-ab6581a62442.json delete mode 100644 data/hfopenllm_v2/sthenno/tempesthenno-sft-0314-stage1-ckpt50/07d2cbaf-fa54-4d0b-bdb7-4179b5f3bebe.json create mode 100644 data/hfopenllm_v2/sthenno/tempesthenno-sft-0314-stage1-ckpt50/5e33bf05-6c67-4ecc-982d-7590e9953145.json delete mode 100644 data/hfopenllm_v2/sthenno/tempestissimo-14b-0309/eab26e25-e8bd-4c19-8f14-a933506372c6.json create mode 100644 data/hfopenllm_v2/sthenno/tempestissimo-14b-0309/f55ae879-bd95-409c-a8a3-9a57cd615a31.json delete mode 100644 data/hfopenllm_v2/streamerbtw1002/Nexuim-R1-7B-Instruct/3e78ef29-f546-41b0-af2b-f3ae4154e396.json create mode 100644 data/hfopenllm_v2/streamerbtw1002/Nexuim-R1-7B-Instruct/b8426ac9-14f1-4e07-9c7e-b50cb2c7a1e3.json delete mode 100644 data/hfopenllm_v2/stupidity-ai/Llama-3-8B-Instruct-MultiMoose/2f177d4b-50fb-4a87-a157-84d1094d3971.json create mode 100644 data/hfopenllm_v2/stupidity-ai/Llama-3-8B-Instruct-MultiMoose/51fd90b0-0d5a-4199-ba5b-ff29eeeab06b.json delete mode 100644 data/hfopenllm_v2/suayptalha/Clarus-7B-v0.1/b1070a2a-7694-472d-84a4-f20f4cfe1b88.json create mode 100644 data/hfopenllm_v2/suayptalha/Clarus-7B-v0.1/c46e4fa1-afae-4b68-a13e-034b5cd2b779.json create mode 100644 data/hfopenllm_v2/suayptalha/Clarus-7B-v0.2/42cc06ed-20fc-4e84-836f-3d7243ec336d.json delete mode 100644 data/hfopenllm_v2/suayptalha/Clarus-7B-v0.2/c85bdaec-43e5-4507-a615-89549901e392.json delete mode 100644 data/hfopenllm_v2/suayptalha/Clarus-7B-v0.3/21d1f676-4a7d-4305-b248-4a72d7ce0121.json create mode 100644 data/hfopenllm_v2/suayptalha/Clarus-7B-v0.3/aaa53387-af33-4454-95f0-3af85f4778c0.json create mode 100644 data/hfopenllm_v2/suayptalha/DeepSeek-R1-Distill-Llama-3B/465bca6d-b32a-4d34-9916-fc8b3166faa0.json delete mode 100644 data/hfopenllm_v2/suayptalha/Falcon3-Jessi-v0.4-7B-Slerp/9a9cb5f7-e95a-46c5-90ed-42152fc0a617.json create mode 100644 data/hfopenllm_v2/suayptalha/Falcon3-Jessi-v0.4-7B-Slerp/bf138f3d-09d9-4dea-aa43-5efc804bc775.json delete mode 100644 data/hfopenllm_v2/suayptalha/HomerCreativeAnvita-Mix-Qw7B/7bb9a15a-ece4-4fb7-b0ae-dc8cf69efb6b.json create mode 100644 data/hfopenllm_v2/suayptalha/HomerCreativeAnvita-Mix-Qw7B/cb4e944c-66f6-49f2-b1e0-d90454e34315.json create mode 100644 data/hfopenllm_v2/suayptalha/Komodo-Llama-3.2-3B-v2-fp16/b2b6bc49-bda1-4a3e-a071-ec0a0bdc1313.json delete mode 100644 data/hfopenllm_v2/suayptalha/Lamarckvergence-14B/2c918f65-3565-41f6-a9c2-d042608bc592.json create mode 100644 data/hfopenllm_v2/suayptalha/Lamarckvergence-14B/933f3d40-8726-418f-be2f-1f9686e9ab02.json create mode 100644 data/hfopenllm_v2/suayptalha/Lix-14B-v0.1/af1bf15c-7c5f-46fa-ba3a-821b521e86f4.json delete mode 100644 data/hfopenllm_v2/suayptalha/Lix-14B-v0.1/f4866eb3-28b0-416b-92c7-764d38905686.json create mode 100644 data/hfopenllm_v2/suayptalha/Luminis-phi-4/43df4336-1eb8-4df7-8309-1199aafc07b1.json create mode 100644 data/hfopenllm_v2/suayptalha/Maestro-10B/44ae222d-407c-4c8b-9b67-75440631f848.json delete mode 100644 data/hfopenllm_v2/suayptalha/Maestro-10B/b302d40a-64bd-4cdd-b5fb-3a9c1dbf1406.json create mode 100644 data/hfopenllm_v2/suayptalha/Rombos-2.5-T.E-8.1/a87db0fe-3727-4ff1-875f-9edd3109f3a2.json delete mode 100644 
data/hfopenllm_v2/suayptalha/Rombos-2.5-T.E-8.1/fa7a31f9-9c10-4f5f-a06f-e628363a726a.json create mode 100644 data/hfopenllm_v2/sumink/Qmerft/0c73e33a-7f6f-4925-970b-db289069d5ca.json delete mode 100644 data/hfopenllm_v2/sumink/Qmerft/11243917-73a3-484e-ac8b-40065c65ea8c.json create mode 100644 data/hfopenllm_v2/sumink/Qwenftmodel/02bc7f5c-dc2f-4d8c-adcb-a89a34ff5549.json create mode 100644 data/hfopenllm_v2/sumink/Qwenmplus/590c031c-2aa6-48e6-9b3f-68b1a585dd39.json create mode 100644 data/hfopenllm_v2/sumink/Qwensci/970c9fb8-c217-444b-a025-f4d9acdd679d.json create mode 100644 data/hfopenllm_v2/sumink/bbhqwen/07a08dd7-822b-49ac-859b-d2fc75b9c88d.json create mode 100644 data/hfopenllm_v2/sumink/bbhqwen2/0c0e9250-b75a-4549-9fb2-2b5c9ac2ef49.json create mode 100644 data/hfopenllm_v2/sumink/bbhqwen3/2ae306b1-5409-4418-b5e4-50feff9dafe7.json create mode 100644 data/hfopenllm_v2/sumink/bbhqwen4/44bf5d75-afb2-48fa-a0fa-96d283b0ae94.json create mode 100644 data/hfopenllm_v2/sumink/bbhqwen5/e3860bb2-b2e4-4fdf-91cb-3343ad6440d7.json create mode 100644 data/hfopenllm_v2/sumink/bbhqwen6/6369fceb-148f-4491-9488-420182a9838f.json create mode 100644 data/hfopenllm_v2/sumink/flflmillama/045c814e-a30f-4b6b-b4f4-382dee4063b7.json create mode 100644 data/hfopenllm_v2/sumink/ftgpt/59d2b375-5696-47d0-9c96-1a826c08bea0.json create mode 100644 data/hfopenllm_v2/sumink/llamaft/ff601b4f-24a1-4376-8c5e-5bda2ea88f65.json create mode 100644 data/hfopenllm_v2/sumink/llamamerge/8c043ba8-f7dd-4cc8-a3b1-7201042b8dc8.json create mode 100644 data/hfopenllm_v2/sumink/llftfl7/ce27dff4-9ca7-47cb-bc18-b5dd167c72a2.json delete mode 100644 data/hfopenllm_v2/sumink/llftfl7/ed7c36f0-5b1a-45ef-be66-f9880cad099d.json delete mode 100644 data/hfopenllm_v2/sumink/llmer/8f2bad2c-5c31-433a-bbf0-f1a8f0a80c3a.json create mode 100644 data/hfopenllm_v2/sumink/llmer/d69ecbfa-5036-48b8-8fed-f9162e2857f5.json delete mode 100644 data/hfopenllm_v2/sumink/qwft/6cdf831f-3ccd-4d78-a94f-269ace42fc1c.json create mode 100644 data/hfopenllm_v2/sumink/qwft/b5924329-c182-482a-bee8-22fcb348281d.json delete mode 100644 data/hfopenllm_v2/sumink/qwmer/2cd4d3ec-2800-4223-ab50-6f9f4a1e1a57.json create mode 100644 data/hfopenllm_v2/sumink/qwmer/a6a6b6f2-ac28-4c4a-806e-8abe8c7f9190.json delete mode 100644 data/hfopenllm_v2/sumink/solarmer3/59ebeb48-88c4-4c63-92bb-888752ea9dad.json create mode 100644 data/hfopenllm_v2/sumink/solarmer3/b904301c-d0c0-41a4-b92e-92b2d7c9c13a.json delete mode 100644 data/hfopenllm_v2/sumink/somer/282fa475-0ac8-4230-8020-9dbb7fda03da.json create mode 100644 data/hfopenllm_v2/sumink/somer/b5de0218-91dc-487a-be90-70f8bcb64803.json create mode 100644 data/hfopenllm_v2/sumink/somer2/3870f65b-3429-45c2-846f-6af30155a78b.json delete mode 100644 data/hfopenllm_v2/sumink/somer2/fee6fbc3-c115-4668-8b5b-35b307c15fe8.json delete mode 100644 data/hfopenllm_v2/sumink/somerft/cb6879a2-41b6-40b6-bb20-723aa0b213e1.json create mode 100644 data/hfopenllm_v2/sumink/somerft/d6c33a51-be09-4cb5-9942-4348668d3e5e.json create mode 100644 data/hfopenllm_v2/sunbaby/BrainCog-8B-0.1-Instruct/1ccd36ee-445a-4861-8835-d602973148fc.json delete mode 100644 data/hfopenllm_v2/sunbaby/BrainCog-8B-0.1-Instruct/96412e92-8a74-429b-8014-30a526521356.json create mode 100644 data/hfopenllm_v2/swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA/4c7ef4ee-3a7e-4f15-8a4a-c5853b1c6a47.json delete mode 100644 data/hfopenllm_v2/swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA/f2475574-fc9d-4cd1-94fb-ddd8bb89fa95.json create mode 100644 
data/hfopenllm_v2/synergetic/FrankenQwen2.5-14B/6a69202c-1c68-43e4-bd45-bbc2ff2db743.json delete mode 100644 data/hfopenllm_v2/talha2001/Beast-Soul-new/01f536ff-7613-4b09-b793-1f51bf32f705.json create mode 100644 data/hfopenllm_v2/talha2001/Beast-Soul-new/a053d6a3-05d4-4d0b-a9b8-7865cf7ac612.json delete mode 100644 data/hfopenllm_v2/tangledgroup/tangled-llama-pints-1.5b-v0.1-instruct/727047f6-974d-4980-a8cd-672728885485.json create mode 100644 data/hfopenllm_v2/tangledgroup/tangled-llama-pints-1.5b-v0.1-instruct/f76d3d30-4fce-48a9-a26b-7d714fff1d29.json delete mode 100644 data/hfopenllm_v2/tangledgroup/tangled-llama-pints-1.5b-v0.2-instruct/3964e579-bb1f-46be-8740-ba8097d8f7ef.json create mode 100644 data/hfopenllm_v2/tangledgroup/tangled-llama-pints-1.5b-v0.2-instruct/eb38a092-1b56-4348-8188-baa2243f7046.json create mode 100644 data/hfopenllm_v2/tanliboy/lambda-gemma-2-9b-dpo/1c4cfb94-fc66-4fe2-9879-78683abe654f.json create mode 100644 data/hfopenllm_v2/tanliboy/lambda-gemma-2-9b-dpo/2deef730-c37b-46ca-82b7-de38ae724fd4.json delete mode 100644 data/hfopenllm_v2/tanliboy/lambda-gemma-2-9b-dpo/6dd14f37-6493-4f9d-a5a8-6ad62aa4ca04.json delete mode 100644 data/hfopenllm_v2/tanliboy/lambda-gemma-2-9b-dpo/fe623f86-5397-4818-aa3f-75c2f6632bec.json delete mode 100644 data/hfopenllm_v2/tanliboy/lambda-qwen2.5-14b-dpo-test/04686df9-9ef7-4df9-bb1e-a4c113a6e32e.json create mode 100644 data/hfopenllm_v2/tanliboy/lambda-qwen2.5-14b-dpo-test/13a92beb-a8a4-4853-b2f5-1b09d3e2a64a.json create mode 100644 data/hfopenllm_v2/tanliboy/lambda-qwen2.5-32b-dpo-test/36cf5b59-5369-4baf-80c1-3a47678eb5cb.json delete mode 100644 data/hfopenllm_v2/tanliboy/lambda-qwen2.5-32b-dpo-test/87569202-e422-423b-a2a6-96f94dbaf99c.json delete mode 100644 data/hfopenllm_v2/tannedbum/Ellaria-9B/ca946b2a-4345-42b9-aefd-0907b91759d7.json create mode 100644 data/hfopenllm_v2/tannedbum/Ellaria-9B/fced3ef1-fb69-47fe-bf68-3efe72db3142.json delete mode 100644 data/hfopenllm_v2/tannedbum/L3-Nymeria-Maid-8B/3b1941a4-b8ca-49f4-9c09-18beb1b470e4.json create mode 100644 data/hfopenllm_v2/tannedbum/L3-Nymeria-Maid-8B/7a83d75a-332e-476a-b0f7-986b2ec9cc5d.json delete mode 100644 data/hfopenllm_v2/tannedbum/L3-Nymeria-v2-8B/61d5c969-6aff-49b7-8fa3-bcf0ff0b661d.json create mode 100644 data/hfopenllm_v2/tannedbum/L3-Nymeria-v2-8B/6f413d72-cd9f-435c-b13e-9cec14edeb5c.json create mode 100644 data/hfopenllm_v2/tannedbum/L3-Rhaenys-8B/a7822bbf-bc23-437d-8e5b-32fb06d3a9ec.json delete mode 100644 data/hfopenllm_v2/tannedbum/L3-Rhaenys-8B/c44ac25e-9139-477d-abcd-442b3a0dc2cf.json create mode 100644 data/hfopenllm_v2/teknium/CollectiveCognition-v1.1-Mistral-7B/0b19508c-4996-4fb7-b0e0-9fa952854fa3.json create mode 100644 data/hfopenllm_v2/teknium/OpenHermes-13B/447c22c1-8929-420f-b59b-01ab32a22281.json delete mode 100644 data/hfopenllm_v2/teknium/OpenHermes-13B/55d876b7-159e-4c76-848b-1480b4c2f4a2.json create mode 100644 data/hfopenllm_v2/teknium/OpenHermes-2-Mistral-7B/ab3dbe43-658e-4c8a-a399-b3d070d467ba.json create mode 100644 data/hfopenllm_v2/teknium/OpenHermes-2.5-Mistral-7B/ee5c87a4-aa06-4728-a9bf-2fc35284b987.json delete mode 100644 data/hfopenllm_v2/teknium/OpenHermes-7B/089f10dc-8be6-4595-a0b3-7d5bb4fc13fa.json create mode 100644 data/hfopenllm_v2/teknium/OpenHermes-7B/6a1a58f6-e399-4ac3-a516-f02a37b6ff68.json create mode 100644 data/hfopenllm_v2/tensopolis/falcon3-10b-tensopolis-v1/9e2bfd77-b73e-436f-ad50-ccfd379cd3f2.json delete mode 100644 data/hfopenllm_v2/tensopolis/falcon3-10b-tensopolis-v1/d59c7d7c-99a9-4de5-9a69-60b934eafa1b.json create 
mode 100644 data/hfopenllm_v2/tensopolis/falcon3-10b-tensopolis-v2/100cf60a-c43c-4b3a-a667-a45cffdd562a.json delete mode 100644 data/hfopenllm_v2/tensopolis/falcon3-10b-tensopolis-v2/ce5dfe15-432b-42ac-9ef1-569ab4e640a6.json create mode 100644 data/hfopenllm_v2/tensopolis/lamarckvergence-14b-tensopolis-v1/2088fca7-11d7-47de-808d-d47da0caad0f.json delete mode 100644 data/hfopenllm_v2/tensopolis/lamarckvergence-14b-tensopolis-v1/da94039c-b214-4ad0-a312-a38cea28498b.json create mode 100644 data/hfopenllm_v2/tensopolis/mistral-small-2501-tensopolis-v1/bf0b3560-9d38-406a-ad30-5fd157f0fe43.json create mode 100644 data/hfopenllm_v2/tensopolis/mistral-small-r1-tensopolis/9ce12fbc-00f7-4cc8-bd9d-67ead83a0801.json create mode 100644 data/hfopenllm_v2/tensopolis/phi-4-tensopolis-v1/14501de3-dac0-44af-8c17-7abcd9bbba8b.json create mode 100644 data/hfopenllm_v2/tensopolis/qwen2.5-14b-tensopolis-v1/c9db8ce4-6f0d-4c13-8484-6fca9e9c3798.json create mode 100644 data/hfopenllm_v2/tensopolis/qwen2.5-3b-or1-tensopolis/8c6c06be-bbc6-4307-ba5b-336dc2bb466f.json create mode 100644 data/hfopenllm_v2/tensopolis/qwen2.5-7b-tensopolis-v1/1326ff61-d0b4-46eb-9bcf-f978166e622b.json create mode 100644 data/hfopenllm_v2/tensopolis/qwen2.5-7b-tensopolis-v2/4c9e829f-7a99-4d61-8730-7457215a4fd6.json delete mode 100644 data/hfopenllm_v2/tensopolis/virtuoso-lite-tensopolis-v1/574e1e63-46f3-44a4-8d04-ad1709a7e1dd.json create mode 100644 data/hfopenllm_v2/tensopolis/virtuoso-lite-tensopolis-v1/afc24d42-6d25-4036-8f22-fcf944b481b7.json create mode 100644 data/hfopenllm_v2/tensopolis/virtuoso-lite-tensopolis-v2/6f6db681-991e-408b-8d4e-71fff9e1c974.json delete mode 100644 data/hfopenllm_v2/tensopolis/virtuoso-lite-tensopolis-v2/9024dcc9-fbd0-4ab0-9142-cbf741e7ae54.json delete mode 100644 data/hfopenllm_v2/tensopolis/virtuoso-small-tensopolis-v1/2228ade6-6243-423f-857e-66f5584a1511.json create mode 100644 data/hfopenllm_v2/tensopolis/virtuoso-small-tensopolis-v1/f3fa76bf-f11c-4dee-9b9f-00f1ec793dac.json create mode 100644 data/hfopenllm_v2/tensopolis/virtuoso-small-tensopolis-v2/77b457d9-4957-4f0d-a8d3-e005ae382239.json delete mode 100644 data/hfopenllm_v2/tensopolis/virtuoso-small-tensopolis-v2/c5c34d42-c043-4d60-80bf-5cb522e9d915.json create mode 100644 data/hfopenllm_v2/tensopolis/virtuoso-small-v2-tensopolis-v1/11474a7a-73a6-4a3f-8bcb-bef783e12a2b.json delete mode 100644 data/hfopenllm_v2/tensopolis/virtuoso-small-v2-tensopolis-v1/727869c4-3498-482a-a04e-c6a779c0e558.json create mode 100644 data/hfopenllm_v2/tensoropera/Fox-1-1.6B/23cc1e7f-0994-43a5-8403-5361a2976285.json delete mode 100644 data/hfopenllm_v2/tensoropera/Fox-1-1.6B/998d2bbc-2722-4fb8-9a6a-230c146e2e37.json create mode 100644 data/hfopenllm_v2/tenyx/Llama3-TenyxChat-70B/88c257d3-d5c1-4e1f-bbc8-9fc6bd65e15e.json delete mode 100644 data/hfopenllm_v2/theo77186/Qwen2.5-Coder-7B-Instruct-20241106/b8198c8b-533a-4f7c-9025-1ccd7a4aba76.json create mode 100644 data/hfopenllm_v2/theo77186/Qwen2.5-Coder-7B-Instruct-20241106/ec4c2032-8fc0-448a-a7c4-ee9b35b642db.json delete mode 100644 data/hfopenllm_v2/theprint/Boptruth-Agatha-7B/0d1c0e64-8a5a-4797-9234-91a4f1726171.json create mode 100644 data/hfopenllm_v2/theprint/Boptruth-Agatha-7B/3c7ac4de-1456-4afb-b7ac-07beb6cb4d39.json delete mode 100644 data/hfopenllm_v2/theprint/CleverBoi-7B-v2/0ef8de5e-4e2f-4d74-9267-e953375dbdf4.json create mode 100644 data/hfopenllm_v2/theprint/CleverBoi-7B-v2/a06ad94f-13ee-466c-b25f-87cd87012678.json delete mode 100644 
data/hfopenllm_v2/theprint/CleverBoi-7B-v3/4634b7d7-110e-422c-af60-80cd9df06dac.json create mode 100644 data/hfopenllm_v2/theprint/CleverBoi-7B-v3/9e1ca6d0-d2b2-48c5-acc2-ad299ce02e1f.json create mode 100644 data/hfopenllm_v2/theprint/CleverBoi-Llama-3.1-8B-Instruct/7dcd6e37-3685-4b08-b983-b2a711aeaf73.json delete mode 100644 data/hfopenllm_v2/theprint/CleverBoi-Llama-3.1-8B-Instruct/86d3bb20-09a5-4ec0-a473-14a3e3c5a402.json create mode 100644 data/hfopenllm_v2/theprint/CleverBoi-Llama-3.1-8B-v2/b1ae6801-0139-41d3-85dc-102ad5cc4c6a.json delete mode 100644 data/hfopenllm_v2/theprint/CleverBoi-Nemo-12B-v2/3ac95acf-830a-48ca-a144-42b610558062.json create mode 100644 data/hfopenllm_v2/theprint/CleverBoi-Nemo-12B-v2/4cc037a2-d952-4566-a575-015f8e3a5925.json create mode 100644 data/hfopenllm_v2/theprint/Code-Llama-Bagel-8B/a1eaadae-8601-4c18-ab0c-4f6d80d3307b.json create mode 100644 data/hfopenllm_v2/theprint/Conversely-Mistral-7B/40e452df-8f0a-4473-a3d1-41f9c288c12f.json create mode 100644 data/hfopenllm_v2/theprint/Llama-3.2-3B-VanRossum/216020ac-276b-436e-815b-d6968eb83770.json create mode 100644 data/hfopenllm_v2/theprint/ReWiz-7B/1bb4aeac-a5e1-4fd7-9e70-64fdcfc600cd.json delete mode 100644 data/hfopenllm_v2/theprint/ReWiz-7B/b6f50cef-72b3-414c-a33a-a2c8b2af18c0.json create mode 100644 data/hfopenllm_v2/theprint/ReWiz-Llama-3.1-8B-v2/25739611-f690-41b4-87de-9f4ea8b3d815.json create mode 100644 data/hfopenllm_v2/theprint/ReWiz-Llama-3.2-3B/b8c27fdd-5b35-41ab-8a35-b5a48f27cceb.json delete mode 100644 data/hfopenllm_v2/theprint/ReWiz-Nemo-12B-Instruct/92999dc0-7075-44ee-be68-1ec32ab5645d.json create mode 100644 data/hfopenllm_v2/theprint/ReWiz-Nemo-12B-Instruct/fa237949-c3ac-482a-8a54-5a2019f24016.json create mode 100644 data/hfopenllm_v2/theprint/ReWiz-Qwen-2.5-14B/b60dd828-a3e7-46a8-b4c2-322aeca42faf.json create mode 100644 data/hfopenllm_v2/theprint/ReWiz-Worldbuilder-7B/5de9f914-333f-4181-a93f-79257a3daf54.json delete mode 100644 data/hfopenllm_v2/theprint/ReWiz-Worldbuilder-7B/cf71c265-ef73-4410-a2bc-ce9702cfbcee.json delete mode 100644 data/hfopenllm_v2/theprint/RuDolph-Hermes-7B/22bab713-09d7-471a-b077-cb8c336ba151.json create mode 100644 data/hfopenllm_v2/theprint/RuDolph-Hermes-7B/e2d23da4-226a-4a02-8390-e8edaea4b65b.json create mode 100644 data/hfopenllm_v2/theprint/WorldBuilder-12B/c64c7470-dcf9-46f8-b789-cab7e902739d.json delete mode 100644 data/hfopenllm_v2/theprint/WorldBuilder-12B/f1107803-5a3b-4fcc-b948-ff622b5f26da.json create mode 100644 data/hfopenllm_v2/theprint/phi-3-mini-4k-python/f6d727a3-19dc-4173-a88f-2c47449896aa.json create mode 100644 data/hfopenllm_v2/thinkcoder/llama3-8b-instruct-lora-8-sft/490d14c8-2cb0-4328-9f41-6074b28d6fdc.json delete mode 100644 data/hfopenllm_v2/thinkcoder/llama3-8b-instruct-lora-8-sft/51caac64-fee1-4c7f-b474-1b1e0f71212c.json delete mode 100644 data/hfopenllm_v2/thirdeyeai/elevate360m/013a9bf9-7b9e-4084-b7a2-bb77ad0c18e1.json create mode 100644 data/hfopenllm_v2/thirdeyeai/elevate360m/9351b079-7ef5-42ec-bb83-f0d8ec7de479.json create mode 100644 data/hfopenllm_v2/thomas-yanxin/XinYuan-Qwen2-1_5B/852d5adb-f422-4102-8114-082ab0b3c07d.json create mode 100644 data/hfopenllm_v2/thomas-yanxin/XinYuan-Qwen2-7B-0917/c64e98cd-c022-4834-a3e0-3949416d1fb1.json create mode 100644 data/hfopenllm_v2/thomas-yanxin/XinYuan-Qwen2-7B/f101bd15-ac61-49d4-beac-c89bc889b34b.json create mode 100644 data/hfopenllm_v2/thomas-yanxin/XinYuan-Qwen2.5-7B-0917/11caf1c1-e2a0-4abb-bb0e-d06853a06e4d.json create mode 100644 
data/hfopenllm_v2/tianyil1/MistralForCausalLM_Cal_DPO/f0b57a60-8402-4430-93f3-b846a94113f2.json delete mode 100644 data/hfopenllm_v2/tiiuae/Falcon3-10B-Base/4e1ce0d3-f454-480b-a4f7-7aa827eaaf1a.json create mode 100644 data/hfopenllm_v2/tiiuae/Falcon3-10B-Base/50aa8077-4493-47a9-9cec-014c56343ecf.json create mode 100644 data/hfopenllm_v2/tiiuae/Falcon3-10B-Instruct/5e70d00b-c822-4ad6-afe8-3756a7038c57.json delete mode 100644 data/hfopenllm_v2/tiiuae/Falcon3-10B-Instruct/741838df-e2a3-4c54-84d3-fe491444071b.json delete mode 100644 data/hfopenllm_v2/tiiuae/Falcon3-1B-Base/1e11a625-87e1-49d0-94a6-8f9ec1f75fc3.json create mode 100644 data/hfopenllm_v2/tiiuae/Falcon3-1B-Base/8162ba41-e630-470f-a297-72fb9f2110fd.json create mode 100644 data/hfopenllm_v2/tiiuae/Falcon3-1B-Instruct/60dd9d02-476f-459d-a41c-f89f82116dc3.json delete mode 100644 data/hfopenllm_v2/tiiuae/Falcon3-1B-Instruct/a060e2b0-d1ae-48b7-b8f9-c51fadc3e152.json delete mode 100644 data/hfopenllm_v2/tiiuae/Falcon3-3B-Base/1b0d1ae7-322b-46d2-bc33-160f578499b1.json create mode 100644 data/hfopenllm_v2/tiiuae/Falcon3-3B-Base/73e89f21-5799-4835-a0e0-a6664c0483da.json delete mode 100644 data/hfopenllm_v2/tiiuae/Falcon3-3B-Instruct/7aa3aa0e-3b5e-4c0c-a697-2e87859c44f2.json create mode 100644 data/hfopenllm_v2/tiiuae/Falcon3-3B-Instruct/7f355ad4-9156-486d-8cf4-723117da3bb8.json delete mode 100644 data/hfopenllm_v2/tiiuae/Falcon3-7B-Base/2420519c-81f1-43b3-9b76-af141d2574f4.json create mode 100644 data/hfopenllm_v2/tiiuae/Falcon3-7B-Base/4ccc6026-b639-488d-867f-d98ea49cf1b6.json create mode 100644 data/hfopenllm_v2/tiiuae/Falcon3-7B-Instruct/3cf2e68e-4de0-436e-935e-86935e11f72f.json delete mode 100644 data/hfopenllm_v2/tiiuae/Falcon3-7B-Instruct/ed988bd0-76b0-4ab6-9c9e-5a5e0aefb936.json delete mode 100644 data/hfopenllm_v2/tiiuae/Falcon3-Mamba-7B-Base/766e6e63-5779-49cd-9e8c-2bc475c1356a.json create mode 100644 data/hfopenllm_v2/tiiuae/Falcon3-Mamba-7B-Base/e9e4ae5d-0dd1-463c-9f15-47cb21efb409.json delete mode 100644 data/hfopenllm_v2/tiiuae/Falcon3-Mamba-7B-Instruct/69491efc-0287-4288-bdf0-bcc57c53b94e.json create mode 100644 data/hfopenllm_v2/tiiuae/Falcon3-Mamba-7B-Instruct/c57eb23a-5998-4ab9-9a98-39b1338f5ba6.json delete mode 100644 data/hfopenllm_v2/tiiuae/falcon-11B/705a1ff4-2e40-4827-af54-099870fac588.json create mode 100644 data/hfopenllm_v2/tiiuae/falcon-11B/94fb625d-f58c-4f2e-8268-1dc4472c1cce.json delete mode 100644 data/hfopenllm_v2/tiiuae/falcon-40b-instruct/1d6f8802-e9aa-471c-8fbc-1cd807357ab5.json create mode 100644 data/hfopenllm_v2/tiiuae/falcon-40b-instruct/4481ddef-2bef-4284-b56d-21054f5a9a97.json create mode 100644 data/hfopenllm_v2/tiiuae/falcon-40b/80048c4b-e97b-45c7-aa04-70ce69481a97.json delete mode 100644 data/hfopenllm_v2/tiiuae/falcon-40b/cfdece82-631e-48b7-8232-91a8d9ccf65c.json delete mode 100644 data/hfopenllm_v2/tiiuae/falcon-7b-instruct/2b84722f-58fc-421d-ae1a-9e21ac0b4080.json create mode 100644 data/hfopenllm_v2/tiiuae/falcon-7b-instruct/d21a2557-2348-4087-b2a6-6e1c0101bccc.json delete mode 100644 data/hfopenllm_v2/tiiuae/falcon-7b/0e9837cb-4dda-4058-a89e-4127b5980eed.json create mode 100644 data/hfopenllm_v2/tiiuae/falcon-7b/76290d4b-5526-400b-8ca4-24d220f7c02d.json create mode 100644 data/hfopenllm_v2/tiiuae/falcon-mamba-7b/3a146535-09b3-4246-8bd8-0e984e0905b1.json delete mode 100644 data/hfopenllm_v2/tiiuae/falcon-mamba-7b/9878c419-fff8-402a-a315-70864e5ae60c.json create mode 100644 data/hfopenllm_v2/tinycompany/BiBo-v0.3/6683f95c-f97f-4117-b3c5-c1ed9587289e.json delete mode 100644 
data/hfopenllm_v2/tinycompany/BiBo-v0.3/d0907791-99ed-4c01-8df4-80ab6ecc906f.json delete mode 100644 data/hfopenllm_v2/tinycompany/BiBo-v0.7/8f186e60-a090-4b9e-9910-23054617fe57.json create mode 100644 data/hfopenllm_v2/tinycompany/BiBo-v0.7/bbe74b2b-9e13-4c13-92c8-618078667248.json create mode 100644 data/hfopenllm_v2/tinycompany/ShawtyIsBad-bgem3/61876ce3-acc4-4619-b0c2-78ac4dff48ea.json delete mode 100644 data/hfopenllm_v2/tinycompany/ShawtyIsBad-bgem3/ebf9067a-9836-4152-aa62-3ecbbc2459dc.json create mode 100644 data/hfopenllm_v2/tinycompany/ShawtyIsBad-e5-large/b304baee-c9de-4982-801d-2b9e7f1a7334.json delete mode 100644 data/hfopenllm_v2/tinycompany/ShawtyIsBad-e5-large/e8fe4b10-f6f3-4036-a3d9-77b8d28822ae.json create mode 100644 data/hfopenllm_v2/tinycompany/ShawtyIsBad-ib/6f27e746-1bdd-4cec-a955-c27f2f9900ef.json delete mode 100644 data/hfopenllm_v2/tinycompany/ShawtyIsBad-ib/e2514850-3847-4fe7-abd8-240762ba507a.json create mode 100644 data/hfopenllm_v2/tinycompany/ShawtyIsBad-nomic-moe/30637c5d-1bc0-49dc-8afd-335a9a66f196.json delete mode 100644 data/hfopenllm_v2/tinycompany/ShawtyIsBad-nomic-moe/7896d77a-e4c3-431b-9490-26d88664385b.json create mode 100644 data/hfopenllm_v2/tinycompany/ShawtyIsBad-nomic1.5/169e29b6-50d8-456d-aa20-3fe2f3b19a1e.json delete mode 100644 data/hfopenllm_v2/tinycompany/ShawtyIsBad-nomic1.5/cbda0920-b298-4db2-806d-65b7d6550b30.json create mode 100644 data/hfopenllm_v2/tinycompany/SigmaBoi-base/427d32f7-190b-4005-b02c-6a8ce089dbbf.json delete mode 100644 data/hfopenllm_v2/tinycompany/SigmaBoi-base/e523d43e-a198-4db5-9d91-c4959b136953.json delete mode 100644 data/hfopenllm_v2/tinycompany/SigmaBoi-bge-m3/383b2f80-774b-4f76-998a-9d3d20a265db.json create mode 100644 data/hfopenllm_v2/tinycompany/SigmaBoi-bge-m3/de7551a8-63b1-4de3-899f-9d98cb985005.json delete mode 100644 data/hfopenllm_v2/tinycompany/SigmaBoi-bgem3/2b84e1be-81f6-474e-be5b-c5f4e60167fe.json create mode 100644 data/hfopenllm_v2/tinycompany/SigmaBoi-bgem3/eff6f456-906d-4320-8e6f-667fbbf0574a.json delete mode 100644 data/hfopenllm_v2/tinycompany/SigmaBoi-ib/55c0df8c-8dba-4508-8fe3-6ee726fa8a44.json create mode 100644 data/hfopenllm_v2/tinycompany/SigmaBoi-ib/6cbd9a3a-7e06-4eee-af9e-6db4ff35c36a.json delete mode 100644 data/hfopenllm_v2/tinycompany/SigmaBoi-nomic-moe/2dff318a-f64f-407b-acd3-2b1020d3f5cd.json create mode 100644 data/hfopenllm_v2/tinycompany/SigmaBoi-nomic-moe/7e3d3803-c8d4-4025-8d12-c4c29c49c059.json delete mode 100644 data/hfopenllm_v2/tinycompany/SigmaBoi-nomic1.5-fp32/39b85f29-d449-40d6-bb0e-cb4790a47cc7.json create mode 100644 data/hfopenllm_v2/tinycompany/SigmaBoi-nomic1.5-fp32/a43a6ca9-3543-44bc-8511-ee5c45552070.json create mode 100644 data/hfopenllm_v2/tinycompany/SigmaBoi-nomic1.5/83f6fdec-9592-45a1-acdf-0ebbb400c8a4.json delete mode 100644 data/hfopenllm_v2/tinycompany/SigmaBoi-nomic1.5/9ff57503-4fc4-4d21-8899-d691c912bff9.json delete mode 100644 data/hfopenllm_v2/tinycompany/Tamed-Shawty/6d2370ea-55ab-4ae7-a11a-c1556e988349.json create mode 100644 data/hfopenllm_v2/tinycompany/Tamed-Shawty/6e2d4174-303f-437b-9abb-26667b1dd04c.json delete mode 100644 data/hfopenllm_v2/tklohj/WindyFloLLM/53f0c477-6f06-427a-be34-5b0131cbf9e1.json create mode 100644 data/hfopenllm_v2/tklohj/WindyFloLLM/955e93d0-bec1-483c-b3f0-258e13d5cb16.json create mode 100644 data/hfopenllm_v2/togethercomputer/GPT-JT-6B-v1/3065ca79-c5e9-4875-9f81-4231e971d818.json delete mode 100644 data/hfopenllm_v2/togethercomputer/GPT-NeoXT-Chat-Base-20B/3b5ca740-a1e5-4043-ad56-c772bbdd1b38.json create mode 100644 
data/hfopenllm_v2/togethercomputer/GPT-NeoXT-Chat-Base-20B/fc7e485f-a416-420b-b43c-e45e502c4a8f.json create mode 100644 data/hfopenllm_v2/togethercomputer/LLaMA-2-7B-32K/53e882c6-6eb5-4202-a8d0-3a313556c9f4.json delete mode 100644 data/hfopenllm_v2/togethercomputer/Llama-2-7B-32K-Instruct/a1609dba-826b-4246-9230-35bd68268fe4.json create mode 100644 data/hfopenllm_v2/togethercomputer/Llama-2-7B-32K-Instruct/ba715669-c0ed-471f-80a6-b67453fb4930.json create mode 100644 data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-7B-Base/316cab27-5cac-4d26-90ae-05d1fc3bd14a.json delete mode 100644 data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-7B-Base/8d69f711-74c9-4c1e-87dc-9b46f70674bb.json delete mode 100644 data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-7B-Chat/c3b6efec-5428-499f-8e6b-e3b2b87a0d15.json create mode 100644 data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-7B-Chat/d2b0a35a-ea72-42f4-9f71-fffa1480bc22.json create mode 100644 data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-7B-Instruct/bf3eabff-fbf7-421c-9e04-548accc7678c.json delete mode 100644 data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-7B-Instruct/d8cef007-51ab-4793-9a74-d9f29d6c0f27.json create mode 100644 data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-Base-3B-v1/b7eeedd8-33ef-46b3-a3fb-6ac87247bc4e.json delete mode 100644 data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-Base-3B-v1/ba5c73b3-4785-44ef-8bfb-cfbbbdc16a91.json delete mode 100644 data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-Chat-3B-v1/9a0e6d99-4f86-4ce8-9b5a-f7b6c0fbd710.json create mode 100644 data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-Chat-3B-v1/b1c41abe-e7f6-4229-b776-8ed0b5f91bd4.json create mode 100644 data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-Instruct-3B-v1/5b769770-3b63-4863-a723-95212e2be40e.json delete mode 100644 data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-Instruct-3B-v1/e78a3888-33c7-4264-a01e-b0661504322f.json create mode 100644 data/hfopenllm_v2/tokyotech-llm/Llama-3-Swallow-8B-Instruct-v0.1/f2264b41-efa5-4278-91fd-2f454aa91c61.json delete mode 100644 data/hfopenllm_v2/tokyotech-llm/Llama-3-Swallow-8B-Instruct-v0.1/f6729e0a-559f-4087-af75-37634bf0af62.json delete mode 100644 data/hfopenllm_v2/tomasmcm/sky-t1-coder-32b-flash/1229310f-22aa-4ef9-b354-71fa249569f7.json create mode 100644 data/hfopenllm_v2/tomasmcm/sky-t1-coder-32b-flash/5c3484b4-6faa-47fd-a1a2-881898450f79.json create mode 100644 data/hfopenllm_v2/trthminh1112/autotrain-llama32-1b-finetune/326b95f8-9eae-4064-a261-077a957e233c.json delete mode 100644 data/hfopenllm_v2/tugstugi/Qwen2.5-7B-Instruct-QwQ-v0.1/1cfb7d70-b903-48ae-bdb2-31c838bdabc8.json create mode 100644 data/hfopenllm_v2/tugstugi/Qwen2.5-7B-Instruct-QwQ-v0.1/c1c7336e-b8bf-4a69-a586-c1a224ba8a65.json create mode 100644 data/hfopenllm_v2/universalml/NepaliGPT-2.0/89e55482-b762-4f5d-a021-211048719bdc.json create mode 100644 data/hfopenllm_v2/unsloth/Llama-3.2-1B-Instruct-no-system-message/81018e12-63f8-4ad8-87c4-181a13202497.json delete mode 100644 data/hfopenllm_v2/unsloth/Llama-3.2-1B-Instruct-no-system-message/d8d52ed0-2eb6-4be3-9e4e-346a6b19ceca.json delete mode 100644 data/hfopenllm_v2/unsloth/Llama-3.2-1B-Instruct/25ec2dbd-465f-40a9-80f0-e4001e621303.json create mode 100644 data/hfopenllm_v2/unsloth/Llama-3.2-1B-Instruct/5b09e8cb-aaf1-48fd-a2f4-11a8d4bc9a4d.json delete mode 100644 data/hfopenllm_v2/unsloth/Phi-3-mini-4k-instruct/36d52065-1de2-4661-bf23-85276a8ede2f.json create mode 100644 
data/hfopenllm_v2/unsloth/Phi-3-mini-4k-instruct/8b344f21-9038-4b15-aba8-308aa62e4b39.json create mode 100644 data/hfopenllm_v2/unsloth/phi-4-bnb-4bit/68ca8f7c-88c2-4ede-bcb7-d4ae23429d8f.json create mode 100644 data/hfopenllm_v2/unsloth/phi-4-unsloth-bnb-4bit/df557f25-5505-49dd-a0cb-88fff601c6e2.json create mode 100644 data/hfopenllm_v2/unsloth/phi-4/a50bf387-bf34-490f-979a-b6217a85a1bd.json create mode 100644 data/hfopenllm_v2/upstage/SOLAR-10.7B-Instruct-v1.0/89264aa0-3bed-41d3-b171-2a5434cc990f.json delete mode 100644 data/hfopenllm_v2/upstage/SOLAR-10.7B-Instruct-v1.0/9d750c83-0b27-437b-ae33-dd21a3313a04.json create mode 100644 data/hfopenllm_v2/upstage/SOLAR-10.7B-v1.0/a3272caf-a292-4dc7-8932-636a4099ca6b.json delete mode 100644 data/hfopenllm_v2/upstage/SOLAR-10.7B-v1.0/b29dbad1-7c1c-4ed2-8f44-45d54fed4880.json delete mode 100644 data/hfopenllm_v2/upstage/solar-pro-preview-instruct/00398bb3-0c84-4b3b-bcf1-61e84313b3e3.json create mode 100644 data/hfopenllm_v2/upstage/solar-pro-preview-instruct/c4ade77e-628f-457d-bbe1-3e5a0cb19d04.json delete mode 100644 data/hfopenllm_v2/utkmst/chimera-beta-test2-lora-merged/00620da3-d3ee-442a-a319-248906d959c0.json create mode 100644 data/hfopenllm_v2/utkmst/chimera-beta-test2-lora-merged/b030646c-5f5c-43ab-bbc4-405f82992265.json create mode 100644 data/hfopenllm_v2/uukuguy/speechless-code-mistral-7b-v1.0/399e516c-d8c8-4511-a746-76c81f72b36a.json create mode 100644 data/hfopenllm_v2/uukuguy/speechless-codellama-34b-v2.0/bd8e4424-7903-43e7-8105-269de734582e.json create mode 100644 data/hfopenllm_v2/uukuguy/speechless-coder-ds-6.7b/9126e939-3a87-4774-9606-084c5b56e933.json delete mode 100644 data/hfopenllm_v2/uukuguy/speechless-coder-ds-6.7b/a3ba5a65-b137-42ad-868b-9aa5c24afd07.json create mode 100644 data/hfopenllm_v2/uukuguy/speechless-instruct-mistral-7b-v0.2/be2ef197-738e-422d-9a88-cafd124584b7.json delete mode 100644 data/hfopenllm_v2/uukuguy/speechless-instruct-mistral-7b-v0.2/e115938d-d343-4c03-8f3b-4d86768b2e49.json create mode 100644 data/hfopenllm_v2/uukuguy/speechless-llama2-hermes-orca-platypus-wizardlm-13b/ee22e6c5-8529-4987-86d0-4abf3b525f90.json create mode 100644 data/hfopenllm_v2/uukuguy/speechless-mistral-dolphin-orca-platypus-samantha-7b/50f0ddc2-fccd-447c-ab50-a086ccb4cd3a.json delete mode 100644 data/hfopenllm_v2/uukuguy/speechless-zephyr-code-functionary-7b/82346a60-f31e-45ba-9fae-bd738321f390.json create mode 100644 data/hfopenllm_v2/uukuguy/speechless-zephyr-code-functionary-7b/83294141-a70f-40da-b3f8-21b367098cce.json create mode 100644 data/hfopenllm_v2/v000000/L3-8B-Stheno-v3.2-abliterated/303ae3d2-fdf5-404d-83ca-8e6071e13e6b.json delete mode 100644 data/hfopenllm_v2/v000000/L3-8B-Stheno-v3.2-abliterated/33146dbb-8233-4f3d-9fd9-68cbacc3f293.json create mode 100644 data/hfopenllm_v2/v000000/L3.1-Niitorm-8B-DPO-t0.0001/1b13d76d-259f-41f2-baba-ce96ef0cb937.json delete mode 100644 data/hfopenllm_v2/v000000/L3.1-Niitorm-8B-DPO-t0.0001/d90cef97-1e73-4068-bcb5-260a3f2586fe.json delete mode 100644 data/hfopenllm_v2/v000000/L3.1-Storniitova-8B/761f0cc0-c202-490d-93b4-447244f1e40a.json create mode 100644 data/hfopenllm_v2/v000000/L3.1-Storniitova-8B/b644a420-0a70-4b3d-9a5a-ff91911c857b.json create mode 100644 data/hfopenllm_v2/v000000/Qwen2.5-14B-Gutenberg-1e-Delta/33aaa60f-eb69-4d36-917c-6862121a223e.json delete mode 100644 data/hfopenllm_v2/v000000/Qwen2.5-14B-Gutenberg-Instruct-Slerpeno/1f1da15c-3a82-4dfb-9b73-4381c70eb1ef.json create mode 100644 
data/hfopenllm_v2/v000000/Qwen2.5-14B-Gutenberg-Instruct-Slerpeno/a1d2e571-6de0-4bd7-bdcf-8b3921b450f6.json create mode 100644 data/hfopenllm_v2/v000000/Qwen2.5-Lumen-14B/ad93274e-3ca0-40cb-9f65-e6e6c66a8008.json delete mode 100644 data/hfopenllm_v2/vhab10/Llama-3.1-8B-Base-Instruct-SLERP/982455a4-fb4f-4eed-96a0-c46d9eb11937.json create mode 100644 data/hfopenllm_v2/vhab10/Llama-3.1-8B-Base-Instruct-SLERP/b8043d04-c3ab-4d6a-97eb-44b195a52710.json delete mode 100644 data/hfopenllm_v2/vhab10/Llama-3.2-Instruct-3B-TIES/22f8bb3f-4794-46b1-828e-75711a1233bd.json create mode 100644 data/hfopenllm_v2/vhab10/Llama-3.2-Instruct-3B-TIES/c6bff6da-382f-4423-ba3a-d987839132e0.json create mode 100644 data/hfopenllm_v2/vhab10/llama-3-8b-merged-linear/f3574ad1-a6d7-47fb-86e7-69c256452dea.json delete mode 100644 data/hfopenllm_v2/vicgalle/CarbonBeagle-11B-truthy/d67aa278-fcc9-4404-a87a-4be9e1bdaa1a.json create mode 100644 data/hfopenllm_v2/vicgalle/CarbonBeagle-11B-truthy/f2e47267-6c40-4d70-8420-295c95b318f3.json create mode 100644 data/hfopenllm_v2/vicgalle/CarbonBeagle-11B/395f246e-34c6-40e6-bfeb-b047aa12cf90.json delete mode 100644 data/hfopenllm_v2/vicgalle/CarbonBeagle-11B/b906411a-6663-4c9f-9fe6-4d60e99e4e41.json create mode 100644 data/hfopenllm_v2/vicgalle/Configurable-Hermes-2-Pro-Llama-3-8B/3a91f8bb-c132-45b3-b8b4-d2ecc9f03f3a.json delete mode 100644 data/hfopenllm_v2/vicgalle/Configurable-Llama-3.1-8B-Instruct/82a3253a-7a6e-4d75-8ea2-114b4dee6d16.json create mode 100644 data/hfopenllm_v2/vicgalle/Configurable-Llama-3.1-8B-Instruct/97c92043-9bed-460a-8d7b-70ab3584c75b.json delete mode 100644 data/hfopenllm_v2/vicgalle/Configurable-Yi-1.5-9B-Chat/0a933130-dca9-435c-a529-16065b540aab.json create mode 100644 data/hfopenllm_v2/vicgalle/Configurable-Yi-1.5-9B-Chat/ab2ce171-bfcf-49ea-a341-2a52b2bd803a.json delete mode 100644 data/hfopenllm_v2/vicgalle/ConfigurableBeagle-11B/3fd95536-ec61-4470-9082-14a116d20e80.json create mode 100644 data/hfopenllm_v2/vicgalle/ConfigurableBeagle-11B/f9bbd9cc-dc6a-466f-b777-eaea4a15b874.json delete mode 100644 data/hfopenllm_v2/vicgalle/ConfigurableHermes-7B/176727e5-31dc-462a-8210-4735543c32f2.json create mode 100644 data/hfopenllm_v2/vicgalle/ConfigurableHermes-7B/cd0aefa3-b0c9-4683-872f-f9f9d285e6c3.json delete mode 100644 data/hfopenllm_v2/vicgalle/ConfigurableSOLAR-10.7B/2dec3c49-01f0-4940-aa45-e7a6b2648e8f.json create mode 100644 data/hfopenllm_v2/vicgalle/ConfigurableSOLAR-10.7B/c42db2ab-dbc4-48e4-9c16-7b8a5f8492c3.json create mode 100644 data/hfopenllm_v2/vicgalle/Humanish-RP-Llama-3.1-8B/1b32c387-97a7-42ff-892c-d3bacebbf050.json create mode 100644 data/hfopenllm_v2/vicgalle/Merge-Mistral-Prometheus-7B/cbea057c-b0f9-48ac-a075-eb28ebbaf358.json create mode 100644 data/hfopenllm_v2/vicgalle/Merge-Mixtral-Prometheus-8x7B/0b1bb876-9dc7-47d5-855a-f028fb7f2df6.json delete mode 100644 data/hfopenllm_v2/vicgalle/Merge-Mixtral-Prometheus-8x7B/e6a0cf8f-323d-40c0-90c2-0e2071321df0.json create mode 100644 data/hfopenllm_v2/vicgalle/Roleplay-Llama-3-8B/a86678ad-344c-430f-80c7-02d634b0cd5b.json create mode 100644 data/hfopenllm_v2/viettelsecurity-ai/security-llama3.2-3b/827f3236-74fa-432b-8177-8785ac25ad76.json delete mode 100644 data/hfopenllm_v2/vihangd/smart-dan-sft-v0.1/00de0fac-e1a7-449a-969d-624cbe9adab1.json create mode 100644 data/hfopenllm_v2/vihangd/smart-dan-sft-v0.1/7f694687-77e5-41d2-923b-f2d5f231729b.json delete mode 100644 data/hfopenllm_v2/voidful/smol-360m-ft/b93d3a57-2535-4150-a2db-71a50569e6ae.json create mode 100644 
data/hfopenllm_v2/voidful/smol-360m-ft/daa9d03e-63b0-4c08-ae72-e11041200ac7.json create mode 100644 data/hfopenllm_v2/vonjack/MobileLLM-125M-HF/1539822f-acc4-4dae-9e61-133da97ebcbe.json delete mode 100644 data/hfopenllm_v2/vonjack/MobileLLM-125M-HF/2e06f258-9d91-4734-aacc-f417fddad77c.json delete mode 100644 data/hfopenllm_v2/vonjack/Phi-3-mini-4k-instruct-LLaMAfied/be3635bb-83de-4cbf-8e0f-3a84ee78bd67.json create mode 100644 data/hfopenllm_v2/vonjack/Phi-3-mini-4k-instruct-LLaMAfied/eec80fda-ce2f-4ef4-94d3-9e7b90f7f2e5.json delete mode 100644 data/hfopenllm_v2/vonjack/Phi-3.5-mini-instruct-hermes-fc-json/19cd2513-03e8-4d78-b222-566fe3928d2b.json create mode 100644 data/hfopenllm_v2/vonjack/Phi-3.5-mini-instruct-hermes-fc-json/448cac5f-a7d3-41fb-9b49-666758037eb4.json create mode 100644 data/hfopenllm_v2/vonjack/Qwen2.5-Coder-0.5B-Merged/5d7c5ac1-84c3-4fd1-ac51-4c00ed8c59c7.json create mode 100644 data/hfopenllm_v2/vonjack/SmolLM2-1.7B-Merged/7e1741cc-f9ea-4940-9b6b-d7a515cfce31.json delete mode 100644 data/hfopenllm_v2/vonjack/SmolLM2-1.7B-Merged/97bab408-a5f5-4363-b530-dc4a6966c859.json delete mode 100644 data/hfopenllm_v2/vonjack/SmolLM2-135M-Merged/2c1cab05-b63f-49ca-a709-b5a4e859ba82.json create mode 100644 data/hfopenllm_v2/vonjack/SmolLM2-135M-Merged/ec4d21be-b1a6-47a9-84a4-1a25249c1768.json create mode 100644 data/hfopenllm_v2/vonjack/SmolLM2-360M-Merged/c6b03539-04b3-4ef2-909d-8036a7ea2ae1.json delete mode 100644 data/hfopenllm_v2/vonjack/SmolLM2-360M-Merged/f1980c69-8c24-4fcd-ace1-797195026c7b.json delete mode 100644 data/hfopenllm_v2/w4r10ck/SOLAR-10.7B-Instruct-v1.0-uncensored/9add85f6-b577-449e-8a2f-ae77a2588bc7.json create mode 100644 data/hfopenllm_v2/w4r10ck/SOLAR-10.7B-Instruct-v1.0-uncensored/f156ac38-056e-4ef1-bdbe-e83c299a683b.json create mode 100644 data/hfopenllm_v2/wanlige/li-14b-v0.4-slerp/11d3c8db-300c-4e02-b729-7adba6844ad2.json delete mode 100644 data/hfopenllm_v2/wanlige/li-14b-v0.4-slerp/d2451e41-e4b0-4945-9ace-1b046b11528b.json delete mode 100644 data/hfopenllm_v2/wanlige/li-14b-v0.4-slerp0.1/54a93ff0-bff3-4252-ba4a-e99f06b46896.json create mode 100644 data/hfopenllm_v2/wanlige/li-14b-v0.4-slerp0.1/fc75a820-fc0b-4e50-9304-61f0e93795c0.json delete mode 100644 data/hfopenllm_v2/wanlige/li-14b-v0.4/8965f266-28f1-43f2-b03c-acc4a9478b7c.json create mode 100644 data/hfopenllm_v2/wanlige/li-14b-v0.4/bb66896f-799c-4e17-8b54-af5e795699fa.json create mode 100644 data/hfopenllm_v2/wannaphong/KhanomTanLLM-Instruct/30a1a786-7478-401f-85ae-57037ada3d32.json delete mode 100644 data/hfopenllm_v2/wannaphong/KhanomTanLLM-Instruct/681b02e4-7b57-42b7-9550-59c664511b01.json create mode 100644 data/hfopenllm_v2/waqasali1707/Beast-Soul-new/05430b16-07b6-41a1-ade9-6211cdf8ccf1.json delete mode 100644 data/hfopenllm_v2/waqasali1707/Beast-Soul-new/c04bef75-d3cc-463e-ac24-a2b18d3611af.json create mode 100644 data/hfopenllm_v2/wave-on-discord/qwent-7b/09bc4d5a-f104-4a36-999c-11e2532eef1e.json delete mode 100644 data/hfopenllm_v2/weathermanj/Menda-3B-500/468d60fa-5c01-41bd-a791-e0e86c2d02bf.json create mode 100644 data/hfopenllm_v2/weathermanj/Menda-3B-500/a92cfff6-6caf-4bf1-913a-9d7dd2d8d449.json create mode 100644 data/hfopenllm_v2/weathermanj/Menda-3b-750/8972e92c-ebbe-4dc4-8a8c-6f7a42ab5c11.json delete mode 100644 data/hfopenllm_v2/weathermanj/Menda-3b-750/9f1f8a2e-3a63-4b8e-85e9-141477fddcc3.json delete mode 100644 data/hfopenllm_v2/weathermanj/Menda-3b-Optim-100/e33fb04e-ac99-423f-ac8c-5268e527bf34.json create mode 100644 
data/hfopenllm_v2/weathermanj/Menda-3b-Optim-100/e4f39815-9704-4d0a-8d9b-39359367adcc.json delete mode 100644 data/hfopenllm_v2/weathermanj/Menda-3b-Optim-200/b8b84752-c112-47be-8a86-35ca0e578301.json create mode 100644 data/hfopenllm_v2/weathermanj/Menda-3b-Optim-200/f40df456-eb9a-46f8-8fb0-b6ad2748f3c2.json delete mode 100644 data/hfopenllm_v2/win10/ArliAI-RPMax-v1.3-merge-13.3B/16777b0f-3063-45eb-be07-294d13f975ac.json create mode 100644 data/hfopenllm_v2/win10/ArliAI-RPMax-v1.3-merge-13.3B/398996d9-299b-4120-a757-e2fe14e779ee.json create mode 100644 data/hfopenllm_v2/win10/Breeze-13B-32k-Instruct-v1_0/4398633e-77b0-4b61-ae85-29b0e5aad38b.json delete mode 100644 data/hfopenllm_v2/win10/Breeze-13B-32k-Instruct-v1_0/bc990db1-c6d9-4113-9946-466bfd5cf9cc.json create mode 100644 data/hfopenllm_v2/win10/EVA-Norns-Qwen2.5-v0.1/1bc60148-512f-4830-b541-f30535cf74bf.json create mode 100644 data/hfopenllm_v2/win10/Llama-3.2-3B-Instruct-24-9-29/a9dfb20a-13e0-4419-a747-7c001b2e9435.json delete mode 100644 data/hfopenllm_v2/win10/Llama-3.2-3B-Instruct-24-9-29/bf253a63-4685-4e51-8a0d-5209306926c8.json create mode 100644 data/hfopenllm_v2/win10/Norns-Qwen2.5-12B/388e3559-a3b6-4738-9843-9bdd048bae09.json create mode 100644 data/hfopenllm_v2/win10/Norns-Qwen2.5-7B/994a6930-42d5-463a-9e7c-0a3070144211.json delete mode 100644 data/hfopenllm_v2/win10/Qwen2.5-2B-Instruct/143dc973-1063-45d6-9747-9f24a9ae5657.json create mode 100644 data/hfopenllm_v2/win10/Qwen2.5-2B-Instruct/cce46320-9794-443a-831a-92e2a21515b0.json delete mode 100644 data/hfopenllm_v2/win10/llama3-13.45b-Instruct/3c9eb291-6171-4d40-aa5f-58d39738fdcb.json create mode 100644 data/hfopenllm_v2/win10/llama3-13.45b-Instruct/988f4cc0-ebfb-43a9-8a7f-3dd1f1c1e342.json create mode 100644 data/hfopenllm_v2/win10/miscii-14b-1M-0128/3c675148-5d09-4778-baad-9295ef8cfc79.json delete mode 100644 data/hfopenllm_v2/win10/miscii-14b-1M-0128/c19f2ddd-7710-4844-9f1f-c0cd1c7e3e41.json create mode 100644 data/hfopenllm_v2/winglian/Llama-3-8b-64k-PoSE/620b80ba-81ab-4504-9f42-4965014f3cd1.json create mode 100644 data/hfopenllm_v2/winglian/llama-3-8b-256k-PoSE/b6c68fc1-c2c1-4cdf-91ef-2007becd7ade.json create mode 100644 data/hfopenllm_v2/wzhouad/gemma-2-9b-it-WPO-HB/19279c18-c2f7-4f75-a9c5-a121b2d4bcff.json create mode 100644 data/hfopenllm_v2/x0000001/Deepseek-Lumen-R1-Qwen2.5-14B/7966789d-8ace-4b39-9093-96bbb8e641d8.json delete mode 100644 data/hfopenllm_v2/xMaulana/FinMatcha-3B-Instruct/105021c8-c214-4a6a-ac3b-747c4c48886e.json create mode 100644 data/hfopenllm_v2/xMaulana/FinMatcha-3B-Instruct/5e1d849d-0342-4de9-a7d8-dd5cd5960fac.json create mode 100644 data/hfopenllm_v2/xinchen9/Llama3.1_8B_Instruct_CoT/a17563e3-0369-4042-8006-2ec781653f63.json create mode 100644 data/hfopenllm_v2/xinchen9/Llama3.1_CoT/68369110-e371-4112-ae0a-14f7fe9fc40f.json create mode 100644 data/hfopenllm_v2/xinchen9/Llama3.1_CoT_V1/2a6925d3-992f-4c4f-a57b-3eb41062743b.json create mode 100644 data/hfopenllm_v2/xinchen9/Mistral-7B-CoT/28290ea9-9ce5-4605-ac5b-aa2d606994d8.json delete mode 100644 data/hfopenllm_v2/xinchen9/llama3-b8-ft-dis/5ea3a084-bc30-4390-97a2-1933c5422790.json create mode 100644 data/hfopenllm_v2/xinchen9/llama3-b8-ft-dis/eb2ed6eb-4789-400d-aea5-841547a20cd7.json create mode 100644 data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_2b-table/873218a0-7ddb-4287-88ce-8c8214e85c85.json delete mode 100644 data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_2b-table/a9888e61-bd14-4769-b620-cda908c8ba3e.json delete mode 100644 
data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_8b-table/99d6ac02-a8f8-409f-ad9d-ce5fd7ed6fe0.json create mode 100644 data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_8b-table/e4c32b92-46b4-431a-83f2-11499f587534.json delete mode 100644 data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_2b-table/71a54215-e97a-4ee6-928c-344bd690b020.json create mode 100644 data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_2b-table/a05681a0-07e4-4206-ae89-dee4e9706467.json delete mode 100644 data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_8b-table/2fe15418-16bc-4f60-bad2-7329a3670507.json create mode 100644 data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_8b-table/b078f823-d603-4030-81a2-a3ca1a1117f9.json create mode 100644 data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_2b-table-0.001/26625158-6720-47c7-8c28-46ca7b4b947e.json delete mode 100644 data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_2b-table-0.001/f6bcff0a-559b-44c1-9c70-259446b3ebe5.json delete mode 100644 data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_8b-table-0.002/4deeeff7-f62d-4c42-b32a-98bdd773a758.json create mode 100644 data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_8b-table-0.002/5e3e8dec-f14b-4b7a-ace1-1e1728395e84.json create mode 100644 data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_2b-table-0.001/35b4378e-52cd-4ae1-985b-c8e2c00dc61a.json delete mode 100644 data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_2b-table-0.001/8ec55b3f-e425-4ee9-98d5-dac775977514.json create mode 100644 data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_8b-table-0.002/4d99a55e-39c0-41c7-9ef0-494f739ceaec.json delete mode 100644 data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_8b-table-0.002/c583cff2-2944-4afb-b32e-c0f49bc0d3b7.json delete mode 100644 data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_2b-table/a6996896-1464-4b55-a784-28deb06150c8.json create mode 100644 data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_2b-table/f3c7bacd-e231-45fd-b503-ee4d34caf4e8.json create mode 100644 data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_8b-table/1bb87d8f-2d66-42b2-a744-1a7cbc2c17dc.json delete mode 100644 data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_8b-table/406f36fc-1243-4342-80c6-95b96fcc003f.json delete mode 100644 data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_2b-table/87bcbd57-2d0e-4d77-9f1e-3ec0199c8452.json create mode 100644 data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_2b-table/ae10fd26-e648-4fa0-ae24-dfaaf4ff510d.json create mode 100644 data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_8b-table/0af58746-0492-4ba7-8a17-c0a5c43d0700.json delete mode 100644 data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_8b-table/d7125235-7b17-4a90-9125-c993646cd7c8.json create mode 100644 data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_2b-table-0.001/88fff9f5-7aa7-463a-87e0-5fd2f5bacf09.json delete mode 100644 data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_2b-table-0.001/d758e9a9-c316-4de5-bdb7-d0ec7401fa12.json create mode 100644 data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_8b-table-0.002/bc79527d-ae58-4b17-afd8-df931562dbf3.json delete mode 100644 data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_8b-table-0.002/d1445003-91ea-4b2b-ab38-a47a6392620e.json create mode 100644 
data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_2b-table-0.001/3e7423d5-ad7e-48e2-bd25-a4946d443c24.json delete mode 100644 data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_2b-table-0.001/4d9c2e04-caef-43f5-9ce1-40517341ff40.json delete mode 100644 data/hfopenllm_v2/xukp20/llama-3-8b-instruct-sppo-iter1-gp-2b-tau01-table/5d53b35f-6bff-493c-805d-b45517ae0e2b.json create mode 100644 data/hfopenllm_v2/xukp20/llama-3-8b-instruct-sppo-iter1-gp-2b-tau01-table/7979fd6a-a886-41cc-987b-356b7c452bff.json create mode 100644 data/hfopenllm_v2/xwen-team/Xwen-7B-Chat/2be6bc34-1e61-426f-b963-6e096b5418fb.json delete mode 100644 data/hfopenllm_v2/xwen-team/Xwen-7B-Chat/a099778d-4c47-472e-872d-8fffcdf2764f.json create mode 100644 data/hfopenllm_v2/xxx777xxxASD/L3.1-ClaudeMaid-4x8B/c4f69339-be6b-4bb4-8faf-a1f40e73d4b0.json delete mode 100644 data/hfopenllm_v2/yam-peleg/Hebrew-Gemma-11B-Instruct/5d25872d-eacd-4e5c-b9cc-9ee014147730.json create mode 100644 data/hfopenllm_v2/yam-peleg/Hebrew-Gemma-11B-Instruct/c845eb10-a028-4cc2-8f64-25d75480c0d5.json create mode 100644 data/hfopenllm_v2/yam-peleg/Hebrew-Mistral-7B-200K/377e7223-4876-49b6-8057-b1831d7f129b.json create mode 100644 data/hfopenllm_v2/yam-peleg/Hebrew-Mistral-7B-200K/4ddb9ed6-0599-482e-b12e-bcb01975cc85.json create mode 100644 data/hfopenllm_v2/yam-peleg/Hebrew-Mistral-7B/9d5af106-be69-4b62-99c1-fcfb6091d080.json create mode 100644 data/hfopenllm_v2/yanng1242/Marcoro14-7B-slerp/2f2d7a55-2838-446d-9487-a6cfa0c03356.json delete mode 100644 data/hfopenllm_v2/yanng1242/Marcoro14-7B-slerp/f5005cc2-cec4-4a1c-be09-a670d996d15b.json delete mode 100644 data/hfopenllm_v2/yasserrmd/Coder-GRPO-3B/425372c0-e096-4bdf-8f6c-eb2d5b36bb07.json create mode 100644 data/hfopenllm_v2/yasserrmd/Coder-GRPO-3B/65d20d45-f63b-4b09-b66d-5f53297c0c20.json delete mode 100644 data/hfopenllm_v2/yasserrmd/Text2SQL-1.5B/42a767cf-7d29-486d-b83e-fcfa51f048c1.json create mode 100644 data/hfopenllm_v2/yasserrmd/Text2SQL-1.5B/4712953f-0777-4b97-8f13-f7309f19f0dc.json delete mode 100644 data/hfopenllm_v2/ycros/BagelMIsteryTour-v2-8x7B/2419f2a3-03df-4521-9baa-346e3cb53181.json create mode 100644 data/hfopenllm_v2/ycros/BagelMIsteryTour-v2-8x7B/84382308-04b5-439f-b486-b26d20da605a.json delete mode 100644 data/hfopenllm_v2/ycros/BagelMIsteryTour-v2-8x7B/a88e7110-2a58-4f47-801f-2a49037eaed6.json create mode 100644 data/hfopenllm_v2/ycros/BagelMIsteryTour-v2-8x7B/e82be06f-14ed-45e8-a273-d28c50f5212b.json create mode 100644 data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_2b-table/5815ba55-40fc-4f8e-ae0b-b329c42fd503.json delete mode 100644 data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_2b-table/cd2f94a5-595a-469e-b34e-a5f9abb82e6b.json delete mode 100644 data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_8b-table/c19ed336-aadf-4af3-a0e5-1c1946a17ce4.json create mode 100644 data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_8b-table/e58eceb3-b501-4924-9d0d-98d7da3c16c5.json create mode 100644 data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_2b-table/5a88455c-7699-4c49-8a12-76cda15d878c.json delete mode 100644 data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_2b-table/d6cadac8-17a9-430f-94b3-6eb0c7ecc146.json delete mode 100644 data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_8b-table/0bdeac20-0505-459e-b417-ea4cb2f95cec.json create mode 100644 data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_8b-table/122b4c1e-6e6c-4db5-8991-b091361c3ecf.json create mode 100644 
data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_2b-table-0.001/6abeb0e4-32ee-4dbb-9902-b19cc96a2aa7.json delete mode 100644 data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_2b-table-0.001/b1ad6a57-8cad-4cca-8dd6-00ebd35089ab.json delete mode 100644 data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_8b-table-0.002/249af8cd-717b-4ee9-8ac7-740f16708675.json create mode 100644 data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_8b-table-0.002/679f214f-e03f-47a9-8a11-91adbf1c4880.json delete mode 100644 data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_2b-table-0.001/338737c7-29cf-44d8-be92-6749167b7c03.json create mode 100644 data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_2b-table-0.001/680e77b8-9c64-4c52-aa83-55236039cef1.json delete mode 100644 data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_8b-table-0.002/aa12336f-556c-4222-a10c-529eb74a793b.json create mode 100644 data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_8b-table-0.002/c24c471c-14b3-462e-8b81-6548b27e5ffc.json delete mode 100644 data/hfopenllm_v2/yifAI/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_8b-table-0.002/79fad1b7-c458-4f89-9d7a-d58f70ba6c90.json create mode 100644 data/hfopenllm_v2/yifAI/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_8b-table-0.002/efa7fa62-2e8b-403c-b345-eef876b48dbd.json create mode 100644 data/hfopenllm_v2/ylalain/ECE-PRYMMAL-YL-1B-SLERP-V8/40bae762-65bd-4b4c-b422-ffd0fd3790a9.json delete mode 100644 data/hfopenllm_v2/ylalain/ECE-PRYMMAL-YL-1B-SLERP-V8/5e4e3c08-71cd-4241-bfe9-bc242f0cc32a.json create mode 100644 data/hfopenllm_v2/ymcki/Llama-3.1-8B-GRPO-Instruct/596957cc-719c-44c7-8284-06a9ba0d1a30.json delete mode 100644 data/hfopenllm_v2/ymcki/Llama-3.1-8B-GRPO-Instruct/cb38b3bb-6188-430f-b863-9bf86cc877f9.json create mode 100644 data/hfopenllm_v2/ymcki/Llama-3.1-8B-SFT-GRPO-Instruct/706bbc09-f867-4327-bc4d-b5ede41ebd93.json delete mode 100644 data/hfopenllm_v2/ymcki/Llama-3.1-8B-SFT-GRPO-Instruct/938af657-ca9b-4400-84e1-002065f92f84.json create mode 100644 data/hfopenllm_v2/ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18-merge/8962e9be-75bf-4f57-8ce2-b29523740851.json create mode 100644 data/hfopenllm_v2/ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18/014f4838-22ff-4802-a887-4d2de01a9256.json create mode 100644 data/hfopenllm_v2/ymcki/gemma-2-2b-jpn-it-abliterated-17-18-24/5c6eac9c-0ec6-4364-a86b-dcd894d69f0b.json create mode 100644 data/hfopenllm_v2/ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO-alpaca/09b81cf2-3b79-448c-ab8e-87e378c804bb.json create mode 100644 data/hfopenllm_v2/ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO/28b9977a-db3d-4f38-b1f7-bd0cdcab5504.json create mode 100644 data/hfopenllm_v2/ymcki/gemma-2-2b-jpn-it-abliterated-17/845ea162-cfa1-47f4-8914-d81d9bf1bb7d.json create mode 100644 data/hfopenllm_v2/ymcki/gemma-2-2b-jpn-it-abliterated-18-ORPO/706737c7-cd1a-4958-9ffc-2655f0b50178.json create mode 100644 data/hfopenllm_v2/ymcki/gemma-2-2b-jpn-it-abliterated-18/5acd58cd-8dfb-4fb7-8832-6bc151e0b1a1.json create mode 100644 data/hfopenllm_v2/ymcki/gemma-2-2b-jpn-it-abliterated-24/d374a68d-b985-47c2-b087-500bffa93c80.json create mode 100644 data/hfopenllm_v2/yuchenxie/ArlowGPT-3B-Multilingual/23fbceb0-b646-4945-b17f-66dde24a0e43.json create mode 100644 data/hfopenllm_v2/yuchenxie/ArlowGPT-8B/73d9e204-e829-4159-b340-6d9581c6f0e1.json create mode 100644 data/hfopenllm_v2/yuvraj17/Llama3-8B-SuperNova-Spectrum-Hermes-DPO/a6979dda-fba6-4104-b153-3b0a89de8585.json delete mode 100644 
data/hfopenllm_v2/yuvraj17/Llama3-8B-SuperNova-Spectrum-Hermes-DPO/d22c83a1-9c1c-43df-b033-c6cb75cb389d.json create mode 100644 data/hfopenllm_v2/yuvraj17/Llama3-8B-SuperNova-Spectrum-dare_ties/62e04968-0c5c-4aad-a434-d9d24bccbdb8.json create mode 100644 data/hfopenllm_v2/yuvraj17/Llama3-8B-abliterated-Spectrum-slerp/bae4064e-b10f-4082-876d-e4168ca1a8cc.json create mode 100644 data/hfopenllm_v2/zake7749/gemma-2-2b-it-chinese-kyara-dpo/0040b48c-0f54-4c9b-97ee-1ca833c68e36.json delete mode 100644 data/hfopenllm_v2/zake7749/gemma-2-2b-it-chinese-kyara-dpo/4fbaf39a-86a1-4b79-aeeb-e14c2de64666.json create mode 100644 data/hfopenllm_v2/zake7749/gemma-2-9b-it-chinese-kyara/6050e969-bcde-4594-8e53-05fa74c7287d.json create mode 100644 data/hfopenllm_v2/zelk12/Gemma-2-TM-9B/3aaee358-bf3e-4d91-91bf-bd42e0a7c61e.json create mode 100644 data/hfopenllm_v2/zelk12/MT-Gen1-gemma-2-9B/ef5f4fb2-f409-49dc-b3f0-f3e19585cd8a.json create mode 100644 data/hfopenllm_v2/zelk12/MT-Gen2-GI-gemma-2-9B/4048fa60-7427-4f7e-9939-e270aa5e8b51.json create mode 100644 data/hfopenllm_v2/zelk12/MT-Gen2-gemma-2-9B/f5c9baea-f2cf-414a-937a-6a43f55a1c1d.json create mode 100644 data/hfopenllm_v2/zelk12/MT-Gen3-gemma-2-9B/1da70796-d40b-4f2a-8ce3-b304f414a6d5.json create mode 100644 data/hfopenllm_v2/zelk12/MT-Gen4-gemma-2-9B/de476f79-2539-4f9e-a1d2-901c6c4342d4.json create mode 100644 data/hfopenllm_v2/zelk12/MT-Gen5-gemma-2-9B/80aee542-c894-46b6-a6ed-9f3400aefa9e.json create mode 100644 data/hfopenllm_v2/zelk12/MT-Gen6-gemma-2-9B/5c9d4eaf-0985-4f9e-8007-08b4081bb19d.json create mode 100644 data/hfopenllm_v2/zelk12/MT-Gen6fix-gemma-2-9B/4b019824-8454-4ce8-aa49-d122a2491f9c.json create mode 100644 data/hfopenllm_v2/zelk12/MT-Gen7-gemma-2-9B/0dfcd13c-f057-4aec-82ad-b5cf2b266502.json create mode 100644 data/hfopenllm_v2/zelk12/MT-Max-Merge_02012025163610-gemma-2-9B/927589bf-f6a0-4155-a24b-120231bbf029.json create mode 100644 data/hfopenllm_v2/zelk12/MT-Merge-gemma-2-9B/1a2740cb-c541-434e-89a1-7a9fd2c4cabd.json create mode 100644 data/hfopenllm_v2/zelk12/MT-Merge1-gemma-2-9B/0110d1c9-755e-4f09-888b-0c9c1a263639.json create mode 100644 data/hfopenllm_v2/zelk12/MT-Merge2-MU-gemma-2-MTg2MT1g2-9B/cda65781-494c-45bd-8c32-7b1fe987f31c.json create mode 100644 data/hfopenllm_v2/zelk12/MT-Merge2-gemma-2-9B/2fd7de02-f8d9-45c1-9bb5-db5134bd4862.json create mode 100644 data/hfopenllm_v2/zelk12/MT-Merge3-gemma-2-9B/acf07f51-5acd-4375-bafa-7a1a244db3c6.json create mode 100644 data/hfopenllm_v2/zelk12/MT-Merge4-gemma-2-9B/ff985193-ba26-45d3-97be-b7d3b17ab4d7.json create mode 100644 data/hfopenllm_v2/zelk12/MT-Merge5-gemma-2-9B/21dbea2c-5cb1-431c-a496-af9b932b3440.json create mode 100644 data/hfopenllm_v2/zelk12/MT-Merge6-gemma-2-9B/1143955c-c32c-4b41-8484-2c77e72f4946.json create mode 100644 data/hfopenllm_v2/zelk12/MT-gemma-2-9B/94824ceb-08c3-415c-8003-b70a0d9af09d.json create mode 100644 data/hfopenllm_v2/zelk12/MT1-Gen1-gemma-2-9B/bf2903cb-b954-4870-98c3-116a96aa49fb.json create mode 100644 data/hfopenllm_v2/zelk12/MT1-Gen2-gemma-2-9B/b089c439-a38c-438d-bdad-1c68a1265d95.json create mode 100644 data/hfopenllm_v2/zelk12/MT1-Gen3-gemma-2-9B/c988815b-50e5-47e4-a418-bbbcdf1eb4a0.json create mode 100644 data/hfopenllm_v2/zelk12/MT1-Gen4-gemma-2-9B/fa11d66c-7ebc-4b81-83b7-d35a4ff23d3f.json create mode 100644 data/hfopenllm_v2/zelk12/MT1-Gen5-IF-gemma-2-S2DMv1-9B/1c81787b-594e-4bb6-aee1-7f193a628b16.json create mode 100644 data/hfopenllm_v2/zelk12/MT1-Gen5-gemma-2-9B/fd9ce37e-d43d-4ec2-94ec-0eb42e3cc685.json create mode 100644 
data/hfopenllm_v2/zelk12/MT1-Gen6-gemma-2-9B/0625f09a-3e02-410b-963b-49b83dfc5c8f.json create mode 100644 data/hfopenllm_v2/zelk12/MT1-Gen7-gemma-2-9B/50c1399e-b409-4dff-b4d6-9be01dbb02c7.json create mode 100644 data/hfopenllm_v2/zelk12/MT1-Max-Merge_02012025163610-gemma-2-9B/402bdb4a-b258-40a4-ac9f-de74026c02f3.json create mode 100644 data/hfopenllm_v2/zelk12/MT1-gemma-2-9B/65dcf458-db0f-45cd-a8a4-e16108e51161.json create mode 100644 data/hfopenllm_v2/zelk12/MT2-Gen1-gemma-2-9B/f1346b1a-0e66-4d80-bfad-ccbe0a8e2abf.json create mode 100644 data/hfopenllm_v2/zelk12/MT2-Gen2-gemma-2-9B/11e7b55a-d872-474a-98a6-fc82ce5a863e.json create mode 100644 data/hfopenllm_v2/zelk12/MT2-Gen3-gemma-2-9B/19688633-fa6c-412a-8dbc-c16fc49b3276.json create mode 100644 data/hfopenllm_v2/zelk12/MT2-Gen4-gemma-2-9B/7d67eb9c-a4d8-4b86-8c24-928ebbe58de7.json create mode 100644 data/hfopenllm_v2/zelk12/MT2-Gen5-gemma-2-9B/447f880c-643f-4041-8cdb-87697d798085.json create mode 100644 data/hfopenllm_v2/zelk12/MT2-Gen6-gemma-2-9B/653d459e-f8b7-48bc-a9db-779e515532cf.json create mode 100644 data/hfopenllm_v2/zelk12/MT2-Gen7-gemma-2-9B/4e56faf6-dbde-4059-b502-32c76bdbed2d.json create mode 100644 data/hfopenllm_v2/zelk12/MT2-Max-Merge_02012025163610-gemma-2-9B/f161df97-3cc6-48d3-bfc5-d3f01108ecbb.json create mode 100644 data/hfopenllm_v2/zelk12/MT2-gemma-2-9B/7d08412d-e987-497f-a6ec-ce0affe0f80f.json create mode 100644 data/hfopenllm_v2/zelk12/MT3-Gen1-gemma-2-9B/f042f897-cfe8-4d8c-b75b-bbfca44505ea.json create mode 100644 data/hfopenllm_v2/zelk12/MT3-Gen2-gemma-2-9B/f24ab334-c022-4e34-a930-3fed6ee18793.json create mode 100644 data/hfopenllm_v2/zelk12/MT3-Gen3-gemma-2-9B/2bd3c620-780f-452d-92d7-d01a04539939.json create mode 100644 data/hfopenllm_v2/zelk12/MT3-Gen4-gemma-2-9B/234042bd-237f-4cc5-8c5d-1eacd2e8bfaa.json create mode 100644 data/hfopenllm_v2/zelk12/MT3-Gen5-gemma-2-9B/d8e0a32e-f307-4056-b450-47a12a0a7b15.json create mode 100644 data/hfopenllm_v2/zelk12/MT3-Gen5-gemma-2-9B_v1/9dc3c4f5-8974-4496-8a6e-daa4fe3e3c2a.json create mode 100644 data/hfopenllm_v2/zelk12/MT3-Gen6-gemma-2-9B/037787fb-9c61-4c56-a7fc-704c04b519f7.json create mode 100644 data/hfopenllm_v2/zelk12/MT3-Max-Merge_02012025163610-gemma-2-9B/5df3dd8f-4921-4916-8163-8651b796e478.json create mode 100644 data/hfopenllm_v2/zelk12/MT3-gemma-2-9B/50463593-3a53-4b3f-9621-d05670309b7e.json create mode 100644 data/hfopenllm_v2/zelk12/MT4-Gen1-gemma-2-9B/d7fef356-36c7-488f-8f49-997682a2c01a.json create mode 100644 data/hfopenllm_v2/zelk12/MT4-Gen2-gemma-2-9B/42e7abc6-eaa2-4971-90ee-e4d9dbb97ddb.json create mode 100644 data/hfopenllm_v2/zelk12/MT4-Gen3-gemma-2-9B/b1cf06a6-d270-41ae-bb9b-443bdc5446f3.json create mode 100644 data/hfopenllm_v2/zelk12/MT4-Gen4-gemma-2-9B/e40ea476-bcc5-4d3b-bf8e-e5048d9cbe42.json create mode 100644 data/hfopenllm_v2/zelk12/MT4-Gen5-gemma-2-9B/731a5f85-a59e-40af-870c-00e519ca0e7e.json create mode 100644 data/hfopenllm_v2/zelk12/MT4-Max-Merge_02012025163610-gemma-2-9B/38d93ae8-90ec-473c-8570-33d52c46770b.json create mode 100644 data/hfopenllm_v2/zelk12/MT4-gemma-2-9B/9072fd28-040b-44df-bd58-6e3f59398189.json create mode 100644 data/hfopenllm_v2/zelk12/MT5-Gen1-gemma-2-9B/14827e00-09c5-4ebd-93cb-8e026ac73d20.json create mode 100644 data/hfopenllm_v2/zelk12/MT5-Gen2-gemma-2-9B/11e76d74-b8e0-408f-b429-566faa5d60a2.json create mode 100644 data/hfopenllm_v2/zelk12/MT5-Gen3-gemma-2-9B/944c84d8-231d-47ef-85f4-23c0286a4a02.json create mode 100644 data/hfopenllm_v2/zelk12/MT5-Gen4-gemma-2-9B/47c8da1d-8ce3-4d19-b8b8-6b5e68e2e8ab.json create mode 
100644 data/hfopenllm_v2/zelk12/MT5-Gen5-gemma-2-9B/ca54a8d4-153b-4169-b6ee-133461a9bedd.json create mode 100644 data/hfopenllm_v2/zelk12/MT5-Max-Merge_02012025163610-gemma-2-9B/652359ec-14f2-4f94-a694-b7dc98819bfc.json create mode 100644 data/hfopenllm_v2/zelk12/MT5-gemma-2-9B/b34f3335-c7a3-431f-b2c8-6f0731a81378.json create mode 100644 data/hfopenllm_v2/zelk12/MTM-Merge-gemma-2-9B/077306f9-5d40-40dc-9df4-b5ca559af5c7.json create mode 100644 data/hfopenllm_v2/zelk12/MTMaMe-Merge_02012025163610-gemma-2-9B/e0f0fe87-8ed3-4398-8683-65aa042d01d9.json create mode 100644 data/hfopenllm_v2/zelk12/Rv0.4DMv1t0.25-gemma-2-9B/2d968d3e-a3df-4bdf-86a4-034087c0d7fc.json create mode 100644 data/hfopenllm_v2/zelk12/Rv0.4DMv1t0.25Tt0.25-gemma-2-9B/db476911-87fb-433f-b164-4435718dab46.json create mode 100644 data/hfopenllm_v2/zelk12/Rv0.4MT4g2-gemma-2-9B/75a967f6-a8ab-435f-999b-4889e8217dce.json create mode 100644 data/hfopenllm_v2/zelk12/T31122024203920-gemma-2-9B/e072997b-2f79-4d25-b8dc-ebf15ac311e1.json create mode 100644 data/hfopenllm_v2/zelk12/Test01012025155054/6d681a29-0d1a-4054-8250-5246993509f8.json delete mode 100644 data/hfopenllm_v2/zelk12/Test01012025155054/e25f6fa3-238e-4bc3-b6ce-cdc2bc728d9c.json create mode 100644 data/hfopenllm_v2/zelk12/Test01012025155054t0.5_gemma-2/2a6af4ce-e45c-4721-a23c-03071a5e774f.json create mode 100644 data/hfopenllm_v2/zelk12/gemma-2-S2MTM-9B/5ae5ddff-714d-4a20-b1d3-3eeb95fd858c.json create mode 100644 data/hfopenllm_v2/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.25/60052d34-f6a7-4204-baea-532f5ba29880.json create mode 100644 data/hfopenllm_v2/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.75/e1ddd882-f8a1-48d0-bb2a-878f43095895.json create mode 100644 data/hfopenllm_v2/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1/d2c3edec-38d8-48e3-9f6d-e26a63442af8.json create mode 100644 data/hfopenllm_v2/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.2/dcfafe94-dacb-4e7a-9365-8bb39ecb79ec.json create mode 100644 data/hfopenllm_v2/zelk12/recoilme-gemma-2-Gutenberg-Doppel-9B-v0.1/8ca0e602-bf6b-4d15-95c2-a0d47e78ded0.json create mode 100644 data/hfopenllm_v2/zelk12/recoilme-gemma-2-Ifable-9B-v0.1/fc262523-dcde-4b45-80ba-2922e66d42c4.json create mode 100644 data/hfopenllm_v2/zelk12/recoilme-gemma-2-psy10k-mental_healt-9B-v0.1/f8d745da-9867-4348-bace-d8052c3b4025.json create mode 100644 data/hfopenllm_v2/zetasepic/Qwen2.5-32B-Instruct-abliterated-v2/3d410f0f-6b24-4e86-a353-6142c51b1ecc.json delete mode 100644 data/hfopenllm_v2/zetasepic/Qwen2.5-32B-Instruct-abliterated-v2/a5490bf2-6d11-4474-b6e5-07a79d30f431.json create mode 100644 data/hfopenllm_v2/zetasepic/Qwen2.5-72B-Instruct-abliterated/46329fc3-974f-4d04-be9e-ba85b3816efc.json delete mode 100644 data/hfopenllm_v2/zetasepic/Qwen2.5-72B-Instruct-abliterated/78799fe1-5fbd-4023-9462-8d826dac41d5.json delete mode 100644 data/hfopenllm_v2/zhengr/MixTAO-7Bx2-MoE-v8.1/35068575-06a3-4541-bdf3-120bd6db2867.json create mode 100644 data/hfopenllm_v2/zhengr/MixTAO-7Bx2-MoE-v8.1/b964d0a4-7c44-4ea2-894e-3e1ca30321e0.json delete mode 100644 data/mmlu/openai/gpt2/7b2767f8-9266-486e-8e49-6177930bc258.json delete mode 100644 data/narrativeqa/openai/gpt2/96a7ea61-8869-4dd0-9164-756519a26ac0.json create mode 100644 scripts/global-mmlu-lite/__init__.py create mode 100644 scripts/global-mmlu-lite/adapter.py diff --git a/data/ai2_arc/anthropic/claude-sonnet-4-0/40094cf6-b187-475d-8f14-abb71d998c2b.json b/data/ai2_arc/anthropic/claude-sonnet-4-0/40094cf6-b187-475d-8f14-abb71d998c2b.json deleted file mode 100644 index 72c200a73..000000000 --- 
--- a/data/ai2_arc/anthropic/claude-sonnet-4-0/40094cf6-b187-475d-8f14-abb71d998c2b.json
+++ /dev/null
@@ -1,108 +0,0 @@
-{
-  "schema_version": "0.1.0",
-  "evaluation_id": "ai2_arc/anthropic_claude-sonnet-4-0/1761000045.0",
-  "retrieved_timestamp": "1761000045.0",
-  "source_data": {
-    "dataset_name": "ai2_arc",
-    "hf_repo": "allenai/ai2_arc",
-    "samples_number": 2376,
-    "sample_ids": [
-      1,
-      2,
-      3,
-      4,
-      5
-    ]
-  },
-  "source_metadata": {
-    "source_name": "inspect_ai",
-    "source_type": "evaluation_run",
-    "source_organization_name": "unknown",
-    "evaluator_relationship": "third_party"
-  },
-  "model_info": {
-    "name": "anthropic/claude-sonnet-4-0",
-    "id": "anthropic/claude-sonnet-4-0",
-    "developer": "anthropic",
-    "inference_platform": "anthropic"
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "choice",
-      "evaluation_timestamp": "1761000045.0",
-      "metric_config": {
-        "evaluation_description": "accuracy",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0.0,
-        "max_score": 1.0
-      },
-      "score_details": {
-        "score": 1.0
-      },
-      "generation_config": {}
-    }
-  ],
-  "detailed_evaluation_results_per_samples": [
-    {
-      "sample_id": "1",
-      "input": "Which statement best explains why photosynthesis is the foundation of most food webs?",
-      "ground_truth": "A",
-      "response": "A",
-      "choices": [
-        "Sunlight is the source of energy for nearly all ecosystems.",
-        "Most ecosystems are found on land instead of in water.",
-        "Carbon dioxide is more available than other gases.",
-        "The producers in all ecosystems are plants."
-      ]
-    },
-    {
-      "sample_id": "2",
-      "input": "Which piece of safety equipment is used to keep mold spores from entering the respiratory system?",
-      "ground_truth": "B",
-      "response": "B",
-      "choices": [
-        "safety goggles",
-        "breathing mask",
-        "rubber gloves",
-        "lead apron"
-      ]
-    },
-    {
-      "sample_id": "3",
-      "input": "Meiosis is a type of cell division in which germ cells divide to produce haploid cells. Where does meiosis occur?",
-      "ground_truth": "D",
-      "response": "D",
-      "choices": [
-        "brain cells",
-        "bone cells",
-        "muscle cells",
-        "ovary cells"
-      ]
-    },
-    {
-      "sample_id": "4",
-      "input": "Which characteristic describes the texture of a kitten's fur?",
-      "ground_truth": "D",
-      "response": "D",
-      "choices": [
-        "gray",
-        "warm",
-        "long",
-        "soft"
-      ]
-    },
-    {
-      "sample_id": "5",
-      "input": "Which best describes the structure of an atom?",
-      "ground_truth": "B",
-      "response": "B",
-      "choices": [
-        "a lightweight core surrounded by neutral particles",
-        "a massive core surrounded by negatively-charged particles",
-        "a network of interacting positive and negative particles",
-        "overlapping layers of neutral, positive, and negative particles"
-      ]
-    }
-  ]
-}
\ No newline at end of file
diff --git a/data/formatted b/data/formatted
deleted file mode 100644
index 7b8d6c05a..000000000
--- a/data/formatted
+++ /dev/null
@@ -1,283714 +0,0 @@
-[
-  {
-    "id": "0-hero/Matter-0.2-7B-DPO_bfloat16_26a66f0d862e2024ce4ad0a09c37052ac36e8af6_True",
-    "model": {
-      "name": "0-hero/Matter-0.2-7B-DPO",
-      "sha": "26a66f0d862e2024ce4ad0a09c37052ac36e8af6",
-      "precision": "bfloat16",
-      "type": "chatmodels",
-      "weight_type": "Original",
-      "architecture": "MistralForCausalLM",
-      "average_score": 8.90636130175029,
-      "has_chat_template": true
-    },
-    "evaluations": {
-      "ifeval": {
-        "name": "IFEval",
-        "value": 0.3302792147058693,
-        "normalized_score": 33.02792147058693
-      },
-      "bbh": {
-        "name": "BBH",
-        "value": 0.3596254301656297,
-        "normalized_score": 10.055525080241035
-      },
-      "math": {
-        "name": "MATH Level 5",
-        "value": 0.014350453172205438,
-        "normalized_score": 1.4350453172205437
-      },
-      "gpqa": {
-        "name": "GPQA",
-        "value": 0.25922818791946306,
-        "normalized_score": 1.230425055928408
-      },
-      "musr": {
-        "name": "MUSR",
-        "value": 0.381375,
-        "normalized_score": 5.871874999999999
-      },
-      "mmlu_pro": {
-        "name": "MMLU-PRO",
-        "value": 0.1163563829787234,
-        "normalized_score": 1.8173758865248217
-      }
-    },
-    "features": {
-      "is_not_available_on_hub": true,
-      "is_merged": false,
-      "is_moe": false,
-      "is_flagged": false,
-      "is_official_provider": false
-    },
-    "metadata": {
-      "upload_date": "2024-04-13",
-      "submission_date": "2024-08-05",
-      "generation": 0,
-      "base_model": "0-hero/Matter-0.2-7B-DPO",
-      "hub_license": "apache-2.0",
-      "hub_hearts": 3,
-      "params_billions": 7.242,
-      "co2_cost": 1.219174164123715
-    }
-  },
-  {
-    "id": "01-ai/Yi-1.5-34B_bfloat16_4b486f81c935a2dadde84c6baa1e1370d40a098f_False",
-    "model": {
-      "name": "01-ai/Yi-1.5-34B",
-      "sha": "4b486f81c935a2dadde84c6baa1e1370d40a098f",
-      "precision": "bfloat16",
-      "type": "pretrained",
-      "weight_type": "Original",
-      "architecture": "LlamaForCausalLM",
-      "average_score": 25.64649419429311,
-      "has_chat_template": false
-    },
-    "evaluations": {
-      "ifeval": {
-        "name": "IFEval",
-        "value": 0.2841172533322695,
-        "normalized_score": 28.411725333226947
-      },
-      "bbh": {
-        "name": "BBH",
-        "value": 0.5976391706360018,
-        "normalized_score": 42.74936268839652
-      },
-      "math": {
-        "name": "MATH Level 5",
-        "value": 0.15332326283987915,
-        "normalized_score": 15.332326283987916
-      },
-      "gpqa": {
-        "name": "GPQA",
-        "value": 0.36577181208053694,
-        "normalized_score": 15.436241610738257
-      },
-      "musr": {
-        "name": "MUSR",
-        "value": 0.4236041666666667,
-        "normalized_score": 11.217187500000003
-      },
-      "mmlu_pro": {
-        "name": "MMLU-PRO",
-        "value": 0.4665890957446808,
-        "normalized_score": 40.732121749408975
-      }
-    },
-    "features": {
-      "is_not_available_on_hub": true,
-
"is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-11", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "01-ai/Yi-1.5-34B", - "hub_license": "apache-2.0", - "hub_hearts": 46, - "params_billions": 34.389, - "co2_cost": 22.7033978747449 - } - }, - { - "id": "01-ai/Yi-1.5-34B-32K_bfloat16_2c03a29761e4174f20347a60fbe229be4383d48b_False", - "model": { - "name": "01-ai/Yi-1.5-34B-32K", - "sha": "2c03a29761e4174f20347a60fbe229be4383d48b", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.727912908508134, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3118691737922047, - "normalized_score": 31.186917379220468 - }, - "bbh": { - "name": "BBH", - "value": 0.6015685776542417, - "normalized_score": 43.38184666762572 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1540785498489426, - "normalized_score": 15.407854984894259 - }, - "gpqa": { - "name": "GPQA", - "value": 0.36325503355704697, - "normalized_score": 15.100671140939594 - }, - "musr": { - "name": "MUSR", - "value": 0.4398229166666667, - "normalized_score": 14.07786458333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4709109042553192, - "normalized_score": 41.21232269503546 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-15", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "01-ai/Yi-1.5-34B-32K", - "hub_license": "apache-2.0", - "hub_hearts": 36, - "params_billions": 34.389, - "co2_cost": 23.15462857509891 - } - }, - { - "id": "01-ai/Yi-1.5-34B-Chat_bfloat16_f3128b2d02d82989daae566c0a7eadc621ca3254_True", - "model": { - "name": "01-ai/Yi-1.5-34B-Chat", - "sha": "f3128b2d02d82989daae566c0a7eadc621ca3254", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 33.35799367075618, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6066758423205982, - "normalized_score": 60.66758423205982 - }, - "bbh": { - "name": "BBH", - "value": 0.6083748310271819, - "normalized_score": 44.262825981005655 - }, - "math": { - "name": "MATH Level 5", - "value": 0.277190332326284, - "normalized_score": 27.719033232628398 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3649328859060403, - "normalized_score": 15.324384787472036 - }, - "musr": { - "name": "MUSR", - "value": 0.4281979166666667, - "normalized_score": 13.058072916666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.45204454787234044, - "normalized_score": 39.11606087470449 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-10", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "01-ai/Yi-1.5-34B-Chat", - "hub_license": "apache-2.0", - "hub_hearts": 268, - "params_billions": 34.389, - "co2_cost": 22.423843867327744 - } - }, - { - "id": "01-ai/Yi-1.5-34B-Chat-16K_bfloat16_ff74452e11f0f749ab872dc19b1dd3813c25c4d8_True", - "model": { - "name": "01-ai/Yi-1.5-34B-Chat-16K", - "sha": "ff74452e11f0f749ab872dc19b1dd3813c25c4d8", - "precision": "bfloat16", - "type": 
"chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.403554842710225, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.456449997118756, - "normalized_score": 45.6449997118756 - }, - "bbh": { - "name": "BBH", - "value": 0.6100218256499571, - "normalized_score": 44.53615654671034 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21374622356495468, - "normalized_score": 21.37462235649547 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33808724832214765, - "normalized_score": 11.74496644295302 - }, - "musr": { - "name": "MUSR", - "value": 0.43976041666666665, - "normalized_score": 13.736718750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.45445478723404253, - "normalized_score": 39.383865248226954 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-15", - "submission_date": "2024-07-15", - "generation": 0, - "base_model": "01-ai/Yi-1.5-34B-Chat-16K", - "hub_license": "apache-2.0", - "hub_hearts": 26, - "params_billions": 34.389, - "co2_cost": 6.774022458148835 - } - }, - { - "id": "01-ai/Yi-1.5-6B_bfloat16_cab51fce425b4c1fb19fccfdd96bd5d0908c1657_False", - "model": { - "name": "01-ai/Yi-1.5-6B", - "sha": "cab51fce425b4c1fb19fccfdd96bd5d0908c1657", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.745698054972127, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.26166017278598563, - "normalized_score": 26.166017278598567 - }, - "bbh": { - "name": "BBH", - "value": 0.44925820198929056, - "normalized_score": 22.027904536694773 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06646525679758308, - "normalized_score": 6.646525679758309 - }, - "gpqa": { - "name": "GPQA", - "value": 0.313758389261745, - "normalized_score": 8.501118568232664 - }, - "musr": { - "name": "MUSR", - "value": 0.43740625, - "normalized_score": 13.309114583333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31441156914893614, - "normalized_score": 23.823507683215126 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-11", - "submission_date": "2024-08-10", - "generation": 0, - "base_model": "01-ai/Yi-1.5-6B", - "hub_license": "apache-2.0", - "hub_hearts": 30, - "params_billions": 6.061, - "co2_cost": 1.8442095716487765 - } - }, - { - "id": "01-ai/Yi-1.5-6B-Chat_bfloat16_3f64d3f159c6ad8494227bb77e2a7baef8cd808b_True", - "model": { - "name": "01-ai/Yi-1.5-6B-Chat", - "sha": "3f64d3f159c6ad8494227bb77e2a7baef8cd808b", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.784006289829847, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5145270105542183, - "normalized_score": 51.452701055421834 - }, - "bbh": { - "name": "BBH", - "value": 0.4571311331954389, - "normalized_score": 23.67872313235784 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1623867069486405, - "normalized_score": 16.238670694864048 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - 
"musr": { - "name": "MUSR", - "value": 0.43917708333333333, - "normalized_score": 14.030468750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3193151595744681, - "normalized_score": 24.368351063829788 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-11", - "submission_date": "2024-10-22", - "generation": 0, - "base_model": "01-ai/Yi-1.5-6B-Chat", - "hub_license": "apache-2.0", - "hub_hearts": 41, - "params_billions": 6.061, - "co2_cost": 1.4447911071090622 - } - }, - { - "id": "01-ai/Yi-1.5-9B_bfloat16_8cfde9604384c50137bee480b8cef8a08e5ae81d_False", - "model": { - "name": "01-ai/Yi-1.5-9B", - "sha": "8cfde9604384c50137bee480b8cef8a08e5ae81d", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.153901514184795, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.29358435617494916, - "normalized_score": 29.358435617494916 - }, - "bbh": { - "name": "BBH", - "value": 0.514294179104191, - "normalized_score": 30.50071699492122 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11404833836858005, - "normalized_score": 11.404833836858005 - }, - "gpqa": { - "name": "GPQA", - "value": 0.37919463087248323, - "normalized_score": 17.225950782997764 - }, - "musr": { - "name": "MUSR", - "value": 0.43278124999999995, - "normalized_score": 12.030989583333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3916223404255319, - "normalized_score": 32.402482269503544 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-11", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "01-ai/Yi-1.5-9B", - "hub_license": "apache-2.0", - "hub_hearts": 48, - "params_billions": 8.829, - "co2_cost": 1.4688920320817076 - } - }, - { - "id": "01-ai/Yi-1.5-9B-32K_bfloat16_116561dfae63af90f9d163b43077629e0e916bb1_False", - "model": { - "name": "01-ai/Yi-1.5-9B-32K", - "sha": "116561dfae63af90f9d163b43077629e0e916bb1", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.809786285875365, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23031113002389217, - "normalized_score": 23.031113002389215 - }, - "bbh": { - "name": "BBH", - "value": 0.496332115988265, - "normalized_score": 28.937011582169664 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10800604229607251, - "normalized_score": 10.80060422960725 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35906040268456374, - "normalized_score": 14.541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.4186145833333333, - "normalized_score": 10.82682291666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37649601063829785, - "normalized_score": 30.721778959810877 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-15", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "01-ai/Yi-1.5-9B-32K", - "hub_license": "apache-2.0", - "hub_hearts": 18, - "params_billions": 8.829, - "co2_cost": 
1.5680734132696938 - } - }, - { - "id": "01-ai/Yi-1.5-9B-Chat_bfloat16_bc87d8557c98dc1e5fdef6ec23ed31088c4d3f35_True", - "model": { - "name": "01-ai/Yi-1.5-9B-Chat", - "sha": "bc87d8557c98dc1e5fdef6ec23ed31088c4d3f35", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.530872220260978, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6045525871354672, - "normalized_score": 60.455258713546726 - }, - "bbh": { - "name": "BBH", - "value": 0.555906430281685, - "normalized_score": 36.95293138417893 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2258308157099698, - "normalized_score": 22.58308157099698 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3347315436241611, - "normalized_score": 11.297539149888143 - }, - "musr": { - "name": "MUSR", - "value": 0.42590625, - "normalized_score": 12.838281249999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39752327127659576, - "normalized_score": 33.05814125295508 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-10", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "01-ai/Yi-1.5-9B-Chat", - "hub_license": "apache-2.0", - "hub_hearts": 141, - "params_billions": 8.829, - "co2_cost": 1.453543490910605 - } - }, - { - "id": "01-ai/Yi-1.5-9B-Chat-16K_bfloat16_2b397e5f0fab87984efa66856c5c4ed4bbe68b50_True", - "model": { - "name": "01-ai/Yi-1.5-9B-Chat-16K", - "sha": "2b397e5f0fab87984efa66856c5c4ed4bbe68b50", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.76539234993476, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4214040966856829, - "normalized_score": 42.14040966856828 - }, - "bbh": { - "name": "BBH", - "value": 0.5153383364651778, - "normalized_score": 31.497608947018318 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1782477341389728, - "normalized_score": 17.82477341389728 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3087248322147651, - "normalized_score": 7.829977628635347 - }, - "musr": { - "name": "MUSR", - "value": 0.40990624999999997, - "normalized_score": 10.03828125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39935172872340424, - "normalized_score": 33.261303191489354 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-15", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "01-ai/Yi-1.5-9B-Chat-16K", - "hub_license": "apache-2.0", - "hub_hearts": 35, - "params_billions": 8.829, - "co2_cost": 1.5847450134017533 - } - }, - { - "id": "01-ai/Yi-34B_bfloat16_e1e7da8c75cfd5c44522228599fd4d2990cedd1c_False", - "model": { - "name": "01-ai/Yi-34B", - "sha": "e1e7da8c75cfd5c44522228599fd4d2990cedd1c", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.373127018936653, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3045751938190667, - "normalized_score": 30.45751938190668 - }, - "bbh": { - "name": "BBH", - "value": 0.5457099951794562, - "normalized_score": 
35.542431259008794 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0513595166163142, - "normalized_score": 5.13595166163142 - }, - "gpqa": { - "name": "GPQA", - "value": 0.36661073825503354, - "normalized_score": 15.548098434004473 - }, - "musr": { - "name": "MUSR", - "value": 0.4118541666666667, - "normalized_score": 9.648437500000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.441156914893617, - "normalized_score": 37.90632387706855 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-11-01", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "01-ai/Yi-34B", - "hub_license": "apache-2.0", - "hub_hearts": 1293, - "params_billions": 34.389, - "co2_cost": 25.657483288779545 - } - }, - { - "id": "01-ai/Yi-34B-200K_bfloat16_8ac1a1ebe011df28b78ccd08012aeb2222443c77_False", - "model": { - "name": "01-ai/Yi-34B-200K", - "sha": "8ac1a1ebe011df28b78ccd08012aeb2222443c77", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.01347533597433, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15424850507763843, - "normalized_score": 15.424850507763843 - }, - "bbh": { - "name": "BBH", - "value": 0.5441817925289527, - "normalized_score": 36.02211028900003 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05740181268882175, - "normalized_score": 5.740181268882175 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3565436241610738, - "normalized_score": 14.205816554809845 - }, - "musr": { - "name": "MUSR", - "value": 0.38171874999999994, - "normalized_score": 9.414843749999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.45345744680851063, - "normalized_score": 39.273049645390074 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-11-06", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "01-ai/Yi-34B-200K", - "hub_license": "apache-2.0", - "hub_hearts": 318, - "params_billions": 34.389, - "co2_cost": 25.50385584712952 - } - }, - { - "id": "01-ai/Yi-34B-Chat_bfloat16_2e528b6a80fb064a0a746c5ca43114b135e30464_True", - "model": { - "name": "01-ai/Yi-34B-Chat", - "sha": "2e528b6a80fb064a0a746c5ca43114b135e30464", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.226662652803373, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4698887839820565, - "normalized_score": 46.98887839820566 - }, - "bbh": { - "name": "BBH", - "value": 0.5560872910766164, - "normalized_score": 37.623987597243485 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06268882175226587, - "normalized_score": 6.268882175226587 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33808724832214765, - "normalized_score": 11.74496644295302 - }, - "musr": { - "name": "MUSR", - "value": 0.39784375, - "normalized_score": 8.363802083333338 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4093251329787234, - "normalized_score": 34.369459219858165 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - 
"metadata": { - "upload_date": "2023-11-22", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "01-ai/Yi-34B-Chat", - "hub_license": "apache-2.0", - "hub_hearts": 350, - "params_billions": 34.389, - "co2_cost": 25.125695689981566 - } - }, - { - "id": "01-ai/Yi-6B_bfloat16_7f7fb7662fd8ec09029364f408053c954986c8e5_False", - "model": { - "name": "01-ai/Yi-6B", - "sha": "7f7fb7662fd8ec09029364f408053c954986c8e5", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.611617485376058, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.28933784580468713, - "normalized_score": 28.93378458046871 - }, - "bbh": { - "name": "BBH", - "value": 0.4309230591000865, - "normalized_score": 19.408504737915056 - }, - "math": { - "name": "MATH Level 5", - "value": 0.015861027190332326, - "normalized_score": 1.5861027190332326 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26929530201342283, - "normalized_score": 2.572706935123044 - }, - "musr": { - "name": "MUSR", - "value": 0.39368749999999997, - "normalized_score": 7.044270833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29911901595744683, - "normalized_score": 22.12433510638298 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-11-01", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "01-ai/Yi-6B", - "hub_license": "apache-2.0", - "hub_hearts": 372, - "params_billions": 6.061, - "co2_cost": 1.0985492537000408 - } - }, - { - "id": "01-ai/Yi-6B-200K_bfloat16_4a74338e778a599f313e9fa8f5bc08c717604420_False", - "model": { - "name": "01-ai/Yi-6B-200K", - "sha": "4a74338e778a599f313e9fa8f5bc08c717604420", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 11.996098298832841, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.08433068702154728, - "normalized_score": 8.433068702154728 - }, - "bbh": { - "name": "BBH", - "value": 0.42892948109603307, - "normalized_score": 20.148020103768047 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01812688821752266, - "normalized_score": 1.812688821752266 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28187919463087246, - "normalized_score": 4.250559284116329 - }, - "musr": { - "name": "MUSR", - "value": 0.45873958333333337, - "normalized_score": 16.842447916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2844082446808511, - "normalized_score": 20.489804964539008 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-11-06", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "01-ai/Yi-6B-200K", - "hub_license": "apache-2.0", - "hub_hearts": 172, - "params_billions": 6.061, - "co2_cost": 1.1264239806184515 - } - }, - { - "id": "01-ai/Yi-6B-Chat_bfloat16_01f7fabb6cfb26efeb764da4a0a19cad2c754232_True", - "model": { - "name": "01-ai/Yi-6B-Chat", - "sha": "01f7fabb6cfb26efeb764da4a0a19cad2c754232", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.11765000523676, - "has_chat_template": true - 
}, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.33952135888331847, - "normalized_score": 33.95213588833185 - }, - "bbh": { - "name": "BBH", - "value": 0.41326019207548687, - "normalized_score": 17.00016656742376 - }, - "math": { - "name": "MATH Level 5", - "value": 0.013595166163141994, - "normalized_score": 1.3595166163141994 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.36879166666666663, - "normalized_score": 3.5656250000000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3061003989361702, - "normalized_score": 22.90004432624113 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-11-22", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "01-ai/Yi-6B-Chat", - "hub_license": "apache-2.0", - "hub_hearts": 65, - "params_billions": 6.061, - "co2_cost": 1.1106656722436012 - } - }, - { - "id": "01-ai/Yi-9B_bfloat16_b4a466d95091696285409f1dcca3028543cb39da_False", - "model": { - "name": "01-ai/Yi-9B", - "sha": "b4a466d95091696285409f1dcca3028543cb39da", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.811867367746064, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2708779372066118, - "normalized_score": 27.08779372066118 - }, - "bbh": { - "name": "BBH", - "value": 0.49396075125308075, - "normalized_score": 27.626956112077934 - }, - "math": { - "name": "MATH Level 5", - "value": 0.055891238670694864, - "normalized_score": 5.589123867069486 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3179530201342282, - "normalized_score": 9.060402684563762 - }, - "musr": { - "name": "MUSR", - "value": 0.40540624999999997, - "normalized_score": 8.909114583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35738031914893614, - "normalized_score": 28.59781323877068 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-03-01", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "01-ai/Yi-9B", - "hub_license": "apache-2.0", - "hub_hearts": 185, - "params_billions": 8.829, - "co2_cost": 1.530663961417017 - } - }, - { - "id": "01-ai/Yi-9B-200K_bfloat16_8c93accd5589dbb74ee938e103613508c4a9b88d_False", - "model": { - "name": "01-ai/Yi-9B-200K", - "sha": "8c93accd5589dbb74ee938e103613508c4a9b88d", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.72955178611439, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23270921155866434, - "normalized_score": 23.270921155866432 - }, - "bbh": { - "name": "BBH", - "value": 0.4793302602023641, - "normalized_score": 26.49249509714754 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06646525679758308, - "normalized_score": 6.646525679758309 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31543624161073824, - "normalized_score": 8.7248322147651 - }, - "musr": { - "name": "MUSR", - "value": 0.42940625, - "normalized_score": 12.109114583333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36220079787234044, - 
"normalized_score": 29.1334219858156 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-03-15", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "01-ai/Yi-9B-200K", - "hub_license": "apache-2.0", - "hub_hearts": 75, - "params_billions": 8.829, - "co2_cost": 1.5489823437933046 - } - }, - { - "id": "01-ai/Yi-Coder-9B-Chat_bfloat16_356a1f8d4e4a606d0b879e54191ca809918576b8_True", - "model": { - "name": "01-ai/Yi-Coder-9B-Chat", - "sha": "356a1f8d4e4a606d0b879e54191ca809918576b8", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.985989314863886, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4817041006750976, - "normalized_score": 48.17041006750976 - }, - "bbh": { - "name": "BBH", - "value": 0.48142000339111674, - "normalized_score": 25.94315294491389 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04003021148036254, - "normalized_score": 4.003021148036254 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24748322147651006, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3991770833333333, - "normalized_score": 7.963802083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.24251994680851063, - "normalized_score": 15.83554964539007 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-08-21", - "submission_date": "2024-09-12", - "generation": 1, - "base_model": "01-ai/Yi-Coder-9B", - "hub_license": "apache-2.0", - "hub_hearts": 198, - "params_billions": 8.829, - "co2_cost": 1.8195322295921676 - } - }, - { - "id": "1-800-LLMs/Qwen-2.5-14B-Hindi_float16_554e931a1b7e72689bdf044f8507e319e4b722e7_False", - "model": { - "name": "1-800-LLMs/Qwen-2.5-14B-Hindi", - "sha": "554e931a1b7e72689bdf044f8507e319e4b722e7", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.266177006668904, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.582570911847232, - "normalized_score": 58.2570911847232 - }, - "bbh": { - "name": "BBH", - "value": 0.6523901531956199, - "normalized_score": 49.32988995210337 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3330815709969788, - "normalized_score": 33.30815709969788 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3624161073825503, - "normalized_score": 14.988814317673373 - }, - "musr": { - "name": "MUSR", - "value": 0.4489375, - "normalized_score": 14.350520833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5262632978723404, - "normalized_score": 47.362588652482266 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-06", - "generation": 0, - "base_model": "1-800-LLMs/Qwen-2.5-14B-Hindi", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 14.77, - "co2_cost": 3.8660742873468124 - } - }, - { - "id": 
"1-800-LLMs/Qwen-2.5-14B-Hindi-Custom-Instruct_bfloat16_05b8099d33cc43eb065ab4aadb13c5362e1c3cbe_False", - "model": { - "name": "1-800-LLMs/Qwen-2.5-14B-Hindi-Custom-Instruct", - "sha": "05b8099d33cc43eb065ab4aadb13c5362e1c3cbe", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 31.020777275634742, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.30774677854758703, - "normalized_score": 30.774677854758707 - }, - "bbh": { - "name": "BBH", - "value": 0.6284322714967584, - "normalized_score": 46.54015554088327 - }, - "math": { - "name": "MATH Level 5", - "value": 0.311178247734139, - "normalized_score": 31.1178247734139 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3699664429530201, - "normalized_score": 15.99552572706935 - }, - "musr": { - "name": "MUSR", - "value": 0.4490625, - "normalized_score": 15.432812499999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.516373005319149, - "normalized_score": 46.263667257683224 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-05", - "submission_date": "2025-02-06", - "generation": 2, - "base_model": "Qwen/Qwen2.5-14B", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 14.77, - "co2_cost": 5.400465469071647 - } - }, - { - "id": "1024m/PHI-4-Hindi_float16_9bb64444dcd4d306619ac29bcb32d29299238373_False", - "model": { - "name": "1024m/PHI-4-Hindi", - "sha": "9bb64444dcd4d306619ac29bcb32d29299238373", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 27.48785095027712, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.00816832670647216, - "normalized_score": 0.816832670647216 - }, - "bbh": { - "name": "BBH", - "value": 0.6710015642760666, - "normalized_score": 52.46181381023763 - }, - "math": { - "name": "MATH Level 5", - "value": 0.23338368580060423, - "normalized_score": 23.338368580060422 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3976510067114094, - "normalized_score": 19.686800894854585 - }, - "musr": { - "name": "MUSR", - "value": 0.4913541666666667, - "normalized_score": 21.519270833333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.523936170212766, - "normalized_score": 47.10401891252955 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-06", - "submission_date": "2025-02-06", - "generation": 0, - "base_model": "1024m/PHI-4-Hindi", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 14.66, - "co2_cost": 1.65964205977959 - } - }, - { - "id": "1024m/QWEN-14B-B100_bfloat16_adecb879fc4c585b789f36f19d5bccc150f40837_True", - "model": { - "name": "1024m/QWEN-14B-B100", - "sha": "adecb879fc4c585b789f36f19d5bccc150f40837", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.91906679224076, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7762104549262623, - "normalized_score": 77.62104549262622 - }, - "bbh": { - 
"name": "BBH", - "value": 0.653271132679638, - "normalized_score": 49.77664782103219 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5438066465256798, - "normalized_score": 54.38066465256798 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35067114093959734, - "normalized_score": 13.422818791946312 - }, - "musr": { - "name": "MUSR", - "value": 0.41, - "normalized_score": 9.883333333333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5178690159574468, - "normalized_score": 46.429890661938536 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-23", - "submission_date": "2025-02-06", - "generation": 3, - "base_model": "Qwen/Qwen2.5-14B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 3.370181942335884 - } - }, - { - "id": "152334H/miqu-1-70b-sf_float16_1dca4cce36f01f2104ee2e6b97bac6ff7bb300c1_False", - "model": { - "name": "152334H/miqu-1-70b-sf", - "sha": "1dca4cce36f01f2104ee2e6b97bac6ff7bb300c1", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.097407726236643, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5181740005407873, - "normalized_score": 51.81740005407873 - }, - "bbh": { - "name": "BBH", - "value": 0.6102361685099691, - "normalized_score": 43.807147003691966 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12462235649546828, - "normalized_score": 12.462235649546828 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35067114093959734, - "normalized_score": 13.422818791946312 - }, - "musr": { - "name": "MUSR", - "value": 0.45820833333333333, - "normalized_score": 17.209374999999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.42278922872340424, - "normalized_score": 35.865469858156025 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-01-30", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "152334H/miqu-1-70b-sf", - "hub_license": "", - "hub_hearts": 221, - "params_billions": 68.977, - "co2_cost": 12.19797195382831 - } - }, - { - "id": "1TuanPham/T-VisStar-7B-v0.1_float16_b111b59971c14b46c888b96723ff7f3c7b6fd92f_True", - "model": { - "name": "1TuanPham/T-VisStar-7B-v0.1", - "sha": "b111b59971c14b46c888b96723ff7f3c7b6fd92f", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.144808610895357, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.36070404305021786, - "normalized_score": 36.07040430502179 - }, - "bbh": { - "name": "BBH", - "value": 0.5052203113352468, - "normalized_score": 30.24383447882599 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05740181268882175, - "normalized_score": 5.740181268882175 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28523489932885904, - "normalized_score": 4.697986577181204 - }, - "musr": { - "name": "MUSR", - "value": 0.4375, - "normalized_score": 13.554166666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3210605053191489, - "normalized_score": 24.562278368794324 - } - }, - "features": { - "is_not_available_on_hub": true, - 
"is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-19", - "submission_date": "2024-09-22", - "generation": 0, - "base_model": "1TuanPham/T-VisStar-7B-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 7.294, - "co2_cost": 1.9052644538640493 - } - }, - { - "id": "1TuanPham/T-VisStar-v0.1_float16_c9779bd9630a533f7e42fd8effcca69623d48c9c_True", - "model": { - "name": "1TuanPham/T-VisStar-v0.1", - "sha": "c9779bd9630a533f7e42fd8effcca69623d48c9c", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.144808610895357, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.36070404305021786, - "normalized_score": 36.07040430502179 - }, - "bbh": { - "name": "BBH", - "value": 0.5052203113352468, - "normalized_score": 30.24383447882599 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05740181268882175, - "normalized_score": 5.740181268882175 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28523489932885904, - "normalized_score": 4.697986577181204 - }, - "musr": { - "name": "MUSR", - "value": 0.4375, - "normalized_score": 13.554166666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3210605053191489, - "normalized_score": 24.562278368794324 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-19", - "submission_date": "2024-09-20", - "generation": 0, - "base_model": "1TuanPham/T-VisStar-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 7.294, - "co2_cost": 1.2487688271364052 - } - }, - { - "id": "3rd-Degree-Burn/L-3.1-Science-Writer-8B_float16_d9bb11fb02f8eca3aec408912278e513377115da_False", - "model": { - "name": "3rd-Degree-Burn/L-3.1-Science-Writer-8B", - "sha": "d9bb11fb02f8eca3aec408912278e513377115da", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.09120798437004, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42625012743963797, - "normalized_score": 42.625012743963794 - }, - "bbh": { - "name": "BBH", - "value": 0.5041306326216103, - "normalized_score": 29.19930078641656 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10347432024169184, - "normalized_score": 10.347432024169184 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.3959479166666666, - "normalized_score": 11.693489583333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36494348404255317, - "normalized_score": 29.43816489361702 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-19", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4193565486098367 - } - }, - { - "id": "3rd-Degree-Burn/Llama-3.1-8B-Squareroot_float16_2bec01c2c5d53276eac2222c80190eb44ab2e6af_True", - "model": { - "name": "3rd-Degree-Burn/Llama-3.1-8B-Squareroot", - 
"sha": "2bec01c2c5d53276eac2222c80190eb44ab2e6af", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 11.223740793316347, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22134381219608418, - "normalized_score": 22.134381219608414 - }, - "bbh": { - "name": "BBH", - "value": 0.34609423326328875, - "normalized_score": 8.618063681935197 - }, - "math": { - "name": "MATH Level 5", - "value": 0.26586102719033233, - "normalized_score": 26.586102719033235 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25671140939597314, - "normalized_score": 0.8948545861297527 - }, - "musr": { - "name": "MUSR", - "value": 0.3089166666666667, - "normalized_score": 0.7812499999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17495013297872342, - "normalized_score": 8.32779255319149 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-10", - "submission_date": "2024-10-10", - "generation": 1, - "base_model": "3rd-Degree-Burn/Llama-3.1-8B-Squareroot (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.974099722258276 - } - }, - { - "id": "3rd-Degree-Burn/Llama-3.1-8B-Squareroot-v1_float16_09339d9c3b118ae3c6e7beab8b84347471990988_True", - "model": { - "name": "3rd-Degree-Burn/Llama-3.1-8B-Squareroot-v1", - "sha": "09339d9c3b118ae3c6e7beab8b84347471990988", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 8.037946007988568, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2892381104358657, - "normalized_score": 28.923811043586575 - }, - "bbh": { - "name": "BBH", - "value": 0.33427703119251256, - "normalized_score": 6.515144725982737 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08836858006042296, - "normalized_score": 8.836858006042297 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2558724832214765, - "normalized_score": 0.7829977628635317 - }, - "musr": { - "name": "MUSR", - "value": 0.3340625, - "normalized_score": 1.7578124999999993 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11269946808510638, - "normalized_score": 1.4110520094562635 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-10", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.5454985447919645 - } - }, - { - "id": "3rd-Degree-Burn/Llama-Squared-8B_bfloat16_f30737e92b3a3fa0ef2a3f3ade487cc94ad34400_True", - "model": { - "name": "3rd-Degree-Burn/Llama-Squared-8B", - "sha": "f30737e92b3a3fa0ef2a3f3ade487cc94ad34400", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 12.434954194134717, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.27552449722292405, - "normalized_score": 27.55244972229241 - }, - "bbh": { - "name": "BBH", - "value": 0.4431025683868353, - "normalized_score": 21.277103190106818 - }, - "math": { - "name": "MATH 
Level 5", - "value": 0.05740181268882175, - "normalized_score": 5.740181268882175 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27181208053691275, - "normalized_score": 2.9082774049216997 - }, - "musr": { - "name": "MUSR", - "value": 0.30894791666666666, - "normalized_score": 1.9518229166666672 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2366190159574468, - "normalized_score": 15.179890661938533 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-10-08", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 2.022223049482849 - } - }, - { - "id": "4season/final_model_test_v2_bfloat16_cf690c35d9cf0b0b6bf034fa16dbf88c56fe861c_False", - "model": { - "name": "4season/final_model_test_v2", - "sha": "cf690c35d9cf0b0b6bf034fa16dbf88c56fe861c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.086235043644056, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3191132860809319, - "normalized_score": 31.911328608093193 - }, - "bbh": { - "name": "BBH", - "value": 0.6342049783295018, - "normalized_score": 47.410670136906425 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08383685800604229, - "normalized_score": 8.38368580060423 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3271812080536913, - "normalized_score": 10.290827740492169 - }, - "musr": { - "name": "MUSR", - "value": 0.4314479166666667, - "normalized_score": 12.430989583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3528091755319149, - "normalized_score": 28.08990839243498 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-20", - "submission_date": "2024-06-27", - "generation": 0, - "base_model": "4season/final_model_test_v2", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 21.421, - "co2_cost": 2.1620767196814024 - } - }, - { - "id": "AALF/FuseChat-Llama-3.1-8B-Instruct-preview_bfloat16_f740497979293c90fa1cfaa7c446016e107cc2c1_True", - "model": { - "name": "AALF/FuseChat-Llama-3.1-8B-Instruct-preview", - "sha": "f740497979293c90fa1cfaa7c446016e107cc2c1", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.56857495894073, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7189579205397235, - "normalized_score": 71.89579205397234 - }, - "bbh": { - "name": "BBH", - "value": 0.5119887898349903, - "normalized_score": 30.84806521622957 - }, - "math": { - "name": "MATH Level 5", - "value": 0.24773413897280966, - "normalized_score": 24.773413897280967 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.38200000000000006, - "normalized_score": 6.150000000000006 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3732546542553192, - "normalized_score": 30.361628250591018 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - 
"is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-20", - "submission_date": "2024-11-20", - "generation": 0, - "base_model": "AALF/FuseChat-Llama-3.1-8B-Instruct-preview", - "hub_license": "apache-2.0", - "hub_hearts": 10, - "params_billions": 8.03, - "co2_cost": 1.3772382561660794 - } - }, - { - "id": "AALF/FuseChat-Llama-3.1-8B-SFT-preview_bfloat16_601f2b8c448acc5686656d3979ed732ce050b827_True", - "model": { - "name": "AALF/FuseChat-Llama-3.1-8B-SFT-preview", - "sha": "601f2b8c448acc5686656d3979ed732ce050b827", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.225292099823264, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7280504616639405, - "normalized_score": 72.80504616639405 - }, - "bbh": { - "name": "BBH", - "value": 0.5240303130445233, - "normalized_score": 32.53678156315301 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22507552870090636, - "normalized_score": 22.507552870090635 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.40199999999999997, - "normalized_score": 9.749999999999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37433510638297873, - "normalized_score": 30.481678486997637 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-20", - "submission_date": "2024-11-21", - "generation": 0, - "base_model": "AALF/FuseChat-Llama-3.1-8B-SFT-preview", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.368615076760581 - } - }, - { - "id": "AALF/gemma-2-27b-it-SimPO-37K_bfloat16_27f15219df2000a16955c9403c3f38b5f3413b3d_True", - "model": { - "name": "AALF/gemma-2-27b-it-SimPO-37K", - "sha": "27f15219df2000a16955c9403c3f38b5f3413b3d", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 9.512077380763676, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24065257959990605, - "normalized_score": 24.065257959990603 - }, - "bbh": { - "name": "BBH", - "value": 0.3911343917952534, - "normalized_score": 15.307880971954303 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01283987915407855, - "normalized_score": 1.283987915407855 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2802013422818792, - "normalized_score": 4.026845637583895 - }, - "musr": { - "name": "MUSR", - "value": 0.3487604166666667, - "normalized_score": 1.5950520833333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1971409574468085, - "normalized_score": 10.793439716312056 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-13", - "submission_date": "2024-09-05", - "generation": 2, - "base_model": "google/gemma-2-27b", - "hub_license": "gemma", - "hub_hearts": 18, - "params_billions": 27.227, - "co2_cost": 19.995443250824795 - } - }, - { - "id": "AALF/gemma-2-27b-it-SimPO-37K-100steps_bfloat16_d5cbf18b2eb90b77f5ddbb74cfcaeedfa692c90c_True", - "model": { - "name": "AALF/gemma-2-27b-it-SimPO-37K-100steps", - "sha": 
"d5cbf18b2eb90b77f5ddbb74cfcaeedfa692c90c", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 10.246803363324455, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2567642743476199, - "normalized_score": 25.67642743476199 - }, - "bbh": { - "name": "BBH", - "value": 0.39308230769885016, - "normalized_score": 15.261078322847055 - }, - "math": { - "name": "MATH Level 5", - "value": 0.021148036253776436, - "normalized_score": 2.1148036253776437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28859060402684567, - "normalized_score": 5.145413870246088 - }, - "musr": { - "name": "MUSR", - "value": 0.3329166666666667, - "normalized_score": 0.7812499999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.21251662234042554, - "normalized_score": 12.501846926713947 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-13", - "submission_date": "2024-09-21", - "generation": 2, - "base_model": "google/gemma-2-27b", - "hub_license": "gemma", - "hub_hearts": 12, - "params_billions": 27.227, - "co2_cost": 19.7134709493991 - } - }, - { - "id": "AELLM/gemma-2-aeria-infinity-9b_bfloat16_24e1de07258925d5ddb52134b66e2eb0d698dc11_True", - "model": { - "name": "AELLM/gemma-2-aeria-infinity-9b", - "sha": "24e1de07258925d5ddb52134b66e2eb0d698dc11", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 31.919053815912523, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.759399504426034, - "normalized_score": 75.93995044260342 - }, - "bbh": { - "name": "BBH", - "value": 0.5983336669577649, - "normalized_score": 42.0902142313775 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21450151057401812, - "normalized_score": 21.45015105740181 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3338926174496644, - "normalized_score": 11.185682326621922 - }, - "musr": { - "name": "MUSR", - "value": 0.40196875, - "normalized_score": 9.046093750000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38622007978723405, - "normalized_score": 31.80223108747045 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-09", - "submission_date": "2024-10-09", - "generation": 1, - "base_model": "AELLM/gemma-2-aeria-infinity-9b (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 9.242, - "co2_cost": 6.007578688357264 - } - }, - { - "id": "AELLM/gemma-2-lyco-infinity-9b_bfloat16_2941a682fcbcfea3f1485c9e0691cc1d9edc742e_True", - "model": { - "name": "AELLM/gemma-2-lyco-infinity-9b", - "sha": "2941a682fcbcfea3f1485c9e0691cc1d9edc742e", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 30.04985134020671, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7316475839660989, - "normalized_score": 73.1647583966099 - }, - "bbh": { - "name": "BBH", - "value": 0.5839534871023703, - "normalized_score": 39.78753882674737 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17069486404833836, - 
"normalized_score": 17.069486404833835 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32802013422818793, - "normalized_score": 10.402684563758392 - }, - "musr": { - "name": "MUSR", - "value": 0.40063541666666663, - "normalized_score": 8.912760416666671 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.378656914893617, - "normalized_score": 30.961879432624112 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-09", - "submission_date": "2024-10-09", - "generation": 1, - "base_model": "AELLM/gemma-2-lyco-infinity-9b (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 10.159, - "co2_cost": 5.957040134565551 - } - }, - { - "id": "AGI-0/Art-v0-3B_bfloat16_7a55f84e91334c5732b516c35432bf59c7001525_True", - "model": { - "name": "AGI-0/Art-v0-3B", - "sha": "7a55f84e91334c5732b516c35432bf59c7001525", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 12.132145545921874, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.319238509377341, - "normalized_score": 31.9238509377341 - }, - "bbh": { - "name": "BBH", - "value": 0.3400959483013824, - "normalized_score": 8.029776912286984 - }, - "math": { - "name": "MATH Level 5", - "value": 0.24622356495468278, - "normalized_score": 24.622356495468278 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.3768229166666666, - "normalized_score": 5.002864583333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11785239361702128, - "normalized_score": 1.9835992907801419 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-30", - "submission_date": "2025-01-19", - "generation": 0, - "base_model": "AGI-0/Art-v0-3B", - "hub_license": "other", - "hub_hearts": 10, - "params_billions": 3.086, - "co2_cost": 2.3615763771653406 - } - }, - { - "id": "AGI-0/Artificium-llama3.1-8B-001_float16_6bf3dcca3b75a06a4e04e5f944e709cccf4673fd_True", - "model": { - "name": "AGI-0/Artificium-llama3.1-8B-001", - "sha": "6bf3dcca3b75a06a4e04e5f944e709cccf4673fd", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.491817924739056, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5247687247614108, - "normalized_score": 52.47687247614108 - }, - "bbh": { - "name": "BBH", - "value": 0.42562150225923556, - "normalized_score": 19.34889807323965 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13595166163141995, - "normalized_score": 13.595166163141995 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.3794583333333333, - "normalized_score": 5.165625000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3181515957446808, - "normalized_score": 24.23906619385342 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": 
{ - "upload_date": "2024-08-16", - "submission_date": "2024-09-08", - "generation": 0, - "base_model": "AGI-0/Artificium-llama3.1-8B-001", - "hub_license": "unknown", - "hub_hearts": 33, - "params_billions": 8.03, - "co2_cost": 2.7974026325717554 - } - }, - { - "id": "AGI-0/smartllama3.1-8B-001_float16_974d5ee685f1be003a1d8d08e907fe672d225035_False", - "model": { - "name": "AGI-0/smartllama3.1-8B-001", - "sha": "974d5ee685f1be003a1d8d08e907fe672d225035", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.424552077662117, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.35178659290682057, - "normalized_score": 35.178659290682056 - }, - "bbh": { - "name": "BBH", - "value": 0.46701787510868176, - "normalized_score": 24.8577369132814 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1299093655589124, - "normalized_score": 12.990936555891238 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3062080536912752, - "normalized_score": 7.494407158836691 - }, - "musr": { - "name": "MUSR", - "value": 0.43864583333333335, - "normalized_score": 14.397395833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3486535904255319, - "normalized_score": 27.628176713947987 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-16", - "submission_date": "2024-11-25", - "generation": 0, - "base_model": "AGI-0/smartllama3.1-8B-001", - "hub_license": "unknown", - "hub_hearts": 33, - "params_billions": 8.03, - "co2_cost": 1.4376684950180103 - } - }, - { - "id": "AI-MO/NuminaMath-7B-CoT_bfloat16_ff7e3044218efe64128bd9c21f9ec66c3de04324_True", - "model": { - "name": "AI-MO/NuminaMath-7B-CoT", - "sha": "ff7e3044218efe64128bd9c21f9ec66c3de04324", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.118457218023075, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2688544173903022, - "normalized_score": 26.88544173903022 - }, - "bbh": { - "name": "BBH", - "value": 0.4314193495860012, - "normalized_score": 19.152364282090307 - }, - "math": { - "name": "MATH Level 5", - "value": 0.26963746223564955, - "normalized_score": 26.963746223564954 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.33034375, - "normalized_score": 0.8263020833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28681848404255317, - "normalized_score": 20.75760933806146 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-15", - "submission_date": "2024-09-10", - "generation": 1, - "base_model": "deepseek-ai/deepseek-math-7b-base", - "hub_license": "apache-2.0", - "hub_hearts": 22, - "params_billions": 6.91, - "co2_cost": 1.4919779154384423 - } - }, - { - "id": "AI-MO/NuminaMath-7B-TIR_bfloat16_c6e394cc0579423c9cde6df6cc192c07dae73388_False", - "model": { - "name": "AI-MO/NuminaMath-7B-TIR", - "sha": "c6e394cc0579423c9cde6df6cc192c07dae73388", - "precision": "bfloat16", - "type": 
"fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.182289143173433, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.27562423259174545, - "normalized_score": 27.562423259174547 - }, - "bbh": { - "name": "BBH", - "value": 0.41436913375897894, - "normalized_score": 16.87354725795866 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1608761329305136, - "normalized_score": 16.08761329305136 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.35092708333333333, - "normalized_score": 4.199218749999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2732712765957447, - "normalized_score": 19.252364066193852 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-04", - "submission_date": "2024-07-11", - "generation": 1, - "base_model": "deepseek-ai/deepseek-math-7b-base", - "hub_license": "apache-2.0", - "hub_hearts": 340, - "params_billions": 6.91, - "co2_cost": 2.148219574988251 - } - }, - { - "id": "AI-Sweden-Models/Llama-3-8B-instruct_bfloat16_4e1c955228bdb4d69c1c4560e8d5872312a8f033_True", - "model": { - "name": "AI-Sweden-Models/Llama-3-8B-instruct", - "sha": "4e1c955228bdb4d69c1c4560e8d5872312a8f033", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.343669671742774, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24012841482821137, - "normalized_score": 24.012841482821138 - }, - "bbh": { - "name": "BBH", - "value": 0.4173460154515302, - "normalized_score": 18.388095615027524 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03851963746223565, - "normalized_score": 3.8519637462235647 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.47709375000000004, - "normalized_score": 19.936718749999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.25972406914893614, - "normalized_score": 17.747118794326237 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-01", - "submission_date": "2024-06-27", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3-8B", - "hub_license": "llama3", - "hub_hearts": 10, - "params_billions": 8.03, - "co2_cost": 2.3322217617350347 - } - }, - { - "id": "AI-Sweden-Models/gpt-sw3-40b_float16_1af27994df1287a7fac1b10d60e40ca43a22a385_False", - "model": { - "name": "AI-Sweden-Models/gpt-sw3-40b", - "sha": "1af27994df1287a7fac1b10d60e40ca43a22a385", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "GPT2LMHeadModel", - "average_score": 4.872902485288683, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1470298807164989, - "normalized_score": 14.702988071649889 - }, - "bbh": { - "name": "BBH", - "value": 0.3267744702957652, - "normalized_score": 6.894934050796576 - }, - "math": { - "name": "MATH Level 5", - "value": 0.017371601208459216, - "normalized_score": 
1.7371601208459215 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2348993288590604, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.36323958333333334, - "normalized_score": 2.8382812499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12757646276595744, - "normalized_score": 3.064051418439715 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-02-22", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "AI-Sweden-Models/gpt-sw3-40b", - "hub_license": "other", - "hub_hearts": 10, - "params_billions": 39.927, - "co2_cost": 5.919638730114153 - } - }, - { - "id": "AI4free/Dhanishtha_float16_b7dd53b35d0c7c9e162cd336f5c9d8a13dbf6992_True", - "model": { - "name": "AI4free/Dhanishtha", - "sha": "b7dd53b35d0c7c9e162cd336f5c9d8a13dbf6992", - "precision": "float16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 11.247711953182252, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2451240486353985, - "normalized_score": 24.51240486353985 - }, - "bbh": { - "name": "BBH", - "value": 0.34039444943326375, - "normalized_score": 7.93648440557281 - }, - "math": { - "name": "MATH Level 5", - "value": 0.25604229607250756, - "normalized_score": 25.604229607250755 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2525167785234899, - "normalized_score": 0.33557046979865535 - }, - "musr": { - "name": "MUSR", - "value": 0.35694791666666664, - "normalized_score": 1.951822916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16431183510638298, - "normalized_score": 7.145759456264774 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-19", - "submission_date": "2025-02-19", - "generation": 1, - "base_model": "AI4free/Dhanishtha (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 1.777, - "co2_cost": 1.1626300441273092 - } - }, - { - "id": "AI4free/t2_bfloat16_8b541ace084aa8c95a4e89c0d4c4c64a74bcdf51_True", - "model": { - "name": "AI4free/t2", - "sha": "8b541ace084aa8c95a4e89c0d4c4c64a74bcdf51", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 11.33461581083953, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3866828902866616, - "normalized_score": 38.66828902866616 - }, - "bbh": { - "name": "BBH", - "value": 0.2910111436321769, - "normalized_score": 2.133152068631521 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18957703927492447, - "normalized_score": 18.957703927492446 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2575503355704698, - "normalized_score": 1.0067114093959737 - }, - "musr": { - "name": "MUSR", - "value": 0.3846354166666666, - "normalized_score": 5.646093750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11436170212765957, - "normalized_score": 1.595744680851063 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-27", - "generation": 0, - 
"base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 0.7121244370774158 - } - }, - { - "id": "AIDC-AI/Marco-o1_float16_5e4deeeb286b7a2e35a6d16989e64df860f7f4e5_True", - "model": { - "name": "AIDC-AI/Marco-o1", - "sha": "5e4deeeb286b7a2e35a6d16989e64df860f7f4e5", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 27.639223265636087, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.477083028586373, - "normalized_score": 47.7083028586373 - }, - "bbh": { - "name": "BBH", - "value": 0.5364362696398749, - "normalized_score": 34.84254498655976 - }, - "math": { - "name": "MATH Level 5", - "value": 0.37462235649546827, - "normalized_score": 37.46223564954683 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.41384375, - "normalized_score": 9.963802083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.41165226063829785, - "normalized_score": 34.62802895981088 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-13", - "submission_date": "2025-01-31", - "generation": 0, - "base_model": "AIDC-AI/Marco-o1", - "hub_license": "apache-2.0", - "hub_hearts": 714, - "params_billions": 7.616, - "co2_cost": 1.5728705744515559 - } - }, - { - "id": "Aashraf995/Creative-7B-nerd_bfloat16_fc24bca48549ef8e39cbee5a438e5a16e25e4afa_False", - "model": { - "name": "Aashraf995/Creative-7B-nerd", - "sha": "fc24bca48549ef8e39cbee5a438e5a16e25e4afa", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 29.97819251596781, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4721871301480073, - "normalized_score": 47.21871301480073 - }, - "bbh": { - "name": "BBH", - "value": 0.5606785565640195, - "normalized_score": 37.080153817355196 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3164652567975831, - "normalized_score": 31.64652567975831 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3263422818791946, - "normalized_score": 10.17897091722595 - }, - "musr": { - "name": "MUSR", - "value": 0.4515416666666667, - "normalized_score": 14.942708333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44921875, - "normalized_score": 38.802083333333336 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-13", - "submission_date": "2024-12-13", - "generation": 1, - "base_model": "Aashraf995/Creative-7B-nerd (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 7.616, - "co2_cost": 1.297734001048624 - } - }, - { - "id": "Aashraf995/Gemma-Evo-10B_float16_5ec9c5763ca6662dd897cd292e08014ec10b0d74_False", - "model": { - "name": "Aashraf995/Gemma-Evo-10B", - "sha": "5ec9c5763ca6662dd897cd292e08014ec10b0d74", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.32632733409121, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 
0.7332211864519476, - "normalized_score": 73.32211864519475 - }, - "bbh": { - "name": "BBH", - "value": 0.6044352897552882, - "normalized_score": 43.42455936867185 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22280966767371602, - "normalized_score": 22.280966767371602 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3540268456375839, - "normalized_score": 13.870246085011187 - }, - "musr": { - "name": "MUSR", - "value": 0.45947916666666666, - "normalized_score": 16.66822916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4275265957446808, - "normalized_score": 36.3918439716312 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-13", - "submission_date": "2024-12-13", - "generation": 1, - "base_model": "Aashraf995/Gemma-Evo-10B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 10.159, - "co2_cost": 4.596031436815906 - } - }, - { - "id": "Aashraf995/Qwen-Evo-7B_bfloat16_641aac3f105805414efe0a55b18736dce73da0a0_False", - "model": { - "name": "Aashraf995/Qwen-Evo-7B", - "sha": "641aac3f105805414efe0a55b18736dce73da0a0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 30.275058582706105, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4757343847657549, - "normalized_score": 47.57343847657549 - }, - "bbh": { - "name": "BBH", - "value": 0.5709361538590277, - "normalized_score": 38.585326990506125 - }, - "math": { - "name": "MATH Level 5", - "value": 0.31419939577039274, - "normalized_score": 31.419939577039273 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32550335570469796, - "normalized_score": 10.067114093959727 - }, - "musr": { - "name": "MUSR", - "value": 0.4541458333333333, - "normalized_score": 15.534895833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44622672872340424, - "normalized_score": 38.46963652482269 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-13", - "submission_date": "2024-12-13", - "generation": 1, - "base_model": "Aashraf995/Qwen-Evo-7B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 7.616, - "co2_cost": 1.268015219938996 - } - }, - { - "id": "Aashraf995/QwenStock-14B_float16_b91871dcd31fe2e445c233a449d021b47ebfe1fb_False", - "model": { - "name": "Aashraf995/QwenStock-14B", - "sha": "b91871dcd31fe2e445c233a449d021b47ebfe1fb", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 37.13002133952593, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5008632650256873, - "normalized_score": 50.08632650256874 - }, - "bbh": { - "name": "BBH", - "value": 0.6550130348108012, - "normalized_score": 50.433898707168744 - }, - "math": { - "name": "MATH Level 5", - "value": 0.35725075528700906, - "normalized_score": 35.725075528700906 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38926174496644295, - "normalized_score": 18.568232662192393 - }, - "musr": { - "name": "MUSR", - "value": 0.4792604166666667, - "normalized_score": 19.274218750000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 
0.5382313829787234, - "normalized_score": 48.69237588652482 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-13", - "submission_date": "2024-12-13", - "generation": 1, - "base_model": "Aashraf995/QwenStock-14B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 14.766, - "co2_cost": 3.7498913058726506 - } - }, - { - "id": "AbacusResearch/Jallabi-34B_float16_f65696da4ed82c9a20e94b200d9dccffa07af682_False", - "model": { - "name": "AbacusResearch/Jallabi-34B", - "sha": "f65696da4ed82c9a20e94b200d9dccffa07af682", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.186081920716532, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3528604103777976, - "normalized_score": 35.28604103777975 - }, - "bbh": { - "name": "BBH", - "value": 0.6023380603196266, - "normalized_score": 43.61576498719506 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05211480362537765, - "normalized_score": 5.211480362537765 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3389261744966443, - "normalized_score": 11.85682326621924 - }, - "musr": { - "name": "MUSR", - "value": 0.48217708333333337, - "normalized_score": 20.23880208333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4681682180851064, - "normalized_score": 40.90757978723404 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-03-01", - "submission_date": "2024-06-27", - "generation": 0, - "base_model": "AbacusResearch/Jallabi-34B", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 34.389, - "co2_cost": 6.572984679569464 - } - }, - { - "id": "Ahdoot/StructuredThinker-v0.3-MoreStructure_float16_05762859c0efcd44e7aa0043868de67208cde7ff_False", - "model": { - "name": "Ahdoot/StructuredThinker-v0.3-MoreStructure", - "sha": "05762859c0efcd44e7aa0043868de67208cde7ff", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 23.924082152035727, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4192808415005519, - "normalized_score": 41.928084150055184 - }, - "bbh": { - "name": "BBH", - "value": 0.48376906494893984, - "normalized_score": 27.258541777802407 - }, - "math": { - "name": "MATH Level 5", - "value": 0.290785498489426, - "normalized_score": 29.078549848942597 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.41582291666666665, - "normalized_score": 10.011197916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36103723404255317, - "normalized_score": 29.00413711583924 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-31", - "submission_date": "2025-01-01", - "generation": 1, - "base_model": "Ahdoot/StructuredThinker-v0.3-MoreStructure (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.397, - "co2_cost": 1.4898812996305322 - } - }, - { 
- "id": "Ahdoot/Test_StealthThinker_float16_475333d513a2779ff839ceb003e626681569ac1c_False", - "model": { - "name": "Ahdoot/Test_StealthThinker", - "sha": "475333d513a2779ff839ceb003e626681569ac1c", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 22.06904758264274, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42200361706937595, - "normalized_score": 42.20036170693759 - }, - "bbh": { - "name": "BBH", - "value": 0.46466398134666304, - "normalized_score": 25.366571749083093 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17900302114803626, - "normalized_score": 17.900302114803626 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.42804166666666665, - "normalized_score": 11.938541666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35970744680851063, - "normalized_score": 28.856382978723406 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-04", - "submission_date": "2025-01-05", - "generation": 1, - "base_model": "Ahdoot/Test_StealthThinker (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 3.086, - "co2_cost": 1.5976047229935322 - } - }, - { - "id": "AicoresSecurity/Cybernet-Sec-3B-R1-V0_bfloat16_b5434e21936031982db15694c91b783bf85d06ea_True", - "model": { - "name": "AicoresSecurity/Cybernet-Sec-3B-R1-V0", - "sha": "b5434e21936031982db15694c91b783bf85d06ea", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.584200054790823, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6358018945287394, - "normalized_score": 63.58018945287393 - }, - "bbh": { - "name": "BBH", - "value": 0.4497434194912941, - "normalized_score": 22.32210586978715 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11555891238670694, - "normalized_score": 11.555891238670695 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.33136458333333335, - "normalized_score": 1.920572916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.301030585106383, - "normalized_score": 22.336731678486995 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-26", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 0.5825486726169312 - } - }, - { - "id": "AicoresSecurity/Cybernet-Sec-3B-R1-V0-Coder_bfloat16_fada53c826e3ae5db7a7c10936d2a5d05395ddbe_True", - "model": { - "name": "AicoresSecurity/Cybernet-Sec-3B-R1-V0-Coder", - "sha": "fada53c826e3ae5db7a7c10936d2a5d05395ddbe", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.930540293017284, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 
0.7097656440466851, - "normalized_score": 70.9765644046685 - }, - "bbh": { - "name": "BBH", - "value": 0.4477501104993749, - "normalized_score": 22.65149321038919 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1487915407854985, - "normalized_score": 14.879154078549849 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27181208053691275, - "normalized_score": 2.9082774049216997 - }, - "musr": { - "name": "MUSR", - "value": 0.34079166666666666, - "normalized_score": 1.965625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3178191489361702, - "normalized_score": 24.202127659574465 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-26", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 0.5676457690031012 - } - }, - { - "id": "AicoresSecurity/Cybernet-Sec-3B-R1-V1_bfloat16_25f5123401151a36ada50cce93c590a259a11150_True", - "model": { - "name": "AicoresSecurity/Cybernet-Sec-3B-R1-V1", - "sha": "25f5123401151a36ada50cce93c590a259a11150", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.9979903395144, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6145693426774292, - "normalized_score": 61.45693426774292 - }, - "bbh": { - "name": "BBH", - "value": 0.4282342020189216, - "normalized_score": 19.125071594281845 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15181268882175228, - "normalized_score": 15.181268882175228 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.32869791666666665, - "normalized_score": 1.920572916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2876496010638298, - "normalized_score": 20.849955673758867 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-07", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 0.5874449879343348 - } - }, - { - "id": "AicoresSecurity/Cybernet-Sec-3B-R1-V1.1_float16_df6b8ed99a44693cfca83e0f676d9f3f2d5d298f_True", - "model": { - "name": "AicoresSecurity/Cybernet-Sec-3B-R1-V1.1", - "sha": "df6b8ed99a44693cfca83e0f676d9f3f2d5d298f", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.643406810994602, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6730209178313542, - "normalized_score": 67.30209178313541 - }, - "bbh": { - "name": "BBH", - "value": 0.4391775517124728, - "normalized_score": 20.43056853928926 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17598187311178248, - "normalized_score": 17.598187311178247 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.35409375000000004, - "normalized_score": 4.528385416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 
0.308843085106383, - "normalized_score": 23.204787234042552 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-07", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 0.6141912522744508 - } - }, - { - "id": "Alepach/notHumpback-M0_bfloat16_e4db4662cb3978bf14843eef4ff2897767dc96b3_True", - "model": { - "name": "Alepach/notHumpback-M0", - "sha": "e4db4662cb3978bf14843eef4ff2897767dc96b3", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.137220097878128, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23500755772461512, - "normalized_score": 23.500755772461517 - }, - "bbh": { - "name": "BBH", - "value": 0.27849287879199425, - "normalized_score": 1.2773603683897798 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0188821752265861, - "normalized_score": 1.8882175226586102 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24916107382550334, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.35523958333333333, - "normalized_score": 2.8382812499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1118683510638298, - "normalized_score": 1.3187056737588652 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2025-01-17", - "generation": 1, - "base_model": "meta-llama/Llama-3.2-3B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 3.507090986732091 - } - }, - { - "id": "Alepach/notHumpback-M1_bfloat16_5fd575e6f460b6dc9aea53f1b738a1fdf54a2151_True", - "model": { - "name": "Alepach/notHumpback-M1", - "sha": "5fd575e6f460b6dc9aea53f1b738a1fdf54a2151", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.779297887614574, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2206944241279804, - "normalized_score": 22.06944241279804 - }, - "bbh": { - "name": "BBH", - "value": 0.28824720129981835, - "normalized_score": 1.922946094565394 - }, - "math": { - "name": "MATH Level 5", - "value": 0.015861027190332326, - "normalized_score": 1.5861027190332326 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23741610738255034, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.342, - "normalized_score": 2.0833333333333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10912566489361702, - "normalized_score": 1.0139627659574466 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-31", - "submission_date": "2025-01-17", - "generation": 1, - "base_model": "meta-llama/Llama-3.2-3B", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 3.213, - "co2_cost": 3.5944551921441956 - } - }, - { - "id": "Alepach/notHumpback-M1-v2_bfloat16_7fc79144d17da44c736f6f4a0ea32b8c9152718a_True", - "model": { - "name": "Alepach/notHumpback-M1-v2", - "sha": 
"7fc79144d17da44c736f6f4a0ea32b8c9152718a", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.206724746715221, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2277135777514772, - "normalized_score": 22.77135777514772 - }, - "bbh": { - "name": "BBH", - "value": 0.2775640398406834, - "normalized_score": 1.2676708259061196 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02190332326283988, - "normalized_score": 2.190332326283988 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.3473333333333333, - "normalized_score": 2.3499999999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1118683510638298, - "normalized_score": 1.3187056737588652 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-19", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 1.170536546043483 - } - }, - { - "id": "Alibaba-NLP/gte-Qwen2-7B-instruct_bfloat16_e26182b2122f4435e8b3ebecbf363990f409b45b_True", - "model": { - "name": "Alibaba-NLP/gte-Qwen2-7B-instruct", - "sha": "e26182b2122f4435e8b3ebecbf363990f409b45b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 13.834176058521932, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22554045488193547, - "normalized_score": 22.55404548819355 - }, - "bbh": { - "name": "BBH", - "value": 0.4495144990818469, - "normalized_score": 21.92548248566236 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06419939577039276, - "normalized_score": 6.419939577039275 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24496644295302014, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.35585416666666664, - "normalized_score": 6.315104166666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33211436170212766, - "normalized_score": 25.790484633569736 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-15", - "submission_date": "2024-08-05", - "generation": 0, - "base_model": "Alibaba-NLP/gte-Qwen2-7B-instruct", - "hub_license": "apache-2.0", - "hub_hearts": 354, - "params_billions": 7.613, - "co2_cost": 4.344226697159869 - } - }, - { - "id": "Alsebay/Qwen2.5-7B-test-novelist_float16_89f34e5e67378dc38ce0da19d347ea26c23fbca5_False", - "model": { - "name": "Alsebay/Qwen2.5-7B-test-novelist", - "sha": "89f34e5e67378dc38ce0da19d347ea26c23fbca5", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 27.172849099811746, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5351600420218354, - "normalized_score": 53.51600420218354 - }, - "bbh": { - "name": "BBH", - "value": 0.515121518446605, - "normalized_score": 30.417500036352994 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2348942598187311, 
- "normalized_score": 23.48942598187311 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.47488541666666667, - "normalized_score": 18.29401041666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3865525265957447, - "normalized_score": 31.839169621749413 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-12", - "submission_date": "2024-12-12", - "generation": 3, - "base_model": "Qwen/Qwen2.5-7B", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.3343856523700721 - } - }, - { - "id": "Amaorynho/BBAI2006_float16_523790b424a66cfcbef03bfb360686d7e51d81c0_False", - "model": { - "name": "Amaorynho/BBAI2006", - "sha": "523790b424a66cfcbef03bfb360686d7e51d81c0", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.4638352160730896, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.14670518668244703, - "normalized_score": 14.670518668244704 - }, - "bbh": { - "name": "BBH", - "value": 0.2704366990167133, - "normalized_score": 1.6776683167876156 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2525167785234899, - "normalized_score": 0.33557046979865535 - }, - "musr": { - "name": "MUSR", - "value": 0.3605416666666667, - "normalized_score": 2.7343749999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11228390957446809, - "normalized_score": 1.3648788416075646 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "Amaorynho/BBAI2006 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.09, - "co2_cost": 0.08851011477487543 - } - }, - { - "id": "Amaorynho/BBAI270V4_bfloat16_8896a9c03756dd01a912121946c9f838cebe3c63_True", - "model": { - "name": "Amaorynho/BBAI270V4", - "sha": "8896a9c03756dd01a912121946c9f838cebe3c63", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.549298117195413, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1990374428737971, - "normalized_score": 19.90374428737971 - }, - "bbh": { - "name": "BBH", - "value": 0.30712046736502824, - "normalized_score": 3.006785000149223 - }, - "math": { - "name": "MATH Level 5", - "value": 0.008308157099697885, - "normalized_score": 0.8308157099697886 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24580536912751677, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.33139583333333333, - "normalized_score": 2.291145833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11136968085106383, - "normalized_score": 1.2632978723404247 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-26", - "submission_date": "2025-02-26", - "generation": 1, - 
"base_model": "Amaorynho/BBAI270V4 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 0.6849635831653293 - } - }, - { - "id": "Amaorynho/BBAIIFEV1_bfloat16_33fe7318e700f67458fa3c1872f4821e35d0095a_True", - "model": { - "name": "Amaorynho/BBAIIFEV1", - "sha": "33fe7318e700f67458fa3c1872f4821e35d0095a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.577013862416408, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8047369867507104, - "normalized_score": 80.47369867507103 - }, - "bbh": { - "name": "BBH", - "value": 0.5292462038560509, - "normalized_score": 32.97465792285014 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1933534743202417, - "normalized_score": 19.335347432024168 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3104026845637584, - "normalized_score": 8.05369127516779 - }, - "musr": { - "name": "MUSR", - "value": 0.4184895833333333, - "normalized_score": 10.877864583333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3857214095744681, - "normalized_score": 31.746823286052013 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-01", - "submission_date": "2025-03-01", - "generation": 1, - "base_model": "Amaorynho/BBAIIFEV1 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.6822098163443253 - } - }, - { - "id": "Amaorynho/BBAI_375_float16_4754976361b6f11107115121ff8bcb942c9b8b7e_False", - "model": { - "name": "Amaorynho/BBAI_375", - "sha": "4754976361b6f11107115121ff8bcb942c9b8b7e", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.4638352160730896, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.14670518668244703, - "normalized_score": 14.670518668244704 - }, - "bbh": { - "name": "BBH", - "value": 0.2704366990167133, - "normalized_score": 1.6776683167876156 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2525167785234899, - "normalized_score": 0.33557046979865535 - }, - "musr": { - "name": "MUSR", - "value": 0.3605416666666667, - "normalized_score": 2.7343749999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11228390957446809, - "normalized_score": 1.3648788416075646 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "Amaorynho/BBAI_375 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 1.09, - "co2_cost": 0.08627659342838825 - } - }, - { - "id": "Amu/t1-1.5B_float16_c716f5dc63c3d82b185cbac27f6fafd970131db3_True", - "model": { - "name": "Amu/t1-1.5B", - "sha": "c716f5dc63c3d82b185cbac27f6fafd970131db3", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 12.141383312461109, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 
0.3393717558300864, - "normalized_score": 33.93717558300864 - }, - "bbh": { - "name": "BBH", - "value": 0.4007606984109216, - "normalized_score": 15.172860277880739 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0513595166163142, - "normalized_score": 5.13595166163142 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24328859060402686, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3517083333333333, - "normalized_score": 1.1968749999999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2566489361702128, - "normalized_score": 17.405437352245865 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-12", - "submission_date": "2025-03-08", - "generation": 1, - "base_model": "Amu/t1-1.5B (Merge)", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 1.777, - "co2_cost": 0.6206258731095564 - } - }, - { - "id": "Amu/t1-3B_float16_400974f4c14e78c9f0ea2660748e3b6d5253c350_True", - "model": { - "name": "Amu/t1-3B", - "sha": "400974f4c14e78c9f0ea2660748e3b6d5253c350", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 11.160895171582572, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.33277703160946287, - "normalized_score": 33.27770316094629 - }, - "bbh": { - "name": "BBH", - "value": 0.39989750143834385, - "normalized_score": 15.248848632790677 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13746223564954682, - "normalized_score": 13.746223564954683 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2407718120805369, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.34348958333333335, - "normalized_score": 1.5361979166666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12840757978723405, - "normalized_score": 3.1563977541371155 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-10", - "submission_date": "2025-03-13", - "generation": 1, - "base_model": "Amu/t1-3B (Merge)", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 3.397, - "co2_cost": 0.8009694476236229 - } - }, - { - "id": "ArliAI/ArliAI-RPMax-12B-v1.1_bfloat16_645db1cf8ad952eb57854a133e8e15303b898b04_True", - "model": { - "name": "ArliAI/ArliAI-RPMax-12B-v1.1", - "sha": "645db1cf8ad952eb57854a133e8e15303b898b04", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.976339912086193, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5348852156721942, - "normalized_score": 53.48852156721942 - }, - "bbh": { - "name": "BBH", - "value": 0.475181760840119, - "normalized_score": 24.80906331793277 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11253776435045318, - "normalized_score": 11.253776435045317 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28187919463087246, - "normalized_score": 4.250559284116329 - }, - "musr": { - "name": "MUSR", - "value": 0.36184375, - "normalized_score": 5.563802083333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3384308510638298, - "normalized_score": 26.492316784869978 - } - }, - "features": { - 
"is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-31", - "submission_date": "2024-09-05", - "generation": 0, - "base_model": "ArliAI/ArliAI-RPMax-12B-v1.1", - "hub_license": "apache-2.0", - "hub_hearts": 44, - "params_billions": 12.248, - "co2_cost": 3.6668046106558903 - } - }, - { - "id": "ArliAI/Llama-3.1-8B-ArliAI-RPMax-v1.1_bfloat16_540bd352e59c63900af91b95a932b33aaee70c76_True", - "model": { - "name": "ArliAI/Llama-3.1-8B-ArliAI-RPMax-v1.1", - "sha": "540bd352e59c63900af91b95a932b33aaee70c76", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.942143268323218, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6359016298975606, - "normalized_score": 63.59016298975607 - }, - "bbh": { - "name": "BBH", - "value": 0.5015613456039083, - "normalized_score": 28.787014099442825 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13141993957703926, - "normalized_score": 13.141993957703926 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.3576875, - "normalized_score": 5.3109375000000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35513630319148937, - "normalized_score": 28.34847813238771 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-23", - "submission_date": "2024-09-19", - "generation": 0, - "base_model": "ArliAI/Llama-3.1-8B-ArliAI-RPMax-v1.1", - "hub_license": "llama3", - "hub_hearts": 29, - "params_billions": 8.03, - "co2_cost": 1.7854895170769118 - } - }, - { - "id": "Arthur-LAGACHERIE/Precis-1B-Instruct_float16_c3916b69283eb7424aa4df62deeeafbd81883521_True", - "model": { - "name": "Arthur-LAGACHERIE/Precis-1B-Instruct", - "sha": "c3916b69283eb7424aa4df62deeeafbd81883521", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 8.848710962428209, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3670738086056109, - "normalized_score": 36.70738086056109 - }, - "bbh": { - "name": "BBH", - "value": 0.3223614510687368, - "normalized_score": 6.06909792285563 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0037764350453172208, - "normalized_score": 0.3776435045317221 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.34355208333333337, - "normalized_score": 3.077343750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.14261968085106383, - "normalized_score": 4.735520094562647 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-19", - "submission_date": "2025-01-19", - "generation": 1, - "base_model": "Arthur-LAGACHERIE/Precis-1B-Instruct (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 0.7923436680964789 - } - }, - { - "id": 
"Artples/L-MChat-7b_bfloat16_e10137f5cbfc1b73068d6473e4a87241cca0b3f4_True", - "model": { - "name": "Artples/L-MChat-7b", - "sha": "e10137f5cbfc1b73068d6473e4a87241cca0b3f4", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.238493444242586, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5296646231997766, - "normalized_score": 52.96646231997766 - }, - "bbh": { - "name": "BBH", - "value": 0.46003301674679414, - "normalized_score": 24.20155738881327 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09214501510574018, - "normalized_score": 9.214501510574017 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.4028645833333333, - "normalized_score": 8.124739583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3298703457446808, - "normalized_score": 25.541149527186757 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-02", - "submission_date": "2024-07-07", - "generation": 1, - "base_model": "Artples/L-MChat-7b (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 7.242, - "co2_cost": 1.1844523714047013 - } - }, - { - "id": "Artples/L-MChat-Small_bfloat16_52484c277f6062c12dc6d6b6397ee0d0c21b0126_True", - "model": { - "name": "Artples/L-MChat-Small", - "sha": "52484c277f6062c12dc6d6b6397ee0d0c21b0126", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "PhiForCausalLM", - "average_score": 15.23132798262929, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.32870561222002065, - "normalized_score": 32.87056122200207 - }, - "bbh": { - "name": "BBH", - "value": 0.48225627665257265, - "normalized_score": 26.856515500031353 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0377643504531722, - "normalized_score": 3.7764350453172204 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.36959375, - "normalized_score": 9.265885416666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.24642619680851063, - "normalized_score": 16.269577423167846 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-11", - "submission_date": "2024-07-07", - "generation": 1, - "base_model": "Artples/L-MChat-Small (Merge)", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 2.78, - "co2_cost": 0.9310211903677648 - } - }, - { - "id": "Aryanne/QwentileSwap_bfloat16_e373a84c38c369967edd4ba037d24dba7cb35738_True", - "model": { - "name": "Aryanne/QwentileSwap", - "sha": "e373a84c38c369967edd4ba037d24dba7cb35738", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 43.91650770857709, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7378422585406721, - "normalized_score": 73.78422585406722 - }, - "bbh": { - "name": "BBH", - "value": 0.7008370136278447, - "normalized_score": 
57.67565555333628 - }, - "math": { - "name": "MATH Level 5", - "value": 0.42220543806646527, - "normalized_score": 42.220543806646525 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3674496644295302, - "normalized_score": 15.659955257270694 - }, - "musr": { - "name": "MUSR", - "value": 0.4640416666666667, - "normalized_score": 19.20520833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5945811170212766, - "normalized_score": 54.95345744680851 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-12", - "submission_date": "2025-02-05", - "generation": 1, - "base_model": "Aryanne/QwentileSwap (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 32.764, - "co2_cost": 34.397309465760216 - } - }, - { - "id": "Aryanne/SHBA_bfloat16_66d0feb9f54c375520fa6342f3d8f7e2be707101_True", - "model": { - "name": "Aryanne/SHBA", - "sha": "66d0feb9f54c375520fa6342f3d8f7e2be707101", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.875548231649606, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7816560060639104, - "normalized_score": 78.16560060639105 - }, - "bbh": { - "name": "BBH", - "value": 0.5233174837035715, - "normalized_score": 32.47770265291174 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1797583081570997, - "normalized_score": 17.97583081570997 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.41613541666666665, - "normalized_score": 11.11692708333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3892121010638298, - "normalized_score": 32.134677895981085 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-05", - "submission_date": "2025-01-11", - "generation": 1, - "base_model": "Aryanne/SHBA (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.5300866770671717 - } - }, - { - "id": "Aryanne/SuperHeart_bfloat16_02b5050d7e600ce3db81a19638f6043c895d60cf_False", - "model": { - "name": "Aryanne/SuperHeart", - "sha": "02b5050d7e600ce3db81a19638f6043c895d60cf", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.55719928277682, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5192234382549413, - "normalized_score": 51.92234382549414 - }, - "bbh": { - "name": "BBH", - "value": 0.5215375046264326, - "normalized_score": 31.893554212659296 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15634441087613293, - "normalized_score": 15.634441087613293 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.44357291666666665, - "normalized_score": 14.713281249999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3912067819148936, - "normalized_score": 32.35630910165484 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - 
"is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-23", - "submission_date": "2024-09-23", - "generation": 1, - "base_model": "Aryanne/SuperHeart (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.8079185536473381 - } - }, - { - "id": "AtAndDev/Qwen2.5-1.5B-continuous-learnt_float16_01c0981db9cf0f146fe050065f17343af75a8aa6_True", - "model": { - "name": "AtAndDev/Qwen2.5-1.5B-continuous-learnt", - "sha": "01c0981db9cf0f146fe050065f17343af75a8aa6", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 16.518524239214223, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4605214165081982, - "normalized_score": 46.05214165081983 - }, - "bbh": { - "name": "BBH", - "value": 0.42577470857933336, - "normalized_score": 19.53766599736009 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07477341389728097, - "normalized_score": 7.477341389728097 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.3636458333333333, - "normalized_score": 3.7890625000000013 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28116688829787234, - "normalized_score": 20.12965425531915 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-10-13", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.544, - "co2_cost": 0.673034773667099 - } - }, - { - "id": "AtAndDev/Qwen2.5-1.5B-continuous-learnt_bfloat16_01c0981db9cf0f146fe050065f17343af75a8aa6_True", - "model": { - "name": "AtAndDev/Qwen2.5-1.5B-continuous-learnt", - "sha": "01c0981db9cf0f146fe050065f17343af75a8aa6", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 17.483555890459094, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45105431366551857, - "normalized_score": 45.105431366551855 - }, - "bbh": { - "name": "BBH", - "value": 0.42746984992662185, - "normalized_score": 19.766408804078655 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1472809667673716, - "normalized_score": 14.72809667673716 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.36228124999999994, - "normalized_score": 2.551822916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28058510638297873, - "normalized_score": 20.06501182033097 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-10-18", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.544, - "co2_cost": 1.3771692115967817 - } - }, - { - "id": "Ateron/Glowing-Forest-12B_float16_22dd37b5d3b433069b1685d6b1f01035e3c4a817_False", - "model": { - "name": "Ateron/Glowing-Forest-12B", - "sha": "22dd37b5d3b433069b1685d6b1f01035e3c4a817", - "precision": "float16", - "type": "basemergesandmoerges", - 
"weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.612024614962053, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3591803082487799, - "normalized_score": 35.91803082487799 - }, - "bbh": { - "name": "BBH", - "value": 0.549176294722067, - "normalized_score": 35.52548240518279 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07779456193353475, - "normalized_score": 7.779456193353475 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33305369127516776, - "normalized_score": 11.073825503355701 - }, - "musr": { - "name": "MUSR", - "value": 0.44490625, - "normalized_score": 15.179947916666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37175864361702127, - "normalized_score": 30.1954048463357 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-06", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 12.248, - "co2_cost": 0.8972694159256903 - } - }, - { - "id": "Ateron/Lotus-Magpic_float16_6b74d29b20a99f2cc681967178c98413f933f982_True", - "model": { - "name": "Ateron/Lotus-Magpic", - "sha": "6b74d29b20a99f2cc681967178c98413f933f982", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 25.49856357813722, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6286076499244228, - "normalized_score": 62.86076499244227 - }, - "bbh": { - "name": "BBH", - "value": 0.5253514950133299, - "normalized_score": 32.65772737584173 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09969788519637462, - "normalized_score": 9.969788519637463 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.4331875, - "normalized_score": 12.781770833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3490691489361702, - "normalized_score": 27.67434988179669 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-03-05", - "generation": 1, - "base_model": "Ateron/Lotus-Magpic (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 12.248, - "co2_cost": 0.8675250939281365 - } - }, - { - "id": "Ateron/Way_of_MagPicaro_float16_f24956949dbd0d0713da0e14562e74fc20b7367d_False", - "model": { - "name": "Ateron/Way_of_MagPicaro", - "sha": "f24956949dbd0d0713da0e14562e74fc20b7367d", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.630569141650984, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2637091805298829, - "normalized_score": 26.37091805298829 - }, - "bbh": { - "name": "BBH", - "value": 0.5427386861946704, - "normalized_score": 34.315941121704846 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05891238670694864, - "normalized_score": 5.8912386706948645 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3338926174496644, - "normalized_score": 11.185682326621922 - }, - "musr": { - "name": "MUSR", - 
"value": 0.46490625, - "normalized_score": 17.84661458333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35355718085106386, - "normalized_score": 28.173020094562645 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-15", - "submission_date": "2025-03-05", - "generation": 1, - "base_model": "Ateron/Way_of_MagPicaro (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 12.248, - "co2_cost": 0.8622054716791998 - } - }, - { - "id": "AuraIndustries/Aura-4B_bfloat16_808d578b460382ddc90f8828a4dcd1c58deb7045_True", - "model": { - "name": "AuraIndustries/Aura-4B", - "sha": "808d578b460382ddc90f8828a4dcd1c58deb7045", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.063480369846303, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.38156203318306536, - "normalized_score": 38.15620331830654 - }, - "bbh": { - "name": "BBH", - "value": 0.4490409465001946, - "normalized_score": 22.64085672774061 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04229607250755287, - "normalized_score": 4.229607250755287 - }, - "gpqa": { - "name": "GPQA", - "value": 0.287751677852349, - "normalized_score": 5.033557046979867 - }, - "musr": { - "name": "MUSR", - "value": 0.39384375, - "normalized_score": 7.3638020833333355 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27061170212765956, - "normalized_score": 18.95685579196217 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-12", - "submission_date": "2024-12-13", - "generation": 1, - "base_model": "AuraIndustries/Aura-4B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 10, - "params_billions": 4.513, - "co2_cost": 1.1599834197817673 - } - }, - { - "id": "AuraIndustries/Aura-8B_bfloat16_d7f840c57c89fd655690a8371ce8f5c82f57ad80_True", - "model": { - "name": "AuraIndustries/Aura-8B", - "sha": "d7f840c57c89fd655690a8371ce8f5c82f57ad80", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 27.363297533889394, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7205315230255722, - "normalized_score": 72.05315230255721 - }, - "bbh": { - "name": "BBH", - "value": 0.5131231419849063, - "normalized_score": 30.981348410709717 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15181268882175228, - "normalized_score": 15.181268882175228 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2860738255033557, - "normalized_score": 4.809843400447425 - }, - "musr": { - "name": "MUSR", - "value": 0.4004479166666666, - "normalized_score": 9.22265625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38738364361702127, - "normalized_score": 31.93151595744681 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-08", - "submission_date": "2024-12-10", - "generation": 1, - "base_model": "AuraIndustries/Aura-8B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 7, - "params_billions": 8.03, - "co2_cost": 1.279427720279719 
- } - }, - { - "id": "AuraIndustries/Aura-MoE-2x4B_bfloat16_82e9951d78355fd6b37c2a54778df2948e1b52a9_True", - "model": { - "name": "AuraIndustries/Aura-MoE-2x4B", - "sha": "82e9951d78355fd6b37c2a54778df2948e1b52a9", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 16.79797839924053, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.460096987105325, - "normalized_score": 46.00969871053249 - }, - "bbh": { - "name": "BBH", - "value": 0.43385067041774666, - "normalized_score": 20.61384808360901 - }, - "math": { - "name": "MATH Level 5", - "value": 0.030966767371601207, - "normalized_score": 3.096676737160121 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27181208053691275, - "normalized_score": 2.9082774049216997 - }, - "musr": { - "name": "MUSR", - "value": 0.40851041666666665, - "normalized_score": 9.830468750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.26496010638297873, - "normalized_score": 18.32890070921986 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-14", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.231, - "co2_cost": 1.994775253625848 - } - }, - { - "id": "AuraIndustries/Aura-MoE-2x4B-v2_bfloat16_cc78898fad6443ccfe79b956bfde17bd101c15a0_True", - "model": { - "name": "AuraIndustries/Aura-MoE-2x4B-v2", - "sha": "cc78898fad6443ccfe79b956bfde17bd101c15a0", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 17.515881576703762, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4777822843388875, - "normalized_score": 47.77822843388875 - }, - "bbh": { - "name": "BBH", - "value": 0.43152444292813597, - "normalized_score": 20.801181076748463 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03172205438066465, - "normalized_score": 3.1722054380664653 - }, - "gpqa": { - "name": "GPQA", - "value": 0.287751677852349, - "normalized_score": 5.033557046979867 - }, - "musr": { - "name": "MUSR", - "value": 0.4100625, - "normalized_score": 10.42447916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2609707446808511, - "normalized_score": 17.88563829787234 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-15", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.231, - "co2_cost": 1.8490265375303532 - } - }, - { - "id": "Aurel9/testmerge-7b_bfloat16_b5f0a72d981b5b2c6bd6294093c6956d88477a3e_False", - "model": { - "name": "Aurel9/testmerge-7b", - "sha": "b5f0a72d981b5b2c6bd6294093c6956d88477a3e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.96930213435916, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3979984219648311, - "normalized_score": 39.79984219648311 - }, - "bbh": { - "name": "BBH", - "value": 0.5189590919105128, - "normalized_score": 32.79279332763635 - }, - 
"math": { - "name": "MATH Level 5", - "value": 0.06570996978851963, - "normalized_score": 6.570996978851963 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30033557046979864, - "normalized_score": 6.711409395973152 - }, - "musr": { - "name": "MUSR", - "value": 0.4658645833333333, - "normalized_score": 17.133072916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3052692819148936, - "normalized_score": 22.807697990543733 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-16", - "submission_date": "2024-11-16", - "generation": 1, - "base_model": "Aurel9/testmerge-7b (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.9529287302081578 - } - }, - { - "id": "Ayush-Singh/Llama1B-sft-2_float16_8979241089bc73efdb2b89c47fcadc90586d7688_False", - "model": { - "name": "Ayush-Singh/Llama1B-sft-2", - "sha": "8979241089bc73efdb2b89c47fcadc90586d7688", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 3.1693229450767695, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.13743755457741016, - "normalized_score": 13.743755457741015 - }, - "bbh": { - "name": "BBH", - "value": 0.283428204214368, - "normalized_score": 1.2375708061002186 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24580536912751677, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.35520833333333335, - "normalized_score": 2.7343749999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11170212765957446, - "normalized_score": 1.300236406619384 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-28", - "submission_date": "2025-02-05", - "generation": 0, - "base_model": "Ayush-Singh/Llama1B-sft-2", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 0.7669701604841612 - } - }, - { - "id": "Azure99/Blossom-V6-14B_bfloat16_7bc5a97a4faf8de6554255a287f76b1841f8572f_True", - "model": { - "name": "Azure99/Blossom-V6-14B", - "sha": "7bc5a97a4faf8de6554255a287f76b1841f8572f", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 32.80581546858162, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6395486198841297, - "normalized_score": 63.95486198841297 - }, - "bbh": { - "name": "BBH", - "value": 0.5068726694646123, - "normalized_score": 30.3528011952772 - }, - "math": { - "name": "MATH Level 5", - "value": 0.525679758308157, - "normalized_score": 52.567975830815705 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2625838926174497, - "normalized_score": 1.6778523489932917 - }, - "musr": { - "name": "MUSR", - "value": 0.40352083333333333, - "normalized_score": 8.906770833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4543716755319149, - "normalized_score": 39.3746306146572 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - 
"upload_date": "2025-01-27", - "submission_date": "2025-01-30", - "generation": 1, - "base_model": "Azure99/Blossom-V6-14B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 14.77, - "co2_cost": 4.503315116093226 - } - }, - { - "id": "Azure99/Blossom-V6-7B_bfloat16_a4e3f54a7a3d5db6486cc1bc491a1b38e9883954_True", - "model": { - "name": "Azure99/Blossom-V6-7B", - "sha": "a4e3f54a7a3d5db6486cc1bc491a1b38e9883954", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 31.04565038906736, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5538194213575536, - "normalized_score": 55.38194213575535 - }, - "bbh": { - "name": "BBH", - "value": 0.49736683240887, - "normalized_score": 29.447521285247884 - }, - "math": { - "name": "MATH Level 5", - "value": 0.45845921450151056, - "normalized_score": 45.84592145015105 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.43009375, - "normalized_score": 13.395052083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.41439494680851063, - "normalized_score": 34.932771867612296 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-27", - "submission_date": "2025-01-30", - "generation": 1, - "base_model": "Azure99/Blossom-V6-7B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 7.616, - "co2_cost": 1.8136679248856464 - } - }, - { - "id": "Azure99/blossom-v5-32b_bfloat16_ccd4d86e3de01187043683dea1e28df904f7408e_True", - "model": { - "name": "Azure99/blossom-v5-32b", - "sha": "ccd4d86e3de01187043683dea1e28df904f7408e", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 27.724659940062114, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5235441960664371, - "normalized_score": 52.3544196066437 - }, - "bbh": { - "name": "BBH", - "value": 0.5954545257004673, - "normalized_score": 42.883055884713976 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1865558912386707, - "normalized_score": 18.65558912386707 - }, - "gpqa": { - "name": "GPQA", - "value": 0.311241610738255, - "normalized_score": 8.165548098434002 - }, - "musr": { - "name": "MUSR", - "value": 0.40199999999999997, - "normalized_score": 8.350000000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4234541223404255, - "normalized_score": 35.93934692671394 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-29", - "submission_date": "2024-09-21", - "generation": 0, - "base_model": "Azure99/blossom-v5-32b", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 32.512, - "co2_cost": 11.376000661812801 - } - }, - { - "id": "Azure99/blossom-v5-llama3-8b_bfloat16_91ea35e2e65516988021e4bb3b908e3e497e05c2_True", - "model": { - "name": "Azure99/blossom-v5-llama3-8b", - "sha": "91ea35e2e65516988021e4bb3b908e3e497e05c2", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 
14.598962808998438, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.434293230849701, - "normalized_score": 43.4293230849701 - }, - "bbh": { - "name": "BBH", - "value": 0.4184909197087261, - "normalized_score": 18.306535405618444 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0513595166163142, - "normalized_score": 5.13595166163142 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.36702083333333335, - "normalized_score": 5.3109375000000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2205784574468085, - "normalized_score": 13.397606382978722 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-20", - "submission_date": "2024-09-21", - "generation": 0, - "base_model": "Azure99/blossom-v5-llama3-8b", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 8.03, - "co2_cost": 1.7443057854894946 - } - }, - { - "id": "Azure99/blossom-v5.1-34b_bfloat16_2c803204f5dbf4ce37e2df98eb0205cdc53de10d_True", - "model": { - "name": "Azure99/blossom-v5.1-34b", - "sha": "2c803204f5dbf4ce37e2df98eb0205cdc53de10d", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.298681690900178, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5696562897556262, - "normalized_score": 56.96562897556262 - }, - "bbh": { - "name": "BBH", - "value": 0.6109110096611161, - "normalized_score": 44.14770458838461 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2590634441087613, - "normalized_score": 25.90634441087613 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.39279166666666665, - "normalized_score": 7.298958333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4557845744680851, - "normalized_score": 39.531619385342786 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-19", - "submission_date": "2024-07-27", - "generation": 0, - "base_model": "Azure99/blossom-v5.1-34b", - "hub_license": "apache-2.0", - "hub_hearts": 5, - "params_billions": 34.389, - "co2_cost": 12.700997469006204 - } - }, - { - "id": "Azure99/blossom-v5.1-9b_bfloat16_6044a3dc1e04529fe883aa513d37f266a320d793_True", - "model": { - "name": "Azure99/blossom-v5.1-9b", - "sha": "6044a3dc1e04529fe883aa513d37f266a320d793", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.470194407631528, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5085816744016985, - "normalized_score": 50.85816744016986 - }, - "bbh": { - "name": "BBH", - "value": 0.5343292377916368, - "normalized_score": 34.20124449031171 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2122356495468278, - "normalized_score": 21.22356495468278 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33557046979865773, - "normalized_score": 11.409395973154364 - }, - "musr": { - "name": "MUSR", - "value": 0.39939583333333334, - 
"normalized_score": 8.024479166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39793882978723405, - "normalized_score": 33.10431442080379 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-15", - "submission_date": "2024-07-24", - "generation": 0, - "base_model": "Azure99/blossom-v5.1-9b", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 8.829, - "co2_cost": 3.227728401062798 - } - }, - { - "id": "BAAI/Gemma2-9B-IT-Simpo-Infinity-Preference_bfloat16_028a91b1a4f14d365c6db08093b03348455c7bad_True", - "model": { - "name": "BAAI/Gemma2-9B-IT-Simpo-Infinity-Preference", - "sha": "028a91b1a4f14d365c6db08093b03348455c7bad", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 22.607935619579848, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.31763831079314, - "normalized_score": 31.763831079313995 - }, - "bbh": { - "name": "BBH", - "value": 0.5979459664230056, - "normalized_score": 42.19084405906616 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09743202416918428, - "normalized_score": 9.743202416918429 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33976510067114096, - "normalized_score": 11.968680089485462 - }, - "musr": { - "name": "MUSR", - "value": 0.39657291666666666, - "normalized_score": 8.104947916666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3868849734042553, - "normalized_score": 31.876108156028373 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-28", - "submission_date": "2024-09-05", - "generation": 2, - "base_model": "google/gemma-2-9b", - "hub_license": "", - "hub_hearts": 17, - "params_billions": 9.242, - "co2_cost": 8.800296921754853 - } - }, - { - "id": "BAAI/Infinity-Instruct-3M-0613-Llama3-70B_bfloat16_9fc53668064bdda22975ca72c5a287f8241c95b3_True", - "model": { - "name": "BAAI/Infinity-Instruct-3M-0613-Llama3-70B", - "sha": "9fc53668064bdda22975ca72c5a287f8241c95b3", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 35.578242909223654, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6821134589555713, - "normalized_score": 68.21134589555712 - }, - "bbh": { - "name": "BBH", - "value": 0.6641614484348598, - "normalized_score": 51.327160982522116 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21525679758308158, - "normalized_score": 21.525679758308158 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35822147651006714, - "normalized_score": 14.429530201342287 - }, - "musr": { - "name": "MUSR", - "value": 0.45226041666666666, - "normalized_score": 16.53255208333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.47298869680851063, - "normalized_score": 41.44318853427896 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-27", - "submission_date": "2024-06-28", - "generation": 0, - "base_model": 
"BAAI/Infinity-Instruct-3M-0613-Llama3-70B", - "hub_license": "apache-2.0", - "hub_hearts": 5, - "params_billions": 70.554, - "co2_cost": 21.053813934815626 - } - }, - { - "id": "BAAI/Infinity-Instruct-3M-0613-Mistral-7B_bfloat16_c7a742e539ec264b9eaeefe2aed29e92e8a7ebd6_True", - "model": { - "name": "BAAI/Infinity-Instruct-3M-0613-Mistral-7B", - "sha": "c7a742e539ec264b9eaeefe2aed29e92e8a7ebd6", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.29353000800769, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5319873491225504, - "normalized_score": 53.19873491225504 - }, - "bbh": { - "name": "BBH", - "value": 0.49582333763258896, - "normalized_score": 28.992936470320583 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08157099697885196, - "normalized_score": 8.157099697885197 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.4350833333333333, - "normalized_score": 13.252083333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31607380319148937, - "normalized_score": 24.00820035460993 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-21", - "submission_date": "2024-06-27", - "generation": 0, - "base_model": "BAAI/Infinity-Instruct-3M-0613-Mistral-7B", - "hub_license": "apache-2.0", - "hub_hearts": 11, - "params_billions": 7.242, - "co2_cost": 1.898749257491813 - } - }, - { - "id": "BAAI/Infinity-Instruct-3M-0625-Llama3-70B_float16_6d8ceada57e55cff3503191adc4d6379ff321fe2_True", - "model": { - "name": "BAAI/Infinity-Instruct-3M-0625-Llama3-70B", - "sha": "6d8ceada57e55cff3503191adc4d6379ff321fe2", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 36.91009196843762, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7442120240960651, - "normalized_score": 74.42120240960651 - }, - "bbh": { - "name": "BBH", - "value": 0.6670337872930245, - "normalized_score": 52.02816164280523 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22507552870090636, - "normalized_score": 22.507552870090635 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3573825503355705, - "normalized_score": 14.317673378076066 - }, - "musr": { - "name": "MUSR", - "value": 0.46165625000000005, - "normalized_score": 18.34036458333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4586103723404255, - "normalized_score": 39.84559692671394 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-09", - "submission_date": "2024-08-30", - "generation": 0, - "base_model": "BAAI/Infinity-Instruct-3M-0625-Llama3-70B", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 70.554, - "co2_cost": 20.86191028123878 - } - }, - { - "id": "BAAI/Infinity-Instruct-3M-0625-Llama3-8B_float16_7be7c0ff1e35c3bb781c47222da99a1724f5f1da_True", - "model": { - "name": "BAAI/Infinity-Instruct-3M-0625-Llama3-8B", - "sha": "7be7c0ff1e35c3bb781c47222da99a1724f5f1da", - "precision": "float16", - 
"type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.062531958779175, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6050268842227512, - "normalized_score": 60.50268842227513 - }, - "bbh": { - "name": "BBH", - "value": 0.4954985723563075, - "normalized_score": 28.9882222457564 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08836858006042296, - "normalized_score": 8.836858006042297 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2751677852348993, - "normalized_score": 3.355704697986576 - }, - "musr": { - "name": "MUSR", - "value": 0.37120833333333336, - "normalized_score": 5.6677083333333345 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3252160904255319, - "normalized_score": 25.02401004728132 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-09", - "submission_date": "2024-07-13", - "generation": 0, - "base_model": "BAAI/Infinity-Instruct-3M-0625-Llama3-8B", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 8.03, - "co2_cost": 1.7160075623568083 - } - }, - { - "id": "BAAI/Infinity-Instruct-3M-0625-Mistral-7B_bfloat16_302e3ae0bcc50dae3fb69fc1b08b518398e8c407_True", - "model": { - "name": "BAAI/Infinity-Instruct-3M-0625-Mistral-7B", - "sha": "302e3ae0bcc50dae3fb69fc1b08b518398e8c407", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.843425216048672, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5867420666054957, - "normalized_score": 58.67420666054956 - }, - "bbh": { - "name": "BBH", - "value": 0.4939670574681802, - "normalized_score": 28.82328942958971 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07628398791540786, - "normalized_score": 7.628398791540786 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28691275167785235, - "normalized_score": 4.921700223713646 - }, - "musr": { - "name": "MUSR", - "value": 0.42723958333333334, - "normalized_score": 12.23828125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3229720744680851, - "normalized_score": 24.774674940898343 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-09", - "submission_date": "2024-08-05", - "generation": 0, - "base_model": "BAAI/Infinity-Instruct-3M-0625-Mistral-7B", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 7.242, - "co2_cost": 1.5715943827465826 - } - }, - { - "id": "BAAI/Infinity-Instruct-3M-0625-Qwen2-7B_bfloat16_503c24156d7682458686a7b5324f7f886e63470d_True", - "model": { - "name": "BAAI/Infinity-Instruct-3M-0625-Qwen2-7B", - "sha": "503c24156d7682458686a7b5324f7f886e63470d", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 26.199808011295485, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5553930238434022, - "normalized_score": 55.53930238434022 - }, - "bbh": { - "name": "BBH", - "value": 0.5345911997776569, - "normalized_score": 34.65682860864863 - }, - "math": { - "name": "MATH Level 5", - 
"value": 0.19259818731117825, - "normalized_score": 19.259818731117825 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31291946308724833, - "normalized_score": 8.389261744966444 - }, - "musr": { - "name": "MUSR", - "value": 0.38876041666666666, - "normalized_score": 6.461718749999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39602726063829785, - "normalized_score": 32.89191784869976 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-09", - "submission_date": "2024-08-05", - "generation": 0, - "base_model": "BAAI/Infinity-Instruct-3M-0625-Qwen2-7B", - "hub_license": "apache-2.0", - "hub_hearts": 8, - "params_billions": 7.616, - "co2_cost": 2.660155948960905 - } - }, - { - "id": "BAAI/Infinity-Instruct-3M-0625-Yi-1.5-9B_bfloat16_a42c86c61b98ca4fdf238d688fe6ea11cf414d29_True", - "model": { - "name": "BAAI/Infinity-Instruct-3M-0625-Yi-1.5-9B", - "sha": "a42c86c61b98ca4fdf238d688fe6ea11cf414d29", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.14496041142372, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5185984299436606, - "normalized_score": 51.85984299436606 - }, - "bbh": { - "name": "BBH", - "value": 0.5509115146247398, - "normalized_score": 35.37870748220464 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16389728096676737, - "normalized_score": 16.389728096676738 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3540268456375839, - "normalized_score": 13.870246085011187 - }, - "musr": { - "name": "MUSR", - "value": 0.45753125, - "normalized_score": 16.724739583333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.41181848404255317, - "normalized_score": 34.64649822695036 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-09", - "submission_date": "2024-08-05", - "generation": 0, - "base_model": "BAAI/Infinity-Instruct-3M-0625-Yi-1.5-9B", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 8.829, - "co2_cost": 2.233602481106533 - } - }, - { - "id": "BAAI/Infinity-Instruct-7M-0729-Llama3_1-8B_bfloat16_0aca33fd7500a781d041e8bf7e5e3789b03f54f4_True", - "model": { - "name": "BAAI/Infinity-Instruct-7M-0729-Llama3_1-8B", - "sha": "0aca33fd7500a781d041e8bf7e5e3789b03f54f4", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.447423677135685, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6131952109292234, - "normalized_score": 61.31952109292233 - }, - "bbh": { - "name": "BBH", - "value": 0.5077335431381055, - "normalized_score": 30.888804607565575 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12764350453172205, - "normalized_score": 12.764350453172204 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29278523489932884, - "normalized_score": 5.7046979865771785 - }, - "musr": { - "name": "MUSR", - "value": 0.35784375, - "normalized_score": 5.297135416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3223902925531915, - "normalized_score": 24.710032505910167 - } - }, - "features": { - 
"is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-02", - "submission_date": "2024-08-05", - "generation": 0, - "base_model": "BAAI/Infinity-Instruct-7M-0729-Llama3_1-8B", - "hub_license": "llama3.1", - "hub_hearts": 8, - "params_billions": 8.03, - "co2_cost": 1.7336102063619132 - } - }, - { - "id": "BAAI/Infinity-Instruct-7M-0729-mistral-7B_bfloat16_36651591cb13346ecbde23832013e024029700fa_True", - "model": { - "name": "BAAI/Infinity-Instruct-7M-0729-mistral-7B", - "sha": "36651591cb13346ecbde23832013e024029700fa", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 23.21644934658775, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6161928128476886, - "normalized_score": 61.61928128476886 - }, - "bbh": { - "name": "BBH", - "value": 0.4963813586525743, - "normalized_score": 28.697915491520025 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08308157099697885, - "normalized_score": 8.308157099697885 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.4061875, - "normalized_score": 10.04010416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3273769946808511, - "normalized_score": 25.26411052009456 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-25", - "submission_date": "2024-08-05", - "generation": 0, - "base_model": "BAAI/Infinity-Instruct-7M-0729-mistral-7B", - "hub_license": "apache-2.0", - "hub_hearts": 5, - "params_billions": 7.242, - "co2_cost": 1.5985224340218647 - } - }, - { - "id": "BAAI/Infinity-Instruct-7M-Gen-Llama3_1-70B_bfloat16_1ef63c4993a8c723c9695c827295c17080a64435_True", - "model": { - "name": "BAAI/Infinity-Instruct-7M-Gen-Llama3_1-70B", - "sha": "1ef63c4993a8c723c9695c827295c17080a64435", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 37.48445364047765, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7335458804859993, - "normalized_score": 73.35458804859994 - }, - "bbh": { - "name": "BBH", - "value": 0.6695200461367471, - "normalized_score": 52.49894685232329 - }, - "math": { - "name": "MATH Level 5", - "value": 0.25226586102719034, - "normalized_score": 25.226586102719033 - }, - "gpqa": { - "name": "GPQA", - "value": 0.37583892617449666, - "normalized_score": 16.778523489932887 - }, - "musr": { - "name": "MUSR", - "value": 0.45390625, - "normalized_score": 16.97161458333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.460688164893617, - "normalized_score": 40.076462765957444 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-25", - "submission_date": "2024-09-26", - "generation": 0, - "base_model": "BAAI/Infinity-Instruct-7M-Gen-Llama3_1-70B", - "hub_license": "llama3.1", - "hub_hearts": 19, - "params_billions": 70.554, - "co2_cost": 22.138242550850272 - } - }, - { - "id": 
"BAAI/Infinity-Instruct-7M-Gen-Llama3_1-8B_bfloat16_56f9c2845ae024eb8b1dd9ea0d8891cbaf33c596_True", - "model": { - "name": "BAAI/Infinity-Instruct-7M-Gen-Llama3_1-8B", - "sha": "56f9c2845ae024eb8b1dd9ea0d8891cbaf33c596", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.447423677135685, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6131952109292234, - "normalized_score": 61.31952109292233 - }, - "bbh": { - "name": "BBH", - "value": 0.5077335431381055, - "normalized_score": 30.888804607565575 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12764350453172205, - "normalized_score": 12.764350453172204 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29278523489932884, - "normalized_score": 5.7046979865771785 - }, - "musr": { - "name": "MUSR", - "value": 0.35784375, - "normalized_score": 5.297135416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3223902925531915, - "normalized_score": 24.710032505910167 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-02", - "submission_date": "2024-08-29", - "generation": 0, - "base_model": "BAAI/Infinity-Instruct-7M-Gen-Llama3_1-8B", - "hub_license": "llama3.1", - "hub_hearts": 8, - "params_billions": 8.03, - "co2_cost": 1.8342796170800313 - } - }, - { - "id": "BAAI/Infinity-Instruct-7M-Gen-mistral-7B_bfloat16_82c83d670a8954f4250547b53a057dea1fbd460d_True", - "model": { - "name": "BAAI/Infinity-Instruct-7M-Gen-mistral-7B", - "sha": "82c83d670a8954f4250547b53a057dea1fbd460d", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 23.191053766563783, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6146690780462506, - "normalized_score": 61.46690780462506 - }, - "bbh": { - "name": "BBH", - "value": 0.4963813586525743, - "normalized_score": 28.697915491520025 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08308157099697885, - "normalized_score": 8.308157099697885 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.4061875, - "normalized_score": 10.04010416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3273769946808511, - "normalized_score": 25.26411052009456 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-25", - "submission_date": "2024-08-29", - "generation": 0, - "base_model": "BAAI/Infinity-Instruct-7M-Gen-mistral-7B", - "hub_license": "apache-2.0", - "hub_hearts": 5, - "params_billions": 7.242, - "co2_cost": 1.64927044522619 - } - }, - { - "id": "BAAI/OPI-Llama-3.1-8B-Instruct_bfloat16_48504799d009b4e1b29e6d2948a7cde68acdc3b0_True", - "model": { - "name": "BAAI/OPI-Llama-3.1-8B-Instruct", - "sha": "48504799d009b4e1b29e6d2948a7cde68acdc3b0", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 8.531604396997649, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - 
"name": "IFEval", - "value": 0.20745510800232272, - "normalized_score": 20.745510800232275 - }, - "bbh": { - "name": "BBH", - "value": 0.3551224419497605, - "normalized_score": 9.76871171424153 - }, - "math": { - "name": "MATH Level 5", - "value": 0.013595166163141994, - "normalized_score": 1.3595166163141994 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.3233020833333333, - "normalized_score": 3.579427083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.21243351063829788, - "normalized_score": 12.492612293144207 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-06", - "submission_date": "2024-09-21", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "llama3.1", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.343313618854893 - } - }, - { - "id": "BEE-spoke-data/Meta-Llama-3-8Bee_bfloat16_8143e34e77a49a30ec2617c5c9cc22cb3cda2287_False", - "model": { - "name": "BEE-spoke-data/Meta-Llama-3-8Bee", - "sha": "8143e34e77a49a30ec2617c5c9cc22cb3cda2287", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.657811877680324, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19506575885317623, - "normalized_score": 19.506575885317623 - }, - "bbh": { - "name": "BBH", - "value": 0.46263641905752745, - "normalized_score": 24.19903269979749 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04833836858006042, - "normalized_score": 4.833836858006042 - }, - "gpqa": { - "name": "GPQA", - "value": 0.313758389261745, - "normalized_score": 8.501118568232664 - }, - "musr": { - "name": "MUSR", - "value": 0.36540625, - "normalized_score": 6.242447916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32197473404255317, - "normalized_score": 24.66385933806146 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-28", - "submission_date": "2024-07-04", - "generation": 1, - "base_model": "meta-llama/Meta-Llama-3-8B", - "hub_license": "llama3", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.6607598585562942 - } - }, - { - "id": "BEE-spoke-data/smol_llama-101M-GQA_bfloat16_bb26643db413bada7e0c3c50752bf9da82403dba_False", - "model": { - "name": "BEE-spoke-data/smol_llama-101M-GQA", - "sha": "bb26643db413bada7e0c3c50752bf9da82403dba", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.019599868138451, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.13843712460715346, - "normalized_score": 13.843712460715347 - }, - "bbh": { - "name": "BBH", - "value": 0.3017560771912554, - "normalized_score": 3.1980040943527936 - }, - "math": { - "name": "MATH Level 5", - "value": 0.006042296072507553, - "normalized_score": 0.6042296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2575503355704698, - "normalized_score": 1.0067114093959737 - }, - "musr": { - "name": "MUSR", - "value": 0.3712708333333334, - "normalized_score": 
4.275520833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11070478723404255, - "normalized_score": 1.1894208037825047 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-10-26", - "submission_date": "2024-07-06", - "generation": 0, - "base_model": "BEE-spoke-data/smol_llama-101M-GQA", - "hub_license": "apache-2.0", - "hub_hearts": 28, - "params_billions": 0.101, - "co2_cost": 0.23921130531719068 - } - }, - { - "id": "BEE-spoke-data/smol_llama-220M-GQA_bfloat16_8845b1d3c0bc73522ef2700aab467183cbdca9f7_False", - "model": { - "name": "BEE-spoke-data/smol_llama-220M-GQA", - "sha": "8845b1d3c0bc73522ef2700aab467183cbdca9f7", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 6.577800964187134, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23860468002677343, - "normalized_score": 23.860468002677344 - }, - "bbh": { - "name": "BBH", - "value": 0.30316731388708956, - "normalized_score": 3.03784275772053 - }, - "math": { - "name": "MATH Level 5", - "value": 0.010574018126888218, - "normalized_score": 1.0574018126888218 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2558724832214765, - "normalized_score": 0.7829977628635317 - }, - "musr": { - "name": "MUSR", - "value": 0.405875, - "normalized_score": 9.067708333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1149434840425532, - "normalized_score": 1.6603871158392434 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-12-22", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "BEE-spoke-data/smol_llama-220M-GQA", - "hub_license": "apache-2.0", - "hub_hearts": 12, - "params_billions": 0.218, - "co2_cost": 0.3272265469372181 - } - }, - { - "id": "BEE-spoke-data/smol_llama-220M-GQA-fineweb_edu_bfloat16_dec16b41d5e94070dbc1f8449a554373fd4cc1d1_False", - "model": { - "name": "BEE-spoke-data/smol_llama-220M-GQA-fineweb_edu", - "sha": "dec16b41d5e94070dbc1f8449a554373fd4cc1d1", - "precision": "bfloat16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 6.629850832257909, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19881248420856662, - "normalized_score": 19.88124842085666 - }, - "bbh": { - "name": "BBH", - "value": 0.29290517164510593, - "normalized_score": 2.314902449149024 - }, - "math": { - "name": "MATH Level 5", - "value": 0.006797583081570997, - "normalized_score": 0.6797583081570997 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.4367604166666667, - "normalized_score": 14.261718750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11269946808510638, - "normalized_score": 1.4110520094562635 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-08", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "BEE-spoke-data/smol_llama-220M-GQA", - "hub_license": "apache-2.0", 
- "hub_hearts": 1, - "params_billions": 0.218, - "co2_cost": 0.32375200832341444 - } - }, - { - "id": "BEE-spoke-data/smol_llama-220M-openhermes_bfloat16_fb4bcd4b7eee363baacb4176a26cea2aaeb173f4_False", - "model": { - "name": "BEE-spoke-data/smol_llama-220M-openhermes", - "sha": "fb4bcd4b7eee363baacb4176a26cea2aaeb173f4", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.938005238605682, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1555229014570229, - "normalized_score": 15.552290145702292 - }, - "bbh": { - "name": "BBH", - "value": 0.30275191401927726, - "normalized_score": 3.107692077087363 - }, - "math": { - "name": "MATH Level 5", - "value": 0.010574018126888218, - "normalized_score": 1.0574018126888218 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.3847291666666666, - "normalized_score": 6.224479166666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11203457446808511, - "normalized_score": 1.337174940898345 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-12-30", - "submission_date": "2024-09-21", - "generation": 1, - "base_model": "BEE-spoke-data/smol_llama-220M-GQA", - "hub_license": "apache-2.0", - "hub_hearts": 5, - "params_billions": 0.218, - "co2_cost": 0.30885179519116146 - } - }, - { - "id": "BEE-spoke-data/tFINE-900m-e16-d32-flan_bfloat16_d9ffec9798402d13d8f2c56ec3de3ad092445297_False", - "model": { - "name": "BEE-spoke-data/tFINE-900m-e16-d32-flan", - "sha": "d9ffec9798402d13d8f2c56ec3de3ad092445297", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "T5ForConditionalGeneration", - "average_score": 4.597533011984676, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15057713533424646, - "normalized_score": 15.057713533424646 - }, - "bbh": { - "name": "BBH", - "value": 0.30280434847620613, - "normalized_score": 4.411893932611097 - }, - "math": { - "name": "MATH Level 5", - "value": 0.009818731117824773, - "normalized_score": 0.9818731117824773 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2332214765100671, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3724166666666667, - "normalized_score": 3.7187500000000013 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1307347074468085, - "normalized_score": 3.4149674940898342 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-06", - "submission_date": "2024-09-13", - "generation": 1, - "base_model": "pszemraj/tFINE-900m-e16-d32-1024ctx", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.887, - "co2_cost": 4.912012576744369 - } - }, - { - "id": "BEE-spoke-data/tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024_bfloat16_b1e2f12f5224be9f7da0cb5ff30e1bbb3f10f6ca_False", - "model": { - "name": "BEE-spoke-data/tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024", - "sha": "b1e2f12f5224be9f7da0cb5ff30e1bbb3f10f6ca", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - 
"weight_type": "Original", - "architecture": "T5ForConditionalGeneration", - "average_score": 5.999886320692095, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.13206735905176042, - "normalized_score": 13.206735905176043 - }, - "bbh": { - "name": "BBH", - "value": 0.3137786304497592, - "normalized_score": 4.737018282627999 - }, - "math": { - "name": "MATH Level 5", - "value": 0.010574018126888218, - "normalized_score": 1.0574018126888218 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25419463087248323, - "normalized_score": 0.5592841163310973 - }, - "musr": { - "name": "MUSR", - "value": 0.43927083333333333, - "normalized_score": 13.808854166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12367021276595745, - "normalized_score": 2.6300236406619386 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-10", - "submission_date": "2024-09-14", - "generation": 2, - "base_model": "pszemraj/tFINE-900m-e16-d32-1024ctx", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.887, - "co2_cost": 5.2012158962348956 - } - }, - { - "id": "BEE-spoke-data/tFINE-900m-e16-d32-instruct_2e_bfloat16_4c626138c9f4e0c3eafe74b2755eb89334c7ca59_False", - "model": { - "name": "BEE-spoke-data/tFINE-900m-e16-d32-instruct_2e", - "sha": "4c626138c9f4e0c3eafe74b2755eb89334c7ca59", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "T5ForConditionalGeneration", - "average_score": 5.908138429401099, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1402855534426433, - "normalized_score": 14.02855534426433 - }, - "bbh": { - "name": "BBH", - "value": 0.31345674638809023, - "normalized_score": 5.013070335904381 - }, - "math": { - "name": "MATH Level 5", - "value": 0.013595166163141994, - "normalized_score": 1.3595166163141994 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.42069791666666667, - "normalized_score": 11.187239583333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12367021276595745, - "normalized_score": 2.6300236406619386 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-17", - "submission_date": "2024-09-22", - "generation": 3, - "base_model": "pszemraj/tFINE-900m-e16-d32-1024ctx", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.887, - "co2_cost": 5.033237360715477 - } - }, - { - "id": "BEE-spoke-data/tFINE-900m-instruct-orpo_bfloat16_e0a21c79bac74442252d36e2c01403afa3f0971b_True", - "model": { - "name": "BEE-spoke-data/tFINE-900m-instruct-orpo", - "sha": "e0a21c79bac74442252d36e2c01403afa3f0971b", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "T5ForConditionalGeneration", - "average_score": 3.69630792495429, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.13299157346950535, - "normalized_score": 13.299157346950535 - }, - "bbh": { - "name": "BBH", - "value": 0.30220933767045094, - "normalized_score": 3.267300577931774 - }, - "math": { - "name": "MATH Level 5", - "value": 
0.015861027190332326, - "normalized_score": 1.5861027190332326 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.3408541666666667, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11519281914893617, - "normalized_score": 1.6880910165484628 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-22", - "submission_date": "2024-09-23", - "generation": 0, - "base_model": "BEE-spoke-data/tFINE-900m-instruct-orpo", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.887, - "co2_cost": 5.149923864058659 - } - }, - { - "id": "BSC-LT/salamandra-7b_float16_bf30739316ceac4b624583a27ec96dfc401179e8_False", - "model": { - "name": "BSC-LT/salamandra-7b", - "sha": "bf30739316ceac4b624583a27ec96dfc401179e8", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.704911444345451, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.13673829882489574, - "normalized_score": 13.673829882489574 - }, - "bbh": { - "name": "BBH", - "value": 0.3516612209885983, - "normalized_score": 10.157421990520298 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0037764350453172208, - "normalized_score": 0.3776435045317221 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.35009375000000004, - "normalized_score": 1.8617187499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.14926861702127658, - "normalized_score": 5.474290780141842 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-30", - "submission_date": "2024-11-22", - "generation": 0, - "base_model": "BSC-LT/salamandra-7b", - "hub_license": "apache-2.0", - "hub_hearts": 27, - "params_billions": 7.768, - "co2_cost": 0.37857732828203783 - } - }, - { - "id": "BSC-LT/salamandra-7b-instruct_float16_77ddccbc7d9f9ffd55a8535365e8eebc493ccb8e_True", - "model": { - "name": "BSC-LT/salamandra-7b-instruct", - "sha": "77ddccbc7d9f9ffd55a8535365e8eebc493ccb8e", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 10.181243697003833, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24507418095098782, - "normalized_score": 24.507418095098785 - }, - "bbh": { - "name": "BBH", - "value": 0.3851324290080956, - "normalized_score": 14.688128545731287 - }, - "math": { - "name": "MATH Level 5", - "value": 0.008308157099697885, - "normalized_score": 0.8308157099697886 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.41343749999999996, - "normalized_score": 10.213020833333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.18051861702127658, - "normalized_score": 8.946513002364064 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - 
"metadata": { - "upload_date": "2024-09-30", - "submission_date": "2024-11-22", - "generation": 1, - "base_model": "BSC-LT/salamandra-7b-instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 54, - "params_billions": 7.768, - "co2_cost": 2.2950081726740126 - } - }, - { - "id": "Ba2han/Llama-Phi-3_DoRA_bfloat16_36f99064a7be8ba475c2ee5c5424e95c263ccb87_True", - "model": { - "name": "Ba2han/Llama-Phi-3_DoRA", - "sha": "36f99064a7be8ba475c2ee5c5424e95c263ccb87", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 25.469895001154402, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5130531434371911, - "normalized_score": 51.30531434371911 - }, - "bbh": { - "name": "BBH", - "value": 0.5514558259029191, - "normalized_score": 37.24916418079274 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1216012084592145, - "normalized_score": 12.16012084592145 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3263422818791946, - "normalized_score": 10.17897091722595 - }, - "musr": { - "name": "MUSR", - "value": 0.40692708333333333, - "normalized_score": 9.532552083333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39153922872340424, - "normalized_score": 32.393247635933804 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-15", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "Ba2han/Llama-Phi-3_DoRA", - "hub_license": "mit", - "hub_hearts": 6, - "params_billions": 3.821, - "co2_cost": 1.0662727000384584 - } - }, - { - "id": "Baptiste-HUVELLE-10/LeTriomphant2.2_ECE_iLAB_bfloat16_d6e0221f0a3fc019049cc411fddc2bf2f646519e_False", - "model": { - "name": "Baptiste-HUVELLE-10/LeTriomphant2.2_ECE_iLAB", - "sha": "d6e0221f0a3fc019049cc411fddc2bf2f646519e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.30426754456703, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5076330802271307, - "normalized_score": 50.76330802271307 - }, - "bbh": { - "name": "BBH", - "value": 0.7256319952414622, - "normalized_score": 61.61232489960707 - }, - "math": { - "name": "MATH Level 5", - "value": 0.44486404833836857, - "normalized_score": 44.48640483383686 - }, - "gpqa": { - "name": "GPQA", - "value": 0.39932885906040266, - "normalized_score": 19.910514541387023 - }, - "musr": { - "name": "MUSR", - "value": 0.46255208333333336, - "normalized_score": 17.15234375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5851063829787234, - "normalized_score": 53.90070921985816 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-24", - "submission_date": "2025-02-28", - "generation": 0, - "base_model": "Baptiste-HUVELLE-10/LeTriomphant2.2_ECE_iLAB", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 72.706, - "co2_cost": 33.66266319511599 - } - }, - { - "id": "BenevolenceMessiah/Qwen2.5-72B-2x-Instruct-TIES-v1.0_bfloat16_459891ec78c9bbed2836a8bba706e1707db10231_True", - "model": { - "name": "BenevolenceMessiah/Qwen2.5-72B-2x-Instruct-TIES-v1.0", - "sha": 
"459891ec78c9bbed2836a8bba706e1707db10231", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 42.26732013588396, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5473499204333391, - "normalized_score": 54.73499204333391 - }, - "bbh": { - "name": "BBH", - "value": 0.727311411382245, - "normalized_score": 61.91149453060484 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5785498489425982, - "normalized_score": 57.85498489425982 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3674496644295302, - "normalized_score": 15.659955257270694 - }, - "musr": { - "name": "MUSR", - "value": 0.4206666666666667, - "normalized_score": 12.016666666666671 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5628324468085106, - "normalized_score": 51.425827423167846 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-11", - "submission_date": "2024-11-24", - "generation": 1, - "base_model": "BenevolenceMessiah/Qwen2.5-72B-2x-Instruct-TIES-v1.0 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 72.7, - "co2_cost": 34.70178370637048 - } - }, - { - "id": "BenevolenceMessiah/Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0_bfloat16_d90f6e36584dc9b367461701e83c833bdeb736f2_False", - "model": { - "name": "BenevolenceMessiah/Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0", - "sha": "d90f6e36584dc9b367461701e83c833bdeb736f2", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 15.07109165755061, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3011531624977283, - "normalized_score": 30.115316249772825 - }, - "bbh": { - "name": "BBH", - "value": 0.4908666248538678, - "normalized_score": 26.877991478721707 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04154078549848943, - "normalized_score": 4.1540785498489425 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2625838926174497, - "normalized_score": 1.6778523489932917 - }, - "musr": { - "name": "MUSR", - "value": 0.4079791666666666, - "normalized_score": 8.930729166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.26803523936170215, - "normalized_score": 18.670582151300238 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-21", - "submission_date": "2024-09-22", - "generation": 1, - "base_model": "BenevolenceMessiah/Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 28.309, - "co2_cost": 6.669594250051597 - } - }, - { - "id": "BlackBeenie/Bloslain-8B-v0.2_bfloat16_ebcb7f9f30bc172523a827d1ddefeb52b1aba494_False", - "model": { - "name": "BlackBeenie/Bloslain-8B-v0.2", - "sha": "ebcb7f9f30bc172523a827d1ddefeb52b1aba494", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.803914231544336, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5023371321427147, - "normalized_score": 50.233713214271475 - }, - "bbh": { - "name": "BBH", - "value": 
0.511087946253543, - "normalized_score": 30.662901797340655 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14501510574018128, - "normalized_score": 14.501510574018129 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3062080536912752, - "normalized_score": 7.494407158836691 - }, - "musr": { - "name": "MUSR", - "value": 0.4075729166666667, - "normalized_score": 10.446614583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3653590425531915, - "normalized_score": 29.484338061465724 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-19", - "submission_date": "2024-11-19", - "generation": 1, - "base_model": "BlackBeenie/Bloslain-8B-v0.2 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3835257027469932 - } - }, - { - "id": "BlackBeenie/Llama-3.1-8B-OpenO1-SFT-v0.1_bfloat16_35e7781b9dff5aea29576709201d641e5f44440d_True", - "model": { - "name": "BlackBeenie/Llama-3.1-8B-OpenO1-SFT-v0.1", - "sha": "35e7781b9dff5aea29576709201d641e5f44440d", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.4004099666704, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5124037553690873, - "normalized_score": 51.240375536908736 - }, - "bbh": { - "name": "BBH", - "value": 0.4787448361604986, - "normalized_score": 26.034289552808826 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15256797583081572, - "normalized_score": 15.256797583081571 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.36181250000000004, - "normalized_score": 5.726562500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34915226063829785, - "normalized_score": 27.683584515366427 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-28", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "BlackBeenie/Llama-3.1-8B-OpenO1-SFT-v0.1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.462855656236873 - } - }, - { - "id": "BlackBeenie/Llama-3.1-8B-pythonic-passthrough-merge_float16_3ec46616f5b34821b3b928938931295f92e49213_False", - "model": { - "name": "BlackBeenie/Llama-3.1-8B-pythonic-passthrough-merge", - "sha": "3ec46616f5b34821b3b928938931295f92e49213", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 7.3995789368526035, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23158552640327662, - "normalized_score": 23.158552640327663 - }, - "bbh": { - "name": "BBH", - "value": 0.3453848032699584, - "normalized_score": 9.359904687487282 - }, - "math": { - "name": "MATH Level 5", - "value": 0.011329305135951661, - "normalized_score": 1.1329305135951662 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.37781249999999994, - "normalized_score": 4.593229166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", 
- "value": 0.1332280585106383, - "normalized_score": 3.6920065011820316 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-06", - "submission_date": "2024-11-06", - "generation": 1, - "base_model": "BlackBeenie/Llama-3.1-8B-pythonic-passthrough-merge (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 20.245, - "co2_cost": 7.166580943068515 - } - }, - { - "id": "BlackBeenie/Neos-Gemma-2-9b_float16_56dbbb4f972be887e5b57311a8a32e148e98d154_True", - "model": { - "name": "BlackBeenie/Neos-Gemma-2-9b", - "sha": "56dbbb4f972be887e5b57311a8a32e148e98d154", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 25.475663495967385, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5875665456544192, - "normalized_score": 58.75665456544192 - }, - "bbh": { - "name": "BBH", - "value": 0.5502975126048852, - "normalized_score": 35.638851313766615 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09818731117824774, - "normalized_score": 9.818731117824774 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32298657718120805, - "normalized_score": 9.731543624161072 - }, - "musr": { - "name": "MUSR", - "value": 0.36175, - "normalized_score": 5.785416666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39810505319148937, - "normalized_score": 33.12278368794326 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-11", - "submission_date": "2024-11-11", - "generation": 1, - "base_model": "BlackBeenie/Neos-Gemma-2-9b (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 9.242, - "co2_cost": 5.358184238723577 - } - }, - { - "id": "BlackBeenie/Neos-Llama-3.1-8B_bfloat16_9b48520ec1a777be0f1fd88f95454d85ac568407_True", - "model": { - "name": "BlackBeenie/Neos-Llama-3.1-8B", - "sha": "9b48520ec1a777be0f1fd88f95454d85ac568407", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.51217705998031, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49439376410147295, - "normalized_score": 49.43937641014729 - }, - "bbh": { - "name": "BBH", - "value": 0.4424998411442879, - "normalized_score": 21.080122945815933 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13217522658610273, - "normalized_score": 13.217522658610273 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.3749895833333334, - "normalized_score": 5.740364583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32621343085106386, - "normalized_score": 25.134825650118202 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-12", - "submission_date": "2024-11-12", - "generation": 1, - "base_model": "BlackBeenie/Neos-Llama-3.1-8B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.5877339071002958 - } - }, - { - "id": 
"BlackBeenie/Neos-Llama-3.1-base_bfloat16_d4af4d73ba5fea0275fd1e3ba5102a79ac8009db_True", - "model": { - "name": "BlackBeenie/Neos-Llama-3.1-base", - "sha": "d4af4d73ba5fea0275fd1e3ba5102a79ac8009db", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 3.9687947544913142, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17508211545366295, - "normalized_score": 17.508211545366294 - }, - "bbh": { - "name": "BBH", - "value": 0.29303397468240516, - "normalized_score": 2.2214471263806472 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23741610738255034, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.34990625000000003, - "normalized_score": 2.8382812499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11120345744680851, - "normalized_score": 1.2448286052009452 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-11", - "submission_date": "2024-11-11", - "generation": 0, - "base_model": "BlackBeenie/Neos-Llama-3.1-base", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 4.65, - "co2_cost": 2.818569405998589 - } - }, - { - "id": "BlackBeenie/Neos-Phi-3-14B-v0.1_bfloat16_0afb7cc74a94f11f2695dc92788cdc6e28325f9c_True", - "model": { - "name": "BlackBeenie/Neos-Phi-3-14B-v0.1", - "sha": "0afb7cc74a94f11f2695dc92788cdc6e28325f9c", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 27.032306783654633, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4022449323350931, - "normalized_score": 40.22449323350931 - }, - "bbh": { - "name": "BBH", - "value": 0.6211931530444463, - "normalized_score": 46.63138689861978 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1782477341389728, - "normalized_score": 17.82477341389728 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.41254166666666664, - "normalized_score": 10.534375000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.45636635638297873, - "normalized_score": 39.596261820330966 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-27", - "submission_date": "2024-11-27", - "generation": 1, - "base_model": "BlackBeenie/Neos-Phi-3-14B-v0.1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 13.96, - "co2_cost": 1.8192521859467732 - } - }, - { - "id": "BlackBeenie/llama-3-luminous-merged_bfloat16_64288dd8e3305f2dc11d84fe0c653f351b2e8a9d_False", - "model": { - "name": "BlackBeenie/llama-3-luminous-merged", - "sha": "64288dd8e3305f2dc11d84fe0c653f351b2e8a9d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.618577268829103, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43234506664538974, - "normalized_score": 43.234506664538976 - }, - "bbh": { - "name": "BBH", - "value": 
0.5153924501559338, - "normalized_score": 30.643687228787254 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08685800604229607, - "normalized_score": 8.685800604229607 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29278523489932884, - "normalized_score": 5.7046979865771785 - }, - "musr": { - "name": "MUSR", - "value": 0.4148958333333333, - "normalized_score": 10.628645833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3773271276595745, - "normalized_score": 30.814125295508273 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-15", - "submission_date": "2024-10-11", - "generation": 1, - "base_model": "BlackBeenie/llama-3-luminous-merged (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.527707129075998 - } - }, - { - "id": "BlackBeenie/llama-3.1-8B-Galore-openassistant-guanaco_bfloat16_828fa03c10e9085700b7abbe26f95067fab010fd_False", - "model": { - "name": "BlackBeenie/llama-3.1-8B-Galore-openassistant-guanaco", - "sha": "828fa03c10e9085700b7abbe26f95067fab010fd", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.37421537036845, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2634842218646525, - "normalized_score": 26.348422186465246 - }, - "bbh": { - "name": "BBH", - "value": 0.5213365363748029, - "normalized_score": 31.444704759068383 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06646525679758308, - "normalized_score": 6.646525679758309 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30033557046979864, - "normalized_score": 6.711409395973152 - }, - "musr": { - "name": "MUSR", - "value": 0.44062500000000004, - "normalized_score": 14.578124999999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32064494680851063, - "normalized_score": 24.516105200945624 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-16", - "submission_date": "2024-10-19", - "generation": 0, - "base_model": "BlackBeenie/llama-3.1-8B-Galore-openassistant-guanaco", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.7136395964032798 - } - }, - { - "id": "Bllossom/llama-3.2-Korean-Bllossom-AICA-5B_bfloat16_4672b7de38c2cc390b146d6b6ce7a6dd295d8a0e_True", - "model": { - "name": "Bllossom/llama-3.2-Korean-Bllossom-AICA-5B", - "sha": "4672b7de38c2cc390b146d6b6ce7a6dd295d8a0e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MllamaForConditionalGeneration", - "average_score": 19.012851991491477, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5172497861230424, - "normalized_score": 51.72497861230424 - }, - "bbh": { - "name": "BBH", - "value": 0.42930745041520607, - "normalized_score": 18.650222512085886 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12386706948640483, - "normalized_score": 12.386706948640484 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2986577181208054, - "normalized_score": 6.487695749440718 - }, - "musr": { - "name": "MUSR", - "value": 0.3833958333333333, - "normalized_score": 
5.824479166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27102726063829785, - "normalized_score": 19.003028959810873 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-12", - "submission_date": "2024-12-16", - "generation": 0, - "base_model": "Bllossom/llama-3.2-Korean-Bllossom-AICA-5B", - "hub_license": "llama3.2", - "hub_hearts": 70, - "params_billions": 5.199, - "co2_cost": 1.2202363351953194 - } - }, - { - "id": "BoltMonkey/DreadMix_bfloat16_ab5dbaaff606538db73b6fd89aa169760104a566_True", - "model": { - "name": "BoltMonkey/DreadMix", - "sha": "ab5dbaaff606538db73b6fd89aa169760104a566", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.76173172145654, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7094908176970438, - "normalized_score": 70.94908176970438 - }, - "bbh": { - "name": "BBH", - "value": 0.5435097438362475, - "normalized_score": 34.84501521605123 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1555891238670695, - "normalized_score": 15.55891238670695 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.42121875, - "normalized_score": 13.619010416666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37898936170212766, - "normalized_score": 30.99881796690307 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-12", - "submission_date": "2024-10-13", - "generation": 1, - "base_model": "BoltMonkey/DreadMix (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 2.4194278361687624 - } - }, - { - "id": "BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated_bfloat16_969e4c9b41e733a367f5ea18ed50a6171b5e2357_True", - "model": { - "name": "BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated", - "sha": "969e4c9b41e733a367f5ea18ed50a6171b5e2357", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 27.776634015071924, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7998909559967553, - "normalized_score": 79.98909559967552 - }, - "bbh": { - "name": "BBH", - "value": 0.5151987922850448, - "normalized_score": 30.75990006920939 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11933534743202417, - "normalized_score": 11.933534743202417 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28104026845637586, - "normalized_score": 4.138702460850116 - }, - "musr": { - "name": "MUSR", - "value": 0.401875, - "normalized_score": 9.467708333333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37333776595744683, - "normalized_score": 30.37086288416076 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-01", - "submission_date": "2024-10-10", - "generation": 1, - "base_model": "BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated (Merge)", - 
"hub_license": "llama3.1", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 2.486203373854456 - } - }, - { - "id": "BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated_float16_969e4c9b41e733a367f5ea18ed50a6171b5e2357_False", - "model": { - "name": "BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated", - "sha": "969e4c9b41e733a367f5ea18ed50a6171b5e2357", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.345510590269537, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45902316963434797, - "normalized_score": 45.9023169634348 - }, - "bbh": { - "name": "BBH", - "value": 0.5185441912447182, - "normalized_score": 30.793784752659274 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09365558912386707, - "normalized_score": 9.365558912386707 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.4082604166666666, - "normalized_score": 9.532552083333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3631150265957447, - "normalized_score": 29.235002955082745 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-01", - "submission_date": "2024-10-01", - "generation": 1, - "base_model": "BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 0.7743185740676325 - } - }, - { - "id": "BoltMonkey/SuperNeuralDreadDevil-8b_bfloat16_804d5864127e603abec179a159b43f446246fafc_True", - "model": { - "name": "BoltMonkey/SuperNeuralDreadDevil-8b", - "sha": "804d5864127e603abec179a159b43f446246fafc", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 27.11105496878025, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7709898624538447, - "normalized_score": 77.09898624538445 - }, - "bbh": { - "name": "BBH", - "value": 0.5286196012035721, - "normalized_score": 32.61215762394777 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09290030211480363, - "normalized_score": 9.290030211480364 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.39768749999999997, - "normalized_score": 8.3109375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36785239361702127, - "normalized_score": 29.76137706855792 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-13", - "submission_date": "2024-10-13", - "generation": 1, - "base_model": "BoltMonkey/SuperNeuralDreadDevil-8b (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 3.2814161034398572 - } - }, - { - "id": "BrainWave-ML/llama3.2-3B-maths-orpo_float16_d149d83d8e8f3883421d800848fec85766181923_False", - "model": { - "name": "BrainWave-ML/llama3.2-3B-maths-orpo", - "sha": "d149d83d8e8f3883421d800848fec85766181923", - "precision": "float16", - "type": 
"fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.07608283209792, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20490742341431845, - "normalized_score": 20.490742341431847 - }, - "bbh": { - "name": "BBH", - "value": 0.2911778102988436, - "normalized_score": 2.3470409575204094 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.35753125, - "normalized_score": 4.524739583333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11677194148936171, - "normalized_score": 1.8635490543735225 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-24", - "submission_date": "2024-10-24", - "generation": 2, - "base_model": "meta-llama/Llama-3.2-3B-Instruct", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 3.0, - "co2_cost": 1.4144381166479545 - } - }, - { - "id": "BramVanroy/GEITje-7B-ultra_bfloat16_d4552cdc6f015754646464d8411aa4f6bcdba8e8_True", - "model": { - "name": "BramVanroy/GEITje-7B-ultra", - "sha": "d4552cdc6f015754646464d8411aa4f6bcdba8e8", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 11.022898819216541, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3723442687624392, - "normalized_score": 37.234426876243916 - }, - "bbh": { - "name": "BBH", - "value": 0.37761612997305494, - "normalized_score": 12.879913010035898 - }, - "math": { - "name": "MATH Level 5", - "value": 0.015861027190332326, - "normalized_score": 1.5861027190332326 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2625838926174497, - "normalized_score": 1.6778523489932917 - }, - "musr": { - "name": "MUSR", - "value": 0.32897916666666666, - "normalized_score": 1.5223958333333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.20113031914893617, - "normalized_score": 11.236702127659573 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-01-27", - "submission_date": "2024-10-28", - "generation": 3, - "base_model": "mistralai/Mistral-7B-v0.1", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 43, - "params_billions": 7.242, - "co2_cost": 1.2390464419973757 - } - }, - { - "id": "BramVanroy/fietje-2_bfloat16_3abe75d01094b713368e3d911ffb78a2d66ead22_False", - "model": { - "name": "BramVanroy/fietje-2", - "sha": "3abe75d01094b713368e3d911ffb78a2d66ead22", - "precision": "bfloat16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "PhiForCausalLM", - "average_score": 9.140300477810824, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20980332185268422, - "normalized_score": 20.980332185268423 - }, - "bbh": { - "name": "BBH", - "value": 0.40356695178386187, - "normalized_score": 15.603676192567876 - }, - "math": { - "name": "MATH Level 5", - "value": 0.015861027190332326, - "normalized_score": 1.5861027190332326 - }, - "gpqa": { - "name": "GPQA", - "value": 
0.25419463087248323, - "normalized_score": 0.5592841163310973 - }, - "musr": { - "name": "MUSR", - "value": 0.3695625, - "normalized_score": 5.161979166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.19855385638297873, - "normalized_score": 10.950428486997636 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-09", - "submission_date": "2024-10-28", - "generation": 1, - "base_model": "microsoft/phi-2", - "hub_license": "mit", - "hub_hearts": 9, - "params_billions": 2.78, - "co2_cost": 0.6250772888227508 - } - }, - { - "id": "BramVanroy/fietje-2-chat_bfloat16_364e785d90438b787b94e33741a930c9932353c0_True", - "model": { - "name": "BramVanroy/fietje-2-chat", - "sha": "364e785d90438b787b94e33741a930c9932353c0", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "PhiForCausalLM", - "average_score": 10.615455210257904, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2917359273394593, - "normalized_score": 29.17359273394593 - }, - "bbh": { - "name": "BBH", - "value": 0.4149753717401999, - "normalized_score": 17.718965848323496 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0188821752265861, - "normalized_score": 1.8882175226586102 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23993288590604026, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3527604166666667, - "normalized_score": 3.195052083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.20545212765957446, - "normalized_score": 11.71690307328605 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-29", - "submission_date": "2024-10-28", - "generation": 3, - "base_model": "microsoft/phi-2", - "hub_license": "mit", - "hub_hearts": 5, - "params_billions": 2.775, - "co2_cost": 0.7980654163418115 - } - }, - { - "id": "BramVanroy/fietje-2-instruct_bfloat16_b7b44797cd52eda1182667217e8371dbdfee4976_True", - "model": { - "name": "BramVanroy/fietje-2-instruct", - "sha": "b7b44797cd52eda1182667217e8371dbdfee4976", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "PhiForCausalLM", - "average_score": 10.48571837435655, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2789963962286732, - "normalized_score": 27.899639622867326 - }, - "bbh": { - "name": "BBH", - "value": 0.41360714173029806, - "normalized_score": 17.57247980884759 - }, - "math": { - "name": "MATH Level 5", - "value": 0.022658610271903322, - "normalized_score": 2.2658610271903323 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2332214765100671, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3369166666666667, - "normalized_score": 2.9145833333333346 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2103557180851064, - "normalized_score": 12.26174645390071 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-27", - "submission_date": "2024-10-28", - "generation": 2, - "base_model": "microsoft/phi-2", - "hub_license": "mit", - "hub_hearts": 3, - "params_billions": 
2.775, - "co2_cost": 0.6487901858353973 - } - }, - { - "id": "CYFRAGOVPL/Llama-PLLuM-8B-base_bfloat16_d24d927d110786097dba5512d12f9311c49c4ddc_False", - "model": { - "name": "CYFRAGOVPL/Llama-PLLuM-8B-base", - "sha": "d24d927d110786097dba5512d12f9311c49c4ddc", - "precision": "bfloat16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.51887667865397, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.28988749850396944, - "normalized_score": 28.98874985039695 - }, - "bbh": { - "name": "BBH", - "value": 0.43204480458140976, - "normalized_score": 20.218737977791477 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03625377643504532, - "normalized_score": 3.625377643504532 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28523489932885904, - "normalized_score": 4.697986577181204 - }, - "musr": { - "name": "MUSR", - "value": 0.39703125, - "normalized_score": 10.062239583333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27568151595744683, - "normalized_score": 19.52016843971631 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-07", - "submission_date": "2025-02-28", - "generation": 0, - "base_model": "CYFRAGOVPL/Llama-PLLuM-8B-base", - "hub_license": "llama3.1", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.723931594223454 - } - }, - { - "id": "CYFRAGOVPL/Llama-PLLuM-8B-chat_bfloat16_b6bb9172444c07e51ad16e8092d0f23e864a8956_True", - "model": { - "name": "CYFRAGOVPL/Llama-PLLuM-8B-chat", - "sha": "b6bb9172444c07e51ad16e8092d0f23e864a8956", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.61481701543444, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3514862786295917, - "normalized_score": 35.148627862959174 - }, - "bbh": { - "name": "BBH", - "value": 0.40770722535589576, - "normalized_score": 16.279056764258215 - }, - "math": { - "name": "MATH Level 5", - "value": 0.033987915407854986, - "normalized_score": 3.3987915407854987 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.41991666666666666, - "normalized_score": 11.856249999999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27194148936170215, - "normalized_score": 19.104609929078016 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-07", - "submission_date": "2025-02-28", - "generation": 0, - "base_model": "CYFRAGOVPL/Llama-PLLuM-8B-chat", - "hub_license": "llama3.1", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 0.6247553846649435 - } - }, - { - "id": "CYFRAGOVPL/PLLuM-12B-base_bfloat16_f8770799a0f1f32861a19ae190933ed475fe5488_False", - "model": { - "name": "CYFRAGOVPL/PLLuM-12B-base", - "sha": "f8770799a0f1f32861a19ae190933ed475fe5488", - "precision": "bfloat16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 14.667469455106152, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 
0.2820937335159599, - "normalized_score": 28.20937335159599 - }, - "bbh": { - "name": "BBH", - "value": 0.4390596143784447, - "normalized_score": 21.240801542833136 - }, - "math": { - "name": "MATH Level 5", - "value": 0.028700906344410877, - "normalized_score": 2.8700906344410875 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.4142395833333334, - "normalized_score": 10.979947916666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2740192819148936, - "normalized_score": 19.335475768321512 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-07", - "submission_date": "2025-02-28", - "generation": 0, - "base_model": "CYFRAGOVPL/PLLuM-12B-base", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 12.248, - "co2_cost": 0.8935730836523827 - } - }, - { - "id": "CYFRAGOVPL/PLLuM-12B-chat_bfloat16_3a0b90db5f90490eb05e757355b711b59fca7b95_True", - "model": { - "name": "CYFRAGOVPL/PLLuM-12B-chat", - "sha": "3a0b90db5f90490eb05e757355b711b59fca7b95", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 15.348386882498241, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.32143601200370575, - "normalized_score": 32.14360120037057 - }, - "bbh": { - "name": "BBH", - "value": 0.44458000333075703, - "normalized_score": 21.319737721095155 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01812688821752266, - "normalized_score": 1.812688821752266 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.4114791666666666, - "normalized_score": 14.668229166666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2872340425531915, - "normalized_score": 20.803782505910167 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-07", - "submission_date": "2025-02-28", - "generation": 0, - "base_model": "CYFRAGOVPL/PLLuM-12B-chat", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 12.248, - "co2_cost": 0.7807010086114211 - } - }, - { - "id": "CYFRAGOVPL/PLLuM-12B-nc-base_bfloat16_28b2cf24ee46567c55939ec12edc1bf17d445adb_False", - "model": { - "name": "CYFRAGOVPL/PLLuM-12B-nc-base", - "sha": "28b2cf24ee46567c55939ec12edc1bf17d445adb", - "precision": "bfloat16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 11.421222164941392, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24045310886226323, - "normalized_score": 24.045310886226325 - }, - "bbh": { - "name": "BBH", - "value": 0.42767589675970014, - "normalized_score": 19.387664951963906 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02190332326283988, - "normalized_score": 2.190332326283988 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.36451041666666667, - "normalized_score": 2.897135416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - 
"value": 0.25590093085106386, - "normalized_score": 17.322325650118206 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-07", - "submission_date": "2025-02-28", - "generation": 0, - "base_model": "CYFRAGOVPL/PLLuM-12B-nc-base", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 0, - "params_billions": 12.248, - "co2_cost": 0.9262126633435942 - } - }, - { - "id": "CYFRAGOVPL/PLLuM-12B-nc-chat_bfloat16_5b29326cb35c193039ea219183f7e606ec7acda9_True", - "model": { - "name": "CYFRAGOVPL/PLLuM-12B-nc-chat", - "sha": "5b29326cb35c193039ea219183f7e606ec7acda9", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 14.598342751082507, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.28344237733657807, - "normalized_score": 28.344237733657806 - }, - "bbh": { - "name": "BBH", - "value": 0.45764328318815456, - "normalized_score": 23.008553823293614 - }, - "math": { - "name": "MATH Level 5", - "value": 0.012084592145015106, - "normalized_score": 1.2084592145015105 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.4353541666666667, - "normalized_score": 12.919270833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.25972406914893614, - "normalized_score": 17.747118794326237 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-07", - "submission_date": "2025-02-28", - "generation": 0, - "base_model": "CYFRAGOVPL/PLLuM-12B-nc-chat", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 5, - "params_billions": 12.248, - "co2_cost": 0.8350715162365853 - } - }, - { - "id": "CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct_float16_5be46c768d800447b82de41fdc9df2f8c43ba3c0_True", - "model": { - "name": "CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct", - "sha": "5be46c768d800447b82de41fdc9df2f8c43ba3c0", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.50945092474288, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7198821349574684, - "normalized_score": 71.98821349574685 - }, - "bbh": { - "name": "BBH", - "value": 0.4426719080820793, - "normalized_score": 21.49731033280348 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2054380664652568, - "normalized_score": 20.54380664652568 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.3649166666666667, - "normalized_score": 3.9812500000000024 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2822473404255319, - "normalized_score": 20.249704491725765 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-30", - "submission_date": "2024-12-20", - "generation": 1, - "base_model": "CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 8, - "params_billions": 3.213, - "co2_cost": 1.1359074637893565 - } - }, - { - 
"id": "CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct-2412_float16_ac6f1c0b756412163e17cb05d9e2f7ced274dc12_False", - "model": { - "name": "CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct-2412", - "sha": "ac6f1c0b756412163e17cb05d9e2f7ced274dc12", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.301755342918195, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47818233398493776, - "normalized_score": 47.818233398493774 - }, - "bbh": { - "name": "BBH", - "value": 0.43577246498246686, - "normalized_score": 20.175679614215046 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17598187311178248, - "normalized_score": 17.598187311178247 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29278523489932884, - "normalized_score": 5.7046979865771785 - }, - "musr": { - "name": "MUSR", - "value": 0.3872083333333334, - "normalized_score": 6.801041666666673 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31341422872340424, - "normalized_score": 23.712692080378247 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-03", - "submission_date": "2024-12-19", - "generation": 1, - "base_model": "CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct-2412 (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 4, - "params_billions": 3.213, - "co2_cost": 1.2871786673601686 - } - }, - { - "id": "Casual-Autopsy/L3-Umbral-Mind-RP-v2.0-8B_bfloat16_b46c066ea8387264858dc3461f382e7b42fd9c48_True", - "model": { - "name": "Casual-Autopsy/L3-Umbral-Mind-RP-v2.0-8B", - "sha": "b46c066ea8387264858dc3461f382e7b42fd9c48", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.899338809216697, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7122634609502786, - "normalized_score": 71.22634609502786 - }, - "bbh": { - "name": "BBH", - "value": 0.5262406145493724, - "normalized_score": 32.48627762381486 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1095166163141994, - "normalized_score": 10.95166163141994 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28691275167785235, - "normalized_score": 4.921700223713646 - }, - "musr": { - "name": "MUSR", - "value": 0.3686666666666667, - "normalized_score": 5.550000000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3723404255319149, - "normalized_score": 30.26004728132387 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-26", - "submission_date": "2024-07-02", - "generation": 1, - "base_model": "Casual-Autopsy/L3-Umbral-Mind-RP-v2.0-8B (Merge)", - "hub_license": "llama3", - "hub_hearts": 15, - "params_billions": 8.03, - "co2_cost": 1.976769816253202 - } - }, - { - "id": "CausalLM/14B_bfloat16_cc054cf5953252d0709cb3267d1a85246e489e95_False", - "model": { - "name": "CausalLM/14B", - "sha": "cc054cf5953252d0709cb3267d1a85246e489e95", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.235580154596317, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - 
"value": 0.2788213052478535, - "normalized_score": 27.882130524785346 - }, - "bbh": { - "name": "BBH", - "value": 0.4700462397700626, - "normalized_score": 24.780942674518666 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0755287009063444, - "normalized_score": 7.552870090634441 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.4154791666666667, - "normalized_score": 11.468229166666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3221409574468085, - "normalized_score": 24.682328605200944 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-10-22", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "CausalLM/14B", - "hub_license": "wtfpl", - "hub_hearts": 302, - "params_billions": 14.0, - "co2_cost": 1.9928289272104154 - } - }, - { - "id": "CausalLM/34b-beta_float16_0429951eb30ccdfff3515e711aaa7649a8a7364c_False", - "model": { - "name": "CausalLM/34b-beta", - "sha": "0429951eb30ccdfff3515e711aaa7649a8a7364c", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.29783304542188, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3043247472262486, - "normalized_score": 30.43247472262486 - }, - "bbh": { - "name": "BBH", - "value": 0.5590996102136266, - "normalized_score": 36.677226262739055 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04833836858006042, - "normalized_score": 4.833836858006042 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3464765100671141, - "normalized_score": 12.863534675615215 - }, - "musr": { - "name": "MUSR", - "value": 0.37486458333333333, - "normalized_score": 6.924739583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5324966755319149, - "normalized_score": 48.05518617021277 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-02-06", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "CausalLM/34b-beta", - "hub_license": "gpl-3.0", - "hub_hearts": 63, - "params_billions": 34.389, - "co2_cost": 5.853192681528399 - } - }, - { - "id": "CausalLM/preview-1-hf_bfloat16_08e1e1ab428a591e74d849ff30bd8766474205bf_True", - "model": { - "name": "CausalLM/preview-1-hf", - "sha": "08e1e1ab428a591e74d849ff30bd8766474205bf", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "GlmForCausalLM", - "average_score": 16.706753161023286, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5558928088582737, - "normalized_score": 55.58928088582738 - }, - "bbh": { - "name": "BBH", - "value": 0.3614567463880903, - "normalized_score": 10.100940706274752 - }, - "math": { - "name": "MATH Level 5", - "value": 0.030211480362537766, - "normalized_score": 3.0211480362537766 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.34218750000000003, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 
0.35970744680851063, - "normalized_score": 28.856382978723406 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-26", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 9.543, - "co2_cost": 2.5574978825517047 - } - }, - { - "id": "Changgil/K2S3-14b-v0.2_bfloat16_b4f0e1eed2640df2b75847ff37e6ebb1be217b6c_False", - "model": { - "name": "Changgil/K2S3-14b-v0.2", - "sha": "b4f0e1eed2640df2b75847ff37e6ebb1be217b6c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 15.275784761251337, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3242840108689389, - "normalized_score": 32.428401086893885 - }, - "bbh": { - "name": "BBH", - "value": 0.4613311786298187, - "normalized_score": 24.283946726650168 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05740181268882175, - "normalized_score": 5.740181268882175 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28104026845637586, - "normalized_score": 4.138702460850116 - }, - "musr": { - "name": "MUSR", - "value": 0.3922604166666666, - "normalized_score": 6.7992187500000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2643783244680851, - "normalized_score": 18.26425827423168 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-17", - "submission_date": "2024-06-27", - "generation": 0, - "base_model": "Changgil/K2S3-14b-v0.2", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 0, - "params_billions": 14.352, - "co2_cost": 3.2492605048590075 - } - }, - { - "id": "Changgil/K2S3-v0.1_bfloat16_d544e389f091983bb4f11314edb526d81753c919_False", - "model": { - "name": "Changgil/K2S3-v0.1", - "sha": "d544e389f091983bb4f11314edb526d81753c919", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 14.839283995827836, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.32765617450586665, - "normalized_score": 32.76561745058666 - }, - "bbh": { - "name": "BBH", - "value": 0.46554920672286154, - "normalized_score": 24.559557672503786 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04607250755287009, - "normalized_score": 4.607250755287009 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.40140624999999996, - "normalized_score": 7.842447916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2562333776595745, - "normalized_score": 17.359264184397162 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-29", - "submission_date": "2024-06-27", - "generation": 0, - "base_model": "Changgil/K2S3-v0.1", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 0, - "params_billions": 14.352, - "co2_cost": 2.4997646880101265 - } - }, - { - "id": 
"ClaudioItaly/Albacus_bfloat16_a53faf62d0f99b67478ed9d262872c821a3ba83c_False", - "model": { - "name": "ClaudioItaly/Albacus", - "sha": "a53faf62d0f99b67478ed9d262872c821a3ba83c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.505574062384014, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4667415790103592, - "normalized_score": 46.674157901035926 - }, - "bbh": { - "name": "BBH", - "value": 0.5113043406568835, - "normalized_score": 31.63886474402479 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07099697885196375, - "normalized_score": 7.099697885196375 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27181208053691275, - "normalized_score": 2.9082774049216997 - }, - "musr": { - "name": "MUSR", - "value": 0.41353124999999996, - "normalized_score": 10.658072916666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31648936170212766, - "normalized_score": 24.054373522458626 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-08", - "submission_date": "2024-09-08", - "generation": 1, - "base_model": "ClaudioItaly/Albacus (Merge)", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 8.987, - "co2_cost": 1.5078778727070576 - } - }, - { - "id": "ClaudioItaly/Book-Gut12B_bfloat16_ae54351faca8170c93bf1de3a51bf16650f5bcf5_False", - "model": { - "name": "ClaudioItaly/Book-Gut12B", - "sha": "ae54351faca8170c93bf1de3a51bf16650f5bcf5", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 23.39409811229197, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.39984685080032095, - "normalized_score": 39.984685080032094 - }, - "bbh": { - "name": "BBH", - "value": 0.5417370194443233, - "normalized_score": 34.63219258973313 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10196374622356495, - "normalized_score": 10.196374622356496 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.4635416666666667, - "normalized_score": 18.276041666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3670212765957447, - "normalized_score": 29.669030732860524 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-12", - "submission_date": "2024-09-17", - "generation": 1, - "base_model": "ClaudioItaly/Book-Gut12B (Merge)", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 12.248, - "co2_cost": 2.9044964466390915 - } - }, - { - "id": "ClaudioItaly/Evolutionstory-7B-v2.2_bfloat16_9f838721d24a5195bed59a5ed8d9af536f7f2459_False", - "model": { - "name": "ClaudioItaly/Evolutionstory-7B-v2.2", - "sha": "9f838721d24a5195bed59a5ed8d9af536f7f2459", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.810835332574907, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4813794066410457, - "normalized_score": 48.137940664104576 - }, - "bbh": { - 
"name": "BBH", - "value": 0.5108043406568835, - "normalized_score": 31.62386474402479 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07099697885196375, - "normalized_score": 7.099697885196375 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2751677852348993, - "normalized_score": 3.355704697986576 - }, - "musr": { - "name": "MUSR", - "value": 0.41353124999999996, - "normalized_score": 10.658072916666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31590757978723405, - "normalized_score": 23.98973108747045 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-30", - "submission_date": "2024-09-01", - "generation": 1, - "base_model": "ClaudioItaly/Evolutionstory-7B-v2.2 (Merge)", - "hub_license": "mit", - "hub_hearts": 2, - "params_billions": 7.242, - "co2_cost": 1.120463976671076 - } - }, - { - "id": "ClaudioItaly/intelligence-cod-rag-7b-v3_bfloat16_2b21473c8a086f8d0c54b82c3454bf5499cdde3a_True", - "model": { - "name": "ClaudioItaly/intelligence-cod-rag-7b-v3", - "sha": "2b21473c8a086f8d0c54b82c3454bf5499cdde3a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 31.836966345244434, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6897820006471718, - "normalized_score": 68.97820006471717 - }, - "bbh": { - "name": "BBH", - "value": 0.5366339718839108, - "normalized_score": 34.776158539494425 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3806646525679758, - "normalized_score": 38.066465256797585 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.4152708333333333, - "normalized_score": 10.675520833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4195478723404255, - "normalized_score": 35.50531914893617 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-29", - "submission_date": "2024-12-02", - "generation": 1, - "base_model": "ClaudioItaly/intelligence-cod-rag-7b-v3 (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.3209446661048176 - } - }, - { - "id": "CohereForAI/aya-23-35B_float16_31d6fd858f20539a55401c7ad913086f54d9ca2c_True", - "model": { - "name": "CohereForAI/aya-23-35B", - "sha": "31d6fd858f20539a55401c7ad913086f54d9ca2c", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "CohereForCausalLM", - "average_score": 24.755408446939658, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6461932117891638, - "normalized_score": 64.61932117891638 - }, - "bbh": { - "name": "BBH", - "value": 0.5399551450731271, - "normalized_score": 34.85836046775463 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03474320241691843, - "normalized_score": 3.474320241691843 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.4309895833333333, - "normalized_score": 13.47369791666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33560505319148937, - 
"normalized_score": 26.178339243498815 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-19", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "CohereForAI/aya-23-35B", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 278, - "params_billions": 34.981, - "co2_cost": 33.970634038967276 - } - }, - { - "id": "CohereForAI/aya-23-8B_float16_ec151d218a24031eb039d92fb83d10445427efc9_True", - "model": { - "name": "CohereForAI/aya-23-8B", - "sha": "ec151d218a24031eb039d92fb83d10445427efc9", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "CohereForCausalLM", - "average_score": 16.010983148168723, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4698887839820565, - "normalized_score": 46.98887839820566 - }, - "bbh": { - "name": "BBH", - "value": 0.4296161519220307, - "normalized_score": 20.203760646739372 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01661631419939577, - "normalized_score": 1.6616314199395772 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28439597315436244, - "normalized_score": 4.5861297539149914 - }, - "musr": { - "name": "MUSR", - "value": 0.3940625, - "normalized_score": 8.424479166666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2278091755319149, - "normalized_score": 14.2010195035461 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-19", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "CohereForAI/aya-23-8B", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 410, - "params_billions": 8.028, - "co2_cost": 2.3903441626419175 - } - }, - { - "id": "CohereForAI/aya-expanse-32b_float16_08b69cfa4240e2009c80ad304f000b491d1b8c38_True", - "model": { - "name": "CohereForAI/aya-expanse-32b", - "sha": "08b69cfa4240e2009c80ad304f000b491d1b8c38", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "CohereForCausalLM", - "average_score": 29.718510126577055, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7301737168490716, - "normalized_score": 73.01737168490715 - }, - "bbh": { - "name": "BBH", - "value": 0.5648670099212114, - "normalized_score": 38.70961143301418 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15332326283987915, - "normalized_score": 15.332326283987916 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32550335570469796, - "normalized_score": 10.067114093959727 - }, - "musr": { - "name": "MUSR", - "value": 0.3872708333333333, - "normalized_score": 6.408854166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.41298204787234044, - "normalized_score": 34.77578309692671 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-10-23", - "submission_date": "2024-10-24", - "generation": 0, - "base_model": "CohereForAI/aya-expanse-32b", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 241, - "params_billions": 32.296, - "co2_cost": 11.035470199820203 - } - }, - { - "id": "CohereForAI/aya-expanse-8b_float16_b9848575c8731981dfcf2e1f3bfbcb917a2e585d_True", - "model": { - 
"name": "CohereForAI/aya-expanse-8b", - "sha": "b9848575c8731981dfcf2e1f3bfbcb917a2e585d", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "CohereForCausalLM", - "average_score": 22.406573697993498, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6358517622131501, - "normalized_score": 63.585176221315 - }, - "bbh": { - "name": "BBH", - "value": 0.4977203055736406, - "normalized_score": 28.52348250428851 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08610271903323263, - "normalized_score": 8.610271903323262 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.37288541666666664, - "normalized_score": 4.410677083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3003656914893617, - "normalized_score": 22.26285460992908 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-10-23", - "submission_date": "2024-10-24", - "generation": 0, - "base_model": "CohereForAI/aya-expanse-8b", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 352, - "params_billions": 8.028, - "co2_cost": 2.3393781377514093 - } - }, - { - "id": "CohereForAI/c4ai-command-r-plus_float16_fa1bd7fb1572ceb861bbbbecfa8af83b29fa8cca_True", - "model": { - "name": "CohereForAI/c4ai-command-r-plus", - "sha": "fa1bd7fb1572ceb861bbbbecfa8af83b29fa8cca", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "CohereForCausalLM", - "average_score": 30.936070612618508, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7664186580495308, - "normalized_score": 76.64186580495308 - }, - "bbh": { - "name": "BBH", - "value": 0.581542357407793, - "normalized_score": 39.91995423143177 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08006042296072508, - "normalized_score": 8.006042296072508 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.48071875000000003, - "normalized_score": 20.42317708333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3991855053191489, - "normalized_score": 33.242833924349874 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-04-03", - "submission_date": "2024-06-13", - "generation": 0, - "base_model": "CohereForAI/c4ai-command-r-plus", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 1715, - "params_billions": 103.811, - "co2_cost": 57.263063423341556 - } - }, - { - "id": "CohereForAI/c4ai-command-r-plus-08-2024_float16_2d8cf3ab0af78b9e43546486b096f86adf3ba4d0_True", - "model": { - "name": "CohereForAI/c4ai-command-r-plus-08-2024", - "sha": "2d8cf3ab0af78b9e43546486b096f86adf3ba4d0", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "CohereForCausalLM", - "average_score": 33.647474595578004, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7539539532883859, - "normalized_score": 75.39539532883859 - }, - "bbh": { - "name": "BBH", - "value": 0.5995999913027185, - "normalized_score": 42.83686540770696 - }, - "math": { - 
"name": "MATH Level 5", - "value": 0.12386706948640483, - "normalized_score": 12.386706948640484 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35067114093959734, - "normalized_score": 13.422818791946312 - }, - "musr": { - "name": "MUSR", - "value": 0.48294791666666664, - "normalized_score": 19.835156249999994 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44207114361702127, - "normalized_score": 38.0079048463357 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-08-21", - "submission_date": "2024-09-19", - "generation": 0, - "base_model": "CohereForAI/c4ai-command-r-plus-08-2024", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 250, - "params_billions": 103.811, - "co2_cost": 44.63775333534616 - } - }, - { - "id": "CohereForAI/c4ai-command-r-v01_float16_16881ccde1c68bbc7041280e6a66637bc46bfe88_True", - "model": { - "name": "CohereForAI/c4ai-command-r-v01", - "sha": "16881ccde1c68bbc7041280e6a66637bc46bfe88", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "CohereForCausalLM", - "average_score": 25.929031834951832, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6748194789824333, - "normalized_score": 67.48194789824333 - }, - "bbh": { - "name": "BBH", - "value": 0.5406415512767856, - "normalized_score": 34.556659257058264 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03474320241691843, - "normalized_score": 3.474320241691843 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.45169791666666664, - "normalized_score": 16.12890625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3369348404255319, - "normalized_score": 26.326093380614658 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-03-11", - "submission_date": "2024-06-13", - "generation": 0, - "base_model": "CohereForAI/c4ai-command-r-v01", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 1080, - "params_billions": 34.981, - "co2_cost": 26.79087453397965 - } - }, - { - "id": "CohereForAI/c4ai-command-r7b-12-2024_bfloat16_a9650f3bda8b0e00825ee36592e086b4ee621102_True", - "model": { - "name": "CohereForAI/c4ai-command-r7b-12-2024", - "sha": "a9650f3bda8b0e00825ee36592e086b4ee621102", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Cohere2ForCausalLM", - "average_score": 31.61752928799648, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7713145564878965, - "normalized_score": 77.13145564878965 - }, - "bbh": { - "name": "BBH", - "value": 0.5502642151855635, - "normalized_score": 36.0245641700103 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2990936555891239, - "normalized_score": 29.909365558912388 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3087248322147651, - "normalized_score": 7.829977628635347 - }, - "musr": { - "name": "MUSR", - "value": 0.41251041666666666, - "normalized_score": 10.230468750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3572140957446808, - "normalized_score": 28.5793439716312 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": 
false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-12-11", - "submission_date": "2024-12-20", - "generation": 0, - "base_model": "CohereForAI/c4ai-command-r7b-12-2024", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 375, - "params_billions": 8.028, - "co2_cost": 4.909613871990747 - } - }, - { - "id": "Columbia-NLP/LION-Gemma-2b-dpo-v1.0_float16_a5f780075831374f8850324448acf94976dea504_True", - "model": { - "name": "Columbia-NLP/LION-Gemma-2b-dpo-v1.0", - "sha": "a5f780075831374f8850324448acf94976dea504", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "GemmaForCausalLM", - "average_score": 11.483994762243412, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3278312654866864, - "normalized_score": 32.78312654866864 - }, - "bbh": { - "name": "BBH", - "value": 0.39199563613207467, - "normalized_score": 14.585976093815775 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04305135951661632, - "normalized_score": 4.305135951661631 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24916107382550334, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.41201041666666666, - "normalized_score": 9.834635416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16655585106382978, - "normalized_score": 7.395094562647753 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-28", - "submission_date": "2024-07-04", - "generation": 0, - "base_model": "Columbia-NLP/LION-Gemma-2b-dpo-v1.0", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 2.506, - "co2_cost": 0.9796484742504703 - } - }, - { - "id": "Columbia-NLP/LION-Gemma-2b-dpo-v1.0_bfloat16_a5f780075831374f8850324448acf94976dea504_True", - "model": { - "name": "Columbia-NLP/LION-Gemma-2b-dpo-v1.0", - "sha": "a5f780075831374f8850324448acf94976dea504", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "GemmaForCausalLM", - "average_score": 11.26209259662469, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3102457036219453, - "normalized_score": 31.02457036219453 - }, - "bbh": { - "name": "BBH", - "value": 0.38810309159554507, - "normalized_score": 14.243045647726928 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05362537764350453, - "normalized_score": 5.362537764350453 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2533557046979866, - "normalized_score": 0.44742729306487633 - }, - "musr": { - "name": "MUSR", - "value": 0.4080729166666666, - "normalized_score": 9.109114583333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16647273936170212, - "normalized_score": 7.385859929078014 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-28", - "submission_date": "2024-07-04", - "generation": 0, - "base_model": "Columbia-NLP/LION-Gemma-2b-dpo-v1.0", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 2.506, - "co2_cost": 1.9891382403495008 - } - }, - { - "id": "Columbia-NLP/LION-Gemma-2b-odpo-v1.0_bfloat16_090d9f59c3b47ab8dd099ddd278c058aa6d2d529_True", - "model": { - "name": "Columbia-NLP/LION-Gemma-2b-odpo-v1.0", - "sha": 
"090d9f59c3b47ab8dd099ddd278c058aa6d2d529", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "GemmaForCausalLM", - "average_score": 11.897378852943028, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.30664858131978706, - "normalized_score": 30.664858131978704 - }, - "bbh": { - "name": "BBH", - "value": 0.3895836210706875, - "normalized_score": 14.023921665416339 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06948640483383686, - "normalized_score": 6.948640483383686 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2424496644295302, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.42791666666666667, - "normalized_score": 12.056250000000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1692154255319149, - "normalized_score": 7.690602836879433 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-28", - "submission_date": "2024-07-13", - "generation": 0, - "base_model": "Columbia-NLP/LION-Gemma-2b-odpo-v1.0", - "hub_license": "", - "hub_hearts": 4, - "params_billions": 2.506, - "co2_cost": 1.924136196677034 - } - }, - { - "id": "Columbia-NLP/LION-Gemma-2b-sft-v1.0_bfloat16_44d6f26fa7e3b0d238064d844569bf8a07b7515e_True", - "model": { - "name": "Columbia-NLP/LION-Gemma-2b-sft-v1.0", - "sha": "44d6f26fa7e3b0d238064d844569bf8a07b7515e", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "GemmaForCausalLM", - "average_score": 12.60325045021582, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3692469314751526, - "normalized_score": 36.924693147515256 - }, - "bbh": { - "name": "BBH", - "value": 0.387877927616119, - "normalized_score": 14.11717086360044 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06797583081570997, - "normalized_score": 6.797583081570997 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2558724832214765, - "normalized_score": 0.7829977628635317 - }, - "musr": { - "name": "MUSR", - "value": 0.4027395833333333, - "normalized_score": 8.309114583333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17819148936170212, - "normalized_score": 8.687943262411347 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-02", - "submission_date": "2024-07-04", - "generation": 0, - "base_model": "Columbia-NLP/LION-Gemma-2b-sft-v1.0", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 2.506, - "co2_cost": 1.9216176259548279 - } - }, - { - "id": "Columbia-NLP/LION-LLaMA-3-8b-dpo-v1.0_bfloat16_3cddd4a6f5939a0a4db1092a0275342b7b9912f3_True", - "model": { - "name": "Columbia-NLP/LION-LLaMA-3-8b-dpo-v1.0", - "sha": "3cddd4a6f5939a0a4db1092a0275342b7b9912f3", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.78540431507501, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4957424079220912, - "normalized_score": 49.57424079220912 - }, - "bbh": { - "name": "BBH", - "value": 0.5028481044452986, - "normalized_score": 30.356398875749075 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11706948640483383, - 
"normalized_score": 11.706948640483382 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28104026845637586, - "normalized_score": 4.138702460850116 - }, - "musr": { - "name": "MUSR", - "value": 0.40971874999999996, - "normalized_score": 10.281510416666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3218916223404255, - "normalized_score": 24.654624704491724 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-28", - "submission_date": "2024-07-04", - "generation": 0, - "base_model": "Columbia-NLP/LION-LLaMA-3-8b-dpo-v1.0", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.3936984403045483 - } - }, - { - "id": "Columbia-NLP/LION-LLaMA-3-8b-odpo-v1.0_bfloat16_e2cec0d68a67092951e9205dfe634a59f2f4a2dd_True", - "model": { - "name": "Columbia-NLP/LION-LLaMA-3-8b-odpo-v1.0", - "sha": "e2cec0d68a67092951e9205dfe634a59f2f4a2dd", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.85320809199725, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.39679938119744496, - "normalized_score": 39.6799381197445 - }, - "bbh": { - "name": "BBH", - "value": 0.5023929881802022, - "normalized_score": 30.457173008350704 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10649546827794562, - "normalized_score": 10.649546827794563 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28523489932885904, - "normalized_score": 4.697986577181204 - }, - "musr": { - "name": "MUSR", - "value": 0.40575, - "normalized_score": 9.718749999999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3152426861702128, - "normalized_score": 23.91585401891253 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-28", - "submission_date": "2024-07-04", - "generation": 0, - "base_model": "Columbia-NLP/LION-LLaMA-3-8b-odpo-v1.0", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.4373937651386457 - } - }, - { - "id": "Columbia-NLP/LION-LLaMA-3-8b-sft-v1.0_bfloat16_822eddb2fd127178d9fb7bb9f4fca0e93ada2836_True", - "model": { - "name": "Columbia-NLP/LION-LLaMA-3-8b-sft-v1.0", - "sha": "822eddb2fd127178d9fb7bb9f4fca0e93ada2836", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.748862299709277, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.38171163623629745, - "normalized_score": 38.17116362362975 - }, - "bbh": { - "name": "BBH", - "value": 0.5087766443418147, - "normalized_score": 30.88426036029 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11404833836858005, - "normalized_score": 11.404833836858005 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.45027083333333334, - "normalized_score": 15.483854166666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32372007978723405, - "normalized_score": 24.857786643026003 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - 
}, - "metadata": { - "upload_date": "2024-07-02", - "submission_date": "2024-07-04", - "generation": 0, - "base_model": "Columbia-NLP/LION-LLaMA-3-8b-sft-v1.0", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.5072262758006536 - } - }, - { - "id": "CombinHorizon/Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES_bfloat16_881729709fbf263b75e0f7341b66b5a880b82d11_True", - "model": { - "name": "CombinHorizon/Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES", - "sha": "881729709fbf263b75e0f7341b66b5a880b82d11", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.765081012881176, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8239958864701216, - "normalized_score": 82.39958864701215 - }, - "bbh": { - "name": "BBH", - "value": 0.6370093752306357, - "normalized_score": 48.19594986631396 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5317220543806647, - "normalized_score": 53.17220543806647 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32466442953020136, - "normalized_score": 9.955257270693513 - }, - "musr": { - "name": "MUSR", - "value": 0.42603125000000003, - "normalized_score": 12.65390625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4979222074468085, - "normalized_score": 44.213578605200944 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-07", - "submission_date": "2024-12-07", - "generation": 1, - "base_model": "CombinHorizon/Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.77, - "co2_cost": 3.3307043527297373 - } - }, - { - "id": "CombinHorizon/Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES_bfloat16_52d6f6308eba9c3a0b9116706fbb1ddc448e6101_True", - "model": { - "name": "CombinHorizon/Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES", - "sha": "52d6f6308eba9c3a0b9116706fbb1ddc448e6101", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.36673047341537, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7564019025075688, - "normalized_score": 75.64019025075689 - }, - "bbh": { - "name": "BBH", - "value": 0.5402085849577634, - "normalized_score": 34.954070231174235 - }, - "math": { - "name": "MATH Level 5", - "value": 0.493202416918429, - "normalized_score": 49.320241691842895 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.40330208333333334, - "normalized_score": 8.779427083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4341755319148936, - "normalized_score": 37.1306146572104 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-29", - "submission_date": "2024-10-29", - "generation": 1, - "base_model": "CombinHorizon/Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 2.091121739266506 - } - }, - { - "id": 
"CombinHorizon/YiSM-blossom5.1-34B-SLERP_bfloat16_ebd8d6507623008567a0548cd0ff9e28cbd6a656_True", - "model": { - "name": "CombinHorizon/YiSM-blossom5.1-34B-SLERP", - "sha": "ebd8d6507623008567a0548cd0ff9e28cbd6a656", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 31.379930205236757, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5033112142448702, - "normalized_score": 50.33112142448702 - }, - "bbh": { - "name": "BBH", - "value": 0.6207548093635428, - "normalized_score": 46.397612796396146 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21525679758308158, - "normalized_score": 21.525679758308158 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35570469798657717, - "normalized_score": 14.093959731543624 - }, - "musr": { - "name": "MUSR", - "value": 0.44134375, - "normalized_score": 14.367968750000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4740691489361702, - "normalized_score": 41.56323877068557 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-27", - "submission_date": "2024-08-27", - "generation": 1, - "base_model": "CombinHorizon/YiSM-blossom5.1-34B-SLERP (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 34.389, - "co2_cost": 6.141628447926251 - } - }, - { - "id": "CombinHorizon/huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES_bfloat16_3284c32f13733d1cd17c723ed754f2c01b65a15c_True", - "model": { - "name": "CombinHorizon/huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES", - "sha": "3284c32f13733d1cd17c723ed754f2c01b65a15c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 45.6578470977455, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8206237228331937, - "normalized_score": 82.06237228331938 - }, - "bbh": { - "name": "BBH", - "value": 0.692924708291253, - "normalized_score": 56.04478184089901 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5944108761329305, - "normalized_score": 59.44108761329305 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3389261744966443, - "normalized_score": 11.85682326621924 - }, - "musr": { - "name": "MUSR", - "value": 0.42072916666666665, - "normalized_score": 12.091145833333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5720578457446809, - "normalized_score": 52.45087174940899 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-07", - "submission_date": "2024-12-07", - "generation": 1, - "base_model": "CombinHorizon/huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 32.764, - "co2_cost": 26.000843124030272 - } - }, - { - "id": "CombinHorizon/huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES_bfloat16_d92237b4b4deccb92a72b5209c79978f09fe3f08_True", - "model": { - "name": "CombinHorizon/huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES", - "sha": "d92237b4b4deccb92a72b5209c79978f09fe3f08", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - 
"architecture": "Qwen2ForCausalLM", - "average_score": 41.46621085774212, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8175762532303177, - "normalized_score": 81.75762532303177 - }, - "bbh": { - "name": "BBH", - "value": 0.6335891556421077, - "normalized_score": 47.767346005909815 - }, - "math": { - "name": "MATH Level 5", - "value": 0.547583081570997, - "normalized_score": 54.7583081570997 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3145973154362416, - "normalized_score": 8.612975391498878 - }, - "musr": { - "name": "MUSR", - "value": 0.42603125000000003, - "normalized_score": 12.453906250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4910239361702128, - "normalized_score": 43.44710401891253 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-07", - "submission_date": "2024-12-07", - "generation": 1, - "base_model": "CombinHorizon/huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.77, - "co2_cost": 3.3342592235028725 - } - }, - { - "id": "CombinHorizon/zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES_bfloat16_d976a5d6768d54c5e59a88fe63238a055c30c06a_True", - "model": { - "name": "CombinHorizon/zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES", - "sha": "d976a5d6768d54c5e59a88fe63238a055c30c06a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 46.76362120574333, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8328136012446974, - "normalized_score": 83.28136012446973 - }, - "bbh": { - "name": "BBH", - "value": 0.6955174427138592, - "normalized_score": 56.82740697772572 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5853474320241692, - "normalized_score": 58.53474320241692 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3674496644295302, - "normalized_score": 15.659955257270694 - }, - "musr": { - "name": "MUSR", - "value": 0.43139583333333337, - "normalized_score": 14.224479166666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5684840425531915, - "normalized_score": 52.05378250591017 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-07", - "submission_date": "2024-12-20", - "generation": 1, - "base_model": "CombinHorizon/zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 14, - "params_billions": 32.764, - "co2_cost": 7.366635145725348 - } - }, - { - "id": "ContactDoctor/Bio-Medical-3B-CoT-012025_float16_37e0ac4b64a82964af3b33324629324cbcbf7cda_False", - "model": { - "name": "ContactDoctor/Bio-Medical-3B-CoT-012025", - "sha": "37e0ac4b64a82964af3b33324629324cbcbf7cda", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 18.73071055946323, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.360379349016166, - "normalized_score": 36.037934901616595 - }, - "bbh": { - "name": "BBH", - "value": 0.438315337642466, - "normalized_score": 
22.263528103895354 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2212990936555891, - "normalized_score": 22.129909365558913 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.3367604166666667, - "normalized_score": 3.195052083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2933843085106383, - "normalized_score": 21.487145390070918 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-06", - "submission_date": "2025-01-15", - "generation": 2, - "base_model": "Qwen/Qwen2.5-3B", - "hub_license": "other", - "hub_hearts": 10, - "params_billions": 3.085, - "co2_cost": 1.5996892891616015 - } - }, - { - "id": "ContactDoctor/Bio-Medical-Llama-3-8B_float16_5436cda92c65b0ef520d278d864305c0f429824b_False", - "model": { - "name": "ContactDoctor/Bio-Medical-Llama-3-8B", - "sha": "5436cda92c65b0ef520d278d864305c0f429824b", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.91745266981443, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4422365988909427, - "normalized_score": 44.223659889094264 - }, - "bbh": { - "name": "BBH", - "value": 0.486311802622738, - "normalized_score": 26.195811296028182 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06722054380664652, - "normalized_score": 6.722054380664652 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3338926174496644, - "normalized_score": 11.185682326621922 - }, - "musr": { - "name": "MUSR", - "value": 0.35139583333333335, - "normalized_score": 1.7578124999999993 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36477726063829785, - "normalized_score": 29.419695626477544 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-09", - "submission_date": "2024-12-24", - "generation": 1, - "base_model": "meta-llama/Meta-Llama-3-8B-Instruct", - "hub_license": "other", - "hub_hearts": 73, - "params_billions": 4.015, - "co2_cost": 1.2351165102170476 - } - }, - { - "id": "CoolSpring/Qwen2-0.5B-Abyme_bfloat16_a48b7c04b854e5c60fe3464f96904bfc53c8640c_True", - "model": { - "name": "CoolSpring/Qwen2-0.5B-Abyme", - "sha": "a48b7c04b854e5c60fe3464f96904bfc53c8640c", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.999994251217356, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19151850423542865, - "normalized_score": 19.151850423542868 - }, - "bbh": { - "name": "BBH", - "value": 0.2861834296481826, - "normalized_score": 2.2764835705971893 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02945619335347432, - "normalized_score": 2.9456193353474323 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2533557046979866, - "normalized_score": 0.44742729306487633 - }, - "musr": { - "name": "MUSR", - "value": 0.35421875, - "normalized_score": 1.47734375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.13331117021276595, - "normalized_score": 3.701241134751772 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": 
false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-18", - "submission_date": "2024-09-04", - "generation": 1, - "base_model": "Qwen/Qwen2-0.5B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 2.355594617085158 - } - }, - { - "id": "CoolSpring/Qwen2-0.5B-Abyme-merge2_bfloat16_02c4c601453f7ecbfab5c95bf5afa889350026ba_True", - "model": { - "name": "CoolSpring/Qwen2-0.5B-Abyme-merge2", - "sha": "02c4c601453f7ecbfab5c95bf5afa889350026ba", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 6.320258162859158, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2021846478454944, - "normalized_score": 20.21846478454944 - }, - "bbh": { - "name": "BBH", - "value": 0.29942723009138733, - "normalized_score": 3.7090413943355123 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03323262839879154, - "normalized_score": 3.3232628398791544 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.3687291666666667, - "normalized_score": 3.8911458333333346 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.14893617021276595, - "normalized_score": 5.437352245862883 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-27", - "submission_date": "2024-07-27", - "generation": 1, - "base_model": "CoolSpring/Qwen2-0.5B-Abyme-merge2 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.2193908884207358 - } - }, - { - "id": "CoolSpring/Qwen2-0.5B-Abyme-merge3_bfloat16_86fed893893cc2a6240f0ea09ce2eeda1a5178cc_True", - "model": { - "name": "CoolSpring/Qwen2-0.5B-Abyme-merge3", - "sha": "86fed893893cc2a6240f0ea09ce2eeda1a5178cc", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 6.8201960830112, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23860468002677343, - "normalized_score": 23.860468002677344 - }, - "bbh": { - "name": "BBH", - "value": 0.30031404525933675, - "normalized_score": 4.301149162861492 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03172205438066465, - "normalized_score": 3.1722054380664653 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.35009375000000004, - "normalized_score": 2.1283854166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15001662234042554, - "normalized_score": 5.557402482269504 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-27", - "submission_date": "2024-07-27", - "generation": 1, - "base_model": "CoolSpring/Qwen2-0.5B-Abyme-merge3 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 0.63, - "co2_cost": 1.2203426634256123 - } - }, - { - "id": "Corianas/Neural-Mistral-7B_float16_cde6f0126310f38b6781cc26cdb9a02416b896b9_True", - "model": { - "name": 
"Corianas/Neural-Mistral-7B", - "sha": "cde6f0126310f38b6781cc26cdb9a02416b896b9", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 18.200438567718393, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5489235229191878, - "normalized_score": 54.892352291918776 - }, - "bbh": { - "name": "BBH", - "value": 0.4428023404192858, - "normalized_score": 22.431162626805257 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0188821752265861, - "normalized_score": 1.8882175226586102 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.3872708333333333, - "normalized_score": 6.208854166666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27376994680851063, - "normalized_score": 19.307771867612292 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-03-05", - "submission_date": "2024-12-06", - "generation": 0, - "base_model": "Corianas/Neural-Mistral-7B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.9234265666392719 - } - }, - { - "id": "Corianas/Quokka_2.7b_float16_d9b3274662c2ac6c6058daac90504b5a8ebcac3c_False", - "model": { - "name": "Corianas/Quokka_2.7b", - "sha": "d9b3274662c2ac6c6058daac90504b5a8ebcac3c", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "GPT2LMHeadModel", - "average_score": 4.995249580966088, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17490702447284318, - "normalized_score": 17.490702447284317 - }, - "bbh": { - "name": "BBH", - "value": 0.3055474937424842, - "normalized_score": 3.1652676176883503 - }, - "math": { - "name": "MATH Level 5", - "value": 0.008308157099697885, - "normalized_score": 0.8308157099697886 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2558724832214765, - "normalized_score": 0.7829977628635317 - }, - "musr": { - "name": "MUSR", - "value": 0.3908333333333333, - "normalized_score": 6.087499999999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11452792553191489, - "normalized_score": 1.6142139479905429 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-03-30", - "submission_date": "2024-12-05", - "generation": 0, - "base_model": "Corianas/Quokka_2.7b", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 2.786, - "co2_cost": 0.5873825038450923 - } - }, - { - "id": "Corianas/llama-3-reactor_float16_bef2eac42fd89baa0064badbc9c7958ad9ccbed3_False", - "model": { - "name": "Corianas/llama-3-reactor", - "sha": "bef2eac42fd89baa0064badbc9c7958ad9ccbed3", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.995469727045661, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23001192391742797, - "normalized_score": 23.001192391742798 - }, - "bbh": { - "name": "BBH", - "value": 0.4457148560545015, - "normalized_score": 21.88855981925079 - }, - "math": { - "name": 
"MATH Level 5", - "value": 0.04682779456193353, - "normalized_score": 4.682779456193353 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.39771874999999995, - "normalized_score": 8.014843750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2800864361702128, - "normalized_score": 20.00960401891253 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-20", - "submission_date": "2024-07-23", - "generation": 0, - "base_model": "Corianas/llama-3-reactor", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": -1.0, - "co2_cost": 1.6423296412642534 - } - }, - { - "id": "CortexLM/btlm-7b-base-v0.2_bfloat16_eda8b4298365a26c8981316e09427c237b11217f_False", - "model": { - "name": "CortexLM/btlm-7b-base-v0.2", - "sha": "eda8b4298365a26c8981316e09427c237b11217f", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 8.920254930681883, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.14832865685270635, - "normalized_score": 14.832865685270637 - }, - "bbh": { - "name": "BBH", - "value": 0.4006411985841813, - "normalized_score": 16.19327709708517 - }, - "math": { - "name": "MATH Level 5", - "value": 0.015105740181268883, - "normalized_score": 1.5105740181268883 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2533557046979866, - "normalized_score": 0.44742729306487633 - }, - "musr": { - "name": "MUSR", - "value": 0.38460416666666664, - "normalized_score": 5.542187500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2349567819148936, - "normalized_score": 14.995197990543732 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-13", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "CortexLM/btlm-7b-base-v0.2", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 6.885, - "co2_cost": 1.422716700807614 - } - }, - { - "id": "Cran-May/SCE-2-24B_bfloat16_6a477b347fa6c0ce76bcaf353ddc282dd1cc75c3_True", - "model": { - "name": "Cran-May/SCE-2-24B", - "sha": "6a477b347fa6c0ce76bcaf353ddc282dd1cc75c3", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 31.951539780759408, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5865924635522636, - "normalized_score": 58.65924635522636 - }, - "bbh": { - "name": "BBH", - "value": 0.6264692798019763, - "normalized_score": 46.32574571477496 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18957703927492447, - "normalized_score": 18.957703927492446 - }, - "gpqa": { - "name": "GPQA", - "value": 0.337248322147651, - "normalized_score": 11.633109619686799 - }, - "musr": { - "name": "MUSR", - "value": 0.4528125, - "normalized_score": 16.001562499999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.461186835106383, - "normalized_score": 40.131870567375884 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - 
"upload_date": "2025-02-03", - "submission_date": "2025-02-04", - "generation": 1, - "base_model": "Cran-May/SCE-2-24B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 23.572, - "co2_cost": 2.7042352510701058 - } - }, - { - "id": "Cran-May/SCE-3-24B_bfloat16_bf2b658dd404c423228e7001498bd69c2d147da2_True", - "model": { - "name": "Cran-May/SCE-3-24B", - "sha": "bf2b658dd404c423228e7001498bd69c2d147da2", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 30.620429636222998, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5465254413844156, - "normalized_score": 54.652544138441556 - }, - "bbh": { - "name": "BBH", - "value": 0.597283045074691, - "normalized_score": 42.278564800296486 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18806646525679757, - "normalized_score": 18.806646525679756 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3464765100671141, - "normalized_score": 12.863534675615215 - }, - "musr": { - "name": "MUSR", - "value": 0.44347916666666665, - "normalized_score": 14.601562499999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4646775265957447, - "normalized_score": 40.51972517730496 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-04", - "generation": 1, - "base_model": "Cran-May/SCE-3-24B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 23.572, - "co2_cost": 2.3631698036966537 - } - }, - { - "id": "Cran-May/T.E-8.1_bfloat16_5f84709710dcce7cc05fa12473e8bb207fe25849_True", - "model": { - "name": "Cran-May/T.E-8.1", - "sha": "5f84709710dcce7cc05fa12473e8bb207fe25849", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.699515240521414, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7076922565459647, - "normalized_score": 70.76922565459647 - }, - "bbh": { - "name": "BBH", - "value": 0.5581754708123893, - "normalized_score": 37.02437662584371 - }, - "math": { - "name": "MATH Level 5", - "value": 0.44561933534743203, - "normalized_score": 44.561933534743204 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31291946308724833, - "normalized_score": 8.389261744966444 - }, - "musr": { - "name": "MUSR", - "value": 0.4505208333333333, - "normalized_score": 15.31510416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4432347074468085, - "normalized_score": 38.13718971631205 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-27", - "submission_date": "2024-09-29", - "generation": 1, - "base_model": "Cran-May/T.E-8.1 (Merge)", - "hub_license": "cc-by-nc-sa-4.0", - "hub_hearts": 3, - "params_billions": 7.616, - "co2_cost": 2.1812656007167486 - } - }, - { - "id": "Cran-May/merge_model_20250308_2_bfloat16_07c1b6130747a2508a806d3b8df13e221312c713_False", - "model": { - "name": "Cran-May/merge_model_20250308_2", - "sha": "07c1b6130747a2508a806d3b8df13e221312c713", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 
40.257533671744866, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5932370554572978, - "normalized_score": 59.32370554572978 - }, - "bbh": { - "name": "BBH", - "value": 0.6585311075974459, - "normalized_score": 50.99569841983836 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4380664652567976, - "normalized_score": 43.80664652567976 - }, - "gpqa": { - "name": "GPQA", - "value": 0.39093959731543626, - "normalized_score": 18.791946308724835 - }, - "musr": { - "name": "MUSR", - "value": 0.4793541666666667, - "normalized_score": 19.519270833333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5419714095744681, - "normalized_score": 49.10793439716312 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-08", - "submission_date": "2025-03-08", - "generation": 1, - "base_model": "Cran-May/merge_model_20250308_2 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 2.021194536877009 - } - }, - { - "id": "Cran-May/merge_model_20250308_3_bfloat16_1062ad627bd1823458a3b1b22f434941dea75f35_False", - "model": { - "name": "Cran-May/merge_model_20250308_3", - "sha": "1062ad627bd1823458a3b1b22f434941dea75f35", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 33.22959855955027, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6017799438822324, - "normalized_score": 60.177994388223254 - }, - "bbh": { - "name": "BBH", - "value": 0.6271459892225041, - "normalized_score": 46.56854499584235 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2545317220543807, - "normalized_score": 25.45317220543807 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3221476510067114, - "normalized_score": 9.61968680089485 - }, - "musr": { - "name": "MUSR", - "value": 0.43204166666666666, - "normalized_score": 13.538541666666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.49617686170212766, - "normalized_score": 44.019651300236404 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-08", - "submission_date": "2025-03-08", - "generation": 1, - "base_model": "Cran-May/merge_model_20250308_3 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 2.025239989867329 - } - }, - { - "id": "Cran-May/merge_model_20250308_4_bfloat16_5f8c7b1c49d946c3feb028f138add7efabde94e7_False", - "model": { - "name": "Cran-May/merge_model_20250308_4", - "sha": "5f8c7b1c49d946c3feb028f138add7efabde94e7", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 37.569693940140574, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4539521802151624, - "normalized_score": 45.39521802151624 - }, - "bbh": { - "name": "BBH", - "value": 0.666435217186487, - "normalized_score": 52.02370667184541 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4199395770392749, - "normalized_score": 41.99395770392749 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3976510067114094, - "normalized_score": 19.686800894854585 - }, - "musr": { 
- "name": "MUSR", - "value": 0.4688125, - "normalized_score": 17.801562500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5366522606382979, - "normalized_score": 48.51691784869976 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-08", - "submission_date": "2025-03-08", - "generation": 1, - "base_model": "Cran-May/merge_model_20250308_4 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 1.9787744084472945 - } - }, - { - "id": "Cran-May/tempmotacilla-cinerea-0308_bfloat16_36ba31b21cfe855ceb4dad313a5cf2b98616038c_True", - "model": { - "name": "Cran-May/tempmotacilla-cinerea-0308", - "sha": "36ba31b21cfe855ceb4dad313a5cf2b98616038c", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 43.6405549503723, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8084837121061007, - "normalized_score": 80.84837121061007 - }, - "bbh": { - "name": "BBH", - "value": 0.6550960569488126, - "normalized_score": 50.59894860885105 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5551359516616314, - "normalized_score": 55.51359516616314 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3624161073825503, - "normalized_score": 14.988814317673373 - }, - "musr": { - "name": "MUSR", - "value": 0.42082291666666666, - "normalized_score": 12.669531249999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5250166223404256, - "normalized_score": 47.224069148936174 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-08", - "submission_date": "2025-03-09", - "generation": 1, - "base_model": "Cran-May/tempmotacilla-cinerea-0308 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 14.766, - "co2_cost": 3.6420107969216504 - } - }, - { - "id": "CreitinGameplays/Llama-3.1-8B-R1-v0.1_bfloat16_48af6b20168c9e157b33a7a005b6515901c93b0e_True", - "model": { - "name": "CreitinGameplays/Llama-3.1-8B-R1-v0.1", - "sha": "48af6b20168c9e157b33a7a005b6515901c93b0e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 10.034841772545372, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.323485019747603, - "normalized_score": 32.3485019747603 - }, - "bbh": { - "name": "BBH", - "value": 0.3057485865545513, - "normalized_score": 3.2159805820764724 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18126888217522658, - "normalized_score": 18.12688821752266 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.36215624999999996, - "normalized_score": 2.6028645833333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12516622340425532, - "normalized_score": 2.7962470449172567 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-19", - "submission_date": "2025-03-10", - "generation": 1, - "base_model": 
"CreitinGameplays/Llama-3.1-8B-R1-v0.1 (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.745944048401834 - } - }, - { - "id": "CultriX/Qwen2.5-14B-Broca_bfloat16_51204ee25a629abfd6d5e77a850b5e7a36c78462_False", - "model": { - "name": "CultriX/Qwen2.5-14B-Broca", - "sha": "51204ee25a629abfd6d5e77a850b5e7a36c78462", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 37.92450077102951, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.560414145578177, - "normalized_score": 56.0414145578177 - }, - "bbh": { - "name": "BBH", - "value": 0.6527145981540362, - "normalized_score": 50.034411781701095 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3580060422960725, - "normalized_score": 35.80060422960725 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38674496644295303, - "normalized_score": 18.232662192393736 - }, - "musr": { - "name": "MUSR", - "value": 0.47665625, - "normalized_score": 18.94869791666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5364029255319149, - "normalized_score": 48.48921394799055 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-23", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 4.154002933010168 - } - }, - { - "id": "CultriX/Qwen2.5-14B-BrocaV9_bfloat16_883dafbff4edb8c83ef58a33413d4e09e922a53d_False", - "model": { - "name": "CultriX/Qwen2.5-14B-BrocaV9", - "sha": "883dafbff4edb8c83ef58a33413d4e09e922a53d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.258747166851514, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6762933460994606, - "normalized_score": 67.62933460994606 - }, - "bbh": { - "name": "BBH", - "value": 0.6391383585238984, - "normalized_score": 48.05322494738571 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3814199395770393, - "normalized_score": 38.14199395770393 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3640939597315436, - "normalized_score": 15.212527964205815 - }, - "musr": { - "name": "MUSR", - "value": 0.46903125, - "normalized_score": 18.395572916666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5330784574468085, - "normalized_score": 48.119828605200944 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-10", - "generation": 1, - "base_model": "CultriX/Qwen2.5-14B-BrocaV9 (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 3.5480056069748076 - } - }, - { - "id": "CultriX/Qwen2.5-14B-Brocav3_bfloat16_6f3fe686a79dcbcd5835ca100e194c49f493167b_False", - "model": { - "name": "CultriX/Qwen2.5-14B-Brocav3", - "sha": "6f3fe686a79dcbcd5835ca100e194c49f493167b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.84683205575066, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - 
"name": "IFEval", - "value": 0.6951776841004091, - "normalized_score": 69.51776841004092 - }, - "bbh": { - "name": "BBH", - "value": 0.6452353476182755, - "normalized_score": 49.04911178348141 - }, - "math": { - "name": "MATH Level 5", - "value": 0.38746223564954685, - "normalized_score": 38.74622356495468 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35906040268456374, - "normalized_score": 14.541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.4756354166666667, - "normalized_score": 19.254427083333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.531748670212766, - "normalized_score": 47.972074468085104 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-23", - "submission_date": "2024-12-23", - "generation": 1, - "base_model": "CultriX/Qwen2.5-14B-Brocav3 (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 3.6334779798943906 - } - }, - { - "id": "CultriX/Qwen2.5-14B-Brocav6_bfloat16_bd981505b6950df69216b260c3c0d86124fded7b_False", - "model": { - "name": "CultriX/Qwen2.5-14B-Brocav6", - "sha": "bd981505b6950df69216b260c3c0d86124fded7b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.840730335689976, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6995239298394925, - "normalized_score": 69.95239298394925 - }, - "bbh": { - "name": "BBH", - "value": 0.6388835266626555, - "normalized_score": 47.81922480607394 - }, - "math": { - "name": "MATH Level 5", - "value": 0.38746223564954685, - "normalized_score": 38.74622356495468 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3674496644295302, - "normalized_score": 15.659955257270694 - }, - "musr": { - "name": "MUSR", - "value": 0.47420833333333334, - "normalized_score": 18.87604166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5319148936170213, - "normalized_score": 47.99054373522459 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-23", - "submission_date": "2024-12-23", - "generation": 1, - "base_model": "CultriX/Qwen2.5-14B-Brocav6 (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 3.5828022711198058 - } - }, - { - "id": "CultriX/Qwen2.5-14B-Brocav7_float16_06acee7f6e9796081ced6201001784907c77f96f_False", - "model": { - "name": "CultriX/Qwen2.5-14B-Brocav7", - "sha": "06acee7f6e9796081ced6201001784907c77f96f", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.617379910439844, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6723715297632504, - "normalized_score": 67.23715297632504 - }, - "bbh": { - "name": "BBH", - "value": 0.6444026981327182, - "normalized_score": 48.90536078331687 - }, - "math": { - "name": "MATH Level 5", - "value": 0.38444108761329304, - "normalized_score": 38.4441087613293 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3674496644295302, - "normalized_score": 15.659955257270694 - }, - "musr": { - "name": "MUSR", - "value": 0.47960416666666666, - "normalized_score": 20.150520833333328 - }, 
- "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5257646276595744, - "normalized_score": 47.307180851063826 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-23", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 3.402698845183717 - } - }, - { - "id": "CultriX/Qwen2.5-14B-Emerged_bfloat16_8bf0e31b23ee22858bbde2cee44dde88963f5084_False", - "model": { - "name": "CultriX/Qwen2.5-14B-Emerged", - "sha": "8bf0e31b23ee22858bbde2cee44dde88963f5084", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 37.95214334488745, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7000237148543642, - "normalized_score": 70.00237148543641 - }, - "bbh": { - "name": "BBH", - "value": 0.6260033680703311, - "normalized_score": 45.932419368684656 - }, - "math": { - "name": "MATH Level 5", - "value": 0.324773413897281, - "normalized_score": 32.477341389728096 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3573825503355705, - "normalized_score": 14.317673378076066 - }, - "musr": { - "name": "MUSR", - "value": 0.46909375000000003, - "normalized_score": 18.47005208333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5186170212765957, - "normalized_score": 46.51300236406619 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-19", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 3.6147203352417563 - } - }, - { - "id": "CultriX/Qwen2.5-14B-Emergedv3_bfloat16_f4df1b9c2bf37bbfd6b2e8f2ff244c6029a5d546_False", - "model": { - "name": "CultriX/Qwen2.5-14B-Emergedv3", - "sha": "f4df1b9c2bf37bbfd6b2e8f2ff244c6029a5d546", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.656291851731986, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6388493641316153, - "normalized_score": 63.884936413161526 - }, - "bbh": { - "name": "BBH", - "value": 0.6190728411056029, - "normalized_score": 44.731608242608615 - }, - "math": { - "name": "MATH Level 5", - "value": 0.43580060422960726, - "normalized_score": 43.58006042296073 - }, - "gpqa": { - "name": "GPQA", - "value": 0.36073825503355705, - "normalized_score": 14.76510067114094 - }, - "musr": { - "name": "MUSR", - "value": 0.4728125, - "normalized_score": 18.601562500000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5173703457446809, - "normalized_score": 46.3744828605201 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-21", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 3.8378565529944946 - } - }, - { - "id": "CultriX/Qwen2.5-14B-FinalMerge_bfloat16_8fd624d0d8989a312d344772814da3575423897a_False", - "model": { - 
"name": "CultriX/Qwen2.5-14B-FinalMerge", - "sha": "8fd624d0d8989a312d344772814da3575423897a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 32.23626965408581, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.48909781601705693, - "normalized_score": 48.909781601705696 - }, - "bbh": { - "name": "BBH", - "value": 0.5714945310011449, - "normalized_score": 38.16247948342354 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3814199395770393, - "normalized_score": 38.14199395770393 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3548657718120805, - "normalized_score": 13.982102908277403 - }, - "musr": { - "name": "MUSR", - "value": 0.43790625, - "normalized_score": 14.504947916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4574468085106383, - "normalized_score": 39.716312056737586 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-23", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 3.8878829918808604 - } - }, - { - "id": "CultriX/Qwen2.5-14B-Hyper_bfloat16_a6399c43f84736ed1b11d8cc7a25edf634781207_False", - "model": { - "name": "CultriX/Qwen2.5-14B-Hyper", - "sha": "a6399c43f84736ed1b11d8cc7a25edf634781207", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 37.761935040602644, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5391317260424563, - "normalized_score": 53.91317260424563 - }, - "bbh": { - "name": "BBH", - "value": 0.6507453346766106, - "normalized_score": 49.759879377498386 - }, - "math": { - "name": "MATH Level 5", - "value": 0.34365558912386707, - "normalized_score": 34.3655589123867 - }, - "gpqa": { - "name": "GPQA", - "value": 0.39177852348993286, - "normalized_score": 18.903803131991047 - }, - "musr": { - "name": "MUSR", - "value": 0.48983333333333334, - "normalized_score": 21.029166666666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5374002659574468, - "normalized_score": 48.60002955082743 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-19", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 7.678342043702582 - } - }, - { - "id": "CultriX/Qwen2.5-14B-HyperMarck-dl_bfloat16_77ca2edd6650455182d0c7e6a7be4249cfc34f8c_False", - "model": { - "name": "CultriX/Qwen2.5-14B-HyperMarck-dl", - "sha": "77ca2edd6650455182d0c7e6a7be4249cfc34f8c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.89416756441815, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6650276821057017, - "normalized_score": 66.50276821057017 - }, - "bbh": { - "name": "BBH", - "value": 0.6096480033153927, - "normalized_score": 43.78585928068085 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5287009063444109, - 
"normalized_score": 52.87009063444109 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3674496644295302, - "normalized_score": 15.659955257270694 - }, - "musr": { - "name": "MUSR", - "value": 0.4415625, - "normalized_score": 15.095312499999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5090591755319149, - "normalized_score": 45.4510195035461 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-16", - "submission_date": "2025-02-16", - "generation": 1, - "base_model": "CultriX/Qwen2.5-14B-HyperMarck-dl (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 1.9685825864143358 - } - }, - { - "id": "CultriX/Qwen2.5-14B-Hyperionv3_bfloat16_bc36be5b5ca3053ae96d85e962249efd0b283c82_False", - "model": { - "name": "CultriX/Qwen2.5-14B-Hyperionv3", - "sha": "bc36be5b5ca3053ae96d85e962249efd0b283c82", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.76212059640138, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6836371937570092, - "normalized_score": 68.36371937570092 - }, - "bbh": { - "name": "BBH", - "value": 0.6522165609411941, - "normalized_score": 49.95005488379496 - }, - "math": { - "name": "MATH Level 5", - "value": 0.37009063444108764, - "normalized_score": 37.00906344410876 - }, - "gpqa": { - "name": "GPQA", - "value": 0.37080536912751677, - "normalized_score": 16.10738255033557 - }, - "musr": { - "name": "MUSR", - "value": 0.47296875, - "normalized_score": 18.921093749999994 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5339926861702128, - "normalized_score": 48.22140957446809 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-10", - "submission_date": "2025-01-19", - "generation": 1, - "base_model": "CultriX/Qwen2.5-14B-Hyperionv3 (Merge)", - "hub_license": "", - "hub_hearts": 4, - "params_billions": 14.766, - "co2_cost": 3.9657107513912075 - } - }, - { - "id": "CultriX/Qwen2.5-14B-Hyperionv4_bfloat16_60cc366b0648bcb40ed22ebc53d64cc5aca25550_False", - "model": { - "name": "CultriX/Qwen2.5-14B-Hyperionv4", - "sha": "60cc366b0648bcb40ed22ebc53d64cc5aca25550", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 37.67001853618524, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5415796752616391, - "normalized_score": 54.157967526163915 - }, - "bbh": { - "name": "BBH", - "value": 0.6471791978856551, - "normalized_score": 49.07652018118392 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3474320241691843, - "normalized_score": 34.74320241691843 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3976510067114094, - "normalized_score": 19.686800894854585 - }, - "musr": { - "name": "MUSR", - "value": 0.48319791666666667, - "normalized_score": 19.86640625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5364029255319149, - "normalized_score": 48.48921394799055 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - 
"metadata": { - "upload_date": "2025-01-19", - "submission_date": "2025-01-19", - "generation": 1, - "base_model": "CultriX/Qwen2.5-14B-Hyperionv4 (Merge)", - "hub_license": "", - "hub_hearts": 3, - "params_billions": 14.766, - "co2_cost": 4.073613730173503 - } - }, - { - "id": "CultriX/Qwen2.5-14B-Hyperionv5_bfloat16_e0f4941349664a75ddd03e4d2c190284c951e54b_False", - "model": { - "name": "CultriX/Qwen2.5-14B-Hyperionv5", - "sha": "e0f4941349664a75ddd03e4d2c190284c951e54b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.724969700460356, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6729211824625327, - "normalized_score": 67.29211824625327 - }, - "bbh": { - "name": "BBH", - "value": 0.644265785086055, - "normalized_score": 48.948279827370094 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3821752265861027, - "normalized_score": 38.21752265861027 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3716442953020134, - "normalized_score": 16.21923937360179 - }, - "musr": { - "name": "MUSR", - "value": 0.4795416666666667, - "normalized_score": 19.876041666666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5301695478723404, - "normalized_score": 47.796616430260045 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-19", - "submission_date": "2025-01-19", - "generation": 1, - "base_model": "CultriX/Qwen2.5-14B-Hyperionv5 (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 3.973468255735361 - } - }, - { - "id": "CultriX/Qwen2.5-14B-MegaMerge-pt2_bfloat16_20397f6cafc09c2cb74f105867cd99b3c68c71dc_False", - "model": { - "name": "CultriX/Qwen2.5-14B-MegaMerge-pt2", - "sha": "20397f6cafc09c2cb74f105867cd99b3c68c71dc", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.796529761231035, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.568307645935008, - "normalized_score": 56.8307645935008 - }, - "bbh": { - "name": "BBH", - "value": 0.6577703330510146, - "normalized_score": 50.9079030473653 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3995468277945619, - "normalized_score": 39.95468277945619 - }, - "gpqa": { - "name": "GPQA", - "value": 0.37919463087248323, - "normalized_score": 17.225950782997764 - }, - "musr": { - "name": "MUSR", - "value": 0.472875, - "normalized_score": 18.74270833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5420545212765957, - "normalized_score": 49.11716903073285 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-10-25", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 4.500867569666822 - } - }, - { - "id": "CultriX/Qwen2.5-14B-MergeStock_bfloat16_fa00543296f2731793dfb0aac667571ccf1abb5b_False", - "model": { - "name": "CultriX/Qwen2.5-14B-MergeStock", - "sha": "fa00543296f2731793dfb0aac667571ccf1abb5b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - 
"architecture": "Qwen2ForCausalLM", - "average_score": 38.74423639967271, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5685326046002386, - "normalized_score": 56.85326046002385 - }, - "bbh": { - "name": "BBH", - "value": 0.6579336391923106, - "normalized_score": 51.00939101332111 - }, - "math": { - "name": "MATH Level 5", - "value": 0.41465256797583083, - "normalized_score": 41.46525679758308 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3733221476510067, - "normalized_score": 16.442953020134222 - }, - "musr": { - "name": "MUSR", - "value": 0.4676354166666667, - "normalized_score": 17.854427083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.539561170212766, - "normalized_score": 48.84013002364066 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-10-24", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 6.645908481708354 - } - }, - { - "id": "CultriX/Qwen2.5-14B-ReasoningMerge_bfloat16_a310eb51c1cdcd4217e2aa303f7aac938dcc9ae1_False", - "model": { - "name": "CultriX/Qwen2.5-14B-ReasoningMerge", - "sha": "a310eb51c1cdcd4217e2aa303f7aac938dcc9ae1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.645885900911715, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.46054690443578594, - "normalized_score": 46.05469044357859 - }, - "bbh": { - "name": "BBH", - "value": 0.6578226399295218, - "normalized_score": 50.867897804151305 - }, - "math": { - "name": "MATH Level 5", - "value": 0.520392749244713, - "normalized_score": 52.0392749244713 - }, - "gpqa": { - "name": "GPQA", - "value": 0.4077181208053691, - "normalized_score": 21.029082774049215 - }, - "musr": { - "name": "MUSR", - "value": 0.5165937500000001, - "normalized_score": 25.60755208333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5344913563829787, - "normalized_score": 48.27681737588653 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-18", - "submission_date": "2025-02-18", - "generation": 1, - "base_model": "CultriX/Qwen2.5-14B-ReasoningMerge (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 14.766, - "co2_cost": 3.625354688150173 - } - }, - { - "id": "CultriX/Qwen2.5-14B-Ultimav2_bfloat16_9c805171d56f5d8720c687084c1ffc26bdf0acba_False", - "model": { - "name": "CultriX/Qwen2.5-14B-Ultimav2", - "sha": "9c805171d56f5d8720c687084c1ffc26bdf0acba", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.835600416037856, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5500228283177524, - "normalized_score": 55.00228283177525 - }, - "bbh": { - "name": "BBH", - "value": 0.6555027486976712, - "normalized_score": 50.441052694140744 - }, - "math": { - "name": "MATH Level 5", - "value": 0.38444108761329304, - "normalized_score": 38.4441087613293 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3850671140939597, - "normalized_score": 
18.008948545861294 - }, - "musr": { - "name": "MUSR", - "value": 0.4965625, - "normalized_score": 22.036979166666658 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5417220744680851, - "normalized_score": 49.0802304964539 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-04", - "submission_date": "2025-02-05", - "generation": 1, - "base_model": "CultriX/Qwen2.5-14B-Ultimav2 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 14.766, - "co2_cost": 5.907627333688703 - } - }, - { - "id": "CultriX/Qwen2.5-14B-Unity_bfloat16_1d15e7941e6ceff5d6e4f293378947bee721a24d_False", - "model": { - "name": "CultriX/Qwen2.5-14B-Unity", - "sha": "1d15e7941e6ceff5d6e4f293378947bee721a24d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.29922923584618, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6738952645646883, - "normalized_score": 67.38952645646884 - }, - "bbh": { - "name": "BBH", - "value": 0.6019955540977778, - "normalized_score": 42.25861689291335 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4312688821752266, - "normalized_score": 43.126888217522655 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34731543624161076, - "normalized_score": 12.975391498881436 - }, - "musr": { - "name": "MUSR", - "value": 0.4679479166666667, - "normalized_score": 18.760156249999994 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.507563164893617, - "normalized_score": 45.28479609929077 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-21", - "submission_date": "2024-12-21", - "generation": 1, - "base_model": "CultriX/Qwen2.5-14B-Unity (Merge)", - "hub_license": "", - "hub_hearts": 3, - "params_billions": 14.766, - "co2_cost": 3.8273777920130327 - } - }, - { - "id": "CultriX/Qwen2.5-14B-Wernicke_bfloat16_622c0a58ecb0c0c679d7381a823d2ae5ac2b8ce1_False", - "model": { - "name": "CultriX/Qwen2.5-14B-Wernicke", - "sha": "622c0a58ecb0c0c679d7381a823d2ae5ac2b8ce1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 37.94335063991926, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5234699486252034, - "normalized_score": 52.34699486252033 - }, - "bbh": { - "name": "BBH", - "value": 0.6568359662501574, - "normalized_score": 50.642876092422625 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3814199395770393, - "normalized_score": 38.14199395770393 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3934563758389262, - "normalized_score": 19.12751677852349 - }, - "musr": { - "name": "MUSR", - "value": 0.46890625, - "normalized_score": 18.24661458333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5423869680851063, - "normalized_score": 49.15410756501182 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-21", - "submission_date": "2024-10-22", - "generation": 1, - "base_model": "CultriX/Qwen2.5-14B-Wernicke (Merge)", - 
"hub_license": "apache-2.0", - "hub_hearts": 7, - "params_billions": 14.77, - "co2_cost": 4.444468975984375 - } - }, - { - "id": "CultriX/Qwen2.5-14B-Wernicke-SFT_bfloat16_3b68dfba2cf79e4a15e8f4271f7d4b62d2ab9f26_True", - "model": { - "name": "CultriX/Qwen2.5-14B-Wernicke-SFT", - "sha": "3b68dfba2cf79e4a15e8f4271f7d4b62d2ab9f26", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 33.549511791224056, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4937443760333692, - "normalized_score": 49.37443760333692 - }, - "bbh": { - "name": "BBH", - "value": 0.6460586236565512, - "normalized_score": 49.33057176327372 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3595166163141994, - "normalized_score": 35.95166163141994 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3540268456375839, - "normalized_score": 13.870246085011187 - }, - "musr": { - "name": "MUSR", - "value": 0.38999999999999996, - "normalized_score": 7.549999999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5069813829787234, - "normalized_score": 45.2201536643026 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-16", - "submission_date": "2024-11-17", - "generation": 1, - "base_model": "CultriX/Qwen2.5-14B-Wernicke-SFT (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.77, - "co2_cost": 2.7860254045876838 - } - }, - { - "id": "CultriX/Qwen2.5-14B-Wernicke-SLERP_bfloat16_180175561e8061be067fc349ad4491270f19976f_True", - "model": { - "name": "CultriX/Qwen2.5-14B-Wernicke-SLERP", - "sha": "180175561e8061be067fc349ad4491270f19976f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.5436519453458, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5588904107767391, - "normalized_score": 55.88904107767391 - }, - "bbh": { - "name": "BBH", - "value": 0.6440929009604598, - "normalized_score": 49.372327095724025 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4486404833836858, - "normalized_score": 44.864048338368576 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34395973154362414, - "normalized_score": 12.527964205816552 - }, - "musr": { - "name": "MUSR", - "value": 0.41403125, - "normalized_score": 11.120572916666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5093916223404256, - "normalized_score": 45.48795803782507 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-10-25", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.491, - "co2_cost": 4.311975384763283 - } - }, - { - "id": "CultriX/Qwen2.5-14B-Wernickev3_bfloat16_bd141b0df78ad1f6e2938edf167c2305b395a2b2_False", - "model": { - "name": "CultriX/Qwen2.5-14B-Wernickev3", - "sha": "bd141b0df78ad1f6e2938edf167c2305b395a2b2", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.38114202702078, - "has_chat_template": false - }, - 
"evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7048198779239085, - "normalized_score": 70.48198779239087 - }, - "bbh": { - "name": "BBH", - "value": 0.6184146992839421, - "normalized_score": 44.576274906181084 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3542296072507553, - "normalized_score": 35.422960725075534 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3624161073825503, - "normalized_score": 14.988814317673373 - }, - "musr": { - "name": "MUSR", - "value": 0.4716666666666667, - "normalized_score": 18.69166666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.515126329787234, - "normalized_score": 46.12514775413712 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-19", - "submission_date": "2024-12-19", - "generation": 1, - "base_model": "CultriX/Qwen2.5-14B-Wernickev3 (Merge)", - "hub_license": "", - "hub_hearts": 3, - "params_billions": 14.766, - "co2_cost": 3.831268500614135 - } - }, - { - "id": "CultriX/Qwen2.5-14B-partialmergept1_bfloat16_02c6491a2affea23c1e5d89d324a90d24a0e5381_False", - "model": { - "name": "CultriX/Qwen2.5-14B-partialmergept1", - "sha": "02c6491a2affea23c1e5d89d324a90d24a0e5381", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.10871735916122, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.633728507028019, - "normalized_score": 63.3728507028019 - }, - "bbh": { - "name": "BBH", - "value": 0.6151178406213536, - "normalized_score": 44.5944042928325 - }, - "math": { - "name": "MATH Level 5", - "value": 0.45392749244712993, - "normalized_score": 45.392749244712995 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3615771812080537, - "normalized_score": 14.876957494407161 - }, - "musr": { - "name": "MUSR", - "value": 0.47569791666666666, - "normalized_score": 19.662239583333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5207779255319149, - "normalized_score": 46.75310283687944 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-19", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 4.018672278551488 - } - }, - { - "id": "CultriX/Qwenfinity-2.5-14B_bfloat16_6acc1308274031b045f028b0a0290cdbe4243a04_False", - "model": { - "name": "CultriX/Qwenfinity-2.5-14B", - "sha": "6acc1308274031b045f028b0a0290cdbe4243a04", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 32.322008098684286, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4813794066410457, - "normalized_score": 48.137940664104576 - }, - "bbh": { - "name": "BBH", - "value": 0.5655007271970033, - "normalized_score": 37.259942422120695 - }, - "math": { - "name": "MATH Level 5", - "value": 0.41012084592145015, - "normalized_score": 41.012084592145015 - }, - "gpqa": { - "name": "GPQA", - "value": 0.348993288590604, - "normalized_score": 13.19910514541387 - }, - "musr": { - "name": "MUSR", - "value": 0.45058333333333334, - "normalized_score": 15.456250000000002 - }, - 
"mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4498005319148936, - "normalized_score": 38.86672576832151 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-23", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 3.9541332272834326 - } - }, - { - "id": "CultriX/Qwestion-14B_bfloat16_e286bfafbc28e36859202c9f06ed8287a4f1d8b6_False", - "model": { - "name": "CultriX/Qwestion-14B", - "sha": "e286bfafbc28e36859202c9f06ed8287a4f1d8b6", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.54922603408469, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6317803428237078, - "normalized_score": 63.178034282370774 - }, - "bbh": { - "name": "BBH", - "value": 0.6450104739140539, - "normalized_score": 48.75703449690036 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3723564954682779, - "normalized_score": 37.235649546827794 - }, - "gpqa": { - "name": "GPQA", - "value": 0.36828859060402686, - "normalized_score": 15.771812080536915 - }, - "musr": { - "name": "MUSR", - "value": 0.46360416666666665, - "normalized_score": 17.217187500000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.542220744680851, - "normalized_score": 49.13563829787233 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-23", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 3.707642084016007 - } - }, - { - "id": "CultriX/SeQwence-14B_bfloat16_f4a147b717ba0e9392f96e343250b00239196a22_False", - "model": { - "name": "CultriX/SeQwence-14B", - "sha": "f4a147b717ba0e9392f96e343250b00239196a22", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.886272868734814, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5351600420218354, - "normalized_score": 53.51600420218354 - }, - "bbh": { - "name": "BBH", - "value": 0.6505665291288972, - "normalized_score": 50.16357763465521 - }, - "math": { - "name": "MATH Level 5", - "value": 0.35347432024169184, - "normalized_score": 35.34743202416919 - }, - "gpqa": { - "name": "GPQA", - "value": 0.36073825503355705, - "normalized_score": 14.76510067114094 - }, - "musr": { - "name": "MUSR", - "value": 0.46661458333333333, - "normalized_score": 18.426822916666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5418882978723404, - "normalized_score": 49.09869976359338 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-20", - "submission_date": "2024-11-20", - "generation": 0, - "base_model": "CultriX/SeQwence-14B", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 14.766, - "co2_cost": 3.592765105556317 - } - }, - { - "id": 
"CultriX/SeQwence-14B-EvolMerge_bfloat16_a98c932f0d71d76883fe9aa9d708af0506b01343_False", - "model": { - "name": "CultriX/SeQwence-14B-EvolMerge", - "sha": "a98c932f0d71d76883fe9aa9d708af0506b01343", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.018640895247245, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5381576439403006, - "normalized_score": 53.81576439403007 - }, - "bbh": { - "name": "BBH", - "value": 0.6572183434723883, - "normalized_score": 50.780351477179785 - }, - "math": { - "name": "MATH Level 5", - "value": 0.36706948640483383, - "normalized_score": 36.70694864048338 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3808724832214765, - "normalized_score": 17.4496644295302 - }, - "musr": { - "name": "MUSR", - "value": 0.48208333333333336, - "normalized_score": 20.26041666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5418882978723404, - "normalized_score": 49.09869976359338 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-27", - "submission_date": "2024-11-27", - "generation": 1, - "base_model": "CultriX/SeQwence-14B-EvolMerge (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 3.901652091511035 - } - }, - { - "id": "CultriX/SeQwence-14B-EvolMergev1_bfloat16_6cc7116cdea757635dba52bb82a306654d118e77_False", - "model": { - "name": "CultriX/SeQwence-14B-EvolMergev1", - "sha": "6cc7116cdea757635dba52bb82a306654d118e77", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.4634621004166, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5554683794554005, - "normalized_score": 55.546837945540055 - }, - "bbh": { - "name": "BBH", - "value": 0.6545547382762975, - "normalized_score": 50.30225894842213 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4214501510574018, - "normalized_score": 42.14501510574018 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3766778523489933, - "normalized_score": 16.890380313199106 - }, - "musr": { - "name": "MUSR", - "value": 0.46227083333333335, - "normalized_score": 17.08385416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.539311835106383, - "normalized_score": 48.81242612293145 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-25", - "submission_date": "2024-11-27", - "generation": 1, - "base_model": "CultriX/SeQwence-14B-EvolMergev1 (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 3.9157920215613617 - } - }, - { - "id": "CultriX/SeQwence-14B-v5_bfloat16_9f43ad41542be56f6a18f31bfa60086318735ed5_False", - "model": { - "name": "CultriX/SeQwence-14B-v5", - "sha": "9f43ad41542be56f6a18f31bfa60086318735ed5", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 37.60854208566567, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5919881470055011, - "normalized_score": 59.1988147005501 - }, 
- "bbh": { - "name": "BBH", - "value": 0.6517093605796943, - "normalized_score": 49.99573116031767 - }, - "math": { - "name": "MATH Level 5", - "value": 0.33081570996978854, - "normalized_score": 33.081570996978854 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3699664429530201, - "normalized_score": 15.99552572706935 - }, - "musr": { - "name": "MUSR", - "value": 0.47141666666666665, - "normalized_score": 18.327083333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5414727393617021, - "normalized_score": 49.05252659574468 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-18", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 3.7303200921475566 - } - }, - { - "id": "CultriX/SeQwence-14Bv1_bfloat16_542bfbd2e6fb25ecd11b84d956764eb23233a034_False", - "model": { - "name": "CultriX/SeQwence-14Bv1", - "sha": "542bfbd2e6fb25ecd11b84d956764eb23233a034", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.625628460880606, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6678003253589365, - "normalized_score": 66.78003253589365 - }, - "bbh": { - "name": "BBH", - "value": 0.6344673727103446, - "normalized_score": 47.190897916289224 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3610271903323263, - "normalized_score": 36.102719033232624 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3615771812080537, - "normalized_score": 14.876957494407161 - }, - "musr": { - "name": "MUSR", - "value": 0.47042708333333333, - "normalized_score": 18.80338541666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.531998005319149, - "normalized_score": 47.99977836879433 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-24", - "submission_date": "2024-11-27", - "generation": 1, - "base_model": "CultriX/SeQwence-14Bv1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 3.660382222401487 - } - }, - { - "id": "CultriX/SeQwence-14Bv2_bfloat16_674c6d49b604fdf26e327e1e86c4fde0724b98e8_False", - "model": { - "name": "CultriX/SeQwence-14Bv2", - "sha": "674c6d49b604fdf26e327e1e86c4fde0724b98e8", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.74007503457753, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5785992278266112, - "normalized_score": 57.859922782661116 - }, - "bbh": { - "name": "BBH", - "value": 0.6304512627108576, - "normalized_score": 46.52922387651957 - }, - "math": { - "name": "MATH Level 5", - "value": 0.47583081570996977, - "normalized_score": 47.583081570996974 - }, - "gpqa": { - "name": "GPQA", - "value": 0.36073825503355705, - "normalized_score": 14.76510067114094 - }, - "musr": { - "name": "MUSR", - "value": 0.4601041666666667, - "normalized_score": 17.546354166666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5334109042553191, - "normalized_score": 48.1567671394799 - } - }, - "features": { - "is_not_available_on_hub": 
false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-08", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 3.9497873720789447 - } - }, - { - "id": "CultriX/SeQwence-14Bv3_bfloat16_b3f2b5273bbc996814a25aa9060fd6f4c0d93bca_False", - "model": { - "name": "CultriX/SeQwence-14Bv3", - "sha": "b3f2b5273bbc996814a25aa9060fd6f4c0d93bca", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.665815540663296, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5719047682371663, - "normalized_score": 57.19047682371663 - }, - "bbh": { - "name": "BBH", - "value": 0.6302253848409948, - "normalized_score": 46.38536750450479 - }, - "math": { - "name": "MATH Level 5", - "value": 0.47658610271903323, - "normalized_score": 47.65861027190332 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3649328859060403, - "normalized_score": 15.324384787472036 - }, - "musr": { - "name": "MUSR", - "value": 0.4624270833333333, - "normalized_score": 17.270052083333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5334940159574468, - "normalized_score": 48.16600177304965 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-27", - "submission_date": "2024-11-27", - "generation": 1, - "base_model": "CultriX/SeQwence-14Bv3 (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 3.9301492927461434 - } - }, - { - "id": "DRXD1000/Atlas-7B_bfloat16_967ee983e2a0b163c12da69f1f81aaf8ffb2a456_True", - "model": { - "name": "DRXD1000/Atlas-7B", - "sha": "967ee983e2a0b163c12da69f1f81aaf8ffb2a456", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 8.78657746420255, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3704459722425387, - "normalized_score": 37.04459722425387 - }, - "bbh": { - "name": "BBH", - "value": 0.3302176697760134, - "normalized_score": 7.540207541436388 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0188821752265861, - "normalized_score": 1.8882175226586102 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2575503355704698, - "normalized_score": 1.0067114093959737 - }, - "musr": { - "name": "MUSR", - "value": 0.33425, - "normalized_score": 0.7812499999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.14012632978723405, - "normalized_score": 4.4584810874704495 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-10", - "submission_date": "2024-12-10", - "generation": 0, - "base_model": "DRXD1000/Atlas-7B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.768, - "co2_cost": 2.513516751999117 - } - }, - { - "id": "DRXD1000/Phoenix-7B_bfloat16_a5caa8036d8b7819eb723debe3f037471b5c4882_True", - "model": { - "name": "DRXD1000/Phoenix-7B", - "sha": "a5caa8036d8b7819eb723debe3f037471b5c4882", - "precision": "bfloat16", - "type": 
"fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 12.420154178714037, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3209617149164218, - "normalized_score": 32.09617149164218 - }, - "bbh": { - "name": "BBH", - "value": 0.3931566034728218, - "normalized_score": 15.620179664331877 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01661631419939577, - "normalized_score": 1.6616314199395772 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2785234899328859, - "normalized_score": 3.8031319910514525 - }, - "musr": { - "name": "MUSR", - "value": 0.38494791666666667, - "normalized_score": 6.418489583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.23429188829787234, - "normalized_score": 14.921320921985814 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-01-10", - "submission_date": "2024-12-11", - "generation": 0, - "base_model": "DRXD1000/Phoenix-7B", - "hub_license": "apache-2.0", - "hub_hearts": 17, - "params_billions": 7.242, - "co2_cost": 0.9417448383956702 - } - }, - { - "id": "DUAL-GPO/zephyr-7b-ipo-0k-15k-i1_bfloat16_564d269c67dfcc5c07a4fbc270a6a48da1929d30_False", - "model": { - "name": "DUAL-GPO/zephyr-7b-ipo-0k-15k-i1", - "sha": "564d269c67dfcc5c07a4fbc270a6a48da1929d30", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 15.492947659683166, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.27562423259174545, - "normalized_score": 27.562423259174547 - }, - "bbh": { - "name": "BBH", - "value": 0.4472712447565954, - "normalized_score": 22.658642660096362 - }, - "math": { - "name": "MATH Level 5", - "value": 0.030211480362537766, - "normalized_score": 3.0211480362537766 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.41734374999999996, - "normalized_score": 10.567968750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31299867021276595, - "normalized_score": 23.66651891252955 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-20", - "submission_date": "2024-09-22", - "generation": 1, - "base_model": "DUAL-GPO/zephyr-7b-ipo-qlora-v0-merged", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.483, - "co2_cost": 1.9428467642746567 - } - }, - { - "id": "DZgas/GIGABATEMAN-7B_float16_edf2840350e7fd55895d9df560b489ac10ecb95e_False", - "model": { - "name": "DZgas/GIGABATEMAN-7B", - "sha": "edf2840350e7fd55895d9df560b489ac10ecb95e", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.47146897335566, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.46074637517342876, - "normalized_score": 46.07463751734288 - }, - "bbh": { - "name": "BBH", - "value": 0.5032184342862756, - "normalized_score": 29.827516654013994 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05513595166163142, - "normalized_score": 5.5135951661631415 - }, - "gpqa": { - "name": "GPQA", - 
"value": 0.28942953020134227, - "normalized_score": 5.257270693512303 - }, - "musr": { - "name": "MUSR", - "value": 0.43284374999999997, - "normalized_score": 11.972135416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3176529255319149, - "normalized_score": 24.183658392434985 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-17", - "submission_date": "2024-09-15", - "generation": 1, - "base_model": "DZgas/GIGABATEMAN-7B (Merge)", - "hub_license": "", - "hub_hearts": 7, - "params_billions": 7.242, - "co2_cost": 1.2606746542398037 - } - }, - { - "id": "Daemontatox/AetherDrake-SFT_float16_17a0f90f0c06f2adc885faccd0a6172a7b996126_False", - "model": { - "name": "Daemontatox/AetherDrake-SFT", - "sha": "17a0f90f0c06f2adc885faccd0a6172a7b996126", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.917960736440758, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4812796712722244, - "normalized_score": 48.12796712722243 - }, - "bbh": { - "name": "BBH", - "value": 0.48720075507220245, - "normalized_score": 27.13925232163005 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1510574018126888, - "normalized_score": 15.105740181268882 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32046979865771813, - "normalized_score": 9.395973154362418 - }, - "musr": { - "name": "MUSR", - "value": 0.40884375, - "normalized_score": 9.972135416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34990026595744683, - "normalized_score": 27.76669621749409 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-24", - "submission_date": "2024-12-25", - "generation": 1, - "base_model": "Daemontatox/AetherDrake-SFT (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 2.1966938731945946 - } - }, - { - "id": "Daemontatox/AetherSett_bfloat16_d8d86c6dc1b693192931b02e39290eca331ae84e_False", - "model": { - "name": "Daemontatox/AetherSett", - "sha": "d8d86c6dc1b693192931b02e39290eca331ae84e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 31.420122512312307, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5369586031729146, - "normalized_score": 53.695860317291455 - }, - "bbh": { - "name": "BBH", - "value": 0.5451624435465484, - "normalized_score": 34.7441456018993 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3972809667673716, - "normalized_score": 39.72809667673716 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.46031249999999996, - "normalized_score": 16.205729166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4278590425531915, - "normalized_score": 36.42878250591017 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-30", - "submission_date": 
"2024-12-30", - "generation": 3, - "base_model": "Qwen/Qwen2.5-7B", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.9646454629488488 - } - }, - { - "id": "Daemontatox/AetherTOT_float16_71d99f8fb69276422daae61222e57087000c05b0_False", - "model": { - "name": "Daemontatox/AetherTOT", - "sha": "71d99f8fb69276422daae61222e57087000c05b0", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MllamaForConditionalGeneration", - "average_score": 23.178825097337906, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4397642699149368, - "normalized_score": 43.97642699149368 - }, - "bbh": { - "name": "BBH", - "value": 0.5066056342472064, - "normalized_score": 29.436391495634606 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1487915407854985, - "normalized_score": 14.879154078549849 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3238255033557047, - "normalized_score": 9.843400447427292 - }, - "musr": { - "name": "MUSR", - "value": 0.4078541666666667, - "normalized_score": 9.781770833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38040226063829785, - "normalized_score": 31.155806737588655 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-28", - "generation": 2, - "base_model": "meta-llama/Llama-3.2-11B-Vision-Instruct", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 10.67, - "co2_cost": 1.39784689077682 - } - }, - { - "id": "Daemontatox/AetherTOT_bfloat16_71d99f8fb69276422daae61222e57087000c05b0_False", - "model": { - "name": "Daemontatox/AetherTOT", - "sha": "71d99f8fb69276422daae61222e57087000c05b0", - "precision": "bfloat16", - "type": "multimodal", - "weight_type": "Original", - "architecture": "MllamaForConditionalGeneration", - "average_score": 22.874708418571885, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43829040279790954, - "normalized_score": 43.82904027979095 - }, - "bbh": { - "name": "BBH", - "value": 0.5034307630533988, - "normalized_score": 29.031857314174562 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14425981873111782, - "normalized_score": 14.425981873111782 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3238255033557047, - "normalized_score": 9.843400447427292 - }, - "musr": { - "name": "MUSR", - "value": 0.40518750000000003, - "normalized_score": 9.248437500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37782579787234044, - "normalized_score": 30.86953309692671 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-28", - "generation": 2, - "base_model": "meta-llama/Llama-3.2-11B-Vision-Instruct", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 10.67, - "co2_cost": 0.708698329429445 - } - }, - { - "id": "Daemontatox/AetherUncensored_bfloat16_e498d645faab591062c6919a98b35656e2d0c783_False", - "model": { - "name": "Daemontatox/AetherUncensored", - "sha": "e498d645faab591062c6919a98b35656e2d0c783", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - 
"architecture": "LlamaForCausalLM", - "average_score": 18.374863917219894, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.40419309653940433, - "normalized_score": 40.41930965394043 - }, - "bbh": { - "name": "BBH", - "value": 0.44631282805144945, - "normalized_score": 21.678618361970496 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14501510574018128, - "normalized_score": 14.501510574018129 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28859060402684567, - "normalized_score": 5.145413870246088 - }, - "musr": { - "name": "MUSR", - "value": 0.3746770833333333, - "normalized_score": 9.501302083333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27102726063829785, - "normalized_score": 19.003028959810873 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-09", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4785060672358763 - } - }, - { - "id": "Daemontatox/Cogito-MIS_bfloat16_c1d59d3bc93d7ae4816800e37333f375e1debabf_True", - "model": { - "name": "Daemontatox/Cogito-MIS", - "sha": "c1d59d3bc93d7ae4816800e37333f375e1debabf", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 11.081961864729385, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.18145188100905596, - "normalized_score": 18.1451881009056 - }, - "bbh": { - "name": "BBH", - "value": 0.5059981143086196, - "normalized_score": 29.075970117521223 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08610271903323263, - "normalized_score": 8.610271903323262 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25671140939597314, - "normalized_score": 0.8948545861297527 - }, - "musr": { - "name": "MUSR", - "value": 0.37676041666666665, - "normalized_score": 4.928385416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.14353390957446807, - "normalized_score": 4.837101063829785 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-18", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 23.572, - "co2_cost": 1.765363845157954 - } - }, - { - "id": "Daemontatox/CogitoDistil_bfloat16_f9a5302a0c4b464c44d79f745b8498ab51dd97de_True", - "model": { - "name": "Daemontatox/CogitoDistil", - "sha": "f9a5302a0c4b464c44d79f745b8498ab51dd97de", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 17.180474194347855, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.27764775240805506, - "normalized_score": 27.764775240805506 - }, - "bbh": { - "name": "BBH", - "value": 0.36767660461416857, - "normalized_score": 11.94875947902338 - }, - "math": { - "name": "MATH Level 5", - "value": 0.39274924471299094, - "normalized_score": 39.274924471299094 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - 
"value": 0.3754895833333333, - "normalized_score": 4.802864583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2625498670212766, - "normalized_score": 18.061096335697396 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-22", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.629079371188348 - } - }, - { - "id": "Daemontatox/CogitoZ_bfloat16_7079c4e915e6f549df9f1c3fa3a3260f9a835f48_True", - "model": { - "name": "Daemontatox/CogitoZ", - "sha": "7079c4e915e6f549df9f1c3fa3a3260f9a835f48", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.383291042826805, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3967240255854466, - "normalized_score": 39.67240255854466 - }, - "bbh": { - "name": "BBH", - "value": 0.6734487392645502, - "normalized_score": 53.889571248021504 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5241691842900302, - "normalized_score": 52.416918429003026 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3951342281879195, - "normalized_score": 19.35123042505593 - }, - "musr": { - "name": "MUSR", - "value": 0.4792604166666667, - "normalized_score": 19.940885416666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5592586436170213, - "normalized_score": 51.028738179669034 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-03", - "submission_date": "2025-02-13", - "generation": 1, - "base_model": "Daemontatox/CogitoZ (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 32.764, - "co2_cost": 8.863382417868953 - } - }, - { - "id": "Daemontatox/CogitoZ14_bfloat16_df5320d7ff115f1e39e42506ed86a340eb2d12e0_True", - "model": { - "name": "Daemontatox/CogitoZ14", - "sha": "df5320d7ff115f1e39e42506ed86a340eb2d12e0", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 34.383430079600444, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6637034180419066, - "normalized_score": 66.37034180419066 - }, - "bbh": { - "name": "BBH", - "value": 0.6297514788808327, - "normalized_score": 46.47935186892328 - }, - "math": { - "name": "MATH Level 5", - "value": 0.42220543806646527, - "normalized_score": 42.220543806646525 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3162751677852349, - "normalized_score": 8.83668903803132 - }, - "musr": { - "name": "MUSR", - "value": 0.405875, - "normalized_score": 9.067708333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39993351063829785, - "normalized_score": 33.32594562647754 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-07", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 5.193449652876317 - } - }, - { - "id": 
"Daemontatox/DocumentCogito_bfloat16_23dcfc6bf91d84db1c977b151fd0923270d3e3ef_False", - "model": { - "name": "Daemontatox/DocumentCogito", - "sha": "23dcfc6bf91d84db1c977b151fd0923270d3e3ef", - "precision": "bfloat16", - "type": "multimodal", - "weight_type": "Original", - "architecture": "MllamaForConditionalGeneration", - "average_score": 24.220439046588428, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5064340394597445, - "normalized_score": 50.643403945974455 - }, - "bbh": { - "name": "BBH", - "value": 0.5111563719111275, - "normalized_score": 29.79360859597188 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16314199395770393, - "normalized_score": 16.314199395770395 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3162751677852349, - "normalized_score": 8.83668903803132 - }, - "musr": { - "name": "MUSR", - "value": 0.3973125, - "normalized_score": 8.597395833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38023603723404253, - "normalized_score": 31.13733747044917 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-16", - "submission_date": "2025-01-16", - "generation": 2, - "base_model": "meta-llama/Llama-3.2-11B-Vision-Instruct", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 10.67, - "co2_cost": 1.4133165870289686 - } - }, - { - "id": "Daemontatox/DocumentCogito_float16_9bdbfd8f330754c4103822ce180e0e3e3ce0973e_True", - "model": { - "name": "Daemontatox/DocumentCogito", - "sha": "9bdbfd8f330754c4103822ce180e0e3e3ce0973e", - "precision": "float16", - "type": "multimodal", - "weight_type": "Original", - "architecture": "MllamaForConditionalGeneration", - "average_score": 29.10815605844729, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7770349339751859, - "normalized_score": 77.70349339751859 - }, - "bbh": { - "name": "BBH", - "value": 0.5186726621665779, - "normalized_score": 31.184823044232704 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21978851963746224, - "normalized_score": 21.978851963746223 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2936241610738255, - "normalized_score": 5.8165548098433995 - }, - "musr": { - "name": "MUSR", - "value": 0.39105208333333336, - "normalized_score": 7.548177083333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3737533244680851, - "normalized_score": 30.417036052009454 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-16", - "submission_date": "2025-03-09", - "generation": 2, - "base_model": "meta-llama/Llama-3.2-11B-Vision-Instruct", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 10.67, - "co2_cost": 0.7117566552956288 - } - }, - { - "id": "Daemontatox/Llama3.3-70B-CogniLink_bfloat16_69f134f69472a84d104d3ef0c0b1dd200b9a599d_True", - "model": { - "name": "Daemontatox/Llama3.3-70B-CogniLink", - "sha": "69f134f69472a84d104d3ef0c0b1dd200b9a599d", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 42.77471354959223, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6931042965996888, - 
"normalized_score": 69.31042965996889 - }, - "bbh": { - "name": "BBH", - "value": 0.666832775829349, - "normalized_score": 52.12466257626164 - }, - "math": { - "name": "MATH Level 5", - "value": 0.41389728096676737, - "normalized_score": 41.389728096676734 - }, - "gpqa": { - "name": "GPQA", - "value": 0.44546979865771813, - "normalized_score": 26.062639821029084 - }, - "musr": { - "name": "MUSR", - "value": 0.4876979166666667, - "normalized_score": 21.395572916666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5172872340425532, - "normalized_score": 46.36524822695035 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-10", - "submission_date": "2025-03-02", - "generation": 1, - "base_model": "Daemontatox/Llama3.3-70B-CogniLink (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 70.554, - "co2_cost": 32.37823644138559 - } - }, - { - "id": "Daemontatox/Llama_cot_float16_e0b1e5ec44b5dac34aa3bf99e0faf7c6c3f1390f_True", - "model": { - "name": "Daemontatox/Llama_cot", - "sha": "e0b1e5ec44b5dac34aa3bf99e0faf7c6c3f1390f", - "precision": "float16", - "type": "multimodal", - "weight_type": "Original", - "architecture": "MllamaForConditionalGeneration", - "average_score": 27.115741618463783, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7548781677061308, - "normalized_score": 75.48781677061308 - }, - "bbh": { - "name": "BBH", - "value": 0.4838374335391873, - "normalized_score": 26.866582717626454 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20241691842900303, - "normalized_score": 20.241691842900302 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.3872395833333333, - "normalized_score": 6.638281250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.351811835106383, - "normalized_score": 27.979092789598102 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-09", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 10.67, - "co2_cost": 0.7507026467642424 - } - }, - { - "id": "Daemontatox/MawaredT1_bfloat16_84a1d35d91b862a5cfc65988d4a0f65033b34c47_False", - "model": { - "name": "Daemontatox/MawaredT1", - "sha": "84a1d35d91b862a5cfc65988d4a0f65033b34c47", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 29.231298221572775, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.41988036188424493, - "normalized_score": 41.98803618842449 - }, - "bbh": { - "name": "BBH", - "value": 0.5214815439293661, - "normalized_score": 31.900788297617016 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3021148036253776, - "normalized_score": 30.211480362537763 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3347315436241611, - "normalized_score": 11.297539149888143 - }, - "musr": { - "name": "MUSR", - "value": 0.47020833333333334, - "normalized_score": 18.676041666666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4718251329787234, - "normalized_score": 
41.3139036643026 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-02", - "generation": 2, - "base_model": "arcee-ai/Meraj-Mini (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.276958262253007 - } - }, - { - "id": "Daemontatox/Mini_QwQ_bfloat16_e96df7ba6e989ee286da5d0b05a84525fdb56c53_False", - "model": { - "name": "Daemontatox/Mini_QwQ", - "sha": "e96df7ba6e989ee286da5d0b05a84525fdb56c53", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 30.832499483820992, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44970566984490046, - "normalized_score": 44.97056698449004 - }, - "bbh": { - "name": "BBH", - "value": 0.554898906584336, - "normalized_score": 36.210284793752635 - }, - "math": { - "name": "MATH Level 5", - "value": 0.41918429003021146, - "normalized_score": 41.918429003021146 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.46825, - "normalized_score": 17.264583333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.437250664893617, - "normalized_score": 37.47229609929077 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-16", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.317404163373526 - } - }, - { - "id": "Daemontatox/NemoR_bfloat16_688f1a4c3c69fe9c6440cad7919ab602ae61fa39_False", - "model": { - "name": "Daemontatox/NemoR", - "sha": "688f1a4c3c69fe9c6440cad7919ab602ae61fa39", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 18.07399800099431, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2287375275380435, - "normalized_score": 22.873752753804347 - }, - "bbh": { - "name": "BBH", - "value": 0.5194067688446361, - "normalized_score": 31.605520137148783 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08308157099697885, - "normalized_score": 8.308157099697885 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3271812080536913, - "normalized_score": 10.290827740492169 - }, - "musr": { - "name": "MUSR", - "value": 0.39080208333333327, - "normalized_score": 9.916927083333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32903922872340424, - "normalized_score": 25.44880319148936 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-31", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 6.124, - "co2_cost": 2.261325280400474 - } - }, - { - "id": "Daemontatox/PathFinderAI2.0_bfloat16_bf8cfd82d4ceceb133058a78e1fe48436b50568a_True", - "model": { - "name": "Daemontatox/PathFinderAI2.0", - "sha": 
"bf8cfd82d4ceceb133058a78e1fe48436b50568a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.25665231066442, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45410178326839457, - "normalized_score": 45.41017832683946 - }, - "bbh": { - "name": "BBH", - "value": 0.665823006477417, - "normalized_score": 52.95651298557802 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5075528700906344, - "normalized_score": 50.755287009063444 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - "musr": { - "name": "MUSR", - "value": 0.4215625, - "normalized_score": 10.961979166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5546875, - "normalized_score": 50.520833333333336 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-30", - "submission_date": "2025-01-21", - "generation": 4, - "base_model": "Qwen/Qwen2.5-32B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 32.764, - "co2_cost": 14.003082210470806 - } - }, - { - "id": "Daemontatox/PathFinderAi3.0_bfloat16_6c9aa17cee032523ce17de111d6865e33825cf1d_True", - "model": { - "name": "Daemontatox/PathFinderAi3.0", - "sha": "6c9aa17cee032523ce17de111d6865e33825cf1d", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.45869427281298, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42709898624538445, - "normalized_score": 42.70989862453845 - }, - "bbh": { - "name": "BBH", - "value": 0.6884221416328996, - "normalized_score": 55.53835541644173 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5045317220543807, - "normalized_score": 50.453172205438065 - }, - "gpqa": { - "name": "GPQA", - "value": 0.4085570469798658, - "normalized_score": 21.140939597315437 - }, - "musr": { - "name": "MUSR", - "value": 0.4806875, - "normalized_score": 20.05260416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5757147606382979, - "normalized_score": 52.85719562647754 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-31", - "submission_date": "2025-01-21", - "generation": 1, - "base_model": "Daemontatox/PathFinderAI3.0", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 32.764, - "co2_cost": 8.094724327094367 - } - }, - { - "id": "Daemontatox/PathfinderAI_float16_14c6a91351006b7be0aff85292733470ff1b546d_False", - "model": { - "name": "Daemontatox/PathfinderAI", - "sha": "14c6a91351006b7be0aff85292733470ff1b546d", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.13131352504058, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.37451739163198094, - "normalized_score": 37.45173916319809 - }, - "bbh": { - "name": "BBH", - "value": 0.6667854331232542, - "normalized_score": 52.64654733435329 - }, - "math": { - "name": "MATH Level 5", - "value": 
0.47583081570996977, - "normalized_score": 47.583081570996974 - }, - "gpqa": { - "name": "GPQA", - "value": 0.39429530201342283, - "normalized_score": 19.239373601789712 - }, - "musr": { - "name": "MUSR", - "value": 0.48583333333333334, - "normalized_score": 20.829166666666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.559341755319149, - "normalized_score": 51.037972813238774 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-24", - "submission_date": "2024-12-25", - "generation": 1, - "base_model": "Daemontatox/PathfinderAI (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 32.764, - "co2_cost": 4.540918227975272 - } - }, - { - "id": "Daemontatox/PathfinderAI_bfloat16_7271fc7d08fca9b12c49b40af6245a982273a5c3_True", - "model": { - "name": "Daemontatox/PathfinderAI", - "sha": "7271fc7d08fca9b12c49b40af6245a982273a5c3", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.54876805856116, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4855006937148987, - "normalized_score": 48.55006937148987 - }, - "bbh": { - "name": "BBH", - "value": 0.6627335380624046, - "normalized_score": 52.32216296509384 - }, - "math": { - "name": "MATH Level 5", - "value": 0.48413897280966767, - "normalized_score": 48.413897280966765 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.42559375, - "normalized_score": 11.59921875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.554188829787234, - "normalized_score": 50.465425531914896 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-24", - "submission_date": "2024-12-30", - "generation": 1, - "base_model": "Daemontatox/PathfinderAI (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 32.764, - "co2_cost": 9.451440999099955 - } - }, - { - "id": "Daemontatox/Phi-4-COT_bfloat16_bfc745d1a347b74843671eb50687c2e88c07ec7d_False", - "model": { - "name": "Daemontatox/Phi-4-COT", - "sha": "bfc745d1a347b74843671eb50687c2e88c07ec7d", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.128818323637788, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17930313789633728, - "normalized_score": 17.93031378963373 - }, - "bbh": { - "name": "BBH", - "value": 0.6172933868833469, - "normalized_score": 45.34299043065811 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2243202416918429, - "normalized_score": 22.432024169184288 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33557046979865773, - "normalized_score": 11.409395973154364 - }, - "musr": { - "name": "MUSR", - "value": 0.453, - "normalized_score": 15.158333333333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.500498670212766, - "normalized_score": 44.49985224586289 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": 
false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-11", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 1.715153253727952 - } - }, - { - "id": "Daemontatox/PixelParse_AI_bfloat16_cc94604b91fc38513ca61f11dd9e1de1c3cc3b3d_False", - "model": { - "name": "Daemontatox/PixelParse_AI", - "sha": "cc94604b91fc38513ca61f11dd9e1de1c3cc3b3d", - "precision": "bfloat16", - "type": "multimodal", - "weight_type": "Original", - "architecture": "MllamaForConditionalGeneration", - "average_score": 22.92506088584278, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43829040279790954, - "normalized_score": 43.82904027979095 - }, - "bbh": { - "name": "BBH", - "value": 0.5034307630533988, - "normalized_score": 29.031857314174562 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1472809667673716, - "normalized_score": 14.72809667673716 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3238255033557047, - "normalized_score": 9.843400447427292 - }, - "musr": { - "name": "MUSR", - "value": 0.40518750000000003, - "normalized_score": 9.248437500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37782579787234044, - "normalized_score": 30.86953309692671 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-29", - "generation": 2, - "base_model": "meta-llama/Llama-3.2-11B-Vision-Instruct", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 10.67, - "co2_cost": 1.4002193288085218 - } - }, - { - "id": "Daemontatox/RA2.0_bfloat16_e1505dd5f9f2c8549cc852a1aca3ec545638e813_False", - "model": { - "name": "Daemontatox/RA2.0", - "sha": "e1505dd5f9f2c8549cc852a1aca3ec545638e813", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 23.232562711526068, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.37838934028378035, - "normalized_score": 37.838934028378034 - }, - "bbh": { - "name": "BBH", - "value": 0.4888687006782508, - "normalized_score": 28.471838145021334 - }, - "math": { - "name": "MATH Level 5", - "value": 0.38368580060422963, - "normalized_score": 38.368580060422964 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.40912499999999996, - "normalized_score": 9.373958333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.26163563829787234, - "normalized_score": 17.95951536643026 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-01", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.327376435124672 - } - }, - { - "id": "Daemontatox/RA_Reasoner_float16_e799c6877cb70b6e78c1e337eaa58383040c8fa9_False", - "model": { - "name": "Daemontatox/RA_Reasoner", - "sha": "e799c6877cb70b6e78c1e337eaa58383040c8fa9", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": 
"LlamaForCausalLM", - "average_score": 29.208002590797037, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.559215104810791, - "normalized_score": 55.921510481079096 - }, - "bbh": { - "name": "BBH", - "value": 0.6053692417205033, - "normalized_score": 43.07300777347365 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2122356495468278, - "normalized_score": 21.22356495468278 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3313758389261745, - "normalized_score": 10.850111856823268 - }, - "musr": { - "name": "MUSR", - "value": 0.3963541666666666, - "normalized_score": 7.510937500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43001994680851063, - "normalized_score": 36.6688829787234 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-20", - "submission_date": "2024-12-25", - "generation": 2, - "base_model": "tiiuae/Falcon3-10B-Base", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 10.306, - "co2_cost": 1.5581467334671502 - } - }, - { - "id": "Daemontatox/RA_Reasoner2.0_float16_2a7477f34b171d2ae090e57abdbd997546dee242_False", - "model": { - "name": "Daemontatox/RA_Reasoner2.0", - "sha": "2a7477f34b171d2ae090e57abdbd997546dee242", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.039667218867848, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5366339091388627, - "normalized_score": 53.663390913886275 - }, - "bbh": { - "name": "BBH", - "value": 0.6062469551969276, - "normalized_score": 43.07006895743249 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2311178247734139, - "normalized_score": 23.11178247734139 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32466442953020136, - "normalized_score": 9.955257270693513 - }, - "musr": { - "name": "MUSR", - "value": 0.3883541666666667, - "normalized_score": 7.177604166666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4353390957446808, - "normalized_score": 37.259899527186754 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 3, - "base_model": "tiiuae/Falcon3-10B-Base", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 10.306, - "co2_cost": 1.573512786211919 - } - }, - { - "id": "Daemontatox/ReasonTest_bfloat16_8e81cfddd97a13d81d6207eb72be8b730a7ca12f_False", - "model": { - "name": "Daemontatox/ReasonTest", - "sha": "8e81cfddd97a13d81d6207eb72be8b730a7ca12f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 25.858232794071, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4079653098223824, - "normalized_score": 40.79653098223824 - }, - "bbh": { - "name": "BBH", - "value": 0.543526397621609, - "normalized_score": 35.37503681384339 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21374622356495468, - "normalized_score": 21.37462235649547 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3187919463087248, - "normalized_score": 
9.172259507829976 - }, - "musr": { - "name": "MUSR", - "value": 0.43154166666666666, - "normalized_score": 12.076041666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4271941489361702, - "normalized_score": 36.354905437352244 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-31", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.808, - "co2_cost": 1.3406293497661246 - } - }, - { - "id": "Daemontatox/Research_PathfinderAI_bfloat16_eae32cc9dffa3a2493fd793f7b847e7bb3376853_True", - "model": { - "name": "Daemontatox/Research_PathfinderAI", - "sha": "eae32cc9dffa3a2493fd793f7b847e7bb3376853", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 9.365878628802648, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3456916537010687, - "normalized_score": 34.56916537010687 - }, - "bbh": { - "name": "BBH", - "value": 0.287225755504323, - "normalized_score": 1.4263456550462994 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16993957703927492, - "normalized_score": 16.993957703927492 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2407718120805369, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.33939583333333334, - "normalized_score": 1.7578124999999993 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11303191489361702, - "normalized_score": 1.4479905437352243 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-21", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.777, - "co2_cost": 0.6188413544397473 - } - }, - { - "id": "Daemontatox/SphinX_bfloat16_3da400d648b198211c81f61421bdcefac8073506_False", - "model": { - "name": "Daemontatox/SphinX", - "sha": "3da400d648b198211c81f61421bdcefac8073506", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 29.87478015820759, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5725042886208593, - "normalized_score": 57.25042886208593 - }, - "bbh": { - "name": "BBH", - "value": 0.5440583486084486, - "normalized_score": 34.71245082713039 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3081570996978852, - "normalized_score": 30.815709969788518 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.44049999999999995, - "normalized_score": 12.695833333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43658577127659576, - "normalized_score": 37.39841903073286 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-21", - "submission_date": "2024-12-31", - "generation": 1, - "base_model": "Daemontatox/SphinX (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - 
"params_billions": 7.616, - "co2_cost": 1.3043167149586126 - } - }, - { - "id": "Daemontatox/Sphinx2.0_bfloat16_16abdfe2c214dc1da6bfe654b3d6716fcc8450e2_True", - "model": { - "name": "Daemontatox/Sphinx2.0", - "sha": "16abdfe2c214dc1da6bfe654b3d6716fcc8450e2", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 37.69418546772203, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7123133286346892, - "normalized_score": 71.23133286346894 - }, - "bbh": { - "name": "BBH", - "value": 0.647283976671531, - "normalized_score": 49.39675153748764 - }, - "math": { - "name": "MATH Level 5", - "value": 0.40181268882175225, - "normalized_score": 40.181268882175225 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2936241610738255, - "normalized_score": 5.8165548098433995 - }, - "musr": { - "name": "MUSR", - "value": 0.42603125000000003, - "normalized_score": 13.053906249999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5183676861702128, - "normalized_score": 46.485298463356976 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-30", - "submission_date": "2024-12-30", - "generation": 1, - "base_model": "Daemontatox/Sphinx2.0 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 3.592650364309399 - } - }, - { - "id": "Daemontatox/TinySphinx_float16_62172ccb670864070581498fb12e7d2594ac3a77_False", - "model": { - "name": "Daemontatox/TinySphinx", - "sha": "62172ccb670864070581498fb12e7d2594ac3a77", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.167167127412911, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2566900269063862, - "normalized_score": 25.669002690638614 - }, - "bbh": { - "name": "BBH", - "value": 0.33098404240871354, - "normalized_score": 6.54657571552213 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04305135951661632, - "normalized_score": 4.305135951661631 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27348993288590606, - "normalized_score": 3.1319910514541416 - }, - "musr": { - "name": "MUSR", - "value": 0.33276041666666667, - "normalized_score": 1.5950520833333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1697972074468085, - "normalized_score": 7.7552452718676115 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-31", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.247, - "co2_cost": 1.0072558813543913 - } - }, - { - "id": "Daemontatox/TinySphinx2.0_bfloat16_accc28aa00084fe89801baa0885c291d18a031ec_False", - "model": { - "name": "Daemontatox/TinySphinx2.0", - "sha": "accc28aa00084fe89801baa0885c291d18a031ec", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.583926718324835, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25351733400710114, 
- "normalized_score": 25.351733400710117 - }, - "bbh": { - "name": "BBH", - "value": 0.3168407073661037, - "normalized_score": 5.004028710418718 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0324773413897281, - "normalized_score": 3.2477341389728096 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.33825, - "normalized_score": 1.3145833333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1731216755319149, - "normalized_score": 8.12463061465721 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-31", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.247, - "co2_cost": 1.0041719918661665 - } - }, - { - "id": "Daemontatox/Zirel-7B-Math_bfloat16_104d5e9f5df50c0782ff1a830f7ec3c4943210f3_True", - "model": { - "name": "Daemontatox/Zirel-7B-Math", - "sha": "104d5e9f5df50c0782ff1a830f7ec3c4943210f3", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 30.976625000031117, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6638785090227264, - "normalized_score": 66.38785090227265 - }, - "bbh": { - "name": "BBH", - "value": 0.5447698777469486, - "normalized_score": 34.93944104913604 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19788519637462235, - "normalized_score": 19.788519637462233 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3263422818791946, - "normalized_score": 10.17897091722595 - }, - "musr": { - "name": "MUSR", - "value": 0.47891666666666666, - "normalized_score": 18.597916666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4237034574468085, - "normalized_score": 35.96705082742317 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-28", - "submission_date": "2025-02-28", - "generation": 3, - "base_model": "Qwen/Qwen2.5-7B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.538753219597133 - } - }, - { - "id": "Daemontatox/Zirel_1.5_bfloat16_53af159f98d8b428e719287f759500f95b601ee2_True", - "model": { - "name": "Daemontatox/Zirel_1.5", - "sha": "53af159f98d8b428e719287f759500f95b601ee2", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 14.243506396749902, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4167575366693706, - "normalized_score": 41.67575366693706 - }, - "bbh": { - "name": "BBH", - "value": 0.3984669254999634, - "normalized_score": 15.082126333167833 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11329305135951662, - "normalized_score": 11.329305135951662 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.36581250000000004, - "normalized_score": 3.3265624999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.21434507978723405, - "normalized_score": 12.705008865248226 - 
} - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-04", - "submission_date": "2025-03-04", - "generation": 3, - "base_model": "Qwen/Qwen2.5-Coder-1.5B-Instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.544, - "co2_cost": 0.5795306020738551 - } - }, - { - "id": "Daemontatox/mini-Cogito-R1_bfloat16_7d86cfe7522a080853a6c25f7115fa5106c9d671_False", - "model": { - "name": "Daemontatox/mini-Cogito-R1", - "sha": "7d86cfe7522a080853a6c25f7115fa5106c9d671", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 11.629717650498435, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2298368329366082, - "normalized_score": 22.983683293660818 - }, - "bbh": { - "name": "BBH", - "value": 0.3280491875175077, - "normalized_score": 6.038995128638223 - }, - "math": { - "name": "MATH Level 5", - "value": 0.27492447129909364, - "normalized_score": 27.492447129909365 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28691275167785235, - "normalized_score": 4.921700223713646 - }, - "musr": { - "name": "MUSR", - "value": 0.34469791666666666, - "normalized_score": 2.987239583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.14818816489361702, - "normalized_score": 5.354240543735224 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-22", - "submission_date": "2025-02-22", - "generation": 1, - "base_model": "Daemontatox/mini-Cogito-R1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 1.777, - "co2_cost": 0.6109876520487433 - } - }, - { - "id": "Daemontatox/mini_Pathfinder_bfloat16_20d12c01e831675a563c978900bcf291def5f7dd_True", - "model": { - "name": "Daemontatox/mini_Pathfinder", - "sha": "20d12c01e831675a563c978900bcf291def5f7dd", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 19.872595162464073, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.29615752869054107, - "normalized_score": 29.615752869054106 - }, - "bbh": { - "name": "BBH", - "value": 0.39556911910803755, - "normalized_score": 16.03002789836758 - }, - "math": { - "name": "MATH Level 5", - "value": 0.47507552870090636, - "normalized_score": 47.507552870090635 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.37809374999999995, - "normalized_score": 4.8617187500000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28091755319148937, - "normalized_score": 20.10195035460993 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-20", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.5482629089653008 - } - }, - { - "id": 
"Dampfinchen/Llama-3.1-8B-Ultra-Instruct_bfloat16_46662d14130cfd34f7d90816540794f24a301f86_True", - "model": { - "name": "Dampfinchen/Llama-3.1-8B-Ultra-Instruct", - "sha": "46662d14130cfd34f7d90816540794f24a301f86", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.159277021889153, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8081091503876381, - "normalized_score": 80.81091503876381 - }, - "bbh": { - "name": "BBH", - "value": 0.5257532452246574, - "normalized_score": 32.49458680420566 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22054380664652568, - "normalized_score": 22.054380664652566 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.40032291666666664, - "normalized_score": 8.607031250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.382563164893617, - "normalized_score": 31.395907210401887 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-26", - "submission_date": "2024-08-26", - "generation": 1, - "base_model": "Dampfinchen/Llama-3.1-8B-Ultra-Instruct (Merge)", - "hub_license": "llama3", - "hub_hearts": 8, - "params_billions": 8.03, - "co2_cost": 1.6729573304154113 - } - }, - { - "id": "Danielbrdz/Barcenas-10b_float16_71884e96b88f6c86fca3a528ddf71c7745cb1d76_False", - "model": { - "name": "Danielbrdz/Barcenas-10b", - "sha": "71884e96b88f6c86fca3a528ddf71c7745cb1d76", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 31.870970597687858, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6607811717354397, - "normalized_score": 66.07811717354397 - }, - "bbh": { - "name": "BBH", - "value": 0.6120828494270083, - "normalized_score": 43.769694605609374 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21525679758308158, - "normalized_score": 21.525679758308158 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3414429530201342, - "normalized_score": 12.192393736017896 - }, - "musr": { - "name": "MUSR", - "value": 0.41346875, - "normalized_score": 10.316927083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4360871010638298, - "normalized_score": 37.34301122931442 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-04", - "submission_date": "2025-01-06", - "generation": 1, - "base_model": "Danielbrdz/Barcenas-10b (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 10.306, - "co2_cost": 1.6193104184932727 - } - }, - { - "id": "Danielbrdz/Barcenas-14b-Phi-3-medium-ORPO_float16_b749dbcb19901b8fd0e9f38c923a24533569f895_True", - "model": { - "name": "Danielbrdz/Barcenas-14b-Phi-3-medium-ORPO", - "sha": "b749dbcb19901b8fd0e9f38c923a24533569f895", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 31.88950498581046, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4799055395240185, - 
"normalized_score": 47.990553952401854 - }, - "bbh": { - "name": "BBH", - "value": 0.6536184886648629, - "normalized_score": 51.029418403280296 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20241691842900303, - "normalized_score": 20.241691842900302 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3263422818791946, - "normalized_score": 10.17897091722595 - }, - "musr": { - "name": "MUSR", - "value": 0.48075, - "normalized_score": 20.527083333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.47232380319148937, - "normalized_score": 41.36931146572104 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-15", - "submission_date": "2024-08-13", - "generation": 0, - "base_model": "Danielbrdz/Barcenas-14b-Phi-3-medium-ORPO", - "hub_license": "mit", - "hub_hearts": 5, - "params_billions": 13.96, - "co2_cost": 2.3541541178541143 - } - }, - { - "id": "Danielbrdz/Barcenas-14b-phi-4_float16_53891d973087e8909e1c9cc968b7bf222247e2ab_False", - "model": { - "name": "Danielbrdz/Barcenas-14b-phi-4", - "sha": "53891d973087e8909e1c9cc968b7bf222247e2ab", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.746056207730017, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.0497590836757581, - "normalized_score": 4.97590836757581 - }, - "bbh": { - "name": "BBH", - "value": 0.6769303819643072, - "normalized_score": 53.25769202541582 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2583081570996979, - "normalized_score": 25.83081570996979 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38338926174496646, - "normalized_score": 17.785234899328863 - }, - "musr": { - "name": "MUSR", - "value": 0.5096770833333334, - "normalized_score": 24.24296875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5174534574468085, - "normalized_score": 46.38371749408983 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-19", - "submission_date": "2025-01-26", - "generation": 1, - "base_model": "Danielbrdz/Barcenas-14b-phi-4 (Merge)", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 14.66, - "co2_cost": 1.7478531452063368 - } - }, - { - "id": "Danielbrdz/Barcenas-14b-phi-4-v2_float16_b602beb38b9a82ac497e6689751927eca9dbd876_False", - "model": { - "name": "Danielbrdz/Barcenas-14b-phi-4-v2", - "sha": "b602beb38b9a82ac497e6689751927eca9dbd876", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 31.447866315089907, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.27747266142723526, - "normalized_score": 27.74726614272353 - }, - "bbh": { - "name": "BBH", - "value": 0.6573002324945257, - "normalized_score": 50.20692953827006 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3217522658610272, - "normalized_score": 32.17522658610272 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3783557046979866, - "normalized_score": 17.114093959731544 - }, - "musr": { - "name": "MUSR", - "value": 0.43994791666666666, - "normalized_score": 14.293489583333328 - }, - "mmlu_pro": { - "name": 
"MMLU-PRO", - "value": 0.5243517287234043, - "normalized_score": 47.150192080378254 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-04", - "submission_date": "2025-02-05", - "generation": 1, - "base_model": "Danielbrdz/Barcenas-14b-phi-4-v2 (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 1.9885653626381716 - } - }, - { - "id": "Danielbrdz/Barcenas-3b-GRPO_float16_643e7615446a20d9ffe7cb66b88a6791cc6ae1eb_False", - "model": { - "name": "Danielbrdz/Barcenas-3b-GRPO", - "sha": "643e7615446a20d9ffe7cb66b88a6791cc6ae1eb", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.5654768779582, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5444276741268723, - "normalized_score": 54.44276741268723 - }, - "bbh": { - "name": "BBH", - "value": 0.44143515175110304, - "normalized_score": 21.13661740394343 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13746223564954682, - "normalized_score": 13.746223564954683 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.35759375, - "normalized_score": 6.065885416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3036901595744681, - "normalized_score": 22.632239952718678 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-08", - "submission_date": "2025-02-08", - "generation": 1, - "base_model": "Danielbrdz/Barcenas-3b-GRPO (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 0.6176673631419419 - } - }, - { - "id": "Danielbrdz/Barcenas-Llama3-8b-ORPO_float16_66c848c4526d3db1ec41468c0f73ac4448c6abe9_True", - "model": { - "name": "Danielbrdz/Barcenas-Llama3-8b-ORPO", - "sha": "66c848c4526d3db1ec41468c0f73ac4448c6abe9", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.51900505359198, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.737242738156979, - "normalized_score": 73.7242738156979 - }, - "bbh": { - "name": "BBH", - "value": 0.49865578559911244, - "normalized_score": 28.600623499981847 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06570996978851963, - "normalized_score": 6.570996978851963 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.4189583333333333, - "normalized_score": 11.169791666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3829787234042553, - "normalized_score": 31.44208037825059 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-29", - "submission_date": "2024-06-29", - "generation": 0, - "base_model": "Danielbrdz/Barcenas-Llama3-8b-ORPO", - "hub_license": "other", - "hub_hearts": 7, - "params_billions": 8.03, - "co2_cost": 1.5483183713957436 - } - }, - { - "id": 
"Danielbrdz/Barcenas-R1-Qwen-1.5b_float16_10e2f6bd3bb254f7e4e6857ab2799aaa9c855876_False", - "model": { - "name": "Danielbrdz/Barcenas-R1-Qwen-1.5b", - "sha": "10e2f6bd3bb254f7e4e6857ab2799aaa9c855876", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 15.138858570183295, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24280132271262472, - "normalized_score": 24.280132271262474 - }, - "bbh": { - "name": "BBH", - "value": 0.35872011187392944, - "normalized_score": 10.491260075913582 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3496978851963746, - "normalized_score": 34.96978851963746 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.354125, - "normalized_score": 3.832291666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.19090757978723405, - "normalized_score": 10.10084219858156 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-26", - "submission_date": "2025-01-26", - "generation": 1, - "base_model": "Danielbrdz/Barcenas-R1-Qwen-1.5b (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 1.777, - "co2_cost": 1.2225069482677418 - } - }, - { - "id": "Dans-DiscountModels/12b-mn-dans-reasoning-test-2_bfloat16_d573ad0cdb0ccfc194bc9c65dd81912dffeb1d35_True", - "model": { - "name": "Dans-DiscountModels/12b-mn-dans-reasoning-test-2", - "sha": "d573ad0cdb0ccfc194bc9c65dd81912dffeb1d35", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 15.564777531713789, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3710953603106424, - "normalized_score": 37.109536031064245 - }, - "bbh": { - "name": "BBH", - "value": 0.48070333147041405, - "normalized_score": 26.108938446170427 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0634441087613293, - "normalized_score": 6.3444108761329305 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27348993288590606, - "normalized_score": 3.1319910514541416 - }, - "musr": { - "name": "MUSR", - "value": 0.37021875, - "normalized_score": 3.944010416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2507480053191489, - "normalized_score": 16.749778368794324 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-07", - "submission_date": "2025-03-07", - "generation": 0, - "base_model": "Dans-DiscountModels/12b-mn-dans-reasoning-test-2", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 12.248, - "co2_cost": 0.9447289025847284 - } - }, - { - "id": "Dans-DiscountModels/12b-mn-dans-reasoning-test-3_bfloat16_e64145422fe367c1d3cbf8403cdc9cd2c6ccd5ca_True", - "model": { - "name": "Dans-DiscountModels/12b-mn-dans-reasoning-test-3", - "sha": "e64145422fe367c1d3cbf8403cdc9cd2c6ccd5ca", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.13127211265294, - "has_chat_template": true - }, - 
"evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5052593784491815, - "normalized_score": 50.52593784491815 - }, - "bbh": { - "name": "BBH", - "value": 0.48388753289945696, - "normalized_score": 25.84864126816548 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07779456193353475, - "normalized_score": 7.779456193353475 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.4167604166666667, - "normalized_score": 10.995052083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2515791223404255, - "normalized_score": 16.842124704491724 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-09", - "submission_date": "2025-03-10", - "generation": 0, - "base_model": "Dans-DiscountModels/12b-mn-dans-reasoning-test-3", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 12.248, - "co2_cost": 0.8696611878200479 - } - }, - { - "id": "Dans-DiscountModels/Dans-Instruct-CoreCurriculum-12b-ChatML_bfloat16_56925fafe6a543e224db36864dd0927171542776_False", - "model": { - "name": "Dans-DiscountModels/Dans-Instruct-CoreCurriculum-12b-ChatML", - "sha": "56925fafe6a543e224db36864dd0927171542776", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 13.542857676063695, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21110209798889168, - "normalized_score": 21.11020979888917 - }, - "bbh": { - "name": "BBH", - "value": 0.4791864789096407, - "normalized_score": 26.046417064819565 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04305135951661632, - "normalized_score": 4.305135951661631 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2802013422818792, - "normalized_score": 4.026845637583895 - }, - "musr": { - "name": "MUSR", - "value": 0.3606354166666667, - "normalized_score": 5.712760416666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2805019946808511, - "normalized_score": 20.05577718676123 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-04", - "submission_date": "2024-09-04", - "generation": 1, - "base_model": "mistralai/Mistral-Nemo-Base-2407", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 12.248, - "co2_cost": 4.636903883938605 - } - }, - { - "id": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML_bfloat16_029d84d4f4a618aa798490c046753b12801158e2_False", - "model": { - "name": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML", - "sha": "029d84d4f4a618aa798490c046753b12801158e2", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.521356471467334, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.08250774611364513, - "normalized_score": 8.250774611364513 - }, - "bbh": { - "name": "BBH", - "value": 0.4738171816307924, - "normalized_score": 26.336393544053255 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05513595166163142, - "normalized_score": 5.5135951661631415 - }, - "gpqa": { - "name": "GPQA", - 
"value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.3918229166666667, - "normalized_score": 9.677864583333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32878989361702127, - "normalized_score": 25.42109929078014 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-14", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.5971383963665837 - } - }, - { - "id": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.0_bfloat16_9367c1273b0025793531fcf3a2c15416539f5d81_False", - "model": { - "name": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.0", - "sha": "9367c1273b0025793531fcf3a2c15416539f5d81", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.074906766382023, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.06682048076880455, - "normalized_score": 6.682048076880455 - }, - "bbh": { - "name": "BBH", - "value": 0.47747656219777285, - "normalized_score": 26.737651950701505 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06722054380664652, - "normalized_score": 6.722054380664652 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2860738255033557, - "normalized_score": 4.809843400447425 - }, - "musr": { - "name": "MUSR", - "value": 0.3785833333333333, - "normalized_score": 8.122916666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.328374335106383, - "normalized_score": 25.37492612293144 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-20", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.6293978013302186 - } - }, - { - "id": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.1_bfloat16_a6188cd1807d0d72e55adc371ddd198d7e9aa7ae_False", - "model": { - "name": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.1", - "sha": "a6188cd1807d0d72e55adc371ddd198d7e9aa7ae", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.349347006636378, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.09105063453857985, - "normalized_score": 9.105063453857985 - }, - "bbh": { - "name": "BBH", - "value": 0.4748653313732898, - "normalized_score": 26.412550636134668 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05966767371601209, - "normalized_score": 5.966767371601208 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.3824895833333333, - "normalized_score": 7.811197916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.327875664893617, - "normalized_score": 25.319518321513 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": 
"", - "submission_date": "2024-09-23", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.5811774914426833 - } - }, - { - "id": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.2.0_bfloat16_15a9988381fdba15281f1bd6b04c34f3f96120cc_True", - "model": { - "name": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.2.0", - "sha": "15a9988381fdba15281f1bd6b04c34f3f96120cc", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.08185593143887, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5064085515321569, - "normalized_score": 50.64085515321569 - }, - "bbh": { - "name": "BBH", - "value": 0.4624263551503409, - "normalized_score": 24.734770612245033 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07326283987915408, - "normalized_score": 7.326283987915408 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2936241610738255, - "normalized_score": 5.8165548098433995 - }, - "musr": { - "name": "MUSR", - "value": 0.3644479166666667, - "normalized_score": 3.755989583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2999501329787234, - "normalized_score": 22.21668144208038 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-30", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.6874331691059232 - } - }, - { - "id": "Dans-DiscountModels/Mistral-7b-v0.3-Test-E0.7_bfloat16_e91ad0ada3f0d906bacd3c0ad41da4f65ce77b08_True", - "model": { - "name": "Dans-DiscountModels/Mistral-7b-v0.3-Test-E0.7", - "sha": "e91ad0ada3f0d906bacd3c0ad41da4f65ce77b08", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.169864255303853, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5123538876846767, - "normalized_score": 51.235388768467665 - }, - "bbh": { - "name": "BBH", - "value": 0.4750220653053363, - "normalized_score": 26.820762256757714 - }, - "math": { - "name": "MATH Level 5", - "value": 0.033987915407854986, - "normalized_score": 3.3987915407854987 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.40051041666666665, - "normalized_score": 8.030468749999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2744348404255319, - "normalized_score": 19.38164893617021 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-15", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.0, - "co2_cost": 0.875771180221206 - } - }, - { - "id": "Dans-DiscountModels/mistral-7b-test-merged_bfloat16_9db677cc43fb88852d952ef5914e919e65dd03eb_True", - "model": { - "name": "Dans-DiscountModels/mistral-7b-test-merged", - "sha": "9db677cc43fb88852d952ef5914e919e65dd03eb", - "precision": "bfloat16", - "type": 
"fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.073339331094914, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6678003253589365, - "normalized_score": 66.78003253589365 - }, - "bbh": { - "name": "BBH", - "value": 0.48981661658184755, - "normalized_score": 28.941004994824766 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0445619335347432, - "normalized_score": 4.45619335347432 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.3753958333333333, - "normalized_score": 4.3578125000000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29778922872340424, - "normalized_score": 21.976580969267136 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-27", - "submission_date": "2024-11-30", - "generation": 1, - "base_model": "Dans-DiscountModels/mistral-7b-test-merged (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.0, - "co2_cost": 2.336846263016353 - } - }, - { - "id": "Darkknight535/OpenCrystal-12B-L3_bfloat16_974d2d453afdde40f6a993601bbbbf9d97b43606_False", - "model": { - "name": "Darkknight535/OpenCrystal-12B-L3", - "sha": "974d2d453afdde40f6a993601bbbbf9d97b43606", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.685476383122634, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4070909630890482, - "normalized_score": 40.70909630890482 - }, - "bbh": { - "name": "BBH", - "value": 0.5222598504945516, - "normalized_score": 31.84449091545611 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08987915407854985, - "normalized_score": 8.987915407854985 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3062080536912752, - "normalized_score": 7.494407158836691 - }, - "musr": { - "name": "MUSR", - "value": 0.36565625, - "normalized_score": 5.740364583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3640292553191489, - "normalized_score": 29.336583924349874 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-25", - "submission_date": "2024-08-26", - "generation": 0, - "base_model": "Darkknight535/OpenCrystal-12B-L3", - "hub_license": "", - "hub_hearts": 15, - "params_billions": 11.52, - "co2_cost": 4.024570033760275 - } - }, - { - "id": "DavidAU/DeepHermes-3-Llama-3-8B-Preview-16.5B-Brainstorm_bfloat16_e32bfdb8f5ac6f0fb644a1fcf91b0b82cadba260_False", - "model": { - "name": "DavidAU/DeepHermes-3-Llama-3-8B-Preview-16.5B-Brainstorm", - "sha": "e32bfdb8f5ac6f0fb644a1fcf91b0b82cadba260", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.4525815482333, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.31356799957446246, - "normalized_score": 31.356799957446242 - }, - "bbh": { - "name": "BBH", - "value": 0.4762231983114653, - "normalized_score": 24.9087539518445 - }, - "math": { - "name": "MATH Level 5", 
- "value": 0.10574018126888217, - "normalized_score": 10.574018126888216 - }, - "gpqa": { - "name": "GPQA", - "value": 0.313758389261745, - "normalized_score": 8.501118568232664 - }, - "musr": { - "name": "MUSR", - "value": 0.39278125, - "normalized_score": 10.830989583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3208942819148936, - "normalized_score": 24.543809101654844 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-21", - "submission_date": "2025-03-10", - "generation": 1, - "base_model": "DavidAU/DeepHermes-3-Llama-3-8B-Preview-16.5B-Brainstorm (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 16.537, - "co2_cost": 2.5248284998228248 - } - }, - { - "id": "DavidAU/DeepSeek-BlackRoot-R1-Distill-Llama-3.1-8B_bfloat16_17c6339702cda2eb3feb08aec58b8e681ac4e678_False", - "model": { - "name": "DavidAU/DeepSeek-BlackRoot-R1-Distill-Llama-3.1-8B", - "sha": "17c6339702cda2eb3feb08aec58b8e681ac4e678", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.075555358413183, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.36849780803822746, - "normalized_score": 36.849780803822746 - }, - "bbh": { - "name": "BBH", - "value": 0.488693862545088, - "normalized_score": 27.616644147779652 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06570996978851963, - "normalized_score": 6.570996978851963 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3179530201342282, - "normalized_score": 9.060402684563762 - }, - "musr": { - "name": "MUSR", - "value": 0.43197916666666664, - "normalized_score": 12.397395833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2976230053191489, - "normalized_score": 21.95811170212766 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-15", - "submission_date": "2025-03-10", - "generation": 1, - "base_model": "DavidAU/DeepSeek-BlackRoot-R1-Distill-Llama-3.1-8B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 0.717378353005959 - } - }, - { - "id": "DavidAU/DeepSeek-Grand-Horror-SMB-R1-Distill-Llama-3.1-16B_bfloat16_ffc26e5c5ffbf42976e5bdc13ea858127eb96cf7_False", - "model": { - "name": "DavidAU/DeepSeek-Grand-Horror-SMB-R1-Distill-Llama-3.1-16B", - "sha": "ffc26e5c5ffbf42976e5bdc13ea858127eb96cf7", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.762746148661485, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2506948230694557, - "normalized_score": 25.06948230694557 - }, - "bbh": { - "name": "BBH", - "value": 0.44878062698346727, - "normalized_score": 22.777139438535443 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02945619335347432, - "normalized_score": 2.9456193353474323 - }, - "gpqa": { - "name": "GPQA", - "value": 0.313758389261745, - "normalized_score": 8.501118568232664 - }, - "musr": { - "name": "MUSR", - "value": 0.41644791666666664, - "normalized_score": 10.289322916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 
0.2709441489361702, - "normalized_score": 18.99379432624113 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-09", - "submission_date": "2025-03-10", - "generation": 1, - "base_model": "DavidAU/DeepSeek-Grand-Horror-SMB-R1-Distill-Llama-3.1-16B (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 15.664, - "co2_cost": 2.6307445591975105 - } - }, - { - "id": "DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Deep-Thinker-Uncensored-24B_bfloat16_95a95ccc16bb0d1c36a78e2bdf68bc60148608a3_False", - "model": { - "name": "DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Deep-Thinker-Uncensored-24B", - "sha": "95a95ccc16bb0d1c36a78e2bdf68bc60148608a3", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 20.033732738998072, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3882564927725103, - "normalized_score": 38.82564927725103 - }, - "bbh": { - "name": "BBH", - "value": 0.48860331670972784, - "normalized_score": 27.773550052185215 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08157099697885196, - "normalized_score": 8.157099697885197 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32298657718120805, - "normalized_score": 9.731543624161072 - }, - "musr": { - "name": "MUSR", - "value": 0.4375, - "normalized_score": 13.220833333333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30244348404255317, - "normalized_score": 22.49372044917257 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-15", - "submission_date": "2025-03-10", - "generation": 1, - "base_model": "DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Deep-Thinker-Uncensored-24B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 24.942, - "co2_cost": 2.3816831290269125 - } - }, - { - "id": "DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Mad-Scientist-24B_bfloat16_edd481ba969388f951af26d4a256538d02355342_False", - "model": { - "name": "DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Mad-Scientist-24B", - "sha": "edd481ba969388f951af26d4a256538d02355342", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 18.80531524670597, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3436182662003484, - "normalized_score": 34.36182662003484 - }, - "bbh": { - "name": "BBH", - "value": 0.47693843531787744, - "normalized_score": 25.61443384964336 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0755287009063444, - "normalized_score": 7.552870090634441 - }, - "gpqa": { - "name": "GPQA", - "value": 0.337248322147651, - "normalized_score": 11.633109619686799 - }, - "musr": { - "name": "MUSR", - "value": 0.4230833333333333, - "normalized_score": 11.78541666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29695811170212766, - "normalized_score": 21.884234633569736 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-15", - 
"submission_date": "2025-03-10", - "generation": 1, - "base_model": "DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Mad-Scientist-24B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 24.942, - "co2_cost": 2.6746962812464536 - } - }, - { - "id": "DavidAU/DeepSeek-R1-Distill-Qwen-25.5B-Brainstorm_bfloat16_b96213180934664665855bae599d2a4c2023b68a_False", - "model": { - "name": "DavidAU/DeepSeek-R1-Distill-Qwen-25.5B-Brainstorm", - "sha": "b96213180934664665855bae599d2a4c2023b68a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.28187768500508, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.34159474638403875, - "normalized_score": 34.15947463840388 - }, - "bbh": { - "name": "BBH", - "value": 0.580689592371853, - "normalized_score": 38.54849081746308 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5536253776435045, - "normalized_score": 55.36253776435045 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3859060402684564, - "normalized_score": 18.120805369127517 - }, - "musr": { - "name": "MUSR", - "value": 0.5155104166666666, - "normalized_score": 25.238802083333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4623503989361702, - "normalized_score": 40.26115543735224 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-21", - "submission_date": "2025-03-10", - "generation": 1, - "base_model": "DavidAU/DeepSeek-R1-Distill-Qwen-25.5B-Brainstorm (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 25.506, - "co2_cost": 5.562346505649761 - } - }, - { - "id": "DavidAU/DeepSeek-V2-Grand-Horror-SMB-R1-Distill-Llama-3.1-Uncensored-16.5B_bfloat16_0b1829e4631ff716278c81dac4ed1cab655a3505_False", - "model": { - "name": "DavidAU/DeepSeek-V2-Grand-Horror-SMB-R1-Distill-Llama-3.1-Uncensored-16.5B", - "sha": "0b1829e4631ff716278c81dac4ed1cab655a3505", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 15.169195541253531, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2853162940996556, - "normalized_score": 28.531629409965557 - }, - "bbh": { - "name": "BBH", - "value": 0.44623832540838126, - "normalized_score": 22.878424102179434 - }, - "math": { - "name": "MATH Level 5", - "value": 0.017371601208459216, - "normalized_score": 1.7371601208459215 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.417875, - "normalized_score": 10.734375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2777593085106383, - "normalized_score": 19.751034278959807 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-09", - "submission_date": "2025-03-10", - "generation": 1, - "base_model": "DavidAU/DeepSeek-V2-Grand-Horror-SMB-R1-Distill-Llama-3.1-Uncensored-16.5B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 16.537, - "co2_cost": 2.7914034140852118 - } - }, - { - "id": 
"DavidAU/DeepThought-MOE-8X3B-R1-Llama-3.2-Reasoning-18B_bfloat16_39b96b8ceca904a96a5e3e524c2a9513f1850bdd_False", - "model": { - "name": "DavidAU/DeepThought-MOE-8X3B-R1-Llama-3.2-Reasoning-18B", - "sha": "39b96b8ceca904a96a5e3e524c2a9513f1850bdd", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 16.128174383829172, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3793135547015253, - "normalized_score": 37.931355470152525 - }, - "bbh": { - "name": "BBH", - "value": 0.4232300476265338, - "normalized_score": 18.810857392916514 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10800604229607251, - "normalized_score": 10.80060422960725 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.3559791666666667, - "normalized_score": 6.197395833333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2720246010638298, - "normalized_score": 19.113844562647756 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-21", - "submission_date": "2025-03-10", - "generation": 1, - "base_model": "DavidAU/DeepThought-MOE-8X3B-R1-Llama-3.2-Reasoning-18B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 18.405, - "co2_cost": 3.5240369570496637 - } - }, - { - "id": "DavidAU/Gemma-The-Writer-9B_bfloat16_fcd6c9a1d0f6acc5bffc7df72cd8e996a9573937_True", - "model": { - "name": "DavidAU/Gemma-The-Writer-9B", - "sha": "fcd6c9a1d0f6acc5bffc7df72cd8e996a9573937", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 20.57119536629256, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17403156956874427, - "normalized_score": 17.403156956874426 - }, - "bbh": { - "name": "BBH", - "value": 0.5905439384199537, - "normalized_score": 41.272318662592205 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08761329305135952, - "normalized_score": 8.761329305135952 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34563758389261745, - "normalized_score": 12.751677852348994 - }, - "musr": { - "name": "MUSR", - "value": 0.409875, - "normalized_score": 10.134375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39793882978723405, - "normalized_score": 33.10431442080379 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-26", - "submission_date": "2025-01-11", - "generation": 1, - "base_model": "DavidAU/Gemma-The-Writer-9B (Merge)", - "hub_license": "", - "hub_hearts": 5, - "params_billions": 10.159, - "co2_cost": 3.968036805179416 - } - }, - { - "id": "DavidAU/Gemma-The-Writer-DEADLINE-10B_bfloat16_69f38a595090ce6ba154b21d9d8b4c690f02b74e_True", - "model": { - "name": "DavidAU/Gemma-The-Writer-DEADLINE-10B", - "sha": "69f38a595090ce6ba154b21d9d8b4c690f02b74e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 21.6766411356942, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": 
"IFEval", - "value": 0.23315802071836061, - "normalized_score": 23.31580207183606 - }, - "bbh": { - "name": "BBH", - "value": 0.5896087932535433, - "normalized_score": 41.01919903629973 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09894259818731117, - "normalized_score": 9.894259818731117 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3422818791946309, - "normalized_score": 12.304250559284117 - }, - "musr": { - "name": "MUSR", - "value": 0.4188645833333333, - "normalized_score": 10.79140625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39461436170212766, - "normalized_score": 32.73492907801418 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-27", - "submission_date": "2025-01-11", - "generation": 1, - "base_model": "DavidAU/Gemma-The-Writer-DEADLINE-10B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 10.952, - "co2_cost": 5.197589797990433 - } - }, - { - "id": "DavidAU/Gemma-The-Writer-J.GutenBerg-10B_bfloat16_7318b14104e3eb06c8e571ec8a51c7f027834d74_True", - "model": { - "name": "DavidAU/Gemma-The-Writer-J.GutenBerg-10B", - "sha": "7318b14104e3eb06c8e571ec8a51c7f027834d74", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 22.350742726448782, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.28578948301617485, - "normalized_score": 28.578948301617487 - }, - "bbh": { - "name": "BBH", - "value": 0.5909421265868766, - "normalized_score": 41.155990975297556 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09214501510574018, - "normalized_score": 9.214501510574017 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33808724832214765, - "normalized_score": 11.74496644295302 - }, - "musr": { - "name": "MUSR", - "value": 0.41759375, - "normalized_score": 10.665885416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3946974734042553, - "normalized_score": 32.74416371158392 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-30", - "submission_date": "2025-01-11", - "generation": 1, - "base_model": "DavidAU/Gemma-The-Writer-J.GutenBerg-10B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 10.034, - "co2_cost": 5.037345351027912 - } - }, - { - "id": "DavidAU/Gemma-The-Writer-Mighty-Sword-9B_float16_39e655b61e11cd9a53529c6bdf0e6357b5be6b2c_True", - "model": { - "name": "DavidAU/Gemma-The-Writer-Mighty-Sword-9B", - "sha": "39e655b61e11cd9a53529c6bdf0e6357b5be6b2c", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 32.033823768599724, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7527549125209998, - "normalized_score": 75.27549125209998 - }, - "bbh": { - "name": "BBH", - "value": 0.5911959785635329, - "normalized_score": 41.392610073884825 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19108761329305135, - "normalized_score": 19.108761329305135 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34815436241610737, - "normalized_score": 13.087248322147648 - }, - "musr": { - "name": "MUSR", - "value": 0.4111770833333333, - 
"normalized_score": 10.363802083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39677526595744683, - "normalized_score": 32.97502955082743 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-25", - "submission_date": "2025-01-11", - "generation": 1, - "base_model": "DavidAU/Gemma-The-Writer-Mighty-Sword-9B (Merge)", - "hub_license": "", - "hub_hearts": 4, - "params_billions": 10.159, - "co2_cost": 2.842982875986834 - } - }, - { - "id": "DavidAU/Gemma-The-Writer-N-Restless-Quill-10B-Uncensored_float16_1138d6b3e3527b75e7331044b1f0589a90667e8d_True", - "model": { - "name": "DavidAU/Gemma-The-Writer-N-Restless-Quill-10B-Uncensored", - "sha": "1138d6b3e3527b75e7331044b1f0589a90667e8d", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 31.67934579653929, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7070927361622716, - "normalized_score": 70.70927361622716 - }, - "bbh": { - "name": "BBH", - "value": 0.5922294775018883, - "normalized_score": 40.850090817774024 - }, - "math": { - "name": "MATH Level 5", - "value": 0.229607250755287, - "normalized_score": 22.9607250755287 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3414429530201342, - "normalized_score": 12.192393736017896 - }, - "musr": { - "name": "MUSR", - "value": 0.41632291666666665, - "normalized_score": 10.407031250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3966090425531915, - "normalized_score": 32.95656028368795 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-30", - "submission_date": "2025-01-11", - "generation": 1, - "base_model": "DavidAU/Gemma-The-Writer-N-Restless-Quill-10B-Uncensored (Merge)", - "hub_license": "", - "hub_hearts": 3, - "params_billions": 10.034, - "co2_cost": 3.4933973013928696 - } - }, - { - "id": "DavidAU/L3-DARKEST-PLANET-16.5B_bfloat16_37545fbc229061956c1801968c33c5b187512c41_True", - "model": { - "name": "DavidAU/L3-DARKEST-PLANET-16.5B", - "sha": "37545fbc229061956c1801968c33c5b187512c41", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.265056044870743, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6230623634179533, - "normalized_score": 62.30623634179532 - }, - "bbh": { - "name": "BBH", - "value": 0.5230436906708896, - "normalized_score": 31.776241491685315 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08987915407854985, - "normalized_score": 8.987915407854985 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2953020134228188, - "normalized_score": 6.040268456375841 - }, - "musr": { - "name": "MUSR", - "value": 0.3753645833333333, - "normalized_score": 7.253906249999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.363031914893617, - "normalized_score": 29.225768321512994 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-11", - "submission_date": "2025-01-11", - "generation": 1, - "base_model": 
"DavidAU/L3-DARKEST-PLANET-16.5B (Merge)", - "hub_license": "", - "hub_hearts": 5, - "params_billions": 16.537, - "co2_cost": 4.225040735611014 - } - }, - { - "id": "DavidAU/L3-Dark-Planet-8B_bfloat16_462c9307ba4cfcb0c1edcceac5e06f4007bc803e_False", - "model": { - "name": "DavidAU/L3-Dark-Planet-8B", - "sha": "462c9307ba4cfcb0c1edcceac5e06f4007bc803e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.469184056256058, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4134108609600305, - "normalized_score": 41.34108609600305 - }, - "bbh": { - "name": "BBH", - "value": 0.5084081453197787, - "normalized_score": 29.78962694499553 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0823262839879154, - "normalized_score": 8.23262839879154 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30033557046979864, - "normalized_score": 6.711409395973152 - }, - "musr": { - "name": "MUSR", - "value": 0.36159375, - "normalized_score": 6.332552083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37367021276595747, - "normalized_score": 30.40780141843972 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-05", - "submission_date": "2024-09-12", - "generation": 1, - "base_model": "DavidAU/L3-Dark-Planet-8B (Merge)", - "hub_license": "", - "hub_hearts": 6, - "params_billions": 8.03, - "co2_cost": 1.8782814138679607 - } - }, - { - "id": "DavidAU/L3-Jamet-12.2B-MK.V-Blackroot-Instruct_float16_db4ae3d7b608fd0e7490d2fcfa0436e56e21af33_False", - "model": { - "name": "DavidAU/L3-Jamet-12.2B-MK.V-Blackroot-Instruct", - "sha": "db4ae3d7b608fd0e7490d2fcfa0436e56e21af33", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.857043217965458, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3961998608137519, - "normalized_score": 39.619986081375195 - }, - "bbh": { - "name": "BBH", - "value": 0.4765717717789398, - "normalized_score": 25.869793144697738 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04078549848942598, - "normalized_score": 4.078549848942599 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2785234899328859, - "normalized_score": 3.8031319910514525 - }, - "musr": { - "name": "MUSR", - "value": 0.40196875, - "normalized_score": 8.312760416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3291223404255319, - "normalized_score": 25.458037825059098 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-23", - "submission_date": "2024-09-04", - "generation": 1, - "base_model": "DavidAU/L3-Jamet-12.2B-MK.V-Blackroot-Instruct (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 12.174, - "co2_cost": 1.4375216123892207 - } - }, - { - "id": "DavidAU/L3-Lumimaid-12.2B-v0.1-OAS-Instruct_float16_65a9e957dc4211aa3d87fdf588767823af5cde3f_False", - "model": { - "name": "DavidAU/L3-Lumimaid-12.2B-v0.1-OAS-Instruct", - "sha": "65a9e957dc4211aa3d87fdf588767823af5cde3f", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - 
"weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.831555665481403, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3924032677739509, - "normalized_score": 39.24032677739509 - }, - "bbh": { - "name": "BBH", - "value": 0.46930207579694677, - "normalized_score": 24.504815583181394 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04607250755287009, - "normalized_score": 4.607250755287009 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27684563758389263, - "normalized_score": 3.5794183445190177 - }, - "musr": { - "name": "MUSR", - "value": 0.41942708333333334, - "normalized_score": 11.26171875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31416223404255317, - "normalized_score": 23.795803782505907 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-24", - "submission_date": "2024-09-12", - "generation": 1, - "base_model": "DavidAU/L3-Lumimaid-12.2B-v0.1-OAS-Instruct (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 12.174, - "co2_cost": 2.8494143473815945 - } - }, - { - "id": "DavidAU/L3-SMB-Instruct-12.2B-F32_float16_ac5e205a41b17a7b05b1b62f352aacc7e65b2f13_False", - "model": { - "name": "DavidAU/L3-SMB-Instruct-12.2B-F32", - "sha": "ac5e205a41b17a7b05b1b62f352aacc7e65b2f13", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.90163889043809, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4303215468290802, - "normalized_score": 43.032154682908015 - }, - "bbh": { - "name": "BBH", - "value": 0.4786412360346213, - "normalized_score": 26.130957088441544 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04682779456193353, - "normalized_score": 4.682779456193353 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28187919463087246, - "normalized_score": 4.250559284116329 - }, - "musr": { - "name": "MUSR", - "value": 0.40872916666666664, - "normalized_score": 9.624479166666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3312001329787234, - "normalized_score": 25.688903664302597 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-25", - "submission_date": "2024-09-12", - "generation": 1, - "base_model": "DavidAU/L3-SMB-Instruct-12.2B-F32 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 12.174, - "co2_cost": 2.764794372875263 - } - }, - { - "id": "DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B_float16_7b626e50b6c35fcb064b8b039fcf30eae01c3fae_False", - "model": { - "name": "DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B", - "sha": "7b626e50b6c35fcb064b8b039fcf30eae01c3fae", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.197491298716887, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.34389309254998957, - "normalized_score": 34.389309254998956 - }, - "bbh": { - "name": "BBH", - "value": 0.4736328900737677, - "normalized_score": 26.692021341537863 - }, - "math": { - "name": "MATH Level 5", - "value": 
0.02190332326283988, - "normalized_score": 2.190332326283988 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.40311458333333333, - "normalized_score": 8.555989583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3570478723404255, - "normalized_score": 28.560874704491717 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-23", - "submission_date": "2024-09-04", - "generation": 1, - "base_model": "DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 16.537, - "co2_cost": 5.845597193164506 - } - }, - { - "id": "DavidAU/L3-Stheno-v3.2-12.2B-Instruct_bfloat16_8271fc32a601a4fa5efbe58c41a0ef4181ad8836_False", - "model": { - "name": "DavidAU/L3-Stheno-v3.2-12.2B-Instruct", - "sha": "8271fc32a601a4fa5efbe58c41a0ef4181ad8836", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.73968028525321, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4027945850343755, - "normalized_score": 40.279458503437546 - }, - "bbh": { - "name": "BBH", - "value": 0.4845980190500647, - "normalized_score": 27.36962320894452 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05060422960725076, - "normalized_score": 5.0604229607250755 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2751677852348993, - "normalized_score": 3.355704697986576 - }, - "musr": { - "name": "MUSR", - "value": 0.41025, - "normalized_score": 10.314583333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3345246010638298, - "normalized_score": 26.0582890070922 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-24", - "submission_date": "2024-09-12", - "generation": 1, - "base_model": "DavidAU/L3-Stheno-v3.2-12.2B-Instruct (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 12.174, - "co2_cost": 2.7953992017252753 - } - }, - { - "id": "DavidAU/L3.1-Dark-Planet-SpinFire-Uncensored-8B_bfloat16_9e4ae1310a0d2c82d50fe2aedc94ef084901ac48_True", - "model": { - "name": "DavidAU/L3.1-Dark-Planet-SpinFire-Uncensored-8B", - "sha": "9e4ae1310a0d2c82d50fe2aedc94ef084901ac48", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.710302269160096, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7042702252246262, - "normalized_score": 70.42702252246261 - }, - "bbh": { - "name": "BBH", - "value": 0.5260910165037093, - "normalized_score": 32.46178300050608 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09290030211480363, - "normalized_score": 9.290030211480364 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.354125, - "normalized_score": 2.498958333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3670212765957447, - "normalized_score": 29.669030732860524 - } - }, - "features": { - "is_not_available_on_hub": false, - 
"is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-10", - "submission_date": "2025-01-11", - "generation": 1, - "base_model": "DavidAU/L3.1-Dark-Planet-SpinFire-Uncensored-8B (Merge)", - "hub_license": "", - "hub_hearts": 4, - "params_billions": 8.03, - "co2_cost": 1.2605639032263736 - } - }, - { - "id": "DavidAU/L3.1-MOE-2X8B-Deepseek-DeepHermes-e32-uncensored-abliterated-13.7B_bfloat16_1ed5318f6bf5461efa5168289ab6786f4987ca96_False", - "model": { - "name": "DavidAU/L3.1-MOE-2X8B-Deepseek-DeepHermes-e32-uncensored-abliterated-13.7B", - "sha": "1ed5318f6bf5461efa5168289ab6786f4987ca96", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 19.615401464984767, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3345257250761313, - "normalized_score": 33.452572507613134 - }, - "bbh": { - "name": "BBH", - "value": 0.4420822344441435, - "normalized_score": 21.197829273123393 - }, - "math": { - "name": "MATH Level 5", - "value": 0.26057401812688824, - "normalized_score": 26.057401812688823 - }, - "gpqa": { - "name": "GPQA", - "value": 0.313758389261745, - "normalized_score": 8.501118568232664 - }, - "musr": { - "name": "MUSR", - "value": 0.37486458333333333, - "normalized_score": 7.458072916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2892287234042553, - "normalized_score": 21.025413711583923 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-10", - "generation": 1, - "base_model": "DavidAU/L3.1-MOE-2X8B-Deepseek-DeepHermes-e32-uncensored-abliterated-13.7B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 13.668, - "co2_cost": 1.4035730196826506 - } - }, - { - "id": "DavidAU/Qwen2.5-MOE-2X1.5B-DeepSeek-Uncensored-Censored-4B_bfloat16_412099bb6f570707a3a6ee311bfeb93a204c1b7b_False", - "model": { - "name": "DavidAU/Qwen2.5-MOE-2X1.5B-DeepSeek-Uncensored-Censored-4B", - "sha": "412099bb6f570707a3a6ee311bfeb93a204c1b7b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2MoeForCausalLM", - "average_score": 5.120376565217631, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17832905579418165, - "normalized_score": 17.832905579418167 - }, - "bbh": { - "name": "BBH", - "value": 0.30326053640004424, - "normalized_score": 3.023581212338263 - }, - "math": { - "name": "MATH Level 5", - "value": 0.024924471299093656, - "normalized_score": 2.492447129909366 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.3714583333333333, - "normalized_score": 4.565625000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11419547872340426, - "normalized_score": 1.5772754137115832 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-10", - "generation": 1, - "base_model": "DavidAU/Qwen2.5-MOE-2X1.5B-DeepSeek-Uncensored-Censored-4B 
(Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 4.089, - "co2_cost": 1.1268767305406775 - } - }, - { - "id": "DavidAU/Qwen2.5-MOE-2X7B-DeepSeek-Abliterated-Censored-19B_bfloat16_8a908eadce5fc13ddedab2c854433245de430e41_False", - "model": { - "name": "DavidAU/Qwen2.5-MOE-2X7B-DeepSeek-Abliterated-Censored-19B", - "sha": "8a908eadce5fc13ddedab2c854433245de430e41", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2MoeForCausalLM", - "average_score": 13.021728019791546, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.28351773294857646, - "normalized_score": 28.351773294857644 - }, - "bbh": { - "name": "BBH", - "value": 0.35922718767499157, - "normalized_score": 10.870199077362608 - }, - "math": { - "name": "MATH Level 5", - "value": 0.24169184290030213, - "normalized_score": 24.169184290030213 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.38469791666666664, - "normalized_score": 5.653906249999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1636469414893617, - "normalized_score": 7.0718823877068555 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-10", - "generation": 1, - "base_model": "DavidAU/Qwen2.5-MOE-2X7B-DeepSeek-Abliterated-Censored-19B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 19.022, - "co2_cost": 2.2596681212071084 - } - }, - { - "id": "DavidAU/Qwen2.5-MOE-6x1.5B-DeepSeek-Reasoning-e32_bfloat16_7a447e7af3ce9ea515f25c04ab7f942fe637b521_False", - "model": { - "name": "DavidAU/Qwen2.5-MOE-6x1.5B-DeepSeek-Reasoning-e32", - "sha": "7a447e7af3ce9ea515f25c04ab7f942fe637b521", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2MoeForCausalLM", - "average_score": 6.418592963200898, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21067766858601844, - "normalized_score": 21.067766858601843 - }, - "bbh": { - "name": "BBH", - "value": 0.32861776640637924, - "normalized_score": 6.218964782807416 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06646525679758308, - "normalized_score": 6.646525679758309 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24748322147651006, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3404479166666667, - "normalized_score": 3.2226562499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11220079787234043, - "normalized_score": 1.3556442080378246 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-05", - "submission_date": "2025-03-10", - "generation": 1, - "base_model": "DavidAU/Qwen2.5-MOE-6x1.5B-DeepSeek-Reasoning-e32 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.714, - "co2_cost": 2.2526985444884104 - } - }, - { - "id": "Davidsv/SUONG-1_float16_5bab856eaa8836d4f37d736926bdd18b97ac3241_False", - "model": { - "name": "Davidsv/SUONG-1", - "sha": "5bab856eaa8836d4f37d736926bdd18b97ac3241", - "precision": "float16", - "type": 
"basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 5.32234226282347, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2497207409673001, - "normalized_score": 24.97207409673001 - }, - "bbh": { - "name": "BBH", - "value": 0.28171339082318814, - "normalized_score": 1.8272424825748754 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24412751677852348, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.35775, - "normalized_score": 4.185416666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1085438829787234, - "normalized_score": 0.9493203309692663 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-14", - "submission_date": "2025-02-14", - "generation": 1, - "base_model": "Davidsv/SUONG-1 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 2.879, - "co2_cost": 0.22068029903945732 - } - }, - { - "id": "DavieLion/Llama-3.2-1B-SPIN-iter0_float16_bc1a37920fb5e3cb64a71a4deda649f33fecb95d_False", - "model": { - "name": "DavieLion/Llama-3.2-1B-SPIN-iter0", - "sha": "bc1a37920fb5e3cb64a71a4deda649f33fecb95d", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 3.6238167467938993, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15067687070306784, - "normalized_score": 15.067687070306786 - }, - "bbh": { - "name": "BBH", - "value": 0.29300816789978756, - "normalized_score": 2.1008283750749497 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2533557046979866, - "normalized_score": 0.44742729306487633 - }, - "musr": { - "name": "MUSR", - "value": 0.3565416666666667, - "normalized_score": 2.7343749999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11253324468085106, - "normalized_score": 1.392582742316784 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "DavieLion/Llama-3.2-1B-SPIN-iter0 (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 0.36964968247226154 - } - }, - { - "id": "DavieLion/Llama-3.2-1B-SPIN-iter0_bfloat16_2c95189201f94c64fcf4c9a7edc4777741f18999_False", - "model": { - "name": "DavieLion/Llama-3.2-1B-SPIN-iter0", - "sha": "2c95189201f94c64fcf4c9a7edc4777741f18999", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 3.985688359677067, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15492338107332987, - "normalized_score": 15.492338107332987 - }, - "bbh": { - "name": "BBH", - "value": 0.29372614029730437, - "normalized_score": 2.3306685577233517 - }, - "math": { - "name": "MATH Level 5", - "value": 0.006042296072507553, - "normalized_score": 0.6042296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 
0.2575503355704698, - "normalized_score": 1.0067114093959737 - }, - "musr": { - "name": "MUSR", - "value": 0.3564791666666667, - "normalized_score": 3.0598958333333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11278257978723404, - "normalized_score": 1.4202866430260035 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "DavieLion/Llama-3.2-1B-SPIN-iter0 (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 0.7089527374016119 - } - }, - { - "id": "DavieLion/Llama-3.2-1B-SPIN-iter1_bfloat16_8c632ae68bd385af2e2270933326edbcd0044e8c_False", - "model": { - "name": "DavieLion/Llama-3.2-1B-SPIN-iter1", - "sha": "8c632ae68bd385af2e2270933326edbcd0044e8c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 3.7519748574168585, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15754642127333254, - "normalized_score": 15.754642127333252 - }, - "bbh": { - "name": "BBH", - "value": 0.29402546232087917, - "normalized_score": 2.433772217660189 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0022658610271903325, - "normalized_score": 0.22658610271903326 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25083892617449666, - "normalized_score": 0.11185682326622093 - }, - "musr": { - "name": "MUSR", - "value": 0.3646041666666667, - "normalized_score": 2.675520833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11178523936170212, - "normalized_score": 1.309471040189124 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "DavieLion/Llama-3.2-1B-SPIN-iter1 (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 0.7217203551845318 - } - }, - { - "id": "DavieLion/Llama-3.2-1B-SPIN-iter2_bfloat16_36c9b3fd7196c6bac0fbe8f1e9c4f4fb3bcc993a_False", - "model": { - "name": "DavieLion/Llama-3.2-1B-SPIN-iter2", - "sha": "36c9b3fd7196c6bac0fbe8f1e9c4f4fb3bcc993a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 3.658145708347315, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.13761264555822994, - "normalized_score": 13.761264555822994 - }, - "bbh": { - "name": "BBH", - "value": 0.2980340303779312, - "normalized_score": 3.1573429783229785 - }, - "math": { - "name": "MATH Level 5", - "value": 0.005287009063444108, - "normalized_score": 0.5287009063444108 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25419463087248323, - "normalized_score": 0.5592841163310973 - }, - "musr": { - "name": "MUSR", - "value": 0.35530208333333335, - "normalized_score": 2.512760416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11286569148936171, - "normalized_score": 1.4295212765957446 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - 
"metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "DavieLion/Llama-3.2-1B-SPIN-iter2 (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 0.7070763736913788 - } - }, - { - "id": "DavieLion/Llama-3.2-1B-SPIN-iter3_float16_108557f0db9b6f7c35ba8b0d094ebd81be6fe9fd_False", - "model": { - "name": "DavieLion/Llama-3.2-1B-SPIN-iter3", - "sha": "108557f0db9b6f7c35ba8b0d094ebd81be6fe9fd", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 3.593140598286666, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1335910938531984, - "normalized_score": 13.35910938531984 - }, - "bbh": { - "name": "BBH", - "value": 0.29752276438021447, - "normalized_score": 3.1395015434855087 - }, - "math": { - "name": "MATH Level 5", - "value": 0.006797583081570997, - "normalized_score": 0.6797583081570997 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2533557046979866, - "normalized_score": 0.44742729306487633 - }, - "musr": { - "name": "MUSR", - "value": 0.34996875, - "normalized_score": 2.512760416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11278257978723404, - "normalized_score": 1.4202866430260035 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "DavieLion/Llama-3.2-1B-SPIN-iter3 (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 1.0930826432847423 - } - }, - { - "id": "DavieLion/Llama-3.2-1B-SPIN-iter3_bfloat16_ae511fd6bae53efd2656dd3cc6fc87d0fc56356c_False", - "model": { - "name": "DavieLion/Llama-3.2-1B-SPIN-iter3", - "sha": "ae511fd6bae53efd2656dd3cc6fc87d0fc56356c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 3.6136899611836006, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1323920530858123, - "normalized_score": 13.239205308581232 - }, - "bbh": { - "name": "BBH", - "value": 0.29722352809482616, - "normalized_score": 3.02851385306557 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.3526666666666667, - "normalized_score": 2.0833333333333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11286569148936171, - "normalized_score": 1.4295212765957446 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "DavieLion/Llama-3.2-1B-SPIN-iter3 (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 0.3632299840264858 - } - }, - { - "id": "DavieLion/Lllma-3.2-1B_float16_5e0d3bc7ca705a41f897a870efd4ff6ce455e20c_False", - "model": { - "name": "DavieLion/Lllma-3.2-1B", - "sha": "5e0d3bc7ca705a41f897a870efd4ff6ce455e20c", - "precision": 
"float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 3.932331961262434, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1601439735457475, - "normalized_score": 16.01439735457475 - }, - "bbh": { - "name": "BBH", - "value": 0.2964692268500723, - "normalized_score": 2.4381228956228953 - }, - "math": { - "name": "MATH Level 5", - "value": 0.006797583081570997, - "normalized_score": 0.6797583081570997 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24412751677852348, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.35781250000000003, - "normalized_score": 3.0598958333333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11261635638297872, - "normalized_score": 1.4018173758865236 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 0, - "base_model": "DavieLion/Lllma-3.2-1B", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 0.7337502581357204 - } - }, - { - "id": "DebateLabKIT/Llama-3.1-Argunaut-1-8B-SFT_bfloat16_e9d7396bc0fa3d1ff4c1f4b1a0d81a1d1a7e977c_True", - "model": { - "name": "DebateLabKIT/Llama-3.1-Argunaut-1-8B-SFT", - "sha": "e9d7396bc0fa3d1ff4c1f4b1a0d81a1d1a7e977c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.113556306971883, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.551921124837653, - "normalized_score": 55.1921124837653 - }, - "bbh": { - "name": "BBH", - "value": 0.48238301936695316, - "normalized_score": 27.187826826803345 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14501510574018128, - "normalized_score": 14.501510574018129 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.4503020833333333, - "normalized_score": 15.854427083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3472406914893617, - "normalized_score": 27.47118794326241 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-31", - "submission_date": "2025-01-02", - "generation": 1, - "base_model": "DebateLabKIT/Llama-3.1-Argunaut-1-8B-SFT (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 6, - "params_billions": 8.03, - "co2_cost": 1.4339566351371729 - } - }, - { - "id": "Deci/DeciLM-7B_bfloat16_c3c9f4226801dc0433f32aebffe0aac68ee2f051_False", - "model": { - "name": "Deci/DeciLM-7B", - "sha": "c3c9f4226801dc0433f32aebffe0aac68ee2f051", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "DeciLMForCausalLM", - "average_score": 15.023477940437223, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.28129474239462404, - "normalized_score": 28.129474239462404 - }, - "bbh": { - "name": "BBH", - "value": 0.44228566674266495, - "normalized_score": 21.252729791067395 - }, - "math": { - "name": "MATH Level 5", - "value": 0.028700906344410877, - "normalized_score": 
2.8700906344410875 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2953020134228188, - "normalized_score": 6.040268456375841 - }, - "musr": { - "name": "MUSR", - "value": 0.43585416666666665, - "normalized_score": 13.0484375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.26919880319148937, - "normalized_score": 18.799867021276594 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-12-10", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "Deci/DeciLM-7B", - "hub_license": "apache-2.0", - "hub_hearts": 226, - "params_billions": 7.044, - "co2_cost": 1.2842746949253772 - } - }, - { - "id": "Deci/DeciLM-7B-instruct_bfloat16_4adc7aa9efe61b47b0a98b2cc94527d9c45c3b4f_True", - "model": { - "name": "Deci/DeciLM-7B-instruct", - "sha": "4adc7aa9efe61b47b0a98b2cc94527d9c45c3b4f", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "DeciLMForCausalLM", - "average_score": 17.470092220993035, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4880239985460799, - "normalized_score": 48.802399854608 - }, - "bbh": { - "name": "BBH", - "value": 0.4589748654047652, - "normalized_score": 23.887149044184596 - }, - "math": { - "name": "MATH Level 5", - "value": 0.030211480362537766, - "normalized_score": 3.0211480362537766 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28942953020134227, - "normalized_score": 5.257270693512303 - }, - "musr": { - "name": "MUSR", - "value": 0.38841666666666663, - "normalized_score": 5.985416666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.26080452127659576, - "normalized_score": 17.86716903073286 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-12-10", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "Deci/DeciLM-7B-instruct", - "hub_license": "apache-2.0", - "hub_hearts": 96, - "params_billions": 7.044, - "co2_cost": 1.2772986646723141 - } - }, - { - "id": "DeepAutoAI/Explore_Llama-3.1-8B-Inst_bfloat16_9752180fafd8f584625eb649c0cba36b91bdc3ce_True", - "model": { - "name": "DeepAutoAI/Explore_Llama-3.1-8B-Inst", - "sha": "9752180fafd8f584625eb649c0cba36b91bdc3ce", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.926700693431915, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7794828831943688, - "normalized_score": 77.94828831943687 - }, - "bbh": { - "name": "BBH", - "value": 0.511742159482904, - "normalized_score": 30.393262902042363 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20090634441087613, - "normalized_score": 20.090634441087612 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.3909583333333333, - "normalized_score": 9.63645833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.379155585106383, - "normalized_score": 31.017287234042552 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-21", - 
"submission_date": "2024-10-09", - "generation": 1, - "base_model": "DeepAutoAI/Explore_Llama-3.1-8B-Inst (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 2.6394649716429317 - } - }, - { - "id": "DeepAutoAI/Explore_Llama-3.2-1B-Inst_bfloat16_9fd790df246b8979c02173f7698819a7805fb04e_True", - "model": { - "name": "DeepAutoAI/Explore_Llama-3.2-1B-Inst", - "sha": "9fd790df246b8979c02173f7698819a7805fb04e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.897376870733368, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5648856146136695, - "normalized_score": 56.48856146136695 - }, - "bbh": { - "name": "BBH", - "value": 0.35048085637770016, - "normalized_score": 8.292273657131942 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07477341389728097, - "normalized_score": 7.477341389728097 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2558724832214765, - "normalized_score": 0.7829977628635317 - }, - "musr": { - "name": "MUSR", - "value": 0.31834375, - "normalized_score": 1.359635416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.18085106382978725, - "normalized_score": 8.983451536643026 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-07", - "submission_date": "2024-10-09", - "generation": 1, - "base_model": "DeepAutoAI/Explore_Llama-3.2-1B-Inst (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 1.3254123091945325 - } - }, - { - "id": "DeepAutoAI/Explore_Llama-3.2-1B-Inst_v0_bfloat16_9509dee6b01fff1a11dc26cf58d7eecbe3d9d9c4_True", - "model": { - "name": "DeepAutoAI/Explore_Llama-3.2-1B-Inst_v0", - "sha": "9509dee6b01fff1a11dc26cf58d7eecbe3d9d9c4", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.359085066778022, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5597148898256625, - "normalized_score": 55.97148898256626 - }, - "bbh": { - "name": "BBH", - "value": 0.33650903200352716, - "normalized_score": 7.042771972349901 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05966767371601209, - "normalized_score": 5.966767371601208 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.3103125, - "normalized_score": 0.45572916666666624 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.18035239361702127, - "normalized_score": 8.928043735224584 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-08", - "submission_date": "2024-10-08", - "generation": 0, - "base_model": "DeepAutoAI/Explore_Llama-3.2-1B-Inst_v0", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 1.236, - "co2_cost": 0.9343787388167499 - } - }, - { - "id": "DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1_bfloat16_3f8b0fb6dcc1e9725ba52dd086241d5d9e413100_True", - "model": { - "name": "DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1", - "sha": "3f8b0fb6dcc1e9725ba52dd086241d5d9e413100", - "precision": "bfloat16", - 
"type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 10.921433609301348, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4998891829235318, - "normalized_score": 49.98891829235318 - }, - "bbh": { - "name": "BBH", - "value": 0.3141475230443668, - "normalized_score": 4.257780193079653 - }, - "math": { - "name": "MATH Level 5", - "value": 0.030966767371601207, - "normalized_score": 3.096676737160121 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24496644295302014, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.37809374999999995, - "normalized_score": 5.1950520833333345 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12691156914893617, - "normalized_score": 2.990174349881796 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-08", - "submission_date": "2024-10-08", - "generation": 1, - "base_model": "DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 0.9399318977538753 - } - }, - { - "id": "DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1.1_bfloat16_158b977bca89e073871e2313740a7c75eb1291af_True", - "model": { - "name": "DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1.1", - "sha": "158b977bca89e073871e2313740a7c75eb1291af", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.311829325717667, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5844193406827218, - "normalized_score": 58.44193406827218 - }, - "bbh": { - "name": "BBH", - "value": 0.3512662445055541, - "normalized_score": 8.818154144791238 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07175226586102719, - "normalized_score": 7.175226586102719 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2625838926174497, - "normalized_score": 1.6778523489932917 - }, - "musr": { - "name": "MUSR", - "value": 0.3117083333333333, - "normalized_score": 0.6635416666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.18184840425531915, - "normalized_score": 9.094267139479904 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-09", - "submission_date": "2024-10-17", - "generation": 1, - "base_model": "DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1.1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 1.3605236173599355 - } - }, - { - "id": "DeepAutoAI/causal_gpt2_bfloat16_995f029f6645dde1ef830406001754b904c49775_False", - "model": { - "name": "DeepAutoAI/causal_gpt2", - "sha": "995f029f6645dde1ef830406001754b904c49775", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "GPT2LMHeadModel", - "average_score": 6.032059300937514, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1812767900282362, - "normalized_score": 18.12767900282362 - }, - "bbh": { - "name": "BBH", - "value": 0.30257073962835446, - "normalized_score": 2.6333438399574587 - }, - "math": { - "name": "MATH Level 5", - "value": 0.005287009063444109, - 
"normalized_score": 0.5287009063444109 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.42695833333333333, - "normalized_score": 12.103125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11311502659574468, - "normalized_score": 1.457225177304964 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-17", - "submission_date": "2024-10-17", - "generation": 0, - "base_model": "DeepAutoAI/causal_gpt2", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 0.124, - "co2_cost": 0.25173041948319086 - } - }, - { - "id": "DeepAutoAI/d2nwg_Llama-3.1-8B-Instruct-v0.0_bfloat16_8bad8800d04a06f3f906728ee223cab2f50453a0_True", - "model": { - "name": "DeepAutoAI/d2nwg_Llama-3.1-8B-Instruct-v0.0", - "sha": "8bad8800d04a06f3f906728ee223cab2f50453a0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.338965494504436, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7892746800711002, - "normalized_score": 78.92746800711004 - }, - "bbh": { - "name": "BBH", - "value": 0.5080411642065981, - "normalized_score": 30.51007603826353 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18051359516616314, - "normalized_score": 18.051359516616312 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.41346875, - "normalized_score": 10.983593749999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3877160904255319, - "normalized_score": 31.96845449172577 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-10", - "submission_date": "2024-09-10", - "generation": 0, - "base_model": "DeepAutoAI/d2nwg_Llama-3.1-8B-Instruct-v0.0", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.7123559936882888 - } - }, - { - "id": "DeepAutoAI/d2nwg_causal_gpt2_bfloat16_eab065cba5a7a9b08f8b264d61d504c4ecbb611b_False", - "model": { - "name": "DeepAutoAI/d2nwg_causal_gpt2", - "sha": "eab065cba5a7a9b08f8b264d61d504c4ecbb611b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "GPT2LMHeadModel", - "average_score": 6.305441322097188, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19161823960425006, - "normalized_score": 19.161823960425007 - }, - "bbh": { - "name": "BBH", - "value": 0.30268984588252307, - "normalized_score": 2.850573557678692 - }, - "math": { - "name": "MATH Level 5", - "value": 0.004531722054380665, - "normalized_score": 0.4531722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2575503355704698, - "normalized_score": 1.0067114093959737 - }, - "musr": { - "name": "MUSR", - "value": 0.42971875, - "normalized_score": 12.681510416666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11510970744680851, - "normalized_score": 1.6788563829787229 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - 
"metadata": { - "upload_date": "2024-10-18", - "submission_date": "2024-10-18", - "generation": 0, - "base_model": "DeepAutoAI/d2nwg_causal_gpt2", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.124, - "co2_cost": 0.2598146183920366 - } - }, - { - "id": "DeepAutoAI/d2nwg_causal_gpt2_v1_bfloat16_3f40c3dcb3eb591dec80ff03573eec7928a7feaa_False", - "model": { - "name": "DeepAutoAI/d2nwg_causal_gpt2_v1", - "sha": "3f40c3dcb3eb591dec80ff03573eec7928a7feaa", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "GPT2LMHeadModel", - "average_score": 6.419565742614277, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1988623518929773, - "normalized_score": 19.886235189297732 - }, - "bbh": { - "name": "BBH", - "value": 0.29918984588252306, - "normalized_score": 2.3872783507070148 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0037764350453172208, - "normalized_score": 0.3776435045317221 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.4336875, - "normalized_score": 13.244270833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11353058510638298, - "normalized_score": 1.5033983451536632 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-18", - "submission_date": "2024-10-19", - "generation": 0, - "base_model": "DeepAutoAI/d2nwg_causal_gpt2_v1", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.124, - "co2_cost": 0.3430067286083757 - } - }, - { - "id": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Inst_bfloat16_0f04c5ad830f8ae0828191a4670fd4ba361b63d2_True", - "model": { - "name": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Inst", - "sha": "0f04c5ad830f8ae0828191a4670fd4ba361b63d2", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.85905810866196, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.803263119633683, - "normalized_score": 80.32631196336831 - }, - "bbh": { - "name": "BBH", - "value": 0.512116784464076, - "normalized_score": 31.101628224178786 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18882175226586104, - "normalized_score": 18.882175226586103 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28942953020134227, - "normalized_score": 5.257270693512303 - }, - "musr": { - "name": "MUSR", - "value": 0.41613541666666665, - "normalized_score": 11.51692708333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38863031914893614, - "normalized_score": 32.070035460992905 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-16", - "submission_date": "2024-10-09", - "generation": 1, - "base_model": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Inst (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 8.03, - "co2_cost": 2.5700611964067677 - } - }, - { - "id": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.0_bfloat16_210a97b4dadbda63cc9fe459e8415d4cd3bbaf99_True", - "model": { - "name": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.0", - "sha": "210a97b4dadbda63cc9fe459e8415d4cd3bbaf99", - 
"precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.735244330693718, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7889499860370484, - "normalized_score": 78.89499860370483 - }, - "bbh": { - "name": "BBH", - "value": 0.5125175335277464, - "normalized_score": 31.162649496607866 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19184290030211482, - "normalized_score": 19.184290030211482 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.41213541666666664, - "normalized_score": 11.51692708333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38954454787234044, - "normalized_score": 32.171616430260045 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-14", - "submission_date": "2024-09-15", - "generation": 0, - "base_model": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.0", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.72090943948751 - } - }, - { - "id": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.1_bfloat16_ecd140c95985b4292c896e25a94a7629d2924ad1_True", - "model": { - "name": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.1", - "sha": "ecd140c95985b4292c896e25a94a7629d2924ad1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.735244330693718, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7889499860370484, - "normalized_score": 78.89499860370483 - }, - "bbh": { - "name": "BBH", - "value": 0.5125175335277464, - "normalized_score": 31.162649496607866 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19184290030211482, - "normalized_score": 19.184290030211482 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.41213541666666664, - "normalized_score": 11.51692708333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38954454787234044, - "normalized_score": 32.171616430260045 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-15", - "submission_date": "2024-09-16", - "generation": 0, - "base_model": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.1", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.6568916130640359 - } - }, - { - "id": "DeepMount00/Lexora-Lite-3B_bfloat16_2cf39db7ecac17edca0bf4e0973b7fb58c40c22c_True", - "model": { - "name": "DeepMount00/Lexora-Lite-3B", - "sha": "2cf39db7ecac17edca0bf4e0973b7fb58c40c22c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 24.888387400166817, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5775996577968678, - "normalized_score": 57.759965779686794 - }, - "bbh": { - "name": "BBH", - "value": 0.4873392373334518, - "normalized_score": 28.436278690016838 - }, - "math": { - "name": "MATH Level 5", 
- "value": 0.23036253776435045, - "normalized_score": 23.036253776435046 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.39660416666666665, - "normalized_score": 7.942187500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3602061170212766, - "normalized_score": 28.911790780141843 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-19", - "submission_date": "2024-10-20", - "generation": 0, - "base_model": "DeepMount00/Lexora-Lite-3B", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 3.086, - "co2_cost": 3.161923130387553 - } - }, - { - "id": "DeepMount00/Lexora-Lite-3B_v2_bfloat16_0562af3800440fe9839bd6e885d9e0062ab70ead_True", - "model": { - "name": "DeepMount00/Lexora-Lite-3B_v2", - "sha": "0562af3800440fe9839bd6e885d9e0062ab70ead", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 22.690213091300453, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49431840848947456, - "normalized_score": 49.43184084894746 - }, - "bbh": { - "name": "BBH", - "value": 0.48117654754683153, - "normalized_score": 27.168451763223718 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2280966767371601, - "normalized_score": 22.80966767371601 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.38215625, - "normalized_score": 5.669531250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35438829787234044, - "normalized_score": 28.26536643026005 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-19", - "submission_date": "2025-02-25", - "generation": 0, - "base_model": "DeepMount00/Lexora-Lite-3B_v2", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 3.086, - "co2_cost": 0.7749346127852547 - } - }, - { - "id": "DeepMount00/Lexora-Medium-7B_bfloat16_c53d166f4f2996a5b7f161529f1ea6548b54a2b2_True", - "model": { - "name": "DeepMount00/Lexora-Medium-7B", - "sha": "c53d166f4f2996a5b7f161529f1ea6548b54a2b2", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 25.83719802694387, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4103379034295669, - "normalized_score": 41.03379034295669 - }, - "bbh": { - "name": "BBH", - "value": 0.5144844494250328, - "normalized_score": 32.6953311808552 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22205438066465258, - "normalized_score": 22.20543806646526 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.44394791666666666, - "normalized_score": 14.760156250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43251329787234044, - "normalized_score": 36.9459219858156 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - 
"is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-24", - "submission_date": "2024-09-24", - "generation": 0, - "base_model": "DeepMount00/Lexora-Medium-7B", - "hub_license": "apache-2.0", - "hub_hearts": 5, - "params_billions": 7.616, - "co2_cost": 3.4698222000444563 - } - }, - { - "id": "DeepMount00/Llama-3-8b-Ita_bfloat16_d40847d2981b588690c1dc21d5157d3f4afb2978_True", - "model": { - "name": "DeepMount00/Llama-3-8b-Ita", - "sha": "d40847d2981b588690c1dc21d5157d3f4afb2978", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.7968164089867, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7530297388706411, - "normalized_score": 75.3029738870641 - }, - "bbh": { - "name": "BBH", - "value": 0.493576505761469, - "normalized_score": 28.077745566893725 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06646525679758308, - "normalized_score": 6.646525679758309 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.4267708333333333, - "normalized_score": 11.6796875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38522273936170215, - "normalized_score": 31.691415484633573 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-01", - "submission_date": "2024-06-27", - "generation": 1, - "base_model": "meta-llama/Meta-Llama-3-8B", - "hub_license": "llama3", - "hub_hearts": 24, - "params_billions": 8.03, - "co2_cost": 1.556516897300023 - } - }, - { - "id": "DeepMount00/Llama-3.1-8b-ITA_bfloat16_5ede1e388b6b15bc06acd364a8f805fe9ed16db9_True", - "model": { - "name": "DeepMount00/Llama-3.1-8b-ITA", - "sha": "5ede1e388b6b15bc06acd364a8f805fe9ed16db9", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.228097653849485, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7916727616058724, - "normalized_score": 79.16727616058725 - }, - "bbh": { - "name": "BBH", - "value": 0.5109356715302854, - "normalized_score": 30.933181176860177 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10876132930513595, - "normalized_score": 10.876132930513595 - }, - "gpqa": { - "name": "GPQA", - "value": 0.287751677852349, - "normalized_score": 5.033557046979867 - }, - "musr": { - "name": "MUSR", - "value": 0.41359375, - "normalized_score": 11.399218749999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38763297872340424, - "normalized_score": 31.959219858156025 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-13", - "submission_date": "2024-10-28", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "", - "hub_hearts": 6, - "params_billions": 8.03, - "co2_cost": 2.5075735912307806 - } - }, - { - "id": "DeepMount00/Llama-3.1-8b-Ita_bfloat16_5ede1e388b6b15bc06acd364a8f805fe9ed16db9_False", - "model": { - "name": "DeepMount00/Llama-3.1-8b-Ita", - "sha": "5ede1e388b6b15bc06acd364a8f805fe9ed16db9", - "precision": "bfloat16", - 
"type": "❓other", - "weight_type": "Original", - "architecture": "Unknown", - "average_score": 26.26573192518462, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5364843060856306, - "normalized_score": 53.64843060856306 - }, - "bbh": { - "name": "BBH", - "value": 0.5169995464792883, - "normalized_score": 31.333639113507697 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17069486404833836, - "normalized_score": 17.069486404833835 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3062080536912752, - "normalized_score": 7.494407158836691 - }, - "musr": { - "name": "MUSR", - "value": 0.44871875, - "normalized_score": 15.156510416666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39602726063829785, - "normalized_score": 32.89191784869976 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-13", - "submission_date": "", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "", - "hub_hearts": 6, - "params_billions": 0.0, - "co2_cost": 0.9062470550284771 - } - }, - { - "id": "DeepMount00/Llama-3.1-Distilled_bfloat16_0a94c7ddb196107e8bf1b02e31488ff8c17b9eb3_True", - "model": { - "name": "DeepMount00/Llama-3.1-Distilled", - "sha": "0a94c7ddb196107e8bf1b02e31488ff8c17b9eb3", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.631398468956544, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7843787816327346, - "normalized_score": 78.43787816327344 - }, - "bbh": { - "name": "BBH", - "value": 0.5100875314179011, - "normalized_score": 30.84142128641545 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20317220543806647, - "normalized_score": 20.31722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.40581249999999996, - "normalized_score": 10.126562499999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3781582446808511, - "normalized_score": 30.906471631205672 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-25", - "submission_date": "2024-10-25", - "generation": 1, - "base_model": "meta-llama/Meta-Llama-3-8B", - "hub_license": "llama3", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.6780001093726986 - } - }, - { - "id": "DeepMount00/Qwen2-1.5B-Ita_bfloat16_26a6671a48c0023293c447932798a3ec72b55a29_True", - "model": { - "name": "DeepMount00/Qwen2-1.5B-Ita", - "sha": "26a6671a48c0023293c447932798a3ec72b55a29", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 16.83176132950784, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5173495214918638, - "normalized_score": 51.734952149186384 - }, - "bbh": { - "name": "BBH", - "value": 0.39805765159128703, - "normalized_score": 15.42299613137108 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11404833836858005, - "normalized_score": 11.404833836858005 - }, - "gpqa": { - "name": "GPQA", - "value": 
0.2625838926174497, - "normalized_score": 1.6778523489932917 - }, - "musr": { - "name": "MUSR", - "value": 0.35037500000000005, - "normalized_score": 1.0635416666666688 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2771775265957447, - "normalized_score": 19.686391843971627 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-13", - "submission_date": "2025-02-28", - "generation": 0, - "base_model": "DeepMount00/Qwen2-1.5B-Ita", - "hub_license": "apache-2.0", - "hub_hearts": 21, - "params_billions": 1.544, - "co2_cost": 0.5125510916273373 - } - }, - { - "id": "DeepMount00/Qwen2-1.5B-Ita_v2_bfloat16_e9c2a4197001bf188e4bc7d49873ea84f01e27c6_True", - "model": { - "name": "DeepMount00/Qwen2-1.5B-Ita_v2", - "sha": "e9c2a4197001bf188e4bc7d49873ea84f01e27c6", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 17.07000871129109, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49998891829235315, - "normalized_score": 49.99889182923532 - }, - "bbh": { - "name": "BBH", - "value": 0.3953827803974795, - "normalized_score": 15.106125348603845 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09667673716012085, - "normalized_score": 9.667673716012084 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.37018749999999995, - "normalized_score": 3.840104166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30319148936170215, - "normalized_score": 22.576832151300238 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-13", - "submission_date": "2025-03-06", - "generation": 0, - "base_model": "DeepMount00/Qwen2-1.5B-Ita_v2", - "hub_license": "apache-2.0", - "hub_hearts": 21, - "params_billions": 1.544, - "co2_cost": 0.5546789113986239 - } - }, - { - "id": "DeepMount00/Qwen2-1.5B-Ita_v3_bfloat16_4faa0ebc54beab39e1f044af1fee3ce44d9b8755_True", - "model": { - "name": "DeepMount00/Qwen2-1.5B-Ita_v3", - "sha": "4faa0ebc54beab39e1f044af1fee3ce44d9b8755", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 16.948513223691304, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4890479483326463, - "normalized_score": 48.90479483326463 - }, - "bbh": { - "name": "BBH", - "value": 0.3948478837209111, - "normalized_score": 15.22652186012814 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1042296072507553, - "normalized_score": 10.42296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2533557046979866, - "normalized_score": 0.44742729306487633 - }, - "musr": { - "name": "MUSR", - "value": 0.37415624999999997, - "normalized_score": 4.269531250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3017785904255319, - "normalized_score": 22.419843380614655 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-13", - 
"submission_date": "2025-03-06", - "generation": 0, - "base_model": "DeepMount00/Qwen2-1.5B-Ita_v3", - "hub_license": "apache-2.0", - "hub_hearts": 21, - "params_billions": 1.544, - "co2_cost": 0.5846262308283405 - } - }, - { - "id": "DeepMount00/Qwen2-1.5B-Ita_v5_bfloat16_681e6db531df0cc3d7806251659b973ed4ff8c8f_True", - "model": { - "name": "DeepMount00/Qwen2-1.5B-Ita_v5", - "sha": "681e6db531df0cc3d7806251659b973ed4ff8c8f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 17.023240512569227, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4987400098405564, - "normalized_score": 49.87400098405564 - }, - "bbh": { - "name": "BBH", - "value": 0.40320443289745417, - "normalized_score": 16.487037607634196 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11782477341389729, - "normalized_score": 11.782477341389729 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25419463087248323, - "normalized_score": 0.5592841163310973 - }, - "musr": { - "name": "MUSR", - "value": 0.34225, - "normalized_score": 1.847916666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29429853723404253, - "normalized_score": 21.588726359338057 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-13", - "submission_date": "2025-03-10", - "generation": 0, - "base_model": "DeepMount00/Qwen2-1.5B-Ita_v5", - "hub_license": "apache-2.0", - "hub_hearts": 21, - "params_billions": 1.544, - "co2_cost": 0.5321054999622403 - } - }, - { - "id": "DeepMount00/Qwen2-1.5B-Ita_v6_bfloat16_b3360bd6093edb8a98696443405f94ce37a40bd2_True", - "model": { - "name": "DeepMount00/Qwen2-1.5B-Ita_v6", - "sha": "b3360bd6093edb8a98696443405f94ce37a40bd2", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 14.577672072316155, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.29990425404593146, - "normalized_score": 29.990425404593147 - }, - "bbh": { - "name": "BBH", - "value": 0.42486081646897506, - "normalized_score": 19.093803548070223 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08459214501510574, - "normalized_score": 8.459214501510575 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.3754583333333333, - "normalized_score": 4.765625000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28715093085106386, - "normalized_score": 20.794547872340427 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-10", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.497, - "co2_cost": 0.6074762902125238 - } - }, - { - "id": "DeepMount00/Qwen2.5-7B-Instruct-MathCoder_bfloat16_90df996cdb1f3d5f051513c50df4cdfda858b5f2_True", - "model": { - "name": "DeepMount00/Qwen2.5-7B-Instruct-MathCoder", - "sha": "90df996cdb1f3d5f051513c50df4cdfda858b5f2", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", 
- "architecture": "Qwen2ForCausalLM", - "average_score": 4.39690970265251, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15302508455342934, - "normalized_score": 15.302508455342934 - }, - "bbh": { - "name": "BBH", - "value": 0.2998444769655102, - "normalized_score": 2.636670587150039 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0007552870090634441, - "normalized_score": 0.0755287009063444 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2625838926174497, - "normalized_score": 1.6778523489932917 - }, - "musr": { - "name": "MUSR", - "value": 0.3806354166666666, - "normalized_score": 5.379427083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11178523936170212, - "normalized_score": 1.309471040189124 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-10-24", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 2.5853591543493346 - } - }, - { - "id": "DeepMount00/mergekit-ties-okvgjfz_bfloat16_90df996cdb1f3d5f051513c50df4cdfda858b5f2_True", - "model": { - "name": "DeepMount00/mergekit-ties-okvgjfz", - "sha": "90df996cdb1f3d5f051513c50df4cdfda858b5f2", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.39690970265251, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15302508455342934, - "normalized_score": 15.302508455342934 - }, - "bbh": { - "name": "BBH", - "value": 0.2998444769655102, - "normalized_score": 2.636670587150039 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0007552870090634441, - "normalized_score": 0.0755287009063444 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2625838926174497, - "normalized_score": 1.6778523489932917 - }, - "musr": { - "name": "MUSR", - "value": 0.3806354166666666, - "normalized_score": 5.379427083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11178523936170212, - "normalized_score": 1.309471040189124 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-10-24", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 2.577642692775661 - } - }, - { - "id": "Delta-Vector/Baldur-8B_bfloat16_97f5d321a8346551a5ed704997dd1e93c59883f3_False", - "model": { - "name": "Delta-Vector/Baldur-8B", - "sha": "97f5d321a8346551a5ed704997dd1e93c59883f3", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.191735720120548, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47818233398493776, - "normalized_score": 47.818233398493774 - }, - "bbh": { - "name": "BBH", - "value": 0.5305842954529679, - "normalized_score": 32.54183409581636 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14350453172205438, - "normalized_score": 14.350453172205437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - "musr": { - 
"name": "MUSR", - "value": 0.43715624999999997, - "normalized_score": 14.011197916666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3654421542553192, - "normalized_score": 29.493572695035457 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-23", - "submission_date": "2024-10-06", - "generation": 1, - "base_model": "Delta-Vector/Baldur-8B (Merge)", - "hub_license": "", - "hub_hearts": 5, - "params_billions": 8.0, - "co2_cost": 3.060930655450191 - } - }, - { - "id": "Delta-Vector/Control-8B_bfloat16_c8743ee5ca0efd31aa9dd1bd14c770430c85a6c1_True", - "model": { - "name": "Delta-Vector/Control-8B", - "sha": "c8743ee5ca0efd31aa9dd1bd14c770430c85a6c1", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.058025917312307, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5489733906035985, - "normalized_score": 54.89733906035985 - }, - "bbh": { - "name": "BBH", - "value": 0.5041458754993735, - "normalized_score": 29.15507782169402 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13897280966767372, - "normalized_score": 13.897280966767372 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3162751677852349, - "normalized_score": 8.83668903803132 - }, - "musr": { - "name": "MUSR", - "value": 0.43554166666666666, - "normalized_score": 13.209374999999994 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3731715425531915, - "normalized_score": 30.352393617021285 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-23", - "submission_date": "2024-11-25", - "generation": 0, - "base_model": "Delta-Vector/Control-8B", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.3549719696219258 - } - }, - { - "id": "Delta-Vector/Control-8B-V1.1_bfloat16_6d4593645d1c4dc61d1c223922f635d79283d22b_True", - "model": { - "name": "Delta-Vector/Control-8B-V1.1", - "sha": "6d4593645d1c4dc61d1c223922f635d79283d22b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.632508988415825, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5696562897556262, - "normalized_score": 56.96562897556262 - }, - "bbh": { - "name": "BBH", - "value": 0.49928406748541837, - "normalized_score": 28.725850265520112 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12764350453172205, - "normalized_score": 12.764350453172204 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.42372916666666666, - "normalized_score": 11.232812500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37450132978723405, - "normalized_score": 30.500147754137117 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-30", - "submission_date": "2024-11-25", - "generation": 0, - "base_model": "Delta-Vector/Control-8B-V1.1", - "hub_license": "", - 
"hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.281195193166664 - } - }, - { - "id": "Delta-Vector/Darkens-8B_bfloat16_e82be0389bfcecd1998dba1c3bb35b8d95d01bf2_False", - "model": { - "name": "Delta-Vector/Darkens-8B", - "sha": "e82be0389bfcecd1998dba1c3bb35b8d95d01bf2", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 18.937415404374313, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25476624245889795, - "normalized_score": 25.476624245889795 - }, - "bbh": { - "name": "BBH", - "value": 0.5250590567372793, - "normalized_score": 32.88379503743108 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05891238670694864, - "normalized_score": 5.8912386706948645 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32466442953020136, - "normalized_score": 9.955257270693513 - }, - "musr": { - "name": "MUSR", - "value": 0.4105520833333333, - "normalized_score": 9.019010416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3735871010638298, - "normalized_score": 30.398566784869974 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-22", - "submission_date": "2024-10-06", - "generation": 1, - "base_model": "Delta-Vector/Darkens-8B (Merge)", - "hub_license": "", - "hub_hearts": 4, - "params_billions": 8.414, - "co2_cost": 2.3994861581672873 - } - }, - { - "id": "Delta-Vector/Henbane-7b-attempt2_bfloat16_448ef54e5af03e13f16f3db8ad8d1481479ac12e_True", - "model": { - "name": "Delta-Vector/Henbane-7b-attempt2", - "sha": "448ef54e5af03e13f16f3db8ad8d1481479ac12e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 23.813949980028713, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4157335868828043, - "normalized_score": 41.573358688280436 - }, - "bbh": { - "name": "BBH", - "value": 0.5061177974093075, - "normalized_score": 30.865849451121658 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22734138972809667, - "normalized_score": 22.734138972809667 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.39734375000000005, - "normalized_score": 8.701302083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4027593085106383, - "normalized_score": 33.63992316784869 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-13", - "submission_date": "2024-10-11", - "generation": 1, - "base_model": "Qwen/Qwen2-7B", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.0, - "co2_cost": 2.2676763161131808 - } - }, - { - "id": "Delta-Vector/Odin-9B_bfloat16_9ff20f5dd427e751ada834319bfdd9ea60b5e89c_False", - "model": { - "name": "Delta-Vector/Odin-9B", - "sha": "9ff20f5dd427e751ada834319bfdd9ea60b5e89c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 24.977112692219293, - "has_chat_template": false - }, - "evaluations": { - "ifeval": 
{ - "name": "IFEval", - "value": 0.3691970637907419, - "normalized_score": 36.91970637907419 - }, - "bbh": { - "name": "BBH", - "value": 0.5440253444823155, - "normalized_score": 34.83242280758616 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14501510574018128, - "normalized_score": 14.501510574018129 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3414429530201342, - "normalized_score": 12.192393736017896 - }, - "musr": { - "name": "MUSR", - "value": 0.46478125, - "normalized_score": 17.56432291666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4046708776595745, - "normalized_score": 33.85231973995272 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-27", - "submission_date": "2024-10-06", - "generation": 0, - "base_model": "Delta-Vector/Odin-9B", - "hub_license": "", - "hub_hearts": 4, - "params_billions": 9.242, - "co2_cost": 5.416323354381718 - } - }, - { - "id": "Delta-Vector/Tor-8B_bfloat16_d30a7a121c2ef5dc14004cfdf3fd13208dfbdb4f_False", - "model": { - "name": "Delta-Vector/Tor-8B", - "sha": "d30a7a121c2ef5dc14004cfdf3fd13208dfbdb4f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 18.406879108800087, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23815476269631244, - "normalized_score": 23.815476269631244 - }, - "bbh": { - "name": "BBH", - "value": 0.5209108776928992, - "normalized_score": 31.73822449849867 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05891238670694864, - "normalized_score": 5.8912386706948645 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3238255033557047, - "normalized_score": 9.843400447427292 - }, - "musr": { - "name": "MUSR", - "value": 0.40921874999999996, - "normalized_score": 8.819010416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37300531914893614, - "normalized_score": 30.33392434988179 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-21", - "submission_date": "2024-10-06", - "generation": 1, - "base_model": "Delta-Vector/Tor-8B (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 8.414, - "co2_cost": 2.504106680356207 - } - }, - { - "id": "DevQuasar/DevQuasar-R1-Uncensored-Llama-8B_float16_97a87606addb28c1d76d27cca5e5485c1dbff4e3_False", - "model": { - "name": "DevQuasar/DevQuasar-R1-Uncensored-Llama-8B", - "sha": "97a87606addb28c1d76d27cca5e5485c1dbff4e3", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.432648640760963, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.38488432913558246, - "normalized_score": 38.48843291355825 - }, - "bbh": { - "name": "BBH", - "value": 0.5117943836412089, - "normalized_score": 30.220238234792646 - }, - "math": { - "name": "MATH Level 5", - "value": 0.33081570996978854, - "normalized_score": 33.081570996978854 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34731543624161076, - "normalized_score": 12.975391498881436 - }, - "musr": { - "name": "MUSR", - "value": 0.44357291666666665, - "normalized_score": 14.779947916666664 - }, - 
"mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3614527925531915, - "normalized_score": 29.050310283687946 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-28", - "submission_date": "2025-02-09", - "generation": 1, - "base_model": "DevQuasar/DevQuasar-R1-Uncensored-Llama-8B (Merge)", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 0.7195218960856864 - } - }, - { - "id": "Dongwei/DeepSeek-R1-Distill-Qwen-7B-GRPO_bfloat16_177ffda54582d6e8f3830722d91a3b5c99a38a1d_True", - "model": { - "name": "Dongwei/DeepSeek-R1-Distill-Qwen-7B-GRPO", - "sha": "177ffda54582d6e8f3830722d91a3b5c99a38a1d", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 14.996461674248117, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.40376866713653103, - "normalized_score": 40.3768667136531 - }, - "bbh": { - "name": "BBH", - "value": 0.34425676981862185, - "normalized_score": 7.882702983365756 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19561933534743203, - "normalized_score": 19.561933534743204 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.36628124999999995, - "normalized_score": 3.5518229166666675 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.23221409574468085, - "normalized_score": 14.690455082742314 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-01", - "submission_date": "2025-02-05", - "generation": 1, - "base_model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.3446612624924523 - } - }, - { - "id": "DoppelReflEx/L3-8B-R1-WolfCore_bfloat16_b457a83cb3e4468315ccd5a768fd5302d2b9926d_False", - "model": { - "name": "DoppelReflEx/L3-8B-R1-WolfCore", - "sha": "b457a83cb3e4468315ccd5a768fd5302d2b9926d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.481232873269377, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3775404814780339, - "normalized_score": 37.75404814780339 - }, - "bbh": { - "name": "BBH", - "value": 0.531794652653343, - "normalized_score": 33.760104762918594 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16314199395770393, - "normalized_score": 16.314199395770395 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3288590604026846, - "normalized_score": 10.514541387024611 - }, - "musr": { - "name": "MUSR", - "value": 0.42766666666666664, - "normalized_score": 12.358333333333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3716755319148936, - "normalized_score": 30.18617021276595 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-28", - "submission_date": "2025-02-28", - "generation": 1, - "base_model": "DoppelReflEx/L3-8B-R1-WolfCore (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 1, - 
"params_billions": 8.03, - "co2_cost": 0.660438890438418 - } - }, - { - "id": "DoppelReflEx/L3-8B-R1-WolfCore-V1.5-test_bfloat16_044841358609fdc68053b4c6c0a1c41db7e8d327_False", - "model": { - "name": "DoppelReflEx/L3-8B-R1-WolfCore-V1.5-test", - "sha": "044841358609fdc68053b4c6c0a1c41db7e8d327", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.363618284395272, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3955006050612375, - "normalized_score": 39.55006050612376 - }, - "bbh": { - "name": "BBH", - "value": 0.5314954163679548, - "normalized_score": 33.45949833611522 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12311178247734139, - "normalized_score": 12.311178247734139 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3263422818791946, - "normalized_score": 10.17897091722595 - }, - "musr": { - "name": "MUSR", - "value": 0.3840729166666667, - "normalized_score": 8.375781250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37275598404255317, - "normalized_score": 30.306220449172578 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-01", - "submission_date": "2025-03-01", - "generation": 1, - "base_model": "DoppelReflEx/L3-8B-R1-WolfCore-V1.5-test (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.654732953184737 - } - }, - { - "id": "DoppelReflEx/L3-8B-WolfCore_bfloat16_e83eab6e8f04065c770bced65bde494599c54cee_False", - "model": { - "name": "DoppelReflEx/L3-8B-WolfCore", - "sha": "e83eab6e8f04065c770bced65bde494599c54cee", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.170086506168307, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4021950646506824, - "normalized_score": 40.219506465068235 - }, - "bbh": { - "name": "BBH", - "value": 0.5181980783946081, - "normalized_score": 31.290072076092343 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09818731117824774, - "normalized_score": 9.818731117824774 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.39728125000000003, - "normalized_score": 7.693489583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3705119680851064, - "normalized_score": 30.056885342789595 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-28", - "submission_date": "2025-02-28", - "generation": 1, - "base_model": "DoppelReflEx/L3-8B-WolfCore (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 0.6531067502572314 - } - }, - { - "id": "DoppelReflEx/MN-12B-FoxFrame-test_bfloat16_b95a2da79360a9da785112ead60214f7b7605e25_False", - "model": { - "name": "DoppelReflEx/MN-12B-FoxFrame-test", - "sha": "b95a2da79360a9da785112ead60214f7b7605e25", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 23.221061750775235, - "has_chat_template": false - }, - 
"evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42220308780701876, - "normalized_score": 42.22030878070187 - }, - "bbh": { - "name": "BBH", - "value": 0.5456376527271466, - "normalized_score": 34.55981394889724 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13972809667673716, - "normalized_score": 13.972809667673717 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.42540625, - "normalized_score": 13.042447916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3503158244680851, - "normalized_score": 27.812869385342786 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-06", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 12.248, - "co2_cost": 1.5525658386176966 - } - }, - { - "id": "DoppelReflEx/MN-12B-FoxFrame2-test_bfloat16_322627ea048553a7c30c7351dfe4bff000d979eb_False", - "model": { - "name": "DoppelReflEx/MN-12B-FoxFrame2-test", - "sha": "322627ea048553a7c30c7351dfe4bff000d979eb", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 23.639728527998752, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43189514931492884, - "normalized_score": 43.18951493149288 - }, - "bbh": { - "name": "BBH", - "value": 0.5484795753806021, - "normalized_score": 34.99669953906846 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1404833836858006, - "normalized_score": 14.04833836858006 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3145973154362416, - "normalized_score": 8.612975391498878 - }, - "musr": { - "name": "MUSR", - "value": 0.4251875, - "normalized_score": 12.448437499999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3568816489361702, - "normalized_score": 28.54240543735224 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-08", - "submission_date": "2025-02-08", - "generation": 1, - "base_model": "DoppelReflEx/MN-12B-FoxFrame2-test (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 2, - "params_billions": 12.248, - "co2_cost": 0.7505194111203773 - } - }, - { - "id": "DoppelReflEx/MN-12B-FoxFrame3-test_bfloat16_a300b18573c9bcb4702d84e686fa826e7b695686_False", - "model": { - "name": "DoppelReflEx/MN-12B-FoxFrame3-test", - "sha": "a300b18573c9bcb4702d84e686fa826e7b695686", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 23.94718793396009, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43231957871780213, - "normalized_score": 43.23195787178021 - }, - "bbh": { - "name": "BBH", - "value": 0.5394764281718397, - "normalized_score": 34.041185744792614 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13217522658610273, - "normalized_score": 13.217522658610273 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.45976041666666667, - "normalized_score": 
18.270052083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35289228723404253, - "normalized_score": 28.09914302600473 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-08", - "submission_date": "2025-02-08", - "generation": 1, - "base_model": "DoppelReflEx/MN-12B-FoxFrame3-test (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 1, - "params_billions": 12.248, - "co2_cost": 0.697196284743319 - } - }, - { - "id": "DoppelReflEx/MN-12B-Kakigori_bfloat16_43cdb3d3df47f5d4ed8386f411859b9d72ea9017_False", - "model": { - "name": "DoppelReflEx/MN-12B-Kakigori", - "sha": "43cdb3d3df47f5d4ed8386f411859b9d72ea9017", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.69773317205639, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.359329911302012, - "normalized_score": 35.932991130201195 - }, - "bbh": { - "name": "BBH", - "value": 0.5415529337961275, - "normalized_score": 34.3313471973429 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11933534743202417, - "normalized_score": 11.933534743202417 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32466442953020136, - "normalized_score": 9.955257270693513 - }, - "musr": { - "name": "MUSR", - "value": 0.40521875, - "normalized_score": 9.352343750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3581283244680851, - "normalized_score": 28.680924940898343 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-29", - "submission_date": "2025-01-29", - "generation": 1, - "base_model": "DoppelReflEx/MN-12B-Kakigori (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 2, - "params_billions": 12.248, - "co2_cost": 1.5956599074145044 - } - }, - { - "id": "DoppelReflEx/MN-12B-LilithFrame_float16_e3e8cce8267613d5c2ff68884aaeac8ab9b39e93_False", - "model": { - "name": "DoppelReflEx/MN-12B-LilithFrame", - "sha": "e3e8cce8267613d5c2ff68884aaeac8ab9b39e93", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.321059817195124, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4509545782966972, - "normalized_score": 45.095457829669726 - }, - "bbh": { - "name": "BBH", - "value": 0.4944264226434414, - "normalized_score": 27.492064455270775 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11555891238670694, - "normalized_score": 11.555891238670695 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3196308724832215, - "normalized_score": 9.284116331096197 - }, - "musr": { - "name": "MUSR", - "value": 0.3895625, - "normalized_score": 9.428645833333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3256316489361702, - "normalized_score": 25.070183215130022 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-29", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 12.248, - "co2_cost": 
1.8565068435824486 - } - }, - { - "id": "DoppelReflEx/MN-12B-LilithFrame_bfloat16_e3e8cce8267613d5c2ff68884aaeac8ab9b39e93_False", - "model": { - "name": "DoppelReflEx/MN-12B-LilithFrame", - "sha": "e3e8cce8267613d5c2ff68884aaeac8ab9b39e93", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.022540131471107, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43604192431636946, - "normalized_score": 43.604192431636946 - }, - "bbh": { - "name": "BBH", - "value": 0.4956125598349656, - "normalized_score": 27.653497805773082 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05891238670694864, - "normalized_score": 5.8912386706948645 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32046979865771813, - "normalized_score": 9.395973154362418 - }, - "musr": { - "name": "MUSR", - "value": 0.3842604166666666, - "normalized_score": 8.73255208333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32372007978723405, - "normalized_score": 24.857786643026003 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-29", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 12.248, - "co2_cost": 0.928004753006219 - } - }, - { - "id": "DoppelReflEx/MN-12B-LilithFrame-Experiment-2_bfloat16_75316e8ed913cf62482f36713a007d471813bb0e_False", - "model": { - "name": "DoppelReflEx/MN-12B-LilithFrame-Experiment-2", - "sha": "75316e8ed913cf62482f36713a007d471813bb0e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.002090295792474, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4299469851106176, - "normalized_score": 42.99469851106176 - }, - "bbh": { - "name": "BBH", - "value": 0.4982672766561394, - "normalized_score": 28.11118297022868 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10725075528700906, - "normalized_score": 10.725075528700906 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32550335570469796, - "normalized_score": 10.067114093959727 - }, - "musr": { - "name": "MUSR", - "value": 0.3804479166666666, - "normalized_score": 8.822656249999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32762632978723405, - "normalized_score": 25.29181442080378 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-29", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 12.248, - "co2_cost": 1.839135306107101 - } - }, - { - "id": "DoppelReflEx/MN-12B-LilithFrame-Experiment-3_bfloat16_e33ca2d80584a934a6c2ed1a9ba788b8998d0d15_False", - "model": { - "name": "DoppelReflEx/MN-12B-LilithFrame-Experiment-3", - "sha": "e33ca2d80584a934a6c2ed1a9ba788b8998d0d15", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 23.13928263244864, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4127858526487498, - 
"normalized_score": 41.27858526487498 - }, - "bbh": { - "name": "BBH", - "value": 0.5468080647121653, - "normalized_score": 34.998286324114495 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13444108761329304, - "normalized_score": 13.444108761329304 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32802013422818793, - "normalized_score": 10.402684563758392 - }, - "musr": { - "name": "MUSR", - "value": 0.4038541666666667, - "normalized_score": 9.781770833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3603723404255319, - "normalized_score": 28.930260047281326 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-29", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 12.248, - "co2_cost": 2.362722818955918 - } - }, - { - "id": "DoppelReflEx/MN-12B-LilithFrame-Experiment-4_bfloat16_242fffeb766e1de3e7040cb7a981fc9fb37ada3c_False", - "model": { - "name": "DoppelReflEx/MN-12B-LilithFrame-Experiment-4", - "sha": "242fffeb766e1de3e7040cb7a981fc9fb37ada3c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 23.52862068050267, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3981480250180632, - "normalized_score": 39.81480250180631 - }, - "bbh": { - "name": "BBH", - "value": 0.5534370722864824, - "normalized_score": 35.77765007970374 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12235649546827794, - "normalized_score": 12.235649546827794 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31711409395973156, - "normalized_score": 8.948545861297541 - }, - "musr": { - "name": "MUSR", - "value": 0.43706249999999996, - "normalized_score": 14.966145833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3648603723404255, - "normalized_score": 29.428930260047277 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-30", - "submission_date": "2025-01-30", - "generation": 1, - "base_model": "DoppelReflEx/MN-12B-LilithFrame-Experiment-4 (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 1, - "params_billions": 12.248, - "co2_cost": 1.6437133049727632 - } - }, - { - "id": "DoppelReflEx/MN-12B-Mimicore-GreenSnake_bfloat16_c1aee5ad2926129a5299e264a33c3890eb83cb8f_False", - "model": { - "name": "DoppelReflEx/MN-12B-Mimicore-GreenSnake", - "sha": "c1aee5ad2926129a5299e264a33c3890eb83cb8f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 25.0150134402288, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47800724300411795, - "normalized_score": 47.8007243004118 - }, - "bbh": { - "name": "BBH", - "value": 0.5480509710089697, - "normalized_score": 35.390600609410264 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13897280966767372, - "normalized_score": 13.897280966767372 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32466442953020136, - "normalized_score": 9.955257270693513 - }, - "musr": { - "name": "MUSR", - "value": 0.4305833333333333, - "normalized_score": 13.589583333333332 - }, - 
"mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3651097074468085, - "normalized_score": 29.4566341607565 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-27", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "DoppelReflEx/MN-12B-Mimicore-GreenSnake (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 2, - "params_billions": 12.248, - "co2_cost": 1.6886018245229184 - } - }, - { - "id": "DoppelReflEx/MN-12B-Mimicore-Nocturne_bfloat16_5cea74ebd1b0a4b3043e2789e21aa68706a9d817_False", - "model": { - "name": "DoppelReflEx/MN-12B-Mimicore-Nocturne", - "sha": "5cea74ebd1b0a4b3043e2789e21aa68706a9d817", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 24.06630184974314, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3956502081144696, - "normalized_score": 39.56502081144696 - }, - "bbh": { - "name": "BBH", - "value": 0.5703329773483826, - "normalized_score": 38.39866772323549 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10574018126888217, - "normalized_score": 10.574018126888216 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3196308724832215, - "normalized_score": 9.284116331096197 - }, - "musr": { - "name": "MUSR", - "value": 0.45690625, - "normalized_score": 17.31328125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36336436170212766, - "normalized_score": 29.262706855791958 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-08", - "submission_date": "2025-03-09", - "generation": 1, - "base_model": "DoppelReflEx/MN-12B-Mimicore-Nocturne (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 2, - "params_billions": 12.248, - "co2_cost": 0.8795798612287349 - } - }, - { - "id": "DoppelReflEx/MN-12B-Mimicore-Orochi_bfloat16_59515c9a5224bb45a1d2a7ea141e37a5ab9a9021_False", - "model": { - "name": "DoppelReflEx/MN-12B-Mimicore-Orochi", - "sha": "59515c9a5224bb45a1d2a7ea141e37a5ab9a9021", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 24.652222849504522, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4620451513096362, - "normalized_score": 46.20451513096362 - }, - "bbh": { - "name": "BBH", - "value": 0.54977394640115, - "normalized_score": 35.28322975535459 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13595166163141995, - "normalized_score": 13.595166163141995 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31291946308724833, - "normalized_score": 8.389261744966444 - }, - "musr": { - "name": "MUSR", - "value": 0.45458333333333334, - "normalized_score": 17.256249999999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34466422872340424, - "normalized_score": 27.184914302600472 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-28", - "submission_date": "2025-01-28", - "generation": 1, - "base_model": "DoppelReflEx/MN-12B-Mimicore-Orochi (Merge)", - "hub_license": "cc-by-nc-4.0", - 
"hub_hearts": 2, - "params_billions": 12.248, - "co2_cost": 1.5209626837047343 - } - }, - { - "id": "DoppelReflEx/MN-12B-Mimicore-Orochi-v2-Experiment_bfloat16_b0140973cf249ecb2ba399f1174f8229c91dc363_False", - "model": { - "name": "DoppelReflEx/MN-12B-Mimicore-Orochi-v2-Experiment", - "sha": "b0140973cf249ecb2ba399f1174f8229c91dc363", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.804010674590824, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2842413684579139, - "normalized_score": 28.424136845791388 - }, - "bbh": { - "name": "BBH", - "value": 0.5322525988273211, - "normalized_score": 32.77471060618407 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06117824773413897, - "normalized_score": 6.117824773413897 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.45737500000000003, - "normalized_score": 18.205208333333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3423371010638298, - "normalized_score": 26.926344562647753 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-28", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 12.248, - "co2_cost": 1.1022812062861855 - } - }, - { - "id": "DoppelReflEx/MN-12B-Mimicore-Orochi-v3-Experiment_bfloat16_d1f9bd2cd64564217f59802648a941a57b2b9733_False", - "model": { - "name": "DoppelReflEx/MN-12B-Mimicore-Orochi-v3-Experiment", - "sha": "d1f9bd2cd64564217f59802648a941a57b2b9733", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.641023288108883, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4101628124487471, - "normalized_score": 41.016281244874705 - }, - "bbh": { - "name": "BBH", - "value": 0.5437817873983797, - "normalized_score": 34.569479663100296 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1216012084592145, - "normalized_score": 12.16012084592145 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29278523489932884, - "normalized_score": 5.7046979865771785 - }, - "musr": { - "name": "MUSR", - "value": 0.44379166666666664, - "normalized_score": 15.773958333333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.339594414893617, - "normalized_score": 26.621601654846334 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-28", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 12.248, - "co2_cost": 1.334928752887779 - } - }, - { - "id": "DoppelReflEx/MN-12B-Mimicore-Orochi-v4-Experiment_bfloat16_41bc20297c95adc8bc1d2e993110f671907f0c32_False", - "model": { - "name": "DoppelReflEx/MN-12B-Mimicore-Orochi-v4-Experiment", - "sha": "41bc20297c95adc8bc1d2e993110f671907f0c32", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 23.57577520136853, - 
"has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4320702402957486, - "normalized_score": 43.20702402957486 - }, - "bbh": { - "name": "BBH", - "value": 0.5462502212045214, - "normalized_score": 35.29906847464648 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12084592145015106, - "normalized_score": 12.084592145015106 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.4449375, - "normalized_score": 15.483854166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3519780585106383, - "normalized_score": 27.997562056737586 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-28", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 12.248, - "co2_cost": 1.8864542031174691 - } - }, - { - "id": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake_bfloat16_ca84b8ab989a61658fc17e270b7344ed3885071f_False", - "model": { - "name": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake", - "sha": "ca84b8ab989a61658fc17e270b7344ed3885071f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 25.058560008551854, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44376033369238066, - "normalized_score": 44.376033369238066 - }, - "bbh": { - "name": "BBH", - "value": 0.5604605871844869, - "normalized_score": 36.89971047715762 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13141993957703926, - "normalized_score": 13.141993957703926 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3179530201342282, - "normalized_score": 9.060402684563762 - }, - "musr": { - "name": "MUSR", - "value": 0.456875, - "normalized_score": 17.34270833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3657746010638298, - "normalized_score": 29.53051122931442 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-27", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 3, - "params_billions": 12.248, - "co2_cost": 1.598793921384112 - } - }, - { - "id": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-1_bfloat16_f1fb881039e54ac80d84298b9054773a2bd72d21_False", - "model": { - "name": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-1", - "sha": "f1fb881039e54ac80d84298b9054773a2bd72d21", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 18.86667946331972, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.39090391272933595, - "normalized_score": 39.090391272933594 - }, - "bbh": { - "name": "BBH", - "value": 0.48656395204478037, - "normalized_score": 27.07796440244996 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07854984894259819, - "normalized_score": 7.854984894259818 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - 
"musr": { - "name": "MUSR", - "value": 0.3789583333333333, - "normalized_score": 8.303124999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31141954787234044, - "normalized_score": 23.49106087470449 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-29", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 12.248, - "co2_cost": 1.8769184354473236 - } - }, - { - "id": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-2_bfloat16__False", - "model": { - "name": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-2", - "sha": "", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.422542529894162, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.31239333856389934, - "normalized_score": 31.239333856389933 - }, - "bbh": { - "name": "BBH", - "value": 0.5126398500939828, - "normalized_score": 30.665719593512282 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11253776435045318, - "normalized_score": 11.253776435045317 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.39746875, - "normalized_score": 11.51692708333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33136635638297873, - "normalized_score": 25.70737293144208 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-29", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 12.248, - "co2_cost": 2.634526312899575 - } - }, - { - "id": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-3_bfloat16_12985da577e2bdcba11ad75b4aad6cf07cb67b51_False", - "model": { - "name": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-3", - "sha": "12985da577e2bdcba11ad75b4aad6cf07cb67b51", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.601520651982586, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4302218114602588, - "normalized_score": 43.02218114602588 - }, - "bbh": { - "name": "BBH", - "value": 0.4811798810475259, - "normalized_score": 26.321395450260827 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08987915407854985, - "normalized_score": 8.987915407854985 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - "musr": { - "name": "MUSR", - "value": 0.3684166666666666, - "normalized_score": 7.918749999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31981382978723405, - "normalized_score": 24.423758865248228 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-29", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 12.248, - "co2_cost": 
1.8143176591923247 - } - }, - { - "id": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-4_bfloat16_b7ec319e84b66dba6c620b9b01dc579cad96eb8d_False", - "model": { - "name": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-4", - "sha": "b7ec319e84b66dba6c620b9b01dc579cad96eb8d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.794341198490546, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42405151664250856, - "normalized_score": 42.40515166425085 - }, - "bbh": { - "name": "BBH", - "value": 0.5184748714407336, - "normalized_score": 31.422947025186726 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11404833836858005, - "normalized_score": 11.404833836858005 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3104026845637584, - "normalized_score": 8.05369127516779 - }, - "musr": { - "name": "MUSR", - "value": 0.40019791666666665, - "normalized_score": 11.458072916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3341921542553192, - "normalized_score": 26.021350472813243 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-29", - "submission_date": "2025-01-29", - "generation": 1, - "base_model": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-4 (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 5, - "params_billions": 12.248, - "co2_cost": 1.7271200547540042 - } - }, - { - "id": "DoppelReflEx/MN-12B-Unleashed-Twilight_bfloat16_47bb9e79f33f659c911843c874ac29653a8c4a7b_False", - "model": { - "name": "DoppelReflEx/MN-12B-Unleashed-Twilight", - "sha": "47bb9e79f33f659c911843c874ac29653a8c4a7b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.56427237998545, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3505121965274361, - "normalized_score": 35.05121965274361 - }, - "bbh": { - "name": "BBH", - "value": 0.5520627163174447, - "normalized_score": 35.97610662338389 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09592145015105741, - "normalized_score": 9.592145015105741 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3288590604026846, - "normalized_score": 10.514541387024611 - }, - "musr": { - "name": "MUSR", - "value": 0.4383958333333333, - "normalized_score": 14.499479166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3677692819148936, - "normalized_score": 29.752142434988176 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-09", - "submission_date": "2025-02-10", - "generation": 1, - "base_model": "DoppelReflEx/MN-12B-Unleashed-Twilight (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 12.248, - "co2_cost": 0.8143144766567885 - } - }, - { - "id": "DoppelReflEx/MN-12B-WolFrame_bfloat16_44ef103ff2b5ba1bfa9e375357ea1c897cb33788_False", - "model": { - "name": "DoppelReflEx/MN-12B-WolFrame", - "sha": "44ef103ff2b5ba1bfa9e375357ea1c897cb33788", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.078719684057372, - 
"has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4397387819873491, - "normalized_score": 43.97387819873491 - }, - "bbh": { - "name": "BBH", - "value": 0.511681287565329, - "normalized_score": 29.991929835267143 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13141993957703926, - "normalized_score": 13.141993957703926 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3104026845637584, - "normalized_score": 8.05369127516779 - }, - "musr": { - "name": "MUSR", - "value": 0.40146875, - "normalized_score": 10.716927083333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33934507978723405, - "normalized_score": 26.593897754137114 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-29", - "submission_date": "2025-02-01", - "generation": 1, - "base_model": "DoppelReflEx/MN-12B-WolFrame (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 5, - "params_billions": 12.248, - "co2_cost": 1.6813747793154408 - } - }, - { - "id": "DoppelReflEx/MiniusLight-24B_bfloat16_3bb87fa4b45b5554a1bdd8554302ed1a22a3c3ef_False", - "model": { - "name": "DoppelReflEx/MiniusLight-24B", - "sha": "3bb87fa4b45b5554a1bdd8554302ed1a22a3c3ef", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 26.210340083042343, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25766410900854175, - "normalized_score": 25.766410900854176 - }, - "bbh": { - "name": "BBH", - "value": 0.6256461050033514, - "normalized_score": 46.002968874248914 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12613293051359517, - "normalized_score": 12.613293051359516 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35822147651006714, - "normalized_score": 14.429530201342287 - }, - "musr": { - "name": "MUSR", - "value": 0.43191666666666667, - "normalized_score": 12.989583333333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5091422872340425, - "normalized_score": 45.46025413711584 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-07", - "submission_date": "2025-03-04", - "generation": 1, - "base_model": "DoppelReflEx/MiniusLight-24B (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 2, - "params_billions": 23.572, - "co2_cost": 1.4403396216495052 - } - }, - { - "id": "DoppelReflEx/MiniusLight-24B-test_bfloat16_b71988742288492a5728e795e2dc4a0114178835_False", - "model": { - "name": "DoppelReflEx/MiniusLight-24B-test", - "sha": "b71988742288492a5728e795e2dc4a0114178835", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.83721280671412, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.03936776641533354, - "normalized_score": 3.9367766415333536 - }, - "bbh": { - "name": "BBH", - "value": 0.6333927323374534, - "normalized_score": 46.95696642451458 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0256797583081571, - "normalized_score": 2.56797583081571 - }, - "gpqa": { - "name": "GPQA", - "value": 0.36828859060402686, - "normalized_score": 15.771812080536915 - }, - "musr": { - 
"name": "MUSR", - "value": 0.40925000000000006, - "normalized_score": 9.32291666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5182014627659575, - "normalized_score": 46.466829196217496 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-04", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 23.572, - "co2_cost": 0.5780434517444448 - } - }, - { - "id": "DoppelReflEx/MiniusLight-24B-v1b-test_bfloat16_d2ec8d77a022b2ad2e207ea882a595aad591de2b_False", - "model": { - "name": "DoppelReflEx/MiniusLight-24B-v1b-test", - "sha": "d2ec8d77a022b2ad2e207ea882a595aad591de2b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 32.3748943419153, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.37911408396388246, - "normalized_score": 37.911408396388246 - }, - "bbh": { - "name": "BBH", - "value": 0.6617145681113757, - "normalized_score": 50.63814760323434 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2394259818731118, - "normalized_score": 23.94259818731118 - }, - "gpqa": { - "name": "GPQA", - "value": 0.37919463087248323, - "normalized_score": 17.225950782997764 - }, - "musr": { - "name": "MUSR", - "value": 0.4557291666666667, - "normalized_score": 16.032812500000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5364860372340425, - "normalized_score": 48.49844858156028 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-04", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 23.572, - "co2_cost": 1.37466461037625 - } - }, - { - "id": "DoppelReflEx/MiniusLight-24B-v1c-test_bfloat16_07fbae508e6e796a33439d40a543f8bd60c6c047_False", - "model": { - "name": "DoppelReflEx/MiniusLight-24B-v1c-test", - "sha": "07fbae508e6e796a33439d40a543f8bd60c6c047", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 34.40831757695809, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.37858881102142317, - "normalized_score": 37.85888110214232 - }, - "bbh": { - "name": "BBH", - "value": 0.6752681657268389, - "normalized_score": 52.84065809537824 - }, - "math": { - "name": "MATH Level 5", - "value": 0.29682779456193353, - "normalized_score": 29.68277945619335 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3951342281879195, - "normalized_score": 19.35123042505593 - }, - "musr": { - "name": "MUSR", - "value": 0.46341666666666664, - "normalized_score": 16.860416666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5487034574468085, - "normalized_score": 49.85593971631205 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-04", - "submission_date": "2025-03-04", - "generation": 1, - "base_model": "DoppelReflEx/MiniusLight-24B-v1c-test (Merge)", - "hub_license": "cc-by-nc-4.0", - 
"hub_hearts": 4, - "params_billions": 23.572, - "co2_cost": 4.322544674351261 - } - }, - { - "id": "DoppelReflEx/MiniusLight-24B-v1d-test_bfloat16_26bb7f9b94257b717afe96e6d19f05141ebe89ac_False", - "model": { - "name": "DoppelReflEx/MiniusLight-24B-v1d-test", - "sha": "26bb7f9b94257b717afe96e6d19f05141ebe89ac", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 34.681949426054764, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.40324339419407174, - "normalized_score": 40.324339419407174 - }, - "bbh": { - "name": "BBH", - "value": 0.6712025325276962, - "normalized_score": 52.35844104160633 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2945619335347432, - "normalized_score": 29.45619335347432 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3951342281879195, - "normalized_score": 19.35123042505593 - }, - "musr": { - "name": "MUSR", - "value": 0.46208333333333335, - "normalized_score": 16.727083333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5488696808510638, - "normalized_score": 49.87440898345154 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-07", - "submission_date": "2025-03-07", - "generation": 1, - "base_model": "DoppelReflEx/MiniusLight-24B-v1d-test (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 2, - "params_billions": 23.572, - "co2_cost": 1.4973969873377053 - } - }, - { - "id": "DreadPoor/Again-8B-Model_Stock_float16_10052b086c6896ccd9d26522c45d348f1607c33c_True", - "model": { - "name": "DreadPoor/Again-8B-Model_Stock", - "sha": "10052b086c6896ccd9d26522c45d348f1607c33c", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.002387584653476, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6724213974476612, - "normalized_score": 67.2421397447661 - }, - "bbh": { - "name": "BBH", - "value": 0.5309801059970912, - "normalized_score": 33.259461226875196 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12009063444108761, - "normalized_score": 12.009063444108762 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.39867708333333335, - "normalized_score": 8.701302083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.351811835106383, - "normalized_score": 27.979092789598102 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-17", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 4.015, - "co2_cost": 1.3606541886058625 - } - }, - { - "id": "DreadPoor/Alita99-8B-LINEAR_bfloat16_cfffa050f433660fc6159a82ce09fc2841fa0b6c_True", - "model": { - "name": "DreadPoor/Alita99-8B-LINEAR", - "sha": "cfffa050f433660fc6159a82ce09fc2841fa0b6c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.392264436250034, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": 
"IFEval", - "value": 0.7190077882241341, - "normalized_score": 71.90077882241341 - }, - "bbh": { - "name": "BBH", - "value": 0.5441767095577089, - "normalized_score": 35.00891825379863 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1646525679758308, - "normalized_score": 16.46525679758308 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3162751677852349, - "normalized_score": 8.83668903803132 - }, - "musr": { - "name": "MUSR", - "value": 0.42664583333333334, - "normalized_score": 12.930729166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38090093085106386, - "normalized_score": 31.21121453900709 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-25", - "submission_date": "2024-11-26", - "generation": 1, - "base_model": "DreadPoor/Alita99-8B-LINEAR (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.317244660000561 - } - }, - { - "id": "DreadPoor/AnotherTest_bfloat16_40182ce563447e082186414c62e15af7fc33a431_True", - "model": { - "name": "DreadPoor/AnotherTest", - "sha": "40182ce563447e082186414c62e15af7fc33a431", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.50517112101721, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47006387496287627, - "normalized_score": 47.00638749628763 - }, - "bbh": { - "name": "BBH", - "value": 0.46834113564549334, - "normalized_score": 25.197137506034824 - }, - "math": { - "name": "MATH Level 5", - "value": 0.061933534743202415, - "normalized_score": 6.193353474320242 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.42128125, - "normalized_score": 11.42682291666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2874833776595745, - "normalized_score": 20.831486406619387 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-29", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.5180190760115888 - } - }, - { - "id": "DreadPoor/Aspire-8B-model_stock_bfloat16_5c23cb2aff877d0b7bdcfa4de43d1bc8a1852de0_True", - "model": { - "name": "DreadPoor/Aspire-8B-model_stock", - "sha": "5c23cb2aff877d0b7bdcfa4de43d1bc8a1852de0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.611281691142512, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7140620221013578, - "normalized_score": 71.40620221013577 - }, - "bbh": { - "name": "BBH", - "value": 0.5278251846388996, - "normalized_score": 32.534270073092834 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14954682779456194, - "normalized_score": 14.954682779456194 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3145973154362416, - "normalized_score": 8.612975391498878 - }, - "musr": { - "name": "MUSR", - "value": 0.42124999999999996, - "normalized_score": 13.456249999999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37632978723404253, 
- "normalized_score": 30.703309692671393 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-16", - "submission_date": "2024-09-17", - "generation": 1, - "base_model": "DreadPoor/Aspire-8B-model_stock (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 6, - "params_billions": 8.03, - "co2_cost": 1.6862557790299026 - } - }, - { - "id": "DreadPoor/Aspire_1.3-8B_model-stock_bfloat16_d36f5540e8c5654a9fdd8ece9ba8e88af26e5c40_True", - "model": { - "name": "DreadPoor/Aspire_1.3-8B_model-stock", - "sha": "d36f5540e8c5654a9fdd8ece9ba8e88af26e5c40", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.38880221155723, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7061685217445268, - "normalized_score": 70.61685217445267 - }, - "bbh": { - "name": "BBH", - "value": 0.5301644606574212, - "normalized_score": 32.66185070730422 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1691842900302115, - "normalized_score": 16.91842900302115 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.4104583333333333, - "normalized_score": 12.240624999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37159242021276595, - "normalized_score": 30.17693557919622 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-01", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.431562779750175 - } - }, - { - "id": "DreadPoor/Aspire_V2-8B-Model_Stock_bfloat16_e482d8852ec50b05420b865d27b7ed4682ab5ac8_True", - "model": { - "name": "DreadPoor/Aspire_V2-8B-Model_Stock", - "sha": "e482d8852ec50b05420b865d27b7ed4682ab5ac8", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.023157905941588, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7371430027881576, - "normalized_score": 73.71430027881576 - }, - "bbh": { - "name": "BBH", - "value": 0.5329650089428358, - "normalized_score": 33.327406017534244 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17598187311178248, - "normalized_score": 17.598187311178247 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32046979865771813, - "normalized_score": 9.395973154362418 - }, - "musr": { - "name": "MUSR", - "value": 0.38937499999999997, - "normalized_score": 10.138541666666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3696808510638298, - "normalized_score": 29.964539007092196 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-20", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3396128096032138 - } - }, - { - "id": "DreadPoor/Aspire_V2.1-8B-Model_Stock_bfloat16_c8b0acb6e3b5120cbdad9e6b2acf03ae9e9d1a0f_True", - 
"model": { - "name": "DreadPoor/Aspire_V2.1-8B-Model_Stock", - "sha": "c8b0acb6e3b5120cbdad9e6b2acf03ae9e9d1a0f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.738387280035045, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7237540836092679, - "normalized_score": 72.37540836092678 - }, - "bbh": { - "name": "BBH", - "value": 0.5236395810818485, - "normalized_score": 32.18794456865431 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17673716012084592, - "normalized_score": 17.673716012084594 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.41359375, - "normalized_score": 11.13255208333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3800698138297872, - "normalized_score": 31.11886820330969 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-20", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.350700850251785 - } - }, - { - "id": "DreadPoor/Aspire_V2_ALT-8B-Model_Stock_bfloat16_70e838e725b5f3889228103c1ee21f6eb7b0919c_True", - "model": { - "name": "DreadPoor/Aspire_V2_ALT-8B-Model_Stock", - "sha": "70e838e725b5f3889228103c1ee21f6eb7b0919c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.059893218432236, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7381170848903134, - "normalized_score": 73.81170848903133 - }, - "bbh": { - "name": "BBH", - "value": 0.5265819478728287, - "normalized_score": 32.44516856104618 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1729607250755287, - "normalized_score": 17.29607250755287 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32466442953020136, - "normalized_score": 9.955257270693513 - }, - "musr": { - "name": "MUSR", - "value": 0.39749999999999996, - "normalized_score": 10.554166666666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3726728723404255, - "normalized_score": 30.296985815602827 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-20", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3203141655524284 - } - }, - { - "id": "DreadPoor/Aspire_V2_ALT_ROW-8B-Model_Stock_bfloat16_7402061b436bbebb8b74b9f216cd8c788937a8f1_True", - "model": { - "name": "DreadPoor/Aspire_V2_ALT_ROW-8B-Model_Stock", - "sha": "7402061b436bbebb8b74b9f216cd8c788937a8f1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.059893218432236, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7381170848903134, - "normalized_score": 73.81170848903133 - }, - "bbh": { - "name": "BBH", - "value": 0.5265819478728287, - "normalized_score": 32.44516856104618 - }, - "math": { - "name": "MATH Level 
5", - "value": 0.1729607250755287, - "normalized_score": 17.29607250755287 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32466442953020136, - "normalized_score": 9.955257270693513 - }, - "musr": { - "name": "MUSR", - "value": 0.39749999999999996, - "normalized_score": 10.554166666666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3726728723404255, - "normalized_score": 30.296985815602827 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-20", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.290022700649708 - } - }, - { - "id": "DreadPoor/Aspire_V3-8B-Model_Stock_bfloat16_51a86cfb6f0067d113d31473399e34f13bb83d75_True", - "model": { - "name": "DreadPoor/Aspire_V3-8B-Model_Stock", - "sha": "51a86cfb6f0067d113d31473399e34f13bb83d75", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.135023019694938, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5118795905973927, - "normalized_score": 51.18795905973927 - }, - "bbh": { - "name": "BBH", - "value": 0.5267958758971987, - "normalized_score": 32.68368177507647 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18580060422960726, - "normalized_score": 18.580060422960727 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.40149999999999997, - "normalized_score": 11.620833333333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36419547872340424, - "normalized_score": 29.355053191489354 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-21", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3138646967471743 - } - }, - { - "id": "DreadPoor/Aspire_V4-8B-Model_Stock_bfloat16_6a16bacdd20cb1a75a7b31376b46f7be73f8b02f_True", - "model": { - "name": "DreadPoor/Aspire_V4-8B-Model_Stock", - "sha": "6a16bacdd20cb1a75a7b31376b46f7be73f8b02f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.36906658031589, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.769416259967996, - "normalized_score": 76.9416259967996 - }, - "bbh": { - "name": "BBH", - "value": 0.5314037161536506, - "normalized_score": 33.20598944793845 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19259818731117825, - "normalized_score": 19.259818731117825 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.3867395833333333, - "normalized_score": 9.442447916666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.370844414893617, - "normalized_score": 30.093823877068555 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - 
"upload_date": "", - "submission_date": "2025-01-22", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3502461987292993 - } - }, - { - "id": "DreadPoor/Aspire_V4_ALT-8B-Model_Stock_bfloat16_6e6349a32f2e33765d7d92a591e2fa2523dec4ed_True", - "model": { - "name": "DreadPoor/Aspire_V4_ALT-8B-Model_Stock", - "sha": "6e6349a32f2e33765d7d92a591e2fa2523dec4ed", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.003526021452984, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7365933500888753, - "normalized_score": 73.65933500888754 - }, - "bbh": { - "name": "BBH", - "value": 0.5268232518944024, - "normalized_score": 32.63543581177509 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18126888217522658, - "normalized_score": 18.12688821752266 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32046979865771813, - "normalized_score": 9.395973154362418 - }, - "musr": { - "name": "MUSR", - "value": 0.3920416666666666, - "normalized_score": 10.405208333333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3681848404255319, - "normalized_score": 29.79831560283688 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-22", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3543781518936209 - } - }, - { - "id": "DreadPoor/Asymmetric_Linearity-8B-Model_Stock_bfloat16_5971697affcee3a1caa06553b31a8ba0e9106bb7_True", - "model": { - "name": "DreadPoor/Asymmetric_Linearity-8B-Model_Stock", - "sha": "5971697affcee3a1caa06553b31a8ba0e9106bb7", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.348131644913156, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7174341857382855, - "normalized_score": 71.74341857382856 - }, - "bbh": { - "name": "BBH", - "value": 0.546535755155883, - "normalized_score": 35.4412470409656 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1646525679758308, - "normalized_score": 16.46525679758308 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3145973154362416, - "normalized_score": 8.612975391498878 - }, - "musr": { - "name": "MUSR", - "value": 0.41994791666666664, - "normalized_score": 12.226822916666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3843916223404255, - "normalized_score": 31.599069148936167 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-20", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 4.015, - "co2_cost": 1.2785390621096806 - } - }, - { - "id": "DreadPoor/Aurora_faustus-8B-LINEAR_bfloat16_76acf1ac703eb827d2541d07a8d4a7cba4b731d4_True", - "model": { - "name": "DreadPoor/Aurora_faustus-8B-LINEAR", - "sha": "76acf1ac703eb827d2541d07a8d4a7cba4b731d4", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - 
"average_score": 29.619908170064097, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7281003293483512, - "normalized_score": 72.81003293483514 - }, - "bbh": { - "name": "BBH", - "value": 0.5515538279425277, - "normalized_score": 36.26348248348271 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17069486404833836, - "normalized_score": 17.069486404833835 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.4145833333333333, - "normalized_score": 12.389583333333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3842253989361702, - "normalized_score": 31.580599881796683 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-25", - "submission_date": "2024-09-26", - "generation": 1, - "base_model": "DreadPoor/Aurora_faustus-8B-LINEAR (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 5, - "params_billions": 8.03, - "co2_cost": 1.5347539539225346 - } - }, - { - "id": "DreadPoor/Aurora_faustus-8B-LORABLATED_bfloat16_97746081f7c681dcf7fad10c57de9a341aa10db1_True", - "model": { - "name": "DreadPoor/Aurora_faustus-8B-LORABLATED", - "sha": "97746081f7c681dcf7fad10c57de9a341aa10db1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.127203730886404, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7527050448365891, - "normalized_score": 75.27050448365891 - }, - "bbh": { - "name": "BBH", - "value": 0.539159616655651, - "normalized_score": 34.19993531370101 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1487915407854985, - "normalized_score": 14.879154078549849 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - "musr": { - "name": "MUSR", - "value": 0.42385416666666664, - "normalized_score": 13.781770833333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36727061170212766, - "normalized_score": 29.696734633569736 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-29", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.6017002655807937 - } - }, - { - "id": "DreadPoor/Aurora_faustus-8B-LORABLATED_ALT_float16_3ca36587d26bfd936aa1358adc1eabf377aa1e98_True", - "model": { - "name": "DreadPoor/Aurora_faustus-8B-LORABLATED_ALT", - "sha": "3ca36587d26bfd936aa1358adc1eabf377aa1e98", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.00968134721205, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7377923908562614, - "normalized_score": 73.77923908562614 - }, - "bbh": { - "name": "BBH", - "value": 0.5387670721191214, - "normalized_score": 34.21152011815682 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15861027190332327, - "normalized_score": 15.861027190332328 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2986577181208054, - "normalized_score": 6.487695749440718 
-      },
-      "musr": {
-        "name": "MUSR",
-        "value": 0.4225208333333333,
-        "normalized_score": 13.781770833333331
-      },
-      "mmlu_pro": {
-        "name": "MMLU-PRO",
-        "value": 0.36943151595744683,
-        "normalized_score": 29.93683510638298
-      }
-    },
-    "features": {
-      "is_not_available_on_hub": false,
-      "is_merged": false,
-      "is_moe": false,
-      "is_flagged": false,
-      "is_official_provider": false
-    },
-    "metadata": {
-      "upload_date": "",
-      "submission_date": "2024-09-29",
-      "generation": 0,
-      "base_model": "Removed",
-      "hub_license": "",
-      "hub_hearts": 0,
-      "params_billions": 8.03,
-      "co2_cost": 1.5902321956738188
-    }
-  },
-  {
-    "id": "DreadPoor/Autumn_Dawn-8B-LINEAR_bfloat16_8ea79a3552f0fbfc795f124aeeeb56a9f176e513_True",
-    "model": {
-      "name": "DreadPoor/Autumn_Dawn-8B-LINEAR",
-      "sha": "8ea79a3552f0fbfc795f124aeeeb56a9f176e513",
-      "precision": "bfloat16",
-      "type": "basemergesandmoerges",
-      "weight_type": "Original",
-      "architecture": "LlamaForCausalLM",
-      "average_score": 29.605469000443268,
-      "has_chat_template": true
-    },
-    "evaluations": {
-      "ifeval": {
-        "name": "IFEval",
-        "value": 0.7292993701157373,
-        "normalized_score": 72.92993701157374
-      },
-      "bbh": {
-        "name": "BBH",
-        "value": 0.5459436958014627,
-        "normalized_score": 35.845555124121006
-      },
-      "math": {
-        "name": "MATH Level 5",
-        "value": 0.18580060422960726,
-        "normalized_score": 18.580060422960727
-      },
-      "gpqa": {
-        "name": "GPQA",
-        "value": 0.2936241610738255,
-        "normalized_score": 5.8165548098433995
-      },
-      "musr": {
-        "name": "MUSR",
-        "value": 0.4185520833333333,
-        "normalized_score": 11.485677083333329
-      },
-      "mmlu_pro": {
-        "name": "MMLU-PRO",
-        "value": 0.39677526595744683,
-        "normalized_score": 32.97502955082743
-      }
-    },
-    "features": {
-      "is_not_available_on_hub": false,
-      "is_merged": false,
-      "is_moe": false,
-      "is_flagged": false,
-      "is_official_provider": false
-    },
-    "metadata": {
-      "upload_date": "",
-      "submission_date": "2025-02-01",
-      "generation": 0,
-      "base_model": "Removed",
-      "hub_license": "",
-      "hub_hearts": 0,
-      "params_billions": 8.03,
-      "co2_cost": 1.402496876361008
-    }
-  },
-  {
-    "id": "DreadPoor/BaeZel-8B-LINEAR_bfloat16_1deac3287de191794c50543d69d523f43654a803_True",
-    "model": {
-      "name": "DreadPoor/BaeZel-8B-LINEAR",
-      "sha": "1deac3287de191794c50543d69d523f43654a803",
-      "precision": "bfloat16",
-      "type": "basemergesandmoerges",
-      "weight_type": "Original",
-      "architecture": "LlamaForCausalLM",
-      "average_score": 30.34681191186813,
-      "has_chat_template": true
-    },
-    "evaluations": {
-      "ifeval": {
-        "name": "IFEval",
-        "value": 0.7377923908562614,
-        "normalized_score": 73.77923908562614
-      },
-      "bbh": {
-        "name": "BBH",
-        "value": 0.5463800554321383,
-        "normalized_score": 35.53537606986396
-      },
-      "math": {
-        "name": "MATH Level 5",
-        "value": 0.18126888217522658,
-        "normalized_score": 18.12688821752266
-      },
-      "gpqa": {
-        "name": "GPQA",
-        "value": 0.3213087248322148,
-        "normalized_score": 9.507829977628639
-      },
-      "musr": {
-        "name": "MUSR",
-        "value": 0.4227083333333333,
-        "normalized_score": 13.338541666666666
-      },
-      "mmlu_pro": {
-        "name": "MMLU-PRO",
-        "value": 0.3861369680851064,
-        "normalized_score": 31.792996453900706
-      }
-    },
-    "features": {
-      "is_not_available_on_hub": true,
-      "is_merged": true,
-      "is_moe": false,
-      "is_flagged": false,
-      "is_official_provider": false
-    },
-    "metadata": {
-      "upload_date": "2024-11-08",
-      "submission_date": "2024-11-08",
-      "generation": 1,
-      "base_model": "DreadPoor/BaeZel-8B-LINEAR (Merge)",
-      "hub_license": "apache-2.0",
-      "hub_hearts": 1,
-      "params_billions": 8.03,
"co2_cost": 1.3301376402139433 - } - }, - { - "id": "DreadPoor/BaeZel-8B-Model_Stock_bfloat16_cb6aa300413e77d5e3ce97373ba3df0a0f1f374c_True", - "model": { - "name": "DreadPoor/BaeZel-8B-Model_Stock", - "sha": "cb6aa300413e77d5e3ce97373ba3df0a0f1f374c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.021263046626203, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7713145564878965, - "normalized_score": 77.13145564878965 - }, - "bbh": { - "name": "BBH", - "value": 0.5407680550216925, - "normalized_score": 34.643632940053415 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16389728096676737, - "normalized_score": 16.389728096676738 - }, - "gpqa": { - "name": "GPQA", - "value": 0.313758389261745, - "normalized_score": 8.501118568232664 - }, - "musr": { - "name": "MUSR", - "value": 0.41991666666666666, - "normalized_score": 11.456249999999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38804853723404253, - "normalized_score": 32.00539302600473 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-29", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3246060112910538 - } - }, - { - "id": "DreadPoor/BaeZel_V2-8B-Model_Stock_bfloat16_aff8ecf21f3e07db47fb2eb1054a73453c898fe0_True", - "model": { - "name": "DreadPoor/BaeZel_V2-8B-Model_Stock", - "sha": "aff8ecf21f3e07db47fb2eb1054a73453c898fe0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.020772741913536, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7676675665013276, - "normalized_score": 76.76675665013276 - }, - "bbh": { - "name": "BBH", - "value": 0.5373871612758611, - "normalized_score": 34.4487493680143 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1797583081570997, - "normalized_score": 17.97583081570997 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.4185833333333333, - "normalized_score": 11.58958333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3946974734042553, - "normalized_score": 32.74416371158392 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-28", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4286447016591841 - } - }, - { - "id": "DreadPoor/BaeZel_V2_ALT-8B-Model_Stock_bfloat16_b410e364df1635796403a7e3dbd4c9b11b60f1be_True", - "model": { - "name": "DreadPoor/BaeZel_V2_ALT-8B-Model_Stock", - "sha": "b410e364df1635796403a7e3dbd4c9b11b60f1be", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.020772741913536, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7676675665013276, - "normalized_score": 76.76675665013276 - }, - 
"bbh": { - "name": "BBH", - "value": 0.5373871612758611, - "normalized_score": 34.4487493680143 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1797583081570997, - "normalized_score": 17.97583081570997 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.4185833333333333, - "normalized_score": 11.58958333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3946974734042553, - "normalized_score": 32.74416371158392 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-28", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4263812233952076 - } - }, - { - "id": "DreadPoor/BaeZel_V3-8B-Model_Stock_bfloat16_65014ed04553da3c0e126e7b1d29705120cdd3e6_True", - "model": { - "name": "DreadPoor/BaeZel_V3-8B-Model_Stock", - "sha": "65014ed04553da3c0e126e7b1d29705120cdd3e6", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.719194809944852, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7831797408653485, - "normalized_score": 78.31797408653483 - }, - "bbh": { - "name": "BBH", - "value": 0.539231076759135, - "normalized_score": 34.408658796480346 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18957703927492447, - "normalized_score": 18.957703927492446 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32046979865771813, - "normalized_score": 9.395973154362418 - }, - "musr": { - "name": "MUSR", - "value": 0.41743749999999996, - "normalized_score": 11.146354166666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3887965425531915, - "normalized_score": 32.08850472813239 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-09", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3245907715556766 - } - }, - { - "id": "DreadPoor/Blunt_Edge-8B-SLERP_bfloat16_8da77a17ff176577b6e56d6ae0b2ec4b4afb60e1_True", - "model": { - "name": "DreadPoor/Blunt_Edge-8B-SLERP", - "sha": "8da77a17ff176577b6e56d6ae0b2ec4b4afb60e1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.677478634434237, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7496575752337131, - "normalized_score": 74.96575752337131 - }, - "bbh": { - "name": "BBH", - "value": 0.5389470863694941, - "normalized_score": 34.14138253488904 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18580060422960726, - "normalized_score": 18.580060422960727 - }, - "gpqa": { - "name": "GPQA", - "value": 0.311241610738255, - "normalized_score": 8.165548098434002 - }, - "musr": { - "name": "MUSR", - "value": 0.417375, - "normalized_score": 11.471874999999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37666223404255317, - "normalized_score": 30.740248226950357 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": 
false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-18", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.295227329313974 - } - }, - { - "id": "DreadPoor/BulkUp_bfloat16_e92f4dcb593b966703b0e3dd97967cade69207af_True", - "model": { - "name": "DreadPoor/BulkUp", - "sha": "e92f4dcb593b966703b0e3dd97967cade69207af", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 3.7903724081838015, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.177804891022487, - "normalized_score": 17.7804891022487 - }, - "bbh": { - "name": "BBH", - "value": 0.28698602947692575, - "normalized_score": 1.6612873090290492 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24748322147651006, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3446666666666667, - "normalized_score": 2.0833333333333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11095412234042554, - "normalized_score": 1.2171247044917257 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-27", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.5267189091550464 - } - }, - { - "id": "DreadPoor/Cadence-8B-LINEAR_bfloat16_b32726956f19bf06168cc49545b8e762dcea293c_True", - "model": { - "name": "DreadPoor/Cadence-8B-LINEAR", - "sha": "b32726956f19bf06168cc49545b8e762dcea293c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.577499098320445, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7682172192006099, - "normalized_score": 76.82172192006098 - }, - "bbh": { - "name": "BBH", - "value": 0.5433358555450108, - "normalized_score": 34.914380348862515 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16767371601208458, - "normalized_score": 16.76737160120846 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.41734374999999996, - "normalized_score": 10.767968750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3803191489361702, - "normalized_score": 31.146572104018905 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-25", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.6703182483240965 - } - }, - { - "id": "DreadPoor/Caelid-8B-Model_Stock_bfloat16_bce8eb2eb9071c124e1d6e0bb004f32f81e586d0_True", - "model": { - "name": "DreadPoor/Caelid-8B-Model_Stock", - "sha": "bce8eb2eb9071c124e1d6e0bb004f32f81e586d0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 
28.396110525383502, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7247281657114235, - "normalized_score": 72.47281657114235 - }, - "bbh": { - "name": "BBH", - "value": 0.5459605196913864, - "normalized_score": 35.23706805025389 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1510574018126888, - "normalized_score": 15.105740181268882 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3104026845637584, - "normalized_score": 8.05369127516779 - }, - "musr": { - "name": "MUSR", - "value": 0.4001041666666667, - "normalized_score": 8.213020833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3816489361702128, - "normalized_score": 31.29432624113476 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-14", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2312977459174455 - } - }, - { - "id": "DreadPoor/Casuar-9B-Model_Stock_bfloat16_d69d1242cef7da8da9be79796649529a4e42adf0_True", - "model": { - "name": "DreadPoor/Casuar-9B-Model_Stock", - "sha": "d69d1242cef7da8da9be79796649529a4e42adf0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 33.631173922201846, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7764852812759035, - "normalized_score": 77.64852812759035 - }, - "bbh": { - "name": "BBH", - "value": 0.6106681877306871, - "normalized_score": 43.92726768312374 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21299093655589124, - "normalized_score": 21.299093655589125 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3447986577181208, - "normalized_score": 12.639821029082773 - }, - "musr": { - "name": "MUSR", - "value": 0.41654166666666664, - "normalized_score": 11.201041666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4156416223404255, - "normalized_score": 35.07129137115839 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-20", - "submission_date": "2024-12-21", - "generation": 1, - "base_model": "DreadPoor/Casuar-9B-Model_Stock (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 9.242, - "co2_cost": 3.3008564505336286 - } - }, - { - "id": "DreadPoor/Condensed_Milk-8B-Model_Stock_bfloat16_6e5b73099b9d5a794c9c744c4c5c158b1feb8916_True", - "model": { - "name": "DreadPoor/Condensed_Milk-8B-Model_Stock", - "sha": "6e5b73099b9d5a794c9c744c4c5c158b1feb8916", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.083171334394155, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7536292592543341, - "normalized_score": 75.3629259254334 - }, - "bbh": { - "name": "BBH", - "value": 0.5434864122121906, - "normalized_score": 35.12062025244797 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17447129909365558, - "normalized_score": 17.447129909365557 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3213087248322148, - "normalized_score": 9.507829977628639 - }, - "musr": { - "name": "MUSR", - 
"value": 0.41601041666666666, - "normalized_score": 11.10130208333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38763297872340424, - "normalized_score": 31.959219858156025 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-27", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3093283812844896 - } - }, - { - "id": "DreadPoor/CoolerCoder-8B-LINEAR_bfloat16_db14b0fa821b0b6b07802111fd19ba722344a32b_True", - "model": { - "name": "DreadPoor/CoolerCoder-8B-LINEAR", - "sha": "db14b0fa821b0b6b07802111fd19ba722344a32b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.437537566560177, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4519286603988528, - "normalized_score": 45.19286603988529 - }, - "bbh": { - "name": "BBH", - "value": 0.4761504835496542, - "normalized_score": 26.365382993393997 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07930513595166164, - "normalized_score": 7.930513595166164 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.3963541666666666, - "normalized_score": 7.777604166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31590757978723405, - "normalized_score": 23.98973108747045 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-20", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 2.165600817763367 - } - }, - { - "id": "DreadPoor/Damasteel-8B-LINEAR_bfloat16_cfc389c15e614b14f1d8d16740dcc183047b435a_True", - "model": { - "name": "DreadPoor/Damasteel-8B-LINEAR", - "sha": "cfc389c15e614b14f1d8d16740dcc183047b435a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.97747867481607, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7384417789243651, - "normalized_score": 73.84417789243652 - }, - "bbh": { - "name": "BBH", - "value": 0.5388142176959776, - "normalized_score": 34.10613777622061 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16691842900302115, - "normalized_score": 16.691842900302113 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2986577181208054, - "normalized_score": 6.487695749440718 - }, - "musr": { - "name": "MUSR", - "value": 0.42124999999999996, - "normalized_score": 11.856249999999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3779089095744681, - "normalized_score": 30.87876773049646 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-01", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3493181419987625 - } - }, - { - "id": 
"DreadPoor/Dearly_Beloved-8B-TIES_bfloat16_af6515ee730d6aa17d77687fe2c06c57fa9533fb_True", - "model": { - "name": "DreadPoor/Dearly_Beloved-8B-TIES", - "sha": "af6515ee730d6aa17d77687fe2c06c57fa9533fb", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.286760418026628, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8266687943545348, - "normalized_score": 82.66687943545348 - }, - "bbh": { - "name": "BBH", - "value": 0.4049833102731906, - "normalized_score": 16.671812993247975 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21148036253776434, - "normalized_score": 21.148036253776432 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2986577181208054, - "normalized_score": 6.487695749440718 - }, - "musr": { - "name": "MUSR", - "value": 0.41746875, - "normalized_score": 10.450260416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2826628989361702, - "normalized_score": 20.295877659574465 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-22", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4304810447720981 - } - }, - { - "id": "DreadPoor/Decayed-8B-LINEAR_bfloat16_e2aa8f1455c3466e8a22147691bdf62baf14c146_True", - "model": { - "name": "DreadPoor/Decayed-8B-LINEAR", - "sha": "e2aa8f1455c3466e8a22147691bdf62baf14c146", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.71154213470467, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7676176988169169, - "normalized_score": 76.76176988169169 - }, - "bbh": { - "name": "BBH", - "value": 0.5417014088773181, - "normalized_score": 34.490500759556504 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1714501510574018, - "normalized_score": 17.14501510574018 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.4186145833333333, - "normalized_score": 11.226822916666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37632978723404253, - "normalized_score": 30.703309692671393 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-25", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.6464955314560469 - } - }, - { - "id": "DreadPoor/Derivative-8B-Model_Stock_bfloat16_23bc90b2cba13b4b703412bd0818ff98a7f70e23_True", - "model": { - "name": "DreadPoor/Derivative-8B-Model_Stock", - "sha": "23bc90b2cba13b4b703412bd0818ff98a7f70e23", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.10114939087056, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7667433520835827, - "normalized_score": 76.67433520835827 - }, - "bbh": { - "name": "BBH", - "value": 0.5395493987763994, - "normalized_score": 
34.248821021284016 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17900302114803626, - "normalized_score": 17.900302114803626 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31711409395973156, - "normalized_score": 8.948545861297541 - }, - "musr": { - "name": "MUSR", - "value": 0.42004166666666665, - "normalized_score": 11.605208333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3810671542553192, - "normalized_score": 31.229683806146568 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-03", - "submission_date": "2025-01-03", - "generation": 1, - "base_model": "DreadPoor/Derivative-8B-Model_Stock (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.279546685554034 - } - }, - { - "id": "DreadPoor/Derivative_V2-8B-Model_Stock_bfloat16_12de06834f5d454bfded8b1fb07ea3a4144772da_True", - "model": { - "name": "DreadPoor/Derivative_V2-8B-Model_Stock", - "sha": "12de06834f5d454bfded8b1fb07ea3a4144772da", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.613500365543217, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7536791269387447, - "normalized_score": 75.36791269387447 - }, - "bbh": { - "name": "BBH", - "value": 0.5392643954415269, - "normalized_score": 34.49028104908971 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1797583081570997, - "normalized_score": 17.97583081570997 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.41229166666666667, - "normalized_score": 10.503124999999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38563829787234044, - "normalized_score": 31.73758865248227 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-07", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2734998344522872 - } - }, - { - "id": "DreadPoor/Derivative_V2_ALT-8B-Model_Stock_bfloat16_93b9c2a1a08977d4514f51b1dcaffa8ad4b559e2_True", - "model": { - "name": "DreadPoor/Derivative_V2_ALT-8B-Model_Stock", - "sha": "93b9c2a1a08977d4514f51b1dcaffa8ad4b559e2", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.17823315705989, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7719639445560003, - "normalized_score": 77.19639445560003 - }, - "bbh": { - "name": "BBH", - "value": 0.5365351570462934, - "normalized_score": 34.09335381950132 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18806646525679757, - "normalized_score": 18.806646525679756 - }, - "gpqa": { - "name": "GPQA", - "value": 0.311241610738255, - "normalized_score": 8.165548098434002 - }, - "musr": { - "name": "MUSR", - "value": 0.41346875, - "normalized_score": 10.78359375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38821476063829785, - "normalized_score": 32.023862293144205 - } - }, - "features": { - "is_not_available_on_hub": false, - 
"is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-07", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3019899546090274 - } - }, - { - "id": "DreadPoor/Derivative_V3-8B-Model_Stock_bfloat16_a1667b6f265aa779feb6a54611fa9af8cdb8f498_True", - "model": { - "name": "DreadPoor/Derivative_V3-8B-Model_Stock", - "sha": "a1667b6f265aa779feb6a54611fa9af8cdb8f498", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.930558897103452, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6963767248677952, - "normalized_score": 69.63767248677954 - }, - "bbh": { - "name": "BBH", - "value": 0.524319745545524, - "normalized_score": 32.42070195179438 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14652567975830816, - "normalized_score": 14.652567975830816 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.4149895833333333, - "normalized_score": 11.140364583333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35023271276595747, - "normalized_score": 27.803634751773053 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-08", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3470456912363171 - } - }, - { - "id": "DreadPoor/Elusive_Dragon_Heart-8B-LINEAR_bfloat16_610d22c28cf1b5050c93e746fc3671e9a9c632aa_True", - "model": { - "name": "DreadPoor/Elusive_Dragon_Heart-8B-LINEAR", - "sha": "610d22c28cf1b5050c93e746fc3671e9a9c632aa", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.65682513548015, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7131378076836128, - "normalized_score": 71.31378076836128 - }, - "bbh": { - "name": "BBH", - "value": 0.5456414280881592, - "normalized_score": 35.31017141761393 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14803625377643503, - "normalized_score": 14.803625377643503 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3062080536912752, - "normalized_score": 7.494407158836691 - }, - "musr": { - "name": "MUSR", - "value": 0.4145520833333333, - "normalized_score": 11.752343749999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3813996010638298, - "normalized_score": 31.26662234042553 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-18", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 4.015, - "co2_cost": 1.2727870512490191 - } - }, - { - "id": "DreadPoor/Emu_Eggs-9B-Model_Stock_bfloat16_3fb1b2da72f3618f6943aedfd1600df27886792a_True", - "model": { - "name": "DreadPoor/Emu_Eggs-9B-Model_Stock", - "sha": "3fb1b2da72f3618f6943aedfd1600df27886792a", - "precision": "bfloat16", - 
"type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 32.68321529929168, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7606982805622415, - "normalized_score": 76.06982805622415 - }, - "bbh": { - "name": "BBH", - "value": 0.6051657213517168, - "normalized_score": 42.783674159620205 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20996978851963746, - "normalized_score": 20.996978851963746 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33305369127516776, - "normalized_score": 11.073825503355701 - }, - "musr": { - "name": "MUSR", - "value": 0.4070833333333333, - "normalized_score": 9.318750000000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4227061170212766, - "normalized_score": 35.85623522458629 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-10-18", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 9.242, - "co2_cost": 6.176699167352184 - } - }, - { - "id": "DreadPoor/Eunoia_Vespera-8B-LINEAR_bfloat16_c674956327af664735cf39b20c7a8276dfa579f9_True", - "model": { - "name": "DreadPoor/Eunoia_Vespera-8B-LINEAR", - "sha": "c674956327af664735cf39b20c7a8276dfa579f9", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.956332369127022, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7235291249440374, - "normalized_score": 72.35291249440374 - }, - "bbh": { - "name": "BBH", - "value": 0.5399310621081937, - "normalized_score": 34.21610348917685 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1540785498489426, - "normalized_score": 15.407854984894259 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.4184895833333333, - "normalized_score": 12.611197916666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38389295212765956, - "normalized_score": 31.543661347517727 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-22", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.626520093949101 - } - }, - { - "id": "DreadPoor/Fu_sion_HA-8B-SLERP_bfloat16_09b7b157bdf83840d7a1dc828d1ab4285b9f809d_True", - "model": { - "name": "DreadPoor/Fu_sion_HA-8B-SLERP", - "sha": "09b7b157bdf83840d7a1dc828d1ab4285b9f809d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.959011604457412, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7609232392274721, - "normalized_score": 76.0923239227472 - }, - "bbh": { - "name": "BBH", - "value": 0.5372804197028272, - "normalized_score": 34.05290214273217 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17522658610271905, - "normalized_score": 17.522658610271904 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32298657718120805, - "normalized_score": 
9.731543624161072 - }, - "musr": { - "name": "MUSR", - "value": 0.41601041666666666, - "normalized_score": 10.967968749999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38248005319148937, - "normalized_score": 31.386672576832154 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-17", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2951105435024746 - } - }, - { - "id": "DreadPoor/HOT_STINKING_GARBAGE_bfloat16_70e1c55971ea79ac0b5bc52938aa02f3bc5af5de_True", - "model": { - "name": "DreadPoor/HOT_STINKING_GARBAGE", - "sha": "70e1c55971ea79ac0b5bc52938aa02f3bc5af5de", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.584573242489594, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5754265349273262, - "normalized_score": 57.54265349273261 - }, - "bbh": { - "name": "BBH", - "value": 0.4884000866161456, - "normalized_score": 27.851418136508787 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06722054380664652, - "normalized_score": 6.722054380664652 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2751677852348993, - "normalized_score": 3.355704697986576 - }, - "musr": { - "name": "MUSR", - "value": 0.42500000000000004, - "normalized_score": 11.625000000000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30169547872340424, - "normalized_score": 22.410608747044915 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-03", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4737654914835887 - } - }, - { - "id": "DreadPoor/H_the_eighth-8B-LINEAR_bfloat16_a1f5676e4151796de4c670b73dfdcaef7d328389_True", - "model": { - "name": "DreadPoor/H_the_eighth-8B-LINEAR", - "sha": "a1f5676e4151796de4c670b73dfdcaef7d328389", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.18954140441448, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7469347996648892, - "normalized_score": 74.69347996648892 - }, - "bbh": { - "name": "BBH", - "value": 0.5383752114303682, - "normalized_score": 34.15424498998624 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17749244712990936, - "normalized_score": 17.749244712990937 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32802013422818793, - "normalized_score": 10.402684563758392 - }, - "musr": { - "name": "MUSR", - "value": 0.41728125, - "normalized_score": 12.76015625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3823969414893617, - "normalized_score": 31.37743794326241 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-06", - "generation": 1, - "base_model": "DreadPoor/H_the_eighth-8B-LINEAR (Merge)", - "hub_license": "", - "hub_hearts": 2, - 
"params_billions": 8.03, - "co2_cost": 0.6574557527465881 - } - }, - { - "id": "DreadPoor/Happy_New_Year-8B-Model_Stock_bfloat16_e02c7ec9245262f2f43541db4b835144d45851a3_True", - "model": { - "name": "DreadPoor/Happy_New_Year-8B-Model_Stock", - "sha": "e02c7ec9245262f2f43541db4b835144d45851a3", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.722378273097977, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7615726272955757, - "normalized_score": 76.15726272955757 - }, - "bbh": { - "name": "BBH", - "value": 0.5367913866457493, - "normalized_score": 34.00006494069371 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1593655589123867, - "normalized_score": 15.93655589123867 - }, - "gpqa": { - "name": "GPQA", - "value": 0.313758389261745, - "normalized_score": 8.501118568232664 - }, - "musr": { - "name": "MUSR", - "value": 0.4185520833333333, - "normalized_score": 11.752343749999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3878823138297872, - "normalized_score": 31.98692375886525 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-31", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.281487412969691 - } - }, - { - "id": "DreadPoor/Heart_Stolen-8B-Model_Stock_bfloat16_6d77987af7115c7455ddb072c48316815b018999_True", - "model": { - "name": "DreadPoor/Heart_Stolen-8B-Model_Stock", - "sha": "6d77987af7115c7455ddb072c48316815b018999", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.411035925290932, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7244533393617822, - "normalized_score": 72.44533393617823 - }, - "bbh": { - "name": "BBH", - "value": 0.5395443745186658, - "normalized_score": 34.44482164620488 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17220543806646527, - "normalized_score": 17.220543806646525 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31711409395973156, - "normalized_score": 8.948545861297541 - }, - "musr": { - "name": "MUSR", - "value": 0.41622916666666665, - "normalized_score": 12.361979166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37940492021276595, - "normalized_score": 31.044991134751776 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-09", - "submission_date": "2024-09-10", - "generation": 1, - "base_model": "DreadPoor/Heart_Stolen-8B-Model_Stock (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 5, - "params_billions": 8.03, - "co2_cost": 1.4986020927170227 - } - }, - { - "id": "DreadPoor/Heart_Stolen-ALT-8B-Model_Stock_bfloat16_03d1d70cb7eb5a743468b97c9c580028df487564_True", - "model": { - "name": "DreadPoor/Heart_Stolen-ALT-8B-Model_Stock", - "sha": "03d1d70cb7eb5a743468b97c9c580028df487564", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 27.8678381845889, - "has_chat_template": true - }, - "evaluations": { - 
"ifeval": { - "name": "IFEval", - "value": 0.7183584001560305, - "normalized_score": 71.83584001560305 - }, - "bbh": { - "name": "BBH", - "value": 0.526338467747489, - "normalized_score": 32.354424456472486 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15634441087613293, - "normalized_score": 15.634441087613293 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.40549999999999997, - "normalized_score": 9.754166666666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37724401595744683, - "normalized_score": 30.80489066193854 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-11", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.471254548591098 - } - }, - { - "id": "DreadPoor/Here_We_Go_Again-8B-SLERP_bfloat16_ea5658071e1fd45f1c8261ab2070244de2f40c38_True", - "model": { - "name": "DreadPoor/Here_We_Go_Again-8B-SLERP", - "sha": "ea5658071e1fd45f1c8261ab2070244de2f40c38", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.129656228309155, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7442120240960651, - "normalized_score": 74.42120240960651 - }, - "bbh": { - "name": "BBH", - "value": 0.5460182474181831, - "normalized_score": 35.53148620432186 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1729607250755287, - "normalized_score": 17.29607250755287 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3187919463087248, - "normalized_score": 9.172259507829976 - }, - "musr": { - "name": "MUSR", - "value": 0.4186770833333333, - "normalized_score": 12.434635416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3873005319148936, - "normalized_score": 31.922281323877062 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-31", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 4.015, - "co2_cost": 1.225435569157672 - } - }, - { - "id": "DreadPoor/Howdy-8B-LINEAR_bfloat16_2d16b904fdaa1388431c804e9b3e7497901928b5_True", - "model": { - "name": "DreadPoor/Howdy-8B-LINEAR", - "sha": "2d16b904fdaa1388431c804e9b3e7497901928b5", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.644826479517462, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7377923908562614, - "normalized_score": 73.77923908562614 - }, - "bbh": { - "name": "BBH", - "value": 0.5383981582614435, - "normalized_score": 34.22706196535763 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17749244712990936, - "normalized_score": 17.749244712990937 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3145973154362416, - "normalized_score": 8.612975391498878 - }, - "musr": { - "name": "MUSR", - "value": 0.41213541666666664, - "normalized_score": 12.316927083333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3806515957446808, - 
"normalized_score": 31.183510638297868 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-16", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3063248770741902 - } - }, - { - "id": "DreadPoor/Incidental-8B-Model_Stock_bfloat16_2fa6bd06595818371f6395f00e5b6ac2a91a6ad6_True", - "model": { - "name": "DreadPoor/Incidental-8B-Model_Stock", - "sha": "2fa6bd06595818371f6395f00e5b6ac2a91a6ad6", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.5682894801915, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.748183708116686, - "normalized_score": 74.81837081166859 - }, - "bbh": { - "name": "BBH", - "value": 0.5452070612873019, - "normalized_score": 35.15766080254052 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16163141993957703, - "normalized_score": 16.1631419939577 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.42401041666666667, - "normalized_score": 12.301302083333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3873005319148936, - "normalized_score": 31.922281323877062 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-13", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2634223164614282 - } - }, - { - "id": "DreadPoor/Irina-8B-model_stock_bfloat16_b282e3ab449d71a31f48b8c13eb43a4435968728_True", - "model": { - "name": "DreadPoor/Irina-8B-model_stock", - "sha": "b282e3ab449d71a31f48b8c13eb43a4435968728", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.34985665112922, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6799403360860294, - "normalized_score": 67.99403360860295 - }, - "bbh": { - "name": "BBH", - "value": 0.5236638956084764, - "normalized_score": 32.08833034979686 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10196374622356495, - "normalized_score": 10.196374622356496 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28439597315436244, - "normalized_score": 4.5861297539149914 - }, - "musr": { - "name": "MUSR", - "value": 0.40029166666666666, - "normalized_score": 8.636458333333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35738031914893614, - "normalized_score": 28.59781323877068 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-08-30", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.491172890181649 - } - }, - { - "id": "DreadPoor/Kindling-8B-Model_Stock_bfloat16_17fd1dbd6f97bdda7608c2e9508d9f855df410e1_True", - "model": { - "name": "DreadPoor/Kindling-8B-Model_Stock", - "sha": 
"17fd1dbd6f97bdda7608c2e9508d9f855df410e1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.66229791748307, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7308231049171753, - "normalized_score": 73.08231049171752 - }, - "bbh": { - "name": "BBH", - "value": 0.5492054832931256, - "normalized_score": 35.77883534009465 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17522658610271905, - "normalized_score": 17.522658610271904 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3179530201342282, - "normalized_score": 9.060402684563762 - }, - "musr": { - "name": "MUSR", - "value": 0.4068333333333333, - "normalized_score": 11.087499999999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3829787234042553, - "normalized_score": 31.44208037825059 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-26", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2885656354328203 - } - }, - { - "id": "DreadPoor/L3.1-BaeZel-8B-Della_bfloat16_ec61b6f5355a7f3975d80f1afac69e0407e612e5_True", - "model": { - "name": "DreadPoor/L3.1-BaeZel-8B-Della", - "sha": "ec61b6f5355a7f3975d80f1afac69e0407e612e5", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.25567172776485, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5180243974875552, - "normalized_score": 51.802439748755525 - }, - "bbh": { - "name": "BBH", - "value": 0.5448449542185521, - "normalized_score": 35.15745504522053 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17447129909365558, - "normalized_score": 17.447129909365557 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3196308724832215, - "normalized_score": 9.284116331096197 - }, - "musr": { - "name": "MUSR", - "value": 0.4199791666666666, - "normalized_score": 11.597395833333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3902094414893617, - "normalized_score": 32.245493498817964 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-15", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3179894823741092 - } - }, - { - "id": "DreadPoor/Laughing_Stock-8B-Model_Stock_bfloat16_9ead55adb81aaa1c6b5c828e5be03340f32f960b_True", - "model": { - "name": "DreadPoor/Laughing_Stock-8B-Model_Stock", - "sha": "9ead55adb81aaa1c6b5c828e5be03340f32f960b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.212504336786974, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7189579205397235, - "normalized_score": 71.89579205397234 - }, - "bbh": { - "name": "BBH", - "value": 0.5449429262155, - "normalized_score": 34.93824337423678 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1578549848942598, - "normalized_score": 15.785498489425981 - }, - 
"gpqa": { - "name": "GPQA", - "value": 0.28942953020134227, - "normalized_score": 5.257270693512303 - }, - "musr": { - "name": "MUSR", - "value": 0.4145520833333333, - "normalized_score": 10.68567708333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3764128989361702, - "normalized_score": 30.712544326241126 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-27", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2966608065773144 - } - }, - { - "id": "DreadPoor/Lava_Lamp-8B-SLERP_bfloat16_5f57c546340b533777f84e6b631b5c1b2a6f6ff2_True", - "model": { - "name": "DreadPoor/Lava_Lamp-8B-SLERP", - "sha": "5f57c546340b533777f84e6b631b5c1b2a6f6ff2", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.154893020196578, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7381170848903134, - "normalized_score": 73.81170848903133 - }, - "bbh": { - "name": "BBH", - "value": 0.5367586873360172, - "normalized_score": 33.802734199229576 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17371601208459214, - "normalized_score": 17.371601208459214 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.4187083333333333, - "normalized_score": 12.00520833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.375, - "normalized_score": 30.555555555555557 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-18", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.264561277760501 - } - }, - { - "id": "DreadPoor/LemonP-8B-Model_Stock_bfloat16_8c3f1eba0a5ff4fa70b47a1e94e4404e6fa5d878_True", - "model": { - "name": "DreadPoor/LemonP-8B-Model_Stock", - "sha": "8c3f1eba0a5ff4fa70b47a1e94e4404e6fa5d878", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.052497237216315, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7676176988169169, - "normalized_score": 76.76176988169169 - }, - "bbh": { - "name": "BBH", - "value": 0.5439348074265458, - "normalized_score": 35.371476735853804 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17673716012084592, - "normalized_score": 17.673716012084594 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.40810416666666666, - "normalized_score": 10.079687499999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.40043218085106386, - "normalized_score": 33.38135342789598 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-08", - "submission_date": "2025-01-19", - "generation": 1, - "base_model": 
"DreadPoor/LemonP-8B-Model_Stock (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.3972866396390922 - } - }, - { - "id": "DreadPoor/Lydia_of_Whiterun-8B-LINEAR_bfloat16_87cbd7f5ebd1f6e7db2df8f62ecac7d65ba3adb0_True", - "model": { - "name": "DreadPoor/Lydia_of_Whiterun-8B-LINEAR", - "sha": "87cbd7f5ebd1f6e7db2df8f62ecac7d65ba3adb0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.10503527354818, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.760323718843779, - "normalized_score": 76.03237188437791 - }, - "bbh": { - "name": "BBH", - "value": 0.5379527944750039, - "normalized_score": 33.86908733681889 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17673716012084592, - "normalized_score": 17.673716012084594 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3162751677852349, - "normalized_score": 8.83668903803132 - }, - "musr": { - "name": "MUSR", - "value": 0.42506249999999995, - "normalized_score": 13.099479166666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3800698138297872, - "normalized_score": 31.11886820330969 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-26", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.6539682055402704 - } - }, - { - "id": "DreadPoor/Matryoshka-8B-LINEAR_bfloat16_20d260e6d881fcd3b4f76071797675d095ba8e98_True", - "model": { - "name": "DreadPoor/Matryoshka-8B-LINEAR", - "sha": "20d260e6d881fcd3b4f76071797675d095ba8e98", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.825025523646488, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7262519005128614, - "normalized_score": 72.62519005128615 - }, - "bbh": { - "name": "BBH", - "value": 0.5444280006376178, - "normalized_score": 35.110911704209016 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17522658610271905, - "normalized_score": 17.522658610271904 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32046979865771813, - "normalized_score": 9.395973154362418 - }, - "musr": { - "name": "MUSR", - "value": 0.42524999999999996, - "normalized_score": 12.456249999999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3865525265957447, - "normalized_score": 31.839169621749413 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-02", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3246274294755 - } - }, - { - "id": "DreadPoor/Mercury_In_Retrograde-8b-Model-Stock_bfloat16_6c761644a57ab267624987ec2211c4af7a51a16a_True", - "model": { - "name": "DreadPoor/Mercury_In_Retrograde-8b-Model-Stock", - "sha": "6c761644a57ab267624987ec2211c4af7a51a16a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.2612899751994, - "has_chat_template": true - 
}, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7296240641497892, - "normalized_score": 72.96240641497891 - }, - "bbh": { - "name": "BBH", - "value": 0.5390507664719518, - "normalized_score": 34.38486477258895 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1646525679758308, - "normalized_score": 16.46525679758308 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3162751677852349, - "normalized_score": 8.83668903803132 - }, - "musr": { - "name": "MUSR", - "value": 0.4198854166666666, - "normalized_score": 11.485677083333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38289561170212766, - "normalized_score": 31.432845744680847 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-03", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.347093322001088 - } - }, - { - "id": "DreadPoor/Minthy-8B-Model_Stock_bfloat16_feadcfefefe2fc49b14eb18ba198b4472721d17b_True", - "model": { - "name": "DreadPoor/Minthy-8B-Model_Stock", - "sha": "feadcfefefe2fc49b14eb18ba198b4472721d17b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.052988869325077, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.765769269981427, - "normalized_score": 76.57692699814271 - }, - "bbh": { - "name": "BBH", - "value": 0.5352951319641014, - "normalized_score": 34.170029690638636 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19184290030211482, - "normalized_score": 19.184290030211482 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.40940624999999997, - "normalized_score": 9.975781249999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3992686170212766, - "normalized_score": 33.25206855791962 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-26", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3610815072271032 - } - }, - { - "id": "DreadPoor/Minthy_ALT-8B-Model_Stock_bfloat16_f316665116c167f281b084717951ea6856bd3e90_True", - "model": { - "name": "DreadPoor/Minthy_ALT-8B-Model_Stock", - "sha": "f316665116c167f281b084717951ea6856bd3e90", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.422530280387416, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6991992358054406, - "normalized_score": 69.91992358054407 - }, - "bbh": { - "name": "BBH", - "value": 0.5374800202589046, - "normalized_score": 34.16825686462601 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17598187311178248, - "normalized_score": 17.598187311178247 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3062080536912752, - "normalized_score": 7.494407158836691 - }, - "musr": { - "name": "MUSR", - "value": 0.4225208333333333, - "normalized_score": 11.648437499999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", 
- "value": 0.3673537234042553, - "normalized_score": 29.70596926713948 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-26", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3178485106532778 - } - }, - { - "id": "DreadPoor/Minthy_V2-8B-Model_Stock_bfloat16_6fe73accfef6ae694256a6e73b4397a264ab16cd_True", - "model": { - "name": "DreadPoor/Minthy_V2-8B-Model_Stock", - "sha": "6fe73accfef6ae694256a6e73b4397a264ab16cd", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.428890431570498, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7125881549843305, - "normalized_score": 71.25881549843305 - }, - "bbh": { - "name": "BBH", - "value": 0.5491095928821667, - "normalized_score": 35.82274773153525 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1593655589123867, - "normalized_score": 15.93655589123867 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.4198854166666666, - "normalized_score": 11.219010416666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37367021276595747, - "normalized_score": 30.40780141843972 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-26", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3756823635961435 - } - }, - { - "id": "DreadPoor/Minus_Penus-8B-Model_Stock_bfloat16_5b373df78401e9f5abf9f86909159480f940b14e_True", - "model": { - "name": "DreadPoor/Minus_Penus-8B-Model_Stock", - "sha": "5b373df78401e9f5abf9f86909159480f940b14e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.14247782385972, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7311477989512272, - "normalized_score": 73.11477989512272 - }, - "bbh": { - "name": "BBH", - "value": 0.5343781571200968, - "normalized_score": 33.57084078325773 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2001510574018127, - "normalized_score": 20.01510574018127 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.40190624999999996, - "normalized_score": 9.638281249999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3751662234042553, - "normalized_score": 30.57402482269504 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-07", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.6067024859198201 - } - }, - { - "id": "DreadPoor/Morphing-8B-Model_Stock_bfloat16_19c7dd26e8ca0a43f138d6c53d5721b6e4d65093_True", - "model": { - "name": 
"DreadPoor/Morphing-8B-Model_Stock", - "sha": "19c7dd26e8ca0a43f138d6c53d5721b6e4d65093", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.281274084097088, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.744536718130117, - "normalized_score": 74.45367181301171 - }, - "bbh": { - "name": "BBH", - "value": 0.5396942172954088, - "normalized_score": 34.452420920507734 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18882175226586104, - "normalized_score": 18.882175226586103 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2936241610738255, - "normalized_score": 5.8165548098433995 - }, - "musr": { - "name": "MUSR", - "value": 0.4068645833333333, - "normalized_score": 10.391406249999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38522273936170215, - "normalized_score": 31.691415484633573 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-14", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3703618658259002 - } - }, - { - "id": "DreadPoor/Not_Even_My_Final_Form-8B-Model_Stock_bfloat16_1f160553251edcc8f55752b98e19f66ed17c953e_True", - "model": { - "name": "DreadPoor/Not_Even_My_Final_Form-8B-Model_Stock", - "sha": "1f160553251edcc8f55752b98e19f66ed17c953e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.473061853716462, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7721889032212308, - "normalized_score": 77.21889032212307 - }, - "bbh": { - "name": "BBH", - "value": 0.5350849793007441, - "normalized_score": 33.71901446820086 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17598187311178248, - "normalized_score": 17.598187311178247 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2953020134228188, - "normalized_score": 6.040268456375841 - }, - "musr": { - "name": "MUSR", - "value": 0.41473958333333333, - "normalized_score": 10.70911458333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3839760638297872, - "normalized_score": 31.55289598108747 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-14", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3929903739376703 - } - }, - { - "id": "DreadPoor/Nother_One-8B-Model_Stock_bfloat16_559d4f0a1741c17e1c91f299d317876fa8279121_True", - "model": { - "name": "DreadPoor/Nother_One-8B-Model_Stock", - "sha": "559d4f0a1741c17e1c91f299d317876fa8279121", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.077271313760956, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6863101016414226, - "normalized_score": 68.63101016414225 - }, - "bbh": { - "name": "BBH", - "value": 0.5204527600425481, - "normalized_score": 31.92112823138842 - }, - "math": { - "name": "MATH Level 5", - 
"value": 0.15181268882175228, - "normalized_score": 15.181268882175228 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28942953020134227, - "normalized_score": 5.257270693512303 - }, - "musr": { - "name": "MUSR", - "value": 0.38702083333333337, - "normalized_score": 6.6442708333333345 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35945811170212766, - "normalized_score": 28.82867907801418 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-19", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.262117274440706 - } - }, - { - "id": "DreadPoor/Noxis-8B-LINEAR_bfloat16_e4fde6aa8c0eb9d93a61e9e564fa9568d959488f_True", - "model": { - "name": "DreadPoor/Noxis-8B-LINEAR", - "sha": "e4fde6aa8c0eb9d93a61e9e564fa9568d959488f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.287317950959928, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6913057354486096, - "normalized_score": 69.13057354486097 - }, - "bbh": { - "name": "BBH", - "value": 0.5420956502068554, - "normalized_score": 34.8222565522494 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19788519637462235, - "normalized_score": 19.788519637462233 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3187919463087248, - "normalized_score": 9.172259507829976 - }, - "musr": { - "name": "MUSR", - "value": 0.4230833333333333, - "normalized_score": 13.252083333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3660239361702128, - "normalized_score": 29.558215130023648 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-12", - "submission_date": "2025-03-13", - "generation": 1, - "base_model": "DreadPoor/Noxis-8B-LINEAR (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.632762720740592 - } - }, - { - "id": "DreadPoor/Nullsworn-12B-LINEAR_bfloat16_d9c0d1e58d17ec818259e02609df20910187b3b2_True", - "model": { - "name": "DreadPoor/Nullsworn-12B-LINEAR", - "sha": "d9c0d1e58d17ec818259e02609df20910187b3b2", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 23.778140361939588, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44356086295473784, - "normalized_score": 44.35608629547379 - }, - "bbh": { - "name": "BBH", - "value": 0.5483045026677609, - "normalized_score": 35.51240857664764 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11253776435045318, - "normalized_score": 11.253776435045317 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.43495833333333334, - "normalized_score": 14.436458333333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3645279255319149, - "normalized_score": 29.391991725768314 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - 
"upload_date": "2025-03-09", - "submission_date": "2025-03-10", - "generation": 1, - "base_model": "DreadPoor/Nullsworn-12B-LINEAR (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 12.248, - "co2_cost": 0.8642260980352026 - } - }, - { - "id": "DreadPoor/Nwah-8B-Model_Stock_bfloat16_08ccbacf8c7519024b984c1678bac4acf8a98e62_True", - "model": { - "name": "DreadPoor/Nwah-8B-Model_Stock", - "sha": "08ccbacf8c7519024b984c1678bac4acf8a98e62", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.720182729588192, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7715893828375378, - "normalized_score": 77.15893828375377 - }, - "bbh": { - "name": "BBH", - "value": 0.5384269019541996, - "normalized_score": 34.11306781436337 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1797583081570997, - "normalized_score": 17.97583081570997 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3104026845637584, - "normalized_score": 8.05369127516779 - }, - "musr": { - "name": "MUSR", - "value": 0.4039479166666667, - "normalized_score": 9.82682291666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3807347074468085, - "normalized_score": 31.19274527186761 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-26", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.6453176169467952 - } - }, - { - "id": "DreadPoor/ONeil-model_stock-8B_bfloat16_d4b84956211fd57b85122fe0c6f88b2cb9a9c86a_True", - "model": { - "name": "DreadPoor/ONeil-model_stock-8B", - "sha": "d4b84956211fd57b85122fe0c6f88b2cb9a9c86a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.935908369361425, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6785662043378236, - "normalized_score": 67.85662043378235 - }, - "bbh": { - "name": "BBH", - "value": 0.5548337982400763, - "normalized_score": 36.4126125295027 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10120845921450151, - "normalized_score": 10.120845921450151 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.41734374999999996, - "normalized_score": 10.967968749999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35987367021276595, - "normalized_score": 28.874852245862886 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-06", - "submission_date": "2024-07-15", - "generation": 1, - "base_model": "DreadPoor/ONeil-model_stock-8B (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 3, - "params_billions": 8.03, - "co2_cost": 1.5288975332798018 - } - }, - { - "id": "DreadPoor/Oh_Boy-8B-LINEAR_bfloat16_68b8a8e495b810b1951bf29e13aa18258cfa6b0d_True", - "model": { - "name": "DreadPoor/Oh_Boy-8B-LINEAR", - "sha": "68b8a8e495b810b1951bf29e13aa18258cfa6b0d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": 
"LlamaForCausalLM", - "average_score": 29.675174242254258, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7503069633018169, - "normalized_score": 75.0306963301817 - }, - "bbh": { - "name": "BBH", - "value": 0.5375114406292553, - "normalized_score": 34.14329045372282 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1782477341389728, - "normalized_score": 17.82477341389728 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.4107708333333333, - "normalized_score": 11.679687499999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3848902925531915, - "normalized_score": 31.654476950354614 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-29", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.335458516352382 - } - }, - { - "id": "DreadPoor/OrangeJ-8B-Model_Stock_bfloat16_99248153895557dccccd1d62c6e5b73c4c096d56_True", - "model": { - "name": "DreadPoor/OrangeJ-8B-Model_Stock", - "sha": "99248153895557dccccd1d62c6e5b73c4c096d56", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.2188791324389, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7841039552830933, - "normalized_score": 78.41039552830932 - }, - "bbh": { - "name": "BBH", - "value": 0.5413478053905038, - "normalized_score": 35.15080738484264 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17598187311178248, - "normalized_score": 17.598187311178247 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.4027708333333333, - "normalized_score": 10.346354166666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3968583776595745, - "normalized_score": 32.98426418439716 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-21", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4141550662108153 - } - }, - { - "id": "DreadPoor/Promissum_Mane-8B-LINEAR_bfloat16_ff399e7004040e1807e8d08b4d0967206fc50afa_True", - "model": { - "name": "DreadPoor/Promissum_Mane-8B-LINEAR", - "sha": "ff399e7004040e1807e8d08b4d0967206fc50afa", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.099649072440133, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7150361042035134, - "normalized_score": 71.50361042035134 - }, - "bbh": { - "name": "BBH", - "value": 0.5457684398146738, - "normalized_score": 35.253190231117536 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1555891238670695, - "normalized_score": 15.55891238670695 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 
0.42004166666666665, - "normalized_score": 13.338541666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38505651595744683, - "normalized_score": 31.672946217494097 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-30", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.6572930442289775 - } - }, - { - "id": "DreadPoor/Promissum_Mane-8B-LINEAR-lorablated_bfloat16_34c4a30b7462704810e35e033aa5ef33b075a97b_True", - "model": { - "name": "DreadPoor/Promissum_Mane-8B-LINEAR-lorablated", - "sha": "34c4a30b7462704810e35e033aa5ef33b075a97b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.82332716689665, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7156356245872064, - "normalized_score": 71.56356245872064 - }, - "bbh": { - "name": "BBH", - "value": 0.5435183631990302, - "normalized_score": 34.60910725048443 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15332326283987915, - "normalized_score": 15.332326283987916 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.4197916666666666, - "normalized_score": 13.840624999999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37391954787234044, - "normalized_score": 30.43550531914893 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-30", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.5846849960800864 - } - }, - { - "id": "DreadPoor/RPMash-8B-Model_Stock_bfloat16_005b435f7810d50c508a186b9fc029587c962643_True", - "model": { - "name": "DreadPoor/RPMash-8B-Model_Stock", - "sha": "005b435f7810d50c508a186b9fc029587c962643", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.700898440005755, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4563502617499346, - "normalized_score": 45.635026174993456 - }, - "bbh": { - "name": "BBH", - "value": 0.5169088291675549, - "normalized_score": 31.112591631105516 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10800604229607251, - "normalized_score": 10.80060422960725 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28691275167785235, - "normalized_score": 4.921700223713646 - }, - "musr": { - "name": "MUSR", - "value": 0.405375, - "normalized_score": 8.805208333333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3603723404255319, - "normalized_score": 28.930260047281326 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-22", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3049476110310343 - } - }, - { - "id": 
"DreadPoor/RPMash_V3-8B-Model_Stock_bfloat16_cc331dfb7b2f6ed280dd473077c3776c813c2a5a_True", - "model": { - "name": "DreadPoor/RPMash_V3-8B-Model_Stock", - "sha": "cc331dfb7b2f6ed280dd473077c3776c813c2a5a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.785089599589032, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.70491961329273, - "normalized_score": 70.491961329273 - }, - "bbh": { - "name": "BBH", - "value": 0.5217453397523113, - "normalized_score": 31.99104716376097 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1042296072507553, - "normalized_score": 10.42296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30033557046979864, - "normalized_score": 6.711409395973152 - }, - "musr": { - "name": "MUSR", - "value": 0.37775000000000003, - "normalized_score": 6.052083333333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36136968085106386, - "normalized_score": 29.041075650118202 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-25", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2215115599421278 - } - }, - { - "id": "DreadPoor/Rusted_Gold-8B-LINEAR_bfloat16_23186793cc5230820b32031257b8e288d6294bfb_True", - "model": { - "name": "DreadPoor/Rusted_Gold-8B-LINEAR", - "sha": "23186793cc5230820b32031257b8e288d6294bfb", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.318843902928162, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7296240641497892, - "normalized_score": 72.96240641497891 - }, - "bbh": { - "name": "BBH", - "value": 0.5386646439313688, - "normalized_score": 34.12086145705898 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1933534743202417, - "normalized_score": 19.335347432024168 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2986577181208054, - "normalized_score": 6.487695749440718 - }, - "musr": { - "name": "MUSR", - "value": 0.41775, - "normalized_score": 12.118749999999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37799202127659576, - "normalized_score": 30.888002364066192 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-07", - "submission_date": "2025-03-07", - "generation": 1, - "base_model": "DreadPoor/Rusted_Gold-8B-LINEAR (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.5831037946767536 - } - }, - { - "id": "DreadPoor/Rusted_Platinum-8B-LINEAR_bfloat16_632042f8b0f8fe0684c7251b023df00bb81b9286_True", - "model": { - "name": "DreadPoor/Rusted_Platinum-8B-LINEAR", - "sha": "632042f8b0f8fe0684c7251b023df00bb81b9286", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 27.83034980109748, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7179838384375679, - "normalized_score": 71.7983838437568 - }, - "bbh": { - "name": "BBH", - 
"value": 0.5427868416987739, - "normalized_score": 34.78573450204923 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17220543806646527, - "normalized_score": 17.220543806646525 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2802013422818792, - "normalized_score": 4.026845637583895 - }, - "musr": { - "name": "MUSR", - "value": 0.39666666666666667, - "normalized_score": 8.816666666666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37300531914893614, - "normalized_score": 30.33392434988179 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-06", - "generation": 1, - "base_model": "DreadPoor/Rusted_Platinum-8B-LINEAR (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.6016343137475283 - } - }, - { - "id": "DreadPoor/Rusted_Platinum-8B-Model_Stock_bfloat16_8c7e325cd412d70f5cb1e0a2f72f1cd3860f9eb3_True", - "model": { - "name": "DreadPoor/Rusted_Platinum-8B-Model_Stock", - "sha": "8c7e325cd412d70f5cb1e0a2f72f1cd3860f9eb3", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.303818181145747, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44078821970150317, - "normalized_score": 44.078821970150315 - }, - "bbh": { - "name": "BBH", - "value": 0.5242840148078765, - "normalized_score": 32.36931288487491 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10196374622356495, - "normalized_score": 10.196374622356496 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.37406249999999996, - "normalized_score": 4.424479166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3546376329787234, - "normalized_score": 28.29307033096927 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-06", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.6156083073986824 - } - }, - { - "id": "DreadPoor/Sellen-8B-model_stock_bfloat16_accde7145d81a428c782695ea61eebc608efd980_True", - "model": { - "name": "DreadPoor/Sellen-8B-model_stock", - "sha": "accde7145d81a428c782695ea61eebc608efd980", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.387643367541383, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7112893788481229, - "normalized_score": 71.1289378848123 - }, - "bbh": { - "name": "BBH", - "value": 0.5231680557624704, - "normalized_score": 31.3609793143707 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1336858006042296, - "normalized_score": 13.36858006042296 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.3960416666666666, - "normalized_score": 10.671874999999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35696476063829785, - "normalized_score": 28.551640070921984 - } - }, - "features": { - 
"is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-08-27", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.614941045520685 - } - }, - { - "id": "DreadPoor/Something-8B-Model_Stock_bfloat16_c08094253a01bfa99d0076a28fc758522ce5dd60_True", - "model": { - "name": "DreadPoor/Something-8B-Model_Stock", - "sha": "c08094253a01bfa99d0076a28fc758522ce5dd60", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.93310010934314, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5043107842746135, - "normalized_score": 50.43107842746136 - }, - "bbh": { - "name": "BBH", - "value": 0.5395029370473196, - "normalized_score": 34.539896807500135 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1797583081570997, - "normalized_score": 17.97583081570997 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31711409395973156, - "normalized_score": 8.948545861297541 - }, - "musr": { - "name": "MUSR", - "value": 0.41873958333333333, - "normalized_score": 11.642447916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3885472074468085, - "normalized_score": 32.06080082742317 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-15", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.5553240142927571 - } - }, - { - "id": "DreadPoor/Spring_Dusk-8B-SCE_bfloat16_fdf3bfd36ef2529dc971a5103d8194a98a0bf04e_True", - "model": { - "name": "DreadPoor/Spring_Dusk-8B-SCE", - "sha": "fdf3bfd36ef2529dc971a5103d8194a98a0bf04e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.661455778534272, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6514636719459922, - "normalized_score": 65.14636719459922 - }, - "bbh": { - "name": "BBH", - "value": 0.5635271357931001, - "normalized_score": 37.76481840522523 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07628398791540786, - "normalized_score": 7.628398791540786 - }, - "gpqa": { - "name": "GPQA", - "value": 0.287751677852349, - "normalized_score": 5.033557046979867 - }, - "musr": { - "name": "MUSR", - "value": 0.45997916666666666, - "normalized_score": 17.33072916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3435837765957447, - "normalized_score": 27.06486406619385 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "DreadPoor/Spring_Dusk-8B-SCE (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4256880940800347 - } - }, - { - "id": "DreadPoor/Summer_Dawn-8B-SCE_bfloat16_1c9ef9985436d5d44eff6b09e1c7a9dfe7ebf140_True", - "model": { - "name": "DreadPoor/Summer_Dawn-8B-SCE", - "sha": "1c9ef9985436d5d44eff6b09e1c7a9dfe7ebf140", - 
"precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 27.707526796543544, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6642032030567783, - "normalized_score": 66.42032030567782 - }, - "bbh": { - "name": "BBH", - "value": 0.539111375413361, - "normalized_score": 34.62851818112923 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17220543806646527, - "normalized_score": 17.220543806646525 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30033557046979864, - "normalized_score": 6.711409395973152 - }, - "musr": { - "name": "MUSR", - "value": 0.41204166666666664, - "normalized_score": 10.671874999999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37533244680851063, - "normalized_score": 30.592494089834517 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-02", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.281365469197718 - } - }, - { - "id": "DreadPoor/Summer_Dusk-8B-TIES_bfloat16_529d03b76a413c42c498f3a64781f4afc0ecb4e4_True", - "model": { - "name": "DreadPoor/Summer_Dusk-8B-TIES", - "sha": "529d03b76a413c42c498f3a64781f4afc0ecb4e4", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.70113735165475, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4922206412319312, - "normalized_score": 49.22206412319312 - }, - "bbh": { - "name": "BBH", - "value": 0.5359662578395569, - "normalized_score": 33.83081371910365 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18051359516616314, - "normalized_score": 18.051359516616312 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.4266770833333333, - "normalized_score": 13.76796875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3855551861702128, - "normalized_score": 31.728354018912537 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-05", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3505131314656516 - } - }, - { - "id": "DreadPoor/Summer_Rain-8B-SCE_bfloat16_3071fdf77d85661b472c17ed496954072fcba80e_True", - "model": { - "name": "DreadPoor/Summer_Rain-8B-SCE", - "sha": "3071fdf77d85661b472c17ed496954072fcba80e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.516588284918253, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5459259210007226, - "normalized_score": 54.59259210007225 - }, - "bbh": { - "name": "BBH", - "value": 0.5845948417986419, - "normalized_score": 40.61854240395792 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0702416918429003, - "normalized_score": 7.02416918429003 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - 
"normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.4477291666666667, - "normalized_score": 15.366145833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3550531914893617, - "normalized_score": 28.33924349881796 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-02", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.417925622177407 - } - }, - { - "id": "DreadPoor/Summer_Rain-8B-TIES_bfloat16_ff5a9e8f6c8b2d34e0eac427c33c0b3569804c66_True", - "model": { - "name": "DreadPoor/Summer_Rain-8B-TIES", - "sha": "ff5a9e8f6c8b2d34e0eac427c33c0b3569804c66", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.49119270489429, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5444021861992845, - "normalized_score": 54.44021861992846 - }, - "bbh": { - "name": "BBH", - "value": 0.5845948417986419, - "normalized_score": 40.61854240395792 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0702416918429003, - "normalized_score": 7.02416918429003 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.4477291666666667, - "normalized_score": 15.366145833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3550531914893617, - "normalized_score": 28.33924349881796 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-02", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4687409513265564 - } - }, - { - "id": "DreadPoor/Sun-8B-Model_Stock_bfloat16_7fa7747e70e20c385c134f545ceb6287a436af06_True", - "model": { - "name": "DreadPoor/Sun-8B-Model_Stock", - "sha": "7fa7747e70e20c385c134f545ceb6287a436af06", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.080714550686338, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7758358932077998, - "normalized_score": 77.58358932077998 - }, - "bbh": { - "name": "BBH", - "value": 0.5263511014407583, - "normalized_score": 32.954595045665 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20996978851963746, - "normalized_score": 20.996978851963746 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.40975, - "normalized_score": 10.852083333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38347739361702127, - "normalized_score": 31.49748817966903 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-27", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 
1.3758434175655363 - } - }, - { - "id": "DreadPoor/Sweetened_Condensed_Milk-8B-Model_Stock_bfloat16_307d93aeb51160a8b0ce236b8abd13e04873fef1_True", - "model": { - "name": "DreadPoor/Sweetened_Condensed_Milk-8B-Model_Stock", - "sha": "307d93aeb51160a8b0ce236b8abd13e04873fef1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.56115847933121, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7417142071924716, - "normalized_score": 74.17142071924715 - }, - "bbh": { - "name": "BBH", - "value": 0.5406287643522295, - "normalized_score": 34.670888066076664 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18731117824773413, - "normalized_score": 18.731117824773413 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.4106770833333333, - "normalized_score": 11.10130208333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38480718085106386, - "normalized_score": 31.645242316784866 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-27", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3313386343071156 - } - }, - { - "id": "DreadPoor/TEST02-Ignore_bfloat16_e0a0826f7695f452cf920b4d3261cf82286fa6c0_True", - "model": { - "name": "DreadPoor/TEST02-Ignore", - "sha": "e0a0826f7695f452cf920b4d3261cf82286fa6c0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.97786899148115, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6118964347930158, - "normalized_score": 61.18964347930159 - }, - "bbh": { - "name": "BBH", - "value": 0.5601644306147606, - "normalized_score": 37.13218783602701 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08685800604229607, - "normalized_score": 8.685800604229607 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28439597315436244, - "normalized_score": 4.5861297539149914 - }, - "musr": { - "name": "MUSR", - "value": 0.41985416666666664, - "normalized_score": 10.848437500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3468251329787234, - "normalized_score": 27.425014775413715 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-05", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.445361409314325 - } - }, - { - "id": "DreadPoor/TEST03-ignore_bfloat16_70ad8c5d5131c144904cbb19f13e60cd2e4db1d1_True", - "model": { - "name": "DreadPoor/TEST03-ignore", - "sha": "70ad8c5d5131c144904cbb19f13e60cd2e4db1d1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.440777656618568, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6967014189018471, - "normalized_score": 69.67014189018471 - }, - "bbh": { - "name": "BBH", - 
"value": 0.5383414134372179, - "normalized_score": 34.38735467240193 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16540785498489427, - "normalized_score": 16.540785498489427 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3087248322147651, - "normalized_score": 7.829977628635347 - }, - "musr": { - "name": "MUSR", - "value": 0.4186145833333333, - "normalized_score": 11.226822916666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37890625, - "normalized_score": 30.989583333333336 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-05", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.9057666825537358 - } - }, - { - "id": "DreadPoor/TEST06-ignore_bfloat16_bed8b9e607470aa9ba1b827009f34bee55e9cdcb_True", - "model": { - "name": "DreadPoor/TEST06-ignore", - "sha": "bed8b9e607470aa9ba1b827009f34bee55e9cdcb", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 27.721454908565875, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7322969720342026, - "normalized_score": 73.22969720342026 - }, - "bbh": { - "name": "BBH", - "value": 0.5509060880148441, - "normalized_score": 35.92411184894727 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11782477341389729, - "normalized_score": 11.782477341389729 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28691275167785235, - "normalized_score": 4.921700223713646 - }, - "musr": { - "name": "MUSR", - "value": 0.4224895833333333, - "normalized_score": 11.411197916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3615359042553192, - "normalized_score": 29.05954491725768 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-07", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 2.1041045761762494 - } - }, - { - "id": "DreadPoor/TEST07-ignore_bfloat16_18d361629e2853bbca79ce97819f0f8cf33b1468_True", - "model": { - "name": "DreadPoor/TEST07-ignore", - "sha": "18d361629e2853bbca79ce97819f0f8cf33b1468", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.498717642655507, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7399655137258031, - "normalized_score": 73.99655137258031 - }, - "bbh": { - "name": "BBH", - "value": 0.5561275711510345, - "normalized_score": 36.88142926288663 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1661631419939577, - "normalized_score": 16.61631419939577 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3087248322147651, - "normalized_score": 7.829977628635347 - }, - "musr": { - "name": "MUSR", - "value": 0.40937500000000004, - "normalized_score": 9.671875000000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3879654255319149, - "normalized_score": 31.99615839243498 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - 
"is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-07", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3740561440519077 - } - }, - { - "id": "DreadPoor/TEST08-ignore_bfloat16_fbcbf1bc95596028635441a46e9b2ef250c9e5fb_True", - "model": { - "name": "DreadPoor/TEST08-ignore", - "sha": "fbcbf1bc95596028635441a46e9b2ef250c9e5fb", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.66477204531803, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7466599733152479, - "normalized_score": 74.66599733152479 - }, - "bbh": { - "name": "BBH", - "value": 0.5453519655444978, - "normalized_score": 35.35061865878467 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18202416918429004, - "normalized_score": 18.202416918429005 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31291946308724833, - "normalized_score": 8.389261744966444 - }, - "musr": { - "name": "MUSR", - "value": 0.40810416666666666, - "normalized_score": 9.679687499999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3853058510638298, - "normalized_score": 31.700650118203306 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-07", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.37025617734741 - } - }, - { - "id": "DreadPoor/Trinas_Nectar-8B-model_stock_bfloat16_cb46b8431872557904d83fc5aa1b90dabeb74acc_True", - "model": { - "name": "DreadPoor/Trinas_Nectar-8B-model_stock", - "sha": "cb46b8431872557904d83fc5aa1b90dabeb74acc", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 27.522454161375844, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7259272064788096, - "normalized_score": 72.59272064788095 - }, - "bbh": { - "name": "BBH", - "value": 0.5256123853406084, - "normalized_score": 31.97509368554489 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15256797583081572, - "normalized_score": 15.256797583081571 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2860738255033557, - "normalized_score": 4.809843400447425 - }, - "musr": { - "name": "MUSR", - "value": 0.4067708333333333, - "normalized_score": 11.413020833333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36178523936170215, - "normalized_score": 29.08724881796691 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-16", - "submission_date": "2024-08-27", - "generation": 1, - "base_model": "DreadPoor/Trinas_Nectar-8B-model_stock (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 3, - "params_billions": 8.03, - "co2_cost": 1.733447793388806 - } - }, - { - "id": "DreadPoor/UNTESTED-VENN_1.2-8B-Model_Stock_bfloat16_f424bcd1b0b28a4f21b807c520710142ca797a7c_False", - "model": { - "name": "DreadPoor/UNTESTED-VENN_1.2-8B-Model_Stock", - "sha": "f424bcd1b0b28a4f21b807c520710142ca797a7c", - "precision": "bfloat16", - "type": 
"basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.474742716103354, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47176270074513404, - "normalized_score": 47.176270074513404 - }, - "bbh": { - "name": "BBH", - "value": 0.5475027267486955, - "normalized_score": 35.427098756489926 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1540785498489426, - "normalized_score": 15.407854984894259 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31543624161073824, - "normalized_score": 8.7248322147651 - }, - "musr": { - "name": "MUSR", - "value": 0.4449375, - "normalized_score": 15.15052083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.378656914893617, - "normalized_score": 30.961879432624112 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-27", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 4.015, - "co2_cost": 1.2984347569797885 - } - }, - { - "id": "DreadPoor/VENN_1.2-8B-Model_Stock_bfloat16_eb564a91d66eeec24b7699633867b0182b9bef5f_True", - "model": { - "name": "DreadPoor/VENN_1.2-8B-Model_Stock", - "sha": "eb564a91d66eeec24b7699633867b0182b9bef5f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.85327338885053, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7226049105262924, - "normalized_score": 72.26049105262925 - }, - "bbh": { - "name": "BBH", - "value": 0.5458812486333333, - "normalized_score": 35.12536864211717 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17069486404833836, - "normalized_score": 17.069486404833835 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.42001041666666666, - "normalized_score": 12.167968749999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3720910904255319, - "normalized_score": 30.232343380614658 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-28", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 4.015, - "co2_cost": 1.2817874457469578 - } - }, - { - "id": "DreadPoor/WIP-Acacia-8B-Model_Stock_bfloat16_ae4f1a21b9de70ec75d02e9a84209ae6360a01e9_True", - "model": { - "name": "DreadPoor/WIP-Acacia-8B-Model_Stock", - "sha": "ae4f1a21b9de70ec75d02e9a84209ae6360a01e9", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.72381960950896, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6246359659038019, - "normalized_score": 62.46359659038019 - }, - "bbh": { - "name": "BBH", - "value": 0.5194665568943516, - "normalized_score": 31.162352922428383 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16691842900302115, - "normalized_score": 16.691842900302113 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3062080536912752, - "normalized_score": 
7.494407158836691 - }, - "musr": { - "name": "MUSR", - "value": 0.4225833333333333, - "normalized_score": 12.122916666666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37367021276595747, - "normalized_score": 30.40780141843972 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-28", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3259010853742466 - } - }, - { - "id": "DreadPoor/WIP_Damascus-8B-TIES_bfloat16_c7720a0b0a8d24e62bf71b0e955b1aca8e62f1cb_True", - "model": { - "name": "DreadPoor/WIP_Damascus-8B-TIES", - "sha": "c7720a0b0a8d24e62bf71b0e955b1aca8e62f1cb", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.97055556390754, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4776326812856554, - "normalized_score": 47.76326812856554 - }, - "bbh": { - "name": "BBH", - "value": 0.5410672913070808, - "normalized_score": 34.52230581565854 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16540785498489427, - "normalized_score": 16.540785498489427 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.41185416666666663, - "normalized_score": 12.715104166666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37608045212765956, - "normalized_score": 30.67560579196217 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-10-29", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.6362246106711529 - } - }, - { - "id": "DreadPoor/Wannabe-8B-Model_Stock_bfloat16_26050fd408c163b516a2a3363148acca2dc1dcbd_True", - "model": { - "name": "DreadPoor/Wannabe-8B-Model_Stock", - "sha": "26050fd408c163b516a2a3363148acca2dc1dcbd", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.111295234622002, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7204816553411615, - "normalized_score": 72.04816553411615 - }, - "bbh": { - "name": "BBH", - "value": 0.5389637944785705, - "normalized_score": 34.278852846231906 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17749244712990936, - "normalized_score": 17.749244712990937 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.41346875, - "normalized_score": 12.316927083333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.383061835106383, - "normalized_score": 31.451315011820324 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-28", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 
1.1807024438715077 - } - }, - { - "id": "DreadPoor/What_A_Thrill-8B-Model_Stock_bfloat16_492f72ec38b55b943f5014608df6a6ba2e1f458e_True", - "model": { - "name": "DreadPoor/What_A_Thrill-8B-Model_Stock", - "sha": "492f72ec38b55b943f5014608df6a6ba2e1f458e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.009599613230606, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7064433480941679, - "normalized_score": 70.64433480941679 - }, - "bbh": { - "name": "BBH", - "value": 0.531144904394377, - "normalized_score": 32.94877726470522 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18202416918429004, - "normalized_score": 18.202416918429005 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.40804166666666664, - "normalized_score": 10.938541666666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3615359042553192, - "normalized_score": 29.05954491725768 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-16", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2363391557136492 - } - }, - { - "id": "DreadPoor/Winter-8B-SCE_bfloat16_f4b79055af20d661b693151aa7e5db0000b8ee44_True", - "model": { - "name": "DreadPoor/Winter-8B-SCE", - "sha": "f4b79055af20d661b693151aa7e5db0000b8ee44", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.224572446583664, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7536292592543341, - "normalized_score": 75.3629259254334 - }, - "bbh": { - "name": "BBH", - "value": 0.5261733490323383, - "normalized_score": 32.53825480363242 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19184290030211482, - "normalized_score": 19.184290030211482 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.4070833333333333, - "normalized_score": 10.118749999999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38389295212765956, - "normalized_score": 31.543661347517727 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-02", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3285695722275177 - } - }, - { - "id": "DreadPoor/Winter_Dawn-8B-TIES_bfloat16_51a34187becda09317bd5afd193f265e18b731b3_True", - "model": { - "name": "DreadPoor/Winter_Dawn-8B-TIES", - "sha": "51a34187becda09317bd5afd193f265e18b731b3", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.797849910055774, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5496482665992899, - "normalized_score": 54.96482665992899 - }, - "bbh": { - "name": "BBH", - "value": 
0.5309416142154736, - "normalized_score": 33.71410059102799 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18580060422960726, - "normalized_score": 18.580060422960727 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.42785416666666665, - "normalized_score": 13.2484375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3910405585106383, - "normalized_score": 32.337839834515364 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-01", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4492919129493278 - } - }, - { - "id": "DreadPoor/Winter_Dusk-8B-TIES_bfloat16_cc85bae1be7b642bfaa31563f6e4651f8f5181ab_True", - "model": { - "name": "DreadPoor/Winter_Dusk-8B-TIES", - "sha": "cc85bae1be7b642bfaa31563f6e4651f8f5181ab", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.11123263157222, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7152610628687439, - "normalized_score": 71.52610628687438 - }, - "bbh": { - "name": "BBH", - "value": 0.4951882158967103, - "normalized_score": 28.227815382165357 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07175226586102719, - "normalized_score": 7.175226586102719 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.3688229166666666, - "normalized_score": 3.6028645833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3478224734042553, - "normalized_score": 27.53583037825059 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-01", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3938008057333435 - } - }, - { - "id": "DreadPoor/Winter_Night-8B-Model_Stock_bfloat16_384e051eb8cb178c13b2e0aa711070752da68eb4_True", - "model": { - "name": "DreadPoor/Winter_Night-8B-Model_Stock", - "sha": "384e051eb8cb178c13b2e0aa711070752da68eb4", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 27.21635948101721, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7040452665593957, - "normalized_score": 70.40452665593958 - }, - "bbh": { - "name": "BBH", - "value": 0.5184968441488284, - "normalized_score": 31.604274148057367 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14577039274924472, - "normalized_score": 14.577039274924472 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3062080536912752, - "normalized_score": 7.494407158836691 - }, - "musr": { - "name": "MUSR", - "value": 0.3914270833333333, - "normalized_score": 9.595052083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3666057180851064, - "normalized_score": 29.622857565011817 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, 
- "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-07", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.6128445606478136 - } - }, - { - "id": "DreadPoor/Yafune-8B-Model_Stock_bfloat16_43d989e709b7a22329700943e3b424f85f83a82f_True", - "model": { - "name": "DreadPoor/Yafune-8B-Model_Stock", - "sha": "43d989e709b7a22329700943e3b424f85f83a82f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.104766282389747, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7533045652202822, - "normalized_score": 75.33045652202821 - }, - "bbh": { - "name": "BBH", - "value": 0.5466719512941253, - "normalized_score": 35.2912300982616 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1661631419939577, - "normalized_score": 16.61631419939577 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3271812080536913, - "normalized_score": 10.290827740492169 - }, - "musr": { - "name": "MUSR", - "value": 0.41728125, - "normalized_score": 11.426822916666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38505651595744683, - "normalized_score": 31.672946217494097 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-12", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.8160167821079294 - } - }, - { - "id": "DreadPoor/Yearn_V3-8B-Model_Stock_bfloat16_ece46c6b72ddeb7f82939b8fb621fb95824bdf3a_True", - "model": { - "name": "DreadPoor/Yearn_V3-8B-Model_Stock", - "sha": "ece46c6b72ddeb7f82939b8fb621fb95824bdf3a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.95670005151148, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7289746760816855, - "normalized_score": 72.89746760816854 - }, - "bbh": { - "name": "BBH", - "value": 0.5322019394938072, - "normalized_score": 33.41239643429133 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18957703927492447, - "normalized_score": 18.957703927492446 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.3908958333333333, - "normalized_score": 9.961979166666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3801529255319149, - "normalized_score": 31.128102836879428 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-25", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3757086381451888 - } - }, - { - "id": "DreadPoor/ZEUS-8B-V17-Abliterated_ALT_bfloat16_449eee9a5051ae644e161d94f48ad43a0acda225_True", - "model": { - "name": "DreadPoor/ZEUS-8B-V17-Abliterated_ALT", - "sha": "449eee9a5051ae644e161d94f48ad43a0acda225", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - 
"architecture": "LlamaForCausalLM", - "average_score": 26.250545465114055, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5511221337163171, - "normalized_score": 55.11221337163172 - }, - "bbh": { - "name": "BBH", - "value": 0.5231075970343642, - "normalized_score": 32.2576119397764 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1903323262839879, - "normalized_score": 19.033232628398792 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.41492708333333334, - "normalized_score": 11.265885416666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3890458776595745, - "normalized_score": 32.116208628841605 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-18", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3636809735215683 - } - }, - { - "id": "DreadPoor/Zelus-8B-Model_Stock_bfloat16_a26637d53266e83d9c80b2b7609449266ad551d1_True", - "model": { - "name": "DreadPoor/Zelus-8B-Model_Stock", - "sha": "a26637d53266e83d9c80b2b7609449266ad551d1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.7413869053032, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.778833495126265, - "normalized_score": 77.8833495126265 - }, - "bbh": { - "name": "BBH", - "value": 0.5307011398651839, - "normalized_score": 33.05816146454601 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1646525679758308, - "normalized_score": 16.46525679758308 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3062080536912752, - "normalized_score": 7.494407158836691 - }, - "musr": { - "name": "MUSR", - "value": 0.42140625, - "normalized_score": 11.975781249999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38414228723404253, - "normalized_score": 31.57136524822695 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-15", - "submission_date": "2025-01-15", - "generation": 1, - "base_model": "DreadPoor/Zelus-8B-Model_Stock (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3257089020449855 - } - }, - { - "id": "DreadPoor/Zelus_V2-8B-Model_Stock_bfloat16_ee54a92a0de36a5c4602d9dbbf372a361d0a3230_True", - "model": { - "name": "DreadPoor/Zelus_V2-8B-Model_Stock", - "sha": "ee54a92a0de36a5c4602d9dbbf372a361d0a3230", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.16449183127745, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7898243327703826, - "normalized_score": 78.98243327703825 - }, - "bbh": { - "name": "BBH", - "value": 0.5344816839912676, - "normalized_score": 33.66407644966964 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2054380664652568, - "normalized_score": 20.54380664652568 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - 
"musr": { - "name": "MUSR", - "value": 0.3960729166666667, - "normalized_score": 8.375781250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38331117021276595, - "normalized_score": 31.47901891252955 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-15", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3413598010523688 - } - }, - { - "id": "DreadPoor/felix_dies-mistral-7B-model_stock_bfloat16_bb317aa7565625327e18c5158aebd4710aa1d925_False", - "model": { - "name": "DreadPoor/felix_dies-mistral-7B-model_stock", - "sha": "bb317aa7565625327e18c5158aebd4710aa1d925", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 18.13959245219531, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.30077860077926566, - "normalized_score": 30.077860077926566 - }, - "bbh": { - "name": "BBH", - "value": 0.49009180735274227, - "normalized_score": 28.890798050964488 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05362537764350453, - "normalized_score": 5.362537764350453 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.4518229166666667, - "normalized_score": 15.477864583333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3109208776595745, - "normalized_score": 23.43565307328605 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-30", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.3231433347271495 - } - }, - { - "id": "DreadPoor/hakuchido-8B-MODEL_STOCK_bfloat16_f647d8a2712eef34339eb7be5ec9276d809f3a5b_True", - "model": { - "name": "DreadPoor/hakuchido-8B-MODEL_STOCK", - "sha": "f647d8a2712eef34339eb7be5ec9276d809f3a5b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.346239826870143, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7375175645066203, - "normalized_score": 73.75175645066203 - }, - "bbh": { - "name": "BBH", - "value": 0.5398373390214104, - "normalized_score": 34.575610505807134 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19486404833836857, - "normalized_score": 19.486404833836858 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2953020134228188, - "normalized_score": 6.040268456375841 - }, - "musr": { - "name": "MUSR", - "value": 0.41746875, - "normalized_score": 11.316927083333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3781582446808511, - "normalized_score": 30.906471631205672 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-27", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 
0.6512636744112785 - } - }, - { - "id": "DreadPoor/ichor-8B-Model_Stock_bfloat16_96c0a0e4367abc4c9c300f4ef8a03b9f8c06b157_True", - "model": { - "name": "DreadPoor/ichor-8B-Model_Stock", - "sha": "96c0a0e4367abc4c9c300f4ef8a03b9f8c06b157", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.325804130424327, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5386319410275846, - "normalized_score": 53.863194102758456 - }, - "bbh": { - "name": "BBH", - "value": 0.5084222037759372, - "normalized_score": 29.922368800073574 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10876132930513595, - "normalized_score": 10.876132930513595 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3238255033557047, - "normalized_score": 9.843400447427292 - }, - "musr": { - "name": "MUSR", - "value": 0.42121875, - "normalized_score": 11.552343749999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31507646276595747, - "normalized_score": 23.89738475177305 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-03", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.8754344347697692 - } - }, - { - "id": "DreadPoor/ichor_1.1-8B-Model_Stock_bfloat16_e4af616fd4e10ca94c0cee55cd1b7fa90cbb49f3_True", - "model": { - "name": "DreadPoor/ichor_1.1-8B-Model_Stock", - "sha": "e4af616fd4e10ca94c0cee55cd1b7fa90cbb49f3", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.01706630636859, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8096328851890761, - "normalized_score": 80.9632885189076 - }, - "bbh": { - "name": "BBH", - "value": 0.528067770617839, - "normalized_score": 32.6207492618971 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17749244712990936, - "normalized_score": 17.749244712990937 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3062080536912752, - "normalized_score": 7.494407158836691 - }, - "musr": { - "name": "MUSR", - "value": 0.4067708333333333, - "normalized_score": 9.546354166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3855551861702128, - "normalized_score": 31.728354018912537 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-04", - "submission_date": "2025-03-06", - "generation": 1, - "base_model": "DreadPoor/ichor_1.1-8B-Model_Stock (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 0.6235860749127251 - } - }, - { - "id": "DreadPoor/inexpertus-8B-Model_Stock_bfloat16_e6da16c921c073facbe15769fae301d02163ef34_True", - "model": { - "name": "DreadPoor/inexpertus-8B-Model_Stock", - "sha": "e6da16c921c073facbe15769fae301d02163ef34", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.70787002354444, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7795327508787795, - "normalized_score": 
77.95327508787796 - }, - "bbh": { - "name": "BBH", - "value": 0.5280190470468065, - "normalized_score": 32.4633736795138 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17069486404833836, - "normalized_score": 17.069486404833835 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.41182291666666665, - "normalized_score": 11.811197916666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3790724734042553, - "normalized_score": 31.008052600472812 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-07", - "submission_date": "2025-03-07", - "generation": 1, - "base_model": "DreadPoor/inexpertus-8B-Model_Stock (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.665457340724185 - } - }, - { - "id": "DreadPoor/inexpertus_1.1-8B-LINEAR_bfloat16_497e83a2ead3a83d693f78531c0bc802849eef64_True", - "model": { - "name": "DreadPoor/inexpertus_1.1-8B-LINEAR", - "sha": "497e83a2ead3a83d693f78531c0bc802849eef64", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.609547612278366, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7527050448365891, - "normalized_score": 75.27050448365891 - }, - "bbh": { - "name": "BBH", - "value": 0.5524638802167572, - "normalized_score": 36.1991911954092 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1729607250755287, - "normalized_score": 17.29607250755287 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.41734374999999996, - "normalized_score": 11.10130208333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38272938829787234, - "normalized_score": 31.414376477541367 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-07", - "submission_date": "2025-03-07", - "generation": 1, - "base_model": "DreadPoor/inexpertus_1.1-8B-LINEAR (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 0.6597645235646241 - } - }, - { - "id": "DreadPoor/inexpertus_1.2-8B-LINEAR_bfloat16_9236276e5e9528276e327197b0fa00fb0826e6f9_True", - "model": { - "name": "DreadPoor/inexpertus_1.2-8B-LINEAR", - "sha": "9236276e5e9528276e327197b0fa00fb0826e6f9", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.786491464394853, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7347947889377962, - "normalized_score": 73.47947889377961 - }, - "bbh": { - "name": "BBH", - "value": 0.5523440600721518, - "normalized_score": 36.05652346278443 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15861027190332325, - "normalized_score": 15.861027190332324 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2953020134228188, - "normalized_score": 6.040268456375841 - }, - "musr": { - "name": "MUSR", - "value": 0.41334374999999995, - "normalized_score": 10.301302083333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 
0.37882313829787234, - "normalized_score": 30.98034869976359 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-08", - "submission_date": "2025-03-08", - "generation": 1, - "base_model": "DreadPoor/inexpertus_1.2-8B-LINEAR (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.6493941807487916 - } - }, - { - "id": "DreadPoor/mergekit-nuslerp-nqzkedi_bfloat16_2aa839aaa0200a0b3cd6c6be0b82c30ca0dc84b4_True", - "model": { - "name": "DreadPoor/mergekit-nuslerp-nqzkedi", - "sha": "2aa839aaa0200a0b3cd6c6be0b82c30ca0dc84b4", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.296302202290054, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7764852812759035, - "normalized_score": 77.64852812759035 - }, - "bbh": { - "name": "BBH", - "value": 0.5361918366546249, - "normalized_score": 34.09522783768477 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18806646525679757, - "normalized_score": 18.806646525679756 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.4224583333333333, - "normalized_score": 11.973958333333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3918716755319149, - "normalized_score": 32.430186170212764 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-29", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4374317529670209 - } - }, - { - "id": "DreadPoor/remember_to_breathe-8b-Model-Stock_bfloat16_fa88f1b06cf9ca7bd0d859c6a4b2240485363ae0_True", - "model": { - "name": "DreadPoor/remember_to_breathe-8b-Model-Stock", - "sha": "fa88f1b06cf9ca7bd0d859c6a4b2240485363ae0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.25652417725776, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7104150321147887, - "normalized_score": 71.04150321147887 - }, - "bbh": { - "name": "BBH", - "value": 0.5411654435599922, - "normalized_score": 34.67899076231628 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1487915407854985, - "normalized_score": 14.879154078549849 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.4144583333333333, - "normalized_score": 11.440624999999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37608045212765956, - "normalized_score": 30.67560579196217 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-06", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3270803241537383 - } - }, - { - "id": 
"DreadPoor/test_bfloat16_8f4e90a3e665f1d4d7cf737b43e7bdb360de3ffa_True", - "model": { - "name": "DreadPoor/test", - "sha": "8f4e90a3e665f1d4d7cf737b43e7bdb360de3ffa", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.952990701638594, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49369450834895856, - "normalized_score": 49.36945083489586 - }, - "bbh": { - "name": "BBH", - "value": 0.5371873804638203, - "normalized_score": 34.28751436834826 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1933534743202417, - "normalized_score": 19.335347432024168 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.4350833333333333, - "normalized_score": 14.518749999999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3646941489361702, - "normalized_score": 29.410460992907794 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-28", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 2.207847051540963 - } - }, - { - "id": "DreadPoor/test_ALT_bfloat16_15f7baaea9416ce8ba8b1ea972969fd54c2bacdd_True", - "model": { - "name": "DreadPoor/test_ALT", - "sha": "15f7baaea9416ce8ba8b1ea972969fd54c2bacdd", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.265670200166753, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.499689712185889, - "normalized_score": 49.9689712185889 - }, - "bbh": { - "name": "BBH", - "value": 0.5370433315307738, - "normalized_score": 33.986912493518574 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17069486404833836, - "normalized_score": 17.069486404833835 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26929530201342283, - "normalized_score": 2.572706935123044 - }, - "musr": { - "name": "MUSR", - "value": 0.4362916666666667, - "normalized_score": 14.303124999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3492353723404255, - "normalized_score": 27.692819148936167 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-28", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.5629934211563457 - } - }, - { - "id": "DreadPoor/tests_pending-do_not_use_yet_bfloat16_47f0a91f2a6b06724ff51b4fcb4ee6831c1f49e9_True", - "model": { - "name": "DreadPoor/tests_pending-do_not_use_yet", - "sha": "47f0a91f2a6b06724ff51b4fcb4ee6831c1f49e9", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.64145994849537, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7691414336183549, - "normalized_score": 76.91414336183549 - }, - "bbh": { - "name": "BBH", - "value": 0.5407897873885027, - "normalized_score": 34.6745089445582 - }, - "math": { - "name": 
"MATH Level 5", - "value": 0.19788519637462235, - "normalized_score": 19.788519637462233 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.40047916666666666, - "normalized_score": 8.79322916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38272938829787234, - "normalized_score": 31.414376477541367 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-04", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2634870441411765 - } - }, - { - "id": "ECE-ILAB-PRYMMAL/ILAB-Merging-3B-V2_bfloat16_6f26ffcb82b8a8d14400471da7047b8b4a8e4d10_False", - "model": { - "name": "ECE-ILAB-PRYMMAL/ILAB-Merging-3B-V2", - "sha": "6f26ffcb82b8a8d14400471da7047b8b4a8e4d10", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 24.06556562824206, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.40289432040319684, - "normalized_score": 40.28943204031969 - }, - "bbh": { - "name": "BBH", - "value": 0.5401935891431586, - "normalized_score": 36.00403694105599 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15181268882175228, - "normalized_score": 15.181268882175228 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.43321875, - "normalized_score": 13.752343749999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38605385638297873, - "normalized_score": 31.783761820330973 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-09", - "submission_date": "2025-03-09", - "generation": 1, - "base_model": "ECE-ILAB-PRYMMAL/ILAB-Merging-3B-V2 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 3.821, - "co2_cost": 0.4245496438848985 - } - }, - { - "id": "EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2_bfloat16__False", - "model": { - "name": "EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2", - "sha": "", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 33.81260628765727, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4038429145777648, - "normalized_score": 40.38429145777648 - }, - "bbh": { - "name": "BBH", - "value": 0.6090237540046592, - "normalized_score": 43.60784925797068 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3406344410876133, - "normalized_score": 34.06344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.39429530201342283, - "normalized_score": 19.239373601789712 - }, - "musr": { - "name": "MUSR", - "value": 0.4794479166666667, - "normalized_score": 19.630989583333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5135472074468085, - "normalized_score": 45.94968971631205 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": 
"2024-11-06", - "submission_date": "2024-12-26", - "generation": 1, - "base_model": "Qwen/Qwen2.5-14B", - "hub_license": "apache-2.0", - "hub_hearts": 20, - "params_billions": 14.77, - "co2_cost": 4.665346050968798 - } - }, - { - "id": "EVA-UNIT-01/EVA-Qwen2.5-72B-v0.2_bfloat16_2590214b30391392b9a84e7cbe40fff3a92c6814_True", - "model": { - "name": "EVA-UNIT-01/EVA-Qwen2.5-72B-v0.2", - "sha": "2590214b30391392b9a84e7cbe40fff3a92c6814", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 44.221596025457025, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6878837041272712, - "normalized_score": 68.78837041272712 - }, - "bbh": { - "name": "BBH", - "value": 0.7088012228048761, - "normalized_score": 59.0667326828602 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4312688821752266, - "normalized_score": 43.126888217522655 - }, - "gpqa": { - "name": "GPQA", - "value": 0.4085570469798658, - "normalized_score": 21.140939597315437 - }, - "musr": { - "name": "MUSR", - "value": 0.47197916666666667, - "normalized_score": 19.730729166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.581283244680851, - "normalized_score": 53.47591607565011 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-21", - "submission_date": "2024-11-27", - "generation": 1, - "base_model": "Qwen/Qwen2.5-72B", - "hub_license": "other", - "hub_hearts": 17, - "params_billions": 72.706, - "co2_cost": 45.910196932620046 - } - }, - { - "id": "Edgerunners/meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16_bfloat16_4b8290f9ef1f7d33df282d3764f795af4e64022c_True", - "model": { - "name": "Edgerunners/meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16", - "sha": "4b8290f9ef1f7d33df282d3764f795af4e64022c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.448866811862917, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7147114101694614, - "normalized_score": 71.47114101694615 - }, - "bbh": { - "name": "BBH", - "value": 0.4979908369885237, - "normalized_score": 28.25639185977421 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09063444108761329, - "normalized_score": 9.06344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.33415625, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36361369680851063, - "normalized_score": 29.290410756501185 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-12", - "submission_date": "2025-01-30", - "generation": 0, - "base_model": "Edgerunners/meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3115600651658479 - } - }, - { - "id": "EleutherAI/gpt-j-6b_bfloat16_47e169305d2e8376be1d31e765533382721b2cc1_False", - "model": { - "name": "EleutherAI/gpt-j-6b", - "sha": 
"47e169305d2e8376be1d31e765533382721b2cc1", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "GPTJForCausalLM", - "average_score": 6.570411768928537, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2522185578708937, - "normalized_score": 25.221855787089368 - }, - "bbh": { - "name": "BBH", - "value": 0.3191044431037278, - "normalized_score": 4.912818068323685 - }, - "math": { - "name": "MATH Level 5", - "value": 0.013595166163141994, - "normalized_score": 1.3595166163141994 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24580536912751677, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.36575, - "normalized_score": 5.252083333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12408577127659574, - "normalized_score": 2.6761968085106376 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2022-03-02", - "submission_date": "2024-08-19", - "generation": 0, - "base_model": "EleutherAI/gpt-j-6b", - "hub_license": "apache-2.0", - "hub_hearts": 1488, - "params_billions": 6.0, - "co2_cost": 1.5348643127710762 - } - }, - { - "id": "EleutherAI/gpt-neo-1.3B_bfloat16_dbe59a7f4a88d01d1ba9798d78dbe3fe038792c8_False", - "model": { - "name": "EleutherAI/gpt-neo-1.3B", - "sha": "dbe59a7f4a88d01d1ba9798d78dbe3fe038792c8", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "GPTNeoForCausalLM", - "average_score": 5.391090848825532, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20790502533278366, - "normalized_score": 20.790502533278367 - }, - "bbh": { - "name": "BBH", - "value": 0.30392315869356407, - "normalized_score": 3.024569180930987 - }, - "math": { - "name": "MATH Level 5", - "value": 0.010574018126888218, - "normalized_score": 1.0574018126888218 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2558724832214765, - "normalized_score": 0.7829977628635317 - }, - "musr": { - "name": "MUSR", - "value": 0.38165625, - "normalized_score": 4.873697916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1163563829787234, - "normalized_score": 1.8173758865248217 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2022-03-02", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "EleutherAI/gpt-neo-1.3B", - "hub_license": "mit", - "hub_hearts": 293, - "params_billions": 1.366, - "co2_cost": 0.7188481230380952 - } - }, - { - "id": "EleutherAI/gpt-neo-125m_bfloat16_21def0189f5705e2521767faed922f1f15e7d7db_False", - "model": { - "name": "EleutherAI/gpt-neo-125m", - "sha": "21def0189f5705e2521767faed922f1f15e7d7db", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "GPTNeoForCausalLM", - "average_score": 4.407321907614049, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19054442213327305, - "normalized_score": 19.054442213327306 - }, - "bbh": { - "name": "BBH", - "value": 0.3115156885791523, - "normalized_score": 3.436738951426704 - }, - "math": { - "name": "MATH Level 5", - "value": 0.006042296072507553, - "normalized_score": 0.6042296072507553 - }, - "gpqa": { - "name": "GPQA", - 
"value": 0.2533557046979866, - "normalized_score": 0.44742729306487633 - }, - "musr": { - "name": "MUSR", - "value": 0.3593333333333333, - "normalized_score": 2.6166666666666654 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10255984042553191, - "normalized_score": 0.28442671394798985 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2022-03-02", - "submission_date": "2024-08-10", - "generation": 0, - "base_model": "EleutherAI/gpt-neo-125m", - "hub_license": "mit", - "hub_hearts": 199, - "params_billions": 0.15, - "co2_cost": 0.4058049731865188 - } - }, - { - "id": "EleutherAI/gpt-neo-2.7B_bfloat16_e24fa291132763e59f4a5422741b424fb5d59056_False", - "model": { - "name": "EleutherAI/gpt-neo-2.7B", - "sha": "e24fa291132763e59f4a5422741b424fb5d59056", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "GPTNeoForCausalLM", - "average_score": 6.4310478009874465, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2589628851447493, - "normalized_score": 25.896288514474925 - }, - "bbh": { - "name": "BBH", - "value": 0.3139516033315253, - "normalized_score": 4.178602667081014 - }, - "math": { - "name": "MATH Level 5", - "value": 0.010574018126888218, - "normalized_score": 1.0574018126888218 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.3553645833333334, - "normalized_score": 3.5205729166666675 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11627327127659574, - "normalized_score": 1.8081412529550822 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2022-03-02", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "EleutherAI/gpt-neo-2.7B", - "hub_license": "mit", - "hub_hearts": 478, - "params_billions": 2.718, - "co2_cost": 1.0167628032465814 - } - }, - { - "id": "EleutherAI/gpt-neox-20b_float16_c292233c833e336628618a88a648727eb3dff0a7_False", - "model": { - "name": "EleutherAI/gpt-neox-20b", - "sha": "c292233c833e336628618a88a648727eb3dff0a7", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "GPTNeoXForCausalLM", - "average_score": 6.1165221524743325, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2586880587951081, - "normalized_score": 25.86880587951081 - }, - "bbh": { - "name": "BBH", - "value": 0.31650380320877564, - "normalized_score": 4.929114201526899 - }, - "math": { - "name": "MATH Level 5", - "value": 0.013595166163141994, - "normalized_score": 1.3595166163141994 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24328859060402686, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.36466666666666664, - "normalized_score": 2.816666666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1155252659574468, - "normalized_score": 1.725029550827422 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2022-04-07", - "submission_date": "2024-06-09", - "generation": 0, - "base_model": "EleutherAI/gpt-neox-20b", - 
"hub_license": "apache-2.0", - "hub_hearts": 555, - "params_billions": 20.739, - "co2_cost": 6.293472747230082 - } - }, - { - "id": "EleutherAI/pythia-1.4b_float16_fedc38a16eea3bd36a96b906d78d11d2ce18ed79_False", - "model": { - "name": "EleutherAI/pythia-1.4b", - "sha": "fedc38a16eea3bd36a96b906d78d11d2ce18ed79", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "GPTNeoXForCausalLM", - "average_score": 6.008531439497028, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23708094522533543, - "normalized_score": 23.708094522533543 - }, - "bbh": { - "name": "BBH", - "value": 0.315042649740714, - "normalized_score": 3.878989478987103 - }, - "math": { - "name": "MATH Level 5", - "value": 0.015105740181268883, - "normalized_score": 1.5105740181268883 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.35378125, - "normalized_score": 4.02265625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11228390957446809, - "normalized_score": 1.3648788416075646 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-02-09", - "submission_date": "2025-01-28", - "generation": 0, - "base_model": "EleutherAI/pythia-1.4b", - "hub_license": "apache-2.0", - "hub_hearts": 23, - "params_billions": 1.515, - "co2_cost": 0.3872326488039495 - } - }, - { - "id": "EleutherAI/pythia-12b_float16_35c9d7f32fbb108fb8b5bdd574eb03369d1eed49_False", - "model": { - "name": "EleutherAI/pythia-12b", - "sha": "35c9d7f32fbb108fb8b5bdd574eb03369d1eed49", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "GPTNeoXForCausalLM", - "average_score": 6.059841492942702, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24714756845170813, - "normalized_score": 24.71475684517081 - }, - "bbh": { - "name": "BBH", - "value": 0.3179653957935337, - "normalized_score": 4.987531038290507 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01661631419939577, - "normalized_score": 1.6616314199395772 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24664429530201343, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3646979166666667, - "normalized_score": 3.7872395833333345 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11087101063829788, - "normalized_score": 1.2078900709219857 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-02-28", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "EleutherAI/pythia-12b", - "hub_license": "apache-2.0", - "hub_hearts": 135, - "params_billions": 12.0, - "co2_cost": 2.2360143063327462 - } - }, - { - "id": "EleutherAI/pythia-160m_float16_50f5173d932e8e61f858120bcb800b97af589f46_False", - "model": { - "name": "EleutherAI/pythia-160m", - "sha": "50f5173d932e8e61f858120bcb800b97af589f46", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "GPTNeoXForCausalLM", - "average_score": 5.730394616916023, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.18155161637787737, - "normalized_score": 
18.155161637787735 - }, - "bbh": { - "name": "BBH", - "value": 0.2970437484241321, - "normalized_score": 2.198832279508135 - }, - "math": { - "name": "MATH Level 5", - "value": 0.00906344410876133, - "normalized_score": 0.906344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.4179375, - "normalized_score": 10.675520833333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11195146276595745, - "normalized_score": 1.3279403073286051 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-02-08", - "submission_date": "2024-06-09", - "generation": 0, - "base_model": "EleutherAI/pythia-160m", - "hub_license": "apache-2.0", - "hub_hearts": 30, - "params_billions": 0.213, - "co2_cost": 0.4706770826081154 - } - }, - { - "id": "EleutherAI/pythia-1b_float16_f73d7dcc545c8bd326d8559c8ef84ffe92fea6b2_False", - "model": { - "name": "EleutherAI/pythia-1b", - "sha": "f73d7dcc545c8bd326d8559c8ef84ffe92fea6b2", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "GPTNeoXForCausalLM", - "average_score": 5.07026822083096, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2207941594968018, - "normalized_score": 22.079415949680183 - }, - "bbh": { - "name": "BBH", - "value": 0.3004093017564394, - "normalized_score": 2.2939863995762866 - }, - "math": { - "name": "MATH Level 5", - "value": 0.00906344410876133, - "normalized_score": 0.906344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25671140939597314, - "normalized_score": 0.8948545861297527 - }, - "musr": { - "name": "MUSR", - "value": 0.35520833333333335, - "normalized_score": 2.7343749999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11361369680851063, - "normalized_score": 1.512632978723403 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-03-10", - "submission_date": "2025-01-27", - "generation": 0, - "base_model": "EleutherAI/pythia-1b", - "hub_license": "apache-2.0", - "hub_hearts": 37, - "params_billions": 1.079, - "co2_cost": 0.31229480497840917 - } - }, - { - "id": "EleutherAI/pythia-2.8b_float16_2a259cdd96a4beb1cdf467512e3904197345f6a9_False", - "model": { - "name": "EleutherAI/pythia-2.8b", - "sha": "2a259cdd96a4beb1cdf467512e3904197345f6a9", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "GPTNeoXForCausalLM", - "average_score": 5.554946281603011, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21732226049105263, - "normalized_score": 21.73222604910526 - }, - "bbh": { - "name": "BBH", - "value": 0.3224085936276087, - "normalized_score": 5.077786161905462 - }, - "math": { - "name": "MATH Level 5", - "value": 0.013595166163141994, - "normalized_score": 1.3595166163141994 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3485729166666667, - "normalized_score": 3.6382812500000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11369680851063829, - "normalized_score": 1.521867612293143 - } - }, - "features": { - "is_not_available_on_hub": 
true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-02-13", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "EleutherAI/pythia-2.8b", - "hub_license": "apache-2.0", - "hub_hearts": 30, - "params_billions": 2.909, - "co2_cost": 1.507804003243129 - } - }, - { - "id": "EleutherAI/pythia-410m_float16_9879c9b5f8bea9051dcb0e68dff21493d67e9d4f_False", - "model": { - "name": "EleutherAI/pythia-410m", - "sha": "9879c9b5f8bea9051dcb0e68dff21493d67e9d4f", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "GPTNeoXForCausalLM", - "average_score": 5.227072311484412, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21954525104500505, - "normalized_score": 21.954525104500505 - }, - "bbh": { - "name": "BBH", - "value": 0.302813387064426, - "normalized_score": 2.7154281203357473 - }, - "math": { - "name": "MATH Level 5", - "value": 0.009818731117824773, - "normalized_score": 0.9818731117824773 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.35781250000000003, - "normalized_score": 3.0598958333333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11278257978723404, - "normalized_score": 1.4202866430260035 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-02-13", - "submission_date": "2024-06-09", - "generation": 0, - "base_model": "EleutherAI/pythia-410m", - "hub_license": "apache-2.0", - "hub_hearts": 23, - "params_billions": 0.506, - "co2_cost": 0.7541636544345159 - } - }, - { - "id": "EleutherAI/pythia-6.9b_float16_f271943e880e60c0c715fd10e4dc74ec4e31eb44_False", - "model": { - "name": "EleutherAI/pythia-6.9b", - "sha": "f271943e880e60c0c715fd10e4dc74ec4e31eb44", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "GPTNeoXForCausalLM", - "average_score": 5.966546774741993, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22811362739752744, - "normalized_score": 22.811362739752745 - }, - "bbh": { - "name": "BBH", - "value": 0.3232287869322383, - "normalized_score": 5.88163197981621 - }, - "math": { - "name": "MATH Level 5", - "value": 0.014350453172205438, - "normalized_score": 1.4350453172205437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2516778523489933, - "normalized_score": 0.22371364653244186 - }, - "musr": { - "name": "MUSR", - "value": 0.3590520833333333, - "normalized_score": 3.8148437499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1146941489361702, - "normalized_score": 1.6326832151300221 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-02-14", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "EleutherAI/pythia-6.9b", - "hub_license": "apache-2.0", - "hub_hearts": 50, - "params_billions": 6.9, - "co2_cost": 1.7377338289158397 - } - }, - { - "id": "Enno-Ai/EnnoAi-Pro-French-Llama-3-8B-v0.4_bfloat16_328722ae96e3a112ec900dbe77d410788a526c5c_True", - "model": { - "name": "Enno-Ai/EnnoAi-Pro-French-Llama-3-8B-v0.4", - "sha": 
"328722ae96e3a112ec900dbe77d410788a526c5c", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 15.68446940398294, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4188807918545016, - "normalized_score": 41.88807918545016 - }, - "bbh": { - "name": "BBH", - "value": 0.4074954889367559, - "normalized_score": 16.875928374989595 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03625377643504532, - "normalized_score": 3.625377643504532 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.41700000000000004, - "normalized_score": 10.758333333333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2634640957446808, - "normalized_score": 18.162677304964536 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-27", - "submission_date": "2024-06-30", - "generation": 0, - "base_model": "Enno-Ai/EnnoAi-Pro-French-Llama-3-8B-v0.4", - "hub_license": "creativeml-openrail-m", - "hub_hearts": 0, - "params_billions": 8.031, - "co2_cost": 2.018255930617328 - } - }, - { - "id": "Enno-Ai/EnnoAi-Pro-Llama-3-8B_bfloat16_6a5d745bdd304753244fe601e2a958d37d13cd71_True", - "model": { - "name": "Enno-Ai/EnnoAi-Pro-Llama-3-8B", - "sha": "6a5d745bdd304753244fe601e2a958d37d13cd71", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 12.51454591188142, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.31953771548380516, - "normalized_score": 31.953771548380523 - }, - "bbh": { - "name": "BBH", - "value": 0.4151575806137866, - "normalized_score": 17.507545086854382 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02190332326283988, - "normalized_score": 2.190332326283988 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.4070520833333333, - "normalized_score": 9.081510416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.21509308510638298, - "normalized_score": 12.788120567375886 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-01", - "submission_date": "2024-07-08", - "generation": 0, - "base_model": "Enno-Ai/EnnoAi-Pro-Llama-3-8B", - "hub_license": "creativeml-openrail-m", - "hub_hearts": 0, - "params_billions": 8.031, - "co2_cost": 2.368674679609323 - } - }, - { - "id": "Enno-Ai/EnnoAi-Pro-Llama-3-8B-v0.3_bfloat16_cf29b8b484a909132e3a1f85ce891d28347c0d13_True", - "model": { - "name": "Enno-Ai/EnnoAi-Pro-Llama-3-8B-v0.3", - "sha": "cf29b8b484a909132e3a1f85ce891d28347c0d13", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.128287498482546, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5082569803676467, - "normalized_score": 50.82569803676467 - }, - "bbh": { - "name": "BBH", - "value": 0.4100577461090639, - "normalized_score": 16.668385554519382 - }, - "math": { 
- "name": "MATH Level 5", - "value": 0.04833836858006042, - "normalized_score": 4.833836858006042 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.42357291666666663, - "normalized_score": 12.313281249999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2990359042553192, - "normalized_score": 22.11510047281324 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-26", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "Enno-Ai/EnnoAi-Pro-Llama-3-8B-v0.3", - "hub_license": "creativeml-openrail-m", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 2.941671164165739 - } - }, - { - "id": "Enno-Ai/EnnoAi-Pro-Llama-3.1-8B-v0.9_bfloat16_c740871122fd471a1a225cf2b4368e333752d74c_True", - "model": { - "name": "Enno-Ai/EnnoAi-Pro-Llama-3.1-8B-v0.9", - "sha": "c740871122fd471a1a225cf2b4368e333752d74c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 15.575099921155847, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4689147018799009, - "normalized_score": 46.891470187990095 - }, - "bbh": { - "name": "BBH", - "value": 0.41602720836190127, - "normalized_score": 17.49829637438283 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0377643504531722, - "normalized_score": 3.7764350453172204 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.3831770833333333, - "normalized_score": 5.43046875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2595578457446808, - "normalized_score": 17.728649527186757 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-22", - "submission_date": "2024-09-06", - "generation": 0, - "base_model": "Enno-Ai/EnnoAi-Pro-Llama-3.1-8B-v0.9", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.8651424904010883 - } - }, - { - "id": "EnnoAi/EnnoAi-7B-French-Instruct-202502_bfloat16_46438f0966b908da5594a4a2abb0202ef08c0355_False", - "model": { - "name": "EnnoAi/EnnoAi-7B-French-Instruct-202502", - "sha": "46438f0966b908da5594a4a2abb0202ef08c0355", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 31.28743755248046, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5564424615575562, - "normalized_score": 55.64424615575562 - }, - "bbh": { - "name": "BBH", - "value": 0.5574545199388612, - "normalized_score": 36.92413092542704 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3723564954682779, - "normalized_score": 37.235649546827794 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2953020134228188, - "normalized_score": 6.040268456375841 - }, - "musr": { - "name": "MUSR", - "value": 0.45997916666666666, - "normalized_score": 18.39739583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4013464095744681, - "normalized_score": 33.48293439716312 - } - }, - "features": { 
- "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-11", - "submission_date": "2025-02-11", - "generation": 0, - "base_model": "EnnoAi/EnnoAi-7B-French-Instruct-202502", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.456, - "co2_cost": 0.6896437531248036 - } - }, - { - "id": "EnnoAi/EnnoAi-Pro-Llama-3.1-8B-v1.0_bfloat16_c740871122fd471a1a225cf2b4368e333752d74c_True", - "model": { - "name": "EnnoAi/EnnoAi-Pro-Llama-3.1-8B-v1.0", - "sha": "c740871122fd471a1a225cf2b4368e333752d74c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 15.600495501179813, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4704384366813389, - "normalized_score": 47.04384366813389 - }, - "bbh": { - "name": "BBH", - "value": 0.41602720836190127, - "normalized_score": 17.49829637438283 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0377643504531722, - "normalized_score": 3.7764350453172204 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.3831770833333333, - "normalized_score": 5.43046875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2595578457446808, - "normalized_score": 17.728649527186757 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-22", - "submission_date": "2024-09-06", - "generation": 0, - "base_model": "EnnoAi/EnnoAi-Pro-Llama-3.1-8B-v1.0", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.8912831845639797 - } - }, - { - "id": "Epiculous/Azure_Dusk-v0.2_bfloat16_ebddf1b2efbe7f9cae066d263b0991ded89c88e8_True", - "model": { - "name": "Epiculous/Azure_Dusk-v0.2", - "sha": "ebddf1b2efbe7f9cae066d263b0991ded89c88e8", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 14.239648971826766, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.346715603487635, - "normalized_score": 34.67156034876351 - }, - "bbh": { - "name": "BBH", - "value": 0.4119721873553597, - "normalized_score": 17.396414392379338 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02945619335347432, - "normalized_score": 2.9456193353474323 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.3834583333333333, - "normalized_score": 6.365625000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3034408244680851, - "normalized_score": 22.604536052009458 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-09", - "submission_date": "2024-09-14", - "generation": 0, - "base_model": "Epiculous/Azure_Dusk-v0.2", - "hub_license": "apache-2.0", - "hub_hearts": 8, - "params_billions": 12.248, - "co2_cost": 3.98282278279691 - } - }, - { - "id": 
"Epiculous/Crimson_Dawn-v0.2_bfloat16_4cceb1e25026afef241ad5325097e88eccd8f37a_True", - "model": { - "name": "Epiculous/Crimson_Dawn-v0.2", - "sha": "4cceb1e25026afef241ad5325097e88eccd8f37a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 15.085950749805088, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3103454389907667, - "normalized_score": 31.034543899076674 - }, - "bbh": { - "name": "BBH", - "value": 0.44823796489645434, - "normalized_score": 21.68824851395527 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04305135951661632, - "normalized_score": 4.305135951661631 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.4151770833333333, - "normalized_score": 10.897135416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27210771276595747, - "normalized_score": 19.123079196217496 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-02", - "submission_date": "2024-09-05", - "generation": 0, - "base_model": "Epiculous/Crimson_Dawn-v0.2", - "hub_license": "apache-2.0", - "hub_hearts": 14, - "params_billions": 12.248, - "co2_cost": 5.253400057014685 - } - }, - { - "id": "Epiculous/NovaSpark_bfloat16_a46340895859e470c3e69661f0b894677cf4c5cb_True", - "model": { - "name": "Epiculous/NovaSpark", - "sha": "a46340895859e470c3e69661f0b894677cf4c5cb", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.253737990368, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6408473960203371, - "normalized_score": 64.08473960203372 - }, - "bbh": { - "name": "BBH", - "value": 0.5063958663768304, - "normalized_score": 29.52691068844395 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15181268882175228, - "normalized_score": 15.181268882175228 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.3881979166666667, - "normalized_score": 6.924739583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3648603723404255, - "normalized_score": 29.428930260047277 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-13", - "submission_date": "2024-10-20", - "generation": 1, - "base_model": "Epiculous/NovaSpark (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 7, - "params_billions": 8.03, - "co2_cost": 1.6363700494168842 - } - }, - { - "id": "Epiculous/Violet_Twilight-v0.2_bfloat16_30c8bad3c1f565150afbf2fc90cacf4f45d096f6_True", - "model": { - "name": "Epiculous/Violet_Twilight-v0.2", - "sha": "30c8bad3c1f565150afbf2fc90cacf4f45d096f6", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 18.55277348742638, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45317756885064964, - "normalized_score": 45.317756885064966 - 
}, - "bbh": { - "name": "BBH", - "value": 0.4614552476845888, - "normalized_score": 23.94053725590186 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02870090634441088, - "normalized_score": 2.870090634441088 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.42993750000000003, - "normalized_score": 13.608854166666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3110871010638298, - "normalized_score": 23.45412234042553 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-12", - "submission_date": "2024-09-16", - "generation": 0, - "base_model": "Epiculous/Violet_Twilight-v0.2", - "hub_license": "apache-2.0", - "hub_hearts": 31, - "params_billions": 12.248, - "co2_cost": 1.7704359904032256 - } - }, - { - "id": "EpistemeAI/Alpaca-Llama3.1-8B_float16_3152dfa17322dff7c6af6dbf3daceaf5db51e230_False", - "model": { - "name": "EpistemeAI/Alpaca-Llama3.1-8B", - "sha": "3152dfa17322dff7c6af6dbf3daceaf5db51e230", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.985046352420847, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15986914719610634, - "normalized_score": 15.986914719610633 - }, - "bbh": { - "name": "BBH", - "value": 0.47552608539742874, - "normalized_score": 25.93522655511771 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05060422960725076, - "normalized_score": 5.0604229607250755 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.3402604166666667, - "normalized_score": 6.599218750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3246343085106383, - "normalized_score": 24.959367612293143 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-11", - "submission_date": "2024-08-13", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.0, - "co2_cost": 1.8417051746542898 - } - }, - { - "id": "EpistemeAI/Athena-gemma-2-2b-it_float16_661c1dc6a1a096222e33416e099bd02b7b970405_False", - "model": { - "name": "EpistemeAI/Athena-gemma-2-2b-it", - "sha": "661c1dc6a1a096222e33416e099bd02b7b970405", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 14.54609172912029, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3134172883504657, - "normalized_score": 31.341728835046567 - }, - "bbh": { - "name": "BBH", - "value": 0.42642293591146, - "normalized_score": 19.417817674461514 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04909365558912387, - "normalized_score": 4.909365558912387 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.43505208333333334, - "normalized_score": 13.348177083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 
0.2421875, - "normalized_score": 15.79861111111111 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-29", - "submission_date": "2024-09-06", - "generation": 4, - "base_model": "google/gemma-2-9b", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 2.0, - "co2_cost": 3.0277164101834417 - } - }, - { - "id": "EpistemeAI/Athena-gemma-2-2b-it-Philos_float16_dea2b35d496bd32ed3c88d42ff3022654153f2e1_True", - "model": { - "name": "EpistemeAI/Athena-gemma-2-2b-it-Philos", - "sha": "dea2b35d496bd32ed3c88d42ff3022654153f2e1", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 15.663946300399497, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4620950189940469, - "normalized_score": 46.20950189940469 - }, - "bbh": { - "name": "BBH", - "value": 0.37947768790586744, - "normalized_score": 13.212088152695856 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03700906344410876, - "normalized_score": 3.7009063444108756 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28104026845637586, - "normalized_score": 4.138702460850116 - }, - "musr": { - "name": "MUSR", - "value": 0.43136458333333333, - "normalized_score": 12.853906250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.22481715425531915, - "normalized_score": 13.86857269503546 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-05", - "submission_date": "2024-09-05", - "generation": 1, - "base_model": "unsloth/gemma-2-2b-it-bnb-4bit", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 2.0, - "co2_cost": 2.2571856348609125 - } - }, - { - "id": "EpistemeAI/Athene-codegemma-2-7b-it-alpaca-v1.3_float16_9c26e1242a11178b53937bc0e9a744ef6141e05a_False", - "model": { - "name": "EpistemeAI/Athene-codegemma-2-7b-it-alpaca-v1.3", - "sha": "9c26e1242a11178b53937bc0e9a744ef6141e05a", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "GemmaForCausalLM", - "average_score": 17.314021588433324, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.40299405577201824, - "normalized_score": 40.299405577201824 - }, - "bbh": { - "name": "BBH", - "value": 0.4331916189482215, - "normalized_score": 20.87379456667128 - }, - "math": { - "name": "MATH Level 5", - "value": 0.061933534743202415, - "normalized_score": 6.193353474320242 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2802013422818792, - "normalized_score": 4.026845637583895 - }, - "musr": { - "name": "MUSR", - "value": 0.4503020833333333, - "normalized_score": 14.854427083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.25872672872340424, - "normalized_score": 17.636303191489358 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-06", - "submission_date": "2024-09-06", - "generation": 2, - "base_model": "Removed", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.0, - "co2_cost": 1.9439560175879884 - } - 
}, - { - "id": "EpistemeAI/DeepPhi-3.5-mini-instruct_float16_8fd61f3c0003a629524752d2f857c01d2f9843f4_False", - "model": { - "name": "EpistemeAI/DeepPhi-3.5-mini-instruct", - "sha": "8fd61f3c0003a629524752d2f857c01d2f9843f4", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 3.46432918593773, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1325915238234551, - "normalized_score": 13.259152382345508 - }, - "bbh": { - "name": "BBH", - "value": 0.28822860667627487, - "normalized_score": 1.667358455856632 - }, - "math": { - "name": "MATH Level 5", - "value": 0.006797583081570997, - "normalized_score": 0.6797583081570997 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2332214765100671, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.36562500000000003, - "normalized_score": 4.036458333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11028922872340426, - "normalized_score": 1.1432476359338057 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-28", - "submission_date": "2025-02-28", - "generation": 2, - "base_model": "microsoft/Phi-3.5-mini-instruct", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 3.821, - "co2_cost": 0.423737744033451 - } - }, - { - "id": "EpistemeAI/DeepThinkers-Phi4_bfloat16_3e2b390dc391232880542300f3ca1578f3b53ef5_True", - "model": { - "name": "EpistemeAI/DeepThinkers-Phi4", - "sha": "3e2b390dc391232880542300f3ca1578f3b53ef5", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 39.40710857221479, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6939786433330231, - "normalized_score": 69.3978643333023 - }, - "bbh": { - "name": "BBH", - "value": 0.6790415739665393, - "normalized_score": 53.786668719353166 - }, - "math": { - "name": "MATH Level 5", - "value": 0.45845921450151056, - "normalized_score": 45.84592145015105 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34060402684563756, - "normalized_score": 12.080536912751676 - }, - "musr": { - "name": "MUSR", - "value": 0.3980625, - "normalized_score": 8.024479166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5257646276595744, - "normalized_score": 47.307180851063826 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-28", - "submission_date": "2025-03-01", - "generation": 2, - "base_model": "microsoft/phi-4", - "hub_license": "mit", - "hub_hearts": 4, - "params_billions": 14.66, - "co2_cost": 0.9143025960276635 - } - }, - { - "id": "EpistemeAI/FineLlama3.1-8B-Instruct_4bit_a8b0fc584b10e0110e04f9d21c7f10d24391c1d5_False", - "model": { - "name": "EpistemeAI/FineLlama3.1-8B-Instruct", - "sha": "a8b0fc584b10e0110e04f9d21c7f10d24391c1d5", - "precision": "4bit", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 11.239255981586004, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.08000992921005155, - "normalized_score": 
8.000992921005155 - }, - "bbh": { - "name": "BBH", - "value": 0.45573635384163325, - "normalized_score": 23.506618815003453 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03474320241691843, - "normalized_score": 3.474320241691843 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2802013422818792, - "normalized_score": 4.026845637583895 - }, - "musr": { - "name": "MUSR", - "value": 0.3481666666666667, - "normalized_score": 4.954166666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3112533244680851, - "normalized_score": 23.472591607565015 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-08-10", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.483, - "co2_cost": 4.709921965614319 - } - }, - { - "id": "EpistemeAI/Fireball-12B_bfloat16_e2ed12c3244f2502321fb20e76dfc72ad7817d6e_False", - "model": { - "name": "EpistemeAI/Fireball-12B", - "sha": "e2ed12c3244f2502321fb20e76dfc72ad7817d6e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 15.534531353358348, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1833501775289565, - "normalized_score": 18.335017752895652 - }, - "bbh": { - "name": "BBH", - "value": 0.5110893652548262, - "normalized_score": 30.666711502632058 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04078549848942598, - "normalized_score": 4.078549848942599 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.42363541666666665, - "normalized_score": 12.521093749999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3343583776595745, - "normalized_score": 26.03981973995272 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-20", - "submission_date": "2024-08-21", - "generation": 2, - "base_model": "Removed", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 12.248, - "co2_cost": 3.2370416419749457 - } - }, - { - "id": "EpistemeAI/Fireball-12B-v1.13a-philosophers_bfloat16_7fa824d4a40abca3f8c75d432ea151dc0d1d67d6_False", - "model": { - "name": "EpistemeAI/Fireball-12B-v1.13a-philosophers", - "sha": "7fa824d4a40abca3f8c75d432ea151dc0d1d67d6", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 14.466040850876615, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.08755324760524298, - "normalized_score": 8.755324760524298 - }, - "bbh": { - "name": "BBH", - "value": 0.5102697700597862, - "normalized_score": 30.336232640303574 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04607250755287009, - "normalized_score": 4.607250755287009 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.4080729166666666, - "normalized_score": 9.975781249999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3366855053191489, - 
"normalized_score": 26.29838947990544 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-28", - "submission_date": "2024-09-03", - "generation": 1, - "base_model": "Removed", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 12.0, - "co2_cost": 3.325326791263037 - } - }, - { - "id": "EpistemeAI/Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200_float16_27d67626304954db71f21fec9e7fc516421274ec_False", - "model": { - "name": "EpistemeAI/Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200", - "sha": "27d67626304954db71f21fec9e7fc516421274ec", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.129914482629534, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4577243934981405, - "normalized_score": 45.77243934981405 - }, - "bbh": { - "name": "BBH", - "value": 0.4838398624677178, - "normalized_score": 26.377774027551386 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12311178247734139, - "normalized_score": 12.311178247734139 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30033557046979864, - "normalized_score": 6.711409395973152 - }, - "musr": { - "name": "MUSR", - "value": 0.39445833333333336, - "normalized_score": 6.907291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35829454787234044, - "normalized_score": 28.69939420803782 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-16", - "submission_date": "2024-09-16", - "generation": 4, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.0, - "co2_cost": 1.8447622495689262 - } - }, - { - "id": "EpistemeAI/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math-KTO-beta_float16_2851384717556dd6ac14c00ed87aac1f267eb263_True", - "model": { - "name": "EpistemeAI/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math-KTO-beta", - "sha": "2851384717556dd6ac14c00ed87aac1f267eb263", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.242227896928764, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7274010735958367, - "normalized_score": 72.74010735958367 - }, - "bbh": { - "name": "BBH", - "value": 0.48648902139668476, - "normalized_score": 26.897964171299805 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15256797583081572, - "normalized_score": 15.256797583081571 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2802013422818792, - "normalized_score": 4.026845637583895 - }, - "musr": { - "name": "MUSR", - "value": 0.3619375, - "normalized_score": 4.275520833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3543051861702128, - "normalized_score": 28.25613179669031 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-12", - "submission_date": "2024-09-14", - "generation": 5, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.0, - "co2_cost": 1.771289856542313 - } - }, - { - 
"id": "EpistemeAI/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R2_float16_b19336101aa5f4807d1574f4c11eebc1c1a1c34e_False", - "model": { - "name": "EpistemeAI/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R2", - "sha": "b19336101aa5f4807d1574f4c11eebc1c1a1c34e", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.550476627240737, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.46731561146646455, - "normalized_score": 46.731561146646456 - }, - "bbh": { - "name": "BBH", - "value": 0.4932027479020209, - "normalized_score": 28.24700927539328 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12386706948640483, - "normalized_score": 12.386706948640484 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2860738255033557, - "normalized_score": 4.809843400447425 - }, - "musr": { - "name": "MUSR", - "value": 0.46236458333333336, - "normalized_score": 16.995572916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3351894946808511, - "normalized_score": 26.13216607565012 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-14", - "submission_date": "2024-09-14", - "generation": 3, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.0, - "co2_cost": 1.6234862691629817 - } - }, - { - "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto_float16_19b23c434b6c4524e2146926cdbf4f0e927ae3ab_False", - "model": { - "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto", - "sha": "19b23c434b6c4524e2146926cdbf4f0e927ae3ab", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.567361907712467, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44318630123627534, - "normalized_score": 44.31863012362753 - }, - "bbh": { - "name": "BBH", - "value": 0.4823644760491404, - "normalized_score": 26.832966985874886 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13293051359516617, - "normalized_score": 13.293051359516618 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31208053691275167, - "normalized_score": 8.277404921700223 - }, - "musr": { - "name": "MUSR", - "value": 0.4066458333333333, - "normalized_score": 8.730729166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3515625, - "normalized_score": 27.95138888888889 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-14", - "submission_date": "2024-11-15", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.0, - "co2_cost": 1.3899885588128729 - } - }, - { - "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K_float16_b4a88fb5fb27fc5d8a503303cdb7aaeff373fd92_False", - "model": { - "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K", - "sha": "b4a88fb5fb27fc5d8a503303cdb7aaeff373fd92", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": 
"LlamaForCausalLM", - "average_score": 20.62716790652169, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4457339858242796, - "normalized_score": 44.573398582427956 - }, - "bbh": { - "name": "BBH", - "value": 0.48973199216860547, - "normalized_score": 28.02516078188715 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12084592145015106, - "normalized_score": 12.084592145015106 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.37622916666666667, - "normalized_score": 4.895312499999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3543051861702128, - "normalized_score": 28.25613179669031 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-26", - "submission_date": "2024-10-05", - "generation": 1, - "base_model": "Removed", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 8.0, - "co2_cost": 1.6295729321675876 - } - }, - { - "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code_float16_8e8f1569a8a01ed3d6588f2669c730d4993355b5_False", - "model": { - "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code", - "sha": "8e8f1569a8a01ed3d6588f2669c730d4993355b5", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.934713958118284, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5975334335119704, - "normalized_score": 59.753343351197046 - }, - "bbh": { - "name": "BBH", - "value": 0.4904191122627008, - "normalized_score": 28.171887782172757 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1336858006042296, - "normalized_score": 13.36858006042296 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - "musr": { - "name": "MUSR", - "value": 0.40103125, - "normalized_score": 8.462239583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34225398936170215, - "normalized_score": 26.917109929078016 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-04", - "submission_date": "2024-10-05", - "generation": 2, - "base_model": "Removed", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 8.0, - "co2_cost": 1.7086355618094373 - } - }, - { - "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds_float16_8b73dd02349f0544c48c581cc73ada5cac6ff946_True", - "model": { - "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds", - "sha": "8b73dd02349f0544c48c581cc73ada5cac6ff946", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.14416546279052, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.669099101495144, - "normalized_score": 66.9099101495144 - }, - "bbh": { - "name": "BBH", - "value": 0.4668070143164938, - "normalized_score": 24.462654168996078 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1336858006042296, - 
"normalized_score": 13.36858006042296 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.34178125, - "normalized_score": 4.555989583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33892952127659576, - "normalized_score": 26.547724586288417 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-14", - "submission_date": "2024-10-15", - "generation": 4, - "base_model": "Removed", - "hub_license": "llama3.1", - "hub_hearts": 2, - "params_billions": 8.0, - "co2_cost": 2.5938260467706735 - } - }, - { - "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto_float16_f18598c62a783bcc0d436a35df0c8a335e8ee5d7_True", - "model": { - "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto", - "sha": "f18598c62a783bcc0d436a35df0c8a335e8ee5d7", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.74994070118673, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7304984108831234, - "normalized_score": 73.04984108831235 - }, - "bbh": { - "name": "BBH", - "value": 0.46492466713692354, - "normalized_score": 24.58673708035273 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13972809667673716, - "normalized_score": 13.972809667673717 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.32088541666666665, - "normalized_score": 1.2106770833333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34798869680851063, - "normalized_score": 27.554299645390067 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-21", - "submission_date": "2024-10-29", - "generation": 1, - "base_model": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 7, - "params_billions": 8.03, - "co2_cost": 2.2853056830713743 - } - }, - { - "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto_bfloat16_055e87600d18e58594a8d193f45c0ee9a90e1780_True", - "model": { - "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto", - "sha": "055e87600d18e58594a8d193f45c0ee9a90e1780", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.627287074905905, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7207066140063919, - "normalized_score": 72.0706614006392 - }, - "bbh": { - "name": "BBH", - "value": 0.4610092915501656, - "normalized_score": 23.544253406580115 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13141993957703926, - "normalized_score": 13.141993957703926 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.3432395833333333, - "normalized_score": 4.1716145833333345 - }, - "mmlu_pro": { - "name": 
"MMLU-PRO", - "value": 0.3353557180851064, - "normalized_score": 26.150635342789595 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-21", - "submission_date": "2024-11-27", - "generation": 1, - "base_model": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 7, - "params_billions": 8.03, - "co2_cost": 1.3441355413277183 - } - }, - { - "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT_float16_bb90c19dc7c4a509e7bd73f4620dca818b58be25_False", - "model": { - "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT", - "sha": "bb90c19dc7c4a509e7bd73f4620dca818b58be25", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.857427405175983, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4578241288669619, - "normalized_score": 45.78241288669619 - }, - "bbh": { - "name": "BBH", - "value": 0.4760520079608936, - "normalized_score": 25.82086537586569 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13821752265861026, - "normalized_score": 13.821752265861026 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2936241610738255, - "normalized_score": 5.8165548098433995 - }, - "musr": { - "name": "MUSR", - "value": 0.3881354166666667, - "normalized_score": 6.450260416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3470744680851064, - "normalized_score": 27.452718676122934 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-11", - "submission_date": "2024-10-11", - "generation": 3, - "base_model": "Removed", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.0, - "co2_cost": 1.6780731310692927 - } - }, - { - "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto_bfloat16_db5ddb161ed26bc16baa814e31892dbe2f22b7a0_True", - "model": { - "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto", - "sha": "db5ddb161ed26bc16baa814e31892dbe2f22b7a0", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.874258540372427, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7204816553411615, - "normalized_score": 72.04816553411615 - }, - "bbh": { - "name": "BBH", - "value": 0.4817795525811035, - "normalized_score": 26.452059604470918 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14350453172205438, - "normalized_score": 14.350453172205437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2483221476510067, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.33, - "normalized_score": 2.0833333333333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35480385638297873, - "normalized_score": 28.311539598108748 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-14", - "submission_date": 
"2024-11-14", - "generation": 1, - "base_model": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 8.0, - "co2_cost": 1.4902619095486798 - } - }, - { - "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Math_bfloat16_677c97b4f92bfc330d4fae628e9a1df1ef606dcc_False", - "model": { - "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Math", - "sha": "677c97b4f92bfc330d4fae628e9a1df1ef606dcc", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.557928990030614, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.46229559790245434, - "normalized_score": 46.22955979024543 - }, - "bbh": { - "name": "BBH", - "value": 0.49829504320793055, - "normalized_score": 28.95934409387967 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10800604229607251, - "normalized_score": 10.80060422960725 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.3640729166666667, - "normalized_score": 5.97578125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33311170212765956, - "normalized_score": 25.901300236406616 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-23", - "submission_date": "2024-09-23", - "generation": 3, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.8205431961818275 - } - }, - { - "id": "EpistemeAI/Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO_float16_b3c0fce7daa359cd8ed5be6595dd1a76ca2cfea2_False", - "model": { - "name": "EpistemeAI/Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO", - "sha": "b3c0fce7daa359cd8ed5be6595dd1a76ca2cfea2", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.293561415203758, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.46109655713506825, - "normalized_score": 46.109655713506825 - }, - "bbh": { - "name": "BBH", - "value": 0.48010141537970213, - "normalized_score": 26.317877757648734 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12537764350453173, - "normalized_score": 12.537764350453173 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30033557046979864, - "normalized_score": 6.711409395973152 - }, - "musr": { - "name": "MUSR", - "value": 0.3998229166666667, - "normalized_score": 8.077864583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35206117021276595, - "normalized_score": 28.00679669030733 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-08", - "submission_date": "2024-10-09", - "generation": 3, - "base_model": "Removed", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 8.0, - "co2_cost": 1.667151564837414 - } - }, - { - "id": "EpistemeAI/Fireball-Mistral-Nemo-Base-2407-v1-DPO2_bfloat16_2cf732fbffefdf37341b946edd7995f14d3f9487_False", - "model": { - "name": 
"EpistemeAI/Fireball-Mistral-Nemo-Base-2407-v1-DPO2", - "sha": "2cf732fbffefdf37341b946edd7995f14d3f9487", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 15.339340300308171, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.18607295309778055, - "normalized_score": 18.607295309778056 - }, - "bbh": { - "name": "BBH", - "value": 0.49677687590350894, - "normalized_score": 28.567824892702276 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03625377643504532, - "normalized_score": 3.625377643504532 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.4040104166666667, - "normalized_score": 9.501302083333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33527260638297873, - "normalized_score": 26.14140070921986 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-19", - "submission_date": "2024-08-19", - "generation": 1, - "base_model": "Removed", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 12.248, - "co2_cost": 3.5425381988690936 - } - }, - { - "id": "EpistemeAI/Fireball-R1-Llama-3.1-8B_float16_7d7ca4fa9887a0c6d721353fa962ed93e633d856_True", - "model": { - "name": "EpistemeAI/Fireball-R1-Llama-3.1-8B", - "sha": "7d7ca4fa9887a0c6d721353fa962ed93e633d856", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.729862999540122, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4427363839058143, - "normalized_score": 44.27363839058143 - }, - "bbh": { - "name": "BBH", - "value": 0.36434977901496834, - "normalized_score": 10.273656027098815 - }, - "math": { - "name": "MATH Level 5", - "value": 0.311178247734139, - "normalized_score": 31.1178247734139 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2483221476510067, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.32879166666666665, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11153590425531915, - "normalized_score": 1.2817671394799046 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-11", - "submission_date": "2025-02-12", - "generation": 2, - "base_model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "hub_license": "llama3.1", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.7517107170434107 - } - }, - { - "id": "EpistemeAI/Fireball-R1-Llama-3.1-8B-Medical-COT_bfloat16_66b8420e4c1003aedfc809f68e8f346ae972710a_True", - "model": { - "name": "EpistemeAI/Fireball-R1-Llama-3.1-8B-Medical-COT", - "sha": "66b8420e4c1003aedfc809f68e8f346ae972710a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.486213316641551, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3216111029845255, - "normalized_score": 32.16111029845255 - }, - "bbh": { - "name": 
"BBH", - "value": 0.37162741490176326, - "normalized_score": 12.153438929855737 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3270392749244713, - "normalized_score": 32.703927492447136 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.31136458333333333, - "normalized_score": 2.187239583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1402094414893617, - "normalized_score": 4.467715721040189 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-16", - "submission_date": "2025-02-28", - "generation": 3, - "base_model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 0.747615825519124 - } - }, - { - "id": "EpistemeAI/Fireball-R1.1-Llama-3.1-8B_float16_863882f3081647135d269b82698e079b4c78d9ee_True", - "model": { - "name": "EpistemeAI/Fireball-R1.1-Llama-3.1-8B", - "sha": "863882f3081647135d269b82698e079b4c78d9ee", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 10.130881566678475, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3676234613048932, - "normalized_score": 36.76234613048933 - }, - "bbh": { - "name": "BBH", - "value": 0.33260007841271594, - "normalized_score": 6.28685605104149 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13821752265861026, - "normalized_score": 13.821752265861026 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2516778523489933, - "normalized_score": 0.22371364653244186 - }, - "musr": { - "name": "MUSR", - "value": 0.3419375, - "normalized_score": 2.408854166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11153590425531915, - "normalized_score": 1.2817671394799046 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-03-02", - "generation": 3, - "base_model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "hub_license": "llama3.1", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 0.8073899540313522 - } - }, - { - "id": "EpistemeAI/Llama-3.2-3B-Agent007-Coder_float16_7ff4e77796b6d308e96d0150e1a01081c0b82e01_False", - "model": { - "name": "EpistemeAI/Llama-3.2-3B-Agent007-Coder", - "sha": "7ff4e77796b6d308e96d0150e1a01081c0b82e01", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.91456180890705, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5399562050913798, - "normalized_score": 53.99562050913798 - }, - "bbh": { - "name": "BBH", - "value": 0.4303758760727905, - "normalized_score": 19.02580948500905 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11102719033232629, - "normalized_score": 11.10271903323263 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2575503355704698, - "normalized_score": 1.0067114093959737 - }, - "musr": { - "name": "MUSR", - "value": 0.36680208333333336, - "normalized_score": 7.783593750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - 
"value": 0.28515625, - "normalized_score": 20.572916666666664 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-08", - "submission_date": "2024-10-08", - "generation": 2, - "base_model": "meta-llama/Llama-3.2-3B-Instruct", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.0, - "co2_cost": 1.4216313423297888 - } - }, - { - "id": "EpistemeAI/Mistral-Nemo-Instruct-12B-Philosophy-Math_float16_1ac4205f8da109326b4a5cf173e5491a20087d76_False", - "model": { - "name": "EpistemeAI/Mistral-Nemo-Instruct-12B-Philosophy-Math", - "sha": "1ac4205f8da109326b4a5cf173e5491a20087d76", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 16.60399677564458, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.06946790072563022, - "normalized_score": 6.946790072563022 - }, - "bbh": { - "name": "BBH", - "value": 0.5364928342081372, - "normalized_score": 33.835810999564586 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09592145015105741, - "normalized_score": 9.592145015105741 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3313758389261745, - "normalized_score": 10.850111856823268 - }, - "musr": { - "name": "MUSR", - "value": 0.42921875, - "normalized_score": 12.885677083333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32962101063829785, - "normalized_score": 25.513445626477537 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-15", - "submission_date": "2024-09-26", - "generation": 1, - "base_model": "unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 12.248, - "co2_cost": 2.7272149280286 - } - }, - { - "id": "EpistemeAI/OpenReasoner-Llama-3.2-3B-rs1.0_float16_94ac34a32fd2266e84f92e60eab63131540fce2e_True", - "model": { - "name": "EpistemeAI/OpenReasoner-Llama-3.2-3B-rs1.0", - "sha": "94ac34a32fd2266e84f92e60eab63131540fce2e", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.939578822051278, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7274010735958367, - "normalized_score": 72.74010735958367 - }, - "bbh": { - "name": "BBH", - "value": 0.45185934849403964, - "normalized_score": 22.80780815942809 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13444108761329304, - "normalized_score": 13.444108761329304 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27181208053691275, - "normalized_score": 2.9082774049216997 - }, - "musr": { - "name": "MUSR", - "value": 0.3460625, - "normalized_score": 2.024479166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31341422872340424, - "normalized_score": 23.712692080378247 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-14", - "submission_date": "2025-02-15", - "generation": 2, - "base_model": "Removed", - "hub_license": "llama3.2", - "hub_hearts": 1, - "params_billions": 3.213, 
- "co2_cost": 0.5924359491094815 - } - }, - { - "id": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy_float16_daabf0dcd2915991531abac59da346f27864c7e7_True", - "model": { - "name": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy", - "sha": "daabf0dcd2915991531abac59da346f27864c7e7", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.321646038971995, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7100903380807368, - "normalized_score": 71.0090338080737 - }, - "bbh": { - "name": "BBH", - "value": 0.46279874531423665, - "normalized_score": 24.419414132832703 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13972809667673716, - "normalized_score": 13.972809667673717 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27684563758389263, - "normalized_score": 3.5794183445190177 - }, - "musr": { - "name": "MUSR", - "value": 0.3194895833333333, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33111702127659576, - "normalized_score": 25.67966903073286 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-13", - "submission_date": "2024-12-13", - "generation": 2, - "base_model": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.0, - "co2_cost": 1.3356626948818016 - } - }, - { - "id": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic_float16_20a0141e08db10f1d0ffb771676e56c7d2045acf_True", - "model": { - "name": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic", - "sha": "20a0141e08db10f1d0ffb771676e56c7d2045acf", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.204900560269166, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.712213593265868, - "normalized_score": 71.22135932658679 - }, - "bbh": { - "name": "BBH", - "value": 0.45659361690861294, - "normalized_score": 23.576450573055748 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12462235649546828, - "normalized_score": 12.462235649546828 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28439597315436244, - "normalized_score": 4.5861297539149914 - }, - "musr": { - "name": "MUSR", - "value": 0.32348958333333333, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33502327127659576, - "normalized_score": 26.11369680851064 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-13", - "submission_date": "2024-12-20", - "generation": 2, - "base_model": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3689958633217039 - } - }, - { - "id": 
"EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent_float16_3cba0f0085c1f95f011cbf76d35a2303c54b2141_True", - "model": { - "name": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent", - "sha": "3cba0f0085c1f95f011cbf76d35a2303c54b2141", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.02725584049078, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6915306941138402, - "normalized_score": 69.15306941138402 - }, - "bbh": { - "name": "BBH", - "value": 0.4524732961901791, - "normalized_score": 22.89036811976203 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12915407854984895, - "normalized_score": 12.915407854984895 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26677852348993286, - "normalized_score": 2.2371364653243813 - }, - "musr": { - "name": "MUSR", - "value": 0.35775, - "normalized_score": 5.518750000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32903922872340424, - "normalized_score": 25.44880319148936 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-13", - "submission_date": "2024-12-20", - "generation": 2, - "base_model": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3958844451637349 - } - }, - { - "id": "EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT_bfloat16_3ae39e39a02ff222a7436499462261b22ca28367_True", - "model": { - "name": "EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT", - "sha": "3ae39e39a02ff222a7436499462261b22ca28367", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.208175433660585, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4828532737580731, - "normalized_score": 48.285327375807306 - }, - "bbh": { - "name": "BBH", - "value": 0.47357563863974517, - "normalized_score": 25.544054444754476 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1299093655589124, - "normalized_score": 12.990936555891238 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.31821875, - "normalized_score": 0.9440104166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33427526595744683, - "normalized_score": 26.03058510638298 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-29", - "submission_date": "2025-01-29", - "generation": 0, - "base_model": "EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.447840402014464 - } - }, - { - "id": "EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT-V2-ORPO_float16_d37dcdb2f9a663c356fb670b6e449b4ef1b54977_False", - "model": { - "name": "EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT-V2-ORPO", - "sha": "d37dcdb2f9a663c356fb670b6e449b4ef1b54977", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - 
"weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.331579969605, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4553263119633683, - "normalized_score": 45.53263119633683 - }, - "bbh": { - "name": "BBH", - "value": 0.4804219047211424, - "normalized_score": 25.89560083857891 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12915407854984895, - "normalized_score": 12.915407854984895 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.393125, - "normalized_score": 7.173958333333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3597905585106383, - "normalized_score": 28.865617612293136 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-31", - "submission_date": "2025-01-31", - "generation": 2, - "base_model": "EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4427844314051856 - } - }, - { - "id": "EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.2_float16_a72b9f8f059647f799209c19931e263be79fbc03_True", - "model": { - "name": "EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.2", - "sha": "a72b9f8f059647f799209c19931e263be79fbc03", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 9.508931658156023, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.40871443325930756, - "normalized_score": 40.871443325930755 - }, - "bbh": { - "name": "BBH", - "value": 0.3324495305251265, - "normalized_score": 6.5772148357059725 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05060422960725076, - "normalized_score": 5.0604229607250755 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.3221875, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11785239361702128, - "normalized_score": 1.9835992907801419 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-04", - "submission_date": "2025-02-04", - "generation": 1, - "base_model": "Removed", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 0.9031677511029814 - } - }, - { - "id": "EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.3_float16_b7dfff75dc619c3a5705a5ffbdea2310db121b96_True", - "model": { - "name": "EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.3", - "sha": "b7dfff75dc619c3a5705a5ffbdea2310db121b96", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 8.170098865797046, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3272816127874041, - "normalized_score": 32.72816127874041 - }, - "bbh": { - "name": "BBH", - "value": 0.3262818751942827, - "normalized_score": 6.111150533529301 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05060422960725076, - "normalized_score": 
5.0604229607250755 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.326, - "normalized_score": 2.0833333333333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11727061170212766, - "normalized_score": 1.9189568557919614 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-04", - "submission_date": "2025-02-05", - "generation": 2, - "base_model": "Removed", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 0.7538632079758366 - } - }, - { - "id": "EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1_bfloat16_a40bd9becd2d0bd8ed6ca5727d5b2b4f5cb75393_True", - "model": { - "name": "EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1", - "sha": "a40bd9becd2d0bd8ed6ca5727d5b2b4f5cb75393", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.78003203358188, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5119538380386264, - "normalized_score": 51.19538380386264 - }, - "bbh": { - "name": "BBH", - "value": 0.43810846923178864, - "normalized_score": 20.728881523559497 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10800604229607251, - "normalized_score": 10.80060422960725 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.34352083333333333, - "normalized_score": 2.1734375000000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2789228723404255, - "normalized_score": 19.880319148936167 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-04", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 1.2263268803331018 - } - }, - { - "id": "EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO_bfloat16_563f7d4f8cd930e2b8079ec4844f8259ac19ad1c_True", - "model": { - "name": "EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO", - "sha": "563f7d4f8cd930e2b8079ec4844f8259ac19ad1c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.430640115437427, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7289746760816855, - "normalized_score": 72.89746760816854 - }, - "bbh": { - "name": "BBH", - "value": 0.45181862491313, - "normalized_score": 23.004650311661738 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15332326283987915, - "normalized_score": 15.332326283987916 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27348993288590606, - "normalized_score": 3.1319910514541416 - }, - "musr": { - "name": "MUSR", - "value": 0.3486666666666667, - "normalized_score": 2.8833333333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3100066489361702, - "normalized_score": 23.33407210401891 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - 
"is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-31", - "submission_date": "2025-02-04", - "generation": 1, - "base_model": "Removed", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 1.3505097740284058 - } - }, - { - "id": "EpistemeAI/ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math_bfloat16_28e5169a7406b61fa7bbfbeecf8a8d544f1650dd_True", - "model": { - "name": "EpistemeAI/ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math", - "sha": "28e5169a7406b61fa7bbfbeecf8a8d544f1650dd", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.475016650589627, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5902893212232432, - "normalized_score": 59.02893212232432 - }, - "bbh": { - "name": "BBH", - "value": 0.436379591348482, - "normalized_score": 19.821269315746473 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14803625377643503, - "normalized_score": 14.803625377643503 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.3314270833333333, - "normalized_score": 1.5950520833333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28233045212765956, - "normalized_score": 20.258939125295505 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-08", - "submission_date": "2025-03-08", - "generation": 4, - "base_model": "meta-llama/Llama-3.2-3B-Instruct", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 0.5962760270217874 - } - }, - { - "id": "EpistemeAI/ReasoningCore-3B-0_float16_8eebaf7d2bef9d80ba3e99c19e61f46da7bd83a9_True", - "model": { - "name": "EpistemeAI/ReasoningCore-3B-0", - "sha": "8eebaf7d2bef9d80ba3e99c19e61f46da7bd83a9", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.526303672038257, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7341454008696924, - "normalized_score": 73.41454008696924 - }, - "bbh": { - "name": "BBH", - "value": 0.44460707451155984, - "normalized_score": 22.166822802153792 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15861027190332325, - "normalized_score": 15.861027190332324 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.35539583333333336, - "normalized_score": 2.5578125000000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3172373670212766, - "normalized_score": 24.137485224586285 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-07", - "submission_date": "2025-02-07", - "generation": 2, - "base_model": "meta-llama/Llama-3.2-3B-Instruct", - "hub_license": "llama3.2", - "hub_hearts": 2, - "params_billions": 3.213, - "co2_cost": 0.5853917932437854 - } - }, - { - "id": "EpistemeAI/ReasoningCore-3B-Instruct-r01-Reflect_bfloat16_62e3cb0026f2cbd1d70e8fb45fcaf26d7256cc7d_True", - "model": { - "name": 
"EpistemeAI/ReasoningCore-3B-Instruct-r01-Reflect", - "sha": "62e3cb0026f2cbd1d70e8fb45fcaf26d7256cc7d", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.51162994918955, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7334960128015887, - "normalized_score": 73.34960128015888 - }, - "bbh": { - "name": "BBH", - "value": 0.44496323889512146, - "normalized_score": 22.265678862081568 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1540785498489426, - "normalized_score": 15.407854984894259 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27348993288590606, - "normalized_score": 3.1319910514541416 - }, - "musr": { - "name": "MUSR", - "value": 0.3527291666666667, - "normalized_score": 3.091145833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31441156914893614, - "normalized_score": 23.823507683215126 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-28", - "submission_date": "2025-02-28", - "generation": 2, - "base_model": "meta-llama/Llama-3.2-3B-Instruct", - "hub_license": "llama3.2", - "hub_hearts": 1, - "params_billions": 3.213, - "co2_cost": 0.5679692340468294 - } - }, - { - "id": "EpistemeAI/ReasoningCore-3B-R01_float16_046404c2e8b0c956f0c50f0e5e8f423455306ff1_True", - "model": { - "name": "EpistemeAI/ReasoningCore-3B-R01", - "sha": "046404c2e8b0c956f0c50f0e5e8f423455306ff1", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.035244688635869, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.29760590787998065, - "normalized_score": 29.76059078799807 - }, - "bbh": { - "name": "BBH", - "value": 0.43725189001258497, - "normalized_score": 20.62436739279366 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1299093655589124, - "normalized_score": 12.990936555891238 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.31945833333333334, - "normalized_score": 1.698958333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.25914228723404253, - "normalized_score": 17.682476359338057 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-07", - "submission_date": "2025-02-08", - "generation": 3, - "base_model": "meta-llama/Llama-3.2-3B-Instruct", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 3.213, - "co2_cost": 0.6070635739248887 - } - }, - { - "id": "EpistemeAI/ReasoningCore-3B-RE1-V2_float16_429b18420956128532a4286a9d2180f6ba3aacae_True", - "model": { - "name": "EpistemeAI/ReasoningCore-3B-RE1-V2", - "sha": "429b18420956128532a4286a9d2180f6ba3aacae", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.57087320558054, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7393161256576994, - "normalized_score": 73.93161256576994 - }, - "bbh": { - "name": "BBH", - 
"value": 0.44623884450165807, - "normalized_score": 22.472883801695517 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15634441087613293, - "normalized_score": 15.634441087613293 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27348993288590606, - "normalized_score": 3.1319910514541416 - }, - "musr": { - "name": "MUSR", - "value": 0.3540625, - "normalized_score": 2.024479166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31806848404255317, - "normalized_score": 24.229831560283685 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-18", - "submission_date": "2025-02-19", - "generation": 3, - "base_model": "meta-llama/Llama-3.2-3B-Instruct", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 0.5824159628332604 - } - }, - { - "id": "EpistemeAI/ReasoningCore-3B-RE1-V2A_bfloat16_d8a325429306b3993f731eb580187d545e221de6_True", - "model": { - "name": "EpistemeAI/ReasoningCore-3B-RE1-V2A", - "sha": "d8a325429306b3993f731eb580187d545e221de6", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.398292233746382, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5732534120577845, - "normalized_score": 57.32534120577844 - }, - "bbh": { - "name": "BBH", - "value": 0.4189899823502799, - "normalized_score": 18.05942921696145 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09290030211480363, - "normalized_score": 9.290030211480364 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.33520833333333333, - "normalized_score": 2.7343749999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2736037234042553, - "normalized_score": 19.289302600472812 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-24", - "submission_date": "2025-02-25", - "generation": 4, - "base_model": "meta-llama/Llama-3.2-3B-Instruct", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 0.5854265795489074 - } - }, - { - "id": "EpistemeAI/ReasoningCore-3B-RE1-V2B_float16_a66f75bde34c2d08062cb1ae455a789e0bba9e1d_True", - "model": { - "name": "EpistemeAI/ReasoningCore-3B-RE1-V2B", - "sha": "a66f75bde34c2d08062cb1ae455a789e0bba9e1d", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.80359555586663, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5051097753959495, - "normalized_score": 50.51097753959495 - }, - "bbh": { - "name": "BBH", - "value": 0.41678877951897175, - "normalized_score": 17.62918970867094 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10725075528700906, - "normalized_score": 10.725075528700906 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.3448229166666667, - "normalized_score": 1.8028645833333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.26728723404255317, - 
"normalized_score": 18.587470449172574 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-24", - "submission_date": "2025-02-25", - "generation": 5, - "base_model": "meta-llama/Llama-3.2-3B-Instruct", - "hub_license": "llama3.2", - "hub_hearts": 1, - "params_billions": 3.213, - "co2_cost": 0.6061262394745162 - } - }, - { - "id": "EpistemeAI/ReasoningCore-3B-RE1-V2C_bfloat16_aa270574bdcf8d85d273d7bc74ce85de5d9505b1_True", - "model": { - "name": "EpistemeAI/ReasoningCore-3B-RE1-V2C", - "sha": "aa270574bdcf8d85d273d7bc74ce85de5d9505b1", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.648061999116766, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5057092957796425, - "normalized_score": 50.57092957796425 - }, - "bbh": { - "name": "BBH", - "value": 0.41774567831526244, - "normalized_score": 17.793270992983555 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09743202416918428, - "normalized_score": 9.743202416918429 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.34215625, - "normalized_score": 1.5361979166666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2691156914893617, - "normalized_score": 18.790632387706854 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-26", - "submission_date": "2025-02-27", - "generation": 6, - "base_model": "meta-llama/Llama-3.2-3B-Instruct", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 0.5980412814196955 - } - }, - { - "id": "EpistemeAI/ReasoningCore-3B-T1-V1_float16_a6f33848cdb2eb198d4ea44b1988238c25a2501b_True", - "model": { - "name": "EpistemeAI/ReasoningCore-3B-T1-V1", - "sha": "a6f33848cdb2eb198d4ea44b1988238c25a2501b", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.243643018070443, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7207564816908026, - "normalized_score": 72.07564816908027 - }, - "bbh": { - "name": "BBH", - "value": 0.4516908992961786, - "normalized_score": 23.06533291680579 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14577039274924472, - "normalized_score": 14.577039274924472 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.35403125, - "normalized_score": 2.720572916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31200132978723405, - "normalized_score": 23.55570330969267 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-10", - "submission_date": "2025-02-11", - "generation": 3, - "base_model": "Removed", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 0.5761983025195495 - } - }, - { - "id": 
"EpistemeAI/ReasoningCore-3B-T1_1_float16_a1564589a7b766e23d444f2adfdfaef21aee3ce3_True", - "model": { - "name": "EpistemeAI/ReasoningCore-3B-T1_1", - "sha": "a1564589a7b766e23d444f2adfdfaef21aee3ce3", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.492474520020917, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7274509412802475, - "normalized_score": 72.74509412802476 - }, - "bbh": { - "name": "BBH", - "value": 0.45239424517060806, - "normalized_score": 23.094998793873298 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1540785498489426, - "normalized_score": 15.407854984894259 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.3553645833333334, - "normalized_score": 2.720572916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3116688829787234, - "normalized_score": 23.51876477541371 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-06", - "submission_date": "2025-02-06", - "generation": 2, - "base_model": "Removed", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 1.1654658615572893 - } - }, - { - "id": "EpistemeAI2/Athene-codegemma-2-7b-it-alpaca-v1.2_bfloat16_21b31062334a316b50680e8c3a141a72e4c30b61_False", - "model": { - "name": "EpistemeAI2/Athene-codegemma-2-7b-it-alpaca-v1.2", - "sha": "21b31062334a316b50680e8c3a141a72e4c30b61", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "GemmaForCausalLM", - "average_score": 15.718390848250493, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4351177098986245, - "normalized_score": 43.511770989862455 - }, - "bbh": { - "name": "BBH", - "value": 0.41754154460978427, - "normalized_score": 18.971369774912947 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04229607250755287, - "normalized_score": 4.229607250755287 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.41696875000000005, - "normalized_score": 10.387760416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.22972074468085107, - "normalized_score": 14.413416075650117 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-26", - "submission_date": "2024-08-26", - "generation": 2, - "base_model": "Removed", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.0, - "co2_cost": 1.939269685082392 - } - }, - { - "id": "EpistemeAI2/Fireball-12B-v1.2_bfloat16_57af42edf8232189ee99e9a21e33a0c306e3f561_False", - "model": { - "name": "EpistemeAI2/Fireball-12B-v1.2", - "sha": "57af42edf8232189ee99e9a21e33a0c306e3f561", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 15.200286583835771, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.13553925805750963, 
- "normalized_score": 13.553925805750964 - }, - "bbh": { - "name": "BBH", - "value": 0.5018583230653281, - "normalized_score": 29.776014226579218 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04154078549848943, - "normalized_score": 4.1540785498489425 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2986577181208054, - "normalized_score": 6.487695749440718 - }, - "musr": { - "name": "MUSR", - "value": 0.4173125, - "normalized_score": 11.264062499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33369348404255317, - "normalized_score": 25.965942671394792 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-27", - "submission_date": "2024-08-28", - "generation": 1, - "base_model": "Removed", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 12.0, - "co2_cost": 3.745129376454524 - } - }, - { - "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1-8B-Philos_bfloat16_3dcca4cf9bdd9003c8dc91f5c78cefef1d4ae0d7_False", - "model": { - "name": "EpistemeAI2/Fireball-Alpaca-Llama3.1-8B-Philos", - "sha": "3dcca4cf9bdd9003c8dc91f5c78cefef1d4ae0d7", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.551673578610224, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.498640274471735, - "normalized_score": 49.8640274471735 - }, - "bbh": { - "name": "BBH", - "value": 0.4977581192690881, - "normalized_score": 29.259226071264724 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11858006042296072, - "normalized_score": 11.858006042296072 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29278523489932884, - "normalized_score": 5.7046979865771785 - }, - "musr": { - "name": "MUSR", - "value": 0.42766666666666664, - "normalized_score": 11.891666666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3405917553191489, - "normalized_score": 26.73241725768321 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-29", - "submission_date": "2024-08-29", - "generation": 3, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 8.0, - "co2_cost": 1.6966642760929416 - } - }, - { - "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.01-8B-Philos_bfloat16_f97293ed5cec7fb9482b16600259967c6c923e4b_False", - "model": { - "name": "EpistemeAI2/Fireball-Alpaca-Llama3.1.01-8B-Philos", - "sha": "f97293ed5cec7fb9482b16600259967c6c923e4b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.567143968773337, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42117913802045237, - "normalized_score": 42.11791380204524 - }, - "bbh": { - "name": "BBH", - "value": 0.49561092312727917, - "normalized_score": 28.628475325906475 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13595166163141995, - "normalized_score": 13.595166163141995 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28859060402684567, - "normalized_score": 5.145413870246088 - }, - "musr": { - "name": "MUSR", - "value": 0.43706249999999996, - 
"normalized_score": 13.432812500000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33834773936170215, - "normalized_score": 26.483082151300234 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-03", - "submission_date": "2024-09-03", - "generation": 3, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.0, - "co2_cost": 1.7411433753603245 - } - }, - { - "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.03-8B-Philos_bfloat16_6e60f783f80f7d126b8e4f2b417e14dea63d2c4f_False", - "model": { - "name": "EpistemeAI2/Fireball-Alpaca-Llama3.1.03-8B-Philos", - "sha": "6e60f783f80f7d126b8e4f2b417e14dea63d2c4f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.274573357917856, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3880814017916905, - "normalized_score": 38.808140179169044 - }, - "bbh": { - "name": "BBH", - "value": 0.49508699339363266, - "normalized_score": 27.99254879661235 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1283987915407855, - "normalized_score": 12.83987915407855 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2785234899328859, - "normalized_score": 3.8031319910514525 - }, - "musr": { - "name": "MUSR", - "value": 0.42801041666666667, - "normalized_score": 12.034635416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3355219414893617, - "normalized_score": 26.16910460992908 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-04", - "submission_date": "2024-09-04", - "generation": 3, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 8.0, - "co2_cost": 1.5950461475759945 - } - }, - { - "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.04-8B-Philos_float16_efd0c251373e1a2fa2bc8cead502c03ff6dc7c8b_False", - "model": { - "name": "EpistemeAI2/Fireball-Alpaca-Llama3.1.04-8B-Philos", - "sha": "efd0c251373e1a2fa2bc8cead502c03ff6dc7c8b", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.094517497325704, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.40843960690966635, - "normalized_score": 40.84396069096664 - }, - "bbh": { - "name": "BBH", - "value": 0.4930009712421776, - "normalized_score": 27.963797525362725 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12009063444108761, - "normalized_score": 12.009063444108762 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.43721875, - "normalized_score": 13.685677083333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3402593085106383, - "normalized_score": 26.695478723404253 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-05", - "submission_date": "2024-09-05", - "generation": 3, - 
"base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.0, - "co2_cost": 1.5304958366458974 - } - }, - { - "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo_float16_3e76f190b505b515479cc25e92f8229c2b05159f_False", - "model": { - "name": "EpistemeAI2/Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo", - "sha": "3e76f190b505b515479cc25e92f8229c2b05159f", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.867631752899076, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4865756193566404, - "normalized_score": 48.657561935664035 - }, - "bbh": { - "name": "BBH", - "value": 0.48807730539009225, - "normalized_score": 27.207176615070413 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13066465256797583, - "normalized_score": 13.066465256797583 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.3931875, - "normalized_score": 6.848437499999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3614527925531915, - "normalized_score": 29.050310283687946 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-09", - "submission_date": "2024-09-09", - "generation": 5, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.0, - "co2_cost": 1.8695479199902147 - } - }, - { - "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math_float16_0b2842bddfa6c308f67eb5a20daf04536a4e6d1a_False", - "model": { - "name": "EpistemeAI2/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math", - "sha": "0b2842bddfa6c308f67eb5a20daf04536a4e6d1a", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.97087003498899, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5079079065767719, - "normalized_score": 50.79079065767719 - }, - "bbh": { - "name": "BBH", - "value": 0.4847020640542447, - "normalized_score": 26.901201452028392 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12009063444108761, - "normalized_score": 12.009063444108762 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.40630208333333334, - "normalized_score": 7.854427083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35305851063829785, - "normalized_score": 28.117612293144205 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-10", - "submission_date": "2024-09-10", - "generation": 4, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.0, - "co2_cost": 1.8040592656869943 - } - }, - { - "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-C-R1-KTO-Reflection_float16_dc900138b4406353b7e84251bc8649d70c16f13f_False", - "model": { - "name": "EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-C-R1-KTO-Reflection", - "sha": "dc900138b4406353b7e84251bc8649d70c16f13f", 
- "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.894624934810878, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.39522577871159636, - "normalized_score": 39.52257787115964 - }, - "bbh": { - "name": "BBH", - "value": 0.49553052334314723, - "normalized_score": 27.571611204577167 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12462235649546828, - "normalized_score": 12.462235649546828 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.4048125, - "normalized_score": 10.4015625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35929188829787234, - "normalized_score": 28.8102098108747 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-16", - "submission_date": "2024-09-16", - "generation": 6, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.0, - "co2_cost": 1.7679480184248195 - } - }, - { - "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R1_float16_c57c786426123635baf6c8b4d30638d2053f4565_False", - "model": { - "name": "EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R1", - "sha": "c57c786426123635baf6c8b4d30638d2053f4565", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.511188245481957, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5316382753316755, - "normalized_score": 53.16382753316755 - }, - "bbh": { - "name": "BBH", - "value": 0.4827931104634334, - "normalized_score": 26.76368521382555 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12386706948640483, - "normalized_score": 12.386706948640484 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.4103020833333333, - "normalized_score": 8.454427083333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3523105053191489, - "normalized_score": 28.034500591016542 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-13", - "submission_date": "2024-09-13", - "generation": 4, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.0, - "co2_cost": 1.8195181295523026 - } - }, - { - "id": "EpistemeAI2/Fireball-Llama-3.1-8B-Philos-Reflection_float16_4b0b75d9235886e8a947c45b94f87c5a65a81467_False", - "model": { - "name": "EpistemeAI2/Fireball-Llama-3.1-8B-Philos-Reflection", - "sha": "4b0b75d9235886e8a947c45b94f87c5a65a81467", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.37672090429901, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3596047376516532, - "normalized_score": 35.96047376516532 - }, - "bbh": { - "name": "BBH", - "value": 0.4897693552241443, - "normalized_score": 27.76979570236311 - }, - 
"math": { - "name": "MATH Level 5", - "value": 0.1283987915407855, - "normalized_score": 12.83987915407855 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.3957291666666667, - "normalized_score": 9.632812500000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3550531914893617, - "normalized_score": 28.33924349881796 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-17", - "submission_date": "2024-09-17", - "generation": 5, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.0, - "co2_cost": 1.78988673075239 - } - }, - { - "id": "EpistemeAI2/Fireball-MathMistral-Nemo-Base-2407-v2dpo_bfloat16_6b7d851c66359f39d16da6fbcf810b816dc6e4bc_True", - "model": { - "name": "EpistemeAI2/Fireball-MathMistral-Nemo-Base-2407-v2dpo", - "sha": "6b7d851c66359f39d16da6fbcf810b816dc6e4bc", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 11.369982746356044, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.30972043067948596, - "normalized_score": 30.972043067948597 - }, - "bbh": { - "name": "BBH", - "value": 0.43276373285682107, - "normalized_score": 21.145528378150857 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03700906344410876, - "normalized_score": 3.7009063444108756 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.4029583333333333, - "normalized_score": 8.96979166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11477726063829788, - "normalized_score": 1.6419178486997636 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-21", - "submission_date": "2024-08-24", - "generation": 2, - "base_model": "unsloth/Mistral-Nemo-Base-2407-bnb-4bit", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 11.58, - "co2_cost": 3.762851272980899 - } - }, - { - "id": "EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math_float16_aa21037cf0984cb293facb69c41895e7fccb1340_False", - "model": { - "name": "EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math", - "sha": "aa21037cf0984cb293facb69c41895e7fccb1340", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.727957302291042, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5515465631191904, - "normalized_score": 55.15465631191904 - }, - "bbh": { - "name": "BBH", - "value": 0.48075580310342053, - "normalized_score": 26.743767165585172 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1351963746223565, - "normalized_score": 13.51963746223565 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.36925, - "normalized_score": 6.789583333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 
0.3420046542553192, - "normalized_score": 26.889406028368796 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-11", - "submission_date": "2024-10-12", - "generation": 3, - "base_model": "Removed", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.0, - "co2_cost": 1.5833654691318217 - } - }, - { - "id": "EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT_float16_cf8b99d4aa00c18fdaebfb24fa3c674ee6defa1a_False", - "model": { - "name": "EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT", - "sha": "cf8b99d4aa00c18fdaebfb24fa3c674ee6defa1a", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.037758831912942, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4633195476890207, - "normalized_score": 46.331954768902065 - }, - "bbh": { - "name": "BBH", - "value": 0.4790834283312441, - "normalized_score": 26.400991557555106 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11706948640483383, - "normalized_score": 11.706948640483382 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31208053691275167, - "normalized_score": 8.277404921700223 - }, - "musr": { - "name": "MUSR", - "value": 0.37743750000000004, - "normalized_score": 5.013020833333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3564660904255319, - "normalized_score": 28.496232269503547 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-11", - "submission_date": "2024-10-11", - "generation": 3, - "base_model": "Removed", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.0, - "co2_cost": 1.6016352294968985 - } - }, - { - "id": "EpistemeAI2/Fireball-Phi-3-medium-4k-inst-Philos_bfloat16_147715051102034fac98091e2a0cae6cade15ae0_True", - "model": { - "name": "EpistemeAI2/Fireball-Phi-3-medium-4k-inst-Philos", - "sha": "147715051102034fac98091e2a0cae6cade15ae0", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 29.67636693004503, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5312880933700359, - "normalized_score": 53.128809337003595 - }, - "bbh": { - "name": "BBH", - "value": 0.6177842639287514, - "normalized_score": 46.20887281141656 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17069486404833836, - "normalized_score": 17.069486404833835 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33221476510067116, - "normalized_score": 10.96196868008949 - }, - "musr": { - "name": "MUSR", - "value": 0.41390625, - "normalized_score": 10.704947916666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.45985704787234044, - "normalized_score": 39.984116430260045 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-19", - "submission_date": "2024-09-20", - "generation": 1, - "base_model": "unsloth/phi-3-medium-4k-instruct-bnb-4bit", - "hub_license": "apache-2.0", - "hub_hearts": 0, - 
"params_billions": 13.96, - "co2_cost": 1.5436275349185773 - } - }, - { - "id": "Eric111/CatunaMayo_bfloat16_23337893381293975cbcc35f75b634954fbcefaf_False", - "model": { - "name": "Eric111/CatunaMayo", - "sha": "23337893381293975cbcc35f75b634954fbcefaf", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.27397881040676, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4074156571231, - "normalized_score": 40.741565712310006 - }, - "bbh": { - "name": "BBH", - "value": 0.5243635518600797, - "normalized_score": 33.299425909067885 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08459214501510574, - "normalized_score": 8.459214501510575 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.45398958333333334, - "normalized_score": 15.348697916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3178191489361702, - "normalized_score": 24.202127659574465 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-02-15", - "submission_date": "2024-07-03", - "generation": 0, - "base_model": "Eric111/CatunaMayo", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.1016492560543947 - } - }, - { - "id": "Eric111/CatunaMayo-DPO_float16_6bdbe06c10d57d152dd8a79a71edd8e30135b689_False", - "model": { - "name": "Eric111/CatunaMayo-DPO", - "sha": "6bdbe06c10d57d152dd8a79a71edd8e30135b689", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.292884967568227, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4214539643700936, - "normalized_score": 42.14539643700936 - }, - "bbh": { - "name": "BBH", - "value": 0.5223991323844243, - "normalized_score": 33.08995159999345 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08157099697885196, - "normalized_score": 8.157099697885197 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.44503125, - "normalized_score": 14.662239583333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3169880319148936, - "normalized_score": 24.109781323877066 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-02-21", - "submission_date": "2024-06-27", - "generation": 0, - "base_model": "Eric111/CatunaMayo-DPO", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 1.1080452807347074 - } - }, - { - "id": "Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties_bfloat16_8a9c3d745e0805e769b544622b3f5c039abc9b07_False", - "model": { - "name": "Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties", - "sha": "8a9c3d745e0805e769b544622b3f5c039abc9b07", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 24.981820729952233, - "has_chat_template": false - }, - "evaluations": { - 
"ifeval": { - "name": "IFEval", - "value": 0.3724694920588483, - "normalized_score": 37.24694920588483 - }, - "bbh": { - "name": "BBH", - "value": 0.5410649663618229, - "normalized_score": 35.58334267696659 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16314199395770393, - "normalized_score": 16.314199395770395 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3238255033557047, - "normalized_score": 9.843400447427292 - }, - "musr": { - "name": "MUSR", - "value": 0.4649375, - "normalized_score": 17.817187499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39777260638297873, - "normalized_score": 33.08584515366431 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-10-28", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.821, - "co2_cost": 1.2709938613196208 - } - }, - { - "id": "Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties-v2_bfloat16_121b0831361743558e1a56fd89ae3d3c03272cc4_False", - "model": { - "name": "Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties-v2", - "sha": "121b0831361743558e1a56fd89ae3d3c03272cc4", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 25.0072163099762, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.37399322686028624, - "normalized_score": 37.399322686028626 - }, - "bbh": { - "name": "BBH", - "value": 0.5410649663618229, - "normalized_score": 35.58334267696659 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16314199395770393, - "normalized_score": 16.314199395770395 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3238255033557047, - "normalized_score": 9.843400447427292 - }, - "musr": { - "name": "MUSR", - "value": 0.4649375, - "normalized_score": 17.817187499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39777260638297873, - "normalized_score": 33.08584515366431 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-10-29", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.821, - "co2_cost": 1.2625910778696097 - } - }, - { - "id": "Etherll/Herplete-LLM-Llama-3.1-8b_float16_b3829cf437216f099c031a9ab5e4c8ec974766dd_True", - "model": { - "name": "Etherll/Herplete-LLM-Llama-3.1-8b", - "sha": "b3829cf437216f099c031a9ab5e4c8ec974766dd", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.58870802333299, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.46719149634082013, - "normalized_score": 46.71914963408201 - }, - "bbh": { - "name": "BBH", - "value": 0.5013428726325629, - "normalized_score": 28.952590926070883 - }, - "math": { - "name": "MATH Level 5", - "value": 0.027945619335347432, - "normalized_score": 2.794561933534743 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2860738255033557, - "normalized_score": 4.809843400447425 - }, - "musr": { - "name": "MUSR", - "value": 0.38599999999999995, - "normalized_score": 6.6833333333333345 - }, - 
"mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34815492021276595, - "normalized_score": 27.57276891252955 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-24", - "submission_date": "2024-08-29", - "generation": 1, - "base_model": "Etherll/Herplete-LLM-Llama-3.1-8b (Merge)", - "hub_license": "", - "hub_hearts": 5, - "params_billions": 8.03, - "co2_cost": 0.9736847295609784 - } - }, - { - "id": "Etherll/Herplete-LLM-Llama-3.1-8b_bfloat16_d1383d993fad005d515be4d815797019601c679f_False", - "model": { - "name": "Etherll/Herplete-LLM-Llama-3.1-8b", - "sha": "d1383d993fad005d515be4d815797019601c679f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.260139418825688, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6105976586568084, - "normalized_score": 61.05976586568083 - }, - "bbh": { - "name": "BBH", - "value": 0.5347253355929804, - "normalized_score": 33.206608363709044 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15483383685800603, - "normalized_score": 15.483383685800604 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3145973154362416, - "normalized_score": 8.612975391498878 - }, - "musr": { - "name": "MUSR", - "value": 0.3990520833333333, - "normalized_score": 8.614843749999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.375249335106383, - "normalized_score": 30.583259456264773 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-24", - "submission_date": "2024-10-18", - "generation": 1, - "base_model": "Etherll/Herplete-LLM-Llama-3.1-8b (Merge)", - "hub_license": "", - "hub_hearts": 5, - "params_billions": 8.03, - "co2_cost": 1.709613282616363 - } - }, - { - "id": "Etherll/Herplete-LLM-Llama-3.1-8b-Ties_bfloat16__False", - "model": { - "name": "Etherll/Herplete-LLM-Llama-3.1-8b-Ties", - "sha": "", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.53329128692283, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6163679038285084, - "normalized_score": 61.63679038285083 - }, - "bbh": { - "name": "BBH", - "value": 0.5337975953250876, - "normalized_score": 33.07089034564546 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16012084592145015, - "normalized_score": 16.012084592145015 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31711409395973156, - "normalized_score": 8.948545861297541 - }, - "musr": { - "name": "MUSR", - "value": 0.40171874999999996, - "normalized_score": 8.948177083333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.375249335106383, - "normalized_score": 30.583259456264773 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-03", - "submission_date": "2024-10-17", - "generation": 1, - "base_model": "Etherll/Herplete-LLM-Llama-3.1-8b-Ties (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.7244029419033515 - } - }, - { 
- "id": "Etherll/Qwen2.5-7B-della-test_bfloat16_c2b2ffc38627e68e7b43a1b596dc16ee93c1c63b_True", - "model": { - "name": "Etherll/Qwen2.5-7B-della-test", - "sha": "c2b2ffc38627e68e7b43a1b596dc16ee93c1c63b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.81656745139639, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7624968417133207, - "normalized_score": 76.24968417133206 - }, - "bbh": { - "name": "BBH", - "value": 0.5447331985391859, - "normalized_score": 35.54689390845198 - }, - "math": { - "name": "MATH Level 5", - "value": 0.48942598187311176, - "normalized_score": 48.94259818731118 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3087248322147651, - "normalized_score": 7.829977628635347 - }, - "musr": { - "name": "MUSR", - "value": 0.40469791666666666, - "normalized_score": 8.987239583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4360871010638298, - "normalized_score": 37.34301122931442 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-01", - "submission_date": "2024-11-14", - "generation": 1, - "base_model": "Etherll/Qwen2.5-7B-della-test (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 2.084641406728251 - } - }, - { - "id": "Etherll/Qwen2.5-Coder-7B-Instruct-Ties_bfloat16_d8c1624a2fa60f05030e34a128af391b5d8be332_False", - "model": { - "name": "Etherll/Qwen2.5-Coder-7B-Instruct-Ties", - "sha": "d8c1624a2fa60f05030e34a128af391b5d8be332", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 26.513720070911546, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5005385709916355, - "normalized_score": 50.05385709916355 - }, - "bbh": { - "name": "BBH", - "value": 0.4895144464043051, - "normalized_score": 28.008294264156472 - }, - "math": { - "name": "MATH Level 5", - "value": 0.29154078549848944, - "normalized_score": 29.154078549848943 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3296979865771812, - "normalized_score": 10.626398210290827 - }, - "musr": { - "name": "MUSR", - "value": 0.43728125, - "normalized_score": 13.426822916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3503158244680851, - "normalized_score": 27.812869385342786 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-30", - "submission_date": "2024-10-28", - "generation": 1, - "base_model": "Etherll/Qwen2.5-Coder-7B-Instruct-Ties (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 2.394362523519261 - } - }, - { - "id": "Etherll/Replete-LLM-V3-Llama-3.1-8b_float16_e79849d72f70ef74677ed81a8885403973b2470c_True", - "model": { - "name": "Etherll/Replete-LLM-V3-Llama-3.1-8b", - "sha": "e79849d72f70ef74677ed81a8885403973b2470c", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.70431724543067, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 
0.5262924595628488, - "normalized_score": 52.629245956284876 - }, - "bbh": { - "name": "BBH", - "value": 0.4543377420594779, - "normalized_score": 22.902455222412783 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22734138972809667, - "normalized_score": 22.734138972809667 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.3516458333333334, - "normalized_score": 2.0557291666666675 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34699135638297873, - "normalized_score": 27.44348404255319 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-24", - "submission_date": "2024-08-26", - "generation": 1, - "base_model": "Etherll/Replete-LLM-V3-Llama-3.1-8b (Merge)", - "hub_license": "", - "hub_hearts": 5, - "params_billions": 8.03, - "co2_cost": 1.5786586422846098 - } - }, - { - "id": "Etherll/SuperHermes_bfloat16_7edd56cb37722d09b0334826e0532b223d334939_False", - "model": { - "name": "Etherll/SuperHermes", - "sha": "7edd56cb37722d09b0334826e0532b223d334939", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.91930524281158, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5459015412438996, - "normalized_score": 54.590154124389954 - }, - "bbh": { - "name": "BBH", - "value": 0.5289531792679852, - "normalized_score": 32.84031674117277 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16540785498489427, - "normalized_score": 16.540785498489427 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3238255033557047, - "normalized_score": 9.843400447427292 - }, - "musr": { - "name": "MUSR", - "value": 0.44004166666666666, - "normalized_score": 14.938541666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39486369680851063, - "normalized_score": 32.7626329787234 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-27", - "submission_date": "2024-10-27", - "generation": 1, - "base_model": "Etherll/SuperHermes (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.500030829605098 - } - }, - { - "id": "Eurdem/Defne-llama3.1-8B_bfloat16_7832ba3066636bf4dab3e7d658c0b3ded12491ae_False", - "model": { - "name": "Eurdem/Defne-llama3.1-8B", - "sha": "7832ba3066636bf4dab3e7d658c0b3ded12491ae", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.120605411030215, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5036115285220991, - "normalized_score": 50.36115285220991 - }, - "bbh": { - "name": "BBH", - "value": 0.5320979090308238, - "normalized_score": 32.822381370434904 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16012084592145015, - "normalized_score": 16.012084592145015 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.43309375, - "normalized_score": 13.536718749999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - 
"value": 0.3865525265957447, - "normalized_score": 31.839169621749413 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-29", - "submission_date": "2024-08-14", - "generation": 0, - "base_model": "Eurdem/Defne-llama3.1-8B", - "hub_license": "llama3.1", - "hub_hearts": 6, - "params_billions": 8.03, - "co2_cost": 2.6645961851911526 - } - }, - { - "id": "FINGU-AI/Chocolatine-Fusion-14B_float16_49b7b720ddd40ccdca303922037a4bb34b1ca33b_False", - "model": { - "name": "FINGU-AI/Chocolatine-Fusion-14B", - "sha": "49b7b720ddd40ccdca303922037a4bb34b1ca33b", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.36155927006158, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6949028577507679, - "normalized_score": 69.49028577507679 - }, - "bbh": { - "name": "BBH", - "value": 0.64132285324613, - "normalized_score": 48.600901490542746 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3851963746223565, - "normalized_score": 38.51963746223565 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3716442953020134, - "normalized_score": 16.21923937360179 - }, - "musr": { - "name": "MUSR", - "value": 0.49402083333333335, - "normalized_score": 21.985937499999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5261801861702128, - "normalized_score": 47.35335401891253 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 0, - "base_model": "FINGU-AI/Chocolatine-Fusion-14B", - "hub_license": "mit", - "hub_hearts": 3, - "params_billions": 8.367, - "co2_cost": 3.778182731900417 - } - }, - { - "id": "FINGU-AI/L3-8B_float16_7e7999af68810a8158bf1cf939b1874d430d51f1_True", - "model": { - "name": "FINGU-AI/L3-8B", - "sha": "7e7999af68810a8158bf1cf939b1874d430d51f1", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.91453457774598, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7517309627344335, - "normalized_score": 75.17309627344335 - }, - "bbh": { - "name": "BBH", - "value": 0.4985585187130108, - "normalized_score": 28.805821240438444 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2545317220543807, - "normalized_score": 25.45317220543807 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2953020134228188, - "normalized_score": 6.040268456375841 - }, - "musr": { - "name": "MUSR", - "value": 0.38283333333333336, - "normalized_score": 8.687500000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36394614361702127, - "normalized_score": 29.32734929078014 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-18", - "submission_date": "2025-01-18", - "generation": 0, - "base_model": "FINGU-AI/L3-8B", - "hub_license": "llama3.1", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.4241896674664711 - } - }, - { - "id": "FINGU-AI/Phi-4-RRStock_float16_d2a5483701f222aedbec6de974929a83ae533c4d_False", - 
"model": { - "name": "FINGU-AI/Phi-4-RRStock", - "sha": "d2a5483701f222aedbec6de974929a83ae533c4d", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.415394122718638, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.28554125276488607, - "normalized_score": 28.554125276488605 - }, - "bbh": { - "name": "BBH", - "value": 0.6443442865581455, - "normalized_score": 48.68220452265962 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0581570996978852, - "normalized_score": 5.81570996978852 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3800335570469799, - "normalized_score": 17.337807606263986 - }, - "musr": { - "name": "MUSR", - "value": 0.44794791666666667, - "normalized_score": 14.960156250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.48828125, - "normalized_score": 43.14236111111111 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-05", - "submission_date": "2025-02-05", - "generation": 0, - "base_model": "FINGU-AI/Phi-4-RRStock", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 6.652, - "co2_cost": 2.73031594302183 - } - }, - { - "id": "FINGU-AI/Q-Small-3B_float16_42ad8458821a8574c3973d7e8088208a32c2fb81_True", - "model": { - "name": "FINGU-AI/Q-Small-3B", - "sha": "42ad8458821a8574c3973d7e8088208a32c2fb81", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 16.89041454501282, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4145345461154182, - "normalized_score": 41.45345461154182 - }, - "bbh": { - "name": "BBH", - "value": 0.43185314557630744, - "normalized_score": 21.386476977673595 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08308157099697885, - "normalized_score": 8.308157099697885 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26677852348993286, - "normalized_score": 2.2371364653243813 - }, - "musr": { - "name": "MUSR", - "value": 0.40054166666666663, - "normalized_score": 8.067708333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27900598404255317, - "normalized_score": 19.889553782505907 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-21", - "submission_date": "2025-01-21", - "generation": 0, - "base_model": "FINGU-AI/Q-Small-3B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.086, - "co2_cost": 1.4546580986103825 - } - }, - { - "id": "FINGU-AI/QwQ-Buddy-32B-Alpha_float16_d975cf81a61e62ea087d83d598d0b51a3629de52_False", - "model": { - "name": "FINGU-AI/QwQ-Buddy-32B-Alpha", - "sha": "d975cf81a61e62ea087d83d598d0b51a3629de52", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.1782720586928, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.34464221598691475, - "normalized_score": 34.46422159869147 - }, - "bbh": { - "name": "BBH", - "value": 0.642442234274039, - "normalized_score": 48.730953196766194 - }, - "math": { - "name": "MATH Level 5", - "value": 
0.3851963746223565, - "normalized_score": 38.51963746223565 - }, - "gpqa": { - "name": "GPQA", - "value": 0.37919463087248323, - "normalized_score": 17.225950782997764 - }, - "musr": { - "name": "MUSR", - "value": 0.5059895833333333, - "normalized_score": 24.41536458333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5294215425531915, - "normalized_score": 47.71350472813239 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-05", - "submission_date": "2025-02-05", - "generation": 0, - "base_model": "FINGU-AI/QwQ-Buddy-32B-Alpha", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 19.662, - "co2_cost": 11.118192684812248 - } - }, - { - "id": "FINGU-AI/RomboUltima-32B_float16_98a732a32e2366a2ab8f08fdc3d668892e7c1f7f_False", - "model": { - "name": "FINGU-AI/RomboUltima-32B", - "sha": "98a732a32e2366a2ab8f08fdc3d668892e7c1f7f", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 44.731545333278405, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6671509372908327, - "normalized_score": 66.71509372908326 - }, - "bbh": { - "name": "BBH", - "value": 0.6938448333620042, - "normalized_score": 56.67376969863417 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5385196374622356, - "normalized_score": 53.85196374622356 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3716442953020134, - "normalized_score": 16.21923937360179 - }, - "musr": { - "name": "MUSR", - "value": 0.4836354166666667, - "normalized_score": 21.721093749999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.578873005319149, - "normalized_score": 53.20811170212767 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 0, - "base_model": "FINGU-AI/RomboUltima-32B", - "hub_license": "mit", - "hub_hearts": 3, - "params_billions": 17.645, - "co2_cost": 7.804714974681665 - } - }, - { - "id": "FINGU-AI/Ultimos-32B_bfloat16_d2cb2b0ee4425e06a2303c27a1f4ae4570b5f5ca_True", - "model": { - "name": "FINGU-AI/Ultimos-32B", - "sha": "d2cb2b0ee4425e06a2303c27a1f4ae4570b5f5ca", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.640726542069556, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1592197591280026, - "normalized_score": 15.92197591280026 - }, - "bbh": { - "name": "BBH", - "value": 0.2905531373728777, - "normalized_score": 2.277935201319207 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24916107382550334, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.32860416666666664, - "normalized_score": 2.408854166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11112034574468085, - "normalized_score": 1.2355939716312052 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-10", - "submission_date": 
"2025-02-10", - "generation": 0, - "base_model": "FINGU-AI/Ultimos-32B", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 9.604, - "co2_cost": 1.703313123882228 - } - }, - { - "id": "FallenMerick/Chewy-Lemon-Cookie-11B_bfloat16_0f5d0d6d218b3ef034f58eba32d6fe7ac4c237ae_False", - "model": { - "name": "FallenMerick/Chewy-Lemon-Cookie-11B", - "sha": "0f5d0d6d218b3ef034f58eba32d6fe7ac4c237ae", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.043725653783593, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4875242135312083, - "normalized_score": 48.75242135312082 - }, - "bbh": { - "name": "BBH", - "value": 0.5251122307375103, - "normalized_score": 33.01430008846961 - }, - "math": { - "name": "MATH Level 5", - "value": 0.054380664652567974, - "normalized_score": 5.438066465256798 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.45455208333333336, - "normalized_score": 15.952343750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3267121010638298, - "normalized_score": 25.190233451536642 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-06", - "submission_date": "2024-06-27", - "generation": 1, - "base_model": "FallenMerick/Chewy-Lemon-Cookie-11B (Merge)", - "hub_license": "cc-by-4.0", - "hub_hearts": 0, - "params_billions": 10.732, - "co2_cost": 1.714547555272019 - } - }, - { - "id": "Felladrin/Llama-160M-Chat-v1_float16_e7f50665676821867ee7dfad32d0ca9fb68fc6bc_True", - "model": { - "name": "Felladrin/Llama-160M-Chat-v1", - "sha": "e7f50665676821867ee7dfad32d0ca9fb68fc6bc", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.201766115349323, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15754642127333254, - "normalized_score": 15.754642127333252 - }, - "bbh": { - "name": "BBH", - "value": 0.30360811146348365, - "normalized_score": 3.166755569392556 - }, - "math": { - "name": "MATH Level 5", - "value": 0.006042296072507553, - "normalized_score": 0.6042296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2575503355704698, - "normalized_score": 1.0067114093959737 - }, - "musr": { - "name": "MUSR", - "value": 0.366125, - "normalized_score": 3.165625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11361369680851063, - "normalized_score": 1.512632978723403 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-12-20", - "submission_date": "2024-07-23", - "generation": 1, - "base_model": "JackFram/llama-160m", - "hub_license": "apache-2.0", - "hub_hearts": 18, - "params_billions": 0.162, - "co2_cost": 0.3631612182790685 - } - }, - { - "id": "Felladrin/Minueza-32M-UltraChat_float16_28506b99c5902d2215eb378ec91d4226a7396c49_True", - "model": { - "name": "Felladrin/Minueza-32M-UltraChat", - "sha": "28506b99c5902d2215eb378ec91d4226a7396c49", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 
3.9242559881806987, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.13756277787381924, - "normalized_score": 13.756277787381924 - }, - "bbh": { - "name": "BBH", - "value": 0.2941478734048925, - "normalized_score": 2.4372895622895623 - }, - "math": { - "name": "MATH Level 5", - "value": 0.004531722054380665, - "normalized_score": 0.4531722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2558724832214765, - "normalized_score": 0.7829977628635317 - }, - "musr": { - "name": "MUSR", - "value": 0.37418749999999995, - "normalized_score": 4.640104166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11328125, - "normalized_score": 1.4756944444444438 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-02-27", - "submission_date": "2024-07-23", - "generation": 1, - "base_model": "Felladrin/Minueza-32M-Base", - "hub_license": "apache-2.0", - "hub_hearts": 5, - "params_billions": 0.033, - "co2_cost": 0.33613377903411323 - } - }, - { - "id": "FlofloB/100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit_float16_ea6ceae8a6894f1c6ea3fe978846b2a66c3e369c_True", - "model": { - "name": "FlofloB/100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", - "sha": "ea6ceae8a6894f1c6ea3fe978846b2a66c3e369c", - "precision": "float16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.550830451075553, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.30832191917445706, - "normalized_score": 30.83219191744571 - }, - "bbh": { - "name": "BBH", - "value": 0.3323387445789459, - "normalized_score": 7.3478250067150155 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04078549848942598, - "normalized_score": 4.078549848942599 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26929530201342283, - "normalized_score": 2.572706935123044 - }, - "musr": { - "name": "MUSR", - "value": 0.33021875, - "normalized_score": 0.9440104166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.14976728723404256, - "normalized_score": 5.529698581560284 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-28", - "submission_date": "2024-11-29", - "generation": 3, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 0.5, - "co2_cost": 0.967387192446507 - } - }, - { - "id": "FlofloB/10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit_float16_a2eb0460779e76bb511339bcc2545b4729c9d78e_True", - "model": { - "name": "FlofloB/10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit", - "sha": "a2eb0460779e76bb511339bcc2545b4729c9d78e", - "precision": "float16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 24.04356389387962, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.509730847484674, - "normalized_score": 50.9730847484674 - }, - "bbh": { - "name": "BBH", - "value": 0.5214989784123593, - "normalized_score": 32.60779983085876 - }, - "math": { - "name": "MATH Level 5", - "value": 
0.09743202416918428, - "normalized_score": 9.743202416918429 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.43095833333333333, - "normalized_score": 13.569791666666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37691156914893614, - "normalized_score": 30.767952127659566 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-22", - "submission_date": "2024-11-22", - "generation": 1, - "base_model": "unsloth/phi-3-mini-4k-instruct-bnb-4bit", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 16.0, - "co2_cost": 0.9750895997519636 - } - }, - { - "id": "FlofloB/10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit_float16_2152657b389375f48fc5073413bba17835117bcc_True", - "model": { - "name": "FlofloB/10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", - "sha": "2152657b389375f48fc5073413bba17835117bcc", - "precision": "float16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.363924265648496, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.28154408081667753, - "normalized_score": 28.154408081667754 - }, - "bbh": { - "name": "BBH", - "value": 0.3305518729746925, - "normalized_score": 7.530228683558659 - }, - "math": { - "name": "MATH Level 5", - "value": 0.030966767371601207, - "normalized_score": 3.096676737160121 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.33021875, - "normalized_score": 1.47734375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15408909574468085, - "normalized_score": 6.009899527186761 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-25", - "submission_date": "2024-11-25", - "generation": 3, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 0.5, - "co2_cost": 1.0167301564092104 - } - }, - { - "id": "FlofloB/40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit_float16_64c61d9c777da56597a338afd7586cc4ad07d350_True", - "model": { - "name": "FlofloB/40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", - "sha": "64c61d9c777da56597a338afd7586cc4ad07d350", - "precision": "float16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.381580312764262, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3015775919006015, - "normalized_score": 30.157759190060155 - }, - "bbh": { - "name": "BBH", - "value": 0.33246082656550385, - "normalized_score": 7.532089563374818 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03323262839879154, - "normalized_score": 3.3232628398791544 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.3408229166666667, - "normalized_score": 1.5361979166666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.14852061170212766, - 
"normalized_score": 5.391179078014184 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-25", - "submission_date": "2024-11-25", - "generation": 3, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 0.5, - "co2_cost": 0.9631341659191968 - } - }, - { - "id": "FlofloB/83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit_float16_4c4d3660d0288295f89880a3a86f4eb9ecc9d344_True", - "model": { - "name": "FlofloB/83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", - "sha": "4c4d3660d0288295f89880a3a86f4eb9ecc9d344", - "precision": "float16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.427461121141809, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.28693976426991497, - "normalized_score": 28.693976426991494 - }, - "bbh": { - "name": "BBH", - "value": 0.33465340701604496, - "normalized_score": 8.132273330945772 - }, - "math": { - "name": "MATH Level 5", - "value": 0.030211480362537766, - "normalized_score": 3.0211480362537766 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27348993288590606, - "normalized_score": 3.1319910514541416 - }, - "musr": { - "name": "MUSR", - "value": 0.3289479166666667, - "normalized_score": 1.4184895833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15550199468085107, - "normalized_score": 6.166888297872341 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-26", - "submission_date": "2024-11-26", - "generation": 3, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 0.5, - "co2_cost": 0.9843727823413427 - } - }, - { - "id": "FlofloB/smollm2-135M_pretrained_1000k_fineweb_float16_a0f91cfda4e5a820dbe30bd5e3fbb8f233f7467e_False", - "model": { - "name": "FlofloB/smollm2-135M_pretrained_1000k_fineweb", - "sha": "a0f91cfda4e5a820dbe30bd5e3fbb8f233f7467e", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.20780841481479, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.14845388014911545, - "normalized_score": 14.845388014911546 - }, - "bbh": { - "name": "BBH", - "value": 0.2917939408206228, - "normalized_score": 2.708743994249611 - }, - "math": { - "name": "MATH Level 5", - "value": 0.00906344410876133, - "normalized_score": 0.906344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2625838926174497, - "normalized_score": 1.6778523489932917 - }, - "musr": { - "name": "MUSR", - "value": 0.35806249999999995, - "normalized_score": 3.2911458333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1163563829787234, - "normalized_score": 1.8173758865248217 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-11", - "submission_date": "2025-01-14", - "generation": 5, - "base_model": "HuggingFaceTB/SmolLM2-135M", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.135, - 
"co2_cost": 0.6758171455944094 - } - }, - { - "id": "FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed_float16_73ba3da387b3bdc50d6e3594c5c89ddebb271e81_False", - "model": { - "name": "FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed", - "sha": "73ba3da387b3bdc50d6e3594c5c89ddebb271e81", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.06134991308063, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15537329840379083, - "normalized_score": 15.537329840379083 - }, - "bbh": { - "name": "BBH", - "value": 0.3066426145674803, - "normalized_score": 3.2742669103063946 - }, - "math": { - "name": "MATH Level 5", - "value": 0.006042296072507553, - "normalized_score": 0.6042296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25083892617449666, - "normalized_score": 0.11185682326622093 - }, - "musr": { - "name": "MUSR", - "value": 0.35803125, - "normalized_score": 3.2539062500000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11427859042553191, - "normalized_score": 1.5865100472813234 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-24", - "submission_date": "2025-01-27", - "generation": 5, - "base_model": "HuggingFaceTB/SmolLM2-135M", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.135, - "co2_cost": 0.6782092114841033 - } - }, - { - "id": "FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_selected_float16_e2115c3c7315400cb6338465672087c457b157ac_False", - "model": { - "name": "FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_selected", - "sha": "e2115c3c7315400cb6338465672087c457b157ac", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.0558427645833435, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.14678054229444543, - "normalized_score": 14.678054229444541 - }, - "bbh": { - "name": "BBH", - "value": 0.29317781029884354, - "normalized_score": 2.113413506540018 - }, - "math": { - "name": "MATH Level 5", - "value": 0.006797583081570997, - "normalized_score": 0.6797583081570997 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.4047604166666667, - "normalized_score": 8.995052083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11569148936170212, - "normalized_score": 1.7434988179669018 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-12", - "submission_date": "2025-01-12", - "generation": 5, - "base_model": "HuggingFaceTB/SmolLM2-135M", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.135, - "co2_cost": 0.669616488744322 - } - }, - { - "id": "FlofloB/smollm2-135M_pretrained_1200k_fineweb_float16_d886605e0d45787f492f628fd0ea72c27f205f83_False", - "model": { - "name": "FlofloB/smollm2-135M_pretrained_1200k_fineweb", - "sha": "d886605e0d45787f492f628fd0ea72c27f205f83", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - 
"average_score": 4.188312239003598, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15809607397261488, - "normalized_score": 15.809607397261487 - }, - "bbh": { - "name": "BBH", - "value": 0.29409841468035297, - "normalized_score": 2.2372961213751537 - }, - "math": { - "name": "MATH Level 5", - "value": 0.006797583081570997, - "normalized_score": 0.6797583081570997 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.3713645833333333, - "normalized_score": 3.65390625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10762965425531915, - "normalized_score": 0.8477393617021267 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-12", - "submission_date": "2025-01-12", - "generation": 6, - "base_model": "HuggingFaceTB/SmolLM2-135M", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.135, - "co2_cost": 0.6707608185610174 - } - }, - { - "id": "FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed_float16_d743033d6f0048af31089e1133de7cee8b1e83f5_False", - "model": { - "name": "FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed", - "sha": "d743033d6f0048af31089e1133de7cee8b1e83f5", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.280291147523802, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.157771379938563, - "normalized_score": 15.7771379938563 - }, - "bbh": { - "name": "BBH", - "value": 0.29496212100634955, - "normalized_score": 2.8494191919191914 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0007552870090634441, - "normalized_score": 0.0755287009063444 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.36999999999999994, - "normalized_score": 3.416666666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11394614361702128, - "normalized_score": 1.549571513002364 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-27", - "submission_date": "2025-01-27", - "generation": 6, - "base_model": "HuggingFaceTB/SmolLM2-135M", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.135, - "co2_cost": 0.6721534238637444 - } - }, - { - "id": "FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_selected_float16_8c05c5b2f00c84d4120b3221c81c1f481c585768_False", - "model": { - "name": "FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_selected", - "sha": "8c05c5b2f00c84d4120b3221c81c1f481c585768", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.03050468089069, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15847063569107744, - "normalized_score": 15.847063569107746 - }, - "bbh": { - "name": "BBH", - "value": 0.29604672415652145, - "normalized_score": 2.206545314818884 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0075528700906344415, - "normalized_score": 
0.7552870090634441 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.3567291666666667, - "normalized_score": 1.7578124999999993 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11643949468085106, - "normalized_score": 1.8266105200945615 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-12", - "submission_date": "2025-01-14", - "generation": 6, - "base_model": "HuggingFaceTB/SmolLM2-135M", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.135, - "co2_cost": 0.6705899568677858 - } - }, - { - "id": "FlofloB/smollm2-135M_pretrained_1400k_fineweb_float16_a9c59a43cf0da87ad05ec8bd4a4c75d22c2e367c_False", - "model": { - "name": "FlofloB/smollm2-135M_pretrained_1400k_fineweb", - "sha": "a9c59a43cf0da87ad05ec8bd4a4c75d22c2e367c", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.992957353487976, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17638089158987041, - "normalized_score": 17.638089158987043 - }, - "bbh": { - "name": "BBH", - "value": 0.2921781950918249, - "normalized_score": 2.1601002436397274 - }, - "math": { - "name": "MATH Level 5", - "value": 0.011329305135951661, - "normalized_score": 1.1329305135951662 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.3873333333333333, - "normalized_score": 6.016666666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1079621010638298, - "normalized_score": 0.8846778959810875 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-13", - "submission_date": "2025-01-13", - "generation": 7, - "base_model": "HuggingFaceTB/SmolLM2-135M", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.135, - "co2_cost": 0.688092511805587 - } - }, - { - "id": "FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed_float16_f2851eedb367100fa0ca50ed25ff610a83713de2_False", - "model": { - "name": "FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed", - "sha": "f2851eedb367100fa0ca50ed25ff610a83713de2", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.063032045008573, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17066051410258115, - "normalized_score": 17.066051410258115 - }, - "bbh": { - "name": "BBH", - "value": 0.2992388897714206, - "normalized_score": 2.630029274903698 - }, - "math": { - "name": "MATH Level 5", - "value": 0.010574018126888218, - "normalized_score": 1.0574018126888218 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.3939375, - "normalized_score": 7.008854166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11045545212765957, - "normalized_score": 1.1617169030732852 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - 
"is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-28", - "submission_date": "2025-01-28", - "generation": 7, - "base_model": "HuggingFaceTB/SmolLM2-135M", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.135, - "co2_cost": 0.6882452217267977 - } - }, - { - "id": "FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_selected_float16_098a8e666d272a8cb4863b0877b6f4507e1c230c_False", - "model": { - "name": "FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_selected", - "sha": "098a8e666d272a8cb4863b0877b6f4507e1c230c", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.624639622707802, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15384956360235286, - "normalized_score": 15.384956360235286 - }, - "bbh": { - "name": "BBH", - "value": 0.291672957517483, - "normalized_score": 2.631616005839404 - }, - "math": { - "name": "MATH Level 5", - "value": 0.010574018126888218, - "normalized_score": 1.0574018126888218 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.37406249999999996, - "normalized_score": 4.6911458333333345 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11369680851063829, - "normalized_score": 1.521867612293143 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-13", - "submission_date": "2025-01-13", - "generation": 7, - "base_model": "HuggingFaceTB/SmolLM2-135M", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.135, - "co2_cost": 0.675417141898889 - } - }, - { - "id": "FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed_float16_4bacfcaa1040d1cba93da123ce57749bf2ed5e82_False", - "model": { - "name": "FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed", - "sha": "4bacfcaa1040d1cba93da123ce57749bf2ed5e82", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 3.8819679158721776, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.14747979804695985, - "normalized_score": 14.747979804695984 - }, - "bbh": { - "name": "BBH", - "value": 0.30287372123209483, - "normalized_score": 2.8225402293478967 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0037764350453172208, - "normalized_score": 0.3776435045317221 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.35784375, - "normalized_score": 2.897135416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11195146276595745, - "normalized_score": 1.3279403073286051 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-17", - "submission_date": "2025-01-17", - "generation": 1, - "base_model": "HuggingFaceTB/SmolLM2-135M", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.135, - "co2_cost": 0.6664092675100278 - } - }, - { - "id": 
"FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_selected_float16_381cdec29375aeaf0fb1bcc8ab2218443fc1cadd_False", - "model": { - "name": "FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_selected", - "sha": "381cdec29375aeaf0fb1bcc8ab2218443fc1cadd", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 3.4920262751647098, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.13451530827094332, - "normalized_score": 13.45153082709433 - }, - "bbh": { - "name": "BBH", - "value": 0.2927186496606003, - "normalized_score": 2.3223515642593 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0075528700906344415, - "normalized_score": 0.7552870090634441 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25083892617449666, - "normalized_score": 0.11185682326622093 - }, - "musr": { - "name": "MUSR", - "value": 0.36603125, - "normalized_score": 2.853906249999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11311502659574468, - "normalized_score": 1.457225177304964 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-08", - "submission_date": "2025-01-08", - "generation": 1, - "base_model": "HuggingFaceTB/SmolLM2-135M", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 0.135, - "co2_cost": 0.6822896325237215 - } - }, - { - "id": "FlofloB/smollm2-135M_pretrained_400k_fineweb_float16_2601cf93307104afc3f57f467323f5368567cb74_False", - "model": { - "name": "FlofloB/smollm2-135M_pretrained_400k_fineweb", - "sha": "2601cf93307104afc3f57f467323f5368567cb74", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.224944649553536, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1511267880335288, - "normalized_score": 15.112678803352882 - }, - "bbh": { - "name": "BBH", - "value": 0.29723404576965046, - "normalized_score": 1.8897660733797557 - }, - "math": { - "name": "MATH Level 5", - "value": 0.012084592145015106, - "normalized_score": 1.2084592145015105 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2525167785234899, - "normalized_score": 0.33557046979865535 - }, - "musr": { - "name": "MUSR", - "value": 0.3794270833333333, - "normalized_score": 4.995052083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11627327127659574, - "normalized_score": 1.8081412529550822 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-09", - "submission_date": "2025-01-10", - "generation": 2, - "base_model": "HuggingFaceTB/SmolLM2-135M", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.135, - "co2_cost": 0.6912568660729026 - } - }, - { - "id": "FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed_float16_c99f5022db1982d463626b4d87c7aeeff519b3fa_False", - "model": { - "name": "FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed", - "sha": "c99f5022db1982d463626b4d87c7aeeff519b3fa", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.710926533341115, - "has_chat_template": false - }, - 
"evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.155648124753432, - "normalized_score": 15.564812475343201 - }, - "bbh": { - "name": "BBH", - "value": 0.3048804422828362, - "normalized_score": 3.575492461700485 - }, - "math": { - "name": "MATH Level 5", - "value": 0.00906344410876133, - "normalized_score": 0.906344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2550335570469799, - "normalized_score": 0.6711409395973182 - }, - "musr": { - "name": "MUSR", - "value": 0.38599999999999995, - "normalized_score": 6.016666666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11377992021276596, - "normalized_score": 1.5311022458628842 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-18", - "submission_date": "2025-01-18", - "generation": 2, - "base_model": "HuggingFaceTB/SmolLM2-135M", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.135, - "co2_cost": 0.6793362286993389 - } - }, - { - "id": "FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_selected_float16_ecac44607d60c294b460a8786f6253d561f3de85_False", - "model": { - "name": "FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_selected", - "sha": "ecac44607d60c294b460a8786f6253d561f3de85", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.38733089603933, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15842076800666677, - "normalized_score": 15.842076800666677 - }, - "bbh": { - "name": "BBH", - "value": 0.2925171720555518, - "normalized_score": 2.073466032877798 - }, - "math": { - "name": "MATH Level 5", - "value": 0.006797583081570997, - "normalized_score": 0.6797583081570997 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25419463087248323, - "normalized_score": 0.5592841163310973 - }, - "musr": { - "name": "MUSR", - "value": 0.38199999999999995, - "normalized_score": 5.416666666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1157746010638298, - "normalized_score": 1.7527334515366433 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-09", - "submission_date": "2025-01-09", - "generation": 2, - "base_model": "HuggingFaceTB/SmolLM2-135M", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 0.135, - "co2_cost": 0.671529895073909 - } - }, - { - "id": "FlofloB/smollm2-135M_pretrained_600k_fineweb_float16_6922498cf15ce9558b8ad2c33fc43106628d0cec_False", - "model": { - "name": "FlofloB/smollm2-135M_pretrained_600k_fineweb", - "sha": "6922498cf15ce9558b8ad2c33fc43106628d0cec", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.886739290130319, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16391618682872555, - "normalized_score": 16.391618682872554 - }, - "bbh": { - "name": "BBH", - "value": 0.3013718229200533, - "normalized_score": 3.4240529327139213 - }, - "math": { - "name": "MATH Level 5", - "value": 0.006042296072507553, - "normalized_score": 0.6042296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - 
"musr": { - "name": "MUSR", - "value": 0.38085416666666666, - "normalized_score": 5.373437499999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11261635638297872, - "normalized_score": 1.4018173758865236 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-10", - "submission_date": "2025-01-11", - "generation": 3, - "base_model": "HuggingFaceTB/SmolLM2-135M", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.135, - "co2_cost": 0.6741543448317168 - } - }, - { - "id": "FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed_float16_02a7c39af8a00dbd0ffa449cd830cf57261246b3_False", - "model": { - "name": "FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed", - "sha": "02a7c39af8a00dbd0ffa449cd830cf57261246b3", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.644401741390064, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16414114549395603, - "normalized_score": 16.414114549395602 - }, - "bbh": { - "name": "BBH", - "value": 0.30001678726257036, - "normalized_score": 2.418749257278669 - }, - "math": { - "name": "MATH Level 5", - "value": 0.00906344410876133, - "normalized_score": 0.906344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2625838926174497, - "normalized_score": 1.6778523489932917 - }, - "musr": { - "name": "MUSR", - "value": 0.3793333333333333, - "normalized_score": 4.816666666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1146941489361702, - "normalized_score": 1.6326832151300221 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-18", - "submission_date": "2025-01-19", - "generation": 3, - "base_model": "HuggingFaceTB/SmolLM2-135M", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.135, - "co2_cost": 0.6677341935122562 - } - }, - { - "id": "FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_selected_float16_66e4931a5409bb8739522ff5df3b4f3373738fad_False", - "model": { - "name": "FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_selected", - "sha": "66e4931a5409bb8739522ff5df3b4f3373738fad", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.657606376854525, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16059389087620846, - "normalized_score": 16.059389087620847 - }, - "bbh": { - "name": "BBH", - "value": 0.2983444769655102, - "normalized_score": 2.1651564259299954 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0075528700906344415, - "normalized_score": 0.7552870090634441 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.3846354166666666, - "normalized_score": 5.712760416666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11619015957446809, - "normalized_score": 1.798906619385342 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": 
"2025-01-09", - "submission_date": "2025-01-09", - "generation": 3, - "base_model": "HuggingFaceTB/SmolLM2-135M", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.135, - "co2_cost": 0.6755758340530301 - } - }, - { - "id": "FlofloB/smollm2-135M_pretrained_800k_fineweb_float16_066f4d48c5f6d83ac9a44e8572a3d20c74f6ec08_False", - "model": { - "name": "FlofloB/smollm2-135M_pretrained_800k_fineweb", - "sha": "066f4d48c5f6d83ac9a44e8572a3d20c74f6ec08", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.17450578103981, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16414114549395603, - "normalized_score": 16.414114549395602 - }, - "bbh": { - "name": "BBH", - "value": 0.29594449748780255, - "normalized_score": 2.348388410325006 - }, - "math": { - "name": "MATH Level 5", - "value": 0.008308157099697885, - "normalized_score": 0.8308157099697886 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24916107382550334, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.370125, - "normalized_score": 3.765625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11519281914893617, - "normalized_score": 1.6880910165484628 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-11", - "submission_date": "2025-01-14", - "generation": 4, - "base_model": "HuggingFaceTB/SmolLM2-135M", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.135, - "co2_cost": 0.6726619081749295 - } - }, - { - "id": "FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed_float16_60c100113d77cced9b284172608f100297183ac9_False", - "model": { - "name": "FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed", - "sha": "60c100113d77cced9b284172608f100297183ac9", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.03254333671229, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1622927166584662, - "normalized_score": 16.22927166584662 - }, - "bbh": { - "name": "BBH", - "value": 0.3038096660271284, - "normalized_score": 3.210703163941808 - }, - "math": { - "name": "MATH Level 5", - "value": 0.006797583081570997, - "normalized_score": 0.6797583081570997 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2525167785234899, - "normalized_score": 0.33557046979865535 - }, - "musr": { - "name": "MUSR", - "value": 0.3992708333333333, - "normalized_score": 8.208854166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11377992021276596, - "normalized_score": 1.5311022458628842 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-19", - "submission_date": "2025-01-19", - "generation": 4, - "base_model": "HuggingFaceTB/SmolLM2-135M", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.135, - "co2_cost": 0.668405275912299 - } - }, - { - "id": "FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_selected_float16_7b351540b5fb395759e44385826c5fedef8672ec_False", - "model": { - "name": "FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_selected", - "sha": 
"7b351540b5fb395759e44385826c5fedef8672ec", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.118739390912178, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.14742993036254914, - "normalized_score": 14.742993036254914 - }, - "bbh": { - "name": "BBH", - "value": 0.2942808065535252, - "normalized_score": 1.9228579509844617 - }, - "math": { - "name": "MATH Level 5", - "value": 0.004531722054380665, - "normalized_score": 0.4531722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.3766354166666666, - "normalized_score": 4.579427083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11303191489361702, - "normalized_score": 1.4479905437352243 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-11", - "submission_date": "2025-01-14", - "generation": 4, - "base_model": "HuggingFaceTB/SmolLM2-135M", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.135, - "co2_cost": 0.6702372444523718 - } - }, - { - "id": "FlofloB/smollm2_pretrained_200k_fineweb_float16_c3086ab3555e766f0b3903b8b9a1a290e3e25f3d_False", - "model": { - "name": "FlofloB/smollm2_pretrained_200k_fineweb", - "sha": "c3086ab3555e766f0b3903b8b9a1a290e3e25f3d", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.00559929596115, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15270039051937748, - "normalized_score": 15.270039051937747 - }, - "bbh": { - "name": "BBH", - "value": 0.299468427221449, - "normalized_score": 2.872523000621309 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0037764350453172208, - "normalized_score": 0.3776435045317221 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24748322147651006, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3699375, - "normalized_score": 3.7421874999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11594082446808511, - "normalized_score": 1.7712027186761226 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-08", - "submission_date": "2025-01-08", - "generation": 1, - "base_model": "HuggingFaceTB/SmolLM2-135M", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 0.135, - "co2_cost": 0.6594641244107149 - } - }, - { - "id": "FlofloB/test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit_float16_cfd97ca5927a2e09ec30001a576d82dd8b635e09_True", - "model": { - "name": "FlofloB/test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit", - "sha": "cfd97ca5927a2e09ec30001a576d82dd8b635e09", - "precision": "float16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 24.48570245546706, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.521546164177715, - "normalized_score": 52.15461641777151 - }, - "bbh": { - "name": "BBH", - "value": 0.5240829189778252, - "normalized_score": 
32.882433170322564 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11027190332326284, - "normalized_score": 11.027190332326283 - }, - "gpqa": { - "name": "GPQA", - "value": 0.311241610738255, - "normalized_score": 8.165548098434002 - }, - "musr": { - "name": "MUSR", - "value": 0.42441666666666666, - "normalized_score": 12.452083333333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3720910904255319, - "normalized_score": 30.232343380614658 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-21", - "submission_date": "2024-11-21", - "generation": 1, - "base_model": "unsloth/phi-3-mini-4k-instruct-bnb-4bit", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 16.0, - "co2_cost": 1.5153220268770875 - } - }, - { - "id": "FuJhen/ft-openhermes-25-mistral-7b-irca-dpo-pairs_bfloat16_24c0bea14d53e6f67f1fbe2eca5bfe7cae389b33_True", - "model": { - "name": "FuJhen/ft-openhermes-25-mistral-7b-irca-dpo-pairs", - "sha": "24c0bea14d53e6f67f1fbe2eca5bfe7cae389b33", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 20.395988185954646, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5420041046645123, - "normalized_score": 54.20041046645124 - }, - "bbh": { - "name": "BBH", - "value": 0.47730323895548116, - "normalized_score": 26.596860970431894 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04833836858006042, - "normalized_score": 4.833836858006042 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2785234899328859, - "normalized_score": 3.8031319910514525 - }, - "musr": { - "name": "MUSR", - "value": 0.417375, - "normalized_score": 11.20520833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2956283244680851, - "normalized_score": 21.736480496453904 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-12", - "submission_date": "2024-09-12", - "generation": 1, - "base_model": "FuJhen/ft-openhermes-25-mistral-7b-irca-dpo-pairs (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.483, - "co2_cost": 2.004096435504555 - } - }, - { - "id": "FuJhen/mistral-instruct-7B-DPO_bfloat16_e0bc86c23ce5aae1db576c8cca6f06f1f73af2db_True", - "model": { - "name": "FuJhen/mistral-instruct-7B-DPO", - "sha": "e0bc86c23ce5aae1db576c8cca6f06f1f73af2db", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 19.029530782130646, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49684171332065585, - "normalized_score": 49.68417133206558 - }, - "bbh": { - "name": "BBH", - "value": 0.46239050561386214, - "normalized_score": 24.925827194936442 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03851963746223565, - "normalized_score": 3.8519637462235647 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.4015625, - "normalized_score": 9.428645833333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30335771276595747, - "normalized_score": 22.595301418439718 - } - }, - "features": { - "is_not_available_on_hub": 
true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-12", - "submission_date": "2024-09-12", - "generation": 1, - "base_model": "FuJhen/mistral-instruct-7B-DPO (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.496, - "co2_cost": 2.019293470108344 - } - }, - { - "id": "FuJhen/mistral_7b_v0.1_structedData_e2e_bfloat16_7231864981174d9bee8c7687c24c8344414eae6b_False", - "model": { - "name": "FuJhen/mistral_7b_v0.1_structedData_e2e", - "sha": "7231864981174d9bee8c7687c24c8344414eae6b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 10.909311048443838, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17268403391889076, - "normalized_score": 17.268403391889077 - }, - "bbh": { - "name": "BBH", - "value": 0.4113914854984489, - "normalized_score": 18.06242392393546 - }, - "math": { - "name": "MATH Level 5", - "value": 0.004531722054380665, - "normalized_score": 0.4531722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.3722916666666667, - "normalized_score": 5.6364583333333345 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2810837765957447, - "normalized_score": 20.12041962174941 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-13", - "submission_date": "2024-09-13", - "generation": 1, - "base_model": "FuJhen/mistral_7b_v0.1_structedData_e2e (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.0, - "co2_cost": 2.1604922772920125 - } - }, - { - "id": "FuJhen/mistral_7b_v0.1_structedData_viggo_bfloat16_7231864981174d9bee8c7687c24c8344414eae6b_False", - "model": { - "name": "FuJhen/mistral_7b_v0.1_structedData_viggo", - "sha": "7231864981174d9bee8c7687c24c8344414eae6b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 12.440582510781873, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17832905579418165, - "normalized_score": 17.832905579418167 - }, - "bbh": { - "name": "BBH", - "value": 0.45238634545986817, - "normalized_score": 23.960171694414896 - }, - "math": { - "name": "MATH Level 5", - "value": 0.028700906344410877, - "normalized_score": 2.8700906344410875 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.37381250000000005, - "normalized_score": 3.9265625000000015 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2942154255319149, - "normalized_score": 21.579491725768317 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-13", - "submission_date": "2024-09-13", - "generation": 1, - "base_model": "FuJhen/mistral_7b_v0.1_structedData_viggo (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.483, - "co2_cost": 2.1522273022121814 - } - }, - { - "id": 
"FuseAI/FuseChat-7B-v2.0_float16_65fdb310c09f56b9aca01b89a849f06f39faeb75_False", - "model": { - "name": "FuseAI/FuseChat-7B-v2.0", - "sha": "65fdb310c09f56b9aca01b89a849f06f39faeb75", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.14636743747765, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3423194900641409, - "normalized_score": 34.23194900641409 - }, - "bbh": { - "name": "BBH", - "value": 0.4954212795868764, - "normalized_score": 29.341638180782923 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06117824773413897, - "normalized_score": 6.117824773413897 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - "musr": { - "name": "MUSR", - "value": 0.4796666666666667, - "normalized_score": 20.224999999999994 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3162400265957447, - "normalized_score": 24.02666962174941 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-13", - "submission_date": "2024-11-21", - "generation": 1, - "base_model": "openchat/openchat_3.5", - "hub_license": "apache-2.0", - "hub_hearts": 9, - "params_billions": 7.242, - "co2_cost": 0.8866122219783815 - } - }, - { - "id": "FuseAI/FuseChat-Llama-3.1-8B-Instruct_bfloat16_cbb3accdd01a81194e947dfde1b95707db67f2b7_True", - "model": { - "name": "FuseAI/FuseChat-Llama-3.1-8B-Instruct", - "sha": "cbb3accdd01a81194e947dfde1b95707db67f2b7", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.595509644559655, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7204816553411615, - "normalized_score": 72.04816553411615 - }, - "bbh": { - "name": "BBH", - "value": 0.5119887898349903, - "normalized_score": 30.84806521622957 - }, - "math": { - "name": "MATH Level 5", - "value": 0.24773413897280966, - "normalized_score": 24.773413897280967 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.38200000000000006, - "normalized_score": 6.150000000000006 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37333776595744683, - "normalized_score": 30.37086288416076 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-20", - "submission_date": "2025-01-07", - "generation": 0, - "base_model": "FuseAI/FuseChat-Llama-3.1-8B-Instruct", - "hub_license": "apache-2.0", - "hub_hearts": 10, - "params_billions": 8.03, - "co2_cost": 1.3404290644370094 - } - }, - { - "id": "FuseAI/FuseChat-Llama-3.2-3B-Instruct_bfloat16_db208455d103432dc8d683c242ef8b678d5b26c2_True", - "model": { - "name": "FuseAI/FuseChat-Llama-3.2-3B-Instruct", - "sha": "db208455d103432dc8d683c242ef8b678d5b26c2", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.746913825686644, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.684886102208806, - 
"normalized_score": 68.48861022088059 - }, - "bbh": { - "name": "BBH", - "value": 0.46583679221755164, - "normalized_score": 24.2199004496583 - }, - "math": { - "name": "MATH Level 5", - "value": 0.24244712990936557, - "normalized_score": 24.244712990936556 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.39139583333333333, - "normalized_score": 7.691145833333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31316489361702127, - "normalized_score": 23.684988179669027 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-06", - "submission_date": "2025-02-08", - "generation": 0, - "base_model": "FuseAI/FuseChat-Llama-3.2-3B-Instruct", - "hub_license": "", - "hub_hearts": 6, - "params_billions": 3.213, - "co2_cost": 0.5067297106978743 - } - }, - { - "id": "FuseAI/FuseChat-Qwen-2.5-7B-Instruct_bfloat16_7735ee1acb31112cf93c35e8e22e764ad27cce3b_True", - "model": { - "name": "FuseAI/FuseChat-Qwen-2.5-7B-Instruct", - "sha": "7735ee1acb31112cf93c35e8e22e764ad27cce3b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 31.40771569311038, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5905641475728844, - "normalized_score": 59.05641475728844 - }, - "bbh": { - "name": "BBH", - "value": 0.552599883615556, - "normalized_score": 36.25134763068275 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4561933534743202, - "normalized_score": 45.61933534743202 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.3873645833333333, - "normalized_score": 6.720572916666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.41181848404255317, - "normalized_score": 34.64649822695036 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-12", - "submission_date": "2024-12-21", - "generation": 0, - "base_model": "FuseAI/FuseChat-Qwen-2.5-7B-Instruct", - "hub_license": "", - "hub_hearts": 13, - "params_billions": 7.616, - "co2_cost": 1.309118142780276 - } - }, - { - "id": "GalrionSoftworks/MN-LooseCannon-12B-v1_bfloat16__True", - "model": { - "name": "GalrionSoftworks/MN-LooseCannon-12B-v1", - "sha": "", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.12442725339166, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5417791459992819, - "normalized_score": 54.177914599928194 - }, - "bbh": { - "name": "BBH", - "value": 0.5128183808679557, - "normalized_score": 29.976062092951295 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08534743202416918, - "normalized_score": 8.534743202416918 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28523489932885904, - "normalized_score": 4.697986577181204 - }, - "musr": { - "name": "MUSR", - "value": 0.41384375, - "normalized_score": 10.963802083333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3195644946808511, - "normalized_score": 
24.396054964539008 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-09", - "submission_date": "2024-09-05", - "generation": 1, - "base_model": "GalrionSoftworks/MN-LooseCannon-12B-v1 (Merge)", - "hub_license": "", - "hub_hearts": 8, - "params_billions": 12.248, - "co2_cost": 3.0580393650897593 - } - }, - { - "id": "GalrionSoftworks/MagnusIntellectus-12B-v1_bfloat16_fc83cb3eec2f8328448c5fe3cb830fc77983a6b9_True", - "model": { - "name": "GalrionSoftworks/MagnusIntellectus-12B-v1", - "sha": "fc83cb3eec2f8328448c5fe3cb830fc77983a6b9", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.773295532025813, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4421368635221213, - "normalized_score": 44.21368635221213 - }, - "bbh": { - "name": "BBH", - "value": 0.5323010476246133, - "normalized_score": 33.26225439614359 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0649546827794562, - "normalized_score": 6.495468277945619 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28439597315436244, - "normalized_score": 4.5861297539149914 - }, - "musr": { - "name": "MUSR", - "value": 0.4428020833333333, - "normalized_score": 15.183593749999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34208776595744683, - "normalized_score": 26.898640661938533 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-13", - "submission_date": "2024-09-05", - "generation": 1, - "base_model": "GalrionSoftworks/MagnusIntellectus-12B-v1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 5, - "params_billions": 12.248, - "co2_cost": 3.2485276176573255 - } - }, - { - "id": "GenVRadmin/AryaBhatta-GemmaOrca-2-Merged_bfloat16_0a86455c5f0606f6b743ba0f0b1c1c26bd50976c_False", - "model": { - "name": "GenVRadmin/AryaBhatta-GemmaOrca-2-Merged", - "sha": "0a86455c5f0606f6b743ba0f0b1c1c26bd50976c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "GemmaForCausalLM", - "average_score": 14.006450277637631, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.30637375497014585, - "normalized_score": 30.637375497014588 - }, - "bbh": { - "name": "BBH", - "value": 0.3887493166323577, - "normalized_score": 13.661591696662839 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04984894259818731, - "normalized_score": 4.984894259818732 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.4550208333333334, - "normalized_score": 16.9109375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.23844747340425532, - "normalized_score": 15.383052600472812 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-09", - "submission_date": "2025-02-06", - "generation": 0, - "base_model": "GenVRadmin/AryaBhatta-GemmaOrca-2-Merged", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 8.538, - "co2_cost": 
1.8188650858383462 - } - }, - { - "id": "GenVRadmin/AryaBhatta-GemmaOrca-Merged_bfloat16_0b0363f808aabaf8fe85ae8229e968abca2a54de_False", - "model": { - "name": "GenVRadmin/AryaBhatta-GemmaOrca-Merged", - "sha": "0b0363f808aabaf8fe85ae8229e968abca2a54de", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "GemmaForCausalLM", - "average_score": 11.99472750549792, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.30637375497014585, - "normalized_score": 30.637375497014588 - }, - "bbh": { - "name": "BBH", - "value": 0.4130633897394575, - "normalized_score": 17.68358820544961 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0513595166163142, - "normalized_score": 5.13595166163142 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2558724832214765, - "normalized_score": 0.7829977628635317 - }, - "musr": { - "name": "MUSR", - "value": 0.3523854166666667, - "normalized_score": 4.081510416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.22282247340425532, - "normalized_score": 13.6469414893617 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-01", - "submission_date": "2025-02-06", - "generation": 0, - "base_model": "GenVRadmin/AryaBhatta-GemmaOrca-Merged", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 8.538, - "co2_cost": 1.9810632012201572 - } - }, - { - "id": "GenVRadmin/AryaBhatta-GemmaUltra-Merged_bfloat16_837acef7bd681ef60f03ab16e4670fb72e47e134_False", - "model": { - "name": "GenVRadmin/AryaBhatta-GemmaUltra-Merged", - "sha": "837acef7bd681ef60f03ab16e4670fb72e47e134", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "GemmaForCausalLM", - "average_score": 13.28281509458921, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.30207737691547315, - "normalized_score": 30.207737691547315 - }, - "bbh": { - "name": "BBH", - "value": 0.4141445378464817, - "normalized_score": 17.96825031857262 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05362537764350453, - "normalized_score": 5.362537764350453 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25335570469798663, - "normalized_score": 0.4474272930648837 - }, - "musr": { - "name": "MUSR", - "value": 0.42785416666666665, - "normalized_score": 11.6484375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2265625, - "normalized_score": 14.0625 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-12", - "submission_date": "2025-02-06", - "generation": 0, - "base_model": "GenVRadmin/AryaBhatta-GemmaUltra-Merged", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 8.538, - "co2_cost": 2.0074310732975738 - } - }, - { - "id": "GenVRadmin/llama38bGenZ_Vikas-Merged_bfloat16_a15de41fcf74b13bdc8d9b680bdc7836fc5aecfe_False", - "model": { - "name": "GenVRadmin/llama38bGenZ_Vikas-Merged", - "sha": "a15de41fcf74b13bdc8d9b680bdc7836fc5aecfe", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.09338316937527, - "has_chat_template": false - }, - 
"evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.30002947734234053, - "normalized_score": 30.002947734234056 - }, - "bbh": { - "name": "BBH", - "value": 0.4535981003984562, - "normalized_score": 23.131910422007774 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05740181268882175, - "normalized_score": 5.740181268882175 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2953020134228188, - "normalized_score": 6.040268456375841 - }, - "musr": { - "name": "MUSR", - "value": 0.44016666666666665, - "normalized_score": 13.620833333333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.26221742021276595, - "normalized_score": 18.02415780141844 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-22", - "submission_date": "2025-02-06", - "generation": 0, - "base_model": "GenVRadmin/llama38bGenZ_Vikas-Merged", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4353168101343385 - } - }, - { - "id": "GoToCompany/gemma2-9b-cpt-sahabatai-v1-instruct_bfloat16_ca19cec82a7d2bdba20020e1bebf296417cfc3ee_False", - "model": { - "name": "GoToCompany/gemma2-9b-cpt-sahabatai-v1-instruct", - "sha": "ca19cec82a7d2bdba20020e1bebf296417cfc3ee", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 32.46826016178984, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6550607942481504, - "normalized_score": 65.50607942481504 - }, - "bbh": { - "name": "BBH", - "value": 0.5954551751157878, - "normalized_score": 41.86650373118869 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2054380664652568, - "normalized_score": 20.54380664652568 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3347315436241611, - "normalized_score": 11.297539149888143 - }, - "musr": { - "name": "MUSR", - "value": 0.4778645833333333, - "normalized_score": 19.333072916666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4263630319148936, - "normalized_score": 36.26255910165484 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-06", - "submission_date": "2024-11-20", - "generation": 1, - "base_model": "GoToCompany/gemma2-9b-cpt-sahabatai-v1-instruct (Merge)", - "hub_license": "gemma", - "hub_hearts": 35, - "params_billions": 9.242, - "co2_cost": 3.8621893014690505 - } - }, - { - "id": "GoToCompany/llama3-8b-cpt-sahabatai-v1-instruct_bfloat16_20fd3cff1dc86553d11b5c4b2fdbb6f2dd1ede55_True", - "model": { - "name": "GoToCompany/llama3-8b-cpt-sahabatai-v1-instruct", - "sha": "20fd3cff1dc86553d11b5c4b2fdbb6f2dd1ede55", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.05939902243402, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.523844510343666, - "normalized_score": 52.38445103436659 - }, - "bbh": { - "name": "BBH", - "value": 0.4951292004509417, - "normalized_score": 28.539529393915917 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12764350453172205, - "normalized_score": 12.764350453172204 - }, - "gpqa": { - "name": "GPQA", - "value": 
0.26677852348993286, - "normalized_score": 2.2371364653243813 - }, - "musr": { - "name": "MUSR", - "value": 0.44884375, - "normalized_score": 15.172135416666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3453291223404255, - "normalized_score": 27.258791371158388 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-06", - "submission_date": "2024-11-20", - "generation": 1, - "base_model": "GoToCompany/llama3-8b-cpt-sahabatai-v1-instruct (Merge)", - "hub_license": "llama3", - "hub_hearts": 11, - "params_billions": 8.03, - "co2_cost": 1.3468218426289178 - } - }, - { - "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1_bfloat16_bfc0e7dc6add02baecd9b6f84a078f7f3d164315_True", - "model": { - "name": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1", - "sha": "bfc0e7dc6add02baecd9b6f84a078f7f3d164315", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 9.768236092545305, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.347189900574919, - "normalized_score": 34.7189900574919 - }, - "bbh": { - "name": "BBH", - "value": 0.32683063456958195, - "normalized_score": 6.845785955173547 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0891238670694864, - "normalized_score": 8.91238670694864 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2516778523489933, - "normalized_score": 0.22371364653244186 - }, - "musr": { - "name": "MUSR", - "value": 0.32625, - "normalized_score": 0.7812499999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16414561170212766, - "normalized_score": 7.127290189125294 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-17", - "submission_date": "2024-11-18", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 0.63, - "co2_cost": 0.9752848034650319 - } - }, - { - "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1_float16_bfc0e7dc6add02baecd9b6f84a078f7f3d164315_True", - "model": { - "name": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1", - "sha": "bfc0e7dc6add02baecd9b6f84a078f7f3d164315", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.415919404386559, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3416944817528602, - "normalized_score": 34.16944817528602 - }, - "bbh": { - "name": "BBH", - "value": 0.32921013057720044, - "normalized_score": 7.2211690840719855 - }, - "math": { - "name": "MATH Level 5", - "value": 0.002265861027190332, - "normalized_score": 0.2265861027190332 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2575503355704698, - "normalized_score": 1.0067114093959737 - }, - "musr": { - "name": "MUSR", - "value": 0.3249166666666667, - "normalized_score": 0.7812499999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16381316489361702, - "normalized_score": 7.0903516548463354 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - 
"is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-17", - "submission_date": "2024-11-18", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 0.63, - "co2_cost": 0.49800361885240557 - } - }, - { - "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1_bfloat16_eca7edeba61e894597e9940348e8d90817c1ad79_True", - "model": { - "name": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1", - "sha": "eca7edeba61e894597e9940348e8d90817c1ad79", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 18.44117473256343, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47685806992114255, - "normalized_score": 47.68580699211426 - }, - "bbh": { - "name": "BBH", - "value": 0.418600731531926, - "normalized_score": 18.306013289403676 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2084592145015106, - "normalized_score": 20.84592145015106 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24328859060402686, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3674895833333333, - "normalized_score": 4.002864583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27825797872340424, - "normalized_score": 19.806442080378247 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-20", - "submission_date": "2024-09-28", - "generation": 1, - "base_model": "Qwen/Qwen2.5-1.5B", - "hub_license": "apache-2.0", - "hub_hearts": 5, - "params_billions": 1.777, - "co2_cost": 1.5667617401508371 - } - }, - { - "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2_bfloat16_ff4a6eff69adb015dfcfbff7a2d2dc43b34afe89_True", - "model": { - "name": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2", - "sha": "ff4a6eff69adb015dfcfbff7a2d2dc43b34afe89", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 15.56674922027016, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.421553699738915, - "normalized_score": 42.155369973891496 - }, - "bbh": { - "name": "BBH", - "value": 0.40418921704436744, - "normalized_score": 16.499503211302823 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1268882175226586, - "normalized_score": 12.688821752265861 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23993288590604026, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.37685416666666666, - "normalized_score": 4.7067708333333345 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.25615026595744683, - "normalized_score": 17.350029550827426 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-28", - "submission_date": "2024-09-28", - "generation": 2, - "base_model": "Qwen/Qwen2.5-1.5B", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 1.544, - "co2_cost": 1.4384858224632824 - } - }, - { - "id": 
"Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3_bfloat16_03ffa6f7a6ada9d63d838707c597297f048d409b_True", - "model": { - "name": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3", - "sha": "03ffa6f7a6ada9d63d838707c597297f048d409b", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 15.592787360948462, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42525055740989465, - "normalized_score": 42.52505574098946 - }, - "bbh": { - "name": "BBH", - "value": 0.4053446177133173, - "normalized_score": 16.439711885397813 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13066465256797583, - "normalized_score": 13.066465256797583 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24328859060402686, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.37018749999999995, - "normalized_score": 4.240104166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.25556848404255317, - "normalized_score": 17.28538711583924 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-28", - "submission_date": "2024-09-28", - "generation": 3, - "base_model": "Qwen/Qwen2.5-1.5B", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 1.544, - "co2_cost": 1.4124015158696626 - } - }, - { - "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-14B-Instruct-abliterated-v4_bfloat16_00afd27eef16e835fcb0d8e687435dca3c185bdf_True", - "model": { - "name": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-14B-Instruct-abliterated-v4", - "sha": "00afd27eef16e835fcb0d8e687435dca3c185bdf", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 42.550065874753976, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8291666112581284, - "normalized_score": 82.91666112581285 - }, - "bbh": { - "name": "BBH", - "value": 0.6355637424320617, - "normalized_score": 48.05226992969286 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5422960725075529, - "normalized_score": 54.229607250755286 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3422818791946309, - "normalized_score": 12.304250559284117 - }, - "musr": { - "name": "MUSR", - "value": 0.4286666666666667, - "normalized_score": 13.15 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5018284574468085, - "normalized_score": 44.64760638297872 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-21", - "submission_date": "2024-10-23", - "generation": 2, - "base_model": "Qwen/Qwen2.5-14B", - "hub_license": "apache-2.0", - "hub_hearts": 17, - "params_billions": 14.77, - "co2_cost": 3.4942336302425923 - } - }, - { - "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2_bfloat16_ecf4024048ea1be2f0840a50080fb79b88aacde9_True", - "model": { - "name": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2", - "sha": "ecf4024048ea1be2f0840a50080fb79b88aacde9", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.31663337889917, - 
"has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7813811797142693, - "normalized_score": 78.13811797142694 - }, - "bbh": { - "name": "BBH", - "value": 0.5309672164610734, - "normalized_score": 33.33398601463088 - }, - "math": { - "name": "MATH Level 5", - "value": 0.45317220543806647, - "normalized_score": 45.31722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2986577181208054, - "normalized_score": 6.487695749440718 - }, - "musr": { - "name": "MUSR", - "value": 0.43539583333333337, - "normalized_score": 13.957812500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4119847074468085, - "normalized_score": 34.66496749408983 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-20", - "submission_date": "2024-10-08", - "generation": 1, - "base_model": "Qwen/Qwen2.5-7B", - "hub_license": "apache-2.0", - "hub_hearts": 6, - "params_billions": 7.616, - "co2_cost": 2.4030128559486643 - } - }, - { - "id": "Goekdeniz-Guelmez/j.o.s.i.e.v4o-1.5b-dpo-stage1-v1_bfloat16_d5ddad290d83b1ba8a7612a6c1cfad6fb4346fe4_True", - "model": { - "name": "Goekdeniz-Guelmez/j.o.s.i.e.v4o-1.5b-dpo-stage1-v1", - "sha": "d5ddad290d83b1ba8a7612a6c1cfad6fb4346fe4", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 15.07804770111701, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.41883092417009093, - "normalized_score": 41.883092417009095 - }, - "bbh": { - "name": "BBH", - "value": 0.41242101633634826, - "normalized_score": 17.748016873381815 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12009063444108761, - "normalized_score": 12.009063444108762 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25083892617449666, - "normalized_score": 0.11185682326622093 - }, - "musr": { - "name": "MUSR", - "value": 0.3528541666666667, - "normalized_score": 1.4401041666666685 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2554853723404255, - "normalized_score": 17.2761524822695 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-07", - "submission_date": "2024-10-08", - "generation": 2, - "base_model": "Qwen/Qwen2.5-1.5B", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 1.544, - "co2_cost": 1.5823051977207447 - } - }, - { - "id": "Goekdeniz-Guelmez/josie-3b-v6.0_float16_3f8ce40bdaa0757ede5aaaf2cdd14538b559b4db_True", - "model": { - "name": "Goekdeniz-Guelmez/josie-3b-v6.0", - "sha": "3f8ce40bdaa0757ede5aaaf2cdd14538b559b4db", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 24.746540779866937, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6009554648333089, - "normalized_score": 60.09554648333089 - }, - "bbh": { - "name": "BBH", - "value": 0.4496147842264783, - "normalized_score": 22.87108835512943 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2938066465256798, - "normalized_score": 29.38066465256798 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 
- }, - "musr": { - "name": "MUSR", - "value": 0.386125, - "normalized_score": 6.098958333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32197473404255317, - "normalized_score": 24.66385933806146 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2025-01-07", - "generation": 1, - "base_model": "Removed", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 3.086, - "co2_cost": 1.491912040011956 - } - }, - { - "id": "Goekdeniz-Guelmez/josie-7b-v6.0_float16_d2e22fda9ce97aa5ca745d3b6d8ca2f1f7103ed5_True", - "model": { - "name": "Goekdeniz-Guelmez/josie-7b-v6.0", - "sha": "d2e22fda9ce97aa5ca745d3b6d8ca2f1f7103ed5", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 32.374168071296964, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7411645544931892, - "normalized_score": 74.11645544931892 - }, - "bbh": { - "name": "BBH", - "value": 0.5104855208094123, - "normalized_score": 30.44475330982171 - }, - "math": { - "name": "MATH Level 5", - "value": 0.43580060422960726, - "normalized_score": 43.58006042296073 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.41539583333333335, - "normalized_score": 10.557812499999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3806515957446808, - "normalized_score": 31.183510638297868 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2025-01-07", - "generation": 3, - "base_model": "Qwen/Qwen2.5-7B", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.3509256308404543 - } - }, - { - "id": "Goekdeniz-Guelmez/josie-7b-v6.0-step2000_bfloat16_df28f1369c22a5f2feac05793d4a460a5f873891_True", - "model": { - "name": "Goekdeniz-Guelmez/josie-7b-v6.0-step2000", - "sha": "df28f1369c22a5f2feac05793d4a460a5f873891", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 26.97043751712353, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7627716680629618, - "normalized_score": 76.27716680629618 - }, - "bbh": { - "name": "BBH", - "value": 0.5097811950503962, - "normalized_score": 30.081094189593973 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2802013422818792, - "normalized_score": 4.026845637583895 - }, - "musr": { - "name": "MUSR", - "value": 0.45793750000000005, - "normalized_score": 17.7421875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.40325797872340424, - "normalized_score": 33.69533096926713 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-11", - "submission_date": "2024-12-11", - "generation": 2, - "base_model": "Qwen/Qwen2.5-7B", - "hub_license": "apache-2.0", - "hub_hearts": 2, - 
"params_billions": 7.616, - "co2_cost": 1.3788389441043891 - } - }, - { - "id": "Goekdeniz-Guelmez/josie-7b-v6.0-step2000_float16_df28f1369c22a5f2feac05793d4a460a5f873891_True", - "model": { - "name": "Goekdeniz-Guelmez/josie-7b-v6.0-step2000", - "sha": "df28f1369c22a5f2feac05793d4a460a5f873891", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 33.83292639490552, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7597740661444966, - "normalized_score": 75.97740661444966 - }, - "bbh": { - "name": "BBH", - "value": 0.510712680636641, - "normalized_score": 30.395812905314894 - }, - "math": { - "name": "MATH Level 5", - "value": 0.42371601208459214, - "normalized_score": 42.37160120845921 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27684563758389263, - "normalized_score": 3.5794183445190177 - }, - "musr": { - "name": "MUSR", - "value": 0.45393750000000005, - "normalized_score": 17.208854166666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4011801861702128, - "normalized_score": 33.46446513002365 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-11", - "submission_date": "2024-12-11", - "generation": 2, - "base_model": "Qwen/Qwen2.5-7B", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 7.616, - "co2_cost": 1.3833978614758087 - } - }, - { - "id": "GreenNode/GreenNode-small-9B-it_float16_1ba4ce8e2267c7fcc820961a9bfc13ab80150866_True", - "model": { - "name": "GreenNode/GreenNode-small-9B-it", - "sha": "1ba4ce8e2267c7fcc820961a9bfc13ab80150866", - "precision": "float16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 31.194506347689323, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7436125037123721, - "normalized_score": 74.36125037123722 - }, - "bbh": { - "name": "BBH", - "value": 0.599383874005197, - "normalized_score": 41.899925635193455 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17447129909365558, - "normalized_score": 17.447129909365557 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3196308724832215, - "normalized_score": 9.284116331096197 - }, - "musr": { - "name": "MUSR", - "value": 0.42041666666666666, - "normalized_score": 11.652083333333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3927027925531915, - "normalized_score": 32.52253250591017 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-10-14", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 9.242, - "co2_cost": 5.291887986432054 - } - }, - { - "id": "GritLM/GritLM-7B-KTO_bfloat16_b5c48669508c1de18c698460c187f64e90e7df44_True", - "model": { - "name": "GritLM/GritLM-7B-KTO", - "sha": "b5c48669508c1de18c698460c187f64e90e7df44", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.2358949114183, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5310132670203948, - "normalized_score": 
53.10132670203948 - }, - "bbh": { - "name": "BBH", - "value": 0.485293719684692, - "normalized_score": 27.904317623033844 - }, - "math": { - "name": "MATH Level 5", - "value": 0.027190332326283987, - "normalized_score": 2.719033232628399 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.37102083333333336, - "normalized_score": 6.6442708333333345 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.26803523936170215, - "normalized_score": 18.670582151300238 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-16", - "submission_date": "2024-08-04", - "generation": 0, - "base_model": "GritLM/GritLM-7B-KTO", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 7.242, - "co2_cost": 1.2797271864870114 - } - }, - { - "id": "GritLM/GritLM-8x7B-KTO_bfloat16_938913477064fcc498757c5136d9899bb6e713ed_True", - "model": { - "name": "GritLM/GritLM-8x7B-KTO", - "sha": "938913477064fcc498757c5136d9899bb6e713ed", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 26.2413047085506, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5714049832222946, - "normalized_score": 57.14049832222946 - }, - "bbh": { - "name": "BBH", - "value": 0.5820304362331497, - "normalized_score": 40.8261615594601 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12235649546827794, - "normalized_score": 12.235649546827794 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.42165625, - "normalized_score": 11.673697916666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36477726063829785, - "normalized_score": 29.419695626477544 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-17", - "submission_date": "2024-08-04", - "generation": 0, - "base_model": "GritLM/GritLM-8x7B-KTO", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 46.703, - "co2_cost": 9.208926459712531 - } - }, - { - "id": "Groq/Llama-3-Groq-8B-Tool-Use_bfloat16_3bf6b914d7043d1bbfcfc7a9aa7581a8104eabac_True", - "model": { - "name": "Groq/Llama-3-Groq-8B-Tool-Use", - "sha": "3bf6b914d7043d1bbfcfc7a9aa7581a8104eabac", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.44560137489326, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6098230472922956, - "normalized_score": 60.982304729229554 - }, - "bbh": { - "name": "BBH", - "value": 0.4863384977901497, - "normalized_score": 27.254234386573227 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06042296072507553, - "normalized_score": 6.042296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.36603125000000003, - "normalized_score": 5.3872395833333355 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33992686170212766, - "normalized_score": 
26.658540189125297 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-24", - "submission_date": "2025-01-01", - "generation": 1, - "base_model": "meta-llama/Meta-Llama-3-8B", - "hub_license": "llama3", - "hub_hearts": 274, - "params_billions": 8.03, - "co2_cost": 1.0086565382541746 - } - }, - { - "id": "Gryphe/Pantheon-RP-1.0-8b-Llama-3_bfloat16_70a6df202c9df9abdc6928bec5a5ab47f2667aee_True", - "model": { - "name": "Gryphe/Pantheon-RP-1.0-8b-Llama-3", - "sha": "70a6df202c9df9abdc6928bec5a5ab47f2667aee", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.87312240356046, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.39325212657969744, - "normalized_score": 39.32521265796974 - }, - "bbh": { - "name": "BBH", - "value": 0.4539075127777334, - "normalized_score": 23.631914688111305 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0634441087613293, - "normalized_score": 6.3444108761329305 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.3832395833333333, - "normalized_score": 5.504947916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30668218085106386, - "normalized_score": 22.964686761229316 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-08", - "submission_date": "2024-06-27", - "generation": 1, - "base_model": "meta-llama/Meta-Llama-3-8B", - "hub_license": "apache-2.0", - "hub_hearts": 46, - "params_billions": 8.03, - "co2_cost": 1.4416729891301094 - } - }, - { - "id": "Gryphe/Pantheon-RP-1.5-12b-Nemo_bfloat16_00107381f05f69666772d88a1b11affe77c94a47_True", - "model": { - "name": "Gryphe/Pantheon-RP-1.5-12b-Nemo", - "sha": "00107381f05f69666772d88a1b11affe77c94a47", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.32374664039768, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47630841722186024, - "normalized_score": 47.63084172218603 - }, - "bbh": { - "name": "BBH", - "value": 0.519582216884963, - "normalized_score": 31.750144021634004 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04909365558912387, - "normalized_score": 4.909365558912387 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.44203125000000004, - "normalized_score": 15.053906250000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3302027925531915, - "normalized_score": 25.57808806146572 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-25", - "submission_date": "2024-08-04", - "generation": 1, - "base_model": "mistralai/Mistral-Nemo-Base-2407", - "hub_license": "apache-2.0", - "hub_hearts": 31, - "params_billions": 12.248, - "co2_cost": 3.371166401118355 - } - }, - { - "id": 
"Gryphe/Pantheon-RP-1.6-12b-Nemo_bfloat16_60cf38ae0367baf314e3cce748d9a199adfea557_True", - "model": { - "name": "Gryphe/Pantheon-RP-1.6-12b-Nemo", - "sha": "60cf38ae0367baf314e3cce748d9a199adfea557", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.56659922855925, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44805671174705336, - "normalized_score": 44.80567117470534 - }, - "bbh": { - "name": "BBH", - "value": 0.5204007434392454, - "normalized_score": 31.687343826178374 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04607250755287009, - "normalized_score": 4.607250755287009 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.4287604166666667, - "normalized_score": 12.92838541666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33111702127659576, - "normalized_score": 25.67966903073286 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-18", - "submission_date": "2024-08-31", - "generation": 1, - "base_model": "mistralai/Mistral-Nemo-Base-2407", - "hub_license": "apache-2.0", - "hub_hearts": 12, - "params_billions": 12.248, - "co2_cost": 3.4745060547622137 - } - }, - { - "id": "Gryphe/Pantheon-RP-1.6-12b-Nemo-KTO_bfloat16_6cb6d8d9a7352d71f539ab5053987e058c090443_True", - "model": { - "name": "Gryphe/Pantheon-RP-1.6-12b-Nemo-KTO", - "sha": "6cb6d8d9a7352d71f539ab5053987e058c090443", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.558598525492474, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4636187537954849, - "normalized_score": 46.36187537954849 - }, - "bbh": { - "name": "BBH", - "value": 0.5276980814125921, - "normalized_score": 33.032200369425645 - }, - "math": { - "name": "MATH Level 5", - "value": 0.052870090634441085, - "normalized_score": 5.287009063444108 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2953020134228188, - "normalized_score": 6.040268456375841 - }, - "musr": { - "name": "MUSR", - "value": 0.4247916666666667, - "normalized_score": 12.165625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33818151595744683, - "normalized_score": 26.46461288416076 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-28", - "submission_date": "2024-08-31", - "generation": 1, - "base_model": "mistralai/Mistral-Nemo-Base-2407", - "hub_license": "apache-2.0", - "hub_hearts": 5, - "params_billions": 12.248, - "co2_cost": 3.3640528836556807 - } - }, - { - "id": "Gryphe/Pantheon-RP-Pure-1.6.2-22b-Small_bfloat16_d031830dcb3bc5ad9634374db4dd15b3ef6ebe0f_True", - "model": { - "name": "Gryphe/Pantheon-RP-Pure-1.6.2-22b-Small", - "sha": "d031830dcb3bc5ad9634374db4dd15b3ef6ebe0f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 28.138635196099433, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": 
"IFEval", - "value": 0.6931042965996888, - "normalized_score": 69.31042965996889 - }, - "bbh": { - "name": "BBH", - "value": 0.5304537230538597, - "normalized_score": 31.683163209870646 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20241691842900303, - "normalized_score": 20.241691842900302 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3288590604026846, - "normalized_score": 10.514541387024611 - }, - "musr": { - "name": "MUSR", - "value": 0.37647916666666664, - "normalized_score": 4.393229166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39419880319148937, - "normalized_score": 32.68875591016548 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-13", - "submission_date": "2024-10-15", - "generation": 1, - "base_model": "mistralai/Mistral-Small-Instruct-2409", - "hub_license": "other", - "hub_hearts": 29, - "params_billions": 22.247, - "co2_cost": 2.906640148295222 - } - }, - { - "id": "GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall_float16_2968459668192def7382b614630cabab48f2c865_True", - "model": { - "name": "GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall", - "sha": "2968459668192def7382b614630cabab48f2c865", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 28.68902218654482, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4985405391029136, - "normalized_score": 49.854053910291356 - }, - "bbh": { - "name": "BBH", - "value": 0.5644838945274894, - "normalized_score": 37.752975371907866 - }, - "math": { - "name": "MATH Level 5", - "value": 0.25755287009063443, - "normalized_score": 25.755287009063444 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30033557046979864, - "normalized_score": 6.711409395973152 - }, - "musr": { - "name": "MUSR", - "value": 0.43728125, - "normalized_score": 13.960156249999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44290226063829785, - "normalized_score": 38.1002511820331 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-01", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.6894452515867744 - } - }, - { - "id": "GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall_bfloat16_2968459668192def7382b614630cabab48f2c865_True", - "model": { - "name": "GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall", - "sha": "2968459668192def7382b614630cabab48f2c865", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 28.37476976386807, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47910654840268263, - "normalized_score": 47.910654840268265 - }, - "bbh": { - "name": "BBH", - "value": 0.5648715950622487, - "normalized_score": 37.812775827443616 - }, - "math": { - "name": "MATH Level 5", - "value": 0.25, - "normalized_score": 25.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.4439166666666667, - 
"normalized_score": 15.056249999999993 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4408244680851064, - "normalized_score": 37.8693853427896 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-01", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.6936433748469978 - } - }, - { - "id": "Gunulhona/Gemma-Ko-Merge_bfloat16_ca6b0eb1405f21db6a7a9cce3b112d21fcfdde97_True", - "model": { - "name": "Gunulhona/Gemma-Ko-Merge", - "sha": "ca6b0eb1405f21db6a7a9cce3b112d21fcfdde97", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 29.044658369613114, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6415721397004392, - "normalized_score": 64.15721397004393 - }, - "bbh": { - "name": "BBH", - "value": 0.5813027258981727, - "normalized_score": 38.78719707326869 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18806646525679757, - "normalized_score": 18.806646525679756 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33557046979865773, - "normalized_score": 11.409395973154364 - }, - "musr": { - "name": "MUSR", - "value": 0.40469791666666666, - "normalized_score": 9.120572916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3878823138297872, - "normalized_score": 31.98692375886525 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-04", - "submission_date": "2024-10-23", - "generation": 1, - "base_model": "Gunulhona/Gemma-Ko-Merge (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 10.159, - "co2_cost": 6.274495075785254 - } - }, - { - "id": "Gunulhona/Gemma-Ko-Merge-PEFT_bfloat16_ca6b0eb1405f21db6a7a9cce3b112d21fcfdde97_False", - "model": { - "name": "Gunulhona/Gemma-Ko-Merge-PEFT", - "sha": "ca6b0eb1405f21db6a7a9cce3b112d21fcfdde97", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 18.16949453226467, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.28803906966847964, - "normalized_score": 28.80390696684797 - }, - "bbh": { - "name": "BBH", - "value": 0.5154093999781059, - "normalized_score": 30.18627333134207 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32466442953020136, - "normalized_score": 9.955257270693513 - }, - "musr": { - "name": "MUSR", - "value": 0.40801041666666665, - "normalized_score": 8.76796875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38173204787234044, - "normalized_score": 31.303560874704488 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-30", - "submission_date": "2024-10-17", - "generation": 0, - "base_model": "Gunulhona/Gemma-Ko-Merge-PEFT", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 20.318, - "co2_cost": 5.876476720258237 - } - }, - { - "id": 
"Gunulhona/Gemma-Ko-Merge-PEFT_float16_ca6b0eb1405f21db6a7a9cce3b112d21fcfdde97_True", - "model": { - "name": "Gunulhona/Gemma-Ko-Merge-PEFT", - "sha": "ca6b0eb1405f21db6a7a9cce3b112d21fcfdde97", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 18.06624017039761, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4441348954108433, - "normalized_score": 44.413489541084324 - }, - "bbh": { - "name": "BBH", - "value": 0.4862989687822461, - "normalized_score": 26.015069295888747 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.3985833333333333, - "normalized_score": 7.056249999999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3097573138297872, - "normalized_score": 23.30636820330969 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-30", - "submission_date": "2024-10-23", - "generation": 0, - "base_model": "Gunulhona/Gemma-Ko-Merge-PEFT", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 20.318, - "co2_cost": 18.78866710415633 - } - }, - { - "id": "HPAI-BSC/Llama3-Aloe-8B-Alpha_bfloat16_f0bce5c1fee5ea2a6679bb3dc9de8548e7262c9e_True", - "model": { - "name": "HPAI-BSC/Llama3-Aloe-8B-Alpha", - "sha": "f0bce5c1fee5ea2a6679bb3dc9de8548e7262c9e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.23044725696465, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5081073773144147, - "normalized_score": 50.81073773144146 - }, - "bbh": { - "name": "BBH", - "value": 0.48308532966126966, - "normalized_score": 27.145977577581778 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06117824773413897, - "normalized_score": 6.117824773413897 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.3672708333333334, - "normalized_score": 5.8755208333333355 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3295378989361702, - "normalized_score": 25.5042109929078 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-26", - "submission_date": "2024-10-29", - "generation": 0, - "base_model": "HPAI-BSC/Llama3-Aloe-8B-Alpha", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 59, - "params_billions": 8.03, - "co2_cost": 1.5904895773762358 - } - }, - { - "id": "HPAI-BSC/Llama3.1-Aloe-Beta-8B_bfloat16_3f2f0bbfb03cb0a8310efa50659688c1f2c02da0_True", - "model": { - "name": "HPAI-BSC/Llama3.1-Aloe-Beta-8B", - "sha": "3f2f0bbfb03cb0a8310efa50659688c1f2c02da0", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.524195182673978, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7253276860951166, - "normalized_score": 72.53276860951165 - }, - "bbh": { - "name": "BBH", - "value": 
0.5092760762748857, - "normalized_score": 30.369624781344758 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18277945619335348, - "normalized_score": 18.27794561933535 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.3834583333333333, - "normalized_score": 6.8322916666666655 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35804521276595747, - "normalized_score": 28.671690307328607 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-30", - "submission_date": "2024-11-07", - "generation": 0, - "base_model": "HPAI-BSC/Llama3.1-Aloe-Beta-8B", - "hub_license": "llama3.1", - "hub_hearts": 11, - "params_billions": 8.03, - "co2_cost": 2.0813801340892315 - } - }, - { - "id": "HPAI-BSC/Qwen2.5-Aloe-Beta-7B_bfloat16_853ee78094c4e6ae096fe616fbc7b617dd78f1f5_True", - "model": { - "name": "HPAI-BSC/Qwen2.5-Aloe-Beta-7B", - "sha": "853ee78094c4e6ae096fe616fbc7b617dd78f1f5", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 27.826720985708487, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4553506917201914, - "normalized_score": 45.535069172019135 - }, - "bbh": { - "name": "BBH", - "value": 0.5048995904321122, - "normalized_score": 30.33160459968833 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3542296072507553, - "normalized_score": 35.422960725075534 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.42603125000000003, - "normalized_score": 12.920572916666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4354222074468085, - "normalized_score": 37.2691341607565 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-09", - "submission_date": "2024-12-17", - "generation": 0, - "base_model": "HPAI-BSC/Qwen2.5-Aloe-Beta-7B", - "hub_license": "apache-2.0", - "hub_hearts": 5, - "params_billions": 7.616, - "co2_cost": 1.216719947943519 - } - }, - { - "id": "HarbingerX/Zeitgeist-3b-V1_float16_de159b93ae7c7d816de552025bcbd8a91f8952c1_True", - "model": { - "name": "HarbingerX/Zeitgeist-3b-V1", - "sha": "de159b93ae7c7d816de552025bcbd8a91f8952c1", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.70541406485785, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6711724889958643, - "normalized_score": 67.11724889958643 - }, - "bbh": { - "name": "BBH", - "value": 0.4440790761237121, - "normalized_score": 21.647559636357908 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10347432024169184, - "normalized_score": 10.347432024169184 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28187919463087246, - "normalized_score": 4.250559284116329 - }, - "musr": { - "name": "MUSR", - "value": 0.3579375, - "normalized_score": 4.542187500000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3009474734042553, - "normalized_score": 22.327497044917255 - } - }, - 
"features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-17", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 0.5911335287258164 - } - }, - { - "id": "HarbingerX/Zeitgeist-3b-V1.2_float16_e4679006fa1a030eafa948852a8e084028970405_True", - "model": { - "name": "HarbingerX/Zeitgeist-3b-V1.2", - "sha": "e4679006fa1a030eafa948852a8e084028970405", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.62222005411458, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6754189993661264, - "normalized_score": 67.54189993661264 - }, - "bbh": { - "name": "BBH", - "value": 0.4440650477102142, - "normalized_score": 21.6297148573501 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10120845921450151, - "normalized_score": 10.120845921450151 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.35790625000000004, - "normalized_score": 3.9049479166666674 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30560172872340424, - "normalized_score": 22.844636524822693 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-26", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 0.6026767345231062 - } - }, - { - "id": "Hastagaras/L3.2-JametMini-3B-MK.III_float16_54e451f243ab69327068e92925fe2ecbc91ed06e_True", - "model": { - "name": "Hastagaras/L3.2-JametMini-3B-MK.III", - "sha": "54e451f243ab69327068e92925fe2ecbc91ed06e", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.75038505136374, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6182662003484088, - "normalized_score": 61.826620034840886 - }, - "bbh": { - "name": "BBH", - "value": 0.45385245294894094, - "normalized_score": 22.362058620348964 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14577039274924472, - "normalized_score": 14.577039274924472 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.3686041666666667, - "normalized_score": 5.342187500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2982878989361702, - "normalized_score": 22.031988770685576 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-12", - "submission_date": "2025-02-26", - "generation": 1, - "base_model": "Hastagaras/L3.2-JametMini-3B-MK.III (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 7, - "params_billions": 3.213, - "co2_cost": 0.5696355012646784 - } - }, - { - "id": "Hastagaras/Llama-3.1-Jamet-8B-MK.I_float16_26cb97042b04fee7d0140375a7babbf92278f8ac_True", - "model": { - "name": 
"Hastagaras/Llama-3.1-Jamet-8B-MK.I", - "sha": "26cb97042b04fee7d0140375a7babbf92278f8ac", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.42380601749484, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7338207068356406, - "normalized_score": 73.38207068356405 - }, - "bbh": { - "name": "BBH", - "value": 0.5048666433733161, - "normalized_score": 29.503904748319474 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1268882175226586, - "normalized_score": 12.688821752265861 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.3726041666666667, - "normalized_score": 6.142187500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3482380319148936, - "normalized_score": 27.582003546099287 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-18", - "submission_date": "2024-11-18", - "generation": 0, - "base_model": "Hastagaras/Llama-3.1-Jamet-8B-MK.I", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.4374797961123538 - } - }, - { - "id": "Hastagaras/Zabuza-8B-Llama-3.1_bfloat16_57ffa92f229b8308916aae1d64d8f0dc9baa0a34_True", - "model": { - "name": "Hastagaras/Zabuza-8B-Llama-3.1", - "sha": "57ffa92f229b8308916aae1d64d8f0dc9baa0a34", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.92582695678787, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6265342624237025, - "normalized_score": 62.653426242370244 - }, - "bbh": { - "name": "BBH", - "value": 0.4538915742546196, - "normalized_score": 23.220320849670458 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05513595166163142, - "normalized_score": 5.5135951661631415 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.3567916666666667, - "normalized_score": 4.8989583333333355 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29230385638297873, - "normalized_score": 21.3670951536643 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-05", - "submission_date": "2024-11-05", - "generation": 1, - "base_model": "Hastagaras/Zabuza-8B-Llama-3.1 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3505746169086037 - } - }, - { - "id": "HelpingAI/Cipher-20B_bfloat16_a01cc17784a3afa765de402da36805b2adff70f7_True", - "model": { - "name": "HelpingAI/Cipher-20B", - "sha": "a01cc17784a3afa765de402da36805b2adff70f7", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 26.976007616737775, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5377575942942504, - "normalized_score": 53.77575942942504 - }, - "bbh": { - "name": "BBH", - "value": 0.6032432743536918, - "normalized_score": 
43.43973598811683 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19939577039274925, - "normalized_score": 19.939577039274926 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2953020134228188, - "normalized_score": 6.040268456375841 - }, - "musr": { - "name": "MUSR", - "value": 0.40029166666666666, - "normalized_score": 8.169791666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3744182180851064, - "normalized_score": 30.49091312056737 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-14", - "submission_date": "2024-12-14", - "generation": 0, - "base_model": "HelpingAI/Cipher-20B", - "hub_license": "other", - "hub_hearts": 3, - "params_billions": 20.551, - "co2_cost": 4.068230413560843 - } - }, - { - "id": "HelpingAI/Dhanishtha-Large_bfloat16_54544ebab9ef04370a3bb41e18c60e3ce8b41d83_False", - "model": { - "name": "HelpingAI/Dhanishtha-Large", - "sha": "54544ebab9ef04370a3bb41e18c60e3ce8b41d83", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 19.889172686673326, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24567370133468086, - "normalized_score": 24.56737013346809 - }, - "bbh": { - "name": "BBH", - "value": 0.46036539145861094, - "normalized_score": 24.0022140693209 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3851963746223565, - "normalized_score": 38.51963746223565 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.38451041666666663, - "normalized_score": 5.697135416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2755152925531915, - "normalized_score": 19.501699172576835 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-24", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "HelpingAI/Dhanishtha-Large (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 7.613, - "co2_cost": 0.6810018739499718 - } - }, - { - "id": "HelpingAI/Priya-10B_float16_82f217b1c0b50c3941a6d3f0cff94812aa10c0b9_True", - "model": { - "name": "HelpingAI/Priya-10B", - "sha": "82f217b1c0b50c3941a6d3f0cff94812aa10c0b9", - "precision": "float16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.14328407509064, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.40429283190822574, - "normalized_score": 40.429283190822574 - }, - "bbh": { - "name": "BBH", - "value": 0.4441457310476767, - "normalized_score": 19.96679684299346 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0188821752265861, - "normalized_score": 1.8882175226586102 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2558724832214765, - "normalized_score": 0.7829977628635317 - }, - "musr": { - "name": "MUSR", - "value": 0.3792708333333333, - "normalized_score": 5.208854166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.24925199468085107, - "normalized_score": 16.583554964539008 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - 
"is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-15", - "submission_date": "2024-12-18", - "generation": 1, - "base_model": "HelpingAI/HelpingAI2.5-10B", - "hub_license": "other", - "hub_hearts": 1, - "params_billions": 10.211, - "co2_cost": 1.8156790123754452 - } - }, - { - "id": "HelpingAI/Priya-3B_bfloat16_43681968e92d52df5b171aff6aa59baf4f3cdeba_True", - "model": { - "name": "HelpingAI/Priya-3B", - "sha": "43681968e92d52df5b171aff6aa59baf4f3cdeba", - "precision": "bfloat16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.429592166190526, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4525780484669566, - "normalized_score": 45.25780484669566 - }, - "bbh": { - "name": "BBH", - "value": 0.3961184863327844, - "normalized_score": 14.335273409626753 - }, - "math": { - "name": "MATH Level 5", - "value": 0.014350453172205438, - "normalized_score": 1.4350453172205437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25671140939597314, - "normalized_score": 0.8948545861297527 - }, - "musr": { - "name": "MUSR", - "value": 0.3713020833333333, - "normalized_score": 3.779427083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.23387632978723405, - "normalized_score": 14.875147754137116 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-05", - "submission_date": "2024-12-14", - "generation": 0, - "base_model": "HelpingAI/Priya-3B", - "hub_license": "other", - "hub_hearts": 5, - "params_billions": 2.81, - "co2_cost": 1.2826597442200018 - } - }, - { - "id": "HeraiHench/DeepSeek-R1-Qwen-Coder-8B_float16_b4e6fe291d5c1d61805f31ae42742d81cf7cd594_False", - "model": { - "name": "HeraiHench/DeepSeek-R1-Qwen-Coder-8B", - "sha": "b4e6fe291d5c1d61805f31ae42742d81cf7cd594", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.563387722893334, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1869472998311148, - "normalized_score": 18.69472998311148 - }, - "bbh": { - "name": "BBH", - "value": 0.29134447696551025, - "normalized_score": 2.147966883446335 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.37384375, - "normalized_score": 3.830468749999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11228390957446809, - "normalized_score": 1.3648788416075646 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-29", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.164, - "co2_cost": 0.6427932957296756 - } - }, - { - "id": "HeraiHench/Double-Down-Qwen-Math-7B_float16_339e6488b9bc9cb2874e91396a984ee96865a34a_False", - "model": { - "name": "HeraiHench/Double-Down-Qwen-Math-7B", - "sha": "339e6488b9bc9cb2874e91396a984ee96865a34a", - "precision": "float16", - "type": "basemergesandmoerges", - 
"weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.309785575689419, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1669636564316015, - "normalized_score": 16.69636564316015 - }, - "bbh": { - "name": "BBH", - "value": 0.2844613514203868, - "normalized_score": 1.9548697694104622 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0007552870090634441, - "normalized_score": 0.0755287009063444 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.37365625, - "normalized_score": 3.873697916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11120345744680851, - "normalized_score": 1.2448286052009452 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-30", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.3997668900620879 - } - }, - { - "id": "HeraiHench/Marge-Qwen-Math-7B_float16_f6c2071d34616d61474b9727f1151de56f566e93_False", - "model": { - "name": "HeraiHench/Marge-Qwen-Math-7B", - "sha": "f6c2071d34616d61474b9727f1151de56f566e93", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.083812323812456, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.12622175826806206, - "normalized_score": 12.622175826806208 - }, - "bbh": { - "name": "BBH", - "value": 0.3068846024368302, - "normalized_score": 3.363509103932151 - }, - "math": { - "name": "MATH Level 5", - "value": 0.005287009063444109, - "normalized_score": 0.5287009063444109 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23909395973154363, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.39390624999999996, - "normalized_score": 7.3716145833333355 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10555186170212766, - "normalized_score": 0.616873522458628 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-30", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.373742012462644 - } - }, - { - "id": "HeraiHench/Phi-4-slerp-ReasoningRP-14B_float16_2763ec1a660dad11fab64eed0507e5d24e71806d_False", - "model": { - "name": "HeraiHench/Phi-4-slerp-ReasoningRP-14B", - "sha": "2763ec1a660dad11fab64eed0507e5d24e71806d", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 8.51238737192275, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15754642127333254, - "normalized_score": 15.754642127333252 - }, - "bbh": { - "name": "BBH", - "value": 0.41957191458446336, - "normalized_score": 18.8853764817121 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2936241610738255, - "normalized_score": 5.8165548098433995 - }, - "musr": { - "name": "MUSR", - "value": 
0.3116145833333333, - "normalized_score": 0.6184895833333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.18999335106382978, - "normalized_score": 9.999261229314419 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-29", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 9.207, - "co2_cost": 1.3111993722915893 - } - }, - { - "id": "HiroseKoichi/Llama-Salad-4x8B-V3_bfloat16_a343915429779efbd1478f01ba1f7fd9d8d226c0_True", - "model": { - "name": "HiroseKoichi/Llama-Salad-4x8B-V3", - "sha": "a343915429779efbd1478f01ba1f7fd9d8d226c0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 24.922701724199346, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6653523761397536, - "normalized_score": 66.53523761397537 - }, - "bbh": { - "name": "BBH", - "value": 0.5244649789001753, - "normalized_score": 31.92884881074505 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09592145015105741, - "normalized_score": 9.592145015105741 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.37403125, - "normalized_score": 6.453906249999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.351811835106383, - "normalized_score": 27.979092789598102 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-17", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "HiroseKoichi/Llama-Salad-4x8B-V3", - "hub_license": "llama3", - "hub_hearts": 6, - "params_billions": 24.942, - "co2_cost": 4.275390404302961 - } - }, - { - "id": "HoangHa/Pensez-Llama3.1-8B_bfloat16_e7eab5cd6096c33f1f08a36a05d0ac83c7d950b1_True", - "model": { - "name": "HoangHa/Pensez-Llama3.1-8B", - "sha": "e7eab5cd6096c33f1f08a36a05d0ac83c7d950b1", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.04804643436893, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3886809221753835, - "normalized_score": 38.868092217538354 - }, - "bbh": { - "name": "BBH", - "value": 0.46691313514505667, - "normalized_score": 24.84515798598393 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1148036253776435, - "normalized_score": 11.48036253776435 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28859060402684567, - "normalized_score": 5.145413870246088 - }, - "musr": { - "name": "MUSR", - "value": 0.3596979166666667, - "normalized_score": 10.328906250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31258311170212766, - "normalized_score": 23.620345744680847 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-19", - "submission_date": "2025-02-19", - "generation": 0, - "base_model": "HoangHa/Pensez-Llama3.1-8B", - "hub_license": "llama3.1", - "hub_hearts": 2, - "params_billions": 8.03, - 
"co2_cost": 2.2326767237191776 - } - }, - { - "id": "HuggingFaceH4/zephyr-7b-alpha_bfloat16_2ce2d025864af849b3e5029e2ec9d568eeda892d_True", - "model": { - "name": "HuggingFaceH4/zephyr-7b-alpha", - "sha": "2ce2d025864af849b3e5029e2ec9d568eeda892d", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 18.598795237504422, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5191480826429429, - "normalized_score": 51.914808264294294 - }, - "bbh": { - "name": "BBH", - "value": 0.45828635059044115, - "normalized_score": 23.890291427068444 - }, - "math": { - "name": "MATH Level 5", - "value": 0.019637462235649546, - "normalized_score": 1.9637462235649545 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.3949583333333333, - "normalized_score": 7.503125000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2795046542553192, - "normalized_score": 19.94496158392435 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-10-09", - "submission_date": "2024-06-12", - "generation": 1, - "base_model": "mistralai/Mistral-7B-v0.1", - "hub_license": "mit", - "hub_hearts": 1108, - "params_billions": 7.242, - "co2_cost": 0.9030544872725952 - } - }, - { - "id": "HuggingFaceH4/zephyr-7b-beta_bfloat16_b70e0c9a2d9e14bd1e812d3c398e5f313e93b473_True", - "model": { - "name": "HuggingFaceH4/zephyr-7b-beta", - "sha": "b70e0c9a2d9e14bd1e812d3c398e5f313e93b473", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 17.792237227370464, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49504315216957673, - "normalized_score": 49.50431521695767 - }, - "bbh": { - "name": "BBH", - "value": 0.431582191918003, - "normalized_score": 21.487542182806738 - }, - "math": { - "name": "MATH Level 5", - "value": 0.028700906344410877, - "normalized_score": 2.8700906344410875 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.3925416666666666, - "normalized_score": 7.7343749999999964 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2780917553191489, - "normalized_score": 19.78797281323877 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-10-26", - "submission_date": "2024-06-12", - "generation": 1, - "base_model": "mistralai/Mistral-7B-v0.1", - "hub_license": "mit", - "hub_hearts": 1667, - "params_billions": 7.242, - "co2_cost": 1.110046181738907 - } - }, - { - "id": "HuggingFaceH4/zephyr-7b-gemma-v0.1_bfloat16_03b3427d0ed07d2e0f86c0a7e53d82d4beef9540_True", - "model": { - "name": "HuggingFaceH4/zephyr-7b-gemma-v0.1", - "sha": "03b3427d0ed07d2e0f86c0a7e53d82d4beef9540", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "GemmaForCausalLM", - "average_score": 16.030043342251584, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3363741539116212, - "normalized_score": 
33.637415391162115 - }, - "bbh": { - "name": "BBH", - "value": 0.4623735014679749, - "normalized_score": 23.751162749201274 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08157099697885196, - "normalized_score": 8.157099697885197 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.37396874999999996, - "normalized_score": 4.179427083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2847406914893617, - "normalized_score": 20.526743498817968 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-03-01", - "submission_date": "2024-06-12", - "generation": 2, - "base_model": "google/gemma-7b", - "hub_license": "other", - "hub_hearts": 124, - "params_billions": 8.538, - "co2_cost": 2.860947822540511 - } - }, - { - "id": "HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1_float16_a3be084543d278e61b64cd600f28157afc79ffd3_True", - "model": { - "name": "HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1", - "sha": "a3be084543d278e61b64cd600f28157afc79ffd3", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 34.125963384670946, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6510891102275296, - "normalized_score": 65.10891102275296 - }, - "bbh": { - "name": "BBH", - "value": 0.6290439728524093, - "normalized_score": 47.503796286541196 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20468277945619334, - "normalized_score": 20.468277945619334 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3783557046979866, - "normalized_score": 17.114093959731544 - }, - "musr": { - "name": "MUSR", - "value": 0.4465208333333333, - "normalized_score": 14.715104166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4586103723404255, - "normalized_score": 39.84559692671394 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-04-10", - "submission_date": "2024-06-12", - "generation": 1, - "base_model": "mistral-community/Mixtral-8x22B-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 266, - "params_billions": 140.621, - "co2_cost": 84.13557284613555 - } - }, - { - "id": "HuggingFaceTB/SmolLM-1.7B_bfloat16_673a07602ca1191e5bc2ddda428e2f608a0a14c0_False", - "model": { - "name": "HuggingFaceTB/SmolLM-1.7B", - "sha": "673a07602ca1191e5bc2ddda428e2f608a0a14c0", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.576455936269043, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23615673080759053, - "normalized_score": 23.615673080759052 - }, - "bbh": { - "name": "BBH", - "value": 0.3180516538964782, - "normalized_score": 4.4111278515492005 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01661631419939577, - "normalized_score": 1.6616314199395772 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24161073825503357, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.34209375000000003, - "normalized_score": 2.1283854166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11477726063829788, - "normalized_score": 
1.6419178486997636 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-07-14", - "submission_date": "2024-07-18", - "generation": 0, - "base_model": "HuggingFaceTB/SmolLM-1.7B", - "hub_license": "apache-2.0", - "hub_hearts": 170, - "params_billions": 1.71, - "co2_cost": 0.6486145130353238 - } - }, - { - "id": "HuggingFaceTB/SmolLM-1.7B-Instruct_bfloat16_0ad161e59935a9a691dfde2818df8b98786f30a7_True", - "model": { - "name": "HuggingFaceTB/SmolLM-1.7B-Instruct", - "sha": "0ad161e59935a9a691dfde2818df8b98786f30a7", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.490688803655309, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23478259905938464, - "normalized_score": 23.478259905938465 - }, - "bbh": { - "name": "BBH", - "value": 0.28851114363217695, - "normalized_score": 2.0803742908537424 - }, - "math": { - "name": "MATH Level 5", - "value": 0.021148036253776436, - "normalized_score": 2.1148036253776437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.3486666666666667, - "normalized_score": 2.0833333333333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11660571808510638, - "normalized_score": 1.8450797872340412 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-07-15", - "submission_date": "2024-07-18", - "generation": 1, - "base_model": "HuggingFaceTB/SmolLM-1.7B", - "hub_license": "apache-2.0", - "hub_hearts": 110, - "params_billions": 1.71, - "co2_cost": 0.6340453268534886 - } - }, - { - "id": "HuggingFaceTB/SmolLM-135M_bfloat16_eec6e461571fba3e197a57c298f60b75422eae02_False", - "model": { - "name": "HuggingFaceTB/SmolLM-135M", - "sha": "eec6e461571fba3e197a57c298f60b75422eae02", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 6.951489892134872, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21247622973709757, - "normalized_score": 21.247622973709756 - }, - "bbh": { - "name": "BBH", - "value": 0.3046054260062988, - "normalized_score": 3.2853998220852616 - }, - "math": { - "name": "MATH Level 5", - "value": 0.013595166163141994, - "normalized_score": 1.3595166163141994 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.4366041666666667, - "normalized_score": 13.342187500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11220079787234043, - "normalized_score": 1.3556442080378246 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-07-14", - "submission_date": "2024-07-18", - "generation": 0, - "base_model": "HuggingFaceTB/SmolLM-135M", - "hub_license": "apache-2.0", - "hub_hearts": 197, - "params_billions": 0.13, - "co2_cost": 0.6867550151153348 - } - }, - { - "id": 
"HuggingFaceTB/SmolLM-135M-Instruct_bfloat16_8ca7af58e27777cae460ad8ca3ab9db15f5c160d_True", - "model": { - "name": "HuggingFaceTB/SmolLM-135M-Instruct", - "sha": "8ca7af58e27777cae460ad8ca3ab9db15f5c160d", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 3.652287650202965, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.12140121544169469, - "normalized_score": 12.14012154416947 - }, - "bbh": { - "name": "BBH", - "value": 0.30150816789978757, - "normalized_score": 2.6929580047045802 - }, - "math": { - "name": "MATH Level 5", - "value": 0.005287009063444109, - "normalized_score": 0.5287009063444109 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.36345833333333327, - "normalized_score": 3.365625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11760305851063829, - "normalized_score": 1.9558953900709206 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-07-15", - "submission_date": "2024-10-12", - "generation": 1, - "base_model": "HuggingFaceTB/SmolLM-135M", - "hub_license": "apache-2.0", - "hub_hearts": 109, - "params_billions": 0.135, - "co2_cost": 0.5972598335396585 - } - }, - { - "id": "HuggingFaceTB/SmolLM-360M_bfloat16_318cc630b73730bfd712e5873063156ffb8936b5_False", - "model": { - "name": "HuggingFaceTB/SmolLM-360M", - "sha": "318cc630b73730bfd712e5873063156ffb8936b5", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 6.260888857386585, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2133505764704318, - "normalized_score": 21.33505764704318 - }, - "bbh": { - "name": "BBH", - "value": 0.30645160333152527, - "normalized_score": 3.284915303246592 - }, - "math": { - "name": "MATH Level 5", - "value": 0.011329305135951661, - "normalized_score": 1.1329305135951662 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.40178125, - "normalized_score": 8.089322916666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11236702127659574, - "normalized_score": 1.374113475177304 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-07-14", - "submission_date": "2024-07-18", - "generation": 0, - "base_model": "HuggingFaceTB/SmolLM-360M", - "hub_license": "apache-2.0", - "hub_hearts": 62, - "params_billions": 0.36, - "co2_cost": 0.7305190643342521 - } - }, - { - "id": "HuggingFaceTB/SmolLM-360M-Instruct_bfloat16_8e951de8c220295ea4f85d078c4e320df7137535_True", - "model": { - "name": "HuggingFaceTB/SmolLM-360M-Instruct", - "sha": "8e951de8c220295ea4f85d078c4e320df7137535", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.0088989557053685, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19516549422199764, - "normalized_score": 19.516549422199766 - }, - "bbh": { - "name": "BBH", - "value": 
0.28851114363217695, - "normalized_score": 2.0803742908537424 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01812688821752266, - "normalized_score": 1.812688821752266 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.34717708333333336, - "normalized_score": 2.897135416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11660571808510638, - "normalized_score": 1.8450797872340412 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-07-15", - "submission_date": "2024-08-20", - "generation": 1, - "base_model": "HuggingFaceTB/SmolLM-360M", - "hub_license": "apache-2.0", - "hub_hearts": 80, - "params_billions": 0.362, - "co2_cost": 0.7330020485979701 - } - }, - { - "id": "HuggingFaceTB/SmolLM2-1.7B_bfloat16_4fa12cab4f5f53670b05125fb9d2873af587d231_False", - "model": { - "name": "HuggingFaceTB/SmolLM2-1.7B", - "sha": "4fa12cab4f5f53670b05125fb9d2873af587d231", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 9.583621041753199, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2440003634800108, - "normalized_score": 24.40003634800108 - }, - "bbh": { - "name": "BBH", - "value": 0.3452594377166261, - "normalized_score": 9.301788459551682 - }, - "math": { - "name": "MATH Level 5", - "value": 0.026435045317220542, - "normalized_score": 2.643504531722054 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.3485416666666667, - "normalized_score": 4.601041666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2137632978723404, - "normalized_score": 12.640366430260045 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-10-30", - "submission_date": "2024-11-06", - "generation": 0, - "base_model": "HuggingFaceTB/SmolLM2-1.7B", - "hub_license": "apache-2.0", - "hub_hearts": 113, - "params_billions": 1.71, - "co2_cost": 0.6500519822806164 - } - }, - { - "id": "HuggingFaceTB/SmolLM2-1.7B-Instruct_bfloat16_d1bb90bcfbe0f211109880f4da18da66f229c4f6_True", - "model": { - "name": "HuggingFaceTB/SmolLM2-1.7B-Instruct", - "sha": "d1bb90bcfbe0f211109880f4da18da66f229c4f6", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 15.02227766709556, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5367835121920947, - "normalized_score": 53.678351219209475 - }, - "bbh": { - "name": "BBH", - "value": 0.3598617531415158, - "normalized_score": 10.917989226208066 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0581570996978852, - "normalized_score": 5.81570996978852 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.342125, - "normalized_score": 4.098958333333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2053690159574468, - "normalized_score": 11.707668439716311 - } - }, - "features": { - 
"is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-10-31", - "submission_date": "2024-11-06", - "generation": 1, - "base_model": "HuggingFaceTB/SmolLM2-1.7B-Instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 580, - "params_billions": 1.711, - "co2_cost": 0.9397207100161583 - } - }, - { - "id": "HuggingFaceTB/SmolLM2-135M_bfloat16_28e66ca6931668447a3bac213f23d990ad3b0e2b_False", - "model": { - "name": "HuggingFaceTB/SmolLM2-135M", - "sha": "28e66ca6931668447a3bac213f23d990ad3b0e2b", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.695927392648112, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.18177657504310785, - "normalized_score": 18.177657504310783 - }, - "bbh": { - "name": "BBH", - "value": 0.3044234246877141, - "normalized_score": 3.70807758683998 - }, - "math": { - "name": "MATH Level 5", - "value": 0.012084592145015106, - "normalized_score": 1.2084592145015105 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2483221476510067, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.4111770833333333, - "normalized_score": 10.030468749999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10945811170212766, - "normalized_score": 1.0509013002364058 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-10-31", - "submission_date": "2024-11-06", - "generation": 0, - "base_model": "HuggingFaceTB/SmolLM2-135M", - "hub_license": "apache-2.0", - "hub_hearts": 70, - "params_billions": 0.135, - "co2_cost": 0.677924434996086 - } - }, - { - "id": "HuggingFaceTB/SmolLM2-135M-Instruct_bfloat16_5a33ba103645800d7b3790c4448546c1b73efc71_True", - "model": { - "name": "HuggingFaceTB/SmolLM2-135M-Instruct", - "sha": "5a33ba103645800d7b3790c4448546c1b73efc71", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 6.467364720358819, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2883138960181208, - "normalized_score": 28.83138960181208 - }, - "bbh": { - "name": "BBH", - "value": 0.3124321328066677, - "normalized_score": 4.720807660805284 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0030211480362537764, - "normalized_score": 0.3021148036253776 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23573825503355705, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.36621875000000004, - "normalized_score": 3.6773437500000035 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11145279255319149, - "normalized_score": 1.2725325059101646 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-10-31", - "submission_date": "2024-11-06", - "generation": 1, - "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 158, - "params_billions": 0.135, - "co2_cost": 0.338375628206877 - } - }, - { - "id": "HuggingFaceTB/SmolLM2-135M-Instruct_float16_5a33ba103645800d7b3790c4448546c1b73efc71_False", - "model": { - "name": 
"HuggingFaceTB/SmolLM2-135M-Instruct", - "sha": "5a33ba103645800d7b3790c4448546c1b73efc71", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 3.2065969565576755, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.05925167444602544, - "normalized_score": 5.925167444602544 - }, - "bbh": { - "name": "BBH", - "value": 0.31347502947335903, - "normalized_score": 4.796275744662444 - }, - "math": { - "name": "MATH Level 5", - "value": 0.014350453172205438, - "normalized_score": 1.4350453172205437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23406040268456377, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3871458333333333, - "normalized_score": 6.059895833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10920877659574468, - "normalized_score": 1.0231973995271864 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-10-31", - "submission_date": "2024-11-14", - "generation": 1, - "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 158, - "params_billions": 0.135, - "co2_cost": 0.697507687387014 - } - }, - { - "id": "HuggingFaceTB/SmolLM2-360M_bfloat16_3ce05f63c246c44616da500b47b01f082f4d3bcc_False", - "model": { - "name": "HuggingFaceTB/SmolLM2-360M", - "sha": "3ce05f63c246c44616da500b47b01f082f4d3bcc", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 6.251282350517303, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21145227995053123, - "normalized_score": 21.145227995053123 - }, - "bbh": { - "name": "BBH", - "value": 0.3233478044302361, - "normalized_score": 5.543603155369513 - }, - "math": { - "name": "MATH Level 5", - "value": 0.012084592145015106, - "normalized_score": 1.2084592145015105 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24580536912751677, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3954270833333333, - "normalized_score": 7.728385416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11693816489361702, - "normalized_score": 1.8820183215130022 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-10-31", - "submission_date": "2024-11-06", - "generation": 0, - "base_model": "HuggingFaceTB/SmolLM2-360M", - "hub_license": "apache-2.0", - "hub_hearts": 40, - "params_billions": 0.36, - "co2_cost": 0.7733156187446921 - } - }, - { - "id": "HuggingFaceTB/SmolLM2-360M-Instruct_float16_4873f67095301d304753fae05bc09ec766634e50_False", - "model": { - "name": "HuggingFaceTB/SmolLM2-360M-Instruct", - "sha": "4873f67095301d304753fae05bc09ec766634e50", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 3.1000195398620374, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.08303191088533979, - "normalized_score": 8.303191088533978 - }, - "bbh": { - "name": "BBH", - "value": 0.3052703401844317, - "normalized_score": 
3.2990473293233173 - }, - "math": { - "name": "MATH Level 5", - "value": 0.008308157099697885, - "normalized_score": 0.8308157099697886 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.34228125000000004, - "normalized_score": 2.751822916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11261635638297872, - "normalized_score": 1.4018173758865236 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-10-31", - "submission_date": "2024-11-14", - "generation": 0, - "base_model": "HuggingFaceTB/SmolLM2-360M-Instruct", - "hub_license": "apache-2.0", - "hub_hearts": 102, - "params_billions": 0.362, - "co2_cost": 0.39238186927176233 - } - }, - { - "id": "HuggingFaceTB/SmolLM2-360M-Instruct_bfloat16_4873f67095301d304753fae05bc09ec766634e50_True", - "model": { - "name": "HuggingFaceTB/SmolLM2-360M-Instruct", - "sha": "4873f67095301d304753fae05bc09ec766634e50", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 8.139566424375877, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.38415958545548035, - "normalized_score": 38.41595854554804 - }, - "bbh": { - "name": "BBH", - "value": 0.31435050538888504, - "normalized_score": 4.173863636363637 - }, - "math": { - "name": "MATH Level 5", - "value": 0.015105740181268883, - "normalized_score": 1.5105740181268883 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2550335570469799, - "normalized_score": 0.6711409395973182 - }, - "musr": { - "name": "MUSR", - "value": 0.346125, - "normalized_score": 2.7656250000000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11170212765957446, - "normalized_score": 1.300236406619384 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-10-31", - "submission_date": "2024-11-06", - "generation": 0, - "base_model": "HuggingFaceTB/SmolLM2-360M-Instruct", - "hub_license": "apache-2.0", - "hub_hearts": 102, - "params_billions": 0.36, - "co2_cost": 0.7516385415285941 - } - }, - { - "id": "HumanLLMs/Humanish-LLama3-8B-Instruct_bfloat16_42f73ada2b7fb16f18a75404d72b7911bf1e65ce_True", - "model": { - "name": "HumanLLMs/Humanish-LLama3-8B-Instruct", - "sha": "42f73ada2b7fb16f18a75404d72b7911bf1e65ce", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.678203747779094, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6497903340913221, - "normalized_score": 64.97903340913221 - }, - "bbh": { - "name": "BBH", - "value": 0.49677096627896544, - "normalized_score": 28.012476599572 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1027190332326284, - "normalized_score": 10.27190332326284 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2558724832214765, - "normalized_score": 0.7829977628635317 - }, - "musr": { - "name": "MUSR", - "value": 0.35815624999999995, - "normalized_score": 2.0028645833333325 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37017952127659576, - "normalized_score": 30.019946808510632 - } - }, - "features": { - 
"is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-04", - "submission_date": "2024-10-05", - "generation": 1, - "base_model": "meta-llama/Meta-Llama-3-8B-Instruct", - "hub_license": "llama3", - "hub_hearts": 18, - "params_billions": 8.03, - "co2_cost": 1.496556281341137 - } - }, - { - "id": "HumanLLMs/Humanish-Mistral-Nemo-Instruct-2407_bfloat16_45b80bdce8d447ef494af06751904afcc607eb37_True", - "model": { - "name": "HumanLLMs/Humanish-Mistral-Nemo-Instruct-2407", - "sha": "45b80bdce8d447ef494af06751904afcc607eb37", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 23.888067848972184, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5451269298793867, - "normalized_score": 54.51269298793867 - }, - "bbh": { - "name": "BBH", - "value": 0.5261780772532613, - "normalized_score": 32.70961342122556 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13670694864048338, - "normalized_score": 13.670694864048338 - }, - "gpqa": { - "name": "GPQA", - "value": 0.287751677852349, - "normalized_score": 5.033557046979867 - }, - "musr": { - "name": "MUSR", - "value": 0.39676041666666667, - "normalized_score": 9.395052083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35206117021276595, - "normalized_score": 28.00679669030733 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-06", - "submission_date": "2024-10-06", - "generation": 2, - "base_model": "mistralai/Mistral-Nemo-Base-2407", - "hub_license": "apache-2.0", - "hub_hearts": 13, - "params_billions": 12.248, - "co2_cost": 3.2405666128817936 - } - }, - { - "id": "HumanLLMs/Humanish-Qwen2.5-7B-Instruct_bfloat16_7d2c71d926832d6e257ad2776011494dbac2d151_True", - "model": { - "name": "HumanLLMs/Humanish-Qwen2.5-7B-Instruct", - "sha": "7d2c71d926832d6e257ad2776011494dbac2d151", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 34.99870743015318, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7284250233824031, - "normalized_score": 72.84250233824031 - }, - "bbh": { - "name": "BBH", - "value": 0.5363681457807072, - "normalized_score": 34.47899758661866 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5, - "normalized_score": 50.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2986577181208054, - "normalized_score": 6.487695749440718 - }, - "musr": { - "name": "MUSR", - "value": 0.3980625, - "normalized_score": 8.424479166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4398271276595745, - "normalized_score": 37.75856973995272 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-05", - "submission_date": "2024-10-05", - "generation": 2, - "base_model": "Qwen/Qwen2.5-7B", - "hub_license": "apache-2.0", - "hub_hearts": 11, - "params_billions": 7.616, - "co2_cost": 2.3867854372664867 - } - }, - { - "id": "IDEA-CCNL/Ziya-LLaMA-13B-v1_float16_64d931f346e1a49ea3bbca07a83137075bab1c66_False", - "model": { - "name": 
"IDEA-CCNL/Ziya-LLaMA-13B-v1", - "sha": "64d931f346e1a49ea3bbca07a83137075bab1c66", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 3.9064248386004103, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16968643200042555, - "normalized_score": 16.968643200042553 - }, - "bbh": { - "name": "BBH", - "value": 0.28770292445409473, - "normalized_score": 1.4636170460989157 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24916107382550334, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.37505208333333334, - "normalized_score": 3.881510416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11012300531914894, - "normalized_score": 1.124778368794326 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-05-16", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "IDEA-CCNL/Ziya-LLaMA-13B-v1", - "hub_license": "gpl-3.0", - "hub_hearts": 274, - "params_billions": 13.0, - "co2_cost": 2.2165148606691836 - } - }, - { - "id": "INSAIT-Institute/BgGPT-Gemma-2-27B-IT-v1.0_float16_2ce5574f5d0daf61b39cffd80023dd73782b87e3_True", - "model": { - "name": "INSAIT-Institute/BgGPT-Gemma-2-27B-IT-v1.0", - "sha": "2ce5574f5d0daf61b39cffd80023dd73782b87e3", - "precision": "float16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 1.6780628068086922, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.0, - "normalized_score": 0.0 - }, - "bbh": { - "name": "BBH", - "value": 0.2911778102988436, - "normalized_score": 2.3470409575204094 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.35753125, - "normalized_score": 4.524739583333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11668882978723404, - "normalized_score": 1.854314420803781 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-15", - "submission_date": "2024-12-15", - "generation": 1, - "base_model": "INSAIT-Institute/BgGPT-Gemma-2-27B-IT-v1.0 (Merge)", - "hub_license": "gemma", - "hub_hearts": 15, - "params_billions": 27.227, - "co2_cost": 20.726243657243476 - } - }, - { - "id": "IlyaGusev/gemma-2-2b-it-abliterated_float16__True", - "model": { - "name": "IlyaGusev/gemma-2-2b-it-abliterated", - "sha": "", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 16.705746365531194, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.533086654521115, - "normalized_score": 53.30866545211151 - }, - "bbh": { - "name": "BBH", - "value": 0.4118601326211988, - "normalized_score": 16.796334504661765 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06117824773413897, - "normalized_score": 6.117824773413897 - }, - "gpqa": { - "name": "GPQA", 
- "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.37818749999999995, - "normalized_score": 4.906770833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.25382313829787234, - "normalized_score": 17.091459810874703 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-31", - "submission_date": "2025-01-07", - "generation": 0, - "base_model": "IlyaGusev/gemma-2-2b-it-abliterated", - "hub_license": "gemma", - "hub_hearts": 49, - "params_billions": 2.614, - "co2_cost": 3.68717312543207 - } - }, - { - "id": "IlyaGusev/gemma-2-9b-it-abliterated_float16_e2b6426b20a3a889f0c182056b0dbbb7fa585d25_True", - "model": { - "name": "IlyaGusev/gemma-2-9b-it-abliterated", - "sha": "e2b6426b20a3a889f0c182056b0dbbb7fa585d25", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 31.294229585782904, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.747259493698941, - "normalized_score": 74.7259493698941 - }, - "bbh": { - "name": "BBH", - "value": 0.59063299776093, - "normalized_score": 40.82468502686295 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17749244712990936, - "normalized_score": 17.749244712990937 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34563758389261745, - "normalized_score": 12.751677852348994 - }, - "musr": { - "name": "MUSR", - "value": 0.4033645833333333, - "normalized_score": 9.320572916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39153922872340424, - "normalized_score": 32.393247635933804 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-13", - "submission_date": "2025-01-07", - "generation": 0, - "base_model": "IlyaGusev/gemma-2-9b-it-abliterated", - "hub_license": "gemma", - "hub_hearts": 37, - "params_billions": 9.242, - "co2_cost": 2.6226877064686693 - } - }, - { - "id": "Infinirc/Infinirc-Llama3-8B-2G-Release-v1.0_float16_9c542d9ec3f86e145ae445c200c6ebe9066e8cd6_False", - "model": { - "name": "Infinirc/Infinirc-Llama3-8B-2G-Release-v1.0", - "sha": "9c542d9ec3f86e145ae445c200c6ebe9066e8cd6", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.16266165602399, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20243398626754788, - "normalized_score": 20.24339862675479 - }, - "bbh": { - "name": "BBH", - "value": 0.43507435668237937, - "normalized_score": 20.83116494676627 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01661631419939577, - "normalized_score": 1.6616314199395772 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.4609375, - "normalized_score": 16.750520833333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.21600731382978725, - "normalized_score": 12.889701536643026 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": 
"2024-06-26", - "submission_date": "2024-09-29", - "generation": 0, - "base_model": "Infinirc/Infinirc-Llama3-8B-2G-Release-v1.0", - "hub_license": "llama3", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 2.7162005063744825 - } - }, - { - "id": "Intel/neural-chat-7b-v3_float16_fc679274dfcd28a8b6087634f71af7ed2a0659c4_False", - "model": { - "name": "Intel/neural-chat-7b-v3", - "sha": "fc679274dfcd28a8b6087634f71af7ed2a0659c4", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 18.069527284193285, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.27779735546128714, - "normalized_score": 27.779735546128713 - }, - "bbh": { - "name": "BBH", - "value": 0.5048316221363103, - "normalized_score": 30.205692320538088 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02945619335347432, - "normalized_score": 2.9456193353474323 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.5054895833333334, - "normalized_score": 23.019531249999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.26986369680851063, - "normalized_score": 18.873744089834513 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-10-25", - "submission_date": "2024-06-12", - "generation": 1, - "base_model": "mistralai/Mistral-7B-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 67, - "params_billions": 7.0, - "co2_cost": 0.9785806289666895 - } - }, - { - "id": "Intel/neural-chat-7b-v3-1_float16_c0d379a49c1c0579529d5e6f2e936ddb759552a8_False", - "model": { - "name": "Intel/neural-chat-7b-v3-1", - "sha": "c0d379a49c1c0579529d5e6f2e936ddb759552a8", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.06792676095294, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4686897432146704, - "normalized_score": 46.868974321467036 - }, - "bbh": { - "name": "BBH", - "value": 0.5051565464054848, - "normalized_score": 29.7397523676162 - }, - "math": { - "name": "MATH Level 5", - "value": 0.035498489425981876, - "normalized_score": 3.5498489425981874 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.49789583333333337, - "normalized_score": 22.236979166666657 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2677859042553192, - "normalized_score": 18.642878250591018 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-11-14", - "submission_date": "2024-06-12", - "generation": 1, - "base_model": "mistralai/Mistral-7B-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 546, - "params_billions": 7.242, - "co2_cost": 1.1273840573130482 - } - }, - { - "id": "Intel/neural-chat-7b-v3-2_float16_0d8f77647810d21d935ea90c66d6339b85e65a75_False", - "model": { - "name": "Intel/neural-chat-7b-v3-2", - "sha": "0d8f77647810d21d935ea90c66d6339b85e65a75", - "precision": "float16", - "type": 
"fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.471411168582193, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4988397452093778, - "normalized_score": 49.883974520937784 - }, - "bbh": { - "name": "BBH", - "value": 0.5032226831964403, - "normalized_score": 30.237457969159426 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04758308157099698, - "normalized_score": 4.758308157099698 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.48952083333333335, - "normalized_score": 20.056770833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.26670545212765956, - "normalized_score": 18.522828014184395 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-11-21", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "Intel/neural-chat-7b-v3-2", - "hub_license": "apache-2.0", - "hub_hearts": 57, - "params_billions": 7.0, - "co2_cost": 1.1208829689611992 - } - }, - { - "id": "Intel/neural-chat-7b-v3-3_float16_bdd31cf498d13782cc7497cba5896996ce429f91_False", - "model": { - "name": "Intel/neural-chat-7b-v3-3", - "sha": "bdd31cf498d13782cc7497cba5896996ce429f91", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.557585514141866, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4762585495374495, - "normalized_score": 47.62585495374495 - }, - "bbh": { - "name": "BBH", - "value": 0.48766180524289693, - "normalized_score": 27.753850886523697 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04078549848942598, - "normalized_score": 4.078549848942599 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28942953020134227, - "normalized_score": 5.257270693512303 - }, - "musr": { - "name": "MUSR", - "value": 0.4859583333333333, - "normalized_score": 20.578124999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2624667553191489, - "normalized_score": 18.05186170212766 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-12-09", - "submission_date": "2024-06-12", - "generation": 2, - "base_model": "mistralai/Mistral-7B-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 79, - "params_billions": 7.0, - "co2_cost": 1.1190477903556486 - } - }, - { - "id": "IntervitensInc/internlm2_5-20b-llamafied_bfloat16_0b6fc3cc0b9bf3529816061eb508483c20b77fe9_False", - "model": { - "name": "IntervitensInc/internlm2_5-20b-llamafied", - "sha": "0b6fc3cc0b9bf3529816061eb508483c20b77fe9", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.216880819026148, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3409952260003457, - "normalized_score": 34.099522600034575 - }, - "bbh": { - "name": "BBH", - "value": 0.7478466526577329, - "normalized_score": 63.47057952429436 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1714501510574018, - "normalized_score": 
17.14501510574018 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33808724832214765, - "normalized_score": 11.74496644295302 - }, - "musr": { - "name": "MUSR", - "value": 0.44754166666666667, - "normalized_score": 14.942708333333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4050864361702128, - "normalized_score": 33.89849290780142 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-06", - "submission_date": "2024-11-11", - "generation": 0, - "base_model": "IntervitensInc/internlm2_5-20b-llamafied", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 19.861, - "co2_cost": 2.76225561886365 - } - }, - { - "id": "Invalid-Null/PeiYangMe-0.5_float16_34e2c17d7bc7b34bd744ce5466046c04db2cf367_False", - "model": { - "name": "Invalid-Null/PeiYangMe-0.5", - "sha": "34e2c17d7bc7b34bd744ce5466046c04db2cf367", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 3.4273672940351996, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.14088507382633633, - "normalized_score": 14.088507382633633 - }, - "bbh": { - "name": "BBH", - "value": 0.27907748194216614, - "normalized_score": 1.4745771439889088 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24412751677852348, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.37381249999999994, - "normalized_score": 3.793229166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11087101063829788, - "normalized_score": 1.2078900709219857 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-28", - "submission_date": "2024-12-28", - "generation": 1, - "base_model": "Invalid-Null/PeiYangMe-0.5 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 6.061, - "co2_cost": 1.5153573523009258 - } - }, - { - "id": "Invalid-Null/PeiYangMe-0.7_float16_b30d72771b170eea4eeab447ce2d62696a292e02_False", - "model": { - "name": "Invalid-Null/PeiYangMe-0.7", - "sha": "b30d72771b170eea4eeab447ce2d62696a292e02", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.397279528582866, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1491032682172192, - "normalized_score": 14.91032682172192 - }, - "bbh": { - "name": "BBH", - "value": 0.30275310145886614, - "normalized_score": 3.600797717385781 - }, - "math": { - "name": "MATH Level 5", - "value": 0.011329305135951661, - "normalized_score": 1.1329305135951662 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2332214765100671, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.38571874999999994, - "normalized_score": 5.6148437499999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11012300531914894, - "normalized_score": 1.124778368794326 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": 
"2024-12-28", - "submission_date": "2024-12-28", - "generation": 1, - "base_model": "Invalid-Null/PeiYangMe-0.7 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 6.061, - "co2_cost": 1.493280597282422 - } - }, - { - "id": "Isaak-Carter/JOSIEv4o-8b-stage1-v4_float16_a8380a7be51b547761824e524b3d95ac73203122_False", - "model": { - "name": "Isaak-Carter/JOSIEv4o-8b-stage1-v4", - "sha": "a8380a7be51b547761824e524b3d95ac73203122", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 15.66808216767364, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2552660274737696, - "normalized_score": 25.526602747376963 - }, - "bbh": { - "name": "BBH", - "value": 0.4724973116620121, - "normalized_score": 25.7872756997585 - }, - "math": { - "name": "MATH Level 5", - "value": 0.052870090634441085, - "normalized_score": 5.287009063444108 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.3654375, - "normalized_score": 6.0796874999999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3316156914893617, - "normalized_score": 25.7350768321513 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-03", - "submission_date": "2024-08-03", - "generation": 0, - "base_model": "Isaak-Carter/JOSIEv4o-8b-stage1-v4", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.7811642100845821 - } - }, - { - "id": "Isaak-Carter/JOSIEv4o-8b-stage1-v4_bfloat16_a8380a7be51b547761824e524b3d95ac73203122_False", - "model": { - "name": "Isaak-Carter/JOSIEv4o-8b-stage1-v4", - "sha": "a8380a7be51b547761824e524b3d95ac73203122", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 15.419272249038903, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2476972211509905, - "normalized_score": 24.76972211509905 - }, - "bbh": { - "name": "BBH", - "value": 0.4758066295235124, - "normalized_score": 25.919578359413446 - }, - "math": { - "name": "MATH Level 5", - "value": 0.045317220543806644, - "normalized_score": 4.531722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.3641041666666667, - "normalized_score": 6.346354166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32920545212765956, - "normalized_score": 25.467272458628837 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-03", - "submission_date": "2024-08-03", - "generation": 0, - "base_model": "Isaak-Carter/JOSIEv4o-8b-stage1-v4", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 0.8798822262772638 - } - }, - { - "id": "Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated_bfloat16_879168f9ce9fac315a19dd4f4c7df5253bb660f2_True", - "model": { - "name": "Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated", - "sha": "879168f9ce9fac315a19dd4f4c7df5253bb660f2", - 
"precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.06474762506518, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7317473193349202, - "normalized_score": 73.17473193349201 - }, - "bbh": { - "name": "BBH", - "value": 0.5396376284460921, - "normalized_score": 34.904315688323074 - }, - "math": { - "name": "MATH Level 5", - "value": 0.49244712990936557, - "normalized_score": 49.244712990936556 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.4086666666666667, - "normalized_score": 9.616666666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4276097074468085, - "normalized_score": 36.401078605200944 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-21", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 2.1535813501469194 - } - }, - { - "id": "Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2_bfloat16_5d07f58562422feb9f25c9c048e40356d2cf7e4b_True", - "model": { - "name": "Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2", - "sha": "5d07f58562422feb9f25c9c048e40356d2cf7e4b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.685532582369234, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7841039552830933, - "normalized_score": 78.41039552830932 - }, - "bbh": { - "name": "BBH", - "value": 0.5310923599182072, - "normalized_score": 33.2945398202129 - }, - "math": { - "name": "MATH Level 5", - "value": 0.47205438066465255, - "normalized_score": 47.205438066465256 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2986577181208054, - "normalized_score": 6.487695749440718 - }, - "musr": { - "name": "MUSR", - "value": 0.43539583333333337, - "normalized_score": 13.957812500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4128158244680851, - "normalized_score": 34.757313829787236 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-20", - "submission_date": "2024-09-21", - "generation": 1, - "base_model": "Qwen/Qwen2.5-7B", - "hub_license": "apache-2.0", - "hub_hearts": 6, - "params_billions": 7.616, - "co2_cost": 2.261829293581302 - } - }, - { - "id": "J-LAB/Thynk_orpo_float16_c6606d402f26d005b9f1a71a1cde9139d1cffb2a_False", - "model": { - "name": "J-LAB/Thynk_orpo", - "sha": "c6606d402f26d005b9f1a71a1cde9139d1cffb2a", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 17.263933728919593, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21017788357114678, - "normalized_score": 21.017788357114675 - }, - "bbh": { - "name": "BBH", - "value": 0.44631138778709606, - "normalized_score": 22.062783944144368 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14803625377643503, - 
"normalized_score": 14.803625377643503 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29278523489932884, - "normalized_score": 5.7046979865771785 - }, - "musr": { - "name": "MUSR", - "value": 0.45147916666666665, - "normalized_score": 15.201562500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32313829787234044, - "normalized_score": 24.793144208037827 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-10-14", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.086, - "co2_cost": 2.4295282214320713 - } - }, - { - "id": "JackFram/llama-160m_bfloat16_aca9b687d1425f863dcf5de9a4c96e3fe36266dd_False", - "model": { - "name": "JackFram/llama-160m", - "sha": "aca9b687d1425f863dcf5de9a4c96e3fe36266dd", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.7381298467963955, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1791036671586945, - "normalized_score": 17.91036671586945 - }, - "bbh": { - "name": "BBH", - "value": 0.28880217539042424, - "normalized_score": 2.033606152852728 - }, - "math": { - "name": "MATH Level 5", - "value": 0.008308157099697885, - "normalized_score": 0.8308157099697886 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.3792083333333333, - "normalized_score": 4.667708333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11278257978723404, - "normalized_score": 1.4202866430260035 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-05-26", - "submission_date": "2024-11-30", - "generation": 0, - "base_model": "JackFram/llama-160m", - "hub_license": "apache-2.0", - "hub_hearts": 34, - "params_billions": 0.162, - "co2_cost": 0.1869487883487398 - } - }, - { - "id": "JackFram/llama-68m_bfloat16_964a5d77df908b69f8d6476fb70e940425b04cb5_False", - "model": { - "name": "JackFram/llama-68m", - "sha": "964a5d77df908b69f8d6476fb70e940425b04cb5", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.963339700174584, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17263416623448008, - "normalized_score": 17.26341662344801 - }, - "bbh": { - "name": "BBH", - "value": 0.29362986509336414, - "normalized_score": 2.5910478068354776 - }, - "math": { - "name": "MATH Level 5", - "value": 0.006042296072507553, - "normalized_score": 0.6042296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.3909895833333333, - "normalized_score": 6.607031250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11436170212765957, - "normalized_score": 1.595744680851063 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-07-19", - "submission_date": "2024-11-30", - "generation": 
0, - "base_model": "JackFram/llama-68m", - "hub_license": "apache-2.0", - "hub_hearts": 26, - "params_billions": 0.068, - "co2_cost": 0.1211158078368217 - } - }, - { - "id": "Jacoby746/Casual-Magnum-34B_float16_b628c6959441db75460cfd49536322b1ea46130e_False", - "model": { - "name": "Jacoby746/Casual-Magnum-34B", - "sha": "b628c6959441db75460cfd49536322b1ea46130e", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.79792072114355, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19301675110927893, - "normalized_score": 19.301675110927896 - }, - "bbh": { - "name": "BBH", - "value": 0.6032046880542974, - "normalized_score": 43.05156762846773 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09214501510574018, - "normalized_score": 9.214501510574017 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3724832214765101, - "normalized_score": 16.33109619686801 - }, - "musr": { - "name": "MUSR", - "value": 0.4077604166666666, - "normalized_score": 8.403385416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5183676861702128, - "normalized_score": 46.485298463356976 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-01", - "submission_date": "2024-10-23", - "generation": 1, - "base_model": "Jacoby746/Casual-Magnum-34B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 34.389, - "co2_cost": 6.85339428403758 - } - }, - { - "id": "Jacoby746/Inf-Silent-Kunoichi-v0.1-2x7B_float16_9ab68beb6fe16cab2ab708b9af4417c89751d297_False", - "model": { - "name": "Jacoby746/Inf-Silent-Kunoichi-v0.1-2x7B", - "sha": "9ab68beb6fe16cab2ab708b9af4417c89751d297", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 20.186181285193417, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.38798166642286913, - "normalized_score": 38.79816664228691 - }, - "bbh": { - "name": "BBH", - "value": 0.518546209727402, - "normalized_score": 32.38700420411291 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07099697885196375, - "normalized_score": 7.099697885196375 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28942953020134227, - "normalized_score": 5.257270693512303 - }, - "musr": { - "name": "MUSR", - "value": 0.42804166666666665, - "normalized_score": 12.338541666666671 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3271276595744681, - "normalized_score": 25.236406619385342 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-19", - "submission_date": "2024-09-20", - "generation": 1, - "base_model": "Jacoby746/Inf-Silent-Kunoichi-v0.1-2x7B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 12.879, - "co2_cost": 2.780861420852331 - } - }, - { - "id": "Jacoby746/Inf-Silent-Kunoichi-v0.2-2x7B_float16_711263c24f812676eb382a31a5f0fed9bd8c16e4_False", - "model": { - "name": "Jacoby746/Inf-Silent-Kunoichi-v0.2-2x7B", - "sha": "711263c24f812676eb382a31a5f0fed9bd8c16e4", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": 
"Original", - "architecture": "MixtralForCausalLM", - "average_score": 20.01822829916165, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3636019095998617, - "normalized_score": 36.36019095998617 - }, - "bbh": { - "name": "BBH", - "value": 0.5209417299963208, - "normalized_score": 32.259183510828905 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06268882175226587, - "normalized_score": 6.268882175226587 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30033557046979864, - "normalized_score": 6.711409395973152 - }, - "musr": { - "name": "MUSR", - "value": 0.43197916666666664, - "normalized_score": 13.264062499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32721077127659576, - "normalized_score": 25.24564125295508 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-19", - "submission_date": "2024-09-21", - "generation": 1, - "base_model": "Jacoby746/Inf-Silent-Kunoichi-v0.2-2x7B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 12.879, - "co2_cost": 1.732529076638037 - } - }, - { - "id": "Jacoby746/Proto-Athena-4x7B_float16_450fcba7a630fb61a662f71936d37979226fced8_False", - "model": { - "name": "Jacoby746/Proto-Athena-4x7B", - "sha": "450fcba7a630fb61a662f71936d37979226fced8", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 19.77557762348212, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.37029636918930664, - "normalized_score": 37.02963691893066 - }, - "bbh": { - "name": "BBH", - "value": 0.5106547638742905, - "normalized_score": 30.870822876627887 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0649546827794562, - "normalized_score": 6.495468277945619 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.43477083333333333, - "normalized_score": 13.813020833333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32064494680851063, - "normalized_score": 24.516105200945624 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-21", - "submission_date": "2024-09-21", - "generation": 1, - "base_model": "Jacoby746/Proto-Athena-4x7B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 24.154, - "co2_cost": 3.3532287352854695 - } - }, - { - "id": "Jacoby746/Proto-Athena-v0.2-4x7B_bfloat16_01feeded217ea83a8794e7968c8850859b5f0b14_False", - "model": { - "name": "Jacoby746/Proto-Athena-v0.2-4x7B", - "sha": "01feeded217ea83a8794e7968c8850859b5f0b14", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 19.345307494794017, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.37524213531208306, - "normalized_score": 37.524213531208304 - }, - "bbh": { - "name": "BBH", - "value": 0.5067731005424964, - "normalized_score": 30.34084433030367 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0634441087613293, - "normalized_score": 6.3444108761329305 - }, - "gpqa": { - 
"name": "GPQA", - "value": 0.2986577181208054, - "normalized_score": 6.487695749440718 - }, - "musr": { - "name": "MUSR", - "value": 0.42128125, - "normalized_score": 10.960156250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3197307180851064, - "normalized_score": 24.414524231678485 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-21", - "submission_date": "2024-09-21", - "generation": 1, - "base_model": "Jacoby746/Proto-Athena-v0.2-4x7B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 24.154, - "co2_cost": 3.3027439043825804 - } - }, - { - "id": "Jacoby746/Proto-Harpy-Blazing-Light-v0.1-2x7B_float16_bbb5d7c7a0c9e999e057ffa71eaa93d59d95b36b_False", - "model": { - "name": "Jacoby746/Proto-Harpy-Blazing-Light-v0.1-2x7B", - "sha": "bbb5d7c7a0c9e999e057ffa71eaa93d59d95b36b", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 22.481213617019815, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4904719477652628, - "normalized_score": 49.04719477652628 - }, - "bbh": { - "name": "BBH", - "value": 0.5186849053052595, - "normalized_score": 32.63252990159268 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07477341389728097, - "normalized_score": 7.477341389728097 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2953020134228188, - "normalized_score": 6.040268456375841 - }, - "musr": { - "name": "MUSR", - "value": 0.44496874999999997, - "normalized_score": 14.12109375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33011968085106386, - "normalized_score": 25.56885342789598 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-22", - "submission_date": "2024-09-30", - "generation": 1, - "base_model": "Jacoby746/Proto-Harpy-Blazing-Light-v0.1-2x7B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 12.879, - "co2_cost": 1.7636816551119412 - } - }, - { - "id": "Jacoby746/Proto-Harpy-Spark-v0.1-7B_float16_984cca02cd930b2e1b7b2a7d53471d32d9821cdd_False", - "model": { - "name": "Jacoby746/Proto-Harpy-Spark-v0.1-7B", - "sha": "984cca02cd930b2e1b7b2a7d53471d32d9821cdd", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.84999990070571, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43326928106313467, - "normalized_score": 43.32692810631347 - }, - "bbh": { - "name": "BBH", - "value": 0.4735771808296548, - "normalized_score": 26.913110159424864 - }, - "math": { - "name": "MATH Level 5", - "value": 0.061933534743202415, - "normalized_score": 6.193353474320242 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.43166666666666664, - "normalized_score": 12.29166666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30693151595744683, - "normalized_score": 22.992390661938536 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, 
- "metadata": { - "upload_date": "2024-09-22", - "submission_date": "2024-09-30", - "generation": 1, - "base_model": "Jacoby746/Proto-Harpy-Spark-v0.1-7B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.1916091917298846 - } - }, - { - "id": "JayHyeon/Qwen-0.5B-DPO-1epoch_bfloat16_f5569969d307d193798eff52c0527e23f4ac8bb9_True", - "model": { - "name": "JayHyeon/Qwen-0.5B-DPO-1epoch", - "sha": "f5569969d307d193798eff52c0527e23f4ac8bb9", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.385732750247261, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.26473313031644924, - "normalized_score": 26.473313031644928 - }, - "bbh": { - "name": "BBH", - "value": 0.31907502434278595, - "normalized_score": 5.543694750350672 - }, - "math": { - "name": "MATH Level 5", - "value": 0.028700906344410877, - "normalized_score": 2.8700906344410875 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2525167785234899, - "normalized_score": 0.33557046979865535 - }, - "musr": { - "name": "MUSR", - "value": 0.33517708333333335, - "normalized_score": 2.897135416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15575132978723405, - "normalized_score": 6.19459219858156 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-26", - "submission_date": "2024-12-26", - "generation": 0, - "base_model": "JayHyeon/Qwen-0.5B-DPO-1epoch", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.956513484116059 - } - }, - { - "id": "JayHyeon/Qwen-0.5B-DPO-5epoch_bfloat16_4363737d67e793b7cfb714dda4aa27677a4db6e4_True", - "model": { - "name": "JayHyeon/Qwen-0.5B-DPO-5epoch", - "sha": "4363737d67e793b7cfb714dda4aa27677a4db6e4", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.198583184761694, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25701472094043804, - "normalized_score": 25.701472094043805 - }, - "bbh": { - "name": "BBH", - "value": 0.3112109544868782, - "normalized_score": 5.056692258334079 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04003021148036254, - "normalized_score": 4.003021148036254 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24328859060402686, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.33796875, - "normalized_score": 2.512760416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15325797872340424, - "normalized_score": 5.917553191489359 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-26", - "submission_date": "2024-12-26", - "generation": 0, - "base_model": "JayHyeon/Qwen-0.5B-DPO-5epoch", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 0.494, - "co2_cost": 1.4107319976270312 - } - }, - { - "id": "JayHyeon/Qwen-0.5B-IRPO-1epoch_bfloat16_2dc73651ff3cbf0e4638c3bd5b1d87cfe2afc15f_True", - "model": { - "name": "JayHyeon/Qwen-0.5B-IRPO-1epoch", - "sha": "2dc73651ff3cbf0e4638c3bd5b1d87cfe2afc15f", - "precision": "bfloat16", - "type": 
"fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.031892544703613, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25891301746033857, - "normalized_score": 25.891301746033857 - }, - "bbh": { - "name": "BBH", - "value": 0.31638216610052033, - "normalized_score": 5.324351851851852 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03172205438066465, - "normalized_score": 3.1722054380664653 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24664429530201343, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3286354166666667, - "normalized_score": 2.2460937499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15001662234042554, - "normalized_score": 5.557402482269504 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-26", - "submission_date": "2024-12-26", - "generation": 0, - "base_model": "JayHyeon/Qwen-0.5B-IRPO-1epoch", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9908871168069725 - } - }, - { - "id": "JayHyeon/Qwen-0.5B-IRPO-5epoch_bfloat16_dca128b2490982a6f2d53d017ad44c1b7829fabe_True", - "model": { - "name": "JayHyeon/Qwen-0.5B-IRPO-5epoch", - "sha": "dca128b2490982a6f2d53d017ad44c1b7829fabe", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 6.9234686409529, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24867130325314607, - "normalized_score": 24.86713032531461 - }, - "bbh": { - "name": "BBH", - "value": 0.31891656220326015, - "normalized_score": 5.7113344972692275 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0324773413897281, - "normalized_score": 3.2477341389728096 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23993288590604026, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.32866666666666666, - "normalized_score": 2.0833333333333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1506815159574468, - "normalized_score": 5.631279550827422 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-26", - "submission_date": "2024-12-26", - "generation": 0, - "base_model": "JayHyeon/Qwen-0.5B-IRPO-5epoch", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 1.4630507773978825 - } - }, - { - "id": "JayHyeon/Qwen-0.5B-eDPO-1epoch_bfloat16_d24f341c6034334f397c156593ac8eece0a8a6ff_True", - "model": { - "name": "JayHyeon/Qwen-0.5B-eDPO-1epoch", - "sha": "d24f341c6034334f397c156593ac8eece0a8a6ff", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.280997177732158, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.26233504878167707, - "normalized_score": 26.233504878167707 - }, - "bbh": { - "name": "BBH", - "value": 0.3180637583450692, - "normalized_score": 5.918400632703615 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03474320241691843, - "normalized_score": 3.474320241691843 - }, - "gpqa": { - "name": 
"GPQA", - "value": 0.2424496644295302, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.33269791666666665, - "normalized_score": 1.920572916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15525265957446807, - "normalized_score": 6.139184397163118 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-26", - "submission_date": "2024-12-26", - "generation": 0, - "base_model": "JayHyeon/Qwen-0.5B-eDPO-1epoch", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9731382094916657 - } - }, - { - "id": "JayHyeon/Qwen-0.5B-eDPO-5epoch_bfloat16_8de61ddfbe7dc2a00228309af4851797694cd153_True", - "model": { - "name": "JayHyeon/Qwen-0.5B-eDPO-5epoch", - "sha": "8de61ddfbe7dc2a00228309af4851797694cd153", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 6.72779460682018, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24774708883540117, - "normalized_score": 24.774708883540114 - }, - "bbh": { - "name": "BBH", - "value": 0.3096491823869347, - "normalized_score": 5.1978376906318084 - }, - "math": { - "name": "MATH Level 5", - "value": 0.023413897280966767, - "normalized_score": 2.3413897280966767 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24916107382550334, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3326354166666667, - "normalized_score": 2.2460937499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15226063829787234, - "normalized_score": 5.8067375886524815 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-20", - "submission_date": "2024-12-26", - "generation": 0, - "base_model": "JayHyeon/Qwen-0.5B-eDPO-5epoch", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 1.446003944235028 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT_bfloat16_2e7122e69e62e72eba6e21b0bc921906402dd5fa_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT", - "sha": "2e7122e69e62e72eba6e21b0bc921906402dd5fa", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.158039000338244, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.27677340567472086, - "normalized_score": 27.677340567472086 - }, - "bbh": { - "name": "BBH", - "value": 0.3253697801563151, - "normalized_score": 5.93241994210165 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03927492447129909, - "normalized_score": 3.927492447129909 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.33415625, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15201130319148937, - "normalized_score": 5.779033687943262 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-26", - "submission_date": 
"2024-12-26", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 0.63, - "co2_cost": 1.009091526783928 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1_bfloat16_2736ff4f329f204d91dd47b8bc951945b7ccc572_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1", - "sha": "2736ff4f329f204d91dd47b8bc951945b7ccc572", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 8.15078478555964, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24687274210206694, - "normalized_score": 24.687274210206695 - }, - "bbh": { - "name": "BBH", - "value": 0.3260313037664168, - "normalized_score": 6.12607291013818 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06495468277945618, - "normalized_score": 6.495468277945618 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.34336458333333336, - "normalized_score": 2.187239583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1574966755319149, - "normalized_score": 6.3885195035461 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-26", - "submission_date": "2024-12-26", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 1.0041674574559156 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1_bfloat16_c469240bdc78d707215b4e58d12a72c7b75abfb3_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1", - "sha": "c469240bdc78d707215b4e58d12a72c7b75abfb3", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 8.319017359475316, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2605863553150086, - "normalized_score": 26.05863553150086 - }, - "bbh": { - "name": "BBH", - "value": 0.3308028437367363, - "normalized_score": 6.622365326648162 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04984894259818731, - "normalized_score": 4.984894259818732 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2802013422818792, - "normalized_score": 4.026845637583895 - }, - "musr": { - "name": "MUSR", - "value": 0.3288229166666667, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16256648936170212, - "normalized_score": 6.951832151300234 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-26", - "submission_date": "2024-12-26", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 1.0015027584440948 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1_bfloat16_77f99c1b2a2d32c84d0cd986eb952927c3b77497_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1", - "sha": 
"77f99c1b2a2d32c84d0cd986eb952927c3b77497", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.896219502136105, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2529178136234081, - "normalized_score": 25.291781362340814 - }, - "bbh": { - "name": "BBH", - "value": 0.3261949089625076, - "normalized_score": 6.129987166860655 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05664652567975831, - "normalized_score": 5.664652567975831 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.330125, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15757978723404256, - "normalized_score": 6.3977541371158395 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-26", - "submission_date": "2024-12-26", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9959784337587837 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT_bfloat16_804606871a044917289c9ea22d335a80a0708cb6_False", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT", - "sha": "804606871a044917289c9ea22d335a80a0708cb6", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 6.6110587340398945, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19636453498938372, - "normalized_score": 19.636453498938373 - }, - "bbh": { - "name": "BBH", - "value": 0.31207478976310743, - "normalized_score": 4.4344753335124 - }, - "math": { - "name": "MATH Level 5", - "value": 0.027190332326283987, - "normalized_score": 2.719033232628399 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2785234899328859, - "normalized_score": 3.8031319910514525 - }, - "musr": { - "name": "MUSR", - "value": 0.3394270833333333, - "normalized_score": 1.5950520833333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16730385638297873, - "normalized_score": 7.478206264775414 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.7276743059196251 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4_bfloat16_2303839a4f4f3a1ada55113c451177ad481eb647_False", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4", - "sha": "2303839a4f4f3a1ada55113c451177ad481eb647", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 5.941930910286053, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2019596891802639, - "normalized_score": 20.19596891802639 - }, - "bbh": { - "name": "BBH", - "value": 0.3017092819749249, - "normalized_score": 4.331493365022777 - }, - "math": { - "name": "MATH Level 
5", - "value": 0.0188821752265861, - "normalized_score": 1.8882175226586102 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25083892617449666, - "normalized_score": 0.11185682326622093 - }, - "musr": { - "name": "MUSR", - "value": 0.3446354166666667, - "normalized_score": 2.2460937499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16190159574468085, - "normalized_score": 6.877955082742317 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-28", - "submission_date": "2024-12-28", - "generation": 1, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.714998944312747 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4-2ep_bfloat16_b183a552d7e26053a6fb0ee01191835d7735b80d_False", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4-2ep", - "sha": "b183a552d7e26053a6fb0ee01191835d7735b80d", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 6.345325435592133, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2140498322229462, - "normalized_score": 21.40498322229462 - }, - "bbh": { - "name": "BBH", - "value": 0.3172227797719337, - "normalized_score": 5.650884333531393 - }, - "math": { - "name": "MATH Level 5", - "value": 0.026435045317220542, - "normalized_score": 2.643504531722054 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24664429530201343, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.34727083333333336, - "normalized_score": 2.408854166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15367353723404256, - "normalized_score": 5.963726359338062 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.580354599240864 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4-3ep_bfloat16_c8eb2639aeba964754425b9e22fda656fdfb9f06_False", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4-3ep", - "sha": "c8eb2639aeba964754425b9e22fda656fdfb9f06", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 6.469032763498576, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22573992561957826, - "normalized_score": 22.573992561957823 - }, - "bbh": { - "name": "BBH", - "value": 0.3064261556890236, - "normalized_score": 4.797235096058625 - }, - "math": { - "name": "MATH Level 5", - "value": 0.026435045317220542, - "normalized_score": 2.643504531722054 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2483221476510067, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.36606249999999996, - "normalized_score": 2.891145833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15317486702127658, - "normalized_score": 5.908318557919619 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - 
"is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.4200189869366269 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4-5ep_bfloat16_3baf97b78f186e2a2bfadb77a894cc12642709a8_False", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4-5ep", - "sha": "3baf97b78f186e2a2bfadb77a894cc12642709a8", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 5.890344422168456, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19868726091215752, - "normalized_score": 19.868726091215752 - }, - "bbh": { - "name": "BBH", - "value": 0.31044747322019184, - "normalized_score": 4.784241393250257 - }, - "math": { - "name": "MATH Level 5", - "value": 0.019637462235649546, - "normalized_score": 1.9637462235649545 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2533557046979866, - "normalized_score": 0.44742729306487633 - }, - "musr": { - "name": "MUSR", - "value": 0.3406666666666667, - "normalized_score": 2.0833333333333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15575132978723405, - "normalized_score": 6.19459219858156 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.2679503095602949 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5_bfloat16_7a00c50afd13d3020b200733e87309ea81126501_False", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5", - "sha": "7a00c50afd13d3020b200733e87309ea81126501", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 6.664256620538701, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1985875255433361, - "normalized_score": 19.858752554333613 - }, - "bbh": { - "name": "BBH", - "value": 0.3139860294769257, - "normalized_score": 4.213683823190269 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0377643504531722, - "normalized_score": 3.7764350453172204 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.34603125, - "normalized_score": 1.920572916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1697972074468085, - "normalized_score": 7.7552452718676115 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-28", - "submission_date": "2024-12-28", - "generation": 1, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.71611308822178 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5-2ep_bfloat16_27b7325a3e7b629ded08040ba017a6c63e3be68a_False", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5-2ep", - "sha": "27b7325a3e7b629ded08040ba017a6c63e3be68a", - "precision": "bfloat16", - 
"type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.003091729061743, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19706379074189817, - "normalized_score": 19.706379074189815 - }, - "bbh": { - "name": "BBH", - "value": 0.3224699194774388, - "normalized_score": 5.619297426317974 - }, - "math": { - "name": "MATH Level 5", - "value": 0.052870090634441085, - "normalized_score": 5.287009063444108 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26929530201342283, - "normalized_score": 2.572706935123044 - }, - "musr": { - "name": "MUSR", - "value": 0.3367604166666667, - "normalized_score": 1.5950520833333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1651429521276596, - "normalized_score": 7.238105791962175 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.6947619269204228 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5-3ep_bfloat16_869cde816bd94004f54e73447291eb4eb9da832b_False", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5-3ep", - "sha": "869cde816bd94004f54e73447291eb4eb9da832b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.740340198303553, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2241164554493189, - "normalized_score": 22.41164554493189 - }, - "bbh": { - "name": "BBH", - "value": 0.32468117082421427, - "normalized_score": 6.246296486215907 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05362537764350453, - "normalized_score": 5.362537764350453 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.3353333333333333, - "normalized_score": 2.0833333333333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16888297872340424, - "normalized_score": 7.65366430260047 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-28", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.651995118215892 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5-5ep_bfloat16_c91713cc1c0a0d316f467ae16010fdde6dc9f267_False", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5-5ep", - "sha": "c91713cc1c0a0d316f467ae16010fdde6dc9f267", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.8890047643086625, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22918744486850445, - "normalized_score": 22.918744486850443 - }, - "bbh": { - "name": "BBH", - "value": 0.3259343389530942, - "normalized_score": 6.5376144197820265 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05211480362537765, - "normalized_score": 
5.211480362537765 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.3235208333333333, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16879986702127658, - "normalized_score": 7.644429669030731 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.641911210347618 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4_bfloat16_e6754bdc0fbeb7fc8d0df3c3677c0f70f9c4d3a8_False", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4", - "sha": "e6754bdc0fbeb7fc8d0df3c3677c0f70f9c4d3a8", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 5.555176381236881, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2034335562972912, - "normalized_score": 20.343355629729118 - }, - "bbh": { - "name": "BBH", - "value": 0.2935549587263229, - "normalized_score": 3.114588111846371 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02416918429003021, - "normalized_score": 2.416918429003021 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2575503355704698, - "normalized_score": 1.0067114093959737 - }, - "musr": { - "name": "MUSR", - "value": 0.3434270833333333, - "normalized_score": 1.8617187499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.14128989361702127, - "normalized_score": 4.587765957446806 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-28", - "submission_date": "2024-12-28", - "generation": 1, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.7071369576852713 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4-2ep_bfloat16_5a8ef93b6c4867dc95656b3c49f45fc26207444c_False", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4-2ep", - "sha": "5a8ef93b6c4867dc95656b3c49f45fc26207444c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 5.5183018931282595, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.18307535117931534, - "normalized_score": 18.307535117931533 - }, - "bbh": { - "name": "BBH", - "value": 0.29839616748934167, - "normalized_score": 3.1994992498175416 - }, - "math": { - "name": "MATH Level 5", - "value": 0.024924471299093656, - "normalized_score": 2.492447129909366 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2424496644295302, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3567604166666667, - "normalized_score": 3.728385416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1484375, - "normalized_score": 5.381944444444444 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": 
"2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.711045303670801 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4-3ep_bfloat16_7c5ba3070e1f395881226ab15cfad8af17522c52_False", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4-3ep", - "sha": "7c5ba3070e1f395881226ab15cfad8af17522c52", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 5.465283047679917, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1989620872617987, - "normalized_score": 19.89620872617987 - }, - "bbh": { - "name": "BBH", - "value": 0.3109875129533253, - "normalized_score": 4.420916097586122 - }, - "math": { - "name": "MATH Level 5", - "value": 0.015105740181268883, - "normalized_score": 1.5105740181268883 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.3449479166666667, - "normalized_score": 0.8851562499999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.14162234042553193, - "normalized_score": 4.62470449172577 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.5817230626386445 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4-5ep_bfloat16_ed756a6854f9ccf5dfa119f55ce74a7c791b0868_False", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4-5ep", - "sha": "ed756a6854f9ccf5dfa119f55ce74a7c791b0868", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 6.087118748270012, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.18971994308434953, - "normalized_score": 18.97199430843495 - }, - "bbh": { - "name": "BBH", - "value": 0.2936418449815176, - "normalized_score": 3.069330255279086 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01812688821752266, - "normalized_score": 1.812688821752266 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26929530201342283, - "normalized_score": 2.572706935123044 - }, - "musr": { - "name": "MUSR", - "value": 0.38739583333333333, - "normalized_score": 6.357812500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.13364361702127658, - "normalized_score": 3.7381796690307305 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.640625424134062 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5_bfloat16_59b7674144f30acea5e4470e29cc4d59b48d5e8e_False", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5", - "sha": "59b7674144f30acea5e4470e29cc4d59b48d5e8e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": 
"Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 6.9691100972452, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2067558522498083, - "normalized_score": 20.67558522498083 - }, - "bbh": { - "name": "BBH", - "value": 0.3203968601167082, - "normalized_score": 4.981848012762598 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03700906344410876, - "normalized_score": 3.7009063444108756 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26929530201342283, - "normalized_score": 2.572706935123044 - }, - "musr": { - "name": "MUSR", - "value": 0.3486666666666667, - "normalized_score": 2.3499999999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16780252659574468, - "normalized_score": 7.533614066193853 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-28", - "submission_date": "2024-12-28", - "generation": 1, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.6849158103513178 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep_bfloat16_f4688fa593ea40689cc53d0683a2bb03262b0cd9_False", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep", - "sha": "f4688fa593ea40689cc53d0683a2bb03262b0cd9", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.69236788584868, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2201447714286981, - "normalized_score": 22.014477142869808 - }, - "bbh": { - "name": "BBH", - "value": 0.3217197270809481, - "normalized_score": 5.7648020969838045 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04078549848942598, - "normalized_score": 4.078549848942599 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.33669791666666665, - "normalized_score": 2.720572916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17096077127659576, - "normalized_score": 7.884530141843972 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.741069582939448 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_3e-7-3ep_0alp_5lam_bfloat16_c40b7687fb6eb50a8e7dbc85ed5a37d9566cef55_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_3e-7-3ep_0alp_5lam", - "sha": "c40b7687fb6eb50a8e7dbc85ed5a37d9566cef55", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.460129211594215, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24105262924595627, - "normalized_score": 24.10526292459563 - }, - "bbh": { - "name": "BBH", - "value": 0.31671815484837784, - "normalized_score": 6.702479854955681 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03474320241691843, - "normalized_score": 3.474320241691843 - }, - 
"gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.330125, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15625, - "normalized_score": 6.249999999999999 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-06", - "submission_date": "2025-01-07", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.6802691752300627 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-1ep_0alp_5lam_bfloat16_d581f58e13931b7b21b6237a6277fbe130bec706_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-1ep_0alp_5lam", - "sha": "d581f58e13931b7b21b6237a6277fbe130bec706", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.89170131391145, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23685598656010498, - "normalized_score": 23.685598656010498 - }, - "bbh": { - "name": "BBH", - "value": 0.3260038632940968, - "normalized_score": 6.892109783030413 - }, - "math": { - "name": "MATH Level 5", - "value": 0.045317220543806644, - "normalized_score": 4.531722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.3355208333333333, - "normalized_score": 2.4401041666666674 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15699800531914893, - "normalized_score": 6.333111702127657 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-03", - "submission_date": "2025-01-03", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-1ep_0alp_5lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9629006772435991 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-2ep_0alp_5lam_bfloat16_cbb9bedaa4b57478682a4e96e2807fede7a85a39_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-2ep_0alp_5lam", - "sha": "cbb9bedaa4b57478682a4e96e2807fede7a85a39", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.618464097122786, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22623971063444992, - "normalized_score": 22.62397106344499 - }, - "bbh": { - "name": "BBH", - "value": 0.3261540051256346, - "normalized_score": 7.351407019428777 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03474320241691843, - "normalized_score": 3.474320241691843 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.3408229166666667, - "normalized_score": 2.336197916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15408909574468085, - "normalized_score": 6.009899527186761 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, 
- "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-03", - "submission_date": "2025-01-03", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-2ep_0alp_5lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.966450423771423 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-3ep_0alp_5lam_bfloat16_02d1f2ab9340abc3592d8b8e5ed654b31d0c314c_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-3ep_0alp_5lam", - "sha": "02d1f2ab9340abc3592d8b8e5ed654b31d0c314c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.868642802061662, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25079455843827714, - "normalized_score": 25.07945584382771 - }, - "bbh": { - "name": "BBH", - "value": 0.3199331515135054, - "normalized_score": 6.453776300474527 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04078549848942598, - "normalized_score": 4.078549848942599 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.33545833333333336, - "normalized_score": 1.965625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15550199468085107, - "normalized_score": 6.166888297872341 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-03", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.6735105292159798 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-1ep_0alp_5lam_bfloat16_c064c510a2b5723caaa43cdae0fda827f7d671d6_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-1ep_0alp_5lam", - "sha": "c064c510a2b5723caaa43cdae0fda827f7d671d6", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.430131653175725, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.238979241745236, - "normalized_score": 23.8979241745236 - }, - "bbh": { - "name": "BBH", - "value": 0.31816042712158116, - "normalized_score": 6.676263541946459 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04003021148036254, - "normalized_score": 4.003021148036254 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.33279166666666665, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15600066489361702, - "normalized_score": 6.22229609929078 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-02", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-1ep_0alp_5lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.949960446462803 - } - }, - { - "id": 
"JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-2ep_0alp_5lam_bfloat16_066449b81749965ba3713217c5f924a1f96d19c9_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-2ep_0alp_5lam", - "sha": "066449b81749965ba3713217c5f924a1f96d19c9", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.336426732183583, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2423015376977531, - "normalized_score": 24.230153769775306 - }, - "bbh": { - "name": "BBH", - "value": 0.3154080373582542, - "normalized_score": 6.449024830632406 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03474320241691843, - "normalized_score": 3.474320241691843 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.33279166666666665, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15475398936170212, - "normalized_score": 6.083776595744679 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-02", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-2ep_0alp_5lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9530341118411932 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-3ep_0alp_5lam_bfloat16_c89911f8d376b2504dc7ef337d366ff13fd6fbc1_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-3ep_0alp_5lam", - "sha": "c89911f8d376b2504dc7ef337d366ff13fd6fbc1", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.648469593189115, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24932069132124984, - "normalized_score": 24.932069132124983 - }, - "bbh": { - "name": "BBH", - "value": 0.3189717077702392, - "normalized_score": 6.900838556122601 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04380664652567976, - "normalized_score": 4.380664652567976 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.334125, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15608377659574468, - "normalized_score": 6.23153073286052 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-02", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.619904502829789 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-1ep_0alp_0lam_bfloat16_f0ec8638c05222a9f5b66ba4a283dc6535866cef_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-1ep_0alp_0lam", - "sha": "f0ec8638c05222a9f5b66ba4a283dc6535866cef", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - 
"average_score": 7.714071022336462, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2541667220752049, - "normalized_score": 25.41667220752049 - }, - "bbh": { - "name": "BBH", - "value": 0.31671883869615397, - "normalized_score": 6.330228534336109 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04078549848942598, - "normalized_score": 4.078549848942599 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27181208053691275, - "normalized_score": 2.9082774049216997 - }, - "musr": { - "name": "MUSR", - "value": 0.32885416666666667, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15799534574468085, - "normalized_score": 6.443927304964539 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-02", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-1ep_0alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9335881625277723 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-2ep_0alp_0lam_bfloat16_e5f701dc4c9011ac425ac3d8f43f0a0c5fc07485_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-2ep_0alp_0lam", - "sha": "e5f701dc4c9011ac425ac3d8f43f0a0c5fc07485", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.634084055658554, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24507418095098782, - "normalized_score": 24.507418095098785 - }, - "bbh": { - "name": "BBH", - "value": 0.3159533058861391, - "normalized_score": 6.636386948995733 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04078549848942598, - "normalized_score": 4.078549848942599 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.3301875, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15608377659574468, - "normalized_score": 6.23153073286052 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-02", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-2ep_0alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9826218900481373 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-3ep_0alp_0lam_bfloat16_297d485cf519619f394f13f1cb4c18bb724fa587_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-3ep_0alp_0lam", - "sha": "297d485cf519619f394f13f1cb4c18bb724fa587", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.772250160016548, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25574032456105356, - "normalized_score": 25.574032456105357 - }, - "bbh": { - "name": "BBH", - "value": 0.31419826948787827, - "normalized_score": 6.317309144357895 - }, - "math": { - "name": "MATH Level 5", - "value": 
0.04003021148036254, - "normalized_score": 4.003021148036254 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.3315208333333333, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1574966755319149, - "normalized_score": 6.3885195035461 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-31", - "submission_date": "2025-01-02", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.585209509005599 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-2ep_0alp_0lam_bfloat16_338a87eaf84f8f843abfc765fa761f4aadfa650d_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-2ep_0alp_0lam", - "sha": "338a87eaf84f8f843abfc765fa761f4aadfa650d", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.773024145558947, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.26053648763059795, - "normalized_score": 26.053648763059794 - }, - "bbh": { - "name": "BBH", - "value": 0.3166968072745491, - "normalized_score": 6.435274271047841 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03625377643504532, - "normalized_score": 3.625377643504532 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.334125, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15766289893617022, - "normalized_score": 6.406988770685579 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-06", - "submission_date": "2025-01-07", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-2ep_0alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9553102297737666 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-3ep_0alp_0lam_bfloat16_dd17624113765a095ddb8908cb145f2b186e92d0_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-3ep_0alp_0lam", - "sha": "dd17624113765a095ddb8908cb145f2b186e92d0", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.5699062251194, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25781371206177384, - "normalized_score": 25.781371206177386 - }, - "bbh": { - "name": "BBH", - "value": 0.31732037273750646, - "normalized_score": 6.3853505237711525 - }, - "math": { - "name": "MATH Level 5", - "value": 0.035498489425981876, - "normalized_score": 3.5498489425981874 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.32879166666666665, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1583277925531915, - "normalized_score": 6.4808658392434975 - } - }, - 
"features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-05", - "submission_date": "2025-01-07", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.730292283324373 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-1ep_0alp_0lam_bfloat16_4f0af6566d027fd47391c5ae107634ac53b4f37a_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-1ep_0alp_0lam", - "sha": "4f0af6566d027fd47391c5ae107634ac53b4f37a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.393492455729212, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23353369060758786, - "normalized_score": 23.353369060758787 - }, - "bbh": { - "name": "BBH", - "value": 0.3197619098572027, - "normalized_score": 6.402744874205392 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03851963746223565, - "normalized_score": 3.8519637462235647 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2751677852348994, - "normalized_score": 3.3557046979865834 - }, - "musr": { - "name": "MUSR", - "value": 0.32755208333333335, - "normalized_score": 0.9440104166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1580784574468085, - "normalized_score": 6.453161938534278 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-02", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-1ep_0alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9391865870283091 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-2ep_0alp_0lam_bfloat16_7e9475946e5e36c20e5e16ddbcda8a1ed66d68dd_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-2ep_0alp_0lam", - "sha": "7e9475946e5e36c20e5e16ddbcda8a1ed66d68dd", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.844417926699584, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24719743613611883, - "normalized_score": 24.719743613611882 - }, - "bbh": { - "name": "BBH", - "value": 0.32262707839652854, - "normalized_score": 6.901808055033277 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05060422960725076, - "normalized_score": 5.0604229607250755 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.32621875, - "normalized_score": 0.9440104166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15375664893617022, - "normalized_score": 5.972960992907801 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-02", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-2ep_0alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - 
"co2_cost": 0.9538609155694655 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-3ep_0alp_0lam_bfloat16_c9dd97520558d2f9c363d8088ca940b2a22e78d5_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-3ep_0alp_0lam", - "sha": "c9dd97520558d2f9c363d8088ca940b2a22e78d5", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.696682188954132, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2474223948013493, - "normalized_score": 24.742239480134934 - }, - "bbh": { - "name": "BBH", - "value": 0.32291208173140107, - "normalized_score": 7.002679365505716 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04154078549848943, - "normalized_score": 4.1540785498489425 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.32748958333333333, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15392287234042554, - "normalized_score": 5.991430260047281 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-01", - "submission_date": "2025-01-02", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.6179762800252908 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-1ep_0alp_0lam_bfloat16_105abeccbc545d8b3400a1f62c33196411580777_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-1ep_0alp_0lam", - "sha": "105abeccbc545d8b3400a1f62c33196411580777", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.753028845132317, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24027801788144343, - "normalized_score": 24.027801788144345 - }, - "bbh": { - "name": "BBH", - "value": 0.32453683161596314, - "normalized_score": 6.620615393798311 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04305135951661632, - "normalized_score": 4.305135951661631 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28187919463087246, - "normalized_score": 4.250559284116329 - }, - "musr": { - "name": "MUSR", - "value": 0.32621875, - "normalized_score": 0.9440104166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1573304521276596, - "normalized_score": 6.37005023640662 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-02", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-1ep_0alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9503727986258511 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-2ep_0alp_0lam_bfloat16_1aa0cee350e6029e0d1f1af2337625e241ffa794_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-2ep_0alp_0lam", - "sha": "1aa0cee350e6029e0d1f1af2337625e241ffa794", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": 
"Original", - "architecture": "Qwen2Model", - "average_score": 7.5524251169059395, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23680611887569425, - "normalized_score": 23.680611887569427 - }, - "bbh": { - "name": "BBH", - "value": 0.3224293761524927, - "normalized_score": 6.7804484137642875 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04607250755287009, - "normalized_score": 4.607250755287009 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.33548958333333334, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15159574468085107, - "normalized_score": 5.7328605200945635 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-02", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-2ep_0alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9838564283065735 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-3ep_0alp_0lam_bfloat16_b50ee3302d29ee2fe2a0eba12b14b425ddd8f8af_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-3ep_0alp_0lam", - "sha": "b50ee3302d29ee2fe2a0eba12b14b425ddd8f8af", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.645766396524414, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23718068059415687, - "normalized_score": 23.718068059415685 - }, - "bbh": { - "name": "BBH", - "value": 0.32477052921998556, - "normalized_score": 7.007125824454593 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04758308157099698, - "normalized_score": 4.758308157099698 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.3394270833333333, - "normalized_score": 1.5950520833333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1550033244680851, - "normalized_score": 6.111480496453899 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-01", - "submission_date": "2025-01-02", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.5996652304020564 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-7-3ep_0alp_0lam_bfloat16_5dada6736554e6198d817258e32db4ba67598c6a_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-7-3ep_0alp_0lam", - "sha": "5dada6736554e6198d817258e32db4ba67598c6a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.580039598446345, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24992021170494289, - "normalized_score": 24.99202117049429 - }, - "bbh": { - "name": "BBH", - "value": 0.31806007750183346, - "normalized_score": 6.671898931566538 - }, - "math": { - "name": "MATH Level 
5", - "value": 0.04154078549848943, - "normalized_score": 4.1540785498489425 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.3288229166666667, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15741356382978725, - "normalized_score": 6.37928486997636 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-06", - "submission_date": "2025-01-07", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.727522292956337 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-1ep_0alp_0lam_bfloat16_b82b883ed0c5ad5cc1156f69343b0a73e48216f0_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-1ep_0alp_0lam", - "sha": "b82b883ed0c5ad5cc1156f69343b0a73e48216f0", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.682977767911112, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23810489501190177, - "normalized_score": 23.810489501190176 - }, - "bbh": { - "name": "BBH", - "value": 0.32421844512358233, - "normalized_score": 6.428288118900529 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04984894259818731, - "normalized_score": 4.984894259818732 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.3328229166666667, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15724734042553193, - "normalized_score": 6.3608156028368805 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-02", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-1ep_0alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9244260639179844 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-2ep_0alp_0lam_bfloat16_3506c4972cb7966146afcd51f7bfc85a6ad1af4a_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-2ep_0alp_0lam", - "sha": "3506c4972cb7966146afcd51f7bfc85a6ad1af4a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.668495371524835, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2420765790325226, - "normalized_score": 24.20765790325226 - }, - "bbh": { - "name": "BBH", - "value": 0.3224798177796032, - "normalized_score": 6.992686975855793 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04003021148036254, - "normalized_score": 4.003021148036254 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2802013422818792, - "normalized_score": 4.026845637583895 - }, - "musr": { - "name": "MUSR", - "value": 0.3408229166666667, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.14960106382978725, - "normalized_score": 
5.511229314420804 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-02", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-2ep_0alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 1.4388206268955834 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-3ep_0alp_0lam_bfloat16_6dbdfc7167e8066b907a67bf271a3ece465c9126_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-3ep_0alp_0lam", - "sha": "6dbdfc7167e8066b907a67bf271a3ece465c9126", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.698046996488631, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23805502732749106, - "normalized_score": 23.805502732749105 - }, - "bbh": { - "name": "BBH", - "value": 0.32652003776870003, - "normalized_score": 7.221132822992211 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0445619335347432, - "normalized_score": 4.45619335347432 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.34079166666666666, - "normalized_score": 1.698958333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.14985039893617022, - "normalized_score": 5.538933215130023 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-01", - "submission_date": "2025-01-03", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.5684267432703647 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_1ep_0alp_0lam_bfloat16_6ca6763c8b83910834975eaed0a676a7818ed7f7_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_1ep_0alp_0lam", - "sha": "6ca6763c8b83910834975eaed0a676a7818ed7f7", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.824147748385339, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25264298727376694, - "normalized_score": 25.264298727376698 - }, - "bbh": { - "name": "BBH", - "value": 0.3176911636441555, - "normalized_score": 6.537585206076343 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04380664652567976, - "normalized_score": 4.380664652567976 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27348993288590606, - "normalized_score": 3.1319910514541416 - }, - "musr": { - "name": "MUSR", - "value": 0.33415625, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15724734042553193, - "normalized_score": 6.3608156028368805 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-01", - "submission_date": "2025-01-01", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_1ep_0alp_0lam", - "hub_license": "", - "hub_hearts": 0, - 
"params_billions": 0.494, - "co2_cost": 0.9560694003572412 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_2ep_0alp_0lam_bfloat16_7816a4be1e7fa2a70963b3551a516023d2ff497b_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_2ep_0alp_0lam", - "sha": "7816a4be1e7fa2a70963b3551a516023d2ff497b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.706086604401125, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24572356901909154, - "normalized_score": 24.572356901909156 - }, - "bbh": { - "name": "BBH", - "value": 0.316045450978746, - "normalized_score": 6.56672292356821 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0445619335347432, - "normalized_score": 4.45619335347432 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.33015625, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15716422872340424, - "normalized_score": 6.351580969267137 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-01", - "submission_date": "2025-01-01", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_2ep_0alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9856434837220436 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_3ep_0alp_0lam_bfloat16_90d15e820203f295c1b8d8989a1a84c1bc8aa33c_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_3ep_0alp_0lam", - "sha": "90d15e820203f295c1b8d8989a1a84c1bc8aa33c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.813940287125231, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2441998342176536, - "normalized_score": 24.41998342176536 - }, - "bbh": { - "name": "BBH", - "value": 0.3193544697854515, - "normalized_score": 6.932125973677143 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04833836858006042, - "normalized_score": 4.833836858006042 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27348993288590606, - "normalized_score": 3.1319910514541416 - }, - "musr": { - "name": "MUSR", - "value": 0.33148958333333334, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1566655585106383, - "normalized_score": 6.296173167848698 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-01", - "submission_date": "2025-01-01", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_3ep_0alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.955982707001288 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_1ep_0alp_0lam_bfloat16_420c57e2f72e08a93f66f683e4cf6d2e6f271273_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_1ep_0alp_0lam", - "sha": "420c57e2f72e08a93f66f683e4cf6d2e6f271273", - "precision": "bfloat16", - "type": 
"fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.842463015004548, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.26036139664977814, - "normalized_score": 26.036139664977817 - }, - "bbh": { - "name": "BBH", - "value": 0.31784656431310543, - "normalized_score": 6.4262889097800455 - }, - "math": { - "name": "MATH Level 5", - "value": 0.035498489425981876, - "normalized_score": 3.5498489425981874 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.3288229166666667, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15674867021276595, - "normalized_score": 6.305407801418437 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-01", - "submission_date": "2025-01-01", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_1ep_0alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9937085506127672 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_2ep_0alp_0lam_bfloat16_7c26fd2ab4ca381fe40283e3a2654027c8d15f29_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_2ep_0alp_0lam", - "sha": "7c26fd2ab4ca381fe40283e3a2654027c8d15f29", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.58869267971069, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24899599728719796, - "normalized_score": 24.899599728719796 - }, - "bbh": { - "name": "BBH", - "value": 0.3172899997448431, - "normalized_score": 6.4779954188677 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03927492447129909, - "normalized_score": 3.927492447129909 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.3301875, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15691489361702127, - "normalized_score": 6.323877068557918 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-01", - "submission_date": "2025-01-01", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_2ep_0alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9328624737149237 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_3ep_0alp_0lam_bfloat16_f59298bb65ecfe0cf66bc10244c72d54cce32283_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_3ep_0alp_0lam", - "sha": "f59298bb65ecfe0cf66bc10244c72d54cce32283", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.82811926779825, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.26036139664977814, - "normalized_score": 26.036139664977817 - }, - "bbh": { - "name": "BBH", - "value": 0.3149566664115098, - 
"normalized_score": 6.25191698749515 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0377643504531722, - "normalized_score": 3.7764350453172204 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.3341875, - "normalized_score": 1.3734374999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15658244680851063, - "normalized_score": 6.286938534278959 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-01", - "submission_date": "2025-01-01", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_3ep_0alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9555559480929502 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-1ep_1alp_0lam_bfloat16_ec9d56ff27bb4fc040e8b74aa5d13393bb783d09_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-1ep_1alp_0lam", - "sha": "ec9d56ff27bb4fc040e8b74aa5d13393bb783d09", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.958391854528215, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2550410688085391, - "normalized_score": 25.50410688085391 - }, - "bbh": { - "name": "BBH", - "value": 0.3211026993947845, - "normalized_score": 6.714916509982989 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04909365558912387, - "normalized_score": 4.909365558912387 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.32876041666666667, - "normalized_score": 1.5950520833333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15708111702127658, - "normalized_score": 6.342346335697398 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-06", - "submission_date": "2025-01-07", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-1ep_1alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9798458957510294 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-2ep_1alp_0lam_bfloat16_5c2e63ca2afe100d77356911c58ff31abe5cf274_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-2ep_1alp_0lam", - "sha": "5c2e63ca2afe100d77356911c58ff31abe5cf274", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.678680596214881, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24779695651981187, - "normalized_score": 24.779695651981186 - }, - "bbh": { - "name": "BBH", - "value": 0.3197773660515741, - "normalized_score": 6.875548169039305 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04229607250755287, - "normalized_score": 4.229607250755287 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26677852348993286, - "normalized_score": 2.2371364653243813 - }, - "musr": { - "name": "MUSR", - "value": 0.33145833333333335, - "normalized_score": 1.432291666666666 
- }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15866023936170212, - "normalized_score": 6.5178043735224565 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-06", - "submission_date": "2025-01-07", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-2ep_1alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9858942704409892 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-3ep_1alp_0lam_bfloat16_d8cf7978e4bbee8b07ed7cc50137716a316df6e1_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-3ep_1alp_0lam", - "sha": "d8cf7978e4bbee8b07ed7cc50137716a316df6e1", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.707475337922656, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24747226248576, - "normalized_score": 24.747226248576 - }, - "bbh": { - "name": "BBH", - "value": 0.32246983072126806, - "normalized_score": 7.08976945115946 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04003021148036254, - "normalized_score": 4.003021148036254 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.330125, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15558510638297873, - "normalized_score": 6.176122931442081 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-05", - "submission_date": "2025-01-07", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.6879119412493844 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_3e-7-3ep_1alp_0lam_bfloat16_2b0966c028ebf7d08fb81eb972dead8625f8caf5_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_3e-7-3ep_1alp_0lam", - "sha": "2b0966c028ebf7d08fb81eb972dead8625f8caf5", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.811415481732978, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2590127528291599, - "normalized_score": 25.901275282915996 - }, - "bbh": { - "name": "BBH", - "value": 0.3185132309797721, - "normalized_score": 6.706365162503357 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03625377643504532, - "normalized_score": 3.625377643504532 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.3275208333333333, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15857712765957446, - "normalized_score": 6.508569739952717 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-06", - "submission_date": "2025-01-07", - "generation": 2, - "base_model": 
"Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.5746445665887192 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-1ep_1alp_0lam_bfloat16_acd21ede9d21f47e42bb6dbddc663190ed84115c_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-1ep_1alp_0lam", - "sha": "acd21ede9d21f47e42bb6dbddc663190ed84115c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.608040308738378, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23233464984020177, - "normalized_score": 23.233464984020177 - }, - "bbh": { - "name": "BBH", - "value": 0.3179474145066817, - "normalized_score": 6.3889454442355325 - }, - "math": { - "name": "MATH Level 5", - "value": 0.045317220543806644, - "normalized_score": 4.531722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.32625, - "normalized_score": 1.0479166666666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15475398936170212, - "normalized_score": 6.083776595744679 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-03", - "submission_date": "2025-01-03", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-1ep_1alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9484488489610159 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-2ep_1alp_0lam_bfloat16_5997ff3f07583d3a8fda77b3aa2613f5a57aec7a_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-2ep_1alp_0lam", - "sha": "5997ff3f07583d3a8fda77b3aa2613f5a57aec7a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.376023721057838, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23151017079127825, - "normalized_score": 23.151017079127826 - }, - "bbh": { - "name": "BBH", - "value": 0.3259705145690442, - "normalized_score": 7.4891521174679925 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04154078549848943, - "normalized_score": 4.1540785498489425 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.3383125, - "normalized_score": 0.9890625000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15209441489361702, - "normalized_score": 5.788268321513002 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-03", - "submission_date": "2025-01-03", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-2ep_1alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9495726767487782 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-3ep_1alp_0lam_bfloat16_1ebb2a4f735168fc784c4f059ca616d89f0dc7f2_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-3ep_1alp_0lam", - "sha": 
"1ebb2a4f735168fc784c4f059ca616d89f0dc7f2", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.500614312695059, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2298368329366082, - "normalized_score": 22.983683293660818 - }, - "bbh": { - "name": "BBH", - "value": 0.33204616486918276, - "normalized_score": 8.34016877070463 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04305135951661632, - "normalized_score": 4.305135951661631 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.33288541666666666, - "normalized_score": 0.9440104166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15674867021276595, - "normalized_score": 6.305407801418437 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-03", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.606487559792415 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-1ep_1alp_0lam_bfloat16_a0422084edb424f6b97840b66a28dfb42ea159b0_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-1ep_1alp_0lam", - "sha": "a0422084edb424f6b97840b66a28dfb42ea159b0", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.830243972560936, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24687274210206694, - "normalized_score": 24.687274210206695 - }, - "bbh": { - "name": "BBH", - "value": 0.3178544697854515, - "normalized_score": 6.5670715074462045 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04154078549848943, - "normalized_score": 4.1540785498489425 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.33015625, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1574966755319149, - "normalized_score": 6.3885195035461 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-02", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-1ep_1alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9237289026935195 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-2ep_1alp_0lam_bfloat16_23d9c3e3c8991bbc2c3c300831fdcb360d4b3b64_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-2ep_1alp_0lam", - "sha": "23d9c3e3c8991bbc2c3c300831fdcb360d4b3b64", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.764548314144669, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2520434668900739, - "normalized_score": 25.20434668900739 - }, - "bbh": { - 
"name": "BBH", - "value": 0.3167822100533442, - "normalized_score": 6.6590467663473305 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03700906344410876, - "normalized_score": 3.7009063444108756 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2751677852348993, - "normalized_score": 3.355704697986576 - }, - "musr": { - "name": "MUSR", - "value": 0.3328229166666667, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15757978723404256, - "normalized_score": 6.3977541371158395 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-02", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-2ep_1alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9535723370555079 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-3ep_1alp_0lam_bfloat16_8e123c07a2773ab109d8f81b0f9b36fb28ad8f75_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-3ep_1alp_0lam", - "sha": "8e123c07a2773ab109d8f81b0f9b36fb28ad8f75", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.87411176143835, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2665815591519391, - "normalized_score": 26.65815591519391 - }, - "bbh": { - "name": "BBH", - "value": 0.3190675981811982, - "normalized_score": 6.8009730056406115 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03474320241691843, - "normalized_score": 3.474320241691843 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27181208053691275, - "normalized_score": 2.9082774049216997 - }, - "musr": { - "name": "MUSR", - "value": 0.32885416666666667, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1566655585106383, - "normalized_score": 6.296173167848698 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-02", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.6214702427172307 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-1ep_0alp_0lam_bfloat16_ca25ec43238ad3b072850761241df89cafd041a9_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-1ep_0alp_0lam", - "sha": "ca25ec43238ad3b072850761241df89cafd041a9", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.491580739378782, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24992021170494289, - "normalized_score": 24.99202117049429 - }, - "bbh": { - "name": "BBH", - "value": 0.31779941873624934, - "normalized_score": 6.560842435908914 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03700906344410876, - "normalized_score": 3.7009063444108756 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.334125, - 
"normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15625, - "normalized_score": 6.249999999999999 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-06", - "submission_date": "2025-01-07", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-1ep_0alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9777035716539286 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-2ep_0alp_0lam_bfloat16_0a5353e020ead8035ef74fcd41057ccac07a0f0b_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-2ep_0alp_0lam", - "sha": "0a5353e020ead8035ef74fcd41057ccac07a0f0b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.496575969696766, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24170201731406002, - "normalized_score": 24.170201731406003 - }, - "bbh": { - "name": "BBH", - "value": 0.3178391594145879, - "normalized_score": 6.524571656668756 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04003021148036254, - "normalized_score": 4.003021148036254 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.33279166666666665, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1574966755319149, - "normalized_score": 6.3885195035461 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-06", - "submission_date": "2025-01-07", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-2ep_0alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9748893197902357 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-3ep_0alp_0lam_bfloat16_0ad95d884546d654dc1dae59291bc8c7eb130715_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-3ep_0alp_0lam", - "sha": "0ad95d884546d654dc1dae59291bc8c7eb130715", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.758022627216668, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2562401095759252, - "normalized_score": 25.62401095759252 - }, - "bbh": { - "name": "BBH", - "value": 0.31904280434381205, - "normalized_score": 6.739192109111529 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04229607250755287, - "normalized_score": 4.229607250755287 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.334125, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15757978723404256, - "normalized_score": 6.3977541371158395 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": 
"2025-01-05", - "submission_date": "2025-01-07", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.595170776368906 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6-3ep_0alp_0lam_bfloat16_856e9a81ea94b2d3d7b3ba66e2e15db4018fd770_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6-3ep_0alp_0lam", - "sha": "856e9a81ea94b2d3d7b3ba66e2e15db4018fd770", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.538763276089521, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2408276705807258, - "normalized_score": 24.082767058072584 - }, - "bbh": { - "name": "BBH", - "value": 0.31647277641099675, - "normalized_score": 6.42055719700361 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04305135951661632, - "normalized_score": 4.305135951661631 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27348993288590606, - "normalized_score": 3.1319910514541416 - }, - "musr": { - "name": "MUSR", - "value": 0.3315208333333333, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1556682180851064, - "normalized_score": 6.18535756501182 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-31", - "submission_date": "2025-01-01", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.5812332726991494 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_1ep_0alp_0lam_bfloat16_82d617dbd311ef70dad0c1286f2268f57f9f6326_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_1ep_0alp_0lam", - "sha": "82d617dbd311ef70dad0c1286f2268f57f9f6326", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.900913542849916, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24812165055386376, - "normalized_score": 24.812165055386373 - }, - "bbh": { - "name": "BBH", - "value": 0.3204166266783764, - "normalized_score": 6.6874635150864 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04758308157099698, - "normalized_score": 4.758308157099698 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.3301875, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15915890957446807, - "normalized_score": 6.573212174940895 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-01", - "submission_date": "2025-01-01", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_1ep_0alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9418792589240153 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_2ep_0alp_0lam_bfloat16_5d424327b110d4eafae4a4250092e0ff8eb474f8_True", - "model": { - "name": 
"JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_2ep_0alp_0lam", - "sha": "5d424327b110d4eafae4a4250092e0ff8eb474f8", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.872419066693536, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2544914161092568, - "normalized_score": 25.44914161092568 - }, - "bbh": { - "name": "BBH", - "value": 0.3185709286639082, - "normalized_score": 6.553899558301251 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04984894259818731, - "normalized_score": 4.984894259818732 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27181208053691275, - "normalized_score": 2.9082774049216997 - }, - "musr": { - "name": "MUSR", - "value": 0.32885416666666667, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15608377659574468, - "normalized_score": 6.23153073286052 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-01", - "submission_date": "2025-01-01", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_2ep_0alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.994687575413746 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6-3ep_0alp_0lam_bfloat16_38e2dc85b55c6489ac69c986426e1dc625298779_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6-3ep_0alp_0lam", - "sha": "38e2dc85b55c6489ac69c986426e1dc625298779", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.662137140256275, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2519935992056632, - "normalized_score": 25.19935992056632 - }, - "bbh": { - "name": "BBH", - "value": 0.320368681472897, - "normalized_score": 6.583344002745696 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03927492447129909, - "normalized_score": 3.927492447129909 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.32615625, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15375664893617022, - "normalized_score": 5.972960992907801 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-31", - "submission_date": "2025-01-01", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.6287579769869538 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_1ep_0alp_0lam_bfloat16_35c3951d1d1508e67968cee2599d017738add9e8_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_1ep_0alp_0lam", - "sha": "35c3951d1d1508e67968cee2599d017738add9e8", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.613408611706003, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 
0.23146030310686755, - "normalized_score": 23.146030310686758 - }, - "bbh": { - "name": "BBH", - "value": 0.32128474090743103, - "normalized_score": 6.560212045244278 - }, - "math": { - "name": "MATH Level 5", - "value": 0.045317220543806644, - "normalized_score": 4.531722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2802013422818792, - "normalized_score": 4.026845637583895 - }, - "musr": { - "name": "MUSR", - "value": 0.32221875, - "normalized_score": 0.9440104166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15824468085106383, - "normalized_score": 6.471631205673758 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-01", - "submission_date": "2025-01-01", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_1ep_0alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9466656521995674 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_2ep_0alp_0lam_bfloat16_70509db679e5a3b648b8e240599bfbd38d4dfdde_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_2ep_0alp_0lam", - "sha": "70509db679e5a3b648b8e240599bfbd38d4dfdde", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.688163880198053, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25149381419079153, - "normalized_score": 25.149381419079155 - }, - "bbh": { - "name": "BBH", - "value": 0.31867127828365593, - "normalized_score": 6.83074782881189 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04305135951661632, - "normalized_score": 4.305135951661631 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27181208053691275, - "normalized_score": 2.9082774049216997 - }, - "musr": { - "name": "MUSR", - "value": 0.32888541666666665, - "normalized_score": 0.9440104166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15392287234042554, - "normalized_score": 5.991430260047281 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-01", - "submission_date": "2025-01-01", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_2ep_0alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9649867208676277 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-1ep_0alp_0lam_bfloat16_13169c96ccb74161783784aee75d3debc92d3191_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-1ep_0alp_0lam", - "sha": "13169c96ccb74161783784aee75d3debc92d3191", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.510368922283475, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24719743613611883, - "normalized_score": 24.719743613611882 - }, - "bbh": { - "name": "BBH", - "value": 0.3213274785812292, - "normalized_score": 6.204970976213926 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03474320241691843, - "normalized_score": 3.474320241691843 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - 
"normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.3261875, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15882646276595744, - "normalized_score": 6.536273640661936 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-02", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-1ep_0alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9650369713069662 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-2ep_0alp_0lam_bfloat16_b4960bf1fc8ae5cc21a88cff4632d12a6f38f578_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-2ep_0alp_0lam", - "sha": "b4960bf1fc8ae5cc21a88cff4632d12a6f38f578", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.696957299033557, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24599839536873275, - "normalized_score": 24.599839536873276 - }, - "bbh": { - "name": "BBH", - "value": 0.32337658694524307, - "normalized_score": 7.019682155967409 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0377643504531722, - "normalized_score": 3.7764350453172204 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.33021875, - "normalized_score": 0.9440104166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15334109042553193, - "normalized_score": 5.926787825059102 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-02", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-2ep_0alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9756798711523569 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-3ep_0alp_0lam_bfloat16_5c391a7ae4a7a50bae309ef408a774f2cc6c5f87_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-3ep_0alp_0lam", - "sha": "5c391a7ae4a7a50bae309ef408a774f2cc6c5f87", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.113734287630097, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25236816092412573, - "normalized_score": 25.236816092412575 - }, - "bbh": { - "name": "BBH", - "value": 0.3255638228201855, - "normalized_score": 7.0604011102157775 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05362537764350453, - "normalized_score": 5.362537764350453 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.33679166666666666, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15309175531914893, - "normalized_score": 5.89908392434988 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, 
- "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-01", - "submission_date": "2025-01-02", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.6261828818601183 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-1ep_0alp_0lam_bfloat16_d2a971d280f6df1042e11dbb7cfa45df44514096_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-1ep_0alp_0lam", - "sha": "d2a971d280f6df1042e11dbb7cfa45df44514096", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.472755907553406, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2264646692996804, - "normalized_score": 22.646466929968042 - }, - "bbh": { - "name": "BBH", - "value": 0.3252098558034601, - "normalized_score": 6.715595621810369 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04758308157099698, - "normalized_score": 4.758308157099698 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27348993288590606, - "normalized_score": 3.1319910514541416 - }, - "musr": { - "name": "MUSR", - "value": 0.32615625, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1568317819148936, - "normalized_score": 6.314642434988178 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-02", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-1ep_0alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9554874322349814 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-2ep_0alp_0lam_bfloat16_c7f7a7e1650c751b29689ec2df185d4b2e5bab3a_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-2ep_0alp_0lam", - "sha": "c7f7a7e1650c751b29689ec2df185d4b2e5bab3a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.491778365975214, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23016152697066006, - "normalized_score": 23.01615269706601 - }, - "bbh": { - "name": "BBH", - "value": 0.3224479825736107, - "normalized_score": 6.98474035276211 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04380664652567976, - "normalized_score": 4.380664652567976 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27684563758389263, - "normalized_score": 3.5794183445190177 - }, - "musr": { - "name": "MUSR", - "value": 0.34079166666666666, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15001662234042554, - "normalized_score": 5.557402482269504 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-02", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-2ep_0alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9770739578099976 - } - }, - { - "id": 
"JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-3ep_0alp_0lam_bfloat16_ca502a15a6eba95542bc1e8b0a30b09497e7a5b0_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-3ep_0alp_0lam", - "sha": "ca502a15a6eba95542bc1e8b0a30b09497e7a5b0", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.931808376696927, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25236816092412573, - "normalized_score": 25.236816092412575 - }, - "bbh": { - "name": "BBH", - "value": 0.3278027492189594, - "normalized_score": 7.363649162861492 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04078549848942598, - "normalized_score": 4.078549848942599 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.33945833333333336, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15209441489361702, - "normalized_score": 5.788268321513002 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-01", - "submission_date": "2025-01-02", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.6191035921575376 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7-3ep_0alp_0lam_bfloat16_174ec47630d774ab7c5a4166535ec74147c642aa_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7-3ep_0alp_0lam", - "sha": "174ec47630d774ab7c5a4166535ec74147c642aa", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.664774636147854, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2657570801030156, - "normalized_score": 26.575708010301554 - }, - "bbh": { - "name": "BBH", - "value": 0.31752113645211816, - "normalized_score": 6.726276300474528 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03625377643504532, - "normalized_score": 3.625377643504532 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.3301875, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1574966755319149, - "normalized_score": 6.3885195035461 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-31", - "submission_date": "2024-12-31", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.5672288780515016 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_1ep_0alp_0lam_bfloat16_1b22458420a322788506fff16bf5d56721c8f2f6_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_1ep_0alp_0lam", - "sha": "1b22458420a322788506fff16bf5d56721c8f2f6", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.6637155055011235, - 
"has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2487211709375568, - "normalized_score": 24.87211709375568 - }, - "bbh": { - "name": "BBH", - "value": 0.3189091360416723, - "normalized_score": 6.708541946458948 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0377643504531722, - "normalized_score": 3.7764350453172204 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27181208053691275, - "normalized_score": 2.9082774049216997 - }, - "musr": { - "name": "MUSR", - "value": 0.3275208333333333, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15949135638297873, - "normalized_score": 6.610150709219859 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-31", - "submission_date": "2024-12-31", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_1ep_0alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9606349545204644 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_2ep_0alp_0lam_bfloat16_b7fe3cbf6d09cc0a1ea9fe6fa34808ed72933c8a_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_2ep_0alp_0lam", - "sha": "b7fe3cbf6d09cc0a1ea9fe6fa34808ed72933c8a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.759729017494784, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2560151509106947, - "normalized_score": 25.601515091069473 - }, - "bbh": { - "name": "BBH", - "value": 0.3158776856286612, - "normalized_score": 6.253469424299401 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0377643504531722, - "normalized_score": 3.7764350453172204 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27684563758389263, - "normalized_score": 3.5794183445190177 - }, - "musr": { - "name": "MUSR", - "value": 0.3275208333333333, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15616688829787234, - "normalized_score": 6.240765366430259 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-31", - "submission_date": "2024-12-31", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_2ep_0alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9716211474827172 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7-3ep_0alp_0lam_bfloat16_933667537bf908dae0b1114e2bff815280fc6b83_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7-3ep_0alp_0lam", - "sha": "933667537bf908dae0b1114e2bff815280fc6b83", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.566871862822331, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2498703440205322, - "normalized_score": 24.98703440205322 - }, - "bbh": { - "name": "BBH", - "value": 0.31561997255280577, - "normalized_score": 6.443718103679828 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04003021148036254, - 
"normalized_score": 4.003021148036254 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.3301875, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15558510638297873, - "normalized_score": 6.176122931442081 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-31", - "submission_date": "2025-01-01", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.635012914673809 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_1ep_0alp_0lam_bfloat16_b2bfe72f4ad00468aabc2886b5960438b43cd84c_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_1ep_0alp_0lam", - "sha": "b2bfe72f4ad00468aabc2886b5960438b43cd84c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.522450873561554, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.249595517670891, - "normalized_score": 24.9595517670891 - }, - "bbh": { - "name": "BBH", - "value": 0.31774285416798703, - "normalized_score": 6.39987465305757 - }, - "math": { - "name": "MATH Level 5", - "value": 0.045317220543806644, - "normalized_score": 4.531722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2625838926174497, - "normalized_score": 1.6778523489932917 - }, - "musr": { - "name": "MUSR", - "value": 0.33148958333333334, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1566655585106383, - "normalized_score": 6.296173167848698 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-31", - "submission_date": "2025-01-01", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_1ep_0alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 1.4377628452926612 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_2ep_0alp_0lam_bfloat16_b53ec7094778d0d77cc37e267c18506977df2a95_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_2ep_0alp_0lam", - "sha": "b53ec7094778d0d77cc37e267c18506977df2a95", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.658751981428238, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25149381419079153, - "normalized_score": 25.149381419079155 - }, - "bbh": { - "name": "BBH", - "value": 0.3172338500122228, - "normalized_score": 6.482712194466828 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04380664652567976, - "normalized_score": 4.380664652567976 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.3275208333333333, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15533577127659576, - "normalized_score": 6.148419030732861 - } - }, - "features": { - 
"is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-31", - "submission_date": "2025-01-01", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_2ep_0alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9673478289923537 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-3ep_bfloat16_21ae492ab6eccf22bf141d33b1928b57558aa26e_False", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-3ep", - "sha": "21ae492ab6eccf22bf141d33b1928b57558aa26e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.452521242968808, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22808813946993975, - "normalized_score": 22.808813946993972 - }, - "bbh": { - "name": "BBH", - "value": 0.3239538094779519, - "normalized_score": 6.085450245131953 - }, - "math": { - "name": "MATH Level 5", - "value": 0.045317220543806644, - "normalized_score": 4.531722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.330125, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17461768617021275, - "normalized_score": 8.290854018912528 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.4998281694187698 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep_bfloat16_dfc638508a822295921d68aa11b3d7ae97d912c8_False", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep", - "sha": "dfc638508a822295921d68aa11b3d7ae97d912c8", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.715260794976504, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23478259905938464, - "normalized_score": 23.478259905938465 - }, - "bbh": { - "name": "BBH", - "value": 0.33076056644270485, - "normalized_score": 7.179764119844701 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05060422960725076, - "normalized_score": 5.0604229607250755 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.34088541666666666, - "normalized_score": 0.9440104166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16954787234042554, - "normalized_score": 7.727541371158392 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.4430169200453364 - } - }, - { - "id": 
"JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_bfloat16_5b0f281d0bfdc9723af05b90e3586b3a122d3f7e_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam", - "sha": "5b0f281d0bfdc9723af05b90e3586b3a122d3f7e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.834821841142105, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25259311958935626, - "normalized_score": 25.259311958935623 - }, - "bbh": { - "name": "BBH", - "value": 0.323809171214906, - "normalized_score": 6.757650565553466 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03927492447129909, - "normalized_score": 3.927492447129909 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.3528229166666667, - "normalized_score": 2.336197916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15741356382978725, - "normalized_score": 6.37928486997636 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-30", - "submission_date": "2024-12-31", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.5272056484708256 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_1ep_bfloat16_0565f9723b4d12663e25664b2b81c0111562c739_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_1ep", - "sha": "0565f9723b4d12663e25664b2b81c0111562c739", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.472893784749629, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24812165055386376, - "normalized_score": 24.812165055386373 - }, - "bbh": { - "name": "BBH", - "value": 0.31748404240871353, - "normalized_score": 5.959990748201868 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03851963746223565, - "normalized_score": 3.8519637462235647 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2625838926174497, - "normalized_score": 1.6778523489932917 - }, - "musr": { - "name": "MUSR", - "value": 0.34752083333333333, - "normalized_score": 1.9067708333333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15965757978723405, - "normalized_score": 6.628619976359339 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-31", - "submission_date": "2024-12-31", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_1ep", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9262143872438419 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_2ep_bfloat16_b546b909e307db7301cbf21aacdc3ed5a32dff55_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_2ep", - "sha": "b546b909e307db7301cbf21aacdc3ed5a32dff55", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - 
"architecture": "Qwen2Model", - "average_score": 7.494108519452317, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25476624245889795, - "normalized_score": 25.476624245889795 - }, - "bbh": { - "name": "BBH", - "value": 0.3199073234678175, - "normalized_score": 6.1123436893783385 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03851963746223565, - "normalized_score": 3.8519637462235647 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.34348958333333335, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15616688829787234, - "normalized_score": 6.240765366430259 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-31", - "submission_date": "2024-12-31", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_2ep", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.920317545171435 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_bfloat16_164c3419bbd4db8e8d0eac08b67c7863038dc6e8_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam", - "sha": "164c3419bbd4db8e8d0eac08b67c7863038dc6e8", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.453297176972131, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2423015376977531, - "normalized_score": 24.230153769775306 - }, - "bbh": { - "name": "BBH", - "value": 0.32193163799444524, - "normalized_score": 6.240268525979645 - }, - "math": { - "name": "MATH Level 5", - "value": 0.033987915407854986, - "normalized_score": 3.3987915407854987 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.35152083333333334, - "normalized_score": 1.9067708333333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15633311170212766, - "normalized_score": 6.259234633569739 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-30", - "submission_date": "2024-12-31", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.4310927512750664 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_1ep_bfloat16_63d97aa17e6e7657aa46a39ad4b0a69456443db4_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_1ep", - "sha": "63d97aa17e6e7657aa46a39ad4b0a69456443db4", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.592452067439491, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24927082363683917, - "normalized_score": 24.927082363683915 - }, - "bbh": { - "name": "BBH", - "value": 0.3190945593427599, - "normalized_score": 6.292637807025397 - }, - "math": { - "name": "MATH Level 5", 
- "value": 0.03927492447129909, - "normalized_score": 3.927492447129909 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.34752083333333333, - "normalized_score": 1.3734374999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15915890957446807, - "normalized_score": 6.573212174940895 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-31", - "submission_date": "2024-12-31", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_1ep", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.911805268810864 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_2ep_bfloat16_29fe2c7fb9320dbce9444b094bc3f2df2aea7a95_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_2ep", - "sha": "29fe2c7fb9320dbce9444b094bc3f2df2aea7a95", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.702241458661472, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24779695651981187, - "normalized_score": 24.779695651981186 - }, - "bbh": { - "name": "BBH", - "value": 0.3218405915852565, - "normalized_score": 6.357407183573582 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04154078549848943, - "normalized_score": 4.1540785498489425 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26929530201342283, - "normalized_score": 2.572706935123044 - }, - "musr": { - "name": "MUSR", - "value": 0.35152083333333334, - "normalized_score": 2.1734375000000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15558510638297873, - "normalized_score": 6.176122931442081 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-31", - "submission_date": "2024-12-31", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_2ep", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9175874353422176 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5_bfloat16_60e9b653fa459a1c86b5b6753fbf53c65349c6c4_False", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5", - "sha": "60e9b653fa459a1c86b5b6753fbf53c65349c6c4", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 6.609556986596828, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2009856070781083, - "normalized_score": 20.098560707810833 - }, - "bbh": { - "name": "BBH", - "value": 0.31093810553451656, - "normalized_score": 4.746972667855022 - }, - "math": { - "name": "MATH Level 5", - "value": 0.033987915407854986, - "normalized_score": 3.3987915407854987 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.33809375, - "normalized_score": 1.5950520833333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16722074468085107, - "normalized_score": 
7.468971631205674 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-28", - "submission_date": "2024-12-28", - "generation": 1, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.7130763775454447 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5-2ep_bfloat16_74c7d21925c30d90602995e9b33b6662fe547109_False", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5-2ep", - "sha": "74c7d21925c30d90602995e9b33b6662fe547109", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 6.850186995920002, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21747186354428472, - "normalized_score": 21.74718635442847 - }, - "bbh": { - "name": "BBH", - "value": 0.3179879277889672, - "normalized_score": 5.832625611473314 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0377643504531722, - "normalized_score": 3.7764350453172204 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.33679166666666666, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16273271276595744, - "normalized_score": 6.970301418439714 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.615044419227721 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5-3ep_bfloat16_f56b42c1eb2ef4b675db004f5c723c62fc7483db_False", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5-3ep", - "sha": "f56b42c1eb2ef4b675db004f5c723c62fc7483db", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.074425558693155, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2198699450790569, - "normalized_score": 21.986994507905692 - }, - "bbh": { - "name": "BBH", - "value": 0.32974820176156994, - "normalized_score": 7.669544389639071 - }, - "math": { - "name": "MATH Level 5", - "value": 0.030211480362537766, - "normalized_score": 3.0211480362537766 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2533557046979866, - "normalized_score": 0.44742729306487633 - }, - "musr": { - "name": "MUSR", - "value": 0.35933333333333334, - "normalized_score": 2.0833333333333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1651429521276596, - "normalized_score": 7.238105791962175 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.4376652440986477 - } - }, - { - "id": 
"JayHyeon/Qwen2.5-0.5B-SFT-5e-5-5ep_bfloat16_6dfaa85d4394d0427aa4159d5d81d523a3412da8_False", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5-5ep", - "sha": "6dfaa85d4394d0427aa4159d5d81d523a3412da8", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.440232615798167, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2077299343519639, - "normalized_score": 20.77299343519639 - }, - "bbh": { - "name": "BBH", - "value": 0.3275980298873716, - "normalized_score": 7.191287458251598 - }, - "math": { - "name": "MATH Level 5", - "value": 0.027190332326283987, - "normalized_score": 2.719033232628399 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.3766354166666666, - "normalized_score": 4.979427083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15866023936170212, - "normalized_score": 6.5178043735224565 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.361085858952767 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5_bfloat16_49a3b8378a402807b855c0119e034098abe0d46f_False", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5", - "sha": "49a3b8378a402807b855c0119e034098abe0d46f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 6.761335194180454, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20925366915340185, - "normalized_score": 20.925366915340184 - }, - "bbh": { - "name": "BBH", - "value": 0.3158179005969299, - "normalized_score": 6.35784176033773 - }, - "math": { - "name": "MATH Level 5", - "value": 0.030211480362537766, - "normalized_score": 3.0211480362537766 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25671140939597314, - "normalized_score": 0.8948545861297527 - }, - "musr": { - "name": "MUSR", - "value": 0.33669791666666665, - "normalized_score": 2.4539062499999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1622340425531915, - "normalized_score": 6.914893617021275 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-28", - "submission_date": "2024-12-28", - "generation": 1, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.6818854605225897 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5-2ep_bfloat16_7b66d2d83065a5a70131af7c1f4ef4697e3a4793_False", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5-2ep", - "sha": "7b66d2d83065a5a70131af7c1f4ef4697e3a4793", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 6.502777310751012, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2156234347087949, - 
"normalized_score": 21.56234347087949 - }, - "bbh": { - "name": "BBH", - "value": 0.3100411318318588, - "normalized_score": 5.463607645078233 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03927492447129909, - "normalized_score": 3.927492447129909 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2424496644295302, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3367291666666667, - "normalized_score": 1.7578124999999993 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15674867021276595, - "normalized_score": 6.305407801418437 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.4621946194053217 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5-3ep_bfloat16_1549cc71c00753fab474efdcd3c8feec58aea2eb_False", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5-3ep", - "sha": "1549cc71c00753fab474efdcd3c8feec58aea2eb", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 6.894032740125938, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23805502732749106, - "normalized_score": 23.805502732749105 - }, - "bbh": { - "name": "BBH", - "value": 0.3199313632207049, - "normalized_score": 5.984021663044627 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03323262839879154, - "normalized_score": 3.3232628398791544 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23657718120805368, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3553645833333334, - "normalized_score": 2.4539062499999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15217752659574468, - "normalized_score": 5.7975029550827415 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.437731349494459 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5-5ep_bfloat16_cc8c819193645a52b75833dbb3ef9d5895afdb7e_False", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5-5ep", - "sha": "cc8c819193645a52b75833dbb3ef9d5895afdb7e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 6.575560410304202, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21197644472222593, - "normalized_score": 21.197644472222592 - }, - "bbh": { - "name": "BBH", - "value": 0.32002953673668666, - "normalized_score": 5.343662111309169 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02190332326283988, - "normalized_score": 2.190332326283988 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24580536912751677, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.37127083333333327, - "normalized_score": 3.7421874999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1628158244680851, - 
"normalized_score": 6.979536052009454 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.4582645673428911 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-DPO-1epoch_v1_bfloat16_0193e5d0320c7810011a6d9574e8657706eac706_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-DPO-1epoch_v1", - "sha": "0193e5d0320c7810011a6d9574e8657706eac706", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 6.245597623653178, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20245947419513555, - "normalized_score": 20.245947419513556 - }, - "bbh": { - "name": "BBH", - "value": 0.326814314271471, - "normalized_score": 6.136573850240249 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03625377643504532, - "normalized_score": 3.625377643504532 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.3209166666666667, - "normalized_score": 0.7812499999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.13297872340425532, - "normalized_score": 3.664302600472812 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-28", - "submission_date": "2024-12-28", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-DPO-1epoch_v1", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.961642524321046 - } - }, - { - "id": "JayHyeon/Qwen2.5-0.5B-SFT-MDPO-1epoch_v1_bfloat16_9d73894bc39e9d994cdd154ef006eeb9a1b06b1b_True", - "model": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-MDPO-1epoch_v1", - "sha": "9d73894bc39e9d994cdd154ef006eeb9a1b06b1b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 6.644463688191298, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1964144026737944, - "normalized_score": 19.64144026737944 - }, - "bbh": { - "name": "BBH", - "value": 0.32925816453885065, - "normalized_score": 6.524721998388398 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04682779456193353, - "normalized_score": 4.682779456193353 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.32615625, - "normalized_score": 1.8028645833333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.13372672872340424, - "normalized_score": 3.7474143026004705 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 0, - "base_model": "JayHyeon/Qwen2.5-0.5B-SFT-MDPO-1epoch_v1", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9604210151237631 - } - }, - { - "id": 
"JayHyeon/Qwen_0.5-DPOP_1e-6-3ep_0alp_5lam_bfloat16_0ff5d1179bf3a8a2b83654ea465fa826f2be163a_True", - "model": { - "name": "JayHyeon/Qwen_0.5-DPOP_1e-6-3ep_0alp_5lam", - "sha": "0ff5d1179bf3a8a2b83654ea465fa826f2be163a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.889615512040611, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25324250765746, - "normalized_score": 25.324250765746 - }, - "bbh": { - "name": "BBH", - "value": 0.3140431891367934, - "normalized_score": 6.140998671919301 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04909365558912387, - "normalized_score": 4.909365558912387 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.33145833333333335, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15658244680851063, - "normalized_score": 6.286938534278959 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-08", - "submission_date": "2025-01-10", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.703989511017824 - } - }, - { - "id": "JayHyeon/Qwen_0.5-DPOP_1e-7-3ep_0alp_5lam_bfloat16_d3aa726a59441183996a5b1307d98cd22c5a7c41_True", - "model": { - "name": "JayHyeon/Qwen_0.5-DPOP_1e-7-3ep_0alp_5lam", - "sha": "d3aa726a59441183996a5b1307d98cd22c5a7c41", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.889282686740077, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.26695612087040166, - "normalized_score": 26.695612087040164 - }, - "bbh": { - "name": "BBH", - "value": 0.3188575312560274, - "normalized_score": 6.642106052466649 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04078549848942598, - "normalized_score": 4.078549848942599 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26677852348993286, - "normalized_score": 2.2371364653243813 - }, - "musr": { - "name": "MUSR", - "value": 0.32879166666666665, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15625, - "normalized_score": 6.249999999999999 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-10", - "submission_date": "2025-01-10", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.759286387006712 - } - }, - { - "id": "JayHyeon/Qwen_0.5-DPOP_3e-6-1ep_0alp_5lam_bfloat16_413e2bcce19a1674df62dea89d2884158f38e0a1_True", - "model": { - "name": "JayHyeon/Qwen_0.5-DPOP_3e-6-1ep_0alp_5lam", - "sha": "413e2bcce19a1674df62dea89d2884158f38e0a1", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.503528444281592, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24807178286945303, 
- "normalized_score": 24.807178286945305 - }, - "bbh": { - "name": "BBH", - "value": 0.32608064671010917, - "normalized_score": 6.943810696272422 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04380664652567976, - "normalized_score": 4.380664652567976 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.3368229166666667, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15649933510638298, - "normalized_score": 6.277703900709218 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-08", - "submission_date": "2025-01-09", - "generation": 0, - "base_model": "JayHyeon/Qwen_0.5-DPOP_3e-6-1ep_0alp_5lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9443681211187519 - } - }, - { - "id": "JayHyeon/Qwen_0.5-DPOP_3e-6-2ep_0alp_5lam_bfloat16_7a7b741efe8ec7601b97d3c81a17a51186db4d1f_True", - "model": { - "name": "JayHyeon/Qwen_0.5-DPOP_3e-6-2ep_0alp_5lam", - "sha": "7a7b741efe8ec7601b97d3c81a17a51186db4d1f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.695125090795082, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23832985367713222, - "normalized_score": 23.83298536771322 - }, - "bbh": { - "name": "BBH", - "value": 0.32184656431310543, - "normalized_score": 7.149861894529503 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04305135951661632, - "normalized_score": 4.305135951661631 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.3341875, - "normalized_score": 1.3734374999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15034906914893617, - "normalized_score": 5.594341016548462 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-08", - "submission_date": "2025-01-09", - "generation": 0, - "base_model": "JayHyeon/Qwen_0.5-DPOP_3e-6-2ep_0alp_5lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9402523468511222 - } - }, - { - "id": "JayHyeon/Qwen_0.5-DPOP_3e-6-3ep_0alp_5lam_bfloat16_b220068434876242a9d403157b213b495be060ce_True", - "model": { - "name": "JayHyeon/Qwen_0.5-DPOP_3e-6-3ep_0alp_5lam", - "sha": "b220068434876242a9d403157b213b495be060ce", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.64886621890572, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24714756845170813, - "normalized_score": 24.71475684517081 - }, - "bbh": { - "name": "BBH", - "value": 0.32244323308961736, - "normalized_score": 6.96901565344555 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04003021148036254, - "normalized_score": 4.003021148036254 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.33276041666666667, - "normalized_score": 
1.5950520833333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15334109042553193, - "normalized_score": 5.926787825059102 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-07", - "submission_date": "2025-01-09", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.6105590892603854 - } - }, - { - "id": "JayHyeon/Qwen_0.5-DPOP_3e-7-1ep_0alp_5lam_bfloat16_2606b30c631db1d04fa00c5e070c6ab4a63da9d5_True", - "model": { - "name": "JayHyeon/Qwen_0.5-DPOP_3e-7-1ep_0alp_5lam", - "sha": "2606b30c631db1d04fa00c5e070c6ab4a63da9d5", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.460153325587554, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24474948691693596, - "normalized_score": 24.474948691693598 - }, - "bbh": { - "name": "BBH", - "value": 0.3181429193838813, - "normalized_score": 6.629315516160802 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04380664652567976, - "normalized_score": 4.380664652567976 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.334125, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15649933510638298, - "normalized_score": 6.277703900709218 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-15", - "submission_date": "2025-01-15", - "generation": 0, - "base_model": "JayHyeon/Qwen_0.5-DPOP_3e-7-1ep_0alp_5lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9978701848610899 - } - }, - { - "id": "JayHyeon/Qwen_0.5-DPOP_3e-7-2ep_0alp_5lam_bfloat16_69e9ffab2fcc5f6ea1a85a016392b9188795e897_True", - "model": { - "name": "JayHyeon/Qwen_0.5-DPOP_3e-7-2ep_0alp_5lam", - "sha": "69e9ffab2fcc5f6ea1a85a016392b9188795e897", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.630732587028278, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2551408041773605, - "normalized_score": 25.51408041773605 - }, - "bbh": { - "name": "BBH", - "value": 0.3194064593640778, - "normalized_score": 6.682421807383531 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0445619335347432, - "normalized_score": 4.45619335347432 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.32615625, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1566655585106383, - "normalized_score": 6.296173167848698 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-15", - "submission_date": "2025-01-15", - "generation": 0, - "base_model": "JayHyeon/Qwen_0.5-DPOP_3e-7-2ep_0alp_5lam", - "hub_license": "", - "hub_hearts": 0, - 
"params_billions": 0.494, - "co2_cost": 1.478217700979221 - } - }, - { - "id": "JayHyeon/Qwen_0.5-DPOP_3e-7-3ep_0alp_5lam_bfloat16_2bde9be8d3d84bea9a411f0d6d13473231cf95c8_True", - "model": { - "name": "JayHyeon/Qwen_0.5-DPOP_3e-7-3ep_0alp_5lam", - "sha": "2bde9be8d3d84bea9a411f0d6d13473231cf95c8", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.691670985905742, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25379216035674235, - "normalized_score": 25.379216035674233 - }, - "bbh": { - "name": "BBH", - "value": 0.31530652457997205, - "normalized_score": 6.35458053541051 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04154078549848943, - "normalized_score": 4.1540785498489425 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.326125, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1583277925531915, - "normalized_score": 6.4808658392434975 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-12", - "submission_date": "2025-01-15", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.6883948898981882 - } - }, - { - "id": "JayHyeon/Qwen_0.5-DPOP_5e-7-1ep_0alp_5lam_bfloat16_76b620a990201a95b5deec1ba40f9b261838313e_True", - "model": { - "name": "JayHyeon/Qwen_0.5-DPOP_5e-7-1ep_0alp_5lam", - "sha": "76b620a990201a95b5deec1ba40f9b261838313e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.493393961856238, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24022815019703275, - "normalized_score": 24.022815019703277 - }, - "bbh": { - "name": "BBH", - "value": 0.3168335157841944, - "normalized_score": 6.505902199540394 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0377643504531722, - "normalized_score": 3.7764350453172204 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27181208053691275, - "normalized_score": 2.9082774049216997 - }, - "musr": { - "name": "MUSR", - "value": 0.33279166666666665, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1568317819148936, - "normalized_score": 6.314642434988178 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-08", - "submission_date": "2025-01-09", - "generation": 0, - "base_model": "JayHyeon/Qwen_0.5-DPOP_5e-7-1ep_0alp_5lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9850000355205352 - } - }, - { - "id": "JayHyeon/Qwen_0.5-DPOP_5e-7-2ep_0alp_5lam_bfloat16_e0b144725edee45dd354686f05120eb77bf6a24c_True", - "model": { - "name": "JayHyeon/Qwen_0.5-DPOP_5e-7-2ep_0alp_5lam", - "sha": "e0b144725edee45dd354686f05120eb77bf6a24c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.718986668912689, - 
"has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24839647690350491, - "normalized_score": 24.83964769035049 - }, - "bbh": { - "name": "BBH", - "value": 0.3210570160312575, - "normalized_score": 6.769462425761781 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04380664652567976, - "normalized_score": 4.380664652567976 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.3288229166666667, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1573304521276596, - "normalized_score": 6.37005023640662 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-08", - "submission_date": "2025-01-09", - "generation": 0, - "base_model": "JayHyeon/Qwen_0.5-DPOP_5e-7-2ep_0alp_5lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9684952369307005 - } - }, - { - "id": "JayHyeon/Qwen_0.5-DPOP_5e-7-3ep_0alp_5lam_bfloat16_87b4f03642dfa89ad2a131cfd16c9d9de701a3e0_True", - "model": { - "name": "JayHyeon/Qwen_0.5-DPOP_5e-7-3ep_0alp_5lam", - "sha": "87b4f03642dfa89ad2a131cfd16c9d9de701a3e0", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.865443795694458, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25781371206177384, - "normalized_score": 25.781371206177386 - }, - "bbh": { - "name": "BBH", - "value": 0.32030958605054793, - "normalized_score": 6.7976270630017614 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04229607250755287, - "normalized_score": 4.229607250755287 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.32885416666666667, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1583277925531915, - "normalized_score": 6.4808658392434975 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-07", - "submission_date": "2025-01-09", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.6480123216517317 - } - }, - { - "id": "JayHyeon/Qwen_0.5-DPO_1e-6-3ep_0alp_0lam_bfloat16_d87a8bb4c6aeb88b3594f56e180df574092b5a1c_True", - "model": { - "name": "JayHyeon/Qwen_0.5-DPO_1e-6-3ep_0alp_0lam", - "sha": "d87a8bb4c6aeb88b3594f56e180df574092b5a1c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.62746022008641, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23163539408768735, - "normalized_score": 23.163539408768735 - }, - "bbh": { - "name": "BBH", - "value": 0.3258499805340021, - "normalized_score": 6.865286941551371 - }, - "math": { - "name": "MATH Level 5", - "value": 0.052870090634441085, - "normalized_score": 5.287009063444108 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26929530201342283, - "normalized_score": 
2.572706935123044 - }, - "musr": { - "name": "MUSR", - "value": 0.322125, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15799534574468085, - "normalized_score": 6.443927304964539 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-08", - "submission_date": "2025-01-10", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.4718979698729255 - } - }, - { - "id": "JayHyeon/Qwen_0.5-DPO_1e-7-3ep_0alp_0lam_bfloat16_a9658d65542a379073a29bd6a74e91ac7a016e24_True", - "model": { - "name": "JayHyeon/Qwen_0.5-DPO_1e-7-3ep_0alp_0lam", - "sha": "a9658d65542a379073a29bd6a74e91ac7a016e24", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.573179197494986, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23598163982677073, - "normalized_score": 23.59816398267707 - }, - "bbh": { - "name": "BBH", - "value": 0.3225125170893353, - "normalized_score": 7.100430208613126 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04380664652567976, - "normalized_score": 4.380664652567976 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.32221875, - "normalized_score": 0.9440104166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1595744680851064, - "normalized_score": 6.619385342789598 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-09", - "submission_date": "2025-01-10", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.6979621580424176 - } - }, - { - "id": "JayHyeon/Qwen_0.5-DPO_3e-6-1ep_0alp_0lam_bfloat16_297178a89709f6f072b473177f7cd6dcc3fc9240_True", - "model": { - "name": "JayHyeon/Qwen_0.5-DPO_3e-6-1ep_0alp_0lam", - "sha": "297178a89709f6f072b473177f7cd6dcc3fc9240", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 6.91953175659681, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23370878158840763, - "normalized_score": 23.370878158840764 - }, - "bbh": { - "name": "BBH", - "value": 0.3132229900705577, - "normalized_score": 5.926862745098039 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03474320241691843, - "normalized_score": 3.474320241691843 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.3235208333333333, - "normalized_score": 1.3734374999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15325797872340424, - "normalized_score": 5.917553191489359 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-08", - "submission_date": "2025-01-09", - "generation": 0, - "base_model": 
"JayHyeon/Qwen_0.5-DPO_3e-6-1ep_0alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 1.0017297185291754 - } - }, - { - "id": "JayHyeon/Qwen_0.5-DPO_3e-6-2ep_0alp_0lam_bfloat16_2dd99da9b5bae43985f64a70ae44cf165f70129f_True", - "model": { - "name": "JayHyeon/Qwen_0.5-DPO_3e-6-2ep_0alp_0lam", - "sha": "2dd99da9b5bae43985f64a70ae44cf165f70129f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 8.075905010851175, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25693936532843964, - "normalized_score": 25.693936532843964 - }, - "bbh": { - "name": "BBH", - "value": 0.32760017293049276, - "normalized_score": 7.35619576137537 - }, - "math": { - "name": "MATH Level 5", - "value": 0.054380664652567974, - "normalized_score": 5.438066465256798 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27181208053691275, - "normalized_score": 2.9082774049216997 - }, - "musr": { - "name": "MUSR", - "value": 0.3155833333333333, - "normalized_score": 0.7812499999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15649933510638298, - "normalized_score": 6.277703900709218 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-08", - "submission_date": "2025-01-09", - "generation": 0, - "base_model": "JayHyeon/Qwen_0.5-DPO_3e-6-2ep_0alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.990338277414754 - } - }, - { - "id": "JayHyeon/Qwen_0.5-DPO_3e-6-3ep_0alp_0lam_bfloat16_33ff7ce2813289b10c8e871edadbd345e0107fcf_True", - "model": { - "name": "JayHyeon/Qwen_0.5-DPO_3e-6-3ep_0alp_0lam", - "sha": "33ff7ce2813289b10c8e871edadbd345e0107fcf", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.64297366620885, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24599839536873275, - "normalized_score": 24.599839536873276 - }, - "bbh": { - "name": "BBH", - "value": 0.32674094707635526, - "normalized_score": 7.139829635632065 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04305135951661632, - "normalized_score": 4.305135951661631 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.3209166666666667, - "normalized_score": 1.3145833333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15433843085106383, - "normalized_score": 6.03760342789598 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-07", - "submission_date": "2025-01-07", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 2.6055028700638796 - } - }, - { - "id": "JayHyeon/Qwen_0.5-DPO_3e-7-1ep_0alp_0lam_bfloat16_acaeef2bbe0c2196192e3ac91a9b98650ecc5e57_True", - "model": { - "name": "JayHyeon/Qwen_0.5-DPO_3e-7-1ep_0alp_0lam", - "sha": "acaeef2bbe0c2196192e3ac91a9b98650ecc5e57", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": 
"Original", - "architecture": "Qwen2Model", - "average_score": 7.97015103471792, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2529178136234081, - "normalized_score": 25.291781362340814 - }, - "bbh": { - "name": "BBH", - "value": 0.32292563083414066, - "normalized_score": 6.931145581520281 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05513595166163142, - "normalized_score": 5.5135951661631415 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.3195208333333333, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15965757978723405, - "normalized_score": 6.628619976359339 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-15", - "submission_date": "2025-01-15", - "generation": 0, - "base_model": "JayHyeon/Qwen_0.5-DPO_3e-7-1ep_0alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 1.0747653244281923 - } - }, - { - "id": "JayHyeon/Qwen_0.5-DPO_3e-7-2ep_0alp_0lam_bfloat16_dc29c77fe34684723cc4d8e26e4f21ba6c6d1602_True", - "model": { - "name": "JayHyeon/Qwen_0.5-DPO_3e-7-2ep_0alp_0lam", - "sha": "dc29c77fe34684723cc4d8e26e4f21ba6c6d1602", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.9611230480624675, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25046986440422525, - "normalized_score": 25.046986440422522 - }, - "bbh": { - "name": "BBH", - "value": 0.3255735108237258, - "normalized_score": 7.1273111588623275 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04758308157099698, - "normalized_score": 4.758308157099698 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27181208053691275, - "normalized_score": 2.9082774049216997 - }, - "musr": { - "name": "MUSR", - "value": 0.3194895833333333, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15990691489361702, - "normalized_score": 6.656323877068558 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-15", - "submission_date": "2025-01-15", - "generation": 0, - "base_model": "JayHyeon/Qwen_0.5-DPO_3e-7-2ep_0alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 1.008058398895298 - } - }, - { - "id": "JayHyeon/Qwen_0.5-DPO_3e-7-3ep_0alp_0lam_bfloat16_24edabcf18f0669900661f08ca79a165dfd94916_True", - "model": { - "name": "JayHyeon/Qwen_0.5-DPO_3e-7-3ep_0alp_0lam", - "sha": "24edabcf18f0669900661f08ca79a165dfd94916", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.740913588485586, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2387044153955948, - "normalized_score": 23.87044153955948 - }, - "bbh": { - "name": "BBH", - "value": 0.3258394284267221, - "normalized_score": 7.222719655594354 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0445619335347432, - "normalized_score": 4.45619335347432 
- }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.31685416666666666, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1589095744680851, - "normalized_score": 6.545508274231676 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-12", - "submission_date": "2025-01-15", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.6343527793020924 - } - }, - { - "id": "JayHyeon/Qwen_0.5-DPO_5e-7-1ep_0alp_0lam_bfloat16_70e0eb9864c46bd73f24c774be412b6c38737118_True", - "model": { - "name": "JayHyeon/Qwen_0.5-DPO_5e-7-1ep_0alp_0lam", - "sha": "70e0eb9864c46bd73f24c774be412b6c38737118", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.081986081140814, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25324250765746, - "normalized_score": 25.324250765746 - }, - "bbh": { - "name": "BBH", - "value": 0.32182747858122923, - "normalized_score": 6.663952457695408 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0634441087613293, - "normalized_score": 6.3444108761329305 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.32085416666666666, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15932513297872342, - "normalized_score": 6.591681442080379 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-08", - "submission_date": "2025-01-25", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 2.115253927209164 - } - }, - { - "id": "JayHyeon/Qwen_0.5-DPO_5e-7-2ep_0alp_0lam_bfloat16_56cd57dddcf1408ccea18f3a796455aae3c4abc8_True", - "model": { - "name": "JayHyeon/Qwen_0.5-DPO_5e-7-2ep_0alp_0lam", - "sha": "56cd57dddcf1408ccea18f3a796455aae3c4abc8", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 8.095071949326913, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24562383365027018, - "normalized_score": 24.562383365027017 - }, - "bbh": { - "name": "BBH", - "value": 0.3299192088381941, - "normalized_score": 7.732770540484078 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05362537764350453, - "normalized_score": 5.362537764350453 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.318125, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16015625, - "normalized_score": 6.684027777777778 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": 
"2025-01-08", - "submission_date": "2025-01-09", - "generation": 0, - "base_model": "JayHyeon/Qwen_0.5-DPO_5e-7-2ep_0alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9417871309922725 - } - }, - { - "id": "JayHyeon/Qwen_0.5-DPO_5e-7-3ep_0alp_0lam_bfloat16_e6918f39615c9ccb51f5e8af19baa92f570ea244_True", - "model": { - "name": "JayHyeon/Qwen_0.5-DPO_5e-7-3ep_0alp_0lam", - "sha": "e6918f39615c9ccb51f5e8af19baa92f570ea244", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.974924607629624, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24225167001334236, - "normalized_score": 24.22516700133424 - }, - "bbh": { - "name": "BBH", - "value": 0.32712145602920534, - "normalized_score": 7.20213873220521 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0513595166163142, - "normalized_score": 5.13595166163142 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.318125, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15949135638297873, - "normalized_score": 6.610150709219859 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-07", - "submission_date": "2025-01-07", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.5068458858938178 - } - }, - { - "id": "JayHyeon/Qwen_0.5-IPO_5e-7-1ep_0alp_0lam_bfloat16_77800c2d6a4a8440b403b61bd8b80bfbd099b6c2_True", - "model": { - "name": "JayHyeon/Qwen_0.5-IPO_5e-7-1ep_0alp_0lam", - "sha": "77800c2d6a4a8440b403b61bd8b80bfbd099b6c2", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.328387378313979, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2573892826589006, - "normalized_score": 25.73892826589006 - }, - "bbh": { - "name": "BBH", - "value": 0.3279091360416723, - "normalized_score": 7.733923210075507 - }, - "math": { - "name": "MATH Level 5", - "value": 0.055891238670694864, - "normalized_score": 5.589123867069486 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26929530201342283, - "normalized_score": 2.572706935123044 - }, - "musr": { - "name": "MUSR", - "value": 0.31685416666666666, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16505984042553193, - "normalized_score": 7.228871158392436 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-26", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.61252824106982 - } - }, - { - "id": "JayHyeon/Qwen_0.5-IPO_5e-7-3ep_0alp_0lam_bfloat16_988e4c6867133c584f00c130e47dc6bd841019ca_True", - "model": { - "name": "JayHyeon/Qwen_0.5-IPO_5e-7-3ep_0alp_0lam", - "sha": "988e4c6867133c584f00c130e47dc6bd841019ca", - "precision": "bfloat16", - "type": 
"fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.595287891585235, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3072481017034801, - "normalized_score": 30.724810170348007 - }, - "bbh": { - "name": "BBH", - "value": 0.32638442794247285, - "normalized_score": 6.747260572417705 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0581570996978852, - "normalized_score": 5.81570996978852 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25671140939597314, - "normalized_score": 0.8948545861297527 - }, - "musr": { - "name": "MUSR", - "value": 0.31564583333333335, - "normalized_score": 0.45572916666666624 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1624002659574468, - "normalized_score": 6.933362884160756 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-26", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.4504781313625785 - } - }, - { - "id": "JayHyeon/Qwen_0.5-IRPO_1e-6-3ep_1alp_0lam_bfloat16_da7a999d5e5aa3ed84fb5e13c444416c7d6416c4_True", - "model": { - "name": "JayHyeon/Qwen_0.5-IRPO_1e-6-3ep_1alp_0lam", - "sha": "da7a999d5e5aa3ed84fb5e13c444416c7d6416c4", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.800520466258392, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25509093649294984, - "normalized_score": 25.509093649294982 - }, - "bbh": { - "name": "BBH", - "value": 0.3242353334886223, - "normalized_score": 7.213578356761274 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04682779456193353, - "normalized_score": 4.682779456193353 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26677852348993286, - "normalized_score": 2.2371364653243813 - }, - "musr": { - "name": "MUSR", - "value": 0.31825, - "normalized_score": 0.7812499999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15741356382978725, - "normalized_score": 6.37928486997636 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-08", - "submission_date": "2025-01-10", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.5367821997177236 - } - }, - { - "id": "JayHyeon/Qwen_0.5-IRPO_1e-7-3ep_1alp_0lam_bfloat16_a67e92b345a9088967f1e55d7c2dd06a1f0a73b1_True", - "model": { - "name": "JayHyeon/Qwen_0.5-IRPO_1e-7-3ep_1alp_0lam", - "sha": "a67e92b345a9088967f1e55d7c2dd06a1f0a73b1", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.273392756694859, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.26358395723347383, - "normalized_score": 26.358395723347385 - }, - "bbh": { - "name": "BBH", - "value": 0.3198054258965539, - "normalized_score": 6.900346643984839 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0513595166163142, - "normalized_score": 
5.13595166163142 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.32615625, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15857712765957446, - "normalized_score": 6.508569739952717 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-09", - "submission_date": "2025-01-10", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.5957421944206596 - } - }, - { - "id": "JayHyeon/Qwen_0.5-IRPO_3e-6-1ep_1alp_0lam_bfloat16_946496c22b300bdbce3406e3f799af781484e5f5_True", - "model": { - "name": "JayHyeon/Qwen_0.5-IRPO_3e-6-1ep_1alp_0lam", - "sha": "946496c22b300bdbce3406e3f799af781484e5f5", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.058958298337019, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23228478215579107, - "normalized_score": 23.228478215579106 - }, - "bbh": { - "name": "BBH", - "value": 0.3254731912466387, - "normalized_score": 7.306987942817919 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03700906344410876, - "normalized_score": 3.7009063444108756 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25083892617449666, - "normalized_score": 0.11185682326622093 - }, - "musr": { - "name": "MUSR", - "value": 0.31688541666666664, - "normalized_score": 1.2106770833333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16115359042553193, - "normalized_score": 6.794843380614658 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-08", - "submission_date": "2025-01-09", - "generation": 0, - "base_model": "JayHyeon/Qwen_0.5-IRPO_3e-6-1ep_1alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 1.032004910729791 - } - }, - { - "id": "JayHyeon/Qwen_0.5-IRPO_3e-6-2ep_1alp_0lam_bfloat16_93dd6319a0506da6e4691e74b01c5a55edad95fd_True", - "model": { - "name": "JayHyeon/Qwen_0.5-IRPO_3e-6-2ep_1alp_0lam", - "sha": "93dd6319a0506da6e4691e74b01c5a55edad95fd", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.289618635016649, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24137732328000816, - "normalized_score": 24.137732328000816 - }, - "bbh": { - "name": "BBH", - "value": 0.3314225693635648, - "normalized_score": 7.390762452621842 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03474320241691843, - "normalized_score": 3.474320241691843 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2516778523489933, - "normalized_score": 0.22371364653244186 - }, - "musr": { - "name": "MUSR", - "value": 0.33415625, - "normalized_score": 2.602864583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15317486702127658, - "normalized_score": 5.908318557919619 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": 
false - }, - "metadata": { - "upload_date": "2025-01-08", - "submission_date": "2025-01-09", - "generation": 0, - "base_model": "JayHyeon/Qwen_0.5-IRPO_3e-6-2ep_1alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9566755642062155 - } - }, - { - "id": "JayHyeon/Qwen_0.5-IRPO_3e-6-3ep_1alp_0lam_bfloat16_a5e33e9e6aef256f83a08a89ebefd485070c99e2_True", - "model": { - "name": "JayHyeon/Qwen_0.5-IRPO_3e-6-3ep_1alp_0lam", - "sha": "a5e33e9e6aef256f83a08a89ebefd485070c99e2", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.103245031991229, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2677805999193252, - "normalized_score": 26.778059991932516 - }, - "bbh": { - "name": "BBH", - "value": 0.3361518077587983, - "normalized_score": 8.111779105858478 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0513595166163142, - "normalized_score": 5.13595166163142 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25419463087248323, - "normalized_score": 0.5592841163310973 - }, - "musr": { - "name": "MUSR", - "value": 0.33815625, - "normalized_score": 1.8028645833333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15608377659574468, - "normalized_score": 6.23153073286052 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-07", - "submission_date": "2025-01-09", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.489902248473601 - } - }, - { - "id": "JayHyeon/Qwen_0.5-IRPO_3e-7-1ep_1alp_0lam_bfloat16_f357af91864d488236707089ad7de042b2916f49_True", - "model": { - "name": "JayHyeon/Qwen_0.5-IRPO_3e-7-1ep_1alp_0lam", - "sha": "f357af91864d488236707089ad7de042b2916f49", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 8.061310556795855, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25606501859510544, - "normalized_score": 25.60650185951054 - }, - "bbh": { - "name": "BBH", - "value": 0.3231121828613069, - "normalized_score": 7.0010276210941 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05362537764350453, - "normalized_score": 5.362537764350453 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27181208053691275, - "normalized_score": 2.9082774049216997 - }, - "musr": { - "name": "MUSR", - "value": 0.31955208333333335, - "normalized_score": 0.9440104166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1589095744680851, - "normalized_score": 6.545508274231676 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-15", - "submission_date": "2025-01-15", - "generation": 0, - "base_model": "JayHyeon/Qwen_0.5-IRPO_3e-7-1ep_1alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 1.5261960014175653 - } - }, - { - "id": "JayHyeon/Qwen_0.5-IRPO_3e-7-3ep_1alp_0lam_bfloat16_cde396f65f5126dd17c25a7ca1f1749df260883c_True", - "model": { - "name": "JayHyeon/Qwen_0.5-IRPO_3e-7-3ep_1alp_0lam", - "sha": 
"cde396f65f5126dd17c25a7ca1f1749df260883c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.09865538559358, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2639086512675257, - "normalized_score": 26.390865126752573 - }, - "bbh": { - "name": "BBH", - "value": 0.3257435380157632, - "normalized_score": 7.133620064464142 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04758308157099698, - "normalized_score": 4.758308157099698 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.32085416666666666, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15866023936170212, - "normalized_score": 6.5178043735224565 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-12", - "submission_date": "2025-01-15", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.6548160816235464 - } - }, - { - "id": "JayHyeon/Qwen_0.5-IRPO_5e-7-1ep_1alp_0lam_bfloat16_124bc1a305f8ed8e3dca3eeaa792af32a6923a9b_True", - "model": { - "name": "JayHyeon/Qwen_0.5-IRPO_5e-7-1ep_1alp_0lam", - "sha": "124bc1a305f8ed8e3dca3eeaa792af32a6923a9b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 8.092380138165963, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2517686405404327, - "normalized_score": 25.17686405404327 - }, - "bbh": { - "name": "BBH", - "value": 0.3213578303108222, - "normalized_score": 7.061898931566539 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05740181268882175, - "normalized_score": 5.740181268882175 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27348993288590606, - "normalized_score": 3.1319910514541416 - }, - "musr": { - "name": "MUSR", - "value": 0.31688541666666664, - "normalized_score": 0.9440104166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1584940159574468, - "normalized_score": 6.499335106382978 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-08", - "submission_date": "2025-01-09", - "generation": 0, - "base_model": "JayHyeon/Qwen_0.5-IRPO_5e-7-1ep_1alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9939143395959069 - } - }, - { - "id": "JayHyeon/Qwen_0.5-IRPO_5e-7-2ep_1alp_0lam_bfloat16_c4a122c84aa40921a419b2f40c1e031cc4bc74a8_True", - "model": { - "name": "JayHyeon/Qwen_0.5-IRPO_5e-7-2ep_1alp_0lam", - "sha": "c4a122c84aa40921a419b2f40c1e031cc4bc74a8", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 8.02099509740262, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24382527249919106, - "normalized_score": 24.382527249919107 - }, - "bbh": { - "name": "BBH", - "value": 0.3266053460297184, - "normalized_score": 
7.42829155101919 - }, - "math": { - "name": "MATH Level 5", - "value": 0.061933534743202415, - "normalized_score": 6.193353474320242 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.31955208333333335, - "normalized_score": 0.9440104166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15541888297872342, - "normalized_score": 6.157653664302601 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-08", - "submission_date": "2025-01-09", - "generation": 0, - "base_model": "JayHyeon/Qwen_0.5-IRPO_5e-7-2ep_1alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9537584810036167 - } - }, - { - "id": "JayHyeon/Qwen_0.5-IRPO_5e-7-3ep_1alp_0lam_bfloat16_edc6ae10fa40e12f04d5f0ccc579adf8df14bb2b_True", - "model": { - "name": "JayHyeon/Qwen_0.5-IRPO_5e-7-3ep_1alp_0lam", - "sha": "edc6ae10fa40e12f04d5f0ccc579adf8df14bb2b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.856695293795835, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24654804806801509, - "normalized_score": 24.654804806801508 - }, - "bbh": { - "name": "BBH", - "value": 0.32458923603023143, - "normalized_score": 7.086835437371295 - }, - "math": { - "name": "MATH Level 5", - "value": 0.052870090634441085, - "normalized_score": 5.287009063444108 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27181208053691275, - "normalized_score": 2.9082774049216997 - }, - "musr": { - "name": "MUSR", - "value": 0.31821875, - "normalized_score": 0.9440104166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15633311170212766, - "normalized_score": 6.259234633569739 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-07", - "submission_date": "2025-01-09", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.5479838953154563 - } - }, - { - "id": "JayHyeon/Qwen_0.5-MDPO_0.1_3e-6-3ep_0alp_0lam_bfloat16_14348fe846810b297fc960df76650eb786725e0b_True", - "model": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.1_3e-6-3ep_0alp_0lam", - "sha": "14348fe846810b297fc960df76650eb786725e0b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.047077683691153, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2505695997730466, - "normalized_score": 25.056959977304665 - }, - "bbh": { - "name": "BBH", - "value": 0.32614538576285174, - "normalized_score": 6.819112125824454 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04984894259818731, - "normalized_score": 4.984894259818732 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28187919463087246, - "normalized_score": 4.250559284116329 - }, - "musr": { - "name": "MUSR", - "value": 0.33818750000000003, - "normalized_score": 1.3734374999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15217752659574468, - "normalized_score": 5.7975029550827415 - 
} - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-09", - "submission_date": "2025-01-10", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.6532249263422252 - } - }, - { - "id": "JayHyeon/Qwen_0.5-MDPO_0.1_5e-7-3ep_0alp_0lam_bfloat16_1451c4d82e68073eaeff5f3bf0bc5cc27d70c5e0_True", - "model": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.1_5e-7-3ep_0alp_0lam", - "sha": "1451c4d82e68073eaeff5f3bf0bc5cc27d70c5e0", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.30787971874518, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24567370133468086, - "normalized_score": 24.56737013346809 - }, - "bbh": { - "name": "BBH", - "value": 0.3179765517720094, - "normalized_score": 6.6221693974393405 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03474320241691843, - "normalized_score": 3.474320241691843 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.3315208333333333, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15658244680851063, - "normalized_score": 6.286938534278959 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-08", - "submission_date": "2025-01-21", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.6946543424159073 - } - }, - { - "id": "JayHyeon/Qwen_0.5-MDPO_0.3_3e-6-3ep_0alp_0lam_bfloat16_a12da9d1ddf0c4347cee214f43ac448a9390929e_True", - "model": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.3_3e-6-3ep_0alp_0lam", - "sha": "a12da9d1ddf0c4347cee214f43ac448a9390929e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.891216810785522, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24539887498503968, - "normalized_score": 24.539887498503965 - }, - "bbh": { - "name": "BBH", - "value": 0.32157618750132033, - "normalized_score": 6.5665358731011425 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05060422960725076, - "normalized_score": 5.0604229607250755 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2802013422818792, - "normalized_score": 4.026845637583895 - }, - "musr": { - "name": "MUSR", - "value": 0.33818750000000003, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1544215425531915, - "normalized_score": 6.046838061465721 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-07", - "submission_date": "2025-01-09", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.635614455627846 - } - }, - { - "id": 
"JayHyeon/Qwen_0.5-MDPO_0.3_5e-7-3ep_0alp_0lam_bfloat16_e747ce9b85803323ba7176cfc76ccbdaf0648cf5_True", - "model": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.3_5e-7-3ep_0alp_0lam", - "sha": "e747ce9b85803323ba7176cfc76ccbdaf0648cf5", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.408853983144998, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2341830786756916, - "normalized_score": 23.418307867569162 - }, - "bbh": { - "name": "BBH", - "value": 0.3189252460411593, - "normalized_score": 6.633772569910765 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04003021148036254, - "normalized_score": 4.003021148036254 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.33015625, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15799534574468085, - "normalized_score": 6.443927304964539 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-07", - "submission_date": "2025-01-21", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 2.3537206927444156 - } - }, - { - "id": "JayHyeon/Qwen_0.5-MDPO_0.5_1e-5-3ep_0alp_0lam_bfloat16_330a386eae0d8993e329ef744a0e16c5b6d5853f_True", - "model": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.5_1e-5-3ep_0alp_0lam", - "sha": "330a386eae0d8993e329ef744a0e16c5b6d5853f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.59126646499979, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23196008812173918, - "normalized_score": 23.196008812173922 - }, - "bbh": { - "name": "BBH", - "value": 0.3233548545784329, - "normalized_score": 7.674537111648312 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03927492447129909, - "normalized_score": 3.927492447129909 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.33688541666666666, - "normalized_score": 1.47734375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15425531914893617, - "normalized_score": 6.02836879432624 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-08", - "submission_date": "2025-01-10", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.6884974551073353 - } - }, - { - "id": "JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-1ep_0alp_0lam_bfloat16_18a06ad9a792b24dfa889fb8bc1acb657c10285a_True", - "model": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-1ep_0alp_0lam", - "sha": "18a06ad9a792b24dfa889fb8bc1acb657c10285a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.390617436527434, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - 
"value": 0.24175188499847072, - "normalized_score": 24.175188499847074 - }, - "bbh": { - "name": "BBH", - "value": 0.3175499101875348, - "normalized_score": 6.547597964604411 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04229607250755287, - "normalized_score": 4.229607250755287 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2625838926174497, - "normalized_score": 1.6778523489932917 - }, - "musr": { - "name": "MUSR", - "value": 0.3288229166666667, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15799534574468085, - "normalized_score": 6.443927304964539 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-15", - "submission_date": "2025-01-15", - "generation": 0, - "base_model": "JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-1ep_0alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 1.1823138872922259 - } - }, - { - "id": "JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-2ep_0alp_0lam_bfloat16_4ded3d9ed25196a691d5cab4e2275f27f024caf8_True", - "model": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-2ep_0alp_0lam", - "sha": "4ded3d9ed25196a691d5cab4e2275f27f024caf8", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 7.7092768708900685, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24932069132124984, - "normalized_score": 24.932069132124983 - }, - "bbh": { - "name": "BBH", - "value": 0.3196623899087389, - "normalized_score": 6.797543498373474 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04229607250755287, - "normalized_score": 4.229607250755287 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.33148958333333334, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15708111702127658, - "normalized_score": 6.342346335697398 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-15", - "submission_date": "2025-01-15", - "generation": 0, - "base_model": "JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-2ep_0alp_0lam", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9849166918543725 - } - }, - { - "id": "JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-3ep_0alp_0lam_bfloat16_7938337efded76eb32d6adf658eeaac767186de8_True", - "model": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-3ep_0alp_0lam", - "sha": "7938337efded76eb32d6adf658eeaac767186de8", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.48846250817605, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2520434668900739, - "normalized_score": 25.20434668900739 - }, - "bbh": { - "name": "BBH", - "value": 0.3197552188491219, - "normalized_score": 6.479625973677142 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04229607250755287, - "normalized_score": 4.229607250755287 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": 
"MUSR", - "value": 0.3261875, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15508643617021275, - "normalized_score": 6.120715130023639 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-12", - "submission_date": "2025-01-15", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.7511048688142228 - } - }, - { - "id": "JayHyeon/Qwen_0.5-MDPO_0.5_4e-6-3ep_0alp_0lam_bfloat16_ef86bce4016fdc0974b1788c7ceebe00fab85815_True", - "model": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.5_4e-6-3ep_0alp_0lam", - "sha": "ef86bce4016fdc0974b1788c7ceebe00fab85815", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.137448792080777, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25803867072700437, - "normalized_score": 25.803867072700434 - }, - "bbh": { - "name": "BBH", - "value": 0.3248229336342538, - "normalized_score": 7.112517981317336 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04758308157099698, - "normalized_score": 4.758308157099698 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2751677852348993, - "normalized_score": 3.355704697986576 - }, - "musr": { - "name": "MUSR", - "value": 0.34215625, - "normalized_score": 1.8028645833333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15392287234042554, - "normalized_score": 5.991430260047281 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-09", - "submission_date": "2025-01-10", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 2.599250543749703 - } - }, - { - "id": "JayHyeon/Qwen_0.5-MDPO_0.5_6e-6-3ep_0alp_0lam_bfloat16_f004676afae0ef729a983ffdd66c0efadb51ae08_True", - "model": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.5_6e-6-3ep_0alp_0lam", - "sha": "f004676afae0ef729a983ffdd66c0efadb51ae08", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.4529439055513045, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23196008812173918, - "normalized_score": 23.196008812173922 - }, - "bbh": { - "name": "BBH", - "value": 0.326545450978746, - "normalized_score": 7.3733460172501255 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03851963746223565, - "normalized_score": 3.8519637462235647 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27097315436241615, - "normalized_score": 2.796420581655486 - }, - "musr": { - "name": "MUSR", - "value": 0.33948958333333334, - "normalized_score": 1.5361979166666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15367353723404256, - "normalized_score": 5.963726359338062 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-09", - "submission_date": "2025-01-10", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", 
- "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.6748997032943111 - } - }, - { - "id": "JayHyeon/Qwen_0.5-MDPO_0.5_7e-6-3ep_0alp_0lam_bfloat16_2cc3826051afe999f0f91fb138b949c1b7d4d04b_True", - "model": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.5_7e-6-3ep_0alp_0lam", - "sha": "2cc3826051afe999f0f91fb138b949c1b7d4d04b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.944784882371251, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2487710386219675, - "normalized_score": 24.877103862196748 - }, - "bbh": { - "name": "BBH", - "value": 0.3272739110084265, - "normalized_score": 7.574128764138837 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04607250755287009, - "normalized_score": 4.607250755287009 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27181208053691275, - "normalized_score": 2.9082774049216997 - }, - "musr": { - "name": "MUSR", - "value": 0.33415625, - "normalized_score": 1.8028645833333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15309175531914893, - "normalized_score": 5.89908392434988 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-09", - "submission_date": "2025-01-10", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.7743450616304828 - } - }, - { - "id": "JayHyeon/Qwen_0.5-MDPO_0.5_7e-7-3ep_0alp_0lam_bfloat16_5851868459ddc86358bfdcfd65a811e2127993cd_True", - "model": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.5_7e-7-3ep_0alp_0lam", - "sha": "5851868459ddc86358bfdcfd65a811e2127993cd", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.6880135592533145, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25236816092412573, - "normalized_score": 25.236816092412575 - }, - "bbh": { - "name": "BBH", - "value": 0.3129690310926447, - "normalized_score": 6.263411227504701 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0445619335347432, - "normalized_score": 4.45619335347432 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.32885416666666667, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15641622340425532, - "normalized_score": 6.268469267139479 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-08", - "submission_date": "2025-01-10", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.7628564670187923 - } - }, - { - "id": "JayHyeon/Qwen_0.5-MDPO_0.7_3e-6-3ep_0alp_0lam_bfloat16_6a2bc27b8052769f882501b7403afe5ae5240548_True", - "model": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.7_3e-6-3ep_0alp_0lam", - "sha": "6a2bc27b8052769f882501b7403afe5ae5240548", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": 
"Qwen2ForCausalLM", - "average_score": 7.816323890967756, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2513940788219702, - "normalized_score": 25.139407882197013 - }, - "bbh": { - "name": "BBH", - "value": 0.322095658026178, - "normalized_score": 6.770439236577431 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04380664652567976, - "normalized_score": 4.380664652567976 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2751677852348993, - "normalized_score": 3.355704697986576 - }, - "musr": { - "name": "MUSR", - "value": 0.33148958333333334, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15383976063829788, - "normalized_score": 5.982195626477541 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-08", - "submission_date": "2025-01-09", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.6180608379538362 - } - }, - { - "id": "JayHyeon/Qwen_0.5-MDPO_0.7_5e-7-3ep_0alp_0lam_bfloat16_df437c8128c3d99f6d9d0cffef573b9f0d3fd458_True", - "model": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.7_5e-7-3ep_0alp_0lam", - "sha": "df437c8128c3d99f6d9d0cffef573b9f0d3fd458", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.460108761190238, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24567370133468086, - "normalized_score": 24.56737013346809 - }, - "bbh": { - "name": "BBH", - "value": 0.3180087717709833, - "normalized_score": 6.636595785955174 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03851963746223565, - "normalized_score": 3.8519637462235647 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26677852348993286, - "normalized_score": 2.2371364653243813 - }, - "musr": { - "name": "MUSR", - "value": 0.3275208333333333, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15724734042553193, - "normalized_score": 6.3608156028368805 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-07", - "submission_date": "2025-01-21", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 2.345510946531508 - } - }, - { - "id": "JayHyeon/Qwen_0.5-MDPO_0.9_5e-7-3ep_0alp_0lam_bfloat16_62a07f38868329115db25dd196c5aa3f03b27fe3_True", - "model": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.9_5e-7-3ep_0alp_0lam", - "sha": "62a07f38868329115db25dd196c5aa3f03b27fe3", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.877893571937886, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.26363382491788456, - "normalized_score": 26.363382491788457 - }, - "bbh": { - "name": "BBH", - "value": 0.31806866682195567, - "normalized_score": 6.534335437371297 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04758308157099698, - "normalized_score": 4.758308157099698 - }, - "gpqa": { - "name": "GPQA", - 
"value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.3235208333333333, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15741356382978725, - "normalized_score": 6.37928486997636 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-09", - "submission_date": "2025-01-21", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.8309944741150626 - } - }, - { - "id": "JayHyeon/Qwen_0.5-VDPO_3e-6-1ep_3vpo_const_bfloat16_86c3f69c05edb7cd30617129825f9c0f1b6cfd47_True", - "model": { - "name": "JayHyeon/Qwen_0.5-VDPO_3e-6-1ep_3vpo_const", - "sha": "86c3f69c05edb7cd30617129825f9c0f1b6cfd47", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.151029068055777, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24829674153468353, - "normalized_score": 24.829674153468353 - }, - "bbh": { - "name": "BBH", - "value": 0.3174312444218736, - "normalized_score": 6.10466259440003 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0377643504531722, - "normalized_score": 3.7764350453172204 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25419463087248323, - "normalized_score": 0.5592841163310973 - }, - "musr": { - "name": "MUSR", - "value": 0.33279166666666665, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1558344414893617, - "normalized_score": 6.2038268321513 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-25", - "submission_date": "2025-02-27", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 0.8767476863718147 - } - }, - { - "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_0alp_0lam_bfloat16_98a1ff10ebf69d5c0a83820fbe962777a7a36295_True", - "model": { - "name": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_0alp_0lam", - "sha": "98a1ff10ebf69d5c0a83820fbe962777a7a36295", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.018805167387615, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2517686405404327, - "normalized_score": 25.17686405404327 - }, - "bbh": { - "name": "BBH", - "value": 0.3218020653711833, - "normalized_score": 6.86099852269675 - }, - "math": { - "name": "MATH Level 5", - "value": 0.052870090634441085, - "normalized_score": 5.287009063444108 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27181208053691275, - "normalized_score": 2.9082774049216997 - }, - "musr": { - "name": "MUSR", - "value": 0.32348958333333333, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15949135638297873, - "normalized_score": 6.610150709219859 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": 
"2025-01-25", - "submission_date": "2025-01-25", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.6170922607453528 - } - }, - { - "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_10vpo_const_bfloat16_40a33af5a65a4821f73a092060c0fbc26b71de76_True", - "model": { - "name": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_10vpo_const", - "sha": "40a33af5a65a4821f73a092060c0fbc26b71de76", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.070117533212565, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25361706937592254, - "normalized_score": 25.361706937592253 - }, - "bbh": { - "name": "BBH", - "value": 0.3234331515135053, - "normalized_score": 7.109440788491956 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04909365558912387, - "normalized_score": 4.909365558912387 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.32355208333333335, - "normalized_score": 0.9440104166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15965757978723405, - "normalized_score": 6.628619976359339 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-20", - "submission_date": "2025-02-21", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 0.7989427864939285 - } - }, - { - "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_1vpo_const_bfloat16_797fba63d80f5d798b7bfaff04460ab5d3b02190_True", - "model": { - "name": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_1vpo_const", - "sha": "797fba63d80f5d798b7bfaff04460ab5d3b02190", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.079611266480615, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24479935460134664, - "normalized_score": 24.479935460134662 - }, - "bbh": { - "name": "BBH", - "value": 0.32395300683134437, - "normalized_score": 6.97515616139911 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06042296072507553, - "normalized_score": 6.042296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2751677852348993, - "normalized_score": 3.355704697986576 - }, - "musr": { - "name": "MUSR", - "value": 0.32485416666666667, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15866023936170212, - "normalized_score": 6.5178043735224565 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-20", - "submission_date": "2025-02-21", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 0.8346728545974762 - } - }, - { - "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_3vpo_const_bfloat16_1adc557891a1a092249e7ecdd4500dc3fdb02e83_True", - "model": { - "name": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_3vpo_const", - "sha": "1adc557891a1a092249e7ecdd4500dc3fdb02e83", - "precision": "bfloat16", - "type": 
"fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.784158197402131, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25046986440422525, - "normalized_score": 25.046986440422522 - }, - "bbh": { - "name": "BBH", - "value": 0.322699453909483, - "normalized_score": 6.852004431909751 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04682779456193353, - "normalized_score": 4.682779456193353 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.3209166666666667, - "normalized_score": 0.7812499999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1589095744680851, - "normalized_score": 6.545508274231676 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-20", - "submission_date": "2025-02-21", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 0.8344717513563957 - } - }, - { - "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_0alp_0lam_bfloat16_3eb6d75040e859b5ded8987a4d08d168110c2948_True", - "model": { - "name": "JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_0alp_0lam", - "sha": "3eb6d75040e859b5ded8987a4d08d168110c2948", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.039735734618104, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24719743613611883, - "normalized_score": 24.719743613611882 - }, - "bbh": { - "name": "BBH", - "value": 0.325505796038594, - "normalized_score": 7.2279757961023074 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04984894259818731, - "normalized_score": 4.984894259818732 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2751677852348993, - "normalized_score": 3.355704697986576 - }, - "musr": { - "name": "MUSR", - "value": 0.32079166666666664, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15866023936170212, - "normalized_score": 6.5178043735224565 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-25", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.6581914337571315 - } - }, - { - "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_1vpo_const_bfloat16_ee1bdbc1e3d8253a8b0e5993d8a878d5c41f2808_True", - "model": { - "name": "JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_1vpo_const", - "sha": "ee1bdbc1e3d8253a8b0e5993d8a878d5c41f2808", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.004444649280561, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24165214962964932, - "normalized_score": 24.16521496296493 - }, - "bbh": { - "name": "BBH", - "value": 0.3255889369754366, - "normalized_score": 7.343317068075329 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0581570996978852, - 
"normalized_score": 5.81570996978852 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.32745833333333335, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15625, - "normalized_score": 6.249999999999999 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-08", - "submission_date": "2025-02-10", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.8056727355826006 - } - }, - { - "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_3vpo_const_bfloat16_d2f49ff695a41b6c443b1a211f8c5698eed18e33_True", - "model": { - "name": "JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_3vpo_const", - "sha": "d2f49ff695a41b6c443b1a211f8c5698eed18e33", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.231694304691628, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2526928549581776, - "normalized_score": 25.269285495817762 - }, - "bbh": { - "name": "BBH", - "value": 0.32354099176995715, - "normalized_score": 7.24175202196556 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05362537764350453, - "normalized_score": 5.362537764350453 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2785234899328859, - "normalized_score": 3.8031319910514525 - }, - "musr": { - "name": "MUSR", - "value": 0.32348958333333333, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15799534574468085, - "normalized_score": 6.443927304964539 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-08", - "submission_date": "2025-02-10", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 0.9629762658816848 - } - }, - { - "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_0alp_0lam_bfloat16_1935f4c3d5625e9d56de5505323bc705019ce9ae_True", - "model": { - "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_0alp_0lam", - "sha": "1935f4c3d5625e9d56de5505323bc705019ce9ae", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.71371672694166, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.26685638550158025, - "normalized_score": 26.685638550158025 - }, - "bbh": { - "name": "BBH", - "value": 0.3313735254746672, - "normalized_score": 7.834260900707316 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07099697885196375, - "normalized_score": 7.099697885196375 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.3168229166666667, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16339760638297873, - "normalized_score": 7.0441784869976365 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - 
"is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-25", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.5718164174484692 - } - }, - { - "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_10vpo_const_bfloat16_b60f933f70c85d41946b47a3b86150eb4a36e933_True", - "model": { - "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_10vpo_const", - "sha": "b60f933f70c85d41946b47a3b86150eb4a36e933", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.993052881905456, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.270228549138508, - "normalized_score": 27.022854913850807 - }, - "bbh": { - "name": "BBH", - "value": 0.3299802970903615, - "normalized_score": 7.692240203539559 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07401812688821752, - "normalized_score": 7.401812688821751 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2751677852348993, - "normalized_score": 3.355704697986576 - }, - "musr": { - "name": "MUSR", - "value": 0.32079166666666664, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1634807180851064, - "normalized_score": 7.053413120567376 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-20", - "submission_date": "2025-02-21", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 0.8492316769292112 - } - }, - { - "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_1vpo_const_bfloat16_b75e64467b66f304a1f834f936e17de86950428f_True", - "model": { - "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_1vpo_const", - "sha": "b75e64467b66f304a1f834f936e17de86950428f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.292943161631719, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24802191518504235, - "normalized_score": 24.802191518504237 - }, - "bbh": { - "name": "BBH", - "value": 0.33086196042215565, - "normalized_score": 7.776385232936402 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06797583081570997, - "normalized_score": 6.797583081570997 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.3208229166666667, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16489361702127658, - "normalized_score": 7.210401891252953 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-20", - "submission_date": "2025-02-20", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 0.8293594279742648 - } - }, - { - "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_30vpo_const_bfloat16_32887d1f909914b5c73ae91cc93c61e7b11b8c0a_True", - "model": { - "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_30vpo_const", - "sha": 
"32887d1f909914b5c73ae91cc93c61e7b11b8c0a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.721617990921317, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.26223531341285566, - "normalized_score": 26.223531341285565 - }, - "bbh": { - "name": "BBH", - "value": 0.3281993681712964, - "normalized_score": 7.655186826633241 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07401812688821752, - "normalized_score": 7.401812688821751 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26929530201342283, - "normalized_score": 2.572706935123044 - }, - "musr": { - "name": "MUSR", - "value": 0.322125, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16339760638297873, - "normalized_score": 7.0441784869976365 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-20", - "submission_date": "2025-02-21", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 0.8032189364040347 - } - }, - { - "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_3vpo_const_bfloat16_167a76f8c3787035a2304395d51b9eeb3cadfcf3_True", - "model": { - "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_3vpo_const", - "sha": "167a76f8c3787035a2304395d51b9eeb3cadfcf3", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.601146924480913, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2608611816646498, - "normalized_score": 26.086118166464978 - }, - "bbh": { - "name": "BBH", - "value": 0.32980236442597805, - "normalized_score": 7.67033388545677 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0649546827794562, - "normalized_score": 6.495468277945619 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.31679166666666664, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1651429521276596, - "normalized_score": 7.238105791962175 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-20", - "submission_date": "2025-02-21", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 0.8291183283913374 - } - }, - { - "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_0alp_0lam_bfloat16_4e7e89a1964d83b08d23f8ed435921ab56bd8269_True", - "model": { - "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_0alp_0lam", - "sha": "4e7e89a1964d83b08d23f8ed435921ab56bd8269", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.579605529729827, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2930347034756668, - "normalized_score": 29.30347034756668 - }, - "bbh": { - "name": "BBH", - "value": 0.3219547893625387, - "normalized_score": 6.099203002357716 - }, - "math": { - 
"name": "MATH Level 5", - "value": 0.06268882175226587, - "normalized_score": 6.268882175226587 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.3115833333333333, - "normalized_score": 0.7812499999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1590757978723404, - "normalized_score": 6.563977541371156 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-25", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.3846643617962733 - } - }, - { - "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_10vpo_const_bfloat16_4a601df8da78c41435239c1521526bbf2c7aabb3_True", - "model": { - "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_10vpo_const", - "sha": "4a601df8da78c41435239c1521526bbf2c7aabb3", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.853042881324415, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.28813880503730105, - "normalized_score": 28.8138805037301 - }, - "bbh": { - "name": "BBH", - "value": 0.32553831509236264, - "normalized_score": 6.454270227116723 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07250755287009064, - "normalized_score": 7.250755287009064 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2751677852348993, - "normalized_score": 3.355704697986576 - }, - "musr": { - "name": "MUSR", - "value": 0.31024999999999997, - "normalized_score": 0.7812499999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15816156914893617, - "normalized_score": 6.4623965721040175 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-06", - "submission_date": "2025-02-08", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 0.653186297134859 - } - }, - { - "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_1vpo_const_bfloat16_37188bcb5c994b1f3c5b45d425945ff46326391d_True", - "model": { - "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_1vpo_const", - "sha": "37188bcb5c994b1f3c5b45d425945ff46326391d", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 9.001725127164766, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2887383254209941, - "normalized_score": 28.873832542099404 - }, - "bbh": { - "name": "BBH", - "value": 0.3237016212336586, - "normalized_score": 6.08394171367177 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07477341389728097, - "normalized_score": 7.477341389728097 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2802013422818792, - "normalized_score": 4.026845637583895 - }, - "musr": { - "name": "MUSR", - "value": 0.31425, - "normalized_score": 0.7812499999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16090425531914893, - "normalized_score": 6.767139479905436 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": 
false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-06", - "submission_date": "2025-02-08", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 0.6594938597017257 - } - }, - { - "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_30vpo_const_bfloat16_8730504c742876706bdd399d57aaa36671b23149_True", - "model": { - "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_30vpo_const", - "sha": "8730504c742876706bdd399d57aaa36671b23149", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.944503538443925, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2905368865720732, - "normalized_score": 29.05368865720732 - }, - "bbh": { - "name": "BBH", - "value": 0.3254390641560331, - "normalized_score": 6.616879159578596 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0770392749244713, - "normalized_score": 7.7039274924471295 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27348993288590606, - "normalized_score": 3.1319910514541416 - }, - "musr": { - "name": "MUSR", - "value": 0.3129166666666667, - "normalized_score": 0.7812499999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15741356382978725, - "normalized_score": 6.37928486997636 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-06", - "submission_date": "2025-02-08", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 0.7392397993919483 - } - }, - { - "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_3vpo_const_bfloat16_4322bfbee423ecf57c561bc3fe821d6a54d8570c_True", - "model": { - "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_3vpo_const", - "sha": "4322bfbee423ecf57c561bc3fe821d6a54d8570c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.730937881241061, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2904870188876625, - "normalized_score": 29.048701888766253 - }, - "bbh": { - "name": "BBH", - "value": 0.32381698216947513, - "normalized_score": 5.989063404661713 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0702416918429003, - "normalized_score": 7.02416918429003 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27348993288590606, - "normalized_score": 3.1319910514541416 - }, - "musr": { - "name": "MUSR", - "value": 0.30894791666666666, - "normalized_score": 0.6184895833333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15915890957446807, - "normalized_score": 6.573212174940895 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-06", - "submission_date": "2025-02-08", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 0.6903602932272771 - } - }, - { - "id": "JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.1_bfloat16_3c369e3227ae050829233e92ee3238c36490f607_True", - "model": { - "name": 
"JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.1", - "sha": "3c369e3227ae050829233e92ee3238c36490f607", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.905104898093658, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23925406809487715, - "normalized_score": 23.925406809487715 - }, - "bbh": { - "name": "BBH", - "value": 0.3244192088381941, - "normalized_score": 7.201174679917629 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0513595166163142, - "normalized_score": 5.13595166163142 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.3221875, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1573304521276596, - "normalized_score": 6.37005023640662 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-19", - "submission_date": "2025-02-20", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 0.7898335214086819 - } - }, - { - "id": "JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.3_bfloat16_7b50bab8ec188103a31d5bb3ebd3ff5c54a719b7_True", - "model": { - "name": "JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.3", - "sha": "7b50bab8ec188103a31d5bb3ebd3ff5c54a719b7", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.980367592203365, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24747226248576, - "normalized_score": 24.747226248576 - }, - "bbh": { - "name": "BBH", - "value": 0.32090616030928304, - "normalized_score": 6.9860820873250375 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04607250755287009, - "normalized_score": 4.607250755287009 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28104026845637586, - "normalized_score": 4.138702460850116 - }, - "musr": { - "name": "MUSR", - "value": 0.3275208333333333, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1566655585106383, - "normalized_score": 6.296173167848698 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-16", - "submission_date": "2025-02-17", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 0.8270344996930198 - } - }, - { - "id": "JayHyeon/Qwen_0.5-rDPO_3e-6-1ep_0vpo_const_0.1_bfloat16_e5712d2ba9fbb61af9c9cdd22d264cf16cb7bcdd_True", - "model": { - "name": "JayHyeon/Qwen_0.5-rDPO_3e-6-1ep_0vpo_const_0.1", - "sha": "e5712d2ba9fbb61af9c9cdd22d264cf16cb7bcdd", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.0836211579470865, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.232135179102559, - "normalized_score": 23.213517910255902 - }, - "bbh": { - "name": "BBH", - "value": 
0.32779679775418075, - "normalized_score": 6.90518932317681 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04758308157099698, - "normalized_score": 4.758308157099698 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2575503355704698, - "normalized_score": 1.0067114093959737 - }, - "musr": { - "name": "MUSR", - "value": 0.3021875, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.14960106382978725, - "normalized_score": 5.511229314420804 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-26", - "submission_date": "2025-02-27", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 0.7678363288986092 - } - }, - { - "id": "JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.1_bfloat16_33c2af4b4fd13f9ff5037d0f2f8320a29f8c6cef_True", - "model": { - "name": "JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.1", - "sha": "33c2af4b4fd13f9ff5037d0f2f8320a29f8c6cef", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.149323958197453, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2541667220752049, - "normalized_score": 25.41667220752049 - }, - "bbh": { - "name": "BBH", - "value": 0.3253117533747236, - "normalized_score": 7.1964107499925385 - }, - "math": { - "name": "MATH Level 5", - "value": 0.052870090634441085, - "normalized_score": 5.287009063444108 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.318125, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16090425531914893, - "normalized_score": 6.767139479905436 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-18", - "submission_date": "2025-02-20", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 0.7945318466738231 - } - }, - { - "id": "JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.3_bfloat16_bbffb6820cbdf889675c983aa617a3d4a9dbafeb_True", - "model": { - "name": "JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.3", - "sha": "bbffb6820cbdf889675c983aa617a3d4a9dbafeb", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.7152906600553655, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.273875539125077, - "normalized_score": 27.3875539125077 - }, - "bbh": { - "name": "BBH", - "value": 0.3245102552473828, - "normalized_score": 6.775212492911929 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04607250755287009, - "normalized_score": 4.607250755287009 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25083892617449666, - "normalized_score": 0.11185682326622093 - }, - "musr": { - "name": "MUSR", - "value": 0.3089166666666667, - "normalized_score": 0.7812499999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15965757978723405, - "normalized_score": 
6.628619976359339 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-15", - "submission_date": "2025-02-17", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 0.7159497773067481 - } - }, - { - "id": "Jimmy19991222/Llama-3-Instruct-8B-SimPO-v0.2_float16_53a517ceaef324efc3626be44140b4f18a010591_True", - "model": { - "name": "Jimmy19991222/Llama-3-Instruct-8B-SimPO-v0.2", - "sha": "53a517ceaef324efc3626be44140b4f18a010591", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.59465136328484, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6540368444615842, - "normalized_score": 65.40368444615842 - }, - "bbh": { - "name": "BBH", - "value": 0.498371102582105, - "normalized_score": 29.1238227637126 - }, - "math": { - "name": "MATH Level 5", - "value": 0.061933534743202415, - "normalized_score": 6.193353474320242 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3145973154362416, - "normalized_score": 8.612975391498878 - }, - "musr": { - "name": "MUSR", - "value": 0.40125000000000005, - "normalized_score": 8.389583333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3686003989361702, - "normalized_score": 29.844488770685572 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-06", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.026303449176492 - } - }, - { - "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun_bfloat16_00c02a823b4ff1a6cfcded6085ba9630df633998_True", - "model": { - "name": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun", - "sha": "00c02a823b4ff1a6cfcded6085ba9630df633998", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.144995316075697, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6717221416951467, - "normalized_score": 67.17221416951466 - }, - "bbh": { - "name": "BBH", - "value": 0.48797965672899357, - "normalized_score": 27.755228582197066 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06042296072507553, - "normalized_score": 6.042296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.4040729166666667, - "normalized_score": 8.709114583333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36336436170212766, - "normalized_score": 29.262706855791958 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-17", - "submission_date": "2024-09-18", - "generation": 1, - "base_model": "meta-llama/Meta-Llama-3-8B-Instruct", - "hub_license": "llama3", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.9635818182639769 - } - 
}, - { - "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log_bfloat16_99d9e31df5b7e88b1da78b1bd335cac3215dfd6e_True", - "model": { - "name": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log", - "sha": "99d9e31df5b7e88b1da78b1bd335cac3215dfd6e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.09614909345055, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6555605792630221, - "normalized_score": 65.55605792630222 - }, - "bbh": { - "name": "BBH", - "value": 0.49345840367294164, - "normalized_score": 28.613596677525617 - }, - "math": { - "name": "MATH Level 5", - "value": 0.054380664652567974, - "normalized_score": 5.438066465256798 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.4000104166666667, - "normalized_score": 8.167968750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3657746010638298, - "normalized_score": 29.53051122931442 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-22", - "submission_date": "2024-09-22", - "generation": 1, - "base_model": "meta-llama/Meta-Llama-3-8B-Instruct", - "hub_license": "llama3", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.9570708276407989 - } - }, - { - "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log_bfloat16_49a029ea2605d768e89b638ad78a59fd62d192ab_True", - "model": { - "name": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log", - "sha": "49a029ea2605d768e89b638ad78a59fd62d192ab", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.03715310569549, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6315055164740666, - "normalized_score": 63.150551647406665 - }, - "bbh": { - "name": "BBH", - "value": 0.4916414793938901, - "normalized_score": 27.666183558964235 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0649546827794562, - "normalized_score": 6.495468277945619 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2860738255033557, - "normalized_score": 4.809843400447425 - }, - "musr": { - "name": "MUSR", - "value": 0.3935, - "normalized_score": 7.087500000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3611203457446808, - "normalized_score": 29.01337174940898 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-22", - "submission_date": "2024-09-22", - "generation": 1, - "base_model": "meta-llama/Meta-Llama-3-8B-Instruct", - "hub_license": "llama3", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.0449703983820167 - } - }, - { - "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bleu-beta0.1-no-length-scale-gamma0.4_bfloat16_de8bb28ad7a9d1158f318a4461dc47ad03e6e560_True", - "model": { - "name": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bleu-beta0.1-no-length-scale-gamma0.4", - "sha": 
"de8bb28ad7a9d1158f318a4461dc47ad03e6e560", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.393777045662546, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6284580468711907, - "normalized_score": 62.84580468711906 - }, - "bbh": { - "name": "BBH", - "value": 0.4986088445592742, - "normalized_score": 29.329731874817796 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0513595166163142, - "normalized_score": 5.13595166163142 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29278523489932884, - "normalized_score": 5.7046979865771785 - }, - "musr": { - "name": "MUSR", - "value": 0.40137500000000004, - "normalized_score": 9.071875000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3544714095744681, - "normalized_score": 28.27460106382979 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-06", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.9607422024142419 - } - }, - { - "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun_bfloat16_e9692d8dbe30273839763757aa9ef07a5fcf0c59_True", - "model": { - "name": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun", - "sha": "e9692d8dbe30273839763757aa9ef07a5fcf0c59", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.385612249734205, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6677504576745258, - "normalized_score": 66.77504576745258 - }, - "bbh": { - "name": "BBH", - "value": 0.4940463886115545, - "normalized_score": 28.390676236054286 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06117824773413897, - "normalized_score": 6.117824773413897 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3062080536912752, - "normalized_score": 7.494407158836691 - }, - "musr": { - "name": "MUSR", - "value": 0.3987083333333334, - "normalized_score": 8.005208333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3657746010638298, - "normalized_score": 29.53051122931442 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-14", - "submission_date": "2024-09-15", - "generation": 1, - "base_model": "meta-llama/Meta-Llama-3-8B-Instruct", - "hub_license": "llama3", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.5136981730468007 - } - }, - { - "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log_bfloat16_9ff0ce408abb8dbcf7efb9b6533338f2c344a355_True", - "model": { - "name": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log", - "sha": "9ff0ce408abb8dbcf7efb9b6533338f2c344a355", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.210850058799164, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6605063453857986, - "normalized_score": 66.05063453857986 - }, - 
"bbh": { - "name": "BBH", - "value": 0.49160075581298046, - "normalized_score": 28.075035515119442 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06570996978851963, - "normalized_score": 6.570996978851963 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.4000416666666667, - "normalized_score": 7.805208333333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3664394946808511, - "normalized_score": 29.604388297872337 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-22", - "submission_date": "2024-09-22", - "generation": 1, - "base_model": "meta-llama/Meta-Llama-3-8B-Instruct", - "hub_license": "llama3", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.0039870513171805 - } - }, - { - "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log_bfloat16_ec67f95c4d1813a34bbde52d0ad14824fd7111a0_True", - "model": { - "name": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log", - "sha": "ec67f95c4d1813a34bbde52d0ad14824fd7111a0", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.056972059408523, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.649190813707629, - "normalized_score": 64.91908137076291 - }, - "bbh": { - "name": "BBH", - "value": 0.4952489348573605, - "normalized_score": 28.56256683836558 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06419939577039276, - "normalized_score": 6.419939577039275 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - "musr": { - "name": "MUSR", - "value": 0.3961354166666667, - "normalized_score": 7.383593750000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37109375, - "normalized_score": 30.12152777777778 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-22", - "submission_date": "2024-09-22", - "generation": 1, - "base_model": "meta-llama/Meta-Llama-3-8B-Instruct", - "hub_license": "llama3", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.9731718962458619 - } - }, - { - "id": "Joseph717171/Hermes-3-Llama-3.1-8B_TIES_with_Base_Embeds_Initialized_to_Special_Instruct_Toks_dtypeF32_bfloat16_823930851c57b11fd2e25cd65b5c53f909209d0e_True", - "model": { - "name": "Joseph717171/Hermes-3-Llama-3.1-8B_TIES_with_Base_Embeds_Initialized_to_Special_Instruct_Toks_dtypeF32", - "sha": "823930851c57b11fd2e25cd65b5c53f909209d0e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.25287703936368, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6185410266980501, - "normalized_score": 61.854102669805016 - }, - "bbh": { - "name": "BBH", - "value": 0.5177452540141246, - "normalized_score": 30.724096614147953 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0513595166163142, - "normalized_score": 5.13595166163142 - }, - "gpqa": { - "name": "GPQA", - 
"value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.4369375, - "normalized_score": 13.617187499999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31441156914893614, - "normalized_score": 23.823507683215126 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-23", - "submission_date": "2024-10-25", - "generation": 0, - "base_model": "Joseph717171/Hermes-3-Llama-3.1-8B_TIES_with_Base_Embeds_Initialized_to_Special_Instruct_Toks_dtypeF32", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 0.7075451204073241 - } - }, - { - "id": "Joseph717171/Llama-3.1-SuperNova-8B-Lite_TIES_with_Base_bfloat16_f1e2cad4dca10f948fd2ee9588f80df0b40d7232_True", - "model": { - "name": "Joseph717171/Llama-3.1-SuperNova-8B-Lite_TIES_with_Base", - "sha": "f1e2cad4dca10f948fd2ee9588f80df0b40d7232", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.245028049214653, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8096328851890761, - "normalized_score": 80.9632885189076 - }, - "bbh": { - "name": "BBH", - "value": 0.5147423127141911, - "normalized_score": 31.46581339489899 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18353474320241692, - "normalized_score": 18.35347432024169 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.4109895833333333, - "normalized_score": 10.74036458333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38804853723404253, - "normalized_score": 32.00539302600473 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-02", - "submission_date": "2024-10-03", - "generation": 0, - "base_model": "Joseph717171/Llama-3.1-SuperNova-8B-Lite_TIES_with_Base", - "hub_license": "llama3.1", - "hub_hearts": 8, - "params_billions": 8.03, - "co2_cost": 1.7494627877110125 - } - }, - { - "id": "Josephgflowers/Cinder-Phi-2-V1-F16-gguf_float16_85629ec9b18efee31d07630664e7a3815121badf_True", - "model": { - "name": "Josephgflowers/Cinder-Phi-2-V1-F16-gguf", - "sha": "85629ec9b18efee31d07630664e7a3815121badf", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "PhiForCausalLM", - "average_score": 11.258522615146035, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23565694579271884, - "normalized_score": 23.565694579271884 - }, - "bbh": { - "name": "BBH", - "value": 0.4396616219689493, - "normalized_score": 22.45340222827221 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02416918429003021, - "normalized_score": 2.416918429003021 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28187919463087246, - "normalized_score": 4.250559284116329 - }, - "musr": { - "name": "MUSR", - "value": 0.34345833333333337, - "normalized_score": 1.965625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2160904255319149, - "normalized_score": 12.898936170212766 - } - }, - "features": { - "is_not_available_on_hub": true, - 
"is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-02-25", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "Josephgflowers/Cinder-Phi-2-V1-F16-gguf", - "hub_license": "mit", - "hub_hearts": 4, - "params_billions": 2.78, - "co2_cost": 0.9428072189943586 - } - }, - { - "id": "Josephgflowers/Differential-Attention-Liquid-Metal-Tinyllama_float16_bdb6c63ff1025241e8e10b1858d67dc410f0a702_True", - "model": { - "name": "Josephgflowers/Differential-Attention-Liquid-Metal-Tinyllama", - "sha": "bdb6c63ff1025241e8e10b1858d67dc410f0a702", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.250959952944556, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22269245601670234, - "normalized_score": 22.269245601670235 - }, - "bbh": { - "name": "BBH", - "value": 0.292556113105267, - "normalized_score": 2.5522242028124382 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0324773413897281, - "normalized_score": 3.2477341389728096 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25083892617449666, - "normalized_score": 0.11185682326622093 - }, - "musr": { - "name": "MUSR", - "value": 0.33555208333333336, - "normalized_score": 0.9440104166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12142619680851063, - "normalized_score": 2.3806885342789585 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-05", - "submission_date": "2024-11-07", - "generation": 0, - "base_model": "Josephgflowers/Differential-Attention-Liquid-Metal-Tinyllama", - "hub_license": "mit", - "hub_hearts": 3, - "params_billions": 1.1, - "co2_cost": 0.34758719675338845 - } - }, - { - "id": "Josephgflowers/TinyLlama-Cinder-Agent-v1_float16_a9cd8b48bfe30f29bb1f819213da9a4c41eee67f_True", - "model": { - "name": "Josephgflowers/TinyLlama-Cinder-Agent-v1", - "sha": "a9cd8b48bfe30f29bb1f819213da9a4c41eee67f", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 6.332677189374276, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.26695612087040166, - "normalized_score": 26.695612087040164 - }, - "bbh": { - "name": "BBH", - "value": 0.31160367351776513, - "normalized_score": 3.804167155031377 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03474320241691843, - "normalized_score": 3.474320241691843 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24412751677852348, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.33945833333333336, - "normalized_score": 2.232291666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11610704787234043, - "normalized_score": 1.7896719858156023 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-21", - "submission_date": "2024-06-26", - "generation": 4, - "base_model": "Josephgflowers/TinyLlama-3T-Cinder-v1.2", - "hub_license": "mit", - "hub_hearts": 2, - "params_billions": 1.1, - "co2_cost": 0.47566302529029164 - } - }, - { - "id": 
"Josephgflowers/TinyLlama-v1.1-Cinders-World_float16_11a2c305f787a7908dd87c4e5a7d0f1e314a1f05_True", - "model": { - "name": "Josephgflowers/TinyLlama-v1.1-Cinders-World", - "sha": "11a2c305f787a7908dd87c4e5a7d0f1e314a1f05", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.683002628938165, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24692260978647768, - "normalized_score": 24.692260978647766 - }, - "bbh": { - "name": "BBH", - "value": 0.29979653176003074, - "normalized_score": 3.1077144735021442 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03474320241691843, - "normalized_score": 3.474320241691843 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24412751677852348, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3356145833333333, - "normalized_score": 0.6184895833333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11984707446808511, - "normalized_score": 2.2052304964539005 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-12", - "submission_date": "2024-10-13", - "generation": 0, - "base_model": "Josephgflowers/TinyLlama-v1.1-Cinders-World", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 1.1, - "co2_cost": 0.5147665325802105 - } - }, - { - "id": "Josephgflowers/TinyLlama_v1.1_math_code-world-test-1_float16_6f7c2aaf0b8723bc6a1dc23a4a1ff0ec24dc11ec_False", - "model": { - "name": "Josephgflowers/TinyLlama_v1.1_math_code-world-test-1", - "sha": "6f7c2aaf0b8723bc6a1dc23a4a1ff0ec24dc11ec", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 2.0028112750677205, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.00784363267242029, - "normalized_score": 0.784363267242029 - }, - "bbh": { - "name": "BBH", - "value": 0.31463497508928434, - "normalized_score": 4.164017098724634 - }, - "math": { - "name": "MATH Level 5", - "value": 0.019637462235649546, - "normalized_score": 1.9637462235649545 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23406040268456377, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.34990625000000003, - "normalized_score": 3.6382812500000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11319813829787234, - "normalized_score": 1.466459810874704 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-23", - "submission_date": "2024-09-09", - "generation": 0, - "base_model": "Josephgflowers/TinyLlama_v1.1_math_code-world-test-1", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 1.1, - "co2_cost": 0.5458875585125622 - } - }, - { - "id": "Josephgflowers/Tinyllama-STEM-Cinder-Agent-v1_float16_c6880b94e72dddbe591fdf30fa15fe42ea60b924_True", - "model": { - "name": "Josephgflowers/Tinyllama-STEM-Cinder-Agent-v1", - "sha": "c6880b94e72dddbe591fdf30fa15fe42ea60b924", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.683635033220809, - 
"has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21257596510591897, - "normalized_score": 21.257596510591895 - }, - "bbh": { - "name": "BBH", - "value": 0.30843808427144626, - "normalized_score": 3.731312676862636 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06722054380664652, - "normalized_score": 6.722054380664652 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2348993288590604, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.334125, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10862699468085106, - "normalized_score": 0.9585549645390061 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-27", - "submission_date": "2024-11-27", - "generation": 1, - "base_model": "Josephgflowers/Tinyllama-STEM-Cinder-Agent-v1 (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 1.1, - "co2_cost": 0.3267712803723987 - } - }, - { - "id": "Josephgflowers/Tinyllama-r1_float16_01af42eb435005a6103760b3f85549ad5e5c35dc_True", - "model": { - "name": "Josephgflowers/Tinyllama-r1", - "sha": "01af42eb435005a6103760b3f85549ad5e5c35dc", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.217266718069099, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2119265770378152, - "normalized_score": 21.19265770378152 - }, - "bbh": { - "name": "BBH", - "value": 0.3014631984266974, - "normalized_score": 3.2046589179465896 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0324773413897281, - "normalized_score": 3.2477341389728096 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25671140939597314, - "normalized_score": 0.8948545861297527 - }, - "musr": { - "name": "MUSR", - "value": 0.33148958333333334, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11344747340425532, - "normalized_score": 1.4941637115839235 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-07", - "submission_date": "2025-02-09", - "generation": 1, - "base_model": "Josephgflowers/Tinyllama-r1 (Merge)", - "hub_license": "llama3.3", - "hub_hearts": 2, - "params_billions": 1.1, - "co2_cost": 0.21477235201797365 - } - }, - { - "id": "JungZoona/T3Q-Qwen2.5-14B-Instruct-1M-e3_bfloat16_5362b0e623096cf6667a47cefd2b33e2e3dd37a9_False", - "model": { - "name": "JungZoona/T3Q-Qwen2.5-14B-Instruct-1M-e3", - "sha": "5362b0e623096cf6667a47cefd2b33e2e3dd37a9", - "precision": "bfloat16", - "type": "❓other", - "weight_type": "Original", - "architecture": "Unknown", - "average_score": 47.091544713804204, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.732396707403024, - "normalized_score": 73.2396707403024 - }, - "bbh": { - "name": "BBH", - "value": 0.7585971930826706, - "normalized_score": 65.46659667305961 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2862537764350453, - "normalized_score": 28.625377643504528 - }, - "gpqa": { - "name": "GPQA", - "value": 0.41694630872483224, - "normalized_score": 22.259507829977633 - }, - "musr": { - "name": "MUSR", - 
"value": 0.5911041666666667, - "normalized_score": 38.688020833333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5884308510638298, - "normalized_score": 54.270094562647756 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-09", - "submission_date": "", - "generation": 2, - "base_model": "Qwen/Qwen2.5-14B", - "hub_license": "apache-2.0", - "hub_hearts": 9, - "params_billions": 0.0, - "co2_cost": 1.397413513183364 - } - }, - { - "id": "JungZoona/T3Q-qwen2.5-14b-v1.0-e3_bfloat16_76b2538f6a0646f8c507f9c9ab070030d3c1b90c_True", - "model": { - "name": "JungZoona/T3Q-qwen2.5-14b-v1.0-e3", - "sha": "76b2538f6a0646f8c507f9c9ab070030d3c1b90c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 47.091544713804204, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.732396707403024, - "normalized_score": 73.2396707403024 - }, - "bbh": { - "name": "BBH", - "value": 0.7585971930826706, - "normalized_score": 65.46659667305961 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2862537764350453, - "normalized_score": 28.625377643504528 - }, - "gpqa": { - "name": "GPQA", - "value": 0.41694630872483224, - "normalized_score": 22.259507829977633 - }, - "musr": { - "name": "MUSR", - "value": 0.5911041666666667, - "normalized_score": 38.688020833333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5884308510638298, - "normalized_score": 54.270094562647756 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-09", - "submission_date": "2025-03-13", - "generation": 2, - "base_model": "Qwen/Qwen2.5-14B", - "hub_license": "apache-2.0", - "hub_hearts": 9, - "params_billions": 14.77, - "co2_cost": 1.561938476492499 - } - }, - { - "id": "Junhoee/Qwen-Megumin_float16_bb46c15ee4bb56c5b63245ef50fd7637234d6f75_True", - "model": { - "name": "Junhoee/Qwen-Megumin", - "sha": "bb46c15ee4bb56c5b63245ef50fd7637234d6f75", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 33.992180349901545, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7141118897857683, - "normalized_score": 71.41118897857685 - }, - "bbh": { - "name": "BBH", - "value": 0.528526812457251, - "normalized_score": 33.64214368642437 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4901812688821752, - "normalized_score": 49.01812688821752 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.39803125, - "normalized_score": 8.187239583333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.41988031914893614, - "normalized_score": 35.54225768321512 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-26", - "submission_date": "2024-11-26", - "generation": 2, - "base_model": "Qwen/Qwen2.5-7B", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 15.231, - "co2_cost": 2.8402978495698914 - } - }, - { - "id": 
"KSU-HW-SEC/Llama3-70b-SVA-FT-1415_bfloat16_1c09728455567898116d2d9cfb6cbbbbd4ee730c_False", - "model": { - "name": "KSU-HW-SEC/Llama3-70b-SVA-FT-1415", - "sha": "1c09728455567898116d2d9cfb6cbbbbd4ee730c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 36.119232723240636, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6179913739987677, - "normalized_score": 61.79913739987677 - }, - "bbh": { - "name": "BBH", - "value": 0.6650146340680478, - "normalized_score": 51.328741195678994 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21978851963746224, - "normalized_score": 21.978851963746223 - }, - "gpqa": { - "name": "GPQA", - "value": 0.375, - "normalized_score": 16.666666666666664 - }, - "musr": { - "name": "MUSR", - "value": 0.4565416666666667, - "normalized_score": 17.801041666666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5242686170212766, - "normalized_score": 47.140957446808514 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-08", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 70.554, - "co2_cost": 19.20205773179167 - } - }, - { - "id": "KSU-HW-SEC/Llama3-70b-SVA-FT-500_bfloat16_856a23f28aeada23d1135c86a37e05524307e8ed_False", - "model": { - "name": "KSU-HW-SEC/Llama3-70b-SVA-FT-500", - "sha": "856a23f28aeada23d1135c86a37e05524307e8ed", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 35.953711985827894, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6105223030448099, - "normalized_score": 61.052230304481 - }, - "bbh": { - "name": "BBH", - "value": 0.6692236023098005, - "normalized_score": 51.8870262488106 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21374622356495468, - "normalized_score": 21.37462235649547 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3808724832214765, - "normalized_score": 17.4496644295302 - }, - "musr": { - "name": "MUSR", - "value": 0.45114583333333336, - "normalized_score": 16.993229166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.522689494680851, - "normalized_score": 46.96549940898345 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-08", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 70.554, - "co2_cost": 18.947476483090963 - } - }, - { - "id": "KSU-HW-SEC/Llama3-70b-SVA-FT-final_bfloat16_391bbd94173b34975d1aa2c7356977a630253b75_False", - "model": { - "name": "KSU-HW-SEC/Llama3-70b-SVA-FT-final", - "sha": "391bbd94173b34975d1aa2c7356977a630253b75", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 36.09383714321667, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6164676391973297, - "normalized_score": 61.64676391973297 - }, - "bbh": { - "name": "BBH", 
- "value": 0.6650146340680478, - "normalized_score": 51.328741195678994 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21978851963746224, - "normalized_score": 21.978851963746223 - }, - "gpqa": { - "name": "GPQA", - "value": 0.375, - "normalized_score": 16.666666666666664 - }, - "musr": { - "name": "MUSR", - "value": 0.4565416666666667, - "normalized_score": 17.801041666666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5242686170212766, - "normalized_score": 47.140957446808514 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-08", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 70.554, - "co2_cost": 19.312397977552983 - } - }, - { - "id": "KSU-HW-SEC/Llama3.1-70b-SVA-FT-1000step_bfloat16_b195fea0d8f350ff29243d4e88654b1baa5af79e_False", - "model": { - "name": "KSU-HW-SEC/Llama3.1-70b-SVA-FT-1000step", - "sha": "b195fea0d8f350ff29243d4e88654b1baa5af79e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 40.75025867079505, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7238039512936785, - "normalized_score": 72.38039512936786 - }, - "bbh": { - "name": "BBH", - "value": 0.6903120365165111, - "normalized_score": 55.48536459580824 - }, - "math": { - "name": "MATH Level 5", - "value": 0.32099697885196377, - "normalized_score": 32.09969788519638 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3959731543624161, - "normalized_score": 19.463087248322143 - }, - "musr": { - "name": "MUSR", - "value": 0.45917708333333335, - "normalized_score": 17.830468749999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5251828457446809, - "normalized_score": 47.242538416075654 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-08", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 70.554, - "co2_cost": 25.108894169483907 - } - }, - { - "id": "Khetterman/DarkAtom-12B-v3_bfloat16_7c7dacc560b64dcff96121ea99374794ccd64b7c_True", - "model": { - "name": "Khetterman/DarkAtom-12B-v3", - "sha": "7c7dacc560b64dcff96121ea99374794ccd64b7c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 25.88038265945612, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6173419859306639, - "normalized_score": 61.734198593066395 - }, - "bbh": { - "name": "BBH", - "value": 0.5153709655381875, - "normalized_score": 31.6595419899606 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11102719033232629, - "normalized_score": 11.10271903323263 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.4468020833333333, - "normalized_score": 16.116927083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3546376329787234, - "normalized_score": 28.29307033096927 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": 
false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-13", - "submission_date": "2024-12-09", - "generation": 1, - "base_model": "Khetterman/DarkAtom-12B-v3 (Merge)", - "hub_license": "", - "hub_hearts": 15, - "params_billions": 12.248, - "co2_cost": 2.1065675447310444 - } - }, - { - "id": "Khetterman/Kosmos-8B-v1_bfloat16_16ad5242ca89c6901fae1f41033f00ce455f4be0_False", - "model": { - "name": "Khetterman/Kosmos-8B-v1", - "sha": "16ad5242ca89c6901fae1f41033f00ce455f4be0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.31847148009074, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.41291107594515886, - "normalized_score": 41.291107594515886 - }, - "bbh": { - "name": "BBH", - "value": 0.5233522858623628, - "normalized_score": 31.75895920189927 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09894259818731117, - "normalized_score": 9.894259818731117 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2986577181208054, - "normalized_score": 6.487695749440718 - }, - "musr": { - "name": "MUSR", - "value": 0.3918854166666667, - "normalized_score": 8.819010416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.366938164893617, - "normalized_score": 29.659796099290777 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-22", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "Khetterman/Kosmos-8B-v1 (Merge)", - "hub_license": "", - "hub_hearts": 4, - "params_billions": 8.03, - "co2_cost": 1.2588972069970084 - } - }, - { - "id": "Kimargin/GPT-NEO-1.3B-wiki_float16_92fa51fa6589f6e8fdfcc83f085216b3dae11da5_False", - "model": { - "name": "Kimargin/GPT-NEO-1.3B-wiki", - "sha": "92fa51fa6589f6e8fdfcc83f085216b3dae11da5", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "GPTNeoForCausalLM", - "average_score": 5.349183189126563, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19206815693471102, - "normalized_score": 19.2068156934711 - }, - "bbh": { - "name": "BBH", - "value": 0.3026339952046975, - "normalized_score": 3.423611572649296 - }, - "math": { - "name": "MATH Level 5", - "value": 0.014350453172205438, - "normalized_score": 1.4350453172205437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24496644295302014, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3882604166666666, - "normalized_score": 6.932552083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10987367021276596, - "normalized_score": 1.0970744680851066 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-23", - "submission_date": "2024-10-24", - "generation": 1, - "base_model": "Kimargin/GPT-NEO-1.3B-wiki (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 1.316, - "co2_cost": 1.248672424714661 - } - }, - { - "id": "KingNish/Qwen2.5-0.5b-Test-ft_float16_f905bb1d37c7853fb5c7157d8d3ad0f062b65c0f_False", - "model": { - "name": "KingNish/Qwen2.5-0.5b-Test-ft", - "sha": 
"f905bb1d37c7853fb5c7157d8d3ad0f062b65c0f", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.865415598812703, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.26708134416681073, - "normalized_score": 26.708134416681073 - }, - "bbh": { - "name": "BBH", - "value": 0.3231533857529747, - "normalized_score": 6.058845092070314 - }, - "math": { - "name": "MATH Level 5", - "value": 0.035498489425981876, - "normalized_score": 3.5498489425981874 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.342125, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16888297872340424, - "normalized_score": 7.65366430260047 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-26", - "submission_date": "2024-09-29", - "generation": 1, - "base_model": "KingNish/Qwen2.5-0.5b-Test-ft (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 10, - "params_billions": 0.494, - "co2_cost": 1.3373808598428163 - } - }, - { - "id": "KingNish/Reasoning-0.5b_float16_fca9019dec693bfcb8a1fbc39e301636ae2c518d_True", - "model": { - "name": "KingNish/Reasoning-0.5b", - "sha": "fca9019dec693bfcb8a1fbc39e301636ae2c518d", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.163893227645488, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.217421995859874, - "normalized_score": 21.7421995859874 - }, - "bbh": { - "name": "BBH", - "value": 0.33536255853174524, - "normalized_score": 7.491210642552303 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02190332326283988, - "normalized_score": 2.190332326283988 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.35133333333333333, - "normalized_score": 2.0833333333333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16414561170212766, - "normalized_score": 7.127290189125294 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-05", - "submission_date": "2025-03-07", - "generation": 2, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "apache-2.0", - "hub_hearts": 30, - "params_billions": 0.494, - "co2_cost": 0.51124066474765 - } - }, - { - "id": "KingNish/Reasoning-Llama-3b-v0.1_float16_d164caf591c42a4cbc3b21d46493e72fbdbd9de8_True", - "model": { - "name": "KingNish/Reasoning-Llama-3b-v0.1", - "sha": "d164caf591c42a4cbc3b21d46493e72fbdbd9de8", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.212379567792407, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6224628430342602, - "normalized_score": 62.246284303426016 - }, - "bbh": { - "name": "BBH", - "value": 0.43433592509582786, - "normalized_score": 19.86245115758441 - }, - "math": { - "name": "MATH 
Level 5", - "value": 0.1299093655589124, - "normalized_score": 12.990936555891238 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.31676041666666666, - "normalized_score": 2.3950520833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3029421542553192, - "normalized_score": 22.549128250591018 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-10", - "submission_date": "2024-10-26", - "generation": 1, - "base_model": "meta-llama/Llama-3.2-3B-Instruct", - "hub_license": "llama3.2", - "hub_hearts": 9, - "params_billions": 3.213, - "co2_cost": 1.3504700051621026 - } - }, - { - "id": "KingNish/qwen-1b-continued_float16_4abdfa59671b7c23c535aca87ce15baef8ed1125_False", - "model": { - "name": "KingNish/qwen-1b-continued", - "sha": "4abdfa59671b7c23c535aca87ce15baef8ed1125", - "precision": "float16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.792600271519794, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.12547263483113694, - "normalized_score": 12.547263483113692 - }, - "bbh": { - "name": "BBH", - "value": 0.29909543894796364, - "normalized_score": 4.387464099020609 - }, - "math": { - "name": "MATH Level 5", - "value": 0.00906344410876133, - "normalized_score": 0.906344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.38587499999999997, - "normalized_score": 5.6677083333333345 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1260804521276596, - "normalized_score": 2.897828014184398 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-07", - "submission_date": "2025-03-07", - "generation": 1, - "base_model": "Removed", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.277, - "co2_cost": 0.8910725804726569 - } - }, - { - "id": "KingNish/qwen-1b-continued-v2_float16_bfa7ea0a2675dc4acf890d6ca5e3c218315e017c_False", - "model": { - "name": "KingNish/qwen-1b-continued-v2", - "sha": "bfa7ea0a2675dc4acf890d6ca5e3c218315e017c", - "precision": "float16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.4416424249765365, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1578711153073844, - "normalized_score": 15.787111530738443 - }, - "bbh": { - "name": "BBH", - "value": 0.31194932022650246, - "normalized_score": 4.989232311632899 - }, - "math": { - "name": "MATH Level 5", - "value": 0.010574018126888218, - "normalized_score": 1.0574018126888218 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.33927083333333335, - "normalized_score": 2.675520833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11926529255319149, - "normalized_score": 2.1405880614657202 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - 
"metadata": { - "upload_date": "2025-03-07", - "submission_date": "2025-03-07", - "generation": 1, - "base_model": "Removed", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.277, - "co2_cost": 0.9114872192218902 - } - }, - { - "id": "KingNish/qwen-1b-continued-v2.1_float16_b1f1624062c683f84815bdeead038c0b2cf2c884_False", - "model": { - "name": "KingNish/qwen-1b-continued-v2.1", - "sha": "b1f1624062c683f84815bdeead038c0b2cf2c884", - "precision": "float16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 5.461814579103264, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.11268323603594019, - "normalized_score": 11.26832360359402 - }, - "bbh": { - "name": "BBH", - "value": 0.30416583041069006, - "normalized_score": 4.197658352409889 - }, - "math": { - "name": "MATH Level 5", - "value": 0.00906344410876133, - "normalized_score": 0.906344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.41539583333333335, - "normalized_score": 10.957812500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1278257978723404, - "normalized_score": 3.0917553191489344 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-08", - "submission_date": "2025-03-08", - "generation": 2, - "base_model": "Removed", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.277, - "co2_cost": 0.8904057809009366 - } - }, - { - "id": "KingNish/qwen-1b-continued-v2.2_float16_8afa5b9fb92d599500e9043f4219de945b890839_False", - "model": { - "name": "KingNish/qwen-1b-continued-v2.2", - "sha": "8afa5b9fb92d599500e9043f4219de945b890839", - "precision": "float16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.441641963318088, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.14125963554479892, - "normalized_score": 14.12596355447989 - }, - "bbh": { - "name": "BBH", - "value": 0.30586579449667844, - "normalized_score": 4.956068589848116 - }, - "math": { - "name": "MATH Level 5", - "value": 0.015105740181268883, - "normalized_score": 1.5105740181268883 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25671140939597314, - "normalized_score": 0.8948545861297527 - }, - "musr": { - "name": "MUSR", - "value": 0.35130208333333335, - "normalized_score": 2.2460937499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1262466755319149, - "normalized_score": 2.916297281323877 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-08", - "submission_date": "2025-03-09", - "generation": 3, - "base_model": "Removed", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.277, - "co2_cost": 0.9038140571785205 - } - }, - { - "id": "Kquant03/CognitiveFusion2-4x7B-BF16_bfloat16_db45b86c462bb93db7ba4f2c3fe3517582c859a1_True", - "model": { - "name": "Kquant03/CognitiveFusion2-4x7B-BF16", - "sha": "db45b86c462bb93db7ba4f2c3fe3517582c859a1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", 
- "architecture": "MixtralForCausalLM", - "average_score": 15.629054867362484, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.35665700341759865, - "normalized_score": 35.665700341759866 - }, - "bbh": { - "name": "BBH", - "value": 0.41078286111483786, - "normalized_score": 17.689002759870313 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05740181268882175, - "normalized_score": 5.740181268882175 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2860738255033557, - "normalized_score": 4.809843400447425 - }, - "musr": { - "name": "MUSR", - "value": 0.4145520833333333, - "normalized_score": 9.95234375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27925531914893614, - "normalized_score": 19.917257683215126 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-06", - "submission_date": "2024-07-31", - "generation": 0, - "base_model": "Kquant03/CognitiveFusion2-4x7B-BF16", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 24.154, - "co2_cost": 3.3320706625435443 - } - }, - { - "id": "Kquant03/L3-Pneuma-8B_bfloat16_257aa8d00e82f91b7a780384aa76573c2ea614a8_False", - "model": { - "name": "Kquant03/L3-Pneuma-8B", - "sha": "257aa8d00e82f91b7a780384aa76573c2ea614a8", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.617570099377616, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2374056392593873, - "normalized_score": 23.74056392593873 - }, - "bbh": { - "name": "BBH", - "value": 0.49550433176754827, - "normalized_score": 28.820201716269676 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05060422960725076, - "normalized_score": 5.0604229607250755 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.41715624999999995, - "normalized_score": 10.211197916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31840093085106386, - "normalized_score": 24.266770094562652 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-13", - "submission_date": "2024-10-16", - "generation": 1, - "base_model": "meta-llama/Meta-Llama-3-8B", - "hub_license": "llama3", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.6076220151511036 - } - }, - { - "id": "Krystalan/DRT-o1-14B_bfloat16_b89415f3ceb805687dc75d0ac82d1425e497bcaa_False", - "model": { - "name": "Krystalan/DRT-o1-14B", - "sha": "b89415f3ceb805687dc75d0ac82d1425e497bcaa", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.16608563724584, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4067662690549963, - "normalized_score": 40.67662690549963 - }, - "bbh": { - "name": "BBH", - "value": 0.637927537514229, - "normalized_score": 48.14182185507073 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4826283987915408, - "normalized_score": 48.26283987915408 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3523489932885906, - 
"normalized_score": 13.646532438478745 - }, - "musr": { - "name": "MUSR", - "value": 0.47951041666666666, - "normalized_score": 19.83880208333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5178690159574468, - "normalized_score": 46.429890661938536 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-23", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "Krystalan/DRT-o1-14B (Merge)", - "hub_license": "cc-by-nc-sa-4.0", - "hub_hearts": 22, - "params_billions": 14.77, - "co2_cost": 3.7459805026120465 - } - }, - { - "id": "Krystalan/DRT-o1-7B_bfloat16_5e1848ded3209d414113c1ded1389cd04aef99c2_False", - "model": { - "name": "Krystalan/DRT-o1-7B", - "sha": "5e1848ded3209d414113c1ded1389cd04aef99c2", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 31.402661796813607, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3928276971768242, - "normalized_score": 39.28276971768241 - }, - "bbh": { - "name": "BBH", - "value": 0.5467693339610741, - "normalized_score": 35.73893662836844 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4478851963746224, - "normalized_score": 44.78851963746224 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3213087248322148, - "normalized_score": 9.507829977628639 - }, - "musr": { - "name": "MUSR", - "value": 0.50865625, - "normalized_score": 24.08203125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.41514295212765956, - "normalized_score": 35.01588356973995 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-23", - "submission_date": "2025-02-05", - "generation": 1, - "base_model": "Krystalan/DRT-o1-7B (Merge)", - "hub_license": "cc-by-nc-sa-4.0", - "hub_hearts": 13, - "params_billions": 7.616, - "co2_cost": 1.3219640226212213 - } - }, - { - "id": "Kukedlc/NeuralExperiment-7b-MagicCoder-v7.5_float16_43ea8d27d652dc15e4d27f665c5d636a5937780b_True", - "model": { - "name": "Kukedlc/NeuralExperiment-7b-MagicCoder-v7.5", - "sha": "43ea8d27d652dc15e4d27f665c5d636a5937780b", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 18.006004625989025, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4552509563513699, - "normalized_score": 45.52509563513699 - }, - "bbh": { - "name": "BBH", - "value": 0.3988446544778517, - "normalized_score": 16.386034485864904 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06646525679758308, - "normalized_score": 6.646525679758309 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.4281979166666667, - "normalized_score": 13.058072916666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2824135638297872, - "normalized_score": 20.268173758865245 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-03-07", - "submission_date": "2024-07-30", - 
"generation": 0, - "base_model": "Kukedlc/NeuralExperiment-7b-MagicCoder-v7.5", - "hub_license": "apache-2.0", - "hub_hearts": 6, - "params_billions": 7.242, - "co2_cost": 0.9097203198829114 - } - }, - { - "id": "Kukedlc/NeuralLLaMa-3-8b-DT-v0.1_float16_1fe849c1e7e4793c2fdd869fcfb51e0d1910674f_False", - "model": { - "name": "Kukedlc/NeuralLLaMa-3-8b-DT-v0.1", - "sha": "1fe849c1e7e4793c2fdd869fcfb51e0d1910674f", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.259598634719698, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4371412297149342, - "normalized_score": 43.71412297149342 - }, - "bbh": { - "name": "BBH", - "value": 0.4986771544360115, - "normalized_score": 28.008307823364888 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08081570996978851, - "normalized_score": 8.08157099697885 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.40711458333333334, - "normalized_score": 9.68932291666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.379155585106383, - "normalized_score": 31.017287234042552 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-11", - "submission_date": "2024-09-17", - "generation": 1, - "base_model": "Kukedlc/NeuralLLaMa-3-8b-DT-v0.1 (Merge)", - "hub_license": "other", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.7059282828813764 - } - }, - { - "id": "Kukedlc/NeuralLLaMa-3-8b-ORPO-v0.3_float16_aa176c0db7791a1c09039135791145b0704a5f46_True", - "model": { - "name": "Kukedlc/NeuralLLaMa-3-8b-ORPO-v0.3", - "sha": "aa176c0db7791a1c09039135791145b0704a5f46", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.74874162408916, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5275912356990563, - "normalized_score": 52.759123569905626 - }, - "bbh": { - "name": "BBH", - "value": 0.4557141539616392, - "normalized_score": 22.391711908230842 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04833836858006042, - "normalized_score": 4.833836858006042 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23909395973154363, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.37003125, - "normalized_score": 3.65390625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3056848404255319, - "normalized_score": 22.853871158392433 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-14", - "submission_date": "2024-07-28", - "generation": 1, - "base_model": "Kukedlc/NeuralLLaMa-3-8b-ORPO-v0.3 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.8310201685600287 - } - }, - { - "id": "Kukedlc/NeuralSynthesis-7B-v0.1_bfloat16_547a5dc8963e127a9638256bb80eb3a36da1cc5d_False", - "model": { - "name": "Kukedlc/NeuralSynthesis-7B-v0.1", - "sha": "547a5dc8963e127a9638256bb80eb3a36da1cc5d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": 
"MistralForCausalLM", - "average_score": 20.015676781413074, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4184563624516283, - "normalized_score": 41.84563624516284 - }, - "bbh": { - "name": "BBH", - "value": 0.5144745481048844, - "normalized_score": 31.83439540006779 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0634441087613293, - "normalized_score": 6.3444108761329305 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28104026845637586, - "normalized_score": 4.138702460850116 - }, - "musr": { - "name": "MUSR", - "value": 0.43328125, - "normalized_score": 13.160156250000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.304936835106383, - "normalized_score": 22.770759456264773 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-06", - "submission_date": "2024-06-29", - "generation": 0, - "base_model": "Kukedlc/NeuralSynthesis-7B-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 7.242, - "co2_cost": 1.1925980596664074 - } - }, - { - "id": "Kukedlc/NeuralSynthesis-7B-v0.3_bfloat16_090fab29146f8e55066bce2f5f5859ab2d6027f4_False", - "model": { - "name": "Kukedlc/NeuralSynthesis-7B-v0.3", - "sha": "090fab29146f8e55066bce2f5f5859ab2d6027f4", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.095272707991136, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4078400865259733, - "normalized_score": 40.78400865259733 - }, - "bbh": { - "name": "BBH", - "value": 0.5138078814382175, - "normalized_score": 31.811748341244265 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07779456193353475, - "normalized_score": 7.779456193353475 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2802013422818792, - "normalized_score": 4.026845637583895 - }, - "musr": { - "name": "MUSR", - "value": 0.4345833333333333, - "normalized_score": 13.389583333333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30501994680851063, - "normalized_score": 22.779994089834513 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-07", - "submission_date": "2024-07-31", - "generation": 0, - "base_model": "Kukedlc/NeuralSynthesis-7B-v0.3", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.1671442421174543 - } - }, - { - "id": "Kukedlc/NeuralSynthesis-7b-v0.4-slerp_bfloat16_bb3bd36fce162f472668dbd91960cd1525b45f30_False", - "model": { - "name": "Kukedlc/NeuralSynthesis-7b-v0.4-slerp", - "sha": "bb3bd36fce162f472668dbd91960cd1525b45f30", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.530512651958592, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3947259936967247, - "normalized_score": 39.47259936967247 - }, - "bbh": { - "name": "BBH", - "value": 0.5142932549151301, - "normalized_score": 31.99718681136041 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06268882175226587, - "normalized_score": 6.268882175226587 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - 
"normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.43324999999999997, - "normalized_score": 13.056250000000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3042719414893617, - "normalized_score": 22.696882387706854 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-12", - "submission_date": "2024-07-31", - "generation": 1, - "base_model": "Kukedlc/NeuralSynthesis-7b-v0.4-slerp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.1981285001848536 - } - }, - { - "id": "Kukedlc/Qwen-2.5-7b-Spanish-o1-CoT_float16_71d8ed0634e20921a761a0e349dca9eb86b63f82_False", - "model": { - "name": "Kukedlc/Qwen-2.5-7b-Spanish-o1-CoT", - "sha": "71d8ed0634e20921a761a0e349dca9eb86b63f82", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 28.573639430598366, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4210295349672203, - "normalized_score": 42.10295349672203 - }, - "bbh": { - "name": "BBH", - "value": 0.5601947823443537, - "normalized_score": 36.86336502529178 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2726586102719033, - "normalized_score": 27.26586102719033 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32046979865771813, - "normalized_score": 9.395973154362418 - }, - "musr": { - "name": "MUSR", - "value": 0.4776770833333333, - "normalized_score": 18.442968750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4363364361702128, - "normalized_score": 37.37071513002365 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-03", - "submission_date": "2024-12-04", - "generation": 0, - "base_model": "Kukedlc/Qwen-2.5-7b-Spanish-o1-CoT", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 7.616, - "co2_cost": 1.2730720495918209 - } - }, - { - "id": "Kumar955/Hemanth-llm_bfloat16_871325cc04f57cd953c161a0ace49c47af8eca4c_False", - "model": { - "name": "Kumar955/Hemanth-llm", - "sha": "871325cc04f57cd953c161a0ace49c47af8eca4c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.143018437127726, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5045102550122564, - "normalized_score": 50.451025501225644 - }, - "bbh": { - "name": "BBH", - "value": 0.522494907014536, - "normalized_score": 33.044262388969805 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0702416918429003, - "normalized_score": 7.02416918429003 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.4485625, - "normalized_score": 14.503645833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3112533244680851, - "normalized_score": 23.472591607565015 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-24", - "submission_date": "2024-09-24", - 
"generation": 1, - "base_model": "Kumar955/Hemanth-llm (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 2.672246600024527 - } - }, - { - "id": "L-RAGE/3_PRYMMAL-ECE-7B-SLERP-V1_bfloat16_483902db68f99affe1d7f1139755dfd115abbca5_False", - "model": { - "name": "L-RAGE/3_PRYMMAL-ECE-7B-SLERP-V1", - "sha": "483902db68f99affe1d7f1139755dfd115abbca5", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 14.854317212289837, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.27422572108671656, - "normalized_score": 27.42257210867166 - }, - "bbh": { - "name": "BBH", - "value": 0.422793974567173, - "normalized_score": 19.083009480539996 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10800604229607251, - "normalized_score": 10.80060422960725 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28187919463087246, - "normalized_score": 4.250559284116329 - }, - "musr": { - "name": "MUSR", - "value": 0.3841354166666667, - "normalized_score": 6.18359375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29247007978723405, - "normalized_score": 21.38556442080378 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-29", - "submission_date": "2024-10-29", - "generation": 1, - "base_model": "L-RAGE/3_PRYMMAL-ECE-7B-SLERP-V1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.777, - "co2_cost": 1.17955660208701 - } - }, - { - "id": "LEESM/llama-2-7b-hf-lora-oki100p_float16_4bfd99888bf37e23d966f1e537fe199992c27a72_False", - "model": { - "name": "LEESM/llama-2-7b-hf-lora-oki100p", - "sha": "4bfd99888bf37e23d966f1e537fe199992c27a72", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 8.782858998727155, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25129434345314877, - "normalized_score": 25.129434345314877 - }, - "bbh": { - "name": "BBH", - "value": 0.34916752720369776, - "normalized_score": 10.265743141867235 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01661631419939577, - "normalized_score": 1.6616314199395772 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26929530201342283, - "normalized_score": 2.572706935123044 - }, - "musr": { - "name": "MUSR", - "value": 0.3687291666666666, - "normalized_score": 3.5578125000000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.18558843085106383, - "normalized_score": 9.509825650118202 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-17", - "submission_date": "2024-11-08", - "generation": 0, - "base_model": "LEESM/llama-2-7b-hf-lora-oki100p", - "hub_license": "mit", - "hub_hearts": 2, - "params_billions": 6.738, - "co2_cost": 0.9676354321139934 - } - }, - { - "id": "LEESM/llama-2-7b-hf-lora-oki10p_float16_d6e5af01616a038ac2b5cb83f458e490e1102244_False", - "model": { - "name": "LEESM/llama-2-7b-hf-lora-oki10p", - "sha": "d6e5af01616a038ac2b5cb83f458e490e1102244", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - 
"architecture": "LlamaForCausalLM", - "average_score": 7.168375740980447, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22701432199896276, - "normalized_score": 22.701432199896274 - }, - "bbh": { - "name": "BBH", - "value": 0.3530929513059229, - "normalized_score": 9.438287176618806 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01661631419939577, - "normalized_score": 1.6616314199395772 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25419463087248323, - "normalized_score": 0.5592841163310973 - }, - "musr": { - "name": "MUSR", - "value": 0.34752083333333333, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16788563829787234, - "normalized_score": 7.542848699763592 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-03", - "submission_date": "2024-11-08", - "generation": 0, - "base_model": "LEESM/llama-2-7b-hf-lora-oki10p", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 6.738, - "co2_cost": 1.4755304334859534 - } - }, - { - "id": "LEESM/llama-3-8b-bnb-4b-kowiki231101_bfloat16_63b8f715daab6a0c7196a20855be8e85fe7ddcb4_False", - "model": { - "name": "LEESM/llama-3-8b-bnb-4b-kowiki231101", - "sha": "63b8f715daab6a0c7196a20855be8e85fe7ddcb4", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 9.47249832488312, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16848739123303944, - "normalized_score": 16.848739123303943 - }, - "bbh": { - "name": "BBH", - "value": 0.4130805653617178, - "normalized_score": 16.93486814930169 - }, - "math": { - "name": "MATH Level 5", - "value": 0.013595166163141994, - "normalized_score": 1.3595166163141994 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.3551458333333333, - "normalized_score": 3.0598958333333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.24251994680851063, - "normalized_score": 15.83554964539007 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-08", - "submission_date": "2024-11-08", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.5137752144009051 - } - }, - { - "id": "LEESM/llama-3-Korean-Bllossom-8B-trexlab-oki10p_float16_d105e0365510f9e5f8550558343083cab8523524_False", - "model": { - "name": "LEESM/llama-3-Korean-Bllossom-8B-trexlab-oki10p", - "sha": "d105e0365510f9e5f8550558343083cab8523524", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.509662923847069, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21372513818889433, - "normalized_score": 21.372513818889434 - }, - "bbh": { - "name": "BBH", - "value": 0.43430121169320707, - "normalized_score": 19.797435760911394 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04682779456193353, - "normalized_score": 
4.682779456193353 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2751677852348993, - "normalized_score": 3.355704697986576 - }, - "musr": { - "name": "MUSR", - "value": 0.38692708333333337, - "normalized_score": 7.665885416666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3176529255319149, - "normalized_score": 24.183658392434985 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-22", - "submission_date": "2024-11-08", - "generation": 0, - "base_model": "LEESM/llama-3-Korean-Bllossom-8B-trexlab-oki10p", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.516715580023216 - } - }, - { - "id": "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct_bfloat16_7f15baedd46858153d817445aff032f4d6cf4939_True", - "model": { - "name": "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct", - "sha": "7f15baedd46858153d817445aff032f4d6cf4939", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "ExaoneForCausalLM", - "average_score": 25.733775511042666, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7192826145737754, - "normalized_score": 71.92826145737754 - }, - "bbh": { - "name": "BBH", - "value": 0.4174432647784512, - "normalized_score": 17.97733539518049 - }, - "math": { - "name": "MATH Level 5", - "value": 0.30438066465256797, - "normalized_score": 30.438066465256796 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.366125, - "normalized_score": 3.298958333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35771276595744683, - "normalized_score": 28.63475177304965 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-31", - "submission_date": "2024-08-18", - "generation": 0, - "base_model": "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct", - "hub_license": "other", - "hub_hearts": 407, - "params_billions": 7.8, - "co2_cost": 1.650255954959219 - } - }, - { - "id": "LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct_float16_e949c91dec92095908d34e6b560af77dd0c993f8_True", - "model": { - "name": "LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct", - "sha": "e949c91dec92095908d34e6b560af77dd0c993f8", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "ExaoneForCausalLM", - "average_score": 27.143883211239544, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7950449252428002, - "normalized_score": 79.50449252428001 - }, - "bbh": { - "name": "BBH", - "value": 0.4092347113723405, - "normalized_score": 15.94743717105687 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3678247734138973, - "normalized_score": 36.78247734138973 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.366125, - "normalized_score": 3.165625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32804188829787234, - "normalized_score": 25.33798758865248 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": 
"2024-12-01", - "submission_date": "2024-12-11", - "generation": 0, - "base_model": "LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct", - "hub_license": "other", - "hub_hearts": 143, - "params_billions": 2.405, - "co2_cost": 1.2145429205973002 - } - }, - { - "id": "LGAI-EXAONE/EXAONE-3.5-32B-Instruct_float16_d6fa88cd8d2c9512b40578bdc44e64909e5a5042_True", - "model": { - "name": "LGAI-EXAONE/EXAONE-3.5-32B-Instruct", - "sha": "d6fa88cd8d2c9512b40578bdc44e64909e5a5042", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "ExaoneForCausalLM", - "average_score": 37.603165755662836, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8391833668000904, - "normalized_score": 83.91833668000905 - }, - "bbh": { - "name": "BBH", - "value": 0.5760913742720142, - "normalized_score": 39.82420331711213 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5128398791540786, - "normalized_score": 51.283987915407856 - }, - "gpqa": { - "name": "GPQA", - "value": 0.287751677852349, - "normalized_score": 5.033557046979867 - }, - "musr": { - "name": "MUSR", - "value": 0.38066666666666665, - "normalized_score": 5.150000000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4636801861702128, - "normalized_score": 40.40890957446809 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-01", - "submission_date": "2025-01-13", - "generation": 0, - "base_model": "LGAI-EXAONE/EXAONE-3.5-32B-Instruct", - "hub_license": "other", - "hub_hearts": 113, - "params_billions": 32.003, - "co2_cost": 30.995242391297282 - } - }, - { - "id": "LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct_float16_0ff6b5ec7c13b049b253a16a889aa269e6b79a94_True", - "model": { - "name": "LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct", - "sha": "0ff6b5ec7c13b049b253a16a889aa269e6b79a94", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "ExaoneForCausalLM", - "average_score": 32.547229958748495, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8136045692096969, - "normalized_score": 81.3604569209697 - }, - "bbh": { - "name": "BBH", - "value": 0.4727592304359862, - "normalized_score": 25.65374942082902 - }, - "math": { - "name": "MATH Level 5", - "value": 0.47507552870090636, - "normalized_score": 47.507552870090635 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2575503355704698, - "normalized_score": 1.0067114093959737 - }, - "musr": { - "name": "MUSR", - "value": 0.3779375, - "normalized_score": 4.9421875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4133144946808511, - "normalized_score": 34.812721631205676 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-01", - "submission_date": "2024-12-11", - "generation": 0, - "base_model": "LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct", - "hub_license": "other", - "hub_hearts": 112, - "params_billions": 7.818, - "co2_cost": 1.439943340986877 - } - }, - { - "id": "LLM360/K2_float16_49d159b6f2b64d562e745f0ff06e65b9a4c28ead_False", - "model": { - "name": "LLM360/K2", - "sha": "49d159b6f2b64d562e745f0ff06e65b9a4c28ead", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - 
"average_score": 14.643753289939289, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2252157608478836, - "normalized_score": 22.52157608478836 - }, - "bbh": { - "name": "BBH", - "value": 0.4971835676523677, - "normalized_score": 28.220402834201128 - }, - "math": { - "name": "MATH Level 5", - "value": 0.027190332326283987, - "normalized_score": 2.719033232628399 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27684563758389263, - "normalized_score": 3.5794183445190177 - }, - "musr": { - "name": "MUSR", - "value": 0.39799999999999996, - "normalized_score": 8.550000000000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30044880319148937, - "normalized_score": 22.272089243498815 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-04-17", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "LLM360/K2", - "hub_license": "apache-2.0", - "hub_hearts": 87, - "params_billions": 65.286, - "co2_cost": 17.676412835458454 - } - }, - { - "id": "LLM360/K2-Chat_bfloat16_5454f2d28031c9127e4227c873ca2f154e02e4c7_True", - "model": { - "name": "LLM360/K2-Chat", - "sha": "5454f2d28031c9127e4227c873ca2f154e02e4c7", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.387145154830876, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5151763986223221, - "normalized_score": 51.51763986223221 - }, - "bbh": { - "name": "BBH", - "value": 0.5358099630242067, - "normalized_score": 33.79382923599304 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10347432024169184, - "normalized_score": 10.347432024169184 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3062080536912752, - "normalized_score": 7.494407158836691 - }, - "musr": { - "name": "MUSR", - "value": 0.457, - "normalized_score": 16.824999999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3371010638297872, - "normalized_score": 26.34456264775413 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-22", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "LLM360/K2-Chat", - "hub_license": "apache-2.0", - "hub_hearts": 36, - "params_billions": 65.286, - "co2_cost": 34.519656126301996 - } - }, - { - "id": "LLM4Binary/llm4decompile-1.3b-v2_bfloat16_a347dabcb1ea9f21c9339bd764c150262e993b95_False", - "model": { - "name": "LLM4Binary/llm4decompile-1.3b-v2", - "sha": "a347dabcb1ea9f21c9339bd764c150262e993b95", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 6.93902486034668, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22678936333373229, - "normalized_score": 22.67893633337323 - }, - "bbh": { - "name": "BBH", - "value": 0.3271808417267589, - "normalized_score": 5.915475430438469 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01283987915407855, - "normalized_score": 1.283987915407855 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23573825503355705, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.4071770833333333, - 
"normalized_score": 9.430468750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12092752659574468, - "normalized_score": 2.3252807328605196 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-18", - "submission_date": "2024-11-16", - "generation": 0, - "base_model": "LLM4Binary/llm4decompile-1.3b-v2", - "hub_license": "mit", - "hub_hearts": 8, - "params_billions": 1.346, - "co2_cost": 0.49516535188306937 - } - }, - { - "id": "Lambent/qwen2.5-reinstruct-alternate-lumen-14B_bfloat16_dac3be334098338fb6c02636349e8ed53f18c4a4_False", - "model": { - "name": "Lambent/qwen2.5-reinstruct-alternate-lumen-14B", - "sha": "dac3be334098338fb6c02636349e8ed53f18c4a4", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.07061834522751, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47938137475232384, - "normalized_score": 47.93813747523238 - }, - "bbh": { - "name": "BBH", - "value": 0.6458988582965893, - "normalized_score": 48.989609006737815 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4622356495468278, - "normalized_score": 46.22356495468278 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3766778523489933, - "normalized_score": 16.890380313199106 - }, - "musr": { - "name": "MUSR", - "value": 0.47700000000000004, - "normalized_score": 19.624999999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.538813164893617, - "normalized_score": 48.757018321512994 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-23", - "submission_date": "2024-09-28", - "generation": 1, - "base_model": "Lambent/qwen2.5-reinstruct-alternate-lumen-14B (Merge)", - "hub_license": "", - "hub_hearts": 3, - "params_billions": 14.766, - "co2_cost": 4.529301034479197 - } - }, - { - "id": "Langboat/Mengzi3-8B-Chat_bfloat16_128fffd3dac7c6067ca4d1a650e836e3ef46c013_True", - "model": { - "name": "Langboat/Mengzi3-8B-Chat", - "sha": "128fffd3dac7c6067ca4d1a650e836e3ef46c013", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.28829250015946, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.513977357854936, - "normalized_score": 51.3977357854936 - }, - "bbh": { - "name": "BBH", - "value": 0.4683725003203179, - "normalized_score": 25.188298449475578 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09063444108761329, - "normalized_score": 9.06344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.4077916666666667, - "normalized_score": 9.040625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31416223404255317, - "normalized_score": 23.795803782505907 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-14", - "submission_date": "2024-10-21", - "generation": 0, - "base_model": "Langboat/Mengzi3-8B-Chat", - "hub_license": "apache-2.0", - "hub_hearts": 1, - 
"params_billions": 8.03, - "co2_cost": 1.7038712588964957 - } - }, - { - "id": "Lawnakk/BBA100_bfloat16_1f67fe78975a4f053e61106fa448055976151144_False", - "model": { - "name": "Lawnakk/BBA100", - "sha": "1f67fe78975a4f053e61106fa448055976151144", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 5.585008068956385, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2075803312987318, - "normalized_score": 20.75803312987318 - }, - "bbh": { - "name": "BBH", - "value": 0.2825701502983552, - "normalized_score": 2.1684042140448265 - }, - "math": { - "name": "MATH Level 5", - "value": 0.009818731117824773, - "normalized_score": 0.9818731117824773 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24412751677852348, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.40196875, - "normalized_score": 8.24609375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11220079787234043, - "normalized_score": 1.3556442080378246 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-26", - "submission_date": "2025-02-26", - "generation": 1, - "base_model": "Lawnakk/BBA100 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.7259936221060549 - } - }, - { - "id": "Lawnakk/BBALAW1_bfloat16_f8c558ee7cf7033e738c71ae73cc8764bc9ec944_False", - "model": { - "name": "Lawnakk/BBALAW1", - "sha": "f8c558ee7cf7033e738c71ae73cc8764bc9ec944", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 5.709610503141909, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19054442213327305, - "normalized_score": 19.054442213327306 - }, - "bbh": { - "name": "BBH", - "value": 0.28723681696502185, - "normalized_score": 2.532750619273585 - }, - "math": { - "name": "MATH Level 5", - "value": 0.009818731117824773, - "normalized_score": 0.9818731117824773 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24328859060402686, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.4152708333333333, - "normalized_score": 10.3421875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11211768617021277, - "normalized_score": 1.3464095744680846 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "Lawnakk/BBALAW1 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.670119798592899 - } - }, - { - "id": "Lawnakk/BBALAW1.0_bfloat16_e10c170571afb38d21b16ff4790eb10218f0fc07_False", - "model": { - "name": "Lawnakk/BBALAW1.0", - "sha": "e10c170571afb38d21b16ff4790eb10218f0fc07", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.2556700001423438, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.13511482865463637, - "normalized_score": 13.511482865463638 - }, - "bbh": { - "name": "BBH", - "value": 0.28276697965906106, - 
"normalized_score": 1.2476381461675576 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2558724832214765, - "normalized_score": 0.7829977628635317 - }, - "musr": { - "name": "MUSR", - "value": 0.3525729166666667, - "normalized_score": 2.5716145833333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11278257978723404, - "normalized_score": 1.4202866430260035 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "Lawnakk/BBALAW1.0 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 4.353, - "co2_cost": 0.4032986216068706 - } - }, - { - "id": "Lawnakk/BBALAW1.2_bfloat16_19994613ecdfb585d645fb7f12c481ffda6c2968_False", - "model": { - "name": "Lawnakk/BBALAW1.2", - "sha": "19994613ecdfb585d645fb7f12c481ffda6c2968", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.4174798823756753, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.13543952268868825, - "normalized_score": 13.543952268868823 - }, - "bbh": { - "name": "BBH", - "value": 0.28112730419661675, - "normalized_score": 1.316794909883146 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.35790625000000004, - "normalized_score": 2.5716145833333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11053856382978723, - "normalized_score": 1.1709515366430252 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "Lawnakk/BBALAW1.2 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 4.353, - "co2_cost": 0.38648182061739816 - } - }, - { - "id": "Lawnakk/BBALAW1.3_bfloat16_b9f66617f14946b773f4a55947bfc7dc2c1c184f_False", - "model": { - "name": "Lawnakk/BBALAW1.3", - "sha": "b9f66617f14946b773f4a55947bfc7dc2c1c184f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.3502359012820597, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.13543952268868825, - "normalized_score": 13.543952268868823 - }, - "bbh": { - "name": "BBH", - "value": 0.28269808045232453, - "normalized_score": 1.2233765196960207 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.36190625000000004, - "normalized_score": 2.8382812499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.109375, - "normalized_score": 1.041666666666666 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": 
"2025-02-27", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "Lawnakk/BBALAW1.3 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 4.353, - "co2_cost": 0.373648987936219 - } - }, - { - "id": "Lawnakk/BBALAW1.6_bfloat16_d0a9e2e9caffb1f7b8d2c7f7bd3236cb9cc0afd9_False", - "model": { - "name": "Lawnakk/BBALAW1.6", - "sha": "d0a9e2e9caffb1f7b8d2c7f7bd3236cb9cc0afd9", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 31.048651979731375, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5245437660961804, - "normalized_score": 52.45437660961804 - }, - "bbh": { - "name": "BBH", - "value": 0.555356284691385, - "normalized_score": 36.42650233476133 - }, - "math": { - "name": "MATH Level 5", - "value": 0.36027190332326287, - "normalized_score": 36.027190332326285 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3238255033557047, - "normalized_score": 9.843400447427292 - }, - "musr": { - "name": "MUSR", - "value": 0.43684375, - "normalized_score": 12.572135416666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.45071476063829785, - "normalized_score": 38.968306737588655 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "Lawnakk/BBALAW1.6 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.6417412500395793 - } - }, - { - "id": "Lawnakk/BBALAW1.61_bfloat16_5d868edc32bba0c4079156f2ecc5c0a511a3795d_False", - "model": { - "name": "Lawnakk/BBALAW1.61", - "sha": "5d868edc32bba0c4079156f2ecc5c0a511a3795d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 31.793920081305952, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5771253607095839, - "normalized_score": 57.71253607095839 - }, - "bbh": { - "name": "BBH", - "value": 0.5548582474785428, - "normalized_score": 36.40356700548264 - }, - "math": { - "name": "MATH Level 5", - "value": 0.36631419939577037, - "normalized_score": 36.631419939577036 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31711409395973156, - "normalized_score": 8.948545861297541 - }, - "musr": { - "name": "MUSR", - "value": 0.4355104166666666, - "normalized_score": 12.50546875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4470578457446808, - "normalized_score": 38.56198286052009 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-28", - "submission_date": "2025-02-28", - "generation": 1, - "base_model": "Lawnakk/BBALAW1.61 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.6594259935922203 - } - }, - { - "id": "Lawnakk/BBALAW1.62_bfloat16_9822fa5f241e48ed681d6ad87abac296c73fc28e_False", - "model": { - "name": "Lawnakk/BBALAW1.62", - "sha": "9822fa5f241e48ed681d6ad87abac296c73fc28e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 29.539930945484546, - "has_chat_template": false - }, - 
"evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5046099903810778, - "normalized_score": 50.46099903810777 - }, - "bbh": { - "name": "BBH", - "value": 0.5580519941056026, - "normalized_score": 37.104537583170234 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2824773413897281, - "normalized_score": 28.247734138972806 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3196308724832215, - "normalized_score": 9.284116331096197 - }, - "musr": { - "name": "MUSR", - "value": 0.4343333333333333, - "normalized_score": 12.758333333333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.45445478723404253, - "normalized_score": 39.383865248226954 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-28", - "submission_date": "2025-02-28", - "generation": 1, - "base_model": "Lawnakk/BBALAW1.62 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.6135535451642553 - } - }, - { - "id": "Lawnakk/BBALAW1.63_bfloat16_5e60f6aa903178906c57cacd0149db10bbda945f_False", - "model": { - "name": "Lawnakk/BBALAW1.63", - "sha": "5e60f6aa903178906c57cacd0149db10bbda945f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 29.37358914303694, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44073835201709244, - "normalized_score": 44.073835201709244 - }, - "bbh": { - "name": "BBH", - "value": 0.5540633758841665, - "normalized_score": 36.36091509685001 - }, - "math": { - "name": "MATH Level 5", - "value": 0.37009063444108764, - "normalized_score": 37.00906344410876 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31208053691275167, - "normalized_score": 8.277404921700223 - }, - "musr": { - "name": "MUSR", - "value": 0.4303333333333333, - "normalized_score": 11.958333333333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4470578457446808, - "normalized_score": 38.56198286052009 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-28", - "submission_date": "2025-02-28", - "generation": 1, - "base_model": "Lawnakk/BBALAW1.63 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 0.6989091923818987 - } - }, - { - "id": "Lawnakk/BBALAW1.64_bfloat16_7bd7f808a66dee4360f7c6a02ece7dc72127c1fc_False", - "model": { - "name": "Lawnakk/BBALAW1.64", - "sha": "7bd7f808a66dee4360f7c6a02ece7dc72127c1fc", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.1811180912654695, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.13946107439371977, - "normalized_score": 13.946107439371977 - }, - "bbh": { - "name": "BBH", - "value": 0.27790701865141654, - "normalized_score": 1.7755006354076055 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2483221476510067, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3446666666666667, - "normalized_score": 2.0833333333333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11153590425531915, - 
"normalized_score": 1.2817671394799046 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-28", - "submission_date": "2025-02-28", - "generation": 1, - "base_model": "Lawnakk/BBALAW1.64 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.654014424890504 - } - }, - { - "id": "LenguajeNaturalAI/leniachat-gemma-2b-v0_bfloat16_e5691dcc682a10dc9ef4bdbb3dc896fcf271018e_True", - "model": { - "name": "LenguajeNaturalAI/leniachat-gemma-2b-v0", - "sha": "e5691dcc682a10dc9ef4bdbb3dc896fcf271018e", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "GemmaForCausalLM", - "average_score": 5.737240998088876, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21497404664069114, - "normalized_score": 21.497404664069116 - }, - "bbh": { - "name": "BBH", - "value": 0.30740211895412034, - "normalized_score": 4.1382969637280675 - }, - "math": { - "name": "MATH Level 5", - "value": 0.011329305135951661, - "normalized_score": 1.1329305135951662 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.36590625000000004, - "normalized_score": 3.6382812500000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11702127659574468, - "normalized_score": 1.891252955082742 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-09", - "submission_date": "2024-09-01", - "generation": 1, - "base_model": "google/gemma-2b", - "hub_license": "apache-2.0", - "hub_hearts": 14, - "params_billions": 2.506, - "co2_cost": 1.933155344561648 - } - }, - { - "id": "LenguajeNaturalAI/leniachat-qwen2-1.5B-v0_bfloat16_031a2efebb3cc1150e46f42ba0bea9fa7b855436_True", - "model": { - "name": "LenguajeNaturalAI/leniachat-qwen2-1.5B-v0", - "sha": "031a2efebb3cc1150e46f42ba0bea9fa7b855436", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.580803348718375, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22211842356059697, - "normalized_score": 22.211842356059698 - }, - "bbh": { - "name": "BBH", - "value": 0.36835590195612017, - "normalized_score": 12.771666354808303 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01283987915407855, - "normalized_score": 1.283987915407855 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.3749895833333334, - "normalized_score": 3.873697916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.18799867021276595, - "normalized_score": 9.77763002364066 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-16", - "submission_date": "2024-09-30", - "generation": 1, - "base_model": "Qwen/Qwen2-1.5B", - "hub_license": "apache-2.0", - "hub_hearts": 19, - "params_billions": 1.543, - "co2_cost": 1.6891235326099068 - } - }, - { - "id": 
"LeroyDyer/CheckPoint_A_bfloat16_508f1ec7ee33134a8c1de7774c08c1ce091466ed_True", - "model": { - "name": "LeroyDyer/CheckPoint_A", - "sha": "508f1ec7ee33134a8c1de7774c08c1ce091466ed", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 18.929189414483588, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45127927233074905, - "normalized_score": 45.127927233074914 - }, - "bbh": { - "name": "BBH", - "value": 0.4747699745968042, - "normalized_score": 25.80938677777848 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05891238670694864, - "normalized_score": 5.8912386706948645 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.4230833333333333, - "normalized_score": 11.385416666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28798204787234044, - "normalized_score": 20.886894208037827 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-05", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.919345737699857 - } - }, - { - "id": "LeroyDyer/CheckPoint_B_bfloat16_54c3b64f0d6293a787843f26f508335f845ded9b_True", - "model": { - "name": "LeroyDyer/CheckPoint_B", - "sha": "54c3b64f0d6293a787843f26f508335f845ded9b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 18.539373022938637, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4439852923576111, - "normalized_score": 44.39852923576112 - }, - "bbh": { - "name": "BBH", - "value": 0.47799475378324896, - "normalized_score": 26.27124893315022 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07175226586102719, - "normalized_score": 7.175226586102719 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.38984375, - "normalized_score": 6.830468750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29072473404255317, - "normalized_score": 21.19163711583924 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-05", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.9203635380516662 - } - }, - { - "id": "LeroyDyer/CheckPoint_C_bfloat16_1c6aabe7f7c497396393253fb4e21dc19285d7bd_True", - "model": { - "name": "LeroyDyer/CheckPoint_C", - "sha": "1c6aabe7f7c497396393253fb4e21dc19285d7bd", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 17.11316049965239, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.34768968558979063, - "normalized_score": 34.76896855897907 - }, - "bbh": { - "name": "BBH", - "value": 0.45864215446207585, - "normalized_score": 
24.18304052622304 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05513595166163142, - "normalized_score": 5.5135951661631415 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.4346145833333333, - "normalized_score": 12.960156250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30211103723404253, - "normalized_score": 22.456781914893615 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-05", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.8722510506606865 - } - }, - { - "id": "LeroyDyer/CheckPoint_R1_bfloat16_36edb2be57e2f42ba9bd41314acf54f0f237b1c0_True", - "model": { - "name": "LeroyDyer/CheckPoint_R1", - "sha": "36edb2be57e2f42ba9bd41314acf54f0f237b1c0", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 10.767224308402712, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17278376928771216, - "normalized_score": 17.278376928771216 - }, - "bbh": { - "name": "BBH", - "value": 0.4225419506658359, - "normalized_score": 17.927717512520747 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04305135951661632, - "normalized_score": 4.305135951661631 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.4031458333333333, - "normalized_score": 8.459895833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.22049534574468085, - "normalized_score": 13.388371749408984 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-11", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.5266344322747649 - } - }, - { - "id": "LeroyDyer/LCARS_AI_001_float16_3452e84fbfd92c62085fdce3834eff5c9cd87d4f_False", - "model": { - "name": "LeroyDyer/LCARS_AI_001", - "sha": "3452e84fbfd92c62085fdce3834eff5c9cd87d4f", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 14.429205739419857, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.31094495937445976, - "normalized_score": 31.094495937445977 - }, - "bbh": { - "name": "BBH", - "value": 0.42578875825590146, - "normalized_score": 19.460966800253622 - }, - "math": { - "name": "MATH Level 5", - "value": 0.023413897280966767, - "normalized_score": 2.3413897280966767 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.43836458333333334, - "normalized_score": 13.328906250000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2670378989361702, - "normalized_score": 18.559766548463354 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - 
"is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-08-06", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.163983647345908 - } - }, - { - "id": "LeroyDyer/LCARS_AI_1x4_003_SuperAI_float16_917c84d241bfff8b8648d9d865ae4b5bead68c6b_False", - "model": { - "name": "LeroyDyer/LCARS_AI_1x4_003_SuperAI", - "sha": "917c84d241bfff8b8648d9d865ae4b5bead68c6b", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 19.518229049313643, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.41111251479407973, - "normalized_score": 41.11125147940797 - }, - "bbh": { - "name": "BBH", - "value": 0.49198503573704794, - "normalized_score": 28.423430655930204 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05740181268882175, - "normalized_score": 5.740181268882175 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.4506145833333333, - "normalized_score": 15.560156250000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29720744680851063, - "normalized_score": 21.911938534278956 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-03", - "submission_date": "2024-08-07", - "generation": 1, - "base_model": "LeroyDyer/LCARS_AI_1x4_003_SuperAI (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 24.154, - "co2_cost": 3.3145180203347833 - } - }, - { - "id": "LeroyDyer/LCARS_AI_StarTrek_Computer_float16_9d4af4ab13df574ad0d40ed71de7d43c17f59a94_False", - "model": { - "name": "LeroyDyer/LCARS_AI_StarTrek_Computer", - "sha": "9d4af4ab13df574ad0d40ed71de7d43c17f59a94", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 14.613893572975764, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.35825609383103496, - "normalized_score": 35.8256093831035 - }, - "bbh": { - "name": "BBH", - "value": 0.4446191188748297, - "normalized_score": 21.781003095704886 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04078549848942598, - "normalized_score": 4.078549848942599 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.3950208333333333, - "normalized_score": 7.444270833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.24584441489361702, - "normalized_score": 16.204934988179666 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-11", - "submission_date": "2024-08-07", - "generation": 0, - "base_model": "LeroyDyer/LCARS_AI_StarTrek_Computer", - "hub_license": "mit", - "hub_hearts": 4, - "params_billions": 7.242, - "co2_cost": 1.3238612018973137 - } - }, - { - "id": "LeroyDyer/LCARS_TOP_SCORE_float16_ada3e3ac6ae162503da5158e72851053f4c7dac8_False", - "model": { - "name": "LeroyDyer/LCARS_TOP_SCORE", - "sha": 
"ada3e3ac6ae162503da5158e72851053f4c7dac8", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.322005404213282, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43706587410293574, - "normalized_score": 43.70658741029358 - }, - "bbh": { - "name": "BBH", - "value": 0.5127371051825098, - "normalized_score": 31.69912679947687 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06722054380664652, - "normalized_score": 6.722054380664652 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2860738255033557, - "normalized_score": 4.809843400447425 - }, - "musr": { - "name": "MUSR", - "value": 0.42928125, - "normalized_score": 12.426822916666671 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3031083776595745, - "normalized_score": 22.567597517730498 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-03-30", - "submission_date": "2024-08-08", - "generation": 1, - "base_model": "LeroyDyer/LCARS_TOP_SCORE (Merge)", - "hub_license": "openrail", - "hub_hearts": 2, - "params_billions": 7.242, - "co2_cost": 1.2266362452075965 - } - }, - { - "id": "LeroyDyer/Mixtral_AI_SwahiliTron_7b_float16_fd997ccdee03788e7e79944d26d9c641dc4fcd4c_True", - "model": { - "name": "LeroyDyer/Mixtral_AI_SwahiliTron_7b", - "sha": "fd997ccdee03788e7e79944d26d9c641dc4fcd4c", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 4.358661915319342, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1533996462718919, - "normalized_score": 15.339964627189191 - }, - "bbh": { - "name": "BBH", - "value": 0.3055092453201354, - "normalized_score": 3.211683047233007 - }, - "math": { - "name": "MATH Level 5", - "value": 0.013595166163141994, - "normalized_score": 1.3595166163141994 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.34203125, - "normalized_score": 1.920572916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12076130319148937, - "normalized_score": 2.30681146572104 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-10", - "submission_date": "2024-07-12", - "generation": 0, - "base_model": "LeroyDyer/Mixtral_AI_SwahiliTron_7b", - "hub_license": "mit", - "hub_hearts": 4, - "params_billions": 7.242, - "co2_cost": 1.3969911569441005 - } - }, - { - "id": "LeroyDyer/SpydazWebAI_Human_AGI_float16_0bc02d34a0b49c3473505d8df757de211af37131_False", - "model": { - "name": "LeroyDyer/SpydazWebAI_Human_AGI", - "sha": "0bc02d34a0b49c3473505d8df757de211af37131", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 9.970349395057786, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3388221031308041, - "normalized_score": 33.88221031308041 - }, - "bbh": { - "name": "BBH", - "value": 0.3374862127508733, - "normalized_score": 7.445695539873622 - }, - "math": { - 
"name": "MATH Level 5", - "value": 0.014350453172205438, - "normalized_score": 1.4350453172205437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.39663541666666663, - "normalized_score": 7.379427083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1478557180851064, - "normalized_score": 5.317302009456265 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-10-16", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.335430798055127 - } - }, - { - "id": "LeroyDyer/SpydazWebAI_Human_AGI_001_bfloat16_4ed76e404deb425d5c934cdbbb4b99b4c1017433_False", - "model": { - "name": "LeroyDyer/SpydazWebAI_Human_AGI_001", - "sha": "4ed76e404deb425d5c934cdbbb4b99b4c1017433", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 10.209095309808564, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.31181930610779396, - "normalized_score": 31.1819306107794 - }, - "bbh": { - "name": "BBH", - "value": 0.3433421938604874, - "normalized_score": 8.661200013836998 - }, - "math": { - "name": "MATH Level 5", - "value": 0.019637462235649546, - "normalized_score": 1.9637462235649545 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2986577181208054, - "normalized_score": 6.487695749440718 - }, - "musr": { - "name": "MUSR", - "value": 0.39939583333333334, - "normalized_score": 8.224479166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.14261968085106383, - "normalized_score": 4.735520094562647 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-18", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.8848131796026907 - } - }, - { - "id": "LeroyDyer/SpydazWeb_AI_CyberTron_Ultra_7b_float16_50c69e539578ab5384eb018a60cc1268637becae_False", - "model": { - "name": "LeroyDyer/SpydazWeb_AI_CyberTron_Ultra_7b", - "sha": "50c69e539578ab5384eb018a60cc1268637becae", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 13.566675630986595, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15557276914143361, - "normalized_score": 15.557276914143362 - }, - "bbh": { - "name": "BBH", - "value": 0.48107736108561827, - "normalized_score": 27.74553183153259 - }, - "math": { - "name": "MATH Level 5", - "value": 0.013595166163141994, - "normalized_score": 1.3595166163141994 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29278523489932884, - "normalized_score": 5.7046979865771785 - }, - "musr": { - "name": "MUSR", - "value": 0.41362499999999996, - "normalized_score": 10.303125000000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2865691489361702, - "normalized_score": 20.72990543735224 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - 
"is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-14", - "submission_date": "2024-07-12", - "generation": 1, - "base_model": "LeroyDyer/Mixtral_AI_CyberTron_Ultra", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 7.242, - "co2_cost": 1.3130920737826801 - } - }, - { - "id": "LeroyDyer/SpydazWeb_AI_HumanAGI_001_M2_float16_5b109c398f2b55e972f01149a12d84a8df7a3fe2_False", - "model": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAGI_001_M2", - "sha": "5b109c398f2b55e972f01149a12d84a8df7a3fe2", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 18.993604787957782, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.39395138233221183, - "normalized_score": 39.39513823322119 - }, - "bbh": { - "name": "BBH", - "value": 0.4888172059118469, - "normalized_score": 27.921505094387758 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03851963746223565, - "normalized_score": 3.8519637462235647 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28942953020134227, - "normalized_score": 5.257270693512303 - }, - "musr": { - "name": "MUSR", - "value": 0.4503020833333333, - "normalized_score": 15.254427083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.300531914893617, - "normalized_score": 22.281323877068555 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-15", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.8947806161189631 - } - }, - { - "id": "LeroyDyer/SpydazWeb_AI_HumanAGI_002_float16_32bcafbec000377b16a59c8c4e2fd989e4236f2e_False", - "model": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAGI_002", - "sha": "32bcafbec000377b16a59c8c4e2fd989e4236f2e", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.01474000615888, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.40876430094371824, - "normalized_score": 40.87643009437183 - }, - "bbh": { - "name": "BBH", - "value": 0.5043871825389313, - "normalized_score": 29.62691236357759 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06646525679758308, - "normalized_score": 6.646525679758309 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28691275167785235, - "normalized_score": 4.921700223713646 - }, - "musr": { - "name": "MUSR", - "value": 0.48648958333333336, - "normalized_score": 21.144531249999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3058510638297872, - "normalized_score": 22.872340425531913 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-11", - "submission_date": "2025-01-15", - "generation": 1, - "base_model": "LeroyDyer/SpydazWeb_AI_HumanAGI_002 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 7.242, - "co2_cost": 0.8780139193723864 - } - }, - { - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_001_bfloat16_7d664b94eb7c50bd0314ee74b7ac564c55efa878_False", - "model": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_001", - 
"sha": "7d664b94eb7c50bd0314ee74b7ac564c55efa878", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 7.741907804524428, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22516589316347294, - "normalized_score": 22.516589316347293 - }, - "bbh": { - "name": "BBH", - "value": 0.33440360243051986, - "normalized_score": 8.06526235359233 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01661631419939577, - "normalized_score": 1.6616314199395772 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28859060402684567, - "normalized_score": 5.145413870246088 - }, - "musr": { - "name": "MUSR", - "value": 0.38603125, - "normalized_score": 6.053906250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1270777925531915, - "normalized_score": 3.008643617021276 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-10-17", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.2883549367569518 - } - }, - { - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_006_bfloat16_c3ef6d31d58344f6d67825769a304b9ac5e702ca_False", - "model": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_006", - "sha": "c3ef6d31d58344f6d67825769a304b9ac5e702ca", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 4.896570612993905, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.14300832901146734, - "normalized_score": 14.300832901146736 - }, - "bbh": { - "name": "BBH", - "value": 0.3301800420981355, - "normalized_score": 6.725319981390316 - }, - "math": { - "name": "MATH Level 5", - "value": 0.010574018126888218, - "normalized_score": 1.0574018126888218 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2802013422818792, - "normalized_score": 4.026845637583895 - }, - "musr": { - "name": "MUSR", - "value": 0.3567916666666667, - "normalized_score": 1.7656250000000016 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11353058510638298, - "normalized_score": 1.5033983451536632 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-18", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.8978213161790637 - } - }, - { - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_007_bfloat16_38d8c760a50e09cc877497275701de207ed54953_False", - "model": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_007", - "sha": "38d8c760a50e09cc877497275701de207ed54953", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 10.423616885073201, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3351751131442351, - "normalized_score": 33.51751131442351 - }, - "bbh": { - "name": "BBH", - "value": 0.3415665794743605, - "normalized_score": 8.46281905839016 - }, - "math": { - "name": "MATH Level 5", - "value": 
0.022658610271903322, - "normalized_score": 2.2658610271903323 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28859060402684567, - "normalized_score": 5.145413870246088 - }, - "musr": { - "name": "MUSR", - "value": 0.40962499999999996, - "normalized_score": 9.236458333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.13522273936170212, - "normalized_score": 3.9136377068557904 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-18", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.8974823394844018 - } - }, - { - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_009_CHAT_bfloat16_2d337fc6b89936da20a6d2062069b97a53720748_False", - "model": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_009_CHAT", - "sha": "2d337fc6b89936da20a6d2062069b97a53720748", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 9.615900799535378, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2973310815303395, - "normalized_score": 29.73310815303395 - }, - "bbh": { - "name": "BBH", - "value": 0.3306728717792965, - "normalized_score": 7.033815600268059 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01661631419939577, - "normalized_score": 1.6616314199395772 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28104026845637586, - "normalized_score": 4.138702460850116 - }, - "musr": { - "name": "MUSR", - "value": 0.41375, - "normalized_score": 10.31875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1432845744680851, - "normalized_score": 4.809397163120566 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-26", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.8955279064095867 - } - }, - { - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_010_CHAT_bfloat16_d127a9f81ff8a13ff5bcb90db65aa6dd3b321e89_False", - "model": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_010_CHAT", - "sha": "d127a9f81ff8a13ff5bcb90db65aa6dd3b321e89", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 8.376848844277914, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2506948230694557, - "normalized_score": 25.06948230694557 - }, - "bbh": { - "name": "BBH", - "value": 0.33363164762455844, - "normalized_score": 7.351959868629893 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01812688821752266, - "normalized_score": 1.812688821752266 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.41371874999999997, - "normalized_score": 10.01484375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.14303523936170212, - "normalized_score": 4.781693262411347 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - 
"metadata": { - "upload_date": "", - "submission_date": "2024-11-26", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.873459797883098 - } - }, - { - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_bfloat16_4e84d3da2fdd58b49e1b2abbc2653b854234fe89_False", - "model": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT", - "sha": "4e84d3da2fdd58b49e1b2abbc2653b854234fe89", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 9.775896143015682, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3148667757106699, - "normalized_score": 31.48667757106699 - }, - "bbh": { - "name": "BBH", - "value": 0.3522609512356862, - "normalized_score": 9.415285279602362 - }, - "math": { - "name": "MATH Level 5", - "value": 0.014350453172205438, - "normalized_score": 1.4350453172205437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.3831458333333333, - "normalized_score": 5.793229166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15949135638297873, - "normalized_score": 6.610150709219859 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-26", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.9255094767140524 - } - }, - { - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML_float16_64703ab13b49858f8eadc4b07085c2c891cc2f92_False", - "model": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML", - "sha": "64703ab13b49858f8eadc4b07085c2c891cc2f92", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 13.939790526210038, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.37524213531208306, - "normalized_score": 37.524213531208304 - }, - "bbh": { - "name": "BBH", - "value": 0.39840187861283577, - "normalized_score": 15.599125312205127 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0256797583081571, - "normalized_score": 2.56797583081571 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29278523489932884, - "normalized_score": 5.7046979865771785 - }, - "musr": { - "name": "MUSR", - "value": 0.42391666666666666, - "normalized_score": 10.922916666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2018783244680851, - "normalized_score": 11.31981382978723 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-27", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.904510040863411 - } - }, - { - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML_r1_float16_eb4e635b7cf2c93ab793f9e27949a86d1e725a87_False", - "model": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML_r1", - "sha": "eb4e635b7cf2c93ab793f9e27949a86d1e725a87", - "precision": "float16", - "type": 
"basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 17.981736164423843, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4049677079039171, - "normalized_score": 40.496770790391714 - }, - "bbh": { - "name": "BBH", - "value": 0.48583341042911066, - "normalized_score": 27.188612130290466 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05513595166163142, - "normalized_score": 5.5135951661631415 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29278523489932884, - "normalized_score": 5.7046979865771785 - }, - "musr": { - "name": "MUSR", - "value": 0.3921354166666667, - "normalized_score": 7.250260416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2956283244680851, - "normalized_score": 21.736480496453904 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-27", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.8817864153323666 - } - }, - { - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA_float16_d468d4143cfe92a28b6f9a9eea4cdc3583158b03_False", - "model": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA", - "sha": "d468d4143cfe92a28b6f9a9eea4cdc3583158b03", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 15.1139675375779, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.30355124403250044, - "normalized_score": 30.35512440325004 - }, - "bbh": { - "name": "BBH", - "value": 0.4575107149412439, - "normalized_score": 23.116920348203426 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0445619335347432, - "normalized_score": 4.45619335347432 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.42534374999999996, - "normalized_score": 11.16796875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.23287898936170212, - "normalized_score": 14.764332151300236 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-04", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.47040430523339716 - } - }, - { - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA_bfloat16_d468d4143cfe92a28b6f9a9eea4cdc3583158b03_False", - "model": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA", - "sha": "d468d4143cfe92a28b6f9a9eea4cdc3583158b03", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 15.141877964341816, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.30664858131978706, - "normalized_score": 30.664858131978704 - }, - "bbh": { - "name": "BBH", - "value": 0.45768864760562744, - "normalized_score": 23.177267229663993 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0445619335347432, - "normalized_score": 4.45619335347432 
- }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.42540625, - "normalized_score": 11.309114583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.23179853723404256, - "normalized_score": 14.644281914893616 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-04", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.3673417812306632 - } - }, - { - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_MX_bfloat16_ca98afcfc2bf1ef7c82413be1c607fe2245d30b6_False", - "model": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_MX", - "sha": "ca98afcfc2bf1ef7c82413be1c607fe2245d30b6", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 8.04579774103192, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3065987136353764, - "normalized_score": 30.65987136353764 - }, - "bbh": { - "name": "BBH", - "value": 0.3158421938604874, - "normalized_score": 5.885758837366411 - }, - "math": { - "name": "MATH Level 5", - "value": 0.015105740181268883, - "normalized_score": 1.5105740181268883 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.34438541666666667, - "normalized_score": 3.5481770833333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11070478723404255, - "normalized_score": 1.1894208037825047 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-04", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.890738782445888 - } - }, - { - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA_bfloat16_8493f4f3199a845f0d44041d617af751f10a91df_False", - "model": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA", - "sha": "8493f4f3199a845f0d44041d617af751f10a91df", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 15.915086092179353, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.35788153211257245, - "normalized_score": 35.78815321125724 - }, - "bbh": { - "name": "BBH", - "value": 0.4476544560399054, - "normalized_score": 22.464434124386614 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04229607250755287, - "normalized_score": 4.229607250755287 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.41340625, - "normalized_score": 9.775781250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.23761635638297873, - "normalized_score": 15.290706264775414 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - 
"submission_date": "2024-12-04", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.4610457299103071 - } - }, - { - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA_float16_8493f4f3199a845f0d44041d617af751f10a91df_False", - "model": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA", - "sha": "8493f4f3199a845f0d44041d617af751f10a91df", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 16.451680981130227, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.37976347203198624, - "normalized_score": 37.97634720319862 - }, - "bbh": { - "name": "BBH", - "value": 0.44827466097749213, - "normalized_score": 22.728636272258512 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04003021148036254, - "normalized_score": 4.003021148036254 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31291946308724833, - "normalized_score": 8.389261744966444 - }, - "musr": { - "name": "MUSR", - "value": 0.4148020833333333, - "normalized_score": 10.183593750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2388630319148936, - "normalized_score": 15.429225768321512 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-04", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.9537306082222248 - } - }, - { - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_RP_bfloat16_0569cca30df948b9f2e5145ce5c2b5a03ec025ae_False", - "model": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_RP", - "sha": "0569cca30df948b9f2e5145ce5c2b5a03ec025ae", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 7.78223636896281, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2541168543907942, - "normalized_score": 25.41168543907942 - }, - "bbh": { - "name": "BBH", - "value": 0.33230179059744286, - "normalized_score": 7.1764945791517105 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01283987915407855, - "normalized_score": 1.283987915407855 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2751677852348993, - "normalized_score": 3.355704697986576 - }, - "musr": { - "name": "MUSR", - "value": 0.3882604166666666, - "normalized_score": 5.865885416666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1323969414893617, - "normalized_score": 3.599660165484633 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-20", - "submission_date": "2024-11-18", - "generation": 1, - "base_model": "LeroyDyer/SpydazWeb_AI_HumanAI_RP (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 0.8874170892010981 - } - }, - { - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_TextVision_bfloat16_ba0dcf52fec492cc5d91b3297c08c5581d893607_False", - "model": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_TextVision", - "sha": "ba0dcf52fec492cc5d91b3297c08c5581d893607", - "precision": "bfloat16", - "type": 
"fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 9.496584895881528, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3062740196013245, - "normalized_score": 30.62740196013245 - }, - "bbh": { - "name": "BBH", - "value": 0.33536617928965984, - "normalized_score": 7.525593201173674 - }, - "math": { - "name": "MATH Level 5", - "value": 0.014350453172205438, - "normalized_score": 1.4350453172205437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.39384375, - "normalized_score": 7.497135416666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.13871343085106383, - "normalized_score": 4.301492316784869 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-18", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.911430325456633 - } - }, - { - "id": "LeroyDyer/SpydazWeb_HumanAI_M1_float16_c9bb5fdc262f9c68d02b798eb867495199bf3dbf_False", - "model": { - "name": "LeroyDyer/SpydazWeb_HumanAI_M1", - "sha": "c9bb5fdc262f9c68d02b798eb867495199bf3dbf", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 10.391053333653405, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3582062261466243, - "normalized_score": 35.820622614662426 - }, - "bbh": { - "name": "BBH", - "value": 0.35632705798398107, - "normalized_score": 10.02754339015283 - }, - "math": { - "name": "MATH Level 5", - "value": 0.024924471299093656, - "normalized_score": 2.492447129909366 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.36711458333333336, - "normalized_score": 4.289322916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1663065159574468, - "normalized_score": 7.367390661938533 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-10-16", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.3217805289052 - } - }, - { - "id": "LeroyDyer/SpydazWeb_HumanAI_M2_float16_82fd99df73eeaf8ce12add6e74fda7901c75f86c_False", - "model": { - "name": "LeroyDyer/SpydazWeb_HumanAI_M2", - "sha": "82fd99df73eeaf8ce12add6e74fda7901c75f86c", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 12.484114662749576, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3750171766468526, - "normalized_score": 37.50171766468526 - }, - "bbh": { - "name": "BBH", - "value": 0.39308772552915555, - "normalized_score": 15.397194202296944 - }, - "math": { - "name": "MATH Level 5", - "value": 0.028700906344410877, - "normalized_score": 2.8700906344410875 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - 
"normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.3751458333333333, - "normalized_score": 3.9932291666666657 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2010472074468085, - "normalized_score": 11.227467494089833 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-10-16", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.262119278883262 - } - }, - { - "id": "LeroyDyer/SpydazWeb_HumanAI_M3_float16_01dbeb9536ad2cba5a3c4fbeef77e6b3f692adc5_False", - "model": { - "name": "LeroyDyer/SpydazWeb_HumanAI_M3", - "sha": "01dbeb9536ad2cba5a3c4fbeef77e6b3f692adc5", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 5.505800569799587, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1578711153073844, - "normalized_score": 15.787111530738443 - }, - "bbh": { - "name": "BBH", - "value": 0.31272572546166244, - "normalized_score": 4.765388996591294 - }, - "math": { - "name": "MATH Level 5", - "value": 0.00906344410876133, - "normalized_score": 0.906344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.3914270833333333, - "normalized_score": 7.128385416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11486037234042554, - "normalized_score": 1.6511524822695034 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-10-16", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.3750988908993114 - } - }, - { - "id": "LeroyDyer/_Spydaz_Web_AI_12_float16_675cf7fbfa36974b2eb5aef53afdf56a65ecfcfd_False", - "model": { - "name": "LeroyDyer/_Spydaz_Web_AI_12", - "sha": "675cf7fbfa36974b2eb5aef53afdf56a65ecfcfd", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 6.615158396750675, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2764985793250797, - "normalized_score": 27.64985793250797 - }, - "bbh": { - "name": "BBH", - "value": 0.31633960292107943, - "normalized_score": 4.495993524198578 - }, - "math": { - "name": "MATH Level 5", - "value": 0.013595166163141994, - "normalized_score": 1.3595166163141994 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.35815624999999995, - "normalized_score": 2.2028645833333327 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11369680851063829, - "normalized_score": 1.521867612293143 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-19", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - 
"params_billions": 7.242, - "co2_cost": 1.297050807163551 - } - }, - { - "id": "LeroyDyer/_Spydaz_Web_AI_14_float16_53e73726a0a780db48303f4befbf7574e5c04984_True", - "model": { - "name": "LeroyDyer/_Spydaz_Web_AI_14", - "sha": "53e73726a0a780db48303f4befbf7574e5c04984", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 4.378364522899276, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1811770546594148, - "normalized_score": 18.11770546594148 - }, - "bbh": { - "name": "BBH", - "value": 0.2988848127354542, - "normalized_score": 2.162400468558809 - }, - "math": { - "name": "MATH Level 5", - "value": 0.012084592145015106, - "normalized_score": 1.2084592145015105 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.3395208333333333, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11394614361702128, - "normalized_score": 1.549571513002364 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-23", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.2427017129472753 - } - }, - { - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_001_bfloat16_53da602f67f7cd5cc0a9d17689ec4456779a4b4b_True", - "model": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_001", - "sha": "53da602f67f7cd5cc0a9d17689ec4456779a4b4b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 18.185695306409894, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4505046609662362, - "normalized_score": 45.05046609662362 - }, - "bbh": { - "name": "BBH", - "value": 0.4609124425176902, - "normalized_score": 24.633582827112217 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0634441087613293, - "normalized_score": 6.3444108761329305 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.42559375, - "normalized_score": 11.465885416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2734375, - "normalized_score": 19.270833333333332 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-14", - "submission_date": "2025-02-27", - "generation": 2, - "base_model": "LeroyDyer/_Spydaz_Web_AI_Mistral_R1_Base (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 0.4444431573280053 - } - }, - { - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_002_float16_5514d6c3b7fe81381daf8a5ae1b27730d4e5520e_True", - "model": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_002", - "sha": "5514d6c3b7fe81381daf8a5ae1b27730d4e5520e", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.869138876185648, - "has_chat_template": true - }, - "evaluations": { - 
"ifeval": { - "name": "IFEval", - "value": 0.5306885729863429, - "normalized_score": 53.06885729863429 - }, - "bbh": { - "name": "BBH", - "value": 0.4682582050072746, - "normalized_score": 25.041939148110867 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0581570996978852, - "normalized_score": 5.81570996978852 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.42546875, - "normalized_score": 11.78359375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28939494680851063, - "normalized_score": 21.043882978723403 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-14", - "submission_date": "2025-02-28", - "generation": 2, - "base_model": "LeroyDyer/_Spydaz_Web_AI_Mistral_R1_Base (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.45289301578969215 - } - }, - { - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_MUSR_bfloat16_8dacb71de4e5e4a57ea85178d9312b785e50432f_True", - "model": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_MUSR", - "sha": "8dacb71de4e5e4a57ea85178d9312b785e50432f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.9872217391289, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.478606763387811, - "normalized_score": 47.860676338781104 - }, - "bbh": { - "name": "BBH", - "value": 0.4671769411194033, - "normalized_score": 25.691235509522457 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06042296072507553, - "normalized_score": 6.042296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28439597315436244, - "normalized_score": 4.5861297539149914 - }, - "musr": { - "name": "MUSR", - "value": 0.48689583333333336, - "normalized_score": 21.428645833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2828291223404255, - "normalized_score": 20.314346926713945 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-01", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.4626438746160488 - } - }, - { - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_MasterCoder_bfloat16_7aecfd3b21a77c5c2f45903541cf538014b61a70_True", - "model": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_MasterCoder", - "sha": "7aecfd3b21a77c5c2f45903541cf538014b61a70", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.155771203993552, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.414259719765777, - "normalized_score": 41.4259719765777 - }, - "bbh": { - "name": "BBH", - "value": 0.4689417813020516, - "normalized_score": 26.08792985697224 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06117824773413897, - "normalized_score": 6.117824773413897 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 
0.47197916666666667, - "normalized_score": 18.73072916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27194148936170215, - "normalized_score": 19.104609929078016 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-28", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.4411424337479603 - } - }, - { - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_001_bfloat16_ac794a3f7bb607a64aed52bc55ea5f5ad4445810_True", - "model": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_001", - "sha": "ac794a3f7bb607a64aed52bc55ea5f5ad4445810", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.354655381937874, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4571492528712705, - "normalized_score": 45.714925287127045 - }, - "bbh": { - "name": "BBH", - "value": 0.48178882135920675, - "normalized_score": 27.307995975060845 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06948640483383686, - "normalized_score": 6.948640483383686 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27684563758389263, - "normalized_score": 3.5794183445190177 - }, - "musr": { - "name": "MUSR", - "value": 0.47784375000000007, - "normalized_score": 19.897135416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2681183510638298, - "normalized_score": 18.679816784869978 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-10", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.4552535355664265 - } - }, - { - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_003_bfloat16_f4d7aaa3b96d720ebe911ced845dacf5773ce5b0_True", - "model": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_003", - "sha": "f4d7aaa3b96d720ebe911ced845dacf5773ce5b0", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.118137399107567, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6200148938150774, - "normalized_score": 62.00148938150774 - }, - "bbh": { - "name": "BBH", - "value": 0.4755509035158693, - "normalized_score": 26.687805677059902 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06948640483383686, - "normalized_score": 6.948640483383686 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28104026845637586, - "normalized_score": 4.138702460850116 - }, - "musr": { - "name": "MUSR", - "value": 0.42019791666666667, - "normalized_score": 10.724739583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29986702127659576, - "normalized_score": 22.20744680851064 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-11", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - 
"co2_cost": 0.45535791158285255 - } - }, - { - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_AdvancedStudent_float16_4311f338718148b54025a7e19686a30d8890a796_True", - "model": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_AdvancedStudent", - "sha": "4311f338718148b54025a7e19686a30d8890a796", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 24.34166379927646, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5950854842927876, - "normalized_score": 59.50854842927876 - }, - "bbh": { - "name": "BBH", - "value": 0.4927473238025393, - "normalized_score": 28.149314045731852 - }, - "math": { - "name": "MATH Level 5", - "value": 0.054380664652567974, - "normalized_score": 5.438066465256798 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.5198229166666667, - "normalized_score": 25.144531249999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2999501329787234, - "normalized_score": 22.21668144208038 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-07", - "submission_date": "2025-03-07", - "generation": 2, - "base_model": "LeroyDyer/_Spydaz_Web_AI_Mistral_R1_Base (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.4530458393635682 - } - }, - { - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Student_bfloat16_eaf21297619c127dca07676381d3ccc5378bc762_True", - "model": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Student", - "sha": "eaf21297619c127dca07676381d3ccc5378bc762", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 23.588092892468367, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5735781060918363, - "normalized_score": 57.35781060918363 - }, - "bbh": { - "name": "BBH", - "value": 0.48808115770970123, - "normalized_score": 27.800315912370298 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0513595166163142, - "normalized_score": 5.13595166163142 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.50975, - "normalized_score": 24.452083333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.292719414893617, - "normalized_score": 21.413268321513 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-06", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.4670612939138445 - } - }, - { - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Teacher_bfloat16_4e127a036fb9ebd7c77198512537a2744c4b47f5_True", - "model": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Teacher", - "sha": "4e127a036fb9ebd7c77198512537a2744c4b47f5", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 23.69427042218614, - 
"has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5772250960784053, - "normalized_score": 57.72250960784053 - }, - "bbh": { - "name": "BBH", - "value": 0.4805094960871836, - "normalized_score": 26.945441313118206 - }, - "math": { - "name": "MATH Level 5", - "value": 0.054380664652567974, - "normalized_score": 5.438066465256798 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2860738255033557, - "normalized_score": 4.809843400447425 - }, - "musr": { - "name": "MUSR", - "value": 0.5222395833333334, - "normalized_score": 25.513281250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2956283244680851, - "normalized_score": 21.736480496453904 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-07", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.4429826487447223 - } - }, - { - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_001_bfloat16_6973e799cc92930cd5fd645d960a955cf4cf9c56_True", - "model": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_001", - "sha": "6973e799cc92930cd5fd645d960a955cf4cf9c56", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.49357945577393, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5817963004827191, - "normalized_score": 58.17963004827192 - }, - "bbh": { - "name": "BBH", - "value": 0.4907982146977475, - "normalized_score": 28.227992314307016 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05060422960725076, - "normalized_score": 5.0604229607250755 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30033557046979864, - "normalized_score": 6.711409395973152 - }, - "musr": { - "name": "MUSR", - "value": 0.4486041666666667, - "normalized_score": 15.608854166666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29055851063829785, - "normalized_score": 21.17316784869976 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-02", - "submission_date": "2025-03-02", - "generation": 2, - "base_model": "Removed", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 0.49080394435712554 - } - }, - { - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_002_bfloat16_2ea7b7587ca45326e4015ceda7a1fed7c8604d3d_True", - "model": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_002", - "sha": "2ea7b7587ca45326e4015ceda7a1fed7c8604d3d", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.794293676873966, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.546150879665953, - "normalized_score": 54.6150879665953 - }, - "bbh": { - "name": "BBH", - "value": 0.4655028607746287, - "normalized_score": 25.171424439522994 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04984894259818731, - "normalized_score": 4.984894259818732 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2785234899328859, - "normalized_score": 3.8031319910514525 - }, - "musr": { - "name": "MUSR", - 
"value": 0.45108333333333334, - "normalized_score": 15.452083333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28665226063829785, - "normalized_score": 20.739140070921984 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-08", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.4598289986506186 - } - }, - { - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Coder_bfloat16_ac6cb71ceac4efd32db3b546cf7f68bf28545069_True", - "model": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Coder", - "sha": "ac6cb71ceac4efd32db3b546cf7f68bf28545069", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.644667648469394, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4923702442851634, - "normalized_score": 49.237024428516335 - }, - "bbh": { - "name": "BBH", - "value": 0.46376531085099754, - "normalized_score": 24.689255801381034 - }, - "math": { - "name": "MATH Level 5", - "value": 0.054380664652567974, - "normalized_score": 5.438066465256798 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27348993288590606, - "normalized_score": 3.1319910514541416 - }, - "musr": { - "name": "MUSR", - "value": 0.5624583333333334, - "normalized_score": 32.37395833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28897938829787234, - "normalized_score": 20.997709810874703 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-02", - "submission_date": "2025-03-03", - "generation": 1, - "base_model": "Removed", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.2062289710968703 - } - }, - { - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Math_bfloat16_1ff4c958dfb9dbb35a6d387675c4aecea1f9f1be_True", - "model": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Math", - "sha": "1ff4c958dfb9dbb35a6d387675c4aecea1f9f1be", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.84226778022403, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5033112142448702, - "normalized_score": 50.33112142448702 - }, - "bbh": { - "name": "BBH", - "value": 0.4676503002757066, - "normalized_score": 25.173866858214165 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04758308157099698, - "normalized_score": 4.758308157099698 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.4325729166666667, - "normalized_score": 13.171614583333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29130651595744683, - "normalized_score": 21.256279550827426 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-07", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - 
"params_billions": 7.242, - "co2_cost": 0.45687559663339133 - } - }, - { - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_MathMaster_bfloat16_67e38b8e52162e55ebc74fe94788acbca05a82bc_True", - "model": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_MathMaster", - "sha": "67e38b8e52162e55ebc74fe94788acbca05a82bc", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.160821436103028, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5558429411738631, - "normalized_score": 55.584294117386314 - }, - "bbh": { - "name": "BBH", - "value": 0.47422312505675873, - "normalized_score": 26.599272622298702 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05362537764350453, - "normalized_score": 5.362537764350453 - }, - "gpqa": { - "name": "GPQA", - "value": 0.287751677852349, - "normalized_score": 5.033557046979867 - }, - "musr": { - "name": "MUSR", - "value": 0.45098958333333333, - "normalized_score": 15.807031250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2672041223404255, - "normalized_score": 18.578235815602834 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-08", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.4462280679498404 - } - }, - { - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Student_Coder_bfloat16_a7c5c1ff62187e88b153f65bfba62f41b4e968a4_True", - "model": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Student_Coder", - "sha": "a7c5c1ff62187e88b153f65bfba62f41b4e968a4", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.71902736748913, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5449518388985669, - "normalized_score": 54.49518388985669 - }, - "bbh": { - "name": "BBH", - "value": 0.4650844324968853, - "normalized_score": 25.058233605951802 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06570996978851963, - "normalized_score": 6.570996978851963 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28439597315436244, - "normalized_score": 4.5861297539149914 - }, - "musr": { - "name": "MUSR", - "value": 0.43883333333333335, - "normalized_score": 13.954166666666671 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27684507978723405, - "normalized_score": 19.64945330969267 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-28", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.442445862401488 - } - }, - { - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Teacher_Coder_bfloat16_4517146312fb9e254f94aecaa12959586bf83631_True", - "model": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Teacher_Coder", - "sha": "4517146312fb9e254f94aecaa12959586bf83631", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.669991182750483, - 
"has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5081572449988254, - "normalized_score": 50.815724499882535 - }, - "bbh": { - "name": "BBH", - "value": 0.47965526444811907, - "normalized_score": 27.13550121385457 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0649546827794562, - "normalized_score": 6.495468277945619 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.4338125, - "normalized_score": 13.593229166666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28449135638297873, - "normalized_score": 20.499039598108748 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-28", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.45112726189485874 - } - }, - { - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Top_Student_float16_8a00eda106a9d4ff88071b3138bc99f2d102c052_True", - "model": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Top_Student", - "sha": "8a00eda106a9d4ff88071b3138bc99f2d102c052", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 25.30674491645848, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6039530667517742, - "normalized_score": 60.395306675177416 - }, - "bbh": { - "name": "BBH", - "value": 0.49877449828070924, - "normalized_score": 29.306594525870576 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07250755287009064, - "normalized_score": 7.250755287009064 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.5397916666666667, - "normalized_score": 29.37395833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30244348404255317, - "normalized_score": 22.49372044917257 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-11", - "submission_date": "2025-03-12", - "generation": 1, - "base_model": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Top_Student (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 0.46845559140932896 - } - }, - { - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_X1_float16_e5325f67abced31bf1e4c3922987146a18fff149_True", - "model": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_X1", - "sha": "e5325f67abced31bf1e4c3922987146a18fff149", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 18.16205847224752, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.427323944910615, - "normalized_score": 42.7323944910615 - }, - "bbh": { - "name": "BBH", - "value": 0.47589342126093026, - "normalized_score": 26.46227536747539 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05664652567975831, - "normalized_score": 5.664652567975831 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 
0.4231770833333333, - "normalized_score": 11.763802083333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2890625, - "normalized_score": 21.006944444444443 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-28", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.428180268927522 - } - }, - { - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_X2_float16_6875dcde7e374dcc93112527f783d7622f597939_True", - "model": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_X2", - "sha": "6875dcde7e374dcc93112527f783d7622f597939", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.423303290574818, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5433782364127182, - "normalized_score": 54.337823641271825 - }, - "bbh": { - "name": "BBH", - "value": 0.4785559277736029, - "normalized_score": 27.24420156630025 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06117824773413897, - "normalized_score": 6.117824773413897 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.46953125, - "normalized_score": 19.12473958333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29205452127659576, - "normalized_score": 21.339391252955085 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-28", - "submission_date": "2025-02-28", - "generation": 1, - "base_model": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_X2 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.44896326833710487 - } - }, - { - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_RP_R1_bfloat16_d3b16c67173258c060cc0df3e4065379a6d83eba_True", - "model": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_RP_R1", - "sha": "d3b16c67173258c060cc0df3e4065379a6d83eba", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.102174437145493, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5426036250482054, - "normalized_score": 54.26036250482054 - }, - "bbh": { - "name": "BBH", - "value": 0.4701061648636955, - "normalized_score": 25.310204381698416 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06042296072507553, - "normalized_score": 6.042296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26929530201342283, - "normalized_score": 2.572706935123044 - }, - "musr": { - "name": "MUSR", - "value": 0.42013541666666665, - "normalized_score": 11.383593750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28939494680851063, - "normalized_score": 21.043882978723403 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-15", - "submission_date": "2025-02-28", - "generation": 2, - "base_model": "LeroyDyer/_Spydaz_Web_AI_Mistral_R1_Base (Merge)", - "hub_license": "apache-2.0", - 
"hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.43778639744459247 - } - }, - { - "id": "LeroyDyer/_Spydaz_Web_AI_BIBLE_002_bfloat16_f47113e6352f4df8c50e9e571fc85cd7a154a07f_False", - "model": { - "name": "LeroyDyer/_Spydaz_Web_AI_BIBLE_002", - "sha": "f47113e6352f4df8c50e9e571fc85cd7a154a07f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 6.86090172693433, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21949538336059432, - "normalized_score": 21.949538336059433 - }, - "bbh": { - "name": "BBH", - "value": 0.3289070186514165, - "normalized_score": 6.349580156104774 - }, - "math": { - "name": "MATH Level 5", - "value": 0.017371601208459216, - "normalized_score": 1.7371601208459215 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28439597315436244, - "normalized_score": 4.5861297539149914 - }, - "musr": { - "name": "MUSR", - "value": 0.34069791666666666, - "normalized_score": 2.4539062499999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.13680186170212766, - "normalized_score": 4.089095744680851 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-10", - "submission_date": "2024-09-14", - "generation": 0, - "base_model": "LeroyDyer/_Spydaz_Web_AI_BIBLE_002", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 7.242, - "co2_cost": 1.306499756515286 - } - }, - { - "id": "LeroyDyer/_Spydaz_Web_AI_ChatML_002_float16_9475af8113cf4027839974283b702d6be502f7fa_True", - "model": { - "name": "LeroyDyer/_Spydaz_Web_AI_ChatML_002", - "sha": "9475af8113cf4027839974283b702d6be502f7fa", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 5.715725326733474, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24122772022677608, - "normalized_score": 24.12277202267761 - }, - "bbh": { - "name": "BBH", - "value": 0.3106383598957094, - "normalized_score": 4.191974214495695 - }, - "math": { - "name": "MATH Level 5", - "value": 0.011329305135951661, - "normalized_score": 1.1329305135951662 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2575503355704698, - "normalized_score": 1.0067114093959737 - }, - "musr": { - "name": "MUSR", - "value": 0.3623125, - "normalized_score": 2.7890625000000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10945811170212766, - "normalized_score": 1.0509013002364058 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-01", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.2816098695014941 - } - }, - { - "id": "LeroyDyer/_Spydaz_Web_AI_ChatQA_float16_9f86dd12d4c75e0290aa3084a44cf111bc975144_False", - "model": { - "name": "LeroyDyer/_Spydaz_Web_AI_ChatQA", - "sha": "9f86dd12d4c75e0290aa3084a44cf111bc975144", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 5.115133515206828, - "has_chat_template": false - 
}, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1414591062824417, - "normalized_score": 14.14591062824417 - }, - "bbh": { - "name": "BBH", - "value": 0.32359493837413505, - "normalized_score": 5.599561733978841 - }, - "math": { - "name": "MATH Level 5", - "value": 0.009818731117824773, - "normalized_score": 0.9818731117824773 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.3447291666666667, - "normalized_score": 2.5578125000000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.14752327127659576, - "normalized_score": 5.280363475177306 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-08-06", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.164158799637762 - } - }, - { - "id": "LeroyDyer/_Spydaz_Web_AI_ChatQA_003_bfloat16__False", - "model": { - "name": "LeroyDyer/_Spydaz_Web_AI_ChatQA_003", - "sha": "", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 6.2575600117500825, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22091938279321088, - "normalized_score": 22.09193827932109 - }, - "bbh": { - "name": "BBH", - "value": 0.3171811407815537, - "normalized_score": 4.293436202390652 - }, - "math": { - "name": "MATH Level 5", - "value": 0.010574018126888218, - "normalized_score": 1.0574018126888218 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.38184375, - "normalized_score": 5.830468750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11328125, - "normalized_score": 1.4756944444444438 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-14", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.2596251679493742 - } - }, - { - "id": "LeroyDyer/_Spydaz_Web_AI_TEMP__float16_4e2bdb05ac79c4d1777a7c30aa20f84f4feedc13_True", - "model": { - "name": "LeroyDyer/_Spydaz_Web_AI_TEMP_", - "sha": "4e2bdb05ac79c4d1777a7c30aa20f84f4feedc13", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.348042495722837, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47953097780555587, - "normalized_score": 47.95309778055559 - }, - "bbh": { - "name": "BBH", - "value": 0.495695749059555, - "normalized_score": 28.61644015422753 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12386706948640483, - "normalized_score": 12.386706948640484 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.42175, - "normalized_score": 11.652083333333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3120844414893617, - "normalized_score": 23.56493794326241 - } - 
}, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-12", - "submission_date": "2025-03-13", - "generation": 1, - "base_model": "LeroyDyer/_Spydaz_Web_AI_TEMP_ (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.46340542859468026 - } - }, - { - "id": "LeroyDyer/_Spydaz_Web_AI_Top_Teacher__float16_b50bb1c7829f30bb580cf745f9607785555a0bfc_True", - "model": { - "name": "LeroyDyer/_Spydaz_Web_AI_Top_Teacher_", - "sha": "b50bb1c7829f30bb580cf745f9607785555a0bfc", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.895686397389127, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44038817005545283, - "normalized_score": 44.03881700554528 - }, - "bbh": { - "name": "BBH", - "value": 0.48909617780536035, - "normalized_score": 27.7244640207969 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11555891238670694, - "normalized_score": 11.555891238670695 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.4366041666666667, - "normalized_score": 14.475520833333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3149933510638298, - "normalized_score": 23.88815011820331 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-13", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.43395015038866985 - } - }, - { - "id": "LightningRodLabs/Flashlight-v1.0_float16_a15ce43233bcdbd15a7c3777467448f6fc26b211_True", - "model": { - "name": "LightningRodLabs/Flashlight-v1.0", - "sha": "a15ce43233bcdbd15a7c3777467448f6fc26b211", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 40.5748695700393, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6745446526327921, - "normalized_score": 67.4544652632792 - }, - "bbh": { - "name": "BBH", - "value": 0.6876833310149727, - "normalized_score": 55.15022389243257 - }, - "math": { - "name": "MATH Level 5", - "value": 0.49697885196374625, - "normalized_score": 49.69788519637463 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3422818791946309, - "normalized_score": 12.304250559284117 - }, - "musr": { - "name": "MUSR", - "value": 0.41009375, - "normalized_score": 9.92838541666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5402260638297872, - "normalized_score": 48.91400709219858 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-16", - "submission_date": "2025-01-18", - "generation": 2, - "base_model": "microsoft/phi-4", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 14.66, - "co2_cost": 1.8498306419905917 - } - }, - { - "id": "LightningRodLabs/Flashlight-v1.1_float16_5422435615348d6abaf1c5e30ad020285be8d2cf_True", - "model": { - "name": 
"LightningRodLabs/Flashlight-v1.1", - "sha": "5422435615348d6abaf1c5e30ad020285be8d2cf", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 40.98595152297397, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6720967034136092, - "normalized_score": 67.20967034136092 - }, - "bbh": { - "name": "BBH", - "value": 0.6901141327534415, - "normalized_score": 55.432811255376855 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5324773413897281, - "normalized_score": 53.24773413897282 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33976510067114096, - "normalized_score": 11.968680089485462 - }, - "musr": { - "name": "MUSR", - "value": 0.4047604166666667, - "normalized_score": 8.995052083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5415558510638298, - "normalized_score": 49.06176122931441 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-23", - "submission_date": "2025-01-24", - "generation": 1, - "base_model": "microsoft/phi-4", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 1.7236163211583977 - } - }, - { - "id": "LightningRodLabs/Flashlight-v1.2_float16_e231994ab93846ef1ddb273d829398b302a41351_True", - "model": { - "name": "LightningRodLabs/Flashlight-v1.2", - "sha": "e231994ab93846ef1ddb273d829398b302a41351", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 16.37861284160058, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4359920566319587, - "normalized_score": 43.599205663195875 - }, - "bbh": { - "name": "BBH", - "value": 0.3264526807518731, - "normalized_score": 6.159209487289303 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1555891238670695, - "normalized_score": 15.55891238670695 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23573825503355705, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.45536458333333335, - "normalized_score": 16.45390625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.24850398936170212, - "normalized_score": 16.500443262411345 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-27", - "submission_date": "2025-01-30", - "generation": 1, - "base_model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 3.9192427357924062 - } - }, - { - "id": "Lil-R/2_PRYMMAL-ECE-2B-SLERP-V1_bfloat16_fda2d7dd2d797726ebd34cee88095e0ae6b0b093_False", - "model": { - "name": "Lil-R/2_PRYMMAL-ECE-2B-SLERP-V1", - "sha": "fda2d7dd2d797726ebd34cee88095e0ae6b0b093", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 21.160448654695188, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5823459531820016, - "normalized_score": 58.23459531820015 - }, - "bbh": { - "name": "BBH", - "value": 0.4287069505821554, - "normalized_score": 19.53491130754225 - }, - 
"math": { - "name": "MATH Level 5", - "value": 0.09138972809667674, - "normalized_score": 9.138972809667674 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3062080536912752, - "normalized_score": 7.494407158836691 - }, - "musr": { - "name": "MUSR", - "value": 0.43746875, - "normalized_score": 13.916927083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2677859042553192, - "normalized_score": 18.642878250591018 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-05", - "submission_date": "2024-11-07", - "generation": 1, - "base_model": "Lil-R/2_PRYMMAL-ECE-2B-SLERP-V1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 2.614, - "co2_cost": 3.6337057897117258 - } - }, - { - "id": "Lil-R/2_PRYMMAL-ECE-2B-SLERP-V2_bfloat16_7e55d63df09ec396f39adcc426a91f2e74606bd0_False", - "model": { - "name": "Lil-R/2_PRYMMAL-ECE-2B-SLERP-V2", - "sha": "7e55d63df09ec396f39adcc426a91f2e74606bd0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 21.073953219725933, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5542693386880144, - "normalized_score": 55.42693386880145 - }, - "bbh": { - "name": "BBH", - "value": 0.43764741906109417, - "normalized_score": 20.197376640583055 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09441087613293052, - "normalized_score": 9.441087613293051 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.44816666666666666, - "normalized_score": 15.620833333333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2744348404255319, - "normalized_score": 19.38164893617021 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-05", - "submission_date": "2024-11-05", - "generation": 1, - "base_model": "Lil-R/2_PRYMMAL-ECE-2B-SLERP-V2 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 2.614, - "co2_cost": 2.579664968730134 - } - }, - { - "id": "Lil-R/2_PRYMMAL-ECE-7B-SLERP_bfloat16_fdc2ac0da72ad62ecc9677cdac32dd097bc99c3a_False", - "model": { - "name": "Lil-R/2_PRYMMAL-ECE-7B-SLERP", - "sha": "fdc2ac0da72ad62ecc9677cdac32dd097bc99c3a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 31.51530852235753, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5577412376937636, - "normalized_score": 55.77412376937637 - }, - "bbh": { - "name": "BBH", - "value": 0.5556642048146725, - "normalized_score": 36.482570049394035 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3632930513595166, - "normalized_score": 36.329305135951664 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3104026845637584, - "normalized_score": 8.05369127516779 - }, - "musr": { - "name": "MUSR", - "value": 0.43960416666666663, - "normalized_score": 13.483854166666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.45071476063829785, - "normalized_score": 38.968306737588655 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": 
true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-04", - "submission_date": "2024-11-04", - "generation": 1, - "base_model": "Lil-R/2_PRYMMAL-ECE-7B-SLERP (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.360989849337451 - } - }, - { - "id": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V1_bfloat16_1f9b9683053a13b9f5c3863a6de53d9e14a2e6c5_False", - "model": { - "name": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V1", - "sha": "1f9b9683053a13b9f5c3863a6de53d9e14a2e6c5", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.733001542831197, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.10733742026711349, - "normalized_score": 10.733742026711349 - }, - "bbh": { - "name": "BBH", - "value": 0.30525797550329686, - "normalized_score": 2.7840182309259673 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0007552870090634441, - "normalized_score": 0.0755287009063444 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25083892617449666, - "normalized_score": 0.11185682326622093 - }, - "musr": { - "name": "MUSR", - "value": 0.3910833333333333, - "normalized_score": 7.318749999999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11236702127659574, - "normalized_score": 1.374113475177304 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-28", - "submission_date": "2024-10-28", - "generation": 0, - "base_model": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V1", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 2.4991997392141676 - } - }, - { - "id": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V2_bfloat16_d633d064bcd8723da5b2337048cee1079e745766_False", - "model": { - "name": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V2", - "sha": "d633d064bcd8723da5b2337048cee1079e745766", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.733001542831197, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.10733742026711349, - "normalized_score": 10.733742026711349 - }, - "bbh": { - "name": "BBH", - "value": 0.30525797550329686, - "normalized_score": 2.7840182309259673 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0007552870090634441, - "normalized_score": 0.0755287009063444 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25083892617449666, - "normalized_score": 0.11185682326622093 - }, - "musr": { - "name": "MUSR", - "value": 0.3910833333333333, - "normalized_score": 7.318749999999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11236702127659574, - "normalized_score": 1.374113475177304 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-28", - "submission_date": "2024-10-28", - "generation": 0, - "base_model": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V2", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 2.550422871160006 - } - }, - { - "id": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V3_bfloat16_691d98e52b8136355cf3884a4c29968bf0fc6dcf_False", - "model": { - "name": 
"Lil-R/2_PRYMMAL-ECE-7B-SLERP-V3", - "sha": "691d98e52b8136355cf3884a4c29968bf0fc6dcf", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.878726613265554, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22346706738121516, - "normalized_score": 22.346706738121515 - }, - "bbh": { - "name": "BBH", - "value": 0.357839880712804, - "normalized_score": 10.612229209084205 - }, - "math": { - "name": "MATH Level 5", - "value": 0.006042296072507553, - "normalized_score": 0.6042296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25671140939597314, - "normalized_score": 0.8948545861297527 - }, - "musr": { - "name": "MUSR", - "value": 0.4107083333333333, - "normalized_score": 9.738541666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.18168218085106383, - "normalized_score": 9.075797872340424 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-28", - "submission_date": "2024-10-28", - "generation": 1, - "base_model": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V3 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 2.5716017005737966 - } - }, - { - "id": "Lil-R/PRYMMAL-ECE-1B-SLERP-V1_bfloat16_5770824fbfc2f9df22f6a1442e1392b029e333ec_False", - "model": { - "name": "Lil-R/PRYMMAL-ECE-1B-SLERP-V1", - "sha": "5770824fbfc2f9df22f6a1442e1392b029e333ec", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 14.88329630475093, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2874395492847866, - "normalized_score": 28.743954928478658 - }, - "bbh": { - "name": "BBH", - "value": 0.41904526564708194, - "normalized_score": 17.999676133564762 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10347432024169184, - "normalized_score": 10.347432024169184 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.39743749999999994, - "normalized_score": 7.346354166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2925531914893617, - "normalized_score": 21.39479905437352 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-29", - "submission_date": "2024-10-29", - "generation": 1, - "base_model": "Lil-R/PRYMMAL-ECE-1B-SLERP-V1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.777, - "co2_cost": 1.2946189513567496 - } - }, - { - "id": "Lil-R/PRYMMAL-ECE-7B-SLERP-V8_bfloat16_19fa915c941013075673c2943e2d06d131afcfef_False", - "model": { - "name": "Lil-R/PRYMMAL-ECE-7B-SLERP-V8", - "sha": "19fa915c941013075673c2943e2d06d131afcfef", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.386229392038714, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1258471965495995, - "normalized_score": 12.58471965495995 - }, - "bbh": { - "name": "BBH", - "value": 0.2955092966258663, - "normalized_score": 2.270601109130521 
- }, - "math": { - "name": "MATH Level 5", - "value": 0.009818731117824773, - "normalized_score": 0.9818731117824773 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.36314583333333333, - "normalized_score": 3.0598958333333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11278257978723404, - "normalized_score": 1.4202866430260035 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-28", - "submission_date": "2024-10-28", - "generation": 0, - "base_model": "Lil-R/PRYMMAL-ECE-7B-SLERP-V8", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 2.5562215438928173 - } - }, - { - "id": "LilRg/10PRYMMAL-3B-slerp_bfloat16_3e0a12c2ec82e18136fc1cf1609c66154cff8a6e_False", - "model": { - "name": "LilRg/10PRYMMAL-3B-slerp", - "sha": "3e0a12c2ec82e18136fc1cf1609c66154cff8a6e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 21.087895754438843, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1945903535951276, - "normalized_score": 19.45903535951276 - }, - "bbh": { - "name": "BBH", - "value": 0.5320377091634505, - "normalized_score": 34.877917500461 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14954682779456194, - "normalized_score": 14.954682779456194 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3213087248322148, - "normalized_score": 9.507829977628639 - }, - "musr": { - "name": "MUSR", - "value": 0.45290625, - "normalized_score": 15.713281250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3881316489361702, - "normalized_score": 32.014627659574465 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-23", - "submission_date": "2024-09-24", - "generation": 1, - "base_model": "LilRg/10PRYMMAL-3B-slerp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.821, - "co2_cost": 4.117509941048346 - } - }, - { - "id": "LilRg/ECE-1B-merge-PRYMMAL_bfloat16_009c75039786c38e2a6168cf93c9a46a4d111fb9_False", - "model": { - "name": "LilRg/ECE-1B-merge-PRYMMAL", - "sha": "009c75039786c38e2a6168cf93c9a46a4d111fb9", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 14.497652546900952, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.27122811916825135, - "normalized_score": 27.122811916825135 - }, - "bbh": { - "name": "BBH", - "value": 0.42345600176908743, - "normalized_score": 19.141465000010825 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10120845921450151, - "normalized_score": 10.120845921450151 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28104026845637586, - "normalized_score": 4.138702460850116 - }, - "musr": { - "name": "MUSR", - "value": 0.3801041666666667, - "normalized_score": 5.279687499999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2906416223404255, - "normalized_score": 21.1824024822695 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - 
"is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-07", - "submission_date": "2024-10-07", - "generation": 1, - "base_model": "LilRg/ECE-1B-merge-PRYMMAL (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.777, - "co2_cost": 1.3807140737643455 - } - }, - { - "id": "LilRg/ECE_Finetunning_float16_8d10549bcf802355f2d6203a33ed27e81b15b9e5_False", - "model": { - "name": "LilRg/ECE_Finetunning", - "sha": "8d10549bcf802355f2d6203a33ed27e81b15b9e5", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 11.987032259556209, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.04453849120334047, - "normalized_score": 4.453849120334047 - }, - "bbh": { - "name": "BBH", - "value": 0.47321596790730514, - "normalized_score": 26.530834895216355 - }, - "math": { - "name": "MATH Level 5", - "value": 0.045317220543806644, - "normalized_score": 4.531722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.38394791666666667, - "normalized_score": 7.693489583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3191489361702128, - "normalized_score": 24.34988179669031 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-28", - "submission_date": "2024-09-28", - "generation": 3, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 16.061, - "co2_cost": 3.5403764871403727 - } - }, - { - "id": "LilRg/PRYMMAL-6B-slerp_bfloat16_1ce0f5fdaae6a7866eda77df18378e9b5621af65_False", - "model": { - "name": "LilRg/PRYMMAL-6B-slerp", - "sha": "1ce0f5fdaae6a7866eda77df18378e9b5621af65", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 3.23270592407536, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.11533065599276586, - "normalized_score": 11.533065599276586 - }, - "bbh": { - "name": "BBH", - "value": 0.28676215692036117, - "normalized_score": 2.2124311744899985 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24580536912751677, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.36975, - "normalized_score": 4.452083333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1107878989361702, - "normalized_score": 1.1986554373522447 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-24", - "submission_date": "2024-09-24", - "generation": 1, - "base_model": "LilRg/PRYMMAL-6B-slerp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.293, - "co2_cost": 0.6961630869331326 - } - }, - { - "id": "LilRg/PRYMMAL-ECE-7B-SLERP-V3_bfloat16_742eee22ab39880acb8650b7290d420065d0514b_False", - "model": { - "name": "LilRg/PRYMMAL-ECE-7B-SLERP-V3", - "sha": "742eee22ab39880acb8650b7290d420065d0514b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - 
"architecture": "Qwen2ForCausalLM", - "average_score": 3.533946396700676, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.12432346174816154, - "normalized_score": 12.432346174816153 - }, - "bbh": { - "name": "BBH", - "value": 0.2957239084980124, - "normalized_score": 2.2903233313527434 - }, - "math": { - "name": "MATH Level 5", - "value": 0.009818731117824773, - "normalized_score": 0.9818731117824773 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25671140939597314, - "normalized_score": 0.8948545861297527 - }, - "musr": { - "name": "MUSR", - "value": 0.36714583333333334, - "normalized_score": 3.193229166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11269946808510638, - "normalized_score": 1.4110520094562635 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-26", - "submission_date": "2024-10-26", - "generation": 1, - "base_model": "LilRg/PRYMMAL-ECE-7B-SLERP-V3 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 2.562254215088475 - } - }, - { - "id": "LilRg/PRYMMAL-ECE-7B-SLERP-V4_bfloat16_5a45274282197dcce0f22b442f65df14ac75f507_False", - "model": { - "name": "LilRg/PRYMMAL-ECE-7B-SLERP-V4", - "sha": "5a45274282197dcce0f22b442f65df14ac75f507", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.54393840309556, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.12492298213185458, - "normalized_score": 12.492298213185457 - }, - "bbh": { - "name": "BBH", - "value": 0.2957239084980124, - "normalized_score": 2.2903233313527434 - }, - "math": { - "name": "MATH Level 5", - "value": 0.009818731117824773, - "normalized_score": 0.9818731117824773 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25671140939597314, - "normalized_score": 0.8948545861297527 - }, - "musr": { - "name": "MUSR", - "value": 0.36714583333333334, - "normalized_score": 3.193229166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11269946808510638, - "normalized_score": 1.4110520094562635 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-26", - "submission_date": "2024-10-26", - "generation": 1, - "base_model": "LilRg/PRYMMAL-ECE-7B-SLERP-V4 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 2.6114206257376 - } - }, - { - "id": "LilRg/PRYMMAL-ECE-7B-SLERP-V5_bfloat16_63f1ed2c963e3cb78d6c6a89836e0712aa7c3a6f_False", - "model": { - "name": "LilRg/PRYMMAL-ECE-7B-SLERP-V5", - "sha": "63f1ed2c963e3cb78d6c6a89836e0712aa7c3a6f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.54393840309556, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.12492298213185458, - "normalized_score": 12.492298213185457 - }, - "bbh": { - "name": "BBH", - "value": 0.2957239084980124, - "normalized_score": 2.2903233313527434 - }, - "math": { - "name": "MATH Level 5", - "value": 0.009818731117824773, - "normalized_score": 0.9818731117824773 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25671140939597314, 
- "normalized_score": 0.8948545861297527 - }, - "musr": { - "name": "MUSR", - "value": 0.36714583333333334, - "normalized_score": 3.193229166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11269946808510638, - "normalized_score": 1.4110520094562635 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-26", - "submission_date": "2024-10-26", - "generation": 1, - "base_model": "LilRg/PRYMMAL-ECE-7B-SLERP-V5 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 2.5728503968169294 - } - }, - { - "id": "LilRg/PRYMMAL-ECE-7B-SLERP-V6_bfloat16_92a8c865ef44974d0bafd22c1f991afe7889717b_False", - "model": { - "name": "LilRg/PRYMMAL-ECE-7B-SLERP-V6", - "sha": "92a8c865ef44974d0bafd22c1f991afe7889717b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.533946396700676, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.12432346174816154, - "normalized_score": 12.432346174816153 - }, - "bbh": { - "name": "BBH", - "value": 0.2957239084980124, - "normalized_score": 2.2903233313527434 - }, - "math": { - "name": "MATH Level 5", - "value": 0.009818731117824773, - "normalized_score": 0.9818731117824773 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25671140939597314, - "normalized_score": 0.8948545861297527 - }, - "musr": { - "name": "MUSR", - "value": 0.36714583333333334, - "normalized_score": 3.193229166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11269946808510638, - "normalized_score": 1.4110520094562635 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-26", - "submission_date": "2024-10-26", - "generation": 1, - "base_model": "LilRg/PRYMMAL-ECE-7B-SLERP-V6 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 2.495682107974202 - } - }, - { - "id": "LilRg/PRYMMAL-ECE-7B-SLERP-V7_bfloat16_834363d4b420f85ff1af920a68149240c580726c_False", - "model": { - "name": "LilRg/PRYMMAL-ECE-7B-SLERP-V7", - "sha": "834363d4b420f85ff1af920a68149240c580726c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.54393840309556, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.12492298213185458, - "normalized_score": 12.492298213185457 - }, - "bbh": { - "name": "BBH", - "value": 0.2957239084980124, - "normalized_score": 2.2903233313527434 - }, - "math": { - "name": "MATH Level 5", - "value": 0.009818731117824773, - "normalized_score": 0.9818731117824773 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25671140939597314, - "normalized_score": 0.8948545861297527 - }, - "musr": { - "name": "MUSR", - "value": 0.36714583333333334, - "normalized_score": 3.193229166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11269946808510638, - "normalized_score": 1.4110520094562635 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-26", - "submission_date": "2024-10-26", - 
"generation": 1, - "base_model": "LilRg/PRYMMAL-ECE-7B-SLERP-V7 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 2.5492398159940417 - } - }, - { - "id": "LilRg/PRYMMAL-slerp-Merge_bfloat16_e5597549ceb5afe56428097cb297326537d07c3e_False", - "model": { - "name": "LilRg/PRYMMAL-slerp-Merge", - "sha": "e5597549ceb5afe56428097cb297326537d07c3e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 23.42765486276747, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.304400102838247, - "normalized_score": 30.440010283824698 - }, - "bbh": { - "name": "BBH", - "value": 0.5364156271768925, - "normalized_score": 35.553775523419816 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16163141993957703, - "normalized_score": 16.1631419939577 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32046979865771813, - "normalized_score": 9.395973154362418 - }, - "musr": { - "name": "MUSR", - "value": 0.46347916666666666, - "normalized_score": 17.2015625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3863031914893617, - "normalized_score": 31.811465721040182 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-24", - "submission_date": "2024-09-24", - "generation": 1, - "base_model": "LilRg/PRYMMAL-slerp-Merge (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.821, - "co2_cost": 2.8350270896004477 - } - }, - { - "id": "LimYeri/CodeMind-Llama3-8B-unsloth_v2-merged_bfloat16_d4ec745f8279e3ac6d41709153c21cc077e66385_True", - "model": { - "name": "LimYeri/CodeMind-Llama3-8B-unsloth_v2-merged", - "sha": "d4ec745f8279e3ac6d41709153c21cc077e66385", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.472907319238214, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6946280314011268, - "normalized_score": 69.46280314011267 - }, - "bbh": { - "name": "BBH", - "value": 0.48600920882996324, - "normalized_score": 26.655629407381003 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06646525679758308, - "normalized_score": 6.646525679758309 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.3316145833333333, - "normalized_score": 2.218489583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3505651595744681, - "normalized_score": 27.840573286052013 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-04", - "submission_date": "2024-08-28", - "generation": 2, - "base_model": "Removed", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.6532492673325214 - } - }, - { - "id": "LimYeri/CodeMind-Llama3-8B-unsloth_v3-merged_bfloat16_548a221b00d8056fe7090f5e6e0af58ee7c62563_True", - "model": { - "name": "LimYeri/CodeMind-Llama3-8B-unsloth_v3-merged", - "sha": "548a221b00d8056fe7090f5e6e0af58ee7c62563", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": 
"Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.90871377207617, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6762933460994606, - "normalized_score": 67.62933460994606 - }, - "bbh": { - "name": "BBH", - "value": 0.4908161460506797, - "normalized_score": 27.02521610839599 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06797583081570997, - "normalized_score": 6.797583081570997 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.3356145833333333, - "normalized_score": 1.1518229166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34956781914893614, - "normalized_score": 27.729757683215126 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-08-28", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.6725621343744599 - } - }, - { - "id": "LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged_bfloat16_e21e4932c56cebd3f9816bf083c1792cdccbe7a7_True", - "model": { - "name": "LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged", - "sha": "e21e4932c56cebd3f9816bf083c1792cdccbe7a7", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.711489727026887, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6492406813920397, - "normalized_score": 64.92406813920398 - }, - "bbh": { - "name": "BBH", - "value": 0.48526582322240047, - "normalized_score": 26.37017668673992 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06797583081570997, - "normalized_score": 6.797583081570997 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.3607916666666667, - "normalized_score": 3.565625000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3353557180851064, - "normalized_score": 26.150635342789595 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-07", - "submission_date": "2024-08-28", - "generation": 2, - "base_model": "Removed", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4646248117248357 - } - }, - { - "id": "LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-merged_bfloat16_9c8939ccdc10beee56462eadbc16e28359a6d4c4_False", - "model": { - "name": "LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-merged", - "sha": "9c8939ccdc10beee56462eadbc16e28359a6d4c4", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.61324636978146, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.32108693821283085, - "normalized_score": 32.10869382128308 - }, - "bbh": { - "name": "BBH", - "value": 0.47387586084568856, - "normalized_score": 24.57473532012109 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05513595166163142, - "normalized_score": 5.5135951661631415 - }, - 
"gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.40692708333333333, - "normalized_score": 9.399218750000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33527260638297873, - "normalized_score": 26.14140070921986 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-06", - "submission_date": "2024-08-28", - "generation": 2, - "base_model": "Removed", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.7742841471854716 - } - }, - { - "id": "LimYeri/CodeMind-Llama3.1-8B-unsloth-merged_bfloat16_911ffe6614d23bfc9cb7ece0cd3afd861a65d7f0_True", - "model": { - "name": "LimYeri/CodeMind-Llama3.1-8B-unsloth-merged", - "sha": "911ffe6614d23bfc9cb7ece0cd3afd861a65d7f0", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.317695413728828, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6490157227268093, - "normalized_score": 64.90157227268094 - }, - "bbh": { - "name": "BBH", - "value": 0.4694777854416285, - "normalized_score": 24.185738827978955 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10876132930513595, - "normalized_score": 10.876132930513595 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.37523958333333335, - "normalized_score": 6.038281250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33402593085106386, - "normalized_score": 26.002881205673763 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-31", - "submission_date": "2024-08-31", - "generation": 3, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.6673737820627772 - } - }, - { - "id": "Locutusque/CollectiveLM-Falcon-3-7B_bfloat16_cd2fcfdf0d98ba4b60083fc48896261e1237a284_True", - "model": { - "name": "Locutusque/CollectiveLM-Falcon-3-7B", - "sha": "cd2fcfdf0d98ba4b60083fc48896261e1237a284", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.94962177916018, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3918281271470808, - "normalized_score": 39.182812714708085 - }, - "bbh": { - "name": "BBH", - "value": 0.5105131374222629, - "normalized_score": 30.787344558496812 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21827794561933533, - "normalized_score": 21.827794561933533 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32550335570469796, - "normalized_score": 10.067114093959727 - }, - "musr": { - "name": "MUSR", - "value": 0.3887291666666666, - "normalized_score": 6.957812500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35987367021276595, - "normalized_score": 28.874852245862886 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - 
"upload_date": "", - "submission_date": "2025-01-08", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.456, - "co2_cost": 1.255770375940584 - } - }, - { - "id": "Locutusque/Hercules-6.0-Llama-3.1-8B_bfloat16_f35a95aeabf9f82bbd64bfc6fd0d857df750ee83_True", - "model": { - "name": "Locutusque/Hercules-6.0-Llama-3.1-8B", - "sha": "f35a95aeabf9f82bbd64bfc6fd0d857df750ee83", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.836013153195413, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6630041622893922, - "normalized_score": 66.30041622893921 - }, - "bbh": { - "name": "BBH", - "value": 0.48133037900119535, - "normalized_score": 26.63965184405082 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16691842900302115, - "normalized_score": 16.691842900302113 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.362125, - "normalized_score": 2.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3614527925531915, - "normalized_score": 29.050310283687946 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-25", - "submission_date": "2024-09-26", - "generation": 0, - "base_model": "Locutusque/Hercules-6.0-Llama-3.1-8B", - "hub_license": "llama3.1", - "hub_hearts": 8, - "params_billions": 8.03, - "co2_cost": 1.770286150299259 - } - }, - { - "id": "Locutusque/Hercules-6.1-Llama-3.1-8B_bfloat16_f4abf4385111b4acbea8bee2c6636ef84b2dac43_True", - "model": { - "name": "Locutusque/Hercules-6.1-Llama-3.1-8B", - "sha": "f4abf4385111b4acbea8bee2c6636ef84b2dac43", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.723248561634563, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6006806384836678, - "normalized_score": 60.068063848366776 - }, - "bbh": { - "name": "BBH", - "value": 0.46562423765034017, - "normalized_score": 24.151873375413786 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17598187311178248, - "normalized_score": 17.598187311178247 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.35533333333333333, - "normalized_score": 3.4166666666666674 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36685505319148937, - "normalized_score": 29.650561465721044 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-30", - "submission_date": "2024-10-01", - "generation": 0, - "base_model": "Locutusque/Hercules-6.1-Llama-3.1-8B", - "hub_license": "llama3.1", - "hub_hearts": 7, - "params_billions": 8.03, - "co2_cost": 1.9136060068848615 - } - }, - { - "id": "Locutusque/Llama-3-NeuralHercules-5.0-8B_bfloat16_2bbb675e592a1772f2389fe2d58a5b610d479d94_True", - "model": { - "name": "Locutusque/Llama-3-NeuralHercules-5.0-8B", - "sha": "2bbb675e592a1772f2389fe2d58a5b610d479d94", - "precision": "bfloat16", - "type": "chatmodels", - 
"weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.042475909115613, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4489310584803876, - "normalized_score": 44.89310584803876 - }, - "bbh": { - "name": "BBH", - "value": 0.3940474241916672, - "normalized_score": 16.34207153663529 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04305135951661632, - "normalized_score": 4.305135951661631 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.3880729166666667, - "normalized_score": 6.7757812500000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29330119680851063, - "normalized_score": 21.47791075650118 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-28", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "Locutusque/Llama-3-NeuralHercules-5.0-8B", - "hub_license": "llama3", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.7814295450242073 - } - }, - { - "id": "Locutusque/Llama-3-Yggdrasil-2.0-8B_bfloat16_ec2329946ccc81a7c1ae36210728f717bc4f01d8_True", - "model": { - "name": "Locutusque/Llama-3-Yggdrasil-2.0-8B", - "sha": "ec2329946ccc81a7c1ae36210728f717bc4f01d8", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.46019749438689, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5370583385417359, - "normalized_score": 53.70583385417359 - }, - "bbh": { - "name": "BBH", - "value": 0.47724551424666856, - "normalized_score": 26.922800957191782 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08308157099697885, - "normalized_score": 8.308157099697885 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2625838926174497, - "normalized_score": 1.6778523489932917 - }, - "musr": { - "name": "MUSR", - "value": 0.39765625, - "normalized_score": 8.073697916666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.316655585106383, - "normalized_score": 24.072842789598106 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-05", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "Locutusque/Llama-3-Yggdrasil-2.0-8B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.629881836380663 - } - }, - { - "id": "Locutusque/TinyMistral-248M-v2.5_float16_214e48aabc01235e25c67477898756f1bebef215_True", - "model": { - "name": "Locutusque/TinyMistral-248M-v2.5", - "sha": "214e48aabc01235e25c67477898756f1bebef215", - "precision": "float16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 4.035439468453137, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1336409615376091, - "normalized_score": 13.36409615376091 - }, - "bbh": { - "name": "BBH", - "value": 0.30385761123260785, - "normalized_score": 3.181881126755549 - }, - "math": { - "name": "MATH Level 5", - "value": 0.009818731117824773, - "normalized_score": 0.9818731117824773 - }, - "gpqa": { - "name": 
"GPQA", - "value": 0.25083892617449666, - "normalized_score": 0.11185682326622093 - }, - "musr": { - "name": "MUSR", - "value": 0.37815624999999997, - "normalized_score": 5.069531250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11353058510638298, - "normalized_score": 1.5033983451536632 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-01-24", - "submission_date": "2024-09-17", - "generation": 0, - "base_model": "Locutusque/TinyMistral-248M-v2.5", - "hub_license": "apache-2.0", - "hub_hearts": 27, - "params_billions": 0.248, - "co2_cost": 0.4844288518969972 - } - }, - { - "id": "Luni/StarDust-12b-v1_bfloat16_91976b0c71dce1310f4a6139552e10a6149bdc31_True", - "model": { - "name": "Luni/StarDust-12b-v1", - "sha": "91976b0c71dce1310f4a6139552e10a6149bdc31", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 23.447979554409255, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5459259210007226, - "normalized_score": 54.59259210007225 - }, - "bbh": { - "name": "BBH", - "value": 0.5366139363101082, - "normalized_score": 34.44627563758498 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07628398791540786, - "normalized_score": 7.628398791540786 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.43244791666666665, - "normalized_score": 13.755989583333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34117353723404253, - "normalized_score": 26.797059692671393 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-29", - "submission_date": "2024-09-03", - "generation": 1, - "base_model": "Luni/StarDust-12b-v1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 15, - "params_billions": 12.248, - "co2_cost": 2.90526607847839 - } - }, - { - "id": "Luni/StarDust-12b-v2_bfloat16_75bffd7b86f37c2cebc4fdf83fbc3ab33d6c6e05_True", - "model": { - "name": "Luni/StarDust-12b-v2", - "sha": "75bffd7b86f37c2cebc4fdf83fbc3ab33d6c6e05", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 24.215076436734588, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5628620947973599, - "normalized_score": 56.28620947973599 - }, - "bbh": { - "name": "BBH", - "value": 0.5419479534912178, - "normalized_score": 34.95288411454464 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06873111782477341, - "normalized_score": 6.873111782477341 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2936241610738255, - "normalized_score": 5.8165548098433995 - }, - "musr": { - "name": "MUSR", - "value": 0.4338125, - "normalized_score": 14.259895833333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3439162234042553, - "normalized_score": 27.101802600472812 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-01", - "submission_date": "2024-09-03", - "generation": 1, - 
"base_model": "Luni/StarDust-12b-v2 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 31, - "params_billions": 12.248, - "co2_cost": 3.0650490699095467 - } - }, - { - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v3_bfloat16_2257ed2bb74b3b66465d7c57f1469c07953b9a8e_False", - "model": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v3", - "sha": "2257ed2bb74b3b66465d7c57f1469c07953b9a8e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.23204160839732, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7048697456083193, - "normalized_score": 70.48697456083192 - }, - "bbh": { - "name": "BBH", - "value": 0.6478481476573447, - "normalized_score": 49.31202888085861 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4161631419939577, - "normalized_score": 41.616314199395774 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38171140939597314, - "normalized_score": 17.561521252796418 - }, - "musr": { - "name": "MUSR", - "value": 0.48075, - "normalized_score": 19.59375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5393949468085106, - "normalized_score": 48.82166075650118 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-18", - "submission_date": "2025-02-18", - "generation": 1, - "base_model": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v3 (Merge)", - "hub_license": "", - "hub_hearts": 3, - "params_billions": 14.766, - "co2_cost": 3.879799645775924 - } - }, - { - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v4_float16_ae9277d471285c5ec52cf4a816ad1db91deacdf8_False", - "model": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v4", - "sha": "ae9277d471285c5ec52cf4a816ad1db91deacdf8", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.28165983782925, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6943033373670748, - "normalized_score": 69.4303337367075 - }, - "bbh": { - "name": "BBH", - "value": 0.6419880364363972, - "normalized_score": 48.72079878457851 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3466767371601209, - "normalized_score": 34.66767371601209 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3716442953020134, - "normalized_score": 16.21923937360179 - }, - "musr": { - "name": "MUSR", - "value": 0.476875, - "normalized_score": 19.409374999999994 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5251828457446809, - "normalized_score": 47.242538416075654 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-19", - "submission_date": "2025-02-19", - "generation": 3, - "base_model": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v3 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 14.766, - "co2_cost": 3.9675471125913835 - } - }, - { - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v5_float16_df87535c8f1bf955a5b682f88584fa3d473a104b_True", - "model": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v5", - "sha": "df87535c8f1bf955a5b682f88584fa3d473a104b", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - 
"weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.96266321754947, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7485084021507378, - "normalized_score": 74.85084021507379 - }, - "bbh": { - "name": "BBH", - "value": 0.6466679318879384, - "normalized_score": 49.50507308185843 - }, - "math": { - "name": "MATH Level 5", - "value": 0.43580060422960726, - "normalized_score": 43.58006042296073 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3624161073825503, - "normalized_score": 14.988814317673373 - }, - "musr": { - "name": "MUSR", - "value": 0.4473020833333334, - "normalized_score": 16.846093749999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5140458776595744, - "normalized_score": 46.00509751773049 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-20", - "submission_date": "2025-02-20", - "generation": 5, - "base_model": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v3 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 14.766, - "co2_cost": 1.61707672271528 - } - }, - { - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6_bfloat16_37c6300e2ad9a03042b28c70033bca0f7358ec41_False", - "model": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6", - "sha": "37c6300e2ad9a03042b28c70033bca0f7358ec41", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.647194392786716, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.704320092909037, - "normalized_score": 70.4320092909037 - }, - "bbh": { - "name": "BBH", - "value": 0.6457646219275207, - "normalized_score": 49.104366248398456 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3957703927492447, - "normalized_score": 39.577039274924466 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3775167785234899, - "normalized_score": 17.00223713646532 - }, - "musr": { - "name": "MUSR", - "value": 0.47678125, - "normalized_score": 18.964322916666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5392287234042553, - "normalized_score": 48.8031914893617 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-24", - "submission_date": "2025-02-25", - "generation": 1, - "base_model": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 14.766, - "co2_cost": 1.9257024670705207 - } - }, - { - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6-cpt_bfloat16_05bf73eec1812d308f42cc250d4ea56894e013f9_False", - "model": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6-cpt", - "sha": "05bf73eec1812d308f42cc250d4ea56894e013f9", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 34.927489569706715, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.46634152936430895, - "normalized_score": 46.634152936430894 - }, - "bbh": { - "name": "BBH", - "value": 0.6214839063250638, - "normalized_score": 44.89488240805749 - }, - "math": { - "name": "MATH Level 5", - "value": 
0.33157099697885195, - "normalized_score": 33.157099697885194 - }, - "gpqa": { - "name": "GPQA", - "value": 0.37583892617449666, - "normalized_score": 16.778523489932887 - }, - "musr": { - "name": "MUSR", - "value": 0.49373958333333334, - "normalized_score": 21.38411458333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5204454787234043, - "normalized_score": 46.716164302600475 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-26", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 1.968517364465306 - } - }, - { - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7_bfloat16_5f3b2fdab8fb45a506a323c04086a963ea6ca226_False", - "model": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7", - "sha": "5f3b2fdab8fb45a506a323c04086a963ea6ca226", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.859469762785686, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6793906833867471, - "normalized_score": 67.93906833867472 - }, - "bbh": { - "name": "BBH", - "value": 0.653127892154805, - "normalized_score": 50.00470562826306 - }, - "math": { - "name": "MATH Level 5", - "value": 0.41012084592145015, - "normalized_score": 41.012084592145015 - }, - "gpqa": { - "name": "GPQA", - "value": 0.37919463087248323, - "normalized_score": 17.225950782997764 - }, - "musr": { - "name": "MUSR", - "value": 0.4833854166666667, - "normalized_score": 20.356510416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5375664893617021, - "normalized_score": 48.6184988179669 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-28", - "submission_date": "2025-02-28", - "generation": 1, - "base_model": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 1.9439535309616702 - } - }, - { - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7-rebase_bfloat16_b24100c4bfa90e793706f4af7fb475da86b94566_False", - "model": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7-rebase", - "sha": "b24100c4bfa90e793706f4af7fb475da86b94566", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.62401757502077, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.693054428915278, - "normalized_score": 69.3054428915278 - }, - "bbh": { - "name": "BBH", - "value": 0.6422587980411637, - "normalized_score": 48.854078526667685 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3406344410876133, - "normalized_score": 34.06344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.375, - "normalized_score": 16.666666666666664 - }, - "musr": { - "name": "MUSR", - "value": 0.48881250000000004, - "normalized_score": 21.33489583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5276761968085106, - "normalized_score": 47.519577423167846 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - 
"is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-28", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 1.905122732018174 - } - }, - { - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8_bfloat16_351171919b27332a7686cdafd9fe8a380f1f055e_True", - "model": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8", - "sha": "351171919b27332a7686cdafd9fe8a380f1f055e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 42.7836905135922, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7874761189200211, - "normalized_score": 78.74761189200211 - }, - "bbh": { - "name": "BBH", - "value": 0.6419472828128271, - "normalized_score": 49.0341381962539 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5558912386706949, - "normalized_score": 55.58912386706949 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33557046979865773, - "normalized_score": 11.409395973154364 - }, - "musr": { - "name": "MUSR", - "value": 0.43936458333333334, - "normalized_score": 15.18723958333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5206117021276596, - "normalized_score": 46.734633569739955 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-28", - "submission_date": "2025-03-09", - "generation": 1, - "base_model": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 5.303604837640059 - } - }, - { - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.5_bfloat16_770fd3503a058619e186a59966a093c92a25475c_False", - "model": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.5", - "sha": "770fd3503a058619e186a59966a093c92a25475c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.22998867442242, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5928624937388352, - "normalized_score": 59.28624937388352 - }, - "bbh": { - "name": "BBH", - "value": 0.6451310724242122, - "normalized_score": 49.078225184099324 - }, - "math": { - "name": "MATH Level 5", - "value": 0.36555891238670696, - "normalized_score": 36.5558912386707 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3800335570469799, - "normalized_score": 17.337807606263986 - }, - "musr": { - "name": "MUSR", - "value": 0.47696875, - "normalized_score": 19.45442708333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5290059840425532, - "normalized_score": 47.667331560283685 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-28", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 1.9989400208093273 - } - }, - { - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.6_bfloat16_bd6c996b6f7cd1f905e82064d7fc98612b2a5350_False", - "model": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.6", - "sha": "bd6c996b6f7cd1f905e82064d7fc98612b2a5350", - "precision": 
"bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.62574720619401, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5919382793210903, - "normalized_score": 59.19382793210902 - }, - "bbh": { - "name": "BBH", - "value": 0.6457173605698173, - "normalized_score": 48.98525935245095 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4070996978851964, - "normalized_score": 40.70996978851964 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38422818791946306, - "normalized_score": 17.897091722595075 - }, - "musr": { - "name": "MUSR", - "value": 0.49532291666666667, - "normalized_score": 22.082031249999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5399767287234043, - "normalized_score": 48.88630319148937 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-01", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 1.9912335913535633 - } - }, - { - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.7_bfloat16_2ddb5cfe869eb6456ee9ea3cc19783db4ff7ab63_True", - "model": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.7", - "sha": "2ddb5cfe869eb6456ee9ea3cc19783db4ff7ab63", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 43.093410973736184, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7874761189200211, - "normalized_score": 78.74761189200211 - }, - "bbh": { - "name": "BBH", - "value": 0.6482757721443902, - "normalized_score": 49.91009317302106 - }, - "math": { - "name": "MATH Level 5", - "value": 0.540785498489426, - "normalized_score": 54.0785498489426 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35151006711409394, - "normalized_score": 13.534675615212524 - }, - "musr": { - "name": "MUSR", - "value": 0.4380625, - "normalized_score": 15.157812499999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.524185505319149, - "normalized_score": 47.131722813238774 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-01", - "submission_date": "2025-03-09", - "generation": 1, - "base_model": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.7 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 14.766, - "co2_cost": 3.5632180211681876 - } - }, - { - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.8_bfloat16_356ebbe36a74f620d78bbf5b2554c31131ba5248_False", - "model": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.8", - "sha": "356ebbe36a74f620d78bbf5b2554c31131ba5248", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.54655143545798, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7027963581075989, - "normalized_score": 70.27963581075988 - }, - "bbh": { - "name": "BBH", - "value": 0.6565626437486437, - "normalized_score": 50.74642495052259 - }, - "math": { - "name": "MATH Level 5", - "value": 0.42371601208459214, - "normalized_score": 
42.37160120845921 - }, - "gpqa": { - "name": "GPQA", - "value": 0.37583892617449666, - "normalized_score": 16.778523489932887 - }, - "musr": { - "name": "MUSR", - "value": 0.4911979166666667, - "normalized_score": 21.06640625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5323304521276596, - "normalized_score": 48.03671690307329 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-01", - "submission_date": "2025-03-01", - "generation": 1, - "base_model": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.8 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 14.766, - "co2_cost": 1.7597076355480763 - } - }, - { - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.9_bfloat16_91e8a2f67ea1e5b6f4719a451f7c3556340c6a8c_True", - "model": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.9", - "sha": "91e8a2f67ea1e5b6f4719a451f7c3556340c6a8c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 42.523098714975845, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7993413032974729, - "normalized_score": 79.93413032974729 - }, - "bbh": { - "name": "BBH", - "value": 0.6483097746745584, - "normalized_score": 49.946869654224066 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5370090634441088, - "normalized_score": 53.70090634441088 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3296979865771812, - "normalized_score": 10.626398210290827 - }, - "musr": { - "name": "MUSR", - "value": 0.43282291666666667, - "normalized_score": 14.269531249999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5199468085106383, - "normalized_score": 46.660756501182036 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-09", - "generation": 0, - "base_model": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.9", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 3.4618021251597524 - } - }, - { - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9_bfloat16_8f12f3e6fcecc948f848c6f7df9361933f39996a_False", - "model": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9", - "sha": "8f12f3e6fcecc948f848c6f7df9361933f39996a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.87089983109002, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.523519816309614, - "normalized_score": 52.351981630961404 - }, - "bbh": { - "name": "BBH", - "value": 0.6545588984302916, - "normalized_score": 50.25550159491318 - }, - "math": { - "name": "MATH Level 5", - "value": 0.43655589123867067, - "normalized_score": 43.65558912386707 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3884228187919463, - "normalized_score": 18.456375838926174 - }, - "musr": { - "name": "MUSR", - "value": 0.4805625, - "normalized_score": 19.3703125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.542220744680851, - "normalized_score": 49.13563829787233 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - 
"is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-07", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 1.9849218611245187 - } - }, - { - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9-stock_bfloat16_5d8d588acc956b3bc575ae9d2b2b881ff60c13f7_False", - "model": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9-stock", - "sha": "5d8d588acc956b3bc575ae9d2b2b881ff60c13f7", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.70544769121661, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6513639365771708, - "normalized_score": 65.13639365771708 - }, - "bbh": { - "name": "BBH", - "value": 0.6570671029574323, - "normalized_score": 50.620019436503945 - }, - "math": { - "name": "MATH Level 5", - "value": 0.41842900302114805, - "normalized_score": 41.842900302114806 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38422818791946306, - "normalized_score": 17.897091722595075 - }, - "musr": { - "name": "MUSR", - "value": 0.4819583333333333, - "normalized_score": 19.711458333333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5412234042553191, - "normalized_score": 49.02482269503546 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-07", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 2.0047170912565835 - } - }, - { - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.1_bfloat16_185ba0627a43d0ed1d0838ca65e09c4a9da061e3_True", - "model": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.1", - "sha": "185ba0627a43d0ed1d0838ca65e09c4a9da061e3", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 43.309072158690384, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8002655177152178, - "normalized_score": 80.02655177152178 - }, - "bbh": { - "name": "BBH", - "value": 0.6554749578648256, - "normalized_score": 50.73787805937095 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5468277945619335, - "normalized_score": 54.68277945619335 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34312080536912754, - "normalized_score": 12.416107382550338 - }, - "musr": { - "name": "MUSR", - "value": 0.43539583333333337, - "normalized_score": 14.757812499999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5250997340425532, - "normalized_score": 47.23330378250591 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-10", - "submission_date": "2025-03-10", - "generation": 1, - "base_model": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.1 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 14.766, - "co2_cost": 1.6595945851512308 - } - }, - { - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.2_bfloat16_90d3b5a25b5f7aec6c2bce2cf6150d2565324d4a_True", - "model": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.2", - "sha": "90d3b5a25b5f7aec6c2bce2cf6150d2565324d4a", - 
"precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 43.22504860283835, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7862272104682243, - "normalized_score": 78.62272104682242 - }, - "bbh": { - "name": "BBH", - "value": 0.6537693501484436, - "normalized_score": 50.455174757059126 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5332326283987915, - "normalized_score": 53.32326283987915 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35570469798657717, - "normalized_score": 14.093959731543624 - }, - "musr": { - "name": "MUSR", - "value": 0.43809375, - "normalized_score": 15.261718749999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5283410904255319, - "normalized_score": 47.593454491725765 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-12", - "submission_date": "2025-03-13", - "generation": 1, - "base_model": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.2 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 14.766, - "co2_cost": 1.6805573291447646 - } - }, - { - "id": "Lunzima/NQLSG-Qwen2.5-14B-OriginalFusion_bfloat16_d07dd0ea8d735f36c9c28625682da4d71c7ef871_False", - "model": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-OriginalFusion", - "sha": "d07dd0ea8d735f36c9c28625682da4d71c7ef871", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.60963092380559, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6141947809589667, - "normalized_score": 61.419478095896665 - }, - "bbh": { - "name": "BBH", - "value": 0.6592166466793806, - "normalized_score": 50.991749442122575 - }, - "math": { - "name": "MATH Level 5", - "value": 0.42749244712990936, - "normalized_score": 42.74924471299094 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3808724832214765, - "normalized_score": 17.4496644295302 - }, - "musr": { - "name": "MUSR", - "value": 0.51215625, - "normalized_score": 23.952864583333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5238530585106383, - "normalized_score": 47.094784278959814 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-01", - "submission_date": "2025-03-01", - "generation": 1, - "base_model": "Lunzima/NQLSG-Qwen2.5-14B-OriginalFusion (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 1.70382521675319 - } - }, - { - "id": "Lyte/Llama-3.1-8B-Instruct-Reasoner-1o1_v0.3_float16_35ab483f04afa763f36f978408f4f82e0379ee25_True", - "model": { - "name": "Lyte/Llama-3.1-8B-Instruct-Reasoner-1o1_v0.3", - "sha": "35ab483f04afa763f36f978408f4f82e0379ee25", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.75346114088417, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7098155117310957, - "normalized_score": 70.98155117310957 - }, - "bbh": { - "name": "BBH", - "value": 0.4949521619329585, - "normalized_score": 27.83521213410715 - }, - "math": { - "name": 
"MATH Level 5", - "value": 0.1903323262839879, - "normalized_score": 19.033232628398792 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.346125, - "normalized_score": 4.8989583333333355 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36178523936170215, - "normalized_score": 29.08724881796691 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-17", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.0, - "co2_cost": 1.8314746833161262 - } - }, - { - "id": "Lyte/Llama-3.2-1B-Instruct-COT-RL-Expriement1-EP04_bfloat16_59d93307c6f2cb7a29c593cbc7393122d502d1b1_True", - "model": { - "name": "Lyte/Llama-3.2-1B-Instruct-COT-RL-Expriement1-EP04", - "sha": "59d93307c6f2cb7a29c593cbc7393122d502d1b1", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.647307771096619, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5773503193748144, - "normalized_score": 57.73503193748144 - }, - "bbh": { - "name": "BBH", - "value": 0.3515036874279285, - "normalized_score": 8.894408584162113 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08006042296072508, - "normalized_score": 8.006042296072508 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.32355208333333335, - "normalized_score": 2.5440104166666675 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.18425864361702127, - "normalized_score": 9.362071513002363 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-26", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 0.9002666105733118 - } - }, - { - "id": "Lyte/Llama-3.2-3B-Overthinker_float16_0e7af37fb3381365905fc2df24811c0e6d2ba5b2_True", - "model": { - "name": "Lyte/Llama-3.2-3B-Overthinker", - "sha": "0e7af37fb3381365905fc2df24811c0e6d2ba5b2", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.167473631942162, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6407975283359264, - "normalized_score": 64.07975283359264 - }, - "bbh": { - "name": "BBH", - "value": 0.4320093097952517, - "normalized_score": 20.095582226457154 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15634441087613293, - "normalized_score": 15.634441087613293 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.34190625, - "normalized_score": 3.9049479166666674 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29853723404255317, - "normalized_score": 22.059692671394796 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - 
"metadata": { - "upload_date": "2024-10-17", - "submission_date": "2024-10-18", - "generation": 2, - "base_model": "meta-llama/Llama-3.2-3B-Instruct", - "hub_license": "apache-2.0", - "hub_hearts": 20, - "params_billions": 3.213, - "co2_cost": 1.4672793947890184 - } - }, - { - "id": "M4-ai/TinyMistral-248M-v3_bfloat16_fa23fe617768c671f0bbbff1edf4556cfe844167_False", - "model": { - "name": "M4-ai/TinyMistral-248M-v3", - "sha": "fa23fe617768c671f0bbbff1edf4556cfe844167", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 4.20563631018843, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16386631914431488, - "normalized_score": 16.386631914431486 - }, - "bbh": { - "name": "BBH", - "value": 0.2884549938995566, - "normalized_score": 1.7775539303863237 - }, - "math": { - "name": "MATH Level 5", - "value": 0.004531722054380665, - "normalized_score": 0.4531722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2407718120805369, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3793333333333333, - "normalized_score": 5.150000000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11319813829787234, - "normalized_score": 1.466459810874704 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-02-05", - "submission_date": "2024-10-18", - "generation": 0, - "base_model": "M4-ai/TinyMistral-248M-v3", - "hub_license": "apache-2.0", - "hub_hearts": 8, - "params_billions": 0.248, - "co2_cost": 0.4683672228415573 - } - }, - { - "id": "MEscriva/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis_float16_7a9d848188a674302d64a865786d4508be19571a_False", - "model": { - "name": "MEscriva/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis", - "sha": "7a9d848188a674302d64a865786d4508be19571a", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 3.9187386193794325, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.08662903318749807, - "normalized_score": 8.662903318749805 - }, - "bbh": { - "name": "BBH", - "value": 0.305728612437881, - "normalized_score": 3.237774271047842 - }, - "math": { - "name": "MATH Level 5", - "value": 0.010574018126888216, - "normalized_score": 1.0574018126888216 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2516778523489933, - "normalized_score": 0.22371364653244186 - }, - "musr": { - "name": "MUSR", - "value": 0.40171874999999996, - "normalized_score": 8.614843750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11544215425531915, - "normalized_score": 1.7157949172576823 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-19", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 2.103112462385385 - } - }, - { - "id": "MLP-KTLim/llama-3-Korean-Bllossom-8B_bfloat16_8a738f9f622ffc2b0a4a6b81dabbca80406248bf_True", - "model": { - "name": "MLP-KTLim/llama-3-Korean-Bllossom-8B", - "sha": "8a738f9f622ffc2b0a4a6b81dabbca80406248bf", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": 
"Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.39691634614766, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5112800702136997, - "normalized_score": 51.12800702136997 - }, - "bbh": { - "name": "BBH", - "value": 0.49004556470187666, - "normalized_score": 26.927527973055067 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10196374622356495, - "normalized_score": 10.196374622356496 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2625838926174497, - "normalized_score": 1.6778523489932917 - }, - "musr": { - "name": "MUSR", - "value": 0.3674583333333334, - "normalized_score": 3.6322916666666694 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.359375, - "normalized_score": 28.819444444444446 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-25", - "submission_date": "2024-07-09", - "generation": 1, - "base_model": "MLP-KTLim/llama-3-Korean-Bllossom-8B (Merge)", - "hub_license": "llama3", - "hub_hearts": 319, - "params_billions": 8.03, - "co2_cost": 1.5494411286696896 - } - }, - { - "id": "MTSAIR/Cotype-Nano_bfloat16_91817ff717dd16d216304fa9d749e08fce2aa38d_True", - "model": { - "name": "MTSAIR/Cotype-Nano", - "sha": "91817ff717dd16d216304fa9d749e08fce2aa38d", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 13.812756376126401, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3747922179816221, - "normalized_score": 37.479221798162214 - }, - "bbh": { - "name": "BBH", - "value": 0.3864940969601492, - "normalized_score": 14.446870023241223 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09743202416918428, - "normalized_score": 9.743202416918429 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.3289166666666667, - "normalized_score": 2.114583333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.24767287234042554, - "normalized_score": 16.40809692671395 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-22", - "submission_date": "2024-12-01", - "generation": 0, - "base_model": "MTSAIR/Cotype-Nano", - "hub_license": "other", - "hub_hearts": 50, - "params_billions": 1.544, - "co2_cost": 0.986576031111959 - } - }, - { - "id": "MTSAIR/MultiVerse_70B_bfloat16_063430cdc4d972a0884e3e3e3d45ea4afbdf71a2_False", - "model": { - "name": "MTSAIR/MultiVerse_70B", - "sha": "063430cdc4d972a0884e3e3e3d45ea4afbdf71a2", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 32.24436452801163, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5249183278146429, - "normalized_score": 52.49183278146429 - }, - "bbh": { - "name": "BBH", - "value": 0.6183134284931178, - "normalized_score": 46.135898982415 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19259818731117825, - "normalized_score": 19.259818731117825 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3540268456375839, - "normalized_score": 13.870246085011187 - 
}, - "musr": { - "name": "MUSR", - "value": 0.47398958333333335, - "normalized_score": 18.815364583333327 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.48603723404255317, - "normalized_score": 42.89302600472813 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-03-25", - "submission_date": "2024-06-29", - "generation": 0, - "base_model": "MTSAIR/MultiVerse_70B", - "hub_license": "other", - "hub_hearts": 39, - "params_billions": 72.289, - "co2_cost": 27.203634766972748 - } - }, - { - "id": "Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.1_bfloat16_1ed587f54f70334f495efb9c027acb03e96fe24f_True", - "model": { - "name": "Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.1", - "sha": "1ed587f54f70334f495efb9c027acb03e96fe24f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 15.954087586908154, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4361416596851908, - "normalized_score": 43.61416596851908 - }, - "bbh": { - "name": "BBH", - "value": 0.4615102744527366, - "normalized_score": 23.990124398411343 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05740181268882175, - "normalized_score": 5.740181268882175 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2625838926174497, - "normalized_score": 1.6778523489932917 - }, - "musr": { - "name": "MUSR", - "value": 0.32773958333333336, - "normalized_score": 0.0 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2863198138297872, - "normalized_score": 20.702201536643024 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-06", - "submission_date": "2024-09-17", - "generation": 1, - "base_model": "meta-llama/Meta-Llama-3-8B", - "hub_license": "llama3", - "hub_hearts": 4, - "params_billions": 8.03, - "co2_cost": 1.6671382773267995 - } - }, - { - "id": "Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.3_bfloat16_d2578eb754d1c20efe604749296580f680950917_True", - "model": { - "name": "Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.3", - "sha": "d2578eb754d1c20efe604749296580f680950917", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.55322537025943, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5063586838477463, - "normalized_score": 50.635868384774625 - }, - "bbh": { - "name": "BBH", - "value": 0.45715808996720547, - "normalized_score": 23.698815892387586 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07326283987915408, - "normalized_score": 7.326283987915408 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.34237500000000004, - "normalized_score": 0.3968749999999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2902260638297872, - "normalized_score": 21.136229314420802 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-13", - "submission_date": "2024-08-06", 
- "generation": 1, - "base_model": "meta-llama/Meta-Llama-3-8B", - "hub_license": "llama3", - "hub_hearts": 6, - "params_billions": 8.03, - "co2_cost": 1.7908402381549982 - } - }, - { - "id": "Magpie-Align/Llama-3-8B-Magpie-Align-v0.1_bfloat16_a83ddac146fb2da1dd1bfa4069e336074d1439a8_True", - "model": { - "name": "Magpie-Align/Llama-3-8B-Magpie-Align-v0.1", - "sha": "a83ddac146fb2da1dd1bfa4069e336074d1439a8", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.473094269110153, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4118117705465941, - "normalized_score": 41.181177054659415 - }, - "bbh": { - "name": "BBH", - "value": 0.4811441560714845, - "normalized_score": 26.69176089392447 - }, - "math": { - "name": "MATH Level 5", - "value": 0.033987915407854986, - "normalized_score": 3.3987915407854987 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2751677852348993, - "normalized_score": 3.355704697986576 - }, - "musr": { - "name": "MUSR", - "value": 0.3046979166666667, - "normalized_score": 1.920572916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3006150265957447, - "normalized_score": 22.2905585106383 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-29", - "submission_date": "2024-07-03", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3-8B", - "hub_license": "llama3", - "hub_hearts": 10, - "params_billions": 8.03, - "co2_cost": 0.9068485399826427 - } - }, - { - "id": "Magpie-Align/Llama-3-8B-Magpie-Align-v0.1_float16_a83ddac146fb2da1dd1bfa4069e336074d1439a8_True", - "model": { - "name": "Magpie-Align/Llama-3-8B-Magpie-Align-v0.1", - "sha": "a83ddac146fb2da1dd1bfa4069e336074d1439a8", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.484004626097455, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4027192294223771, - "normalized_score": 40.27192294223771 - }, - "bbh": { - "name": "BBH", - "value": 0.47894081019705514, - "normalized_score": 26.289712088654472 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04607250755287009, - "normalized_score": 4.607250755287009 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27684563758389263, - "normalized_score": 3.5794183445190177 - }, - "musr": { - "name": "MUSR", - "value": 0.3086979166666666, - "normalized_score": 1.920572916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30011635638297873, - "normalized_score": 22.23515070921986 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-29", - "submission_date": "2024-07-03", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3-8B", - "hub_license": "llama3", - "hub_hearts": 10, - "params_billions": 8.03, - "co2_cost": 2.7745806463451914 - } - }, - { - "id": "Magpie-Align/Llama-3-8B-Magpie-Align-v0.3_bfloat16_7e420ddd6ff48bf213dcab2a9ddb7845b80dd1aa_True", - "model": { - "name": "Magpie-Align/Llama-3-8B-Magpie-Align-v0.3", - "sha": "7e420ddd6ff48bf213dcab2a9ddb7845b80dd1aa", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": 
"LlamaForCausalLM", - "average_score": 17.40249479320385, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44970566984490046, - "normalized_score": 44.97056698449004 - }, - "bbh": { - "name": "BBH", - "value": 0.456960506522001, - "normalized_score": 24.311446807587018 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05664652567975831, - "normalized_score": 5.664652567975831 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.34060416666666665, - "normalized_score": 3.7421875000000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31341422872340424, - "normalized_score": 23.712692080378247 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-15", - "submission_date": "2024-08-06", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3-8B", - "hub_license": "llama3", - "hub_hearts": 3, - "params_billions": 8.03, - "co2_cost": 1.4845792557938584 - } - }, - { - "id": "Magpie-Align/Llama-3.1-8B-Magpie-Align-SFT-v0.1_bfloat16_b191916912f0e76b2bdc93c46c0af590cc87e7ae_True", - "model": { - "name": "Magpie-Align/Llama-3.1-8B-Magpie-Align-SFT-v0.1", - "sha": "b191916912f0e76b2bdc93c46c0af590cc87e7ae", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.975799245688165, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47820671374176077, - "normalized_score": 47.82067137417607 - }, - "bbh": { - "name": "BBH", - "value": 0.4764157817799906, - "normalized_score": 26.13667696363235 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08987915407854985, - "normalized_score": 8.987915407854985 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.3397395833333334, - "normalized_score": 1.8666666666666683 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29429853723404253, - "normalized_score": 21.588726359338057 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-23", - "submission_date": "2024-09-17", - "generation": 1, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "llama3.1", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 2.749024212957996 - } - }, - { - "id": "Magpie-Align/Llama-3.1-8B-Magpie-Align-v0.1_bfloat16_dd34258a5f2bf7630b5a8e5662b050c60a088927_True", - "model": { - "name": "Magpie-Align/Llama-3.1-8B-Magpie-Align-v0.1", - "sha": "dd34258a5f2bf7630b5a8e5662b050c60a088927", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.54685483954604, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4457838535086903, - "normalized_score": 44.57838535086903 - }, - "bbh": { - "name": "BBH", - "value": 0.46223963164680143, - "normalized_score": 24.04053735093787 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06646525679758308, - "normalized_score": 6.646525679758309 - }, - "gpqa": { - "name": "GPQA", 
- "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.31406249999999997, - "normalized_score": 3.091145833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32621343085106386, - "normalized_score": 25.134825650118202 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-24", - "submission_date": "2024-09-17", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "llama3.1", - "hub_hearts": 3, - "params_billions": 8.03, - "co2_cost": 1.4160822155654091 - } - }, - { - "id": "Magpie-Align/MagpieLM-8B-Chat-v0.1_bfloat16_0b30eabc82a01fb42f44ba62c2dc81e1bd09cc04_True", - "model": { - "name": "Magpie-Align/MagpieLM-8B-Chat-v0.1", - "sha": "0b30eabc82a01fb42f44ba62c2dc81e1bd09cc04", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 15.026344312964895, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3700714105240761, - "normalized_score": 37.00714105240761 - }, - "bbh": { - "name": "BBH", - "value": 0.4172338260055306, - "normalized_score": 18.25580502860485 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06117824773413897, - "normalized_score": 6.117824773413897 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.3500625, - "normalized_score": 2.824479166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3194813829787234, - "normalized_score": 24.386820330969268 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-15", - "submission_date": "2024-09-19", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "llama3.1", - "hub_hearts": 22, - "params_billions": 8.03, - "co2_cost": 1.4737531937094557 - } - }, - { - "id": "Magpie-Align/MagpieLM-8B-SFT-v0.1_bfloat16_b91f605a511707cb3b7f0893a8ed80c77b32d5a8_True", - "model": { - "name": "Magpie-Align/MagpieLM-8B-SFT-v0.1", - "sha": "b91f605a511707cb3b7f0893a8ed80c77b32d5a8", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.78392950063439, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4720619068515982, - "normalized_score": 47.20619068515982 - }, - "bbh": { - "name": "BBH", - "value": 0.45528501595553356, - "normalized_score": 23.612313350177956 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0755287009063444, - "normalized_score": 7.552870090634441 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.3648854166666667, - "normalized_score": 3.877343750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2989527925531915, - "normalized_score": 22.1058658392435 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-15", - "submission_date": 
"2024-09-19", - "generation": 1, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "llama3.1", - "hub_hearts": 3, - "params_billions": 8.03, - "co2_cost": 1.6008421965008803 - } - }, - { - "id": "MagusCorp/grpo_lora_enem_llama3_7b_float16_a7782b3ab1954d78353985e1ae4a7cf24a651209_False", - "model": { - "name": "MagusCorp/grpo_lora_enem_llama3_7b", - "sha": "a7782b3ab1954d78353985e1ae4a7cf24a651209", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.634388075131113, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4723622211288271, - "normalized_score": 47.23622211288271 - }, - "bbh": { - "name": "BBH", - "value": 0.48014581980384746, - "normalized_score": 25.896379467976946 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1216012084592145, - "normalized_score": 12.16012084592145 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.397125, - "normalized_score": 7.973958333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35738031914893614, - "normalized_score": 28.59781323877068 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-11", - "submission_date": "2025-02-11", - "generation": 3, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.9166090008924255 - } - }, - { - "id": "ManoloPueblo/ContentCuisine_1-7B-slerp_float16_e811e880075a2945623040ee43e9a6972675ff2e_False", - "model": { - "name": "ManoloPueblo/ContentCuisine_1-7B-slerp", - "sha": "e811e880075a2945623040ee43e9a6972675ff2e", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.05279849551743, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3907044419916932, - "normalized_score": 39.070444199169316 - }, - "bbh": { - "name": "BBH", - "value": 0.5188437309746964, - "normalized_score": 32.78974404613455 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07326283987915408, - "normalized_score": 7.326283987915408 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.46719791666666666, - "normalized_score": 17.26640625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30535239361702127, - "normalized_score": 22.816932624113473 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-12", - "submission_date": "2024-11-12", - "generation": 1, - "base_model": "ManoloPueblo/ContentCuisine_1-7B-slerp (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 0.999011673173047 - } - }, - { - "id": "ManoloPueblo/LLM_MERGE_CC2_float16_a39dcd4e8175c0e2ab9bda2c7a4f377b97549644_False", - "model": { - "name": "ManoloPueblo/LLM_MERGE_CC2", - "sha": "a39dcd4e8175c0e2ab9bda2c7a4f377b97549644", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": 
"MistralForCausalLM", - "average_score": 20.747367714260108, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3853087585384557, - "normalized_score": 38.53087585384557 - }, - "bbh": { - "name": "BBH", - "value": 0.5209367401710429, - "normalized_score": 33.241073524404655 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06419939577039276, - "normalized_score": 6.419939577039275 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.45929166666666665, - "normalized_score": 16.444791666666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30319148936170215, - "normalized_score": 22.576832151300238 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-02", - "submission_date": "2024-11-12", - "generation": 0, - "base_model": "ManoloPueblo/LLM_MERGE_CC2", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 1.1470867378640077 - } - }, - { - "id": "ManoloPueblo/LLM_MERGE_CC3_float16_79d2bd3866e363b9e700f59cfc573b2bc9de2442_False", - "model": { - "name": "ManoloPueblo/LLM_MERGE_CC3", - "sha": "79d2bd3866e363b9e700f59cfc573b2bc9de2442", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.67864029208779, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3958751667797001, - "normalized_score": 39.58751667797001 - }, - "bbh": { - "name": "BBH", - "value": 0.5246290546274339, - "normalized_score": 33.23001780763082 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07930513595166164, - "normalized_score": 7.930513595166164 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.4671666666666667, - "normalized_score": 17.42916666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3155751329787234, - "normalized_score": 23.95279255319149 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-10", - "submission_date": "2024-11-12", - "generation": 0, - "base_model": "ManoloPueblo/LLM_MERGE_CC3", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 1.073860880696609 - } - }, - { - "id": "MarinaraSpaghetti/NemoReRemix-12B_bfloat16_9ebc7c2d4577b663fb050d86ed91fb676eb2e1f2_False", - "model": { - "name": "MarinaraSpaghetti/NemoReRemix-12B", - "sha": "9ebc7c2d4577b663fb050d86ed91fb676eb2e1f2", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.03458082808498, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.33425089872649016, - "normalized_score": 33.42508987264902 - }, - "bbh": { - "name": "BBH", - "value": 0.5536511805668158, - "normalized_score": 36.12470152357596 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09063444108761329, - "normalized_score": 9.06344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3179530201342282, - "normalized_score": 
9.060402684563762 - }, - "musr": { - "name": "MUSR", - "value": 0.4501458333333333, - "normalized_score": 15.668229166666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3597905585106383, - "normalized_score": 28.865617612293136 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-14", - "submission_date": "2024-09-17", - "generation": 1, - "base_model": "MarinaraSpaghetti/NemoReRemix-12B (Merge)", - "hub_license": "", - "hub_hearts": 27, - "params_billions": 12.248, - "co2_cost": 3.1540150968180765 - } - }, - { - "id": "MarinaraSpaghetti/Nemomix-v4.0-12B_bfloat16_69fbd8449ce3e916fc257e982a78189308123074_True", - "model": { - "name": "MarinaraSpaghetti/Nemomix-v4.0-12B", - "sha": "69fbd8449ce3e916fc257e982a78189308123074", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 24.467976638019234, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5574664113441224, - "normalized_score": 55.74664113441224 - }, - "bbh": { - "name": "BBH", - "value": 0.5274986611124783, - "normalized_score": 32.879942700903165 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10800604229607251, - "normalized_score": 10.80060422960725 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.42444791666666665, - "normalized_score": 12.75598958333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36128656914893614, - "normalized_score": 29.031841016548455 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-30", - "submission_date": "2024-08-02", - "generation": 1, - "base_model": "MarinaraSpaghetti/Nemomix-v4.0-12B (Merge)", - "hub_license": "", - "hub_hearts": 26, - "params_billions": 12.248, - "co2_cost": 2.7090968927914263 - } - }, - { - "id": "Marsouuu/MiniMathExpert-2_61B-ECE-PRYMMAL-Martial_bfloat16_df21939a22e7233ebb7d62dfaf1c854facc5c772_False", - "model": { - "name": "Marsouuu/MiniMathExpert-2_61B-ECE-PRYMMAL-Martial", - "sha": "df21939a22e7233ebb7d62dfaf1c854facc5c772", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 12.494620122547147, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25484159807089635, - "normalized_score": 25.484159807089632 - }, - "bbh": { - "name": "BBH", - "value": 0.3952730330493959, - "normalized_score": 15.297499289020854 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07401812688821752, - "normalized_score": 7.401812688821751 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2751677852348993, - "normalized_score": 3.355704697986576 - }, - "musr": { - "name": "MUSR", - "value": 0.40832291666666665, - "normalized_score": 9.273697916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.22739361702127658, - "normalized_score": 14.154846335697396 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-06", - 
"submission_date": "2024-10-06", - "generation": 1, - "base_model": "Marsouuu/MiniMathExpert-2_61B-ECE-PRYMMAL-Martial (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 2.614, - "co2_cost": 2.9055799546244083 - } - }, - { - "id": "Marsouuu/MiniQwenMathExpert-ECE-PRYMMAL-Martial_bfloat16_0787682e65f7763ef978c4cf2e32803be8b49298_False", - "model": { - "name": "Marsouuu/MiniQwenMathExpert-ECE-PRYMMAL-Martial", - "sha": "0787682e65f7763ef978c4cf2e32803be8b49298", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 15.081989145779028, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2794961812435449, - "normalized_score": 27.949618124354487 - }, - "bbh": { - "name": "BBH", - "value": 0.42301343044108936, - "normalized_score": 19.019948525917457 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11404833836858005, - "normalized_score": 11.404833836858005 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28187919463087246, - "normalized_score": 4.250559284116329 - }, - "musr": { - "name": "MUSR", - "value": 0.38673958333333336, - "normalized_score": 6.509114583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2922207446808511, - "normalized_score": 21.35786052009456 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-07", - "submission_date": "2024-10-07", - "generation": 1, - "base_model": "Marsouuu/MiniQwenMathExpert-ECE-PRYMMAL-Martial (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.777, - "co2_cost": 1.3783159566860677 - } - }, - { - "id": "Marsouuu/MistralBase-4x7B-MoE-ECE-PRYMMAL-Martial_bfloat16_9cb9e74d2a65abd6458dffac103ad99c3b8f5154_False", - "model": { - "name": "Marsouuu/MistralBase-4x7B-MoE-ECE-PRYMMAL-Martial", - "sha": "9cb9e74d2a65abd6458dffac103ad99c3b8f5154", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 6.761759514131008, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16973629968483622, - "normalized_score": 16.973629968483625 - }, - "bbh": { - "name": "BBH", - "value": 0.3464368053320647, - "normalized_score": 8.870227428732667 - }, - "math": { - "name": "MATH Level 5", - "value": 0.014350453172205438, - "normalized_score": 1.4350453172205437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.3990833333333333, - "normalized_score": 7.8520833333333355 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.13788231382978725, - "normalized_score": 4.209145981087471 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-03", - "submission_date": "2024-10-03", - "generation": 1, - "base_model": "Marsouuu/MistralBase-4x7B-MoE-ECE-PRYMMAL-Martial (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 24.16, - "co2_cost": 3.8130180920348336 - } - }, - { - "id": "Marsouuu/general3B-ECE-PRYMMAL-Martial_bfloat16_42992194a835a6fcad1edf1f94527ac08a7a60fb_False", - "model": { - "name": 
"Marsouuu/general3B-ECE-PRYMMAL-Martial", - "sha": "42992194a835a6fcad1edf1f94527ac08a7a60fb", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 22.978904737174783, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.27222658102722996, - "normalized_score": 27.222658102722995 - }, - "bbh": { - "name": "BBH", - "value": 0.5394350977017502, - "normalized_score": 35.70087336193955 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15483383685800603, - "normalized_score": 15.483383685800604 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3196308724832215, - "normalized_score": 9.284116331096197 - }, - "musr": { - "name": "MUSR", - "value": 0.4700520833333333, - "normalized_score": 18.223177083333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38763297872340424, - "normalized_score": 31.959219858156025 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-23", - "submission_date": "2024-10-23", - "generation": 1, - "base_model": "Marsouuu/general3B-ECE-PRYMMAL-Martial (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.821, - "co2_cost": 1.4488851959454079 - } - }, - { - "id": "Marsouuu/general3Bv2-ECE-PRYMMAL-Martial_float16_c6c5b3b0ecf9d04fc3a35bc4135df7cc08be3eb9_False", - "model": { - "name": "Marsouuu/general3Bv2-ECE-PRYMMAL-Martial", - "sha": "c6c5b3b0ecf9d04fc3a35bc4135df7cc08be3eb9", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 31.917859287211872, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5692817280371636, - "normalized_score": 56.928172803716365 - }, - "bbh": { - "name": "BBH", - "value": 0.5636569831901026, - "normalized_score": 37.66776306891555 - }, - "math": { - "name": "MATH Level 5", - "value": 0.36706948640483383, - "normalized_score": 36.70694864048338 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3104026845637584, - "normalized_score": 8.05369127516779 - }, - "musr": { - "name": "MUSR", - "value": 0.43960416666666663, - "normalized_score": 13.28385416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4498005319148936, - "normalized_score": 38.86672576832151 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-06", - "submission_date": "2024-11-06", - "generation": 1, - "base_model": "Marsouuu/general3Bv2-ECE-PRYMMAL-Martial (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.434263453665052 - } - }, - { - "id": "Marsouuu/lareneg1_78B-ECE-PRYMMAL-Martial_bfloat16_907a62bb805596e2105c9dca28c0e9ed1e9fd402_False", - "model": { - "name": "Marsouuu/lareneg1_78B-ECE-PRYMMAL-Martial", - "sha": "907a62bb805596e2105c9dca28c0e9ed1e9fd402", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 15.081989145779028, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2794961812435449, - "normalized_score": 27.949618124354487 - }, - "bbh": 
{ - "name": "BBH", - "value": 0.42301343044108936, - "normalized_score": 19.019948525917457 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11404833836858005, - "normalized_score": 11.404833836858005 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28187919463087246, - "normalized_score": 4.250559284116329 - }, - "musr": { - "name": "MUSR", - "value": 0.38673958333333336, - "normalized_score": 6.509114583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2922207446808511, - "normalized_score": 21.35786052009456 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-23", - "submission_date": "2024-10-23", - "generation": 1, - "base_model": "Marsouuu/lareneg1_78B-ECE-PRYMMAL-Martial (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.777, - "co2_cost": 1.2657841773350142 - } - }, - { - "id": "Marsouuu/lareneg3B-ECE-PRYMMAL-Martial_bfloat16_2c8be0ac28ae27dd441298e83f19e17409d89f4e_False", - "model": { - "name": "Marsouuu/lareneg3B-ECE-PRYMMAL-Martial", - "sha": "2c8be0ac28ae27dd441298e83f19e17409d89f4e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 23.942055269453448, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.33032908239028, - "normalized_score": 33.032908239028 - }, - "bbh": { - "name": "BBH", - "value": 0.5453325807578268, - "normalized_score": 36.35072191454026 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15181268882175228, - "normalized_score": 15.181268882175228 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32466442953020136, - "normalized_score": 9.955257270693513 - }, - "musr": { - "name": "MUSR", - "value": 0.47246875, - "normalized_score": 18.39192708333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37666223404255317, - "normalized_score": 30.740248226950357 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-06", - "submission_date": "2024-11-06", - "generation": 1, - "base_model": "Marsouuu/lareneg3B-ECE-PRYMMAL-Martial (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.821, - "co2_cost": 0.9819892075314531 - } - }, - { - "id": "Marsouuu/lareneg3Bv2-ECE-PRYMMAL-Martial_float16_ff92a6f314c392085af6c85f60a7da745e064653_False", - "model": { - "name": "Marsouuu/lareneg3Bv2-ECE-PRYMMAL-Martial", - "sha": "ff92a6f314c392085af6c85f60a7da745e064653", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 32.112665840964375, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5753267995585047, - "normalized_score": 57.532679955850476 - }, - "bbh": { - "name": "BBH", - "value": 0.562336014537904, - "normalized_score": 37.471640114731564 - }, - "math": { - "name": "MATH Level 5", - "value": 0.36555891238670696, - "normalized_score": 36.5558912386707 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3196308724832215, - "normalized_score": 9.284116331096197 - }, - "musr": { - "name": "MUSR", - "value": 0.4369375, - "normalized_score": 12.817187500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 
0.45113031914893614, - "normalized_score": 39.01447990543734 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-06", - "submission_date": "2024-11-06", - "generation": 1, - "base_model": "Marsouuu/lareneg3Bv2-ECE-PRYMMAL-Martial (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 7.616, - "co2_cost": 1.3314888159040754 - } - }, - { - "id": "MaziyarPanahi/Calme-4x7B-MoE-v0.1_bfloat16_e2fab90eef37977002947684043f139a1660f519_False", - "model": { - "name": "MaziyarPanahi/Calme-4x7B-MoE-v0.1", - "sha": "e2fab90eef37977002947684043f139a1660f519", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 20.023903328526014, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4315205875964663, - "normalized_score": 43.15205875964662 - }, - "bbh": { - "name": "BBH", - "value": 0.5102819889174134, - "normalized_score": 31.26187805626151 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08006042296072508, - "normalized_score": 8.006042296072508 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28187919463087246, - "normalized_score": 4.250559284116329 - }, - "musr": { - "name": "MUSR", - "value": 0.4198854166666666, - "normalized_score": 10.619010416666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3056848404255319, - "normalized_score": 22.853871158392433 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-03-17", - "submission_date": "2024-08-05", - "generation": 0, - "base_model": "MaziyarPanahi/Calme-4x7B-MoE-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 24.154, - "co2_cost": 2.7219662057105984 - } - }, - { - "id": "MaziyarPanahi/Calme-4x7B-MoE-v0.2_bfloat16_ffef41baf94b3f88b30cf0aeb3fd72d9e4187161_False", - "model": { - "name": "MaziyarPanahi/Calme-4x7B-MoE-v0.2", - "sha": "ffef41baf94b3f88b30cf0aeb3fd72d9e4187161", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 20.176360946073107, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.429447200095746, - "normalized_score": 42.944720009574596 - }, - "bbh": { - "name": "BBH", - "value": 0.5110766802558263, - "normalized_score": 31.396819621762447 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07401812688821752, - "normalized_score": 7.401812688821751 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.43176041666666665, - "normalized_score": 12.536718750000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30576795212765956, - "normalized_score": 22.86310579196217 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-03-17", - "submission_date": "2024-08-05", - "generation": 0, - "base_model": "MaziyarPanahi/Calme-4x7B-MoE-v0.2", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 24.154, - "co2_cost": 
2.83142235775091 - } - }, - { - "id": "MaziyarPanahi/Llama-3-70B-Instruct-v0.1_bfloat16_6db1cb4256525fc5429734ddc0eb941d08d0be30_True", - "model": { - "name": "MaziyarPanahi/Llama-3-70B-Instruct-v0.1", - "sha": "6db1cb4256525fc5429734ddc0eb941d08d0be30", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.33391343739024, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47143800671108216, - "normalized_score": 47.14380067110822 - }, - "bbh": { - "name": "BBH", - "value": 0.5366257615951637, - "normalized_score": 32.71291726367119 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18051359516616314, - "normalized_score": 18.051359516616312 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28439597315436244, - "normalized_score": 4.5861297539149914 - }, - "musr": { - "name": "MUSR", - "value": 0.4433020833333334, - "normalized_score": 15.312760416666672 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4617686170212766, - "normalized_score": 40.196513002364064 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-14", - "submission_date": "2024-06-26", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3-70B", - "hub_license": "llama3", - "hub_hearts": 1, - "params_billions": 70.554, - "co2_cost": 22.52797197482258 - } - }, - { - "id": "MaziyarPanahi/Llama-3-8B-Instruct-v0.10_bfloat16_4411eb9f6f5e4c462a6bdbc64c26dcc123100b66_True", - "model": { - "name": "MaziyarPanahi/Llama-3-8B-Instruct-v0.10", - "sha": "4411eb9f6f5e4c462a6bdbc64c26dcc123100b66", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.797403388133414, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7667433520835827, - "normalized_score": 76.67433520835827 - }, - "bbh": { - "name": "BBH", - "value": 0.4924311866686311, - "normalized_score": 27.924674302120888 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05740181268882175, - "normalized_score": 5.740181268882175 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3087248322147651, - "normalized_score": 7.829977628635347 - }, - "musr": { - "name": "MUSR", - "value": 0.42143749999999996, - "normalized_score": 10.813020833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38622007978723405, - "normalized_score": 31.80223108747045 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-04", - "submission_date": "2024-06-26", - "generation": 4, - "base_model": "meta-llama/Meta-Llama-3-8B-Instruct", - "hub_license": "other", - "hub_hearts": 6, - "params_billions": 8.03, - "co2_cost": 2.284261240148562 - } - }, - { - "id": "MaziyarPanahi/Llama-3-8B-Instruct-v0.8_bfloat16_94d222b8447b600b9836da4036df9490b59fe966_True", - "model": { - "name": "MaziyarPanahi/Llama-3-8B-Instruct-v0.8", - "sha": "94d222b8447b600b9836da4036df9490b59fe966", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.888884373724007, - "has_chat_template": true - }, - "evaluations": { - 
"ifeval": { - "name": "IFEval", - "value": 0.7527549125209998, - "normalized_score": 75.27549125209998 - }, - "bbh": { - "name": "BBH", - "value": 0.49627836815949883, - "normalized_score": 28.270418759783485 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07779456193353475, - "normalized_score": 7.779456193353475 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.42019791666666667, - "normalized_score": 10.924739583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3853058510638298, - "normalized_score": 31.700650118203306 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-01", - "submission_date": "2024-07-11", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3-8B-Instruct", - "hub_license": "other", - "hub_hearts": 8, - "params_billions": 8.03, - "co2_cost": 3.6248466169697076 - } - }, - { - "id": "MaziyarPanahi/Llama-3-8B-Instruct-v0.9_bfloat16_ddf91fdc0a3ab5e5d76864f1c4cf44e5adacd565_True", - "model": { - "name": "MaziyarPanahi/Llama-3-8B-Instruct-v0.9", - "sha": "ddf91fdc0a3ab5e5d76864f1c4cf44e5adacd565", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.786644392116216, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.763046494412603, - "normalized_score": 76.3046494412603 - }, - "bbh": { - "name": "BBH", - "value": 0.4936132794870085, - "normalized_score": 27.903013285410182 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07326283987915408, - "normalized_score": 7.326283987915408 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.4148020833333333, - "normalized_score": 9.850260416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3845578457446808, - "normalized_score": 31.61753841607564 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-30", - "submission_date": "2024-08-06", - "generation": 3, - "base_model": "meta-llama/Meta-Llama-3-8B-Instruct", - "hub_license": "other", - "hub_hearts": 6, - "params_billions": 8.03, - "co2_cost": 1.5327136470619735 - } - }, - { - "id": "MaziyarPanahi/Qwen1.5-MoE-A2.7B-Wikihow_bfloat16_191cf0630b7b50fe1fc9be198e1f203935df1428_True", - "model": { - "name": "MaziyarPanahi/Qwen1.5-MoE-A2.7B-Wikihow", - "sha": "191cf0630b7b50fe1fc9be198e1f203935df1428", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2MoeForCausalLM", - "average_score": 12.325434589632001, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.29543278501043896, - "normalized_score": 29.5432785010439 - }, - "bbh": { - "name": "BBH", - "value": 0.3920071454890602, - "normalized_score": 15.47343942401254 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0823262839879154, - "normalized_score": 8.23262839879154 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2751677852348993, - "normalized_score": 3.355704697986576 - }, - "musr": { - "name": "MUSR", 
- "value": 0.35021875, - "normalized_score": 2.010677083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.23803191489361702, - "normalized_score": 15.336879432624112 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-03-30", - "submission_date": "2024-09-12", - "generation": 1, - "base_model": "Qwen/Qwen1.5-MoE-A2.7B", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 14.316, - "co2_cost": 16.612165900607092 - } - }, - { - "id": "MaziyarPanahi/Qwen2-7B-Instruct-v0.1_bfloat16_5123ecd76cefd4ef3b6009542b13e060d03e5232_False", - "model": { - "name": "MaziyarPanahi/Qwen2-7B-Instruct-v0.1", - "sha": "5123ecd76cefd4ef3b6009542b13e060d03e5232", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 22.98150895877193, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.33522498082864577, - "normalized_score": 33.52249808286457 - }, - "bbh": { - "name": "BBH", - "value": 0.5123061019250074, - "normalized_score": 31.92360727430821 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2212990936555891, - "normalized_score": 22.129909365558913 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28523489932885904, - "normalized_score": 4.697986577181204 - }, - "musr": { - "name": "MUSR", - "value": 0.44347916666666665, - "normalized_score": 13.868229166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3857214095744681, - "normalized_score": 31.746823286052013 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-27", - "submission_date": "2024-07-07", - "generation": 1, - "base_model": "Qwen/Qwen2-7B", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 2.9071985413619625 - } - }, - { - "id": "MaziyarPanahi/Qwen2-7B-Instruct-v0.8_bfloat16_a6f9d0e11efcba18c905554ab43b877ead187a77_False", - "model": { - "name": "MaziyarPanahi/Qwen2-7B-Instruct-v0.8", - "sha": "a6f9d0e11efcba18c905554ab43b877ead187a77", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 19.558137588276004, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.27747266142723526, - "normalized_score": 27.74726614272353 - }, - "bbh": { - "name": "BBH", - "value": 0.4637108491317945, - "normalized_score": 25.532524528361474 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17673716012084592, - "normalized_score": 17.673716012084594 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2936241610738255, - "normalized_score": 5.8165548098433995 - }, - "musr": { - "name": "MUSR", - "value": 0.4293125, - "normalized_score": 12.0640625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3566323138297872, - "normalized_score": 28.514701536643027 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-27", - "submission_date": "2024-07-07", - "generation": 1, - "base_model": "Qwen/Qwen2-7B", - "hub_license": "apache-2.0", 
- "hub_hearts": 6, - "params_billions": 7.616, - "co2_cost": 2.668212714788885 - } - }, - { - "id": "MaziyarPanahi/calme-2.1-llama3.1-70b_bfloat16_f39ad1c90b0f30379e80756d29c6533cf84c362a_True", - "model": { - "name": "MaziyarPanahi/calme-2.1-llama3.1-70b", - "sha": "f39ad1c90b0f30379e80756d29c6533cf84c362a", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 40.936032608772955, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8434298771703524, - "normalized_score": 84.34298771703524 - }, - "bbh": { - "name": "BBH", - "value": 0.644755327496552, - "normalized_score": 48.55364600487639 - }, - "math": { - "name": "MATH Level 5", - "value": 0.41012084592145015, - "normalized_score": 41.012084592145015 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32802013422818793, - "normalized_score": 10.402684563758392 - }, - "musr": { - "name": "MUSR", - "value": 0.43803125000000004, - "normalized_score": 13.720572916666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5282579787234043, - "normalized_score": 47.58421985815603 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-23", - "submission_date": "2024-07-24", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-70B", - "hub_license": "", - "hub_hearts": 4, - "params_billions": 70.554, - "co2_cost": 30.909679147607232 - } - }, - { - "id": "MaziyarPanahi/calme-2.1-phi3-4b_bfloat16_6764c79badacba5fa3584d2d2593d762caa1d17d_True", - "model": { - "name": "MaziyarPanahi/calme-2.1-phi3-4b", - "sha": "6764c79badacba5fa3584d2d2593d762caa1d17d", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 25.985365252885945, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.552520645221346, - "normalized_score": 55.2520645221346 - }, - "bbh": { - "name": "BBH", - "value": 0.5595320442699866, - "normalized_score": 38.1242795228128 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13141993957703926, - "normalized_score": 13.141993957703926 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3296979865771812, - "normalized_score": 10.626398210290827 - }, - "musr": { - "name": "MUSR", - "value": 0.40153124999999995, - "normalized_score": 8.258072916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3745844414893617, - "normalized_score": 30.50938238770685 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-09", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "microsoft/Phi-3-mini-4k-instruct", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 3.821, - "co2_cost": 1.5049372475909382 - } - }, - { - "id": "MaziyarPanahi/calme-2.1-phi3.5-4b_bfloat16_583b7f382a8ed35f6f7c09f2950f0f2346945a83_True", - "model": { - "name": "MaziyarPanahi/calme-2.1-phi3.5-4b", - "sha": "583b7f382a8ed35f6f7c09f2950f0f2346945a83", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 28.000378443622285, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", 
- "value": 0.5659095644002359, - "normalized_score": 56.59095644002358 - }, - "bbh": { - "name": "BBH", - "value": 0.5483695590203843, - "normalized_score": 36.110096929573025 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2039274924471299, - "normalized_score": 20.39274924471299 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34395973154362414, - "normalized_score": 12.527964205816552 - }, - "musr": { - "name": "MUSR", - "value": 0.3994583333333333, - "normalized_score": 9.765624999999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3935339095744681, - "normalized_score": 32.61487884160757 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-23", - "submission_date": "2024-08-23", - "generation": 1, - "base_model": "microsoft/Phi-3.5-mini-instruct", - "hub_license": "mit", - "hub_hearts": 4, - "params_billions": 3.821, - "co2_cost": 2.0091025057578364 - } - }, - { - "id": "MaziyarPanahi/calme-2.1-qwen2-72b_bfloat16_0369c39770f45f2464587918f2dbdb8449ea3a0d_True", - "model": { - "name": "MaziyarPanahi/calme-2.1-qwen2-72b", - "sha": "0369c39770f45f2464587918f2dbdb8449ea3a0d", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 44.39894388647036, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8162774770941104, - "normalized_score": 81.62774770941103 - }, - "bbh": { - "name": "BBH", - "value": 0.6965560971922596, - "normalized_score": 57.3258823447103 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4078549848942598, - "normalized_score": 40.78549848942598 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3808724832214765, - "normalized_score": 17.4496644295302 - }, - "musr": { - "name": "MUSR", - "value": 0.47321875, - "normalized_score": 20.15234375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5414727393617021, - "normalized_score": 49.05252659574468 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-08", - "submission_date": "2024-06-26", - "generation": 2, - "base_model": "Qwen/Qwen2-72B", - "hub_license": "other", - "hub_hearts": 28, - "params_billions": 72.699, - "co2_cost": 26.269742406950456 - } - }, - { - "id": "MaziyarPanahi/calme-2.1-qwen2-7b_bfloat16_5aac57e2290f7c49af88a9cb9883ce25b58882a1_True", - "model": { - "name": "MaziyarPanahi/calme-2.1-qwen2-7b", - "sha": "5aac57e2290f7c49af88a9cb9883ce25b58882a1", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 23.5418799100886, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3816119008674761, - "normalized_score": 38.16119008674761 - }, - "bbh": { - "name": "BBH", - "value": 0.5045925887362795, - "normalized_score": 31.00709744702013 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2311178247734139, - "normalized_score": 23.11178247734139 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28942953020134227, - "normalized_score": 5.257270693512303 - }, - "musr": { - "name": "MUSR", - "value": 0.44369791666666664, - "normalized_score": 13.795572916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 
0.3692652925531915, - "normalized_score": 29.918365839243506 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-27", - "submission_date": "2024-09-18", - "generation": 1, - "base_model": "Qwen/Qwen2-7B", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 2.8685104076670456 - } - }, - { - "id": "MaziyarPanahi/calme-2.1-qwen2.5-72b_bfloat16_eb6c92dec932070ea872f39469ca5b9daf2d34e6_True", - "model": { - "name": "MaziyarPanahi/calme-2.1-qwen2.5-72b", - "sha": "eb6c92dec932070ea872f39469ca5b9daf2d34e6", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 47.85672202584548, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8662360315075112, - "normalized_score": 86.62360315075111 - }, - "bbh": { - "name": "BBH", - "value": 0.7261624327092416, - "normalized_score": 61.65570318314716 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5913897280966768, - "normalized_score": 59.13897280966768 - }, - "gpqa": { - "name": "GPQA", - "value": 0.36325503355704697, - "normalized_score": 15.100671140939594 - }, - "musr": { - "name": "MUSR", - "value": 0.42984375, - "normalized_score": 13.297135416666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5619182180851063, - "normalized_score": 51.3242464539007 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-19", - "submission_date": "2024-09-26", - "generation": 1, - "base_model": "Qwen/Qwen2.5-72B", - "hub_license": "other", - "hub_hearts": 1, - "params_billions": 72.7, - "co2_cost": 29.497787277892904 - } - }, - { - "id": "MaziyarPanahi/calme-2.1-rys-78b_bfloat16_e746f5ddc0c9b31a2382d985a4ec87fa910847c7_True", - "model": { - "name": "MaziyarPanahi/calme-2.1-rys-78b", - "sha": "e746f5ddc0c9b31a2382d985a4ec87fa910847c7", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 44.64399850290042, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8135547015252862, - "normalized_score": 81.35547015252862 - }, - "bbh": { - "name": "BBH", - "value": 0.7097861139530462, - "normalized_score": 59.4700307859535 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3942598187311178, - "normalized_score": 39.42598187311178 - }, - "gpqa": { - "name": "GPQA", - "value": 0.39429530201342283, - "normalized_score": 19.239373601789712 - }, - "musr": { - "name": "MUSR", - "value": 0.4693125, - "normalized_score": 18.99739583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5443816489361702, - "normalized_score": 49.37573877068559 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-06", - "submission_date": "2024-08-08", - "generation": 1, - "base_model": "dnhkng/RYS-XLarge", - "hub_license": "mit", - "hub_hearts": 3, - "params_billions": 77.965, - "co2_cost": 28.66457680547633 - } - }, - { - "id": "MaziyarPanahi/calme-2.2-llama3-70b_bfloat16_95366b974baedee4d95c1e841bc3d15e94753804_True", - "model": { - 
"name": "MaziyarPanahi/calme-2.2-llama3-70b", - "sha": "95366b974baedee4d95c1e841bc3d15e94753804", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 38.14006369052438, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8208486814984242, - "normalized_score": 82.0848681498424 - }, - "bbh": { - "name": "BBH", - "value": 0.6435431762417703, - "normalized_score": 48.57170594999846 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2394259818731118, - "normalized_score": 23.94259818731118 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3414429530201342, - "normalized_score": 12.192393736017896 - }, - "musr": { - "name": "MUSR", - "value": 0.4445729166666667, - "normalized_score": 15.30494791666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5206948138297872, - "normalized_score": 46.74386820330969 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-27", - "submission_date": "2024-06-26", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3-70B", - "hub_license": "llama3", - "hub_hearts": 17, - "params_billions": 70.554, - "co2_cost": 21.256546615022618 - } - }, - { - "id": "MaziyarPanahi/calme-2.2-llama3.1-70b_bfloat16_c81ac05ed2c2344e9fd366cfff197da406ef5234_True", - "model": { - "name": "MaziyarPanahi/calme-2.2-llama3.1-70b", - "sha": "c81ac05ed2c2344e9fd366cfff197da406ef5234", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 43.31100681386724, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8592667455684251, - "normalized_score": 85.92667455684251 - }, - "bbh": { - "name": "BBH", - "value": 0.6792920009427085, - "normalized_score": 54.20646208605566 - }, - "math": { - "name": "MATH Level 5", - "value": 0.43655589123867067, - "normalized_score": 43.65558912386707 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32466442953020136, - "normalized_score": 9.955257270693513 - }, - "musr": { - "name": "MUSR", - "value": 0.45415625000000004, - "normalized_score": 17.06953125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5414727393617021, - "normalized_score": 49.05252659574468 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-09", - "submission_date": "2024-09-09", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-70B", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 70.554, - "co2_cost": 31.683647635659195 - } - }, - { - "id": "MaziyarPanahi/calme-2.2-phi3-4b_bfloat16_c0a366a4c01d7e724ceba7e2f2c19251983423fe_True", - "model": { - "name": "MaziyarPanahi/calme-2.2-phi3-4b", - "sha": "c0a366a4c01d7e724ceba7e2f2c19251983423fe", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 25.232640533631113, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5069083365470286, - "normalized_score": 50.69083365470286 - }, - "bbh": { - "name": "BBH", - "value": 0.5529604896487258, - "normalized_score": 37.733734155011526 - }, - "math": { - "name": "MATH Level 
5", - "value": 0.14501510574018128, - "normalized_score": 14.501510574018129 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3213087248322148, - "normalized_score": 9.507829977628639 - }, - "musr": { - "name": "MUSR", - "value": 0.3975625, - "normalized_score": 7.695312500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3813996010638298, - "normalized_score": 31.26662234042553 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-10", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "microsoft/Phi-3-mini-4k-instruct", - "hub_license": "mit", - "hub_hearts": 2, - "params_billions": 3.821, - "co2_cost": 1.5935829335350737 - } - }, - { - "id": "MaziyarPanahi/calme-2.2-qwen2-72b_bfloat16_529e9bd80a76d943409bc92bb246aa7ca63dd9e6_True", - "model": { - "name": "MaziyarPanahi/calme-2.2-qwen2-72b", - "sha": "529e9bd80a76d943409bc92bb246aa7ca63dd9e6", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 44.09009576064759, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8008151704145002, - "normalized_score": 80.08151704145003 - }, - "bbh": { - "name": "BBH", - "value": 0.6939595229335245, - "normalized_score": 56.79594225047665 - }, - "math": { - "name": "MATH Level 5", - "value": 0.45317220543806647, - "normalized_score": 45.31722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.37416107382550334, - "normalized_score": 16.554809843400445 - }, - "musr": { - "name": "MUSR", - "value": 0.4508020833333333, - "normalized_score": 16.516927083333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.543467420212766, - "normalized_score": 49.27415780141844 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-09", - "submission_date": "2024-08-06", - "generation": 1, - "base_model": "Qwen/Qwen2-72B", - "hub_license": "other", - "hub_hearts": 5, - "params_billions": 72.706, - "co2_cost": 27.03482915645056 - } - }, - { - "id": "MaziyarPanahi/calme-2.2-qwen2-7b_bfloat16_bbb1d119f75c5b2eaa8978286808bd59cae04997_True", - "model": { - "name": "MaziyarPanahi/calme-2.2-qwen2-7b", - "sha": "bbb1d119f75c5b2eaa8978286808bd59cae04997", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 23.58331943928233, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.35972996094806226, - "normalized_score": 35.97299609480623 - }, - "bbh": { - "name": "BBH", - "value": 0.5214913750127922, - "normalized_score": 33.10936559556884 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21450151057401812, - "normalized_score": 21.45015105740181 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.43582291666666667, - "normalized_score": 13.277864583333338 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3898769946808511, - "normalized_score": 32.208554964539005 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - 
"is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-27", - "submission_date": "2024-09-18", - "generation": 1, - "base_model": "Qwen/Qwen2-7B", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 3.0974991050428233 - } - }, - { - "id": "MaziyarPanahi/calme-2.2-qwen2.5-72b_bfloat16_c6c7fdf70d8bf81364108975eb8ba78eecac83d4_True", - "model": { - "name": "MaziyarPanahi/calme-2.2-qwen2.5-72b", - "sha": "c6c7fdf70d8bf81364108975eb8ba78eecac83d4", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 47.22457675508573, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8476763875406145, - "normalized_score": 84.76763875406145 - }, - "bbh": { - "name": "BBH", - "value": 0.7276399007138082, - "normalized_score": 61.80360419146786 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5891238670694864, - "normalized_score": 58.91238670694864 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35906040268456374, - "normalized_score": 14.541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.4206666666666667, - "normalized_score": 12.016666666666671 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.561751994680851, - "normalized_score": 51.305777186761226 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-19", - "submission_date": "2024-09-26", - "generation": 1, - "base_model": "Qwen/Qwen2.5-72B", - "hub_license": "other", - "hub_hearts": 6, - "params_billions": 72.7, - "co2_cost": 28.5161275490216 - } - }, - { - "id": "MaziyarPanahi/calme-2.2-rys-78b_bfloat16_8d0dde25c9042705f65559446944a19259c3fc8e_True", - "model": { - "name": "MaziyarPanahi/calme-2.2-rys-78b", - "sha": "8d0dde25c9042705f65559446944a19259c3fc8e", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 44.38633440385336, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7986420475449585, - "normalized_score": 79.86420475449586 - }, - "bbh": { - "name": "BBH", - "value": 0.7081014602379213, - "normalized_score": 59.268645675184494 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4070996978851964, - "normalized_score": 40.70996978851964 - }, - "gpqa": { - "name": "GPQA", - "value": 0.40687919463087246, - "normalized_score": 20.917225950782996 - }, - "musr": { - "name": "MUSR", - "value": 0.45356250000000004, - "normalized_score": 16.82864583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.538563829787234, - "normalized_score": 48.72931442080378 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-06", - "submission_date": "2024-08-08", - "generation": 1, - "base_model": "dnhkng/RYS-XLarge", - "hub_license": "mit", - "hub_hearts": 3, - "params_billions": 77.965, - "co2_cost": 27.046712502629788 - } - }, - { - "id": "MaziyarPanahi/calme-2.3-llama3-70b_bfloat16_bd17453eaae0e36d1e1e17da13fdd155fce91a29_True", - "model": { - "name": "MaziyarPanahi/calme-2.3-llama3-70b", - "sha": "bd17453eaae0e36d1e1e17da13fdd155fce91a29", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": 
"Original", - "architecture": "LlamaForCausalLM", - "average_score": 37.067032265745496, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8010401290797307, - "normalized_score": 80.10401290797307 - }, - "bbh": { - "name": "BBH", - "value": 0.6399173489368603, - "normalized_score": 48.0085850617923 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2326283987915408, - "normalized_score": 23.26283987915408 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33808724832214765, - "normalized_score": 11.74496644295302 - }, - "musr": { - "name": "MUSR", - "value": 0.42612500000000003, - "normalized_score": 12.565625000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5204454787234043, - "normalized_score": 46.716164302600475 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-27", - "submission_date": "2024-08-30", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3-70B", - "hub_license": "llama3", - "hub_hearts": 4, - "params_billions": 70.554, - "co2_cost": 19.273618126297322 - } - }, - { - "id": "MaziyarPanahi/calme-2.3-llama3.1-70b_bfloat16_a39c79250721b75beefa1b1763895eafd010f6f6_True", - "model": { - "name": "MaziyarPanahi/calme-2.3-llama3.1-70b", - "sha": "a39c79250721b75beefa1b1763895eafd010f6f6", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 43.275189943129554, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8604657863358112, - "normalized_score": 86.04657863358113 - }, - "bbh": { - "name": "BBH", - "value": 0.6871653740091753, - "normalized_score": 55.58549511699308 - }, - "math": { - "name": "MATH Level 5", - "value": 0.39274924471299094, - "normalized_score": 39.274924471299094 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34395973154362414, - "normalized_score": 12.527964205816552 - }, - "musr": { - "name": "MUSR", - "value": 0.45682291666666663, - "normalized_score": 17.736197916666658 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5363198138297872, - "normalized_score": 48.4799793144208 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-10", - "submission_date": "2024-09-18", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-70B", - "hub_license": "", - "hub_hearts": 3, - "params_billions": 70.554, - "co2_cost": 28.12111574040212 - } - }, - { - "id": "MaziyarPanahi/calme-2.3-phi3-4b_bfloat16_e1f70c3724c728aadd1c7c1bb279487494f7059e_True", - "model": { - "name": "MaziyarPanahi/calme-2.3-phi3-4b", - "sha": "e1f70c3724c728aadd1c7c1bb279487494f7059e", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 24.981612770364112, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49264507063480456, - "normalized_score": 49.26450706348045 - }, - "bbh": { - "name": "BBH", - "value": 0.5537867816134527, - "normalized_score": 37.65889241962552 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1472809667673716, - "normalized_score": 14.72809667673716 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3179530201342282, - 
"normalized_score": 9.060402684563762 - }, - "musr": { - "name": "MUSR", - "value": 0.3988333333333333, - "normalized_score": 7.754166666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3828125, - "normalized_score": 31.42361111111111 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-10", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "microsoft/Phi-3-mini-4k-instruct", - "hub_license": "mit", - "hub_hearts": 9, - "params_billions": 3.821, - "co2_cost": 1.6758616959307877 - } - }, - { - "id": "MaziyarPanahi/calme-2.3-qwen2-72b_bfloat16_12ff2e800f968e867a580c072905cf4671da066f_True", - "model": { - "name": "MaziyarPanahi/calme-2.3-qwen2-72b", - "sha": "12ff2e800f968e867a580c072905cf4671da066f", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 33.00083092091415, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3849840645044039, - "normalized_score": 38.498406450440385 - }, - "bbh": { - "name": "BBH", - "value": 0.6576306700720502, - "normalized_score": 51.22830430718469 - }, - "math": { - "name": "MATH Level 5", - "value": 0.31722054380664655, - "normalized_score": 31.722054380664655 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3716442953020134, - "normalized_score": 16.21923937360179 - }, - "musr": { - "name": "MUSR", - "value": 0.4112395833333333, - "normalized_score": 11.23828125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5418882978723404, - "normalized_score": 49.09869976359338 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-06", - "submission_date": "2024-09-15", - "generation": 1, - "base_model": "Qwen/Qwen2-72B", - "hub_license": "other", - "hub_hearts": 2, - "params_billions": 72.706, - "co2_cost": 38.89696999088852 - } - }, - { - "id": "MaziyarPanahi/calme-2.3-qwen2-7b_bfloat16_ca39e60052a600a709e03fefceabd9620e0b66d7_True", - "model": { - "name": "MaziyarPanahi/calme-2.3-qwen2-7b", - "sha": "ca39e60052a600a709e03fefceabd9620e0b66d7", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 23.08170100435127, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3824862476008103, - "normalized_score": 38.24862476008103 - }, - "bbh": { - "name": "BBH", - "value": 0.5064049035932394, - "normalized_score": 30.95608211537095 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20694864048338368, - "normalized_score": 20.694864048338367 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.4422395833333333, - "normalized_score": 13.313281249999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3611203457446808, - "normalized_score": 29.01337174940898 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-27", - "submission_date": "2024-09-18", - "generation": 1, - "base_model": "Qwen/Qwen2-7B", 
- "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 7.616, - "co2_cost": 3.6603998269542943 - } - }, - { - "id": "MaziyarPanahi/calme-2.3-rys-78b_bfloat16_a8a4e55c2f7054d25c2f0ab3a3b3d806eb915180_True", - "model": { - "name": "MaziyarPanahi/calme-2.3-rys-78b", - "sha": "a8a4e55c2f7054d25c2f0ab3a3b3d806eb915180", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 44.55737382877077, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8065854155862002, - "normalized_score": 80.65854155862002 - }, - "bbh": { - "name": "BBH", - "value": 0.7107763314317289, - "normalized_score": 59.57454695904105 - }, - "math": { - "name": "MATH Level 5", - "value": 0.39803625377643503, - "normalized_score": 39.803625377643506 - }, - "gpqa": { - "name": "GPQA", - "value": 0.40436241610738255, - "normalized_score": 20.581655480984338 - }, - "musr": { - "name": "MUSR", - "value": 0.45492708333333337, - "normalized_score": 16.999218749999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5475398936170213, - "normalized_score": 49.7266548463357 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-06", - "submission_date": "2024-09-03", - "generation": 1, - "base_model": "dnhkng/RYS-XLarge", - "hub_license": "mit", - "hub_hearts": 4, - "params_billions": 77.965, - "co2_cost": 26.597219222694612 - } - }, - { - "id": "MaziyarPanahi/calme-2.4-llama3-70b_bfloat16_cb03e4d810b82d86e7cb01ab146bade09a5d06d1_True", - "model": { - "name": "MaziyarPanahi/calme-2.4-llama3-70b", - "sha": "cb03e4d810b82d86e7cb01ab146bade09a5d06d1", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 32.486225248764676, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5027371817887649, - "normalized_score": 50.27371817887649 - }, - "bbh": { - "name": "BBH", - "value": 0.6418191966839487, - "normalized_score": 48.39776612820646 - }, - "math": { - "name": "MATH Level 5", - "value": 0.24471299093655588, - "normalized_score": 24.47129909365559 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33976510067114096, - "normalized_score": 11.968680089485462 - }, - "musr": { - "name": "MUSR", - "value": 0.4287916666666667, - "normalized_score": 13.098958333333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5203623670212766, - "normalized_score": 46.706929669030735 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-28", - "submission_date": "2024-06-26", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3-70B", - "hub_license": "llama3", - "hub_hearts": 14, - "params_billions": 70.554, - "co2_cost": 35.487516640485715 - } - }, - { - "id": "MaziyarPanahi/calme-2.4-qwen2-7b_bfloat16_d683c3ef1feb13e92227f5fd92fe5bc4b55ea4a2_True", - "model": { - "name": "MaziyarPanahi/calme-2.4-qwen2-7b", - "sha": "d683c3ef1feb13e92227f5fd92fe5bc4b55ea4a2", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 22.85144077010553, - "has_chat_template": true - }, - 
"evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.32995452067181746, - "normalized_score": 32.995452067181745 - }, - "bbh": { - "name": "BBH", - "value": 0.5101416326251771, - "normalized_score": 31.818265642234802 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20317220543806647, - "normalized_score": 20.31722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.44528125, - "normalized_score": 14.426822916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3976894946808511, - "normalized_score": 33.076610520094555 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-27", - "submission_date": "2024-09-18", - "generation": 1, - "base_model": "Qwen/Qwen2-7B", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 3.2369807670677138 - } - }, - { - "id": "MaziyarPanahi/calme-2.4-rys-78b_bfloat16_0a35e51ffa9efa644c11816a2d56434804177acb_True", - "model": { - "name": "MaziyarPanahi/calme-2.4-rys-78b", - "sha": "0a35e51ffa9efa644c11816a2d56434804177acb", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 50.76504719022304, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8010899967641414, - "normalized_score": 80.10899967641413 - }, - "bbh": { - "name": "BBH", - "value": 0.7279510956242796, - "normalized_score": 62.15654929467119 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4070996978851964, - "normalized_score": 40.70996978851964 - }, - "gpqa": { - "name": "GPQA", - "value": 0.40268456375838924, - "normalized_score": 20.3579418344519 - }, - "musr": { - "name": "MUSR", - "value": 0.5770624999999999, - "normalized_score": 34.56614583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.7002160904255319, - "normalized_score": 66.690676713948 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-07", - "submission_date": "2024-09-03", - "generation": 2, - "base_model": "dnhkng/RYS-XLarge", - "hub_license": "mit", - "hub_hearts": 46, - "params_billions": 77.965, - "co2_cost": 25.952656402536917 - } - }, - { - "id": "MaziyarPanahi/calme-2.5-qwen2-7b_bfloat16_20fb1afc22c0722cb2c57185fff59befeba0fbec_True", - "model": { - "name": "MaziyarPanahi/calme-2.5-qwen2-7b", - "sha": "20fb1afc22c0722cb2c57185fff59befeba0fbec", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 22.659538685051796, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.31449221399220734, - "normalized_score": 31.449221399220733 - }, - "bbh": { - "name": "BBH", - "value": 0.4886561146965678, - "normalized_score": 28.28099517875505 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2258308157099698, - "normalized_score": 22.58308157099698 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3104026845637584, - "normalized_score": 8.05369127516779 - }, - "musr": { - "name": "MUSR", - "value": 0.45646875, - "normalized_score": 15.791927083333334 - }, - 
"mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3681848404255319, - "normalized_score": 29.79831560283688 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-27", - "submission_date": "2024-09-29", - "generation": 1, - "base_model": "Qwen/Qwen2-7B", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 2.7983382435944977 - } - }, - { - "id": "MaziyarPanahi/calme-2.6-qwen2-7b_bfloat16_ebfaae016a50f8922098a2a262ec3ca704504cae_True", - "model": { - "name": "MaziyarPanahi/calme-2.6-qwen2-7b", - "sha": "ebfaae016a50f8922098a2a262ec3ca704504cae", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 21.23233085446529, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3442676542684522, - "normalized_score": 34.426765426845215 - }, - "bbh": { - "name": "BBH", - "value": 0.4930243946403894, - "normalized_score": 29.30841923308876 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1216012084592145, - "normalized_score": 12.16012084592145 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2843959731543625, - "normalized_score": 4.586129753914999 - }, - "musr": { - "name": "MUSR", - "value": 0.4586145833333333, - "normalized_score": 16.560156250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3731715425531915, - "normalized_score": 30.352393617021285 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-27", - "submission_date": "2024-09-29", - "generation": 1, - "base_model": "Qwen/Qwen2-7B", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 7.616, - "co2_cost": 3.2805416064206994 - } - }, - { - "id": "MaziyarPanahi/calme-2.7-qwen2-7b_bfloat16_edc11a1baccedc04a5a4576ee4910fd8922ad47f_True", - "model": { - "name": "MaziyarPanahi/calme-2.7-qwen2-7b", - "sha": "edc11a1baccedc04a5a4576ee4910fd8922ad47f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 22.355267151114735, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3592301759331906, - "normalized_score": 35.92301759331906 - }, - "bbh": { - "name": "BBH", - "value": 0.4883170901309997, - "normalized_score": 28.912244614673995 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13821752265861026, - "normalized_score": 13.821752265861026 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.48242708333333334, - "normalized_score": 19.93671875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3705119680851064, - "normalized_score": 30.056885342789595 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-27", - "submission_date": "2024-09-18", - "generation": 1, - "base_model": "Qwen/Qwen2-7B", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 7.616, - "co2_cost": 2.728560577007425 - } - }, - { - 
"id": "MaziyarPanahi/calme-3.1-baguette-3b_bfloat16_4601b18deed3931c33907ae98060898e787c7758_True", - "model": { - "name": "MaziyarPanahi/calme-3.1-baguette-3b", - "sha": "4601b18deed3931c33907ae98060898e787c7758", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 25.581601911367752, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6234369251364158, - "normalized_score": 62.343692513641585 - }, - "bbh": { - "name": "BBH", - "value": 0.46833341042911075, - "normalized_score": 25.50768075774144 - }, - "math": { - "name": "MATH Level 5", - "value": 0.25604229607250756, - "normalized_score": 25.604229607250755 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2860738255033557, - "normalized_score": 4.809843400447425 - }, - "musr": { - "name": "MUSR", - "value": 0.40079166666666666, - "normalized_score": 8.565625000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33992686170212766, - "normalized_score": 26.658540189125297 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-07", - "submission_date": "2024-11-08", - "generation": 1, - "base_model": "Qwen/Qwen2.5-3B", - "hub_license": "other", - "hub_hearts": 1, - "params_billions": 3.085, - "co2_cost": 1.4631794412606183 - } - }, - { - "id": "MaziyarPanahi/calme-3.1-instruct-3b_bfloat16_3bbd7f1f7949dd7c3679a29a781a95bd1085dc19_True", - "model": { - "name": "MaziyarPanahi/calme-3.1-instruct-3b", - "sha": "3bbd7f1f7949dd7c3679a29a781a95bd1085dc19", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 21.507090636089913, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43359397509718656, - "normalized_score": 43.35939750971866 - }, - "bbh": { - "name": "BBH", - "value": 0.4812730148043098, - "normalized_score": 27.309895959339894 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17749244712990936, - "normalized_score": 17.749244712990937 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2860738255033557, - "normalized_score": 4.809843400447425 - }, - "musr": { - "name": "MUSR", - "value": 0.39520833333333333, - "normalized_score": 7.401041666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.355718085106383, - "normalized_score": 28.413120567375884 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-07", - "submission_date": "2024-11-08", - "generation": 1, - "base_model": "Qwen/Qwen2.5-3B", - "hub_license": "other", - "hub_hearts": 3, - "params_billions": 3.085, - "co2_cost": 2.7873243394680145 - } - }, - { - "id": "MaziyarPanahi/calme-3.1-instruct-78b_bfloat16_7ccd7f1a55ae79af7969f721bb7055511cc6b986_True", - "model": { - "name": "MaziyarPanahi/calme-3.1-instruct-78b", - "sha": "7ccd7f1a55ae79af7969f721bb7055511cc6b986", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 51.28748997363473, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8135547015252862, - 
"normalized_score": 81.35547015252862 - }, - "bbh": { - "name": "BBH", - "value": 0.7305154498840408, - "normalized_score": 62.40968270370106 - }, - "math": { - "name": "MATH Level 5", - "value": 0.39274924471299094, - "normalized_score": 39.274924471299094 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3959731543624161, - "normalized_score": 19.463087248322143 - }, - "musr": { - "name": "MUSR", - "value": 0.5890624999999999, - "normalized_score": 36.49947916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.718500664893617, - "normalized_score": 68.72229609929079 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-19", - "submission_date": "2024-11-27", - "generation": 1, - "base_model": "Removed", - "hub_license": "other", - "hub_hearts": 4, - "params_billions": 77.965, - "co2_cost": 64.4378892937895 - } - }, - { - "id": "MaziyarPanahi/calme-3.1-llamaloi-3b_bfloat16_62547548c06bb22f0b82c2bda7ac466507314a4b_True", - "model": { - "name": "MaziyarPanahi/calme-3.1-llamaloi-3b", - "sha": "62547548c06bb22f0b82c2bda7ac466507314a4b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.0933506270088, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7375175645066203, - "normalized_score": 73.75175645066203 - }, - "bbh": { - "name": "BBH", - "value": 0.4587340004998879, - "normalized_score": 23.7691655758483 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1729607250755287, - "normalized_score": 17.29607250755287 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28104026845637586, - "normalized_score": 4.138702460850116 - }, - "musr": { - "name": "MUSR", - "value": 0.35152083333333334, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3204787234042553, - "normalized_score": 24.497635933806144 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-07", - "submission_date": "2024-11-08", - "generation": 1, - "base_model": "meta-llama/Llama-3.2-3B", - "hub_license": "llama3.2", - "hub_hearts": 1, - "params_billions": 3.213, - "co2_cost": 1.7873980509685605 - } - }, - { - "id": "MaziyarPanahi/calme-3.2-baguette-3b_bfloat16_bba8e602432bd467b64cabf9cb62326893060e60_True", - "model": { - "name": "MaziyarPanahi/calme-3.2-baguette-3b", - "sha": "bba8e602432bd467b64cabf9cb62326893060e60", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 26.332491008465908, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6338282423968404, - "normalized_score": 63.38282423968404 - }, - "bbh": { - "name": "BBH", - "value": 0.470862269902714, - "normalized_score": 25.865746650731108 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2824773413897281, - "normalized_score": 28.247734138972806 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.40209374999999997, - "normalized_score": 8.595052083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - 
"value": 0.3337765957446808, - "normalized_score": 25.975177304964536 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-07", - "submission_date": "2024-11-08", - "generation": 1, - "base_model": "Qwen/Qwen2.5-3B", - "hub_license": "other", - "hub_hearts": 1, - "params_billions": 3.085, - "co2_cost": 1.5520249950030056 - } - }, - { - "id": "MaziyarPanahi/calme-3.2-instruct-3b_bfloat16_12347f5991157e752de6ba9f773a1bbc22445e3a_True", - "model": { - "name": "MaziyarPanahi/calme-3.2-instruct-3b", - "sha": "12347f5991157e752de6ba9f773a1bbc22445e3a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 24.62035249644337, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5533196363426819, - "normalized_score": 55.33196363426819 - }, - "bbh": { - "name": "BBH", - "value": 0.4865641110376735, - "normalized_score": 27.976798242393084 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21676737160120846, - "normalized_score": 21.676737160120847 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.40469791666666666, - "normalized_score": 8.787239583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36527593085106386, - "normalized_score": 29.47510342789598 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-07", - "submission_date": "2024-11-08", - "generation": 1, - "base_model": "Qwen/Qwen2.5-3B", - "hub_license": "other", - "hub_hearts": 3, - "params_billions": 3.086, - "co2_cost": 1.4868663383387295 - } - }, - { - "id": "MaziyarPanahi/calme-3.2-instruct-78b_bfloat16_731f4daf584f822f1393731ccff1d58c7f06b99e_True", - "model": { - "name": "MaziyarPanahi/calme-3.2-instruct-78b", - "sha": "731f4daf584f822f1393731ccff1d58c7f06b99e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 52.08138397879168, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8062607215521482, - "normalized_score": 80.62607215521483 - }, - "bbh": { - "name": "BBH", - "value": 0.7318616272092674, - "normalized_score": 62.609443282901594 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4033232628398791, - "normalized_score": 40.33232628398791 - }, - "gpqa": { - "name": "GPQA", - "value": 0.40268456375838924, - "normalized_score": 20.3579418344519 - }, - "musr": { - "name": "MUSR", - "value": 0.6023645833333333, - "normalized_score": 38.52890624999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.7303025265957447, - "normalized_score": 70.03361406619385 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-19", - "submission_date": "2024-11-28", - "generation": 1, - "base_model": "Removed", - "hub_license": "other", - "hub_hearts": 112, - "params_billions": 77.965, - "co2_cost": 66.01113070973882 - } - }, - { - "id": 
"MaziyarPanahi/calme-3.3-baguette-3b_bfloat16_66f9438922503e5616b6b4488e96fd9342d5efb0_True", - "model": { - "name": "MaziyarPanahi/calme-3.3-baguette-3b", - "sha": "66f9438922503e5616b6b4488e96fd9342d5efb0", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 27.407100524692495, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6359514975819713, - "normalized_score": 63.59514975819714 - }, - "bbh": { - "name": "BBH", - "value": 0.4678217295957521, - "normalized_score": 25.596594106096415 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3806646525679758, - "normalized_score": 38.066465256797585 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2802013422818792, - "normalized_score": 4.026845637583895 - }, - "musr": { - "name": "MUSR", - "value": 0.39282291666666663, - "normalized_score": 7.136197916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3341921542553192, - "normalized_score": 26.021350472813243 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-07", - "submission_date": "2024-11-08", - "generation": 1, - "base_model": "Qwen/Qwen2.5-3B", - "hub_license": "other", - "hub_hearts": 1, - "params_billions": 3.086, - "co2_cost": 1.4938458376125139 - } - }, - { - "id": "MaziyarPanahi/calme-3.3-instruct-3b_bfloat16_ea7d7fb442c981ecd44c5a9060ac6b062927f231_True", - "model": { - "name": "MaziyarPanahi/calme-3.3-instruct-3b", - "sha": "ea7d7fb442c981ecd44c5a9060ac6b062927f231", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 27.77891097790277, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6423212631373645, - "normalized_score": 64.23212631373644 - }, - "bbh": { - "name": "BBH", - "value": 0.46933409427688694, - "normalized_score": 25.682137818579093 - }, - "math": { - "name": "MATH Level 5", - "value": 0.37386706948640486, - "normalized_score": 37.38670694864049 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.40742708333333333, - "normalized_score": 9.395052083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33053523936170215, - "normalized_score": 25.615026595744684 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-07", - "submission_date": "2024-11-08", - "generation": 1, - "base_model": "Qwen/Qwen2.5-3B", - "hub_license": "other", - "hub_hearts": 3, - "params_billions": 3.086, - "co2_cost": 1.5050970304024172 - } - }, - { - "id": "Minami-su/Amara-o1-7B-Qwen_bfloat16_835395d4e693cd8cfb5143f12fae53673164846f_True", - "model": { - "name": "Minami-su/Amara-o1-7B-Qwen", - "sha": "835395d4e693cd8cfb5143f12fae53673164846f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 34.488976713679584, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7389914316236474, - "normalized_score": 
73.89914316236475 - }, - "bbh": { - "name": "BBH", - "value": 0.5199420077880453, - "normalized_score": 32.7968298710958 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5181268882175226, - "normalized_score": 51.81268882175226 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2936241610738255, - "normalized_score": 5.8165548098433995 - }, - "musr": { - "name": "MUSR", - "value": 0.40066666666666667, - "normalized_score": 8.35 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4083277925531915, - "normalized_score": 34.258643617021285 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-08", - "submission_date": "2025-01-08", - "generation": 0, - "base_model": "Minami-su/Amara-o1-7B-Qwen", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.2751190711213067 - } - }, - { - "id": "Minami-su/Amara-o2-7B-Qwen_bfloat16_16271c35b4e1b33dbf9da9c567a1cea0f9d5142b_True", - "model": { - "name": "Minami-su/Amara-o2-7B-Qwen", - "sha": "16271c35b4e1b33dbf9da9c567a1cea0f9d5142b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 31.034506834602485, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7146615424850509, - "normalized_score": 71.46615424850509 - }, - "bbh": { - "name": "BBH", - "value": 0.5173432604435285, - "normalized_score": 31.79812727299554 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4086102719033233, - "normalized_score": 40.86102719033233 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.37809374999999995, - "normalized_score": 5.128385416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.41647273936170215, - "normalized_score": 35.163637706855795 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-09", - "submission_date": "2025-01-09", - "generation": 0, - "base_model": "Minami-su/Amara-o2-7B-Qwen", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 7.616, - "co2_cost": 0.9848082871947184 - } - }, - { - "id": "Minami-su/test-7B-00_bfloat16_c8e5b7745c921b5020192f0b3a553c63725048f9_True", - "model": { - "name": "Minami-su/test-7B-00", - "sha": "c8e5b7745c921b5020192f0b3a553c63725048f9", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 29.950834319546107, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6690492338107332, - "normalized_score": 66.90492338107333 - }, - "bbh": { - "name": "BBH", - "value": 0.44661237656101793, - "normalized_score": 21.48995001898129 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4516616314199396, - "normalized_score": 45.16616314199396 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.41260416666666666, - "normalized_score": 10.342187500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3587932180851064, - 
"normalized_score": 28.75480200945626 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-24", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.5012082113522585 - } - }, - { - "id": "Minami-su/test-7B-01_bfloat16_9af628070dff4480252a4d4e5f07a9884e3f71d4_True", - "model": { - "name": "Minami-su/test-7B-01", - "sha": "9af628070dff4480252a4d4e5f07a9884e3f71d4", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 30.08150931571436, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6736204382150472, - "normalized_score": 67.36204382150471 - }, - "bbh": { - "name": "BBH", - "value": 0.4422359420239754, - "normalized_score": 20.824494266256867 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4554380664652568, - "normalized_score": 45.54380664652568 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.41530208333333335, - "normalized_score": 10.979427083333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35355718085106386, - "normalized_score": 28.173020094562645 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-24", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.5290372610276757 - } - }, - { - "id": "Minami-su/test-v2-7B-00_bfloat16_fcba9bb21f9cf521dcd5d41749ccce77434fe4dc_True", - "model": { - "name": "Minami-su/test-v2-7B-00", - "sha": "fcba9bb21f9cf521dcd5d41749ccce77434fe4dc", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 29.484350067576724, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6747197436136119, - "normalized_score": 67.47197436136119 - }, - "bbh": { - "name": "BBH", - "value": 0.4415989344595353, - "normalized_score": 21.19075482398095 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4418429003021148, - "normalized_score": 44.18429003021148 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.41542708333333334, - "normalized_score": 10.995052083333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3472406914893617, - "normalized_score": 27.47118794326241 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-25", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.5334495807759827 - } - }, - { - "id": "ModelCloud/Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1_bfloat16_c7a837a34207b3b3b949f8d0344a66d3a3ad4255_True", - "model": { - "name": 
"ModelCloud/Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1", - "sha": "c7a837a34207b3b3b949f8d0344a66d3a3ad4255", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 12.430004377968435, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5268919799465418, - "normalized_score": 52.68919799465418 - }, - "bbh": { - "name": "BBH", - "value": 0.3252726665015006, - "normalized_score": 5.859172283470534 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06042296072507553, - "normalized_score": 6.042296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2533557046979866, - "normalized_score": 0.44742729306487633 - }, - "musr": { - "name": "MUSR", - "value": 0.3249166666666667, - "normalized_score": 1.0479166666666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17644614361702127, - "normalized_score": 8.494015957446807 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-30", - "submission_date": "2025-01-02", - "generation": 1, - "base_model": "ModelCloud/Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1 (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 2, - "params_billions": 5.453, - "co2_cost": 1.3218277469924238 - } - }, - { - "id": "ModelSpace/GemmaX2-28-9B-v0.1_float16_cdb5e2e66bd966cd475ef24f7a9eb61c55b25bf7_False", - "model": { - "name": "ModelSpace/GemmaX2-28-9B-v0.1", - "sha": "cdb5e2e66bd966cd475ef24f7a9eb61c55b25bf7", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 5.991108905848541, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.003921816336210145, - "normalized_score": 0.3921816336210145 - }, - "bbh": { - "name": "BBH", - "value": 0.3687226427280163, - "normalized_score": 11.707676917585223 - }, - "math": { - "name": "MATH Level 5", - "value": 0.027190332326283987, - "normalized_score": 2.719033232628399 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27684563758389263, - "normalized_score": 3.5794183445190177 - }, - "musr": { - "name": "MUSR", - "value": 0.35365625, - "normalized_score": 3.873697916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2230718085106383, - "normalized_score": 13.67464539007092 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-28", - "submission_date": "2025-02-24", - "generation": 1, - "base_model": "ModelSpace/GemmaX2-28-9B-v0.1 (Merge)", - "hub_license": "gemma", - "hub_hearts": 47, - "params_billions": 10.159, - "co2_cost": 1.0660588261144064 - } - }, - { - "id": "MoonRide/Llama-3.2-3B-Khelavaster_float16_30c3794ab0ff5351bcd7586da8b5384adc77c775_True", - "model": { - "name": "MoonRide/Llama-3.2-3B-Khelavaster", - "sha": "30c3794ab0ff5351bcd7586da8b5384adc77c775", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.144904682630184, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4924954675815725, - "normalized_score": 49.24954675815725 - }, - "bbh": { - "name": "BBH", - "value": 
0.45156712929620335, - "normalized_score": 22.686343682382102 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16163141993957703, - "normalized_score": 16.1631419939577 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.36990625000000005, - "normalized_score": 5.504947916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31216755319148937, - "normalized_score": 23.57417257683215 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-10", - "submission_date": "2025-03-10", - "generation": 1, - "base_model": "MoonRide/Llama-3.2-3B-Khelavaster (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 1, - "params_billions": 3.607, - "co2_cost": 0.5762180237117631 - } - }, - { - "id": "Mostafa8Mehrabi/llama-3.2-1b-Insomnia-ChatBot-merged_bfloat16_301ab532cd25a048a183768891b7cf095b61dd9f_True", - "model": { - "name": "Mostafa8Mehrabi/llama-3.2-1b-Insomnia-ChatBot-merged", - "sha": "301ab532cd25a048a183768891b7cf095b61dd9f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 3.192716297399995, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.13206735905176042, - "normalized_score": 13.206735905176043 - }, - "bbh": { - "name": "BBH", - "value": 0.3003508901818665, - "normalized_score": 2.4675184428555217 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0075528700906344415, - "normalized_score": 0.7552870090634441 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23657718120805368, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.33815625, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11311502659574468, - "normalized_score": 1.457225177304964 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-10", - "submission_date": "2025-03-10", - "generation": 0, - "base_model": "Mostafa8Mehrabi/llama-3.2-1b-Insomnia-ChatBot-merged", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 1.236, - "co2_cost": 0.776372786246016 - } - }, - { - "id": "MrRobotoAI/MrRoboto-ProLong-8b-v4i_bfloat16_aed2a68b257f1c12bf75ae7d98f6ab2d235e1061_False", - "model": { - "name": "MrRobotoAI/MrRoboto-ProLong-8b-v4i", - "sha": "aed2a68b257f1c12bf75ae7d98f6ab2d235e1061", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.58291861856009, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3834603297029659, - "normalized_score": 38.34603297029659 - }, - "bbh": { - "name": "BBH", - "value": 0.458548650453507, - "normalized_score": 23.792248519686385 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05513595166163142, - "normalized_score": 5.5135951661631415 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28942953020134227, - "normalized_score": 5.257270693512303 - }, - "musr": { - "name": "MUSR", - "value": 0.401375, - "normalized_score": 9.605208333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3068484042553192, - 
"normalized_score": 22.983156028368796 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-29", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 4.015, - "co2_cost": 1.1586250453943039 - } - }, - { - "id": "MrRobotoAI/MrRoboto-ProLongBASE-pt8-unaligned-8b_bfloat16_7254c57bd7b62a66572a5fed9ef9451fd81b7e9b_False", - "model": { - "name": "MrRobotoAI/MrRoboto-ProLongBASE-pt8-unaligned-8b", - "sha": "7254c57bd7b62a66572a5fed9ef9451fd81b7e9b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 15.935345737822864, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.34754008253655855, - "normalized_score": 34.754008253655854 - }, - "bbh": { - "name": "BBH", - "value": 0.4515254903058233, - "normalized_score": 22.941209992999802 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04229607250755287, - "normalized_score": 4.229607250755287 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28104026845637586, - "normalized_score": 4.138702460850116 - }, - "musr": { - "name": "MUSR", - "value": 0.42788541666666663, - "normalized_score": 12.152343750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2565658244680851, - "normalized_score": 17.396202718676122 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-29", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 4.015, - "co2_cost": 1.4513738415171142 - } - }, - { - "id": "MultivexAI/Gladiator-Mini-Exp-1211-3B_float16_9f3f58da3fb4b1825c2b97effc421e7809c95848_True", - "model": { - "name": "MultivexAI/Gladiator-Mini-Exp-1211-3B", - "sha": "9f3f58da3fb4b1825c2b97effc421e7809c95848", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.27221013046616, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.68760887777763, - "normalized_score": 68.760887777763 - }, - "bbh": { - "name": "BBH", - "value": 0.44843752663028075, - "normalized_score": 22.116062493215253 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13746223564954682, - "normalized_score": 13.746223564954683 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.326, - "normalized_score": 2.0833333333333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3151595744680851, - "normalized_score": 23.90661938534279 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-11", - "submission_date": "2024-12-11", - "generation": 1, - "base_model": "MultivexAI/Gladiator-Mini-Exp-1211-3B (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 1.1915628190823662 - } - }, - { - "id": 
"MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct_float16_1a3f3808cd5335fb71c88d3c2b681459c2420044_True", - "model": { - "name": "MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct", - "sha": "1a3f3808cd5335fb71c88d3c2b681459c2420044", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.114352261478917, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6078748830879843, - "normalized_score": 60.78748830879843 - }, - "bbh": { - "name": "BBH", - "value": 0.4369766992416903, - "normalized_score": 20.39546213621821 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1351963746223565, - "normalized_score": 13.51963746223565 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.31145833333333334, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3048537234042553, - "normalized_score": 22.761524822695034 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-20", - "submission_date": "2024-12-20", - "generation": 1, - "base_model": "MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 1.2021382255133486 - } - }, - { - "id": "MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct-V2_float16_b092e130c61aa44f2556b5db224b4df545fb51aa_True", - "model": { - "name": "MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct-V2", - "sha": "b092e130c61aa44f2556b5db224b4df545fb51aa", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.415195595809834, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6215386286165153, - "normalized_score": 62.15386286165153 - }, - "bbh": { - "name": "BBH", - "value": 0.438883390990549, - "normalized_score": 20.651248138719236 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14123867069486404, - "normalized_score": 14.123867069486403 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.30082291666666666, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3025265957446808, - "normalized_score": 22.50295508274231 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-21", - "submission_date": "2024-12-21", - "generation": 1, - "base_model": "MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct-V2 (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 1.1815760239132476 - } - }, - { - "id": "MultivexAI/Gladiator-Mini-Exp-1222-3B-Instruct_float16_07c170bb7c6b09ddeb4d4fe0fc894b371b27cc50_True", - "model": { - "name": "MultivexAI/Gladiator-Mini-Exp-1222-3B-Instruct", - "sha": "07c170bb7c6b09ddeb4d4fe0fc894b371b27cc50", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 
20.353088592931517, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6163180361440976, - "normalized_score": 61.63180361440977 - }, - "bbh": { - "name": "BBH", - "value": 0.4373182371021645, - "normalized_score": 20.567490871055163 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14123867069486404, - "normalized_score": 14.123867069486403 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.31276041666666665, - "normalized_score": 1.5950520833333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30169547872340424, - "normalized_score": 22.410608747044915 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-22", - "submission_date": "2024-12-22", - "generation": 1, - "base_model": "MultivexAI/Gladiator-Mini-Exp-1222-3B-Instruct (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 1.2116490881751003 - } - }, - { - "id": "MultivexAI/Phi-3.5-Mini-Instruct-MultiVex-v0.25-GGUF_float16_0bdec12abd74bc164fdfa432528b914e19f6a9aa_True", - "model": { - "name": "MultivexAI/Phi-3.5-Mini-Instruct-MultiVex-v0.25-GGUF", - "sha": "0bdec12abd74bc164fdfa432528b914e19f6a9aa", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 3.8347704638023603, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.14398241111362298, - "normalized_score": 14.398241111362296 - }, - "bbh": { - "name": "BBH", - "value": 0.29077474506950557, - "normalized_score": 1.6023814703484736 - }, - "math": { - "name": "MATH Level 5", - "value": 0.006042296072507553, - "normalized_score": 0.6042296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2550335570469799, - "normalized_score": 0.6711409395973182 - }, - "musr": { - "name": "MUSR", - "value": 0.3641979166666667, - "normalized_score": 4.524739583333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11087101063829788, - "normalized_score": 1.2078900709219857 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-13", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.821, - "co2_cost": 1.0121245066641238 - } - }, - { - "id": "Mxode/NanoLM-0.3B-Instruct-v1_bfloat16_638cda2c122e96c7992227b56b29967d9c8fd57e_True", - "model": { - "name": "Mxode/NanoLM-0.3B-Instruct-v1", - "sha": "638cda2c122e96c7992227b56b29967d9c8fd57e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 5.737739264323998, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1536744726215331, - "normalized_score": 15.36744726215331 - }, - "bbh": { - "name": "BBH", - "value": 0.30282462164767127, - "normalized_score": 3.104609898338746 - }, - "math": { - "name": "MATH Level 5", - "value": 0.014350453172205438, - "normalized_score": 1.4350453172205437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27181208053691275, - "normalized_score": 
2.9082774049216997 - }, - "musr": { - "name": "MUSR", - "value": 0.41552083333333334, - "normalized_score": 10.440104166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11053856382978723, - "normalized_score": 1.1709515366430252 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-03", - "submission_date": "2024-09-05", - "generation": 0, - "base_model": "Mxode/NanoLM-0.3B-Instruct-v1", - "hub_license": "gpl-3.0", - "hub_hearts": 0, - "params_billions": 0.315, - "co2_cost": 1.2125516098920441 - } - }, - { - "id": "Mxode/NanoLM-0.3B-Instruct-v1.1_bfloat16_7338464708c691667b193e7bb8f6b5bb3f9df27d_True", - "model": { - "name": "Mxode/NanoLM-0.3B-Instruct-v1.1", - "sha": "7338464708c691667b193e7bb8f6b5bb3f9df27d", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 5.974298608437977, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17827918810977095, - "normalized_score": 17.827918810977096 - }, - "bbh": { - "name": "BBH", - "value": 0.3014403673764691, - "normalized_score": 3.0952799822018195 - }, - "math": { - "name": "MATH Level 5", - "value": 0.013595166163141994, - "normalized_score": 1.3595166163141994 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.42733333333333334, - "normalized_score": 12.216666666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11211768617021277, - "normalized_score": 1.3464095744680846 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-05", - "submission_date": "2024-09-05", - "generation": 0, - "base_model": "Mxode/NanoLM-0.3B-Instruct-v1.1", - "hub_license": "gpl-3.0", - "hub_hearts": 2, - "params_billions": 0.315, - "co2_cost": 1.2149597582994591 - } - }, - { - "id": "Mxode/NanoLM-0.3B-Instruct-v2_bfloat16_40027e2a1a404144975cfc0dd7d354057b98854b_True", - "model": { - "name": "Mxode/NanoLM-0.3B-Instruct-v2", - "sha": "40027e2a1a404144975cfc0dd7d354057b98854b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 5.01367055199107, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1667885654507817, - "normalized_score": 16.67885654507817 - }, - "bbh": { - "name": "BBH", - "value": 0.29211039456850646, - "normalized_score": 2.2094810446663793 - }, - "math": { - "name": "MATH Level 5", - "value": 0.006797583081570997, - "normalized_score": 0.6797583081570997 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.3954583333333333, - "normalized_score": 7.565625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11344747340425532, - "normalized_score": 1.4941637115839235 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-07", - "submission_date": "2024-09-08", - "generation": 0, - "base_model": 
"Mxode/NanoLM-0.3B-Instruct-v2", - "hub_license": "gpl-3.0", - "hub_hearts": 0, - "params_billions": 0.315, - "co2_cost": 1.2130422448625946 - } - }, - { - "id": "Mxode/NanoLM-1B-Instruct-v1.1_bfloat16_cad6274afcfcf33927dc6c116d63013dcc1dfc48_True", - "model": { - "name": "Mxode/NanoLM-1B-Instruct-v1.1", - "sha": "cad6274afcfcf33927dc6c116d63013dcc1dfc48", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 6.756723242219516, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23952889444451833, - "normalized_score": 23.952889444451834 - }, - "bbh": { - "name": "BBH", - "value": 0.31835012059590373, - "normalized_score": 6.106919191919192 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03625377643504532, - "normalized_score": 3.625377643504532 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.34327083333333336, - "normalized_score": 2.675520833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12150930851063829, - "normalized_score": 2.3899231678486985 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-07", - "submission_date": "2024-09-08", - "generation": 0, - "base_model": "Mxode/NanoLM-1B-Instruct-v1.1", - "hub_license": "gpl-3.0", - "hub_hearts": 0, - "params_billions": 1.076, - "co2_cost": 1.6509597362924062 - } - }, - { - "id": "Mxode/NanoLM-1B-Instruct-v2_bfloat16_ebd8c374447985dbd4e247ffe6c5ebb5b4910418_True", - "model": { - "name": "Mxode/NanoLM-1B-Instruct-v2", - "sha": "ebd8c374447985dbd4e247ffe6c5ebb5b4910418", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.35441459615685, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2629844368497808, - "normalized_score": 26.298443684978082 - }, - "bbh": { - "name": "BBH", - "value": 0.3123145400715591, - "normalized_score": 4.910622895622894 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04154078549848943, - "normalized_score": 4.1540785498489425 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.35520833333333335, - "normalized_score": 4.3343750000000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12375332446808511, - "normalized_score": 2.639258274231678 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-07", - "submission_date": "2024-09-09", - "generation": 0, - "base_model": "Mxode/NanoLM-1B-Instruct-v2", - "hub_license": "gpl-3.0", - "hub_hearts": 0, - "params_billions": 1.076, - "co2_cost": 1.6205629500777163 - } - }, - { - "id": "NAPS-ai/naps-gemma-2-27b-v-0.1.0_float16_c75cc878c364615db4b1b173b21b97ebcfb13d70_False", - "model": { - "name": "NAPS-ai/naps-gemma-2-27b-v-0.1.0", - "sha": "c75cc878c364615db4b1b173b21b97ebcfb13d70", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", 
- "average_score": 1.6796019124036488, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.0, - "normalized_score": 0.0 - }, - "bbh": { - "name": "BBH", - "value": 0.2911778102988436, - "normalized_score": 2.3470409575204094 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.35753125, - "normalized_score": 4.524739583333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11677194148936171, - "normalized_score": 1.8635490543735225 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-11", - "submission_date": "2024-11-11", - "generation": 1, - "base_model": "NAPS-ai/naps-gemma-2-27b-v-0.1.0 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 27.227, - "co2_cost": 22.449722077818265 - } - }, - { - "id": "NAPS-ai/naps-gemma-2-27b-v0.1.0_float16_befb5b776e052a364bad4a5b3380a4d8370572dd_False", - "model": { - "name": "NAPS-ai/naps-gemma-2-27b-v0.1.0", - "sha": "befb5b776e052a364bad4a5b3380a4d8370572dd", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 1.6796019124036488, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.0, - "normalized_score": 0.0 - }, - "bbh": { - "name": "BBH", - "value": 0.2911778102988436, - "normalized_score": 2.3470409575204094 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.35753125, - "normalized_score": 4.524739583333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11677194148936171, - "normalized_score": 1.8635490543735225 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-11", - "submission_date": "2024-11-11", - "generation": 1, - "base_model": "NAPS-ai/naps-gemma-2-27b-v0.1.0 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 27.227, - "co2_cost": 34.0134345443038 - } - }, - { - "id": "NAPS-ai/naps-llama-3_1-8b-instruct-v0.3_float16_3dcd36be024e02de712d537f8786d868659127bb_False", - "model": { - "name": "NAPS-ai/naps-llama-3_1-8b-instruct-v0.3", - "sha": "3dcd36be024e02de712d537f8786d868659127bb", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.27833752065767, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5390818583580456, - "normalized_score": 53.90818583580456 - }, - "bbh": { - "name": "BBH", - "value": 0.4900525115527062, - "normalized_score": 26.27454019814682 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1903323262839879, - "normalized_score": 19.033232628398792 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.37870833333333337, 
- "normalized_score": 7.205208333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33984375, - "normalized_score": 26.649305555555554 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-02", - "submission_date": "2024-09-30", - "generation": 0, - "base_model": "NAPS-ai/naps-llama-3_1-8b-instruct-v0.3", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.9386157659309937 - } - }, - { - "id": "NAPS-ai/naps-llama-3_1-8b-instruct-v0.4_float16_152229e8de5270aea7b9d7689503fb2577f8911a_True", - "model": { - "name": "NAPS-ai/naps-llama-3_1-8b-instruct-v0.4", - "sha": "152229e8de5270aea7b9d7689503fb2577f8911a", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 27.71508453774743, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7344202272193336, - "normalized_score": 73.44202272193337 - }, - "bbh": { - "name": "BBH", - "value": 0.4861833360906734, - "normalized_score": 27.832818693945708 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19637462235649547, - "normalized_score": 19.637462235649547 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.4421145833333333, - "normalized_score": 13.964322916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3474900265957447, - "normalized_score": 27.498891843971627 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-12", - "submission_date": "2024-09-30", - "generation": 1, - "base_model": "NAPS-ai/naps-llama-3_1-8b-instruct-v0.4 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.7838555808247463 - } - }, - { - "id": "NAPS-ai/naps-llama-3_1-instruct-v0.5.0_float16_bf6d3578346e80c586ec1a4a9883079523b48c11_True", - "model": { - "name": "NAPS-ai/naps-llama-3_1-instruct-v0.5.0", - "sha": "bf6d3578346e80c586ec1a4a9883079523b48c11", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.000822862572466, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5020124381086628, - "normalized_score": 50.20124381086628 - }, - "bbh": { - "name": "BBH", - "value": 0.4147584365689691, - "normalized_score": 18.110133310152776 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03625377643504532, - "normalized_score": 3.625377643504532 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.37127083333333327, - "normalized_score": 3.675520833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.26138630319148937, - "normalized_score": 17.93181146572104 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-12", - "submission_date": "2024-09-30", - "generation": 0, - "base_model": "NAPS-ai/naps-llama-3_1-instruct-v0.5.0", - "hub_license": 
"apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 2.2752422230364955 - } - }, - { - "id": "NAPS-ai/naps-llama-3_1_instruct-v0.6.0_float16_e0ce03ea6539f9398adbe14d8f9512e5484625b4_False", - "model": { - "name": "NAPS-ai/naps-llama-3_1_instruct-v0.6.0", - "sha": "e0ce03ea6539f9398adbe14d8f9512e5484625b4", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 15.779501356421212, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3280063564675062, - "normalized_score": 32.80063564675062 - }, - "bbh": { - "name": "BBH", - "value": 0.45284530156109354, - "normalized_score": 22.339533869982745 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06419939577039276, - "normalized_score": 6.419939577039275 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28187919463087246, - "normalized_score": 4.250559284116329 - }, - "musr": { - "name": "MUSR", - "value": 0.37390624999999994, - "normalized_score": 3.971614583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3240525265957447, - "normalized_score": 24.894725177304963 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-01", - "submission_date": "2024-11-13", - "generation": 1, - "base_model": "NAPS-ai/naps-llama-3_1_instruct-v0.6.0 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.5754005635001274 - } - }, - { - "id": "NAPS-ai/naps-llama3.1-70B-v0.2-fp16_float16_98040775c86bb30230a3cfb8477ca546adcd9a66_False", - "model": { - "name": "NAPS-ai/naps-llama3.1-70B-v0.2-fp16", - "sha": "98040775c86bb30230a3cfb8477ca546adcd9a66", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.215465224433852, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1844993506119319, - "normalized_score": 18.449935061193187 - }, - "bbh": { - "name": "BBH", - "value": 0.3040736853180832, - "normalized_score": 3.0702609839914836 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23909395973154363, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.34860416666666666, - "normalized_score": 2.675520833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10987367021276596, - "normalized_score": 1.0970744680851066 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-10", - "submission_date": "2024-12-12", - "generation": 1, - "base_model": "NAPS-ai/naps-llama3.1-70B-v0.2-fp16 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 70.761, - "co2_cost": 148.7725359700582 - } - }, - { - "id": "NCSOFT/Llama-VARCO-8B-Instruct_bfloat16_fe2d9358a2d35451c04e4589b47e361cfacd350d_True", - "model": { - "name": "NCSOFT/Llama-VARCO-8B-Instruct", - "sha": "fe2d9358a2d35451c04e4589b47e361cfacd350d", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.983509272665156, - 
"has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4470327619604871, - "normalized_score": 44.703276196048705 - }, - "bbh": { - "name": "BBH", - "value": 0.5022879316026018, - "normalized_score": 29.177056729688577 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10649546827794562, - "normalized_score": 10.649546827794563 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.3840729166666666, - "normalized_score": 10.775781249999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31898271276595747, - "normalized_score": 24.33141252955083 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-12", - "submission_date": "2024-12-21", - "generation": 1, - "base_model": "NCSOFT/Llama-VARCO-8B-Instruct (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 69, - "params_billions": 8.03, - "co2_cost": 1.1900045983108498 - } - }, - { - "id": "NJS26/NJS_777_bfloat16_6fbf69f7058f449afa1a113e66869953f585bb7f_False", - "model": { - "name": "NJS26/NJS_777", - "sha": "6fbf69f7058f449afa1a113e66869953f585bb7f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.500174317249103, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.18809647291409015, - "normalized_score": 18.809647291409014 - }, - "bbh": { - "name": "BBH", - "value": 0.21782097894078087, - "normalized_score": 3.1606011091305213 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2063758389261745, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.35378125, - "normalized_score": 3.2226562499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11627327127659574, - "normalized_score": 1.8081412529550822 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-26", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "NJS26/NJS_777 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 10.362, - "co2_cost": 0.9693995075895436 - } - }, - { - "id": "NLPark/AnFeng_v3.1-Avocet_float16_5170739731033323e6e66a0f68d34790042a3b2a_False", - "model": { - "name": "NLPark/AnFeng_v3.1-Avocet", - "sha": "5170739731033323e6e66a0f68d34790042a3b2a", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.39095634882293, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5096311121158525, - "normalized_score": 50.96311121158526 - }, - "bbh": { - "name": "BBH", - "value": 0.582852329074409, - "normalized_score": 40.309033651453255 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1593655589123867, - "normalized_score": 15.93655589123867 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32466442953020136, - "normalized_score": 9.955257270693513 - }, - "musr": { - "name": "MUSR", - "value": 0.44757291666666665, - "normalized_score": 14.979947916666662 - }, - "mmlu_pro": { 
- "name": "MMLU-PRO", - "value": 0.44381648936170215, - "normalized_score": 38.20183215130024 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-03", - "submission_date": "2024-08-07", - "generation": 0, - "base_model": "NLPark/AnFeng_v3.1-Avocet", - "hub_license": "cc-by-nc-nd-4.0", - "hub_hearts": 0, - "params_billions": 34.393, - "co2_cost": 6.344016066691716 - } - }, - { - "id": "NLPark/B-and-W_Flycatcher-3AD1E_bfloat16_21044e39f6854f5a6df84c5074d449b7eb96b522_True", - "model": { - "name": "NLPark/B-and-W_Flycatcher-3AD1E", - "sha": "21044e39f6854f5a6df84c5074d449b7eb96b522", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.467332952617294, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49084650948372543, - "normalized_score": 49.08465094837254 - }, - "bbh": { - "name": "BBH", - "value": 0.6065117528534355, - "normalized_score": 43.74245801092346 - }, - "math": { - "name": "MATH Level 5", - "value": 0.23791540785498488, - "normalized_score": 23.791540785498487 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33053691275167785, - "normalized_score": 10.738255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.44227083333333334, - "normalized_score": 13.883854166666671 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4740691489361702, - "normalized_score": 41.56323877068557 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-28", - "submission_date": "2024-09-28", - "generation": 0, - "base_model": "NLPark/B-and-W_Flycatcher-3AD1E", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 3.1026445938483675 - } - }, - { - "id": "NLPark/Shi-Ci-Robin-Test_3AD80_bfloat16_995887837a259817570489183cbe8b1abffd23b1_True", - "model": { - "name": "NLPark/Shi-Ci-Robin-Test_3AD80", - "sha": "995887837a259817570489183cbe8b1abffd23b1", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 39.234122021270416, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7226547782107031, - "normalized_score": 72.26547782107032 - }, - "bbh": { - "name": "BBH", - "value": 0.6704805157570325, - "normalized_score": 52.265661751102094 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3157099697885196, - "normalized_score": 31.570996978851962 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3598993288590604, - "normalized_score": 14.65324384787472 - }, - "musr": { - "name": "MUSR", - "value": 0.46959375000000003, - "normalized_score": 18.86588541666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5120511968085106, - "normalized_score": 45.78346631205674 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-25", - "submission_date": "2024-10-25", - "generation": 1, - "base_model": "NLPark/Shi-Ci-Robin-Test_3AD80 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 0, - "params_billions": 70.554, - "co2_cost": 24.893508960713934 - } - }, - { - 
"id": "NTQAI/NxMobileLM-1.5B-SFT_float16_c5095c4969a48999c99f0e34ba3db929a0b59b8b_True", - "model": { - "name": "NTQAI/NxMobileLM-1.5B-SFT", - "sha": "c5095c4969a48999c99f0e34ba3db929a0b59b8b", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 18.734829287552035, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6392239258500778, - "normalized_score": 63.92239258500778 - }, - "bbh": { - "name": "BBH", - "value": 0.39571778048116, - "normalized_score": 16.162542725891466 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08459214501510574, - "normalized_score": 8.459214501510575 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.35552083333333334, - "normalized_score": 2.4401041666666674 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28174867021276595, - "normalized_score": 20.194296690307326 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-15", - "submission_date": "2025-01-15", - "generation": 1, - "base_model": "NTQAI/NxMobileLM-1.5B-SFT (Merge)", - "hub_license": "mit", - "hub_hearts": 4, - "params_billions": 1.544, - "co2_cost": 1.6783146614348514 - } - }, - { - "id": "NTQAI/Nxcode-CQ-7B-orpo_bfloat16_74f3b3c06de36b261af9ef857279d6e33f893336_True", - "model": { - "name": "NTQAI/Nxcode-CQ-7B-orpo", - "sha": "74f3b3c06de36b261af9ef857279d6e33f893336", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 12.373779699446112, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.40072119753365515, - "normalized_score": 40.07211975336551 - }, - "bbh": { - "name": "BBH", - "value": 0.4143023249178217, - "normalized_score": 17.58000487008142 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02190332326283988, - "normalized_score": 2.190332326283988 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25419463087248323, - "normalized_score": 0.5592841163310973 - }, - "musr": { - "name": "MUSR", - "value": 0.39396875, - "normalized_score": 7.04609375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16115359042553193, - "normalized_score": 6.794843380614658 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-24", - "submission_date": "2024-08-10", - "generation": 0, - "base_model": "NTQAI/Nxcode-CQ-7B-orpo", - "hub_license": "other", - "hub_hearts": 124, - "params_billions": 7.25, - "co2_cost": 1.6843498267871815 - } - }, - { - "id": "NYTK/PULI-GPTrio_float16_16a56dd22d184e4b7b49d90461fa8d4810639463_False", - "model": { - "name": "NYTK/PULI-GPTrio", - "sha": "16a56dd22d184e4b7b49d90461fa8d4810639463", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "GPTNeoXForCausalLM", - "average_score": 5.833727911056492, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21797164855915638, - "normalized_score": 21.79716485591564 - }, - "bbh": { - "name": "BBH", - "value": 0.30600290906237543, - 
"normalized_score": 3.0152211415704975 - }, - "math": { - "name": "MATH Level 5", - "value": 0.012084592145015106, - "normalized_score": 1.2084592145015105 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.38187499999999996, - "normalized_score": 5.3343750000000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11369680851063829, - "normalized_score": 1.521867612293143 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-06-08", - "submission_date": "2024-08-24", - "generation": 0, - "base_model": "NYTK/PULI-GPTrio", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 10, - "params_billions": 7.673, - "co2_cost": 1.444093990695365 - } - }, - { - "id": "NYTK/PULI-LlumiX-32K_float16_a589894397a36b61c578d0dd4778ee6e5fe471ff_False", - "model": { - "name": "NYTK/PULI-LlumiX-32K", - "sha": "a589894397a36b61c578d0dd4778ee6e5fe471ff", - "precision": "float16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 6.519109356715034, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1699612583500667, - "normalized_score": 16.99612583500667 - }, - "bbh": { - "name": "BBH", - "value": 0.31893582242949375, - "normalized_score": 5.107047129907727 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01283987915407855, - "normalized_score": 1.283987915407855 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2533557046979866, - "normalized_score": 0.44742729306487633 - }, - "musr": { - "name": "MUSR", - "value": 0.39641666666666664, - "normalized_score": 7.718750000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16805186170212766, - "normalized_score": 7.561317966903072 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-03-12", - "submission_date": "2024-08-24", - "generation": 0, - "base_model": "NYTK/PULI-LlumiX-32K", - "hub_license": "llama2", - "hub_hearts": 11, - "params_billions": 6.738, - "co2_cost": 1.6451394799256278 - } - }, - { - "id": "Naveenpoliasetty/llama3-8B-V2_float16_e0458381d02bc411b9e576796d185f23dcc11f71_False", - "model": { - "name": "Naveenpoliasetty/llama3-8B-V2", - "sha": "e0458381d02bc411b9e576796d185f23dcc11f71", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.820686737632144, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4122616878770551, - "normalized_score": 41.226168787705504 - }, - "bbh": { - "name": "BBH", - "value": 0.5188657580065063, - "normalized_score": 30.873209425039573 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07854984894259819, - "normalized_score": 7.854984894259818 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.40813541666666664, - "normalized_score": 9.183593750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3737533244680851, - "normalized_score": 30.417036052009454 - } - }, - "features": { - "is_not_available_on_hub": false, - 
"is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-18", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "Naveenpoliasetty/llama3-8B-V2 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.50402447937112 - } - }, - { - "id": "NbAiLab/nb-llama-3.1-8B-Instruct_bfloat16_e56aaceb823e1b0d29029c8a9e4bc090a07d81c4_True", - "model": { - "name": "NbAiLab/nb-llama-3.1-8B-Instruct", - "sha": "e56aaceb823e1b0d29029c8a9e4bc090a07d81c4", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 8.479797435320535, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.362502604201297, - "normalized_score": 36.2502604201297 - }, - "bbh": { - "name": "BBH", - "value": 0.32466553135589526, - "normalized_score": 5.448858800501278 - }, - "math": { - "name": "MATH Level 5", - "value": 0.022658610271903322, - "normalized_score": 2.2658610271903323 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27348993288590606, - "normalized_score": 3.1319910514541416 - }, - "musr": { - "name": "MUSR", - "value": 0.32076041666666666, - "normalized_score": 1.5950520833333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1196808510638298, - "normalized_score": 2.186761229314421 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-10", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.495082112488492 - } - }, - { - "id": "NbAiLab/nb-llama-3.1-8B-sft_float16_4afbe8f228a7c10155e6687bd337499726db0604_True", - "model": { - "name": "NbAiLab/nb-llama-3.1-8B-sft", - "sha": "4afbe8f228a7c10155e6687bd337499726db0604", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 8.180260967339196, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.36157838978355206, - "normalized_score": 36.15783897835521 - }, - "bbh": { - "name": "BBH", - "value": 0.3281509048328078, - "normalized_score": 5.952497646658256 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02190332326283988, - "normalized_score": 2.190332326283988 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25419463087248323, - "normalized_score": 0.5592841163310973 - }, - "musr": { - "name": "MUSR", - "value": 0.3287291666666667, - "normalized_score": 1.7578124999999993 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12217420212765957, - "normalized_score": 2.4638002364066187 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-25", - "submission_date": "2024-12-11", - "generation": 0, - "base_model": "NbAiLab/nb-llama-3.1-8B-sft", - "hub_license": "llama3.1", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.476169872939846 - } - }, - { - "id": "Nekochu/Llama-3.1-8B-German-ORPO_float16_463ea77e46fb6d69c86f23df21b0ab0a0b9e77cd_False", - "model": { - "name": "Nekochu/Llama-3.1-8B-German-ORPO", - "sha": 
"463ea77e46fb6d69c86f23df21b0ab0a0b9e77cd", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.254052215833834, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4610710692074806, - "normalized_score": 46.10710692074806 - }, - "bbh": { - "name": "BBH", - "value": 0.4982577044334462, - "normalized_score": 29.419254274936463 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11706948640483383, - "normalized_score": 11.706948640483382 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3162751677852349, - "normalized_score": 8.83668903803132 - }, - "musr": { - "name": "MUSR", - "value": 0.46475, - "normalized_score": 16.860416666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33934507978723405, - "normalized_score": 26.593897754137114 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-13", - "submission_date": "2024-09-24", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.8883950726456662 - } - }, - { - "id": "Nekochu/Llama-3.1-8B-french-DPO_bfloat16_b0c66dd2a2814a6bfb05313ffec856fd4c6c7bd7_False", - "model": { - "name": "Nekochu/Llama-3.1-8B-french-DPO", - "sha": "b0c66dd2a2814a6bfb05313ffec856fd4c6c7bd7", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.701292312420733, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.46564227361179444, - "normalized_score": 46.56422736117945 - }, - "bbh": { - "name": "BBH", - "value": 0.5110888403999433, - "normalized_score": 30.03259699633449 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09743202416918428, - "normalized_score": 9.743202416918429 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.4215625, - "normalized_score": 11.561979166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3414228723404255, - "normalized_score": 26.824763593380613 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-12", - "submission_date": "2024-10-12", - "generation": 1, - "base_model": "NousResearch/Meta-Llama-3.1-8B-Instruct", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.6424149313308514 - } - }, - { - "id": "Nekochu/Luminia-13B-v3_bfloat16_602563f3af32b3c6be067ad522e6f3eaff4f8627_False", - "model": { - "name": "Nekochu/Luminia-13B-v3", - "sha": "602563f3af32b3c6be067ad522e6f3eaff4f8627", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 11.635076640566737, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25231829323971505, - "normalized_score": 25.231829323971507 - }, - "bbh": { - "name": "BBH", - "value": 0.41121515510929624, - "normalized_score": 17.690523920374847 - }, - "math": { - "name": 
"MATH Level 5", - "value": 0.01812688821752266, - "normalized_score": 1.812688821752266 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.3983333333333334, - "normalized_score": 8.891666666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.22149268617021275, - "normalized_score": 13.499187352245862 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-03-18", - "submission_date": "2024-09-25", - "generation": 1, - "base_model": "meta-llama/Llama-2-13b-chat-hf", - "hub_license": "apache-2.0", - "hub_hearts": 6, - "params_billions": 13.016, - "co2_cost": 2.277759486101257 - } - }, - { - "id": "Nekochu/Luminia-8B-RP_float16_619be17206729d86b898b9d1b3369a7135c1a9b9_False", - "model": { - "name": "Nekochu/Luminia-8B-RP", - "sha": "619be17206729d86b898b9d1b3369a7135c1a9b9", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.61809279716832, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5574165436597118, - "normalized_score": 55.74165436597117 - }, - "bbh": { - "name": "BBH", - "value": 0.5218151030627874, - "normalized_score": 31.802699112572423 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13595166163141995, - "normalized_score": 13.595166163141995 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.3997604166666666, - "normalized_score": 11.070052083333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3631150265957447, - "normalized_score": 29.235002955082745 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-13", - "submission_date": "2024-09-24", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 8.03, - "co2_cost": 1.9051954807151352 - } - }, - { - "id": "NeverSleep/Lumimaid-v0.2-12B_bfloat16_b04f4e8f9a0c64fbb271d1135b208c90c3aa0ad0_False", - "model": { - "name": "NeverSleep/Lumimaid-v0.2-12B", - "sha": "b04f4e8f9a0c64fbb271d1135b208c90c3aa0ad0", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 18.147314436553692, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.10993497253952846, - "normalized_score": 10.993497253952846 - }, - "bbh": { - "name": "BBH", - "value": 0.5395610525850818, - "normalized_score": 34.40988943485442 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05664652567975831, - "normalized_score": 5.664652567975831 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3145973154362416, - "normalized_score": 8.612975391498878 - }, - "musr": { - "name": "MUSR", - "value": 0.48211458333333335, - "normalized_score": 21.297656250000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3511469414893617, - "normalized_score": 27.905215721040182 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": 
false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-25", - "submission_date": "2024-07-31", - "generation": 0, - "base_model": "NeverSleep/Lumimaid-v0.2-12B", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 94, - "params_billions": 12.248, - "co2_cost": 3.1283867273639814 - } - }, - { - "id": "NeverSleep/Lumimaid-v0.2-8B_bfloat16_4563201f29ef18c62d16e9f6fffd3931a63ccb51_False", - "model": { - "name": "NeverSleep/Lumimaid-v0.2-8B", - "sha": "4563201f29ef18c62d16e9f6fffd3931a63ccb51", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.41199658071584, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5038109992597419, - "normalized_score": 50.3810999259742 - }, - "bbh": { - "name": "BBH", - "value": 0.5237767601226618, - "normalized_score": 31.963373781180234 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14350453172205438, - "normalized_score": 14.350453172205437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.311241610738255, - "normalized_score": 8.165548098434002 - }, - "musr": { - "name": "MUSR", - "value": 0.4303020833333333, - "normalized_score": 12.321093750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36361369680851063, - "normalized_score": 29.290410756501185 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-24", - "submission_date": "2024-08-09", - "generation": 0, - "base_model": "NeverSleep/Lumimaid-v0.2-8B", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 71, - "params_billions": 8.03, - "co2_cost": 1.4793910691125391 - } - }, - { - "id": "Nexesenex/Dolphin3.0-Llama3.1-1B-abliterated_bfloat16_3ab9a1cebae25ff08ad915328466b75b5dc8f860_True", - "model": { - "name": "Nexesenex/Dolphin3.0-Llama3.1-1B-abliterated", - "sha": "3ab9a1cebae25ff08ad915328466b75b5dc8f860", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 11.378391787254499, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5311883580012146, - "normalized_score": 53.11883580012146 - }, - "bbh": { - "name": "BBH", - "value": 0.3240787338568713, - "normalized_score": 6.8620788810826765 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03851963746223565, - "normalized_score": 3.8519637462235647 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2407718120805369, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.32367708333333334, - "normalized_score": 0.29296874999999956 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1373005319148936, - "normalized_score": 4.14450354609929 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-01", - "submission_date": "2025-03-05", - "generation": 1, - "base_model": "Nexesenex/Dolphin3.0-Llama3.1-1B-abliterated (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 0.34511206922249005 - } - }, - { - "id": "Nexesenex/Llama_3.1_8b_DeepDive_3_Prev_v1.0_bfloat16_7ac196a06fdc9e86b08a13cde4c76a38913dd647_True", - "model": { - "name": 
"Nexesenex/Llama_3.1_8b_DeepDive_3_Prev_v1.0", - "sha": "7ac196a06fdc9e86b08a13cde4c76a38913dd647", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.648390871227136, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6809144181881852, - "normalized_score": 68.09144181881851 - }, - "bbh": { - "name": "BBH", - "value": 0.5155095936229447, - "normalized_score": 31.122746611299153 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1865558912386707, - "normalized_score": 18.65558912386707 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.3665833333333333, - "normalized_score": 9.456250000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34375, - "normalized_score": 27.083333333333332 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-17", - "submission_date": "2025-02-28", - "generation": 1, - "base_model": "Nexesenex/Llama_3.1_8b_DeepDive_3_Prev_v1.0 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 3, - "params_billions": 8.03, - "co2_cost": 0.6851869942793544 - } - }, - { - "id": "Nexesenex/Llama_3.1_8b_DeepDive_3_R1_Prev_v1.0_bfloat16_c1a135395817932a87ae985fc7413078e52d0470_True", - "model": { - "name": "Nexesenex/Llama_3.1_8b_DeepDive_3_R1_Prev_v1.0", - "sha": "c1a135395817932a87ae985fc7413078e52d0470", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 27.472783833453928, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7100903380807368, - "normalized_score": 71.0090338080737 - }, - "bbh": { - "name": "BBH", - "value": 0.51203649030939, - "normalized_score": 30.73278378127992 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19259818731117825, - "normalized_score": 19.259818731117825 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30033557046979864, - "normalized_score": 6.711409395973152 - }, - "musr": { - "name": "MUSR", - "value": 0.37576041666666665, - "normalized_score": 10.003385416666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34408244680851063, - "normalized_score": 27.12027186761229 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-17", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "Nexesenex/Llama_3.1_8b_DeepDive_3_R1_Prev_v1.0 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 3, - "params_billions": 8.03, - "co2_cost": 1.3836783278954334 - } - }, - { - "id": "Nexesenex/Llama_3.1_8b_DobHerWild_R1_v1.1R_bfloat16_0f5f7b67439f16e1a82e7fdcd1f9cc771eb97e5e_True", - "model": { - "name": "Nexesenex/Llama_3.1_8b_DobHerWild_R1_v1.1R", - "sha": "0f5f7b67439f16e1a82e7fdcd1f9cc771eb97e5e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.728397716509164, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.759999024809727, - "normalized_score": 75.99990248097271 - }, - "bbh": { - 
"name": "BBH", - "value": 0.525696414662245, - "normalized_score": 32.825750312399464 - }, - "math": { - "name": "MATH Level 5", - "value": 0.23187311178247735, - "normalized_score": 23.187311178247736 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.38521875, - "normalized_score": 9.885677083333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36884973404255317, - "normalized_score": 29.8721926713948 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-18", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "Nexesenex/Llama_3.1_8b_DobHerWild_R1_v1.1R (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 3, - "params_billions": 8.03, - "co2_cost": 0.6974687182638726 - } - }, - { - "id": "Nexesenex/Llama_3.1_8b_DoberWild_v2.01_bfloat16_560554a7ce692b74c08aa0f69f60b9edaa67e136_True", - "model": { - "name": "Nexesenex/Llama_3.1_8b_DoberWild_v2.01", - "sha": "560554a7ce692b74c08aa0f69f60b9edaa67e136", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.16438239831213, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7995662619627034, - "normalized_score": 79.95662619627035 - }, - "bbh": { - "name": "BBH", - "value": 0.5250767747736031, - "normalized_score": 32.37775915384321 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2001510574018127, - "normalized_score": 20.01510574018127 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.4011875, - "normalized_score": 10.581770833333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3790724734042553, - "normalized_score": 31.008052600472812 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "Nexesenex/Llama_3.1_8b_DoberWild_v2.01 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 3, - "params_billions": 8.031, - "co2_cost": 0.6568801609790043 - } - }, - { - "id": "Nexesenex/Llama_3.1_8b_DoberWild_v2.02_bfloat16_fbc865067c1ef712763598fd8ee46aa6a4932d72_True", - "model": { - "name": "Nexesenex/Llama_3.1_8b_DoberWild_v2.02", - "sha": "fbc865067c1ef712763598fd8ee46aa6a4932d72", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.47563015963895, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7746368524404137, - "normalized_score": 77.46368524404136 - }, - "bbh": { - "name": "BBH", - "value": 0.531273698652086, - "normalized_score": 33.35331271516665 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19939577039274925, - "normalized_score": 19.939577039274926 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.39458333333333334, - "normalized_score": 9.456250000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 
0.3764128989361702, - "normalized_score": 30.712544326241126 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-28", - "submission_date": "2025-02-28", - "generation": 1, - "base_model": "Nexesenex/Llama_3.1_8b_DoberWild_v2.02 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 0.6844381598060006 - } - }, - { - "id": "Nexesenex/Llama_3.1_8b_DoberWild_v2.03_bfloat16_c90f034cc7badd745115f53064b029fce64c3c16_True", - "model": { - "name": "Nexesenex/Llama_3.1_8b_DoberWild_v2.03", - "sha": "c90f034cc7badd745115f53064b029fce64c3c16", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.82476968118114, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7764354135914928, - "normalized_score": 77.64354135914928 - }, - "bbh": { - "name": "BBH", - "value": 0.5294434267893284, - "normalized_score": 33.032829118870865 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20770392749244712, - "normalized_score": 20.770392749244714 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3045302013422819, - "normalized_score": 7.270693512304256 - }, - "musr": { - "name": "MUSR", - "value": 0.39058333333333334, - "normalized_score": 9.989583333333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37217420212765956, - "normalized_score": 30.24157801418439 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-28", - "submission_date": "2025-02-28", - "generation": 1, - "base_model": "Nexesenex/Llama_3.1_8b_DoberWild_v2.03 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 4, - "params_billions": 8.03, - "co2_cost": 0.6694867320616492 - } - }, - { - "id": "Nexesenex/Llama_3.1_8b_DodoWild_v2.01_bfloat16_5b6413555d32172bc6372075f0a31fbf7895723b_True", - "model": { - "name": "Nexesenex/Llama_3.1_8b_DodoWild_v2.01", - "sha": "5b6413555d32172bc6372075f0a31fbf7895723b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.309648600363573, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7977677008116243, - "normalized_score": 79.77677008116243 - }, - "bbh": { - "name": "BBH", - "value": 0.5252760762748857, - "normalized_score": 32.110872058033216 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1986404833836858, - "normalized_score": 19.86404833836858 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.40896874999999994, - "normalized_score": 12.521093749999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3738364361702128, - "normalized_score": 30.426270685579198 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "Nexesenex/Llama_3.1_8b_DodoWild_v2.01 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 2, - "params_billions": 8.031, - 
"co2_cost": 0.6917604997282902 - } - }, - { - "id": "Nexesenex/Llama_3.1_8b_DodoWild_v2.02_bfloat16__True", - "model": { - "name": "Nexesenex/Llama_3.1_8b_DodoWild_v2.02", - "sha": "", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.733559169624357, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8016895171478344, - "normalized_score": 80.16895171478345 - }, - "bbh": { - "name": "BBH", - "value": 0.5261737638679802, - "normalized_score": 32.31915252588661 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22734138972809667, - "normalized_score": 22.734138972809667 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.39706249999999993, - "normalized_score": 11.232812499999994 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37608045212765956, - "normalized_score": 30.67560579196217 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-28", - "submission_date": "2025-02-28", - "generation": 1, - "base_model": "Nexesenex/Llama_3.1_8b_DodoWild_v2.02 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 0.6860352698884155 - } - }, - { - "id": "Nexesenex/Llama_3.1_8b_DodoWild_v2.03_bfloat16_68b895fd2242abe1115c2bc4a13fdb7bb70b1811_True", - "model": { - "name": "Nexesenex/Llama_3.1_8b_DodoWild_v2.03", - "sha": "68b895fd2242abe1115c2bc4a13fdb7bb70b1811", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.60438973869286, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7941207108250552, - "normalized_score": 79.41207108250552 - }, - "bbh": { - "name": "BBH", - "value": 0.530825004382936, - "normalized_score": 33.022959512096214 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22205438066465258, - "normalized_score": 22.20543806646526 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.3958541666666667, - "normalized_score": 10.31510416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37857380319148937, - "normalized_score": 30.952644799054376 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-28", - "submission_date": "2025-02-28", - "generation": 1, - "base_model": "Nexesenex/Llama_3.1_8b_DodoWild_v2.03 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 4, - "params_billions": 8.03, - "co2_cost": 0.6845834230731944 - } - }, - { - "id": "Nexesenex/Llama_3.1_8b_DodoWild_v2.10_bfloat16_143314ac0b5bb2bd2bb6c05c3c167e534e291a24_True", - "model": { - "name": "Nexesenex/Llama_3.1_8b_DodoWild_v2.10", - "sha": "143314ac0b5bb2bd2bb6c05c3c167e534e291a24", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.406547342023725, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8053863748188141, - 
"normalized_score": 80.53863748188141 - }, - "bbh": { - "name": "BBH", - "value": 0.5278362703806528, - "normalized_score": 32.75807763538685 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1971299093655589, - "normalized_score": 19.71299093655589 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.41566666666666663, - "normalized_score": 11.558333333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3854720744680851, - "normalized_score": 31.719119385342786 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-28", - "submission_date": "2025-03-01", - "generation": 1, - "base_model": "Nexesenex/Llama_3.1_8b_DodoWild_v2.10 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 0.6632804225210029 - } - }, - { - "id": "Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.01_bfloat16_34f6ec6d3fa767d2dbaed76ede30d3bd4d09617c_True", - "model": { - "name": "Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.01", - "sha": "34f6ec6d3fa767d2dbaed76ede30d3bd4d09617c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.312043044222406, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7533544329046928, - "normalized_score": 75.33544329046929 - }, - "bbh": { - "name": "BBH", - "value": 0.5312389177563648, - "normalized_score": 33.28793158004305 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20166163141993956, - "normalized_score": 20.166163141993955 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.37470833333333337, - "normalized_score": 9.33854166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3732546542553192, - "normalized_score": 30.361628250591018 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-28", - "submission_date": "2025-02-28", - "generation": 1, - "base_model": "Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.01 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 0.711628014074646 - } - }, - { - "id": "Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.03_bfloat16_a63fd02d6595cb821cdb2f934d12e90a8da6016a_True", - "model": { - "name": "Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.03", - "sha": "a63fd02d6595cb821cdb2f934d12e90a8da6016a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.78932292513281, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7564019025075688, - "normalized_score": 75.64019025075689 - }, - "bbh": { - "name": "BBH", - "value": 0.5316448098766001, - "normalized_score": 33.285577384040586 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20921450151057402, - "normalized_score": 20.921450151057403 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3179530201342282, - "normalized_score": 9.060402684563762 - }, - "musr": { - "name": "MUSR", - "value": 0.3800416666666666, - "normalized_score": 
9.605208333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37200797872340424, - "normalized_score": 30.223108747044915 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-28", - "submission_date": "2025-02-28", - "generation": 1, - "base_model": "Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.03 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 0.6868622213761216 - } - }, - { - "id": "Nexesenex/Llama_3.1_8b_Dolermed_V1.01_bfloat16_3bbe50712a23b6d8711a1e628fcea1bc40178982_True", - "model": { - "name": "Nexesenex/Llama_3.1_8b_Dolermed_V1.01", - "sha": "3bbe50712a23b6d8711a1e628fcea1bc40178982", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.45268314376055, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.508657030013697, - "normalized_score": 50.865703001369695 - }, - "bbh": { - "name": "BBH", - "value": 0.5193615033347353, - "normalized_score": 31.7058028455963 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13444108761329304, - "normalized_score": 13.444108761329304 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.39448958333333334, - "normalized_score": 10.211197916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3570478723404255, - "normalized_score": 28.560874704491717 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-02-28", - "generation": 1, - "base_model": "Nexesenex/Llama_3.1_8b_Dolermed_V1.01 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 4, - "params_billions": 8.031, - "co2_cost": 0.7476537745023463 - } - }, - { - "id": "Nexesenex/Llama_3.1_8b_Dolerstormed_V1.04_bfloat16_a80370b825f9f4fa00daecc44433035f3f48aa2a_True", - "model": { - "name": "Nexesenex/Llama_3.1_8b_Dolerstormed_V1.04", - "sha": "a80370b825f9f4fa00daecc44433035f3f48aa2a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.113033386800094, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7889001183526376, - "normalized_score": 78.89001183526376 - }, - "bbh": { - "name": "BBH", - "value": 0.5195180641442355, - "normalized_score": 31.641151925155338 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19259818731117825, - "normalized_score": 19.259818731117825 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3221476510067114, - "normalized_score": 9.61968680089485 - }, - "musr": { - "name": "MUSR", - "value": 0.4029583333333333, - "normalized_score": 9.169791666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3888796542553192, - "normalized_score": 32.097739361702125 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-28", - "submission_date": "2025-03-01", - "generation": 1, - "base_model": "Nexesenex/Llama_3.1_8b_Dolerstormed_V1.04 (Merge)", 
- "hub_license": "llama3.1", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.6572829499007699 - } - }, - { - "id": "Nexesenex/Llama_3.1_8b_Hermedash_R1_V1.04_bfloat16_8625849c98769300f69b6b7695e02482c7f4f0b3_True", - "model": { - "name": "Nexesenex/Llama_3.1_8b_Hermedash_R1_V1.04", - "sha": "8625849c98769300f69b6b7695e02482c7f4f0b3", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.277769930189056, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7871514248859692, - "normalized_score": 78.71514248859691 - }, - "bbh": { - "name": "BBH", - "value": 0.5191641616026265, - "normalized_score": 31.658971634698442 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1865558912386707, - "normalized_score": 18.65558912386707 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32298657718120805, - "normalized_score": 9.731543624161072 - }, - "musr": { - "name": "MUSR", - "value": 0.4110520833333333, - "normalized_score": 10.881510416666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38821476063829785, - "normalized_score": 32.023862293144205 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-28", - "submission_date": "2025-02-28", - "generation": 1, - "base_model": "Nexesenex/Llama_3.1_8b_Hermedash_R1_V1.04 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 0.6696681062849801 - } - }, - { - "id": "Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.01_bfloat16_fce000e697ca714141a0962e5e50ab2a1d58f680_True", - "model": { - "name": "Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.01", - "sha": "fce000e697ca714141a0962e5e50ab2a1d58f680", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.809186985379387, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5001141415887622, - "normalized_score": 50.01141415887623 - }, - "bbh": { - "name": "BBH", - "value": 0.5170855986734039, - "normalized_score": 31.129961752766558 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17749244712990936, - "normalized_score": 17.749244712990937 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.40084374999999994, - "normalized_score": 12.63880208333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34266954787234044, - "normalized_score": 26.963283096926716 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-28", - "submission_date": "2025-02-28", - "generation": 1, - "base_model": "Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.01 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 3, - "params_billions": 8.03, - "co2_cost": 0.7556058524578672 - } - }, - { - "id": "Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.03_bfloat16_4859975e28c66fe2461474883a22fc4ac6d7561d_True", - "model": { - "name": "Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.03", - "sha": "4859975e28c66fe2461474883a22fc4ac6d7561d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - 
"architecture": "LlamaForCausalLM", - "average_score": 26.50163295870381, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6647528557560606, - "normalized_score": 66.47528557560605 - }, - "bbh": { - "name": "BBH", - "value": 0.5140787918844759, - "normalized_score": 30.801237679727436 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18580060422960726, - "normalized_score": 18.580060422960727 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.3613125, - "normalized_score": 9.130729166666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3488198138297872, - "normalized_score": 27.64664598108747 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-28", - "submission_date": "2025-02-28", - "generation": 1, - "base_model": "Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.03 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 0.6914988124726777 - } - }, - { - "id": "Nexesenex/Llama_3.1_8b_Hermedive_V1.01_bfloat16_9037f457eff988502bca12f4f69a7a7dc0f4011e_True", - "model": { - "name": "Nexesenex/Llama_3.1_8b_Hermedive_V1.01", - "sha": "9037f457eff988502bca12f4f69a7a7dc0f4011e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.78885888839048, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5061592131101034, - "normalized_score": 50.61592131101034 - }, - "bbh": { - "name": "BBH", - "value": 0.4918197968512548, - "normalized_score": 27.6484297794192 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1646525679758308, - "normalized_score": 16.46525679758308 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28942953020134227, - "normalized_score": 5.257270693512303 - }, - "musr": { - "name": "MUSR", - "value": 0.36965624999999996, - "normalized_score": 8.407031249999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3550531914893617, - "normalized_score": 28.33924349881796 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-02-28", - "generation": 1, - "base_model": "Nexesenex/Llama_3.1_8b_Hermedive_V1.01 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 3, - "params_billions": 8.031, - "co2_cost": 0.7053396012461731 - } - }, - { - "id": "Nexesenex/Llama_3.1_8b_Mediver_V1.01_bfloat16_458b223cf66a9057d048d3e1bca609c1ad952ec3_True", - "model": { - "name": "Nexesenex/Llama_3.1_8b_Mediver_V1.01", - "sha": "458b223cf66a9057d048d3e1bca609c1ad952ec3", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 11.984272459370358, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.18847103463255274, - "normalized_score": 18.847103463255273 - }, - "bbh": { - "name": "BBH", - "value": 0.44148325896745977, - "normalized_score": 20.441503466276757 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0015105740181268882, - "normalized_score": 0.1510574018126888 - }, - "gpqa": { - 
"name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.38978124999999997, - "normalized_score": 6.622656250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2993683510638298, - "normalized_score": 22.1520390070922 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "Nexesenex/Llama_3.1_8b_Mediver_V1.01 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 0, - "params_billions": 8.031, - "co2_cost": 0.7479143699507365 - } - }, - { - "id": "Nexesenex/Llama_3.1_8b_Medusa_v1.01_bfloat16_c133aae6cba4c86a8aabd686d6f137c34fdf67f0_True", - "model": { - "name": "Nexesenex/Llama_3.1_8b_Medusa_v1.01", - "sha": "c133aae6cba4c86a8aabd686d6f137c34fdf67f0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 27.38168287785029, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7685419132346618, - "normalized_score": 76.85419132346618 - }, - "bbh": { - "name": "BBH", - "value": 0.5017727187674992, - "normalized_score": 30.029014461113167 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14652567975830816, - "normalized_score": 14.652567975830816 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.40667708333333336, - "normalized_score": 9.03463541666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3531416223404255, - "normalized_score": 28.126846926713938 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "Nexesenex/Llama_3.1_8b_Medusa_v1.01 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 3, - "params_billions": 8.031, - "co2_cost": 0.65086918442432 - } - }, - { - "id": "Nexesenex/Llama_3.1_8b_Smarteaz_0.2_R1_bfloat16_c67ac7a09e51cff541126912a29d7b2985d210ce_True", - "model": { - "name": "Nexesenex/Llama_3.1_8b_Smarteaz_0.2_R1", - "sha": "c67ac7a09e51cff541126912a29d7b2985d210ce", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.105107642426063, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6345529860769425, - "normalized_score": 63.455298607694246 - }, - "bbh": { - "name": "BBH", - "value": 0.5112504828088763, - "normalized_score": 30.6976172290985 - }, - "math": { - "name": "MATH Level 5", - "value": 0.26057401812688824, - "normalized_score": 26.057401812688823 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30033557046979864, - "normalized_score": 6.711409395973152 - }, - "musr": { - "name": "MUSR", - "value": 0.4188020833333333, - "normalized_score": 12.316927083333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3645279255319149, - "normalized_score": 29.391991725768314 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - 
"metadata": { - "upload_date": "2025-02-17", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "Nexesenex/Llama_3.1_8b_Smarteaz_0.2_R1 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 3, - "params_billions": 8.03, - "co2_cost": 0.7310147805115926 - } - }, - { - "id": "Nexesenex/Llama_3.1_8b_Smarteaz_V1.01_bfloat16_195c80b1bf8431108d0f7bb87c3e84277793f437_True", - "model": { - "name": "Nexesenex/Llama_3.1_8b_Smarteaz_V1.01", - "sha": "195c80b1bf8431108d0f7bb87c3e84277793f437", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.623634123187415, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8151283040111349, - "normalized_score": 81.51283040111349 - }, - "bbh": { - "name": "BBH", - "value": 0.5241273021389002, - "normalized_score": 32.275457070272665 - }, - "math": { - "name": "MATH Level 5", - "value": 0.23413897280966767, - "normalized_score": 23.413897280966765 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.37892708333333336, - "normalized_score": 8.199218750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3735871010638298, - "normalized_score": 30.398566784869974 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "Nexesenex/Llama_3.1_8b_Smarteaz_V1.01 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 3, - "params_billions": 8.03, - "co2_cost": 0.6993025622981146 - } - }, - { - "id": "Nexesenex/Llama_3.1_8b_Stormeder_v1.04_bfloat16_4c0c354283e2fc130f5fb09c9d2ca23a0e5ea0d7_True", - "model": { - "name": "Nexesenex/Llama_3.1_8b_Stormeder_v1.04", - "sha": "4c0c354283e2fc130f5fb09c9d2ca23a0e5ea0d7", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.709951883012703, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7852531283660686, - "normalized_score": 78.52531283660687 - }, - "bbh": { - "name": "BBH", - "value": 0.5207086605445487, - "normalized_score": 31.780498933752288 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18504531722054382, - "normalized_score": 18.50453172205438 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32046979865771813, - "normalized_score": 9.395973154362418 - }, - "musr": { - "name": "MUSR", - "value": 0.3948958333333334, - "normalized_score": 8.36197916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38522273936170215, - "normalized_score": 31.691415484633573 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-28", - "submission_date": "2025-02-28", - "generation": 1, - "base_model": "Nexesenex/Llama_3.1_8b_Stormeder_v1.04 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 0.6546167837636284 - } - }, - { - "id": "Nexesenex/Llama_3.1_8b_Typhoon_v1.03_bfloat16_67b0226762ae10e3600657831bb0f5e144057036_True", - "model": { - "name": "Nexesenex/Llama_3.1_8b_Typhoon_v1.03", - "sha": 
"67b0226762ae10e3600657831bb0f5e144057036", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.634801511067575, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8078343240379969, - "normalized_score": 80.7834324037997 - }, - "bbh": { - "name": "BBH", - "value": 0.5313965802672672, - "normalized_score": 33.3207800758965 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22734138972809667, - "normalized_score": 22.734138972809667 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.38146875, - "normalized_score": 7.783593750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3842253989361702, - "normalized_score": 31.580599881796683 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-28", - "submission_date": "2025-02-28", - "generation": 1, - "base_model": "Nexesenex/Llama_3.1_8b_Typhoon_v1.03 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 0.6863833166655988 - } - }, - { - "id": "Nexesenex/Llama_3.2_1b_AquaSyn_0.1_bfloat16_10be340c512c912bb0c80bb528534cf7ceab5d3c_False", - "model": { - "name": "Nexesenex/Llama_3.2_1b_AquaSyn_0.1", - "sha": "10be340c512c912bb0c80bb528534cf7ceab5d3c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 6.988906174606645, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2741004977903075, - "normalized_score": 27.41004977903075 - }, - "bbh": { - "name": "BBH", - "value": 0.3284363786988483, - "normalized_score": 6.212570678140739 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02190332326283988, - "normalized_score": 2.190332326283988 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2483221476510067, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.34603125, - "normalized_score": 1.920572916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1377992021276596, - "normalized_score": 4.199911347517731 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-22", - "submission_date": "2025-02-22", - "generation": 1, - "base_model": "Nexesenex/Llama_3.2_1b_AquaSyn_0.1 (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 1.498, - "co2_cost": 0.3720924895779698 - } - }, - { - "id": "Nexesenex/Llama_3.2_1b_AquaSyn_0.11_bfloat16_450eb37685485fe0cbed86f7bfe1bec224945676_True", - "model": { - "name": "Nexesenex/Llama_3.2_1b_AquaSyn_0.11", - "sha": "450eb37685485fe0cbed86f7bfe1bec224945676", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.867026710843518, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24312601674667658, - "normalized_score": 24.312601674667658 - }, - "bbh": { - "name": "BBH", - "value": 0.3111956727868642, - "normalized_score": 3.648692187121848 - }, - "math": { - "name": "MATH Level 5", - 
"value": 0.023413897280966767, - "normalized_score": 2.3413897280966767 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.3367604166666667, - "normalized_score": 1.5950520833333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1116190159574468, - "normalized_score": 1.2910017730496441 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-26", - "submission_date": "2025-02-26", - "generation": 1, - "base_model": "Nexesenex/Llama_3.2_1b_AquaSyn_0.11 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.498, - "co2_cost": 0.39339451169055945 - } - }, - { - "id": "Nexesenex/Llama_3.2_1b_Dolto_0.1_bfloat16_36b009df0e265a4b67a581dbec03c2ea5f6a317d_True", - "model": { - "name": "Nexesenex/Llama_3.2_1b_Dolto_0.1", - "sha": "36b009df0e265a4b67a581dbec03c2ea5f6a317d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 11.865611272101537, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5433782364127182, - "normalized_score": 54.337823641271825 - }, - "bbh": { - "name": "BBH", - "value": 0.3350056502150862, - "normalized_score": 6.613056736761025 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03700906344410876, - "normalized_score": 3.7009063444108756 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23741610738255034, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.342125, - "normalized_score": 2.498958333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.13638630319148937, - "normalized_score": 4.042922576832151 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-22", - "submission_date": "2025-02-22", - "generation": 1, - "base_model": "Nexesenex/Llama_3.2_1b_Dolto_0.1 (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 1, - "params_billions": 1.498, - "co2_cost": 0.3695687778343242 - } - }, - { - "id": "Nexesenex/Llama_3.2_1b_Odyssea_V1_bfloat16_bd402805c92e2b707afa506950e4057226104021_True", - "model": { - "name": "Nexesenex/Llama_3.2_1b_Odyssea_V1", - "sha": "bd402805c92e2b707afa506950e4057226104021", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.724136311067048, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2552660274737696, - "normalized_score": 25.526602747376963 - }, - "bbh": { - "name": "BBH", - "value": 0.3009715832098017, - "normalized_score": 2.6467030023577163 - }, - "math": { - "name": "MATH Level 5", - "value": 0.014350453172205438, - "normalized_score": 1.4350453172205437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.33936458333333336, - "normalized_score": 1.920572916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11527593085106383, - "normalized_score": 1.6973256501182026 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": 
false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-22", - "submission_date": "2025-02-22", - "generation": 1, - "base_model": "Nexesenex/Llama_3.2_1b_Odyssea_V1 (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 1.498, - "co2_cost": 0.37123814821002854 - } - }, - { - "id": "Nexesenex/Llama_3.2_1b_Odyssea_V1.01_bfloat16_802bdf9518add0247fa3abfd0d2039c4299c36f7_True", - "model": { - "name": "Nexesenex/Llama_3.2_1b_Odyssea_V1.01", - "sha": "802bdf9518add0247fa3abfd0d2039c4299c36f7", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.655298422411647, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24954564998648032, - "normalized_score": 24.95456499864803 - }, - "bbh": { - "name": "BBH", - "value": 0.3044651612138552, - "normalized_score": 2.848403718897272 - }, - "math": { - "name": "MATH Level 5", - "value": 0.017371601208459216, - "normalized_score": 1.7371601208459215 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2558724832214765, - "normalized_score": 0.7829977628635317 - }, - "musr": { - "name": "MUSR", - "value": 0.34203125, - "normalized_score": 1.920572916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11519281914893617, - "normalized_score": 1.6880910165484628 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-26", - "submission_date": "2025-02-26", - "generation": 1, - "base_model": "Nexesenex/Llama_3.2_1b_Odyssea_V1.01 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.498, - "co2_cost": 0.3888020975076618 - } - }, - { - "id": "Nexesenex/Llama_3.2_1b_OpenTree_R1_0.1_bfloat16_84f5694efc7f14c835b07b961b924bf74033b841_True", - "model": { - "name": "Nexesenex/Llama_3.2_1b_OpenTree_R1_0.1", - "sha": "84f5694efc7f14c835b07b961b924bf74033b841", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 12.343255865705624, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5366339091388627, - "normalized_score": 53.663390913886275 - }, - "bbh": { - "name": "BBH", - "value": 0.3279521771600605, - "normalized_score": 6.205590121534224 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04758308157099698, - "normalized_score": 4.758308157099698 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2525167785234899, - "normalized_score": 0.33557046979865535 - }, - "musr": { - "name": "MUSR", - "value": 0.31307291666666665, - "normalized_score": 1.6000000000000014 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16747007978723405, - "normalized_score": 7.496675531914894 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-22", - "submission_date": "2025-02-22", - "generation": 1, - "base_model": "Nexesenex/Llama_3.2_1b_OpenTree_R1_0.1 (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 1, - "params_billions": 1.498, - "co2_cost": 0.3669722679816723 - } - }, - { - "id": "Nexesenex/Llama_3.2_1b_OrcaSun_V1_bfloat16_e27d69f429dacb17ec280ea3af055f0a951a77d0_True", - "model": { - "name": 
"Nexesenex/Llama_3.2_1b_OrcaSun_V1", - "sha": "e27d69f429dacb17ec280ea3af055f0a951a77d0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.801537525133925, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5948605256275571, - "normalized_score": 59.48605256275572 - }, - "bbh": { - "name": "BBH", - "value": 0.355031362479927, - "normalized_score": 9.790397902616846 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05966767371601209, - "normalized_score": 5.966767371601208 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23657718120805368, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.33803125, - "normalized_score": 3.5205729166666675 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.19040890957446807, - "normalized_score": 10.045434397163119 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-06", - "generation": 1, - "base_model": "Nexesenex/Llama_3.2_1b_OrcaSun_V1 (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 1.498, - "co2_cost": 0.3649577293888529 - } - }, - { - "id": "Nexesenex/Llama_3.2_1b_RandomLego_RP_R1_0.1_bfloat16_b9f06764ed2b70d955cb320bbeb574b85a11ba6e_True", - "model": { - "name": "Nexesenex/Llama_3.2_1b_RandomLego_RP_R1_0.1", - "sha": "b9f06764ed2b70d955cb320bbeb574b85a11ba6e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 12.811633255227909, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5542693386880144, - "normalized_score": 55.42693386880145 - }, - "bbh": { - "name": "BBH", - "value": 0.34277067367168224, - "normalized_score": 7.937728461020434 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05664652567975831, - "normalized_score": 5.664652567975831 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3249166666666667, - "normalized_score": 1.58125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15633311170212766, - "normalized_score": 6.259234633569739 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-22", - "submission_date": "2025-02-22", - "generation": 1, - "base_model": "Nexesenex/Llama_3.2_1b_RandomLego_RP_R1_0.1 (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 2, - "params_billions": 1.498, - "co2_cost": 0.3584078542302758 - } - }, - { - "id": "Nexesenex/Llama_3.2_1b_SunOrca_V1_bfloat16_d4113f570bf95a718f508728fc59f6e99080d5a7_True", - "model": { - "name": "Nexesenex/Llama_3.2_1b_SunOrca_V1", - "sha": "d4113f570bf95a718f508728fc59f6e99080d5a7", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.008724276561884, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.542953807009845, - "normalized_score": 54.295380700984495 - }, - "bbh": { - "name": "BBH", - "value": 0.34306447662530104, - "normalized_score": 7.852676178179103 - }, - 
"math": { - "name": "MATH Level 5", - "value": 0.06722054380664652, - "normalized_score": 6.722054380664652 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.32625, - "normalized_score": 2.114583333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.18841422872340424, - "normalized_score": 9.82380319148936 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-06", - "generation": 1, - "base_model": "Nexesenex/Llama_3.2_1b_SunOrca_V1 (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 1.498, - "co2_cost": 0.3575000928997862 - } - }, - { - "id": "Nexesenex/Llama_3.2_1b_Sydonia_0.1_bfloat16_3a4d831813bcbde4e0027e37a986cec773802aa5_False", - "model": { - "name": "Nexesenex/Llama_3.2_1b_Sydonia_0.1", - "sha": "3a4d831813bcbde4e0027e37a986cec773802aa5", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.524505982897409, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21967047434141412, - "normalized_score": 21.96704743414141 - }, - "bbh": { - "name": "BBH", - "value": 0.31210928710549807, - "normalized_score": 4.742438568322573 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02039274924471299, - "normalized_score": 2.0392749244712993 - }, - "gpqa": { - "name": "GPQA", - "value": 0.22818791946308725, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.33818750000000003, - "normalized_score": 1.9067708333333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12242353723404255, - "normalized_score": 2.491504137115838 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-22", - "submission_date": "2025-02-22", - "generation": 1, - "base_model": "Nexesenex/Llama_3.2_1b_Sydonia_0.1 (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 1.498, - "co2_cost": 0.37070692849973563 - } - }, - { - "id": "Nexesenex/Llama_3.2_1b_Syneridol_0.2_bfloat16_1b09dee1571a9e973e82d1fef4e33e469dc281d6_False", - "model": { - "name": "Nexesenex/Llama_3.2_1b_Syneridol_0.2", - "sha": "1b09dee1571a9e973e82d1fef4e33e469dc281d6", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.41209307631663, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21574865800520399, - "normalized_score": 21.5748658005204 - }, - "bbh": { - "name": "BBH", - "value": 0.3138849872298115, - "normalized_score": 4.769662709936999 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02190332326283988, - "normalized_score": 2.190332326283988 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2348993288590604, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.33428125000000003, - "normalized_score": 1.4184895833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12267287234042554, - "normalized_score": 2.519208037825059 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - 
"is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-22", - "submission_date": "2025-02-22", - "generation": 1, - "base_model": "Nexesenex/Llama_3.2_1b_Syneridol_0.2 (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 1.498, - "co2_cost": 0.37818584959224644 - } - }, - { - "id": "Nexesenex/Llama_3.2_1b_Synopsys_0.1_bfloat16_9b47c5de30189f552a093684e916f945ea8b49fe_False", - "model": { - "name": "Nexesenex/Llama_3.2_1b_Synopsys_0.1", - "sha": "9b47c5de30189f552a093684e916f945ea8b49fe", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.959888823517752, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17638089158987041, - "normalized_score": 17.638089158987043 - }, - "bbh": { - "name": "BBH", - "value": 0.31619439082949846, - "normalized_score": 4.9658457398394615 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01661631419939577, - "normalized_score": 1.6616314199395772 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23909395973154363, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.34609375000000003, - "normalized_score": 2.9283854166666674 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12308843085106383, - "normalized_score": 2.565381205673758 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-22", - "submission_date": "2025-02-22", - "generation": 1, - "base_model": "Nexesenex/Llama_3.2_1b_Synopsys_0.1 (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 1.498, - "co2_cost": 0.377301673436357 - } - }, - { - "id": "Nexesenex/Llama_3.2_1b_Synopsys_0.11_bfloat16_c0b74cb7b633876de47a3033fe2ef23c3f9853ca_True", - "model": { - "name": "Nexesenex/Llama_3.2_1b_Synopsys_0.11", - "sha": "c0b74cb7b633876de47a3033fe2ef23c3f9853ca", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 6.557108632155365, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.28421698870109086, - "normalized_score": 28.421698870109086 - }, - "bbh": { - "name": "BBH", - "value": 0.31019707628668325, - "normalized_score": 3.4442338168143976 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01283987915407855, - "normalized_score": 1.283987915407855 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2625838926174497, - "normalized_score": 1.6778523489932917 - }, - "musr": { - "name": "MUSR", - "value": 0.35133333333333333, - "normalized_score": 3.1500000000000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11228390957446809, - "normalized_score": 1.3648788416075646 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-26", - "submission_date": "2025-02-26", - "generation": 1, - "base_model": "Nexesenex/Llama_3.2_1b_Synopsys_0.11 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.498, - "co2_cost": 0.3999450405712147 - } - }, - { - "id": "Nexesenex/Llama_3.2_3b_Kermes_v1_bfloat16_0a8ba971a51571175580b19cb9696169fe837807_True", - "model": { - "name": 
"Nexesenex/Llama_3.2_3b_Kermes_v1", - "sha": "0a8ba971a51571175580b19cb9696169fe837807", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.069896218578325, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4851759996808468, - "normalized_score": 48.51759996808468 - }, - "bbh": { - "name": "BBH", - "value": 0.4409910297279671, - "normalized_score": 21.16913127462915 - }, - "math": { - "name": "MATH Level 5", - "value": 0.030966767371601207, - "normalized_score": 3.096676737160121 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27348993288590606, - "normalized_score": 3.1319910514541416 - }, - "musr": { - "name": "MUSR", - "value": 0.40702083333333333, - "normalized_score": 9.310937500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2547373670212766, - "normalized_score": 17.193040780141843 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-11", - "submission_date": "2025-02-13", - "generation": 1, - "base_model": "Nexesenex/Llama_3.2_3b_Kermes_v1 (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 2, - "params_billions": 3.213, - "co2_cost": 0.5856456956570596 - } - }, - { - "id": "Nexesenex/Llama_3.2_3b_Kermes_v2_bfloat16_722ae4841fa96ad40b98d6d95931b592a8cc256e_True", - "model": { - "name": "Nexesenex/Llama_3.2_3b_Kermes_v2", - "sha": "722ae4841fa96ad40b98d6d95931b592a8cc256e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.49497166284455, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5753766672429155, - "normalized_score": 57.53766672429155 - }, - "bbh": { - "name": "BBH", - "value": 0.44554539692939316, - "normalized_score": 22.049944802060324 - }, - "math": { - "name": "MATH Level 5", - "value": 0.054380664652567974, - "normalized_score": 5.438066465256798 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.37781249999999994, - "normalized_score": 4.659895833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2734375, - "normalized_score": 19.270833333333332 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-12", - "submission_date": "2025-02-12", - "generation": 1, - "base_model": "Nexesenex/Llama_3.2_3b_Kermes_v2 (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 2, - "params_billions": 3.213, - "co2_cost": 0.593093942086941 - } - }, - { - "id": "Nexesenex/Llama_3.2_3b_Kermes_v2.1_bfloat16_e1234eecf6f6de09726f04c884eb144f0fd53c66_True", - "model": { - "name": "Nexesenex/Llama_3.2_3b_Kermes_v2.1", - "sha": "e1234eecf6f6de09726f04c884eb144f0fd53c66", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.90711773153537, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5583906257618674, - "normalized_score": 55.839062576186734 - }, - "bbh": { - "name": "BBH", - "value": 0.44638999626044323, - "normalized_score": 
22.166370114893436 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05211480362537765, - "normalized_score": 5.211480362537765 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.3963541666666666, - "normalized_score": 7.510937500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.26919880319148937, - "normalized_score": 18.799867021276594 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-13", - "submission_date": "2025-02-13", - "generation": 1, - "base_model": "Nexesenex/Llama_3.2_3b_Kermes_v2.1 (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 2, - "params_billions": 3.213, - "co2_cost": 0.5762673428022961 - } - }, - { - "id": "Nexesenex/Nemotron_W_4b_Halo_0.1_bfloat16_762d22637e0b1523dfcd53c001cb4613312ee658_True", - "model": { - "name": "Nexesenex/Nemotron_W_4b_Halo_0.1", - "sha": "762d22637e0b1523dfcd53c001cb4613312ee658", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 15.094245712466758, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3627275628665275, - "normalized_score": 36.27275628665275 - }, - "bbh": { - "name": "BBH", - "value": 0.4135101667655742, - "normalized_score": 18.55038854839016 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04229607250755287, - "normalized_score": 4.229607250755287 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28020134228187926, - "normalized_score": 4.026845637583901 - }, - "musr": { - "name": "MUSR", - "value": 0.41651041666666666, - "normalized_score": 10.763802083333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.25049867021276595, - "normalized_score": 16.722074468085104 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-16", - "submission_date": "2025-02-21", - "generation": 1, - "base_model": "Nexesenex/Nemotron_W_4b_Halo_0.1 (Merge)", - "hub_license": "", - "hub_hearts": 3, - "params_billions": 4.513, - "co2_cost": 0.7189799733472263 - } - }, - { - "id": "Nexesenex/Nemotron_W_4b_MagLight_0.1_bfloat16_7cef3440b378aa345e3db26db8ee69e8ece7fd8a_True", - "model": { - "name": "Nexesenex/Nemotron_W_4b_MagLight_0.1", - "sha": "7cef3440b378aa345e3db26db8ee69e8ece7fd8a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.194616822912348, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4230275668559422, - "normalized_score": 42.30275668559422 - }, - "bbh": { - "name": "BBH", - "value": 0.42314083807225433, - "normalized_score": 19.287938293762227 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04003021148036254, - "normalized_score": 4.003021148036254 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.41120833333333334, - "normalized_score": 9.934375000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2544880319148936, - "normalized_score": 17.165336879432623 - } - }, - "features": { - 
"is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-16", - "submission_date": "2025-02-21", - "generation": 1, - "base_model": "Nexesenex/Nemotron_W_4b_MagLight_0.1 (Merge)", - "hub_license": "", - "hub_hearts": 3, - "params_billions": 4.513, - "co2_cost": 0.6645364215346731 - } - }, - { - "id": "Nexesenex/Qwen_2.5_3b_Smarteaz_0.01a_bfloat16_c887feb9b69e73b424f39b22828f9b911da50d66_True", - "model": { - "name": "Nexesenex/Qwen_2.5_3b_Smarteaz_0.01a", - "sha": "c887feb9b69e73b424f39b22828f9b911da50d66", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 19.961623188488065, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4011954946209391, - "normalized_score": 40.119549462093914 - }, - "bbh": { - "name": "BBH", - "value": 0.4636652015725344, - "normalized_score": 24.370416982068875 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1805135951661631, - "normalized_score": 18.051359516616312 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.43204166666666666, - "normalized_score": 12.871875000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2859873670212766, - "normalized_score": 20.665263002364064 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "Nexesenex/Qwen_2.5_3b_Smarteaz_0.01a (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 3.085, - "co2_cost": 0.7280219550441767 - } - }, - { - "id": "Nexesenex/pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL_bfloat16_8c1916bcfe68de1f2fbd47de8846c9879f81115e_True", - "model": { - "name": "Nexesenex/pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL", - "sha": "8c1916bcfe68de1f2fbd47de8846c9879f81115e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 15.011891964697455, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5889905450870357, - "normalized_score": 58.89905450870358 - }, - "bbh": { - "name": "BBH", - "value": 0.3562492190965966, - "normalized_score": 9.728333239440493 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07477341389728095, - "normalized_score": 7.477341389728095 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26677852348993286, - "normalized_score": 2.2371364653243813 - }, - "musr": { - "name": "MUSR", - "value": 0.33955208333333337, - "normalized_score": 2.8106770833333345 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1802692819148936, - "normalized_score": 8.918809101654844 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-01", - "submission_date": "2025-03-05", - "generation": 1, - "base_model": "Nexesenex/pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 
0.3439868370003894 - } - }, - { - "id": "Nexusflow/NexusRaven-V2-13B_bfloat16_cdab7132db4a4fd64513123374ea1451d85a7ace_False", - "model": { - "name": "Nexusflow/NexusRaven-V2-13B", - "sha": "cdab7132db4a4fd64513123374ea1451d85a7ace", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 8.488064786804268, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1790781792311068, - "normalized_score": 17.90781792311068 - }, - "bbh": { - "name": "BBH", - "value": 0.39488604640507335, - "normalized_score": 15.336448395229596 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02945619335347432, - "normalized_score": 2.9456193353474323 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.3736875, - "normalized_score": 3.7109374999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.18716755319148937, - "normalized_score": 9.685283687943262 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-12-04", - "submission_date": "2024-06-12", - "generation": 1, - "base_model": "codellama/CodeLlama-13b-Instruct-hf", - "hub_license": "other", - "hub_hearts": 466, - "params_billions": 13.0, - "co2_cost": 2.179609826431666 - } - }, - { - "id": "NikolaSigmoid/AceMath-1.5B-Instruct-1epoch_bfloat16_166818c371eaafb212b243aecadd50b1079fa776_False", - "model": { - "name": "NikolaSigmoid/AceMath-1.5B-Instruct-1epoch", - "sha": "166818c371eaafb212b243aecadd50b1079fa776", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "Qwen2ForCausalLM", - "average_score": 17.507477680245945, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2848918646967823, - "normalized_score": 28.48918646967823 - }, - "bbh": { - "name": "BBH", - "value": 0.426284784119477, - "normalized_score": 19.829634263073658 - }, - "math": { - "name": "MATH Level 5", - "value": 0.30513595166163143, - "normalized_score": 30.513595166163142 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.39251041666666664, - "normalized_score": 7.230468750000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.23761635638297873, - "normalized_score": 15.290706264775414 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-05", - "submission_date": "2025-03-06", - "generation": 0, - "base_model": "NikolaSigmoid/AceMath-1.5B-Instruct-1epoch", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.791, - "co2_cost": 1.3141206228546138 - } - }, - { - "id": "NikolaSigmoid/AceMath-1.5B-Instruct-dolphin-r1-200_bfloat16_1e4805e2d12d993d0fd155af8a5ba420d7abc0b4_False", - "model": { - "name": "NikolaSigmoid/AceMath-1.5B-Instruct-dolphin-r1-200", - "sha": "1e4805e2d12d993d0fd155af8a5ba420d7abc0b4", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.389863637734279, - 
"has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.18080249294095221, - "normalized_score": 18.08024929409522 - }, - "bbh": { - "name": "BBH", - "value": 0.28148007801214714, - "normalized_score": 1.586299722165597 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2558724832214765, - "normalized_score": 0.7829977628635317 - }, - "musr": { - "name": "MUSR", - "value": 0.37495833333333334, - "normalized_score": 4.303125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11427859042553191, - "normalized_score": 1.5865100472813234 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-05", - "submission_date": "2025-03-06", - "generation": 1, - "base_model": "nvidia/AceMath-1.5B-Instruct", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 0.928, - "co2_cost": 3.363059836301886 - } - }, - { - "id": "NikolaSigmoid/DeepSeek-R1-Distill-Qwen-1.5B-500_bfloat16_54484019f17f6f14964cee95f7386bbaee385374_False", - "model": { - "name": "NikolaSigmoid/DeepSeek-R1-Distill-Qwen-1.5B-500", - "sha": "54484019f17f6f14964cee95f7386bbaee385374", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.5788564701687924, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17485715678843247, - "normalized_score": 17.48571567884325 - }, - "bbh": { - "name": "BBH", - "value": 0.2601595454586609, - "normalized_score": 0.3579812834224598 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24580536912751677, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.33796875, - "normalized_score": 2.2460937499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1124501329787234, - "normalized_score": 1.383348108747044 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-04", - "submission_date": "2025-03-04", - "generation": 2, - "base_model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.157, - "co2_cost": 0.9581683956345384 - } - }, - { - "id": "NikolaSigmoid/acemath-200_bfloat16_166818c371eaafb212b243aecadd50b1079fa776_False", - "model": { - "name": "NikolaSigmoid/acemath-200", - "sha": "166818c371eaafb212b243aecadd50b1079fa776", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "Qwen2ForCausalLM", - "average_score": 17.507477680245945, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2848918646967823, - "normalized_score": 28.48918646967823 - }, - "bbh": { - "name": "BBH", - "value": 0.426284784119477, - "normalized_score": 19.829634263073658 - }, - "math": { - "name": "MATH Level 5", - "value": 0.30513595166163143, - "normalized_score": 30.513595166163142 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 
0.39251041666666664, - "normalized_score": 7.230468750000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.23761635638297873, - "normalized_score": 15.290706264775414 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-04", - "submission_date": "2025-03-04", - "generation": 1, - "base_model": "nvidia/AceMath-1.5B-Instruct", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.791, - "co2_cost": 1.310058879931355 - } - }, - { - "id": "NikolaSigmoid/phi-4-14b_bfloat16_c6220bde10fff762dbd72c3331894aa4cade249d_False", - "model": { - "name": "NikolaSigmoid/phi-4-14b", - "sha": "c6220bde10fff762dbd72c3331894aa4cade249d", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "", - "average_score": 29.913839706837063, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.05607898154674043, - "normalized_score": 5.607898154674043 - }, - "bbh": { - "name": "BBH", - "value": 0.669500080799667, - "normalized_score": 52.50069258575488 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2938066465256798, - "normalized_score": 29.38066465256798 - }, - "gpqa": { - "name": "GPQA", - "value": 0.4035234899328859, - "normalized_score": 20.46979865771812 - }, - "musr": { - "name": "MUSR", - "value": 0.5046875000000001, - "normalized_score": 23.985937499999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.527842420212766, - "normalized_score": 47.538046690307326 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-26", - "submission_date": "2025-02-28", - "generation": 2, - "base_model": "microsoft/phi-4", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.704, - "co2_cost": 1.3519592058580718 - } - }, - { - "id": "NikolaSigmoid/phi-4-1steps_bfloat16_c6220bde10fff762dbd72c3331894aa4cade249d_False", - "model": { - "name": "NikolaSigmoid/phi-4-1steps", - "sha": "c6220bde10fff762dbd72c3331894aa4cade249d", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "", - "average_score": 29.87038792129772, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.05275668559422333, - "normalized_score": 5.275668559422333 - }, - "bbh": { - "name": "BBH", - "value": 0.6707359457278651, - "normalized_score": 52.760921126950045 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2983383685800604, - "normalized_score": 29.83383685800604 - }, - "gpqa": { - "name": "GPQA", - "value": 0.40184563758389263, - "normalized_score": 20.246085011185684 - }, - "musr": { - "name": "MUSR", - "value": 0.5020520833333334, - "normalized_score": 23.62317708333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.52734375, - "normalized_score": 47.48263888888889 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-06", - "generation": 2, - "base_model": "microsoft/phi-4", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.704, - "co2_cost": 
1.3451774240042267 - } - }, - { - "id": "NikolaSigmoid/phi-4-300steps_bfloat16_c6220bde10fff762dbd72c3331894aa4cade249d_False", - "model": { - "name": "NikolaSigmoid/phi-4-300steps", - "sha": "c6220bde10fff762dbd72c3331894aa4cade249d", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "", - "average_score": 29.960260183698754, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.05607898154674043, - "normalized_score": 5.607898154674043 - }, - "bbh": { - "name": "BBH", - "value": 0.6701123802649077, - "normalized_score": 52.64505879688579 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2945619335347432, - "normalized_score": 29.45619335347432 - }, - "gpqa": { - "name": "GPQA", - "value": 0.4052013422818792, - "normalized_score": 20.69351230425056 - }, - "musr": { - "name": "MUSR", - "value": 0.5033541666666667, - "normalized_score": 23.719270833333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5287566489361702, - "normalized_score": 47.63962765957447 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-07", - "generation": 2, - "base_model": "microsoft/phi-4", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.704, - "co2_cost": 2.712571051487887 - } - }, - { - "id": "Nitral-AI/Captain-Eris-BMO_Violent-GRPO-v0.420_float16_d02fedecc3401123516c837421727997b9d8a218_True", - "model": { - "name": "Nitral-AI/Captain-Eris-BMO_Violent-GRPO-v0.420", - "sha": "d02fedecc3401123516c837421727997b9d8a218", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 25.923807450212067, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6312805578088361, - "normalized_score": 63.12805578088361 - }, - "bbh": { - "name": "BBH", - "value": 0.5078530730075063, - "normalized_score": 30.43355174896298 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13141993957703926, - "normalized_score": 13.141993957703926 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.4228020833333333, - "normalized_score": 12.050260416666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.359624335106383, - "normalized_score": 28.847148345153663 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-23", - "submission_date": "2025-02-23", - "generation": 1, - "base_model": "Nitral-AI/Captain-Eris-BMO_Violent-GRPO-v0.420 (Merge)", - "hub_license": "other", - "hub_hearts": 3, - "params_billions": 12.248, - "co2_cost": 1.511220389133317 - } - }, - { - "id": "Nitral-AI/Captain-Eris_BMO-Violent-12B_float16_2af7bb5641c77a215f2685b02db7427ab2831a6e_True", - "model": { - "name": "Nitral-AI/Captain-Eris_BMO-Violent-12B", - "sha": "2af7bb5641c77a215f2685b02db7427ab2831a6e", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 25.922969151836924, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - 
"name": "IFEval", - "value": 0.615218730745533, - "normalized_score": 61.5218730745533 - }, - "bbh": { - "name": "BBH", - "value": 0.5104372825851065, - "normalized_score": 31.041896932456876 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13670694864048338, - "normalized_score": 13.670694864048338 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.42553124999999997, - "normalized_score": 12.791406249999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35713098404255317, - "normalized_score": 28.570109338061467 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-23", - "submission_date": "2025-02-23", - "generation": 1, - "base_model": "Nitral-AI/Captain-Eris_BMO-Violent-12B (Merge)", - "hub_license": "other", - "hub_hearts": 2, - "params_billions": 12.248, - "co2_cost": 1.4590005564920683 - } - }, - { - "id": "Nitral-AI/Captain-Eris_Violet-GRPO-v0.420_float16_2faf48adc3490fed4fabd4faf8becda866527139_True", - "model": { - "name": "Nitral-AI/Captain-Eris_Violet-GRPO-v0.420", - "sha": "2faf48adc3490fed4fabd4faf8becda866527139", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 25.272146530177896, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6261597007052399, - "normalized_score": 62.615970070523986 - }, - "bbh": { - "name": "BBH", - "value": 0.515921407165298, - "normalized_score": 31.108573670502523 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10800604229607251, - "normalized_score": 10.80060422960725 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2986577181208054, - "normalized_score": 6.487695749440718 - }, - "musr": { - "name": "MUSR", - "value": 0.42791666666666667, - "normalized_score": 12.456249999999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35347406914893614, - "normalized_score": 28.1637854609929 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-17", - "submission_date": "2025-02-17", - "generation": 1, - "base_model": "Nitral-AI/Captain-Eris_Violet-GRPO-v0.420 (Merge)", - "hub_license": "other", - "hub_hearts": 20, - "params_billions": 12.248, - "co2_cost": 0.787860975370184 - } - }, - { - "id": "Nitral-AI/Captain-Eris_Violet-V0.420-12B_bfloat16_b1a87ce62601e19fff206a16590d28f009965799_False", - "model": { - "name": "Nitral-AI/Captain-Eris_Violet-V0.420-12B", - "sha": "b1a87ce62601e19fff206a16590d28f009965799", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 23.626620517485538, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43391866913123844, - "normalized_score": 43.39186691312385 - }, - "bbh": { - "name": "BBH", - "value": 0.5478099417611365, - "normalized_score": 35.32694075023367 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10725075528700906, - "normalized_score": 10.725075528700906 - }, - "gpqa": { - "name": "GPQA", - "value": 0.311241610738255, - "normalized_score": 8.165548098434002 - }, - "musr": { - 
"name": "MUSR", - "value": 0.43306249999999996, - "normalized_score": 13.899479166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3722573138297872, - "normalized_score": 30.250812647754138 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-16", - "submission_date": "2025-01-04", - "generation": 1, - "base_model": "Nitral-AI/Captain-Eris_Violet-V0.420-12B (Merge)", - "hub_license": "other", - "hub_hearts": 34, - "params_billions": 12.248, - "co2_cost": 3.0311565732739445 - } - }, - { - "id": "Nitral-AI/Captain_BMO-12B_bfloat16_ba2950f1c9831c6aacd6141851e7b9724be6759a_False", - "model": { - "name": "Nitral-AI/Captain_BMO-12B", - "sha": "ba2950f1c9831c6aacd6141851e7b9724be6759a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 23.21048454065836, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4750595087700634, - "normalized_score": 47.50595087700634 - }, - "bbh": { - "name": "BBH", - "value": 0.5285960650424973, - "normalized_score": 32.44069784748834 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13972809667673716, - "normalized_score": 13.972809667673717 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3196308724832215, - "normalized_score": 9.284116331096197 - }, - "musr": { - "name": "MUSR", - "value": 0.37480208333333337, - "normalized_score": 7.516927083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3568816489361702, - "normalized_score": 28.54240543735224 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-01", - "submission_date": "2025-01-04", - "generation": 0, - "base_model": "Nitral-AI/Captain_BMO-12B", - "hub_license": "other", - "hub_hearts": 20, - "params_billions": 12.248, - "co2_cost": 3.4242954711669777 - } - }, - { - "id": "Nitral-AI/Hathor_Stable-v0.2-L3-8B_bfloat16_1c9f391c3e349f8ba51b5696290ee6db6a2b63fd_True", - "model": { - "name": "Nitral-AI/Hathor_Stable-v0.2-L3-8B", - "sha": "1c9f391c3e349f8ba51b5696290ee6db6a2b63fd", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.917956738512263, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7174840534226963, - "normalized_score": 71.74840534226962 - }, - "bbh": { - "name": "BBH", - "value": 0.5285819178301682, - "normalized_score": 32.826028565585965 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10498489425981873, - "normalized_score": 10.498489425981873 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28691275167785235, - "normalized_score": 4.921700223713646 - }, - "musr": { - "name": "MUSR", - "value": 0.3780625, - "normalized_score": 5.557812500000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36959773936170215, - "normalized_score": 29.955304373522463 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-09", - "submission_date": "2024-07-02", - "generation": 0, - "base_model": 
"Nitral-AI/Hathor_Stable-v0.2-L3-8B", - "hub_license": "other", - "hub_hearts": 61, - "params_billions": 8.03, - "co2_cost": 1.6168459294512427 - } - }, - { - "id": "Nitral-AI/Hathor_Tahsin-L3-8B-v0.85_bfloat16_755c5684c3a1dd68df409e0e32b481b707811a50_True", - "model": { - "name": "Nitral-AI/Hathor_Tahsin-L3-8B-v0.85", - "sha": "755c5684c3a1dd68df409e0e32b481b707811a50", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.499607921720123, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7110145524984818, - "normalized_score": 71.10145524984819 - }, - "bbh": { - "name": "BBH", - "value": 0.5279036861109899, - "normalized_score": 32.71311306903596 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10045317220543806, - "normalized_score": 10.045317220543806 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28523489932885904, - "normalized_score": 4.697986577181204 - }, - "musr": { - "name": "MUSR", - "value": 0.3646666666666667, - "normalized_score": 4.216666666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37200797872340424, - "normalized_score": 30.223108747044915 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-09", - "submission_date": "2025-01-26", - "generation": 0, - "base_model": "Nitral-AI/Hathor_Tahsin-L3-8B-v0.85", - "hub_license": "other", - "hub_hearts": 19, - "params_billions": 8.03, - "co2_cost": 1.209051703922841 - } - }, - { - "id": "Nitral-AI/Nera_Noctis-12B_bfloat16_f18471562642499508a26c7d84a5e25b0cd51897_True", - "model": { - "name": "Nitral-AI/Nera_Noctis-12B", - "sha": "f18471562642499508a26c7d84a5e25b0cd51897", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.661351605876916, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45617517076911485, - "normalized_score": 45.61751707691148 - }, - "bbh": { - "name": "BBH", - "value": 0.5193675192746302, - "normalized_score": 31.86959138887418 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08761329305135952, - "normalized_score": 8.761329305135952 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.39790624999999996, - "normalized_score": 8.50494791666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3468251329787234, - "normalized_score": 27.425014775413715 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-01", - "submission_date": "2025-01-26", - "generation": 0, - "base_model": "Nitral-AI/Nera_Noctis-12B", - "hub_license": "other", - "hub_hearts": 12, - "params_billions": 12.248, - "co2_cost": 1.8487588281915825 - } - }, - { - "id": "Nohobby/MS-Schisandra-22B-v0.1_bfloat16_df698b7b740fb3b5193d61cd51e5e3a42c3b1e1c_False", - "model": { - "name": "Nohobby/MS-Schisandra-22B-v0.1", - "sha": "df698b7b740fb3b5193d61cd51e5e3a42c3b1e1c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - 
"average_score": 30.1116442664472, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6331289866443259, - "normalized_score": 63.312898664432595 - }, - "bbh": { - "name": "BBH", - "value": 0.5789949714896523, - "normalized_score": 40.01139961622215 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22280966767371602, - "normalized_score": 22.280966767371602 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33221476510067116, - "normalized_score": 10.96196868008949 - }, - "musr": { - "name": "MUSR", - "value": 0.39284375, - "normalized_score": 9.70546875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4095744680851064, - "normalized_score": 34.39716312056737 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-26", - "submission_date": "2024-10-30", - "generation": 1, - "base_model": "Nohobby/MS-Schisandra-22B-v0.1 (Merge)", - "hub_license": "other", - "hub_hearts": 5, - "params_billions": 22.247, - "co2_cost": 3.2011072958523936 - } - }, - { - "id": "Nohobby/MS-Schisandra-22B-v0.2_bfloat16_257b6d38d2f1c2a607c38a6a86336a241a81a455_False", - "model": { - "name": "Nohobby/MS-Schisandra-22B-v0.2", - "sha": "257b6d38d2f1c2a607c38a6a86336a241a81a455", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 30.28148918014972, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6382997114323329, - "normalized_score": 63.82997114323328 - }, - "bbh": { - "name": "BBH", - "value": 0.5841215984231857, - "normalized_score": 40.614458088552716 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20317220543806647, - "normalized_score": 20.31722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33557046979865773, - "normalized_score": 11.409395973154364 - }, - "musr": { - "name": "MUSR", - "value": 0.40747916666666667, - "normalized_score": 10.668229166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4136469414893617, - "normalized_score": 34.84966016548463 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-02", - "submission_date": "2024-11-02", - "generation": 1, - "base_model": "Nohobby/MS-Schisandra-22B-v0.2 (Merge)", - "hub_license": "other", - "hub_hearts": 9, - "params_billions": 22.247, - "co2_cost": 2.070015853323648 - } - }, - { - "id": "Norquinal/Alpha_float16_e873a2f862b6511d96de50be147e5b3d73d36afd_True", - "model": { - "name": "Norquinal/Alpha", - "sha": "e873a2f862b6511d96de50be147e5b3d73d36afd", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 12.081123216031896, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2802951723648808, - "normalized_score": 28.029517236488076 - }, - "bbh": { - "name": "BBH", - "value": 0.3373652507108038, - "normalized_score": 8.664581329003166 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05740181268882175, - "normalized_score": 5.740181268882175 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 
0.36308333333333337, - "normalized_score": 5.785416666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30028257978723405, - "normalized_score": 22.25361997635934 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-30", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4355619240995625 - } - }, - { - "id": "Norquinal/Bravo_float16_40ae6eda335ee30028fa907ec71e501b46a27f45_True", - "model": { - "name": "Norquinal/Bravo", - "sha": "40ae6eda335ee30028fa907ec71e501b46a27f45", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.664477714927271, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3024519386339357, - "normalized_score": 30.24519386339357 - }, - "bbh": { - "name": "BBH", - "value": 0.3558431980261287, - "normalized_score": 10.654043944684554 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05740181268882175, - "normalized_score": 5.740181268882175 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28187919463087246, - "normalized_score": 4.250559284116329 - }, - "musr": { - "name": "MUSR", - "value": 0.38686458333333335, - "normalized_score": 7.458072916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.312749335106383, - "normalized_score": 23.63881501182033 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-30", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.394500715728391 - } - }, - { - "id": "Norquinal/Charlie_float16_e93fc3dfad9feb74d0a38f84bd42037f49482635_True", - "model": { - "name": "Norquinal/Charlie", - "sha": "e93fc3dfad9feb74d0a38f84bd42037f49482635", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.162885018671325, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3060989286205047, - "normalized_score": 30.609892862050472 - }, - "bbh": { - "name": "BBH", - "value": 0.3515288346438244, - "normalized_score": 9.860055463308887 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0581570996978852, - "normalized_score": 5.81570996978852 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.3736875, - "normalized_score": 6.6442708333333345 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30925864361702127, - "normalized_score": 23.250960401891252 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-30", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2929580750047036 - } - }, - { - "id": "Norquinal/Delta_float16_acfee7dcb17b597d7278415873571b979b545c8a_True", - "model": { - "name": "Norquinal/Delta", - "sha": 
"acfee7dcb17b597d7278415873571b979b545c8a", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 11.354799915155942, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.253842028041153, - "normalized_score": 25.384202804115304 - }, - "bbh": { - "name": "BBH", - "value": 0.3434783285415976, - "normalized_score": 9.097511313782466 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06117824773413897, - "normalized_score": 6.117824773413897 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.3776875, - "normalized_score": 4.310937500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2958776595744681, - "normalized_score": 21.76418439716312 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-30", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3069019975141656 - } - }, - { - "id": "Norquinal/Echo_float16_0f7ce5af69a530e87867225d88275de8d3404ad8_True", - "model": { - "name": "Norquinal/Echo", - "sha": "0f7ce5af69a530e87867225d88275de8d3404ad8", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.446736463673105, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.31579099012841483, - "normalized_score": 31.579099012841482 - }, - "bbh": { - "name": "BBH", - "value": 0.35304654390055795, - "normalized_score": 10.011495800063487 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05740181268882175, - "normalized_score": 5.740181268882175 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.3804479166666667, - "normalized_score": 6.1559895833333345 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30950797872340424, - "normalized_score": 23.278664302600472 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-30", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3417240510190622 - } - }, - { - "id": "Norquinal/Foxtrot_float16_b102325f40a526f4a020c1e12322a1e7aeebb988_True", - "model": { - "name": "Norquinal/Foxtrot", - "sha": "b102325f40a526f4a020c1e12322a1e7aeebb988", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.353623855929653, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3011531624977283, - "normalized_score": 30.115316249772825 - }, - "bbh": { - "name": "BBH", - "value": 0.3558026577191667, - "normalized_score": 10.170272602468412 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0581570996978852, - "normalized_score": 5.81570996978852 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28691275167785235, - "normalized_score": 4.921700223713646 - }, - "musr": 
{ - "name": "MUSR", - "value": 0.3804166666666667, - "normalized_score": 6.318750000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30501994680851063, - "normalized_score": 22.779994089834513 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-30", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2730054105347166 - } - }, - { - "id": "Norquinal/Golf_float16_5b1a3ad20fd4f7915fcc8eaf5e04d4c5f996e70b_True", - "model": { - "name": "Norquinal/Golf", - "sha": "5b1a3ad20fd4f7915fcc8eaf5e04d4c5f996e70b", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.926818361317823, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3533601953926692, - "normalized_score": 35.33601953926692 - }, - "bbh": { - "name": "BBH", - "value": 0.35332648991705207, - "normalized_score": 9.898588822688339 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05362537764350453, - "normalized_score": 5.362537764350453 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.338, - "normalized_score": 4.750000000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30560172872340424, - "normalized_score": 22.844636524822693 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-30", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.373041827866632 - } - }, - { - "id": "Norquinal/Hotel_float16_e359329defd5ebbd70c4df759f231d7f9a87364a_True", - "model": { - "name": "Norquinal/Hotel", - "sha": "e359329defd5ebbd70c4df759f231d7f9a87364a", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.283271661420043, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3215113676157041, - "normalized_score": 32.15113676157041 - }, - "bbh": { - "name": "BBH", - "value": 0.36785702492059275, - "normalized_score": 11.514936892426823 - }, - "math": { - "name": "MATH Level 5", - "value": 0.052870090634441085, - "normalized_score": 5.287009063444108 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.3288229166666667, - "normalized_score": 2.869531250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3156582446808511, - "normalized_score": 23.96202718676123 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-30", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4087752184140758 - } - }, - { - "id": "NotASI/FineTome-Llama3.2-1B-0929_float16_61c8742238d0cfe68a0a3f61326b84cd6624ad02_True", - "model": 
{ - "name": "NotASI/FineTome-Llama3.2-1B-0929", - "sha": "61c8742238d0cfe68a0a3f61326b84cd6624ad02", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 9.953180599897282, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.39907223943580805, - "normalized_score": 39.9072239435808 - }, - "bbh": { - "name": "BBH", - "value": 0.3246274874705644, - "normalized_score": 5.741405038838561 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03625377643504532, - "normalized_score": 3.625377643504532 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.3487604166666667, - "normalized_score": 2.6617187500000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1428690159574468, - "normalized_score": 4.763223995271866 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-29", - "submission_date": "2024-10-04", - "generation": 2, - "base_model": "meta-llama/Llama-3.2-1B-Instruct", - "hub_license": "llama3.2", - "hub_hearts": 1, - "params_billions": 1.236, - "co2_cost": 0.9308168466890548 - } - }, - { - "id": "NotASI/FineTome-Llama3.2-3B-1002_float16_7c8497a24a381e3bfd77bc92e5685442768790d0_True", - "model": { - "name": "NotASI/FineTome-Llama3.2-3B-1002", - "sha": "7c8497a24a381e3bfd77bc92e5685442768790d0", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.7624003049829, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5474496558021605, - "normalized_score": 54.744965580216046 - }, - "bbh": { - "name": "BBH", - "value": 0.4319470614025341, - "normalized_score": 19.52006065248879 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06268882175226587, - "normalized_score": 6.268882175226587 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25083892617449666, - "normalized_score": 0.11185682326622093 - }, - "musr": { - "name": "MUSR", - "value": 0.3685104166666667, - "normalized_score": 3.963802083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.24368351063829788, - "normalized_score": 15.96483451536643 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-04", - "submission_date": "2024-10-05", - "generation": 2, - "base_model": "meta-llama/Llama-3.2-3B-Instruct", - "hub_license": "llama3.2", - "hub_hearts": 1, - "params_billions": 3.0, - "co2_cost": 2.1309013376834227 - } - }, - { - "id": "NotASI/FineTome-v1.5-Llama3.2-1B-1007_float16_5e329d987e9f74dd2703a4fefa56ab8c72b5702b_True", - "model": { - "name": "NotASI/FineTome-v1.5-Llama3.2-1B-1007", - "sha": "5e329d987e9f74dd2703a4fefa56ab8c72b5702b", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 9.24257019295171, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.39237777984636324, - "normalized_score": 39.237777984636324 - }, - "bbh": { - "name": "BBH", - "value": 
0.32405671121485663, - "normalized_score": 5.801724673541757 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03172205438066465, - "normalized_score": 3.1722054380664653 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.34745833333333337, - "normalized_score": 2.498958333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1427027925531915, - "normalized_score": 4.744754728132387 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-07", - "submission_date": "2024-10-07", - "generation": 1, - "base_model": "NotASI/FineTome-v1.5-Llama3.2-1B-1007 (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 1, - "params_billions": 1.236, - "co2_cost": 0.9484630415430834 - } - }, - { - "id": "NotASI/FineTome-v1.5-Llama3.2-3B-1007_float16_6c6e71fbcff6c00d04a3fd69084af20bf2a943c8_True", - "model": { - "name": "NotASI/FineTome-v1.5-Llama3.2-3B-1007", - "sha": "6c6e71fbcff6c00d04a3fd69084af20bf2a943c8", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.113696212626593, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5507719517546776, - "normalized_score": 55.077195175467764 - }, - "bbh": { - "name": "BBH", - "value": 0.4312372935321582, - "normalized_score": 19.457219278849333 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06419939577039276, - "normalized_score": 6.419939577039275 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.3645416666666667, - "normalized_score": 4.067708333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2448470744680851, - "normalized_score": 16.094119385342786 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-07", - "submission_date": "2024-10-07", - "generation": 1, - "base_model": "NotASI/FineTome-v1.5-Llama3.2-3B-1007 (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 1, - "params_billions": 3.213, - "co2_cost": 1.4507570508509497 - } - }, - { - "id": "NousResearch/DeepHermes-3-Mistral-24B-Preview_bfloat16_48072dc6c0594a3198eb862c13613c4ab1119009_True", - "model": { - "name": "NousResearch/DeepHermes-3-Mistral-24B-Preview", - "sha": "48072dc6c0594a3198eb862c13613c4ab1119009", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 31.98701372074271, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45357761849669986, - "normalized_score": 45.357761849669984 - }, - "bbh": { - "name": "BBH", - "value": 0.6488196385442672, - "normalized_score": 48.963404310757504 - }, - "math": { - "name": "MATH Level 5", - "value": 0.25755287009063443, - "normalized_score": 25.755287009063444 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3699664429530201, - "normalized_score": 15.99552572706935 - }, - "musr": { - "name": "MUSR", - "value": 0.4503333333333333, - "normalized_score": 15.95833333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 
0.45902593085106386, - "normalized_score": 39.891770094562645 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2025-03-02", - "submission_date": "2025-03-13", - "generation": 1, - "base_model": "mistralai/Mistral-Small-24B-Base-2501", - "hub_license": "apache-2.0", - "hub_hearts": 76, - "params_billions": 23.572, - "co2_cost": 1.459075136536812 - } - }, - { - "id": "NousResearch/Hermes-2-Pro-Llama-3-8B_float16_bc265d1781299ed2045214289c927c207439a729_True", - "model": { - "name": "NousResearch/Hermes-2-Pro-Llama-3-8B", - "sha": "bc265d1781299ed2045214289c927c207439a729", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.06997572152565, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5361839918084017, - "normalized_score": 53.61839918084017 - }, - "bbh": { - "name": "BBH", - "value": 0.507112624310082, - "normalized_score": 30.667993420825 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08383685800604229, - "normalized_score": 8.38368580060423 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29278523489932884, - "normalized_score": 5.7046979865771785 - }, - "musr": { - "name": "MUSR", - "value": 0.4262395833333333, - "normalized_score": 11.246614583333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30518617021276595, - "normalized_score": 22.798463356973993 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-04-30", - "submission_date": "2024-06-13", - "generation": 1, - "base_model": "NousResearch/Meta-Llama-3-8B", - "hub_license": "llama3", - "hub_hearts": 419, - "params_billions": 8.031, - "co2_cost": 1.499966412528088 - } - }, - { - "id": "NousResearch/Hermes-2-Pro-Mistral-7B_bfloat16_09317b1d8da639b5d9af77c06aa17cde0f0f91c0_True", - "model": { - "name": "NousResearch/Hermes-2-Pro-Mistral-7B", - "sha": "09317b1d8da639b5d9af77c06aa17cde0f0f91c0", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.840576807207686, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5668337788179807, - "normalized_score": 56.68337788179808 - }, - "bbh": { - "name": "BBH", - "value": 0.4995435330498075, - "normalized_score": 29.427578860536 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06042296072507553, - "normalized_score": 6.042296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27348993288590606, - "normalized_score": 3.1319910514541416 - }, - "musr": { - "name": "MUSR", - "value": 0.43759375, - "normalized_score": 14.132552083333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29463098404255317, - "normalized_score": 21.625664893617017 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-03-11", - "submission_date": "2024-06-12", - "generation": 1, - "base_model": "mistralai/Mistral-7B-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 491, - "params_billions": 7.242, - "co2_cost": 0.9455950387200669 - } - }, - { - "id": 
"NousResearch/Hermes-2-Theta-Llama-3-8B_bfloat16_885173e97ab8572b444f7db1290d5d0386e26816_True", - "model": { - "name": "NousResearch/Hermes-2-Theta-Llama-3-8B", - "sha": "885173e97ab8572b444f7db1290d5d0386e26816", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.78837646080699, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6517883659800441, - "normalized_score": 65.17883659800441 - }, - "bbh": { - "name": "BBH", - "value": 0.5206672260911865, - "normalized_score": 32.046073848075835 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09667673716012085, - "normalized_score": 9.667673716012084 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.3948958333333334, - "normalized_score": 8.36197916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33685172872340424, - "normalized_score": 26.316858747044915 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-05", - "submission_date": "2024-07-11", - "generation": 2, - "base_model": "NousResearch/Meta-Llama-3-8B", - "hub_license": "apache-2.0", - "hub_hearts": 201, - "params_billions": 8.03, - "co2_cost": 1.487844835193733 - } - }, - { - "id": "NousResearch/Hermes-3-Llama-3.1-70B_bfloat16_093242c69a91f8d9d5b8094c380b88772f9bd7f8_True", - "model": { - "name": "NousResearch/Hermes-3-Llama-3.1-70B", - "sha": "093242c69a91f8d9d5b8094c380b88772f9bd7f8", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 38.51477067349896, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7661438316998896, - "normalized_score": 76.61438316998897 - }, - "bbh": { - "name": "BBH", - "value": 0.6755780641387483, - "normalized_score": 53.76540869130056 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20996978851963746, - "normalized_score": 20.996978851963746 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3615771812080537, - "normalized_score": 14.876957494407161 - }, - "musr": { - "name": "MUSR", - "value": 0.4948958333333333, - "normalized_score": 23.42864583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.47265625, - "normalized_score": 41.40625 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-07-29", - "submission_date": "2024-08-28", - "generation": 1, - "base_model": "meta-llama/Meta-Llama-3.1-70B", - "hub_license": "llama3", - "hub_hearts": 107, - "params_billions": 70.554, - "co2_cost": 22.415781841842687 - } - }, - { - "id": "NousResearch/Hermes-3-Llama-3.1-8B_bfloat16_aabb745a717e133b74dcae23195d2635cf5f38cc_True", - "model": { - "name": "NousResearch/Hermes-3-Llama-3.1-8B", - "sha": "aabb745a717e133b74dcae23195d2635cf5f38cc", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.49087671148001, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6170172918966121, - "normalized_score": 61.70172918966122 
- }, - "bbh": { - "name": "BBH", - "value": 0.5177452540141246, - "normalized_score": 30.724096614147953 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04758308157099698, - "normalized_score": 4.758308157099698 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.4369375, - "normalized_score": 13.617187499999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3139128989361702, - "normalized_score": 23.768099881796687 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-07-28", - "submission_date": "2024-08-28", - "generation": 1, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "llama3", - "hub_hearts": 303, - "params_billions": 8.03, - "co2_cost": 0.9058079001429431 - } - }, - { - "id": "NousResearch/Hermes-3-Llama-3.2-3B_bfloat16_f6a109fe836b13b6905f8c16a7388f2f557c3974_True", - "model": { - "name": "NousResearch/Hermes-3-Llama-3.2-3B", - "sha": "f6a109fe836b13b6905f8c16a7388f2f557c3974", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 15.242119392530277, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3824862476008103, - "normalized_score": 38.24862476008103 - }, - "bbh": { - "name": "BBH", - "value": 0.43519901506714875, - "normalized_score": 20.187188037454586 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03927492447129909, - "normalized_score": 3.927492447129909 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2751677852348993, - "normalized_score": 3.355704697986576 - }, - "musr": { - "name": "MUSR", - "value": 0.40302083333333333, - "normalized_score": 8.577604166666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.25440492021276595, - "normalized_score": 17.156102245862883 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-12-03", - "submission_date": "2024-12-20", - "generation": 1, - "base_model": "Removed", - "hub_license": "llama3", - "hub_hearts": 148, - "params_billions": 3.213, - "co2_cost": 1.6523844958310772 - } - }, - { - "id": "NousResearch/Nous-Hermes-2-Mistral-7B-DPO_bfloat16_ebec0a691037d38955727d6949798429a63929dd_True", - "model": { - "name": "NousResearch/Nous-Hermes-2-Mistral-7B-DPO", - "sha": "ebec0a691037d38955727d6949798429a63929dd", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.10058697437334, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5762510139762497, - "normalized_score": 57.62510139762497 - }, - "bbh": { - "name": "BBH", - "value": 0.48526536654652347, - "normalized_score": 27.792545658366084 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04758308157099697, - "normalized_score": 4.758308157099697 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29278523489932884, - "normalized_score": 5.7046979865771785 - }, - "musr": { - "name": "MUSR", - "value": 0.3999791666666667, - "normalized_score": 8.330729166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3015292553191489, - "normalized_score": 22.392139479905435 - 
} - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-02-18", - "submission_date": "2024-06-12", - "generation": 1, - "base_model": "mistralai/Mistral-7B-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 175, - "params_billions": 7.242, - "co2_cost": 0.9491978373976354 - } - }, - { - "id": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO_bfloat16_286ae6737d048ad1d965c2e830864df02db50f2f_True", - "model": { - "name": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", - "sha": "286ae6737d048ad1d965c2e830864df02db50f2f", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 27.353190438571634, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5896898008395501, - "normalized_score": 58.96898008395502 - }, - "bbh": { - "name": "BBH", - "value": 0.5538851384033822, - "normalized_score": 37.10778379133987 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12235649546827794, - "normalized_score": 12.235649546827794 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3213087248322148, - "normalized_score": 9.507829977628639 - }, - "musr": { - "name": "MUSR", - "value": 0.4595416666666667, - "normalized_score": 16.676041666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3666057180851064, - "normalized_score": 29.622857565011817 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-01-11", - "submission_date": "2024-07-27", - "generation": 1, - "base_model": "mistralai/Mixtral-8x7B-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 429, - "params_billions": 46.703, - "co2_cost": 15.653110501660983 - } - }, - { - "id": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT_bfloat16_4c06af2684730f75a6874b95e8bf6058105d9612_True", - "model": { - "name": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT", - "sha": "4c06af2684730f75a6874b95e8bf6058105d9612", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 21.841010891461725, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5730783210769648, - "normalized_score": 57.30783210769647 - }, - "bbh": { - "name": "BBH", - "value": 0.5057868454026635, - "normalized_score": 30.594312778864406 - }, - "math": { - "name": "MATH Level 5", - "value": 0.021148036253776436, - "normalized_score": 2.1148036253776437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - "musr": { - "name": "MUSR", - "value": 0.421375, - "normalized_score": 11.138541666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30659906914893614, - "normalized_score": 22.95545212765957 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-12-26", - "submission_date": "2024-06-12", - "generation": 1, - "base_model": "mistralai/Mixtral-8x7B-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 56, - "params_billions": 46.703, - "co2_cost": 20.77587969234417 - } - }, - { - "id": 
"NousResearch/Nous-Hermes-2-SOLAR-10.7B_bfloat16_14c1fbe2f71acdcd58247b30d5439bd572d52386_True", - "model": { - "name": "NousResearch/Nous-Hermes-2-SOLAR-10.7B", - "sha": "14c1fbe2f71acdcd58247b30d5439bd572d52386", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.412543159550665, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5278660620486975, - "normalized_score": 52.78660620486975 - }, - "bbh": { - "name": "BBH", - "value": 0.5414294841140173, - "normalized_score": 34.990894584465195 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05740181268882175, - "normalized_score": 5.740181268882175 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2936241610738255, - "normalized_score": 5.8165548098433995 - }, - "musr": { - "name": "MUSR", - "value": 0.43728125, - "normalized_score": 13.826822916666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3458277925531915, - "normalized_score": 27.314199172576835 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-01-01", - "submission_date": "2024-06-12", - "generation": 1, - "base_model": "upstage/SOLAR-10.7B-v1.0", - "hub_license": "apache-2.0", - "hub_hearts": 205, - "params_billions": 10.732, - "co2_cost": 1.2868882760344564 - } - }, - { - "id": "NousResearch/Nous-Hermes-llama-2-7b_bfloat16_b7c3ec54b754175e006ef75696a2ba3802697078_False", - "model": { - "name": "NousResearch/Nous-Hermes-llama-2-7b", - "sha": "b7c3ec54b754175e006ef75696a2ba3802697078", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 9.316715938919105, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17290788441335658, - "normalized_score": 17.290788441335657 - }, - "bbh": { - "name": "BBH", - "value": 0.3823937686034717, - "normalized_score": 13.78941955171473 - }, - "math": { - "name": "MATH Level 5", - "value": 0.00906344410876133, - "normalized_score": 0.906344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.42571875, - "normalized_score": 11.681510416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.19398271276595744, - "normalized_score": 10.442523640661937 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-07-25", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "NousResearch/Nous-Hermes-llama-2-7b", - "hub_license": "mit", - "hub_hearts": 70, - "params_billions": 6.738, - "co2_cost": 5.116114115406325 - } - }, - { - "id": "NousResearch/Yarn-Llama-2-13b-128k_bfloat16_4e3e87a067f64f8814c83dd5e3bad92dcf8a2391_False", - "model": { - "name": "NousResearch/Yarn-Llama-2-13b-128k", - "sha": "4e3e87a067f64f8814c83dd5e3bad92dcf8a2391", - "precision": "bfloat16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 8.494146676691704, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16546430138698653, - 
"normalized_score": 16.546430138698653 - }, - "bbh": { - "name": "BBH", - "value": 0.3826816443733663, - "normalized_score": 13.505319085673955 - }, - "math": { - "name": "MATH Level 5", - "value": 0.017371601208459216, - "normalized_score": 1.7371601208459215 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.34575, - "normalized_score": 3.385416666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.23204787234042554, - "normalized_score": 14.671985815602836 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-08-30", - "submission_date": "2024-06-13", - "generation": 0, - "base_model": "NousResearch/Yarn-Llama-2-13b-128k", - "hub_license": "", - "hub_hearts": 112, - "params_billions": 13.0, - "co2_cost": 103.87156662170433 - } - }, - { - "id": "NousResearch/Yarn-Llama-2-7b-128k_bfloat16_e1ceedbbf2ed28b88086794441a6c05606d15437_False", - "model": { - "name": "NousResearch/Yarn-Llama-2-7b-128k", - "sha": "e1ceedbbf2ed28b88086794441a6c05606d15437", - "precision": "bfloat16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 6.814800680431223, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.14847825990593846, - "normalized_score": 14.847825990593847 - }, - "bbh": { - "name": "BBH", - "value": 0.32480295375597734, - "normalized_score": 6.1446917129934855 - }, - "math": { - "name": "MATH Level 5", - "value": 0.015105740181268883, - "normalized_score": 1.5105740181268883 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.39669791666666665, - "normalized_score": 8.253906250000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1791057180851064, - "normalized_score": 8.789524231678488 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-08-31", - "submission_date": "2024-06-13", - "generation": 0, - "base_model": "NousResearch/Yarn-Llama-2-7b-128k", - "hub_license": "", - "hub_hearts": 39, - "params_billions": 7.0, - "co2_cost": 1.6794776736133492 - } - }, - { - "id": "NousResearch/Yarn-Llama-2-7b-64k_bfloat16_08491431ac3b50add7443f5d4c02850801d877be_False", - "model": { - "name": "NousResearch/Yarn-Llama-2-7b-64k", - "sha": "08491431ac3b50add7443f5d4c02850801d877be", - "precision": "bfloat16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 7.222883145587748, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1699856381068897, - "normalized_score": 16.99856381068897 - }, - "bbh": { - "name": "BBH", - "value": 0.3326277865253592, - "normalized_score": 7.0440554144724175 - }, - "math": { - "name": "MATH Level 5", - "value": 0.015861027190332326, - "normalized_score": 1.5861027190332326 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.393875, - "normalized_score": 6.934374999999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 
0.17985372340425532, - "normalized_score": 8.872635933806146 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-08-30", - "submission_date": "2024-06-13", - "generation": 0, - "base_model": "NousResearch/Yarn-Llama-2-7b-64k", - "hub_license": "", - "hub_hearts": 23, - "params_billions": 7.0, - "co2_cost": 1.6608026524383015 - } - }, - { - "id": "NousResearch/Yarn-Mistral-7b-128k_bfloat16_d09f1f8ed437d61c1aff94c1beabee554843dcdd_False", - "model": { - "name": "NousResearch/Yarn-Mistral-7b-128k", - "sha": "d09f1f8ed437d61c1aff94c1beabee554843dcdd", - "precision": "bfloat16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 13.268755393260783, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19336693307091848, - "normalized_score": 19.33669330709185 - }, - "bbh": { - "name": "BBH", - "value": 0.4314467711273296, - "normalized_score": 20.633112436478672 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03172205438066465, - "normalized_score": 3.1722054380664653 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2986577181208054, - "normalized_score": 6.487695749440718 - }, - "musr": { - "name": "MUSR", - "value": 0.4070520833333333, - "normalized_score": 8.948177083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.289311835106383, - "normalized_score": 21.034648345153663 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-10-31", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "NousResearch/Yarn-Mistral-7b-128k", - "hub_license": "apache-2.0", - "hub_hearts": 573, - "params_billions": 7.0, - "co2_cost": 1.1005220635485915 - } - }, - { - "id": "NousResearch/Yarn-Mistral-7b-64k_bfloat16_0273c624561fcecc8e8f4030492a9307aa60f945_False", - "model": { - "name": "NousResearch/Yarn-Mistral-7b-64k", - "sha": "0273c624561fcecc8e8f4030492a9307aa60f945", - "precision": "bfloat16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 13.540457995525927, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2079548930171944, - "normalized_score": 20.79548930171944 - }, - "bbh": { - "name": "BBH", - "value": 0.42931904551037814, - "normalized_score": 20.230200209182886 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03700906344410876, - "normalized_score": 3.7009063444108756 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.41238541666666667, - "normalized_score": 9.881510416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2913896276595745, - "normalized_score": 21.265514184397162 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-10-31", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "NousResearch/Yarn-Mistral-7b-64k", - "hub_license": "apache-2.0", - "hub_hearts": 51, - "params_billions": 7.0, - "co2_cost": 1.082320291391451 - } - }, - { - "id": 
"NousResearch/Yarn-Solar-10b-32k_bfloat16_ec3158b5276ac6644ddbdb36ccf6f9a106c98ede_False", - "model": { - "name": "NousResearch/Yarn-Solar-10b-32k", - "sha": "ec3158b5276ac6644ddbdb36ccf6f9a106c98ede", - "precision": "bfloat16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 15.721261422706204, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19421579187666504, - "normalized_score": 19.421579187666502 - }, - "bbh": { - "name": "BBH", - "value": 0.4986859152325069, - "normalized_score": 28.99482436025671 - }, - "math": { - "name": "MATH Level 5", - "value": 0.030211480362537766, - "normalized_score": 3.0211480362537766 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.4146458333333333, - "normalized_score": 10.597395833333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32721077127659576, - "normalized_score": 25.24564125295508 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-01-17", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "NousResearch/Yarn-Solar-10b-32k", - "hub_license": "apache-2.0", - "hub_hearts": 10, - "params_billions": 10.0, - "co2_cost": 2.0301957352344906 - } - }, - { - "id": "NousResearch/Yarn-Solar-10b-64k_bfloat16_703818628a5e8ef637e48e8dbeb3662aa0497aff_False", - "model": { - "name": "NousResearch/Yarn-Solar-10b-64k", - "sha": "703818628a5e8ef637e48e8dbeb3662aa0497aff", - "precision": "bfloat16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 15.162050446653282, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1988867316498003, - "normalized_score": 19.88867316498003 - }, - "bbh": { - "name": "BBH", - "value": 0.49219907954226505, - "normalized_score": 28.395714153595822 - }, - "math": { - "name": "MATH Level 5", - "value": 0.028700906344410877, - "normalized_score": 2.8700906344410875 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - "musr": { - "name": "MUSR", - "value": 0.40143750000000006, - "normalized_score": 9.013020833333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3148271276595745, - "normalized_score": 23.869680851063833 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-01-17", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "NousResearch/Yarn-Solar-10b-64k", - "hub_license": "apache-2.0", - "hub_hearts": 15, - "params_billions": 10.0, - "co2_cost": 1.5275057461018122 - } - }, - { - "id": "Novaciano/ASTAROTH-3.2-1B_bfloat16_fe31207a8150fed8d9c68cf21ab7f0d62efb4b01_True", - "model": { - "name": "Novaciano/ASTAROTH-3.2-1B", - "sha": "fe31207a8150fed8d9c68cf21ab7f0d62efb4b01", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.173861365477634, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5612884923115112, - "normalized_score": 
56.12884923115112 - }, - "bbh": { - "name": "BBH", - "value": 0.3542962056805596, - "normalized_score": 9.493517929020854 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07326283987915408, - "normalized_score": 7.326283987915408 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2558724832214765, - "normalized_score": 0.7829977628635317 - }, - "musr": { - "name": "MUSR", - "value": 0.31421875, - "normalized_score": 1.2106770833333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.19090757978723405, - "normalized_score": 10.10084219858156 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-09", - "submission_date": "2025-03-10", - "generation": 1, - "base_model": "Novaciano/ASTAROTH-3.2-1B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.498, - "co2_cost": 0.3488551221706949 - } - }, - { - "id": "Novaciano/BLAST_PROCESSING-3.2-1B_bfloat16_2d1b240529812f2fff4d9a42b845b9c4031a1624_False", - "model": { - "name": "Novaciano/BLAST_PROCESSING-3.2-1B", - "sha": "2d1b240529812f2fff4d9a42b845b9c4031a1624", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 11.950699067123438, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3921783091087204, - "normalized_score": 39.21783091087204 - }, - "bbh": { - "name": "BBH", - "value": 0.3460318843168258, - "normalized_score": 9.362853718947571 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07477341389728097, - "normalized_score": 7.477341389728097 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.3351458333333333, - "normalized_score": 3.0598958333333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.19414893617021275, - "normalized_score": 10.460992907801417 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-09", - "generation": 1, - "base_model": "Novaciano/BLAST_PROCESSING-3.2-1B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.498, - "co2_cost": 0.35283818131449957 - } - }, - { - "id": "Novaciano/Cerberus-3.2-1B_bfloat16_45223673222648d0247058e8839826bf0e7596fd_True", - "model": { - "name": "Novaciano/Cerberus-3.2-1B", - "sha": "45223673222648d0247058e8839826bf0e7596fd", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.731435487093222, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5016877440746109, - "normalized_score": 50.16877440746109 - }, - "bbh": { - "name": "BBH", - "value": 0.4164937678626939, - "normalized_score": 16.97415923404232 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0581570996978852, - "normalized_score": 5.81570996978852 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.32888541666666665, - "normalized_score": 0.9440104166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1663065159574468, - 
"normalized_score": 7.367390661938533 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-09", - "submission_date": "2025-03-10", - "generation": 1, - "base_model": "Novaciano/Cerberus-3.2-1B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 0.3553566221681203 - } - }, - { - "id": "Novaciano/Cultist-3.2-1B_bfloat16_59c2c68c6a47d529be97641e451e31ba90b1bc31_True", - "model": { - "name": "Novaciano/Cultist-3.2-1B", - "sha": "59c2c68c6a47d529be97641e451e31ba90b1bc31", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 12.939673802761396, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5294895322189568, - "normalized_score": 52.94895322189568 - }, - "bbh": { - "name": "BBH", - "value": 0.3399311286410264, - "normalized_score": 7.520040161824312 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05891238670694864, - "normalized_score": 5.8912386706948645 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.3330104166666667, - "normalized_score": 1.8929687500000008 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17137632978723405, - "normalized_score": 7.930703309692672 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-09", - "submission_date": "2025-03-10", - "generation": 1, - "base_model": "Novaciano/Cultist-3.2-1B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.498, - "co2_cost": 0.7370580211066066 - } - }, - { - "id": "Novaciano/FuseChat-3.2-1B-GRPO_Creative_RP_bfloat16_be2ceb27b58c77ec4578ffc9de784a16be7a6e4c_True", - "model": { - "name": "Novaciano/FuseChat-3.2-1B-GRPO_Creative_RP", - "sha": "be2ceb27b58c77ec4578ffc9de784a16be7a6e4c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.847576006756135, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.559814625194484, - "normalized_score": 55.9814625194484 - }, - "bbh": { - "name": "BBH", - "value": 0.3487816706572648, - "normalized_score": 8.942707229882872 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08006042296072508, - "normalized_score": 8.006042296072508 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2558724832214765, - "normalized_score": 0.7829977628635317 - }, - "musr": { - "name": "MUSR", - "value": 0.33288541666666666, - "normalized_score": 1.2106770833333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17345412234042554, - "normalized_score": 8.16156914893617 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-03", - "submission_date": "2025-03-10", - "generation": 1, - "base_model": "Novaciano/FuseChat-3.2-1B-GRPO_Creative_RP (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 0.3441348832380188 - } - }, - { - "id": 
"Novaciano/Fusetrix-3.2-1B-GRPO_RP_Creative_bfloat16_0a68a513f48c7d34467e4309108eabb676d48498_True", - "model": { - "name": "Novaciano/Fusetrix-3.2-1B-GRPO_RP_Creative", - "sha": "0a68a513f48c7d34467e4309108eabb676d48498", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.986208661387776, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5366339091388627, - "normalized_score": 53.663390913886275 - }, - "bbh": { - "name": "BBH", - "value": 0.3434595088038714, - "normalized_score": 8.772109627787136 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1148036253776435, - "normalized_score": 11.48036253776435 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3209166666666667, - "normalized_score": 1.58125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17578125, - "normalized_score": 8.420138888888888 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-03", - "submission_date": "2025-03-10", - "generation": 1, - "base_model": "Novaciano/Fusetrix-3.2-1B-GRPO_RP_Creative (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 0.3315420554652407 - } - }, - { - "id": "Novaciano/Fusetrix-Dolphin-3.2-1B-GRPO_Creative_RP_bfloat16_925f3903592424e6570aab3ea820fdddd8c0f553_True", - "model": { - "name": "Novaciano/Fusetrix-Dolphin-3.2-1B-GRPO_Creative_RP", - "sha": "925f3903592424e6570aab3ea820fdddd8c0f553", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.279018663933527, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5342856952885011, - "normalized_score": 53.42856952885012 - }, - "bbh": { - "name": "BBH", - "value": 0.35023897852759145, - "normalized_score": 9.680798809347326 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10498489425981873, - "normalized_score": 10.498489425981873 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.3183125, - "normalized_score": 0.45572916666666624 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1823470744680851, - "normalized_score": 9.149674940898343 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-03", - "submission_date": "2025-03-10", - "generation": 1, - "base_model": "Novaciano/Fusetrix-Dolphin-3.2-1B-GRPO_Creative_RP (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 0.32582187531786244 - } - }, - { - "id": "Novaciano/HarmfulProject-3.2-1B_bfloat16_30791fe7304bfdb84995baf252e141a9d81fc496_False", - "model": { - "name": "Novaciano/HarmfulProject-3.2-1B", - "sha": "30791fe7304bfdb84995baf252e141a9d81fc496", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 10.686879934293591, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3873821460391761, 
- "normalized_score": 38.738214603917605 - }, - "bbh": { - "name": "BBH", - "value": 0.32744993658117816, - "normalized_score": 6.5128050720912585 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04758308157099698, - "normalized_score": 4.758308157099698 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26677852348993286, - "normalized_score": 2.2371364653243813 - }, - "musr": { - "name": "MUSR", - "value": 0.341875, - "normalized_score": 2.7343749999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.18226396276595744, - "normalized_score": 9.140440307328603 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-08", - "submission_date": "2025-03-09", - "generation": 1, - "base_model": "Novaciano/HarmfulProject-3.2-1B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 1.498, - "co2_cost": 0.36775796043137254 - } - }, - { - "id": "Novaciano/LEWD-Mental-Cultist-3.2-1B_bfloat16_5ff3d4ebc04b8e9ea8f739ba2ce744ed7077c16f_True", - "model": { - "name": "Novaciano/LEWD-Mental-Cultist-3.2-1B", - "sha": "5ff3d4ebc04b8e9ea8f739ba2ce744ed7077c16f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 12.977293713777764, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5308636639671627, - "normalized_score": 53.08636639671627 - }, - "bbh": { - "name": "BBH", - "value": 0.35127188813594756, - "normalized_score": 8.636853527747606 - }, - "math": { - "name": "MATH Level 5", - "value": 0.052870090634441085, - "normalized_score": 5.287009063444108 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25671140939597314, - "normalized_score": 0.8948545861297527 - }, - "musr": { - "name": "MUSR", - "value": 0.32228125, - "normalized_score": 1.4184895833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1768617021276596, - "normalized_score": 8.540189125295509 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-09", - "submission_date": "2025-03-10", - "generation": 1, - "base_model": "Novaciano/LEWD-Mental-Cultist-3.2-1B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.498, - "co2_cost": 0.36366032057441167 - } - }, - { - "id": "Novaciano/La_Mejor_Mezcla-3.2-1B_bfloat16_ee2ad8591fd4b90a7d995decdfd097f7ed2e2a06_True", - "model": { - "name": "Novaciano/La_Mejor_Mezcla-3.2-1B", - "sha": "ee2ad8591fd4b90a7d995decdfd097f7ed2e2a06", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.056697294328709, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5509969104199081, - "normalized_score": 55.09969104199081 - }, - "bbh": { - "name": "BBH", - "value": 0.34879364478381225, - "normalized_score": 9.41305894751064 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08987915407854985, - "normalized_score": 8.987915407854985 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2575503355704698, - "normalized_score": 1.0067114093959737 - }, - "musr": { - "name": "MUSR", - "value": 0.3196145833333333, - "normalized_score": 0.6184895833333329 - }, - "mmlu_pro": { - "name": 
"MMLU-PRO", - "value": 0.18292885638297873, - "normalized_score": 9.214317375886525 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-03", - "submission_date": "2025-03-10", - "generation": 1, - "base_model": "Novaciano/La_Mejor_Mezcla-3.2-1B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.498, - "co2_cost": 0.32455996908153595 - } - }, - { - "id": "Novaciano/Sigil-Of-Satan-3.2-1B_bfloat16_021456428c58707f505db03ee05777b2edbb8652_True", - "model": { - "name": "Novaciano/Sigil-Of-Satan-3.2-1B", - "sha": "021456428c58707f505db03ee05777b2edbb8652", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.692280425316104, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5494233079340594, - "normalized_score": 54.942330793405944 - }, - "bbh": { - "name": "BBH", - "value": 0.3545862332731657, - "normalized_score": 9.400065990891237 - }, - "math": { - "name": "MATH Level 5", - "value": 0.054380664652567974, - "normalized_score": 5.438066465256798 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.3276145833333333, - "normalized_score": 1.4184895833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.18550531914893617, - "normalized_score": 9.500591016548462 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-08", - "submission_date": "2025-03-10", - "generation": 1, - "base_model": "Novaciano/Sigil-Of-Satan-3.2-1B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.498, - "co2_cost": 0.3714004593019818 - } - }, - { - "id": "NucleusAI/nucleus-22B-token-500B_bfloat16_49bb1a47c0d32b4bfa6630a4eff04a857adcd4ca_False", - "model": { - "name": "NucleusAI/nucleus-22B-token-500B", - "sha": "49bb1a47c0d32b4bfa6630a4eff04a857adcd4ca", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 1.6334163485881146, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.025654153202391873, - "normalized_score": 2.5654153202391874 - }, - "bbh": { - "name": "BBH", - "value": 0.29198007801214715, - "normalized_score": 1.8879990685708254 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3510520833333333, - "normalized_score": 3.5481770833333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11619015957446809, - "normalized_score": 1.798906619385342 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-10-06", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "NucleusAI/nucleus-22B-token-500B", - "hub_license": "mit", - "hub_hearts": 25, - "params_billions": 21.828, - "co2_cost": 1.18963543732518 - } - }, - { - "id": 
"NyxKrage/Microsoft_Phi-4_bfloat16_d6e415636fc3435ec1cf543db77cf228b6ce6bdd_False", - "model": { - "name": "NyxKrage/Microsoft_Phi-4", - "sha": "d6e415636fc3435ec1cf543db77cf228b6ce6bdd", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 30.068601124538514, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.0585269307659233, - "normalized_score": 5.852693076592331 - }, - "bbh": { - "name": "BBH", - "value": 0.6690562305322874, - "normalized_score": 52.42784845820486 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2990936555891239, - "normalized_score": 29.909365558912388 - }, - "gpqa": { - "name": "GPQA", - "value": 0.40604026845637586, - "normalized_score": 20.805369127516784 - }, - "musr": { - "name": "MUSR", - "value": 0.5033541666666667, - "normalized_score": 23.7859375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5286735372340425, - "normalized_score": 47.630393026004725 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-13", - "submission_date": "2024-12-21", - "generation": 0, - "base_model": "NyxKrage/Microsoft_Phi-4", - "hub_license": "other", - "hub_hearts": 56, - "params_billions": 14.66, - "co2_cost": 1.788963081142109 - } - }, - { - "id": "OEvortex/Emotional-llama-8B_bfloat16_7d16f2e5354dd8f62ce46e47580bfafbc9d4eabd_False", - "model": { - "name": "OEvortex/Emotional-llama-8B", - "sha": "7d16f2e5354dd8f62ce46e47580bfafbc9d4eabd", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.789126430453027, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3516369898535885, - "normalized_score": 35.16369898535885 - }, - "bbh": { - "name": "BBH", - "value": 0.4838573702054177, - "normalized_score": 26.45405447203827 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08157099697885196, - "normalized_score": 8.157099697885197 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.365875, - "normalized_score": 2.8677083333333346 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35347406914893614, - "normalized_score": 28.1637854609929 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-05", - "submission_date": "2024-12-29", - "generation": 0, - "base_model": "OEvortex/Emotional-llama-8B", - "hub_license": "llama3", - "hub_hearts": 8, - "params_billions": 8.03, - "co2_cost": 1.363628341660931 - } - }, - { - "id": "OEvortex/HelpingAI-15B_float16_fcc5d4eeee08c07680a2560a302de3eaa5d6f550_True", - "model": { - "name": "OEvortex/HelpingAI-15B", - "sha": "fcc5d4eeee08c07680a2560a302de3eaa5d6f550", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.515495603660534, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2030091268944179, - "normalized_score": 20.30091268944179 - }, - "bbh": { - "name": "BBH", - "value": 0.2936006977853758, - "normalized_score": 
1.8153805514942334 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2575503355704698, - "normalized_score": 1.0067114093959737 - }, - "musr": { - "name": "MUSR", - "value": 0.361875, - "normalized_score": 2.7343749999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11112034574468085, - "normalized_score": 1.2355939716312052 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-11", - "submission_date": "2024-07-13", - "generation": 0, - "base_model": "OEvortex/HelpingAI-15B", - "hub_license": "other", - "hub_hearts": 13, - "params_billions": 15.323, - "co2_cost": 2.45447461771735 - } - }, - { - "id": "OEvortex/HelpingAI-3B-reloaded_float16_aaee653fea06ba322e7a9ed15530db605cc3b382_True", - "model": { - "name": "OEvortex/HelpingAI-3B-reloaded", - "sha": "aaee653fea06ba322e7a9ed15530db605cc3b382", - "precision": "float16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.768420533149957, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.46466819150963884, - "normalized_score": 46.466819150963886 - }, - "bbh": { - "name": "BBH", - "value": 0.4128512897904065, - "normalized_score": 16.98574044907848 - }, - "math": { - "name": "MATH Level 5", - "value": 0.013595166163141994, - "normalized_score": 1.3595166163141994 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.3524479166666667, - "normalized_score": 4.289322916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.25947473404255317, - "normalized_score": 17.719414893617017 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-31", - "submission_date": "2024-10-31", - "generation": 0, - "base_model": "OEvortex/HelpingAI-3B-reloaded", - "hub_license": "other", - "hub_hearts": 2, - "params_billions": 2.81, - "co2_cost": 1.1232528968367588 - } - }, - { - "id": "OEvortex/HelpingAI2-9B_float16_b45a18cf41d0d438d71d79687e098ec60dd0aec1_True", - "model": { - "name": "OEvortex/HelpingAI2-9B", - "sha": "b45a18cf41d0d438d71d79687e098ec60dd0aec1", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.606927062260286, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44131238447319776, - "normalized_score": 44.131238447319774 - }, - "bbh": { - "name": "BBH", - "value": 0.4844617641983123, - "normalized_score": 27.073241609173305 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05891238670694864, - "normalized_score": 5.8912386706948645 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.3710833333333334, - "normalized_score": 6.318750000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28997672872340424, - "normalized_score": 21.108525413711583 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - 
"is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-16", - "submission_date": "2024-10-11", - "generation": 0, - "base_model": "OEvortex/HelpingAI2-9B", - "hub_license": "other", - "hub_hearts": 23, - "params_billions": 8.903, - "co2_cost": 2.0813028218866374 - } - }, - { - "id": "OEvortex/HelpingAI2.5-10B_float16_25ac750b886c7e42521c769e6c2cd2b1143cfbcc_True", - "model": { - "name": "OEvortex/HelpingAI2.5-10B", - "sha": "25ac750b886c7e42521c769e6c2cd2b1143cfbcc", - "precision": "float16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.711774162686112, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.32765617450586665, - "normalized_score": 32.76561745058666 - }, - "bbh": { - "name": "BBH", - "value": 0.4495657491171711, - "normalized_score": 21.135366144659056 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02039274924471299, - "normalized_score": 2.0392749244712993 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26929530201342283, - "normalized_score": 2.572706935123044 - }, - "musr": { - "name": "MUSR", - "value": 0.37381250000000005, - "normalized_score": 6.259895833333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.25748005319148937, - "normalized_score": 17.49778368794326 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-17", - "submission_date": "2024-11-19", - "generation": 0, - "base_model": "OEvortex/HelpingAI2.5-10B", - "hub_license": "other", - "hub_hearts": 4, - "params_billions": 10.211, - "co2_cost": 1.878786764354815 - } - }, - { - "id": "OliveiraJLT/Sagui-7B-Instruct-v0.1_bfloat16_e3032ba89a6df12b801ab3be2a29b59068aa048d_True", - "model": { - "name": "OliveiraJLT/Sagui-7B-Instruct-v0.1", - "sha": "e3032ba89a6df12b801ab3be2a29b59068aa048d", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 8.579407330639999, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.28916275482386733, - "normalized_score": 28.916275482386734 - }, - "bbh": { - "name": "BBH", - "value": 0.3110678914743868, - "normalized_score": 5.043571655312187 - }, - "math": { - "name": "MATH Level 5", - "value": 0.015105740181268883, - "normalized_score": 1.5105740181268883 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2424496644295302, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.4190520833333333, - "normalized_score": 10.61484375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.14852061170212766, - "normalized_score": 5.391179078014184 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-17", - "submission_date": "2024-07-18", - "generation": 1, - "base_model": "maritaca-ai/sabia-7b", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 6.738, - "co2_cost": 1.64709154275852 - } - }, - { - "id": "Omkar1102/code-yi_float16_7e875c1d64029d1f8db6813bd2b715cb5406b745_False", - "model": { - "name": "Omkar1102/code-yi", - "sha": "7e875c1d64029d1f8db6813bd2b715cb5406b745", - "precision": "float16", - "type": "basemergesandmoerges", - 
"weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.921767050278931, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21477457590304835, - "normalized_score": 21.477457590304837 - }, - "bbh": { - "name": "BBH", - "value": 0.2760062695877461, - "normalized_score": 1.8441580122160068 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25083892617449666, - "normalized_score": 0.11185682326622093 - }, - "musr": { - "name": "MUSR", - "value": 0.3802291666666667, - "normalized_score": 4.695312500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11261635638297872, - "normalized_score": 1.4018173758865236 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-16", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 2.084, - "co2_cost": 0.44252049269387844 - } - }, - { - "id": "Omkar1102/code-yi_bfloat16_7e875c1d64029d1f8db6813bd2b715cb5406b745_False", - "model": { - "name": "Omkar1102/code-yi", - "sha": "7e875c1d64029d1f8db6813bd2b715cb5406b745", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.170300155045384, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2254407195131141, - "normalized_score": 22.54407195131141 - }, - "bbh": { - "name": "BBH", - "value": 0.2750025242693941, - "normalized_score": 1.5813991446240265 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2575503355704698, - "normalized_score": 1.0067114093959737 - }, - "musr": { - "name": "MUSR", - "value": 0.3761979166666667, - "normalized_score": 4.524739583333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11228390957446809, - "normalized_score": 1.3648788416075646 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-16", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 2.084, - "co2_cost": 1.2813594071547754 - } - }, - { - "id": "OmnicromsBrain/NeuralStar_FusionWriter_4x7b_float16_fbe296d2c76acbb792cdd22e14d1c8bb13723839_True", - "model": { - "name": "OmnicromsBrain/NeuralStar_FusionWriter_4x7b", - "sha": "fbe296d2c76acbb792cdd22e14d1c8bb13723839", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 20.071645037035577, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5963842604289951, - "normalized_score": 59.63842604289951 - }, - "bbh": { - "name": "BBH", - "value": 0.47762434766958123, - "normalized_score": 26.038439832659787 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04909365558912387, - "normalized_score": 4.909365558912387 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2785234899328859, - "normalized_score": 3.8031319910514525 - }, - "musr": { - "name": "MUSR", - "value": 0.401875, 
- "normalized_score": 8.201041666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2605551861702128, - "normalized_score": 17.83946513002364 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-07", - "submission_date": "2024-07-01", - "generation": 1, - "base_model": "OmnicromsBrain/NeuralStar_FusionWriter_4x7b (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 7, - "params_billions": 24.154, - "co2_cost": 2.7529296011383395 - } - }, - { - "id": "OnlyCheeini/greesychat-turbo_bfloat16_6c050859a63f8a677c52aef226fd64705fdf2fa9_True", - "model": { - "name": "OnlyCheeini/greesychat-turbo", - "sha": "6c050859a63f8a677c52aef226fd64705fdf2fa9", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "LlamaForCausalLM", - "average_score": 1.8020688847846804, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.023256071667619692, - "normalized_score": 2.3256071667619693 - }, - "bbh": { - "name": "BBH", - "value": 0.30921339082318816, - "normalized_score": 4.018369933555268 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.3314270833333333, - "normalized_score": 1.5950520833333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11377992021276596, - "normalized_score": 1.5311022458628842 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-26", - "submission_date": "2024-12-18", - "generation": 2, - "base_model": "Removed", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.510856749159255 - } - }, - { - "id": "Open-Orca/Mistral-7B-OpenOrca_bfloat16_4a37328cef00f524d3791b1c0cc559a3cc6af14d_True", - "model": { - "name": "Open-Orca/Mistral-7B-OpenOrca", - "sha": "4a37328cef00f524d3791b1c0cc559a3cc6af14d", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 17.72165111279921, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4977659277384008, - "normalized_score": 49.77659277384008 - }, - "bbh": { - "name": "BBH", - "value": 0.4768173517353546, - "normalized_score": 25.840025395269805 - }, - "math": { - "name": "MATH Level 5", - "value": 0.035498489425981876, - "normalized_score": 3.5498489425981874 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27181208053691275, - "normalized_score": 2.9082774049216997 - }, - "musr": { - "name": "MUSR", - "value": 0.38578124999999996, - "normalized_score": 5.889322916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.26529255319148937, - "normalized_score": 18.36583924349882 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-09-29", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "Open-Orca/Mistral-7B-OpenOrca", - "hub_license": "apache-2.0", - "hub_hearts": 681, - "params_billions": 7.0, - 
"co2_cost": 1.0671596867488107 - } - }, - { - "id": "OpenAssistant/oasst-sft-1-pythia-12b_float16_293df535fe7711a5726987fc2f17dfc87de452a1_False", - "model": { - "name": "OpenAssistant/oasst-sft-1-pythia-12b", - "sha": "293df535fe7711a5726987fc2f17dfc87de452a1", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "GPTNeoXForCausalLM", - "average_score": 3.6818304933986314, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.10553885911603435, - "normalized_score": 10.553885911603434 - }, - "bbh": { - "name": "BBH", - "value": 0.314662875941371, - "normalized_score": 4.778508799161477 - }, - "math": { - "name": "MATH Level 5", - "value": 0.015105740181268883, - "normalized_score": 1.5105740181268883 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2575503355704698, - "normalized_score": 1.0067114093959737 - }, - "musr": { - "name": "MUSR", - "value": 0.33269791666666665, - "normalized_score": 2.987239583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11128656914893617, - "normalized_score": 1.2540632387706852 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-03-09", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "OpenAssistant/oasst-sft-1-pythia-12b", - "hub_license": "apache-2.0", - "hub_hearts": 278, - "params_billions": 12.0, - "co2_cost": 1.77611447437514 - } - }, - { - "id": "OpenBuddy/openbuddy-falcon3-10b-v24.2-131k_bfloat16_6fd3ac4042bd7d05336182f24b3b3380a064756e_True", - "model": { - "name": "OpenBuddy/openbuddy-falcon3-10b-v24.2-131k", - "sha": "6fd3ac4042bd7d05336182f24b3b3380a064756e", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 27.273412962757565, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5086315420861093, - "normalized_score": 50.86315420861093 - }, - "bbh": { - "name": "BBH", - "value": 0.6003725722032135, - "normalized_score": 42.72636129353911 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21299093655589124, - "normalized_score": 21.299093655589125 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.41864583333333333, - "normalized_score": 10.664062500000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3833942819148936, - "normalized_score": 31.488253546099287 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-01", - "submission_date": "2025-01-02", - "generation": 1, - "base_model": "tiiuae/Falcon3-10B-Base", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 10.34, - "co2_cost": 1.6406392065022917 - } - }, - { - "id": "OpenBuddy/openbuddy-llama3-70b-v21.2-32k_bfloat16_e79a2f16c052fc76eeafb5b51d16261b2b981d0f_True", - "model": { - "name": "OpenBuddy/openbuddy-llama3-70b-v21.2-32k", - "sha": "e79a2f16c052fc76eeafb5b51d16261b2b981d0f", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 35.5534575981615, - "has_chat_template": true - }, - "evaluations": { - 
"ifeval": { - "name": "IFEval", - "value": 0.7010476646409305, - "normalized_score": 70.10476646409305 - }, - "bbh": { - "name": "BBH", - "value": 0.6507443429944494, - "normalized_score": 49.969365808428186 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20317220543806647, - "normalized_score": 20.31722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3422818791946309, - "normalized_score": 12.304250559284117 - }, - "musr": { - "name": "MUSR", - "value": 0.45796875000000004, - "normalized_score": 18.04609375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4832114361702128, - "normalized_score": 42.579048463356976 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-12", - "submission_date": "2024-09-05", - "generation": 0, - "base_model": "OpenBuddy/openbuddy-llama3-70b-v21.2-32k", - "hub_license": "other", - "hub_hearts": 2, - "params_billions": 70.554, - "co2_cost": 26.07187203032198 - } - }, - { - "id": "OpenBuddy/openbuddy-llama3-8b-v21.1-8k_bfloat16_658508bce03ccd61cea9657e0357bd4cd10503ba_True", - "model": { - "name": "OpenBuddy/openbuddy-llama3-8b-v21.1-8k", - "sha": "658508bce03ccd61cea9657e0357bd4cd10503ba", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.162938316635703, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5569666263292509, - "normalized_score": 55.69666263292508 - }, - "bbh": { - "name": "BBH", - "value": 0.47875007373484046, - "normalized_score": 26.115045337590942 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04305135951661632, - "normalized_score": 4.305135951661631 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.3987708333333333, - "normalized_score": 10.346354166666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2954621010638298, - "normalized_score": 21.71801122931442 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-20", - "submission_date": "2024-08-03", - "generation": 0, - "base_model": "OpenBuddy/openbuddy-llama3-8b-v21.1-8k", - "hub_license": "other", - "hub_hearts": 30, - "params_billions": 8.03, - "co2_cost": 1.638844215778852 - } - }, - { - "id": "OpenBuddy/openbuddy-llama3-8b-v21.2-32k_bfloat16_f3ea2dec2533a3dd97df32db2376b17875cafda2_True", - "model": { - "name": "OpenBuddy/openbuddy-llama3-8b-v21.2-32k", - "sha": "f3ea2dec2533a3dd97df32db2376b17875cafda2", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.069479074823068, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6191904147661538, - "normalized_score": 61.91904147661538 - }, - "bbh": { - "name": "BBH", - "value": 0.4856219845879779, - "normalized_score": 27.252334736558794 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07854984894259819, - "normalized_score": 7.854984894259818 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.377875, - "normalized_score": 
5.934375000000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3298703457446808, - "normalized_score": 25.541149527186757 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-18", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "OpenBuddy/openbuddy-llama3-8b-v21.2-32k", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.6997787342459105 - } - }, - { - "id": "OpenBuddy/openbuddy-llama3.1-70b-v22.1-131k_bfloat16_43ed945180174d79a8f6c68509161c249c884dfa_True", - "model": { - "name": "OpenBuddy/openbuddy-llama3.1-70b-v22.1-131k", - "sha": "43ed945180174d79a8f6c68509161c249c884dfa", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 41.24947292836925, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7332710541363582, - "normalized_score": 73.32710541363582 - }, - "bbh": { - "name": "BBH", - "value": 0.6698491606025763, - "normalized_score": 51.94077625159233 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3950151057401813, - "normalized_score": 39.50151057401813 - }, - "gpqa": { - "name": "GPQA", - "value": 0.375, - "normalized_score": 16.666666666666664 - }, - "musr": { - "name": "MUSR", - "value": 0.46295833333333336, - "normalized_score": 18.23645833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5304188829787234, - "normalized_score": 47.82432033096927 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-21", - "submission_date": "2024-08-24", - "generation": 0, - "base_model": "OpenBuddy/openbuddy-llama3.1-70b-v22.1-131k", - "hub_license": "other", - "hub_hearts": 1, - "params_billions": 70.554, - "co2_cost": 24.383246608651035 - } - }, - { - "id": "OpenBuddy/openbuddy-llama3.1-8b-v22.2-131k_bfloat16_0d9d85c7a5e4292e07c346147de56bd3991d525c_True", - "model": { - "name": "OpenBuddy/openbuddy-llama3.1-8b-v22.2-131k", - "sha": "0d9d85c7a5e4292e07c346147de56bd3991d525c", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.4181731020086, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6657269378582162, - "normalized_score": 66.57269378582163 - }, - "bbh": { - "name": "BBH", - "value": 0.5006515954024578, - "normalized_score": 29.057538243651496 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1148036253776435, - "normalized_score": 11.48036253776435 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.40810416666666666, - "normalized_score": 9.81302083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3310339095744681, - "normalized_score": 25.67043439716312 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-28", - "submission_date": "2024-07-29", - "generation": 0, - "base_model": "OpenBuddy/openbuddy-llama3.1-8b-v22.2-131k", - "hub_license": "other", - "hub_hearts": 2, 
- "params_billions": 8.03, - "co2_cost": 1.5821261970512388 - } - }, - { - "id": "OpenBuddy/openbuddy-llama3.1-8b-v22.3-131k_bfloat16_0097358fa1a450251b7ea1a03a5effdfded6c461_True", - "model": { - "name": "OpenBuddy/openbuddy-llama3.1-8b-v22.3-131k", - "sha": "0097358fa1a450251b7ea1a03a5effdfded6c461", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.317953936657844, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5997065563815123, - "normalized_score": 59.97065563815122 - }, - "bbh": { - "name": "BBH", - "value": 0.5065914870348772, - "normalized_score": 30.319510884756202 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12084592145015106, - "normalized_score": 12.084592145015106 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.40146875, - "normalized_score": 8.316927083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3277094414893617, - "normalized_score": 25.30104905437352 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-16", - "submission_date": "2024-08-24", - "generation": 0, - "base_model": "OpenBuddy/openbuddy-llama3.1-8b-v22.3-131k", - "hub_license": "other", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.668446440491569 - } - }, - { - "id": "OpenBuddy/openbuddy-llama3.2-1b-v23.1-131k_bfloat16_71b61e0e02e55553902f0051074d2ae965413cdb_True", - "model": { - "name": "OpenBuddy/openbuddy-llama3.2-1b-v23.1-131k", - "sha": "71b61e0e02e55553902f0051074d2ae965413cdb", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 9.350033548941015, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3590052172679601, - "normalized_score": 35.90052172679601 - }, - "bbh": { - "name": "BBH", - "value": 0.3266563226631131, - "normalized_score": 6.043619508652057 - }, - "math": { - "name": "MATH Level 5", - "value": 0.024924471299093656, - "normalized_score": 2.492447129909366 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.33421875, - "normalized_score": 1.2106770833333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1840093085106383, - "normalized_score": 9.334367612293143 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-07", - "submission_date": "2024-10-09", - "generation": 0, - "base_model": "OpenBuddy/openbuddy-llama3.2-1b-v23.1-131k", - "hub_license": "llama3.2", - "hub_hearts": 3, - "params_billions": 1.498, - "co2_cost": 0.8922852234636237 - } - }, - { - "id": "OpenBuddy/openbuddy-llama3.2-3b-v23.2-131k_bfloat16_7cd2baa3d9bb99e970d711fb7afe786753bc25ea_True", - "model": { - "name": "OpenBuddy/openbuddy-llama3.2-3b-v23.2-131k", - "sha": "7cd2baa3d9bb99e970d711fb7afe786753bc25ea", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.797653889948387, - "has_chat_template": true - }, - 
"evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4319450169993395, - "normalized_score": 43.19450169993395 - }, - "bbh": { - "name": "BBH", - "value": 0.4072660342069299, - "normalized_score": 16.588825592691695 - }, - "math": { - "name": "MATH Level 5", - "value": 0.026435045317220542, - "normalized_score": 2.643504531722054 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.3263125, - "normalized_score": 0.45572916666666624 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2479222074468085, - "normalized_score": 16.435800827423165 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-14", - "submission_date": "2024-10-15", - "generation": 0, - "base_model": "OpenBuddy/openbuddy-llama3.2-3b-v23.2-131k", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 3.607, - "co2_cost": 1.391143644717637 - } - }, - { - "id": "OpenBuddy/openbuddy-llama3.3-70b-v24.1-131k_bfloat16_b4585368e97d413b400503ca9ee2b8e4a8988614_True", - "model": { - "name": "OpenBuddy/openbuddy-llama3.3-70b-v24.1-131k", - "sha": "b4585368e97d413b400503ca9ee2b8e4a8988614", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 45.73679489100153, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.812080834408259, - "normalized_score": 81.2080834408259 - }, - "bbh": { - "name": "BBH", - "value": 0.6858038620320306, - "normalized_score": 54.146647969721265 - }, - "math": { - "name": "MATH Level 5", - "value": 0.44108761329305135, - "normalized_score": 44.10876132930513 - }, - "gpqa": { - "name": "GPQA", - "value": 0.43456375838926176, - "normalized_score": 24.608501118568235 - }, - "musr": { - "name": "MUSR", - "value": 0.4869270833333334, - "normalized_score": 22.265885416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5327460106382979, - "normalized_score": 48.082890070921984 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-08", - "submission_date": "2024-12-08", - "generation": 0, - "base_model": "OpenBuddy/openbuddy-llama3.3-70b-v24.1-131k", - "hub_license": "llama3.3", - "hub_hearts": 1, - "params_billions": 70.554, - "co2_cost": 67.52554694715876 - } - }, - { - "id": "OpenBuddy/openbuddy-mixtral-7bx8-v18.1-32k_bfloat16_98596b6731058cc9cca85f3b8ac9077342cb60ae_True", - "model": { - "name": "OpenBuddy/openbuddy-mixtral-7bx8-v18.1-32k", - "sha": "98596b6731058cc9cca85f3b8ac9077342cb60ae", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 22.329808849468282, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.549347952322061, - "normalized_score": 54.9347952322061 - }, - "bbh": { - "name": "BBH", - "value": 0.46561770563515265, - "normalized_score": 24.535442968436797 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10800604229607251, - "normalized_score": 10.80060422960725 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": 
"MUSR", - "value": 0.3830520833333333, - "normalized_score": 5.281510416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38040226063829785, - "normalized_score": 31.155806737588655 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-02-12", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "OpenBuddy/openbuddy-mixtral-7bx8-v18.1-32k", - "hub_license": "apache-2.0", - "hub_hearts": 14, - "params_billions": 46.741, - "co2_cost": 9.737751356111918 - } - }, - { - "id": "OpenBuddy/openbuddy-nemotron-70b-v23.1-131k_bfloat16_d8cb98fb9281a84eb0df8216bae60beaf5181921_True", - "model": { - "name": "OpenBuddy/openbuddy-nemotron-70b-v23.1-131k", - "sha": "d8cb98fb9281a84eb0df8216bae60beaf5181921", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 39.78505127265852, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7555275557742346, - "normalized_score": 75.55275557742345 - }, - "bbh": { - "name": "BBH", - "value": 0.6749472828128272, - "normalized_score": 53.18804887163517 - }, - "math": { - "name": "MATH Level 5", - "value": 0.32099697885196377, - "normalized_score": 32.09969788519638 - }, - "gpqa": { - "name": "GPQA", - "value": 0.36325503355704697, - "normalized_score": 15.100671140939594 - }, - "musr": { - "name": "MUSR", - "value": 0.45375000000000004, - "normalized_score": 16.38541666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5174534574468085, - "normalized_score": 46.38371749408983 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-20", - "submission_date": "2024-10-23", - "generation": 3, - "base_model": "meta-llama/Meta-Llama-3.1-70B", - "hub_license": "llama3.1", - "hub_hearts": 4, - "params_billions": 70.554, - "co2_cost": 36.442432638282575 - } - }, - { - "id": "OpenBuddy/openbuddy-nemotron-70b-v23.2-131k_bfloat16_7a39fd93b078189c6892344c2f01059320543e2f_True", - "model": { - "name": "OpenBuddy/openbuddy-nemotron-70b-v23.2-131k", - "sha": "7a39fd93b078189c6892344c2f01059320543e2f", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 39.234122021270416, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7226547782107031, - "normalized_score": 72.26547782107032 - }, - "bbh": { - "name": "BBH", - "value": 0.6704805157570325, - "normalized_score": 52.265661751102094 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3157099697885196, - "normalized_score": 31.570996978851962 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3598993288590604, - "normalized_score": 14.65324384787472 - }, - "musr": { - "name": "MUSR", - "value": 0.46959375000000003, - "normalized_score": 18.86588541666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5120511968085106, - "normalized_score": 45.78346631205674 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-24", - "submission_date": "2024-10-24", - "generation": 3, - "base_model": 
"meta-llama/Meta-Llama-3.1-70B", - "hub_license": "llama3.1", - "hub_hearts": 2, - "params_billions": 70.554, - "co2_cost": 24.78981565434613 - } - }, - { - "id": "OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.1-200k_bfloat16_001e14063e2702a9b2284dc6ec889d2586dc839b_True", - "model": { - "name": "OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.1-200k", - "sha": "001e14063e2702a9b2284dc6ec889d2586dc839b", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 32.52829892906403, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.630880508162786, - "normalized_score": 63.088050816278596 - }, - "bbh": { - "name": "BBH", - "value": 0.601319898776811, - "normalized_score": 43.27649863201484 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2537764350453172, - "normalized_score": 25.377643504531722 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33305369127516776, - "normalized_score": 11.073825503355701 - }, - "musr": { - "name": "MUSR", - "value": 0.42404166666666665, - "normalized_score": 11.538541666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4673371010638298, - "normalized_score": 40.81523345153664 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-23", - "submission_date": "2024-09-23", - "generation": 0, - "base_model": "OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.1-200k", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 14.77, - "co2_cost": 2.9134999987930428 - } - }, - { - "id": "OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.3-200k_bfloat16_0cef6f7719c1eb3bc1ebba133508c2c6d67e635c_True", - "model": { - "name": "OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.3-200k", - "sha": "0cef6f7719c1eb3bc1ebba133508c2c6d67e635c", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 32.29791522872073, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6131453432448126, - "normalized_score": 61.31453432448127 - }, - "bbh": { - "name": "BBH", - "value": 0.6080855261046028, - "normalized_score": 44.1839402106242 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2311178247734139, - "normalized_score": 23.11178247734139 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3271812080536913, - "normalized_score": 10.290827740492169 - }, - "musr": { - "name": "MUSR", - "value": 0.4345833333333333, - "normalized_score": 12.722916666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4794714095744681, - "normalized_score": 42.16348995271868 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-02", - "submission_date": "2024-10-11", - "generation": 0, - "base_model": "OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.3-200k", - "hub_license": "apache-2.0", - "hub_hearts": 5, - "params_billions": 14.77, - "co2_cost": 2.9958980156195016 - } - }, - { - "id": "OpenBuddy/openbuddy-qwen2.5llamaify-7b-v23.1-200k_bfloat16_91521abfec2a00f4853f6cb4dd620177617ca572_True", - "model": { - "name": "OpenBuddy/openbuddy-qwen2.5llamaify-7b-v23.1-200k", - "sha": "91521abfec2a00f4853f6cb4dd620177617ca572", - "precision": "bfloat16", - 
"type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 27.863254758763023, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5672582082208539, - "normalized_score": 56.725820822085396 - }, - "bbh": { - "name": "BBH", - "value": 0.5509381466888461, - "normalized_score": 36.3981275172541 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18882175226586104, - "normalized_score": 18.882175226586103 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3145973154362416, - "normalized_score": 8.612975391498878 - }, - "musr": { - "name": "MUSR", - "value": 0.43632291666666667, - "normalized_score": 13.807031250000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.394780585106383, - "normalized_score": 32.75339834515366 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-04", - "submission_date": "2024-10-10", - "generation": 2, - "base_model": "Qwen/Qwen2.5-7B", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 7.615, - "co2_cost": 2.7637336349126245 - } - }, - { - "id": "OpenBuddy/openbuddy-qwq-32b-v24.1-200k_bfloat16_1e9dfa19793d79c53999e2f22794d2c310180c7e_True", - "model": { - "name": "OpenBuddy/openbuddy-qwq-32b-v24.1-200k", - "sha": "1e9dfa19793d79c53999e2f22794d2c310180c7e", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.96232491540321, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.593661484860171, - "normalized_score": 59.366148486017096 - }, - "bbh": { - "name": "BBH", - "value": 0.6798496773637743, - "normalized_score": 54.46917637764042 - }, - "math": { - "name": "MATH Level 5", - "value": 0.37386706948640486, - "normalized_score": 37.38670694864049 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3808724832214765, - "normalized_score": 17.4496644295302 - }, - "musr": { - "name": "MUSR", - "value": 0.484875, - "normalized_score": 21.209374999999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5490359042553191, - "normalized_score": 49.89287825059102 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-21", - "submission_date": "2024-12-22", - "generation": 3, - "base_model": "Qwen/Qwen2.5-32B", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 32.764, - "co2_cost": 6.90692943946993 - } - }, - { - "id": "OpenBuddy/openbuddy-qwq-32b-v24.2-200k_bfloat16_ee9f46f325a4b68d3b06d1dcabc2d81f42df6682_True", - "model": { - "name": "OpenBuddy/openbuddy-qwq-32b-v24.2-200k", - "sha": "ee9f46f325a4b68d3b06d1dcabc2d81f42df6682", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.56011201711714, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5969837808126881, - "normalized_score": 59.698378081268814 - }, - "bbh": { - "name": "BBH", - "value": 0.6771537576509328, - "normalized_score": 54.16349558366793 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3776435045317221, - "normalized_score": 37.764350453172206 - }, - "gpqa": { - "name": "GPQA", - "value": 
0.3766778523489933, - "normalized_score": 16.890380313199106 - }, - "musr": { - "name": "MUSR", - "value": 0.47179166666666666, - "normalized_score": 19.440624999999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5446309840425532, - "normalized_score": 49.4034426713948 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-30", - "generation": 3, - "base_model": "Qwen/Qwen2.5-32B", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 32.764, - "co2_cost": 6.0469398673429895 - } - }, - { - "id": "OpenBuddy/openbuddy-yi1.5-34b-v21.3-32k_bfloat16_966be6ad502cdd50a9af94d5f003aec040cdb0b5_True", - "model": { - "name": "OpenBuddy/openbuddy-yi1.5-34b-v21.3-32k", - "sha": "966be6ad502cdd50a9af94d5f003aec040cdb0b5", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.92470371318825, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5420041046645123, - "normalized_score": 54.20041046645124 - }, - "bbh": { - "name": "BBH", - "value": 0.6162574860411373, - "normalized_score": 45.637092606204 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1782477341389728, - "normalized_score": 17.82477341389728 - }, - "gpqa": { - "name": "GPQA", - "value": 0.348993288590604, - "normalized_score": 13.19910514541387 - }, - "musr": { - "name": "MUSR", - "value": 0.44394791666666666, - "normalized_score": 14.693489583333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4599401595744681, - "normalized_score": 39.99335106382979 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-05", - "submission_date": "2024-08-30", - "generation": 1, - "base_model": "OpenBuddy/openbuddy-yi1.5-34b-v21.3-32k (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 34.407, - "co2_cost": 6.076653422339551 - } - }, - { - "id": "OpenBuddy/openbuddy-zero-14b-v22.3-32k_bfloat16_d9a0b6bc02f283e154c9ad6db43a5a97eed97f5b_True", - "model": { - "name": "OpenBuddy/openbuddy-zero-14b-v22.3-32k", - "sha": "d9a0b6bc02f283e154c9ad6db43a5a97eed97f5b", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.406071028276457, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.37529200299649373, - "normalized_score": 37.529200299649375 - }, - "bbh": { - "name": "BBH", - "value": 0.4859759816473639, - "normalized_score": 26.289506846678147 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09365558912386707, - "normalized_score": 9.365558912386707 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.41660416666666666, - "normalized_score": 11.342187500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3187333776595745, - "normalized_score": 24.30370862884161 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-16", - "submission_date": 
"2024-07-29", - "generation": 0, - "base_model": "OpenBuddy/openbuddy-zero-14b-v22.3-32k", - "hub_license": "other", - "hub_hearts": 1, - "params_billions": 14.022, - "co2_cost": 3.3775378276507078 - } - }, - { - "id": "OpenBuddy/openbuddy-zero-3b-v21.2-32k_bfloat16_74e1d168c5e917219d668d1483f6355dd0464a31_True", - "model": { - "name": "OpenBuddy/openbuddy-zero-3b-v21.2-32k", - "sha": "74e1d168c5e917219d668d1483f6355dd0464a31", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 11.713302286048615, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3802377691192702, - "normalized_score": 38.02377691192702 - }, - "bbh": { - "name": "BBH", - "value": 0.3934791831798414, - "normalized_score": 15.293406418468868 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0188821752265861, - "normalized_score": 1.8882175226586102 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.3566354166666667, - "normalized_score": 2.2460937499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.20337433510638298, - "normalized_score": 11.486037234042552 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-02", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "OpenBuddy/openbuddy-zero-3b-v21.2-32k", - "hub_license": "other", - "hub_hearts": 2, - "params_billions": 4.769, - "co2_cost": 1.7572375059674725 - } - }, - { - "id": "OpenBuddy/openbuddy-zero-56b-v21.2-32k_float16_c7a1a4a6e798f75d1d3219ab9ff9f2692e29f7d5_True", - "model": { - "name": "OpenBuddy/openbuddy-zero-56b-v21.2-32k", - "sha": "c7a1a4a6e798f75d1d3219ab9ff9f2692e29f7d5", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.536019963329736, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5057092957796425, - "normalized_score": 50.57092957796425 - }, - "bbh": { - "name": "BBH", - "value": 0.6128345897750148, - "normalized_score": 44.796541615730554 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16238670694864046, - "normalized_score": 16.238670694864048 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3179530201342282, - "normalized_score": 9.060402684563762 - }, - "musr": { - "name": "MUSR", - "value": 0.4305208333333333, - "normalized_score": 12.781770833333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43991023936170215, - "normalized_score": 37.76780437352246 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-10", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "OpenBuddy/openbuddy-zero-56b-v21.2-32k", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 56.707, - "co2_cost": 15.147491894006654 - } - }, - { - "id": "OpenGenerativeAI/Bifrost_bfloat16_7525b3cd69b258aaac8897aa2bff8e9de89f5767_True", - "model": { - "name": "OpenGenerativeAI/Bifrost", - "sha": "7525b3cd69b258aaac8897aa2bff8e9de89f5767", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": 
"Original", - "architecture": "LlamaForCausalLM", - "average_score": 37.14746413152554, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6347524568145853, - "normalized_score": 63.47524568145853 - }, - "bbh": { - "name": "BBH", - "value": 0.6849273974523276, - "normalized_score": 55.09700864855187 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2545317220543807, - "normalized_score": 25.45317220543807 - }, - "gpqa": { - "name": "GPQA", - "value": 0.36828859060402686, - "normalized_score": 15.771812080536915 - }, - "musr": { - "name": "MUSR", - "value": 0.45976041666666667, - "normalized_score": 16.870052083333338 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5159574468085106, - "normalized_score": 46.21749408983452 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-07", - "generation": 1, - "base_model": "OpenGenerativeAI/Bifrost (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 1.7108119851231949 - } - }, - { - "id": "OpenGenerativeAI/Bifrost-14B_bfloat16_2f63272826f4a218a00e6a84d1bd1acb023ae613_True", - "model": { - "name": "OpenGenerativeAI/Bifrost-14B", - "sha": "2f63272826f4a218a00e6a84d1bd1acb023ae613", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 37.39963457924099, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6615302951723648, - "normalized_score": 66.15302951723649 - }, - "bbh": { - "name": "BBH", - "value": 0.6844897889249308, - "normalized_score": 55.088066493614235 - }, - "math": { - "name": "MATH Level 5", - "value": 0.23564954682779457, - "normalized_score": 23.564954682779458 - }, - "gpqa": { - "name": "GPQA", - "value": 0.37919463087248323, - "normalized_score": 17.225950782997764 - }, - "musr": { - "name": "MUSR", - "value": 0.46239583333333334, - "normalized_score": 17.09947916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5073969414893617, - "normalized_score": 45.26632683215129 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-07", - "submission_date": "2025-03-07", - "generation": 1, - "base_model": "OpenGenerativeAI/Bifrost-14B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 0.8638634012216105 - } - }, - { - "id": "OpenLLM-France/Lucie-7B_bfloat16_39b6dd46c3d39b2b2a61523093d1cf6c8a24730f_False", - "model": { - "name": "OpenLLM-France/Lucie-7B", - "sha": "39b6dd46c3d39b2b2a61523093d1cf6c8a24730f", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 8.611731815760395, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24964538535530173, - "normalized_score": 24.964538535530174 - }, - "bbh": { - "name": "BBH", - "value": 0.3492469872973046, - "normalized_score": 9.91394298206345 - }, - "math": { - "name": "MATH Level 5", - "value": 0.014350453172205438, - "normalized_score": 1.4350453172205437 - }, - "gpqa": { - "name": "GPQA", - "value": 
0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.39232291666666663, - "normalized_score": 6.807031250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.14976728723404256, - "normalized_score": 5.529698581560284 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-10", - "submission_date": "2025-01-14", - "generation": 0, - "base_model": "OpenLLM-France/Lucie-7B", - "hub_license": "apache-2.0", - "hub_hearts": 19, - "params_billions": 6.707, - "co2_cost": 0.9548920211955723 - } - }, - { - "id": "OpenLLM-France/Lucie-7B-Instruct_bfloat16_4632991a54e8713b234302164ff171d909a121f9_True", - "model": { - "name": "OpenLLM-France/Lucie-7B-Instruct", - "sha": "4632991a54e8713b234302164ff171d909a121f9", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 8.364897117485063, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.279645784296777, - "normalized_score": 27.9645784296777 - }, - "bbh": { - "name": "BBH", - "value": 0.3254036581260458, - "normalized_score": 7.261384026200003 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01661631419939577, - "normalized_score": 1.6616314199395772 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.36621875, - "normalized_score": 3.2106770833333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15558510638297873, - "normalized_score": 6.176122931442081 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-14", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 6.707, - "co2_cost": 0.9798951820675718 - } - }, - { - "id": "OpenLLM-France/Lucie-7B-Instruct-human-data_bfloat16_4842e3bd24e1037658aff7ca3dbc6b6973bb38f4_True", - "model": { - "name": "OpenLLM-France/Lucie-7B-Instruct-human-data", - "sha": "4842e3bd24e1037658aff7ca3dbc6b6973bb38f4", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 8.574866924872492, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.29460830596151544, - "normalized_score": 29.460830596151546 - }, - "bbh": { - "name": "BBH", - "value": 0.32842533479733, - "normalized_score": 7.629771133304573 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02190332326283988, - "normalized_score": 2.190332326283988 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2751677852348993, - "normalized_score": 3.355704697986576 - }, - "musr": { - "name": "MUSR", - "value": 0.37285416666666665, - "normalized_score": 4.0401041666666675 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.14295212765957446, - "normalized_score": 4.772458628841606 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-07", - "submission_date": 
"2025-01-14", - "generation": 1, - "base_model": "OpenLLM-France/Lucie-7B-Instruct-human-data (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 6, - "params_billions": 6.707, - "co2_cost": 0.9751521222163677 - } - }, - { - "id": "OpenLLM-France/Lucie-7B-Instruct-v1.1_float16_204e1880f28b63db2687d174e297548a10e719cb_False", - "model": { - "name": "OpenLLM-France/Lucie-7B-Instruct-v1.1", - "sha": "204e1880f28b63db2687d174e297548a10e719cb", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 10.999352587987943, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3038759380665523, - "normalized_score": 30.38759380665523 - }, - "bbh": { - "name": "BBH", - "value": 0.38158765227444885, - "normalized_score": 14.739314179940692 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03172205438066465, - "normalized_score": 3.1722054380664653 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28187919463087246, - "normalized_score": 4.250559284116329 - }, - "musr": { - "name": "MUSR", - "value": 0.37502083333333336, - "normalized_score": 3.8442708333333346 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1864195478723404, - "normalized_score": 9.602171985815602 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-13", - "submission_date": "2025-02-25", - "generation": 1, - "base_model": "OpenLLM-France/Lucie-7B-Instruct-v1.1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 8, - "params_billions": 6.707, - "co2_cost": 0.4879356419067323 - } - }, - { - "id": "OpenLeecher/llama3-8b-lima_bfloat16_237a2bcb240eecd9355a091f839e42ba3d31bda5_True", - "model": { - "name": "OpenLeecher/llama3-8b-lima", - "sha": "237a2bcb240eecd9355a091f839e42ba3d31bda5", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 15.025432287041793, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43706587410293574, - "normalized_score": 43.70658741029358 - }, - "bbh": { - "name": "BBH", - "value": 0.4295828632822993, - "normalized_score": 19.573064881964964 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05060422960725076, - "normalized_score": 5.0604229607250755 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23825503355704697, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.37127083333333327, - "normalized_score": 3.7421874999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.26263297872340424, - "normalized_score": 18.070330969267136 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-10-01", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.917858732048215 - } - }, - { - "id": "OpenScholar/Llama-3.1_OpenScholar-8B_bfloat16_e26aeb22af568bd8d01ffde86ebbd13c3cf4fcc5_True", - "model": { - "name": "OpenScholar/Llama-3.1_OpenScholar-8B", - "sha": "e26aeb22af568bd8d01ffde86ebbd13c3cf4fcc5", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": 
"Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.961332257431867, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6064010159709571, - "normalized_score": 60.64010159709571 - }, - "bbh": { - "name": "BBH", - "value": 0.5207740834450674, - "normalized_score": 32.40392120448785 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16540785498489427, - "normalized_score": 16.540785498489427 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28187919463087246, - "normalized_score": 4.250559284116329 - }, - "musr": { - "name": "MUSR", - "value": 0.4275104166666667, - "normalized_score": 11.838802083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.370844414893617, - "normalized_score": 30.093823877068555 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-15", - "submission_date": "2024-12-03", - "generation": 1, - "base_model": "OpenScholar/Llama-3.1_OpenScholar-8B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 57, - "params_billions": 8.0, - "co2_cost": 1.2652134388784446 - } - }, - { - "id": "Orenguteng/Llama-3.1-8B-Lexi-Uncensored_bfloat16_56ac439ab4c7826871493ffbe2d49f2100a98e97_True", - "model": { - "name": "Orenguteng/Llama-3.1-8B-Lexi-Uncensored", - "sha": "56ac439ab4c7826871493ffbe2d49f2100a98e97", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 27.175116142740332, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7776843220432896, - "normalized_score": 77.76843220432897 - }, - "bbh": { - "name": "BBH", - "value": 0.5057261652642643, - "normalized_score": 29.24254324176861 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15709969788519637, - "normalized_score": 15.709969788519636 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27181208053691275, - "normalized_score": 2.9082774049216997 - }, - "musr": { - "name": "MUSR", - "value": 0.3871145833333333, - "normalized_score": 6.422656250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37898936170212766, - "normalized_score": 30.99881796690307 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-26", - "submission_date": "2024-07-29", - "generation": 0, - "base_model": "Orenguteng/Llama-3.1-8B-Lexi-Uncensored", - "hub_license": "llama3.1", - "hub_hearts": 46, - "params_billions": 8.03, - "co2_cost": 1.7134701351201276 - } - }, - { - "id": "Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2_bfloat16_2340f8fbcd2452125a798686ca90b882a08fb0d9_True", - "model": { - "name": "Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2", - "sha": "2340f8fbcd2452125a798686ca90b882a08fb0d9", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.390881228317323, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7791581891603169, - "normalized_score": 77.91581891603168 - }, - "bbh": { - "name": "BBH", - "value": 0.5084008018783934, - "normalized_score": 29.687032745631218 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1971299093655589, - 
"normalized_score": 19.71299093655589 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.3842916666666667, - "normalized_score": 7.76979166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3780751329787234, - "normalized_score": 30.897236997635936 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-09", - "submission_date": "2024-08-28", - "generation": 0, - "base_model": "Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2", - "hub_license": "llama3.1", - "hub_hearts": 180, - "params_billions": 8.03, - "co2_cost": 1.739372628969439 - } - }, - { - "id": "Orion-zhen/Qwen2.5-7B-Instruct-Uncensored_bfloat16_33c24657b4394fc430ad90b5d413e5985ce8e292_True", - "model": { - "name": "Orion-zhen/Qwen2.5-7B-Instruct-Uncensored", - "sha": "33c24657b4394fc430ad90b5d413e5985ce8e292", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.71881524964477, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7204317876567508, - "normalized_score": 72.04317876567507 - }, - "bbh": { - "name": "BBH", - "value": 0.5473918652157296, - "normalized_score": 35.83245286228819 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4773413897280967, - "normalized_score": 47.73413897280967 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.43613541666666666, - "normalized_score": 13.58359375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4426529255319149, - "normalized_score": 38.07254728132387 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-26", - "submission_date": "2024-10-19", - "generation": 1, - "base_model": "Orion-zhen/Qwen2.5-7B-Instruct-Uncensored (Merge)", - "hub_license": "gpl-3.0", - "hub_hearts": 18, - "params_billions": 7.616, - "co2_cost": 2.2336238849080123 - } - }, - { - "id": "Orion-zhen/phi-4-abliterated_bfloat16_90e3bfb1a9507d931c19faa5c2084d3f8d0bfb77_False", - "model": { - "name": "Orion-zhen/phi-4-abliterated", - "sha": "90e3bfb1a9507d931c19faa5c2084d3f8d0bfb77", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 29.97907708859114, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.05760271634817839, - "normalized_score": 5.760271634817839 - }, - "bbh": { - "name": "BBH", - "value": 0.6698239306664778, - "normalized_score": 52.45712922578372 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3021148036253776, - "normalized_score": 30.211480362537763 - }, - "gpqa": { - "name": "GPQA", - "value": 0.40436241610738255, - "normalized_score": 20.581655480984338 - }, - "musr": { - "name": "MUSR", - "value": 0.500625, - "normalized_score": 23.178124999999994 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5291722074468085, - "normalized_score": 47.685800827423165 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - 
"is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-17", - "submission_date": "2024-12-20", - "generation": 1, - "base_model": "Orion-zhen/phi-4-abliterated (Merge)", - "hub_license": "gpl-3.0", - "hub_hearts": 28, - "params_billions": 14.66, - "co2_cost": 1.7887019249616476 - } - }, - { - "id": "P0x0/Astra-v1-12B_bfloat16_c706e253f8d8fa838b505cbec0e1a6aeec545abc_False", - "model": { - "name": "P0x0/Astra-v1-12B", - "sha": "c706e253f8d8fa838b505cbec0e1a6aeec545abc", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.737240466519605, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.28059437847134494, - "normalized_score": 28.05943784713449 - }, - "bbh": { - "name": "BBH", - "value": 0.5214506484138984, - "normalized_score": 31.80990734117942 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11329305135951662, - "normalized_score": 11.329305135951662 - }, - "gpqa": { - "name": "GPQA", - "value": 0.313758389261745, - "normalized_score": 8.501118568232664 - }, - "musr": { - "name": "MUSR", - "value": 0.4051875, - "normalized_score": 11.381770833333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3460771276595745, - "normalized_score": 27.341903073286055 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-21", - "submission_date": "2024-09-23", - "generation": 1, - "base_model": "mistralai/Mistral-Nemo-Base-2407", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 12.248, - "co2_cost": 3.211346744195269 - } - }, - { - "id": "PJMixers/LLaMa-3-CursedStock-v2.0-8B_bfloat16_d47cc29df363f71ffaf6cd21ac4bdeefa27359db_True", - "model": { - "name": "PJMixers/LLaMa-3-CursedStock-v2.0-8B", - "sha": "d47cc29df363f71ffaf6cd21ac4bdeefa27359db", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.166133893343382, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6330791189599152, - "normalized_score": 63.30791189599152 - }, - "bbh": { - "name": "BBH", - "value": 0.527115950402997, - "normalized_score": 32.56361170891586 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09441087613293052, - "normalized_score": 9.441087613293051 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.38562500000000005, - "normalized_score": 8.036458333333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3556349734042553, - "normalized_score": 28.40388593380615 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-26", - "submission_date": "2024-06-27", - "generation": 1, - "base_model": "PJMixers/LLaMa-3-CursedStock-v2.0-8B (Merge)", - "hub_license": "llama3", - "hub_hearts": 12, - "params_billions": 8.03, - "co2_cost": 2.8053841117290728 - } - }, - { - "id": "PJMixers-Dev/L3.2-Instruct-Thinking-v0.1-1B_bfloat16_da482263c3258481e235117b58977f01bd9f9e25_True", - "model": { - "name": "PJMixers-Dev/L3.2-Instruct-Thinking-v0.1-1B", - "sha": 
"da482263c3258481e235117b58977f01bd9f9e25", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 10.902648327773703, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.46276989498973836, - "normalized_score": 46.27698949897383 - }, - "bbh": { - "name": "BBH", - "value": 0.33018063718974094, - "normalized_score": 6.3866369990439695 - }, - "math": { - "name": "MATH Level 5", - "value": 0.054380664652567974, - "normalized_score": 5.438066465256798 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2575503355704698, - "normalized_score": 1.0067114093959737 - }, - "musr": { - "name": "MUSR", - "value": 0.32621875, - "normalized_score": 0.9440104166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.14827127659574468, - "normalized_score": 5.363475177304964 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-11", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 0.7410305950739005 - } - }, - { - "id": "PJMixers-Dev/LLaMa-3.1-Instruct-Interleaved-Zeroed-13B_bfloat16_c5b8d7fa43a013e434630a7f89f3bf15ac19606f_True", - "model": { - "name": "PJMixers-Dev/LLaMa-3.1-Instruct-Interleaved-Zeroed-13B", - "sha": "c5b8d7fa43a013e434630a7f89f3bf15ac19606f", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.894562181103485, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7871015572015585, - "normalized_score": 78.71015572015585 - }, - "bbh": { - "name": "BBH", - "value": 0.5073267838961463, - "normalized_score": 29.89275635245275 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2001510574018127, - "normalized_score": 20.01510574018127 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.3869895833333333, - "normalized_score": 8.407031249999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3767453457446808, - "normalized_score": 30.74948286052009 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-18", - "submission_date": "2024-12-18", - "generation": 1, - "base_model": "PJMixers-Dev/LLaMa-3.1-Instruct-Interleaved-Zeroed-13B (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 13.047, - "co2_cost": 2.5927314715878755 - } - }, - { - "id": "PJMixers-Dev/LLaMa-3.1-RomboTiesTest-8B_bfloat16_87ff7bb5f3399c4a4c021dbcbd5f2b5f52eedab2_True", - "model": { - "name": "PJMixers-Dev/LLaMa-3.1-RomboTiesTest-8B", - "sha": "87ff7bb5f3399c4a4c021dbcbd5f2b5f52eedab2", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.818375441031588, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7825303527972447, - "normalized_score": 78.25303527972446 - }, - "bbh": { - "name": "BBH", - "value": 0.5073267838961463, - "normalized_score": 29.89275635245275 - }, - "math": { - 
"name": "MATH Level 5", - "value": 0.2001510574018127, - "normalized_score": 20.01510574018127 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.3869895833333333, - "normalized_score": 8.407031249999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3767453457446808, - "normalized_score": 30.74948286052009 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-20", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 4.015, - "co2_cost": 1.4369043060818494 - } - }, - { - "id": "PJMixers-Dev/LLaMa-3.1-RomboTiesTest2-8B_bfloat16_5bc997080bae6df93298edb5a82b1391dc972047_True", - "model": { - "name": "PJMixers-Dev/LLaMa-3.1-RomboTiesTest2-8B", - "sha": "5bc997080bae6df93298edb5a82b1391dc972047", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.818375441031588, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7825303527972447, - "normalized_score": 78.25303527972446 - }, - "bbh": { - "name": "BBH", - "value": 0.5073267838961463, - "normalized_score": 29.89275635245275 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2001510574018127, - "normalized_score": 20.01510574018127 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.3869895833333333, - "normalized_score": 8.407031249999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3767453457446808, - "normalized_score": 30.74948286052009 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-21", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 4.015, - "co2_cost": 1.4410469707927869 - } - }, - { - "id": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B_bfloat16_1286f51489b06fe67fa36d57aa87331fa37e698b_True", - "model": { - "name": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B", - "sha": "1286f51489b06fe67fa36d57aa87331fa37e698b", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.701739941091272, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.693054428915278, - "normalized_score": 69.3054428915278 - }, - "bbh": { - "name": "BBH", - "value": 0.4556166737589294, - "normalized_score": 23.80830677255767 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1216012084592145, - "normalized_score": 12.16012084592145 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.37003125000000003, - "normalized_score": 4.053906250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.312749335106383, - "normalized_score": 23.63881501182033 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - 
"is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-12", - "submission_date": "2024-10-12", - "generation": 1, - "base_model": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 1.4273887421986473 - } - }, - { - "id": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B_bfloat16_4c348a8dfc1be0b4985e0ed2882329515a60c19d_True", - "model": { - "name": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B", - "sha": "4c348a8dfc1be0b4985e0ed2882329515a60c19d", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.772674131965854, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6291573026237051, - "normalized_score": 62.9157302623705 - }, - "bbh": { - "name": "BBH", - "value": 0.45814952191015346, - "normalized_score": 23.341239903737886 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1299093655589124, - "normalized_score": 12.990936555891238 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.365875, - "normalized_score": 4.867708333333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3115026595744681, - "normalized_score": 23.50029550827423 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-14", - "submission_date": "2024-10-14", - "generation": 1, - "base_model": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 1, - "params_billions": 3.213, - "co2_cost": 1.4197984758501463 - } - }, - { - "id": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B_bfloat16_17b245cfcffcc6aadc90989bf08d9625455064e1_True", - "model": { - "name": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B", - "sha": "17b245cfcffcc6aadc90989bf08d9625455064e1", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.826266808747885, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6503898544750152, - "normalized_score": 65.03898544750152 - }, - "bbh": { - "name": "BBH", - "value": 0.45107942950222196, - "normalized_score": 22.288715309224642 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12613293051359517, - "normalized_score": 12.613293051359516 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27181208053691275, - "normalized_score": 2.9082774049216997 - }, - "musr": { - "name": "MUSR", - "value": 0.3687291666666667, - "normalized_score": 4.691145833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3107546542553192, - "normalized_score": 23.417183806146575 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-28", - "submission_date": "2024-10-28", - "generation": 1, - "base_model": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 1, - "params_billions": 3.213, - "co2_cost": 1.3580841575987257 - } - }, - { - "id": 
"PJMixers-Dev/LLaMa-3.2-Instruct-JankMixBread-v0.1-3B_bfloat16_19faf7463cab41a2492cad26fc54b2fce3a05caf_True", - "model": { - "name": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMixBread-v0.1-3B", - "sha": "19faf7463cab41a2492cad26fc54b2fce3a05caf", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.737295916446044, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5040858256093831, - "normalized_score": 50.408582560938314 - }, - "bbh": { - "name": "BBH", - "value": 0.4483158594793648, - "normalized_score": 22.759588390933697 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13066465256797583, - "normalized_score": 13.066465256797583 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.3515520833333334, - "normalized_score": 4.677343750000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.308344414893617, - "normalized_score": 23.149379432624112 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-12", - "submission_date": "2024-10-12", - "generation": 1, - "base_model": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMixBread-v0.1-3B (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 1.4048551500864417 - } - }, - { - "id": "PJMixers-Dev/Qwen2.5-RomboTiesTest-7B_bfloat16_61e798861cae00ff1108708fc89ed18bccaf1170_True", - "model": { - "name": "PJMixers-Dev/Qwen2.5-RomboTiesTest-7B", - "sha": "61e798861cae00ff1108708fc89ed18bccaf1170", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.28885377363677, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7558023821238757, - "normalized_score": 75.58023821238757 - }, - "bbh": { - "name": "BBH", - "value": 0.5398673461520839, - "normalized_score": 34.93145651665548 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4962235649546828, - "normalized_score": 49.62235649546828 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.4033645833333333, - "normalized_score": 8.720572916666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4285239361702128, - "normalized_score": 36.50265957446809 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-21", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.808, - "co2_cost": 1.3427656159126633 - } - }, - { - "id": "Parissa3/test-model_bfloat16_7021138dac98d930f1ce0ebe186583c0813d6f48_False", - "model": { - "name": "Parissa3/test-model", - "sha": "7021138dac98d930f1ce0ebe186583c0813d6f48", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.745917611192755, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3882564927725103, - "normalized_score": 
38.82564927725103 - }, - "bbh": { - "name": "BBH", - "value": 0.5193916761801759, - "normalized_score": 32.83903240379116 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0649546827794562, - "normalized_score": 6.495468277945619 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.46853125, - "normalized_score": 17.53307291666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3056848404255319, - "normalized_score": 22.853871158392433 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-16", - "submission_date": "2024-11-16", - "generation": 1, - "base_model": "Parissa3/test-model (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.9467058058228061 - } - }, - { - "id": "Pinkstack/PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B_float16_2f964ba6516a9f1e2cbec4c3decde734c340a739_False", - "model": { - "name": "Pinkstack/PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B", - "sha": "2f964ba6516a9f1e2cbec4c3decde734c340a739", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 23.92876945667705, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5084819390328772, - "normalized_score": 50.84819390328772 - }, - "bbh": { - "name": "BBH", - "value": 0.47105662040096935, - "normalized_score": 26.33092647670804 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1691842900302115, - "normalized_score": 16.91842900302115 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.44785416666666666, - "normalized_score": 15.315104166666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35106382978723405, - "normalized_score": 27.89598108747045 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-17", - "submission_date": "2025-01-16", - "generation": 1, - "base_model": "Pinkstack/PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 3.086, - "co2_cost": 1.5607975432210763 - } - }, - { - "id": "Pinkstack/SuperThoughts-CoT-14B-16k-o1-QwQ_float16_06261f48e94e86201b527854e76ddee3c65054f4_False", - "model": { - "name": "Pinkstack/SuperThoughts-CoT-14B-16k-o1-QwQ", - "sha": "06261f48e94e86201b527854e76ddee3c65054f4", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 31.36951233941903, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.051457909458015844, - "normalized_score": 5.1457909458015845 - }, - "bbh": { - "name": "BBH", - "value": 0.6719989821162488, - "normalized_score": 52.848496844057365 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4199395770392749, - "normalized_score": 41.99395770392749 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3926174496644295, - "normalized_score": 19.01565995525727 - }, - "musr": { - "name": "MUSR", - "value": 0.4913541666666667, - "normalized_score": 21.7859375 - 
}, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.526845079787234, - "normalized_score": 47.42723108747045 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-14", - "submission_date": "2025-01-16", - "generation": 1, - "base_model": "Pinkstack/SuperThoughts-CoT-14B-16k-o1-QwQ (Merge)", - "hub_license": "mit", - "hub_hearts": 7, - "params_billions": 14.66, - "co2_cost": 1.8630339511133762 - } - }, - { - "id": "Pinkstack/Superthoughts-lite-1.8B-experimental-o1_float16_dc8f7e9d19f3d4bee7cfd81cc94c320204673dee_False", - "model": { - "name": "Pinkstack/Superthoughts-lite-1.8B-experimental-o1", - "sha": "dc8f7e9d19f3d4bee7cfd81cc94c320204673dee", - "precision": "float16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.104091175053128, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.0375193375798437, - "normalized_score": 3.7519337579843706 - }, - "bbh": { - "name": "BBH", - "value": 0.3434736647957908, - "normalized_score": 9.132472807581593 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03172205438066465, - "normalized_score": 3.1722054380664653 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2751677852348993, - "normalized_score": 3.355704697986576 - }, - "musr": { - "name": "MUSR", - "value": 0.33539583333333334, - "normalized_score": 1.7578124999999993 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.18508976063829788, - "normalized_score": 9.454417848699762 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-31", - "submission_date": "2025-01-31", - "generation": 2, - "base_model": "HuggingFaceTB/SmolLM2-1.7B-Instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 1.812, - "co2_cost": 0.6183011818245351 - } - }, - { - "id": "Pinkstack/Superthoughts-lite-v1_float16_e63103e31386be7a6287179cf313861184da3276_False", - "model": { - "name": "Pinkstack/Superthoughts-lite-v1", - "sha": "e63103e31386be7a6287179cf313861184da3276", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 7.399417613485791, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1658643510330368, - "normalized_score": 16.58643510330368 - }, - "bbh": { - "name": "BBH", - "value": 0.3465571905256149, - "normalized_score": 8.902845043233844 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02945619335347432, - "normalized_score": 2.9456193353474323 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28104026845637586, - "normalized_score": 4.138702460850116 - }, - "musr": { - "name": "MUSR", - "value": 0.3671770833333334, - "normalized_score": 3.4304687499999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17553191489361702, - "normalized_score": 8.39243498817967 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-12", - "submission_date": "2025-02-13", - "generation": 1, - "base_model": "Pinkstack/Superthoughts-lite-v1 (Merge)", - 
"hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 1.711, - "co2_cost": 0.33158247389916096 - } - }, - { - "id": "PocketDoc/Dans-Instruct-CoreCurriculum-12b_bfloat16_c50db5ba880b7edc0efd32a7f3b9d2f051c3f4a6_True", - "model": { - "name": "PocketDoc/Dans-Instruct-CoreCurriculum-12b", - "sha": "c50db5ba880b7edc0efd32a7f3b9d2f051c3f4a6", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 9.490940527989817, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21914520139895477, - "normalized_score": 21.914520139895476 - }, - "bbh": { - "name": "BBH", - "value": 0.3788739075240266, - "normalized_score": 13.232564953040013 - }, - "math": { - "name": "MATH Level 5", - "value": 0.054380664652567974, - "normalized_score": 5.438066465256798 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.4095625, - "normalized_score": 9.561979166666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1219248670212766, - "normalized_score": 2.4360963356973993 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-01", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 12.0, - "co2_cost": 3.177076304074408 - } - }, - { - "id": "PocketDoc/Dans-PersonalityEngine-V1.1.0-12b_bfloat16_e58b4d12f79522209478fed022b65675a18b877f_True", - "model": { - "name": "PocketDoc/Dans-PersonalityEngine-V1.1.0-12b", - "sha": "e58b4d12f79522209478fed022b65675a18b877f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 27.044944332447233, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7074672978807343, - "normalized_score": 70.74672978807342 - }, - "bbh": { - "name": "BBH", - "value": 0.5361046243199591, - "normalized_score": 33.666618823462954 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10498489425981873, - "normalized_score": 10.498489425981873 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28691275167785235, - "normalized_score": 4.921700223713646 - }, - "musr": { - "name": "MUSR", - "value": 0.45867708333333335, - "normalized_score": 17.30130208333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32621343085106386, - "normalized_score": 25.134825650118202 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-19", - "submission_date": "2024-12-19", - "generation": 1, - "base_model": "PocketDoc/Dans-PersonalityEngine-V1.1.0-12b (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 37, - "params_billions": 12.248, - "co2_cost": 3.1727081280344622 - } - }, - { - "id": "PocketDoc/Dans-PersonalityEngine-V1.2.0-24b_bfloat16_e41f1d9faa19a92c70fcd63c2db6df2d5b37393c_True", - "model": { - "name": "PocketDoc/Dans-PersonalityEngine-V1.2.0-24b", - "sha": "e41f1d9faa19a92c70fcd63c2db6df2d5b37393c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - 
"architecture": "MistralForCausalLM", - "average_score": 36.3754641013882, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7886252920029965, - "normalized_score": 78.86252920029965 - }, - "bbh": { - "name": "BBH", - "value": 0.6421213844206719, - "normalized_score": 47.56102293719791 - }, - "math": { - "name": "MATH Level 5", - "value": 0.24546827794561935, - "normalized_score": 24.546827794561935 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3187919463087248, - "normalized_score": 9.172259507829976 - }, - "musr": { - "name": "MUSR", - "value": 0.42996875, - "normalized_score": 13.379427083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5025764627659575, - "normalized_score": 44.73071808510639 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-17", - "submission_date": "2025-02-19", - "generation": 1, - "base_model": "PocketDoc/Dans-PersonalityEngine-V1.2.0-24b (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 59, - "params_billions": 23.572, - "co2_cost": 3.056940773564168 - } - }, - { - "id": "PocketDoc/Dans-PersonalityEngine-v1.0.0-8b_bfloat16_c64612e1eee1ddb3aa064a25eba8921ec3d94325_True", - "model": { - "name": "PocketDoc/Dans-PersonalityEngine-v1.0.0-8b", - "sha": "c64612e1eee1ddb3aa064a25eba8921ec3d94325", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.207416194838743, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.498190357141274, - "normalized_score": 49.81903571412741 - }, - "bbh": { - "name": "BBH", - "value": 0.47325544259149366, - "normalized_score": 25.68795976908214 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08157099697885196, - "normalized_score": 8.157099697885197 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28523489932885904, - "normalized_score": 4.697986577181204 - }, - "musr": { - "name": "MUSR", - "value": 0.35415625, - "normalized_score": 3.9361979166666683 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3065159574468085, - "normalized_score": 22.94621749408983 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-08", - "submission_date": "2024-10-08", - "generation": 1, - "base_model": "PocketDoc/Dans-PersonalityEngine-v1.0.0-8b (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 8.03, - "co2_cost": 1.8019485168187717 - } - }, - { - "id": "PocketDoc/Dans-SakuraKaze-V1.0.0-12b_bfloat16_d433b6c0efcf1d1f6534320d88c73c454fd245f2_True", - "model": { - "name": "PocketDoc/Dans-SakuraKaze-V1.0.0-12b", - "sha": "d433b6c0efcf1d1f6534320d88c73c454fd245f2", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 27.119777597401505, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6520133246452745, - "normalized_score": 65.20133246452745 - }, - "bbh": { - "name": "BBH", - "value": 0.5405357251132225, - "normalized_score": 34.38815279713936 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09290030211480363, - "normalized_score": 
9.290030211480364 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2936241610738255, - "normalized_score": 5.8165548098433995 - }, - "musr": { - "name": "MUSR", - "value": 0.47452083333333334, - "normalized_score": 19.581770833333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35596742021276595, - "normalized_score": 28.440824468085108 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-13", - "submission_date": "2025-02-28", - "generation": 1, - "base_model": "PocketDoc/Dans-SakuraKaze-V1.0.0-12b (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 17, - "params_billions": 12.248, - "co2_cost": 0.8727770487974009 - } - }, - { - "id": "PowerInfer/SmallThinker-3B-Preview_bfloat16_f335f911ad120220287e737eeeffe7a3ff54b60b_True", - "model": { - "name": "PowerInfer/SmallThinker-3B-Preview", - "sha": "f335f911ad120220287e737eeeffe7a3ff54b60b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 23.22006340641872, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6199650261306666, - "normalized_score": 61.99650261306667 - }, - "bbh": { - "name": "BBH", - "value": 0.4494922016660919, - "normalized_score": 22.062104642168745 - }, - "math": { - "name": "MATH Level 5", - "value": 0.27794561933534745, - "normalized_score": 27.794561933534744 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.3524791666666667, - "normalized_score": 3.5932291666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3017785904255319, - "normalized_score": 22.419843380614655 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-12", - "submission_date": "2025-01-07", - "generation": 1, - "base_model": "PowerInfer/SmallThinker-3B-Preview (Merge)", - "hub_license": "", - "hub_hearts": 391, - "params_billions": 3.397, - "co2_cost": 1.475888162898983 - } - }, - { - "id": "PranavHarshan/LaMistral-V4_bfloat16_b373c2a1ab08823b6b119899f807793c96ef7888_True", - "model": { - "name": "PranavHarshan/LaMistral-V4", - "sha": "b373c2a1ab08823b6b119899f807793c96ef7888", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.21076517685928, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.623861354539289, - "normalized_score": 62.38613545392891 - }, - "bbh": { - "name": "BBH", - "value": 0.5184255342586473, - "normalized_score": 31.091348681794813 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06873111782477341, - "normalized_score": 6.873111782477341 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32802013422818793, - "normalized_score": 10.402684563758392 - }, - "musr": { - "name": "MUSR", - "value": 0.3642916666666667, - "normalized_score": 5.6364583333333345 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35987367021276595, - "normalized_score": 28.874852245862886 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, 
- "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-01", - "submission_date": "2024-10-05", - "generation": 1, - "base_model": "PranavHarshan/LaMistral-V4 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3944647401800156 - } - }, - { - "id": "PranavHarshan/MedNarra-X1_bfloat16_9fe294e7fd69ec56f0b7fa1a23759eed070f44bf_False", - "model": { - "name": "PranavHarshan/MedNarra-X1", - "sha": "9fe294e7fd69ec56f0b7fa1a23759eed070f44bf", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.078329076163104, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43384331351924005, - "normalized_score": 43.384331351924004 - }, - "bbh": { - "name": "BBH", - "value": 0.46371668179774184, - "normalized_score": 23.52349513234211 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04380664652567976, - "normalized_score": 4.380664652567976 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.35403125, - "normalized_score": 2.4539062499999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34308510638297873, - "normalized_score": 27.009456264775412 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-08", - "submission_date": "2024-10-09", - "generation": 1, - "base_model": "PranavHarshan/MedNarra-X1 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3523218777350892 - } - }, - { - "id": "Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Appended_bfloat16_1091b30480f4cc91f26cb1bd7579e527f490f8d2_True", - "model": { - "name": "Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Appended", - "sha": "1091b30480f4cc91f26cb1bd7579e527f490f8d2", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.735876089495267, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5960595663949432, - "normalized_score": 59.60595663949432 - }, - "bbh": { - "name": "BBH", - "value": 0.4619637884426022, - "normalized_score": 24.057172512288606 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07930513595166164, - "normalized_score": 7.930513595166164 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.42540625, - "normalized_score": 11.77578125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3289561170212766, - "normalized_score": 25.439568557919618 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-27", - "submission_date": "2024-07-31", - "generation": 1, - "base_model": "Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Appended (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 10.732, - "co2_cost": 1.6721992205670189 - } - }, - { - "id": "Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Interleaved_bfloat16_dd6bd9a8a9a2223a02a4e8aa6270accbc8d4d81a_True", - "model": { - "name": 
"Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Interleaved", - "sha": "dd6bd9a8a9a2223a02a4e8aa6270accbc8d4d81a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.671701605771233, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5960595663949432, - "normalized_score": 59.60595663949432 - }, - "bbh": { - "name": "BBH", - "value": 0.4619637884426022, - "normalized_score": 24.057172512288606 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07779456193353475, - "normalized_score": 7.779456193353475 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.42540625, - "normalized_score": 11.77578125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3298703457446808, - "normalized_score": 25.541149527186757 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-10", - "submission_date": "2024-08-16", - "generation": 1, - "base_model": "Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Interleaved (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 10.732, - "co2_cost": 1.6731843520126088 - } - }, - { - "id": "Pretergeek/OpenChat-3.5-0106_32K-PoSE_bfloat16_da6a73abac7fba68f1df4d42485d79553e97bf91_True", - "model": { - "name": "Pretergeek/OpenChat-3.5-0106_32K-PoSE", - "sha": "da6a73abac7fba68f1df4d42485d79553e97bf91", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 12.903680149442671, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3968991165662664, - "normalized_score": 39.68991165662664 - }, - "bbh": { - "name": "BBH", - "value": 0.3471309425137119, - "normalized_score": 8.828394853721203 - }, - "math": { - "name": "MATH Level 5", - "value": 0.026435045317220542, - "normalized_score": 2.643504531722054 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.42054166666666665, - "normalized_score": 11.334375000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.203125, - "normalized_score": 11.458333333333332 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-02", - "submission_date": "2024-11-02", - "generation": 1, - "base_model": "Pretergeek/OpenChat-3.5-0106_32K-PoSE (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 7.242, - "co2_cost": 0.9458762678591882 - } - }, - { - "id": "Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Appended_bfloat16_e957847e013bdd2f6e852b8a1c369ddce92fca78_True", - "model": { - "name": "Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Appended", - "sha": "e957847e013bdd2f6e852b8a1c369ddce92fca78", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.761271669519232, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5975833011963811, - 
"normalized_score": 59.75833011963812 - }, - "bbh": { - "name": "BBH", - "value": 0.4619637884426022, - "normalized_score": 24.057172512288606 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07930513595166164, - "normalized_score": 7.930513595166164 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.42540625, - "normalized_score": 11.77578125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3289561170212766, - "normalized_score": 25.439568557919618 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-26", - "submission_date": "2024-07-27", - "generation": 1, - "base_model": "Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Appended (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 8.114, - "co2_cost": 1.3674142829393898 - } - }, - { - "id": "Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Interleaved_bfloat16_485ebe835c6c001af0a1a6e0e40aab27bc195842_True", - "model": { - "name": "Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Interleaved", - "sha": "485ebe835c6c001af0a1a6e0e40aab27bc195842", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.630312716882344, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5960595663949432, - "normalized_score": 59.60595663949432 - }, - "bbh": { - "name": "BBH", - "value": 0.46213045510926887, - "normalized_score": 24.075505845621944 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07779456193353475, - "normalized_score": 7.779456193353475 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.42407291666666663, - "normalized_score": 11.509114583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3298703457446808, - "normalized_score": 25.541149527186757 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-10", - "submission_date": "2024-08-16", - "generation": 1, - "base_model": "Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Interleaved (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 8.114, - "co2_cost": 1.3209091512623283 - } - }, - { - "id": "Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Appended_bfloat16_2120720b7fb2ecc27b9c03cc876316fd25b26e40_True", - "model": { - "name": "Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Appended", - "sha": "2120720b7fb2ecc27b9c03cc876316fd25b26e40", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.735876089495267, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5960595663949432, - "normalized_score": 59.60595663949432 - }, - "bbh": { - "name": "BBH", - "value": 0.4619637884426022, - "normalized_score": 24.057172512288606 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07930513595166164, - "normalized_score": 7.930513595166164 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 
7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.42540625, - "normalized_score": 11.77578125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3289561170212766, - "normalized_score": 25.439568557919618 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-26", - "submission_date": "2024-07-27", - "generation": 1, - "base_model": "Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Appended (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 8.987, - "co2_cost": 1.4384987932245612 - } - }, - { - "id": "Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Interleaved_bfloat16_b6dfa36a99179674706d5e859714afa6b8743640_True", - "model": { - "name": "Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Interleaved", - "sha": "b6dfa36a99179674706d5e859714afa6b8743640", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.65570829690631, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5975833011963811, - "normalized_score": 59.75833011963812 - }, - "bbh": { - "name": "BBH", - "value": 0.46213045510926887, - "normalized_score": 24.075505845621944 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07779456193353475, - "normalized_score": 7.779456193353475 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.42407291666666663, - "normalized_score": 11.509114583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3298703457446808, - "normalized_score": 25.541149527186757 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-10", - "submission_date": "2024-08-16", - "generation": 1, - "base_model": "Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Interleaved (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 8.987, - "co2_cost": 1.455622745014315 - } - }, - { - "id": "Pretergeek/OpenChat-3.5-0106_9.86B_44Layers-Appended_bfloat16_8a7ef4a2c4faf8760650e26e44509920bace633a_True", - "model": { - "name": "Pretergeek/OpenChat-3.5-0106_9.86B_44Layers-Appended", - "sha": "8a7ef4a2c4faf8760650e26e44509920bace633a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.735876089495267, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5960595663949432, - "normalized_score": 59.60595663949432 - }, - "bbh": { - "name": "BBH", - "value": 0.4619637884426022, - "normalized_score": 24.057172512288606 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07930513595166164, - "normalized_score": 7.930513595166164 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.42540625, - "normalized_score": 11.77578125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3289561170212766, - "normalized_score": 25.439568557919618 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - 
"is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-27", - "submission_date": "2024-07-27", - "generation": 1, - "base_model": "Pretergeek/OpenChat-3.5-0106_9.86B_44Layers-Appended (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 9.859, - "co2_cost": 1.573526102454317 - } - }, - { - "id": "Pretergeek/openchat-3.5-0106_Rebased_Mistral-7B-v0.2_bfloat16_31c11027a7320115af1e5c33b41bcace83420fe2_True", - "model": { - "name": "Pretergeek/openchat-3.5-0106_Rebased_Mistral-7B-v0.2", - "sha": "31c11027a7320115af1e5c33b41bcace83420fe2", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 16.052276387715246, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.37062106322335847, - "normalized_score": 37.06210632233585 - }, - "bbh": { - "name": "BBH", - "value": 0.36271140677296004, - "normalized_score": 10.910767600799835 - }, - "math": { - "name": "MATH Level 5", - "value": 0.045317220543806644, - "normalized_score": 4.531722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27181208053691275, - "normalized_score": 2.9082774049216997 - }, - "musr": { - "name": "MUSR", - "value": 0.4840104166666667, - "normalized_score": 20.56796875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2829953457446808, - "normalized_score": 20.33281619385342 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-21", - "submission_date": "2024-07-21", - "generation": 0, - "base_model": "Pretergeek/openchat-3.5-0106_Rebased_Mistral-7B-v0.2", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 7.242, - "co2_cost": 1.2436758634757044 - } - }, - { - "id": "PrimeIntellect/INTELLECT-1_bfloat16_3b8d48b5ce11ee9526495f1db9eb1644518bfce0_False", - "model": { - "name": "PrimeIntellect/INTELLECT-1", - "sha": "3b8d48b5ce11ee9526495f1db9eb1644518bfce0", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 3.8063018019018693, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1757315035217667, - "normalized_score": 17.57315035217667 - }, - "bbh": { - "name": "BBH", - "value": 0.27598007801214713, - "normalized_score": 1.0435001578954441 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2533557046979866, - "normalized_score": 0.44742729306487633 - }, - "musr": { - "name": "MUSR", - "value": 0.3339375, - "normalized_score": 2.408854166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11228390957446809, - "normalized_score": 1.3648788416075646 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-11-28", - "submission_date": "2024-11-29", - "generation": 0, - "base_model": "PrimeIntellect/INTELLECT-1", - "hub_license": "apache-2.0", - "hub_hearts": 62, - "params_billions": 10.211, - "co2_cost": 0.9953176565006855 - } - }, - { - "id": "PrimeIntellect/INTELLECT-1_float16_3b8d48b5ce11ee9526495f1db9eb1644518bfce0_False", - "model": { - "name": "PrimeIntellect/INTELLECT-1", - "sha": 
"3b8d48b5ce11ee9526495f1db9eb1644518bfce0", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.016002158495076, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1757315035217667, - "normalized_score": 17.57315035217667 - }, - "bbh": { - "name": "BBH", - "value": 0.27398007801214713, - "normalized_score": 1.0435001578954441 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3752708333333333, - "normalized_score": 4.1421874999999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11203457446808511, - "normalized_score": 1.337174940898345 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-11-28", - "submission_date": "2024-12-03", - "generation": 0, - "base_model": "PrimeIntellect/INTELLECT-1", - "hub_license": "apache-2.0", - "hub_hearts": 62, - "params_billions": 10.211, - "co2_cost": 1.9927682347571594 - } - }, - { - "id": "PrimeIntellect/INTELLECT-1-Instruct_bfloat16_a672cbe91f9bd4df58f90619ca3c2acb2eb11294_True", - "model": { - "name": "PrimeIntellect/INTELLECT-1-Instruct", - "sha": "a672cbe91f9bd4df58f90619ca3c2acb2eb11294", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 1.4059110426100174, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.0, - "normalized_score": 0.0 - }, - "bbh": { - "name": "BBH", - "value": 0.28698007801214714, - "normalized_score": 1.7494478703137453 - }, - "math": { - "name": "MATH Level 5", - "value": 0.022658610271903322, - "normalized_score": 2.2658610271903323 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2483221476510067, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3576875, - "normalized_score": 3.7109374999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10638297872340426, - "normalized_score": 0.7092198581560278 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-11-28", - "submission_date": "2024-11-29", - "generation": 1, - "base_model": "PrimeIntellect/INTELLECT-1-Instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 121, - "params_billions": 10.211, - "co2_cost": 2.85273565713582 - } - }, - { - "id": "PuxAI/LUA_model_float16_f098319bc21c8710652f75febe0d8a110058458f_False", - "model": { - "name": "PuxAI/LUA_model", - "sha": "f098319bc21c8710652f75febe0d8a110058458f", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 5.148723748211869, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22821336276634885, - "normalized_score": 22.821336276634884 - }, - "bbh": { - "name": "BBH", - "value": 0.2876778102988436, - "normalized_score": 1.815668408500801 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - 
"normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.34838541666666667, - "normalized_score": 3.5481770833333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11228390957446809, - "normalized_score": 1.3648788416075646 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-05", - "submission_date": "2024-12-18", - "generation": 0, - "base_model": "PuxAI/LUA_model", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.386, - "co2_cost": 1.2601842278060176 - } - }, - { - "id": "PygmalionAI/pygmalion-6b_float16_2a0d74449c8fbf0378194e95f64aa92e16297294_False", - "model": { - "name": "PygmalionAI/pygmalion-6b", - "sha": "2a0d74449c8fbf0378194e95f64aa92e16297294", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "GPTJForCausalLM", - "average_score": 5.430124009362374, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20910406610016974, - "normalized_score": 20.910406610016974 - }, - "bbh": { - "name": "BBH", - "value": 0.31988944643860034, - "normalized_score": 5.089577143988909 - }, - "math": { - "name": "MATH Level 5", - "value": 0.008308157099697885, - "normalized_score": 0.8308157099697886 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24916107382550334, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3683541666666667, - "normalized_score": 3.7109374999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11835106382978723, - "normalized_score": 2.0390070921985806 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-01-07", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "PygmalionAI/pygmalion-6b", - "hub_license": "creativeml-openrail-m", - "hub_hearts": 745, - "params_billions": 6.0, - "co2_cost": 63.846238560959755 - } - }, - { - "id": "Q-bert/MetaMath-1B_float16_da62756f069aba78d07d4c76108e246cb91dbc35_True", - "model": { - "name": "Q-bert/MetaMath-1B", - "sha": "da62756f069aba78d07d4c76108e246cb91dbc35", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 12.36906160654443, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5300391849182392, - "normalized_score": 53.00391849182392 - }, - "bbh": { - "name": "BBH", - "value": 0.34506863677929517, - "normalized_score": 8.434610644832558 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06268882175226587, - "normalized_score": 6.268882175226587 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2516778523489933, - "normalized_score": 0.22371364653244186 - }, - "musr": { - "name": "MUSR", - "value": 0.3289166666666667, - "normalized_score": 0.7812499999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1495179521276596, - "normalized_score": 5.501994680851065 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-30", - "generation": 0, - "base_model": "Removed", - "hub_license": "", 
- "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 0.9300559878633528 - } - }, - { - "id": "Quazim0t0/1up-14b_bfloat16_e7e74a984b68a33d1efaa36cc9a2bbe70a01ced4_True", - "model": { - "name": "Quazim0t0/1up-14b", - "sha": "e7e74a984b68a33d1efaa36cc9a2bbe70a01ced4", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 41.15181002280717, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6888079185450161, - "normalized_score": 68.88079185450161 - }, - "bbh": { - "name": "BBH", - "value": 0.6920935635451656, - "normalized_score": 55.83975950522497 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4161631419939577, - "normalized_score": 41.616314199395774 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3624161073825503, - "normalized_score": 14.988814317673373 - }, - "musr": { - "name": "MUSR", - "value": 0.4583333333333333, - "normalized_score": 16.625000000000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5406416223404256, - "normalized_score": 48.96018026004729 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-27", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 0.9299007072842977 - } - }, - { - "id": "Quazim0t0/Adamant-14B-sce_bfloat16_2bb4551934aad7a56fcbfa368bb0e660612f1a89_True", - "model": { - "name": "Quazim0t0/Adamant-14B-sce", - "sha": "2bb4551934aad7a56fcbfa368bb0e660612f1a89", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 40.321509024135764, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6857604489421402, - "normalized_score": 68.57604489421402 - }, - "bbh": { - "name": "BBH", - "value": 0.6858943778247303, - "normalized_score": 54.97141906331953 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3987915407854985, - "normalized_score": 39.87915407854985 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35067114093959734, - "normalized_score": 13.422818791946312 - }, - "musr": { - "name": "MUSR", - "value": 0.45579166666666665, - "normalized_score": 16.507291666666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5371509308510638, - "normalized_score": 48.5723256501182 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-04", - "submission_date": "2025-02-04", - "generation": 0, - "base_model": "Quazim0t0/Adamant-14B-sce", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 14.66, - "co2_cost": 1.8079077749966712 - } - }, - { - "id": "Quazim0t0/Alice-14B_bfloat16_8e8284482417eafd7b21b59d8777a638cc4c1812_True", - "model": { - "name": "Quazim0t0/Alice-14B", - "sha": "8e8284482417eafd7b21b59d8777a638cc4c1812", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 41.354214475080205, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6836371937570092, - "normalized_score": 68.36371937570092 - }, - "bbh": { - 
"name": "BBH", - "value": 0.6937748567349198, - "normalized_score": 56.00650513096938 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4569486404833837, - "normalized_score": 45.69486404833837 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35151006711409394, - "normalized_score": 13.534675615212524 - }, - "musr": { - "name": "MUSR", - "value": 0.44794791666666667, - "normalized_score": 15.42682291666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5418882978723404, - "normalized_score": 49.09869976359338 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-10", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 0.9336340789377652 - } - }, - { - "id": "Quazim0t0/Alien-CoT-14B-sce_bfloat16_7a0a59f4a9572894648da9b36b8cfcd6fc162825_False", - "model": { - "name": "Quazim0t0/Alien-CoT-14B-sce", - "sha": "7a0a59f4a9572894648da9b36b8cfcd6fc162825", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 32.21054018658256, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.07486358417886763, - "normalized_score": 7.486358417886763 - }, - "bbh": { - "name": "BBH", - "value": 0.6395487523790632, - "normalized_score": 48.047822818905125 - }, - "math": { - "name": "MATH Level 5", - "value": 0.520392749244713, - "normalized_score": 52.0392749244713 - }, - "gpqa": { - "name": "GPQA", - "value": 0.39177852348993286, - "normalized_score": 18.903803131991047 - }, - "musr": { - "name": "MUSR", - "value": 0.47852083333333334, - "normalized_score": 20.448437499999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5170378989361702, - "normalized_score": 46.337544326241144 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-01", - "submission_date": "2025-02-03", - "generation": 0, - "base_model": "Quazim0t0/Alien-CoT-14B-sce", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 14.66, - "co2_cost": 1.912128570185909 - } - }, - { - "id": "Quazim0t0/Aura-8B-Linear_bfloat16_6adb93ad6cec4e867f62322a20617c0867d1114d_True", - "model": { - "name": "Quazim0t0/Aura-8B-Linear", - "sha": "6adb93ad6cec4e867f62322a20617c0867d1114d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 27.587590048832727, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.794770098893159, - "normalized_score": 79.4770098893159 - }, - "bbh": { - "name": "BBH", - "value": 0.5074298101934884, - "normalized_score": 29.451689498631435 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18051359516616314, - "normalized_score": 18.051359516616312 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26929530201342283, - "normalized_score": 2.572706935123044 - }, - "musr": { - "name": "MUSR", - "value": 0.3686979166666667, - "normalized_score": 4.853906250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3800698138297872, - "normalized_score": 31.11886820330969 - } - }, - "features": { - "is_not_available_on_hub": false, - 
"is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-07", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4172796772465954 - } - }, - { - "id": "Quazim0t0/Casa-14b-sce_float16_881ac432aa0c9b8983c6838a039935db0f565f28_True", - "model": { - "name": "Quazim0t0/Casa-14b-sce", - "sha": "881ac432aa0c9b8983c6838a039935db0f565f28", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 40.41164680106136, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6653523761397536, - "normalized_score": 66.53523761397537 - }, - "bbh": { - "name": "BBH", - "value": 0.6901033460664828, - "normalized_score": 55.3984513931396 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4697885196374622, - "normalized_score": 46.97885196374622 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33305369127516776, - "normalized_score": 11.073825503355701 - }, - "musr": { - "name": "MUSR", - "value": 0.43102083333333335, - "normalized_score": 13.310937499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5425531914893617, - "normalized_score": 49.17257683215129 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-06", - "submission_date": "2025-02-06", - "generation": 0, - "base_model": "Quazim0t0/Casa-14b-sce", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 0.92197207364407 - } - }, - { - "id": "Quazim0t0/Casa-14b-sce_bfloat16_881ac432aa0c9b8983c6838a039935db0f565f28_True", - "model": { - "name": "Quazim0t0/Casa-14b-sce", - "sha": "881ac432aa0c9b8983c6838a039935db0f565f28", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 40.99359561827428, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6718218770639681, - "normalized_score": 67.1821877063968 - }, - "bbh": { - "name": "BBH", - "value": 0.6891400252742456, - "normalized_score": 55.32965321791952 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4984894259818731, - "normalized_score": 49.848942598187314 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3338926174496644, - "normalized_score": 11.185682326621922 - }, - "musr": { - "name": "MUSR", - "value": 0.4322916666666667, - "normalized_score": 13.436458333333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5408078457446809, - "normalized_score": 48.97864952718677 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-06", - "submission_date": "2025-02-06", - "generation": 0, - "base_model": "Quazim0t0/Casa-14b-sce", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 1.8578754867311855 - } - }, - { - "id": "Quazim0t0/Charlie-8B-Linear_bfloat16_eac0b6423382577d41cd8a718b2128950d750d96_True", - "model": { - "name": "Quazim0t0/Charlie-8B-Linear", - "sha": "eac0b6423382577d41cd8a718b2128950d750d96", - "precision": "bfloat16", - "type": 
"basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.060174123056736, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7380672172059026, - "normalized_score": 73.80672172059025 - }, - "bbh": { - "name": "BBH", - "value": 0.5141359215016831, - "normalized_score": 31.524374812766855 - }, - "math": { - "name": "MATH Level 5", - "value": 0.26510574018126887, - "normalized_score": 26.51057401812689 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.3485416666666667, - "normalized_score": 5.134375000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3572972074468085, - "normalized_score": 28.588578605200944 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-08", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.7056474850461021 - } - }, - { - "id": "Quazim0t0/Chromatic-8b-sce_bfloat16_8bbd78cbe72638784b2ac21320297ba793101110_False", - "model": { - "name": "Quazim0t0/Chromatic-8b-sce", - "sha": "8bbd78cbe72638784b2ac21320297ba793101110", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.16855020752669, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5085074269604649, - "normalized_score": 50.850742696046495 - }, - "bbh": { - "name": "BBH", - "value": 0.5063171816307924, - "normalized_score": 28.86594147433648 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1555891238670695, - "normalized_score": 15.55891238670695 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3196308724832215, - "normalized_score": 9.284116331096197 - }, - "musr": { - "name": "MUSR", - "value": 0.405125, - "normalized_score": 9.840625000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37549867021276595, - "normalized_score": 30.610963356973997 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-06", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.401494411400827 - } - }, - { - "id": "Quazim0t0/CoT_Phi_bfloat16_bc6cf726add153e6c9a1a6643dd682efdb964e69_True", - "model": { - "name": "Quazim0t0/CoT_Phi", - "sha": "bc6cf726add153e6c9a1a6643dd682efdb964e69", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 36.20889872430018, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6158681188136367, - "normalized_score": 61.586811881363666 - }, - "bbh": { - "name": "BBH", - "value": 0.6750841958594904, - "normalized_score": 53.36568538313751 - }, - "math": { - "name": "MATH Level 5", - "value": 0.33081570996978854, - "normalized_score": 33.081570996978854 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35822147651006714, - "normalized_score": 14.429530201342287 - }, - "musr": { - "name": "MUSR", - 
"value": 0.42435416666666664, - "normalized_score": 11.444270833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4901097074468085, - "normalized_score": 43.345523049645394 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-27", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 0.9037506905901385 - } - }, - { - "id": "Quazim0t0/Dyson-14b_bfloat16_c4053de30c3961138d07361ed3abfcf79467048e_True", - "model": { - "name": "Quazim0t0/Dyson-14b", - "sha": "c4053de30c3961138d07361ed3abfcf79467048e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 39.579565021915634, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5856682491345186, - "normalized_score": 58.56682491345187 - }, - "bbh": { - "name": "BBH", - "value": 0.6862902828866305, - "normalized_score": 54.72936481142644 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5392749244712991, - "normalized_score": 53.92749244712991 - }, - "gpqa": { - "name": "GPQA", - "value": 0.313758389261745, - "normalized_score": 8.501118568232664 - }, - "musr": { - "name": "MUSR", - "value": 0.4259375, - "normalized_score": 12.875520833333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5398936170212766, - "normalized_score": 48.87706855791962 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-09", - "submission_date": "2025-03-09", - "generation": 1, - "base_model": "Quazim0t0/Dyson-14b (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 0.954136034124844 - } - }, - { - "id": "Quazim0t0/Edu-14B-Linear_bfloat16_30e2d0060ebfb73e3e22933d09611459fe7e56f1_True", - "model": { - "name": "Quazim0t0/Edu-14B-Linear", - "sha": "30e2d0060ebfb73e3e22933d09611459fe7e56f1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 34.65497010041746, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6158182511292261, - "normalized_score": 61.5818251129226 - }, - "bbh": { - "name": "BBH", - "value": 0.6757820996225599, - "normalized_score": 53.21378883250136 - }, - "math": { - "name": "MATH Level 5", - "value": 0.24471299093655588, - "normalized_score": 24.47129909365559 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31711409395973156, - "normalized_score": 8.948545861297541 - }, - "musr": { - "name": "MUSR", - "value": 0.43775000000000003, - "normalized_score": 14.318750000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.508560505319149, - "normalized_score": 45.39561170212767 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-27", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 0.8959461527664812 - } - }, - { - "id": 
"Quazim0t0/Fugazi14b_bfloat16_ba1713e8884bccfce02b4577824486ce0bf678b7_True", - "model": { - "name": "Quazim0t0/Fugazi14b", - "sha": "ba1713e8884bccfce02b4577824486ce0bf678b7", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 41.93859963937472, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6997987561891337, - "normalized_score": 69.97987561891337 - }, - "bbh": { - "name": "BBH", - "value": 0.6941017680723065, - "normalized_score": 56.092125930693726 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4652567975830816, - "normalized_score": 46.52567975830816 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35151006711409394, - "normalized_score": 13.534675615212524 - }, - "musr": { - "name": "MUSR", - "value": 0.45455208333333336, - "normalized_score": 16.419010416666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5417220744680851, - "normalized_score": 49.0802304964539 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "Quazim0t0/Fugazi14b (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 0.9163709788150664 - } - }, - { - "id": "Quazim0t0/GZA-14B-sce_bfloat16_2d15025e02b489507132a58f1e83779af2104a60_True", - "model": { - "name": "Quazim0t0/GZA-14B-sce", - "sha": "2d15025e02b489507132a58f1e83779af2104a60", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 38.22035199697927, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6274086091570367, - "normalized_score": 62.740860915703664 - }, - "bbh": { - "name": "BBH", - "value": 0.6686539892126272, - "normalized_score": 52.49322024679916 - }, - "math": { - "name": "MATH Level 5", - "value": 0.47205438066465255, - "normalized_score": 47.205438066465256 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - "musr": { - "name": "MUSR", - "value": 0.4284791666666667, - "normalized_score": 12.926562500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.523188164893617, - "normalized_score": 47.02090721040189 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-31", - "submission_date": "2025-01-31", - "generation": 0, - "base_model": "Quazim0t0/GZA-14B-sce", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 14.66, - "co2_cost": 1.8708231040456398 - } - }, - { - "id": "Quazim0t0/Geedorah-14B_bfloat16_2d642ab28c2685cde0379722ee73814eeb2e3481_True", - "model": { - "name": "Quazim0t0/Geedorah-14B", - "sha": "2d642ab28c2685cde0379722ee73814eeb2e3481", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 41.39114666181808, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6872841837435781, - "normalized_score": 68.72841837435782 - }, - "bbh": { - "name": "BBH", - "value": 0.6964189914061528, - "normalized_score": 
56.46229284952974 - }, - "math": { - "name": "MATH Level 5", - "value": 0.44486404833836857, - "normalized_score": 44.48640483383686 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34731543624161076, - "normalized_score": 12.975391498881436 - }, - "musr": { - "name": "MUSR", - "value": 0.45467708333333334, - "normalized_score": 16.567968749999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5421376329787234, - "normalized_score": 49.1264036643026 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-13", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 0.9325925427874288 - } - }, - { - "id": "Quazim0t0/GivingTree-8b-sce_bfloat16_7cc55ad7bff7d333f19c8ba2d9f739f4d81e4e53_False", - "model": { - "name": "Quazim0t0/GivingTree-8b-sce", - "sha": "7cc55ad7bff7d333f19c8ba2d9f739f4d81e4e53", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.90329552226861, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5006139266036339, - "normalized_score": 50.06139266036338 - }, - "bbh": { - "name": "BBH", - "value": 0.5040482025572203, - "normalized_score": 28.498998630643033 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15256797583081572, - "normalized_score": 15.256797583081571 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3221476510067114, - "normalized_score": 9.61968680089485 - }, - "musr": { - "name": "MUSR", - "value": 0.405125, - "normalized_score": 9.307291666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37608045212765956, - "normalized_score": 30.67560579196217 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-06", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4057611338885363 - } - }, - { - "id": "Quazim0t0/GuiltySpark-14B-ties_bfloat16_f8d188d57437a05666685db680218a04480946e3_True", - "model": { - "name": "Quazim0t0/GuiltySpark-14B-ties", - "sha": "f8d188d57437a05666685db680218a04480946e3", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 40.52404330224518, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6854357549080883, - "normalized_score": 68.54357549080883 - }, - "bbh": { - "name": "BBH", - "value": 0.6914302574038697, - "normalized_score": 55.72193711661121 - }, - "math": { - "name": "MATH Level 5", - "value": 0.38368580060422963, - "normalized_score": 38.368580060422964 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3649328859060403, - "normalized_score": 15.324384787472036 - }, - "musr": { - "name": "MUSR", - "value": 0.4557291666666667, - "normalized_score": 16.299479166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5399767287234043, - "normalized_score": 48.88630319148937 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - 
"metadata": { - "upload_date": "2025-02-06", - "submission_date": "2025-02-06", - "generation": 1, - "base_model": "Quazim0t0/GuiltySpark-14B-ties (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 1.8577733596461905 - } - }, - { - "id": "Quazim0t0/Halo-14B-sce_bfloat16_f6f465ee3187e46185be2acb5bad28dac83c8803_True", - "model": { - "name": "Quazim0t0/Halo-14B-sce", - "sha": "f6f465ee3187e46185be2acb5bad28dac83c8803", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 40.258932389335996, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6753691316817156, - "normalized_score": 67.53691316817157 - }, - "bbh": { - "name": "BBH", - "value": 0.6875692490185378, - "normalized_score": 55.27080618595605 - }, - "math": { - "name": "MATH Level 5", - "value": 0.42900302114803623, - "normalized_score": 42.90030211480362 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34731543624161076, - "normalized_score": 12.975391498881436 - }, - "musr": { - "name": "MUSR", - "value": 0.44007291666666665, - "normalized_score": 14.242447916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5376496010638298, - "normalized_score": 48.62773345153664 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-04", - "submission_date": "2025-02-04", - "generation": 0, - "base_model": "Quazim0t0/Halo-14B-sce", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 14.66, - "co2_cost": 2.1798576748339618 - } - }, - { - "id": "Quazim0t0/Heretic1.5b_bfloat16_17125d9f62e1595f9a789dbcbfde32a6f56fbfac_False", - "model": { - "name": "Quazim0t0/Heretic1.5b", - "sha": "17125d9f62e1595f9a789dbcbfde32a6f56fbfac", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 11.59950597926467, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20615633186611523, - "normalized_score": 20.615633186611525 - }, - "bbh": { - "name": "BBH", - "value": 0.3529180801121154, - "normalized_score": 9.643860937325522 - }, - "math": { - "name": "MATH Level 5", - "value": 0.24395770392749244, - "normalized_score": 24.395770392749245 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.3511458333333333, - "normalized_score": 4.393229166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17278922872340424, - "normalized_score": 8.087692080378249 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-27", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.73, - "co2_cost": 0.5781061805882853 - } - }, - { - "id": "Quazim0t0/Hyde-14b-sce_bfloat16_7d77fdc0f1ac3cd2fbca740e4f186505b1857696_True", - "model": { - "name": "Quazim0t0/Hyde-14b-sce", - "sha": "7d77fdc0f1ac3cd2fbca740e4f186505b1857696", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 
36.65991703864476, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6715470507143269, - "normalized_score": 67.15470507143269 - }, - "bbh": { - "name": "BBH", - "value": 0.6885164810743584, - "normalized_score": 54.89781444986736 - }, - "math": { - "name": "MATH Level 5", - "value": 0.27341389728096677, - "normalized_score": 27.341389728096676 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3414429530201342, - "normalized_score": 12.192393736017896 - }, - "musr": { - "name": "MUSR", - "value": 0.41409375, - "normalized_score": 10.595052083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5300033244680851, - "normalized_score": 47.778147163120565 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-07", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 1.7907659326317023 - } - }, - { - "id": "Quazim0t0/Imagine-v0.5-16bit_bfloat16_c368718d4c636815d33a9a61f2ba5ce7dea10941_True", - "model": { - "name": "Quazim0t0/Imagine-v0.5-16bit", - "sha": "c368718d4c636815d33a9a61f2ba5ce7dea10941", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.789019815038216, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2758990589413866, - "normalized_score": 27.58990589413866 - }, - "bbh": { - "name": "BBH", - "value": 0.6769135492947932, - "normalized_score": 53.73606811245789 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13972809667673716, - "normalized_score": 13.972809667673717 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3649328859060403, - "normalized_score": 15.324384787472036 - }, - "musr": { - "name": "MUSR", - "value": 0.43492708333333335, - "normalized_score": 13.732552083333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.535405585106383, - "normalized_score": 48.37839834515367 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-09", - "submission_date": "2025-03-09", - "generation": 2, - "base_model": "microsoft/phi-4", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 0.9823815613014714 - } - }, - { - "id": "Quazim0t0/Imbue-14b_bfloat16_ccb8b5eba7ba0575f02aa628f5c131964b2ca175_True", - "model": { - "name": "Quazim0t0/Imbue-14b", - "sha": "ccb8b5eba7ba0575f02aa628f5c131964b2ca175", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 38.073352610617825, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5199725616918665, - "normalized_score": 51.99725616918665 - }, - "bbh": { - "name": "BBH", - "value": 0.6845292092854045, - "normalized_score": 54.47623938595546 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5317220543806647, - "normalized_score": 53.17220543806647 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31291946308724833, - "normalized_score": 8.389261744966444 - }, - "musr": { - "name": "MUSR", - "value": 0.41672916666666665, - "normalized_score": 
11.491145833333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5402260638297872, - "normalized_score": 48.91400709219858 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-09", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 0.9450565980186211 - } - }, - { - "id": "Quazim0t0/Insom_bfloat16_580897c8f05122c7d164a47d4918e7e1a8e0ea0f_True", - "model": { - "name": "Quazim0t0/Insom", - "sha": "580897c8f05122c7d164a47d4918e7e1a8e0ea0f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 39.54769675145718, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.68183863260593, - "normalized_score": 68.183863260593 - }, - "bbh": { - "name": "BBH", - "value": 0.6881456689046391, - "normalized_score": 55.318559572553454 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3851963746223565, - "normalized_score": 38.51963746223565 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3498322147651007, - "normalized_score": 13.31096196868009 - }, - "musr": { - "name": "MUSR", - "value": 0.43114583333333334, - "normalized_score": 13.593229166666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5352393617021277, - "normalized_score": 48.35992907801419 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-27", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 0.9900747099814388 - } - }, - { - "id": "Quazim0t0/InspectorDeck-14B-sce_bfloat16_fe7219bbb70d08c8ae342d291f959d35888a4810_True", - "model": { - "name": "Quazim0t0/InspectorDeck-14B-sce", - "sha": "fe7219bbb70d08c8ae342d291f959d35888a4810", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.601861765438738, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.32408454013129606, - "normalized_score": 32.40845401312961 - }, - "bbh": { - "name": "BBH", - "value": 0.6668480318764974, - "normalized_score": 52.011891494826756 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3164652567975831, - "normalized_score": 31.64652567975831 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.39815625, - "normalized_score": 7.936197916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5260970744680851, - "normalized_score": 47.344119385342786 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-05", - "generation": 0, - "base_model": "Quazim0t0/InspectorDeck-14B-sce", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 14.66, - "co2_cost": 1.911837218018592 - } - }, - { - "id": "Quazim0t0/Jekyl-8b-sce_bfloat16_9cad669dceaf4068b019372963698399c3e984a9_False", 
- "model": { - "name": "Quazim0t0/Jekyl-8b-sce", - "sha": "9cad669dceaf4068b019372963698399c3e984a9", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.085441896511327, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.46968931324441365, - "normalized_score": 46.96893132444137 - }, - "bbh": { - "name": "BBH", - "value": 0.4993588236391566, - "normalized_score": 28.232789513696968 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16163141993957703, - "normalized_score": 16.1631419939577 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33808724832214765, - "normalized_score": 11.74496644295302 - }, - "musr": { - "name": "MUSR", - "value": 0.41966666666666663, - "normalized_score": 11.558333333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3686003989361702, - "normalized_score": 29.844488770685572 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-06", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.431155965593621 - } - }, - { - "id": "Quazim0t0/Jigsaw-14B-Linear_bfloat16_f591f285bfa033445e7625dea5d162d37bcc7837_True", - "model": { - "name": "Quazim0t0/Jigsaw-14B-Linear", - "sha": "f591f285bfa033445e7625dea5d162d37bcc7837", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 36.85975504597242, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6480416406246536, - "normalized_score": 64.80416406246536 - }, - "bbh": { - "name": "BBH", - "value": 0.6864625931836906, - "normalized_score": 54.791326721615924 - }, - "math": { - "name": "MATH Level 5", - "value": 0.26510574018126887, - "normalized_score": 26.51057401812689 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34060402684563756, - "normalized_score": 12.080536912751676 - }, - "musr": { - "name": "MUSR", - "value": 0.44826041666666666, - "normalized_score": 15.932552083333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5233543882978723, - "normalized_score": 47.03937647754137 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-27", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 0.8862138639072998 - } - }, - { - "id": "Quazim0t0/Katana-8b-sce_bfloat16_0ef2599d3806bdd755a5684821b4fd181de33e23_False", - "model": { - "name": "Quazim0t0/Katana-8b-sce", - "sha": "0ef2599d3806bdd755a5684821b4fd181de33e23", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.188001040020307, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5107304175144174, - "normalized_score": 51.073041751441735 - }, - "bbh": { - "name": "BBH", - "value": 0.5074684221457483, - "normalized_score": 29.004160225251976 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1510574018126888, - 
"normalized_score": 15.105740181268882 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32466442953020136, - "normalized_score": 9.955257270693513 - }, - "musr": { - "name": "MUSR", - "value": 0.4037604166666667, - "normalized_score": 9.203385416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3770777925531915, - "normalized_score": 30.786421394799056 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-06", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.451141681464865 - } - }, - { - "id": "Quazim0t0/Knot-CoT-14B-sce_bfloat16_a02ed46b314356b588c458a884325aec312f5a4d_True", - "model": { - "name": "Quazim0t0/Knot-CoT-14B-sce", - "sha": "a02ed46b314356b588c458a884325aec312f5a4d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 33.73448595442946, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4831779677921249, - "normalized_score": 48.317796779212486 - }, - "bbh": { - "name": "BBH", - "value": 0.6615610657544672, - "normalized_score": 51.44445678655166 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3995468277945619, - "normalized_score": 39.95468277945619 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2936241610738255, - "normalized_score": 5.8165548098433995 - }, - "musr": { - "name": "MUSR", - "value": 0.41403125, - "normalized_score": 10.720572916666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.515375664893617, - "normalized_score": 46.15285165484633 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-03", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 1.9509894943173813 - } - }, - { - "id": "Quazim0t0/Lineage-14B_bfloat16_27e67b09a19e2540bb093bfec73dbc238710f124_True", - "model": { - "name": "Quazim0t0/Lineage-14B", - "sha": "27e67b09a19e2540bb093bfec73dbc238710f124", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 41.64434282329166, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7070428684778609, - "normalized_score": 70.7042868477861 - }, - "bbh": { - "name": "BBH", - "value": 0.6933789516730196, - "normalized_score": 56.02223040682762 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4244712990936556, - "normalized_score": 42.44712990936556 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3598993288590604, - "normalized_score": 14.65324384787472 - }, - "musr": { - "name": "MUSR", - "value": 0.4597291666666667, - "normalized_score": 17.032812499999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5410571808510638, - "normalized_score": 49.006353427895974 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-11", - "submission_date": "2025-03-13", - "generation": 1, - "base_model": 
"Quazim0t0/Lineage-14B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 0.917298302104046 - } - }, - { - "id": "Quazim0t0/Lo-Phi-14b_bfloat16_a7cfadbc404c24c36b62cdb8d4a4627b96f49ca0_True", - "model": { - "name": "Quazim0t0/Lo-Phi-14b", - "sha": "a7cfadbc404c24c36b62cdb8d4a4627b96f49ca0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 37.88897941168211, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4941189377518318, - "normalized_score": 49.41189377518318 - }, - "bbh": { - "name": "BBH", - "value": 0.6851928144814953, - "normalized_score": 54.70598224151049 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5196374622356495, - "normalized_score": 51.963746223564954 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32802013422818793, - "normalized_score": 10.402684563758392 - }, - "musr": { - "name": "MUSR", - "value": 0.42323958333333334, - "normalized_score": 12.30494791666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5369015957446809, - "normalized_score": 48.54462174940899 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-09", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 1.0540242970942009 - } - }, - { - "id": "Quazim0t0/Loke-14B-sce_bfloat16_61c4f8e81372687cc7fcef1e0190e3b2c118f07e_True", - "model": { - "name": "Quazim0t0/Loke-14B-sce", - "sha": "61c4f8e81372687cc7fcef1e0190e3b2c118f07e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 40.85820722600513, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6847863668399845, - "normalized_score": 68.47863668399846 - }, - "bbh": { - "name": "BBH", - "value": 0.6923902176707362, - "normalized_score": 55.83477772401803 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3904833836858006, - "normalized_score": 39.04833836858006 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3649328859060403, - "normalized_score": 15.324384787472036 - }, - "musr": { - "name": "MUSR", - "value": 0.46366666666666667, - "normalized_score": 17.55833333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5401429521276596, - "normalized_score": 48.90477245862885 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-04", - "submission_date": "2025-02-04", - "generation": 0, - "base_model": "Quazim0t0/Loke-14B-sce", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 14.66, - "co2_cost": 3.157607540885205 - } - }, - { - "id": "Quazim0t0/MFDOOM-14B_bfloat16_157973c58b21fdf8976f395d507f39dab5beba9e_True", - "model": { - "name": "Quazim0t0/MFDOOM-14B", - "sha": "157973c58b21fdf8976f395d507f39dab5beba9e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 41.4412132005641, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6736204382150472, - 
"normalized_score": 67.36204382150471 - }, - "bbh": { - "name": "BBH", - "value": 0.6916400252742457, - "normalized_score": 55.53057914384544 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5264350453172205, - "normalized_score": 52.64350453172205 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32298657718120805, - "normalized_score": 9.731543624161072 - }, - "musr": { - "name": "MUSR", - "value": 0.43765625, - "normalized_score": 14.207031250000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5425531914893617, - "normalized_score": 49.17257683215129 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-09", - "submission_date": "2025-03-09", - "generation": 1, - "base_model": "Quazim0t0/MFDOOM-14B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 1.894646411603964 - } - }, - { - "id": "Quazim0t0/MFGRIMM-14B_bfloat16_f00db754bd978924eb26b756ee0df30906fbaf3e_True", - "model": { - "name": "Quazim0t0/MFGRIMM-14B", - "sha": "f00db754bd978924eb26b756ee0df30906fbaf3e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 41.51803168899253, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6894074389287091, - "normalized_score": 68.94074389287091 - }, - "bbh": { - "name": "BBH", - "value": 0.69087746819662, - "normalized_score": 55.52294469432744 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5060422960725075, - "normalized_score": 50.60422960725075 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3338926174496644, - "normalized_score": 11.185682326621922 - }, - "musr": { - "name": "MUSR", - "value": 0.43613541666666666, - "normalized_score": 13.783593750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5416389627659575, - "normalized_score": 49.07099586288417 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-09", - "submission_date": "2025-03-09", - "generation": 1, - "base_model": "Quazim0t0/MFGRIMM-14B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 0.980466808213746 - } - }, - { - "id": "Quazim0t0/Math_Phi4_Reason_bfloat16_38570bd52155b6cb061bde234280114e63e3b1be_True", - "model": { - "name": "Quazim0t0/Math_Phi4_Reason", - "sha": "38570bd52155b6cb061bde234280114e63e3b1be", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.118748796782256, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3220111526305758, - "normalized_score": 32.201115263057574 - }, - "bbh": { - "name": "BBH", - "value": 0.6240212275403677, - "normalized_score": 45.05751713788222 - }, - "math": { - "name": "MATH Level 5", - "value": 0.32779456193353473, - "normalized_score": 32.779456193353475 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.4034270833333333, - "normalized_score": 8.528385416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5029920212765957, - "normalized_score": 44.77689125295508 - 
} - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-27", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 0.9604983306174951 - } - }, - { - "id": "Quazim0t0/Mithril-14B-sce_bfloat16_e61b5f199a583030626b101626bd63dc7edd6552_True", - "model": { - "name": "Quazim0t0/Mithril-14B-sce", - "sha": "e61b5f199a583030626b101626bd63dc7edd6552", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 40.98225259227315, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6957772044841022, - "normalized_score": 69.57772044841022 - }, - "bbh": { - "name": "BBH", - "value": 0.6925969240705362, - "normalized_score": 55.92521598371362 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3821752265861027, - "normalized_score": 38.21752265861027 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3691275167785235, - "normalized_score": 15.883668903803136 - }, - "musr": { - "name": "MUSR", - "value": 0.4610625, - "normalized_score": 17.36614583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5403091755319149, - "normalized_score": 48.92324172576833 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-04", - "submission_date": "2025-02-04", - "generation": 0, - "base_model": "Quazim0t0/Mithril-14B-sce", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 14.66, - "co2_cost": 1.7925213392250092 - } - }, - { - "id": "Quazim0t0/Mononoke-14B-sce_bfloat16_67e96e4a14c8ef33459ffe1b6f60154cb38bb242_True", - "model": { - "name": "Quazim0t0/Mononoke-14B-sce", - "sha": "67e96e4a14c8ef33459ffe1b6f60154cb38bb242", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 33.97900480209152, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3502129904209719, - "normalized_score": 35.02129904209719 - }, - "bbh": { - "name": "BBH", - "value": 0.6744431226588331, - "normalized_score": 53.15959925346656 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4697885196374622, - "normalized_score": 46.97885196374622 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32298657718120805, - "normalized_score": 9.731543624161072 - }, - "musr": { - "name": "MUSR", - "value": 0.4154583333333333, - "normalized_score": 11.23229166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5297539893617021, - "normalized_score": 47.75044326241135 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-03", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 1.9168044272834095 - } - }, - { - "id": "Quazim0t0/Motion-8B-Linear_bfloat16_55686324e1ac193e19658a843dfdaeb1bc7bb91c_True", - "model": { - "name": "Quazim0t0/Motion-8B-Linear", - "sha": "55686324e1ac193e19658a843dfdaeb1bc7bb91c", - "precision": 
"bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 27.23537322336956, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7685917809190725, - "normalized_score": 76.85917809190724 - }, - "bbh": { - "name": "BBH", - "value": 0.5084252652465131, - "normalized_score": 29.388867774583897 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18882175226586104, - "normalized_score": 18.882175226586103 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.36060416666666667, - "normalized_score": 4.542187500000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3784906914893617, - "normalized_score": 30.94341016548463 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-08", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.6920789114823463 - } - }, - { - "id": "Quazim0t0/Mouse-9B_bfloat16_177b7ecaef2c0c122441bc92defbf5c46e0812f0_True", - "model": { - "name": "Quazim0t0/Mouse-9B", - "sha": "177b7ecaef2c0c122441bc92defbf5c46e0812f0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 3.736733161665297, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1324917884546337, - "normalized_score": 13.24917884546337 - }, - "bbh": { - "name": "BBH", - "value": 0.29789470527601253, - "normalized_score": 2.506439889086948 - }, - "math": { - "name": "MATH Level 5", - "value": 0.005287009063444109, - "normalized_score": 0.5287009063444109 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25419463087248323, - "normalized_score": 0.5592841163310973 - }, - "musr": { - "name": "MUSR", - "value": 0.3469583333333333, - "normalized_score": 4.036458333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11386303191489362, - "normalized_score": 1.540336879432624 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-13", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 9.207, - "co2_cost": 0.6286929773056579 - } - }, - { - "id": "Quazim0t0/Nova-14b-sce_bfloat16_6935102206dd90782761ed49649b0e08aa8246e3_True", - "model": { - "name": "Quazim0t0/Nova-14b-sce", - "sha": "6935102206dd90782761ed49649b0e08aa8246e3", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 41.406311809340714, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7021968377239058, - "normalized_score": 70.2196837723906 - }, - "bbh": { - "name": "BBH", - "value": 0.6935261478148286, - "normalized_score": 56.034331914713114 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4161631419939577, - "normalized_score": 41.616314199395774 - }, - "gpqa": { - "name": "GPQA", - "value": 0.36325503355704697, - "normalized_score": 
[… end of the Quazim0t0/Nova-14b-sce record, then deletion of the remaining old-format leaderboard records, one JSON record per model. All records share one schema; the first complete record in this span, Quazim0t0/NovaScotia-14b-stock, reads, one diff line per JSON line:]
-    {
-      "id": "Quazim0t0/NovaScotia-14b-stock_bfloat16_890557675c991f5cb89d37dfa689ce6e4581a077_True",
-      "model": {
-        "name": "Quazim0t0/NovaScotia-14b-stock",
-        "sha": "890557675c991f5cb89d37dfa689ce6e4581a077",
-        "precision": "bfloat16",
-        "type": "basemergesandmoerges",
-        "weight_type": "Original",
-        "architecture": "LlamaForCausalLM",
-        "average_score": 41.34811039930162,
-        "has_chat_template": true
-      },
-      "evaluations": {
-        "ifeval": {
-          "name": "IFEval",
-          "value": 0.6787412953186434,
-          "normalized_score": 67.87412953186434
-        },
-        "bbh": {
-          "name": "BBH",
-          "value": 0.6935261478148286,
-          "normalized_score": 56.02714781885254
-        },
-        "math": {
-          "name": "MATH Level 5",
-          "value": 0.46299093655589124,
-          "normalized_score": 46.299093655589125
-        },
-        "gpqa": {
-          "name": "GPQA",
-          "value": 0.348993288590604,
-          "normalized_score": 13.19910514541387
-        },
-        "musr": {
-          "name": "MUSR",
-          "value": 0.44934375,
-          "normalized_score": 15.70130208333333
-        },
-        "mmlu_pro": {
-          "name": "MMLU-PRO",
-          "value": 0.5408909574468085,
-          "normalized_score": 48.9878841607565
-        }
-      },
-      "features": {
-        "is_not_available_on_hub": false,
-        "is_merged": false,
-        "is_moe": false,
-        "is_flagged": false,
-        "is_official_provider": false
-      },
-      "metadata": {
-        "upload_date": "2025-02-05",
-        "submission_date": "2025-02-05",
-        "generation": 1,
-        "base_model": "Quazim0t0/NovaScotia-14b-stock (Merge)",
-        "hub_license": "",
-        "hub_hearts": 0,
-        "params_billions": 14.66,
-        "co2_cost": 1.8867562083503056
-      }
-    },
[… identical-schema deleted records follow — each with the model block (name, sha, precision, type, weight_type, architecture, average_score, has_chat_template), raw and normalized scores for IFEval, BBH, MATH Level 5, GPQA, MUSR, and MMLU-PRO, feature flags (is_not_available_on_hub, is_merged, is_moe, is_flagged, is_official_provider), and metadata (upload_date, submission_date, generation, base_model, hub_license, hub_hearts, params_billions, co2_cost) — for: Quazim0t0/ODB-14B-sce, Quazim0t0/ODB-14b-sce, Quazim0t0/Oasis-14B-ties, Quazim0t0/Origami-14B-sce, Quazim0t0/Phi4.Turn.R1Distill.16bit, Quazim0t0/Phi4.Turn.R1Distill_v1.5.1-Tensors, Quazim0t0/Phi4Basis-14B-sce, Quazim0t0/Ponder-14B-linear, Quazim0t0/RZA-14B-sce, Quazim0t0/Rosemary-14b, Quazim0t0/Rune-14b, Quazim0t0/SZA-14B-sce, Quazim0t0/Sake-20b, Quazim0t0/Spok-14b-sce, Quazim0t0/Sumatra-20b, Quazim0t0/SuperNova14b, Quazim0t0/TB0-8B-sce, Quazim0t0/TBL-8B-sce, Quazim0t0/ThinkPhi1.1-Tensors, Quazim0t0/Venti-20b, Quazim0t0/Venti-Blend-sce, Quazim0t0/Vine-14b-sce, Quazim0t0/Wendy-14B, Quazim0t0/Wu-14b-sce, Quazim0t0/bloom-14b-stock, Quazim0t0/caramel-14B, Quazim0t0/graphite-14b-sce, Quazim0t0/mocha-14B, Quazim0t0/mosaic-14b-sce, Quazim0t0/tesseract-14b-stock, Quazim0t0/time-14b-stock, Qwen/QwQ-32B, Qwen/QwQ-32B-Preview, Qwen/Qwen1.5-0.5B, Qwen/Qwen1.5-0.5B-Chat, Qwen/Qwen1.5-1.8B, Qwen/Qwen1.5-1.8B-Chat, Qwen/Qwen1.5-110B, Qwen/Qwen1.5-110B-Chat, Qwen/Qwen1.5-14B, Qwen/Qwen1.5-14B-Chat, Qwen/Qwen1.5-32B, Qwen/Qwen1.5-32B-Chat, Qwen/Qwen1.5-4B, Qwen/Qwen1.5-4B-Chat, Qwen/Qwen1.5-7B, and Qwen/Qwen1.5-7B-Chat, whose record continues below …]
- "submission_date": "2024-06-12", - "generation": 0, - "base_model": "Qwen/Qwen1.5-7B-Chat", - "hub_license": "other", - "hub_hearts": 167, - "params_billions": 7.721, - "co2_cost": 2.15765319338272 - } - }, - { - "id": "Qwen/Qwen1.5-MoE-A2.7B_bfloat16_1a758c50ecb6350748b9ce0a99d2352fd9fc11c9_False", - "model": { - "name": "Qwen/Qwen1.5-MoE-A2.7B", - "sha": "1a758c50ecb6350748b9ce0a99d2352fd9fc11c9", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2MoeForCausalLM", - "average_score": 13.945920112290063, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.265982038768246, - "normalized_score": 26.598203876824606 - }, - "bbh": { - "name": "BBH", - "value": 0.4113515433010766, - "normalized_score": 18.837858500547185 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09290030211480363, - "normalized_score": 9.290030211480364 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.40134375000000005, - "normalized_score": 7.967968750000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2777593085106383, - "normalized_score": 19.751034278959807 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-02-29", - "submission_date": "2024-06-13", - "generation": 0, - "base_model": "Qwen/Qwen1.5-MoE-A2.7B", - "hub_license": "other", - "hub_hearts": 199, - "params_billions": 14.316, - "co2_cost": 19.09122579213944 - } - }, - { - "id": "Qwen/Qwen1.5-MoE-A2.7B-Chat_bfloat16_ec052fda178e241c7c443468d2fa1db6618996be_True", - "model": { - "name": "Qwen/Qwen1.5-MoE-A2.7B-Chat", - "sha": "ec052fda178e241c7c443468d2fa1db6618996be", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2MoeForCausalLM", - "average_score": 15.880899856122355, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.37953851336675576, - "normalized_score": 37.95385133667558 - }, - "bbh": { - "name": "BBH", - "value": 0.4272088620635824, - "normalized_score": 20.041818895540953 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0634441087613293, - "normalized_score": 6.3444108761329305 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.38987499999999997, - "normalized_score": 6.334375000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29230385638297873, - "normalized_score": 21.3670951536643 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-03-14", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "Qwen/Qwen1.5-MoE-A2.7B-Chat", - "hub_license": "other", - "hub_hearts": 119, - "params_billions": 14.316, - "co2_cost": 17.803943075071622 - } - }, - { - "id": "Qwen/Qwen2-0.5B_bfloat16_ff3a49fac17555b8dfc4db6709f480cc8f16a9fe_False", - "model": { - "name": "Qwen/Qwen2-0.5B", - "sha": "ff3a49fac17555b8dfc4db6709f480cc8f16a9fe", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.224121473565234, - "has_chat_template": false 
- }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.18732186154957736, - "normalized_score": 18.732186154957734 - }, - "bbh": { - "name": "BBH", - "value": 0.3239117424825444, - "normalized_score": 7.9185120409032566 - }, - "math": { - "name": "MATH Level 5", - "value": 0.026435045317220542, - "normalized_score": 2.643504531722054 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.37520833333333337, - "normalized_score": 4.601041666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17195811170212766, - "normalized_score": 7.99534574468085 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-31", - "submission_date": "2024-11-30", - "generation": 0, - "base_model": "Qwen/Qwen2-0.5B", - "hub_license": "apache-2.0", - "hub_hearts": 137, - "params_billions": 0.494, - "co2_cost": 2.6323615007483467 - } - }, - { - "id": "Qwen/Qwen2-0.5B-Instruct_bfloat16_c291d6fce4804a1d39305f388dd32897d1f7acc4_True", - "model": { - "name": "Qwen/Qwen2-0.5B-Instruct", - "sha": "c291d6fce4804a1d39305f388dd32897d1f7acc4", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 6.586780633287707, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22466610814860127, - "normalized_score": 22.466610814860125 - }, - "bbh": { - "name": "BBH", - "value": 0.31725179384863494, - "normalized_score": 5.876044259408482 - }, - "math": { - "name": "MATH Level 5", - "value": 0.028700906344410877, - "normalized_score": 2.8700906344410875 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24664429530201343, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.33527083333333335, - "normalized_score": 2.408854166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15309175531914893, - "normalized_score": 5.89908392434988 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-06-03", - "submission_date": "2024-06-12", - "generation": 1, - "base_model": "Qwen/Qwen2-0.5B", - "hub_license": "apache-2.0", - "hub_hearts": 179, - "params_billions": 0.494, - "co2_cost": 1.1156954462905264 - } - }, - { - "id": "Qwen/Qwen2-1.5B_bfloat16_8a16abf2848eda07cc5253dec660bf1ce007ad7a_False", - "model": { - "name": "Qwen/Qwen2-1.5B", - "sha": "8a16abf2848eda07cc5253dec660bf1ce007ad7a", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 10.445452935561454, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21132705665412216, - "normalized_score": 21.132705665412217 - }, - "bbh": { - "name": "BBH", - "value": 0.35747931720577464, - "normalized_score": 11.781833653483531 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0702416918429003, - "normalized_score": 7.02416918429003 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.36581250000000004, - "normalized_score": 3.5932291666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 
0.2551529255319149, - "normalized_score": 17.239213947990542 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-31", - "submission_date": "2024-06-09", - "generation": 0, - "base_model": "Qwen/Qwen2-1.5B", - "hub_license": "apache-2.0", - "hub_hearts": 90, - "params_billions": 1.544, - "co2_cost": 2.2163897840094315 - } - }, - { - "id": "Qwen/Qwen2-1.5B-Instruct_bfloat16_ba1cf1846d7df0a0591d6c00649f57e798519da8_True", - "model": { - "name": "Qwen/Qwen2-1.5B-Instruct", - "sha": "ba1cf1846d7df0a0591d6c00649f57e798519da8", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 14.141936815181689, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3371232773485463, - "normalized_score": 33.712327734854625 - }, - "bbh": { - "name": "BBH", - "value": 0.3852232408376059, - "normalized_score": 13.695346827502663 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07175226586102719, - "normalized_score": 7.175226586102719 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.42928125, - "normalized_score": 12.026822916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.25008311170212766, - "normalized_score": 16.675901300236408 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-06-03", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "Qwen/Qwen2-1.5B-Instruct", - "hub_license": "apache-2.0", - "hub_hearts": 140, - "params_billions": 1.544, - "co2_cost": 1.3176478771125482 - } - }, - { - "id": "Qwen/Qwen2-57B-A14B_bfloat16_973e466c39ba76372a2ae464dbca0af3f5a5a2a9_False", - "model": { - "name": "Qwen/Qwen2-57B-A14B", - "sha": "973e466c39ba76372a2ae464dbca0af3f5a5a2a9", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2MoeForCausalLM", - "average_score": 25.0338731324107, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.31126965340851165, - "normalized_score": 31.126965340851164 - }, - "bbh": { - "name": "BBH", - "value": 0.5618204938684165, - "normalized_score": 38.87598905034189 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1865558912386707, - "normalized_score": 18.65558912386707 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3062080536912752, - "normalized_score": 7.494407158836691 - }, - "musr": { - "name": "MUSR", - "value": 0.417375, - "normalized_score": 10.538541666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4916057180851064, - "normalized_score": 43.511746453900706 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-22", - "submission_date": "2024-06-13", - "generation": 0, - "base_model": "Qwen/Qwen2-57B-A14B", - "hub_license": "apache-2.0", - "hub_hearts": 50, - "params_billions": 57.409, - "co2_cost": 107.03147746473374 - } - }, - { - "id": "Qwen/Qwen2-57B-A14B-Instruct_bfloat16_5ea455a449e61a92a5b194ee06be807647d3e8b5_True", - "model": { - "name": 
"Qwen/Qwen2-57B-A14B-Instruct", - "sha": "5ea455a449e61a92a5b194ee06be807647d3e8b5", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2MoeForCausalLM", - "average_score": 33.015868823547095, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6337783747124297, - "normalized_score": 63.37783747124297 - }, - "bbh": { - "name": "BBH", - "value": 0.5887606963532052, - "normalized_score": 41.785917734842535 - }, - "math": { - "name": "MATH Level 5", - "value": 0.28172205438066467, - "normalized_score": 28.172205438066467 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3313758389261745, - "normalized_score": 10.850111856823268 - }, - "musr": { - "name": "MUSR", - "value": 0.43613541666666666, - "normalized_score": 14.183593749999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.45752992021276595, - "normalized_score": 39.725546690307326 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-06-04", - "submission_date": "2024-08-14", - "generation": 1, - "base_model": "Qwen/Qwen2-57B-A14B", - "hub_license": "apache-2.0", - "hub_hearts": 80, - "params_billions": 57.409, - "co2_cost": 85.0124950064074 - } - }, - { - "id": "Qwen/Qwen2-72B_bfloat16_87993795c78576318087f70b43fbf530eb7789e7_False", - "model": { - "name": "Qwen/Qwen2-72B", - "sha": "87993795c78576318087f70b43fbf530eb7789e7", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.45667093247413, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3823610243044012, - "normalized_score": 38.23610243044012 - }, - "bbh": { - "name": "BBH", - "value": 0.661734029856643, - "normalized_score": 51.85613118695519 - }, - "math": { - "name": "MATH Level 5", - "value": 0.311178247734139, - "normalized_score": 31.1178247734139 - }, - "gpqa": { - "name": "GPQA", - "value": 0.39429530201342283, - "normalized_score": 19.239373601789712 - }, - "musr": { - "name": "MUSR", - "value": 0.47036458333333336, - "normalized_score": 19.728906250000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5730551861702128, - "normalized_score": 52.56168735224587 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-22", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "Qwen/Qwen2-72B", - "hub_license": "other", - "hub_hearts": 200, - "params_billions": 72.706, - "co2_cost": 128.12455847973288 - } - }, - { - "id": "Qwen/Qwen2-72B-Instruct_bfloat16_1af63c698f59c4235668ec9c1395468cb7cd7e79_False", - "model": { - "name": "Qwen/Qwen2-72B-Instruct", - "sha": "1af63c698f59c4235668ec9c1395468cb7cd7e79", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 43.59406246367795, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7989168738945996, - "normalized_score": 79.89168738945996 - }, - "bbh": { - "name": "BBH", - "value": 0.697730968386067, - "normalized_score": 57.48300911876294 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4176737160120846, - "normalized_score": 41.76737160120846 
- }, - "gpqa": { - "name": "GPQA", - "value": 0.3724832214765101, - "normalized_score": 16.33109619686801 - }, - "musr": { - "name": "MUSR", - "value": 0.4560104166666667, - "normalized_score": 17.167968749999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5403091755319149, - "normalized_score": 48.92324172576833 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-28", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "Qwen/Qwen2-72B", - "hub_license": "other", - "hub_hearts": 713, - "params_billions": 72.706, - "co2_cost": 75.10794884539997 - } - }, - { - "id": "Qwen/Qwen2-7B_bfloat16_453ed1575b739b5b03ce3758b23befdb0967f40e_False", - "model": { - "name": "Qwen/Qwen2-7B", - "sha": "453ed1575b739b5b03ce3758b23befdb0967f40e", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 23.9251621404824, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3148667757106699, - "normalized_score": 31.48667757106699 - }, - "bbh": { - "name": "BBH", - "value": 0.531531595001889, - "normalized_score": 34.711136202753416 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2039274924471299, - "normalized_score": 20.39274924471299 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.4439166666666667, - "normalized_score": 14.322916666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.41830119680851063, - "normalized_score": 35.366799645390074 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-06-04", - "submission_date": "2024-06-09", - "generation": 0, - "base_model": "Qwen/Qwen2-7B", - "hub_license": "apache-2.0", - "hub_hearts": 154, - "params_billions": 7.616, - "co2_cost": 2.561164511151682 - } - }, - { - "id": "Qwen/Qwen2-7B-Instruct_bfloat16_41c66b0be1c3081f13defc6bdf946c2ef240d6a6_True", - "model": { - "name": "Qwen/Qwen2-7B-Instruct", - "sha": "41c66b0be1c3081f13defc6bdf946c2ef240d6a6", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 27.93668778218485, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5679075962889577, - "normalized_score": 56.79075962889577 - }, - "bbh": { - "name": "BBH", - "value": 0.5544781563793189, - "normalized_score": 37.80839092310167 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2764350453172205, - "normalized_score": 27.64350453172205 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.39279166666666665, - "normalized_score": 7.3656250000000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38472406914893614, - "normalized_score": 31.636007683215123 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-06-04", - "submission_date": "2024-06-12", - "generation": 1, - "base_model": "Qwen/Qwen2-7B", - 
"hub_license": "apache-2.0", - "hub_hearts": 622, - "params_billions": 7.616, - "co2_cost": 2.0840779717149838 - } - }, - { - "id": "Qwen/Qwen2-Math-72B-Instruct_bfloat16_5c267882f3377bcfc35882f8609098a894eeeaa8_True", - "model": { - "name": "Qwen/Qwen2-Math-72B-Instruct", - "sha": "5c267882f3377bcfc35882f8609098a894eeeaa8", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.020957002292825, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.569381463405985, - "normalized_score": 56.93814634059851 - }, - "bbh": { - "name": "BBH", - "value": 0.634337660025181, - "normalized_score": 47.96019950734914 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5536253776435045, - "normalized_score": 55.36253776435045 - }, - "gpqa": { - "name": "GPQA", - "value": 0.36828859060402686, - "normalized_score": 15.771812080536915 - }, - "musr": { - "name": "MUSR", - "value": 0.45169791666666664, - "normalized_score": 15.728906249999994 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.42727726063829785, - "normalized_score": 36.364140070921984 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-08-08", - "submission_date": "2024-08-19", - "generation": 0, - "base_model": "Qwen/Qwen2-Math-72B-Instruct", - "hub_license": "other", - "hub_hearts": 88, - "params_billions": 72.706, - "co2_cost": 24.33649538965421 - } - }, - { - "id": "Qwen/Qwen2-Math-7B_bfloat16_47a44ff4136da8960adbab02b2326787086bcf6c_True", - "model": { - "name": "Qwen/Qwen2-Math-7B", - "sha": "47a44ff4136da8960adbab02b2326787086bcf6c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 12.016921148016648, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2687048143370701, - "normalized_score": 26.870481433707006 - }, - "bbh": { - "name": "BBH", - "value": 0.386954741074792, - "normalized_score": 14.064494488871304 - }, - "math": { - "name": "MATH Level 5", - "value": 0.24773413897280966, - "normalized_score": 24.773413897280967 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.35933333333333334, - "normalized_score": 2.4166666666666683 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1196808510638298, - "normalized_score": 2.186761229314421 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-08-08", - "submission_date": "2024-08-19", - "generation": 0, - "base_model": "Qwen/Qwen2-Math-7B", - "hub_license": "apache-2.0", - "hub_hearts": 14, - "params_billions": 7.616, - "co2_cost": 3.126072125810204 - } - }, - { - "id": "Qwen/Qwen2-VL-72B-Instruct_bfloat16_f400120e59a6196b024298b7d09fb517f742db7d_True", - "model": { - "name": "Qwen/Qwen2-VL-72B-Instruct", - "sha": "f400120e59a6196b024298b7d09fb517f742db7d", - "precision": "bfloat16", - "type": "multimodal", - "weight_type": "Original", - "architecture": "Qwen2VLForConditionalGeneration", - "average_score": 39.53661995549832, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - 
"name": "IFEval", - "value": 0.5982326892644849, - "normalized_score": 59.82326892644849 - }, - "bbh": { - "name": "BBH", - "value": 0.6946287292338682, - "normalized_score": 56.3112338791251 - }, - "math": { - "name": "MATH Level 5", - "value": 0.34441087613293053, - "normalized_score": 34.44108761329305 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3875838926174497, - "normalized_score": 18.34451901565996 - }, - "musr": { - "name": "MUSR", - "value": 0.44921875, - "normalized_score": 15.885677083333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5717253989361702, - "normalized_score": 52.41393321513003 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-09-17", - "submission_date": "2024-10-20", - "generation": 1, - "base_model": "Qwen/Qwen2-VL-72B-Instruct (Merge)", - "hub_license": "other", - "hub_hearts": 281, - "params_billions": 73.406, - "co2_cost": 54.499433031200404 - } - }, - { - "id": "Qwen/Qwen2-VL-7B-Instruct_bfloat16_51c47430f97dd7c74aa1fa6825e68a813478097f_True", - "model": { - "name": "Qwen/Qwen2-VL-7B-Instruct", - "sha": "51c47430f97dd7c74aa1fa6825e68a813478097f", - "precision": "bfloat16", - "type": "multimodal", - "weight_type": "Original", - "architecture": "Qwen2VLForConditionalGeneration", - "average_score": 26.493258763428177, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4599218961245052, - "normalized_score": 45.99218961245052 - }, - "bbh": { - "name": "BBH", - "value": 0.5464507159069989, - "normalized_score": 35.87710314498947 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1986404833836858, - "normalized_score": 19.86404833836858 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3196308724832215, - "normalized_score": 9.284116331096197 - }, - "musr": { - "name": "MUSR", - "value": 0.4375, - "normalized_score": 13.554166666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.40949135638297873, - "normalized_score": 34.38792848699764 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-08-28", - "submission_date": "2024-10-20", - "generation": 1, - "base_model": "Qwen/Qwen2-VL-7B-Instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1160, - "params_billions": 8.291, - "co2_cost": 2.1087645694479167 - } - }, - { - "id": "Qwen/Qwen2.5-0.5B_bfloat16_2630d3d2321bc1f1878f702166d1b2af019a7310_False", - "model": { - "name": "Qwen/Qwen2.5-0.5B", - "sha": "2630d3d2321bc1f1878f702166d1b2af019a7310", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 6.550067614297009, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16271714606133947, - "normalized_score": 16.271714606133948 - }, - "bbh": { - "name": "BBH", - "value": 0.32748148151196615, - "normalized_score": 6.953961634882263 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03927492447129909, - "normalized_score": 3.927492447129909 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24664429530201343, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3433333333333333, - "normalized_score": 2.0833333333333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 
0.19057513297872342, - "normalized_score": 10.0639036643026 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-09-15", - "submission_date": "2024-09-19", - "generation": 0, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "apache-2.0", - "hub_hearts": 232, - "params_billions": 0.5, - "co2_cost": 2.330685257636701 - } - }, - { - "id": "Qwen/Qwen2.5-0.5B-Instruct_bfloat16_a8b602d9dafd3a75d382e62757d83d89fca3be54_True", - "model": { - "name": "Qwen/Qwen2.5-0.5B-Instruct", - "sha": "a8b602d9dafd3a75d382e62757d83d89fca3be54", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.140647319276075, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.307122878407071, - "normalized_score": 30.712287840707102 - }, - "bbh": { - "name": "BBH", - "value": 0.3340729214937266, - "normalized_score": 8.434863610588833 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2575503355704698, - "normalized_score": 1.0067114093959737 - }, - "musr": { - "name": "MUSR", - "value": 0.33288541666666666, - "normalized_score": 0.9440104166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16971409574468085, - "normalized_score": 7.7460106382978715 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-09-16", - "submission_date": "2024-09-19", - "generation": 1, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "apache-2.0", - "hub_hearts": 270, - "params_billions": 0.5, - "co2_cost": 0.6308244528197698 - } - }, - { - "id": "Qwen/Qwen2.5-0.5B-Instruct_float16_7ae557604adf67be50417f59c2c2f167def9a775_True", - "model": { - "name": "Qwen/Qwen2.5-0.5B-Instruct", - "sha": "7ae557604adf67be50417f59c2c2f167def9a775", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 10.107543850719255, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.31529120511354314, - "normalized_score": 31.529120511354318 - }, - "bbh": { - "name": "BBH", - "value": 0.3321916429549138, - "normalized_score": 8.169502268182768 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10347432024169184, - "normalized_score": 10.347432024169184 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.3341875, - "normalized_score": 1.3734374999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17195811170212766, - "normalized_score": 7.99534574468085 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-09-16", - "submission_date": "2024-10-16", - "generation": 1, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "apache-2.0", - "hub_hearts": 270, - "params_billions": 0.494, - "co2_cost": 1.2371519491666767 - } - }, - { - "id": "Qwen/Qwen2.5-1.5B_bfloat16_e5dfabbcffd9b0c7b31d89b82c5a6b72e663f32c_False", - "model": { - "name": "Qwen/Qwen2.5-1.5B", - "sha": 
"e5dfabbcffd9b0c7b31d89b82c5a6b72e663f32c", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 13.852701161320264, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.26743041795768563, - "normalized_score": 26.743041795768562 - }, - "bbh": { - "name": "BBH", - "value": 0.40779509451366147, - "normalized_score": 16.660465167691854 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09138972809667674, - "normalized_score": 9.138972809667674 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28523489932885904, - "normalized_score": 4.697986577181204 - }, - "musr": { - "name": "MUSR", - "value": 0.35759375, - "normalized_score": 5.265885416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28548869680851063, - "normalized_score": 20.609855200945624 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-09-15", - "submission_date": "2024-09-19", - "generation": 0, - "base_model": "Qwen/Qwen2.5-1.5B", - "hub_license": "apache-2.0", - "hub_hearts": 89, - "params_billions": 1.5, - "co2_cost": 2.497002020007018 - } - }, - { - "id": "Qwen/Qwen2.5-1.5B-Instruct_bfloat16_5fee7c4ed634dc66c6e318c8ac2897b8b9154536_True", - "model": { - "name": "Qwen/Qwen2.5-1.5B-Instruct", - "sha": "5fee7c4ed634dc66c6e318c8ac2897b8b9154536", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 18.430509141644382, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4475569267321817, - "normalized_score": 44.75569267321818 - }, - "bbh": { - "name": "BBH", - "value": 0.4288982740422907, - "normalized_score": 19.809786497358974 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22054380664652568, - "normalized_score": 22.054380664652566 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2558724832214765, - "normalized_score": 0.7829977628635317 - }, - "musr": { - "name": "MUSR", - "value": 0.3663125, - "normalized_score": 3.1890625000000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27992021276595747, - "normalized_score": 19.99113475177305 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-09-17", - "submission_date": "2024-09-19", - "generation": 1, - "base_model": "Qwen/Qwen2.5-1.5B", - "hub_license": "apache-2.0", - "hub_hearts": 373, - "params_billions": 1.5, - "co2_cost": 1.3743786623908494 - } - }, - { - "id": "Qwen/Qwen2.5-14B_bfloat16_83a1904df002b00bc8db6f877821cb77dbb363b0_False", - "model": { - "name": "Qwen/Qwen2.5-14B", - "sha": "83a1904df002b00bc8db6f877821cb77dbb363b0", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 31.951062693148973, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3694464022127954, - "normalized_score": 36.94464022127954 - }, - "bbh": { - "name": "BBH", - "value": 0.616051493531774, - "normalized_score": 45.078312404984935 - }, - "math": { - "name": "MATH Level 5", - "value": 0.29003021148036257, - "normalized_score": 29.003021148036257 - }, - "gpqa": { - "name": "GPQA", - 
"value": 0.38171140939597314, - "normalized_score": 17.561521252796418 - }, - "musr": { - "name": "MUSR", - "value": 0.4502395833333333, - "normalized_score": 15.913281249999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5248503989361702, - "normalized_score": 47.205599881796694 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-09-15", - "submission_date": "2024-09-19", - "generation": 0, - "base_model": "Qwen/Qwen2.5-14B", - "hub_license": "apache-2.0", - "hub_hearts": 102, - "params_billions": 14.77, - "co2_cost": 8.733912171643484 - } - }, - { - "id": "Qwen/Qwen2.5-14B-Instruct_bfloat16_f55224c616ca27d4bcf28969a156de12c98981cf_True", - "model": { - "name": "Qwen/Qwen2.5-14B-Instruct", - "sha": "f55224c616ca27d4bcf28969a156de12c98981cf", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.30945747711163, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8157776920792386, - "normalized_score": 81.57776920792386 - }, - "bbh": { - "name": "BBH", - "value": 0.6390453705906222, - "normalized_score": 48.36070661282705 - }, - "math": { - "name": "MATH Level 5", - "value": 0.547583081570997, - "normalized_score": 54.7583081570997 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3221476510067114, - "normalized_score": 9.61968680089485 - }, - "musr": { - "name": "MUSR", - "value": 0.4100625, - "normalized_score": 10.157812500000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4904421542553192, - "normalized_score": 43.382461583924346 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-09-16", - "submission_date": "2024-09-18", - "generation": 1, - "base_model": "Qwen/Qwen2.5-14B", - "hub_license": "apache-2.0", - "hub_hearts": 209, - "params_billions": 14.77, - "co2_cost": 3.547300640675203 - } - }, - { - "id": "Qwen/Qwen2.5-14B-Instruct-1M_bfloat16_b0c9f6e6f0123e755d47922c24818e488e21b93f_True", - "model": { - "name": "Qwen/Qwen2.5-14B-Instruct-1M", - "sha": "b0c9f6e6f0123e755d47922c24818e488e21b93f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.559026792386994, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8413564896696322, - "normalized_score": 84.1356489669632 - }, - "bbh": { - "name": "BBH", - "value": 0.6198222551365405, - "normalized_score": 45.658280600233155 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5302114803625377, - "normalized_score": 53.02114803625378 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34312080536912754, - "normalized_score": 12.416107382550338 - }, - "musr": { - "name": "MUSR", - "value": 0.418, - "normalized_score": 11.350000000000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4849567819148936, - "normalized_score": 42.77297576832151 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2025-01-23", - "submission_date": "2025-01-26", - "generation": 1, - "base_model": "Qwen/Qwen2.5-14B", 
- "hub_license": "apache-2.0", - "hub_hearts": 286, - "params_billions": 14.77, - "co2_cost": 3.2822772236538045 - } - }, - { - "id": "Qwen/Qwen2.5-32B_bfloat16_ff23665d01c3665be5fdb271d18a62090b65c06d_False", - "model": { - "name": "Qwen/Qwen2.5-32B", - "sha": "ff23665d01c3665be5fdb271d18a62090b65c06d", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.00796730514634, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.40766499554515356, - "normalized_score": 40.766499554515356 - }, - "bbh": { - "name": "BBH", - "value": 0.6770522448726507, - "normalized_score": 53.954752851331996 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3564954682779456, - "normalized_score": 35.64954682779456 - }, - "gpqa": { - "name": "GPQA", - "value": 0.41191275167785235, - "normalized_score": 21.588366890380314 - }, - "musr": { - "name": "MUSR", - "value": 0.49783333333333335, - "normalized_score": 22.69583333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5805352393617021, - "normalized_score": 53.39280437352246 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-09-15", - "submission_date": "2024-09-19", - "generation": 0, - "base_model": "Qwen/Qwen2.5-32B", - "hub_license": "apache-2.0", - "hub_hearts": 129, - "params_billions": 32.764, - "co2_cost": 11.74977108549449 - } - }, - { - "id": "Qwen/Qwen2.5-32B-Instruct_bfloat16_70e8dfb9ad18a7d499f765fe206ff065ed8ca197_True", - "model": { - "name": "Qwen/Qwen2.5-32B-Instruct", - "sha": "70e8dfb9ad18a7d499f765fe206ff065ed8ca197", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 46.59714569921449, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8346121623957765, - "normalized_score": 83.46121623957765 - }, - "bbh": { - "name": "BBH", - "value": 0.6912525080134339, - "normalized_score": 56.48934826159387 - }, - "math": { - "name": "MATH Level 5", - "value": 0.6253776435045317, - "normalized_score": 62.53776435045317 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33808724832214765, - "normalized_score": 11.74496644295302 - }, - "musr": { - "name": "MUSR", - "value": 0.42612500000000003, - "normalized_score": 13.498958333333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.566655585106383, - "normalized_score": 51.85062056737589 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-09-17", - "submission_date": "2024-09-19", - "generation": 1, - "base_model": "Qwen/Qwen2.5-32B", - "hub_license": "apache-2.0", - "hub_hearts": 239, - "params_billions": 32.764, - "co2_cost": 11.504966340685739 - } - }, - { - "id": "Qwen/Qwen2.5-3B_bfloat16_e4aa5ac50aa507415cda96cc99eb77ad0a3d2d34_False", - "model": { - "name": "Qwen/Qwen2.5-3B", - "sha": "e4aa5ac50aa507415cda96cc99eb77ad0a3d2d34", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 18.102770217683673, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2689541527591236, - "normalized_score": 
26.895415275912356 - }, - "bbh": { - "name": "BBH", - "value": 0.4612475341011634, - "normalized_score": 24.304241726371686 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14803625377643503, - "normalized_score": 14.803625377643503 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.4303333333333333, - "normalized_score": 11.758333333333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3203125, - "normalized_score": 24.479166666666664 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-09-15", - "submission_date": "2024-09-27", - "generation": 0, - "base_model": "Qwen/Qwen2.5-3B", - "hub_license": "other", - "hub_hearts": 83, - "params_billions": 3.086, - "co2_cost": 5.386605082306483 - } - }, - { - "id": "Qwen/Qwen2.5-3B-Instruct_bfloat16_82f42baa094a9600e39ccd80d34058aeeb3abbc1_True", - "model": { - "name": "Qwen/Qwen2.5-3B-Instruct", - "sha": "82f42baa094a9600e39ccd80d34058aeeb3abbc1", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 27.16175720903232, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6474919879253713, - "normalized_score": 64.74919879253714 - }, - "bbh": { - "name": "BBH", - "value": 0.469276665604885, - "normalized_score": 25.801393944088584 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3678247734138973, - "normalized_score": 36.78247734138973 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.39679166666666665, - "normalized_score": 7.565625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3254654255319149, - "normalized_score": 25.05171394799054 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-09-17", - "submission_date": "2024-09-19", - "generation": 1, - "base_model": "Qwen/Qwen2.5-3B", - "hub_license": "other", - "hub_hearts": 220, - "params_billions": 3.0, - "co2_cost": 2.7769486111619237 - } - }, - { - "id": "Qwen/Qwen2.5-72B_bfloat16_587cc4061cf6a7cc0d429d05c109447e5cf063af_False", - "model": { - "name": "Qwen/Qwen2.5-72B", - "sha": "587cc4061cf6a7cc0d429d05c109447e5cf063af", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.441143572535815, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4137100670664947, - "normalized_score": 41.37100670664947 - }, - "bbh": { - "name": "BBH", - "value": 0.6797320670694852, - "normalized_score": 54.61505780163693 - }, - "math": { - "name": "MATH Level 5", - "value": 0.39123867069486407, - "normalized_score": 39.12386706948641 - }, - "gpqa": { - "name": "GPQA", - "value": 0.4052013422818792, - "normalized_score": 20.69351230425056 - }, - "musr": { - "name": "MUSR", - "value": 0.477125, - "normalized_score": 19.640624999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5968251329787234, - "normalized_score": 55.20279255319149 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - 
"is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-09-15", - "submission_date": "2024-09-19", - "generation": 0, - "base_model": "Qwen/Qwen2.5-72B", - "hub_license": "other", - "hub_hearts": 62, - "params_billions": 72.706, - "co2_cost": 36.18385317998766 - } - }, - { - "id": "Qwen/Qwen2.5-72B-Instruct_bfloat16_a13fff9ad76700c7ecff2769f75943ba8395b4a7_True", - "model": { - "name": "Qwen/Qwen2.5-72B-Instruct", - "sha": "a13fff9ad76700c7ecff2769f75943ba8395b4a7", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 47.98045991216864, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.863837949972739, - "normalized_score": 86.3837949972739 - }, - "bbh": { - "name": "BBH", - "value": 0.7272747321744824, - "normalized_score": 61.873255668787884 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5981873111782477, - "normalized_score": 59.818731117824775 - }, - "gpqa": { - "name": "GPQA", - "value": 0.375, - "normalized_score": 16.666666666666664 - }, - "musr": { - "name": "MUSR", - "value": 0.42060416666666667, - "normalized_score": 11.742187500000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5625831117021277, - "normalized_score": 51.39812352245864 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-09-16", - "submission_date": "2024-10-16", - "generation": 1, - "base_model": "Qwen/Qwen2.5-72B", - "hub_license": "other", - "hub_hearts": 777, - "params_billions": 72.706, - "co2_cost": 47.645491341309835 - } - }, - { - "id": "Qwen/Qwen2.5-7B_bfloat16_57597c00770845ceba45271ba1b24c94bbcc7baf_False", - "model": { - "name": "Qwen/Qwen2.5-7B", - "sha": "57597c00770845ceba45271ba1b24c94bbcc7baf", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 26.019159924095096, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3374479713825982, - "normalized_score": 33.74479713825982 - }, - "bbh": { - "name": "BBH", - "value": 0.5416303767788616, - "normalized_score": 35.81347328754777 - }, - "math": { - "name": "MATH Level 5", - "value": 0.25075528700906347, - "normalized_score": 25.075528700906347 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32466442953020136, - "normalized_score": 9.955257270693513 - }, - "musr": { - "name": "MUSR", - "value": 0.4424270833333333, - "normalized_score": 14.13671875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4365026595744681, - "normalized_score": 37.38918439716312 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-09-15", - "submission_date": "2024-09-19", - "generation": 0, - "base_model": "Qwen/Qwen2.5-7B", - "hub_license": "apache-2.0", - "hub_hearts": 154, - "params_billions": 7.616, - "co2_cost": 4.686730426753914 - } - }, - { - "id": "Qwen/Qwen2.5-7B-Instruct_bfloat16_52e20a6f5f475e5c8f6a8ebda4ae5fa6b1ea22ac_True", - "model": { - "name": "Qwen/Qwen2.5-7B-Instruct", - "sha": "52e20a6f5f475e5c8f6a8ebda4ae5fa6b1ea22ac", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": 
"Qwen2ForCausalLM", - "average_score": 35.200108659947965, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7585251576926999, - "normalized_score": 75.85251576926998 - }, - "bbh": { - "name": "BBH", - "value": 0.5394231968299095, - "normalized_score": 34.89211675876548 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5, - "normalized_score": 50.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.40203125, - "normalized_score": 8.453906250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4286901595744681, - "normalized_score": 36.52112884160757 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-09-16", - "submission_date": "2024-09-18", - "generation": 1, - "base_model": "Qwen/Qwen2.5-7B", - "hub_license": "apache-2.0", - "hub_hearts": 577, - "params_billions": 7.616, - "co2_cost": 3.2405911036770783 - } - }, - { - "id": "Qwen/Qwen2.5-7B-Instruct-1M_bfloat16_49d7103d61e30d68e8cfdea8c419f3f39e5b9c15_True", - "model": { - "name": "Qwen/Qwen2.5-7B-Instruct-1M", - "sha": "49d7103d61e30d68e8cfdea8c419f3f39e5b9c15", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 32.76394723937119, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7447616767953474, - "normalized_score": 74.47616767953474 - }, - "bbh": { - "name": "BBH", - "value": 0.5403941270576822, - "normalized_score": 35.02629094112727 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4335347432024169, - "normalized_score": 43.353474320241695 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.40869791666666666, - "normalized_score": 9.520572916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35048204787234044, - "normalized_score": 27.83133865248227 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2025-01-23", - "submission_date": "2025-01-26", - "generation": 1, - "base_model": "Qwen/Qwen2.5-7B", - "hub_license": "apache-2.0", - "hub_hearts": 274, - "params_billions": 7.616, - "co2_cost": 1.2526745301638325 - } - }, - { - "id": "Qwen/Qwen2.5-Coder-14B_bfloat16_1db30eb5ec86a6e51d8981818ee2910370b3010d_True", - "model": { - "name": "Qwen/Qwen2.5-Coder-14B", - "sha": "1db30eb5ec86a6e51d8981818ee2910370b3010d", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 24.829052280388357, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3472652561869174, - "normalized_score": 34.72652561869174 - }, - "bbh": { - "name": "BBH", - "value": 0.5864860091741232, - "normalized_score": 40.52300211536303 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22507552870090636, - "normalized_score": 22.507552870090635 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29278523489932884, - "normalized_score": 5.7046979865771785 - }, - "musr": { - "name": "MUSR", - "value": 0.3873645833333333, - 
"normalized_score": 6.3872395833333355 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4521276595744681, - "normalized_score": 39.125295508274235 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-11-08", - "submission_date": "2024-11-12", - "generation": 1, - "base_model": "Qwen/Qwen2.5-Coder-14B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 34, - "params_billions": 14.77, - "co2_cost": 7.265250401506517 - } - }, - { - "id": "Qwen/Qwen2.5-Coder-14B-Instruct_bfloat16_1a62978099f9b19f72fdd191988ff958abb18561_True", - "model": { - "name": "Qwen/Qwen2.5-Coder-14B-Instruct", - "sha": "1a62978099f9b19f72fdd191988ff958abb18561", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 32.12283417812606, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6907560827493273, - "normalized_score": 69.07560827493273 - }, - "bbh": { - "name": "BBH", - "value": 0.6140296423661326, - "normalized_score": 44.220018215668375 - }, - "math": { - "name": "MATH Level 5", - "value": 0.324773413897281, - "normalized_score": 32.477341389728096 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.3914583333333333, - "normalized_score": 7.032291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3939494680851064, - "normalized_score": 32.661052009456256 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-11-06", - "submission_date": "2024-11-12", - "generation": 1, - "base_model": "Qwen/Qwen2.5-Coder-14B-Instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 95, - "params_billions": 14.77, - "co2_cost": 2.7664281305179466 - } - }, - { - "id": "Qwen/Qwen2.5-Coder-32B_float16_2e12b5f7bc878d424d222e224ed40aee564ec45f_False", - "model": { - "name": "Qwen/Qwen2.5-Coder-32B", - "sha": "2e12b5f7bc878d424d222e224ed40aee564ec45f", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 33.2623633375386, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4363411304228336, - "normalized_score": 43.63411304228336 - }, - "bbh": { - "name": "BBH", - "value": 0.640395506550809, - "normalized_score": 48.51121340614174 - }, - "math": { - "name": "MATH Level 5", - "value": 0.30891238670694865, - "normalized_score": 30.891238670694865 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3464765100671141, - "normalized_score": 12.863534675615215 - }, - "musr": { - "name": "MUSR", - "value": 0.4528125, - "normalized_score": 15.868229166666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5302526595744681, - "normalized_score": 47.80585106382979 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-11-08", - "submission_date": "2024-12-10", - "generation": 1, - "base_model": "Qwen/Qwen2.5-Coder-32B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 109, - "params_billions": 32.764, - "co2_cost": 
9.380500534599602
-    }
-  },
[47 superseded hfopenllm_v2 leaderboard entries deleted here, each with the same shape: model name, sha, precision, type, weight_type, and architecture; raw and normalized scores for IFEval, BBH, MATH Level 5, GPQA, MUSR, and MMLU-PRO; feature flags (is_not_available_on_hub, is_merged, is_moe, is_flagged, is_official_provider); and metadata (upload_date, submission_date, generation, base_model, hub_license, hub_hearts, params_billions, co2_cost). The deleted entry ids, in order:
  Qwen/Qwen2.5-Coder-32B-Instruct_float16_b47205940b83b5b484577359f71ee7b88472df67_False
  Qwen/Qwen2.5-Coder-7B_bfloat16_097b213c52760d22753af1aa5cbdba94b5c99506_True
  Qwen/Qwen2.5-Coder-7B-Instruct_bfloat16_3030861ab8e72c6155e1821631bf977ef40d3e5b_True
  Qwen/Qwen2.5-Coder-7B-Instruct_float16_f784f10a7b2aac91bd26e6dbe7dccce691cd4ac5_True
  Qwen/Qwen2.5-Math-1.5B-Instruct_bfloat16_aafeb0fc6f22cbf0eaeed126eff8be45b0360a35_True
  Qwen/Qwen2.5-Math-72B-Instruct_bfloat16_3743c8fd46b002d105c1d28d180f1e531df1d40f_True
  Qwen/Qwen2.5-Math-7B_bfloat16_8daf1d676c3f24ddec5a99c5cff00a5c0e1c441c_True
  Qwen/Qwen2.5-Math-7B-Instruct_bfloat16_b3b4c5794bf4b68c1978bb3525afc5e0d0d6fcc4_True
  RDson/WomboCombo-R1-Coder-14B-Preview_bfloat16_199b45ea53952f969d8ff5517fb06c561cab39ee_True
  RESMPDEV/EVA-Qwen2.5-1.5B-FRFR_bfloat16_0e8da55d1655e132ee5fa341239cff31bd4e695f_True
  RESMPDEV/Qwen2-Wukong-0.5B_bfloat16_52c58a4aa3d0b44c363c5761fa658243f5c53943_True
  RLHFlow/ArmoRM-Llama3-8B-v0.1_bfloat16_eb2676d20da2f2d41082289d23c59b9f7427f955_True
  RLHFlow/LLaMA3-iterative-DPO-final_bfloat16_40b73bd07a019795837f80579fe95470484ca82b_True
  RWKV/rwkv-raven-14b_float16_359c0649b4f1d10a26ebea32908035bc00d152ee_False
  Rakuten/RakutenAI-2.0-mini-instruct_bfloat16_6d902489587d324b7d5e201299e4e1a169f3a40b_True
  Rakuten/RakutenAI-7B_float16_c687b10cbf1aa6c34868904b62ecfcef2e0946bf_False
  Rakuten/RakutenAI-7B-chat_float16_1685492c5c40f8a7f57e2cc1c8fa65e5b0c94d31_False
  Replete-AI/L3-Pneuma-8B_bfloat16_3e477fa150bf31b360891d3920ccbd57dac110ab_False
  Replete-AI/L3.1-Pneuma-8B_bfloat16_843163ca811525c4f98f817aae8fb5da7c1fb7bb_True
  Replete-AI/Llama3-8B-Instruct-Replete-Adapted_float16_d930f2111913da6fb7693187e1cdc817191c8e5e_True
  Replete-AI/Replete-Coder-Instruct-8b-Merged_bfloat16_0594615bf84f0803a078b59f14eb090cec2004f3_True
  Replete-AI/Replete-Coder-Llama3-8B_bfloat16_2aca75c53e7eb2f523889ab1a279e349b8f1b0e8_True
  Replete-AI/Replete-Coder-Qwen2-1.5b_bfloat16_86fcccbf921b7eb8a4d348e4a3cde0beb63d6626_True
  Replete-AI/Replete-LLM-Qwen2-7b_float16_e3569433b23fde853683ad61f342d2c1bd01d60a_True
  Replete-AI/Replete-LLM-Qwen2-7b_bfloat16_5b75b6180b45d83124e04a00766dc19d2ad52622_True
  Replete-AI/Replete-LLM-Qwen2-7b_Beta-Preview_bfloat16_fe4c3fc2314db69083527ddd0c9a658fcbc54f15_True
  Replete-AI/Replete-LLM-V2-Llama-3.1-8b_bfloat16_5ff5224804dcc31f536e491e52310f2e3cdc0b57_False
  RezVortex/JAJUKA-WEWILLNEVERFORGETYOU-3B_float16_4bcb8eb1fa4da9385b267573c74592bd65455465_True
  RezVortex/Jajuka-3b_float16_c5f920214d6fc9dd610c014176d3ea7259eb6540_True
  Ro-xe/FMixIA-7B-DARE-0_float16_5436487303217ed564ddb9951de5263803bd9069_False
  Ro-xe/FMixIA-7B-SLERP-27_float16_30406563f691a74558c1ac468df6c61e8a570191_False
  Ro-xe/FMixIA-7B-TIES-1_float16_c0d2ed9c652cf4f6ba990ce1d849e1b1fbd1ebc0_False
  Ro-xe/FMixIA-FrankenMerge-9.5B-PT-9_float16_c17ec4748e234236ee91dcdd609092e813ed4c83_False
  Rombo-Org/Rombo-LLM-V2.5-Qwen-7b_bfloat16_efb0a8bf8aff0c6d1748b25bcf40e9a1d62f2496_True
  RubielLabarta/LogoS-7Bx2-MoE-13B-v0.2_bfloat16_fb0f72b9914a81892bfeea5a04fcd9676c883d64_False
  SaisExperiments/Evil-Alpaca-3B-L3.2_float16_77d25b9182270a66ac60a91d646b447e1530f70e_False
  SaisExperiments/Gemma-2-2B-Opus-Instruct_bfloat16_7caa9e833d3f5713cf1b8ebd8beeb6ef02da99ea_False
  SaisExperiments/Gemma-2-2B-Stheno-Filtered_bfloat16_683443cfa90c7a06978d1c5e9ead0fb0a68b49ca_False
  SaisExperiments/Not-So-Small-Alpaca-24B_bfloat16_2aa9ee03da6022fe5b98679f4e4d385a9722a21f_True
  SaisExperiments/QwOwO-7B-V1_bfloat16_85d13b4879282566bb280bef4a89a235c09cbb13_True
  SaisExperiments/RightSheep-Llama3.2-3B_bfloat16_6e18a57049705ef813cb7fa426d58a0309cd7a29_False
  Sakalti/Anemoi-3B_float16_002ae5bf1d1741d5c245d34c71b069ec4917d27c_False
  Sakalti/Euphrates-14B_float16_fb4de44f697b9dee8918853b3e8187deac0aeecd_False
  Sakalti/Llama3.2-3B-Uranus-1_float16_05468598f7b5c5122bf30bab9595b70bb0e2fe18_False
  Sakalti/Magro-7B-v1.1_bfloat16_e35ed139261f2fd0fbec874f30ba331ca49f53cb_False
  Sakalti/Neptuno-3B_float16_bf4ab8d1bbbc95d0a0b7668c152de76eb95cbccf_False
  Sakalti/Neptuno-Alpha_float16_30e36db1c63038b075427661cf71bfab26df3580_False
]
-  {
-    "id": "Sakalti/Oxyge1-33B_float16_41be8eb03be9a00da6e87f18fb369ad317078e01_False",
-    "model": {
"name": "Sakalti/Oxyge1-33B", - "sha": "41be8eb03be9a00da6e87f18fb369ad317078e01", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.60924567669207, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4548265269484966, - "normalized_score": 45.48265269484966 - }, - "bbh": { - "name": "BBH", - "value": 0.7033278292161169, - "normalized_score": 58.03229697403189 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4962235649546828, - "normalized_score": 49.62235649546828 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3825503355704698, - "normalized_score": 17.67337807606264 - }, - "musr": { - "name": "MUSR", - "value": 0.5007812500000001, - "normalized_score": 24.29765625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5909242021276596, - "normalized_score": 54.547133569739955 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-08", - "submission_date": "2024-12-18", - "generation": 1, - "base_model": "Sakalti/Oxyge1-33B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 32.764, - "co2_cost": 11.323784419799072 - } - }, - { - "id": "Sakalti/Phi3.5-Comets-3.8B_bfloat16_b839027ddc5437b739792aea4dfacc0e91898bb9_False", - "model": { - "name": "Sakalti/Phi3.5-Comets-3.8B", - "sha": "b839027ddc5437b739792aea4dfacc0e91898bb9", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 5.760043006242388, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20942876013422163, - "normalized_score": 20.942876013422165 - }, - "bbh": { - "name": "BBH", - "value": 0.3335116874180515, - "normalized_score": 6.53359017300762 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0007552870090634441, - "normalized_score": 0.0755287009063444 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24916107382550334, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3763541666666667, - "normalized_score": 5.3109375000000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11527593085106383, - "normalized_score": 1.6973256501182026 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-21", - "submission_date": "2024-12-21", - "generation": 1, - "base_model": "Sakalti/Phi3.5-Comets-3.8B (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 3.821, - "co2_cost": 1.0273448195668746 - } - }, - { - "id": "Sakalti/Qwen2.5-1B-Instruct_float16_6f150c987cbaa2eb5782772e791a1e04cf932bde_False", - "model": { - "name": "Sakalti/Qwen2.5-1B-Instruct", - "sha": "6f150c987cbaa2eb5782772e791a1e04cf932bde", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.275488298707521, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17513198313807365, - "normalized_score": 17.513198313807365 - }, - "bbh": { - "name": "BBH", - "value": 0.30271528035563927, - "normalized_score": 3.437039790947589 - }, - "math": { - 
"name": "MATH Level 5", - "value": 0.006042296072507553, - "normalized_score": 0.6042296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2558724832214765, - "normalized_score": 0.7829977628635317 - }, - "musr": { - "name": "MUSR", - "value": 0.33688541666666666, - "normalized_score": 0.9440104166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12134308510638298, - "normalized_score": 2.3714539007092186 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-17", - "submission_date": "2025-01-18", - "generation": 1, - "base_model": "Sakalti/Qwen2.5-1B-Instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.988, - "co2_cost": 1.9103899946892298 - } - }, - { - "id": "Sakalti/QwenTest-7_bfloat16_7bff974fb76d3b95dc9cb14271cfb948b479bc00_False", - "model": { - "name": "Sakalti/QwenTest-7", - "sha": "7bff974fb76d3b95dc9cb14271cfb948b479bc00", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.255208552096732, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16718861509683197, - "normalized_score": 16.718861509683194 - }, - "bbh": { - "name": "BBH", - "value": 0.3063209532879154, - "normalized_score": 3.6327089522677753 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0037764350453172208, - "normalized_score": 0.3776435045317221 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.34218750000000003, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12117686170212766, - "normalized_score": 2.352984633569739 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-19", - "submission_date": "2025-01-19", - "generation": 1, - "base_model": "Sakalti/QwenTest-7 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.988, - "co2_cost": 1.8782412242166238 - } - }, - { - "id": "Sakalti/SJT-0.5B_float16_0a531361ffe857405505c3399a4dabd780c6c1e1_False", - "model": { - "name": "Sakalti/SJT-0.5B", - "sha": "0a531361ffe857405505c3399a4dabd780c6c1e1", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.576015804911226, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24247662867857286, - "normalized_score": 24.247662867857287 - }, - "bbh": { - "name": "BBH", - "value": 0.33055365550588683, - "normalized_score": 8.409743934103323 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05211480362537765, - "normalized_score": 5.211480362537765 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27181208053691275, - "normalized_score": 2.9082774049216997 - }, - "musr": { - "name": "MUSR", - "value": 0.31958333333333333, - "normalized_score": 0.7812499999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.18907912234042554, - "normalized_score": 9.89768026004728 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - 
"is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-12", - "submission_date": "2025-01-17", - "generation": 1, - "base_model": "Sakalti/SJT-0.5B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.0150278917178388 - } - }, - { - "id": "Sakalti/SJT-1.5B-Alpha_float16_36eb5b63215952c6522728c3d734ff5e0693a7ac_False", - "model": { - "name": "Sakalti/SJT-1.5B-Alpha", - "sha": "36eb5b63215952c6522728c3d734ff5e0693a7ac", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 16.911765349077854, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3448671746521452, - "normalized_score": 34.48671746521452 - }, - "bbh": { - "name": "BBH", - "value": 0.4240819448548446, - "normalized_score": 18.535867065098497 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09969788519637462, - "normalized_score": 9.969788519637463 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.4226145833333333, - "normalized_score": 11.093489583333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2961269946808511, - "normalized_score": 21.79188829787234 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-18", - "submission_date": "2025-01-19", - "generation": 1, - "base_model": "Sakalti/SJT-1.5B-Alpha (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.777, - "co2_cost": 1.1096038170870661 - } - }, - { - "id": "Sakalti/SJT-1.5B-Alpha-1.1_float16_900b199f20bcf74a1c0769ec46858f645dd29230_False", - "model": { - "name": "Sakalti/SJT-1.5B-Alpha-1.1", - "sha": "900b199f20bcf74a1c0769ec46858f645dd29230", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 16.820570294667494, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3439429602344003, - "normalized_score": 34.39429602344003 - }, - "bbh": { - "name": "BBH", - "value": 0.4243160272518483, - "normalized_score": 18.576163936656105 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09592145015105741, - "normalized_score": 9.592145015105741 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28942953020134227, - "normalized_score": 5.257270693512303 - }, - "musr": { - "name": "MUSR", - "value": 0.42391666666666666, - "normalized_score": 11.256250000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.296625664893617, - "normalized_score": 21.84729609929078 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-18", - "submission_date": "2025-01-19", - "generation": 1, - "base_model": "Sakalti/SJT-1.5B-Alpha-1.1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 1.777, - "co2_cost": 1.0746383971386602 - } - }, - { - "id": "Sakalti/SJT-1.7B_float16_3e753aba0535d31f4598baab9f656a9b5a197544_False", - "model": { - "name": "Sakalti/SJT-1.7B", - "sha": "3e753aba0535d31f4598baab9f656a9b5a197544", - 
"precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 5.034230484246264, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17762980004166723, - "normalized_score": 17.76298000416672 - }, - "bbh": { - "name": "BBH", - "value": 0.2934008926922806, - "normalized_score": 2.896901055053732 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0015105740181268882, - "normalized_score": 0.1510574018126888 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24161073825503357, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.39641666666666664, - "normalized_score": 7.918749999999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11328125, - "normalized_score": 1.4756944444444438 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-13", - "submission_date": "2025-01-18", - "generation": 2, - "base_model": "Sakalti/Qwen2.5-test-2 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.684, - "co2_cost": 1.2959849868492936 - } - }, - { - "id": "Sakalti/SJT-14B_float16_ad063627b30567dbb797c0e58116e481f7ad433f_False", - "model": { - "name": "Sakalti/SJT-14B", - "sha": "ad063627b30567dbb797c0e58116e481f7ad433f", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.21030695378075, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5494233079340594, - "normalized_score": 54.942330793405944 - }, - "bbh": { - "name": "BBH", - "value": 0.6536135646865123, - "normalized_score": 49.99070835617017 - }, - "math": { - "name": "MATH Level 5", - "value": 0.38444108761329304, - "normalized_score": 38.4441087613293 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38674496644295303, - "normalized_score": 18.232662192393736 - }, - "musr": { - "name": "MUSR", - "value": 0.476625, - "normalized_score": 18.978125000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5380651595744681, - "normalized_score": 48.67390661938534 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-12", - "submission_date": "2025-01-14", - "generation": 1, - "base_model": "Sakalti/SJT-14B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 14.766, - "co2_cost": 4.510360371451952 - } - }, - { - "id": "Sakalti/SJT-2.4B_float16_24e19008e5f22b9a4574b72051f23b8654ef9841_False", - "model": { - "name": "Sakalti/SJT-2.4B", - "sha": "24e19008e5f22b9a4574b72051f23b8654ef9841", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.90691477930258, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.28042039566128985, - "normalized_score": 28.04203956612898 - }, - "bbh": { - "name": "BBH", - "value": 0.349012395546882, - "normalized_score": 8.383641553044898 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02190332326283988, - "normalized_score": 2.190332326283988 - }, - "gpqa": { - "name": "GPQA", - "value": 
0.2558724832214765, - "normalized_score": 0.7829977628635317 - }, - "musr": { - "name": "MUSR", - "value": 0.36990624999999994, - "normalized_score": 4.504947916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1858377659574468, - "normalized_score": 9.537529550827422 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-23", - "submission_date": "2025-01-23", - "generation": 1, - "base_model": "Sakalti/SJT-2.4B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 2.432, - "co2_cost": 1.7115605292280882 - } - }, - { - "id": "Sakalti/SJT-24B-Alpha_float16_273305272a361d7c5db9623bfa369f738e0b537a_False", - "model": { - "name": "Sakalti/SJT-24B-Alpha", - "sha": "273305272a361d7c5db9623bfa369f738e0b537a", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 29.687175261647443, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3206370208823699, - "normalized_score": 32.06370208823699 - }, - "bbh": { - "name": "BBH", - "value": 0.6080838080485248, - "normalized_score": 44.44992027804292 - }, - "math": { - "name": "MATH Level 5", - "value": 0.25302114803625375, - "normalized_score": 25.302114803625376 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3808724832214765, - "normalized_score": 17.4496644295302 - }, - "musr": { - "name": "MUSR", - "value": 0.45947916666666666, - "normalized_score": 16.001562500000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.48570478723404253, - "normalized_score": 42.856087470449175 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "Sakalti/SJT-24B-Alpha (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 24.125, - "co2_cost": 5.570278523730434 - } - }, - { - "id": "Sakalti/SJT-2B_float16_12a4385aab2760d505aa21fa933fe1295c95b6ce_False", - "model": { - "name": "Sakalti/SJT-2B", - "sha": "12a4385aab2760d505aa21fa933fe1295c95b6ce", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 4.821696209153122, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21507378200951255, - "normalized_score": 21.507378200951255 - }, - "bbh": { - "name": "BBH", - "value": 0.29364597509285106, - "normalized_score": 1.8859080599169236 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0007552870090634441, - "normalized_score": 0.0755287009063444 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24161073825503357, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.35641666666666666, - "normalized_score": 3.385416666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11868351063829788, - "normalized_score": 2.0759456264775418 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-12", - "submission_date": "2025-01-17", - "generation": 1, - "base_model": 
"Sakalti/SJT-2B (Merge)", - "hub_license": "gemma", - "hub_hearts": 0, - "params_billions": 2.614, - "co2_cost": 2.407519547751544 - } - }, - { - "id": "Sakalti/SJT-2B-V1.1_float16_9145939a74b87b3d0f6cf5a2aecbe1242b456ffa_False", - "model": { - "name": "Sakalti/SJT-2B-V1.1", - "sha": "9145939a74b87b3d0f6cf5a2aecbe1242b456ffa", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 14.58950928973948, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3977235956151899, - "normalized_score": 39.772359561518996 - }, - "bbh": { - "name": "BBH", - "value": 0.39838417813569243, - "normalized_score": 15.547066237177035 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04833836858006042, - "normalized_score": 4.833836858006042 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.42993750000000003, - "normalized_score": 12.542187500000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.21243351063829788, - "normalized_score": 12.492612293144207 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-04", - "submission_date": "2025-02-04", - "generation": 1, - "base_model": "Sakalti/SJT-2B-V1.1 (Merge)", - "hub_license": "gemma", - "hub_hearts": 1, - "params_billions": 2.614, - "co2_cost": 1.6165986508412546 - } - }, - { - "id": "Sakalti/SJT-3.7B_float16_d74dc94f11e3a47cfbf55a186d328051b702a2ed_False", - "model": { - "name": "Sakalti/SJT-3.7B", - "sha": "d74dc94f11e3a47cfbf55a186d328051b702a2ed", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.0893182036401825, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.10776184966998675, - "normalized_score": 10.776184966998674 - }, - "bbh": { - "name": "BBH", - "value": 0.3393045259885476, - "normalized_score": 7.8072799104561055 - }, - "math": { - "name": "MATH Level 5", - "value": 0.012084592145015106, - "normalized_score": 1.2084592145015105 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2558724832214765, - "normalized_score": 0.7829977628635317 - }, - "musr": { - "name": "MUSR", - "value": 0.36171875000000003, - "normalized_score": 4.348177083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1505152925531915, - "normalized_score": 5.612810283687943 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-21", - "submission_date": "2025-01-21", - "generation": 1, - "base_model": "Sakalti/SJT-3.7B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.783, - "co2_cost": 1.2065671720503752 - } - }, - { - "id": "Sakalti/SJT-4B_float16_73f7300ec22890c5fde308db5d2b447283b3e483_False", - "model": { - "name": "Sakalti/SJT-4B", - "sha": "73f7300ec22890c5fde308db5d2b447283b3e483", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.994031536147386, - "has_chat_template": false - }, - "evaluations": { 
- "ifeval": { - "name": "IFEval", - "value": 0.4077403511571519, - "normalized_score": 40.77403511571519 - }, - "bbh": { - "name": "BBH", - "value": 0.4885743296577029, - "normalized_score": 28.913837340499914 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11555891238670694, - "normalized_score": 11.555891238670695 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.4779583333333333, - "normalized_score": 19.444791666666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.328125, - "normalized_score": 25.34722222222222 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-12", - "submission_date": "2025-01-14", - "generation": 4, - "base_model": "microsoft/Phi-3.5-mini-instruct", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 3.821, - "co2_cost": 0.833496941991619 - } - }, - { - "id": "Sakalti/SJT-7.5B_float16_5c14b8a47a6549b675a922403107f8f7dc72e076_False", - "model": { - "name": "Sakalti/SJT-7.5B", - "sha": "5c14b8a47a6549b675a922403107f8f7dc72e076", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.88865048007519, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42232831110342783, - "normalized_score": 42.23283111034278 - }, - "bbh": { - "name": "BBH", - "value": 0.5367364587851736, - "normalized_score": 34.23401639666226 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21676737160120846, - "normalized_score": 21.676737160120847 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3263422818791946, - "normalized_score": 10.17897091722595 - }, - "musr": { - "name": "MUSR", - "value": 0.43988541666666664, - "normalized_score": 14.219010416666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3951130319148936, - "normalized_score": 32.790336879432616 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-01", - "submission_date": "2025-02-01", - "generation": 1, - "base_model": "Sakalti/SJT-7.5B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.456, - "co2_cost": 1.4681774046591203 - } - }, - { - "id": "Sakalti/SJT-7B-V1.1_float16_5ca369c7c3beefd739d8a6cfe6bd0358e1452d3d_False", - "model": { - "name": "Sakalti/SJT-7B-V1.1", - "sha": "5ca369c7c3beefd739d8a6cfe6bd0358e1452d3d", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 28.195244495354924, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4702888336281067, - "normalized_score": 47.028883362810674 - }, - "bbh": { - "name": "BBH", - "value": 0.5418885259534293, - "normalized_score": 35.097523213785486 - }, - "math": { - "name": "MATH Level 5", - "value": 0.243202416918429, - "normalized_score": 24.3202416918429 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3338926174496644, - "normalized_score": 11.185682326621922 - }, - "musr": { - "name": "MUSR", - "value": 0.44106249999999997, - "normalized_score": 13.6328125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - 
"value": 0.441156914893617, - "normalized_score": 37.90632387706855 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "Sakalti/SJT-7B-V1.1 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.613, - "co2_cost": 1.323080701100286 - } - }, - { - "id": "Sakalti/SJT-7B-V1.1-Multilingal_float16_592e7258d9151644109fa537158ba79fb222888a_False", - "model": { - "name": "Sakalti/SJT-7B-V1.1-Multilingal", - "sha": "592e7258d9151644109fa537158ba79fb222888a", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.71738898050407, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19494053555676716, - "normalized_score": 19.494053555676714 - }, - "bbh": { - "name": "BBH", - "value": 0.2919597646466201, - "normalized_score": 3.060666963755198 - }, - "math": { - "name": "MATH Level 5", - "value": 0.004531722054380665, - "normalized_score": 0.4531722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.362125, - "normalized_score": 2.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11369680851063829, - "normalized_score": 1.521867612293143 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-04", - "submission_date": "2025-02-04", - "generation": 1, - "base_model": "Sakalti/SJT-7B-V1.1-Multilingal (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 1.4192332933550222 - } - }, - { - "id": "Sakalti/SJT-8B_float16_6967017f01d44d8787ab40434f8130c5c7528ac3_False", - "model": { - "name": "Sakalti/SJT-8B", - "sha": "6967017f01d44d8787ab40434f8130c5c7528ac3", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 29.90631962140043, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6534871917623019, - "normalized_score": 65.34871917623019 - }, - "bbh": { - "name": "BBH", - "value": 0.5281955607099067, - "normalized_score": 33.33083133498579 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2537764350453172, - "normalized_score": 25.377643504531722 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3296979865771812, - "normalized_score": 10.626398210290827 - }, - "musr": { - "name": "MUSR", - "value": 0.4079791666666666, - "normalized_score": 8.464062499999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4266123670212766, - "normalized_score": 36.290263002364064 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "Sakalti/SJT-8B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.548, - "co2_cost": 1.839929665668291 - } - }, - { - "id": 
"Sakalti/SJT-8B-V1.1_float16_f08d9bc41f35c2730c8323a688d7abac89d7933d_False", - "model": { - "name": "Sakalti/SJT-8B-V1.1", - "sha": "f08d9bc41f35c2730c8323a688d7abac89d7933d", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 26.20087403142101, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4620706392372239, - "normalized_score": 46.20706392372239 - }, - "bbh": { - "name": "BBH", - "value": 0.5120768392487195, - "normalized_score": 31.31949877760972 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20694864048338368, - "normalized_score": 20.694864048338367 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33640939597315433, - "normalized_score": 11.521252796420578 - }, - "musr": { - "name": "MUSR", - "value": 0.4266145833333333, - "normalized_score": 11.56015625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4231216755319149, - "normalized_score": 35.90240839243498 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-04", - "submission_date": "2025-02-04", - "generation": 1, - "base_model": "Sakalti/SJT-8B-V1.1 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.545, - "co2_cost": 2.7253098966587963 - } - }, - { - "id": "Sakalti/SJT-900M_float16_ac5472bb38e1057fd639030239ccb89d773936be_False", - "model": { - "name": "Sakalti/SJT-900M", - "sha": "ac5472bb38e1057fd639030239ccb89d773936be", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 5.791993813854994, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2410027615615456, - "normalized_score": 24.100276156154557 - }, - "bbh": { - "name": "BBH", - "value": 0.31692036321713823, - "normalized_score": 4.3018424038847485 - }, - "math": { - "name": "MATH Level 5", - "value": 0.013595166163141994, - "normalized_score": 1.3595166163141994 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2533557046979866, - "normalized_score": 0.44742729306487633 - }, - "musr": { - "name": "MUSR", - "value": 0.35945833333333327, - "normalized_score": 2.9656249999999993 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11419547872340426, - "normalized_score": 1.5772754137115832 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-19", - "submission_date": "2025-01-19", - "generation": 1, - "base_model": "Sakalti/SJT-900M (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.899, - "co2_cost": 1.7087868760604994 - } - }, - { - "id": "Sakalti/SJT-Moe2x7.5B_float16_6754f87af42c9ae94a6df15e6cffd0087d59d77b_False", - "model": { - "name": "Sakalti/SJT-Moe2x7.5B", - "sha": "6754f87af42c9ae94a6df15e6cffd0087d59d77b", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 25.679678830683567, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.41166216749336204, - "normalized_score": 41.1662167493362 - }, - "bbh": { - "name": "BBH", - 
"value": 0.5370697921185069, - "normalized_score": 34.245683063328926 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21450151057401812, - "normalized_score": 21.45015105740181 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3263422818791946, - "normalized_score": 10.17897091722595 - }, - "musr": { - "name": "MUSR", - "value": 0.43988541666666664, - "normalized_score": 14.219010416666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3953623670212766, - "normalized_score": 32.81804078014184 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-01", - "submission_date": "2025-02-01", - "generation": 1, - "base_model": "Sakalti/SJT-Moe2x7.5B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 13.401, - "co2_cost": 2.6970671992670097 - } - }, - { - "id": "Sakalti/SJTPass-2_float16_ac5719c8fefd3edaae83bdfc85c017b3dbcd9926_False", - "model": { - "name": "Sakalti/SJTPass-2", - "sha": "ac5719c8fefd3edaae83bdfc85c017b3dbcd9926", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.578554710120141, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24002867945939, - "normalized_score": 24.002867945939 - }, - "bbh": { - "name": "BBH", - "value": 0.33022032217255354, - "normalized_score": 8.362336526695914 - }, - "math": { - "name": "MATH Level 5", - "value": 0.052870090634441085, - "normalized_score": 5.287009063444108 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.32225, - "normalized_score": 0.7812499999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1901595744680851, - "normalized_score": 10.017730496453899 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-24", - "submission_date": "2025-01-25", - "generation": 1, - "base_model": "Sakalti/SJTPass-2 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.0617289263368817 - } - }, - { - "id": "Sakalti/SJTPass-4_float16_25903b41cb09c23fee967983a949f474ee3cf9c3_False", - "model": { - "name": "Sakalti/SJTPass-4", - "sha": "25903b41cb09c23fee967983a949f474ee3cf9c3", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 5.078209950944516, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19129354557019818, - "normalized_score": 19.129354557019816 - }, - "bbh": { - "name": "BBH", - "value": 0.2963644180215358, - "normalized_score": 2.456191569806904 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0022658610271903325, - "normalized_score": 0.22658610271903326 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.38981249999999995, - "normalized_score": 6.393229166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10829454787234043, - "normalized_score": 0.9216164302600468 - } - }, - "features": { - 
"is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-25", - "generation": 1, - "base_model": "Sakalti/SJTPass-4 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.167, - "co2_cost": 2.363076527133505 - } - }, - { - "id": "Sakalti/SJTPass-5_float16_78a097a32d265cf7605f836c289bd8de4e6ff48d_False", - "model": { - "name": "Sakalti/SJTPass-5", - "sha": "78a097a32d265cf7605f836c289bd8de4e6ff48d", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 6.780863382054697, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24247662867857286, - "normalized_score": 24.247662867857287 - }, - "bbh": { - "name": "BBH", - "value": 0.31029599812555747, - "normalized_score": 4.049294123683027 - }, - "math": { - "name": "MATH Level 5", - "value": 0.015861027190332326, - "normalized_score": 1.5861027190332326 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26677852348993286, - "normalized_score": 2.2371364653243813 - }, - "musr": { - "name": "MUSR", - "value": 0.3794270833333333, - "normalized_score": 4.928385416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.13272938829787234, - "normalized_score": 3.6365986997635926 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-25", - "generation": 1, - "base_model": "Sakalti/SJTPass-5 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.809, - "co2_cost": 1.460292660441067 - } - }, - { - "id": "Sakalti/Saba-Passthrough-2_bfloat16_dffd6b7eb21495c75643487ddf34b38d20c94828_False", - "model": { - "name": "Sakalti/Saba-Passthrough-2", - "sha": "dffd6b7eb21495c75643487ddf34b38d20c94828", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.019416015292425, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16913677930114318, - "normalized_score": 16.91367793011432 - }, - "bbh": { - "name": "BBH", - "value": 0.36724803467499195, - "normalized_score": 11.94868585880535 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0007552870090634441, - "normalized_score": 0.0755287009063444 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.3844479166666666, - "normalized_score": 5.42265625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.20769614361702127, - "normalized_score": 11.966238179669029 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-20", - "submission_date": "2024-12-20", - "generation": 1, - "base_model": "Sakalti/Saba-Passthrough-2 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.087, - "co2_cost": 2.2033995592839033 - } - }, - { - "id": "Sakalti/Saba1-1.8B_float16_cd7a39dcf0614e2446031d1a0c598c4830198651_False", - "model": { - "name": "Sakalti/Saba1-1.8B", - "sha": 
"cd7a39dcf0614e2446031d1a0c598c4830198651", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 17.174113467091598, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3332768166243345, - "normalized_score": 33.32768166243345 - }, - "bbh": { - "name": "BBH", - "value": 0.4147375470428282, - "normalized_score": 17.532918576799144 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1540785498489426, - "normalized_score": 15.407854984894259 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.4238854166666666, - "normalized_score": 11.019010416666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2925531914893617, - "normalized_score": 21.39479905437352 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-08", - "submission_date": "2024-12-14", - "generation": 1, - "base_model": "Sakalti/Saba1-1.8B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 1.777, - "co2_cost": 1.1643531206320985 - } - }, - { - "id": "Sakalti/Saba1-7B_bfloat16_37f44913d4d9246190c963373ce472235539ba2a_False", - "model": { - "name": "Sakalti/Saba1-7B", - "sha": "37f44913d4d9246190c963373ce472235539ba2a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 30.523726347884292, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45847351693506566, - "normalized_score": 45.84735169350657 - }, - "bbh": { - "name": "BBH", - "value": 0.5489063327459239, - "normalized_score": 35.23563153262108 - }, - "math": { - "name": "MATH Level 5", - "value": 0.36631419939577037, - "normalized_score": 36.631419939577036 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3162751677852349, - "normalized_score": 8.83668903803132 - }, - "musr": { - "name": "MUSR", - "value": 0.47932291666666665, - "normalized_score": 19.08203125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43758311170212766, - "normalized_score": 37.50923463356973 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-10", - "submission_date": "2025-01-13", - "generation": 1, - "base_model": "Sakalti/Saba1-7B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.3641929348973936 - } - }, - { - "id": "Sakalti/Saba1.5-1.5B_float16_e05f428fa08bb15aebd810e3da103534d2068dc0_False", - "model": { - "name": "Sakalti/Saba1.5-1.5B", - "sha": "e05f428fa08bb15aebd810e3da103534d2068dc0", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 17.174113467091598, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3332768166243345, - "normalized_score": 33.32768166243345 - }, - "bbh": { - "name": "BBH", - "value": 0.4147375470428282, - "normalized_score": 17.532918576799144 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1540785498489426, - 
"normalized_score": 15.407854984894259 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.4238854166666666, - "normalized_score": 11.019010416666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2925531914893617, - "normalized_score": 21.39479905437352 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-07", - "submission_date": "2024-12-14", - "generation": 0, - "base_model": "Sakalti/Saba1.5-1.5B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.544, - "co2_cost": 1.1457239270410775 - } - }, - { - "id": "Sakalti/Saba1.5-Pro-3B_bfloat16_f656fff24db6c39bbd4fd83c1ed1620d339950b7_False", - "model": { - "name": "Sakalti/Saba1.5-Pro-3B", - "sha": "f656fff24db6c39bbd4fd83c1ed1620d339950b7", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 10.788079074661752, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23860468002677343, - "normalized_score": 23.860468002677344 - }, - "bbh": { - "name": "BBH", - "value": 0.3622910501405146, - "normalized_score": 11.041395854945073 - }, - "math": { - "name": "MATH Level 5", - "value": 0.027190332326283987, - "normalized_score": 2.719033232628399 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.44054166666666666, - "normalized_score": 14.001041666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.19581117021276595, - "normalized_score": 10.645685579196215 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-15", - "submission_date": "2024-12-15", - "generation": 1, - "base_model": "Sakalti/Saba1.5-Pro-3B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 2.9, - "co2_cost": 2.0211880085819254 - } - }, - { - "id": "Sakalti/Saba2-14B-Preview_float16_e8a44ff1844a15780aab1d60f670e07a17b8ccb8_False", - "model": { - "name": "Sakalti/Saba2-14B-Preview", - "sha": "e8a44ff1844a15780aab1d60f670e07a17b8ccb8", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.687158487523945, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4721871301480073, - "normalized_score": 47.21871301480073 - }, - "bbh": { - "name": "BBH", - "value": 0.649628096691823, - "normalized_score": 49.64957000538939 - }, - "math": { - "name": "MATH Level 5", - "value": 0.31268882175226587, - "normalized_score": 31.268882175226587 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3825503355704698, - "normalized_score": 17.67337807606264 - }, - "musr": { - "name": "MUSR", - "value": 0.4781458333333333, - "normalized_score": 19.6015625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5383976063829787, - "normalized_score": 48.71084515366431 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": 
"2024-12-14", - "submission_date": "2024-12-14", - "generation": 1, - "base_model": "Sakalti/Saba2-14B-Preview (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.77, - "co2_cost": 3.1790733504217985 - } - }, - { - "id": "Sakalti/Saba2-3B_float16_24ff63079aa31f1476039d51d1db4e0c1747436f_False", - "model": { - "name": "Sakalti/Saba2-3B", - "sha": "24ff63079aa31f1476039d51d1db4e0c1747436f", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.068766556279139, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.28651533486704167, - "normalized_score": 28.651533486704167 - }, - "bbh": { - "name": "BBH", - "value": 0.28011877359000464, - "normalized_score": 2.002419101562576 - }, - "math": { - "name": "MATH Level 5", - "value": 0.006042296072507553, - "normalized_score": 0.6042296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2617449664429531, - "normalized_score": 1.5659955257270781 - }, - "musr": { - "name": "MUSR", - "value": 0.39269791666666665, - "normalized_score": 7.25390625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12101063829787234, - "normalized_score": 2.3345153664302596 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-05", - "submission_date": "2025-01-05", - "generation": 3, - "base_model": "Qwen/Qwen2.5-3B", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 3.086, - "co2_cost": 1.630537214027361 - } - }, - { - "id": "Sakalti/Sailor-japanese_float16__False", - "model": { - "name": "Sakalti/Sailor-japanese", - "sha": "", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.518102124361933, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16046866757979938, - "normalized_score": 16.046866757979938 - }, - "bbh": { - "name": "BBH", - "value": 0.2912583602962783, - "normalized_score": 1.721791288073509 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0030211480362537764, - "normalized_score": 0.3021148036253776 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2533557046979866, - "normalized_score": 0.44742729306487633 - }, - "musr": { - "name": "MUSR", - "value": 0.3911770833333333, - "normalized_score": 6.763802083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11643949468085106, - "normalized_score": 1.8266105200945615 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-22", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 3.4726330476976357 - } - }, - { - "id": "Sakalti/Saka-1.5B_float16_33fe7226550102131b2a10cb38a86570c66a82b1_False", - "model": { - "name": "Sakalti/Saka-1.5B", - "sha": "33fe7226550102131b2a10cb38a86570c66a82b1", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 12.780380949151962, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - 
"name": "IFEval", - "value": 0.2726266306732802, - "normalized_score": 27.262663067328024 - }, - "bbh": { - "name": "BBH", - "value": 0.3987868899865206, - "normalized_score": 16.01477085551285 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08006042296072508, - "normalized_score": 8.006042296072508 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.37390625000000005, - "normalized_score": 4.304947916666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.24152260638297873, - "normalized_score": 15.724734042553193 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-07", - "submission_date": "2025-02-07", - "generation": 1, - "base_model": "Sakalti/Saka-1.5B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 1.777, - "co2_cost": 1.2166855344358272 - } - }, - { - "id": "Sakalti/Saka-14B_float16_d110532209d8dad5b11fdf4148b5a0a7dfd11718_False", - "model": { - "name": "Sakalti/Saka-14B", - "sha": "d110532209d8dad5b11fdf4148b5a0a7dfd11718", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.90788417749271, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7174341857382855, - "normalized_score": 71.74341857382856 - }, - "bbh": { - "name": "BBH", - "value": 0.6496945295195891, - "normalized_score": 49.72322791125956 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4093655589123867, - "normalized_score": 40.93655589123867 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3959731543624161, - "normalized_score": 19.463087248322143 - }, - "musr": { - "name": "MUSR", - "value": 0.48859375, - "normalized_score": 20.740885416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.539561170212766, - "normalized_score": 48.84013002364066 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-05", - "submission_date": "2025-02-06", - "generation": 1, - "base_model": "Sakalti/Saka-14B (Merge)", - "hub_license": "", - "hub_hearts": 7, - "params_billions": 14.766, - "co2_cost": 3.817782434928411 - } - }, - { - "id": "Sakalti/Saka-24B_float16_f1fc24007beadc3708367b4252a91954df50ab99_False", - "model": { - "name": "Sakalti/Saka-24B", - "sha": "f1fc24007beadc3708367b4252a91954df50ab99", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 28.23669492021674, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.38186123928952953, - "normalized_score": 38.18612392895295 - }, - "bbh": { - "name": "BBH", - "value": 0.6072116494463233, - "normalized_score": 43.311074405335944 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18051359516616314, - "normalized_score": 18.051359516616312 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3422818791946309, - "normalized_score": 12.304250559284117 - }, - "musr": { - "name": "MUSR", - "value": 0.45408333333333334, - "normalized_score": 15.727083333333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 
0.4765625, - "normalized_score": 41.84027777777778 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-06", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 23.572, - "co2_cost": 2.5344282496185704 - } - }, - { - "id": "Sakalti/Saka-7.2B_float16_29027c1f9169261c9275b7466d776727059112b2_False", - "model": { - "name": "Sakalti/Saka-7.2B", - "sha": "29027c1f9169261c9275b7466d776727059112b2", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 3.869360483737708, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1544989516704566, - "normalized_score": 15.44989516704566 - }, - "bbh": { - "name": "BBH", - "value": 0.2945156585364917, - "normalized_score": 2.1709866331347283 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23909395973154363, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.37105208333333334, - "normalized_score": 3.8148437499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11602393617021277, - "normalized_score": 1.7804373522458627 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-06", - "submission_date": "2025-02-06", - "generation": 1, - "base_model": "llm-jp/llm-jp-3-7.2b-instruct3", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.292, - "co2_cost": 1.4495863626061547 - } - }, - { - "id": "Sakalti/Saka-7.6B_float16_90832afb508c1874cf46d8ddf32e27dcb8eb3f0c_False", - "model": { - "name": "Sakalti/Saka-7.6B", - "sha": "90832afb508c1874cf46d8ddf32e27dcb8eb3f0c", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 29.81529957886164, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45242844541372446, - "normalized_score": 45.24284454137245 - }, - "bbh": { - "name": "BBH", - "value": 0.5655284792075981, - "normalized_score": 37.971180889420054 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3255287009063444, - "normalized_score": 32.55287009063444 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3162751677852349, - "normalized_score": 8.83668903803132 - }, - "musr": { - "name": "MUSR", - "value": 0.4489375, - "normalized_score": 14.950520833333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.45403922872340424, - "normalized_score": 39.33769208037825 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-07", - "submission_date": "2025-02-07", - "generation": 1, - "base_model": "Sakalti/Saka-7.6B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.3050669573744147 - } - }, - { - "id": "Sakalti/SakaMoe-3x1.6B-Instruct_float16_c5b2a5ae7856f0a29bd7e6d1a4cecd622d48e3e1_False", - "model": { - "name": "Sakalti/SakaMoe-3x1.6B-Instruct", - "sha": 
"c5b2a5ae7856f0a29bd7e6d1a4cecd622d48e3e1", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2MoeForCausalLM", - "average_score": 8.276328372160824, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23708094522533543, - "normalized_score": 23.708094522533543 - }, - "bbh": { - "name": "BBH", - "value": 0.328247997224552, - "normalized_score": 7.525328438833678 - }, - "math": { - "name": "MATH Level 5", - "value": 0.054380664652567974, - "normalized_score": 5.438066465256798 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26677852348993286, - "normalized_score": 2.2371364653243813 - }, - "musr": { - "name": "MUSR", - "value": 0.33421875, - "normalized_score": 0.9440104166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.18824800531914893, - "normalized_score": 9.80533392434988 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-25", - "submission_date": "2025-02-09", - "generation": 1, - "base_model": "Sakalti/SakaMoe-3x1.6B-Instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.572, - "co2_cost": 1.0961688491425574 - } - }, - { - "id": "Sakalti/SakalFusion-7B-Alpha_bfloat16_1af757e13a2f54fbb1d5bcb1f25c7d287af0b051_False", - "model": { - "name": "Sakalti/SakalFusion-7B-Alpha", - "sha": "1af757e13a2f54fbb1d5bcb1f25c7d287af0b051", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 32.1092425705961, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5289653674472622, - "normalized_score": 52.89653674472622 - }, - "bbh": { - "name": "BBH", - "value": 0.559133672829116, - "normalized_score": 36.780545262095615 - }, - "math": { - "name": "MATH Level 5", - "value": 0.38444108761329304, - "normalized_score": 38.4441087613293 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32550335570469796, - "normalized_score": 10.067114093959727 - }, - "musr": { - "name": "MUSR", - "value": 0.4581458333333333, - "normalized_score": 15.868229166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4473902925531915, - "normalized_score": 38.598921394799056 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-13", - "submission_date": "2025-01-13", - "generation": 1, - "base_model": "Sakalti/SakalFusion-7B-Alpha (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.2594490151745308 - } - }, - { - "id": "Sakalti/SakalFusion-7B-Beta_float16_32498fb3cfba3d5821c71987dda06443ff3a7657_False", - "model": { - "name": "Sakalti/SakalFusion-7B-Beta", - "sha": "32498fb3cfba3d5821c71987dda06443ff3a7657", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.453145556903973, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.18090222830977362, - "normalized_score": 18.09022283097736 - }, - "bbh": { - "name": "BBH", - "value": 0.2881298650933641, - "normalized_score": 1.4987820116285064 - }, - "math": { - "name": 
"MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24328859060402686, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3872083333333333, - "normalized_score": 6.134375000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10895944148936171, - "normalized_score": 0.9954934988179669 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-28", - "submission_date": "2025-01-28", - "generation": 1, - "base_model": "Sakalti/SakalFusion-7B-Beta (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 1.3951089379366615 - } - }, - { - "id": "Sakalti/Tara-3.8B-v1.1_float16_3938e99fc1bbdb8fd5faf47f811778cce1ca1325_False", - "model": { - "name": "Sakalti/Tara-3.8B-v1.1", - "sha": "3938e99fc1bbdb8fd5faf47f811778cce1ca1325", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.968635956123418, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.40621661635571393, - "normalized_score": 40.6216616355714 - }, - "bbh": { - "name": "BBH", - "value": 0.4885743296577029, - "normalized_score": 28.913837340499914 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11555891238670694, - "normalized_score": 11.555891238670695 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.4779583333333333, - "normalized_score": 19.444791666666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.328125, - "normalized_score": 25.34722222222222 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-10", - "submission_date": "2025-01-10", - "generation": 3, - "base_model": "microsoft/Phi-3.5-mini-instruct", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 3.821, - "co2_cost": 0.8283155253074412 - } - }, - { - "id": "Sakalti/light-1.1-3B_float16_55faef550b5780b6d2e65ddb26b2543a3cfb5771_False", - "model": { - "name": "Sakalti/light-1.1-3B", - "sha": "55faef550b5780b6d2e65ddb26b2543a3cfb5771", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 6.923496536132208, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.27345110972220377, - "normalized_score": 27.345110972220375 - }, - "bbh": { - "name": "BBH", - "value": 0.28027723572953045, - "normalized_score": 1.8805156390567757 - }, - "math": { - "name": "MATH Level 5", - "value": 0.011329305135951661, - "normalized_score": 1.1329305135951662 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2617449664429531, - "normalized_score": 1.5659955257270781 - }, - "musr": { - "name": "MUSR", - "value": 0.3900625, - "normalized_score": 7.291145833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12092752659574468, - "normalized_score": 2.3252807328605196 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - 
"upload_date": "2025-01-07", - "submission_date": "2025-01-07", - "generation": 1, - "base_model": "Sakalti/light-3B", - "hub_license": "other", - "hub_hearts": 1, - "params_billions": 3.086, - "co2_cost": 1.4902297180434927 - } - }, - { - "id": "Sakalti/light-3B_float16_76875793661adee95bed723ce9cc8f40a769d62e_False", - "model": { - "name": "Sakalti/light-3B", - "sha": "76875793661adee95bed723ce9cc8f40a769d62e", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 25.284693562377303, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5337360425892188, - "normalized_score": 53.37360425892189 - }, - "bbh": { - "name": "BBH", - "value": 0.4831034368803701, - "normalized_score": 27.467849685442218 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2590634441087613, - "normalized_score": 25.90634441087613 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2953020134228188, - "normalized_score": 6.040268456375841 - }, - "musr": { - "name": "MUSR", - "value": 0.40149999999999997, - "normalized_score": 8.0875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3774933510638298, - "normalized_score": 30.832594562647753 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-06", - "submission_date": "2025-01-06", - "generation": 0, - "base_model": "Sakalti/light-3B", - "hub_license": "other", - "hub_hearts": 2, - "params_billions": 3.397, - "co2_cost": 2.4053189900210823 - } - }, - { - "id": "Sakalti/light-3b-beta_bfloat16_b82688ca8f69ae2368bd1d7d44f63442d046f26f_False", - "model": { - "name": "Sakalti/light-3b-beta", - "sha": "b82688ca8f69ae2368bd1d7d44f63442d046f26f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 25.809275245445864, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5485489612007252, - "normalized_score": 54.85489612007252 - }, - "bbh": { - "name": "BBH", - "value": 0.48152297262112204, - "normalized_score": 27.27438755254684 - }, - "math": { - "name": "MATH Level 5", - "value": 0.277190332326284, - "normalized_score": 27.719033232628398 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.40146875, - "normalized_score": 7.983593750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3758311170212766, - "normalized_score": 30.647901891252953 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-14", - "submission_date": "2025-01-14", - "generation": 1, - "base_model": "Sakalti/light-3b-beta (Merge)", - "hub_license": "other", - "hub_hearts": 1, - "params_billions": 3.397, - "co2_cost": 1.5219403009060517 - } - }, - { - "id": "Sakalti/light-7b-beta_float16_e59962df36f6fc47815babf3e8c38ed965eb651c_False", - "model": { - "name": "Sakalti/light-7b-beta", - "sha": "e59962df36f6fc47815babf3e8c38ed965eb651c", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 
32.68982935684443, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6233870574520051, - "normalized_score": 62.338705745200514 - }, - "bbh": { - "name": "BBH", - "value": 0.5548193064288276, - "normalized_score": 36.37504670970679 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3768882175226586, - "normalized_score": 37.68882175226586 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3213087248322148, - "normalized_score": 9.507829977628639 - }, - "musr": { - "name": "MUSR", - "value": 0.42906249999999996, - "normalized_score": 11.832812500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.445561835106383, - "normalized_score": 38.39575945626477 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-14", - "submission_date": "2025-01-14", - "generation": 1, - "base_model": "Sakalti/light-7b-beta (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.3886399483488214 - } - }, - { - "id": "Sakalti/llama-3-yanyuedao-8b-instruct_bfloat16_d36d76a88f6af38e923be426623ed1211e7099d8_False", - "model": { - "name": "Sakalti/llama-3-yanyuedao-8b-instruct", - "sha": "d36d76a88f6af38e923be426623ed1211e7099d8", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.935841337684982, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21857116894284942, - "normalized_score": 21.857116894284943 - }, - "bbh": { - "name": "BBH", - "value": 0.43497849055247495, - "normalized_score": 20.593160052037973 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03851963746223565, - "normalized_score": 3.8519637462235647 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.41985416666666664, - "normalized_score": 10.715104166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29105718085106386, - "normalized_score": 21.228575650118206 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-13", - "submission_date": "2024-12-13", - "generation": 1, - "base_model": "Sakalti/llama-3-yanyuedao-8b-instruct (Merge)", - "hub_license": "llama3", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.494317719320831 - } - }, - { - "id": "Sakalti/magro-7B_float16_db8d1d3aa647ac59884448d23eef35187839f123_False", - "model": { - "name": "Sakalti/magro-7B", - "sha": "db8d1d3aa647ac59884448d23eef35187839f123", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 12.382552499684506, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.13439008497453425, - "normalized_score": 13.439008497453422 - }, - "bbh": { - "name": "BBH", - "value": 0.4185526485966236, - "normalized_score": 19.54888376105943 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02039274924471299, - "normalized_score": 2.0392749244712993 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2953020134228188, - "normalized_score": 6.040268456375841 - }, 
- "musr": { - "name": "MUSR", - "value": 0.44598958333333333, - "normalized_score": 13.615364583333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2765126329787234, - "normalized_score": 19.61251477541371 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-11", - "submission_date": "2024-12-12", - "generation": 2, - "base_model": "mistralai/Mistral-7B-v0.1", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 0.9045520120665085 - } - }, - { - "id": "Sakalti/mergekit-01_float16_cb69064dc643b02319e71ac48e5a93e24e9066e2_False", - "model": { - "name": "Sakalti/mergekit-01", - "sha": "cb69064dc643b02319e71ac48e5a93e24e9066e2", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 32.68982935684443, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6233870574520051, - "normalized_score": 62.338705745200514 - }, - "bbh": { - "name": "BBH", - "value": 0.5548193064288276, - "normalized_score": 36.37504670970679 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3768882175226586, - "normalized_score": 37.68882175226586 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3213087248322148, - "normalized_score": 9.507829977628639 - }, - "musr": { - "name": "MUSR", - "value": 0.42906249999999996, - "normalized_score": 11.832812500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.445561835106383, - "normalized_score": 38.39575945626477 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-01", - "submission_date": "2025-02-01", - "generation": 1, - "base_model": "Sakalti/mergekit-01 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.3946770910685848 - } - }, - { - "id": "Sakalti/mergekit-della_linear-vmeykci_float16_af5cee0d7e9b38d8d0e86a3822894ec09f87bd6f_False", - "model": { - "name": "Sakalti/mergekit-della_linear-vmeykci", - "sha": "af5cee0d7e9b38d8d0e86a3822894ec09f87bd6f", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.741275884240794, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1126078804239418, - "normalized_score": 11.26078804239418 - }, - "bbh": { - "name": "BBH", - "value": 0.28155028620092587, - "normalized_score": 0.975893246187364 - }, - "math": { - "name": "MATH Level 5", - "value": 0.010574018126888218, - "normalized_score": 1.0574018126888218 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.38968749999999996, - "normalized_score": 6.377604166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10887632978723404, - "normalized_score": 0.9862588652482256 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": 
"Sakalti/mergekit-della_linear-vmeykci (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 1.3824972617381066 - } - }, - { - "id": "Sakalti/model-3_bfloat16_9ffaebdaad62d4cf6c0f3135ab49156c45009158_False", - "model": { - "name": "Sakalti/model-3", - "sha": "9ffaebdaad62d4cf6c0f3135ab49156c45009158", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 32.572441221864636, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6263846593704703, - "normalized_score": 62.63846593704703 - }, - "bbh": { - "name": "BBH", - "value": 0.554216994021922, - "normalized_score": 36.31775528213533 - }, - "math": { - "name": "MATH Level 5", - "value": 0.37084592145015105, - "normalized_score": 37.08459214501511 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3213087248322148, - "normalized_score": 9.507829977628639 - }, - "musr": { - "name": "MUSR", - "value": 0.4263958333333333, - "normalized_score": 11.499479166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4454787234042553, - "normalized_score": 38.38652482269504 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-04", - "submission_date": "2025-01-10", - "generation": 1, - "base_model": "Sakalti/model-3 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.3637598052620072 - } - }, - { - "id": "Sakalti/qwen2.5-2.3B_float16_8e324d7fd70a99f5d2bca70474d30b947af7d20a_False", - "model": { - "name": "Sakalti/qwen2.5-2.3B", - "sha": "8e324d7fd70a99f5d2bca70474d30b947af7d20a", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2Model", - "average_score": 3.7013246212545643, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.12879493078365403, - "normalized_score": 12.879493078365403 - }, - "bbh": { - "name": "BBH", - "value": 0.2849449123234445, - "normalized_score": 1.1167186571598344 - }, - "math": { - "name": "MATH Level 5", - "value": 0.005287009063444109, - "normalized_score": 0.5287009063444109 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2516778523489933, - "normalized_score": 0.22371364653244186 - }, - "musr": { - "name": "MUSR", - "value": 0.38565625, - "normalized_score": 5.540364583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11727061170212766, - "normalized_score": 1.9189568557919614 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-18", - "submission_date": "2025-01-18", - "generation": 0, - "base_model": "Sakalti/qwen2.5-2.3B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 2.339, - "co2_cost": 1.213831595366386 - } - }, - { - "id": "Sakalti/tara-3.8B_float16_85a61437d7c52e2822f3d9059b480cc7e7d235a4_False", - "model": { - "name": "Sakalti/tara-3.8B", - "sha": "85a61437d7c52e2822f3d9059b480cc7e7d235a4", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.994031536147386, - "has_chat_template": false - }, - "evaluations": { - 
"ifeval": { - "name": "IFEval", - "value": 0.4077403511571519, - "normalized_score": 40.77403511571519 - }, - "bbh": { - "name": "BBH", - "value": 0.4885743296577029, - "normalized_score": 28.913837340499914 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11555891238670694, - "normalized_score": 11.555891238670695 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.4779583333333333, - "normalized_score": 19.444791666666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.328125, - "normalized_score": 25.34722222222222 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-10", - "submission_date": "2025-01-10", - "generation": 2, - "base_model": "microsoft/Phi-3.5-mini-instruct", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 3.821, - "co2_cost": 0.8321437628531957 - } - }, - { - "id": "Sakalti/ultiima-14B_float16_fdc335cfceb6cf94fcba302cd9a834a55f9409c4_False", - "model": { - "name": "Sakalti/ultiima-14B", - "sha": "fdc335cfceb6cf94fcba302cd9a834a55f9409c4", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.61010623253845, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5700563394016764, - "normalized_score": 57.00563394016764 - }, - "bbh": { - "name": "BBH", - "value": 0.6491153472177067, - "normalized_score": 49.51071627853103 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4697885196374622, - "normalized_score": 46.97885196374622 - }, - "gpqa": { - "name": "GPQA", - "value": 0.37416107382550334, - "normalized_score": 16.554809843400445 - }, - "musr": { - "name": "MUSR", - "value": 0.4717604166666667, - "normalized_score": 18.936718749999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5380651595744681, - "normalized_score": 48.67390661938534 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-17", - "submission_date": "2025-01-17", - "generation": 1, - "base_model": "Sakalti/ultiima-14B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 14.77, - "co2_cost": 3.9443342818350264 - } - }, - { - "id": "Sakalti/ultiima-14B-v0.2_float16_bca52733accffafd741abe8c56a203f4836ec742_False", - "model": { - "name": "Sakalti/ultiima-14B-v0.2", - "sha": "bca52733accffafd741abe8c56a203f4836ec742", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.962282081659104, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7069930007934502, - "normalized_score": 70.69930007934502 - }, - "bbh": { - "name": "BBH", - "value": 0.6472012505703305, - "normalized_score": 49.51261036714752 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3995468277945619, - "normalized_score": 39.95468277945619 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3825503355704698, - "normalized_score": 17.67337807606264 - }, - "musr": { - "name": "MUSR", - "value": 0.4793541666666667, - "normalized_score": 19.185937499999998 - }, - 
"mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5387300531914894, - "normalized_score": 48.74778368794326 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-24", - "submission_date": "2025-01-24", - "generation": 1, - "base_model": "Sakalti/ultiima-14B-v0.2 (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 3.8941810988986063 - } - }, - { - "id": "Sakalti/ultiima-14B-v0.3_float16_ca40125982440de39096aca1fcac941bb9f36af4_False", - "model": { - "name": "Sakalti/ultiima-14B-v0.3", - "sha": "ca40125982440de39096aca1fcac941bb9f36af4", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.38402709174191, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7040452665593957, - "normalized_score": 70.40452665593958 - }, - "bbh": { - "name": "BBH", - "value": 0.639820771660141, - "normalized_score": 48.445133231959545 - }, - "math": { - "name": "MATH Level 5", - "value": 0.39652567975830816, - "normalized_score": 39.65256797583081 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3766778523489933, - "normalized_score": 16.890380313199106 - }, - "musr": { - "name": "MUSR", - "value": 0.47541666666666665, - "normalized_score": 18.727083333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5336602393617021, - "normalized_score": 48.18447104018913 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-25", - "generation": 1, - "base_model": "Sakalti/ultiima-14B-v0.3 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 14.766, - "co2_cost": 5.831010188497662 - } - }, - { - "id": "Sakalti/ultiima-14B-v0.4_float16_d5ab61746755307af49a5709e0439d54d8b3078f_False", - "model": { - "name": "Sakalti/ultiima-14B-v0.4", - "sha": "d5ab61746755307af49a5709e0439d54d8b3078f", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 33.64162669559072, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3008284684636764, - "normalized_score": 30.082846846367637 - }, - "bbh": { - "name": "BBH", - "value": 0.6420007859105136, - "normalized_score": 48.24803486437804 - }, - "math": { - "name": "MATH Level 5", - "value": 0.35347432024169184, - "normalized_score": 35.34743202416919 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3959731543624161, - "normalized_score": 19.463087248322143 - }, - "musr": { - "name": "MUSR", - "value": 0.4885625, - "normalized_score": 21.170312499999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.527842420212766, - "normalized_score": 47.538046690307326 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "Sakalti/ultiima-14B-v0.4 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 14.766, - "co2_cost": 3.2688358355493436 - } - }, - { - 
"id": "Sakalti/ultiima-32B_float16_843652316df8b2b02d829325fbe4607ccf262ef4_False", - "model": { - "name": "Sakalti/ultiima-32B", - "sha": "843652316df8b2b02d829325fbe4607ccf262ef4", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 45.40321787320272, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6854357549080883, - "normalized_score": 68.54357549080883 - }, - "bbh": { - "name": "BBH", - "value": 0.7037285782797875, - "normalized_score": 58.112446786765965 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4962235649546828, - "normalized_score": 49.62235649546828 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3808724832214765, - "normalized_score": 17.4496644295302 - }, - "musr": { - "name": "MUSR", - "value": 0.4994791666666667, - "normalized_score": 24.134895833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5910073138297872, - "normalized_score": 54.55636820330969 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-23", - "submission_date": "2024-12-23", - "generation": 1, - "base_model": "Sakalti/ultiima-32B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 5, - "params_billions": 32.764, - "co2_cost": 10.511603801652418 - } - }, - { - "id": "Sakalti/ultiima-72B_float16_4554fb2249b139d8f62a31c2985f8909ec905798_False", - "model": { - "name": "Sakalti/ultiima-72B", - "sha": "4554fb2249b139d8f62a31c2985f8909ec905798", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 46.76723892225559, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7140121544169471, - "normalized_score": 71.4012154416947 - }, - "bbh": { - "name": "BBH", - "value": 0.7217809739144654, - "normalized_score": 61.10313258693403 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5354984894259819, - "normalized_score": 53.54984894259819 - }, - "gpqa": { - "name": "GPQA", - "value": 0.41442953020134227, - "normalized_score": 21.923937360178968 - }, - "musr": { - "name": "MUSR", - "value": 0.46518750000000003, - "normalized_score": 18.115104166666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.590591755319149, - "normalized_score": 54.510195035460995 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-09", - "submission_date": "2025-01-09", - "generation": 1, - "base_model": "Sakalti/ultiima-72B (Merge)", - "hub_license": "other", - "hub_hearts": 1, - "params_billions": 72.706, - "co2_cost": 36.96552782603761 - } - }, - { - "id": "Sakalti/ultiima-72B-v1.5_float16_971db2a0f7e67cb7d5b34e35d2d2ec5b0591eb42_False", - "model": { - "name": "Sakalti/ultiima-72B-v1.5", - "sha": "971db2a0f7e67cb7d5b34e35d2d2ec5b0591eb42", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 44.89912916202218, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6549610588793291, - "normalized_score": 65.49610588793291 - }, - "bbh": { - "name": "BBH", - 
"value": 0.7391727188223717, - "normalized_score": 63.438206058920265 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4395770392749245, - "normalized_score": 43.957703927492446 - }, - "gpqa": { - "name": "GPQA", - "value": 0.41359060402684567, - "normalized_score": 21.812080536912756 - }, - "musr": { - "name": "MUSR", - "value": 0.46909375000000003, - "normalized_score": 18.536718749999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.6053856382978723, - "normalized_score": 56.153959810874696 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-25", - "generation": 1, - "base_model": "Sakalti/ultiima-72B-v1.5 (Merge)", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 72.706, - "co2_cost": 32.194072335681284 - } - }, - { - "id": "Salesforce/LLaMA-3-8B-SFR-Iterative-DPO-R_bfloat16_ad7d1aed82eb6d8ca4b3aad627ff76f72ab34f70_True", - "model": { - "name": "Salesforce/LLaMA-3-8B-SFR-Iterative-DPO-R", - "sha": "ad7d1aed82eb6d8ca4b3aad627ff76f72ab34f70", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.527751291550988, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.38156203318306536, - "normalized_score": 38.15620331830654 - }, - "bbh": { - "name": "BBH", - "value": 0.5011950469666927, - "normalized_score": 29.150289349765558 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09138972809667674, - "normalized_score": 9.138972809667674 - }, - "gpqa": { - "name": "GPQA", - "value": 0.287751677852349, - "normalized_score": 5.033557046979867 - }, - "musr": { - "name": "MUSR", - "value": 0.36333333333333334, - "normalized_score": 5.550000000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3172373670212766, - "normalized_score": 24.137485224586285 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-09", - "submission_date": "2024-07-02", - "generation": 0, - "base_model": "Salesforce/LLaMA-3-8B-SFR-Iterative-DPO-R", - "hub_license": "llama3", - "hub_hearts": 78, - "params_billions": 8.03, - "co2_cost": 1.7147848610366037 - } - }, - { - "id": "SanjiWatsuki/Kunoichi-DPO-v2-7B_float16_5278247beb482c4fceff2294570236d68b74d132_True", - "model": { - "name": "SanjiWatsuki/Kunoichi-DPO-v2-7B", - "sha": "5278247beb482c4fceff2294570236d68b74d132", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.5816311223652, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5431034100630772, - "normalized_score": 54.31034100630771 - }, - "bbh": { - "name": "BBH", - "value": 0.4415592450869275, - "normalized_score": 20.903472484123803 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07628398791540786, - "normalized_score": 7.628398791540786 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.41883333333333334, - "normalized_score": 11.087500000000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 
0.3106715425531915, - "normalized_score": 23.407949172576835 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-01-13", - "submission_date": "2024-06-28", - "generation": 0, - "base_model": "SanjiWatsuki/Kunoichi-DPO-v2-7B", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 84, - "params_billions": 7.242, - "co2_cost": 2.4164136863768286 - } - }, - { - "id": "SanjiWatsuki/Silicon-Maid-7B_bfloat16_4e43d81f3fff1091df7cb2d85e9e306d25235701_True", - "model": { - "name": "SanjiWatsuki/Silicon-Maid-7B", - "sha": "4e43d81f3fff1091df7cb2d85e9e306d25235701", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.412095533357363, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5367835121920947, - "normalized_score": 53.678351219209475 - }, - "bbh": { - "name": "BBH", - "value": 0.4127972831009074, - "normalized_score": 16.692746753586437 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0649546827794562, - "normalized_score": 6.495468277945619 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.41883333333333334, - "normalized_score": 11.087500000000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.308344414893617, - "normalized_score": 23.149379432624112 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-12-27", - "submission_date": "2024-09-08", - "generation": 0, - "base_model": "SanjiWatsuki/Silicon-Maid-7B", - "hub_license": "cc-by-4.0", - "hub_hearts": 112, - "params_billions": 7.242, - "co2_cost": 1.2123266045650976 - } - }, - { - "id": "Sao10K/70B-L3.3-Cirrus-x1_bfloat16_31d7ca33f3098d1eabe6f87a2c5b5bde85b20f35_False", - "model": { - "name": "Sao10K/70B-L3.3-Cirrus-x1", - "sha": "31d7ca33f3098d1eabe6f87a2c5b5bde85b20f35", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 43.0025823792082, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6680751517085777, - "normalized_score": 66.80751517085777 - }, - "bbh": { - "name": "BBH", - "value": 0.7028970787833794, - "normalized_score": 57.132312166381276 - }, - "math": { - "name": "MATH Level 5", - "value": 0.37386706948640486, - "normalized_score": 37.38670694864049 - }, - "gpqa": { - "name": "GPQA", - "value": 0.44966442953020136, - "normalized_score": 26.62192393736018 - }, - "musr": { - "name": "MUSR", - "value": 0.4841666666666667, - "normalized_score": 21.42083333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5378158244680851, - "normalized_score": 48.646202718676115 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-06", - "submission_date": "2025-01-12", - "generation": 1, - "base_model": "Sao10K/70B-L3.3-Cirrus-x1 (Merge)", - "hub_license": "llama3.3", - "hub_hearts": 28, - "params_billions": 70.554, - "co2_cost": 27.049162520155996 - } - }, - { - "id": 
"Sao10K/Fimbulvetr-11B-v2_float16_b2dcd534dc3a53ff84e60a53b87816185169be19_True", - "model": { - "name": "Sao10K/Fimbulvetr-11B-v2", - "sha": "b2dcd534dc3a53ff84e60a53b87816185169be19", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.089256463822462, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5100056738343152, - "normalized_score": 51.000567383431516 - }, - "bbh": { - "name": "BBH", - "value": 0.4544495065184342, - "normalized_score": 22.65512081005865 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06797583081570997, - "normalized_score": 6.797583081570997 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.43536458333333333, - "normalized_score": 14.920572916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33011968085106386, - "normalized_score": 25.56885342789598 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-02-06", - "submission_date": "2024-07-01", - "generation": 0, - "base_model": "Sao10K/Fimbulvetr-11B-v2", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 176, - "params_billions": 10.732, - "co2_cost": 1.6424083021526863 - } - }, - { - "id": "Sao10K/L3-70B-Euryale-v2.1_bfloat16_36ad832b771cd783ea7ad00ed39e61f679b1a7c6_True", - "model": { - "name": "Sao10K/L3-70B-Euryale-v2.1", - "sha": "36ad832b771cd783ea7ad00ed39e61f679b1a7c6", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 35.43613152375533, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7384417789243651, - "normalized_score": 73.84417789243652 - }, - "bbh": { - "name": "BBH", - "value": 0.6471322811268715, - "normalized_score": 48.70118672944805 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21374622356495468, - "normalized_score": 21.37462235649547 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3313758389261745, - "normalized_score": 10.850111856823268 - }, - "musr": { - "name": "MUSR", - "value": 0.42091666666666666, - "normalized_score": 12.247916666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5103889627659575, - "normalized_score": 45.598773640661946 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-11", - "submission_date": "2024-07-01", - "generation": 0, - "base_model": "Sao10K/L3-70B-Euryale-v2.1", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 134, - "params_billions": 70.554, - "co2_cost": 17.236697092399766 - } - }, - { - "id": "Sao10K/L3-70B-Euryale-v2.1_float16_36ad832b771cd783ea7ad00ed39e61f679b1a7c6_True", - "model": { - "name": "Sao10K/L3-70B-Euryale-v2.1", - "sha": "36ad832b771cd783ea7ad00ed39e61f679b1a7c6", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 35.473252668728755, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7281003293483512, - "normalized_score": 
72.81003293483514 - }, - "bbh": { - "name": "BBH", - "value": 0.6502778992745041, - "normalized_score": 49.193003079898574 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22432024169184292, - "normalized_score": 22.43202416918429 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3313758389261745, - "normalized_score": 10.850111856823268 - }, - "musr": { - "name": "MUSR", - "value": 0.41958333333333336, - "normalized_score": 12.047916666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5095578457446809, - "normalized_score": 45.50642730496454 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-11", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "Sao10K/L3-70B-Euryale-v2.1", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 134, - "params_billions": 70.554, - "co2_cost": 8.616454319859539 - } - }, - { - "id": "Sao10K/L3-8B-Lunaris-v1_bfloat16_8479c2a7ee119c935b9a02c921cc2a85b698dfe8_True", - "model": { - "name": "Sao10K/L3-8B-Lunaris-v1", - "sha": "8479c2a7ee119c935b9a02c921cc2a85b698dfe8", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.577983918433663, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6894573066131198, - "normalized_score": 68.94573066131198 - }, - "bbh": { - "name": "BBH", - "value": 0.5235299282515419, - "normalized_score": 32.11434845509543 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09063444108761329, - "normalized_score": 9.06344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.3726666666666667, - "normalized_score": 5.550000000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3787400265957447, - "normalized_score": 30.971114066193856 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-26", - "submission_date": "2024-07-22", - "generation": 0, - "base_model": "Sao10K/L3-8B-Lunaris-v1", - "hub_license": "llama3", - "hub_hearts": 114, - "params_billions": 8.03, - "co2_cost": 1.3209972770416638 - } - }, - { - "id": "Sao10K/L3-8B-Niitama-v1_float16_507a802294c653056dac604a704fd55c9a566695_True", - "model": { - "name": "Sao10K/L3-8B-Niitama-v1", - "sha": "507a802294c653056dac604a704fd55c9a566695", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.7912140312776, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6790659893526954, - "normalized_score": 67.90659893526953 - }, - "bbh": { - "name": "BBH", - "value": 0.5302980131787137, - "normalized_score": 33.209787820927936 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09818731117824774, - "normalized_score": 9.818731117824774 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.3806666666666667, - "normalized_score": 6.083333333333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3700964095744681, - "normalized_score": 
30.010712174940902 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-07", - "submission_date": "2025-01-07", - "generation": 0, - "base_model": "Sao10K/L3-8B-Niitama-v1", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 40, - "params_billions": 8.03, - "co2_cost": 1.0607544329948946 - } - }, - { - "id": "Sao10K/L3-8B-Stheno-v3.2_bfloat16_4bb828f6e1b1efd648c39b1ad682c44ff260f018_True", - "model": { - "name": "Sao10K/L3-8B-Stheno-v3.2", - "sha": "4bb828f6e1b1efd648c39b1ad682c44ff260f018", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.884393588195138, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6872841837435781, - "normalized_score": 68.72841837435782 - }, - "bbh": { - "name": "BBH", - "value": 0.522778637171633, - "normalized_score": 32.02159792407502 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09290030211480363, - "normalized_score": 9.290030211480364 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3104026845637584, - "normalized_score": 8.05369127516779 - }, - "musr": { - "name": "MUSR", - "value": 0.3793645833333333, - "normalized_score": 6.453906249999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3768284574468085, - "normalized_score": 30.758717494089833 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-05", - "submission_date": "2024-06-30", - "generation": 0, - "base_model": "Sao10K/L3-8B-Stheno-v3.2", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 282, - "params_billions": 8.03, - "co2_cost": 1.783344467254921 - } - }, - { - "id": "Sao10K/L3-8B-Stheno-v3.3-32K_bfloat16_1a59d163e079c7e7f1542553d085853119960f0c_True", - "model": { - "name": "Sao10K/L3-8B-Stheno-v3.3-32K", - "sha": "1a59d163e079c7e7f1542553d085853119960f0c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 12.649981104473916, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.46037181345496614, - "normalized_score": 46.03718134549661 - }, - "bbh": { - "name": "BBH", - "value": 0.3844012923008206, - "normalized_score": 13.512008983197541 - }, - "math": { - "name": "MATH Level 5", - "value": 0.014350453172205438, - "normalized_score": 1.4350453172205437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25671140939597314, - "normalized_score": 0.8948545861297527 - }, - "musr": { - "name": "MUSR", - "value": 0.3725416666666667, - "normalized_score": 4.067708333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1895777925531915, - "normalized_score": 9.95308806146572 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-22", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "Sao10K/L3-8B-Stheno-v3.3-32K", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 60, - "params_billions": 8.03, - "co2_cost": 2.937264492143905 - } - }, - { - "id": 
"Sao10K/MN-12B-Lyra-v3_bfloat16_da76fa39d128ca84065427189bb228f2dfc6b8a3_True", - "model": { - "name": "Sao10K/MN-12B-Lyra-v3", - "sha": "da76fa39d128ca84065427189bb228f2dfc6b8a3", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.63563111181339, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4486063644463357, - "normalized_score": 44.86063644463357 - }, - "bbh": { - "name": "BBH", - "value": 0.4803954360397243, - "normalized_score": 25.870963383072453 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09365558912386707, - "normalized_score": 9.365558912386707 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.40190624999999996, - "normalized_score": 9.038281250000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32488364361702127, - "normalized_score": 24.987071513002363 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-27", - "submission_date": "2024-09-03", - "generation": 0, - "base_model": "Sao10K/MN-12B-Lyra-v3", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 35, - "params_billions": 12.248, - "co2_cost": 4.169329542761872 - } - }, - { - "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V1-32B_bfloat16_2c355e4b28d552b5fd28c461730272a7e0ccc893_True", - "model": { - "name": "Saxo/Linkbricks-Horizon-AI-Avengers-V1-32B", - "sha": "2c355e4b28d552b5fd28c461730272a7e0ccc893", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 47.33695645964031, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7971681804279312, - "normalized_score": 79.71681804279312 - }, - "bbh": { - "name": "BBH", - "value": 0.7000545067146033, - "normalized_score": 57.633928620802884 - }, - "math": { - "name": "MATH Level 5", - "value": 0.6027190332326284, - "normalized_score": 60.27190332326284 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3624161073825503, - "normalized_score": 14.988814317673373 - }, - "musr": { - "name": "MUSR", - "value": 0.45378125, - "normalized_score": 18.155989583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5792885638297872, - "normalized_score": 53.25428486997635 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 2, - "base_model": "Qwen/Qwen2.5-32B", - "hub_license": "apache-2.0", - "hub_hearts": 6, - "params_billions": 32.76, - "co2_cost": 7.953929556785364 - } - }, - { - "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V2-32B_bfloat16_30df08b54b86b6f11bcd3afeb4ba8440085e35b7_True", - "model": { - "name": "Saxo/Linkbricks-Horizon-AI-Avengers-V2-32B", - "sha": "30df08b54b86b6f11bcd3afeb4ba8440085e35b7", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 43.33912719384693, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 
0.7956444456264933, - "normalized_score": 79.56444456264933 - }, - "bbh": { - "name": "BBH", - "value": 0.7023193256341814, - "normalized_score": 58.07744907944319 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5664652567975831, - "normalized_score": 56.646525679758305 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.41663541666666665, - "normalized_score": 11.179427083333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5719747340425532, - "normalized_score": 52.44163711583924 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-30", - "submission_date": "2024-12-30", - "generation": 3, - "base_model": "Qwen/Qwen2.5-32B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 32.76, - "co2_cost": 8.597440968045287 - } - }, - { - "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V3-32B_bfloat16_5a0166310b316f0cb1b3cf9edf99188912fadab0_True", - "model": { - "name": "Saxo/Linkbricks-Horizon-AI-Avengers-V3-32B", - "sha": "5a0166310b316f0cb1b3cf9edf99188912fadab0", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 46.36626657304592, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8248702332034556, - "normalized_score": 82.48702332034557 - }, - "bbh": { - "name": "BBH", - "value": 0.6913199237437709, - "normalized_score": 56.527924000253876 - }, - "math": { - "name": "MATH Level 5", - "value": 0.6178247734138973, - "normalized_score": 61.78247734138973 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33808724832214765, - "normalized_score": 11.74496644295302 - }, - "musr": { - "name": "MUSR", - "value": 0.42745833333333333, - "normalized_score": 13.832291666666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.56640625, - "normalized_score": 51.822916666666664 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-31", - "submission_date": "2024-12-31", - "generation": 4, - "base_model": "Qwen/Qwen2.5-32B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 32.764, - "co2_cost": 7.081442953722984 - } - }, - { - "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V4-32B_bfloat16_99600b01f8e40c6ddf375c05457b1beb9caa0632_True", - "model": { - "name": "Saxo/Linkbricks-Horizon-AI-Avengers-V4-32B", - "sha": "99600b01f8e40c6ddf375c05457b1beb9caa0632", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 45.60530808037683, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7630963620970137, - "normalized_score": 76.30963620970138 - }, - "bbh": { - "name": "BBH", - "value": 0.6920204096666581, - "normalized_score": 56.552203892922165 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5362537764350453, - "normalized_score": 53.625377643504535 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3615771812080537, - "normalized_score": 14.876957494407161 - }, - "musr": { - "name": "MUSR", - "value": 0.4642604166666667, - "normalized_score": 
19.465885416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5752160904255319, - "normalized_score": 52.8017878250591 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-31", - "submission_date": "2024-12-31", - "generation": 5, - "base_model": "Qwen/Qwen2.5-32B", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 32.764, - "co2_cost": 7.50944829748876 - } - }, - { - "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V5-32B_bfloat16_5ff582988d90a386c8ebba1f02834cff8804a60f_True", - "model": { - "name": "Saxo/Linkbricks-Horizon-AI-Avengers-V5-32B", - "sha": "5ff582988d90a386c8ebba1f02834cff8804a60f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 45.672698393079905, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7515558717536137, - "normalized_score": 75.15558717536138 - }, - "bbh": { - "name": "BBH", - "value": 0.6928650089977083, - "normalized_score": 56.67538301839144 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5460725075528701, - "normalized_score": 54.607250755287005 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35570469798657717, - "normalized_score": 14.093959731543624 - }, - "musr": { - "name": "MUSR", - "value": 0.47086458333333336, - "normalized_score": 20.591406250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5762134308510638, - "normalized_score": 52.912603427895974 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-31", - "submission_date": "2024-12-31", - "generation": 6, - "base_model": "Qwen/Qwen2.5-32B", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 32.764, - "co2_cost": 7.573451455506034 - } - }, - { - "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V6-32B_bfloat16_58dc292d0c0f112778527030e0f2ee50a7fa9c9c_True", - "model": { - "name": "Saxo/Linkbricks-Horizon-AI-Avengers-V6-32B", - "sha": "58dc292d0c0f112778527030e0f2ee50a7fa9c9c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 46.23110024344452, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8208985491828349, - "normalized_score": 82.08985491828349 - }, - "bbh": { - "name": "BBH", - "value": 0.6889783858832969, - "normalized_score": 56.186576093303636 - }, - "math": { - "name": "MATH Level 5", - "value": 0.622356495468278, - "normalized_score": 62.2356495468278 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3347315436241611, - "normalized_score": 11.297539149888143 - }, - "musr": { - "name": "MUSR", - "value": 0.42742708333333335, - "normalized_score": 13.661718749999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5672373670212766, - "normalized_score": 51.915263002364064 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-01", - "submission_date": "2025-01-01", - "generation": 7, - "base_model": "Qwen/Qwen2.5-32B", - "hub_license": "apache-2.0", - 
"hub_hearts": 1, - "params_billions": 32.76, - "co2_cost": 7.305351304120306 - } - }, - { - "id": "Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V2-27B_bfloat16_1edbb9dd88fc3a73197c1bde3105dc9b6e8e13d9_True", - "model": { - "name": "Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V2-27B", - "sha": "1edbb9dd88fc3a73197c1bde3105dc9b6e8e13d9", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 37.17096025005312, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8145786513118525, - "normalized_score": 81.45786513118526 - }, - "bbh": { - "name": "BBH", - "value": 0.6463223196116569, - "normalized_score": 49.645052903738225 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2802114803625378, - "normalized_score": 28.02114803625378 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34731543624161076, - "normalized_score": 12.975391498881436 - }, - "musr": { - "name": "MUSR", - "value": 0.4139375, - "normalized_score": 10.942187500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.45985704787234044, - "normalized_score": 39.984116430260045 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 4, - "base_model": "google/gemma-2-27b", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 27.227, - "co2_cost": 8.744957095859188 - } - }, - { - "id": "Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V3-27B_bfloat16_077dbd8265b83cc99e5b37f0af97b73008ec5866_True", - "model": { - "name": "Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V3-27B", - "sha": "077dbd8265b83cc99e5b37f0af97b73008ec5866", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 37.3287116618655, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.81420408959339, - "normalized_score": 81.420408959339 - }, - "bbh": { - "name": "BBH", - "value": 0.6403963618749583, - "normalized_score": 48.69836786250173 - }, - "math": { - "name": "MATH Level 5", - "value": 0.24924471299093656, - "normalized_score": 24.924471299093657 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35906040268456374, - "normalized_score": 14.541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.44667708333333334, - "normalized_score": 15.23463541666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4523769946808511, - "normalized_score": 39.15299940898345 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 5, - "base_model": "google/gemma-2-27b", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 27.227, - "co2_cost": 8.22797798661364 - } - }, - { - "id": "Saxo/Linkbricks-Horizon-AI-Korean-Superb-22B_bfloat16_4fb08002ca40d2fdfbf1334049e8e1264daf50c1_True", - "model": { - "name": "Saxo/Linkbricks-Horizon-AI-Korean-Superb-22B", - "sha": "4fb08002ca40d2fdfbf1334049e8e1264daf50c1", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": 
"MistralForCausalLM", - "average_score": 29.331660638255602, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6766679078179231, - "normalized_score": 67.66679078179232 - }, - "bbh": { - "name": "BBH", - "value": 0.5625539568927603, - "normalized_score": 36.57802382251899 - }, - "math": { - "name": "MATH Level 5", - "value": 0.23716012084592145, - "normalized_score": 23.716012084592144 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3263422818791946, - "normalized_score": 10.17897091722595 - }, - "musr": { - "name": "MUSR", - "value": 0.3907708333333333, - "normalized_score": 5.946354166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3871343085106383, - "normalized_score": 31.903812056737586 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-08", - "submission_date": "2024-12-25", - "generation": 1, - "base_model": "mistralai/Mistral-Small-Instruct-2409", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 22.247, - "co2_cost": 2.026993687258279 - } - }, - { - "id": "Saxo/Linkbricks-Horizon-AI-Korean-Superb-27B_bfloat16_0b4cf265801f8ee050a54eea7ee51d3142e98c74_False", - "model": { - "name": "Saxo/Linkbricks-Horizon-AI-Korean-Superb-27B", - "sha": "0b4cf265801f8ee050a54eea7ee51d3142e98c74", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 38.3619715244158, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7767601076255447, - "normalized_score": 77.67601076255447 - }, - "bbh": { - "name": "BBH", - "value": 0.6518345685119445, - "normalized_score": 50.60725661580997 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2719033232628399, - "normalized_score": 27.19033232628399 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3598993288590604, - "normalized_score": 14.65324384787472 - }, - "musr": { - "name": "MUSR", - "value": 0.47913541666666665, - "normalized_score": 19.52526041666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4646775265957447, - "normalized_score": 40.51972517730496 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-16", - "submission_date": "2024-11-17", - "generation": 2, - "base_model": "google/gemma-2-27b", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 27.227, - "co2_cost": 7.776938011044043 - } - }, - { - "id": "Saxo/Linkbricks-Horizon-AI-Superb-27B_bfloat16_b1bffefa2c1c36436275abae3b9bc018f9ea1908_True", - "model": { - "name": "Saxo/Linkbricks-Horizon-AI-Superb-27B", - "sha": "b1bffefa2c1c36436275abae3b9bc018f9ea1908", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.63303350305584, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7302235845334822, - "normalized_score": 73.02235845334823 - }, - "bbh": { - "name": "BBH", - "value": 0.6186245528925046, - "normalized_score": 45.694323910043586 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22205438066465258, - "normalized_score": 22.20543806646526 - }, - "gpqa": { - 
"name": "GPQA", - "value": 0.3573825503355705, - "normalized_score": 14.317673378076066 - }, - "musr": { - "name": "MUSR", - "value": 0.465, - "normalized_score": 18.558333333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.406000664893617, - "normalized_score": 34.00007387706855 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-04", - "submission_date": "2024-12-25", - "generation": 2, - "base_model": "google/gemma-2-27b", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 27.227, - "co2_cost": 9.249224923882931 - } - }, - { - "id": "SeaLLMs/SeaLLM-7B-v2_bfloat16_35c5464399144a14915733dc690c4a74e1f71b16_False", - "model": { - "name": "SeaLLMs/SeaLLM-7B-v2", - "sha": "35c5464399144a14915733dc690c4a74e1f71b16", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 18.166389781627085, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.36712367629002157, - "normalized_score": 36.71236762900216 - }, - "bbh": { - "name": "BBH", - "value": 0.4902100795458318, - "normalized_score": 27.438159401570932 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08534743202416918, - "normalized_score": 8.534743202416918 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2785234899328859, - "normalized_score": 3.8031319910514525 - }, - "musr": { - "name": "MUSR", - "value": 0.4069583333333333, - "normalized_score": 9.36979166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30826130319148937, - "normalized_score": 23.140144799054376 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-01-29", - "submission_date": "2024-09-17", - "generation": 0, - "base_model": "SeaLLMs/SeaLLM-7B-v2", - "hub_license": "other", - "hub_hearts": 65, - "params_billions": 7.376, - "co2_cost": 1.26978602248258 - } - }, - { - "id": "SeaLLMs/SeaLLM-7B-v2.5_bfloat16_a961daf713dcb31e3253ebe40d43ea5fb7a84099_True", - "model": { - "name": "SeaLLMs/SeaLLM-7B-v2.5", - "sha": "a961daf713dcb31e3253ebe40d43ea5fb7a84099", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "GemmaForCausalLM", - "average_score": 20.73056803415737, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4521536190640833, - "normalized_score": 45.21536190640833 - }, - "bbh": { - "name": "BBH", - "value": 0.49802029594352754, - "normalized_score": 28.738153930102815 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10876132930513595, - "normalized_score": 10.876132930513595 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.42032291666666666, - "normalized_score": 11.60703125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3203125, - "normalized_score": 24.479166666666664 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-03", - "submission_date": "2024-07-29", - "generation": 0, - 
"base_model": "SeaLLMs/SeaLLM-7B-v2.5", - "hub_license": "other", - "hub_hearts": 49, - "params_billions": 8.538, - "co2_cost": 2.2019534437040718 - } - }, - { - "id": "SeaLLMs/SeaLLMs-v3-7B-Chat_bfloat16_67ef6dfd0a5df7af4be7a325786105a2ba4cbaf7_True", - "model": { - "name": "SeaLLMs/SeaLLMs-v3-7B-Chat", - "sha": "67ef6dfd0a5df7af4be7a325786105a2ba4cbaf7", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 24.211695856688838, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43766539448662883, - "normalized_score": 43.76653944866288 - }, - "bbh": { - "name": "BBH", - "value": 0.5266406284595359, - "normalized_score": 33.801622722378404 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18580060422960726, - "normalized_score": 18.580060422960727 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2986577181208054, - "normalized_score": 6.487695749440718 - }, - "musr": { - "name": "MUSR", - "value": 0.417375, - "normalized_score": 10.471875000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3894614361702128, - "normalized_score": 32.16238179669031 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-03", - "submission_date": "2024-07-29", - "generation": 0, - "base_model": "SeaLLMs/SeaLLMs-v3-7B-Chat", - "hub_license": "other", - "hub_hearts": 53, - "params_billions": 7.616, - "co2_cost": 1.685706249390413 - } - }, - { - "id": "SenseLLM/ReflectionCoder-CL-34B_bfloat16_e939100132251cf340ba88d9bdd342faa3c3b211_True", - "model": { - "name": "SenseLLM/ReflectionCoder-CL-34B", - "sha": "e939100132251cf340ba88d9bdd342faa3c3b211", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 12.147932297450376, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4007710652180658, - "normalized_score": 40.077106521806584 - }, - "bbh": { - "name": "BBH", - "value": 0.39529304297033296, - "normalized_score": 14.264686822563535 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03323262839879154, - "normalized_score": 3.3232628398791544 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25083892617449666, - "normalized_score": 0.11185682326622093 - }, - "musr": { - "name": "MUSR", - "value": 0.41548958333333336, - "normalized_score": 10.402864583333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.14237034574468085, - "normalized_score": 4.707816193853427 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-28", - "submission_date": "2024-09-15", - "generation": 0, - "base_model": "SenseLLM/ReflectionCoder-CL-34B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 33.744, - "co2_cost": 3.9401396540259577 - } - }, - { - "id": "SenseLLM/ReflectionCoder-DS-33B_bfloat16_07ae97a21fbef0503294e1eb258ce0a308b8dc35_True", - "model": { - "name": "SenseLLM/ReflectionCoder-DS-33B", - "sha": "07ae97a21fbef0503294e1eb258ce0a308b8dc35", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 9.194495021407294, - "has_chat_template": true - }, - 
"evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3786641666334215, - "normalized_score": 37.86641666334215 - }, - "bbh": { - "name": "BBH", - "value": 0.3449447540164568, - "normalized_score": 8.337659356727954 - }, - "math": { - "name": "MATH Level 5", - "value": 0.030211480362537766, - "normalized_score": 3.0211480362537766 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.3343125, - "normalized_score": 0.45572916666666624 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12017952127659574, - "normalized_score": 2.24216903073286 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-28", - "submission_date": "2024-09-15", - "generation": 0, - "base_model": "SenseLLM/ReflectionCoder-DS-33B", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 33.34, - "co2_cost": 4.619056319496223 - } - }, - { - "id": "SentientAGI/Dobby-Mini-Leashed-Llama-3.1-8B_bfloat16_a6f1c58d657b18f1dc507dbd8db1a79089c4a05d_True", - "model": { - "name": "SentientAGI/Dobby-Mini-Leashed-Llama-3.1-8B", - "sha": "a6f1c58d657b18f1dc507dbd8db1a79089c4a05d", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.438992188094076, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7847034756667863, - "normalized_score": 78.47034756667864 - }, - "bbh": { - "name": "BBH", - "value": 0.5138053850165866, - "normalized_score": 30.77304532336984 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18580060422960726, - "normalized_score": 18.580060422960727 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - "musr": { - "name": "MUSR", - "value": 0.425375, - "normalized_score": 11.938541666666671 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36943151595744683, - "normalized_score": 29.93683510638298 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-22", - "submission_date": "2025-02-04", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "llama3.1", - "hub_hearts": 10, - "params_billions": 8.03, - "co2_cost": 1.5346435887257317 - } - }, - { - "id": "SentientAGI/Dobby-Mini-Unhinged-Llama-3.1-8B_bfloat16_23f4c4a863f933238505391f49af552e1cf2c2ad_True", - "model": { - "name": "SentientAGI/Dobby-Mini-Unhinged-Llama-3.1-8B", - "sha": "23f4c4a863f933238505391f49af552e1cf2c2ad", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 27.45756521473049, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7456858912130924, - "normalized_score": 74.56858912130923 - }, - "bbh": { - "name": "BBH", - "value": 0.5142440064892148, - "normalized_score": 30.369934195446422 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15634441087613293, - "normalized_score": 15.634441087613293 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3062080536912752, - "normalized_score": 7.494407158836691 - }, - 
"musr": { - "name": "MUSR", - "value": 0.40128125000000003, - "normalized_score": 7.960156250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35846077127659576, - "normalized_score": 28.717863475177303 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-22", - "submission_date": "2025-02-04", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "llama3.1", - "hub_hearts": 35, - "params_billions": 8.03, - "co2_cost": 1.351298557406623 - } - }, - { - "id": "SeppeV/SmolLM_pretrained_with_sft_trained_with_1pc_data_on_a_preference_dpo_bfloat16_6ced77bb27efc0d6f33d447b9cc8fca35976e91c_True", - "model": { - "name": "SeppeV/SmolLM_pretrained_with_sft_trained_with_1pc_data_on_a_preference_dpo", - "sha": "6ced77bb27efc0d6f33d447b9cc8fca35976e91c", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.299518611352284, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.09554648333089535, - "normalized_score": 9.554648333089535 - }, - "bbh": { - "name": "BBH", - "value": 0.3072665948660797, - "normalized_score": 3.612865412111988 - }, - "math": { - "name": "MATH Level 5", - "value": 0.012084592145015106, - "normalized_score": 1.2084592145015105 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.40320833333333334, - "normalized_score": 8.401041666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11610704787234043, - "normalized_score": 1.7896719858156023 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-10-12", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.135, - "co2_cost": 0.6543926123142053 - } - }, - { - "id": "Sharathhebbar24/SSH_355M_float16_601988021bc27acf3c470fe70eed5db373df58db_False", - "model": { - "name": "Sharathhebbar24/SSH_355M", - "sha": "601988021bc27acf3c470fe70eed5db373df58db", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "GPT2LMHeadModel", - "average_score": 5.371931358828882, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1423589409433636, - "normalized_score": 14.235894094336361 - }, - "bbh": { - "name": "BBH", - "value": 0.30985907344593705, - "normalized_score": 3.496136025027686 - }, - "math": { - "name": "MATH Level 5", - "value": 0.00906344410876133, - "normalized_score": 0.906344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.41775, - "normalized_score": 10.518749999999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11760305851063829, - "normalized_score": 1.9558953900709206 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-02-06", - "submission_date": "2025-01-11", - "generation": 0, - "base_model": 
"Sharathhebbar24/SSH_355M", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.355, - "co2_cost": 0.17182553379164855 - } - }, - { - "id": "Sharathhebbar24/chat_gpt2_dpo_float16_f4a41f2c058c6b4087e1c0196d1279a38dd1f060_False", - "model": { - "name": "Sharathhebbar24/chat_gpt2_dpo", - "sha": "f4a41f2c058c6b4087e1c0196d1279a38dd1f060", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "GPT2LMHeadModel", - "average_score": 3.406545907558898, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.09861944086135896, - "normalized_score": 9.861944086135896 - }, - "bbh": { - "name": "BBH", - "value": 0.29022988561565644, - "normalized_score": 1.6986044099668713 - }, - "math": { - "name": "MATH Level 5", - "value": 0.005287009063444109, - "normalized_score": 0.5287009063444109 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.38184375, - "normalized_score": 5.43046875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11419547872340426, - "normalized_score": 1.5772754137115832 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-01-24", - "submission_date": "2025-01-02", - "generation": 0, - "base_model": "Sharathhebbar24/chat_gpt2_dpo", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 0.124, - "co2_cost": 0.12875475498765035 - } - }, - { - "id": "Shreyash2010/Uma-4x4B-Instruct-v0.1_bfloat16_f78146bdd1632585b3520717885e0ca41ddbce69_True", - "model": { - "name": "Shreyash2010/Uma-4x4B-Instruct-v0.1", - "sha": "f78146bdd1632585b3520717885e0ca41ddbce69", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 27.922104785036094, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5516961661724225, - "normalized_score": 55.169616617242255 - }, - "bbh": { - "name": "BBH", - "value": 0.5511602059856503, - "normalized_score": 36.28453127383045 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17749244712990936, - "normalized_score": 17.749244712990937 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3347315436241611, - "normalized_score": 11.297539149888143 - }, - "musr": { - "name": "MUSR", - "value": 0.4441041666666667, - "normalized_score": 15.146354166666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.386968085106383, - "normalized_score": 31.885342789598102 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-08-25", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.821, - "co2_cost": 2.3835301665966497 - } - }, - { - "id": "Sicarius-Prototyping/Brainy_LLAMA_float16_c6c0c29b606c2f7b9484f7ccc9ffc139ce56c37e_False", - "model": { - "name": "Sicarius-Prototyping/Brainy_LLAMA", - "sha": "c6c0c29b606c2f7b9484f7ccc9ffc139ce56c37e", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.22551281841119, - "has_chat_template": 
false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5204224790223274, - "normalized_score": 52.042247902232745 - }, - "bbh": { - "name": "BBH", - "value": 0.5117131754488634, - "normalized_score": 30.294986762557475 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1336858006042296, - "normalized_score": 13.36858006042296 - }, - "gpqa": { - "name": "GPQA", - "value": 0.313758389261745, - "normalized_score": 8.501118568232664 - }, - "musr": { - "name": "MUSR", - "value": 0.4143333333333334, - "normalized_score": 9.491666666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3848902925531915, - "normalized_score": 31.654476950354614 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-04", - "submission_date": "2024-12-04", - "generation": 0, - "base_model": "Sicarius-Prototyping/Brainy_LLAMA", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.4562784679275762 - } - }, - { - "id": "Sicarius-Prototyping/Micropenis_1B_float16_959bd3a493afab5556cf70e4ff8d3e11445a76e0_False", - "model": { - "name": "Sicarius-Prototyping/Micropenis_1B", - "sha": "959bd3a493afab5556cf70e4ff8d3e11445a76e0", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 10.141053791929549, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3460662154195313, - "normalized_score": 34.606621541953125 - }, - "bbh": { - "name": "BBH", - "value": 0.3372377910880025, - "normalized_score": 7.664224287376964 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04607250755287009, - "normalized_score": 4.607250755287009 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2625838926174497, - "normalized_score": 1.6778523489932917 - }, - "musr": { - "name": "MUSR", - "value": 0.3325416666666667, - "normalized_score": 2.7343749999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.18600398936170212, - "normalized_score": 9.555998817966902 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-26", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.618, - "co2_cost": 0.7227822905689283 - } - }, - { - "id": "Sicarius-Prototyping/bacon_and_food_bfloat16_79123dca52ada8369e44200e298c466b1f302c33_True", - "model": { - "name": "Sicarius-Prototyping/bacon_and_food", - "sha": "79123dca52ada8369e44200e298c466b1f302c33", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.242423887650176, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5860428108529812, - "normalized_score": 58.60428108529812 - }, - "bbh": { - "name": "BBH", - "value": 0.47245798883729967, - "normalized_score": 24.93079263718865 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09818731117824774, - "normalized_score": 9.818731117824774 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.3883854166666667, - 
"normalized_score": 7.014843750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3262965425531915, - "normalized_score": 25.144060283687946 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-22", - "submission_date": "2024-11-22", - "generation": 1, - "base_model": "Sicarius-Prototyping/bacon_and_food (Merge)", - "hub_license": "creativeml-openrail-m", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.332752887985638 - } - }, - { - "id": "SicariusSicariiStuff/2B-ad_float16_fa0e405edfb1c6e454b7a25852b5bbf5049cf132_False", - "model": { - "name": "SicariusSicariiStuff/2B-ad", - "sha": "fa0e405edfb1c6e454b7a25852b5bbf5049cf132", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 15.93131891082937, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4378903531518593, - "normalized_score": 43.78903531518593 - }, - "bbh": { - "name": "BBH", - "value": 0.40922431523996955, - "normalized_score": 16.007592932115813 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05060422960725076, - "normalized_score": 5.0604229607250755 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28104026845637586, - "normalized_score": 4.138702460850116 - }, - "musr": { - "name": "MUSR", - "value": 0.40153124999999995, - "normalized_score": 8.124739583333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2662067819148936, - "normalized_score": 18.467420212765955 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-26", - "submission_date": "2024-10-11", - "generation": 0, - "base_model": "SicariusSicariiStuff/2B-ad", - "hub_license": "gemma", - "hub_hearts": 5, - "params_billions": 3.204, - "co2_cost": 3.4785996796507748 - } - }, - { - "id": "SicariusSicariiStuff/2B_or_not_2B_float16_abf87e8422284aa83a42efd7a91154f9af3c7ed3_False", - "model": { - "name": "SicariusSicariiStuff/2B_or_not_2B", - "sha": "abf87e8422284aa83a42efd7a91154f9af3c7ed3", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "GemmaForCausalLM", - "average_score": 6.592012664617282, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2062316874781136, - "normalized_score": 20.623168747811363 - }, - "bbh": { - "name": "BBH", - "value": 0.3415917024092019, - "normalized_score": 7.68230049623281 - }, - "math": { - "name": "MATH Level 5", - "value": 0.019637462235649546, - "normalized_score": 1.9637462235649545 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24748322147651006, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3790833333333334, - "normalized_score": 4.8520833333333355 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.13987699468085107, - "normalized_score": 4.43077718676123 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-11", - "submission_date": "2024-10-11", - "generation": 0, - "base_model": "SicariusSicariiStuff/2B_or_not_2B", - "hub_license": "gemma", 
- "hub_hearts": 27, - "params_billions": 2.506, - "co2_cost": 1.754271302947184 - } - }, - { - "id": "SicariusSicariiStuff/Dusk_Rainbow_bfloat16_106058ac50593d65bc4b5ae75c8c010e87cd8487_False", - "model": { - "name": "SicariusSicariiStuff/Dusk_Rainbow", - "sha": "106058ac50593d65bc4b5ae75c8c010e87cd8487", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.623890627323348, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3588057465303173, - "normalized_score": 35.88057465303173 - }, - "bbh": { - "name": "BBH", - "value": 0.47717504280736184, - "normalized_score": 25.95903682422342 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07477341389728097, - "normalized_score": 7.477341389728097 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3087248322147651, - "normalized_score": 7.829977628635347 - }, - "musr": { - "name": "MUSR", - "value": 0.40252083333333327, - "normalized_score": 7.448437499999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3443317819148936, - "normalized_score": 27.14797576832151 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-16", - "submission_date": "2024-10-08", - "generation": 0, - "base_model": "SicariusSicariiStuff/Dusk_Rainbow", - "hub_license": "llama3", - "hub_hearts": 36, - "params_billions": 8.03, - "co2_cost": 2.4012977293319704 - } - }, - { - "id": "SicariusSicariiStuff/Eximius_Persona_5B_bfloat16_c0eaf083496a30d161551d24ef3445b1ab67483f_True", - "model": { - "name": "SicariusSicariiStuff/Eximius_Persona_5B", - "sha": "c0eaf083496a30d161551d24ef3445b1ab67483f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.833611932328903, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6559850086658954, - "normalized_score": 65.59850086658955 - }, - "bbh": { - "name": "BBH", - "value": 0.4511736018571028, - "normalized_score": 22.201333094135226 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10196374622356495, - "normalized_score": 10.196374622356496 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.38181249999999994, - "normalized_score": 7.326562499999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31399601063829785, - "normalized_score": 23.777334515366427 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-21", - "submission_date": "2025-01-22", - "generation": 1, - "base_model": "SicariusSicariiStuff/Eximius_Persona_5B (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 4, - "params_billions": 5.821, - "co2_cost": 1.961155951705545 - } - }, - { - "id": "SicariusSicariiStuff/Impish_LLAMA_3B_bfloat16_72703d3083d1a67849cbea0b7add3c1270a77cc7_False", - "model": { - "name": "SicariusSicariiStuff/Impish_LLAMA_3B", - "sha": "72703d3083d1a67849cbea0b7add3c1270a77cc7", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - 
"average_score": 17.791947493881953, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.46299485365496884, - "normalized_score": 46.29948536549688 - }, - "bbh": { - "name": "BBH", - "value": 0.40905101627873225, - "normalized_score": 16.98575485690441 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11253776435045318, - "normalized_score": 11.253776435045317 - }, - "gpqa": { - "name": "GPQA", - "value": 0.287751677852349, - "normalized_score": 5.033557046979867 - }, - "musr": { - "name": "MUSR", - "value": 0.3672708333333334, - "normalized_score": 5.60885416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2941323138297872, - "normalized_score": 21.570257092198577 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-01", - "submission_date": "2024-10-07", - "generation": 1, - "base_model": "SicariusSicariiStuff/Impish_LLAMA_3B (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 19, - "params_billions": 3.213, - "co2_cost": 1.445904762553517 - } - }, - { - "id": "SicariusSicariiStuff/Impish_Mind_8B_float16_b408001bae902572570b4f55ebad3436c1c2cedb_False", - "model": { - "name": "SicariusSicariiStuff/Impish_Mind_8B", - "sha": "b408001bae902572570b4f55ebad3436c1c2cedb", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.124203063482273, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.31791424531354584, - "normalized_score": 31.791424531354583 - }, - "bbh": { - "name": "BBH", - "value": 0.46736571616627115, - "normalized_score": 24.562854114562622 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10498489425981873, - "normalized_score": 10.498489425981873 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.4069583333333333, - "normalized_score": 8.969791666666671 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3308676861702128, - "normalized_score": 25.65196513002364 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-05", - "submission_date": "2024-12-05", - "generation": 0, - "base_model": "SicariusSicariiStuff/Impish_Mind_8B", - "hub_license": "llama3.1", - "hub_hearts": 26, - "params_billions": 8.03, - "co2_cost": 1.3695634672770003 - } - }, - { - "id": "SicariusSicariiStuff/Impish_QWEN_14B-1M_bfloat16_245f7900d6b6396c81b9d0247c09eaa95bdb6dc7_True", - "model": { - "name": "SicariusSicariiStuff/Impish_QWEN_14B-1M", - "sha": "245f7900d6b6396c81b9d0247c09eaa95bdb6dc7", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.23736559941774, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7867768631675067, - "normalized_score": 78.67768631675067 - }, - "bbh": { - "name": "BBH", - "value": 0.6282934814011238, - "normalized_score": 47.22031340500465 - }, - "math": { - "name": "MATH Level 5", - "value": 0.39652567975830816, - "normalized_score": 39.65256797583081 - }, - "gpqa": { - "name": "GPQA", - 
"value": 0.35067114093959734, - "normalized_score": 13.422818791946312 - }, - "musr": { - "name": "MUSR", - "value": 0.46146875000000004, - "normalized_score": 17.516927083333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.504404920212766, - "normalized_score": 44.93388002364066 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-27", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "SicariusSicariiStuff/Impish_QWEN_14B-1M (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 15, - "params_billions": 14.77, - "co2_cost": 3.2022507114347665 - } - }, - { - "id": "SicariusSicariiStuff/Impish_QWEN_7B-1M_bfloat16_c8aef44ff768bf5a9d58a97d3b3b9d7ed0b3abaf_True", - "model": { - "name": "SicariusSicariiStuff/Impish_QWEN_7B-1M", - "sha": "c8aef44ff768bf5a9d58a97d3b3b9d7ed0b3abaf", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 30.209083552218306, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6381744881359238, - "normalized_score": 63.81744881359238 - }, - "bbh": { - "name": "BBH", - "value": 0.537172912933626, - "normalized_score": 34.55484768058619 - }, - "math": { - "name": "MATH Level 5", - "value": 0.30891238670694865, - "normalized_score": 30.891238670694865 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.40739583333333335, - "normalized_score": 9.557812500000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4265292553191489, - "normalized_score": 36.281028368794324 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-27", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "SicariusSicariiStuff/Impish_QWEN_7B-1M (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 7.616, - "co2_cost": 1.2744692866465974 - } - }, - { - "id": "SicariusSicariiStuff/LLAMA-3_8B_Unaligned_BETA_float16_9bc5b68a7448a4e46eeaf27a4ac477d79578db95_False", - "model": { - "name": "SicariusSicariiStuff/LLAMA-3_8B_Unaligned_BETA", - "sha": "9bc5b68a7448a4e46eeaf27a4ac477d79578db95", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.14075237978403, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3713203189758729, - "normalized_score": 37.13203189758729 - }, - "bbh": { - "name": "BBH", - "value": 0.4717234028484832, - "normalized_score": 24.998013753807424 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08383685800604229, - "normalized_score": 8.38368580060423 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.41194791666666664, - "normalized_score": 9.56015625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3464926861702128, - "normalized_score": 27.38807624113475 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - 
"is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-12", - "submission_date": "2024-10-18", - "generation": 0, - "base_model": "SicariusSicariiStuff/LLAMA-3_8B_Unaligned_BETA", - "hub_license": "llama3.1", - "hub_hearts": 45, - "params_billions": 8.03, - "co2_cost": 1.4987464399469281 - } - }, - { - "id": "SicariusSicariiStuff/Phi-Line_14B_bfloat16_4eaf6b4e21774b8c6da9f998f0e2e71b3ab16296_True", - "model": { - "name": "SicariusSicariiStuff/Phi-Line_14B", - "sha": "4eaf6b4e21774b8c6da9f998f0e2e71b3ab16296", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 37.56208119420943, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6495653754260917, - "normalized_score": 64.95653754260917 - }, - "bbh": { - "name": "BBH", - "value": 0.6154430096216078, - "normalized_score": 43.794068990904684 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3859516616314199, - "normalized_score": 38.59516616314199 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35318791946308725, - "normalized_score": 13.758389261744966 - }, - "musr": { - "name": "MUSR", - "value": 0.44785416666666666, - "normalized_score": 14.781770833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5453789893617021, - "normalized_score": 49.48655437352246 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-17", - "submission_date": "2025-02-18", - "generation": 1, - "base_model": "SicariusSicariiStuff/Phi-Line_14B (Merge)", - "hub_license": "mit", - "hub_hearts": 11, - "params_billions": 14.66, - "co2_cost": 0.9368138642981235 - } - }, - { - "id": "SicariusSicariiStuff/Phi-lthy4_bfloat16_888f1003ec7de0d2880d3a83b1e23c125ac47fb1_True", - "model": { - "name": "SicariusSicariiStuff/Phi-lthy4", - "sha": "888f1003ec7de0d2880d3a83b1e23c125ac47fb1", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.269040533700707, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7679423928509688, - "normalized_score": 76.79423928509688 - }, - "bbh": { - "name": "BBH", - "value": 0.587935701572946, - "normalized_score": 40.15288217449905 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13670694864048338, - "normalized_score": 13.670694864048338 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28691275167785235, - "normalized_score": 4.921700223713646 - }, - "musr": { - "name": "MUSR", - "value": 0.40829166666666666, - "normalized_score": 9.036458333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.433344414893617, - "normalized_score": 37.038268321512994 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-12", - "submission_date": "2025-02-12", - "generation": 1, - "base_model": "SicariusSicariiStuff/Phi-lthy4 (Merge)", - "hub_license": "mit", - "hub_hearts": 28, - "params_billions": 11.933, - "co2_cost": 0.739112042377834 - } - }, - { - "id": "SicariusSicariiStuff/Qwen2.5-14B_Uncencored_float16_1daf648ac2f837c66bf6bb00459e034987d9486f_False", - "model": { - "name": 
"SicariusSicariiStuff/Qwen2.5-14B_Uncencored", - "sha": "1daf648ac2f837c66bf6bb00459e034987d9486f", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 31.72493871859786, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.31579099012841483, - "normalized_score": 31.579099012841482 - }, - "bbh": { - "name": "BBH", - "value": 0.6308941945507827, - "normalized_score": 46.7202351109504 - }, - "math": { - "name": "MATH Level 5", - "value": 0.31797583081570996, - "normalized_score": 31.797583081570995 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38171140939597314, - "normalized_score": 17.561521252796418 - }, - "musr": { - "name": "MUSR", - "value": 0.45166666666666666, - "normalized_score": 15.291666666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.526595744680851, - "normalized_score": 47.399527186761226 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-20", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.0, - "co2_cost": 5.478390781463515 - } - }, - { - "id": "SicariusSicariiStuff/Qwen2.5-14B_Uncensored_float16_0710a2341d269dcd56f9136fed442373d4dadc5d_False", - "model": { - "name": "SicariusSicariiStuff/Qwen2.5-14B_Uncensored", - "sha": "0710a2341d269dcd56f9136fed442373d4dadc5d", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 31.750334298621826, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3173147249298528, - "normalized_score": 31.73147249298528 - }, - "bbh": { - "name": "BBH", - "value": 0.6308941945507827, - "normalized_score": 46.7202351109504 - }, - "math": { - "name": "MATH Level 5", - "value": 0.31797583081570996, - "normalized_score": 31.797583081570995 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38171140939597314, - "normalized_score": 17.561521252796418 - }, - "musr": { - "name": "MUSR", - "value": 0.45166666666666666, - "normalized_score": 15.291666666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.526595744680851, - "normalized_score": 47.399527186761226 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-21", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.0, - "co2_cost": 4.841773402094493 - } - }, - { - "id": "SicariusSicariiStuff/Qwen2.5-14B_Uncensored_Instruct_float16__True", - "model": { - "name": "SicariusSicariiStuff/Qwen2.5-14B_Uncensored_Instruct", - "sha": "", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 28.958792291740732, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3789389929830627, - "normalized_score": 37.89389929830627 - }, - "bbh": { - "name": "BBH", - "value": 0.5936792404117958, - "normalized_score": 42.113096716972805 - }, - "math": { - "name": "MATH Level 5", - "value": 
0.3285498489425982, - "normalized_score": 32.85498489425982 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3296979865771812, - "normalized_score": 10.626398210290827 - }, - "musr": { - "name": "MUSR", - "value": 0.36965625, - "normalized_score": 4.40703125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5127160904255319, - "normalized_score": 45.85734338061466 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-21", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 7.8103262425846 - } - }, - { - "id": "SicariusSicariiStuff/Redemption_Wind_24B_float16_e7b0e4989b34e5a7b1a3068c95ae83b951ac658e_False", - "model": { - "name": "SicariusSicariiStuff/Redemption_Wind_24B", - "sha": "e7b0e4989b34e5a7b1a3068c95ae83b951ac658e", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 28.370595438172284, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25014517037017336, - "normalized_score": 25.014517037017335 - }, - "bbh": { - "name": "BBH", - "value": 0.642816406969129, - "normalized_score": 48.417358452350896 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18580060422960726, - "normalized_score": 18.580060422960727 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38338926174496646, - "normalized_score": 17.785234899328863 - }, - "musr": { - "name": "MUSR", - "value": 0.4262395833333333, - "normalized_score": 11.179947916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.543218085106383, - "normalized_score": 49.24645390070923 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-06", - "submission_date": "2025-02-07", - "generation": 0, - "base_model": "SicariusSicariiStuff/Redemption_Wind_24B", - "hub_license": "apache-2.0", - "hub_hearts": 21, - "params_billions": 23.572, - "co2_cost": 2.9658071756513316 - } - }, - { - "id": "SicariusSicariiStuff/Winged_Imp_8B_bfloat16_64411873c8b98fdbe62058a240fdcf1a550a00d0_True", - "model": { - "name": "SicariusSicariiStuff/Winged_Imp_8B", - "sha": "64411873c8b98fdbe62058a240fdcf1a550a00d0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.911877977477257, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.743012983328679, - "normalized_score": 74.3012983328679 - }, - "bbh": { - "name": "BBH", - "value": 0.5120376322048542, - "normalized_score": 30.592875323293935 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12009063444108761, - "normalized_score": 12.009063444108762 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.41483333333333333, - "normalized_score": 10.887499999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3638630319148936, - "normalized_score": 29.318114657210398 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - 
"is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-24", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.299678580705405 - } - }, - { - "id": "SicariusSicariiStuff/Wingless_Imp_8B_bfloat16_5da96e0a37d80faaca421606a4e1c6b7e5cafd78_True", - "model": { - "name": "SicariusSicariiStuff/Wingless_Imp_8B", - "sha": "5da96e0a37d80faaca421606a4e1c6b7e5cafd78", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.911877977477257, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.743012983328679, - "normalized_score": 74.3012983328679 - }, - "bbh": { - "name": "BBH", - "value": 0.5120376322048542, - "normalized_score": 30.592875323293935 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12009063444108761, - "normalized_score": 12.009063444108762 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.41483333333333333, - "normalized_score": 10.887499999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3638630319148936, - "normalized_score": 29.318114657210398 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-24", - "submission_date": "2025-01-24", - "generation": 1, - "base_model": "SicariusSicariiStuff/Wingless_Imp_8B (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 9, - "params_billions": 8.03, - "co2_cost": 1.2959258660149595 - } - }, - { - "id": "SicariusSicariiStuff/Zion_Alpha_float16_e52e1b6e98dce3a54d82f87f83920c0a3f189457_False", - "model": { - "name": "SicariusSicariiStuff/Zion_Alpha", - "sha": "e52e1b6e98dce3a54d82f87f83920c0a3f189457", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.186491401477962, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3324024698910003, - "normalized_score": 33.24024698910004 - }, - "bbh": { - "name": "BBH", - "value": 0.49321099934509743, - "normalized_score": 29.160501194115735 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05211480362537765, - "normalized_score": 5.211480362537765 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.4726875, - "normalized_score": 18.452604166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31316489361702127, - "normalized_score": 23.684988179669027 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-19", - "submission_date": "2024-10-18", - "generation": 0, - "base_model": "SicariusSicariiStuff/Zion_Alpha", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 7.242, - "co2_cost": 1.180954638110283 - } - }, - { - "id": "SicariusSicariiStuff/dn_ep02_float16_ab9d5937cff45d0da251d6094cbf5a3cef4d42d8_False", - "model": { - "name": "SicariusSicariiStuff/dn_ep02", - "sha": "ab9d5937cff45d0da251d6094cbf5a3cef4d42d8", - 
"precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.284440475555808, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5064340394597445, - "normalized_score": 50.643403945974455 - }, - "bbh": { - "name": "BBH", - "value": 0.5266008759836228, - "normalized_score": 32.6437741461978 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1419939577039275, - "normalized_score": 14.19939577039275 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31543624161073824, - "normalized_score": 8.7248322147651 - }, - "musr": { - "name": "MUSR", - "value": 0.43163541666666666, - "normalized_score": 12.187760416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39976728723404253, - "normalized_score": 33.30747635933806 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-19", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3874595609327391 - } - }, - { - "id": "SkyOrbis/SKY-Ko-Llama3.1-8B-lora_bfloat16_69db8a7ac983f08e280c2b4ed55d159abeea8719_False", - "model": { - "name": "SkyOrbis/SKY-Ko-Llama3.1-8B-lora", - "sha": "69db8a7ac983f08e280c2b4ed55d159abeea8719", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.022312491744717, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5058345190760515, - "normalized_score": 50.58345190760515 - }, - "bbh": { - "name": "BBH", - "value": 0.5088388495224864, - "normalized_score": 29.191619249410266 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15483383685800603, - "normalized_score": 15.483383685800604 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3213087248322148, - "normalized_score": 9.507829977628639 - }, - "musr": { - "name": "MUSR", - "value": 0.3997916666666667, - "normalized_score": 8.507291666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3777426861702128, - "normalized_score": 30.860298463356976 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-31", - "submission_date": "2024-12-31", - "generation": 1, - "base_model": "SkyOrbis/SKY-Ko-Llama3.1-8B-lora (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.34628480084584 - } - }, - { - "id": "SkyOrbis/SKY-Ko-Llama3.1-8B-lora-epoch1_bfloat16_d275bea3b261da56fb8332afae6e670797caf6cb_False", - "model": { - "name": "SkyOrbis/SKY-Ko-Llama3.1-8B-lora-epoch1", - "sha": "d275bea3b261da56fb8332afae6e670797caf6cb", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.022312491744717, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5058345190760515, - "normalized_score": 50.58345190760515 - }, - "bbh": { - "name": "BBH", - "value": 0.5088388495224864, - "normalized_score": 29.191619249410266 - }, - "math": { - "name": "MATH Level 5", - "value": 
0.15483383685800603, - "normalized_score": 15.483383685800604 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3213087248322148, - "normalized_score": 9.507829977628639 - }, - "musr": { - "name": "MUSR", - "value": 0.3997916666666667, - "normalized_score": 8.507291666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3777426861702128, - "normalized_score": 30.860298463356976 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-02", - "generation": 1, - "base_model": "SkyOrbis/SKY-Ko-Llama3.1-8B-lora-epoch1 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.36420275822784 - } - }, - { - "id": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch3_float16_9d92423a35bafedfbfe5782bd69df1f2e3e8620e_True", - "model": { - "name": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch3", - "sha": "9d92423a35bafedfbfe5782bd69df1f2e3e8620e", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 7.679539302024469, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3247084402718121, - "normalized_score": 32.470844027181215 - }, - "bbh": { - "name": "BBH", - "value": 0.3166586087861201, - "normalized_score": 5.493123703086085 - }, - "math": { - "name": "MATH Level 5", - "value": 0.027190332326283987, - "normalized_score": 2.719033232628399 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2516778523489933, - "normalized_score": 0.22371364653244186 - }, - "musr": { - "name": "MUSR", - "value": 0.33815625, - "normalized_score": 2.0695312500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12790890957446807, - "normalized_score": 3.100989952718674 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-23", - "submission_date": "2024-12-23", - "generation": 1, - "base_model": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch3 (Merge)", - "hub_license": "llama3", - "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 0.7414391903365065 - } - }, - { - "id": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch5_bfloat16_7f6435abc4e61ee287b0d31b7d3e5654a2d8ec30_False", - "model": { - "name": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch5", - "sha": "7f6435abc4e61ee287b0d31b7d3e5654a2d8ec30", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 12.134493116614278, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4359920566319587, - "normalized_score": 43.599205663195875 - }, - "bbh": { - "name": "BBH", - "value": 0.34060156188911545, - "normalized_score": 8.132119042373503 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05211480362537765, - "normalized_score": 5.211480362537765 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.3471458333333333, - "normalized_score": 4.1265625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.19456449468085107, - "normalized_score": 10.507166075650117 - } - }, - "features": { - "is_not_available_on_hub": 
true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-23", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch5 (Merge)", - "hub_license": "llama3", - "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 0.7261055632337824 - } - }, - { - "id": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch3_bfloat16_3ceb1f21e9423b3069e75d51ab7e3ac3c5896c42_False", - "model": { - "name": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch3", - "sha": "3ceb1f21e9423b3069e75d51ab7e3ac3c5896c42", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 12.134493116614278, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4359920566319587, - "normalized_score": 43.599205663195875 - }, - "bbh": { - "name": "BBH", - "value": 0.34060156188911545, - "normalized_score": 8.132119042373503 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05211480362537765, - "normalized_score": 5.211480362537765 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.3471458333333333, - "normalized_score": 4.1265625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.19456449468085107, - "normalized_score": 10.507166075650117 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch3 (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 0.6957441095320627 - } - }, - { - "id": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch5_bfloat16_285b5f8c99d2bc8233288d95ab645f74e6dd95fd_False", - "model": { - "name": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch5", - "sha": "285b5f8c99d2bc8233288d95ab645f74e6dd95fd", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 11.804479423825526, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42467652495378927, - "normalized_score": 42.46765249537893 - }, - "bbh": { - "name": "BBH", - "value": 0.33968360414253995, - "normalized_score": 8.268548811534602 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05060422960725076, - "normalized_score": 5.0604229607250755 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25419463087248323, - "normalized_score": 0.5592841163310973 - }, - "musr": { - "name": "MUSR", - "value": 0.34584375, - "normalized_score": 3.963802083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.19456449468085107, - "normalized_score": 10.507166075650117 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-28", - "generation": 1, - "base_model": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch5 (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 0.7083427535997535 - } - }, - { - "id": 
"SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch1_bfloat16_bee7f5b3a4bc739c24cee6a0f936470df2d58a56_False", - "model": { - "name": "SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch1", - "sha": "bee7f5b3a4bc739c24cee6a0f936470df2d58a56", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.42021662043027, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5331121424487028, - "normalized_score": 53.31121424487027 - }, - "bbh": { - "name": "BBH", - "value": 0.4399628268031015, - "normalized_score": 20.806136629310426 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14577039274924472, - "normalized_score": 14.577039274924472 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.35222916666666665, - "normalized_score": 5.961979166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30044880319148937, - "normalized_score": 22.272089243498815 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch1 (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 1.8412479453539257 - } - }, - { - "id": "SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch2_bfloat16_ff0f4aa3aee4535aaec8c4989014e1126d3dd36a_False", - "model": { - "name": "SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch2", - "sha": "ff0f4aa3aee4535aaec8c4989014e1126d3dd36a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.42021662043027, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5331121424487028, - "normalized_score": 53.31121424487027 - }, - "bbh": { - "name": "BBH", - "value": 0.4399628268031015, - "normalized_score": 20.806136629310426 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14577039274924472, - "normalized_score": 14.577039274924472 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.35222916666666665, - "normalized_score": 5.961979166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30044880319148937, - "normalized_score": 22.272089243498815 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-30", - "generation": 1, - "base_model": "SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch2 (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 1.1445605695482872 - } - }, - { - "id": "SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch3_bfloat16_879c73ee9539aca6cabff3a3fc5a8b37108dbd15_False", - "model": { - "name": "SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch3", - "sha": "879c73ee9539aca6cabff3a3fc5a8b37108dbd15", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.42021662043027, - 
"has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5331121424487028, - "normalized_score": 53.31121424487027 - }, - "bbh": { - "name": "BBH", - "value": 0.4399628268031015, - "normalized_score": 20.806136629310426 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14577039274924472, - "normalized_score": 14.577039274924472 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.35222916666666665, - "normalized_score": 5.961979166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30044880319148937, - "normalized_score": 22.272089243498815 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-30", - "submission_date": "2024-12-31", - "generation": 1, - "base_model": "SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch3 (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 1.1332529970386 - } - }, - { - "id": "SkyOrbis/SKY-Ko-Qwen2.5-3B-Instruct_float16_3241e4efcc62259e56caa03f8b42c301edc9320a_False", - "model": { - "name": "SkyOrbis/SKY-Ko-Qwen2.5-3B-Instruct", - "sha": "3241e4efcc62259e56caa03f8b42c301edc9320a", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 15.791201412428551, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3534100630770799, - "normalized_score": 35.34100630770799 - }, - "bbh": { - "name": "BBH", - "value": 0.4264821228336018, - "normalized_score": 19.150679030509476 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06948640483383686, - "normalized_score": 6.948640483383686 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.40236458333333336, - "normalized_score": 9.262239583333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28116688829787234, - "normalized_score": 20.12965425531915 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-09", - "submission_date": "2025-01-09", - "generation": 1, - "base_model": "SkyOrbis/SKY-Ko-Qwen2.5-3B-Instruct (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.086, - "co2_cost": 1.4597928292182816 - } - }, - { - "id": "SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-15000_bfloat16_4b2f6c40cc0b83c77d40805f23f300d90055641a_False", - "model": { - "name": "SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-15000", - "sha": "4b2f6c40cc0b83c77d40805f23f300d90055641a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 24.131333394810895, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.38188672721711725, - "normalized_score": 38.188672721711725 - }, - "bbh": { - "name": "BBH", - "value": 0.5077962006048589, - "normalized_score": 31.32761158066677 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1865558912386707, - "normalized_score": 18.65558912386707 - }, - "gpqa": { - "name": "GPQA", - "value": 
0.3271812080536913, - "normalized_score": 10.290827740492169 - }, - "musr": { - "name": "MUSR", - "value": 0.44360416666666663, - "normalized_score": 13.950520833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3913730053191489, - "normalized_score": 32.374778368794324 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-31", - "submission_date": "2025-02-01", - "generation": 1, - "base_model": "SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-15000 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.286764843505913 - } - }, - { - "id": "SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-5000_bfloat16_af6741845310182a40e5f8e2882af5f23e3a9ffd_False", - "model": { - "name": "SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-5000", - "sha": "af6741845310182a40e5f8e2882af5f23e3a9ffd", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 24.667117533839868, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3812373391490135, - "normalized_score": 38.12373391490135 - }, - "bbh": { - "name": "BBH", - "value": 0.5389864554242366, - "normalized_score": 34.9514354427427 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20996978851963746, - "normalized_score": 20.996978851963746 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.4237916666666666, - "normalized_score": 10.907291666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.42378656914893614, - "normalized_score": 35.976285460992905 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-31", - "submission_date": "2025-02-04", - "generation": 1, - "base_model": "SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-5000 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.2356579907364422 - } - }, - { - "id": "Skywork/Skywork-Reward-Gemma-2-27B-v0.2_bfloat16_a92f2ec997c806de469ff287ef3b71982e886fc2_True", - "model": { - "name": "Skywork/Skywork-Reward-Gemma-2-27B-v0.2", - "sha": "a92f2ec997c806de469ff287ef3b71982e886fc2", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForSequenceClassification", - "average_score": 34.66144843038861, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7807317916461656, - "normalized_score": 78.07317916461656 - }, - "bbh": { - "name": "BBH", - "value": 0.635960062329604, - "normalized_score": 48.159904249727155 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22734138972809667, - "normalized_score": 22.734138972809667 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34395973154362414, - "normalized_score": 12.527964205816552 - }, - "musr": { - "name": "MUSR", - "value": 0.42314583333333333, - "normalized_score": 11.993229166666671 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4103224734042553, - "normalized_score": 34.48027482269504 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": 
false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-14", - "submission_date": "2024-12-27", - "generation": 2, - "base_model": "google/gemma-2-27b", - "hub_license": "", - "hub_hearts": 30, - "params_billions": 27.227, - "co2_cost": 8.857939726796388 - } - }, - { - "id": "Skywork/Skywork-o1-Open-Llama-3.1-8B_bfloat16_a41903315f39ebf1c08fdba0ef52758f7afe3682_True", - "model": { - "name": "Skywork/Skywork-o1-Open-Llama-3.1-8B", - "sha": "a41903315f39ebf1c08fdba0ef52758f7afe3682", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.752994700266516, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3518364605912313, - "normalized_score": 35.18364605912313 - }, - "bbh": { - "name": "BBH", - "value": 0.45159089701897237, - "normalized_score": 23.017598928072996 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5211480362537765, - "normalized_score": 52.11480362537765 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.31564583333333335, - "normalized_score": 1.5223958333333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.20304188829787234, - "normalized_score": 11.449098699763592 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-26", - "submission_date": "2025-01-01", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "other", - "hub_hearts": 110, - "params_billions": 8.03, - "co2_cost": 1.396378069950861 - } - }, - { - "id": "Solshine/Brimful-merged-replete_float16_01ce8c3df6edb87d31f0e9a9651cbcbc4d4823e8_True", - "model": { - "name": "Solshine/Brimful-merged-replete", - "sha": "01ce8c3df6edb87d31f0e9a9651cbcbc4d4823e8", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.879827054900916, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17605619755581856, - "normalized_score": 17.605619755581856 - }, - "bbh": { - "name": "BBH", - "value": 0.28834447696551024, - "normalized_score": 1.9921389967360958 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0030211480362537764, - "normalized_score": 0.3021148036253776 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2575503355704698, - "normalized_score": 1.0067114093959737 - }, - "musr": { - "name": "MUSR", - "value": 0.342125, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10846077127659574, - "normalized_score": 0.9400856973995264 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-01", - "submission_date": "2024-10-01", - "generation": 1, - "base_model": "Solshine/Brimful-merged-replete (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 12.277, - "co2_cost": 4.333446537317904 - } - }, - { - "id": "Solshine/Llama-3-1-big-thoughtful-passthrough-merge-2_float16_d48047d6577e22fdda73a1be8e18971912db66d2_True", - "model": { - "name": 
"Solshine/Llama-3-1-big-thoughtful-passthrough-merge-2", - "sha": "d48047d6577e22fdda73a1be8e18971912db66d2", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 6.928703193353647, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25466650709007654, - "normalized_score": 25.466650709007656 - }, - "bbh": { - "name": "BBH", - "value": 0.32093808427144627, - "normalized_score": 5.008442306492267 - }, - "math": { - "name": "MATH Level 5", - "value": 0.010574018126888218, - "normalized_score": 1.0574018126888218 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.38894791666666667, - "normalized_score": 6.751822916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11851728723404255, - "normalized_score": 2.05747635933806 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-19", - "submission_date": "2024-09-24", - "generation": 1, - "base_model": "Solshine/Llama-3-1-big-thoughtful-passthrough-merge-2 (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 18.5, - "co2_cost": 6.762705392247006 - } - }, - { - "id": "Sorawiz/Gemma-9B-Base_bfloat16_f89db94ea783ddb1e365e7863cc015456dfc9f1d_True", - "model": { - "name": "Sorawiz/Gemma-9B-Base", - "sha": "f89db94ea783ddb1e365e7863cc015456dfc9f1d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 20.84248400200344, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16673758959560633, - "normalized_score": 16.673758959560633 - }, - "bbh": { - "name": "BBH", - "value": 0.593040577894583, - "normalized_score": 41.28135020153275 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09818731117824774, - "normalized_score": 9.818731117824774 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33976510067114096, - "normalized_score": 11.968680089485462 - }, - "musr": { - "name": "MUSR", - "value": 0.40451041666666665, - "normalized_score": 9.363802083333338 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.42353723404255317, - "normalized_score": 35.948581560283685 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-11", - "submission_date": "2025-02-12", - "generation": 1, - "base_model": "Sorawiz/Gemma-9B-Base (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 10.159, - "co2_cost": 2.064132863618642 - } - }, - { - "id": "Sorawiz/Gemma-Creative-9B-Base_bfloat16_aeb1b97a3ddad1fc8f7ee16692c09e7da528fcb1_True", - "model": { - "name": "Sorawiz/Gemma-Creative-9B-Base", - "sha": "aeb1b97a3ddad1fc8f7ee16692c09e7da528fcb1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 18.29960573109928, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1515002415812267, - "normalized_score": 15.150024158122669 - }, - "bbh": { - "name": "BBH", - "value": 0.5458614335095562, - "normalized_score": 
34.6224221959871 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07779456193353475, - "normalized_score": 7.779456193353475 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3296979865771812, - "normalized_score": 10.626398210290827 - }, - "musr": { - "name": "MUSR", - "value": 0.401875, - "normalized_score": 8.201041666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4007646276595745, - "normalized_score": 33.41829196217494 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-12", - "submission_date": "2025-02-12", - "generation": 1, - "base_model": "Sorawiz/Gemma-Creative-9B-Base (Merge)", - "hub_license": "", - "hub_hearts": 3, - "params_billions": 10.159, - "co2_cost": 2.051281638551995 - } - }, - { - "id": "Sourjayon/DeepSeek-R1-8b-Sify_float16_5e9bb0d78129b9b5a8b91d0dacc55de23b8c21fe_True", - "model": { - "name": "Sourjayon/DeepSeek-R1-8b-Sify", - "sha": "5e9bb0d78129b9b5a8b91d0dacc55de23b8c21fe", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.313209574216847, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3679481553389451, - "normalized_score": 36.79481553389451 - }, - "bbh": { - "name": "BBH", - "value": 0.33793580116642347, - "normalized_score": 6.9268224957064595 - }, - "math": { - "name": "MATH Level 5", - "value": 0.24471299093655588, - "normalized_score": 24.47129909365559 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2525167785234899, - "normalized_score": 0.33557046979865535 - }, - "musr": { - "name": "MUSR", - "value": 0.3303125, - "normalized_score": 0.45572916666666624 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.19805518617021275, - "normalized_score": 10.895020685579194 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-05", - "submission_date": "2025-02-10", - "generation": 2, - "base_model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.8653249275105314 - } - }, - { - "id": "Sourjayon/DeepSeek-R1-ForumNXT_float16_8c5fe80c0c72215522cd277878bfb97319ff845d_False", - "model": { - "name": "Sourjayon/DeepSeek-R1-ForumNXT", - "sha": "8c5fe80c0c72215522cd277878bfb97319ff845d", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 11.959695622199478, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.26028714920854445, - "normalized_score": 26.028714920854448 - }, - "bbh": { - "name": "BBH", - "value": 0.3310198487331462, - "normalized_score": 6.957542087542088 - }, - "math": { - "name": "MATH Level 5", - "value": 0.25755287009063443, - "normalized_score": 25.755287009063444 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.3392395833333333, - "normalized_score": 2.5716145833333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16481050531914893, - "normalized_score": 7.201167257683213 - } - }, - "features": { - 
"is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-31", - "submission_date": "2025-02-03", - "generation": 2, - "base_model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.777, - "co2_cost": 1.2067745961926486 - } - }, - { - "id": "SpaceYL/ECE_Poirot_bfloat16_601fc736a6b7f0cff96219cbd9a903070db37adb_False", - "model": { - "name": "SpaceYL/ECE_Poirot", - "sha": "601fc736a6b7f0cff96219cbd9a903070db37adb", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 15.742560298806495, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3106956209524063, - "normalized_score": 31.069562095240627 - }, - "bbh": { - "name": "BBH", - "value": 0.42622349736626014, - "normalized_score": 18.616426052772713 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09138972809667674, - "normalized_score": 9.138972809667674 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.40264583333333337, - "normalized_score": 8.330729166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2883144946808511, - "normalized_score": 20.923832742316783 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-20", - "submission_date": "2025-02-20", - "generation": 1, - "base_model": "SpaceYL/ECE_Poirot (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 5, - "params_billions": 1.544, - "co2_cost": 0.6143529019539091 - } - }, - { - "id": "Spestly/Athena-1-3B_float16_b2f9ab2db333f73c0adb8fa83837dbfb6cbd6204_True", - "model": { - "name": "Spestly/Athena-1-3B", - "sha": "b2f9ab2db333f73c0adb8fa83837dbfb6cbd6204", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 25.48204682938236, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5569167586448401, - "normalized_score": 55.69167586448401 - }, - "bbh": { - "name": "BBH", - "value": 0.47015477265388084, - "normalized_score": 26.308869593300415 - }, - "math": { - "name": "MATH Level 5", - "value": 0.23791540785498488, - "normalized_score": 23.791540785498487 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2936241610738255, - "normalized_score": 5.8165548098433995 - }, - "musr": { - "name": "MUSR", - "value": 0.43622916666666667, - "normalized_score": 13.2953125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35189494680851063, - "normalized_score": 27.988327423167853 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-18", - "submission_date": "2025-01-28", - "generation": 2, - "base_model": "Qwen/Qwen2.5-3B", - "hub_license": "other", - "hub_hearts": 1, - "params_billions": 3.086, - "co2_cost": 1.4893610158375592 - } - }, - { - "id": "Spestly/Atlas-Pro-1.5B-Preview_float16_4fce245a33bec99c00548878787413c2dafec0b7_False", - "model": { - "name": "Spestly/Atlas-Pro-1.5B-Preview", - "sha": 
"4fce245a33bec99c00548878787413c2dafec0b7", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 13.9538569769866, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2429509257658568, - "normalized_score": 24.29509257658568 - }, - "bbh": { - "name": "BBH", - "value": 0.349893585329524, - "normalized_score": 9.07740771263535 - }, - "math": { - "name": "MATH Level 5", - "value": 0.31948640483383683, - "normalized_score": 31.948640483383684 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.3354270833333333, - "normalized_score": 1.8617187499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1924867021276596, - "normalized_score": 10.27630023640662 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-27", - "submission_date": "2025-01-27", - "generation": 2, - "base_model": "Spestly/Atlas-R1-1.5B-Preview (Merge)", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 1.777, - "co2_cost": 1.1789415089573965 - } - }, - { - "id": "Spestly/Atlas-Pro-7B-Preview_float16_3c693093b74675bebc507a0b92bb45e2bd0ee177_False", - "model": { - "name": "Spestly/Atlas-Pro-7B-Preview", - "sha": "3c693093b74675bebc507a0b92bb45e2bd0ee177", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 24.63755262041268, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.31541642840995227, - "normalized_score": 31.541642840995227 - }, - "bbh": { - "name": "BBH", - "value": 0.46679203304308553, - "normalized_score": 25.274194951351465 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5083081570996979, - "normalized_score": 50.83081570996979 - }, - "gpqa": { - "name": "GPQA", - "value": 0.337248322147651, - "normalized_score": 11.633109619686799 - }, - "musr": { - "name": "MUSR", - "value": 0.3910833333333333, - "normalized_score": 6.6520833333333345 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2970412234042553, - "normalized_score": 21.89346926713948 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-27", - "submission_date": "2025-01-27", - "generation": 2, - "base_model": "Spestly/Atlas-R1-7B-Preview (Merge)", - "hub_license": "mit", - "hub_hearts": 3, - "params_billions": 7.616, - "co2_cost": 1.385866069632902 - } - }, - { - "id": "Stark2008/GutenLaserPi_bfloat16_d5ab84c6f8f0c88c16380242c7e11e8cefc934b7_False", - "model": { - "name": "Stark2008/GutenLaserPi", - "sha": "d5ab84c6f8f0c88c16380242c7e11e8cefc934b7", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.400724622957522, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42265300513747966, - "normalized_score": 42.26530051374797 - }, - "bbh": { - "name": "BBH", - "value": 0.5212342482489518, - "normalized_score": 32.97771006701662 - }, - "math": { - "name": "MATH Level 5", - "value": 
0.07854984894259819, - "normalized_score": 7.854984894259818 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28691275167785235, - "normalized_score": 4.921700223713646 - }, - "musr": { - "name": "MUSR", - "value": 0.4620208333333333, - "normalized_score": 16.985937499999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31058843085106386, - "normalized_score": 23.39871453900709 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-11", - "submission_date": "2024-07-11", - "generation": 1, - "base_model": "Stark2008/GutenLaserPi (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.1399303674874355 - } - }, - { - "id": "Stark2008/LayleleFlamPi_bfloat16_b2897d17a65dea17383f52711475c8b41567c5d0_False", - "model": { - "name": "Stark2008/LayleleFlamPi", - "sha": "b2897d17a65dea17383f52711475c8b41567c5d0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.871096216576518, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42842325030917966, - "normalized_score": 42.84232503091796 - }, - "bbh": { - "name": "BBH", - "value": 0.5115654142581095, - "normalized_score": 31.20740955947399 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06646525679758308, - "normalized_score": 6.646525679758309 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28523489932885904, - "normalized_score": 4.697986577181204 - }, - "musr": { - "name": "MUSR", - "value": 0.46084375, - "normalized_score": 16.57213541666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3093417553191489, - "normalized_score": 23.26019503546099 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-12", - "submission_date": "2024-07-12", - "generation": 1, - "base_model": "Stark2008/LayleleFlamPi (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.2593159866249992 - } - }, - { - "id": "Stark2008/VisFlamCat_float16_290efa41ac83b8408cab084d093bcd9ae9abb0c9_False", - "model": { - "name": "Stark2008/VisFlamCat", - "sha": "290efa41ac83b8408cab084d093bcd9ae9abb0c9", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.340907115633893, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43659157701565177, - "normalized_score": 43.65915770156518 - }, - "bbh": { - "name": "BBH", - "value": 0.5216957865099948, - "normalized_score": 32.881396834037055 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07628398791540786, - "normalized_score": 7.628398791540786 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.44627083333333334, - "normalized_score": 14.68385416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31441156914893614, - "normalized_score": 23.823507683215126 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - 
"upload_date": "2024-07-12", - "submission_date": "2024-07-12", - "generation": 1, - "base_model": "Stark2008/VisFlamCat (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.2204318533152774 - } - }, - { - "id": "Steelskull/L3.3-MS-Nevoria-70b_bfloat16_6271121beeac444db45ef12ce7c52215604463c3_False", - "model": { - "name": "Steelskull/L3.3-MS-Nevoria-70b", - "sha": "6271121beeac444db45ef12ce7c52215604463c3", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 44.041818757700526, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6963268571833845, - "normalized_score": 69.63268571833845 - }, - "bbh": { - "name": "BBH", - "value": 0.6997536580025828, - "normalized_score": 56.60264873723427 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3957703927492447, - "normalized_score": 39.577039274924466 - }, - "gpqa": { - "name": "GPQA", - "value": 0.47063758389261745, - "normalized_score": 29.418344519015662 - }, - "musr": { - "name": "MUSR", - "value": 0.4682291666666667, - "normalized_score": 18.628645833333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5535239361702128, - "normalized_score": 50.39154846335697 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-14", - "submission_date": "2025-01-17", - "generation": 1, - "base_model": "Steelskull/L3.3-MS-Nevoria-70b (Merge)", - "hub_license": "other", - "hub_hearts": 73, - "params_billions": 70.554, - "co2_cost": 39.22482966970595 - } - }, - { - "id": "Steelskull/L3.3-Nevoria-R1-70b_bfloat16_cdcb10280e4c652127eb3d3af61125fc7f731fdd_True", - "model": { - "name": "Steelskull/L3.3-Nevoria-R1-70b", - "sha": "cdcb10280e4c652127eb3d3af61125fc7f731fdd", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 43.61308347965567, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6023794642659255, - "normalized_score": 60.23794642659256 - }, - "bbh": { - "name": "BBH", - "value": 0.6971668662651651, - "normalized_score": 56.167288330479515 - }, - "math": { - "name": "MATH Level 5", - "value": 0.46299093655589124, - "normalized_score": 46.299093655589125 - }, - "gpqa": { - "name": "GPQA", - "value": 0.46895973154362414, - "normalized_score": 29.194630872483216 - }, - "musr": { - "name": "MUSR", - "value": 0.47753125, - "normalized_score": 20.191406249999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5462932180851063, - "normalized_score": 49.5881353427896 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-23", - "submission_date": "2025-02-05", - "generation": 1, - "base_model": "Steelskull/L3.3-Nevoria-R1-70b (Merge)", - "hub_license": "other", - "hub_hearts": 70, - "params_billions": 70.554, - "co2_cost": 39.62696233736567 - } - }, - { - "id": "StelleX/Qwen2.5_Math_7B_Cot_bfloat16_1549288a296c6e44cfcf4b9513769000bc768e36_False", - "model": { - "name": "StelleX/Qwen2.5_Math_7B_Cot", - "sha": "1549288a296c6e44cfcf4b9513769000bc768e36", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - 
"weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 17.801855998042843, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2142747908881767, - "normalized_score": 21.42747908881767 - }, - "bbh": { - "name": "BBH", - "value": 0.4312922433417096, - "normalized_score": 19.796911486609314 - }, - "math": { - "name": "MATH Level 5", - "value": 0.32628398791540786, - "normalized_score": 32.62839879154079 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.39241666666666664, - "normalized_score": 6.918750000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.281000664893617, - "normalized_score": 20.111184988179666 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-10-29", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 2.0559838993158137 - } - }, - { - "id": "StelleX/Vorisatex-7B-preview_bfloat16_57612bb8af75e5e8d75b4df3dde993fdc48efbea_False", - "model": { - "name": "StelleX/Vorisatex-7B-preview", - "sha": "57612bb8af75e5e8d75b4df3dde993fdc48efbea", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 5.954613231174487, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1515013497519914, - "normalized_score": 15.150134975199137 - }, - "bbh": { - "name": "BBH", - "value": 0.3111695757290421, - "normalized_score": 4.133712426973548 - }, - "math": { - "name": "MATH Level 5", - "value": 0.028700906344410877, - "normalized_score": 2.8700906344410875 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2516778523489933, - "normalized_score": 0.22371364653244186 - }, - "musr": { - "name": "MUSR", - "value": 0.41923958333333333, - "normalized_score": 11.504947916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11660571808510638, - "normalized_score": 1.8450797872340412 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-10-29", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 2.4981429762342664 - } - }, - { - "id": "SultanR/SmolTulu-1.7b-Instruct_bfloat16_11ed78c7c7a2e7f3c73c8f6f36c010f6dcba3245_True", - "model": { - "name": "SultanR/SmolTulu-1.7b-Instruct", - "sha": "11ed78c7c7a2e7f3c73c8f6f36c010f6dcba3245", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.331009858160883, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6540867121459949, - "normalized_score": 65.4086712145995 - }, - "bbh": { - "name": "BBH", - "value": 0.3713086260572204, - "normalized_score": 12.25982971199621 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07930513595166164, - "normalized_score": 7.930513595166164 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26929530201342283, - "normalized_score": 2.572706935123044 - }, - "musr": 
{ - "name": "MUSR", - "value": 0.35403125, - "normalized_score": 1.920572916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17104388297872342, - "normalized_score": 7.893764775413713 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-01", - "submission_date": "2024-12-01", - "generation": 1, - "base_model": "SultanR/SmolTulu-1.7b-Instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 13, - "params_billions": 1.711, - "co2_cost": 0.614783493029297 - } - }, - { - "id": "SultanR/SmolTulu-1.7b-Reinforced_bfloat16_530b6c0c63a3513fd012e218ad53d64b75d1b259_True", - "model": { - "name": "SultanR/SmolTulu-1.7b-Reinforced", - "sha": "530b6c0c63a3513fd012e218ad53d64b75d1b259", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.5748337360141, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6790659893526954, - "normalized_score": 67.90659893526953 - }, - "bbh": { - "name": "BBH", - "value": 0.3551868188444029, - "normalized_score": 10.015214516485557 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07175226586102719, - "normalized_score": 7.175226586102719 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.34060416666666665, - "normalized_score": 2.408854166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17627992021276595, - "normalized_score": 8.475546690307327 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-16", - "submission_date": "2024-12-16", - "generation": 1, - "base_model": "SultanR/SmolTulu-1.7b-Reinforced (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 5, - "params_billions": 1.711, - "co2_cost": 0.5792320587665617 - } - }, - { - "id": "SultanR/SmolTulu-1.7b-it-v0_bfloat16_75369e5c868ba261ea13f7bf85987ac1fe7ceb72_True", - "model": { - "name": "SultanR/SmolTulu-1.7b-it-v0", - "sha": "75369e5c868ba261ea13f7bf85987ac1fe7ceb72", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.331009858160883, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6540867121459949, - "normalized_score": 65.4086712145995 - }, - "bbh": { - "name": "BBH", - "value": 0.3713086260572204, - "normalized_score": 12.25982971199621 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07930513595166164, - "normalized_score": 7.930513595166164 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26929530201342283, - "normalized_score": 2.572706935123044 - }, - "musr": { - "name": "MUSR", - "value": 0.35403125, - "normalized_score": 1.920572916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17104388297872342, - "normalized_score": 7.893764775413713 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-01", - "submission_date": "2024-12-01", - "generation": 1, - "base_model": "SultanR/SmolTulu-1.7b-it-v0 (Merge)", - "hub_license": 
"apache-2.0", - "hub_hearts": 13, - "params_billions": 1.711, - "co2_cost": 0.612902656162427 - } - }, - { - "id": "Supichi/BBA-123_bfloat16_7551128748cbe65e49192e9551217e70bb00574d_False", - "model": { - "name": "Supichi/BBA-123", - "sha": "7551128748cbe65e49192e9551217e70bb00574d", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.7970062398113145, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2079548930171944, - "normalized_score": 20.79548930171944 - }, - "bbh": { - "name": "BBH", - "value": 0.2920111436321769, - "normalized_score": 2.218337253816706 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.34990625000000003, - "normalized_score": 2.5716145833333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11668882978723404, - "normalized_score": 1.854314420803781 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-26", - "submission_date": "2025-02-26", - "generation": 1, - "base_model": "Supichi/BBA-123 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 17.161, - "co2_cost": 1.4063001393023913 - } - }, - { - "id": "Supichi/BBA99_bfloat16_5b6e65ee5eb1c8bd0923108dc1929269f0d5b4bc_False", - "model": { - "name": "Supichi/BBA99", - "sha": "5b6e65ee5eb1c8bd0923108dc1929269f0d5b4bc", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.5504559517392935, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.14066011516110588, - "normalized_score": 14.066011516110587 - }, - "bbh": { - "name": "BBH", - "value": 0.2768958340020912, - "normalized_score": 1.3050510001980589 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.32184375, - "normalized_score": 2.897135416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11120345744680851, - "normalized_score": 1.2448286052009452 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-26", - "submission_date": "2025-02-26", - "generation": 1, - "base_model": "Supichi/BBA99 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 17.161, - "co2_cost": 1.4096340914337329 - } - }, - { - "id": "Supichi/BBAIK29_bfloat16_6a8f2407d1f64205d5104126fb20d57377510cc8_False", - "model": { - "name": "Supichi/BBAIK29", - "sha": "6a8f2407d1f64205d5104126fb20d57377510cc8", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 30.240917048589704, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45884807865352817, - "normalized_score": 45.88480786535282 - }, - "bbh": { - "name": "BBH", - 
"value": 0.5589641249478369, - "normalized_score": 36.96354856971481 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3678247734138973, - "normalized_score": 36.78247734138973 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31208053691275167, - "normalized_score": 8.277404921700223 - }, - "musr": { - "name": "MUSR", - "value": 0.45008333333333334, - "normalized_score": 14.993750000000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4468916223404255, - "normalized_score": 38.54351359338061 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "Supichi/BBAIK29 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.6412984065814576 - } - }, - { - "id": "Supichi/BBAI_135_Gemma_bfloat16_487cc6e1636bc7eda7c9ba19cd066890144397cf_False", - "model": { - "name": "Supichi/BBAI_135_Gemma", - "sha": "487cc6e1636bc7eda7c9ba19cd066890144397cf", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 5.349663435184209, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.06562144000141845, - "normalized_score": 6.562144000141846 - }, - "bbh": { - "name": "BBH", - "value": 0.35684129093449685, - "normalized_score": 10.857975857833802 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.38047916666666665, - "normalized_score": 4.859895833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16722074468085107, - "normalized_score": 7.468971631205674 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-26", - "submission_date": "2025-02-26", - "generation": 1, - "base_model": "Supichi/BBAI_135_Gemma (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 19.3, - "co2_cost": 3.2168864661964816 - } - }, - { - "id": "Supichi/BBAI_250_Xia0_gZ_bfloat16_ae7d06f7e08b50df13d09541b4bbe08425d857ca_False", - "model": { - "name": "Supichi/BBAI_250_Xia0_gZ", - "sha": "ae7d06f7e08b50df13d09541b4bbe08425d857ca", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 30.643171147465264, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4685401401614383, - "normalized_score": 46.854014016143836 - }, - "bbh": { - "name": "BBH", - "value": 0.5567682997527722, - "normalized_score": 36.654123211962506 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3640483383685801, - "normalized_score": 36.40483383685801 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3213087248322148, - "normalized_score": 9.507829977628639 - }, - "musr": { - "name": "MUSR", - "value": 0.4579270833333333, - "normalized_score": 15.940885416666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4464760638297872, - "normalized_score": 38.49734042553191 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - 
"is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "Supichi/BBAI_250_Xia0_gZ (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.6098356306802665 - } - }, - { - "id": "Supichi/BBAI_275_Tsunami_gZ_bfloat16_1373772684b0ec67425d782107d4b13ea1bcc2c1_False", - "model": { - "name": "Supichi/BBAI_275_Tsunami_gZ", - "sha": "1373772684b0ec67425d782107d4b13ea1bcc2c1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 30.835431920268267, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5369586031729146, - "normalized_score": 53.695860317291455 - }, - "bbh": { - "name": "BBH", - "value": 0.5531259476127334, - "normalized_score": 36.25417674909635 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3285498489425982, - "normalized_score": 32.85498489425982 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3213087248322148, - "normalized_score": 9.507829977628639 - }, - "musr": { - "name": "MUSR", - "value": 0.44478124999999996, - "normalized_score": 13.897656250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44921875, - "normalized_score": 38.802083333333336 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "Supichi/BBAI_275_Tsunami_gZ (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.6195082794233058 - } - }, - { - "id": "Supichi/BBAI_525_Tsu_gZ_Xia0_bfloat16_5338ec1d5ab109af8985ade45ddcd447fca07ce5_False", - "model": { - "name": "Supichi/BBAI_525_Tsu_gZ_Xia0", - "sha": "5338ec1d5ab109af8985ade45ddcd447fca07ce5", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 30.93538839809629, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5338612658856279, - "normalized_score": 53.386126588562796 - }, - "bbh": { - "name": "BBH", - "value": 0.5561933633430705, - "normalized_score": 36.525252487756354 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3429003021148036, - "normalized_score": 34.29003021148036 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31208053691275167, - "normalized_score": 8.277404921700223 - }, - "musr": { - "name": "MUSR", - "value": 0.44744791666666667, - "normalized_score": 14.49765625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44772273936170215, - "normalized_score": 38.63585992907802 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "Supichi/BBAI_525_Tsu_gZ_Xia0 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.6500205335836085 - } - }, - { - "id": "Supichi/BBAI_78B_Calme_3_1_Ties_bfloat16_d07c51d1296700d65a39301e568b03058e4ee2ca_False", - "model": { - "name": "Supichi/BBAI_78B_Calme_3_1_Ties", - "sha": 
"d07c51d1296700d65a39301e568b03058e4ee2ca", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.9420514629428283, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.18280052482967415, - "normalized_score": 18.280052482967417 - }, - "bbh": { - "name": "BBH", - "value": 0.28281264175951776, - "normalized_score": 1.530417863838492 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.22902684563758388, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.30996874999999996, - "normalized_score": 2.2460937499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11436170212765957, - "normalized_score": 1.595744680851063 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-28", - "submission_date": "2025-02-28", - "generation": 1, - "base_model": "Supichi/BBAI_78B_Calme_3_1_Ties (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 27.06, - "co2_cost": 2.3646634423965374 - } - }, - { - "id": "Supichi/BBAI_QWEEN_V000000_LUMEN_14B_bfloat16_f8ada5e5d4986818bfd70e99ef7ee7f1a9e8bff5_False", - "model": { - "name": "Supichi/BBAI_QWEEN_V000000_LUMEN_14B", - "sha": "f8ada5e5d4986818bfd70e99ef7ee7f1a9e8bff5", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.277816309763046, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.18145188100905596, - "normalized_score": 18.1451881009056 - }, - "bbh": { - "name": "BBH", - "value": 0.22972580681005383, - "normalized_score": 3.0068974054268174 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23154362416107382, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3445416666666667, - "normalized_score": 2.7343749999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11602393617021277, - "normalized_score": 1.7804373522458627 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-26", - "submission_date": "2025-02-26", - "generation": 1, - "base_model": "Supichi/BBAI_QWEEN_V000000_LUMEN_14B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 10.366, - "co2_cost": 0.9180784406772212 - } - }, - { - "id": "Supichi/HF_TOKEN_bfloat16_a2d007a56354d9bd21cabda7615b65cb3955e7da_False", - "model": { - "name": "Supichi/HF_TOKEN", - "sha": "a2d007a56354d9bd21cabda7615b65cb3955e7da", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.4876527968358464, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1379872072766925, - "normalized_score": 13.798720727669249 - }, - "bbh": { - "name": "BBH", - "value": 0.2763924734767205, - "normalized_score": 1.1476980590215882 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0007552870090634441, - "normalized_score": 0.0755287009063444 - }, 
- "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.32717708333333334, - "normalized_score": 2.897135416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11095412234042554, - "normalized_score": 1.2171247044917257 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-26", - "submission_date": "2025-02-26", - "generation": 1, - "base_model": "Supichi/HF_TOKEN (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 17.161, - "co2_cost": 1.4168891762749531 - } - }, - { - "id": "Supichi/NJS26_bfloat16_cbc12277c5f471fcf15fe4078dd2715d5baa972f_False", - "model": { - "name": "Supichi/NJS26", - "sha": "cbc12277c5f471fcf15fe4078dd2715d5baa972f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 11.996375840759782, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.04481331755298164, - "normalized_score": 4.481331755298164 - }, - "bbh": { - "name": "BBH", - "value": 0.4780152929488641, - "normalized_score": 26.847431929671945 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0324773413897281, - "normalized_score": 3.2477341389728096 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3179530201342282, - "normalized_score": 9.060402684563762 - }, - "musr": { - "name": "MUSR", - "value": 0.38540625, - "normalized_score": 5.709114583333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3036901595744681, - "normalized_score": 22.632239952718678 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-26", - "submission_date": "2025-02-26", - "generation": 1, - "base_model": "Supichi/NJS26 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 0.41695909593484287 - } - }, - { - "id": "Svak/MN-12B-Inferor-v0.0_float16_ab9efd0cc19b862ea1ab37a60dacac78aa022ad1_True", - "model": { - "name": "Svak/MN-12B-Inferor-v0.0", - "sha": "ab9efd0cc19b862ea1ab37a60dacac78aa022ad1", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 25.41093424607308, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5707555951541909, - "normalized_score": 57.07555951541909 - }, - "bbh": { - "name": "BBH", - "value": 0.5195010930589931, - "normalized_score": 30.846426792178836 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10196374622356495, - "normalized_score": 10.196374622356496 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3087248322147651, - "normalized_score": 7.829977628635347 - }, - "musr": { - "name": "MUSR", - "value": 0.46388541666666666, - "normalized_score": 18.085677083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3558843085106383, - "normalized_score": 28.431589834515364 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-07", - "submission_date": "2024-11-08", - "generation": 1, - "base_model": 
"Svak/MN-12B-Inferor-v0.0 (Merge)", - "hub_license": "", - "hub_hearts": 10, - "params_billions": 12.248, - "co2_cost": 2.494259584397854 - } - }, - { - "id": "Svak/MN-12B-Inferor-v0.1_float16_2d8cfac16dac3151d5e8e5ecd62866ca83c5149a_True", - "model": { - "name": "Svak/MN-12B-Inferor-v0.1", - "sha": "2d8cfac16dac3151d5e8e5ecd62866ca83c5149a", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 26.937535331905043, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6346527214457639, - "normalized_score": 63.46527214457639 - }, - "bbh": { - "name": "BBH", - "value": 0.5146762089838804, - "normalized_score": 30.850764971038174 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12613293051359517, - "normalized_score": 12.613293051359516 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32550335570469796, - "normalized_score": 10.067114093959727 - }, - "musr": { - "name": "MUSR", - "value": 0.4350833333333333, - "normalized_score": 15.052083333333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3661901595744681, - "normalized_score": 29.57668439716312 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-08", - "submission_date": "2024-11-08", - "generation": 1, - "base_model": "Svak/MN-12B-Inferor-v0.1 (Merge)", - "hub_license": "", - "hub_hearts": 5, - "params_billions": 12.248, - "co2_cost": 1.990764986078632 - } - }, - { - "id": "Syed-Hasan-8503/Phi-3-mini-4K-instruct-cpo-simpo_bfloat16_2896ef357be81fd433c17801d76ce148e60a7032_True", - "model": { - "name": "Syed-Hasan-8503/Phi-3-mini-4K-instruct-cpo-simpo", - "sha": "2896ef357be81fd433c17801d76ce148e60a7032", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 27.216374484698875, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5714049832222946, - "normalized_score": 57.14049832222946 - }, - "bbh": { - "name": "BBH", - "value": 0.5681534123661078, - "normalized_score": 39.148157776889455 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15709969788519637, - "normalized_score": 15.709969788519636 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33053691275167785, - "normalized_score": 10.738255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.3963541666666666, - "normalized_score": 8.777604166666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38605385638297873, - "normalized_score": 31.783761820330973 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-24", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "Syed-Hasan-8503/Phi-3-mini-4K-instruct-cpo-simpo", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 3.821, - "co2_cost": 2.3284766238434114 - } - }, - { - "id": "T145/KRONOS-8B-V1-P1_bfloat16_f39b904870a1b8b922e650214819ab10a3028d0f_True", - "model": { - "name": "T145/KRONOS-8B-V1-P1", - "sha": "f39b904870a1b8b922e650214819ab10a3028d0f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - 
"average_score": 28.907659501595617, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7849783020164276, - "normalized_score": 78.49783020164276 - }, - "bbh": { - "name": "BBH", - "value": 0.508544756293663, - "normalized_score": 29.9732800557004 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19788519637462235, - "normalized_score": 19.788519637462233 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2953020134228188, - "normalized_score": 6.040268456375841 - }, - "musr": { - "name": "MUSR", - "value": 0.3881041666666667, - "normalized_score": 8.479687500000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3759973404255319, - "normalized_score": 30.666371158392437 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-05", - "submission_date": "2025-02-05", - "generation": 1, - "base_model": "T145/KRONOS-8B-V1-P1 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.4941161225327542 - } - }, - { - "id": "T145/KRONOS-8B-V1-P2_bfloat16_7d3caeb1c7d1a8cf55e124a6df4ade74f8d89a0e_True", - "model": { - "name": "T145/KRONOS-8B-V1-P2", - "sha": "7d3caeb1c7d1a8cf55e124a6df4ade74f8d89a0e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.505318597003576, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6724213974476612, - "normalized_score": 67.2421397447661 - }, - "bbh": { - "name": "BBH", - "value": 0.47717566218002166, - "normalized_score": 25.86433596064097 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16012084592145015, - "normalized_score": 16.012084592145015 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.3567604166666667, - "normalized_score": 5.0617187500000025 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3453291223404255, - "normalized_score": 27.258791371158388 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-05", - "submission_date": "2025-02-06", - "generation": 1, - "base_model": "T145/KRONOS-8B-V1-P2 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3284026293633946 - } - }, - { - "id": "T145/KRONOS-8B-V1-P3_bfloat16_5ab222b73cd2291a1ef2499aa60d3ca786d119d5_True", - "model": { - "name": "T145/KRONOS-8B-V1-P3", - "sha": "5ab222b73cd2291a1ef2499aa60d3ca786d119d5", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.821837809885533, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7137373280673058, - "normalized_score": 71.37373280673059 - }, - "bbh": { - "name": "BBH", - "value": 0.5127875870036823, - "normalized_score": 30.270031651490015 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19259818731117825, - "normalized_score": 19.259818731117825 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - 
"value": 0.3615625, - "normalized_score": 5.961979166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34050864361702127, - "normalized_score": 26.723182624113473 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-06", - "submission_date": "2025-02-05", - "generation": 1, - "base_model": "T145/KRONOS-8B-V1-P3 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.492216135585029 - } - }, - { - "id": "T145/KRONOS-8B-V2_bfloat16_8a004e1e51aa24574ba961613fe9698df30bd9a0_True", - "model": { - "name": "T145/KRONOS-8B-V2", - "sha": "8a004e1e51aa24574ba961613fe9698df30bd9a0", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.04979364610371, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5180243974875552, - "normalized_score": 51.802439748755525 - }, - "bbh": { - "name": "BBH", - "value": 0.513268555595521, - "normalized_score": 30.67490713784655 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22658610271903323, - "normalized_score": 22.658610271903324 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2986577181208054, - "normalized_score": 6.487695749440718 - }, - "musr": { - "name": "MUSR", - "value": 0.38286458333333334, - "normalized_score": 8.258072916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3737533244680851, - "normalized_score": 30.417036052009454 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-08", - "submission_date": "2024-12-13", - "generation": 1, - "base_model": "T145/KRONOS-8B-V2 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3699907312036532 - } - }, - { - "id": "T145/KRONOS-8B-V3_bfloat16_75b0ff936de5caa98a6b9680bafeeb92d4b9abaa_True", - "model": { - "name": "T145/KRONOS-8B-V3", - "sha": "75b0ff936de5caa98a6b9680bafeeb92d4b9abaa", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.7368028097919, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5474751437297483, - "normalized_score": 54.74751437297482 - }, - "bbh": { - "name": "BBH", - "value": 0.511865544689898, - "normalized_score": 30.291098984835486 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2598187311178248, - "normalized_score": 25.981873111782477 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28859060402684567, - "normalized_score": 5.145413870246088 - }, - "musr": { - "name": "MUSR", - "value": 0.3922291666666667, - "normalized_score": 7.8286458333333355 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3738364361702128, - "normalized_score": 30.426270685579198 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-18", - "submission_date": "2024-12-18", - "generation": 1, - "base_model": "T145/KRONOS-8B-V3 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.4015645267678678 - } - }, - { - "id": 
"T145/KRONOS-8B-V4_bfloat16_f9faad008b866745fd60755e558f7a06d3a59da4_True", - "model": { - "name": "T145/KRONOS-8B-V4", - "sha": "f9faad008b866745fd60755e558f7a06d3a59da4", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.7502883448152, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7889499860370484, - "normalized_score": 78.89499860370483 - }, - "bbh": { - "name": "BBH", - "value": 0.5092470034846742, - "normalized_score": 30.14061947211616 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19486404833836857, - "normalized_score": 19.486404833836858 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28942953020134227, - "normalized_score": 5.257270693512303 - }, - "musr": { - "name": "MUSR", - "value": 0.38295833333333335, - "normalized_score": 7.76979166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37857380319148937, - "normalized_score": 30.952644799054376 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-19", - "submission_date": "2024-12-19", - "generation": 1, - "base_model": "T145/KRONOS-8B-V4 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3905189302327912 - } - }, - { - "id": "T145/KRONOS-8B-V5_bfloat16_67a69b38a382a7cebe2d8d7b52aeafab6ff89a29_True", - "model": { - "name": "T145/KRONOS-8B-V5", - "sha": "67a69b38a382a7cebe2d8d7b52aeafab6ff89a29", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.264834996106643, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5405058577906621, - "normalized_score": 54.05058577906621 - }, - "bbh": { - "name": "BBH", - "value": 0.5088651598969166, - "normalized_score": 30.17368221664718 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2688821752265861, - "normalized_score": 26.888217522658607 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.40546875, - "normalized_score": 10.450260416666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37591422872340424, - "normalized_score": 30.657136524822686 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-19", - "submission_date": "2024-12-20", - "generation": 1, - "base_model": "T145/KRONOS-8B-V5 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3808181462013638 - } - }, - { - "id": "T145/KRONOS-8B-V6_bfloat16_7afd2483e81c58ad3865a9cac6f2e66afe1d1f78_True", - "model": { - "name": "T145/KRONOS-8B-V6", - "sha": "7afd2483e81c58ad3865a9cac6f2e66afe1d1f78", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 27.898039972832333, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7022467054083166, - "normalized_score": 70.22467054083165 - }, - "bbh": { - "name": "BBH", - "value": 0.5033606149499412, - "normalized_score": 29.659286418525568 - }, - "math": { - "name": "MATH Level 5", 
- "value": 0.2598187311178248, - "normalized_score": 25.981873111782477 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.41210416666666666, - "normalized_score": 9.813020833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3501496010638298, - "normalized_score": 27.794400118203306 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-20", - "submission_date": "2024-12-20", - "generation": 1, - "base_model": "T145/KRONOS-8B-V6 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.4396566160893274 - } - }, - { - "id": "T145/KRONOS-8B-V7_bfloat16_422458ab11c4a8bb502fd8681551f9b54d7e6162_True", - "model": { - "name": "T145/KRONOS-8B-V7", - "sha": "422458ab11c4a8bb502fd8681551f9b54d7e6162", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 15.899831252721421, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3529102780622083, - "normalized_score": 35.291027806220825 - }, - "bbh": { - "name": "BBH", - "value": 0.4526219443939161, - "normalized_score": 23.89017313885566 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11102719033232629, - "normalized_score": 11.10271903323263 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26677852348993286, - "normalized_score": 2.2371364653243813 - }, - "musr": { - "name": "MUSR", - "value": 0.36711458333333336, - "normalized_score": 4.02265625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2696974734042553, - "normalized_score": 18.855274822695034 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-25", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 4.015, - "co2_cost": 1.4541474228859679 - } - }, - { - "id": "T145/KRONOS-8B-V8_bfloat16_1085c73a0b9bea22cc0dd85cf2745c62387949d9_True", - "model": { - "name": "T145/KRONOS-8B-V8", - "sha": "1085c73a0b9bea22cc0dd85cf2745c62387949d9", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.793303577270123, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7770349339751859, - "normalized_score": 77.70349339751859 - }, - "bbh": { - "name": "BBH", - "value": 0.5094406613555632, - "normalized_score": 30.053093995528446 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20468277945619334, - "normalized_score": 20.468277945619334 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28942953020134227, - "normalized_score": 5.257270693512303 - }, - "musr": { - "name": "MUSR", - "value": 0.3868958333333334, - "normalized_score": 8.36197916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37824135638297873, - "normalized_score": 30.915706264775416 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-01", - "submission_date": "2025-02-01", - 
"generation": 1, - "base_model": "T145/KRONOS-8B-V8 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.4554374939342578 - } - }, - { - "id": "T145/KRONOS-8B-V9_bfloat16_46861313565195fbc0edca6396c3c214b308baa1_True", - "model": { - "name": "T145/KRONOS-8B-V9", - "sha": "46861313565195fbc0edca6396c3c214b308baa1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.92215319526741, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7855778224001206, - "normalized_score": 78.55778224001206 - }, - "bbh": { - "name": "BBH", - "value": 0.5099211908307056, - "normalized_score": 30.06801140755337 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1986404833836858, - "normalized_score": 19.86404833836858 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.3868020833333334, - "normalized_score": 8.316927083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3751662234042553, - "normalized_score": 30.57402482269504 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "T145/KRONOS-8B-V9 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4379859323130169 - } - }, - { - "id": "T145/Llama-3.1-8B-Instruct-Zeus_bfloat16_672f9f1d4256f999b4513061c5406f60bfda2949_True", - "model": { - "name": "T145/Llama-3.1-8B-Instruct-Zeus", - "sha": "672f9f1d4256f999b4513061c5406f60bfda2949", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.649994293530643, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7941207108250552, - "normalized_score": 79.41207108250552 - }, - "bbh": { - "name": "BBH", - "value": 0.5173982439996302, - "normalized_score": 31.388990728478273 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19561933534743203, - "normalized_score": 19.561933534743204 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.39762499999999995, - "normalized_score": 8.569791666666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38929521276595747, - "normalized_score": 32.14391252955083 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-28", - "submission_date": "2024-11-28", - "generation": 1, - "base_model": "T145/Llama-3.1-8B-Instruct-Zeus (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.389369408893887 - } - }, - { - "id": "T145/Llama-3.1-8B-Zeus_bfloat16_712ff76aa966b0a9c5c65a074b2eb2b2cb56de86_True", - "model": { - "name": "T145/Llama-3.1-8B-Zeus", - "sha": "712ff76aa966b0a9c5c65a074b2eb2b2cb56de86", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 9.076440481991643, - "has_chat_template": true - }, - "evaluations": 
{ - "ifeval": { - "name": "IFEval", - "value": 0.35176110497923285, - "normalized_score": 35.17611049792329 - }, - "bbh": { - "name": "BBH", - "value": 0.3671175348446849, - "normalized_score": 10.560807756832054 - }, - "math": { - "name": "MATH Level 5", - "value": 0.014350453172205438, - "normalized_score": 1.4350453172205437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.33158333333333334, - "normalized_score": 1.58125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1332280585106383, - "normalized_score": 3.6920065011820316 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-28", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4634354982438125 - } - }, - { - "id": "T145/Meta-Llama-3.1-8B-Instruct-TIES_bfloat16_62a8c4f6e02a2e18f79688877fa925dcac8096aa_True", - "model": { - "name": "T145/Meta-Llama-3.1-8B-Instruct-TIES", - "sha": "62a8c4f6e02a2e18f79688877fa925dcac8096aa", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.976590554171448, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5423542866261519, - "normalized_score": 54.23542866261519 - }, - "bbh": { - "name": "BBH", - "value": 0.5070111385564763, - "normalized_score": 29.7742634799406 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20996978851963746, - "normalized_score": 20.996978851963746 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.3842916666666667, - "normalized_score": 8.036458333333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37799202127659576, - "normalized_score": 30.888002364066192 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-21", - "submission_date": "2024-12-22", - "generation": 1, - "base_model": "T145/Meta-Llama-3.1-8B-Instruct-TIES (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3694240258694423 - } - }, - { - "id": "T145/ZEUS-8B-V10_bfloat16_94b750b9de63df6391bb42d304a3dabea259b178_True", - "model": { - "name": "T145/ZEUS-8B-V10", - "sha": "94b750b9de63df6391bb42d304a3dabea259b178", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.36969062022236, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7706651684197928, - "normalized_score": 77.06651684197928 - }, - "bbh": { - "name": "BBH", - "value": 0.5269758270442659, - "normalized_score": 32.69504767226082 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21148036253776434, - "normalized_score": 21.148036253776432 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32466442953020136, - "normalized_score": 9.955257270693513 - }, - "musr": { - "name": "MUSR", - "value": 0.38978124999999997, - "normalized_score": 9.089322916666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 
0.390375664893617, - "normalized_score": 32.263962765957444 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-24", - "submission_date": "2024-12-24", - "generation": 1, - "base_model": "T145/ZEUS-8B-V10 (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.3669331779670844 - } - }, - { - "id": "T145/ZEUS-8B-V11_bfloat16_407c0bd5c2de36aee4a7c3ec769f82705616fcf2_True", - "model": { - "name": "T145/ZEUS-8B-V11", - "sha": "407c0bd5c2de36aee4a7c3ec769f82705616fcf2", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.941073374653865, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8099575792231279, - "normalized_score": 80.99575792231279 - }, - "bbh": { - "name": "BBH", - "value": 0.5161982586505715, - "normalized_score": 31.20791313817828 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19637462235649547, - "normalized_score": 19.637462235649547 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3145973154362416, - "normalized_score": 8.612975391498878 - }, - "musr": { - "name": "MUSR", - "value": 0.38066666666666665, - "normalized_score": 7.149999999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38838098404255317, - "normalized_score": 32.042331560283685 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "T145/ZEUS-8B-V11 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 2.17199566710153 - } - }, - { - "id": "T145/ZEUS-8B-V12_bfloat16_32ee6e7da83b1fac23e2d931279c3c4adb6d9718_True", - "model": { - "name": "T145/ZEUS-8B-V12", - "sha": "32ee6e7da83b1fac23e2d931279c3c4adb6d9718", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.333680488372995, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.781556270695089, - "normalized_score": 78.15562706950891 - }, - "bbh": { - "name": "BBH", - "value": 0.5253912026310238, - "normalized_score": 32.44900193426873 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21148036253776434, - "normalized_score": 21.148036253776432 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32046979865771813, - "normalized_score": 9.395973154362418 - }, - "musr": { - "name": "MUSR", - "value": 0.38584375, - "normalized_score": 8.497135416666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3912067819148936, - "normalized_score": 32.35630910165484 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "T145/ZEUS-8B-V12 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 2.7418777518816637 - } - }, - { - "id": "T145/ZEUS-8B-V13_bfloat16_48a73e233cf8736313b616ca0e87b26841318f4e_True", - "model": { - "name": "T145/ZEUS-8B-V13", - "sha": 
"48a73e233cf8736313b616ca0e87b26841318f4e", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.621361680379156, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7904238531540756, - "normalized_score": 79.04238531540756 - }, - "bbh": { - "name": "BBH", - "value": 0.5277128851736589, - "normalized_score": 32.72745832819285 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21374622356495468, - "normalized_score": 21.37462235649547 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3238255033557047, - "normalized_score": 9.843400447427292 - }, - "musr": { - "name": "MUSR", - "value": 0.38447916666666665, - "normalized_score": 8.393229166666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39112367021276595, - "normalized_score": 32.347074468085104 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-30", - "generation": 1, - "base_model": "T145/ZEUS-8B-V13 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.3517701951939556 - } - }, - { - "id": "T145/ZEUS-8B-V13-abliterated_bfloat16_1edf9d72638d57bbe697717344391355cb610781_True", - "model": { - "name": "T145/ZEUS-8B-V13-abliterated", - "sha": "1edf9d72638d57bbe697717344391355cb610781", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.488667624353806, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7877509452696623, - "normalized_score": 78.77509452696623 - }, - "bbh": { - "name": "BBH", - "value": 0.5197597316957202, - "normalized_score": 31.784785148944987 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17900302114803626, - "normalized_score": 17.900302114803626 - }, - "gpqa": { - "name": "GPQA", - "value": 0.311241610738255, - "normalized_score": 8.165548098434002 - }, - "musr": { - "name": "MUSR", - "value": 0.3871458333333333, - "normalized_score": 8.393229166666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38721742021276595, - "normalized_score": 31.91304669030733 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-31", - "submission_date": "2025-01-01", - "generation": 1, - "base_model": "T145/ZEUS-8B-V13-abliterated (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.36719859751768 - } - }, - { - "id": "T145/ZEUS-8B-V14_bfloat16_d5a831c4923e9effe2f64426f6066c66eec4569c_True", - "model": { - "name": "T145/ZEUS-8B-V14", - "sha": "d5a831c4923e9effe2f64426f6066c66eec4569c", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.191102257235432, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.770939994769434, - "normalized_score": 77.0939994769434 - }, - "bbh": { - "name": "BBH", - "value": 0.5274593322517976, - "normalized_score": 32.693445971056676 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21299093655589124, - "normalized_score": 
21.299093655589125 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32046979865771813, - "normalized_score": 9.395973154362418 - }, - "musr": { - "name": "MUSR", - "value": 0.3844479166666666, - "normalized_score": 8.289322916666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3913730053191489, - "normalized_score": 32.374778368794324 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-30", - "submission_date": "2024-12-30", - "generation": 1, - "base_model": "T145/ZEUS-8B-V14 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3481792126144727 - } - }, - { - "id": "T145/ZEUS-8B-V15_bfloat16_3d83f7ec7ddc41d81c4bb7859420f377427d5367_True", - "model": { - "name": "T145/ZEUS-8B-V15", - "sha": "3d83f7ec7ddc41d81c4bb7859420f377427d5367", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.370031092616262, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.701272623306161, - "normalized_score": 70.1272623306161 - }, - "bbh": { - "name": "BBH", - "value": 0.5537552380544757, - "normalized_score": 36.18160301722815 - }, - "math": { - "name": "MATH Level 5", - "value": 0.23036253776435045, - "normalized_score": 23.036253776435046 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.40199999999999997, - "normalized_score": 9.416666666666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.40591755319148937, - "normalized_score": 33.99083924349882 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-31", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 4.015, - "co2_cost": 1.3892912358486078 - } - }, - { - "id": "T145/ZEUS-8B-V16_bfloat16_497a1e669fd64ebb576149bfc55aa826383daaff_True", - "model": { - "name": "T145/ZEUS-8B-V16", - "sha": "497a1e669fd64ebb576149bfc55aa826383daaff", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.579931119756704, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7925471083392066, - "normalized_score": 79.25471083392065 - }, - "bbh": { - "name": "BBH", - "value": 0.5265817990313368, - "normalized_score": 32.53218336552364 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22054380664652568, - "normalized_score": 22.054380664652566 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.3950833333333333, - "normalized_score": 9.518749999999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39261968085106386, - "normalized_score": 32.513297872340424 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-31", - "submission_date": "2024-12-31", - "generation": 1, - "base_model": "T145/ZEUS-8B-V16 
(Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.331310957109588 - } - }, - { - "id": "T145/ZEUS-8B-V17_bfloat16_c8420f0ef12a0c42f5e1cb62adbb2bf403d4c77f_True", - "model": { - "name": "T145/ZEUS-8B-V17", - "sha": "c8420f0ef12a0c42f5e1cb62adbb2bf403d4c77f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 31.006563519372005, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7940708431406447, - "normalized_score": 79.40708431406446 - }, - "bbh": { - "name": "BBH", - "value": 0.525086643033107, - "normalized_score": 32.33848329071727 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2243202416918429, - "normalized_score": 22.432024169184288 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3221476510067114, - "normalized_score": 9.61968680089485 - }, - "musr": { - "name": "MUSR", - "value": 0.40162499999999995, - "normalized_score": 9.63645833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39345079787234044, - "normalized_score": 32.60564420803782 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-01", - "submission_date": "2025-01-01", - "generation": 1, - "base_model": "T145/ZEUS-8B-V17 (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.3446234519473816 - } - }, - { - "id": "T145/ZEUS-8B-V17-abliterated_bfloat16_b1e9142e0efb74d5ecc9ab82aff858ff6715678a_True", - "model": { - "name": "T145/ZEUS-8B-V17-abliterated", - "sha": "b1e9142e0efb74d5ecc9ab82aff858ff6715678a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.847962315480604, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7576009432749549, - "normalized_score": 75.7600943274955 - }, - "bbh": { - "name": "BBH", - "value": 0.520041374505222, - "normalized_score": 31.522204154633215 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04380664652567976, - "normalized_score": 4.380664652567976 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.42692708333333335, - "normalized_score": 13.132552083333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36220079787234044, - "normalized_score": 29.1334219858156 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-01", - "submission_date": "2025-01-01", - "generation": 1, - "base_model": "T145/ZEUS-8B-V17-abliterated (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 7.594, - "co2_cost": 1.3400490976803874 - } - }, - { - "id": "T145/ZEUS-8B-V17-abliterated-V2_bfloat16_9cc40b877185b7796b3cfc49558ce28a1cb0d207_True", - "model": { - "name": "T145/ZEUS-8B-V17-abliterated-V2", - "sha": "9cc40b877185b7796b3cfc49558ce28a1cb0d207", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.607357264113972, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": 
"IFEval", - "value": 0.6532123654126606, - "normalized_score": 65.32123654126606 - }, - "bbh": { - "name": "BBH", - "value": 0.49280119619174295, - "normalized_score": 27.568611667990155 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11178247734138973, - "normalized_score": 11.178247734138973 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27348993288590606, - "normalized_score": 3.1319910514541416 - }, - "musr": { - "name": "MUSR", - "value": 0.3407291666666667, - "normalized_score": 1.7578124999999993 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34017619680851063, - "normalized_score": 26.686244089834517 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-04", - "submission_date": "2025-01-04", - "generation": 1, - "base_model": "T145/ZEUS-8B-V17-abliterated-V2 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.6694349225561051 - } - }, - { - "id": "T145/ZEUS-8B-V17-abliterated-V4_bfloat16_04b4069dcd85e42eb2649fe39f00325e7febb415_True", - "model": { - "name": "T145/ZEUS-8B-V17-abliterated-V4", - "sha": "04b4069dcd85e42eb2649fe39f00325e7febb415", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.58716707472223, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7228298691915229, - "normalized_score": 72.2829869191523 - }, - "bbh": { - "name": "BBH", - "value": 0.5169216944225185, - "normalized_score": 30.971615423734274 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09365558912386707, - "normalized_score": 9.365558912386707 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.4187083333333333, - "normalized_score": 11.605208333333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37741023936170215, - "normalized_score": 30.82335992907802 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-04", - "submission_date": "2025-01-05", - "generation": 1, - "base_model": "T145/ZEUS-8B-V17-abliterated-V4 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 4, - "params_billions": 8.03, - "co2_cost": 1.3164047673296317 - } - }, - { - "id": "T145/ZEUS-8B-V18_bfloat16_bf3c9a2836a00cdccdc85b1587b46d1146877850_True", - "model": { - "name": "T145/ZEUS-8B-V18", - "sha": "bf3c9a2836a00cdccdc85b1587b46d1146877850", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.93292754557434, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7834046995305788, - "normalized_score": 78.34046995305789 - }, - "bbh": { - "name": "BBH", - "value": 0.5269802862530547, - "normalized_score": 32.52958987066048 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21827794561933533, - "normalized_score": 21.827794561933533 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3213087248322148, - "normalized_score": 9.507829977628639 - }, - "musr": { - "name": "MUSR", - "value": 0.40429166666666666, - "normalized_score": 10.703124999999998 - }, - 
"mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39419880319148937, - "normalized_score": 32.68875591016548 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-01", - "submission_date": "2025-01-02", - "generation": 1, - "base_model": "T145/ZEUS-8B-V18 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3272917306684742 - } - }, - { - "id": "T145/ZEUS-8B-V19_bfloat16_8000d59047526f61b9588180df0a862928c2ccea_True", - "model": { - "name": "T145/ZEUS-8B-V19", - "sha": "8000d59047526f61b9588180df0a862928c2ccea", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 31.073717256514314, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7882507302845339, - "normalized_score": 78.82507302845339 - }, - "bbh": { - "name": "BBH", - "value": 0.5276233222408697, - "normalized_score": 32.64362847061702 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22054380664652568, - "normalized_score": 22.054380664652566 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3221476510067114, - "normalized_score": 9.61968680089485 - }, - "musr": { - "name": "MUSR", - "value": 0.40429166666666666, - "normalized_score": 10.703124999999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3933676861702128, - "normalized_score": 32.59640957446809 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-03", - "submission_date": "2025-01-03", - "generation": 1, - "base_model": "T145/ZEUS-8B-V19 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3864482789696406 - } - }, - { - "id": "T145/ZEUS-8B-V2_bfloat16_8f874a61fe651717afaf484e3a556a0c11b7f292_True", - "model": { - "name": "T145/ZEUS-8B-V2", - "sha": "8f874a61fe651717afaf484e3a556a0c11b7f292", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.143481455653376, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8029384255996312, - "normalized_score": 80.29384255996311 - }, - "bbh": { - "name": "BBH", - "value": 0.5194405455747161, - "normalized_score": 31.605592775073944 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21601208459214502, - "normalized_score": 21.6012084592145 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - "musr": { - "name": "MUSR", - "value": 0.3910208333333333, - "normalized_score": 8.24427083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3896276595744681, - "normalized_score": 32.18085106382979 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-01", - "submission_date": "2024-12-01", - "generation": 1, - "base_model": "T145/ZEUS-8B-V2 (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.3914350329709466 - } - }, - { - "id": "T145/ZEUS-8B-V2-ORPO_bfloat16_fee1b04ccafb9f6bbb4db88effd837ad72e00571_True", - "model": { - 
"name": "T145/ZEUS-8B-V2-ORPO", - "sha": "fee1b04ccafb9f6bbb4db88effd837ad72e00571", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 27.882957614827564, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7186830941900824, - "normalized_score": 71.86830941900824 - }, - "bbh": { - "name": "BBH", - "value": 0.5075246906772, - "normalized_score": 29.59149027379915 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18277945619335348, - "normalized_score": 18.27794561933535 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3104026845637584, - "normalized_score": 8.05369127516779 - }, - "musr": { - "name": "MUSR", - "value": 0.39349999999999996, - "normalized_score": 9.754166666666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3677692819148936, - "normalized_score": 29.752142434988176 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-31", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 4.015, - "co2_cost": 1.4149754480427466 - } - }, - { - "id": "T145/ZEUS-8B-V2-abliterated_bfloat16_d07c040573a4a468d774e5f47811be3e4c05e622_True", - "model": { - "name": "T145/ZEUS-8B-V2-abliterated", - "sha": "d07c040573a4a468d774e5f47811be3e4c05e622", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.796705902139966, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7895495064207414, - "normalized_score": 78.95495064207414 - }, - "bbh": { - "name": "BBH", - "value": 0.5128868622210663, - "normalized_score": 30.98256419519066 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21148036253776434, - "normalized_score": 21.148036253776432 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31291946308724833, - "normalized_score": 8.389261744966444 - }, - "musr": { - "name": "MUSR", - "value": 0.3910833333333333, - "normalized_score": 7.918749999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38248005319148937, - "normalized_score": 31.386672576832154 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-30", - "submission_date": "2024-12-31", - "generation": 1, - "base_model": "T145/ZEUS-8B-V2-abliterated (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 2.055450230985255 - } - }, - { - "id": "T145/ZEUS-8B-V20_bfloat16_0daec2344934c6f945fe8df88de345f66c89fe84_True", - "model": { - "name": "T145/ZEUS-8B-V20", - "sha": "0daec2344934c6f945fe8df88de345f66c89fe84", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 31.039974330291205, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7955945779420825, - "normalized_score": 79.55945779420827 - }, - "bbh": { - "name": "BBH", - "value": 0.5244005058415827, - "normalized_score": 32.22158697725198 - }, - "math": { - "name": "MATH Level 5", - "value": 
0.2190332326283988, - "normalized_score": 21.90332326283988 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32298657718120805, - "normalized_score": 9.731543624161072 - }, - "musr": { - "name": "MUSR", - "value": 0.40432291666666664, - "normalized_score": 10.273697916666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3929521276595745, - "normalized_score": 32.55023640661938 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-07", - "submission_date": "2025-01-07", - "generation": 1, - "base_model": "T145/ZEUS-8B-V20 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3367754678837924 - } - }, - { - "id": "T145/ZEUS-8B-V21_bfloat16_8b3646b8e380835dc6955ae210743360b3f9c298_True", - "model": { - "name": "T145/ZEUS-8B-V21", - "sha": "8b3646b8e380835dc6955ae210743360b3f9c298", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 12.085754641843304, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3785145635801894, - "normalized_score": 37.85145635801894 - }, - "bbh": { - "name": "BBH", - "value": 0.33975753940458464, - "normalized_score": 7.358048379917139 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1593655589123867, - "normalized_score": 15.93655589123867 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.32615625, - "normalized_score": 1.5361979166666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17137632978723405, - "normalized_score": 7.930703309692672 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-08", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.440348362883099 - } - }, - { - "id": "T145/ZEUS-8B-V22_bfloat16_e0c00dfff7eb8b0fe0c3b63980d9558f55dd569c_True", - "model": { - "name": "T145/ZEUS-8B-V22", - "sha": "e0c00dfff7eb8b0fe0c3b63980d9558f55dd569c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 31.143603587984483, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7995163942782927, - "normalized_score": 79.95163942782926 - }, - "bbh": { - "name": "BBH", - "value": 0.5244915522507715, - "normalized_score": 32.21395635996418 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22280966767371602, - "normalized_score": 22.280966767371602 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32802013422818793, - "normalized_score": 10.402684563758392 - }, - "musr": { - "name": "MUSR", - "value": 0.3989583333333333, - "normalized_score": 9.369791666666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3937832446808511, - "normalized_score": 32.64258274231678 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-09", - "submission_date": "2025-01-09", - "generation": 
1, - "base_model": "T145/ZEUS-8B-V22 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.4125065320853716 - } - }, - { - "id": "T145/ZEUS-8B-V23_bfloat16_62b55d14842dfcbe33a0847e2b8fc18ffabf05bf_True", - "model": { - "name": "T145/ZEUS-8B-V23", - "sha": "62b55d14842dfcbe33a0847e2b8fc18ffabf05bf", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.43967386680849, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7621222799948582, - "normalized_score": 76.2122279994858 - }, - "bbh": { - "name": "BBH", - "value": 0.519500470668349, - "normalized_score": 31.46730001602275 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18202416918429004, - "normalized_score": 18.202416918429005 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.3921979166666667, - "normalized_score": 7.191406250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3666057180851064, - "normalized_score": 29.622857565011817 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-10", - "submission_date": "2025-01-10", - "generation": 1, - "base_model": "T145/ZEUS-8B-V23 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3324908358064513 - } - }, - { - "id": "T145/ZEUS-8B-V24_bfloat16_0b8f6c8232f6018f1a9849618773b16dd4405650_True", - "model": { - "name": "T145/ZEUS-8B-V24", - "sha": "0b8f6c8232f6018f1a9849618773b16dd4405650", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.065645178943935, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5999813827311533, - "normalized_score": 59.99813827311534 - }, - "bbh": { - "name": "BBH", - "value": 0.4777962576721959, - "normalized_score": 26.153953910062185 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14577039274924472, - "normalized_score": 14.577039274924472 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.3729166666666666, - "normalized_score": 4.7145833333333345 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32845744680851063, - "normalized_score": 25.384160756501178 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-22", - "submission_date": "2025-01-23", - "generation": 1, - "base_model": "T145/ZEUS-8B-V24 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.524096689802697 - } - }, - { - "id": "T145/ZEUS-8B-V25_bfloat16_20e25207ddcba81f481e6178b5ede453da0b93db_True", - "model": { - "name": "T145/ZEUS-8B-V25", - "sha": "20e25207ddcba81f481e6178b5ede453da0b93db", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.8147475535603, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 
0.33202790817253774, - "normalized_score": 33.20279081725378 - }, - "bbh": { - "name": "BBH", - "value": 0.4546907005207668, - "normalized_score": 21.846212671079687 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2039274924471299, - "normalized_score": 20.39274924471299 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.3488229166666667, - "normalized_score": 2.602864583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2884807180851064, - "normalized_score": 20.942302009456267 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-23", - "submission_date": "2025-01-23", - "generation": 1, - "base_model": "T145/ZEUS-8B-V25 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.6179681370552723 - } - }, - { - "id": "T145/ZEUS-8B-V26_bfloat16_621279b3792ebb97f5ea94136481d1a84c7babc1_True", - "model": { - "name": "T145/ZEUS-8B-V26", - "sha": "621279b3792ebb97f5ea94136481d1a84c7babc1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.628443632555204, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6707979272774018, - "normalized_score": 67.07979272774018 - }, - "bbh": { - "name": "BBH", - "value": 0.5231548583920674, - "normalized_score": 32.25100532809862 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12462235649546828, - "normalized_score": 12.462235649546828 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2953020134228188, - "normalized_score": 6.040268456375841 - }, - "musr": { - "name": "MUSR", - "value": 0.40162499999999995, - "normalized_score": 9.63645833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39070811170212766, - "normalized_score": 32.300901300236404 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-25", - "generation": 1, - "base_model": "T145/ZEUS-8B-V26 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.434081544674218 - } - }, - { - "id": "T145/ZEUS-8B-V27_bfloat16_819e1d5470b2d5e86e49ee0be692fee5016386ca_True", - "model": { - "name": "T145/ZEUS-8B-V27", - "sha": "819e1d5470b2d5e86e49ee0be692fee5016386ca", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.817908661081884, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.654361538495636, - "normalized_score": 65.4361538495636 - }, - "bbh": { - "name": "BBH", - "value": 0.52303129292911, - "normalized_score": 32.21930421807795 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13444108761329304, - "normalized_score": 13.444108761329304 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.39768749999999997, - "normalized_score": 9.844270833333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3902094414893617, - "normalized_score": 32.245493498817964 - } - 
}, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-26", - "submission_date": "2025-01-26", - "generation": 1, - "base_model": "T145/ZEUS-8B-V27 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.4555947946601697 - } - }, - { - "id": "T145/ZEUS-8B-V28_bfloat16_c70e0e93166320fe9e70c4b568239d6ec4c69d03_True", - "model": { - "name": "T145/ZEUS-8B-V28", - "sha": "c70e0e93166320fe9e70c4b568239d6ec4c69d03", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.179429325600655, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.635252241829457, - "normalized_score": 63.5252241829457 - }, - "bbh": { - "name": "BBH", - "value": 0.5254256199968339, - "normalized_score": 32.62174149720304 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1268882175226586, - "normalized_score": 12.688821752265861 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.38962499999999994, - "normalized_score": 8.836458333333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3902094414893617, - "normalized_score": 32.245493498817964 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-27", - "submission_date": "2025-01-28", - "generation": 1, - "base_model": "T145/ZEUS-8B-V28 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4544305897282979 - } - }, - { - "id": "T145/ZEUS-8B-V29_bfloat16_be5ab42b6f8339d012595850b91402da5a45ba48_True", - "model": { - "name": "T145/ZEUS-8B-V29", - "sha": "be5ab42b6f8339d012595850b91402da5a45ba48", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.116399573768543, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7417640748768822, - "normalized_score": 74.17640748768822 - }, - "bbh": { - "name": "BBH", - "value": 0.5253330901112457, - "normalized_score": 32.349726924866495 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16012084592145015, - "normalized_score": 16.012084592145015 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3263422818791946, - "normalized_score": 10.17897091722595 - }, - "musr": { - "name": "MUSR", - "value": 0.4002604166666666, - "normalized_score": 9.53255208333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3920378989361702, - "normalized_score": 32.448655437352244 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "T145/ZEUS-8B-V29 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.408967476441205 - } - }, - { - "id": "T145/ZEUS-8B-V2L1_bfloat16_c2d7f009c769f7ebdef00412ad85f2d3bdea9869_False", - "model": { - "name": "T145/ZEUS-8B-V2L1", - "sha": "c2d7f009c769f7ebdef00412ad85f2d3bdea9869", - "precision": 
"bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.959332334959768, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3191886416929303, - "normalized_score": 31.91886416929303 - }, - "bbh": { - "name": "BBH", - "value": 0.5013485375260267, - "normalized_score": 28.694208206551323 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12386706948640483, - "normalized_score": 12.386706948640484 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31291946308724833, - "normalized_score": 8.389261744966444 - }, - "musr": { - "name": "MUSR", - "value": 0.38819791666666664, - "normalized_score": 9.058072916666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36377992021276595, - "normalized_score": 29.308880023640665 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-02", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.489273917381773 - } - }, - { - "id": "T145/ZEUS-8B-V2L2_bfloat16_d3ae250942e4b749c2d545a48f08a93a659a9b6e_True", - "model": { - "name": "T145/ZEUS-8B-V2L2", - "sha": "d3ae250942e4b749c2d545a48f08a93a659a9b6e", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.93592636769453, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8020640788662969, - "normalized_score": 80.2064078866297 - }, - "bbh": { - "name": "BBH", - "value": 0.5202843665402132, - "normalized_score": 32.0175092945529 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20166163141993956, - "normalized_score": 20.166163141993955 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.39746875000000004, - "normalized_score": 8.583593750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38838098404255317, - "normalized_score": 32.042331560283685 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-03", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4080791080730521 - } - }, - { - "id": "T145/ZEUS-8B-V3_bfloat16_2253fa275c722d46dd6380539042ec7f1bc0d7f7_True", - "model": { - "name": "T145/ZEUS-8B-V3", - "sha": "2253fa275c722d46dd6380539042ec7f1bc0d7f7", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.6049284530603, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7886751596874072, - "normalized_score": 78.86751596874072 - }, - "bbh": { - "name": "BBH", - "value": 0.5265064133535374, - "normalized_score": 32.108252109929126 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16767371601208458, - "normalized_score": 16.76737160120846 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3221476510067114, - "normalized_score": 9.61968680089485 - }, - "musr": { - "name": "MUSR", - 
"value": 0.4016875, - "normalized_score": 9.110937499999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38040226063829785, - "normalized_score": 31.155806737588655 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-05", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2127011701204595 - } - }, - { - "id": "T145/ZEUS-8B-V30_bfloat16_7091cc3981243056ed3fae72307f4fac4fa367e4_True", - "model": { - "name": "T145/ZEUS-8B-V30", - "sha": "7091cc3981243056ed3fae72307f4fac4fa367e4", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.0957713811746, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7435626360279614, - "normalized_score": 74.35626360279613 - }, - "bbh": { - "name": "BBH", - "value": 0.5243248855841048, - "normalized_score": 32.188253745585094 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15861027190332327, - "normalized_score": 15.861027190332328 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32046979865771813, - "normalized_score": 9.395973154362418 - }, - "musr": { - "name": "MUSR", - "value": 0.4029270833333333, - "normalized_score": 10.065885416666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3943650265957447, - "normalized_score": 32.70722517730496 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-07", - "submission_date": "2025-02-07", - "generation": 1, - "base_model": "T145/ZEUS-8B-V30 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 2.0650085806418934 - } - }, - { - "id": "T145/ZEUS-8B-V4_bfloat16_ca89fdfe275397f430092a0f644dc02b22ba2a8b_True", - "model": { - "name": "T145/ZEUS-8B-V4", - "sha": "ca89fdfe275397f430092a0f644dc02b22ba2a8b", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.654622258469473, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7807317916461656, - "normalized_score": 78.07317916461656 - }, - "bbh": { - "name": "BBH", - "value": 0.5245974297200655, - "normalized_score": 32.04614380654929 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19259818731117825, - "normalized_score": 19.259818731117825 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.4028958333333333, - "normalized_score": 9.961979166666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37882313829787234, - "normalized_score": 30.98034869976359 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-06", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3011521967725295 - } - }, - { - "id": 
"T145/ZEUS-8B-V6_bfloat16_d7131128560dce428c3308ab46d7955b749c726d_True", - "model": { - "name": "T145/ZEUS-8B-V6", - "sha": "d7131128560dce428c3308ab46d7955b749c726d", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.75703725188933, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7837792612490415, - "normalized_score": 78.37792612490415 - }, - "bbh": { - "name": "BBH", - "value": 0.5239561762634447, - "normalized_score": 32.07784842307126 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20241691842900303, - "normalized_score": 20.241691842900302 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.4068020833333333, - "normalized_score": 9.916927083333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37591422872340424, - "normalized_score": 30.657136524822686 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-08", - "submission_date": "2024-12-09", - "generation": 1, - "base_model": "T145/ZEUS-8B-V6 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.2898547806439689 - } - }, - { - "id": "T145/ZEUS-8B-V7_bfloat16_dbaa3828be77d925f40ecf3762b90ec4ad70e6d9_True", - "model": { - "name": "T145/ZEUS-8B-V7", - "sha": "dbaa3828be77d925f40ecf3762b90ec4ad70e6d9", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.470021765028548, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7786085364610345, - "normalized_score": 77.86085364610345 - }, - "bbh": { - "name": "BBH", - "value": 0.5070394117180643, - "normalized_score": 29.556016390229985 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14803625377643503, - "normalized_score": 14.803625377643503 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.41616666666666663, - "normalized_score": 11.087499999999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3812333776595745, - "normalized_score": 31.24815307328605 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-10", - "submission_date": "2024-12-11", - "generation": 1, - "base_model": "T145/ZEUS-8B-V7 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.311758942423652 - } - }, - { - "id": "T145/ZEUS-8B-V8_bfloat16_c7da6c67926ddaff25602bfd1b9941d9822c1387_True", - "model": { - "name": "T145/ZEUS-8B-V8", - "sha": "c7da6c67926ddaff25602bfd1b9941d9822c1387", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.22354116902905, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7913979352562313, - "normalized_score": 79.13979352562313 - }, - "bbh": { - "name": "BBH", - "value": 0.5064510419864701, - "normalized_score": 29.39403095675921 - }, - "math": { - "name": "MATH Level 5", - 
"value": 0.13293051359516617, - "normalized_score": 13.293051359516618 - }, - "gpqa": { - "name": "GPQA", - "value": 0.287751677852349, - "normalized_score": 5.033557046979867 - }, - "musr": { - "name": "MUSR", - "value": 0.421375, - "normalized_score": 11.805208333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37608045212765956, - "normalized_score": 30.67560579196217 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-12", - "submission_date": "2024-12-12", - "generation": 1, - "base_model": "T145/ZEUS-8B-V8 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 3, - "params_billions": 8.03, - "co2_cost": 1.3200279497215517 - } - }, - { - "id": "T145/ZEUS-8B-V9_bfloat16_10b386571ad34d115433419d30b61746ef4d9735_True", - "model": { - "name": "T145/ZEUS-8B-V9", - "sha": "10b386571ad34d115433419d30b61746ef4d9735", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.864889285845788, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5551436854213487, - "normalized_score": 55.51436854213487 - }, - "bbh": { - "name": "BBH", - "value": 0.5207256346477752, - "normalized_score": 31.85054952781809 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21374622356495468, - "normalized_score": 21.37462235649547 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.3949270833333333, - "normalized_score": 8.73255208333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39012632978723405, - "normalized_score": 32.23625886524823 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-21", - "submission_date": "2024-12-21", - "generation": 1, - "base_model": "T145/ZEUS-8B-V9 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.346419889879008 - } - }, - { - "id": "T145/qwen-2.5-3B-merge-test_bfloat16_0d5f82d841f811fbf1ee07bfbf7c6eb1de812840_True", - "model": { - "name": "T145/qwen-2.5-3B-merge-test", - "sha": "0d5f82d841f811fbf1ee07bfbf7c6eb1de812840", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 25.975399259287943, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5751018408932742, - "normalized_score": 57.510184089327424 - }, - "bbh": { - "name": "BBH", - "value": 0.4842488747720393, - "normalized_score": 27.889341313676073 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3202416918429003, - "normalized_score": 32.02416918429003 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28523489932885904, - "normalized_score": 4.697986577181204 - }, - "musr": { - "name": "MUSR", - "value": 0.40072916666666664, - "normalized_score": 8.291145833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3289561170212766, - "normalized_score": 25.439568557919618 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": 
"2024-11-16", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.397, - "co2_cost": 1.5679131730465115 - } - }, - { - "id": "THUDM/glm-4-9b_bfloat16_99a140996f9d4f197842fb6b1aab217a42e27ef3_False", - "model": { - "name": "THUDM/glm-4-9b", - "sha": "99a140996f9d4f197842fb6b1aab217a42e27ef3", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "ChatGLMModelM", - "average_score": 18.006731731716215, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1426082793654171, - "normalized_score": 14.260827936541709 - }, - "bbh": { - "name": "BBH", - "value": 0.5528368141665274, - "normalized_score": 35.811283581208905 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3162751677852349, - "normalized_score": 8.83668903803132 - }, - "musr": { - "name": "MUSR", - "value": 0.4385833333333333, - "normalized_score": 14.189583333333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4144780585106383, - "normalized_score": 34.94200650118203 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-04", - "submission_date": "2024-07-04", - "generation": 0, - "base_model": "THUDM/glm-4-9b", - "hub_license": "other", - "hub_hearts": 126, - "params_billions": 9.0, - "co2_cost": 3.3448936751023592 - } - }, - { - "id": "THUDM/glm-4-9b-chat_bfloat16_04419001bc63e05e70991ade6da1f91c4aeec278_True", - "model": { - "name": "THUDM/glm-4-9b-chat", - "sha": "04419001bc63e05e70991ade6da1f91c4aeec278", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "ChatGLMModelM", - "average_score": 10.973477297045166, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.0, - "normalized_score": 0.0 - }, - "bbh": { - "name": "BBH", - "value": 0.47363884291035735, - "normalized_score": 25.205183674440235 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.313758389261745, - "normalized_score": 8.501118568232664 - }, - "musr": { - "name": "MUSR", - "value": 0.3994270833333333, - "normalized_score": 8.061718749999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.316655585106383, - "normalized_score": 24.072842789598106 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-04", - "submission_date": "2024-07-09", - "generation": 0, - "base_model": "THUDM/glm-4-9b-chat", - "hub_license": "other", - "hub_hearts": 669, - "params_billions": 9.0, - "co2_cost": 0.4942690169854622 - } - }, - { - "id": "THUDM/glm-4-9b-chat-1m_bfloat16_0aa722c7e0745dd21453427dd44c257dd253304f_True", - "model": { - "name": "THUDM/glm-4-9b-chat-1m", - "sha": "0aa722c7e0745dd21453427dd44c257dd253304f", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "ChatGLMModel", - "average_score": 8.922510186531982, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.0, - "normalized_score": 0.0 - }, - "bbh": { - "name": "BBH", - "value": 0.41800578218330303, - "normalized_score": 
17.10802850816805 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.3794583333333333, - "normalized_score": 5.232291666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31632313829787234, - "normalized_score": 24.03590425531915 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-04", - "submission_date": "2024-10-09", - "generation": 0, - "base_model": "THUDM/glm-4-9b-chat-1m", - "hub_license": "other", - "hub_hearts": 186, - "params_billions": 9.484, - "co2_cost": 0.4113399099723327 - } - }, - { - "id": "THUDM/glm-4-9b-chat-1m-hf_bfloat16_0588cb62942f0f0a5545c695e5c1b019d64eabdc_True", - "model": { - "name": "THUDM/glm-4-9b-chat-1m-hf", - "sha": "0588cb62942f0f0a5545c695e5c1b019d64eabdc", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "GlmForCausalLM", - "average_score": 15.139213915838658, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5341106043076814, - "normalized_score": 53.411060430768146 - }, - "bbh": { - "name": "BBH", - "value": 0.3900953106836365, - "normalized_score": 14.405440654648933 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04833836858006042, - "normalized_score": 4.833836858006042 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.36888541666666663, - "normalized_score": 3.544010416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.18143284574468085, - "normalized_score": 9.048093971631205 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-24", - "submission_date": "2025-01-15", - "generation": 1, - "base_model": "THUDM/glm-4-9b-chat-1m-hf (Merge)", - "hub_license": "other", - "hub_hearts": 10, - "params_billions": 9.484, - "co2_cost": 2.0941316751918633 - } - }, - { - "id": "THUDM/glm-4-9b-chat-hf_bfloat16_c7f73fd9e0f378c87f3c8f2c25aec6ad705043cd_True", - "model": { - "name": "THUDM/glm-4-9b-chat-hf", - "sha": "c7f73fd9e0f378c87f3c8f2c25aec6ad705043cd", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "GlmForCausalLM", - "average_score": 20.54431273192071, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6513140688927601, - "normalized_score": 65.13140688927601 - }, - "bbh": { - "name": "BBH", - "value": 0.4432308604245425, - "normalized_score": 20.668085640285003 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08459214501510574, - "normalized_score": 8.459214501510575 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.35930208333333336, - "normalized_score": 2.2460937499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27742686170212766, - "normalized_score": 19.714095744680847 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": 
false - }, - "metadata": { - "upload_date": "2024-10-23", - "submission_date": "2025-01-15", - "generation": 1, - "base_model": "THUDM/glm-4-9b-chat-hf (Merge)", - "hub_license": "other", - "hub_hearts": 11, - "params_billions": 9.4, - "co2_cost": 1.9847880947215946 - } - }, - { - "id": "TIGER-Lab/AceCodeRM-7B_bfloat16_cc0d74c2c70a2af30c33e9e1c5a787fb79ac5c2c_True", - "model": { - "name": "TIGER-Lab/AceCodeRM-7B", - "sha": "cc0d74c2c70a2af30c33e9e1c5a787fb79ac5c2c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalRM", - "average_score": 27.34471609415536, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5854931581536988, - "normalized_score": 58.54931581536989 - }, - "bbh": { - "name": "BBH", - "value": 0.4773230085351336, - "normalized_score": 26.279158142995318 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3466767371601209, - "normalized_score": 34.66767371601209 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.41920833333333335, - "normalized_score": 11.067708333333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3361037234042553, - "normalized_score": 26.233747044917255 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-03-07", - "generation": 1, - "base_model": "TIGER-Lab/AceCodeRM-7B (Merge)", - "hub_license": "mit", - "hub_hearts": 3, - "params_billions": 7.616, - "co2_cost": 0.6548936133746145 - } - }, - { - "id": "TIGER-Lab/AceCoder-Qwen2.5-7B-Ins-Rule_bfloat16_aedbaf4b30d6992872f6de21416fbf9c52795a81_True", - "model": { - "name": "TIGER-Lab/AceCoder-Qwen2.5-7B-Ins-Rule", - "sha": "aedbaf4b30d6992872f6de21416fbf9c52795a81", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.10989936210928, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.742413462944986, - "normalized_score": 74.24134629449861 - }, - "bbh": { - "name": "BBH", - "value": 0.5404426673547671, - "normalized_score": 35.040755991620735 - }, - "math": { - "name": "MATH Level 5", - "value": 0.49924471299093653, - "normalized_score": 49.92447129909365 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.39803125, - "normalized_score": 7.720572916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4321808510638298, - "normalized_score": 36.90898345153664 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-04", - "submission_date": "2025-03-07", - "generation": 1, - "base_model": "TIGER-Lab/AceCoder-Qwen2.5-7B-Ins-Rule (Merge)", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 0.6477565533827316 - } - }, - { - "id": "TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Base-Rule_bfloat16_352bab9841e39d359c630c61b46e58b2dea73384_True", - "model": { - "name": "TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Base-Rule", - "sha": 
"352bab9841e39d359c630c61b46e58b2dea73384", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 21.333370512930014, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44076273177391545, - "normalized_score": 44.07627317739154 - }, - "bbh": { - "name": "BBH", - "value": 0.49023782785253694, - "normalized_score": 29.405351182469058 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20166163141993956, - "normalized_score": 20.166163141993955 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27181208053691275, - "normalized_score": 2.9082774049216997 - }, - "musr": { - "name": "MUSR", - "value": 0.34488541666666667, - "normalized_score": 0.9440104166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37450132978723405, - "normalized_score": 30.500147754137117 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-04", - "submission_date": "2025-03-07", - "generation": 1, - "base_model": "TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Base-Rule (Merge)", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 0.9756049775312156 - } - }, - { - "id": "TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Ins-Rule_bfloat16_b230c078dfebe25af64dff924d8c41e620770ec4_True", - "model": { - "name": "TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Ins-Rule", - "sha": "b230c078dfebe25af64dff924d8c41e620770ec4", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 28.029959679329718, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6222378843690297, - "normalized_score": 62.22378843690298 - }, - "bbh": { - "name": "BBH", - "value": 0.5089236146835355, - "normalized_score": 30.542991358230974 - }, - "math": { - "name": "MATH Level 5", - "value": 0.36027190332326287, - "normalized_score": 36.027190332326285 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.40463541666666664, - "normalized_score": 8.712760416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34283577127659576, - "normalized_score": 26.981752364066192 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-04", - "submission_date": "2025-03-07", - "generation": 1, - "base_model": "TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Ins-Rule (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.6418509308278861 - } - }, - { - "id": "TIGER-Lab/MAmmoTH2-7B-Plus_bfloat16_3ed578d8dda09787137e363a0dc32e3a8ed908de_True", - "model": { - "name": "TIGER-Lab/MAmmoTH2-7B-Plus", - "sha": "3ed578d8dda09787137e363a0dc32e3a8ed908de", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.633507778259585, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5574664113441224, - "normalized_score": 55.74664113441224 - }, - "bbh": { - "name": "BBH", - "value": 
0.42346949888019064, - "normalized_score": 18.925953227555734 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18580060422960726, - "normalized_score": 18.580060422960727 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2802013422818792, - "normalized_score": 4.026845637583895 - }, - "musr": { - "name": "MUSR", - "value": 0.41235416666666663, - "normalized_score": 10.1109375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30169547872340424, - "normalized_score": 22.410608747044915 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-06", - "submission_date": "2024-06-27", - "generation": 0, - "base_model": "TIGER-Lab/MAmmoTH2-7B-Plus", - "hub_license": "mit", - "hub_hearts": 7, - "params_billions": 7.242, - "co2_cost": 1.1053267011114711 - } - }, - { - "id": "TIGER-Lab/Qwen2.5-Math-7B-CFT_bfloat16_070621bc59d17068cc9e86b7e9f3db3efb08c981_True", - "model": { - "name": "TIGER-Lab/Qwen2.5-Math-7B-CFT", - "sha": "070621bc59d17068cc9e86b7e9f3db3efb08c981", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 23.521464490408093, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2776976200924658, - "normalized_score": 27.769762009246577 - }, - "bbh": { - "name": "BBH", - "value": 0.46369414980230833, - "normalized_score": 24.585137970728173 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5574018126888217, - "normalized_score": 55.740181268882175 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2860738255033557, - "normalized_score": 4.809843400447425 - }, - "musr": { - "name": "MUSR", - "value": 0.38866666666666666, - "normalized_score": 6.61666666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29446476063829785, - "normalized_score": 21.607195626477537 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-30", - "submission_date": "2025-03-07", - "generation": 1, - "base_model": "TIGER-Lab/Qwen2.5-Math-7B-CFT (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 8, - "params_billions": 7.616, - "co2_cost": 1.1704517284970632 - } - }, - { - "id": "TTTXXX01/Mistral-7B-Base-SimPO2-5e-7_bfloat16_7a271e3061165f4e1abfe26715c04e20c2ac935e_True", - "model": { - "name": "TTTXXX01/Mistral-7B-Base-SimPO2-5e-7", - "sha": "7a271e3061165f4e1abfe26715c04e20c2ac935e", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 16.417452751929584, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43918912928806675, - "normalized_score": 43.91891292880668 - }, - "bbh": { - "name": "BBH", - "value": 0.43195515014882774, - "normalized_score": 20.692627382557507 - }, - "math": { - "name": "MATH Level 5", - "value": 0.026435045317220542, - "normalized_score": 2.643504531722054 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.36041666666666666, - "normalized_score": 5.252083333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2765957446808511, - "normalized_score": 19.62174940898345 - } - }, - "features": { - 
"is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-30", - "submission_date": "2024-09-01", - "generation": 2, - "base_model": "mistralai/Mistral-7B-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.0459922645345738 - } - }, - { - "id": "Tarek07/Progenitor-V1.1-LLaMa-70B_bfloat16_00b611cd032a7f944267d5eac9dee0e488e6428b_False", - "model": { - "name": "Tarek07/Progenitor-V1.1-LLaMa-70B", - "sha": "00b611cd032a7f944267d5eac9dee0e488e6428b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 43.00294516350462, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6906064796960952, - "normalized_score": 69.06064796960952 - }, - "bbh": { - "name": "BBH", - "value": 0.6971116049173388, - "normalized_score": 56.24697023586278 - }, - "math": { - "name": "MATH Level 5", - "value": 0.35725075528700906, - "normalized_score": 35.725075528700906 - }, - "gpqa": { - "name": "GPQA", - "value": 0.45805369127516776, - "normalized_score": 27.740492170022367 - }, - "musr": { - "name": "MUSR", - "value": 0.47356250000000005, - "normalized_score": 19.62864583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5465425531914894, - "normalized_score": 49.61583924349882 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-24", - "submission_date": "2025-01-25", - "generation": 1, - "base_model": "Tarek07/Progenitor-V1.1-LLaMa-70B (Merge)", - "hub_license": "llama3.3", - "hub_hearts": 10, - "params_billions": 70.554, - "co2_cost": 28.145173073124646 - } - }, - { - "id": "Tarek07/Thalassic-Alpha-LLaMa-70B_bfloat16_134030081c61d1e1cb9df44521ab130396607682_False", - "model": { - "name": "Tarek07/Thalassic-Alpha-LLaMa-70B", - "sha": "134030081c61d1e1cb9df44521ab130396607682", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 42.2203763514307, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7003484088884161, - "normalized_score": 70.03484088884161 - }, - "bbh": { - "name": "BBH", - "value": 0.6940408286616311, - "normalized_score": 55.95412538254855 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3149546827794562, - "normalized_score": 31.49546827794562 - }, - "gpqa": { - "name": "GPQA", - "value": 0.4437919463087248, - "normalized_score": 25.838926174496642 - }, - "musr": { - "name": "MUSR", - "value": 0.4801979166666667, - "normalized_score": 20.72473958333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.543467420212766, - "normalized_score": 49.27415780141844 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-27", - "submission_date": "2025-01-28", - "generation": 1, - "base_model": "Tarek07/Thalassic-Alpha-LLaMa-70B (Merge)", - "hub_license": "llama3.3", - "hub_hearts": 1, - "params_billions": 70.554, - "co2_cost": 28.772456313434247 - } - }, - { - "id": 
"TeeZee/DoubleBagel-57B-v1.0_bfloat16_6e10dc1fb5223d1b045dc2a19c9c267a574e520f_True", - "model": { - "name": "TeeZee/DoubleBagel-57B-v1.0", - "sha": "6e10dc1fb5223d1b045dc2a19c9c267a574e520f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 8.707748481359532, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23363342597640924, - "normalized_score": 23.363342597640923 - }, - "bbh": { - "name": "BBH", - "value": 0.325078559362514, - "normalized_score": 5.5227816982611495 - }, - "math": { - "name": "MATH Level 5", - "value": 0.009818731117824773, - "normalized_score": 0.9818731117824773 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.43148958333333337, - "normalized_score": 13.60286458333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.14777260638297873, - "normalized_score": 5.308067375886525 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-05", - "submission_date": "2024-08-10", - "generation": 1, - "base_model": "TeeZee/DoubleBagel-57B-v1.0 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 56.703, - "co2_cost": 18.737294956345185 - } - }, - { - "id": "Telugu-LLM-Labs/Indic-gemma-2b-finetuned-sft-Navarasa-2.0_bfloat16_6e53b24f9368eaf0b1f9aee0c7c59f2068d05a27_False", - "model": { - "name": "Telugu-LLM-Labs/Indic-gemma-2b-finetuned-sft-Navarasa-2.0", - "sha": "6e53b24f9368eaf0b1f9aee0c7c59f2068d05a27", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "GemmaForCausalLM", - "average_score": 6.657818235070503, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21030310686755588, - "normalized_score": 21.030310686755588 - }, - "bbh": { - "name": "BBH", - "value": 0.3240881373468133, - "normalized_score": 6.0210547049870184 - }, - "math": { - "name": "MATH Level 5", - "value": 0.027190332326283987, - "normalized_score": 2.719033232628399 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24328859060402686, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3899375, - "normalized_score": 7.075520833333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12790890957446807, - "normalized_score": 3.100989952718674 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-03-17", - "submission_date": "2025-02-06", - "generation": 1, - "base_model": "google/gemma-2b", - "hub_license": "other", - "hub_hearts": 23, - "params_billions": 2.506, - "co2_cost": 1.410325791348669 - } - }, - { - "id": "Telugu-LLM-Labs/Indic-gemma-7b-finetuned-sft-Navarasa-2.0_bfloat16_84d251f088d2954561a4348883ba28f6f3265182_False", - "model": { - "name": "Telugu-LLM-Labs/Indic-gemma-7b-finetuned-sft-Navarasa-2.0", - "sha": "84d251f088d2954561a4348883ba28f6f3265182", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "GemmaForCausalLM", - "average_score": 13.004827801453324, - "has_chat_template": false - }, - "evaluations": { - 
"ifeval": { - "name": "IFEval", - "value": 0.32368449048524583, - "normalized_score": 32.36844904852458 - }, - "bbh": { - "name": "BBH", - "value": 0.40229948924733394, - "normalized_score": 16.263180963543576 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0256797583081571, - "normalized_score": 2.56797583081571 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.40832291666666665, - "normalized_score": 9.140364583333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.23503989361702127, - "normalized_score": 15.004432624113473 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-03-17", - "submission_date": "2025-02-06", - "generation": 1, - "base_model": "google/gemma-7b", - "hub_license": "other", - "hub_hearts": 18, - "params_billions": 8.538, - "co2_cost": 1.8611614350698493 - } - }, - { - "id": "TencentARC/LLaMA-Pro-8B_bfloat16_7115e7179060e0623d1ee9ff4476faed7e478d8c_False", - "model": { - "name": "TencentARC/LLaMA-Pro-8B", - "sha": "7115e7179060e0623d1ee9ff4476faed7e478d8c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 8.816698626146762, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2277135777514772, - "normalized_score": 22.77135777514772 - }, - "bbh": { - "name": "BBH", - "value": 0.3484197711435169, - "normalized_score": 9.2939499758607 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0188821752265861, - "normalized_score": 1.8882175226586102 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.40181249999999996, - "normalized_score": 8.593229166666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.18110039893617022, - "normalized_score": 9.011155437352246 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-01-05", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "TencentARC/LLaMA-Pro-8B", - "hub_license": "llama2", - "hub_hearts": 171, - "params_billions": 8.357, - "co2_cost": 95.61546710217299 - } - }, - { - "id": "TencentARC/LLaMA-Pro-8B-Instruct_bfloat16_9850c8afce19a69d8fc4a1603a82441157514016_True", - "model": { - "name": "TencentARC/LLaMA-Pro-8B-Instruct", - "sha": "9850c8afce19a69d8fc4a1603a82441157514016", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 15.28346018029823, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4486063644463357, - "normalized_score": 44.86063644463357 - }, - "bbh": { - "name": "BBH", - "value": 0.4224205282459997, - "normalized_score": 19.485726056875954 - }, - "math": { - "name": "MATH Level 5", - "value": 0.024924471299093656, - "normalized_score": 2.492447129909366 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.41902083333333334, - "normalized_score": 11.1109375 - }, - "mmlu_pro": { - "name": 
"MMLU-PRO", - "value": 0.19456449468085107, - "normalized_score": 10.507166075650117 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-01-06", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "TencentARC/LLaMA-Pro-8B-Instruct", - "hub_license": "llama2", - "hub_hearts": 62, - "params_billions": 8.357, - "co2_cost": 6.210406711645305 - } - }, - { - "id": "TencentARC/MetaMath-Mistral-Pro_bfloat16_3835d38de15ed2a04c32aca879b782fc50e390bf_False", - "model": { - "name": "TencentARC/MetaMath-Mistral-Pro", - "sha": "3835d38de15ed2a04c32aca879b782fc50e390bf", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 12.5165268741835, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21187670935340452, - "normalized_score": 21.18767093534045 - }, - "bbh": { - "name": "BBH", - "value": 0.44131618555883606, - "normalized_score": 22.37227879113455 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07628398791540786, - "normalized_score": 7.628398791540786 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26929530201342283, - "normalized_score": 2.572706935123044 - }, - "musr": { - "name": "MUSR", - "value": 0.35241666666666666, - "normalized_score": 4.9854166666666675 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2471742021276596, - "normalized_score": 16.35268912529551 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-02-26", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "TencentARC/MetaMath-Mistral-Pro", - "hub_license": "apache-2.0", - "hub_hearts": 5, - "params_billions": 8.987, - "co2_cost": 1.2015045035813723 - } - }, - { - "id": "TencentARC/Mistral_Pro_8B_v0.1_bfloat16_366f159fc5b314ba2a955209d2bca4600f84dac0_False", - "model": { - "name": "TencentARC/Mistral_Pro_8B_v0.1", - "sha": "366f159fc5b314ba2a955209d2bca4600f84dac0", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 14.195345928021323, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21145227995053123, - "normalized_score": 21.145227995053123 - }, - "bbh": { - "name": "BBH", - "value": 0.4525975968066435, - "normalized_score": 22.894188758768042 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05664652567975831, - "normalized_score": 5.664652567975831 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2802013422818792, - "normalized_score": 4.026845637583895 - }, - "musr": { - "name": "MUSR", - "value": 0.42422916666666666, - "normalized_score": 11.828645833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2765126329787234, - "normalized_score": 19.61251477541371 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-02-22", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "TencentARC/Mistral_Pro_8B_v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 66, - "params_billions": 8.987, - 
"co2_cost": 1.2649645640770015 - } - }, - { - "id": "TheDrummer/Cydonia-22B-v1.2_bfloat16_acd8da5efadc7dc404bb4eeebef2b27b1554a2ca_False", - "model": { - "name": "TheDrummer/Cydonia-22B-v1.2", - "sha": "acd8da5efadc7dc404bb4eeebef2b27b1554a2ca", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 28.7900883355216, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5635114828654637, - "normalized_score": 56.35114828654636 - }, - "bbh": { - "name": "BBH", - "value": 0.580856074392761, - "normalized_score": 39.93260406588619 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20317220543806647, - "normalized_score": 20.31722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33053691275167785, - "normalized_score": 10.738255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.40217708333333335, - "normalized_score": 10.50546875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4140625, - "normalized_score": 34.895833333333336 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-07", - "submission_date": "2024-10-26", - "generation": 0, - "base_model": "TheDrummer/Cydonia-22B-v1.2", - "hub_license": "other", - "hub_hearts": 42, - "params_billions": 22.247, - "co2_cost": 3.2574085500617844 - } - }, - { - "id": "TheDrummer/Gemmasutra-9B-v1_bfloat16_21591f6a0140e095f1c6668ac7a267f214547609_False", - "model": { - "name": "TheDrummer/Gemmasutra-9B-v1", - "sha": "21591f6a0140e095f1c6668ac7a267f214547609", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 22.74868519843542, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24155130609006326, - "normalized_score": 24.155130609006328 - }, - "bbh": { - "name": "BBH", - "value": 0.5886914248369671, - "normalized_score": 41.20039631726062 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08308157099697885, - "normalized_score": 8.308157099697885 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3104026845637584, - "normalized_score": 8.05369127516779 - }, - "musr": { - "name": "MUSR", - "value": 0.48459375, - "normalized_score": 20.940885416666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4045046542553192, - "normalized_score": 33.83385047281324 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-17", - "submission_date": "2024-09-19", - "generation": 1, - "base_model": "TheDrummer/Gemmasutra-9B-v1 (Merge)", - "hub_license": "", - "hub_hearts": 25, - "params_billions": 10.159, - "co2_cost": 5.807637484434085 - } - }, - { - "id": "TheDrummer/Gemmasutra-Mini-2B-v1_bfloat16_c1db4c8f975d3848edbdaf851217039c8dfdaeb5_True", - "model": { - "name": "TheDrummer/Gemmasutra-Mini-2B-v1", - "sha": "c1db4c8f975d3848edbdaf851217039c8dfdaeb5", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 9.12929252128462, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25486597782771936, - "normalized_score": 
25.48659778277193 - }, - "bbh": { - "name": "BBH", - "value": 0.35750190791471836, - "normalized_score": 9.81033614467704 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0377643504531722, - "normalized_score": 3.7764350453172204 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.3489791666666666, - "normalized_score": 1.1890624999999992 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.20545212765957446, - "normalized_score": 11.71690307328605 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-03", - "submission_date": "2024-10-28", - "generation": 0, - "base_model": "TheDrummer/Gemmasutra-Mini-2B-v1", - "hub_license": "other", - "hub_hearts": 58, - "params_billions": 2.614, - "co2_cost": 2.7959093485129887 - } - }, - { - "id": "TheDrummer/Llama-3SOME-8B-v2_bfloat16_2412c897532c1ab325ddf674c62004b234f2939e_True", - "model": { - "name": "TheDrummer/Llama-3SOME-8B-v2", - "sha": "2412c897532c1ab325ddf674c62004b234f2939e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.812966856684692, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4508049752434651, - "normalized_score": 45.08049752434651 - }, - "bbh": { - "name": "BBH", - "value": 0.5203347869042534, - "normalized_score": 31.69527340436814 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09365558912386707, - "normalized_score": 9.365558912386707 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - "musr": { - "name": "MUSR", - "value": 0.3832708333333333, - "normalized_score": 7.208854166666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37533244680851063, - "normalized_score": 30.592494089834517 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-21", - "submission_date": "2025-01-12", - "generation": 0, - "base_model": "TheDrummer/Llama-3SOME-8B-v2", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 41, - "params_billions": 8.03, - "co2_cost": 1.4985057944870328 - } - }, - { - "id": "TheDrummer/Ministrations-8B-v1_bfloat16_39b892de64401ec7990ebb816c4455ba4532bafb_False", - "model": { - "name": "TheDrummer/Ministrations-8B-v1", - "sha": "39b892de64401ec7990ebb816c4455ba4532bafb", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.29045248198521, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.28219346888478125, - "normalized_score": 28.219346888478125 - }, - "bbh": { - "name": "BBH", - "value": 0.48766312602251366, - "normalized_score": 26.98563733629608 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18429003021148035, - "normalized_score": 18.429003021148034 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32466442953020136, - "normalized_score": 9.955257270693513 - }, - "musr": { - "name": "MUSR", - "value": 0.44490625, - "normalized_score": 14.779947916666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - 
"value": 0.36436170212765956, - "normalized_score": 29.373522458628837 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-07", - "submission_date": "2024-11-14", - "generation": 0, - "base_model": "TheDrummer/Ministrations-8B-v1", - "hub_license": "other", - "hub_hearts": 19, - "params_billions": 8.02, - "co2_cost": 1.7251117817617339 - } - }, - { - "id": "TheDrummer/Rocinante-12B-v1_bfloat16_74a4ae2584d45655298995198d5ab3e660364a1a_True", - "model": { - "name": "TheDrummer/Rocinante-12B-v1", - "sha": "74a4ae2584d45655298995198d5ab3e660364a1a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 24.62809312692346, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6076499244227538, - "normalized_score": 60.764992442275386 - }, - "bbh": { - "name": "BBH", - "value": 0.5065452085797449, - "normalized_score": 30.025654065607256 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1268882175226586, - "normalized_score": 12.688821752265861 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.40171874999999996, - "normalized_score": 11.281510416666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34773936170212766, - "normalized_score": 27.526595744680847 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-14", - "submission_date": "2024-09-03", - "generation": 0, - "base_model": "TheDrummer/Rocinante-12B-v1", - "hub_license": "other", - "hub_hearts": 28, - "params_billions": 12.248, - "co2_cost": 3.7288831901606856 - } - }, - { - "id": "TheDrummer/Tiger-Gemma-9B-v1_float16_e95392c07bab3c483937583c711939ab3f5044dd_True", - "model": { - "name": "TheDrummer/Tiger-Gemma-9B-v1", - "sha": "e95392c07bab3c483937583c711939ab3f5044dd", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 30.896643595129223, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.728150197032762, - "normalized_score": 72.81501970327619 - }, - "bbh": { - "name": "BBH", - "value": 0.5703687739329574, - "normalized_score": 37.220546054087826 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18353474320241692, - "normalized_score": 18.35347432024169 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3389261744966443, - "normalized_score": 11.85682326621924 - }, - "musr": { - "name": "MUSR", - "value": 0.41616666666666663, - "normalized_score": 10.487500000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.41181848404255317, - "normalized_score": 34.64649822695036 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-12", - "submission_date": "2025-01-07", - "generation": 0, - "base_model": "TheDrummer/Tiger-Gemma-9B-v1", - "hub_license": "", - "hub_hearts": 39, - "params_billions": 9.242, - "co2_cost": 3.2408546620597902 - } - }, - { - "id": 
"TheDrummer/Tiger-Gemma-9B-v2_float16_9aea74832c16646c9c4948ccc2e76cb812f3c089_True", - "model": { - "name": "TheDrummer/Tiger-Gemma-9B-v2", - "sha": "9aea74832c16646c9c4948ccc2e76cb812f3c089", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 29.900202484538976, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6985997154217476, - "normalized_score": 69.85997154217475 - }, - "bbh": { - "name": "BBH", - "value": 0.5617191114121779, - "normalized_score": 35.46954056518246 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18202416918429004, - "normalized_score": 18.202416918429005 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33976510067114096, - "normalized_score": 11.968680089485462 - }, - "musr": { - "name": "MUSR", - "value": 0.40841666666666665, - "normalized_score": 9.31875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.41123670212765956, - "normalized_score": 34.58185579196217 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-16", - "submission_date": "2025-01-07", - "generation": 0, - "base_model": "TheDrummer/Tiger-Gemma-9B-v2", - "hub_license": "", - "hub_hearts": 29, - "params_billions": 9.242, - "co2_cost": 3.3395979593481933 - } - }, - { - "id": "TheDrummer/Tiger-Gemma-9B-v3_float16_fe32c1926e4057f75ebc2a4a57103564168cdbb7_True", - "model": { - "name": "TheDrummer/Tiger-Gemma-9B-v3", - "sha": "fe32c1926e4057f75ebc2a4a57103564168cdbb7", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 29.47327542727632, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6820635912711606, - "normalized_score": 68.20635912711606 - }, - "bbh": { - "name": "BBH", - "value": 0.5812231557853248, - "normalized_score": 38.83602273195975 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1623867069486405, - "normalized_score": 16.238670694864048 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3389261744966443, - "normalized_score": 11.85682326621924 - }, - "musr": { - "name": "MUSR", - "value": 0.4003541666666666, - "normalized_score": 7.710937499999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.40591755319148937, - "normalized_score": 33.99083924349882 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-04", - "submission_date": "2025-01-07", - "generation": 0, - "base_model": "TheDrummer/Tiger-Gemma-9B-v3", - "hub_license": "", - "hub_hearts": 47, - "params_billions": 9.242, - "co2_cost": 3.1435063282657105 - } - }, - { - "id": "TheDrunkenSnail/Daughter-of-Rhodia-12B_bfloat16_f8f7d64218491055f0c983736e0befc6fbe92a63_True", - "model": { - "name": "TheDrunkenSnail/Daughter-of-Rhodia-12B", - "sha": "f8f7d64218491055f0c983736e0befc6fbe92a63", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 27.609503388815302, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6903815210308648, - "normalized_score": 
69.03815210308647 - }, - "bbh": { - "name": "BBH", - "value": 0.5179174184876773, - "normalized_score": 31.47583343042707 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12235649546827794, - "normalized_score": 12.235649546827794 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31711409395973156, - "normalized_score": 8.948545861297541 - }, - "musr": { - "name": "MUSR", - "value": 0.43477083333333333, - "normalized_score": 14.613020833333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3641123670212766, - "normalized_score": 29.34581855791962 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-17", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 12.248, - "co2_cost": 1.6214507377709615 - } - }, - { - "id": "TheDrunkenSnail/Mother-of-Rhodia-12B_bfloat16_82376ee83b88faa62921f98d28ece7c0941cfda2_True", - "model": { - "name": "TheDrunkenSnail/Mother-of-Rhodia-12B", - "sha": "82376ee83b88faa62921f98d28ece7c0941cfda2", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 25.379307438403675, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6504895898438365, - "normalized_score": 65.04895898438366 - }, - "bbh": { - "name": "BBH", - "value": 0.49479138664574934, - "normalized_score": 28.502978884048833 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12235649546827794, - "normalized_score": 12.235649546827794 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2986577181208054, - "normalized_score": 6.487695749440718 - }, - "musr": { - "name": "MUSR", - "value": 0.41241666666666665, - "normalized_score": 11.652083333333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35513630319148937, - "normalized_score": 28.34847813238771 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-23", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "TheDrunkenSnail/Mother-of-Rhodia-12B (Merge)", - "hub_license": "other", - "hub_hearts": 1, - "params_billions": 12.248, - "co2_cost": 1.714810337183691 - } - }, - { - "id": "TheDrunkenSnail/Son-of-Rhodia_float16_f855ee46e8c6f187e2885bccdb4dd40a4ec27d94_True", - "model": { - "name": "TheDrunkenSnail/Son-of-Rhodia", - "sha": "f855ee46e8c6f187e2885bccdb4dd40a4ec27d94", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 27.216224977371535, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7046447869430887, - "normalized_score": 70.46447869430888 - }, - "bbh": { - "name": "BBH", - "value": 0.5097327647725524, - "normalized_score": 30.222057252119924 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13141993957703926, - "normalized_score": 13.141993957703926 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31291946308724833, - "normalized_score": 8.389261744966444 - }, - "musr": { - "name": "MUSR", - "value": 0.4202916666666667, - "normalized_score": 12.103124999999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3607878989361702, - 
"normalized_score": 28.976433215130015 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-31", - "submission_date": "2024-12-31", - "generation": 1, - "base_model": "TheDrunkenSnail/Son-of-Rhodia (Merge)", - "hub_license": "other", - "hub_hearts": 3, - "params_billions": 12.248, - "co2_cost": 1.9124200394313293 - } - }, - { - "id": "TheHierophant/Underground-Cognitive-V0.3-test_float16_2753b6f9068ad14efe836cde3160747cd208bf9e_False", - "model": { - "name": "TheHierophant/Underground-Cognitive-V0.3-test", - "sha": "2753b6f9068ad14efe836cde3160747cd208bf9e", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.4060912466085, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4808297539417634, - "normalized_score": 48.08297539417634 - }, - "bbh": { - "name": "BBH", - "value": 0.5290131900998047, - "normalized_score": 33.665101982714994 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05891238670694864, - "normalized_score": 5.8912386706948645 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2986577181208054, - "normalized_score": 6.487695749440718 - }, - "musr": { - "name": "MUSR", - "value": 0.43511458333333336, - "normalized_score": 14.55598958333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.331781914893617, - "normalized_score": 25.75354609929078 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-22", - "submission_date": "2024-11-22", - "generation": 1, - "base_model": "TheHierophant/Underground-Cognitive-V0.3-test (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 10.732, - "co2_cost": 1.1731624613051972 - } - }, - { - "id": "TheTsar1209/nemo-carpmuscle-v0.1_float16_84d20db8220014958ff157047b2216910637ae39_False", - "model": { - "name": "TheTsar1209/nemo-carpmuscle-v0.1", - "sha": "84d20db8220014958ff157047b2216910637ae39", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 16.794489017380517, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2275639746982451, - "normalized_score": 22.75639746982451 - }, - "bbh": { - "name": "BBH", - "value": 0.5083529697101391, - "normalized_score": 30.034995783434088 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04758308157099698, - "normalized_score": 4.758308157099698 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.4135, - "normalized_score": 10.220833333333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3405917553191489, - "normalized_score": 26.73241725768321 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-15", - "submission_date": "2024-10-10", - "generation": 1, - "base_model": "unsloth/Mistral-Nemo-Base-2407-bnb-4bit", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 12.248, - "co2_cost": 3.616880770705841 - } - }, - { - "id": 
"TheTsar1209/qwen-carpmuscle-r-v0.3_bfloat16_30f8221d2f5f587343b1dbd65cf7d9bda4f5ef16_True", - "model": { - "name": "TheTsar1209/qwen-carpmuscle-r-v0.3", - "sha": "30f8221d2f5f587343b1dbd65cf7d9bda4f5ef16", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 32.00049707280477, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44550902715904905, - "normalized_score": 44.55090271590491 - }, - "bbh": { - "name": "BBH", - "value": 0.6227124007872, - "normalized_score": 46.37591354449345 - }, - "math": { - "name": "MATH Level 5", - "value": 0.30060422960725075, - "normalized_score": 30.060422960725074 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35067114093959734, - "normalized_score": 13.422818791946312 - }, - "musr": { - "name": "MUSR", - "value": 0.42776041666666664, - "normalized_score": 12.003385416666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5103058510638298, - "normalized_score": 45.58953900709219 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-23", - "submission_date": "2024-10-23", - "generation": 1, - "base_model": "TheTsar1209/qwen-carpmuscle-r-v0.3 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 14.766, - "co2_cost": 4.513992806284949 - } - }, - { - "id": "TheTsar1209/qwen-carpmuscle-v0.1_float16_7c7b06a1788aef48054c3c6d6ad90c6dc5264a81_True", - "model": { - "name": "TheTsar1209/qwen-carpmuscle-v0.1", - "sha": "7c7b06a1788aef48054c3c6d6ad90c6dc5264a81", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 33.445029423216624, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5621628390448454, - "normalized_score": 56.21628390448454 - }, - "bbh": { - "name": "BBH", - "value": 0.643430074129922, - "normalized_score": 48.82559521217237 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2628398791540785, - "normalized_score": 26.283987915407852 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34395973154362414, - "normalized_score": 12.527964205816552 - }, - "musr": { - "name": "MUSR", - "value": 0.41610416666666666, - "normalized_score": 10.146354166666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.520029920212766, - "normalized_score": 46.669991134751776 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-05", - "submission_date": "2024-10-10", - "generation": 3, - "base_model": "Qwen/Qwen2.5-14B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 4.352435104606311 - } - }, - { - "id": "TheTsar1209/qwen-carpmuscle-v0.2_bfloat16_081f6b067ebca9bc384af283f1d267880534b8e3_True", - "model": { - "name": "TheTsar1209/qwen-carpmuscle-v0.2", - "sha": "081f6b067ebca9bc384af283f1d267880534b8e3", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 33.666712709905646, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5256929391791557, - "normalized_score": 52.56929391791557 - }, - "bbh": { - "name": 
"BBH", - "value": 0.6386922464145662, - "normalized_score": 48.18244143380709 - }, - "math": { - "name": "MATH Level 5", - "value": 0.28323262839879154, - "normalized_score": 28.323262839879153 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35570469798657717, - "normalized_score": 14.093959731543624 - }, - "musr": { - "name": "MUSR", - "value": 0.43455208333333334, - "normalized_score": 12.75234375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5147107712765957, - "normalized_score": 46.07897458628841 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-16", - "submission_date": "2024-10-19", - "generation": 3, - "base_model": "Qwen/Qwen2.5-14B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 4.496397782336827 - } - }, - { - "id": "TheTsar1209/qwen-carpmuscle-v0.3_bfloat16_ec92820e4ff36b6f21e1ef63546fe2ddcb34456a_True", - "model": { - "name": "TheTsar1209/qwen-carpmuscle-v0.3", - "sha": "ec92820e4ff36b6f21e1ef63546fe2ddcb34456a", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 31.794404327523267, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4476322823441801, - "normalized_score": 44.76322823441801 - }, - "bbh": { - "name": "BBH", - "value": 0.6151533941210218, - "normalized_score": 45.5433921378403 - }, - "math": { - "name": "MATH Level 5", - "value": 0.31344410876132933, - "normalized_score": 31.344410876132933 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3565436241610738, - "normalized_score": 14.205816554809845 - }, - "musr": { - "name": "MUSR", - "value": 0.4131875, - "normalized_score": 9.781770833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5061502659574468, - "normalized_score": 45.12780732860521 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-28", - "submission_date": "2024-10-28", - "generation": 2, - "base_model": "Qwen/Qwen2.5-14B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 8.461957749933813 - } - }, - { - "id": "TheTsar1209/qwen-carpmuscle-v0.4_bfloat16_3e11d5aad0f19bd652b8605620d0cf6af7a0ea00_True", - "model": { - "name": "TheTsar1209/qwen-carpmuscle-v0.4", - "sha": "3e11d5aad0f19bd652b8605620d0cf6af7a0ea00", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 37.393960224042935, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7202068289915202, - "normalized_score": 72.02068289915204 - }, - "bbh": { - "name": "BBH", - "value": 0.6453667027727318, - "normalized_score": 49.38495588865565 - }, - "math": { - "name": "MATH Level 5", - "value": 0.277190332326284, - "normalized_score": 27.719033232628398 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3523489932885906, - "normalized_score": 13.646532438478745 - }, - "musr": { - "name": "MUSR", - "value": 0.45160416666666664, - "normalized_score": 15.550520833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5143783244680851, - "normalized_score": 46.04203605200946 - } - }, - "features": { - "is_not_available_on_hub": true, - 
"is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-18", - "submission_date": "2024-11-18", - "generation": 3, - "base_model": "Qwen/Qwen2.5-14B", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 14.77, - "co2_cost": 2.73924849680602 - } - }, - { - "id": "TheTsar1209/qwen-carpmuscle-v0.4.1_bfloat16_052e690c01133ef4cfaa1457426679fe7effccda_True", - "model": { - "name": "TheTsar1209/qwen-carpmuscle-v0.4.1", - "sha": "052e690c01133ef4cfaa1457426679fe7effccda", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 37.605164486042526, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7359938297051822, - "normalized_score": 73.59938297051823 - }, - "bbh": { - "name": "BBH", - "value": 0.6506533698399672, - "normalized_score": 50.003672744368544 - }, - "math": { - "name": "MATH Level 5", - "value": 0.27794561933534745, - "normalized_score": 27.794561933534744 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34563758389261745, - "normalized_score": 12.751677852348994 - }, - "musr": { - "name": "MUSR", - "value": 0.44890625, - "normalized_score": 14.913281250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5191156914893617, - "normalized_score": 46.56841016548463 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-19", - "submission_date": "2025-01-19", - "generation": 3, - "base_model": "Qwen/Qwen2.5-14B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 2.678118088869087 - } - }, - { - "id": "Tijmen2/cosmosage-v3_bfloat16_e6d4b4e6868fcf113ab5261d71c7214a1f7fbb0c_True", - "model": { - "name": "Tijmen2/cosmosage-v3", - "sha": "e6d4b4e6868fcf113ab5261d71c7214a1f7fbb0c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.354746679203608, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44823180272787316, - "normalized_score": 44.82318027278731 - }, - "bbh": { - "name": "BBH", - "value": 0.4550637900339029, - "normalized_score": 22.6871057550123 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05060422960725076, - "normalized_score": 5.0604229607250755 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.4198854166666666, - "normalized_score": 10.685677083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.24858710106382978, - "normalized_score": 16.50967789598109 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-20", - "submission_date": "2024-08-27", - "generation": 1, - "base_model": "meta-llama/Meta-Llama-3-8B", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.6617177615198897 - } - }, - { - "id": "TinyLlama/TinyLlama-1.1B-Chat-v0.1_float16_7abc14e7779eabc3a028bc695342869d0410dea2_False", - "model": { - "name": "TinyLlama/TinyLlama-1.1B-Chat-v0.1", - "sha": 
"7abc14e7779eabc3a028bc695342869d0410dea2", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 3.9575773348190606, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1478543597654224, - "normalized_score": 14.78543597654224 - }, - "bbh": { - "name": "BBH", - "value": 0.30835294748680114, - "normalized_score": 3.3630106739393377 - }, - "math": { - "name": "MATH Level 5", - "value": 0.006042296072507553, - "normalized_score": 0.6042296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.22902684563758388, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.35923958333333333, - "normalized_score": 3.9049479166666674 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10979055851063829, - "normalized_score": 1.087839834515365 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-09-16", - "submission_date": "2024-12-02", - "generation": 0, - "base_model": "TinyLlama/TinyLlama-1.1B-Chat-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 55, - "params_billions": 1.1, - "co2_cost": 0.18219041949411743 - } - }, - { - "id": "TinyLlama/TinyLlama-1.1B-Chat-v0.5_bfloat16_5c9e70dd07f5234bf6bf6a2425fffeecd5a6020b_False", - "model": { - "name": "TinyLlama/TinyLlama-1.1B-Chat-v0.5", - "sha": "5c9e70dd07f5234bf6bf6a2425fffeecd5a6020b", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.126163903736939, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1633665341294432, - "normalized_score": 16.336653412944322 - }, - "bbh": { - "name": "BBH", - "value": 0.3105046915935697, - "normalized_score": 3.4076909375697055 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0037764350453172208, - "normalized_score": 0.3776435045317221 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2483221476510067, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.36612500000000003, - "normalized_score": 3.565625000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10962433510638298, - "normalized_score": 1.0693705673758855 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-11-20", - "submission_date": "2024-10-23", - "generation": 0, - "base_model": "TinyLlama/TinyLlama-1.1B-Chat-v0.5", - "hub_license": "apache-2.0", - "hub_hearts": 8, - "params_billions": 1.1, - "co2_cost": 0.18992766350981338 - } - }, - { - "id": "TinyLlama/TinyLlama-1.1B-Chat-v0.6_bfloat16_bf9ae1c8bf026667e6f810768de259bb4a7f4777_True", - "model": { - "name": "TinyLlama/TinyLlama-1.1B-Chat-v0.6", - "sha": "bf9ae1c8bf026667e6f810768de259bb4a7f4777", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.2942762818616345, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15742119797692344, - "normalized_score": 15.742119797692347 - }, - "bbh": { - "name": "BBH", - "value": 0.3066976656166826, - "normalized_score": 3.390370709512531 - }, - "math": { - "name": "MATH Level 5", - 
"value": 0.015861027190332326, - "normalized_score": 1.5861027190332326 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.34221875, - "normalized_score": 2.2773437500000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11486037234042554, - "normalized_score": 1.6511524822695034 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-11-20", - "submission_date": "2024-10-23", - "generation": 0, - "base_model": "TinyLlama/TinyLlama-1.1B-Chat-v0.6", - "hub_license": "apache-2.0", - "hub_hearts": 98, - "params_billions": 1.1, - "co2_cost": 0.6476948448823974 - } - }, - { - "id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0_float16_fe8a4ea1ffedaf415f4da2f062534de366a451e6_False", - "model": { - "name": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - "sha": "fe8a4ea1ffedaf415f4da2f062534de366a451e6", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 2.818859486124847, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.0595763684800773, - "normalized_score": 5.957636848007731 - }, - "bbh": { - "name": "BBH", - "value": 0.3103562867491015, - "normalized_score": 4.013396848486799 - }, - "math": { - "name": "MATH Level 5", - "value": 0.015105740181268883, - "normalized_score": 1.5105740181268883 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.35152083333333334, - "normalized_score": 4.306770833333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11012300531914894, - "normalized_score": 1.124778368794326 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-12-30", - "submission_date": "2024-08-04", - "generation": 0, - "base_model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - "hub_license": "apache-2.0", - "hub_hearts": 1193, - "params_billions": 1.1, - "co2_cost": 0.536882909624825 - } - }, - { - "id": "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T_bfloat16_59f6f375b26bde864a6ca194a9a3044570490064_False", - "model": { - "name": "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T", - "sha": "59f6f375b26bde864a6ca194a9a3044570490064", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.230318100791095, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22766371006706648, - "normalized_score": 22.76637100670665 - }, - "bbh": { - "name": "BBH", - "value": 0.3071188438267271, - "normalized_score": 3.547093389508079 - }, - "math": { - "name": "MATH Level 5", - "value": 0.012084592145015106, - "normalized_score": 1.2084592145015105 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2525167785234899, - "normalized_score": 0.33557046979865535 - }, - "musr": { - "name": "MUSR", - "value": 0.33803125, - "normalized_score": 2.187239583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11203457446808511, - "normalized_score": 1.337174940898345 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, 
- "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-12-28", - "submission_date": "2024-11-27", - "generation": 0, - "base_model": "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T", - "hub_license": "apache-2.0", - "hub_hearts": 171, - "params_billions": 1.1, - "co2_cost": 0.33159575886426274 - } - }, - { - "id": "TinyLlama/TinyLlama_v1.1_bfloat16_ff3c701f2424c7625fdefb9dd470f45ef18b02d6_False", - "model": { - "name": "TinyLlama/TinyLlama_v1.1", - "sha": "ff3c701f2424c7625fdefb9dd470f45ef18b02d6", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.824553844580785, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20006139266036338, - "normalized_score": 20.00613926603634 - }, - "bbh": { - "name": "BBH", - "value": 0.30237018045076064, - "normalized_score": 3.2103010497128146 - }, - "math": { - "name": "MATH Level 5", - "value": 0.012084592145015106, - "normalized_score": 1.2084592145015105 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24580536912751677, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.36996874999999996, - "normalized_score": 3.979427083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10488696808510638, - "normalized_score": 0.542996453900708 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-03-09", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "TinyLlama/TinyLlama_v1.1", - "hub_license": "apache-2.0", - "hub_hearts": 87, - "params_billions": 1.1, - "co2_cost": 0.49785723720472574 - } - }, - { - "id": "ToastyPigeon/Sto-vo-kor-12B_bfloat16_ee703d00350f35eeb68cbad28a4e9a4fcb30fde3_True", - "model": { - "name": "ToastyPigeon/Sto-vo-kor-12B", - "sha": "ee703d00350f35eeb68cbad28a4e9a4fcb30fde3", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.997938390529622, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5501225636865739, - "normalized_score": 55.01225636865739 - }, - "bbh": { - "name": "BBH", - "value": 0.5064617128925814, - "normalized_score": 29.579484369783785 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10876132930513595, - "normalized_score": 10.876132930513595 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.39384375, - "normalized_score": 8.497135416666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33976063829787234, - "normalized_score": 26.640070921985814 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-21", - "submission_date": "2025-01-22", - "generation": 1, - "base_model": "ToastyPigeon/Sto-vo-kor-12B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 12.248, - "co2_cost": 1.7371561597927307 - } - }, - { - "id": "Trappu/Magnum-Picaro-0.7-v2-12b_bfloat16_2ffc46cde49eb823f5588990bd6b848cd505271e_False", - "model": { - "name": "Trappu/Magnum-Picaro-0.7-v2-12b", - "sha": 
"2ffc46cde49eb823f5588990bd6b848cd505271e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.730064133818086, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.300278815764394, - "normalized_score": 30.027881576439405 - }, - "bbh": { - "name": "BBH", - "value": 0.5506661918828847, - "normalized_score": 35.74623319855443 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06646525679758308, - "normalized_score": 6.646525679758309 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32298657718120805, - "normalized_score": 9.731543624161072 - }, - "musr": { - "name": "MUSR", - "value": 0.47271875, - "normalized_score": 19.556510416666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35804521276595747, - "normalized_score": 28.671690307328607 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-11", - "submission_date": "2024-09-12", - "generation": 1, - "base_model": "Trappu/Magnum-Picaro-0.7-v2-12b (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 7, - "params_billions": 12.248, - "co2_cost": 3.3499177102746382 - } - }, - { - "id": "Trappu/Nemo-Picaro-12B_float16_d65bf383d744998ae93a5589ec886532bb7e18eb_False", - "model": { - "name": "Trappu/Nemo-Picaro-12B", - "sha": "d65bf383d744998ae93a5589ec886532bb7e18eb", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.362492548877267, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2577139766929525, - "normalized_score": 25.771397669295247 - }, - "bbh": { - "name": "BBH", - "value": 0.5489586125997546, - "normalized_score": 35.9731352844479 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08459214501510574, - "normalized_score": 8.459214501510575 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3271812080536913, - "normalized_score": 10.290827740492169 - }, - "musr": { - "name": "MUSR", - "value": 0.47259375, - "normalized_score": 18.74088541666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36045545212765956, - "normalized_score": 28.93949468085106 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-10", - "submission_date": "2024-09-22", - "generation": 2, - "base_model": "royallab/MN-LooseCannon-12B-v2 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 12.248, - "co2_cost": 3.682055727975601 - } - }, - { - "id": "Tremontaine/L3-12B-Lunaris-v1_bfloat16_7be236530a835416ebca712d51d661c4488a45de_True", - "model": { - "name": "Tremontaine/L3-12B-Lunaris-v1", - "sha": "7be236530a835416ebca712d51d661c4488a45de", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.477254598147393, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6909311737301471, - "normalized_score": 69.0931173730147 - }, - "bbh": { - "name": "BBH", - "value": 0.5230217237244009, - "normalized_score": 32.1807456461844 - }, - "math": { - "name": "MATH 
Level 5", - "value": 0.08761329305135952, - "normalized_score": 8.761329305135952 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.3673645833333334, - "normalized_score": 4.053906250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3774933510638298, - "normalized_score": 30.832594562647753 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-14", - "submission_date": "2024-07-15", - "generation": 1, - "base_model": "Tremontaine/L3-12B-Lunaris-v1 (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 11.52, - "co2_cost": 2.281927897434539 - } - }, - { - "id": "Triangle104/Annunaki-12b_bfloat16_70a240d771e5ec614b1cd9f080636cec5d9b4ae5_False", - "model": { - "name": "Triangle104/Annunaki-12b", - "sha": "70a240d771e5ec614b1cd9f080636cec5d9b4ae5", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 23.36969772271661, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3872070550583563, - "normalized_score": 38.720705505835625 - }, - "bbh": { - "name": "BBH", - "value": 0.5498969437971782, - "normalized_score": 35.321144959632626 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1216012084592145, - "normalized_score": 12.16012084592145 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3213087248322148, - "normalized_score": 9.507829977628639 - }, - "musr": { - "name": "MUSR", - "value": 0.44087499999999996, - "normalized_score": 14.276041666666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3720910904255319, - "normalized_score": 30.232343380614658 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-19", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "Triangle104/Annunaki-12b (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 12.248, - "co2_cost": 1.5733339060325437 - } - }, - { - "id": "Triangle104/BigTalker-Lite-8B_float16_ea4454ee9f66e54cb0f9efc87a702048582cafb7_False", - "model": { - "name": "Triangle104/BigTalker-Lite-8B", - "sha": "ea4454ee9f66e54cb0f9efc87a702048582cafb7", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.97899274764932, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3689222374411007, - "normalized_score": 36.89222374411007 - }, - "bbh": { - "name": "BBH", - "value": 0.5308138241234059, - "normalized_score": 32.683408496152815 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10196374622356495, - "normalized_score": 10.196374622356496 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3104026845637584, - "normalized_score": 8.05369127516779 - }, - "musr": { - "name": "MUSR", - "value": 0.42084375, - "normalized_score": 11.038802083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34308510638297873, - "normalized_score": 27.009456264775412 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - 
"is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-06", - "submission_date": "2025-01-30", - "generation": 1, - "base_model": "Triangle104/BigTalker-Lite-8B (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3354162271484944 - } - }, - { - "id": "Triangle104/Chatty-Harry_V2.0_float16_79d69071632b800de651dffd244d3036d0aeb0c4_False", - "model": { - "name": "Triangle104/Chatty-Harry_V2.0", - "sha": "79d69071632b800de651dffd244d3036d0aeb0c4", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.833190818566663, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3325520729442324, - "normalized_score": 33.25520729442324 - }, - "bbh": { - "name": "BBH", - "value": 0.5318928049062546, - "normalized_score": 32.763031539641695 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13897280966767372, - "normalized_score": 13.897280966767372 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32298657718120805, - "normalized_score": 9.731543624161072 - }, - "musr": { - "name": "MUSR", - "value": 0.40782291666666665, - "normalized_score": 11.544531249999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36826795212765956, - "normalized_score": 29.807550236406616 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-27", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "Triangle104/Chatty-Harry_V2.0 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 12.248, - "co2_cost": 1.7168798861463483 - } - }, - { - "id": "Triangle104/Chatty-Harry_V3.0_float16_c16d5698bc8c96365d4874b2bb207e4baa79b8ea_False", - "model": { - "name": "Triangle104/Chatty-Harry_V3.0", - "sha": "c16d5698bc8c96365d4874b2bb207e4baa79b8ea", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 23.114766630549312, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.36749823800848413, - "normalized_score": 36.74982380084842 - }, - "bbh": { - "name": "BBH", - "value": 0.5526193453608234, - "normalized_score": 35.89470703139707 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11253776435045318, - "normalized_score": 11.253776435045317 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32298657718120805, - "normalized_score": 9.731543624161072 - }, - "musr": { - "name": "MUSR", - "value": 0.44084375, - "normalized_score": 15.038802083333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37017952127659576, - "normalized_score": 30.019946808510632 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-20", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "Triangle104/Chatty-Harry_V3.0 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 12.248, - "co2_cost": 1.6679627320496089 - } - }, - { - "id": "Triangle104/Chronos-Prism_V1.0_float16_02889fdec891a9e092c7c0ab4c3b8562117df700_False", - "model": { - "name": "Triangle104/Chronos-Prism_V1.0", - "sha": 
"02889fdec891a9e092c7c0ab4c3b8562117df700", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.183415410600137, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3259329689667859, - "normalized_score": 32.593296896678595 - }, - "bbh": { - "name": "BBH", - "value": 0.5554188807010064, - "normalized_score": 36.575708870675506 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12009063444108761, - "normalized_score": 12.009063444108762 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.4262708333333333, - "normalized_score": 14.283854166666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36727061170212766, - "normalized_score": 29.696734633569736 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-24", - "submission_date": "2025-01-30", - "generation": 1, - "base_model": "Triangle104/Chronos-Prism_V1.0 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 12.248, - "co2_cost": 1.6540336547935837 - } - }, - { - "id": "Triangle104/DS-Distilled-Hermes-Llama-3.1_bfloat16_48b1a24bcfff16ae849d2003f7773fa7e241d332_False", - "model": { - "name": "Triangle104/DS-Distilled-Hermes-Llama-3.1", - "sha": "48b1a24bcfff16ae849d2003f7773fa7e241d332", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.38500937029289, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3229353670483207, - "normalized_score": 32.29353670483207 - }, - "bbh": { - "name": "BBH", - "value": 0.5117012556460311, - "normalized_score": 30.312465517244533 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2930513595166163, - "normalized_score": 29.305135951661633 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3187919463087248, - "normalized_score": 9.172259507829976 - }, - "musr": { - "name": "MUSR", - "value": 0.4038541666666667, - "normalized_score": 9.781770833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31100398936170215, - "normalized_score": 23.444887706855795 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-26", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "Triangle104/DS-Distilled-Hermes-Llama-3.1 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.4246801609287691 - } - }, - { - "id": "Triangle104/DS-Distilled-Hermes-Llama-3.1_TIES_float16_f17071ff432257711720cc3956469fe37391ef81_False", - "model": { - "name": "Triangle104/DS-Distilled-Hermes-Llama-3.1_TIES", - "sha": "f17071ff432257711720cc3956469fe37391ef81", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 3.378416433125681, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.13641360479084386, - "normalized_score": 13.641360479084385 - }, - "bbh": { - "name": "BBH", - "value": 0.292845246551473, - "normalized_score": 
2.108592689290024 - }, - "math": { - "name": "MATH Level 5", - "value": 0.00906344410876133, - "normalized_score": 0.906344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24496644295302014, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.36209375000000005, - "normalized_score": 2.461718750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11037234042553191, - "normalized_score": 1.1524822695035455 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-26", - "submission_date": "2025-01-30", - "generation": 1, - "base_model": "Triangle104/DS-Distilled-Hermes-Llama-3.1_TIES (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.5058492107836854 - } - }, - { - "id": "Triangle104/DS-R1-Distill-Q2.5-10B-Harmony_bfloat16_010d50098f18672d302219150ce522e8e37eaf1e_False", - "model": { - "name": "Triangle104/DS-R1-Distill-Q2.5-10B-Harmony", - "sha": "010d50098f18672d302219150ce522e8e37eaf1e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.7584241508614915, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17508211545366295, - "normalized_score": 17.508211545366294 - }, - "bbh": { - "name": "BBH", - "value": 0.2643276743386568, - "normalized_score": 1.528324420677361 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2105704697986577, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.31276041666666665, - "normalized_score": 1.5950520833333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11727061170212766, - "normalized_score": 1.9189568557919614 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-27", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "Triangle104/DS-R1-Distill-Q2.5-10B-Harmony (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 10.366, - "co2_cost": 1.8712623967897637 - } - }, - { - "id": "Triangle104/DS-R1-Distill-Q2.5-14B-Harmony_V0.1_bfloat16_baa7a816d101fd6a8d7909f4fdf038538e33822c_False", - "model": { - "name": "Triangle104/DS-R1-Distill-Q2.5-14B-Harmony_V0.1", - "sha": "baa7a816d101fd6a8d7909f4fdf038538e33822c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.40633267860269, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4515042309959796, - "normalized_score": 45.15042309959796 - }, - "bbh": { - "name": "BBH", - "value": 0.5783379428926061, - "normalized_score": 38.71536986302894 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5551359516616314, - "normalized_score": 55.51359516616314 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3934563758389262, - "normalized_score": 19.12751677852349 - }, - "musr": { - "name": "MUSR", - "value": 0.5566875000000001, - "normalized_score": 31.919270833333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4601063829787234, - "normalized_score": 40.01182033096927 - } - }, - 
"features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-28", - "submission_date": "2025-01-28", - "generation": 1, - "base_model": "Triangle104/DS-R1-Distill-Q2.5-14B-Harmony_V0.1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 14.77, - "co2_cost": 3.6793931311300994 - } - }, - { - "id": "Triangle104/DS-R1-Distill-Q2.5-7B-RP_bfloat16_1dce22130cb198a763c59cfe2bcf3a3ac12b4fab_False", - "model": { - "name": "Triangle104/DS-R1-Distill-Q2.5-7B-RP", - "sha": "1dce22130cb198a763c59cfe2bcf3a3ac12b4fab", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 23.29151402275458, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.34454248061809334, - "normalized_score": 34.45424806180933 - }, - "bbh": { - "name": "BBH", - "value": 0.43834886662348205, - "normalized_score": 20.78137433344085 - }, - "math": { - "name": "MATH Level 5", - "value": 0.46827794561933533, - "normalized_score": 46.82779456193353 - }, - "gpqa": { - "name": "GPQA", - "value": 0.313758389261745, - "normalized_score": 8.501118568232664 - }, - "musr": { - "name": "MUSR", - "value": 0.40302083333333333, - "normalized_score": 8.177604166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2890625, - "normalized_score": 21.006944444444443 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-27", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "Triangle104/DS-R1-Distill-Q2.5-7B-RP (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.3332671182751854 - } - }, - { - "id": "Triangle104/DS-R1-Llama-8B-Harmony_bfloat16_01aee21e38112e386511e5abeee1b1d3e2f904a5_False", - "model": { - "name": "Triangle104/DS-R1-Llama-8B-Harmony", - "sha": "01aee21e38112e386511e5abeee1b1d3e2f904a5", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.179062178800105, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.35663262366077564, - "normalized_score": 35.66326236607756 - }, - "bbh": { - "name": "BBH", - "value": 0.41536451555729687, - "normalized_score": 17.496342243581015 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4282477341389728, - "normalized_score": 42.82477341389728 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.3761979166666667, - "normalized_score": 6.124739583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27435172872340424, - "normalized_score": 19.372414302600472 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "Triangle104/DS-R1-Llama-8B-Harmony (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.5553321228885983 - } - }, - { - "id": 
"Triangle104/DSR1-Distill-Llama-Lit-8B_bfloat16_b5952b3507961626e50dc5640677000cdfbc4726_False", - "model": { - "name": "Triangle104/DSR1-Distill-Llama-Lit-8B", - "sha": "b5952b3507961626e50dc5640677000cdfbc4726", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.835032823738523, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.18852090231696345, - "normalized_score": 18.852090231696344 - }, - "bbh": { - "name": "BBH", - "value": 0.4284056327107781, - "normalized_score": 19.225159654639572 - }, - "math": { - "name": "MATH Level 5", - "value": 0.35196374622356497, - "normalized_score": 35.196374622356494 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.35346875, - "normalized_score": 6.716927083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27975398936170215, - "normalized_score": 19.97266548463357 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-09", - "submission_date": "2025-02-11", - "generation": 1, - "base_model": "Triangle104/DSR1-Distill-Llama-Lit-8B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4702000791170424 - } - }, - { - "id": "Triangle104/DSR1-Distill-Qwen-7B-RP_bfloat16_ad284db0b08b115a9fb2675cdffc2447b82ec3bc_False", - "model": { - "name": "Triangle104/DSR1-Distill-Qwen-7B-RP", - "sha": "ad284db0b08b115a9fb2675cdffc2447b82ec3bc", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 24.099711456808993, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.36092900171544834, - "normalized_score": 36.092900171544834 - }, - "bbh": { - "name": "BBH", - "value": 0.4326490703099772, - "normalized_score": 19.853297811659683 - }, - "math": { - "name": "MATH Level 5", - "value": 0.48036253776435045, - "normalized_score": 48.036253776435046 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3196308724832215, - "normalized_score": 9.284116331096197 - }, - "musr": { - "name": "MUSR", - "value": 0.40454166666666663, - "normalized_score": 8.801041666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30277593085106386, - "normalized_score": 22.530658983451538 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-09", - "submission_date": "2025-02-09", - "generation": 1, - "base_model": "Triangle104/DSR1-Distill-Qwen-7B-RP (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 0.6860882551271495 - } - }, - { - "id": "Triangle104/Dark-Chivalry_V1.0_float16_94882aa3e066ac9a896b57eaf026df23e930906e_False", - "model": { - "name": "Triangle104/Dark-Chivalry_V1.0", - "sha": "94882aa3e066ac9a896b57eaf026df23e930906e", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.672950420599893, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 
0.4325700253106203, - "normalized_score": 43.25700253106203 - }, - "bbh": { - "name": "BBH", - "value": 0.4974207759950637, - "normalized_score": 28.026138739765415 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13141993957703926, - "normalized_score": 13.141993957703926 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2936241610738255, - "normalized_score": 5.8165548098433995 - }, - "musr": { - "name": "MUSR", - "value": 0.4181770833333333, - "normalized_score": 12.63880208333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34441489361702127, - "normalized_score": 27.157210401891252 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-20", - "submission_date": "2025-01-30", - "generation": 1, - "base_model": "Triangle104/Dark-Chivalry_V1.0 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.4220577896339603 - } - }, - { - "id": "Triangle104/Distilled-DarkPlanet-Allades-8B_float16_61d7f1f5b92a156aa342c6c78d8d81d0eff96d55_False", - "model": { - "name": "Triangle104/Distilled-DarkPlanet-Allades-8B", - "sha": "61d7f1f5b92a156aa342c6c78d8d81d0eff96d55", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.683057829608618, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3460163477351206, - "normalized_score": 34.60163477351206 - }, - "bbh": { - "name": "BBH", - "value": 0.4633948672868899, - "normalized_score": 23.038205707355587 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4003021148036254, - "normalized_score": 40.03021148036254 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.35375, - "normalized_score": 3.9187499999999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29014295212765956, - "normalized_score": 21.12699468085106 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-29", - "submission_date": "2025-01-29", - "generation": 1, - "base_model": "Triangle104/Distilled-DarkPlanet-Allades-8B (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.4574056932536055 - } - }, - { - "id": "Triangle104/Distilled-DarkPlanet-Allades-8B_TIES_float16_ce441290f3193f30950e1e979a72a6d3e7be5801_False", - "model": { - "name": "Triangle104/Distilled-DarkPlanet-Allades-8B_TIES", - "sha": "ce441290f3193f30950e1e979a72a6d3e7be5801", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.21392622497977, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3891807071902552, - "normalized_score": 38.91807071902552 - }, - "bbh": { - "name": "BBH", - "value": 0.5041556910813355, - "normalized_score": 29.961797257661434 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09063444108761329, - "normalized_score": 9.06344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3145973154362416, - "normalized_score": 8.612975391498878 - }, - "musr": { - "name": "MUSR", - "value": 0.3868020833333334, 
- "normalized_score": 8.050260416666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.340093085106383, - "normalized_score": 26.677009456264773 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-05", - "generation": 1, - "base_model": "Triangle104/Distilled-DarkPlanet-Allades-8B_TIES (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.4457470456859252 - } - }, - { - "id": "Triangle104/Distilled-Whiskey-8b_float16_959128ec0161c4d4cc424ec75e63fbcf997c3f81_False", - "model": { - "name": "Triangle104/Distilled-Whiskey-8b", - "sha": "959128ec0161c4d4cc424ec75e63fbcf997c3f81", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.935848469843638, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.34476743928332376, - "normalized_score": 34.47674392833238 - }, - "bbh": { - "name": "BBH", - "value": 0.5027820189600739, - "normalized_score": 29.317662931896013 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2545317220543807, - "normalized_score": 25.45317220543807 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3313758389261745, - "normalized_score": 10.850111856823268 - }, - "musr": { - "name": "MUSR", - "value": 0.41721874999999997, - "normalized_score": 11.219010416666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3366855053191489, - "normalized_score": 26.29838947990544 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "Triangle104/Distilled-Whiskey-8b (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.478627218374972 - } - }, - { - "id": "Triangle104/Dolphin3-Llama3.2-Smart_bfloat16_ecf28b35c75e6e4f3144701d8887e9328a42cace_False", - "model": { - "name": "Triangle104/Dolphin3-Llama3.2-Smart", - "sha": "ecf28b35c75e6e4f3144701d8887e9328a42cace", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.183463389792356, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.413660199382084, - "normalized_score": 41.366019938208396 - }, - "bbh": { - "name": "BBH", - "value": 0.397507554563096, - "normalized_score": 15.349665999615956 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04380664652567976, - "normalized_score": 4.380664652567976 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26929530201342283, - "normalized_score": 2.572706935123044 - }, - "musr": { - "name": "MUSR", - "value": 0.3921666666666667, - "normalized_score": 8.154166666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.21949800531914893, - "normalized_score": 13.277556146572103 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-19", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "Triangle104/Dolphin3-Llama3.2-Smart (Merge)", - 
"hub_license": "", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 1.2169451994944307 - } - }, - { - "id": "Triangle104/Gemmadevi-Stock-10B_bfloat16_a8085fb5e559a1ac384ab0b5bfb3599bfff6fabc_False", - "model": { - "name": "Triangle104/Gemmadevi-Stock-10B", - "sha": "a8085fb5e559a1ac384ab0b5bfb3599bfff6fabc", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 22.723742034208243, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15819470117067158, - "normalized_score": 15.819470117067159 - }, - "bbh": { - "name": "BBH", - "value": 0.6065922684184144, - "normalized_score": 43.62183969257654 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09667673716012085, - "normalized_score": 9.667673716012084 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35318791946308725, - "normalized_score": 13.758389261744966 - }, - "musr": { - "name": "MUSR", - "value": 0.46211458333333333, - "normalized_score": 17.23098958333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4261968085106383, - "normalized_score": 36.244089834515364 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-04", - "submission_date": "2025-02-05", - "generation": 1, - "base_model": "Triangle104/Gemmadevi-Stock-10B (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 10.159, - "co2_cost": 3.862202785697742 - } - }, - { - "id": "Triangle104/Hermes-Llama-3.2-CoT_bfloat16_91cdadaa53f538ae95d6e59016639d10381c6da6_False", - "model": { - "name": "Triangle104/Hermes-Llama-3.2-CoT", - "sha": "91cdadaa53f538ae95d6e59016639d10381c6da6", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.621221154166122, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4177571066991139, - "normalized_score": 41.77571066991139 - }, - "bbh": { - "name": "BBH", - "value": 0.4615751505493966, - "normalized_score": 23.795788682714846 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09516616314199396, - "normalized_score": 9.516616314199396 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.36978125, - "normalized_score": 5.089322916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2947140957446808, - "normalized_score": 21.634899527186757 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-11", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "Triangle104/Hermes-Llama-3.2-CoT (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 1, - "params_billions": 3.213, - "co2_cost": 1.179573613762454 - } - }, - { - "id": "Triangle104/Hermes-Llama-3.2-CoT-Summary_float16_92e850a4a23b8d6694026f31ee6c72c4d1b6a25d_False", - "model": { - "name": "Triangle104/Hermes-Llama-3.2-CoT-Summary", - "sha": "92e850a4a23b8d6694026f31ee6c72c4d1b6a25d", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.766151353052297, - 
"has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.48302836473889277, - "normalized_score": 48.30283647388928 - }, - "bbh": { - "name": "BBH", - "value": 0.42003008354054533, - "normalized_score": 17.388422101012054 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08308157099697885, - "normalized_score": 8.308157099697885 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2558724832214765, - "normalized_score": 0.7829977628635317 - }, - "musr": { - "name": "MUSR", - "value": 0.3575, - "normalized_score": 4.687499999999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29014295212765956, - "normalized_score": 21.12699468085106 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-11", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "Triangle104/Hermes-Llama-3.2-CoT-Summary (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 1, - "params_billions": 3.213, - "co2_cost": 1.2115236264239686 - } - }, - { - "id": "Triangle104/Hermes3-L3.1-DirtyHarry-8B_float16_cf6bca48a8378975588c17238050ce687d226204_False", - "model": { - "name": "Triangle104/Hermes3-L3.1-DirtyHarry-8B", - "sha": "cf6bca48a8378975588c17238050ce687d226204", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.55982177819895, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.32423414318452815, - "normalized_score": 32.42341431845282 - }, - "bbh": { - "name": "BBH", - "value": 0.5066388671914118, - "normalized_score": 29.678775616931603 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07175226586102719, - "normalized_score": 7.175226586102719 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - "musr": { - "name": "MUSR", - "value": 0.4068958333333333, - "normalized_score": 9.161979166666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3338597074468085, - "normalized_score": 25.984411938534276 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-21", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "Triangle104/Hermes3-L3.1-DirtyHarry-8B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3222689806146606 - } - }, - { - "id": "Triangle104/Herodotos-14B_bfloat16_0fad098b9e657b0aabd073b80f72e0961cb3d4c7_False", - "model": { - "name": "Triangle104/Herodotos-14B", - "sha": "0fad098b9e657b0aabd073b80f72e0961cb3d4c7", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.337259928842606, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4667415790103592, - "normalized_score": 46.674157901035926 - }, - "bbh": { - "name": "BBH", - "value": 0.6435044367110887, - "normalized_score": 48.9099032194971 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5045317220543807, - "normalized_score": 50.453172205438065 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3733221476510067, - "normalized_score": 16.442953020134222 - }, - "musr": { - 
"name": "MUSR", - "value": 0.4795416666666667, - "normalized_score": 19.876041666666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5290059840425532, - "normalized_score": 47.667331560283685 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-11", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "Triangle104/Herodotos-14B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 14.77, - "co2_cost": 3.8350566763800837 - } - }, - { - "id": "Triangle104/Herodotos-14B_V0.1_float16_95f343835e08e360fa8b7ffe79c1b67045952825_False", - "model": { - "name": "Triangle104/Herodotos-14B_V0.1", - "sha": "95f343835e08e360fa8b7ffe79c1b67045952825", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.56388866448478, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1878715142488597, - "normalized_score": 18.78715142488597 - }, - "bbh": { - "name": "BBH", - "value": 0.30172239497895226, - "normalized_score": 2.954726291928145 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.22399328859060402, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3683854166666667, - "normalized_score": 3.8148437499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11643949468085106, - "normalized_score": 1.8266105200945615 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-11", - "submission_date": "2025-02-11", - "generation": 1, - "base_model": "Triangle104/Herodotos-14B_V0.1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 1.997502037756403 - } - }, - { - "id": "Triangle104/L3.1-8B-Dusky-Ink_float16_b972ab707f97d67d52b2122f77e4be32822fc578_False", - "model": { - "name": "Triangle104/L3.1-8B-Dusky-Ink", - "sha": "b972ab707f97d67d52b2122f77e4be32822fc578", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.391498815708132, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4529780981130068, - "normalized_score": 45.29780981130068 - }, - "bbh": { - "name": "BBH", - "value": 0.5097902234872148, - "normalized_score": 30.509038071961726 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12311178247734139, - "normalized_score": 12.311178247734139 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28942953020134227, - "normalized_score": 5.257270693512303 - }, - "musr": { - "name": "MUSR", - "value": 0.4223958333333333, - "normalized_score": 11.166145833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36826795212765956, - "normalized_score": 29.807550236406616 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-05", - "generation": 1, - "base_model": "Triangle104/L3.1-8B-Dusky-Ink (Merge)", - "hub_license": "", - "hub_hearts": 
1, - "params_billions": 8.03, - "co2_cost": 1.440204275039435 - } - }, - { - "id": "Triangle104/L3.1-8B-Dusky-Ink_v0.r1_float16_c169ef1d2274ffc8af2496a8c3f16d19ff9a5b5f_False", - "model": { - "name": "Triangle104/L3.1-8B-Dusky-Ink_v0.r1", - "sha": "c169ef1d2274ffc8af2496a8c3f16d19ff9a5b5f", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.969324061974744, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19848779017451473, - "normalized_score": 19.848779017451474 - }, - "bbh": { - "name": "BBH", - "value": 0.43372778578458115, - "normalized_score": 20.17548881298811 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04305135951661632, - "normalized_score": 4.305135951661631 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.3988333333333333, - "normalized_score": 7.820833333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.320561835106383, - "normalized_score": 24.506870567375884 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-08", - "submission_date": "2025-02-09", - "generation": 1, - "base_model": "Triangle104/L3.1-8B-Dusky-Ink_v0.r1 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.467098660298715 - } - }, - { - "id": "Triangle104/LThreePointOne-8B-HermesBlackroot_bfloat16_52244f9da40c719f1d302e1ce87f9a07a5b96955_False", - "model": { - "name": "Triangle104/LThreePointOne-8B-HermesBlackroot", - "sha": "52244f9da40c719f1d302e1ce87f9a07a5b96955", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 15.16012857459314, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17920340252751588, - "normalized_score": 17.920340252751586 - }, - "bbh": { - "name": "BBH", - "value": 0.4998333246909241, - "normalized_score": 29.267249815971553 - }, - "math": { - "name": "MATH Level 5", - "value": 0.019637462235649546, - "normalized_score": 1.9637462235649545 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.3585520833333333, - "normalized_score": 8.819010416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32845744680851063, - "normalized_score": 25.384160756501178 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-14", - "submission_date": "2025-02-19", - "generation": 1, - "base_model": "Triangle104/LThreePointOne-8B-HermesBlackroot (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.4778235402881181 - } - }, - { - "id": "Triangle104/LThreePointOne-8B-HermesInk_bfloat16_b572deac44832f1e5a7bb75e33d533620aeedac6_False", - "model": { - "name": "Triangle104/LThreePointOne-8B-HermesInk", - "sha": "b572deac44832f1e5a7bb75e33d533620aeedac6", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 
22.69615504867416, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4031192790684273, - "normalized_score": 40.31192790684273 - }, - "bbh": { - "name": "BBH", - "value": 0.5222765555856439, - "normalized_score": 31.47994731255064 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17220543806646527, - "normalized_score": 17.220543806646525 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32298657718120805, - "normalized_score": 9.731543624161072 - }, - "musr": { - "name": "MUSR", - "value": 0.4129375, - "normalized_score": 10.0171875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34674202127659576, - "normalized_score": 27.41578014184397 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-14", - "submission_date": "2025-02-19", - "generation": 1, - "base_model": "Triangle104/LThreePointOne-8B-HermesInk (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 2.0721760014631236 - } - }, - { - "id": "Triangle104/Llama3.1-Allades-Lit-8b_bfloat16_9821725c1b319134754d350de2071b0d5d1c8ad4_False", - "model": { - "name": "Triangle104/Llama3.1-Allades-Lit-8b", - "sha": "9821725c1b319134754d350de2071b0d5d1c8ad4", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 11.875472634114063, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24612361866514182, - "normalized_score": 24.61236186651418 - }, - "bbh": { - "name": "BBH", - "value": 0.41832977787362163, - "normalized_score": 17.44690701770639 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0022658610271903325, - "normalized_score": 0.22658610271903326 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28439597315436244, - "normalized_score": 4.5861297539149914 - }, - "musr": { - "name": "MUSR", - "value": 0.37083333333333335, - "normalized_score": 5.220833333333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2724401595744681, - "normalized_score": 19.160017730496453 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-11", - "submission_date": "2025-02-11", - "generation": 1, - "base_model": "Triangle104/Llama3.1-Allades-Lit-8b (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.7720308187157303 - } - }, - { - "id": "Triangle104/Llama3.1-cc-Lit-8b_bfloat16_7285d0694395654ed0255b4ca2c7e7fa8aaad482_False", - "model": { - "name": "Triangle104/Llama3.1-cc-Lit-8b", - "sha": "7285d0694395654ed0255b4ca2c7e7fa8aaad482", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 12.717562119702656, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2993047336622384, - "normalized_score": 29.930473366223843 - }, - "bbh": { - "name": "BBH", - "value": 0.3847994561866892, - "normalized_score": 13.866972220416004 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0030211480362537764, - "normalized_score": 0.3021148036253776 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 
3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.38540625, - "normalized_score": 6.242447916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30044880319148937, - "normalized_score": 22.272089243498815 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-11", - "submission_date": "2025-02-11", - "generation": 1, - "base_model": "Triangle104/Llama3.1-cc-Lit-8b (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.5077341013865457 - } - }, - { - "id": "Triangle104/Minerva-1.5b_bfloat16_101165d451aa5dc66514572056b1216851ac86c8_False", - "model": { - "name": "Triangle104/Minerva-1.5b", - "sha": "101165d451aa5dc66514572056b1216851ac86c8", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 14.467080512966321, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2694295580171722, - "normalized_score": 26.94295580171722 - }, - "bbh": { - "name": "BBH", - "value": 0.4025709779119226, - "normalized_score": 16.381923221385325 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1027190332326284, - "normalized_score": 10.27190332326284 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3104026845637584, - "normalized_score": 8.05369127516779 - }, - "musr": { - "name": "MUSR", - "value": 0.3655, - "normalized_score": 6.2875000000000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.269780585106383, - "normalized_score": 18.864509456264773 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-19", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "Triangle104/Minerva-1.5b (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.777, - "co2_cost": 1.1307037261186645 - } - }, - { - "id": "Triangle104/Minerva-1.5b_V0.2_float16_e8a05f264dd1ed568812459edeaa17e977efedee_False", - "model": { - "name": "Triangle104/Minerva-1.5b_V0.2", - "sha": "e8a05f264dd1ed568812459edeaa17e977efedee", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 15.021440291369977, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3083474071020448, - "normalized_score": 30.834740710204482 - }, - "bbh": { - "name": "BBH", - "value": 0.3989042137094949, - "normalized_score": 14.994536223857972 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11404833836858005, - "normalized_score": 11.404833836858005 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28523489932885904, - "normalized_score": 4.697986577181204 - }, - "musr": { - "name": "MUSR", - "value": 0.3960104166666667, - "normalized_score": 6.967968750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29105718085106386, - "normalized_score": 21.228575650118206 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-19", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "Triangle104/Minerva-1.5b_V0.2 
(Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 1.777, - "co2_cost": 1.1413996689755055 - } - }, - { - "id": "Triangle104/Minerva-10b_bfloat16_ee678589020031634d02b020767d821dd5b10876_False", - "model": { - "name": "Triangle104/Minerva-10b", - "sha": "ee678589020031634d02b020767d821dd5b10876", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 10.977971126497692, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1878715142488597, - "normalized_score": 18.78715142488597 - }, - "bbh": { - "name": "BBH", - "value": 0.4462036157096501, - "normalized_score": 22.69248262502312 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28104026845637586, - "normalized_score": 4.138702460850116 - }, - "musr": { - "name": "MUSR", - "value": 0.36270833333333335, - "normalized_score": 5.605208333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.23179853723404256, - "normalized_score": 14.644281914893616 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-19", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "Triangle104/Minerva-10b (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 10.067, - "co2_cost": 1.561337791741095 - } - }, - { - "id": "Triangle104/Minerva-14b_bfloat16_4eed798e0edb7869fadf60f254abdad75825b348_False", - "model": { - "name": "Triangle104/Minerva-14b", - "sha": "4eed798e0edb7869fadf60f254abdad75825b348", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 32.40842006449074, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3467898509288687, - "normalized_score": 34.678985092886876 - }, - "bbh": { - "name": "BBH", - "value": 0.6300829439447851, - "normalized_score": 47.062224551633484 - }, - "math": { - "name": "MATH Level 5", - "value": 0.30513595166163143, - "normalized_score": 30.513595166163142 - }, - "gpqa": { - "name": "GPQA", - "value": 0.37416107382550334, - "normalized_score": 16.554809843400445 - }, - "musr": { - "name": "MUSR", - "value": 0.476625, - "normalized_score": 19.04479166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5193650265957447, - "normalized_score": 46.596114066193856 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-19", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "Triangle104/Minerva-14b (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 14.77, - "co2_cost": 3.0386771852521433 - } - }, - { - "id": "Triangle104/Minerva-14b-V0.1_bfloat16_5c4ed9ada62c298fba8705132d69382b8cf6ec6c_False", - "model": { - "name": "Triangle104/Minerva-14b-V0.1", - "sha": "5c4ed9ada62c298fba8705132d69382b8cf6ec6c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 27.042982226988133, - "has_chat_template": false - }, - "evaluations": { - 
"ifeval": { - "name": "IFEval", - "value": 0.0861292481726264, - "normalized_score": 8.612924817262641 - }, - "bbh": { - "name": "BBH", - "value": 0.6089792638423274, - "normalized_score": 43.620098523083904 - }, - "math": { - "name": "MATH Level 5", - "value": 0.30513595166163143, - "normalized_score": 30.513595166163142 - }, - "gpqa": { - "name": "GPQA", - "value": 0.36577181208053694, - "normalized_score": 15.436241610738257 - }, - "musr": { - "name": "MUSR", - "value": 0.47002083333333333, - "normalized_score": 18.31927083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5118018617021277, - "normalized_score": 45.755762411347526 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-26", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "Triangle104/Minerva-14b-V0.1 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 14.77, - "co2_cost": 2.8097686346555895 - } - }, - { - "id": "Triangle104/Minerva-7b_float16_00fd640525d0f802968a01336d0770e4243c9d5b_False", - "model": { - "name": "Triangle104/Minerva-7b", - "sha": "00fd640525d0f802968a01336d0770e4243c9d5b", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 26.501050696755055, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3724196243744376, - "normalized_score": 37.24196243744376 - }, - "bbh": { - "name": "BBH", - "value": 0.5498400501314606, - "normalized_score": 36.0758653251849 - }, - "math": { - "name": "MATH Level 5", - "value": 0.283987915407855, - "normalized_score": 28.3987915407855 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32298657718120805, - "normalized_score": 9.731543624161072 - }, - "musr": { - "name": "MUSR", - "value": 0.4143333333333333, - "normalized_score": 9.291666666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44439827127659576, - "normalized_score": 38.26647458628841 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-19", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "Triangle104/Minerva-7b (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.1637015534784922 - } - }, - { - "id": "Triangle104/Minerva-8b_bfloat16_be3d362c6b6c491d780a774e3b8f18ba8a36c679_False", - "model": { - "name": "Triangle104/Minerva-8b", - "sha": "be3d362c6b6c491d780a774e3b8f18ba8a36c679", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 14.317747008078769, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17208451353519771, - "normalized_score": 17.20845135351977 - }, - "bbh": { - "name": "BBH", - "value": 0.46686093526780637, - "normalized_score": 25.37530670020226 - }, - "math": { - "name": "MATH Level 5", - "value": 0.004531722054380665, - "normalized_score": 0.4531722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31208053691275167, - "normalized_score": 8.277404921700223 - }, - "musr": { - "name": "MUSR", - "value": 0.4272916666666667, - "normalized_score": 11.378124999999999 - 
}, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30892619680851063, - "normalized_score": 23.214021867612292 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-19", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "Triangle104/Minerva-8b (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.248, - "co2_cost": 1.4768092091178562 - } - }, - { - "id": "Triangle104/Mistral-Redemption-Arc_float16_c4b74ad0240cd1c30eebf06f078483c9c5c576e5_False", - "model": { - "name": "Triangle104/Mistral-Redemption-Arc", - "sha": "c4b74ad0240cd1c30eebf06f078483c9c5c576e5", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 32.78693047639915, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.40289432040319684, - "normalized_score": 40.28943204031969 - }, - "bbh": { - "name": "BBH", - "value": 0.6254876729064861, - "normalized_score": 46.276528672084225 - }, - "math": { - "name": "MATH Level 5", - "value": 0.41012084592145015, - "normalized_score": 41.012084592145015 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34731543624161076, - "normalized_score": 12.975391498881436 - }, - "musr": { - "name": "MUSR", - "value": 0.45951041666666664, - "normalized_score": 17.172135416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4509640957446808, - "normalized_score": 38.99601063829786 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-07", - "submission_date": "2025-02-08", - "generation": 1, - "base_model": "Triangle104/Mistral-Redemption-Arc (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 23.572, - "co2_cost": 1.593036275501428 - } - }, - { - "id": "Triangle104/Mistral-Small-24b-Harmony_float16_2ef93029bdda2058b5ade068e20a76028bd0b87d_False", - "model": { - "name": "Triangle104/Mistral-Small-24b-Harmony", - "sha": "2ef93029bdda2058b5ade068e20a76028bd0b87d", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 27.168346239791443, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16871234989826994, - "normalized_score": 16.871234989826995 - }, - "bbh": { - "name": "BBH", - "value": 0.6433732705921861, - "normalized_score": 48.421150596785026 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19108761329305135, - "normalized_score": 19.108761329305135 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38422818791946306, - "normalized_score": 17.897091722595075 - }, - "musr": { - "name": "MUSR", - "value": 0.4276041666666666, - "normalized_score": 11.483854166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5430518617021277, - "normalized_score": 49.22798463356975 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-20", - "submission_date": "2025-02-20", - "generation": 1, - "base_model": "Triangle104/Mistral-Small-24b-Harmony (Merge)", - "hub_license": "apache-2.0", - 
"hub_hearts": 0, - "params_billions": 23.572, - "co2_cost": 1.4399582166958405 - } - }, - { - "id": "Triangle104/Pans_Gutenbergum_V0.1_float16_87017852514d3f12ced5e44a670ee00e0fd00124_False", - "model": { - "name": "Triangle104/Pans_Gutenbergum_V0.1", - "sha": "87017852514d3f12ced5e44a670ee00e0fd00124", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.27617513692708, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.309696050922663, - "normalized_score": 30.9696050922663 - }, - "bbh": { - "name": "BBH", - "value": 0.5541091780465247, - "normalized_score": 36.082449137821364 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10574018126888217, - "normalized_score": 10.574018126888216 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32298657718120805, - "normalized_score": 9.731543624161072 - }, - "musr": { - "name": "MUSR", - "value": 0.4528125, - "normalized_score": 16.33489583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3696808510638298, - "normalized_score": 29.964539007092196 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-27", - "submission_date": "2025-01-30", - "generation": 1, - "base_model": "Triangle104/Pans_Gutenbergum_V0.1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 12.248, - "co2_cost": 1.684474305877838 - } - }, - { - "id": "Triangle104/Pans_Gutenbergum_V0.2_float16_d40fa1df61d99c02abc6d242d93be26a2a457aee_False", - "model": { - "name": "Triangle104/Pans_Gutenbergum_V0.2", - "sha": "d40fa1df61d99c02abc6d242d93be26a2a457aee", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.71531814911191, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3215113676157041, - "normalized_score": 32.15113676157041 - }, - "bbh": { - "name": "BBH", - "value": 0.55257930562769, - "normalized_score": 35.91445940350976 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06873111782477341, - "normalized_score": 6.873111782477341 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31208053691275167, - "normalized_score": 8.277404921700223 - }, - "musr": { - "name": "MUSR", - "value": 0.46732291666666664, - "normalized_score": 18.348697916666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3585438829787234, - "normalized_score": 28.727098108747047 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-14", - "submission_date": "2025-01-30", - "generation": 1, - "base_model": "Triangle104/Pans_Gutenbergum_V0.2 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 12.248, - "co2_cost": 1.681225758481351 - } - }, - { - "id": "Triangle104/Pantheon_ChatWaifu_V0.2_float16_27714543a8cbeedc8dba50651f40b5170ae65000_False", - "model": { - "name": "Triangle104/Pantheon_ChatWaifu_V0.2", - "sha": "27714543a8cbeedc8dba50651f40b5170ae65000", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.845639319611607, - "has_chat_template": false - 
}, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2682803849341968, - "normalized_score": 26.828038493419683 - }, - "bbh": { - "name": "BBH", - "value": 0.5531574435698693, - "normalized_score": 36.74319895362526 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05664652567975831, - "normalized_score": 5.664652567975831 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3179530201342282, - "normalized_score": 9.060402684563762 - }, - "musr": { - "name": "MUSR", - "value": 0.47551041666666666, - "normalized_score": 19.638802083333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34424867021276595, - "normalized_score": 27.138741134751772 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-27", - "submission_date": "2025-01-30", - "generation": 1, - "base_model": "Triangle104/Pantheon_ChatWaifu_V0.2 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 12.248, - "co2_cost": 1.7744480161185983 - } - }, - { - "id": "Triangle104/Phi-4-AbliteratedRP_float16_31fd2f1f6f0b95b5c695e9601344f24202e962c6_False", - "model": { - "name": "Triangle104/Phi-4-AbliteratedRP", - "sha": "31fd2f1f6f0b95b5c695e9601344f24202e962c6", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 37.374976244999196, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49227050891634194, - "normalized_score": 49.2270508916342 - }, - "bbh": { - "name": "BBH", - "value": 0.6708776140201277, - "normalized_score": 52.64096935250796 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3074018126888218, - "normalized_score": 30.74018126888218 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3951342281879195, - "normalized_score": 19.35123042505593 - }, - "musr": { - "name": "MUSR", - "value": 0.5098333333333334, - "normalized_score": 24.429166666666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.530751329787234, - "normalized_score": 47.86125886524823 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-10", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "Triangle104/Phi-4-AbliteratedRP (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 14.66, - "co2_cost": 1.7394770105593755 - } - }, - { - "id": "Triangle104/Phi4-RP-o1_bfloat16_303b720420c30329ced0aca2a217cca97da405d1_False", - "model": { - "name": "Triangle104/Phi4-RP-o1", - "sha": "303b720420c30329ced0aca2a217cca97da405d1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.809089491248006, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.022007163215822904, - "normalized_score": 2.2007163215822905 - }, - "bbh": { - "name": "BBH", - "value": 0.6652563961373095, - "normalized_score": 51.5939185267128 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3776435045317221, - "normalized_score": 37.764350453172206 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3733221476510067, - "normalized_score": 16.442953020134222 - }, - "musr": { - "name": "MUSR", - "value": 0.4755729166666667, - 
"normalized_score": 19.17994791666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5110538563829787, - "normalized_score": 45.67265070921986 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-30", - "submission_date": "2025-01-30", - "generation": 1, - "base_model": "Triangle104/Phi4-RP-o1 (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 1.8639654010673399 - } - }, - { - "id": "Triangle104/Phi4-RP-o1-Ablit_bfloat16_28b977c7d255b9c504a63d7120b43c5638ac512a_False", - "model": { - "name": "Triangle104/Phi4-RP-o1-Ablit", - "sha": "28b977c7d255b9c504a63d7120b43c5638ac512a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.67748592577745, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.02385559205131274, - "normalized_score": 2.385559205131274 - }, - "bbh": { - "name": "BBH", - "value": 0.6629825730619672, - "normalized_score": 51.22184133516779 - }, - "math": { - "name": "MATH Level 5", - "value": 0.38821752265861026, - "normalized_score": 38.82175226586103 - }, - "gpqa": { - "name": "GPQA", - "value": 0.36325503355704697, - "normalized_score": 15.100671140939594 - }, - "musr": { - "name": "MUSR", - "value": 0.47541666666666665, - "normalized_score": 18.927083333333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5104720744680851, - "normalized_score": 45.60800827423168 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-08", - "submission_date": "2025-02-09", - "generation": 1, - "base_model": "Triangle104/Phi4-RP-o1-Ablit (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 0.9109735397136868 - } - }, - { - "id": "Triangle104/Porpoise-R1-Llama3.2-3b_bfloat16_dedf20a6549763640bc4c35cba354351cb6cce8c_False", - "model": { - "name": "Triangle104/Porpoise-R1-Llama3.2-3b", - "sha": "dedf20a6549763640bc4c35cba354351cb6cce8c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.626836330964721, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4352174452674459, - "normalized_score": 43.52174452674459 - }, - "bbh": { - "name": "BBH", - "value": 0.38236758004585686, - "normalized_score": 12.926570818614183 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04229607250755287, - "normalized_score": 4.229607250755287 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26677852348993286, - "normalized_score": 2.2371364653243813 - }, - "musr": { - "name": "MUSR", - "value": 0.357625, - "normalized_score": 6.436458333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.21168550531914893, - "normalized_score": 12.409500591016547 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-07", - "submission_date": "2025-02-08", - "generation": 1, - "base_model": "Triangle104/Porpoise-R1-Llama3.2-3b (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 2, - 
"params_billions": 3.213, - "co2_cost": 0.5946285747737013 - } - }, - { - "id": "Triangle104/Q2.5-14B-Instruct-1M-Harmony_float16_82520f6d9a6a8f3d788a98e7009c22398985675d_False", - "model": { - "name": "Triangle104/Q2.5-14B-Instruct-1M-Harmony", - "sha": "82520f6d9a6a8f3d788a98e7009c22398985675d", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 37.73826910663008, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5986327389105351, - "normalized_score": 59.86327389105351 - }, - "bbh": { - "name": "BBH", - "value": 0.6338808682301471, - "normalized_score": 47.259249197406746 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3768882175226586, - "normalized_score": 37.68882175226586 - }, - "gpqa": { - "name": "GPQA", - "value": 0.375, - "normalized_score": 16.666666666666664 - }, - "musr": { - "name": "MUSR", - "value": 0.4795416666666667, - "normalized_score": 19.676041666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5074800531914894, - "normalized_score": 45.27556146572104 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "Triangle104/Q2.5-14B-Instruct-1M-Harmony (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 14.77, - "co2_cost": 3.7730342245263926 - } - }, - { - "id": "Triangle104/Q2.5-AthensCOT_bfloat16_6eaa16d09a4971ed9ad4371aada365caef30d38d_False", - "model": { - "name": "Triangle104/Q2.5-AthensCOT", - "sha": "6eaa16d09a4971ed9ad4371aada365caef30d38d", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 28.558050099748765, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45727447616767947, - "normalized_score": 45.72744761676795 - }, - "bbh": { - "name": "BBH", - "value": 0.5541692533534606, - "normalized_score": 36.446691868053854 - }, - "math": { - "name": "MATH Level 5", - "value": 0.29154078549848944, - "normalized_score": 29.154078549848943 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30033557046979864, - "normalized_score": 6.711409395973152 - }, - "musr": { - "name": "MUSR", - "value": 0.4578333333333333, - "normalized_score": 15.762500000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4379155585106383, - "normalized_score": 37.54617316784869 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-26", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "Triangle104/Q2.5-AthensCOT (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.2770496852895648 - } - }, - { - "id": "Triangle104/Q2.5-CodeR1-3B_bfloat16_4ef3c55b383e21eedbe5e7094d12158a47c7f88e_False", - "model": { - "name": "Triangle104/Q2.5-CodeR1-3B", - "sha": "4ef3c55b383e21eedbe5e7094d12158a47c7f88e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 19.81078532589114, - "has_chat_template": false - }, - "evaluations": 
{ - "ifeval": { - "name": "IFEval", - "value": 0.35875587884590665, - "normalized_score": 35.875587884590665 - }, - "bbh": { - "name": "BBH", - "value": 0.4660844324968853, - "normalized_score": 25.312035348871195 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16389728096676737, - "normalized_score": 16.389728096676738 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.43154166666666666, - "normalized_score": 12.142708333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2978723404255319, - "normalized_score": 21.98581560283688 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-24", - "submission_date": "2025-02-24", - "generation": 1, - "base_model": "Triangle104/Q2.5-CodeR1-3B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 3.085, - "co2_cost": 0.7322141137207586 - } - }, - { - "id": "Triangle104/Q2.5-EVACOT-7b_bfloat16_ed18ec729937b992ee9c9e0992d87d137b604f9c_False", - "model": { - "name": "Triangle104/Q2.5-EVACOT-7b", - "sha": "ed18ec729937b992ee9c9e0992d87d137b604f9c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 30.447241223161303, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5784241368457914, - "normalized_score": 57.842413684579135 - }, - "bbh": { - "name": "BBH", - "value": 0.5505524946794311, - "normalized_score": 35.722592826715 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2824773413897281, - "normalized_score": 28.247734138972806 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3179530201342282, - "normalized_score": 9.060402684563762 - }, - "musr": { - "name": "MUSR", - "value": 0.4498645833333333, - "normalized_score": 14.799739583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43309507978723405, - "normalized_score": 37.01056442080379 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-26", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "Triangle104/Q2.5-EVACOT-7b (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.3098282095178113 - } - }, - { - "id": "Triangle104/Q2.5-EvaHumane-RP_bfloat16_e303ea41606c02d5f434e88bddde43ba96586891_False", - "model": { - "name": "Triangle104/Q2.5-EvaHumane-RP", - "sha": "e303ea41606c02d5f434e88bddde43ba96586891", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 26.370822890124625, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3676234613048932, - "normalized_score": 36.76234613048933 - }, - "bbh": { - "name": "BBH", - "value": 0.5328196297646768, - "normalized_score": 33.757405524368195 - }, - "math": { - "name": "MATH Level 5", - "value": 0.29229607250755285, - "normalized_score": 29.229607250755286 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3187919463087248, - "normalized_score": 9.172259507829976 - }, - "musr": { - "name": "MUSR", - "value": 0.42763541666666666, - "normalized_score": 
11.387760416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4412400265957447, - "normalized_score": 37.9155585106383 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-18", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "Triangle104/Q2.5-EvaHumane-RP (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 7.616, - "co2_cost": 1.2574226662136654 - } - }, - { - "id": "Triangle104/Q2.5-Humane-RP_bfloat16_819395b7cc4fc157d8f322432580a5c3defebd63_False", - "model": { - "name": "Triangle104/Q2.5-Humane-RP", - "sha": "819395b7cc4fc157d8f322432580a5c3defebd63", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 29.83187981671115, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4411627814199657, - "normalized_score": 44.116278141996574 - }, - "bbh": { - "name": "BBH", - "value": 0.5649289292164736, - "normalized_score": 37.653375376825046 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3391238670694864, - "normalized_score": 33.91238670694864 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3187919463087248, - "normalized_score": 9.172259507829976 - }, - "musr": { - "name": "MUSR", - "value": 0.4528125, - "normalized_score": 15.334895833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44921875, - "normalized_score": 38.802083333333336 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-12", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "Triangle104/Q2.5-Humane-RP (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 7.616, - "co2_cost": 1.254656556269786 - } - }, - { - "id": "Triangle104/Q2.5-Instruct-1M_Harmony_float16_efda8174f8b2134ca9ee1490f5bb73917845e24c_False", - "model": { - "name": "Triangle104/Q2.5-Instruct-1M_Harmony", - "sha": "efda8174f8b2134ca9ee1490f5bb73917845e24c", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 32.07867587170479, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6038034636985421, - "normalized_score": 60.38034636985421 - }, - "bbh": { - "name": "BBH", - "value": 0.5373243549676157, - "normalized_score": 33.63146155668904 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3323262839879154, - "normalized_score": 33.23262839879154 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32298657718120805, - "normalized_score": 9.731543624161072 - }, - "musr": { - "name": "MUSR", - "value": 0.46878125, - "normalized_score": 18.097656249999993 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43658577127659576, - "normalized_score": 37.39841903073286 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-29", - "submission_date": "2025-01-29", - "generation": 1, - "base_model": "Triangle104/Q2.5-Instruct-1M_Harmony (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.616, - 
"co2_cost": 1.2756625564987696 - } - }, - { - "id": "Triangle104/Q2.5-R1-3B_bfloat16_cd4b8cb325e4e126f62e1279fd2281167b9228ef_False", - "model": { - "name": "Triangle104/Q2.5-R1-3B", - "sha": "cd4b8cb325e4e126f62e1279fd2281167b9228ef", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 24.667669563202328, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4213542290012722, - "normalized_score": 42.13542290012722 - }, - "bbh": { - "name": "BBH", - "value": 0.48124304786769817, - "normalized_score": 27.203483032816802 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2673716012084592, - "normalized_score": 26.73716012084592 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.43197916666666664, - "normalized_score": 12.730729166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38131648936170215, - "normalized_score": 31.257387706855795 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-24", - "submission_date": "2025-02-24", - "generation": 1, - "base_model": "Triangle104/Q2.5-R1-3B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.085, - "co2_cost": 0.7877770482953435 - } - }, - { - "id": "Triangle104/Q2.5-R1-7B_float16_2579a9c6b7c04b92d07b00d467eb56708e971a98_False", - "model": { - "name": "Triangle104/Q2.5-R1-7B", - "sha": "2579a9c6b7c04b92d07b00d467eb56708e971a98", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.783468006938507, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1346150436397647, - "normalized_score": 13.461504363976472 - }, - "bbh": { - "name": "BBH", - "value": 0.30065625818799685, - "normalized_score": 2.548887396663384 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01661631419939577, - "normalized_score": 1.6616314199395772 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2525167785234899, - "normalized_score": 0.33557046979865535 - }, - "musr": { - "name": "MUSR", - "value": 0.3607291666666666, - "normalized_score": 2.6911458333333322 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1180186170212766, - "normalized_score": 2.002068557919621 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-24", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "Triangle104/Q2.5-R1-7B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.613, - "co2_cost": 0.7005417943365251 - } - }, - { - "id": "Triangle104/Robo-Gutenberg_V1.0_float16_9162806dd52c7cf1b7bb9798cc4176460859ce88_False", - "model": { - "name": "Triangle104/Robo-Gutenberg_V1.0", - "sha": "9162806dd52c7cf1b7bb9798cc4176460859ce88", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.348593527637, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6007559940956662, - 
"normalized_score": 60.075599409566614 - }, - "bbh": { - "name": "BBH", - "value": 0.653716560941194, - "normalized_score": 50.286291267237225 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4561933534743202, - "normalized_score": 45.61933534743202 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3859060402684564, - "normalized_score": 18.120805369127517 - }, - "musr": { - "name": "MUSR", - "value": 0.47436458333333337, - "normalized_score": 19.195572916666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5391456117021277, - "normalized_score": 48.79395685579197 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-05", - "submission_date": "2025-01-30", - "generation": 1, - "base_model": "Triangle104/Robo-Gutenberg_V1.0 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 14.77, - "co2_cost": 3.8898383424955503 - } - }, - { - "id": "Triangle104/Rocinante-Prism_V2.0_bfloat16_1d6764b9feeba5794bfa1f89ab931648ad9dc3fa_False", - "model": { - "name": "Triangle104/Rocinante-Prism_V2.0", - "sha": "1d6764b9feeba5794bfa1f89ab931648ad9dc3fa", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.774918840644446, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2616103051015749, - "normalized_score": 26.16103051015749 - }, - "bbh": { - "name": "BBH", - "value": 0.5361246041982355, - "normalized_score": 33.22820642176426 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11102719033232629, - "normalized_score": 11.10271903323263 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32046979865771813, - "normalized_score": 9.395973154362418 - }, - "musr": { - "name": "MUSR", - "value": 0.445, - "normalized_score": 15.424999999999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3640292553191489, - "normalized_score": 29.336583924349874 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-02", - "submission_date": "2025-01-30", - "generation": 1, - "base_model": "Triangle104/Rocinante-Prism_V2.0 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 12.248, - "co2_cost": 1.6493639223611307 - } - }, - { - "id": "Triangle104/Rocinante-Prism_V2.1_bfloat16_775836a90e03521f2f0a8850ba4862812678299f_False", - "model": { - "name": "Triangle104/Rocinante-Prism_V2.1", - "sha": "775836a90e03521f2f0a8850ba4862812678299f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.79146935600846, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25584005992987496, - "normalized_score": 25.5840059929875 - }, - "bbh": { - "name": "BBH", - "value": 0.5332676401860506, - "normalized_score": 32.982522799498575 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11253776435045318, - "normalized_score": 11.253776435045317 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3196308724832215, - "normalized_score": 9.284116331096197 - }, - "musr": { - "name": "MUSR", - "value": 0.44896874999999997, - "normalized_score": 16.187760416666666 - }, - "mmlu_pro": { - "name": 
"MMLU-PRO", - "value": 0.3651097074468085, - "normalized_score": 29.4566341607565 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-30", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 12.248, - "co2_cost": 1.7031501654301848 - } - }, - { - "id": "Triangle104/RomboHermes3-R1-Llama3.2-3b_float16_0725171bdc64ac36851ae22eb6e5e8242fb9b8c6_False", - "model": { - "name": "Triangle104/RomboHermes3-R1-Llama3.2-3b", - "sha": "0725171bdc64ac36851ae22eb6e5e8242fb9b8c6", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.65828137567246, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.300728733094855, - "normalized_score": 30.072873309485498 - }, - "bbh": { - "name": "BBH", - "value": 0.42639466274987187, - "normalized_score": 19.092695935991667 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08157099697885196, - "normalized_score": 8.157099697885197 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.36565625, - "normalized_score": 4.40703125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2957114361702128, - "normalized_score": 21.74571513002364 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-09", - "submission_date": "2025-02-09", - "generation": 1, - "base_model": "Triangle104/RomboHermes3-R1-Llama3.2-3b (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 2, - "params_billions": 3.213, - "co2_cost": 0.5959179312218184 - } - }, - { - "id": "Triangle104/Rombos-Novasky-7B_V1c_float16_77fb1ba91d47b6caf9bbfb9c6f8d7cb591889884_False", - "model": { - "name": "Triangle104/Rombos-Novasky-7B_V1c", - "sha": "77fb1ba91d47b6caf9bbfb9c6f8d7cb591889884", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 18.209818018566242, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.40801517750679306, - "normalized_score": 40.80151775067931 - }, - "bbh": { - "name": "BBH", - "value": 0.4349247829177707, - "normalized_score": 20.42212501104689 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08534743202416918, - "normalized_score": 8.534743202416918 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.44645833333333335, - "normalized_score": 14.040624999999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27376994680851063, - "normalized_score": 19.307771867612292 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-18", - "submission_date": "2025-02-20", - "generation": 1, - "base_model": "Triangle104/Rombos-Novasky-7B_V1c (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.6720768673852975 - } - }, - { - "id": 
"Triangle104/Set-70b_bfloat16_206b32829e4fcb65306bbb60370735f97cef6e4d_False", - "model": { - "name": "Triangle104/Set-70b", - "sha": "206b32829e4fcb65306bbb60370735f97cef6e4d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 44.03469176472607, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7642954028643998, - "normalized_score": 76.42954028643999 - }, - "bbh": { - "name": "BBH", - "value": 0.70142939330013, - "normalized_score": 56.88003115055037 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3640483383685801, - "normalized_score": 36.40483383685801 - }, - "gpqa": { - "name": "GPQA", - "value": 0.4463087248322148, - "normalized_score": 26.174496644295303 - }, - "musr": { - "name": "MUSR", - "value": 0.46956250000000005, - "normalized_score": 18.961979166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5442154255319149, - "normalized_score": 49.3572695035461 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-19", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "Triangle104/Set-70b (Merge)", - "hub_license": "llama3.3", - "hub_hearts": 0, - "params_billions": 70.554, - "co2_cost": 28.620895805508525 - } - }, - { - "id": "Tsunami-th/Tsunami-0.5-7B-Instruct_bfloat16_10706336513d54c4e8962f54653f25941c4031f4_True", - "model": { - "name": "Tsunami-th/Tsunami-0.5-7B-Instruct", - "sha": "10706336513d54c4e8962f54653f25941c4031f4", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.42709650938436, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7400153814102137, - "normalized_score": 74.00153814102137 - }, - "bbh": { - "name": "BBH", - "value": 0.552369427738073, - "normalized_score": 36.13825418700338 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5045317220543807, - "normalized_score": 50.453172205438065 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3087248322147651, - "normalized_score": 7.829977628635347 - }, - "musr": { - "name": "MUSR", - "value": 0.42571875, - "normalized_score": 12.214843750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44132313829787234, - "normalized_score": 37.92479314420804 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-11", - "submission_date": "2024-10-12", - "generation": 1, - "base_model": "Tsunami-th/Tsunami-0.5-7B-Instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 2.1801055943223924 - } - }, - { - "id": "Tsunami-th/Tsunami-0.5x-7B-Instruct_bfloat16_83d048ab565893a660fa7eaeb4a749d360c76b53_True", - "model": { - "name": "Tsunami-th/Tsunami-0.5x-7B-Instruct", - "sha": "83d048ab565893a660fa7eaeb4a749d360c76b53", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.004746535208355, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.709915247099917, - "normalized_score": 70.99152470999172 - }, - "bbh": { - "name": "BBH", 
- "value": 0.5592865858560252, - "normalized_score": 37.36306059795168 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4206948640483384, - "normalized_score": 42.06948640483384 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3145973154362416, - "normalized_score": 8.612975391498878 - }, - "musr": { - "name": "MUSR", - "value": 0.46667708333333335, - "normalized_score": 18.567968749999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44581117021276595, - "normalized_score": 38.423463356974 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-15", - "submission_date": "2024-10-16", - "generation": 1, - "base_model": "Tsunami-th/Tsunami-0.5x-7B-Instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 2.1171267987107916 - } - }, - { - "id": "Tsunami-th/Tsunami-1.0-14B-Instruct_bfloat16_b468814b5242acbe6294226db71bc19dead6c8b6_True", - "model": { - "name": "Tsunami-th/Tsunami-1.0-14B-Instruct", - "sha": "b468814b5242acbe6294226db71bc19dead6c8b6", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.840045325237675, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7829049145157072, - "normalized_score": 78.29049145157072 - }, - "bbh": { - "name": "BBH", - "value": 0.6438763263011559, - "normalized_score": 49.150255113097735 - }, - "math": { - "name": "MATH Level 5", - "value": 0.45845921450151056, - "normalized_score": 45.84592145015105 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3565436241610738, - "normalized_score": 14.205816554809845 - }, - "musr": { - "name": "MUSR", - "value": 0.44593750000000004, - "normalized_score": 16.342187499999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5248503989361702, - "normalized_score": 47.205599881796694 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-25", - "submission_date": "2024-10-25", - "generation": 1, - "base_model": "Tsunami-th/Tsunami-1.0-14B-Instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 3.307262617446699 - } - }, - { - "id": "Tsunami-th/Tsunami-1.0-7B-Instruct_bfloat16_34d0f8da8ce6b0de50a269eef622ff2e93e5c059_True", - "model": { - "name": "Tsunami-th/Tsunami-1.0-7B-Instruct", - "sha": "34d0f8da8ce6b0de50a269eef622ff2e93e5c059", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.74871261762576, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.730872972601586, - "normalized_score": 73.0872972601586 - }, - "bbh": { - "name": "BBH", - "value": 0.549071195618326, - "normalized_score": 35.85724274977317 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4335347432024169, - "normalized_score": 43.353474320241695 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31291946308724833, - "normalized_score": 8.389261744966444 - }, - "musr": { - "name": "MUSR", - "value": 0.44928125, - "normalized_score": 15.760156249999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4424035904255319, - "normalized_score": 38.04484338061466 
- } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-28", - "submission_date": "2024-10-28", - "generation": 1, - "base_model": "Tsunami-th/Tsunami-1.0-7B-Instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 2.9684366489643583 - } - }, - { - "id": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter1_bfloat16_33cfd6919f22efc38f71e9d21a7e697afb418e6b_True", - "model": { - "name": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter1", - "sha": "33cfd6919f22efc38f71e9d21a7e697afb418e6b", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 22.586154586899323, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.308221075634871, - "normalized_score": 30.8221075634871 - }, - "bbh": { - "name": "BBH", - "value": 0.5968934762705508, - "normalized_score": 41.80922962006354 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08987915407854985, - "normalized_score": 8.987915407854985 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33640939597315433, - "normalized_score": 11.521252796420578 - }, - "musr": { - "name": "MUSR", - "value": 0.4099375, - "normalized_score": 10.075520833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39070811170212766, - "normalized_score": 32.300901300236404 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-29", - "submission_date": "2024-09-21", - "generation": 0, - "base_model": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter1", - "hub_license": "gemma", - "hub_hearts": 3, - "params_billions": 9.242, - "co2_cost": 5.8853164770339355 - } - }, - { - "id": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter2_bfloat16_b7590721d92bf6e0606e3dbc1ca2c229b7c534b4_True", - "model": { - "name": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter2", - "sha": "b7590721d92bf6e0606e3dbc1ca2c229b7c534b4", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 22.563072986134326, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3100196367859502, - "normalized_score": 31.00196367859502 - }, - "bbh": { - "name": "BBH", - "value": 0.5989880877421281, - "normalized_score": 42.16983380174582 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08081570996978851, - "normalized_score": 8.08157099697885 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3347315436241611, - "normalized_score": 11.297539149888143 - }, - "musr": { - "name": "MUSR", - "value": 0.4139375, - "normalized_score": 10.942187500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.386968085106383, - "normalized_score": 31.885342789598102 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-29", - "submission_date": "2024-08-07", - "generation": 0, - "base_model": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter2", - "hub_license": "gemma", - "hub_hearts": 3, - "params_billions": 9.242, - "co2_cost": 5.432923596852596 - } - }, - { - "id": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3_bfloat16_2261f2a03b2e15de13a18da52590c237ecf5f188_True", - 
"model": { - "name": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3", - "sha": "2261f2a03b2e15de13a18da52590c237ecf5f188", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 22.65046295673854, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.31671409637539505, - "normalized_score": 31.671409637539504 - }, - "bbh": { - "name": "BBH", - "value": 0.6007080229268026, - "normalized_score": 42.53675224107426 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07099697885196375, - "normalized_score": 7.099697885196375 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3389261744966443, - "normalized_score": 11.85682326621924 - }, - "musr": { - "name": "MUSR", - "value": 0.41660416666666666, - "normalized_score": 11.342187500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.382563164893617, - "normalized_score": 31.395907210401887 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-29", - "submission_date": "2024-07-31", - "generation": 0, - "base_model": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3", - "hub_license": "gemma", - "hub_hearts": 122, - "params_billions": 9.242, - "co2_cost": 5.630300298977712 - } - }, - { - "id": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter1_bfloat16_2076437f65776aeb9686c95f1f41515f70c4db27_True", - "model": { - "name": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter1", - "sha": "2076437f65776aeb9686c95f1f41515f70c4db27", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.76595847369129, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7298988904994304, - "normalized_score": 72.98988904994303 - }, - "bbh": { - "name": "BBH", - "value": 0.5057890691082708, - "normalized_score": 29.489353188071963 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1148036253776435, - "normalized_score": 11.48036253776435 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.3567916666666666, - "normalized_score": 2.165624999999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37109375, - "normalized_score": 30.12152777777778 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-25", - "submission_date": "2024-09-21", - "generation": 0, - "base_model": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter1", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.4024578249460675 - } - }, - { - "id": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter2_bfloat16_730c7207d4b538feeb3c2e6d6f6a6ba8615a9be3_True", - "model": { - "name": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter2", - "sha": "730c7207d4b538feeb3c2e6d6f6a6ba8615a9be3", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.040942547288513, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6988745417713889, - "normalized_score": 69.88745417713888 - }, - "bbh": { - "name": "BBH", - "value": 
0.5088696278852957, - "normalized_score": 29.86944932809155 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10347432024169184, - "normalized_score": 10.347432024169184 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26677852348993286, - "normalized_score": 2.2371364653243813 - }, - "musr": { - "name": "MUSR", - "value": 0.35942708333333334, - "normalized_score": 1.995052083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36918218085106386, - "normalized_score": 29.909131205673756 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-25", - "submission_date": "2024-08-07", - "generation": 0, - "base_model": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter2", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.0, - "co2_cost": 1.3135334475254834 - } - }, - { - "id": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3_bfloat16_f73dafc2923acd56f115f21f76e9d14f8d19a63e_True", - "model": { - "name": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3", - "sha": "f73dafc2923acd56f115f21f76e9d14f8d19a63e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.693396308174695, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6834122350917787, - "normalized_score": 68.34122350917787 - }, - "bbh": { - "name": "BBH", - "value": 0.50795799761689, - "normalized_score": 29.739683580440694 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09592145015105741, - "normalized_score": 9.592145015105741 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.36606249999999996, - "normalized_score": 3.0911458333333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3644448138297872, - "normalized_score": 29.38275709219858 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-25", - "submission_date": "2024-07-02", - "generation": 0, - "base_model": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3", - "hub_license": "apache-2.0", - "hub_hearts": 82, - "params_billions": 8.03, - "co2_cost": 9.13529851667579 - } - }, - { - "id": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3_float16_f73dafc2923acd56f115f21f76e9d14f8d19a63e_True", - "model": { - "name": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3", - "sha": "f73dafc2923acd56f115f21f76e9d14f8d19a63e", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.05947024187678, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.67029814226253, - "normalized_score": 67.02981422625301 - }, - "bbh": { - "name": "BBH", - "value": 0.5076407742830437, - "normalized_score": 29.71670075746525 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07175226586102719, - "normalized_score": 7.175226586102719 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.3647291666666667, - "normalized_score": 2.891145833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3657746010638298, - 
"normalized_score": 29.53051122931442 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-25", - "submission_date": "2024-06-28", - "generation": 0, - "base_model": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3", - "hub_license": "apache-2.0", - "hub_hearts": 82, - "params_billions": 8.03, - "co2_cost": 0.9104745048401183 - } - }, - { - "id": "UCLA-AGI/Mistral7B-PairRM-SPPO_bfloat16_abdc173603690fcf6b333b351c291a321d2631c3_True", - "model": { - "name": "UCLA-AGI/Mistral7B-PairRM-SPPO", - "sha": "abdc173603690fcf6b333b351c291a321d2631c3", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 16.44469675653773, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43549227161708715, - "normalized_score": 43.549227161708714 - }, - "bbh": { - "name": "BBH", - "value": 0.4438979817093698, - "normalized_score": 22.08465647856181 - }, - "math": { - "name": "MATH Level 5", - "value": 0.030966767371601207, - "normalized_score": 3.096676737160121 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28104026845637586, - "normalized_score": 4.138702460850116 - }, - "musr": { - "name": "MUSR", - "value": 0.39647916666666666, - "normalized_score": 7.793229166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.26205119680851063, - "normalized_score": 18.005688534278956 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-04", - "submission_date": "2024-09-21", - "generation": 0, - "base_model": "UCLA-AGI/Mistral7B-PairRM-SPPO", - "hub_license": "apache-2.0", - "hub_hearts": 6, - "params_billions": 7.242, - "co2_cost": 1.0083177982388298 - } - }, - { - "id": "UCLA-AGI/Mistral7B-PairRM-SPPO-Iter1_bfloat16_97252e2d868725b2fa5055adc241c5182610fb6a_True", - "model": { - "name": "UCLA-AGI/Mistral7B-PairRM-SPPO-Iter1", - "sha": "97252e2d868725b2fa5055adc241c5182610fb6a", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 17.91774579525423, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5047352136774869, - "normalized_score": 50.47352136774869 - }, - "bbh": { - "name": "BBH", - "value": 0.4468056921650662, - "normalized_score": 22.93229237099634 - }, - "math": { - "name": "MATH Level 5", - "value": 0.024924471299093656, - "normalized_score": 2.492447129909366 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.3991770833333333, - "normalized_score": 8.29713541666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.26953125, - "normalized_score": 18.836805555555554 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-04", - "submission_date": "2024-09-21", - "generation": 0, - "base_model": "UCLA-AGI/Mistral7B-PairRM-SPPO-Iter1", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 7.242, - "co2_cost": 1.0533470821859574 - } - }, - { - "id": 
"UCLA-AGI/Mistral7B-PairRM-SPPO-Iter2_bfloat16_8201064df67b5762ff9f361ff1b98aae3747855c_True", - "model": { - "name": "UCLA-AGI/Mistral7B-PairRM-SPPO-Iter2", - "sha": "8201064df67b5762ff9f361ff1b98aae3747855c", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 17.118139500757714, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4445848127413041, - "normalized_score": 44.45848127413042 - }, - "bbh": { - "name": "BBH", - "value": 0.4465719945610438, - "normalized_score": 22.479924250197836 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02190332326283988, - "normalized_score": 2.190332326283988 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28859060402684567, - "normalized_score": 5.145413870246088 - }, - "musr": { - "name": "MUSR", - "value": 0.40854166666666664, - "normalized_score": 9.801041666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2677027925531915, - "normalized_score": 18.633643617021278 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-04", - "submission_date": "2024-08-07", - "generation": 0, - "base_model": "UCLA-AGI/Mistral7B-PairRM-SPPO-Iter2", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 1.0309688884634434 - } - }, - { - "id": "UCLA-AGI/Mistral7B-PairRM-SPPO-Iter3_bfloat16_72cd8e5435ae679249ddad7ac4cdb64c5b4590c3_True", - "model": { - "name": "UCLA-AGI/Mistral7B-PairRM-SPPO-Iter3", - "sha": "72cd8e5435ae679249ddad7ac4cdb64c5b4590c3", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 16.488657432502865, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4350678422142138, - "normalized_score": 43.506784221421384 - }, - "bbh": { - "name": "BBH", - "value": 0.4396587862984616, - "normalized_score": 21.817495985928634 - }, - "math": { - "name": "MATH Level 5", - "value": 0.023413897280966767, - "normalized_score": 2.3413897280966767 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2751677852348993, - "normalized_score": 3.355704697986576 - }, - "musr": { - "name": "MUSR", - "value": 0.40711458333333334, - "normalized_score": 9.489322916666671 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2657912234042553, - "normalized_score": 18.42124704491726 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-04", - "submission_date": "2024-08-07", - "generation": 0, - "base_model": "UCLA-AGI/Mistral7B-PairRM-SPPO-Iter3", - "hub_license": "apache-2.0", - "hub_hearts": 5, - "params_billions": 7.242, - "co2_cost": 1.0368151525905827 - } - }, - { - "id": "UKzExecution/LlamaExecutor-8B-3.0.5_bfloat16_2047978e8ab1146b8881cde3d998856594f437a4_True", - "model": { - "name": "UKzExecution/LlamaExecutor-8B-3.0.5", - "sha": "2047978e8ab1146b8881cde3d998856594f437a4", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.541079195358947, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.740290207759855, - 
"normalized_score": 74.0290207759855 - }, - "bbh": { - "name": "BBH", - "value": 0.5006000507021341, - "normalized_score": 28.41381524085358 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10196374622356495, - "normalized_score": 10.196374622356496 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2558724832214765, - "normalized_score": 0.7829977628635317 - }, - "musr": { - "name": "MUSR", - "value": 0.3753645833333333, - "normalized_score": 4.653906250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3625332446808511, - "normalized_score": 29.170360520094558 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-29", - "submission_date": "2024-07-30", - "generation": 1, - "base_model": "UKzExecution/LlamaExecutor-8B-3.0.5 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.614003500601508 - } - }, - { - "id": "Unbabel/TowerInstruct-Mistral-7B-v0.2_float16_454bdfedc8b51f292a402aba2c560df145a0817d_False", - "model": { - "name": "Unbabel/TowerInstruct-Mistral-7B-v0.2", - "sha": "454bdfedc8b51f292a402aba2c560df145a0817d", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 11.902717149315622, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2843422119975, - "normalized_score": 28.43422119975 - }, - "bbh": { - "name": "BBH", - "value": 0.388195180992626, - "normalized_score": 14.224326422972673 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02039274924471299, - "normalized_score": 2.0392749244712993 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24748322147651006, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.4522291666666667, - "normalized_score": 15.961979166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.19680851063829788, - "normalized_score": 10.756501182033098 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-03-26", - "submission_date": "2024-09-06", - "generation": 0, - "base_model": "Unbabel/TowerInstruct-Mistral-7B-v0.2", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 16, - "params_billions": 7.242, - "co2_cost": 1.2077791222280865 - } - }, - { - "id": "Undi95/MG-FinalMix-72B_bfloat16_6c9c2f5d052495dcd49f44bf5623d21210653c65_True", - "model": { - "name": "Undi95/MG-FinalMix-72B", - "sha": "6c9c2f5d052495dcd49f44bf5623d21210653c65", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 44.29736213992491, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8013648231137825, - "normalized_score": 80.13648231137824 - }, - "bbh": { - "name": "BBH", - "value": 0.6973017446417747, - "normalized_score": 57.502411706281976 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3972809667673716, - "normalized_score": 39.72809667673716 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3850671140939597, - "normalized_score": 18.008948545861294 - }, - "musr": { - "name": "MUSR", - "value": 0.48227083333333337, - "normalized_score": 21.217187499999998 - }, - "mmlu_pro": { - "name": 
"MMLU-PRO", - "value": 0.542719414893617, - "normalized_score": 49.19104609929077 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-25", - "submission_date": "2024-07-13", - "generation": 1, - "base_model": "Undi95/MG-FinalMix-72B (Merge)", - "hub_license": "other", - "hub_hearts": 5, - "params_billions": 72.706, - "co2_cost": 24.44404705336021 - } - }, - { - "id": "Undi95/Phi4-abliterated_bfloat16_b960b130911eeb32fd728043652b9f9591821469_True", - "model": { - "name": "Undi95/Phi4-abliterated", - "sha": "b960b130911eeb32fd728043652b9f9591821469", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 37.42237137162351, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6617552538375954, - "normalized_score": 66.17552538375953 - }, - "bbh": { - "name": "BBH", - "value": 0.680902103041113, - "normalized_score": 54.117248360632516 - }, - "math": { - "name": "MATH Level 5", - "value": 0.37009063444108764, - "normalized_score": 37.00906344410876 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33053691275167785, - "normalized_score": 10.738255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.4034270833333333, - "normalized_score": 8.92838541666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.528091755319149, - "normalized_score": 47.56575059101655 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-09", - "submission_date": "2025-01-23", - "generation": 0, - "base_model": "Undi95/Phi4-abliterated", - "hub_license": "", - "hub_hearts": 11, - "params_billions": 14.66, - "co2_cost": 1.9100486024045213 - } - }, - { - "id": "V3N0M/Jenna-Tiny-2.0_float16_95c5e775b4a155110a5fac3e1cdd814dde93f220_False", - "model": { - "name": "V3N0M/Jenna-Tiny-2.0", - "sha": "95c5e775b4a155110a5fac3e1cdd814dde93f220", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.519126028133118, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2309361383351729, - "normalized_score": 23.09361383351729 - }, - "bbh": { - "name": "BBH", - "value": 0.31479264061817097, - "normalized_score": 4.829999905649889 - }, - "math": { - "name": "MATH Level 5", - "value": 0.012084592145015106, - "normalized_score": 1.2084592145015105 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.33666666666666667, - "normalized_score": 2.3499999999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1146941489361702, - "normalized_score": 1.6326832151300221 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-18", - "submission_date": "2025-01-17", - "generation": 0, - "base_model": "V3N0M/Jenna-Tiny-2.0", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.631, - "co2_cost": 0.4935997564668777 - } - }, - { - "id": 
"VAGOsolutions/Llama-3-SauerkrautLM-70b-Instruct_bfloat16_707cfd1a93875247c0223e0c7e3d86d58c432318_True", - "model": { - "name": "VAGOsolutions/Llama-3-SauerkrautLM-70b-Instruct", - "sha": "707cfd1a93875247c0223e0c7e3d86d58c432318", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 38.00558783767682, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8044621604010691, - "normalized_score": 80.44621604010692 - }, - "bbh": { - "name": "BBH", - "value": 0.6663247245334951, - "normalized_score": 52.029579759117915 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2280966767371601, - "normalized_score": 22.80966767371601 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32802013422818793, - "normalized_score": 10.402684563758392 - }, - "musr": { - "name": "MUSR", - "value": 0.43393750000000003, - "normalized_score": 13.542187500000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5392287234042553, - "normalized_score": 48.8031914893617 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-24", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "VAGOsolutions/Llama-3-SauerkrautLM-70b-Instruct", - "hub_license": "other", - "hub_hearts": 23, - "params_billions": 70.554, - "co2_cost": 21.252381632304154 - } - }, - { - "id": "VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct_bfloat16_37127c44d7c0fb56cef817270c4b1a6802d8793a_True", - "model": { - "name": "VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct", - "sha": "37127c44d7c0fb56cef817270c4b1a6802d8793a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.66765472658618, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.744536718130117, - "normalized_score": 74.45367181301171 - }, - "bbh": { - "name": "BBH", - "value": 0.494337579362695, - "normalized_score": 28.0492424520597 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06646525679758308, - "normalized_score": 6.646525679758309 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3087248322147651, - "normalized_score": 7.829977628635347 - }, - "musr": { - "name": "MUSR", - "value": 0.42410416666666667, - "normalized_score": 11.2796875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3857214095744681, - "normalized_score": 31.746823286052013 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-19", - "submission_date": "2024-07-22", - "generation": 0, - "base_model": "VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct", - "hub_license": "other", - "hub_hearts": 53, - "params_billions": 8.03, - "co2_cost": 1.5913870564135353 - } - }, - { - "id": "VAGOsolutions/Llama-3.1-SauerkrautLM-70b-Instruct_bfloat16_e8e74aa789243c25a3a8f7565780a402f5050bbb_True", - "model": { - "name": "VAGOsolutions/Llama-3.1-SauerkrautLM-70b-Instruct", - "sha": "e8e74aa789243c25a3a8f7565780a402f5050bbb", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 
43.413769840000015, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8656365111238181, - "normalized_score": 86.56365111238182 - }, - "bbh": { - "name": "BBH", - "value": 0.7006249194404001, - "normalized_score": 57.24162100868165 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3693353474320242, - "normalized_score": 36.933534743202415 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3414429530201342, - "normalized_score": 12.192393736017896 - }, - "musr": { - "name": "MUSR", - "value": 0.4710833333333333, - "normalized_score": 19.38541666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5334940159574468, - "normalized_score": 48.16600177304965 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-29", - "submission_date": "2024-08-26", - "generation": 0, - "base_model": "VAGOsolutions/Llama-3.1-SauerkrautLM-70b-Instruct", - "hub_license": "llama3.1", - "hub_hearts": 21, - "params_billions": 70.554, - "co2_cost": 30.18323458840754 - } - }, - { - "id": "VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct_bfloat16_23ca79966a4ab0a61f7ccc7a0454ffef553b66eb_True", - "model": { - "name": "VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct", - "sha": "23ca79966a4ab0a61f7ccc7a0454ffef553b66eb", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.931073085077475, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8017393848322452, - "normalized_score": 80.1739384832245 - }, - "bbh": { - "name": "BBH", - "value": 0.5114932190011187, - "normalized_score": 30.999360665356374 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19410876132930513, - "normalized_score": 19.410876132930515 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.4148020833333333, - "normalized_score": 11.51692708333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3890458776595745, - "normalized_score": 32.116208628841605 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-25", - "submission_date": "2024-07-29", - "generation": 0, - "base_model": "VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct", - "hub_license": "llama3.1", - "hub_hearts": 32, - "params_billions": 8.03, - "co2_cost": 2.4988715708140576 - } - }, - { - "id": "VAGOsolutions/SauerkrautLM-1.5b_bfloat16_8f5170f03e6b0355dd920adc3a7e65d0417ee14e_True", - "model": { - "name": "VAGOsolutions/SauerkrautLM-1.5b", - "sha": "8f5170f03e6b0355dd920adc3a7e65d0417ee14e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 10.273562653094961, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24040324117785256, - "normalized_score": 24.040324117785254 - }, - "bbh": { - "name": "BBH", - "value": 0.3703912164863146, - "normalized_score": 13.419518424915282 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03625377643504532, - "normalized_score": 3.625377643504532 - }, - "gpqa": { - "name": 
"GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.37390625000000005, - "normalized_score": 4.971614583333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.21509308510638298, - "normalized_score": 12.788120567375886 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-12", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "VAGOsolutions/SauerkrautLM-1.5b", - "hub_license": "apache-2.0", - "hub_hearts": 11, - "params_billions": 1.544, - "co2_cost": 1.5524737369802986 - } - }, - { - "id": "VAGOsolutions/SauerkrautLM-7b-HerO_bfloat16_3a14b437e2f375b74de3b6923e171662133347bb_True", - "model": { - "name": "VAGOsolutions/SauerkrautLM-7b-HerO", - "sha": "3a14b437e2f375b74de3b6923e171662133347bb", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.66931174717597, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.534610389322553, - "normalized_score": 53.461038932255306 - }, - "bbh": { - "name": "BBH", - "value": 0.49044349935812964, - "normalized_score": 27.991873536830212 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03927492447129909, - "normalized_score": 3.927492447129909 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.39238541666666665, - "normalized_score": 6.881510416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30460438829787234, - "normalized_score": 22.733820921985814 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-11-24", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "VAGOsolutions/SauerkrautLM-7b-HerO", - "hub_license": "apache-2.0", - "hub_hearts": 32, - "params_billions": 7.242, - "co2_cost": 1.139537237641185 - } - }, - { - "id": "VAGOsolutions/SauerkrautLM-7b-LaserChat_bfloat16_cb759636a3d5b0768df2f43a3d3da9b17e10e7b9_True", - "model": { - "name": "VAGOsolutions/SauerkrautLM-7b-LaserChat", - "sha": "cb759636a3d5b0768df2f43a3d3da9b17e10e7b9", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.14731649128105, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5987823419637672, - "normalized_score": 59.87823419637673 - }, - "bbh": { - "name": "BBH", - "value": 0.45432707993295685, - "normalized_score": 22.99208011647468 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07779456193353475, - "normalized_score": 7.779456193353475 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30033557046979864, - "normalized_score": 6.711409395973152 - }, - "musr": { - "name": "MUSR", - "value": 0.4148020833333333, - "normalized_score": 9.916927083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3304521276595745, - "normalized_score": 25.60579196217494 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - 
}, - "metadata": { - "upload_date": "2024-02-05", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "VAGOsolutions/SauerkrautLM-7b-LaserChat", - "hub_license": "apache-2.0", - "hub_hearts": 12, - "params_billions": 7.242, - "co2_cost": 1.2093577226572743 - } - }, - { - "id": "VAGOsolutions/SauerkrautLM-Gemma-2b_bfloat16_f9d5575c23da96f33ce77dea3b0776746b9469bc_True", - "model": { - "name": "VAGOsolutions/SauerkrautLM-Gemma-2b", - "sha": "f9d5575c23da96f33ce77dea3b0776746b9469bc", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "GemmaForCausalLM", - "average_score": 7.716094956089698, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24752213017017072, - "normalized_score": 24.75221301701707 - }, - "bbh": { - "name": "BBH", - "value": 0.3416315376053174, - "normalized_score": 9.133870459903902 - }, - "math": { - "name": "MATH Level 5", - "value": 0.027945619335347432, - "normalized_score": 2.794561933534743 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25671140939597314, - "normalized_score": 0.8948545861297527 - }, - "musr": { - "name": "MUSR", - "value": 0.3675833333333333, - "normalized_score": 3.514583333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.14685837765957446, - "normalized_score": 5.2064864066193834 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-03-06", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "VAGOsolutions/SauerkrautLM-Gemma-2b", - "hub_license": "other", - "hub_hearts": 8, - "params_billions": 2.506, - "co2_cost": 1.832470509424899 - } - }, - { - "id": "VAGOsolutions/SauerkrautLM-Gemma-7b_bfloat16_4296bdabf82e900235b094e5348be03ebb0ec891_True", - "model": { - "name": "VAGOsolutions/SauerkrautLM-Gemma-7b", - "sha": "4296bdabf82e900235b094e5348be03ebb0ec891", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "GemmaForCausalLM", - "average_score": 14.801979415385233, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3406705319662939, - "normalized_score": 34.06705319662939 - }, - "bbh": { - "name": "BBH", - "value": 0.41879127895858687, - "normalized_score": 18.492651800030927 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06722054380664652, - "normalized_score": 6.722054380664652 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2860738255033557, - "normalized_score": 4.809843400447425 - }, - "musr": { - "name": "MUSR", - "value": 0.35942708333333334, - "normalized_score": 2.9283854166666674 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2961269946808511, - "normalized_score": 21.79188829787234 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-02-27", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "VAGOsolutions/SauerkrautLM-Gemma-7b", - "hub_license": "other", - "hub_hearts": 13, - "params_billions": 8.538, - "co2_cost": 3.026498864574828 - } - }, - { - "id": "VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct_bfloat16_30ed549de7d84f68b4c6cb619f73275c99af23cc_True", - "model": { - "name": 
"VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct", - "sha": "30ed549de7d84f68b4c6cb619f73275c99af23cc", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 24.487466800390596, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5601891869129465, - "normalized_score": 56.01891869129465 - }, - "bbh": { - "name": "BBH", - "value": 0.5277342269858817, - "normalized_score": 33.94516253986293 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09818731117824774, - "normalized_score": 9.818731117824774 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.42041666666666666, - "normalized_score": 11.318750000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3650265957446808, - "normalized_score": 29.44739952718675 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-12-15", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct", - "hub_license": "apache-2.0", - "hub_hearts": 22, - "params_billions": 46.703, - "co2_cost": 7.548780388067429 - } - }, - { - "id": "VAGOsolutions/SauerkrautLM-Nemo-12b-Instruct_bfloat16_fcb056465084ab2c71503a0760f46e4be79c985c_True", - "model": { - "name": "VAGOsolutions/SauerkrautLM-Nemo-12b-Instruct", - "sha": "fcb056465084ab2c71503a0760f46e4be79c985c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 26.219081516210622, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6112969144093228, - "normalized_score": 61.129691440932284 - }, - "bbh": { - "name": "BBH", - "value": 0.5214128647611115, - "normalized_score": 32.34378307249567 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12235649546827794, - "normalized_score": 12.235649546827794 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.4468958333333333, - "normalized_score": 17.161979166666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33851396276595747, - "normalized_score": 26.501551418439718 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-22", - "submission_date": "2024-07-22", - "generation": 0, - "base_model": "VAGOsolutions/SauerkrautLM-Nemo-12b-Instruct", - "hub_license": "apache-2.0", - "hub_hearts": 22, - "params_billions": 12.248, - "co2_cost": 2.761054538607155 - } - }, - { - "id": "VAGOsolutions/SauerkrautLM-Phi-3-medium_bfloat16_ebfed26a2b35ede15fe526f57029e0ad866ac66d_False", - "model": { - "name": "VAGOsolutions/SauerkrautLM-Phi-3-medium", - "sha": "ebfed26a2b35ede15fe526f57029e0ad866ac66d", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 30.407915004992635, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4408879550703245, - 
"normalized_score": 44.08879550703245 - }, - "bbh": { - "name": "BBH", - "value": 0.6432931765847228, - "normalized_score": 49.630350331717615 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16012084592145015, - "normalized_score": 16.012084592145015 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3347315436241611, - "normalized_score": 11.297539149888143 - }, - "musr": { - "name": "MUSR", - "value": 0.4845, - "normalized_score": 20.69583333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.46650598404255317, - "normalized_score": 40.72288711583924 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-09", - "submission_date": "2024-09-19", - "generation": 0, - "base_model": "VAGOsolutions/SauerkrautLM-Phi-3-medium", - "hub_license": "mit", - "hub_hearts": 9, - "params_billions": 13.96, - "co2_cost": 1.5654950210859602 - } - }, - { - "id": "VAGOsolutions/SauerkrautLM-SOLAR-Instruct_bfloat16_2665d7600ccd253728453433d2434844e6f702bd_True", - "model": { - "name": "VAGOsolutions/SauerkrautLM-SOLAR-Instruct", - "sha": "2665d7600ccd253728453433d2434844e6f702bd", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.221646172197712, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49172085621705963, - "normalized_score": 49.17208562170596 - }, - "bbh": { - "name": "BBH", - "value": 0.5169447300097646, - "normalized_score": 31.838919738784018 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0634441087613293, - "normalized_score": 6.3444108761329305 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.3965416666666666, - "normalized_score": 8.334374999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31831781914893614, - "normalized_score": 24.257535460992905 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-12-20", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "VAGOsolutions/SauerkrautLM-SOLAR-Instruct", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 46, - "params_billions": 10.732, - "co2_cost": 1.6314605938212952 - } - }, - { - "id": "VAGOsolutions/SauerkrautLM-gemma-2-2b-it_bfloat16_7fd35fcb32aebfc422e535739161d7528fc562d5_True", - "model": { - "name": "VAGOsolutions/SauerkrautLM-gemma-2-2b-it", - "sha": "7fd35fcb32aebfc422e535739161d7528fc562d5", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 10.817668945100195, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.13206625088099574, - "normalized_score": 13.206625088099575 - }, - "bbh": { - "name": "BBH", - "value": 0.42408371860644856, - "normalized_score": 18.914195373183343 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02190332326283988, - "normalized_score": 2.190332326283988 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.3994583333333333, - 
"normalized_score": 8.765624999999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.269281914893617, - "normalized_score": 18.809101654846337 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-03", - "submission_date": "2024-08-26", - "generation": 0, - "base_model": "VAGOsolutions/SauerkrautLM-gemma-2-2b-it", - "hub_license": "gemma", - "hub_hearts": 10, - "params_billions": 2.614, - "co2_cost": 4.744972801225721 - } - }, - { - "id": "VAGOsolutions/SauerkrautLM-gemma-2-9b-it_bfloat16_8e02fc1c24e0499c74ee1186ddc46b989fe497f1_True", - "model": { - "name": "VAGOsolutions/SauerkrautLM-gemma-2-9b-it", - "sha": "8e02fc1c24e0499c74ee1186ddc46b989fe497f1", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 23.14181422578697, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3024009627787604, - "normalized_score": 30.240096277876034 - }, - "bbh": { - "name": "BBH", - "value": 0.6072645787154746, - "normalized_score": 43.249988966600455 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08383685800604229, - "normalized_score": 8.38368580060423 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3271812080536913, - "normalized_score": 10.290827740492169 - }, - "musr": { - "name": "MUSR", - "value": 0.43182291666666667, - "normalized_score": 12.344531250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.40907579787234044, - "normalized_score": 34.34175531914893 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-12", - "submission_date": "2024-08-26", - "generation": 0, - "base_model": "VAGOsolutions/SauerkrautLM-gemma-2-9b-it", - "hub_license": "gemma", - "hub_hearts": 7, - "params_billions": 9.242, - "co2_cost": 5.812136404481569 - } - }, - { - "id": "VAGOsolutions/SauerkrautLM-v2-14b-DPO_bfloat16_1fbe5364bc443255a06df7fa0debbcc3d38ab866_True", - "model": { - "name": "VAGOsolutions/SauerkrautLM-v2-14b-DPO", - "sha": "1fbe5364bc443255a06df7fa0debbcc3d38ab866", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 37.583891756672784, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7411645544931892, - "normalized_score": 74.11645544931892 - }, - "bbh": { - "name": "BBH", - "value": 0.6560374350756156, - "normalized_score": 50.92613155208554 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3164652567975831, - "normalized_score": 31.64652567975831 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3196308724832215, - "normalized_score": 9.284116331096197 - }, - "musr": { - "name": "MUSR", - "value": 0.43746875, - "normalized_score": 13.783593750000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.51171875, - "normalized_score": 45.74652777777778 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-31", - "submission_date": "2024-11-04", - "generation": 1, - "base_model": "VAGOsolutions/SauerkrautLM-v2-14b-DPO 
(Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 19, - "params_billions": 14.77, - "co2_cost": 2.983250828721228 - } - }, - { - "id": "VAGOsolutions/SauerkrautLM-v2-14b-SFT_bfloat16_606ddc7819d4a5d9cd8618d5ede57e2bdd99a1ed_True", - "model": { - "name": "VAGOsolutions/SauerkrautLM-v2-14b-SFT", - "sha": "606ddc7819d4a5d9cd8618d5ede57e2bdd99a1ed", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.22785641162885, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6948529900663573, - "normalized_score": 69.48529900663573 - }, - "bbh": { - "name": "BBH", - "value": 0.6210355880693049, - "normalized_score": 45.824351326219606 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3285498489425982, - "normalized_score": 32.85498489425982 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33557046979865773, - "normalized_score": 11.409395973154364 - }, - "musr": { - "name": "MUSR", - "value": 0.417875, - "normalized_score": 11.067708333333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5205285904255319, - "normalized_score": 46.725398936170215 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-25", - "submission_date": "2024-11-04", - "generation": 1, - "base_model": "VAGOsolutions/SauerkrautLM-v2-14b-SFT (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 8, - "params_billions": 14.77, - "co2_cost": 3.037848790767508 - } - }, - { - "id": "VIRNECT/llama-3-Korean-8B_float16_c658409e094ff04eeb6ab6cee2d4bc56716e45f1_True", - "model": { - "name": "VIRNECT/llama-3-Korean-8B", - "sha": "c658409e094ff04eeb6ab6cee2d4bc56716e45f1", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.245300698827084, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5058345190760515, - "normalized_score": 50.58345190760515 - }, - "bbh": { - "name": "BBH", - "value": 0.49082453083378397, - "normalized_score": 27.322411613379888 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09290030211480363, - "normalized_score": 9.290030211480364 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.36615624999999996, - "normalized_score": 3.26953125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3538896276595745, - "normalized_score": 28.20995862884161 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-17", - "submission_date": "2024-07-17", - "generation": 0, - "base_model": "VIRNECT/llama-3-Korean-8B", - "hub_license": "llama3", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.8126872040589411 - } - }, - { - "id": "VIRNECT/llama-3-Korean-8B_bfloat16_c658409e094ff04eeb6ab6cee2d4bc56716e45f1_True", - "model": { - "name": "VIRNECT/llama-3-Korean-8B", - "sha": "c658409e094ff04eeb6ab6cee2d4bc56716e45f1", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 
20.431609341892, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5021376614050719, - "normalized_score": 50.21376614050719 - }, - "bbh": { - "name": "BBH", - "value": 0.491837579362695, - "normalized_score": 27.564318704783016 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10800604229607251, - "normalized_score": 10.80060422960725 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.3647916666666666, - "normalized_score": 3.032291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3536402925531915, - "normalized_score": 28.182254728132396 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-17", - "submission_date": "2024-07-17", - "generation": 0, - "base_model": "VIRNECT/llama-3-Korean-8B", - "hub_license": "llama3", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.6406132627587642 - } - }, - { - "id": "VIRNECT/llama-3-Korean-8B-r-v-0.1_float16_10acb1aa4f341f2d3c899d78c520b0822a909b95_True", - "model": { - "name": "VIRNECT/llama-3-Korean-8B-r-v-0.1", - "sha": "10acb1aa4f341f2d3c899d78c520b0822a909b95", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 18.749278536149728, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49157125316382755, - "normalized_score": 49.15712531638276 - }, - "bbh": { - "name": "BBH", - "value": 0.48061568139086264, - "normalized_score": 25.8849543311167 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08610271903323263, - "normalized_score": 8.610271903323262 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2424496644295302, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.36748958333333337, - "normalized_score": 3.73619791666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3259640957446808, - "normalized_score": 25.10712174940898 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-18", - "submission_date": "2024-07-18", - "generation": 2, - "base_model": "MLP-KTLim/llama-3-Korean-Bllossom-8B (Merge)", - "hub_license": "llama3", - "hub_hearts": 0, - "params_billions": 16.061, - "co2_cost": 2.3989817615402846 - } - }, - { - "id": "ValiantLabs/Llama3-70B-Fireplace_float16_220079e4115733991eb19c30d5480db9696a665e_True", - "model": { - "name": "ValiantLabs/Llama3-70B-Fireplace", - "sha": "220079e4115733991eb19c30d5480db9696a665e", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 37.125226832822285, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7773596280092377, - "normalized_score": 77.73596280092377 - }, - "bbh": { - "name": "BBH", - "value": 0.648899361888402, - "normalized_score": 49.55653001638277 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21450151057401812, - "normalized_score": 21.45015105740181 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3548657718120805, - "normalized_score": 13.982102908277403 - }, - "musr": { - 
"name": "MUSR", - "value": 0.4448541666666667, - "normalized_score": 16.7734375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4892785904255319, - "normalized_score": 43.25317671394799 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-09", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "ValiantLabs/Llama3-70B-Fireplace", - "hub_license": "llama3", - "hub_hearts": 3, - "params_billions": 70.554, - "co2_cost": 19.384343990456575 - } - }, - { - "id": "ValiantLabs/Llama3-70B-ShiningValiant2_bfloat16_bd6cce8da08ccefe9ec58cae3df4bf75c97d8950_True", - "model": { - "name": "ValiantLabs/Llama3-70B-ShiningValiant2", - "sha": "bd6cce8da08ccefe9ec58cae3df4bf75c97d8950", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 32.730483450533846, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6121712611426571, - "normalized_score": 61.217126114265696 - }, - "bbh": { - "name": "BBH", - "value": 0.6338341405069171, - "normalized_score": 46.71026104076922 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20770392749244712, - "normalized_score": 20.770392749244714 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33053691275167785, - "normalized_score": 10.738255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.4325729166666667, - "normalized_score": 13.63828125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.48977726063829785, - "normalized_score": 43.30858451536643 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-20", - "submission_date": "2024-07-25", - "generation": 0, - "base_model": "ValiantLabs/Llama3-70B-ShiningValiant2", - "hub_license": "llama3", - "hub_hearts": 5, - "params_billions": 70.554, - "co2_cost": 22.43518747855869 - } - }, - { - "id": "ValiantLabs/Llama3.1-70B-ShiningValiant2_float16_55436621ed65f0b79e7c6324b780bd6a18e06c79_False", - "model": { - "name": "ValiantLabs/Llama3.1-70B-ShiningValiant2", - "sha": "55436621ed65f0b79e7c6324b780bd6a18e06c79", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 36.493183854101574, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5355346037402979, - "normalized_score": 53.5534603740298 - }, - "bbh": { - "name": "BBH", - "value": 0.6738408402945882, - "normalized_score": 52.390968518523096 - }, - "math": { - "name": "MATH Level 5", - "value": 0.29154078549848944, - "normalized_score": 29.154078549848943 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3926174496644295, - "normalized_score": 19.01565995525727 - }, - "musr": { - "name": "MUSR", - "value": 0.4681041666666667, - "normalized_score": 18.4796875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5172872340425532, - "normalized_score": 46.36524822695035 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-30", - "submission_date": "2024-10-30", - "generation": 2, - "base_model": 
"meta-llama/Meta-Llama-3.1-70B", - "hub_license": "llama3.1", - "hub_hearts": 3, - "params_billions": 70.554, - "co2_cost": 27.994570050184382 - } - }, - { - "id": "ValiantLabs/Llama3.1-8B-Cobalt_float16_3a69145a2acc1f7f51735aa3ae5d81c090249c65_False", - "model": { - "name": "ValiantLabs/Llama3.1-8B-Cobalt", - "sha": "3a69145a2acc1f7f51735aa3ae5d81c090249c65", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.239393742398576, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3496134700372789, - "normalized_score": 34.96134700372789 - }, - "bbh": { - "name": "BBH", - "value": 0.4946769968149292, - "normalized_score": 27.41777700049443 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1268882175226586, - "normalized_score": 12.688821752265861 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.3959479166666667, - "normalized_score": 9.82682291666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3644448138297872, - "normalized_score": 29.38275709219858 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-16", - "submission_date": "2024-10-02", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "llama3.1", - "hub_hearts": 6, - "params_billions": 8.03, - "co2_cost": 2.627762740519015 - } - }, - { - "id": "ValiantLabs/Llama3.1-8B-Cobalt_bfloat16_3a69145a2acc1f7f51735aa3ae5d81c090249c65_True", - "model": { - "name": "ValiantLabs/Llama3.1-8B-Cobalt", - "sha": "3a69145a2acc1f7f51735aa3ae5d81c090249c65", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.558664369322077, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7168346653545925, - "normalized_score": 71.68346653545925 - }, - "bbh": { - "name": "BBH", - "value": 0.4910700749859321, - "normalized_score": 27.235483048638354 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15332326283987915, - "normalized_score": 15.332326283987916 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2860738255033557, - "normalized_score": 4.809843400447425 - }, - "musr": { - "name": "MUSR", - "value": 0.3512395833333333, - "normalized_score": 4.704947916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36627327127659576, - "normalized_score": 29.58591903073286 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-16", - "submission_date": "2024-09-20", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "llama3.1", - "hub_hearts": 6, - "params_billions": 8.03, - "co2_cost": 0.9381708246723047 - } - }, - { - "id": "ValiantLabs/Llama3.1-8B-Enigma_float16_332c99d80f378c77b090745a5aac10f8ab339519_False", - "model": { - "name": "ValiantLabs/Llama3.1-8B-Enigma", - "sha": "332c99d80f378c77b090745a5aac10f8ab339519", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 
16.62515745828593, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.26805542626896633, - "normalized_score": 26.80554262689663 - }, - "bbh": { - "name": "BBH", - "value": 0.44776000880153927, - "normalized_score": 22.012915076928266 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0891238670694864, - "normalized_score": 8.91238670694864 - }, - "gpqa": { - "name": "GPQA", - "value": 0.287751677852349, - "normalized_score": 5.033557046979867 - }, - "musr": { - "name": "MUSR", - "value": 0.4196041666666666, - "normalized_score": 10.2171875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34092420212765956, - "normalized_score": 26.769355791962173 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-11", - "submission_date": "2024-10-02", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "llama3.1", - "hub_hearts": 10, - "params_billions": 8.03, - "co2_cost": 7.2751411047549865 - } - }, - { - "id": "ValiantLabs/Llama3.1-8B-Esper2_float16_38f24f2fe90f839acbc57e7530221acf1232e9dc_False", - "model": { - "name": "ValiantLabs/Llama3.1-8B-Esper2", - "sha": "38f24f2fe90f839acbc57e7530221acf1232e9dc", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.94081009846365, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2567398945907968, - "normalized_score": 25.673989459079685 - }, - "bbh": { - "name": "BBH", - "value": 0.4469866863000255, - "normalized_score": 22.195685067925833 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05891238670694864, - "normalized_score": 5.8912386706948645 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.3560729166666667, - "normalized_score": 5.709114583333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29039228723404253, - "normalized_score": 21.15469858156028 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-02", - "submission_date": "2024-10-09", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "llama3.1", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.7535302324242419 - } - }, - { - "id": "ValiantLabs/Llama3.1-8B-Fireplace2_float16_be3a5c18b5e8e86a3703df1a8227f784ad2c713c_True", - "model": { - "name": "ValiantLabs/Llama3.1-8B-Fireplace2", - "sha": "be3a5c18b5e8e86a3703df1a8227f784ad2c713c", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.312602016344886, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5483240025354947, - "normalized_score": 54.83240025354947 - }, - "bbh": { - "name": "BBH", - "value": 0.4609817052543379, - "normalized_score": 24.07027321429611 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0581570996978852, - "normalized_score": 5.81570996978852 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28859060402684567, - "normalized_score": 5.145413870246088 - 
}, - "musr": { - "name": "MUSR", - "value": 0.34330208333333334, - "normalized_score": 4.379427083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.24069148936170212, - "normalized_score": 15.632387706855791 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-23", - "submission_date": "2024-07-25", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "llama3.1", - "hub_hearts": 6, - "params_billions": 8.03, - "co2_cost": 0.9162338472706587 - } - }, - { - "id": "ValiantLabs/Llama3.1-8B-Fireplace2_bfloat16_ef129903bbdcc59efdbe10fe9061bff473334a99_True", - "model": { - "name": "ValiantLabs/Llama3.1-8B-Fireplace2", - "sha": "ef129903bbdcc59efdbe10fe9061bff473334a99", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.570580608454396, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5328118281714739, - "normalized_score": 53.28118281714739 - }, - "bbh": { - "name": "BBH", - "value": 0.4613311485871581, - "normalized_score": 24.089953790013478 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08761329305135952, - "normalized_score": 8.761329305135952 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28942953020134227, - "normalized_score": 5.257270693512303 - }, - "musr": { - "name": "MUSR", - "value": 0.33666666666666667, - "normalized_score": 4.216666666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.24235372340425532, - "normalized_score": 15.817080378250589 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-23", - "submission_date": "2024-08-10", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "llama3.1", - "hub_hearts": 6, - "params_billions": 8.03, - "co2_cost": 1.8021953793743168 - } - }, - { - "id": "ValiantLabs/Llama3.1-8B-ShiningValiant2_bfloat16_6b2b5694a192cb29ad0e4314138affa25b630c0e_True", - "model": { - "name": "ValiantLabs/Llama3.1-8B-ShiningValiant2", - "sha": "6b2b5694a192cb29ad0e4314138affa25b630c0e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.15728097110788, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6495653754260917, - "normalized_score": 64.95653754260917 - }, - "bbh": { - "name": "BBH", - "value": 0.477390600131639, - "normalized_score": 26.346118640067065 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05664652567975831, - "normalized_score": 5.664652567975831 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3104026845637584, - "normalized_score": 8.05369127516779 - }, - "musr": { - "name": "MUSR", - "value": 0.39086458333333335, - "normalized_score": 7.458072916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33818151595744683, - "normalized_score": 26.46461288416076 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-06", - "submission_date": "2024-08-10", - "generation": 2, - "base_model": 
"meta-llama/Meta-Llama-3.1-8B", - "hub_license": "llama3.1", - "hub_hearts": 16, - "params_billions": 8.03, - "co2_cost": 2.4452384103432947 - } - }, - { - "id": "ValiantLabs/Llama3.1-8B-ShiningValiant2_float16_6b2b5694a192cb29ad0e4314138affa25b630c0e_False", - "model": { - "name": "ValiantLabs/Llama3.1-8B-ShiningValiant2", - "sha": "6b2b5694a192cb29ad0e4314138affa25b630c0e", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 15.45803558114332, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.26780608784691284, - "normalized_score": 26.780608784691285 - }, - "bbh": { - "name": "BBH", - "value": 0.4429290017852748, - "normalized_score": 21.618149642278947 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05211480362537765, - "normalized_score": 5.211480362537765 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - "musr": { - "name": "MUSR", - "value": 0.39591666666666664, - "normalized_score": 10.789583333333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.292719414893617, - "normalized_score": 21.413268321513 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-06", - "submission_date": "2024-11-05", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "llama3.1", - "hub_hearts": 16, - "params_billions": 8.03, - "co2_cost": 6.528982131474145 - } - }, - { - "id": "ValiantLabs/Llama3.2-3B-Enigma_float16_ca6adf3a289ce47c7598139e7a312e2b4b3708ce_False", - "model": { - "name": "ValiantLabs/Llama3.2-3B-Enigma", - "sha": "ca6adf3a289ce47c7598139e7a312e2b4b3708ce", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 11.692730593705356, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2786218345102107, - "normalized_score": 27.86218345102107 - }, - "bbh": { - "name": "BBH", - "value": 0.3722590772046992, - "normalized_score": 12.434025970150065 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04380664652567976, - "normalized_score": 4.380664652567976 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.3921354166666667, - "normalized_score": 8.050260416666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2427692819148936, - "normalized_score": 15.86325354609929 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-30", - "submission_date": "2024-10-02", - "generation": 1, - "base_model": "meta-llama/Llama-3.2-3B-Instruct", - "hub_license": "llama3.2", - "hub_hearts": 7, - "params_billions": 3.213, - "co2_cost": 2.244792890969476 - } - }, - { - "id": "ValiantLabs/Llama3.2-3B-Esper2_float16_64a2c619a2e1680ab42945fcf5b75a5242cab3a1_False", - "model": { - "name": "ValiantLabs/Llama3.2-3B-Esper2", - "sha": "64a2c619a2e1680ab42945fcf5b75a5242cab3a1", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": 
"LlamaForCausalLM", - "average_score": 10.944295126634552, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.27497484452364174, - "normalized_score": 27.49748445236417 - }, - "bbh": { - "name": "BBH", - "value": 0.38082611390366106, - "normalized_score": 13.851732907913407 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03625377643504532, - "normalized_score": 3.625377643504532 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.3549583333333333, - "normalized_score": 4.036458333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.22573138297872342, - "normalized_score": 13.9701536643026 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-03", - "submission_date": "2024-10-09", - "generation": 1, - "base_model": "meta-llama/Llama-3.2-3B-Instruct", - "hub_license": "llama3.2", - "hub_hearts": 3, - "params_billions": 3.213, - "co2_cost": 1.4777687000332602 - } - }, - { - "id": "ValiantLabs/Llama3.2-3B-ShiningValiant2_float16_1336e200485675c9b92baae17831eab17c601803_False", - "model": { - "name": "ValiantLabs/Llama3.2-3B-ShiningValiant2", - "sha": "1336e200485675c9b92baae17831eab17c601803", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.39069569820728, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2625101397624968, - "normalized_score": 26.251013976249684 - }, - "bbh": { - "name": "BBH", - "value": 0.42259325337870185, - "normalized_score": 18.912708783001538 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0823262839879154, - "normalized_score": 8.23262839879154 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2802013422818792, - "normalized_score": 4.026845637583895 - }, - "musr": { - "name": "MUSR", - "value": 0.38664583333333336, - "normalized_score": 8.597395833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28291223404255317, - "normalized_score": 20.323581560283685 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-27", - "submission_date": "2024-11-05", - "generation": 1, - "base_model": "meta-llama/Llama-3.2-3B-Instruct", - "hub_license": "llama3.2", - "hub_hearts": 3, - "params_billions": 3.213, - "co2_cost": 3.4639971355899557 - } - }, - { - "id": "Vikhrmodels/Vikhr-Llama3.1-8B-Instruct-R-21-09-24_float16_c0b57cf6d4444b35fc5cec0525ff5eef32af22c9_True", - "model": { - "name": "Vikhrmodels/Vikhr-Llama3.1-8B-Instruct-R-21-09-24", - "sha": "c0b57cf6d4444b35fc5cec0525ff5eef32af22c9", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.354951361962605, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.643145742186288, - "normalized_score": 64.31457421862879 - }, - "bbh": { - "name": "BBH", - "value": 0.527224269970207, - "normalized_score": 32.66941729424733 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2175226586102719, - "normalized_score": 21.75226586102719 - }, - "gpqa": { - "name": 
"GPQA", - "value": 0.24496644295302014, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3753958333333334, - "normalized_score": 5.0911458333333375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3547207446808511, - "normalized_score": 28.302304964539005 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-20", - "submission_date": "2024-09-21", - "generation": 1, - "base_model": "Vikhrmodels/Vikhr-Llama3.1-8B-Instruct-R-21-09-24 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 29, - "params_billions": 8.03, - "co2_cost": 1.713223400631936 - } - }, - { - "id": "Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24_float16_6abd887cb631f705042c9e8085615fe4d76e9779_True", - "model": { - "name": "Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24", - "sha": "6abd887cb631f705042c9e8085615fe4d76e9779", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 25.01995412263689, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5999315150467426, - "normalized_score": 59.993151504674266 - }, - "bbh": { - "name": "BBH", - "value": 0.5212309052827618, - "normalized_score": 31.41440911337631 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1714501510574018, - "normalized_score": 17.14501510574018 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.40730208333333334, - "normalized_score": 9.446093750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33976063829787234, - "normalized_score": 26.640070921985814 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-20", - "submission_date": "2024-09-21", - "generation": 1, - "base_model": "Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 114, - "params_billions": 12.248, - "co2_cost": 3.4415836238229383 - } - }, - { - "id": "Weyaxi/Bagel-Hermes-2x34B_bfloat16_44fddd32d7dcafc0fa670fd87a2e129310640aac_True", - "model": { - "name": "Weyaxi/Bagel-Hermes-2x34B", - "sha": "44fddd32d7dcafc0fa670fd87a2e129310640aac", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 25.611273447311106, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5431532777474878, - "normalized_score": 54.31532777474878 - }, - "bbh": { - "name": "BBH", - "value": 0.49166555632285514, - "normalized_score": 27.409031445428763 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06042296072507553, - "normalized_score": 6.042296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32802013422818793, - "normalized_score": 10.402684563758392 - }, - "musr": { - "name": "MUSR", - "value": 0.45166666666666666, - "normalized_score": 15.625000000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4588597074468085, - "normalized_score": 39.873300827423165 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { 
- "upload_date": "2024-01-12", - "submission_date": "2024-10-28", - "generation": 0, - "base_model": "Weyaxi/Bagel-Hermes-2x34B", - "hub_license": "apache-2.0", - "hub_hearts": 16, - "params_billions": 60.814, - "co2_cost": 19.630335820609552 - } - }, - { - "id": "Weyaxi/Bagel-Hermes-34B-Slerp_bfloat16_dcdcc17a2c650a95bc27129a3ddbf261dffed37f_False", - "model": { - "name": "Weyaxi/Bagel-Hermes-34B-Slerp", - "sha": "dcdcc17a2c650a95bc27129a3ddbf261dffed37f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 27.24685762860255, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4602720780861448, - "normalized_score": 46.02720780861448 - }, - "bbh": { - "name": "BBH", - "value": 0.5921903605860047, - "normalized_score": 41.957047480557854 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06042296072507553, - "normalized_score": 6.042296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3347315436241611, - "normalized_score": 11.297539149888143 - }, - "musr": { - "name": "MUSR", - "value": 0.46220833333333333, - "normalized_score": 17.009375000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4703291223404255, - "normalized_score": 41.147680260047274 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-01-12", - "submission_date": "2024-08-30", - "generation": 0, - "base_model": "Weyaxi/Bagel-Hermes-34B-Slerp", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 34.389, - "co2_cost": 6.042750996972115 - } - }, - { - "id": "Weyaxi/Einstein-v4-7B_bfloat16_7eecd9833b8a012e23ac1df789884888b047baa0_True", - "model": { - "name": "Weyaxi/Einstein-v4-7B", - "sha": "7eecd9833b8a012e23ac1df789884888b047baa0", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 16.755664054789627, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47081299839980145, - "normalized_score": 47.08129983998015 - }, - "bbh": { - "name": "BBH", - "value": 0.38494699692741774, - "normalized_score": 14.30445141720726 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0188821752265861, - "normalized_score": 1.8882175226586102 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28187919463087246, - "normalized_score": 4.250559284116329 - }, - "musr": { - "name": "MUSR", - "value": 0.4681666666666667, - "normalized_score": 19.020833333333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.22589760638297873, - "normalized_score": 13.98862293144208 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-02-22", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "mistralai/Mistral-7B-v0.1", - "hub_license": "other", - "hub_hearts": 48, - "params_billions": 7.242, - "co2_cost": 1.3355080217096282 - } - }, - { - "id": "Weyaxi/Einstein-v6.1-Llama3-8B_bfloat16_5cab6d54666b6024d0f745d61abf1842edb934e0_True", - "model": { - "name": "Weyaxi/Einstein-v6.1-Llama3-8B", - "sha": "5cab6d54666b6024d0f745d61abf1842edb934e0", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - 
"architecture": "LlamaForCausalLM", - "average_score": 20.169491366460083, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4568245588372186, - "normalized_score": 45.68245588372186 - }, - "bbh": { - "name": "BBH", - "value": 0.5008295581095018, - "normalized_score": 29.38377348658535 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06797583081570997, - "normalized_score": 6.797583081570997 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28187919463087246, - "normalized_score": 4.250559284116329 - }, - "musr": { - "name": "MUSR", - "value": 0.42128125, - "normalized_score": 11.22682291666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3130817819148936, - "normalized_score": 23.675753546099287 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-19", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "meta-llama/Meta-Llama-3-8B", - "hub_license": "other", - "hub_hearts": 67, - "params_billions": 8.03, - "co2_cost": 1.719195518627926 - } - }, - { - "id": "Weyaxi/Einstein-v6.1-developed-by-Weyaxi-Llama3-8B_bfloat16_b7507e94146c0832c26609e9ab8115934d3e25b3_True", - "model": { - "name": "Weyaxi/Einstein-v6.1-developed-by-Weyaxi-Llama3-8B", - "sha": "b7507e94146c0832c26609e9ab8115934d3e25b3", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.318742706301332, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.39270247388041507, - "normalized_score": 39.27024738804151 - }, - "bbh": { - "name": "BBH", - "value": 0.5043837450549643, - "normalized_score": 29.69444747698505 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07175226586102719, - "normalized_score": 7.175226586102719 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27348993288590606, - "normalized_score": 3.1319910514541416 - }, - "musr": { - "name": "MUSR", - "value": 0.43324999999999997, - "normalized_score": 13.389583333333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30925864361702127, - "normalized_score": 23.250960401891252 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-23", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "Weyaxi/Einstein-v6.1-developed-by-Weyaxi-Llama3-8B", - "hub_license": "other", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.7434564581463543 - } - }, - { - "id": "Weyaxi/Einstein-v7-Qwen2-7B_bfloat16_d5a2f245bf98a40d196821bc378e10f35b4da81a_True", - "model": { - "name": "Weyaxi/Einstein-v7-Qwen2-7B", - "sha": "d5a2f245bf98a40d196821bc378e10f35b4da81a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 24.806418050095402, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4099633417111043, - "normalized_score": 40.99633417111043 - }, - "bbh": { - "name": "BBH", - "value": 0.5161472249498397, - "normalized_score": 32.84181889691276 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19939577039274925, - "normalized_score": 19.939577039274926 - }, - "gpqa": { - "name": "GPQA", 
- "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.43997916666666664, - "normalized_score": 14.0640625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4095744680851064, - "normalized_score": 34.39716312056737 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-24", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "Qwen/Qwen2-7B", - "hub_license": "other", - "hub_hearts": 38, - "params_billions": 7.616, - "co2_cost": 2.6279427530533828 - } - }, - { - "id": "Weyaxi/Einstein-v8-Llama3.2-1B_bfloat16_1edc6abcb8eedd047bc40b79d2d36c3723ff28e2_True", - "model": { - "name": "Weyaxi/Einstein-v8-Llama3.2-1B", - "sha": "1edc6abcb8eedd047bc40b79d2d36c3723ff28e2", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.640409160461389, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.18622255615101263, - "normalized_score": 18.622255615101263 - }, - "bbh": { - "name": "BBH", - "value": 0.30184334823943154, - "normalized_score": 3.0137741782829335 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0007552870090634441, - "normalized_score": 0.0755287009063444 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.36178125, - "normalized_score": 3.2226562499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11610704787234043, - "normalized_score": 1.7896719858156023 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-28", - "submission_date": "2024-09-30", - "generation": 1, - "base_model": "meta-llama/Llama-3.2-1B", - "hub_license": "llama3.2", - "hub_hearts": 2, - "params_billions": 1.236, - "co2_cost": 0.7758492036245103 - } - }, - { - "id": "Weyaxi/SauerkrautLM-UNA-SOLAR-Instruct_bfloat16_9678b9ca952abe0083dbfc772a56b849866bfa1a_True", - "model": { - "name": "Weyaxi/SauerkrautLM-UNA-SOLAR-Instruct", - "sha": "9678b9ca952abe0083dbfc772a56b849866bfa1a", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.476008390270348, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4573243438520902, - "normalized_score": 45.73243438520902 - }, - "bbh": { - "name": "BBH", - "value": 0.5166357112030591, - "normalized_score": 31.824686783543132 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04607250755287009, - "normalized_score": 4.607250755287009 - }, - "gpqa": { - "name": "GPQA", - "value": 0.311241610738255, - "normalized_score": 8.165548098434002 - }, - "musr": { - "name": "MUSR", - "value": 0.397875, - "normalized_score": 8.601041666666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31532579787234044, - "normalized_score": 23.92508865248227 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-12-21", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": 
"Weyaxi/SauerkrautLM-UNA-SOLAR-Instruct", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 26, - "params_billions": 10.732, - "co2_cost": 1.4920222032108486 - } - }, - { - "id": "WizardLMTeam/WizardLM-13B-V1.0_bfloat16_964a93aa2e78da377115bb856075a69ebe8aefa4_False", - "model": { - "name": "WizardLMTeam/WizardLM-13B-V1.0", - "sha": "964a93aa2e78da377115bb856075a69ebe8aefa4", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.546091523510591, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.18504900331121424, - "normalized_score": 18.504900331121426 - }, - "bbh": { - "name": "BBH", - "value": 0.29134447696551025, - "normalized_score": 2.147966883446335 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.34971875, - "normalized_score": 3.5481770833333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11660571808510638, - "normalized_score": 1.8450797872340412 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-05-13", - "submission_date": "2024-06-13", - "generation": 0, - "base_model": "WizardLMTeam/WizardLM-13B-V1.0", - "hub_license": "", - "hub_hearts": 73, - "params_billions": 13.0, - "co2_cost": 141.95517416949244 - } - }, - { - "id": "WizardLMTeam/WizardLM-13B-V1.2_float16_cf5f40382559f19e13874e45b39575171ca46ef8_False", - "model": { - "name": "WizardLMTeam/WizardLM-13B-V1.2", - "sha": "cf5f40382559f19e13874e45b39575171ca46ef8", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 15.177532740883725, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3392465325336773, - "normalized_score": 33.92465325336773 - }, - "bbh": { - "name": "BBH", - "value": 0.44619994364600474, - "normalized_score": 22.88865497804447 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0188821752265861, - "normalized_score": 1.8882175226586102 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.43784375000000003, - "normalized_score": 14.030468750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.25191156914893614, - "normalized_score": 16.87906323877068 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-07-25", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "WizardLMTeam/WizardLM-13B-V1.2", - "hub_license": "llama2", - "hub_hearts": 226, - "params_billions": 13.0, - "co2_cost": 7.038916260166667 - } - }, - { - "id": "WizardLMTeam/WizardLM-70B-V1.0_float16_54aaecaff7d0790eb9f0ecea1cc267a94cc66949_False", - "model": { - "name": "WizardLMTeam/WizardLM-70B-V1.0", - "sha": "54aaecaff7d0790eb9f0ecea1cc267a94cc66949", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 
22.3974420993294, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49514288753839814, - "normalized_score": 49.514288753839814 - }, - "bbh": { - "name": "BBH", - "value": 0.5590366047184262, - "normalized_score": 37.54335453368136 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03927492447129909, - "normalized_score": 3.927492447129909 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.43911458333333336, - "normalized_score": 14.089322916666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34466422872340424, - "normalized_score": 27.184914302600472 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-08-09", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "WizardLMTeam/WizardLM-70B-V1.0", - "hub_license": "llama2", - "hub_hearts": 235, - "params_billions": 70.0, - "co2_cost": 58.19212658390846 - } - }, - { - "id": "Wladastic/Mini-Think-Base-1B_float16_86460e314d6ca707921baae6864396c18e5c024a_True", - "model": { - "name": "Wladastic/Mini-Think-Base-1B", - "sha": "86460e314d6ca707921baae6864396c18e5c024a", - "precision": "float16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.3485600949955, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5588405430923283, - "normalized_score": 55.88405430923284 - }, - "bbh": { - "name": "BBH", - "value": 0.35741728048349203, - "normalized_score": 9.377987524324114 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07326283987915408, - "normalized_score": 7.326283987915408 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.32748958333333333, - "normalized_score": 3.1361979166666676 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17719414893617022, - "normalized_score": 8.577127659574469 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-18", - "submission_date": "2025-02-22", - "generation": 1, - "base_model": "Wladastic/Mini-Think-Base-1B (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 1, - "params_billions": 1.236, - "co2_cost": 0.3615004625377821 - } - }, - { - "id": "Xclbr7/Arcanum-12b_float16_845ac67d2b527296ae8c06da4453bf8a60f2e59b_False", - "model": { - "name": "Xclbr7/Arcanum-12b", - "sha": "845ac67d2b527296ae8c06da4453bf8a60f2e59b", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.757225902526443, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2906864896253053, - "normalized_score": 29.068648962530528 - }, - "bbh": { - "name": "BBH", - "value": 0.5265359354118465, - "normalized_score": 31.879959562746524 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11933534743202417, - "normalized_score": 11.933534743202417 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32046979865771813, - "normalized_score": 9.395973154362418 - }, - "musr": { - "name": 
"MUSR", - "value": 0.41703124999999996, - "normalized_score": 13.528906249999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3586269946808511, - "normalized_score": 28.736332742316783 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-17", - "submission_date": "2024-09-17", - "generation": 0, - "base_model": "Xclbr7/Arcanum-12b", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 12.248, - "co2_cost": 3.3810214112701495 - } - }, - { - "id": "Xclbr7/Hyena-12b_bfloat16_9dd5eb77ce8e0e05e260ae4d812631fb980527fa_False", - "model": { - "name": "Xclbr7/Hyena-12b", - "sha": "9dd5eb77ce8e0e05e260ae4d812631fb980527fa", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.76453411894911, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3404455733010634, - "normalized_score": 34.04455733010634 - }, - "bbh": { - "name": "BBH", - "value": 0.5457182415468321, - "normalized_score": 34.665648637656034 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11329305135951662, - "normalized_score": 11.329305135951662 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.39842708333333327, - "normalized_score": 11.070052083333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3439162234042553, - "normalized_score": 27.101802600472812 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-19", - "submission_date": "2024-09-19", - "generation": 1, - "base_model": "Xclbr7/Arcanum-12b", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 12.248, - "co2_cost": 3.7197867929063437 - } - }, - { - "id": "Xclbr7/caliburn-12b_float16_f76fa67c7ca8bf7e75540baf55972ba52a46630b_False", - "model": { - "name": "Xclbr7/caliburn-12b", - "sha": "f76fa67c7ca8bf7e75540baf55972ba52a46630b", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.94686461976949, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.35763108551975425, - "normalized_score": 35.76310855197542 - }, - "bbh": { - "name": "BBH", - "value": 0.5518630300231809, - "normalized_score": 35.63684056756332 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11253776435045318, - "normalized_score": 11.253776435045317 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33640939597315433, - "normalized_score": 11.521252796420578 - }, - "musr": { - "name": "MUSR", - "value": 0.4291875, - "normalized_score": 13.781770833333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36751994680851063, - "normalized_score": 29.724438534278963 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-14", - "submission_date": "2024-09-14", - "generation": 0, - "base_model": "Xclbr7/caliburn-12b", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 12.248, - 
"co2_cost": 3.712440910761932 - } - }, - { - "id": "Xclbr7/caliburn-v2-12b_float16_fa736b3b852298dd8c047ac6dcc620161df4a79b_False", - "model": { - "name": "Xclbr7/caliburn-v2-12b", - "sha": "fa736b3b852298dd8c047ac6dcc620161df4a79b", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.96611306962392, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2966816934622358, - "normalized_score": 29.668169346223575 - }, - "bbh": { - "name": "BBH", - "value": 0.5141426125097639, - "normalized_score": 30.387966946397217 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10498489425981873, - "normalized_score": 10.498489425981873 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3263422818791946, - "normalized_score": 10.17897091722595 - }, - "musr": { - "name": "MUSR", - "value": 0.43703125, - "normalized_score": 14.12890625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37840757978723405, - "normalized_score": 30.9341755319149 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-16", - "submission_date": "2024-09-16", - "generation": 0, - "base_model": "Xclbr7/caliburn-v2-12b", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 12.248, - "co2_cost": 3.2643874963426573 - } - }, - { - "id": "Xiaojian9992024/Llama3.2-1B-THREADRIPPER_bfloat16_0697f71f487e1a845626f0cfce6df472fe5eb63d_True", - "model": { - "name": "Xiaojian9992024/Llama3.2-1B-THREADRIPPER", - "sha": "0697f71f487e1a845626f0cfce6df472fe5eb63d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.088097956146527, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5575916346405316, - "normalized_score": 55.75916346405316 - }, - "bbh": { - "name": "BBH", - "value": 0.35437497890840614, - "normalized_score": 9.115530357902744 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07401812688821752, - "normalized_score": 7.401812688821751 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.31297916666666664, - "normalized_score": 2.3223958333333345 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17627992021276595, - "normalized_score": 8.475546690307327 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "Xiaojian9992024/Llama3.2-1B-THREADRIPPER (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 0.7736472043193455 - } - }, - { - "id": "Xiaojian9992024/Llama3.2-1B-THREADRIPPER-v0.2_bfloat16_fa2fbdb6ba0bfa29042d24e33ec10cb6561bf200_True", - "model": { - "name": "Xiaojian9992024/Llama3.2-1B-THREADRIPPER-v0.2", - "sha": "fa2fbdb6ba0bfa29042d24e33ec10cb6561bf200", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.779234902412705, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - 
"name": "IFEval", - "value": 0.5317878783849076, - "normalized_score": 53.17878783849076 - }, - "bbh": { - "name": "BBH", - "value": 0.3527816493941946, - "normalized_score": 8.329663069732538 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06570996978851963, - "normalized_score": 6.570996978851963 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.33164583333333336, - "normalized_score": 4.189062500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1745345744680851, - "normalized_score": 8.281619385342788 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "Xiaojian9992024/Llama3.2-1B-THREADRIPPER-v0.2 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 0.6876423057880287 - } - }, - { - "id": "Xiaojian9992024/Phi-4-Megatron-Empathetic_bfloat16_68d1969e3a4a1b68546ba4c521c403b62d65c00a_False", - "model": { - "name": "Xiaojian9992024/Phi-4-Megatron-Empathetic", - "sha": "68d1969e3a4a1b68546ba4c521c403b62d65c00a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 27.96788940888075, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.01726086783068924, - "normalized_score": 1.726086783068924 - }, - "bbh": { - "name": "BBH", - "value": 0.6673396558729835, - "normalized_score": 51.912764493007764 - }, - "math": { - "name": "MATH Level 5", - "value": 0.26963746223564955, - "normalized_score": 26.963746223564954 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3859060402684564, - "normalized_score": 18.120805369127517 - }, - "musr": { - "name": "MUSR", - "value": 0.5071354166666667, - "normalized_score": 23.72526041666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5082280585106383, - "normalized_score": 45.35867316784871 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-17", - "submission_date": "2025-02-17", - "generation": 1, - "base_model": "Xiaojian9992024/Phi-4-Megatron-Empathetic (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 3.6022378134943116 - } - }, - { - "id": "Xiaojian9992024/Phi-4-mini-UNOFFICAL_bfloat16_39e3ee6d0335dc047f5e8901ea859b55bfce670e_False", - "model": { - "name": "Xiaojian9992024/Phi-4-mini-UNOFFICAL", - "sha": "39e3ee6d0335dc047f5e8901ea859b55bfce670e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 3.014260701114446, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.12732106366662677, - "normalized_score": 12.732106366662677 - }, - "bbh": { - "name": "BBH", - "value": 0.29444372790183987, - "normalized_score": 2.4789472756031983 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2407718120805369, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3368229166666667, - "normalized_score": 
1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11444481382978723, - "normalized_score": 1.6049793144208027 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-25", - "generation": 1, - "base_model": "Xiaojian9992024/Phi-4-mini-UNOFFICAL (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 3.754, - "co2_cost": 0.49202304557773224 - } - }, - { - "id": "Xiaojian9992024/Qwen2.5-7B-MS-Destroyer_bfloat16_dae469769a9e4d29a95d58bd18e9379cf96a6d61_True", - "model": { - "name": "Xiaojian9992024/Qwen2.5-7B-MS-Destroyer", - "sha": "dae469769a9e4d29a95d58bd18e9379cf96a6d61", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.4337301147896, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7295741964653786, - "normalized_score": 72.95741964653786 - }, - "bbh": { - "name": "BBH", - "value": 0.5469696828400438, - "normalized_score": 35.7596547015331 - }, - "math": { - "name": "MATH Level 5", - "value": 0.459214501510574, - "normalized_score": 45.9214501510574 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.42702083333333335, - "normalized_score": 12.777604166666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4412400265957447, - "normalized_score": 37.9155585106383 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-01", - "submission_date": "2025-03-03", - "generation": 1, - "base_model": "Xiaojian9992024/Qwen2.5-7B-MS-Destroyer (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.613, - "co2_cost": 0.6893245788040062 - } - }, - { - "id": "Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview_bfloat16_7053bf7b54611e42080d873e0fe766cef471fa14_True", - "model": { - "name": "Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview", - "sha": "7053bf7b54611e42080d873e0fe766cef471fa14", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 37.29594433542609, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7640205765147586, - "normalized_score": 76.40205765147587 - }, - "bbh": { - "name": "BBH", - "value": 0.5543342320067098, - "normalized_score": 36.61517249738159 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4879154078549849, - "normalized_score": 48.79154078549849 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31711409395973156, - "normalized_score": 8.948545861297541 - }, - "musr": { - "name": "MUSR", - "value": 0.44807291666666665, - "normalized_score": 15.509114583333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43758311170212766, - "normalized_score": 37.50923463356973 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-25", - "submission_date": "2025-02-25", - "generation": 1, - "base_model": "Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview (Merge)", - 
"hub_license": "apache-2.0", - "hub_hearts": 8, - "params_billions": 7.616, - "co2_cost": 0.620859013831167 - } - }, - { - "id": "Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview-v0.2_bfloat16_5c38789333a433ee69fcbdd4a24a597ba0d0abb8_True", - "model": { - "name": "Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview-v0.2", - "sha": "5c38789333a433ee69fcbdd4a24a597ba0d0abb8", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 34.554535104079356, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6701984068937087, - "normalized_score": 67.01984068937087 - }, - "bbh": { - "name": "BBH", - "value": 0.537439126573433, - "normalized_score": 34.35967522664527 - }, - "math": { - "name": "MATH Level 5", - "value": 0.47205438066465255, - "normalized_score": 47.205438066465256 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2936241610738255, - "normalized_score": 5.8165548098433995 - }, - "musr": { - "name": "MUSR", - "value": 0.4467083333333333, - "normalized_score": 15.471874999999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4370844414893617, - "normalized_score": 37.4538268321513 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-26", - "submission_date": "2025-02-26", - "generation": 1, - "base_model": "Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview-v0.2 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.6527470209395493 - } - }, - { - "id": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Medium-Censored_bfloat16_269646a7eaedd39fe99a70d222186d901c3995e8_True", - "model": { - "name": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Medium-Censored", - "sha": "269646a7eaedd39fe99a70d222186d901c3995e8", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.54381790411528, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8112064876749248, - "normalized_score": 81.12064876749247 - }, - "bbh": { - "name": "BBH", - "value": 0.6431453053747279, - "normalized_score": 49.11232867574543 - }, - "math": { - "name": "MATH Level 5", - "value": 0.533987915407855, - "normalized_score": 53.398791540785496 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3347315436241611, - "normalized_score": 11.297539149888143 - }, - "musr": { - "name": "MUSR", - "value": 0.414, - "normalized_score": 10.683333333333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.49285239361702127, - "normalized_score": 43.65026595744681 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-07", - "submission_date": "2025-02-07", - "generation": 1, - "base_model": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Medium-Censored (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 14.77, - "co2_cost": 1.593070184447759 - } - }, - { - "id": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Small_bfloat16_987d437ec9bc9c7d12474cbac3663615d8f7dd79_True", - "model": { - "name": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Small", - "sha": "987d437ec9bc9c7d12474cbac3663615d8f7dd79", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": 
"Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.55431800242105, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7689164749531243, - "normalized_score": 76.89164749531244 - }, - "bbh": { - "name": "BBH", - "value": 0.5489785469339065, - "normalized_score": 35.79468363096907 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4735649546827795, - "normalized_score": 47.35649546827795 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3104026845637584, - "normalized_score": 8.05369127516779 - }, - "musr": { - "name": "MUSR", - "value": 0.43492708333333335, - "normalized_score": 13.932552083333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4356715425531915, - "normalized_score": 37.29683806146573 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-05", - "submission_date": "2025-02-05", - "generation": 1, - "base_model": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Small (Merge)", - "hub_license": "", - "hub_hearts": 4, - "params_billions": 7.616, - "co2_cost": 1.291254759495219 - } - }, - { - "id": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Small-AnniversaryEdition_bfloat16_fc115c61ba4c92a64a1c79f5a55e86a464400122_True", - "model": { - "name": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Small-AnniversaryEdition", - "sha": "fc115c61ba4c92a64a1c79f5a55e86a464400122", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 34.25068817877398, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7403899431286763, - "normalized_score": 74.03899431286763 - }, - "bbh": { - "name": "BBH", - "value": 0.5465437953400678, - "normalized_score": 35.29192945032174 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5075528700906344, - "normalized_score": 50.755287009063444 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.38069791666666664, - "normalized_score": 5.253906250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4393284574468085, - "normalized_score": 37.70316193853428 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-17", - "submission_date": "2025-02-19", - "generation": 1, - "base_model": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Small-AnniversaryEdition (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 7.616, - "co2_cost": 2.044542113283342 - } - }, - { - "id": "Xiaojian9992024/Qwen2.5-Ultra-1.5B-25.02-Exp_bfloat16_43b9236cd7578164a4767a9cfbf1d301c9b7240d_True", - "model": { - "name": "Xiaojian9992024/Qwen2.5-Ultra-1.5B-25.02-Exp", - "sha": "43b9236cd7578164a4767a9cfbf1d301c9b7240d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 14.446569781402497, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4073403015111017, - "normalized_score": 40.73403015111017 - }, - "bbh": { - "name": "BBH", - "value": 0.40655813090204523, - "normalized_score": 17.02637966475561 - }, - "math": { - "name": "MATH Level 5", - 
"value": 0.08308157099697885, - "normalized_score": 8.308157099697885 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.3383125, - "normalized_score": 1.255729166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.26412898936170215, - "normalized_score": 18.23655437352246 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-20", - "submission_date": "2025-02-20", - "generation": 1, - "base_model": "Xiaojian9992024/Qwen2.5-Ultra-1.5B-25.02-Exp (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 1.544, - "co2_cost": 0.6000318406060409 - } - }, - { - "id": "Xiaojian9992024/Reflection-L3.2-JametMiniMix-3B_bfloat16_4d5f20dd1860f7e8538f51fc25f1854ab9c6a6fd_False", - "model": { - "name": "Xiaojian9992024/Reflection-L3.2-JametMiniMix-3B", - "sha": "4d5f20dd1860f7e8538f51fc25f1854ab9c6a6fd", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.125922744230454, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.46194541594081484, - "normalized_score": 46.194541594081485 - }, - "bbh": { - "name": "BBH", - "value": 0.4389528940684813, - "normalized_score": 20.235870672885177 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11933534743202417, - "normalized_score": 11.933534743202417 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.36673958333333334, - "normalized_score": 8.375781250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29878656914893614, - "normalized_score": 22.087396572104016 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-17", - "submission_date": "2025-02-17", - "generation": 1, - "base_model": "Xiaojian9992024/Reflection-L3.2-JametMiniMix-3B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 3.213, - "co2_cost": 0.5851620329780873 - } - }, - { - "id": "Xkev/Llama-3.2V-11B-cot_float16_86d718ed524bf79320497bc2029e835af3b9bcc4_False", - "model": { - "name": "Xkev/Llama-3.2V-11B-cot", - "sha": "86d718ed524bf79320497bc2029e835af3b9bcc4", - "precision": "float16", - "type": "multimodal", - "weight_type": "Original", - "architecture": "MllamaForConditionalGeneration", - "average_score": 21.759029142464396, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.41580894249480266, - "normalized_score": 41.58089424948027 - }, - "bbh": { - "name": "BBH", - "value": 0.495871783411897, - "normalized_score": 28.246761553003438 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1555891238670695, - "normalized_score": 15.55891238670695 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2953020134228188, - "normalized_score": 6.040268456375841 - }, - "musr": { - "name": "MUSR", - "value": 0.4158541666666667, - "normalized_score": 10.381770833333338 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35871010638297873, - "normalized_score": 28.745567375886527 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": 
false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-19", - "submission_date": "2024-12-26", - "generation": 1, - "base_model": "Xkev/Llama-3.2V-11B-cot (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 147, - "params_billions": 10.67, - "co2_cost": 1.4232207220783888 - } - }, - { - "id": "YOYO-AI/Qwen2.5-14B-1M-YOYO-V3_float16_bb54e01a0af3947328ce69470c4f0d73e9cb1ac0_True", - "model": { - "name": "YOYO-AI/Qwen2.5-14B-1M-YOYO-V3", - "sha": "bb54e01a0af3947328ce69470c4f0d73e9cb1ac0", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 42.55942702673594, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8398327548681941, - "normalized_score": 83.98327548681942 - }, - "bbh": { - "name": "BBH", - "value": 0.6448491305599157, - "normalized_score": 49.46606980733037 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5354984894259819, - "normalized_score": 53.54984894259819 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3288590604026846, - "normalized_score": 10.514541387024611 - }, - "musr": { - "name": "MUSR", - "value": 0.414125, - "normalized_score": 11.098958333333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5206948138297872, - "normalized_score": 46.74386820330969 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-21", - "submission_date": "2025-02-22", - "generation": 1, - "base_model": "YOYO-AI/Qwen2.5-14B-1M-YOYO-V3 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 14.766, - "co2_cost": 1.7807627064512441 - } - }, - { - "id": "YOYO-AI/Qwen2.5-14B-YOYO-0505_float16_962e4b7f8d2996f5be867a15f0bbe61cff0d2d98_False", - "model": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-0505", - "sha": "962e4b7f8d2996f5be867a15f0bbe61cff0d2d98", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.665305547250405, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5882912893345214, - "normalized_score": 58.829128933452125 - }, - "bbh": { - "name": "BBH", - "value": 0.6539239511887702, - "normalized_score": 50.35907329801012 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4433534743202417, - "normalized_score": 44.33534743202417 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3733221476510067, - "normalized_score": 16.442953020134222 - }, - "musr": { - "name": "MUSR", - "value": 0.47569791666666666, - "normalized_score": 19.46223958333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5370678191489362, - "normalized_score": 48.56309101654846 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-27", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 3.8572340231669995 - } - }, - { - "id": "YOYO-AI/Qwen2.5-14B-YOYO-0510-v2_float16_127a1434acced2bc83614eaab86d6ca55c3fa30b_False", - "model": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-0510-v2", - "sha": "127a1434acced2bc83614eaab86d6ca55c3fa30b", - 
"precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.98094603659342, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.594710922574325, - "normalized_score": 59.4710922574325 - }, - "bbh": { - "name": "BBH", - "value": 0.6552826977321495, - "normalized_score": 50.468800790349086 - }, - "math": { - "name": "MATH Level 5", - "value": 0.44410876132930516, - "normalized_score": 44.41087613293052 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38171140939597314, - "normalized_score": 17.561521252796418 - }, - "musr": { - "name": "MUSR", - "value": 0.47439583333333335, - "normalized_score": 19.29947916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5380651595744681, - "normalized_score": 48.67390661938534 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-29", - "submission_date": "2025-01-29", - "generation": 1, - "base_model": "YOYO-AI/Qwen2.5-14B-YOYO-0510-v2 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 4.639096152046373 - } - }, - { - "id": "YOYO-AI/Qwen2.5-14B-YOYO-0805_float16_813e280b8acf5b96bdb02e4da6794d8351df3c02_False", - "model": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-0805", - "sha": "813e280b8acf5b96bdb02e4da6794d8351df3c02", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.665305547250405, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5882912893345214, - "normalized_score": 58.829128933452125 - }, - "bbh": { - "name": "BBH", - "value": 0.6539239511887702, - "normalized_score": 50.35907329801012 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4433534743202417, - "normalized_score": 44.33534743202417 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3733221476510067, - "normalized_score": 16.442953020134222 - }, - "musr": { - "name": "MUSR", - "value": 0.47569791666666666, - "normalized_score": 19.46223958333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5370678191489362, - "normalized_score": 48.56309101654846 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-27", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 3.983396677583985 - } - }, - { - "id": "YOYO-AI/Qwen2.5-14B-YOYO-1005_float16_9d644a3a31eb23e31a47b690ca13ee52fb4bad80_False", - "model": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-1005", - "sha": "9d644a3a31eb23e31a47b690ca13ee52fb4bad80", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.08590414271186, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5971588717935079, - "normalized_score": 59.715887179350794 - }, - "bbh": { - "name": "BBH", - "value": 0.6542059787912534, - "normalized_score": 50.286899267965076 - }, - "math": { - "name": "MATH Level 5", - "value": 0.452416918429003, - "normalized_score": 45.2416918429003 - }, - "gpqa": { - "name": 
"GPQA", - "value": 0.3808724832214765, - "normalized_score": 17.4496644295302 - }, - "musr": { - "name": "MUSR", - "value": 0.47303125, - "normalized_score": 19.128906249999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5382313829787234, - "normalized_score": 48.69237588652482 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-26", - "generation": 1, - "base_model": "YOYO-AI/Qwen2.5-14B-YOYO-1005 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 3.9308090122404535 - } - }, - { - "id": "YOYO-AI/Qwen2.5-14B-YOYO-1005-v2_float16_05fcb89cd0d02c1a913478ead4df45a7ebdd7225_False", - "model": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-1005-v2", - "sha": "05fcb89cd0d02c1a913478ead4df45a7ebdd7225", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.99172395122441, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.595310442958018, - "normalized_score": 59.531044295801806 - }, - "bbh": { - "name": "BBH", - "value": 0.6551321410649699, - "normalized_score": 50.51505544014052 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4433534743202417, - "normalized_score": 44.33534743202417 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38422818791946306, - "normalized_score": 17.897091722595075 - }, - "musr": { - "name": "MUSR", - "value": 0.4730625, - "normalized_score": 19.099479166666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5371509308510638, - "normalized_score": 48.5723256501182 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-29", - "submission_date": "2025-01-29", - "generation": 1, - "base_model": "YOYO-AI/Qwen2.5-14B-YOYO-1005-v2 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 3.909489671257113 - } - }, - { - "id": "YOYO-AI/Qwen2.5-14B-YOYO-1010_float16_2d6d7fc307e2b5a570d45393a1511cbdb64e35e0_False", - "model": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-1010", - "sha": "2d6d7fc307e2b5a570d45393a1511cbdb64e35e0", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.008009214120385, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5898648918203699, - "normalized_score": 58.98648918203699 - }, - "bbh": { - "name": "BBH", - "value": 0.6539973096042956, - "normalized_score": 50.26771877763532 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4509063444108761, - "normalized_score": 45.090634441087616 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38338926174496646, - "normalized_score": 17.785234899328863 - }, - "musr": { - "name": "MUSR", - "value": 0.47439583333333335, - "normalized_score": 19.29947916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5375664893617021, - "normalized_score": 48.6184988179669 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": 
"2025-01-25", - "generation": 1, - "base_model": "YOYO-AI/Qwen2.5-14B-YOYO-1010 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 3.886339525994916 - } - }, - { - "id": "YOYO-AI/Qwen2.5-14B-YOYO-1010_bfloat16_b4a8e3712d7dbdfaaa473022ac4d831e41882ad6_True", - "model": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-1010", - "sha": "b4a8e3712d7dbdfaaa473022ac4d831e41882ad6", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 31.959648585770537, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7904737208384863, - "normalized_score": 79.04737208384864 - }, - "bbh": { - "name": "BBH", - "value": 0.6405986391086301, - "normalized_score": 48.6902938974714 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3162751677852349, - "normalized_score": 8.83668903803132 - }, - "musr": { - "name": "MUSR", - "value": 0.4180625, - "normalized_score": 11.357812500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.49443151595744683, - "normalized_score": 43.82572399527187 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-25", - "generation": 1, - "base_model": "YOYO-AI/Qwen2.5-14B-YOYO-1010 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 1.670591842952951 - } - }, - { - "id": "YOYO-AI/Qwen2.5-14B-YOYO-1010-v2_float16_04dc9d0b3cea95118eac713d90112fe9b1f5c53f_False", - "model": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-1010-v2", - "sha": "04dc9d0b3cea95118eac713d90112fe9b1f5c53f", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.98094603659342, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.594710922574325, - "normalized_score": 59.4710922574325 - }, - "bbh": { - "name": "BBH", - "value": 0.6552826977321495, - "normalized_score": 50.468800790349086 - }, - "math": { - "name": "MATH Level 5", - "value": 0.44410876132930516, - "normalized_score": 44.41087613293052 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38171140939597314, - "normalized_score": 17.561521252796418 - }, - "musr": { - "name": "MUSR", - "value": 0.47439583333333335, - "normalized_score": 19.29947916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5380651595744681, - "normalized_score": 48.67390661938534 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-28", - "submission_date": "2025-01-29", - "generation": 1, - "base_model": "YOYO-AI/Qwen2.5-14B-YOYO-1010-v2 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 3.9195075866457527 - } - }, - { - "id": "YOYO-AI/Qwen2.5-14B-YOYO-SCE_float16_9c4749ee44179eca24a86dcc5fda790f72c13455_False", - "model": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-SCE", - "sha": "9c4749ee44179eca24a86dcc5fda790f72c13455", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - 
"average_score": 39.669216951931844, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5843694729983111, - "normalized_score": 58.43694729983112 - }, - "bbh": { - "name": "BBH", - "value": 0.6489486805510399, - "normalized_score": 49.4648829451977 - }, - "math": { - "name": "MATH Level 5", - "value": 0.46148036253776437, - "normalized_score": 46.14803625377644 - }, - "gpqa": { - "name": "GPQA", - "value": 0.37416107382550334, - "normalized_score": 16.554809843400445 - }, - "musr": { - "name": "MUSR", - "value": 0.47042708333333333, - "normalized_score": 18.736718749999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5380651595744681, - "normalized_score": 48.67390661938534 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-31", - "submission_date": "2025-01-31", - "generation": 1, - "base_model": "YOYO-AI/Qwen2.5-14B-YOYO-SCE (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 3.9275474953298914 - } - }, - { - "id": "YOYO-AI/Qwen2.5-14B-YOYO-V4_float16_1befb006bc7c4732ec627b65cb069ad1a66648ed_True", - "model": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-V4", - "sha": "1befb006bc7c4732ec627b65cb069ad1a66648ed", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 42.28551883596217, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8397828871837835, - "normalized_score": 83.97828871837835 - }, - "bbh": { - "name": "BBH", - "value": 0.6490345839036636, - "normalized_score": 49.67240339546996 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5347432024169184, - "normalized_score": 53.47432024169184 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3221476510067114, - "normalized_score": 9.61968680089485 - }, - "musr": { - "name": "MUSR", - "value": 0.41152083333333334, - "normalized_score": 10.640104166666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5169547872340425, - "normalized_score": 46.3283096926714 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-02", - "submission_date": "2025-03-03", - "generation": 1, - "base_model": "YOYO-AI/Qwen2.5-14B-YOYO-V4 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 14.766, - "co2_cost": 3.693343718287393 - } - }, - { - "id": "YOYO-AI/Qwen2.5-14B-YOYO-V4-p1_float16_c5e0a02061f3857fcb2c93904548351766b3feaf_True", - "model": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-V4-p1", - "sha": "c5e0a02061f3857fcb2c93904548351766b3feaf", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 42.45828532336958, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8203488964835526, - "normalized_score": 82.03488964835526 - }, - "bbh": { - "name": "BBH", - "value": 0.6515535751177631, - "normalized_score": 50.24542053284919 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5332326283987915, - "normalized_score": 53.32326283987915 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34563758389261745, - "normalized_score": 12.751677852348994 - }, - 
"musr": { - "name": "MUSR", - "value": 0.41942708333333334, - "normalized_score": 11.728385416666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5019946808510638, - "normalized_score": 44.6660756501182 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-02-28", - "generation": 1, - "base_model": "YOYO-AI/Qwen2.5-14B-YOYO-V4-p1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 1.7656846963986235 - } - }, - { - "id": "YOYO-AI/Qwen2.5-14B-YOYO-V4-p2_float16_6a3445d37b1d99fbc8b26f4dd73af4182545810c_True", - "model": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-V4-p2", - "sha": "6a3445d37b1d99fbc8b26f4dd73af4182545810c", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.58466487593814, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8047868544351211, - "normalized_score": 80.4786854435121 - }, - "bbh": { - "name": "BBH", - "value": 0.6338919627514907, - "normalized_score": 47.026925916460435 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5166163141993958, - "normalized_score": 51.66163141993958 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3271812080536913, - "normalized_score": 10.290827740492169 - }, - "musr": { - "name": "MUSR", - "value": 0.44345833333333334, - "normalized_score": 15.965624999999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.49675864361702127, - "normalized_score": 44.08429373522459 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-01", - "submission_date": "2025-03-01", - "generation": 1, - "base_model": "YOYO-AI/Qwen2.5-14B-YOYO-V4-p2 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 1.757040584464602 - } - }, - { - "id": "YOYO-AI/Qwen2.5-14B-YOYO-latest_float16_96d256afcba2e6616cecd1ca78bbb65f423ef1a4_False", - "model": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-latest", - "sha": "96d256afcba2e6616cecd1ca78bbb65f423ef1a4", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.078312739638136, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.591063932587756, - "normalized_score": 59.10639325877561 - }, - "bbh": { - "name": "BBH", - "value": 0.6656232526900528, - "normalized_score": 52.03543238956396 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4418429003021148, - "normalized_score": 44.18429003021148 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3825503355704698, - "normalized_score": 17.67337807606264 - }, - "musr": { - "name": "MUSR", - "value": 0.469125, - "normalized_score": 18.907291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5370678191489362, - "normalized_score": 48.56309101654846 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "YOYO-AI/Qwen2.5-14B-YOYO-latest 
(Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 3.953941577818676 - } - }, - { - "id": "YOYO-AI/Qwen2.5-14B-YOYO-latest-V2_float16_2945b25444c537ed3b94bb088cdd035f78e5b676_True", - "model": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-latest-V2", - "sha": "2945b25444c537ed3b94bb088cdd035f78e5b676", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.84546724325599, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7771346693440072, - "normalized_score": 77.71346693440073 - }, - "bbh": { - "name": "BBH", - "value": 0.6299023045601466, - "normalized_score": 47.29890601305299 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5158610271903323, - "normalized_score": 51.586102719033235 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3540268456375839, - "normalized_score": 13.870246085011187 - }, - "musr": { - "name": "MUSR", - "value": 0.42993750000000003, - "normalized_score": 13.675520833333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5223570478723404, - "normalized_score": 46.92856087470449 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-17", - "submission_date": "2025-02-18", - "generation": 1, - "base_model": "YOYO-AI/Qwen2.5-14B-YOYO-latest-V2 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 3.697725501634185 - } - }, - { - "id": "YOYO-AI/Qwen2.5-14B-it-restore_float16_2da0ed7953502c4aa27a4fcb39af09b9e6614db9_True", - "model": { - "name": "YOYO-AI/Qwen2.5-14B-it-restore", - "sha": "2da0ed7953502c4aa27a4fcb39af09b9e6614db9", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.50259115054908, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8209484168672456, - "normalized_score": 82.09484168672456 - }, - "bbh": { - "name": "BBH", - "value": 0.6387730309916794, - "normalized_score": 48.42592166972992 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5370090634441088, - "normalized_score": 53.70090634441088 - }, - "gpqa": { - "name": "GPQA", - "value": 0.337248322147651, - "normalized_score": 11.633109619686799 - }, - "musr": { - "name": "MUSR", - "value": 0.40872916666666664, - "normalized_score": 9.824479166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4900265957446808, - "normalized_score": 43.33628841607564 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-09", - "submission_date": "2025-03-09", - "generation": 1, - "base_model": "YOYO-AI/Qwen2.5-14B-it-restore (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 1.670695198460566 - } - }, - { - "id": "YOYO-AI/Qwen2.5-7B-it-restore_float16_c45b9a94190dfdbc24c2e3fb183072738a8a5d97_True", - "model": { - "name": "YOYO-AI/Qwen2.5-7B-it-restore", - "sha": "c45b9a94190dfdbc24c2e3fb183072738a8a5d97", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.33330306100891, - 
"has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7530796065550517, - "normalized_score": 75.30796065550517 - }, - "bbh": { - "name": "BBH", - "value": 0.5406524352251431, - "normalized_score": 35.084321766131644 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5, - "normalized_score": 50.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.40069791666666665, - "normalized_score": 8.25390625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.42877327127659576, - "normalized_score": 36.5303634751773 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-10", - "submission_date": "2025-03-10", - "generation": 1, - "base_model": "YOYO-AI/Qwen2.5-7B-it-restore (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 0.6688730982662607 - } - }, - { - "id": "YOYO-AI/Qwen2.5-Coder-14B-YOYO-1010_float16_45c2fd877aefbdd560eadf12cd0a766029b1f782_False", - "model": { - "name": "YOYO-AI/Qwen2.5-Coder-14B-YOYO-1010", - "sha": "45c2fd877aefbdd560eadf12cd0a766029b1f782", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 32.054639724074, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5335864395359867, - "normalized_score": 53.35864395359867 - }, - "bbh": { - "name": "BBH", - "value": 0.6186663964199025, - "normalized_score": 45.20119016827334 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3217522658610272, - "normalized_score": 32.17522658610272 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3523489932885906, - "normalized_score": 13.646532438478745 - }, - "musr": { - "name": "MUSR", - "value": 0.4422395833333333, - "normalized_score": 13.77994791666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4074966755319149, - "normalized_score": 34.16629728132387 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-27", - "submission_date": "2025-01-28", - "generation": 1, - "base_model": "YOYO-AI/Qwen2.5-Coder-14B-YOYO-1010 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 3.2300749664844886 - } - }, - { - "id": "YOYO-AI/ZYH-LLM-Qwen2.5-14B_float16_93b4ec952f28fd349e37674a41fa111f5adc6a18_False", - "model": { - "name": "YOYO-AI/ZYH-LLM-Qwen2.5-14B", - "sha": "93b4ec952f28fd349e37674a41fa111f5adc6a18", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.823588693279426, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.594111402190632, - "normalized_score": 59.4111402190632 - }, - "bbh": { - "name": "BBH", - "value": 0.6644460038734455, - "normalized_score": 52.042745183320136 - }, - "math": { - "name": "MATH Level 5", - "value": 0.411631419939577, - "normalized_score": 41.1631419939577 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3859060402684564, - "normalized_score": 18.120805369127517 - }, - "musr": { - "name": "MUSR", - "value": 0.47569791666666666, - 
"normalized_score": 19.86223958333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5350731382978723, - "normalized_score": 48.341459810874696 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-05", - "submission_date": "2025-02-05", - "generation": 1, - "base_model": "YOYO-AI/ZYH-LLM-Qwen2.5-14B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.77, - "co2_cost": 3.9418760740147274 - } - }, - { - "id": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V2_float16_fc0ca4d32fd37afc46adb7a77021c1027f2aacda_False", - "model": { - "name": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V2", - "sha": "fc0ca4d32fd37afc46adb7a77021c1027f2aacda", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.566362481938945, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5070834275278483, - "normalized_score": 50.70834275278483 - }, - "bbh": { - "name": "BBH", - "value": 0.6452083564140533, - "normalized_score": 49.088647063990685 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3542296072507553, - "normalized_score": 35.422960725075534 - }, - "gpqa": { - "name": "GPQA", - "value": 0.37919463087248323, - "normalized_score": 17.225950782997764 - }, - "musr": { - "name": "MUSR", - "value": 0.46890625, - "normalized_score": 18.379947916666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5371509308510638, - "normalized_score": 48.5723256501182 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-08", - "submission_date": "2025-02-08", - "generation": 1, - "base_model": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V2 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 14.766, - "co2_cost": 2.015112458534472 - } - }, - { - "id": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V3_float16_3458f675f43efdd59b705f5dd9baea091bbea27c_True", - "model": { - "name": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V3", - "sha": "3458f675f43efdd59b705f5dd9baea091bbea27c", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.62825190779884, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8577928784513978, - "normalized_score": 85.77928784513978 - }, - "bbh": { - "name": "BBH", - "value": 0.6359248665982408, - "normalized_score": 48.182465261630426 - }, - "math": { - "name": "MATH Level 5", - "value": 0.527190332326284, - "normalized_score": 52.7190332326284 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33221476510067116, - "normalized_score": 10.96196868008949 - }, - "musr": { - "name": "MUSR", - "value": 0.40215625, - "normalized_score": 9.002864583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4881150265957447, - "normalized_score": 43.123891843971634 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-23", - "submission_date": "2025-02-24", - "generation": 1, - "base_model": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V3 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 6, - 
"params_billions": 14.766, - "co2_cost": 1.863210504006676 - } - }, - { - "id": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V4_float16_eb2fd7148b95fb8dc30a8d8bdda5f55bf0d4ec94_True", - "model": { - "name": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V4", - "sha": "eb2fd7148b95fb8dc30a8d8bdda5f55bf0d4ec94", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 43.137421470516735, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8364605912312664, - "normalized_score": 83.64605912312663 - }, - "bbh": { - "name": "BBH", - "value": 0.651497220848125, - "normalized_score": 50.26935344231426 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5392749244712991, - "normalized_score": 53.92749244712991 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3145973154362416, - "normalized_score": 8.612975391498878 - }, - "musr": { - "name": "MUSR", - "value": 0.44342708333333336, - "normalized_score": 15.661718749999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5203623670212766, - "normalized_score": 46.706929669030735 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-12", - "submission_date": "2025-03-13", - "generation": 1, - "base_model": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V4 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 14.766, - "co2_cost": 1.8018926490312426 - } - }, - { - "id": "Yash21/TinyYi-7B-Test_float16_7750e5de73fbcf1dcc0832b4cdabaa9713c20475_False", - "model": { - "name": "Yash21/TinyYi-7B-Test", - "sha": "7750e5de73fbcf1dcc0832b4cdabaa9713c20475", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.495167294967694, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.18564852369490728, - "normalized_score": 18.56485236949073 - }, - "bbh": { - "name": "BBH", - "value": 0.29098007801214715, - "normalized_score": 2.267966388832264 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.3364479166666667, - "normalized_score": 3.2226562499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10912566489361702, - "normalized_score": 1.0139627659574466 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-01-06", - "submission_date": "2024-07-03", - "generation": 0, - "base_model": "Yash21/TinyYi-7B-Test", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 6.061, - "co2_cost": 1.5262181891545112 - } - }, - { - "id": "Youlln/1PARAMMYL-8B-ModelStock_bfloat16_4ce556da5ccd1ecac8d0f3e1e94d1982f11b910d_False", - "model": { - "name": "Youlln/1PARAMMYL-8B-ModelStock", - "sha": "4ce556da5ccd1ecac8d0f3e1e94d1982f11b910d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.30915159356537, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5371336941537344, - "normalized_score": 
53.71336941537343 - }, - "bbh": { - "name": "BBH", - "value": 0.5215839663555125, - "normalized_score": 31.799951193327704 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1487915407854985, - "normalized_score": 14.879154078549849 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3238255033557047, - "normalized_score": 9.843400447427292 - }, - "musr": { - "name": "MUSR", - "value": 0.4409375, - "normalized_score": 14.283854166666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4000166223404255, - "normalized_score": 33.335180260047274 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-20", - "submission_date": "2024-09-20", - "generation": 1, - "base_model": "Youlln/1PARAMMYL-8B-ModelStock (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.785043690387826 - } - }, - { - "id": "Youlln/2PRYMMAL-Yi1.5-6B-SLERP_bfloat16_b776bd3ce6784b96ff928b1d5ad51b2991909f2c_False", - "model": { - "name": "Youlln/2PRYMMAL-Yi1.5-6B-SLERP", - "sha": "b776bd3ce6784b96ff928b1d5ad51b2991909f2c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.991811258176195, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.28259351853083153, - "normalized_score": 28.259351853083153 - }, - "bbh": { - "name": "BBH", - "value": 0.46647504291710673, - "normalized_score": 24.495644420709066 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11329305135951662, - "normalized_score": 11.329305135951662 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.47560416666666666, - "normalized_score": 18.150520833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3169880319148936, - "normalized_score": 24.109781323877066 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-22", - "submission_date": "2024-09-23", - "generation": 1, - "base_model": "Youlln/2PRYMMAL-Yi1.5-6B-SLERP (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 6.061, - "co2_cost": 2.0788965592952984 - } - }, - { - "id": "Youlln/3PRYMMAL-PHI3-3B-SLERP_bfloat16_9396bcf1709ac8360a95a746482520fab4295706_False", - "model": { - "name": "Youlln/3PRYMMAL-PHI3-3B-SLERP", - "sha": "9396bcf1709ac8360a95a746482520fab4295706", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 25.138740994778118, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3655500738041729, - "normalized_score": 36.555007380417294 - }, - "bbh": { - "name": "BBH", - "value": 0.5421833887682153, - "normalized_score": 35.82766762143187 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1714501510574018, - "normalized_score": 17.14501510574018 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3263422818791946, - "normalized_score": 10.17897091722595 - }, - "musr": { - "name": "MUSR", - "value": 0.46484375, - "normalized_score": 17.77213541666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4001828457446808, - 
"normalized_score": 33.353649527186754 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-23", - "submission_date": "2024-09-23", - "generation": 1, - "base_model": "Youlln/3PRYMMAL-PHI3-3B-SLERP (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.0, - "co2_cost": 2.1568781105686563 - } - }, - { - "id": "Youlln/4PRYMMAL-GEMMA2-9B-SLERP_bfloat16_7dac3b4ab4298113ae3103d63bb284e1ac8bf4d4_False", - "model": { - "name": "Youlln/4PRYMMAL-GEMMA2-9B-SLERP", - "sha": "7dac3b4ab4298113ae3103d63bb284e1ac8bf4d4", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 23.688707771567948, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2713766140507188, - "normalized_score": 27.137661405071878 - }, - "bbh": { - "name": "BBH", - "value": 0.5922529923998928, - "normalized_score": 42.064171912395665 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09063444108761329, - "normalized_score": 9.06344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33053691275167785, - "normalized_score": 10.738255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.46719791666666666, - "normalized_score": 17.46640625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.42096077127659576, - "normalized_score": 35.662307919621746 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-23", - "submission_date": "2024-09-23", - "generation": 1, - "base_model": "Youlln/4PRYMMAL-GEMMA2-9B-SLERP (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 9.242, - "co2_cost": 5.705320172540969 - } - }, - { - "id": "Youlln/ECE-MIRAGE-1-12B_float16_c9ecb705a9d39be9250d5372a7711e491f6e2154_False", - "model": { - "name": "Youlln/ECE-MIRAGE-1-12B", - "sha": "c9ecb705a9d39be9250d5372a7711e491f6e2154", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.785720345472222, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20698081091503875, - "normalized_score": 20.698081091503873 - }, - "bbh": { - "name": "BBH", - "value": 0.30107140221306034, - "normalized_score": 2.6005529379115604 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.3219375, - "normalized_score": 2.408854166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11095412234042554, - "normalized_score": 1.2171247044917257 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-10", - "submission_date": "2025-02-11", - "generation": 0, - "base_model": "Youlln/ECE-MIRAGE-1-12B", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 15.21, - "co2_cost": 1.2675445429752399 - } - }, - { - "id": "Youlln/ECE-MIRAGE-1-15B_float16_c9ecb705a9d39be9250d5372a7711e491f6e2154_False", - 
"model": { - "name": "Youlln/ECE-MIRAGE-1-15B", - "sha": "c9ecb705a9d39be9250d5372a7711e491f6e2154", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.785720345472222, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20698081091503875, - "normalized_score": 20.698081091503873 - }, - "bbh": { - "name": "BBH", - "value": 0.30107140221306034, - "normalized_score": 2.6005529379115604 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.3219375, - "normalized_score": 2.408854166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11095412234042554, - "normalized_score": 1.2171247044917257 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-10", - "submission_date": "2025-02-21", - "generation": 0, - "base_model": "Youlln/ECE-MIRAGE-1-15B", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 15.21, - "co2_cost": 1.2881628575043904 - } - }, - { - "id": "Youlln/ECE-PRYMMAL-0.5B-FT-V3_bfloat16_d542b4d53888fcc8e96c32892d47ec51afc9edc9_False", - "model": { - "name": "Youlln/ECE-PRYMMAL-0.5B-FT-V3", - "sha": "d542b4d53888fcc8e96c32892d47ec51afc9edc9", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.392856379585042, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16419101317836673, - "normalized_score": 16.419101317836674 - }, - "bbh": { - "name": "BBH", - "value": 0.30931341134548046, - "normalized_score": 3.6168825108366214 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0030211480362537764, - "normalized_score": 0.3021148036253776 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2575503355704698, - "normalized_score": 1.0067114093959737 - }, - "musr": { - "name": "MUSR", - "value": 0.3644479166666667, - "normalized_score": 3.2226562499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11610704787234043, - "normalized_score": 1.7896719858156023 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-16", - "submission_date": "2024-10-16", - "generation": 1, - "base_model": "Youlln/ECE-PRYMMAL-0.5B-FT-V3 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 1.1462704150298528 - } - }, - { - "id": "Youlln/ECE-PRYMMAL-0.5B-FT-V3-MUSR_bfloat16_221dc80a1acd6f7dda0644699e6d61b90a5a0a05_False", - "model": { - "name": "Youlln/ECE-PRYMMAL-0.5B-FT-V3-MUSR", - "sha": "221dc80a1acd6f7dda0644699e6d61b90a5a0a05", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 5.5387028579477615, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15334977858748122, - "normalized_score": 15.334977858748122 - }, - "bbh": { - "name": "BBH", - "value": 0.3041148294962408, - 
"normalized_score": 5.062185886531173 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02416918429003021, - "normalized_score": 2.416918429003021 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24916107382550334, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.36603125000000003, - "normalized_score": 3.2539062500000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1644780585106383, - "normalized_score": 7.164228723404253 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-21", - "submission_date": "2024-10-21", - "generation": 1, - "base_model": "Youlln/ECE-PRYMMAL-0.5B-FT-V3-MUSR (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 2.0599692544226658 - } - }, - { - "id": "Youlln/ECE-PRYMMAL-0.5B-FT-V4-MUSR_bfloat16_f5b268d63bb10f05a229da4f2ee9cb0882c93971_False", - "model": { - "name": "Youlln/ECE-PRYMMAL-0.5B-FT-V4-MUSR", - "sha": "f5b268d63bb10f05a229da4f2ee9cb0882c93971", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.211186680289756, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1137570535069172, - "normalized_score": 11.375705350691721 - }, - "bbh": { - "name": "BBH", - "value": 0.3038362724383693, - "normalized_score": 4.949091743380625 - }, - "math": { - "name": "MATH Level 5", - "value": 0.012084592145015106, - "normalized_score": 1.2084592145015105 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.3528854166666667, - "normalized_score": 1.47734375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.13214760638297873, - "normalized_score": 3.5719562647754137 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-21", - "submission_date": "2024-10-21", - "generation": 1, - "base_model": "Youlln/ECE-PRYMMAL-0.5B-FT-V4-MUSR (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 1.8973827349384214 - } - }, - { - "id": "Youlln/ECE-PRYMMAL-0.5B-SLERP-V2_bfloat16_5e87669abcdc042774a63b94a13880f1acd6e15d_False", - "model": { - "name": "Youlln/ECE-PRYMMAL-0.5B-SLERP-V2", - "sha": "5e87669abcdc042774a63b94a13880f1acd6e15d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.6271949147226215, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1611934112599015, - "normalized_score": 16.119341125990154 - }, - "bbh": { - "name": "BBH", - "value": 0.2934774313772131, - "normalized_score": 1.9175609031491385 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0007552870090634441, - "normalized_score": 0.0755287009063444 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.3831145833333333, - "normalized_score": 5.355989583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10945811170212766, - "normalized_score": 1.0509013002364058 - 
} - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-22", - "submission_date": "2024-10-22", - "generation": 1, - "base_model": "Youlln/ECE-PRYMMAL-0.5B-SLERP-V2 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 1.3099948071405383 - } - }, - { - "id": "Youlln/ECE-PRYMMAL-0.5B-SLERP-V3_bfloat16_94bfab3b1f41458427e5f8598ceb3ec731ba1bd6_False", - "model": { - "name": "Youlln/ECE-PRYMMAL-0.5B-SLERP-V3", - "sha": "94bfab3b1f41458427e5f8598ceb3ec731ba1bd6", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.6630142582547953, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16701352411601217, - "normalized_score": 16.701352411601217 - }, - "bbh": { - "name": "BBH", - "value": 0.29383772587210827, - "normalized_score": 2.319604893286366 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2516778523489933, - "normalized_score": 0.22371364653244186 - }, - "musr": { - "name": "MUSR", - "value": 0.354125, - "normalized_score": 1.7656250000000016 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10871010638297872, - "normalized_score": 0.9677895981087459 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-22", - "submission_date": "2024-10-22", - "generation": 0, - "base_model": "Youlln/ECE-PRYMMAL-0.5B-SLERP-V3", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 1.27539691314213 - } - }, - { - "id": "Youlln/ECE-PRYMMAL-YL-1B-SLERP-V1_bfloat16_b5cd268edb0cc5c2c6ab2c49c950e611b2b8138c_False", - "model": { - "name": "Youlln/ECE-PRYMMAL-YL-1B-SLERP-V1", - "sha": "b5cd268edb0cc5c2c6ab2c49c950e611b2b8138c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 16.681936382880597, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.32510848991786234, - "normalized_score": 32.51084899178623 - }, - "bbh": { - "name": "BBH", - "value": 0.4208506248736219, - "normalized_score": 18.27951144620795 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10725075528700906, - "normalized_score": 10.725075528700906 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.4265833333333333, - "normalized_score": 11.589583333333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2935505319148936, - "normalized_score": 21.5056146572104 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-08", - "submission_date": "2024-11-08", - "generation": 0, - "base_model": "Youlln/ECE-PRYMMAL-YL-1B-SLERP-V1", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.544, - "co2_cost": 1.190681575994079 - } - }, - { - "id": "Youlln/ECE-PRYMMAL-YL-1B-SLERP-V2_bfloat16_3559f643c8d5774135a1cd8daea78fef31035679_False", 
- "model": { - "name": "Youlln/ECE-PRYMMAL-YL-1B-SLERP-V2", - "sha": "3559f643c8d5774135a1cd8daea78fef31035679", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 16.681936382880597, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.32510848991786234, - "normalized_score": 32.51084899178623 - }, - "bbh": { - "name": "BBH", - "value": 0.4208506248736219, - "normalized_score": 18.27951144620795 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10725075528700906, - "normalized_score": 10.725075528700906 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.4265833333333333, - "normalized_score": 11.589583333333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2935505319148936, - "normalized_score": 21.5056146572104 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-08", - "submission_date": "2024-11-08", - "generation": 0, - "base_model": "Youlln/ECE-PRYMMAL-YL-1B-SLERP-V2", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.544, - "co2_cost": 1.20925564708773 - } - }, - { - "id": "Youlln/ECE-PRYMMAL-YL-7B-SLERP-V4_bfloat16_4939b9e24be6f03d5df1e9bb7dc1b4fd5d59404a_False", - "model": { - "name": "Youlln/ECE-PRYMMAL-YL-7B-SLERP-V4", - "sha": "4939b9e24be6f03d5df1e9bb7dc1b4fd5d59404a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 10.869547461805958, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2509696494190969, - "normalized_score": 25.09696494190969 - }, - "bbh": { - "name": "BBH", - "value": 0.37697272812325017, - "normalized_score": 13.157437333845115 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05362537764350453, - "normalized_score": 5.362537764350453 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.3744895833333333, - "normalized_score": 7.011197916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2131815159574468, - "normalized_score": 12.575723995271867 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-06", - "submission_date": "2024-11-06", - "generation": 0, - "base_model": "Youlln/ECE-PRYMMAL-YL-7B-SLERP-V4", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.540313926541391 - } - }, - { - "id": "Youlln/ECE-PRYMMAL0.5-FT_bfloat16_56b9fd5f26e5b6379fe4aa62e0f66b87b5c6f8e8_False", - "model": { - "name": "Youlln/ECE-PRYMMAL0.5-FT", - "sha": "56b9fd5f26e5b6379fe4aa62e0f66b87b5c6f8e8", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 5.585741676013818, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.18507338306803725, - "normalized_score": 18.507338306803724 - }, - "bbh": { - "name": "BBH", - "value": 0.31320911187036277, - 
"normalized_score": 5.151599849335524 - }, - "math": { - "name": "MATH Level 5", - "value": 0.023413897280966767, - "normalized_score": 2.3413897280966767 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2558724832214765, - "normalized_score": 0.7829977628635317 - }, - "musr": { - "name": "MUSR", - "value": 0.330125, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.14768949468085107, - "normalized_score": 5.298832742316785 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-02", - "submission_date": "2024-10-02", - "generation": 1, - "base_model": "Youlln/ECE-PRYMMAL0.5-FT (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 1.0067829764347855 - } - }, - { - "id": "Youlln/ECE-PRYMMAL0.5B-Youri_bfloat16_1477d3deff98f35f523aa222bc0442278d464566_False", - "model": { - "name": "Youlln/ECE-PRYMMAL0.5B-Youri", - "sha": "1477d3deff98f35f523aa222bc0442278d464566", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.505273892929676, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1446317991817267, - "normalized_score": 14.46317991817267 - }, - "bbh": { - "name": "BBH", - "value": 0.28173574256265815, - "normalized_score": 1.5012962555992377 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24328859060402686, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.36965625, - "normalized_score": 4.007031250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10954122340425532, - "normalized_score": 1.0601359338061456 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-07", - "submission_date": "2024-10-07", - "generation": 1, - "base_model": "Youlln/ECE-PRYMMAL0.5B-Youri (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 0.63, - "co2_cost": 1.3116917717904806 - } - }, - { - "id": "Youlln/ECE-PRYMMAL1B-FT-V1_float16_d0fc3a6e93f91c8d586eb25c9f2a4ea4ca99e9f4_False", - "model": { - "name": "Youlln/ECE-PRYMMAL1B-FT-V1", - "sha": "d0fc3a6e93f91c8d586eb25c9f2a4ea4ca99e9f4", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 11.84779796618845, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2143745262569981, - "normalized_score": 21.43745262569981 - }, - "bbh": { - "name": "BBH", - "value": 0.4032647427840684, - "normalized_score": 16.18938601764277 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06419939577039276, - "normalized_score": 6.419939577039275 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2785234899328859, - "normalized_score": 3.8031319910514525 - }, - "musr": { - "name": "MUSR", - "value": 0.34165625, - "normalized_score": 3.873697916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2742686170212766, - "normalized_score": 19.363179669030732 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - 
"is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-12", - "submission_date": "2024-10-12", - "generation": 1, - "base_model": "Youlln/ECE-PRYMMAL1B-FT-V1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.544, - "co2_cost": 1.475191139732048 - } - }, - { - "id": "Youlln/ECE-Qwen0.5B-FT-V2_bfloat16_c87da3f19ab74854fca30f9ca71ce5c4884ef629_False", - "model": { - "name": "Youlln/ECE-Qwen0.5B-FT-V2", - "sha": "c87da3f19ab74854fca30f9ca71ce5c4884ef629", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.5746869620543364, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25259311958935626, - "normalized_score": 25.259311958935623 - }, - "bbh": { - "name": "BBH", - "value": 0.328970813623839, - "normalized_score": 7.632147610946966 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02039274924471299, - "normalized_score": 2.0392749244712993 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26677852348993286, - "normalized_score": 2.2371364653243813 - }, - "musr": { - "name": "MUSR", - "value": 0.30628125, - "normalized_score": 0.8851562499999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16655585106382978, - "normalized_score": 7.395094562647753 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-11", - "submission_date": "2024-10-11", - "generation": 1, - "base_model": "Youlln/ECE-Qwen0.5B-FT-V2 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 1.0465990255890534 - } - }, - { - "id": "Youlln/ECE.EIFFEIL.ia-0.5B-SLERP_bfloat16_e376ce416af881eefa778d2566d15d9a6d29e7d9_False", - "model": { - "name": "Youlln/ECE.EIFFEIL.ia-0.5B-SLERP", - "sha": "e376ce416af881eefa778d2566d15d9a6d29e7d9", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.829965651830063, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2561403742071038, - "normalized_score": 25.614037420710382 - }, - "bbh": { - "name": "BBH", - "value": 0.33056720460862643, - "normalized_score": 8.405356119616796 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05966767371601209, - "normalized_score": 5.966767371601208 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.31021875, - "normalized_score": 0.9440104166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1903257978723404, - "normalized_score": 10.036199763593379 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-14", - "submission_date": "2024-10-14", - "generation": 1, - "base_model": "Youlln/ECE.EIFFEIL.ia-0.5B-SLERP (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.2081187188905276 - } - }, - { - "id": "YoungPanda/qwenqwen_bfloat16_3b5d9b63076acc8988b8f7e9734cf1d78bb39c25_True", - "model": { - "name": "YoungPanda/qwenqwen", - "sha": 
"3b5d9b63076acc8988b8f7e9734cf1d78bb39c25", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2MoeForCausalLM", - "average_score": 4.783628186114581, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.12639684924888184, - "normalized_score": 12.639684924888185 - }, - "bbh": { - "name": "BBH", - "value": 0.337898518087465, - "normalized_score": 8.194779944827593 - }, - "math": { - "name": "MATH Level 5", - "value": 0.035498489425981876, - "normalized_score": 3.5498489425981874 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.34336458333333336, - "normalized_score": 2.4539062499999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11677194148936171, - "normalized_score": 1.8635490543735225 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-12", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.316, - "co2_cost": 14.245337780966572 - } - }, - { - "id": "Yuma42/KangalKhan-RawRuby-7B_bfloat16_54f56d4c6889eaf43fdd5f7d6dcef3c2ebe51929_True", - "model": { - "name": "Yuma42/KangalKhan-RawRuby-7B", - "sha": "54f56d4c6889eaf43fdd5f7d6dcef3c2ebe51929", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.49108954626372, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.547674614467391, - "normalized_score": 54.76746144673909 - }, - "bbh": { - "name": "BBH", - "value": 0.47547278683676025, - "normalized_score": 26.387283588738626 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06646525679758308, - "normalized_score": 6.646525679758309 - }, - "gpqa": { - "name": "GPQA", - "value": 0.287751677852349, - "normalized_score": 5.033557046979867 - }, - "musr": { - "name": "MUSR", - "value": 0.39495833333333336, - "normalized_score": 7.636458333333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30227726063829785, - "normalized_score": 22.475251182033094 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-02-17", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "Yuma42/KangalKhan-RawRuby-7B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 7, - "params_billions": 7.242, - "co2_cost": 1.3074388891203519 - } - }, - { - "id": "Yuma42/Llama3.1-IgneousIguana-8B_bfloat16_b3d20a0fdd9002cc39b921363ec873475f68874f_True", - "model": { - "name": "Yuma42/Llama3.1-IgneousIguana-8B", - "sha": "b3d20a0fdd9002cc39b921363ec873475f68874f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 31.476166913247212, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8133297428600558, - "normalized_score": 81.33297428600558 - }, - "bbh": { - "name": "BBH", - "value": 0.5190512670457804, - "normalized_score": 31.985926552081427 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21978851963746224, - 
"normalized_score": 21.978851963746223 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3104026845637584, - "normalized_score": 8.05369127516779 - }, - "musr": { - "name": "MUSR", - "value": 0.42026041666666664, - "normalized_score": 12.465885416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39735704787234044, - "normalized_score": 33.0396719858156 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-10", - "submission_date": "2025-03-10", - "generation": 1, - "base_model": "Yuma42/Llama3.1-IgneousIguana-8B (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 0.7070549834224411 - } - }, - { - "id": "Yuma42/Llama3.1-SuperHawk-8B_bfloat16_4c9dfbc9b8b7bd98ca5ec288f4aab0cf85b2ccb5_True", - "model": { - "name": "Yuma42/Llama3.1-SuperHawk-8B", - "sha": "4c9dfbc9b8b7bd98ca5ec288f4aab0cf85b2ccb5", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 31.135471049209404, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7986420475449585, - "normalized_score": 79.86420475449586 - }, - "bbh": { - "name": "BBH", - "value": 0.5199931545260023, - "normalized_score": 31.966635202809908 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2348942598187311, - "normalized_score": 23.48942598187311 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31291946308724833, - "normalized_score": 8.389261744966444 - }, - "musr": { - "name": "MUSR", - "value": 0.40835416666666663, - "normalized_score": 10.377604166666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39453125, - "normalized_score": 32.72569444444445 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-10", - "submission_date": "2025-03-10", - "generation": 1, - "base_model": "Yuma42/Llama3.1-SuperHawk-8B (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 0.7120080997921155 - } - }, - { - "id": "Z1-Coder/Z1-Coder-7B_float16_fb3866e2735424e6e133a1ca955dcdec6b577908_True", - "model": { - "name": "Z1-Coder/Z1-Coder-7B", - "sha": "fb3866e2735424e6e133a1ca955dcdec6b577908", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 21.533348699576006, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3215113676157041, - "normalized_score": 32.15113676157041 - }, - "bbh": { - "name": "BBH", - "value": 0.48418251218099567, - "normalized_score": 28.158145376480263 - }, - "math": { - "name": "MATH Level 5", - "value": 0.324773413897281, - "normalized_score": 32.477341389728096 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.36215625, - "normalized_score": 2.736197916666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37591422872340424, - "normalized_score": 30.657136524822686 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { 
- "upload_date": "2025-01-18", - "submission_date": "2025-03-01", - "generation": 0, - "base_model": "Z1-Coder/Z1-Coder-7B", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 1.1700372621730621 - } - }, - { - "id": "ZHLiu627/zephyr-7b-gemma-dpo-avg_float16_b97a56a94799ff084fde17bb8e65f3d1d80ecfe3_False", - "model": { - "name": "ZHLiu627/zephyr-7b-gemma-dpo-avg", - "sha": "b97a56a94799ff084fde17bb8e65f3d1d80ecfe3", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "GemmaForCausalLM", - "average_score": 14.667993195924751, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.30899679517014855, - "normalized_score": 30.899679517014853 - }, - "bbh": { - "name": "BBH", - "value": 0.41488227982365095, - "normalized_score": 18.40453524667128 - }, - "math": { - "name": "MATH Level 5", - "value": 0.045317220543806644, - "normalized_score": 4.531722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2785234899328859, - "normalized_score": 3.8031319910514525 - }, - "musr": { - "name": "MUSR", - "value": 0.4107083333333333, - "normalized_score": 9.805208333333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28507313829787234, - "normalized_score": 20.563682033096924 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-25", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.538, - "co2_cost": 0.8876135928862379 - } - }, - { - "id": "ZHLiu627/zephyr-7b-gemma-rpo-avg_float16_d66c14a077fd7aa29dabaf3fb19e0f3630fb3646_False", - "model": { - "name": "ZHLiu627/zephyr-7b-gemma-rpo-avg", - "sha": "d66c14a077fd7aa29dabaf3fb19e0f3630fb3646", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "GemmaForCausalLM", - "average_score": 14.588311638428499, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.30060350979844586, - "normalized_score": 30.060350979844586 - }, - "bbh": { - "name": "BBH", - "value": 0.41832761356743015, - "normalized_score": 19.016801252298812 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04984894259818731, - "normalized_score": 4.984894259818732 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27684563758389263, - "normalized_score": 3.5794183445190177 - }, - "musr": { - "name": "MUSR", - "value": 0.40810416666666666, - "normalized_score": 9.546354166666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2830784574468085, - "normalized_score": 20.342050827423165 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-23", - "submission_date": "2025-02-25", - "generation": 0, - "base_model": "ZHLiu627/zephyr-7b-gemma-rpo-avg", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.538, - "co2_cost": 0.8713301212278419 - } - }, - { - "id": "ZeroXClem/L3-Aspire-Heart-Matrix-8B_bfloat16_d63917595e911b077cff38109c74622c3ec41704_True", - "model": { - "name": "ZeroXClem/L3-Aspire-Heart-Matrix-8B", - "sha": "d63917595e911b077cff38109c74622c3ec41704", - "precision": "bfloat16", - "type": 
"basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.81522350685034, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.48335305877294465, - "normalized_score": 48.33530587729446 - }, - "bbh": { - "name": "BBH", - "value": 0.5384211938486898, - "normalized_score": 34.30754710829408 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18277945619335348, - "normalized_score": 18.27794561933535 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32466442953020136, - "normalized_score": 9.955257270693513 - }, - "musr": { - "name": "MUSR", - "value": 0.4187083333333333, - "normalized_score": 13.071874999999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3784906914893617, - "normalized_score": 30.94341016548463 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-21", - "submission_date": "2024-11-22", - "generation": 1, - "base_model": "ZeroXClem/L3-Aspire-Heart-Matrix-8B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 8.03, - "co2_cost": 1.5754026013900415 - } - }, - { - "id": "ZeroXClem/Llama-3.1-8B-AthenaSky-MegaMix_bfloat16_b67376478ad1f6e1355e9c42aea4072561fccc82_True", - "model": { - "name": "ZeroXClem/Llama-3.1-8B-AthenaSky-MegaMix", - "sha": "b67376478ad1f6e1355e9c42aea4072561fccc82", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.791594885986026, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.63008151704145, - "normalized_score": 63.008151704145 - }, - "bbh": { - "name": "BBH", - "value": 0.5163423288466883, - "normalized_score": 31.385283673059273 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2794561933534743, - "normalized_score": 27.945619335347434 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.35384375, - "normalized_score": 6.897135416666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3503989361702128, - "normalized_score": 27.822104018912537 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-11", - "submission_date": "2025-03-11", - "generation": 1, - "base_model": "ZeroXClem/Llama-3.1-8B-AthenaSky-MegaMix (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 0.7384327295388944 - } - }, - { - "id": "ZeroXClem/Llama-3.1-8B-RainbowLight-EtherealMix_bfloat16_f3dacbc5d69989216ebe5c30092b25cb40f93cd9_True", - "model": { - "name": "ZeroXClem/Llama-3.1-8B-RainbowLight-EtherealMix", - "sha": "f3dacbc5d69989216ebe5c30092b25cb40f93cd9", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.830979754905858, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49734149833552754, - "normalized_score": 49.73414983355275 - }, - "bbh": { - "name": "BBH", - "value": 0.5154785280029148, - "normalized_score": 31.072264304734336 - }, - "math": { - "name": "MATH Level 5", - "value": 
0.1216012084592145, - "normalized_score": 12.16012084592145 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28691275167785235, - "normalized_score": 4.921700223713646 - }, - "musr": { - "name": "MUSR", - "value": 0.39470833333333327, - "normalized_score": 9.871874999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.363031914893617, - "normalized_score": 29.225768321512994 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-10", - "submission_date": "2025-03-11", - "generation": 1, - "base_model": "ZeroXClem/Llama-3.1-8B-RainbowLight-EtherealMix (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 0.7630019896475743 - } - }, - { - "id": "ZeroXClem/Llama-3.1-8B-SpecialTitanFusion_bfloat16_c5c0f936a7e182989dd035a846b08bfd578da6e5_True", - "model": { - "name": "ZeroXClem/Llama-3.1-8B-SpecialTitanFusion", - "sha": "c5c0f936a7e182989dd035a846b08bfd578da6e5", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.23340398888008, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7402403400754443, - "normalized_score": 74.02403400754443 - }, - "bbh": { - "name": "BBH", - "value": 0.5438928349489152, - "normalized_score": 34.823135587389494 - }, - "math": { - "name": "MATH Level 5", - "value": 0.23338368580060423, - "normalized_score": 23.338368580060422 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.38739583333333333, - "normalized_score": 7.491145833333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3621176861702128, - "normalized_score": 29.12418735224587 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-11", - "submission_date": "2025-03-11", - "generation": 1, - "base_model": "ZeroXClem/Llama-3.1-8B-SpecialTitanFusion (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 0.6858701462349897 - } - }, - { - "id": "ZeroXClem/Llama-3.1-8B-SuperNova-EtherealHermes_bfloat16_3512e8f5998996db8375142dc80df77a93cd7dc6_True", - "model": { - "name": "ZeroXClem/Llama-3.1-8B-SuperNova-EtherealHermes", - "sha": "3512e8f5998996db8375142dc80df77a93cd7dc6", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.405539774407757, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7338705745200512, - "normalized_score": 73.38705745200511 - }, - "bbh": { - "name": "BBH", - "value": 0.5244464882599044, - "normalized_score": 32.07128887769491 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17447129909365558, - "normalized_score": 17.447129909365557 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29278523489932884, - "normalized_score": 5.7046979865771785 - }, - "musr": { - "name": "MUSR", - "value": 0.4065833333333333, - "normalized_score": 11.322916666666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37450132978723405, - "normalized_score": 30.500147754137117 - } - }, - "features": { - 
"is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-10", - "submission_date": "2025-03-11", - "generation": 1, - "base_model": "ZeroXClem/Llama-3.1-8B-SuperNova-EtherealHermes (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 8.03, - "co2_cost": 0.7371906885752015 - } - }, - { - "id": "ZeroXClem/Llama-3.1-8B-SuperTulu-LexiNova_bfloat16_c25f5786954e523b1b0a2376db9abe1fd22313d1_False", - "model": { - "name": "ZeroXClem/Llama-3.1-8B-SuperTulu-LexiNova", - "sha": "c25f5786954e523b1b0a2376db9abe1fd22313d1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.300170759550948, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4164583305629064, - "normalized_score": 41.64583305629064 - }, - "bbh": { - "name": "BBH", - "value": 0.5078595074869328, - "normalized_score": 30.50279668346708 - }, - "math": { - "name": "MATH Level 5", - "value": 0.25302114803625375, - "normalized_score": 25.302114803625376 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2860738255033557, - "normalized_score": 4.809843400447425 - }, - "musr": { - "name": "MUSR", - "value": 0.39706249999999993, - "normalized_score": 11.232812499999994 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3367686170212766, - "normalized_score": 26.307624113475175 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-10", - "submission_date": "2025-03-10", - "generation": 1, - "base_model": "ZeroXClem/Llama-3.1-8B-SuperTulu-LexiNova (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.468249573248224 - } - }, - { - "id": "ZeroXClem/Qwen-2.5-Aether-SlerpFusion-7B_bfloat16_23992e1be9f77d767181dc7bcb42176395f42c30_True", - "model": { - "name": "ZeroXClem/Qwen-2.5-Aether-SlerpFusion-7B", - "sha": "23992e1be9f77d767181dc7bcb42176395f42c30", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 30.118329802811974, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6261597007052399, - "normalized_score": 62.615970070523986 - }, - "bbh": { - "name": "BBH", - "value": 0.5462236205548866, - "normalized_score": 36.01120909918873 - }, - "math": { - "name": "MATH Level 5", - "value": 0.27341389728096677, - "normalized_score": 27.341389728096676 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2986577181208054, - "normalized_score": 6.487695749440718 - }, - "musr": { - "name": "MUSR", - "value": 0.41778125, - "normalized_score": 11.28932291666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43267952127659576, - "normalized_score": 36.96439125295508 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-13", - "submission_date": "2024-11-20", - "generation": 1, - "base_model": "ZeroXClem/Qwen-2.5-Aether-SlerpFusion-7B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 7.616, - "co2_cost": 1.352707431092865 - } - }, - { - "id": 
"ZeroXClem/Qwen2.5-7B-CelestialHarmony-1M_bfloat16_36dbdb9dec7d9d9dc49c132167121cce45d71394_False", - "model": { - "name": "ZeroXClem/Qwen2.5-7B-CelestialHarmony-1M", - "sha": "36dbdb9dec7d9d9dc49c132167121cce45d71394", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 32.03891571870145, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5943862285402732, - "normalized_score": 59.43862285402732 - }, - "bbh": { - "name": "BBH", - "value": 0.5431374181474681, - "normalized_score": 34.50741633012333 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3474320241691843, - "normalized_score": 34.74320241691843 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3187919463087248, - "normalized_score": 9.172259507829976 - }, - "musr": { - "name": "MUSR", - "value": 0.4595416666666667, - "normalized_score": 16.742708333333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4386635638297872, - "normalized_score": 37.62928486997636 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-30", - "submission_date": "2025-02-05", - "generation": 1, - "base_model": "ZeroXClem/Qwen2.5-7B-CelestialHarmony-1M (Merge)", - "hub_license": "mit", - "hub_hearts": 7, - "params_billions": 7.613, - "co2_cost": 1.4316788097607813 - } - }, - { - "id": "ZeroXClem/Qwen2.5-7B-HomerAnvita-NerdMix_bfloat16_cd87a9d7c9a9c8950af84e1f4c72fff5d4625d8a_True", - "model": { - "name": "ZeroXClem/Qwen2.5-7B-HomerAnvita-NerdMix", - "sha": "cd87a9d7c9a9c8950af84e1f4c72fff5d4625d8a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.64186520551805, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7707649037886142, - "normalized_score": 77.07649037886142 - }, - "bbh": { - "name": "BBH", - "value": 0.5541319848156986, - "normalized_score": 36.579205629985445 - }, - "math": { - "name": "MATH Level 5", - "value": 0.38368580060422963, - "normalized_score": 38.368580060422964 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3196308724832215, - "normalized_score": 9.284116331096197 - }, - "musr": { - "name": "MUSR", - "value": 0.43905208333333334, - "normalized_score": 14.414843750000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4431515957446808, - "normalized_score": 38.12795508274231 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-21", - "submission_date": "2024-11-21", - "generation": 1, - "base_model": "ZeroXClem/Qwen2.5-7B-HomerAnvita-NerdMix (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 7.616, - "co2_cost": 1.5683858790129577 - } - }, - { - "id": "ZeroXClem/Qwen2.5-7B-HomerCreative-Mix_bfloat16_6849553db73428ca67823a06f5cfeea660f77df8_True", - "model": { - "name": "ZeroXClem/Qwen2.5-7B-HomerCreative-Mix", - "sha": "6849553db73428ca67823a06f5cfeea660f77df8", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 34.907245009987854, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": 
"IFEval", - "value": 0.7835044348994002, - "normalized_score": 78.35044348994003 - }, - "bbh": { - "name": "BBH", - "value": 0.5548068560095062, - "normalized_score": 36.7707223822516 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3564954682779456, - "normalized_score": 35.64954682779456 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.43495833333333334, - "normalized_score": 13.76979166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4447307180851064, - "normalized_score": 38.30341312056737 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-21", - "submission_date": "2024-11-21", - "generation": 1, - "base_model": "ZeroXClem/Qwen2.5-7B-HomerCreative-Mix (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 10, - "params_billions": 7.616, - "co2_cost": 1.4591412814366411 - } - }, - { - "id": "ZeroXClem/Qwen2.5-7B-Qandora-CySec_bfloat16_6c8b513dbc61a9f704210d26124244f19f3bc4cc_True", - "model": { - "name": "ZeroXClem/Qwen2.5-7B-Qandora-CySec", - "sha": "6c8b513dbc61a9f704210d26124244f19f3bc4cc", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 32.02349981666794, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6773172958860268, - "normalized_score": 67.73172958860269 - }, - "bbh": { - "name": "BBH", - "value": 0.5490022663689288, - "normalized_score": 36.264898165897854 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2930513595166163, - "normalized_score": 29.305135951661633 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30033557046979864, - "normalized_score": 6.711409395973152 - }, - "musr": { - "name": "MUSR", - "value": 0.4286041666666667, - "normalized_score": 13.408854166666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4484707446808511, - "normalized_score": 38.71897163120567 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-12", - "submission_date": "2024-11-12", - "generation": 1, - "base_model": "ZeroXClem/Qwen2.5-7B-Qandora-CySec (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 7.616, - "co2_cost": 1.3641605836632866 - } - }, - { - "id": "ZeusLabs/L3-Aethora-15B-V2_bfloat16_2c601f116c37dd912c89357dbdbef879a637997e_True", - "model": { - "name": "ZeusLabs/L3-Aethora-15B-V2", - "sha": "2c601f116c37dd912c89357dbdbef879a637997e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.69871364353165, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7208063493752133, - "normalized_score": 72.08063493752132 - }, - "bbh": { - "name": "BBH", - "value": 0.5010910465463698, - "normalized_score": 28.968504695312703 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08081570996978851, - "normalized_score": 8.08157099697885 - }, - "gpqa": { - "name": "GPQA", - "value": 0.287751677852349, - "normalized_score": 5.033557046979867 - }, - "musr": { - "name": "MUSR", - "value": 0.3870833333333333, - 
"normalized_score": 6.252083333333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3499833776595745, - "normalized_score": 27.77593085106383 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-27", - "submission_date": "2024-06-27", - "generation": 1, - "base_model": "ZeusLabs/L3-Aethora-15B-V2 (Merge)", - "hub_license": "cc-by-sa-4.0", - "hub_hearts": 41, - "params_billions": 15.01, - "co2_cost": 4.75533746778399 - } - }, - { - "id": "ZhangShenao/SELM-Llama-3-8B-Instruct-iter-3_bfloat16_9c95ccdeceed14a3c2881bc495101a1acca1385f_True", - "model": { - "name": "ZhangShenao/SELM-Llama-3-8B-Instruct-iter-3", - "sha": "9c95ccdeceed14a3c2881bc495101a1acca1385f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.042937650204056, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6902817856620433, - "normalized_score": 69.02817856620433 - }, - "bbh": { - "name": "BBH", - "value": 0.5046089390770511, - "normalized_score": 29.07853088402274 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08610271903323263, - "normalized_score": 8.610271903323262 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.38451041666666663, - "normalized_score": 5.497135416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3783244680851064, - "normalized_score": 30.92494089834515 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-25", - "submission_date": "2024-07-02", - "generation": 3, - "base_model": "meta-llama/Meta-Llama-3-8B-Instruct", - "hub_license": "mit", - "hub_hearts": 5, - "params_billions": 8.03, - "co2_cost": 1.311181864354396 - } - }, - { - "id": "aaditya/Llama3-OpenBioLLM-70B_bfloat16_5f79deaf38bc5f662943d304d59cb30357e8e5bd_True", - "model": { - "name": "aaditya/Llama3-OpenBioLLM-70B", - "sha": "5f79deaf38bc5f662943d304d59cb30357e8e5bd", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 34.979020412011955, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7596743307756753, - "normalized_score": 75.96743307756752 - }, - "bbh": { - "name": "BBH", - "value": 0.6398872375485518, - "normalized_score": 47.147074677167915 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1971299093655589, - "normalized_score": 19.71299093655589 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32298657718120805, - "normalized_score": 9.731543624161072 - }, - "musr": { - "name": "MUSR", - "value": 0.44171875, - "normalized_score": 14.348177083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4867021276595745, - "normalized_score": 42.96690307328605 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-24", - "submission_date": "2024-08-30", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3-70B", - "hub_license": 
"llama3", - "hub_hearts": 406, - "params_billions": 70.0, - "co2_cost": 19.314044645910396 - } - }, - { - "id": "abacusai/Dracarys-72B-Instruct_bfloat16_10cabc4beb57a69df51533f65e39a7ad22821370_True", - "model": { - "name": "abacusai/Dracarys-72B-Instruct", - "sha": "10cabc4beb57a69df51533f65e39a7ad22821370", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 43.377212135916615, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7855778224001206, - "normalized_score": 78.55778224001206 - }, - "bbh": { - "name": "BBH", - "value": 0.6944066392084981, - "normalized_score": 56.93552010003367 - }, - "math": { - "name": "MATH Level 5", - "value": 0.39652567975830816, - "normalized_score": 39.65256797583081 - }, - "gpqa": { - "name": "GPQA", - "value": 0.39093959731543626, - "normalized_score": 18.791946308724835 - }, - "musr": { - "name": "MUSR", - "value": 0.4558229166666667, - "normalized_score": 16.81119791666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5456283244680851, - "normalized_score": 49.51425827423168 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-08-14", - "submission_date": "2024-08-16", - "generation": 0, - "base_model": "abacusai/Dracarys-72B-Instruct", - "hub_license": "other", - "hub_hearts": 21, - "params_billions": 72.706, - "co2_cost": 24.766928145455058 - } - }, - { - "id": "abacusai/Liberated-Qwen1.5-14B_float16_cc0fa5102bfee821bb5e49f082731ccb9d1fedf1_True", - "model": { - "name": "abacusai/Liberated-Qwen1.5-14B", - "sha": "cc0fa5102bfee821bb5e49f082731ccb9d1fedf1", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 20.50814223648692, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.36310212458499, - "normalized_score": 36.310212458499 - }, - "bbh": { - "name": "BBH", - "value": 0.49480009174671863, - "normalized_score": 28.020905999685464 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16012084592145015, - "normalized_score": 16.012084592145015 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.41746875, - "normalized_score": 10.316927083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35123005319148937, - "normalized_score": 27.914450354609933 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-03-05", - "submission_date": "2024-09-05", - "generation": 0, - "base_model": "abacusai/Liberated-Qwen1.5-14B", - "hub_license": "other", - "hub_hearts": 20, - "params_billions": 14.0, - "co2_cost": 6.1071985770077655 - } - }, - { - "id": "abacusai/Llama-3-Smaug-8B_bfloat16_fe54a7d42160d3d8fcc3289c8c411fd9dd5e8357_True", - "model": { - "name": "abacusai/Llama-3-Smaug-8B", - "sha": "fe54a7d42160d3d8fcc3289c8c411fd9dd5e8357", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.067762376529682, - "has_chat_template": true - }, 
- "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.48667535472546175, - "normalized_score": 48.66753547254618 - }, - "bbh": { - "name": "BBH", - "value": 0.4930712769667174, - "normalized_score": 27.880374189415942 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08534743202416918, - "normalized_score": 8.534743202416918 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2483221476510067, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.36224999999999996, - "normalized_score": 5.047916666666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3184840425531915, - "normalized_score": 24.27600472813239 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-04-19", - "submission_date": "2024-07-02", - "generation": 0, - "base_model": "abacusai/Llama-3-Smaug-8B", - "hub_license": "llama2", - "hub_hearts": 89, - "params_billions": 8.03, - "co2_cost": 1.820436529300302 - } - }, - { - "id": "abacusai/Smaug-34B-v0.1_bfloat16_34d54c65a0247d5eb694973106c816d9c0ad3fc2_True", - "model": { - "name": "abacusai/Smaug-34B-v0.1", - "sha": "34d54c65a0247d5eb694973106c816d9c0ad3fc2", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.95321808709686, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5015625207782018, - "normalized_score": 50.156252077820184 - }, - "bbh": { - "name": "BBH", - "value": 0.5357785983493821, - "normalized_score": 34.261660667279955 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07175226586102719, - "normalized_score": 7.175226586102719 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3296979865771812, - "normalized_score": 10.626398210290827 - }, - "musr": { - "name": "MUSR", - "value": 0.397875, - "normalized_score": 8.134375000000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4542885638297872, - "normalized_score": 39.36539598108747 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-01-25", - "submission_date": "2024-06-12", - "generation": 1, - "base_model": "jondurbin/bagel-34b-v0.2", - "hub_license": "apache-2.0", - "hub_hearts": 60, - "params_billions": 34.389, - "co2_cost": 23.57188137271181 - } - }, - { - "id": "abacusai/Smaug-72B-v0.1_bfloat16_a1d657156f82c24b670158406378648233487011_False", - "model": { - "name": "abacusai/Smaug-72B-v0.1", - "sha": "a1d657156f82c24b670158406378648233487011", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.737299261857203, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5167001334237601, - "normalized_score": 51.67001334237601 - }, - "bbh": { - "name": "BBH", - "value": 0.5995632330786429, - "normalized_score": 43.12510043134919 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19108761329305135, - "normalized_score": 19.108761329305135 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3238255033557047, - "normalized_score": 9.843400447427292 - }, - "musr": { - "name": "MUSR", - "value": 0.4473229166666666, - "normalized_score": 14.415364583333334 - }, - "mmlu_pro": { - "name": 
"MMLU-PRO", - "value": 0.4623503989361702, - "normalized_score": 40.26115543735224 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-02-02", - "submission_date": "2024-06-12", - "generation": 1, - "base_model": "moreh/MoMo-72B-lora-1.8.7-DPO", - "hub_license": "other", - "hub_hearts": 468, - "params_billions": 72.289, - "co2_cost": 58.47160296784014 - } - }, - { - "id": "abacusai/Smaug-Llama-3-70B-Instruct-32K_bfloat16_33840982dc253968f32ef3a534ee0e025eb97482_True", - "model": { - "name": "abacusai/Smaug-Llama-3-70B-Instruct-32K", - "sha": "33840982dc253968f32ef3a534ee0e025eb97482", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 35.76489160177244, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7761107195574409, - "normalized_score": 77.6110719557441 - }, - "bbh": { - "name": "BBH", - "value": 0.6493108088828602, - "normalized_score": 49.07037043446443 - }, - "math": { - "name": "MATH Level 5", - "value": 0.27492447129909364, - "normalized_score": 27.492447129909365 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.4207916666666667, - "normalized_score": 12.432291666666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.47647938829787234, - "normalized_score": 41.83104314420804 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-06-11", - "submission_date": "2024-08-06", - "generation": 0, - "base_model": "abacusai/Smaug-Llama-3-70B-Instruct-32K", - "hub_license": "llama3", - "hub_hearts": 21, - "params_billions": 70.554, - "co2_cost": 26.606826468263424 - } - }, - { - "id": "abacusai/Smaug-Mixtral-v0.1_bfloat16_98fdc8315906b0a8b9e7f24bad89914869fcfc20_True", - "model": { - "name": "abacusai/Smaug-Mixtral-v0.1", - "sha": "98fdc8315906b0a8b9e7f24bad89914869fcfc20", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 23.821471245327263, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5554428915278129, - "normalized_score": 55.54428915278129 - }, - "bbh": { - "name": "BBH", - "value": 0.5162245602454115, - "normalized_score": 31.919260543426763 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09516616314199396, - "normalized_score": 9.516616314199396 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.4298125, - "normalized_score": 12.993229166666671 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3351894946808511, - "normalized_score": 26.13216607565012 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-02-18", - "submission_date": "2024-08-30", - "generation": 0, - "base_model": "abacusai/Smaug-Mixtral-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 12, - "params_billions": 46.703, - "co2_cost": 7.882831004615716 - } - }, - { - "id": 
"abacusai/Smaug-Qwen2-72B-Instruct_bfloat16_af015925946d0c60ef69f512c3b35f421cf8063d_True", - "model": { - "name": "abacusai/Smaug-Qwen2-72B-Instruct", - "sha": "af015925946d0c60ef69f512c3b35f421cf8063d", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 42.07422014151536, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7825303527972447, - "normalized_score": 78.25303527972446 - }, - "bbh": { - "name": "BBH", - "value": 0.6909789934583822, - "normalized_score": 56.26617189727526 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4131419939577039, - "normalized_score": 41.31419939577039 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3615771812080537, - "normalized_score": 14.876957494407161 - }, - "musr": { - "name": "MUSR", - "value": 0.44007291666666665, - "normalized_score": 15.175781249999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.519032579787234, - "normalized_score": 46.559175531914896 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-06-26", - "submission_date": "2024-07-29", - "generation": 0, - "base_model": "abacusai/Smaug-Qwen2-72B-Instruct", - "hub_license": "other", - "hub_hearts": 9, - "params_billions": 72.706, - "co2_cost": 26.514669556273837 - } - }, - { - "id": "abacusai/bigstral-12b-32k_float16_b78a5385ec1b04d6c97f25e9ba1dff18dc98305f_False", - "model": { - "name": "abacusai/bigstral-12b-32k", - "sha": "b78a5385ec1b04d6c97f25e9ba1dff18dc98305f", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 18.135141503206025, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.41938057686937324, - "normalized_score": 41.93805768693733 - }, - "bbh": { - "name": "BBH", - "value": 0.4700122314782882, - "normalized_score": 25.5569024540723 - }, - "math": { - "name": "MATH Level 5", - "value": 0.015105740181268883, - "normalized_score": 1.5105740181268883 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29278523489932884, - "normalized_score": 5.7046979865771785 - }, - "musr": { - "name": "MUSR", - "value": 0.45597916666666666, - "normalized_score": 15.864062500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.26412898936170215, - "normalized_score": 18.23655437352246 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-03-06", - "submission_date": "2024-09-04", - "generation": 1, - "base_model": "abacusai/bigstral-12b-32k (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 43, - "params_billions": 12.476, - "co2_cost": 1.9305588580734474 - } - }, - { - "id": "abacusai/bigyi-15b_float16_b878c15531f7aaf6cf287530f1117b1308b96dc4_False", - "model": { - "name": "abacusai/bigyi-15b", - "sha": "b878c15531f7aaf6cf287530f1117b1308b96dc4", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.051824492532413, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20940327220663396, - "normalized_score": 20.940327220663395 - }, - "bbh": { - "name": "BBH", 
- "value": 0.4345298820215116, - "normalized_score": 19.94022305425607 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02945619335347432, - "normalized_score": 2.9456193353474323 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.35378125, - "normalized_score": 4.289322916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30028257978723405, - "normalized_score": 22.25361997635934 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-03-06", - "submission_date": "2024-09-17", - "generation": 1, - "base_model": "abacusai/bigyi-15b (Merge)", - "hub_license": "other", - "hub_hearts": 11, - "params_billions": 15.058, - "co2_cost": 3.7422439356328927 - } - }, - { - "id": "abhishek/autotrain-0tmgq-5tpbg_float16_a75e1fda984e009613dca3b7846c579a37ab0673_True", - "model": { - "name": "abhishek/autotrain-0tmgq-5tpbg", - "sha": "a75e1fda984e009613dca3b7846c579a37ab0673", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.856618645954227, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19571514692127998, - "normalized_score": 19.571514692127998 - }, - "bbh": { - "name": "BBH", - "value": 0.3134513987945074, - "normalized_score": 4.268752154086201 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2516778523489933, - "normalized_score": 0.22371364653244186 - }, - "musr": { - "name": "MUSR", - "value": 0.36504166666666665, - "normalized_score": 3.3968750000000014 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11510970744680851, - "normalized_score": 1.6788563829787229 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-19", - "submission_date": "2024-12-03", - "generation": 2, - "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct (Merge)", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 0.135, - "co2_cost": 0.3518284991076842 - } - }, - { - "id": "abhishek/autotrain-0tmgq-5tpbg_bfloat16_a75e1fda984e009613dca3b7846c579a37ab0673_True", - "model": { - "name": "abhishek/autotrain-0tmgq-5tpbg", - "sha": "a75e1fda984e009613dca3b7846c579a37ab0673", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.051545214400249, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19516549422199764, - "normalized_score": 19.516549422199766 - }, - "bbh": { - "name": "BBH", - "value": 0.3127326480314375, - "normalized_score": 4.419022545347729 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01283987915407855, - "normalized_score": 1.283987915407855 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.35837499999999994, - "normalized_score": 2.263541666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11436170212765957, - "normalized_score": 1.595744680851063 - } - }, - 
"features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-19", - "submission_date": "2024-12-04", - "generation": 2, - "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct (Merge)", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 0.135, - "co2_cost": 0.6736077839783131 - } - }, - { - "id": "abhishek/autotrain-llama3-70b-orpo-v1_float16_053236c6846cc561c1503ba05e2b28c94855a432_True", - "model": { - "name": "abhishek/autotrain-llama3-70b-orpo-v1", - "sha": "053236c6846cc561c1503ba05e2b28c94855a432", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.813377033004464, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4233023932055834, - "normalized_score": 42.33023932055834 - }, - "bbh": { - "name": "BBH", - "value": 0.5997985900252331, - "normalized_score": 41.565362273408454 - }, - "math": { - "name": "MATH Level 5", - "value": 0.010574018126888218, - "normalized_score": 1.0574018126888218 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24412751677852348, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.35790625000000004, - "normalized_score": 2.5716145833333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11220079787234043, - "normalized_score": 1.3556442080378246 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-02", - "submission_date": "2024-08-30", - "generation": 0, - "base_model": "abhishek/autotrain-llama3-70b-orpo-v1", - "hub_license": "other", - "hub_hearts": 4, - "params_billions": 70.554, - "co2_cost": 21.522055975877294 - } - }, - { - "id": "abhishek/autotrain-llama3-70b-orpo-v2_float16_a2c16a8a7fa48792eb8a1f0c50e13309c2021a63_True", - "model": { - "name": "abhishek/autotrain-llama3-70b-orpo-v2", - "sha": "a2c16a8a7fa48792eb8a1f0c50e13309c2021a63", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.867313366854955, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5406055931594835, - "normalized_score": 54.06055931594835 - }, - "bbh": { - "name": "BBH", - "value": 0.5899473641612185, - "normalized_score": 39.88219882979646 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2107250755287009, - "normalized_score": 21.07250755287009 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2936241610738255, - "normalized_score": 5.8165548098433995 - }, - "musr": { - "name": "MUSR", - "value": 0.41133333333333333, - "normalized_score": 9.950000000000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.48179853723404253, - "normalized_score": 42.4220596926714 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-04", - "submission_date": "2024-08-21", - "generation": 0, - "base_model": "abhishek/autotrain-llama3-70b-orpo-v2", - "hub_license": "other", - "hub_hearts": 3, - "params_billions": 70.554, - "co2_cost": 25.054093177678148 - } - }, - { - "id": 
"abhishek/autotrain-llama3-orpo-v2_bfloat16_1655d0683696a5de2eb9a59c339ee469297beb9c_True", - "model": { - "name": "abhishek/autotrain-llama3-orpo-v2", - "sha": "1655d0683696a5de2eb9a59c339ee469297beb9c", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 12.276280777825411, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4371656094717572, - "normalized_score": 43.71656094717572 - }, - "bbh": { - "name": "BBH", - "value": 0.31593828880846425, - "normalized_score": 4.380133995067516 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04682779456193353, - "normalized_score": 4.682779456193353 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26677852348993286, - "normalized_score": 2.2371364653243813 - }, - "musr": { - "name": "MUSR", - "value": 0.3792395833333333, - "normalized_score": 5.104947916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.22182513297872342, - "normalized_score": 13.536125886524822 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-22", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "abhishek/autotrain-llama3-orpo-v2", - "hub_license": "other", - "hub_hearts": 3, - "params_billions": 8.03, - "co2_cost": 1.8118801946808998 - } - }, - { - "id": "abhishek/autotrain-vr4a1-e5mms_float16_5206a32e0bd3067aef1ce90f5528ade7d866253f_False", - "model": { - "name": "abhishek/autotrain-vr4a1-e5mms", - "sha": "5206a32e0bd3067aef1ce90f5528ade7d866253f", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 18.65996836134808, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21422492320376602, - "normalized_score": 21.4224923203766 - }, - "bbh": { - "name": "BBH", - "value": 0.5000624442873264, - "normalized_score": 28.45661724854773 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14123867069486404, - "normalized_score": 14.123867069486403 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3196308724832215, - "normalized_score": 9.284116331096197 - }, - "musr": { - "name": "MUSR", - "value": 0.389125, - "normalized_score": 9.040625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36668882978723405, - "normalized_score": 29.63209219858156 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-05", - "submission_date": "2024-09-06", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 16.061, - "co2_cost": 3.745756095720281 - } - }, - { - "id": "abideen/MedPhi-4-14B-v1_bfloat16_2efbd4c10d39ac26cfe89535c5eb38b293b794bb_True", - "model": { - "name": "abideen/MedPhi-4-14B-v1", - "sha": "2efbd4c10d39ac26cfe89535c5eb38b293b794bb", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 36.53594090222929, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6276834355066778, - "normalized_score": 62.76834355066778 - }, - "bbh": { - "name": "BBH", - 
"value": 0.6896781879584077, - "normalized_score": 55.57896973123449 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2930513595166163, - "normalized_score": 29.305135951661633 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34395973154362414, - "normalized_score": 12.527964205816552 - }, - "musr": { - "name": "MUSR", - "value": 0.4154583333333333, - "normalized_score": 10.832291666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5338264627659575, - "normalized_score": 48.2029403073286 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-13", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 1.8237211677219296 - } - }, - { - "id": "adamo1139/Yi-34B-200K-AEZAKMI-v2_float16_189b42b0dae6352fbe7165255aae851961c8e678_True", - "model": { - "name": "adamo1139/Yi-34B-200K-AEZAKMI-v2", - "sha": "189b42b0dae6352fbe7165255aae851961c8e678", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.8273490799763, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4555257827010111, - "normalized_score": 45.55257827010111 - }, - "bbh": { - "name": "BBH", - "value": 0.5383819237015192, - "normalized_score": 35.2764249557812 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05664652567975831, - "normalized_score": 5.664652567975831 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33221476510067116, - "normalized_score": 10.96196868008949 - }, - "musr": { - "name": "MUSR", - "value": 0.38860416666666664, - "normalized_score": 6.475520833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4512965425531915, - "normalized_score": 39.032949172576835 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-12-13", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "adamo1139/Yi-34B-200K-AEZAKMI-v2", - "hub_license": "apache-2.0", - "hub_hearts": 12, - "params_billions": 34.389, - "co2_cost": 6.0431489462132335 - } - }, - { - "id": "adriszmar/QAIMath-Qwen2.5-7B-TIES_float16_c89bc166dbe2a31c1fceb40ea7acdd96c5620ff5_False", - "model": { - "name": "adriszmar/QAIMath-Qwen2.5-7B-TIES", - "sha": "c89bc166dbe2a31c1fceb40ea7acdd96c5620ff5", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 5.469542016632626, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.174632198123202, - "normalized_score": 17.463219812320197 - }, - "bbh": { - "name": "BBH", - "value": 0.3126379538396578, - "normalized_score": 5.253690606033476 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24496644295302014, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.40959375, - "normalized_score": 9.132552083333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10871010638297872, - "normalized_score": 0.9677895981087459 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - 
"is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-27", - "submission_date": "2024-10-27", - "generation": 0, - "base_model": "adriszmar/QAIMath-Qwen2.5-7B-TIES", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.2834082223865078 - } - }, - { - "id": "adriszmar/QAIMath-Qwen2.5-7B-TIES_bfloat16_c89bc166dbe2a31c1fceb40ea7acdd96c5620ff5_False", - "model": { - "name": "adriszmar/QAIMath-Qwen2.5-7B-TIES", - "sha": "c89bc166dbe2a31c1fceb40ea7acdd96c5620ff5", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.988441667082756, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16853725891745014, - "normalized_score": 16.853725891745015 - }, - "bbh": { - "name": "BBH", - "value": 0.31242688274884584, - "normalized_score": 5.019151283406914 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0015105740181268882, - "normalized_score": 0.1510574018126888 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24916107382550334, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.39629166666666665, - "normalized_score": 7.169791666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10663231382978723, - "normalized_score": 0.7369237588652473 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-27", - "submission_date": "2024-10-27", - "generation": 0, - "base_model": "adriszmar/QAIMath-Qwen2.5-7B-TIES", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 2.6126317222798834 - } - }, - { - "id": "aevalone/distill_qw_test_bfloat16_d8d2b85fcf1ac1170536c12f98892552f184337b_True", - "model": { - "name": "aevalone/distill_qw_test", - "sha": "d8d2b85fcf1ac1170536c12f98892552f184337b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 33.68409846940663, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.740889728143548, - "normalized_score": 74.0889728143548 - }, - "bbh": { - "name": "BBH", - "value": 0.5245748734435777, - "normalized_score": 33.097457229677104 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4780966767371601, - "normalized_score": 47.809667673716014 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30033557046979864, - "normalized_score": 6.711409395973152 - }, - "musr": { - "name": "MUSR", - "value": 0.38596874999999997, - "normalized_score": 6.046093750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4091589095744681, - "normalized_score": 34.35098995271868 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-01", - "submission_date": "2025-03-05", - "generation": 3, - "base_model": "Qwen/Qwen2.5-7B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.6383736325002226 - } - }, - { - "id": "agentlans/Gemma2-9B-AdvancedFuse_bfloat16_f7d31619237579b7b473f8ebe87047cf881a33a6_True", - "model": { - "name": "agentlans/Gemma2-9B-AdvancedFuse", - "sha": 
"f7d31619237579b7b473f8ebe87047cf881a33a6", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 20.434579949396277, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15427288483446144, - "normalized_score": 15.427288483446144 - }, - "bbh": { - "name": "BBH", - "value": 0.585936684475517, - "normalized_score": 40.51673791578562 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10045317220543806, - "normalized_score": 10.045317220543806 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3347315436241611, - "normalized_score": 11.297539149888143 - }, - "musr": { - "name": "MUSR", - "value": 0.4230833333333333, - "normalized_score": 11.985416666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4000166223404255, - "normalized_score": 33.335180260047274 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-03", - "submission_date": "2025-01-21", - "generation": 1, - "base_model": "agentlans/Gemma2-9B-AdvancedFuse (Merge)", - "hub_license": "gemma", - "hub_hearts": 0, - "params_billions": 9.242, - "co2_cost": 3.959092552204823 - } - }, - { - "id": "agentlans/Llama-3.2-1B-Instruct-CrashCourse12K_bfloat16_9b32ead43c60e8c3b16ee38f223236d3a44f4aa6_True", - "model": { - "name": "agentlans/Llama-3.2-1B-Instruct-CrashCourse12K", - "sha": "9b32ead43c60e8c3b16ee38f223236d3a44f4aa6", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.437967590003339, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5395062877609188, - "normalized_score": 53.95062877609188 - }, - "bbh": { - "name": "BBH", - "value": 0.35481032861183426, - "normalized_score": 9.387917708519003 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07099697885196375, - "normalized_score": 7.099697885196375 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2407718120805369, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.32104166666666667, - "normalized_score": 1.1968750000000006 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1809341755319149, - "normalized_score": 8.992686170212766 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-05", - "submission_date": "2025-01-05", - "generation": 0, - "base_model": "agentlans/Llama-3.2-1B-Instruct-CrashCourse12K", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 0.7763331443373106 - } - }, - { - "id": "agentlans/Llama3.1-8B-drill_bfloat16_0e8ce55d45a029e132ea4a09bdfb8736c7434384_True", - "model": { - "name": "agentlans/Llama3.1-8B-drill", - "sha": "0e8ce55d45a029e132ea4a09bdfb8736c7434384", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.72490725705725, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.765169749597734, - "normalized_score": 76.51697495977339 - }, - "bbh": { - "name": "BBH", - "value": 0.5015680021795333, - "normalized_score": 28.791683075355134 - }, 
- "math": { - "name": "MATH Level 5", - "value": 0.1714501510574018, - "normalized_score": 17.14501510574018 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.36723958333333334, - "normalized_score": 4.704947916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37757646276595747, - "normalized_score": 30.841829196217496 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "agentlans/Llama3.1-8B-drill (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3829473293004872 - } - }, - { - "id": "agentlans/Llama3.1-Daredevilish_bfloat16_43e0c3d9792d39b0e41bbe7fa21116751418b7d9_True", - "model": { - "name": "agentlans/Llama3.1-Daredevilish", - "sha": "43e0c3d9792d39b0e41bbe7fa21116751418b7d9", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.570843048894687, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6291573026237051, - "normalized_score": 62.9157302623705 - }, - "bbh": { - "name": "BBH", - "value": 0.5012506630648397, - "normalized_score": 29.2027295330145 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12915407854984895, - "normalized_score": 12.915407854984895 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.40909375, - "normalized_score": 11.603385416666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3696808510638298, - "normalized_score": 29.964539007092196 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-22", - "submission_date": "2025-01-22", - "generation": 1, - "base_model": "agentlans/Llama3.1-Daredevilish (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.4015468014661299 - } - }, - { - "id": "agentlans/Llama3.1-Daredevilish-Instruct_bfloat16_97acbab4c2412facfda8469521ce3e86baa28e23_True", - "model": { - "name": "agentlans/Llama3.1-Daredevilish-Instruct", - "sha": "97acbab4c2412facfda8469521ce3e86baa28e23", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.365203712781987, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7925969760236173, - "normalized_score": 79.25969760236174 - }, - "bbh": { - "name": "BBH", - "value": 0.5235442557198345, - "normalized_score": 32.21751239385498 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17220543806646527, - "normalized_score": 17.220543806646525 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.3910833333333333, - "normalized_score": 7.918749999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3877160904255319, - "normalized_score": 31.96845449172577 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": 
true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-22", - "submission_date": "2025-01-22", - "generation": 1, - "base_model": "agentlans/Llama3.1-Daredevilish-Instruct (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 2.914256354585132 - } - }, - { - "id": "agentlans/Llama3.1-LexiHermes-SuperStorm_bfloat16_a7e3bdb9308da0a6aa3108b9e14db4e93000be83_True", - "model": { - "name": "agentlans/Llama3.1-LexiHermes-SuperStorm", - "sha": "a7e3bdb9308da0a6aa3108b9e14db4e93000be83", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.430987251890873, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7834545672149895, - "normalized_score": 78.34545672149895 - }, - "bbh": { - "name": "BBH", - "value": 0.5266460888159817, - "normalized_score": 32.54749327279135 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16163141993957703, - "normalized_score": 16.1631419939577 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32298657718120805, - "normalized_score": 9.731543624161072 - }, - "musr": { - "name": "MUSR", - "value": 0.3962604166666667, - "normalized_score": 8.199218750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3843916223404255, - "normalized_score": 31.599069148936167 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-12", - "submission_date": "2025-02-19", - "generation": 1, - "base_model": "agentlans/Llama3.1-LexiHermes-SuperStorm (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.2385420258216013 - } - }, - { - "id": "agentlans/Llama3.1-SuperDeepFuse_bfloat16_4fbcc81c2c9341f72f921bcc6d17523c62e8b099_True", - "model": { - "name": "agentlans/Llama3.1-SuperDeepFuse", - "sha": "4fbcc81c2c9341f72f921bcc6d17523c62e8b099", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 27.387201422763493, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7761605872418517, - "normalized_score": 77.61605872418517 - }, - "bbh": { - "name": "BBH", - "value": 0.5048544889908054, - "normalized_score": 29.218386755692325 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18277945619335348, - "normalized_score": 18.27794561933535 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.369875, - "normalized_score": 5.134375000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3774933510638298, - "normalized_score": 30.832594562647753 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-20", - "submission_date": "2025-01-21", - "generation": 1, - "base_model": "agentlans/Llama3.1-SuperDeepFuse (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3456045308937912 - } - }, - { - "id": "agentlans/Llama3.1-SuperDeepFuse-CrashCourse12K_bfloat16_0d38d69d9daeeb59dd837e21e8373a372e3c2226_True", - 
"model": { - "name": "agentlans/Llama3.1-SuperDeepFuse-CrashCourse12K", - "sha": "0d38d69d9daeeb59dd837e21e8373a372e3c2226", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 27.995793391642223, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.718732961874493, - "normalized_score": 71.8732961874493 - }, - "bbh": { - "name": "BBH", - "value": 0.5215513828266275, - "normalized_score": 31.82844411240522 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18051359516616314, - "normalized_score": 18.051359516616312 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31291946308724833, - "normalized_score": 8.389261744966444 - }, - "musr": { - "name": "MUSR", - "value": 0.40264583333333337, - "normalized_score": 8.597395833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3631150265957447, - "normalized_score": 29.235002955082745 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-24", - "submission_date": "2025-01-24", - "generation": 1, - "base_model": "agentlans/Llama3.1-SuperDeepFuse-CrashCourse12K (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.4103921643817923 - } - }, - { - "id": "agentlans/Qwen2.5-0.5B-Instruct-CrashCourse-dropout_bfloat16_09acebcae7826ca66d950641a92a2732d3cf49eb_True", - "model": { - "name": "agentlans/Qwen2.5-0.5B-Instruct-CrashCourse-dropout", - "sha": "09acebcae7826ca66d950641a92a2732d3cf49eb", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.433362269175692, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2948831323111566, - "normalized_score": 29.48831323111566 - }, - "bbh": { - "name": "BBH", - "value": 0.3311726760218689, - "normalized_score": 7.227868281254662 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04229607250755287, - "normalized_score": 4.229607250755287 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.3341875, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16082114361702127, - "normalized_score": 6.757904846335696 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-01", - "submission_date": "2025-01-01", - "generation": 1, - "base_model": "agentlans/Qwen2.5-0.5B-Instruct-CrashCourse-dropout (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9185456354197431 - } - }, - { - "id": "ahmeda335/13_outOf_32_pruned_layers_llama3.1-8b_float16_248c420cc0a0bb8fce3a64a998ca0ce89613783c_True", - "model": { - "name": "ahmeda335/13_outOf_32_pruned_layers_llama3.1-8b", - "sha": "248c420cc0a0bb8fce3a64a998ca0ce89613783c", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.404258622194578, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17480728910402177, - 
"normalized_score": 17.480728910402178 - }, - "bbh": { - "name": "BBH", - "value": 0.2883257760266153, - "normalized_score": 1.6778452402411428 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.3803229166666666, - "normalized_score": 4.6070312499999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11286569148936171, - "normalized_score": 1.4295212765957446 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-21", - "submission_date": "2024-12-03", - "generation": 1, - "base_model": "ahmeda335/13_outOf_32_pruned_layers_llama3.1-8b (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 5.195, - "co2_cost": 0.992047384114175 - } - }, - { - "id": "ai21labs/Jamba-v0.1_bfloat16_ce13f3fe99555a2606d1892665bb67649032ff2d_True", - "model": { - "name": "ai21labs/Jamba-v0.1", - "sha": "ce13f3fe99555a2606d1892665bb67649032ff2d", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "JambaForCausalLM", - "average_score": 9.218365089520882, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20255920956395698, - "normalized_score": 20.2559209563957 - }, - "bbh": { - "name": "BBH", - "value": 0.36022602451645724, - "normalized_score": 10.722058918870276 - }, - "math": { - "name": "MATH Level 5", - "value": 0.015861027190332326, - "normalized_score": 1.5861027190332326 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.35902083333333334, - "normalized_score": 3.7109374999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.24916888297872342, - "normalized_score": 16.574320330969268 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-03-28", - "submission_date": "2024-09-16", - "generation": 0, - "base_model": "ai21labs/Jamba-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 1181, - "params_billions": 51.57, - "co2_cost": 15.315294656239235 - } - }, - { - "id": "ai4bharat/Airavata_bfloat16_3af92e691e461d80d080823f48996df10aa8ec19_False", - "model": { - "name": "ai4bharat/Airavata", - "sha": "3af92e691e461d80d080823f48996df10aa8ec19", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.550973263891643, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.05585402288150995, - "normalized_score": 5.585402288150995 - }, - "bbh": { - "name": "BBH", - "value": 0.36276862514633795, - "normalized_score": 11.57402914482553 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01812688821752266, - "normalized_score": 1.812688821752266 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.3762916666666667, - "normalized_score": 4.036458333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1634807180851064, - "normalized_score": 
[... diff body elided: this span continues the bulk deletion of leaderboard entry objects, picking up mid-record inside the ai4bharat/Airavata object and breaking off mid-record at the opening of the next "id". The removed entries cover aixonlab/Aether-12b, aixonlab/Grey-12b, aixonlab/Zara-14b-v1.2, the akhadangi/Llama3.2.1B.* fine-tunes, akjindal53244/Llama-3.1-Storm-8B (bfloat16 and float16 runs), alcholjung/llama3_medical_tuned, the allenai Llama-3.1-Tulu-3 family (8B and 70B, plus SFT/DPO/RM variants), allenai OLMo, OLMo-2, and OLMoE checkpoints, and the allknowingroger merge series (Chocolatine-24B, Gemma2Slerp1-4, GemmaSlerp/GemmaSlerp2/GemmaSlerp4/GemmaSlerp5, GemmaStock1-27B, HomerSlerp1-4, LimyQstar-7B-slerp, Llama3.1-60B, Marco-01-slerp1-7B, Meme-7B-slerp, Ministral-8B-slerp). Each deleted JSON object records the model identity ("name", "sha", "precision", "type", "weight_type", "architecture", "average_score", "has_chat_template"), per-benchmark evaluations (IFEval, BBH, MATH Level 5, GPQA, MUSR, MMLU-PRO, each as a raw "value" plus "normalized_score"), feature flags ("is_not_available_on_hub", "is_merged", "is_moe", "is_flagged", "is_official_provider"), and metadata ("upload_date", "submission_date", "generation", "base_model", "hub_license", "hub_hearts", "params_billions", "co2_cost"). ...]
"allknowingroger/MistralPhi3-11B_bfloat16_3afeaf24c6306c4752c320c4fd32fa2e7694e12e_False", - "model": { - "name": "allknowingroger/MistralPhi3-11B", - "sha": "3afeaf24c6306c4752c320c4fd32fa2e7694e12e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.627095011873774, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1942911474886634, - "normalized_score": 19.42911474886634 - }, - "bbh": { - "name": "BBH", - "value": 0.6234314600705605, - "normalized_score": 46.16462900339792 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33221476510067116, - "normalized_score": 10.96196868008949 - }, - "musr": { - "name": "MUSR", - "value": 0.4266770833333333, - "normalized_score": 12.23463541666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.46875, - "normalized_score": 40.97222222222222 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-26", - "submission_date": "2024-09-02", - "generation": 1, - "base_model": "allknowingroger/MistralPhi3-11B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 11.234, - "co2_cost": 1.4140754682898875 - } - }, - { - "id": "allknowingroger/Mistralmash1-7B-s_bfloat16_730b7b2867deef63961f002b6e1e70e7d416c599_False", - "model": { - "name": "allknowingroger/Mistralmash1-7B-s", - "sha": "730b7b2867deef63961f002b6e1e70e7d416c599", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.913865809674476, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.39610012544493056, - "normalized_score": 39.61001254449306 - }, - "bbh": { - "name": "BBH", - "value": 0.5277485757172445, - "normalized_score": 33.44855374433827 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09214501510574018, - "normalized_score": 9.214501510574017 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.4267083333333333, - "normalized_score": 11.805208333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3292885638297872, - "normalized_score": 25.47650709219858 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-27", - "submission_date": "2024-09-02", - "generation": 1, - "base_model": "allknowingroger/Mistralmash1-7B-s (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 1.3223769966212695 - } - }, - { - "id": "allknowingroger/Mistralmash2-7B-s_bfloat16_3b2aafa0f931f3d3103fbc96a6da4ac36f376d78_False", - "model": { - "name": "allknowingroger/Mistralmash2-7B-s", - "sha": "3b2aafa0f931f3d3103fbc96a6da4ac36f376d78", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.389680975325334, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4101883003763348, - "normalized_score": 41.01883003763348 
- }, - "bbh": { - "name": "BBH", - "value": 0.530485814102601, - "normalized_score": 33.29836428588566 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07930513595166164, - "normalized_score": 7.930513595166164 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.43724999999999997, - "normalized_score": 13.65625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3345246010638298, - "normalized_score": 26.0582890070922 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-27", - "submission_date": "2024-09-02", - "generation": 1, - "base_model": "allknowingroger/Mistralmash2-7B-s (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.3310737440025664 - } - }, - { - "id": "allknowingroger/MixTAO-19B-pass_bfloat16_a41369cfcfbada9d5387051ba616bf1432b31d31_False", - "model": { - "name": "allknowingroger/MixTAO-19B-pass", - "sha": "a41369cfcfbada9d5387051ba616bf1432b31d31", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 20.627592394280352, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3814368098866563, - "normalized_score": 38.14368098866563 - }, - "bbh": { - "name": "BBH", - "value": 0.5128248798224987, - "normalized_score": 31.577918110916897 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06117824773413897, - "normalized_score": 6.117824773413897 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28439597315436244, - "normalized_score": 4.5861297539149914 - }, - "musr": { - "name": "MUSR", - "value": 0.47827083333333337, - "normalized_score": 19.950520833333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31050531914893614, - "normalized_score": 23.389479905437348 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-02", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "allknowingroger/MixTAO-19B-pass (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 19.188, - "co2_cost": 2.510264608639344 - } - }, - { - "id": "allknowingroger/MixTaoTruthful-13B-slerp_bfloat16_3324d37e138c6bf0d6891e54b6dd839c8d2f35ec_False", - "model": { - "name": "allknowingroger/MixTaoTruthful-13B-slerp", - "sha": "3324d37e138c6bf0d6891e54b6dd839c8d2f35ec", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 20.252975968080015, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.41388515804731446, - "normalized_score": 41.38851580473145 - }, - "bbh": { - "name": "BBH", - "value": 0.5207335343585151, - "normalized_score": 32.70636246605644 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06646525679758308, - "normalized_score": 6.646525679758309 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28439597315436244, - "normalized_score": 4.5861297539149914 - }, - "musr": { - "name": "MUSR", - "value": 0.42924999999999996, - "normalized_score": 12.856249999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - 
"value": 0.3100066489361702, - "normalized_score": 23.33407210401891 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-25", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "allknowingroger/MixTaoTruthful-13B-slerp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 12.879, - "co2_cost": 1.6162171190429424 - } - }, - { - "id": "allknowingroger/MultiCalm-7B-slerp_bfloat16_1c23540e907fab4dfe0ef66edd0003e764bfe568_False", - "model": { - "name": "allknowingroger/MultiCalm-7B-slerp", - "sha": "1c23540e907fab4dfe0ef66edd0003e764bfe568", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.472289426361296, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3926526061960044, - "normalized_score": 39.26526061960044 - }, - "bbh": { - "name": "BBH", - "value": 0.5121891599770304, - "normalized_score": 31.46648332199455 - }, - "math": { - "name": "MATH Level 5", - "value": 0.061933534743202415, - "normalized_score": 6.193353474320242 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.43194791666666665, - "normalized_score": 12.960156250000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3032746010638298, - "normalized_score": 22.586066784869978 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-19", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "allknowingroger/MultiCalm-7B-slerp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.2331468527800262 - } - }, - { - "id": "allknowingroger/MultiMash-12B-slerp_bfloat16_91a6d0fe6b9271000ca713ee9ab414c782ba4c50_False", - "model": { - "name": "allknowingroger/MultiMash-12B-slerp", - "sha": "91a6d0fe6b9271000ca713ee9ab414c782ba4c50", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 20.179903850442244, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.39744876926554873, - "normalized_score": 39.74487692655487 - }, - "bbh": { - "name": "BBH", - "value": 0.5141827379810838, - "normalized_score": 31.925677106468356 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08081570996978851, - "normalized_score": 8.08157099697885 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27684563758389263, - "normalized_score": 3.5794183445190177 - }, - "musr": { - "name": "MUSR", - "value": 0.44379166666666664, - "normalized_score": 14.773958333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3067652925531915, - "normalized_score": 22.973921394799056 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-20", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "allknowingroger/MultiMash-12B-slerp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 
12.879, - "co2_cost": 1.6839596369648564 - } - }, - { - "id": "allknowingroger/MultiMash10-13B-slerp_bfloat16_6def2fd1a11d4c380a19b7a3bdf263a6b80cd8f3_False", - "model": { - "name": "allknowingroger/MultiMash10-13B-slerp", - "sha": "6def2fd1a11d4c380a19b7a3bdf263a6b80cd8f3", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 20.4264364824254, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.41628323958208663, - "normalized_score": 41.62832395820867 - }, - "bbh": { - "name": "BBH", - "value": 0.5186335995744094, - "normalized_score": 32.45250184104656 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07175226586102719, - "normalized_score": 7.175226586102719 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2860738255033557, - "normalized_score": 4.809843400447425 - }, - "musr": { - "name": "MUSR", - "value": 0.43179166666666663, - "normalized_score": 12.97395833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3116688829787234, - "normalized_score": 23.51876477541371 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-27", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "allknowingroger/MultiMash10-13B-slerp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 12.879, - "co2_cost": 1.7587183017329835 - } - }, - { - "id": "allknowingroger/MultiMash11-13B-slerp_bfloat16_1134a0adabef4a26e1d49c302baff74c4a7e9f46_False", - "model": { - "name": "allknowingroger/MultiMash11-13B-slerp", - "sha": "1134a0adabef4a26e1d49c302baff74c4a7e9f46", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 20.614675908435512, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4251009543566625, - "normalized_score": 42.51009543566625 - }, - "bbh": { - "name": "BBH", - "value": 0.5193864686484946, - "normalized_score": 32.59670310684399 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0702416918429003, - "normalized_score": 7.02416918429003 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.43728125, - "normalized_score": 14.026822916666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30851063829787234, - "normalized_score": 23.167848699763592 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-27", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "allknowingroger/MultiMash11-13B-slerp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 12.879, - "co2_cost": 1.9714771309533339 - } - }, - { - "id": "allknowingroger/MultiMash2-12B-slerp_bfloat16_e44e9563368699f753a4474b068c059d233ddee3_False", - "model": { - "name": "allknowingroger/MultiMash2-12B-slerp", - "sha": "e44e9563368699f753a4474b068c059d233ddee3", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 19.84014306475432, - "has_chat_template": false - 
}, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42607503645881817, - "normalized_score": 42.60750364588182 - }, - "bbh": { - "name": "BBH", - "value": 0.5133973498532299, - "normalized_score": 31.617950213580304 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06419939577039276, - "normalized_score": 6.419939577039275 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.4228020833333333, - "normalized_score": 11.783593750000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3042719414893617, - "normalized_score": 22.696882387706854 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-20", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "allknowingroger/MultiMash2-12B-slerp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 12.879, - "co2_cost": 1.6402793394565462 - } - }, - { - "id": "allknowingroger/MultiMash5-12B-slerp_bfloat16_15ef0301c7ce939208d55ad13fa840662f92bce6_False", - "model": { - "name": "allknowingroger/MultiMash5-12B-slerp", - "sha": "15ef0301c7ce939208d55ad13fa840662f92bce6", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 19.590305453838365, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.41415998439695567, - "normalized_score": 41.415998439695564 - }, - "bbh": { - "name": "BBH", - "value": 0.5144534995858502, - "normalized_score": 31.856364255964934 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0634441087613293, - "normalized_score": 6.3444108761329305 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.4202916666666667, - "normalized_score": 11.703124999999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30277593085106386, - "normalized_score": 22.530658983451538 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 12.879, - "co2_cost": 1.608333288442236 - } - }, - { - "id": "allknowingroger/MultiMash6-12B-slerp_bfloat16_a04856a12b85e986e1b540cf0c7510e9ce2df09b_False", - "model": { - "name": "allknowingroger/MultiMash6-12B-slerp", - "sha": "a04856a12b85e986e1b540cf0c7510e9ce2df09b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 20.27642710504546, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43004672047943904, - "normalized_score": 43.0046720479439 - }, - "bbh": { - "name": "BBH", - "value": 0.5195916915718951, - "normalized_score": 32.403879619181005 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07250755287009064, - "normalized_score": 7.250755287009064 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 
0.4305833333333333, - "normalized_score": 12.522916666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30909242021276595, - "normalized_score": 23.232491134751772 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-22", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "allknowingroger/MultiMash6-12B-slerp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 12.879, - "co2_cost": 1.6491453052321292 - } - }, - { - "id": "allknowingroger/MultiMash7-12B-slerp_bfloat16_5f91dd41fb4b58e76c52b03ed15477a046b079df_False", - "model": { - "name": "allknowingroger/MultiMash7-12B-slerp", - "sha": "5f91dd41fb4b58e76c52b03ed15477a046b079df", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 19.792293511824642, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42127887338927383, - "normalized_score": 42.12788733892738 - }, - "bbh": { - "name": "BBH", - "value": 0.5111135397195524, - "normalized_score": 31.298150090327656 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06948640483383686, - "normalized_score": 6.948640483383686 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2785234899328859, - "normalized_score": 3.8031319910514525 - }, - "musr": { - "name": "MUSR", - "value": 0.42794791666666665, - "normalized_score": 12.026822916666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3029421542553192, - "normalized_score": 22.549128250591018 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-22", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "allknowingroger/MultiMash7-12B-slerp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 12.879, - "co2_cost": 1.643542693275669 - } - }, - { - "id": "allknowingroger/MultiMash8-13B-slerp_bfloat16_5590ccd99f74301951f450f9d0271a99e97728c8_False", - "model": { - "name": "allknowingroger/MultiMash8-13B-slerp", - "sha": "5590ccd99f74301951f450f9d0271a99e97728c8", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 21.07486448648852, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4320702402957486, - "normalized_score": 43.20702402957486 - }, - "bbh": { - "name": "BBH", - "value": 0.5178483059643324, - "normalized_score": 32.27299661531551 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0770392749244713, - "normalized_score": 7.7039274924471295 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28859060402684567, - "normalized_score": 5.145413870246088 - }, - "musr": { - "name": "MUSR", - "value": 0.4423958333333333, - "normalized_score": 14.499479166666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31258311170212766, - "normalized_score": 23.620345744680847 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-26", - "submission_date": "2024-09-02", - "generation": 1, - "base_model": 
"allknowingroger/MultiMash8-13B-slerp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 12.879, - "co2_cost": 2.5984320994193433 - } - }, - { - "id": "allknowingroger/MultiMash9-13B-slerp_bfloat16_56dac45f387669baa04a8997ebb9ea63c65fbbd1_False", - "model": { - "name": "allknowingroger/MultiMash9-13B-slerp", - "sha": "56dac45f387669baa04a8997ebb9ea63c65fbbd1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 20.642969652890788, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4187810564856802, - "normalized_score": 41.878105648568024 - }, - "bbh": { - "name": "BBH", - "value": 0.5193579939678727, - "normalized_score": 32.55261171624742 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07854984894259819, - "normalized_score": 7.854984894259818 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2802013422818792, - "normalized_score": 4.026845637583895 - }, - "musr": { - "name": "MUSR", - "value": 0.4398229166666667, - "normalized_score": 14.211197916666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3100066489361702, - "normalized_score": 23.33407210401891 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-26", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "allknowingroger/MultiMash9-13B-slerp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 12.879, - "co2_cost": 1.7314619662186004 - } - }, - { - "id": "allknowingroger/MultiMerge-7B-slerp_bfloat16_a026bbea09f0b1880deed62b9081e3708be0dec2_False", - "model": { - "name": "allknowingroger/MultiMerge-7B-slerp", - "sha": "a026bbea09f0b1880deed62b9081e3708be0dec2", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.542246727772383, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3947758613811354, - "normalized_score": 39.47758613811354 - }, - "bbh": { - "name": "BBH", - "value": 0.5140224933103638, - "normalized_score": 31.803983321994554 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06646525679758308, - "normalized_score": 6.646525679758309 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.42797916666666663, - "normalized_score": 12.330729166666671 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3036901595744681, - "normalized_score": 22.632239952718678 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-11", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "allknowingroger/MultiMerge-7B-slerp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.2263444868106883 - } - }, - { - "id": "allknowingroger/Multimash3-12B-slerp_bfloat16_0b90bf0b5230d02b4ba63879fc3bf0b85d46c3ce_False", - "model": { - "name": "allknowingroger/Multimash3-12B-slerp", - "sha": "0b90bf0b5230d02b4ba63879fc3bf0b85d46c3ce", - "precision": "bfloat16", - "type": "basemergesandmoerges", - 
"weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 20.47073324126141, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44371046600796993, - "normalized_score": 44.371046600796994 - }, - "bbh": { - "name": "BBH", - "value": 0.5176624678276028, - "normalized_score": 32.1508911391619 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06268882175226587, - "normalized_score": 6.268882175226587 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2802013422818792, - "normalized_score": 4.026845637583895 - }, - "musr": { - "name": "MUSR", - "value": 0.4343958333333333, - "normalized_score": 13.0328125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3067652925531915, - "normalized_score": 22.973921394799056 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-21", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "allknowingroger/Multimash3-12B-slerp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 12.879, - "co2_cost": 1.689315902139885 - } - }, - { - "id": "allknowingroger/Multimerge-19B-pass_bfloat16_e75918ed5601f400f62601cf6c0887aa936e8a52_False", - "model": { - "name": "allknowingroger/Multimerge-19B-pass", - "sha": "e75918ed5601f400f62601cf6c0887aa936e8a52", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 4.536203105914491, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17730510600761534, - "normalized_score": 17.730510600761534 - }, - "bbh": { - "name": "BBH", - "value": 0.2891778102988436, - "normalized_score": 2.0803742908537424 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.3429583333333333, - "normalized_score": 4.303125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11685505319148937, - "normalized_score": 1.8727836879432622 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 19.188, - "co2_cost": 3.930757831495271 - } - }, - { - "id": "allknowingroger/MultiverseEx26-7B-slerp_bfloat16_43f18d84e025693f00e9be335bf12fce96089b2f_False", - "model": { - "name": "allknowingroger/MultiverseEx26-7B-slerp", - "sha": "43f18d84e025693f00e9be335bf12fce96089b2f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.69589878762239, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3938516469633905, - "normalized_score": 39.38516469633905 - }, - "bbh": { - "name": "BBH", - "value": 0.5133591871690678, - "normalized_score": 31.66377531246577 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0755287009063444, - "normalized_score": 7.552870090634441 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - 
"normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.4293125, - "normalized_score": 12.597395833333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3035239361702128, - "normalized_score": 22.613770685579198 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-03-30", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "allknowingroger/MultiverseEx26-7B-slerp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 1.2109854289342996 - } - }, - { - "id": "allknowingroger/NeuralWestSeverus-7B-slerp_bfloat16_5ee5d6a11ffc4f9733e78994169a2e1614d5e16e_False", - "model": { - "name": "allknowingroger/NeuralWestSeverus-7B-slerp", - "sha": "5ee5d6a11ffc4f9733e78994169a2e1614d5e16e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.675370567029706, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.41356046401326263, - "normalized_score": 41.35604640132626 - }, - "bbh": { - "name": "BBH", - "value": 0.5244283854305991, - "normalized_score": 33.41446681662389 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07326283987915408, - "normalized_score": 7.326283987915408 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.45287499999999997, - "normalized_score": 15.409375000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3137466755319149, - "normalized_score": 23.749630614657207 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-16", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "allknowingroger/NeuralWestSeverus-7B-slerp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.168092565997147 - } - }, - { - "id": "allknowingroger/Neuralcoven-7B-slerp_bfloat16_129b40a7fd816f679ef5d4ab29fc77345f33a7b1_False", - "model": { - "name": "allknowingroger/Neuralcoven-7B-slerp", - "sha": "129b40a7fd816f679ef5d4ab29fc77345f33a7b1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.363670451495135, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3858584112377381, - "normalized_score": 38.58584112377381 - }, - "bbh": { - "name": "BBH", - "value": 0.530287217712165, - "normalized_score": 33.79913505465432 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07854984894259819, - "normalized_score": 7.854984894259818 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28523489932885904, - "normalized_score": 4.697986577181204 - }, - "musr": { - "name": "MUSR", - "value": 0.429, - "normalized_score": 11.758333333333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3293716755319149, - "normalized_score": 25.485741725768317 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-17", - 
"submission_date": "2024-06-26", - "generation": 1, - "base_model": "allknowingroger/Neuralcoven-7B-slerp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.2638224311243544 - } - }, - { - "id": "allknowingroger/Neuralmultiverse-7B-slerp_bfloat16_a65fe05e26e10a488b08264ac8ed73a49c3f263a_False", - "model": { - "name": "allknowingroger/Neuralmultiverse-7B-slerp", - "sha": "a65fe05e26e10a488b08264ac8ed73a49c3f263a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.36103031824635, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3769154731667531, - "normalized_score": 37.69154731667531 - }, - "bbh": { - "name": "BBH", - "value": 0.5165722210470375, - "normalized_score": 32.10018047347172 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0649546827794562, - "normalized_score": 6.495468277945619 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28439597315436244, - "normalized_score": 4.5861297539149914 - }, - "musr": { - "name": "MUSR", - "value": 0.42804166666666665, - "normalized_score": 12.60520833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30418882978723405, - "normalized_score": 22.687647754137117 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-17", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "allknowingroger/Neuralmultiverse-7B-slerp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.2343190282871481 - } - }, - { - "id": "allknowingroger/Ph3della5-14B_float16_9c6819a910d4da414dd67c10da3bff3f986fefa5_False", - "model": { - "name": "allknowingroger/Ph3della5-14B", - "sha": "9c6819a910d4da414dd67c10da3bff3f986fefa5", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 30.46973875649518, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47985567183960776, - "normalized_score": 47.98556718396078 - }, - "bbh": { - "name": "BBH", - "value": 0.6331746353794991, - "normalized_score": 48.41436428305058 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17673716012084592, - "normalized_score": 17.673716012084594 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3422818791946309, - "normalized_score": 12.304250559284117 - }, - "musr": { - "name": "MUSR", - "value": 0.4386145833333333, - "normalized_score": 14.360156249999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4787234042553192, - "normalized_score": 42.08037825059102 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-05", - "submission_date": "2024-10-08", - "generation": 1, - "base_model": "allknowingroger/Ph3della5-14B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 13.96, - "co2_cost": 2.0918496924067207 - } - }, - { - "id": "allknowingroger/Ph3merge-14B_bfloat16_6d0ddaa4e0cf4c82d7149cc726b08be5753a760a_False", - "model": { - "name": "allknowingroger/Ph3merge-14B", - "sha": "6d0ddaa4e0cf4c82d7149cc726b08be5753a760a", - "precision": "bfloat16", - 
"type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 23.68333279737927, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.27012881376968667, - "normalized_score": 27.012881376968664 - }, - "bbh": { - "name": "BBH", - "value": 0.638087568868341, - "normalized_score": 48.88242371785896 - }, - "math": { - "name": "MATH Level 5", - "value": 0.010574018126888218, - "normalized_score": 1.0574018126888218 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33808724832214765, - "normalized_score": 11.74496644295302 - }, - "musr": { - "name": "MUSR", - "value": 0.4334375, - "normalized_score": 13.279687500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4611037234042553, - "normalized_score": 40.12263593380615 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-30", - "submission_date": "2024-09-02", - "generation": 1, - "base_model": "allknowingroger/Ph3merge-14B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 13.619, - "co2_cost": 4.024664867271926 - } - }, - { - "id": "allknowingroger/Ph3merge2-14B_bfloat16_2256ab821e286a1d8a4f0d42e00a50013e119671_False", - "model": { - "name": "allknowingroger/Ph3merge2-14B", - "sha": "2256ab821e286a1d8a4f0d42e00a50013e119671", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 7.962730746600417, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17061064641817045, - "normalized_score": 17.061064641817044 - }, - "bbh": { - "name": "BBH", - "value": 0.3606937444321621, - "normalized_score": 10.549967885447563 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.3910833333333333, - "normalized_score": 6.6520833333333345 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1722905585106383, - "normalized_score": 8.03228427895981 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-10-08", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 13.619, - "co2_cost": 4.122009189633371 - } - }, - { - "id": "allknowingroger/Ph3merge3-14B_bfloat16_90a036f7f136932ea525b5fd26cf2f54a66141af_False", - "model": { - "name": "allknowingroger/Ph3merge3-14B", - "sha": "90a036f7f136932ea525b5fd26cf2f54a66141af", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 7.931823747573229, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1645157072124186, - "normalized_score": 16.45157072124186 - }, - "bbh": { - "name": "BBH", - "value": 0.3597431731140411, - "normalized_score": 10.391379646236162 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28523489932885904, - "normalized_score": 4.697986577181204 - }, - 
"musr": { - "name": "MUSR", - "value": 0.40819791666666666, - "normalized_score": 8.858072916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16472739361702127, - "normalized_score": 7.191932624113473 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-02", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 13.619, - "co2_cost": 3.9785131756969263 - } - }, - { - "id": "allknowingroger/Ph3task1-14B_float16_c9a5bab157dbdd281c651a5b7ea82a8bc64aa420_False", - "model": { - "name": "allknowingroger/Ph3task1-14B", - "sha": "c9a5bab157dbdd281c651a5b7ea82a8bc64aa420", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 30.54839802644553, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.46946435457918323, - "normalized_score": 46.94643545791833 - }, - "bbh": { - "name": "BBH", - "value": 0.63178060736657, - "normalized_score": 47.92690847304542 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16691842900302115, - "normalized_score": 16.691842900302113 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35067114093959734, - "normalized_score": 13.422818791946312 - }, - "musr": { - "name": "MUSR", - "value": 0.45077083333333334, - "normalized_score": 16.81302083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4734042553191489, - "normalized_score": 41.48936170212765 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-07", - "submission_date": "2024-10-08", - "generation": 1, - "base_model": "allknowingroger/Ph3task1-14B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 13.96, - "co2_cost": 2.017696993674693 - } - }, - { - "id": "allknowingroger/Ph3task2-14B_float16_2193bfec75bc90e87bc57863e02deefbdd195f9f_False", - "model": { - "name": "allknowingroger/Ph3task2-14B", - "sha": "2193bfec75bc90e87bc57863e02deefbdd195f9f", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 28.611110586227724, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4713127834146731, - "normalized_score": 47.13127834146731 - }, - "bbh": { - "name": "BBH", - "value": 0.6098412220695854, - "normalized_score": 44.08179620906436 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14652567975830816, - "normalized_score": 14.652567975830816 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33053691275167785, - "normalized_score": 10.738255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.4535, - "normalized_score": 16.620833333333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44597739361702127, - "normalized_score": 38.44193262411347 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-08", - "submission_date": "2024-10-08", - "generation": 1, - "base_model": "allknowingroger/Ph3task2-14B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, 
- "params_billions": 13.96, - "co2_cost": 1.8701316050849537 - } - }, - { - "id": "allknowingroger/Ph3task3-14B_float16_359de5c4969057206f846a41c72073b3429317fd_False", - "model": { - "name": "allknowingroger/Ph3task3-14B", - "sha": "359de5c4969057206f846a41c72073b3429317fd", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 30.710221507611795, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4962421929369628, - "normalized_score": 49.62421929369628 - }, - "bbh": { - "name": "BBH", - "value": 0.6297915743094921, - "normalized_score": 47.99849937558212 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17598187311178248, - "normalized_score": 17.598187311178247 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3414429530201342, - "normalized_score": 12.192393736017896 - }, - "musr": { - "name": "MUSR", - "value": 0.44255208333333335, - "normalized_score": 14.952343749999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.47706117021276595, - "normalized_score": 41.89568557919622 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-08", - "submission_date": "2024-10-08", - "generation": 1, - "base_model": "allknowingroger/Ph3task3-14B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 13.96, - "co2_cost": 2.0752301196035647 - } - }, - { - "id": "allknowingroger/Ph3unsloth-3B-slerp_bfloat16_465444b3cdd43876717f7386ea2f3357c5fe8e53_False", - "model": { - "name": "allknowingroger/Ph3unsloth-3B-slerp", - "sha": "465444b3cdd43876717f7386ea2f3357c5fe8e53", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.153514887760053, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.18944511673470835, - "normalized_score": 18.944511673470835 - }, - "bbh": { - "name": "BBH", - "value": 0.5468077356147099, - "normalized_score": 36.45877270267158 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10120845921450151, - "normalized_score": 10.120845921450151 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32466442953020136, - "normalized_score": 9.955257270693513 - }, - "musr": { - "name": "MUSR", - "value": 0.45278124999999997, - "normalized_score": 15.430989583333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3700964095744681, - "normalized_score": 30.010712174940902 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-31", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "allknowingroger/Ph3unsloth-3B-slerp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.821, - "co2_cost": 1.0580837656713333 - } - }, - { - "id": "allknowingroger/Phi3mash1-17B-pass_float16_fcd265996f026475c15fa44833e0481dc610e469_False", - "model": { - "name": "allknowingroger/Phi3mash1-17B-pass", - "sha": "fcd265996f026475c15fa44833e0481dc610e469", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.34996880563698, - "has_chat_template": false - }, - 
"evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.18842116694814204, - "normalized_score": 18.8421166948142 - }, - "bbh": { - "name": "BBH", - "value": 0.6128878795560929, - "normalized_score": 45.25041934691859 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3196308724832215, - "normalized_score": 9.284116331096197 - }, - "musr": { - "name": "MUSR", - "value": 0.445125, - "normalized_score": 14.840624999999994 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.45894281914893614, - "normalized_score": 39.882535460992905 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-28", - "submission_date": "2024-09-02", - "generation": 1, - "base_model": "allknowingroger/Phi3mash1-17B-pass (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 16.687, - "co2_cost": 2.916252902936439 - } - }, - { - "id": "allknowingroger/Quen2-65B_float16_2259cd8ea037d0e590920e7106b0fd1641a96c1d_False", - "model": { - "name": "allknowingroger/Quen2-65B", - "sha": "2259cd8ea037d0e590920e7106b0fd1641a96c1d", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.5313443657802126, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17578137120617737, - "normalized_score": 17.57813712061774 - }, - "bbh": { - "name": "BBH", - "value": 0.27565161872324456, - "normalized_score": 1.23986036838978 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23573825503355705, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.32085416666666666, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11136968085106383, - "normalized_score": 1.2632978723404247 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-19", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 63.923, - "co2_cost": 26.63484711957512 - } - }, - { - "id": "allknowingroger/Qwen2.5-42B-AGI_bfloat16_8939b021a9d84bc2e4ae0ea4f351d807f35b91d7_False", - "model": { - "name": "allknowingroger/Qwen2.5-42B-AGI", - "sha": "8939b021a9d84bc2e4ae0ea4f351d807f35b91d7", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.47082956616707, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19129354557019818, - "normalized_score": 19.129354557019816 - }, - "bbh": { - "name": "BBH", - "value": 0.2942104150907988, - "normalized_score": 2.2358856564144527 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.36203125, - "normalized_score": 2.2539062500000013 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11677194148936171, - "normalized_score": 1.8635490543735225 - } - }, 
- "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-10-21", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 42.516, - "co2_cost": 17.7139622556385 - } - }, - { - "id": "allknowingroger/Qwen2.5-7B-task2_bfloat16_6f3ae972b2bbde0383c3a774e0e788a1af0dabc5_False", - "model": { - "name": "allknowingroger/Qwen2.5-7B-task2", - "sha": "6f3ae972b2bbde0383c3a774e0e788a1af0dabc5", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 29.877934142366954, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45270327176336567, - "normalized_score": 45.270327176336565 - }, - "bbh": { - "name": "BBH", - "value": 0.5625940266685543, - "normalized_score": 37.52854979009311 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3549848942598187, - "normalized_score": 35.49848942598187 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3162751677852349, - "normalized_score": 8.83668903803132 - }, - "musr": { - "name": "MUSR", - "value": 0.43696874999999996, - "normalized_score": 13.054427083333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4517121010638298, - "normalized_score": 39.079122340425535 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-31", - "submission_date": "2024-11-04", - "generation": 1, - "base_model": "allknowingroger/Qwen2.5-7B-task2 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.3837787587187609 - } - }, - { - "id": "allknowingroger/Qwen2.5-7B-task3_bfloat16_b1e524004242cdeec838ba21bce44ebb8598c12f_False", - "model": { - "name": "allknowingroger/Qwen2.5-7B-task3", - "sha": "b1e524004242cdeec838ba21bce44ebb8598c12f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 28.738760798626867, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.512903540383959, - "normalized_score": 51.2903540383959 - }, - "bbh": { - "name": "BBH", - "value": 0.5397623813486384, - "normalized_score": 34.385984193445104 - }, - "math": { - "name": "MATH Level 5", - "value": 0.26057401812688824, - "normalized_score": 26.057401812688823 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31711409395973156, - "normalized_score": 8.948545861297541 - }, - "musr": { - "name": "MUSR", - "value": 0.43557291666666664, - "normalized_score": 12.846614583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.45013297872340424, - "normalized_score": 38.90366430260047 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-01", - "submission_date": "2024-11-04", - "generation": 1, - "base_model": "allknowingroger/Qwen2.5-7B-task3 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.2464163421179026 - } - }, - { - "id": "allknowingroger/Qwen2.5-7B-task4_bfloat16_ef4fe9331a0b9c34d829fcd5b1a09a7056e9300f_False", - 
"model": { - "name": "allknowingroger/Qwen2.5-7B-task4", - "sha": "ef4fe9331a0b9c34d829fcd5b1a09a7056e9300f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 30.06180942847573, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5005385709916355, - "normalized_score": 50.05385709916355 - }, - "bbh": { - "name": "BBH", - "value": 0.5583446038580263, - "normalized_score": 37.02526862429277 - }, - "math": { - "name": "MATH Level 5", - "value": 0.311178247734139, - "normalized_score": 31.1178247734139 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32046979865771813, - "normalized_score": 9.395973154362418 - }, - "musr": { - "name": "MUSR", - "value": 0.43954166666666666, - "normalized_score": 13.209375000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.45611702127659576, - "normalized_score": 39.568557919621746 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-01", - "submission_date": "2024-11-04", - "generation": 1, - "base_model": "allknowingroger/Qwen2.5-7B-task4 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.324198591253045 - } - }, - { - "id": "allknowingroger/Qwen2.5-7B-task7_bfloat16_090a873c77ed291867ddaf20249ed7f479ba4ba9_False", - "model": { - "name": "allknowingroger/Qwen2.5-7B-task7", - "sha": "090a873c77ed291867ddaf20249ed7f479ba4ba9", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 24.016939559246442, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42842325030917966, - "normalized_score": 42.84232503091796 - }, - "bbh": { - "name": "BBH", - "value": 0.555243179835915, - "normalized_score": 37.51817009438029 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0649546827794562, - "normalized_score": 6.495468277945619 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32046979865771813, - "normalized_score": 9.395973154362418 - }, - "musr": { - "name": "MUSR", - "value": 0.4325625, - "normalized_score": 13.036979166666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4133144946808511, - "normalized_score": 34.812721631205676 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-04", - "submission_date": "2024-11-04", - "generation": 1, - "base_model": "allknowingroger/Qwen2.5-7B-task7 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.3565170555703252 - } - }, - { - "id": "allknowingroger/Qwen2.5-7B-task8_bfloat16_489a9a6fc98001026d9b96563d715cad43aabc8c_False", - "model": { - "name": "allknowingroger/Qwen2.5-7B-task8", - "sha": "489a9a6fc98001026d9b96563d715cad43aabc8c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 30.109425461585875, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4645185884564068, - "normalized_score": 46.45185884564068 - }, - "bbh": { - "name": "BBH", - "value": 0.5524895381578828, - 
"normalized_score": 36.09273684636751 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3527190332326284, - "normalized_score": 35.27190332326284 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32046979865771813, - "normalized_score": 9.395973154362418 - }, - "musr": { - "name": "MUSR", - "value": 0.45144791666666667, - "normalized_score": 15.297656250000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44331781914893614, - "normalized_score": 38.14642434988179 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-04", - "submission_date": "2024-11-04", - "generation": 1, - "base_model": "allknowingroger/Qwen2.5-7B-task8 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.3955516221737763 - } - }, - { - "id": "allknowingroger/Qwen2.5-slerp-14B_bfloat16_a44b0ea8291b62785152c2fe6ab336f5da672d1e_False", - "model": { - "name": "allknowingroger/Qwen2.5-slerp-14B", - "sha": "a44b0ea8291b62785152c2fe6ab336f5da672d1e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.16277613953164, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49282016161562425, - "normalized_score": 49.282016161562424 - }, - "bbh": { - "name": "BBH", - "value": 0.65124197415124, - "normalized_score": 49.78953727809471 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4622356495468278, - "normalized_score": 46.22356495468278 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3674496644295302, - "normalized_score": 15.659955257270694 - }, - "musr": { - "name": "MUSR", - "value": 0.47439583333333335, - "normalized_score": 19.36614583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5378989361702128, - "normalized_score": 48.65543735224587 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-17", - "submission_date": "2024-10-21", - "generation": 1, - "base_model": "allknowingroger/Qwen2.5-slerp-14B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 4.733763744815111 - } - }, - { - "id": "allknowingroger/QwenSlerp12-7B_bfloat16_be6510452755c2c8e559333ecaf68dc6b37637d9_False", - "model": { - "name": "allknowingroger/QwenSlerp12-7B", - "sha": "be6510452755c2c8e559333ecaf68dc6b37637d9", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 29.989027415795306, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5075577246151324, - "normalized_score": 50.75577246151323 - }, - "bbh": { - "name": "BBH", - "value": 0.5556448443090559, - "normalized_score": 36.41130337400265 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2945619335347432, - "normalized_score": 29.45619335347432 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31543624161073824, - "normalized_score": 8.7248322147651 - }, - "musr": { - "name": "MUSR", - "value": 0.45947916666666666, - "normalized_score": 16.134895833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4460605053191489, - "normalized_score": 38.45116725768321 - } - }, - 
"features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-18", - "submission_date": "2024-11-22", - "generation": 1, - "base_model": "allknowingroger/QwenSlerp12-7B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.3660681974403914 - } - }, - { - "id": "allknowingroger/QwenSlerp4-14B_bfloat16_3a55f52f639fc380a829b7cace5be3c96fcad730_False", - "model": { - "name": "allknowingroger/QwenSlerp4-14B", - "sha": "3a55f52f639fc380a829b7cace5be3c96fcad730", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.79874440145977, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6327544249258634, - "normalized_score": 63.275442492586336 - }, - "bbh": { - "name": "BBH", - "value": 0.6483250205703057, - "normalized_score": 49.38124012444703 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3693353474320242, - "normalized_score": 36.933534743202415 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3724832214765101, - "normalized_score": 16.33109619686801 - }, - "musr": { - "name": "MUSR", - "value": 0.46496875, - "normalized_score": 17.58776041666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5435505319148937, - "normalized_score": 49.28339243498819 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-27", - "submission_date": "2024-12-06", - "generation": 1, - "base_model": "allknowingroger/QwenSlerp4-14B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 14.766, - "co2_cost": 3.758323525776358 - } - }, - { - "id": "allknowingroger/QwenSlerp5-14B_bfloat16_f1eac24bb5338ae11951d38ba09ff71f4d319cc9_False", - "model": { - "name": "allknowingroger/QwenSlerp5-14B", - "sha": "f1eac24bb5338ae11951d38ba09ff71f4d319cc9", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.3582576505929, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7119387669162267, - "normalized_score": 71.19387669162268 - }, - "bbh": { - "name": "BBH", - "value": 0.6356573710010681, - "normalized_score": 47.38764037444587 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3564954682779456, - "normalized_score": 35.64954682779456 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3649328859060403, - "normalized_score": 15.324384787472036 - }, - "musr": { - "name": "MUSR", - "value": 0.4675416666666667, - "normalized_score": 17.809375000000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5390625, - "normalized_score": 48.78472222222222 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-27", - "submission_date": "2024-12-06", - "generation": 1, - "base_model": "allknowingroger/QwenSlerp5-14B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 14.766, - "co2_cost": 3.7236249813326867 - } - }, - { - "id": "allknowingroger/QwenSlerp6-14B_bfloat16_eff132ab6d7f612b46c47b29966f8391cea7b407_False", - "model": { - 
"name": "allknowingroger/QwenSlerp6-14B", - "sha": "eff132ab6d7f612b46c47b29966f8391cea7b407", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.53457080924675, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6866846633598851, - "normalized_score": 68.66846633598851 - }, - "bbh": { - "name": "BBH", - "value": 0.6384454358065165, - "normalized_score": 47.588316576052456 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3723564954682779, - "normalized_score": 37.235649546827794 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3733221476510067, - "normalized_score": 16.442953020134222 - }, - "musr": { - "name": "MUSR", - "value": 0.46896875, - "normalized_score": 18.32109375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5405585106382979, - "normalized_score": 48.95094562647754 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-28", - "submission_date": "2024-12-06", - "generation": 1, - "base_model": "allknowingroger/QwenSlerp6-14B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 14.766, - "co2_cost": 3.48165447545322 - } - }, - { - "id": "allknowingroger/QwenStock1-14B_bfloat16_79daacc58a5ec97f297c4a99dbb31d19ae4c59ca_False", - "model": { - "name": "allknowingroger/QwenStock1-14B", - "sha": "79daacc58a5ec97f297c4a99dbb31d19ae4c59ca", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.14564924678278, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5634117474966422, - "normalized_score": 56.34117474966422 - }, - "bbh": { - "name": "BBH", - "value": 0.6528491305599156, - "normalized_score": 50.07629311887721 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3768882175226586, - "normalized_score": 37.68882175226586 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3766778523489933, - "normalized_score": 16.890380313199106 - }, - "musr": { - "name": "MUSR", - "value": 0.47296875, - "normalized_score": 18.787760416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5418051861702128, - "normalized_score": 49.08946513002365 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-28", - "submission_date": "2024-12-06", - "generation": 1, - "base_model": "allknowingroger/QwenStock1-14B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 4.08054899462919 - } - }, - { - "id": "allknowingroger/QwenStock2-14B_bfloat16_69fd5f98c812cfa26d8514349669158a93058bf7_False", - "model": { - "name": "allknowingroger/QwenStock2-14B", - "sha": "69fd5f98c812cfa26d8514349669158a93058bf7", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.41917529577626, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5563427261887348, - "normalized_score": 55.63427261887348 - }, - "bbh": { - "name": "BBH", - "value": 0.656885010139055, - "normalized_score": 50.5982763137811 - }, - "math": 
{ - "name": "MATH Level 5", - "value": 0.38821752265861026, - "normalized_score": 38.82175226586103 - }, - "gpqa": { - "name": "GPQA", - "value": 0.37919463087248323, - "normalized_score": 17.225950782997764 - }, - "musr": { - "name": "MUSR", - "value": 0.47560416666666666, - "normalized_score": 19.28385416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5405585106382979, - "normalized_score": 48.95094562647754 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-29", - "submission_date": "2024-12-06", - "generation": 1, - "base_model": "allknowingroger/QwenStock2-14B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 14.766, - "co2_cost": 4.042087995274873 - } - }, - { - "id": "allknowingroger/QwenStock3-14B_bfloat16_834fbf35e01efc44e4f2c8c372d7c1412754c0fa_False", - "model": { - "name": "allknowingroger/QwenStock3-14B", - "sha": "834fbf35e01efc44e4f2c8c372d7c1412754c0fa", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.32000426856688, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5615134509767417, - "normalized_score": 56.15134509767417 - }, - "bbh": { - "name": "BBH", - "value": 0.6565322062808641, - "normalized_score": 50.576674117962874 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3776435045317221, - "normalized_score": 37.764350453172206 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3783557046979866, - "normalized_score": 17.114093959731544 - }, - "musr": { - "name": "MUSR", - "value": 0.4755729166666667, - "normalized_score": 19.11328125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5428025265957447, - "normalized_score": 49.20028073286053 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-29", - "submission_date": "2024-12-06", - "generation": 1, - "base_model": "allknowingroger/QwenStock3-14B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 14.766, - "co2_cost": 4.2992838879816615 - } - }, - { - "id": "allknowingroger/Qwenslerp2-14B_bfloat16_38e902c114b5640509a8615fc2a2546e07a5fb3f_False", - "model": { - "name": "allknowingroger/Qwenslerp2-14B", - "sha": "38e902c114b5640509a8615fc2a2546e07a5fb3f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.08598326949303, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5007136619724553, - "normalized_score": 50.07136619724553 - }, - "bbh": { - "name": "BBH", - "value": 0.6554876216007552, - "normalized_score": 50.30369191199753 - }, - "math": { - "name": "MATH Level 5", - "value": 0.44561933534743203, - "normalized_score": 44.561933534743204 - }, - "gpqa": { - "name": "GPQA", - "value": 0.36828859060402686, - "normalized_score": 15.771812080536915 - }, - "musr": { - "name": "MUSR", - "value": 0.4729375, - "normalized_score": 18.883854166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5403091755319149, - "normalized_score": 48.92324172576833 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": 
false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-19", - "submission_date": "2024-10-21", - "generation": 1, - "base_model": "allknowingroger/Qwenslerp2-14B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 14.77, - "co2_cost": 4.528886847953466 - } - }, - { - "id": "allknowingroger/Qwenslerp2-7B_bfloat16_46fe65fc2567b2430fa421478d47134ffe55c8f8_False", - "model": { - "name": "allknowingroger/Qwenslerp2-7B", - "sha": "46fe65fc2567b2430fa421478d47134ffe55c8f8", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 30.810469270516734, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5294396645345462, - "normalized_score": 52.943966453454614 - }, - "bbh": { - "name": "BBH", - "value": 0.5609127334788001, - "normalized_score": 37.437245340819274 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3421450151057402, - "normalized_score": 34.21450151057402 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31291946308724833, - "normalized_score": 8.389261744966444 - }, - "musr": { - "name": "MUSR", - "value": 0.4356041666666666, - "normalized_score": 12.817187500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4515458776595745, - "normalized_score": 39.06065307328605 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-31", - "submission_date": "2024-11-04", - "generation": 1, - "base_model": "allknowingroger/Qwenslerp2-7B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.2923099159013567 - } - }, - { - "id": "allknowingroger/Qwenslerp3-14B_bfloat16_ac60a6c4e224e5b52c42bebfd0cf81f920befdef_False", - "model": { - "name": "allknowingroger/Qwenslerp3-14B", - "sha": "ac60a6c4e224e5b52c42bebfd0cf81f920befdef", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.080923366964626, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5052349986923584, - "normalized_score": 50.52349986923585 - }, - "bbh": { - "name": "BBH", - "value": 0.6520835120117142, - "normalized_score": 49.80982854016478 - }, - "math": { - "name": "MATH Level 5", - "value": 0.44637462235649544, - "normalized_score": 44.637462235649544 - }, - "gpqa": { - "name": "GPQA", - "value": 0.375, - "normalized_score": 16.666666666666664 - }, - "musr": { - "name": "MUSR", - "value": 0.46760416666666665, - "normalized_score": 18.017187499999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5394780585106383, - "normalized_score": 48.83089539007093 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-19", - "submission_date": "2024-10-21", - "generation": 1, - "base_model": "allknowingroger/Qwenslerp3-14B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 14.77, - "co2_cost": 4.439262164468745 - } - }, - { - "id": "allknowingroger/Qwenslerp3-7B_bfloat16_0351c5f6207cafd15e10e6d8dfe61b50d1b2378b_False", - "model": { - "name": "allknowingroger/Qwenslerp3-7B", - "sha": 
"0351c5f6207cafd15e10e6d8dfe61b50d1b2378b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 30.63274991183272, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.501837347127843, - "normalized_score": 50.1837347127843 - }, - "bbh": { - "name": "BBH", - "value": 0.5580160200086862, - "normalized_score": 37.15398413723134 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3217522658610272, - "normalized_score": 32.17522658610272 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32466442953020136, - "normalized_score": 9.955257270693513 - }, - "musr": { - "name": "MUSR", - "value": 0.45151041666666664, - "normalized_score": 14.972135416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.45420545212765956, - "normalized_score": 39.35616134751773 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-31", - "submission_date": "2024-11-04", - "generation": 1, - "base_model": "allknowingroger/Qwenslerp3-7B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.2188906930043304 - } - }, - { - "id": "allknowingroger/ROGERphi-7B-slerp_bfloat16_a92f90ae5e4286daa2399df4951a3347aaf414e1_False", - "model": { - "name": "allknowingroger/ROGERphi-7B-slerp", - "sha": "a92f90ae5e4286daa2399df4951a3347aaf414e1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.70747082131683, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3861332375873793, - "normalized_score": 38.61332375873792 - }, - "bbh": { - "name": "BBH", - "value": 0.5195583428468424, - "normalized_score": 32.81903240379116 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07326283987915408, - "normalized_score": 7.326283987915408 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28859060402684567, - "normalized_score": 5.145413870246088 - }, - "musr": { - "name": "MUSR", - "value": 0.46853125, - "normalized_score": 17.53307291666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3052692819148936, - "normalized_score": 22.807697990543733 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-03-20", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "allknowingroger/ROGERphi-7B-slerp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.232698875974477 - } - }, - { - "id": "allknowingroger/RogerMerge-7B-slerp_bfloat16_397f5c0b52a536c130982ca2a7c3056358bbdf92_False", - "model": { - "name": "allknowingroger/RogerMerge-7B-slerp", - "sha": "397f5c0b52a536c130982ca2a7c3056358bbdf92", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.61773581081775, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.39330199426410817, - "normalized_score": 39.330199426410815 - }, - "bbh": { - "name": "BBH", - "value": 0.5160176493085935, - "normalized_score": 31.98716596760703 - }, - "math": { - 
"name": "MATH Level 5", - "value": 0.06873111782477341, - "normalized_score": 6.873111782477341 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2802013422818792, - "normalized_score": 4.026845637583895 - }, - "musr": { - "name": "MUSR", - "value": 0.43197916666666664, - "normalized_score": 12.930729166666671 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30302526595744683, - "normalized_score": 22.558362884160758 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-11", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "allknowingroger/RogerMerge-7B-slerp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.2357162254144245 - } - }, - { - "id": "allknowingroger/Rombos-LLM-V2.5-Qwen-42b_bfloat16_977192ef80c5c904697f1d85d2eeab5db3947c65_False", - "model": { - "name": "allknowingroger/Rombos-LLM-V2.5-Qwen-42b", - "sha": "977192ef80c5c904697f1d85d2eeab5db3947c65", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.559640996200532, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1879213819332704, - "normalized_score": 18.79213819332704 - }, - "bbh": { - "name": "BBH", - "value": 0.2969164076001621, - "normalized_score": 2.607639713842668 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2625838926174497, - "normalized_score": 1.6778523489932917 - }, - "musr": { - "name": "MUSR", - "value": 0.36333333333333334, - "normalized_score": 2.4166666666666683 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11677194148936171, - "normalized_score": 1.8635490543735225 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-10-21", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 42.516, - "co2_cost": 16.89576539452686 - } - }, - { - "id": "allknowingroger/Strangecoven-7B-slerp_bfloat16_8bc9d8f972d15fdd3e02c602ef4f549493bf2208_False", - "model": { - "name": "allknowingroger/Strangecoven-7B-slerp", - "sha": "8bc9d8f972d15fdd3e02c602ef4f549493bf2208", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.311977055139792, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.37464261492839, - "normalized_score": 37.464261492839 - }, - "bbh": { - "name": "BBH", - "value": 0.5368022290282338, - "normalized_score": 34.832235357083775 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07628398791540786, - "normalized_score": 7.628398791540786 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28942953020134227, - "normalized_score": 5.257270693512303 - }, - "musr": { - "name": "MUSR", - "value": 0.4198854166666666, - "normalized_score": 10.419010416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33643617021276595, - "normalized_score": 26.27068557919622 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": 
false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-16", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "allknowingroger/Strangecoven-7B-slerp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 1.2649379377675427 - } - }, - { - "id": "allknowingroger/Weirdslerp2-25B_bfloat16_4221341fe45e3ee6eaab27830b27d46bbbd5ea23_False", - "model": { - "name": "allknowingroger/Weirdslerp2-25B", - "sha": "4221341fe45e3ee6eaab27830b27d46bbbd5ea23", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.039649477594208, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1754068094877148, - "normalized_score": 17.54068094877148 - }, - "bbh": { - "name": "BBH", - "value": 0.2873695911207614, - "normalized_score": 1.5659917737677607 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24916107382550334, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3523541666666667, - "normalized_score": 3.7109374999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11278257978723404, - "normalized_score": 1.4202866430260035 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-10-21", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 25.204, - "co2_cost": 3.4154245881450267 - } - }, - { - "id": "allknowingroger/WestlakeMaziyar-7B-slerp_bfloat16_751534a844b0d439fe62f98bf8882fe9ab9872e0_False", - "model": { - "name": "allknowingroger/WestlakeMaziyar-7B-slerp", - "sha": "751534a844b0d439fe62f98bf8882fe9ab9872e0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.18341722963105, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.48377748817581795, - "normalized_score": 48.3777488175818 - }, - "bbh": { - "name": "BBH", - "value": 0.5245479952765804, - "normalized_score": 33.34281144377223 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06646525679758308, - "normalized_score": 6.646525679758309 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.44738541666666665, - "normalized_score": 14.48984375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3077626329787234, - "normalized_score": 23.084736997635936 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-16", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "allknowingroger/WestlakeMaziyar-7B-slerp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.2633411170478208 - } - }, - { - "id": "allknowingroger/YamMaths-7B-slerp_bfloat16_bd4ac9d63ca88c80d34fa60ef5cbb56d60a39077_False", - "model": { - "name": "allknowingroger/YamMaths-7B-slerp", - "sha": "bd4ac9d63ca88c80d34fa60ef5cbb56d60a39077", - "precision": 
"bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.55230690685158, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4148093724650594, - "normalized_score": 41.48093724650594 - }, - "bbh": { - "name": "BBH", - "value": 0.5155845857281723, - "normalized_score": 32.1333222251701 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08534743202416918, - "normalized_score": 8.534743202416918 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2802013422818792, - "normalized_score": 4.026845637583895 - }, - "musr": { - "name": "MUSR", - "value": 0.43836458333333334, - "normalized_score": 13.462239583333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3130817819148936, - "normalized_score": 23.675753546099287 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-02", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "allknowingroger/YamMaths-7B-slerp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 1.225247122036009 - } - }, - { - "id": "allknowingroger/Yi-1.5-34B_bfloat16_fef96e380cb3aeecac8e2e53ad2c73a1187beb68_False", - "model": { - "name": "allknowingroger/Yi-1.5-34B", - "sha": "fef96e380cb3aeecac8e2e53ad2c73a1187beb68", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.25273291237777, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16391618682872555, - "normalized_score": 16.391618682872554 - }, - "bbh": { - "name": "BBH", - "value": 0.28272506287695653, - "normalized_score": 1.3390433749257278 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.38565625, - "normalized_score": 5.6070312499999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10954122340425532, - "normalized_score": 1.0601359338061456 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-10-21", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 34.389, - "co2_cost": 10.504515486676762 - } - }, - { - "id": "allknowingroger/Yi-blossom-40B_bfloat16_d1bf1cf9339808193c5a56ef23fecdfd1012acfb_False", - "model": { - "name": "allknowingroger/Yi-blossom-40B", - "sha": "d1bf1cf9339808193c5a56ef23fecdfd1012acfb", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.827458303350088, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20088587170928693, - "normalized_score": 20.08858717092869 - }, - "bbh": { - "name": "BBH", - "value": 0.32150442258143547, - "normalized_score": 5.539183494900659 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 
3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.3842604166666666, - "normalized_score": 5.199218749999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10804521276595745, - "normalized_score": 0.8939125295508273 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-19", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 18.769, - "co2_cost": 1.9769146319373172 - } - }, - { - "id": "allknowingroger/Yibuddy-35B_bfloat16_592e1e52b97ec88a80ba3b496c19f2498ada4ea3_False", - "model": { - "name": "allknowingroger/Yibuddy-35B", - "sha": "592e1e52b97ec88a80ba3b496c19f2498ada4ea3", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.283170568255755, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4234774841864032, - "normalized_score": 42.34774841864032 - }, - "bbh": { - "name": "BBH", - "value": 0.5916185369526096, - "normalized_score": 42.80824233844326 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15709969788519637, - "normalized_score": 15.709969788519636 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35570469798657717, - "normalized_score": 14.093959731543624 - }, - "musr": { - "name": "MUSR", - "value": 0.45045833333333335, - "normalized_score": 15.973958333333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44888630319148937, - "normalized_score": 38.765144799054376 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-17", - "submission_date": "2024-10-08", - "generation": 1, - "base_model": "allknowingroger/Yibuddy-35B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 34.389, - "co2_cost": 10.760124957451268 - } - }, - { - "id": "allknowingroger/Yillama-40B_bfloat16_65db687755e716481a218cac99d20619d78e41f7_False", - "model": { - "name": "allknowingroger/Yillama-40B", - "sha": "65db687755e716481a218cac99d20619d78e41f7", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 8.31148742313468, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16968643200042555, - "normalized_score": 16.968643200042553 - }, - "bbh": { - "name": "BBH", - "value": 0.40628855371888356, - "normalized_score": 15.875797412234048 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.3500625, - "normalized_score": 1.7578124999999993 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1981382978723404, - "normalized_score": 10.904255319148934 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-19", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 34.389, - "co2_cost": 
7.048132234928361 - } - }, - { - "id": "allknowingroger/Yislerp-34B_bfloat16_131ad918edd652271510ee8dba63d3e7319df133_False", - "model": { - "name": "allknowingroger/Yislerp-34B", - "sha": "131ad918edd652271510ee8dba63d3e7319df133", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.39892579588029, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3691970637907419, - "normalized_score": 36.91970637907419 - }, - "bbh": { - "name": "BBH", - "value": 0.6158722731484186, - "normalized_score": 45.9816957285586 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21601208459214502, - "normalized_score": 21.6012084592145 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35822147651006714, - "normalized_score": 14.429530201342287 - }, - "musr": { - "name": "MUSR", - "value": 0.456625, - "normalized_score": 15.778125000000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4751496010638298, - "normalized_score": 41.68328900709219 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-16", - "submission_date": "2024-09-19", - "generation": 1, - "base_model": "allknowingroger/Yislerp-34B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 34.389, - "co2_cost": 5.977555745763449 - } - }, - { - "id": "allknowingroger/Yislerp2-34B_bfloat16_3147cf866736b786347928b655c887e8b9c07bfc_False", - "model": { - "name": "allknowingroger/Yislerp2-34B", - "sha": "3147cf866736b786347928b655c887e8b9c07bfc", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.433865107288046, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.39994658616914236, - "normalized_score": 39.994658616914236 - }, - "bbh": { - "name": "BBH", - "value": 0.6245771970170245, - "normalized_score": 47.20230580445542 - }, - "math": { - "name": "MATH Level 5", - "value": 0.229607250755287, - "normalized_score": 22.9607250755287 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3640939597315436, - "normalized_score": 15.212527964205815 - }, - "musr": { - "name": "MUSR", - "value": 0.45296875, - "normalized_score": 15.854427083333327 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.472406914893617, - "normalized_score": 41.37854609929077 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-17", - "submission_date": "2024-10-08", - "generation": 1, - "base_model": "allknowingroger/Yislerp2-34B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 34.389, - "co2_cost": 9.10231048295783 - } - }, - { - "id": "allknowingroger/Yunconglong-13B-slerp_bfloat16_dead687b7342d875bd8ac73bfcd34b88a2e5564c_False", - "model": { - "name": "allknowingroger/Yunconglong-13B-slerp", - "sha": "dead687b7342d875bd8ac73bfcd34b88a2e5564c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 19.600104180648433, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42417673993891764, - 
"normalized_score": 42.41767399389176 - }, - "bbh": { - "name": "BBH", - "value": 0.5165807158493828, - "normalized_score": 32.14072892807635 - }, - "math": { - "name": "MATH Level 5", - "value": 0.054380664652567974, - "normalized_score": 5.438066465256798 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28104026845637586, - "normalized_score": 4.138702460850116 - }, - "musr": { - "name": "MUSR", - "value": 0.4160729166666666, - "normalized_score": 10.842447916666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30360704787234044, - "normalized_score": 22.623005319148938 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 12.879, - "co2_cost": 1.5992629681499602 - } - }, - { - "id": "allknowingroger/limyClown-7B-slerp_bfloat16_732a1ed0c2c7007297ad9d9797793073825f65ca_False", - "model": { - "name": "allknowingroger/limyClown-7B-slerp", - "sha": "732a1ed0c2c7007297ad9d9797793073825f65ca", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.70388869479609, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4017451473202215, - "normalized_score": 40.174514732022146 - }, - "bbh": { - "name": "BBH", - "value": 0.5147517317055973, - "normalized_score": 31.9314661071385 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06873111782477341, - "normalized_score": 6.873111782477341 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28104026845637586, - "normalized_score": 4.138702460850116 - }, - "musr": { - "name": "MUSR", - "value": 0.4293125, - "normalized_score": 12.464062500000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30377327127659576, - "normalized_score": 22.641474586288414 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-03-23", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "allknowingroger/limyClown-7B-slerp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.220100793295194 - } - }, - { - "id": "allknowingroger/llama3-Jallabi-40B-s_float16_a86d8cc3530fb466245b2cac55f25c28d0bd8c22_False", - "model": { - "name": "allknowingroger/llama3-Jallabi-40B-s", - "sha": "a86d8cc3530fb466245b2cac55f25c28d0bd8c22", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.029701636906855, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19206815693471102, - "normalized_score": 19.2068156934711 - }, - "bbh": { - "name": "BBH", - "value": 0.32522424198526295, - "normalized_score": 5.957911562958214 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23741610738255034, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.37495833333333334, - "normalized_score": 4.036458333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10879321808510638, - "normalized_score": 
0.9770242316784857 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-19", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 18.769, - "co2_cost": 1.9591494795974131 - } - }, - { - "id": "allknowingroger/llama3AnFeng-40B_float16_5995441962287970ffc98ad9b292e14420bf49ca_False", - "model": { - "name": "allknowingroger/llama3AnFeng-40B", - "sha": "5995441962287970ffc98ad9b292e14420bf49ca", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 9.237994378100716, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17420776872032873, - "normalized_score": 17.42077687203287 - }, - "bbh": { - "name": "BBH", - "value": 0.3794080447660335, - "normalized_score": 12.476996185725282 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3062080536912752, - "normalized_score": 7.494407158836691 - }, - "musr": { - "name": "MUSR", - "value": 0.39399999999999996, - "normalized_score": 7.149999999999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1979720744680851, - "normalized_score": 10.885786052009454 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-19", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 39.971, - "co2_cost": 8.126936202511224 - } - }, - { - "id": "allura-org/L3.1-8b-RP-Ink_bfloat16_5d487fff4e2d4ae18193b843484b3bd21d09b07c_True", - "model": { - "name": "allura-org/L3.1-8b-RP-Ink", - "sha": "5d487fff4e2d4ae18193b843484b3bd21d09b07c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.096017377265284, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7811063533646281, - "normalized_score": 78.1106353364628 - }, - "bbh": { - "name": "BBH", - "value": 0.48284724308518095, - "normalized_score": 26.318228573463205 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14803625377643503, - "normalized_score": 14.803625377643503 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.3608229166666667, - "normalized_score": 2.4695312500000024 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3427526595744681, - "normalized_score": 26.972517730496453 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-26", - "submission_date": "2025-02-23", - "generation": 1, - "base_model": "allura-org/L3.1-8b-RP-Ink (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 3, - "params_billions": 8.03, - "co2_cost": 0.668821031483859 - } - }, - { - "id": "allura-org/MN-12b-RP-Ink_bfloat16_812fe1585cdf347284bd82b24e09a5308b899f71_True", - "model": { - "name": "allura-org/MN-12b-RP-Ink", - "sha": 
"812fe1585cdf347284bd82b24e09a5308b899f71", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 24.976661361688898, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7186332265056716, - "normalized_score": 71.86332265056717 - }, - "bbh": { - "name": "BBH", - "value": 0.4833826588550261, - "normalized_score": 26.610597861672858 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11858006042296072, - "normalized_score": 11.858006042296072 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28523489932885904, - "normalized_score": 4.697986577181204 - }, - "musr": { - "name": "MUSR", - "value": 0.38184375000000004, - "normalized_score": 6.897135416666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3513962765957447, - "normalized_score": 27.932919621749413 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-25", - "submission_date": "2025-02-23", - "generation": 1, - "base_model": "allura-org/MN-12b-RP-Ink (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 10, - "params_billions": 12.248, - "co2_cost": 0.8294158149047763 - } - }, - { - "id": "allura-org/MS-Meadowlark-22B_bfloat16_6eb2f6bee66dbffa1b17397e75a7380ed4f9d0ac_True", - "model": { - "name": "allura-org/MS-Meadowlark-22B", - "sha": "6eb2f6bee66dbffa1b17397e75a7380ed4f9d0ac", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 27.097964406963452, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.669698621878837, - "normalized_score": 66.9698621878837 - }, - "bbh": { - "name": "BBH", - "value": 0.5162576933217772, - "normalized_score": 30.29658044666958 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18353474320241692, - "normalized_score": 18.35347432024169 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32550335570469796, - "normalized_score": 10.067114093959727 - }, - "musr": { - "name": "MUSR", - "value": 0.3842604166666667, - "normalized_score": 5.532552083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38231382978723405, - "normalized_score": 31.36820330969267 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-18", - "submission_date": "2024-10-24", - "generation": 1, - "base_model": "allura-org/MS-Meadowlark-22B (Merge)", - "hub_license": "other", - "hub_hearts": 13, - "params_billions": 22.247, - "co2_cost": 4.3438523756957625 - } - }, - { - "id": "allura-org/Mistral-Small-24b-Sertraline-0304_bfloat16_26c68b8c4de900ffc392567961d4f516b384a077_True", - "model": { - "name": "allura-org/Mistral-Small-24b-Sertraline-0304", - "sha": "26c68b8c4de900ffc392567961d4f516b384a077", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 35.369805475643055, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6799902037704402, - "normalized_score": 67.99902037704402 - }, - "bbh": { - "name": "BBH", - "value": 0.6524908933699552, - 
"normalized_score": 49.28145843642874 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22280966767371602, - "normalized_score": 22.280966767371602 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35151006711409394, - "normalized_score": 13.534675615212524 - }, - "musr": { - "name": "MUSR", - "value": 0.4395104166666666, - "normalized_score": 13.505468750000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5105551861702128, - "normalized_score": 45.61724290780142 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-04", - "submission_date": "2025-03-04", - "generation": 1, - "base_model": "allura-org/Mistral-Small-24b-Sertraline-0304 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 23.572, - "co2_cost": 1.4705426325981035 - } - }, - { - "id": "allura-org/Mistral-Small-Sisyphus-24b-2503_bfloat16_fa6d90550cad060664c47aac56b511e6584262a0_True", - "model": { - "name": "allura-org/Mistral-Small-Sisyphus-24b-2503", - "sha": "fa6d90550cad060664c47aac56b511e6584262a0", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 32.50290014221418, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6848362345243952, - "normalized_score": 68.48362345243953 - }, - "bbh": { - "name": "BBH", - "value": 0.6269790835863639, - "normalized_score": 46.42097750457095 - }, - "math": { - "name": "MATH Level 5", - "value": 0.25, - "normalized_score": 25.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2625838926174497, - "normalized_score": 1.6778523489932917 - }, - "musr": { - "name": "MUSR", - "value": 0.39768749999999997, - "normalized_score": 7.577604166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5127160904255319, - "normalized_score": 45.85734338061466 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-03", - "submission_date": "2025-03-03", - "generation": 1, - "base_model": "allura-org/Mistral-Small-Sisyphus-24b-2503 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 8, - "params_billions": 23.572, - "co2_cost": 2.743696289599396 - } - }, - { - "id": "allura-org/MoE-Girl-1BA-7BT_bfloat16_ecfac73ab9e7f2ee006d6a2ad9c8e86a85deab2b_True", - "model": { - "name": "allura-org/MoE-Girl-1BA-7BT", - "sha": "ecfac73ab9e7f2ee006d6a2ad9c8e86a85deab2b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "OlmoeForCausalLM", - "average_score": 6.402799107780404, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.27050337548814923, - "normalized_score": 27.050337548814923 - }, - "bbh": { - "name": "BBH", - "value": 0.3139175363262408, - "normalized_score": 4.8423440285204995 - }, - "math": { - "name": "MATH Level 5", - "value": 0.015105740181268883, - "normalized_score": 1.5105740181268883 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.34355208333333337, - "normalized_score": 1.47734375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12175864361702128, - 
"normalized_score": 2.4176270685579193 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-08", - "submission_date": "2024-10-10", - "generation": 1, - "base_model": "allenai/OLMoE-1B-7B-0924", - "hub_license": "apache-2.0", - "hub_hearts": 15, - "params_billions": 6.919, - "co2_cost": 6.402309294204105 - } - }, - { - "id": "allura-org/TQ2.5-14B-Aletheia-v1_bfloat16_c7fbe91dbdb85161464f87c261b588dbf674e514_True", - "model": { - "name": "allura-org/TQ2.5-14B-Aletheia-v1", - "sha": "c7fbe91dbdb85161464f87c261b588dbf674e514", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.48247192029189, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7530297388706411, - "normalized_score": 75.3029738870641 - }, - "bbh": { - "name": "BBH", - "value": 0.6585074769185942, - "normalized_score": 50.881441812823226 - }, - "math": { - "name": "MATH Level 5", - "value": 0.33987915407854985, - "normalized_score": 33.987915407854985 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3624161073825503, - "normalized_score": 14.988814317673373 - }, - "musr": { - "name": "MUSR", - "value": 0.44515625, - "normalized_score": 14.611197916666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5241023936170213, - "normalized_score": 47.122488179669034 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-19", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "allura-org/TQ2.5-14B-Aletheia-v1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 5, - "params_billions": 14.77, - "co2_cost": 2.825806465586459 - } - }, - { - "id": "allura-org/TQ2.5-14B-Neon-v1_bfloat16_f83a719a5c02c1b9ec5225585978d1f1595f8da7_True", - "model": { - "name": "allura-org/TQ2.5-14B-Neon-v1", - "sha": "f83a719a5c02c1b9ec5225585978d1f1595f8da7", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.14031038134542, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6754189993661264, - "normalized_score": 67.54189993661264 - }, - "bbh": { - "name": "BBH", - "value": 0.655304131044165, - "normalized_score": 50.51009292921976 - }, - "math": { - "name": "MATH Level 5", - "value": 0.36027190332326287, - "normalized_score": 36.027190332326285 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3716442953020134, - "normalized_score": 16.21923937360179 - }, - "musr": { - "name": "MUSR", - "value": 0.461, - "normalized_score": 17.29166666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5252659574468085, - "normalized_score": 47.251773049645394 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-19", - "submission_date": "2025-02-17", - "generation": 2, - "base_model": "arcee-ai/SuperNova-Medius (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 10, - "params_billions": 14.77, - "co2_cost": 1.4839621477769043 - } - }, - { - "id": 
"allura-org/Teleut-7b_bfloat16_259e5b8b84d8ddee69db34cdd237ce5ac5c8c4cf_True", - "model": { - "name": "allura-org/Teleut-7b", - "sha": "259e5b8b84d8ddee69db34cdd237ce5ac5c8c4cf", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 30.22949441247033, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6378752820294595, - "normalized_score": 63.78752820294596 - }, - "bbh": { - "name": "BBH", - "value": 0.5141277814496585, - "normalized_score": 30.859919035029737 - }, - "math": { - "name": "MATH Level 5", - "value": 0.24093655589123866, - "normalized_score": 24.093655589123866 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3263422818791946, - "normalized_score": 10.17897091722595 - }, - "musr": { - "name": "MUSR", - "value": 0.4640416666666667, - "normalized_score": 17.671874999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4130651595744681, - "normalized_score": 34.785017730496456 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-24", - "submission_date": "2025-01-01", - "generation": 1, - "base_model": "Qwen/Qwen2.5-7B", - "hub_license": "apache-2.0", - "hub_hearts": 12, - "params_billions": 7.616, - "co2_cost": 2.10019685565601 - } - }, - { - "id": "aloobun/Meta-Llama-3-7B-28Layers_bfloat16_9822e6b8d4de0c0f2964d299f6fcef72385a0341_False", - "model": { - "name": "aloobun/Meta-Llama-3-7B-28Layers", - "sha": "9822e6b8d4de0c0f2964d299f6fcef72385a0341", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.375690297994408, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19636453498938372, - "normalized_score": 19.636453498938373 - }, - "bbh": { - "name": "BBH", - "value": 0.4437497014253391, - "normalized_score": 22.096530251343513 - }, - "math": { - "name": "MATH Level 5", - "value": 0.027945619335347432, - "normalized_score": 2.794561933534743 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.35892708333333334, - "normalized_score": 5.7992187500000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3159906914893617, - "normalized_score": 23.99896572104019 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-10", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "aloobun/Meta-Llama-3-7B-28Layers (Merge)", - "hub_license": "llama3", - "hub_hearts": 0, - "params_billions": 7.158, - "co2_cost": 1.618716589671752 - } - }, - { - "id": "aloobun/d-SmolLM2-360M_bfloat16_2a1d82b4cbcdfdff3c2cfcd171435c5f01b8de43_False", - "model": { - "name": "aloobun/d-SmolLM2-360M", - "sha": "2a1d82b4cbcdfdff3c2cfcd171435c5f01b8de43", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 6.184070904256285, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20970358648386284, - "normalized_score": 
20.97035864838628 - }, - "bbh": { - "name": "BBH", - "value": 0.3195784405636826, - "normalized_score": 4.762820747165694 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01283987915407855, - "normalized_score": 1.283987915407855 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2533557046979866, - "normalized_score": 0.44742729306487633 - }, - "musr": { - "name": "MUSR", - "value": 0.3980625, - "normalized_score": 7.7578125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11693816489361702, - "normalized_score": 1.8820183215130022 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-20", - "submission_date": "2024-11-26", - "generation": 0, - "base_model": "aloobun/d-SmolLM2-360M", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 0.362, - "co2_cost": 0.7402465552245907 - } - }, - { - "id": "alpindale/WizardLM-2-8x22B_bfloat16_087834da175523cffd66a7e19583725e798c1b4f_False", - "model": { - "name": "alpindale/WizardLM-2-8x22B", - "sha": "087834da175523cffd66a7e19583725e798c1b4f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 33.059051837739325, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5272166739805937, - "normalized_score": 52.72166739805937 - }, - "bbh": { - "name": "BBH", - "value": 0.6377307938917097, - "normalized_score": 48.57616817936264 - }, - "math": { - "name": "MATH Level 5", - "value": 0.25, - "normalized_score": 25.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38171140939597314, - "normalized_score": 17.561521252796418 - }, - "musr": { - "name": "MUSR", - "value": 0.4387083333333333, - "normalized_score": 14.538541666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.45960771276595747, - "normalized_score": 39.95641252955083 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-16", - "submission_date": "2024-06-28", - "generation": 0, - "base_model": "alpindale/WizardLM-2-8x22B", - "hub_license": "apache-2.0", - "hub_hearts": 399, - "params_billions": 140.621, - "co2_cost": 186.61044332743728 - } - }, - { - "id": "alpindale/magnum-72b-v1_bfloat16_fef27e0f235ae8858b84b765db773a2a954110dd_True", - "model": { - "name": "alpindale/magnum-72b-v1", - "sha": "fef27e0f235ae8858b84b765db773a2a954110dd", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 42.9290547455837, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7606484128778308, - "normalized_score": 76.06484128778308 - }, - "bbh": { - "name": "BBH", - "value": 0.6982215794373214, - "normalized_score": 57.653184855142705 - }, - "math": { - "name": "MATH Level 5", - "value": 0.39803625377643503, - "normalized_score": 39.803625377643506 - }, - "gpqa": { - "name": "GPQA", - "value": 0.39093959731543626, - "normalized_score": 18.791946308724835 - }, - "musr": { - "name": "MUSR", - "value": 0.4489375, - "normalized_score": 15.617187499999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5467918882978723, - "normalized_score": 49.64354314420804 - } - }, - "features": { - 
"is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-17", - "submission_date": "2024-07-25", - "generation": 2, - "base_model": "Qwen/Qwen2-72B", - "hub_license": "other", - "hub_hearts": 166, - "params_billions": 72.706, - "co2_cost": 25.030245410056658 - } - }, - { - "id": "altomek/YiSM-34B-0rn_float16_7a481c67cbdd5c846d6aaab5ef9f1eebfad812c2_True", - "model": { - "name": "altomek/YiSM-34B-0rn", - "sha": "7a481c67cbdd5c846d6aaab5ef9f1eebfad812c2", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.51201240834246, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.428373382624769, - "normalized_score": 42.83733826247689 - }, - "bbh": { - "name": "BBH", - "value": 0.6140009573868866, - "normalized_score": 45.38292724900714 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2280966767371601, - "normalized_score": 22.80966767371601 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3716442953020134, - "normalized_score": 16.21923937360179 - }, - "musr": { - "name": "MUSR", - "value": 0.445, - "normalized_score": 14.758333333333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4695811170212766, - "normalized_score": 41.06456855791962 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-26", - "submission_date": "2024-06-27", - "generation": 1, - "base_model": "altomek/YiSM-34B-0rn (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 34.389, - "co2_cost": 5.9212482883245645 - } - }, - { - "id": "amazon/MegaBeam-Mistral-7B-300k_bfloat16_42572e5c9a0747b19af5c5c9962d122622f32295_True", - "model": { - "name": "amazon/MegaBeam-Mistral-7B-300k", - "sha": "42572e5c9a0747b19af5c5c9962d122622f32295", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 17.022470504123003, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.520347123410329, - "normalized_score": 52.0347123410329 - }, - "bbh": { - "name": "BBH", - "value": 0.4227731731112974, - "normalized_score": 19.291805959591997 - }, - "math": { - "name": "MATH Level 5", - "value": 0.021148036253776436, - "normalized_score": 2.1148036253776437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27348993288590606, - "normalized_score": 3.1319910514541416 - }, - "musr": { - "name": "MUSR", - "value": 0.39799999999999996, - "normalized_score": 8.350000000000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2549035904255319, - "normalized_score": 17.211510047281322 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-13", - "submission_date": "2024-10-07", - "generation": 0, - "base_model": "amazon/MegaBeam-Mistral-7B-300k", - "hub_license": "apache-2.0", - "hub_hearts": 16, - "params_billions": 7.242, - "co2_cost": 1.299219876736505 - } - }, - { - "id": "amd/AMD-Llama-135m_float16_8f9c39b5ed86d422ab332ed1ecf042fdaeb57903_False", - "model": { - "name": "amd/AMD-Llama-135m", - "sha": 
"8f9c39b5ed86d422ab332ed1ecf042fdaeb57903", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.759627159992882, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.18422452426229072, - "normalized_score": 18.42245242622907 - }, - "bbh": { - "name": "BBH", - "value": 0.2973931917569524, - "normalized_score": 2.4854950529752244 - }, - "math": { - "name": "MATH Level 5", - "value": 0.005287009063444109, - "normalized_score": 0.5287009063444109 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2525167785234899, - "normalized_score": 0.33557046979865535 - }, - "musr": { - "name": "MUSR", - "value": 0.37796874999999996, - "normalized_score": 4.912760416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11685505319148937, - "normalized_score": 1.8727836879432622 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-07-19", - "submission_date": "2024-09-29", - "generation": 0, - "base_model": "amd/AMD-Llama-135m", - "hub_license": "apache-2.0", - "hub_hearts": 111, - "params_billions": 0.135, - "co2_cost": 0.12871917576638336 - } - }, - { - "id": "amd/AMD-Llama-135m_bfloat16_8f9c39b5ed86d422ab332ed1ecf042fdaeb57903_False", - "model": { - "name": "amd/AMD-Llama-135m", - "sha": "8f9c39b5ed86d422ab332ed1ecf042fdaeb57903", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.228976558960189, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19184319826948054, - "normalized_score": 19.18431982694805 - }, - "bbh": { - "name": "BBH", - "value": 0.29694449748780255, - "normalized_score": 2.537952680477511 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0075528700906344415, - "normalized_score": 0.7552870090634441 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.38457291666666665, - "normalized_score": 5.904947916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11685505319148937, - "normalized_score": 1.8727836879432622 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-07-19", - "submission_date": "2024-10-01", - "generation": 0, - "base_model": "amd/AMD-Llama-135m", - "hub_license": "apache-2.0", - "hub_hearts": 111, - "params_billions": 0.134, - "co2_cost": 0.7086780315506144 - } - }, - { - "id": "anakin87/gemma-2b-orpo_bfloat16_bf6bfe30c31c18620767ad60d0bff89343804230_True", - "model": { - "name": "anakin87/gemma-2b-orpo", - "sha": "bf6bfe30c31c18620767ad60d0bff89343804230", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "GemmaForCausalLM", - "average_score": 7.284706228625474, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24779695651981187, - "normalized_score": 24.779695651981186 - }, - "bbh": { - "name": "BBH", - "value": 0.34261709435617754, - "normalized_score": 7.94944502776896 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0188821752265861, - "normalized_score": 
1.8882175226586102 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.37276041666666665, - "normalized_score": 4.128385416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1305684840425532, - "normalized_score": 3.3964982269503543 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-03-24", - "submission_date": "2024-07-06", - "generation": 1, - "base_model": "google/gemma-2b", - "hub_license": "other", - "hub_hearts": 28, - "params_billions": 2.506, - "co2_cost": 1.5798531310408666 - } - }, - { - "id": "anthracite-org/magnum-v1-72b_bfloat16_f8f85021bace7e8250ed8559c5b78b8b34f0c4cc_True", - "model": { - "name": "anthracite-org/magnum-v1-72b", - "sha": "f8f85021bace7e8250ed8559c5b78b8b34f0c4cc", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 42.96291506867274, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7606484128778308, - "normalized_score": 76.06484128778308 - }, - "bbh": { - "name": "BBH", - "value": 0.6982215794373214, - "normalized_score": 57.653184855142705 - }, - "math": { - "name": "MATH Level 5", - "value": 0.39803625377643503, - "normalized_score": 39.803625377643506 - }, - "gpqa": { - "name": "GPQA", - "value": 0.39093959731543626, - "normalized_score": 18.791946308724835 - }, - "musr": { - "name": "MUSR", - "value": 0.4489375, - "normalized_score": 15.617187499999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5486203457446809, - "normalized_score": 49.84670508274232 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-17", - "submission_date": "2024-09-21", - "generation": 2, - "base_model": "Qwen/Qwen2-72B", - "hub_license": "other", - "hub_hearts": 166, - "params_billions": 72.706, - "co2_cost": 25.78211241796205 - } - }, - { - "id": "anthracite-org/magnum-v2-12b_bfloat16__True", - "model": { - "name": "anthracite-org/magnum-v2-12b", - "sha": "", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 18.795821563358565, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.376166349729828, - "normalized_score": 37.6166349729828 - }, - "bbh": { - "name": "BBH", - "value": 0.5020864013200114, - "normalized_score": 28.785551595365874 - }, - "math": { - "name": "MATH Level 5", - "value": 0.054380664652567974, - "normalized_score": 5.438066465256798 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.41790625, - "normalized_score": 11.371614583333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31673869680851063, - "normalized_score": 24.082077423167846 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-03", - "submission_date": "2024-09-05", - "generation": 1, - "base_model": "mistralai/Mistral-Nemo-Base-2407", - "hub_license": 
"apache-2.0", - "hub_hearts": 86, - "params_billions": 12.248, - "co2_cost": 3.2976707757776453 - } - }, - { - "id": "anthracite-org/magnum-v2-72b_bfloat16_c9c5826ef42b9fcc8a8e1079be574481cf0b6cc6_True", - "model": { - "name": "anthracite-org/magnum-v2-72b", - "sha": "c9c5826ef42b9fcc8a8e1079be574481cf0b6cc6", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.78287226692161, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7560273407891063, - "normalized_score": 75.60273407891063 - }, - "bbh": { - "name": "BBH", - "value": 0.7005076514129516, - "normalized_score": 57.85470432085098 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3542296072507553, - "normalized_score": 35.422960725075534 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3859060402684564, - "normalized_score": 18.120805369127517 - }, - "musr": { - "name": "MUSR", - "value": 0.4371875, - "normalized_score": 14.181770833333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5456283244680851, - "normalized_score": 49.51425827423168 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-18", - "submission_date": "2024-09-05", - "generation": 2, - "base_model": "Qwen/Qwen2-72B", - "hub_license": "other", - "hub_hearts": 38, - "params_billions": 72.706, - "co2_cost": 24.268433710248942 - } - }, - { - "id": "anthracite-org/magnum-v2.5-12b-kto_float16_aee0374e5a43e950c9977b0004dede1c57be2999_True", - "model": { - "name": "anthracite-org/magnum-v2.5-12b-kto", - "sha": "aee0374e5a43e950c9977b0004dede1c57be2999", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 18.982789989560633, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3865576669902525, - "normalized_score": 38.65576669902525 - }, - "bbh": { - "name": "BBH", - "value": 0.5076961186254344, - "normalized_score": 29.625059445981027 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05211480362537765, - "normalized_score": 5.211480362537765 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2936241610738255, - "normalized_score": 5.8165548098433995 - }, - "musr": { - "name": "MUSR", - "value": 0.40863541666666664, - "normalized_score": 9.979427083333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3214760638297872, - "normalized_score": 24.608451536643024 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-12", - "submission_date": "2024-08-29", - "generation": 2, - "base_model": "mistralai/Mistral-Nemo-Base-2407", - "hub_license": "apache-2.0", - "hub_hearts": 53, - "params_billions": 12.248, - "co2_cost": 3.2181261017762615 - } - }, - { - "id": "anthracite-org/magnum-v3-27b-kto_bfloat16_96fbb750b3150e5fe9d6d2fcf757f49310d99a43_True", - "model": { - "name": "anthracite-org/magnum-v3-27b-kto", - "sha": "96fbb750b3150e5fe9d6d2fcf757f49310d99a43", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 29.33708001780754, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - 
"value": 0.5674831668860845, - "normalized_score": 56.74831668860845 - }, - "bbh": { - "name": "BBH", - "value": 0.586040577894583, - "normalized_score": 41.1601029248443 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18126888217522658, - "normalized_score": 18.12688821752266 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35570469798657717, - "normalized_score": 14.093959731543624 - }, - "musr": { - "name": "MUSR", - "value": 0.38546874999999997, - "normalized_score": 9.916927083333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.42378656914893614, - "normalized_score": 35.976285460992905 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-06", - "submission_date": "2024-09-15", - "generation": 1, - "base_model": "anthracite-org/magnum-v3-27b-kto (Merge)", - "hub_license": "gemma", - "hub_hearts": 15, - "params_billions": 27.227, - "co2_cost": 7.87506792297306 - } - }, - { - "id": "anthracite-org/magnum-v3-34b_bfloat16_3bcd8c3dbb93021a5ce22203c690a1a084cafb73_True", - "model": { - "name": "anthracite-org/magnum-v3-34b", - "sha": "3bcd8c3dbb93021a5ce22203c690a1a084cafb73", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.66608133452966, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5115294086357531, - "normalized_score": 51.15294086357531 - }, - "bbh": { - "name": "BBH", - "value": 0.6087828692085228, - "normalized_score": 44.32790341462959 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19486404833836857, - "normalized_score": 19.486404833836858 - }, - "gpqa": { - "name": "GPQA", - "value": 0.36073825503355705, - "normalized_score": 14.76510067114094 - }, - "musr": { - "name": "MUSR", - "value": 0.3872395833333333, - "normalized_score": 6.571614583333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.47523271276595747, - "normalized_score": 41.692523640661946 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-22", - "submission_date": "2024-09-18", - "generation": 0, - "base_model": "anthracite-org/magnum-v3-34b", - "hub_license": "apache-2.0", - "hub_hearts": 29, - "params_billions": 34.389, - "co2_cost": 9.225748511595935 - } - }, - { - "id": "anthracite-org/magnum-v3-9b-chatml_bfloat16_96c2d023c56ef73be095ffbae8cedd7243ebca84_False", - "model": { - "name": "anthracite-org/magnum-v3-9b-chatml", - "sha": "96c2d023c56ef73be095ffbae8cedd7243ebca84", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 19.50411636926871, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.12747066671985885, - "normalized_score": 12.747066671985886 - }, - "bbh": { - "name": "BBH", - "value": 0.5427688488887096, - "normalized_score": 35.31787541238543 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06948640483383686, - "normalized_score": 6.948640483383686 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34563758389261745, - "normalized_score": 12.751677852348994 - }, - "musr": { - "name": "MUSR", - "value": 0.4432291666666666, - "normalized_score": 13.236979166666666 - }, 
- "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4242021276595745, - "normalized_score": 36.022458628841605 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-27", - "submission_date": "2024-09-18", - "generation": 1, - "base_model": "IntervitensInc/gemma-2-9b-chatml", - "hub_license": "gemma", - "hub_hearts": 24, - "params_billions": 9.242, - "co2_cost": 5.779930609974818 - } - }, - { - "id": "anthracite-org/magnum-v3-9b-customgemma2_bfloat16_9a7cd3d47434bed2bd80e34e45c74e413f8baaa8_False", - "model": { - "name": "anthracite-org/magnum-v3-9b-customgemma2", - "sha": "9a7cd3d47434bed2bd80e34e45c74e413f8baaa8", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 19.20026712602973, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1272955757390391, - "normalized_score": 12.729557573903909 - }, - "bbh": { - "name": "BBH", - "value": 0.5340136936916174, - "normalized_score": 34.11678334094384 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07175226586102719, - "normalized_score": 7.175226586102719 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3288590604026846, - "normalized_score": 10.514541387024611 - }, - "musr": { - "name": "MUSR", - "value": 0.45646875, - "normalized_score": 15.058593749999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4204621010638298, - "normalized_score": 35.606900118203306 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-27", - "submission_date": "2024-09-18", - "generation": 1, - "base_model": "google/gemma-2-9b", - "hub_license": "gemma", - "hub_hearts": 19, - "params_billions": 9.242, - "co2_cost": 5.803650034123516 - } - }, - { - "id": "anthracite-org/magnum-v4-12b_bfloat16_704f2ccfe662052e415499e56789dd88ec01a113_False", - "model": { - "name": "anthracite-org/magnum-v4-12b", - "sha": "704f2ccfe662052e415499e56789dd88ec01a113", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.27642686776778, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.33929640021808805, - "normalized_score": 33.9296400218088 - }, - "bbh": { - "name": "BBH", - "value": 0.5176693046591915, - "normalized_score": 30.503902266484772 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11782477341389729, - "normalized_score": 11.782477341389729 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.40928125, - "normalized_score": 10.360156249999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3603723404255319, - "normalized_score": 28.930260047281326 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-20", - "submission_date": "2024-10-23", - "generation": 0, - "base_model": "anthracite-org/magnum-v4-12b", - "hub_license": "apache-2.0", - "hub_hearts": 39, - "params_billions": 12.248, - "co2_cost": 3.398031457956096 - 
} - }, - { - "id": "anthracite-org/magnum-v4-22b_bfloat16_e5239e71d2628269b453a832de98c1ecb79d2557_False", - "model": { - "name": "anthracite-org/magnum-v4-22b", - "sha": "e5239e71d2628269b453a832de98c1ecb79d2557", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 27.854369713005507, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5628620947973599, - "normalized_score": 56.28620947973599 - }, - "bbh": { - "name": "BBH", - "value": 0.548612004937422, - "normalized_score": 35.549148532773465 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2001510574018127, - "normalized_score": 20.01510574018127 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32802013422818793, - "normalized_score": 10.402684563758392 - }, - "musr": { - "name": "MUSR", - "value": 0.44078124999999996, - "normalized_score": 13.430989583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3829787234042553, - "normalized_score": 31.44208037825059 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-20", - "submission_date": "2024-10-23", - "generation": 0, - "base_model": "anthracite-org/magnum-v4-22b", - "hub_license": "other", - "hub_hearts": 26, - "params_billions": 22.247, - "co2_cost": 3.300580071468586 - } - }, - { - "id": "anthracite-org/magnum-v4-27b_bfloat16_50a14716bdeb6a9376b9377df31ab1497864f3f9_False", - "model": { - "name": "anthracite-org/magnum-v4-27b", - "sha": "50a14716bdeb6a9376b9377df31ab1497864f3f9", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 26.63300380472101, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.34541682735142754, - "normalized_score": 34.54168273514276 - }, - "bbh": { - "name": "BBH", - "value": 0.5867298109891389, - "normalized_score": 40.96038433350091 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1797583081570997, - "normalized_score": 17.97583081570997 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3699664429530201, - "normalized_score": 15.99552572706935 - }, - "musr": { - "name": "MUSR", - "value": 0.4379895833333333, - "normalized_score": 12.815364583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43758311170212766, - "normalized_score": 37.50923463356973 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-20", - "submission_date": "2024-10-23", - "generation": 0, - "base_model": "anthracite-org/magnum-v4-27b", - "hub_license": "gemma", - "hub_hearts": 18, - "params_billions": 27.227, - "co2_cost": 11.472707787132373 - } - }, - { - "id": "anthracite-org/magnum-v4-9b_bfloat16_e9db6cb80f02ca2e2db4538ef59f7a30f69a849d_False", - "model": { - "name": "anthracite-org/magnum-v4-9b", - "sha": "e9db6cb80f02ca2e2db4538ef59f7a30f69a849d", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 23.798994622099844, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 
0.3502628581053826, - "normalized_score": 35.02628581053826 - }, - "bbh": { - "name": "BBH", - "value": 0.5336423991931557, - "normalized_score": 33.27040443647636 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13066465256797583, - "normalized_score": 13.066465256797583 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34731543624161076, - "normalized_score": 12.975391498881436 - }, - "musr": { - "name": "MUSR", - "value": 0.45157291666666666, - "normalized_score": 15.646614583333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3952792553191489, - "normalized_score": 32.8088061465721 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-20", - "submission_date": "2024-10-23", - "generation": 0, - "base_model": "anthracite-org/magnum-v4-9b", - "hub_license": "gemma", - "hub_hearts": 17, - "params_billions": 9.242, - "co2_cost": 5.112652302122218 - } - }, - { - "id": "apple/DCLM-7B_bfloat16_c85bfa168f999ce27e954808bc005a2748fda5c5_False", - "model": { - "name": "apple/DCLM-7B", - "sha": "c85bfa168f999ce27e954808bc005a2748fda5c5", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "OpenLMModel", - "average_score": 14.112858289728544, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21727239280664196, - "normalized_score": 21.727239280664193 - }, - "bbh": { - "name": "BBH", - "value": 0.42321423668184166, - "normalized_score": 19.760934974772244 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03700906344410876, - "normalized_score": 3.7009063444108756 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31543624161073824, - "normalized_score": 8.7248322147651 - }, - "musr": { - "name": "MUSR", - "value": 0.3920729166666667, - "normalized_score": 7.309114583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3110871010638298, - "normalized_score": 23.45412234042553 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-11", - "submission_date": "2024-08-16", - "generation": 0, - "base_model": "apple/DCLM-7B", - "hub_license": "apple-ascl", - "hub_hearts": 834, - "params_billions": 7.0, - "co2_cost": 1.2599111030184245 - } - }, - { - "id": "appvoid/arco-2_float16_9bec3c42c5bb557eb218513f4fe26c4edc803f0f_False", - "model": { - "name": "appvoid/arco-2", - "sha": "9bec3c42c5bb557eb218513f4fe26c4edc803f0f", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.137100838868086, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19913717824261848, - "normalized_score": 19.913717824261848 - }, - "bbh": { - "name": "BBH", - "value": 0.31456676274830814, - "normalized_score": 4.059150069582826 - }, - "math": { - "name": "MATH Level 5", - "value": 0.013595166163141994, - "normalized_score": 1.3595166163141994 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23909395973154363, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.35359375, - "normalized_score": 4.199218749999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1116190159574468, - "normalized_score": 1.2910017730496441 - } - }, - "features": { - 
"is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-22", - "submission_date": "2024-12-23", - "generation": 0, - "base_model": "appvoid/arco-2", - "hub_license": "apache-2.0", - "hub_hearts": 7, - "params_billions": 0.514, - "co2_cost": 0.3162940489756324 - } - }, - { - "id": "appvoid/arco-2-instruct_float16_eb116cfdf0b239d67a874d2bd37b2b748d7d6654_False", - "model": { - "name": "appvoid/arco-2-instruct", - "sha": "eb116cfdf0b239d67a874d2bd37b2b748d7d6654", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.382510586018164, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2164479137577184, - "normalized_score": 21.644791375771838 - }, - "bbh": { - "name": "BBH", - "value": 0.31330470624451107, - "normalized_score": 3.913002236158603 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01283987915407855, - "normalized_score": 1.283987915407855 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23825503355704697, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.34959375, - "normalized_score": 4.199218749999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11128656914893617, - "normalized_score": 1.2540632387706852 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-14", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "appvoid/arco-2-instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.514, - "co2_cost": 0.3375334700786766 - } - }, - { - "id": "arcee-ai/Arcee-Blitz_float16_cf20caa2bfbfabc71a79c6e73f1a7b1e59c86a9b_False", - "model": { - "name": "arcee-ai/Arcee-Blitz", - "sha": "cf20caa2bfbfabc71a79c6e73f1a7b1e59c86a9b", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 40.01232729285768, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5543435861292482, - "normalized_score": 55.43435861292482 - }, - "bbh": { - "name": "BBH", - "value": 0.6606628431550884, - "normalized_score": 50.72663255795833 - }, - "math": { - "name": "MATH Level 5", - "value": 0.34818731117824775, - "normalized_score": 34.818731117824775 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3850671140939597, - "normalized_score": 18.008948545861294 - }, - "musr": { - "name": "MUSR", - "value": 0.50471875, - "normalized_score": 23.82317708333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.6153590425531915, - "normalized_score": 57.2621158392435 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-07", - "submission_date": "2025-02-21", - "generation": 1, - "base_model": "arcee-ai/Arcee-Blitz (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 68, - "params_billions": 23.572, - "co2_cost": 1.4446652668107918 - } - }, - { - "id": "arcee-ai/Arcee-Maestro-7B-Preview_float16_007a65e79e9631b6842a5db89a9bc13936fd3aca_False", - "model": { - "name": "arcee-ai/Arcee-Maestro-7B-Preview", - "sha": 
"007a65e79e9631b6842a5db89a9bc13936fd3aca", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 23.793130973366363, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2750247122080524, - "normalized_score": 27.50247122080524 - }, - "bbh": { - "name": "BBH", - "value": 0.4648373015709704, - "normalized_score": 25.375555786781916 - }, - "math": { - "name": "MATH Level 5", - "value": 0.49924471299093653, - "normalized_score": 49.92447129909365 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33221476510067116, - "normalized_score": 10.96196868008949 - }, - "musr": { - "name": "MUSR", - "value": 0.3885416666666666, - "normalized_score": 6.334375000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3039394946808511, - "normalized_score": 22.659943853427897 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-10", - "submission_date": "2025-02-21", - "generation": 1, - "base_model": "arcee-ai/Arcee-Maestro-7B-Preview (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 36, - "params_billions": 7.613, - "co2_cost": 0.6903426140888346 - } - }, - { - "id": "arcee-ai/Arcee-Nova_bfloat16_ec3bfe88b83f81481daa04b6789c1e0d32827dc5_True", - "model": { - "name": "arcee-ai/Arcee-Nova", - "sha": "ec3bfe88b83f81481daa04b6789c1e0d32827dc5", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 44.05339262826514, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7907485471881275, - "normalized_score": 79.07485471881274 - }, - "bbh": { - "name": "BBH", - "value": 0.694196965855899, - "normalized_score": 56.74098753952074 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4380664652567976, - "normalized_score": 43.80664652567976 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3850671140939597, - "normalized_score": 18.008948545861294 - }, - "musr": { - "name": "MUSR", - "value": 0.45616666666666666, - "normalized_score": 17.220833333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5452127659574468, - "normalized_score": 49.46808510638298 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-16", - "submission_date": "2024-09-19", - "generation": 0, - "base_model": "arcee-ai/Arcee-Nova", - "hub_license": "other", - "hub_hearts": 50, - "params_billions": 72.706, - "co2_cost": 22.986587129501515 - } - }, - { - "id": "arcee-ai/Arcee-Spark_bfloat16_3fe368ea5fd32bc4a8d1bcf42510416f7fa28668_True", - "model": { - "name": "arcee-ai/Arcee-Spark", - "sha": "3fe368ea5fd32bc4a8d1bcf42510416f7fa28668", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 28.406546265844867, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5620874834328471, - "normalized_score": 56.208748343284704 - }, - "bbh": { - "name": "BBH", - "value": 0.5489474198567446, - "normalized_score": 37.13852245584468 - }, - "math": { - "name": "MATH Level 5", - "value": 0.29531722054380666, - 
"normalized_score": 29.531722054380666 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.40209374999999997, - "normalized_score": 8.595052083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3822307180851064, - "normalized_score": 31.358968676122927 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-22", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "arcee-ai/Arcee-Spark", - "hub_license": "apache-2.0", - "hub_hearts": 87, - "params_billions": 7.616, - "co2_cost": 2.197065655544504 - } - }, - { - "id": "arcee-ai/Arcee-Spark_float16_3fe368ea5fd32bc4a8d1bcf42510416f7fa28668_True", - "model": { - "name": "arcee-ai/Arcee-Spark", - "sha": "3fe368ea5fd32bc4a8d1bcf42510416f7fa28668", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 25.443168747377587, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.571829412625168, - "normalized_score": 57.18294126251679 - }, - "bbh": { - "name": "BBH", - "value": 0.5480864114714127, - "normalized_score": 36.92439043586489 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11404833836858004, - "normalized_score": 11.404833836858003 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3062080536912752, - "normalized_score": 7.494407158836691 - }, - "musr": { - "name": "MUSR", - "value": 0.4007604166666667, - "normalized_score": 8.395052083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38131648936170215, - "normalized_score": 31.257387706855795 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-22", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "arcee-ai/Arcee-Spark", - "hub_license": "apache-2.0", - "hub_hearts": 87, - "params_billions": 7.616, - "co2_cost": 1.1360401936497302 - } - }, - { - "id": "arcee-ai/Llama-3.1-SuperNova-Lite_bfloat16_76246ca4448c1a11787daee0958b60ab27f17774_True", - "model": { - "name": "arcee-ai/Llama-3.1-SuperNova-Lite", - "sha": "76246ca4448c1a11787daee0958b60ab27f17774", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.193463980461605, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8017393848322452, - "normalized_score": 80.1739384832245 - }, - "bbh": { - "name": "BBH", - "value": 0.5151992115104819, - "normalized_score": 31.572340212980667 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18277945619335348, - "normalized_score": 18.27794561933535 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3062080536912752, - "normalized_score": 7.494407158836691 - }, - "musr": { - "name": "MUSR", - "value": 0.41632291666666665, - "normalized_score": 11.673697916666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3877160904255319, - "normalized_score": 31.96845449172577 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - 
"upload_date": "2024-09-10", - "submission_date": "2024-09-17", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "llama3", - "hub_hearts": 189, - "params_billions": 8.03, - "co2_cost": 1.7119867700237617 - } - }, - { - "id": "arcee-ai/Llama-Spark_bfloat16_6d74a617fbb17a1ada08528f2673c89f84fb062e_True", - "model": { - "name": "arcee-ai/Llama-Spark", - "sha": "6d74a617fbb17a1ada08528f2673c89f84fb062e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 27.037236901530367, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7910732412221794, - "normalized_score": 79.10732412221793 - }, - "bbh": { - "name": "BBH", - "value": 0.5053504145749979, - "normalized_score": 29.770253700208638 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13897280966767372, - "normalized_score": 13.897280966767372 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.35933333333333334, - "normalized_score": 2.6166666666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3720910904255319, - "normalized_score": 30.232343380614658 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-26", - "submission_date": "2024-08-08", - "generation": 0, - "base_model": "arcee-ai/Llama-Spark", - "hub_license": "llama3", - "hub_hearts": 27, - "params_billions": 8.03, - "co2_cost": 1.6614282034914647 - } - }, - { - "id": "arcee-ai/SuperNova-Medius_bfloat16_e34fafcac2801be1ae5c7eb744e191a08119f2af_True", - "model": { - "name": "arcee-ai/SuperNova-Medius", - "sha": "e34fafcac2801be1ae5c7eb744e191a08119f2af", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.15430388277812, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7183584001560305, - "normalized_score": 71.83584001560305 - }, - "bbh": { - "name": "BBH", - "value": 0.6377284463115707, - "normalized_score": 48.00501462716327 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4690332326283988, - "normalized_score": 46.903323262839876 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33305369127516776, - "normalized_score": 11.073825503355701 - }, - "musr": { - "name": "MUSR", - "value": 0.4232708333333333, - "normalized_score": 12.275520833333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5034906914893617, - "normalized_score": 44.83229905437352 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-02", - "submission_date": "2024-10-22", - "generation": 1, - "base_model": "arcee-ai/SuperNova-Medius (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 205, - "params_billions": 14.77, - "co2_cost": 7.687530654788435 - } - }, - { - "id": "arcee-ai/Virtuoso-Lite_bfloat16_efc17a8dc63fa6f035c4dfe7be7d138aec837d03_True", - "model": { - "name": "arcee-ai/Virtuoso-Lite", - "sha": "efc17a8dc63fa6f035c4dfe7be7d138aec837d03", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - 
"weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 36.41610644523877, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8099575792231279, - "normalized_score": 80.99575792231279 - }, - "bbh": { - "name": "BBH", - "value": 0.6098520975127147, - "normalized_score": 43.89855735433509 - }, - "math": { - "name": "MATH Level 5", - "value": 0.25302114803625375, - "normalized_score": 25.302114803625376 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34395973154362414, - "normalized_score": 12.527964205816552 - }, - "musr": { - "name": "MUSR", - "value": 0.4595416666666667, - "normalized_score": 17.54270833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4440658244680851, - "normalized_score": 38.22953605200946 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-28", - "submission_date": "2025-01-28", - "generation": 1, - "base_model": "arcee-ai/Virtuoso-Lite (Merge)", - "hub_license": "other", - "hub_hearts": 34, - "params_billions": 10.306, - "co2_cost": 2.586021987364553 - } - }, - { - "id": "arcee-ai/Virtuoso-Small_bfloat16_ca5dec1c6351ba6f2f0c59e609b94628a29c1459_True", - "model": { - "name": "arcee-ai/Virtuoso-Small", - "sha": "ca5dec1c6351ba6f2f0c59e609b94628a29c1459", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.53607757653, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7935211904413622, - "normalized_score": 79.35211904413622 - }, - "bbh": { - "name": "BBH", - "value": 0.6517633129454784, - "normalized_score": 50.399846311899886 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4093655589123867, - "normalized_score": 40.93655589123867 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33640939597315433, - "normalized_score": 11.521252796420578 - }, - "musr": { - "name": "MUSR", - "value": 0.43390625, - "normalized_score": 14.438281249999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5191156914893617, - "normalized_score": 46.56841016548463 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-01", - "submission_date": "2024-12-03", - "generation": 1, - "base_model": "arcee-ai/Virtuoso-Small (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 68, - "params_billions": 14.77, - "co2_cost": 3.0286282432128417 - } - }, - { - "id": "arcee-ai/Virtuoso-Small-v2_bfloat16_b1e0c424683cae4032aed31f43aa0cbda5255efb_True", - "model": { - "name": "arcee-ai/Virtuoso-Small-v2", - "sha": "b1e0c424683cae4032aed31f43aa0cbda5255efb", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 42.475701925930025, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8273181824226385, - "normalized_score": 82.73181824226387 - }, - "bbh": { - "name": "BBH", - "value": 0.6554097094586643, - "normalized_score": 50.94799062781783 - }, - "math": { - "name": "MATH Level 5", - "value": 0.466012084592145, - "normalized_score": 46.6012084592145 - }, - "gpqa": { - "name": "GPQA", - "value": 
[Flattened deletion hunk (extraction damage): in the original patch each of the lines below was its own `-`-prefixed diff line removing one pretty-printed JSON leaderboard entry per model. Every entry carried the same schema: "id"; "model" (name, sha, precision, type, weight_type, architecture, average_score, has_chat_template); "evaluations" with a raw "value" and a "normalized_score" for each of IFEval, BBH, MATH Level 5, GPQA, MUSR, and MMLU-PRO; "features" flags (is_not_available_on_hub, is_merged, is_moe, is_flagged, is_official_provider); and "metadata" (upload_date, submission_date, generation, base_model, hub_license, hub_hearts, params_billions, co2_cost). Only the recoverable entry list is reproduced here (model: average_score, rounded to 3 decimals). The hunk opens mid-entry, in the tail of an entry whose base_model is "arcee-ai/Virtuoso-Small-v2 (Merge)", and is truncated mid-value at the end.

  arcee-ai/raspberry-3B: 15.853
  argilla/notus-7b-v1: 18.474
  argilla/notux-8x7b-v1: 24.479
  argilla-warehouse/Llama-3.1-8B-MagPie-Ultra: 19.849
  arisin/orca-platypus-13B-slerp: 14.792
  arshiaafshani/Arsh-V1: 37.544
  asharsha30/LLAMA_Harsha_8_B_ORDP_10k: 16.221
  ashercn97/a1-v0.0.1: 21.575
  ashercn97/a1-v002: 22.882
  assskelad/smollm2-360M-sft_SmallThoughts: 5.042
  athirdpath/Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit: 20.969
  automerger/YamshadowExperiment28-7B: 19.884
  avemio/GRAG-NEMO-12B-ORPO-HESSIAN-AI: 0.738
  awnr/Mistral-7B-v0.1-signtensors-1-over-2: 14.370
  awnr/Mistral-7B-v0.1-signtensors-1-over-4: 8.747
  awnr/Mistral-7B-v0.1-signtensors-3-over-8: 13.813
  awnr/Mistral-7B-v0.1-signtensors-5-over-16: 12.285
  awnr/Mistral-7B-v0.1-signtensors-7-over-16: 14.247
  aws-prototyping/MegaBeam-Mistral-7B-512k: 17.582
  axolotl-ai-co/romulus-mistral-nemo-12b-simpo: 25.176
  baconnier/Napoleon_24B_V0.0: 27.301
  baconnier/Napoleon_24B_V0.2: 24.008
  baebee/7B-Cetacea: 19.942
  baebee/mergekit-model_stock-nzjnheg: 22.998
  baebee/mergekit-ties-fnjenli: 5.467
  bamec66557/MISCHIEVOUS-12B: 22.619
  bamec66557/MISCHIEVOUS-12B-Mix_0.1v: 22.611
  bamec66557/MISCHIEVOUS-12B-Mix_0.2v: 22.454
  bamec66557/MISCHIEVOUS-12B-Mix_0.3v: 22.796
  bamec66557/MISCHIEVOUS-12B-Mix_0.4v: 26.654
  bamec66557/MISCHIEVOUS-12B-Mix_0.5v: 22.635
  bamec66557/MISCHIEVOUS-12B-Mix_0.6v: 23.874
  bamec66557/MISCHIEVOUS-12B-Mix_III_IV_V: 23.237
  bamec66557/MISCHIEVOUS-12B-Mix_III_ex_V: 23.808
  bamec66557/MISCHIEVOUS-12B-Mix_Neo: 26.078
  bamec66557/Mistral-Nemo-VICIOUS_MESH-12B-2407: 27.482
  bamec66557/NameLess-12B-prob: 27.189
  bamec66557/VICIOUS_MESH-12B: 22.780
  bamec66557/VICIOUS_MESH-12B-0.1v: 22.449
  bamec66557/VICIOUS_MESH-12B-0.X.ver: 22.653
  bamec66557/VICIOUS_MESH-12B-ALPHA: 26.456
  bamec66557/VICIOUS_MESH-12B-BETA: 27.466
  bamec66557/VICIOUS_MESH-12B-DELTA: 25.939
  bamec66557/VICIOUS_MESH-12B-DIGAMMA: 25.972
  bamec66557/VICIOUS_MESH-12B-EPSILON: 25.505
  bamec66557/VICIOUS_MESH-12B-GAMMA: 26.917
  bamec66557/VICIOUS_MESH-12B-NEMO: 23.316
  bamec66557/VICIOUS_MESH-12B-OMEGA: 27.496
  bamec66557/VICIOUS_MESH-12B-UNION: 26.676 (hunk truncated mid-entry)]
8.277404921700223 - }, - "musr": { - "name": "MUSR", - "value": 0.4256875, - "normalized_score": 13.444270833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3671875, - "normalized_score": 29.6875 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-21", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 6.124, - "co2_cost": 2.0332475815456683 - } - }, - { - "id": "bamec66557/VICIOUS_MESH-12B_Razor_bfloat16_cdbbf14d884c8bf7c4ae4cb5e2d30425a5340cfe_False", - "model": { - "name": "bamec66557/VICIOUS_MESH-12B_Razor", - "sha": "cdbbf14d884c8bf7c4ae4cb5e2d30425a5340cfe", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.640681565889526, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.37364304489864675, - "normalized_score": 37.36430448986467 - }, - "bbh": { - "name": "BBH", - "value": 0.5447127693928118, - "normalized_score": 34.56221200969913 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1299093655589124, - "normalized_score": 12.990936555891238 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32298657718120805, - "normalized_score": 9.731543624161072 - }, - "musr": { - "name": "MUSR", - "value": 0.40915624999999994, - "normalized_score": 11.544531249999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36685505319148937, - "normalized_score": 29.650561465721044 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-18", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 6.124, - "co2_cost": 2.141617222178077 - } - }, - { - "id": "bamec66557/mergekit-model_stock-zdaysvi_bfloat16_7ee1c3a87fe903c165be1393c7728fd8da001a86_True", - "model": { - "name": "bamec66557/mergekit-model_stock-zdaysvi", - "sha": "7ee1c3a87fe903c165be1393c7728fd8da001a86", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 26.24451582006411, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6425960894870055, - "normalized_score": 64.25960894870056 - }, - "bbh": { - "name": "BBH", - "value": 0.5062803896601668, - "normalized_score": 30.16636018648764 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1351963746223565, - "normalized_score": 13.51963746223565 - }, - "gpqa": { - "name": "GPQA", - "value": 0.313758389261745, - "normalized_score": 8.501118568232664 - }, - "musr": { - "name": "MUSR", - "value": 0.41238541666666667, - "normalized_score": 11.14817708333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36884973404255317, - "normalized_score": 29.8721926713948 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-20", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 6.124, - 
"co2_cost": 1.9319840894221854 - } - }, - { - "id": "bamec66557/mergekit-ties-sinbkow_bfloat16_871b4b0c0c77675c933aca9bc0573e85189a0a57_True", - "model": { - "name": "bamec66557/mergekit-ties-sinbkow", - "sha": "871b4b0c0c77675c933aca9bc0573e85189a0a57", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 26.284691171067987, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6431956098706986, - "normalized_score": 64.31956098706986 - }, - "bbh": { - "name": "BBH", - "value": 0.5092084289828543, - "normalized_score": 30.62203788717879 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14501510574018128, - "normalized_score": 14.501510574018129 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3196308724832215, - "normalized_score": 9.284116331096197 - }, - "musr": { - "name": "MUSR", - "value": 0.40447916666666667, - "normalized_score": 10.059895833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36028922872340424, - "normalized_score": 28.921025413711575 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-21", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 6.124, - "co2_cost": 1.8004765207173132 - } - }, - { - "id": "belztjti/dffghgjh_bfloat16_20a115228627e753f03f876ad3c437d00aa5caf0_True", - "model": { - "name": "belztjti/dffghgjh", - "sha": "20a115228627e753f03f876ad3c437d00aa5caf0", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "GlmForCausalLM", - "average_score": 16.671219751518176, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5784241368457914, - "normalized_score": 57.842413684579135 - }, - "bbh": { - "name": "BBH", - "value": 0.35817085768640783, - "normalized_score": 9.713638961998793 - }, - "math": { - "name": "MATH Level 5", - "value": 0.023413897280966767, - "normalized_score": 2.3413897280966767 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.34745833333333337, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3421708776595745, - "normalized_score": 26.907875295508276 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-30", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 9.543, - "co2_cost": 2.3218651318125167 - } - }, - { - "id": "belztjti/dtfgv_bfloat16_014ea7eab9cb8fb71d661369bdaf34fe6a64b3f6_True", - "model": { - "name": "belztjti/dtfgv", - "sha": "014ea7eab9cb8fb71d661369bdaf34fe6a64b3f6", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 9.007600725168578, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.334450369464133, - "normalized_score": 33.445036946413296 - }, - "bbh": { - "name": "BBH", - "value": 
0.32815316667476035, - "normalized_score": 5.520450164271318 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01812688821752266, - "normalized_score": 1.812688821752266 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26929530201342283, - "normalized_score": 2.572706935123044 - }, - "musr": { - "name": "MUSR", - "value": 0.3793958333333333, - "normalized_score": 5.091145833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15043218085106383, - "normalized_score": 5.603575650118202 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-29", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 9.543, - "co2_cost": 2.8656283220337087 - } - }, - { - "id": "benhaotang/phi4-qwq-sky-t1_float16_de04971083243b7cfe0447e20badb1847bd7cef6_False", - "model": { - "name": "benhaotang/phi4-qwq-sky-t1", - "sha": "de04971083243b7cfe0447e20badb1847bd7cef6", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 31.01825262068425, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.04596249063595704, - "normalized_score": 4.596249063595704 - }, - "bbh": { - "name": "BBH", - "value": 0.6710520703782934, - "normalized_score": 52.61239992936087 - }, - "math": { - "name": "MATH Level 5", - "value": 0.41012084592145015, - "normalized_score": 41.012084592145015 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3951342281879195, - "normalized_score": 19.35123042505593 - }, - "musr": { - "name": "MUSR", - "value": 0.48995833333333333, - "normalized_score": 21.378124999999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5244348404255319, - "normalized_score": 47.15942671394799 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-17", - "submission_date": "2025-01-21", - "generation": 1, - "base_model": "benhaotang/phi4-qwq-sky-t1 (Merge)", - "hub_license": "mit", - "hub_hearts": 2, - "params_billions": 14.66, - "co2_cost": 1.9014616068923202 - } - }, - { - "id": "beomi/gemma-mling-7b_bfloat16_3f442e28bd50db6c438ce2a15b3a003532babba0_False", - "model": { - "name": "beomi/gemma-mling-7b", - "sha": "3f442e28bd50db6c438ce2a15b3a003532babba0", - "precision": "bfloat16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "GemmaForCausalLM", - "average_score": 11.392173694644192, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20290939152559653, - "normalized_score": 20.290939152559652 - }, - "bbh": { - "name": "BBH", - "value": 0.40675941947154004, - "normalized_score": 17.631391012223656 - }, - "math": { - "name": "MATH Level 5", - "value": 0.054380664652567974, - "normalized_score": 5.438066465256798 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.37585416666666666, - "normalized_score": 6.848437499999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2632978723404255, - "normalized_score": 18.144208037825056 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - 
"is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-15", - "submission_date": "2024-07-17", - "generation": 0, - "base_model": "beomi/gemma-mling-7b", - "hub_license": "other", - "hub_hearts": 14, - "params_billions": 8.538, - "co2_cost": 3.287011197266699 - } - }, - { - "id": "beowolx/CodeNinja-1.0-OpenChat-7B_bfloat16_9934c04c767e6ae0f792712a060f02915391d4ec_True", - "model": { - "name": "beowolx/CodeNinja-1.0-OpenChat-7B", - "sha": "9934c04c767e6ae0f792712a060f02915391d4ec", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.460682433184903, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5446770125489258, - "normalized_score": 54.467701254892575 - }, - "bbh": { - "name": "BBH", - "value": 0.4441338669403703, - "normalized_score": 21.713423267203808 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06722054380664652, - "normalized_score": 6.722054380664652 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.42432291666666666, - "normalized_score": 11.540364583333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3015292553191489, - "normalized_score": 22.392139479905435 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-12-20", - "submission_date": "2024-07-30", - "generation": 0, - "base_model": "beowolx/CodeNinja-1.0-OpenChat-7B", - "hub_license": "mit", - "hub_hearts": 104, - "params_billions": 7.242, - "co2_cost": 1.2721452017893264 - } - }, - { - "id": "berkeley-nest/Starling-LM-7B-alpha_bfloat16_1dddf3b95bc1391f6307299eb1c162c194bde9bd_True", - "model": { - "name": "berkeley-nest/Starling-LM-7B-alpha", - "sha": "1dddf3b95bc1391f6307299eb1c162c194bde9bd", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.83936104726783, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5480491761858536, - "normalized_score": 54.80491761858535 - }, - "bbh": { - "name": "BBH", - "value": 0.4440065261164004, - "normalized_score": 21.95402808715926 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08383685800604229, - "normalized_score": 8.38368580060423 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.41201041666666666, - "normalized_score": 9.501302083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3171542553191489, - "normalized_score": 24.128250591016545 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-11-25", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "berkeley-nest/Starling-LM-7B-alpha", - "hub_license": "apache-2.0", - "hub_hearts": 558, - "params_billions": 7.242, - "co2_cost": 1.1032577733840974 - } - }, - { - "id": "bfuzzy1/Gunny_float16_4648b9fafbbf5871fef317cdf9b76c3b7da6d66d_True", - "model": { - "name": "bfuzzy1/Gunny", - "sha": "4648b9fafbbf5871fef317cdf9b76c3b7da6d66d", - 
"precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.34108004405445, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7128629813339716, - "normalized_score": 71.28629813339717 - }, - "bbh": { - "name": "BBH", - "value": 0.45459857092962414, - "normalized_score": 22.991778829133718 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1729607250755287, - "normalized_score": 17.29607250755287 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2785234899328859, - "normalized_score": 3.8031319910514525 - }, - "musr": { - "name": "MUSR", - "value": 0.35828124999999994, - "normalized_score": 2.018489583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3038563829787234, - "normalized_score": 22.650709219858157 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-04", - "submission_date": "2024-12-20", - "generation": 0, - "base_model": "bfuzzy1/Gunny", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 1.6675725586964267 - } - }, - { - "id": "bfuzzy1/acheron_float16_10f0384c0363f63a17f41f1cf09f9a317a3ee957_False", - "model": { - "name": "bfuzzy1/acheron", - "sha": "10f0384c0363f63a17f41f1cf09f9a317a3ee957", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.974672842034184, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19831269919369493, - "normalized_score": 19.831269919369493 - }, - "bbh": { - "name": "BBH", - "value": 0.3107918622526179, - "normalized_score": 3.737588062186813 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01661631419939577, - "normalized_score": 1.6616314199395772 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23909395973154363, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3510520833333333, - "normalized_score": 3.5481770833333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10962433510638298, - "normalized_score": 1.0693705673758855 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-12", - "submission_date": "2024-12-24", - "generation": 1, - "base_model": "bfuzzy1/acheron (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.514, - "co2_cost": 0.3253645856268118 - } - }, - { - "id": "bfuzzy1/acheron-c_float16_dd9b3e9f550ab5c48a8349ddfea534996f4a28c4_True", - "model": { - "name": "bfuzzy1/acheron-c", - "sha": "dd9b3e9f550ab5c48a8349ddfea534996f4a28c4", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.2908204733904265, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19286714805604685, - "normalized_score": 19.286714805604685 - }, - "bbh": { - "name": "BBH", - "value": 0.30260703404313577, - "normalized_score": 2.7690272588902722 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0030211480362537764, - "normalized_score": 0.3021148036253776 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24748322147651006, - 
"normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.33821875, - "normalized_score": 1.47734375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1171875, - "normalized_score": 1.9097222222222217 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-23", - "submission_date": "2024-12-23", - "generation": 1, - "base_model": "bfuzzy1/acheron-c (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.514, - "co2_cost": 0.49820634326519797 - } - }, - { - "id": "bfuzzy1/acheron-d_float16_337fc6d265062b22b368debdf42deb10af58b25e_False", - "model": { - "name": "bfuzzy1/acheron-d", - "sha": "337fc6d265062b22b368debdf42deb10af58b25e", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.988234616217907, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.192542454021995, - "normalized_score": 19.254245402199498 - }, - "bbh": { - "name": "BBH", - "value": 0.3139959864926003, - "normalized_score": 4.1222474820638 - }, - "math": { - "name": "MATH Level 5", - "value": 0.015105740181268883, - "normalized_score": 1.5105740181268883 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23657718120805368, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.34971875, - "normalized_score": 3.5481770833333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11344747340425532, - "normalized_score": 1.4941637115839235 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-03", - "submission_date": "2025-01-03", - "generation": 1, - "base_model": "bfuzzy1/acheron-d (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.514, - "co2_cost": 0.32747280338737966 - } - }, - { - "id": "bfuzzy1/acheron-m_float16_15a592a961c17aca78b16851ef42fd55b24c2d09_True", - "model": { - "name": "bfuzzy1/acheron-m", - "sha": "15a592a961c17aca78b16851ef42fd55b24c2d09", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.225197951458999, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17583123889058808, - "normalized_score": 17.583123889058808 - }, - "bbh": { - "name": "BBH", - "value": 0.29284447696551025, - "normalized_score": 2.1820409575204094 - }, - "math": { - "name": "MATH Level 5", - "value": 0.00906344410876133, - "normalized_score": 0.906344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.3486666666666667, - "normalized_score": 2.0833333333333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11128656914893617, - "normalized_score": 1.2540632387706852 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-10", - "submission_date": "2025-01-10", - "generation": 2, - "base_model": "bfuzzy1/acheron-d (Merge)", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 0.514, - 
"co2_cost": 0.3736255718512762 - } - }, - { - "id": "bfuzzy1/acheron-m1a-llama_float16_f6b5f9afe3163767fa51dd1dd66d2fcd829ffc7d_True", - "model": { - "name": "bfuzzy1/acheron-m1a-llama", - "sha": "f6b5f9afe3163767fa51dd1dd66d2fcd829ffc7d", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "LlamaForCausalLM", - "average_score": 3.348613471275291, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.11245827737070972, - "normalized_score": 11.245827737070972 - }, - "bbh": { - "name": "BBH", - "value": 0.29560475093811295, - "normalized_score": 2.545408537429085 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0075528700906344415, - "normalized_score": 0.7552870090634441 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.36330208333333336, - "normalized_score": 2.579427083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11461103723404255, - "normalized_score": 1.6234485815602824 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-10", - "submission_date": "2025-01-10", - "generation": 3, - "base_model": "bfuzzy1/acheron-d (Merge)", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 0.514, - "co2_cost": 1.0738704727547566 - } - }, - { - "id": "bfuzzy1/llambses-1_float16_73d190c1726f22de8bb1be333d93cfeebb550984_False", - "model": { - "name": "bfuzzy1/llambses-1", - "sha": "73d190c1726f22de8bb1be333d93cfeebb550984", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.83707295838704, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3553837152089788, - "normalized_score": 35.53837152089788 - }, - "bbh": { - "name": "BBH", - "value": 0.5046977405175623, - "normalized_score": 31.077833088739435 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06873111782477341, - "normalized_score": 6.873111782477341 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.45290625, - "normalized_score": 15.379947916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31399601063829785, - "normalized_score": 23.777334515366427 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-10", - "submission_date": "2025-01-03", - "generation": 1, - "base_model": "bfuzzy1/llambses-1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.9365833927550146 - } - }, - { - "id": "bhuvneshsaini/merged_model_float16_35d045ae54b9bdf334b1c28becd85746cf4e9a38_False", - "model": { - "name": "bhuvneshsaini/merged_model", - "sha": "35d045ae54b9bdf334b1c28becd85746cf4e9a38", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.795748828202385, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1812767900282362, - "normalized_score": 
18.12767900282362 - }, - "bbh": { - "name": "BBH", - "value": 0.3359777949071243, - "normalized_score": 7.617386883057352 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.34971875, - "normalized_score": 4.081510416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.14453125, - "normalized_score": 4.947916666666666 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-13", - "submission_date": "2024-12-17", - "generation": 0, - "base_model": "bhuvneshsaini/merged_model", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 4.715, - "co2_cost": 0.7164317669572453 - } - }, - { - "id": "bigcode/starcoder2-15b_bfloat16_46d44742909c03ac8cee08eb03fdebce02e193ec_False", - "model": { - "name": "bigcode/starcoder2-15b", - "sha": "46d44742909c03ac8cee08eb03fdebce02e193ec", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Starcoder2ForCausalLM", - "average_score": 12.539175421645837, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2780223141265177, - "normalized_score": 27.802231412651764 - }, - "bbh": { - "name": "BBH", - "value": 0.4447957841230437, - "normalized_score": 20.373540752678547 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05966767371601209, - "normalized_score": 5.966767371601208 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27348993288590606, - "normalized_score": 3.1319910514541416 - }, - "musr": { - "name": "MUSR", - "value": 0.35009375000000004, - "normalized_score": 2.9283854166666674 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.23528922872340424, - "normalized_score": 15.032136524822693 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-02-20", - "submission_date": "2024-06-09", - "generation": 0, - "base_model": "bigcode/starcoder2-15b", - "hub_license": "bigcode-openrail-m", - "hub_hearts": 595, - "params_billions": 15.958, - "co2_cost": 67.68626903353528 - } - }, - { - "id": "bigcode/starcoder2-3b_bfloat16_733247c55e3f73af49ce8e9c7949bf14af205928_False", - "model": { - "name": "bigcode/starcoder2-3b", - "sha": "733247c55e3f73af49ce8e9c7949bf14af205928", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Starcoder2ForCausalLM", - "average_score": 6.549147626379535, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20370838264693236, - "normalized_score": 20.370838264693234 - }, - "bbh": { - "name": "BBH", - "value": 0.35087141384601755, - "normalized_score": 8.909299421083569 - }, - "math": { - "name": "MATH Level 5", - "value": 0.015105740181268883, - "normalized_score": 1.5105740181268883 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24412751677852348, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.34345833333333337, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1636469414893617, - "normalized_score": 7.0718823877068555 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - 
"is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-11-29", - "submission_date": "2024-06-09", - "generation": 0, - "base_model": "bigcode/starcoder2-3b", - "hub_license": "bigcode-openrail-m", - "hub_hearts": 169, - "params_billions": 3.03, - "co2_cost": 0.893257836653058 - } - }, - { - "id": "bigcode/starcoder2-7b_bfloat16_a3d33687b51284b528abeb17830776ffd24892a9_False", - "model": { - "name": "bigcode/starcoder2-7b", - "sha": "a3d33687b51284b528abeb17830776ffd24892a9", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Starcoder2ForCausalLM", - "average_score": 8.2934383764798, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22091938279321088, - "normalized_score": 22.09193827932109 - }, - "bbh": { - "name": "BBH", - "value": 0.36609857669123036, - "normalized_score": 11.395110106503443 - }, - "math": { - "name": "MATH Level 5", - "value": 0.030966767371601207, - "normalized_score": 3.096676737160121 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2516778523489933, - "normalized_score": 0.22371364653244186 - }, - "musr": { - "name": "MUSR", - "value": 0.3793333333333333, - "normalized_score": 5.8166666666666655 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16422872340425532, - "normalized_score": 7.1365248226950335 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-02-20", - "submission_date": "2024-06-09", - "generation": 0, - "base_model": "bigcode/starcoder2-7b", - "hub_license": "bigcode-openrail-m", - "hub_hearts": 172, - "params_billions": 7.174, - "co2_cost": 1.0128027570551226 - } - }, - { - "id": "bigscience/bloom-1b1_bfloat16_eb3dd7399312f5f94fd13f41d2f318117d3eb1e4_False", - "model": { - "name": "bigscience/bloom-1b1", - "sha": "eb3dd7399312f5f94fd13f41d2f318117d3eb1e4", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "BloomForCausalLM", - "average_score": 4.025155876068456, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.13733781920858879, - "normalized_score": 13.733781920858878 - }, - "bbh": { - "name": "BBH", - "value": 0.31072762377370394, - "normalized_score": 4.042705269260129 - }, - "math": { - "name": "MATH Level 5", - "value": 0.005287009063444109, - "normalized_score": 0.5287009063444109 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.36999999999999994, - "normalized_score": 3.416666666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1107878989361702, - "normalized_score": 1.1986554373522447 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2022-05-19", - "submission_date": "2024-06-13", - "generation": 0, - "base_model": "bigscience/bloom-1b1", - "hub_license": "bigscience-bloom-rail-1.0", - "hub_hearts": 62, - "params_billions": 1.065, - "co2_cost": 1.4340427150874084 - } - }, - { - "id": "bigscience/bloom-1b7_bfloat16_cc72a88036c2fb937d65efeacc57a0c2ef5d6fe5_False", - "model": { - "name": "bigscience/bloom-1b7", - "sha": "cc72a88036c2fb937d65efeacc57a0c2ef5d6fe5", - "precision": 
"bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "BloomForCausalLM", - "average_score": 4.046754480742192, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.10438968603305895, - "normalized_score": 10.438968603305895 - }, - "bbh": { - "name": "BBH", - "value": 0.314054919904072, - "normalized_score": 4.39745292760164 - }, - "math": { - "name": "MATH Level 5", - "value": 0.005287009063444109, - "normalized_score": 0.5287009063444109 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.38857291666666666, - "normalized_score": 6.838281250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10862699468085106, - "normalized_score": 0.9585549645390061 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2022-05-19", - "submission_date": "2024-06-13", - "generation": 0, - "base_model": "bigscience/bloom-1b7", - "hub_license": "bigscience-bloom-rail-1.0", - "hub_hearts": 121, - "params_billions": 1.722, - "co2_cost": 1.6367191975791189 - } - }, - { - "id": "bigscience/bloom-3b_bfloat16_52bc5b43010b4844513826b8be3f78c7344c37d7_False", - "model": { - "name": "bigscience/bloom-3b", - "sha": "52bc5b43010b4844513826b8be3f78c7344c37d7", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "BloomForCausalLM", - "average_score": 4.387894128649155, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1270961050013963, - "normalized_score": 12.709610500139629 - }, - "bbh": { - "name": "BBH", - "value": 0.3062918592346337, - "normalized_score": 3.4200982840077354 - }, - "math": { - "name": "MATH Level 5", - "value": 0.008308157099697885, - "normalized_score": 0.8308157099697886 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23993288590604026, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3980625, - "normalized_score": 7.891145833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11328125, - "normalized_score": 1.4756944444444438 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2022-05-19", - "submission_date": "2024-06-13", - "generation": 0, - "base_model": "bigscience/bloom-3b", - "hub_license": "bigscience-bloom-rail-1.0", - "hub_hearts": 90, - "params_billions": 3.003, - "co2_cost": 1.992112381322494 - } - }, - { - "id": "bigscience/bloom-560m_bfloat16_ac2ae5fab2ce3f9f40dc79b5ca9f637430d24971_False", - "model": { - "name": "bigscience/bloom-560m", - "sha": "ac2ae5fab2ce3f9f40dc79b5ca9f637430d24971", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "BloomForCausalLM", - "average_score": 3.50724359916236, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.06202431769926019, - "normalized_score": 6.202431769926019 - }, - "bbh": { - "name": "BBH", - "value": 0.3025950541549823, - "normalized_score": 2.885363608028119 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0037764350453172208, - "normalized_score": 0.3776435045317221 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - 
"normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.4030833333333333, - "normalized_score": 8.185416666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11643949468085106, - "normalized_score": 1.8266105200945615 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2022-05-19", - "submission_date": "2024-06-13", - "generation": 0, - "base_model": "bigscience/bloom-560m", - "hub_license": "bigscience-bloom-rail-1.0", - "hub_hearts": 351, - "params_billions": 0.559, - "co2_cost": 1.525432280306906 - } - }, - { - "id": "bigscience/bloom-7b1_float16_6232703e399354503377bf59dfbb8397fd569e4a_False", - "model": { - "name": "bigscience/bloom-7b1", - "sha": "6232703e399354503377bf59dfbb8397fd569e4a", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "BloomForCausalLM", - "average_score": 3.795510241848182, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.13221696210499254, - "normalized_score": 13.221696210499253 - }, - "bbh": { - "name": "BBH", - "value": 0.3113718529627139, - "normalized_score": 4.038808518979752 - }, - "math": { - "name": "MATH Level 5", - "value": 0.005287009063444109, - "normalized_score": 0.5287009063444109 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.34869791666666666, - "normalized_score": 1.920572916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11045545212765957, - "normalized_score": 1.1617169030732852 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2022-05-19", - "submission_date": "2024-06-13", - "generation": 0, - "base_model": "bigscience/bloom-7b1", - "hub_license": "bigscience-bloom-rail-1.0", - "hub_hearts": 203, - "params_billions": 7.069, - "co2_cost": 2.0115500371999038 - } - }, - { - "id": "bluuwhale/L3-SthenoMaid-8B-V1_bfloat16_f8e65823aa02752c9c08aa69c7a24bfa94058a9b_True", - "model": { - "name": "bluuwhale/L3-SthenoMaid-8B-V1", - "sha": "f8e65823aa02752c9c08aa69c7a24bfa94058a9b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.83976001225683, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7344700949037443, - "normalized_score": 73.44700949037443 - }, - "bbh": { - "name": "BBH", - "value": 0.5218759253208048, - "normalized_score": 32.398152503800446 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10800604229607251, - "normalized_score": 10.80060422960725 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2802013422818792, - "normalized_score": 4.026845637583895 - }, - "musr": { - "name": "MUSR", - "value": 0.3686979166666667, - "normalized_score": 4.853906250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3656083776595745, - "normalized_score": 29.51204196217494 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-09", - "submission_date": "2025-01-07", - "generation": 1, - "base_model": 
"bluuwhale/L3-SthenoMaid-8B-V1 (Merge)", - "hub_license": "", - "hub_hearts": 3, - "params_billions": 8.03, - "co2_cost": 1.205450782943445 - } - }, - { - "id": "bond005/meno-tiny-0.1_float16_e45b5605e2209a143c823f4e9c7c49705955cdb1_True", - "model": { - "name": "bond005/meno-tiny-0.1", - "sha": "e45b5605e2209a143c823f4e9c7c49705955cdb1", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 18.850916827026232, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45497613000172876, - "normalized_score": 45.497613000172876 - }, - "bbh": { - "name": "BBH", - "value": 0.4262909130965971, - "normalized_score": 19.64270876311028 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13897280966767372, - "normalized_score": 13.897280966767372 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28187919463087246, - "normalized_score": 4.250559284116329 - }, - "musr": { - "name": "MUSR", - "value": 0.4184583333333333, - "normalized_score": 9.973958333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2785904255319149, - "normalized_score": 19.843380614657207 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-18", - "submission_date": "2025-01-11", - "generation": 1, - "base_model": "bond005/meno-tiny-0.1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 10, - "params_billions": 1.544, - "co2_cost": 3.73337552432176 - } - }, - { - "id": "bosonai/Higgs-Llama-3-70B_bfloat16_b2c7540768046dfdae7a0cb846a7da6c41d826b1_True", - "model": { - "name": "bosonai/Higgs-Llama-3-70B", - "sha": "b2c7540768046dfdae7a0cb846a7da6c41d826b1", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 33.525397972968115, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5560678998390935, - "normalized_score": 55.60678998390936 - }, - "bbh": { - "name": "BBH", - "value": 0.625765879603832, - "normalized_score": 45.89740563396065 - }, - "math": { - "name": "MATH Level 5", - "value": 0.25226586102719034, - "normalized_score": 25.226586102719033 - }, - "gpqa": { - "name": "GPQA", - "value": 0.36661073825503354, - "normalized_score": 15.548098434004473 - }, - "musr": { - "name": "MUSR", - "value": 0.44708333333333333, - "normalized_score": 15.518750000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.49019281914893614, - "normalized_score": 43.35475768321512 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-05", - "submission_date": "2024-09-18", - "generation": 1, - "base_model": "meta-llama/Meta-Llama-3-70B", - "hub_license": "other", - "hub_hearts": 220, - "params_billions": 70.554, - "co2_cost": 27.45369405239353 - } - }, - { - "id": "braindao/DeepSeek-R1-Distill-Qwen-1.5B-Blunt_bfloat16_ef4f5c21d64e3dc2f67718e62d814827d19188ee_True", - "model": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-1.5B-Blunt", - "sha": "ef4f5c21d64e3dc2f67718e62d814827d19188ee", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 
7.552804605341624, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.261136008014291, - "normalized_score": 26.113600801429097 - }, - "bbh": { - "name": "BBH", - "value": 0.27743669901671336, - "normalized_score": 1.1023633058943698 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13821752265861026, - "normalized_score": 13.821752265861026 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24748322147651006, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.35952083333333335, - "normalized_score": 2.240104166666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11835106382978723, - "normalized_score": 2.0390070921985806 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-20", - "submission_date": "2025-02-20", - "generation": 0, - "base_model": "braindao/DeepSeek-R1-Distill-Qwen-1.5B-Blunt", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.777, - "co2_cost": 0.6036575615550887 - } - }, - { - "id": "braindao/DeepSeek-R1-Distill-Qwen-1.5B-Reflective_bfloat16_0444175d262f264654af671f423ca72f4d284478_True", - "model": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-1.5B-Reflective", - "sha": "0444175d262f264654af671f423ca72f4d284478", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.705956789781306, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.30327641768285923, - "normalized_score": 30.327641768285925 - }, - "bbh": { - "name": "BBH", - "value": 0.2908444769655102, - "normalized_score": 1.7477599117687752 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16314199395770393, - "normalized_score": 16.314199395770395 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.33555208333333336, - "normalized_score": 0.9440104166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11303191489361702, - "normalized_score": 1.4479905437352243 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-20", - "submission_date": "2025-02-20", - "generation": 0, - "base_model": "braindao/DeepSeek-R1-Distill-Qwen-1.5B-Reflective", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.777, - "co2_cost": 0.6228340036068769 - } - }, - { - "id": "braindao/DeepSeek-R1-Distill-Qwen-14B_bfloat16_00463c8b956246647d2d0fc4027df4e569194aeb_True", - "model": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-14B", - "sha": "00463c8b956246647d2d0fc4027df4e569194aeb", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 13.942561374741304, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4171575863154209, - "normalized_score": 41.71575863154209 - }, - "bbh": { - "name": "BBH", - "value": 0.30329653176003074, - "normalized_score": 3.271232992020663 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17598187311178248, - "normalized_score": 17.598187311178247 - }, - "gpqa": { - "name": "GPQA", 
- "value": 0.2802013422818792, - "normalized_score": 4.026845637583895 - }, - "musr": { - "name": "MUSR", - "value": 0.4487916666666667, - "normalized_score": 15.632291666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11269946808510638, - "normalized_score": 1.4110520094562635 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-04", - "submission_date": "2025-03-04", - "generation": 0, - "base_model": "braindao/DeepSeek-R1-Distill-Qwen-14B", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 1.8978922751786214 - } - }, - { - "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-ABUB-ST_bfloat16_2ebe36e929737546f03ef37e845c745f4068752f_True", - "model": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-14B-ABUB-ST", - "sha": "2ebe36e929737546f03ef37e845c745f4068752f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 29.311295170279617, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3751922676276723, - "normalized_score": 37.51922676276723 - }, - "bbh": { - "name": "BBH", - "value": 0.4926903187457697, - "normalized_score": 27.634826732270326 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5015105740181269, - "normalized_score": 50.15105740181269 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3447986577181208, - "normalized_score": 12.639821029082773 - }, - "musr": { - "name": "MUSR", - "value": 0.4220625, - "normalized_score": 11.891145833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.42428523936170215, - "normalized_score": 36.03169326241135 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-12", - "submission_date": "2025-03-12", - "generation": 1, - "base_model": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 1.8912967101766143 - } - }, - { - "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt_bfloat16_ad0d32e834c1187b5894d742215fd864e94dd3ac_True", - "model": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt", - "sha": "ad0d32e834c1187b5894d742215fd864e94dd3ac", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 17.9179987350434, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5611632690151022, - "normalized_score": 56.11632690151021 - }, - "bbh": { - "name": "BBH", - "value": 0.32828968244496226, - "normalized_score": 6.126852862495489 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16389728096676737, - "normalized_score": 16.389728096676738 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.45542708333333337, - "normalized_score": 16.86171875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.14469747340425532, - "normalized_score": 4.966385933806146 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - 
"is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-26", - "submission_date": "2025-03-03", - "generation": 0, - "base_model": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 1.9364716614943034 - } - }, - { - "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored_bfloat16_027525b1379cf8dd8272c46537f55281d8cdd9b9_True", - "model": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored", - "sha": "027525b1379cf8dd8272c46537f55281d8cdd9b9", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 16.62442618013144, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5421791956453321, - "normalized_score": 54.21791956453322 - }, - "bbh": { - "name": "BBH", - "value": 0.3170339746824052, - "normalized_score": 4.607187867121389 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16314199395770393, - "normalized_score": 16.314199395770395 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.4486979166666667, - "normalized_score": 15.453906249999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.14311835106382978, - "normalized_score": 4.790927895981086 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-26", - "submission_date": "2025-03-06", - "generation": 0, - "base_model": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 1.9544725353046577 - } - }, - { - "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt_bfloat16_ad287e756594c171e78745b3314e9d146cd3a158_True", - "model": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt", - "sha": "ad287e756594c171e78745b3314e9d146cd3a158", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 17.9423472074527, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5221456845614081, - "normalized_score": 52.21456845614081 - }, - "bbh": { - "name": "BBH", - "value": 0.3198581755956472, - "normalized_score": 5.067571369076204 - }, - "math": { - "name": "MATH Level 5", - "value": 0.25075528700906347, - "normalized_score": 25.075528700906347 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2785234899328859, - "normalized_score": 3.8031319910514525 - }, - "musr": { - "name": "MUSR", - "value": 0.4526979166666667, - "normalized_score": 16.120572916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.14835438829787234, - "normalized_score": 5.372709810874704 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-03-03", - "generation": 0, - "base_model": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 1.9292949587001595 - } - }, - { - "id": 
"braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt-Reflective_bfloat16_d048407437a3af102d59bb7f60db0bbe1c6b3bd8_True", - "model": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt-Reflective", - "sha": "d048407437a3af102d59bb7f60db0bbe1c6b3bd8", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 17.874122056267588, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.554044380022784, - "normalized_score": 55.4044380022784 - }, - "bbh": { - "name": "BBH", - "value": 0.337106084887115, - "normalized_score": 6.901046016164872 - }, - "math": { - "name": "MATH Level 5", - "value": 0.23716012084592145, - "normalized_score": 23.716012084592144 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.4247604166666667, - "normalized_score": 11.928385416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15043218085106383, - "normalized_score": 5.603575650118202 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-05", - "submission_date": "2025-03-05", - "generation": 0, - "base_model": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt-Reflective", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 1.887683133323476 - } - }, - { - "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Reflective_bfloat16_7e662880a6327fd3ef8906358c526edfd48b861b_True", - "model": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Reflective", - "sha": "7e662880a6327fd3ef8906358c526edfd48b861b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 15.258010678110056, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5139274901705253, - "normalized_score": 51.39274901705253 - }, - "bbh": { - "name": "BBH", - "value": 0.3013444769655102, - "normalized_score": 2.698522439001891 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1472809667673716, - "normalized_score": 14.72809667673716 - }, - "gpqa": { - "name": "GPQA", - "value": 0.287751677852349, - "normalized_score": 5.033557046979867 - }, - "musr": { - "name": "MUSR", - "value": 0.44333333333333336, - "normalized_score": 14.483333333333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12890625, - "normalized_score": 3.2118055555555545 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-05", - "submission_date": "2025-03-05", - "generation": 0, - "base_model": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Reflective", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 1.9294124725581154 - } - }, - { - "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Reflective_bfloat16_ad501b68fd42c312c64c70b1f63a9562679711c2_True", - "model": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-14B-Reflective", - "sha": "ad501b68fd42c312c64c70b1f63a9562679711c2", - "precision": "bfloat16", - "type": 
"fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 14.389854199470006, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4290227706928727, - "normalized_score": 42.90227706928727 - }, - "bbh": { - "name": "BBH", - "value": 0.301225755504323, - "normalized_score": 3.035855458967868 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19184290030211482, - "normalized_score": 19.184290030211482 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.4553958333333334, - "normalized_score": 16.7578125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11294880319148937, - "normalized_score": 1.4387559101654845 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-26", - "submission_date": "2025-03-03", - "generation": 0, - "base_model": "braindao/DeepSeek-R1-Distill-Qwen-14B-Reflective", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 1.9922666332584293 - } - }, - { - "id": "braindao/DeepSeek-R1-Distill-Qwen-7B_bfloat16_b4409922f58a06ce7296032b7baba2eaa45a4000_True", - "model": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-7B", - "sha": "b4409922f58a06ce7296032b7baba2eaa45a4000", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 11.398600284940265, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.39679938119744496, - "normalized_score": 39.6799381197445 - }, - "bbh": { - "name": "BBH", - "value": 0.2886778102988436, - "normalized_score": 1.9766705871500392 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19184290030211482, - "normalized_score": 19.184290030211482 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.37666666666666665, - "normalized_score": 4.416666666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1141123670212766, - "normalized_score": 1.5680407801418434 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-04", - "submission_date": "2025-03-04", - "generation": 0, - "base_model": "braindao/DeepSeek-R1-Distill-Qwen-7B", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.6814255563649841 - } - }, - { - "id": "braindao/DeepSeek-R1-Distill-Qwen-7B-Blunt_bfloat16_fecc50fce2183143952083d053685194640c2ad1_True", - "model": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-7B-Blunt", - "sha": "fecc50fce2183143952083d053685194640c2ad1", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 12.850779949168645, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4266246891581005, - "normalized_score": 42.66246891581005 - }, - "bbh": { - "name": "BBH", - "value": 0.29017781029884354, - "normalized_score": 2.149818735298187 - }, - "math": { - "name": "MATH Level 5", - 
"value": 0.21450151057401812, - "normalized_score": 21.45015105740181 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.38851041666666664, - "normalized_score": 6.1638020833333345 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11693816489361702, - "normalized_score": 1.8820183215130022 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-20", - "submission_date": "2025-03-03", - "generation": 0, - "base_model": "braindao/DeepSeek-R1-Distill-Qwen-7B-Blunt", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.6726927587157503 - } - }, - { - "id": "braindao/DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored_bfloat16_1a94375fd2c68af28d112348ad5b8b22cb1b3c2f_True", - "model": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored", - "sha": "1a94375fd2c68af28d112348ad5b8b22cb1b3c2f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 10.72103457820772, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3654503384353515, - "normalized_score": 36.54503384353515 - }, - "bbh": { - "name": "BBH", - "value": 0.2958444769655102, - "normalized_score": 2.744263179742631 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17371601208459214, - "normalized_score": 17.371601208459214 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2533557046979866, - "normalized_score": 0.44742729306487633 - }, - "musr": { - "name": "MUSR", - "value": 0.38460416666666664, - "normalized_score": 5.742187500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11328125, - "normalized_score": 1.4756944444444438 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-13", - "submission_date": "2025-03-13", - "generation": 2, - "base_model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.6785234959047466 - } - }, - { - "id": "braindao/DeepSeek-R1-Distill-Qwen-7B-Reflective_bfloat16_62714830b4f061e1f132b19ad1b95db46653f8c2_True", - "model": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-7B-Reflective", - "sha": "62714830b4f061e1f132b19ad1b95db46653f8c2", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 11.718078328507309, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3921783091087204, - "normalized_score": 39.21783091087204 - }, - "bbh": { - "name": "BBH", - "value": 0.2906778102988436, - "normalized_score": 2.0813002167796686 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20241691842900303, - "normalized_score": 20.241691842900302 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25419463087248323, - "normalized_score": 0.5592841163310973 - }, - "musr": { - "name": "MUSR", - "value": 0.38999999999999996, - "normalized_score": 6.483333333333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1155252659574468, - "normalized_score": 1.725029550827422 - } - }, 
- "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-26", - "submission_date": "2025-03-03", - "generation": 0, - "base_model": "braindao/DeepSeek-R1-Distill-Qwen-7B-Reflective", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.6684679781246362 - } - }, - { - "id": "braindao/Qwen2.5-14B_bfloat16_ca710582977c49cc263b4fd02de159f9c51dc76c_True", - "model": { - "name": "braindao/Qwen2.5-14B", - "sha": "ca710582977c49cc263b4fd02de159f9c51dc76c", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 32.436238941896505, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.540854931581537, - "normalized_score": 54.085493158153696 - }, - "bbh": { - "name": "BBH", - "value": 0.5852660409288039, - "normalized_score": 41.26351364432166 - }, - "math": { - "name": "MATH Level 5", - "value": 0.29229607250755285, - "normalized_score": 29.229607250755286 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3733221476510067, - "normalized_score": 16.442953020134222 - }, - "musr": { - "name": "MUSR", - "value": 0.41235416666666663, - "normalized_score": 10.444270833333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.48836436170212766, - "normalized_score": 43.15159574468085 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-06", - "generation": 0, - "base_model": "braindao/Qwen2.5-14B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 2.2515694930214387 - } - }, - { - "id": "braindao/Qwen2.5-14B-Instruct_bfloat16_f45a202018ab4796db270d79f5f2b193237d37fb_True", - "model": { - "name": "braindao/Qwen2.5-14B-Instruct", - "sha": "f45a202018ab4796db270d79f5f2b193237d37fb", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.60549009199536, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8142539572778007, - "normalized_score": 81.42539572778006 - }, - "bbh": { - "name": "BBH", - "value": 0.6403640774008682, - "normalized_score": 48.573089527387644 - }, - "math": { - "name": "MATH Level 5", - "value": 0.552870090634441, - "normalized_score": 55.2870090634441 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3288590604026846, - "normalized_score": 10.514541387024611 - }, - "musr": { - "name": "MUSR", - "value": 0.414, - "normalized_score": 10.616666666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.48894614361702127, - "normalized_score": 43.216238179669034 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-06", - "generation": 1, - "base_model": "Qwen/Qwen2.5-14B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 4.755065293649155 - } - }, - { - "id": "braindao/iq-code-evmind-0.5b_bfloat16_c34cdd02ff7488eea0ac26110b0b7cb277a0bf1b_True", - "model": { - "name": "braindao/iq-code-evmind-0.5b", - "sha": 
"c34cdd02ff7488eea0ac26110b0b7cb277a0bf1b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.0224136272788416, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3215612353001148, - "normalized_score": 32.156123530011484 - }, - "bbh": { - "name": "BBH", - "value": 0.31637440507987097, - "normalized_score": 4.260915277471788 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02416918429003021, - "normalized_score": 2.416918429003021 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24161073825503357, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.33037500000000003, - "normalized_score": 1.1968750000000006 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11893284574468085, - "normalized_score": 2.1036495271867612 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-23", - "submission_date": "2025-02-20", - "generation": 1, - "base_model": "braindao/iq-code-evmind-0.5b-instruct-v0.2411.4", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 0.494, - "co2_cost": 0.5073626971520605 - } - }, - { - "id": "brgx53/3Bgeneral-ECE-PRYMMAL-Martial_bfloat16_78ee3bde02df349ee7161f9c2a5b36161c294009_False", - "model": { - "name": "brgx53/3Bgeneral-ECE-PRYMMAL-Martial", - "sha": "78ee3bde02df349ee7161f9c2a5b36161c294009", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 23.281187153244208, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.32893057088525113, - "normalized_score": 32.89305708852512 - }, - "bbh": { - "name": "BBH", - "value": 0.5458008312900208, - "normalized_score": 36.673582111407946 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13141993957703926, - "normalized_score": 13.141993957703926 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32466442953020136, - "normalized_score": 9.955257270693513 - }, - "musr": { - "name": "MUSR", - "value": 0.43728125, - "normalized_score": 14.426822916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3933676861702128, - "normalized_score": 32.59640957446809 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-23", - "submission_date": "2024-10-23", - "generation": 1, - "base_model": "brgx53/3Bgeneral-ECE-PRYMMAL-Martial (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.821, - "co2_cost": 1.307303661929017 - } - }, - { - "id": "brgx53/3Bgeneralv2-ECE-PRYMMAL-Martial_bfloat16_8525f801c47b2bce2ca4dad360ce71b2cb6b370b_False", - "model": { - "name": "brgx53/3Bgeneralv2-ECE-PRYMMAL-Martial", - "sha": "8525f801c47b2bce2ca4dad360ce71b2cb6b370b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 31.48239660410299, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.567708125551315, - "normalized_score": 56.77081255513149 - }, - "bbh": { - "name": "BBH", - "value": 0.5607195549186694, - "normalized_score": 
-        "normalized_score": 37.250632564299146
-      },
-      "math": {
-        "name": "MATH Level 5",
-        "value": 0.3496978851963746,
-        "normalized_score": 34.96978851963746
-      },
-      "gpqa": {
-        "name": "GPQA",
-        "value": 0.311241610738255,
-        "normalized_score": 8.165548098434002
-      },
-      "musr": {
-        "name": "MUSR",
-        "value": 0.43563541666666666,
-        "normalized_score": 12.78776041666667
-      },
-      "mmlu_pro": {
-        "name": "MMLU-PRO",
-        "value": 0.45054853723404253,
-        "normalized_score": 38.949837470449175
-      }
-    },
-    "features": {
-      "is_not_available_on_hub": true,
-      "is_merged": true,
-      "is_moe": false,
-      "is_flagged": false,
-      "is_official_provider": false
-    },
-    "metadata": {
-      "upload_date": "2024-11-08",
-      "submission_date": "2024-11-08",
-      "generation": 1,
-      "base_model": "brgx53/3Bgeneralv2-ECE-PRYMMAL-Martial (Merge)",
-      "hub_license": "apache-2.0",
-      "hub_hearts": 15,
-      "params_billions": 3.0,
-      "co2_cost": 2.0068195482674844
-    }
-  },
-  {
-    "id": "brgx53/3Blareneg-ECE-PRYMMAL-Martial_bfloat16_abac4757125a66a427fb82751bf171dabaea3458_False",
-    "model": {
-      "name": "brgx53/3Blareneg-ECE-PRYMMAL-Martial",
-      "sha": "abac4757125a66a427fb82751bf171dabaea3458",
-      "precision": "bfloat16",
-      "type": "basemergesandmoerges",
-      "weight_type": "Original",
-      "architecture": "Phi3ForCausalLM",
-      "average_score": 22.756317201307535,
-      "has_chat_template": false
-    },
-    "evaluations": {
-      "ifeval": {
-        "name": "IFEval",
-        "value": 0.28763902002242936,
-        "normalized_score": 28.763902002242936
-      },
-      "bbh": {
-        "name": "BBH",
-        "value": 0.535846215598753,
-        "normalized_score": 35.45258577949333
-      },
-      "math": {
-        "name": "MATH Level 5",
-        "value": 0.12084592145015106,
-        "normalized_score": 12.084592145015106
-      },
-      "gpqa": {
-        "name": "GPQA",
-        "value": 0.3347315436241611,
-        "normalized_score": 11.297539149888143
-      },
-      "musr": {
-        "name": "MUSR",
-        "value": 0.4428958333333333,
-        "normalized_score": 15.428645833333329
-      },
-      "mmlu_pro": {
-        "name": "MMLU-PRO",
-        "value": 0.4015957446808511,
-        "normalized_score": 33.51063829787233
-      }
-    },
-    "features": {
-      "is_not_available_on_hub": true,
-      "is_merged": true,
-      "is_moe": false,
-      "is_flagged": false,
-      "is_official_provider": false
-    },
-    "metadata": {
-      "upload_date": "2024-10-23",
-      "submission_date": "2024-10-23",
-      "generation": 1,
-      "base_model": "brgx53/3Blareneg-ECE-PRYMMAL-Martial (Merge)",
-      "hub_license": "apache-2.0",
-      "hub_hearts": 0,
-      "params_billions": 3.821,
-      "co2_cost": 1.615068915069678
-    }
-  },
-  {
-    "id": "brgx53/3Blarenegv2-ECE-PRYMMAL-Martial_bfloat16_304038fc2b2527e31c738f9091206253a0d40f6c_False",
-    "model": {
-      "name": "brgx53/3Blarenegv2-ECE-PRYMMAL-Martial",
-      "sha": "304038fc2b2527e31c738f9091206253a0d40f6c",
-      "precision": "bfloat16",
-      "type": "basemergesandmoerges",
-      "weight_type": "Original",
-      "architecture": "Qwen2ForCausalLM",
-      "average_score": 31.457001024079023,
-      "has_chat_template": false
-    },
-    "evaluations": {
-      "ifeval": {
-        "name": "IFEval",
-        "value": 0.5661843907498769,
-        "normalized_score": 56.6184390749877
-      },
-      "bbh": {
-        "name": "BBH",
-        "value": 0.5607195549186694,
-        "normalized_score": 37.250632564299146
-      },
-      "math": {
-        "name": "MATH Level 5",
-        "value": 0.3496978851963746,
-        "normalized_score": 34.96978851963746
-      },
-      "gpqa": {
-        "name": "GPQA",
-        "value": 0.311241610738255,
-        "normalized_score": 8.165548098434002
-      },
-      "musr": {
-        "name": "MUSR",
-        "value": 0.43563541666666666,
-        "normalized_score": 12.78776041666667
-      },
-      "mmlu_pro": {
-        "name": "MMLU-PRO",
-        "value": 0.45054853723404253,
-        "normalized_score": 38.949837470449175
-      }
-    },
-    "features": {
-      "is_not_available_on_hub": true,
-      "is_merged": true,
-      "is_moe": false,
-      "is_flagged": false,
-      "is_official_provider": false
-    },
-    "metadata": {
-      "upload_date": "2024-11-08",
-      "submission_date": "2024-11-08",
-      "generation": 1,
-      "base_model": "brgx53/3Blarenegv2-ECE-PRYMMAL-Martial (Merge)",
-      "hub_license": "apache-2.0",
-      "hub_hearts": 0,
-      "params_billions": 7.616,
-      "co2_cost": 1.3737919280859543
-    }
-  },
-  {
-    "id": "brgx53/Barracuda-PRYMMAL-ECE-TW3_bfloat16_5b24379a24328b77300eca1540915408151a9f20_False",
-    "model": {
-      "name": "brgx53/Barracuda-PRYMMAL-ECE-TW3",
-      "sha": "5b24379a24328b77300eca1540915408151a9f20",
-      "precision": "bfloat16",
-      "type": "basemergesandmoerges",
-      "weight_type": "Original",
-      "architecture": "Qwen2ForCausalLM",
-      "average_score": 3.9169280688644776,
-      "has_chat_template": false
-    },
-    "evaluations": {
-      "ifeval": {
-        "name": "IFEval",
-        "value": 0.16401592219754696,
-        "normalized_score": 16.401592219754694
-      },
-      "bbh": {
-        "name": "BBH",
-        "value": 0.30024599561514337,
-        "normalized_score": 2.753426597884674
-      },
-      "math": {
-        "name": "MATH Level 5",
-        "value": 0.0022658610271903325,
-        "normalized_score": 0.22658610271903326
-      },
-      "gpqa": {
-        "name": "GPQA",
-        "value": 0.2533557046979866,
-        "normalized_score": 0.44742729306487633
-      },
-      "musr": {
-        "name": "MUSR",
-        "value": 0.36085416666666664,
-        "normalized_score": 2.6401041666666667
-      },
-      "mmlu_pro": {
-        "name": "MMLU-PRO",
-        "value": 0.10929188829787234,
-        "normalized_score": 1.032432033096926
-      }
-    },
-    "features": {
-      "is_not_available_on_hub": false,
-      "is_merged": false,
-      "is_moe": false,
-      "is_flagged": false,
-      "is_official_provider": false
-    },
-    "metadata": {
-      "upload_date": "2025-03-10",
-      "submission_date": "2025-03-10",
-      "generation": 0,
-      "base_model": "brgx53/Barracuda-PRYMMAL-ECE-TW3",
-      "hub_license": "",
-      "hub_hearts": 0,
-      "params_billions": 1.544,
-      "co2_cost": 0.6295401721453008
-    }
-  },
-  {
-    "id": "brgx53/LaConfiance-PRYMMAL-ECE-TW3_float16_09165d67f26be2a1bc6a319424fc2f35b1faf840_False",
-    "model": {
-      "name": "brgx53/LaConfiance-PRYMMAL-ECE-TW3",
-      "sha": "09165d67f26be2a1bc6a319424fc2f35b1faf840",
-      "precision": "float16",
-      "type": "basemergesandmoerges",
-      "weight_type": "Original",
-      "architecture": "Qwen2ForCausalLM",
-      "average_score": 4.255168913434492,
-      "has_chat_template": false
-    },
-    "evaluations": {
-      "ifeval": {
-        "name": "IFEval",
-        "value": 0.1579209829917951,
-        "normalized_score": 15.79209829917951
-      },
-      "bbh": {
-        "name": "BBH",
-        "value": 0.29624186550380993,
-        "normalized_score": 1.9868050366680503
-      },
-      "math": {
-        "name": "MATH Level 5",
-        "value": 0.0,
-        "normalized_score": 0.0
-      },
-      "gpqa": {
-        "name": "GPQA",
-        "value": 0.2516778523489933,
-        "normalized_score": 0.22371364653244186
-      },
-      "musr": {
-        "name": "MUSR",
-        "value": 0.38457291666666665,
-        "normalized_score": 5.904947916666668
-      },
-      "mmlu_pro": {
-        "name": "MMLU-PRO",
-        "value": 0.11461103723404255,
-        "normalized_score": 1.6234485815602824
-      }
-    },
-    "features": {
-      "is_not_available_on_hub": false,
-      "is_merged": false,
-      "is_moe": false,
-      "is_flagged": false,
-      "is_official_provider": false
-    },
-    "metadata": {
-      "upload_date": "2025-03-10",
-      "submission_date": "2025-03-10",
-      "generation": 0,
-      "base_model": "brgx53/LaConfiance-PRYMMAL-ECE-TW3",
-      "hub_license": "",
-      "hub_hearts": 0,
-      "params_billions": 1.777,
-      "co2_cost": 0.6074110120256099
-    }
-  },
-  {
"bunnycore/Best-Mix-Llama-3.1-8B_float16_4bde0e60ac20d6944b1fbdfb3456efea8ba59ae9_False", - "model": { - "name": "bunnycore/Best-Mix-Llama-3.1-8B", - "sha": "4bde0e60ac20d6944b1fbdfb3456efea8ba59ae9", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 9.644596419047586, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20670598456539757, - "normalized_score": 20.670598456539757 - }, - "bbh": { - "name": "BBH", - "value": 0.343178100574048, - "normalized_score": 7.255275858385576 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2054380664652568, - "normalized_score": 20.54380664652568 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.2928541666666667, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15649933510638298, - "normalized_score": 6.277703900709218 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-10-10", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.8100050452288448 - } - }, - { - "id": "bunnycore/Blabbertron-1.0_bfloat16_329e37a8e0c1e6289418ec00ee3895315adae416_True", - "model": { - "name": "bunnycore/Blabbertron-1.0", - "sha": "329e37a8e0c1e6289418ec00ee3895315adae416", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.2247146459169, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7433376773627309, - "normalized_score": 74.3337677362731 - }, - "bbh": { - "name": "BBH", - "value": 0.5496552006589083, - "normalized_score": 36.054612445029626 - }, - "math": { - "name": "MATH Level 5", - "value": 0.49244712990936557, - "normalized_score": 49.244712990936556 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - "musr": { - "name": "MUSR", - "value": 0.4336875, - "normalized_score": 13.510937500000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4354222074468085, - "normalized_score": 37.2691341607565 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-03", - "submission_date": "2025-03-03", - "generation": 1, - "base_model": "bunnycore/Blabbertron-1.0 (Merge)", - "hub_license": "", - "hub_hearts": 3, - "params_billions": 7.613, - "co2_cost": 0.6719606311601921 - } - }, - { - "id": "bunnycore/Blabbertron-1.1_bfloat16_bfa4acda0123e6579f9460441c65e1c80f22762f_True", - "model": { - "name": "bunnycore/Blabbertron-1.1", - "sha": "bfa4acda0123e6579f9460441c65e1c80f22762f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.19288822858115, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7265267268625026, - "normalized_score": 72.65267268625026 - }, - "bbh": { - "name": "BBH", - "value": 0.5534000697428705, - 
"normalized_score": 36.60739009385717 - }, - "math": { - "name": "MATH Level 5", - "value": 0.48036253776435045, - "normalized_score": 48.036253776435046 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.4415625, - "normalized_score": 14.695312500000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44306848404255317, - "normalized_score": 38.11872044917258 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-04", - "submission_date": "2025-03-04", - "generation": 1, - "base_model": "bunnycore/Blabbertron-1.1 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.613, - "co2_cost": 0.6637390108814308 - } - }, - { - "id": "bunnycore/CyberCore-Qwen-2.1-7B_float16_98e69ba1cd70444b90178e1253e904d1892593c8_True", - "model": { - "name": "bunnycore/CyberCore-Qwen-2.1-7B", - "sha": "98e69ba1cd70444b90178e1253e904d1892593c8", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 30.98421070085202, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5765757080103016, - "normalized_score": 57.657570801030154 - }, - "bbh": { - "name": "BBH", - "value": 0.5572089082936126, - "normalized_score": 36.96653253167443 - }, - "math": { - "name": "MATH Level 5", - "value": 0.35876132930513593, - "normalized_score": 35.87613293051359 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.4144895833333333, - "normalized_score": 9.411197916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4444813829787234, - "normalized_score": 38.27570921985816 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-21", - "submission_date": "2024-11-23", - "generation": 1, - "base_model": "bunnycore/CyberCore-Qwen-2.1-7B (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 7.616, - "co2_cost": 1.3453683327965331 - } - }, - { - "id": "bunnycore/DeepQwen-3B-LCoT-SCE_bfloat16_200ae320fb7846016990e58b46acf78ce0a3b946_True", - "model": { - "name": "bunnycore/DeepQwen-3B-LCoT-SCE", - "sha": "200ae320fb7846016990e58b46acf78ce0a3b946", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 20.33845959063539, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4489809261647983, - "normalized_score": 44.89809261647983 - }, - "bbh": { - "name": "BBH", - "value": 0.45123121380305237, - "normalized_score": 23.559546324044987 - }, - "math": { - "name": "MATH Level 5", - "value": 0.24697885196374622, - "normalized_score": 24.69788519637462 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2625838926174497, - "normalized_score": 1.6778523489932917 - }, - "musr": { - "name": "MUSR", - "value": 0.35139583333333335, - "normalized_score": 1.7578124999999993 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3289561170212766, - "normalized_score": 25.439568557919618 - } - }, - "features": { - "is_not_available_on_hub": 
-      "is_not_available_on_hub": false,
-      "is_merged": false,
-      "is_moe": false,
-      "is_flagged": false,
-      "is_official_provider": false
-    },
-    "metadata": {
-      "upload_date": "2025-02-23",
-      "submission_date": "2025-02-26",
-      "generation": 1,
-      "base_model": "bunnycore/DeepQwen-3B-LCoT-SCE (Merge)",
-      "hub_license": "",
-      "hub_hearts": 1,
-      "params_billions": 3.396,
-      "co2_cost": 0.7734388198390433
-    }
-  },
-  {
-    "id": "bunnycore/DeepSeek-R1-Distill-Qwen-7B-RRP-Ex_bfloat16_cd233008ad60c15ed06f7de327e11f0734432b85_True",
-    "model": {
-      "name": "bunnycore/DeepSeek-R1-Distill-Qwen-7B-RRP-Ex",
-      "sha": "cd233008ad60c15ed06f7de327e11f0734432b85",
-      "precision": "bfloat16",
-      "type": "chatmodels",
-      "weight_type": "Original",
-      "architecture": "Qwen2ForCausalLM",
-      "average_score": 14.616489832350064,
-      "has_chat_template": true
-    },
-    "evaluations": {
-      "ifeval": {
-        "name": "IFEval",
-        "value": 0.39010492160800014,
-        "normalized_score": 39.01049216080001
-      },
-      "bbh": {
-        "name": "BBH",
-        "value": 0.3494110718041537,
-        "normalized_score": 8.396453841395422
-      },
-      "math": {
-        "name": "MATH Level 5",
-        "value": 0.16540785498489427,
-        "normalized_score": 16.540785498489427
-      },
-      "gpqa": {
-        "name": "GPQA",
-        "value": 0.2785234899328859,
-        "normalized_score": 3.8031319910514525
-      },
-      "musr": {
-        "name": "MUSR",
-        "value": 0.3663125,
-        "normalized_score": 3.1890625000000004
-      },
-      "mmlu_pro": {
-        "name": "MMLU-PRO",
-        "value": 0.2508311170212766,
-        "normalized_score": 16.759013002364064
-      }
-    },
-    "features": {
-      "is_not_available_on_hub": false,
-      "is_merged": false,
-      "is_moe": false,
-      "is_flagged": false,
-      "is_official_provider": false
-    },
-    "metadata": {
-      "upload_date": "",
-      "submission_date": "2025-01-27",
-      "generation": 0,
-      "base_model": "Removed",
-      "hub_license": "",
-      "hub_hearts": 0,
-      "params_billions": 7.616,
-      "co2_cost": 1.3365016987234717
-    }
-  },
-  {
-    "id": "bunnycore/DeepThinker-7B-Sce-v1_bfloat16_93d03ae7c0059068cdd1cbfbbca6f3822d699419_True",
-    "model": {
-      "name": "bunnycore/DeepThinker-7B-Sce-v1",
-      "sha": "93d03ae7c0059068cdd1cbfbbca6f3822d699419",
-      "precision": "bfloat16",
-      "type": "basemergesandmoerges",
-      "weight_type": "Original",
-      "architecture": "Qwen2ForCausalLM",
-      "average_score": 4.766577429203108,
-      "has_chat_template": true
-    },
-    "evaluations": {
-      "ifeval": {
-        "name": "IFEval",
-        "value": 0.12180015691698028,
-        "normalized_score": 12.180015691698028
-      },
-      "bbh": {
-        "name": "BBH",
-        "value": 0.30182806791122846,
-        "normalized_score": 2.5205978669314697
-      },
-      "math": {
-        "name": "MATH Level 5",
-        "value": 0.009818731117824773,
-        "normalized_score": 0.9818731117824773
-      },
-      "gpqa": {
-        "name": "GPQA",
-        "value": 0.2516778523489933,
-        "normalized_score": 0.22371364653244186
-      },
-      "musr": {
-        "name": "MUSR",
-        "value": 0.41942708333333334,
-        "normalized_score": 11.328385416666668
-      },
-      "mmlu_pro": {
-        "name": "MMLU-PRO",
-        "value": 0.11228390957446809,
-        "normalized_score": 1.3648788416075646
-      }
-    },
-    "features": {
-      "is_not_available_on_hub": false,
-      "is_merged": false,
-      "is_moe": false,
-      "is_flagged": false,
-      "is_official_provider": false
-    },
-    "metadata": {
-      "upload_date": "",
-      "submission_date": "2025-02-06",
-      "generation": 0,
-      "base_model": "Removed",
-      "hub_license": "",
-      "hub_hearts": 0,
-      "params_billions": 7.613,
-      "co2_cost": 1.4259256537840235
-    }
-  },
-  {
-    "id": "bunnycore/DeepThinker-7B-Sce-v2_bfloat16_7be8b128a7ce3a065aa6aa6518dc3d0c3c4c24ff_True",
-    "model": {
-      "name": "bunnycore/DeepThinker-7B-Sce-v2",
"7be8b128a7ce3a065aa6aa6518dc3d0c3c4c24ff", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 5.521537075022995, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16306621985221434, - "normalized_score": 16.306621985221433 - }, - "bbh": { - "name": "BBH", - "value": 0.3056842322947901, - "normalized_score": 3.256507303765563 - }, - "math": { - "name": "MATH Level 5", - "value": 0.011329305135951661, - "normalized_score": 1.1329305135951662 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.4100625, - "normalized_score": 9.691145833333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11461103723404255, - "normalized_score": 1.6234485815602824 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-06", - "submission_date": "2025-02-06", - "generation": 1, - "base_model": "bunnycore/DeepThinker-7B-Sce-v2 (Merge)", - "hub_license": "", - "hub_hearts": 3, - "params_billions": 7.613, - "co2_cost": 1.4249684990843643 - } - }, - { - "id": "bunnycore/FuseCyberMix-Qwen-2.5-7B-Instruct_bfloat16_e8cb3470dfa0c8e1d9f661168027241a0908876f_True", - "model": { - "name": "bunnycore/FuseCyberMix-Qwen-2.5-7B-Instruct", - "sha": "e8cb3470dfa0c8e1d9f661168027241a0908876f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 34.50574650183828, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7019220113742648, - "normalized_score": 70.19220113742648 - }, - "bbh": { - "name": "BBH", - "value": 0.5517973725429837, - "normalized_score": 36.36861871726955 - }, - "math": { - "name": "MATH Level 5", - "value": 0.48413897280966767, - "normalized_score": 48.413897280966765 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.40203125, - "normalized_score": 8.720572916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43367686170212766, - "normalized_score": 37.07520685579196 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-20", - "submission_date": "2024-12-20", - "generation": 1, - "base_model": "bunnycore/FuseCyberMix-Qwen-2.5-7B-Instruct (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 7.616, - "co2_cost": 1.302794593476737 - } - }, - { - "id": "bunnycore/FuseQwQen-7B_bfloat16_18b2a6249bdef2ff53d112b8008a7b8c3b8b9778_True", - "model": { - "name": "bunnycore/FuseQwQen-7B", - "sha": "18b2a6249bdef2ff53d112b8008a7b8c3b8b9778", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 34.67786732785649, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7274509412802475, - "normalized_score": 72.74509412802476 - }, - "bbh": { - "name": "BBH", - "value": 0.5504256932515404, - "normalized_score": 35.909588839820735 - }, - "math": { - "name": "MATH Level 5", - 
"value": 0.43655589123867067, - "normalized_score": 43.65558912386707 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.4216875, - "normalized_score": 11.97760416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4406582446808511, - "normalized_score": 37.85091607565011 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-22", - "submission_date": "2024-12-22", - "generation": 1, - "base_model": "bunnycore/FuseQwQen-7B (Merge)", - "hub_license": "", - "hub_hearts": 3, - "params_billions": 7.616, - "co2_cost": 1.4317790858062496 - } - }, - { - "id": "bunnycore/FwF-Qwen-7B-0.1_bfloat16_7a6e2f5aac25da186af9a35ca7b10cad1f0f8e40_True", - "model": { - "name": "bunnycore/FwF-Qwen-7B-0.1", - "sha": "7a6e2f5aac25da186af9a35ca7b10cad1f0f8e40", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 22.055184230946704, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.30045390674521383, - "normalized_score": 30.04539067452138 - }, - "bbh": { - "name": "BBH", - "value": 0.5019272523147252, - "normalized_score": 30.502106087142987 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2764350453172205, - "normalized_score": 27.64350453172205 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.39520833333333333, - "normalized_score": 7.334375000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4060837765957447, - "normalized_score": 34.0093085106383 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-06", - "submission_date": "2025-01-06", - "generation": 1, - "base_model": "bunnycore/FwF-Qwen-7B-0.1 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.642909230556383 - } - }, - { - "id": "bunnycore/FwF-Qwen-7B-0.2_bfloat16_f328f22194bfe9586d7dc7bb671af45922e068c7_True", - "model": { - "name": "bunnycore/FwF-Qwen-7B-0.2", - "sha": "f328f22194bfe9586d7dc7bb671af45922e068c7", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 30.049606790844468, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44790710869382133, - "normalized_score": 44.790710869382124 - }, - "bbh": { - "name": "BBH", - "value": 0.5596406929346521, - "normalized_score": 37.66718042893359 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4259818731117825, - "normalized_score": 42.59818731117825 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.42178125, - "normalized_score": 12.289322916666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4382480053191489, - "normalized_score": 37.58311170212765 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - 
"upload_date": "2025-01-07", - "submission_date": "2025-01-15", - "generation": 1, - "base_model": "bunnycore/FwF-Qwen-7B-0.2 (Merge)", - "hub_license": "", - "hub_hearts": 5, - "params_billions": 7.616, - "co2_cost": 1.550554917300331 - } - }, - { - "id": "bunnycore/Gemma-2-2B-Smart_bfloat16_426fc5f77a0f217150567a10e7fec5234cafa29b_True", - "model": { - "name": "bunnycore/Gemma-2-2B-Smart", - "sha": "426fc5f77a0f217150567a10e7fec5234cafa29b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 10.674608537656132, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.13206625088099574, - "normalized_score": 13.206625088099575 - }, - "bbh": { - "name": "BBH", - "value": 0.39742674570492836, - "normalized_score": 15.07045874494903 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03323262839879154, - "normalized_score": 3.3232628398791544 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.4248541666666667, - "normalized_score": 12.240104166666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2426030585106383, - "normalized_score": 15.844784278959809 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-13", - "submission_date": "2025-01-13", - "generation": 1, - "base_model": "bunnycore/Gemma-2-2B-Smart (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 2.614, - "co2_cost": 2.481796983463691 - } - }, - { - "id": "bunnycore/Gemma2-9B-TitanFusion_bfloat16_e2bb7d187d8dba7488fc134af45ca9b3139adfb4_True", - "model": { - "name": "bunnycore/Gemma2-9B-TitanFusion", - "sha": "e2bb7d187d8dba7488fc134af45ca9b3139adfb4", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 19.471500934700618, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16184169115724056, - "normalized_score": 16.184169115724057 - }, - "bbh": { - "name": "BBH", - "value": 0.5712026020785131, - "normalized_score": 39.05056413790993 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0770392749244713, - "normalized_score": 7.7039274924471295 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33221476510067116, - "normalized_score": 10.96196868008949 - }, - "musr": { - "name": "MUSR", - "value": 0.41362499999999996, - "normalized_score": 10.036458333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39602726063829785, - "normalized_score": 32.89191784869976 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-17", - "submission_date": "2025-02-13", - "generation": 1, - "base_model": "bunnycore/Gemma2-9B-TitanFusion (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 10.159, - "co2_cost": 2.0467130344261024 - } - }, - { - "id": "bunnycore/HyperLlama-3.1-8B_bfloat16_659b18ffaee2c1e8dbe8a9a56a44502325d71696_True", - "model": { - "name": "bunnycore/HyperLlama-3.1-8B", - "sha": "659b18ffaee2c1e8dbe8a9a56a44502325d71696", - "precision": "bfloat16", - "type": "basemergesandmoerges", - 
"weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.44897570570595, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7883005979689446, - "normalized_score": 78.83005979689446 - }, - "bbh": { - "name": "BBH", - "value": 0.5103385292046213, - "normalized_score": 29.80665561261375 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18277945619335348, - "normalized_score": 18.27794561933535 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28691275167785235, - "normalized_score": 4.921700223713646 - }, - "musr": { - "name": "MUSR", - "value": 0.38292708333333336, - "normalized_score": 7.932552083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3783244680851064, - "normalized_score": 30.92494089834515 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-04", - "submission_date": "2024-09-05", - "generation": 0, - "base_model": "bunnycore/HyperLlama-3.1-8B", - "hub_license": "apache-2.0", - "hub_hearts": 5, - "params_billions": 8.03, - "co2_cost": 1.7890890370769081 - } - }, - { - "id": "bunnycore/Llama-3.1-8B-TitanFusion-Mix_bfloat16_9eb89de7df048276ccbc4405ce4f005f9185f82e_False", - "model": { - "name": "bunnycore/Llama-3.1-8B-TitanFusion-Mix", - "sha": "9eb89de7df048276ccbc4405ce4f005f9185f82e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.01224783778518, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4924954675815725, - "normalized_score": 49.24954675815725 - }, - "bbh": { - "name": "BBH", - "value": 0.5755964197928182, - "normalized_score": 39.535483334813364 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1283987915407855, - "normalized_score": 12.83987915407855 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2953020134228188, - "normalized_score": 6.040268456375841 - }, - "musr": { - "name": "MUSR", - "value": 0.4316979166666666, - "normalized_score": 12.462239583333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3695146276595745, - "normalized_score": 29.94606973995272 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-23", - "submission_date": "2024-09-23", - "generation": 1, - "base_model": "bunnycore/Llama-3.1-8B-TitanFusion-Mix (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.8661696080057097 - } - }, - { - "id": "bunnycore/Llama-3.1-8B-TitanFusion-v3_bfloat16_ea8269ac3b2e9c0dc855a9089251ebdb273ada16_False", - "model": { - "name": "bunnycore/Llama-3.1-8B-TitanFusion-v3", - "sha": "ea8269ac3b2e9c0dc855a9089251ebdb273ada16", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.219132767586814, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4809549772381725, - "normalized_score": 48.095497723817246 - }, - "bbh": { - "name": "BBH", - "value": 0.5262113071794826, - "normalized_score": 32.072941144614084 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1419939577039275, - "normalized_score": 14.19939577039275 - }, - 
"gpqa": { - "name": "GPQA", - "value": 0.3087248322147651, - "normalized_score": 7.829977628635347 - }, - "musr": { - "name": "MUSR", - "value": 0.4302083333333333, - "normalized_score": 11.942708333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38056848404255317, - "normalized_score": 31.174276004728128 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-22", - "submission_date": "2024-09-22", - "generation": 1, - "base_model": "bunnycore/Llama-3.1-8B-TitanFusion-v3 (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.7756484366333045 - } - }, - { - "id": "bunnycore/Llama-3.2-3B-All-Mix_float16_adacdd571c4073990ecf05a23277793e9e5f0410_True", - "model": { - "name": "bunnycore/Llama-3.2-3B-All-Mix", - "sha": "adacdd571c4073990ecf05a23277793e9e5f0410", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.94517856311552, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7226049105262924, - "normalized_score": 72.26049105262925 - }, - "bbh": { - "name": "BBH", - "value": 0.45083384652782293, - "normalized_score": 22.516311192334513 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15030211480362538, - "normalized_score": 15.030211480362537 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2625838926174497, - "normalized_score": 1.6778523489932917 - }, - "musr": { - "name": "MUSR", - "value": 0.32869791666666665, - "normalized_score": 2.187239583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3159906914893617, - "normalized_score": 23.99896572104019 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-20", - "submission_date": "2024-10-20", - "generation": 1, - "base_model": "bunnycore/Llama-3.2-3B-All-Mix (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 3.607, - "co2_cost": 1.4806189654910051 - } - }, - { - "id": "bunnycore/Llama-3.2-3B-Bespoke-Thought_bfloat16_e8d08b4548da570ba29ceb4e5ea4a0a75c14377a_True", - "model": { - "name": "bunnycore/Llama-3.2-3B-Bespoke-Thought", - "sha": "e8d08b4548da570ba29ceb4e5ea4a0a75c14377a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.009908616281866, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4112621178473118, - "normalized_score": 41.12621178473118 - }, - "bbh": { - "name": "BBH", - "value": 0.45217398665008424, - "normalized_score": 22.51656576646296 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1646525679758308, - "normalized_score": 16.46525679758308 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.33025, - "normalized_score": 2.381250000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31100398936170215, - "normalized_score": 23.444887706855795 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { 
- "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "bunnycore/Llama-3.2-3B-Bespoke-Thought (Merge)", - "hub_license": "", - "hub_hearts": 3, - "params_billions": 3.213, - "co2_cost": 1.2235100982970288 - } - }, - { - "id": "bunnycore/Llama-3.2-3B-Booval_bfloat16_d7f3449f89fa86d8e2c411aa4ca10ad552a62803_True", - "model": { - "name": "bunnycore/Llama-3.2-3B-Booval", - "sha": "d7f3449f89fa86d8e2c411aa4ca10ad552a62803", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.56545090409925, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6669259786256023, - "normalized_score": 66.69259786256023 - }, - "bbh": { - "name": "BBH", - "value": 0.45143904014934083, - "normalized_score": 22.515991469149508 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1268882175226586, - "normalized_score": 12.688821752265861 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26677852348993286, - "normalized_score": 2.2371364653243813 - }, - "musr": { - "name": "MUSR", - "value": 0.3394270833333333, - "normalized_score": 2.3950520833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30576795212765956, - "normalized_score": 22.86310579196217 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-27", - "submission_date": "2024-10-28", - "generation": 1, - "base_model": "bunnycore/Llama-3.2-3B-Booval (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 3.213, - "co2_cost": 1.3104814708176846 - } - }, - { - "id": "bunnycore/Llama-3.2-3B-Deep-Test_bfloat16_cdf5651d7e39bfc6e70d4908137d103013c09109_True", - "model": { - "name": "bunnycore/Llama-3.2-3B-Deep-Test", - "sha": "cdf5651d7e39bfc6e70d4908137d103013c09109", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 3.9736733260966406, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17753006467284582, - "normalized_score": 17.75300646728458 - }, - "bbh": { - "name": "BBH", - "value": 0.29502574011260374, - "normalized_score": 2.5723233888621087 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2516778523489933, - "normalized_score": 0.22371364653244186 - }, - "musr": { - "name": "MUSR", - "value": 0.3646666666666667, - "normalized_score": 2.7500000000000018 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10488696808510638, - "normalized_score": 0.542996453900708 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-01", - "generation": 1, - "base_model": "bunnycore/Llama-3.2-3B-Deep-Test (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.803, - "co2_cost": 0.5965275921771368 - } - }, - { - "id": "bunnycore/Llama-3.2-3B-Deep-Test_float16_5fc2f2f533e2de433d4bf99de72745aa2e32f914_True", - "model": { - "name": "bunnycore/Llama-3.2-3B-Deep-Test", - "sha": "5fc2f2f533e2de433d4bf99de72745aa2e32f914", - "precision": "float16", - "type": 
"basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.441056902448857, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.46516797652451053, - "normalized_score": 46.51679765245105 - }, - "bbh": { - "name": "BBH", - "value": 0.4530851376077318, - "normalized_score": 22.914432093725296 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1283987915407855, - "normalized_score": 12.83987915407855 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.33939583333333334, - "normalized_score": 2.5578125000000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3152426861702128, - "normalized_score": 23.91585401891253 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-02", - "generation": 1, - "base_model": "bunnycore/Llama-3.2-3B-Deep-Test (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.607, - "co2_cost": 1.7395749165213583 - } - }, - { - "id": "bunnycore/Llama-3.2-3B-Della_float16_53bc0a13b30227548abc34d4a5d7242e7a3dee74_True", - "model": { - "name": "bunnycore/Llama-3.2-3B-Della", - "sha": "53bc0a13b30227548abc34d4a5d7242e7a3dee74", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 12.217685988950697, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.35608297096149333, - "normalized_score": 35.60829709614933 - }, - "bbh": { - "name": "BBH", - "value": 0.36834936417932634, - "normalized_score": 11.46745923572203 - }, - "math": { - "name": "MATH Level 5", - "value": 0.030211480362537766, - "normalized_score": 3.0211480362537766 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.39015625, - "normalized_score": 7.202864583333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.21284906914893617, - "normalized_score": 12.538785460992907 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-19", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.607, - "co2_cost": 1.2548360029719627 - } - }, - { - "id": "bunnycore/Llama-3.2-3B-Long-Think_float16_a8522bfc03657b41b0541b164a98ddff302a6fd2_True", - "model": { - "name": "bunnycore/Llama-3.2-3B-Long-Think", - "sha": "a8522bfc03657b41b0541b164a98ddff302a6fd2", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.825990076474692, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5473499204333391, - "normalized_score": 54.73499204333391 - }, - "bbh": { - "name": "BBH", - "value": 0.4610394542442049, - "normalized_score": 24.22680316567029 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14577039274924472, - "normalized_score": 14.577039274924472 - }, - "gpqa": { - "name": "GPQA", - "value": 
0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.33955208333333337, - "normalized_score": 1.2106770833333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30477061170212766, - "normalized_score": 22.752290189125294 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-24", - "submission_date": "2024-10-24", - "generation": 1, - "base_model": "bunnycore/Llama-3.2-3B-Long-Think (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 3.213, - "co2_cost": 2.071543124319194 - } - }, - { - "id": "bunnycore/Llama-3.2-3B-Mix-Skill_float16_d07d6e733aaeaf48cb6616228d00104b05b52afd_True", - "model": { - "name": "bunnycore/Llama-3.2-3B-Mix-Skill", - "sha": "d07d6e733aaeaf48cb6616228d00104b05b52afd", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.73956606363181, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6404229666174639, - "normalized_score": 64.04229666174639 - }, - "bbh": { - "name": "BBH", - "value": 0.45818358891543803, - "normalized_score": 23.784246657651128 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1472809667673716, - "normalized_score": 14.72809667673716 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.33961458333333333, - "normalized_score": 2.751822916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3120844414893617, - "normalized_score": 23.56493794326241 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-24", - "submission_date": "2024-10-24", - "generation": 1, - "base_model": "bunnycore/Llama-3.2-3B-Mix-Skill (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 3.607, - "co2_cost": 1.370133583615858 - } - }, - { - "id": "bunnycore/Llama-3.2-3B-ProdigyPlus_float16_799f7669701ecf27f4c3e29998dd839b4d54c408_True", - "model": { - "name": "bunnycore/Llama-3.2-3B-ProdigyPlus", - "sha": "799f7669701ecf27f4c3e29998dd839b4d54c408", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.356007600580043, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.40152018865499095, - "normalized_score": 40.1520188654991 - }, - "bbh": { - "name": "BBH", - "value": 0.4392279045834126, - "normalized_score": 20.622988697146294 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11555891238670694, - "normalized_score": 11.555891238670695 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.35800000000000004, - "normalized_score": 3.1500000000000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28174867021276595, - "normalized_score": 20.194296690307326 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": 
"2024-10-25", - "submission_date": "2024-10-25", - "generation": 1, - "base_model": "bunnycore/Llama-3.2-3B-ProdigyPlus (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 3.607, - "co2_cost": 1.4278795718496495 - } - }, - { - "id": "bunnycore/Llama-3.2-3B-ProdigyPlusPlus_bfloat16_512865708a7ec9754997fb404b1ffc0752b099d7_True", - "model": { - "name": "bunnycore/Llama-3.2-3B-ProdigyPlusPlus", - "sha": "512865708a7ec9754997fb404b1ffc0752b099d7", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 6.708176427584118, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1645157072124186, - "normalized_score": 16.45157072124186 - }, - "bbh": { - "name": "BBH", - "value": 0.3689926047041594, - "normalized_score": 11.56197768121448 - }, - "math": { - "name": "MATH Level 5", - "value": 0.045317220543806644, - "normalized_score": 4.531722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2533557046979866, - "normalized_score": 0.44742729306487633 - }, - "musr": { - "name": "MUSR", - "value": 0.354125, - "normalized_score": 1.698958333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15001662234042554, - "normalized_score": 5.557402482269504 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-28", - "submission_date": "2024-10-28", - "generation": 1, - "base_model": "bunnycore/Llama-3.2-3B-ProdigyPlusPlus (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.607, - "co2_cost": 1.3431514349940856 - } - }, - { - "id": "bunnycore/Llama-3.2-3B-RP-DeepThink_bfloat16_b728a56e39f2ac38926b380f5327a932ff04f2e7_True", - "model": { - "name": "bunnycore/Llama-3.2-3B-RP-DeepThink", - "sha": "b728a56e39f2ac38926b380f5327a932ff04f2e7", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.21103861527769, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7143867161354096, - "normalized_score": 71.43867161354096 - }, - "bbh": { - "name": "BBH", - "value": 0.45625632795830356, - "normalized_score": 23.757462281904537 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1608761329305136, - "normalized_score": 16.08761329305136 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.33021875, - "normalized_score": 0.9440104166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32421875, - "normalized_score": 24.913194444444443 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "bunnycore/Llama-3.2-3B-RP-DeepThink (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 3.607, - "co2_cost": 1.3272197823874436 - } - }, - { - "id": "bunnycore/Llama-3.2-3B-RRStock_bfloat16_79c38a10ff5b4be3618e8cb1dad6b83a67570499_True", - "model": { - "name": "bunnycore/Llama-3.2-3B-RRStock", - "sha": "79c38a10ff5b4be3618e8cb1dad6b83a67570499", - "precision": "bfloat16", - "type": 
"basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.67468863966219, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6657269378582162, - "normalized_score": 66.57269378582163 - }, - "bbh": { - "name": "BBH", - "value": 0.45676937648721455, - "normalized_score": 23.921831246946002 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16993957703927492, - "normalized_score": 16.993957703927492 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.3314270833333333, - "normalized_score": 1.5950520833333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32355385638297873, - "normalized_score": 24.839317375886523 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "bunnycore/Llama-3.2-3B-RRStock (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.607, - "co2_cost": 1.14466366206302 - } - }, - { - "id": "bunnycore/Llama-3.2-3B-ToxicKod_bfloat16_5491f02af0048f1549b3eeca74bd5c5e5a675363_True", - "model": { - "name": "bunnycore/Llama-3.2-3B-ToxicKod", - "sha": "5491f02af0048f1549b3eeca74bd5c5e5a675363", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.269124274869156, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6319299458769398, - "normalized_score": 63.19299458769399 - }, - "bbh": { - "name": "BBH", - "value": 0.4525429005077621, - "normalized_score": 22.983327840830782 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16993957703927492, - "normalized_score": 16.993957703927492 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.34745833333333337, - "normalized_score": 1.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28798204787234044, - "normalized_score": 20.886894208037827 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-10", - "submission_date": "2025-03-11", - "generation": 1, - "base_model": "bunnycore/Llama-3.2-3B-ToxicKod (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 3.213, - "co2_cost": 1.210684779049432 - } - }, - { - "id": "bunnycore/Llama-3.2-3b-RP-Toxic-Fuse_bfloat16_ca68bc3095297b349bd87e6eed2e419fcf32fbe8_True", - "model": { - "name": "bunnycore/Llama-3.2-3b-RP-Toxic-Fuse", - "sha": "ca68bc3095297b349bd87e6eed2e419fcf32fbe8", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.277381748530818, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.683362367407368, - "normalized_score": 68.33623674073681 - }, - "bbh": { - "name": "BBH", - "value": 0.46497242330684924, - "normalized_score": 24.366030905438237 - }, - "math": { - "name": "MATH Level 5", - "value": 0.24018126888217523, - "normalized_score": 
24.018126888217523 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.3953645833333333, - "normalized_score": 7.853906250000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31058843085106386, - "normalized_score": 23.39871453900709 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-11", - "submission_date": "2025-03-11", - "generation": 1, - "base_model": "bunnycore/Llama-3.2-3b-RP-Toxic-Fuse (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 3.213, - "co2_cost": 0.5789514823163004 - } - }, - { - "id": "bunnycore/Maestro-S1k-7B-Sce_bfloat16_a0acad45ee6dd3cac16f569a5f86497258643bc0_True", - "model": { - "name": "bunnycore/Maestro-S1k-7B-Sce", - "sha": "a0acad45ee6dd3cac16f569a5f86497258643bc0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 6.835617935266296, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2522684255553044, - "normalized_score": 25.22684255553044 - }, - "bbh": { - "name": "BBH", - "value": 0.3104380842714463, - "normalized_score": 4.8440468816556645 - }, - "math": { - "name": "MATH Level 5", - "value": 0.027945619335347432, - "normalized_score": 2.794561933534743 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.3768229166666666, - "normalized_score": 4.802864583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11702127659574468, - "normalized_score": 1.891252955082742 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-20", - "submission_date": "2025-02-21", - "generation": 1, - "base_model": "bunnycore/Maestro-S1k-7B-Sce (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.613, - "co2_cost": 0.7234155444945778 - } - }, - { - "id": "bunnycore/Phi-3.5-mini-TitanFusion-0.1_bfloat16_72939b8b75e23b22b1758bb05a842e5834f75d96_True", - "model": { - "name": "bunnycore/Phi-3.5-mini-TitanFusion-0.1", - "sha": "72939b8b75e23b22b1758bb05a842e5834f75d96", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 26.23579178503736, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5227950726295119, - "normalized_score": 52.279507262951185 - }, - "bbh": { - "name": "BBH", - "value": 0.5373733988565133, - "normalized_score": 35.446219076522446 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11858006042296072, - "normalized_score": 11.858006042296072 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3313758389261745, - "normalized_score": 10.850111856823268 - }, - "musr": { - "name": "MUSR", - "value": 0.4453125, - "normalized_score": 15.797395833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3806515957446808, - "normalized_score": 31.183510638297868 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - 
"metadata": { - "upload_date": "2024-10-13", - "submission_date": "2024-10-13", - "generation": 1, - "base_model": "bunnycore/Phi-3.5-mini-TitanFusion-0.1 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.821, - "co2_cost": 1.593770933170724 - } - }, - { - "id": "bunnycore/Phi-4-Model-Stock_bfloat16_4b7a2eafbf33e8cf7552b7ed62305c292c157895_True", - "model": { - "name": "bunnycore/Phi-4-Model-Stock", - "sha": "4b7a2eafbf33e8cf7552b7ed62305c292c157895", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 40.7857160927999, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6878837041272712, - "normalized_score": 68.78837041272712 - }, - "bbh": { - "name": "BBH", - "value": 0.6889699980822082, - "normalized_score": 55.31567822091236 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4297583081570997, - "normalized_score": 42.97583081570997 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3548657718120805, - "normalized_score": 13.982102908277403 - }, - "musr": { - "name": "MUSR", - "value": 0.44413541666666667, - "normalized_score": 15.11692708333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5368184840425532, - "normalized_score": 48.53538711583924 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-11", - "submission_date": "2025-01-11", - "generation": 1, - "base_model": "bunnycore/Phi-4-Model-Stock (Merge)", - "hub_license": "mit", - "hub_hearts": 6, - "params_billions": 14.66, - "co2_cost": 1.919819940670153 - } - }, - { - "id": "bunnycore/Phi-4-Model-Stock-v2_bfloat16_e69d6350c5c930dff7afd24bfaa584e0dfff0334_True", - "model": { - "name": "bunnycore/Phi-4-Model-Stock-v2", - "sha": "e69d6350c5c930dff7afd24bfaa584e0dfff0334", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 39.1446195362203, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.63752510006782, - "normalized_score": 63.752510006782 - }, - "bbh": { - "name": "BBH", - "value": 0.6824667320746144, - "normalized_score": 54.68637369280513 - }, - "math": { - "name": "MATH Level 5", - "value": 0.37537764350453173, - "normalized_score": 37.53776435045317 - }, - "gpqa": { - "name": "GPQA", - "value": 0.348993288590604, - "normalized_score": 13.19910514541387 - }, - "musr": { - "name": "MUSR", - "value": 0.46617708333333335, - "normalized_score": 17.57213541666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5330784574468085, - "normalized_score": 48.119828605200944 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-17", - "submission_date": "2025-01-17", - "generation": 1, - "base_model": "bunnycore/Phi-4-Model-Stock-v2 (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 14.66, - "co2_cost": 1.8849691486581048 - } - }, - { - "id": "bunnycore/Phi-4-Model-Stock-v3_bfloat16_abc3cbe3f4f850ea9f317595c3ee3ff8e22fe220_True", - "model": { - "name": "bunnycore/Phi-4-Model-Stock-v3", - "sha": "abc3cbe3f4f850ea9f317595c3ee3ff8e22fe220", - "precision": "bfloat16", - "type": "basemergesandmoerges", - 
"weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 37.67299066187767, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5911636679565775, - "normalized_score": 59.11636679565775 - }, - "bbh": { - "name": "BBH", - "value": 0.6726298549419627, - "normalized_score": 52.78361125759003 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4901812688821752, - "normalized_score": 49.01812688821752 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28942953020134227, - "normalized_score": 5.257270693512303 - }, - "musr": { - "name": "MUSR", - "value": 0.41663541666666665, - "normalized_score": 11.179427083333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5381482712765957, - "normalized_score": 48.68314125295508 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-18", - "submission_date": "2025-01-18", - "generation": 1, - "base_model": "bunnycore/Phi-4-Model-Stock-v3 (Merge)", - "hub_license": "", - "hub_hearts": 3, - "params_billions": 14.66, - "co2_cost": 1.861207931804194 - } - }, - { - "id": "bunnycore/Phi-4-Model-Stock-v4_bfloat16_76e0de4ca96533eaa7bfec95b9cc4caeb4e1db6b_True", - "model": { - "name": "bunnycore/Phi-4-Model-Stock-v4", - "sha": "76e0de4ca96533eaa7bfec95b9cc4caeb4e1db6b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 41.216841837982095, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7110145524984818, - "normalized_score": 71.10145524984819 - }, - "bbh": { - "name": "BBH", - "value": 0.6924302574038697, - "normalized_score": 55.90173559155676 - }, - "math": { - "name": "MATH Level 5", - "value": 0.38293051359516617, - "normalized_score": 38.29305135951662 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3691275167785235, - "normalized_score": 15.883668903803136 - }, - "musr": { - "name": "MUSR", - "value": 0.4610625, - "normalized_score": 17.299479166666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5393949468085106, - "normalized_score": 48.82166075650118 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-25", - "generation": 1, - "base_model": "bunnycore/Phi-4-Model-Stock-v4 (Merge)", - "hub_license": "", - "hub_hearts": 9, - "params_billions": 14.66, - "co2_cost": 1.8868426755694214 - } - }, - { - "id": "bunnycore/Phi-4-RP-v0_bfloat16_6ff65d49b76c23122a1d8767e17714db32c58760_True", - "model": { - "name": "bunnycore/Phi-4-RP-v0", - "sha": "6ff65d49b76c23122a1d8767e17714db32c58760", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 38.21180801915013, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6827129793392643, - "normalized_score": 68.27129793392643 - }, - "bbh": { - "name": "BBH", - "value": 0.685633603278299, - "normalized_score": 54.8449853466199 - }, - "math": { - "name": "MATH Level 5", - "value": 0.33157099697885195, - "normalized_score": 33.157099697885194 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3523489932885906, - 
"normalized_score": 13.646532438478745 - }, - "musr": { - "name": "MUSR", - "value": 0.41409375, - "normalized_score": 10.861718750000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5364029255319149, - "normalized_score": 48.48921394799055 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-10", - "submission_date": "2025-01-10", - "generation": 1, - "base_model": "bunnycore/Phi-4-RP-v0 (Merge)", - "hub_license": "mit", - "hub_hearts": 3, - "params_billions": 14.66, - "co2_cost": 1.8469033311560676 - } - }, - { - "id": "bunnycore/Phi-4-RR-Shoup_bfloat16_3aee2a7da66705498f159afdf1c077470a7beae7_True", - "model": { - "name": "bunnycore/Phi-4-RR-Shoup", - "sha": "3aee2a7da66705498f159afdf1c077470a7beae7", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 41.27997062998439, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6586579165503088, - "normalized_score": 65.86579165503088 - }, - "bbh": { - "name": "BBH", - "value": 0.6947025970028124, - "normalized_score": 56.10839417299811 - }, - "math": { - "name": "MATH Level 5", - "value": 0.49924471299093653, - "normalized_score": 49.92447129909365 - }, - "gpqa": { - "name": "GPQA", - "value": 0.337248322147651, - "normalized_score": 11.633109619686799 - }, - "musr": { - "name": "MUSR", - "value": 0.44404166666666667, - "normalized_score": 14.93854166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5428856382978723, - "normalized_score": 49.20951536643025 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "bunnycore/Phi-4-RR-Shoup (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 14.66, - "co2_cost": 1.8697275952784913 - } - }, - { - "id": "bunnycore/Phi-4-RStock-v0.1_bfloat16_47627dfbe666963edc311248551a029e81083dfa_True", - "model": { - "name": "bunnycore/Phi-4-RStock-v0.1", - "sha": "47627dfbe666963edc311248551a029e81083dfa", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 41.10230758798472, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7018721436898541, - "normalized_score": 70.1872143689854 - }, - "bbh": { - "name": "BBH", - "value": 0.6928310064675399, - "normalized_score": 55.976291722373624 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3950151057401813, - "normalized_score": 39.50151057401813 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3649328859060403, - "normalized_score": 15.324384787472036 - }, - "musr": { - "name": "MUSR", - "value": 0.45836458333333335, - "normalized_score": 16.728906249999994 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5400598404255319, - "normalized_score": 48.8955378250591 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "bunnycore/Phi-4-RStock-v0.1 
(Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 2.7881475653388756 - } - }, - { - "id": "bunnycore/Phi-4-ReasoningRP_bfloat16_5b3f6627c717f7bece020b4f17e52cc62e4bfbec_True", - "model": { - "name": "bunnycore/Phi-4-ReasoningRP", - "sha": "5b3f6627c717f7bece020b4f17e52cc62e4bfbec", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 40.953870582332804, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6736204382150472, - "normalized_score": 67.36204382150471 - }, - "bbh": { - "name": "BBH", - "value": 0.6922187070022994, - "normalized_score": 55.88446363760437 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4569486404833837, - "normalized_score": 45.69486404833837 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34395973154362414, - "normalized_score": 12.527964205816552 - }, - "musr": { - "name": "MUSR", - "value": 0.44909375, - "normalized_score": 15.136718750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5420545212765957, - "normalized_score": 49.11716903073285 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-28", - "submission_date": "2025-01-28", - "generation": 1, - "base_model": "bunnycore/Phi-4-ReasoningRP (Merge)", - "hub_license": "mit", - "hub_hearts": 2, - "params_billions": 14.66, - "co2_cost": 1.8747088099962932 - } - }, - { - "id": "bunnycore/Phi-4-Sce-exp-v0.1_bfloat16_68f76d7bd85d2b2cfdffa6d2f5a53f0de623563a_True", - "model": { - "name": "bunnycore/Phi-4-Sce-exp-v0.1", - "sha": "68f76d7bd85d2b2cfdffa6d2f5a53f0de623563a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 41.33228109882298, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6595322632836429, - "normalized_score": 65.9532263283643 - }, - "bbh": { - "name": "BBH", - "value": 0.694317957938629, - "normalized_score": 56.074961973018425 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5030211480362538, - "normalized_score": 50.30211480362537 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33557046979865773, - "normalized_score": 11.409395973154364 - }, - "musr": { - "name": "MUSR", - "value": 0.44407291666666665, - "normalized_score": 15.10911458333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5423038563829787, - "normalized_score": 49.14487293144209 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-07", - "submission_date": "2025-02-07", - "generation": 1, - "base_model": "bunnycore/Phi-4-Sce-exp-v0.1 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 14.66, - "co2_cost": 1.8583181603957728 - } - }, - { - "id": "bunnycore/Phi-4-Stock-Ex_bfloat16_26e75507bcc57a36a6e661fc46f431e9c6ed419d_True", - "model": { - "name": "bunnycore/Phi-4-Stock-Ex", - "sha": "26e75507bcc57a36a6e661fc46f431e9c6ed419d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 40.217464671520766, - "has_chat_template": true - }, - "evaluations": { 
- "ifeval": { - "name": "IFEval", - "value": 0.6574588757829227, - "normalized_score": 65.74588757829227 - }, - "bbh": { - "name": "BBH", - "value": 0.6864461628663387, - "normalized_score": 55.20355070082317 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4086102719033233, - "normalized_score": 40.86102719033233 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35067114093959734, - "normalized_score": 13.422818791946312 - }, - "musr": { - "name": "MUSR", - "value": 0.46236458333333336, - "normalized_score": 17.462239583333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5374833776595744, - "normalized_score": 48.60926418439716 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-16", - "submission_date": "2025-01-16", - "generation": 1, - "base_model": "bunnycore/Phi-4-Stock-Ex (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 14.66, - "co2_cost": 1.84229568684912 - } - }, - { - "id": "bunnycore/Phi-4-Stock-RP_bfloat16_bf881c1b7f4e6f4b99f5c48fc1008b2cfe9efb64_True", - "model": { - "name": "bunnycore/Phi-4-Stock-RP", - "sha": "bf881c1b7f4e6f4b99f5c48fc1008b2cfe9efb64", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 39.044084455528825, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6399231816025922, - "normalized_score": 63.99231816025922 - }, - "bbh": { - "name": "BBH", - "value": 0.6859633715492438, - "normalized_score": 55.20594989405504 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3413897280966767, - "normalized_score": 34.13897280966767 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35822147651006714, - "normalized_score": 14.429530201342287 - }, - "musr": { - "name": "MUSR", - "value": 0.47147916666666667, - "normalized_score": 18.53489583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5316655585106383, - "normalized_score": 47.96283983451538 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-12", - "submission_date": "2025-01-12", - "generation": 1, - "base_model": "bunnycore/Phi-4-Stock-RP (Merge)", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 14.66, - "co2_cost": 2.3111459529240057 - } - }, - { - "id": "bunnycore/Phi-4-Trim-Exp1_bfloat16_77af94cfcce9fd3c187425a9eaf8f7b36573534f_True", - "model": { - "name": "bunnycore/Phi-4-Trim-Exp1", - "sha": "77af94cfcce9fd3c187425a9eaf8f7b36573534f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.501547812787733, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.12192538021338936, - "normalized_score": 12.192538021338937 - }, - "bbh": { - "name": "BBH", - "value": 0.28516626650940224, - "normalized_score": 1.4066196276490395 - }, - "math": { - "name": "MATH Level 5", - "value": 0.005287009063444109, - "normalized_score": 0.5287009063444109 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2550335570469799, - "normalized_score": 0.6711409395973182 - }, - "musr": { - "name": "MUSR", - "value": 0.4176875, - "normalized_score": 10.577604166666667 - }, - 
"mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1146941489361702, - "normalized_score": 1.6326832151300221 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-14", - "submission_date": "2025-02-14", - "generation": 1, - "base_model": "bunnycore/Phi-4-Trim-Exp1 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.503, - "co2_cost": 0.5123423469545572 - } - }, - { - "id": "bunnycore/Phi-Seek-4-Sce-V1_bfloat16_8eb60bf63862eac59efd75eedd9edfa5142eb9a3_True", - "model": { - "name": "bunnycore/Phi-Seek-4-Sce-V1", - "sha": "8eb60bf63862eac59efd75eedd9edfa5142eb9a3", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.22214709074268, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.29348462080612775, - "normalized_score": 29.348462080612777 - }, - "bbh": { - "name": "BBH", - "value": 0.6459114889718743, - "normalized_score": 49.2526730890894 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21450151057401812, - "normalized_score": 21.45015105740181 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.39815625, - "normalized_score": 8.002864583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5123005319148937, - "normalized_score": 45.81117021276596 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-06", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 1.9464034254665368 - } - }, - { - "id": "bunnycore/Qandora-2.5-7B-Creative_bfloat16_fdb174364d4a4f323ed1cb01219ac4d87708219d_True", - "model": { - "name": "bunnycore/Qandora-2.5-7B-Creative", - "sha": "fdb174364d4a4f323ed1cb01219ac4d87708219d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 32.10182633953863, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6803148978044922, - "normalized_score": 68.03148978044922 - }, - "bbh": { - "name": "BBH", - "value": 0.5541763892398439, - "normalized_score": 36.424651784758105 - }, - "math": { - "name": "MATH Level 5", - "value": 0.30589123867069484, - "normalized_score": 30.589123867069485 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3104026845637584, - "normalized_score": 8.05369127516779 - }, - "musr": { - "name": "MUSR", - "value": 0.4211875, - "normalized_score": 10.848437500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4479720744680851, - "normalized_score": 38.66356382978723 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-20", - "submission_date": "2024-11-20", - "generation": 1, - "base_model": "bunnycore/Qandora-2.5-7B-Creative (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.419590012322623 - } - }, - { - "id": 
"bunnycore/QandoraExp-7B_bfloat16_74906d5518c7feb7df9b168763dabc1b0167942f_True", - "model": { - "name": "bunnycore/QandoraExp-7B", - "sha": "74906d5518c7feb7df9b168763dabc1b0167942f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.26500037492428, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7509064836855099, - "normalized_score": 75.090648368551 - }, - "bbh": { - "name": "BBH", - "value": 0.5477959748047708, - "normalized_score": 35.92474216004687 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4743202416918429, - "normalized_score": 47.43202416918429 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3104026845637584, - "normalized_score": 8.05369127516779 - }, - "musr": { - "name": "MUSR", - "value": 0.43120833333333336, - "normalized_score": 13.201041666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4409906914893617, - "normalized_score": 37.88785460992907 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-11", - "submission_date": "2024-11-11", - "generation": 1, - "base_model": "bunnycore/QandoraExp-7B (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 7.616, - "co2_cost": 1.3410551587069948 - } - }, - { - "id": "bunnycore/QandoraExp-7B-Persona_bfloat16_21bd6c2e270358b70f9af98bcccd6ec9c2cfce88_True", - "model": { - "name": "bunnycore/QandoraExp-7B-Persona", - "sha": "21bd6c2e270358b70f9af98bcccd6ec9c2cfce88", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 31.693541037557907, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6246858335882126, - "normalized_score": 62.46858335882126 - }, - "bbh": { - "name": "BBH", - "value": 0.5558337526959515, - "normalized_score": 36.8327094432999 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3104229607250755, - "normalized_score": 31.042296072507554 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3145973154362416, - "normalized_score": 8.612975391498878 - }, - "musr": { - "name": "MUSR", - "value": 0.43715624999999997, - "normalized_score": 13.344531250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44074135638297873, - "normalized_score": 37.86015070921986 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-12", - "submission_date": "2024-11-12", - "generation": 1, - "base_model": "bunnycore/QandoraExp-7B-Persona (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 7.616, - "co2_cost": 1.3759415732833142 - } - }, - { - "id": "bunnycore/QandoraExp-7B-v2_bfloat16_017594240f9b3c4262e23de6d550453a1a3d5540_True", - "model": { - "name": "bunnycore/QandoraExp-7B-v2", - "sha": "017594240f9b3c4262e23de6d550453a1a3d5540", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 31.12963867958909, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5606889719278182, - "normalized_score": 56.06889719278182 - }, - "bbh": { - "name": "BBH", - 
"value": 0.5444864824489132, - "normalized_score": 34.94496675271275 - }, - "math": { - "name": "MATH Level 5", - "value": 0.47129909365558914, - "normalized_score": 47.129909365558916 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.40454166666666663, - "normalized_score": 9.267708333333339 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.390874335106383, - "normalized_score": 32.319370567375884 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-12", - "submission_date": "2024-11-12", - "generation": 1, - "base_model": "bunnycore/QandoraExp-7B-v2 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.3872989562354536 - } - }, - { - "id": "bunnycore/QwQen-3B-LCoT_bfloat16_a778a78bdd4a2ab58bcbc99269f0673f610e5874_True", - "model": { - "name": "bunnycore/QwQen-3B-LCoT", - "sha": "a778a78bdd4a2ab58bcbc99269f0673f610e5874", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 27.986056265747138, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6025290673191577, - "normalized_score": 60.252906731915765 - }, - "bbh": { - "name": "BBH", - "value": 0.4899306773152123, - "normalized_score": 28.499814171968946 - }, - "math": { - "name": "MATH Level 5", - "value": 0.36178247734138974, - "normalized_score": 36.17824773413897 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26677852348993286, - "normalized_score": 2.2371364653243813 - }, - "musr": { - "name": "MUSR", - "value": 0.41778125, - "normalized_score": 10.755989583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3699301861702128, - "normalized_score": 29.99224290780142 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-25", - "submission_date": "2024-12-26", - "generation": 1, - "base_model": "bunnycore/QwQen-3B-LCoT (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 3.397, - "co2_cost": 1.4554590235335287 - } - }, - { - "id": "bunnycore/QwQen-3B-LCoT-R1_bfloat16_f6cc2be1224899c81c0931c58e589780442321e5_True", - "model": { - "name": "bunnycore/QwQen-3B-LCoT-R1", - "sha": "f6cc2be1224899c81c0931c58e589780442321e5", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 25.965028531766123, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.534160471992092, - "normalized_score": 53.4160471992092 - }, - "bbh": { - "name": "BBH", - "value": 0.4798600168403517, - "normalized_score": 26.982869231919675 - }, - "math": { - "name": "MATH Level 5", - "value": 0.33534743202416917, - "normalized_score": 33.53474320241692 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.41384375, - "normalized_score": 10.030468749999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3723404255319149, - "normalized_score": 30.26004728132387 - } - }, - "features": { - 
"is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-23", - "submission_date": "2025-02-23", - "generation": 1, - "base_model": "bunnycore/QwQen-3B-LCoT-R1 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 3.085, - "co2_cost": 0.8102690770869923 - } - }, - { - "id": "bunnycore/Qwen-2.5-7B-Deep-Sky-T1_float16_34278bff581951eb410930a05d097b60a997ef3c_True", - "model": { - "name": "bunnycore/Qwen-2.5-7B-Deep-Sky-T1", - "sha": "34278bff581951eb410930a05d097b60a997ef3c", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 15.009116031308366, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42080457630198986, - "normalized_score": 42.08045763019898 - }, - "bbh": { - "name": "BBH", - "value": 0.4139878251775055, - "normalized_score": 17.866965310070587 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05513595166163142, - "normalized_score": 5.5135951661631415 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28104026845637586, - "normalized_score": 4.138702460850116 - }, - "musr": { - "name": "MUSR", - "value": 0.40181249999999996, - "normalized_score": 8.193229166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2103557180851064, - "normalized_score": 12.26174645390071 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-16", - "submission_date": "2025-02-16", - "generation": 1, - "base_model": "bunnycore/Qwen-2.5-7B-Deep-Sky-T1 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 0.7107938132268742 - } - }, - { - "id": "bunnycore/Qwen-2.5-7B-Deep-Stock-v1_bfloat16_d9edb0b221196bf95b06097ee732807d7dc92da0_True", - "model": { - "name": "bunnycore/Qwen-2.5-7B-Deep-Stock-v1", - "sha": "d9edb0b221196bf95b06097ee732807d7dc92da0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 27.53059115286298, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5695066867023941, - "normalized_score": 56.95066867023941 - }, - "bbh": { - "name": "BBH", - "value": 0.5361336083539997, - "normalized_score": 34.07986228320928 - }, - "math": { - "name": "MATH Level 5", - "value": 0.26435045317220546, - "normalized_score": 26.435045317220546 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.4108958333333333, - "normalized_score": 9.96197916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.40658244680851063, - "normalized_score": 34.06471631205674 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-25", - "generation": 1, - "base_model": "bunnycore/Qwen-2.5-7B-Deep-Stock-v1 (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 7.613, - "co2_cost": 1.406862713775246 - } - }, - { - "id": 
"bunnycore/Qwen-2.5-7B-Deep-Stock-v4_bfloat16_41abad750ddb89571482318273a814bab91b8ff1_True", - "model": { - "name": "bunnycore/Qwen-2.5-7B-Deep-Stock-v4", - "sha": "41abad750ddb89571482318273a814bab91b8ff1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.10174991287545, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7752862405085175, - "normalized_score": 77.52862405085175 - }, - "bbh": { - "name": "BBH", - "value": 0.5452765042799131, - "normalized_score": 35.91001360257288 - }, - "math": { - "name": "MATH Level 5", - "value": 0.48942598187311176, - "normalized_score": 48.94259818731118 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30033557046979864, - "normalized_score": 6.711409395973152 - }, - "musr": { - "name": "MUSR", - "value": 0.41269791666666666, - "normalized_score": 10.387239583333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4341755319148936, - "normalized_score": 37.1306146572104 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-26", - "submission_date": "2025-01-26", - "generation": 1, - "base_model": "bunnycore/Qwen-2.5-7B-Deep-Stock-v4 (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 7.613, - "co2_cost": 1.3293361270213007 - } - }, - { - "id": "bunnycore/Qwen-2.5-7B-Deep-Stock-v5_bfloat16_4c8027d72ebb17ffb74c2538fa9bf4ae2cec9172_True", - "model": { - "name": "bunnycore/Qwen-2.5-7B-Deep-Stock-v5", - "sha": "4c8027d72ebb17ffb74c2538fa9bf4ae2cec9172", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 18.50794443275282, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45090471061228654, - "normalized_score": 45.090471061228655 - }, - "bbh": { - "name": "BBH", - "value": 0.4672461238794705, - "normalized_score": 24.990385055835603 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1472809667673716, - "normalized_score": 14.72809667673716 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.3648229166666667, - "normalized_score": 3.202864583333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28316156914893614, - "normalized_score": 20.351285460992905 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-16", - "submission_date": "2025-02-16", - "generation": 1, - "base_model": "bunnycore/Qwen-2.5-7B-Deep-Stock-v5 (Merge)", - "hub_license": "", - "hub_hearts": 3, - "params_billions": 7.613, - "co2_cost": 0.6894965582474849 - } - }, - { - "id": "bunnycore/Qwen-2.5-7B-Exp-Sce_bfloat16_cea556e9f3768a317d746bab5fc830216af373bb_True", - "model": { - "name": "bunnycore/Qwen-2.5-7B-Exp-Sce", - "sha": "cea556e9f3768a317d746bab5fc830216af373bb", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 33.8635495431104, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.765169749597734, - 
"normalized_score": 76.51697495977339 - }, - "bbh": { - "name": "BBH", - "value": 0.5505865059891896, - "normalized_score": 36.2390009919108 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3255287009063444, - "normalized_score": 32.55287009063444 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2986577181208054, - "normalized_score": 6.487695749440718 - }, - "musr": { - "name": "MUSR", - "value": 0.44302083333333336, - "normalized_score": 15.177604166666661 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.42586436170212766, - "normalized_score": 36.207151300236404 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-08", - "submission_date": "2025-02-08", - "generation": 1, - "base_model": "bunnycore/Qwen-2.5-7B-Exp-Sce (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 7.613, - "co2_cost": 0.7166903469215072 - } - }, - { - "id": "bunnycore/Qwen-2.5-7B-R1-Stock_bfloat16_ce916b3394fb8d741e80f00f710962c10c6623d3_True", - "model": { - "name": "bunnycore/Qwen-2.5-7B-R1-Stock", - "sha": "ce916b3394fb8d741e80f00f710962c10c6623d3", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.319380127136064, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7573261169253137, - "normalized_score": 75.73261169253138 - }, - "bbh": { - "name": "BBH", - "value": 0.5393363105747148, - "normalized_score": 34.8504410029365 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5007552870090635, - "normalized_score": 50.07552870090635 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.3993645833333333, - "normalized_score": 8.053906249999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.429438164893617, - "normalized_score": 36.60424054373522 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-24", - "submission_date": "2025-01-24", - "generation": 1, - "base_model": "bunnycore/Qwen-2.5-7B-R1-Stock (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 7.613, - "co2_cost": 1.3544243143784895 - } - }, - { - "id": "bunnycore/Qwen-2.5-7B-Stock-Deep-Bespoke_bfloat16_662c55748486b23fd676f45add9453294e79749a_True", - "model": { - "name": "bunnycore/Qwen-2.5-7B-Stock-Deep-Bespoke", - "sha": "662c55748486b23fd676f45add9453294e79749a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 23.4734154687315, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5206219497599702, - "normalized_score": 52.062194975997016 - }, - "bbh": { - "name": "BBH", - "value": 0.49203477801491813, - "normalized_score": 28.17803739186357 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18882175226586104, - "normalized_score": 18.882175226586103 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28104026845637586, - "normalized_score": 4.138702460850116 - }, - "musr": { - "name": "MUSR", - "value": 0.4068020833333333, - "normalized_score": 8.916927083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", 
- "value": 0.3579621010638298, - "normalized_score": 28.662455673758863 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-25", - "generation": 1, - "base_model": "bunnycore/Qwen-2.5-7B-Stock-Deep-Bespoke (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 1.4266904813280454 - } - }, - { - "id": "bunnycore/Qwen-2.5-7b-S1k_bfloat16_32b66e4488724dca26655057f62bb3fae9ad11ef_True", - "model": { - "name": "bunnycore/Qwen-2.5-7b-S1k", - "sha": "32b66e4488724dca26655057f62bb3fae9ad11ef", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 34.59275371596473, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7162351449708995, - "normalized_score": 71.62351449708994 - }, - "bbh": { - "name": "BBH", - "value": 0.5562750208035135, - "normalized_score": 36.69420283560647 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4780966767371601, - "normalized_score": 47.809667673716014 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28439597315436244, - "normalized_score": 4.5861297539149914 - }, - "musr": { - "name": "MUSR", - "value": 0.4071458333333333, - "normalized_score": 9.259895833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4382480053191489, - "normalized_score": 37.58311170212765 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-19", - "submission_date": "2025-02-20", - "generation": 1, - "base_model": "bunnycore/Qwen-2.5-7b-S1k (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 7.613, - "co2_cost": 0.6942319849156767 - } - }, - { - "id": "bunnycore/Qwen2.5-1.5B-Model-Stock_bfloat16_6496cbb16e56f68248d38a792a7e51b7505f6cc3_True", - "model": { - "name": "bunnycore/Qwen2.5-1.5B-Model-Stock", - "sha": "6496cbb16e56f68248d38a792a7e51b7505f6cc3", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.1995227467450436, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.18292574812608325, - "normalized_score": 18.292574812608326 - }, - "bbh": { - "name": "BBH", - "value": 0.2873695911207613, - "normalized_score": 1.430207460042271 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.3674270833333333, - "normalized_score": 3.128385416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11003989361702128, - "normalized_score": 1.1155437352245863 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-28", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.776, - "co2_cost": 0.6066806636378542 - } - }, - { - "id": 
"bunnycore/Qwen2.5-3B-Model-Stock_bfloat16_ee5704f7bbfbcd15c64dc2628a82362d2c8ef016_True", - "model": { - "name": "bunnycore/Qwen2.5-3B-Model-Stock", - "sha": "ee5704f7bbfbcd15c64dc2628a82362d2c8ef016", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 27.52421014331389, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6380747527671025, - "normalized_score": 63.807475276710235 - }, - "bbh": { - "name": "BBH", - "value": 0.4712481909242632, - "normalized_score": 26.00226442713036 - }, - "math": { - "name": "MATH Level 5", - "value": 0.37990936555891236, - "normalized_score": 37.99093655589124 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28859060402684567, - "normalized_score": 5.145413870246088 - }, - "musr": { - "name": "MUSR", - "value": 0.39415625, - "normalized_score": 7.202864583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3249667553191489, - "normalized_score": 24.9963061465721 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-15", - "submission_date": "2025-01-15", - "generation": 1, - "base_model": "bunnycore/Qwen2.5-3B-Model-Stock (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 3.396, - "co2_cost": 1.5158120396985344 - } - }, - { - "id": "bunnycore/Qwen2.5-3B-Model-Stock-v2_bfloat16_5911ddc9c6cc97299cd0bbef75664bdf0493f42d_True", - "model": { - "name": "bunnycore/Qwen2.5-3B-Model-Stock-v2", - "sha": "5911ddc9c6cc97299cd0bbef75664bdf0493f42d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 27.68767938120949, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6490157227268093, - "normalized_score": 64.90157227268094 - }, - "bbh": { - "name": "BBH", - "value": 0.46774789186946836, - "normalized_score": 25.648546574568158 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3867069486404834, - "normalized_score": 38.670694864048336 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28691275167785235, - "normalized_score": 4.921700223713646 - }, - "musr": { - "name": "MUSR", - "value": 0.3914583333333333, - "normalized_score": 6.765625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3269614361702128, - "normalized_score": 25.217937352245862 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-15", - "submission_date": "2025-01-15", - "generation": 1, - "base_model": "bunnycore/Qwen2.5-3B-Model-Stock-v2 (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 3.396, - "co2_cost": 1.5142603427826242 - } - }, - { - "id": "bunnycore/Qwen2.5-3B-Model-Stock-v3.1_bfloat16_4b56e847600c4be09df262470b1f3602fde13386_True", - "model": { - "name": "bunnycore/Qwen2.5-3B-Model-Stock-v3.1", - "sha": "4b56e847600c4be09df262470b1f3602fde13386", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 27.97276368199932, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6480915083090644, - "normalized_score": 
64.80915083090645 - }, - "bbh": { - "name": "BBH", - "value": 0.473722298403459, - "normalized_score": 26.396631614914487 - }, - "math": { - "name": "MATH Level 5", - "value": 0.38972809667673713, - "normalized_score": 38.972809667673715 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28439597315436244, - "normalized_score": 4.5861297539149914 - }, - "musr": { - "name": "MUSR", - "value": 0.39679166666666665, - "normalized_score": 7.632291666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3289561170212766, - "normalized_score": 25.439568557919618 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-25", - "submission_date": "2025-02-25", - "generation": 1, - "base_model": "bunnycore/Qwen2.5-3B-Model-Stock-v3.1 (Merge)", - "hub_license": "", - "hub_hearts": 3, - "params_billions": 3.396, - "co2_cost": 0.7668845276681562 - } - }, - { - "id": "bunnycore/Qwen2.5-3B-Model-Stock-v3.2_bfloat16_3ac3318182fb329456d69e298a525d836e55c433_True", - "model": { - "name": "bunnycore/Qwen2.5-3B-Model-Stock-v3.2", - "sha": "3ac3318182fb329456d69e298a525d836e55c433", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 27.386758745448905, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6353021095138676, - "normalized_score": 63.53021095138676 - }, - "bbh": { - "name": "BBH", - "value": 0.4727417689283166, - "normalized_score": 26.326937514436406 - }, - "math": { - "name": "MATH Level 5", - "value": 0.37537764350453173, - "normalized_score": 37.53776435045317 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.39279166666666665, - "normalized_score": 6.965625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3293716755319149, - "normalized_score": 25.485741725768317 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-25", - "submission_date": "2025-02-26", - "generation": 1, - "base_model": "bunnycore/Qwen2.5-3B-Model-Stock-v3.2 (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 3.396, - "co2_cost": 0.7636616572248317 - } - }, - { - "id": "bunnycore/Qwen2.5-3B-Model-Stock-v4.1_bfloat16_ac75d8b3f0f5bfe0da95889e6b4671b91b3f58a1_True", - "model": { - "name": "bunnycore/Qwen2.5-3B-Model-Stock-v4.1", - "sha": "ac75d8b3f0f5bfe0da95889e6b4671b91b3f58a1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 27.743273848475656, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6380747527671025, - "normalized_score": 63.807475276710235 - }, - "bbh": { - "name": "BBH", - "value": 0.48202557906199406, - "normalized_score": 27.399951145314322 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3768882175226586, - "normalized_score": 37.68882175226586 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.39409374999999996, - "normalized_score": 7.128385416666667 - }, - "mmlu_pro": { 
- "name": "MMLU-PRO", - "value": 0.3386801861702128, - "normalized_score": 26.520020685579198 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-28", - "submission_date": "2025-02-28", - "generation": 1, - "base_model": "bunnycore/Qwen2.5-3B-Model-Stock-v4.1 (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 3.396, - "co2_cost": 0.7750401326792976 - } - }, - { - "id": "bunnycore/Qwen2.5-3B-RP-Mix_bfloat16_0e8f3b56f9270fdcdd4badfd7b925dc8fc4902c7_True", - "model": { - "name": "bunnycore/Qwen2.5-3B-RP-Mix", - "sha": "0e8f3b56f9270fdcdd4badfd7b925dc8fc4902c7", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 25.505206811112476, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5720543712903984, - "normalized_score": 57.20543712903984 - }, - "bbh": { - "name": "BBH", - "value": 0.4894378989397821, - "normalized_score": 28.305922895366823 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21525679758308158, - "normalized_score": 21.525679758308158 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27348993288590606, - "normalized_score": 3.1319910514541416 - }, - "musr": { - "name": "MUSR", - "value": 0.42844791666666665, - "normalized_score": 12.555989583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37275598404255317, - "normalized_score": 30.306220449172578 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-22", - "submission_date": "2024-10-22", - "generation": 1, - "base_model": "bunnycore/Qwen2.5-3B-RP-Mix (Merge)", - "hub_license": "", - "hub_hearts": 4, - "params_billions": 3.397, - "co2_cost": 1.8394020332907 - } - }, - { - "id": "bunnycore/Qwen2.5-3B-RP-Thinker_float16_ffe872591c1fe1c06464779dd2abfddf9ba7b9f8_True", - "model": { - "name": "bunnycore/Qwen2.5-3B-RP-Thinker", - "sha": "ffe872591c1fe1c06464779dd2abfddf9ba7b9f8", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 22.906122256350514, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.589414974489909, - "normalized_score": 58.9414974489909 - }, - "bbh": { - "name": "BBH", - "value": 0.4164134011392067, - "normalized_score": 17.412964272966242 - }, - "math": { - "name": "MATH Level 5", - "value": 0.33534743202416917, - "normalized_score": 33.53474320241692 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.3287291666666667, - "normalized_score": 1.7578124999999993 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3149933510638298, - "normalized_score": 23.88815011820331 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-31", - "submission_date": "2024-12-31", - "generation": 1, - "base_model": "bunnycore/Qwen2.5-3B-RP-Thinker (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 3.397, - "co2_cost": 
1.4401271968608247 - } - }, - { - "id": "bunnycore/Qwen2.5-3B-RP-Thinker-V2_float16_61889a5b40995a556e9c0cb1c64223559a6035a5_True", - "model": { - "name": "bunnycore/Qwen2.5-3B-RP-Thinker-V2", - "sha": "61889a5b40995a556e9c0cb1c64223559a6035a5", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 27.670372197700004, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6419965691033125, - "normalized_score": 64.19965691033124 - }, - "bbh": { - "name": "BBH", - "value": 0.46784408133522204, - "normalized_score": 25.629506719785628 - }, - "math": { - "name": "MATH Level 5", - "value": 0.38293051359516617, - "normalized_score": 38.29305135951662 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28523489932885904, - "normalized_score": 4.697986577181204 - }, - "musr": { - "name": "MUSR", - "value": 0.398125, - "normalized_score": 7.965625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3271276595744681, - "normalized_score": 25.236406619385342 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-31", - "submission_date": "2024-12-31", - "generation": 1, - "base_model": "bunnycore/Qwen2.5-3B-RP-Thinker-V2 (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 3.397, - "co2_cost": 1.4404737026994847 - } - }, - { - "id": "bunnycore/Qwen2.5-7B-CyberRombos_float16_dfd4d30fc6956ffecb9fb3c59fad51875552f7f9_True", - "model": { - "name": "bunnycore/Qwen2.5-7B-CyberRombos", - "sha": "dfd4d30fc6956ffecb9fb3c59fad51875552f7f9", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.95055187781517, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.751830698103255, - "normalized_score": 75.18306981032549 - }, - "bbh": { - "name": "BBH", - "value": 0.5464960546716063, - "normalized_score": 35.8840250776346 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4962235649546828, - "normalized_score": 49.62235649546828 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.41254166666666664, - "normalized_score": 10.067708333333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4390791223404255, - "normalized_score": 37.67545803782505 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-04", - "submission_date": "2024-11-05", - "generation": 1, - "base_model": "bunnycore/Qwen2.5-7B-CyberRombos (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 7.616, - "co2_cost": 1.420210820498877 - } - }, - { - "id": "bunnycore/Qwen2.5-7B-Fuse-Exp_bfloat16_8602ca392ec0414cca119fea98a06c20049488b2_True", - "model": { - "name": "bunnycore/Qwen2.5-7B-Fuse-Exp", - "sha": "8602ca392ec0414cca119fea98a06c20049488b2", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 26.92529775716781, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5468501354184675, - 
"normalized_score": 54.68501354184674 - }, - "bbh": { - "name": "BBH", - "value": 0.5108680600425207, - "normalized_score": 29.967150522844417 - }, - "math": { - "name": "MATH Level 5", - "value": 0.31419939577039274, - "normalized_score": 31.419939577039273 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.45728125000000003, - "normalized_score": 16.36015625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3308676861702128, - "normalized_score": 25.65196513002364 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-12", - "submission_date": "2025-03-13", - "generation": 1, - "base_model": "bunnycore/Qwen2.5-7B-Fuse-Exp (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 7.616, - "co2_cost": 0.6754423000288923 - } - }, - { - "id": "bunnycore/Qwen2.5-7B-Instruct-Fusion_float16_6313c0b3de799ab48720c3b828e322a77cf8d023_True", - "model": { - "name": "bunnycore/Qwen2.5-7B-Instruct-Fusion", - "sha": "6313c0b3de799ab48720c3b828e322a77cf8d023", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 33.10123026617861, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6962016338869754, - "normalized_score": 69.62016338869753 - }, - "bbh": { - "name": "BBH", - "value": 0.5491903018724945, - "normalized_score": 36.17985917773406 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3406344410876133, - "normalized_score": 34.06344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.42971875, - "normalized_score": 12.948177083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4467253989361702, - "normalized_score": 38.52504432624112 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-31", - "submission_date": "2024-11-02", - "generation": 1, - "base_model": "bunnycore/Qwen2.5-7B-Instruct-Fusion (Merge)", - "hub_license": "", - "hub_hearts": 5, - "params_billions": 7.616, - "co2_cost": 1.328964770541402 - } - }, - { - "id": "bunnycore/Qwen2.5-7B-Instruct-Merge-Stock-v0.1_bfloat16_f205f75c1cd0436535cf2bca702844877e70781b_True", - "model": { - "name": "bunnycore/Qwen2.5-7B-Instruct-Merge-Stock-v0.1", - "sha": "f205f75c1cd0436535cf2bca702844877e70781b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.14483066079348, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7509064836855099, - "normalized_score": 75.090648368551 - }, - "bbh": { - "name": "BBH", - "value": 0.5529431709465797, - "normalized_score": 36.395231467496636 - }, - "math": { - "name": "MATH Level 5", - "value": 0.48942598187311176, - "normalized_score": 48.94259818731118 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.42311458333333335, - "normalized_score": 11.689322916666669 - }, - "mmlu_pro": { - 
"name": "MMLU-PRO", - "value": 0.4383311170212766, - "normalized_score": 37.5923463356974 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-01", - "submission_date": "2025-03-01", - "generation": 1, - "base_model": "bunnycore/Qwen2.5-7B-Instruct-Merge-Stock-v0.1 (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 7.613, - "co2_cost": 0.6805329882548932 - } - }, - { - "id": "bunnycore/Qwen2.5-7B-MixStock-Sce-V0.3_bfloat16_94344107260688d9e381cc519f219990fd3dfd6b_True", - "model": { - "name": "bunnycore/Qwen2.5-7B-MixStock-Sce-V0.3", - "sha": "94344107260688d9e381cc519f219990fd3dfd6b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 11.636394077198473, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21197644472222593, - "normalized_score": 21.197644472222592 - }, - "bbh": { - "name": "BBH", - "value": 0.3479005166788895, - "normalized_score": 9.507336377473363 - }, - "math": { - "name": "MATH Level 5", - "value": 0.25755287009063443, - "normalized_score": 25.755287009063444 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2575503355704698, - "normalized_score": 1.0067114093959737 - }, - "musr": { - "name": "MUSR", - "value": 0.3713958333333333, - "normalized_score": 3.6911458333333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17794215425531915, - "normalized_score": 8.660239361702127 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-13", - "submission_date": "2025-02-14", - "generation": 1, - "base_model": "bunnycore/Qwen2.5-7B-MixStock-Sce-V0.3 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 0.7264664530340607 - } - }, - { - "id": "bunnycore/Qwen2.5-7B-MixStock-V0.1_bfloat16_8fb8a90c094c74fce75125c4783fcc817a30243f_True", - "model": { - "name": "bunnycore/Qwen2.5-7B-MixStock-V0.1", - "sha": "8fb8a90c094c74fce75125c4783fcc817a30243f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 33.68659688810528, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7673428724672757, - "normalized_score": 76.73428724672758 - }, - "bbh": { - "name": "BBH", - "value": 0.5479100568012056, - "normalized_score": 35.86925790573909 - }, - "math": { - "name": "MATH Level 5", - "value": 0.31722054380664655, - "normalized_score": 31.722054380664655 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30033557046979864, - "normalized_score": 6.711409395973152 - }, - "musr": { - "name": "MUSR", - "value": 0.441625, - "normalized_score": 14.903124999999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4256150265957447, - "normalized_score": 36.179447399527184 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-05", - "submission_date": "2025-02-05", - "generation": 1, - "base_model": "bunnycore/Qwen2.5-7B-MixStock-V0.1 (Merge)", - "hub_license": "", - "hub_hearts": 3, - 
"params_billions": 7.613, - "co2_cost": 2.1289447011964473 - } - }, - { - "id": "bunnycore/Qwen2.5-7B-R1-Bespoke-Stock_bfloat16_f41a11dd958c01949397118e488fa44288c95483_True", - "model": { - "name": "bunnycore/Qwen2.5-7B-R1-Bespoke-Stock", - "sha": "f41a11dd958c01949397118e488fa44288c95483", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 20.435991032043965, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3726445830396681, - "normalized_score": 37.264458303966805 - }, - "bbh": { - "name": "BBH", - "value": 0.48221362910675625, - "normalized_score": 26.63869755860018 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20468277945619334, - "normalized_score": 20.468277945619334 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2785234899328859, - "normalized_score": 3.8031319910514525 - }, - "musr": { - "name": "MUSR", - "value": 0.3926354166666666, - "normalized_score": 6.979427083333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34715757978723405, - "normalized_score": 27.46195330969267 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-24", - "submission_date": "2025-01-25", - "generation": 1, - "base_model": "bunnycore/Qwen2.5-7B-R1-Bespoke-Stock (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 1.4337758797452507 - } - }, - { - "id": "bunnycore/Qwen2.5-7B-R1-Bespoke-Task_bfloat16_ff2b303f7fd2acaa20d81b880d626dc8e785f5a0_True", - "model": { - "name": "bunnycore/Qwen2.5-7B-R1-Bespoke-Task", - "sha": "ff2b303f7fd2acaa20d81b880d626dc8e785f5a0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 15.746098824658986, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3786641666334215, - "normalized_score": 37.86641666334215 - }, - "bbh": { - "name": "BBH", - "value": 0.41495531490332715, - "normalized_score": 17.906937974221723 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1782477341389728, - "normalized_score": 17.82477341389728 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2533557046979866, - "normalized_score": 0.44742729306487633 - }, - "musr": { - "name": "MUSR", - "value": 0.3568854166666666, - "normalized_score": 1.6773437499999992 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2687832446808511, - "normalized_score": 18.753693853427897 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-24", - "submission_date": "2025-01-25", - "generation": 1, - "base_model": "bunnycore/Qwen2.5-7B-R1-Bespoke-Task (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 1.405181067340548 - } - }, - { - "id": "bunnycore/Qwen2.5-7B-RRP-1M_bfloat16_c49c82be793bd630230f81d627724af62abdbd1a_True", - "model": { - "name": "bunnycore/Qwen2.5-7B-RRP-1M", - "sha": "c49c82be793bd630230f81d627724af62abdbd1a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 33.67928553393096, - "has_chat_template": true - 
}, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7481338404322753, - "normalized_score": 74.81338404322753 - }, - "bbh": { - "name": "BBH", - "value": 0.545239229980545, - "normalized_score": 35.648526152494235 - }, - "math": { - "name": "MATH Level 5", - "value": 0.324773413897281, - "normalized_score": 32.477341389728096 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.44826041666666666, - "normalized_score": 15.799218749999994 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4266123670212766, - "normalized_score": 36.290263002364064 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-26", - "submission_date": "2025-01-26", - "generation": 1, - "base_model": "bunnycore/Qwen2.5-7B-RRP-1M (Merge)", - "hub_license": "", - "hub_hearts": 6, - "params_billions": 7.613, - "co2_cost": 1.3613529566243419 - } - }, - { - "id": "bunnycore/Qwen2.5-7B-RRP-1M-Thinker_bfloat16_ddc20ed7a1d4ce5f1d47dd30181540c2ddc79d8c_True", - "model": { - "name": "bunnycore/Qwen2.5-7B-RRP-1M-Thinker", - "sha": "ddc20ed7a1d4ce5f1d47dd30181540c2ddc79d8c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 12.275362824338155, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23081091503876383, - "normalized_score": 23.08109150387638 - }, - "bbh": { - "name": "BBH", - "value": 0.3481907488085136, - "normalized_score": 9.209373414510402 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2719033232628399, - "normalized_score": 27.19033232628399 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2575503355704698, - "normalized_score": 1.0067114093959737 - }, - "musr": { - "name": "MUSR", - "value": 0.3767291666666666, - "normalized_score": 4.624479166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1768617021276596, - "normalized_score": 8.540189125295509 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-14", - "submission_date": "2025-02-18", - "generation": 1, - "base_model": "bunnycore/Qwen2.5-7B-RRP-1M-Thinker (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.613, - "co2_cost": 0.7101215693816563 - } - }, - { - "id": "bunnycore/Qwen2.5-7B-RRP-ID_bfloat16_9e5c600d49ae0697dc4584477fdcc941c66f5ef4_True", - "model": { - "name": "bunnycore/Qwen2.5-7B-RRP-ID", - "sha": "9e5c600d49ae0697dc4584477fdcc941c66f5ef4", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.463219597116336, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.747259493698941, - "normalized_score": 74.7259493698941 - }, - "bbh": { - "name": "BBH", - "value": 0.5479543512061099, - "normalized_score": 36.09918880152282 - }, - "math": { - "name": "MATH Level 5", - "value": 0.486404833836858, - "normalized_score": 48.6404833836858 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.41796875, - 
"normalized_score": 11.31276041666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4387466755319149, - "normalized_score": 37.63851950354609 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-27", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.5030598082994553 - } - }, - { - "id": "bunnycore/Qwen2.5-7B-Sky-R1-Mini_bfloat16_15902f67ef18fd6d8e325c48c726007bebf98fce_False", - "model": { - "name": "bunnycore/Qwen2.5-7B-Sky-R1-Mini", - "sha": "15902f67ef18fd6d8e325c48c726007bebf98fce", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.371821186629208, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23048622100471194, - "normalized_score": 23.048622100471192 - }, - "bbh": { - "name": "BBH", - "value": 0.3502939195575525, - "normalized_score": 8.89516742838758 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02945619335347432, - "normalized_score": 2.9456193353474323 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28942953020134227, - "normalized_score": 5.257270693512303 - }, - "musr": { - "name": "MUSR", - "value": 0.3448229166666667, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12533244680851063, - "normalized_score": 2.8147163120567367 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-18", - "submission_date": "2025-02-18", - "generation": 1, - "base_model": "bunnycore/Qwen2.5-7B-Sky-R1-Mini (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.6655915571196371 - } - }, - { - "id": "bunnycore/QwenMosaic-7B_bfloat16_1eab0bbe701195ba26f60a284f74e3c6dfe5c139_True", - "model": { - "name": "bunnycore/QwenMosaic-7B", - "sha": "1eab0bbe701195ba26f60a284f74e3c6dfe5c139", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 31.300371581101075, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5819215237791282, - "normalized_score": 58.192152377912834 - }, - "bbh": { - "name": "BBH", - "value": 0.5564132127895585, - "normalized_score": 36.750519941741935 - }, - "math": { - "name": "MATH Level 5", - "value": 0.44410876132930516, - "normalized_score": 44.41087613293052 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.4163854166666667, - "normalized_score": 10.21484375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43101728723404253, - "normalized_score": 36.77969858156028 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-01", - "submission_date": "2024-12-02", - "generation": 1, - "base_model": "bunnycore/QwenMosaic-7B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.5005744720068477 - } - }, 
- { - "id": "bunnycore/Smol-Llama-3.2-3B_bfloat16_d66d88bdfb94a879ac3a0ba4891aefb26f4d384f_True", - "model": { - "name": "bunnycore/Smol-Llama-3.2-3B", - "sha": "d66d88bdfb94a879ac3a0ba4891aefb26f4d384f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.52219334493714, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6678501930433471, - "normalized_score": 66.78501930433472 - }, - "bbh": { - "name": "BBH", - "value": 0.453881406940321, - "normalized_score": 23.04076448114923 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13821752265861026, - "normalized_score": 13.821752265861026 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27684563758389263, - "normalized_score": 3.5794183445190177 - }, - "musr": { - "name": "MUSR", - "value": 0.34600000000000003, - "normalized_score": 3.1500000000000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3228058510638298, - "normalized_score": 24.756205673758867 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "bunnycore/Smol-Llama-3.2-3B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.607, - "co2_cost": 1.1314095821066952 - } - }, - { - "id": "bunnycore/SmolLM2-1.7-Persona_bfloat16_ebeaa6f284c044bd54e3e66cc5458d974d92523e_True", - "model": { - "name": "bunnycore/SmolLM2-1.7-Persona", - "sha": "ebeaa6f284c044bd54e3e66cc5458d974d92523e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.527349011570474, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5465254413844156, - "normalized_score": 54.652544138441556 - }, - "bbh": { - "name": "BBH", - "value": 0.3623213930905173, - "normalized_score": 11.203752907058009 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05664652567975831, - "normalized_score": 5.664652567975831 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.334125, - "normalized_score": 3.0322916666666675 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1973902925531915, - "normalized_score": 10.821143617021276 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-15", - "submission_date": "2024-11-15", - "generation": 0, - "base_model": "bunnycore/SmolLM2-1.7-Persona", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.711, - "co2_cost": 0.6626648013774251 - } - }, - { - "id": "bunnycore/SmolLM2-1.7B-roleplay-lora_float16_bbab860a4ffdd8e48f600192947ad3504bb0a944_True", - "model": { - "name": "bunnycore/SmolLM2-1.7B-roleplay-lora", - "sha": "bbab860a4ffdd8e48f600192947ad3504bb0a944", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 14.47905997638928, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5382075116247114, - "normalized_score": 
53.82075116247113 - }, - "bbh": { - "name": "BBH", - "value": 0.3610343412303005, - "normalized_score": 10.907238019540259 - }, - "math": { - "name": "MATH Level 5", - "value": 0.052870090634441085, - "normalized_score": 5.287009063444108 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2751677852348993, - "normalized_score": 3.355704697986576 - }, - "musr": { - "name": "MUSR", - "value": 0.33945833333333336, - "normalized_score": 2.7656250000000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.19664228723404256, - "normalized_score": 10.738031914893616 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-15", - "submission_date": "2024-11-15", - "generation": 3, - "base_model": "HuggingFaceTB/SmolLM2-1.7B-Instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.423, - "co2_cost": 1.4000513949062356 - } - }, - { - "id": "bunnycore/Tulu-3.1-8B-SuperNova_float16_bbbfb910ca8d8f7ae35ecaf4824ad68713bf8d86_True", - "model": { - "name": "bunnycore/Tulu-3.1-8B-SuperNova", - "sha": "bbbfb910ca8d8f7ae35ecaf4824ad68713bf8d86", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.991375681506, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8193748143813969, - "normalized_score": 81.93748143813968 - }, - "bbh": { - "name": "BBH", - "value": 0.5254122754311122, - "normalized_score": 32.499170772496946 - }, - "math": { - "name": "MATH Level 5", - "value": 0.24622356495468278, - "normalized_score": 24.622356495468278 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - "musr": { - "name": "MUSR", - "value": 0.3935, - "normalized_score": 8.687500000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3813996010638298, - "normalized_score": 31.26662234042553 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-22", - "submission_date": "2024-11-23", - "generation": 1, - "base_model": "bunnycore/Tulu-3.1-8B-SuperNova (Merge)", - "hub_license": "", - "hub_hearts": 4, - "params_billions": 8.03, - "co2_cost": 1.3878481612472198 - } - }, - { - "id": "byroneverson/Mistral-Small-Instruct-2409-abliterated_bfloat16_5e24aaef2a37f9cb69f70ae9fe714f9d9599fd6e_True", - "model": { - "name": "byroneverson/Mistral-Small-Instruct-2409-abliterated", - "sha": "5e24aaef2a37f9cb69f70ae9fe714f9d9599fd6e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 28.805845718641834, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6970759806203096, - "normalized_score": 69.70759806203097 - }, - "bbh": { - "name": "BBH", - "value": 0.5237864400325174, - "normalized_score": 31.255700427788586 - }, - "math": { - "name": "MATH Level 5", - "value": 0.24773413897280966, - "normalized_score": 24.773413897280967 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33305369127516776, - "normalized_score": 11.073825503355701 - }, - "musr": { - "name": "MUSR", - "value": 0.36971875000000004, - "normalized_score": 3.5481770833333326 - }, - 
"mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39228723404255317, - "normalized_score": 32.476359338061464 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-23", - "submission_date": "2024-10-13", - "generation": 1, - "base_model": "mistralai/Mistral-Small-Instruct-2409", - "hub_license": "other", - "hub_hearts": 13, - "params_billions": 22.247, - "co2_cost": 2.804572923392441 - } - }, - { - "id": "byroneverson/Yi-1.5-9B-Chat-16K-abliterated_bfloat16_84a6eaa723633bbefc7cfac9c64bf0e0a4d39065_True", - "model": { - "name": "byroneverson/Yi-1.5-9B-Chat-16K-abliterated", - "sha": "84a6eaa723633bbefc7cfac9c64bf0e0a4d39065", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.948135419447976, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5528453392553979, - "normalized_score": 55.28453392553979 - }, - "bbh": { - "name": "BBH", - "value": 0.5282050829986801, - "normalized_score": 32.843258967002555 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14123867069486404, - "normalized_score": 14.123867069486403 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31291946308724833, - "normalized_score": 8.389261744966444 - }, - "musr": { - "name": "MUSR", - "value": 0.4734375, - "normalized_score": 19.679687499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38231382978723405, - "normalized_score": 31.36820330969267 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-03", - "submission_date": "2024-09-03", - "generation": 1, - "base_model": "01-ai/Yi-1.5-9B-Chat-16K", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 8.829, - "co2_cost": 2.180206419702791 - } - }, - { - "id": "byroneverson/Yi-1.5-9B-Chat-abliterated_bfloat16_4e26c200cdf2dc50dd50cdd9fe5b74887e9fa94a_True", - "model": { - "name": "byroneverson/Yi-1.5-9B-Chat-abliterated", - "sha": "4e26c200cdf2dc50dd50cdd9fe5b74887e9fa94a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.270006043497983, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5723291976400395, - "normalized_score": 57.23291976400395 - }, - "bbh": { - "name": "BBH", - "value": 0.5401219363002313, - "normalized_score": 34.35218727198406 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1661631419939577, - "normalized_score": 16.61631419939577 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.43886458333333334, - "normalized_score": 13.658072916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3715093085106383, - "normalized_score": 30.167700945626475 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-04", - "submission_date": "2024-09-17", - "generation": 1, - "base_model": "01-ai/Yi-1.5-9B-Chat", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 
8.829, - "co2_cost": 1.689144676280076 - } - }, - { - "id": "c10x/Q-Pluse_bfloat16__True", - "model": { - "name": "c10x/Q-Pluse", - "sha": "", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.634370763516103, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.11228318638988993, - "normalized_score": 11.228318638988993 - }, - "bbh": { - "name": "BBH", - "value": 0.2875111436321769, - "normalized_score": 1.9479450969539605 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24664429530201343, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.39381249999999995, - "normalized_score": 7.126562500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11353058510638298, - "normalized_score": 1.5033983451536632 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-10-10", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 2.6233823800199807 - } - }, - { - "id": "c10x/longthinker_bfloat16_e1bb4a2c2782ab52be7a8fa2e5905f08b7cfd464_True", - "model": { - "name": "c10x/longthinker", - "sha": "e1bb4a2c2782ab52be7a8fa2e5905f08b7cfd464", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.73088769949013, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.36087913403103766, - "normalized_score": 36.08791340310377 - }, - "bbh": { - "name": "BBH", - "value": 0.49274888053364546, - "normalized_score": 28.4247368611983 - }, - "math": { - "name": "MATH Level 5", - "value": 0.23187311178247735, - "normalized_score": 23.187311178247736 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.3909583333333333, - "normalized_score": 6.703125000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3527260638297872, - "normalized_score": 28.08067375886525 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-10-10", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.8895488764718014 - } - }, - { - "id": "carsenk/flippa-v6_float16_5206a32e0bd3067aef1ce90f5528ade7d866253f_False", - "model": { - "name": "carsenk/flippa-v6", - "sha": "5206a32e0bd3067aef1ce90f5528ade7d866253f", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 20.776367026140395, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3439429602344003, - "normalized_score": 34.39429602344003 - }, - "bbh": { - "name": "BBH", - "value": 0.5046972457053399, - "normalized_score": 29.993501279427136 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1404833836858006, - "normalized_score": 14.04833836858006 - }, - "gpqa": { - 
"name": "GPQA", - "value": 0.29278523489932884, - "normalized_score": 5.7046979865771785 - }, - "musr": { - "name": "MUSR", - "value": 0.40887500000000004, - "normalized_score": 10.876041666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3667719414893617, - "normalized_score": 29.6413268321513 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-24", - "submission_date": "2024-08-24", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 16.061, - "co2_cost": 2.129600623223738 - } - }, - { - "id": "carsenk/phi3.5_mini_exp_825_uncensored_bfloat16_6b208dc3df02e0d5ef0c3fe5899f9f31618f2e94_True", - "model": { - "name": "carsenk/phi3.5_mini_exp_825_uncensored", - "sha": "6b208dc3df02e0d5ef0c3fe5899f9f31618f2e94", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 3.6431087408611895, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.13641360479084386, - "normalized_score": 13.641360479084385 - }, - "bbh": { - "name": "BBH", - "value": 0.29647345147918264, - "normalized_score": 1.827812730226084 - }, - "math": { - "name": "MATH Level 5", - "value": 0.010574018126888218, - "normalized_score": 1.0574018126888218 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24916107382550334, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.36441666666666667, - "normalized_score": 3.385416666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11751994680851063, - "normalized_score": 1.9466607565011809 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-29", - "submission_date": "2024-08-29", - "generation": 2, - "base_model": "microsoft/Phi-3.5-mini-instruct", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 3.821, - "co2_cost": 0.975636967913453 - } - }, - { - "id": "cat-searcher/gemma-2-9b-it-sppo-iter-1_bfloat16_b29a3a5cef93ee044e2297fcb40bd2976415e900_True", - "model": { - "name": "cat-searcher/gemma-2-9b-it-sppo-iter-1", - "sha": "b29a3a5cef93ee044e2297fcb40bd2976415e900", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 21.93864090281541, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.30147674836101546, - "normalized_score": 30.147674836101544 - }, - "bbh": { - "name": "BBH", - "value": 0.5971867698707507, - "normalized_score": 41.67630770023723 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08308157099697885, - "normalized_score": 8.308157099697885 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3447986577181208, - "normalized_score": 12.639821029082773 - }, - "musr": { - "name": "MUSR", - "value": 0.39266666666666666, - "normalized_score": 7.1499999999999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38538896276595747, - "normalized_score": 31.709884751773053 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - 
"upload_date": "", - "submission_date": "2024-08-09", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 9.242, - "co2_cost": 5.536078704562463 - } - }, - { - "id": "cat-searcher/gemma-2-9b-it-sppo-iter-1-evol-1_bfloat16_c2d7b76786151aecfa5972a2a3e937feb2d2c48b_True", - "model": { - "name": "cat-searcher/gemma-2-9b-it-sppo-iter-1-evol-1", - "sha": "c2d7b76786151aecfa5972a2a3e937feb2d2c48b", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 21.52546311795015, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2941827683878775, - "normalized_score": 29.418276838787747 - }, - "bbh": { - "name": "BBH", - "value": 0.5939369622672414, - "normalized_score": 41.10464026733791 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08534743202416918, - "normalized_score": 8.534743202416918 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34060402684563756, - "normalized_score": 12.080536912751676 - }, - "musr": { - "name": "MUSR", - "value": 0.39257291666666666, - "normalized_score": 6.904947916666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37998670212765956, - "normalized_score": 31.109633569739948 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-08-09", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 9.242, - "co2_cost": 5.575624815994469 - } - }, - { - "id": "cckm/tinymistral_950m_bfloat16_b8ac79e9904405e6cc793101c098561a47b2d0d7_False", - "model": { - "name": "cckm/tinymistral_950m", - "sha": "b8ac79e9904405e6cc793101c098561a47b2d0d7", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 5.219822920055284, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23952889444451833, - "normalized_score": 23.952889444451834 - }, - "bbh": { - "name": "BBH", - "value": 0.29694562621388126, - "normalized_score": 2.371788472964944 - }, - "math": { - "name": "MATH Level 5", - "value": 0.005287009063444109, - "normalized_score": 0.5287009063444109 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.3553645833333334, - "normalized_score": 2.053906250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10962433510638298, - "normalized_score": 1.0693705673758855 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-12", - "submission_date": "2025-01-13", - "generation": 1, - "base_model": "cckm/tinymistral_950m (Merge)", - "hub_license": "mit", - "hub_hearts": 2, - "params_billions": 0.955, - "co2_cost": 0.7365668642980026 - } - }, - { - "id": "cgato/TheSalt-L3-8b-v0.3.2_bfloat16_5cf08e2bf9590ebcd14ba021e113def28c65afa2_True", - "model": { - "name": "cgato/TheSalt-L3-8b-v0.3.2", - "sha": "5cf08e2bf9590ebcd14ba021e113def28c65afa2", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 
7.39988938907955, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.27050337548814923, - "normalized_score": 27.050337548814923 - }, - "bbh": { - "name": "BBH", - "value": 0.29679653176003074, - "normalized_score": 2.612714473502145 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04758308157099698, - "normalized_score": 4.758308157099698 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.38962499999999994, - "normalized_score": 6.3031250000000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11394614361702128, - "normalized_score": 1.549571513002364 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-18", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "cgato/TheSalt-L3-8b-v0.3.2", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.880588171001616 - } - }, - { - "id": "chargoddard/prometheus-2-llama-3-8b_bfloat16_90a728ac98e5b4169f88ae4945e357cf45477568_True", - "model": { - "name": "chargoddard/prometheus-2-llama-3-8b", - "sha": "90a728ac98e5b4169f88ae4945e357cf45477568", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.318861828334345, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5288900118352637, - "normalized_score": 52.889001183526375 - }, - "bbh": { - "name": "BBH", - "value": 0.4931144581470071, - "normalized_score": 27.80383919259717 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0823262839879154, - "normalized_score": 8.23262839879154 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.33958333333333335, - "normalized_score": 0.7812499999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30867686170212766, - "normalized_score": 23.186317966903072 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-26", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "chargoddard/prometheus-2-llama-3-8b (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.8902292652132209 - } - }, - { - "id": "chujiezheng/Llama-3-Instruct-8B-SimPO-ExPO_bfloat16_3fcaa9fe99691659eb197487e9a343f601bf63f2_True", - "model": { - "name": "chujiezheng/Llama-3-Instruct-8B-SimPO-ExPO", - "sha": "3fcaa9fe99691659eb197487e9a343f601bf63f2", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.054922214082467, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6433707008515184, - "normalized_score": 64.33707008515184 - }, - "bbh": { - "name": "BBH", - "value": 0.4764515968840137, - "normalized_score": 25.86828225174116 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0702416918429003, - "normalized_score": 7.02416918429003 - }, - "gpqa": { - "name": "GPQA", - "value": 
0.28691275167785235, - "normalized_score": 4.921700223713646 - }, - "musr": { - "name": "MUSR", - "value": 0.3920104166666667, - "normalized_score": 9.501302083333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.340093085106383, - "normalized_score": 26.677009456264773 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-26", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "chujiezheng/Llama-3-Instruct-8B-SimPO-ExPO", - "hub_license": "llama3", - "hub_hearts": 16, - "params_billions": 8.03, - "co2_cost": 1.4409627658318653 - } - }, - { - "id": "chujiezheng/Mistral7B-PairRM-SPPO-ExPO_bfloat16_d3e8342a63e5ae096f450f2467a92168db12768c_True", - "model": { - "name": "chujiezheng/Mistral7B-PairRM-SPPO-ExPO", - "sha": "d3e8342a63e5ae096f450f2467a92168db12768c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 13.617148888995521, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.36734863495525205, - "normalized_score": 36.734863495525204 - }, - "bbh": { - "name": "BBH", - "value": 0.3882191262277366, - "normalized_score": 13.678635807519433 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01812688821752266, - "normalized_score": 1.812688821752266 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27684563758389263, - "normalized_score": 3.5794183445190177 - }, - "musr": { - "name": "MUSR", - "value": 0.40553124999999995, - "normalized_score": 8.658072916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2551529255319149, - "normalized_score": 17.239213947990542 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-04", - "submission_date": "2024-09-21", - "generation": 0, - "base_model": "chujiezheng/Mistral7B-PairRM-SPPO-ExPO", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.018067970306855 - } - }, - { - "id": "cjvt/GaMS-1B_float16_1620a336e3317ba3fa56586995e46ea9fbadd407_False", - "model": { - "name": "cjvt/GaMS-1B", - "sha": "1620a336e3317ba3fa56586995e46ea9fbadd407", - "precision": "float16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "OPTForCausalLM", - "average_score": 4.6217597962340875, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.163541625110263, - "normalized_score": 16.3541625110263 - }, - "bbh": { - "name": "BBH", - "value": 0.3074752552734472, - "normalized_score": 3.861742268465667 - }, - "math": { - "name": "MATH Level 5", - "value": 0.013595166163141994, - "normalized_score": 1.3595166163141994 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.36841666666666667, - "normalized_score": 3.385416666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11486037234042554, - "normalized_score": 1.6511524822695034 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-18", - 
"submission_date": "2025-02-13", - "generation": 0, - "base_model": "cjvt/GaMS-1B", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 1.54, - "co2_cost": 0.5078824355043933 - } - }, - { - "id": "cloudyu/Llama-3-70Bx2-MOE_bfloat16_b8bd85e8db8e4ec352b93441c92e0ae1334bf5a7_False", - "model": { - "name": "cloudyu/Llama-3-70Bx2-MOE", - "sha": "b8bd85e8db8e4ec352b93441c92e0ae1334bf5a7", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 35.66646489034437, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5482486469234964, - "normalized_score": 54.824864692349635 - }, - "bbh": { - "name": "BBH", - "value": 0.6636234572270707, - "normalized_score": 51.42213772529595 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2175226586102719, - "normalized_score": 21.75226586102719 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3934563758389262, - "normalized_score": 19.12751677852349 - }, - "musr": { - "name": "MUSR", - "value": 0.48118750000000005, - "normalized_score": 20.848437499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5142121010638298, - "normalized_score": 46.02356678486997 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-20", - "submission_date": "2024-06-27", - "generation": 0, - "base_model": "cloudyu/Llama-3-70Bx2-MOE", - "hub_license": "llama3", - "hub_hearts": 1, - "params_billions": 126.926, - "co2_cost": 43.079109603886145 - } - }, - { - "id": "cloudyu/Llama-3.2-3Bx4_float16_d0d893eb5937ba4c3dd4f58471d5ac64334e6ff6_False", - "model": { - "name": "cloudyu/Llama-3.2-3Bx4", - "sha": "d0d893eb5937ba4c3dd4f58471d5ac64334e6ff6", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 18.997310704369742, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5068584688626179, - "normalized_score": 50.685846886261785 - }, - "bbh": { - "name": "BBH", - "value": 0.43321946547659324, - "normalized_score": 19.7933281387424 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10725075528700906, - "normalized_score": 10.725075528700906 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.3495625, - "normalized_score": 7.028645833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29853723404255317, - "normalized_score": 22.059692671394796 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-27", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 9.949, - "co2_cost": 2.534152258686576 - } - }, - { - "id": "cloudyu/Mixtral_11Bx2_MoE_19B_float16_39edb16515e431617f7ce69f9b4166b40f97f34b_False", - "model": { - "name": "cloudyu/Mixtral_11Bx2_MoE_19B", - "sha": "39edb16515e431617f7ce69f9b4166b40f97f34b", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - 
"average_score": 20.407079261242234, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3850837998732253, - "normalized_score": 38.50837998732253 - }, - "bbh": { - "name": "BBH", - "value": 0.5208516020145867, - "normalized_score": 32.78564048528819 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06722054380664652, - "normalized_score": 6.722054380664652 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.4296875, - "normalized_score": 13.377604166666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33111702127659576, - "normalized_score": 25.67966903073286 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-12-31", - "submission_date": "2025-02-16", - "generation": 0, - "base_model": "cloudyu/Mixtral_11Bx2_MoE_19B", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 37, - "params_billions": 19.188, - "co2_cost": 1.1164466475132806 - } - }, - { - "id": "cloudyu/Mixtral_34Bx2_MoE_60B_bfloat16_d01642769ccc782e1db1fc26cb25097aecb98e23_False", - "model": { - "name": "cloudyu/Mixtral_34Bx2_MoE_60B", - "sha": "d01642769ccc782e1db1fc26cb25097aecb98e23", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 27.611169192851037, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4537770892343427, - "normalized_score": 45.37770892343427 - }, - "bbh": { - "name": "BBH", - "value": 0.5869701263465353, - "normalized_score": 41.209129053590964 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0770392749244713, - "normalized_score": 7.7039274924471295 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33808724832214765, - "normalized_score": 11.74496644295302 - }, - "musr": { - "name": "MUSR", - "value": 0.4625208333333333, - "normalized_score": 17.78177083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.47664561170212766, - "normalized_score": 41.84951241134751 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-01-05", - "submission_date": "2024-08-22", - "generation": 0, - "base_model": "cloudyu/Mixtral_34Bx2_MoE_60B", - "hub_license": "apache-2.0", - "hub_hearts": 112, - "params_billions": 60.814, - "co2_cost": 14.665176960225859 - } - }, - { - "id": "cloudyu/Mixtral_7Bx2_MoE_bfloat16_5b7b6efb5110eccbcc752f92413eea22bacdd1c2_False", - "model": { - "name": "cloudyu/Mixtral_7Bx2_MoE", - "sha": "5b7b6efb5110eccbcc752f92413eea22bacdd1c2", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 21.447315990602657, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4480068440626427, - "normalized_score": 44.80068440626427 - }, - "bbh": { - "name": "BBH", - "value": 0.5159732691655027, - "normalized_score": 32.2766407313606 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06873111782477341, - "normalized_score": 6.873111782477341 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, 
- "musr": { - "name": "MUSR", - "value": 0.44729166666666664, - "normalized_score": 14.644791666666661 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30435505319148937, - "normalized_score": 22.706117021276594 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-12-22", - "submission_date": "2024-12-31", - "generation": 0, - "base_model": "cloudyu/Mixtral_7Bx2_MoE", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 36, - "params_billions": 12.879, - "co2_cost": 1.5676300529098275 - } - }, - { - "id": "cloudyu/S1-Llama-3.2-3Bx4-MoE_float16_a6af7ede4c291fc91dd54ff73fe64df840288367_False", - "model": { - "name": "cloudyu/S1-Llama-3.2-3Bx4-MoE", - "sha": "a6af7ede4c291fc91dd54ff73fe64df840288367", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 19.960751542556377, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.530214275899059, - "normalized_score": 53.0214275899059 - }, - "bbh": { - "name": "BBH", - "value": 0.43578925882973, - "normalized_score": 20.04155472353695 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12009063444108761, - "normalized_score": 12.009063444108762 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2936241610738255, - "normalized_score": 5.8165548098433995 - }, - "musr": { - "name": "MUSR", - "value": 0.345625, - "normalized_score": 6.169791666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30435505319148937, - "normalized_score": 22.706117021276594 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-05", - "submission_date": "2025-02-05", - "generation": 0, - "base_model": "cloudyu/S1-Llama-3.2-3Bx4-MoE", - "hub_license": "llama3", - "hub_hearts": 0, - "params_billions": 9.555, - "co2_cost": 2.7112059316507433 - } - }, - { - "id": "cloudyu/Yi-34Bx2-MoE-60B-DPO_bfloat16_5c2d31042229ee06246064100b781dd926cb0ffd_True", - "model": { - "name": "cloudyu/Yi-34Bx2-MoE-60B-DPO", - "sha": "5c2d31042229ee06246064100b781dd926cb0ffd", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 26.04350240565636, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.531887613753729, - "normalized_score": 53.1887613753729 - }, - "bbh": { - "name": "BBH", - "value": 0.516831447641953, - "normalized_score": 31.259298004231464 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0702416918429003, - "normalized_score": 7.02416918429003 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3221476510067114, - "normalized_score": 9.61968680089485 - }, - "musr": { - "name": "MUSR", - "value": 0.43746875, - "normalized_score": 14.316927083333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.46766954787234044, - "normalized_score": 40.8521719858156 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-01-23", - "submission_date": "2024-08-06", - "generation": 0, - "base_model": "cloudyu/Yi-34Bx2-MoE-60B-DPO", - "hub_license": "apache-2.0", 
- "hub_hearts": 3, - "params_billions": 60.814, - "co2_cost": 14.678493278722105 - } - }, - { - "id": "cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-ipo_bfloat16_eb04613997875935cb667a517e518874bb716169_False", - "model": { - "name": "cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-ipo", - "sha": "eb04613997875935cb667a517e518874bb716169", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 10.051762435192133, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1326668794354535, - "normalized_score": 13.26668794354535 - }, - "bbh": { - "name": "BBH", - "value": 0.3800219303191354, - "normalized_score": 12.669478223003603 - }, - "math": { - "name": "MATH Level 5", - "value": 0.024924471299093656, - "normalized_score": 2.492447129909366 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.43321875, - "normalized_score": 12.419010416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2590591755319149, - "normalized_score": 17.673241725768317 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-14", - "submission_date": "2024-11-18", - "generation": 1, - "base_model": "cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-ipo (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.439661779649258 - } - }, - { - "id": "cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-sigmoid_bfloat16_2c8b52e8db11a6ff57cccf890ee26688e858f9fb_False", - "model": { - "name": "cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-sigmoid", - "sha": "2c8b52e8db11a6ff57cccf890ee26688e858f9fb", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 10.070699081571052, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.13184240038652995, - "normalized_score": 13.184240038652996 - }, - "bbh": { - "name": "BBH", - "value": 0.37889016032903705, - "normalized_score": 12.757325206130604 - }, - "math": { - "name": "MATH Level 5", - "value": 0.026435045317220542, - "normalized_score": 2.643504531722054 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.43055208333333334, - "normalized_score": 12.019010416666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2562333776595745, - "normalized_score": 17.359264184397162 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-15", - "submission_date": "2024-11-18", - "generation": 1, - "base_model": "cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-sigmoid (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4471203294708115 - } - }, - { - "id": "cluebbers/Llama-3.1-8B-paraphrase-type-generation-etpc_bfloat16_a003a227aed5c1ad67cd4a653b13a0dd7acb7ed5_False", - "model": { - "name": "cluebbers/Llama-3.1-8B-paraphrase-type-generation-etpc", - "sha": 
"a003a227aed5c1ad67cd4a653b13a0dd7acb7ed5", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 9.681788175038944, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.12085156274241235, - "normalized_score": 12.085156274241236 - }, - "bbh": { - "name": "BBH", - "value": 0.3780811415223316, - "normalized_score": 12.69457911779628 - }, - "math": { - "name": "MATH Level 5", - "value": 0.019637462235649546, - "normalized_score": 1.9637462235649545 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.43185416666666665, - "normalized_score": 12.048437499999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.25556848404255317, - "normalized_score": 17.28538711583924 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-04", - "submission_date": "2024-11-18", - "generation": 1, - "base_model": "cluebbers/Llama-3.1-8B-paraphrase-type-generation-etpc (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4830543361353572 - } - }, - { - "id": "cognitivecomputations/Dolphin3.0-Llama3.1-8B_float16_0bf45a981ba100596ee0c3e7d27e7849b0206632_True", - "model": { - "name": "cognitivecomputations/Dolphin3.0-Llama3.1-8B", - "sha": "0bf45a981ba100596ee0c3e7d27e7849b0206632", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.26984394173985, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7621222799948582, - "normalized_score": 76.2122279994858 - }, - "bbh": { - "name": "BBH", - "value": 0.4916366353921198, - "normalized_score": 27.6317028058839 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12311178247734139, - "normalized_score": 12.311178247734139 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.36534375, - "normalized_score": 8.967968750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2992021276595745, - "normalized_score": 22.13356973995272 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2025-01-05", - "generation": 1, - "base_model": "cognitivecomputations/Dolphin3.0-Llama3.1-8B (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 155, - "params_billions": 8.03, - "co2_cost": 1.2386505265987682 - } - }, - { - "id": "cognitivecomputations/Dolphin3.0-Llama3.2-1B_bfloat16_100ed6fbb590630622f795adc792f38df1c5b2f7_True", - "model": { - "name": "cognitivecomputations/Dolphin3.0-Llama3.2-1B", - "sha": "100ed6fbb590630622f795adc792f38df1c5b2f7", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 11.140988411587436, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5427787160290252, - "normalized_score": 54.27787160290252 - }, - "bbh": { - "name": "BBH", - 
"value": 0.31222474255909144, - "normalized_score": 4.657279069612183 - }, - "math": { - "name": "MATH Level 5", - "value": 0.027945619335347432, - "normalized_score": 2.794561933534743 - }, - "gpqa": { - "name": "GPQA", - "value": 0.22986577181208054, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.32488541666666665, - "normalized_score": 0.9440104166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.13754986702127658, - "normalized_score": 4.172207446808509 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2025-03-12", - "generation": 1, - "base_model": "cognitivecomputations/Dolphin3.0-Llama3.2-1B (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 23, - "params_billions": 1.236, - "co2_cost": 2.008096300085214 - } - }, - { - "id": "cognitivecomputations/Dolphin3.0-Qwen2.5-0.5B_bfloat16_7111aeb3bedf7414baaf1532e84d470519c667f2_True", - "model": { - "name": "cognitivecomputations/Dolphin3.0-Qwen2.5-0.5B", - "sha": "7111aeb3bedf7414baaf1532e84d470519c667f2", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 10.626273337296174, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4697136930012367, - "normalized_score": 46.97136930012367 - }, - "bbh": { - "name": "BBH", - "value": 0.31142229157184026, - "normalized_score": 5.096928104575164 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0513595166163142, - "normalized_score": 5.13595166163142 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2348993288590604, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.35545833333333327, - "normalized_score": 1.9656249999999986 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.14128989361702127, - "normalized_score": 4.587765957446806 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2025-03-02", - "generation": 1, - "base_model": "cognitivecomputations/Dolphin3.0-Qwen2.5-0.5B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 12, - "params_billions": 0.494, - "co2_cost": 0.8245174133202131 - } - }, - { - "id": "cognitivecomputations/Dolphin3.0-R1-Mistral-24B_bfloat16_34368009d6122e9ef796826bc0ca3989a47ea33e_True", - "model": { - "name": "cognitivecomputations/Dolphin3.0-R1-Mistral-24B", - "sha": "34368009d6122e9ef796826bc0ca3989a47ea33e", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 23.513141611997387, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.406816136739407, - "normalized_score": 40.6816136739407 - }, - "bbh": { - "name": "BBH", - "value": 0.5359697041031141, - "normalized_score": 33.763678263545195 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3119335347432024, - "normalized_score": 31.19335347432024 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.3951770833333333, - "normalized_score": 7.230468750000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", 
- "value": 0.300531914893617, - "normalized_score": 22.281323877068555 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2025-02-06", - "submission_date": "2025-02-07", - "generation": 1, - "base_model": "cognitivecomputations/Dolphin3.0-R1-Mistral-24B (Merge)", - "hub_license": "", - "hub_hearts": 165, - "params_billions": 23.572, - "co2_cost": 2.9536720384896045 - } - }, - { - "id": "cognitivecomputations/dolphin-2.9-llama3-8b_bfloat16_5aeb036f9215c558b483a654a8c6e1cc22e841bf_True", - "model": { - "name": "cognitivecomputations/dolphin-2.9-llama3-8b", - "sha": "5aeb036f9215c558b483a654a8c6e1cc22e841bf", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.415461238797384, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.38503393218881454, - "normalized_score": 38.503393218881456 - }, - "bbh": { - "name": "BBH", - "value": 0.49499220166609187, - "normalized_score": 27.858929260905125 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05740181268882175, - "normalized_score": 5.740181268882175 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28691275167785235, - "normalized_score": 4.921700223713646 - }, - "musr": { - "name": "MUSR", - "value": 0.43753125, - "normalized_score": 13.791406250000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.277094414893617, - "normalized_score": 19.67715721040189 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-04-20", - "submission_date": "2024-06-12", - "generation": 1, - "base_model": "meta-llama/Meta-Llama-3-8B", - "hub_license": "other", - "hub_hearts": 444, - "params_billions": 8.03, - "co2_cost": 1.4782402798649394 - } - }, - { - "id": "cognitivecomputations/dolphin-2.9.1-llama-3-70b_bfloat16_31adf616c3c9176d147e0a62e9fedb7bf97678ac_True", - "model": { - "name": "cognitivecomputations/dolphin-2.9.1-llama-3-70b", - "sha": "31adf616c3c9176d147e0a62e9fedb7bf97678ac", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.53438611403665, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3760167466765959, - "normalized_score": 37.60167466765959 - }, - "bbh": { - "name": "BBH", - "value": 0.5204919312821467, - "normalized_score": 31.101151872569243 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18202416918429004, - "normalized_score": 18.202416918429005 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3087248322147651, - "normalized_score": 7.829977628635347 - }, - "musr": { - "name": "MUSR", - "value": 0.49756249999999996, - "normalized_score": 23.695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.41298204787234044, - "normalized_score": 34.77578309692671 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-22", - "submission_date": "2024-06-27", - "generation": 1, - "base_model": "meta-llama/Meta-Llama-3-70B", - "hub_license": "llama3", - "hub_hearts": 42, - "params_billions": 70.554, - "co2_cost": 
-      "co2_cost": 24.298176139208458
-    }
-  },
-  {
-    "id": "cognitivecomputations/dolphin-2.9.1-yi-1.5-34b_bfloat16_1ec522298a6935c881df6dc29d3669833bd8672d_True",
-    "model": {
-      "name": "cognitivecomputations/dolphin-2.9.1-yi-1.5-34b",
-      "sha": "1ec522298a6935c881df6dc29d3669833bd8672d",
-      "precision": "bfloat16",
-      "type": "fine-tunedondomain-specificdatasets",
-      "weight_type": "Original",
-      "architecture": "LlamaForCausalLM",
-      "average_score": 28.3072040459155,
-      "has_chat_template": true
-    },
-    "evaluations": {
-      "ifeval": {
-        "name": "IFEval",
-        "value": 0.3852588908540451,
-        "normalized_score": 38.52588908540451
-      },
-      "bbh": {
-        "name": "BBH",
-        "value": 0.6076225600626862,
-        "normalized_score": 44.17408874277273
-      },
-      "math": {
-        "name": "MATH Level 5",
-        "value": 0.1865558912386707,
-        "normalized_score": 18.65558912386707
-      },
-      "gpqa": {
-        "name": "GPQA",
-        "value": 0.34312080536912754,
-        "normalized_score": 12.416107382550338
-      },
-      "musr": {
-        "name": "MUSR",
-        "value": 0.45979166666666665,
-        "normalized_score": 16.97395833333334
-      },
-      "mmlu_pro": {
-        "name": "MMLU-PRO",
-        "value": 0.4518783244680851,
-        "normalized_score": 39.09759160756501
-      }
-    },
-    "features": {
-      "is_not_available_on_hub": true,
-      "is_merged": false,
-      "is_moe": false,
-      "is_flagged": false,
-      "is_official_provider": true
-    },
-    "metadata": {
-      "upload_date": "2024-05-18",
-      "submission_date": "2024-07-27",
-      "generation": 1,
-      "base_model": "01-ai/Yi-1.5-34B",
-      "hub_license": "apache-2.0",
-      "hub_hearts": 35,
-      "params_billions": 34.389,
-      "co2_cost": 5.985306700006266
-    }
-  },
-  {
-    "id": "cognitivecomputations/dolphin-2.9.1-yi-1.5-9b_bfloat16_91f0a521e3e2a0675a3549aa5d3f40717068de94_True",
-    "model": {
-      "name": "cognitivecomputations/dolphin-2.9.1-yi-1.5-9b",
-      "sha": "91f0a521e3e2a0675a3549aa5d3f40717068de94",
-      "precision": "bfloat16",
-      "type": "fine-tunedondomain-specificdatasets",
-      "weight_type": "Original",
-      "architecture": "LlamaForCausalLM",
-      "average_score": 25.639724124308234,
-      "has_chat_template": true
-    },
-    "evaluations": {
-      "ifeval": {
-        "name": "IFEval",
-        "value": 0.44653297694561545,
-        "normalized_score": 44.653297694561545
-      },
-      "bbh": {
-        "name": "BBH",
-        "value": 0.5484314644603556,
-        "normalized_score": 35.7760897255686
-      },
-      "math": {
-        "name": "MATH Level 5",
-        "value": 0.15181268882175228,
-        "normalized_score": 15.181268882175228
-      },
-      "gpqa": {
-        "name": "GPQA",
-        "value": 0.33808724832214765,
-        "normalized_score": 11.74496644295302
-      },
-      "musr": {
-        "name": "MUSR",
-        "value": 0.4348020833333333,
-        "normalized_score": 13.516927083333336
-      },
-      "mmlu_pro": {
-        "name": "MMLU-PRO",
-        "value": 0.3966921542553192,
-        "normalized_score": 32.96579491725768
-      }
-    },
-    "features": {
-      "is_not_available_on_hub": true,
-      "is_merged": false,
-      "is_moe": false,
-      "is_flagged": false,
-      "is_official_provider": true
-    },
-    "metadata": {
-      "upload_date": "2024-05-18",
-      "submission_date": "2024-08-02",
-      "generation": 1,
-      "base_model": "01-ai/Yi-1.5-9B",
-      "hub_license": "apache-2.0",
-      "hub_hearts": 26,
-      "params_billions": 8.829,
-      "co2_cost": 2.101731428783443
-    }
-  },
-  {
-    "id": "cognitivecomputations/dolphin-2.9.2-Phi-3-Medium_bfloat16_0470c5b912b51fa6e27d87a8ea7feafacd8cb101_True",
-    "model": {
-      "name": "cognitivecomputations/dolphin-2.9.2-Phi-3-Medium",
-      "sha": "0470c5b912b51fa6e27d87a8ea7feafacd8cb101",
-      "precision": "bfloat16",
-      "type": "chatmodels",
-      "weight_type": "Original",
-      "architecture": "MistralForCausalLM",
-      "average_score": 28.614516488633864,
-      "has_chat_template": true
}, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4247762603226107, - "normalized_score": 42.47762603226107 - }, - "bbh": { - "name": "BBH", - "value": 0.6456739302686527, - "normalized_score": 49.72194030508101 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18277945619335348, - "normalized_score": 18.27794561933535 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3271812080536913, - "normalized_score": 10.290827740492169 - }, - "musr": { - "name": "MUSR", - "value": 0.4190520833333333, - "normalized_score": 11.414843750000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.45553523936170215, - "normalized_score": 39.50391548463357 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-31", - "submission_date": "2024-08-05", - "generation": 1, - "base_model": "cognitivecomputations/dolphin-2.9.2-Phi-3-Medium (Merge)", - "hub_license": "mit", - "hub_hearts": 22, - "params_billions": -1.0, - "co2_cost": 1.6809635853796399 - } - }, - { - "id": "cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated_float16_d50be5f22ca9745a2a3175996611d6a840318b7f_False", - "model": { - "name": "cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated", - "sha": "d50be5f22ca9745a2a3175996611d6a840318b7f", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 25.590063720348784, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.36125369574950017, - "normalized_score": 36.12536957495002 - }, - "bbh": { - "name": "BBH", - "value": 0.612322545411745, - "normalized_score": 45.44126655093765 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12386706948640483, - "normalized_score": 12.386706948640484 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32802013422818793, - "normalized_score": 10.402684563758392 - }, - "musr": { - "name": "MUSR", - "value": 0.4111770833333333, - "normalized_score": 10.363802083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4493849734042553, - "normalized_score": 38.820552600472816 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-06-03", - "submission_date": "2024-06-27", - "generation": 1, - "base_model": "cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated (Merge)", - "hub_license": "mit", - "hub_hearts": 18, - "params_billions": 13.96, - "co2_cost": 0.8439536564991926 - } - }, - { - "id": "cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated_bfloat16_d50be5f22ca9745a2a3175996611d6a840318b7f_True", - "model": { - "name": "cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated", - "sha": "d50be5f22ca9745a2a3175996611d6a840318b7f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 28.53887227810586, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4123614232458765, - "normalized_score": 41.23614232458765 - }, - "bbh": { - "name": "BBH", - "value": 0.638289226729353, - "normalized_score": 48.38534691270737 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18202416918429004, - 
"normalized_score": 18.202416918429005 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3288590604026846, - "normalized_score": 10.514541387024611 - }, - "musr": { - "name": "MUSR", - "value": 0.43492708333333335, - "normalized_score": 13.732552083333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.45246010638297873, - "normalized_score": 39.162234042553195 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-06-03", - "submission_date": "2024-08-05", - "generation": 1, - "base_model": "cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated (Merge)", - "hub_license": "mit", - "hub_hearts": 18, - "params_billions": 13.96, - "co2_cost": 1.6415931932373822 - } - }, - { - "id": "cognitivecomputations/dolphin-2.9.2-qwen2-72b_bfloat16_e79582577c2bf2af304221af0e8308b7e7d46ca1_True", - "model": { - "name": "cognitivecomputations/dolphin-2.9.2-qwen2-72b", - "sha": "e79582577c2bf2af304221af0e8308b7e7d46ca1", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.97892776605849, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6343778950961227, - "normalized_score": 63.43778950961227 - }, - "bbh": { - "name": "BBH", - "value": 0.6296364939584073, - "normalized_score": 47.69617372826186 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2802114803625378, - "normalized_score": 28.02114803625378 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3699664429530201, - "normalized_score": 15.99552572706935 - }, - "musr": { - "name": "MUSR", - "value": 0.45207291666666666, - "normalized_score": 17.042447916666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.547124335106383, - "normalized_score": 49.680481678487006 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-27", - "submission_date": "2024-10-20", - "generation": 1, - "base_model": "Qwen/Qwen2-72B", - "hub_license": "other", - "hub_hearts": 158, - "params_billions": 72.0, - "co2_cost": 37.74700133322834 - } - }, - { - "id": "cognitivecomputations/dolphin-2.9.2-qwen2-7b_bfloat16_c443c4eb5138ed746ac49ed98bf3c183dc5380ac_True", - "model": { - "name": "cognitivecomputations/dolphin-2.9.2-qwen2-7b", - "sha": "c443c4eb5138ed746ac49ed98bf3c183dc5380ac", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 21.27208217477453, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3534599307614906, - "normalized_score": 35.34599307614906 - }, - "bbh": { - "name": "BBH", - "value": 0.48938263759195594, - "normalized_score": 27.914874953255538 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13444108761329304, - "normalized_score": 13.444108761329304 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.41914583333333333, - "normalized_score": 11.65989583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4050864361702128, - "normalized_score": 33.89849290780142 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - 
"is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-24", - "submission_date": "2024-07-10", - "generation": 1, - "base_model": "Qwen/Qwen2-7B", - "hub_license": "apache-2.0", - "hub_hearts": 67, - "params_billions": 7.616, - "co2_cost": 2.558394539648611 - } - }, - { - "id": "cognitivecomputations/dolphin-2.9.3-Yi-1.5-34B-32k_bfloat16_ff4eee6438194a670a95dff3118b5231eb568610_True", - "model": { - "name": "cognitivecomputations/dolphin-2.9.3-Yi-1.5-34B-32k", - "sha": "ff4eee6438194a670a95dff3118b5231eb568610", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 27.098382647973178, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3639266036339136, - "normalized_score": 36.39266036339136 - }, - "bbh": { - "name": "BBH", - "value": 0.6046995537773227, - "normalized_score": 43.40647565235176 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16691842900302115, - "normalized_score": 16.691842900302113 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34312080536912754, - "normalized_score": 12.416107382550338 - }, - "musr": { - "name": "MUSR", - "value": 0.43105208333333334, - "normalized_score": 13.348177083333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4630152925531915, - "normalized_score": 40.33503250591017 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-06-23", - "submission_date": "2024-07-27", - "generation": 1, - "base_model": "01-ai/Yi-1.5-34B-32k", - "hub_license": "apache-2.0", - "hub_hearts": 18, - "params_billions": 34.0, - "co2_cost": 6.490521054223584 - } - }, - { - "id": "cognitivecomputations/dolphin-2.9.3-mistral-7B-32k_bfloat16_4f4273ee8e7930dd64e2c6121c79d12546b883e2_True", - "model": { - "name": "cognitivecomputations/dolphin-2.9.3-mistral-7B-32k", - "sha": "4f4273ee8e7930dd64e2c6121c79d12546b883e2", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.348695949526363, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4126362495955177, - "normalized_score": 41.26362495955176 - }, - "bbh": { - "name": "BBH", - "value": 0.48125401481062013, - "normalized_score": 26.906353891780515 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05060422960725076, - "normalized_score": 5.0604229607250755 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28523489932885904, - "normalized_score": 4.697986577181204 - }, - "musr": { - "name": "MUSR", - "value": 0.4642604166666667, - "normalized_score": 17.932552083333338 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2820811170212766, - "normalized_score": 20.231235224586285 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-06-25", - "submission_date": "2024-07-04", - "generation": 1, - "base_model": "mistralai/Mistral-7B-v0.3", - "hub_license": "apache-2.0", - "hub_hearts": 52, - "params_billions": 7.248, - "co2_cost": 1.2001651608405095 - } - }, - { - "id": 
"cognitivecomputations/dolphin-2.9.3-mistral-nemo-12b_bfloat16_7b535c900688fc836fbeebaeb7133910b09bafda_True", - "model": { - "name": "cognitivecomputations/dolphin-2.9.3-mistral-nemo-12b", - "sha": "7b535c900688fc836fbeebaeb7133910b09bafda", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 24.9724308416491, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5600894515441251, - "normalized_score": 56.008945154412515 - }, - "bbh": { - "name": "BBH", - "value": 0.5480369183144175, - "normalized_score": 36.08275865915292 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07401812688821752, - "normalized_score": 7.401812688821751 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31543624161073824, - "normalized_score": 8.7248322147651 - }, - "musr": { - "name": "MUSR", - "value": 0.4429895833333333, - "normalized_score": 15.20703125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3376828457446808, - "normalized_score": 26.409205082742314 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-07-23", - "submission_date": "2024-07-26", - "generation": 1, - "base_model": "mistralai/Mistral-Nemo-Base-2407", - "hub_license": "apache-2.0", - "hub_hearts": 100, - "params_billions": 12.248, - "co2_cost": 2.750284963315723 - } - }, - { - "id": "cognitivecomputations/dolphin-2.9.4-gemma2-2b_bfloat16_5c0854beb88a6711221771d1b13d51f733e6ca06_True", - "model": { - "name": "cognitivecomputations/dolphin-2.9.4-gemma2-2b", - "sha": "5c0854beb88a6711221771d1b13d51f733e6ca06", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 9.835205324051353, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.08955127949396491, - "normalized_score": 8.955127949396491 - }, - "bbh": { - "name": "BBH", - "value": 0.40813187411055213, - "normalized_score": 17.3676325443774 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04909365558912387, - "normalized_score": 4.909365558912387 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28439597315436244, - "normalized_score": 4.5861297539149914 - }, - "musr": { - "name": "MUSR", - "value": 0.41796875, - "normalized_score": 10.912760416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2105219414893617, - "normalized_score": 12.28021572104019 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-08-24", - "submission_date": "2024-08-25", - "generation": 1, - "base_model": "google/gemma-2-2b", - "hub_license": "gemma", - "hub_hearts": 36, - "params_billions": 2.614, - "co2_cost": 3.022496022461491 - } - }, - { - "id": "cognitivecomputations/dolphin-2.9.4-llama3.1-8b_bfloat16_7b73d1b7760bf9abac168de3d388b30d1ca1a138_True", - "model": { - "name": "cognitivecomputations/dolphin-2.9.4-llama3.1-8b", - "sha": "7b73d1b7760bf9abac168de3d388b30d1ca1a138", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 7.1318611826224165, - "has_chat_template": true - }, - "evaluations": { - 
"ifeval": { - "name": "IFEval", - "value": 0.27572396796056686, - "normalized_score": 27.572396796056683 - }, - "bbh": { - "name": "BBH", - "value": 0.35236263850832567, - "normalized_score": 8.972088688921525 - }, - "math": { - "name": "MATH Level 5", - "value": 0.012084592145015106, - "normalized_score": 1.2084592145015105 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.3236145833333333, - "normalized_score": 0.6184895833333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12367021276595745, - "normalized_score": 2.6300236406619386 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-08-04", - "submission_date": "2024-09-17", - "generation": 1, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "llama3.1", - "hub_hearts": 96, - "params_billions": 8.03, - "co2_cost": 2.637555731042925 - } - }, - { - "id": "collaiborateorg/Collaiborator-MEDLLM-Llama-3-8B-v2_bfloat16_2560556d655d0ecaefec10f579c92292d65fb28b_False", - "model": { - "name": "collaiborateorg/Collaiborator-MEDLLM-Llama-3-8B-v2", - "sha": "2560556d655d0ecaefec10f579c92292d65fb28b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.93904722141172, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.380887157187374, - "normalized_score": 38.08871571873739 - }, - "bbh": { - "name": "BBH", - "value": 0.46480279544898967, - "normalized_score": 23.648503176108246 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05664652567975831, - "normalized_score": 5.664652567975831 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33305369127516776, - "normalized_score": 11.073825503355701 - }, - "musr": { - "name": "MUSR", - "value": 0.3434270833333333, - "normalized_score": 1.5950520833333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3480718085106383, - "normalized_score": 27.56353427895981 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-06-27", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4115782694310812 - } - }, - { - "id": "cpayne1303/cp2024_bfloat16_fb354aaa73c40b4f1fc6e86beea733e4f3929470_False", - "model": { - "name": "cpayne1303/cp2024", - "sha": "fb354aaa73c40b4f1fc6e86beea733e4f3929470", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 3.702132658945494, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16581448334862608, - "normalized_score": 16.58144833486261 - }, - "bbh": { - "name": "BBH", - "value": 0.29853854089245085, - "normalized_score": 2.7391414141414145 - }, - "math": { - "name": "MATH Level 5", - "value": 0.005287009063444109, - "normalized_score": 0.5287009063444109 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2558724832214765, - "normalized_score": 0.7829977628635317 - }, - "musr": { - "name": "MUSR", - "value": 0.3383125, - "normalized_score": 0.45572916666666624 - }, - 
"mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11012300531914894, - "normalized_score": 1.124778368794326 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-26", - "submission_date": "2024-11-26", - "generation": 0, - "base_model": "cpayne1303/cp2024", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.031, - "co2_cost": 0.09522611525635165 - } - }, - { - "id": "cpayne1303/cp2024-instruct_bfloat16_ac4cfbc28479f8a94e3eb745526620be9b75edfa_True", - "model": { - "name": "cpayne1303/cp2024-instruct", - "sha": "ac4cfbc28479f8a94e3eb745526620be9b75edfa", - "precision": "bfloat16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.319731373654743, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17061064641817045, - "normalized_score": 17.061064641817044 - }, - "bbh": { - "name": "BBH", - "value": 0.2946778102988436, - "normalized_score": 2.481300216779669 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.3686354166666666, - "normalized_score": 3.1794270833333322 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11668882978723404, - "normalized_score": 1.854314420803781 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-27", - "submission_date": "2024-11-27", - "generation": 1, - "base_model": "cpayne1303/cp2024", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 0.031, - "co2_cost": 0.06432379102827343 - } - }, - { - "id": "cpayne1303/llama-43m-beta_bfloat16_1f85bec8c3541ed58fc2fcf4e6f98c1c34d72f60_False", - "model": { - "name": "cpayne1303/llama-43m-beta", - "sha": "1f85bec8c3541ed58fc2fcf4e6f98c1c34d72f60", - "precision": "bfloat16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.288331692594867, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19156837191983936, - "normalized_score": 19.156837191983936 - }, - "bbh": { - "name": "BBH", - "value": 0.29767781029884355, - "normalized_score": 2.482040957520409 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.3871770833333333, - "normalized_score": 6.1638020833333345 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11319813829787234, - "normalized_score": 1.466459810874704 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-30", - "submission_date": "2024-11-30", - "generation": 1, - "base_model": "JackFram/llama-68m", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.043, - "co2_cost": 0.05839184774341252 - } - }, - { - "id": 
"cpayne1303/llama-43m-beta_float16_1f85bec8c3541ed58fc2fcf4e6f98c1c34d72f60_False", - "model": { - "name": "cpayne1303/llama-43m-beta", - "sha": "1f85bec8c3541ed58fc2fcf4e6f98c1c34d72f60", - "precision": "float16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.422628758277313, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19489066787235645, - "normalized_score": 19.489066787235643 - }, - "bbh": { - "name": "BBH", - "value": 0.29646319842669744, - "normalized_score": 2.496047806835478 - }, - "math": { - "name": "MATH Level 5", - "value": 0.004531722054380665, - "normalized_score": 0.4531722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.3885416666666666, - "normalized_score": 6.401041666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11112034574468085, - "normalized_score": 1.2355939716312052 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-30", - "submission_date": "2024-12-04", - "generation": 1, - "base_model": "JackFram/llama-68m", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.043, - "co2_cost": 0.11983175082454595 - } - }, - { - "id": "cpayne1303/smallcp2024_bfloat16_ef995127242553e4126190e7f70f927504834360_False", - "model": { - "name": "cpayne1303/smallcp2024", - "sha": "ef995127242553e4126190e7f70f927504834360", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 3.543848434170488, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1581958093414363, - "normalized_score": 15.819580934143628 - }, - "bbh": { - "name": "BBH", - "value": 0.3027047714604053, - "normalized_score": 3.1181775588611322 - }, - "math": { - "name": "MATH Level 5", - "value": 0.005287009063444109, - "normalized_score": 0.5287009063444109 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23070469798657717, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.34246874999999993, - "normalized_score": 0.5333333333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11136968085106383, - "normalized_score": 1.2632978723404247 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-27", - "submission_date": "2024-11-27", - "generation": 0, - "base_model": "cpayne1303/smallcp2024", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.002, - "co2_cost": 0.09461588977186879 - } - }, - { - "id": "crestf411/MN-Slush_bfloat16_46a0cd7e9355f232bdfe9d21a55b944319e23206_False", - "model": { - "name": "crestf411/MN-Slush", - "sha": "46a0cd7e9355f232bdfe9d21a55b944319e23206", - "precision": "bfloat16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.136982895446007, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4077148632295642, - "normalized_score": 40.77148632295642 - }, - "bbh": { - "name": "BBH", - "value": 0.5340014235282594, - 
"normalized_score": 33.156422079931886 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1268882175226586, - "normalized_score": 12.688821752265861 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3238255033557047, - "normalized_score": 9.843400447427292 - }, - "musr": { - "name": "MUSR", - "value": 0.39328125, - "normalized_score": 8.493489583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3508144946808511, - "normalized_score": 27.868277186761226 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-20", - "submission_date": "2025-01-04", - "generation": 1, - "base_model": "crestf411/MN-Slush (Merge)", - "hub_license": "", - "hub_hearts": 22, - "params_billions": 12.248, - "co2_cost": 2.1242181967056797 - } - }, - { - "id": "cstr/llama3.1-8b-spaetzle-v90_bfloat16_717e5c3d31ed2465cd7cf927327adf677a9420b5_True", - "model": { - "name": "cstr/llama3.1-8b-spaetzle-v90", - "sha": "717e5c3d31ed2465cd7cf927327adf677a9420b5", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 27.855367438578327, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7356192679867197, - "normalized_score": 73.56192679867198 - }, - "bbh": { - "name": "BBH", - "value": 0.5302860633332208, - "normalized_score": 32.76366579584106 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14954682779456194, - "normalized_score": 14.954682779456194 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.41343749999999996, - "normalized_score": 11.146354166666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37308843085106386, - "normalized_score": 30.343158983451534 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-15", - "submission_date": "2024-09-15", - "generation": 1, - "base_model": "cstr/llama3.1-8b-spaetzle-v90 (Merge)", - "hub_license": "llama3", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.5578157861662771 - } - }, - { - "id": "cyberagent/calm3-22b-chat_bfloat16_055922aa0f0fb1fbfbc97a2e31134532485ee99b_True", - "model": { - "name": "cyberagent/calm3-22b-chat", - "sha": "055922aa0f0fb1fbfbc97a2e31134532485ee99b", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.451118364125847, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.509131327100981, - "normalized_score": 50.9131327100981 - }, - "bbh": { - "name": "BBH", - "value": 0.4991683247746046, - "normalized_score": 29.52088396885831 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06948640483383686, - "normalized_score": 6.948640483383686 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27684563758389263, - "normalized_score": 3.5794183445190177 - }, - "musr": { - "name": "MUSR", - "value": 0.45532291666666663, - "normalized_score": 16.082031249999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29496343085106386, - "normalized_score": 21.66260342789598 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, 
- "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-01", - "submission_date": "2024-07-04", - "generation": 0, - "base_model": "cyberagent/calm3-22b-chat", - "hub_license": "apache-2.0", - "hub_hearts": 75, - "params_billions": 22.543, - "co2_cost": 3.548496236264388 - } - }, - { - "id": "darkc0de/BuddyGlassNeverSleeps_float16_f8849498f02c94b68ef0308a7bf6637264949a7d_False", - "model": { - "name": "darkc0de/BuddyGlassNeverSleeps", - "sha": "f8849498f02c94b68ef0308a7bf6637264949a7d", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.82064159432298, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4239019135892764, - "normalized_score": 42.390191358927645 - }, - "bbh": { - "name": "BBH", - "value": 0.49772281653646816, - "normalized_score": 28.477953494418696 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06268882175226587, - "normalized_score": 6.268882175226587 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.3992708333333333, - "normalized_score": 8.608854166666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34524601063829785, - "normalized_score": 27.249556737588648 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-16", - "submission_date": "2024-09-16", - "generation": 1, - "base_model": "darkc0de/BuddyGlassNeverSleeps (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 2.708297548316883 - } - }, - { - "id": "darkc0de/BuddyGlassUncensored2025.2_float16_e5d8aedaee374cc87d985cd76818f61f529b4476_True", - "model": { - "name": "darkc0de/BuddyGlassUncensored2025.2", - "sha": "e5d8aedaee374cc87d985cd76818f61f529b4476", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 33.625792878130945, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7731131176389756, - "normalized_score": 77.31131176389758 - }, - "bbh": { - "name": "BBH", - "value": 0.6095411371819216, - "normalized_score": 43.57124516402332 - }, - "math": { - "name": "MATH Level 5", - "value": 0.24018126888217523, - "normalized_score": 24.018126888217523 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32802013422818793, - "normalized_score": 10.402684563758392 - }, - "musr": { - "name": "MUSR", - "value": 0.4070833333333333, - "normalized_score": 9.385416666666671 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43359375, - "normalized_score": 37.06597222222223 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-13", - "submission_date": "2025-01-15", - "generation": 3, - "base_model": "tiiuae/Falcon3-10B-Base", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 10.306, - "co2_cost": 1.790304478851306 - } - }, - { - "id": "darkc0de/BuddyGlass_v0.3_Xortron7MethedUpSwitchedUp_bfloat16_57367fefe01c7d9653c303b28449b416fc777d93_False", - "model": { - "name": 
"darkc0de/BuddyGlass_v0.3_Xortron7MethedUpSwitchedUp", - "sha": "57367fefe01c7d9653c303b28449b416fc777d93", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.32825544058106, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43584245357872664, - "normalized_score": 43.58424535787267 - }, - "bbh": { - "name": "BBH", - "value": 0.5243087998656722, - "normalized_score": 31.869311081858005 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1283987915407855, - "normalized_score": 12.83987915407855 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2986577181208054, - "normalized_score": 6.487695749440718 - }, - "musr": { - "name": "MUSR", - "value": 0.4143333333333334, - "normalized_score": 9.491666666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36727061170212766, - "normalized_score": 29.696734633569736 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-10", - "submission_date": "2024-09-15", - "generation": 1, - "base_model": "darkc0de/BuddyGlass_v0.3_Xortron7MethedUpSwitchedUp (Merge)", - "hub_license": "", - "hub_hearts": 3, - "params_billions": 0.007, - "co2_cost": 1.7963640925218531 - } - }, - { - "id": "databricks/dbrx-base_float16_d7d18d833146403dd74c2620b8434639ae123d6e_False", - "model": { - "name": "databricks/dbrx-base", - "sha": "d7d18d833146403dd74c2620b8434639ae123d6e", - "precision": "float16", - "type": "❓other", - "weight_type": "Original", - "architecture": "Unknown", - "average_score": 16.35943247356884, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.08214723926380368, - "normalized_score": 8.214723926380369 - }, - "bbh": { - "name": "BBH", - "value": 0.5195833333333334, - "normalized_score": 32.60853758169935 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1, - "normalized_score": 10.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32666666666666666, - "normalized_score": 10.222222222222221 - }, - "musr": { - "name": "MUSR", - "value": 0.4066666666666667, - "normalized_score": 9.333333333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35, - "normalized_score": 27.777777777777775 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-03-26", - "submission_date": "", - "generation": 0, - "base_model": "databricks/dbrx-base", - "hub_license": "other", - "hub_hearts": 556, - "params_billions": 0.0, - "co2_cost": 10.453409663307854 - } - }, - { - "id": "databricks/dbrx-instruct_bfloat16_c0a9245908c187da8f43a81e538e67ff360904ea_True", - "model": { - "name": "databricks/dbrx-instruct", - "sha": "c0a9245908c187da8f43a81e538e67ff360904ea", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "DbrxForCausalLM", - "average_score": 25.19901027244322, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5415796752616391, - "normalized_score": 54.157967526163915 - }, - "bbh": { - "name": "BBH", - "value": 0.5428960796934387, - "normalized_score": 35.96381960359357 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06873111782477341, - "normalized_score": 
6.873111782477341 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3414429530201342, - "normalized_score": 12.192393736017896 - }, - "musr": { - "name": "MUSR", - "value": 0.42692708333333335, - "normalized_score": 12.19921875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36826795212765956, - "normalized_score": 29.807550236406616 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-03-26", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "databricks/dbrx-instruct", - "hub_license": "other", - "hub_hearts": 1111, - "params_billions": 131.597, - "co2_cost": 47.958027273119946 - } - }, - { - "id": "databricks/dolly-v1-6b_bfloat16_c9a85b3a322b402e20c839c702c725afe0cb454d_False", - "model": { - "name": "databricks/dolly-v1-6b", - "sha": "c9a85b3a322b402e20c839c702c725afe0cb454d", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "GPTJForCausalLM", - "average_score": 6.981231710564127, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22244311759464885, - "normalized_score": 22.244311759464885 - }, - "bbh": { - "name": "BBH", - "value": 0.3172089528774696, - "normalized_score": 4.7813091701327 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0188821752265861, - "normalized_score": 1.8882175226586102 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.40041666666666664, - "normalized_score": 8.118750000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12657912234042554, - "normalized_score": 2.953235815602837 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-03-23", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "databricks/dolly-v1-6b", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 310, - "params_billions": 6.0, - "co2_cost": 1.3215598245524656 - } - }, - { - "id": "databricks/dolly-v2-12b_bfloat16_19308160448536e378e3db21a73a751579ee7fdd_False", - "model": { - "name": "databricks/dolly-v2-12b", - "sha": "19308160448536e378e3db21a73a751579ee7fdd", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "GPTNeoXForCausalLM", - "average_score": 6.3704357034963754, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23550734273948679, - "normalized_score": 23.550734273948677 - }, - "bbh": { - "name": "BBH", - "value": 0.33199731673771277, - "normalized_score": 6.377894137452961 - }, - "math": { - "name": "MATH Level 5", - "value": 0.013595166163141994, - "normalized_score": 1.3595166163141994 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2407718120805369, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.37390625000000005, - "normalized_score": 5.504947916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11286569148936171, - "normalized_score": 1.4295212765957446 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-04-11", - 
"submission_date": "2024-06-12", - "generation": 0, - "base_model": "databricks/dolly-v2-12b", - "hub_license": "mit", - "hub_hearts": 1955, - "params_billions": 12.0, - "co2_cost": 2.794238924559327 - } - }, - { - "id": "databricks/dolly-v2-3b_bfloat16_f6c9be08f16fe4d3a719bee0a4a7c7415b5c65df_False", - "model": { - "name": "databricks/dolly-v2-3b", - "sha": "f6c9be08f16fe4d3a719bee0a4a7c7415b5c65df", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "GPTNeoXForCausalLM", - "average_score": 5.59965824307081, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22471597583301195, - "normalized_score": 22.471597583301197 - }, - "bbh": { - "name": "BBH", - "value": 0.30792785961544844, - "normalized_score": 3.3247689565453875 - }, - "math": { - "name": "MATH Level 5", - "value": 0.015105740181268883, - "normalized_score": 1.5105740181268883 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.33378125, - "normalized_score": 3.2226562499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11452792553191489, - "normalized_score": 1.6142139479905429 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-04-13", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "databricks/dolly-v2-3b", - "hub_license": "mit", - "hub_hearts": 287, - "params_billions": 3.0, - "co2_cost": 1.5161689681554578 - } - }, - { - "id": "databricks/dolly-v2-7b_bfloat16_d632f0c8b75b1ae5b26b250d25bfba4e99cb7c6f_False", - "model": { - "name": "databricks/dolly-v2-7b", - "sha": "d632f0c8b75b1ae5b26b250d25bfba4e99cb7c6f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "GPTNeoXForCausalLM", - "average_score": 5.647360474812998, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2009856070781083, - "normalized_score": 20.098560707810833 - }, - "bbh": { - "name": "BBH", - "value": 0.31730628122070326, - "normalized_score": 5.449892512817211 - }, - "math": { - "name": "MATH Level 5", - "value": 0.014350453172205438, - "normalized_score": 1.4350453172205437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.35530208333333335, - "normalized_score": 2.779427083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1149434840425532, - "normalized_score": 1.6603871158392434 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-04-13", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "databricks/dolly-v2-7b", - "hub_license": "mit", - "hub_hearts": 149, - "params_billions": 7.0, - "co2_cost": 1.6604119870933625 - } - }, - { - "id": "davidkim205/Rhea-72b-v0.5_bfloat16_bc3806efb23d2713e6630a748d9747fd76b27169_False", - "model": { - "name": "davidkim205/Rhea-72b-v0.5", - "sha": "bc3806efb23d2713e6630a748d9747fd76b27169", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 
5.99895584588256, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.014538092261865185, - "normalized_score": 1.4538092261865185 - }, - "bbh": { - "name": "BBH", - "value": 0.30783395929068597, - "normalized_score": 3.6707473002836024 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17371601208459214, - "normalized_score": 17.371601208459214 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2525167785234899, - "normalized_score": 0.33557046979865535 - }, - "musr": { - "name": "MUSR", - "value": 0.42413541666666665, - "normalized_score": 11.316927083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11660571808510638, - "normalized_score": 1.8450797872340412 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-03-22", - "submission_date": "2024-09-15", - "generation": 0, - "base_model": "davidkim205/Rhea-72b-v0.5", - "hub_license": "apache-2.0", - "hub_hearts": 135, - "params_billions": 72.0, - "co2_cost": 17.377381849100434 - } - }, - { - "id": "davidkim205/nox-solar-10.7b-v4_bfloat16_5f4be6cb7d8398b84689148d15f3838f2e01e104_True", - "model": { - "name": "davidkim205/nox-solar-10.7b-v4", - "sha": "5f4be6cb7d8398b84689148d15f3838f2e01e104", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.514321082123697, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3753418706809044, - "normalized_score": 37.534187068090446 - }, - "bbh": { - "name": "BBH", - "value": 0.4814038018918371, - "normalized_score": 26.631088145699618 - }, - "math": { - "name": "MATH Level 5", - "value": 0.008308157099697885, - "normalized_score": 0.8308157099697886 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.42984375, - "normalized_score": 12.563802083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3332779255319149, - "normalized_score": 25.919769503546096 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-03-16", - "submission_date": "2024-10-04", - "generation": 0, - "base_model": "davidkim205/nox-solar-10.7b-v4", - "hub_license": "apache-2.0", - "hub_hearts": 10, - "params_billions": 10.732, - "co2_cost": 1.6979525118902599 - } - }, - { - "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B_bfloat16_07a264a567ba0863a4ab34fdb3c2b8a54e0bb494_True", - "model": { - "name": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", - "sha": "07a264a567ba0863a4ab34fdb3c2b8a54e0bb494", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 27.809426360756188, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43359397509718656, - "normalized_score": 43.35939750971866 - }, - "bbh": { - "name": "BBH", - "value": 0.5634962649702303, - "normalized_score": 35.81986234433108 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3074018126888218, - "normalized_score": 30.74018126888218 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 
2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.43421875000000004, - "normalized_score": 13.277343749999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4748171542553192, - "normalized_score": 41.64635047281324 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2025-01-20", - "submission_date": "2025-01-22", - "generation": 0, - "base_model": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", - "hub_license": "mit", - "hub_hearts": 636, - "params_billions": 70.554, - "co2_cost": 118.54646560023332 - } - }, - { - "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B_bfloat16_2f96d315ae1d52352452b3c13d12cdd781d762f0_True", - "model": { - "name": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "sha": "2f96d315ae1d52352452b3c13d12cdd781d762f0", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.059950104920146, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.37823973723054827, - "normalized_score": 37.82397372305483 - }, - "bbh": { - "name": "BBH", - "value": 0.323935108539057, - "normalized_score": 5.325247153240706 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21978851963746224, - "normalized_score": 21.978851963746223 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2550335570469799, - "normalized_score": 0.6711409395973182 - }, - "musr": { - "name": "MUSR", - "value": 0.32497916666666665, - "normalized_score": 0.45572916666666624 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.20894281914893617, - "normalized_score": 12.10475768321513 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2025-01-20", - "submission_date": "2025-01-20", - "generation": 0, - "base_model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "hub_license": "mit", - "hub_hearts": 659, - "params_billions": 8.03, - "co2_cost": 1.4798056329536695 - } - }, - { - "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B_bfloat16_80da49efd7aed5a338dfa7f23f75a9311f0dec20_True", - "model": { - "name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "sha": "80da49efd7aed5a338dfa7f23f75a9311f0dec20", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 10.351036796154286, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.34634104176917246, - "normalized_score": 34.63410417691725 - }, - "bbh": { - "name": "BBH", - "value": 0.32409879947333436, - "normalized_score": 4.729119207646243 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1691842900302115, - "normalized_score": 16.91842900302115 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2558724832214765, - "normalized_score": 0.7829977628635317 - }, - "musr": { - "name": "MUSR", - "value": 0.36345833333333327, - "normalized_score": 2.9656249999999993 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11868351063829788, - "normalized_score": 2.0759456264775418 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2025-01-20", - 
"submission_date": "2025-01-20", - "generation": 0, - "base_model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "hub_license": "mit", - "hub_hearts": 1060, - "params_billions": 1.777, - "co2_cost": 1.2364111786043857 - } - }, - { - "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B_bfloat16_c79f47acaf303faabb7133b4b7b76f24231f2c8d_False", - "model": { - "name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "sha": "c79f47acaf303faabb7133b4b7b76f24231f2c8d", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.22146462032291, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43816517950150047, - "normalized_score": 43.81651795015004 - }, - "bbh": { - "name": "BBH", - "value": 0.5905573130283358, - "normalized_score": 40.69076685552542 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5702416918429003, - "normalized_score": 57.02416918429003 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3875838926174497, - "normalized_score": 18.34451901565996 - }, - "musr": { - "name": "MUSR", - "value": 0.536625, - "normalized_score": 28.711458333333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4666722074468085, - "normalized_score": 40.74135638297872 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2025-01-20", - "submission_date": "2025-01-20", - "generation": 0, - "base_model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "hub_license": "mit", - "hub_hearts": 470, - "params_billions": 14.77, - "co2_cost": 3.9921899783219374 - } - }, - { - "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_bfloat16_4569fd730224ec487752bd4954399c6e18bf3aa6_True", - "model": { - "name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "sha": "4569fd730224ec487752bd4954399c6e18bf3aa6", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 22.96226839270608, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4186314534324481, - "normalized_score": 41.86314534324481 - }, - "bbh": { - "name": "BBH", - "value": 0.41969150892898055, - "normalized_score": 17.149673765590364 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17069486404833836, - "normalized_score": 17.069486404833835 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28439597315436244, - "normalized_score": 4.5861297539149914 - }, - "musr": { - "name": "MUSR", - "value": 0.4526041666666667, - "normalized_score": 16.1421875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.46866688829787234, - "normalized_score": 40.962987588652474 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2025-01-20", - "submission_date": "2025-01-21", - "generation": 0, - "base_model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "hub_license": "mit", - "hub_hearts": 1279, - "params_billions": 32.764, - "co2_cost": 47.27590504446936 - } - }, - { - "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B_bfloat16_008b8c2e0b59dac9b7619d58a5ad609f43a5b6b1_True", - "model": { - "name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "sha": "008b8c2e0b59dac9b7619d58a5ad609f43a5b6b1", - 
"precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 14.99492256865316, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.40376866713653103, - "normalized_score": 40.3768667136531 - }, - "bbh": { - "name": "BBH", - "value": 0.34425676981862185, - "normalized_score": 7.882702983365756 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19561933534743203, - "normalized_score": 19.561933534743204 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.36628124999999995, - "normalized_score": 3.5518229166666675 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2321309840425532, - "normalized_score": 14.681220449172578 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2025-01-20", - "submission_date": "2025-01-20", - "generation": 0, - "base_model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "hub_license": "mit", - "hub_hearts": 559, - "params_billions": 7.616, - "co2_cost": 1.3699315191987256 - } - }, - { - "id": "deepseek-ai/deepseek-llm-67b-chat_bfloat16_79648bef7658bb824e4630740f6e1484c1b0620b_True", - "model": { - "name": "deepseek-ai/deepseek-llm-67b-chat", - "sha": "79648bef7658bb824e4630740f6e1484c1b0620b", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 27.310631874736753, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5587153197959193, - "normalized_score": 55.87153197959192 - }, - "bbh": { - "name": "BBH", - "value": 0.5243416179742358, - "normalized_score": 33.22524192534525 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09290030211480363, - "normalized_score": 9.290030211480364 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3162751677852349, - "normalized_score": 8.83668903803132 - }, - "musr": { - "name": "MUSR", - "value": 0.5058645833333334, - "normalized_score": 23.93307291666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3943650265957447, - "normalized_score": 32.70722517730496 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-11-29", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "deepseek-ai/deepseek-llm-67b-chat", - "hub_license": "other", - "hub_hearts": 198, - "params_billions": 67.0, - "co2_cost": 119.64361733739256 - } - }, - { - "id": "deepseek-ai/deepseek-llm-7b-base_bfloat16_7683fea62db869066ddaff6a41d032262c490d4f_False", - "model": { - "name": "deepseek-ai/deepseek-llm-7b-base", - "sha": "7683fea62db869066ddaff6a41d032262c490d4f", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 8.227098434870664, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.217871913190335, - "normalized_score": 21.787191319033496 - }, - "bbh": { - "name": "BBH", - "value": 0.35030315829299524, - "normalized_score": 9.76792479590425 - }, - "math": { - "name": "MATH Level 5", - "value": 0.019637462235649546, - "normalized_score": 1.9637462235649545 
- }, - "gpqa": { - "name": "GPQA", - "value": 0.27348993288590606, - "normalized_score": 3.1319910514541416 - }, - "musr": { - "name": "MUSR", - "value": 0.37378124999999995, - "normalized_score": 3.755989583333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.18060172872340424, - "normalized_score": 8.955747635933804 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-11-29", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "deepseek-ai/deepseek-llm-7b-base", - "hub_license": "other", - "hub_hearts": 89, - "params_billions": 7.0, - "co2_cost": 1.6450711788946506 - } - }, - { - "id": "deepseek-ai/deepseek-llm-7b-chat_bfloat16_afbda8b347ec881666061fa67447046fc5164ec8_True", - "model": { - "name": "deepseek-ai/deepseek-llm-7b-chat", - "sha": "afbda8b347ec881666061fa67447046fc5164ec8", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.823156850686358, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4170822307034225, - "normalized_score": 41.70822307034224 - }, - "bbh": { - "name": "BBH", - "value": 0.3632079760108669, - "normalized_score": 11.258949371501748 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02039274924471299, - "normalized_score": 2.0392749244712993 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.46677083333333336, - "normalized_score": 19.21302083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.21334773936170212, - "normalized_score": 12.594193262411347 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-11-29", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "deepseek-ai/deepseek-llm-7b-chat", - "hub_license": "other", - "hub_hearts": 148, - "params_billions": 7.0, - "co2_cost": 1.5489650952313605 - } - }, - { - "id": "deepseek-ai/deepseek-moe-16b-base_bfloat16_521d2bc4fb69a3f3ae565310fcc3b65f97af2580_False", - "model": { - "name": "deepseek-ai/deepseek-moe-16b-base", - "sha": "521d2bc4fb69a3f3ae565310fcc3b65f97af2580", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "DeepseekForCausalLM", - "average_score": 7.466333791660237, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2449744455821664, - "normalized_score": 24.497444558216642 - }, - "bbh": { - "name": "BBH", - "value": 0.3409461055246395, - "normalized_score": 8.355555779389382 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02416918429003021, - "normalized_score": 2.416918429003021 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25419463087248323, - "normalized_score": 0.5592841163310973 - }, - "musr": { - "name": "MUSR", - "value": 0.36578125, - "normalized_score": 3.3559895833333346 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1505152925531915, - "normalized_score": 5.612810283687943 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-01-08", - 
"submission_date": "2024-06-12", - "generation": 0, - "base_model": "deepseek-ai/deepseek-moe-16b-base", - "hub_license": "other", - "hub_hearts": 108, - "params_billions": 16.376, - "co2_cost": 14.004930861807704 - } - }, - { - "id": "deepseek-ai/deepseek-moe-16b-chat_bfloat16_eefd8ac7e8dc90e095129fe1a537d5e236b2e57c_True", - "model": { - "name": "deepseek-ai/deepseek-moe-16b-chat", - "sha": "eefd8ac7e8dc90e095129fe1a537d5e236b2e57c", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "DeepseekForCausalLM", - "average_score": 10.290615224333424, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.36629919724109805, - "normalized_score": 36.629919724109804 - }, - "bbh": { - "name": "BBH", - "value": 0.3274953026448241, - "normalized_score": 6.573749026890635 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0256797583081571, - "normalized_score": 2.56797583081571 - }, - "gpqa": { - "name": "GPQA", - "value": 0.22483221476510068, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.38076041666666666, - "normalized_score": 5.261718750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1963929521276596, - "normalized_score": 10.710328014184398 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-01-09", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "deepseek-ai/deepseek-moe-16b-chat", - "hub_license": "other", - "hub_hearts": 131, - "params_billions": 16.376, - "co2_cost": 9.18695614589026 - } - }, - { - "id": "dfurman/CalmeRys-78B-Orpo-v0.1_bfloat16_7988deb48419c3f56bb24c139c23e5c476ec03f8_True", - "model": { - "name": "dfurman/CalmeRys-78B-Orpo-v0.1", - "sha": "7988deb48419c3f56bb24c139c23e5c476ec03f8", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 51.23132307602696, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8163273447785211, - "normalized_score": 81.6327344778521 - }, - "bbh": { - "name": "BBH", - "value": 0.7262282792249927, - "normalized_score": 61.92476379259157 - }, - "math": { - "name": "MATH Level 5", - "value": 0.40634441087613293, - "normalized_score": 40.6344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.4001677852348993, - "normalized_score": 20.022371364653242 - }, - "musr": { - "name": "MUSR", - "value": 0.5901770833333333, - "normalized_score": 36.37213541666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.7012134308510638, - "normalized_score": 66.80149231678487 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-24", - "submission_date": "2024-09-24", - "generation": 1, - "base_model": "dfurman/CalmeRys-78B-Orpo-v0.1 (Merge)", - "hub_license": "mit", - "hub_hearts": 72, - "params_billions": 77.965, - "co2_cost": 25.99353475607352 - } - }, - { - "id": "dfurman/Llama-3-70B-Orpo-v0.1_float16_6bf3be5f7f427164c879f7a4ec9ccb6b22aa6631_True", - "model": { - "name": "dfurman/Llama-3-70B-Orpo-v0.1", - "sha": "6bf3be5f7f427164c879f7a4ec9ccb6b22aa6631", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - 
"average_score": 18.300061469360834, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20490742341431845, - "normalized_score": 20.490742341431847 - }, - "bbh": { - "name": "BBH", - "value": 0.46552376347015506, - "normalized_score": 24.09381654636037 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1578549848942598, - "normalized_score": 15.785498489425981 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2575503355704698, - "normalized_score": 1.0067114093959737 - }, - "musr": { - "name": "MUSR", - "value": 0.4534375, - "normalized_score": 16.2796875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38929521276595747, - "normalized_score": 32.14391252955083 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-26", - "submission_date": "2024-08-30", - "generation": 1, - "base_model": "dfurman/Llama-3-70B-Orpo-v0.1 (Merge)", - "hub_license": "llama3", - "hub_hearts": 2, - "params_billions": 70.554, - "co2_cost": 28.880685434073776 - } - }, - { - "id": "dfurman/Llama-3-8B-Orpo-v0.1_float16_f02aef830e12a50892ac065826d5eb3dfc7675d1_True", - "model": { - "name": "dfurman/Llama-3-8B-Orpo-v0.1", - "sha": "f02aef830e12a50892ac065826d5eb3dfc7675d1", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 10.89448042775765, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.28351773294857646, - "normalized_score": 28.351773294857644 - }, - "bbh": { - "name": "BBH", - "value": 0.3842420919898036, - "normalized_score": 13.68074574746978 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05211480362537765, - "normalized_score": 5.211480362537765 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.3566354166666667, - "normalized_score": 2.2460937499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.22980385638297873, - "normalized_score": 14.422650709219859 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-26", - "submission_date": "2024-08-30", - "generation": 1, - "base_model": "dfurman/Llama-3-8B-Orpo-v0.1 (Merge)", - "hub_license": "llama3", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.8561587817168912 - } - }, - { - "id": "dfurman/Llama-3-8B-Orpo-v0.1_bfloat16_f02aef830e12a50892ac065826d5eb3dfc7675d1_True", - "model": { - "name": "dfurman/Llama-3-8B-Orpo-v0.1", - "sha": "f02aef830e12a50892ac065826d5eb3dfc7675d1", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 11.076157946218345, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3000039894147528, - "normalized_score": 30.000398941475282 - }, - "bbh": { - "name": "BBH", - "value": 0.3852967582460245, - "normalized_score": 13.773376256003464 - }, - "math": { - "name": "MATH Level 5", - "value": 0.041540785498489434, - "normalized_score": 4.154078549848943 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 
0.357875, - "normalized_score": 2.7343749999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.22805851063829788, - "normalized_score": 14.22872340425532 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-26", - "submission_date": "2024-08-30", - "generation": 1, - "base_model": "dfurman/Llama-3-8B-Orpo-v0.1 (Merge)", - "hub_license": "llama3", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 0.9498608388033825 - } - }, - { - "id": "dfurman/Qwen2-72B-Orpo-v0.1_bfloat16_26c7bbaa728822c60bb47b2808972140653aae4c_True", - "model": { - "name": "dfurman/Qwen2-72B-Orpo-v0.1", - "sha": "26c7bbaa728822c60bb47b2808972140653aae4c", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 44.172299850567384, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7879759039348928, - "normalized_score": 78.79759039348927 - }, - "bbh": { - "name": "BBH", - "value": 0.6969024790545039, - "normalized_score": 57.41436351018751 - }, - "math": { - "name": "MATH Level 5", - "value": 0.40558912386706947, - "normalized_score": 40.55891238670695 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38422818791946306, - "normalized_score": 17.897091722595075 - }, - "musr": { - "name": "MUSR", - "value": 0.47842708333333334, - "normalized_score": 20.87005208333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5454621010638298, - "normalized_score": 49.49578900709219 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-05", - "submission_date": "2024-08-22", - "generation": 1, - "base_model": "dfurman/Qwen2-72B-Orpo-v0.1 (Merge)", - "hub_license": "other", - "hub_hearts": 4, - "params_billions": 72.699, - "co2_cost": 25.25066324234519 - } - }, - { - "id": "dicta-il/dictalm2.0_bfloat16_f8ab3208e95a7b44a9a2fbb9bbbdd8ea11be509d_False", - "model": { - "name": "dicta-il/dictalm2.0", - "sha": "f8ab3208e95a7b44a9a2fbb9bbbdd8ea11be509d", - "precision": "bfloat16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 11.895185345587594, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24132745559559746, - "normalized_score": 24.132745559559744 - }, - "bbh": { - "name": "BBH", - "value": 0.4017869112495909, - "normalized_score": 16.48984561578202 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01812688821752266, - "normalized_score": 1.812688821752266 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.38196874999999997, - "normalized_score": 5.512760416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2604720744680851, - "normalized_score": 17.830230496453904 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-10", - "submission_date": "2024-07-31", - "generation": 0, - "base_model": "dicta-il/dictalm2.0", - "hub_license": "apache-2.0", - "hub_hearts": 13, - "params_billions": 7.251, - 
"co2_cost": 1.3480768887398813 - } - }, - { - "id": "dicta-il/dictalm2.0-instruct_bfloat16_257c6023d6ac1bfa12110b7b17e7600da7da4e1e_True", - "model": { - "name": "dicta-il/dictalm2.0-instruct", - "sha": "257c6023d6ac1bfa12110b7b17e7600da7da4e1e", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 16.7792214696191, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44121264910437635, - "normalized_score": 44.12126491043764 - }, - "bbh": { - "name": "BBH", - "value": 0.42560784985912875, - "normalized_score": 19.688075851194238 - }, - "math": { - "name": "MATH Level 5", - "value": 0.022658610271903322, - "normalized_score": 2.2658610271903323 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.39458333333333334, - "normalized_score": 9.722916666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2604720744680851, - "normalized_score": 17.830230496453904 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-14", - "submission_date": "2024-07-31", - "generation": 1, - "base_model": "dicta-il/dictalm2.0", - "hub_license": "apache-2.0", - "hub_hearts": 21, - "params_billions": 7.251, - "co2_cost": 1.2967896079037098 - } - }, - { - "id": "distilbert/distilgpt2_bfloat16_2290a62682d06624634c1f46a6ad5be0f47f38aa_False", - "model": { - "name": "distilbert/distilgpt2", - "sha": "2290a62682d06624634c1f46a6ad5be0f47f38aa", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "GPT2LMHeadModel", - "average_score": 4.002273827220365, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.06110010328151527, - "normalized_score": 6.110010328151527 - }, - "bbh": { - "name": "BBH", - "value": 0.3037988148650536, - "normalized_score": 2.835219845513963 - }, - "math": { - "name": "MATH Level 5", - "value": 0.006042296072507553, - "normalized_score": 0.6042296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.42072916666666665, - "normalized_score": 11.1578125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11868351063829788, - "normalized_score": 2.0759456264775418 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2022-03-02", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "distilbert/distilgpt2", - "hub_license": "apache-2.0", - "hub_hearts": 501, - "params_billions": 0.088, - "co2_cost": 0.24616308082824434 - } - }, - { - "id": "divyanshukunwar/SASTRI_1_9B_float16_3afeb5b296b1d6489401105e2ea6fc5c00d09c07_True", - "model": { - "name": "divyanshukunwar/SASTRI_1_9B", - "sha": "3afeb5b296b1d6489401105e2ea6fc5c00d09c07", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 19.42175962571148, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4207292206899914, - "normalized_score": 42.07292206899915 
- }, - "bbh": { - "name": "BBH", - "value": 0.4680499051118341, - "normalized_score": 23.534216256795457 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11555891238670694, - "normalized_score": 11.555891238670695 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3213087248322148, - "normalized_score": 9.507829977628639 - }, - "musr": { - "name": "MUSR", - "value": 0.3831145833333333, - "normalized_score": 5.555989583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3187333776595745, - "normalized_score": 24.30370862884161 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-20", - "submission_date": "2024-11-23", - "generation": 1, - "base_model": "divyanshukunwar/SASTRI_1_9B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 5.211, - "co2_cost": 7.792430844020681 - } - }, - { - "id": "djuna/G2-BigGSHT-27B-2_bfloat16_b52e0c08d19232acebf85b68ee5989cc23c0d519_True", - "model": { - "name": "djuna/G2-BigGSHT-27B-2", - "sha": "b52e0c08d19232acebf85b68ee5989cc23c0d519", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 36.04705290996797, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7974430067775724, - "normalized_score": 79.74430067775725 - }, - "bbh": { - "name": "BBH", - "value": 0.641474454273013, - "normalized_score": 48.814372082405725 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2348942598187311, - "normalized_score": 23.48942598187311 - }, - "gpqa": { - "name": "GPQA", - "value": 0.36325503355704697, - "normalized_score": 15.100671140939594 - }, - "musr": { - "name": "MUSR", - "value": 0.40720833333333334, - "normalized_score": 9.934374999999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.45279255319148937, - "normalized_score": 39.199172576832154 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-29", - "submission_date": "2024-11-06", - "generation": 1, - "base_model": "djuna/G2-BigGSHT-27B-2 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 27.227, - "co2_cost": 10.050857950074226 - } - }, - { - "id": "djuna/G2-GSHT_bfloat16_afa34f893a74af2a21b71f83d7bcc16aa818d157_True", - "model": { - "name": "djuna/G2-GSHT", - "sha": "afa34f893a74af2a21b71f83d7bcc16aa818d157", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 24.632233584017342, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5630116978505919, - "normalized_score": 56.301169785059194 - }, - "bbh": { - "name": "BBH", - "value": 0.5269730491270207, - "normalized_score": 30.992059015125676 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19259818731117825, - "normalized_score": 19.259818731117825 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32550335570469796, - "normalized_score": 10.067114093959727 - }, - "musr": { - "name": "MUSR", - "value": 0.40057291666666667, - "normalized_score": 8.171614583333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3070146276595745, - "normalized_score": 23.001625295508276 - } - }, - "features": { 
- "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-09", - "submission_date": "2024-10-05", - "generation": 1, - "base_model": "djuna/G2-GSHT (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 10.159, - "co2_cost": 4.303383940005492 - } - }, - { - "id": "djuna/Gemma-2-gemmama-9b_bfloat16_1d6c53ad18970ac082e86bfa0159789b6a6e79c0_True", - "model": { - "name": "djuna/Gemma-2-gemmama-9b", - "sha": "1d6c53ad18970ac082e86bfa0159789b6a6e79c0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 28.75247693758838, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7703404743857409, - "normalized_score": 77.0340474385741 - }, - "bbh": { - "name": "BBH", - "value": 0.5420037856495951, - "normalized_score": 32.916050576064585 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19259818731117825, - "normalized_score": 19.259818731117825 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33557046979865773, - "normalized_score": 11.409395973154364 - }, - "musr": { - "name": "MUSR", - "value": 0.4031458333333333, - "normalized_score": 8.459895833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3109208776595745, - "normalized_score": 23.43565307328605 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-31", - "submission_date": "2024-10-05", - "generation": 1, - "base_model": "djuna/Gemma-2-gemmama-9b (Merge)", - "hub_license": "", - "hub_hearts": 3, - "params_billions": 10.159, - "co2_cost": 5.528194430150129 - } - }, - { - "id": "djuna/L3.1-ForStHS_bfloat16_f5442e1f27e4a0c469504624ea85afdc6907c9cc_True", - "model": { - "name": "djuna/L3.1-ForStHS", - "sha": "f5442e1f27e4a0c469504624ea85afdc6907c9cc", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.348156412970027, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7813313120298586, - "normalized_score": 78.13313120298585 - }, - "bbh": { - "name": "BBH", - "value": 0.5202703381267152, - "normalized_score": 31.3912168031268 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15030211480362538, - "normalized_score": 15.030211480362537 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.40264583333333337, - "normalized_score": 9.664062500000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37350398936170215, - "normalized_score": 30.38933215130024 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-10", - "submission_date": "2024-09-15", - "generation": 1, - "base_model": "djuna/L3.1-ForStHS (Merge)", - "hub_license": "", - "hub_hearts": 3, - "params_billions": 8.03, - "co2_cost": 1.6873286473063 - } - }, - { - "id": "djuna/L3.1-Promissum_Mane-8B-Della-1.5-calc_bfloat16_67dc71cb877c1ebaeb634e116fc938b223338cf6_True", - "model": { - "name": "djuna/L3.1-Promissum_Mane-8B-Della-1.5-calc", - "sha": 
"67dc71cb877c1ebaeb634e116fc938b223338cf6", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.587663022160967, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7235291249440374, - "normalized_score": 72.35291249440374 - }, - "bbh": { - "name": "BBH", - "value": 0.5432920704935255, - "normalized_score": 34.87957646776231 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16389728096676737, - "normalized_score": 16.389728096676738 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3145973154362416, - "normalized_score": 8.612975391498878 - }, - "musr": { - "name": "MUSR", - "value": 0.42528125, - "normalized_score": 13.026822916666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.390375664893617, - "normalized_score": 32.263962765957444 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-29", - "submission_date": "2024-10-29", - "generation": 1, - "base_model": "djuna/L3.1-Promissum_Mane-8B-Della-1.5-calc (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.4883060783272883 - } - }, - { - "id": "djuna/L3.1-Promissum_Mane-8B-Della-calc_bfloat16_42c6cd88b8394876cdbcf64e56633ad0a371b5f4_True", - "model": { - "name": "djuna/L3.1-Promissum_Mane-8B-Della-calc", - "sha": "42c6cd88b8394876cdbcf64e56633ad0a371b5f4", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.488800069760984, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.544152847777231, - "normalized_score": 54.4152847777231 - }, - "bbh": { - "name": "BBH", - "value": 0.548587625935678, - "normalized_score": 35.553825960108405 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18429003021148035, - "normalized_score": 18.429003021148034 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.4229895833333333, - "normalized_score": 12.80703125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3801529255319149, - "normalized_score": 31.128102836879428 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-07", - "submission_date": "2024-10-20", - "generation": 1, - "base_model": "djuna/L3.1-Promissum_Mane-8B-Della-calc (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.6454390620277746 - } - }, - { - "id": "djuna/L3.1-Purosani-2-8B_bfloat16_e5acd6277a1286c5e18fcb3e89a836ffc8a75b8f_True", - "model": { - "name": "djuna/L3.1-Purosani-2-8B", - "sha": "e5acd6277a1286c5e18fcb3e89a836ffc8a75b8f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.113373982142917, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4988153654525548, - "normalized_score": 49.88153654525547 - }, - "bbh": { - "name": "BBH", - "value": 0.5182122256069372, - "normalized_score": 31.391342665184258 - }, - "math": { - "name": "MATH Level 5", - 
"value": 0.11706948640483383, - "normalized_score": 11.706948640483382 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.38162499999999994, - "normalized_score": 8.303124999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3751662234042553, - "normalized_score": 30.57402482269504 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-04", - "submission_date": "2024-10-20", - "generation": 1, - "base_model": "djuna/L3.1-Purosani-2-8B (Merge)", - "hub_license": "", - "hub_hearts": 3, - "params_billions": 8.03, - "co2_cost": 1.7299035958162357 - } - }, - { - "id": "djuna/L3.1-Suze-Vume-calc_bfloat16_830c07d136ecd8171805078606f00c4ee69f21c3_True", - "model": { - "name": "djuna/L3.1-Suze-Vume-calc", - "sha": "830c07d136ecd8171805078606f00c4ee69f21c3", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.000783959950287, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7296739318341999, - "normalized_score": 72.96739318341999 - }, - "bbh": { - "name": "BBH", - "value": 0.516421105092519, - "normalized_score": 31.136638199988273 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11404833836858005, - "normalized_score": 11.404833836858005 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28187919463087246, - "normalized_score": 4.250559284116329 - }, - "musr": { - "name": "MUSR", - "value": 0.38429166666666664, - "normalized_score": 8.303124999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35147938829787234, - "normalized_score": 27.942154255319146 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-26", - "submission_date": "2024-09-04", - "generation": 1, - "base_model": "djuna/L3.1-Suze-Vume-calc (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.6090376998225064 - } - }, - { - "id": "djuna/MN-Chinofun_bfloat16_71b47c86f32e107b407fada44ec6b893c5eb8bb0_True", - "model": { - "name": "djuna/MN-Chinofun", - "sha": "71b47c86f32e107b407fada44ec6b893c5eb8bb0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 24.68383401895207, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6110220880596817, - "normalized_score": 61.102208805968175 - }, - "bbh": { - "name": "BBH", - "value": 0.49527033812671534, - "normalized_score": 28.48357519092637 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13066465256797583, - "normalized_score": 13.066465256797583 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.40835416666666663, - "normalized_score": 10.377604166666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36028922872340424, - "normalized_score": 28.921025413711575 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - 
"metadata": { - "upload_date": "2024-09-16", - "submission_date": "2024-09-23", - "generation": 1, - "base_model": "djuna/MN-Chinofun (Merge)", - "hub_license": "", - "hub_hearts": 3, - "params_billions": 12.248, - "co2_cost": 2.8929861685466323 - } - }, - { - "id": "djuna/MN-Chinofun-12B-2_bfloat16_d2aab6837c2ad2dfebb18b15549affd9dd2b8723_True", - "model": { - "name": "djuna/MN-Chinofun-12B-2", - "sha": "d2aab6837c2ad2dfebb18b15549affd9dd2b8723", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 25.682587776893186, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6170671595810228, - "normalized_score": 61.70671595810227 - }, - "bbh": { - "name": "BBH", - "value": 0.5036959998266032, - "normalized_score": 29.526083526964456 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13066465256797583, - "normalized_score": 13.066465256797583 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.42683333333333334, - "normalized_score": 13.354166666666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3615359042553192, - "normalized_score": 29.05954491725768 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-23", - "submission_date": "2024-11-26", - "generation": 1, - "base_model": "djuna/MN-Chinofun-12B-2 (Merge)", - "hub_license": "", - "hub_hearts": 3, - "params_billions": 12.248, - "co2_cost": 1.955393100722868 - } - }, - { - "id": "djuna/MN-Chinofun-12B-3_bfloat16_fa64c9bc66221946d7425c4eea93828900083d84_True", - "model": { - "name": "djuna/MN-Chinofun-12B-3", - "sha": "fa64c9bc66221946d7425c4eea93828900083d84", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 18.389453278031425, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3052744495715812, - "normalized_score": 30.527444957158117 - }, - "bbh": { - "name": "BBH", - "value": 0.53478574603334, - "normalized_score": 34.219196465449734 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10045317220543806, - "normalized_score": 10.045317220543806 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.4197916666666666, - "normalized_score": 10.907291666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3026097074468085, - "normalized_score": 22.512189716312054 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-05", - "submission_date": "2024-12-05", - "generation": 1, - "base_model": "djuna/MN-Chinofun-12B-3 (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 12.248, - "co2_cost": 2.4154788495638067 - } - }, - { - "id": "djuna/MN-Chinofun-12B-4_bfloat16_609b6b7bf20de7a6f93559f0d2572ae7b275ed78_True", - "model": { - "name": "djuna/MN-Chinofun-12B-4", - "sha": "609b6b7bf20de7a6f93559f0d2572ae7b275ed78", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": 
"MistralForCausalLM", - "average_score": 24.40291180354787, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5404305021786637, - "normalized_score": 54.04305021786637 - }, - "bbh": { - "name": "BBH", - "value": 0.5347693369790583, - "normalized_score": 34.17304204588476 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11178247734138973, - "normalized_score": 11.178247734138973 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2953020134228188, - "normalized_score": 6.040268456375841 - }, - "musr": { - "name": "MUSR", - "value": 0.4306770833333333, - "normalized_score": 13.234635416666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3497340425531915, - "normalized_score": 27.74822695035461 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-26", - "submission_date": "2025-01-26", - "generation": 1, - "base_model": "djuna/MN-Chinofun-12B-4 (Merge)", - "hub_license": "", - "hub_hearts": 3, - "params_billions": 12.248, - "co2_cost": 1.6917923335509382 - } - }, - { - "id": "djuna/Q2.5-Partron-7B_bfloat16_3a6d3cca23c0e1c6bcba38887fc819729d5d16cf_True", - "model": { - "name": "djuna/Q2.5-Partron-7B", - "sha": "3a6d3cca23c0e1c6bcba38887fc819729d5d16cf", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.108466039072106, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7321218810533828, - "normalized_score": 73.21218810533829 - }, - "bbh": { - "name": "BBH", - "value": 0.5418474850726388, - "normalized_score": 35.25726531667357 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4826283987915408, - "normalized_score": 48.26283987915408 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.41654166666666664, - "normalized_score": 11.067708333333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4282746010638298, - "normalized_score": 36.47495567375886 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-08", - "submission_date": "2024-11-08", - "generation": 1, - "base_model": "djuna/Q2.5-Partron-7B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 2.002153106358169 - } - }, - { - "id": "djuna/Q2.5-Veltha-14B_bfloat16_fd0c348618e5c8198b769d2f5ff1e3a810e007e7_True", - "model": { - "name": "djuna/Q2.5-Veltha-14B", - "sha": "fd0c348618e5c8198b769d2f5ff1e3a810e007e7", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 42.519512261718894, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8291666112581284, - "normalized_score": 82.91666112581285 - }, - "bbh": { - "name": "BBH", - "value": 0.648421390292023, - "normalized_score": 49.75243239928858 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4788519637462236, - "normalized_score": 47.88519637462236 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35906040268456374, - "normalized_score": 14.541387024608499 - }, - "musr": { - "name": "MUSR", - 
"value": 0.41942708333333334, - "normalized_score": 12.261718749999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5298371010638298, - "normalized_score": 47.759677895981085 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-22", - "submission_date": "2024-12-22", - "generation": 1, - "base_model": "djuna/Q2.5-Veltha-14B (Merge)", - "hub_license": "", - "hub_hearts": 10, - "params_billions": 14.766, - "co2_cost": 3.213489909455477 - } - }, - { - "id": "djuna/Q2.5-Veltha-14B-0.5_bfloat16_db20da90002d4b1285f61e2648c4fdbec44e02e7_True", - "model": { - "name": "djuna/Q2.5-Veltha-14B-0.5", - "sha": "db20da90002d4b1285f61e2648c4fdbec44e02e7", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.61227861539469, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7795826185631901, - "normalized_score": 77.95826185631901 - }, - "bbh": { - "name": "BBH", - "value": 0.6523026688308357, - "normalized_score": 50.318125985703354 - }, - "math": { - "name": "MATH Level 5", - "value": 0.43731117824773413, - "normalized_score": 43.73111782477341 - }, - "gpqa": { - "name": "GPQA", - "value": 0.36828859060402686, - "normalized_score": 15.771812080536915 - }, - "musr": { - "name": "MUSR", - "value": 0.43390625, - "normalized_score": 14.171614583333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5295046542553191, - "normalized_score": 47.722739361702125 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-22", - "submission_date": "2024-12-22", - "generation": 1, - "base_model": "djuna/Q2.5-Veltha-14B-0.5 (Merge)", - "hub_license": "", - "hub_hearts": 10, - "params_billions": 14.766, - "co2_cost": 2.9429227973111853 - } - }, - { - "id": "djuna-test-lab/TEST-L3.2-ReWish-3B_bfloat16_0cb7d434c4647faed475f17d74e9047007cd3782_True", - "model": { - "name": "djuna-test-lab/TEST-L3.2-ReWish-3B", - "sha": "0cb7d434c4647faed475f17d74e9047007cd3782", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.57139360581495, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6367759766308949, - "normalized_score": 63.677597663089486 - }, - "bbh": { - "name": "BBH", - "value": 0.449540552927623, - "normalized_score": 22.066700432422255 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13670694864048338, - "normalized_score": 13.670694864048338 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.37775, - "normalized_score": 7.918749999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31258311170212766, - "normalized_score": 23.620345744680847 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-23", - "submission_date": "2024-10-24", - "generation": 1, - "base_model": "djuna-test-lab/TEST-L3.2-ReWish-3B (Merge)", - "hub_license": "", - "hub_hearts": 1, - 
"params_billions": 3.213, - "co2_cost": 1.2812618019241586 - } - }, - { - "id": "djuna-test-lab/TEST-L3.2-ReWish-3B-ties-w-base_bfloat16_ebab6c0266ae7846b2bb9a595a2651a23b031372_True", - "model": { - "name": "djuna-test-lab/TEST-L3.2-ReWish-3B-ties-w-base", - "sha": "ebab6c0266ae7846b2bb9a595a2651a23b031372", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.545998025790983, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.635252241829457, - "normalized_score": 63.5252241829457 - }, - "bbh": { - "name": "BBH", - "value": 0.449540552927623, - "normalized_score": 22.066700432422255 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13670694864048338, - "normalized_score": 13.670694864048338 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.37775, - "normalized_score": 7.918749999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31258311170212766, - "normalized_score": 23.620345744680847 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-23", - "submission_date": "2024-10-23", - "generation": 1, - "base_model": "djuna-test-lab/TEST-L3.2-ReWish-3B-ties-w-base (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 1.92206090899317 - } - }, - { - "id": "dnhkng/RYS-Medium_bfloat16_de09a79e6b2efdcc97490a37b770764e62749fd0_False", - "model": { - "name": "dnhkng/RYS-Medium", - "sha": "de09a79e6b2efdcc97490a37b770764e62749fd0", - "precision": "bfloat16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 26.44775194883569, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4406131287206833, - "normalized_score": 44.06131287206833 - }, - "bbh": { - "name": "BBH", - "value": 0.6284726872432828, - "normalized_score": 47.734201324861516 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10800604229607251, - "normalized_score": 10.80060422960725 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32802013422818793, - "normalized_score": 10.402684563758392 - }, - "musr": { - "name": "MUSR", - "value": 0.40692708333333333, - "normalized_score": 8.732552083333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4325964095744681, - "normalized_score": 36.95515661938535 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-17", - "submission_date": "2024-07-17", - "generation": 0, - "base_model": "dnhkng/RYS-Medium", - "hub_license": "mit", - "hub_hearts": 3, - "params_billions": 18.731, - "co2_cost": 4.2727567000988005 - } - }, - { - "id": "dnhkng/RYS-Llama-3-8B-Instruct_bfloat16_293ab00d1e2be2752f97d5568fde2b09f6a1caae_True", - "model": { - "name": "dnhkng/RYS-Llama-3-8B-Instruct", - "sha": "293ab00d1e2be2752f97d5568fde2b09f6a1caae", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.922774736650837, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 
0.6957772044841022, - "normalized_score": 69.57772044841022 - }, - "bbh": { - "name": "BBH", - "value": 0.4808708123069005, - "normalized_score": 25.373015462245586 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06873111782477341, - "normalized_score": 6.873111782477341 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2575503355704698, - "normalized_score": 1.0067114093959737 - }, - "musr": { - "name": "MUSR", - "value": 0.33834375, - "normalized_score": 0.29296874999999956 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.355718085106383, - "normalized_score": 28.413120567375884 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-06", - "submission_date": "2024-08-07", - "generation": 0, - "base_model": "dnhkng/RYS-Llama-3-8B-Instruct", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 8.248, - "co2_cost": 1.6103746555589764 - } - }, - { - "id": "dnhkng/RYS-Llama-3-Huge-Instruct_bfloat16_cfe14a5339e88a7a89f075d9d48215d45f64acaf_True", - "model": { - "name": "dnhkng/RYS-Llama-3-Huge-Instruct", - "sha": "cfe14a5339e88a7a89f075d9d48215d45f64acaf", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 34.64400590559008, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7685917809190725, - "normalized_score": 76.85917809190724 - }, - "bbh": { - "name": "BBH", - "value": 0.6480872171360044, - "normalized_score": 49.07372077223325 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22885196374622357, - "normalized_score": 22.885196374622357 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.4207604166666667, - "normalized_score": 11.92838541666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.510970744680851, - "normalized_score": 45.66341607565011 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-06", - "submission_date": "2024-08-07", - "generation": 0, - "base_model": "dnhkng/RYS-Llama-3-Huge-Instruct", - "hub_license": "mit", - "hub_hearts": 2, - "params_billions": 99.646, - "co2_cost": 29.473976290050448 - } - }, - { - "id": "dnhkng/RYS-Llama-3-Large-Instruct_bfloat16_01e3208aaf7bf6d2b09737960c701ec6628977fe_True", - "model": { - "name": "dnhkng/RYS-Llama-3-Large-Instruct", - "sha": "01e3208aaf7bf6d2b09737960c701ec6628977fe", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 35.981216135127745, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8050616807847621, - "normalized_score": 80.50616807847622 - }, - "bbh": { - "name": "BBH", - "value": 0.65252690724939, - "normalized_score": 49.665539028891345 - }, - "math": { - "name": "MATH Level 5", - "value": 0.23036253776435045, - "normalized_score": 23.036253776435046 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28942953020134227, - "normalized_score": 5.257270693512303 - }, - "musr": { - "name": "MUSR", - "value": 0.41803125, - "normalized_score": 11.453906250000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 
0.5137134308510638, - "normalized_score": 45.96815898345154 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-06", - "submission_date": "2024-08-07", - "generation": 0, - "base_model": "dnhkng/RYS-Llama-3-Large-Instruct", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 73.976, - "co2_cost": 19.623033883209317 - } - }, - { - "id": "dnhkng/RYS-Llama-3.1-8B-Instruct_bfloat16_d4e2393403dcae19860da7c29519c8fe6fbf2fad_True", - "model": { - "name": "dnhkng/RYS-Llama-3.1-8B-Instruct", - "sha": "d4e2393403dcae19860da7c29519c8fe6fbf2fad", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 26.763955214988574, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7684920455502511, - "normalized_score": 76.84920455502511 - }, - "bbh": { - "name": "BBH", - "value": 0.5163645317446665, - "normalized_score": 31.085445296018978 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13293051359516617, - "normalized_score": 13.293051359516618 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.3681041666666667, - "normalized_score": 7.679687500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36394614361702127, - "normalized_score": 29.32734929078014 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-08", - "submission_date": "2024-08-30", - "generation": 0, - "base_model": "dnhkng/RYS-Llama-3.1-8B-Instruct", - "hub_license": "mit", - "hub_hearts": 10, - "params_billions": 8.685, - "co2_cost": 1.9433438965319416 - } - }, - { - "id": "dnhkng/RYS-Llama3.1-Large_bfloat16_52cc979de78155b33689efa48f52a8aab184bd86_True", - "model": { - "name": "dnhkng/RYS-Llama3.1-Large", - "sha": "52cc979de78155b33689efa48f52a8aab184bd86", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 42.70529151024601, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8492001223420524, - "normalized_score": 84.92001223420525 - }, - "bbh": { - "name": "BBH", - "value": 0.6899112229777242, - "normalized_score": 55.414864048196534 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3504531722054381, - "normalized_score": 35.04531722054381 - }, - "gpqa": { - "name": "GPQA", - "value": 0.37416107382550334, - "normalized_score": 16.554809843400445 - }, - "musr": { - "name": "MUSR", - "value": 0.4553958333333334, - "normalized_score": 17.091145833333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5248503989361702, - "normalized_score": 47.205599881796694 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-11", - "submission_date": "2024-08-22", - "generation": 0, - "base_model": "dnhkng/RYS-Llama3.1-Large", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 81.677, - "co2_cost": 30.81265784726752 - } - }, - { - "id": 
"dnhkng/RYS-Phi-3-medium-4k-instruct_bfloat16_1009e916b1ff8c9a53bc9d8ff48bea2a15ccde26_False", - "model": { - "name": "dnhkng/RYS-Phi-3-medium-4k-instruct", - "sha": "1009e916b1ff8c9a53bc9d8ff48bea2a15ccde26", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 29.093689949569903, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4391392616036561, - "normalized_score": 43.913926160365605 - }, - "bbh": { - "name": "BBH", - "value": 0.6226313539198264, - "normalized_score": 46.748970518349154 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1608761329305136, - "normalized_score": 16.08761329305136 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3548657718120805, - "normalized_score": 13.982102908277403 - }, - "musr": { - "name": "MUSR", - "value": 0.42528125, - "normalized_score": 11.093489583333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.484624335106383, - "normalized_score": 42.73603723404255 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-06", - "submission_date": "2024-08-07", - "generation": 0, - "base_model": "dnhkng/RYS-Phi-3-medium-4k-instruct", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 17.709, - "co2_cost": 4.6210930733664615 - } - }, - { - "id": "dnhkng/RYS-XLarge_bfloat16_0f84dd9dde60f383e1e2821496befb4ce9a11ef6_False", - "model": { - "name": "dnhkng/RYS-XLarge", - "sha": "0f84dd9dde60f383e1e2821496befb4ce9a11ef6", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 45.345219749056206, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7995662619627034, - "normalized_score": 79.95662619627035 - }, - "bbh": { - "name": "BBH", - "value": 0.7050033079850099, - "normalized_score": 58.77356748233938 - }, - "math": { - "name": "MATH Level 5", - "value": 0.425226586102719, - "normalized_score": 42.522658610271904 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38422818791946306, - "normalized_score": 17.897091722595075 - }, - "musr": { - "name": "MUSR", - "value": 0.49696875, - "normalized_score": 23.721093749999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5428025265957447, - "normalized_score": 49.20028073286053 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-24", - "submission_date": "2024-08-07", - "generation": 0, - "base_model": "dnhkng/RYS-XLarge", - "hub_license": "mit", - "hub_hearts": 85, - "params_billions": 77.965, - "co2_cost": 27.1521657518909 - } - }, - { - "id": "dnhkng/RYS-XLarge-base_bfloat16_c718b3d9e24916e3b0347d3fdaa5e5a097c2f603_True", - "model": { - "name": "dnhkng/RYS-XLarge-base", - "sha": "c718b3d9e24916e3b0347d3fdaa5e5a097c2f603", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 44.096835700317605, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7910233735377686, - "normalized_score": 79.10233735377686 - }, - "bbh": { - "name": "BBH", - "value": 
0.7047291858548728, - "normalized_score": 58.692146076576385 - }, - "math": { - "name": "MATH Level 5", - "value": 0.37915407854984895, - "normalized_score": 37.91540785498489 - }, - "gpqa": { - "name": "GPQA", - "value": 0.37919463087248323, - "normalized_score": 17.225950782997764 - }, - "musr": { - "name": "MUSR", - "value": 0.4902708333333334, - "normalized_score": 22.4171875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5430518617021277, - "normalized_score": 49.22798463356975 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-02", - "submission_date": "2024-08-30", - "generation": 0, - "base_model": "dnhkng/RYS-XLarge-base", - "hub_license": "mit", - "hub_hearts": 8, - "params_billions": 77.972, - "co2_cost": 27.17504732334226 - } - }, - { - "id": "dnhkng/RYS-XLarge2_bfloat16_3ce16c9427e93e09ce10a28fa644469d49a51113_True", - "model": { - "name": "dnhkng/RYS-XLarge2", - "sha": "3ce16c9427e93e09ce10a28fa644469d49a51113", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.05222800806562, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49019712141562166, - "normalized_score": 49.01971214156217 - }, - "bbh": { - "name": "BBH", - "value": 0.6573947106260754, - "normalized_score": 51.54993579817892 - }, - "math": { - "name": "MATH Level 5", - "value": 0.27492447129909364, - "normalized_score": 27.492447129909365 - }, - "gpqa": { - "name": "GPQA", - "value": 0.37416107382550334, - "normalized_score": 16.554809843400445 - }, - "musr": { - "name": "MUSR", - "value": 0.4508020833333333, - "normalized_score": 17.050260416666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5378158244680851, - "normalized_score": 48.646202718676115 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-10-11", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 77.965, - "co2_cost": 26.751769110437625 - } - }, - { - "id": "dreamgen/WizardLM-2-7B_bfloat16_b5f2d7bff91445a47331dcce588aee009d11d255_True", - "model": { - "name": "dreamgen/WizardLM-2-7B", - "sha": "b5f2d7bff91445a47331dcce588aee009d11d255", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 14.877542593987686, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45829842595424586, - "normalized_score": 45.82984259542458 - }, - "bbh": { - "name": "BBH", - "value": 0.34867856163972016, - "normalized_score": 9.213113542615597 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03323262839879154, - "normalized_score": 3.3232628398791544 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28691275167785235, - "normalized_score": 4.921700223713646 - }, - "musr": { - "name": "MUSR", - "value": 0.39409374999999996, - "normalized_score": 7.528385416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2660405585106383, - "normalized_score": 18.44895094562648 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - 
"is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-16", - "submission_date": "2024-06-27", - "generation": 0, - "base_model": "dreamgen/WizardLM-2-7B", - "hub_license": "apache-2.0", - "hub_hearts": 36, - "params_billions": 7.242, - "co2_cost": 1.1334500041434057 - } - }, - { - "id": "dustinwloring1988/Reflexis-8b-chat-v1_bfloat16_e96bd9694ae87a4f612825310eb7afaea5b0aa28_True", - "model": { - "name": "dustinwloring1988/Reflexis-8b-chat-v1", - "sha": "e96bd9694ae87a4f612825310eb7afaea5b0aa28", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.353238785134156, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3657750324694034, - "normalized_score": 36.57750324694034 - }, - "bbh": { - "name": "BBH", - "value": 0.4663596290293861, - "normalized_score": 24.109958157326172 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11555891238670694, - "normalized_score": 11.555891238670695 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25419463087248323, - "normalized_score": 0.5592841163310973 - }, - "musr": { - "name": "MUSR", - "value": 0.3753958333333333, - "normalized_score": 4.824479166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3384308510638298, - "normalized_score": 26.492316784869978 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-14", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.7822831510467891 - } - }, - { - "id": "dustinwloring1988/Reflexis-8b-chat-v2_bfloat16_817408ebfaa7ba0ea9433e1de4bfa120d38d2a0f_True", - "model": { - "name": "dustinwloring1988/Reflexis-8b-chat-v2", - "sha": "817408ebfaa7ba0ea9433e1de4bfa120d38d2a0f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.27663380631257, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3912042270065648, - "normalized_score": 39.120422700656476 - }, - "bbh": { - "name": "BBH", - "value": 0.47238018945807153, - "normalized_score": 24.892196306273934 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1163141993957704, - "normalized_score": 11.63141993957704 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.3526354166666667, - "normalized_score": 4.912760416666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3377659574468085, - "normalized_score": 26.418439716312054 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-14", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.8807394500767654 - } - }, - { - "id": "dustinwloring1988/Reflexis-8b-chat-v3_bfloat16_dcfa1a6a9f94a099286891d732b17cbbe97a644e_True", - "model": { - "name": "dustinwloring1988/Reflexis-8b-chat-v3", - "sha": 
"dcfa1a6a9f94a099286891d732b17cbbe97a644e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.52544148779317, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.536733644507684, - "normalized_score": 53.6733644507684 - }, - "bbh": { - "name": "BBH", - "value": 0.4658310598309874, - "normalized_score": 24.168293247720744 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12235649546827794, - "normalized_score": 12.235649546827794 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2424496644295302, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.35117708333333336, - "normalized_score": 4.763802083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35480385638297873, - "normalized_score": 28.311539598108748 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-14", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.7829341776091412 - } - }, - { - "id": "dustinwloring1988/Reflexis-8b-chat-v4_bfloat16_81e20c2e40f2028818d5d6d27ec9e0d503ae8cc1_True", - "model": { - "name": "dustinwloring1988/Reflexis-8b-chat-v4", - "sha": "81e20c2e40f2028818d5d6d27ec9e0d503ae8cc1", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.53093853220701, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4697890486132351, - "normalized_score": 46.978904861323514 - }, - "bbh": { - "name": "BBH", - "value": 0.46860140660011185, - "normalized_score": 24.331770038797558 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1027190332326284, - "normalized_score": 10.27190332326284 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23406040268456377, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.33930208333333334, - "normalized_score": 3.0460937500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3390126329787234, - "normalized_score": 26.556959219858157 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-14", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.7705395097616592 - } - }, - { - "id": "dustinwloring1988/Reflexis-8b-chat-v5_bfloat16_12970eec99f458a3982eb502b71b6df0bc74bb52_True", - "model": { - "name": "dustinwloring1988/Reflexis-8b-chat-v5", - "sha": "12970eec99f458a3982eb502b71b6df0bc74bb52", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.536269738126414, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42375231053604434, - "normalized_score": 42.37523105360444 - }, - "bbh": { - "name": "BBH", - "value": 0.4781685533183147, - "normalized_score": 25.195784260224865 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1216012084592145, - 
"normalized_score": 12.16012084592145 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.33536458333333335, - "normalized_score": 4.053906250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3217253989361702, - "normalized_score": 24.63615543735224 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-14", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.8261926105455653 - } - }, - { - "id": "dustinwloring1988/Reflexis-8b-chat-v6_bfloat16_a0b30a21a8eea9a32a2767755dc2dbd44eeb383f_True", - "model": { - "name": "dustinwloring1988/Reflexis-8b-chat-v6", - "sha": "a0b30a21a8eea9a32a2767755dc2dbd44eeb383f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.344892411076298, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4938939790866014, - "normalized_score": 49.38939790866013 - }, - "bbh": { - "name": "BBH", - "value": 0.4809537068664902, - "normalized_score": 26.116102641092812 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1299093655589124, - "normalized_score": 12.990936555891238 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2625838926174497, - "normalized_score": 1.6778523489932917 - }, - "musr": { - "name": "MUSR", - "value": 0.3753333333333333, - "normalized_score": 4.3500000000000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.347905585106383, - "normalized_score": 27.54506501182033 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-14", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.7984057478075823 - } - }, - { - "id": "dustinwloring1988/Reflexis-8b-chat-v7_bfloat16_e8d990012ccd855e65d51cb7cfd1762632a8f217_True", - "model": { - "name": "dustinwloring1988/Reflexis-8b-chat-v7", - "sha": "e8d990012ccd855e65d51cb7cfd1762632a8f217", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.0955010758327, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.39804828964924177, - "normalized_score": 39.804828964924184 - }, - "bbh": { - "name": "BBH", - "value": 0.4809830787114964, - "normalized_score": 25.98749682684877 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16314199395770393, - "normalized_score": 16.314199395770395 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.32215625, - "normalized_score": 1.5361979166666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3642785904255319, - "normalized_score": 29.364287825059105 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": 
"", - "submission_date": "2024-09-14", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.0, - "co2_cost": 1.8042220303022205 - } - }, - { - "id": "duyhv1411/Llama-3.2-1B-en-vi_float16_d08c530d8256a72ad9548b0f26416ee98eae22ac_True", - "model": { - "name": "duyhv1411/Llama-3.2-1B-en-vi", - "sha": "d08c530d8256a72ad9548b0f26416ee98eae22ac", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 10.858146203302624, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4788317220530415, - "normalized_score": 47.88317220530415 - }, - "bbh": { - "name": "BBH", - "value": 0.329090872737918, - "normalized_score": 6.09240023176899 - }, - "math": { - "name": "MATH Level 5", - "value": 0.028700906344410877, - "normalized_score": 2.8700906344410875 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27684563758389263, - "normalized_score": 3.5794183445190177 - }, - "musr": { - "name": "MUSR", - "value": 0.3197083333333333, - "normalized_score": 0.9302083333333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.13414228723404256, - "normalized_score": 3.793587470449173 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-06", - "generation": 1, - "base_model": "meta-llama/Llama-3.2-1B-Instruct", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 1.236, - "co2_cost": 0.37945424231509367 - } - }, - { - "id": "duyhv1411/Llama-3.2-3B-en-vi_bfloat16_7c5c74623642a3dc50de1f195babf32b8584fe90_True", - "model": { - "name": "duyhv1411/Llama-3.2-3B-en-vi", - "sha": "7c5c74623642a3dc50de1f195babf32b8584fe90", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 10.861408814252767, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4852014876084345, - "normalized_score": 48.520148760843455 - }, - "bbh": { - "name": "BBH", - "value": 0.3271639320986486, - "normalized_score": 5.946254874082537 - }, - "math": { - "name": "MATH Level 5", - "value": 0.022658610271903322, - "normalized_score": 2.2658610271903323 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2751677852348993, - "normalized_score": 3.355704697986576 - }, - "musr": { - "name": "MUSR", - "value": 0.3210104166666667, - "normalized_score": 1.0929687500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.13588763297872342, - "normalized_score": 3.987514775413712 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-06", - "generation": 1, - "base_model": "meta-llama/Llama-3.2-1B-Instruct", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 1.236, - "co2_cost": 0.3683193365720268 - } - }, - { - "id": "dwikitheduck/gemma-2-2b-id_float16_6f191d4a7618664619adda1cd96d9d1bf72f33b2_True", - "model": { - "name": "dwikitheduck/gemma-2-2b-id", - "sha": "6f191d4a7618664619adda1cd96d9d1bf72f33b2", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - 
"architecture": "Gemma2ForCausalLM", - "average_score": 14.849648233221265, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.38785644312646006, - "normalized_score": 38.785644312646 - }, - "bbh": { - "name": "BBH", - "value": 0.39621721241423097, - "normalized_score": 15.415129369168449 - }, - "math": { - "name": "MATH Level 5", - "value": 0.045317220543806644, - "normalized_score": 4.531722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.41542708333333334, - "normalized_score": 10.728385416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.21733710106382978, - "normalized_score": 13.037455673758863 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-24", - "submission_date": "2024-11-14", - "generation": 0, - "base_model": "dwikitheduck/gemma-2-2b-id", - "hub_license": "gemma", - "hub_hearts": 0, - "params_billions": 2.0, - "co2_cost": 6.048664020510734 - } - }, - { - "id": "dwikitheduck/gemma-2-2b-id-inst_float16_1c046ade199128da926004e154698546d65e3084_True", - "model": { - "name": "dwikitheduck/gemma-2-2b-id-inst", - "sha": "1c046ade199128da926004e154698546d65e3084", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 14.849648233221265, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.38785644312646006, - "normalized_score": 38.785644312646 - }, - "bbh": { - "name": "BBH", - "value": 0.39621721241423097, - "normalized_score": 15.415129369168449 - }, - "math": { - "name": "MATH Level 5", - "value": 0.045317220543806644, - "normalized_score": 4.531722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.41542708333333334, - "normalized_score": 10.728385416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.21733710106382978, - "normalized_score": 13.037455673758863 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-24", - "submission_date": "2024-11-24", - "generation": 0, - "base_model": "dwikitheduck/gemma-2-2b-id-inst", - "hub_license": "gemma", - "hub_hearts": 0, - "params_billions": 2.0, - "co2_cost": 2.820792959479685 - } - }, - { - "id": "dwikitheduck/gemma-2-2b-id-instruct_float16_1c046ade199128da926004e154698546d65e3084_True", - "model": { - "name": "dwikitheduck/gemma-2-2b-id-instruct", - "sha": "1c046ade199128da926004e154698546d65e3084", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 14.849648233221265, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.38785644312646006, - "normalized_score": 38.785644312646 - }, - "bbh": { - "name": "BBH", - "value": 0.39621721241423097, - "normalized_score": 15.415129369168449 - }, - "math": { - "name": "MATH Level 5", - "value": 0.045317220543806644, - "normalized_score": 4.531722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, 
- "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.41542708333333334, - "normalized_score": 10.728385416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.21733710106382978, - "normalized_score": 13.037455673758863 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-24", - "submission_date": "2024-11-15", - "generation": 0, - "base_model": "dwikitheduck/gemma-2-2b-id-instruct", - "hub_license": "gemma", - "hub_hearts": 0, - "params_billions": 2.0, - "co2_cost": 2.8338148685218054 - } - }, - { - "id": "dwikitheduck/gen-inst-1_bfloat16_73180b0a57469bbd12f7d037a1cc25e53c252ad6_True", - "model": { - "name": "dwikitheduck/gen-inst-1", - "sha": "73180b0a57469bbd12f7d037a1cc25e53c252ad6", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.880198014659875, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7750114141588762, - "normalized_score": 77.50114141588762 - }, - "bbh": { - "name": "BBH", - "value": 0.6419926671215591, - "normalized_score": 48.31674208220422 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4554380664652568, - "normalized_score": 45.54380664652568 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3716442953020134, - "normalized_score": 16.21923937360179 - }, - "musr": { - "name": "MUSR", - "value": 0.42054166666666665, - "normalized_score": 12.26770833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5088929521276596, - "normalized_score": 45.43255023640663 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-18", - "submission_date": "2024-11-24", - "generation": 2, - "base_model": "Qwen/Qwen2.5-14B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 3.0597448187122507 - } - }, - { - "id": "dwikitheduck/gen-try1_bfloat16_9c2cab728518e179e5d8891f3f9775515f15cea2_True", - "model": { - "name": "dwikitheduck/gen-try1", - "sha": "9c2cab728518e179e5d8891f3f9775515f15cea2", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.412127202941626, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7522052598217175, - "normalized_score": 75.22052598217175 - }, - "bbh": { - "name": "BBH", - "value": 0.6358510933470735, - "normalized_score": 47.41312903142858 - }, - "math": { - "name": "MATH Level 5", - "value": 0.41012084592145015, - "normalized_score": 41.012084592145015 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3414429530201342, - "normalized_score": 12.192393736017896 - }, - "musr": { - "name": "MUSR", - "value": 0.4415625, - "normalized_score": 14.961979166666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5110538563829787, - "normalized_score": 45.67265070921986 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-11", - "submission_date": "2024-11-12", - "generation": 1, - "base_model": "dwikitheduck/gen-try1 (Merge)", - "hub_license": 
"apache-2.0", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 3.166161110753483 - } - }, - { - "id": "dwikitheduck/gen-try1-notemp_bfloat16_391925b02f6cd60e7c4ef1321fe89a92d6b9fdf0_False", - "model": { - "name": "dwikitheduck/gen-try1-notemp", - "sha": "391925b02f6cd60e7c4ef1321fe89a92d6b9fdf0", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 30.399295492837265, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.26270961050013963, - "normalized_score": 26.270961050013963 - }, - "bbh": { - "name": "BBH", - "value": 0.626267088306491, - "normalized_score": 45.749092669505465 - }, - "math": { - "name": "MATH Level 5", - "value": 0.31797583081570996, - "normalized_score": 31.797583081570995 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3540268456375839, - "normalized_score": 13.870246085011187 - }, - "musr": { - "name": "MUSR", - "value": 0.47141666666666665, - "normalized_score": 17.927083333333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5210272606382979, - "normalized_score": 46.780806737588655 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-13", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 3.7912117591585552 - } - }, - { - "id": "dzakwan/dzakwan-MoE-4x7b-Beta_float16_e89f82f2afa1961335de5a6d6d05bd850d1d61d9_False", - "model": { - "name": "dzakwan/dzakwan-MoE-4x7b-Beta", - "sha": "e89f82f2afa1961335de5a6d6d05bd850d1d61d9", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 20.76930305324143, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44426011870725235, - "normalized_score": 44.42601187072523 - }, - "bbh": { - "name": "BBH", - "value": 0.514044131159397, - "normalized_score": 32.074208465442545 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07779456193353475, - "normalized_score": 7.779456193353475 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2860738255033557, - "normalized_score": 4.809843400447425 - }, - "musr": { - "name": "MUSR", - "value": 0.42673958333333334, - "normalized_score": 12.109114583333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3107546542553192, - "normalized_score": 23.417183806146575 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-26", - "submission_date": "2024-08-05", - "generation": 1, - "base_model": "dzakwan/dzakwan-MoE-4x7b-Beta (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 24.154, - "co2_cost": 2.9120566130342462 - } - }, - { - "id": "ehristoforu/Falcon3-8B-Franken-Basestruct_bfloat16_627cee5966188907ea34e4b473f655606fe82e5a_True", - "model": { - "name": "ehristoforu/Falcon3-8B-Franken-Basestruct", - "sha": "627cee5966188907ea34e4b473f655606fe82e5a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.43874726421677, - "has_chat_template": true 
- }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17148499315150467, - "normalized_score": 17.148499315150467 - }, - "bbh": { - "name": "BBH", - "value": 0.5462828074770284, - "normalized_score": 34.8564190624812 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34060402684563756, - "normalized_score": 12.080536912751676 - }, - "musr": { - "name": "MUSR", - "value": 0.3554895833333333, - "normalized_score": 1.8028645833333323 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3946974734042553, - "normalized_score": 32.74416371158392 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-06", - "submission_date": "2025-01-06", - "generation": 1, - "base_model": "ehristoforu/Falcon3-8B-Franken-Basestruct (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.406, - "co2_cost": 1.5930172935005347 - } - }, - { - "id": "ehristoforu/Falcon3-MoE-2x7B-Insruct_bfloat16_d7c85f436d22685010165483ba966d6ee2336cc8_True", - "model": { - "name": "ehristoforu/Falcon3-MoE-2x7B-Insruct", - "sha": "d7c85f436d22685010165483ba966d6ee2336cc8", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 36.66765115739224, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7642954028643998, - "normalized_score": 76.42954028643999 - }, - "bbh": { - "name": "BBH", - "value": 0.564789641564995, - "normalized_score": 38.0671542210182 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4123867069486405, - "normalized_score": 41.23867069486405 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31208053691275167, - "normalized_score": 8.277404921700223 - }, - "musr": { - "name": "MUSR", - "value": 0.4840416666666667, - "normalized_score": 21.605208333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.40949135638297873, - "normalized_score": 34.38792848699764 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-22", - "submission_date": "2024-12-22", - "generation": 1, - "base_model": "ehristoforu/Falcon3-MoE-2x7B-Insruct (Merge)", - "hub_license": "other", - "hub_hearts": 8, - "params_billions": 13.401, - "co2_cost": 3.2724214127053273 - } - }, - { - "id": "ehristoforu/Gemma2-9B-it-psy10k-mental_health_float16_4adc2d61d530d23026493d29e6191e06cf549fc6_True", - "model": { - "name": "ehristoforu/Gemma2-9B-it-psy10k-mental_health", - "sha": "4adc2d61d530d23026493d29e6191e06cf549fc6", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 27.19248947734485, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5886658510529839, - "normalized_score": 58.866585105298384 - }, - "bbh": { - "name": "BBH", - "value": 0.5539376944027642, - "normalized_score": 35.56600949863266 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16314199395770393, - "normalized_score": 16.314199395770395 - }, - "gpqa": { - "name": "GPQA", - "value": 0.337248322147651, - "normalized_score": 11.633109619686799 - }, - "musr": { - "name": "MUSR", 
- "value": 0.40860416666666666, - "normalized_score": 9.3421875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38289561170212766, - "normalized_score": 31.432845744680847 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-16", - "submission_date": "2024-07-31", - "generation": 4, - "base_model": "google/gemma-2-9b", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 9.242, - "co2_cost": 4.553660365839716 - } - }, - { - "id": "ehristoforu/Gemma2-9b-it-train6_float16_e72bf00b427c22c48b468818cf75300a373a0c8a_True", - "model": { - "name": "ehristoforu/Gemma2-9b-it-train6", - "sha": "e72bf00b427c22c48b468818cf75300a373a0c8a", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 30.5339867395368, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7025215317579578, - "normalized_score": 70.25215317579578 - }, - "bbh": { - "name": "BBH", - "value": 0.5898092579133603, - "normalized_score": 40.98762530159646 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19108761329305135, - "normalized_score": 19.108761329305135 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3288590604026846, - "normalized_score": 10.514541387024611 - }, - "musr": { - "name": "MUSR", - "value": 0.40841666666666665, - "normalized_score": 9.652083333333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39419880319148937, - "normalized_score": 32.68875591016548 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-22", - "submission_date": "2024-07-31", - "generation": 8, - "base_model": "google/gemma-2-9b", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 9.242, - "co2_cost": 3.9873669487924204 - } - }, - { - "id": "ehristoforu/HappyLlama1_float16_9bee1c404de70fc0ebe3cbcd2af2303a313a24be_True", - "model": { - "name": "ehristoforu/HappyLlama1", - "sha": "9bee1c404de70fc0ebe3cbcd2af2303a313a24be", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.735379379762946, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7362686560548235, - "normalized_score": 73.62686560548235 - }, - "bbh": { - "name": "BBH", - "value": 0.49957323097428485, - "normalized_score": 28.49977340708129 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14274924471299094, - "normalized_score": 14.274924471299094 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.42868749999999994, - "normalized_score": 11.252604166666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35455452127659576, - "normalized_score": 28.283835697399525 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-29", - "submission_date": "2024-11-30", - "generation": 1, - "base_model": "voidful/Llama-3.2-8B-Instruct", - "hub_license": "apache-2.0", - 
"hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4287212848324906 - } - }, - { - "id": "ehristoforu/QwenQwen2.5-7B-IT_float16_fec28a5fa8a3139c24b67a9e7092a0175b801872_True", - "model": { - "name": "ehristoforu/QwenQwen2.5-7B-IT", - "sha": "fec28a5fa8a3139c24b67a9e7092a0175b801872", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.58121845476433, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.751830698103255, - "normalized_score": 75.18306981032549 - }, - "bbh": { - "name": "BBH", - "value": 0.5397962708415814, - "normalized_score": 34.96965415936291 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5090634441087614, - "normalized_score": 50.90634441087614 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.4033645833333333, - "normalized_score": 8.720572916666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4289394946808511, - "normalized_score": 36.54883274231678 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-29", - "submission_date": "2025-01-30", - "generation": 1, - "base_model": "ehristoforu/QwenQwen2.5-7B-IT (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.613, - "co2_cost": 1.3084646956722368 - } - }, - { - "id": "ehristoforu/QwenQwen2.5-7B-IT-Dare_float16_376d1c82e6fd973fb927f8540535d39e8f4c6168_True", - "model": { - "name": "ehristoforu/QwenQwen2.5-7B-IT-Dare", - "sha": "376d1c82e6fd973fb927f8540535d39e8f4c6168", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.56581488113525, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7509064836855099, - "normalized_score": 75.090648368551 - }, - "bbh": { - "name": "BBH", - "value": 0.5397962708415814, - "normalized_score": 34.96965415936291 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5090634441087614, - "normalized_score": 50.90634441087614 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.4033645833333333, - "normalized_score": 8.720572916666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4289394946808511, - "normalized_score": 36.54883274231678 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-29", - "submission_date": "2025-01-30", - "generation": 1, - "base_model": "ehristoforu/QwenQwen2.5-7B-IT-Dare (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.613, - "co2_cost": 1.3336692145681672 - } - }, - { - "id": "ehristoforu/RQwen-v0.1_float16_96d013d2db2ae47be9da1d1cd5b83782bd8f4096_True", - "model": { - "name": "ehristoforu/RQwen-v0.1", - "sha": "96d013d2db2ae47be9da1d1cd5b83782bd8f4096", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.73075710536471, - "has_chat_template": true - }, - 
"evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7624968417133207, - "normalized_score": 76.24968417133206 - }, - "bbh": { - "name": "BBH", - "value": 0.6446435015804635, - "normalized_score": 48.49085165760342 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4645015105740181, - "normalized_score": 46.45015105740181 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32550335570469796, - "normalized_score": 10.067114093959727 - }, - "musr": { - "name": "MUSR", - "value": 0.41390625, - "normalized_score": 10.438281250000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5201961436170213, - "normalized_score": 46.68846040189125 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-24", - "submission_date": "2024-11-24", - "generation": 1, - "base_model": "ehristoforu/RQwen-v0.1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.77, - "co2_cost": 3.4178277548538123 - } - }, - { - "id": "ehristoforu/RQwen-v0.2_float16_102ff435814388f4da9e7ebc25c5fbae7120638a_True", - "model": { - "name": "ehristoforu/RQwen-v0.2", - "sha": "102ff435814388f4da9e7ebc25c5fbae7120638a", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 37.702469412365296, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7503568309862276, - "normalized_score": 75.03568309862277 - }, - "bbh": { - "name": "BBH", - "value": 0.6426888858891955, - "normalized_score": 48.68383680717029 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3270392749244713, - "normalized_score": 32.703927492447136 - }, - "gpqa": { - "name": "GPQA", - "value": 0.337248322147651, - "normalized_score": 11.633109619686799 - }, - "musr": { - "name": "MUSR", - "value": 0.4206666666666667, - "normalized_score": 11.950000000000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.515874335106383, - "normalized_score": 46.208259456264784 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-24", - "submission_date": "2024-11-25", - "generation": 2, - "base_model": "ehristoforu/RQwen-v0.1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 14.77, - "co2_cost": 2.5884866591591313 - } - }, - { - "id": "ehristoforu/SoRu-0009_float16_fe4f439882175c3cad8a0f08f7b14d18318b53d1_True", - "model": { - "name": "ehristoforu/SoRu-0009", - "sha": "fe4f439882175c3cad8a0f08f7b14d18318b53d1", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 6.300240741893716, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25818827378023645, - "normalized_score": 25.81882737802365 - }, - "bbh": { - "name": "BBH", - "value": 0.3149981683579724, - "normalized_score": 5.137457620795654 - }, - "math": { - "name": "MATH Level 5", - "value": 0.021148036253776436, - "normalized_score": 2.1148036253776437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.3369479166666667, - "normalized_score": 
0.6184895833333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12391954787234043, - "normalized_score": 2.657727541371158 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-26", - "submission_date": "2024-11-27", - "generation": 10, - "base_model": "Vikhrmodels/Vikhr-Qwen-2.5-0.5b-Instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 1.0238624057573298 - } - }, - { - "id": "ehristoforu/coolqwen-3b-it_float16_5045993527b5da13a71e5b3df8c649bd55425124_True", - "model": { - "name": "ehristoforu/coolqwen-3b-it", - "sha": "5045993527b5da13a71e5b3df8c649bd55425124", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 28.654353578279625, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6472670292601409, - "normalized_score": 64.72670292601408 - }, - "bbh": { - "name": "BBH", - "value": 0.485089343991756, - "normalized_score": 27.463695565892305 - }, - "math": { - "name": "MATH Level 5", - "value": 0.36706948640483383, - "normalized_score": 36.70694864048338 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.41251041666666666, - "normalized_score": 9.763802083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3601230053191489, - "normalized_score": 28.9025561465721 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-02", - "generation": 1, - "base_model": "Qwen/Qwen2.5-3B", - "hub_license": "other", - "hub_hearts": 2, - "params_billions": 3.085, - "co2_cost": 1.4458912752828572 - } - }, - { - "id": "ehristoforu/della-70b-test-v1_float16_c705bbf0e900dc5375325e8ff80de0525aa713e4_True", - "model": { - "name": "ehristoforu/della-70b-test-v1", - "sha": "c705bbf0e900dc5375325e8ff80de0525aa713e4", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 12.869435105716944, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49786566310722213, - "normalized_score": 49.786566310722215 - }, - "bbh": { - "name": "BBH", - "value": 0.3029452113782393, - "normalized_score": 3.35845623845221 - }, - "math": { - "name": "MATH Level 5", - "value": 0.009818731117824773, - "normalized_score": 0.9818731117824773 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2525167785234899, - "normalized_score": 0.33557046979865535 - }, - "musr": { - "name": "MUSR", - "value": 0.45545833333333335, - "normalized_score": 16.365625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1574966755319149, - "normalized_score": 6.3885195035461 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-06", - "generation": 1, - "base_model": "ehristoforu/della-70b-test-v1 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 
70.554, - "co2_cost": 112.55540311381243 - } - }, - { - "id": "ehristoforu/falcon3-ultraset_float16_50f5fd7e00b64eb515205e47f4acf28daf224055_True", - "model": { - "name": "ehristoforu/falcon3-ultraset", - "sha": "50f5fd7e00b64eb515205e47f4acf28daf224055", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 32.53688653155465, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7135123694020753, - "normalized_score": 71.35123694020754 - }, - "bbh": { - "name": "BBH", - "value": 0.5583684420918801, - "normalized_score": 37.555134459501744 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2122356495468278, - "normalized_score": 21.22356495468278 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33221476510067116, - "normalized_score": 10.96196868008949 - }, - "musr": { - "name": "MUSR", - "value": 0.48531250000000004, - "normalized_score": 20.99739583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.398188164893617, - "normalized_score": 33.132018321512994 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-30", - "submission_date": "2025-01-02", - "generation": 2, - "base_model": "tiiuae/Falcon3-7B-Base", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.456, - "co2_cost": 1.2365822010385132 - } - }, - { - "id": "ehristoforu/fd-lora-merged-16x32_float16_e31dfdc1714a914452489af06fe226ac3495b4c6_True", - "model": { - "name": "ehristoforu/fd-lora-merged-16x32", - "sha": "e31dfdc1714a914452489af06fe226ac3495b4c6", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 10.454537831938053, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3480897352358409, - "normalized_score": 34.80897352358409 - }, - "bbh": { - "name": "BBH", - "value": 0.3307564619842368, - "normalized_score": 6.527180121800353 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17069486404833836, - "normalized_score": 17.069486404833835 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2533557046979866, - "normalized_score": 0.44742729306487633 - }, - "musr": { - "name": "MUSR", - "value": 0.35142708333333333, - "normalized_score": 1.5950520833333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12051196808510638, - "normalized_score": 2.279107565011819 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 0, - "base_model": "ehristoforu/fd-lora-merged-16x32", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 1.776, - "co2_cost": 1.2609660637995952 - } - }, - { - "id": "ehristoforu/fd-lora-merged-64x128_float16_04903de3e1bd71db5421ccd12f7c19d9b3cf7e04_True", - "model": { - "name": "ehristoforu/fd-lora-merged-64x128", - "sha": "04903de3e1bd71db5421ccd12f7c19d9b3cf7e04", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 11.210864438231852, - "has_chat_template": true - }, - "evaluations": { - "ifeval": 
{ - "name": "IFEval", - "value": 0.3281060918363276, - "normalized_score": 32.810609183632764 - }, - "bbh": { - "name": "BBH", - "value": 0.33447107385638297, - "normalized_score": 7.819061072049554 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18731117824773413, - "normalized_score": 18.731117824773413 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2550335570469799, - "normalized_score": 0.6711409395973182 - }, - "musr": { - "name": "MUSR", - "value": 0.3368229166666667, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15367353723404256, - "normalized_score": 5.963726359338062 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 0, - "base_model": "ehristoforu/fd-lora-merged-64x128", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 1.777, - "co2_cost": 1.274784777113842 - } - }, - { - "id": "ehristoforu/fp4-14b-it-v1_bfloat16_3fc21d9a548a3f13b57a40c9f077ba1afc8ed20e_True", - "model": { - "name": "ehristoforu/fp4-14b-it-v1", - "sha": "3fc21d9a548a3f13b57a40c9f077ba1afc8ed20e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.698030773782747, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25346746632269046, - "normalized_score": 25.346746632269046 - }, - "bbh": { - "name": "BBH", - "value": 0.5739715511094247, - "normalized_score": 38.77952167023901 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04078549848942598, - "normalized_score": 4.078549848942599 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2953020134228188, - "normalized_score": 6.040268456375841 - }, - "musr": { - "name": "MUSR", - "value": 0.35948958333333336, - "normalized_score": 2.336197916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4204621010638298, - "normalized_score": 35.606900118203306 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-19", - "submission_date": "2025-01-19", - "generation": 1, - "base_model": "ehristoforu/fp4-14b-it-v1 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 1.927937797337822 - } - }, - { - "id": "ehristoforu/fp4-14b-v1-fix_bfloat16_bc84457e330589381ec4ac00d697985b74e62dd1_True", - "model": { - "name": "ehristoforu/fp4-14b-v1-fix", - "sha": "bc84457e330589381ec4ac00d697985b74e62dd1", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 40.37357596138752, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6741700909143296, - "normalized_score": 67.41700909143296 - }, - "bbh": { - "name": "BBH", - "value": 0.6817274121032688, - "normalized_score": 54.333779642129876 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4206948640483384, - "normalized_score": 42.06948640483384 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3540268456375839, - "normalized_score": 13.870246085011187 - }, - "musr": { - "name": "MUSR", - "value": 0.4531875, - "normalized_score": 16.181770833333328 - }, 
- "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5353224734042553, - "normalized_score": 48.36916371158392 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-20", - "submission_date": "2025-01-20", - "generation": 1, - "base_model": "ehristoforu/fp4-14b-v1-fix (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 1.9345584896658994 - } - }, - { - "id": "ehristoforu/fq2.5-7b-it-normalize_false_bfloat16_f64301d81ec5441eb79fe263e215ad24e1c111cc_True", - "model": { - "name": "ehristoforu/fq2.5-7b-it-normalize_false", - "sha": "f64301d81ec5441eb79fe263e215ad24e1c111cc", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.49631491468131, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7399156460413925, - "normalized_score": 73.99156460413924 - }, - "bbh": { - "name": "BBH", - "value": 0.551986272150289, - "normalized_score": 36.35831249255221 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4622356495468278, - "normalized_score": 46.22356495468278 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - "musr": { - "name": "MUSR", - "value": 0.46115625, - "normalized_score": 17.544531249999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44132313829787234, - "normalized_score": 37.92479314420804 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-16", - "submission_date": "2025-01-16", - "generation": 1, - "base_model": "ehristoforu/fq2.5-7b-it-normalize_false (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 7.616, - "co2_cost": 1.3755978546847847 - } - }, - { - "id": "ehristoforu/fq2.5-7b-it-normalize_true_bfloat16_0100b08ab93ee049be9e43532e2caf05221da773_True", - "model": { - "name": "ehristoforu/fq2.5-7b-it-normalize_true", - "sha": "0100b08ab93ee049be9e43532e2caf05221da773", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.49631491468131, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7399156460413925, - "normalized_score": 73.99156460413924 - }, - "bbh": { - "name": "BBH", - "value": 0.551986272150289, - "normalized_score": 36.35831249255221 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4622356495468278, - "normalized_score": 46.22356495468278 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - "musr": { - "name": "MUSR", - "value": 0.46115625, - "normalized_score": 17.544531249999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44132313829787234, - "normalized_score": 37.92479314420804 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-16", - "submission_date": "2025-01-16", - "generation": 1, - "base_model": "ehristoforu/fq2.5-7b-it-normalize_true (Merge)", - "hub_license": "", - "hub_hearts": 1, - 
"params_billions": 7.616, - "co2_cost": 1.370156364188565 - } - }, - { - "id": "ehristoforu/frqwen2.5-from7b-duable4layers-it_bfloat16_3322c001e5dfbb301f42f0bac355f2af61137608_True", - "model": { - "name": "ehristoforu/frqwen2.5-from7b-duable4layers-it", - "sha": "3322c001e5dfbb301f42f0bac355f2af61137608", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 34.62610373964922, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7728881589737453, - "normalized_score": 77.28881589737452 - }, - "bbh": { - "name": "BBH", - "value": 0.5263561044354216, - "normalized_score": 33.95977783040959 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4509063444108761, - "normalized_score": 45.090634441087616 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2953020134228188, - "normalized_score": 6.040268456375841 - }, - "musr": { - "name": "MUSR", - "value": 0.4165729166666667, - "normalized_score": 10.63828125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4126496010638298, - "normalized_score": 34.738844562647756 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-17", - "submission_date": "2025-01-17", - "generation": 1, - "base_model": "ehristoforu/frqwen2.5-from7b-duable4layers-it (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.545, - "co2_cost": 1.5642098098613515 - } - }, - { - "id": "ehristoforu/frqwen2.5-from7b-it_bfloat16_b8ebb360a763020f3bf0bbf0115e42a9d325b7e0_True", - "model": { - "name": "ehristoforu/frqwen2.5-from7b-it", - "sha": "b8ebb360a763020f3bf0bbf0115e42a9d325b7e0", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 28.8464894172487, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6532123654126606, - "normalized_score": 65.32123654126606 - }, - "bbh": { - "name": "BBH", - "value": 0.5142906815349029, - "normalized_score": 30.71074009126445 - }, - "math": { - "name": "MATH Level 5", - "value": 0.29229607250755285, - "normalized_score": 29.229607250755286 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.4085729166666667, - "normalized_score": 9.371614583333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3976894946808511, - "normalized_score": 33.076610520094555 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-17", - "submission_date": "2025-01-17", - "generation": 1, - "base_model": "ehristoforu/frqwen2.5-from7b-it (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 13.206, - "co2_cost": 3.930741844722849 - } - }, - { - "id": "ehristoforu/mllama-3.1-8b-instruct_bfloat16_f7be209bee659916c03b6a3b77e67237cfed2c12_True", - "model": { - "name": "ehristoforu/mllama-3.1-8b-instruct", - "sha": "f7be209bee659916c03b6a3b77e67237cfed2c12", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.353062093432012, - 
"has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3457913890698901, - "normalized_score": 34.57913890698901 - }, - "bbh": { - "name": "BBH", - "value": 0.47176616480333583, - "normalized_score": 26.370934099251986 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3776435045317221, - "normalized_score": 37.764350453172206 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.338, - "normalized_score": 3.683333333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2533244680851064, - "normalized_score": 17.036052009456267 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-04", - "submission_date": "2024-12-04", - "generation": 1, - "base_model": "ehristoforu/mllama-3.1-8b-instruct (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.4892374312599972 - } - }, - { - "id": "ehristoforu/mllama-3.1-8b-it_bfloat16_5dc167a466759e5d60c073dca4e938463e2fd813_False", - "model": { - "name": "ehristoforu/mllama-3.1-8b-it", - "sha": "5dc167a466759e5d60c073dca4e938463e2fd813", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.17760204557689, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.38788193105404767, - "normalized_score": 38.78819310540477 - }, - "bbh": { - "name": "BBH", - "value": 0.4868027039491969, - "normalized_score": 28.024833549561226 - }, - "math": { - "name": "MATH Level 5", - "value": 0.37990936555891236, - "normalized_score": 37.99093655589124 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27684563758389263, - "normalized_score": 3.5794183445190177 - }, - "musr": { - "name": "MUSR", - "value": 0.3348645833333333, - "normalized_score": 6.658072916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.26221742021276595, - "normalized_score": 18.02415780141844 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-04", - "submission_date": "2024-12-04", - "generation": 1, - "base_model": "ehristoforu/mllama-3.1-8b-it (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4676052297594384 - } - }, - { - "id": "ehristoforu/moremerge_bfloat16_6f56bba4a2d82482b269dcbab69513b6f18cefe2_True", - "model": { - "name": "ehristoforu/moremerge", - "sha": "6f56bba4a2d82482b269dcbab69513b6f18cefe2", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.5589162639696825, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20190982149585324, - "normalized_score": 20.190982149585324 - }, - "bbh": { - "name": "BBH", - "value": 0.28684447696551024, - "normalized_score": 1.987596513075965 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.35657291666666663, - "normalized_score": 
3.1049479166666654 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10654920212765957, - "normalized_score": 0.7276891252955076 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-27", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "ehristoforu/moremerge (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 1.4287547139447274 - } - }, - { - "id": "ehristoforu/moremerge-upscaled_bfloat16_2b50cf76b49db95caee5943e8ecc32237bc59a32_True", - "model": { - "name": "ehristoforu/moremerge-upscaled", - "sha": "2b50cf76b49db95caee5943e8ecc32237bc59a32", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.918261338977189, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1978882697908217, - "normalized_score": 19.788826979082167 - }, - "bbh": { - "name": "BBH", - "value": 0.26977370070980244, - "normalized_score": 1.0147625530079194 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24664429530201343, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.35930208333333336, - "normalized_score": 2.2460937499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10413896276595745, - "normalized_score": 0.4598847517730496 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-27", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "ehristoforu/moremerge-upscaled (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.545, - "co2_cost": 1.8812689359615675 - } - }, - { - "id": "ehristoforu/phi-4-25b_bfloat16_d9a7690e89b7971d6462f5fff7591a5381b3c192_True", - "model": { - "name": "ehristoforu/phi-4-25b", - "sha": "d9a7690e89b7971d6462f5fff7591a5381b3c192", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 39.11615849607893, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6483663346587056, - "normalized_score": 64.83663346587056 - }, - "bbh": { - "name": "BBH", - "value": 0.6907778236877188, - "normalized_score": 55.67261468233141 - }, - "math": { - "name": "MATH Level 5", - "value": 0.452416918429003, - "normalized_score": 45.2416918429003 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3187919463087248, - "normalized_score": 9.172259507829976 - }, - "musr": { - "name": "MUSR", - "value": 0.4207916666666667, - "normalized_score": 11.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5350731382978723, - "normalized_score": 48.341459810874696 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-12", - "submission_date": "2025-01-12", - "generation": 1, - "base_model": "ehristoforu/phi-4-25b (Merge)", - "hub_license": "", - "hub_hearts": 7, - "params_billions": 24.883, - "co2_cost": 4.66303403496976 - } - }, - { - "id": 
"ehristoforu/qwen2.5-test-32b-it_bfloat16_6bcc8f1cedfe72471276d0159d1646be6ac50e40_True", - "model": { - "name": "ehristoforu/qwen2.5-test-32b-it", - "sha": "6bcc8f1cedfe72471276d0159d1646be6ac50e40", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 47.36835680458376, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7889499860370484, - "normalized_score": 78.89499860370483 - }, - "bbh": { - "name": "BBH", - "value": 0.708059329453303, - "normalized_score": 58.28330738049858 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5974320241691843, - "normalized_score": 59.74320241691843 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3640939597315436, - "normalized_score": 15.212527964205815 - }, - "musr": { - "name": "MUSR", - "value": 0.4578125, - "normalized_score": 19.126562500000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5765458776595744, - "normalized_score": 52.94954196217494 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-05", - "submission_date": "2024-12-07", - "generation": 1, - "base_model": "ehristoforu/qwen2.5-test-32b-it (Merge)", - "hub_license": "", - "hub_hearts": 9, - "params_billions": 32.764, - "co2_cost": 29.54403370563684 - } - }, - { - "id": "ehristoforu/qwen2.5-with-lora-think-3b-it_float16_255e1d9c2eff51302276f99309c344179ee9d390_True", - "model": { - "name": "ehristoforu/qwen2.5-with-lora-think-3b-it", - "sha": "255e1d9c2eff51302276f99309c344179ee9d390", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 24.256524060080704, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5319374814381397, - "normalized_score": 53.19374814381398 - }, - "bbh": { - "name": "BBH", - "value": 0.4686847308109022, - "normalized_score": 25.07946347199629 - }, - "math": { - "name": "MATH Level 5", - "value": 0.236404833836858, - "normalized_score": 23.6404833836858 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2802013422818792, - "normalized_score": 4.026845637583895 - }, - "musr": { - "name": "MUSR", - "value": 0.43095833333333333, - "normalized_score": 12.903125000000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3402593085106383, - "normalized_score": 26.695478723404253 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-10", - "submission_date": "2025-01-10", - "generation": 1, - "base_model": "Qwen/Qwen2.5-3B", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 3.086, - "co2_cost": 1.5346905766211627 - } - }, - { - "id": "ehristoforu/rmoe-v1_float16_e99909f057d4adc6476b906b2f0385e75f8271f8_True", - "model": { - "name": "ehristoforu/rmoe-v1", - "sha": "e99909f057d4adc6476b906b2f0385e75f8271f8", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2MoeForCausalLM", - "average_score": 5.841232027005013, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.26500795666609045, - "normalized_score": 26.50079566660904 - }, - "bbh": { 
- "name": "BBH", - "value": 0.29292907133609175, - "normalized_score": 2.0673206688657713 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0015105740181268882, - "normalized_score": 0.1510574018126888 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.36634374999999997, - "normalized_score": 3.826302083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1124501329787234, - "normalized_score": 1.383348108747044 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-31", - "submission_date": "2025-01-31", - "generation": 0, - "base_model": "ehristoforu/rmoe-v1", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 11.026, - "co2_cost": 6.069017817120963 - } - }, - { - "id": "ehristoforu/rufalcon3-3b-it_float16_8cf51e5308b4d18c1d67a882b87b81cbd1a46e84_True", - "model": { - "name": "ehristoforu/rufalcon3-3b-it", - "sha": "8cf51e5308b4d18c1d67a882b87b81cbd1a46e84", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.64141899254107, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5942111375594533, - "normalized_score": 59.421113755945335 - }, - "bbh": { - "name": "BBH", - "value": 0.41554222543957625, - "normalized_score": 18.21435758381163 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1782477341389728, - "normalized_score": 17.82477341389728 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.38953124999999994, - "normalized_score": 10.391406249999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2347905585106383, - "normalized_score": 14.976728723404253 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-02", - "generation": 1, - "base_model": "tiiuae/Falcon3-3B-Instruct", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.228, - "co2_cost": 0.9813632832027144 - } - }, - { - "id": "ehristoforu/ruphi-4b_float16_228fc0c406609629e54068dbb7266f5a15ee89cc_True", - "model": { - "name": "ehristoforu/ruphi-4b", - "sha": "228fc0c406609629e54068dbb7266f5a15ee89cc", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 4.080739303536478, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17518185082248433, - "normalized_score": 17.518185082248436 - }, - "bbh": { - "name": "BBH", - "value": 0.29060336568338, - "normalized_score": 2.400631279750579 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23993288590604026, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.35117708333333336, - "normalized_score": 3.163802083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11261635638297872, - "normalized_score": 1.4018173758865236 - } - }, - "features": { - "is_not_available_on_hub": 
true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-02", - "generation": 2, - "base_model": "microsoft/Phi-3.5-mini-instruct", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.821, - "co2_cost": 1.1001005886744497 - } - }, - { - "id": "ehristoforu/testq-32b_bfloat16_0affeb22ce5bcdd33e4e931f7bb2511349c69c4b_True", - "model": { - "name": "ehristoforu/testq-32b", - "sha": "0affeb22ce5bcdd33e4e931f7bb2511349c69c4b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.53854409737458, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.18759668789921852, - "normalized_score": 18.75966878992185 - }, - "bbh": { - "name": "BBH", - "value": 0.2876549792486152, - "normalized_score": 1.9328254204684499 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0030211480362537764, - "normalized_score": 0.3021148036253776 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25419463087248323, - "normalized_score": 0.5592841163310973 - }, - "musr": { - "name": "MUSR", - "value": 0.3714583333333333, - "normalized_score": 3.8322916666666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11660571808510638, - "normalized_score": 1.8450797872340412 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-19", - "submission_date": "2025-01-19", - "generation": 1, - "base_model": "ehristoforu/testq-32b (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 56.165, - "co2_cost": 94.46409363188756 - } - }, - { - "id": "ehristoforu/tmoe_bfloat16_884bcb67207ebba2aaa909617185a7ef6459eae0_True", - "model": { - "name": "ehristoforu/tmoe", - "sha": "884bcb67207ebba2aaa909617185a7ef6459eae0", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2MoeForCausalLM", - "average_score": 3.652324770801055, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.11930234001338672, - "normalized_score": 11.930234001338672 - }, - "bbh": { - "name": "BBH", - "value": 0.30728601408520645, - "normalized_score": 3.2013609034113073 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0075528700906344415, - "normalized_score": 0.7552870090634441 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2231543624161074, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.36990624999999994, - "normalized_score": 3.904947916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11909906914893617, - "normalized_score": 2.1221187943262403 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-23", - "submission_date": "2025-01-23", - "generation": 0, - "base_model": "ehristoforu/tmoe", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 11.026, - "co2_cost": 7.344954699169198 - } - }, - { - "id": "ehristoforu/tmoe-v2_bfloat16_8d3d79d90a5f44995b0186da53c6031b6951010e_True", - "model": { - "name": "ehristoforu/tmoe-v2", - "sha": 
"8d3d79d90a5f44995b0186da53c6031b6951010e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2MoeForCausalLM", - "average_score": 5.712206463926581, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19026959578363187, - "normalized_score": 19.026959578363186 - }, - "bbh": { - "name": "BBH", - "value": 0.2896740649804915, - "normalized_score": 2.0623568616598433 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0022658610271903325, - "normalized_score": 0.22658610271903326 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.4150833333333333, - "normalized_score": 10.052083333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11003989361702128, - "normalized_score": 1.1155437352245863 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-24", - "submission_date": "2025-01-24", - "generation": 0, - "base_model": "ehristoforu/tmoe-v2", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 11.026, - "co2_cost": 7.7213718338413955 - } - }, - { - "id": "ehristoforu/trd-7b-it_float16_9cb438f907825bc2b7bb6d9ed6a8fa2693abc7ec_True", - "model": { - "name": "ehristoforu/trd-7b-it", - "sha": "9cb438f907825bc2b7bb6d9ed6a8fa2693abc7ec", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 6.323081412830128, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21847143357402804, - "normalized_score": 21.847143357402803 - }, - "bbh": { - "name": "BBH", - "value": 0.2990238931062931, - "normalized_score": 2.722591215675422 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03172205438066465, - "normalized_score": 3.1722054380664653 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.3794270833333333, - "normalized_score": 5.528385416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11785239361702128, - "normalized_score": 1.9835992907801419 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-31", - "submission_date": "2025-01-31", - "generation": 1, - "base_model": "ehristoforu/trd-7b-it (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 1.421301667057972 - } - }, - { - "id": "ehristoforu/ud-14b_float16_c80f651c121e601e50312b3376ba77a032d0bd34_True", - "model": { - "name": "ehristoforu/ud-14b", - "sha": "c80f651c121e601e50312b3376ba77a032d0bd34", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 16.222391948468367, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4235273518708139, - "normalized_score": 42.35273518708139 - }, - "bbh": { - "name": "BBH", - "value": 0.3323819044961654, - "normalized_score": 6.295264416110146 - }, - "math": { - "name": "MATH Level 5", - "value": 
0.1903323262839879, - "normalized_score": 19.033232628398792 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23741610738255034, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.43942708333333336, - "normalized_score": 13.928385416666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.24152260638297873, - "normalized_score": 15.724734042553193 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-28", - "submission_date": "2025-01-28", - "generation": 1, - "base_model": "ehristoforu/ud-14b (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 3.9387488120401515 - } - }, - { - "id": "elinas/Chronos-Gold-12B-1.0_bfloat16_cf76a4621b9dfc0c2e6d930756e6c7c9ce2b260b_True", - "model": { - "name": "elinas/Chronos-Gold-12B-1.0", - "sha": "cf76a4621b9dfc0c2e6d930756e6c7c9ce2b260b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.828167948467158, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3165656014929277, - "normalized_score": 31.656560149292766 - }, - "bbh": { - "name": "BBH", - "value": 0.5514664110708439, - "normalized_score": 35.90894700063131 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06948640483383686, - "normalized_score": 6.948640483383686 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3179530201342282, - "normalized_score": 9.060402684563762 - }, - "musr": { - "name": "MUSR", - "value": 0.47398958333333335, - "normalized_score": 19.415364583333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.351811835106383, - "normalized_score": 27.979092789598102 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-21", - "submission_date": "2024-09-15", - "generation": 1, - "base_model": "mistralai/Mistral-Nemo-Base-2407", - "hub_license": "apache-2.0", - "hub_hearts": 46, - "params_billions": 12.248, - "co2_cost": 3.00506212983133 - } - }, - { - "id": "ell44ot/gemma-2b-def_float16_f9f1f882322360354fbc7a71d44d9b0b9ddd87ee_False", - "model": { - "name": "ell44ot/gemma-2b-def", - "sha": "f9f1f882322360354fbc7a71d44d9b0b9ddd87ee", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "GemmaModel", - "average_score": 8.12291928068411, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.26930433472076315, - "normalized_score": 26.930433472076317 - }, - "bbh": { - "name": "BBH", - "value": 0.31586532094752634, - "normalized_score": 4.586419628734295 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02416918429003021, - "normalized_score": 2.416918429003021 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27348993288590606, - "normalized_score": 3.1319910514541416 - }, - "musr": { - "name": "MUSR", - "value": 0.36702083333333335, - "normalized_score": 5.3109375000000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15724734042553193, - "normalized_score": 6.3608156028368805 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - 
"upload_date": "2024-11-28", - "submission_date": "2024-11-28", - "generation": 1, - "base_model": "ell44ot/gemma-2b-def (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.546, - "co2_cost": 0.8945953968520545 - } - }, - { - "id": "euclaise/ReMask-3B_bfloat16_e094dae96097c2bc6f758101ee269c089b65a2cf_True", - "model": { - "name": "euclaise/ReMask-3B", - "sha": "e094dae96097c2bc6f758101ee269c089b65a2cf", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "StableLmForCausalLM", - "average_score": 7.294404589328096, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2419269759792905, - "normalized_score": 24.192697597929048 - }, - "bbh": { - "name": "BBH", - "value": 0.3516779692917367, - "normalized_score": 8.742082990875964 - }, - "math": { - "name": "MATH Level 5", - "value": 0.019637462235649546, - "normalized_score": 1.9637462235649545 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26677852348993286, - "normalized_score": 2.2371364653243813 - }, - "musr": { - "name": "MUSR", - "value": 0.33409375, - "normalized_score": 2.6617187500000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.13572140957446807, - "normalized_score": 3.9690455082742293 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-03-28", - "submission_date": "2024-08-10", - "generation": 0, - "base_model": "euclaise/ReMask-3B", - "hub_license": "cc-by-sa-4.0", - "hub_hearts": 15, - "params_billions": 2.795, - "co2_cost": 0.8936806068910378 - } - }, - { - "id": "eworojoshua/vas-01_float16_0eb818ab19c02344d853dccfe37dd459abf2ec04_True", - "model": { - "name": "eworojoshua/vas-01", - "sha": "0eb818ab19c02344d853dccfe37dd459abf2ec04", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.46642217879174, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7612479332615238, - "normalized_score": 76.1247933261524 - }, - "bbh": { - "name": "BBH", - "value": 0.5417819433732887, - "normalized_score": 34.808538150886584 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4735649546827795, - "normalized_score": 47.35649546827795 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.44323958333333335, - "normalized_score": 15.371614583333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4347573138297872, - "normalized_score": 37.19525709219858 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-24", - "submission_date": "2025-02-26", - "generation": 1, - "base_model": "eworojoshua/vas-01 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.3466237713459364 - } - }, - { - "id": "ewre324/Thinker-Llama-3.2-3B-Instruct-Reasoning_float16_7a425678e7770b059db7106f4c234895b975b705_False", - "model": { - "name": "ewre324/Thinker-Llama-3.2-3B-Instruct-Reasoning", - "sha": "7a425678e7770b059db7106f4c234895b975b705", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - 
"weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.331991514504523, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44388555698878973, - "normalized_score": 44.388555698878974 - }, - "bbh": { - "name": "BBH", - "value": 0.4273125047156003, - "normalized_score": 19.412583015522838 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08459214501510574, - "normalized_score": 8.459214501510575 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27684563758389263, - "normalized_score": 3.5794183445190177 - }, - "musr": { - "name": "MUSR", - "value": 0.36553125000000003, - "normalized_score": 7.191406250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2886469414893617, - "normalized_score": 20.960771276595743 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-07", - "submission_date": "2025-01-07", - "generation": 0, - "base_model": "ewre324/Thinker-Llama-3.2-3B-Instruct-Reasoning", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 1.1903661259480065 - } - }, - { - "id": "ewre324/Thinker-Qwen2.5-0.5B-Instruct-Reasoning_float16_e0d596dd855b37a444d275c9638ce7353b7ee5b6_False", - "model": { - "name": "ewre324/Thinker-Qwen2.5-0.5B-Instruct-Reasoning", - "sha": "e0d596dd855b37a444d275c9638ce7353b7ee5b6", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.066116427700267, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2476473534665798, - "normalized_score": 24.76473534665798 - }, - "bbh": { - "name": "BBH", - "value": 0.3292122979013761, - "normalized_score": 7.39460963380786 - }, - "math": { - "name": "MATH Level 5", - "value": 0.028700906344410877, - "normalized_score": 2.8700906344410875 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28523489932885904, - "normalized_score": 4.697986577181204 - }, - "musr": { - "name": "MUSR", - "value": 0.33821875, - "normalized_score": 1.47734375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16472739361702127, - "normalized_score": 7.191932624113473 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-07", - "submission_date": "2025-01-07", - "generation": 0, - "base_model": "ewre324/Thinker-Qwen2.5-0.5B-Instruct-Reasoning", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 1.0242398933317163 - } - }, - { - "id": "ewre324/Thinker-SmolLM2-135M-Instruct-Reasoning_float16_7dbd1a18e98892dbff1c6a51550ded17398e8518_False", - "model": { - "name": "ewre324/Thinker-SmolLM2-135M-Instruct-Reasoning", - "sha": "7dbd1a18e98892dbff1c6a51550ded17398e8518", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.843149272359046, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25836336476105626, - "normalized_score": 25.836336476105622 - }, - "bbh": { - "name": "BBH", - "value": 0.3071349750892843, - "normalized_score": 3.9733526107072055 - }, - "math": { - "name": "MATH Level 
5", - "value": 0.00906344410876133, - "normalized_score": 0.906344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2525167785234899, - "normalized_score": 0.33557046979865535 - }, - "musr": { - "name": "MUSR", - "value": 0.366125, - "normalized_score": 2.9656249999999993 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.109375, - "normalized_score": 1.041666666666666 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-07", - "submission_date": "2025-01-07", - "generation": 1, - "base_model": "ewre324/Thinker-SmolLM2-135M-Instruct-Reasoning (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 0.135, - "co2_cost": 0.6681034901288193 - } - }, - { - "id": "ewre324/ewre324-R1-SmolLM2-135M-Distill_float16_3592b13d6df2b6090819afed0be93b374b649b8d_True", - "model": { - "name": "ewre324/ewre324-R1-SmolLM2-135M-Distill", - "sha": "3592b13d6df2b6090819afed0be93b374b649b8d", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.164698963034765, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16489026893088118, - "normalized_score": 16.489026893088116 - }, - "bbh": { - "name": "BBH", - "value": 0.3041695757290421, - "normalized_score": 3.3830043659713684 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01283987915407855, - "normalized_score": 1.283987915407855 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.3409166666666667, - "normalized_score": 0.7812499999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11336436170212766, - "normalized_score": 1.4849290780141835 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-30", - "submission_date": "2025-01-30", - "generation": 1, - "base_model": "ewre324/ewre324-R1-SmolLM2-135M-Distill (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.135, - "co2_cost": 0.7058207649530425 - } - }, - { - "id": "experiment-llm/exp-3-q-r_float16_d4300d83f75f6d95fe44a18aa0099e37dcd7868a_True", - "model": { - "name": "experiment-llm/exp-3-q-r", - "sha": "d4300d83f75f6d95fe44a18aa0099e37dcd7868a", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 29.50441044971987, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6035785050333116, - "normalized_score": 60.357850503331164 - }, - "bbh": { - "name": "BBH", - "value": 0.5397159253811645, - "normalized_score": 33.9949174008218 - }, - "math": { - "name": "MATH Level 5", - "value": 0.27870090634441086, - "normalized_score": 27.870090634441087 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2936241610738255, - "normalized_score": 5.8165548098433995 - }, - "musr": { - "name": "MUSR", - "value": 0.43154166666666666, - "normalized_score": 12.142708333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43159906914893614, - "normalized_score": 36.844341016548455 - } - }, - "features": { - "is_not_available_on_hub": true, - 
"is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-02", - "submission_date": "2024-12-02", - "generation": 4, - "base_model": "rombodawg/Rombos-LLM-V2.5-Qwen-7b (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.4673112319684303 - } - }, - { - "id": "facebook/opt-1.3b_float16_3f5c25d0bc631cb57ac65913f76e22c2dfb61d62_False", - "model": { - "name": "facebook/opt-1.3b", - "sha": "3f5c25d0bc631cb57ac65913f76e22c2dfb61d62", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "OPTForCausalLM", - "average_score": 5.276689334204645, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23832985367713222, - "normalized_score": 23.83298536771322 - }, - "bbh": { - "name": "BBH", - "value": 0.3093947052760125, - "normalized_score": 3.6480520895226785 - }, - "math": { - "name": "MATH Level 5", - "value": 0.00906344410876133, - "normalized_score": 0.906344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2424496644295302, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.342, - "normalized_score": 2.0833333333333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11070478723404255, - "normalized_score": 1.1894208037825047 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2022-05-11", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "facebook/opt-1.3b", - "hub_license": "other", - "hub_hearts": 168, - "params_billions": 1.3, - "co2_cost": 0.8060095556904593 - } - }, - { - "id": "facebook/opt-30b_float16_ceea0a90ac0f6fae7c2c34bcb40477438c152546_False", - "model": { - "name": "facebook/opt-30b", - "sha": "ceea0a90ac0f6fae7c2c34bcb40477438c152546", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "OPTForCausalLM", - "average_score": 6.276874107966858, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2452991396162183, - "normalized_score": 24.52991396162183 - }, - "bbh": { - "name": "BBH", - "value": 0.30703447525623373, - "normalized_score": 3.4984293851759607 - }, - "math": { - "name": "MATH Level 5", - "value": 0.010574018126888218, - "normalized_score": 1.0574018126888218 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26929530201342283, - "normalized_score": 2.572706935123044 - }, - "musr": { - "name": "MUSR", - "value": 0.36041666666666666, - "normalized_score": 4.185416666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1163563829787234, - "normalized_score": 1.8173758865248217 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2022-05-11", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "facebook/opt-30b", - "hub_license": "other", - "hub_hearts": 133, - "params_billions": 30.0, - "co2_cost": 5.999689572535679 - } - }, - { - "id": "failspy/Llama-3-8B-Instruct-MopeyMule_bfloat16_d1cbf407efe727c6b9fc94f22d51ff4915e1856e_True", - "model": { - "name": "failspy/Llama-3-8B-Instruct-MopeyMule", - "sha": "d1cbf407efe727c6b9fc94f22d51ff4915e1856e", - "precision": "bfloat16", - "type": 
"fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 15.638132592588441, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6750444376476638, - "normalized_score": 67.50444376476638 - }, - "bbh": { - "name": "BBH", - "value": 0.383874490132152, - "normalized_score": 13.620495859752507 - }, - "math": { - "name": "MATH Level 5", - "value": 0.019637462235649546, - "normalized_score": 1.9637462235649545 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23909395973154363, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.35130208333333335, - "normalized_score": 2.2460937499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17644614361702127, - "normalized_score": 8.494015957446807 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-30", - "submission_date": "2024-09-21", - "generation": 0, - "base_model": "failspy/Llama-3-8B-Instruct-MopeyMule", - "hub_license": "other", - "hub_hearts": 78, - "params_billions": 8.03, - "co2_cost": 1.646271822481612 - } - }, - { - "id": "failspy/Llama-3-8B-Instruct-abliterated_bfloat16_dd67dd055661e4cbcedb0ed2431693d9cc3be6e0_True", - "model": { - "name": "failspy/Llama-3-8B-Instruct-abliterated", - "sha": "dd67dd055661e4cbcedb0ed2431693d9cc3be6e0", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.190256107675243, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5908888416069362, - "normalized_score": 59.088884160693624 - }, - "bbh": { - "name": "BBH", - "value": 0.4353752684977051, - "normalized_score": 18.86459884908717 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03851963746223565, - "normalized_score": 3.8519637462235647 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.41158333333333336, - "normalized_score": 10.514583333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2741855053191489, - "normalized_score": 19.35394503546099 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-07", - "submission_date": "2024-07-03", - "generation": 0, - "base_model": "failspy/Llama-3-8B-Instruct-abliterated", - "hub_license": "llama3", - "hub_hearts": 8, - "params_billions": 8.03, - "co2_cost": 1.4838118499630564 - } - }, - { - "id": "failspy/Meta-Llama-3-70B-Instruct-abliterated-v3.5_bfloat16_fc951b03d92972ab52ad9392e620eba6173526b9_True", - "model": { - "name": "failspy/Meta-Llama-3-70B-Instruct-abliterated-v3.5", - "sha": "fc951b03d92972ab52ad9392e620eba6173526b9", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.12935413704135, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7746867201248244, - "normalized_score": 77.46867201248244 - }, - "bbh": { - "name": "BBH", - "value": 0.574710022890038, - "normalized_score": 37.87133313079306 - }, - "math": { - "name": "MATH Level 
5", - "value": 0.1283987915407855, - "normalized_score": 12.83987915407855 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.39818749999999997, - "normalized_score": 7.9734375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44522938829787234, - "normalized_score": 38.35882092198581 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-28", - "submission_date": "2024-08-30", - "generation": 0, - "base_model": "failspy/Meta-Llama-3-70B-Instruct-abliterated-v3.5", - "hub_license": "llama3", - "hub_hearts": 43, - "params_billions": 70.554, - "co2_cost": 18.409422221815564 - } - }, - { - "id": "failspy/Meta-Llama-3-8B-Instruct-abliterated-v3_bfloat16_85a25be002841fe738a5267b6806473f36f86715_True", - "model": { - "name": "failspy/Meta-Llama-3-8B-Instruct-abliterated-v3", - "sha": "85a25be002841fe738a5267b6806473f36f86715", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.933089775002568, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7244533393617822, - "normalized_score": 72.44533393617823 - }, - "bbh": { - "name": "BBH", - "value": 0.4924562150856957, - "normalized_score": 27.3350514750733 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09592145015105741, - "normalized_score": 9.592145015105741 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.36218749999999994, - "normalized_score": 2.8401041666666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3653590425531915, - "normalized_score": 29.484338061465724 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-20", - "submission_date": "2025-02-12", - "generation": 0, - "base_model": "failspy/Meta-Llama-3-8B-Instruct-abliterated-v3", - "hub_license": "llama3", - "hub_hearts": 47, - "params_billions": 8.03, - "co2_cost": 0.6525271459553947 - } - }, - { - "id": "failspy/Phi-3-medium-4k-instruct-abliterated-v3_bfloat16_959b09eacf6cae85a8eb21b25e998addc89a367b_True", - "model": { - "name": "failspy/Phi-3-medium-4k-instruct-abliterated-v3", - "sha": "959b09eacf6cae85a8eb21b25e998addc89a367b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 31.85112078051007, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6319299458769398, - "normalized_score": 63.19299458769399 - }, - "bbh": { - "name": "BBH", - "value": 0.6304799176474429, - "normalized_score": 46.73283933573803 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1593655589123867, - "normalized_score": 15.93655589123867 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31711409395973156, - "normalized_score": 8.948545861297541 - }, - "musr": { - "name": "MUSR", - "value": 0.4604166666666667, - "normalized_score": 18.518749999999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4399933510638298, - "normalized_score": 37.77703900709219 - 
} - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-22", - "submission_date": "2024-07-29", - "generation": 0, - "base_model": "failspy/Phi-3-medium-4k-instruct-abliterated-v3", - "hub_license": "mit", - "hub_hearts": 23, - "params_billions": 13.96, - "co2_cost": 3.041962386455618 - } - }, - { - "id": "failspy/llama-3-70B-Instruct-abliterated_bfloat16_53ae9dafe8b3d163e05d75387575f8e9f43253d0_True", - "model": { - "name": "failspy/llama-3-70B-Instruct-abliterated", - "sha": "53ae9dafe8b3d163e05d75387575f8e9f43253d0", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 35.89001866123902, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8023389052159382, - "normalized_score": 80.23389052159382 - }, - "bbh": { - "name": "BBH", - "value": 0.6464853840398571, - "normalized_score": 48.93981832466943 - }, - "math": { - "name": "MATH Level 5", - "value": 0.243202416918429, - "normalized_score": 24.3202416918429 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28942953020134227, - "normalized_score": 5.257270693512303 - }, - "musr": { - "name": "MUSR", - "value": 0.4127604166666667, - "normalized_score": 10.528385416666671 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5145445478723404, - "normalized_score": 46.06050531914893 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-07", - "submission_date": "2024-07-03", - "generation": 0, - "base_model": "failspy/llama-3-70B-Instruct-abliterated", - "hub_license": "llama3", - "hub_hearts": 104, - "params_billions": 70.554, - "co2_cost": 18.748257920537103 - } - }, - { - "id": "fblgit/TheBeagle-v2beta-32B-MGS_bfloat16_56830f63e4a40378b7721ae966637b4678cc8784_False", - "model": { - "name": "fblgit/TheBeagle-v2beta-32B-MGS", - "sha": "56830f63e4a40378b7721ae966637b4678cc8784", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 42.642045426579536, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.518074265171966, - "normalized_score": 51.807426517196596 - }, - "bbh": { - "name": "BBH", - "value": 0.7032634749563558, - "normalized_score": 58.027976201167604 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4947129909365559, - "normalized_score": 49.47129909365559 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3825503355704698, - "normalized_score": 17.67337807606264 - }, - "musr": { - "name": "MUSR", - "value": 0.50075, - "normalized_score": 24.260416666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5915059840425532, - "normalized_score": 54.61177600472813 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-20", - "submission_date": "2024-10-30", - "generation": 1, - "base_model": "fblgit/TheBeagle-v2beta-32B-MGS (Merge)", - "hub_license": "other", - "hub_hearts": 17, - "params_billions": 32.764, - "co2_cost": 43.4101607790295 - } - }, - { - "id": 
"fblgit/TheBeagle-v2beta-32B-MGS_float16_56830f63e4a40378b7721ae966637b4678cc8784_False", - "model": { - "name": "fblgit/TheBeagle-v2beta-32B-MGS", - "sha": "56830f63e4a40378b7721ae966637b4678cc8784", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.286669657817164, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4503051902285935, - "normalized_score": 45.030519022859345 - }, - "bbh": { - "name": "BBH", - "value": 0.703542441088263, - "normalized_score": 58.06602977613295 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3942598187311178, - "normalized_score": 39.42598187311178 - }, - "gpqa": { - "name": "GPQA", - "value": 0.401006711409396, - "normalized_score": 20.134228187919465 - }, - "musr": { - "name": "MUSR", - "value": 0.5021145833333334, - "normalized_score": 24.497656250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5910904255319149, - "normalized_score": 54.565602836879435 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-20", - "submission_date": "2024-10-20", - "generation": 1, - "base_model": "fblgit/TheBeagle-v2beta-32B-MGS (Merge)", - "hub_license": "other", - "hub_hearts": 17, - "params_billions": 32.764, - "co2_cost": 11.366068492834712 - } - }, - { - "id": "fblgit/UNA-SimpleSmaug-34b-v1beta_bfloat16_4b62fccfc7e44c0a02c11a5279d98fafa6b922ba_True", - "model": { - "name": "fblgit/UNA-SimpleSmaug-34b-v1beta", - "sha": "4b62fccfc7e44c0a02c11a5279d98fafa6b922ba", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.2920916316972, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45562551806983254, - "normalized_score": 45.56255180698325 - }, - "bbh": { - "name": "BBH", - "value": 0.5286654104993475, - "normalized_score": 32.775788922324494 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07175226586102719, - "normalized_score": 7.175226586102719 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31711409395973156, - "normalized_score": 8.948545861297541 - }, - "musr": { - "name": "MUSR", - "value": 0.4255625, - "normalized_score": 11.961979166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4539561170212766, - "normalized_score": 39.328457446808514 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-02-05", - "submission_date": "2024-06-30", - "generation": 2, - "base_model": "jondurbin/bagel-34b-v0.2", - "hub_license": "apache-2.0", - "hub_hearts": 21, - "params_billions": 34.389, - "co2_cost": 6.328932494608793 - } - }, - { - "id": "fblgit/UNA-TheBeagle-7b-v1_bfloat16_866d3ee19f983728e21a624f8a27574960073f27_False", - "model": { - "name": "fblgit/UNA-TheBeagle-7b-v1", - "sha": "866d3ee19f983728e21a624f8a27574960073f27", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.646170777958677, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.36887236975669, - 
"normalized_score": 36.887236975669 - }, - "bbh": { - "name": "BBH", - "value": 0.5028691097522866, - "normalized_score": 30.173396964633465 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0770392749244713, - "normalized_score": 7.7039274924471295 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28439597315436244, - "normalized_score": 4.5861297539149914 - }, - "musr": { - "name": "MUSR", - "value": 0.4564375, - "normalized_score": 16.088020833333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3019448138297872, - "normalized_score": 22.43831264775413 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-01-09", - "submission_date": "2024-06-30", - "generation": 0, - "base_model": "fblgit/UNA-TheBeagle-7b-v1", - "hub_license": "cc-by-nc-nd-4.0", - "hub_hearts": 37, - "params_billions": 7.242, - "co2_cost": 1.1212776454447984 - } - }, - { - "id": "fblgit/UNA-ThePitbull-21.4B-v2_bfloat16_f12aac93ae9c852550a16816e16116c4f8e7dec0_True", - "model": { - "name": "fblgit/UNA-ThePitbull-21.4B-v2", - "sha": "f12aac93ae9c852550a16816e16116c4f8e7dec0", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.0265687976468, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3790387283518841, - "normalized_score": 37.90387283518841 - }, - "bbh": { - "name": "BBH", - "value": 0.635038821016254, - "normalized_score": 46.78807384004312 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1216012084592145, - "normalized_score": 12.16012084592145 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - "musr": { - "name": "MUSR", - "value": 0.3921666666666666, - "normalized_score": 6.420833333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3515625, - "normalized_score": 27.95138888888889 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-28", - "submission_date": "2024-06-30", - "generation": 0, - "base_model": "fblgit/UNA-ThePitbull-21.4B-v2", - "hub_license": "afl-3.0", - "hub_hearts": 16, - "params_billions": 21.421, - "co2_cost": 4.596827555203435 - } - }, - { - "id": "fblgit/cybertron-v4-qw7B-MGS_bfloat16_ea2aaf4f4000190235722a9ad4f5cd9e9091a64e_False", - "model": { - "name": "fblgit/cybertron-v4-qw7B-MGS", - "sha": "ea2aaf4f4000190235722a9ad4f5cd9e9091a64e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 32.40351871985633, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6263846593704703, - "normalized_score": 62.63846593704703 - }, - "bbh": { - "name": "BBH", - "value": 0.5591772533435835, - "normalized_score": 37.04162311029608 - }, - "math": { - "name": "MATH Level 5", - "value": 0.34894259818731116, - "normalized_score": 34.894259818731115 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3104026845637584, - "normalized_score": 8.05369127516779 - }, - "musr": { - "name": "MUSR", - "value": 0.43709375, - "normalized_score": 13.203385416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 
0.44730718085106386, - "normalized_score": 38.589686761229316 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-29", - "submission_date": "2024-10-29", - "generation": 1, - "base_model": "fblgit/cybertron-v4-qw7B-MGS (Merge)", - "hub_license": "other", - "hub_hearts": 15, - "params_billions": 7.616, - "co2_cost": 2.493477162325461 - } - }, - { - "id": "fblgit/cybertron-v4-qw7B-UNAMGS_bfloat16_ce9b1e991908f5b89f63a2e3212cf9a066906ed2_False", - "model": { - "name": "fblgit/cybertron-v4-qw7B-UNAMGS", - "sha": "ce9b1e991908f5b89f63a2e3212cf9a066906ed2", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 33.05949412905735, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6090240561709597, - "normalized_score": 60.902405617095965 - }, - "bbh": { - "name": "BBH", - "value": 0.5642509108139038, - "normalized_score": 37.70717271699329 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3731117824773414, - "normalized_score": 37.31117824773414 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3313758389261745, - "normalized_score": 10.850111856823268 - }, - "musr": { - "name": "MUSR", - "value": 0.4343333333333333, - "normalized_score": 12.691666666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4500498670212766, - "normalized_score": 38.894429669030735 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-18", - "submission_date": "2024-11-18", - "generation": 1, - "base_model": "fblgit/cybertron-v4-qw7B-UNAMGS (Merge)", - "hub_license": "other", - "hub_hearts": 9, - "params_billions": 7.616, - "co2_cost": 1.9906577828823062 - } - }, - { - "id": "fblgit/juanako-7b-UNA_bfloat16_b8ac85b603d5ee1ac619b2e1d0b3bb86c4eecb0c_False", - "model": { - "name": "fblgit/juanako-7b-UNA", - "sha": "b8ac85b603d5ee1ac619b2e1d0b3bb86c4eecb0c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.863068015988574, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4837276204914073, - "normalized_score": 48.37276204914072 - }, - "bbh": { - "name": "BBH", - "value": 0.507001145736535, - "normalized_score": 30.415072015961297 - }, - "math": { - "name": "MATH Level 5", - "value": 0.033987915407854986, - "normalized_score": 3.3987915407854987 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.46449999999999997, - "normalized_score": 17.162499999999994 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.277094414893617, - "normalized_score": 19.67715721040189 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-11-27", - "submission_date": "2024-06-30", - "generation": 0, - "base_model": "fblgit/juanako-7b-UNA", - "hub_license": "apache-2.0", - "hub_hearts": 23, - "params_billions": 7.242, - "co2_cost": 1.2635809849796131 - } - }, - { - "id": 
"fblgit/miniclaus-qw1.5B-UNAMGS_bfloat16_de590536ba82ffb7b4001dffb5f8b60d2087c319_False", - "model": { - "name": "fblgit/miniclaus-qw1.5B-UNAMGS", - "sha": "de590536ba82ffb7b4001dffb5f8b60d2087c319", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 17.04510204797917, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3348005514257725, - "normalized_score": 33.48005514257725 - }, - "bbh": { - "name": "BBH", - "value": 0.4238588294007628, - "normalized_score": 18.562863710456668 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10876132930513595, - "normalized_score": 10.876132930513595 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.42934374999999997, - "normalized_score": 12.234635416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2937167553191489, - "normalized_score": 21.52408392434988 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-01", - "submission_date": "2024-11-01", - "generation": 2, - "base_model": "Qwen/Qwen2.5-1.5B", - "hub_license": "other", - "hub_hearts": 8, - "params_billions": 1.777, - "co2_cost": 1.168255560000145 - } - }, - { - "id": "fblgit/miniclaus-qw1.5B-UNAMGS-GRPO_bfloat16_16df845e8a3e6160ea185891b0c19f9c951eaea7_False", - "model": { - "name": "fblgit/miniclaus-qw1.5B-UNAMGS-GRPO", - "sha": "16df845e8a3e6160ea185891b0c19f9c951eaea7", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 17.440457138521808, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3518364605912313, - "normalized_score": 35.18364605912313 - }, - "bbh": { - "name": "BBH", - "value": 0.423443453814005, - "normalized_score": 18.62661642012632 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11027190332326284, - "normalized_score": 11.027190332326283 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.42543749999999997, - "normalized_score": 11.813020833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2945478723404255, - "normalized_score": 21.616430260047277 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 3, - "base_model": "Qwen/Qwen2.5-1.5B", - "hub_license": "other", - "hub_hearts": 5, - "params_billions": 1.544, - "co2_cost": 1.7445232139324949 - } - }, - { - "id": "fblgit/pancho-v1-qw25-3B-UNAMGS_bfloat16_01143501cbc2c90961be5397c6945c6789815a60_False", - "model": { - "name": "fblgit/pancho-v1-qw25-3B-UNAMGS", - "sha": "01143501cbc2c90961be5397c6945c6789815a60", - "precision": "bfloat16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 23.860634894188152, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.536134124123991, - "normalized_score": 
53.6134124123991 - }, - "bbh": { - "name": "BBH", - "value": 0.49258278193390775, - "normalized_score": 28.66965021792132 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15709969788519637, - "normalized_score": 15.709969788519636 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.4027395833333333, - "normalized_score": 8.17578125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3765791223404255, - "normalized_score": 30.731013593380606 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-04", - "submission_date": "2024-11-12", - "generation": 2, - "base_model": "Qwen/Qwen2.5-3B", - "hub_license": "other", - "hub_hearts": 3, - "params_billions": 3.397, - "co2_cost": 2.353076156225762 - } - }, - { - "id": "fblgit/una-cybertron-7b-v2-bf16_bfloat16_7ab101a153740aec39e95ec02831c56f4eab7910_True", - "model": { - "name": "fblgit/una-cybertron-7b-v2-bf16", - "sha": "7ab101a153740aec39e95ec02831c56f4eab7910", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 17.217324719799368, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47371086494944525, - "normalized_score": 47.37108649494452 - }, - "bbh": { - "name": "BBH", - "value": 0.3973388920486269, - "normalized_score": 14.966964848379982 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04078549848942598, - "normalized_score": 4.078549848942599 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.4473229166666666, - "normalized_score": 14.482031250000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2442652925531915, - "normalized_score": 16.02947695035461 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-12-02", - "submission_date": "2024-06-30", - "generation": 0, - "base_model": "fblgit/una-cybertron-7b-v2-bf16", - "hub_license": "apache-2.0", - "hub_hearts": 116, - "params_billions": 7.242, - "co2_cost": 1.2684111497211332 - } - }, - { - "id": "fhai50032/RolePlayLake-7B_float16_4b1a6477bbdf6ce4a384d7e7ec1d99641f142bde_False", - "model": { - "name": "fhai50032/RolePlayLake-7B", - "sha": "4b1a6477bbdf6ce4a384d7e7ec1d99641f142bde", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.754717527935792, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5056594280952318, - "normalized_score": 50.56594280952318 - }, - "bbh": { - "name": "BBH", - "value": 0.5252170095233862, - "normalized_score": 33.47958591100428 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07250755287009064, - "normalized_score": 7.250755287009064 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.4459270833333333, - "normalized_score": 14.07421875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3159906914893617, - 
"normalized_score": 23.99896572104019 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-01-29", - "submission_date": "2025-01-17", - "generation": 1, - "base_model": "fhai50032/RolePlayLake-7B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 13, - "params_billions": 7.242, - "co2_cost": 0.8652534422583942 - } - }, - { - "id": "fhai50032/Unaligned-Thinker-PHI-4_float16_8f394ab92e3605d0e352585450ed97d2c4074d45_False", - "model": { - "name": "fhai50032/Unaligned-Thinker-PHI-4", - "sha": "8f394ab92e3605d0e352585450ed97d2c4074d45", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.899268178294438, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.056254072527560206, - "normalized_score": 5.625407252756021 - }, - "bbh": { - "name": "BBH", - "value": 0.6642576780946753, - "normalized_score": 51.92504876544175 - }, - "math": { - "name": "MATH Level 5", - "value": 0.33534743202416917, - "normalized_score": 33.53474320241692 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3808724832214765, - "normalized_score": 17.4496644295302 - }, - "musr": { - "name": "MUSR", - "value": 0.4678541666666667, - "normalized_score": 18.78177083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5147107712765957, - "normalized_score": 46.07897458628841 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-16", - "submission_date": "2025-01-17", - "generation": 2, - "base_model": "microsoft/phi-4", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 14.66, - "co2_cost": 1.804351802825741 - } - }, - { - "id": "flammenai/Llama3.1-Flammades-70B_bfloat16_48909a734460e667e3a7e91bd25f124ec3b2ba74_True", - "model": { - "name": "flammenai/Llama3.1-Flammades-70B", - "sha": "48909a734460e667e3a7e91bd25f124ec3b2ba74", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 36.994120533008164, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7058438277104748, - "normalized_score": 70.58438277104749 - }, - "bbh": { - "name": "BBH", - "value": 0.6659721866694542, - "normalized_score": 52.54794346693766 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20921450151057402, - "normalized_score": 20.921450151057403 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3540268456375839, - "normalized_score": 13.870246085011187 - }, - "musr": { - "name": "MUSR", - "value": 0.48705208333333333, - "normalized_score": 22.348177083333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.47523271276595747, - "normalized_score": 41.692523640661946 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-12", - "submission_date": "2024-10-13", - "generation": 1, - "base_model": "flammenai/Llama3.1-Flammades-70B (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 2, - "params_billions": 70.554, - "co2_cost": 20.569665756617702 - } - }, - { - "id": 
"flammenai/Mahou-1.2a-llama3-8B_bfloat16_3318b6f5f1839644bee287a3e5390f3e9f565a9e_False", - "model": { - "name": "flammenai/Mahou-1.2a-llama3-8B", - "sha": "3318b6f5f1839644bee287a3e5390f3e9f565a9e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.791261651208817, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.50925655039739, - "normalized_score": 50.925655039739 - }, - "bbh": { - "name": "BBH", - "value": 0.5093660540433169, - "normalized_score": 28.972587655292433 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08383685800604229, - "normalized_score": 8.38368580060423 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28859060402684567, - "normalized_score": 5.145413870246088 - }, - "musr": { - "name": "MUSR", - "value": 0.38466666666666666, - "normalized_score": 6.016666666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38173204787234044, - "normalized_score": 31.303560874704488 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-25", - "submission_date": "2024-09-03", - "generation": 1, - "base_model": "flammenai/Mahou-1.2a-llama3-8B (Merge)", - "hub_license": "llama3", - "hub_hearts": 6, - "params_billions": 8.03, - "co2_cost": 1.864824470117286 - } - }, - { - "id": "flammenai/Mahou-1.2a-mistral-7B_bfloat16_d45f61cca04da0c3359573102853fca1a0d3b252_False", - "model": { - "name": "flammenai/Mahou-1.2a-mistral-7B", - "sha": "d45f61cca04da0c3359573102853fca1a0d3b252", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.578990668864712, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4552010886669592, - "normalized_score": 45.52010886669592 - }, - "bbh": { - "name": "BBH", - "value": 0.5118111474458115, - "normalized_score": 31.166750037107487 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06873111782477341, - "normalized_score": 6.873111782477341 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27181208053691275, - "normalized_score": 2.9082774049216997 - }, - "musr": { - "name": "MUSR", - "value": 0.38962500000000005, - "normalized_score": 6.969791666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31632313829787234, - "normalized_score": 24.03590425531915 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-18", - "submission_date": "2024-09-03", - "generation": 1, - "base_model": "flammenai/Mahou-1.2a-mistral-7B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 6, - "params_billions": 7.242, - "co2_cost": 2.414893302171176 - } - }, - { - "id": "flammenai/Mahou-1.5-llama3.1-70B_bfloat16_49f45cc4c21e2ba7ed5c5e71f90ffd0bd9169e2d_True", - "model": { - "name": "flammenai/Mahou-1.5-llama3.1-70B", - "sha": "49f45cc4c21e2ba7ed5c5e71f90ffd0bd9169e2d", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 37.34491319969544, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - 
"value": 0.7146615424850509, - "normalized_score": 71.46615424850509 - }, - "bbh": { - "name": "BBH", - "value": 0.6650860641288713, - "normalized_score": 52.36957740630965 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20996978851963746, - "normalized_score": 20.996978851963746 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3540268456375839, - "normalized_score": 13.870246085011187 - }, - "musr": { - "name": "MUSR", - "value": 0.4950208333333333, - "normalized_score": 23.7109375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.47490026595744683, - "normalized_score": 41.65558510638298 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-14", - "submission_date": "2024-10-14", - "generation": 1, - "base_model": "flammenai/Mahou-1.5-llama3.1-70B (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 7, - "params_billions": 70.554, - "co2_cost": 20.519984535298892 - } - }, - { - "id": "flammenai/Mahou-1.5-mistral-nemo-12B_bfloat16_852561e74f1785bf7225bb28395db1fd9431fe31_True", - "model": { - "name": "flammenai/Mahou-1.5-mistral-nemo-12B", - "sha": "852561e74f1785bf7225bb28395db1fd9431fe31", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 26.88532589921436, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6751441730164851, - "normalized_score": 67.5144173016485 - }, - "bbh": { - "name": "BBH", - "value": 0.5522361927910235, - "normalized_score": 36.260510188013406 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08685800604229607, - "normalized_score": 8.685800604229607 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.4520416666666667, - "normalized_score": 16.471874999999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3602061170212766, - "normalized_score": 28.911790780141843 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-06", - "submission_date": "2024-10-07", - "generation": 1, - "base_model": "flammenai/Mahou-1.5-mistral-nemo-12B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 19, - "params_billions": 12.248, - "co2_cost": 2.9652639935554133 - } - }, - { - "id": "flammenai/flammen15-gutenberg-DPO-v1-7B_bfloat16_550cd9548cba1265cb1771c85ebe498789fdecb5_False", - "model": { - "name": "flammenai/flammen15-gutenberg-DPO-v1-7B", - "sha": "550cd9548cba1265cb1771c85ebe498789fdecb5", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.612698429198762, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47980580415519714, - "normalized_score": 47.98058041551971 - }, - "bbh": { - "name": "BBH", - "value": 0.5202983979716951, - "normalized_score": 32.66511308584827 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07628398791540786, - "normalized_score": 7.628398791540786 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28439597315436244, - "normalized_score": 4.5861297539149914 - }, - "musr": { - "name": "MUSR", - "value": 
0.4293125, - "normalized_score": 12.530729166666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3185671542553192, - "normalized_score": 24.28523936170213 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-05", - "submission_date": "2024-07-10", - "generation": 1, - "base_model": "flammenai/flammen15-gutenberg-DPO-v1-7B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 7.242, - "co2_cost": 1.2550592138285477 - } - }, - { - "id": "fluently-lm/FluentlyLM-Prinum_bfloat16_3f5e29069004437ce567183dfe2eb1fce262fede_True", - "model": { - "name": "fluently-lm/FluentlyLM-Prinum", - "sha": "3f5e29069004437ce567183dfe2eb1fce262fede", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 47.21693789116021, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.809033364805383, - "normalized_score": 80.9033364805383 - }, - "bbh": { - "name": "BBH", - "value": 0.7143813967889198, - "normalized_score": 59.482203418427986 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5400302114803626, - "normalized_score": 54.003021148036254 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38674496644295303, - "normalized_score": 18.232662192393736 - }, - "musr": { - "name": "MUSR", - "value": 0.44714583333333335, - "normalized_score": 17.259895833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5807845744680851, - "normalized_score": 53.42050827423167 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-16", - "submission_date": "2025-02-16", - "generation": 0, - "base_model": "fluently-lm/FluentlyLM-Prinum", - "hub_license": "mit", - "hub_hearts": 23, - "params_billions": 32.764, - "co2_cost": 21.252760892194704 - } - }, - { - "id": "fluently-lm/Llama-TI-8B_bfloat16_2ab7eb6daca1c850cc65cec04f4d374b1041d824_False", - "model": { - "name": "fluently-lm/Llama-TI-8B", - "sha": "2ab7eb6daca1c850cc65cec04f4d374b1041d824", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.062119760175154, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.28803906966847964, - "normalized_score": 28.80390696684797 - }, - "bbh": { - "name": "BBH", - "value": 0.520085504155627, - "normalized_score": 31.984332678202136 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19637462235649547, - "normalized_score": 19.637462235649547 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.4102708333333333, - "normalized_score": 12.683854166666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.343999335106383, - "normalized_score": 27.111037234042552 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-07", - "submission_date": "2024-12-07", - "generation": 1, - "base_model": "meta-llama/Llama-3.1-8B", - "hub_license": "apache-2.0", - "hub_hearts": 2, - 
"params_billions": 8.03, - "co2_cost": 1.3607848501476378 - } - }, - { - "id": "fluently-lm/Llama-TI-8B-Instruct_bfloat16_e93cf77978d3a1589de67df226b62383c460c15c_True", - "model": { - "name": "fluently-lm/Llama-TI-8B-Instruct", - "sha": "e93cf77978d3a1589de67df226b62383c460c15c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.67161684095501, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7716392505219485, - "normalized_score": 77.16392505219484 - }, - "bbh": { - "name": "BBH", - "value": 0.5252143041749421, - "normalized_score": 32.26686716202457 - }, - "math": { - "name": "MATH Level 5", - "value": 0.23036253776435045, - "normalized_score": 23.036253776435046 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2953020134228188, - "normalized_score": 6.040268456375841 - }, - "musr": { - "name": "MUSR", - "value": 0.38134375000000004, - "normalized_score": 9.234635416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37258976063829785, - "normalized_score": 30.287751182033094 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-07", - "submission_date": "2025-01-16", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.3940321957018207 - } - }, - { - "id": "fluently-sets/FalconThink3-10B-IT_float16_1a352e2efc0369ebd05a143d24e37124a2239b36_True", - "model": { - "name": "fluently-sets/FalconThink3-10B-IT", - "sha": "1a352e2efc0369ebd05a143d24e37124a2239b36", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 34.620674425220905, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7326216660682544, - "normalized_score": 73.26216660682545 - }, - "bbh": { - "name": "BBH", - "value": 0.620016981648187, - "normalized_score": 44.975804333935 - }, - "math": { - "name": "MATH Level 5", - "value": 0.24471299093655588, - "normalized_score": 24.47129909365559 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3347315436241611, - "normalized_score": 11.297539149888143 - }, - "musr": { - "name": "MUSR", - "value": 0.44788541666666665, - "normalized_score": 15.552343749999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4434840425531915, - "normalized_score": 38.16489361702128 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 2, - "base_model": "tiiuae/Falcon3-10B-Base", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 10.306, - "co2_cost": 1.7468140421406724 - } - }, - { - "id": "fluently-sets/reasoning-1-1k-demo_float16_483acbedc44430c2fcc2552a8f1121457c43035b_True", - "model": { - "name": "fluently-sets/reasoning-1-1k-demo", - "sha": "483acbedc44430c2fcc2552a8f1121457c43035b", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.341663670523, - "has_chat_template": true - }, - 
"evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7524800861713586, - "normalized_score": 75.24800861713587 - }, - "bbh": { - "name": "BBH", - "value": 0.6396692351083745, - "normalized_score": 48.94403407214196 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4282477341389728, - "normalized_score": 42.82477341389728 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33557046979865773, - "normalized_score": 11.409395973154364 - }, - "musr": { - "name": "MUSR", - "value": 0.4060625, - "normalized_score": 9.691145833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4773936170212766, - "normalized_score": 41.93262411347518 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-21", - "submission_date": "2024-12-21", - "generation": 2, - "base_model": "Qwen/Qwen2.5-14B", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 14.77, - "co2_cost": 3.287029190192277 - } - }, - { - "id": "formulae/mita-elite-sce-gen1.1-v1-7b-2-26-2025-exp_bfloat16_074a3c6455afd78e7ed02e5c8aa176cb02ce71c9_True", - "model": { - "name": "formulae/mita-elite-sce-gen1.1-v1-7b-2-26-2025-exp", - "sha": "074a3c6455afd78e7ed02e5c8aa176cb02ce71c9", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 5.330152301711151, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16139288199754429, - "normalized_score": 16.13928819975443 - }, - "bbh": { - "name": "BBH", - "value": 0.29763925404210967, - "normalized_score": 1.9727619262732075 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0015105740181268882, - "normalized_score": 0.1510574018126888 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2533557046979866, - "normalized_score": 0.44742729306487633 - }, - "musr": { - "name": "MUSR", - "value": 0.4219375, - "normalized_score": 11.3421875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11735372340425532, - "normalized_score": 1.9281914893617011 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-25", - "submission_date": "2025-02-25", - "generation": 1, - "base_model": "formulae/mita-elite-sce-gen1.1-v1-7b-2-26-2025-exp (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.6999443901946086 - } - }, - { - "id": "formulae/mita-elite-v1.1-7b-2-25-2025_bfloat16_8a144b6296deed37005fd5e8520e76c5ef740d25_True", - "model": { - "name": "formulae/mita-elite-v1.1-7b-2-25-2025", - "sha": "8a144b6296deed37005fd5e8520e76c5ef740d25", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 2.7660245822572627, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1249728498162653, - "normalized_score": 12.49728498162653 - }, - "bbh": { - "name": "BBH", - "value": 0.28673660666639783, - "normalized_score": 1.2532101774016817 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2483221476510067, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3487291666666667, - 
"normalized_score": 1.7578124999999993 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10979055851063829, - "normalized_score": 1.087839834515365 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-25", - "submission_date": "2025-02-25", - "generation": 1, - "base_model": "formulae/mita-elite-v1.1-7b-2-25-2025 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.6972605249137397 - } - }, - { - "id": "formulae/mita-elite-v1.1-gen2-7b-2-25-2025_bfloat16_6d7680a5ae3a6ac96464856180671e95051a814e_True", - "model": { - "name": "formulae/mita-elite-v1.1-gen2-7b-2-25-2025", - "sha": "6d7680a5ae3a6ac96464856180671e95051a814e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.1775097815514886, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.14108454456397912, - "normalized_score": 14.108454456397915 - }, - "bbh": { - "name": "BBH", - "value": 0.292375183445424, - "normalized_score": 1.9012033109847029 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2525167785234899, - "normalized_score": 0.33557046979865535 - }, - "musr": { - "name": "MUSR", - "value": 0.35409375000000004, - "normalized_score": 1.5950520833333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11012300531914894, - "normalized_score": 1.124778368794326 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-25", - "submission_date": "2025-02-25", - "generation": 1, - "base_model": "formulae/mita-elite-v1.1-gen2-7b-2-25-2025 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.6845783503955575 - } - }, - { - "id": "formulae/mita-elite-v1.2-7b-2-26-2025_bfloat16_813303bd136b6fbdf7282470e131a68992d3b7f8_True", - "model": { - "name": "formulae/mita-elite-v1.2-7b-2-26-2025", - "sha": "813303bd136b6fbdf7282470e131a68992d3b7f8", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 5.726216814779033, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.14800396281865452, - "normalized_score": 14.800396281865451 - }, - "bbh": { - "name": "BBH", - "value": 0.29300480737441686, - "normalized_score": 1.7364263031282199 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0022658610271903325, - "normalized_score": 0.22658610271903326 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.4286666666666667, - "normalized_score": 12.283333333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1186003989361702, - "normalized_score": 2.0667109929078 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-25", - "submission_date": "2025-02-25", - "generation": 1, - "base_model": "formulae/mita-elite-v1.2-7b-2-26-2025 (Merge)", - 
"hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.6878110636727482 - } - }, - { - "id": "formulae/mita-gen3-7b-2-26-2025_bfloat16_ce4eeafb6839c3230f892b9f3328f1a5b1dcfe2d_True", - "model": { - "name": "formulae/mita-gen3-7b-2-26-2025", - "sha": "ce4eeafb6839c3230f892b9f3328f1a5b1dcfe2d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 5.370051339073015, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1964144026737944, - "normalized_score": 19.64144026737944 - }, - "bbh": { - "name": "BBH", - "value": 0.2915705776174771, - "normalized_score": 2.4970370370370367 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0022658610271903325, - "normalized_score": 0.22658610271903326 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.3912083333333333, - "normalized_score": 6.467708333333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11236702127659574, - "normalized_score": 1.374113475177304 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-25", - "submission_date": "2025-02-25", - "generation": 1, - "base_model": "formulae/mita-gen3-7b-2-26-2025 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.6890507058814883 - } - }, - { - "id": "formulae/mita-gen3-v1.2-7b-2-26-2025_bfloat16_2ca9b92a98b1bc0873bb48f97e488955e5d453b1_True", - "model": { - "name": "formulae/mita-gen3-v1.2-7b-2-26-2025", - "sha": "2ca9b92a98b1bc0873bb48f97e488955e5d453b1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 5.4842868982669595, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2043577707150361, - "normalized_score": 20.43577707150361 - }, - "bbh": { - "name": "BBH", - "value": 0.30577476935056, - "normalized_score": 2.9759798497580423 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0022658610271903325, - "normalized_score": 0.22658610271903326 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.38999999999999996, - "normalized_score": 6.6166666666666645 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11278257978723404, - "normalized_score": 1.4202866430260035 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-25", - "submission_date": "2025-02-25", - "generation": 1, - "base_model": "formulae/mita-gen3-v1.2-7b-2-26-2025 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.6893794297608428 - } - }, - { - "id": "formulae/mita-math-v2.3-2-25-2025_bfloat16_ce5524268d5d97c766f1ed8675a5ae3b6137f57f_True", - "model": { - "name": "formulae/mita-math-v2.3-2-25-2025", - "sha": "ce5524268d5d97c766f1ed8675a5ae3b6137f57f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.592857717077292, - 
"has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.13733781920858879, - "normalized_score": 13.733781920858878 - }, - "bbh": { - "name": "BBH", - "value": 0.2949403673764691, - "normalized_score": 2.483286518149532 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25083892617449666, - "normalized_score": 0.11185682326622093 - }, - "musr": { - "name": "MUSR", - "value": 0.36975, - "normalized_score": 3.9187499999999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11178523936170212, - "normalized_score": 1.309471040189124 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-25", - "submission_date": "2025-02-25", - "generation": 1, - "base_model": "formulae/mita-math-v2.3-2-25-2025 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.6780721044617208 - } - }, - { - "id": "formulae/mita-v1-7b_float16_bc7f2503c0f365135dde256aaab0f3034b5141a4_False", - "model": { - "name": "formulae/mita-v1-7b", - "sha": "bc7f2503c0f365135dde256aaab0f3034b5141a4", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 5.852620472389429, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19723888172271792, - "normalized_score": 19.723888172271792 - }, - "bbh": { - "name": "BBH", - "value": 0.3003216459152819, - "normalized_score": 2.731523677549061 - }, - "math": { - "name": "MATH Level 5", - "value": 0.002265861027190332, - "normalized_score": 0.2265861027190332 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.41520833333333335, - "normalized_score": 10.801041666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1146941489361702, - "normalized_score": 1.6326832151300221 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-24", - "submission_date": "2025-02-24", - "generation": 1, - "base_model": "formulae/mita-v1-7b (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 0.6772915173741988 - } - }, - { - "id": "formulae/mita-v1.1-7b-2-24-2025_bfloat16_14d93a482020ed18bd43a886d250d5abc0eebff6_False", - "model": { - "name": "formulae/mita-v1.1-7b-2-24-2025", - "sha": "14d93a482020ed18bd43a886d250d5abc0eebff6", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 29.48261068378812, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.34122018466557624, - "normalized_score": 34.12201846655762 - }, - "bbh": { - "name": "BBH", - "value": 0.5442430910797442, - "normalized_score": 35.44089953030106 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4350453172205438, - "normalized_score": 43.50453172205438 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3145973154362416, - "normalized_score": 8.612975391498878 - }, - "musr": { - "name": "MUSR", - "value": 0.45569791666666665, - "normalized_score": 16.062239583333337 - }, - "mmlu_pro": { - 
"name": "MMLU-PRO", - "value": 0.4523769946808511, - "normalized_score": 39.15299940898345 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-24", - "submission_date": "2025-02-24", - "generation": 1, - "base_model": "formulae/mita-v1.1-7b-2-24-2025 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.666018893029639 - } - }, - { - "id": "formulae/mita-v1.2-7b-2-24-2025_bfloat16_324ed6a142141697773e3d627365a6c297d6ae58_False", - "model": { - "name": "formulae/mita-v1.2-7b-2-24-2025", - "sha": "324ed6a142141697773e3d627365a6c297d6ae58", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 24.86325007402709, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.256415200556745, - "normalized_score": 25.6415200556745 - }, - "bbh": { - "name": "BBH", - "value": 0.4919464940215105, - "normalized_score": 28.413176799944807 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4879154078549849, - "normalized_score": 48.79154078549849 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3062080536912752, - "normalized_score": 7.494407158836691 - }, - "musr": { - "name": "MUSR", - "value": 0.4343958333333333, - "normalized_score": 12.632812500000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33585438829787234, - "normalized_score": 26.206043144208035 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-24", - "submission_date": "2025-02-24", - "generation": 1, - "base_model": "formulae/mita-v1.2-7b-2-24-2025 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.637930641049308 - } - }, - { - "id": "frameai/Loxa-4B_float16_502156f3d50b94d48a7a7c0569c8c0b7492ff02a_False", - "model": { - "name": "frameai/Loxa-4B", - "sha": "502156f3d50b94d48a7a7c0569c8c0b7492ff02a", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.545787467583636, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47648350820268, - "normalized_score": 47.648350820268 - }, - "bbh": { - "name": "BBH", - "value": 0.42171373309002896, - "normalized_score": 18.307902854016177 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1095166163141994, - "normalized_score": 10.95166163141994 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.33765625, - "normalized_score": 3.873697916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28016954787234044, - "normalized_score": 20.01883865248227 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-13", - "submission_date": "2025-01-14", - "generation": 0, - "base_model": "frameai/Loxa-4B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 4.018, - "co2_cost": 1.5449268287887865 - } - }, - { - "id": 
"freewheelin/free-evo-qwen72b-v0.8-re_float16_24e301d8fbef8ada12be42156b01c827ff594962_False", - "model": { - "name": "freewheelin/free-evo-qwen72b-v0.8-re", - "sha": "24e301d8fbef8ada12be42156b01c827ff594962", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 32.4749309635116, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.533086654521115, - "normalized_score": 53.30866545211151 - }, - "bbh": { - "name": "BBH", - "value": 0.6127477065378042, - "normalized_score": 45.31740264996691 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18051359516616314, - "normalized_score": 18.051359516616312 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3565436241610738, - "normalized_score": 14.205816554809845 - }, - "musr": { - "name": "MUSR", - "value": 0.4871666666666667, - "normalized_score": 20.962499999999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4870345744680851, - "normalized_score": 43.00384160756501 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-02", - "submission_date": "2024-09-15", - "generation": 0, - "base_model": "freewheelin/free-evo-qwen72b-v0.8-re", - "hub_license": "mit", - "hub_hearts": 4, - "params_billions": 72.288, - "co2_cost": 23.57958120720033 - } - }, - { - "id": "freewheelin/free-solar-evo-v0.1_float16_233efd607ae0abbd7b46eded2ee7889892b7bdbb_True", - "model": { - "name": "freewheelin/free-solar-evo-v0.1", - "sha": "233efd607ae0abbd7b46eded2ee7889892b7bdbb", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.42145204334013, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20500715878313985, - "normalized_score": 20.500715878313983 - }, - "bbh": { - "name": "BBH", - "value": 0.4502211109638701, - "normalized_score": 22.635182738331654 - }, - "math": { - "name": "MATH Level 5", - "value": 0.008308157099697885, - "normalized_score": 0.8308157099697886 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.4945833333333334, - "normalized_score": 22.256249999999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3414228723404255, - "normalized_score": 26.824763593380613 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-18", - "submission_date": "2024-08-07", - "generation": 0, - "base_model": "freewheelin/free-solar-evo-v0.1", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 10.732, - "co2_cost": 1.6022221103142726 - } - }, - { - "id": "freewheelin/free-solar-evo-v0.11_float16_17fc24a557bd3c3836abc9f6a367c803cba0cccd_True", - "model": { - "name": "freewheelin/free-solar-evo-v0.11", - "sha": "17fc24a557bd3c3836abc9f6a367c803cba0cccd", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.77976316256584, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 
0.20265894493277836, - "normalized_score": 20.265894493277834 - }, - "bbh": { - "name": "BBH", - "value": 0.4545155032474882, - "normalized_score": 23.18242496978891 - }, - "math": { - "name": "MATH Level 5", - "value": 0.008308157099697885, - "normalized_score": 0.8308157099697886 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28523489932885904, - "normalized_score": 4.697986577181204 - }, - "musr": { - "name": "MUSR", - "value": 0.5052187499999999, - "normalized_score": 24.285677083333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34674202127659576, - "normalized_score": 27.41578014184397 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-24", - "submission_date": "2024-08-07", - "generation": 0, - "base_model": "freewheelin/free-solar-evo-v0.11", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 10.732, - "co2_cost": 1.6270039573765567 - } - }, - { - "id": "freewheelin/free-solar-evo-v0.13_float16_2a7eb72f84c54898630f9db470eee0f936a64396_True", - "model": { - "name": "freewheelin/free-solar-evo-v0.13", - "sha": "2a7eb72f84c54898630f9db470eee0f936a64396", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.40590132423633, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2320598234905606, - "normalized_score": 23.20598234905606 - }, - "bbh": { - "name": "BBH", - "value": 0.4554839670962904, - "normalized_score": 23.35420388572778 - }, - "math": { - "name": "MATH Level 5", - "value": 0.012084592145015106, - "normalized_score": 1.2084592145015105 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28859060402684567, - "normalized_score": 5.145413870246088 - }, - "musr": { - "name": "MUSR", - "value": 0.50515625, - "normalized_score": 24.077864583333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34699135638297873, - "normalized_score": 27.44348404255319 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-28", - "submission_date": "2024-08-07", - "generation": 0, - "base_model": "freewheelin/free-solar-evo-v0.13", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 10.732, - "co2_cost": 1.6319117694403533 - } - }, - { - "id": "fulim/FineLlama-3.1-8B_float16_a0e5599180fe810c2be5310196d07d1619b6f8e7_False", - "model": { - "name": "fulim/FineLlama-3.1-8B", - "sha": "a0e5599180fe810c2be5310196d07d1619b6f8e7", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.250843761720356, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.14388267574480157, - "normalized_score": 14.388267574480157 - }, - "bbh": { - "name": "BBH", - "value": 0.456920741562608, - "normalized_score": 22.46259684566392 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04758308157099698, - "normalized_score": 4.758308157099698 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29278523489932884, - "normalized_score": 5.7046979865771785 - }, - "musr": { - "name": "MUSR", - "value": 0.38673958333333336, - "normalized_score": 8.109114583333335 - }, - "mmlu_pro": { - 
"name": "MMLU-PRO", - "value": 0.31673869680851063, - "normalized_score": 24.082077423167846 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-15", - "submission_date": "2024-12-15", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.0, - "co2_cost": 1.591880040179111 - } - }, - { - "id": "gabrielmbmb/SmolLM-1.7B-Instruct-IFEval_bfloat16_ac5d711adc05ccfe1b1b912d5561d98f6afeeb40_True", - "model": { - "name": "gabrielmbmb/SmolLM-1.7B-Instruct-IFEval", - "sha": "ac5d711adc05ccfe1b1b912d5561d98f6afeeb40", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.399069361603981, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23058595637353335, - "normalized_score": 23.058595637353335 - }, - "bbh": { - "name": "BBH", - "value": 0.313843378282092, - "normalized_score": 4.50167515878636 - }, - "math": { - "name": "MATH Level 5", - "value": 0.010574018126888218, - "normalized_score": 1.0574018126888218 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2533557046979866, - "normalized_score": 0.44742729306487633 - }, - "musr": { - "name": "MUSR", - "value": 0.33276041666666667, - "normalized_score": 1.5950520833333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11560837765957446, - "normalized_score": 1.7342641843971618 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-01", - "submission_date": "2024-10-11", - "generation": 2, - "base_model": "HuggingFaceTB/SmolLM-1.7B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.711, - "co2_cost": 0.26949076825136253 - } - }, - { - "id": "gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA_float16_6b0271a98b8875a65972ed54b0d636d8236ea60b_False", - "model": { - "name": "gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA", - "sha": "6b0271a98b8875a65972ed54b0d636d8236ea60b", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 12.108403733798744, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.40094615619888563, - "normalized_score": 40.094615619888565 - }, - "bbh": { - "name": "BBH", - "value": 0.3984844272016949, - "normalized_score": 15.276579446085892 - }, - "math": { - "name": "MATH Level 5", - "value": 0.019637462235649546, - "normalized_score": 1.9637462235649545 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28439597315436244, - "normalized_score": 4.5861297539149914 - }, - "musr": { - "name": "MUSR", - "value": 0.36504166666666665, - "normalized_score": 3.463541666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16539228723404256, - "normalized_score": 7.265809692671395 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-22", - "submission_date": "2024-09-23", - "generation": 1, - "base_model": "gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA (Merge)", 
- "hub_license": "llama3.1", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 2.6913479143926415 - } - }, - { - "id": "gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES_float16_80569e49b5aba960a5cd91281dd9eef92aeff9a3_True", - "model": { - "name": "gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES", - "sha": "80569e49b5aba960a5cd91281dd9eef92aeff9a3", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.99904174519205, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45505148561372716, - "normalized_score": 45.50514856137272 - }, - "bbh": { - "name": "BBH", - "value": 0.5043660783243713, - "normalized_score": 28.91423515333936 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1299093655589124, - "normalized_score": 12.990936555891238 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26677852348993286, - "normalized_score": 2.2371364653243813 - }, - "musr": { - "name": "MUSR", - "value": 0.37375, - "normalized_score": 6.58541666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36785239361702127, - "normalized_score": 29.76137706855792 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-19", - "submission_date": "2024-09-19", - "generation": 1, - "base_model": "gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.9227136711196462 - } - }, - { - "id": "gbueno86/Brinebreath-Llama-3.1-70B_bfloat16_c508ecf356167e8c498c6fa3937ba30a82208983_True", - "model": { - "name": "gbueno86/Brinebreath-Llama-3.1-70B", - "sha": "c508ecf356167e8c498c6fa3937ba30a82208983", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 36.25499179659365, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5532952565858589, - "normalized_score": 55.32952565858589 - }, - "bbh": { - "name": "BBH", - "value": 0.6880562247706813, - "normalized_score": 55.46361848802468 - }, - "math": { - "name": "MATH Level 5", - "value": 0.297583081570997, - "normalized_score": 29.758308157099698 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3464765100671141, - "normalized_score": 12.863534675615215 - }, - "musr": { - "name": "MUSR", - "value": 0.45406250000000004, - "normalized_score": 17.49114583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5196143617021277, - "normalized_score": 46.62381796690308 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-23", - "submission_date": "2024-08-29", - "generation": 1, - "base_model": "gbueno86/Brinebreath-Llama-3.1-70B (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 4, - "params_billions": 70.554, - "co2_cost": 21.119508800657457 - } - }, - { - "id": "gbueno86/Meta-LLama-3-Cat-Smaug-LLama-70b_bfloat16_2d73b7e1c7157df482555944d6a6b1362bc6c3c5_True", - "model": { - "name": "gbueno86/Meta-LLama-3-Cat-Smaug-LLama-70b", - "sha": "2d73b7e1c7157df482555944d6a6b1362bc6c3c5", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": 
"Original", - "architecture": "LlamaForCausalLM", - "average_score": 38.69613307300818, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8071849359698933, - "normalized_score": 80.71849359698932 - }, - "bbh": { - "name": "BBH", - "value": 0.6674314931312052, - "normalized_score": 51.50838639894525 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2938066465256798, - "normalized_score": 29.38066465256798 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3271812080536913, - "normalized_score": 10.290827740492169 - }, - "musr": { - "name": "MUSR", - "value": 0.43682291666666667, - "normalized_score": 15.002864583333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5074800531914894, - "normalized_score": 45.27556146572104 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-24", - "submission_date": "2024-06-27", - "generation": 1, - "base_model": "gbueno86/Meta-LLama-3-Cat-Smaug-LLama-70b (Merge)", - "hub_license": "llama3", - "hub_hearts": 1, - "params_billions": 70.554, - "co2_cost": 21.804586425941253 - } - }, - { - "id": "ghost-x/ghost-8b-beta-1608_bfloat16_6d1b3853aab774af5a4db21ff9d5764918fb48f5_True", - "model": { - "name": "ghost-x/ghost-8b-beta-1608", - "sha": "6d1b3853aab774af5a4db21ff9d5764918fb48f5", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.04724368250191, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42727407722620425, - "normalized_score": 42.72740772262043 - }, - "bbh": { - "name": "BBH", - "value": 0.45165496100352914, - "normalized_score": 23.463963859654836 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06948640483383686, - "normalized_score": 6.948640483383686 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.35158333333333336, - "normalized_score": 1.58125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2839926861702128, - "normalized_score": 20.44363179669031 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-18", - "submission_date": "2024-09-17", - "generation": 1, - "base_model": "ghost-x/ghost-8b-beta", - "hub_license": "other", - "hub_hearts": 31, - "params_billions": 8.03, - "co2_cost": 1.6978621748692648 - } - }, - { - "id": "glaiveai/Reflection-Llama-3.1-70B_bfloat16_086bd2658e00345808b31758ebb8f7e2c6d9897c_True", - "model": { - "name": "glaiveai/Reflection-Llama-3.1-70B", - "sha": "086bd2658e00345808b31758ebb8f7e2c6d9897c", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 34.5194783867829, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5990571683134085, - "normalized_score": 59.90571683134084 - }, - "bbh": { - "name": "BBH", - "value": 0.5681010035620444, - "normalized_score": 37.96048632437056 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2756797583081571, - "normalized_score": 27.567975830815712 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3145973154362416, - "normalized_score": 
8.612975391498878 - }, - "musr": { - "name": "MUSR", - "value": 0.43803125000000004, - "normalized_score": 13.720572916666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.6341422872340425, - "normalized_score": 59.349143026004725 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-10-07", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 69.5, - "co2_cost": 50.48755279544398 - } - }, - { - "id": "gmonsoon/SahabatAI-Llama-11B-Test_bfloat16_f6340b95d6e6cf766b6de29d36ee0db373ef175b_False", - "model": { - "name": "gmonsoon/SahabatAI-Llama-11B-Test", - "sha": "f6340b95d6e6cf766b6de29d36ee0db373ef175b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.26561908313948, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.33757319467900726, - "normalized_score": 33.75731946790073 - }, - "bbh": { - "name": "BBH", - "value": 0.4727584153058988, - "normalized_score": 24.457264432236542 - }, - "math": { - "name": "MATH Level 5", - "value": 0.030966767371601207, - "normalized_score": 3.096676737160121 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28187919463087246, - "normalized_score": 4.250559284116329 - }, - "musr": { - "name": "MUSR", - "value": 0.40013541666666663, - "normalized_score": 7.7835937500000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3182347074468085, - "normalized_score": 24.248300827423165 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-22", - "submission_date": "2024-11-23", - "generation": 1, - "base_model": "gmonsoon/SahabatAI-Llama-11B-Test (Merge)", - "hub_license": "llama3", - "hub_hearts": 0, - "params_billions": 11.52, - "co2_cost": 2.0999437357356396 - } - }, - { - "id": "gmonsoon/SahabatAI-MediChatIndo-8B-v1_bfloat16_2f7daa8eb5ad216ce9ebcd70dc77e5b44fb977b0_True", - "model": { - "name": "gmonsoon/SahabatAI-MediChatIndo-8B-v1", - "sha": "2f7daa8eb5ad216ce9ebcd70dc77e5b44fb977b0", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.29986493129456, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.41628323958208663, - "normalized_score": 41.62832395820867 - }, - "bbh": { - "name": "BBH", - "value": 0.4508834027881236, - "normalized_score": 23.64009974170933 - }, - "math": { - "name": "MATH Level 5", - "value": 0.061933534743202415, - "normalized_score": 6.193353474320242 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.3753958333333333, - "normalized_score": 4.557812500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3107546542553192, - "normalized_score": 23.417183806146575 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-19", - "submission_date": "2024-11-19", - "generation": 1, - "base_model": 
"gmonsoon/SahabatAI-MediChatIndo-8B-v1 (Merge)", - "hub_license": "llama3", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3534438936956568 - } - }, - { - "id": "gmonsoon/SahabatAI-Rebase-8B-Test_bfloat16_aef1b4c94595f3ef110d3d69724828a2fb416b5d_True", - "model": { - "name": "gmonsoon/SahabatAI-Rebase-8B-Test", - "sha": "aef1b4c94595f3ef110d3d69724828a2fb416b5d", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.51679147976689, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5156263159527831, - "normalized_score": 51.562631595278305 - }, - "bbh": { - "name": "BBH", - "value": 0.522960549734047, - "normalized_score": 32.00222111760954 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1148036253776435, - "normalized_score": 11.48036253776435 - }, - "gpqa": { - "name": "GPQA", - "value": 0.287751677852349, - "normalized_score": 5.033557046979867 - }, - "musr": { - "name": "MUSR", - "value": 0.41328125, - "normalized_score": 11.426822916666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3663563829787234, - "normalized_score": 29.595153664302604 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-21", - "submission_date": "2024-11-23", - "generation": 1, - "base_model": "gmonsoon/SahabatAI-Rebase-8B-Test (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3043100372014074 - } - }, - { - "id": "gmonsoon/StockSeaLLMs-7B-v1_float16_2431fe5e4a3f63984c2936cf1cf68b3c7172cc20_True", - "model": { - "name": "gmonsoon/StockSeaLLMs-7B-v1", - "sha": "2431fe5e4a3f63984c2936cf1cf68b3c7172cc20", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 25.093450531516652, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4599218961245052, - "normalized_score": 45.99218961245052 - }, - "bbh": { - "name": "BBH", - "value": 0.5271087932535433, - "normalized_score": 34.01262496222566 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19637462235649547, - "normalized_score": 19.637462235649547 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.421375, - "normalized_score": 11.071875000000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39519614361702127, - "normalized_score": 32.79957151300236 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-20", - "submission_date": "2024-11-20", - "generation": 1, - "base_model": "gmonsoon/StockSeaLLMs-7B-v1 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.393780624954565 - } - }, - { - "id": "gmonsoon/gemma2-9b-sahabatai-v1-instruct-BaseTIES_bfloat16_43296081051afe5d7a426b86a6d73104efab440b_True", - "model": { - "name": "gmonsoon/gemma2-9b-sahabatai-v1-instruct-BaseTIES", - "sha": "43296081051afe5d7a426b86a6d73104efab440b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 
33.8045690236527, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7377923908562614, - "normalized_score": 73.77923908562614 - }, - "bbh": { - "name": "BBH", - "value": 0.6077244532441547, - "normalized_score": 43.401341987357206 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19939577039274925, - "normalized_score": 19.939577039274926 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32046979865771813, - "normalized_score": 9.395973154362418 - }, - "musr": { - "name": "MUSR", - "value": 0.47780208333333335, - "normalized_score": 19.12526041666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43467420212765956, - "normalized_score": 37.186022458628834 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-16", - "submission_date": "2024-11-17", - "generation": 1, - "base_model": "gmonsoon/gemma2-9b-sahabatai-v1-instruct-BaseTIES (Merge)", - "hub_license": "gemma", - "hub_hearts": 2, - "params_billions": 9.242, - "co2_cost": 3.5629694373942296 - } - }, - { - "id": "godlikehhd/alpaca_data_full_2_float16_703dbf51c1ae5721f1313d1af82bd18ed38c4910_False", - "model": { - "name": "godlikehhd/alpaca_data_full_2", - "sha": "703dbf51c1ae5721f1313d1af82bd18ed38c4910", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 16.073236944099644, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.31781450994472443, - "normalized_score": 31.781450994472443 - }, - "bbh": { - "name": "BBH", - "value": 0.4216953430035033, - "normalized_score": 18.446949715094675 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09290030211480363, - "normalized_score": 9.290030211480364 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.40515625000000005, - "normalized_score": 9.944531250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.285405585106383, - "normalized_score": 20.600620567375884 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-14", - "submission_date": "2025-01-14", - "generation": 0, - "base_model": "godlikehhd/alpaca_data_full_2", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.544, - "co2_cost": 1.4734952780887614 - } - }, - { - "id": "godlikehhd/alpaca_data_full_3B_float16_8a49b5791eede0b460b64ff5bf62b26b10c698d9_False", - "model": { - "name": "godlikehhd/alpaca_data_full_3B", - "sha": "8a49b5791eede0b460b64ff5bf62b26b10c698d9", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 21.16254856881883, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.36957162550920447, - "normalized_score": 36.957162550920444 - }, - "bbh": { - "name": "BBH", - "value": 0.46841893776834337, - "normalized_score": 25.1691372567158 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1336858006042296, - "normalized_score": 13.36858006042296 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - 
"normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.4954791666666667, - "normalized_score": 21.6015625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.335688164893617, - "normalized_score": 26.18757387706856 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-15", - "submission_date": "2025-01-16", - "generation": 0, - "base_model": "godlikehhd/alpaca_data_full_3B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.086, - "co2_cost": 1.6983988550920976 - } - }, - { - "id": "godlikehhd/alpaca_data_ifd_max_2600_float16_91d51354719b002d8dcaa61ab6e2f30c8a6778ca_False", - "model": { - "name": "godlikehhd/alpaca_data_ifd_max_2600", - "sha": "91d51354719b002d8dcaa61ab6e2f30c8a6778ca", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 15.169551135723685, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3042504997850149, - "normalized_score": 30.425049978501487 - }, - "bbh": { - "name": "BBH", - "value": 0.40285133876405865, - "normalized_score": 15.966392816231314 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09894259818731117, - "normalized_score": 9.894259818731117 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.3508645833333333, - "normalized_score": 6.39140625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29163896276595747, - "normalized_score": 21.293218085106382 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-14", - "submission_date": "2025-01-14", - "generation": 0, - "base_model": "godlikehhd/alpaca_data_ifd_max_2600", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.544, - "co2_cost": 1.9333451087343216 - } - }, - { - "id": "godlikehhd/alpaca_data_ifd_max_2600_3B_float16_315f9cdb7fbbaa343c45493e5153a6b5d5ed34e4_False", - "model": { - "name": "godlikehhd/alpaca_data_ifd_max_2600_3B", - "sha": "315f9cdb7fbbaa343c45493e5153a6b5d5ed34e4", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 18.603034635124093, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.298155560579263, - "normalized_score": 29.8155560579263 - }, - "bbh": { - "name": "BBH", - "value": 0.4626377955326701, - "normalized_score": 24.472518592611554 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1593655589123867, - "normalized_score": 15.93655589123867 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.43455208333333334, - "normalized_score": 12.952343750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32878989361702127, - "normalized_score": 25.42109929078014 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-15", - 
"submission_date": "2025-01-16", - "generation": 0, - "base_model": "godlikehhd/alpaca_data_ifd_max_2600_3B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.086, - "co2_cost": 2.3536223757152306 - } - }, - { - "id": "godlikehhd/alpaca_data_ifd_me_max_5200_float16_8ee0cd7292a326a10be50321e5a58c0e785f04c7_False", - "model": { - "name": "godlikehhd/alpaca_data_ifd_me_max_5200", - "sha": "8ee0cd7292a326a10be50321e5a58c0e785f04c7", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 16.250397979625767, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.36832271705740766, - "normalized_score": 36.832271705740766 - }, - "bbh": { - "name": "BBH", - "value": 0.4153453015610935, - "normalized_score": 16.82395652793481 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09743202416918428, - "normalized_score": 9.743202416918429 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.3482604166666667, - "normalized_score": 6.599218750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29820478723404253, - "normalized_score": 22.022754137115836 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-09", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.544, - "co2_cost": 1.754302786120011 - } - }, - { - "id": "godlikehhd/alpaca_data_ifd_min_2600_float16_67ea00ab824c427676699f28900d2851eee2f05e_False", - "model": { - "name": "godlikehhd/alpaca_data_ifd_min_2600", - "sha": "67ea00ab824c427676699f28900d2851eee2f05e", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 16.51777329215277, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3749673089624419, - "normalized_score": 37.49673089624419 - }, - "bbh": { - "name": "BBH", - "value": 0.4219047173013076, - "normalized_score": 18.61162063219574 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09667673716012085, - "normalized_score": 9.667673716012084 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.36562500000000003, - "normalized_score": 6.703125000000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.289311835106383, - "normalized_score": 21.034648345153663 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-15", - "submission_date": "2025-01-15", - "generation": 0, - "base_model": "godlikehhd/alpaca_data_ifd_min_2600", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.544, - "co2_cost": 1.7676294938866262 - } - }, - { - "id": "godlikehhd/alpaca_data_ins_ans_max_5200_float16_fe8144d1b38ee217159f292c94ca77faf8379400_False", - "model": { - "name": "godlikehhd/alpaca_data_ins_ans_max_5200", - "sha": "fe8144d1b38ee217159f292c94ca77faf8379400", - "precision": "float16", - "type": 
"fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 15.92444135220606, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.34786477657061043, - "normalized_score": 34.78647765706104 - }, - "bbh": { - "name": "BBH", - "value": 0.40982060224148426, - "normalized_score": 16.26868941225307 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1027190332326284, - "normalized_score": 10.27190332326284 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.3601666666666667, - "normalized_score": 7.6208333333333345 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2900598404255319, - "normalized_score": 21.11776004728132 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-09", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.544, - "co2_cost": 1.7716212707932835 - } - }, - { - "id": "godlikehhd/alpaca_data_ins_max_5200_float16_e309c8a92204e30723f71f917682cd463cf45290_False", - "model": { - "name": "godlikehhd/alpaca_data_ins_max_5200", - "sha": "e309c8a92204e30723f71f917682cd463cf45290", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 15.68373909505621, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.32750657145263457, - "normalized_score": 32.75065714526346 - }, - "bbh": { - "name": "BBH", - "value": 0.41550742328078477, - "normalized_score": 17.540671840924308 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09969788519637462, - "normalized_score": 9.969788519637463 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.361375, - "normalized_score": 6.405208333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2915558510638298, - "normalized_score": 21.283983451536646 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-09", - "submission_date": "2025-01-09", - "generation": 0, - "base_model": "godlikehhd/alpaca_data_ins_max_5200", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.544, - "co2_cost": 2.1128275845725115 - } - }, - { - "id": "godlikehhd/alpaca_data_ins_min_2600_float16_98f1ca459872ef11a414a2e9f40fe1a9c331c67f_False", - "model": { - "name": "godlikehhd/alpaca_data_ins_min_2600", - "sha": "98f1ca459872ef11a414a2e9f40fe1a9c331c67f", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 16.228325384264654, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.33300199027469335, - "normalized_score": 33.30019902746934 - }, - "bbh": { - "name": "BBH", - "value": 0.41873469888886056, - "normalized_score": 17.53633236067362 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11102719033232629, - "normalized_score": 11.10271903323263 
- }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.38534375000000004, - "normalized_score": 8.167968750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28798204787234044, - "normalized_score": 20.886894208037827 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-14", - "submission_date": "2025-01-14", - "generation": 0, - "base_model": "godlikehhd/alpaca_data_ins_min_2600", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.544, - "co2_cost": 2.120538562004114 - } - }, - { - "id": "godlikehhd/alpaca_data_ins_min_5200_float16_1039859027710fdd5196d401422d54c55d494ac4_False", - "model": { - "name": "godlikehhd/alpaca_data_ins_min_5200", - "sha": "1039859027710fdd5196d401422d54c55d494ac4", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 16.38323586874574, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3359995921931586, - "normalized_score": 33.59995921931586 - }, - "bbh": { - "name": "BBH", - "value": 0.4289279419241076, - "normalized_score": 18.691277867616158 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10347432024169184, - "normalized_score": 10.347432024169184 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28691275167785235, - "normalized_score": 4.921700223713646 - }, - "musr": { - "name": "MUSR", - "value": 0.39055208333333336, - "normalized_score": 9.085677083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29488031914893614, - "normalized_score": 21.653368794326237 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-09", - "submission_date": "2025-01-09", - "generation": 0, - "base_model": "godlikehhd/alpaca_data_ins_min_5200", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.544, - "co2_cost": 1.9986224648680768 - } - }, - { - "id": "godlikehhd/alpaca_data_sampled_ifd_5200_float16_744dc68050671f42b110bc322f0ef4d71549f067_False", - "model": { - "name": "godlikehhd/alpaca_data_sampled_ifd_5200", - "sha": "744dc68050671f42b110bc322f0ef4d71549f067", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 15.703649205002767, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2923853154075631, - "normalized_score": 29.238531540756306 - }, - "bbh": { - "name": "BBH", - "value": 0.4032969715626326, - "normalized_score": 15.96825358073914 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12537764350453173, - "normalized_score": 12.537764350453173 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3087248322147651, - "normalized_score": 7.829977628635347 - }, - "musr": { - "name": "MUSR", - "value": 0.3520729166666667, - "normalized_score": 7.575781250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2896442819148936, - "normalized_score": 21.071586879432623 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - 
"is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-09", - "submission_date": "2025-01-09", - "generation": 0, - "base_model": "godlikehhd/alpaca_data_sampled_ifd_5200", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.544, - "co2_cost": 1.7249114740707934 - } - }, - { - "id": "godlikehhd/alpaca_data_sampled_ifd_new_5200_float16_cc169308e13b6a4f030713d9eda4ec005ebf1e16_False", - "model": { - "name": "godlikehhd/alpaca_data_sampled_ifd_new_5200", - "sha": "cc169308e13b6a4f030713d9eda4ec005ebf1e16", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 16.53778065653717, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.36632468516868577, - "normalized_score": 36.63246851686858 - }, - "bbh": { - "name": "BBH", - "value": 0.4177831234050982, - "normalized_score": 17.561425245080894 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09441087613293052, - "normalized_score": 9.441087613293051 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2936241610738255, - "normalized_score": 5.8165548098433995 - }, - "musr": { - "name": "MUSR", - "value": 0.36125, - "normalized_score": 8.389583333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29247007978723405, - "normalized_score": 21.38556442080378 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-09", - "submission_date": "2025-01-09", - "generation": 0, - "base_model": "godlikehhd/alpaca_data_sampled_ifd_new_5200", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.544, - "co2_cost": 1.7552255359802686 - } - }, - { - "id": "godlikehhd/alpaca_data_score_max_0.1_2600_float16_04344047146d1d4f6da4ed9668717a84daf6718c_False", - "model": { - "name": "godlikehhd/alpaca_data_score_max_0.1_2600", - "sha": "04344047146d1d4f6da4ed9668717a84daf6718c", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 15.917241105745404, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3287554799044313, - "normalized_score": 32.87554799044314 - }, - "bbh": { - "name": "BBH", - "value": 0.42522607952607777, - "normalized_score": 18.621496831589138 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09894259818731117, - "normalized_score": 9.894259818731117 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.37064583333333334, - "normalized_score": 7.2640625000000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29230385638297873, - "normalized_score": 21.3670951536643 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-14", - "submission_date": "2025-01-14", - "generation": 0, - "base_model": "godlikehhd/alpaca_data_score_max_0.1_2600", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.544, - "co2_cost": 2.044420604704493 - } - }, - { - "id": "godlikehhd/alpaca_data_score_max_0.3_2600_float16_382a503c65ab28e85ed07a4f17f8efae0b3fac4b_False", - 
"model": { - "name": "godlikehhd/alpaca_data_score_max_0.3_2600", - "sha": "382a503c65ab28e85ed07a4f17f8efae0b3fac4b", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 15.960793093536113, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.33752332699459653, - "normalized_score": 33.75233269945966 - }, - "bbh": { - "name": "BBH", - "value": 0.4151448369012765, - "normalized_score": 16.924620676581448 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10347432024169184, - "normalized_score": 10.347432024169184 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28942953020134227, - "normalized_score": 5.257270693512303 - }, - "musr": { - "name": "MUSR", - "value": 0.37594791666666666, - "normalized_score": 8.226822916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29130651595744683, - "normalized_score": 21.256279550827426 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-14", - "submission_date": "2025-01-15", - "generation": 0, - "base_model": "godlikehhd/alpaca_data_score_max_0.3_2600", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.544, - "co2_cost": 2.066964020498706 - } - }, - { - "id": "godlikehhd/alpaca_data_score_max_0.7_2600_float16_68c728f78b338c7d40f565524d4e8bbd0afd2f3d_False", - "model": { - "name": "godlikehhd/alpaca_data_score_max_0.7_2600", - "sha": "68c728f78b338c7d40f565524d4e8bbd0afd2f3d", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 16.630379005915092, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3639764713183243, - "normalized_score": 36.39764713183243 - }, - "bbh": { - "name": "BBH", - "value": 0.41845266250678703, - "normalized_score": 18.14398633190028 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10725075528700906, - "normalized_score": 10.725075528700906 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.3468645833333333, - "normalized_score": 5.324739583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2982878989361702, - "normalized_score": 22.031988770685576 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-14", - "submission_date": "2025-01-14", - "generation": 0, - "base_model": "godlikehhd/alpaca_data_score_max_0.7_2600", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.544, - "co2_cost": 1.7237661990988027 - } - }, - { - "id": "godlikehhd/alpaca_data_score_max_2500_float16_0d1adc4306c55ac0d89f12088768a170155ae293_False", - "model": { - "name": "godlikehhd/alpaca_data_score_max_2500", - "sha": "0d1adc4306c55ac0d89f12088768a170155ae293", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 16.49115247845423, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3563577973111345, - 
"normalized_score": 35.635779731113445 - }, - "bbh": { - "name": "BBH", - "value": 0.41801375075895447, - "normalized_score": 17.930587543977595 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09516616314199396, - "normalized_score": 9.516616314199396 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2953020134228188, - "normalized_score": 6.040268456375841 - }, - "musr": { - "name": "MUSR", - "value": 0.36270833333333335, - "normalized_score": 8.271875000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2939660904255319, - "normalized_score": 21.551787825059098 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-13", - "submission_date": "2025-01-13", - "generation": 0, - "base_model": "godlikehhd/alpaca_data_score_max_2500", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.544, - "co2_cost": 2.116895052230952 - } - }, - { - "id": "godlikehhd/alpaca_data_score_max_2600_3B_float16_45fa74597fa81030ef31d9be0776cb93f2a0e1c6_False", - "model": { - "name": "godlikehhd/alpaca_data_score_max_2600_3B", - "sha": "45fa74597fa81030ef31d9be0776cb93f2a0e1c6", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 19.567090626055247, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.33577463352792813, - "normalized_score": 33.57746335279281 - }, - "bbh": { - "name": "BBH", - "value": 0.4716306839273412, - "normalized_score": 26.00926717613288 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15483383685800603, - "normalized_score": 15.483383685800604 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.44744791666666667, - "normalized_score": 14.297656249999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3341921542553192, - "normalized_score": 26.021350472813243 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-15", - "submission_date": "2025-01-15", - "generation": 0, - "base_model": "godlikehhd/alpaca_data_score_max_2600_3B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.086, - "co2_cost": 2.0600194944763612 - } - }, - { - "id": "godlikehhd/alpaca_data_score_max_5200_float16_71f132308c6e915db3be25cb52c8364baee03246_False", - "model": { - "name": "godlikehhd/alpaca_data_score_max_5200", - "sha": "71f132308c6e915db3be25cb52c8364baee03246", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 16.366037527264833, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.34454248061809334, - "normalized_score": 34.45424806180933 - }, - "bbh": { - "name": "BBH", - "value": 0.42417102847687554, - "normalized_score": 18.575115132209195 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09743202416918428, - "normalized_score": 9.743202416918429 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.3877916666666667, - 
"normalized_score": 7.440625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29446476063829785, - "normalized_score": 21.607195626477537 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-10", - "submission_date": "2025-01-10", - "generation": 0, - "base_model": "godlikehhd/alpaca_data_score_max_5200", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.544, - "co2_cost": 2.1170595480613215 - } - }, - { - "id": "godlikehhd/ifd_2500_qwen_float16_d5311754b90d5628c5dea75d2edd9fed191139ef_False", - "model": { - "name": "godlikehhd/ifd_2500_qwen", - "sha": "d5311754b90d5628c5dea75d2edd9fed191139ef", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 16.206918147182165, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.33647388928044253, - "normalized_score": 33.64738892804426 - }, - "bbh": { - "name": "BBH", - "value": 0.42983047351897224, - "normalized_score": 19.136234077656635 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09818731117824774, - "normalized_score": 9.818731117824774 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2953020134228188, - "normalized_score": 6.040268456375841 - }, - "musr": { - "name": "MUSR", - "value": 0.36146875, - "normalized_score": 7.250260416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2921376329787234, - "normalized_score": 21.34862588652482 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-31", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.544, - "co2_cost": 1.7273969106070146 - } - }, - { - "id": "godlikehhd/ifd_new_correct_all_sample_2500_qwen_float16_dcb4ee52aafbc24f691c2ffaeb04627d61df727e_False", - "model": { - "name": "godlikehhd/ifd_new_correct_all_sample_2500_qwen", - "sha": "dcb4ee52aafbc24f691c2ffaeb04627d61df727e", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 15.324012443597335, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.33757319467900726, - "normalized_score": 33.75731946790073 - }, - "bbh": { - "name": "BBH", - "value": 0.4019641175400575, - "normalized_score": 15.682840817827374 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09592145015105741, - "normalized_score": 9.592145015105741 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.3561666666666667, - "normalized_score": 6.554166666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2888962765957447, - "normalized_score": 20.988475177304963 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-31", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.544, - "co2_cost": 
1.7822922819233824 - } - }, - { - "id": "godlikehhd/ifd_new_correct_sample_2500_qwen_float16_8e9ed2c5bd0f5cdf2a6d3bddd28fdeab1a49c4c7_False", - "model": { - "name": "godlikehhd/ifd_new_correct_sample_2500_qwen", - "sha": "8e9ed2c5bd0f5cdf2a6d3bddd28fdeab1a49c4c7", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 16.393782607601025, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.33974631754854895, - "normalized_score": 33.9746317548549 - }, - "bbh": { - "name": "BBH", - "value": 0.41103125849665423, - "normalized_score": 16.877004154041817 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1042296072507553, - "normalized_score": 10.42296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3078859060402685, - "normalized_score": 7.718120805369133 - }, - "musr": { - "name": "MUSR", - "value": 0.3626770833333333, - "normalized_score": 7.901302083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.293218085106383, - "normalized_score": 21.46867612293144 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-31", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.544, - "co2_cost": 1.8353397447837017 - } - }, - { - "id": "godlikehhd/ifd_new_qwen_2500_float16_10d4c5b0deb3468fc606089ea026c07dc7674c2a_False", - "model": { - "name": "godlikehhd/ifd_new_qwen_2500", - "sha": "10d4c5b0deb3468fc606089ea026c07dc7674c2a", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 15.886984394789133, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.323959316834887, - "normalized_score": 32.3959316834887 - }, - "bbh": { - "name": "BBH", - "value": 0.41598162527775745, - "normalized_score": 17.63795023834911 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11178247734138973, - "normalized_score": 11.178247734138973 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30033557046979864, - "normalized_score": 6.711409395973152 - }, - "musr": { - "name": "MUSR", - "value": 0.3589583333333333, - "normalized_score": 6.169791666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29105718085106386, - "normalized_score": 21.228575650118206 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-31", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.544, - "co2_cost": 2.0012420732812295 - } - }, - { - "id": "godlikehhd/qwen-2.5-1.5b-cherry_float16_d9a8a826182d4703fbaaf749b93c4434b3a72c5e_False", - "model": { - "name": "godlikehhd/qwen-2.5-1.5b-cherry", - "sha": "d9a8a826182d4703fbaaf749b93c4434b3a72c5e", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 14.683674027867733, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.28933784580468713, - 
"normalized_score": 28.93378458046871 - }, - "bbh": { - "name": "BBH", - "value": 0.40357573315752204, - "normalized_score": 16.323588748077068 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10196374622356495, - "normalized_score": 10.196374622356496 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30033557046979864, - "normalized_score": 6.711409395973152 - }, - "musr": { - "name": "MUSR", - "value": 0.345625, - "normalized_score": 4.569791666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29230385638297873, - "normalized_score": 21.3670951536643 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-29", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.772, - "co2_cost": 1.8943105830699527 - } - }, - { - "id": "godlikehhd/qwen_2.5-1.5b-cherry_new_float16_e7d49cb02c1112c5ecbd5064e355dd57eb4bb481_False", - "model": { - "name": "godlikehhd/qwen_2.5-1.5b-cherry_new", - "sha": "e7d49cb02c1112c5ecbd5064e355dd57eb4bb481", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 15.370243206890223, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3120442647730245, - "normalized_score": 31.204426477302448 - }, - "bbh": { - "name": "BBH", - "value": 0.4149628386006759, - "normalized_score": 17.59708505979556 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09667673716012085, - "normalized_score": 9.667673716012084 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.34959375, - "normalized_score": 6.332552083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28939494680851063, - "normalized_score": 21.043882978723403 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-29", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.544, - "co2_cost": 1.7826744384672384 - } - }, - { - "id": "godlikehhd/qwen_full_data_alpaca_float16_8fb1e35250496473c58b24abda40eed086b30614_False", - "model": { - "name": "godlikehhd/qwen_full_data_alpaca", - "sha": "8fb1e35250496473c58b24abda40eed086b30614", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 15.856124752105812, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3136178672588731, - "normalized_score": 31.361786725887313 - }, - "bbh": { - "name": "BBH", - "value": 0.4229212208733662, - "normalized_score": 18.614215673166097 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09214501510574018, - "normalized_score": 9.214501510574017 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29278523489932884, - "normalized_score": 5.7046979865771785 - }, - "musr": { - "name": "MUSR", - "value": 0.40515625000000005, - "normalized_score": 9.677864583333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28507313829787234, - "normalized_score": 
20.563682033096924 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-29", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.544, - "co2_cost": 1.6464101240149864 - } - }, - { - "id": "godlikehhd/qwen_ins_ans_2500_float16_8eb2e22119485500990775f74289a271ab0051f1_True", - "model": { - "name": "godlikehhd/qwen_ins_ans_2500", - "sha": "8eb2e22119485500990775f74289a271ab0051f1", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 14.783348090526234, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2698041197356348, - "normalized_score": 26.980411973563477 - }, - "bbh": { - "name": "BBH", - "value": 0.4073950292977672, - "normalized_score": 17.695311631481697 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11404833836858005, - "normalized_score": 11.404833836858005 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.3588645833333333, - "normalized_score": 6.924739583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28091755319148937, - "normalized_score": 20.10195035460993 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-29", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.544, - "co2_cost": 3.385132610918049 - } - }, - { - "id": "google/codegemma-1.1-2b_bfloat16_9d69e500da236427eab5867552ffc87108964f4d_False", - "model": { - "name": "google/codegemma-1.1-2b", - "sha": "9d69e500da236427eab5867552ffc87108964f4d", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "GemmaForCausalLM", - "average_score": 7.133867903047553, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22936253584932426, - "normalized_score": 22.936253584932423 - }, - "bbh": { - "name": "BBH", - "value": 0.3353417790248454, - "normalized_score": 7.551225280004151 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01283987915407855, - "normalized_score": 1.283987915407855 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.3871458333333333, - "normalized_score": 5.9265625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1278257978723404, - "normalized_score": 3.0917553191489344 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-04-30", - "submission_date": "2024-08-12", - "generation": 0, - "base_model": "google/codegemma-1.1-2b", - "hub_license": "gemma", - "hub_hearts": 17, - "params_billions": 2.506, - "co2_cost": 1.8997663921053338 - } - }, - { - "id": "google/flan-t5-base_float16_7bcac572ce56db69c1ea7c8af255c5d7c9672fc2_False", - "model": { - "name": "google/flan-t5-base", - "sha": "7bcac572ce56db69c1ea7c8af255c5d7c9672fc2", 
- "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "T5ForConditionalGeneration", - "average_score": 6.415642124982084, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.18907055501624578, - "normalized_score": 18.907055501624576 - }, - "bbh": { - "name": "BBH", - "value": 0.3525980599300322, - "normalized_score": 11.337693677304879 - }, - "math": { - "name": "MATH Level 5", - "value": 0.010574018126888218, - "normalized_score": 1.0574018126888218 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23825503355704697, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.36711458333333336, - "normalized_score": 3.2226562499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.13572140957446807, - "normalized_score": 3.9690455082742293 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2022-10-21", - "submission_date": "2024-08-14", - "generation": 0, - "base_model": "google/flan-t5-base", - "hub_license": "apache-2.0", - "hub_hearts": 839, - "params_billions": 0.248, - "co2_cost": 0.3132428808489122 - } - }, - { - "id": "google/flan-t5-large_float16_0613663d0d48ea86ba8cb3d7a44f0f65dc596a2a_False", - "model": { - "name": "google/flan-t5-large", - "sha": "0613663d0d48ea86ba8cb3d7a44f0f65dc596a2a", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "T5ForConditionalGeneration", - "average_score": 9.658122925542836, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22009490374428736, - "normalized_score": 22.009490374428736 - }, - "bbh": { - "name": "BBH", - "value": 0.41531150356794316, - "normalized_score": 17.510018280067285 - }, - "math": { - "name": "MATH Level 5", - "value": 0.014350453172205438, - "normalized_score": 1.4350453172205437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25083892617449666, - "normalized_score": 0.11185682326622093 - }, - "musr": { - "name": "MUSR", - "value": 0.40832291666666665, - "normalized_score": 9.007031249999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17087765957446807, - "normalized_score": 7.87529550827423 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2022-10-21", - "submission_date": "2024-08-14", - "generation": 0, - "base_model": "google/flan-t5-large", - "hub_license": "apache-2.0", - "hub_hearts": 713, - "params_billions": 0.783, - "co2_cost": 0.46698274327061645 - } - }, - { - "id": "google/flan-t5-small_float16_0fc9ddf78a1e988dac52e2dac162b0ede4fd74ab_False", - "model": { - "name": "google/flan-t5-small", - "sha": "0fc9ddf78a1e988dac52e2dac162b0ede4fd74ab", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "T5ForConditionalGeneration", - "average_score": 6.129661810537869, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1524255641697363, - "normalized_score": 15.24255641697363 - }, - "bbh": { - "name": "BBH", - "value": 0.3282901097640842, - "normalized_score": 6.36311196167965 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0075528700906344415, - "normalized_score": 0.7552870090634441 - }, - "gpqa": { - "name": "GPQA", - 
"value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.41229166666666667, - "normalized_score": 10.36979166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1233377659574468, - "normalized_score": 2.5930851063829774 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2022-10-21", - "submission_date": "2024-06-27", - "generation": 0, - "base_model": "google/flan-t5-small", - "hub_license": "apache-2.0", - "hub_hearts": 323, - "params_billions": 0.077, - "co2_cost": 0.28626048455027764 - } - }, - { - "id": "google/flan-t5-xl_float16_7d6315df2c2fb742f0f5b556879d730926ca9001_False", - "model": { - "name": "google/flan-t5-xl", - "sha": "7d6315df2c2fb742f0f5b556879d730926ca9001", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "T5ForConditionalGeneration", - "average_score": 11.70507257989283, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22374189373085634, - "normalized_score": 22.374189373085635 - }, - "bbh": { - "name": "BBH", - "value": 0.45310636062112314, - "normalized_score": 22.695055811215397 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0075528700906344415, - "normalized_score": 0.7552870090634441 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2525167785234899, - "normalized_score": 0.33557046979865535 - }, - "musr": { - "name": "MUSR", - "value": 0.41809375, - "normalized_score": 11.328385416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.21467752659574468, - "normalized_score": 12.741947399527188 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2022-10-21", - "submission_date": "2024-08-07", - "generation": 0, - "base_model": "google/flan-t5-xl", - "hub_license": "apache-2.0", - "hub_hearts": 485, - "params_billions": 2.85, - "co2_cost": 0.6978586885867614 - } - }, - { - "id": "google/flan-t5-xl_bfloat16_7d6315df2c2fb742f0f5b556879d730926ca9001_False", - "model": { - "name": "google/flan-t5-xl", - "sha": "7d6315df2c2fb742f0f5b556879d730926ca9001", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "T5ForConditionalGeneration", - "average_score": 11.58716743755607, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2206944241279804, - "normalized_score": 22.06944241279804 - }, - "bbh": { - "name": "BBH", - "value": 0.45372172155693963, - "normalized_score": 22.837587663523298 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0007552870090634442, - "normalized_score": 0.07552870090634442 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24580536912751677, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.42203125, - "normalized_score": 11.853906250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.21417885638297873, - "normalized_score": 12.686539598108748 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2022-10-21", - "submission_date": "2024-08-07", - "generation": 0, - "base_model": "google/flan-t5-xl", - "hub_license": 
"apache-2.0", - "hub_hearts": 485, - "params_billions": 2.85, - "co2_cost": 0.28535173017878435 - } - }, - { - "id": "google/flan-t5-xxl_float16_ae7c9136adc7555eeccc78cdd960dfd60fb346ce_False", - "model": { - "name": "google/flan-t5-xxl", - "sha": "ae7c9136adc7555eeccc78cdd960dfd60fb346ce", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "T5ForConditionalGeneration", - "average_score": 13.662077060970686, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2200450360598767, - "normalized_score": 22.00450360598767 - }, - "bbh": { - "name": "BBH", - "value": 0.5065888015776924, - "normalized_score": 30.119255600105877 - }, - "math": { - "name": "MATH Level 5", - "value": 0.010574018126888218, - "normalized_score": 1.0574018126888218 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.42175, - "normalized_score": 11.185416666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.23429188829787234, - "normalized_score": 14.921320921985814 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2022-10-21", - "submission_date": "2024-09-06", - "generation": 0, - "base_model": "google/flan-t5-xxl", - "hub_license": "apache-2.0", - "hub_hearts": 1235, - "params_billions": 11.267, - "co2_cost": 1.4129536546292594 - } - }, - { - "id": "google/flan-ul2_bfloat16_452d74ce28ac4a7f211d6ba3ef0717027f7a8074_False", - "model": { - "name": "google/flan-ul2", - "sha": "452d74ce28ac4a7f211d6ba3ef0717027f7a8074", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "T5ForConditionalGeneration", - "average_score": 13.675998692966035, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23925406809487715, - "normalized_score": 23.925406809487715 - }, - "bbh": { - "name": "BBH", - "value": 0.5053738049125648, - "normalized_score": 30.02029012567709 - }, - "math": { - "name": "MATH Level 5", - "value": 0.00906344410876133, - "normalized_score": 0.906344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.287751677852349, - "normalized_score": 5.033557046979867 - }, - "musr": { - "name": "MUSR", - "value": 0.3843541666666666, - "normalized_score": 5.577604166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.24933510638297873, - "normalized_score": 16.592789598108748 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-03-03", - "submission_date": "2024-08-07", - "generation": 0, - "base_model": "google/flan-ul2", - "hub_license": "apache-2.0", - "hub_hearts": 553, - "params_billions": 19.46, - "co2_cost": 1.1199325668968463 - } - }, - { - "id": "google/gemma-1.1-2b-it_bfloat16_bf4924f313df5166dee1467161e886e55f2eb4d4_True", - "model": { - "name": "google/gemma-1.1-2b-it", - "sha": "bf4924f313df5166dee1467161e886e55f2eb4d4", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "GemmaForCausalLM", - "average_score": 8.053373854341979, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.30674831668860847, - 
"normalized_score": 30.674831668860847 - }, - "bbh": { - "name": "BBH", - "value": 0.3184634974814922, - "normalized_score": 5.862826722774347 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01812688821752266, - "normalized_score": 1.812688821752266 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26929530201342283, - "normalized_score": 2.572706935123044 - }, - "musr": { - "name": "MUSR", - "value": 0.33939583333333334, - "normalized_score": 2.024479166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.14835438829787234, - "normalized_score": 5.372709810874704 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-03-26", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "google/gemma-1.1-2b-it", - "hub_license": "gemma", - "hub_hearts": 157, - "params_billions": 2.506, - "co2_cost": 0.6584295628455202 - } - }, - { - "id": "google/gemma-1.1-7b-it_bfloat16_16128b0aeb50762ea96430c0c06a37941bf9f274_True", - "model": { - "name": "google/gemma-1.1-7b-it", - "sha": "16128b0aeb50762ea96430c0c06a37941bf9f274", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "GemmaForCausalLM", - "average_score": 17.693584228972615, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5039107346285633, - "normalized_score": 50.391073462856326 - }, - "bbh": { - "name": "BBH", - "value": 0.3935297962833251, - "normalized_score": 15.93420938501317 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04909365558912387, - "normalized_score": 4.909365558912387 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2936241610738255, - "normalized_score": 5.8165548098433995 - }, - "musr": { - "name": "MUSR", - "value": 0.42302083333333335, - "normalized_score": 11.510937500000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2583942819148936, - "normalized_score": 17.5993646572104 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-03-26", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "google/gemma-1.1-7b-it", - "hub_license": "gemma", - "hub_hearts": 270, - "params_billions": 8.538, - "co2_cost": 1.156598379926708 - } - }, - { - "id": "google/gemma-2-27b_bfloat16_938270f5272feb02779b55c2bb2fffdd0f53ff0c_False", - "model": { - "name": "google/gemma-2-27b", - "sha": "938270f5272feb02779b55c2bb2fffdd0f53ff0c", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 23.926167340782822, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24752213017017072, - "normalized_score": 24.75221301701707 - }, - "bbh": { - "name": "BBH", - "value": 0.5642908317482057, - "normalized_score": 37.390737454186464 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1661631419939577, - "normalized_score": 16.61631419939577 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35067114093959734, - "normalized_score": 13.422818791946312 - }, - "musr": { - "name": "MUSR", - "value": 0.43963541666666667, - "normalized_score": 13.921093749999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4370844414893617, - "normalized_score": 37.4538268321513 - } - }, - 
"features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-06-24", - "submission_date": "2024-08-24", - "generation": 0, - "base_model": "google/gemma-2-27b", - "hub_license": "gemma", - "hub_hearts": 205, - "params_billions": 27.227, - "co2_cost": 11.22849855677268 - } - }, - { - "id": "google/gemma-2-27b-it_bfloat16_f6c533e5eb013c7e31fc74ef042ac4f3fb5cf40b_True", - "model": { - "name": "google/gemma-2-27b-it", - "sha": "f6c533e5eb013c7e31fc74ef042ac4f3fb5cf40b", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 36.17428251510342, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7977677008116243, - "normalized_score": 79.77677008116243 - }, - "bbh": { - "name": "BBH", - "value": 0.6451387433168799, - "normalized_score": 49.27284215130387 - }, - "math": { - "name": "MATH Level 5", - "value": 0.23867069486404835, - "normalized_score": 23.867069486404834 - }, - "gpqa": { - "name": "GPQA", - "value": 0.375, - "normalized_score": 16.666666666666664 - }, - "musr": { - "name": "MUSR", - "value": 0.40330208333333334, - "normalized_score": 9.112760416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4451462765957447, - "normalized_score": 38.34958628841608 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-06-24", - "submission_date": "2024-08-07", - "generation": 1, - "base_model": "google/gemma-2-27b", - "hub_license": "gemma", - "hub_hearts": 541, - "params_billions": 27.227, - "co2_cost": 9.652422373385473 - } - }, - { - "id": "google/gemma-2-2b_bfloat16_4d05c88d00441bf62bf87dcfd29e204c05089f36_True", - "model": { - "name": "google/gemma-2-2b", - "sha": "4d05c88d00441bf62bf87dcfd29e204c05089f36", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "InternLM2ForCausalLM", - "average_score": 10.129463155055184, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19931226922343825, - "normalized_score": 19.931226922343825 - }, - "bbh": { - "name": "BBH", - "value": 0.3655966996422591, - "normalized_score": 11.755807532236112 - }, - "math": { - "name": "MATH Level 5", - "value": 0.028700906344410877, - "normalized_score": 2.8700906344410875 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2625838926174497, - "normalized_score": 1.6778523489932917 - }, - "musr": { - "name": "MUSR", - "value": 0.4231770833333333, - "normalized_score": 11.430468750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.21800199468085107, - "normalized_score": 13.111332742316787 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-07-16", - "submission_date": "2024-07-31", - "generation": 0, - "base_model": "google/gemma-2-2b", - "hub_license": "gemma", - "hub_hearts": 528, - "params_billions": 2.614, - "co2_cost": 1.5187957227139828 - } - }, - { - "id": "google/gemma-2-2b_float16_0738188b3055bc98daf0fe7211f0091357e5b979_True", - "model": { - "name": "google/gemma-2-2b", - "sha": "0738188b3055bc98daf0fe7211f0091357e5b979", - "precision": "float16", - "type": 
"pretrained", - "weight_type": "Original", - "architecture": "InternLM2ForCausalLM", - "average_score": 10.359615568466916, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20176021844262113, - "normalized_score": 20.176021844262113 - }, - "bbh": { - "name": "BBH", - "value": 0.3708674612470255, - "normalized_score": 12.497306228573644 - }, - "math": { - "name": "MATH Level 5", - "value": 0.030211480362537766, - "normalized_score": 3.0211480362537766 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2625838926174497, - "normalized_score": 1.6778523489932917 - }, - "musr": { - "name": "MUSR", - "value": 0.421875, - "normalized_score": 11.267708333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.22165890957446807, - "normalized_score": 13.51765661938534 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-07-16", - "submission_date": "2024-07-31", - "generation": 0, - "base_model": "google/gemma-2-2b", - "hub_license": "gemma", - "hub_hearts": 528, - "params_billions": 2.614, - "co2_cost": 2.8365147022631594 - } - }, - { - "id": "google/gemma-2-2b-it_bfloat16_2b6ac3ff954ad896c115bbfa1b571cd93ea2c20f_True", - "model": { - "name": "google/gemma-2-2b-it", - "sha": "2b6ac3ff954ad896c115bbfa1b571cd93ea2c20f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "InternLM2ForCausalLM", - "average_score": 17.046939294966545, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5668337788179807, - "normalized_score": 56.68337788179808 - }, - "bbh": { - "name": "BBH", - "value": 0.41992308914274706, - "normalized_score": 17.980792881523424 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0007552870090634441, - "normalized_score": 0.0755287009063444 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.39288541666666665, - "normalized_score": 7.077343750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.25498670212765956, - "normalized_score": 17.22074468085106 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-07-16", - "submission_date": "2024-07-31", - "generation": 1, - "base_model": "google/gemma-2-2b", - "hub_license": "gemma", - "hub_hearts": 1025, - "params_billions": 2.614, - "co2_cost": 1.2347432742058528 - } - }, - { - "id": "google/gemma-2-2b-jpn-it_float16_6b046bbc091084a1ec89fe03e58871fde10868eb_False", - "model": { - "name": "google/gemma-2-2b-jpn-it", - "sha": "6b046bbc091084a1ec89fe03e58871fde10868eb", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 17.11540570593849, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5077826832803628, - "normalized_score": 50.778268328036276 - }, - "bbh": { - "name": "BBH", - "value": 0.42255698900658106, - "normalized_score": 18.525626449832735 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03474320241691843, - "normalized_score": 3.474320241691843 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28523489932885904, - 
"normalized_score": 4.697986577181204 - }, - "musr": { - "name": "MUSR", - "value": 0.39638541666666666, - "normalized_score": 7.681510416666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2578125, - "normalized_score": 17.53472222222222 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-09-25", - "submission_date": "2024-10-11", - "generation": 2, - "base_model": "google/gemma-2-2b", - "hub_license": "gemma", - "hub_hearts": 163, - "params_billions": 2.614, - "co2_cost": 1.011437210514093 - } - }, - { - "id": "google/gemma-2-2b-jpn-it_bfloat16_6b046bbc091084a1ec89fe03e58871fde10868eb_True", - "model": { - "name": "google/gemma-2-2b-jpn-it", - "sha": "6b046bbc091084a1ec89fe03e58871fde10868eb", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 16.678630066922224, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5288401441508531, - "normalized_score": 52.88401441508531 - }, - "bbh": { - "name": "BBH", - "value": 0.4178440226217119, - "normalized_score": 17.848086390818036 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04758308157099698, - "normalized_score": 4.758308157099698 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2751677852348993, - "normalized_score": 3.355704697986576 - }, - "musr": { - "name": "MUSR", - "value": 0.37276041666666665, - "normalized_score": 4.928385416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2466755319148936, - "normalized_score": 16.297281323877066 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-09-25", - "submission_date": "2024-10-14", - "generation": 2, - "base_model": "google/gemma-2-2b", - "hub_license": "gemma", - "hub_hearts": 163, - "params_billions": 2.614, - "co2_cost": 1.7088004980800213 - } - }, - { - "id": "google/gemma-2-9b_bfloat16_beb0c08e9eeb0548f3aca2ac870792825c357b7d_False", - "model": { - "name": "google/gemma-2-9b", - "sha": "beb0c08e9eeb0548f3aca2ac870792825c357b7d", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 21.205286776100692, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20398320899657355, - "normalized_score": 20.398320899657357 - }, - "bbh": { - "name": "BBH", - "value": 0.5377373397621884, - "normalized_score": 34.09681853589784 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13444108761329304, - "normalized_score": 13.444108761329304 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3288590604026846, - "normalized_score": 10.514541387024611 - }, - "musr": { - "name": "MUSR", - "value": 0.4461145833333333, - "normalized_score": 14.297656250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4103224734042553, - "normalized_score": 34.48027482269504 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-06-24", - "submission_date": "2024-07-11", - "generation": 0, - "base_model": "google/gemma-2-9b", - "hub_license": "gemma", - "hub_hearts": 653, - 
"params_billions": 9.0, - "co2_cost": 8.61623967949076 - } - }, - { - "id": "google/gemma-2-9b-it_bfloat16_1937c70277fcc5f7fb0fc772fc5bc69378996e71_True", - "model": { - "name": "google/gemma-2-9b-it", - "sha": "1937c70277fcc5f7fb0fc772fc5bc69378996e71", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 32.07276025267082, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7435626360279614, - "normalized_score": 74.35626360279613 - }, - "bbh": { - "name": "BBH", - "value": 0.5990342504164132, - "normalized_score": 42.136619683664655 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19486404833836857, - "normalized_score": 19.486404833836858 - }, - "gpqa": { - "name": "GPQA", - "value": 0.36073825503355705, - "normalized_score": 14.76510067114094 - }, - "musr": { - "name": "MUSR", - "value": 0.4072708333333333, - "normalized_score": 9.742187500000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3875498670212766, - "normalized_score": 31.949985224586293 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-06-24", - "submission_date": "2024-07-11", - "generation": 1, - "base_model": "google/gemma-2-9b", - "hub_license": "gemma", - "hub_hearts": 690, - "params_billions": 9.0, - "co2_cost": 7.544268073096287 - } - }, - { - "id": "google/gemma-2b_bfloat16_2ac59a5d7bf4e1425010f0d457dde7d146658953_False", - "model": { - "name": "google/gemma-2b", - "sha": "2ac59a5d7bf4e1425010f0d457dde7d146658953", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "GemmaForCausalLM", - "average_score": 7.321959810488082, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20375825033134307, - "normalized_score": 20.375825033134305 - }, - "bbh": { - "name": "BBH", - "value": 0.33656381705857935, - "normalized_score": 8.246263426638125 - }, - "math": { - "name": "MATH Level 5", - "value": 0.030211480362537766, - "normalized_score": 3.0211480362537766 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2550335570469799, - "normalized_score": 0.6711409395973182 - }, - "musr": { - "name": "MUSR", - "value": 0.39778125, - "normalized_score": 7.555989583333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.13655252659574468, - "normalized_score": 4.061391843971631 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-02-08", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "google/gemma-2b", - "hub_license": "gemma", - "hub_hearts": 985, - "params_billions": 2.506, - "co2_cost": 1.2959887261338292 - } - }, - { - "id": "google/gemma-2b-it_bfloat16_de144fb2268dee1066f515465df532c05e699d48_True", - "model": { - "name": "google/gemma-2b-it", - "sha": "de144fb2268dee1066f515465df532c05e699d48", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "GemmaForCausalLM", - "average_score": 7.485804130315127, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.26902950837112194, - "normalized_score": 26.902950837112197 - }, - "bbh": { - "name": "BBH", - "value": 0.31508191988788464, - 
"normalized_score": 5.214303022163619 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02039274924471299, - "normalized_score": 2.0392749244712993 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2785234899328859, - "normalized_score": 3.8031319910514525 - }, - "musr": { - "name": "MUSR", - "value": 0.334125, - "normalized_score": 3.0322916666666675 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.13530585106382978, - "normalized_score": 3.9228723404255303 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-02-08", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "google/gemma-2b-it", - "hub_license": "gemma", - "hub_hearts": 713, - "params_billions": 2.506, - "co2_cost": 0.7059006620955186 - } - }, - { - "id": "google/gemma-7b_bfloat16_a0eac5b80dba224e6ed79d306df50b1e92c2125d_False", - "model": { - "name": "google/gemma-7b", - "sha": "a0eac5b80dba224e6ed79d306df50b1e92c2125d", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "GemmaForCausalLM", - "average_score": 15.442818570272307, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2659321710838353, - "normalized_score": 26.593217108383534 - }, - "bbh": { - "name": "BBH", - "value": 0.43615285239286355, - "normalized_score": 21.11609932329174 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07401812688821752, - "normalized_score": 7.401812688821751 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28691275167785235, - "normalized_score": 4.921700223713646 - }, - "musr": { - "name": "MUSR", - "value": 0.4062395833333334, - "normalized_score": 10.979947916666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2947972074468085, - "normalized_score": 21.644134160756497 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-02-08", - "submission_date": "2024-06-08", - "generation": 0, - "base_model": "google/gemma-7b", - "hub_license": "gemma", - "hub_hearts": 3134, - "params_billions": 8.538, - "co2_cost": 2.5098281156398636 - } - }, - { - "id": "google/gemma-7b-it_bfloat16_18329f019fb74ca4b24f97371785268543d687d2_True", - "model": { - "name": "google/gemma-7b-it", - "sha": "18329f019fb74ca4b24f97371785268543d687d2", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "GemmaForCausalLM", - "average_score": 13.067087110466217, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3868324933398937, - "normalized_score": 38.68324933398937 - }, - "bbh": { - "name": "BBH", - "value": 0.36459012743300967, - "normalized_score": 11.940832085290182 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02945619335347432, - "normalized_score": 2.9456193353474323 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28439597315436244, - "normalized_score": 4.5861297539149914 - }, - "musr": { - "name": "MUSR", - "value": 0.42742708333333335, - "normalized_score": 12.528385416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16946476063829788, - "normalized_score": 7.7183067375886525 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - 
"is_official_provider": true - }, - "metadata": { - "upload_date": "2024-02-13", - "submission_date": "2024-06-12", - "generation": 1, - "base_model": "google/gemma-7b", - "hub_license": "gemma", - "hub_hearts": 1158, - "params_billions": 8.538, - "co2_cost": 1.1990245623069617 - } - }, - { - "id": "google/mt5-base_float16_2eb15465c5dd7f72a8f7984306ad05ebc3dd1e1f_False", - "model": { - "name": "google/mt5-base", - "sha": "2eb15465c5dd7f72a8f7984306ad05ebc3dd1e1f", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MT5ForConditionalGeneration", - "average_score": 3.716339524462038, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1645157072124186, - "normalized_score": 16.45157072124186 - }, - "bbh": { - "name": "BBH", - "value": 0.28831600228488835, - "normalized_score": 1.29855138817669 - }, - "math": { - "name": "MATH Level 5", - "value": 0.00906344410876133, - "normalized_score": 0.906344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23909395973154363, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.36720833333333336, - "normalized_score": 2.8677083333333346 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10696476063829788, - "normalized_score": 0.7738622931442081 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2022-03-02", - "submission_date": "2024-09-06", - "generation": 0, - "base_model": "google/mt5-base", - "hub_license": "apache-2.0", - "hub_hearts": 220, - "params_billions": 0.39, - "co2_cost": 0.40007963632893057 - } - }, - { - "id": "google/mt5-small_float16_73fb5dbe4756edadc8fbe8c769b0a109493acf7a_False", - "model": { - "name": "google/mt5-small", - "sha": "73fb5dbe4756edadc8fbe8c769b0a109493acf7a", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MT5ForConditionalGeneration", - "average_score": 4.2559281732773515, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17180968718555653, - "normalized_score": 17.180968718555654 - }, - "bbh": { - "name": "BBH", - "value": 0.2765842029929075, - "normalized_score": 1.0709714795008913 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2424496644295302, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.38575, - "normalized_score": 5.91875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11228390957446809, - "normalized_score": 1.3648788416075646 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2022-03-02", - "submission_date": "2024-09-06", - "generation": 0, - "base_model": "google/mt5-small", - "hub_license": "apache-2.0", - "hub_hearts": 139, - "params_billions": 0.17, - "co2_cost": 0.36098734872154653 - } - }, - { - "id": "google/mt5-xl_float16_63fc6450d80515b48e026b69ef2fbbd426433e84_False", - "model": { - "name": "google/mt5-xl", - "sha": "63fc6450d80515b48e026b69ef2fbbd426433e84", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MT5ForConditionalGeneration", - 
"average_score": 5.191420153031625, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19596448534333347, - "normalized_score": 19.596448534333348 - }, - "bbh": { - "name": "BBH", - "value": 0.304735837080435, - "normalized_score": 3.2824619143354035 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.3795208333333333, - "normalized_score": 5.040104166666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11195146276595745, - "normalized_score": 1.3279403073286051 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2022-03-02", - "submission_date": "2024-09-06", - "generation": 0, - "base_model": "google/mt5-xl", - "hub_license": "apache-2.0", - "hub_hearts": 23, - "params_billions": 3.23, - "co2_cost": 1.8075344491543635 - } - }, - { - "id": "google/mt5-xxl_float16_e07c395916dfbc315d4e5e48b4a54a1e8821b5c0_False", - "model": { - "name": "google/mt5-xxl", - "sha": "e07c395916dfbc315d4e5e48b4a54a1e8821b5c0", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "T5ForConditionalGeneration", - "average_score": 5.10307678308611, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23575668116154028, - "normalized_score": 23.575668116154027 - }, - "bbh": { - "name": "BBH", - "value": 0.2959344159116905, - "normalized_score": 2.504710800447747 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24161073825503357, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.36894791666666665, - "normalized_score": 3.5518229166666675 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10887632978723404, - "normalized_score": 0.9862588652482256 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2022-03-02", - "submission_date": "2024-09-06", - "generation": 0, - "base_model": "google/mt5-xxl", - "hub_license": "apache-2.0", - "hub_hearts": 68, - "params_billions": 11.9, - "co2_cost": 4.563877005244583 - } - }, - { - "id": "google/recurrentgemma-2b_bfloat16_195f13c55b371fc721eda0662c00c64642c70e17_False", - "model": { - "name": "google/recurrentgemma-2b", - "sha": "195f13c55b371fc721eda0662c00c64642c70e17", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "RecurrentGemmaForCausalLM", - "average_score": 7.01512699699078, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3017028151970106, - "normalized_score": 30.170281519701057 - }, - "bbh": { - "name": "BBH", - "value": 0.31973582830084474, - "normalized_score": 4.8203622310347365 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02039274924471299, - "normalized_score": 2.0392749244712993 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24580536912751677, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3445729166666667, - "normalized_score": 3.1049479166666667 - }, - "mmlu_pro": { - "name": 
"MMLU-PRO", - "value": 0.11760305851063829, - "normalized_score": 1.9558953900709206 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-04-06", - "submission_date": "2024-06-13", - "generation": 0, - "base_model": "google/recurrentgemma-2b", - "hub_license": "gemma", - "hub_hearts": 91, - "params_billions": 2.683, - "co2_cost": 5.5350494252772 - } - }, - { - "id": "google/recurrentgemma-2b-it_bfloat16_150248167d171fbdf4b02e7d28a4b3d749e570f6_True", - "model": { - "name": "google/recurrentgemma-2b-it", - "sha": "150248167d171fbdf4b02e7d28a4b3d749e570f6", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "RecurrentGemmaForCausalLM", - "average_score": 7.995905374047496, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2949329999955673, - "normalized_score": 29.493299999556733 - }, - "bbh": { - "name": "BBH", - "value": 0.33300047272606553, - "normalized_score": 7.978763840391559 - }, - "math": { - "name": "MATH Level 5", - "value": 0.019637462235649546, - "normalized_score": 1.9637462235649545 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2533557046979866, - "normalized_score": 0.44742729306487633 - }, - "musr": { - "name": "MUSR", - "value": 0.3340625, - "normalized_score": 3.6244791666666676 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1402094414893617, - "normalized_score": 4.467715721040189 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-04-08", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "google/recurrentgemma-2b-it", - "hub_license": "gemma", - "hub_hearts": 110, - "params_billions": 2.683, - "co2_cost": 3.059051279422074 - } - }, - { - "id": "google/recurrentgemma-9b_bfloat16_7b0ed98fb889ba8bdfa7c690f08f2e57a7c48dae_False", - "model": { - "name": "google/recurrentgemma-9b", - "sha": "7b0ed98fb889ba8bdfa7c690f08f2e57a7c48dae", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "RecurrentGemmaForCausalLM", - "average_score": 13.709460856107865, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.31159434744256354, - "normalized_score": 31.15943474425635 - }, - "bbh": { - "name": "BBH", - "value": 0.39562568669428394, - "normalized_score": 15.323368888997413 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06646525679758308, - "normalized_score": 6.646525679758309 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28523489932885904, - "normalized_score": 4.697986577181204 - }, - "musr": { - "name": "MUSR", - "value": 0.3802604166666667, - "normalized_score": 6.599218750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2604720744680851, - "normalized_score": 17.830230496453904 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-06-07", - "submission_date": "2024-07-04", - "generation": 0, - "base_model": "google/recurrentgemma-9b", - "hub_license": "gemma", - "hub_hearts": 58, - "params_billions": 9.0, - "co2_cost": 26.08737700441429 - } - }, - { - "id": 
"google/recurrentgemma-9b-it_bfloat16_43e62f98c3d496a5469ef4b18c1b11e417d68d1d_True", - "model": { - "name": "google/recurrentgemma-9b-it", - "sha": "43e62f98c3d496a5469ef4b18c1b11e417d68d1d", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "RecurrentGemmaForCausalLM", - "average_score": 19.218115006306835, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5010383560065071, - "normalized_score": 50.10383560065072 - }, - "bbh": { - "name": "BBH", - "value": 0.4367189649027647, - "normalized_score": 21.621580084740117 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06646525679758308, - "normalized_score": 6.646525679758309 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.43790625, - "normalized_score": 13.771614583333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2843251329787234, - "normalized_score": 20.480570330969268 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-06-07", - "submission_date": "2024-07-05", - "generation": 0, - "base_model": "google/recurrentgemma-9b-it", - "hub_license": "gemma", - "hub_hearts": 51, - "params_billions": 9.0, - "co2_cost": 15.127758447638215 - } - }, - { - "id": "google/switch-base-8_float16_92fe2d22b024d9937146fe097ba3d3a7ba146e1b_False", - "model": { - "name": "google/switch-base-8", - "sha": "92fe2d22b024d9937146fe097ba3d3a7ba146e1b", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "SwitchTransformersForConditionalGeneration", - "average_score": 3.2959502683966075, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15852050337548815, - "normalized_score": 15.852050337548814 - }, - "bbh": { - "name": "BBH", - "value": 0.28763132730669333, - "normalized_score": 1.7024781049821334 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.35173958333333327, - "normalized_score": 1.133333333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10979055851063829, - "normalized_score": 1.087839834515365 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2022-10-24", - "submission_date": "2024-09-06", - "generation": 0, - "base_model": "google/switch-base-8", - "hub_license": "apache-2.0", - "hub_hearts": 16, - "params_billions": 0.62, - "co2_cost": 0.2934058677548048 - } - }, - { - "id": "google/umt5-base_float16_0de9394d54f8975e71838d309de1cb496c894ab9_False", - "model": { - "name": "google/umt5-base", - "sha": "0de9394d54f8975e71838d309de1cb496c894ab9", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "UMT5ForConditionalGeneration", - "average_score": 3.516574726407488, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.174632198123202, - "normalized_score": 17.463219812320197 - }, - "bbh": { - "name": "BBH", - "value": 0.27877262328945457, - 
"normalized_score": 0.8135531788472962 - }, - "math": { - "name": "MATH Level 5", - "value": 0.004531722054380665, - "normalized_score": 0.4531722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25419463087248323, - "normalized_score": 0.5592841163310973 - }, - "musr": { - "name": "MUSR", - "value": 0.33821875, - "normalized_score": 0.9440104166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10779587765957446, - "normalized_score": 0.8662086288416063 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-07-02", - "submission_date": "2024-09-06", - "generation": 0, - "base_model": "google/umt5-base", - "hub_license": "apache-2.0", - "hub_hearts": 13, - "params_billions": -1.0, - "co2_cost": 1.3360920891163983 - } - }, - { - "id": "goulue5/merging_LLM_float16_587115b34d72ef957fee2d8348b3ade3ae06d4a8_False", - "model": { - "name": "goulue5/merging_LLM", - "sha": "587115b34d72ef957fee2d8348b3ade3ae06d4a8", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 16.712100078199907, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.32326006108237254, - "normalized_score": 32.32600610823725 - }, - "bbh": { - "name": "BBH", - "value": 0.4216498611590102, - "normalized_score": 18.28283029131199 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09667673716012085, - "normalized_score": 9.667673716012084 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.43328125, - "normalized_score": 12.76015625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29579454787234044, - "normalized_score": 21.75494976359338 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-21", - "submission_date": "2024-11-22", - "generation": 0, - "base_model": "goulue5/merging_LLM", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 1.544, - "co2_cost": 1.102930181856804 - } - }, - { - "id": "gpt2_bfloat16_607a30d783dfa663caf39e06633721c8d4cfcd7e_False", - "model": { - "name": "gpt2", - "sha": "607a30d783dfa663caf39e06633721c8d4cfcd7e", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "GPT2LMHeadModel", - "average_score": 6.39102973137443, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1934168007553292, - "normalized_score": 19.34168007553292 - }, - "bbh": { - "name": "BBH", - "value": 0.3036385401516729, - "normalized_score": 2.7142978473877357 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0030211480362537764, - "normalized_score": 0.3021148036253776 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.43241666666666667, - "normalized_score": 12.985416666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1149434840425532, - "normalized_score": 1.6603871158392434 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - 
"upload_date": "2022-03-02", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "gpt2", - "hub_license": "mit", - "hub_hearts": 2628, - "params_billions": 0.137, - "co2_cost": 0.32392801241644914 - } - }, - { - "id": "gpt2_float16_607a30d783dfa663caf39e06633721c8d4cfcd7e_False", - "model": { - "name": "gpt2", - "sha": "607a30d783dfa663caf39e06633721c8d4cfcd7e", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "GPT2LMHeadModel", - "average_score": 5.977736928104574, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.08333333333333333, - "normalized_score": 8.333333333333332 - }, - "bbh": { - "name": "BBH", - "value": 0.30833333333333335, - "normalized_score": 9.199754901960786 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23333333333333334, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.4333333333333333, - "normalized_score": 18.333333333333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1, - "normalized_score": 0.0 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2022-03-02", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "gpt2", - "hub_license": "mit", - "hub_hearts": 2628, - "params_billions": 0.137, - "co2_cost": 0.039245173068546815 - } - }, - { - "id": "gradientai/Llama-3-8B-Instruct-Gradient-1048k_bfloat16_8697fb25cb77c852311e03b4464b8467471d56a4_True", - "model": { - "name": "gradientai/Llama-3-8B-Instruct-Gradient-1048k", - "sha": "8697fb25cb77c852311e03b4464b8467471d56a4", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.283333977044872, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4455588948434598, - "normalized_score": 44.55588948434598 - }, - "bbh": { - "name": "BBH", - "value": 0.4345903107069573, - "normalized_score": 21.01052898715872 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05362537764350453, - "normalized_score": 5.362537764350453 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.42975, - "normalized_score": 13.518749999999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29404920212765956, - "normalized_score": 21.56102245862884 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-04-29", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "gradientai/Llama-3-8B-Instruct-Gradient-1048k", - "hub_license": "llama3", - "hub_hearts": 682, - "params_billions": 8.03, - "co2_cost": 1.7743289735325676 - } - }, - { - "id": "grimjim/DeepSauerHuatuoSkywork-R1-o1-Llama-3.1-8B_bfloat16_e62f1fd16eebdfa3f3084a44a8a37176ecb4074f_False", - "model": { - "name": "grimjim/DeepSauerHuatuoSkywork-R1-o1-Llama-3.1-8B", - "sha": "e62f1fd16eebdfa3f3084a44a8a37176ecb4074f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.940480296007976, - "has_chat_template": 
false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4797060687863757, - "normalized_score": 47.97060687863757 - }, - "bbh": { - "name": "BBH", - "value": 0.5269400362212973, - "normalized_score": 32.76923482357123 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22205438066465258, - "normalized_score": 22.20543806646526 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33808724832214765, - "normalized_score": 11.74496644295302 - }, - "musr": { - "name": "MUSR", - "value": 0.44078124999999996, - "normalized_score": 14.097656249999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3956948138297872, - "normalized_score": 32.8549793144208 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-30", - "submission_date": "2025-01-30", - "generation": 1, - "base_model": "grimjim/DeepSauerHuatuoSkywork-R1-o1-Llama-3.1-8B (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 4, - "params_billions": 8.03, - "co2_cost": 1.3692639052605395 - } - }, - { - "id": "grimjim/Gigantes-v1-gemma2-9b-it_bfloat16_f912b7cf7f07a593d0a4262f9f20a7adb0a93f9d_False", - "model": { - "name": "grimjim/Gigantes-v1-gemma2-9b-it", - "sha": "f912b7cf7f07a593d0a4262f9f20a7adb0a93f9d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 33.23742832143159, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.692454908531585, - "normalized_score": 69.2454908531585 - }, - "bbh": { - "name": "BBH", - "value": 0.597792552822268, - "normalized_score": 42.79787696550411 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21450151057401812, - "normalized_score": 21.45015105740181 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35318791946308725, - "normalized_score": 13.758389261744966 - }, - "musr": { - "name": "MUSR", - "value": 0.45547916666666666, - "normalized_score": 16.33489583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.42253989361702127, - "normalized_score": 35.83776595744681 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-28", - "submission_date": "2024-12-28", - "generation": 1, - "base_model": "grimjim/Gigantes-v1-gemma2-9b-it (Merge)", - "hub_license": "gemma", - "hub_hearts": 2, - "params_billions": 9.242, - "co2_cost": 2.995648654632423 - } - }, - { - "id": "grimjim/Gigantes-v2-gemma2-9b-it_bfloat16_5c410fbc679de69de48b25d18bb7e374f4a3471f_False", - "model": { - "name": "grimjim/Gigantes-v2-gemma2-9b-it", - "sha": "5c410fbc679de69de48b25d18bb7e374f4a3471f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 33.876785775265446, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7350696152874374, - "normalized_score": 73.50696152874373 - }, - "bbh": { - "name": "BBH", - "value": 0.5986559388303995, - "normalized_score": 42.70163259850296 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20166163141993956, - "normalized_score": 20.166163141993955 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35151006711409394, - "normalized_score": 13.534675615212524 - }, - "musr": { - 
"name": "MUSR", - "value": 0.45947916666666666, - "normalized_score": 17.13489583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4259474734042553, - "normalized_score": 36.21638593380615 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "grimjim/Gigantes-v2-gemma2-9b-it (Merge)", - "hub_license": "gemma", - "hub_hearts": 1, - "params_billions": 9.242, - "co2_cost": 3.0377316832337793 - } - }, - { - "id": "grimjim/Gigantes-v3-gemma2-9b-it_bfloat16_dcd1a7dd037a17f07819da0c70b96e987678206c_False", - "model": { - "name": "grimjim/Gigantes-v3-gemma2-9b-it", - "sha": "dcd1a7dd037a17f07819da0c70b96e987678206c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 33.49043715511172, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.697625633319592, - "normalized_score": 69.7625633319592 - }, - "bbh": { - "name": "BBH", - "value": 0.5983513792324827, - "normalized_score": 42.795367767587656 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20996978851963746, - "normalized_score": 20.996978851963746 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3565436241610738, - "normalized_score": 14.205816554809845 - }, - "musr": { - "name": "MUSR", - "value": 0.4608125, - "normalized_score": 17.33489583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4226230053191489, - "normalized_score": 35.847000591016545 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "grimjim/Gigantes-v3-gemma2-9b-it (Merge)", - "hub_license": "gemma", - "hub_hearts": 0, - "params_billions": 9.242, - "co2_cost": 3.035863590801693 - } - }, - { - "id": "grimjim/HuatuoSkywork-o1-Llama-3.1-8B_bfloat16_85bfa74ab2642914a532eb52a1720da5bc2afb00_False", - "model": { - "name": "grimjim/HuatuoSkywork-o1-Llama-3.1-8B", - "sha": "85bfa74ab2642914a532eb52a1720da5bc2afb00", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.47799926837126, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3961499931293413, - "normalized_score": 39.61499931293413 - }, - "bbh": { - "name": "BBH", - "value": 0.48863582396592203, - "normalized_score": 28.332777575588082 - }, - "math": { - "name": "MATH Level 5", - "value": 0.38821752265861026, - "normalized_score": 38.82175226586103 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29278523489932884, - "normalized_score": 5.7046979865771785 - }, - "musr": { - "name": "MUSR", - "value": 0.38385416666666666, - "normalized_score": 11.115104166666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30950797872340424, - "normalized_score": 23.278664302600472 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-03", - "generation": 1, - "base_model": 
"grimjim/HuatuoSkywork-o1-Llama-3.1-8B (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3971466462844657 - } - }, - { - "id": "grimjim/Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge_bfloat16_7a8d334dce0a2ce948f75612b8d3a61c53d094aa_False", - "model": { - "name": "grimjim/Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge", - "sha": "7a8d334dce0a2ce948f75612b8d3a61c53d094aa", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.83668391168813, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42712447417297217, - "normalized_score": 42.71244741729721 - }, - "bbh": { - "name": "BBH", - "value": 0.4961694535006833, - "normalized_score": 28.258014912987704 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09969788519637462, - "normalized_score": 9.969788519637463 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.40432291666666664, - "normalized_score": 9.540364583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3625332446808511, - "normalized_score": 29.170360520094558 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-28", - "submission_date": "2024-06-29", - "generation": 1, - "base_model": "grimjim/Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge (Merge)", - "hub_license": "llama3", - "hub_hearts": 3, - "params_billions": 8.03, - "co2_cost": 1.0950967078398595 - } - }, - { - "id": "grimjim/Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge_bfloat16_8f4d460ea20e24e48914156af7def305c0cd347f_True", - "model": { - "name": "grimjim/Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge", - "sha": "8f4d460ea20e24e48914156af7def305c0cd347f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.04094205100742, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6805897241541332, - "normalized_score": 68.05897241541332 - }, - "bbh": { - "name": "BBH", - "value": 0.5021734091176594, - "normalized_score": 29.073285914476486 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0891238670694864, - "normalized_score": 8.91238670694864 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2625838926174497, - "normalized_score": 1.6778523489932917 - }, - "musr": { - "name": "MUSR", - "value": 0.38851041666666664, - "normalized_score": 6.697135416666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3684341755319149, - "normalized_score": 29.826019503546092 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-28", - "submission_date": "2024-09-17", - "generation": 1, - "base_model": "grimjim/Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge (Merge)", - "hub_license": "llama3", - "hub_hearts": 3, - "params_billions": 8.0, - "co2_cost": 1.233883346225575 - } - }, - { - "id": "grimjim/Llama-3.1-8B-Instruct-abliterated_via_adapter_bfloat16_b37ab2f859c96b125ff1c45c7ff0e267aa229156_False", - "model": { - "name": "grimjim/Llama-3.1-8B-Instruct-abliterated_via_adapter", - "sha": 
"b37ab2f859c96b125ff1c45c7ff0e267aa229156", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.217301725050117, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.48695018107510296, - "normalized_score": 48.69501810751029 - }, - "bbh": { - "name": "BBH", - "value": 0.510526564708187, - "normalized_score": 29.415990262794168 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13972809667673716, - "normalized_score": 13.972809667673717 - }, - "gpqa": { - "name": "GPQA", - "value": 0.313758389261745, - "normalized_score": 8.501118568232664 - }, - "musr": { - "name": "MUSR", - "value": 0.40103125, - "normalized_score": 9.262239583333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3651097074468085, - "normalized_score": 29.4566341607565 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-25", - "submission_date": "2024-09-17", - "generation": 1, - "base_model": "grimjim/Llama-3.1-8B-Instruct-abliterated_via_adapter (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 29, - "params_billions": 8.03, - "co2_cost": 1.8038294368260206 - } - }, - { - "id": "grimjim/Llama-3.1-Bonsaikraft-8B-Instruct_bfloat16_3291a835eb6591c97484502476ebbde38380849b_False", - "model": { - "name": "grimjim/Llama-3.1-Bonsaikraft-8B-Instruct", - "sha": "3291a835eb6591c97484502476ebbde38380849b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.856640245514402, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42500121898784116, - "normalized_score": 42.500121898784116 - }, - "bbh": { - "name": "BBH", - "value": 0.5286855891530357, - "normalized_score": 32.58754251798589 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13141993957703926, - "normalized_score": 13.141993957703926 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.4235104166666667, - "normalized_score": 11.038802083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3764128989361702, - "normalized_score": 30.712544326241126 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-30", - "submission_date": "2025-01-25", - "generation": 1, - "base_model": "grimjim/Llama-3.1-Bonsaikraft-8B-Instruct (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3560108759332417 - } - }, - { - "id": "grimjim/Llama-Nephilim-Metamorphosis-v2-8B_bfloat16_bef087901c4163991ec649608c7c07c759e4fe7c_False", - "model": { - "name": "grimjim/Llama-Nephilim-Metamorphosis-v2-8B", - "sha": "bef087901c4163991ec649608c7c07c759e4fe7c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.00221313012307, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4544519652300341, - "normalized_score": 45.44519652300341 - }, - "bbh": { - "name": "BBH", - "value": 
0.5013477378974034, - "normalized_score": 28.18246234355982 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13972809667673716, - "normalized_score": 13.972809667673717 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32298657718120805, - "normalized_score": 9.731543624161072 - }, - "musr": { - "name": "MUSR", - "value": 0.40909375000000003, - "normalized_score": 9.470052083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38090093085106386, - "normalized_score": 31.21121453900709 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-14", - "submission_date": "2025-01-02", - "generation": 1, - "base_model": "grimjim/Llama-Nephilim-Metamorphosis-v2-8B (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.3459978060195088 - } - }, - { - "id": "grimjim/Llama3.1-SuperNovaLite-HuatuoSkywork-o1-8B_bfloat16_d4012c41f98f7f1fd33104a48d49319b382e8554_False", - "model": { - "name": "grimjim/Llama3.1-SuperNovaLite-HuatuoSkywork-o1-8B", - "sha": "d4012c41f98f7f1fd33104a48d49319b382e8554", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.961665053235674, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43659157701565177, - "normalized_score": 43.65915770156518 - }, - "bbh": { - "name": "BBH", - "value": 0.5287189378780882, - "normalized_score": 32.95297252204676 - }, - "math": { - "name": "MATH Level 5", - "value": 0.30060422960725075, - "normalized_score": 30.060422960725074 - }, - "gpqa": { - "name": "GPQA", - "value": 0.311241610738255, - "normalized_score": 8.165548098434002 - }, - "musr": { - "name": "MUSR", - "value": 0.3998541666666666, - "normalized_score": 11.115104166666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3683510638297872, - "normalized_score": 29.81678486997636 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-07", - "submission_date": "2025-01-07", - "generation": 1, - "base_model": "grimjim/Llama3.1-SuperNovaLite-HuatuoSkywork-o1-8B (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3439985673215313 - } - }, - { - "id": "grimjim/Magnolia-v1-Gemma2-8k-9B_bfloat16_4a25a9e75e598bf249d92d84ed2cf5c707fa502e_False", - "model": { - "name": "grimjim/Magnolia-v1-Gemma2-8k-9B", - "sha": "4a25a9e75e598bf249d92d84ed2cf5c707fa502e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 25.51284798552607, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.35308536904302806, - "normalized_score": 35.308536904302805 - }, - "bbh": { - "name": "BBH", - "value": 0.5589031767575711, - "normalized_score": 36.79001219814328 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16842900302114805, - "normalized_score": 16.842900302114806 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33640939597315433, - "normalized_score": 11.521252796420578 - }, - "musr": { - "name": "MUSR", - "value": 0.46446875, - "normalized_score": 16.591927083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - 
"value": 0.4242021276595745, - "normalized_score": 36.022458628841605 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-08", - "submission_date": "2024-12-30", - "generation": 1, - "base_model": "grimjim/Magnolia-v1-Gemma2-8k-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 1, - "params_billions": 9.242, - "co2_cost": 4.0740254212442935 - } - }, - { - "id": "grimjim/Magnolia-v2-12B_bfloat16_9a66fd94f74bc63b4b944969ca0777107a97d47b_False", - "model": { - "name": "grimjim/Magnolia-v2-12B", - "sha": "9a66fd94f74bc63b4b944969ca0777107a97d47b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.577222272679126, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3506119318962575, - "normalized_score": 35.06119318962575 - }, - "bbh": { - "name": "BBH", - "value": 0.5290279354217235, - "normalized_score": 32.50462527039537 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12915407854984895, - "normalized_score": 12.915407854984895 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3187919463087248, - "normalized_score": 9.172259507829976 - }, - "musr": { - "name": "MUSR", - "value": 0.41712499999999997, - "normalized_score": 10.90729166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3601230053191489, - "normalized_score": 28.9025561465721 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-05", - "submission_date": "2024-12-30", - "generation": 1, - "base_model": "grimjim/Magnolia-v2-12B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 12.248, - "co2_cost": 2.002732247053589 - } - }, - { - "id": "grimjim/Magnolia-v2-Gemma2-8k-9B_bfloat16_f9617a4216e6719b776f426dde6dce654cd93510_False", - "model": { - "name": "grimjim/Magnolia-v2-Gemma2-8k-9B", - "sha": "f9617a4216e6719b776f426dde6dce654cd93510", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.30134506171324, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7384417789243651, - "normalized_score": 73.84417789243652 - }, - "bbh": { - "name": "BBH", - "value": 0.6015773428405322, - "normalized_score": 42.84461695501066 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2280966767371601, - "normalized_score": 22.80966767371601 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3573825503355705, - "normalized_score": 14.317673378076066 - }, - "musr": { - "name": "MUSR", - "value": 0.44884375, - "normalized_score": 14.972135416666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4331781914893617, - "normalized_score": 37.01979905437352 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-09", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "grimjim/Magnolia-v2-Gemma2-8k-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 3, - "params_billions": 9.242, - "co2_cost": 3.3294433701942467 - } - }, - { - "id": 
"grimjim/Magnolia-v3-12B_bfloat16_b93104eefc85fc527b87973f79e28d149eb5135e_False", - "model": { - "name": "grimjim/Magnolia-v3-12B", - "sha": "b93104eefc85fc527b87973f79e28d149eb5135e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.786766373418164, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.39649906692021614, - "normalized_score": 39.64990669202162 - }, - "bbh": { - "name": "BBH", - "value": 0.5326669270363916, - "normalized_score": 32.92491590836766 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1351963746223565, - "normalized_score": 13.51963746223565 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32550335570469796, - "normalized_score": 10.067114093959727 - }, - "musr": { - "name": "MUSR", - "value": 0.4183958333333333, - "normalized_score": 11.499479166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3615359042553192, - "normalized_score": 29.05954491725768 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-11", - "submission_date": "2024-12-28", - "generation": 1, - "base_model": "grimjim/Magnolia-v3-12B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 12.248, - "co2_cost": 2.0001082476739587 - } - }, - { - "id": "grimjim/Magnolia-v3-Gemma2-8k-9B_bfloat16_89cd67348bdb122cb8986053e66df4348cb1044a_False", - "model": { - "name": "grimjim/Magnolia-v3-Gemma2-8k-9B", - "sha": "89cd67348bdb122cb8986053e66df4348cb1044a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.353725481327054, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7378422585406721, - "normalized_score": 73.78422585406722 - }, - "bbh": { - "name": "BBH", - "value": 0.6015406636327695, - "normalized_score": 42.868229945045556 - }, - "math": { - "name": "MATH Level 5", - "value": 0.23187311178247735, - "normalized_score": 23.187311178247736 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3565436241610738, - "normalized_score": 14.205816554809845 - }, - "musr": { - "name": "MUSR", - "value": 0.4488125, - "normalized_score": 15.001562500000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43367686170212766, - "normalized_score": 37.07520685579196 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-30", - "submission_date": "2024-12-30", - "generation": 1, - "base_model": "grimjim/Magnolia-v3-Gemma2-8k-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 2, - "params_billions": 9.242, - "co2_cost": 3.41703063359954 - } - }, - { - "id": "grimjim/Magnolia-v4-12B_bfloat16_45605f7d55339291145798b55f3e613b0dfe9b11_False", - "model": { - "name": "grimjim/Magnolia-v4-12B", - "sha": "45605f7d55339291145798b55f3e613b0dfe9b11", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.593840281150975, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.34179421712168156, - "normalized_score": 34.179421712168164 - }, - "bbh": { 
- "name": "BBH", - "value": 0.5430894084668724, - "normalized_score": 34.57748311994204 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13141993957703926, - "normalized_score": 13.141993957703926 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32802013422818793, - "normalized_score": 10.402684563758392 - }, - "musr": { - "name": "MUSR", - "value": 0.42112499999999997, - "normalized_score": 13.573958333333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3671875, - "normalized_score": 29.6875 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-11", - "submission_date": "2025-02-21", - "generation": 1, - "base_model": "grimjim/Magnolia-v4-12B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 12.248, - "co2_cost": 0.823437390007458 - } - }, - { - "id": "grimjim/Magnolia-v5a-12B_bfloat16_e9e38c0b451c9e7fe96ffd58a669271d684c6a2d_False", - "model": { - "name": "grimjim/Magnolia-v5a-12B", - "sha": "e9e38c0b451c9e7fe96ffd58a669271d684c6a2d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.851795395693376, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.41136185321613317, - "normalized_score": 41.136185321613326 - }, - "bbh": { - "name": "BBH", - "value": 0.5311764105029141, - "normalized_score": 32.694922623458645 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13746223564954682, - "normalized_score": 13.746223564954683 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3221476510067114, - "normalized_score": 9.61968680089485 - }, - "musr": { - "name": "MUSR", - "value": 0.4144895833333333, - "normalized_score": 11.011197916666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3601230053191489, - "normalized_score": 28.9025561465721 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-21", - "submission_date": "2025-02-21", - "generation": 1, - "base_model": "grimjim/Magnolia-v5a-12B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 12.248, - "co2_cost": 0.8485829512595052 - } - }, - { - "id": "grimjim/Magot-v1-Gemma2-8k-9B_bfloat16_afae94acb42bc0dcf1d31b7338cb79c0bcab1829_False", - "model": { - "name": "grimjim/Magot-v1-Gemma2-8k-9B", - "sha": "afae94acb42bc0dcf1d31b7338cb79c0bcab1829", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 24.587402832733005, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.29967818720993633, - "normalized_score": 29.96781872099363 - }, - "bbh": { - "name": "BBH", - "value": 0.6019447732218105, - "normalized_score": 42.81812817526611 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09894259818731117, - "normalized_score": 9.894259818731117 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3464765100671141, - "normalized_score": 12.863534675615215 - }, - "musr": { - "name": "MUSR", - "value": 0.44884375, - "normalized_score": 14.905468750000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43367686170212766, - "normalized_score": 37.07520685579196 - } - }, - 
"features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-09", - "submission_date": "2024-09-19", - "generation": 1, - "base_model": "grimjim/Magot-v1-Gemma2-8k-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 2, - "params_billions": 9.242, - "co2_cost": 5.908073960813349 - } - }, - { - "id": "grimjim/Magot-v2-Gemma2-8k-9B_bfloat16_e214d3e65d6efd4a0a9209ac615c1735aac71ec7_False", - "model": { - "name": "grimjim/Magot-v2-Gemma2-8k-9B", - "sha": "e214d3e65d6efd4a0a9209ac615c1735aac71ec7", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 32.97995555494225, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7347449212533854, - "normalized_score": 73.47449212533854 - }, - "bbh": { - "name": "BBH", - "value": 0.5896713649821103, - "normalized_score": 41.459290753905556 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20166163141993956, - "normalized_score": 20.166163141993955 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3540268456375839, - "normalized_score": 13.870246085011187 - }, - "musr": { - "name": "MUSR", - "value": 0.4343958333333333, - "normalized_score": 13.099479166666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4222905585106383, - "normalized_score": 35.810062056737586 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-27", - "submission_date": "2024-12-30", - "generation": 1, - "base_model": "grimjim/Magot-v2-Gemma2-8k-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 1, - "params_billions": 9.242, - "co2_cost": 3.096882421506472 - } - }, - { - "id": "grimjim/SauerHuatuoSkywork-o1-Llama-3.1-8B_bfloat16_a832b371328483d2f003ea87afb4e1dc58045fdc_False", - "model": { - "name": "grimjim/SauerHuatuoSkywork-o1-Llama-3.1-8B", - "sha": "a832b371328483d2f003ea87afb4e1dc58045fdc", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.68447204012895, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5219462138237654, - "normalized_score": 52.19462138237654 - }, - "bbh": { - "name": "BBH", - "value": 0.5222077363554879, - "normalized_score": 32.08877158243549 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1729607250755287, - "normalized_score": 17.29607250755287 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3213087248322148, - "normalized_score": 9.507829977628639 - }, - "musr": { - "name": "MUSR", - "value": 0.45268749999999996, - "normalized_score": 15.785937499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39910239361702127, - "normalized_score": 33.23359929078014 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-27", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "grimjim/SauerHuatuoSkywork-o1-Llama-3.1-8B (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.399417526016181 - } - }, - { - "id": 
"grimjim/llama-3-Nephilim-v1-8B_bfloat16_642799c8c768c53e831a03a1224db875116be866_False", - "model": { - "name": "grimjim/llama-3-Nephilim-v1-8B", - "sha": "642799c8c768c53e831a03a1224db875116be866", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.729737138539548, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4277239945566652, - "normalized_score": 42.772399455666516 - }, - "bbh": { - "name": "BBH", - "value": 0.5131817939007638, - "normalized_score": 29.907537489079242 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09063444108761329, - "normalized_score": 9.06344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - "musr": { - "name": "MUSR", - "value": 0.41362499999999996, - "normalized_score": 10.63645833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37957114361702127, - "normalized_score": 31.063460401891252 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-21", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "grimjim/llama-3-Nephilim-v1-8B (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.7137452405943856 - } - }, - { - "id": "grimjim/llama-3-Nephilim-v2-8B_bfloat16_924f56cdefbfaf38deb6aee3ad301ced027e142d_False", - "model": { - "name": "grimjim/llama-3-Nephilim-v2-8B", - "sha": "924f56cdefbfaf38deb6aee3ad301ced027e142d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.600249913059073, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.39222817679313116, - "normalized_score": 39.22281767931311 - }, - "bbh": { - "name": "BBH", - "value": 0.5048214936442625, - "normalized_score": 29.896263840620197 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10649546827794562, - "normalized_score": 10.649546827794563 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.3895, - "normalized_score": 7.887500000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3641123670212766, - "normalized_score": 29.34581855791962 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-26", - "submission_date": "2024-09-18", - "generation": 1, - "base_model": "grimjim/llama-3-Nephilim-v2-8B (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.4017468314255033 - } - }, - { - "id": "grimjim/llama-3-Nephilim-v2.1-8B_bfloat16_5f516d9df1778dbe53ea941a754aef73b87e8eaa_False", - "model": { - "name": "grimjim/llama-3-Nephilim-v2.1-8B", - "sha": "5f516d9df1778dbe53ea941a754aef73b87e8eaa", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.434967127814037, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.38950540122430705, - "normalized_score": 
38.95054012243071 - }, - "bbh": { - "name": "BBH", - "value": 0.5095042703104161, - "normalized_score": 29.81966445991054 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09969788519637462, - "normalized_score": 9.969788519637463 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.3935, - "normalized_score": 7.887500000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3644448138297872, - "normalized_score": 29.38275709219858 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-09", - "submission_date": "2024-09-18", - "generation": 1, - "base_model": "grimjim/llama-3-Nephilim-v2.1-8B (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.426316661154474 - } - }, - { - "id": "grimjim/llama-3-Nephilim-v3-8B_bfloat16_fd012ba05116aad7dc297d0a866ddb3345a056a1_False", - "model": { - "name": "grimjim/llama-3-Nephilim-v3-8B", - "sha": "fd012ba05116aad7dc297d0a866ddb3345a056a1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.600988872221105, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4173825449806513, - "normalized_score": 41.73825449806513 - }, - "bbh": { - "name": "BBH", - "value": 0.5012671264428366, - "normalized_score": 28.955635498374203 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09516616314199396, - "normalized_score": 9.516616314199396 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2953020134228188, - "normalized_score": 6.040268456375841 - }, - "musr": { - "name": "MUSR", - "value": 0.3989270833333334, - "normalized_score": 8.332552083333338 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3612034574468085, - "normalized_score": 29.022606382978722 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-14", - "submission_date": "2024-08-26", - "generation": 1, - "base_model": "grimjim/llama-3-Nephilim-v3-8B (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 14, - "params_billions": 8.03, - "co2_cost": 1.1281793692381223 - } - }, - { - "id": "gupta-tanish/llama-7b-dpo-baseline_bfloat16_1b5f1ef3ffa3b550619fbf64c33b6fd79e1bd559_False", - "model": { - "name": "gupta-tanish/llama-7b-dpo-baseline", - "sha": "1b5f1ef3ffa3b550619fbf64c33b6fd79e1bd559", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 11.857290104453797, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.26930433472076315, - "normalized_score": 26.930433472076317 - }, - "bbh": { - "name": "BBH", - "value": 0.3896894398264714, - "normalized_score": 14.380522116367189 - }, - "math": { - "name": "MATH Level 5", - "value": 0.019637462235649546, - "normalized_score": 1.9637462235649545 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2625838926174497, - "normalized_score": 1.6778523489932917 - }, - "musr": { - "name": "MUSR", - "value": 0.445625, - "normalized_score": 14.769791666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - 
"value": 0.20279255319148937, - "normalized_score": 11.421394799054372 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-29", - "submission_date": "2024-09-29", - "generation": 1, - "base_model": "gupta-tanish/llama-7b-dpo-baseline (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 6.738, - "co2_cost": 1.5626329206994065 - } - }, - { - "id": "gz987/qwen2.5-7b-cabs-v0.1_bfloat16_5ef7ddd3d7c58504b8bbdee213c37105afade2a9_True", - "model": { - "name": "gz987/qwen2.5-7b-cabs-v0.1", - "sha": "5ef7ddd3d7c58504b8bbdee213c37105afade2a9", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.56161315485646, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7505817896514582, - "normalized_score": 75.05817896514581 - }, - "bbh": { - "name": "BBH", - "value": 0.5481580818735207, - "normalized_score": 35.838183211484505 - }, - "math": { - "name": "MATH Level 5", - "value": 0.479607250755287, - "normalized_score": 47.9607250755287 - }, - "gpqa": { - "name": "GPQA", - "value": 0.313758389261745, - "normalized_score": 8.501118568232664 - }, - "musr": { - "name": "MUSR", - "value": 0.437625, - "normalized_score": 14.169791666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4405751329787234, - "normalized_score": 37.841681442080386 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-17", - "submission_date": "2025-02-18", - "generation": 1, - "base_model": "gz987/qwen2.5-7b-cabs-v0.1 (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.6224453856348778 - } - }, - { - "id": "gz987/qwen2.5-7b-cabs-v0.2_bfloat16_8e5dd3c00616adb80d49f5a83cdd01f1794f7662_True", - "model": { - "name": "gz987/qwen2.5-7b-cabs-v0.2", - "sha": "8e5dd3c00616adb80d49f5a83cdd01f1794f7662", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.614018827547945, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7417640748768822, - "normalized_score": 74.17640748768822 - }, - "bbh": { - "name": "BBH", - "value": 0.5516262466675281, - "normalized_score": 36.27590658422937 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4901812688821752, - "normalized_score": 49.01812688821752 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.44286458333333334, - "normalized_score": 14.858072916666671 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43974401595744683, - "normalized_score": 37.74933510638298 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-18", - "submission_date": "2025-02-18", - "generation": 1, - "base_model": "gz987/qwen2.5-7b-cabs-v0.2 (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.6512716382261822 - } - }, - { - "id": 
"gz987/qwen2.5-7b-cabs-v0.3_bfloat16_2ad9b1f13a2e4dc4d6eed55eb706050c10232188_True", - "model": { - "name": "gz987/qwen2.5-7b-cabs-v0.3", - "sha": "2ad9b1f13a2e4dc4d6eed55eb706050c10232188", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.93504663341147, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7569515552068511, - "normalized_score": 75.69515552068512 - }, - "bbh": { - "name": "BBH", - "value": 0.5494465314719504, - "normalized_score": 35.95665199827286 - }, - "math": { - "name": "MATH Level 5", - "value": 0.493202416918429, - "normalized_score": 49.320241691842895 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.44295833333333334, - "normalized_score": 15.236458333333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4401595744680851, - "normalized_score": 37.79550827423168 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-18", - "submission_date": "2025-02-18", - "generation": 1, - "base_model": "gz987/qwen2.5-7b-cabs-v0.3 (Merge)", - "hub_license": "mit", - "hub_hearts": 3, - "params_billions": 7.616, - "co2_cost": 0.6308176980387323 - } - }, - { - "id": "gz987/qwen2.5-7b-cabs-v0.4_bfloat16_642e0c1fd2012cd95e7236acbb0c245a60d1f391_True", - "model": { - "name": "gz987/qwen2.5-7b-cabs-v0.4", - "sha": "642e0c1fd2012cd95e7236acbb0c245a60d1f391", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.88194596713293, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7582503313430586, - "normalized_score": 75.82503313430587 - }, - "bbh": { - "name": "BBH", - "value": 0.5524401094760039, - "normalized_score": 36.358438375339254 - }, - "math": { - "name": "MATH Level 5", - "value": 0.48489425981873113, - "normalized_score": 48.48942598187311 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.44295833333333334, - "normalized_score": 15.169791666666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4395777925531915, - "normalized_score": 37.730865839243506 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-18", - "submission_date": "2025-02-18", - "generation": 1, - "base_model": "gz987/qwen2.5-7b-cabs-v0.4 (Merge)", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 0.6297121050369008 - } - }, - { - "id": "h2oai/h2o-danube-1.8b-chat_float16_ff4789b36ed0875184b8c67697e434dbd63bb04f_False", - "model": { - "name": "h2oai/h2o-danube-1.8b-chat", - "sha": "ff4789b36ed0875184b8c67697e434dbd63bb04f", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 6.953761979021041, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2198699450790569, - "normalized_score": 21.986994507905692 - }, - "bbh": { - 
"name": "BBH", - "value": 0.3219657593234448, - "normalized_score": 5.269859154260847 - }, - "math": { - "name": "MATH Level 5", - "value": 0.013595166163141994, - "normalized_score": 1.3595166163141994 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25419463087248323, - "normalized_score": 0.5592841163310973 - }, - "musr": { - "name": "MUSR", - "value": 0.3988645833333333, - "normalized_score": 9.058072916666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.13139960106382978, - "normalized_score": 3.4888445626477527 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-01-25", - "submission_date": "2025-03-05", - "generation": 0, - "base_model": "h2oai/h2o-danube-1.8b-chat", - "hub_license": "apache-2.0", - "hub_hearts": 54, - "params_billions": 1.831, - "co2_cost": 0.2625451227687596 - } - }, - { - "id": "h2oai/h2o-danube3-4b-base_bfloat16_6bdf2f1e317143c998b88d9e9d72facc621a863f_False", - "model": { - "name": "h2oai/h2o-danube3-4b-base", - "sha": "6bdf2f1e317143c998b88d9e9d72facc621a863f", - "precision": "bfloat16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 10.0908487494014, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23380851695722904, - "normalized_score": 23.380851695722903 - }, - "bbh": { - "name": "BBH", - "value": 0.3599083951265592, - "normalized_score": 10.564444044561542 - }, - "math": { - "name": "MATH Level 5", - "value": 0.022658610271903322, - "normalized_score": 2.2658610271903323 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.37781250000000005, - "normalized_score": 6.526562500000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2109375, - "normalized_score": 12.326388888888888 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-04", - "submission_date": "2024-08-10", - "generation": 0, - "base_model": "h2oai/h2o-danube3-4b-base", - "hub_license": "apache-2.0", - "hub_hearts": 21, - "params_billions": 3.962, - "co2_cost": 0.8890039292136087 - } - }, - { - "id": "h2oai/h2o-danube3-4b-chat_float16_1e5c6fa6620f8bf078958069ab4581cd88e0202c_True", - "model": { - "name": "h2oai/h2o-danube3-4b-chat", - "sha": "1e5c6fa6620f8bf078958069ab4581cd88e0202c", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 11.571247407067567, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3628771659197596, - "normalized_score": 36.28771659197596 - }, - "bbh": { - "name": "BBH", - "value": 0.3466170643135169, - "normalized_score": 8.839702966263845 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04078549848942598, - "normalized_score": 4.078549848942599 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.378125, - "normalized_score": 5.232291666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.22282247340425532, - "normalized_score": 13.6469414893617 - } - }, - "features": { - 
"is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-04", - "submission_date": "2024-07-15", - "generation": 0, - "base_model": "h2oai/h2o-danube3-4b-chat", - "hub_license": "apache-2.0", - "hub_hearts": 66, - "params_billions": 3.962, - "co2_cost": 0.9252426738721852 - } - }, - { - "id": "h2oai/h2o-danube3-500m-chat_float16_c202f976c26875541e738ea978c8158fa536da9a_True", - "model": { - "name": "h2oai/h2o-danube3-500m-chat", - "sha": "c202f976c26875541e738ea978c8158fa536da9a", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.204440277019856, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2207941594968018, - "normalized_score": 22.079415949680183 - }, - "bbh": { - "name": "BBH", - "value": 0.3034691168308313, - "normalized_score": 3.065370444981646 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01661631419939577, - "normalized_score": 1.6616314199395772 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23070469798657717, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.34339583333333334, - "normalized_score": 2.824479166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11436170212765957, - "normalized_score": 1.595744680851063 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-04", - "submission_date": "2024-10-11", - "generation": 0, - "base_model": "h2oai/h2o-danube3-500m-chat", - "hub_license": "apache-2.0", - "hub_hearts": 34, - "params_billions": 0.514, - "co2_cost": 0.43780670218272594 - } - }, - { - "id": "h2oai/h2o-danube3.1-4b-chat_bfloat16_e649b5c5844432e0b3e1b1102b6218604e6cbdb8_True", - "model": { - "name": "h2oai/h2o-danube3.1-4b-chat", - "sha": "e649b5c5844432e0b3e1b1102b6218604e6cbdb8", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.41212819519095, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5021121734774842, - "normalized_score": 50.211217347748416 - }, - "bbh": { - "name": "BBH", - "value": 0.3608421638178268, - "normalized_score": 10.942062527495287 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03323262839879154, - "normalized_score": 3.3232628398791544 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28523489932885904, - "normalized_score": 4.697986577181204 - }, - "musr": { - "name": "MUSR", - "value": 0.41015625, - "normalized_score": 10.202864583333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2718583776595745, - "normalized_score": 19.095375295508276 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-29", - "submission_date": "2024-11-29", - "generation": 0, - "base_model": "h2oai/h2o-danube3.1-4b-chat", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 3.962, - "co2_cost": 0.5982811536989042 - } - }, - { - "id": "haoranxu/ALMA-13B-R_bfloat16_b69ebad694274b929cfcf3db29dd7bb93d752e39_False", - "model": { - "name": "haoranxu/ALMA-13B-R", - "sha": 
"b69ebad694274b929cfcf3db29dd7bb93d752e39", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 3.8773019782821123, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.003921816336210145, - "normalized_score": 0.3921816336210145 - }, - "bbh": { - "name": "BBH", - "value": 0.345656261205981, - "normalized_score": 8.819669166822672 - }, - "math": { - "name": "MATH Level 5", - "value": 0.017371601208459216, - "normalized_score": 1.7371601208459215 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2575503355704698, - "normalized_score": 1.0067114093959737 - }, - "musr": { - "name": "MUSR", - "value": 0.35279166666666667, - "normalized_score": 2.232291666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.18168218085106383, - "normalized_score": 9.075797872340424 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-01-17", - "submission_date": "2024-10-01", - "generation": 0, - "base_model": "haoranxu/ALMA-13B-R", - "hub_license": "mit", - "hub_hearts": 81, - "params_billions": 13.0, - "co2_cost": 1.9252597252610324 - } - }, - { - "id": "haoranxu/Llama-3-Instruct-8B-CPO-SimPO_bfloat16_3ca4b5c3a6395ff090e1039d55ac1f6120777302_True", - "model": { - "name": "haoranxu/Llama-3-Instruct-8B-CPO-SimPO", - "sha": "3ca4b5c3a6395ff090e1039d55ac1f6120777302", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.910737255485603, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7046447869430887, - "normalized_score": 70.46447869430888 - }, - "bbh": { - "name": "BBH", - "value": 0.5048301774821616, - "normalized_score": 29.762188091412483 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1027190332326284, - "normalized_score": 10.27190332326284 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29278523489932884, - "normalized_score": 5.7046979865771785 - }, - "musr": { - "name": "MUSR", - "value": 0.3566666666666667, - "normalized_score": 3.4166666666666674 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3686003989361702, - "normalized_score": 29.844488770685572 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-19", - "submission_date": "2024-07-28", - "generation": 0, - "base_model": "haoranxu/Llama-3-Instruct-8B-CPO-SimPO", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.490670077540168 - } - }, - { - "id": "haoranxu/Llama-3-Instruct-8B-SimPO_bfloat16_8346770280fa169d41d737785dd63a66e9d94501_True", - "model": { - "name": "haoranxu/Llama-3-Instruct-8B-SimPO", - "sha": "8346770280fa169d41d737785dd63a66e9d94501", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.990729234610715, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7347449212533854, - "normalized_score": 73.47449212533854 - }, - "bbh": { - "name": "BBH", - "value": 0.49792360151415016, - "normalized_score": 28.2263760762505 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08761329305135952, - 
"normalized_score": 8.761329305135952 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.35660416666666667, - "normalized_score": 3.7421875000000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37333776595744683, - "normalized_score": 30.37086288416076 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-07", - "submission_date": "2024-07-28", - "generation": 1, - "base_model": "meta-llama/Meta-Llama-3-8B-Instruct", - "hub_license": "llama3", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.1591559961402582 - } - }, - { - "id": "hatemmahmoud/qwen2.5-1.5b-sft-raft-grpo-hra-doc_float16_aeb5362a28d3f7718233553bb039cd0ac0ee04e4_True", - "model": { - "name": "hatemmahmoud/qwen2.5-1.5b-sft-raft-grpo-hra-doc", - "sha": "aeb5362a28d3f7718233553bb039cd0ac0ee04e4", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 18.16435302663075, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.41958004760701606, - "normalized_score": 41.9580047607016 - }, - "bbh": { - "name": "BBH", - "value": 0.4269926809768501, - "normalized_score": 19.805226737644773 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2175226586102719, - "normalized_score": 21.75226586102719 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.36097916666666663, - "normalized_score": 3.389062500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.277593085106383, - "normalized_score": 19.73256501182033 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-07", - "submission_date": "2025-03-12", - "generation": 0, - "base_model": "hatemmahmoud/qwen2.5-1.5b-sft-raft-grpo-hra-doc", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.544, - "co2_cost": 0.6156239180882589 - } - }, - { - "id": "hon9kon9ize/CantoneseLLMChat-v0.5_bfloat16_812eb4f168c3ea258ebb220393401db9578e0f67_False", - "model": { - "name": "hon9kon9ize/CantoneseLLMChat-v0.5", - "sha": "812eb4f168c3ea258ebb220393401db9578e0f67", - "precision": "bfloat16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 15.959800818271107, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3230849701015528, - "normalized_score": 32.30849701015528 - }, - "bbh": { - "name": "BBH", - "value": 0.43452388803059244, - "normalized_score": 20.761385180655164 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04154078549848943, - "normalized_score": 4.1540785498489425 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.4706458333333334, - "normalized_score": 18.13072916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2504155585106383, - "normalized_score": 16.712839834515364 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": 
false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-01", - "submission_date": "2024-07-07", - "generation": 0, - "base_model": "hon9kon9ize/CantoneseLLMChat-v0.5", - "hub_license": "apache-2.0", - "hub_hearts": 9, - "params_billions": 6.069, - "co2_cost": 1.6672683498631755 - } - }, - { - "id": "hon9kon9ize/CantoneseLLMChat-v1.0-7B_bfloat16_4703b1afc7aab8e3a8059432fd1c4b0aba011482_True", - "model": { - "name": "hon9kon9ize/CantoneseLLMChat-v1.0-7B", - "sha": "4703b1afc7aab8e3a8059432fd1c4b0aba011482", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 23.503869934977185, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44548353923146145, - "normalized_score": 44.54835392314614 - }, - "bbh": { - "name": "BBH", - "value": 0.4865734655539633, - "normalized_score": 28.53613616746739 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2107250755287009, - "normalized_score": 21.07250755287009 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3221476510067114, - "normalized_score": 9.61968680089485 - }, - "musr": { - "name": "MUSR", - "value": 0.3882916666666667, - "normalized_score": 6.303125000000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3784906914893617, - "normalized_score": 30.94341016548463 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-02", - "submission_date": "2024-10-10", - "generation": 1, - "base_model": "Removed", - "hub_license": "other", - "hub_hearts": 4, - "params_billions": 7.616, - "co2_cost": 3.6627929127752954 - } - }, - { - "id": "hongbai12/li-0.4-pre_bfloat16_6b5c78e54a187d3c992fa3033456e63bfaf81349_True", - "model": { - "name": "hongbai12/li-0.4-pre", - "sha": "6b5c78e54a187d3c992fa3033456e63bfaf81349", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.49263658043271, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5199725616918665, - "normalized_score": 51.99725616918665 - }, - "bbh": { - "name": "BBH", - "value": 0.6298274927108823, - "normalized_score": 46.725545099612226 - }, - "math": { - "name": "MATH Level 5", - "value": 0.49244712990936557, - "normalized_score": 49.244712990936556 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32298657718120805, - "normalized_score": 9.731543624161072 - }, - "musr": { - "name": "MUSR", - "value": 0.4513020833333334, - "normalized_score": 16.64609375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5014960106382979, - "normalized_score": 44.61066784869976 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-05", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 1.969251663521824 - } - }, - { - "id": "hotmailuser/Deepseek-qwen-modelstock-2B_bfloat16_cf45d578f711d7c11a6b376b03bbcee4159f962a_False", - "model": { - "name": "hotmailuser/Deepseek-qwen-modelstock-2B", - "sha": "cf45d578f711d7c11a6b376b03bbcee4159f962a", - "precision": "bfloat16", - "type": 
"basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 13.734549593045678, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21487431127186973, - "normalized_score": 21.487431127186973 - }, - "bbh": { - "name": "BBH", - "value": 0.3549242330959277, - "normalized_score": 10.02016891992718 - }, - "math": { - "name": "MATH Level 5", - "value": 0.33987915407854985, - "normalized_score": 33.987915407854985 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2802013422818792, - "normalized_score": 4.026845637583895 - }, - "musr": { - "name": "MUSR", - "value": 0.34745833333333337, - "normalized_score": 2.7656250000000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.19107380319148937, - "normalized_score": 10.11931146572104 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-24", - "submission_date": "2025-01-26", - "generation": 1, - "base_model": "hotmailuser/Deepseek-qwen-modelstock-2B (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 1.777, - "co2_cost": 1.1776806671769675 - } - }, - { - "id": "hotmailuser/Falcon3Slerp1-10B_bfloat16_74d06a0b8e34e4a4c83a3d1e4a4f36b0e548aeaf_False", - "model": { - "name": "hotmailuser/Falcon3Slerp1-10B", - "sha": "74d06a0b8e34e4a4c83a3d1e4a4f36b0e548aeaf", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 31.776680017784997, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5694069513335727, - "normalized_score": 56.940695133357266 - }, - "bbh": { - "name": "BBH", - "value": 0.616984966186231, - "normalized_score": 44.74398729818869 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2598187311178248, - "normalized_score": 25.981873111782477 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34395973154362414, - "normalized_score": 12.527964205816552 - }, - "musr": { - "name": "MUSR", - "value": 0.43176041666666665, - "normalized_score": 12.670052083333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4401595744680851, - "normalized_score": 37.79550827423168 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-19", - "submission_date": "2024-12-23", - "generation": 1, - "base_model": "hotmailuser/Falcon3Slerp1-10B (Merge)", - "hub_license": "other", - "hub_hearts": 1, - "params_billions": 10.306, - "co2_cost": 1.6440682251388408 - } - }, - { - "id": "hotmailuser/Falcon3Slerp2-10B_bfloat16_03059d4fe878b69ba65d248f213bc416b4a9e5ed_False", - "model": { - "name": "hotmailuser/Falcon3Slerp2-10B", - "sha": "03059d4fe878b69ba65d248f213bc416b4a9e5ed", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 31.308605732994934, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6117966994241945, - "normalized_score": 61.17966994241945 - }, - "bbh": { - "name": "BBH", - "value": 0.6164263500746402, - "normalized_score": 44.5423501026709 - }, - "math": { - "name": "MATH Level 5", - "value": 0.23187311178247735, - "normalized_score": 23.187311178247736 - 
}, - "gpqa": { - "name": "GPQA", - "value": 0.33808724832214765, - "normalized_score": 11.74496644295302 - }, - "musr": { - "name": "MUSR", - "value": 0.4095625, - "normalized_score": 9.761979166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4369182180851064, - "normalized_score": 37.43535756501182 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-19", - "submission_date": "2024-12-23", - "generation": 1, - "base_model": "hotmailuser/Falcon3Slerp2-10B (Merge)", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 10.306, - "co2_cost": 1.5033467329832644 - } - }, - { - "id": "hotmailuser/Falcon3Slerp4-10B_bfloat16_43cf3a3669f104492a0e883171da8974cf16b727_False", - "model": { - "name": "hotmailuser/Falcon3Slerp4-10B", - "sha": "43cf3a3669f104492a0e883171da8974cf16b727", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.717492522295817, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6072254950198805, - "normalized_score": 60.722549501988055 - }, - "bbh": { - "name": "BBH", - "value": 0.611433776236228, - "normalized_score": 43.75873169992713 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22885196374622357, - "normalized_score": 22.885196374622357 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3288590604026846, - "normalized_score": 10.514541387024611 - }, - "musr": { - "name": "MUSR", - "value": 0.40175, - "normalized_score": 8.785416666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4387466755319149, - "normalized_score": 37.63851950354609 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-23", - "submission_date": "2024-12-23", - "generation": 1, - "base_model": "hotmailuser/Falcon3Slerp4-10B (Merge)", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 10.306, - "co2_cost": 1.6148316233690154 - } - }, - { - "id": "hotmailuser/FalconSlerp-3B_bfloat16_b3d2a778f7188fd56d4539bfd6f18be4baf512c8_False", - "model": { - "name": "hotmailuser/FalconSlerp-3B", - "sha": "b3d2a778f7188fd56d4539bfd6f18be4baf512c8", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.47273640583823, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5694568190179834, - "normalized_score": 56.94568190179834 - }, - "bbh": { - "name": "BBH", - "value": 0.46239111387485293, - "normalized_score": 24.394008058642658 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17598187311178248, - "normalized_score": 17.598187311178247 - }, - "gpqa": { - "name": "GPQA", - "value": 0.287751677852349, - "normalized_score": 5.033557046979867 - }, - "musr": { - "name": "MUSR", - "value": 0.3989270833333333, - "normalized_score": 8.999218749999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29679188829787234, - "normalized_score": 21.86576536643026 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-06", - 
"submission_date": "2025-01-08", - "generation": 1, - "base_model": "hotmailuser/FalconSlerp-3B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.228, - "co2_cost": 0.922140828783185 - } - }, - { - "id": "hotmailuser/FalconSlerp1-7B_bfloat16_3708e7cb80f6309d5cefb68aa0be3fa8f76eb969_False", - "model": { - "name": "hotmailuser/FalconSlerp1-7B", - "sha": "3708e7cb80f6309d5cefb68aa0be3fa8f76eb969", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.681197911346505, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5394564200765082, - "normalized_score": 53.94564200765082 - }, - "bbh": { - "name": "BBH", - "value": 0.5354677787663963, - "normalized_score": 35.04308988047655 - }, - "math": { - "name": "MATH Level 5", - "value": 0.23791540785498488, - "normalized_score": 23.791540785498487 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3196308724832215, - "normalized_score": 9.284116331096197 - }, - "musr": { - "name": "MUSR", - "value": 0.44525, - "normalized_score": 15.256249999999994 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4128989361702128, - "normalized_score": 34.766548463356976 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-18", - "submission_date": "2024-12-23", - "generation": 1, - "base_model": "hotmailuser/FalconSlerp1-7B (Merge)", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 7.456, - "co2_cost": 1.2278224733096967 - } - }, - { - "id": "hotmailuser/FalconSlerp2-7B_bfloat16_219ee277427f3bc985b82d985f47f8f57fbd5236_False", - "model": { - "name": "hotmailuser/FalconSlerp2-7B", - "sha": "219ee277427f3bc985b82d985f47f8f57fbd5236", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 31.286856255059003, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6160432097944565, - "normalized_score": 61.604320979445646 - }, - "bbh": { - "name": "BBH", - "value": 0.5537805428914538, - "normalized_score": 36.81735294513948 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2983383685800604, - "normalized_score": 29.83383685800604 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3196308724832215, - "normalized_score": 9.284116331096197 - }, - "musr": { - "name": "MUSR", - "value": 0.44788541666666665, - "normalized_score": 15.285677083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4140625, - "normalized_score": 34.895833333333336 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-18", - "submission_date": "2024-12-23", - "generation": 1, - "base_model": "hotmailuser/FalconSlerp2-7B (Merge)", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 7.456, - "co2_cost": 1.2316635207541564 - } - }, - { - "id": "hotmailuser/FalconSlerp3-10B_float16_2752f054efbfee78630fcc54ac7b3366ba778042_False", - "model": { - "name": "hotmailuser/FalconSlerp3-10B", - "sha": "2752f054efbfee78630fcc54ac7b3366ba778042", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": 
"LlamaForCausalLM", - "average_score": 30.415115815404064, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6001564737119731, - "normalized_score": 60.01564737119731 - }, - "bbh": { - "name": "BBH", - "value": 0.6060288025434474, - "normalized_score": 42.818643189920245 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22734138972809667, - "normalized_score": 22.734138972809667 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33557046979865773, - "normalized_score": 11.409395973154364 - }, - "musr": { - "name": "MUSR", - "value": 0.4030833333333333, - "normalized_score": 8.585416666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4323470744680851, - "normalized_score": 36.927452718676115 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-23", - "submission_date": "2024-12-23", - "generation": 1, - "base_model": "hotmailuser/FalconSlerp3-10B (Merge)", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 10.306, - "co2_cost": 1.5993031087557148 - } - }, - { - "id": "hotmailuser/FalconSlerp3-7B_bfloat16_be57bac375fbc5bc8e5c93b3f9476c4ea20f4585_False", - "model": { - "name": "hotmailuser/FalconSlerp3-7B", - "sha": "be57bac375fbc5bc8e5c93b3f9476c4ea20f4585", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 31.531501918893497, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6096235765546527, - "normalized_score": 60.96235765546528 - }, - "bbh": { - "name": "BBH", - "value": 0.5532966528909408, - "normalized_score": 36.834016091662946 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3157099697885196, - "normalized_score": 31.570996978851962 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3187919463087248, - "normalized_score": 9.172259507829976 - }, - "musr": { - "name": "MUSR", - "value": 0.45067708333333334, - "normalized_score": 15.901302083333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.41273271276595747, - "normalized_score": 34.748079196217496 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-18", - "submission_date": "2024-12-23", - "generation": 1, - "base_model": "hotmailuser/FalconSlerp3-7B (Merge)", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 7.456, - "co2_cost": 1.2133243870772685 - } - }, - { - "id": "hotmailuser/FalconSlerp4-7B_bfloat16_2d16b7120c7b300877b65cae7ee334e9bf28894a_False", - "model": { - "name": "hotmailuser/FalconSlerp4-7B", - "sha": "2d16b7120c7b300877b65cae7ee334e9bf28894a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.51227574563949, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6284580468711907, - "normalized_score": 62.84580468711906 - }, - "bbh": { - "name": "BBH", - "value": 0.5523506352993854, - "normalized_score": 36.46810457203872 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2212990936555891, - "normalized_score": 22.129909365558913 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33221476510067116, - "normalized_score": 
10.96196868008949 - }, - "musr": { - "name": "MUSR", - "value": 0.4585208333333333, - "normalized_score": 16.98177083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4031748670212766, - "normalized_score": 33.6860963356974 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-06", - "submission_date": "2025-01-08", - "generation": 1, - "base_model": "hotmailuser/FalconSlerp4-7B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.456, - "co2_cost": 1.2299334634956316 - } - }, - { - "id": "hotmailuser/FalconSlerp6-7B_bfloat16_bfe93343c4c409a4be69fa28141ec8d42e3108ff_False", - "model": { - "name": "hotmailuser/FalconSlerp6-7B", - "sha": "bfe93343c4c409a4be69fa28141ec8d42e3108ff", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.79520632630417, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6026542906155667, - "normalized_score": 60.265429061556674 - }, - "bbh": { - "name": "BBH", - "value": 0.5383801786207648, - "normalized_score": 34.47834539078978 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20468277945619334, - "normalized_score": 20.468277945619334 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3179530201342282, - "normalized_score": 9.060402684563762 - }, - "musr": { - "name": "MUSR", - "value": 0.44921875, - "normalized_score": 15.219010416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39951795212765956, - "normalized_score": 33.279772458628834 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-19", - "submission_date": "2025-01-26", - "generation": 1, - "base_model": "hotmailuser/FalconSlerp6-7B (Merge)", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 7.456, - "co2_cost": 1.184894649891033 - } - }, - { - "id": "hotmailuser/Gemma2Crono-27B_bfloat16_68feccf9af840291c9ce4dea83bdd7b68c351f45_False", - "model": { - "name": "hotmailuser/Gemma2Crono-27B", - "sha": "68feccf9af840291c9ce4dea83bdd7b68c351f45", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 36.28874920037128, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7086164709637096, - "normalized_score": 70.86164709637096 - }, - "bbh": { - "name": "BBH", - "value": 0.6505341690680219, - "normalized_score": 50.10341199129857 - }, - "math": { - "name": "MATH Level 5", - "value": 0.24244712990936557, - "normalized_score": 24.244712990936556 - }, - "gpqa": { - "name": "GPQA", - "value": 0.37080536912751677, - "normalized_score": 16.10738255033557 - }, - "musr": { - "name": "MUSR", - "value": 0.45668749999999997, - "normalized_score": 16.05260416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4632646276595745, - "normalized_score": 40.36273640661938 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-02", - "submission_date": "2024-12-02", - "generation": 1, - "base_model": "hotmailuser/Gemma2Crono-27B 
(Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 27.227, - "co2_cost": 7.7758279250270474 - } - }, - { - "id": "hotmailuser/Gemma2SimPO-27B_bfloat16_59d5de8216b2b53abcf56a79ebb630d17a856d00_False", - "model": { - "name": "hotmailuser/Gemma2SimPO-27B", - "sha": "59d5de8216b2b53abcf56a79ebb630d17a856d00", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 36.43283421130692, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7222303488078299, - "normalized_score": 72.22303488078299 - }, - "bbh": { - "name": "BBH", - "value": 0.6413158976157102, - "normalized_score": 49.15921945509655 - }, - "math": { - "name": "MATH Level 5", - "value": 0.28172205438066467, - "normalized_score": 28.172205438066467 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35822147651006714, - "normalized_score": 14.429530201342287 - }, - "musr": { - "name": "MUSR", - "value": 0.44465625, - "normalized_score": 14.148697916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.46417885638297873, - "normalized_score": 40.46431737588653 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-01", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 27.227, - "co2_cost": 8.929292486388974 - } - }, - { - "id": "hotmailuser/Gemma2atlas-27B_bfloat16_8021ee95d2515abefc6b15924f9f49e2e98b88b8_False", - "model": { - "name": "hotmailuser/Gemma2atlas-27B", - "sha": "8021ee95d2515abefc6b15924f9f49e2e98b88b8", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 35.809591577124344, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7213560020744957, - "normalized_score": 72.13560020744956 - }, - "bbh": { - "name": "BBH", - "value": 0.6544960921220462, - "normalized_score": 50.713279143065044 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21450151057401812, - "normalized_score": 21.45015105740181 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35570469798657717, - "normalized_score": 14.093959731543624 - }, - "musr": { - "name": "MUSR", - "value": 0.44453125000000004, - "normalized_score": 14.799739583333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4749833776595745, - "normalized_score": 41.66481973995272 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-01", - "submission_date": "2024-12-01", - "generation": 1, - "base_model": "hotmailuser/Gemma2atlas-27B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 27.227, - "co2_cost": 8.530740587560782 - } - }, - { - "id": "hotmailuser/Gemma2magnum-27b_bfloat16_c2beed3653f3732b0af82a9dd1cddd5919c9c686_False", - "model": { - "name": "hotmailuser/Gemma2magnum-27b", - "sha": "c2beed3653f3732b0af82a9dd1cddd5919c9c686", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 32.46719676338319, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { 
- "name": "IFEval", - "value": 0.5050599077115387, - "normalized_score": 50.50599077115387 - }, - "bbh": { - "name": "BBH", - "value": 0.6199590493843724, - "normalized_score": 46.10114598574725 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22054380664652568, - "normalized_score": 22.054380664652566 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3850671140939597, - "normalized_score": 18.008948545861294 - }, - "musr": { - "name": "MUSR", - "value": 0.47234375, - "normalized_score": 18.17630208333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.45960771276595747, - "normalized_score": 39.95641252955083 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-02", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 27.227, - "co2_cost": 8.436378201254655 - } - }, - { - "id": "hotmailuser/Llama-Hermes-slerp-8B_bfloat16_95c15cd9c0893a5ffe40c3a1dae1556319daaf83_False", - "model": { - "name": "hotmailuser/Llama-Hermes-slerp-8B", - "sha": "95c15cd9c0893a5ffe40c3a1dae1556319daaf83", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.59642323585999, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3390470617960345, - "normalized_score": 33.90470617960345 - }, - "bbh": { - "name": "BBH", - "value": 0.5310290010444968, - "normalized_score": 33.30931089323399 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08006042296072508, - "normalized_score": 8.006042296072508 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2936241610738255, - "normalized_score": 5.8165548098433995 - }, - "musr": { - "name": "MUSR", - "value": 0.4077916666666667, - "normalized_score": 10.640625000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33311170212765956, - "normalized_score": 25.901300236406616 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-12", - "submission_date": "2024-12-23", - "generation": 1, - "base_model": "hotmailuser/Llama-Hermes-slerp-8B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.4214525678925432 - } - }, - { - "id": "hotmailuser/Llama-Hermes-slerp2-8B_bfloat16_cfd9a8f7e365268e27bf3ed363299abcbdf29989_False", - "model": { - "name": "hotmailuser/Llama-Hermes-slerp2-8B", - "sha": "cfd9a8f7e365268e27bf3ed363299abcbdf29989", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.611252287971684, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3728440537773109, - "normalized_score": 37.28440537773109 - }, - "bbh": { - "name": "BBH", - "value": 0.5265283171967207, - "normalized_score": 32.315975836753644 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09743202416918428, - "normalized_score": 9.743202416918429 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.42481250000000004, - "normalized_score": 11.734895833333335 - }, - "mmlu_pro": { - 
"name": "MMLU-PRO", - "value": 0.33793218085106386, - "normalized_score": 26.43690898345154 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-12", - "submission_date": "2024-12-23", - "generation": 1, - "base_model": "hotmailuser/Llama-Hermes-slerp2-8B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.437970914055651 - } - }, - { - "id": "hotmailuser/LlamaStock-8B_bfloat16_634ab357e226771144582944d3c029f363905cdd_False", - "model": { - "name": "hotmailuser/LlamaStock-8B", - "sha": "634ab357e226771144582944d3c029f363905cdd", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.411960823539555, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4249513513034304, - "normalized_score": 42.495135130343044 - }, - "bbh": { - "name": "BBH", - "value": 0.5328942883826541, - "normalized_score": 33.09114622817674 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16993957703927492, - "normalized_score": 16.993957703927492 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3271812080536913, - "normalized_score": 10.290827740492169 - }, - "musr": { - "name": "MUSR", - "value": 0.41293749999999996, - "normalized_score": 12.417187499999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3806515957446808, - "normalized_score": 31.183510638297868 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-30", - "submission_date": "2025-01-08", - "generation": 1, - "base_model": "hotmailuser/LlamaStock-8B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.2753957210387332 - } - }, - { - "id": "hotmailuser/Mistral-modelstock-24B_float16_47c017d9e32811649e6329c3366db7d0424d6de3_False", - "model": { - "name": "hotmailuser/Mistral-modelstock-24B", - "sha": "47c017d9e32811649e6329c3366db7d0424d6de3", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 29.725473787931804, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3424192254329623, - "normalized_score": 34.24192254329623 - }, - "bbh": { - "name": "BBH", - "value": 0.645229041403176, - "normalized_score": 48.712773019346535 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13066465256797583, - "normalized_score": 13.066465256797583 - }, - "gpqa": { - "name": "GPQA", - "value": 0.41023489932885904, - "normalized_score": 21.364653243847872 - }, - "musr": { - "name": "MUSR", - "value": 0.4590416666666666, - "normalized_score": 15.746875000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5069813829787234, - "normalized_score": 45.2201536643026 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-13", - "submission_date": "2025-02-13", - "generation": 1, - "base_model": "hotmailuser/Mistral-modelstock-24B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 23.572, - "co2_cost": 
1.71543130172185 - } - }, - { - "id": "hotmailuser/Mistral-modelstock2-24B_bfloat16_f8c34783dfee169d331cc1f2a652769acc3f0095_False", - "model": { - "name": "hotmailuser/Mistral-modelstock2-24B", - "sha": "f8c34783dfee169d331cc1f2a652769acc3f0095", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 33.81477606165948, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43184528163051816, - "normalized_score": 43.18452816305181 - }, - "bbh": { - "name": "BBH", - "value": 0.6689381929188762, - "normalized_score": 52.387209345108715 - }, - "math": { - "name": "MATH Level 5", - "value": 0.24018126888217523, - "normalized_score": 24.018126888217523 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3926174496644295, - "normalized_score": 19.01565995525727 - }, - "musr": { - "name": "MUSR", - "value": 0.46161458333333333, - "normalized_score": 16.30182291666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5318317819148937, - "normalized_score": 47.98130910165485 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-13", - "submission_date": "2025-02-13", - "generation": 1, - "base_model": "hotmailuser/Mistral-modelstock2-24B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 23.572, - "co2_cost": 1.4917658879570694 - } - }, - { - "id": "hotmailuser/Phi4-Slerp4-14B_bfloat16_c021ab7b45cf746aede182129e07d0ddc9b02410_False", - "model": { - "name": "hotmailuser/Phi4-Slerp4-14B", - "sha": "c021ab7b45cf746aede182129e07d0ddc9b02410", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.881045458267863, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.0629485321170051, - "normalized_score": 6.294853211700511 - }, - "bbh": { - "name": "BBH", - "value": 0.6731037909447855, - "normalized_score": 52.76034640932895 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3474320241691843, - "normalized_score": 34.74320241691843 - }, - "gpqa": { - "name": "GPQA", - "value": 0.39681208053691275, - "normalized_score": 19.574944071588366 - }, - "musr": { - "name": "MUSR", - "value": 0.5097395833333334, - "normalized_score": 24.38411458333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5277593085106383, - "normalized_score": 47.52881205673759 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-26", - "generation": 1, - "base_model": "hotmailuser/Phi4-Slerp4-14B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 1.8446649148433258 - } - }, - { - "id": "hotmailuser/Qwen2.5-HomerSlerp-7B_bfloat16_19fe6a99882323c775e8208e8ebc7e219a80435b_False", - "model": { - "name": "hotmailuser/Qwen2.5-HomerSlerp-7B", - "sha": "19fe6a99882323c775e8208e8ebc7e219a80435b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 29.43258692955959, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 
0.44878145542715553, - "normalized_score": 44.87814554271555 - }, - "bbh": { - "name": "BBH", - "value": 0.5632506117591088, - "normalized_score": 37.40411935244851 - }, - "math": { - "name": "MATH Level 5", - "value": 0.33157099697885195, - "normalized_score": 33.157099697885194 - }, - "gpqa": { - "name": "GPQA", - "value": 0.313758389261745, - "normalized_score": 8.501118568232664 - }, - "musr": { - "name": "MUSR", - "value": 0.4383333333333333, - "normalized_score": 13.225000000000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4548703457446808, - "normalized_score": 39.43003841607564 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-07", - "submission_date": "2024-12-07", - "generation": 1, - "base_model": "hotmailuser/Qwen2.5-HomerSlerp-7B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.2001042958753323 - } - }, - { - "id": "hotmailuser/QwenModelStock-1.8B_float16_4362359464981da5a6d35acd0aece5f12e53aaf3_False", - "model": { - "name": "hotmailuser/QwenModelStock-1.8B", - "sha": "4362359464981da5a6d35acd0aece5f12e53aaf3", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 16.665220644656603, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3263075306852484, - "normalized_score": 32.63075306852485 - }, - "bbh": { - "name": "BBH", - "value": 0.41881762650909504, - "normalized_score": 17.724176359806876 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09894259818731117, - "normalized_score": 9.894259818731117 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28691275167785235, - "normalized_score": 4.921700223713646 - }, - "musr": { - "name": "MUSR", - "value": 0.4359166666666667, - "normalized_score": 13.05625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2958776595744681, - "normalized_score": 21.76418439716312 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-08", - "generation": 1, - "base_model": "hotmailuser/QwenModelStock-1.8B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.777, - "co2_cost": 1.0588223432600736 - } - }, - { - "id": "hotmailuser/QwenSlerp-14B_bfloat16_1186d3f7a807313bc8d492e91f3da471d9fb8412_False", - "model": { - "name": "hotmailuser/QwenSlerp-14B", - "sha": "1186d3f7a807313bc8d492e91f3da471d9fb8412", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.34991665759889, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7024716640735471, - "normalized_score": 70.24716640735471 - }, - "bbh": { - "name": "BBH", - "value": 0.6491286917834284, - "normalized_score": 49.421941687333 - }, - "math": { - "name": "MATH Level 5", - "value": 0.38368580060422963, - "normalized_score": 38.368580060422964 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3875838926174497, - "normalized_score": 18.34451901565996 - }, - "musr": { - "name": "MUSR", - "value": 0.4634479166666667, - "normalized_score": 16.830989583333334 - }, - "mmlu_pro": { - "name": 
"MMLU-PRO", - "value": 0.5399767287234043, - "normalized_score": 48.88630319148937 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-31", - "submission_date": "2025-01-08", - "generation": 1, - "base_model": "hotmailuser/QwenSlerp-14B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 3.791800683805432 - } - }, - { - "id": "hotmailuser/QwenSlerp-3B_bfloat16_3e94c24d81dcd5b1b4ef7ab76cc79e4427f898a0_False", - "model": { - "name": "hotmailuser/QwenSlerp-3B", - "sha": "3e94c24d81dcd5b1b4ef7ab76cc79e4427f898a0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 24.511060317673625, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4333690164319561, - "normalized_score": 43.3369016431956 - }, - "bbh": { - "name": "BBH", - "value": 0.4892345530653528, - "normalized_score": 28.28933436034725 - }, - "math": { - "name": "MATH Level 5", - "value": 0.27492447129909364, - "normalized_score": 27.492447129909365 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.43166666666666664, - "normalized_score": 12.091666666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3693484042553192, - "normalized_score": 29.927600472813236 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-01", - "submission_date": "2025-01-08", - "generation": 1, - "base_model": "hotmailuser/QwenSlerp-3B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.397, - "co2_cost": 1.4724753956167291 - } - }, - { - "id": "hotmailuser/QwenSlerp-7B_bfloat16_0b12d7a297c1eda4ab669a784bfeb857e95230a6_False", - "model": { - "name": "hotmailuser/QwenSlerp-7B", - "sha": "0b12d7a297c1eda4ab669a784bfeb857e95230a6", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 30.086749997612042, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4672912317096415, - "normalized_score": 46.72912317096415 - }, - "bbh": { - "name": "BBH", - "value": 0.5636352508232924, - "normalized_score": 37.61925634545651 - }, - "math": { - "name": "MATH Level 5", - "value": 0.34441087613293053, - "normalized_score": 34.44108761329305 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3179530201342282, - "normalized_score": 9.060402684563762 - }, - "musr": { - "name": "MUSR", - "value": 0.4409375, - "normalized_score": 13.68385416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.45088098404255317, - "normalized_score": 38.98677600472813 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-22", - "submission_date": "2024-12-23", - "generation": 1, - "base_model": "hotmailuser/QwenSlerp-7B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.2683967687951827 - } - }, - { - "id": 
"hotmailuser/QwenSlerp2-14B_bfloat16_d0b78b5b20665a132d4c73957f663291419f2e85_False", - "model": { - "name": "hotmailuser/QwenSlerp2-14B", - "sha": "d0b78b5b20665a132d4c73957f663291419f2e85", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.85962754388034, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7036707048409332, - "normalized_score": 70.36707048409332 - }, - "bbh": { - "name": "BBH", - "value": 0.6492799322983842, - "normalized_score": 49.684327104915155 - }, - "math": { - "name": "MATH Level 5", - "value": 0.39652567975830816, - "normalized_score": 39.65256797583081 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3808724832214765, - "normalized_score": 17.4496644295302 - }, - "musr": { - "name": "MUSR", - "value": 0.48065625, - "normalized_score": 19.348697916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5378989361702128, - "normalized_score": 48.65543735224587 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-05", - "submission_date": "2025-01-08", - "generation": 1, - "base_model": "hotmailuser/QwenSlerp2-14B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 3.7010769324853077 - } - }, - { - "id": "hotmailuser/QwenSlerp2-3B_bfloat16_304cc6b0aef391e1ec06f331963b520fd65c44fb_False", - "model": { - "name": "hotmailuser/QwenSlerp2-3B", - "sha": "304cc6b0aef391e1ec06f331963b520fd65c44fb", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 24.03431047768953, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4280486885907171, - "normalized_score": 42.8048688590717 - }, - "bbh": { - "name": "BBH", - "value": 0.4801760257099328, - "normalized_score": 26.901296704943547 - }, - "math": { - "name": "MATH Level 5", - "value": 0.26057401812688824, - "normalized_score": 26.057401812688823 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.4251875, - "normalized_score": 11.71510416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3741688829787234, - "normalized_score": 30.46320921985816 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-01", - "submission_date": "2025-01-08", - "generation": 1, - "base_model": "hotmailuser/QwenSlerp2-3B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.397, - "co2_cost": 1.46318111692393 - } - }, - { - "id": "hotmailuser/QwenSlerp3-14B_bfloat16_c74c666041e88ee77e46ebbf19e42fa0d098c156_False", - "model": { - "name": "hotmailuser/QwenSlerp3-14B", - "sha": "c74c666041e88ee77e46ebbf19e42fa0d098c156", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.791587426587995, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6632291209546226, - "normalized_score": 66.32291209546227 - }, - "bbh": { - "name": "BBH", - 
"value": 0.6266526215170748, - "normalized_score": 46.50024294429596 - }, - "math": { - "name": "MATH Level 5", - "value": 0.43051359516616317, - "normalized_score": 43.051359516616316 - }, - "gpqa": { - "name": "GPQA", - "value": 0.36661073825503354, - "normalized_score": 15.548098434004473 - }, - "musr": { - "name": "MUSR", - "value": 0.48078125, - "normalized_score": 19.964322916666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5262632978723404, - "normalized_score": 47.362588652482266 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-05", - "submission_date": "2025-01-08", - "generation": 1, - "base_model": "hotmailuser/QwenSlerp3-14B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 3.705657276969295 - } - }, - { - "id": "hotmailuser/QwenSparse-7B_bfloat16_f419c206e4543d1c9d0081e02e0272f08c49fbad_False", - "model": { - "name": "hotmailuser/QwenSparse-7B", - "sha": "f419c206e4543d1c9d0081e02e0272f08c49fbad", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.4771858660491386, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.10858632871891026, - "normalized_score": 10.858632871891025 - }, - "bbh": { - "name": "BBH", - "value": 0.28956619468137906, - "normalized_score": 1.88717525781587 - }, - "math": { - "name": "MATH Level 5", - "value": 0.010574018126888218, - "normalized_score": 1.0574018126888218 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.35622916666666665, - "normalized_score": 4.361979166666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11220079787234043, - "normalized_score": 1.3556442080378246 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-04", - "submission_date": "2025-01-08", - "generation": 1, - "base_model": "hotmailuser/QwenSparse-7B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.4819065821862699 - } - }, - { - "id": "hotmailuser/QwenStock-0.5B_bfloat16_8a39dd23f5e894dec1dc3783dbe50717bc6982f2_False", - "model": { - "name": "hotmailuser/QwenStock-0.5B", - "sha": "8a39dd23f5e894dec1dc3783dbe50717bc6982f2", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 5.093186530380667, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20490742341431845, - "normalized_score": 20.490742341431847 - }, - "bbh": { - "name": "BBH", - "value": 0.2911778102988436, - "normalized_score": 2.3470409575204094 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.35753125, - "normalized_score": 4.524739583333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11668882978723404, - "normalized_score": 1.854314420803781 - } - }, - "features": { - "is_not_available_on_hub": true, 
- "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-03", - "submission_date": "2025-01-08", - "generation": 1, - "base_model": "hotmailuser/QwenStock-0.5B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.0069252525386143 - } - }, - { - "id": "hotmailuser/QwenStock-1.7B_float16_3c8f8e080e32648cf1cb2ecc44a19109f908eba7_False", - "model": { - "name": "hotmailuser/QwenStock-1.7B", - "sha": "3c8f8e080e32648cf1cb2ecc44a19109f908eba7", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 16.75925374781586, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.32141163224688274, - "normalized_score": 32.14116322468827 - }, - "bbh": { - "name": "BBH", - "value": 0.4187550547805281, - "normalized_score": 17.74065871627513 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09969788519637462, - "normalized_score": 9.969788519637463 - }, - "gpqa": { - "name": "GPQA", - "value": 0.287751677852349, - "normalized_score": 5.033557046979867 - }, - "musr": { - "name": "MUSR", - "value": 0.44121875, - "normalized_score": 13.952343750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2954621010638298, - "normalized_score": 21.71801122931442 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-08", - "generation": 1, - "base_model": "hotmailuser/QwenStock-1.7B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.777, - "co2_cost": 1.0975954025989914 - } - }, - { - "id": "hotmailuser/QwenStock1-14B_bfloat16_c50b4f0c6511500f2e2d4549301cfedccdb79f4f_False", - "model": { - "name": "hotmailuser/QwenStock1-14B", - "sha": "c50b4f0c6511500f2e2d4549301cfedccdb79f4f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.99889430195756, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6693240601603745, - "normalized_score": 66.93240601603745 - }, - "bbh": { - "name": "BBH", - "value": 0.6502248812491821, - "normalized_score": 49.56243886958751 - }, - "math": { - "name": "MATH Level 5", - "value": 0.37009063444108764, - "normalized_score": 37.00906344410876 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3859060402684564, - "normalized_score": 18.120805369127517 - }, - "musr": { - "name": "MUSR", - "value": 0.47811458333333334, - "normalized_score": 19.29765625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5416389627659575, - "normalized_score": 49.07099586288417 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-31", - "submission_date": "2025-01-08", - "generation": 1, - "base_model": "hotmailuser/QwenStock1-14B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 3.727170708681339 - } - }, - { - "id": "hotmailuser/RombosBeagle-v2beta-MGS-32B_bfloat16_82a11837d273c1019be962d93cabe682ab2cd40b_False", - "model": { - "name": "hotmailuser/RombosBeagle-v2beta-MGS-32B", 
- "sha": "82a11837d273c1019be962d93cabe682ab2cd40b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 42.65614620168921, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5156761836371937, - "normalized_score": 51.567618363719376 - }, - "bbh": { - "name": "BBH", - "value": 0.7037350002757341, - "normalized_score": 58.117898971791085 - }, - "math": { - "name": "MATH Level 5", - "value": 0.49924471299093653, - "normalized_score": 49.92447129909365 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3800335570469799, - "normalized_score": 17.337807606263986 - }, - "musr": { - "name": "MUSR", - "value": 0.5020833333333333, - "normalized_score": 24.460416666666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5907579787234043, - "normalized_score": 54.52866430260048 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-07", - "submission_date": "2024-12-23", - "generation": 1, - "base_model": "hotmailuser/RombosBeagle-v2beta-MGS-32B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 32.764, - "co2_cost": 10.391246244145718 - } - }, - { - "id": "huggyllama/llama-13b_float16_bf57045473f207bb1de1ed035ace226f4d9f9bba_False", - "model": { - "name": "huggyllama/llama-13b", - "sha": "bf57045473f207bb1de1ed035ace226f4d9f9bba", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 9.39218439885523, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24105262924595627, - "normalized_score": 24.10526292459563 - }, - "bbh": { - "name": "BBH", - "value": 0.39878925581174585, - "normalized_score": 16.145707376925767 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02039274924471299, - "normalized_score": 2.0392749244712993 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2550335570469799, - "normalized_score": 0.6711409395973182 - }, - "musr": { - "name": "MUSR", - "value": 0.34621875, - "normalized_score": 2.8106770833333345 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.19522938829787234, - "normalized_score": 10.581043144208037 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-04-03", - "submission_date": "2024-07-04", - "generation": 0, - "base_model": "huggyllama/llama-13b", - "hub_license": "other", - "hub_hearts": 139, - "params_billions": 13.016, - "co2_cost": 2.212281197143199 - } - }, - { - "id": "huggyllama/llama-65b_float16_49707c5313d34d1c5a846e29cf2a2a650c22c8ee_False", - "model": { - "name": "huggyllama/llama-65b", - "sha": "49707c5313d34d1c5a846e29cf2a2a650c22c8ee", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.688031554930518, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25259311958935626, - "normalized_score": 25.259311958935623 - }, - "bbh": { - "name": "BBH", - "value": 0.4702556052882764, - "normalized_score": 25.254277114598622 - }, - "math": { - "name": "MATH Level 5", - "value": 0.030966767371601207, 
- "normalized_score": 3.096676737160121 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.35945833333333327, - "normalized_score": 1.9656249999999986 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3077626329787234, - "normalized_score": 23.084736997635936 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-04-04", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "huggyllama/llama-65b", - "hub_license": "other", - "hub_hearts": 75, - "params_billions": 65.286, - "co2_cost": 18.660216163901705 - } - }, - { - "id": "huggyllama/llama-7b_float16_4782ad278652c7c71b72204d462d6d01eaaf7549_False", - "model": { - "name": "huggyllama/llama-7b", - "sha": "4782ad278652c7c71b72204d462d6d01eaaf7549", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 6.4149999259207915, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25009530268576263, - "normalized_score": 25.009530268576263 - }, - "bbh": { - "name": "BBH", - "value": 0.32773134782898566, - "normalized_score": 7.076660678102023 - }, - "math": { - "name": "MATH Level 5", - "value": 0.008308157099697885, - "normalized_score": 0.8308157099697886 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2525167785234899, - "normalized_score": 0.33557046979865535 - }, - "musr": { - "name": "MUSR", - "value": 0.33539583333333334, - "normalized_score": 1.7578124999999993 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.13131648936170212, - "normalized_score": 3.4796099290780127 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-04-03", - "submission_date": "2024-07-04", - "generation": 0, - "base_model": "huggyllama/llama-7b", - "hub_license": "other", - "hub_hearts": 320, - "params_billions": 6.738, - "co2_cost": 1.1272084313543247 - } - }, - { - "id": "huihui-ai/DeepSeek-R1-Distill-Qwen-14B-abliterated-v2_bfloat16_a3b1e50cd454e219cb521d979eb6f61faf1a4504_True", - "model": { - "name": "huihui-ai/DeepSeek-R1-Distill-Qwen-14B-abliterated-v2", - "sha": "a3b1e50cd454e219cb521d979eb6f61faf1a4504", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 17.525034419590085, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42112927033604175, - "normalized_score": 42.11292703360417 - }, - "bbh": { - "name": "BBH", - "value": 0.34869240677927044, - "normalized_score": 8.72537349779457 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22054380664652568, - "normalized_score": 22.054380664652566 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.47006250000000005, - "normalized_score": 18.62447916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.19148936170212766, - "normalized_score": 10.16548463356974 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": 
false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-23", - "submission_date": "2025-01-29", - "generation": 1, - "base_model": "huihui-ai/DeepSeek-R1-Distill-Qwen-14B-abliterated-v2 (Merge)", - "hub_license": "", - "hub_hearts": 122, - "params_billions": 14.77, - "co2_cost": 3.710507681662543 - } - }, - { - "id": "huihui-ai/QwQ-32B-Coder-Fusion-7030_bfloat16_2441de6df2f43702c4dddba14bd7a2bc10071ae4_True", - "model": { - "name": "huihui-ai/QwQ-32B-Coder-Fusion-7030", - "sha": "2441de6df2f43702c4dddba14bd7a2bc10071ae4", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 26.719670343952135, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.38650779930584184, - "normalized_score": 38.65077993058418 - }, - "bbh": { - "name": "BBH", - "value": 0.6177864730931621, - "normalized_score": 44.95662557932721 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2794561933534743, - "normalized_score": 27.945619335347434 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28439597315436244, - "normalized_score": 4.5861297539149914 - }, - "musr": { - "name": "MUSR", - "value": 0.39222916666666663, - "normalized_score": 6.761979166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4367519946808511, - "normalized_score": 37.41688829787233 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-30", - "submission_date": "2025-01-03", - "generation": 1, - "base_model": "huihui-ai/QwQ-32B-Coder-Fusion-7030 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 6, - "params_billions": 32.764, - "co2_cost": 10.99417036842794 - } - }, - { - "id": "huihui-ai/QwQ-32B-Coder-Fusion-8020_bfloat16_5af8eace7a9377bf61c72570893c43ae8860650e_True", - "model": { - "name": "huihui-ai/QwQ-32B-Coder-Fusion-8020", - "sha": "5af8eace7a9377bf61c72570893c43ae8860650e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.81040369162885, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6020547702318737, - "normalized_score": 60.20547702318737 - }, - "bbh": { - "name": "BBH", - "value": 0.6664531829718748, - "normalized_score": 51.79260416831474 - }, - "math": { - "name": "MATH Level 5", - "value": 0.459214501510574, - "normalized_score": 45.9214501510574 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3548657718120805, - "normalized_score": 13.982102908277403 - }, - "musr": { - "name": "MUSR", - "value": 0.42934374999999997, - "normalized_score": 12.434635416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5367353723404256, - "normalized_score": 48.52615248226951 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-30", - "submission_date": "2025-01-03", - "generation": 1, - "base_model": "huihui-ai/QwQ-32B-Coder-Fusion-8020 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 32.764, - "co2_cost": 9.202812113959975 - } - }, - { - "id": "huihui-ai/QwQ-32B-Coder-Fusion-9010_bfloat16_6d19e2749fabb24efe732a2614e7458d61d92426_True", - "model": { - "name": 
"huihui-ai/QwQ-32B-Coder-Fusion-9010", - "sha": "6d19e2749fabb24efe732a2614e7458d61d92426", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.581938708070844, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5778246164620984, - "normalized_score": 57.78246164620983 - }, - "bbh": { - "name": "BBH", - "value": 0.6727405551499568, - "normalized_score": 53.02341820461157 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5317220543806647, - "normalized_score": 53.17220543806647 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3615771812080537, - "normalized_score": 14.876957494407161 - }, - "musr": { - "name": "MUSR", - "value": 0.4681979166666667, - "normalized_score": 19.52473958333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5600066489361702, - "normalized_score": 51.1118498817967 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-29", - "submission_date": "2024-12-07", - "generation": 1, - "base_model": "huihui-ai/QwQ-32B-Coder-Fusion-9010 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 9, - "params_billions": 32.764, - "co2_cost": 22.301953590708713 - } - }, - { - "id": "huihui-ai/Qwen2.5-14B-Instruct-abliterated-v2_bfloat16_68f298d4017b8999dc963fbc560b02eaefa41de3_True", - "model": { - "name": "huihui-ai/Qwen2.5-14B-Instruct-abliterated-v2", - "sha": "68f298d4017b8999dc963fbc560b02eaefa41de3", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.74807839195605, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8327637335602867, - "normalized_score": 83.27637335602867 - }, - "bbh": { - "name": "BBH", - "value": 0.6323822447052897, - "normalized_score": 47.40618824926219 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5302114803625377, - "normalized_score": 53.02114803625378 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3338926174496644, - "normalized_score": 11.185682326621922 - }, - "musr": { - "name": "MUSR", - "value": 0.42196875, - "normalized_score": 11.579427083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.49617686170212766, - "normalized_score": 44.019651300236404 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-09", - "submission_date": "2024-12-07", - "generation": 2, - "base_model": "Qwen/Qwen2.5-14B", - "hub_license": "apache-2.0", - "hub_hearts": 12, - "params_billions": 14.77, - "co2_cost": 3.2479654475791335 - } - }, - { - "id": "huihui-ai/Qwen2.5-72B-Instruct-abliterated_bfloat16_6c92140f35084f10f08832cb884fd87328509d45_True", - "model": { - "name": "huihui-ai/Qwen2.5-72B-Instruct-abliterated", - "sha": "6c92140f35084f10f08832cb884fd87328509d45", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 48.10647092442315, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8592667455684251, - "normalized_score": 85.92667455684251 - }, - "bbh": { - "name": "BBH", - 
"value": 0.7189881596250237, - "normalized_score": 60.487869412693875 - }, - "math": { - "name": "MATH Level 5", - "value": 0.6012084592145015, - "normalized_score": 60.12084592145015 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3951342281879195, - "normalized_score": 19.35123042505593 - }, - "musr": { - "name": "MUSR", - "value": 0.4232708333333333, - "normalized_score": 12.342187500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5536901595744681, - "normalized_score": 50.410017730496456 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-26", - "submission_date": "2024-12-07", - "generation": 2, - "base_model": "Qwen/Qwen2.5-72B", - "hub_license": "other", - "hub_hearts": 10, - "params_billions": 72.706, - "co2_cost": 76.77142501291785 - } - }, - { - "id": "huihui-ai/Qwen2.5-7B-Instruct-abliterated_bfloat16_c04c14c82962506e2b16f58f9f6b0a2e60a6afde_True", - "model": { - "name": "huihui-ai/Qwen2.5-7B-Instruct-abliterated", - "sha": "c04c14c82962506e2b16f58f9f6b0a2e60a6afde", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 34.27590445973728, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7546033413564897, - "normalized_score": 75.46033413564896 - }, - "bbh": { - "name": "BBH", - "value": 0.5261589972829911, - "normalized_score": 32.886673214320496 - }, - "math": { - "name": "MATH Level 5", - "value": 0.45770392749244715, - "normalized_score": 45.770392749244714 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31543624161073824, - "normalized_score": 8.7248322147651 - }, - "musr": { - "name": "MUSR", - "value": 0.39666666666666667, - "normalized_score": 7.483333333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.41796875, - "normalized_score": 35.32986111111111 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-19", - "submission_date": "2024-09-24", - "generation": 2, - "base_model": "Qwen/Qwen2.5-7B", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 7.616, - "co2_cost": 3.297569060075492 - } - }, - { - "id": "huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2_bfloat16_05d179c1108cc2dc1c1a16a8255ac6f57eac5d32_True", - "model": { - "name": "huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2", - "sha": "05d179c1108cc2dc1c1a16a8255ac6f57eac5d32", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 34.729008711870584, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7606484128778308, - "normalized_score": 76.06484128778308 - }, - "bbh": { - "name": "BBH", - "value": 0.5376688442794247, - "normalized_score": 34.369626512494015 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4637462235649547, - "normalized_score": 46.37462235649547 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3087248322147651, - "normalized_score": 7.829977628635347 - }, - "musr": { - "name": "MUSR", - "value": 0.3980625, - "normalized_score": 8.091145833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.42079454787234044, - "normalized_score": 
35.643838652482266 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-22", - "submission_date": "2024-09-24", - "generation": 2, - "base_model": "Qwen/Qwen2.5-7B", - "hub_license": "apache-2.0", - "hub_hearts": 35, - "params_billions": 7.616, - "co2_cost": 3.334388792887168 - } - }, - { - "id": "huu-ontocord/wide_3b_orpo_stage1.1-ss1-orpo3_float16_41a6b5b9d2595ca793e1a6689ecab62a24d12483_True", - "model": { - "name": "huu-ontocord/wide_3b_orpo_stage1.1-ss1-orpo3", - "sha": "41a6b5b9d2595ca793e1a6689ecab62a24d12483", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.2713829044269715, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15052726764983576, - "normalized_score": 15.052726764983579 - }, - "bbh": { - "name": "BBH", - "value": 0.2936618285636837, - "normalized_score": 2.6258625470390182 - }, - "math": { - "name": "MATH Level 5", - "value": 0.009818731117824773, - "normalized_score": 0.9818731117824773 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.36178125, - "normalized_score": 4.02265625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11643949468085106, - "normalized_score": 1.8266105200945615 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-03", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.759, - "co2_cost": 0.27895379360455497 - } - }, - { - "id": "iFaz/llama31_8B_en_emo_v4_float16_0db785ab56c082e30ae7dea3645d45465fbb5797_True", - "model": { - "name": "iFaz/llama31_8B_en_emo_v4", - "sha": "0db785ab56c082e30ae7dea3645d45465fbb5797", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "", - "average_score": 17.153620627087744, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3042504997850149, - "normalized_score": 30.425049978501487 - }, - "bbh": { - "name": "BBH", - "value": 0.49155384618761383, - "normalized_score": 27.93118385237936 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08836858006042296, - "normalized_score": 8.836858006042297 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.3642916666666667, - "normalized_score": 6.703125000000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3048537234042553, - "normalized_score": 22.761524822695034 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-28", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 4.777, - "co2_cost": 1.7973431451023771 - } - }, - { - "id": "iFaz/llama32_1B_en_emo_v1_float16_6e391fb0a2f1198a83625deaea1502e9a12c1cba_True", - "model": { - "name": "iFaz/llama32_1B_en_emo_v1", - "sha": 
"6e391fb0a2f1198a83625deaea1502e9a12c1cba", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 10.934841101081998, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44083808738591385, - "normalized_score": 44.08380873859139 - }, - "bbh": { - "name": "BBH", - "value": 0.33802631394113886, - "normalized_score": 7.2810483160822015 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0377643504531722, - "normalized_score": 3.7764350453172204 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.34888541666666667, - "normalized_score": 2.010677083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17611369680851063, - "normalized_score": 8.457077423167847 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-25", - "submission_date": "2025-01-14", - "generation": 2, - "base_model": "meta-llama/Llama-3.2-3B-Instruct", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.765, - "co2_cost": 1.7540947651885561 - } - }, - { - "id": "iFaz/llama32_3B_en_emo_1000_stp_float16_443770bbd4989aaf275df6382ef4662142b28dff_True", - "model": { - "name": "iFaz/llama32_3B_en_emo_1000_stp", - "sha": "443770bbd4989aaf275df6382ef4662142b28dff", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.648597853327, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7295243287809678, - "normalized_score": 72.95243287809677 - }, - "bbh": { - "name": "BBH", - "value": 0.45218477635502685, - "normalized_score": 23.111523420944195 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14652567975830816, - "normalized_score": 14.652567975830816 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.3620625, - "normalized_score": 3.8911458333333346 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3123337765957447, - "normalized_score": 23.592641843971627 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-08", - "submission_date": "2025-03-08", - "generation": 2, - "base_model": "meta-llama/Llama-3.2-3B-Instruct", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.848, - "co2_cost": 0.9024010584183441 - } - }, - { - "id": "iFaz/llama32_3B_en_emo_2000_stp_float16_513307bd4826a01f391541e2ed25fdc65aaa14e8_True", - "model": { - "name": "iFaz/llama32_3B_en_emo_2000_stp", - "sha": "513307bd4826a01f391541e2ed25fdc65aaa14e8", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.911624360048616, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7368681764385165, - "normalized_score": 73.68681764385164 - }, - "bbh": { - "name": "BBH", - "value": 0.45345889848516396, - "normalized_score": 23.416054848493687 - }, - "math": { - "name": "MATH 
Level 5", - "value": 0.15332326283987915, - "normalized_score": 15.332326283987916 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.35269791666666667, - "normalized_score": 3.2539062500000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3097573138297872, - "normalized_score": 23.30636820330969 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-07", - "submission_date": "2025-03-07", - "generation": 2, - "base_model": "meta-llama/Llama-3.2-3B-Instruct", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.848, - "co2_cost": 0.9022375387579541 - } - }, - { - "id": "iFaz/llama32_3B_en_emo_300_stp_float16_55c889bfaa908ca773e8ccab084ebe9b6f00d3dc_True", - "model": { - "name": "iFaz/llama32_3B_en_emo_300_stp", - "sha": "55c889bfaa908ca773e8ccab084ebe9b6f00d3dc", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.75197000352155, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.725552644760347, - "normalized_score": 72.5552644760347 - }, - "bbh": { - "name": "BBH", - "value": 0.45045681689917494, - "normalized_score": 22.939796393832058 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16012084592145015, - "normalized_score": 16.012084592145015 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.3620625, - "normalized_score": 3.8911458333333346 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3148271276595745, - "normalized_score": 23.869680851063833 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-07", - "submission_date": "2025-03-07", - "generation": 2, - "base_model": "meta-llama/Llama-3.2-3B-Instruct", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.848, - "co2_cost": 0.8700878633076544 - } - }, - { - "id": "iFaz/llama32_3B_en_emo_5000_stp_float16_52014daa941e30aaa5f280ab0db1470a0839f3dc_True", - "model": { - "name": "iFaz/llama32_3B_en_emo_5000_stp", - "sha": "52014daa941e30aaa5f280ab0db1470a0839f3dc", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.22281886664426, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7100404703963262, - "normalized_score": 71.00404703963262 - }, - "bbh": { - "name": "BBH", - "value": 0.4567949942342784, - "normalized_score": 23.462261896367732 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12915407854984895, - "normalized_score": 12.915407854984895 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.34460416666666666, - "normalized_score": 5.075520833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30668218085106386, - "normalized_score": 22.964686761229316 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - 
"is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-08", - "submission_date": "2025-03-08", - "generation": 2, - "base_model": "meta-llama/Llama-3.2-3B-Instruct", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.848, - "co2_cost": 0.9237204039607672 - } - }, - { - "id": "iFaz/llama32_3B_en_emo_v2_float16_a2e03c1a0cd3cba34c197971eddd8c42f3c88479_False", - "model": { - "name": "iFaz/llama32_3B_en_emo_v2", - "sha": "a2e03c1a0cd3cba34c197971eddd8c42f3c88479", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.303216399935796, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5454017562290279, - "normalized_score": 54.54017562290278 - }, - "bbh": { - "name": "BBH", - "value": 0.4283518305582969, - "normalized_score": 19.02916278101205 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10876132930513595, - "normalized_score": 10.876132930513595 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.34822916666666665, - "normalized_score": 6.761979166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3003656914893617, - "normalized_score": 22.26285460992908 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-25", - "submission_date": "2024-12-26", - "generation": 2, - "base_model": "meta-llama/Llama-3.2-3B-Instruct", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 1.848, - "co2_cost": 2.1775976581044354 - } - }, - { - "id": "iFaz/llama32_3B_en_emo_v3_float16_1ad78b1d9157ba683bd0013b319e3a3e1551222b_True", - "model": { - "name": "iFaz/llama32_3B_en_emo_v3", - "sha": "1ad78b1d9157ba683bd0013b319e3a3e1551222b", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.26359573830477, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5759263199421978, - "normalized_score": 57.592631994219786 - }, - "bbh": { - "name": "BBH", - "value": 0.43013596402782367, - "normalized_score": 20.09714960563635 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06797583081570997, - "normalized_score": 6.797583081570997 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.35527083333333337, - "normalized_score": 3.7421875000000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27102726063829785, - "normalized_score": 19.003028959810873 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-28", - "submission_date": "2025-02-28", - "generation": 2, - "base_model": "meta-llama/Llama-3.2-3B-Instruct", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.848, - "co2_cost": 0.9326067547081006 - } - }, - { - "id": "iRyanBell/ARC1_bfloat16_28176c0fb77fa43e1410766faf35d2a2681566e9_False", - "model": { - "name": "iRyanBell/ARC1", - "sha": 
"28176c0fb77fa43e1410766faf35d2a2681566e9", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.66167494214361, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.441112913735555, - "normalized_score": 44.1112913735555 - }, - "bbh": { - "name": "BBH", - "value": 0.4902999658144703, - "normalized_score": 26.564495132631716 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06873111782477341, - "normalized_score": 6.873111782477341 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.3990520833333333, - "normalized_score": 8.148177083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3371010638297872, - "normalized_score": 26.34456264775413 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-30", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "iRyanBell/ARC1", - "hub_license": "llama3", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.849328996764956 - } - }, - { - "id": "iRyanBell/ARC1-II_bfloat16_c81076b9bdaac0722b33e411a49b07a296e8fae8_False", - "model": { - "name": "iRyanBell/ARC1-II", - "sha": "c81076b9bdaac0722b33e411a49b07a296e8fae8", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 9.559430625313345, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17083560508340093, - "normalized_score": 17.083560508340092 - }, - "bbh": { - "name": "BBH", - "value": 0.33817781029884353, - "normalized_score": 7.246229410679451 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02190332326283988, - "normalized_score": 2.190332326283988 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27181208053691275, - "normalized_score": 2.9082774049216997 - }, - "musr": { - "name": "MUSR", - "value": 0.4912916666666667, - "normalized_score": 20.311458333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1685505319148936, - "normalized_score": 7.616725768321511 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-12", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "iRyanBell/ARC1-II", - "hub_license": "llama3", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.790552524437696 - } - }, - { - "id": "ibivibiv/colossus_120b_float16_b4c11f98bd874bfa454a0bb46153335cfb9b06a3_False", - "model": { - "name": "ibivibiv/colossus_120b", - "sha": "b4c11f98bd874bfa454a0bb46153335cfb9b06a3", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.415203305397444, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42759877126025614, - "normalized_score": 42.759877126025614 - }, - "bbh": { - "name": "BBH", - "value": 0.6061408586494191, - "normalized_score": 44.071497527478364 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05664652567975831, - 
"normalized_score": 5.664652567975831 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3087248322147651, - "normalized_score": 7.829977628635347 - }, - "musr": { - "name": "MUSR", - "value": 0.4733125, - "normalized_score": 19.264062499999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3961103723404255, - "normalized_score": 32.901152482269495 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-12", - "submission_date": "2024-06-27", - "generation": 0, - "base_model": "ibivibiv/colossus_120b", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 117.749, - "co2_cost": 27.50486346616086 - } - }, - { - "id": "ibivibiv/multimaster-7b-v6_float16_7b3bfecb654c86565c65cd510dd1138cb3e75087_False", - "model": { - "name": "ibivibiv/multimaster-7b-v6", - "sha": "7b3bfecb654c86565c65cd510dd1138cb3e75087", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 21.089768673380153, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4473075883101283, - "normalized_score": 44.73075883101282 - }, - "bbh": { - "name": "BBH", - "value": 0.519351871026721, - "normalized_score": 32.40128043389345 - }, - "math": { - "name": "MATH Level 5", - "value": 0.055891238670694864, - "normalized_score": 5.589123867069486 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.43957291666666665, - "normalized_score": 13.379947916666671 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30950797872340424, - "normalized_score": 23.278664302600472 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-02-24", - "submission_date": "2024-06-28", - "generation": 0, - "base_model": "ibivibiv/multimaster-7b-v6", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 35.428, - "co2_cost": 5.1493610087548305 - } - }, - { - "id": "ibm/PowerLM-3b_bfloat16_38a624db4c8c9dd15d8bfb51845b7b177348e104_False", - "model": { - "name": "ibm/PowerLM-3b", - "sha": "38a624db4c8c9dd15d8bfb51845b7b177348e104", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "GraniteForCausalLM", - "average_score": 11.524079220212199, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.33212764354135915, - "normalized_score": 33.21276435413591 - }, - "bbh": { - "name": "BBH", - "value": 0.3679456724439114, - "normalized_score": 12.02206036323482 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03625377643504532, - "normalized_score": 3.625377643504532 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2751677852348993, - "normalized_score": 3.355704697986576 - }, - "musr": { - "name": "MUSR", - "value": 0.3562916666666667, - "normalized_score": 5.6364583333333345 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.20162898936170212, - "normalized_score": 11.292109929078013 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": 
"2024-08-14", - "submission_date": "2025-01-07", - "generation": 0, - "base_model": "ibm/PowerLM-3b", - "hub_license": "apache-2.0", - "hub_hearts": 18, - "params_billions": 3.512, - "co2_cost": 1.121176518130097 - } - }, - { - "id": "ibm/merlinite-7b_bfloat16_233d12759d5bb9344231dafdb51310ec19d79c0e_False", - "model": { - "name": "ibm/merlinite-7b", - "sha": "233d12759d5bb9344231dafdb51310ec19d79c0e", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 16.751033885892248, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2498703440205322, - "normalized_score": 24.98703440205322 - }, - "bbh": { - "name": "BBH", - "value": 0.50071326118705, - "normalized_score": 29.97724776968684 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02416918429003021, - "normalized_score": 2.416918429003021 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.44115624999999997, - "normalized_score": 13.877864583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3068484042553192, - "normalized_score": 22.983156028368796 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-03-02", - "submission_date": "2024-06-09", - "generation": 1, - "base_model": "mistralai/Mistral-7B-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 104, - "params_billions": 7.242, - "co2_cost": 1.1007936688970816 - } - }, - { - "id": "ibm-granite/granite-3.0-1b-a400m-base_bfloat16_8f3d6d6fb24a1d2528f24bad0d2ae3e8fc6f3232_False", - "model": { - "name": "ibm-granite/granite-3.0-1b-a400m-base", - "sha": "8f3d6d6fb24a1d2528f24bad0d2ae3e8fc6f3232", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "GraniteForCausalLM", - "average_score": 6.0307899730101875, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24040324117785256, - "normalized_score": 24.040324117785254 - }, - "bbh": { - "name": "BBH", - "value": 0.3221205531032148, - "normalized_score": 6.055007672005359 - }, - "math": { - "name": "MATH Level 5", - "value": 0.026435045317220542, - "normalized_score": 2.643504531722054 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24748322147651006, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3367291666666667, - "normalized_score": 1.7578124999999993 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11519281914893617, - "normalized_score": 1.6880910165484628 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-10-03", - "submission_date": "", - "generation": 0, - "base_model": "ibm-granite/granite-3.0-1b-a400m-base", - "hub_license": "apache-2.0", - "hub_hearts": 6, - "params_billions": 1.335, - "co2_cost": 3.706138660642777 - } - }, - { - "id": "ibm-granite/granite-3.0-1b-a400m-instruct_bfloat16_acb9675a7d67b8657d9b8105d5cbd5818408293f_True", - "model": { - "name": "ibm-granite/granite-3.0-1b-a400m-instruct", - "sha": "acb9675a7d67b8657d9b8105d5cbd5818408293f", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": 
"GraniteForCausalLM", - "average_score": 8.0692284950461, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.33315159332792543, - "normalized_score": 33.31515933279255 - }, - "bbh": { - "name": "BBH", - "value": 0.3223950988485842, - "normalized_score": 5.453219408698861 - }, - "math": { - "name": "MATH Level 5", - "value": 0.027945619335347432, - "normalized_score": 2.794561933534743 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.36228124999999994, - "normalized_score": 2.6851562500000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12441821808510638, - "normalized_score": 2.713135342789597 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-10-03", - "submission_date": "", - "generation": 1, - "base_model": "ibm-granite/granite-3.0-1b-a400m-instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 19, - "params_billions": 1.335, - "co2_cost": 3.2681255854392237 - } - }, - { - "id": "ibm-granite/granite-3.0-2b-base_bfloat16_532f55c03d71a31905c0b825eba4b24fe7f7936b_False", - "model": { - "name": "ibm-granite/granite-3.0-2b-base", - "sha": "532f55c03d71a31905c0b825eba4b24fe7f7936b", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "GraniteForCausalLM", - "average_score": 14.095784874796749, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3873821460391761, - "normalized_score": 38.738214603917605 - }, - "bbh": { - "name": "BBH", - "value": 0.40474805593806223, - "normalized_score": 17.563749725828334 - }, - "math": { - "name": "MATH Level 5", - "value": 0.054380664652567974, - "normalized_score": 5.438066465256798 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28020134228187926, - "normalized_score": 4.026845637583901 - }, - "musr": { - "name": "MUSR", - "value": 0.3434270833333333, - "normalized_score": 3.461718750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.23811502659574468, - "normalized_score": 15.346114066193852 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-10-02", - "submission_date": "", - "generation": 0, - "base_model": "ibm-granite/granite-3.0-2b-base", - "hub_license": "apache-2.0", - "hub_hearts": 21, - "params_billions": 2.634, - "co2_cost": 1.5711600010720157 - } - }, - { - "id": "ibm-granite/granite-3.0-2b-instruct_bfloat16_342f92f4a0b4d6d83c0b61dc6c122e253a4efebd_True", - "model": { - "name": "ibm-granite/granite-3.0-2b-instruct", - "sha": "342f92f4a0b4d6d83c0b61dc6c122e253a4efebd", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "GraniteForCausalLM", - "average_score": 18.396095114284222, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.513977357854936, - "normalized_score": 51.3977357854936 - }, - "bbh": { - "name": "BBH", - "value": 0.44119772062630297, - "normalized_score": 21.73789141090241 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09214501510574018, - "normalized_score": 9.214501510574017 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - 
"normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.35148958333333336, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2814162234042553, - "normalized_score": 20.15735815602837 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-10-02", - "submission_date": "", - "generation": 1, - "base_model": "ibm-granite/granite-3.0-2b-instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 47, - "params_billions": 2.634, - "co2_cost": 1.5256897694562621 - } - }, - { - "id": "ibm-granite/granite-3.0-3b-a800m-base_bfloat16_0d1d12f91791b25289ef407e39d88f00d1256d10_False", - "model": { - "name": "ibm-granite/granite-3.0-3b-a800m-base", - "sha": "0d1d12f91791b25289ef407e39d88f00d1256d10", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "GraniteForCausalLM", - "average_score": 9.489841451458394, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2732261510569733, - "normalized_score": 27.32261510569733 - }, - "bbh": { - "name": "BBH", - "value": 0.36674974971308566, - "normalized_score": 11.3484424218006 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04833836858006042, - "normalized_score": 4.833836858006042 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2516778523489933, - "normalized_score": 0.22371364653244186 - }, - "musr": { - "name": "MUSR", - "value": 0.34196875, - "normalized_score": 3.312760416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.18907912234042554, - "normalized_score": 9.89768026004728 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-10-03", - "submission_date": "", - "generation": 0, - "base_model": "ibm-granite/granite-3.0-3b-a800m-base", - "hub_license": "apache-2.0", - "hub_hearts": 5, - "params_billions": 3.374, - "co2_cost": 5.293744935580685 - } - }, - { - "id": "ibm-granite/granite-3.0-3b-a800m-instruct_bfloat16_ab0c732243cfd50a601fa393dd46a2c5993746f7_True", - "model": { - "name": "ibm-granite/granite-3.0-3b-a800m-instruct", - "sha": "ab0c732243cfd50a601fa393dd46a2c5993746f7", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "GraniteForCausalLM", - "average_score": 13.698124325974995, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4298217618142085, - "normalized_score": 42.982176181420854 - }, - "bbh": { - "name": "BBH", - "value": 0.37527805291733446, - "normalized_score": 13.163009595010001 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0702416918429003, - "normalized_score": 7.02416918429003 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28104026845637586, - "normalized_score": 4.138702460850116 - }, - "musr": { - "name": "MUSR", - "value": 0.3486666666666667, - "normalized_score": 2.0833333333333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.21517619680851063, - "normalized_score": 12.797355200945626 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-10-03", - "submission_date": "2024-10-20", - 
"generation": 1, - "base_model": "ibm-granite/granite-3.0-3b-a800m-instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 17, - "params_billions": 3.374, - "co2_cost": 4.625489402223453 - } - }, - { - "id": "ibm-granite/granite-3.0-8b-base_bfloat16_1edd1f646abfcd90ed5d6c0d9711fbb02c947884_False", - "model": { - "name": "ibm-granite/granite-3.0-8b-base", - "sha": "1edd1f646abfcd90ed5d6c0d9711fbb02c947884", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "GraniteForCausalLM", - "average_score": 21.690924398799496, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4583482936386566, - "normalized_score": 45.83482936386566 - }, - "bbh": { - "name": "BBH", - "value": 0.4943760637365333, - "normalized_score": 27.974358298982427 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10120845921450151, - "normalized_score": 10.120845921450151 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32550335570469796, - "normalized_score": 10.067114093959727 - }, - "musr": { - "name": "MUSR", - "value": 0.40813541666666664, - "normalized_score": 10.450260416666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3312832446808511, - "normalized_score": 25.69813829787234 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-10-02", - "submission_date": "2024-10-20", - "generation": 0, - "base_model": "ibm-granite/granite-3.0-8b-base", - "hub_license": "apache-2.0", - "hub_hearts": 23, - "params_billions": 8.171, - "co2_cost": 2.820031779871169 - } - }, - { - "id": "ibm-granite/granite-3.0-8b-instruct_bfloat16_e0a466fb25b9e07e9c2dc93380a360189700d1f8_True", - "model": { - "name": "ibm-granite/granite-3.0-8b-instruct", - "sha": "e0a466fb25b9e07e9c2dc93380a360189700d1f8", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "GraniteForCausalLM", - "average_score": 24.027678753483297, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5309633993359841, - "normalized_score": 53.09633993359841 - }, - "bbh": { - "name": "BBH", - "value": 0.5191874631840226, - "normalized_score": 31.588159064715125 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1419939577039275, - "normalized_score": 14.19939577039275 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33221476510067116, - "normalized_score": 10.96196868008949 - }, - "musr": { - "name": "MUSR", - "value": 0.3900625, - "normalized_score": 7.024479166666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34566156914893614, - "normalized_score": 27.295729905437348 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-10-02", - "submission_date": "2024-10-20", - "generation": 1, - "base_model": "ibm-granite/granite-3.0-8b-instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 199, - "params_billions": 8.171, - "co2_cost": 2.5676637463760468 - } - }, - { - "id": "ibm-granite/granite-3.1-1b-a400m-base_bfloat16_6bbcb9db44432d0a5d4cf13cdc082324b7ee9e7b_False", - "model": { - "name": "ibm-granite/granite-3.1-1b-a400m-base", - "sha": "6bbcb9db44432d0a5d4cf13cdc082324b7ee9e7b", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - 
"architecture": "GraniteMoeForCausalLM", - "average_score": 6.312391508281799, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2519437315212525, - "normalized_score": 25.194373152125248 - }, - "bbh": { - "name": "BBH", - "value": 0.3298699546506724, - "normalized_score": 6.429845005402345 - }, - "math": { - "name": "MATH Level 5", - "value": 0.027190332326283987, - "normalized_score": 2.719033232628399 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2516778523489933, - "normalized_score": 0.22371364653244186 - }, - "musr": { - "name": "MUSR", - "value": 0.3500625, - "normalized_score": 1.7578124999999993 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11394614361702128, - "normalized_score": 1.549571513002364 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-12-06", - "submission_date": "2024-12-16", - "generation": 0, - "base_model": "ibm-granite/granite-3.1-1b-a400m-base", - "hub_license": "apache-2.0", - "hub_hearts": 6, - "params_billions": 1.335, - "co2_cost": 2.246823674730084 - } - }, - { - "id": "ibm-granite/granite-3.1-1b-a400m-instruct_float16_c4f8e0dd19f0ce00bca6c9742751d1827e8fa03a_True", - "model": { - "name": "ibm-granite/granite-3.1-1b-a400m-instruct", - "sha": "c4f8e0dd19f0ce00bca6c9742751d1827e8fa03a", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "GraniteMoeForCausalLM", - "average_score": 10.127255876383268, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.46863987553025976, - "normalized_score": 46.86398755302597 - }, - "bbh": { - "name": "BBH", - "value": 0.3279834385375178, - "normalized_score": 6.178183215904788 - }, - "math": { - "name": "MATH Level 5", - "value": 0.045317220543806644, - "normalized_score": 4.531722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23993288590604026, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.33025, - "normalized_score": 0.7812499999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12167553191489362, - "normalized_score": 2.40839243498818 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-12-06", - "submission_date": "2024-12-16", - "generation": 1, - "base_model": "ibm-granite/granite-3.1-1b-a400m-instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 13, - "params_billions": 1.335, - "co2_cost": 2.096168213768369 - } - }, - { - "id": "ibm-granite/granite-3.1-2b-base_bfloat16_31f1cf9bd26ca0c69bfaf86824ee6dc4268b8b73_False", - "model": { - "name": "ibm-granite/granite-3.1-2b-base", - "sha": "31f1cf9bd26ca0c69bfaf86824ee6dc4268b8b73", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "GraniteForCausalLM", - "average_score": 13.202826259598206, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.35216115462528313, - "normalized_score": 35.216115462528315 - }, - "bbh": { - "name": "BBH", - "value": 0.4047188028918873, - "normalized_score": 16.843689846888516 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05664652567975831, - "normalized_score": 5.664652567975831 - }, - "gpqa": { - "name": "GPQA", - "value": 
0.27768456375838924, - "normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.3485729166666667, - "normalized_score": 3.9049479166666674 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.22506648936170212, - "normalized_score": 13.89627659574468 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-12-06", - "submission_date": "2024-12-16", - "generation": 0, - "base_model": "ibm-granite/granite-3.1-2b-base", - "hub_license": "apache-2.0", - "hub_hearts": 12, - "params_billions": 2.534, - "co2_cost": 0.9884427131625634 - } - }, - { - "id": "ibm-granite/granite-3.1-2b-instruct_float16_8f683a244be9034aeea43dd2a80b7b4fe01d376f_True", - "model": { - "name": "ibm-granite/granite-3.1-2b-instruct", - "sha": "8f683a244be9034aeea43dd2a80b7b4fe01d376f", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "GraniteForCausalLM", - "average_score": 21.712212822028288, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.628557782240012, - "normalized_score": 62.8557782240012 - }, - "bbh": { - "name": "BBH", - "value": 0.44089858558056544, - "normalized_score": 21.822956140794506 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15256797583081572, - "normalized_score": 15.256797583081571 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28942953020134227, - "normalized_score": 5.257270693512303 - }, - "musr": { - "name": "MUSR", - "value": 0.3605416666666667, - "normalized_score": 4.867708333333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28191489361702127, - "normalized_score": 20.212765957446805 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-12-06", - "submission_date": "2024-12-16", - "generation": 1, - "base_model": "ibm-granite/granite-3.1-2b-instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 47, - "params_billions": 2.534, - "co2_cost": 1.0112838075919377 - } - }, - { - "id": "ibm-granite/granite-3.1-3b-a800m-base_bfloat16_e16343c6e3120c5f4e99ef53e48e9f882c9a8bd3_False", - "model": { - "name": "ibm-granite/granite-3.1-3b-a800m-base", - "sha": "e16343c6e3120c5f4e99ef53e48e9f882c9a8bd3", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "GraniteMoeForCausalLM", - "average_score": 10.00105158723934, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2996294276962903, - "normalized_score": 29.96294276962903 - }, - "bbh": { - "name": "BBH", - "value": 0.362822992347764, - "normalized_score": 11.905605199489822 - }, - "math": { - "name": "MATH Level 5", - "value": 0.045317220543806644, - "normalized_score": 4.531722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.3275208333333333, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1792719414893617, - "normalized_score": 8.807993498817966 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-12-06", - 
"submission_date": "2024-12-16", - "generation": 0, - "base_model": "ibm-granite/granite-3.1-3b-a800m-base", - "hub_license": "apache-2.0", - "hub_hearts": 5, - "params_billions": 3.299, - "co2_cost": 3.2456210070564877 - } - }, - { - "id": "ibm-granite/granite-3.1-3b-a800m-instruct_float16_503b5f483d33443ba85a876d3c25f8b85779c9cc_True", - "model": { - "name": "ibm-granite/granite-3.1-3b-a800m-instruct", - "sha": "503b5f483d33443ba85a876d3c25f8b85779c9cc", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "GraniteMoeForCausalLM", - "average_score": 17.277676062054883, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5516462984880118, - "normalized_score": 55.164629848801184 - }, - "bbh": { - "name": "BBH", - "value": 0.4009494521947192, - "normalized_score": 16.687236366660443 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11404833836858005, - "normalized_score": 11.404833836858005 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28859060402684567, - "normalized_score": 5.145413870246088 - }, - "musr": { - "name": "MUSR", - "value": 0.3486354166666667, - "normalized_score": 2.512760416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.21476063829787234, - "normalized_score": 12.751182033096924 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-12-06", - "submission_date": "2024-12-16", - "generation": 1, - "base_model": "ibm-granite/granite-3.1-3b-a800m-instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 21, - "params_billions": 3.299, - "co2_cost": 3.2727272082111534 - } - }, - { - "id": "ibm-granite/granite-3.1-8b-base_float16_174c7f4d7e9a016bdb85ecdf3319fe1f2d8991c4_False", - "model": { - "name": "ibm-granite/granite-3.1-8b-base", - "sha": "174c7f4d7e9a016bdb85ecdf3319fe1f2d8991c4", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "GraniteForCausalLM", - "average_score": 20.05719991900457, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4221033524381973, - "normalized_score": 42.21033524381973 - }, - "bbh": { - "name": "BBH", - "value": 0.4776956677111636, - "normalized_score": 26.01958867101177 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09441087613293052, - "normalized_score": 9.441087613293051 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3213087248322148, - "normalized_score": 9.507829977628639 - }, - "musr": { - "name": "MUSR", - "value": 0.3922291666666667, - "normalized_score": 8.36197916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3232214095744681, - "normalized_score": 24.802378841607563 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-12-06", - "submission_date": "2024-12-16", - "generation": 0, - "base_model": "ibm-granite/granite-3.1-8b-base", - "hub_license": "apache-2.0", - "hub_hearts": 21, - "params_billions": 8.171, - "co2_cost": 1.1973851147577552 - } - }, - { - "id": "ibm-granite/granite-3.1-8b-instruct_float16_f6749f3946b2dc9983b870317a71ddf7a65c0806_True", - "model": { - "name": "ibm-granite/granite-3.1-8b-instruct", - "sha": "f6749f3946b2dc9983b870317a71ddf7a65c0806", - "precision": "float16", - "type": 
"chatmodels", - "weight_type": "Original", - "architecture": "GraniteForCausalLM", - "average_score": 30.6030430081627, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7207564816908026, - "normalized_score": 72.07564816908027 - }, - "bbh": { - "name": "BBH", - "value": 0.5364460433816018, - "normalized_score": 34.089655299414055 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21978851963746224, - "normalized_score": 21.978851963746223 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31208053691275167, - "normalized_score": 8.277404921700223 - }, - "musr": { - "name": "MUSR", - "value": 0.47070833333333334, - "normalized_score": 19.00520833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3537234042553192, - "normalized_score": 28.191489361702125 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-12-06", - "submission_date": "2024-12-16", - "generation": 1, - "base_model": "ibm-granite/granite-3.1-8b-instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 154, - "params_billions": 8.171, - "co2_cost": 1.2356666876371007 - } - }, - { - "id": "ibm-granite/granite-3.2-2b-instruct_bfloat16_f8ad483f3631a3748679a46b27ca68fbfcb8bd7b_True", - "model": { - "name": "ibm-granite/granite-3.2-2b-instruct", - "sha": "f8ad483f3631a3748679a46b27ca68fbfcb8bd7b", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "GraniteForCausalLM", - "average_score": 21.25014812377563, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6151688630611223, - "normalized_score": 61.51688630611223 - }, - "bbh": { - "name": "BBH", - "value": 0.43872707491212865, - "normalized_score": 21.668268416036614 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14425981873111782, - "normalized_score": 14.425981873111782 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.3645729166666667, - "normalized_score": 4.704947916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2783410904255319, - "normalized_score": 19.815676713947987 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2025-02-17", - "submission_date": "2025-02-28", - "generation": 1, - "base_model": "ibm-granite/granite-3.2-2b-instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 38, - "params_billions": 2.534, - "co2_cost": 0.5072217436302251 - } - }, - { - "id": "ibm-granite/granite-3.2-8b-instruct_bfloat16_0276d996f60d5eb0b376b6d06622042d4ef3eb4b_True", - "model": { - "name": "ibm-granite/granite-3.2-8b-instruct", - "sha": "0276d996f60d5eb0b376b6d06622042d4ef3eb4b", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "GraniteForCausalLM", - "average_score": 30.7704488980163, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7274509412802475, - "normalized_score": 72.74509412802476 - }, - "bbh": { - "name": "BBH", - "value": 0.5401759656246116, - "normalized_score": 34.65536965519957 - }, - "math": { - "name": "MATH Level 5", - "value": 0.23791540785498488, - "normalized_score": 
23.791540785498487 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31543624161073824, - "normalized_score": 8.7248322147651 - }, - "musr": { - "name": "MUSR", - "value": 0.4561979166666667, - "normalized_score": 16.791406250000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35123005319148937, - "normalized_score": 27.914450354609933 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2025-02-17", - "submission_date": "2025-02-28", - "generation": 1, - "base_model": "ibm-granite/granite-3.2-8b-instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 59, - "params_billions": 8.171, - "co2_cost": 0.6149577544719722 - } - }, - { - "id": "ibm-granite/granite-7b-base_bfloat16_23fcb4cb5b69f8a122fb944491e9f1ad664ba37b_False", - "model": { - "name": "ibm-granite/granite-7b-base", - "sha": "23fcb4cb5b69f8a122fb944491e9f1ad664ba37b", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 7.908701929835419, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24142719096441884, - "normalized_score": 24.142719096441887 - }, - "bbh": { - "name": "BBH", - "value": 0.34804372716106186, - "normalized_score": 9.050800002899097 - }, - "math": { - "name": "MATH Level 5", - "value": 0.015861027190332326, - "normalized_score": 1.5861027190332326 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24580536912751677, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.35548958333333336, - "normalized_score": 3.4028645833333346 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.18342752659574468, - "normalized_score": 9.269725177304965 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-04-19", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "ibm-granite/granite-7b-base", - "hub_license": "apache-2.0", - "hub_hearts": 29, - "params_billions": 6.738, - "co2_cost": 1.3052476771223376 - } - }, - { - "id": "ibm-granite/granite-7b-instruct_bfloat16_c6d1adfa5cdba2c8344e055bb7de87b7935250a8_True", - "model": { - "name": "ibm-granite/granite-7b-instruct", - "sha": "c6d1adfa5cdba2c8344e055bb7de87b7935250a8", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 12.03495955436329, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2972313461615181, - "normalized_score": 29.72313461615181 - }, - "bbh": { - "name": "BBH", - "value": 0.37229529603269523, - "normalized_score": 12.639328702465264 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02039274924471299, - "normalized_score": 2.0392749244712993 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28523489932885904, - "normalized_score": 4.697986577181204 - }, - "musr": { - "name": "MUSR", - "value": 0.40199999999999997, - "normalized_score": 8.816666666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2286402925531915, - "normalized_score": 14.293365839243496 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": 
"2024-05-19", - "submission_date": "2024-10-02", - "generation": 1, - "base_model": "ibm/granite-7b-base", - "hub_license": "apache-2.0", - "hub_hearts": 7, - "params_billions": 6.738, - "co2_cost": 1.4229038379301109 - } - }, - { - "id": "icefog72/Ice0.15-02.10-RP_bfloat16_ab67a8b63836ec7c8e6729d79d9dfd2708b20eb3_False", - "model": { - "name": "icefog72/Ice0.15-02.10-RP", - "sha": "ab67a8b63836ec7c8e6729d79d9dfd2708b20eb3", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.49132746100335, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5343355629729118, - "normalized_score": 53.43355629729119 - }, - "bbh": { - "name": "BBH", - "value": 0.4976384736188401, - "normalized_score": 30.130104071068587 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05740181268882175, - "normalized_score": 5.740181268882175 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.43197916666666664, - "normalized_score": 12.997395833333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30659906914893614, - "normalized_score": 22.95545212765957 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-02", - "submission_date": "2024-10-02", - "generation": 0, - "base_model": "icefog72/Ice0.15-02.10-RP", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 7, - "params_billions": 7.242, - "co2_cost": 1.1856443208425487 - } - }, - { - "id": "icefog72/Ice0.16-02.10-RP_bfloat16_cb5c4d8a2e74efb41eae8b6dff8d06252c0a795d_False", - "model": { - "name": "icefog72/Ice0.16-02.10-RP", - "sha": "cb5c4d8a2e74efb41eae8b6dff8d06252c0a795d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.076418616812266, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5069083365470286, - "normalized_score": 50.69083365470286 - }, - "bbh": { - "name": "BBH", - "value": 0.4945564313654156, - "normalized_score": 29.58232083302582 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05891238670694864, - "normalized_score": 5.8912386706948645 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.433375, - "normalized_score": 13.405208333333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3067652925531915, - "normalized_score": 22.973921394799056 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-02", - "submission_date": "2024-10-02", - "generation": 0, - "base_model": "icefog72/Ice0.16-02.10-RP", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 1.1935116607722032 - } - }, - { - "id": "icefog72/Ice0.17-03.10-RP_bfloat16_ca5a429546334784d94bcab0eb52c5f22f433680_False", - "model": { - "name": "icefog72/Ice0.17-03.10-RP", - "sha": "ca5a429546334784d94bcab0eb52c5f22f433680", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": 
"MistralForCausalLM", - "average_score": 21.41440427176671, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5123538876846767, - "normalized_score": 51.235388768467665 - }, - "bbh": { - "name": "BBH", - "value": 0.5006815748225494, - "normalized_score": 30.376262438172095 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06117824773413897, - "normalized_score": 6.117824773413897 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28187919463087246, - "normalized_score": 4.250559284116329 - }, - "musr": { - "name": "MUSR", - "value": 0.433375, - "normalized_score": 13.338541666666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30851063829787234, - "normalized_score": 23.167848699763592 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-03", - "submission_date": "2024-10-03", - "generation": 0, - "base_model": "icefog72/Ice0.17-03.10-RP", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 1.2204125060394582 - } - }, - { - "id": "icefog72/Ice0.27-06.11-RP_bfloat16_f2c78e71b59e0d36475217e3f265bc135f7c8505_False", - "model": { - "name": "icefog72/Ice0.27-06.11-RP", - "sha": "f2c78e71b59e0d36475217e3f265bc135f7c8505", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.83125244134635, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49182059158588104, - "normalized_score": 49.1820591585881 - }, - "bbh": { - "name": "BBH", - "value": 0.5111654648230625, - "normalized_score": 31.364751797095256 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05664652567975831, - "normalized_score": 5.664652567975831 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31208053691275167, - "normalized_score": 8.277404921700223 - }, - "musr": { - "name": "MUSR", - "value": 0.43278125000000006, - "normalized_score": 12.564322916666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3154089095744681, - "normalized_score": 23.93432328605201 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-06", - "submission_date": "2024-11-06", - "generation": 1, - "base_model": "icefog72/Ice0.27-06.11-RP (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 0.8287012653892059 - } - }, - { - "id": "icefog72/Ice0.29-06.11-RP_bfloat16_932f16ea3f790553904f0d2dfcdc861d737cbaf7_False", - "model": { - "name": "icefog72/Ice0.29-06.11-RP", - "sha": "932f16ea3f790553904f0d2dfcdc861d737cbaf7", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.71623231324205, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.486050346414181, - "normalized_score": 48.6050346414181 - }, - "bbh": { - "name": "BBH", - "value": 0.5087880173407883, - "normalized_score": 31.359453902395284 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05664652567975831, - "normalized_score": 5.664652567975831 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - 
"musr": { - "name": "MUSR", - "value": 0.4458958333333333, - "normalized_score": 14.370312499999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30925864361702127, - "normalized_score": 23.250960401891252 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-06", - "submission_date": "2024-11-06", - "generation": 1, - "base_model": "icefog72/Ice0.29-06.11-RP (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 0.8596332192143764 - } - }, - { - "id": "icefog72/Ice0.31-08.11-RP_bfloat16_52d947b170ee72c7f4c2b63b11f00330847e44f9_False", - "model": { - "name": "icefog72/Ice0.31-08.11-RP", - "sha": "52d947b170ee72c7f4c2b63b11f00330847e44f9", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.886899252180523, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5145768782386291, - "normalized_score": 51.457687823862905 - }, - "bbh": { - "name": "BBH", - "value": 0.5032134100285419, - "normalized_score": 30.460341897671253 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06117824773413897, - "normalized_score": 6.117824773413897 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.42766666666666664, - "normalized_score": 11.891666666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3130817819148936, - "normalized_score": 23.675753546099287 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-08", - "submission_date": "2024-11-08", - "generation": 1, - "base_model": "icefog72/Ice0.31-08.11-RP (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 0.9269387673132791 - } - }, - { - "id": "icefog72/Ice0.32-10.11-RP_bfloat16_a05dbb7fe0e756afb73c19e6f33c5481a9ac2ba8_False", - "model": { - "name": "icefog72/Ice0.32-10.11-RP", - "sha": "a05dbb7fe0e756afb73c19e6f33c5481a9ac2ba8", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.63483115225961, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49154576523623983, - "normalized_score": 49.15457652362399 - }, - "bbh": { - "name": "BBH", - "value": 0.5047695597611622, - "normalized_score": 30.430940035916453 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0513595166163142, - "normalized_score": 5.13595166163142 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31208053691275167, - "normalized_score": 8.277404921700223 - }, - "musr": { - "name": "MUSR", - "value": 0.4382083333333333, - "normalized_score": 13.476041666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3100066489361702, - "normalized_score": 23.33407210401891 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-11", - "submission_date": "2024-11-11", - "generation": 1, - "base_model": "icefog72/Ice0.32-10.11-RP (Merge)", - 
"hub_license": "cc-by-nc-4.0", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 0.8480647893442718 - } - }, - { - "id": "icefog72/Ice0.34b-14.11-RP_bfloat16_5362f57fd0402c7c14c8dbe6b55c8b979cc8f475_False", - "model": { - "name": "icefog72/Ice0.34b-14.11-RP", - "sha": "5362f57fd0402c7c14c8dbe6b55c8b979cc8f475", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.681833980913552, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47620868185303883, - "normalized_score": 47.620868185303884 - }, - "bbh": { - "name": "BBH", - "value": 0.5067195329696937, - "normalized_score": 30.80635727588579 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0649546827794562, - "normalized_score": 6.495468277945619 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.4419895833333333, - "normalized_score": 13.615364583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3125, - "normalized_score": 23.61111111111111 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-14", - "submission_date": "2024-11-14", - "generation": 1, - "base_model": "icefog72/Ice0.34b-14.11-RP (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.8596460466015595 - } - }, - { - "id": "icefog72/Ice0.34n-14.11-RP_float16_1a39b99112926fc8dd44c3be35d99c04388d3078_False", - "model": { - "name": "icefog72/Ice0.34n-14.11-RP", - "sha": "1a39b99112926fc8dd44c3be35d99c04388d3078", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.87840965107064, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47865663107222167, - "normalized_score": 47.86566310722217 - }, - "bbh": { - "name": "BBH", - "value": 0.5091090160356474, - "normalized_score": 31.206252799751898 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07250755287009064, - "normalized_score": 7.250755287009064 - }, - "gpqa": { - "name": "GPQA", - "value": 0.313758389261745, - "normalized_score": 8.501118568232664 - }, - "musr": { - "name": "MUSR", - "value": 0.4379583333333333, - "normalized_score": 12.844791666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31241688829787234, - "normalized_score": 23.60187647754137 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-14", - "submission_date": "2024-11-14", - "generation": 1, - "base_model": "icefog72/Ice0.34n-14.11-RP (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 0.8970969375770342 - } - }, - { - "id": "icefog72/Ice0.37-18.11-RP_bfloat16_4d9dfaa52efdaede3291c85ccb9c5966636298e0_False", - "model": { - "name": "icefog72/Ice0.37-18.11-RP", - "sha": "4d9dfaa52efdaede3291c85ccb9c5966636298e0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.913941249727642, - "has_chat_template": false - }, - 
"evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4972162750391184, - "normalized_score": 49.72162750391184 - }, - "bbh": { - "name": "BBH", - "value": 0.5084310833712639, - "normalized_score": 31.042850362735788 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06419939577039276, - "normalized_score": 6.419939577039275 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31208053691275167, - "normalized_score": 8.277404921700223 - }, - "musr": { - "name": "MUSR", - "value": 0.43392708333333335, - "normalized_score": 12.207552083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3143284574468085, - "normalized_score": 23.814273049645386 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-18", - "submission_date": "2024-11-18", - "generation": 1, - "base_model": "icefog72/Ice0.37-18.11-RP (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 0.8290256218099152 - } - }, - { - "id": "icefog72/Ice0.38-19.11-RP_bfloat16_5d35120e4511369d97441c1732b3abf02bcc27ff_False", - "model": { - "name": "icefog72/Ice0.38-19.11-RP", - "sha": "5d35120e4511369d97441c1732b3abf02bcc27ff", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.813765464787064, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44033830237104216, - "normalized_score": 44.03383023710421 - }, - "bbh": { - "name": "BBH", - "value": 0.510108216407024, - "normalized_score": 31.3306289411177 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05513595166163142, - "normalized_score": 5.5135951661631415 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.43671875, - "normalized_score": 12.956510416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31399601063829785, - "normalized_score": 23.777334515366427 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-19", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.8454755064784819 - } - }, - { - "id": "icefog72/Ice0.39-19.11-RP_bfloat16_044d7404646a13187ecabc5f87480a4e6bcaf18c_False", - "model": { - "name": "icefog72/Ice0.39-19.11-RP", - "sha": "044d7404646a13187ecabc5f87480a4e6bcaf18c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.33876534002405, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47565902915375646, - "normalized_score": 47.565902915375645 - }, - "bbh": { - "name": "BBH", - "value": 0.5092985137525424, - "normalized_score": 31.263627378198198 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04984894259818731, - "normalized_score": 4.984894259818732 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3104026845637584, - "normalized_score": 8.05369127516779 - }, - "musr": { - "name": "MUSR", - "value": 0.4341458333333333, - "normalized_score": 12.534895833333335 - }, - "mmlu_pro": { - 
"name": "MMLU-PRO", - "value": 0.3126662234042553, - "normalized_score": 23.62958037825059 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-20", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.850586712637743 - } - }, - { - "id": "icefog72/Ice0.40-20.11-RP_bfloat16_4d8d429be08dc2e57be3e890797c8e861264aad5_False", - "model": { - "name": "icefog72/Ice0.40-20.11-RP", - "sha": "4d8d429be08dc2e57be3e890797c8e861264aad5", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.79272576511275, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4762585495374495, - "normalized_score": 47.62585495374495 - }, - "bbh": { - "name": "BBH", - "value": 0.509308586549064, - "normalized_score": 31.50523985734019 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06419939577039276, - "normalized_score": 6.419939577039275 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.44459374999999995, - "normalized_score": 14.274218749999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30992353723404253, - "normalized_score": 23.324837470449168 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-21", - "submission_date": "2024-11-21", - "generation": 1, - "base_model": "icefog72/Ice0.40-20.11-RP (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 7, - "params_billions": 7.242, - "co2_cost": 1.3714913254279848 - } - }, - { - "id": "icefog72/Ice0.41-22.11-RP_bfloat16_d785cbcf4c7c25cf2c8ce1ad941c79810fc3ec59_False", - "model": { - "name": "icefog72/Ice0.41-22.11-RP", - "sha": "d785cbcf4c7c25cf2c8ce1ad941c79810fc3ec59", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.03517499742875, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4620451513096362, - "normalized_score": 46.20451513096362 - }, - "bbh": { - "name": "BBH", - "value": 0.4723318624775949, - "normalized_score": 25.412777425832047 - }, - "math": { - "name": "MATH Level 5", - "value": 0.030966767371601207, - "normalized_score": 3.096676737160121 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28691275167785235, - "normalized_score": 4.921700223713646 - }, - "musr": { - "name": "MUSR", - "value": 0.45597916666666666, - "normalized_score": 16.597395833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.26180186170212766, - "normalized_score": 17.977984633569736 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-22", - "submission_date": "2024-11-22", - "generation": 1, - "base_model": "icefog72/Ice0.41-22.11-RP (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 0.8546209986058674 - } - }, - { - "id": 
"icefog72/Ice0.50-16.01-RP_bfloat16_51238f8676051be213f7b53aa50d1188cae38565_False", - "model": { - "name": "icefog72/Ice0.50-16.01-RP", - "sha": "51238f8676051be213f7b53aa50d1188cae38565", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.162878876154817, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43848987353555235, - "normalized_score": 43.84898735355523 - }, - "bbh": { - "name": "BBH", - "value": 0.49804682910006176, - "normalized_score": 30.030386129201485 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04682779456193353, - "normalized_score": 4.682779456193353 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.4380520833333333, - "normalized_score": 12.823177083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30693151595744683, - "normalized_score": 22.992390661938536 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-16", - "submission_date": "2025-01-16", - "generation": 1, - "base_model": "icefog72/Ice0.50-16.01-RP (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 0.8047468824222894 - } - }, - { - "id": "icefog72/Ice0.50.1-16.01-RP_bfloat16_616aa8f825a29699031521264d437745a32e155b_False", - "model": { - "name": "icefog72/Ice0.50.1-16.01-RP", - "sha": "616aa8f825a29699031521264d437745a32e155b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.645154207327916, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4829031414424837, - "normalized_score": 48.29031414424837 - }, - "bbh": { - "name": "BBH", - "value": 0.5107472937598788, - "normalized_score": 31.56631239449821 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06117824773413897, - "normalized_score": 6.117824773413897 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.43274999999999997, - "normalized_score": 12.260416666666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3132480053191489, - "normalized_score": 23.69422281323877 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-16", - "submission_date": "2025-01-16", - "generation": 1, - "base_model": "icefog72/Ice0.50.1-16.01-RP (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 0.8045758406128498 - } - }, - { - "id": "icefog72/Ice0.51-16.01-RP_bfloat16_dc363f17b4fded81ee94f8d2665020e3a3dcf321_False", - "model": { - "name": "icefog72/Ice0.51-16.01-RP", - "sha": "dc363f17b4fded81ee94f8d2665020e3a3dcf321", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.84509564746615, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4430610779398662, - "normalized_score": 44.30610779398663 - }, - "bbh": { - 
"name": "BBH", - "value": 0.5044464794803141, - "normalized_score": 31.042011224203208 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0513595166163142, - "normalized_score": 5.13595166163142 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.44366666666666665, - "normalized_score": 14.425000000000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30601728723404253, - "normalized_score": 22.89080969267139 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-16", - "submission_date": "2025-01-16", - "generation": 1, - "base_model": "icefog72/Ice0.51-16.01-RP (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 0.8545865350107835 - } - }, - { - "id": "icefog72/Ice0.51.1-16.01-RP_bfloat16_5cb637492d86af52c717410ec7bf9a422e4074a2_False", - "model": { - "name": "icefog72/Ice0.51.1-16.01-RP", - "sha": "5cb637492d86af52c717410ec7bf9a422e4074a2", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.35322617351512, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4573243438520902, - "normalized_score": 45.73243438520902 - }, - "bbh": { - "name": "BBH", - "value": 0.5121083021452105, - "normalized_score": 31.869153564804794 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06419939577039276, - "normalized_score": 6.419939577039275 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3062080536912752, - "normalized_score": 7.494407158836691 - }, - "musr": { - "name": "MUSR", - "value": 0.43938541666666664, - "normalized_score": 13.223177083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3104222074468085, - "normalized_score": 23.380245271867608 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-16", - "submission_date": "2025-01-16", - "generation": 1, - "base_model": "icefog72/Ice0.51.1-16.01-RP (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 0.8168791417026133 - } - }, - { - "id": "icefog72/Ice0.52-16.01-RP_bfloat16_f0b740c2df56c86556be27b52028115121e19201_False", - "model": { - "name": "icefog72/Ice0.52-16.01-RP", - "sha": "f0b740c2df56c86556be27b52028115121e19201", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.947275934460162, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4503051902285935, - "normalized_score": 45.030519022859345 - }, - "bbh": { - "name": "BBH", - "value": 0.504677500406742, - "normalized_score": 30.96644092952937 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05060422960725076, - "normalized_score": 5.0604229607250755 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3087248322147651, - "normalized_score": 7.829977628635347 - }, - "musr": { - "name": "MUSR", - "value": 0.43960416666666663, - "normalized_score": 13.683854166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3080119680851064, - "normalized_score": 23.112440898345156 - } - }, - 
"features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-16", - "submission_date": "2025-01-16", - "generation": 1, - "base_model": "icefog72/Ice0.52-16.01-RP (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 0.7951320775446538 - } - }, - { - "id": "icefog72/Ice0.52.1-16.01-RP_bfloat16_25289247c90ffe3c654cbc53e307acdd440b0d75_False", - "model": { - "name": "icefog72/Ice0.52.1-16.01-RP", - "sha": "25289247c90ffe3c654cbc53e307acdd440b0d75", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.2113364576448, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45492626231731803, - "normalized_score": 45.492626231731805 - }, - "bbh": { - "name": "BBH", - "value": 0.510648341878344, - "normalized_score": 31.801683327768355 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06268882175226587, - "normalized_score": 6.268882175226587 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.43938541666666664, - "normalized_score": 13.156510416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31050531914893614, - "normalized_score": 23.389479905437348 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-16", - "submission_date": "2025-01-16", - "generation": 1, - "base_model": "icefog72/Ice0.52.1-16.01-RP (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 0.825747444848155 - } - }, - { - "id": "icefog72/Ice0.53-16.01-RP_bfloat16_e73d4d94151f92dbf7356e80455370aaf4a5ea8b_False", - "model": { - "name": "icefog72/Ice0.53-16.01-RP", - "sha": "e73d4d94151f92dbf7356e80455370aaf4a5ea8b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.48687520926336, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4741352943523185, - "normalized_score": 47.41352943523185 - }, - "bbh": { - "name": "BBH", - "value": 0.5101675133484068, - "normalized_score": 31.406397736383838 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0634441087613293, - "normalized_score": 6.3444108761329305 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3087248322147651, - "normalized_score": 7.829977628635347 - }, - "musr": { - "name": "MUSR", - "value": 0.43274999999999997, - "normalized_score": 12.260416666666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31299867021276595, - "normalized_score": 23.66651891252955 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-16", - "submission_date": "2025-01-16", - "generation": 1, - "base_model": "icefog72/Ice0.53-16.01-RP (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 0.8283239344189874 - } - }, - { - "id": "icefog72/Ice0.54-17.01-RP_bfloat16_789822aacdd04552045ee2392979a0f307c93f31_False", - "model": { - "name": 
"icefog72/Ice0.54-17.01-RP", - "sha": "789822aacdd04552045ee2392979a0f307c93f31", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.92119268564771, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4378903531518593, - "normalized_score": 43.78903531518593 - }, - "bbh": { - "name": "BBH", - "value": 0.4853448809638454, - "normalized_score": 27.720691580598487 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04078549848942598, - "normalized_score": 4.078549848942599 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.48741666666666666, - "normalized_score": 21.260416666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.23262965425531915, - "normalized_score": 14.736628250591016 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-17", - "submission_date": "2025-01-17", - "generation": 1, - "base_model": "icefog72/Ice0.54-17.01-RP (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.8264819307094243 - } - }, - { - "id": "icefog72/Ice0.55-17.01-RP_bfloat16_8d2f53de294cf6508f5bb31c4055135a3a3f304e_False", - "model": { - "name": "icefog72/Ice0.55-17.01-RP", - "sha": "8d2f53de294cf6508f5bb31c4055135a3a3f304e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.460984298912976, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.496067101956143, - "normalized_score": 49.6067101956143 - }, - "bbh": { - "name": "BBH", - "value": 0.5076567509425027, - "normalized_score": 30.94478559005843 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06042296072507553, - "normalized_score": 6.042296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28691275167785235, - "normalized_score": 4.921700223713646 - }, - "musr": { - "name": "MUSR", - "value": 0.4725, - "normalized_score": 18.829166666666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2657912234042553, - "normalized_score": 18.42124704491726 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-17", - "submission_date": "2025-01-17", - "generation": 1, - "base_model": "icefog72/Ice0.55-17.01-RP (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 0.8075650561519988 - } - }, - { - "id": "icefog72/Ice0.57-17.01-RP_bfloat16_b79f93e6510d343bcb63a0fdfce22dd0c08b8e9d_False", - "model": { - "name": "icefog72/Ice0.57-17.01-RP", - "sha": "b79f93e6510d343bcb63a0fdfce22dd0c08b8e9d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.771237145846925, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5151763986223221, - "normalized_score": 51.51763986223221 - }, - "bbh": { - "name": "BBH", - "value": 0.5064080420224116, - "normalized_score": 30.954927188616967 - }, - "math": { - "name": "MATH Level 5", - 
"value": 0.0513595166163142, - "normalized_score": 5.13595166163142 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.46859375, - "normalized_score": 18.40755208333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.26512632978723405, - "normalized_score": 18.34736997635934 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-17", - "submission_date": "2025-01-17", - "generation": 1, - "base_model": "icefog72/Ice0.57-17.01-RP (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 0.8483639070260532 - } - }, - { - "id": "icefog72/Ice0.60-18.01-RP_bfloat16_d4fcf6fee9a9a2915b25d540f3f99b5875f33916_False", - "model": { - "name": "icefog72/Ice0.60-18.01-RP", - "sha": "d4fcf6fee9a9a2915b25d540f3f99b5875f33916", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.63934600501626, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5374329002601985, - "normalized_score": 53.74329002601985 - }, - "bbh": { - "name": "BBH", - "value": 0.5093724614980669, - "normalized_score": 31.50598646501167 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05362537764350453, - "normalized_score": 5.362537764350453 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.46704166666666663, - "normalized_score": 17.546874999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28366023936170215, - "normalized_score": 20.40669326241135 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-18", - "submission_date": "2025-01-18", - "generation": 1, - "base_model": "icefog72/Ice0.60-18.01-RP (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 0.8687066808695614 - } - }, - { - "id": "icefog72/Ice0.60.1-18.01-RP_bfloat16_6c445ee26444b557a5335f84447cdd4490998c2a_False", - "model": { - "name": "icefog72/Ice0.60.1-18.01-RP", - "sha": "6c445ee26444b557a5335f84447cdd4490998c2a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.823113618523887, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5187735209244804, - "normalized_score": 51.877352092448035 - }, - "bbh": { - "name": "BBH", - "value": 0.5119675522804026, - "normalized_score": 31.832087469838836 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04607250755287009, - "normalized_score": 4.607250755287009 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - "musr": { - "name": "MUSR", - "value": 0.4497708333333333, - "normalized_score": 14.421354166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2913896276595745, - "normalized_score": 21.265514184397162 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": 
false - }, - "metadata": { - "upload_date": "2025-01-18", - "submission_date": "2025-01-18", - "generation": 1, - "base_model": "icefog72/Ice0.60.1-18.01-RP (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 0.831775152380986 - } - }, - { - "id": "icefog72/Ice0.61-18.01-RP_bfloat16_de7284f12936f199f64446577efcf9bfc3373cfb_False", - "model": { - "name": "icefog72/Ice0.61-18.01-RP", - "sha": "de7284f12936f199f64446577efcf9bfc3373cfb", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.37634566318344, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5441273598496433, - "normalized_score": 54.41273598496434 - }, - "bbh": { - "name": "BBH", - "value": 0.5104839613346842, - "normalized_score": 31.77285517916415 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04682779456193353, - "normalized_score": 4.682779456193353 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2986577181208054, - "normalized_score": 6.487695749440718 - }, - "musr": { - "name": "MUSR", - "value": 0.4697395833333333, - "normalized_score": 17.917447916666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27086103723404253, - "normalized_score": 18.98455969267139 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-18", - "submission_date": "2025-01-18", - "generation": 1, - "base_model": "icefog72/Ice0.61-18.01-RP (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 0.8564151296309443 - } - }, - { - "id": "icefog72/Ice0.62-18.01-RP_bfloat16_54dd5535f4610541170ff00c7d5b95a08bdbc53b_False", - "model": { - "name": "icefog72/Ice0.62-18.01-RP", - "sha": "54dd5535f4610541170ff00c7d5b95a08bdbc53b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.248597707539464, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.536733644507684, - "normalized_score": 53.6733644507684 - }, - "bbh": { - "name": "BBH", - "value": 0.5103327208197285, - "normalized_score": 31.62165712541642 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05740181268882175, - "normalized_score": 5.740181268882175 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.4537708333333333, - "normalized_score": 15.22135416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28773271276595747, - "normalized_score": 20.859190307328607 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-18", - "submission_date": "2025-01-18", - "generation": 1, - "base_model": "icefog72/Ice0.62-18.01-RP (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 0.8312003066913778 - } - }, - { - "id": "icefog72/Ice0.62.1-24.01-RP_bfloat16_8984e9a4474f4dabfbcab7d57b31bd95a998b5c1_False", - "model": { - "name": "icefog72/Ice0.62.1-24.01-RP", - "sha": "8984e9a4474f4dabfbcab7d57b31bd95a998b5c1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - 
"weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.004775451212506, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5181740005407873, - "normalized_score": 51.81740005407873 - }, - "bbh": { - "name": "BBH", - "value": 0.5108967760246949, - "normalized_score": 31.63738531804965 - }, - "math": { - "name": "MATH Level 5", - "value": 0.055891238670694864, - "normalized_score": 5.589123867069486 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30033557046979864, - "normalized_score": 6.711409395973152 - }, - "musr": { - "name": "MUSR", - "value": 0.45510416666666664, - "normalized_score": 15.488020833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28706781914893614, - "normalized_score": 20.78531323877068 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-24", - "submission_date": "2025-01-24", - "generation": 1, - "base_model": "icefog72/Ice0.62.1-24.01-RP (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 0.8559000893897843 - } - }, - { - "id": "icefog72/Ice0.64-24.01-RP_bfloat16_6a092740c7afc5cb57ed4c8e3099e70c864aae5b_False", - "model": { - "name": "icefog72/Ice0.64-24.01-RP", - "sha": "6a092740c7afc5cb57ed4c8e3099e70c864aae5b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.875703285300133, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5440774921652327, - "normalized_score": 54.40774921652327 - }, - "bbh": { - "name": "BBH", - "value": 0.5059610114856247, - "normalized_score": 31.22157004117841 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06268882175226587, - "normalized_score": 6.268882175226587 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.4620208333333333, - "normalized_score": 16.719270833333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29330119680851063, - "normalized_score": 21.47791075650118 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-24", - "submission_date": "2025-01-24", - "generation": 1, - "base_model": "icefog72/Ice0.64-24.01-RP (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 0.8319368893469683 - } - }, - { - "id": "icefog72/Ice0.64.1-24.01-RP_bfloat16_012366d73b339025b121987aee071249cf07c3c8_False", - "model": { - "name": "icefog72/Ice0.64.1-24.01-RP", - "sha": "012366d73b339025b121987aee071249cf07c3c8", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.88569529169502, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5446770125489258, - "normalized_score": 54.467701254892575 - }, - "bbh": { - "name": "BBH", - "value": 0.5059610114856247, - "normalized_score": 31.22157004117841 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06268882175226587, - "normalized_score": 6.268882175226587 - }, - "gpqa": { - "name": "GPQA", - "value": 
0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.4620208333333333, - "normalized_score": 16.719270833333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29330119680851063, - "normalized_score": 21.47791075650118 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-24", - "submission_date": "2025-01-24", - "generation": 1, - "base_model": "icefog72/Ice0.64.1-24.01-RP (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 0.8267874263763786 - } - }, - { - "id": "icefog72/Ice0.65-25.01-RP_bfloat16_30d373c0cbefb5ff5551f024dbe13ec49a1b934e_False", - "model": { - "name": "icefog72/Ice0.65-25.01-RP", - "sha": "30d373c0cbefb5ff5551f024dbe13ec49a1b934e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.728992372015465, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5029366525264077, - "normalized_score": 50.29366525264078 - }, - "bbh": { - "name": "BBH", - "value": 0.5095976254774931, - "normalized_score": 31.947024647830972 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0649546827794562, - "normalized_score": 6.495468277945619 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.4339583333333333, - "normalized_score": 12.178125000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29970079787234044, - "normalized_score": 22.18897754137116 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-25", - "generation": 1, - "base_model": "icefog72/Ice0.65-25.01-RP (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 0.8236528476522945 - } - }, - { - "id": "icefog72/Ice0.66-25.01-RP_bfloat16_e702f830777c7a04860fc3b963d0e967b2e93901_False", - "model": { - "name": "icefog72/Ice0.66-25.01-RP", - "sha": "e702f830777c7a04860fc3b963d0e967b2e93901", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.685149861181817, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.532487134137422, - "normalized_score": 53.24871341374221 - }, - "bbh": { - "name": "BBH", - "value": 0.5128983540188711, - "normalized_score": 32.189358928643664 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06042296072507553, - "normalized_score": 6.042296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.44344791666666666, - "normalized_score": 14.36432291666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3039394946808511, - "normalized_score": 22.659943853427897 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-25", - "generation": 1, - 
"base_model": "icefog72/Ice0.66-25.01-RP (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.8119911365270718 - } - }, - { - "id": "icefog72/Ice0.67-25.01-RP_bfloat16_be7d6f95484266f2845e889f1266658c60491027_False", - "model": { - "name": "icefog72/Ice0.67-25.01-RP", - "sha": "be7d6f95484266f2845e889f1266658c60491027", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.200117322014464, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.536134124123991, - "normalized_score": 53.6134124123991 - }, - "bbh": { - "name": "BBH", - "value": 0.5112894150790012, - "normalized_score": 32.095819566371766 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07477341389728097, - "normalized_score": 7.477341389728097 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28523489932885904, - "normalized_score": 4.697986577181204 - }, - "musr": { - "name": "MUSR", - "value": 0.42788541666666663, - "normalized_score": 12.019010416666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30967420212765956, - "normalized_score": 23.29713356973995 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-25", - "generation": 1, - "base_model": "icefog72/Ice0.67-25.01-RP (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 0.8446217784171618 - } - }, - { - "id": "icefog72/Ice0.68-25.01-RP_bfloat16_6ed376c2f7dd6aa1db08f250b6df442476ae751c_False", - "model": { - "name": "icefog72/Ice0.68-25.01-RP", - "sha": "6ed376c2f7dd6aa1db08f250b6df442476ae751c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 23.242050794562672, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5513714721383707, - "normalized_score": 55.13714721383707 - }, - "bbh": { - "name": "BBH", - "value": 0.5130058094823416, - "normalized_score": 32.485197545735645 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07250755287009064, - "normalized_score": 7.250755287009064 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3104026845637584, - "normalized_score": 8.05369127516779 - }, - "musr": { - "name": "MUSR", - "value": 0.44456249999999997, - "normalized_score": 14.170312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3011968085106383, - "normalized_score": 22.355200945626475 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-25", - "generation": 1, - "base_model": "icefog72/Ice0.68-25.01-RP (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 0.8313689642507519 - } - }, - { - "id": "icefog72/Ice0.69-25.01-RP_bfloat16_052401cd6c0a7d4fb12619b778912b77a6ab472e_False", - "model": { - "name": "icefog72/Ice0.69-25.01-RP", - "sha": "052401cd6c0a7d4fb12619b778912b77a6ab472e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.81762296294448, - 
"has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5437527981311808, - "normalized_score": 54.375279813118084 - }, - "bbh": { - "name": "BBH", - "value": 0.5097683665599672, - "normalized_score": 31.801503019218796 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05664652567975831, - "normalized_score": 5.664652567975831 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31291946308724833, - "normalized_score": 8.389261744966444 - }, - "musr": { - "name": "MUSR", - "value": 0.4485625, - "normalized_score": 14.836979166666659 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29654255319148937, - "normalized_score": 21.83806146572104 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-26", - "submission_date": "2025-01-26", - "generation": 1, - "base_model": "icefog72/Ice0.69-25.01-RP (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 1.2529375653887906 - } - }, - { - "id": "icefog72/Ice0.7-29.09-RP_bfloat16_932f2687137eebcafa9b90fe06e73ed272e0be81_False", - "model": { - "name": "icefog72/Ice0.7-29.09-RP", - "sha": "932f2687137eebcafa9b90fe06e73ed272e0be81", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.550046416942568, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5175744801570943, - "normalized_score": 51.75744801570943 - }, - "bbh": { - "name": "BBH", - "value": 0.5047661992357916, - "normalized_score": 30.72587571429055 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06646525679758308, - "normalized_score": 6.646525679758309 - }, - "gpqa": { - "name": "GPQA", - "value": 0.287751677852349, - "normalized_score": 5.033557046979867 - }, - "musr": { - "name": "MUSR", - "value": 0.4237916666666666, - "normalized_score": 11.507291666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3126662234042553, - "normalized_score": 23.62958037825059 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-29", - "submission_date": "2024-10-03", - "generation": 1, - "base_model": "icefog72/Ice0.7-29.09-RP (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 1.1737084628394303 - } - }, - { - "id": "icefog72/Ice0.70-25.01-RP_bfloat16_32fa5aad9b85b3364c4215e7bd56aaae42178295_False", - "model": { - "name": "icefog72/Ice0.70-25.01-RP", - "sha": "32fa5aad9b85b3364c4215e7bd56aaae42178295", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 23.100001948439864, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.549797869652522, - "normalized_score": 54.9797869652522 - }, - "bbh": { - "name": "BBH", - "value": 0.513632436415875, - "normalized_score": 32.48918739061522 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05966767371601209, - "normalized_score": 5.966767371601208 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.45119791666666664, - 
"normalized_score": 15.266406249999994 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2996176861702128, - "normalized_score": 22.17974290780142 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-26", - "submission_date": "2025-01-26", - "generation": 0, - "base_model": "icefog72/Ice0.70-25.01-RP", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 9, - "params_billions": 7.242, - "co2_cost": 0.8120544366643394 - } - }, - { - "id": "icefog72/Ice0.70.1-01.02-RP_bfloat16_651dffdc704b3d01da43ee794e93b6a1e6f6de5f_False", - "model": { - "name": "icefog72/Ice0.70.1-01.02-RP", - "sha": "651dffdc704b3d01da43ee794e93b6a1e6f6de5f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.389656886486716, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5069582042314393, - "normalized_score": 50.69582042314393 - }, - "bbh": { - "name": "BBH", - "value": 0.5059798926804829, - "normalized_score": 30.98341832479747 - }, - "math": { - "name": "MATH Level 5", - "value": 0.033987915407854986, - "normalized_score": 3.3987915407854987 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.4599166666666667, - "normalized_score": 17.456249999999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2748503989361702, - "normalized_score": 19.42782210401891 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-01", - "submission_date": "2025-02-01", - "generation": 1, - "base_model": "icefog72/Ice0.70.1-01.02-RP (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.7903627100350105 - } - }, - { - "id": "icefog72/Ice0.73-01.02-RP_bfloat16_787dd7ed7822e0e0ff640bafafd1a7a2d3bc4213_False", - "model": { - "name": "icefog72/Ice0.73-01.02-RP", - "sha": "787dd7ed7822e0e0ff640bafafd1a7a2d3bc4213", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.850970045622258, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.529164838184905, - "normalized_score": 52.9164838184905 - }, - "bbh": { - "name": "BBH", - "value": 0.5103425890792322, - "normalized_score": 31.512893244861278 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03851963746223565, - "normalized_score": 3.8519637462235647 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.46639583333333334, - "normalized_score": 18.432812499999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27019614361702127, - "normalized_score": 18.910682624113473 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-01", - "submission_date": "2025-02-01", - "generation": 1, - "base_model": "icefog72/Ice0.73-01.02-RP (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.242, - 
"co2_cost": 0.8115800990167161 - } - }, - { - "id": "icefog72/Ice0.74-02.02-RP_bfloat16_8ed5ccee25c479b47bb40f69b6cfcfe66d9c4e49_False", - "model": { - "name": "icefog72/Ice0.74-02.02-RP", - "sha": "8ed5ccee25c479b47bb40f69b6cfcfe66d9c4e49", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 14.163954352130537, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2935344884905384, - "normalized_score": 29.35344884905384 - }, - "bbh": { - "name": "BBH", - "value": 0.4646134965075064, - "normalized_score": 24.816877383693065 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0015105740181268882, - "normalized_score": 0.1510574018126888 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.42804166666666665, - "normalized_score": 11.805208333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.21434507978723405, - "normalized_score": 12.705008865248226 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "icefog72/Ice0.74-02.02-RP (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.8577043786899584 - } - }, - { - "id": "icefog72/Ice0.76-02.02-RP_bfloat16_80ae841ed7121d151b36952be936fdf74d50ed66_False", - "model": { - "name": "icefog72/Ice0.76-02.02-RP", - "sha": "80ae841ed7121d151b36952be936fdf74d50ed66", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.182689731680473, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45290274250100837, - "normalized_score": 45.290274250100836 - }, - "bbh": { - "name": "BBH", - "value": 0.5085610407875073, - "normalized_score": 31.0716806557854 - }, - "math": { - "name": "MATH Level 5", - "value": 0.014350453172205438, - "normalized_score": 1.4350453172205437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28691275167785235, - "normalized_score": 4.921700223713646 - }, - "musr": { - "name": "MUSR", - "value": 0.43616666666666665, - "normalized_score": 14.020833333333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2652094414893617, - "normalized_score": 18.35660460992908 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "icefog72/Ice0.76-02.02-RP (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.7440540496769722 - } - }, - { - "id": "icefog72/Ice0.77-02.02-RP_bfloat16_b78089d84cf2255d9c32a74dd7aa5b041db2275d_False", - "model": { - "name": "icefog72/Ice0.77-02.02-RP", - "sha": "b78089d84cf2255d9c32a74dd7aa5b041db2275d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.480867406637245, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5309633993359841, - 
"normalized_score": 53.09633993359841 - }, - "bbh": { - "name": "BBH", - "value": 0.5109257300160749, - "normalized_score": 31.860105340069964 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03927492447129909, - "normalized_score": 3.927492447129909 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28523489932885904, - "normalized_score": 4.697986577181204 - }, - "musr": { - "name": "MUSR", - "value": 0.4765, - "normalized_score": 19.095833333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29986702127659576, - "normalized_score": 22.20744680851064 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-05", - "generation": 1, - "base_model": "icefog72/Ice0.77-02.02-RP (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 3.5639793499337524 - } - }, - { - "id": "icefog72/Ice0.78-02.02-RP_bfloat16_e335dea95135a17aa3e06c2893d385b7bf4a7c19_False", - "model": { - "name": "icefog72/Ice0.78-02.02-RP", - "sha": "e335dea95135a17aa3e06c2893d385b7bf4a7c19", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.937114158912596, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.405292401937969, - "normalized_score": 40.52924019379691 - }, - "bbh": { - "name": "BBH", - "value": 0.5002126961381052, - "normalized_score": 29.33342240128623 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04380664652567976, - "normalized_score": 4.380664652567976 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2936241610738255, - "normalized_score": 5.8165548098433995 - }, - "musr": { - "name": "MUSR", - "value": 0.468625, - "normalized_score": 17.844791666666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2954621010638298, - "normalized_score": 21.71801122931442 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "icefog72/Ice0.78-02.02-RP (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 0.8631772807841601 - } - }, - { - "id": "icefog72/Ice0.80-03.02-RP_bfloat16_2aad3cb184adcef0a9d5f1c7a38511c3908bbc29_False", - "model": { - "name": "icefog72/Ice0.80-03.02-RP", - "sha": "2aad3cb184adcef0a9d5f1c7a38511c3908bbc29", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.887791167207023, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5516462984880118, - "normalized_score": 55.164629848801184 - }, - "bbh": { - "name": "BBH", - "value": 0.5097962218679292, - "normalized_score": 30.5837538790623 - }, - "math": { - "name": "MATH Level 5", - "value": 0.055891238670694864, - "normalized_score": 5.589123867069486 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2785234899328859, - "normalized_score": 3.8031319910514525 - }, - "musr": { - "name": "MUSR", - "value": 0.4923125, - "normalized_score": 20.939062500000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2912234042553192, - "normalized_score": 
21.247044917257686 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "icefog72/Ice0.80-03.02-RP (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.9036628255082761 - } - }, - { - "id": "icefog72/IceCocoaRP-7b_float16_001beaf88932f7e010af21bbdeff0079bda73b1d_False", - "model": { - "name": "icefog72/IceCocoaRP-7b", - "sha": "001beaf88932f7e010af21bbdeff0079bda73b1d", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.9218542806483, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4962421929369628, - "normalized_score": 49.62421929369628 - }, - "bbh": { - "name": "BBH", - "value": 0.4937902147076245, - "normalized_score": 29.63689549472275 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05740181268882175, - "normalized_score": 5.740181268882175 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2953020134228188, - "normalized_score": 6.040268456375841 - }, - "musr": { - "name": "MUSR", - "value": 0.4197916666666666, - "normalized_score": 11.17395833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3098404255319149, - "normalized_score": 23.315602836879428 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-07", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "icefog72/IceCocoaRP-7b (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 3, - "params_billions": 7.242, - "co2_cost": 1.1698831456217178 - } - }, - { - "id": "icefog72/IceCoffeeRP-7b_float16_131c0f7c0809a9d23b05b63cb550a586c3c7b372_False", - "model": { - "name": "icefog72/IceCoffeeRP-7b", - "sha": "131c0f7c0809a9d23b05b63cb550a586c3c7b372", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.34382476557608, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4959174989029109, - "normalized_score": 49.59174989029109 - }, - "bbh": { - "name": "BBH", - "value": 0.48887216244327214, - "normalized_score": 29.39810739240589 - }, - "math": { - "name": "MATH Level 5", - "value": 0.054380664652567974, - "normalized_score": 5.438066465256798 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28523489932885904, - "normalized_score": 4.697986577181204 - }, - "musr": { - "name": "MUSR", - "value": 0.4159791666666666, - "normalized_score": 10.997395833333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2974567819148936, - "normalized_score": 21.939642434988176 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-26", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "icefog72/IceCoffeeRP-7b", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 6, - "params_billions": 7.242, - "co2_cost": 1.1434890928451789 - } - }, - { - "id": "icefog72/IceDrinkByFrankensteinV3RP_bfloat16_a4d2eb422867ea28860ad3b983b93bc97ca91719_False", - 
"model": { - "name": "icefog72/IceDrinkByFrankensteinV3RP", - "sha": "a4d2eb422867ea28860ad3b983b93bc97ca91719", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.805345696389196, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4974911013887596, - "normalized_score": 49.74911013887596 - }, - "bbh": { - "name": "BBH", - "value": 0.4832523723413275, - "normalized_score": 28.84588139816073 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05060422960725076, - "normalized_score": 5.0604229607250755 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.4253125, - "normalized_score": 12.19739583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.292719414893617, - "normalized_score": 21.413268321513 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-23", - "submission_date": "2024-10-03", - "generation": 0, - "base_model": "icefog72/IceDrinkByFrankensteinV3RP", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 2.342835251712049 - } - }, - { - "id": "icefog72/IceDrinkNameGoesHereRP-7b-Model_Stock_bfloat16_78f7625f85c3cb150565ebb68c3f8d47d48325c8_False", - "model": { - "name": "icefog72/IceDrinkNameGoesHereRP-7b-Model_Stock", - "sha": "78f7625f85c3cb150565ebb68c3f8d47d48325c8", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 18.6570669982101, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49684171332065585, - "normalized_score": 49.68417133206558 - }, - "bbh": { - "name": "BBH", - "value": 0.46578646938927254, - "normalized_score": 26.22465405632467 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04078549848942598, - "normalized_score": 4.078549848942599 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.4067395833333334, - "normalized_score": 9.309114583333338 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2816655585106383, - "normalized_score": 20.18506205673759 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-14", - "submission_date": "2024-09-24", - "generation": 0, - "base_model": "icefog72/IceDrinkNameGoesHereRP-7b-Model_Stock", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 2, - "params_billions": 7.242, - "co2_cost": 2.2387574055675357 - } - }, - { - "id": "icefog72/IceDrinkNameNotFoundRP-7b-Model_Stock_bfloat16_35db2bf9e6812c5819378be68f94159e962fd1cb_False", - "model": { - "name": "icefog72/IceDrinkNameNotFoundRP-7b-Model_Stock", - "sha": "35db2bf9e6812c5819378be68f94159e962fd1cb", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.38126177033122, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5130032757527804, - "normalized_score": 51.30032757527805 - }, - 
"bbh": { - "name": "BBH", - "value": 0.502625425089929, - "normalized_score": 30.668251445896402 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06042296072507553, - "normalized_score": 6.042296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.4371875, - "normalized_score": 13.648437500000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3064328457446808, - "normalized_score": 22.936982860520093 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-15", - "submission_date": "2024-09-15", - "generation": 0, - "base_model": "icefog72/IceDrinkNameNotFoundRP-7b-Model_Stock", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 1.2028972154551574 - } - }, - { - "id": "icefog72/IceDrunkCherryRP-7b_bfloat16_160b01e50d9c9441886f6cf987a3495bd8fa1c49_False", - "model": { - "name": "icefog72/IceDrunkCherryRP-7b", - "sha": "160b01e50d9c9441886f6cf987a3495bd8fa1c49", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.271642861184226, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.48982255969715904, - "normalized_score": 48.98225596971591 - }, - "bbh": { - "name": "BBH", - "value": 0.4846629039263151, - "normalized_score": 28.241090201205953 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06117824773413897, - "normalized_score": 6.117824773413897 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27684563758389263, - "normalized_score": 3.5794183445190177 - }, - "musr": { - "name": "MUSR", - "value": 0.4291875, - "normalized_score": 12.381770833333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3009474734042553, - "normalized_score": 22.327497044917255 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-24", - "submission_date": "2024-09-24", - "generation": 0, - "base_model": "icefog72/IceDrunkCherryRP-7b", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 1.0898596002548293 - } - }, - { - "id": "icefog72/IceDrunkenCherryRP-7b_bfloat16_7a0d428a84bbef60a5287e838551dc56230b291f_False", - "model": { - "name": "icefog72/IceDrunkenCherryRP-7b", - "sha": "7a0d428a84bbef60a5287e838551dc56230b291f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.79272576511275, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4762585495374495, - "normalized_score": 47.62585495374495 - }, - "bbh": { - "name": "BBH", - "value": 0.509308586549064, - "normalized_score": 31.50523985734019 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06419939577039276, - "normalized_score": 6.419939577039275 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.44459374999999995, - "normalized_score": 14.274218749999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30992353723404253, - 
"normalized_score": 23.324837470449168 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-21", - "submission_date": "2024-11-25", - "generation": 1, - "base_model": "icefog72/IceDrunkenCherryRP-7b (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 7, - "params_billions": 7.242, - "co2_cost": 0.8331698129169264 - } - }, - { - "id": "icefog72/IceEspressoRPv2-7b_bfloat16_d71a4c2ae25c063fd4c3d3df039908c648a8bab4_False", - "model": { - "name": "icefog72/IceEspressoRPv2-7b", - "sha": "d71a4c2ae25c063fd4c3d3df039908c648a8bab4", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.365276481870684, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4977160600539901, - "normalized_score": 49.77160600539901 - }, - "bbh": { - "name": "BBH", - "value": 0.5054890156350785, - "normalized_score": 31.303238558418098 - }, - "math": { - "name": "MATH Level 5", - "value": 0.061933534743202415, - "normalized_score": 6.193353474320242 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28942953020134227, - "normalized_score": 5.257270693512303 - }, - "musr": { - "name": "MUSR", - "value": 0.43306249999999996, - "normalized_score": 12.766145833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3061003989361702, - "normalized_score": 22.90004432624113 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-11", - "submission_date": "2024-09-11", - "generation": 1, - "base_model": "icefog72/IceEspressoRPv2-7b (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 1.1450676211289978 - } - }, - { - "id": "icefog72/IceLemonTeaRP-32k-7b_float16_7ea0bdf873c535b73ca20db46db0799bac433662_False", - "model": { - "name": "icefog72/IceLemonTeaRP-32k-7b", - "sha": "7ea0bdf873c535b73ca20db46db0799bac433662", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.372435824981753, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5212214701436633, - "normalized_score": 52.12214701436632 - }, - "bbh": { - "name": "BBH", - "value": 0.49973852418379305, - "normalized_score": 30.135779642023177 - }, - "math": { - "name": "MATH Level 5", - "value": 0.054380664652567974, - "normalized_score": 5.438066465256798 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.42903125, - "normalized_score": 12.195572916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3067652925531915, - "normalized_score": 22.973921394799056 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-03", - "submission_date": "2024-07-27", - "generation": 1, - "base_model": "icefog72/IceLemonTeaRP-32k-7b (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 24, - "params_billions": 7.242, - "co2_cost": 1.1428701039081552 - } - }, - { - "id": 
"icefog72/IceMartiniRP-7b_bfloat16_e5be38a55d2d9877fbb61cffc7f48402ac0193fc_False", - "model": { - "name": "icefog72/IceMartiniRP-7b", - "sha": "e5be38a55d2d9877fbb61cffc7f48402ac0193fc", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.146002703846154, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5044603873278457, - "normalized_score": 50.446038732784565 - }, - "bbh": { - "name": "BBH", - "value": 0.4972421837639585, - "normalized_score": 29.685367916429147 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06646525679758308, - "normalized_score": 6.646525679758309 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.4344895833333333, - "normalized_score": 13.144531250000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3073470744680851, - "normalized_score": 23.038563829787233 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-24", - "submission_date": "2024-09-24", - "generation": 0, - "base_model": "icefog72/IceMartiniRP-7b", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 2, - "params_billions": 7.242, - "co2_cost": 1.1388752444259926 - } - }, - { - "id": "icefog72/IceNalyvkaRP-7b_bfloat16_25e556b22d702e6139c61799c946e4287611b015_False", - "model": { - "name": "icefog72/IceNalyvkaRP-7b", - "sha": "25e556b22d702e6139c61799c946e4287611b015", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 23.100001948439864, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.549797869652522, - "normalized_score": 54.9797869652522 - }, - "bbh": { - "name": "BBH", - "value": 0.513632436415875, - "normalized_score": 32.48918739061522 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05966767371601209, - "normalized_score": 5.966767371601208 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.45119791666666664, - "normalized_score": 15.266406249999994 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2996176861702128, - "normalized_score": 22.17974290780142 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-26", - "submission_date": "2025-03-01", - "generation": 0, - "base_model": "icefog72/IceNalyvkaRP-7b", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 9, - "params_billions": 7.242, - "co2_cost": 0.8293532808410896 - } - }, - { - "id": "icefog72/IceSakeRP-7b_float16_3b6b00bc48cd99e9b28e5aa8293dc987a0cf069a_False", - "model": { - "name": "icefog72/IceSakeRP-7b", - "sha": "3b6b00bc48cd99e9b28e5aa8293dc987a0cf069a", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.56363636347618, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5227950726295119, - "normalized_score": 52.279507262951185 - }, - "bbh": { - "name": 
"BBH", - "value": 0.5119287057484642, - "normalized_score": 31.6512550721568 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0634441087613293, - "normalized_score": 6.3444108761329305 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28523489932885904, - "normalized_score": 4.697986577181204 - }, - "musr": { - "name": "MUSR", - "value": 0.41300000000000003, - "normalized_score": 10.225000000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3176529255319149, - "normalized_score": 24.183658392434985 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-07", - "submission_date": "2024-08-22", - "generation": 1, - "base_model": "icefog72/IceSakeRP-7b (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 15, - "params_billions": 7.242, - "co2_cost": 1.8686736978920173 - } - }, - { - "id": "icefog72/IceSakeV4RP-7b_bfloat16_e8cb50b78918149c7d1bf663bcb807e7bfac3eed_False", - "model": { - "name": "icefog72/IceSakeV4RP-7b", - "sha": "e8cb50b78918149c7d1bf663bcb807e7bfac3eed", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.05225099073927, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4634192830578421, - "normalized_score": 46.34192830578421 - }, - "bbh": { - "name": "BBH", - "value": 0.4929557826908731, - "normalized_score": 29.234193217077536 - }, - "math": { - "name": "MATH Level 5", - "value": 0.055891238670694864, - "normalized_score": 5.589123867069486 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.40819791666666666, - "normalized_score": 9.858072916666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31025598404255317, - "normalized_score": 23.361776004728128 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.09615726584972 - } - }, - { - "id": "icefog72/IceSakeV6RP-7b_float16_6838e68d35d037b0ef9b04a9de1ebc8ab508cd45_False", - "model": { - "name": "icefog72/IceSakeV6RP-7b", - "sha": "6838e68d35d037b0ef9b04a9de1ebc8ab508cd45", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.214466273348467, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5032613465604596, - "normalized_score": 50.32613465604596 - }, - "bbh": { - "name": "BBH", - "value": 0.49760336362566354, - "normalized_score": 30.391494717552195 - }, - "math": { - "name": "MATH Level 5", - "value": 0.061933534743202415, - "normalized_score": 6.193353474320242 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.42001041666666666, - "normalized_score": 11.634635416666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3093417553191489, - "normalized_score": 23.26019503546099 - } - }, - "features": { - "is_not_available_on_hub": false, - 
"is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.1108855330334666 - } - }, - { - "id": "icefog72/IceSakeV8RP-7b_float16_0f8f73fe356583e561479c689aa6597435327f4e_True", - "model": { - "name": "icefog72/IceSakeV8RP-7b", - "sha": "0f8f73fe356583e561479c689aa6597435327f4e", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.689486698109572, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6085741388404988, - "normalized_score": 60.857413884049876 - }, - "bbh": { - "name": "BBH", - "value": 0.48847141337960176, - "normalized_score": 28.966258233266576 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05966767371601209, - "normalized_score": 5.966767371601208 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.3992708333333333, - "normalized_score": 8.542187500000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.301030585106383, - "normalized_score": 22.336731678486995 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-26", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "icefog72/IceSakeV8RP-7b", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 1.2965708194682928 - } - }, - { - "id": "icefog72/IceTea21EnergyDrinkRPV13-DPOv3_bfloat16_2d4b4fd596ff0f6706a5752198e59da6ffc08067_False", - "model": { - "name": "icefog72/IceTea21EnergyDrinkRPV13-DPOv3", - "sha": "2d4b4fd596ff0f6706a5752198e59da6ffc08067", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.671670611114738, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5263423272472595, - "normalized_score": 52.63423272472595 - }, - "bbh": { - "name": "BBH", - "value": 0.5019587584232624, - "normalized_score": 30.6127340167025 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0581570996978852, - "normalized_score": 5.81570996978852 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.4371875, - "normalized_score": 13.648437500000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30560172872340424, - "normalized_score": 22.844636524822693 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-05", - "submission_date": "2024-09-06", - "generation": 1, - "base_model": "icefog72/IceTea21EnergyDrinkRPV13-DPOv3 (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 7.242, - "co2_cost": 1.1598840944021205 - } - }, - { - "id": "icefog72/IceTea21EnergyDrinkRPV13-DPOv3.5_bfloat16_0b0b0864347c3fad2b4d3e102f2f9839d20e296c_False", - "model": { - "name": "icefog72/IceTea21EnergyDrinkRPV13-DPOv3.5", - "sha": 
"0b0b0864347c3fad2b4d3e102f2f9839d20e296c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 17.321387609280592, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.48709978412833504, - "normalized_score": 48.70997841283351 - }, - "bbh": { - "name": "BBH", - "value": 0.4399660013109026, - "normalized_score": 22.57322577923665 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03625377643504532, - "normalized_score": 3.625377643504532 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28439597315436244, - "normalized_score": 4.5861297539149914 - }, - "musr": { - "name": "MUSR", - "value": 0.39641666666666664, - "normalized_score": 7.78541666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.24983377659574468, - "normalized_score": 16.648197399527188 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-25", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.0014573988569522 - } - }, - { - "id": "ifable/gemma-2-Ifable-9B_bfloat16_d3dbde4efb93ea0a4f247de82541479de6b03160_False", - "model": { - "name": "ifable/gemma-2-Ifable-9B", - "sha": "d3dbde4efb93ea0a4f247de82541479de6b03160", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 23.56844886390901, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2984292787581395, - "normalized_score": 29.842927875813952 - }, - "bbh": { - "name": "BBH", - "value": 0.5866115556693244, - "normalized_score": 41.032644646265275 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13972809667673716, - "normalized_score": 13.972809667673717 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3414429530201342, - "normalized_score": 12.192393736017896 - }, - "musr": { - "name": "MUSR", - "value": 0.40525000000000005, - "normalized_score": 8.522916666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4226230053191489, - "normalized_score": 35.847000591016545 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-10", - "submission_date": "2024-09-25", - "generation": 0, - "base_model": "ifable/gemma-2-Ifable-9B", - "hub_license": "gemma", - "hub_hearts": 63, - "params_billions": 9.242, - "co2_cost": 8.635208842924055 - } - }, - { - "id": "ilsp/Llama-Krikri-8B-Instruct_bfloat16_387b1399555201ee842a3985e827b464830cfc00_False", - "model": { - "name": "ilsp/Llama-Krikri-8B-Instruct", - "sha": "387b1399555201ee842a3985e827b464830cfc00", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.18078391296001, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6078748830879843, - "normalized_score": 60.78748830879843 - }, - "bbh": { - "name": "BBH", - "value": 0.504664191645287, - "normalized_score": 29.305557163927727 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11782477341389729, - "normalized_score": 
11.782477341389729 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.4079791666666666, - "normalized_score": 10.464062499999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3312832446808511, - "normalized_score": 25.69813829787234 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-07", - "submission_date": "2025-02-10", - "generation": 1, - "base_model": "ilsp/Llama-Krikri-8B-Instruct (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 17, - "params_billions": 8.202, - "co2_cost": 0.7570021231420504 - } - }, - { - "id": "inflatebot/MN-12B-Mag-Mell-R1_bfloat16_170cea841f08b4dc047363cb71e903fc34c3258d_False", - "model": { - "name": "inflatebot/MN-12B-Mag-Mell-R1", - "sha": "170cea841f08b4dc047363cb71e903fc34c3258d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 23.14682361352939, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.46129602787271107, - "normalized_score": 46.129602787271104 - }, - "bbh": { - "name": "BBH", - "value": 0.5303854975434981, - "normalized_score": 32.53583283307959 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1299093655589124, - "normalized_score": 12.990936555891238 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3162751677852349, - "normalized_score": 8.83668903803132 - }, - "musr": { - "name": "MUSR", - "value": 0.40022916666666664, - "normalized_score": 11.2953125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34383311170212766, - "normalized_score": 27.09256796690307 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-16", - "submission_date": "2025-01-04", - "generation": 1, - "base_model": "inflatebot/MN-12B-Mag-Mell-R1 (Merge)", - "hub_license": "", - "hub_hearts": 132, - "params_billions": 12.248, - "co2_cost": 2.23511126535229 - } - }, - { - "id": "informatiker/Qwen2-7B-Instruct-abliterated_bfloat16_7577d60acfe4544d5ab303f0a4d69a9fcb9cf1aa_True", - "model": { - "name": "informatiker/Qwen2-7B-Instruct-abliterated", - "sha": "7577d60acfe4544d5ab303f0a4d69a9fcb9cf1aa", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 27.991694247930226, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5821708622011817, - "normalized_score": 58.217086220118176 - }, - "bbh": { - "name": "BBH", - "value": 0.5534265515936739, - "normalized_score": 37.79572344136589 - }, - "math": { - "name": "MATH Level 5", - "value": 0.263595166163142, - "normalized_score": 26.3595166163142 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.38879166666666665, - "normalized_score": 6.83229166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3873005319148936, - "normalized_score": 31.922281323877062 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - 
"is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-10", - "submission_date": "2024-09-15", - "generation": 0, - "base_model": "informatiker/Qwen2-7B-Instruct-abliterated", - "hub_license": "", - "hub_hearts": 6, - "params_billions": 7.616, - "co2_cost": 2.121210091367628 - } - }, - { - "id": "insightfactory/Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model_float16_5e7923397020289986d89f704d42775e8cb11741_False", - "model": { - "name": "insightfactory/Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model", - "sha": "5e7923397020289986d89f704d42775e8cb11741", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "", - "average_score": 17.182396518039905, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45884807865352817, - "normalized_score": 45.88480786535282 - }, - "bbh": { - "name": "BBH", - "value": 0.4146016381618061, - "normalized_score": 17.41910908101376 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10498489425981873, - "normalized_score": 10.498489425981873 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27181208053691275, - "normalized_score": 2.9082774049216997 - }, - "musr": { - "name": "MUSR", - "value": 0.349875, - "normalized_score": 4.601041666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2960438829787234, - "normalized_score": 21.7826536643026 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-19", - "submission_date": "2025-02-19", - "generation": 2, - "base_model": "meta-llama/Llama-3.2-3B-Instruct", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.933, - "co2_cost": 1.721413675707368 - } - }, - { - "id": "instruction-pretrain/InstructLM-500M_float16_e9d33823c76303dfaff6a8397a8b70d0118ea350_False", - "model": { - "name": "instruction-pretrain/InstructLM-500M", - "sha": "e9d33823c76303dfaff6a8397a8b70d0118ea350", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 2.8543503197666724, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1027662158627996, - "normalized_score": 10.27662158627996 - }, - "bbh": { - "name": "BBH", - "value": 0.29408717872529677, - "normalized_score": 2.317053716048478 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25671140939597314, - "normalized_score": 0.8948545861297527 - }, - "musr": { - "name": "MUSR", - "value": 0.3528229166666667, - "normalized_score": 2.0695312500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1141123670212766, - "normalized_score": 1.5680407801418434 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-18", - "submission_date": "2024-06-27", - "generation": 0, - "base_model": "instruction-pretrain/InstructLM-500M", - "hub_license": "apache-2.0", - "hub_hearts": 34, - "params_billions": 0.5, - "co2_cost": 0.491584290133651 - } - }, - { - "id": "internlm/internlm2-1_8b_bfloat16_c24f301c7374ad9f9b58d1ea80f68b5f57cbca13_False", - "model": { - "name": "internlm/internlm2-1_8b", - "sha": 
"c24f301c7374ad9f9b58d1ea80f68b5f57cbca13", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "InternLM2ForCausalLM", - "average_score": 8.748129853272754, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2197702097102355, - "normalized_score": 21.97702097102355 - }, - "bbh": { - "name": "BBH", - "value": 0.3879732800028095, - "normalized_score": 13.633857965906719 - }, - "math": { - "name": "MATH Level 5", - "value": 0.021148036253776436, - "normalized_score": 2.1148036253776437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2483221476510067, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.38128125, - "normalized_score": 8.226822916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15882646276595744, - "normalized_score": 6.536273640661936 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-01-30", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "internlm/internlm2-1_8b", - "hub_license": "other", - "hub_hearts": 30, - "params_billions": 8.0, - "co2_cost": 1.3272910338245865 - } - }, - { - "id": "internlm/internlm2-7b_float16_530fc706c606b1af1145c662877a7d99ad79d623_False", - "model": { - "name": "internlm/internlm2-7b", - "sha": "530fc706c606b1af1145c662877a7d99ad79d623", - "precision": "float16", - "type": "❓other", - "weight_type": "Original", - "architecture": "Unknown", - "average_score": 17.92336611649886, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22803680981595092, - "normalized_score": 22.803680981595093 - }, - "bbh": { - "name": "BBH", - "value": 0.5825, - "normalized_score": 40.27619825708061 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08571428571428572, - "normalized_score": 8.571428571428571 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33666666666666667, - "normalized_score": 11.555555555555557 - }, - "musr": { - "name": "MUSR", - "value": 0.43999999999999995, - "normalized_score": 14.333333333333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.19, - "normalized_score": 10.0 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-01-12", - "submission_date": "", - "generation": 0, - "base_model": "internlm/internlm2-7b", - "hub_license": "other", - "hub_hearts": 42, - "params_billions": 0.0, - "co2_cost": 1.0395424089601004 - } - }, - { - "id": "internlm/internlm2-chat-1_8b_bfloat16_4e226eeb354499f4d34ef4c27f6939f377475cc1_True", - "model": { - "name": "internlm/internlm2-chat-1_8b", - "sha": "4e226eeb354499f4d34ef4c27f6939f377475cc1", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "InternLM2ForCausalLM", - "average_score": 10.641800452239107, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2386545477111841, - "normalized_score": 23.865454771118408 - }, - "bbh": { - "name": "BBH", - "value": 0.4452271664119214, - "normalized_score": 20.67235743256185 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0324773413897281, - "normalized_score": 3.2477341389728096 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 
2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.36305208333333333, - "normalized_score": 4.61484375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.18392619680851063, - "normalized_score": 9.325132978723403 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-01-30", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "internlm/internlm2-chat-1_8b", - "hub_license": "other", - "hub_hearts": 31, - "params_billions": 1.889, - "co2_cost": 1.192845124564106 - } - }, - { - "id": "internlm/internlm2_5-1_8b-chat_bfloat16_4426f00b854561fa60d555d2b628064b56bcb758_True", - "model": { - "name": "internlm/internlm2_5-1_8b-chat", - "sha": "4426f00b854561fa60d555d2b628064b56bcb758", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "InternLM2ForCausalLM", - "average_score": 14.749842142996677, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.38490870889240547, - "normalized_score": 38.490870889240554 - }, - "bbh": { - "name": "BBH", - "value": 0.4488926786996439, - "normalized_score": 21.03092693656956 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15861027190332327, - "normalized_score": 15.861027190332328 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.35939583333333336, - "normalized_score": 4.424479166666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12990359042553193, - "normalized_score": 3.322621158392436 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-07-30", - "submission_date": "2024-08-07", - "generation": 0, - "base_model": "internlm/internlm2_5-1_8b-chat", - "hub_license": "other", - "hub_hearts": 25, - "params_billions": 1.89, - "co2_cost": 1.543331294717085 - } - }, - { - "id": "internlm/internlm2_5-20b-chat_bfloat16_ef17bde929761255fee76d95e2c25969ccd93b0d_True", - "model": { - "name": "internlm/internlm2_5-20b-chat", - "sha": "ef17bde929761255fee76d95e2c25969ccd93b0d", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "InternLM2ForCausalLM", - "average_score": 38.87959582082076, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7009977969565198, - "normalized_score": 70.09977969565199 - }, - "bbh": { - "name": "BBH", - "value": 0.7473580533672884, - "normalized_score": 62.83245915287989 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4078549848942598, - "normalized_score": 40.78549848942598 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3213087248322148, - "normalized_score": 9.507829977628639 - }, - "musr": { - "name": "MUSR", - "value": 0.4558229166666667, - "normalized_score": 16.744531249999994 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39976728723404253, - "normalized_score": 33.30747635933806 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-07-30", - "submission_date": "2024-08-12", - "generation": 0, - "base_model": "internlm/internlm2_5-20b-chat", - 
"hub_license": "other", - "hub_hearts": 92, - "params_billions": 19.86, - "co2_cost": 7.465415742664814 - } - }, - { - "id": "internlm/internlm2_5-7b-chat_float16_bebb00121ee105b823647c3ba2b1e152652edc33_True", - "model": { - "name": "internlm/internlm2_5-7b-chat", - "sha": "bebb00121ee105b823647c3ba2b1e152652edc33", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "InternLM2ForCausalLM", - "average_score": 32.974747665791206, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5538692890419642, - "normalized_score": 55.386928904196424 - }, - "bbh": { - "name": "BBH", - "value": 0.7073179916851792, - "normalized_score": 57.04314320825679 - }, - "math": { - "name": "MATH Level 5", - "value": 0.25302114803625375, - "normalized_score": 25.302114803625376 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34731543624161076, - "normalized_score": 12.975391498881436 - }, - "musr": { - "name": "MUSR", - "value": 0.45938541666666666, - "normalized_score": 16.28984375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3776595744680851, - "normalized_score": 30.851063829787233 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-06-27", - "submission_date": "2024-07-03", - "generation": 0, - "base_model": "internlm/internlm2_5-7b-chat", - "hub_license": "other", - "hub_hearts": 194, - "params_billions": 7.738, - "co2_cost": 3.003415538760828 - } - }, - { - "id": "intervitens/mini-magnum-12b-v1.1_bfloat16_3b19e12711d3f4d9b81fdeb73860e9019ebe2404_True", - "model": { - "name": "intervitens/mini-magnum-12b-v1.1", - "sha": "3b19e12711d3f4d9b81fdeb73860e9019ebe2404", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.028735164842907, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5155509603407846, - "normalized_score": 51.55509603407847 - }, - "bbh": { - "name": "BBH", - "value": 0.506180035650624, - "normalized_score": 29.731186868686873 - }, - "math": { - "name": "MATH Level 5", - "value": 0.061933534743202415, - "normalized_score": 6.193353474320242 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28859060402684567, - "normalized_score": 5.145413870246088 - }, - "musr": { - "name": "MUSR", - "value": 0.4004479166666666, - "normalized_score": 8.089322916666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3291223404255319, - "normalized_score": 25.458037825059098 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-24", - "submission_date": "2024-07-25", - "generation": 0, - "base_model": "intervitens/mini-magnum-12b-v1.1", - "hub_license": "apache-2.0", - "hub_hearts": 77, - "params_billions": 12.248, - "co2_cost": 4.46189669686465 - } - }, - { - "id": "inumulaisk/eval_model_float16_53d7d79face1c2e2ea416ea341b0f2663c9e6e20_False", - "model": { - "name": "inumulaisk/eval_model", - "sha": "53d7d79face1c2e2ea416ea341b0f2663c9e6e20", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 12.24174390101748, - "has_chat_template": false - }, - 
"evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19314197440568803, - "normalized_score": 19.3141974405688 - }, - "bbh": { - "name": "BBH", - "value": 0.35118774303346373, - "normalized_score": 9.506916615277103 - }, - "math": { - "name": "MATH Level 5", - "value": 0.297583081570997, - "normalized_score": 29.758308157099698 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.35796875, - "normalized_score": 3.579427083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16638962765957446, - "normalized_score": 7.376625295508273 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-04", - "submission_date": "2025-03-06", - "generation": 0, - "base_model": "inumulaisk/eval_model", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 1.777, - "co2_cost": 0.5949305874819794 - } - }, - { - "id": "invalid-coder/Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp_float16_39a1c76ddb5fa3a82c5b4071121d2e4866a25300_True", - "model": { - "name": "invalid-coder/Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp", - "sha": "39a1c76ddb5fa3a82c5b4071121d2e4866a25300", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.348078956309553, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45547591501660034, - "normalized_score": 45.54759150166004 - }, - "bbh": { - "name": "BBH", - "value": 0.5158439010792586, - "normalized_score": 31.635374808026484 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04909365558912387, - "normalized_score": 4.909365558912387 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.3992395833333333, - "normalized_score": 8.77161458333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3145777925531915, - "normalized_score": 23.841976950354614 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-01-10", - "submission_date": "2024-07-25", - "generation": 0, - "base_model": "invalid-coder/Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 10.732, - "co2_cost": 1.5285014726940187 - } - }, - { - "id": "invisietch/EtherealRainbow-v0.2-8B_bfloat16_46611fbb6aac0f33478c8401488d3ec7763c04d0_False", - "model": { - "name": "invisietch/EtherealRainbow-v0.2-8B", - "sha": "46611fbb6aac0f33478c8401488d3ec7763c04d0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.106576639387253, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.39032988027323057, - "normalized_score": 39.03298802732306 - }, - "bbh": { - "name": "BBH", - "value": 0.5102035205059678, - "normalized_score": 30.283791366541095 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0823262839879154, - "normalized_score": 8.23262839879154 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 
7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.38267708333333333, - "normalized_score": 6.5679687499999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36527593085106386, - "normalized_score": 29.47510342789598 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-12", - "submission_date": "2024-07-01", - "generation": 0, - "base_model": "invisietch/EtherealRainbow-v0.2-8B", - "hub_license": "llama3", - "hub_hearts": 7, - "params_billions": 8.03, - "co2_cost": 1.7428330849303162 - } - }, - { - "id": "invisietch/EtherealRainbow-v0.3-8B_bfloat16_c986c4ca5a5b8474820a59d3e911a431cf26938d_False", - "model": { - "name": "invisietch/EtherealRainbow-v0.3-8B", - "sha": "c986c4ca5a5b8474820a59d3e911a431cf26938d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.791231655800487, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.36822298168858625, - "normalized_score": 36.82229816885863 - }, - "bbh": { - "name": "BBH", - "value": 0.5096758454539693, - "normalized_score": 30.080258475101616 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07628398791540786, - "normalized_score": 7.628398791540786 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.39039583333333333, - "normalized_score": 7.766145833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36261635638297873, - "normalized_score": 29.179595153664305 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-19", - "submission_date": "2024-07-01", - "generation": 0, - "base_model": "invisietch/EtherealRainbow-v0.3-8B", - "hub_license": "llama3", - "hub_hearts": 18, - "params_billions": 8.03, - "co2_cost": 2.562534394436687 - } - }, - { - "id": "invisietch/MiS-Firefly-v0.2-22B_bfloat16_02dd13deefc5ff516edb59070ad66bd9f2831f4c_True", - "model": { - "name": "invisietch/MiS-Firefly-v0.2-22B", - "sha": "02dd13deefc5ff516edb59070ad66bd9f2831f4c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 26.754186150522894, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5371082062261466, - "normalized_score": 53.71082062261466 - }, - "bbh": { - "name": "BBH", - "value": 0.5513523591170696, - "normalized_score": 36.082656217719574 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16540785498489427, - "normalized_score": 16.540785498489427 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.46937500000000004, - "normalized_score": 17.80520833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3620345744680851, - "normalized_score": 29.11495271867612 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-06", - "submission_date": "2024-11-07", - 
"generation": 0, - "base_model": "invisietch/MiS-Firefly-v0.2-22B", - "hub_license": "other", - "hub_hearts": 8, - "params_billions": 22.247, - "co2_cost": 2.0505555920788168 - } - }, - { - "id": "invisietch/Nimbus-Miqu-v0.1-70B_float16_3209583a0849383daf8faa7b819f29726b8806cf_False", - "model": { - "name": "invisietch/Nimbus-Miqu-v0.1-70B", - "sha": "3209583a0849383daf8faa7b819f29726b8806cf", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.80805215493564, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.46466819150963884, - "normalized_score": 46.466819150963886 - }, - "bbh": { - "name": "BBH", - "value": 0.601030667794844, - "normalized_score": 43.4509951550532 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06042296072507553, - "normalized_score": 6.042296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3389261744966443, - "normalized_score": 11.85682326621924 - }, - "musr": { - "name": "MUSR", - "value": 0.41331249999999997, - "normalized_score": 9.330729166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3853058510638298, - "normalized_score": 31.700650118203306 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-30", - "submission_date": "2024-07-03", - "generation": 0, - "base_model": "invisietch/Nimbus-Miqu-v0.1-70B", - "hub_license": "unknown", - "hub_hearts": 21, - "params_billions": 68.977, - "co2_cost": 14.287354737481978 - } - }, - { - "id": "irahulpandey/mistralai-7B-slerp-v0.1_float16_699cd3bf4ee377e4e08dc30246caeebd8cf07e9a_False", - "model": { - "name": "irahulpandey/mistralai-7B-slerp-v0.1", - "sha": "699cd3bf4ee377e4e08dc30246caeebd8cf07e9a", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.352696815222927, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4966167546554254, - "normalized_score": 49.66167546554254 - }, - "bbh": { - "name": "BBH", - "value": 0.5010682924547378, - "normalized_score": 29.60624854675679 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0513595166163142, - "normalized_score": 5.13595166163142 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.45497916666666666, - "normalized_score": 14.87239583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2951296542553192, - "normalized_score": 21.681072695035464 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-30", - "submission_date": "2025-01-30", - "generation": 0, - "base_model": "irahulpandey/mistralai-7B-slerp-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.9083935593251266 - } - }, - { - "id": "jaredjoss/pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model_float16_048bc8edfc32fdcf6d957332d5f4c0d4e5950746_True", - "model": { - "name": "jaredjoss/pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model", - "sha": "048bc8edfc32fdcf6d957332d5f4c0d4e5950746", - "precision": "float16", - "type": 
"chatmodels", - "weight_type": "Original", - "architecture": "GPTNeoXForCausalLM", - "average_score": 3.81661033477558, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15722172723928066, - "normalized_score": 15.722172723928066 - }, - "bbh": { - "name": "BBH", - "value": 0.2863444769655102, - "normalized_score": 1.8203742908537424 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.3606979166666667, - "normalized_score": 2.2539062500000013 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11685505319148937, - "normalized_score": 1.8727836879432622 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-23", - "submission_date": "2024-08-06", - "generation": 0, - "base_model": "jaredjoss/pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 0.407, - "co2_cost": 0.46612749327371394 - } - }, - { - "id": "jaspionjader/Auro-Kosmos-EVAA-v2-8B_bfloat16_646b8ba54d99208204b214d67132729bf25a64ce_False", - "model": { - "name": "jaspionjader/Auro-Kosmos-EVAA-v2-8B", - "sha": "646b8ba54d99208204b214d67132729bf25a64ce", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.869882790033316, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4778077722664752, - "normalized_score": 47.780777226647515 - }, - "bbh": { - "name": "BBH", - "value": 0.5447163557182707, - "normalized_score": 35.142095643012475 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14123867069486404, - "normalized_score": 14.123867069486403 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31543624161073824, - "normalized_score": 8.7248322147651 - }, - "musr": { - "name": "MUSR", - "value": 0.425, - "normalized_score": 11.691666666666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38580452127659576, - "normalized_score": 31.756057919621743 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "jaspionjader/Auro-Kosmos-EVAA-v2-8B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3376660491774912 - } - }, - { - "id": "jaspionjader/Auro-Kosmos-EVAA-v2.1-8B_bfloat16_2fde03326a1129ec82a5a4c6a8d2fe244b4d96fb_False", - "model": { - "name": "jaspionjader/Auro-Kosmos-EVAA-v2.1-8B", - "sha": "2fde03326a1129ec82a5a4c6a8d2fe244b4d96fb", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.753245956983775, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4665919759571271, - "normalized_score": 46.659197595712705 - }, - "bbh": { - "name": "BBH", - "value": 0.5444200006474947, - "normalized_score": 35.195114448894905 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14577039274924472, - "normalized_score": 14.577039274924472 - 
}, - "gpqa": { - "name": "GPQA", - "value": 0.3087248322147651, - "normalized_score": 7.829977628635347 - }, - "musr": { - "name": "MUSR", - "value": 0.4316979166666666, - "normalized_score": 12.862239583333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.382563164893617, - "normalized_score": 31.395907210401887 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "jaspionjader/Auro-Kosmos-EVAA-v2.1-8B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.4314973009726266 - } - }, - { - "id": "jaspionjader/Auro-Kosmos-EVAA-v2.2-8B_bfloat16_6775691ace30f5a09cf170c1f43b2b496518539b_False", - "model": { - "name": "jaspionjader/Auro-Kosmos-EVAA-v2.2-8B", - "sha": "6775691ace30f5a09cf170c1f43b2b496518539b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.794317671805487, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4267997801389203, - "normalized_score": 42.679978013892026 - }, - "bbh": { - "name": "BBH", - "value": 0.5431077158331955, - "normalized_score": 34.91772620301955 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14123867069486404, - "normalized_score": 14.123867069486403 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3104026845637584, - "normalized_score": 8.05369127516779 - }, - "musr": { - "name": "MUSR", - "value": 0.42506249999999995, - "normalized_score": 11.899479166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37982047872340424, - "normalized_score": 31.091164302600465 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "jaspionjader/Auro-Kosmos-EVAA-v2.2-8B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.4419073388687802 - } - }, - { - "id": "jaspionjader/Auro-Kosmos-EVAA-v2.3-8B_bfloat16_06560f7b9c63ba66dc4a72306fe487aff28fd514_False", - "model": { - "name": "jaspionjader/Auro-Kosmos-EVAA-v2.3-8B", - "sha": "06560f7b9c63ba66dc4a72306fe487aff28fd514", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.827726018728324, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42712447417297217, - "normalized_score": 42.71244741729721 - }, - "bbh": { - "name": "BBH", - "value": 0.5440818233123913, - "normalized_score": 35.02426114679496 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13444108761329304, - "normalized_score": 13.444108761329304 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31208053691275167, - "normalized_score": 8.277404921700223 - }, - "musr": { - "name": "MUSR", - "value": 0.4277916666666666, - "normalized_score": 12.573958333333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37840757978723405, - "normalized_score": 30.9341755319149 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": 
false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "jaspionjader/Auro-Kosmos-EVAA-v2.3-8B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.4606882955697402 - } - }, - { - "id": "jaspionjader/Kosmos-Aurora_faustus-8B_bfloat16_3626672ad3a6b1fac444a08d0841dfd974429400_False", - "model": { - "name": "jaspionjader/Kosmos-Aurora_faustus-8B", - "sha": "3626672ad3a6b1fac444a08d0841dfd974429400", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.772496063013904, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.443236168920686, - "normalized_score": 44.3236168920686 - }, - "bbh": { - "name": "BBH", - "value": 0.5260325661068855, - "normalized_score": 32.36435397107119 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11253776435045318, - "normalized_score": 11.253776435045317 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2953020134228188, - "normalized_score": 6.040268456375841 - }, - "musr": { - "name": "MUSR", - "value": 0.4116979166666666, - "normalized_score": 11.39557291666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38131648936170215, - "normalized_score": 31.257387706855795 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "jaspionjader/Kosmos-Aurora_faustus-8B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.2817270535415657 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-8B_bfloat16_29ab30124b16c224e88ff4597a247b1327b012fa_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-8B", - "sha": "29ab30124b16c224e88ff4597a247b1327b012fa", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.04291967518942, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4404635256674513, - "normalized_score": 44.04635256674513 - }, - "bbh": { - "name": "BBH", - "value": 0.5311831227740652, - "normalized_score": 33.09134006202049 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11782477341389729, - "normalized_score": 11.782477341389729 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.4236666666666667, - "normalized_score": 11.425000000000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3818151595744681, - "normalized_score": 31.31279550827423 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-8B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3041788434603263 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-Franken-Immersive-v39-8B_bfloat16_1c4963027ba7880da678500864385167dfe6e12b_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-Franken-Immersive-v39-8B", - "sha": 
"1c4963027ba7880da678500864385167dfe6e12b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.3817043439325, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43779061778303796, - "normalized_score": 43.7790617783038 - }, - "bbh": { - "name": "BBH", - "value": 0.5189720817259138, - "normalized_score": 31.18947290052941 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12915407854984895, - "normalized_score": 12.915407854984895 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31543624161073824, - "normalized_score": 8.7248322147651 - }, - "musr": { - "name": "MUSR", - "value": 0.4236354166666667, - "normalized_score": 11.454427083333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3900432180851064, - "normalized_score": 32.227024231678485 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-05", - "submission_date": "2025-01-05", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-Franken-Immersive-v39-8B (Merge)", - "hub_license": "", - "hub_hearts": 4, - "params_billions": 8.03, - "co2_cost": 1.198589077745552 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-Franken-v38-8B_bfloat16_42ea50d2b0b7f072637e301d92a8e1832633a23e_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-Franken-v38-8B", - "sha": "42ea50d2b0b7f072637e301d92a8e1832633a23e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.287509583046784, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4355676272290855, - "normalized_score": 43.55676272290855 - }, - "bbh": { - "name": "BBH", - "value": 0.5229513322616746, - "normalized_score": 31.76216941291032 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12915407854984895, - "normalized_score": 12.915407854984895 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3087248322147651, - "normalized_score": 7.829977628635347 - }, - "musr": { - "name": "MUSR", - "value": 0.42115624999999995, - "normalized_score": 11.544531249999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3890458776595745, - "normalized_score": 32.116208628841605 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-04", - "submission_date": "2025-01-04", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-Franken-v38-8B (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.880158808239308 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-Fusion-8B_float16_26bc667c2304919a25b07536a33adb58da4fd75d_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-Fusion-8B", - "sha": "26bc667c2304919a25b07536a33adb58da4fd75d", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.941426516007922, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4417623018036587, - "normalized_score": 44.17623018036588 - }, - "bbh": { - "name": "BBH", - "value": 0.5405890148943007, - "normalized_score": 34.45875710784809 - 
}, - "math": { - "name": "MATH Level 5", - "value": 0.1351963746223565, - "normalized_score": 13.51963746223565 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3062080536912752, - "normalized_score": 7.494407158836691 - }, - "musr": { - "name": "MUSR", - "value": 0.42766666666666664, - "normalized_score": 12.225 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3859707446808511, - "normalized_score": 31.774527186761226 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-Fusion-8B (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.2934264055292035 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-Fusion-8B_bfloat16_26bc667c2304919a25b07536a33adb58da4fd75d_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-Fusion-8B", - "sha": "26bc667c2304919a25b07536a33adb58da4fd75d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.78962658485408, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43446832183052075, - "normalized_score": 43.44683218305208 - }, - "bbh": { - "name": "BBH", - "value": 0.5419028777027763, - "normalized_score": 34.61065709067911 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12915407854984895, - "normalized_score": 12.915407854984895 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3087248322147651, - "normalized_score": 7.829977628635347 - }, - "musr": { - "name": "MUSR", - "value": 0.42766666666666664, - "normalized_score": 12.225 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38538896276595747, - "normalized_score": 31.709884751773053 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-Fusion-8B (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.2600241220012323 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-PRP-8B_bfloat16_de6dd21557ecd2f42007640ac53cf253fb9a07f8_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-PRP-8B", - "sha": "de6dd21557ecd2f42007640ac53cf253fb9a07f8", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.648962768096755, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.34052092891306174, - "normalized_score": 34.052092891306174 - }, - "bbh": { - "name": "BBH", - "value": 0.5195634214282913, - "normalized_score": 31.507446723357827 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08836858006042296, - "normalized_score": 8.836858006042297 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31291946308724833, - "normalized_score": 8.389261744966444 - }, - "musr": { - "name": "MUSR", - "value": 0.4301145833333333, - "normalized_score": 11.69765625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3646941489361702, - "normalized_score": 29.410460992907794 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - 
"is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-PRP-8B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.1931663358566724 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-PRP-light-8B_bfloat16_f0561ad49c2a67bc02d065d19c912ecc779b4f12_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-PRP-light-8B", - "sha": "f0561ad49c2a67bc02d065d19c912ecc779b4f12", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.99624515951864, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.38238651223198894, - "normalized_score": 38.23865122319889 - }, - "bbh": { - "name": "BBH", - "value": 0.5271029575696119, - "normalized_score": 32.28113826534745 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11027190332326284, - "normalized_score": 11.027190332326283 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31208053691275167, - "normalized_score": 8.277404921700223 - }, - "musr": { - "name": "MUSR", - "value": 0.42490625, - "normalized_score": 11.246614583333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3781582446808511, - "normalized_score": 30.906471631205672 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-PRP-light-8B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.2005794673394319 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-PRP-v23-8B_bfloat16_8b39a33ce2e3630e3d8a6285c997f8d26655b5e5_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-PRP-v23-8B", - "sha": "8b39a33ce2e3630e3d8a6285c997f8d26655b5e5", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.73397243488874, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4040933611705829, - "normalized_score": 40.409336117058295 - }, - "bbh": { - "name": "BBH", - "value": 0.5289840558524612, - "normalized_score": 33.03704089860875 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11555891238670694, - "normalized_score": 11.555891238670695 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3087248322147651, - "normalized_score": 7.829977628635347 - }, - "musr": { - "name": "MUSR", - "value": 0.43684375, - "normalized_score": 13.505468749999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37059507978723405, - "normalized_score": 30.06611997635934 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-PRP-v23-8B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2616133314973699 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-PRP-v24-8B_bfloat16_13080b1650779d80f4f33b42397d22671bf9b8ec_False", - "model": { - "name": 
"jaspionjader/Kosmos-EVAA-PRP-v24-8B", - "sha": "13080b1650779d80f4f33b42397d22671bf9b8ec", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.97857683964308, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42587556572117535, - "normalized_score": 42.587556572117535 - }, - "bbh": { - "name": "BBH", - "value": 0.5276140433113651, - "normalized_score": 32.86201554441706 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11027190332326284, - "normalized_score": 11.027190332326283 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3104026845637584, - "normalized_score": 8.05369127516779 - }, - "musr": { - "name": "MUSR", - "value": 0.42903125, - "normalized_score": 12.46223958333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3779089095744681, - "normalized_score": 30.87876773049646 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-PRP-v24-8B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2478613312692373 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-PRP-v25-8B_bfloat16_bf9ae167b9c221f6e6ce0243b6e69da762856fbd_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-PRP-v25-8B", - "sha": "bf9ae167b9c221f6e6ce0243b6e69da762856fbd", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.486899527557398, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4420869958377106, - "normalized_score": 44.208699583771065 - }, - "bbh": { - "name": "BBH", - "value": 0.5290702582598797, - "normalized_score": 33.05901994218396 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11858006042296072, - "normalized_score": 11.858006042296072 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3179530201342282, - "normalized_score": 9.060402684563762 - }, - "musr": { - "name": "MUSR", - "value": 0.4303333333333333, - "normalized_score": 12.558333333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37159242021276595, - "normalized_score": 30.17693557919622 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-PRP-v25-8B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.2829483889600175 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-PRP-v26-8B_bfloat16_05a9219eac478e1fbb1cd9bce4f5ba1b66de95fa_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-PRP-v26-8B", - "sha": "05a9219eac478e1fbb1cd9bce4f5ba1b66de95fa", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.114976403936158, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4413877400851962, - "normalized_score": 44.13877400851962 - }, - "bbh": { - "name": "BBH", - "value": 0.5271171047819411, - "normalized_score": 
32.71975634899274 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11329305135951662, - "normalized_score": 11.329305135951662 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.4263645833333333, - "normalized_score": 12.195572916666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3793218085106383, - "normalized_score": 31.03575650118203 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-PRP-v26-8B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2432433319287717 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-PRP-v27-8B_bfloat16_0aa8467ea2771e392ace78e6a06b35711c7703e6_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-PRP-v27-8B", - "sha": "0aa8467ea2771e392ace78e6a06b35711c7703e6", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.4077778762187, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4378404854674486, - "normalized_score": 43.784048546744856 - }, - "bbh": { - "name": "BBH", - "value": 0.5290320010579407, - "normalized_score": 32.99647631508889 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11933534743202417, - "normalized_score": 11.933534743202417 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3087248322147651, - "normalized_score": 7.829977628635347 - }, - "musr": { - "name": "MUSR", - "value": 0.4343333333333333, - "normalized_score": 13.291666666666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37549867021276595, - "normalized_score": 30.610963356973997 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-PRP-v27-8B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2828938423829248 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-PRP-v28-8B_bfloat16_a7f6dd1b6784b4e1611f46227e4f6f6e92c79bef_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-PRP-v28-8B", - "sha": "a7f6dd1b6784b4e1611f46227e4f6f6e92c79bef", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.264906845014234, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43659157701565177, - "normalized_score": 43.65915770156518 - }, - "bbh": { - "name": "BBH", - "value": 0.5294743678489208, - "normalized_score": 33.073754773711705 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11706948640483383, - "normalized_score": 11.706948640483382 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.43296874999999996, - "normalized_score": 12.987760416666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.375, - "normalized_score": 30.555555555555557 - } - }, - "features": { - 
"is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-PRP-v28-8B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2058860911125207 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-PRP-v29-8B_bfloat16_6cce8f9b972f8a2636a08214aec37889b15c114b_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-PRP-v29-8B", - "sha": "6cce8f9b972f8a2636a08214aec37889b15c114b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.401854796574032, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4487315877427448, - "normalized_score": 44.87315877427448 - }, - "bbh": { - "name": "BBH", - "value": 0.5275189525290296, - "normalized_score": 32.92843632608228 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12009063444108761, - "normalized_score": 12.009063444108762 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3104026845637584, - "normalized_score": 8.05369127516779 - }, - "musr": { - "name": "MUSR", - "value": 0.42366666666666664, - "normalized_score": 11.824999999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37649601063829785, - "normalized_score": 30.721778959810877 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-PRP-v29-8B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.247893929420119 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-PRP-v30-8B_bfloat16_58dfcc805241c0bebd34546f72e1e4eb7ab55ca2_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-PRP-v30-8B", - "sha": "58dfcc805241c0bebd34546f72e1e4eb7ab55ca2", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.263563656014522, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42947268802333366, - "normalized_score": 42.94726880233337 - }, - "bbh": { - "name": "BBH", - "value": 0.5327819889174134, - "normalized_score": 32.980026204409654 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11782477341389729, - "normalized_score": 11.782477341389729 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.4263333333333333, - "normalized_score": 11.95833333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3937832446808511, - "normalized_score": 32.64258274231678 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-30", - "submission_date": "2024-12-30", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-PRP-v30-8B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.8727119754433255 - } - }, - { - "id": 
"jaspionjader/Kosmos-EVAA-PRP-v31-8B_bfloat16_b5155080c7aab6d7b09cbb3028814c1a125afd13_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-PRP-v31-8B", - "sha": "b5155080c7aab6d7b09cbb3028814c1a125afd13", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.539317547208807, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43986400528375824, - "normalized_score": 43.986400528375825 - }, - "bbh": { - "name": "BBH", - "value": 0.5315048053167004, - "normalized_score": 32.91395767598818 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11329305135951662, - "normalized_score": 11.329305135951662 - }, - "gpqa": { - "name": "GPQA", - "value": 0.313758389261745, - "normalized_score": 8.501118568232664 - }, - "musr": { - "name": "MUSR", - "value": 0.42506249999999995, - "normalized_score": 11.899479166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39345079787234044, - "normalized_score": 32.60564420803782 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-30", - "submission_date": "2024-12-30", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-PRP-v31-8B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.2069281446666016 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-PRP-v32-8B_bfloat16_c15be2ab847e2b523c8626c5d5adca3d3796f1e1_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-PRP-v32-8B", - "sha": "c15be2ab847e2b523c8626c5d5adca3d3796f1e1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.43128878858796, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4487315877427448, - "normalized_score": 44.87315877427448 - }, - "bbh": { - "name": "BBH", - "value": 0.5292530349260334, - "normalized_score": 33.04697938500372 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1148036253776435, - "normalized_score": 11.48036253776435 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3162751677852349, - "normalized_score": 8.83668903803132 - }, - "musr": { - "name": "MUSR", - "value": 0.42106249999999995, - "normalized_score": 11.499479166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3776595744680851, - "normalized_score": 30.851063829787233 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-30", - "submission_date": "2024-12-30", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-PRP-v32-8B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2725725342025458 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-PRP-v33-8B_bfloat16_49bcf97fca1c4f4705460d5fc565663db79282c5_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-PRP-v33-8B", - "sha": "49bcf97fca1c4f4705460d5fc565663db79282c5", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.234915401488184, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4301719437758481, 
- "normalized_score": 43.01719437758481 - }, - "bbh": { - "name": "BBH", - "value": 0.5321153222507468, - "normalized_score": 32.935042544278936 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11782477341389729, - "normalized_score": 11.782477341389729 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31291946308724833, - "normalized_score": 8.389261744966444 - }, - "musr": { - "name": "MUSR", - "value": 0.41839583333333336, - "normalized_score": 10.966145833333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.390874335106383, - "normalized_score": 32.319370567375884 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-30", - "submission_date": "2024-12-30", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-PRP-v33-8B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.2709855867345825 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-PRP-v34-8B_bfloat16_fd39e4c9a1579dd7105c7cfb82e8cc6896372226_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-PRP-v34-8B", - "sha": "fd39e4c9a1579dd7105c7cfb82e8cc6896372226", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.742486352642512, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45625052638111324, - "normalized_score": 45.62505263811133 - }, - "bbh": { - "name": "BBH", - "value": 0.533301459442271, - "normalized_score": 33.1218626050209 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11253776435045318, - "normalized_score": 11.253776435045317 - }, - "gpqa": { - "name": "GPQA", - "value": 0.311241610738255, - "normalized_score": 8.165548098434002 - }, - "musr": { - "name": "MUSR", - "value": 0.42372916666666666, - "normalized_score": 11.766145833333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3927027925531915, - "normalized_score": 32.52253250591017 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-30", - "submission_date": "2024-12-30", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-PRP-v34-8B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.209066915792569 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-TSN-8B_float16_998ded0f65e9f206e5b47b334e281993f92f37eb_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-TSN-8B", - "sha": "998ded0f65e9f206e5b47b334e281993f92f37eb", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.87846732227049, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47213726246359655, - "normalized_score": 47.213726246359656 - }, - "bbh": { - "name": "BBH", - "value": 0.5176546480934434, - "normalized_score": 31.425048235694074 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13444108761329304, - "normalized_score": 13.444108761329304 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.43290625, - "normalized_score": 12.846614583333329 - }, - "mmlu_pro": { - 
"name": "MMLU-PRO", - "value": 0.3816489361702128, - "normalized_score": 31.29432624113476 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-28", - "submission_date": "2024-12-28", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-TSN-8B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3134151336948419 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-TSN-light-8B_bfloat16_a5cee4c83bc216b051883dc88f33f6ee45956f0b_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-TSN-light-8B", - "sha": "a5cee4c83bc216b051883dc88f33f6ee45956f0b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.675844003764762, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.46849027247702757, - "normalized_score": 46.84902724770276 - }, - "bbh": { - "name": "BBH", - "value": 0.5235021286391058, - "normalized_score": 32.31709224526529 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1216012084592145, - "normalized_score": 12.16012084592145 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.42893749999999997, - "normalized_score": 12.283854166666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38056848404255317, - "normalized_score": 31.174276004728128 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-28", - "submission_date": "2024-12-28", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-TSN-light-8B (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.324997241130934 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-TSN-v19-8B_bfloat16_b9847b6628a61117908a8f6f8e94d51154568c36_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-TSN-v19-8B", - "sha": "b9847b6628a61117908a8f6f8e94d51154568c36", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.5328956860039, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4563502617499346, - "normalized_score": 45.635026174993456 - }, - "bbh": { - "name": "BBH", - "value": 0.5316458785173577, - "normalized_score": 33.429515483219056 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11555891238670694, - "normalized_score": 11.555891238670695 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.4276979166666666, - "normalized_score": 12.195572916666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37898936170212766, - "normalized_score": 30.99881796690307 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-28", - "submission_date": "2024-12-28", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-TSN-v19-8B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - 
"co2_cost": 1.297936022832137 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-TSN-v20-8B_bfloat16_ce90ff49cb08473dd8c66ca815bbf6dd7509931b_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-TSN-v20-8B", - "sha": "ce90ff49cb08473dd8c66ca815bbf6dd7509931b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.536494106696477, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4423119545029411, - "normalized_score": 44.23119545029411 - }, - "bbh": { - "name": "BBH", - "value": 0.5250468078369915, - "normalized_score": 32.00472858026129 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12462235649546828, - "normalized_score": 12.462235649546828 - }, - "gpqa": { - "name": "GPQA", - "value": 0.313758389261745, - "normalized_score": 8.501118568232664 - }, - "musr": { - "name": "MUSR", - "value": 0.42103124999999997, - "normalized_score": 11.39557291666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39361702127659576, - "normalized_score": 32.6241134751773 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-28", - "submission_date": "2024-12-28", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-TSN-v20-8B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2045490521318007 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-TSN-v21-8B_bfloat16_3bbe313e2c452857a24e14da3ef1c4cd00ca6e2a_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-TSN-v21-8B", - "sha": "3bbe313e2c452857a24e14da3ef1c4cd00ca6e2a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.968398200512283, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.46701640536000033, - "normalized_score": 46.701640536000035 - }, - "bbh": { - "name": "BBH", - "value": 0.524796520922724, - "normalized_score": 32.386295261036274 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11933534743202417, - "normalized_score": 11.933534743202417 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31208053691275167, - "normalized_score": 8.277404921700223 - }, - "musr": { - "name": "MUSR", - "value": 0.43427083333333333, - "normalized_score": 13.217187499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3816489361702128, - "normalized_score": 31.29432624113476 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-28", - "submission_date": "2024-12-28", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-TSN-v21-8B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.2729876389623414 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-TSN-v22-8B_bfloat16_151317735ac8672d1160fdbdd98932d588a68c15_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-TSN-v22-8B", - "sha": "151317735ac8672d1160fdbdd98932d588a68c15", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.654432566783058, - "has_chat_template": false - }, - "evaluations": { - "ifeval": 
{ - "name": "IFEval", - "value": 0.4673410993940522, - "normalized_score": 46.73410993940523 - }, - "bbh": { - "name": "BBH", - "value": 0.5245863682593667, - "normalized_score": 32.32633123685557 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11329305135951662, - "normalized_score": 11.329305135951662 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.4303333333333333, - "normalized_score": 12.691666666666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38115026595744683, - "normalized_score": 31.23891843971632 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-28", - "submission_date": "2024-12-28", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-TSN-v22-8B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.2996115083833457 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-gamma-8B_bfloat16_c0b4cac5c74918a2c84a9fc7541b261c9400e9ed_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-gamma-8B", - "sha": "c0b4cac5c74918a2c84a9fc7541b261c9400e9ed", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.98708989579214, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45722460848326885, - "normalized_score": 45.72246084832688 - }, - "bbh": { - "name": "BBH", - "value": 0.5321936191858193, - "normalized_score": 33.103487394032555 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10498489425981873, - "normalized_score": 10.498489425981873 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3187919463087248, - "normalized_score": 9.172259507829976 - }, - "musr": { - "name": "MUSR", - "value": 0.4305833333333333, - "normalized_score": 13.189583333333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39012632978723405, - "normalized_score": 32.23625886524823 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-gamma-8B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3506866639843 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-gamma-alt-8B_bfloat16_11f95acd7c719d7666af0ce7e10b77eee0b5ac8c_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-gamma-alt-8B", - "sha": "11f95acd7c719d7666af0ce7e10b77eee0b5ac8c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.052330699621802, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4542270065648036, - "normalized_score": 45.42270065648036 - }, - "bbh": { - "name": "BBH", - "value": 0.5297928701221488, - "normalized_score": 32.85116982530719 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1095166163141994, - "normalized_score": 10.95166163141994 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32466442953020136, - "normalized_score": 9.955257270693513 - }, - "musr": { - "name": "MUSR", - "value": 0.42921875, - "normalized_score": 
12.952343749999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3896276595744681, - "normalized_score": 32.18085106382979 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-gamma-alt-8B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3044490161906677 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-gamma-light-8B_bfloat16_a89c25c226d3a8635e29bb50f0d9125f4b29ec1d_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-gamma-light-8B", - "sha": "a89c25c226d3a8635e29bb50f0d9125f4b29ec1d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.11044721388785, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45809895521660304, - "normalized_score": 45.80989552166031 - }, - "bbh": { - "name": "BBH", - "value": 0.5376138387743472, - "normalized_score": 33.68753243090729 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11027190332326284, - "normalized_score": 11.027190332326283 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3162751677852349, - "normalized_score": 8.83668903803132 - }, - "musr": { - "name": "MUSR", - "value": 0.42909375, - "normalized_score": 12.603385416666661 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.394281914893617, - "normalized_score": 32.69799054373522 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-gamma-light-8B (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.1781725368622367 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-gamma-light-alt-8B_bfloat16_1d0941141461966fa4f28028a3d3bf57aa1b92c1_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-gamma-light-alt-8B", - "sha": "1d0941141461966fa4f28028a3d3bf57aa1b92c1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.801281271469463, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44535942410581697, - "normalized_score": 44.5359424105817 - }, - "bbh": { - "name": "BBH", - "value": 0.5327145731870764, - "normalized_score": 33.05767050932263 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11329305135951662, - "normalized_score": 11.329305135951662 - }, - "gpqa": { - "name": "GPQA", - "value": 0.313758389261745, - "normalized_score": 8.501118568232664 - }, - "musr": { - "name": "MUSR", - "value": 0.43045833333333333, - "normalized_score": 12.90729166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39228723404255317, - "normalized_score": 32.476359338061464 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-gamma-light-alt-8B (Merge)", - 
"hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.258765389845097 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-gamma-ultra-light-8B_bfloat16_56dbc698d481e91484365130c242e8161638af60_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-gamma-ultra-light-8B", - "sha": "56dbc698d481e91484365130c242e8161638af60", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.802239954239454, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4563003940655239, - "normalized_score": 45.63003940655239 - }, - "bbh": { - "name": "BBH", - "value": 0.5316344937208096, - "normalized_score": 32.78464802043255 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11782477341389729, - "normalized_score": 11.782477341389729 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3162751677852349, - "normalized_score": 8.83668903803132 - }, - "musr": { - "name": "MUSR", - "value": 0.4196979166666666, - "normalized_score": 11.39557291666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3914561170212766, - "normalized_score": 32.384013002364064 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-28", - "submission_date": "2024-12-28", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-gamma-ultra-light-8B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4131181642035988 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-gamma-v13-8B_bfloat16_9bbcccb18b15fd812df255d47315e32c2db4492c_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-gamma-v13-8B", - "sha": "9bbcccb18b15fd812df255d47315e32c2db4492c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.7210916104417, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44286160720222345, - "normalized_score": 44.28616072022234 - }, - "bbh": { - "name": "BBH", - "value": 0.5359422335881335, - "normalized_score": 33.47406748343683 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11178247734138973, - "normalized_score": 11.178247734138973 - }, - "gpqa": { - "name": "GPQA", - "value": 0.313758389261745, - "normalized_score": 8.501118568232664 - }, - "musr": { - "name": "MUSR", - "value": 0.42776041666666664, - "normalized_score": 12.336718749999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3929521276595745, - "normalized_score": 32.55023640661938 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-gamma-v13-8B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.2773691783087238 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-gamma-v14-8B_bfloat16_0a4ee872602c0cc95e0cec0c49807d29ead4586f_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-gamma-v14-8B", - "sha": "0a4ee872602c0cc95e0cec0c49807d29ead4586f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - 
"average_score": 23.62429735133503, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4380155764482684, - "normalized_score": 43.80155764482684 - }, - "bbh": { - "name": "BBH", - "value": 0.5363063034440413, - "normalized_score": 33.65958954546506 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11027190332326284, - "normalized_score": 11.027190332326283 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31291946308724833, - "normalized_score": 8.389261744966444 - }, - "musr": { - "name": "MUSR", - "value": 0.42772916666666666, - "normalized_score": 12.299479166666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3931183510638298, - "normalized_score": 32.56870567375886 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-28", - "submission_date": "2024-12-28", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-gamma-v14-8B (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.20918414931387 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-gamma-v15-8B_bfloat16_e65d9aa4d94dd11861d4bf5eed50c9f0a963b51f_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-gamma-v15-8B", - "sha": "e65d9aa4d94dd11861d4bf5eed50c9f0a963b51f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.02365934041546, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4654428028741517, - "normalized_score": 46.54428028741517 - }, - "bbh": { - "name": "BBH", - "value": 0.534326872652317, - "normalized_score": 33.2170748468152 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11102719033232629, - "normalized_score": 11.10271903323263 - }, - "gpqa": { - "name": "GPQA", - "value": 0.311241610738255, - "normalized_score": 8.165548098434002 - }, - "musr": { - "name": "MUSR", - "value": 0.42772916666666666, - "normalized_score": 12.432812499999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3941156914893617, - "normalized_score": 32.67952127659574 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-28", - "submission_date": "2024-12-28", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-gamma-v15-8B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.2007018233163582 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-gamma-v16-8B_bfloat16_4f58e6c6624dea0b9116d650c1a205b9993eefc0_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-gamma-v16-8B", - "sha": "4f58e6c6624dea0b9116d650c1a205b9993eefc0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.98392902504385, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4556510059974202, - "normalized_score": 45.565100599742024 - }, - "bbh": { - "name": "BBH", - "value": 0.5343925058514598, - "normalized_score": 33.29159037553265 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11706948640483383, - "normalized_score": 11.706948640483382 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31543624161073824, - 
"normalized_score": 8.7248322147651 - }, - "musr": { - "name": "MUSR", - "value": 0.4264270833333333, - "normalized_score": 12.203385416666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39170545212765956, - "normalized_score": 32.41171690307328 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-28", - "submission_date": "2024-12-28", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-gamma-v16-8B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.1915594994137388 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-gamma-v17-8B_bfloat16_122efb5edf2b769d4705fa2a861bef21098662af_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-gamma-v17-8B", - "sha": "122efb5edf2b769d4705fa2a861bef21098662af", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.702369090863083, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4462337708391512, - "normalized_score": 44.623377083915116 - }, - "bbh": { - "name": "BBH", - "value": 0.5346666279815969, - "normalized_score": 33.346731824868634 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11102719033232629, - "normalized_score": 11.10271903323263 - }, - "gpqa": { - "name": "GPQA", - "value": 0.311241610738255, - "normalized_score": 8.165548098434002 - }, - "musr": { - "name": "MUSR", - "value": 0.42906249999999996, - "normalized_score": 12.499479166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39228723404255317, - "normalized_score": 32.476359338061464 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-28", - "submission_date": "2024-12-28", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-gamma-v17-8B (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.2210254561964191 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-gamma-v18-8B_bfloat16_8cec9573a60b7ce8af88e81efc9092b2a5e37f4b_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-gamma-v18-8B", - "sha": "8cec9573a60b7ce8af88e81efc9092b2a5e37f4b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.496714298861473, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43409376011205825, - "normalized_score": 43.40937601120582 - }, - "bbh": { - "name": "BBH", - "value": 0.5339179190615058, - "normalized_score": 33.23387233410255 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11102719033232629, - "normalized_score": 11.10271903323263 - }, - "gpqa": { - "name": "GPQA", - "value": 0.311241610738255, - "normalized_score": 8.165548098434002 - }, - "musr": { - "name": "MUSR", - "value": 0.4316979166666666, - "normalized_score": 12.795572916666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3904587765957447, - "normalized_score": 32.273197399527184 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-28", - 
"submission_date": "2024-12-28", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-gamma-v18-8B (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.2339950249160372 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-immersive-sof-v44-8B_bfloat16_7d1e94f42904b66c5bc310721f627d3393b3e91e_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-immersive-sof-v44-8B", - "sha": "7d1e94f42904b66c5bc310721f627d3393b3e91e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.12092179330857, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44078821970150317, - "normalized_score": 44.078821970150315 - }, - "bbh": { - "name": "BBH", - "value": 0.5214884907801955, - "normalized_score": 31.258884400704403 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11858006042296072, - "normalized_score": 11.858006042296072 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.4143958333333333, - "normalized_score": 11.499479166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3887965425531915, - "normalized_score": 32.08850472813239 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-26", - "submission_date": "2025-01-26", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-immersive-sof-v44-8B (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.3308076685501458 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-v10-8B_bfloat16_eb48bac81fb5b288d6353dfe2867953718c626c5_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-v10-8B", - "sha": "eb48bac81fb5b288d6353dfe2867953718c626c5", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.090676803381882, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4261503920708165, - "normalized_score": 42.61503920708165 - }, - "bbh": { - "name": "BBH", - "value": 0.5375875314179012, - "normalized_score": 34.01111082889911 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12462235649546828, - "normalized_score": 12.462235649546828 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.4223645833333333, - "normalized_score": 11.39557291666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38314494680851063, - "normalized_score": 31.460549645390074 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-v10-8B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3959809536797354 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-v11-8B_bfloat16_0d96e6e1e53814b3950a3993e7733cd9a3057114_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-v11-8B", - "sha": "0d96e6e1e53814b3950a3993e7733cd9a3057114", - "precision": "bfloat16", - 
"type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.769168519505644, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44263664853699297, - "normalized_score": 44.2636648536993 - }, - "bbh": { - "name": "BBH", - "value": 0.5359208647512345, - "normalized_score": 33.83221649338713 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13217522658610273, - "normalized_score": 13.217522658610273 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31543624161073824, - "normalized_score": 8.7248322147651 - }, - "musr": { - "name": "MUSR", - "value": 0.4184270833333333, - "normalized_score": 11.070052083333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3835605053191489, - "normalized_score": 31.506722813238763 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-v11-8B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.2774191774778267 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-v12-8B_bfloat16_31ae4c22a9b1f8b6174ec8f32e371973dc899e64_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-v12-8B", - "sha": "31ae4c22a9b1f8b6174ec8f32e371973dc899e64", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.67369216786618, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43779061778303796, - "normalized_score": 43.7790617783038 - }, - "bbh": { - "name": "BBH", - "value": 0.5348808250181011, - "normalized_score": 33.64435993303794 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13670694864048338, - "normalized_score": 13.670694864048338 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.42106249999999995, - "normalized_score": 11.499479166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3835605053191489, - "normalized_score": 31.506722813238763 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-v12-8B (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.3129275432231455 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-v2-8B_bfloat16_40fd11da76f2986eae7d67f60e7f7ed4a695b191_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-v2-8B", - "sha": "40fd11da76f2986eae7d67f60e7f7ed4a695b191", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.34429925313133, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4395891789341171, - "normalized_score": 43.95891789341171 - }, - "bbh": { - "name": "BBH", - "value": 0.5341160060985229, - "normalized_score": 33.60889502995319 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13217522658610273, - "normalized_score": 13.217522658610273 - }, 
- "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.42106249999999995, - "normalized_score": 11.499479166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3826462765957447, - "normalized_score": 31.405141843971634 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-v2-8B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2935494946640862 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-v3-8B_bfloat16_20afd2c2261e46936b6691b6a526e2ac56a0a8f2_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-v3-8B", - "sha": "20afd2c2261e46936b6691b6a526e2ac56a0a8f2", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.56106225267956, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4410630460511443, - "normalized_score": 44.10630460511443 - }, - "bbh": { - "name": "BBH", - "value": 0.5330987974156178, - "normalized_score": 33.468587339989305 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13293051359516617, - "normalized_score": 13.293051359516618 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.4223958333333333, - "normalized_score": 11.766145833333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38214760638297873, - "normalized_score": 31.349734042553195 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-v3-8B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.226024618265807 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-v4-8B_bfloat16_07a48ba99661c8eb22a0d21cf66908524dc86025_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-v4-8B", - "sha": "07a48ba99661c8eb22a0d21cf66908524dc86025", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.08362197891132, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4289230353240513, - "normalized_score": 42.89230353240513 - }, - "bbh": { - "name": "BBH", - "value": 0.5336560458316563, - "normalized_score": 33.55721455326533 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12537764350453173, - "normalized_score": 12.537764350453173 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30033557046979864, - "normalized_score": 6.711409395973152 - }, - "musr": { - "name": "MUSR", - "value": 0.41972916666666665, - "normalized_score": 11.499479166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38173204787234044, - "normalized_score": 31.303560874704488 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": 
"2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-v4-8B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2324701395863638 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-v5-8B_bfloat16_b57426e197df1528b3322feb5db5da0f965e13c0_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-v5-8B", - "sha": "b57426e197df1528b3322feb5db5da0f965e13c0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.452627923750796, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44595894448951, - "normalized_score": 44.595894448951 - }, - "bbh": { - "name": "BBH", - "value": 0.5344958011609363, - "normalized_score": 33.50776477750613 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12613293051359517, - "normalized_score": 12.613293051359516 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.4223958333333333, - "normalized_score": 11.499479166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3820644946808511, - "normalized_score": 31.340499408983447 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-v5-8B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2738241090305913 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-v6-8B_bfloat16_45a565bf7cda1d5bbc102f52b42af582dbafe22f_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-v6-8B", - "sha": "45a565bf7cda1d5bbc102f52b42af582dbafe22f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.384013675258704, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4395891789341171, - "normalized_score": 43.95891789341171 - }, - "bbh": { - "name": "BBH", - "value": 0.5379609044843678, - "normalized_score": 34.084081768333256 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12915407854984895, - "normalized_score": 12.915407854984895 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - "musr": { - "name": "MUSR", - "value": 0.4184270833333333, - "normalized_score": 11.070052083333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3820644946808511, - "normalized_score": 31.340499408983447 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-v6-8B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.25514859695207 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-v7-8B_bfloat16_973e46f555bbc5293673054b1d624f309555a014_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-v7-8B", - "sha": "973e46f555bbc5293673054b1d624f309555a014", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": 
"Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.266235325792916, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4276741268722545, - "normalized_score": 42.767412687225445 - }, - "bbh": { - "name": "BBH", - "value": 0.5334882804815716, - "normalized_score": 33.50209397496653 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1336858006042296, - "normalized_score": 13.36858006042296 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.41709375, - "normalized_score": 11.070052083333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3835605053191489, - "normalized_score": 31.506722813238763 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-v7-8B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2459507125099005 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-v8-8B_bfloat16_f5ad2d7afa492191293d4c72f457dc943db24110_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-v8-8B", - "sha": "f5ad2d7afa492191293d4c72f457dc943db24110", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.533444499158758, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43834027048232027, - "normalized_score": 43.83402704823203 - }, - "bbh": { - "name": "BBH", - "value": 0.5359208647512345, - "normalized_score": 33.79805527334356 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13066465256797583, - "normalized_score": 13.066465256797583 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.42103124999999997, - "normalized_score": 11.928906249999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38272938829787234, - "normalized_score": 31.414376477541367 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-v8-8B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.9080142651207193 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-v9-8B_bfloat16_65edea4e7a87035a83baf8cf603b531bdc0b0097_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-v9-8B", - "sha": "65edea4e7a87035a83baf8cf603b531bdc0b0097", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.377315210740033, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43686640336529303, - "normalized_score": 43.6866403365293 - }, - "bbh": { - "name": "BBH", - "value": 0.5360680608930435, - "normalized_score": 33.7544160404883 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12764350453172205, - "normalized_score": 12.764350453172204 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3062080536912752, 
- "normalized_score": 7.494407158836691 - }, - "musr": { - "name": "MUSR", - "value": 0.4183958333333333, - "normalized_score": 11.232812499999994 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3819813829787234, - "normalized_score": 31.331264775413715 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-v9-8B (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.227281518781336 - } - }, - { - "id": "jaspionjader/Kosmos-EVAA-v9-TitanFusion-Mix-8B_bfloat16_61a23eb8ed6fa8f77854ac460d09406a4cf71559_False", - "model": { - "name": "jaspionjader/Kosmos-EVAA-v9-TitanFusion-Mix-8B", - "sha": "61a23eb8ed6fa8f77854ac460d09406a4cf71559", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.357229970545784, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.428373382624769, - "normalized_score": 42.83733826247689 - }, - "bbh": { - "name": "BBH", - "value": 0.5539931244833417, - "normalized_score": 36.24517494591178 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1148036253776435, - "normalized_score": 11.48036253776435 - }, - "gpqa": { - "name": "GPQA", - "value": 0.287751677852349, - "normalized_score": 5.033557046979867 - }, - "musr": { - "name": "MUSR", - "value": 0.43544791666666666, - "normalized_score": 13.03098958333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3836436170212766, - "normalized_score": 31.515957446808514 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "jaspionjader/Kosmos-EVAA-v9-TitanFusion-Mix-8B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.516953826809074 - } - }, - { - "id": "jaspionjader/Kosmos-Elusive-8b_bfloat16_32a31e0f3fe97d6b5d2eb677eca2c3a652e745a2_False", - "model": { - "name": "jaspionjader/Kosmos-Elusive-8b", - "sha": "32a31e0f3fe97d6b5d2eb677eca2c3a652e745a2", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.72148946873371, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.41688275996577967, - "normalized_score": 41.68827599657797 - }, - "bbh": { - "name": "BBH", - "value": 0.5338593917060857, - "normalized_score": 33.53558413403654 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12613293051359517, - "normalized_score": 12.613293051359516 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.4077916666666667, - "normalized_score": 10.107291666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3759973404255319, - "normalized_score": 30.666371158392437 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-26", - 
"submission_date": "2024-12-27", - "generation": 1, - "base_model": "jaspionjader/Kosmos-Elusive-8b (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.802080473067022 - } - }, - { - "id": "jaspionjader/Kosmos-Elusive-VENN-8B_bfloat16_40a336fee0e4810d0ecfcced82fad57ea11fcb8f_False", - "model": { - "name": "jaspionjader/Kosmos-Elusive-VENN-8B", - "sha": "40a336fee0e4810d0ecfcced82fad57ea11fcb8f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.81033720971873, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4232525255211727, - "normalized_score": 42.32525255211727 - }, - "bbh": { - "name": "BBH", - "value": 0.5355598563659026, - "normalized_score": 33.79747989824394 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12462235649546828, - "normalized_score": 12.462235649546828 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.4156979166666667, - "normalized_score": 10.595572916666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3797373670212766, - "normalized_score": 31.081929669030732 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "jaspionjader/Kosmos-Elusive-VENN-8B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.23513328622024 - } - }, - { - "id": "jaspionjader/Kosmos-Elusive-VENN-Asymmetric-8B_bfloat16_8a32a5b2c65aa46bf6511aa6fff398a67d5ebbea_False", - "model": { - "name": "jaspionjader/Kosmos-Elusive-VENN-Asymmetric-8B", - "sha": "8a32a5b2c65aa46bf6511aa6fff398a67d5ebbea", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.561771850568558, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4541771388803929, - "normalized_score": 45.417713888039295 - }, - "bbh": { - "name": "BBH", - "value": 0.5312976840812583, - "normalized_score": 33.16698443913645 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13444108761329304, - "normalized_score": 13.444108761329304 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.42506249999999995, - "normalized_score": 11.832812499999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3842253989361702, - "normalized_score": 31.580599881796683 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "jaspionjader/Kosmos-Elusive-VENN-Asymmetric-8B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.286053014145634 - } - }, - { - "id": "jaspionjader/Kosmos-Elusive-VENN-Aurora_faustus-8B_bfloat16_f3ff0504e4bc311e12427fcafd656b281c7f690d_False", - "model": { - "name": "jaspionjader/Kosmos-Elusive-VENN-Aurora_faustus-8B", - "sha": 
"f3ff0504e4bc311e12427fcafd656b281c7f690d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.711557989116244, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4335441074127758, - "normalized_score": 43.35441074127759 - }, - "bbh": { - "name": "BBH", - "value": 0.5303980337010061, - "normalized_score": 33.008333200343884 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11253776435045318, - "normalized_score": 11.253776435045317 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2953020134228188, - "normalized_score": 6.040268456375841 - }, - "musr": { - "name": "MUSR", - "value": 0.417, - "normalized_score": 11.558333333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3794880319148936, - "normalized_score": 31.05422576832151 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "jaspionjader/Kosmos-Elusive-VENN-Aurora_faustus-8B (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.3093567484866826 - } - }, - { - "id": "jaspionjader/Kosmos-VENN-8B_float16_6bec99b56f916add1676598b09c05d9ab40cb67b_False", - "model": { - "name": "jaspionjader/Kosmos-VENN-8B", - "sha": "6bec99b56f916add1676598b09c05d9ab40cb67b", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.14475508223022, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.433219413378724, - "normalized_score": 43.3219413378724 - }, - "bbh": { - "name": "BBH", - "value": 0.5317923607687299, - "normalized_score": 33.39577047946897 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14123867069486404, - "normalized_score": 14.123867069486403 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29278523489932884, - "normalized_score": 5.7046979865771785 - }, - "musr": { - "name": "MUSR", - "value": 0.42109375, - "normalized_score": 11.203385416666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3800698138297872, - "normalized_score": 31.11886820330969 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "jaspionjader/Kosmos-VENN-8B (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.2884897990741933 - } - }, - { - "id": "jaspionjader/PRP-Kosmos-EVAA-8B_bfloat16_5730cb6625d9851f42a39934dd4d506fc7f59244_False", - "model": { - "name": "jaspionjader/PRP-Kosmos-EVAA-8B", - "sha": "5730cb6625d9851f42a39934dd4d506fc7f59244", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.355970519226545, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.36327721556580983, - "normalized_score": 36.32772155658098 - }, - "bbh": { - "name": "BBH", - "value": 0.5237421324582278, - "normalized_score": 32.11810849839039 - }, - "math": { - "name": "MATH Level 5", - "value": 
0.09592145015105741, - "normalized_score": 9.592145015105741 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.425, - "normalized_score": 11.425000000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3765791223404255, - "normalized_score": 30.731013593380606 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "jaspionjader/PRP-Kosmos-EVAA-8B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.1946887107886779 - } - }, - { - "id": "jaspionjader/PRP-Kosmos-EVAA-light-8B_bfloat16_6c0aee176270e82803d751405b9a3bae2e6dd199_False", - "model": { - "name": "jaspionjader/PRP-Kosmos-EVAA-light-8B", - "sha": "6c0aee176270e82803d751405b9a3bae2e6dd199", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.818510021314523, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4321201079801593, - "normalized_score": 43.21201079801593 - }, - "bbh": { - "name": "BBH", - "value": 0.5274582578494339, - "normalized_score": 32.67446090823398 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11027190332326284, - "normalized_score": 11.027190332326283 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3221476510067114, - "normalized_score": 9.61968680089485 - }, - "musr": { - "name": "MUSR", - "value": 0.4235416666666667, - "normalized_score": 11.142708333333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3631150265957447, - "normalized_score": 29.235002955082745 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "jaspionjader/PRP-Kosmos-EVAA-light-8B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.2956604777136123 - } - }, - { - "id": "jaspionjader/TSN-Kosmos-EVAA-8B_bfloat16_0d2fad567d563c6cf14d68ebe2e416b4f8f996b1_False", - "model": { - "name": "jaspionjader/TSN-Kosmos-EVAA-8B", - "sha": "0d2fad567d563c6cf14d68ebe2e416b4f8f996b1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.896257217110144, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49032234471203073, - "normalized_score": 49.032234471203076 - }, - "bbh": { - "name": "BBH", - "value": 0.5347376087743225, - "normalized_score": 33.73244759125691 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14501510574018128, - "normalized_score": 14.501510574018129 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32046979865771813, - "normalized_score": 9.395973154362418 - }, - "musr": { - "name": "MUSR", - "value": 0.4173125, - "normalized_score": 11.264062499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.383061835106383, - "normalized_score": 31.451315011820324 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - 
"is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-28", - "submission_date": "2024-12-28", - "generation": 1, - "base_model": "jaspionjader/TSN-Kosmos-EVAA-8B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.4479009137598755 - } - }, - { - "id": "jaspionjader/TSN-Kosmos-EVAA-v2-8B_bfloat16_d6c402879c63109a484ce2ebe1198f6c0c1696bd_False", - "model": { - "name": "jaspionjader/TSN-Kosmos-EVAA-v2-8B", - "sha": "d6c402879c63109a484ce2ebe1198f6c0c1696bd", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.89651047859336, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.46669171132594844, - "normalized_score": 46.66917113259485 - }, - "bbh": { - "name": "BBH", - "value": 0.534342097284994, - "normalized_score": 33.53146314936159 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10800604229607251, - "normalized_score": 10.80060422960725 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3221476510067114, - "normalized_score": 9.61968680089485 - }, - "musr": { - "name": "MUSR", - "value": 0.41864583333333333, - "normalized_score": 12.064062499999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3762466755319149, - "normalized_score": 30.69407505910165 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-28", - "submission_date": "2024-12-28", - "generation": 1, - "base_model": "jaspionjader/TSN-Kosmos-EVAA-v2-8B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.4678468493711292 - } - }, - { - "id": "jaspionjader/bbb-1_bfloat16_3ab45702f60bf07cb8fd4ad048d16e9daa8735cf_False", - "model": { - "name": "jaspionjader/bbb-1", - "sha": "3ab45702f60bf07cb8fd4ad048d16e9daa8735cf", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.60960304612495, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4864005283758206, - "normalized_score": 48.64005283758206 - }, - "bbh": { - "name": "BBH", - "value": 0.5375556962119087, - "normalized_score": 33.68952047615377 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13670694864048338, - "normalized_score": 13.670694864048338 - }, - "gpqa": { - "name": "GPQA", - "value": 0.313758389261745, - "normalized_score": 8.501118568232664 - }, - "musr": { - "name": "MUSR", - "value": 0.41706250000000006, - "normalized_score": 10.966145833333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38971077127659576, - "normalized_score": 32.190085697399525 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-17", - "submission_date": "2025-01-17", - "generation": 1, - "base_model": "jaspionjader/bbb-1 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.330856716977883 - } - }, - { - "id": "jaspionjader/bbb-2_bfloat16_7cb367ed6da1eb9f9777c00afbded6543ca012d9_False", - "model": { - "name": "jaspionjader/bbb-2", - "sha": "7cb367ed6da1eb9f9777c00afbded6543ca012d9", - "precision": "bfloat16", - "type": "basemergesandmoerges", 
- "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.656531650464387, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4077403511571519, - "normalized_score": 40.77403511571519 - }, - "bbh": { - "name": "BBH", - "value": 0.5066789926627318, - "normalized_score": 29.187787519922125 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11253776435045318, - "normalized_score": 11.253776435045317 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - "musr": { - "name": "MUSR", - "value": 0.4144583333333333, - "normalized_score": 12.507291666666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.363530585106383, - "normalized_score": 29.281176122931434 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-17", - "submission_date": "2025-01-17", - "generation": 1, - "base_model": "jaspionjader/bbb-2 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3060917369107468 - } - }, - { - "id": "jaspionjader/bbb-3_bfloat16_35b0bde357a53cce36f2361ce6451f4efd49272f_False", - "model": { - "name": "jaspionjader/bbb-3", - "sha": "35b0bde357a53cce36f2361ce6451f4efd49272f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.067128829993425, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.416832892281369, - "normalized_score": 41.6832892281369 - }, - "bbh": { - "name": "BBH", - "value": 0.5157831821186084, - "normalized_score": 30.42347738232735 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1404833836858006, - "normalized_score": 14.04833836858006 - }, - "gpqa": { - "name": "GPQA", - "value": 0.311241610738255, - "normalized_score": 8.165548098434002 - }, - "musr": { - "name": "MUSR", - "value": 0.4264895833333333, - "normalized_score": 12.344531249999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38563829787234044, - "normalized_score": 31.73758865248227 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-17", - "submission_date": "2025-01-17", - "generation": 1, - "base_model": "jaspionjader/bbb-3 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.9345593718263658 - } - }, - { - "id": "jaspionjader/bbb-4_bfloat16_0830735240b34e9322d111f429503f8526dc41d9_False", - "model": { - "name": "jaspionjader/bbb-4", - "sha": "0830735240b34e9322d111f429503f8526dc41d9", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.306922027984893, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47675833455232114, - "normalized_score": 47.67583345523211 - }, - "bbh": { - "name": "BBH", - "value": 0.52115051798211, - "normalized_score": 31.42180070448896 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12764350453172205, - "normalized_score": 12.764350453172204 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - 
"musr": { - "name": "MUSR", - "value": 0.40924999999999995, - "normalized_score": 10.789583333333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3773271276595745, - "normalized_score": 30.814125295508273 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-17", - "submission_date": "2025-01-17", - "generation": 1, - "base_model": "jaspionjader/bbb-4 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 2.0964247692210245 - } - }, - { - "id": "jaspionjader/bbb-5_bfloat16_e61731b38cc8800cf26f41684dee29bc3b983798_False", - "model": { - "name": "jaspionjader/bbb-5", - "sha": "e61731b38cc8800cf26f41684dee29bc3b983798", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.45290117457669, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4702888336281067, - "normalized_score": 47.028883362810674 - }, - "bbh": { - "name": "BBH", - "value": 0.5206902586604485, - "normalized_score": 31.2789023752389 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13972809667673716, - "normalized_score": 13.972809667673717 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.3998229166666667, - "normalized_score": 9.677864583333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3833942819148936, - "normalized_score": 31.488253546099287 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-17", - "submission_date": "2025-01-17", - "generation": 1, - "base_model": "jaspionjader/bbb-5 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2991299609398375 - } - }, - { - "id": "jaspionjader/bbb-6_bfloat16_38749637f39b28f348bbb50b49a35231aca3ee73_False", - "model": { - "name": "jaspionjader/bbb-6", - "sha": "38749637f39b28f348bbb50b49a35231aca3ee73", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.071135369194597, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.48797413086166924, - "normalized_score": 48.79741308616693 - }, - "bbh": { - "name": "BBH", - "value": 0.5211453749255449, - "normalized_score": 31.296750246994566 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13897280966767372, - "normalized_score": 13.897280966767372 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3104026845637584, - "normalized_score": 8.05369127516779 - }, - "musr": { - "name": "MUSR", - "value": 0.40515625000000005, - "normalized_score": 10.477864583333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3871343085106383, - "normalized_score": 31.903812056737586 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-17", - "submission_date": "2025-01-17", - "generation": 1, - "base_model": "jaspionjader/bbb-6 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - 
"co2_cost": 1.3092681125293013 - } - }, - { - "id": "jaspionjader/bbb-7_bfloat16_c2ec7d278f261cf98cd065d94f8a688cfa5bb520_False", - "model": { - "name": "jaspionjader/bbb-7", - "sha": "c2ec7d278f261cf98cd065d94f8a688cfa5bb520", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.85265999247541, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.48280340607366234, - "normalized_score": 48.280340607366234 - }, - "bbh": { - "name": "BBH", - "value": 0.5211089947725771, - "normalized_score": 31.125508104842208 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13670694864048338, - "normalized_score": 13.670694864048338 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3104026845637584, - "normalized_score": 8.05369127516779 - }, - "musr": { - "name": "MUSR", - "value": 0.4038229166666667, - "normalized_score": 10.211197916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3859707446808511, - "normalized_score": 31.774527186761226 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-17", - "submission_date": "2025-01-17", - "generation": 1, - "base_model": "jaspionjader/bbb-7 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.247956538258571 - } - }, - { - "id": "jaspionjader/bh-1_bfloat16_1787f4e4e5a1aa2fa471234f8206a581960654a0_False", - "model": { - "name": "jaspionjader/bh-1", - "sha": "1787f4e4e5a1aa2fa471234f8206a581960654a0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.935035677562386, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42842325030917966, - "normalized_score": 42.84232503091796 - }, - "bbh": { - "name": "BBH", - "value": 0.5890155164168736, - "normalized_score": 41.45130060035327 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05362537764350453, - "normalized_score": 5.362537764350453 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.4441041666666667, - "normalized_score": 14.813020833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3449135638297872, - "normalized_score": 27.21261820330969 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/bh-1 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 2.830374517106328 - } - }, - { - "id": "jaspionjader/bh-10_bfloat16_d6ac5966ebb522d230201e669cf796e6e159ca3f_False", - "model": { - "name": "jaspionjader/bh-10", - "sha": "d6ac5966ebb522d230201e669cf796e6e159ca3f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.28628220700425, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.46184568057199343, - "normalized_score": 46.18456805719934 - }, - "bbh": { - "name": "BBH", - "value": 
0.5856025427339699, - "normalized_score": 40.861498713027906 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11027190332326284, - "normalized_score": 11.027190332326283 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30033557046979864, - "normalized_score": 6.711409395973152 - }, - "musr": { - "name": "MUSR", - "value": 0.41985416666666664, - "normalized_score": 10.848437500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37076130319148937, - "normalized_score": 30.084589243498822 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/bh-10 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4701216774772474 - } - }, - { - "id": "jaspionjader/bh-11_bfloat16_bb606df537ad63ca43725019938e2fc364356dce_False", - "model": { - "name": "jaspionjader/bh-11", - "sha": "bb606df537ad63ca43725019938e2fc364356dce", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.42026599244475, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45754930251732073, - "normalized_score": 45.75493025173208 - }, - "bbh": { - "name": "BBH", - "value": 0.5851155912628809, - "normalized_score": 40.73264327719791 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11782477341389729, - "normalized_score": 11.782477341389729 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.4145520833333334, - "normalized_score": 10.219010416666672 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3738364361702128, - "normalized_score": 30.426270685579198 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/bh-11 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3611822743766173 - } - }, - { - "id": "jaspionjader/bh-12_bfloat16_b53c06ed8da7b3c192fe8446febbfc6d947b399d_False", - "model": { - "name": "jaspionjader/bh-12", - "sha": "b53c06ed8da7b3c192fe8446febbfc6d947b399d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.407887173668158, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47338617091539337, - "normalized_score": 47.338617091539334 - }, - "bbh": { - "name": "BBH", - "value": 0.5802489392471556, - "normalized_score": 40.253624510427336 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11858006042296072, - "normalized_score": 11.858006042296072 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30033557046979864, - "normalized_score": 6.711409395973152 - }, - "musr": { - "name": "MUSR", - "value": 0.4144895833333333, - "normalized_score": 9.877864583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37367021276595747, - "normalized_score": 30.40780141843972 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - 
"is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/bh-12 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3662580036416 - } - }, - { - "id": "jaspionjader/bh-13_bfloat16_65dfa9e29ab360f3e730de8542103a0c11ce37fd_False", - "model": { - "name": "jaspionjader/bh-13", - "sha": "65dfa9e29ab360f3e730de8542103a0c11ce37fd", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.405271709903644, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4697890486132351, - "normalized_score": 46.978904861323514 - }, - "bbh": { - "name": "BBH", - "value": 0.5777886799254942, - "normalized_score": 40.01032313106834 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11253776435045318, - "normalized_score": 11.253776435045317 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.41585416666666664, - "normalized_score": 10.248437500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37300531914893614, - "normalized_score": 30.33392434988179 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/bh-13 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3237689917031747 - } - }, - { - "id": "jaspionjader/bh-15_bfloat16_40fb0cee4ea43e4ade9544ae10e7ea04dfc1dc39_False", - "model": { - "name": "jaspionjader/bh-15", - "sha": "40fb0cee4ea43e4ade9544ae10e7ea04dfc1dc39", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.4983726655142, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47453534399836883, - "normalized_score": 47.45353439983688 - }, - "bbh": { - "name": "BBH", - "value": 0.5818643001829722, - "normalized_score": 40.33141780064374 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12462235649546828, - "normalized_score": 12.462235649546828 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2986577181208054, - "normalized_score": 6.487695749440718 - }, - "musr": { - "name": "MUSR", - "value": 0.4105208333333334, - "normalized_score": 9.515104166666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37666223404255317, - "normalized_score": 30.740248226950357 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/bh-15 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3222540136225727 - } - }, - { - "id": "jaspionjader/bh-16_bfloat16_096e6fa98b59a78c5e02647326d7d7c6a850d43e_False", - "model": { - "name": "jaspionjader/bh-16", - "sha": "096e6fa98b59a78c5e02647326d7d7c6a850d43e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": 
"Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.57710023673242, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4730614768813415, - "normalized_score": 47.306147688134146 - }, - "bbh": { - "name": "BBH", - "value": 0.5783335636603978, - "normalized_score": 39.952339093735326 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11933534743202417, - "normalized_score": 11.933534743202417 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.4158541666666667, - "normalized_score": 10.381770833333338 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37757646276595747, - "normalized_score": 30.841829196217496 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/bh-16 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.36752509458533 - } - }, - { - "id": "jaspionjader/bh-17_bfloat16_f62181f922611513d03ec7bfd98527768a303176_False", - "model": { - "name": "jaspionjader/bh-17", - "sha": "f62181f922611513d03ec7bfd98527768a303176", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.253816929185927, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4721871301480073, - "normalized_score": 47.21871301480073 - }, - "bbh": { - "name": "BBH", - "value": 0.5776302177859685, - "normalized_score": 39.75841395740856 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11329305135951662, - "normalized_score": 11.329305135951662 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.41582291666666665, - "normalized_score": 10.211197916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37566489361702127, - "normalized_score": 30.629432624113477 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/bh-17 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3579540781836985 - } - }, - { - "id": "jaspionjader/bh-18_bfloat16_02c29d295dcdb4bd9f7df12848db3130dec808aa_False", - "model": { - "name": "jaspionjader/bh-18", - "sha": "02c29d295dcdb4bd9f7df12848db3130dec808aa", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.597830822803832, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47246195649764844, - "normalized_score": 47.24619564976484 - }, - "bbh": { - "name": "BBH", - "value": 0.5823837707078298, - "normalized_score": 40.464076641342125 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11858006042296072, - "normalized_score": 11.858006042296072 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30033557046979864, - "normalized_score": 6.711409395973152 - }, - "musr": { - 
"name": "MUSR", - "value": 0.4184895833333333, - "normalized_score": 10.677864583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37566489361702127, - "normalized_score": 30.629432624113477 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/bh-18 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2912412954439991 - } - }, - { - "id": "jaspionjader/bh-19_bfloat16_b2d210e74c848d399798da2db3705a39b64bb8b2_False", - "model": { - "name": "jaspionjader/bh-19", - "sha": "b2d210e74c848d399798da2db3705a39b64bb8b2", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.18004103008259, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45842364925065493, - "normalized_score": 45.842364925065496 - }, - "bbh": { - "name": "BBH", - "value": 0.5765774285787187, - "normalized_score": 39.56714484667162 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11933534743202417, - "normalized_score": 11.933534743202417 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.417125, - "normalized_score": 10.640625000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3774933510638298, - "normalized_score": 30.832594562647753 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/bh-19 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3182863656471855 - } - }, - { - "id": "jaspionjader/bh-2_bfloat16_d442311231a30f60b9f162ee440bbe0d64df8793_False", - "model": { - "name": "jaspionjader/bh-2", - "sha": "d442311231a30f60b9f162ee440bbe0d64df8793", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.25166292008038, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45792386423578324, - "normalized_score": 45.79238642357833 - }, - "bbh": { - "name": "BBH", - "value": 0.5937358907182445, - "normalized_score": 41.94562264778238 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1027190332326284, - "normalized_score": 10.27190332326284 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.41864583333333333, - "normalized_score": 10.730729166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3695146276595745, - "normalized_score": 29.94606973995272 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/bh-2 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 
2.8217813681439265 - } - }, - { - "id": "jaspionjader/bh-20_bfloat16_f7a72294d17bd98e4ee61333591e3e8c24949ca6_False", - "model": { - "name": "jaspionjader/bh-20", - "sha": "f7a72294d17bd98e4ee61333591e3e8c24949ca6", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.985809500141922, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4727367828472896, - "normalized_score": 47.27367828472896 - }, - "bbh": { - "name": "BBH", - "value": 0.574973333640619, - "normalized_score": 39.32473656427744 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12009063444108761, - "normalized_score": 12.009063444108762 - }, - "gpqa": { - "name": "GPQA", - "value": 0.287751677852349, - "normalized_score": 5.033557046979867 - }, - "musr": { - "name": "MUSR", - "value": 0.4105208333333334, - "normalized_score": 9.515104166666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3768284574468085, - "normalized_score": 30.758717494089833 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/bh-20 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 2.01052450867003 - } - }, - { - "id": "jaspionjader/bh-21_bfloat16_2d814304d5f933c92c06140b2581135a6251fd68_False", - "model": { - "name": "jaspionjader/bh-21", - "sha": "2d814304d5f933c92c06140b2581135a6251fd68", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.388213730278952, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47001400727846554, - "normalized_score": 47.00140072784656 - }, - "bbh": { - "name": "BBH", - "value": 0.5738369241857685, - "normalized_score": 39.30946768551373 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1216012084592145, - "normalized_score": 12.16012084592145 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.4157916666666667, - "normalized_score": 10.640625000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37757646276595747, - "normalized_score": 30.841829196217496 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/bh-21 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.652223645039114 - } - }, - { - "id": "jaspionjader/bh-22_bfloat16_2454ffa279c5e0bac17b527eda9269c00663b829_False", - "model": { - "name": "jaspionjader/bh-22", - "sha": "2454ffa279c5e0bac17b527eda9269c00663b829", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.204382290276076, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45999725173650363, - "normalized_score": 45.99972517365036 - }, - "bbh": { - "name": "BBH", - "value": 
0.579296884452635, - "normalized_score": 39.959361669826855 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11858006042296072, - "normalized_score": 11.858006042296072 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.41715625000000006, - "normalized_score": 10.544531250000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3764128989361702, - "normalized_score": 30.712544326241126 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/bh-22 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 2.5978900160791345 - } - }, - { - "id": "jaspionjader/bh-23_bfloat16_a74038987f6f889e880fdb5178509d2210cc3222_False", - "model": { - "name": "jaspionjader/bh-23", - "sha": "a74038987f6f889e880fdb5178509d2210cc3222", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.18046602127528, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.46576749690820357, - "normalized_score": 46.57674969082036 - }, - "bbh": { - "name": "BBH", - "value": 0.570027700842045, - "normalized_score": 38.53896512438835 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12009063444108761, - "normalized_score": 12.009063444108762 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.4197291666666667, - "normalized_score": 10.966145833333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37957114361702127, - "normalized_score": 31.063460401891252 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "jaspionjader/bh-23 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3146741406993703 - } - }, - { - "id": "jaspionjader/bh-24_bfloat16_1732cc1454bc301d045ed7fe3b22408757924fed_False", - "model": { - "name": "jaspionjader/bh-24", - "sha": "1732cc1454bc301d045ed7fe3b22408757924fed", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.42300670299736, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4715377420799035, - "normalized_score": 47.15377420799035 - }, - "bbh": { - "name": "BBH", - "value": 0.5716684749879075, - "normalized_score": 38.79538568907879 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1268882175226586, - "normalized_score": 12.688821752265861 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.4157604166666667, - "normalized_score": 10.536718750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38090093085106386, - "normalized_score": 31.21121453900709 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - 
"is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "jaspionjader/bh-24 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3130766638650941 - } - }, - { - "id": "jaspionjader/bh-25_bfloat16_7c40e832ae64bffdeb82de3ac0776be645f7c779_False", - "model": { - "name": "jaspionjader/bh-25", - "sha": "7c40e832ae64bffdeb82de3ac0776be645f7c779", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.01355078586454, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47518473206647255, - "normalized_score": 47.51847320664726 - }, - "bbh": { - "name": "BBH", - "value": 0.5705628020556314, - "normalized_score": 38.72954410110149 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11329305135951662, - "normalized_score": 11.329305135951662 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.4117916666666667, - "normalized_score": 10.107291666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37824135638297873, - "normalized_score": 30.915706264775416 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "jaspionjader/bh-25 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3318180601928693 - } - }, - { - "id": "jaspionjader/bh-26_bfloat16_92be638bdfd6e1dbd52d3acc017762ac75e07a6c_False", - "model": { - "name": "jaspionjader/bh-26", - "sha": "92be638bdfd6e1dbd52d3acc017762ac75e07a6c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.518295926434586, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4690897928607206, - "normalized_score": 46.90897928607207 - }, - "bbh": { - "name": "BBH", - "value": 0.5734958656360526, - "normalized_score": 38.97859481521599 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1163141993957704, - "normalized_score": 11.63141993957704 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.4276979166666666, - "normalized_score": 12.195572916666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3771609042553192, - "normalized_score": 30.795656028368796 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "jaspionjader/bh-26 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3453299226519775 - } - }, - { - "id": "jaspionjader/bh-27_bfloat16_e9d6ec1a8df709c367643ab26362d99f2d20f681_False", - "model": { - "name": "jaspionjader/bh-27", - "sha": "e9d6ec1a8df709c367643ab26362d99f2d20f681", - "precision": "bfloat16", - "type": "basemergesandmoerges", - 
"weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.502923044309686, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4818791916559174, - "normalized_score": 48.18791916559174 - }, - "bbh": { - "name": "BBH", - "value": 0.571405917910282, - "normalized_score": 38.70545276461525 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12764350453172205, - "normalized_score": 12.764350453172204 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.409125, - "normalized_score": 10.107291666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3799035904255319, - "normalized_score": 31.100398936170215 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "jaspionjader/bh-27 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3224764064490586 - } - }, - { - "id": "jaspionjader/bh-28_bfloat16_6fc7244ffa3191b1e6559a8ecd29e58a0c8ad281_False", - "model": { - "name": "jaspionjader/bh-28", - "sha": "6fc7244ffa3191b1e6559a8ecd29e58a0c8ad281", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.48580651508801, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4785070280189896, - "normalized_score": 47.85070280189896 - }, - "bbh": { - "name": "BBH", - "value": 0.5702617832390487, - "normalized_score": 38.64315088483485 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12311178247734139, - "normalized_score": 12.311178247734139 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2986577181208054, - "normalized_score": 6.487695749440718 - }, - "musr": { - "name": "MUSR", - "value": 0.413125, - "normalized_score": 10.373958333333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3812333776595745, - "normalized_score": 31.24815307328605 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "jaspionjader/bh-28 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3363114008274333 - } - }, - { - "id": "jaspionjader/bh-29_bfloat16_18de72fb7e3a5cb6136e8df5e3754aee94ec5953_False", - "model": { - "name": "jaspionjader/bh-29", - "sha": "18de72fb7e3a5cb6136e8df5e3754aee94ec5953", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.377065926627154, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.46881496651107946, - "normalized_score": 46.88149665110795 - }, - "bbh": { - "name": "BBH", - "value": 0.5670161357895335, - "normalized_score": 38.271768582086715 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12084592145015106, - "normalized_score": 12.084592145015106 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2953020134228188, - "normalized_score": 6.040268456375841 - }, - "musr": { - "name": 
"MUSR", - "value": 0.4236979166666666, - "normalized_score": 11.66223958333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38189827127659576, - "normalized_score": 31.32203014184397 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "jaspionjader/bh-29 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3587236809252874 - } - }, - { - "id": "jaspionjader/bh-3_bfloat16_d63451972ca76bd17d93aedb94efeea1d79ecc09_False", - "model": { - "name": "jaspionjader/bh-3", - "sha": "d63451972ca76bd17d93aedb94efeea1d79ecc09", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.48183645734593, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4663670172918966, - "normalized_score": 46.63670172918966 - }, - "bbh": { - "name": "BBH", - "value": 0.5890722855221537, - "normalized_score": 41.325395042771994 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1148036253776435, - "normalized_score": 11.48036253776435 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - "musr": { - "name": "MUSR", - "value": 0.41728125, - "normalized_score": 10.493489583333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37017952127659576, - "normalized_score": 30.019946808510632 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/bh-3 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 3.5736644333791383 - } - }, - { - "id": "jaspionjader/bh-30_bfloat16_9e1264b2109015b9d4fce732bdeef1b6ca276e6c_False", - "model": { - "name": "jaspionjader/bh-30", - "sha": "9e1264b2109015b9d4fce732bdeef1b6ca276e6c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.17349100843053, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.46664184364153777, - "normalized_score": 46.664184364153776 - }, - "bbh": { - "name": "BBH", - "value": 0.5705838505746653, - "normalized_score": 38.65102840424575 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12311178247734139, - "normalized_score": 12.311178247734139 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29278523489932884, - "normalized_score": 5.7046979865771785 - }, - "musr": { - "name": "MUSR", - "value": 0.4144270833333334, - "normalized_score": 10.80338541666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3781582446808511, - "normalized_score": 30.906471631205672 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "jaspionjader/bh-30 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2817456204588766 - 
-    }
-  },
-  {
-    "id": "jaspionjader/bh-31_bfloat16_5a9ccda1ea63311dd380f32207bb68ccd041b5f2_False",
-    "model": {
-      "name": "jaspionjader/bh-31",
-      "sha": "5a9ccda1ea63311dd380f32207bb68ccd041b5f2",
-      "precision": "bfloat16",
-      "type": "basemergesandmoerges",
-      "weight_type": "Original",
-      "architecture": "LlamaForCausalLM",
-      "average_score": 24.19175854920456,
-      "has_chat_template": false
-    },
-    "evaluations": {
-      "ifeval": {
-        "name": "IFEval",
-        "value": 0.4727367828472896,
-        "normalized_score": 47.27367828472896
-      },
-      "bbh": {
-        "name": "BBH",
-        "value": 0.5665082303171874,
-        "normalized_score": 37.88578885449606
-      },
-      "math": {
-        "name": "MATH Level 5",
-        "value": 0.1283987915407855,
-        "normalized_score": 12.83987915407855
-      },
-      "gpqa": {
-        "name": "GPQA",
-        "value": 0.2936241610738255,
-        "normalized_score": 5.8165548098433995
-      },
-      "musr": {
-        "name": "MUSR",
-        "value": 0.4104270833333334,
-        "normalized_score": 10.003385416666669
-      },
-      "mmlu_pro": {
-        "name": "MMLU-PRO",
-        "value": 0.3819813829787234,
-        "normalized_score": 31.331264775413715
-      }
-    },
-    "features": {
-      "is_not_available_on_hub": false,
-      "is_merged": false,
-      "is_moe": false,
-      "is_flagged": false,
-      "is_official_provider": false
-    },
-    "metadata": {
-      "upload_date": "2025-02-03",
-      "submission_date": "2025-02-03",
-      "generation": 1,
-      "base_model": "jaspionjader/bh-31 (Merge)",
-      "hub_license": "",
-      "hub_hearts": 0,
-      "params_billions": 8.03,
-      "co2_cost": 1.3225714348331734
-    }
-  },
-  {
-    "id": "jaspionjader/bh-32_bfloat16_0df1348a1ba9c3213399c3cd3ecd7efbd164a4ae_False",
-    "model": {
-      "name": "jaspionjader/bh-32",
-      "sha": "0df1348a1ba9c3213399c3cd3ecd7efbd164a4ae",
-      "precision": "bfloat16",
-      "type": "basemergesandmoerges",
-      "weight_type": "Original",
-      "architecture": "LlamaForCausalLM",
-      "average_score": 24.181944671595648,
-      "has_chat_template": false
-    },
-    "evaluations": {
-      "ifeval": {
-        "name": "IFEval",
-        "value": 0.4635943740386619,
-        "normalized_score": 46.35943740386618
-      },
-      "bbh": {
-        "name": "BBH",
-        "value": 0.5662056335064284,
-        "normalized_score": 38.0583806332999
-      },
-      "math": {
-        "name": "MATH Level 5",
-        "value": 0.12462235649546828,
-        "normalized_score": 12.462235649546828
-      },
-      "gpqa": {
-        "name": "GPQA",
-        "value": 0.29697986577181207,
-        "normalized_score": 6.263982102908276
-      },
-      "musr": {
-        "name": "MUSR",
-        "value": 0.4157291666666667,
-        "normalized_score": 10.699479166666668
-      },
-      "mmlu_pro": {
-        "name": "MMLU-PRO",
-        "value": 0.3812333776595745,
-        "normalized_score": 31.24815307328605
-      }
-    },
-    "features": {
-      "is_not_available_on_hub": false,
-      "is_merged": false,
-      "is_moe": false,
-      "is_flagged": false,
-      "is_official_provider": false
-    },
-    "metadata": {
-      "upload_date": "2025-02-03",
-      "submission_date": "2025-02-03",
-      "generation": 1,
-      "base_model": "jaspionjader/bh-32 (Merge)",
-      "hub_license": "",
-      "hub_hearts": 0,
-      "params_billions": 8.03,
-      "co2_cost": 1.3191519612253972
-    }
-  },
-  {
-    "id": "jaspionjader/bh-33_bfloat16_f27bb80d41ba5585c4d127e5792ae8b1b5742791_False",
-    "model": {
-      "name": "jaspionjader/bh-33",
-      "sha": "f27bb80d41ba5585c4d127e5792ae8b1b5742791",
-      "precision": "bfloat16",
-      "type": "basemergesandmoerges",
-      "weight_type": "Original",
-      "architecture": "LlamaForCausalLM",
-      "average_score": 24.175212795627772,
-      "has_chat_template": false
-    },
-    "evaluations": {
-      "ifeval": {
-        "name": "IFEval",
-        "value": 0.4685401401614383,
-        "normalized_score": 46.854014016143836
-      },
-      "bbh": {
-        "name": "BBH",
-        "value": 0.5652966799156172,
"normalized_score": 37.93177398115369 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11782477341389729, - "normalized_score": 11.782477341389729 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.4156979166666667, - "normalized_score": 11.128906250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38081781914893614, - "normalized_score": 31.201979905437344 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "jaspionjader/bh-33 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3091935598049491 - } - }, - { - "id": "jaspionjader/bh-34_bfloat16_5531fc28cf18d5e80ea4373e551df6cb0ada096a_False", - "model": { - "name": "jaspionjader/bh-34", - "sha": "5531fc28cf18d5e80ea4373e551df6cb0ada096a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.075219338289553, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4623953332712758, - "normalized_score": 46.239533327127575 - }, - "bbh": { - "name": "BBH", - "value": 0.5681235912530039, - "normalized_score": 38.20458432336169 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12084592145015106, - "normalized_score": 12.084592145015106 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.4184583333333333, - "normalized_score": 11.17395833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38040226063829785, - "normalized_score": 31.155806737588655 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "jaspionjader/bh-34 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 0.6563342824599672 - } - }, - { - "id": "jaspionjader/bh-35_bfloat16_d8698568033c3d2f61031730b7271c510c95a8f7_False", - "model": { - "name": "jaspionjader/bh-35", - "sha": "d8698568033c3d2f61031730b7271c510c95a8f7", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.218719337673107, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47213726246359655, - "normalized_score": 47.213726246359656 - }, - "bbh": { - "name": "BBH", - "value": 0.5639648300586834, - "normalized_score": 37.76624487883907 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12462235649546828, - "normalized_score": 12.462235649546828 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2953020134228188, - "normalized_score": 6.040268456375841 - }, - "musr": { - "name": "MUSR", - "value": 0.41830208333333335, - "normalized_score": 10.387760416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3829787234042553, - "normalized_score": 31.44208037825059 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - 
"is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "jaspionjader/bh-35 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.9903752487611488 - } - }, - { - "id": "jaspionjader/bh-36_bfloat16_dafba34c8710359f5cc4145b449268924b6dcb3b_False", - "model": { - "name": "jaspionjader/bh-36", - "sha": "dafba34c8710359f5cc4145b449268924b6dcb3b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.500646123615535, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4665919759571271, - "normalized_score": 46.659197595712705 - }, - "bbh": { - "name": "BBH", - "value": 0.5664445599052024, - "normalized_score": 38.117107059680755 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12386706948640483, - "normalized_score": 12.386706948640484 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - "musr": { - "name": "MUSR", - "value": 0.4196354166666667, - "normalized_score": 11.454427083333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.383061835106383, - "normalized_score": 31.451315011820324 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "jaspionjader/bh-36 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.293844683482796 - } - }, - { - "id": "jaspionjader/bh-37_bfloat16_0cd28de27176045d544be3d4a1dcbad7e722e0ac_False", - "model": { - "name": "jaspionjader/bh-37", - "sha": "0cd28de27176045d544be3d4a1dcbad7e722e0ac", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.412457780567525, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.48797413086166924, - "normalized_score": 48.79741308616693 - }, - "bbh": { - "name": "BBH", - "value": 0.562488460737535, - "normalized_score": 37.510762923762705 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1216012084592145, - "normalized_score": 12.16012084592145 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.4156354166666667, - "normalized_score": 10.654427083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3828125, - "normalized_score": 31.42361111111111 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "jaspionjader/bh-37 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3306327517230658 - } - }, - { - "id": "jaspionjader/bh-38_bfloat16_f2d9867f728736c4a49b6a395d6daced31096757_False", - "model": { - "name": "jaspionjader/bh-38", - "sha": "f2d9867f728736c4a49b6a395d6daced31096757", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - 
"architecture": "LlamaForCausalLM", - "average_score": 24.065232926604608, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.46179581288758276, - "normalized_score": 46.17958128875827 - }, - "bbh": { - "name": "BBH", - "value": 0.5658176339168742, - "normalized_score": 38.05344075657449 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12386706948640483, - "normalized_score": 12.386706948640484 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.4117291666666667, - "normalized_score": 10.166145833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3810671542553192, - "normalized_score": 31.229683806146568 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "jaspionjader/bh-38 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.271061424375078 - } - }, - { - "id": "jaspionjader/bh-39_bfloat16_b42671d09ec8a65561c59815f2bcf4b8ac565560_False", - "model": { - "name": "jaspionjader/bh-39", - "sha": "b42671d09ec8a65561c59815f2bcf4b8ac565560", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.313151042679937, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45759917020173135, - "normalized_score": 45.759917020173134 - }, - "bbh": { - "name": "BBH", - "value": 0.5633012248625926, - "normalized_score": 37.62931792742345 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12537764350453173, - "normalized_score": 12.537764350453173 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30033557046979864, - "normalized_score": 6.711409395973152 - }, - "musr": { - "name": "MUSR", - "value": 0.4262395833333334, - "normalized_score": 11.77994791666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38314494680851063, - "normalized_score": 31.460549645390074 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "jaspionjader/bh-39 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.365749840940557 - } - }, - { - "id": "jaspionjader/bh-4_bfloat16_4c9f4b6bd651bcc284124803ed5585fce6f8a79f_False", - "model": { - "name": "jaspionjader/bh-4", - "sha": "4c9f4b6bd651bcc284124803ed5585fce6f8a79f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.27326191997518, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4672912317096415, - "normalized_score": 46.72912317096415 - }, - "bbh": { - "name": "BBH", - "value": 0.5892000111391051, - "normalized_score": 41.322953178368685 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1095166163141994, - "normalized_score": 10.95166163141994 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - 
"value": 0.41728125, - "normalized_score": 10.426822916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3705119680851064, - "normalized_score": 30.056885342789595 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/bh-4 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.441372809462604 - } - }, - { - "id": "jaspionjader/bh-40_bfloat16_f28a1aef35a15d872df09c5e8a2b1dc431b9ea5c_False", - "model": { - "name": "jaspionjader/bh-40", - "sha": "f28a1aef35a15d872df09c5e8a2b1dc431b9ea5c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.130851066478012, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45357761849669986, - "normalized_score": 45.357761849669984 - }, - "bbh": { - "name": "BBH", - "value": 0.5633956317971519, - "normalized_score": 37.62940413720817 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12462235649546828, - "normalized_score": 12.462235649546828 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2986577181208054, - "normalized_score": 6.487695749440718 - }, - "musr": { - "name": "MUSR", - "value": 0.4236041666666667, - "normalized_score": 11.350520833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38347739361702127, - "normalized_score": 31.49748817966903 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "jaspionjader/bh-40 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.929656917369135 - } - }, - { - "id": "jaspionjader/bh-41_bfloat16_754c1a81b955d34e2bc93505350f1cdbac527260_False", - "model": { - "name": "jaspionjader/bh-41", - "sha": "754c1a81b955d34e2bc93505350f1cdbac527260", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.2479389953904, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4739856912990864, - "normalized_score": 47.39856912990864 - }, - "bbh": { - "name": "BBH", - "value": 0.56138466485423, - "normalized_score": 37.37607576190458 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12537764350453173, - "normalized_score": 12.537764350453173 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29278523489932884, - "normalized_score": 5.7046979865771785 - }, - "musr": { - "name": "MUSR", - "value": 0.41827083333333337, - "normalized_score": 11.083854166666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38248005319148937, - "normalized_score": 31.386672576832154 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "jaspionjader/bh-41 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3416012935225354 - } - }, - 
{ - "id": "jaspionjader/bh-42_bfloat16_fdfcc0bc8a5f57e80fbcfe52e579f21313475709_False", - "model": { - "name": "jaspionjader/bh-42", - "sha": "fdfcc0bc8a5f57e80fbcfe52e579f21313475709", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.232844116751995, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4660423232578447, - "normalized_score": 46.60423232578447 - }, - "bbh": { - "name": "BBH", - "value": 0.5645607204696422, - "normalized_score": 37.81206560286688 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1268882175226586, - "normalized_score": 12.688821752265861 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.42100000000000004, - "normalized_score": 10.891666666666671 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3812333776595745, - "normalized_score": 31.24815307328605 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "jaspionjader/bh-42 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.9993257286925663 - } - }, - { - "id": "jaspionjader/bh-43_bfloat16_eadc306794457ac36c57620519120cc9581acd9d_False", - "model": { - "name": "jaspionjader/bh-43", - "sha": "eadc306794457ac36c57620519120cc9581acd9d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.020973802572843, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45999725173650363, - "normalized_score": 45.99972517365036 - }, - "bbh": { - "name": "BBH", - "value": 0.5635240412618795, - "normalized_score": 37.6625467846229 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12386706948640483, - "normalized_score": 12.386706948640484 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.4156041666666667, - "normalized_score": 10.817187500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3819813829787234, - "normalized_score": 31.331264775413715 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "jaspionjader/bh-43 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3654369302798086 - } - }, - { - "id": "jaspionjader/bh-44_bfloat16_a737f1694616b96dca15cd7aa156cc3c9b6fb4ea_False", - "model": { - "name": "jaspionjader/bh-44", - "sha": "a737f1694616b96dca15cd7aa156cc3c9b6fb4ea", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.3932540087341, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4706135276621586, - "normalized_score": 47.06135276621586 - }, - "bbh": { - "name": "BBH", - "value": 0.5642775941837409, - "normalized_score": 
37.82162994785929 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1216012084592145, - "normalized_score": 12.16012084592145 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.42487500000000006, - "normalized_score": 11.676041666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3833942819148936, - "normalized_score": 31.488253546099287 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "jaspionjader/bh-44 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3557483523937854 - } - }, - { - "id": "jaspionjader/bh-46_bfloat16_b643bed0afb4b5d7ba26d9002e3d42d1c69707a2_False", - "model": { - "name": "jaspionjader/bh-46", - "sha": "b643bed0afb4b5d7ba26d9002e3d42d1c69707a2", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.45744275031464, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4727367828472896, - "normalized_score": 47.27367828472896 - }, - "bbh": { - "name": "BBH", - "value": 0.5631697539272891, - "normalized_score": 37.61558589155505 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12764350453172205, - "normalized_score": 12.764350453172204 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.4262395833333334, - "normalized_score": 11.57994791666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3822307180851064, - "normalized_score": 31.358968676122927 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "jaspionjader/bh-46 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.337495322713837 - } - }, - { - "id": "jaspionjader/bh-47_bfloat16_0ca02d4af5624fcc7c39c1ef7f8ae9b09faa5cc4_False", - "model": { - "name": "jaspionjader/bh-47", - "sha": "0ca02d4af5624fcc7c39c1ef7f8ae9b09faa5cc4", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.05418330216361, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.46516797652451053, - "normalized_score": 46.51679765245105 - }, - "bbh": { - "name": "BBH", - "value": 0.5545716016743777, - "normalized_score": 36.31256652223936 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12764350453172205, - "normalized_score": 12.764350453172204 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.4156041666666667, - "normalized_score": 11.083854166666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3854720744680851, - "normalized_score": 31.719119385342786 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - 
"is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "jaspionjader/bh-47 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3369329958007803 - } - }, - { - "id": "jaspionjader/bh-48_bfloat16_e16cff6ee81e0aa71cb922f2169bed436d529a23_False", - "model": { - "name": "jaspionjader/bh-48", - "sha": "e16cff6ee81e0aa71cb922f2169bed436d529a23", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.045748688816236, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.46881496651107946, - "normalized_score": 46.88149665110795 - }, - "bbh": { - "name": "BBH", - "value": 0.5541308128775738, - "normalized_score": 36.33510481146546 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12537764350453173, - "normalized_score": 12.537764350453173 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.4209375, - "normalized_score": 10.817187500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3859707446808511, - "normalized_score": 31.774527186761226 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "jaspionjader/bh-48 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3058294734706812 - } - }, - { - "id": "jaspionjader/bh-49_bfloat16_fbd82cfc8551a3ac58713afa0f8bf467b7bffa69_False", - "model": { - "name": "jaspionjader/bh-49", - "sha": "fbd82cfc8551a3ac58713afa0f8bf467b7bffa69", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.898201189992154, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47246195649764844, - "normalized_score": 47.24619564976484 - }, - "bbh": { - "name": "BBH", - "value": 0.5540285004706683, - "normalized_score": 36.290275257532365 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12009063444108761, - "normalized_score": 12.009063444108762 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.41290625000000003, - "normalized_score": 10.713281250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38081781914893614, - "normalized_score": 31.201979905437344 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "jaspionjader/bh-49 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3646746265360599 - } - }, - { - "id": "jaspionjader/bh-5_bfloat16_874eae010bbff82cdca13b05e35724316a926105_False", - "model": { - "name": "jaspionjader/bh-5", - "sha": "874eae010bbff82cdca13b05e35724316a926105", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": 
"LlamaForCausalLM", - "average_score": 24.28473590988246, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.46516797652451053, - "normalized_score": 46.51679765245105 - }, - "bbh": { - "name": "BBH", - "value": 0.5881569099353959, - "normalized_score": 41.30461071540461 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10574018126888217, - "normalized_score": 10.574018126888216 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.4186145833333333, - "normalized_score": 10.693489583333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37017952127659576, - "normalized_score": 30.019946808510632 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/bh-5 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 2.1091891409697805 - } - }, - { - "id": "jaspionjader/bh-50_bfloat16_d32075fa8ac27cc7730215c55304a290aadc0b0a_False", - "model": { - "name": "jaspionjader/bh-50", - "sha": "d32075fa8ac27cc7730215c55304a290aadc0b0a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.95066016776602, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47246195649764844, - "normalized_score": 47.24619564976484 - }, - "bbh": { - "name": "BBH", - "value": 0.555294802866646, - "normalized_score": 36.633310186842785 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12084592145015106, - "normalized_score": 12.084592145015106 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2936241610738255, - "normalized_score": 5.8165548098433995 - }, - "musr": { - "name": "MUSR", - "value": 0.41687500000000005, - "normalized_score": 10.342708333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3842253989361702, - "normalized_score": 31.580599881796683 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "jaspionjader/bh-50 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.2878958881775617 - } - }, - { - "id": "jaspionjader/bh-51_bfloat16_e869c9fedfea2c996c5d31bdce1f67d476e71e04_False", - "model": { - "name": "jaspionjader/bh-51", - "sha": "e869c9fedfea2c996c5d31bdce1f67d476e71e04", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.943535378176815, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4630447213393795, - "normalized_score": 46.30447213393795 - }, - "bbh": { - "name": "BBH", - "value": 0.5557101784534039, - "normalized_score": 36.60322305451519 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12386706948640483, - "normalized_score": 12.386706948640484 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29278523489932884, - "normalized_score": 5.7046979865771785 - }, - "musr": { - "name": "MUSR", - "value": 
0.41681250000000003, - "normalized_score": 11.201562500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38314494680851063, - "normalized_score": 31.460549645390074 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "jaspionjader/bh-51 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3122423717127931 - } - }, - { - "id": "jaspionjader/bh-52_bfloat16_a75649cfc4a393ae39417abf788745873a1b0aae_False", - "model": { - "name": "jaspionjader/bh-52", - "sha": "a75649cfc4a393ae39417abf788745873a1b0aae", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.45560997649567, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45362748618111054, - "normalized_score": 45.362748618111056 - }, - "bbh": { - "name": "BBH", - "value": 0.544409095161705, - "normalized_score": 34.932557534743474 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12009063444108761, - "normalized_score": 12.009063444108762 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.41690625000000003, - "normalized_score": 11.246614583333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38430851063829785, - "normalized_score": 31.589834515366427 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "jaspionjader/bh-52 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2902681408577599 - } - }, - { - "id": "jaspionjader/bh-53_bfloat16_a01ad63416a614cbfea90bab324d160c3305a7f1_False", - "model": { - "name": "jaspionjader/bh-53", - "sha": "a01ad63416a614cbfea90bab324d160c3305a7f1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.320241544546935, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4779573753197073, - "normalized_score": 47.79573753197073 - }, - "bbh": { - "name": "BBH", - "value": 0.5494367702137035, - "normalized_score": 35.57594881398253 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1268882175226586, - "normalized_score": 12.688821752265861 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29865771812080544, - "normalized_score": 6.487695749440725 - }, - "musr": { - "name": "MUSR", - "value": 0.4196041666666667, - "normalized_score": 11.617187500000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38580452127659576, - "normalized_score": 31.756057919621743 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "jaspionjader/bh-53 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.318827514819002 - } - 
}, - { - "id": "jaspionjader/bh-54_bfloat16_7de4a00abd99ff03ab957d1063672324e7debfd6_False", - "model": { - "name": "jaspionjader/bh-54", - "sha": "7de4a00abd99ff03ab957d1063672324e7debfd6", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.26650310289129, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.48405231452545916, - "normalized_score": 48.40523145254591 - }, - "bbh": { - "name": "BBH", - "value": 0.5547738488653888, - "normalized_score": 36.353920099875154 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12915407854984895, - "normalized_score": 12.915407854984895 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.4155416666666667, - "normalized_score": 10.609375000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38248005319148937, - "normalized_score": 31.386672576832154 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "jaspionjader/bh-54 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3204749007141379 - } - }, - { - "id": "jaspionjader/bh-55_bfloat16_08ed327c5b8b5897d4eb630e193d576565a3632e_False", - "model": { - "name": "jaspionjader/bh-55", - "sha": "08ed327c5b8b5897d4eb630e193d576565a3632e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.48915917092739, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47093822169621047, - "normalized_score": 47.09382216962105 - }, - "bbh": { - "name": "BBH", - "value": 0.5549641462109072, - "normalized_score": 36.47069849338268 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1283987915407855, - "normalized_score": 12.83987915407855 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3062080536912752, - "normalized_score": 7.494407158836691 - }, - "musr": { - "name": "MUSR", - "value": 0.42220833333333335, - "normalized_score": 11.409375000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3846409574468085, - "normalized_score": 31.62677304964539 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "jaspionjader/bh-55 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3167506366817783 - } - }, - { - "id": "jaspionjader/bh-56_bfloat16_b2e91ba7169d25d719d322ec23232fb7b26cdcc4_False", - "model": { - "name": "jaspionjader/bh-56", - "sha": "b2e91ba7169d25d719d322ec23232fb7b26cdcc4", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.70813110478299, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45999725173650363, - "normalized_score": 45.99972517365036 - }, - "bbh": { - "name": "BBH", - "value": 0.5446903231355648, - 
"normalized_score": 35.076883829070766 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12311178247734139, - "normalized_score": 12.311178247734139 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30033557046979864, - "normalized_score": 6.711409395973152 - }, - "musr": { - "name": "MUSR", - "value": 0.4116041666666667, - "normalized_score": 10.550520833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3843916223404255, - "normalized_score": 31.599069148936167 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "jaspionjader/bh-56 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.2553204749529787 - } - }, - { - "id": "jaspionjader/bh-57_float16_66ce88c930d110c5bc83c4d8dbc77dd39bdf9a1b_False", - "model": { - "name": "jaspionjader/bh-57", - "sha": "66ce88c930d110c5bc83c4d8dbc77dd39bdf9a1b", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.751844167517202, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44051339335186196, - "normalized_score": 44.0513393351862 - }, - "bbh": { - "name": "BBH", - "value": 0.5424621834237494, - "normalized_score": 34.57783861568968 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12613293051359517, - "normalized_score": 12.613293051359516 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.42103124999999997, - "normalized_score": 11.928906249999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3896276595744681, - "normalized_score": 32.18085106382979 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "jaspionjader/bh-57 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.8547357831923854 - } - }, - { - "id": "jaspionjader/bh-58_bfloat16_ef1f576ee205b7d4fee6849432b14930b4eee5c0_False", - "model": { - "name": "jaspionjader/bh-58", - "sha": "ef1f576ee205b7d4fee6849432b14930b4eee5c0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.270181220846553, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4630447213393795, - "normalized_score": 46.30447213393795 - }, - "bbh": { - "name": "BBH", - "value": 0.5446322106157867, - "normalized_score": 34.86550097653127 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13217522658610273, - "normalized_score": 13.217522658610273 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3062080536912752, - "normalized_score": 7.494407158836691 - }, - "musr": { - "name": "MUSR", - "value": 0.4183333333333333, - "normalized_score": 11.558333333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3896276595744681, - "normalized_score": 32.18085106382979 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - 
"is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "jaspionjader/bh-58 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3464084088747819 - } - }, - { - "id": "jaspionjader/bh-59_bfloat16_42e50611b04932fd6faa4a4206038179a7fc6fe6_False", - "model": { - "name": "jaspionjader/bh-59", - "sha": "42e50611b04932fd6faa4a4206038179a7fc6fe6", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.333977051688347, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43414362779646887, - "normalized_score": 43.41436277964689 - }, - "bbh": { - "name": "BBH", - "value": 0.5511531646170439, - "normalized_score": 35.897385616875816 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1540785498489426, - "normalized_score": 15.407854984894259 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31543624161073824, - "normalized_score": 8.7248322147651 - }, - "musr": { - "name": "MUSR", - "value": 0.41700000000000004, - "normalized_score": 11.025 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3838098404255319, - "normalized_score": 31.534426713947987 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-05", - "submission_date": "2025-02-05", - "generation": 1, - "base_model": "jaspionjader/bh-59 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.280590894222326 - } - }, - { - "id": "jaspionjader/bh-6_bfloat16_04ffb5a5f7233934eb02a12b6d81c783babe4701_False", - "model": { - "name": "jaspionjader/bh-6", - "sha": "04ffb5a5f7233934eb02a12b6d81c783babe4701", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.345390925830227, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4620706392372239, - "normalized_score": 46.20706392372239 - }, - "bbh": { - "name": "BBH", - "value": 0.5890658635262072, - "normalized_score": 41.492905820709844 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10876132930513595, - "normalized_score": 10.876132930513595 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.41991666666666666, - "normalized_score": 10.922916666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36976396276595747, - "normalized_score": 29.973773640661943 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/bh-6 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 2.2109803566019988 - } - }, - { - "id": "jaspionjader/bh-60_bfloat16_7fb0314fbc388d3f1078c30acff92e590a07e5aa_False", - "model": { - "name": "jaspionjader/bh-60", - "sha": "7fb0314fbc388d3f1078c30acff92e590a07e5aa", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - 
"architecture": "LlamaForCausalLM", - "average_score": 24.367675846211625, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42070484093316846, - "normalized_score": 42.07048409331685 - }, - "bbh": { - "name": "BBH", - "value": 0.5368509826419269, - "normalized_score": 33.68824984560268 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1578549848942598, - "normalized_score": 15.785498489425981 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32550335570469796, - "normalized_score": 10.067114093959727 - }, - "musr": { - "name": "MUSR", - "value": 0.42890625, - "normalized_score": 14.71328125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3689328457446808, - "normalized_score": 29.881427304964532 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-06", - "submission_date": "2025-02-06", - "generation": 1, - "base_model": "jaspionjader/bh-60 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 2.081683890277799 - } - }, - { - "id": "jaspionjader/bh-61_bfloat16_58936a9c6ce18005f5115bfef3bb0f9a28b31d05_False", - "model": { - "name": "jaspionjader/bh-61", - "sha": "58936a9c6ce18005f5115bfef3bb0f9a28b31d05", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.55820931591465, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42467652495378927, - "normalized_score": 42.46765249537893 - }, - "bbh": { - "name": "BBH", - "value": 0.5271029876122725, - "normalized_score": 32.55596453531747 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17069486404833836, - "normalized_score": 17.069486404833835 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3187919463087248, - "normalized_score": 9.172259507829976 - }, - "musr": { - "name": "MUSR", - "value": 0.4355729166666667, - "normalized_score": 16.31328125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3679355053191489, - "normalized_score": 29.770611702127653 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-06", - "submission_date": "2025-02-06", - "generation": 1, - "base_model": "jaspionjader/bh-61 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3660730955623497 - } - }, - { - "id": "jaspionjader/bh-62_bfloat16_a7e2e06af65fdd37436ab59ba46ba87515e19b23_False", - "model": { - "name": "jaspionjader/bh-62", - "sha": "a7e2e06af65fdd37436ab59ba46ba87515e19b23", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.433220801404314, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.41498446344587914, - "normalized_score": 41.49844634458792 - }, - "bbh": { - "name": "BBH", - "value": 0.5379352222621877, - "normalized_score": 34.005745917802976 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1623867069486405, - "normalized_score": 16.238670694864048 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32046979865771813, - "normalized_score": 9.395973154362418 - }, - "musr": { - "name": "MUSR", - "value": 
0.42890625, - "normalized_score": 15.246614583333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3719248670212766, - "normalized_score": 30.213874113475175 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-06", - "submission_date": "2025-02-06", - "generation": 1, - "base_model": "jaspionjader/bh-62 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3805567706646085 - } - }, - { - "id": "jaspionjader/bh-63_bfloat16_a4f9bc8e542a977baaf01f779bfac2e6e426c660_False", - "model": { - "name": "jaspionjader/bh-63", - "sha": "a4f9bc8e542a977baaf01f779bfac2e6e426c660", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.68160361165873, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43077146415954115, - "normalized_score": 43.07714641595411 - }, - "bbh": { - "name": "BBH", - "value": 0.49171126396743653, - "normalized_score": 27.810720725561197 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11102719033232629, - "normalized_score": 11.10271903323263 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.4312604166666667, - "normalized_score": 16.074218749999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3248005319148936, - "normalized_score": 24.977836879432623 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-17", - "submission_date": "2025-02-18", - "generation": 1, - "base_model": "jaspionjader/bh-63 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4170756790767831 - } - }, - { - "id": "jaspionjader/bh-64_bfloat16_c3ef59b8a3b5c6cb52726f06ce3282e2473f02a0_False", - "model": { - "name": "jaspionjader/bh-64", - "sha": "c3ef59b8a3b5c6cb52726f06ce3282e2473f02a0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.325275625065377, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.41401038134372353, - "normalized_score": 41.40103813437236 - }, - "bbh": { - "name": "BBH", - "value": 0.5359944334653838, - "normalized_score": 33.698327780013834 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15483383685800603, - "normalized_score": 15.483383685800604 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3213087248322148, - "normalized_score": 9.507829977628639 - }, - "musr": { - "name": "MUSR", - "value": 0.4355416666666667, - "normalized_score": 15.942708333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3692652925531915, - "normalized_score": 29.918365839243506 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-17", - "submission_date": "2025-02-18", - "generation": 1, - "base_model": "jaspionjader/bh-64 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.4000885095336175 - } - }, - { - 
"id": "jaspionjader/bh-7_bfloat16_d7dbb0fff29f3186f459a0b57d076e1aeee0f2e9_False", - "model": { - "name": "jaspionjader/bh-7", - "sha": "d7dbb0fff29f3186f459a0b57d076e1aeee0f2e9", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.27717239337679, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4623953332712758, - "normalized_score": 46.239533327127575 - }, - "bbh": { - "name": "BBH", - "value": 0.5860594415302606, - "normalized_score": 41.169212894943996 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11404833836858005, - "normalized_score": 11.404833836858005 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.41191666666666665, - "normalized_score": 9.522916666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3715093085106383, - "normalized_score": 30.167700945626475 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/bh-7 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3820944572691356 - } - }, - { - "id": "jaspionjader/bh-8_bfloat16_d378c72c713ab759638c78b71b8cdf9c6b00246c_False", - "model": { - "name": "jaspionjader/bh-8", - "sha": "d378c72c713ab759638c78b71b8cdf9c6b00246c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.723236409499417, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45967255770245175, - "normalized_score": 45.967255770245174 - }, - "bbh": { - "name": "BBH", - "value": 0.5899505025903907, - "normalized_score": 41.428206212410664 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11782477341389729, - "normalized_score": 11.782477341389729 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.4265208333333333, - "normalized_score": 12.115104166666661 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37200797872340424, - "normalized_score": 30.223108747044915 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/bh-8 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4091824887642739 - } - }, - { - "id": "jaspionjader/bh-9_bfloat16_84e708cbf9c0064766f98cb2da39713e1c4b9972_False", - "model": { - "name": "jaspionjader/bh-9", - "sha": "84e708cbf9c0064766f98cb2da39713e1c4b9972", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.077773579503162, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4508548429278758, - "normalized_score": 45.085484292787584 - }, - "bbh": { - "name": "BBH", - "value": 0.5850048697918168, - "normalized_score": 
40.93804479430806 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11555891238670694, - "normalized_score": 11.555891238670695 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - "musr": { - "name": "MUSR", - "value": 0.4145833333333333, - "normalized_score": 9.922916666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3702626329787234, - "normalized_score": 30.029181442080382 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/bh-9 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3725313775411982 - } - }, - { - "id": "jaspionjader/dp-6-8b_bfloat16_9a189355d1136b901b7b1af2449179916950064f_False", - "model": { - "name": "jaspionjader/dp-6-8b", - "sha": "9a189355d1136b901b7b1af2449179916950064f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.735409417342357, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4805804155197099, - "normalized_score": 48.05804155197099 - }, - "bbh": { - "name": "BBH", - "value": 0.5299697041031141, - "normalized_score": 32.66331333979792 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13293051359516617, - "normalized_score": 13.293051359516618 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.44338541666666664, - "normalized_score": 14.489843749999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38971077127659576, - "normalized_score": 32.190085697399525 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-02", - "generation": 1, - "base_model": "jaspionjader/dp-6-8b (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2714995115249983 - } - }, - { - "id": "jaspionjader/dp-7-8b_bfloat16_e62e9ba3d7ec6f9d73a97993f8b147554fea5e80_False", - "model": { - "name": "jaspionjader/dp-7-8b", - "sha": "e62e9ba3d7ec6f9d73a97993f8b147554fea5e80", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.05857973112694, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44983089314130953, - "normalized_score": 44.98308931413095 - }, - "bbh": { - "name": "BBH", - "value": 0.5290850650389306, - "normalized_score": 32.60386262129972 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12613293051359517, - "normalized_score": 12.613293051359516 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3062080536912752, - "normalized_score": 7.494407158836691 - }, - "musr": { - "name": "MUSR", - "value": 0.44075, - "normalized_score": 14.060416666666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3933676861702128, - "normalized_score": 32.59640957446809 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - 
"is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-02", - "generation": 1, - "base_model": "jaspionjader/dp-7-8b (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.3018850832262412 - } - }, - { - "id": "jaspionjader/ek-6_bfloat16_8169e51d12b9ed9693098dc0470852ddf7ba4c56_False", - "model": { - "name": "jaspionjader/ek-6", - "sha": "8169e51d12b9ed9693098dc0470852ddf7ba4c56", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.660510173858114, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4642437621067656, - "normalized_score": 46.424376210676556 - }, - "bbh": { - "name": "BBH", - "value": 0.5219292795769993, - "normalized_score": 31.302615174659127 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13217522658610273, - "normalized_score": 13.217522658610273 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3087248322147651, - "normalized_score": 7.829977628635347 - }, - "musr": { - "name": "MUSR", - "value": 0.4143645833333333, - "normalized_score": 11.39557291666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3861369680851064, - "normalized_score": 31.792996453900706 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-23", - "submission_date": "2025-01-23", - "generation": 1, - "base_model": "jaspionjader/ek-6 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3746079267365563 - } - }, - { - "id": "jaspionjader/ek-7_bfloat16_89b5da79ea12b2e1c076c17c763e1f57f90b0378_False", - "model": { - "name": "jaspionjader/ek-7", - "sha": "89b5da79ea12b2e1c076c17c763e1f57f90b0378", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.05190462596363, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47670846686791046, - "normalized_score": 47.67084668679105 - }, - "bbh": { - "name": "BBH", - "value": 0.5194098090521417, - "normalized_score": 30.932091410213463 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13293051359516617, - "normalized_score": 13.293051359516618 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3162751677852349, - "normalized_score": 8.83668903803132 - }, - "musr": { - "name": "MUSR", - "value": 0.41706249999999995, - "normalized_score": 11.499479166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38871343085106386, - "normalized_score": 32.079270094562645 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-23", - "submission_date": "2025-01-23", - "generation": 1, - "base_model": "jaspionjader/ek-7 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.2607618925710957 - } - }, - { - "id": "jaspionjader/f-1-8b_bfloat16_3b525d615d7c65dc50046237a76f14ac7e035ee6_False", - "model": { - "name": "jaspionjader/f-1-8b", - "sha": "3b525d615d7c65dc50046237a76f14ac7e035ee6", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": 
"LlamaForCausalLM", - "average_score": 24.950601941148005, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49826571275327247, - "normalized_score": 49.82657127532725 - }, - "bbh": { - "name": "BBH", - "value": 0.5140825686172996, - "normalized_score": 30.920344788610482 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1283987915407855, - "normalized_score": 12.83987915407855 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3087248322147651, - "normalized_score": 7.829977628635347 - }, - "musr": { - "name": "MUSR", - "value": 0.45268749999999996, - "normalized_score": 15.985937499999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39070811170212766, - "normalized_score": 32.300901300236404 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-01", - "submission_date": "2025-01-01", - "generation": 1, - "base_model": "jaspionjader/f-1-8b (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3471122729437774 - } - }, - { - "id": "jaspionjader/f-2-8b_bfloat16_abccb1e79e71c894cf28408ccf698a73fb6a0826_False", - "model": { - "name": "jaspionjader/f-2-8b", - "sha": "abccb1e79e71c894cf28408ccf698a73fb6a0826", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.65845169680976, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.48237897667078905, - "normalized_score": 48.237897667078904 - }, - "bbh": { - "name": "BBH", - "value": 0.5294150378468933, - "normalized_score": 32.7394137680834 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11706948640483383, - "normalized_score": 11.706948640483382 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.4500520833333334, - "normalized_score": 15.756510416666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39619348404255317, - "normalized_score": 32.91038711583924 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-01", - "submission_date": "2025-01-01", - "generation": 1, - "base_model": "jaspionjader/f-2-8b (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.2418458334556366 - } - }, - { - "id": "jaspionjader/f-3-8b_bfloat16_47083bf24e9c3856ca8a630d455065dcbd8d889e_False", - "model": { - "name": "jaspionjader/f-3-8b", - "sha": "47083bf24e9c3856ca8a630d455065dcbd8d889e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.704183722506134, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4803055891700687, - "normalized_score": 48.03055891700687 - }, - "bbh": { - "name": "BBH", - "value": 0.5274906581043712, - "normalized_score": 32.37894525683091 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1216012084592145, - "normalized_score": 12.16012084592145 - }, - "gpqa": { - "name": "GPQA", - "value": 0.313758389261745, - "normalized_score": 8.501118568232664 - }, - "musr": { - "name": "MUSR", - "value": 
0.44208333333333333, - "normalized_score": 14.327083333333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39544547872340424, - "normalized_score": 32.827275413711575 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-01", - "submission_date": "2025-01-01", - "generation": 1, - "base_model": "jaspionjader/f-3-8b (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2717586051192296 - } - }, - { - "id": "jaspionjader/f-4-8b_bfloat16_9cc30b7349fd1d1bcb352531a5d2febbfc3cc095_False", - "model": { - "name": "jaspionjader/f-4-8b", - "sha": "9cc30b7349fd1d1bcb352531a5d2febbfc3cc095", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.76687133257254, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4797060687863757, - "normalized_score": 47.97060687863757 - }, - "bbh": { - "name": "BBH", - "value": 0.5288622486396436, - "normalized_score": 32.54745293621357 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1148036253776435, - "normalized_score": 11.48036253776435 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3087248322147651, - "normalized_score": 7.829977628635347 - }, - "musr": { - "name": "MUSR", - "value": 0.45141666666666663, - "normalized_score": 15.927083333333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39561170212765956, - "normalized_score": 32.845744680851055 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-01", - "submission_date": "2025-01-01", - "generation": 1, - "base_model": "jaspionjader/f-4-8b (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3060924340979272 - } - }, - { - "id": "jaspionjader/f-5-8b_bfloat16_eef2ab8ea02dbb1a901ee2a3e157108aaada5676_False", - "model": { - "name": "jaspionjader/f-5-8b", - "sha": "eef2ab8ea02dbb1a901ee2a3e157108aaada5676", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.236685497779423, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5043606519590242, - "normalized_score": 50.43606519590243 - }, - "bbh": { - "name": "BBH", - "value": 0.5313273519630752, - "normalized_score": 32.98155315144156 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12386706948640483, - "normalized_score": 12.386706948640484 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3087248322147651, - "normalized_score": 7.829977628635347 - }, - "musr": { - "name": "MUSR", - "value": 0.4460520833333334, - "normalized_score": 15.023177083333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39486369680851063, - "normalized_score": 32.7626329787234 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-01", - "submission_date": "2025-01-01", - "generation": 1, - "base_model": "jaspionjader/f-5-8b (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.272994814836517 - } - 
}, - { - "id": "jaspionjader/f-6-8b_bfloat16_57a3cc9f1d70dffbb9bb679aee0a07a8af7ba2b3_False", - "model": { - "name": "jaspionjader/f-6-8b", - "sha": "57a3cc9f1d70dffbb9bb679aee0a07a8af7ba2b3", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.646304339780215, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.48460196722474147, - "normalized_score": 48.46019672247415 - }, - "bbh": { - "name": "BBH", - "value": 0.524094753042471, - "normalized_score": 32.052317591512676 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11933534743202417, - "normalized_score": 11.933534743202417 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.44735416666666666, - "normalized_score": 15.052604166666661 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3939494680851064, - "normalized_score": 32.661052009456256 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-02", - "generation": 1, - "base_model": "jaspionjader/f-6-8b (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2474840432667238 - } - }, - { - "id": "jaspionjader/f-7-8b_bfloat16_671c04277a95ef6d14d4fe544770e7afd3a0a26c_False", - "model": { - "name": "jaspionjader/f-7-8b", - "sha": "671c04277a95ef6d14d4fe544770e7afd3a0a26c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.808814144149792, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4462337708391512, - "normalized_score": 44.623377083915116 - }, - "bbh": { - "name": "BBH", - "value": 0.5277022085059414, - "normalized_score": 32.323956862199424 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12386706948640483, - "normalized_score": 12.386706948640484 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31291946308724833, - "normalized_score": 8.389261744966444 - }, - "musr": { - "name": "MUSR", - "value": 0.4315104166666666, - "normalized_score": 12.505468749999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39361702127659576, - "normalized_score": 32.6241134751773 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-02", - "generation": 1, - "base_model": "jaspionjader/f-7-8b (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.282092306482512 - } - }, - { - "id": "jaspionjader/f-8-8b_bfloat16_3192c85dba7d5762d5e9e76746b71e8815f40606_False", - "model": { - "name": "jaspionjader/f-8-8b", - "sha": "3192c85dba7d5762d5e9e76746b71e8815f40606", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.24099112237977, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4739358236146758, - "normalized_score": 47.39358236146758 - }, - "bbh": { - "name": "BBH", - "value": 0.5259311478463803, - 
"normalized_score": 32.106937481055674 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12235649546827794, - "normalized_score": 12.235649546827794 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.43544791666666666, - "normalized_score": 13.097656249999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39403257978723405, - "normalized_score": 32.67028664302601 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-02", - "generation": 1, - "base_model": "jaspionjader/f-8-8b (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.2441897490485316 - } - }, - { - "id": "jaspionjader/f-9-8b_bfloat16_de985cf1faa0f77d8f666236ade6b165e255a83a_False", - "model": { - "name": "jaspionjader/f-9-8b", - "sha": "de985cf1faa0f77d8f666236ade6b165e255a83a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.48334046224257, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4601723427173233, - "normalized_score": 46.017234271732335 - }, - "bbh": { - "name": "BBH", - "value": 0.5291558412946383, - "normalized_score": 32.629822943023534 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1299093655589124, - "normalized_score": 12.990936555891238 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3062080536912752, - "normalized_score": 7.494407158836691 - }, - "musr": { - "name": "MUSR", - "value": 0.44608333333333333, - "normalized_score": 15.060416666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3943650265957447, - "normalized_score": 32.70722517730496 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-02", - "generation": 1, - "base_model": "jaspionjader/f-9-8b (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.308281193697638 - } - }, - { - "id": "jaspionjader/fct-14-8b_bfloat16_3081f7643cc1832fd0451c6e12eb5f85f5c2e8f3_False", - "model": { - "name": "jaspionjader/fct-14-8b", - "sha": "3081f7643cc1832fd0451c6e12eb5f85f5c2e8f3", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.806355745852326, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4128612082607481, - "normalized_score": 41.28612082607481 - }, - "bbh": { - "name": "BBH", - "value": 0.5206018889288543, - "normalized_score": 31.53726552564611 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12009063444108761, - "normalized_score": 12.009063444108762 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3162751677852349, - "normalized_score": 8.83668903803132 - }, - "musr": { - "name": "MUSR", - "value": 0.4185520833333333, - "normalized_score": 11.219010416666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3875498670212766, - "normalized_score": 31.949985224586293 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, 
- "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-04", - "submission_date": "2025-01-04", - "generation": 1, - "base_model": "jaspionjader/fct-14-8b (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3011536994911783 - } - }, - { - "id": "jaspionjader/fct-9-8b_bfloat16_2c4787a2874d16cb02cc6307b1afde50bf82bc92_False", - "model": { - "name": "jaspionjader/fct-9-8b", - "sha": "2c4787a2874d16cb02cc6307b1afde50bf82bc92", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.25010065000295, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4353925362482657, - "normalized_score": 43.53925362482657 - }, - "bbh": { - "name": "BBH", - "value": 0.520510244410076, - "normalized_score": 31.232275252624323 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11933534743202417, - "normalized_score": 11.933534743202417 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.42906249999999996, - "normalized_score": 12.499479166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39320146276595747, - "normalized_score": 32.5779403073286 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-04", - "submission_date": "2025-01-04", - "generation": 1, - "base_model": "jaspionjader/fct-9-8b (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.4732704881335137 - } - }, - { - "id": "jaspionjader/fr-1-8b_bfloat16_580be62d1325215d030fce86c8639a31e97367e5_False", - "model": { - "name": "jaspionjader/fr-1-8b", - "sha": "580be62d1325215d030fce86c8639a31e97367e5", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.065073898707798, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.421079402651631, - "normalized_score": 42.1079402651631 - }, - "bbh": { - "name": "BBH", - "value": 0.5142290494968609, - "normalized_score": 31.055328358201677 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11178247734138973, - "normalized_score": 11.178247734138973 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.4276979166666666, - "normalized_score": 11.662239583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36103723404255317, - "normalized_score": 29.00413711583924 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-05", - "submission_date": "2025-01-05", - "generation": 1, - "base_model": "jaspionjader/fr-1-8b (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2811561102040658 - } - }, - { - "id": "jaspionjader/fr-10-8b_bfloat16_2f246c1a4816b91f395ffa8c113f7fae1f74f2d6_False", - "model": { - "name": "jaspionjader/fr-10-8b", - "sha": "2f246c1a4816b91f395ffa8c113f7fae1f74f2d6", - "precision": "bfloat16", - "type": "basemergesandmoerges", - 
"weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.98290735666885, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44018869931781013, - "normalized_score": 44.01886993178101 - }, - "bbh": { - "name": "BBH", - "value": 0.5206624978702634, - "normalized_score": 31.101142245733232 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12235649546827794, - "normalized_score": 12.235649546827794 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31711409395973156, - "normalized_score": 8.948545861297541 - }, - "musr": { - "name": "MUSR", - "value": 0.4118541666666667, - "normalized_score": 9.781770833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3863031914893617, - "normalized_score": 31.811465721040182 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-05", - "submission_date": "2025-01-05", - "generation": 1, - "base_model": "jaspionjader/fr-10-8b (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.2102327181157422 - } - }, - { - "id": "jaspionjader/fr-3-8b_bfloat16_2ed077d0ce608b7250342265ac7482a9d8b0c748_False", - "model": { - "name": "jaspionjader/fr-3-8b", - "sha": "2ed077d0ce608b7250342265ac7482a9d8b0c748", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.896518178840868, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4325700253106203, - "normalized_score": 43.25700253106203 - }, - "bbh": { - "name": "BBH", - "value": 0.5255174690526301, - "normalized_score": 32.054254099420874 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11329305135951662, - "normalized_score": 11.329305135951662 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.41982291666666666, - "normalized_score": 11.544531249999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3863031914893617, - "normalized_score": 31.811465721040182 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-05", - "submission_date": "2025-01-05", - "generation": 1, - "base_model": "jaspionjader/fr-3-8b (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3036556633338388 - } - }, - { - "id": "jaspionjader/gamma-Kosmos-EVAA-8B_bfloat16_a219a68fce6de7a153d0537b6de6da3a929e3ce0_False", - "model": { - "name": "jaspionjader/gamma-Kosmos-EVAA-8B", - "sha": "a219a68fce6de7a153d0537b6de6da3a929e3ce0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.91189753415071, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42500121898784116, - "normalized_score": 42.500121898784116 - }, - "bbh": { - "name": "BBH", - "value": 0.5252624326543692, - "normalized_score": 32.36253555048168 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08987915407854985, - "normalized_score": 8.987915407854985 - }, - "gpqa": { - "name": "GPQA", - "value": 0.313758389261745, - 
"normalized_score": 8.501118568232664 - }, - "musr": { - "name": "MUSR", - "value": 0.44115624999999997, - "normalized_score": 14.277864583333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37757646276595747, - "normalized_score": 30.841829196217496 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-28", - "submission_date": "2024-12-28", - "generation": 1, - "base_model": "jaspionjader/gamma-Kosmos-EVAA-8B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.2357917815442743 - } - }, - { - "id": "jaspionjader/gamma-Kosmos-EVAA-v2-8B_bfloat16_eeb700c64d5967e7c59eaa7c701c225d999a3308_False", - "model": { - "name": "jaspionjader/gamma-Kosmos-EVAA-v2-8B", - "sha": "eeb700c64d5967e7c59eaa7c701c225d999a3308", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.10243444820793, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4232525255211727, - "normalized_score": 42.32525255211727 - }, - "bbh": { - "name": "BBH", - "value": 0.5262464083930688, - "normalized_score": 32.46635236533594 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10574018126888217, - "normalized_score": 10.574018126888216 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32046979865771813, - "normalized_score": 9.395973154362418 - }, - "musr": { - "name": "MUSR", - "value": 0.4343958333333333, - "normalized_score": 13.232812499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3755817819148936, - "normalized_score": 30.62019799054373 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-28", - "submission_date": "2024-12-28", - "generation": 1, - "base_model": "jaspionjader/gamma-Kosmos-EVAA-v2-8B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2220187804593092 - } - }, - { - "id": "jaspionjader/gamma-Kosmos-EVAA-v3-8B_bfloat16_90381757fd00c499a4a115f02e0867228c857546_False", - "model": { - "name": "jaspionjader/gamma-Kosmos-EVAA-v3-8B", - "sha": "90381757fd00c499a4a115f02e0867228c857546", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.195991097275467, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43326928106313467, - "normalized_score": 43.32692810631347 - }, - "bbh": { - "name": "BBH", - "value": 0.527793553969925, - "normalized_score": 32.436623618170984 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11102719033232629, - "normalized_score": 11.10271903323263 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31291946308724833, - "normalized_score": 8.389261744966444 - }, - "musr": { - "name": "MUSR", - "value": 0.4263020833333333, - "normalized_score": 11.721093750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3897938829787234, - "normalized_score": 32.19932033096927 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-28", - "submission_date": 
"2024-12-28", - "generation": 1, - "base_model": "jaspionjader/gamma-Kosmos-EVAA-v3-8B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.1916567657175103 - } - }, - { - "id": "jaspionjader/knf-2-8b_bfloat16_856a711ca04c93b85badb7eb147e691b50ab56dd_False", - "model": { - "name": "jaspionjader/knf-2-8b", - "sha": "856a711ca04c93b85badb7eb147e691b50ab56dd", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.8072765120901, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42500121898784116, - "normalized_score": 42.500121898784116 - }, - "bbh": { - "name": "BBH", - "value": 0.5206718655559387, - "normalized_score": 31.224927696796726 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12009063444108761, - "normalized_score": 12.009063444108762 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3104026845637584, - "normalized_score": 8.05369127516779 - }, - "musr": { - "name": "MUSR", - "value": 0.4185208333333333, - "normalized_score": 11.115104166666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3874667553191489, - "normalized_score": 31.940750591016542 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-14", - "submission_date": "2025-01-14", - "generation": 1, - "base_model": "jaspionjader/knf-2-8b (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.3370901305776082 - } - }, - { - "id": "jaspionjader/knfp-2-8b_bfloat16_912bc000c1e4855ac337e8f7f6573aa331098d90_False", - "model": { - "name": "jaspionjader/knfp-2-8b", - "sha": "912bc000c1e4855ac337e8f7f6573aa331098d90", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.672507998564374, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5327120928026525, - "normalized_score": 53.27120928026525 - }, - "bbh": { - "name": "BBH", - "value": 0.5304878011708133, - "normalized_score": 33.05584007121163 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14274924471299094, - "normalized_score": 14.274924471299094 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29278523489932884, - "normalized_score": 5.7046979865771785 - }, - "musr": { - "name": "MUSR", - "value": 0.4184583333333333, - "normalized_score": 11.440624999999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37258976063829785, - "normalized_score": 30.287751182033094 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-15", - "submission_date": "2025-01-15", - "generation": 1, - "base_model": "jaspionjader/knfp-2-8b (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3830640815292654 - } - }, - { - "id": "jaspionjader/knfp-3-8b_bfloat16_6ca6f01737225171917e6cb4e819a1713f100614_False", - "model": { - "name": "jaspionjader/knfp-3-8b", - "sha": "6ca6f01737225171917e6cb4e819a1713f100614", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.99800729357214, - 
"has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49456885508229276, - "normalized_score": 49.45688550822928 - }, - "bbh": { - "name": "BBH", - "value": 0.5199790073136731, - "normalized_score": 31.233992064698388 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12235649546827794, - "normalized_score": 12.235649546827794 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.41712499999999997, - "normalized_score": 11.440624999999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3881316489361702, - "normalized_score": 32.014627659574465 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-15", - "submission_date": "2025-01-15", - "generation": 1, - "base_model": "jaspionjader/knfp-3-8b (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.29315344644979 - } - }, - { - "id": "jaspionjader/kstc-1-8b_bfloat16_04fea34b48af65d0182cfb2741f625a8a2b66607_False", - "model": { - "name": "jaspionjader/kstc-1-8b", - "sha": "04fea34b48af65d0182cfb2741f625a8a2b66607", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.579746063684382, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4642936297911763, - "normalized_score": 46.42936297911763 - }, - "bbh": { - "name": "BBH", - "value": 0.5209048705325947, - "normalized_score": 31.351649338559994 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11706948640483383, - "normalized_score": 11.706948640483382 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31711409395973156, - "normalized_score": 8.948545861297541 - }, - "musr": { - "name": "MUSR", - "value": 0.4157916666666666, - "normalized_score": 10.90729166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3892121010638298, - "normalized_score": 32.134677895981085 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-16", - "submission_date": "2025-01-16", - "generation": 1, - "base_model": "jaspionjader/kstc-1-8b (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.353302886543091 - } - }, - { - "id": "jaspionjader/kstc-11-8b_bfloat16_37f67969d19bd4c470fe72dc083c0bd140caa5d4_False", - "model": { - "name": "jaspionjader/kstc-11-8b", - "sha": "37f67969d19bd4c470fe72dc083c0bd140caa5d4", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.422736110402607, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4757343847657549, - "normalized_score": 47.57343847657549 - }, - "bbh": { - "name": "BBH", - "value": 0.5189389675805397, - "normalized_score": 31.116625700427637 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12009063444108761, - "normalized_score": 12.009063444108762 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.4117604166666667, - 
"normalized_score": 10.80338541666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3878823138297872, - "normalized_score": 31.98692375886525 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-16", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2441986131850935 - } - }, - { - "id": "jaspionjader/kstc-4-8b_bfloat16_3a363149231244e7ae48fbdec911c1a54f1de940_False", - "model": { - "name": "jaspionjader/kstc-4-8b", - "sha": "3a363149231244e7ae48fbdec911c1a54f1de940", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.583995424044815, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4769832932175517, - "normalized_score": 47.69832932175517 - }, - "bbh": { - "name": "BBH", - "value": 0.5216059333020012, - "normalized_score": 31.476699762140168 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12386706948640483, - "normalized_score": 12.386706948640484 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.4117916666666666, - "normalized_score": 10.90729166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3868849734042553, - "normalized_score": 31.876108156028373 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-16", - "submission_date": "2025-01-16", - "generation": 1, - "base_model": "jaspionjader/kstc-4-8b (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.252027183144063 - } - }, - { - "id": "jaspionjader/kstc-5-8b_bfloat16_2d03032c74cfa248ef84fc9503c22eb6ee200098_False", - "model": { - "name": "jaspionjader/kstc-5-8b", - "sha": "2d03032c74cfa248ef84fc9503c22eb6ee200098", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.077626579224447, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47208739477918593, - "normalized_score": 47.208739477918584 - }, - "bbh": { - "name": "BBH", - "value": 0.5211438914491455, - "normalized_score": 31.373760830790683 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1299093655589124, - "normalized_score": 12.990936555891238 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31543624161073824, - "normalized_score": 8.7248322147651 - }, - "musr": { - "name": "MUSR", - "value": 0.4223958333333333, - "normalized_score": 12.032812499999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3892121010638298, - "normalized_score": 32.134677895981085 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-16", - "submission_date": "2025-01-17", - "generation": 1, - "base_model": "jaspionjader/kstc-5-8b (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.231027975432848 - } - }, - { - "id": 
"jaspionjader/kstc-6-8b_bfloat16_3a0cda5b0b379790131a64407147ca01b520c342_False", - "model": { - "name": "jaspionjader/kstc-6-8b", - "sha": "3a0cda5b0b379790131a64407147ca01b520c342", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.838631190941225, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49439376410147295, - "normalized_score": 49.43937641014729 - }, - "bbh": { - "name": "BBH", - "value": 0.5230977287748603, - "normalized_score": 31.50593464386095 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12462235649546828, - "normalized_score": 12.462235649546828 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.4104895833333333, - "normalized_score": 11.277864583333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3857214095744681, - "normalized_score": 31.746823286052013 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-16", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2622968203867957 - } - }, - { - "id": "jaspionjader/kstc-8-8b_bfloat16_73eb9131c63ff4d7f753541870cb120c689730bc_False", - "model": { - "name": "jaspionjader/kstc-8-8b", - "sha": "73eb9131c63ff4d7f753541870cb120c689730bc", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.195358668793858, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49097173278013445, - "normalized_score": 49.09717327801344 - }, - "bbh": { - "name": "BBH", - "value": 0.5238910223750602, - "normalized_score": 31.820932114012862 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13066465256797583, - "normalized_score": 13.066465256797583 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.42112499999999997, - "normalized_score": 11.707291666666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3888796542553192, - "normalized_score": 32.097739361702125 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-16", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2513218816498801 - } - }, - { - "id": "jaspionjader/kstc-9-8b_bfloat16_fe7fdf0a4e561b81843bd2074d0b4e7a2ac86240_False", - "model": { - "name": "jaspionjader/kstc-9-8b", - "sha": "fe7fdf0a4e561b81843bd2074d0b4e7a2ac86240", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.936405590737944, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4860758343417687, - "normalized_score": 48.607583434176874 - }, - "bbh": { - "name": "BBH", - "value": 0.5238366551736342, - "normalized_score": 31.77207937089541 - }, - "math": { - 
"name": "MATH Level 5", - "value": 0.13595166163141995, - "normalized_score": 13.595166163141995 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.4117916666666666, - "normalized_score": 10.90729166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38721742021276595, - "normalized_score": 31.91304669030733 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-16", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3353232700936393 - } - }, - { - "id": "jaspionjader/slu-10_bfloat16_7fb721626cee9d3ce756d45577856378b89cce7e_False", - "model": { - "name": "jaspionjader/slu-10", - "sha": "7fb721626cee9d3ce756d45577856378b89cce7e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.55040710616896, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4359920566319587, - "normalized_score": 43.599205663195875 - }, - "bbh": { - "name": "BBH", - "value": 0.5096469529197213, - "normalized_score": 29.96246024103085 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09743202416918428, - "normalized_score": 9.743202416918429 - }, - "gpqa": { - "name": "GPQA", - "value": 0.313758389261745, - "normalized_score": 8.501118568232664 - }, - "musr": { - "name": "MUSR", - "value": 0.3920104166666667, - "normalized_score": 7.901302083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3663563829787234, - "normalized_score": 29.595153664302604 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-01", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/slu-10 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3183487838470793 - } - }, - { - "id": "jaspionjader/slu-11_bfloat16_88f36d10b9e730ef1169e0418b20146322eea1d8_False", - "model": { - "name": "jaspionjader/slu-11", - "sha": "88f36d10b9e730ef1169e0418b20146322eea1d8", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.804382344048094, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.372519359743259, - "normalized_score": 37.251935974325896 - }, - "bbh": { - "name": "BBH", - "value": 0.4890236865115587, - "normalized_score": 28.134961733027723 - }, - "math": { - "name": "MATH Level 5", - "value": 0.055891238670694864, - "normalized_score": 5.589123867069486 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.3919479166666667, - "normalized_score": 8.226822916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33818151595744683, - "normalized_score": 26.46461288416076 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": 
"2025-02-01", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/slu-11 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.1244251961237677 - } - }, - { - "id": "jaspionjader/slu-13_bfloat16_39e80ca36621971a8990bf3856ee90fcc9f6612d_False", - "model": { - "name": "jaspionjader/slu-13", - "sha": "39e80ca36621971a8990bf3856ee90fcc9f6612d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.904970185551083, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4378404854674486, - "normalized_score": 43.784048546744856 - }, - "bbh": { - "name": "BBH", - "value": 0.5097334543819346, - "normalized_score": 29.865275873551706 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08081570996978851, - "normalized_score": 8.08157099697885 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.38140625, - "normalized_score": 7.309114583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35804521276595747, - "normalized_score": 28.671690307328607 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-01", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/slu-13 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.2858861333071643 - } - }, - { - "id": "jaspionjader/slu-14_bfloat16_fe53372aaac2ae2ca734f795ef18abab135738a1_False", - "model": { - "name": "jaspionjader/slu-14", - "sha": "fe53372aaac2ae2ca734f795ef18abab135738a1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.889513928235285, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4106880853912065, - "normalized_score": 41.068808539120646 - }, - "bbh": { - "name": "BBH", - "value": 0.5088505978489455, - "normalized_score": 29.879580354102824 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09743202416918428, - "normalized_score": 9.743202416918429 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.3960416666666667, - "normalized_score": 7.738541666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3626994680851064, - "normalized_score": 29.188829787234038 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-01", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/slu-14 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.2612141100902652 - } - }, - { - "id": "jaspionjader/slu-17_bfloat16_65cbd80b02a7f97c548af131e25184b932953e7f_False", - "model": { - "name": "jaspionjader/slu-17", - "sha": "65cbd80b02a7f97c548af131e25184b932953e7f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.804993679674656, - 
"has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42167892303532406, - "normalized_score": 42.167892303532405 - }, - "bbh": { - "name": "BBH", - "value": 0.5070562055653275, - "normalized_score": 29.787844658593286 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08534743202416918, - "normalized_score": 8.534743202416918 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3087248322147651, - "normalized_score": 7.829977628635347 - }, - "musr": { - "name": "MUSR", - "value": 0.3761041666666667, - "normalized_score": 7.413020833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3618683510638298, - "normalized_score": 29.096483451536642 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-01", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/slu-17 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.2847611084904456 - } - }, - { - "id": "jaspionjader/slu-2_bfloat16_2ad46e3787ba34222aa83f1c7cbd48259f658ea1_False", - "model": { - "name": "jaspionjader/slu-2", - "sha": "2ad46e3787ba34222aa83f1c7cbd48259f658ea1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.756717480195242, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.40159554426698935, - "normalized_score": 40.15955442669893 - }, - "bbh": { - "name": "BBH", - "value": 0.5008068127974601, - "normalized_score": 28.879825492610465 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0634441087613293, - "normalized_score": 6.3444108761329305 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2986577181208054, - "normalized_score": 6.487695749440718 - }, - "musr": { - "name": "MUSR", - "value": 0.3958854166666667, - "normalized_score": 8.819010416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35064827127659576, - "normalized_score": 27.849807919621743 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-01", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/slu-2 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.2531850930850796 - } - }, - { - "id": "jaspionjader/slu-20_bfloat16_e636af1767c6f60fa0b5362010d8e5e000619458_False", - "model": { - "name": "jaspionjader/slu-20", - "sha": "e636af1767c6f60fa0b5362010d8e5e000619458", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.37957338953326, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4393143525844759, - "normalized_score": 43.93143525844759 - }, - "bbh": { - "name": "BBH", - "value": 0.5061273966566772, - "normalized_score": 29.24863516444493 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08685800604229606, - "normalized_score": 8.685800604229605 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3087248322147651, - "normalized_score": 7.829977628635347 - }, - "musr": { - "name": "MUSR", - "value": 0.39334375000000005, - "normalized_score": 
8.967968750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36652260638297873, - "normalized_score": 29.613622931442084 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-01", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/slu-20 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3226731418683069 - } - }, - { - "id": "jaspionjader/slu-22_bfloat16_94717cb95690e8de06c24f6fedc11a97ba0f704f_False", - "model": { - "name": "jaspionjader/slu-22", - "sha": "94717cb95690e8de06c24f6fedc11a97ba0f704f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.188978293763615, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4321201079801593, - "normalized_score": 43.21201079801593 - }, - "bbh": { - "name": "BBH", - "value": 0.5081790871805086, - "normalized_score": 29.539288054181537 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07930513595166164, - "normalized_score": 7.930513595166164 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3162751677852349, - "normalized_score": 8.83668903803132 - }, - "musr": { - "name": "MUSR", - "value": 0.38934375000000004, - "normalized_score": 8.167968750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3650265957446808, - "normalized_score": 29.44739952718675 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-01", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/slu-22 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.2822667377774175 - } - }, - { - "id": "jaspionjader/slu-23_bfloat16_981504403eb1b288f0a10bb3fd7bf1ff5ba13df8_False", - "model": { - "name": "jaspionjader/slu-23", - "sha": "981504403eb1b288f0a10bb3fd7bf1ff5ba13df8", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.05300469060442, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44780737332499987, - "normalized_score": 44.78073733249999 - }, - "bbh": { - "name": "BBH", - "value": 0.5131603005034272, - "normalized_score": 30.2907431370659 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09441087613293052, - "normalized_score": 9.441087613293051 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.40924999999999995, - "normalized_score": 10.256249999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3725066489361702, - "normalized_score": 30.27851654846335 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/slu-23 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3414044761902386 - } - }, - { - "id": 
"jaspionjader/slu-25_bfloat16_0470c84ba9c1c516efb6b5b6f4e1e0fc4ee56af0_False", - "model": { - "name": "jaspionjader/slu-25", - "sha": "0470c84ba9c1c516efb6b5b6f4e1e0fc4ee56af0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.53675810500795, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4500303638789523, - "normalized_score": 45.00303638789523 - }, - "bbh": { - "name": "BBH", - "value": 0.5094887898349904, - "normalized_score": 29.95100639270016 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08383685800604229, - "normalized_score": 8.38368580060423 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3087248322147651, - "normalized_score": 7.829977628635347 - }, - "musr": { - "name": "MUSR", - "value": 0.3946145833333334, - "normalized_score": 8.226822916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3684341755319149, - "normalized_score": 29.826019503546092 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/slu-25 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.2431247679452595 - } - }, - { - "id": "jaspionjader/slu-29_bfloat16_15da91c95ca92d1ad6f3747c4d943138e4492090_False", - "model": { - "name": "jaspionjader/slu-29", - "sha": "15da91c95ca92d1ad6f3747c4d943138e4492090", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.424562623311605, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4430610779398662, - "normalized_score": 44.30610779398663 - }, - "bbh": { - "name": "BBH", - "value": 0.5096472519745161, - "normalized_score": 29.95867809359305 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08685800604229606, - "normalized_score": 8.685800604229605 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.3933125, - "normalized_score": 8.330729166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.366938164893617, - "normalized_score": 29.659796099290777 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/slu-29 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3989189692021649 - } - }, - { - "id": "jaspionjader/slu-32_bfloat16_878be6d0004aba4c05dade9def452339cf78c75f_False", - "model": { - "name": "jaspionjader/slu-32", - "sha": "878be6d0004aba4c05dade9def452339cf78c75f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.29921319060769, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45155409868039026, - "normalized_score": 45.15540986803903 - }, - "bbh": { - "name": "BBH", - "value": 0.5167277162337642, - "normalized_score": 
30.525740444353357 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10725075528700906, - "normalized_score": 10.725075528700906 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - "musr": { - "name": "MUSR", - "value": 0.4039166666666667, - "normalized_score": 9.722916666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3765791223404255, - "normalized_score": 30.731013593380606 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/slu-32 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.389078085306773 - } - }, - { - "id": "jaspionjader/slu-33_bfloat16_6ff20fee962f575e746ed728dc8e15effab908ad_False", - "model": { - "name": "jaspionjader/slu-33", - "sha": "6ff20fee962f575e746ed728dc8e15effab908ad", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.668822088893943, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4457339858242796, - "normalized_score": 44.573398582427956 - }, - "bbh": { - "name": "BBH", - "value": 0.5081308429202344, - "normalized_score": 29.520426724137007 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09969788519637462, - "normalized_score": 9.969788519637463 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31208053691275167, - "normalized_score": 8.277404921700223 - }, - "musr": { - "name": "MUSR", - "value": 0.38667708333333334, - "normalized_score": 7.901302083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3679355053191489, - "normalized_score": 29.770611702127653 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/slu-33 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2767204967176065 - } - }, - { - "id": "jaspionjader/slu-34_bfloat16_17e05fb543a135faf323dc9b7bf7118af25fd791_False", - "model": { - "name": "jaspionjader/slu-34", - "sha": "17e05fb543a135faf323dc9b7bf7118af25fd791", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.496927670912054, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4350678422142138, - "normalized_score": 43.506784221421384 - }, - "bbh": { - "name": "BBH", - "value": 0.5077400809148992, - "normalized_score": 29.5585553986661 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09969788519637462, - "normalized_score": 9.969788519637463 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.3880416666666667, - "normalized_score": 8.005208333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37200797872340424, - "normalized_score": 30.223108747044915 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - 
"is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/slu-34 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3281678288401984 - } - }, - { - "id": "jaspionjader/slu-35_bfloat16_999a6819da634455cc83f7a2efd54aec1f3a07e0_False", - "model": { - "name": "jaspionjader/slu-35", - "sha": "999a6819da634455cc83f7a2efd54aec1f3a07e0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.561331301961175, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42417673993891764, - "normalized_score": 42.41767399389176 - }, - "bbh": { - "name": "BBH", - "value": 0.5103079759559944, - "normalized_score": 29.95432730687622 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10120845921450151, - "normalized_score": 10.120845921450151 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31208053691275167, - "normalized_score": 8.277404921700223 - }, - "musr": { - "name": "MUSR", - "value": 0.39464583333333336, - "normalized_score": 8.864062500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3676030585106383, - "normalized_score": 29.733673167848696 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/slu-35 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.317880945938176 - } - }, - { - "id": "jaspionjader/slu-36_bfloat16_5c0b9c0b4efd5715e98f39a8590ee8dcef3d2b30_False", - "model": { - "name": "jaspionjader/slu-36", - "sha": "5c0b9c0b4efd5715e98f39a8590ee8dcef3d2b30", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.82465906451858, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4518289250300314, - "normalized_score": 45.182892503003146 - }, - "bbh": { - "name": "BBH", - "value": 0.5087352369131289, - "normalized_score": 29.705289242535667 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09063444108761329, - "normalized_score": 9.06344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31208053691275167, - "normalized_score": 8.277404921700223 - }, - "musr": { - "name": "MUSR", - "value": 0.3933125, - "normalized_score": 8.597395833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37109375, - "normalized_score": 30.12152777777778 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/slu-36 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.2942129275204786 - } - }, - { - "id": "jaspionjader/slu-37_bfloat16_94227455c1f45ae430312f1089074eb0b6c397b4_False", - "model": { - "name": "jaspionjader/slu-37", - "sha": "94227455c1f45ae430312f1089074eb0b6c397b4", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": 
"LlamaForCausalLM", - "average_score": 21.805877912918163, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4533526598314694, - "normalized_score": 45.33526598314694 - }, - "bbh": { - "name": "BBH", - "value": 0.5099854293096197, - "normalized_score": 29.873736188721313 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09743202416918428, - "normalized_score": 9.743202416918429 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.39464583333333336, - "normalized_score": 8.330729166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3695146276595745, - "normalized_score": 29.94606973995272 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/slu-37 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3346887529396667 - } - }, - { - "id": "jaspionjader/slu-6_bfloat16_c14606c7a876bab7160eb45ad0a1bc195e8d2c20_False", - "model": { - "name": "jaspionjader/slu-6", - "sha": "c14606c7a876bab7160eb45ad0a1bc195e8d2c20", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.953096684197828, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.41166216749336204, - "normalized_score": 41.1662167493362 - }, - "bbh": { - "name": "BBH", - "value": 0.5098719666858446, - "normalized_score": 29.920194960710266 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09441087613293052, - "normalized_score": 9.441087613293051 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.4066458333333334, - "normalized_score": 9.130729166666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3611203457446808, - "normalized_score": 29.01337174940898 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-01", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/slu-6 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.286933360341945 - } - }, - { - "id": "jaspionjader/slu-mix-1_bfloat16_e5dab5ddf9d0a1a6176a59bdc4bed670534132d0_False", - "model": { - "name": "jaspionjader/slu-mix-1", - "sha": "e5dab5ddf9d0a1a6176a59bdc4bed670534132d0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.420400006262994, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45689991444921696, - "normalized_score": 45.6899914449217 - }, - "bbh": { - "name": "BBH", - "value": 0.5240269525191525, - "normalized_score": 31.758280422354996 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11178247734138973, - "normalized_score": 11.178247734138973 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30033557046979864, - "normalized_score": 6.711409395973152 - }, - "musr": { - "name": "MUSR", - "value": 
0.42766666666666664, - "normalized_score": 12.624999999999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39303523936170215, - "normalized_score": 32.55947104018913 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "jaspionjader/slu-mix-1 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.384987916328368 - } - }, - { - "id": "jaspionjader/sof-1_bfloat16_c01f9c9c996b29cea46c9229f0ddf73827c6e9be_False", - "model": { - "name": "jaspionjader/sof-1", - "sha": "c01f9c9c996b29cea46c9229f0ddf73827c6e9be", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.572264115945842, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4313709845432342, - "normalized_score": 43.137098454323414 - }, - "bbh": { - "name": "BBH", - "value": 0.5009822733212669, - "normalized_score": 28.67139572134592 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11404833836858005, - "normalized_score": 11.404833836858005 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.40819791666666666, - "normalized_score": 9.458072916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.367436835106383, - "normalized_score": 29.715203900709213 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-26", - "generation": 1, - "base_model": "jaspionjader/sof-1 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2627230607158582 - } - }, - { - "id": "jaspionjader/sof-10_bfloat16_33384701c303f4d75785b8b0d2c5ae6f0d95151c_False", - "model": { - "name": "jaspionjader/sof-10", - "sha": "33384701c303f4d75785b8b0d2c5ae6f0d95151c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.315168787914505, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.46484328249045864, - "normalized_score": 46.484328249045866 - }, - "bbh": { - "name": "BBH", - "value": 0.5197177291754291, - "normalized_score": 30.894575246850525 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12386706948640483, - "normalized_score": 12.386706948640484 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3062080536912752, - "normalized_score": 7.494407158836691 - }, - "musr": { - "name": "MUSR", - "value": 0.40906250000000005, - "normalized_score": 10.699479166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38738364361702127, - "normalized_score": 31.93151595744681 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-26", - "submission_date": "2025-01-26", - "generation": 1, - "base_model": "jaspionjader/sof-10 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.3102986005975679 
- } - }, - { - "id": "jaspionjader/sof-3_bfloat16_7d226e4242c243f3e41ff18952e629348b20f70f_False", - "model": { - "name": "jaspionjader/sof-3", - "sha": "7d226e4242c243f3e41ff18952e629348b20f70f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.121574249978817, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.46369410940748323, - "normalized_score": 46.369410940748324 - }, - "bbh": { - "name": "BBH", - "value": 0.5206072122413828, - "normalized_score": 30.909590596424703 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12764350453172205, - "normalized_score": 12.764350453172204 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.41312499999999996, - "normalized_score": 11.17395833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3812333776595745, - "normalized_score": 31.24815307328605 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-26", - "generation": 1, - "base_model": "jaspionjader/sof-3 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.2059052768508982 - } - }, - { - "id": "jaspionjader/sof-6_bfloat16_62c172c831d4164500dc124c68bec24222ad0218_False", - "model": { - "name": "jaspionjader/sof-6", - "sha": "62c172c831d4164500dc124c68bec24222ad0218", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.79773077631505, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4353925362482657, - "normalized_score": 43.53925362482657 - }, - "bbh": { - "name": "BBH", - "value": 0.5209098090521417, - "normalized_score": 31.091426922196035 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1299093655589124, - "normalized_score": 12.990936555891238 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.41706250000000006, - "normalized_score": 10.966145833333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3843916223404255, - "normalized_score": 31.599069148936167 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-26", - "generation": 1, - "base_model": "jaspionjader/sof-6 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.2643305811944732 - } - }, - { - "id": "jaspionjader/test-10_bfloat16_6a427c33b397017d04990efbae3180144c5c6f77_False", - "model": { - "name": "jaspionjader/test-10", - "sha": "6a427c33b397017d04990efbae3180144c5c6f77", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.988062713940256, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4578241288669619, - "normalized_score": 45.78241288669619 - }, - "bbh": { - "name": "BBH", - "value": 
0.5316217442466934, - "normalized_score": 32.89618100381385 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11404833836858005, - "normalized_score": 11.404833836858005 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3196308724832215, - "normalized_score": 9.284116331096197 - }, - "musr": { - "name": "MUSR", - "value": 0.42509375, - "normalized_score": 11.936718749999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39361702127659576, - "normalized_score": 32.6241134751773 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-31", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 4.015, - "co2_cost": 1.2391132027399339 - } - }, - { - "id": "jaspionjader/test-11_bfloat16_66b1e5563dbd526855e90a6572a0e57e9a678138_False", - "model": { - "name": "jaspionjader/test-11", - "sha": "66b1e5563dbd526855e90a6572a0e57e9a678138", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.299110246815406, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45412727119598223, - "normalized_score": 45.41272711959822 - }, - "bbh": { - "name": "BBH", - "value": 0.5350048053167004, - "normalized_score": 33.37675179363524 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12009063444108761, - "normalized_score": 12.009063444108762 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3238255033557047, - "normalized_score": 9.843400447427292 - }, - "musr": { - "name": "MUSR", - "value": 0.429, - "normalized_score": 12.491666666666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3939494680851064, - "normalized_score": 32.661052009456256 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-31", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 4.015, - "co2_cost": 1.215754502587888 - } - }, - { - "id": "jaspionjader/test-12_bfloat16_60009a5be317a833eee03eac2c4795dace5b1fd9_False", - "model": { - "name": "jaspionjader/test-12", - "sha": "60009a5be317a833eee03eac2c4795dace5b1fd9", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.56243386883551, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4368165356808823, - "normalized_score": 43.68165356808823 - }, - "bbh": { - "name": "BBH", - "value": 0.5347063686599355, - "normalized_score": 33.309634149213345 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10800604229607251, - "normalized_score": 10.80060422960725 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3187919463087248, - "normalized_score": 9.172259507829976 - }, - "musr": { - "name": "MUSR", - "value": 0.42503124999999997, - "normalized_score": 11.795572916666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3935339095744681, - "normalized_score": 32.61487884160757 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - 
"metadata": { - "upload_date": "", - "submission_date": "2025-01-01", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 4.015, - "co2_cost": 1.2049300158577165 - } - }, - { - "id": "jaspionjader/test-13_bfloat16_563c22d523380876af9a12526ba6d18785d7d3fa_False", - "model": { - "name": "jaspionjader/test-13", - "sha": "563c22d523380876af9a12526ba6d18785d7d3fa", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.805351597197618, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45809895521660304, - "normalized_score": 45.80989552166031 - }, - "bbh": { - "name": "BBH", - "value": 0.531808681066841, - "normalized_score": 32.89714888833163 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10574018126888217, - "normalized_score": 10.574018126888216 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3162751677852349, - "normalized_score": 8.83668903803132 - }, - "musr": { - "name": "MUSR", - "value": 0.4263958333333333, - "normalized_score": 12.099479166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3935339095744681, - "normalized_score": 32.61487884160757 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-01", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 4.015, - "co2_cost": 1.252774550092542 - } - }, - { - "id": "jaspionjader/test-14_bfloat16_6e2e7b4b01c91e66ffede07f839845ca798d7697_False", - "model": { - "name": "jaspionjader/test-14", - "sha": "6e2e7b4b01c91e66ffede07f839845ca798d7697", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.771103301019078, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4443853420036614, - "normalized_score": 44.438534200366135 - }, - "bbh": { - "name": "BBH", - "value": 0.5322932549151301, - "normalized_score": 33.002110558637106 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11027190332326284, - "normalized_score": 11.027190332326283 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3145973154362416, - "normalized_score": 8.612975391498878 - }, - "musr": { - "name": "MUSR", - "value": 0.4316979166666666, - "normalized_score": 12.995572916666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3929521276595745, - "normalized_score": 32.55023640661938 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-01", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 4.015, - "co2_cost": 1.2374676050609221 - } - }, - { - "id": "jaspionjader/test-15_bfloat16_7a5db00581e84a1839c7cd97ee7f5004cdd61c90_False", - "model": { - "name": "jaspionjader/test-15", - "sha": "7a5db00581e84a1839c7cd97ee7f5004cdd61c90", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.48260697104523, - "has_chat_template": false - }, - "evaluations": { - 
"ifeval": { - "name": "IFEval", - "value": 0.4364918416468304, - "normalized_score": 43.64918416468304 - }, - "bbh": { - "name": "BBH", - "value": 0.53278841091336, - "normalized_score": 33.10384984912975 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11178247734138973, - "normalized_score": 11.178247734138973 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31208053691275167, - "normalized_score": 8.277404921700223 - }, - "musr": { - "name": "MUSR", - "value": 0.4264270833333333, - "normalized_score": 12.136718749999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3929521276595745, - "normalized_score": 32.55023640661938 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-01", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 4.015, - "co2_cost": 1.2513938577310615 - } - }, - { - "id": "jaspionjader/test-16_bfloat16_85f4804b908f53a750c0788fafaed220d9757131_False", - "model": { - "name": "jaspionjader/test-16", - "sha": "85f4804b908f53a750c0788fafaed220d9757131", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.76530715829608, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4599473840520929, - "normalized_score": 45.99473840520929 - }, - "bbh": { - "name": "BBH", - "value": 0.5330160713144172, - "normalized_score": 33.07756163805877 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1095166163141994, - "normalized_score": 10.95166163141994 - }, - "gpqa": { - "name": "GPQA", - "value": 0.313758389261745, - "normalized_score": 8.501118568232664 - }, - "musr": { - "name": "MUSR", - "value": 0.4224583333333333, - "normalized_score": 11.507291666666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39303523936170215, - "normalized_score": 32.55947104018913 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-01", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 4.015, - "co2_cost": 1.216782641676609 - } - }, - { - "id": "jaspionjader/test-17_bfloat16_41596590689dadfd0951fe4c7c581ff4b47f2efb_False", - "model": { - "name": "jaspionjader/test-17", - "sha": "41596590689dadfd0951fe4c7c581ff4b47f2efb", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.366597715396797, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42674991245450955, - "normalized_score": 42.674991245450954 - }, - "bbh": { - "name": "BBH", - "value": 0.5329373895863633, - "normalized_score": 33.14214119658744 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11027190332326284, - "normalized_score": 11.027190332326283 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31291946308724833, - "normalized_score": 8.389261744966444 - }, - "musr": { - "name": "MUSR", - "value": 0.429, - "normalized_score": 12.424999999999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39286901595744683, - "normalized_score": 32.54100177304965 - } - }, - 
"features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-01", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 4.015, - "co2_cost": 1.2887432946876372 - } - }, - { - "id": "jaspionjader/test-18_bfloat16_5d8d46ac16a60d5b7409c5852946973bb551f9e5_False", - "model": { - "name": "jaspionjader/test-18", - "sha": "5d8d46ac16a60d5b7409c5852946973bb551f9e5", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.499236856418005, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43916474953124374, - "normalized_score": 43.91647495312438 - }, - "bbh": { - "name": "BBH", - "value": 0.5317453097096507, - "normalized_score": 32.86222851906328 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1148036253776435, - "normalized_score": 11.48036253776435 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31208053691275167, - "normalized_score": 8.277404921700223 - }, - "musr": { - "name": "MUSR", - "value": 0.42506249999999995, - "normalized_score": 11.899479166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39303523936170215, - "normalized_score": 32.55947104018913 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-01", - "submission_date": "2025-01-01", - "generation": 1, - "base_model": "jaspionjader/test-18 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2185558146482152 - } - }, - { - "id": "jaspionjader/test-19_bfloat16_fcb034b4d93dbcc163066d4fdcab6e41f7d225e9_False", - "model": { - "name": "jaspionjader/test-19", - "sha": "fcb034b4d93dbcc163066d4fdcab6e41f7d225e9", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.419129160518, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44008896394898867, - "normalized_score": 44.00889639489887 - }, - "bbh": { - "name": "BBH", - "value": 0.5319373895863634, - "normalized_score": 32.971901545171306 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1095166163141994, - "normalized_score": 10.95166163141994 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.4263958333333333, - "normalized_score": 12.099479166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39286901595744683, - "normalized_score": 32.54100177304965 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-01", - "submission_date": "2025-01-01", - "generation": 1, - "base_model": "jaspionjader/test-19 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2434502883277698 - } - }, - { - "id": "jaspionjader/test-20_bfloat16_439bc991a6cb4b6ce44bd29c969d22cc8ce3759b_False", - "model": { - "name": "jaspionjader/test-20", - "sha": "439bc991a6cb4b6ce44bd29c969d22cc8ce3759b", - "precision": 
"bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.71584434757474, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45292823042859615, - "normalized_score": 45.29282304285961 - }, - "bbh": { - "name": "BBH", - "value": 0.5327388877137041, - "normalized_score": 33.05064343643468 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11178247734138973, - "normalized_score": 11.178247734138973 - }, - "gpqa": { - "name": "GPQA", - "value": 0.313758389261745, - "normalized_score": 8.501118568232664 - }, - "musr": { - "name": "MUSR", - "value": 0.42506249999999995, - "normalized_score": 11.832812499999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39195478723404253, - "normalized_score": 32.439420803782504 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-01", - "submission_date": "2025-01-01", - "generation": 1, - "base_model": "jaspionjader/test-20 (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.2302277666575685 - } - }, - { - "id": "jayasuryajsk/Qwen2.5-3B-reasoner_float16_a521780c1e5c94868b5c1e3ff220bb4579d6ab6e_False", - "model": { - "name": "jayasuryajsk/Qwen2.5-3B-reasoner", - "sha": "a521780c1e5c94868b5c1e3ff220bb4579d6ab6e", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 22.088475922135004, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4159585455480348, - "normalized_score": 41.59585455480348 - }, - "bbh": { - "name": "BBH", - "value": 0.46511772991620703, - "normalized_score": 25.2688279392506 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2084592145015106, - "normalized_score": 20.84592145015106 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - "musr": { - "name": "MUSR", - "value": 0.41229166666666667, - "normalized_score": 10.303125000000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3482380319148936, - "normalized_score": 27.582003546099287 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-07", - "submission_date": "2025-02-07", - "generation": 3, - "base_model": "Qwen/Qwen2.5-3B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.086, - "co2_cost": 0.7555740737503304 - } - }, - { - "id": "jeanmichela/o-distil-qwen_float16_de0828ff3d5e1d7490713c158094974944b08f01_False", - "model": { - "name": "jeanmichela/o-distil-qwen", - "sha": "de0828ff3d5e1d7490713c158094974944b08f01", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.35229225785721, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44823180272787316, - "normalized_score": 44.82318027278731 - }, - "bbh": { - "name": "BBH", - "value": 0.5900367438200601, - "normalized_score": 40.679114887508604 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5649546827794562, - "normalized_score": 56.49546827794561 - }, - 
"gpqa": { - "name": "GPQA", - "value": 0.3934563758389262, - "normalized_score": 19.12751677852349 - }, - "musr": { - "name": "MUSR", - "value": 0.5339895833333334, - "normalized_score": 28.348697916666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.46575797872340424, - "normalized_score": 40.639775413711575 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-24", - "submission_date": "2025-02-24", - "generation": 1, - "base_model": "jeanmichela/o-distil-qwen (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 1.8877461349596358 - } - }, - { - "id": "jebcarter/psyonic-cetacean-20B_float16_298d2086a949d53af06096d229f64f4719261698_False", - "model": { - "name": "jebcarter/psyonic-cetacean-20B", - "sha": "298d2086a949d53af06096d229f64f4719261698", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.012258737866414, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25436619281284767, - "normalized_score": 25.43661928128477 - }, - "bbh": { - "name": "BBH", - "value": 0.4907386156835858, - "normalized_score": 27.843060379681305 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01812688821752266, - "normalized_score": 1.812688821752266 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27348993288590606, - "normalized_score": 3.1319910514541416 - }, - "musr": { - "name": "MUSR", - "value": 0.46611458333333333, - "normalized_score": 16.897656249999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28856382978723405, - "normalized_score": 20.951536643026003 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-11-28", - "submission_date": "2024-06-30", - "generation": 0, - "base_model": "jebcarter/psyonic-cetacean-20B", - "hub_license": "other", - "hub_hearts": 40, - "params_billions": 19.994, - "co2_cost": 4.289896326622158 - } - }, - { - "id": "jebish7/Llama-3-Nanda-10B-Chat_bfloat16_6866e1c85dcae415f37186c705831a179cbd2845_False", - "model": { - "name": "jebish7/Llama-3-Nanda-10B-Chat", - "sha": "6866e1c85dcae415f37186c705831a179cbd2845", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.064509209714284, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2952831819572069, - "normalized_score": 29.52831819572069 - }, - "bbh": { - "name": "BBH", - "value": 0.4958605204321644, - "normalized_score": 29.600465622828253 - }, - "math": { - "name": "MATH Level 5", - "value": 0.055891238670694864, - "normalized_score": 5.589123867069486 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.4356041666666666, - "normalized_score": 12.883854166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3156582446808511, - "normalized_score": 23.96202718676123 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-08", - 
"submission_date": "2025-02-08", - "generation": 0, - "base_model": "jebish7/Llama-3-Nanda-10B-Chat", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 9.985, - "co2_cost": 1.016650032181119 - } - }, - { - "id": "jebish7/Llama-3.1-8B-Instruct_bfloat16_c31aabd088d023ac58f11e2539eece7fbdce9920_False", - "model": { - "name": "jebish7/Llama-3.1-8B-Instruct", - "sha": "c31aabd088d023ac58f11e2539eece7fbdce9920", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.022312491744717, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5058345190760515, - "normalized_score": 50.58345190760515 - }, - "bbh": { - "name": "BBH", - "value": 0.5088388495224864, - "normalized_score": 29.191619249410266 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15483383685800603, - "normalized_score": 15.483383685800604 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3213087248322148, - "normalized_score": 9.507829977628639 - }, - "musr": { - "name": "MUSR", - "value": 0.3997916666666667, - "normalized_score": 8.507291666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3777426861702128, - "normalized_score": 30.860298463356976 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-06", - "submission_date": "2025-02-06", - "generation": 1, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "llama3.1", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.44099168045061 - } - }, - { - "id": "jebish7/Nemotron-4-Mini-Hindi-4B-Base_bfloat16_cbec9d08f0257eccd22606160d35f3dfca4fff56_False", - "model": { - "name": "jebish7/Nemotron-4-Mini-Hindi-4B-Base", - "sha": "cbec9d08f0257eccd22606160d35f3dfca4fff56", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "NemotronForCausalLM", - "average_score": 12.020294543391651, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22848818911599, - "normalized_score": 22.848818911599004 - }, - "bbh": { - "name": "BBH", - "value": 0.3923566745600671, - "normalized_score": 14.862755734528113 - }, - "math": { - "name": "MATH Level 5", - "value": 0.027190332326283987, - "normalized_score": 2.719033232628399 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.42490625, - "normalized_score": 10.51328125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.25033244680851063, - "normalized_score": 16.703605200945624 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-06", - "submission_date": "2025-02-06", - "generation": 0, - "base_model": "jebish7/Nemotron-4-Mini-Hindi-4B-Base", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 4.191, - "co2_cost": 2.0685416017226625 - } - }, - { - "id": "jebish7/Nemotron-4-Mini-Hindi-4B-Instruct_bfloat16_1cf6f7d5054a18aa894746b6f3085bb8e07bb26c_False", - "model": { - "name": "jebish7/Nemotron-4-Mini-Hindi-4B-Instruct", - "sha": "1cf6f7d5054a18aa894746b6f3085bb8e07bb26c", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": 
"NemotronForCausalLM", - "average_score": 14.629004135847653, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3345257250761313, - "normalized_score": 33.452572507613134 - }, - "bbh": { - "name": "BBH", - "value": 0.4040596055988545, - "normalized_score": 16.39051081744601 - }, - "math": { - "name": "MATH Level 5", - "value": 0.028700906344410877, - "normalized_score": 2.8700906344410875 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3087248322147651, - "normalized_score": 7.829977628635347 - }, - "musr": { - "name": "MUSR", - "value": 0.41529166666666667, - "normalized_score": 9.511458333333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.25947473404255317, - "normalized_score": 17.719414893617017 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-06", - "submission_date": "2025-02-06", - "generation": 0, - "base_model": "jebish7/Nemotron-4-Mini-Hindi-4B-Instruct", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 4.191, - "co2_cost": 1.9681587752820158 - } - }, - { - "id": "jebish7/Nemotron-Mini-4B-Instruct_bfloat16_b2dd4565b9d2186566336ed1e8a0318358b446bc_False", - "model": { - "name": "jebish7/Nemotron-Mini-4B-Instruct", - "sha": "b2dd4565b9d2186566336ed1e8a0318358b446bc", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "NemotronForCausalLM", - "average_score": 16.90052524850467, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.37092026932982264, - "normalized_score": 37.092026932982264 - }, - "bbh": { - "name": "BBH", - "value": 0.4244475437312765, - "normalized_score": 19.432876400775246 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0324773413897281, - "normalized_score": 3.2477341389728096 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.47271875, - "normalized_score": 18.35651041666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27825797872340424, - "normalized_score": 19.806442080378247 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-06", - "submission_date": "2025-02-06", - "generation": 0, - "base_model": "jebish7/Nemotron-Mini-4B-Instruct", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 4.191, - "co2_cost": 2.8120929582167884 - } - }, - { - "id": "jebish7/aya-expanse-8b_bfloat16_31a967d26697e7d41e7625cee09ccf60484a7719_False", - "model": { - "name": "jebish7/aya-expanse-8b", - "sha": "31a967d26697e7d41e7625cee09ccf60484a7719", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "CohereForCausalLM", - "average_score": 18.10730460297795, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.37911408396388246, - "normalized_score": 37.911408396388246 - }, - "bbh": { - "name": "BBH", - "value": 0.496904421264497, - "normalized_score": 28.243958088217383 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08157099697885196, - "normalized_score": 8.157099697885197 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - 
"musr": { - "name": "MUSR", - "value": 0.3868958333333334, - "normalized_score": 6.495312500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31025598404255317, - "normalized_score": 23.361776004728128 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-06", - "submission_date": "2025-02-06", - "generation": 0, - "base_model": "jebish7/aya-expanse-8b", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 0, - "params_billions": 8.028, - "co2_cost": 1.7394495348237953 - } - }, - { - "id": "jebish7/gemma-2-2b-it_bfloat16_275bb885ea2f325e6f05249ad00a52f2e8cb0691_False", - "model": { - "name": "jebish7/gemma-2-2b-it", - "sha": "275bb885ea2f325e6f05249ad00a52f2e8cb0691", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 12.36333618408293, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.12717035244263, - "normalized_score": 12.717035244263 - }, - "bbh": { - "name": "BBH", - "value": 0.43951564907099594, - "normalized_score": 20.91911520531148 - }, - "math": { - "name": "MATH Level 5", - "value": 0.033987915407854986, - "normalized_score": 3.3987915407854987 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.42444791666666665, - "normalized_score": 11.822656250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27152593085106386, - "normalized_score": 19.058436761229316 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-06", - "submission_date": "2025-02-06", - "generation": 1, - "base_model": "google/gemma-2-2b", - "hub_license": "gemma", - "hub_hearts": 0, - "params_billions": 2.614, - "co2_cost": 2.4269121183326474 - } - }, - { - "id": "jebish7/gemma-2-9b-it_bfloat16_e6111e3d6933874b2a3f448a4a769617ca0cc31b_False", - "model": { - "name": "jebish7/gemma-2-9b-it", - "sha": "e6111e3d6933874b2a3f448a4a769617ca0cc31b", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 21.76298915231794, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1557467519514887, - "normalized_score": 15.57467519514887 - }, - "bbh": { - "name": "BBH", - "value": 0.5949210568047724, - "normalized_score": 42.41412690099088 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08459214501510574, - "normalized_score": 8.459214501510575 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34731543624161076, - "normalized_score": 12.975391498881436 - }, - "musr": { - "name": "MUSR", - "value": 0.4554479166666667, - "normalized_score": 16.23098958333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.414311835106383, - "normalized_score": 34.92353723404255 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-06", - "submission_date": "2025-02-06", - "generation": 1, - "base_model": "google/gemma-2-9b", - "hub_license": "gemma", - "hub_hearts": 0, - "params_billions": 9.242, - "co2_cost": 
5.890885756746063 - } - }, - { - "id": "jebish7/qwen2.5-0.5B-IHA-Hin_bfloat16_edf269b7cceddd36095abe4e06c1653272558621_False", - "model": { - "name": "jebish7/qwen2.5-0.5B-IHA-Hin", - "sha": "edf269b7cceddd36095abe4e06c1653272558621", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.251315269739358, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.14163419726326149, - "normalized_score": 14.163419726326149 - }, - "bbh": { - "name": "BBH", - "value": 0.29891753632624085, - "normalized_score": 2.697703505644682 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2525167785234899, - "normalized_score": 0.33557046979865535 - }, - "musr": { - "name": "MUSR", - "value": 0.34748958333333335, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.109375, - "normalized_score": 1.041666666666666 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-11", - "submission_date": "2025-02-11", - "generation": 0, - "base_model": "jebish7/qwen2.5-0.5B-IHA-Hin", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.9351933586481094 - } - }, - { - "id": "jeffmeloy/Qwen-7B-nerd-uncensored-v1.0_bfloat16_245a9a038ea9cfdc214a5e24a2e7ff9362f56b4a_False", - "model": { - "name": "jeffmeloy/Qwen-7B-nerd-uncensored-v1.0", - "sha": "245a9a038ea9cfdc214a5e24a2e7ff9362f56b4a", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 31.82039597916994, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6135952605752737, - "normalized_score": 61.35952605752736 - }, - "bbh": { - "name": "BBH", - "value": 0.5421083753999172, - "normalized_score": 34.18632007953577 - }, - "math": { - "name": "MATH Level 5", - "value": 0.28700906344410876, - "normalized_score": 28.700906344410875 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32802013422818793, - "normalized_score": 10.402684563758392 - }, - "musr": { - "name": "MUSR", - "value": 0.47929166666666667, - "normalized_score": 18.911458333333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4362533244680851, - "normalized_score": 37.3614804964539 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-30", - "submission_date": "2024-10-30", - "generation": 1, - "base_model": "jeffmeloy/Qwen-7B-nerd-uncensored-v1.0 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 5, - "params_billions": 7.616, - "co2_cost": 1.4146738803403005 - } - }, - { - "id": "jeffmeloy/Qwen2.5-7B-minperplexity-2_bfloat16_d7ba08c49f9e13e65b0abbf8539037f0712233c2_True", - "model": { - "name": "jeffmeloy/Qwen2.5-7B-minperplexity-2", - "sha": "d7ba08c49f9e13e65b0abbf8539037f0712233c2", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 30.04145905845039, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.509730847484674, - 
"normalized_score": 50.9730847484674 - }, - "bbh": { - "name": "BBH", - "value": 0.552390586276348, - "normalized_score": 36.89009035044375 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3013595166163142, - "normalized_score": 30.13595166163142 - }, - "gpqa": { - "name": "GPQA", - "value": 0.311241610738255, - "normalized_score": 8.165548098434002 - }, - "musr": { - "name": "MUSR", - "value": 0.46245833333333336, - "normalized_score": 16.907291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4345910904255319, - "normalized_score": 37.1767878250591 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-03", - "submission_date": "2024-12-03", - "generation": 1, - "base_model": "jeffmeloy/Qwen2.5-7B-minperplexity-2 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.0, - "co2_cost": 1.9565629076320818 - } - }, - { - "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v0.9_bfloat16_7eb2a19e13fb32c1bab751eb89fed33f6c66b4e6_False", - "model": { - "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v0.9", - "sha": "7eb2a19e13fb32c1bab751eb89fed33f6c66b4e6", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 31.896915034467217, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6048274134851084, - "normalized_score": 60.48274134851084 - }, - "bbh": { - "name": "BBH", - "value": 0.5469701834138724, - "normalized_score": 34.79159883396676 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2945619335347432, - "normalized_score": 29.45619335347432 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32298657718120805, - "normalized_score": 9.731543624161072 - }, - "musr": { - "name": "MUSR", - "value": 0.48198958333333336, - "normalized_score": 19.548697916666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4363364361702128, - "normalized_score": 37.37071513002365 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-02", - "submission_date": "2024-11-13", - "generation": 1, - "base_model": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v0.9 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.446062128659443 - } - }, - { - "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.0_bfloat16_8f478661c654990358904e2159252d5c5236b80f_True", - "model": { - "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.0", - "sha": "8f478661c654990358904e2159252d5c5236b80f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.19369887340693, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7695159953368174, - "normalized_score": 76.95159953368174 - }, - "bbh": { - "name": "BBH", - "value": 0.541762771903226, - "normalized_score": 34.737157075604436 - }, - "math": { - "name": "MATH Level 5", - "value": 0.47129909365558914, - "normalized_score": 47.129909365558916 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.4551145833333334, - "normalized_score": 
16.822656250000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4253656914893617, - "normalized_score": 36.151743498817964 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-30", - "submission_date": "2024-11-14", - "generation": 1, - "base_model": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.0 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 5, - "params_billions": 7.616, - "co2_cost": 1.4631763547494614 - } - }, - { - "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.1_bfloat16_e757ba9e4c1a5a43ba3a3e98b44ebbbfe7bf831a_True", - "model": { - "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.1", - "sha": "e757ba9e4c1a5a43ba3a3e98b44ebbbfe7bf831a", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 24.684283405688316, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6626296005709296, - "normalized_score": 66.26296005709295 - }, - "bbh": { - "name": "BBH", - "value": 0.48640249867140106, - "normalized_score": 26.661152209882328 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13293051359516617, - "normalized_score": 13.293051359516618 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28691275167785235, - "normalized_score": 4.921700223713646 - }, - "musr": { - "name": "MUSR", - "value": 0.38429166666666664, - "normalized_score": 5.303125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3849734042553192, - "normalized_score": 31.663711583924346 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-01", - "submission_date": "2024-11-09", - "generation": 1, - "base_model": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.368201065409928 - } - }, - { - "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.2_bfloat16_8ba84532e3eea17c821f96f3e80bec7c9d8b3799_False", - "model": { - "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.2", - "sha": "8ba84532e3eea17c821f96f3e80bec7c9d8b3799", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 23.398595020008255, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49646715160219335, - "normalized_score": 49.64671516021933 - }, - "bbh": { - "name": "BBH", - "value": 0.494592979290867, - "normalized_score": 27.660911941379897 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12084592145015106, - "normalized_score": 12.084592145015106 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.41724999999999995, - "normalized_score": 10.856249999999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3968583776595745, - "normalized_score": 32.98426418439716 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-01", - "submission_date": "2024-11-09", - "generation": 1, - "base_model": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.2 (Merge)", - 
"hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.251827084819927 - } - }, - { - "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.3_bfloat16_db61c49ae128777c4b893ab544975df349052d66_False", - "model": { - "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.3", - "sha": "db61c49ae128777c4b893ab544975df349052d66", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 24.06198639675365, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49951462120506923, - "normalized_score": 49.95146212050692 - }, - "bbh": { - "name": "BBH", - "value": 0.5026055485090198, - "normalized_score": 28.900263386108733 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12311178247734139, - "normalized_score": 12.311178247734139 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31291946308724833, - "normalized_score": 8.389261744966444 - }, - "musr": { - "name": "MUSR", - "value": 0.41873958333333333, - "normalized_score": 11.309114583333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4015957446808511, - "normalized_score": 33.51063829787233 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-03", - "submission_date": "2024-11-09", - "generation": 1, - "base_model": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.3 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.0, - "co2_cost": 1.230700447758749 - } - }, - { - "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.4_bfloat16_7da97922062bae96d0e694fbd3a5f1c06cf375b6_False", - "model": { - "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.4", - "sha": "7da97922062bae96d0e694fbd3a5f1c06cf375b6", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 31.56603265287841, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6078748830879843, - "normalized_score": 60.78748830879843 - }, - "bbh": { - "name": "BBH", - "value": 0.5467076263362468, - "normalized_score": 34.85601776135507 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2809667673716012, - "normalized_score": 28.09667673716012 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3238255033557047, - "normalized_score": 9.843400447427292 - }, - "musr": { - "name": "MUSR", - "value": 0.47138541666666667, - "normalized_score": 17.82317708333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44190492021276595, - "normalized_score": 37.98943557919622 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-16", - "submission_date": "2024-11-17", - "generation": 1, - "base_model": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.4 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.3226161315672365 - } - }, - { - "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.5_bfloat16_055cf43cab9027de7e548728dc231afea6a3dfd1_False", - "model": { - "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.5", - "sha": "055cf43cab9027de7e548728dc231afea6a3dfd1", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": 
"Qwen2ForCausalLM", - "average_score": 31.834944840883523, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5650352176669016, - "normalized_score": 56.50352176669016 - }, - "bbh": { - "name": "BBH", - "value": 0.5522599149696679, - "normalized_score": 35.92532095316597 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2756797583081571, - "normalized_score": 27.567975830815712 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3271812080536913, - "normalized_score": 10.290827740492169 - }, - "musr": { - "name": "MUSR", - "value": 0.49820833333333336, - "normalized_score": 22.409375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44481382978723405, - "normalized_score": 38.31264775413712 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-16", - "submission_date": "2024-11-17", - "generation": 1, - "base_model": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.5 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.3682022783351009 - } - }, - { - "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.7_bfloat16_a27917d12ac64d91a86afee9953bce6d1a9b6424_False", - "model": { - "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.7", - "sha": "a27917d12ac64d91a86afee9953bce6d1a9b6424", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 28.677649337210752, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4201551882338861, - "normalized_score": 42.01551882338861 - }, - "bbh": { - "name": "BBH", - "value": 0.5391718355132782, - "normalized_score": 33.83351101288335 - }, - "math": { - "name": "MATH Level 5", - "value": 0.29154078549848944, - "normalized_score": 29.154078549848943 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3238255033557047, - "normalized_score": 9.843400447427292 - }, - "musr": { - "name": "MUSR", - "value": 0.48484375, - "normalized_score": 20.77213541666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.42802526595744683, - "normalized_score": 36.44725177304965 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-16", - "submission_date": "2024-11-17", - "generation": 1, - "base_model": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.7 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.329739617643981 - } - }, - { - "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.8_bfloat16_31828a485faf437ff56a00769c1192a60f740ccf_False", - "model": { - "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.8", - "sha": "31828a485faf437ff56a00769c1192a60f740ccf", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 31.635927180803225, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6255601803215468, - "normalized_score": 62.55601803215468 - }, - "bbh": { - "name": "BBH", - "value": 0.5446899383425835, - "normalized_score": 34.6046086730829 - }, - "math": { - "name": "MATH Level 5", - "value": 0.270392749244713, - "normalized_score": 27.0392749244713 - }, - "gpqa": { - "name": "GPQA", - 
"value": 0.3238255033557047, - "normalized_score": 9.843400447427292 - }, - "musr": { - "name": "MUSR", - "value": 0.47671875, - "normalized_score": 18.62317708333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4343417553191489, - "normalized_score": 37.149083924349874 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-16", - "submission_date": "2025-01-17", - "generation": 1, - "base_model": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.8 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 7.616, - "co2_cost": 1.3407312154562863 - } - }, - { - "id": "jeffmeloy/Qwen2.5-7B-olm-v1.0_bfloat16_b8fffe13667329448267eba531f69c02bf7023f7_False", - "model": { - "name": "jeffmeloy/Qwen2.5-7B-olm-v1.0", - "sha": "b8fffe13667329448267eba531f69c02bf7023f7", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 30.108563094878495, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5331365222055258, - "normalized_score": 53.313652220552584 - }, - "bbh": { - "name": "BBH", - "value": 0.5659918212629057, - "normalized_score": 38.22235774647793 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2862537764350453, - "normalized_score": 28.625377643504528 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32046979865771813, - "normalized_score": 9.395973154362418 - }, - "musr": { - "name": "MUSR", - "value": 0.42776041666666664, - "normalized_score": 11.470052083333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4566156914893617, - "normalized_score": 39.623965721040186 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-12", - "submission_date": "2024-12-12", - "generation": 1, - "base_model": "jeffmeloy/Qwen2.5-7B-olm-v1.0 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 7.616, - "co2_cost": 1.8342811688561649 - } - }, - { - "id": "jeffmeloy/Qwen2.5-7B-olm-v1.1_bfloat16_7766302f72fe26d401f37fdeaa37cf5d4a5b183d_False", - "model": { - "name": "jeffmeloy/Qwen2.5-7B-olm-v1.1", - "sha": "7766302f72fe26d401f37fdeaa37cf5d4a5b183d", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 30.253116755964555, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4329445870290828, - "normalized_score": 43.29445870290829 - }, - "bbh": { - "name": "BBH", - "value": 0.5478077656573704, - "normalized_score": 35.43051618397058 - }, - "math": { - "name": "MATH Level 5", - "value": 0.38293051359516617, - "normalized_score": 38.29305135951662 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3087248322147651, - "normalized_score": 7.829977628635347 - }, - "musr": { - "name": "MUSR", - "value": 0.48081250000000003, - "normalized_score": 19.4015625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4354222074468085, - "normalized_score": 37.2691341607565 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - 
"generation": 1, - "base_model": "jeffmeloy/Qwen2.5-7B-olm-v1.1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.245356561264993 - } - }, - { - "id": "jeffmeloy/Qwen2.5-7B-olm-v1.2_bfloat16_f48d6dd328dc60ccec91234b8240e2be6cc4e599_False", - "model": { - "name": "jeffmeloy/Qwen2.5-7B-olm-v1.2", - "sha": "f48d6dd328dc60ccec91234b8240e2be6cc4e599", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 28.53753917623895, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42025492360270744, - "normalized_score": 42.025492360270746 - }, - "bbh": { - "name": "BBH", - "value": 0.5533340429711561, - "normalized_score": 36.44070084062748 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2847432024169184, - "normalized_score": 28.474320241691842 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31711409395973156, - "normalized_score": 8.948545861297541 - }, - "musr": { - "name": "MUSR", - "value": 0.46878125, - "normalized_score": 17.697656249999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4387466755319149, - "normalized_score": 37.63851950354609 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-01", - "submission_date": "2025-01-01", - "generation": 1, - "base_model": "jeffmeloy/Qwen2.5-7B-olm-v1.2 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.228986153444949 - } - }, - { - "id": "jeffmeloy/Qwen2.5-7B-olm-v1.3_bfloat16_07d2bc3e931a0e20f2de32a6582b52f345be1425_False", - "model": { - "name": "jeffmeloy/Qwen2.5-7B-olm-v1.3", - "sha": "07d2bc3e931a0e20f2de32a6582b52f345be1425", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 29.2967667117791, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4218540140161438, - "normalized_score": 42.185401401614385 - }, - "bbh": { - "name": "BBH", - "value": 0.5531852688351706, - "normalized_score": 36.40248084197368 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3104229607250755, - "normalized_score": 31.042296072507554 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3213087248322148, - "normalized_score": 9.507829977628639 - }, - "musr": { - "name": "MUSR", - "value": 0.4700520833333333, - "normalized_score": 18.08984375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44697473404255317, - "normalized_score": 38.55274822695035 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-17", - "submission_date": "2025-01-18", - "generation": 1, - "base_model": "jeffmeloy/Qwen2.5-7B-olm-v1.3 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.2636960715772714 - } - }, - { - "id": "jeffmeloy/Qwen2.5-7B-olm-v1.4_bfloat16_63f9f26dbaee773f651eb95b66a110604934dfb1_False", - "model": { - "name": "jeffmeloy/Qwen2.5-7B-olm-v1.4", - "sha": "63f9f26dbaee773f651eb95b66a110604934dfb1", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 
29.182337218829193, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4545018329144448, - "normalized_score": 45.450183291444475 - }, - "bbh": { - "name": "BBH", - "value": 0.5581962445576828, - "normalized_score": 36.64655745900427 - }, - "math": { - "name": "MATH Level 5", - "value": 0.29229607250755285, - "normalized_score": 29.229607250755286 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31208053691275167, - "normalized_score": 8.277404921700223 - }, - "musr": { - "name": "MUSR", - "value": 0.46220833333333333, - "normalized_score": 17.07604166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4457280585106383, - "normalized_score": 38.41422872340425 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-30", - "submission_date": "2025-01-30", - "generation": 1, - "base_model": "jeffmeloy/Qwen2.5-7B-olm-v1.4 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.2292866933893654 - } - }, - { - "id": "jeffmeloy/Qwen2.5-7B-olm-v1.5_bfloat16_ad31c67411c01905d510728f03da46d51fe75d3b_False", - "model": { - "name": "jeffmeloy/Qwen2.5-7B-olm-v1.5", - "sha": "ad31c67411c01905d510728f03da46d51fe75d3b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 29.23575275519298, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4546514359676769, - "normalized_score": 45.46514359676769 - }, - "bbh": { - "name": "BBH", - "value": 0.5543943528577703, - "normalized_score": 36.63313094998246 - }, - "math": { - "name": "MATH Level 5", - "value": 0.28172205438066467, - "normalized_score": 28.172205438066467 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33976510067114096, - "normalized_score": 11.968680089485462 - }, - "musr": { - "name": "MUSR", - "value": 0.4539270833333333, - "normalized_score": 15.407552083333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43991023936170215, - "normalized_score": 37.76780437352246 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "jeffmeloy/Qwen2.5-7B-olm-v1.5 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.025089690934133 - } - }, - { - "id": "jeffmeloy/jeffmeloy_Qwen2.5-7B-minperplexity-1_bfloat16_8ae87cfb63b5afcb0a8aca3b7ba9b8044cc3ba0e_False", - "model": { - "name": "jeffmeloy/jeffmeloy_Qwen2.5-7B-minperplexity-1", - "sha": "8ae87cfb63b5afcb0a8aca3b7ba9b8044cc3ba0e", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 27.475832080246903, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.37571643239936703, - "normalized_score": 37.571643239936705 - }, - "bbh": { - "name": "BBH", - "value": 0.5582354546195324, - "normalized_score": 37.821507463733965 - }, - "math": { - "name": "MATH Level 5", - "value": 0.29154078549848944, - "normalized_score": 29.154078549848943 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33221476510067116, - 
"normalized_score": 10.96196868008949 - }, - "musr": { - "name": "MUSR", - "value": 0.42903125, - "normalized_score": 11.928906250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4367519946808511, - "normalized_score": 37.41688829787233 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-27", - "submission_date": "2024-11-27", - "generation": 1, - "base_model": "jeffmeloy/jeffmeloy_Qwen2.5-7B-minperplexity-1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 7.616, - "co2_cost": 1.8395922038311505 - } - }, - { - "id": "jeonsworld/CarbonVillain-en-10.7B-v4_float16_57d6ad4d705d336aba228356683d9f221507440a_True", - "model": { - "name": "jeonsworld/CarbonVillain-en-10.7B-v4", - "sha": "57d6ad4d705d336aba228356683d9f221507440a", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.328676385842854, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45792386423578324, - "normalized_score": 45.79238642357833 - }, - "bbh": { - "name": "BBH", - "value": 0.516795955873779, - "normalized_score": 31.80563982727619 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04682779456193353, - "normalized_score": 4.682779456193353 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3062080536912752, - "normalized_score": 7.494407158836691 - }, - "musr": { - "name": "MUSR", - "value": 0.3965416666666666, - "normalized_score": 8.401041666666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31416223404255317, - "normalized_score": 23.795803782505907 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-12-30", - "submission_date": "2024-07-25", - "generation": 0, - "base_model": "jeonsworld/CarbonVillain-en-10.7B-v4", - "hub_license": "cc-by-nc-sa-4.0", - "hub_hearts": 6, - "params_billions": 10.732, - "co2_cost": 1.5394617546627765 - } - }, - { - "id": "jiangxinyang-shanda/Homer-LLama3-8B_bfloat16_550cdaea5feac5df9b0984bda14d00570daa4437_True", - "model": { - "name": "jiangxinyang-shanda/Homer-LLama3-8B", - "sha": "550cdaea5feac5df9b0984bda14d00570daa4437", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.91583079813962, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3991719748046295, - "normalized_score": 39.917197480462946 - }, - "bbh": { - "name": "BBH", - "value": 0.5173242047543128, - "normalized_score": 31.69897508701321 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08610271903323263, - "normalized_score": 8.610271903323262 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.40562499999999996, - "normalized_score": 9.236458333333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3139128989361702, - "normalized_score": 23.768099881796687 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": 
"2024-11-08", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.392852021486215 - } - }, - { - "id": "jieliu/Storm-7B_bfloat16_71edab8ee6c2578e428b0359158fb0d43133e989_False", - "model": { - "name": "jieliu/Storm-7B", - "sha": "71edab8ee6c2578e428b0359158fb0d43133e989", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.763877835116066, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3424192254329623, - "normalized_score": 34.24192254329623 - }, - "bbh": { - "name": "BBH", - "value": 0.5187285371254579, - "normalized_score": 32.33028437916087 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06117824773413897, - "normalized_score": 6.117824773413897 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.4428958333333333, - "normalized_score": 14.628645833333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3119182180851064, - "normalized_score": 23.54646867612293 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-25", - "submission_date": "2024-06-26", - "generation": 2, - "base_model": "mistralai/Mistral-7B-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 41, - "params_billions": 7.242, - "co2_cost": 1.2238269795492844 - } - }, - { - "id": "jiviai/medX_v2_float16_6df3f9b25063b6ec4aba54341afccd39f0bf98c8_False", - "model": { - "name": "jiviai/medX_v2", - "sha": "6df3f9b25063b6ec4aba54341afccd39f0bf98c8", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.31888331901421, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.37431792089433813, - "normalized_score": 37.43179208943381 - }, - "bbh": { - "name": "BBH", - "value": 0.4508721125093523, - "normalized_score": 21.433009954500687 - }, - "math": { - "name": "MATH Level 5", - "value": 0.054380664652567974, - "normalized_score": 5.438066465256798 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32298657718120805, - "normalized_score": 9.731543624161072 - }, - "musr": { - "name": "MUSR", - "value": 0.34984375, - "normalized_score": 2.897135416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34283577127659576, - "normalized_score": 26.981752364066192 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-31", - "submission_date": "2025-01-30", - "generation": 0, - "base_model": "jiviai/medX_v2", - "hub_license": "apache-2.0", - "hub_hearts": 12, - "params_billions": 8.03, - "co2_cost": 1.2845304472152206 - } - }, - { - "id": "jlzhou/Qwen2.5-3B-Infinity-Instruct-0625_float16_94c17462abd991aed582121532dabf867f1b5bce_True", - "model": { - "name": "jlzhou/Qwen2.5-3B-Infinity-Instruct-0625", - "sha": "94c17462abd991aed582121532dabf867f1b5bce", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 18.5491918072634, - 
"has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.35575827692744144, - "normalized_score": 35.57582769274414 - }, - "bbh": { - "name": "BBH", - "value": 0.4773774601029352, - "normalized_score": 26.914542436180227 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13670694864048338, - "normalized_score": 13.670694864048338 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26929530201342283, - "normalized_score": 2.572706935123044 - }, - "musr": { - "name": "MUSR", - "value": 0.39809374999999997, - "normalized_score": 8.128385416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3198969414893617, - "normalized_score": 24.432993498817968 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-21", - "submission_date": "2025-02-07", - "generation": 1, - "base_model": "jlzhou/Qwen2.5-3B-Infinity-Instruct-0625 (Merge)", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 3.086, - "co2_cost": 2.3544383059212852 - } - }, - { - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.01_bfloat16_f4ebbf27d586e94c63f0a7293f565cbd947b824f_False", - "model": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.01", - "sha": "f4ebbf27d586e94c63f0a7293f565cbd947b824f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.442126802141008, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42712447417297217, - "normalized_score": 42.71244741729721 - }, - "bbh": { - "name": "BBH", - "value": 0.5035519809362171, - "normalized_score": 29.550013804457734 - }, - "math": { - "name": "MATH Level 5", - "value": 0.045317220543806644, - "normalized_score": 4.531722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3221476510067114, - "normalized_score": 9.61968680089485 - }, - "musr": { - "name": "MUSR", - "value": 0.4637604166666667, - "normalized_score": 17.80338541666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37391954787234044, - "normalized_score": 30.43550531914893 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-07", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.01 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.9870363955915202 - } - }, - { - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.1_bfloat16_66c7330e9d04b13a68ea7dcf25bc0a71d144221a_False", - "model": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.1", - "sha": "66c7330e9d04b13a68ea7dcf25bc0a71d144221a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.44635595224064, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42532591302189304, - "normalized_score": 42.5325913021893 - }, - "bbh": { - "name": "BBH", - "value": 0.5018845446835877, - "normalized_score": 28.607718394442788 - }, - "math": { - "name": "MATH Level 5", - "value": 
0.09667673716012085, - "normalized_score": 9.667673716012084 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.41502083333333334, - "normalized_score": 10.777604166666672 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37242353723404253, - "normalized_score": 30.269281914893615 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-07", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.61644064497357 - } - }, - { - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.01_bfloat16_4a432be239528ffc654955338982f1f32eb12901_False", - "model": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.01", - "sha": "4a432be239528ffc654955338982f1f32eb12901", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.2797757714617, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.33774828565982706, - "normalized_score": 33.7748285659827 - }, - "bbh": { - "name": "BBH", - "value": 0.4917135045463188, - "normalized_score": 28.135682301211705 - }, - "math": { - "name": "MATH Level 5", - "value": 0.010574018126888218, - "normalized_score": 1.0574018126888218 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31208053691275167, - "normalized_score": 8.277404921700223 - }, - "musr": { - "name": "MUSR", - "value": 0.5017708333333334, - "normalized_score": 22.288020833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3533078457446808, - "normalized_score": 28.14531619385342 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-07", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.01 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 2.0731261031285424 - } - }, - { - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.1_bfloat16_d6f8ed8dc4b7f74b4312bc0d24aaac275c61958d_False", - "model": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.1", - "sha": "d6f8ed8dc4b7f74b4312bc0d24aaac275c61958d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.833220529017808, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4273993005226133, - "normalized_score": 42.73993005226133 - }, - "bbh": { - "name": "BBH", - "value": 0.5125777877188348, - "normalized_score": 30.51494334374904 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08081570996978851, - "normalized_score": 8.08157099697885 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3087248322147651, - "normalized_score": 7.829977628635347 - }, - "musr": { - "name": "MUSR", - "value": 0.42264583333333333, - "normalized_score": 11.397395833333336 - }, - "mmlu_pro": { - "name": 
"MMLU-PRO", - "value": 0.37391954787234044, - "normalized_score": 30.43550531914893 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-07", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.6365972948023049 - } - }, - { - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.01_bfloat16_6ab1392c825907b08eff8fbed4c97a3e6e0d6dd9_False", - "model": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.01", - "sha": "6ab1392c825907b08eff8fbed4c97a3e6e0d6dd9", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.44753152983463, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.32036219453272874, - "normalized_score": 32.03621945327288 - }, - "bbh": { - "name": "BBH", - "value": 0.48835763921755193, - "normalized_score": 27.665794638508448 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0037764350453172208, - "normalized_score": 0.3776435045317221 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - "musr": { - "name": "MUSR", - "value": 0.5097708333333334, - "normalized_score": 23.621354166666674 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33444148936170215, - "normalized_score": 26.049054373522463 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-07", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.01 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 2.055803756066905 - } - }, - { - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.1_bfloat16_a481edaceeaab34f4dc0e90c4d8ec0f72658bbdd_False", - "model": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.1", - "sha": "a481edaceeaab34f4dc0e90c4d8ec0f72658bbdd", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.371145290205714, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43963904661852776, - "normalized_score": 43.96390466185278 - }, - "bbh": { - "name": "BBH", - "value": 0.5140041302485145, - "normalized_score": 30.854731427683628 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08006042296072508, - "normalized_score": 8.006042296072508 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.43979166666666664, - "normalized_score": 13.840625000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36959773936170215, - "normalized_score": 29.955304373522463 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-08", - 
"submission_date": "2024-06-26", - "generation": 1, - "base_model": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.6302536002576227 - } - }, - { - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.01_bfloat16_61f4b44fb917cdb46f0ade9f8fc2a382e0cf67af_False", - "model": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.01", - "sha": "61f4b44fb917cdb46f0ade9f8fc2a382e0cf67af", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.480197691407646, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2814443454478561, - "normalized_score": 28.14443454478561 - }, - "bbh": { - "name": "BBH", - "value": 0.4854325756272537, - "normalized_score": 27.16443115792157 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0022658610271903325, - "normalized_score": 0.22658610271903326 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.5163125000000001, - "normalized_score": 24.472395833333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3295378989361702, - "normalized_score": 25.5042109929078 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-08", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.01 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 2.048876296373344 - } - }, - { - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.1_bfloat16_139a9bccd0ffb284e670a181a5986a01b1420c6c_False", - "model": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.1", - "sha": "139a9bccd0ffb284e670a181a5986a01b1420c6c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.77274590923878, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4302218114602588, - "normalized_score": 43.02218114602588 - }, - "bbh": { - "name": "BBH", - "value": 0.5157097379648965, - "normalized_score": 31.163507714744895 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06268882175226587, - "normalized_score": 6.268882175226587 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.43315624999999996, - "normalized_score": 12.877864583333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36627327127659576, - "normalized_score": 29.58591903073286 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-08", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.868261638719137 - } - }, - { - "id": 
"johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.01_bfloat16_c88c6b65f751156e7bc04c738947387eb55747e9_False", - "model": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.01", - "sha": "c88c6b65f751156e7bc04c738947387eb55747e9", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.508789697368925, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2789963962286732, - "normalized_score": 27.899639622867326 - }, - "bbh": { - "name": "BBH", - "value": 0.48611535229340735, - "normalized_score": 27.22486881424896 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0015105740181268882, - "normalized_score": 0.1510574018126888 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.5150104166666667, - "normalized_score": 24.242968750000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3304521276595745, - "normalized_score": 25.60579196217494 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-08", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.01 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 2.039531586586898 - } - }, - { - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.1_bfloat16_818f7e586444b551200862fb234c39bd48d69ae8_False", - "model": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.1", - "sha": "818f7e586444b551200862fb234c39bd48d69ae8", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.931665888548704, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4222784434190171, - "normalized_score": 42.22784434190171 - }, - "bbh": { - "name": "BBH", - "value": 0.5153764046315631, - "normalized_score": 31.124765884679533 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07401812688821752, - "normalized_score": 7.401812688821751 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.4384270833333333, - "normalized_score": 13.670052083333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3650265957446808, - "normalized_score": 29.44739952718675 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-08", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.8704515446304006 - } - }, - { - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.01_bfloat16_861347cd643d396877d8e560367cf0717c671228_False", - "model": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.01", - "sha": "861347cd643d396877d8e560367cf0717c671228", - "precision": "bfloat16", 
- "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.174229898217675, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4358923212631374, - "normalized_score": 43.58923212631373 - }, - "bbh": { - "name": "BBH", - "value": 0.5040935986635269, - "normalized_score": 29.530012820716838 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04833836858006042, - "normalized_score": 4.833836858006042 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3104026845637584, - "normalized_score": 8.05369127516779 - }, - "musr": { - "name": "MUSR", - "value": 0.45315625, - "normalized_score": 16.344531249999992 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3762466755319149, - "normalized_score": 30.69407505910165 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-07", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.01 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 2.069728793671311 - } - }, - { - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.1_bfloat16_2647bc863e6ee686e7174366107eecbd4b37f62e_False", - "model": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.1", - "sha": "2647bc863e6ee686e7174366107eecbd4b37f62e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.303402130290717, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4201551882338861, - "normalized_score": 42.01551882338861 - }, - "bbh": { - "name": "BBH", - "value": 0.501124270710985, - "normalized_score": 28.504906370089667 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09818731117824774, - "normalized_score": 9.818731117824774 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30033557046979864, - "normalized_score": 6.711409395973152 - }, - "musr": { - "name": "MUSR", - "value": 0.41502083333333334, - "normalized_score": 10.777604166666672 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3699301861702128, - "normalized_score": 29.99224290780142 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-07", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.6826316614050705 - } - }, - { - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.01_bfloat16_fa77530fe3723d7b15b06b88c3ca6110a8421742_False", - "model": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.01", - "sha": "fa77530fe3723d7b15b06b88c3ca6110a8421742", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.586572385911417, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.35178659290682057, - "normalized_score": 
35.178659290682056 - }, - "bbh": { - "name": "BBH", - "value": 0.49985217584312186, - "normalized_score": 29.136918888444114 - }, - "math": { - "name": "MATH Level 5", - "value": 0.023413897280966767, - "normalized_score": 2.3413897280966767 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3062080536912752, - "normalized_score": 7.494407158836691 - }, - "musr": { - "name": "MUSR", - "value": 0.48710416666666667, - "normalized_score": 20.3546875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3611203457446808, - "normalized_score": 29.01337174940898 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-07", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.01 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 2.1115852623492612 - } - }, - { - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.1_bfloat16_6fe73aa7f9c5b59297739166e9557089d39e5fc7_False", - "model": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.1", - "sha": "6fe73aa7f9c5b59297739166e9557089d39e5fc7", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.723601789562878, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42038014689911657, - "normalized_score": 42.03801468991166 - }, - "bbh": { - "name": "BBH", - "value": 0.5107301269172088, - "normalized_score": 30.244175919150706 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08761329305135952, - "normalized_score": 8.761329305135952 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.42785416666666665, - "normalized_score": 11.915104166666671 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37101063829787234, - "normalized_score": 30.112293144208035 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-07", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.70367132552859 - } - }, - { - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.01_bfloat16_a31f86b538ba8b2983620cc27a741bc9a81a7e2f_False", - "model": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.01", - "sha": "a31f86b538ba8b2983620cc27a741bc9a81a7e2f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.250340892451284, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.34541682735142754, - "normalized_score": 34.54168273514276 - }, - "bbh": { - "name": "BBH", - "value": 0.4983827321097329, - "normalized_score": 29.32060751365874 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02190332326283988, - "normalized_score": 2.190332326283988 - }, - "gpqa": { - "name": "GPQA", - 
"value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.49113541666666666, - "normalized_score": 21.058593749999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3531416223404255, - "normalized_score": 28.126846926713938 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-07", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.01 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.9053695429986484 - } - }, - { - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.1_bfloat16_f9d5bab1c1d0d6890e89b513225d13f68a1c6d75_False", - "model": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.1", - "sha": "f9d5bab1c1d0d6890e89b513225d13f68a1c6d75", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.442231277824096, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.40916435058976847, - "normalized_score": 40.916435058976845 - }, - "bbh": { - "name": "BBH", - "value": 0.513665952913411, - "normalized_score": 30.693077471988918 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08081570996978851, - "normalized_score": 8.08157099697885 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2953020134228188, - "normalized_score": 6.040268456375841 - }, - "musr": { - "name": "MUSR", - "value": 0.43569791666666663, - "normalized_score": 13.262239583333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.366938164893617, - "normalized_score": 29.659796099290777 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-07", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.5867146866582529 - } - }, - { - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.01_bfloat16_d30c75506feaec957dc73bc5c040159c310ecf4c_False", - "model": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.01", - "sha": "d30c75506feaec957dc73bc5c040159c310ecf4c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.28808146621785, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.29038728351884113, - "normalized_score": 29.03872835188411 - }, - "bbh": { - "name": "BBH", - "value": 0.4967337534367295, - "normalized_score": 28.739266057268594 - }, - "math": { - "name": "MATH Level 5", - "value": 0.014350453172205438, - "normalized_score": 1.4350453172205437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.4990729166666667, - "normalized_score": 22.250781249999992 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34898603723404253, - "normalized_score": 
27.665115248226947 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-07", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.01 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.8169628425306639 - } - }, - { - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.1_bfloat16_cd52bafe64e82d466d0bc590da5399f2299d24e1_False", - "model": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.1", - "sha": "cd52bafe64e82d466d0bc590da5399f2299d24e1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.626887508307224, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.41988036188424493, - "normalized_score": 41.98803618842449 - }, - "bbh": { - "name": "BBH", - "value": 0.5146905664948336, - "normalized_score": 31.007758447741608 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08081570996978851, - "normalized_score": 8.08157099697885 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2986577181208054, - "normalized_score": 6.487695749440718 - }, - "musr": { - "name": "MUSR", - "value": 0.43576041666666665, - "normalized_score": 13.136718750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3615359042553192, - "normalized_score": 29.05954491725768 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-07", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.6125248160666634 - } - }, - { - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.01_bfloat16_4c30fdbe0708afefe50788ea640c3dfab294c77f_False", - "model": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.01", - "sha": "4c30fdbe0708afefe50788ea640c3dfab294c77f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.06061039400886, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.29131149793658606, - "normalized_score": 29.131149793658604 - }, - "bbh": { - "name": "BBH", - "value": 0.49182964384768835, - "normalized_score": 28.219373273671142 - }, - "math": { - "name": "MATH Level 5", - "value": 0.010574018126888218, - "normalized_score": 1.0574018126888218 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30033557046979864, - "normalized_score": 6.711409395973152 - }, - "musr": { - "name": "MUSR", - "value": 0.4976770833333333, - "normalized_score": 21.976302083333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34541223404255317, - "normalized_score": 27.268026004728128 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-07", - "submission_date": "2024-06-26", - 
"generation": 1, - "base_model": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.01 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.8012618523667672 - } - }, - { - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.1_bfloat16_378a7cad3e34a1a8b11e77edd95b02ff0d228da2_False", - "model": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.1", - "sha": "378a7cad3e34a1a8b11e77edd95b02ff0d228da2", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.361497158834098, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.41623337189767595, - "normalized_score": 41.6233371897676 - }, - "bbh": { - "name": "BBH", - "value": 0.5138610942606995, - "normalized_score": 30.841602413783743 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07779456193353475, - "normalized_score": 7.779456193353475 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.43172916666666666, - "normalized_score": 12.499479166666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3624501329787234, - "normalized_score": 29.161125886524825 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-07", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.8598235896259063 - } - }, - { - "id": "johnsutor/Llama-3-8B-Instruct_dare_linear_bfloat16_abb81fd8fdc2ad32f65befcb7ae369c9837cd563_False", - "model": { - "name": "johnsutor/Llama-3-8B-Instruct_dare_linear", - "sha": "abb81fd8fdc2ad32f65befcb7ae369c9837cd563", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.123522915539402, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21454961723781787, - "normalized_score": 21.454961723781786 - }, - "bbh": { - "name": "BBH", - "value": 0.4282807940700452, - "normalized_score": 19.610998997495773 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.49792708333333335, - "normalized_score": 21.807552083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.24143949468085107, - "normalized_score": 15.715499408983453 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-07", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "johnsutor/Llama-3-8B-Instruct_dare_linear (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.8205118781666902 - } - }, - { - "id": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.1_bfloat16_e7a3a3b955d945f53da8301b958f0b90a28a62d3_False", - "model": { - "name": 
"johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.1", - "sha": "e7a3a3b955d945f53da8301b958f0b90a28a62d3", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 11.632495167787908, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.18907055501624578, - "normalized_score": 18.907055501624576 - }, - "bbh": { - "name": "BBH", - "value": 0.41187360174735804, - "normalized_score": 16.85891694951123 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0007552870090634441, - "normalized_score": 0.0755287009063444 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27181208053691275, - "normalized_score": 2.9082774049216997 - }, - "musr": { - "name": "MUSR", - "value": 0.46580208333333334, - "normalized_score": 16.991927083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.22647938829787234, - "normalized_score": 14.05326536643026 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-07", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.8222681657441384 - } - }, - { - "id": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.3_bfloat16_6f966d14d7236f3da6d1ea9ce3bd9b20808e02a9_False", - "model": { - "name": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.3", - "sha": "6f966d14d7236f3da6d1ea9ce3bd9b20808e02a9", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 15.96894703975495, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21132705665412216, - "normalized_score": 21.132705665412217 - }, - "bbh": { - "name": "BBH", - "value": 0.4558569854124363, - "normalized_score": 23.0949356647322 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0015105740181268882, - "normalized_score": 0.1510574018126888 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.5069479166666667, - "normalized_score": 22.50182291666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30402260638297873, - "normalized_score": 22.669178486997634 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-07", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.3 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.846769484109694 - } - }, - { - "id": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.7_bfloat16_b14b5cd07feb749e42b0567b1e387b390bed033e_False", - "model": { - "name": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.7", - "sha": "b14b5cd07feb749e42b0567b1e387b390bed033e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.77203043517005, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 
0.20338368861288048, - "normalized_score": 20.338368861288046 - }, - "bbh": { - "name": "BBH", - "value": 0.4722858888388635, - "normalized_score": 25.253545989338345 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0030211480362537764, - "normalized_score": 0.3021148036253776 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.5110104166666667, - "normalized_score": 23.709635416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3148271276595745, - "normalized_score": 23.869680851063833 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-07", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.7 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 2.0754243303213493 - } - }, - { - "id": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.9_bfloat16__False", - "model": { - "name": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.9", - "sha": "", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.30976967313407, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21607335203925582, - "normalized_score": 21.607335203925583 - }, - "bbh": { - "name": "BBH", - "value": 0.46639610671811504, - "normalized_score": 24.68762324471831 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0015105740181268882, - "normalized_score": 0.1510574018126888 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.5230416666666667, - "normalized_score": 25.88020833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3143284574468085, - "normalized_score": 23.814273049645386 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-07", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.9 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 2.659320863272562 - } - }, - { - "id": "johnsutor/Llama-3-8B-Instruct_linear_bfloat16_7449157fbc2e8b02e5b6e8ad56b4b2bd7ea82e9d_False", - "model": { - "name": "johnsutor/Llama-3-8B-Instruct_linear", - "sha": "7449157fbc2e8b02e5b6e8ad56b4b2bd7ea82e9d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.370872595590694, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4308213318439518, - "normalized_score": 43.08213318439518 - }, - "bbh": { - "name": "BBH", - "value": 0.5031496839210309, - "normalized_score": 28.778577217548463 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10045317220543806, - "normalized_score": 10.045317220543806 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2953020134228188, - "normalized_score": 6.040268456375841 - }, - "musr": { - "name": "MUSR", - "value": 0.40971874999999996, - "normalized_score": 
10.14817708333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37117686170212766, - "normalized_score": 30.13076241134751 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-07", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "johnsutor/Llama-3-8B-Instruct_linear (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.6522123560637827 - } - }, - { - "id": "johnsutor/Llama-3-8B-Instruct_ties-density-0.1_bfloat16_84793f89ebe3be5b5bd9a797d4bbdf374c07419d_False", - "model": { - "name": "johnsutor/Llama-3-8B-Instruct_ties-density-0.1", - "sha": "84793f89ebe3be5b5bd9a797d4bbdf374c07419d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.428512140604372, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.41161229980895137, - "normalized_score": 41.161229980895136 - }, - "bbh": { - "name": "BBH", - "value": 0.5021445196013956, - "normalized_score": 28.768718884316495 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07930513595166164, - "normalized_score": 7.930513595166164 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28859060402684567, - "normalized_score": 5.145413870246088 - }, - "musr": { - "name": "MUSR", - "value": 0.417375, - "normalized_score": 10.671875000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36003989361702127, - "normalized_score": 28.893321513002363 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-07", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "johnsutor/Llama-3-8B-Instruct_ties-density-0.1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.5727219149830023 - } - }, - { - "id": "johnsutor/Llama-3-8B-Instruct_ties-density-0.3_bfloat16_8d051f3eec3fc93a4521073c2d290c4ff9144fc1_False", - "model": { - "name": "johnsutor/Llama-3-8B-Instruct_ties-density-0.3", - "sha": "8d051f3eec3fc93a4521073c2d290c4ff9144fc1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.85496971399459, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3626278274977061, - "normalized_score": 36.26278274977061 - }, - "bbh": { - "name": "BBH", - "value": 0.49061122520005807, - "normalized_score": 27.724506656987163 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06722054380664652, - "normalized_score": 6.722054380664652 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.40248958333333335, - "normalized_score": 10.477864583333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33211436170212766, - "normalized_score": 25.790484633569736 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-07", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": 
"johnsutor/Llama-3-8B-Instruct_ties-density-0.3 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.9146706651081538 - } - }, - { - "id": "johnsutor/Llama-3-8B-Instruct_ties-density-0.5_bfloat16_c857e33c30016960f114e3a049f5dae41d68bfe7_False", - "model": { - "name": "johnsutor/Llama-3-8B-Instruct_ties-density-0.5", - "sha": "c857e33c30016960f114e3a049f5dae41d68bfe7", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.221595666098747, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.37966373666316483, - "normalized_score": 37.966373666316485 - }, - "bbh": { - "name": "BBH", - "value": 0.47931248948849836, - "normalized_score": 26.01209708592899 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06117824773413897, - "normalized_score": 6.117824773413897 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.3879791666666667, - "normalized_score": 7.797395833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31748670212765956, - "normalized_score": 24.165189125295505 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-07", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "johnsutor/Llama-3-8B-Instruct_ties-density-0.5 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.6838661913258426 - } - }, - { - "id": "johnsutor/Llama-3-8B-Instruct_ties-density-0.7_bfloat16_8d7d8bbb1e8cba5e51337f97bc3d6d8ae40544d5_False", - "model": { - "name": "johnsutor/Llama-3-8B-Instruct_ties-density-0.7", - "sha": "8d7d8bbb1e8cba5e51337f97bc3d6d8ae40544d5", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.056542734095075, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3681232463197649, - "normalized_score": 36.81232463197649 - }, - "bbh": { - "name": "BBH", - "value": 0.4738186124296502, - "normalized_score": 25.371407671115207 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06722054380664652, - "normalized_score": 6.722054380664652 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.3880729166666667, - "normalized_score": 7.575781250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3152426861702128, - "normalized_score": 23.91585401891253 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-07", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "johnsutor/Llama-3-8B-Instruct_ties-density-0.7 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.796101901515267 - } - }, - { - "id": "johnsutor/Llama-3-8B-Instruct_ties-density-0.9_bfloat16_57c280ce43fe81a23c966b48de6db7f4a85383a3_False", - "model": { - "name": "johnsutor/Llama-3-8B-Instruct_ties-density-0.9", - "sha": 
"57c280ce43fe81a23c966b48de6db7f4a85383a3", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.13585052124981, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3858085435533274, - "normalized_score": 38.58085435533274 - }, - "bbh": { - "name": "BBH", - "value": 0.47354321136013144, - "normalized_score": 25.46373486461887 - }, - "math": { - "name": "MATH Level 5", - "value": 0.061933534743202415, - "normalized_score": 6.193353474320242 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.3880416666666667, - "normalized_score": 7.738541666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3181515957446808, - "normalized_score": 24.23906619385342 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-07", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "johnsutor/Llama-3-8B-Instruct_ties-density-0.9 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.8021161854626704 - } - }, - { - "id": "jpacifico/Chocolatine-14B-Instruct-4k-DPO_float16_30677e58010979af26b70240846fdf7ff38cbbf2_False", - "model": { - "name": "jpacifico/Chocolatine-14B-Instruct-4k-DPO", - "sha": "30677e58010979af26b70240846fdf7ff38cbbf2", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 30.316420737984473, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4688648341954902, - "normalized_score": 46.886483419549016 - }, - "bbh": { - "name": "BBH", - "value": 0.6299582409761587, - "normalized_score": 48.02072159780435 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1782477341389728, - "normalized_score": 17.82477341389728 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3414429530201342, - "normalized_score": 12.192393736017896 - }, - "musr": { - "name": "MUSR", - "value": 0.44388541666666664, - "normalized_score": 15.15234375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4763962765957447, - "normalized_score": 41.8218085106383 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-01", - "submission_date": "2024-08-08", - "generation": 0, - "base_model": "jpacifico/Chocolatine-14B-Instruct-4k-DPO", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 13.96, - "co2_cost": 9.896402070152561 - } - }, - { - "id": "jpacifico/Chocolatine-14B-Instruct-DPO-v1.2_float16_d34bbd55b48e553f28579d86f3ccae19726c6b39_True", - "model": { - "name": "jpacifico/Chocolatine-14B-Instruct-DPO-v1.2", - "sha": "d34bbd55b48e553f28579d86f3ccae19726c6b39", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 33.79581095053082, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6852107962428579, - "normalized_score": 68.52107962428579 - }, - "bbh": { - "name": "BBH", - "value": 0.6438408959901142, - "normalized_score": 
49.845064475726 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20921450151057402, - "normalized_score": 20.921450151057403 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32550335570469796, - "normalized_score": 10.067114093959727 - }, - "musr": { - "name": "MUSR", - "value": 0.4267708333333333, - "normalized_score": 12.346354166666671 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.46966422872340424, - "normalized_score": 41.073803191489354 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-12", - "submission_date": "2024-08-28", - "generation": 0, - "base_model": "jpacifico/Chocolatine-14B-Instruct-DPO-v1.2", - "hub_license": "mit", - "hub_hearts": 14, - "params_billions": 13.96, - "co2_cost": 3.081206506576796 - } - }, - { - "id": "jpacifico/Chocolatine-14B-Instruct-DPO-v1.3_float16_145732abae63ecdcae9770d47b5c29dd67550837_True", - "model": { - "name": "jpacifico/Chocolatine-14B-Instruct-DPO-v1.3", - "sha": "145732abae63ecdcae9770d47b5c29dd67550837", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 42.42049137915473, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.703995398874985, - "normalized_score": 70.39953988749849 - }, - "bbh": { - "name": "BBH", - "value": 0.6846125547592651, - "normalized_score": 54.84648579293099 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5619335347432024, - "normalized_score": 56.19335347432024 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3414429530201342, - "normalized_score": 12.192393736017896 - }, - "musr": { - "name": "MUSR", - "value": 0.42339583333333336, - "normalized_score": 12.291145833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5374002659574468, - "normalized_score": 48.60002955082743 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-22", - "submission_date": "2025-01-20", - "generation": 1, - "base_model": "jpacifico/Chocolatine-14B-Instruct-DPO-v1.3 (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 1.696518369708774 - } - }, - { - "id": "jpacifico/Chocolatine-2-14B-Instruct-DPO-v2.0b1_float16_023a26b311482d3d849684b2f0e235779d0a9d67_False", - "model": { - "name": "jpacifico/Chocolatine-2-14B-Instruct-DPO-v2.0b1", - "sha": "023a26b311482d3d849684b2f0e235779d0a9d67", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 27.971479912083165, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.10334024831890495, - "normalized_score": 10.334024831890495 - }, - "bbh": { - "name": "BBH", - "value": 0.669567432054888, - "normalized_score": 52.01883589019085 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2756797583081571, - "normalized_score": 27.567975830815712 - }, - "gpqa": { - "name": "GPQA", - "value": 0.37583892617449666, - "normalized_score": 16.778523489932887 - }, - "musr": { - "name": "MUSR", - "value": 0.44673958333333336, - "normalized_score": 15.309114583333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5123836436170213, - "normalized_score": 45.8204048463357 - } - 
}, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-22", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 1.6779976929912963 - } - }, - { - "id": "jpacifico/Chocolatine-2-14B-Instruct-v2.0_float16_46123d125c3f2a7f05e96bd6f25d4acd9e6c1bb8_False", - "model": { - "name": "jpacifico/Chocolatine-2-14B-Instruct-v2.0", - "sha": "46123d125c3f2a7f05e96bd6f25d4acd9e6c1bb8", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 33.39132529353342, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.0885273297073986, - "normalized_score": 8.85273297073986 - }, - "bbh": { - "name": "BBH", - "value": 0.6769929749559443, - "normalized_score": 53.42017331810562 - }, - "math": { - "name": "MATH Level 5", - "value": 0.48036253776435045, - "normalized_score": 48.036253776435046 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3875838926174497, - "normalized_score": 18.34451901565996 - }, - "musr": { - "name": "MUSR", - "value": 0.5021145833333334, - "normalized_score": 23.897656249999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5301695478723404, - "normalized_score": 47.796616430260045 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-05", - "submission_date": "2025-02-05", - "generation": 0, - "base_model": "jpacifico/Chocolatine-2-14B-Instruct-v2.0", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 14.66, - "co2_cost": 1.8617544792855065 - } - }, - { - "id": "jpacifico/Chocolatine-2-14B-Instruct-v2.0.1_float16_2d682f3fcc93c15dcca03ea11e8663e39eac4c53_False", - "model": { - "name": "jpacifico/Chocolatine-2-14B-Instruct-v2.0.1", - "sha": "2d682f3fcc93c15dcca03ea11e8663e39eac4c53", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 33.080572523928765, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.07421419611076388, - "normalized_score": 7.421419611076388 - }, - "bbh": { - "name": "BBH", - "value": 0.6736278064166185, - "normalized_score": 52.9014914620923 - }, - "math": { - "name": "MATH Level 5", - "value": 0.479607250755287, - "normalized_score": 47.9607250755287 - }, - "gpqa": { - "name": "GPQA", - "value": 0.39177852348993286, - "normalized_score": 18.903803131991047 - }, - "musr": { - "name": "MUSR", - "value": 0.50075, - "normalized_score": 23.527083333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5299202127659575, - "normalized_score": 47.76891252955083 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-03", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 1.8069401829717737 - } - }, - { - "id": "jpacifico/Chocolatine-2-14B-Instruct-v2.0.3_float16_6b576e8845f27d3472e522eca31f962bf16648b6_False", - 
"model": { - "name": "jpacifico/Chocolatine-2-14B-Instruct-v2.0.3", - "sha": "6b576e8845f27d3472e522eca31f962bf16648b6", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.32784968296863, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7037205725253439, - "normalized_score": 70.37205725253439 - }, - "bbh": { - "name": "BBH", - "value": 0.6548026688308357, - "normalized_score": 50.631344939951724 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4206948640483384, - "normalized_score": 42.06948640483384 - }, - "gpqa": { - "name": "GPQA", - "value": 0.37919463087248323, - "normalized_score": 17.225950782997764 - }, - "musr": { - "name": "MUSR", - "value": 0.47681250000000003, - "normalized_score": 19.06822916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5374002659574468, - "normalized_score": 48.60002955082743 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-06", - "submission_date": "2025-02-06", - "generation": 0, - "base_model": "jpacifico/Chocolatine-2-14B-Instruct-v2.0.3", - "hub_license": "apache-2.0", - "hub_hearts": 11, - "params_billions": 14.766, - "co2_cost": 3.812492225830732 - } - }, - { - "id": "jpacifico/Chocolatine-2-14B-Instruct-v2.0b2_float16_961454f7242d9a35b85c40bf7ca37821edb8edc2_False", - "model": { - "name": "jpacifico/Chocolatine-2-14B-Instruct-v2.0b2", - "sha": "961454f7242d9a35b85c40bf7ca37821edb8edc2", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.24635883058548, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7240787776433197, - "normalized_score": 72.40787776433197 - }, - "bbh": { - "name": "BBH", - "value": 0.6475822300543483, - "normalized_score": 49.578491329758265 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3950151057401813, - "normalized_score": 39.50151057401813 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38338926174496646, - "normalized_score": 17.785234899328863 - }, - "musr": { - "name": "MUSR", - "value": 0.48075, - "normalized_score": 19.660416666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5369015957446809, - "normalized_score": 48.54462174940899 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-23", - "submission_date": "2025-01-24", - "generation": 0, - "base_model": "jpacifico/Chocolatine-2-14B-Instruct-v2.0b2", - "hub_license": "mit", - "hub_hearts": 6, - "params_billions": 14.766, - "co2_cost": 3.7287942170831587 - } - }, - { - "id": "jpacifico/Chocolatine-2-14B-Instruct-v2.0b3_float16_73116c04c78e401dd6291f5755b2cdf31aca7068_False", - "model": { - "name": "jpacifico/Chocolatine-2-14B-Instruct-v2.0b3", - "sha": "73116c04c78e401dd6291f5755b2cdf31aca7068", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.4335223068507, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7322969720342026, - 
"normalized_score": 73.22969720342026 - }, - "bbh": { - "name": "BBH", - "value": 0.646878884179919, - "normalized_score": 49.56651063787595 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4108761329305136, - "normalized_score": 41.08761329305136 - }, - "gpqa": { - "name": "GPQA", - "value": 0.37919463087248323, - "normalized_score": 17.225950782997764 - }, - "musr": { - "name": "MUSR", - "value": 0.47811458333333334, - "normalized_score": 19.29765625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5337433510638298, - "normalized_score": 48.19370567375886 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-27", - "generation": 0, - "base_model": "jpacifico/Chocolatine-2-14B-Instruct-v2.0b3", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 3.850071904512919 - } - }, - { - "id": "jpacifico/Chocolatine-3B-Instruct-DPO-Revised_float16_c403df6c0f78148cfb477972455cbd859149311a_True", - "model": { - "name": "jpacifico/Chocolatine-3B-Instruct-DPO-Revised", - "sha": "c403df6c0f78148cfb477972455cbd859149311a", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 28.22663122048826, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5622625744136669, - "normalized_score": 56.226257441366684 - }, - "bbh": { - "name": "BBH", - "value": 0.5539982344792619, - "normalized_score": 37.1552860906475 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18051359516616314, - "normalized_score": 18.051359516616312 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3221476510067114, - "normalized_score": 9.61968680089485 - }, - "musr": { - "name": "MUSR", - "value": 0.44534375, - "normalized_score": 15.101302083333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3988530585106383, - "normalized_score": 33.205895390070914 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-17", - "submission_date": "2024-07-19", - "generation": 0, - "base_model": "jpacifico/Chocolatine-3B-Instruct-DPO-Revised", - "hub_license": "mit", - "hub_hearts": 28, - "params_billions": 3.821, - "co2_cost": 1.5094489904099078 - } - }, - { - "id": "jpacifico/Chocolatine-3B-Instruct-DPO-v1.0_float16_98d049b8f8c305cfba81adae498a95e6b5647d4a_False", - "model": { - "name": "jpacifico/Chocolatine-3B-Instruct-DPO-v1.0", - "sha": "98d049b8f8c305cfba81adae498a95e6b5647d4a", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 25.429590501932733, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3737184005106451, - "normalized_score": 37.37184005106451 - }, - "bbh": { - "name": "BBH", - "value": 0.5471398082537478, - "normalized_score": 36.554520056455814 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1782477341389728, - "normalized_score": 17.82477341389728 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31543624161073824, - "normalized_score": 8.7248322147651 - }, - "musr": { - "name": "MUSR", - "value": 0.4754791666666667, - "normalized_score": 
19.468229166666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3937001329787234, - "normalized_score": 32.63334810874704 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-11", - "submission_date": "2024-07-11", - "generation": 0, - "base_model": "jpacifico/Chocolatine-3B-Instruct-DPO-v1.0", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 3.821, - "co2_cost": 1.598445886916085 - } - }, - { - "id": "jpacifico/Chocolatine-3B-Instruct-DPO-v1.2_float16_ebc9de6c266586adb1ec0db31bf050d1cd8fdffe_True", - "model": { - "name": "jpacifico/Chocolatine-3B-Instruct-DPO-v1.2", - "sha": "ebc9de6c266586adb1ec0db31bf050d1cd8fdffe", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 27.861913021910855, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5455014915978493, - "normalized_score": 54.55014915978493 - }, - "bbh": { - "name": "BBH", - "value": 0.5487182027245813, - "normalized_score": 35.99938785144921 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20468277945619334, - "normalized_score": 20.468277945619334 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3389261744966443, - "normalized_score": 11.85682326621924 - }, - "musr": { - "name": "MUSR", - "value": 0.41542708333333334, - "normalized_score": 12.328385416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3877160904255319, - "normalized_score": 31.96845449172577 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-22", - "submission_date": "2024-08-28", - "generation": 0, - "base_model": "jpacifico/Chocolatine-3B-Instruct-DPO-v1.2", - "hub_license": "mit", - "hub_hearts": 9, - "params_billions": 3.821, - "co2_cost": 1.9488891070112528 - } - }, - { - "id": "jpacifico/Distilucie-7B-Math-Instruct-DPO-v0.1_float16_b1ea75c097bf8c64d6ceb0c03140ca346e13692e_False", - "model": { - "name": "jpacifico/Distilucie-7B-Math-Instruct-DPO-v0.1", - "sha": "b1ea75c097bf8c64d6ceb0c03140ca346e13692e", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 11.128777731316648, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.30475028479988653, - "normalized_score": 30.475028479988655 - }, - "bbh": { - "name": "BBH", - "value": 0.38346961466103785, - "normalized_score": 14.914767084175816 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0256797583081571, - "normalized_score": 2.56797583081571 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.3644479166666667, - "normalized_score": 3.2226562499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1809341755319149, - "normalized_score": 8.992686170212766 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-02", - "submission_date": "2025-03-02", - "generation": 0, - "base_model": 
"jpacifico/Distilucie-7B-Math-Instruct-DPO-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 6.707, - "co2_cost": 0.5573261754243181 - } - }, - { - "id": "jpacifico/Lucie-7B-Instruct-DPO-v1.1_float16_eea677f0abd298574708d41351b3aeb5cd348756_False", - "model": { - "name": "jpacifico/Lucie-7B-Instruct-DPO-v1.1", - "sha": "eea677f0abd298574708d41351b3aeb5cd348756", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 11.704829753358311, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.31209413245743517, - "normalized_score": 31.209413245743512 - }, - "bbh": { - "name": "BBH", - "value": 0.37810118011411814, - "normalized_score": 14.205402704412565 - }, - "math": { - "name": "MATH Level 5", - "value": 0.023413897280966767, - "normalized_score": 2.3413897280966767 - }, - "gpqa": { - "name": "GPQA", - "value": 0.287751677852349, - "normalized_score": 5.033557046979867 - }, - "musr": { - "name": "MUSR", - "value": 0.40159374999999997, - "normalized_score": 8.132552083333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.18375997340425532, - "normalized_score": 9.306663711583923 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-17", - "submission_date": "2025-02-25", - "generation": 0, - "base_model": "jpacifico/Lucie-7B-Instruct-DPO-v1.1", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 6.707, - "co2_cost": 0.4837132153094756 - } - }, - { - "id": "jpacifico/Lucie-7B-Instruct-DPO-v1.1.3_float16_32ba7b0321e1050cd473fe4ef598075f6c77532a_False", - "model": { - "name": "jpacifico/Lucie-7B-Instruct-DPO-v1.1.3", - "sha": "32ba7b0321e1050cd473fe4ef598075f6c77532a", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 10.946784224112013, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3044754584502453, - "normalized_score": 30.447545845024532 - }, - "bbh": { - "name": "BBH", - "value": 0.381900181819828, - "normalized_score": 14.565626762986701 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02416918429003021, - "normalized_score": 2.416918429003021 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2860738255033557, - "normalized_score": 4.809843400447425 - }, - "musr": { - "name": "MUSR", - "value": 0.38178124999999996, - "normalized_score": 4.955989583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1763630319148936, - "normalized_score": 8.484781323877066 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-04", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 6.707, - "co2_cost": 0.4789030712987271 - } - }, - { - "id": "jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.0_float16_24219e509c7dcc3afd5012951fb4b190a36c9cba_False", - "model": { - "name": "jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.0", - "sha": "24219e509c7dcc3afd5012951fb4b190a36c9cba", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": 
"Original", - "architecture": "LlamaForCausalLM", - "average_score": 11.63540112320866, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.32335979645119395, - "normalized_score": 32.335979645119394 - }, - "bbh": { - "name": "BBH", - "value": 0.3802022135816421, - "normalized_score": 14.756535323843266 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02416918429003021, - "normalized_score": 2.416918429003021 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28859060402684567, - "normalized_score": 5.145413870246088 - }, - "musr": { - "name": "MUSR", - "value": 0.38438541666666665, - "normalized_score": 5.481510416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1870844414893617, - "normalized_score": 9.676049054373523 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-26", - "submission_date": "2025-02-26", - "generation": 1, - "base_model": "jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.0 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 6.707, - "co2_cost": 0.49242190189876767 - } - }, - { - "id": "jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.1_float16_b1e18403e60b62f980715bed673aa34ac8a59c7c_False", - "model": { - "name": "jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.1", - "sha": "b1e18403e60b62f980715bed673aa34ac8a59c7c", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 10.89983260022945, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.30142798884736943, - "normalized_score": 30.14279888473694 - }, - "bbh": { - "name": "BBH", - "value": 0.38078615414710804, - "normalized_score": 14.68047975728274 - }, - "math": { - "name": "MATH Level 5", - "value": 0.027945619335347432, - "normalized_score": 2.794561933534743 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.37502083333333336, - "normalized_score": 3.8442708333333346 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.18617021276595744, - "normalized_score": 9.574468085106382 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-02", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 6.707, - "co2_cost": 0.5822830688657448 - } - }, - { - "id": "jpacifico/Lucie-Boosted-7B-Instruct_float16_520a92d2cf75cd6ddfa6dcbc93c31dbf7f23939f_False", - "model": { - "name": "jpacifico/Lucie-Boosted-7B-Instruct", - "sha": "520a92d2cf75cd6ddfa6dcbc93c31dbf7f23939f", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 8.306616536792435, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25661467129438775, - "normalized_score": 25.661467129438776 - }, - "bbh": { - "name": "BBH", - "value": 0.34654827210803724, - "normalized_score": 10.258060724767985 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01283987915407855, - "normalized_score": 1.283987915407855 - }, - 
"gpqa": { - "name": "GPQA", - "value": 0.26677852348993286, - "normalized_score": 2.2371364653243813 - }, - "musr": { - "name": "MUSR", - "value": 0.369875, - "normalized_score": 3.4010416666666683 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1629820478723404, - "normalized_score": 6.998005319148934 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-16", - "submission_date": "2025-01-27", - "generation": 0, - "base_model": "jpacifico/Lucie-Boosted-7B-Instruct", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 6.707, - "co2_cost": 0.9901122137826461 - } - }, - { - "id": "jsfs11/L3-8B-Stheno-slerp_float16_b74450cac91180fcd92d72d60377e2d0a0b1bd11_True", - "model": { - "name": "jsfs11/L3-8B-Stheno-slerp", - "sha": "b74450cac91180fcd92d72d60377e2d0a0b1bd11", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.999084327806248, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6751940407008958, - "normalized_score": 67.51940407008958 - }, - "bbh": { - "name": "BBH", - "value": 0.5325675903618755, - "normalized_score": 33.31031560721856 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09894259818731117, - "normalized_score": 9.894259818731117 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28523489932885904, - "normalized_score": 4.697986577181204 - }, - "musr": { - "name": "MUSR", - "value": 0.3725416666666667, - "normalized_score": 5.134375000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36494348404255317, - "normalized_score": 29.43816489361702 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-18", - "submission_date": "2025-01-07", - "generation": 1, - "base_model": "jsfs11/L3-8B-Stheno-slerp (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.0704594209467124 - } - }, - { - "id": "jsfs11/MixtureofMerges-MoE-4x7b-v4_bfloat16_2b98406f20a874184dbffb5ed24e1f4b5063ec4b_False", - "model": { - "name": "jsfs11/MixtureofMerges-MoE-4x7b-v4", - "sha": "2b98406f20a874184dbffb5ed24e1f4b5063ec4b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 20.022361213958778, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.40299405577201824, - "normalized_score": 40.299405577201824 - }, - "bbh": { - "name": "BBH", - "value": 0.5169007103786006, - "normalized_score": 32.21799819533692 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0634441087613293, - "normalized_score": 6.3444108761329305 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2860738255033557, - "normalized_score": 4.809843400447425 - }, - "musr": { - "name": "MUSR", - "value": 0.43855208333333334, - "normalized_score": 13.885677083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30319148936170215, - "normalized_score": 22.576832151300238 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-02-11", - 
"submission_date": "2024-08-05", - "generation": 1, - "base_model": "jsfs11/MixtureofMerges-MoE-4x7b-v4 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 24.154, - "co2_cost": 2.7676556130817755 - } - }, - { - "id": "jsfs11/MixtureofMerges-MoE-4x7b-v5_bfloat16_c1b5ce7144b966062df7627d2482a59e0df3757c_False", - "model": { - "name": "jsfs11/MixtureofMerges-MoE-4x7b-v5", - "sha": "c1b5ce7144b966062df7627d2482a59e0df3757c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 20.434941072567536, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.41993022956865567, - "normalized_score": 41.99302295686556 - }, - "bbh": { - "name": "BBH", - "value": 0.5198481257083689, - "normalized_score": 32.82672418068055 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0755287009063444, - "normalized_score": 7.552870090634441 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28439597315436244, - "normalized_score": 4.5861297539149914 - }, - "musr": { - "name": "MUSR", - "value": 0.4304895833333333, - "normalized_score": 12.344531250000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3097573138297872, - "normalized_score": 23.30636820330969 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-02-25", - "submission_date": "2024-08-05", - "generation": 1, - "base_model": "jsfs11/MixtureofMerges-MoE-4x7b-v5 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 24.154, - "co2_cost": 2.8625448069978474 - } - }, - { - "id": "kaist-ai/janus-7b_bfloat16_f19c614ae7c81db06af1655d297c67afa99ad286_False", - "model": { - "name": "kaist-ai/janus-7b", - "sha": "f19c614ae7c81db06af1655d297c67afa99ad286", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 17.616998787714394, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.37751499355044615, - "normalized_score": 37.751499355044615 - }, - "bbh": { - "name": "BBH", - "value": 0.4693667591541633, - "normalized_score": 25.749870021061568 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04078549848942598, - "normalized_score": 4.078549848942599 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.4401041666666667, - "normalized_score": 14.279687500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28740026595744683, - "normalized_score": 20.822251773049647 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-04", - "submission_date": "2024-10-09", - "generation": 1, - "base_model": "alpindale/Mistral-7B-v0.2-hf", - "hub_license": "apache-2.0", - "hub_hearts": 8, - "params_billions": 7.242, - "co2_cost": 1.2132069966814876 - } - }, - { - "id": "kaist-ai/janus-dpo-7b_bfloat16_a414396b6d03fba75d12ccf7d8391186b4b639ce_False", - "model": { - "name": "kaist-ai/janus-dpo-7b", - "sha": "a414396b6d03fba75d12ccf7d8391186b4b639ce", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - 
"weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 18.53164895276002, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4002712802031942, - "normalized_score": 40.027128020319424 - }, - "bbh": { - "name": "BBH", - "value": 0.4772581104894978, - "normalized_score": 27.090901576814417 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04154078549848943, - "normalized_score": 4.1540785498489425 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28187919463087246, - "normalized_score": 4.250559284116329 - }, - "musr": { - "name": "MUSR", - "value": 0.43873958333333335, - "normalized_score": 13.709114583333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2976230053191489, - "normalized_score": 21.95811170212766 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-25", - "submission_date": "2024-10-09", - "generation": 1, - "base_model": "Removed", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 7.242, - "co2_cost": 1.2528567073492922 - } - }, - { - "id": "kaist-ai/janus-rm-7b_bfloat16_ffdbcc353ad4034fdfa68a767d265920d5f3e71c_False", - "model": { - "name": "kaist-ai/janus-rm-7b", - "sha": "ffdbcc353ad4034fdfa68a767d265920d5f3e71c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LLMForSequenceRegression", - "average_score": 4.775598832496902, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.177804891022487, - "normalized_score": 17.7804891022487 - }, - "bbh": { - "name": "BBH", - "value": 0.3056467446788138, - "normalized_score": 3.2777812036470793 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2516778523489933, - "normalized_score": 0.22371364653244186 - }, - "musr": { - "name": "MUSR", - "value": 0.38829166666666665, - "normalized_score": 5.969791666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11261635638297872, - "normalized_score": 1.4018173758865236 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-09", - "submission_date": "2024-10-09", - "generation": 0, - "base_model": "kaist-ai/janus-rm-7b", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 7.111, - "co2_cost": 1.0782212020370971 - } - }, - { - "id": "kaist-ai/mistral-orpo-capybara-7k_bfloat16_24c1172060658a1923c9b454796857e2cc59fbeb_True", - "model": { - "name": "kaist-ai/mistral-orpo-capybara-7k", - "sha": "24c1172060658a1923c9b454796857e2cc59fbeb", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.22089458033309, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.536733644507684, - "normalized_score": 53.6733644507684 - }, - "bbh": { - "name": "BBH", - "value": 0.4488995185492166, - "normalized_score": 23.434359116276923 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03927492447129909, - "normalized_score": 3.927492447129909 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2860738255033557, - "normalized_score": 
4.809843400447425 - }, - "musr": { - "name": "MUSR", - "value": 0.3963541666666666, - "normalized_score": 7.57760416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.297124335106383, - "normalized_score": 21.90270390070922 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-03-23", - "submission_date": "2024-10-09", - "generation": 1, - "base_model": "kaist-ai/mistral-orpo-capybara-7k (Merge)", - "hub_license": "mit", - "hub_hearts": 26, - "params_billions": 7.242, - "co2_cost": 1.3214870679674993 - } - }, - { - "id": "kavonalds/BunderMaxx-0710_float16_926bc1c4eff036fda0a56e4650366bbd35ae64ec_True", - "model": { - "name": "kavonalds/BunderMaxx-0710", - "sha": "926bc1c4eff036fda0a56e4650366bbd35ae64ec", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.371169453634327, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.32825569488955975, - "normalized_score": 32.82556948895597 - }, - "bbh": { - "name": "BBH", - "value": 0.6650758850169982, - "normalized_score": 51.57754755283705 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06797583081570997, - "normalized_score": 6.797583081570997 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.3393333333333333, - "normalized_score": 2.0833333333333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.13139960106382978, - "normalized_score": 3.4888445626477527 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-26", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 0.3468797122616692 - } - }, - { - "id": "kavonalds/BunderMaxx-0710_bfloat16_926bc1c4eff036fda0a56e4650366bbd35ae64ec_False", - "model": { - "name": "kavonalds/BunderMaxx-0710", - "sha": "926bc1c4eff036fda0a56e4650366bbd35ae64ec", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.954599839870426, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.27007894608527594, - "normalized_score": 27.007894608527597 - }, - "bbh": { - "name": "BBH", - "value": 0.556586279503196, - "normalized_score": 36.109779627024714 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06797583081570997, - "normalized_score": 6.797583081570997 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2802013422818792, - "normalized_score": 4.026845637583895 - }, - "musr": { - "name": "MUSR", - "value": 0.3681979166666667, - "normalized_score": 4.791406249999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1449468085106383, - "normalized_score": 4.994089834515365 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-26", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 
1.236, - "co2_cost": 0.3648219238808784 - } - }, - { - "id": "kavonalds/BunderMaxx-1010_bfloat16_84903278a74a863ae317bb3f4678a1440c67187a_True", - "model": { - "name": "kavonalds/BunderMaxx-1010", - "sha": "84903278a74a863ae317bb3f4678a1440c67187a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.42694830116734, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2980558252104416, - "normalized_score": 29.805582521044165 - }, - "bbh": { - "name": "BBH", - "value": 0.7019840419971701, - "normalized_score": 56.82265210373464 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10498489425981873, - "normalized_score": 10.498489425981873 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.3484479166666667, - "normalized_score": 3.489322916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12242353723404255, - "normalized_score": 2.491504137115838 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-26", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 0.35028869393205664 - } - }, - { - "id": "kavonalds/Lancer-1-1b-Instruct_float16_1fcd28f30158c8990c62a13f7750de1a98f229e2_True", - "model": { - "name": "kavonalds/Lancer-1-1b-Instruct", - "sha": "1fcd28f30158c8990c62a13f7750de1a98f229e2", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 12.306110155307538, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5545940327220664, - "normalized_score": 55.459403272206636 - }, - "bbh": { - "name": "BBH", - "value": 0.32532742727549835, - "normalized_score": 6.035793918460105 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03927492447129909, - "normalized_score": 3.927492447129909 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.3144375, - "normalized_score": 0.5333333333333338 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1568317819148936, - "normalized_score": 6.314642434988178 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-16", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 0.3665848496232295 - } - }, - { - "id": "kayfour/T3Q-Qwen2.5-7B-it-KOR-Safe_float16_7a737c91827de2c67a632090730a03dae0921e3a_False", - "model": { - "name": "kayfour/T3Q-Qwen2.5-7B-it-KOR-Safe", - "sha": "7a737c91827de2c67a632090730a03dae0921e3a", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 32.418098677759176, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6081497094376255, - "normalized_score": 60.814970943762546 - 
}, - "bbh": { - "name": "BBH", - "value": 0.5549941776226351, - "normalized_score": 36.45157980184221 - }, - "math": { - "name": "MATH Level 5", - "value": 0.37613293051359514, - "normalized_score": 37.61329305135951 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3213087248322148, - "normalized_score": 9.507829977628639 - }, - "musr": { - "name": "MUSR", - "value": 0.42772916666666666, - "normalized_score": 11.632812500000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44639295212765956, - "normalized_score": 38.48810579196217 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-11", - "submission_date": "2025-02-20", - "generation": 0, - "base_model": "kayfour/T3Q-Qwen2.5-7B-it-KOR-Safe", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 0.6672713485131164 - } - }, - { - "id": "keeeeenw/MicroLlama_float16_8d5874ca07b86ea1ea2e71eea96212278506ba65_False", - "model": { - "name": "keeeeenw/MicroLlama", - "sha": "8d5874ca07b86ea1ea2e71eea96212278506ba65", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.266088341806957, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19853765785892544, - "normalized_score": 19.85376578589254 - }, - "bbh": { - "name": "BBH", - "value": 0.3007313991347165, - "normalized_score": 2.831363636363637 - }, - "math": { - "name": "MATH Level 5", - "value": 0.011329305135951661, - "normalized_score": 1.1329305135951662 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.36981249999999993, - "normalized_score": 4.793229166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11377992021276596, - "normalized_score": 1.5311022458628842 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-03-29", - "submission_date": "2024-09-15", - "generation": 0, - "base_model": "keeeeenw/MicroLlama", - "hub_license": "apache-2.0", - "hub_hearts": 45, - "params_billions": 0.305, - "co2_cost": 0.371535779070013 - } - }, - { - "id": "kekmodel/StopCarbon-10.7B-v5_float16_7d59819dce2439f6c83b4f5c21a68aa882ff5ac9_True", - "model": { - "name": "kekmodel/StopCarbon-10.7B-v5", - "sha": "7d59819dce2439f6c83b4f5c21a68aa882ff5ac9", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.93299222055526, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47283651821611106, - "normalized_score": 47.2836518216111 - }, - "bbh": { - "name": "BBH", - "value": 0.5177716413471513, - "normalized_score": 31.9932224557197 - }, - "math": { - "name": "MATH Level 5", - "value": 0.055891238670694864, - "normalized_score": 5.589123867069486 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3062080536912752, - "normalized_score": 7.494407158836691 - }, - "musr": { - "name": "MUSR", - "value": 0.4019375, - "normalized_score": 9.27552083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3156582446808511, - "normalized_score": 23.96202718676123 - } - }, - "features": { - 
"is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-12-30", - "submission_date": "2024-07-25", - "generation": 0, - "base_model": "kekmodel/StopCarbon-10.7B-v5", - "hub_license": "cc-by-nc-sa-4.0", - "hub_hearts": 2, - "params_billions": 10.732, - "co2_cost": 1.4905873692729197 - } - }, - { - "id": "kevin009/llamaRAGdrama_bfloat16_8c103ca8fa6dd9a8d3dab81b319408095e9a1ad8_True", - "model": { - "name": "kevin009/llamaRAGdrama", - "sha": "8c103ca8fa6dd9a8d3dab81b319408095e9a1ad8", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 13.348717196012394, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2598372318780835, - "normalized_score": 25.98372318780835 - }, - "bbh": { - "name": "BBH", - "value": 0.4007385667099335, - "normalized_score": 16.637813694151937 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04305135951661632, - "normalized_score": 4.305135951661631 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.43157291666666664, - "normalized_score": 12.113281250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27235704787234044, - "normalized_score": 19.150783096926716 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-02-04", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "kevin009/llamaRAGdrama", - "hub_license": "apache-2.0", - "hub_hearts": 7, - "params_billions": 7.242, - "co2_cost": 1.279278093587749 - } - }, - { - "id": "khoantap/cheap-moe-merge_float16_1bb9f1fe81fafd43cf8dbbeae1eae43da665d3f4_False", - "model": { - "name": "khoantap/cheap-moe-merge", - "sha": "1bb9f1fe81fafd43cf8dbbeae1eae43da665d3f4", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2MoeForCausalLM", - "average_score": 21.655014581085677, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4557008736818309, - "normalized_score": 45.57008736818309 - }, - "bbh": { - "name": "BBH", - "value": 0.513116897226939, - "normalized_score": 29.799724462846836 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09214501510574018, - "normalized_score": 9.214501510574017 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2953020134228188, - "normalized_score": 6.040268456375841 - }, - "musr": { - "name": "MUSR", - "value": 0.4103020833333333, - "normalized_score": 13.321093749999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3338597074468085, - "normalized_score": 25.984411938534276 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-20", - "submission_date": "2025-01-21", - "generation": 0, - "base_model": "khoantap/cheap-moe-merge", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 19.305, - "co2_cost": 4.786928186769573 - } - }, - { - "id": "khoantap/llama-3-8b-stock-merge_float16_0ba787be8f982dab1415aaeaf66afb58579574b8_False", - "model": { - "name": 
"khoantap/llama-3-8b-stock-merge", - "sha": "0ba787be8f982dab1415aaeaf66afb58579574b8", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.933563133579472, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.48117993590340297, - "normalized_score": 48.1179935903403 - }, - "bbh": { - "name": "BBH", - "value": 0.5162255701726589, - "normalized_score": 30.760623629541772 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16163141993957703, - "normalized_score": 16.1631419939577 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3179530201342282, - "normalized_score": 9.060402684563762 - }, - "musr": { - "name": "MUSR", - "value": 0.39458333333333334, - "normalized_score": 8.389583333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37998670212765956, - "normalized_score": 31.109633569739948 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-17", - "submission_date": "2025-01-21", - "generation": 0, - "base_model": "khoantap/llama-3-8b-stock-merge", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4837990968618608 - } - }, - { - "id": "khoantap/llama-breadcrumbs-ties-merge_float16_57e28eb2dc698f1d6f807145efe61f4be0e6bfe4_False", - "model": { - "name": "khoantap/llama-breadcrumbs-ties-merge", - "sha": "57e28eb2dc698f1d6f807145efe61f4be0e6bfe4", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.906279895712146, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22051933314716063, - "normalized_score": 22.051933314716063 - }, - "bbh": { - "name": "BBH", - "value": 0.5415928172799896, - "normalized_score": 33.78078314143679 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11253776435045318, - "normalized_score": 11.253776435045317 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.44344791666666666, - "normalized_score": 14.097656250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3171542553191489, - "normalized_score": 24.128250591016545 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-17", - "submission_date": "2025-01-21", - "generation": 0, - "base_model": "khoantap/llama-breadcrumbs-ties-merge", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4975703501702071 - } - }, - { - "id": "khoantap/llama-evolve-ties-best-merge_float16_8616af5c46c02605a57e46dcfe5d960f767541d9_False", - "model": { - "name": "khoantap/llama-evolve-ties-best-merge", - "sha": "8616af5c46c02605a57e46dcfe5d960f767541d9", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 27.644113718571266, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6743950495795601, - "normalized_score": 67.439504957956 - }, - "bbh": { - "name": "BBH", - "value": 0.5413565104914732, - 
"normalized_score": 34.84865280113286 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15634441087613293, - "normalized_score": 15.634441087613293 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31711409395973156, - "normalized_score": 8.948545861297541 - }, - "musr": { - "name": "MUSR", - "value": 0.39455208333333336, - "normalized_score": 7.219010416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3859707446808511, - "normalized_score": 31.774527186761226 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-20", - "submission_date": "2025-01-21", - "generation": 0, - "base_model": "khoantap/llama-evolve-ties-best-merge", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4619226795471676 - } - }, - { - "id": "khoantap/llama-linear-0.5-0.5-1-merge_float16_a131718ecb2fa7da4a873fc2cfa4bae180bf9c7a_False", - "model": { - "name": "khoantap/llama-linear-0.5-0.5-1-merge", - "sha": "a131718ecb2fa7da4a873fc2cfa4bae180bf9c7a", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.90696272224101, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.48122980358781364, - "normalized_score": 48.12298035878136 - }, - "bbh": { - "name": "BBH", - "value": 0.5643013649244941, - "normalized_score": 38.20585226683991 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2054380664652568, - "normalized_score": 20.54380664652568 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.41427083333333337, - "normalized_score": 9.483854166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38331117021276595, - "normalized_score": 31.47901891252955 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-17", - "submission_date": "2025-01-21", - "generation": 0, - "base_model": "khoantap/llama-linear-0.5-0.5-1-merge", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4518757123498789 - } - }, - { - "id": "khoantap/llama-linear-0.5-1-0.5-merge_float16_ac380e0915db101279ea64defeff6ef45fb653e5_False", - "model": { - "name": "khoantap/llama-linear-0.5-1-0.5-merge", - "sha": "ac380e0915db101279ea64defeff6ef45fb653e5", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.550040759082293, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5031616111916382, - "normalized_score": 50.31616111916382 - }, - "bbh": { - "name": "BBH", - "value": 0.5950766502131658, - "normalized_score": 42.29147047597542 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14803625377643503, - "normalized_score": 14.803625377643503 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2936241610738255, - "normalized_score": 5.8165548098433995 - }, - "musr": { - "name": "MUSR", - "value": 0.4171875, - "normalized_score": 10.181770833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3690159574468085, - "normalized_score": 29.89066193853428 
- } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-17", - "submission_date": "2025-01-21", - "generation": 0, - "base_model": "khoantap/llama-linear-0.5-1-0.5-merge", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3970814355436818 - } - }, - { - "id": "khoantap/llama-linear-1-0.5-0.5-merge_float16_b71a096c0541759b219850134d81e8bbb3de8db4_False", - "model": { - "name": "khoantap/llama-linear-1-0.5-0.5-merge", - "sha": "b71a096c0541759b219850134d81e8bbb3de8db4", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.278191179525752, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45145436331156885, - "normalized_score": 45.14543633115689 - }, - "bbh": { - "name": "BBH", - "value": 0.5526017944110775, - "normalized_score": 36.494370655874725 - }, - "math": { - "name": "MATH Level 5", - "value": 0.24773413897280966, - "normalized_score": 24.773413897280967 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29278523489932884, - "normalized_score": 5.7046979865771785 - }, - "musr": { - "name": "MUSR", - "value": 0.4117604166666667, - "normalized_score": 10.270052083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.363530585106383, - "normalized_score": 29.281176122931434 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-17", - "submission_date": "2025-01-21", - "generation": 0, - "base_model": "khoantap/llama-linear-1-0.5-0.5-merge", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4401876994687541 - } - }, - { - "id": "khoantap/llama-slerp-merge_float16_011e6ceb076efa340a3b88f525de1523769d760d_False", - "model": { - "name": "khoantap/llama-slerp-merge", - "sha": "011e6ceb076efa340a3b88f525de1523769d760d", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.169846243728667, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49799088640363126, - "normalized_score": 49.799088640363124 - }, - "bbh": { - "name": "BBH", - "value": 0.5782782780315171, - "normalized_score": 39.91531358821765 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08308157099697885, - "normalized_score": 8.308157099697885 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.40531249999999996, - "normalized_score": 10.19739583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3677692819148936, - "normalized_score": 29.752142434988176 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-17", - "submission_date": "2025-01-21", - "generation": 0, - "base_model": "khoantap/llama-slerp-merge", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4480991046886789 - } - }, - { - "id": "khoantap/moe-out-merge_float16_6ded40192b68587aeb2eb05c8f1d4f09a4cfa3f5_False", - 
"model": { - "name": "khoantap/moe-out-merge", - "sha": "6ded40192b68587aeb2eb05c8f1d4f09a4cfa3f5", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2MoeForCausalLM", - "average_score": 21.17751635635514, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4504802812094133, - "normalized_score": 45.048028120941325 - }, - "bbh": { - "name": "BBH", - "value": 0.515116897226939, - "normalized_score": 30.04120594432831 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09290030211480363, - "normalized_score": 9.290030211480364 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28859060402684567, - "normalized_score": 5.145413870246088 - }, - "musr": { - "name": "MUSR", - "value": 0.40630208333333334, - "normalized_score": 11.454427083333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3347739361702128, - "normalized_score": 26.08599290780142 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-20", - "submission_date": "2025-01-21", - "generation": 0, - "base_model": "khoantap/moe-out-merge", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 19.305, - "co2_cost": 4.691127876028741 - } - }, - { - "id": "khulaifi95/Llama-3.1-8B-Reason-Blend-888k_bfloat16_bf30101a0cbfca2265528aa9ffc4397f7581df7e_True", - "model": { - "name": "khulaifi95/Llama-3.1-8B-Reason-Blend-888k", - "sha": "bf30101a0cbfca2265528aa9ffc4397f7581df7e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.101958513598035, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.583170432230925, - "normalized_score": 58.317043223092504 - }, - "bbh": { - "name": "BBH", - "value": 0.4789526925494476, - "normalized_score": 26.547568201488428 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11555891238670694, - "normalized_score": 11.555891238670695 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.3379375, - "normalized_score": 2.9421874999999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3100066489361702, - "normalized_score": 23.33407210401891 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-24", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "khulaifi95/Llama-3.1-8B-Reason-Blend-888k (Merge)", - "hub_license": "other", - "hub_hearts": 3, - "params_billions": 8.03, - "co2_cost": 1.37775724599284 - } - }, - { - "id": "kms7530/chemeng_llama-3-8b-Instruct-bnb-4bit_24_1_100_1_float16_f296897830363557c84cc4a942c2cd1f91818ae4_True", - "model": { - "name": "kms7530/chemeng_llama-3-8b-Instruct-bnb-4bit_24_1_100_1", - "sha": "f296897830363557c84cc4a942c2cd1f91818ae4", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 17.99671713302148, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5455014915978493, - "normalized_score": 54.55014915978493 - }, - "bbh": { - "name": 
"BBH", - "value": 0.42890394469736065, - "normalized_score": 19.079190454097787 - }, - "math": { - "name": "MATH Level 5", - "value": 0.061933534743202415, - "normalized_score": 6.193353474320242 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.38206249999999997, - "normalized_score": 5.491145833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2798371010638298, - "normalized_score": 19.98190011820331 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-10", - "submission_date": "2024-10-14", - "generation": 2, - "base_model": "Removed", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 9.3, - "co2_cost": 4.599955449714945 - } - }, - { - "id": "kms7530/chemeng_phi-3-mini-4k-instruct-bnb-4bit_16_4_100_1_nonmath_float16_81453e5718775630581ab9950e6c0ccf0d7a4177_True", - "model": { - "name": "kms7530/chemeng_phi-3-mini-4k-instruct-bnb-4bit_16_4_100_1_nonmath", - "sha": "81453e5718775630581ab9950e6c0ccf0d7a4177", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 22.110300080411182, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4863251727638222, - "normalized_score": 48.63251727638222 - }, - "bbh": { - "name": "BBH", - "value": 0.49871846432893613, - "normalized_score": 29.259630505683372 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10800604229607251, - "normalized_score": 10.80060422960725 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3104026845637584, - "normalized_score": 8.05369127516779 - }, - "musr": { - "name": "MUSR", - "value": 0.39828125, - "normalized_score": 8.351822916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3480718085106383, - "normalized_score": 27.56353427895981 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-23", - "submission_date": "2024-11-25", - "generation": 1, - "base_model": "unsloth/Phi-3-mini-4k-instruct-bnb-4bit", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 4.132, - "co2_cost": 2.7139691114871574 - } - }, - { - "id": "kms7530/chemeng_qwen-math-7b_24_1_100_1_float16_b3c1a1875fe4679e8c402b2bde02ae6c1127eb63_True", - "model": { - "name": "kms7530/chemeng_qwen-math-7b_24_1_100_1", - "sha": "b3c1a1875fe4679e8c402b2bde02ae6c1127eb63", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 11.66485615226577, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.211052230304481, - "normalized_score": 21.1052230304481 - }, - "bbh": { - "name": "BBH", - "value": 0.3578007894497858, - "normalized_score": 10.326751194458689 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2243202416918429, - "normalized_score": 22.432024169184288 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24412751677852348, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3686979166666666, - "normalized_score": 3.2539062499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.21584109042553193, 
- "normalized_score": 12.871232269503546 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-10", - "submission_date": "2024-10-14", - "generation": 4, - "base_model": "Qwen/Qwen2.5-7B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.911, - "co2_cost": 6.794124236950634 - } - }, - { - "id": "kms7530/chemeng_qwen-math-7b_24_1_100_1_nonmath_float16_ef9926d75ab1d54532f6a30dd5e760355eb9aa4d_True", - "model": { - "name": "kms7530/chemeng_qwen-math-7b_24_1_100_1_nonmath", - "sha": "ef9926d75ab1d54532f6a30dd5e760355eb9aa4d", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 16.982090044361417, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25836336476105626, - "normalized_score": 25.836336476105622 - }, - "bbh": { - "name": "BBH", - "value": 0.3892856967853256, - "normalized_score": 14.135344732061384 - }, - "math": { - "name": "MATH Level 5", - "value": 0.30966767371601206, - "normalized_score": 30.966767371601208 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.40869791666666666, - "normalized_score": 9.453906249999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.24517952127659576, - "normalized_score": 16.13105791962175 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-21", - "submission_date": "2024-11-22", - "generation": 4, - "base_model": "Qwen/Qwen2.5-7B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 15.231, - "co2_cost": 2.5580615242566784 - } - }, - { - "id": "kno10/ende-chat-0.0.5_float16_fff913e8ce204bab72b02582b663db669cb61412_True", - "model": { - "name": "kno10/ende-chat-0.0.5", - "sha": "fff913e8ce204bab72b02582b663db669cb61412", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 10.850085123772722, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3404455733010634, - "normalized_score": 34.04455733010634 - }, - "bbh": { - "name": "BBH", - "value": 0.3604365707523862, - "normalized_score": 11.125830654491324 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02039274924471299, - "normalized_score": 2.0392749244712993 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.39384375, - "normalized_score": 7.097135416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17902260638297873, - "normalized_score": 8.780289598108746 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-27", - "submission_date": "2024-06-27", - "generation": 0, - "base_model": "kno10/ende-chat-0.0.5", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.891, - "co2_cost": 2.9640345290148185 - } - }, - { - "id": "kno10/ende-chat-0.0.7_float16_1d45f51e5a3387378cea1036b0c65f2893466dd6_True", 
- "model": { - "name": "kno10/ende-chat-0.0.7", - "sha": "1d45f51e5a3387378cea1036b0c65f2893466dd6", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 13.371913816595809, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.440063476021401, - "normalized_score": 44.006347602140096 - }, - "bbh": { - "name": "BBH", - "value": 0.37918745577624335, - "normalized_score": 13.57894913417845 - }, - "math": { - "name": "MATH Level 5", - "value": 0.017371601208459216, - "normalized_score": 1.7371601208459215 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28104026845637586, - "normalized_score": 4.138702460850116 - }, - "musr": { - "name": "MUSR", - "value": 0.386125, - "normalized_score": 6.032291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.19664228723404256, - "normalized_score": 10.738031914893616 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-30", - "submission_date": "2024-07-30", - "generation": 0, - "base_model": "kno10/ende-chat-0.0.7", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.891, - "co2_cost": 1.9167316612184144 - } - }, - { - "id": "kyutai/helium-1-preview-2b_bfloat16_dab850c85de673482dbf28b873064a274583e3b3_False", - "model": { - "name": "kyutai/helium-1-preview-2b", - "sha": "dab850c85de673482dbf28b873064a274583e3b3", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "HeliumForCausalLM", - "average_score": 9.329143600411902, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.26136096667952147, - "normalized_score": 26.136096667952145 - }, - "bbh": { - "name": "BBH", - "value": 0.3638164815956466, - "normalized_score": 10.945139672307278 - }, - "math": { - "name": "MATH Level 5", - "value": 0.013595166163141994, - "normalized_score": 1.3595166163141994 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2785234899328859, - "normalized_score": 3.8031319910514525 - }, - "musr": { - "name": "MUSR", - "value": 0.3549583333333333, - "normalized_score": 4.036458333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.18725066489361702, - "normalized_score": 9.694518321513002 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2025-01-13", - "submission_date": "2025-01-14", - "generation": 0, - "base_model": "kyutai/helium-1-preview-2b", - "hub_license": "cc-by-4.0", - "hub_hearts": 140, - "params_billions": 2.173, - "co2_cost": 0.6881615613910846 - } - }, - { - "id": "kz919/QwQ-0.5B-Distilled-SFT_bfloat16_06b5127157cad87614a851f7b7b2ec2a9b8bd49d_True", - "model": { - "name": "kz919/QwQ-0.5B-Distilled-SFT", - "sha": "06b5127157cad87614a851f7b7b2ec2a9b8bd49d", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 9.089107211771186, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3076725311063534, - "normalized_score": 30.767253110635338 - }, - "bbh": { - "name": "BBH", - "value": 0.3256291569645335, - "normalized_score": 7.277628928283643 - }, - "math": { - "name": "MATH Level 5", - 
"value": 0.07401812688821752, - "normalized_score": 7.401812688821751 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.3408541666666667, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15874335106382978, - "normalized_score": 6.527039007092197 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-05", - "submission_date": "2025-01-10", - "generation": 1, - "base_model": "kz919/QwQ-0.5B-Distilled-SFT (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 23, - "params_billions": 0.494, - "co2_cost": 1.019595475018066 - } - }, - { - "id": "ladydaina/ECE-FDF_bfloat16_81e709d727e9ba5cf8707fe0c5c08e688a4cc6bd_False", - "model": { - "name": "ladydaina/ECE-FDF", - "sha": "81e709d727e9ba5cf8707fe0c5c08e688a4cc6bd", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.04236528565559, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3728440537773109, - "normalized_score": 37.28440537773109 - }, - "bbh": { - "name": "BBH", - "value": 0.5150177593278346, - "normalized_score": 32.250998220060005 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08157099697885196, - "normalized_score": 8.157099697885197 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.45039583333333333, - "normalized_score": 15.899479166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30069813829787234, - "normalized_score": 22.299793144208035 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-14", - "submission_date": "2024-11-14", - "generation": 1, - "base_model": "ladydaina/ECE-FDF (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.8924481728804147 - } - }, - { - "id": "laislemke/LLaMA-2-vicuna-7b-slerp_bfloat16_84a64f0ac8ff7db632a9d012fd5f4dcdf1eff950_True", - "model": { - "name": "laislemke/LLaMA-2-vicuna-7b-slerp", - "sha": "84a64f0ac8ff7db632a9d012fd5f4dcdf1eff950", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 7.694402356108033, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.29320979445648654, - "normalized_score": 29.320979445648653 - }, - "bbh": { - "name": "BBH", - "value": 0.29862163052356266, - "normalized_score": 2.598263938597983 - }, - "math": { - "name": "MATH Level 5", - "value": 0.011329305135951661, - "normalized_score": 1.1329305135951662 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27348993288590606, - "normalized_score": 3.1319910514541416 - }, - "musr": { - "name": "MUSR", - "value": 0.3833020833333333, - "normalized_score": 6.179427083333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.13422539893617022, - "normalized_score": 3.8028221040189125 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - 
"is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-03", - "submission_date": "2024-07-03", - "generation": 1, - "base_model": "laislemke/LLaMA-2-vicuna-7b-slerp (Merge)", - "hub_license": "llama2", - "hub_hearts": 0, - "params_billions": 6.738, - "co2_cost": 1.1941514924080088 - } - }, - { - "id": "lalainy/ECE-PRYMMAL-0.5B-FT-V5-MUSR_bfloat16_bf80bf3d14a79b5dcb322b97b6dbaf10e316a3ee_False", - "model": { - "name": "lalainy/ECE-PRYMMAL-0.5B-FT-V5-MUSR", - "sha": "bf80bf3d14a79b5dcb322b97b6dbaf10e316a3ee", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.057838460114436, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21377500587330506, - "normalized_score": 21.377500587330506 - }, - "bbh": { - "name": "BBH", - "value": 0.32694393820046386, - "normalized_score": 6.485922419195989 - }, - "math": { - "name": "MATH Level 5", - "value": 0.045317220543806644, - "normalized_score": 4.531722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.32625, - "normalized_score": 0.7812499999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15334109042553193, - "normalized_score": 5.926787825059102 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-22", - "submission_date": "2024-10-22", - "generation": 0, - "base_model": "lalainy/ECE-PRYMMAL-0.5B-FT-V5-MUSR", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 1.165879958375255 - } - }, - { - "id": "lalainy/ECE-PRYMMAL-0.5B-SLERP-V4_bfloat16_3a34c33dba0f02cd8c5172f45b6f6510cad1563d_False", - "model": { - "name": "lalainy/ECE-PRYMMAL-0.5B-SLERP-V4", - "sha": "3a34c33dba0f02cd8c5172f45b6f6510cad1563d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.3809433295396625, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15639724819035714, - "normalized_score": 15.639724819035713 - }, - "bbh": { - "name": "BBH", - "value": 0.2894308596288922, - "normalized_score": 2.096080371265706 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2625838926174497, - "normalized_score": 1.6778523489932917 - }, - "musr": { - "name": "MUSR", - "value": 0.37892708333333336, - "normalized_score": 4.99921875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11685505319148937, - "normalized_score": 1.8727836879432622 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-22", - "submission_date": "2024-10-22", - "generation": 0, - "base_model": "lalainy/ECE-PRYMMAL-0.5B-SLERP-V4", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 1.8909308333202324 - } - }, - { - "id": "lalainy/ECE-PRYMMAL-YL-0.5B-SLERP-BIS-V1_bfloat16_7865b6f386969b831e9c1754914463154fecbda2_False", - "model": { - "name": "lalainy/ECE-PRYMMAL-YL-0.5B-SLERP-BIS-V1", - "sha": 
"7865b6f386969b831e9c1754914463154fecbda2", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.6107222690519762, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1437075847639818, - "normalized_score": 14.37075847639818 - }, - "bbh": { - "name": "BBH", - "value": 0.3031946898842932, - "normalized_score": 2.92944936253925 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0007552870090634441, - "normalized_score": 0.0755287009063444 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2348993288590604, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3646041666666667, - "normalized_score": 2.9421874999999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11211768617021277, - "normalized_score": 1.3464095744680846 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-09", - "submission_date": "2024-11-12", - "generation": 0, - "base_model": "lalainy/ECE-PRYMMAL-YL-0.5B-SLERP-BIS-V1", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 1.0505467135859123 - } - }, - { - "id": "lalainy/ECE-PRYMMAL-YL-1B-SLERP-V3_bfloat16_eef4293be744aef0524f00a7657e915a6601a459_False", - "model": { - "name": "lalainy/ECE-PRYMMAL-YL-1B-SLERP-V3", - "sha": "eef4293be744aef0524f00a7657e915a6601a459", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 16.44790148688646, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.325008754549041, - "normalized_score": 32.500875454904104 - }, - "bbh": { - "name": "BBH", - "value": 0.42245501886651654, - "normalized_score": 18.22865501035824 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09743202416918428, - "normalized_score": 9.743202416918429 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.42128125, - "normalized_score": 10.82682291666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2931349734042553, - "normalized_score": 21.4594414893617 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-12", - "submission_date": "2024-11-12", - "generation": 0, - "base_model": "lalainy/ECE-PRYMMAL-YL-1B-SLERP-V3", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.544, - "co2_cost": 1.186213983282585 - } - }, - { - "id": "lalainy/ECE-PRYMMAL-YL-1B-SLERP-V4_bfloat16_dfa5e42b6f4f83cacc3b9e7d0ff05fec7f941835_False", - "model": { - "name": "lalainy/ECE-PRYMMAL-YL-1B-SLERP-V4", - "sha": "dfa5e42b6f4f83cacc3b9e7d0ff05fec7f941835", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 16.43838512195718, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.33235260220658963, - "normalized_score": 33.23526022065896 - }, - "bbh": { - "name": "BBH", - "value": 0.4170742409015322, - "normalized_score": 17.411751961605876 - }, - "math": { - "name": "MATH Level 
5", - "value": 0.10045317220543806, - "normalized_score": 10.045317220543806 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2860738255033557, - "normalized_score": 4.809843400447425 - }, - "musr": { - "name": "MUSR", - "value": 0.4306145833333333, - "normalized_score": 12.093489583333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.289311835106383, - "normalized_score": 21.034648345153663 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-12", - "submission_date": "2024-11-12", - "generation": 0, - "base_model": "lalainy/ECE-PRYMMAL-YL-1B-SLERP-V4", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.544, - "co2_cost": 1.2237425131364639 - } - }, - { - "id": "lalainy/ECE-PRYMMAL-YL-6B-SLERP-V1_bfloat16_56789ff5fcc863460fce652ebe6ed6bb5a4bd30c_False", - "model": { - "name": "lalainy/ECE-PRYMMAL-YL-6B-SLERP-V1", - "sha": "56789ff5fcc863460fce652ebe6ed6bb5a4bd30c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.037041744632443, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3264072660540699, - "normalized_score": 32.64072660540699 - }, - "bbh": { - "name": "BBH", - "value": 0.46293726502592586, - "normalized_score": 24.515258836802445 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1268882175226586, - "normalized_score": 12.688821752265861 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28859060402684567, - "normalized_score": 5.145413870246088 - }, - "musr": { - "name": "MUSR", - "value": 0.48639583333333336, - "normalized_score": 20.632812500000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32139295212765956, - "normalized_score": 24.599216903073284 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-08", - "submission_date": "2024-11-08", - "generation": 0, - "base_model": "lalainy/ECE-PRYMMAL-YL-6B-SLERP-V1", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 6.061, - "co2_cost": 1.003160791822773 - } - }, - { - "id": "lalainy/ECE-PRYMMAL-YL-6B-SLERP-V2_bfloat16_18d282d0206ae8f878a9cfa80ce4eaf042056569_False", - "model": { - "name": "lalainy/ECE-PRYMMAL-YL-6B-SLERP-V2", - "sha": "18d282d0206ae8f878a9cfa80ce4eaf042056569", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.01164616460848, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3248835312526319, - "normalized_score": 32.48835312526319 - }, - "bbh": { - "name": "BBH", - "value": 0.46293726502592586, - "normalized_score": 24.515258836802445 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1268882175226586, - "normalized_score": 12.688821752265861 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28859060402684567, - "normalized_score": 5.145413870246088 - }, - "musr": { - "name": "MUSR", - "value": 0.48639583333333336, - "normalized_score": 20.632812500000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32139295212765956, - "normalized_score": 24.599216903073284 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - 
"is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-09", - "submission_date": "2024-11-09", - "generation": 0, - "base_model": "lalainy/ECE-PRYMMAL-YL-6B-SLERP-V2", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 6.061, - "co2_cost": 0.99626226450699 - } - }, - { - "id": "langgptai/Qwen-las-v0.1_bfloat16_a7a4d4945d28bac955554c9abd2f74a71ebbf22f_True", - "model": { - "name": "langgptai/Qwen-las-v0.1", - "sha": "a7a4d4945d28bac955554c9abd2f74a71ebbf22f", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 11.633178497007412, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.33010412372504955, - "normalized_score": 33.01041237250495 - }, - "bbh": { - "name": "BBH", - "value": 0.38925525629956187, - "normalized_score": 14.698639898107368 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03700906344410876, - "normalized_score": 3.7009063444108756 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24664429530201343, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.37009374999999994, - "normalized_score": 3.66171875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2325465425531915, - "normalized_score": 14.727393617021276 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-26", - "submission_date": "2024-06-27", - "generation": 1, - "base_model": "Qwen/Qwen1.5-4B-Chat", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 7.901, - "co2_cost": 3.596192830549491 - } - }, - { - "id": "langgptai/qwen1.5-7b-chat-sa-v0.1_bfloat16_5f4f5e69ac7f1d508f8369e977de208b4803444b_True", - "model": { - "name": "langgptai/qwen1.5-7b-chat-sa-v0.1", - "sha": "5f4f5e69ac7f1d508f8369e977de208b4803444b", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 16.580170752646193, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42677429221133256, - "normalized_score": 42.67742922113326 - }, - "bbh": { - "name": "BBH", - "value": 0.4325267992878656, - "normalized_score": 20.302342129934097 - }, - "math": { - "name": "MATH Level 5", - "value": 0.030211480362537766, - "normalized_score": 3.0211480362537766 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31208053691275167, - "normalized_score": 8.277404921700223 - }, - "musr": { - "name": "MUSR", - "value": 0.3551458333333333, - "normalized_score": 3.0598958333333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29928523936170215, - "normalized_score": 22.14280437352246 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-30", - "submission_date": "2024-06-27", - "generation": 1, - "base_model": "Qwen/Qwen1.5-7B-Chat", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 15.443, - "co2_cost": 1.464321083426148 - } - }, - { - "id": "lars1234/Mistral-Small-24B-Instruct-2501-writer_float16_45850ca22637c0f5eaa2aa1fd22cf6d8aa619d47_False", - "model": { - "name": "lars1234/Mistral-Small-24B-Instruct-2501-writer", - "sha": "45850ca22637c0f5eaa2aa1fd22cf6d8aa619d47", - "precision": "float16", - 
"type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 39.855790317231644, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6565346613651777, - "normalized_score": 65.65346613651778 - }, - "bbh": { - "name": "BBH", - "value": 0.6733164099871131, - "normalized_score": 52.78404012259051 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3557401812688822, - "normalized_score": 35.57401812688822 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38926174496644295, - "normalized_score": 18.568232662192393 - }, - "musr": { - "name": "MUSR", - "value": 0.46453125, - "normalized_score": 17.133072916666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5447972074468085, - "normalized_score": 49.42191193853428 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-07", - "generation": 1, - "base_model": "lars1234/Mistral-Small-24B-Instruct-2501-writer (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 12, - "params_billions": 23.572, - "co2_cost": 1.2963527696731758 - } - }, - { - "id": "leafspark/Llama-3.1-8B-MultiReflection-Instruct_float16_b748441154efdbd7690d773b0194197bfc136ed0_True", - "model": { - "name": "leafspark/Llama-3.1-8B-MultiReflection-Instruct", - "sha": "b748441154efdbd7690d773b0194197bfc136ed0", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.87834660945809, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7125382872999197, - "normalized_score": 71.25382872999197 - }, - "bbh": { - "name": "BBH", - "value": 0.5009088261495708, - "normalized_score": 28.448045037118618 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17069486404833836, - "normalized_score": 17.069486404833835 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29278523489932884, - "normalized_score": 5.7046979865771785 - }, - "musr": { - "name": "MUSR", - "value": 0.3681979166666667, - "normalized_score": 8.524739583333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37242353723404253, - "normalized_score": 30.269281914893615 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-15", - "submission_date": "2024-09-15", - "generation": 1, - "base_model": "leafspark/Llama-3.1-8B-MultiReflection-Instruct (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 5, - "params_billions": 8.03, - "co2_cost": 1.6968936655051676 - } - }, - { - "id": "lemon07r/Gemma-2-Ataraxy-9B_bfloat16_fb22193268c7a6c3b4598255999ce2de3af8c256_False", - "model": { - "name": "lemon07r/Gemma-2-Ataraxy-9B", - "sha": "fb22193268c7a6c3b4598255999ce2de3af8c256", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 23.711508058895607, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3008772279773224, - "normalized_score": 30.08772279773224 - }, - "bbh": { - "name": "BBH", - "value": 0.5931298417725773, - "normalized_score": 42.03199052898647 - }, - 
"math": { - "name": "MATH Level 5", - "value": 0.08534743202416918, - "normalized_score": 8.534743202416918 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3347315436241611, - "normalized_score": 11.297539149888143 - }, - "musr": { - "name": "MUSR", - "value": 0.4424270833333333, - "normalized_score": 14.47005208333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4226230053191489, - "normalized_score": 35.847000591016545 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-14", - "submission_date": "2024-08-27", - "generation": 1, - "base_model": "lemon07r/Gemma-2-Ataraxy-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 75, - "params_billions": 10.159, - "co2_cost": 5.821376360861339 - } - }, - { - "id": "lemon07r/Gemma-2-Ataraxy-Advanced-9B_bfloat16_960654f5780f0b458367a6b591ad8440892c2aad_False", - "model": { - "name": "lemon07r/Gemma-2-Ataraxy-Advanced-9B", - "sha": "960654f5780f0b458367a6b591ad8440892c2aad", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 28.34483988750465, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5515964308036011, - "normalized_score": 55.15964308036011 - }, - "bbh": { - "name": "BBH", - "value": 0.5889067263184956, - "normalized_score": 41.16143815473681 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19788519637462235, - "normalized_score": 19.788519637462233 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33557046979865773, - "normalized_score": 11.409395973154364 - }, - "musr": { - "name": "MUSR", - "value": 0.3760729166666667, - "normalized_score": 6.509114583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4243683510638298, - "normalized_score": 36.040927895981085 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-30", - "submission_date": "2024-09-30", - "generation": 1, - "base_model": "lemon07r/Gemma-2-Ataraxy-Advanced-9B (Merge)", - "hub_license": "", - "hub_hearts": 4, - "params_billions": 10.159, - "co2_cost": 6.454554490472016 - } - }, - { - "id": "lemon07r/Gemma-2-Ataraxy-Remix-9B_bfloat16_f917a9be9f86d58fe122d58ba84cf4b08e4a975e_False", - "model": { - "name": "lemon07r/Gemma-2-Ataraxy-Remix-9B", - "sha": "f917a9be9f86d58fe122d58ba84cf4b08e4a975e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 32.35834799113052, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7083416446140685, - "normalized_score": 70.83416446140684 - }, - "bbh": { - "name": "BBH", - "value": 0.5892021015046846, - "normalized_score": 41.59231281593379 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20166163141993956, - "normalized_score": 20.166163141993955 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3389261744966443, - "normalized_score": 11.85682326621924 - }, - "musr": { - "name": "MUSR", - "value": 0.4371875, - "normalized_score": 13.715104166666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.42386968085106386, - "normalized_score": 35.985520094562645 - } - }, - "features": { - 
"is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-30", - "submission_date": "2024-09-30", - "generation": 1, - "base_model": "lemon07r/Gemma-2-Ataraxy-Remix-9B (Merge)", - "hub_license": "", - "hub_hearts": 5, - "params_billions": 10.159, - "co2_cost": 4.314881808199638 - } - }, - { - "id": "lemon07r/Gemma-2-Ataraxy-v2-9B_bfloat16_77aca48ac25eb2cbe8c0751a4ef77e5face34d80_False", - "model": { - "name": "lemon07r/Gemma-2-Ataraxy-v2-9B", - "sha": "77aca48ac25eb2cbe8c0751a4ef77e5face34d80", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 20.43264188820716, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21362429464930827, - "normalized_score": 21.362429464930827 - }, - "bbh": { - "name": "BBH", - "value": 0.5765835815625312, - "normalized_score": 39.79685359725269 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08459214501510574, - "normalized_score": 8.459214501510575 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3422818791946309, - "normalized_score": 12.304250559284117 - }, - "musr": { - "name": "MUSR", - "value": 0.34838541666666667, - "normalized_score": 4.881510416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.422124335106383, - "normalized_score": 35.791592789598106 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-28", - "submission_date": "2024-09-28", - "generation": 1, - "base_model": "lemon07r/Gemma-2-Ataraxy-v2-9B (Merge)", - "hub_license": "", - "hub_hearts": 17, - "params_billions": 10.159, - "co2_cost": 5.992483788088308 - } - }, - { - "id": "lemon07r/Gemma-2-Ataraxy-v2a-9B_bfloat16_899fb093d80569fc919f53217e3acf031dde89a5_False", - "model": { - "name": "lemon07r/Gemma-2-Ataraxy-v2a-9B", - "sha": "899fb093d80569fc919f53217e3acf031dde89a5", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 16.03899884599574, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15946909755005606, - "normalized_score": 15.946909755005606 - }, - "bbh": { - "name": "BBH", - "value": 0.518248966271832, - "normalized_score": 31.19852836941699 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06117824773413897, - "normalized_score": 6.117824773413897 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33976510067114096, - "normalized_score": 11.968680089485462 - }, - "musr": { - "name": "MUSR", - "value": 0.31647916666666664, - "normalized_score": 3.0598958333333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35147938829787234, - "normalized_score": 27.942154255319146 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-29", - "submission_date": "2024-09-29", - "generation": 1, - "base_model": "lemon07r/Gemma-2-Ataraxy-v2a-9B (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 10.159, - "co2_cost": 5.9627924712961695 - } - }, - { - "id": 
"lemon07r/Gemma-2-Ataraxy-v2f-9B_bfloat16_44da9d6a9bc7be5a9af24fb0951047849d5f717d_False", - "model": { - "name": "lemon07r/Gemma-2-Ataraxy-v2f-9B", - "sha": "44da9d6a9bc7be5a9af24fb0951047849d5f717d", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 20.70451439160213, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.37911408396388246, - "normalized_score": 37.911408396388246 - }, - "bbh": { - "name": "BBH", - "value": 0.5192845467961766, - "normalized_score": 31.421336195418803 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1163141993957704, - "normalized_score": 11.63141993957704 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3389261744966443, - "normalized_score": 11.85682326621924 - }, - "musr": { - "name": "MUSR", - "value": 0.3231458333333333, - "normalized_score": 3.5932291666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3503158244680851, - "normalized_score": 27.812869385342786 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-30", - "submission_date": "2024-09-30", - "generation": 1, - "base_model": "lemon07r/Gemma-2-Ataraxy-v2f-9B (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 10.159, - "co2_cost": 6.797191109957615 - } - }, - { - "id": "lemon07r/Gemma-2-Ataraxy-v3-Advanced-9B_bfloat16_318afe2b44a150780e44483a0f90a499e81f946f_False", - "model": { - "name": "lemon07r/Gemma-2-Ataraxy-v3-Advanced-9B", - "sha": "318afe2b44a150780e44483a0f90a499e81f946f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 31.430553707803153, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6601816513517467, - "normalized_score": 66.01816513517468 - }, - "bbh": { - "name": "BBH", - "value": 0.5935146853737787, - "normalized_score": 42.21047229127766 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18731117824773413, - "normalized_score": 18.731117824773413 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33640939597315433, - "normalized_score": 11.521252796420578 - }, - "musr": { - "name": "MUSR", - "value": 0.44496874999999997, - "normalized_score": 14.58776041666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.41963098404255317, - "normalized_score": 35.51455378250591 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-09", - "submission_date": "2024-10-09", - "generation": 1, - "base_model": "lemon07r/Gemma-2-Ataraxy-v3-Advanced-9B (Merge)", - "hub_license": "", - "hub_hearts": 3, - "params_billions": 10.159, - "co2_cost": 5.575991958976661 - } - }, - { - "id": "lemon07r/Gemma-2-Ataraxy-v3b-9B_bfloat16_de8bbacddabf22dad89658d3b3d358b3eccbd59c_False", - "model": { - "name": "lemon07r/Gemma-2-Ataraxy-v3b-9B", - "sha": "de8bbacddabf22dad89658d3b3d358b3eccbd59c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 32.188534615134834, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - 
"name": "IFEval", - "value": 0.6809144181881852, - "normalized_score": 68.09144181881851 - }, - "bbh": { - "name": "BBH", - "value": 0.5907698162898164, - "normalized_score": 41.62398549212332 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21525679758308158, - "normalized_score": 21.525679758308158 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33305369127516776, - "normalized_score": 11.073825503355701 - }, - "musr": { - "name": "MUSR", - "value": 0.44887499999999997, - "normalized_score": 15.209374999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4204621010638298, - "normalized_score": 35.606900118203306 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-08", - "submission_date": "2024-10-08", - "generation": 1, - "base_model": "lemon07r/Gemma-2-Ataraxy-v3b-9B (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 9.242, - "co2_cost": 4.607455531980445 - } - }, - { - "id": "lemon07r/Gemma-2-Ataraxy-v3i-9B_bfloat16_8bd1ce81b6f42ebeebd9957b605c7313eedbe0a8_False", - "model": { - "name": "lemon07r/Gemma-2-Ataraxy-v3i-9B", - "sha": "8bd1ce81b6f42ebeebd9957b605c7313eedbe0a8", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 23.824039054205116, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4203047912871182, - "normalized_score": 42.03047912871182 - }, - "bbh": { - "name": "BBH", - "value": 0.5625750779805955, - "normalized_score": 38.238824874777286 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15332326283987915, - "normalized_score": 15.332326283987916 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32802013422818793, - "normalized_score": 10.402684563758392 - }, - "musr": { - "name": "MUSR", - "value": 0.31806249999999997, - "normalized_score": 1.7578124999999993 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.41663896276595747, - "normalized_score": 35.182106973995275 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-06", - "submission_date": "2024-10-06", - "generation": 1, - "base_model": "lemon07r/Gemma-2-Ataraxy-v3i-9B (Merge)", - "hub_license": "", - "hub_hearts": 3, - "params_billions": 9.242, - "co2_cost": 6.951019763878497 - } - }, - { - "id": "lemon07r/Gemma-2-Ataraxy-v3j-9B_bfloat16_7ad4a1bf604f37bd82f3470dbc24870896d7287d_False", - "model": { - "name": "lemon07r/Gemma-2-Ataraxy-v3j-9B", - "sha": "7ad4a1bf604f37bd82f3470dbc24870896d7287d", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 23.987245785456135, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4169326276501904, - "normalized_score": 41.69326276501904 - }, - "bbh": { - "name": "BBH", - "value": 0.5632286961183511, - "normalized_score": 38.16656919949616 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1691842900302115, - "normalized_score": 16.91842900302115 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32802013422818793, - "normalized_score": 10.402684563758392 - }, - "musr": { - "name": "MUSR", - "value": 0.31803125, - 
"normalized_score": 1.920572916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.41339760638297873, - "normalized_score": 34.821956264775416 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-09", - "submission_date": "2024-10-09", - "generation": 1, - "base_model": "lemon07r/Gemma-2-Ataraxy-v3j-9B (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 9.242, - "co2_cost": 6.827644953576105 - } - }, - { - "id": "lemon07r/Gemma-2-Ataraxy-v4-Advanced-9B_bfloat16_bc9edb78753fc60a22268cd91e93e43dd9fbc648_False", - "model": { - "name": "lemon07r/Gemma-2-Ataraxy-v4-Advanced-9B", - "sha": "bc9edb78753fc60a22268cd91e93e43dd9fbc648", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 33.40277377153587, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7015474496558022, - "normalized_score": 70.15474496558022 - }, - "bbh": { - "name": "BBH", - "value": 0.6023627309683861, - "normalized_score": 43.18189722480503 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21525679758308158, - "normalized_score": 21.525679758308158 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3389261744966443, - "normalized_score": 11.85682326621924 - }, - "musr": { - "name": "MUSR", - "value": 0.4580520833333333, - "normalized_score": 16.289843750000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4366688829787234, - "normalized_score": 37.4076536643026 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-13", - "submission_date": "2024-10-14", - "generation": 1, - "base_model": "lemon07r/Gemma-2-Ataraxy-v4-Advanced-9B (Merge)", - "hub_license": "", - "hub_hearts": 6, - "params_billions": 10.159, - "co2_cost": 4.659128481215576 - } - }, - { - "id": "lemon07r/Gemma-2-Ataraxy-v4a-Advanced-9B_bfloat16_78dca140ec1b704233c932706fc9640404433cc5_False", - "model": { - "name": "lemon07r/Gemma-2-Ataraxy-v4a-Advanced-9B", - "sha": "78dca140ec1b704233c932706fc9640404433cc5", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 33.2858610237303, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7135123694020753, - "normalized_score": 71.35123694020754 - }, - "bbh": { - "name": "BBH", - "value": 0.598838715496553, - "normalized_score": 42.73751687792403 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21148036253776434, - "normalized_score": 21.148036253776432 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34395973154362414, - "normalized_score": 12.527964205816552 - }, - "musr": { - "name": "MUSR", - "value": 0.44890625, - "normalized_score": 15.179947916666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4309341755319149, - "normalized_score": 36.77046394799054 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-14", - "submission_date": "2024-10-14", - "generation": 1, - "base_model": "lemon07r/Gemma-2-Ataraxy-v4a-Advanced-9B (Merge)", - 
"hub_license": "", - "hub_hearts": 4, - "params_billions": 10.159, - "co2_cost": 4.5792358936199395 - } - }, - { - "id": "lemon07r/Gemma-2-Ataraxy-v4b-9B_bfloat16_70dc6ddfaede76ff01584922fca53ba90837cd52_False", - "model": { - "name": "lemon07r/Gemma-2-Ataraxy-v4b-9B", - "sha": "70dc6ddfaede76ff01584922fca53ba90837cd52", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 33.46834927886008, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6878338364428604, - "normalized_score": 68.78338364428605 - }, - "bbh": { - "name": "BBH", - "value": 0.6039158192304305, - "normalized_score": 43.44273930792989 - }, - "math": { - "name": "MATH Level 5", - "value": 0.23338368580060423, - "normalized_score": 23.338368580060422 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34060402684563756, - "normalized_score": 12.080536912751676 - }, - "musr": { - "name": "MUSR", - "value": 0.45547916666666666, - "normalized_score": 15.868229166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4356715425531915, - "normalized_score": 37.29683806146573 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-16", - "submission_date": "2024-10-22", - "generation": 1, - "base_model": "lemon07r/Gemma-2-Ataraxy-v4b-9B (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 10.159, - "co2_cost": 4.8500336804843585 - } - }, - { - "id": "lemon07r/Gemma-2-Ataraxy-v4c-9B_bfloat16_26f2619a432266a5f73c135804b1aa34f00ec689_False", - "model": { - "name": "lemon07r/Gemma-2-Ataraxy-v4c-9B", - "sha": "26f2619a432266a5f73c135804b1aa34f00ec689", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 33.406962694028415, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6945282960323054, - "normalized_score": 69.45282960323055 - }, - "bbh": { - "name": "BBH", - "value": 0.6084319292299174, - "normalized_score": 44.12536650674094 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22658610271903323, - "normalized_score": 22.658610271903324 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3338926174496644, - "normalized_score": 11.185682326621922 - }, - "musr": { - "name": "MUSR", - "value": 0.45278124999999997, - "normalized_score": 15.297656249999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43949468085106386, - "normalized_score": 37.72163120567376 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-16", - "submission_date": "2024-10-16", - "generation": 1, - "base_model": "lemon07r/Gemma-2-Ataraxy-v4c-9B (Merge)", - "hub_license": "", - "hub_hearts": 4, - "params_billions": 10.159, - "co2_cost": 5.530475180368803 - } - }, - { - "id": "lemon07r/Gemma-2-Ataraxy-v4d-9B_bfloat16_24f9ad78e42c92df5277b3aea4deb4083a8625d9_False", - "model": { - "name": "lemon07r/Gemma-2-Ataraxy-v4d-9B", - "sha": "24f9ad78e42c92df5277b3aea4deb4083a8625d9", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.242385930783996, - 
"has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7250029920610646, - "normalized_score": 72.50029920610646 - }, - "bbh": { - "name": "BBH", - "value": 0.6054158192304304, - "normalized_score": 43.595239307929894 - }, - "math": { - "name": "MATH Level 5", - "value": 0.23338368580060423, - "normalized_score": 23.338368580060422 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34731543624161076, - "normalized_score": 12.975391498881436 - }, - "musr": { - "name": "MUSR", - "value": 0.4541458333333333, - "normalized_score": 15.868229166666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4345910904255319, - "normalized_score": 37.1767878250591 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-25", - "submission_date": "2024-10-25", - "generation": 1, - "base_model": "lemon07r/Gemma-2-Ataraxy-v4d-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 15, - "params_billions": 10.159, - "co2_cost": 5.370554856308457 - } - }, - { - "id": "lemon07r/Llama-3-RedMagic4-8B_bfloat16_65ee08a0434f1903a8971640fc3cca6c8ae8590e_True", - "model": { - "name": "lemon07r/Llama-3-RedMagic4-8B", - "sha": "65ee08a0434f1903a8971640fc3cca6c8ae8590e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.430990497953946, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4864005283758206, - "normalized_score": 48.64005283758206 - }, - "bbh": { - "name": "BBH", - "value": 0.42560489470390417, - "normalized_score": 19.475746974326068 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08987915407854985, - "normalized_score": 8.987915407854985 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.3766354166666666, - "normalized_score": 4.379427083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3676030585106383, - "normalized_score": 29.733673167848696 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-19", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "lemon07r/Llama-3-RedMagic4-8B (Merge)", - "hub_license": "llama3", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.5971920239970157 - } - }, - { - "id": "lemon07r/llama-3-NeuralMahou-8b_bfloat16_59a0937df85f9d6d65d15dbb4a7c06b6ad8a0305_True", - "model": { - "name": "lemon07r/llama-3-NeuralMahou-8b", - "sha": "59a0937df85f9d6d65d15dbb4a7c06b6ad8a0305", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.846074213837827, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49009738604680025, - "normalized_score": 49.009738604680024 - }, - "bbh": { - "name": "BBH", - "value": 0.41841123683301523, - "normalized_score": 18.69206874721008 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10196374622356495, - "normalized_score": 10.196374622356496 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28859060402684567, - "normalized_score": 5.145413870246088 - 
}, - "musr": { - "name": "MUSR", - "value": 0.3872708333333333, - "normalized_score": 6.142187500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3690159574468085, - "normalized_score": 29.89066193853428 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-30", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "lemon07r/llama-3-NeuralMahou-8b (Merge)", - "hub_license": "llama3", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.6924554086165793 - } - }, - { - "id": "lesubra/ECE-EIFFEL-3B_float16_aa56433ac824d245ac82d5e55ce8e589df0711ec_False", - "model": { - "name": "lesubra/ECE-EIFFEL-3B", - "sha": "aa56433ac824d245ac82d5e55ce8e589df0711ec", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 22.50442333553349, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3469405621528655, - "normalized_score": 34.69405621528655 - }, - "bbh": { - "name": "BBH", - "value": 0.5101583259186949, - "normalized_score": 31.286439186186243 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1216012084592145, - "normalized_score": 12.16012084592145 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3313758389261745, - "normalized_score": 10.850111856823268 - }, - "musr": { - "name": "MUSR", - "value": 0.43622916666666667, - "normalized_score": 14.6953125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3820644946808511, - "normalized_score": 31.340499408983447 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-01", - "submission_date": "2024-10-01", - "generation": 0, - "base_model": "lesubra/ECE-EIFFEL-3B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.821, - "co2_cost": 2.3088140916354853 - } - }, - { - "id": "lesubra/ECE-EIFFEL-3Bv2_float16_b059d1a0d49f09d6df34d93f133d24f6641bc535_False", - "model": { - "name": "lesubra/ECE-EIFFEL-3Bv2", - "sha": "b059d1a0d49f09d6df34d93f133d24f6641bc535", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 23.141091471464545, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.30130276555096036, - "normalized_score": 30.13027655509603 - }, - "bbh": { - "name": "BBH", - "value": 0.5424007873371969, - "normalized_score": 36.35313296509659 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11858006042296072, - "normalized_score": 11.858006042296072 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33557046979865773, - "normalized_score": 11.409395973154364 - }, - "musr": { - "name": "MUSR", - "value": 0.4442916666666667, - "normalized_score": 15.769791666666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39993351063829785, - "normalized_score": 33.32594562647754 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-03", - "submission_date": "2024-10-03", - "generation": 0, - "base_model": "lesubra/ECE-EIFFEL-3Bv2", - "hub_license": "apache-2.0", - "hub_hearts": 
0, - "params_billions": 3.821, - "co2_cost": 1.7206899111516973 - } - }, - { - "id": "lesubra/ECE-EIFFEL-3Bv3_float16_2cd31e58d38b96626a8a83192b5d2eec6669f5e2_False", - "model": { - "name": "lesubra/ECE-EIFFEL-3Bv3", - "sha": "2cd31e58d38b96626a8a83192b5d2eec6669f5e2", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 25.50122739306737, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3786142989490109, - "normalized_score": 37.861429894901086 - }, - "bbh": { - "name": "BBH", - "value": 0.5469446669064592, - "normalized_score": 36.46408334995511 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16691842900302115, - "normalized_score": 16.691842900302113 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3296979865771812, - "normalized_score": 10.626398210290827 - }, - "musr": { - "name": "MUSR", - "value": 0.46751041666666665, - "normalized_score": 18.30546875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39752327127659576, - "normalized_score": 33.05814125295508 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-07", - "submission_date": "2024-10-07", - "generation": 0, - "base_model": "lesubra/ECE-EIFFEL-3Bv3", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.821, - "co2_cost": 1.4342936747088417 - } - }, - { - "id": "lesubra/ECE-PRYMMAL-3B-SLERP-V1_float16_e46f1de93f10b1a57f9175653fd29dda355a61e6_False", - "model": { - "name": "lesubra/ECE-PRYMMAL-3B-SLERP-V1", - "sha": "e46f1de93f10b1a57f9175653fd29dda355a61e6", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 23.135359696186224, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2932840418977203, - "normalized_score": 29.328404189772023 - }, - "bbh": { - "name": "BBH", - "value": 0.5340594627933309, - "normalized_score": 35.05306761164019 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1661631419939577, - "normalized_score": 16.61631419939577 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31711409395973156, - "normalized_score": 8.948545861297541 - }, - "musr": { - "name": "MUSR", - "value": 0.45951041666666664, - "normalized_score": 16.638802083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3900432180851064, - "normalized_score": 32.227024231678485 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-28", - "submission_date": "2024-10-28", - "generation": 0, - "base_model": "lesubra/ECE-PRYMMAL-3B-SLERP-V1", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.821, - "co2_cost": 1.4271879067095128 - } - }, - { - "id": "lesubra/ECE-PRYMMAL-3B-SLERP-V2_float16_ba617ea0b1ed5497f62bf49635c30bcfb0547133_False", - "model": { - "name": "lesubra/ECE-PRYMMAL-3B-SLERP-V2", - "sha": "ba617ea0b1ed5497f62bf49635c30bcfb0547133", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 23.135359696186224, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 
0.2932840418977203, - "normalized_score": 29.328404189772023 - }, - "bbh": { - "name": "BBH", - "value": 0.5340594627933309, - "normalized_score": 35.05306761164019 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1661631419939577, - "normalized_score": 16.61631419939577 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31711409395973156, - "normalized_score": 8.948545861297541 - }, - "musr": { - "name": "MUSR", - "value": 0.45951041666666664, - "normalized_score": 16.638802083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3900432180851064, - "normalized_score": 32.227024231678485 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-28", - "submission_date": "2024-10-28", - "generation": 0, - "base_model": "lesubra/ECE-PRYMMAL-3B-SLERP-V2", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.821, - "co2_cost": 1.4606505124151785 - } - }, - { - "id": "lesubra/ECE-PRYMMAL-3B-SLERP_2-V1_float16_354e5c732dd2fde016da1e33a018d2d2787f7805_False", - "model": { - "name": "lesubra/ECE-PRYMMAL-3B-SLERP_2-V1", - "sha": "354e5c732dd2fde016da1e33a018d2d2787f7805", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 24.961424176446453, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3649006857360692, - "normalized_score": 36.49006857360692 - }, - "bbh": { - "name": "BBH", - "value": 0.5411447467732948, - "normalized_score": 35.710681082357645 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16767371601208458, - "normalized_score": 16.76737160120846 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3213087248322148, - "normalized_score": 9.507829977628639 - }, - "musr": { - "name": "MUSR", - "value": 0.4661458333333333, - "normalized_score": 18.068229166666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3990192819148936, - "normalized_score": 33.2243646572104 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-06", - "submission_date": "2024-11-06", - "generation": 0, - "base_model": "lesubra/ECE-PRYMMAL-3B-SLERP_2-V1", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.821, - "co2_cost": 1.2788553558488625 - } - }, - { - "id": "lesubra/ECE-PRYMMAL-3B-SLERP_2-V2_float16_d5074a951206f946a6be331a74bd4fa381d348eb_False", - "model": { - "name": "lesubra/ECE-PRYMMAL-3B-SLERP_2-V2", - "sha": "d5074a951206f946a6be331a74bd4fa381d348eb", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 24.98681975647042, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3664244205375071, - "normalized_score": 36.64244205375071 - }, - "bbh": { - "name": "BBH", - "value": 0.5411447467732948, - "normalized_score": 35.710681082357645 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16767371601208458, - "normalized_score": 16.76737160120846 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3213087248322148, - "normalized_score": 9.507829977628639 - }, - "musr": { - "name": "MUSR", - "value": 0.4661458333333333, - "normalized_score": 18.068229166666665 - }, - "mmlu_pro": { 
- "name": "MMLU-PRO", - "value": 0.3990192819148936, - "normalized_score": 33.2243646572104 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-06", - "submission_date": "2024-11-06", - "generation": 0, - "base_model": "lesubra/ECE-PRYMMAL-3B-SLERP_2-V2", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.821, - "co2_cost": 1.071033580260257 - } - }, - { - "id": "lesubra/merge-test_float16_39895c64dd646443719873a2ab2b19d3afe4f86c_True", - "model": { - "name": "lesubra/merge-test", - "sha": "39895c64dd646443719873a2ab2b19d3afe4f86c", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 26.075520921002326, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.538257379309122, - "normalized_score": 53.8257379309122 - }, - "bbh": { - "name": "BBH", - "value": 0.5240434385320306, - "normalized_score": 33.353311441745 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12084592145015106, - "normalized_score": 12.084592145015106 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3221476510067114, - "normalized_score": 9.61968680089485 - }, - "musr": { - "name": "MUSR", - "value": 0.44190625, - "normalized_score": 15.638281249999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38738364361702127, - "normalized_score": 31.93151595744681 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-27", - "submission_date": "2024-09-27", - "generation": 0, - "base_model": "lesubra/merge-test", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.821, - "co2_cost": 1.9370512511842015 - } - }, - { - "id": "lightblue/suzume-llama-3-8B-multilingual_bfloat16_0cb15aa9ec685eef494f9a15f65aefcfe3c04c66_True", - "model": { - "name": "lightblue/suzume-llama-3-8B-multilingual", - "sha": "0cb15aa9ec685eef494f9a15f65aefcfe3c04c66", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.98630635028447, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6678003253589365, - "normalized_score": 66.78003253589365 - }, - "bbh": { - "name": "BBH", - "value": 0.49499524187359745, - "normalized_score": 28.895092037237777 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09441087613293052, - "normalized_score": 9.441087613293051 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.39768749999999997, - "normalized_score": 7.8442708333333355 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33834773936170215, - "normalized_score": 26.483082151300234 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-23", - "submission_date": "2024-07-30", - "generation": 1, - "base_model": "meta-llama/Meta-Llama-3-8B-Instruct", - "hub_license": "other", - "hub_hearts": 110, - "params_billions": 8.03, - "co2_cost": 1.6819792497844037 - } - 
}, - { - "id": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-full_bfloat16_ac04e23fb8861c188f8ecddfecc4250b40aee04d_True", - "model": { - "name": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-full", - "sha": "ac04e23fb8861c188f8ecddfecc4250b40aee04d", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.301707657676165, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5817464327983085, - "normalized_score": 58.174643279830846 - }, - "bbh": { - "name": "BBH", - "value": 0.4714219934773132, - "normalized_score": 25.075474888496917 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07628398791540786, - "normalized_score": 7.628398791540786 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.3221875, - "normalized_score": 4.040104166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33095079787234044, - "normalized_score": 25.66119976359338 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-25", - "submission_date": "2024-07-29", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3-8B-Instruct", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.6051331756851843 - } - }, - { - "id": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-half_bfloat16_b82150a9840ba5ba93918c745adc70afc6ad2ce1_True", - "model": { - "name": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-half", - "sha": "b82150a9840ba5ba93918c745adc70afc6ad2ce1", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.50979670721306, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6249107922534431, - "normalized_score": 62.49107922534431 - }, - "bbh": { - "name": "BBH", - "value": 0.47074584910573014, - "normalized_score": 26.34859792572119 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09063444108761329, - "normalized_score": 9.06344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24496644295302014, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.35158333333333336, - "normalized_score": 2.114583333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36136968085106386, - "normalized_score": 29.041075650118202 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-25", - "submission_date": "2024-06-29", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3-8B-Instruct", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 16, - "params_billions": 8.03, - "co2_cost": 1.7726738903491541 - } - }, - { - "id": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top25_bfloat16_5a2f17238cc83932e00613d285f8bf6b8f4a0c3a_True", - "model": { - "name": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top25", - "sha": "5a2f17238cc83932e00613d285f8bf6b8f4a0c3a", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.684768112420983, - "has_chat_template": 
true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6636535503574958, - "normalized_score": 66.36535503574959 - }, - "bbh": { - "name": "BBH", - "value": 0.4864641205580417, - "normalized_score": 27.665285015300114 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1042296072507553, - "normalized_score": 10.42296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.35660416666666667, - "normalized_score": 4.808854166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3684341755319149, - "normalized_score": 29.826019503546092 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-26", - "submission_date": "2024-06-29", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3-8B-Instruct", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 3, - "params_billions": 8.03, - "co2_cost": 1.6697367010796205 - } - }, - { - "id": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top75_bfloat16_555f4a0092f239557e1aa34f9d489e8156b907bb_True", - "model": { - "name": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top75", - "sha": "555f4a0092f239557e1aa34f9d489e8156b907bb", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.647119777794057, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6687245397766814, - "normalized_score": 66.87245397766813 - }, - "bbh": { - "name": "BBH", - "value": 0.48333166095856117, - "normalized_score": 28.056255938988922 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07854984894259819, - "normalized_score": 7.854984894259818 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.3816875, - "normalized_score": 5.3109375000000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37691156914893614, - "normalized_score": 30.767952127659566 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-26", - "submission_date": "2024-06-29", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3-8B-Instruct", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 3, - "params_billions": 8.03, - "co2_cost": 1.8930085171067998 - } - }, - { - "id": "lkoenig/BBAI_145__bfloat16_99e3e08fd5154b863b41d07b88fc8c67f4bab0ea_False", - "model": { - "name": "lkoenig/BBAI_145_", - "sha": "99e3e08fd5154b863b41d07b88fc8c67f4bab0ea", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 29.670567492403322, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44503473007176514, - "normalized_score": 44.50347300717651 - }, - "bbh": { - "name": "BBH", - "value": 0.5567169940219221, - "normalized_score": 36.73010277668871 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3610271903323263, - "normalized_score": 36.102719033232624 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3162751677852349, - "normalized_score": 8.83668903803132 - }, - "musr": { - "name": "MUSR", - 
"value": 0.4382083333333333, - "normalized_score": 13.076041666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.448969414893617, - "normalized_score": 38.77437943262411 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-26", - "submission_date": "2025-02-26", - "generation": 1, - "base_model": "lkoenig/BBAI_145_ (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.8429295416019482 - } - }, - { - "id": "lkoenig/BBAI_200_Gemma_bfloat16_ebb82acd5ce0a2c906d730d229db0260190f6056_False", - "model": { - "name": "lkoenig/BBAI_200_Gemma", - "sha": "ebb82acd5ce0a2c906d730d229db0260190f6056", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 5.086148057258666, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.07051733843978422, - "normalized_score": 7.051733843978422 - }, - "bbh": { - "name": "BBH", - "value": 0.3449044607726533, - "normalized_score": 9.395846417818936 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26677852348993286, - "normalized_score": 2.2371364653243813 - }, - "musr": { - "name": "MUSR", - "value": 0.36311458333333335, - "normalized_score": 4.289322916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16788563829787234, - "normalized_score": 7.542848699763592 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-26", - "submission_date": "2025-02-26", - "generation": 1, - "base_model": "lkoenig/BBAI_200_Gemma (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 19.3, - "co2_cost": 3.2482047638736105 - } - }, - { - "id": "lkoenig/BBAI_212_QwenLawLo_bfloat16_e4229aee1152cd8f3923528d0d1e7480a78cc798_False", - "model": { - "name": "lkoenig/BBAI_212_QwenLawLo", - "sha": "e4229aee1152cd8f3923528d0d1e7480a78cc798", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 29.879847291376716, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4566250880995758, - "normalized_score": 45.66250880995758 - }, - "bbh": { - "name": "BBH", - "value": 0.5574113357405873, - "normalized_score": 36.93312368555738 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3602719033232628, - "normalized_score": 36.02719033232628 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3162751677852349, - "normalized_score": 8.83668903803132 - }, - "musr": { - "name": "MUSR", - "value": 0.43696874999999996, - "normalized_score": 13.054427083333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44888630319148937, - "normalized_score": 38.765144799054376 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-01", - "submission_date": "2025-03-01", - "generation": 1, - "base_model": "lkoenig/BBAI_212_QwenLawLo (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 
0.6815753688502857 - } - }, - { - "id": "lkoenig/BBAI_212_Qwencore_bfloat16_032f274a2f3fb7a4e1cd6f876d7e3fbe557d7027_False", - "model": { - "name": "lkoenig/BBAI_212_Qwencore", - "sha": "032f274a2f3fb7a4e1cd6f876d7e3fbe557d7027", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 29.286401247119155, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4384400058511416, - "normalized_score": 43.84400058511416 - }, - "bbh": { - "name": "BBH", - "value": 0.556868234536878, - "normalized_score": 36.74407860821423 - }, - "math": { - "name": "MATH Level 5", - "value": 0.34894259818731116, - "normalized_score": 34.894259818731115 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3162751677852349, - "normalized_score": 8.83668903803132 - }, - "musr": { - "name": "MUSR", - "value": 0.4343333333333333, - "normalized_score": 12.625000000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.448969414893617, - "normalized_score": 38.77437943262411 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "lkoenig/BBAI_212_Qwencore (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 0.6966295253661859 - } - }, - { - "id": "lkoenig/BBAI_230_Xiaqwen_bfloat16_54ab14c2731a9f4cae610407b83b59c82bdf761a_False", - "model": { - "name": "lkoenig/BBAI_230_Xiaqwen", - "sha": "54ab14c2731a9f4cae610407b83b59c82bdf761a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 30.155385860810895, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4648931501748693, - "normalized_score": 46.48931501748693 - }, - "bbh": { - "name": "BBH", - "value": 0.557779565750489, - "normalized_score": 36.82828817621174 - }, - "math": { - "name": "MATH Level 5", - "value": 0.36631419939577037, - "normalized_score": 36.631419939577036 - }, - "gpqa": { - "name": "GPQA", - "value": 0.313758389261745, - "normalized_score": 8.501118568232664 - }, - "musr": { - "name": "MUSR", - "value": 0.4422083333333333, - "normalized_score": 13.809375000000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4480551861702128, - "normalized_score": 38.672798463356976 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-26", - "submission_date": "2025-02-26", - "generation": 1, - "base_model": "lkoenig/BBAI_230_Xiaqwen (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 7.616, - "co2_cost": 0.7278725264579338 - } - }, - { - "id": "lkoenig/BBAI_375_QwenDyancabs_bfloat16_2a21d76baa1a59605d7b5df0ff091efd7452a001_False", - "model": { - "name": "lkoenig/BBAI_375_QwenDyancabs", - "sha": "2a21d76baa1a59605d7b5df0ff091efd7452a001", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 30.23229065895235, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4565752204151651, - "normalized_score": 
45.657522041516515 - }, - "bbh": { - "name": "BBH", - "value": 0.5571383122938682, - "normalized_score": 36.71718233521738 - }, - "math": { - "name": "MATH Level 5", - "value": 0.377643504531722, - "normalized_score": 37.7643504531722 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31291946308724833, - "normalized_score": 8.389261744966444 - }, - "musr": { - "name": "MUSR", - "value": 0.44617708333333334, - "normalized_score": 14.238802083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4476396276595745, - "normalized_score": 38.62662529550827 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "lkoenig/BBAI_375_QwenDyancabs (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.6564723678266879 - } - }, - { - "id": "lkoenig/BBAI_456_QwenKoen_bfloat16_a56aa459673eb7d685ee663b51371bc84b67c814_False", - "model": { - "name": "lkoenig/BBAI_456_QwenKoen", - "sha": "a56aa459673eb7d685ee663b51371bc84b67c814", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 29.811925443075086, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45292823042859615, - "normalized_score": 45.29282304285961 - }, - "bbh": { - "name": "BBH", - "value": 0.5552713612233481, - "normalized_score": 36.549146151614444 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3685800604229607, - "normalized_score": 36.85800604229607 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31291946308724833, - "normalized_score": 8.389261744966444 - }, - "musr": { - "name": "MUSR", - "value": 0.4395104166666666, - "normalized_score": 13.238802083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4468916223404255, - "normalized_score": 38.54351359338061 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-01", - "submission_date": "2025-03-01", - "generation": 1, - "base_model": "lkoenig/BBAI_456_QwenKoen (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.6667178605086506 - } - }, - { - "id": "lkoenig/BBAI_7B_KoenQwenDyan_bfloat16_6638a1360874843766f32c576f6cad02536fb1c8_False", - "model": { - "name": "lkoenig/BBAI_7B_KoenQwenDyan", - "sha": "6638a1360874843766f32c576f6cad02536fb1c8", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 32.02495361134858, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5807224830117421, - "normalized_score": 58.07224830117421 - }, - "bbh": { - "name": "BBH", - "value": 0.5536566841353078, - "normalized_score": 36.2457227762662 - }, - "math": { - "name": "MATH Level 5", - "value": 0.37386706948640486, - "normalized_score": 37.38670694864049 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3179530201342282, - "normalized_score": 9.060402684563762 - }, - "musr": { - "name": "MUSR", - "value": 0.43687499999999996, - "normalized_score": 12.942708333333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44597739361702127, - 
"normalized_score": 38.44193262411347 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-02", - "submission_date": "2025-03-08", - "generation": 1, - "base_model": "lkoenig/BBAI_7B_KoenQwenDyan (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.672759751244066 - } - }, - { - "id": "lkoenig/BBAI_7B_Qwen2.5koen_bfloat16_fb6914a9b1b8234a73920be9ae8ed935bda35f4b_False", - "model": { - "name": "lkoenig/BBAI_7B_Qwen2.5koen", - "sha": "fb6914a9b1b8234a73920be9ae8ed935bda35f4b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 29.836311993118755, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45999725173650363, - "normalized_score": 45.99972517365036 - }, - "bbh": { - "name": "BBH", - "value": 0.5544031312134464, - "normalized_score": 36.307407586886015 - }, - "math": { - "name": "MATH Level 5", - "value": 0.36555891238670696, - "normalized_score": 36.5558912386707 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31291946308724833, - "normalized_score": 8.389261744966444 - }, - "musr": { - "name": "MUSR", - "value": 0.43690625, - "normalized_score": 13.046614583333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4484707446808511, - "normalized_score": 38.71897163120567 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-01", - "submission_date": "2025-03-01", - "generation": 1, - "base_model": "lkoenig/BBAI_7B_Qwen2.5koen (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.6607702241816975 - } - }, - { - "id": "lkoenig/BBAI_7B_QwenDyanKoenLo_bfloat16_ce3449d15a540fefdcf6c64ed87176fa45450e1b_False", - "model": { - "name": "lkoenig/BBAI_7B_QwenDyanKoenLo", - "sha": "ce3449d15a540fefdcf6c64ed87176fa45450e1b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 30.01758177649708, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.46631714960748594, - "normalized_score": 46.631714960748596 - }, - "bbh": { - "name": "BBH", - "value": 0.5562461525503201, - "normalized_score": 36.678248178014 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3640483383685801, - "normalized_score": 36.40483383685801 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3187919463087248, - "normalized_score": 9.172259507829976 - }, - "musr": { - "name": "MUSR", - "value": 0.4343020833333333, - "normalized_score": 12.721093750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4464760638297872, - "normalized_score": 38.49734042553191 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-02", - "submission_date": "2025-03-08", - "generation": 1, - "base_model": "lkoenig/BBAI_7B_QwenDyanKoenLo (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.6754508597915261 - } - }, - { - "id": 
"lkoenig/BBAI_7B_QwenDyancabsLAW_bfloat16_343b0c0d3d92386e9d3756f37bb0b27a4479a1ce_False", - "model": { - "name": "lkoenig/BBAI_7B_QwenDyancabsLAW", - "sha": "343b0c0d3d92386e9d3756f37bb0b27a4479a1ce", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 31.816743980264878, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5549685944405289, - "normalized_score": 55.49685944405289 - }, - "bbh": { - "name": "BBH", - "value": 0.5578836606885887, - "normalized_score": 36.77999384422684 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3678247734138973, - "normalized_score": 36.78247734138973 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3187919463087248, - "normalized_score": 9.172259507829976 - }, - "musr": { - "name": "MUSR", - "value": 0.4461145833333333, - "normalized_score": 14.09765625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4471409574468085, - "normalized_score": 38.57121749408983 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-01", - "submission_date": "2025-03-01", - "generation": 1, - "base_model": "lkoenig/BBAI_7B_QwenDyancabsLAW (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.6552148107125473 - } - }, - { - "id": "llmat/Mistral-v0.3-7B-ORPO_bfloat16_868d8a51e8deb6fd948eabe5bc296c53bcf41073_True", - "model": { - "name": "llmat/Mistral-v0.3-7B-ORPO", - "sha": "868d8a51e8deb6fd948eabe5bc296c53bcf41073", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 12.399290010550855, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3770406964631622, - "normalized_score": 37.70406964631621 - }, - "bbh": { - "name": "BBH", - "value": 0.39776607302918093, - "normalized_score": 14.86315851911541 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02416918429003021, - "normalized_score": 2.416918429003021 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26677852348993286, - "normalized_score": 2.2371364653243813 - }, - "musr": { - "name": "MUSR", - "value": 0.35552083333333334, - "normalized_score": 2.973437500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2278091755319149, - "normalized_score": 14.2010195035461 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-04", - "submission_date": "2024-09-02", - "generation": 2, - "base_model": "mistralai/Mistral-7B-v0.3", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.248, - "co2_cost": 1.8980608067578275 - } - }, - { - "id": "llmat/Mistral-v0.3-7B-ORPO_float16_868d8a51e8deb6fd948eabe5bc296c53bcf41073_True", - "model": { - "name": "llmat/Mistral-v0.3-7B-ORPO", - "sha": "868d8a51e8deb6fd948eabe5bc296c53bcf41073", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 12.024321589275658, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3639764713183243, - "normalized_score": 36.39764713183243 - }, - "bbh": { - "name": "BBH", - "value": 
0.400465557804411, - "normalized_score": 15.59149132338697 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0015105740181268882, - "normalized_score": 0.1510574018126888 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26929530201342283, - "normalized_score": 2.572706935123044 - }, - "musr": { - "name": "MUSR", - "value": 0.3528541666666667, - "normalized_score": 2.973437500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.23013630319148937, - "normalized_score": 14.459589243498819 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-04", - "submission_date": "2024-08-06", - "generation": 2, - "base_model": "mistralai/Mistral-7B-v0.3", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.248, - "co2_cost": 0.6268180652325449 - } - }, - { - "id": "llnYou/ECE-PRYMMAL-YL-1B-SLERP-V5_bfloat16_6facb36cea2f670e32d6571846f00aa4cf5aaa86_False", - "model": { - "name": "llnYou/ECE-PRYMMAL-YL-1B-SLERP-V5", - "sha": "6facb36cea2f670e32d6571846f00aa4cf5aaa86", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 15.836336402400669, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.33125329680802496, - "normalized_score": 33.125329680802494 - }, - "bbh": { - "name": "BBH", - "value": 0.42329545804357255, - "normalized_score": 18.879659027462832 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11102719033232629, - "normalized_score": 11.10271903323263 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2860738255033557, - "normalized_score": 4.809843400447425 - }, - "musr": { - "name": "MUSR", - "value": 0.3868020833333334, - "normalized_score": 5.650260416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29305186170212766, - "normalized_score": 21.45020685579196 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-12", - "submission_date": "2024-11-12", - "generation": 0, - "base_model": "llnYou/ECE-PRYMMAL-YL-1B-SLERP-V5", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.544, - "co2_cost": 1.2803565086009958 - } - }, - { - "id": "llnYou/ECE-PRYMMAL-YL-1B-SLERP-V6_bfloat16_f15fb39e40475348e7d349c3ec2f346ffca39377_False", - "model": { - "name": "llnYou/ECE-PRYMMAL-YL-1B-SLERP-V6", - "sha": "f15fb39e40475348e7d349c3ec2f346ffca39377", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 9.395273583656481, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.13876181864120535, - "normalized_score": 13.876181864120534 - }, - "bbh": { - "name": "BBH", - "value": 0.3944027089700251, - "normalized_score": 14.538923027777068 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0022658610271903325, - "normalized_score": 0.22658610271903326 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.39279166666666665, - "normalized_score": 7.365624999999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2349567819148936, - "normalized_score": 
14.995197990543732 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-13", - "submission_date": "2024-11-13", - "generation": 0, - "base_model": "llnYou/ECE-PRYMMAL-YL-1B-SLERP-V6", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.357, - "co2_cost": 1.1108290864726493 - } - }, - { - "id": "llnYou/ECE-PRYMMAL-YL-3B-SLERP-V1_bfloat16_4918220543f4923137d20204a5ea396f65f6b956_False", - "model": { - "name": "llnYou/ECE-PRYMMAL-YL-3B-SLERP-V1", - "sha": "4918220543f4923137d20204a5ea396f65f6b956", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 11.626794158168616, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23463299600615256, - "normalized_score": 23.463299600615258 - }, - "bbh": { - "name": "BBH", - "value": 0.4018418465179459, - "normalized_score": 15.79746247814972 - }, - "math": { - "name": "MATH Level 5", - "value": 0.00906344410876133, - "normalized_score": 0.906344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2936241610738255, - "normalized_score": 5.8165548098433995 - }, - "musr": { - "name": "MUSR", - "value": 0.3364479166666667, - "normalized_score": 3.2226562499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2849900265957447, - "normalized_score": 20.554447399527188 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-12", - "submission_date": "2024-11-13", - "generation": 0, - "base_model": "llnYou/ECE-PRYMMAL-YL-3B-SLERP-V1", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 2.81, - "co2_cost": 1.1587768568716361 - } - }, - { - "id": "llnYou/ECE-PRYMMAL-YL-3B-SLERP-V2_bfloat16_c3d4fbef1a10ef2746c47c0379b4247c784758e5_False", - "model": { - "name": "llnYou/ECE-PRYMMAL-YL-3B-SLERP-V2", - "sha": "c3d4fbef1a10ef2746c47c0379b4247c784758e5", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 11.813468340726132, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2309361383351729, - "normalized_score": 23.09361383351729 - }, - "bbh": { - "name": "BBH", - "value": 0.39897709281426197, - "normalized_score": 15.20224370386771 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01283987915407855, - "normalized_score": 1.283987915407855 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27684563758389263, - "normalized_score": 3.5794183445190177 - }, - "musr": { - "name": "MUSR", - "value": 0.3587708333333333, - "normalized_score": 6.613020833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28997672872340424, - "normalized_score": 21.108525413711583 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-12", - "submission_date": "2024-11-13", - "generation": 0, - "base_model": "llnYou/ECE-PRYMMAL-YL-3B-SLERP-V2", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 2.81, - "co2_cost": 1.0876183581709418 - } - }, - { - "id": 
"llnYou/ECE-PRYMMAL-YL-3B-SLERP-V3_bfloat16_90648507743059de96334fdc97309b6f2af3d01d_False", - "model": { - "name": "llnYou/ECE-PRYMMAL-YL-3B-SLERP-V3", - "sha": "90648507743059de96334fdc97309b6f2af3d01d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 23.42685471667849, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.35808100285021516, - "normalized_score": 35.80810028502152 - }, - "bbh": { - "name": "BBH", - "value": 0.5473121918055145, - "normalized_score": 36.62575632451354 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1299093655589124, - "normalized_score": 12.990936555891238 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.43613541666666666, - "normalized_score": 14.050260416666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.40433843085106386, - "normalized_score": 33.81538120567376 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-13", - "submission_date": "2024-11-13", - "generation": 0, - "base_model": "llnYou/ECE-PRYMMAL-YL-3B-SLERP-V3", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.821, - "co2_cost": 1.086650979183976 - } - }, - { - "id": "lmsys/vicuna-13b-v1.3_float16_6566e9cb1787585d1147dcf4f9bc48f29e1328d2_True", - "model": { - "name": "lmsys/vicuna-13b-v1.3", - "sha": "6566e9cb1787585d1147dcf4f9bc48f29e1328d2", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 10.435533675653645, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3343506340953115, - "normalized_score": 33.43506340953115 - }, - "bbh": { - "name": "BBH", - "value": 0.3384399312777569, - "normalized_score": 7.4897893116292105 - }, - "math": { - "name": "MATH Level 5", - "value": 0.014350453172205438, - "normalized_score": 1.4350453172205437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.3727291666666666, - "normalized_score": 4.091145833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2243184840425532, - "normalized_score": 13.813164893617023 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-06-18", - "submission_date": "2024-06-28", - "generation": 0, - "base_model": "lmsys/vicuna-13b-v1.3", - "hub_license": "", - "hub_hearts": 198, - "params_billions": 13.0, - "co2_cost": 2.188465811376766 - } - }, - { - "id": "lmsys/vicuna-7b-v1.3_float16_236eeeab96f0dc2e463f2bebb7bb49809279c6d6_True", - "model": { - "name": "lmsys/vicuna-7b-v1.3", - "sha": "236eeeab96f0dc2e463f2bebb7bb49809279c6d6", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 8.525809191714858, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.29086158060612505, - "normalized_score": 29.086158060612505 - }, - "bbh": { - 
"name": "BBH", - "value": 0.3298410006592924, - "normalized_score": 6.461378796018201 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01283987915407855, - "normalized_score": 1.283987915407855 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2424496644295302, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3793333333333333, - "normalized_score": 5.0166666666666675 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.18375997340425532, - "normalized_score": 9.306663711583923 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-06-18", - "submission_date": "2024-06-28", - "generation": 0, - "base_model": "lmsys/vicuna-7b-v1.3", - "hub_license": "", - "hub_hearts": 132, - "params_billions": 7.0, - "co2_cost": 1.1267564062407687 - } - }, - { - "id": "lmsys/vicuna-7b-v1.5_float16_3321f76e3f527bd14065daf69dad9344000a201d_False", - "model": { - "name": "lmsys/vicuna-7b-v1.5", - "sha": "3321f76e3f527bd14065daf69dad9344000a201d", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 10.885152314855338, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23515716077784724, - "normalized_score": 23.515716077784724 - }, - "bbh": { - "name": "BBH", - "value": 0.39470436842233775, - "normalized_score": 15.15250931284372 - }, - "math": { - "name": "MATH Level 5", - "value": 0.013595166163141994, - "normalized_score": 1.3595166163141994 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.42311458333333335, - "normalized_score": 11.422656250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.21467752659574468, - "normalized_score": 12.741947399527188 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-07-29", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "lmsys/vicuna-7b-v1.5", - "hub_license": "llama2", - "hub_hearts": 335, - "params_billions": 7.0, - "co2_cost": 1.2054362102644438 - } - }, - { - "id": "lodrick-the-lafted/llama-3.1-8b-instruct-ortho-v7_bfloat16_6b7673cd78398c3a8c92f8e759aaae6409e96978_False", - "model": { - "name": "lodrick-the-lafted/llama-3.1-8b-instruct-ortho-v7", - "sha": "6b7673cd78398c3a8c92f8e759aaae6409e96978", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 11.812819354645235, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3514618988727687, - "normalized_score": 35.14618988727687 - }, - "bbh": { - "name": "BBH", - "value": 0.39069140261362917, - "normalized_score": 14.43786307942362 - }, - "math": { - "name": "MATH Level 5", - "value": 0.027190332326283987, - "normalized_score": 2.719033232628399 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.36159375, - "normalized_score": 4.732552083333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1973902925531915, - "normalized_score": 
10.821143617021276 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-25", - "submission_date": "2024-07-30", - "generation": 0, - "base_model": "lodrick-the-lafted/llama-3.1-8b-instruct-ortho-v7", - "hub_license": "wtfpl", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.8631477143759163 - } - }, - { - "id": "lordjia/Llama-3-Cantonese-8B-Instruct_bfloat16_ea98e9b1ab3ea0d66e5270816e43d7a70aaaa151_True", - "model": { - "name": "lordjia/Llama-3-Cantonese-8B-Instruct", - "sha": "ea98e9b1ab3ea0d66e5270816e43d7a70aaaa151", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.271708884717096, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6669259786256023, - "normalized_score": 66.69259786256023 - }, - "bbh": { - "name": "BBH", - "value": 0.4814148018954038, - "normalized_score": 26.79103884029782 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0891238670694864, - "normalized_score": 8.91238670694864 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2936241610738255, - "normalized_score": 5.8165548098433995 - }, - "musr": { - "name": "MUSR", - "value": 0.40460416666666665, - "normalized_score": 9.47552083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35147938829787234, - "normalized_score": 27.942154255319146 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-16", - "submission_date": "2024-08-03", - "generation": 0, - "base_model": "lordjia/Llama-3-Cantonese-8B-Instruct", - "hub_license": "llama3", - "hub_hearts": 6, - "params_billions": 8.03, - "co2_cost": 1.5354059256889798 - } - }, - { - "id": "lordjia/Qwen2-Cantonese-7B-Instruct_bfloat16_eb8b0faee749d167fd70e74f5e579094c4cfe7fb_True", - "model": { - "name": "lordjia/Qwen2-Cantonese-7B-Instruct", - "sha": "eb8b0faee749d167fd70e74f5e579094c4cfe7fb", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 26.309196155583507, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5435278394659503, - "normalized_score": 54.35278394659503 - }, - "bbh": { - "name": "BBH", - "value": 0.5215311346221223, - "normalized_score": 32.45321665791298 - }, - "math": { - "name": "MATH Level 5", - "value": 0.25604229607250756, - "normalized_score": 25.604229607250755 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2953020134228188, - "normalized_score": 6.040268456375841 - }, - "musr": { - "name": "MUSR", - "value": 0.40038541666666666, - "normalized_score": 7.814843749999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38430851063829785, - "normalized_score": 31.589834515366427 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-13", - "submission_date": "2024-08-03", - "generation": 0, - "base_model": "lordjia/Qwen2-Cantonese-7B-Instruct", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 7.616, - "co2_cost": 2.032013591793652 - } - }, - { - "id": 
"lt-asset/nova-1.3b_bfloat16_766eb459b5aa1e084b5474bb86ade09f9bed8fca_False", - "model": { - "name": "lt-asset/nova-1.3b", - "sha": "766eb459b5aa1e084b5474bb86ade09f9bed8fca", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "NovaForCausalLM", - "average_score": 3.8536506008151683, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1214255951985177, - "normalized_score": 12.14255951985177 - }, - "bbh": { - "name": "BBH", - "value": 0.31700122104895806, - "normalized_score": 4.437619873492811 - }, - "math": { - "name": "MATH Level 5", - "value": 0.012084592145015106, - "normalized_score": 1.2084592145015105 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24916107382550334, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.36978125, - "normalized_score": 3.755989583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11419547872340426, - "normalized_score": 1.5772754137115832 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-01-20", - "submission_date": "2024-11-16", - "generation": 0, - "base_model": "lt-asset/nova-1.3b", - "hub_license": "bsd-3-clause-clear", - "hub_hearts": 4, - "params_billions": 1.347, - "co2_cost": 0.4954933143986224 - } - }, - { - "id": "lunahr/thea-3b-50r-u1_bfloat16_34371d851aa8c2f6fa2e05061a357196d8892d65_True", - "model": { - "name": "lunahr/thea-3b-50r-u1", - "sha": "34371d851aa8c2f6fa2e05061a357196d8892d65", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.037090298507373, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6030288523340293, - "normalized_score": 60.30288523340292 - }, - "bbh": { - "name": "BBH", - "value": 0.41046731029294475, - "normalized_score": 16.2229363475435 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1042296072507553, - "normalized_score": 10.42296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.3181875, - "normalized_score": 2.706770833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2808344414893617, - "normalized_score": 20.09271572104019 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-11", - "submission_date": "2025-01-11", - "generation": 2, - "base_model": "CreitinGameplays/Llama-3.2-3b-Instruct-uncensored-refinetune (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 1.7253496126746135 - } - }, - { - "id": "lunahr/thea-v2-3b-50r_bfloat16_b6c37e548658795006b2603dc500e6df01c674eb_True", - "model": { - "name": "lunahr/thea-v2-3b-50r", - "sha": "b6c37e548658795006b2603dc500e6df01c674eb", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 12.953795195391686, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.370396104558128, - "normalized_score": 37.0396104558128 - }, - "bbh": { 
- "name": "BBH", - "value": 0.4194416192911743, - "normalized_score": 18.711907810841776 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02416918429003021, - "normalized_score": 2.416918429003021 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.3221875, - "normalized_score": 2.4401041666666674 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2409408244680851, - "normalized_score": 15.660091607565011 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-13", - "submission_date": "2024-12-14", - "generation": 3, - "base_model": "Removed", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 1.265276493490633 - } - }, - { - "id": "m42-health/Llama3-Med42-70B_bfloat16_867064e18aad7bf3f4795f20dcb25a1108952543_True", - "model": { - "name": "m42-health/Llama3-Med42-70B", - "sha": "867064e18aad7bf3f4795f20dcb25a1108952543", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 35.68301603364157, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6291074349392944, - "normalized_score": 62.91074349392944 - }, - "bbh": { - "name": "BBH", - "value": 0.6687891109485058, - "normalized_score": 52.9713478709021 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2258308157099698, - "normalized_score": 22.58308157099698 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34731543624161076, - "normalized_score": 12.975391498881436 - }, - "musr": { - "name": "MUSR", - "value": 0.46289583333333334, - "normalized_score": 18.628645833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4962599734042553, - "normalized_score": 44.02888593380615 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-27", - "submission_date": "2024-12-11", - "generation": 0, - "base_model": "m42-health/Llama3-Med42-70B", - "hub_license": "llama3", - "hub_hearts": 44, - "params_billions": 70.554, - "co2_cost": 50.82246883597565 - } - }, - { - "id": "macadeliccc/Samantha-Qwen-2-7B_float16_59058972fa9b56d132d04589eb17cbba277c2826_True", - "model": { - "name": "macadeliccc/Samantha-Qwen-2-7B", - "sha": "59058972fa9b56d132d04589eb17cbba277c2826", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 25.06508576450433, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4377152621710395, - "normalized_score": 43.771526217103954 - }, - "bbh": { - "name": "BBH", - "value": 0.5082341412476951, - "normalized_score": 31.41189390746123 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21148036253776434, - "normalized_score": 21.148036253776432 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.4799479166666667, - "normalized_score": 20.160156250000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3779089095744681, - "normalized_score": 30.87876773049646 - } - }, - 
"features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-15", - "submission_date": "2024-08-05", - "generation": 1, - "base_model": "Qwen/Qwen2-7B", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 7.616, - "co2_cost": 2.6796147912948087 - } - }, - { - "id": "macadeliccc/magistrate-3.2-3b-base_bfloat16_2a40ac9ca1904fca2c1e69573e27f0ff8039b738_False", - "model": { - "name": "macadeliccc/magistrate-3.2-3b-base", - "sha": "2a40ac9ca1904fca2c1e69573e27f0ff8039b738", - "precision": "bfloat16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 6.046097363125316, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1159301763764589, - "normalized_score": 11.59301763764589 - }, - "bbh": { - "name": "BBH", - "value": 0.3342701056047533, - "normalized_score": 6.910280939116192 - }, - "math": { - "name": "MATH Level 5", - "value": 0.011329305135951661, - "normalized_score": 1.1329305135951662 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.39759374999999997, - "normalized_score": 7.532552083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16888297872340424, - "normalized_score": 7.65366430260047 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-28", - "submission_date": "2024-10-01", - "generation": 1, - "base_model": "meta-llama/Llama-3.2-3B", - "hub_license": "llama3.2", - "hub_hearts": 1, - "params_billions": 3.213, - "co2_cost": 1.460685925775111 - } - }, - { - "id": "macadeliccc/magistrate-3.2-3b-it_bfloat16_122961278c97195dd59d67b244907359013e4de5_True", - "model": { - "name": "macadeliccc/magistrate-3.2-3b-it", - "sha": "122961278c97195dd59d67b244907359013e4de5", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 7.088076253678785, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22918744486850445, - "normalized_score": 22.918744486850443 - }, - "bbh": { - "name": "BBH", - "value": 0.3256506790327196, - "normalized_score": 5.323155419813335 - }, - "math": { - "name": "MATH Level 5", - "value": 0.019637462235649546, - "normalized_score": 1.9637462235649545 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24748322147651006, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3763229166666667, - "normalized_score": 5.740364583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15924202127659576, - "normalized_score": 6.582446808510639 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-01", - "submission_date": "2024-10-01", - "generation": 2, - "base_model": "meta-llama/Llama-3.2-3B", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 1.4059020586066877 - } - }, - { - "id": "magnifi/Phi3_intent_v56_3_w_unknown_5_lr_0.002_bfloat16_8a7bdc02074a472ac693dd326c05aef56d00aa40_True", - 
"model": { - "name": "magnifi/Phi3_intent_v56_3_w_unknown_5_lr_0.002", - "sha": "8a7bdc02074a472ac693dd326c05aef56d00aa40", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 7.1244683931064765, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20181008612703183, - "normalized_score": 20.181008612703184 - }, - "bbh": { - "name": "BBH", - "value": 0.3281563256810973, - "normalized_score": 5.851019142844943 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.41229166666666667, - "normalized_score": 9.569791666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1471908244680851, - "normalized_score": 5.243424940898343 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-10", - "submission_date": "2025-03-10", - "generation": 1, - "base_model": "unsloth/Phi-3-mini-4k-instruct-bnb-4bit", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.821, - "co2_cost": 0.15722794736490572 - } - }, - { - "id": "maldv/Awqward2.5-32B-Instruct_bfloat16_fd8f6751645a1923d588f80ec1d8292cb69691a1_True", - "model": { - "name": "maldv/Awqward2.5-32B-Instruct", - "sha": "fd8f6751645a1923d588f80ec1d8292cb69691a1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 46.74902268350894, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8254697535871487, - "normalized_score": 82.54697535871487 - }, - "bbh": { - "name": "BBH", - "value": 0.6974465506773041, - "normalized_score": 57.20733868173476 - }, - "math": { - "name": "MATH Level 5", - "value": 0.6231117824773413, - "normalized_score": 62.311178247734134 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34060402684563756, - "normalized_score": 12.080536912751676 - }, - "musr": { - "name": "MUSR", - "value": 0.42748958333333337, - "normalized_score": 13.86953125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5723071808510638, - "normalized_score": 52.478575650118195 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-18", - "submission_date": "2024-12-18", - "generation": 1, - "base_model": "maldv/Awqward2.5-32B-Instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 32.764, - "co2_cost": 7.445094544222052 - } - }, - { - "id": "maldv/Lytta2.5-32B-Instruct_bfloat16_d5ecf702a5c25e0e900fb6e44283864557b03ce5_True", - "model": { - "name": "maldv/Lytta2.5-32B-Instruct", - "sha": "d5ecf702a5c25e0e900fb6e44283864557b03ce5", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 24.790451743833525, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25079455843827714, - "normalized_score": 25.07945584382771 - }, - "bbh": { - "name": "BBH", - "value": 0.559971089357847, - 
"normalized_score": 37.03153984906665 - }, - "math": { - "name": "MATH Level 5", - "value": 0.34441087613293053, - "normalized_score": 34.44108761329305 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26677852348993286, - "normalized_score": 2.2371364653243813 - }, - "musr": { - "name": "MUSR", - "value": 0.37685416666666666, - "normalized_score": 4.973437500000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5048204787234043, - "normalized_score": 44.98005319148937 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-07", - "generation": 1, - "base_model": "maldv/Lytta2.5-32B-Instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 32.764, - "co2_cost": 10.239978563915981 - } - }, - { - "id": "maldv/Qwentile2.5-32B-Instruct_bfloat16_1cb04716c8aba33838b7f5dad99b23b7f0c6c152_True", - "model": { - "name": "maldv/Qwentile2.5-32B-Instruct", - "sha": "1cb04716c8aba33838b7f5dad99b23b7f0c6c152", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 45.9002633632381, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7393161256576994, - "normalized_score": 73.93161256576994 - }, - "bbh": { - "name": "BBH", - "value": 0.6962837451098368, - "normalized_score": 57.205877636883635 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5219033232628398, - "normalized_score": 52.190332326283986 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38422818791946306, - "normalized_score": 17.897091722595075 - }, - "musr": { - "name": "MUSR", - "value": 0.4682291666666667, - "normalized_score": 19.96197916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5879321808510638, - "normalized_score": 54.214686761229316 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-19", - "submission_date": "2024-12-19", - "generation": 1, - "base_model": "maldv/Qwentile2.5-32B-Instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 32, - "params_billions": 32.764, - "co2_cost": 7.065083471182076 - } - }, - { - "id": "maldv/badger-kappa-llama-3-8b_bfloat16_aa6863eb816ca6ad29453b8aaf846962c4328998_True", - "model": { - "name": "maldv/badger-kappa-llama-3-8b", - "sha": "aa6863eb816ca6ad29453b8aaf846962c4328998", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.166688498001093, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.46946435457918323, - "normalized_score": 46.94643545791833 - }, - "bbh": { - "name": "BBH", - "value": 0.5084927997756815, - "normalized_score": 30.153238604373765 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08610271903323263, - "normalized_score": 8.610271903323262 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.3765104166666666, - "normalized_score": 4.297135416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3695146276595745, - "normalized_score": 29.94606973995272 - } - }, - "features": { - 
"is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-02", - "submission_date": "2024-06-27", - "generation": 0, - "base_model": "maldv/badger-kappa-llama-3-8b", - "hub_license": "llama3", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.9182506457299924 - } - }, - { - "id": "maldv/badger-lambda-llama-3-8b_bfloat16_8ef157d0d3c12212ca5e70d354869aed90e03f22_True", - "model": { - "name": "maldv/badger-lambda-llama-3-8b", - "sha": "8ef157d0d3c12212ca5e70d354869aed90e03f22", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.943850248259498, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4860758343417687, - "normalized_score": 48.607583434176874 - }, - "bbh": { - "name": "BBH", - "value": 0.49634866510444836, - "normalized_score": 28.103050014353716 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09441087613293052, - "normalized_score": 9.441087613293051 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28187919463087246, - "normalized_score": 4.250559284116329 - }, - "musr": { - "name": "MUSR", - "value": 0.3753645833333333, - "normalized_score": 4.520572916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37666223404255317, - "normalized_score": 30.740248226950357 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-10", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "maldv/badger-lambda-llama-3-8b", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 10, - "params_billions": 8.03, - "co2_cost": 2.2220444352948254 - } - }, - { - "id": "maldv/badger-mu-llama-3-8b_bfloat16_952a269bb1e6c18ee772c6d088e74d305df4425d_True", - "model": { - "name": "maldv/badger-mu-llama-3-8b", - "sha": "952a269bb1e6c18ee772c6d088e74d305df4425d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.322170525640896, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49194581488229006, - "normalized_score": 49.194581488229005 - }, - "bbh": { - "name": "BBH", - "value": 0.514287576852281, - "normalized_score": 30.51396514214568 - }, - "math": { - "name": "MATH Level 5", - "value": 0.055891238670694864, - "normalized_score": 5.589123867069486 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.35545833333333327, - "normalized_score": 5.69895833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3673537234042553, - "normalized_score": 29.70596926713948 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-27", - "submission_date": "2024-06-27", - "generation": 0, - "base_model": "maldv/badger-mu-llama-3-8b", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.8092706264539988 - } - }, - { - "id": "maldv/badger-writer-llama-3-8b_bfloat16_1d8134d01af87e994571ae16ccd7b31cce42418f_True", - "model": { - "name": 
"maldv/badger-writer-llama-3-8b", - "sha": "1d8134d01af87e994571ae16ccd7b31cce42418f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.09688762786649, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5303140112678804, - "normalized_score": 53.03140112678803 - }, - "bbh": { - "name": "BBH", - "value": 0.4863893856673737, - "normalized_score": 26.878360614538398 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0755287009063444, - "normalized_score": 7.552870090634441 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28942953020134227, - "normalized_score": 5.257270693512303 - }, - "musr": { - "name": "MUSR", - "value": 0.35809375000000004, - "normalized_score": 3.195052083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3759973404255319, - "normalized_score": 30.666371158392437 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-17", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "maldv/badger-writer-llama-3-8b (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 9, - "params_billions": 8.03, - "co2_cost": 2.4716210285401963 - } - }, - { - "id": "marcuscedricridia/Cheng-1_bfloat16_cd8c9dd37c67c2e1b7c683fdd5e72b7f08c074b9_True", - "model": { - "name": "marcuscedricridia/Cheng-1", - "sha": "cd8c9dd37c67c2e1b7c683fdd5e72b7f08c074b9", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.05830324427348, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7788833628106757, - "normalized_score": 77.88833628106758 - }, - "bbh": { - "name": "BBH", - "value": 0.5524677845280024, - "normalized_score": 36.536366691615335 - }, - "math": { - "name": "MATH Level 5", - "value": 0.48942598187311176, - "normalized_score": 48.94259818731118 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.4073333333333333, - "normalized_score": 9.616666666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43492353723404253, - "normalized_score": 37.21372635933806 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-10", - "submission_date": "2025-03-12", - "generation": 0, - "base_model": "marcuscedricridia/Cheng-1", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 2.072174042569459 - } - }, - { - "id": "marcuscedricridia/Cheng-2_bfloat16_c22f780671f65fd4566fc9fefca6afdf9f09e3c0_True", - "model": { - "name": "marcuscedricridia/Cheng-2", - "sha": "c22f780671f65fd4566fc9fefca6afdf9f09e3c0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 42.84836261136232, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8337378156624423, - "normalized_score": 83.37378156624423 - }, - "bbh": { - "name": "BBH", - "value": 0.6498988582965893, - "normalized_score": 49.97518634878575 - }, - "math": { - 
"name": "MATH Level 5", - "value": 0.5438066465256798, - "normalized_score": 54.38066465256798 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34563758389261745, - "normalized_score": 12.751677852348994 - }, - "musr": { - "name": "MUSR", - "value": 0.41933333333333334, - "normalized_score": 12.016666666666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5013297872340425, - "normalized_score": 44.59219858156028 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-12", - "submission_date": "2025-03-12", - "generation": 1, - "base_model": "marcuscedricridia/Cheng-2 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 14.766, - "co2_cost": 1.626526680992532 - } - }, - { - "id": "marcuscedricridia/Cheng-2-v1.1_bfloat16_c007eb2377bc2ce46fe2b75b6e306baae2fe8691_True", - "model": { - "name": "marcuscedricridia/Cheng-2-v1.1", - "sha": "c007eb2377bc2ce46fe2b75b6e306baae2fe8691", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 42.679378045404626, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8269934883885868, - "normalized_score": 82.69934883885867 - }, - "bbh": { - "name": "BBH", - "value": 0.6510142192324059, - "normalized_score": 50.248143037694966 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5392749244712991, - "normalized_score": 53.92749244712991 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34312080536912754, - "normalized_score": 12.416107382550338 - }, - "musr": { - "name": "MUSR", - "value": 0.41672916666666665, - "normalized_score": 11.491145833333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5076462765957447, - "normalized_score": 45.29403073286053 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-12", - "submission_date": "2025-03-12", - "generation": 1, - "base_model": "marcuscedricridia/Cheng-2-v1.1 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 14.766, - "co2_cost": 1.6672849363315172 - } - }, - { - "id": "marcuscedricridia/Hush-Qwen2.5-7B-MST_bfloat16_3f9957f0c0812e781ce27ba6372ba5f1a1b88143_True", - "model": { - "name": "marcuscedricridia/Hush-Qwen2.5-7B-MST", - "sha": "3f9957f0c0812e781ce27ba6372ba5f1a1b88143", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 33.66195961349608, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7488330961847898, - "normalized_score": 74.88330961847896 - }, - "bbh": { - "name": "BBH", - "value": 0.5458495423775734, - "normalized_score": 35.35007344104428 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4244712990936556, - "normalized_score": 42.44712990936556 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.3913645833333333, - "normalized_score": 6.987239583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.41630651595744683, - "normalized_score": 35.145168439716315 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - 
"is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-09", - "submission_date": "2025-03-09", - "generation": 1, - "base_model": "marcuscedricridia/Hush-Qwen2.5-7B-MST (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.613, - "co2_cost": 0.6958530315799107 - } - }, - { - "id": "marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.1_bfloat16_61c74d22df2900512a70e6320446d33c895a3706_True", - "model": { - "name": "marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.1", - "sha": "61c74d22df2900512a70e6320446d33c895a3706", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.229152099424084, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7444868504457063, - "normalized_score": 74.44868504457062 - }, - "bbh": { - "name": "BBH", - "value": 0.555919540267728, - "normalized_score": 36.82982562300872 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4652567975830816, - "normalized_score": 46.52567975830816 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3062080536912752, - "normalized_score": 7.494407158836691 - }, - "musr": { - "name": "MUSR", - "value": 0.4073333333333333, - "normalized_score": 9.41666666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.429936835106383, - "normalized_score": 36.65964834515366 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-09", - "submission_date": "2025-03-09", - "generation": 1, - "base_model": "marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.1 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.613, - "co2_cost": 0.6743135699592855 - } - }, - { - "id": "marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.3_bfloat16_4043f735c65a687241b74c8d9e62783376ace3f0_True", - "model": { - "name": "marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.3", - "sha": "4043f735c65a687241b74c8d9e62783376ace3f0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.73029750670216, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.704320092909037, - "normalized_score": 70.4320092909037 - }, - "bbh": { - "name": "BBH", - "value": 0.5516165586639877, - "normalized_score": 36.45190695170704 - }, - "math": { - "name": "MATH Level 5", - "value": 0.47583081570996977, - "normalized_score": 47.583081570996974 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3145973154362416, - "normalized_score": 8.612975391498878 - }, - "musr": { - "name": "MUSR", - "value": 0.43105208333333334, - "normalized_score": 13.081510416666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44398271276595747, - "normalized_score": 38.22030141843972 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-09", - "submission_date": "2025-03-09", - "generation": 1, - "base_model": "marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.3 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.613, - "co2_cost": 0.7067317403547374 - } - }, - { - "id": 
"marcuscedricridia/Hush-Qwen2.5-7B-Preview_bfloat16_3787aba9fef0e1ffd01757d1f3471fc84b948a05_True", - "model": { - "name": "marcuscedricridia/Hush-Qwen2.5-7B-Preview", - "sha": "3787aba9fef0e1ffd01757d1f3471fc84b948a05", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.12716296399811, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7962439660101863, - "normalized_score": 79.62439660101863 - }, - "bbh": { - "name": "BBH", - "value": 0.5431064770878757, - "normalized_score": 35.32875647048946 - }, - "math": { - "name": "MATH Level 5", - "value": 0.37537764350453173, - "normalized_score": 37.53776435045317 - }, - "gpqa": { - "name": "GPQA", - "value": 0.311241610738255, - "normalized_score": 8.165548098434002 - }, - "musr": { - "name": "MUSR", - "value": 0.4298125, - "normalized_score": 12.7265625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43641954787234044, - "normalized_score": 37.37994976359338 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-07", - "submission_date": "2025-03-07", - "generation": 1, - "base_model": "marcuscedricridia/Hush-Qwen2.5-7B-Preview (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 0.6838750927656837 - } - }, - { - "id": "marcuscedricridia/Hush-Qwen2.5-7B-RP-v1.4-1M_bfloat16_879c76f96c960efcd5db8ef1a98379319e69a5c3_True", - "model": { - "name": "marcuscedricridia/Hush-Qwen2.5-7B-RP-v1.4-1M", - "sha": "879c76f96c960efcd5db8ef1a98379319e69a5c3", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 33.31791936262957, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7727884236049238, - "normalized_score": 77.27884236049238 - }, - "bbh": { - "name": "BBH", - "value": 0.5295123017150106, - "normalized_score": 32.681799063269544 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3368580060422961, - "normalized_score": 33.68580060422961 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2986577181208054, - "normalized_score": 6.487695749440718 - }, - "musr": { - "name": "MUSR", - "value": 0.44327083333333334, - "normalized_score": 14.942187500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4134807180851064, - "normalized_score": 34.83119089834515 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-08", - "submission_date": "2025-03-10", - "generation": 1, - "base_model": "marcuscedricridia/Hush-Qwen2.5-7B-RP-v1.4-1M (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 7.613, - "co2_cost": 0.7061472523162416 - } - }, - { - "id": "marcuscedricridia/Hush-Qwen2.5-7B-v1.1_bfloat16_ad2151f84cc141aa20a85542308b3a14add5f1fa_True", - "model": { - "name": "marcuscedricridia/Hush-Qwen2.5-7B-v1.1", - "sha": "ad2151f84cc141aa20a85542308b3a14add5f1fa", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.47835434931775, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - 
"value": 0.7889499860370484, - "normalized_score": 78.89499860370483 - }, - "bbh": { - "name": "BBH", - "value": 0.5383575636307666, - "normalized_score": 34.40003587057091 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4380664652567976, - "normalized_score": 43.80664652567976 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3162751677852349, - "normalized_score": 8.83668903803132 - }, - "musr": { - "name": "MUSR", - "value": 0.4179375, - "normalized_score": 11.075520833333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4227061170212766, - "normalized_score": 35.85623522458629 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-08", - "submission_date": "2025-03-08", - "generation": 1, - "base_model": "marcuscedricridia/Hush-Qwen2.5-7B-v1.1 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.613, - "co2_cost": 0.6900126630786094 - } - }, - { - "id": "marcuscedricridia/Hush-Qwen2.5-7B-v1.2_bfloat16_f612046f36daa8afd7a1bd396e70d3869dda8638_True", - "model": { - "name": "marcuscedricridia/Hush-Qwen2.5-7B-v1.2", - "sha": "f612046f36daa8afd7a1bd396e70d3869dda8638", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.53808932923916, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7865020368178655, - "normalized_score": 78.65020368178655 - }, - "bbh": { - "name": "BBH", - "value": 0.540250407222091, - "normalized_score": 34.7406275243418 - }, - "math": { - "name": "MATH Level 5", - "value": 0.44033232628398794, - "normalized_score": 44.03323262839879 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3145973154362416, - "normalized_score": 8.612975391498878 - }, - "musr": { - "name": "MUSR", - "value": 0.421875, - "normalized_score": 11.667708333333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4197140957446808, - "normalized_score": 35.52378841607564 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-08", - "submission_date": "2025-03-08", - "generation": 1, - "base_model": "marcuscedricridia/Hush-Qwen2.5-7B-v1.2 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.613, - "co2_cost": 0.6894710937811266 - } - }, - { - "id": "marcuscedricridia/Hush-Qwen2.5-7B-v1.3_bfloat16_22714b419eb73afe1fb8016a67240634ddc99897_True", - "model": { - "name": "marcuscedricridia/Hush-Qwen2.5-7B-v1.3", - "sha": "22714b419eb73afe1fb8016a67240634ddc99897", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 33.93143365739242, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7856276900845313, - "normalized_score": 78.56276900845313 - }, - "bbh": { - "name": "BBH", - "value": 0.5326893189699237, - "normalized_score": 33.96881934058692 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3323262839879154, - "normalized_score": 33.23262839879154 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31208053691275167, - "normalized_score": 8.277404921700223 - }, - "musr": { - "name": "MUSR", - "value": 0.42463541666666665, - "normalized_score": 12.379427083333335 - }, 
- "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43450797872340424, - "normalized_score": 37.167553191489354 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-08", - "submission_date": "2025-03-08", - "generation": 1, - "base_model": "marcuscedricridia/Hush-Qwen2.5-7B-v1.3 (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 7.613, - "co2_cost": 0.6526124113262405 - } - }, - { - "id": "marcuscedricridia/Hush-Qwen2.5-7B-v1.4_bfloat16_dd001376afe7f7c98c584d201bedcc4ad234ad7e_True", - "model": { - "name": "marcuscedricridia/Hush-Qwen2.5-7B-v1.4", - "sha": "dd001376afe7f7c98c584d201bedcc4ad234ad7e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.18387958085206, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7834545672149895, - "normalized_score": 78.34545672149895 - }, - "bbh": { - "name": "BBH", - "value": 0.54229983590397, - "normalized_score": 35.05829745506504 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4259818731117825, - "normalized_score": 42.59818731117825 - }, - "gpqa": { - "name": "GPQA", - "value": 0.311241610738255, - "normalized_score": 8.165548098434002 - }, - "musr": { - "name": "MUSR", - "value": 0.4231770833333333, - "normalized_score": 11.430468750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4195478723404255, - "normalized_score": 35.50531914893617 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-08", - "submission_date": "2025-03-08", - "generation": 1, - "base_model": "marcuscedricridia/Hush-Qwen2.5-7B-v1.4 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.613, - "co2_cost": 0.6864942532990541 - } - }, - { - "id": "marcuscedricridia/Qwen2.5-7B-Preview_bfloat16_935631778e482a336c34b15fdede64d2571685f0_True", - "model": { - "name": "marcuscedricridia/Qwen2.5-7B-Preview", - "sha": "935631778e482a336c34b15fdede64d2571685f0", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 33.62064174853305, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7679423928509688, - "normalized_score": 76.79423928509688 - }, - "bbh": { - "name": "BBH", - "value": 0.5359781834039953, - "normalized_score": 33.85996689538109 - }, - "math": { - "name": "MATH Level 5", - "value": 0.34441087613293053, - "normalized_score": 34.44108761329305 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3238255033557047, - "normalized_score": 9.843400447427292 - }, - "musr": { - "name": "MUSR", - "value": 0.41403125, - "normalized_score": 10.587239583333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.42578125, - "normalized_score": 36.19791666666667 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-09", - "submission_date": "2025-03-09", - "generation": 1, - "base_model": "marcuscedricridia/Qwen2.5-7B-Preview (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.613, - 
"co2_cost": 0.7067649389855902 - } - }, - { - "id": "marcuscedricridia/Yell-Qwen2.5-7B-Preview_bfloat16_9c9cadc3c25e04502821433e49b17502551de37e_True", - "model": { - "name": "marcuscedricridia/Yell-Qwen2.5-7B-Preview", - "sha": "9c9cadc3c25e04502821433e49b17502551de37e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 26.147687224987916, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5838696879834395, - "normalized_score": 58.38696879834396 - }, - "bbh": { - "name": "BBH", - "value": 0.537136379549371, - "normalized_score": 34.76337530701511 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19259818731117825, - "normalized_score": 19.259818731117825 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28104026845637586, - "normalized_score": 4.138702460850116 - }, - "musr": { - "name": "MUSR", - "value": 0.40463541666666664, - "normalized_score": 9.246093750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37982047872340424, - "normalized_score": 31.091164302600465 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-08", - "submission_date": "2025-03-08", - "generation": 1, - "base_model": "marcuscedricridia/Yell-Qwen2.5-7B-Preview (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 0.6761295398176718 - } - }, - { - "id": "marcuscedricridia/Yell-Qwen2.5-7B-Preview-v1.1_bfloat16_99739c789f117c83527c3940a236d1741c1fae30_True", - "model": { - "name": "marcuscedricridia/Yell-Qwen2.5-7B-Preview-v1.1", - "sha": "99739c789f117c83527c3940a236d1741c1fae30", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 26.093952629179146, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5757013612769672, - "normalized_score": 57.57013612769673 - }, - "bbh": { - "name": "BBH", - "value": 0.5347734083768815, - "normalized_score": 34.15662850738155 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18957703927492447, - "normalized_score": 18.957703927492446 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2860738255033557, - "normalized_score": 4.809843400447425 - }, - "musr": { - "name": "MUSR", - "value": 0.4059375, - "normalized_score": 9.608854166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38314494680851063, - "normalized_score": 31.460549645390074 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-08", - "submission_date": "2025-03-08", - "generation": 1, - "base_model": "marcuscedricridia/Yell-Qwen2.5-7B-Preview-v1.1 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 0.671546849924999 - } - }, - { - "id": "marcuscedricridia/absolute-o1-7b_bfloat16_d11523bb20f692efb61fc72cff79eee70b0ecf0b_True", - "model": { - "name": "marcuscedricridia/absolute-o1-7b", - "sha": "d11523bb20f692efb61fc72cff79eee70b0ecf0b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.52860800635401, - "has_chat_template": true - }, 
- "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7515558717536137, - "normalized_score": 75.15558717536138 - }, - "bbh": { - "name": "BBH", - "value": 0.5469413884153854, - "normalized_score": 35.655762760822036 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5083081570996979, - "normalized_score": 50.83081570996979 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3196308724832215, - "normalized_score": 9.284116331096197 - }, - "musr": { - "name": "MUSR", - "value": 0.4113645833333333, - "normalized_score": 10.32057291666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44132313829787234, - "normalized_score": 37.92479314420804 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "marcuscedricridia/absolute-o1-7b (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.613, - "co2_cost": 0.6894055907798708 - } - }, - { - "id": "marcuscedricridia/cursa-o1-7b_bfloat16_5ab72cb7de828a3064d69e05008662161cd25684_True", - "model": { - "name": "marcuscedricridia/cursa-o1-7b", - "sha": "5ab72cb7de828a3064d69e05008662161cd25684", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.70965718904586, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7628215357473725, - "normalized_score": 76.28215357473725 - }, - "bbh": { - "name": "BBH", - "value": 0.5465860023973769, - "normalized_score": 35.70429131124187 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4954682779456193, - "normalized_score": 49.546827794561935 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.4300625, - "normalized_score": 13.42447916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4392453457446808, - "normalized_score": 37.69392730496453 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "marcuscedricridia/cursa-o1-7b (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.613, - "co2_cost": 0.6845874324660594 - } - }, - { - "id": "marcuscedricridia/cursa-o1-7b-2-28-2025_bfloat16_50faf0f516e0afa0530daa7813366a147149b079_True", - "model": { - "name": "marcuscedricridia/cursa-o1-7b-2-28-2025", - "sha": "50faf0f516e0afa0530daa7813366a147149b079", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.843864079561996, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7467098409996586, - "normalized_score": 74.67098409996586 - }, - "bbh": { - "name": "BBH", - "value": 0.538413713363387, - "normalized_score": 34.668302854132016 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4811178247734139, - "normalized_score": 48.11178247734139 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.42733333333333334, - 
"normalized_score": 12.616666666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4365026595744681, - "normalized_score": 37.38918439716312 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "marcuscedricridia/cursa-o1-7b-2-28-2025 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 0.6999771631262225 - } - }, - { - "id": "marcuscedricridia/cursa-o1-7b-v1.1_bfloat16_7932d91ce03991a94866f4d7291c9866b3733906_True", - "model": { - "name": "marcuscedricridia/cursa-o1-7b-v1.1", - "sha": "7932d91ce03991a94866f4d7291c9866b3733906", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.502776909004204, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7527549125209998, - "normalized_score": 75.27549125209998 - }, - "bbh": { - "name": "BBH", - "value": 0.5492557305346194, - "normalized_score": 36.06689595024021 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4984894259818731, - "normalized_score": 49.848942598187314 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.425875, - "normalized_score": 12.534375000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43916223404255317, - "normalized_score": 37.6846926713948 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-28", - "submission_date": "2025-02-28", - "generation": 1, - "base_model": "marcuscedricridia/cursa-o1-7b-v1.1 (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 7.613, - "co2_cost": 0.6935658180791084 - } - }, - { - "id": "marcuscedricridia/cursa-o1-7b-v1.2-normalize-false_bfloat16_96735a68663bcc13ed471697f9b1ade1551312d0_True", - "model": { - "name": "marcuscedricridia/cursa-o1-7b-v1.2-normalize-false", - "sha": "96735a68663bcc13ed471697f9b1ade1551312d0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.79978502425582, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7615726272955757, - "normalized_score": 76.15726272955757 - }, - "bbh": { - "name": "BBH", - "value": 0.5492349810703803, - "normalized_score": 36.12772971752309 - }, - "math": { - "name": "MATH Level 5", - "value": 0.49924471299093653, - "normalized_score": 49.92447129909365 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.4272708333333333, - "normalized_score": 12.808854166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4435671542553192, - "normalized_score": 38.17412825059102 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-28", - "submission_date": "2025-02-28", - "generation": 1, - "base_model": 
"marcuscedricridia/cursa-o1-7b-v1.2-normalize-false (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 0.6751990549019224 - } - }, - { - "id": "marcuscedricridia/cursor-o1-7b_bfloat16_e2deb6ec40fe3aaa52d8fe30c9ef2123bd8b2abd_True", - "model": { - "name": "marcuscedricridia/cursor-o1-7b", - "sha": "e2deb6ec40fe3aaa52d8fe30c9ef2123bd8b2abd", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 20.571431098235255, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4106880853912065, - "normalized_score": 41.068808539120646 - }, - "bbh": { - "name": "BBH", - "value": 0.5007453242508472, - "normalized_score": 28.820714356242792 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14123867069486404, - "normalized_score": 14.123867069486403 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28104026845637586, - "normalized_score": 4.138702460850116 - }, - "musr": { - "name": "MUSR", - "value": 0.41009375, - "normalized_score": 10.261718750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32513297872340424, - "normalized_score": 25.014775413711583 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-26", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.7800219303340636 - } - }, - { - "id": "marcuscedricridia/cursorr-o1.2-7b_bfloat16_7aa4af42a5100355d36120451a9b71c11b397097_True", - "model": { - "name": "marcuscedricridia/cursorr-o1.2-7b", - "sha": "7aa4af42a5100355d36120451a9b71c11b397097", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.069130519573485, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1659895743294459, - "normalized_score": 16.598957432944587 - }, - "bbh": { - "name": "BBH", - "value": 0.3068134113454804, - "normalized_score": 3.4654936219477315 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25419463087248323, - "normalized_score": 0.5592841163310973 - }, - "musr": { - "name": "MUSR", - "value": 0.35384375, - "normalized_score": 2.897135416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10804521276595745, - "normalized_score": 0.8939125295508273 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-26", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.6764200040790131 - } - }, - { - "id": "marcuscedricridia/etr1o-explicit-v1.1_bfloat16_5d4d240f0b0abfe4566efde3ec4843a3ca1c8b31_True", - "model": { - "name": "marcuscedricridia/etr1o-explicit-v1.1", - "sha": "5d4d240f0b0abfe4566efde3ec4843a3ca1c8b31", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.109189405741137, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": 
"IFEval", - "value": 0.28803906966847964, - "normalized_score": 28.80390696684797 - }, - "bbh": { - "name": "BBH", - "value": 0.31316553135589525, - "normalized_score": 4.190313048867292 - }, - "math": { - "name": "MATH Level 5", - "value": 0.004531722054380665, - "normalized_score": 0.4531722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.4110520833333333, - "normalized_score": 9.348177083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11951462765957446, - "normalized_score": 2.1682919621749397 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-03", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 0.8023332738780068 - } - }, - { - "id": "marcuscedricridia/etr1o-explicit-v1.2_bfloat16_b00fd4311767963b1e3b12c1f808ebfb428da125_True", - "model": { - "name": "marcuscedricridia/etr1o-explicit-v1.2", - "sha": "b00fd4311767963b1e3b12c1f808ebfb428da125", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.805786982236451, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1504020443534267, - "normalized_score": 15.04020443534267 - }, - "bbh": { - "name": "BBH", - "value": 0.29497368605886115, - "normalized_score": 1.9825717963953267 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.40311458333333333, - "normalized_score": 8.955989583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11261635638297872, - "normalized_score": 1.4018173758865236 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-03", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 0.6925911936091056 - } - }, - { - "id": "marcuscedricridia/etr1o-v1.1_bfloat16_6aaf3b43c713ac0964528705bd771b0f128c2c4d_True", - "model": { - "name": "marcuscedricridia/etr1o-v1.1", - "sha": "6aaf3b43c713ac0964528705bd771b0f128c2c4d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.967503215885146, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15971954414287426, - "normalized_score": 15.971954414287424 - }, - "bbh": { - "name": "BBH", - "value": 0.31003625778742805, - "normalized_score": 3.321013560260136 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25671140939597314, - "normalized_score": 0.8948545861297527 - }, - "musr": { - "name": "MUSR", - "value": 0.40165625, - "normalized_score": 7.873697916666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11569148936170212, - "normalized_score": 1.7434988179669018 - } - }, - "features": { - 
"is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-04", - "submission_date": "2025-03-04", - "generation": 1, - "base_model": "marcuscedricridia/etr1o-v1.1 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 1.4274969923485479 - } - }, - { - "id": "marcuscedricridia/etr1o-v1.2_bfloat16_835c7a4f1cd397e72fa7e90d56c0bc02377d0722_False", - "model": { - "name": "marcuscedricridia/etr1o-v1.2", - "sha": "835c7a4f1cd397e72fa7e90d56c0bc02377d0722", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.91280232445549, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7286998497320443, - "normalized_score": 72.86998497320442 - }, - "bbh": { - "name": "BBH", - "value": 0.6349035922791185, - "normalized_score": 47.700911102136416 - }, - "math": { - "name": "MATH Level 5", - "value": 0.35876132930513593, - "normalized_score": 35.87613293051359 - }, - "gpqa": { - "name": "GPQA", - "value": 0.37583892617449666, - "normalized_score": 16.778523489932887 - }, - "musr": { - "name": "MUSR", - "value": 0.4714479166666667, - "normalized_score": 18.297656250000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5315824468085106, - "normalized_score": 47.953605200945624 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-04", - "submission_date": "2025-03-04", - "generation": 1, - "base_model": "marcuscedricridia/etr1o-v1.2 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 1.7821409347839607 - } - }, - { - "id": "marcuscedricridia/fan-o1-7b_bfloat16_f687a7029b01b79314dce6b31098383f8838c8b9_True", - "model": { - "name": "marcuscedricridia/fan-o1-7b", - "sha": "f687a7029b01b79314dce6b31098383f8838c8b9", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 20.506029356913686, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4455588948434598, - "normalized_score": 44.55588948434598 - }, - "bbh": { - "name": "BBH", - "value": 0.4849058892394324, - "normalized_score": 26.546331472502214 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16163141993957703, - "normalized_score": 16.1631419939577 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28439597315436244, - "normalized_score": 4.5861297539149914 - }, - "musr": { - "name": "MUSR", - "value": 0.3833645833333333, - "normalized_score": 5.920572916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3273769946808511, - "normalized_score": 25.26411052009456 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-27", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 0.6796565802562364 - } - }, - { - "id": "marcuscedricridia/olmner-7b_bfloat16_9c7bd318b35ab3df4ca91b5d3a3434f81680034f_True", - "model": { - "name": "marcuscedricridia/olmner-7b", - "sha": 
"9c7bd318b35ab3df4ca91b5d3a3434f81680034f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.56417304972707, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7253775537795273, - "normalized_score": 72.53775537795272 - }, - "bbh": { - "name": "BBH", - "value": 0.5471591805569388, - "normalized_score": 35.74684409479422 - }, - "math": { - "name": "MATH Level 5", - "value": 0.46299093655589124, - "normalized_score": 46.299093655589125 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.43796875, - "normalized_score": 14.31276041666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4309341755319149, - "normalized_score": 36.77046394799054 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-27", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.6545304247150346 - } - }, - { - "id": "marcuscedricridia/olmner-della-7b_bfloat16_6e7d48350df281683fc54afe035a7984fc561306_True", - "model": { - "name": "marcuscedricridia/olmner-della-7b", - "sha": "6e7d48350df281683fc54afe035a7984fc561306", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.354392479860905, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7636958824807067, - "normalized_score": 76.36958824807067 - }, - "bbh": { - "name": "BBH", - "value": 0.5491231851969524, - "normalized_score": 35.896041596647144 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4962235649546828, - "normalized_score": 49.62235649546828 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.4207604166666667, - "normalized_score": 11.795052083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43858045212765956, - "normalized_score": 37.62005023640661 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "marcuscedricridia/olmner-della-7b (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 0.651305802502441 - } - }, - { - "id": "marcuscedricridia/olmner-o1-7b_bfloat16_b98b6ccf2b20346e760ab3db77d518ce92adf5fb_True", - "model": { - "name": "marcuscedricridia/olmner-o1-7b", - "sha": "b98b6ccf2b20346e760ab3db77d518ce92adf5fb", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.292532643734724, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7527549125209998, - "normalized_score": 75.27549125209998 - }, - "bbh": { - "name": "BBH", - "value": 0.5480873056178129, - "normalized_score": 35.68672724705917 - }, - "math": { - "name": "MATH Level 5", - "value": 0.49244712990936557, - "normalized_score": 
49.244712990936556 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.42990625, - "normalized_score": 13.104947916666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43858045212765956, - "normalized_score": 37.62005023640661 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-27", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 0.6713850918516362 - } - }, - { - "id": "marcuscedricridia/olmner-sbr-7b_bfloat16_8af017146aef2673138387020d311b19b6c60ef1_True", - "model": { - "name": "marcuscedricridia/olmner-sbr-7b", - "sha": "8af017146aef2673138387020d311b19b6c60ef1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.29850278097941, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7600488924941378, - "normalized_score": 76.00488924941378 - }, - "bbh": { - "name": "BBH", - "value": 0.5461642048146724, - "normalized_score": 35.71538595353347 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4947129909365559, - "normalized_score": 49.47129909365559 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3087248322147651, - "normalized_score": 7.829977628635347 - }, - "musr": { - "name": "MUSR", - "value": 0.4153645833333333, - "normalized_score": 10.853906250000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4412400265957447, - "normalized_score": 37.9155585106383 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-01", - "submission_date": "2025-03-01", - "generation": 1, - "base_model": "marcuscedricridia/olmner-sbr-7b (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.613, - "co2_cost": 0.6974451975798761 - } - }, - { - "id": "marcuscedricridia/post-cursa-o1_bfloat16_ec7e81808cb91d95ebbcc6e35bed6aaf01c0a5bb_True", - "model": { - "name": "marcuscedricridia/post-cursa-o1", - "sha": "ec7e81808cb91d95ebbcc6e35bed6aaf01c0a5bb", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.661699398945466, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7628215357473725, - "normalized_score": 76.28215357473725 - }, - "bbh": { - "name": "BBH", - "value": 0.5479692437233474, - "normalized_score": 35.82728921979409 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4871601208459215, - "normalized_score": 48.716012084592144 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.43514583333333334, - "normalized_score": 13.859895833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4360871010638298, - "normalized_score": 37.34301122931442 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - 
"submission_date": "2025-02-28", - "generation": 1, - "base_model": "marcuscedricridia/post-cursa-o1 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 0.6909705612974125 - } - }, - { - "id": "marcuscedricridia/pre-cursa-o1_bfloat16_2912216a41a29bb0cbc2f0234f26c8bd4aeaed74_True", - "model": { - "name": "marcuscedricridia/pre-cursa-o1", - "sha": "2912216a41a29bb0cbc2f0234f26c8bd4aeaed74", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.45904597530088, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.740889728143548, - "normalized_score": 74.0889728143548 - }, - "bbh": { - "name": "BBH", - "value": 0.5461688442794247, - "normalized_score": 35.72155461706918 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5037764350453172, - "normalized_score": 50.37764350453172 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.42596875, - "normalized_score": 12.579427083333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4424035904255319, - "normalized_score": 38.04484338061466 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-02-28", - "generation": 1, - "base_model": "marcuscedricridia/pre-cursa-o1 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 0.7019430148436578 - } - }, - { - "id": "marcuscedricridia/pre-cursa-o1-v1.2_bfloat16_f1db05705595b63739a88ef8ca338707e0bbafc6_True", - "model": { - "name": "marcuscedricridia/pre-cursa-o1-v1.2", - "sha": "f1db05705595b63739a88ef8ca338707e0bbafc6", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.886278898117105, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7548781677061308, - "normalized_score": 75.48781677061308 - }, - "bbh": { - "name": "BBH", - "value": 0.5486788313377599, - "normalized_score": 36.1178124071646 - }, - "math": { - "name": "MATH Level 5", - "value": 0.506797583081571, - "normalized_score": 50.6797583081571 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31291946308724833, - "normalized_score": 8.389261744966444 - }, - "musr": { - "name": "MUSR", - "value": 0.42723958333333334, - "normalized_score": 12.838281250000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4402426861702128, - "normalized_score": 37.80474290780142 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-02-28", - "generation": 1, - "base_model": "marcuscedricridia/pre-cursa-o1-v1.2 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.613, - "co2_cost": 0.6879118341708577 - } - }, - { - "id": "marcuscedricridia/pre-cursa-o1-v1.3_bfloat16_f33523f9bc4c3524381bede36b8d4d422c7f9cad_True", - "model": { - "name": "marcuscedricridia/pre-cursa-o1-v1.3", - "sha": "f33523f9bc4c3524381bede36b8d4d422c7f9cad", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": 
"Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.71219961022997, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7506815250202795, - "normalized_score": 75.06815250202794 - }, - "bbh": { - "name": "BBH", - "value": 0.5454519705653261, - "normalized_score": 35.4685970258894 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5075528700906344, - "normalized_score": 50.755287009063444 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31291946308724833, - "normalized_score": 8.389261744966444 - }, - "musr": { - "name": "MUSR", - "value": 0.42714583333333334, - "normalized_score": 12.593229166666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4419880319148936, - "normalized_score": 37.99867021276595 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-02-28", - "generation": 1, - "base_model": "marcuscedricridia/pre-cursa-o1-v1.3 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 0.6831272927020302 - } - }, - { - "id": "marcuscedricridia/pre-cursa-o1-v1.4_bfloat16_636558c7a765cc4356a79b51ff479a50dd15fc14_True", - "model": { - "name": "marcuscedricridia/pre-cursa-o1-v1.4", - "sha": "636558c7a765cc4356a79b51ff479a50dd15fc14", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.264047353976316, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.748783228500379, - "normalized_score": 74.8783228500379 - }, - "bbh": { - "name": "BBH", - "value": 0.5493014138981462, - "normalized_score": 35.98044535759805 - }, - "math": { - "name": "MATH Level 5", - "value": 0.48338368580060426, - "normalized_score": 48.338368580060425 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.42851041666666667, - "normalized_score": 12.830468750000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4435671542553192, - "normalized_score": 38.17412825059102 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-02-28", - "generation": 1, - "base_model": "marcuscedricridia/pre-cursa-o1-v1.4 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 0.691780936716261 - } - }, - { - "id": "marcuscedricridia/pre-cursa-o1-v1.6_bfloat16_044f5202846d83e8cf352ae999e087661ff31665_True", - "model": { - "name": "marcuscedricridia/pre-cursa-o1-v1.6", - "sha": "044f5202846d83e8cf352ae999e087661ff31665", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.795179506465466, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7527549125209998, - "normalized_score": 75.27549125209998 - }, - "bbh": { - "name": "BBH", - "value": 0.5473342320067097, - "normalized_score": 35.92091323812233 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5, - "normalized_score": 50.0 - }, - "gpqa": { - "name": "GPQA", - "value": 
0.32046979865771813, - "normalized_score": 9.395973154362418 - }, - "musr": { - "name": "MUSR", - "value": 0.4233645833333333, - "normalized_score": 12.253906250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44132313829787234, - "normalized_score": 37.92479314420804 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-02-28", - "generation": 1, - "base_model": "marcuscedricridia/pre-cursa-o1-v1.6 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.613, - "co2_cost": 0.6700119516121076 - } - }, - { - "id": "marcuscedricridia/r1o-et_bfloat16_c0393cc3b2e0f32383a0c4a0cec27eb2eb36ef7e_True", - "model": { - "name": "marcuscedricridia/r1o-et", - "sha": "c0393cc3b2e0f32383a0c4a0cec27eb2eb36ef7e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 14.343384030894796, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3596800932636516, - "normalized_score": 35.968009326365156 - }, - "bbh": { - "name": "BBH", - "value": 0.42092007019831174, - "normalized_score": 18.9129347129545 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07930513595166164, - "normalized_score": 7.930513595166164 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.3579375, - "normalized_score": 2.675520833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2579787234042553, - "normalized_score": 17.5531914893617 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-02", - "submission_date": "2025-03-02", - "generation": 1, - "base_model": "marcuscedricridia/r1o-et (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 0.6715149105937247 - } - }, - { - "id": "marcuscedricridia/sbr-o1-7b_bfloat16_8e92f7967b8dc6e3770834bbcabc031ba3e8b13d_True", - "model": { - "name": "marcuscedricridia/sbr-o1-7b", - "sha": "8e92f7967b8dc6e3770834bbcabc031ba3e8b13d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.69314046938948, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7454609325478618, - "normalized_score": 74.54609325478619 - }, - "bbh": { - "name": "BBH", - "value": 0.5478826565229475, - "normalized_score": 35.77966356053602 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4984894259818731, - "normalized_score": 49.848942598187314 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3104026845637584, - "normalized_score": 8.05369127516779 - }, - "musr": { - "name": "MUSR", - "value": 0.4404166666666667, - "normalized_score": 14.652083333333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43550531914893614, - "normalized_score": 37.278368794326234 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-02-27", - "generation": 1, - 
"base_model": "marcuscedricridia/sbr-o1-7b (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 0.6722542404365971 - } - }, - { - "id": "marcuscedricridia/stray-r1o-et_bfloat16_7930bd53216aaad96325543fc64dc4a2917ebfce_True", - "model": { - "name": "marcuscedricridia/stray-r1o-et", - "sha": "7930bd53216aaad96325543fc64dc4a2917ebfce", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 5.1276666327892615, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15622215720953736, - "normalized_score": 15.622215720953736 - }, - "bbh": { - "name": "BBH", - "value": 0.2967459956151434, - "normalized_score": 2.6446684279500334 - }, - "math": { - "name": "MATH Level 5", - "value": 0.004531722054380665, - "normalized_score": 0.4531722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.4085729166666667, - "normalized_score": 9.438281250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.109375, - "normalized_score": 1.041666666666666 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-01", - "submission_date": "2025-03-03", - "generation": 1, - "base_model": "marcuscedricridia/stray-r1o-et (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 0.7094334830006445 - } - }, - { - "id": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-EnhancedMUSREnsembleV3_bfloat16_60c5853d376d4b62b19dd4c4741224d0246ec5b4_False", - "model": { - "name": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-EnhancedMUSREnsembleV3", - "sha": "60c5853d376d4b62b19dd4c4741224d0246ec5b4", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.224121473565234, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.18732186154957736, - "normalized_score": 18.732186154957734 - }, - "bbh": { - "name": "BBH", - "value": 0.3239117424825444, - "normalized_score": 7.9185120409032566 - }, - "math": { - "name": "MATH Level 5", - "value": 0.026435045317220542, - "normalized_score": 2.643504531722054 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.37520833333333337, - "normalized_score": 4.601041666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17195811170212766, - "normalized_score": 7.99534574468085 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-01", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 1.7035439123483884 - } - }, - { - "id": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-MUSR-ENSEMBLE-V2Mathis_bfloat16_3fd229bcc3b4d2502ed7f3bdd48ccb5c97e83212_False", - "model": { - "name": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-MUSR-ENSEMBLE-V2Mathis", - "sha": "3fd229bcc3b4d2502ed7f3bdd48ccb5c97e83212", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - 
"weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.224121473565234, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.18732186154957736, - "normalized_score": 18.732186154957734 - }, - "bbh": { - "name": "BBH", - "value": 0.3239117424825444, - "normalized_score": 7.9185120409032566 - }, - "math": { - "name": "MATH Level 5", - "value": 0.026435045317220542, - "normalized_score": 2.643504531722054 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.37520833333333337, - "normalized_score": 4.601041666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17195811170212766, - "normalized_score": 7.99534574468085 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-10-31", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 1.7290301608684984 - } - }, - { - "id": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-ENSEMBLE-Mathis_bfloat16_455945ed4318bbeae008a253f877f56a68291b8b_False", - "model": { - "name": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-ENSEMBLE-Mathis", - "sha": "455945ed4318bbeae008a253f877f56a68291b8b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.224121473565234, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.18732186154957736, - "normalized_score": 18.732186154957734 - }, - "bbh": { - "name": "BBH", - "value": 0.3239117424825444, - "normalized_score": 7.9185120409032566 - }, - "math": { - "name": "MATH Level 5", - "value": 0.026435045317220542, - "normalized_score": 2.643504531722054 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.37520833333333337, - "normalized_score": 4.601041666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17195811170212766, - "normalized_score": 7.99534574468085 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-10-31", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 1.7281217213354472 - } - }, - { - "id": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-Mathis_float16_dd86c3d7f77748a0ba18d911ceb93358a69ce160_False", - "model": { - "name": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-Mathis", - "sha": "dd86c3d7f77748a0ba18d911ceb93358a69ce160", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.257344997193762, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.18824607596732226, - "normalized_score": 18.824607596732225 - }, - "bbh": { - "name": "BBH", - "value": 0.32327887380902803, - "normalized_score": 8.079577103291848 - }, - "math": { - "name": "MATH Level 5", - "value": 0.027190332326283987, - "normalized_score": 2.719033232628399 - }, - 
"gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.3684791666666667, - "normalized_score": 4.1265625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17204122340425532, - "normalized_score": 8.00458037825059 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-25", - "submission_date": "2024-10-31", - "generation": 0, - "base_model": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-Mathis", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 0.494, - "co2_cost": 1.8201847180981525 - } - }, - { - "id": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis_bfloat16_7a9d848188a674302d64a865786d4508be19571a_True", - "model": { - "name": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis", - "sha": "7a9d848188a674302d64a865786d4508be19571a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 5.9763915232152165, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16521496296493304, - "normalized_score": 16.521496296493304 - }, - "bbh": { - "name": "BBH", - "value": 0.30237295164613204, - "normalized_score": 3.0833518776266557 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0188821752265861, - "normalized_score": 1.8882175226586102 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25671140939597314, - "normalized_score": 0.8948545861297527 - }, - "musr": { - "name": "MUSR", - "value": 0.42730208333333336, - "normalized_score": 12.179427083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1116190159574468, - "normalized_score": 1.2910017730496441 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-12", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 2.1979019569021543 - } - }, - { - "id": "mattshumer/Reflection-Llama-3.1-70B_float16_458962ed801fac4eadd01a91a2029a3a82f4cd84_True", - "model": { - "name": "mattshumer/Reflection-Llama-3.1-70B", - "sha": "458962ed801fac4eadd01a91a2029a3a82f4cd84", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.392555308681647, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.00452133671990319, - "normalized_score": 0.45213367199031906 - }, - "bbh": { - "name": "BBH", - "value": 0.645001286484342, - "normalized_score": 47.86623716674634 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21450151057401812, - "normalized_score": 21.45015105740181 - }, - "gpqa": { - "name": "GPQA", - "value": 0.36325503355704697, - "normalized_score": 15.100671140939594 - }, - "musr": { - "name": "MUSR", - "value": 0.45765625000000004, - "normalized_score": 17.540364583333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4955119680851064, - "normalized_score": 43.94577423167848 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - 
"upload_date": "2024-09-05", - "submission_date": "2024-12-25", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-70B", - "hub_license": "llama3.1", - "hub_hearts": 1714, - "params_billions": 70.554, - "co2_cost": 39.03104120119348 - } - }, - { - "id": "mattshumer/ref_70_e3_float16_5d2d9dbb9e0bf61879255f63f1b787296fe524cc_True", - "model": { - "name": "mattshumer/ref_70_e3", - "sha": "5d2d9dbb9e0bf61879255f63f1b787296fe524cc", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 35.395599658838286, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6294321289733462, - "normalized_score": 62.943212897334625 - }, - "bbh": { - "name": "BBH", - "value": 0.6500839481104265, - "normalized_score": 49.27446660003019 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2794561933534743, - "normalized_score": 27.945619335347434 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33557046979865773, - "normalized_score": 11.409395973154364 - }, - "musr": { - "name": "MUSR", - "value": 0.4327604166666667, - "normalized_score": 12.995052083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5302526595744681, - "normalized_score": 47.80585106382979 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-08", - "submission_date": "2024-09-08", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-70B", - "hub_license": "llama3.1", - "hub_hearts": 58, - "params_billions": 70.554, - "co2_cost": 64.10397419494268 - } - }, - { - "id": "maywell/Qwen2-7B-Multilingual-RP_bfloat16_487e8f0498419e4d1188f661dbb63bd629be4638_True", - "model": { - "name": "maywell/Qwen2-7B-Multilingual-RP", - "sha": "487e8f0498419e4d1188f661dbb63bd629be4638", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 23.450878693391584, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4347176602525743, - "normalized_score": 43.47176602525742 - }, - "bbh": { - "name": "BBH", - "value": 0.5062058680861069, - "normalized_score": 30.54356147647468 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2243202416918429, - "normalized_score": 22.432024169184288 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.3695625, - "normalized_score": 6.228645833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3858876329787234, - "normalized_score": 31.765292553191493 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-24", - "submission_date": "2024-09-05", - "generation": 0, - "base_model": "maywell/Qwen2-7B-Multilingual-RP", - "hub_license": "apache-2.0", - "hub_hearts": 55, - "params_billions": 7.616, - "co2_cost": 1.918825671514629 - } - }, - { - "id": "meditsolutions/Llama-3.1-MedIT-SUN-8B_bfloat16_0c11abbaa40e76b538b8c0f9c50e965078999087_True", - "model": { - "name": "meditsolutions/Llama-3.1-MedIT-SUN-8B", - "sha": "0c11abbaa40e76b538b8c0f9c50e965078999087", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": 
"Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.19415971775715, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7837293935646308, - "normalized_score": 78.37293935646308 - }, - "bbh": { - "name": "BBH", - "value": 0.5186924904597405, - "normalized_score": 32.001650567550215 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20921450151057402, - "normalized_score": 20.921450151057403 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3087248322147651, - "normalized_score": 7.829977628635347 - }, - "musr": { - "name": "MUSR", - "value": 0.40562499999999996, - "normalized_score": 9.636458333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3916223404255319, - "normalized_score": 32.402482269503544 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-06", - "submission_date": "2024-11-06", - "generation": 1, - "base_model": "meditsolutions/Llama-3.1-MedIT-SUN-8B (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.4261000691308154 - } - }, - { - "id": "meditsolutions/Llama-3.2-SUN-1B-Instruct_bfloat16_538477c528ecd80f9537b0d4ea730b7b9b529115_True", - "model": { - "name": "meditsolutions/Llama-3.2-SUN-1B-Instruct", - "sha": "538477c528ecd80f9537b0d4ea730b7b9b529115", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaMedITForCausalLM", - "average_score": 15.524297116936125, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6412973133507981, - "normalized_score": 64.12973133507981 - }, - "bbh": { - "name": "BBH", - "value": 0.34738999022447486, - "normalized_score": 9.183738602498957 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07099697885196375, - "normalized_score": 7.099697885196375 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2424496644295302, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.35136458333333337, - "normalized_score": 4.053906250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17810837765957446, - "normalized_score": 8.678708628841607 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-27", - "submission_date": "2024-11-27", - "generation": 1, - "base_model": "meditsolutions/Llama-3.2-SUN-1B-Instruct (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 4, - "params_billions": 1.498, - "co2_cost": 0.7034879137518993 - } - }, - { - "id": "meditsolutions/Llama-3.2-SUN-1B-chat_bfloat16_a67791cfc31d09c3e96bd8c62a386f6107378087_True", - "model": { - "name": "meditsolutions/Llama-3.2-SUN-1B-chat", - "sha": "a67791cfc31d09c3e96bd8c62a386f6107378087", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.641365564259702, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5481743994822625, - "normalized_score": 54.81743994822626 - }, - "bbh": { - "name": "BBH", - "value": 0.35144575516411386, - "normalized_score": 8.690237956315015 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06419939577039276, - "normalized_score": 6.419939577039275 - 
}, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.3249166666666667, - "normalized_score": 1.0479166666666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.18375997340425532, - "normalized_score": 9.306663711583923 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-03", - "submission_date": "2024-11-07", - "generation": 1, - "base_model": "meditsolutions/Llama-3.2-SUN-1B-chat (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 2, - "params_billions": 1.498, - "co2_cost": 1.0954387327649717 - } - }, - { - "id": "meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-26000_bfloat16_1300885555ca8bbed20a57cf0ec9f7ae014200c3_True", - "model": { - "name": "meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-26000", - "sha": "1300885555ca8bbed20a57cf0ec9f7ae014200c3", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 8.143484964590147, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.28139447776344545, - "normalized_score": 28.139447776344547 - }, - "bbh": { - "name": "BBH", - "value": 0.3017752699243885, - "normalized_score": 2.8953053502640427 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01812688821752266, - "normalized_score": 1.812688821752266 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.41033333333333327, - "normalized_score": 8.491666666666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1344747340425532, - "normalized_score": 3.830526004728132 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-27", - "submission_date": "2024-10-04", - "generation": 1, - "base_model": "meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-26000 (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 2, - "params_billions": 2.209, - "co2_cost": 1.6263405267461448 - } - }, - { - "id": "meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-34800_bfloat16_ef65f05f577a69a1992349c8d33c96cd099844f7_True", - "model": { - "name": "meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-34800", - "sha": "ef65f05f577a69a1992349c8d33c96cd099844f7", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 8.193102548827737, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25009530268576263, - "normalized_score": 25.009530268576263 - }, - "bbh": { - "name": "BBH", - "value": 0.3161124673749052, - "normalized_score": 5.466179719646344 - }, - "math": { - "name": "MATH Level 5", - "value": 0.010574018126888218, - "normalized_score": 1.0574018126888218 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2860738255033557, - "normalized_score": 4.809843400447425 - }, - "musr": { - "name": "MUSR", - "value": 0.4022395833333334, - "normalized_score": 8.846614583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.13572140957446807, - "normalized_score": 3.9690455082742293 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": 
false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-27", - "submission_date": "2024-10-05", - "generation": 1, - "base_model": "meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-34800 (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 2, - "params_billions": 2.209, - "co2_cost": 1.6310659932660034 - } - }, - { - "id": "meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0_bfloat16_b8a31c62ab4acbd4c645fd882d899c4ec7280677_True", - "model": { - "name": "meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0", - "sha": "b8a31c62ab4acbd4c645fd882d899c4ec7280677", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.31712952335647, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5636865738462834, - "normalized_score": 56.36865738462834 - }, - "bbh": { - "name": "BBH", - "value": 0.3390826682107771, - "normalized_score": 7.211667793228333 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06268882175226587, - "normalized_score": 6.268882175226587 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2575503355704698, - "normalized_score": 1.0067114093959737 - }, - "musr": { - "name": "MUSR", - "value": 0.32094791666666667, - "normalized_score": 3.0184895833333347 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15425531914893617, - "normalized_score": 6.02836879432624 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-27", - "submission_date": "2024-10-20", - "generation": 1, - "base_model": "meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0 (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 2, - "params_billions": 2.472, - "co2_cost": 6.0196429945678105 - } - }, - { - "id": "meditsolutions/Llama-3.2-SUN-2.5B-chat_bfloat16_2bd68a18c0f7984f430acbc2efad76344177aba0_True", - "model": { - "name": "meditsolutions/Llama-3.2-SUN-2.5B-chat", - "sha": "2bd68a18c0f7984f430acbc2efad76344177aba0", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.98771014319157, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.560414145578177, - "normalized_score": 56.0414145578177 - }, - "bbh": { - "name": "BBH", - "value": 0.3574734302161124, - "normalized_score": 9.40909318881213 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07099697885196375, - "normalized_score": 7.099697885196375 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.3155208333333333, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1813497340425532, - "normalized_score": 9.038859338061465 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-27", - "submission_date": "2024-10-26", - "generation": 1, - "base_model": "meditsolutions/Llama-3.2-SUN-2.5B-chat (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 2, - "params_billions": 2.472, - "co2_cost": 2.9139059464793857 - } - }, - { - "id": "meditsolutions/Llama-3.2-SUN-HDIC-1B-Instruct_bfloat16_44d22fc1c0a85f880e75397b7fd3d0c6c1408f57_True", - 
"model": { - "name": "meditsolutions/Llama-3.2-SUN-HDIC-1B-Instruct", - "sha": "44d22fc1c0a85f880e75397b7fd3d0c6c1408f57", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 15.901863518867478, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6826631116548536, - "normalized_score": 68.26631116548536 - }, - "bbh": { - "name": "BBH", - "value": 0.3507731670753292, - "normalized_score": 9.529081854604923 - }, - "math": { - "name": "MATH Level 5", - "value": 0.061933534743202415, - "normalized_score": 6.193353474320242 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23657718120805368, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3593645833333334, - "normalized_score": 3.7872395833333345 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16871675531914893, - "normalized_score": 7.635195035460991 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-11", - "submission_date": "2024-12-11", - "generation": 1, - "base_model": "meditsolutions/Llama-3.2-SUN-HDIC-1B-Instruct (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 1, - "params_billions": 1.498, - "co2_cost": 0.7079900426911147 - } - }, - { - "id": "meditsolutions/MSH-Lite-7B-v1-Bielik-v2.3-Instruct-Llama-Prune_bfloat16_a0ffd0cd00cab2245c1f0edcef4d1d8ead4c6d6e_True", - "model": { - "name": "meditsolutions/MSH-Lite-7B-v1-Bielik-v2.3-Instruct-Llama-Prune", - "sha": "a0ffd0cd00cab2245c1f0edcef4d1d8ead4c6d6e", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 14.528174671948811, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.36550020611976225, - "normalized_score": 36.55002061197622 - }, - "bbh": { - "name": "BBH", - "value": 0.4034845834509661, - "normalized_score": 16.138425927069104 - }, - "math": { - "name": "MATH Level 5", - "value": 0.026435045317220542, - "normalized_score": 2.643504531722054 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.42534374999999996, - "normalized_score": 11.56796875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.21899933510638298, - "normalized_score": 13.222148345153665 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-12", - "submission_date": "2024-11-13", - "generation": 1, - "base_model": "meditsolutions/MSH-Lite-7B-v1-Bielik-v2.3-Instruct-Llama-Prune (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.646, - "co2_cost": 2.1181999813821943 - } - }, - { - "id": "meditsolutions/MSH-v1-Bielik-v2.3-Instruct-MedIT-merge_bfloat16_2db5e8871fb3be7e658e3bc6e2885d26b891b8b8_True", - "model": { - "name": "meditsolutions/MSH-v1-Bielik-v2.3-Instruct-MedIT-merge", - "sha": "2db5e8871fb3be7e658e3bc6e2885d26b891b8b8", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 28.55070593519018, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": 
"IFEval", - "value": 0.5814217387642566, - "normalized_score": 58.14217387642566 - }, - "bbh": { - "name": "BBH", - "value": 0.5671722290858499, - "normalized_score": 38.0234352820579 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20770392749244712, - "normalized_score": 20.770392749244714 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34563758389261745, - "normalized_score": 12.751677852348994 - }, - "musr": { - "name": "MUSR", - "value": 0.43845833333333334, - "normalized_score": 13.840624999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3499833776595745, - "normalized_score": 27.77593085106383 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-29", - "submission_date": "2024-11-06", - "generation": 1, - "base_model": "meditsolutions/MSH-v1-Bielik-v2.3-Instruct-MedIT-merge (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 11.169, - "co2_cost": 1.6854816575360465 - } - }, - { - "id": "meditsolutions/MedIT-Mesh-3B-Instruct_bfloat16_469d1a58f7747c3d456b3308b5a7042df4ab49e3_True", - "model": { - "name": "meditsolutions/MedIT-Mesh-3B-Instruct", - "sha": "469d1a58f7747c3d456b3308b5a7042df4ab49e3", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 28.318227712460025, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5814217387642566, - "normalized_score": 58.14217387642566 - }, - "bbh": { - "name": "BBH", - "value": 0.5575523356865378, - "normalized_score": 37.54705419374355 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20317220543806647, - "normalized_score": 20.31722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3238255033557047, - "normalized_score": 9.843400447427292 - }, - "musr": { - "name": "MUSR", - "value": 0.4047604166666667, - "normalized_score": 10.595052083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4011801861702128, - "normalized_score": 33.46446513002365 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-01", - "submission_date": "2024-11-01", - "generation": 1, - "base_model": "meditsolutions/MedIT-Mesh-3B-Instruct (Merge)", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 3.821, - "co2_cost": 1.0609726975903322 - } - }, - { - "id": "meditsolutions/SmolLM2-MedIT-Upscale-2B_bfloat16_5696c9ea7cbdee0f8ad1845f5a2dc7309f376143_True", - "model": { - "name": "meditsolutions/SmolLM2-MedIT-Upscale-2B", - "sha": "5696c9ea7cbdee0f8ad1845f5a2dc7309f376143", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 15.922534280948163, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6429207835210575, - "normalized_score": 64.29207835210575 - }, - "bbh": { - "name": "BBH", - "value": 0.3551122445928012, - "normalized_score": 10.514326138245686 - }, - "math": { - "name": "MATH Level 5", - "value": 0.055891238670694864, - "normalized_score": 5.589123867069486 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - 
"value": 0.33136458333333335, - "normalized_score": 2.4539062499999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.19705784574468085, - "normalized_score": 10.784205082742316 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-02", - "submission_date": "2024-12-02", - "generation": 1, - "base_model": "meditsolutions/SmolLM2-MedIT-Upscale-2B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 2.114, - "co2_cost": 0.6722832325143988 - } - }, - { - "id": "meetkai/functionary-small-v3.1_bfloat16_8e43bc1d2e259b91799e704c410a95b8ca458121_True", - "model": { - "name": "meetkai/functionary-small-v3.1", - "sha": "8e43bc1d2e259b91799e704c410a95b8ca458121", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.08333552224185, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6274584768414474, - "normalized_score": 62.74584768414474 - }, - "bbh": { - "name": "BBH", - "value": 0.4981781042779377, - "normalized_score": 28.616314665836143 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15709969788519637, - "normalized_score": 15.709969788519636 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28859060402684567, - "normalized_score": 5.145413870246088 - }, - "musr": { - "name": "MUSR", - "value": 0.3833645833333333, - "normalized_score": 6.1872395833333345 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33485704787234044, - "normalized_score": 26.09522754137116 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-26", - "submission_date": "2024-11-10", - "generation": 0, - "base_model": "meetkai/functionary-small-v3.1", - "hub_license": "mit", - "hub_hearts": 18, - "params_billions": 8.03, - "co2_cost": 1.4087393985659924 - } - }, - { - "id": "meraGPT/mera-mix-4x7B_bfloat16_09d965c5ef9b66ce419986027e03a915cb869e43_True", - "model": { - "name": "meraGPT/mera-mix-4x7B", - "sha": "09d965c5ef9b66ce419986027e03a915cb869e43", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 17.854958732939675, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4831779677921249, - "normalized_score": 48.317796779212486 - }, - "bbh": { - "name": "BBH", - "value": 0.40189899163661713, - "normalized_score": 17.48643895465503 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05362537764350453, - "normalized_score": 5.362537764350453 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.40565625, - "normalized_score": 9.273697916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27476728723404253, - "normalized_score": 19.418587470449168 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-13", - "submission_date": "2024-06-27", - "generation": 0, - "base_model": "meraGPT/mera-mix-4x7B", - "hub_license": "apache-2.0", 
- "hub_hearts": 18, - "params_billions": 24.154, - "co2_cost": 3.3288013261759777 - } - }, - { - "id": "mergekit-community/JAJUKA-WEWILLNEVERFORGETYOU-3B_float16_b4f2b833913f2f7b3ef009b67b47463f10c87e7d_False", - "model": { - "name": "mergekit-community/JAJUKA-WEWILLNEVERFORGETYOU-3B", - "sha": "b4f2b833913f2f7b3ef009b67b47463f10c87e7d", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.545574104726295, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49406907006742107, - "normalized_score": 49.40690700674211 - }, - "bbh": { - "name": "BBH", - "value": 0.436971949757697, - "normalized_score": 20.143745533955016 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12462235649546828, - "normalized_score": 12.462235649546828 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29278523489932884, - "normalized_score": 5.7046979865771785 - }, - "musr": { - "name": "MUSR", - "value": 0.36562500000000003, - "normalized_score": 6.969791666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3032746010638298, - "normalized_score": 22.586066784869978 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-26", - "submission_date": "2025-02-26", - "generation": 1, - "base_model": "mergekit-community/JAJUKA-WEWILLNEVERFORGETYOU-3B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 0.5939820838944418 - } - }, - { - "id": "mergekit-community/SuperQwen-2.5-1.5B_bfloat16_40caabd01f4d263eaff180005128019697ce7ad4_True", - "model": { - "name": "mergekit-community/SuperQwen-2.5-1.5B", - "sha": "40caabd01f4d263eaff180005128019697ce7ad4", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.2597119953830176, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1336409615376091, - "normalized_score": 13.36409615376091 - }, - "bbh": { - "name": "BBH", - "value": 0.2906897601443365, - "normalized_score": 1.735104550745163 - }, - "math": { - "name": "MATH Level 5", - "value": 0.019637462235649546, - "normalized_score": 1.9637462235649545 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25419463087248323, - "normalized_score": 0.5592841163310973 - }, - "musr": { - "name": "MUSR", - "value": 0.3355208333333333, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10746343085106383, - "normalized_score": 0.8292700945626471 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-25", - "generation": 1, - "base_model": "mergekit-community/SuperQwen-2.5-1.5B (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 1.777, - "co2_cost": 1.1848733045334847 - } - }, - { - "id": "mergekit-community/VirtuosoSmall-InstructModelStock_bfloat16_4ac90913a36d0f1b7bcf6ed31561137d1f7b0aa6_False", - "model": { - "name": "mergekit-community/VirtuosoSmall-InstructModelStock", - "sha": "4ac90913a36d0f1b7bcf6ed31561137d1f7b0aa6", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - 
"architecture": "Qwen2ForCausalLM", - "average_score": 38.226936795467445, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5237946426592552, - "normalized_score": 52.37946426592552 - }, - "bbh": { - "name": "BBH", - "value": 0.6517899193567194, - "normalized_score": 49.941772258845 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4093655589123867, - "normalized_score": 40.93655589123867 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3825503355704698, - "normalized_score": 17.67337807606264 - }, - "musr": { - "name": "MUSR", - "value": 0.4755729166666667, - "normalized_score": 19.31328125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5420545212765957, - "normalized_score": 49.11716903073285 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-19", - "submission_date": "2024-12-19", - "generation": 1, - "base_model": "mergekit-community/VirtuosoSmall-InstructModelStock (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 3.973855401053847 - } - }, - { - "id": "mergekit-community/diabolic6045_ELN-AOC-CAIN_bfloat16_8c58c8a7139d8002e2acabc66199aad25bb95453_False", - "model": { - "name": "mergekit-community/diabolic6045_ELN-AOC-CAIN", - "sha": "8c58c8a7139d8002e2acabc66199aad25bb95453", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.069643163144771, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.0861547361002141, - "normalized_score": 8.615473610021409 - }, - "bbh": { - "name": "BBH", - "value": 0.31256779393862577, - "normalized_score": 4.63001485442662 - }, - "math": { - "name": "MATH Level 5", - "value": 0.012084592145015106, - "normalized_score": 1.2084592145015105 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.36575, - "normalized_score": 6.052083333333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11909906914893617, - "normalized_score": 2.1221187943262403 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-05", - "submission_date": "2025-01-05", - "generation": 1, - "base_model": "mergekit-community/diabolic6045_ELN-AOC-CAIN (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 0.7230044845561019 - } - }, - { - "id": "mergekit-community/mergekit-dare_ties-ajgjgea_bfloat16_13203710422db795468a10e1d9fe623d0759a9da_True", - "model": { - "name": "mergekit-community/mergekit-dare_ties-ajgjgea", - "sha": "13203710422db795468a10e1d9fe623d0759a9da", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.518727290811396, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5263423272472595, - "normalized_score": 52.63423272472595 - }, - "bbh": { - "name": "BBH", - "value": 0.3494703687455365, - "normalized_score": 9.245558662707438 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06419939577039276, - "normalized_score": 
6.419939577039275 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.3289166666666667, - "normalized_score": 2.6479166666666676 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17436835106382978, - "normalized_score": 8.263150118203308 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-25", - "generation": 1, - "base_model": "mergekit-community/mergekit-dare_ties-ajgjgea (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.498, - "co2_cost": 0.6912931762856392 - } - }, - { - "id": "mergekit-community/mergekit-della-zgowfmf_bfloat16_8d99e6b381db8b64944b3dcfb05daa444206782d_False", - "model": { - "name": "mergekit-community/mergekit-della-zgowfmf", - "sha": "8d99e6b381db8b64944b3dcfb05daa444206782d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 37.278572482000335, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4827535383892516, - "normalized_score": 48.275353838925156 - }, - "bbh": { - "name": "BBH", - "value": 0.6590790528029254, - "normalized_score": 50.99537348773462 - }, - "math": { - "name": "MATH Level 5", - "value": 0.36178247734138974, - "normalized_score": 36.17824773413897 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3901006711409396, - "normalized_score": 18.680089485458616 - }, - "musr": { - "name": "MUSR", - "value": 0.4833854166666667, - "normalized_score": 20.489843749999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5414727393617021, - "normalized_score": 49.05252659574468 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-19", - "submission_date": "2024-12-19", - "generation": 1, - "base_model": "mergekit-community/mergekit-della-zgowfmf (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 3.871068971831803 - } - }, - { - "id": "mergekit-community/mergekit-model_stock-azgztvm_bfloat16_7f63ea96f89147daf909251cd3c1f1a20e005559_False", - "model": { - "name": "mergekit-community/mergekit-model_stock-azgztvm", - "sha": "7f63ea96f89147daf909251cd3c1f1a20e005559", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.37425822937247, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5061592131101034, - "normalized_score": 50.61592131101034 - }, - "bbh": { - "name": "BBH", - "value": 0.6542775546755846, - "normalized_score": 50.29437669451047 - }, - "math": { - "name": "MATH Level 5", - "value": 0.43731117824773413, - "normalized_score": 43.73111782477341 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38171140939597314, - "normalized_score": 17.561521252796418 - }, - "musr": { - "name": "MUSR", - "value": 0.47300000000000003, - "normalized_score": 19.091666666666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5405585106382979, - "normalized_score": 48.95094562647754 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - 
"is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-20", - "submission_date": "2024-12-20", - "generation": 1, - "base_model": "mergekit-community/mergekit-model_stock-azgztvm (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 4.067797803780636 - } - }, - { - "id": "mergekit-community/mergekit-slerp-fmrazcr_float16_87305622616c521e66fd48c48fc9b6eeb6287ff8_False", - "model": { - "name": "mergekit-community/mergekit-slerp-fmrazcr", - "sha": "87305622616c521e66fd48c48fc9b6eeb6287ff8", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.697490367242235, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.41743241266506204, - "normalized_score": 41.7432412665062 - }, - "bbh": { - "name": "BBH", - "value": 0.5341624678276029, - "normalized_score": 33.65092926552356 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11933534743202417, - "normalized_score": 11.933534743202417 - }, - "gpqa": { - "name": "GPQA", - "value": 0.311241610738255, - "normalized_score": 8.165548098434002 - }, - "musr": { - "name": "MUSR", - "value": 0.41045833333333337, - "normalized_score": 9.840625000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3776595744680851, - "normalized_score": 30.851063829787233 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-26", - "submission_date": "2024-12-26", - "generation": 1, - "base_model": "mergekit-community/mergekit-slerp-fmrazcr (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4609692317722878 - } - }, - { - "id": "mergekit-community/mergekit-ties-rraxdhv_bfloat16_27862d9e4e53426ab3274b316b5af3381c562e6d_True", - "model": { - "name": "mergekit-community/mergekit-ties-rraxdhv", - "sha": "27862d9e4e53426ab3274b316b5af3381c562e6d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 16.32304915695013, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.11230756614671294, - "normalized_score": 11.230756614671295 - }, - "bbh": { - "name": "BBH", - "value": 0.5183590984128971, - "normalized_score": 31.666384922678485 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04003021148036254, - "normalized_score": 4.003021148036254 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.42019791666666667, - "normalized_score": 10.991406249999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39095744680851063, - "normalized_score": 32.328605200945624 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-26", - "submission_date": "2025-01-26", - "generation": 1, - "base_model": "mergekit-community/mergekit-ties-rraxdhv (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 9.242, - "co2_cost": 3.8996389501796522 - } - }, - { - "id": 
"mergekit-community/mergekit-ties-ykqemwr_float16_81ba78a711ee017c3174f0b3cbf2135ec5b45d3d_False", - "model": { - "name": "mergekit-community/mergekit-ties-ykqemwr", - "sha": "81ba78a711ee017c3174f0b3cbf2135ec5b45d3d", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.441350549989938, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.35995491961329273, - "normalized_score": 35.99549196132927 - }, - "bbh": { - "name": "BBH", - "value": 0.5455496677885336, - "normalized_score": 34.709885806490576 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12235649546827794, - "normalized_score": 12.235649546827794 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3221476510067114, - "normalized_score": 9.61968680089485 - }, - "musr": { - "name": "MUSR", - "value": 0.4197916666666666, - "normalized_score": 11.707291666666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3734208776595745, - "normalized_score": 30.38009751773049 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-25", - "submission_date": "2024-12-25", - "generation": 1, - "base_model": "mergekit-community/mergekit-ties-ykqemwr (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 12.248, - "co2_cost": 2.0678520276800887 - } - }, - { - "id": "mergekit-community/sexeh_time_testing_bfloat16_78a3c6eb8ee3d92f3c3669d91e3869ed5ca20a5c_True", - "model": { - "name": "mergekit-community/sexeh_time_testing", - "sha": "78a3c6eb8ee3d92f3c3669d91e3869ed5ca20a5c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.63134503622095, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7329463601023063, - "normalized_score": 73.29463601023062 - }, - "bbh": { - "name": "BBH", - "value": 0.5241321549202608, - "normalized_score": 32.4874943439471 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08987915407854985, - "normalized_score": 8.987915407854985 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.36190625000000004, - "normalized_score": 3.9049479166666674 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36668882978723405, - "normalized_score": 29.63209219858156 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "mergekit-community/sexeh_time_testing (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.0850648010111519 - } - }, - { - "id": "meta-llama/Llama-2-13b-chat-hf_float16_a2cb7a712bb6e5e736ca7f8cd98167f81a0b5bd8_True", - "model": { - "name": "meta-llama/Llama-2-13b-chat-hf", - "sha": "a2cb7a712bb6e5e736ca7f8cd98167f81a0b5bd8", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 11.12963532656997, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.398472719052115, - 
"normalized_score": 39.8472719052115 - }, - "bbh": { - "name": "BBH", - "value": 0.33427367066714186, - "normalized_score": 7.155379968626988 - }, - "math": { - "name": "MATH Level 5", - "value": 0.013595166163141994, - "normalized_score": 1.3595166163141994 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23154362416107382, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.40072916666666664, - "normalized_score": 8.1578125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.19232047872340424, - "normalized_score": 10.257830969267138 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-07-13", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "meta-llama/Llama-2-13b-chat-hf", - "hub_license": "llama2", - "hub_hearts": 1067, - "params_billions": 13.016, - "co2_cost": 1.7491390164303326 - } - }, - { - "id": "meta-llama/Llama-2-13b-hf_float16_5c31dfb671ce7cfe2d7bb7c04375e44c55e815b1_False", - "model": { - "name": "meta-llama/Llama-2-13b-hf", - "sha": "5c31dfb671ce7cfe2d7bb7c04375e44c55e815b1", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 11.065185981273997, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24824687385027283, - "normalized_score": 24.824687385027282 - }, - "bbh": { - "name": "BBH", - "value": 0.41256242233835055, - "normalized_score": 17.222559825058127 - }, - "math": { - "name": "MATH Level 5", - "value": 0.015105740181268883, - "normalized_score": 1.5105740181268883 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28104026845637586, - "normalized_score": 4.138702460850116 - }, - "musr": { - "name": "MUSR", - "value": 0.35375, - "normalized_score": 3.385416666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.23778257978723405, - "normalized_score": 15.309175531914892 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-07-13", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "meta-llama/Llama-2-13b-hf", - "hub_license": "llama2", - "hub_hearts": 594, - "params_billions": 13.016, - "co2_cost": 2.224760312615953 - } - }, - { - "id": "meta-llama/Llama-2-70b-chat-hf_float16_e9149a12809580e8602995856f8098ce973d1080_True", - "model": { - "name": "meta-llama/Llama-2-70b-chat-hf", - "sha": "e9149a12809580e8602995856f8098ce973d1080", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.073695775827504, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49579227560650185, - "normalized_score": 49.57922756065019 - }, - "bbh": { - "name": "BBH", - "value": 0.30424741461642657, - "normalized_score": 4.613767082590614 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02945619335347432, - "normalized_score": 2.9456193353474323 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.3686666666666667, - "normalized_score": 3.4833333333333356 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2432679521276596, - "normalized_score": 15.918661347517732 
- } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-07-14", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "meta-llama/Llama-2-70b-chat-hf", - "hub_license": "llama2", - "hub_hearts": 2182, - "params_billions": 68.977, - "co2_cost": 45.79691011052116 - } - }, - { - "id": "meta-llama/Llama-2-70b-hf_float16_3aba440b59558f995867ba6e1f58f21d0336b5bb_False", - "model": { - "name": "meta-llama/Llama-2-70b-hf", - "sha": "3aba440b59558f995867ba6e1f58f21d0336b5bb", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.372598605703004, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2406780675274937, - "normalized_score": 24.06780675274937 - }, - "bbh": { - "name": "BBH", - "value": 0.5472591190449342, - "normalized_score": 35.900061863721675 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0324773413897281, - "normalized_score": 3.2477341389728096 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.41235416666666663, - "normalized_score": 9.777604166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37175864361702127, - "normalized_score": 30.1954048463357 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-07-11", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "meta-llama/Llama-2-70b-hf", - "hub_license": "llama2", - "hub_hearts": 846, - "params_billions": 68.977, - "co2_cost": 59.24249323430371 - } - }, - { - "id": "meta-llama/Llama-2-7b-chat-hf_float16_f5db02db724555f92da89c216ac04704f23d4590_True", - "model": { - "name": "meta-llama/Llama-2-7b-chat-hf", - "sha": "f5db02db724555f92da89c216ac04704f23d4590", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 9.609483264152255, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3986478100329348, - "normalized_score": 39.86478100329349 - }, - "bbh": { - "name": "BBH", - "value": 0.3113546355002185, - "normalized_score": 4.459171645959485 - }, - "math": { - "name": "MATH Level 5", - "value": 0.019637462235649546, - "normalized_score": 1.9637462235649545 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2533557046979866, - "normalized_score": 0.44742729306487633 - }, - "musr": { - "name": "MUSR", - "value": 0.3675520833333333, - "normalized_score": 3.27734375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16879986702127658, - "normalized_score": 7.644429669030731 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-07-13", - "submission_date": "2024-08-30", - "generation": 0, - "base_model": "meta-llama/Llama-2-7b-chat-hf", - "hub_license": "llama2", - "hub_hearts": 4318, - "params_billions": 6.738, - "co2_cost": 1.7913927457321086 - } - }, - { - "id": "meta-llama/Llama-2-7b-hf_float16_01c7f73d771dfac7d292323805ebc428287df4f9_False", - "model": { - "name": "meta-llama/Llama-2-7b-hf", 
- "sha": "01c7f73d771dfac7d292323805ebc428287df4f9", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 8.806357596540016, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2518938638368418, - "normalized_score": 25.18938638368418 - }, - "bbh": { - "name": "BBH", - "value": 0.34961958199821835, - "normalized_score": 10.35141665784897 - }, - "math": { - "name": "MATH Level 5", - "value": 0.017371601208459216, - "normalized_score": 1.7371601208459215 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26677852348993286, - "normalized_score": 2.2371364653243813 - }, - "musr": { - "name": "MUSR", - "value": 0.37006249999999996, - "normalized_score": 3.7578125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.18608710106382978, - "normalized_score": 9.56523345153664 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-07-13", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "meta-llama/Llama-2-7b-hf", - "hub_license": "llama2", - "hub_hearts": 1985, - "params_billions": 6.738, - "co2_cost": 1.1261891255441274 - } - }, - { - "id": "meta-llama/Llama-3.1-70B_bfloat16_f7d3cc45ed4ff669a354baf2e0f05e65799a0bee_True", - "model": { - "name": "meta-llama/Llama-3.1-70B", - "sha": "f7d3cc45ed4ff669a354baf2e0f05e65799a0bee", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.200215843375947, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16843752354862876, - "normalized_score": 16.843752354862875 - }, - "bbh": { - "name": "BBH", - "value": 0.626006918317161, - "normalized_score": 46.39941295581887 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18429003021148038, - "normalized_score": 18.429003021148038 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3875838926174497, - "normalized_score": 18.34451901565996 - }, - "musr": { - "name": "MUSR", - "value": 0.4571875, - "normalized_score": 16.581770833333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4654255319148936, - "normalized_score": 40.602836879432616 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-07-14", - "submission_date": "2024-07-23", - "generation": 0, - "base_model": "meta-llama/Llama-3.1-70B", - "hub_license": "llama3.1", - "hub_hearts": 350, - "params_billions": 70.554, - "co2_cost": 13.601852032718597 - } - }, - { - "id": "meta-llama/Llama-3.1-70B-Instruct_bfloat16_b9461463b511ed3c0762467538ea32cf7c9669f2_True", - "model": { - "name": "meta-llama/Llama-3.1-70B-Instruct", - "sha": "b9461463b511ed3c0762467538ea32cf7c9669f2", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 43.409948245645786, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8668854195756149, - "normalized_score": 86.6885419575615 - }, - "bbh": { - "name": "BBH", - "value": 0.6917287453663654, - "normalized_score": 55.92799173898473 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3806646525679758, - "normalized_score": 
38.066465256797585 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3565436241610738, - "normalized_score": 14.205816554809845 - }, - "musr": { - "name": "MUSR", - "value": 0.45806250000000004, - "normalized_score": 17.691145833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5309175531914894, - "normalized_score": 47.87972813238771 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-07-16", - "submission_date": "2024-08-15", - "generation": 1, - "base_model": "meta-llama/Meta-Llama-3.1-70B", - "hub_license": "llama3.1", - "hub_hearts": 797, - "params_billions": 70.554, - "co2_cost": 40.22182401425256 - } - }, - { - "id": "meta-llama/Llama-3.1-8B_float16_d04e592bb4f6aa9cfee91e2e20afa771667e1d4b_False", - "model": { - "name": "meta-llama/Llama-3.1-8B", - "sha": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.42086519266696, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.12459828809780273, - "normalized_score": 12.459828809780273 - }, - "bbh": { - "name": "BBH", - "value": 0.46595905446007296, - "normalized_score": 25.30447063475493 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06570996978851963, - "normalized_score": 6.570996978851963 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3104026845637584, - "normalized_score": 8.05369127516779 - }, - "musr": { - "name": "MUSR", - "value": 0.3811875, - "normalized_score": 8.715104166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32878989361702127, - "normalized_score": 25.42109929078014 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-07-14", - "submission_date": "2024-12-07", - "generation": 0, - "base_model": "meta-llama/Llama-3.1-8B", - "hub_license": "llama3.1", - "hub_hearts": 1503, - "params_billions": 8.03, - "co2_cost": 1.4264871618876929 - } - }, - { - "id": "meta-llama/Llama-3.1-8B-Instruct_float16_0e9e39f249a16976918f6564b8830bc894c89659_False", - "model": { - "name": "meta-llama/Llama-3.1-8B-Instruct", - "sha": "0e9e39f249a16976918f6564b8830bc894c89659", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.763729445470883, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4921707735475206, - "normalized_score": 49.217077354752064 - }, - "bbh": { - "name": "BBH", - "value": 0.5087032184331889, - "normalized_score": 29.379192497334035 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1555891238670695, - "normalized_score": 15.55891238670695 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31543624161073824, - "normalized_score": 8.7248322147651 - }, - "musr": { - "name": "MUSR", - "value": 0.39715625000000004, - "normalized_score": 8.611197916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37982047872340424, - "normalized_score": 31.091164302600465 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": 
"2024-07-18", - "submission_date": "2025-02-06", - "generation": 1, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "llama3.1", - "hub_hearts": 3759, - "params_billions": 8.03, - "co2_cost": 2.1060373342307948 - } - }, - { - "id": "meta-llama/Llama-3.2-1B_bfloat16_a7c18587d7f473bfea02aa5639aa349403307b54_False", - "model": { - "name": "meta-llama/Llama-3.2-1B", - "sha": "a7c18587d7f473bfea02aa5639aa349403307b54", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.195140014045501, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.14777900415342402, - "normalized_score": 14.777900415342401 - }, - "bbh": { - "name": "BBH", - "value": 0.31149540964608097, - "normalized_score": 4.366029656556756 - }, - "math": { - "name": "MATH Level 5", - "value": 0.012084592145015106, - "normalized_score": 1.2084592145015105 - }, - "gpqa": { - "name": "GPQA", - "value": 0.22818791946308725, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3447291666666667, - "normalized_score": 2.5578125000000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12034574468085106, - "normalized_score": 2.2606382978723394 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-09-18", - "submission_date": "2024-09-23", - "generation": 0, - "base_model": "meta-llama/Llama-3.2-1B", - "hub_license": "llama3.2", - "hub_hearts": 1700, - "params_billions": 1.24, - "co2_cost": 0.83825703568204 - } - }, - { - "id": "meta-llama/Llama-3.2-1B-Instruct_bfloat16_d0a2081ed47e20ce524e8bc5d132f3fad2f69ff0_True", - "model": { - "name": "meta-llama/Llama-3.2-1B-Instruct", - "sha": "d0a2081ed47e20ce524e8bc5d132f3fad2f69ff0", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.443126333711135, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5698313807364459, - "normalized_score": 56.9831380736446 - }, - "bbh": { - "name": "BBH", - "value": 0.34968498061768266, - "normalized_score": 8.742521312303046 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0702416918429003, - "normalized_score": 7.02416918429003 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2751677852348993, - "normalized_score": 3.355704697986576 - }, - "musr": { - "name": "MUSR", - "value": 0.3328541666666667, - "normalized_score": 2.973437500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16821808510638298, - "normalized_score": 7.579787234042552 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-09-18", - "submission_date": "2024-09-23", - "generation": 0, - "base_model": "meta-llama/Llama-3.2-1B-Instruct", - "hub_license": "llama3.2", - "hub_hearts": 831, - "params_billions": 1.24, - "co2_cost": 0.8098090498152448 - } - }, - { - "id": "meta-llama/Llama-3.2-3B_bfloat16_95c102307f55fbd6d18ddf28bfbcb537ffdc2806_False", - "model": { - "name": "meta-llama/Llama-3.2-3B", - "sha": "95c102307f55fbd6d18ddf28bfbcb537ffdc2806", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 
8.697822716562822, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.13374069690643048, - "normalized_score": 13.374069690643047 - }, - "bbh": { - "name": "BBH", - "value": 0.3905117116991059, - "normalized_score": 14.232664884364107 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0188821752265861, - "normalized_score": 1.8882175226586102 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.35771875000000003, - "normalized_score": 3.8148437499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2487533244680851, - "normalized_score": 16.528147163120565 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-09-18", - "submission_date": "2024-09-27", - "generation": 0, - "base_model": "meta-llama/Llama-3.2-3B", - "hub_license": "llama3.2", - "hub_hearts": 529, - "params_billions": 3.213, - "co2_cost": 2.0137352826123074 - } - }, - { - "id": "meta-llama/Llama-3.2-3B-Instruct_bfloat16_276b29ce8303c9b88966a9b32fc75692dce4d8e1_True", - "model": { - "name": "meta-llama/Llama-3.2-3B-Instruct", - "sha": "276b29ce8303c9b88966a9b32fc75692dce4d8e1", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.204650807793456, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7393161256576994, - "normalized_score": 73.93161256576994 - }, - "bbh": { - "name": "BBH", - "value": 0.4610070239466069, - "normalized_score": 24.059186446885473 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17673716012084592, - "normalized_score": 17.673716012084594 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2785234899328859, - "normalized_score": 3.8031319910514525 - }, - "musr": { - "name": "MUSR", - "value": 0.3528541666666667, - "normalized_score": 1.3734374999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3194813829787234, - "normalized_score": 24.386820330969268 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-09-18", - "submission_date": "2024-09-27", - "generation": 0, - "base_model": "meta-llama/Llama-3.2-3B-Instruct", - "hub_license": "llama3.2", - "hub_hearts": 1245, - "params_billions": 3.213, - "co2_cost": 1.9279617086879264 - } - }, - { - "id": "meta-llama/Llama-3.3-70B-Instruct_bfloat16__True", - "model": { - "name": "meta-llama/Llama-3.3-70B-Instruct", - "sha": "", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 44.84747145129876, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8997581971391464, - "normalized_score": 89.97581971391463 - }, - "bbh": { - "name": "BBH", - "value": 0.6919312828325811, - "normalized_score": 56.561410788022194 - }, - "math": { - "name": "MATH Level 5", - "value": 0.48338368580060426, - "normalized_score": 48.338368580060425 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3288590604026846, - "normalized_score": 10.514541387024611 - }, - "musr": { - "name": "MUSR", - "value": 0.44612500000000005, - "normalized_score": 15.565624999999999 - 
}, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5331615691489362, - "normalized_score": 48.12906323877069 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-11-26", - "submission_date": "2024-12-03", - "generation": 1, - "base_model": "meta-llama/Llama-3.3-70B-Instruct (Merge)", - "hub_license": "llama3.3", - "hub_hearts": 2169, - "params_billions": 70.554, - "co2_cost": 76.55907410745293 - } - }, - { - "id": "meta-llama/Meta-Llama-3-70B_bfloat16_b4d08b7db49d488da3ac49adf25a6b9ac01ae338_False", - "model": { - "name": "meta-llama/Meta-Llama-3-70B", - "sha": "b4d08b7db49d488da3ac49adf25a6b9ac01ae338", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.705350171613343, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1603190645265673, - "normalized_score": 16.031906452656727 - }, - "bbh": { - "name": "BBH", - "value": 0.6461074599904467, - "normalized_score": 48.709812647505885 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18580060422960726, - "normalized_score": 18.580060422960727 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3976510067114094, - "normalized_score": 19.686800894854585 - }, - "musr": { - "name": "MUSR", - "value": 0.4518229166666667, - "normalized_score": 16.011197916666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4709109042553192, - "normalized_score": 41.21232269503546 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-04-17", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "meta-llama/Meta-Llama-3-70B", - "hub_license": "llama3", - "hub_hearts": 854, - "params_billions": 70.554, - "co2_cost": 46.8143715587228 - } - }, - { - "id": "meta-llama/Meta-Llama-3-70B-Instruct_bfloat16_7129260dd854a80eb10ace5f61c20324b472b31c_True", - "model": { - "name": "meta-llama/Meta-Llama-3-70B-Instruct", - "sha": "7129260dd854a80eb10ace5f61c20324b472b31c", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 36.37222412927012, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8099077115387172, - "normalized_score": 80.99077115387172 - }, - "bbh": { - "name": "BBH", - "value": 0.6546699432372051, - "normalized_score": 50.18513318440344 - }, - "math": { - "name": "MATH Level 5", - "value": 0.24471299093655588, - "normalized_score": 24.47129909365559 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28691275167785235, - "normalized_score": 4.921700223713646 - }, - "musr": { - "name": "MUSR", - "value": 0.4153645833333333, - "normalized_score": 10.92057291666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5206948138297872, - "normalized_score": 46.74386820330969 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-04-17", - "submission_date": "2024-06-12", - "generation": 1, - "base_model": "meta-llama/Meta-Llama-3-70B", - "hub_license": "llama3", - "hub_hearts": 1464, - "params_billions": 70.554, - "co2_cost": 36.478300235478294 - 
} - }, - { - "id": "meta-llama/Meta-Llama-3-8B_bfloat16_62bd457b6fe961a42a631306577e622c83876cb6_False", - "model": { - "name": "meta-llama/Meta-Llama-3-8B", - "sha": "62bd457b6fe961a42a631306577e622c83876cb6", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.626857071686075, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.14550614591506092, - "normalized_score": 14.550614591506093 - }, - "bbh": { - "name": "BBH", - "value": 0.4597905195240255, - "normalized_score": 24.50076379676797 - }, - "math": { - "name": "MATH Level 5", - "value": 0.045317220543806644, - "normalized_score": 4.531722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.36140625, - "normalized_score": 6.242447916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32097739361702127, - "normalized_score": 24.553043735224584 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-04-17", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "meta-llama/Meta-Llama-3-8B", - "hub_license": "llama3", - "hub_hearts": 6093, - "params_billions": 8.03, - "co2_cost": 1.7451369952216194 - } - }, - { - "id": "meta-llama/Meta-Llama-3-8B-Instruct_bfloat16_e1945c40cd546c78e41f1151f4db032b271faeaa_True", - "model": { - "name": "meta-llama/Meta-Llama-3-8B-Instruct", - "sha": "e1945c40cd546c78e41f1151f4db032b271faeaa", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.908735693936837, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7408398604591373, - "normalized_score": 74.08398604591373 - }, - "bbh": { - "name": "BBH", - "value": 0.49887111136169526, - "normalized_score": 28.244949576343615 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08685800604229607, - "normalized_score": 8.685800604229607 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.3568229166666667, - "normalized_score": 1.602864583333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3664394946808511, - "normalized_score": 29.604388297872337 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-04-17", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "meta-llama/Meta-Llama-3-8B-Instruct", - "hub_license": "llama3", - "hub_hearts": 3873, - "params_billions": 8.03, - "co2_cost": 0.7974996778909468 - } - }, - { - "id": "meta-llama/Meta-Llama-3-8B-Instruct_float16_e1945c40cd546c78e41f1151f4db032b271faeaa_False", - "model": { - "name": "meta-llama/Meta-Llama-3-8B-Instruct", - "sha": "e1945c40cd546c78e41f1151f4db032b271faeaa", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.609159446025874, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47823220166934843, - "normalized_score": 47.823220166934846 - }, - 
"bbh": { - "name": "BBH", - "value": 0.4910264175128683, - "normalized_score": 26.795283502573653 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09138972809667674, - "normalized_score": 9.138972809667674 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29278523489932884, - "normalized_score": 5.7046979865771785 - }, - "musr": { - "name": "MUSR", - "value": 0.3805416666666666, - "normalized_score": 5.401041666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.359125664893617, - "normalized_score": 28.791740543735223 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-04-17", - "submission_date": "2024-07-08", - "generation": 0, - "base_model": "meta-llama/Meta-Llama-3-8B-Instruct", - "hub_license": "llama3", - "hub_hearts": 3873, - "params_billions": 8.03, - "co2_cost": 1.898946514376925 - } - }, - { - "id": "mhl1/Qwen2.5-0.5B-cinstruct-stage1_bfloat16_19d55d8d5bf1e7d98a865121862f3781a27b1b2e_True", - "model": { - "name": "mhl1/Qwen2.5-0.5B-cinstruct-stage1", - "sha": "19d55d8d5bf1e7d98a865121862f3781a27b1b2e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.551664639073516, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.14817905379947427, - "normalized_score": 14.81790537994743 - }, - "bbh": { - "name": "BBH", - "value": 0.32557832478283544, - "normalized_score": 5.7245272906248355 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01283987915407855, - "normalized_score": 1.283987915407855 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.35003125, - "normalized_score": 1.920572916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11394614361702128, - "normalized_score": 1.549571513002364 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-08", - "submission_date": "2025-01-08", - "generation": 1, - "base_model": "mhl1/Qwen2.5-0.5B-cinstruct-stage1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 1.7648014609925822 - } - }, - { - "id": "microsoft/DialoGPT-medium_bfloat16_7b40bb0f92c45fefa957d088000d8648e5c7fa33_True", - "model": { - "name": "microsoft/DialoGPT-medium", - "sha": "7b40bb0f92c45fefa957d088000d8648e5c7fa33", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "GPT2LMHeadModel", - "average_score": 5.251433606790305, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.14790422744983311, - "normalized_score": 14.79042274498331 - }, - "bbh": { - "name": "BBH", - "value": 0.3014156380141994, - "normalized_score": 2.5568557723352243 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25419463087248323, - "normalized_score": 0.5592841163310973 - }, - "musr": { - "name": "MUSR", - "value": 0.4286666666666667, - "normalized_score": 12.283333333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1118683510638298, - "normalized_score": 1.3187056737588652 - } 
- }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2022-03-02", - "submission_date": "2024-06-13", - "generation": 0, - "base_model": "microsoft/DialoGPT-medium", - "hub_license": "mit", - "hub_hearts": 360, - "params_billions": 0.345, - "co2_cost": 0.2589289702005322 - } - }, - { - "id": "microsoft/Orca-2-13b_bfloat16_2539ff53e6baa4cc603774ad5a2d646f4041ea4e_False", - "model": { - "name": "microsoft/Orca-2-13b", - "sha": "2539ff53e6baa4cc603774ad5a2d646f4041ea4e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.501871091807203, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3127933882099496, - "normalized_score": 31.27933882099496 - }, - "bbh": { - "name": "BBH", - "value": 0.48844897288396094, - "normalized_score": 27.308019499942578 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03172205438066465, - "normalized_score": 3.1722054380664653 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2802013422818792, - "normalized_score": 4.026845637583895 - }, - "musr": { - "name": "MUSR", - "value": 0.5129687500000001, - "normalized_score": 25.787760416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27493351063829785, - "normalized_score": 19.437056737588648 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-11-14", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "microsoft/Orca-2-13b", - "hub_license": "other", - "hub_hearts": 666, - "params_billions": 13.0, - "co2_cost": 2.0171633991909843 - } - }, - { - "id": "microsoft/Orca-2-7b_bfloat16_60e31e6bdcf582ad103b807cb74b73ee1d2c4b17_False", - "model": { - "name": "microsoft/Orca-2-7b", - "sha": "60e31e6bdcf582ad103b807cb74b73ee1d2c4b17", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.404830081400474, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2183462102776189, - "normalized_score": 21.83462102776189 - }, - "bbh": { - "name": "BBH", - "value": 0.4452132267545943, - "normalized_score": 22.429468402818458 - }, - "math": { - "name": "MATH Level 5", - "value": 0.019637462235649546, - "normalized_score": 1.9637462235649545 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.5026145833333333, - "normalized_score": 24.093489583333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.23188164893617022, - "normalized_score": 14.653516548463358 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-11-14", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "microsoft/Orca-2-7b", - "hub_license": "other", - "hub_hearts": 218, - "params_billions": 7.0, - "co2_cost": 1.8106704974598915 - } - }, - { - "id": "microsoft/Phi-3-medium-128k-instruct_bfloat16_fa7d2aa4f5ea69b2e36b20d050cdae79c9bfbb3f_True", - "model": { - "name": 
"microsoft/Phi-3-medium-128k-instruct", - "sha": "fa7d2aa4f5ea69b2e36b20d050cdae79c9bfbb3f", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 32.026356176108685, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6040029344361849, - "normalized_score": 60.400293443618494 - }, - "bbh": { - "name": "BBH", - "value": 0.6382322530870549, - "normalized_score": 48.46045127399018 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19184290030211482, - "normalized_score": 19.184290030211482 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33640939597315433, - "normalized_score": 11.521252796420578 - }, - "musr": { - "name": "MUSR", - "value": 0.4129479166666667, - "normalized_score": 11.351822916666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.47116023936170215, - "normalized_score": 41.24002659574468 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-07", - "submission_date": "2024-08-21", - "generation": 0, - "base_model": "microsoft/Phi-3-medium-128k-instruct", - "hub_license": "mit", - "hub_hearts": 381, - "params_billions": 13.96, - "co2_cost": 3.8951174890523355 - } - }, - { - "id": "microsoft/Phi-3-medium-4k-instruct_bfloat16_d194e4e74ffad5a5e193e26af25bcfc80c7f1ffc_True", - "model": { - "name": "microsoft/Phi-3-medium-4k-instruct", - "sha": "d194e4e74ffad5a5e193e26af25bcfc80c7f1ffc", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 33.09765943937642, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6422713954529538, - "normalized_score": 64.22713954529537 - }, - "bbh": { - "name": "BBH", - "value": 0.6412464890555547, - "normalized_score": 49.38061007422016 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19561933534743203, - "normalized_score": 19.561933534743204 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33640939597315433, - "normalized_score": 11.521252796420578 - }, - "musr": { - "name": "MUSR", - "value": 0.42575, - "normalized_score": 13.052083333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4675864361702128, - "normalized_score": 40.84293735224587 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-07", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "microsoft/Phi-3-medium-4k-instruct", - "hub_license": "mit", - "hub_hearts": 217, - "params_billions": 13.96, - "co2_cost": 2.9105250466587322 - } - }, - { - "id": "microsoft/Phi-3-mini-128k-instruct_bfloat16_5be6479b4bc06a081e8f4c6ece294241ccd32dec_True", - "model": { - "name": "microsoft/Phi-3-mini-128k-instruct", - "sha": "5be6479b4bc06a081e8f4c6ece294241ccd32dec", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 26.343809931865636, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5976331688807919, - "normalized_score": 59.76331688807919 - }, - "bbh": { - "name": "BBH", - "value": 0.5574531792679852, - "normalized_score": 37.09976663224031 - }, - "math": { - "name": 
"MATH Level 5", - "value": 0.1404833836858006, - "normalized_score": 14.04833836858006 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3179530201342282, - "normalized_score": 9.060402684563762 - }, - "musr": { - "name": "MUSR", - "value": 0.3936875, - "normalized_score": 7.710937500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3734208776595745, - "normalized_score": 30.38009751773049 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-04-22", - "submission_date": "2024-08-21", - "generation": 0, - "base_model": "microsoft/Phi-3-mini-128k-instruct", - "hub_license": "mit", - "hub_hearts": 1636, - "params_billions": 3.821, - "co2_cost": 48.44450318537764 - } - }, - { - "id": "microsoft/Phi-3-mini-4k-instruct_bfloat16_ff07dc01615f8113924aed013115ab2abd32115b_True", - "model": { - "name": "microsoft/Phi-3-mini-4k-instruct", - "sha": "ff07dc01615f8113924aed013115ab2abd32115b", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 25.967732638041607, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5612884923115112, - "normalized_score": 56.12884923115112 - }, - "bbh": { - "name": "BBH", - "value": 0.5675972626334875, - "normalized_score": 39.2693352377728 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1163141993957704, - "normalized_score": 11.63141993957704 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3196308724832215, - "normalized_score": 9.284116331096197 - }, - "musr": { - "name": "MUSR", - "value": 0.3950208333333333, - "normalized_score": 7.644270833333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38663563829787234, - "normalized_score": 31.848404255319146 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-04-22", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "microsoft/Phi-3-mini-4k-instruct", - "hub_license": "mit", - "hub_hearts": 1154, - "params_billions": 3.821, - "co2_cost": 0.8040748299123722 - } - }, - { - "id": "microsoft/Phi-3-mini-4k-instruct_float16_c1358f8a35e6d2af81890deffbbfa575b978c62f_True", - "model": { - "name": "microsoft/Phi-3-mini-4k-instruct", - "sha": "c1358f8a35e6d2af81890deffbbfa575b978c62f", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 27.562174043592282, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.547674614467391, - "normalized_score": 54.76746144673909 - }, - "bbh": { - "name": "BBH", - "value": 0.5490718919495822, - "normalized_score": 36.55985530518785 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16389728096676737, - "normalized_score": 16.389728096676738 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33221476510067116, - "normalized_score": 10.96196868008949 - }, - "musr": { - "name": "MUSR", - "value": 0.42841666666666667, - "normalized_score": 13.118749999999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4021775265957447, - "normalized_score": 33.57528073286053 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - 
"is_official_provider": true - }, - "metadata": { - "upload_date": "2024-04-22", - "submission_date": "2024-07-02", - "generation": 0, - "base_model": "microsoft/Phi-3-mini-4k-instruct", - "hub_license": "mit", - "hub_hearts": 1154, - "params_billions": 3.821, - "co2_cost": 1.5733985309086815 - } - }, - { - "id": "microsoft/Phi-3-small-128k-instruct_bfloat16_f80aaa30bfc64c2b8ab214b541d9050e97163bc4_True", - "model": { - "name": "microsoft/Phi-3-small-128k-instruct", - "sha": "f80aaa30bfc64c2b8ab214b541d9050e97163bc4", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Phi3SmallForCausalLM", - "average_score": 31.96780316372565, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6368258443153056, - "normalized_score": 63.68258443153056 - }, - "bbh": { - "name": "BBH", - "value": 0.6202176778696983, - "normalized_score": 45.63406964144793 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2026086956521739, - "normalized_score": 20.26086956521739 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31711409395973156, - "normalized_score": 8.948545861297541 - }, - "musr": { - "name": "MUSR", - "value": 0.43784375000000003, - "normalized_score": 14.49713541666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4490525265957447, - "normalized_score": 38.783614066193856 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-07", - "submission_date": "2024-06-13", - "generation": 0, - "base_model": "microsoft/Phi-3-small-128k-instruct", - "hub_license": "mit", - "hub_hearts": 175, - "params_billions": 7.392, - "co2_cost": 3.8980242595219776 - } - }, - { - "id": "microsoft/Phi-3-small-8k-instruct_bfloat16_1535ae26fb4faada95c6950e8bc6e867cdad6b00_True", - "model": { - "name": "microsoft/Phi-3-small-8k-instruct", - "sha": "1535ae26fb4faada95c6950e8bc6e867cdad6b00", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Phi3SmallForCausalLM", - "average_score": 32.34201367038736, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6496651107949131, - "normalized_score": 64.96651107949131 - }, - "bbh": { - "name": "BBH", - "value": 0.6208364880870563, - "normalized_score": 46.205570366389075 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18869565217391304, - "normalized_score": 18.869565217391305 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31208053691275167, - "normalized_score": 8.277404921700223 - }, - "musr": { - "name": "MUSR", - "value": 0.45579166666666665, - "normalized_score": 16.77395833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4506316489361702, - "normalized_score": 38.95907210401891 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-07", - "submission_date": "2024-06-13", - "generation": 0, - "base_model": "microsoft/Phi-3-small-8k-instruct", - "hub_license": "mit", - "hub_hearts": 165, - "params_billions": 7.392, - "co2_cost": 2.050907207043598 - } - }, - { - "id": "microsoft/Phi-3.5-MoE-instruct_bfloat16_482a9ba0eb0e1fa1671e3560e009d7cec2e5147c_True", - "model": { - "name": "microsoft/Phi-3.5-MoE-instruct", - "sha": "482a9ba0eb0e1fa1671e3560e009d7cec2e5147c", - 
"precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 36.8789647220093, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.692454908531585, - "normalized_score": 69.2454908531585 - }, - "bbh": { - "name": "BBH", - "value": 0.640762564622586, - "normalized_score": 48.77464635932187 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3119335347432024, - "normalized_score": 31.19335347432024 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35570469798657717, - "normalized_score": 14.093959731543624 - }, - "musr": { - "name": "MUSR", - "value": 0.4564791666666667, - "normalized_score": 17.326562499999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.46575797872340424, - "normalized_score": 40.639775413711575 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-08-17", - "submission_date": "2024-08-21", - "generation": 0, - "base_model": "microsoft/Phi-3.5-MoE-instruct", - "hub_license": "mit", - "hub_hearts": 556, - "params_billions": 42.0, - "co2_cost": 9.264557227888623 - } - }, - { - "id": "microsoft/Phi-3.5-mini-instruct_bfloat16_64963004ad95869fa73a30279371c8778509ac84_True", - "model": { - "name": "microsoft/Phi-3.5-mini-instruct", - "sha": "64963004ad95869fa73a30279371c8778509ac84", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 28.184391192864627, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5774500547436359, - "normalized_score": 57.74500547436358 - }, - "bbh": { - "name": "BBH", - "value": 0.5517785126111956, - "normalized_score": 36.74585390851661 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19637462235649547, - "normalized_score": 19.637462235649547 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33976510067114096, - "normalized_score": 11.968680089485462 - }, - "musr": { - "name": "MUSR", - "value": 0.402125, - "normalized_score": 10.098958333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39619348404255317, - "normalized_score": 32.91038711583924 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-08-16", - "submission_date": "2024-08-21", - "generation": 0, - "base_model": "microsoft/Phi-3.5-mini-instruct", - "hub_license": "mit", - "hub_hearts": 844, - "params_billions": 3.821, - "co2_cost": 7.392008604674748 - } - }, - { - "id": "microsoft/Phi-4-mini-instruct_bfloat16_f984c153f9e5738b59f28190d593bd9ad40745bb_True", - "model": { - "name": "microsoft/Phi-4-mini-instruct", - "sha": "f984c153f9e5738b59f28190d593bd9ad40745bb", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 29.412433568419846, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7377923908562614, - "normalized_score": 73.77923908562614 - }, - "bbh": { - "name": "BBH", - "value": 0.568862935505404, - "normalized_score": 38.73553611173529 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16993957703927492, - "normalized_score": 16.993957703927492 - }, - "gpqa": { - "name": "GPQA", - 
"value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.3873020833333333, - "normalized_score": 6.446093750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39320146276595747, - "normalized_score": 32.5779403073286 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2025-02-19", - "submission_date": "2025-02-28", - "generation": 0, - "base_model": "microsoft/Phi-4-mini-instruct", - "hub_license": "mit", - "hub_hearts": 375, - "params_billions": 3.836, - "co2_cost": 0.8282398315230278 - } - }, - { - "id": "microsoft/phi-1_bfloat16_b9ac0e6d78d43970ecf88e9e0154b3a7da20ed89_False", - "model": { - "name": "microsoft/phi-1", - "sha": "b9ac0e6d78d43970ecf88e9e0154b3a7da20ed89", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "PhiForCausalLM", - "average_score": 5.574318195377169, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20680571993421898, - "normalized_score": 20.6805719934219 - }, - "bbh": { - "name": "BBH", - "value": 0.31394755895837845, - "normalized_score": 4.273999212214679 - }, - "math": { - "name": "MATH Level 5", - "value": 0.009818731117824773, - "normalized_score": 0.9818731117824773 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.35251041666666666, - "normalized_score": 3.697135416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11619015957446809, - "normalized_score": 1.798906619385342 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-09-10", - "submission_date": "2024-06-13", - "generation": 0, - "base_model": "microsoft/phi-1", - "hub_license": "mit", - "hub_hearts": 211, - "params_billions": 1.418, - "co2_cost": 0.5724584950561591 - } - }, - { - "id": "microsoft/phi-1_5_float16_675aa382d814580b22651a30acb1a585d7c25963_False", - "model": { - "name": "microsoft/phi-1_5", - "sha": "675aa382d814580b22651a30acb1a585d7c25963", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "PhiForCausalLM", - "average_score": 7.170966845799231, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2032839532440591, - "normalized_score": 20.32839532440591 - }, - "bbh": { - "name": "BBH", - "value": 0.33597583211996657, - "normalized_score": 7.468938770070243 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01812688821752266, - "normalized_score": 1.812688821752266 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.34041666666666665, - "normalized_score": 3.385416666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16913231382978725, - "normalized_score": 7.6813682033096935 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-09-10", - "submission_date": "2024-06-09", - "generation": 0, - "base_model": "microsoft/phi-1_5", - "hub_license": "mit", - "hub_hearts": 
1328, - "params_billions": 1.418, - "co2_cost": 0.6817241381152663 - } - }, - { - "id": "microsoft/phi-2_float16_ef382358ec9e382308935a992d908de099b64c23_False", - "model": { - "name": "microsoft/phi-2", - "sha": "ef382358ec9e382308935a992d908de099b64c23", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "PhiForCausalLM", - "average_score": 15.534291558214901, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.273875539125077, - "normalized_score": 27.3875539125077 - }, - "bbh": { - "name": "BBH", - "value": 0.4881208771249696, - "normalized_score": 28.038519293439304 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02945619335347432, - "normalized_score": 2.9456193353474323 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27181208053691275, - "normalized_score": 2.9082774049216997 - }, - "musr": { - "name": "MUSR", - "value": 0.4098958333333333, - "normalized_score": 13.83697916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.26279920212765956, - "normalized_score": 18.088800236406616 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-12-13", - "submission_date": "2024-06-09", - "generation": 0, - "base_model": "microsoft/phi-2", - "hub_license": "mit", - "hub_hearts": 3293, - "params_billions": 2.78, - "co2_cost": 0.8470419651804781 - } - }, - { - "id": "microsoft/phi-4_float16_381727a5ee103da6c1b14ecd3d39cd09832cbcf8_False", - "model": { - "name": "microsoft/phi-4", - "sha": "381727a5ee103da6c1b14ecd3d39cd09832cbcf8", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 29.48341679122318, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.048785001573602486, - "normalized_score": 4.878500157360248 - }, - "bbh": { - "name": "BBH", - "value": 0.6703464626619114, - "normalized_score": 52.57567157258284 - }, - "math": { - "name": "MATH Level 5", - "value": 0.27870090634441086, - "normalized_score": 27.870090634441087 - }, - "gpqa": { - "name": "GPQA", - "value": 0.401006711409396, - "normalized_score": 20.134228187919465 - }, - "musr": { - "name": "MUSR", - "value": 0.5033541666666667, - "normalized_score": 23.719270833333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5295046542553191, - "normalized_score": 47.722739361702125 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-12-11", - "submission_date": "2025-01-08", - "generation": 0, - "base_model": "microsoft/phi-4", - "hub_license": "mit", - "hub_hearts": 1919, - "params_billions": 14.66, - "co2_cost": 0.8783618221059865 - } - }, - { - "id": "microsoft/phi-4_bfloat16_381727a5ee103da6c1b14ecd3d39cd09832cbcf8_True", - "model": { - "name": "microsoft/phi-4", - "sha": "381727a5ee103da6c1b14ecd3d39cd09832cbcf8", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 30.35812781134617, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.0585269307659233, - "normalized_score": 5.852693076592331 - }, - "bbh": { - "name": "BBH", - "value": 0.6690562305322874, - 
"normalized_score": 52.42784845820486 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3164652567975831, - "normalized_score": 31.64652567975831 - }, - "gpqa": { - "name": "GPQA", - "value": 0.40604026845637586, - "normalized_score": 20.805369127516784 - }, - "musr": { - "name": "MUSR", - "value": 0.5033541666666667, - "normalized_score": 23.7859375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5286735372340425, - "normalized_score": 47.630393026004725 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-12-11", - "submission_date": "2025-01-08", - "generation": 0, - "base_model": "microsoft/phi-4", - "hub_license": "mit", - "hub_hearts": 1919, - "params_billions": 14.66, - "co2_cost": 2.7731595658140327 - } - }, - { - "id": "migtissera/Llama-3-70B-Synthia-v3.5_float16_8744db0bccfc18f1847633da9d29fc89b35b4190_True", - "model": { - "name": "migtissera/Llama-3-70B-Synthia-v3.5", - "sha": "8744db0bccfc18f1847633da9d29fc89b35b4190", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 35.56935395079021, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6076499244227538, - "normalized_score": 60.764992442275386 - }, - "bbh": { - "name": "BBH", - "value": 0.6488638026271278, - "normalized_score": 49.118159695748176 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21148036253776434, - "normalized_score": 21.148036253776432 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3875838926174497, - "normalized_score": 18.34451901565996 - }, - "musr": { - "name": "MUSR", - "value": 0.49219791666666673, - "normalized_score": 23.39140625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4658410904255319, - "normalized_score": 40.64901004728132 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-26", - "submission_date": "2024-08-28", - "generation": 0, - "base_model": "migtissera/Llama-3-70B-Synthia-v3.5", - "hub_license": "llama3", - "hub_hearts": 5, - "params_billions": 70.554, - "co2_cost": 17.539395979120005 - } - }, - { - "id": "migtissera/Llama-3-8B-Synthia-v3.5_float16_af4990801a24fee7acf16370cb5aa5643b5e9d6c_True", - "model": { - "name": "migtissera/Llama-3-8B-Synthia-v3.5", - "sha": "af4990801a24fee7acf16370cb5aa5643b5e9d6c", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.948440145189334, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5069582042314393, - "normalized_score": 50.69582042314393 - }, - "bbh": { - "name": "BBH", - "value": 0.4887940933660044, - "normalized_score": 27.542339430057652 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06570996978851963, - "normalized_score": 6.570996978851963 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27181208053691275, - "normalized_score": 2.9082774049216997 - }, - "musr": { - "name": "MUSR", - "value": 0.40438541666666666, - "normalized_score": 9.414843749999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30302526595744683, - "normalized_score": 22.558362884160758 - } - }, - "features": { - 
"is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-17", - "submission_date": "2024-08-28", - "generation": 0, - "base_model": "migtissera/Llama-3-8B-Synthia-v3.5", - "hub_license": "llama3", - "hub_hearts": 15, - "params_billions": 8.03, - "co2_cost": 1.6573967325482626 - } - }, - { - "id": "migtissera/Tess-3-7B-SFT_bfloat16_404de3b56564dbd43cd64d97f8574b43189462f3_True", - "model": { - "name": "migtissera/Tess-3-7B-SFT", - "sha": "404de3b56564dbd43cd64d97f8574b43189462f3", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 17.20945620208202, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3946262583279033, - "normalized_score": 39.46262583279033 - }, - "bbh": { - "name": "BBH", - "value": 0.46073483895076217, - "normalized_score": 24.123847398237004 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04003021148036254, - "normalized_score": 4.003021148036254 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.4112708333333333, - "normalized_score": 10.275520833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30335771276595747, - "normalized_score": 22.595301418439718 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-09", - "submission_date": "2024-07-20", - "generation": 1, - "base_model": "mistralai/Mistral-7B-v0.3", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 7.248, - "co2_cost": 1.2943395475718065 - } - }, - { - "id": "migtissera/Tess-3-Mistral-Nemo-12B_bfloat16_0b82dea6e8f4aed4a1c2e10198d68991c30d171b_True", - "model": { - "name": "migtissera/Tess-3-Mistral-Nemo-12B", - "sha": "0b82dea6e8f4aed4a1c2e10198d68991c30d171b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 16.720173026027776, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.335499807178287, - "normalized_score": 33.549980717828696 - }, - "bbh": { - "name": "BBH", - "value": 0.489942302453045, - "normalized_score": 28.042728344416503 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05740181268882175, - "normalized_score": 5.740181268882175 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25083892617449666, - "normalized_score": 0.11185682326622093 - }, - "musr": { - "name": "MUSR", - "value": 0.44578125, - "normalized_score": 15.489322916666671 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.25648271276595747, - "normalized_score": 17.386968085106382 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-13", - "submission_date": "2024-09-16", - "generation": 0, - "base_model": "migtissera/Tess-3-Mistral-Nemo-12B", - "hub_license": "apache-2.0", - "hub_hearts": 12, - "params_billions": 12.248, - "co2_cost": 3.7799837130254446 - } - }, - { - "id": 
"migtissera/Tess-v2.5-Phi-3-medium-128k-14B_bfloat16_3a4dbce32e765f659d418c57f0040d290b8b480d_True", - "model": { - "name": "migtissera/Tess-v2.5-Phi-3-medium-128k-14B", - "sha": "3a4dbce32e765f659d418c57f0040d290b8b480d", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 24.14120139478955, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45387682460316403, - "normalized_score": 45.3876824603164 - }, - "bbh": { - "name": "BBH", - "value": 0.6206613823135703, - "normalized_score": 46.21582810863877 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05060422960725076, - "normalized_score": 5.0604229607250755 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.41130208333333335, - "normalized_score": 10.112760416666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3731715425531915, - "normalized_score": 30.352393617021285 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-05", - "submission_date": "2024-08-30", - "generation": 1, - "base_model": "microsoft/Phi-3-medium-128k-instruct", - "hub_license": "mit", - "hub_hearts": 4, - "params_billions": 13.96, - "co2_cost": 4.476340449162089 - } - }, - { - "id": "migtissera/Tess-v2.5.2-Qwen2-72B_bfloat16_0435e634ad9bc8b1172395a535b78e6f25f3594f_True", - "model": { - "name": "migtissera/Tess-v2.5.2-Qwen2-72B", - "sha": "0435e634ad9bc8b1172395a535b78e6f25f3594f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 33.60333761198978, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44943084349525925, - "normalized_score": 44.943084349525925 - }, - "bbh": { - "name": "BBH", - "value": 0.6646791891060648, - "normalized_score": 52.30813577387958 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2938066465256798, - "normalized_score": 29.38066465256798 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35067114093959734, - "normalized_score": 13.422818791946312 - }, - "musr": { - "name": "MUSR", - "value": 0.41883333333333334, - "normalized_score": 10.887500000000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5561003989361702, - "normalized_score": 50.67782210401892 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-13", - "submission_date": "2024-08-10", - "generation": 0, - "base_model": "migtissera/Tess-v2.5.2-Qwen2-72B", - "hub_license": "other", - "hub_hearts": 11, - "params_billions": 72.0, - "co2_cost": 29.226174533933744 - } - }, - { - "id": "migtissera/Trinity-2-Codestral-22B_bfloat16_5f20b9d8af1a75c135c70bd7295e58301cce63fc_True", - "model": { - "name": "migtissera/Trinity-2-Codestral-22B", - "sha": "5f20b9d8af1a75c135c70bd7295e58301cce63fc", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.995244491801476, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": 
"IFEval", - "value": 0.4202050559182968, - "normalized_score": 42.020505591829675 - }, - "bbh": { - "name": "BBH", - "value": 0.5593244825460373, - "normalized_score": 36.41273800501431 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09667673716012085, - "normalized_score": 9.667673716012084 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3145973154362416, - "normalized_score": 8.612975391498878 - }, - "musr": { - "name": "MUSR", - "value": 0.4110520833333333, - "normalized_score": 9.61484375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3307845744680851, - "normalized_score": 25.642730496453904 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-07", - "submission_date": "2024-09-16", - "generation": 1, - "base_model": "mistralai/Codestral-22B-v0.1", - "hub_license": "other", - "hub_hearts": 12, - "params_billions": 22.247, - "co2_cost": 3.0043146213260017 - } - }, - { - "id": "migtissera/Trinity-2-Codestral-22B-v0.2_float16_63513c3eb9b7c552fc163f58a2e7dc1fa09573b5_True", - "model": { - "name": "migtissera/Trinity-2-Codestral-22B-v0.2", - "sha": "63513c3eb9b7c552fc163f58a2e7dc1fa09573b5", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.869825084599302, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43446832183052075, - "normalized_score": 43.44683218305208 - }, - "bbh": { - "name": "BBH", - "value": 0.5686364683055418, - "normalized_score": 37.61424608895926 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08383685800604229, - "normalized_score": 8.38368580060423 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30033557046979864, - "normalized_score": 6.711409395973152 - }, - "musr": { - "name": "MUSR", - "value": 0.40447916666666667, - "normalized_score": 9.059895833333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33402593085106386, - "normalized_score": 26.002881205673763 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-13", - "submission_date": "2024-08-28", - "generation": 1, - "base_model": "mistralai/Codestral-22B-v0.1", - "hub_license": "other", - "hub_hearts": 7, - "params_billions": 22.247, - "co2_cost": 1.5535215928704613 - } - }, - { - "id": "migtissera/Trinity-2-Codestral-22B-v0.2_bfloat16_9452a82ac7bfa9092a061ec913e9078ef3525a03_True", - "model": { - "name": "migtissera/Trinity-2-Codestral-22B-v0.2", - "sha": "9452a82ac7bfa9092a061ec913e9078ef3525a03", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.250269105139868, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44301121025545553, - "normalized_score": 44.301121025545555 - }, - "bbh": { - "name": "BBH", - "value": 0.5706466356198404, - "normalized_score": 37.78604101957199 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08685800604229607, - "normalized_score": 8.685800604229607 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.4031458333333333, 
- "normalized_score": 8.859895833333338 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3353557180851064, - "normalized_score": 26.150635342789595 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-13", - "submission_date": "2024-09-16", - "generation": 1, - "base_model": "mistralai/Codestral-22B-v0.1", - "hub_license": "other", - "hub_hearts": 7, - "params_billions": 22.247, - "co2_cost": 3.122415137327222 - } - }, - { - "id": "mindw96/DeepSeek-llama3.3-Bllossom-8B-DACON-LLM3_float16_03cffd19ee78646543e020e2ebc9d553a4c5242b_False", - "model": { - "name": "mindw96/DeepSeek-llama3.3-Bllossom-8B-DACON-LLM3", - "sha": "03cffd19ee78646543e020e2ebc9d553a4c5242b", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.009061305596173, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.13881168632561602, - "normalized_score": 13.881168632561604 - }, - "bbh": { - "name": "BBH", - "value": 0.3067536965504715, - "normalized_score": 3.315965497566667 - }, - "math": { - "name": "MATH Level 5", - "value": 0.008308157099697885, - "normalized_score": 0.8308157099697886 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25083892617449666, - "normalized_score": 0.11185682326622093 - }, - "musr": { - "name": "MUSR", - "value": 0.3792083333333333, - "normalized_score": 4.734375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11062167553191489, - "normalized_score": 1.1801861702127647 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-17", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.553126557664898 - } - }, - { - "id": "minghaowu/Qwen1.5-1.8B-OpenHermes-2.5_bfloat16_40700de82968350c192318877fe522630d0ef76d_True", - "model": { - "name": "minghaowu/Qwen1.5-1.8B-OpenHermes-2.5", - "sha": "40700de82968350c192318877fe522630d0ef76d", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.684751175879422, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.27779735546128714, - "normalized_score": 27.779735546128713 - }, - "bbh": { - "name": "BBH", - "value": 0.33746396801266015, - "normalized_score": 7.561477534247794 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02416918429003021, - "normalized_score": 2.416918429003021 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.3528854166666667, - "normalized_score": 1.0773437500000014 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17918882978723405, - "normalized_score": 8.798758865248226 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-12", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.837, - "co2_cost": 
2.1898006910849936 - } - }, - { - "id": "ministral/Ministral-3b-instruct_bfloat16_2c95908929198d6e69af8638f0dbbd9bc6b93f9e_False", - "model": { - "name": "ministral/Ministral-3b-instruct", - "sha": "2c95908929198d6e69af8638f0dbbd9bc6b93f9e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 3.520082773406306, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1357642167227401, - "normalized_score": 13.57642167227401 - }, - "bbh": { - "name": "BBH", - "value": 0.31918598478332383, - "normalized_score": 4.675863578564667 - }, - "math": { - "name": "MATH Level 5", - "value": 0.008308157099697885, - "normalized_score": 0.8308157099697886 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2516778523489933, - "normalized_score": 0.22371364653244186 - }, - "musr": { - "name": "MUSR", - "value": 0.33825, - "normalized_score": 0.7812499999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10929188829787234, - "normalized_score": 1.032432033096926 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-03-14", - "submission_date": "2024-10-25", - "generation": 0, - "base_model": "ministral/Ministral-3b-instruct", - "hub_license": "apache-2.0", - "hub_hearts": 48, - "params_billions": 3.316, - "co2_cost": 0.5289738610329241 - } - }, - { - "id": "mistral-community/Mistral-7B-v0.2_bfloat16_2c3e624962b1a3f3fbf52e15969565caa7bc064a_False", - "model": { - "name": "mistral-community/Mistral-7B-v0.2", - "sha": "2c3e624962b1a3f3fbf52e15969565caa7bc064a", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 14.215362442692104, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22663976028050017, - "normalized_score": 22.663976028050016 - }, - "bbh": { - "name": "BBH", - "value": 0.4510187962797583, - "normalized_score": 23.950865383029594 - }, - "math": { - "name": "MATH Level 5", - "value": 0.030211480362537766, - "normalized_score": 3.0211480362537766 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.4031770833333333, - "normalized_score": 8.363802083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2952958776595745, - "normalized_score": 21.699541962174944 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-03-23", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "mistral-community/Mistral-7B-v0.2", - "hub_license": "apache-2.0", - "hub_hearts": 232, - "params_billions": 7.242, - "co2_cost": 1.1064265558408706 - } - }, - { - "id": "mistral-community/Mixtral-8x22B-v0.1_float16_ab1e8c1950cf359e2a25de9b274ab836adb6dbab_False", - "model": { - "name": "mistral-community/Mixtral-8x22B-v0.1", - "sha": "ab1e8c1950cf359e2a25de9b274ab836adb6dbab", - "precision": "float16", - "type": "❓other", - "weight_type": "Original", - "architecture": "Unknown", - "average_score": 16.827390145446955, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 
0.3166564417177914, - "normalized_score": 31.665644171779142 - }, - "bbh": { - "name": "BBH", - "value": 0.38000000000000006, - "normalized_score": 12.647903050108935 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15428571428571428, - "normalized_score": 15.428571428571427 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33, - "normalized_score": 10.666666666666668 - }, - "musr": { - "name": "MUSR", - "value": 0.35333333333333333, - "normalized_score": 1.6666666666666659 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36, - "normalized_score": 28.888888888888893 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-04-10", - "submission_date": "", - "generation": 0, - "base_model": "mistral-community/Mixtral-8x22B-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 674, - "params_billions": 0.0, - "co2_cost": 15.173201632269498 - } - }, - { - "id": "mistral-community/mixtral-8x22B-v0.3_bfloat16_211b177b79ab5ef245ee334d106c27623e786882_False", - "model": { - "name": "mistral-community/mixtral-8x22B-v0.3", - "sha": "211b177b79ab5ef245ee334d106c27623e786882", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 25.801994725345732, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25826362939223485, - "normalized_score": 25.826362939223486 - }, - "bbh": { - "name": "BBH", - "value": 0.6250002178435845, - "normalized_score": 45.73104089763324 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18353474320241692, - "normalized_score": 18.35347432024169 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3775167785234899, - "normalized_score": 17.00223713646532 - }, - "musr": { - "name": "MUSR", - "value": 0.4036979166666667, - "normalized_score": 7.462239583333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.46392952127659576, - "normalized_score": 40.4366134751773 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-25", - "submission_date": "2024-06-13", - "generation": 0, - "base_model": "mistral-community/mixtral-8x22B-v0.3", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 140.63, - "co2_cost": 104.98897033710952 - } - }, - { - "id": "mistralai/Codestral-22B-v0.1_bfloat16_8f5fe23af91885222a1563283c87416745a5e212_True", - "model": { - "name": "mistralai/Codestral-22B-v0.1", - "sha": "8f5fe23af91885222a1563283c87416745a5e212", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 23.27991740686463, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5771752283939946, - "normalized_score": 57.717522839399464 - }, - "bbh": { - "name": "BBH", - "value": 0.5139136921003167, - "normalized_score": 30.737634411945635 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10045317220543806, - "normalized_score": 10.045317220543806 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2986577181208054, - "normalized_score": 6.487695749440718 - }, - "musr": { - "name": "MUSR", - "value": 0.4187083333333333, - "normalized_score": 10.738541666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 
0.3155751329787234, - "normalized_score": 23.95279255319149 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-29", - "submission_date": "2024-09-28", - "generation": 0, - "base_model": "mistralai/Codestral-22B-v0.1", - "hub_license": "other", - "hub_hearts": 1236, - "params_billions": 22.247, - "co2_cost": 2.6133391218762 - } - }, - { - "id": "mistralai/Ministral-8B-Instruct-2410_bfloat16_199e57c1d66379760f6413f79d27008d1d1dbd6e_True", - "model": { - "name": "mistralai/Ministral-8B-Instruct-2410", - "sha": "199e57c1d66379760f6413f79d27008d1d1dbd6e", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 24.185603139774845, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5896399331551394, - "normalized_score": 58.963993315513946 - }, - "bbh": { - "name": "BBH", - "value": 0.47616402016891385, - "normalized_score": 25.82477440941784 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19561933534743203, - "normalized_score": 19.561933534743204 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28439597315436244, - "normalized_score": 4.5861297539149914 - }, - "musr": { - "name": "MUSR", - "value": 0.41375, - "normalized_score": 10.718750000000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3291223404255319, - "normalized_score": 25.458037825059098 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-10-15", - "submission_date": "2024-12-01", - "generation": 0, - "base_model": "mistralai/Ministral-8B-Instruct-2410", - "hub_license": "other", - "hub_hearts": 447, - "params_billions": 8.02, - "co2_cost": 1.594172784313864 - } - }, - { - "id": "mistralai/Mistral-7B-Instruct-v0.1_bfloat16_73068f3702d050a2fd5aa2ca1e612e5036429398_True", - "model": { - "name": "mistralai/Mistral-7B-Instruct-v0.1", - "sha": "73068f3702d050a2fd5aa2ca1e612e5036429398", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 12.771229395030618, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4487060998151571, - "normalized_score": 44.87060998151571 - }, - "bbh": { - "name": "BBH", - "value": 0.33548084759810987, - "normalized_score": 7.647020535827543 - }, - "math": { - "name": "MATH Level 5", - "value": 0.022658610271903322, - "normalized_score": 2.2658610271903323 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.38476041666666666, - "normalized_score": 6.12838541666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.24143949468085107, - "normalized_score": 15.715499408983453 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-09-27", - "submission_date": "2024-06-27", - "generation": 1, - "base_model": "mistralai/Mistral-7B-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 1601, - "params_billions": 7.242, - "co2_cost": 1.8629137373078644 - } - }, - { - "id": 
"mistralai/Mistral-7B-Instruct-v0.2_bfloat16_41b61a33a2483885c981aa79e0df6b32407ed873_True", - "model": { - "name": "mistralai/Mistral-7B-Instruct-v0.2", - "sha": "41b61a33a2483885c981aa79e0df6b32407ed873", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 18.50789159273764, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5496227786717023, - "normalized_score": 54.96227786717022 - }, - "bbh": { - "name": "BBH", - "value": 0.44597355203292793, - "normalized_score": 22.910601936713604 - }, - "math": { - "name": "MATH Level 5", - "value": 0.030211480362537766, - "normalized_score": 3.0211480362537766 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.39660416666666665, - "normalized_score": 7.608854166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2716921542553192, - "normalized_score": 19.076906028368796 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-12-11", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "mistralai/Mistral-7B-Instruct-v0.2", - "hub_license": "apache-2.0", - "hub_hearts": 2688, - "params_billions": 7.242, - "co2_cost": 1.0688132581019694 - } - }, - { - "id": "mistralai/Mistral-7B-Instruct-v0.3_bfloat16_83e9aa141f2e28c82232fea5325f54edf17c43de_True", - "model": { - "name": "mistralai/Mistral-7B-Instruct-v0.3", - "sha": "83e9aa141f2e28c82232fea5325f54edf17c43de", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.225098776905867, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5465254413844156, - "normalized_score": 54.652544138441556 - }, - "bbh": { - "name": "BBH", - "value": 0.47219631712648397, - "normalized_score": 25.56911494885904 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03851963746223565, - "normalized_score": 3.8519637462235647 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.37390625000000005, - "normalized_score": 4.304947916666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30751329787234044, - "normalized_score": 23.057033096926716 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-22", - "submission_date": "2024-06-12", - "generation": 1, - "base_model": "mistralai/Mistral-7B-v0.3", - "hub_license": "apache-2.0", - "hub_hearts": 1515, - "params_billions": 7.248, - "co2_cost": 1.0755669287782 - } - }, - { - "id": "mistralai/Mistral-7B-v0.1_bfloat16_26bca36bde8333b5d7f72e9ed20ccda6a618af24_False", - "model": { - "name": "mistralai/Mistral-7B-v0.1", - "sha": "26bca36bde8333b5d7f72e9ed20ccda6a618af24", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 14.575358924083135, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2385548123423627, - "normalized_score": 23.855481234236272 - }, - 
"bbh": { - "name": "BBH", - "value": 0.4419401145517045, - "normalized_score": 22.0182553990746 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02945619335347432, - "normalized_score": 2.9456193353474323 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.4139375, - "normalized_score": 10.675520833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30127992021276595, - "normalized_score": 22.364435579196215 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-09-20", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "mistralai/Mistral-7B-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 3655, - "params_billions": 7.242, - "co2_cost": 0.7780735286284418 - } - }, - { - "id": "mistralai/Mistral-7B-v0.3_bfloat16_b67d6a03ca097c5122fa65904fce0413500bf8c8_False", - "model": { - "name": "mistralai/Mistral-7B-v0.3", - "sha": "b67d6a03ca097c5122fa65904fce0413500bf8c8", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 14.229760590840252, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22663976028050017, - "normalized_score": 22.663976028050016 - }, - "bbh": { - "name": "BBH", - "value": 0.45168546294642503, - "normalized_score": 24.03725427191849 - }, - "math": { - "name": "MATH Level 5", - "value": 0.030211480362537766, - "normalized_score": 3.0211480362537766 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.4031770833333333, - "normalized_score": 8.363802083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2952958776595745, - "normalized_score": 21.699541962174944 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-22", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "mistralai/Mistral-7B-v0.3", - "hub_license": "apache-2.0", - "hub_hearts": 450, - "params_billions": 7.248, - "co2_cost": 0.7629089045793841 - } - }, - { - "id": "mistralai/Mistral-Large-Instruct-2411_float16_3a5cb136f6106edf5c1210369068eb5a4f787cab_True", - "model": { - "name": "mistralai/Mistral-Large-Instruct-2411", - "sha": "3a5cb136f6106edf5c1210369068eb5a4f787cab", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 46.524214355965, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8400577135334246, - "normalized_score": 84.00577135334247 - }, - "bbh": { - "name": "BBH", - "value": 0.6746647735675069, - "normalized_score": 52.7448919952634 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4954682779456193, - "normalized_score": 49.546827794561935 - }, - "gpqa": { - "name": "GPQA", - "value": 0.43708053691275167, - "normalized_score": 24.94407158836689 - }, - "musr": { - "name": "MUSR", - "value": 0.454, - "normalized_score": 17.216666666666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5561835106382979, - "normalized_score": 50.687056737588655 - } - }, - "features": 
{ - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-11-14", - "submission_date": "2024-11-19", - "generation": 0, - "base_model": "mistralai/Mistral-Large-Instruct-2411", - "hub_license": "other", - "hub_hearts": 211, - "params_billions": 122.61, - "co2_cost": 52.54461045774598 - } - }, - { - "id": "mistralai/Mistral-Nemo-Base-2407_bfloat16_d2efb15544d5401f761235bef327babb850887d0_False", - "model": { - "name": "mistralai/Mistral-Nemo-Base-2407", - "sha": "d2efb15544d5401f761235bef327babb850887d0", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 15.239356042755924, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16299197241098062, - "normalized_score": 16.299197241098064 - }, - "bbh": { - "name": "BBH", - "value": 0.5035062000369291, - "normalized_score": 29.374736440966874 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05966767371601209, - "normalized_score": 5.966767371601208 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2936241610738255, - "normalized_score": 5.8165548098433995 - }, - "musr": { - "name": "MUSR", - "value": 0.3921354166666667, - "normalized_score": 6.516927083333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34715757978723405, - "normalized_score": 27.46195330969267 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-07-18", - "submission_date": "2024-07-19", - "generation": 0, - "base_model": "mistralai/Mistral-Nemo-Base-2407", - "hub_license": "apache-2.0", - "hub_hearts": 299, - "params_billions": 11.58, - "co2_cost": 3.4059909098206287 - } - }, - { - "id": "mistralai/Mistral-Nemo-Instruct-2407_bfloat16_4d14c1db68fe20dbf80b8eca85d39b909c5fe1d5_True", - "model": { - "name": "mistralai/Mistral-Nemo-Instruct-2407", - "sha": "4d14c1db68fe20dbf80b8eca85d39b909c5fe1d5", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 24.665599941304677, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6380248850826917, - "normalized_score": 63.80248850826917 - }, - "bbh": { - "name": "BBH", - "value": 0.5036523950310812, - "normalized_score": 29.679970381152803 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1268882175226586, - "normalized_score": 12.688821752265861 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.38999999999999996, - "normalized_score": 8.483333333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3517287234042553, - "normalized_score": 27.969858156028373 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-07-17", - "submission_date": "2024-08-29", - "generation": 1, - "base_model": "mistralai/Mistral-Nemo-Base-2407", - "hub_license": "apache-2.0", - "hub_hearts": 1495, - "params_billions": 12.248, - "co2_cost": 4.468047939362798 - } - }, - { - "id": 
"mistralai/Mistral-Small-24B-Base-2501_bfloat16_a8f19b61efa3c0a8d8a5f901ed48b30ff6b8c70e_False", - "model": { - "name": "mistralai/Mistral-Small-24B-Base-2501", - "sha": "a8f19b61efa3c0a8d8a5f901ed48b30ff6b8c70e", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 27.195130187978368, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16723848278124265, - "normalized_score": 16.723848278124265 - }, - "bbh": { - "name": "BBH", - "value": 0.6441860347172437, - "normalized_score": 48.53757597081616 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1971299093655589, - "normalized_score": 19.71299093655589 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3875838926174497, - "normalized_score": 18.34451901565996 - }, - "musr": { - "name": "MUSR", - "value": 0.42366666666666664, - "normalized_score": 10.891666666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5406416223404256, - "normalized_score": 48.96018026004729 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2025-01-23", - "submission_date": "2025-01-30", - "generation": 0, - "base_model": "mistralai/Mistral-Small-24B-Base-2501", - "hub_license": "apache-2.0", - "hub_hearts": 231, - "params_billions": 23.572, - "co2_cost": 4.275624553104299 - } - }, - { - "id": "mistralai/Mistral-Small-Instruct-2409_bfloat16_63e53df6575e7085d62113f4383835ff979b3795_True", - "model": { - "name": "mistralai/Mistral-Small-Instruct-2409", - "sha": "63e53df6575e7085d62113f4383835ff979b3795", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 26.262748976418276, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.666975846310013, - "normalized_score": 66.69758463100129 - }, - "bbh": { - "name": "BBH", - "value": 0.5213075098146217, - "normalized_score": 30.7920960925092 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14350453172205438, - "normalized_score": 14.350453172205437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3238255033557047, - "normalized_score": 9.843400447427292 - }, - "musr": { - "name": "MUSR", - "value": 0.36320833333333336, - "normalized_score": 3.001041666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39602726063829785, - "normalized_score": 32.89191784869976 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-09-17", - "submission_date": "2024-09-19", - "generation": 0, - "base_model": "mistralai/Mistral-Small-Instruct-2409", - "hub_license": "other", - "hub_hearts": 382, - "params_billions": 22.05, - "co2_cost": 1.3793375088517446 - } - }, - { - "id": "mistralai/Mistral-Small-Instruct-2409_float16_63e53df6575e7085d62113f4383835ff979b3795_False", - "model": { - "name": "mistralai/Mistral-Small-Instruct-2409", - "sha": "63e53df6575e7085d62113f4383835ff979b3795", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 29.918947504475216, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 
0.6282829558903709, - "normalized_score": 62.82829558903709 - }, - "bbh": { - "name": "BBH", - "value": 0.5830283846898211, - "normalized_score": 40.559713034899204 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2039274924471299, - "normalized_score": 20.39274924471299 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33305369127516776, - "normalized_score": 11.073825503355701 - }, - "musr": { - "name": "MUSR", - "value": 0.4063333333333334, - "normalized_score": 10.225000000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.409906914893617, - "normalized_score": 34.43410165484633 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-09-17", - "submission_date": "2024-09-25", - "generation": 0, - "base_model": "mistralai/Mistral-Small-Instruct-2409", - "hub_license": "other", - "hub_hearts": 382, - "params_billions": 22.247, - "co2_cost": 3.220014976082721 - } - }, - { - "id": "mistralai/Mixtral-8x22B-Instruct-v0.1_bfloat16_b0c3516041d014f640267b14feb4e9a84c8e8c71_True", - "model": { - "name": "mistralai/Mixtral-8x22B-Instruct-v0.1", - "sha": "b0c3516041d014f640267b14feb4e9a84c8e8c71", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 33.88568028808198, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7183584001560305, - "normalized_score": 71.83584001560305 - }, - "bbh": { - "name": "BBH", - "value": 0.6124924926272018, - "normalized_score": 44.11434558724835 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18731117824773413, - "normalized_score": 18.731117824773413 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3733221476510067, - "normalized_score": 16.442953020134222 - }, - "musr": { - "name": "MUSR", - "value": 0.43111458333333336, - "normalized_score": 13.489322916666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44830452127659576, - "normalized_score": 38.70050236406619 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-04-16", - "submission_date": "2024-06-12", - "generation": 1, - "base_model": "mistralai/Mixtral-8x22B-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 719, - "params_billions": 140.621, - "co2_cost": 47.14757859237797 - } - }, - { - "id": "mistralai/Mixtral-8x22B-v0.1_bfloat16_b03e260818710044a2f088d88fab12bb220884fb_False", - "model": { - "name": "mistralai/Mixtral-8x22B-v0.1", - "sha": "b03e260818710044a2f088d88fab12bb220884fb", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 25.74093627522265, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25826362939223485, - "normalized_score": 25.826362939223486 - }, - "bbh": { - "name": "BBH", - "value": 0.6239807473187268, - "normalized_score": 45.58840384342722 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18353474320241692, - "normalized_score": 18.35347432024169 - }, - "gpqa": { - "name": "GPQA", - "value": 0.37583892617449666, - "normalized_score": 16.778523489932887 - }, - "musr": { - "name": "MUSR", - "value": 0.4036979166666667, - "normalized_score": 7.462239583333335 - }, - "mmlu_pro": { - "name": 
"MMLU-PRO", - "value": 0.46392952127659576, - "normalized_score": 40.4366134751773 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-04-16", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "mistralai/Mixtral-8x22B-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 215, - "params_billions": 140.621, - "co2_cost": 157.56579172301616 - } - }, - { - "id": "mistralai/Mixtral-8x7B-Instruct-v0.1_bfloat16_1e637f2d7cb0a9d6fb1922f305cb784995190a83_True", - "model": { - "name": "mistralai/Mixtral-8x7B-Instruct-v0.1", - "sha": "1e637f2d7cb0a9d6fb1922f305cb784995190a83", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 23.8171027058463, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5599143605633053, - "normalized_score": 55.991436056330535 - }, - "bbh": { - "name": "BBH", - "value": 0.49623654013356494, - "normalized_score": 29.742398380967334 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09138972809667674, - "normalized_score": 9.138972809667674 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.42032291666666666, - "normalized_score": 11.073697916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36918218085106386, - "normalized_score": 29.909131205673756 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-12-10", - "submission_date": "2024-06-12", - "generation": 1, - "base_model": "mistralai/Mixtral-8x7B-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 4352, - "params_billions": 46.703, - "co2_cost": 17.48498670577522 - } - }, - { - "id": "mistralai/Mixtral-8x7B-v0.1_bfloat16_985aa055896a8f943d4a9f2572e6ea1341823841_False", - "model": { - "name": "mistralai/Mixtral-8x7B-v0.1", - "sha": "985aa055896a8f943d4a9f2572e6ea1341823841", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 19.56528101279984, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24152692633324024, - "normalized_score": 24.152692633324023 - }, - "bbh": { - "name": "BBH", - "value": 0.508666743762444, - "normalized_score": 30.294194918961484 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10196374622356495, - "normalized_score": 10.196374622356496 - }, - "gpqa": { - "name": "GPQA", - "value": 0.313758389261745, - "normalized_score": 8.501118568232664 - }, - "musr": { - "name": "MUSR", - "value": 0.43213541666666666, - "normalized_score": 12.58359375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3849734042553192, - "normalized_score": 31.663711583924346 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-12-01", - "submission_date": "2024-08-20", - "generation": 0, - "base_model": "mistralai/Mixtral-8x7B-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 1693, - "params_billions": 46.703, - "co2_cost": 23.377818804576776 - } - }, - { - "id": 
"mistralai/Mixtral-8x7B-v0.1_float16_985aa055896a8f943d4a9f2572e6ea1341823841_False", - "model": { - "name": "mistralai/Mixtral-8x7B-v0.1", - "sha": "985aa055896a8f943d4a9f2572e6ea1341823841", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 19.665108918316083, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23260947618984296, - "normalized_score": 23.260947618984297 - }, - "bbh": { - "name": "BBH", - "value": 0.5097711377553386, - "normalized_score": 30.4002992674255 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09365558912386707, - "normalized_score": 9.365558912386707 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32046979865771813, - "normalized_score": 9.395973154362418 - }, - "musr": { - "name": "MUSR", - "value": 0.4413125, - "normalized_score": 13.6640625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3871343085106383, - "normalized_score": 31.903812056737586 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-12-01", - "submission_date": "2024-06-27", - "generation": 0, - "base_model": "mistralai/Mixtral-8x7B-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 1693, - "params_billions": 46.703, - "co2_cost": 5.135099854813959 - } - }, - { - "id": "mixtao/MixTAO-7Bx2-MoE-v8.1_bfloat16_339130b87b6ef2484fea9fbfacba8a714ac03347_False", - "model": { - "name": "mixtao/MixTAO-7Bx2-MoE-v8.1", - "sha": "339130b87b6ef2484fea9fbfacba8a714ac03347", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 21.07792698379691, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.41623337189767595, - "normalized_score": 41.6233371897676 - }, - "bbh": { - "name": "BBH", - "value": 0.5189059391733521, - "normalized_score": 32.31034233969924 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09063444108761329, - "normalized_score": 9.06344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28439597315436244, - "normalized_score": 4.5861297539149914 - }, - "musr": { - "name": "MUSR", - "value": 0.4463333333333333, - "normalized_score": 15.291666666666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3123337765957447, - "normalized_score": 23.592641843971627 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-02-26", - "submission_date": "2024-10-04", - "generation": 0, - "base_model": "mixtao/MixTAO-7Bx2-MoE-v8.1", - "hub_license": "apache-2.0", - "hub_hearts": 55, - "params_billions": 12.879, - "co2_cost": 1.848070143547014 - } - }, - { - "id": "mkurman/llama-3.2-MEDIT-3B-o1_bfloat16_b85c09ebdd588d98a0bc9daa52a78a4317d712db_True", - "model": { - "name": "mkurman/llama-3.2-MEDIT-3B-o1", - "sha": "b85c09ebdd588d98a0bc9daa52a78a4317d712db", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.028893537984814, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43816517950150047, - "normalized_score": 
43.81651795015004 - }, - "bbh": { - "name": "BBH", - "value": 0.43996584807961553, - "normalized_score": 20.819346310345164 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13066465256797583, - "normalized_score": 13.066465256797583 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.3565416666666667, - "normalized_score": 3.001041666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27410239361702127, - "normalized_score": 19.344710401891252 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-03", - "submission_date": "2025-01-07", - "generation": 1, - "base_model": "mkurman/llama-3.2-MEDIT-3B-o1 (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 12, - "params_billions": 3.607, - "co2_cost": 1.1526858646753746 - } - }, - { - "id": "mkurman/phi-4-MedIT-11B-exp-1_bfloat16_7ee6cc2dac29514784142da4d4d2bb4d77dc96dc_True", - "model": { - "name": "mkurman/phi-4-MedIT-11B-exp-1", - "sha": "7ee6cc2dac29514784142da4d4d2bb4d77dc96dc", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Phi3ForCausalLM", - "average_score": 24.60723512531726, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5947607902587357, - "normalized_score": 59.476079025873574 - }, - "bbh": { - "name": "BBH", - "value": 0.5413943771388249, - "normalized_score": 34.73718585543679 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08987915407854985, - "normalized_score": 8.987915407854985 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.38479166666666664, - "normalized_score": 6.232291666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38248005319148937, - "normalized_score": 31.386672576832154 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-10", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 11.514, - "co2_cost": 1.5993077108986922 - } - }, - { - "id": "mkurman/phi4-MedIT-10B-o1_bfloat16_2664ba0eb6272f27c6c8d88416ae6f9ace7ba01d_True", - "model": { - "name": "mkurman/phi4-MedIT-10B-o1", - "sha": "2664ba0eb6272f27c6c8d88416ae6f9ace7ba01d", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaMedITForCausalLM", - "average_score": 18.92073804361014, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.34629117408476173, - "normalized_score": 34.629117408476176 - }, - "bbh": { - "name": "BBH", - "value": 0.519820312240642, - "normalized_score": 31.19028076222879 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1148036253776435, - "normalized_score": 11.48036253776435 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24580536912751677, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.39679166666666665, - "normalized_score": 8.365625000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3507313829787234, - "normalized_score": 
27.859042553191493 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-17", - "submission_date": "2025-01-18", - "generation": 0, - "base_model": "mkurman/phi4-MedIT-10B-o1", - "hub_license": "mit", - "hub_hearts": 4, - "params_billions": 10.255, - "co2_cost": 1.718869422646144 - } - }, - { - "id": "mkxu/llama-3-8b-instruct-fpo_bfloat16_984bd038d56d9aa15ecb853111d2dfd8054a337e_True", - "model": { - "name": "mkxu/llama-3-8b-instruct-fpo", - "sha": "984bd038d56d9aa15ecb853111d2dfd8054a337e", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.813703419888128, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6790161216682846, - "normalized_score": 67.90161216682846 - }, - "bbh": { - "name": "BBH", - "value": 0.4959114413700331, - "normalized_score": 28.867564932615277 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07326283987915408, - "normalized_score": 7.326283987915408 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.36578125, - "normalized_score": 6.1559895833333345 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36045545212765956, - "normalized_score": 28.93949468085106 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-16", - "submission_date": "2025-02-18", - "generation": 0, - "base_model": "mkxu/llama-3-8b-instruct-fpo", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.8134638721206331 - } - }, - { - "id": "mkxu/llama-3-8b-po1_float16_1b16e10de696c43cd2b49fac9f6195dc551438ee_False", - "model": { - "name": "mkxu/llama-3-8b-po1", - "sha": "1b16e10de696c43cd2b49fac9f6195dc551438ee", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.767002191701753, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4081149128756145, - "normalized_score": 40.81149128756145 - }, - "bbh": { - "name": "BBH", - "value": 0.49760854852246356, - "normalized_score": 29.181758873323105 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0702416918429003, - "normalized_score": 7.02416918429003 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.3804166666666667, - "normalized_score": 6.8520833333333355 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3562167553191489, - "normalized_score": 28.46852836879432 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-29", - "submission_date": "2024-11-29", - "generation": 0, - "base_model": "mkxu/llama-3-8b-po1", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.0243762813337047 - } - }, - { - "id": "mlabonne/AlphaMonarch-7B_float16_3de065d84411d74e5b3590f67f52b0b71faf6161_True", - "model": { - "name": "mlabonne/AlphaMonarch-7B", - 
"sha": "3de065d84411d74e5b3590f67f52b0b71faf6161", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 17.63062119200965, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49394384677101205, - "normalized_score": 49.39438467710121 - }, - "bbh": { - "name": "BBH", - "value": 0.4625522037183211, - "normalized_score": 23.947378025426246 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04078549848942598, - "normalized_score": 4.078549848942599 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.41213541666666664, - "normalized_score": 9.316927083333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.24725731382978725, - "normalized_score": 16.36192375886525 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-02-14", - "submission_date": "2024-06-12", - "generation": 1, - "base_model": "mlabonne/AlphaMonarch-7B (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 148, - "params_billions": 7.242, - "co2_cost": 1.1451434197618258 - } - }, - { - "id": "mlabonne/Beyonder-4x7B-v3_float16_8e923fa480f511ab54d79b44b0487768bdd3de4e_True", - "model": { - "name": "mlabonne/Beyonder-4x7B-v3", - "sha": "8e923fa480f511ab54d79b44b0487768bdd3de4e", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 19.40685869832663, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5608385749810503, - "normalized_score": 56.08385749810503 - }, - "bbh": { - "name": "BBH", - "value": 0.4670522037183211, - "normalized_score": 24.557209180110334 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05362537764350453, - "normalized_score": 5.362537764350453 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28523489932885904, - "normalized_score": 4.697986577181204 - }, - "musr": { - "name": "MUSR", - "value": 0.40454166666666663, - "normalized_score": 8.934375000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2512466755319149, - "normalized_score": 16.805186170212764 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": true, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-03-21", - "submission_date": "2024-06-12", - "generation": 1, - "base_model": "mlabonne/Beyonder-4x7B-v3 (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 58, - "params_billions": 24.154, - "co2_cost": 2.7726054141650636 - } - }, - { - "id": "mlabonne/BigQwen2.5-52B-Instruct_bfloat16_425b9bffc9871085cc0d42c34138ce776f96ba02_True", - "model": { - "name": "mlabonne/BigQwen2.5-52B-Instruct", - "sha": "425b9bffc9871085cc0d42c34138ce776f96ba02", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 43.55000484859215, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7913480675718205, - "normalized_score": 79.13480675718205 - }, - "bbh": { - "name": "BBH", - "value": 0.7121004678698547, - "normalized_score": 59.80960695923371 - }, - "math": { - "name": "MATH Level 5", - "value": 
0.547583081570997, - "normalized_score": 54.7583081570997 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - "musr": { - "name": "MUSR", - "value": 0.41130208333333335, - "normalized_score": 10.446093750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5519448138297872, - "normalized_score": 50.21609042553191 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-09-23", - "submission_date": "2024-09-25", - "generation": 1, - "base_model": "mlabonne/BigQwen2.5-52B-Instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 7, - "params_billions": 52.268, - "co2_cost": 41.174805201061055 - } - }, - { - "id": "mlabonne/BigQwen2.5-Echo-47B-Instruct_bfloat16_f95fcf22f8ab87c2dbb1893b87c8a132820acb5e_True", - "model": { - "name": "mlabonne/BigQwen2.5-Echo-47B-Instruct", - "sha": "f95fcf22f8ab87c2dbb1893b87c8a132820acb5e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 37.03189501778666, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7356691356711305, - "normalized_score": 73.56691356711303 - }, - "bbh": { - "name": "BBH", - "value": 0.6125111878044905, - "normalized_score": 44.52224375363397 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4380664652567976, - "normalized_score": 43.80664652567976 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3145973154362416, - "normalized_score": 8.612975391498878 - }, - "musr": { - "name": "MUSR", - "value": 0.4124791666666667, - "normalized_score": 10.193229166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4734042553191489, - "normalized_score": 41.48936170212765 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-09-23", - "submission_date": "2024-09-24", - "generation": 1, - "base_model": "mlabonne/BigQwen2.5-Echo-47B-Instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 47.392, - "co2_cost": 17.04615353886009 - } - }, - { - "id": "mlabonne/ChimeraLlama-3-8B-v2_float16_d90a12b1574d7be084e53e0ad610282638ab29cf_False", - "model": { - "name": "mlabonne/ChimeraLlama-3-8B-v2", - "sha": "d90a12b1574d7be084e53e0ad610282638ab29cf", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.120505180125022, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44688315890725494, - "normalized_score": 44.6883158907255 - }, - "bbh": { - "name": "BBH", - "value": 0.5045597361952603, - "normalized_score": 28.47879573339652 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09063444108761329, - "normalized_score": 9.06344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28523489932885904, - "normalized_score": 4.697986577181204 - }, - "musr": { - "name": "MUSR", - "value": 0.3790833333333334, - "normalized_score": 5.252083333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3568816489361702, - "normalized_score": 28.54240543735224 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - 
"is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-04-22", - "submission_date": "2024-08-25", - "generation": 1, - "base_model": "mlabonne/ChimeraLlama-3-8B-v2 (Merge)", - "hub_license": "other", - "hub_hearts": 14, - "params_billions": 8.03, - "co2_cost": 1.6748195150215985 - } - }, - { - "id": "mlabonne/ChimeraLlama-3-8B-v3_float16_c8c1787e1426e3979ae82134f4eb7fa332f58ae0_False", - "model": { - "name": "mlabonne/ChimeraLlama-3-8B-v3", - "sha": "c8c1787e1426e3979ae82134f4eb7fa332f58ae0", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.697130053703557, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44078821970150317, - "normalized_score": 44.078821970150315 - }, - "bbh": { - "name": "BBH", - "value": 0.49781902726529204, - "normalized_score": 27.64609355033005 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08836858006042296, - "normalized_score": 8.836858006042297 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.4003541666666666, - "normalized_score": 8.37760416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36685505319148937, - "normalized_score": 29.650561465721044 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-01", - "submission_date": "2024-08-25", - "generation": 1, - "base_model": "mlabonne/ChimeraLlama-3-8B-v3 (Merge)", - "hub_license": "other", - "hub_hearts": 15, - "params_billions": 8.03, - "co2_cost": 1.6474791885168099 - } - }, - { - "id": "mlabonne/Daredevil-8B_bfloat16_717953c83631cc9adf2dddccfff06739308f10f7_True", - "model": { - "name": "mlabonne/Daredevil-8B", - "sha": "717953c83631cc9adf2dddccfff06739308f10f7", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.40964638898709, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45477665926408595, - "normalized_score": 45.4776659264086 - }, - "bbh": { - "name": "BBH", - "value": 0.5194408746721715, - "normalized_score": 31.626854762529916 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10649546827794562, - "normalized_score": 10.649546827794563 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.393875, - "normalized_score": 7.534375000000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.383061835106383, - "normalized_score": 31.451315011820324 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-25", - "submission_date": "2024-07-02", - "generation": 1, - "base_model": "mlabonne/Daredevil-8B (Merge)", - "hub_license": "other", - "hub_hearts": 37, - "params_billions": 8.03, - "co2_cost": 3.0238293771520777 - } - }, - { - "id": "mlabonne/Daredevil-8B-abliterated_bfloat16_034c0ce8ceeba075d1dff2bac1b113a017c79390_True", - "model": { - "name": "mlabonne/Daredevil-8B-abliterated", - "sha": "034c0ce8ceeba075d1dff2bac1b113a017c79390", 
- "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.687995572747763, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44263664853699297, - "normalized_score": 44.2636648536993 - }, - "bbh": { - "name": "BBH", - "value": 0.4254272523147253, - "normalized_score": 19.865777111108127 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09441087613293052, - "normalized_score": 9.441087613293051 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.40702083333333333, - "normalized_score": 9.17760416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3700964095744681, - "normalized_score": 30.010712174940902 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-26", - "submission_date": "2024-07-02", - "generation": 0, - "base_model": "mlabonne/Daredevil-8B-abliterated", - "hub_license": "other", - "hub_hearts": 38, - "params_billions": 8.03, - "co2_cost": 2.396723763195856 - } - }, - { - "id": "mlabonne/Hermes-3-Llama-3.1-70B-lorablated_bfloat16_4303ff3b524418e9aa5e787d60595a44a6173b02_False", - "model": { - "name": "mlabonne/Hermes-3-Llama-3.1-70B-lorablated", - "sha": "4303ff3b524418e9aa5e787d60595a44a6173b02", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 31.74585457101121, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.34244360518978534, - "normalized_score": 34.24436051897853 - }, - "bbh": { - "name": "BBH", - "value": 0.6693171063183693, - "normalized_score": 52.75007315730804 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2243202416918429, - "normalized_score": 22.432024169184288 - }, - "gpqa": { - "name": "GPQA", - "value": 0.36577181208053694, - "normalized_score": 15.436241610738257 - }, - "musr": { - "name": "MUSR", - "value": 0.5029270833333334, - "normalized_score": 24.73255208333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4679188829787234, - "normalized_score": 40.87987588652482 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-08-16", - "submission_date": "2024-11-27", - "generation": 1, - "base_model": "mlabonne/Hermes-3-Llama-3.1-70B-lorablated (Merge)", - "hub_license": "", - "hub_hearts": 29, - "params_billions": 70.554, - "co2_cost": 51.325370823294065 - } - }, - { - "id": "mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated_bfloat16_aef878bdf42c119d007322967006fcdef5ae6ee1_True", - "model": { - "name": "mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated", - "sha": "aef878bdf42c119d007322967006fcdef5ae6ee1", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.202552271437714, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7329463601023063, - "normalized_score": 73.29463601023062 - }, - "bbh": { - "name": "BBH", - "value": 0.48740648734902187, - "normalized_score": 
27.12916478111247 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06873111782477341, - "normalized_score": 6.873111782477341 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25671140939597314, - "normalized_score": 0.8948545861297527 - }, - "musr": { - "name": "MUSR", - "value": 0.36488541666666663, - "normalized_score": 3.2106770833333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3503158244680851, - "normalized_score": 27.812869385342786 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-07-24", - "submission_date": "2024-10-13", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "llama3.1", - "hub_hearts": 153, - "params_billions": 8.03, - "co2_cost": 3.2739685009650388 - } - }, - { - "id": "mlabonne/NeuralBeagle14-7B_float16_1567ad618a0998139654cb355738bb9bc018ca64_True", - "model": { - "name": "mlabonne/NeuralBeagle14-7B", - "sha": "1567ad618a0998139654cb355738bb9bc018ca64", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 18.91007634574491, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49351941736813876, - "normalized_score": 49.35194173681387 - }, - "bbh": { - "name": "BBH", - "value": 0.46278709452353844, - "normalized_score": 23.959695145493203 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05211480362537765, - "normalized_score": 5.211480362537765 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28187919463087246, - "normalized_score": 4.250559284116329 - }, - "musr": { - "name": "MUSR", - "value": 0.43194791666666665, - "normalized_score": 12.893489583333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2601396276595745, - "normalized_score": 17.793291962174944 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-01-15", - "submission_date": "2024-06-27", - "generation": 2, - "base_model": "mlabonne/Beagle14-7B (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 158, - "params_billions": 7.242, - "co2_cost": 1.3434142300269432 - } - }, - { - "id": "mlabonne/NeuralDaredevil-8B-abliterated_float16_2f4a5e8a8522f19dff345c7189b7891468763061_True", - "model": { - "name": "mlabonne/NeuralDaredevil-8B-abliterated", - "sha": "2f4a5e8a8522f19dff345c7189b7891468763061", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 27.186740676442742, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.756077208473517, - "normalized_score": 75.60772084735169 - }, - "bbh": { - "name": "BBH", - "value": 0.5110566504436299, - "normalized_score": 30.30798586214647 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09063444108761329, - "normalized_score": 9.06344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3062080536912752, - "normalized_score": 7.494407158836691 - }, - "musr": { - "name": "MUSR", - "value": 0.4019375, - "normalized_score": 9.075520833333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38414228723404253, - "normalized_score": 31.57136524822695 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": 
false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-27", - "submission_date": "2024-07-25", - "generation": 0, - "base_model": "mlabonne/NeuralDaredevil-8B-abliterated", - "hub_license": "llama3", - "hub_hearts": 196, - "params_billions": 8.03, - "co2_cost": 3.4388458273356126 - } - }, - { - "id": "mlabonne/NeuralDaredevil-8B-abliterated_bfloat16_89b01e3292e031ed85ad21545849182f5627021e_False", - "model": { - "name": "mlabonne/NeuralDaredevil-8B-abliterated", - "sha": "89b01e3292e031ed85ad21545849182f5627021e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.499914415098534, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.41623337189767595, - "normalized_score": 41.6233371897676 - }, - "bbh": { - "name": "BBH", - "value": 0.5123964057729099, - "normalized_score": 29.763198395755456 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08534743202416918, - "normalized_score": 8.534743202416918 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.4149583333333333, - "normalized_score": 10.903124999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3801529255319149, - "normalized_score": 31.128102836879428 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-27", - "submission_date": "2024-06-27", - "generation": 0, - "base_model": "mlabonne/NeuralDaredevil-8B-abliterated", - "hub_license": "llama3", - "hub_hearts": 196, - "params_billions": 8.03, - "co2_cost": 0.9850067516335185 - } - }, - { - "id": "mlabonne/OrpoLlama-3-8B_float16_7f200e4c84ad0daa3ff6bc414012d8d0bacbf90e_True", - "model": { - "name": "mlabonne/OrpoLlama-3-8B", - "sha": "7f200e4c84ad0daa3ff6bc414012d8d0bacbf90e", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 15.157036730116474, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.36527524745453177, - "normalized_score": 36.52752474545318 - }, - "bbh": { - "name": "BBH", - "value": 0.4424079063503051, - "normalized_score": 21.95410762879941 - }, - "math": { - "name": "MATH Level 5", - "value": 0.055891238670694864, - "normalized_score": 5.589123867069486 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.3579375, - "normalized_score": 4.0088541666666675 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2705285904255319, - "normalized_score": 18.947621158392433 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-04-18", - "submission_date": "2024-06-12", - "generation": 1, - "base_model": "meta-llama/Meta-Llama-3-8B", - "hub_license": "other", - "hub_hearts": 53, - "params_billions": 8.03, - "co2_cost": 1.780599641553538 - } - }, - { - "id": "mlabonne/phixtral-2x2_8_float16_7744a977d83f132ae5808d8c3b70157031f7de44_True", - "model": { - "name": "mlabonne/phixtral-2x2_8", - "sha": 
"7744a977d83f132ae5808d8c3b70157031f7de44", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "PhiForCausalLM", - "average_score": 15.553113591688318, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3431184811854767, - "normalized_score": 34.31184811854767 - }, - "bbh": { - "name": "BBH", - "value": 0.48885941873076205, - "normalized_score": 28.502644855771297 - }, - "math": { - "name": "MATH Level 5", - "value": 0.035498489425981876, - "normalized_score": 3.5498489425981874 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.3643541666666667, - "normalized_score": 7.710937500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2550698138297872, - "normalized_score": 17.229979314420802 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-01-07", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "mlabonne/phixtral-2x2_8", - "hub_license": "mit", - "hub_hearts": 148, - "params_billions": 4.458, - "co2_cost": 1.9219021341362852 - } - }, - { - "id": "mlx-community/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1-float32_float16_78ba059029cfbdc819ee80f1e91827b9d3ba1620_True", - "model": { - "name": "mlx-community/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1-float32", - "sha": "78ba059029cfbdc819ee80f1e91827b9d3ba1620", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 9.708088086359409, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3368983186833158, - "normalized_score": 33.68983186833158 - }, - "bbh": { - "name": "BBH", - "value": 0.32921013057720044, - "normalized_score": 7.2211690840719855 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08459214501510574, - "normalized_score": 8.459214501510575 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2575503355704698, - "normalized_score": 1.0067114093959737 - }, - "musr": { - "name": "MUSR", - "value": 0.3249166666666667, - "normalized_score": 0.7812499999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16381316489361702, - "normalized_score": 7.0903516548463354 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-17", - "submission_date": "2025-01-07", - "generation": 3, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 0.494, - "co2_cost": 1.0003469456246883 - } - }, - { - "id": "mlx-community/Mistral-Small-24B-Instruct-2501-bf16_bfloat16_92ae924591721abf40ae8dbebb7f37f10a518448_False", - "model": { - "name": "mlx-community/Mistral-Small-24B-Instruct-2501-bf16", - "sha": "92ae924591721abf40ae8dbebb7f37f10a518448", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 38.66942426997438, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6282829558903709, - "normalized_score": 62.82829558903709 - }, - "bbh": { - "name": "BBH", - "value": 
0.6713272911918485, - "normalized_score": 52.39286892867324 - }, - "math": { - "name": "MATH Level 5", - "value": 0.32250755287009064, - "normalized_score": 32.25075528700906 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3951342281879195, - "normalized_score": 19.35123042505593 - }, - "musr": { - "name": "MUSR", - "value": 0.4618333333333333, - "normalized_score": 16.3625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5394780585106383, - "normalized_score": 48.83089539007093 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-30", - "submission_date": "2025-02-06", - "generation": 2, - "base_model": "mistralai/Mistral-Small-24B-Instruct-2501 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 6, - "params_billions": 23.572, - "co2_cost": 2.578044788554092 - } - }, - { - "id": "mmnga/Llama-3-70B-japanese-suzume-vector-v0.1_bfloat16_16f98b2d45684af2c4a9ff5da75b00ef13cca808_True", - "model": { - "name": "mmnga/Llama-3-70B-japanese-suzume-vector-v0.1", - "sha": "16f98b2d45684af2c4a9ff5da75b00ef13cca808", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.38004447303165, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4648931501748693, - "normalized_score": 46.48931501748693 - }, - "bbh": { - "name": "BBH", - "value": 0.6541763652331517, - "normalized_score": 50.02266053282724 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2326283987915408, - "normalized_score": 23.26283987915408 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2860738255033557, - "normalized_score": 4.809843400447425 - }, - "musr": { - "name": "MUSR", - "value": 0.4140625, - "normalized_score": 10.757812500000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5224401595744681, - "normalized_score": 46.937795508274235 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-28", - "submission_date": "2024-09-19", - "generation": 0, - "base_model": "mmnga/Llama-3-70B-japanese-suzume-vector-v0.1", - "hub_license": "llama3", - "hub_hearts": 4, - "params_billions": 70.554, - "co2_cost": 32.19425003207018 - } - }, - { - "id": "mobiuslabsgmbh/DeepSeek-R1-ReDistill-Llama3-8B-v1.1_bfloat16_16f6691b234d868a71b2addfc237a6c5088ecb48_True", - "model": { - "name": "mobiuslabsgmbh/DeepSeek-R1-ReDistill-Llama3-8B-v1.1", - "sha": "16f6691b234d868a71b2addfc237a6c5088ecb48", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 15.806892418961331, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.370396104558128, - "normalized_score": 37.0396104558128 - }, - "bbh": { - "name": "BBH", - "value": 0.34730320150504124, - "normalized_score": 7.891833484522146 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3285498489425982, - "normalized_score": 32.85498489425982 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.33955208333333337, - "normalized_score": 0.9440104166666662 - }, - "mmlu_pro": { - "name": 
"MMLU-PRO", - "value": 0.2198304521276596, - "normalized_score": 13.314494680851062 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-29", - "submission_date": "2025-02-16", - "generation": 1, - "base_model": "mobiuslabsgmbh/DeepSeek-R1-ReDistill-Llama3-8B-v1.1 (Merge)", - "hub_license": "mit", - "hub_hearts": 10, - "params_billions": 8.03, - "co2_cost": 0.732228154167841 - } - }, - { - "id": "mobiuslabsgmbh/DeepSeek-R1-ReDistill-Qwen-7B-v1.1_bfloat16_40f505b1ec4f6008fd9e6867bbe0d338addcafbd_True", - "model": { - "name": "mobiuslabsgmbh/DeepSeek-R1-ReDistill-Qwen-7B-v1.1", - "sha": "40f505b1ec4f6008fd9e6867bbe0d338addcafbd", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 17.737904923149653, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.34731512387132807, - "normalized_score": 34.73151238713281 - }, - "bbh": { - "name": "BBH", - "value": 0.36983762765044165, - "normalized_score": 11.565400479411357 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3496978851963746, - "normalized_score": 34.96978851963746 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.40088541666666666, - "normalized_score": 8.410677083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.23262965425531915, - "normalized_score": 14.736628250591016 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-27", - "submission_date": "2025-02-16", - "generation": 1, - "base_model": "mobiuslabsgmbh/DeepSeek-R1-ReDistill-Qwen-7B-v1.1 (Merge)", - "hub_license": "mit", - "hub_hearts": 15, - "params_billions": 7.616, - "co2_cost": 0.6752667672164052 - } - }, - { - "id": "moeru-ai/L3.1-Moe-2x8B-v0.2_bfloat16_1a0b4d4d1e839e332c67c9c16a2fc1f7ccc7f81e_True", - "model": { - "name": "moeru-ai/L3.1-Moe-2x8B-v0.2", - "sha": "1a0b4d4d1e839e332c67c9c16a2fc1f7ccc7f81e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 28.878094301902436, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7347947889377962, - "normalized_score": 73.47947889377961 - }, - "bbh": { - "name": "BBH", - "value": 0.5255688392585965, - "normalized_score": 32.94589106477927 - }, - "math": { - "name": "MATH Level 5", - "value": 0.16993957703927492, - "normalized_score": 16.993957703927492 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30033557046979864, - "normalized_score": 6.711409395973152 - }, - "musr": { - "name": "MUSR", - "value": 0.41985416666666664, - "normalized_score": 11.381770833333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38580452127659576, - "normalized_score": 31.756057919621743 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-25", - "submission_date": "2024-10-25", - "generation": 1, - "base_model": "moeru-ai/L3.1-Moe-2x8B-v0.2 (Merge)", - "hub_license": "llama3.1", 
- "hub_hearts": 6, - "params_billions": 13.668, - "co2_cost": 3.853135891354159 - } - }, - { - "id": "moeru-ai/L3.1-Moe-4x8B-v0.1_bfloat16_f8d477fad4c02c099c80ef38865c01e2c882e996_True", - "model": { - "name": "moeru-ai/L3.1-Moe-4x8B-v0.1", - "sha": "f8d477fad4c02c099c80ef38865c01e2c882e996", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 19.441557290549515, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.433219413378724, - "normalized_score": 43.3219413378724 - }, - "bbh": { - "name": "BBH", - "value": 0.49392781736367014, - "normalized_score": 27.856764788876916 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1299093655589124, - "normalized_score": 12.990936555891238 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.3609166666666667, - "normalized_score": 3.9812500000000024 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34541223404255317, - "normalized_score": 27.268026004728128 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-23", - "submission_date": "2024-10-23", - "generation": 1, - "base_model": "moeru-ai/L3.1-Moe-4x8B-v0.1 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 3, - "params_billions": 24.942, - "co2_cost": 8.704718244862379 - } - }, - { - "id": "moeru-ai/L3.1-Moe-4x8B-v0.2_bfloat16_fab49d865eb51f00e955c5624712184c39d207c9_True", - "model": { - "name": "moeru-ai/L3.1-Moe-4x8B-v0.2", - "sha": "fab49d865eb51f00e955c5624712184c39d207c9", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 18.31051307041189, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5406554608438943, - "normalized_score": 54.065546084389425 - }, - "bbh": { - "name": "BBH", - "value": 0.446625675582615, - "normalized_score": 21.33700714055054 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10347432024169184, - "normalized_score": 10.347432024169184 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26677852348993286, - "normalized_score": 2.2371364653243813 - }, - "musr": { - "name": "MUSR", - "value": 0.3233958333333333, - "normalized_score": 2.291145833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27626329787234044, - "normalized_score": 19.58481087470449 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-30", - "submission_date": "2024-10-30", - "generation": 1, - "base_model": "moeru-ai/L3.1-Moe-4x8B-v0.2 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 2, - "params_billions": 24.942, - "co2_cost": 6.732653962817818 - } - }, - { - "id": "monsterapi/Llama-3_1-8B-Instruct-orca-ORPO_bfloat16_5206a32e0bd3067aef1ce90f5528ade7d866253f_True", - "model": { - "name": "monsterapi/Llama-3_1-8B-Instruct-orca-ORPO", - "sha": "5206a32e0bd3067aef1ce90f5528ade7d866253f", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 4.832138103419669, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { 
- "name": "IFEval", - "value": 0.22728914834860392, - "normalized_score": 22.728914834860394 - }, - "bbh": { - "name": "BBH", - "value": 0.28653625778742803, - "normalized_score": 1.3404688979507675 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24916107382550334, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.34447916666666667, - "normalized_score": 3.0598958333333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11677194148936171, - "normalized_score": 1.8635490543735225 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-01", - "submission_date": "2024-08-30", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 16.061, - "co2_cost": 3.065360523092142 - } - }, - { - "id": "monsterapi/gemma-2-2b-LoRA-MonsterInstruct_float16_6422e27e96e15cf93b966c973aacc15f8a27a458_True", - "model": { - "name": "monsterapi/gemma-2-2b-LoRA-MonsterInstruct", - "sha": "6422e27e96e15cf93b966c973aacc15f8a27a458", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 12.519872991689503, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3902545246612322, - "normalized_score": 39.02545246612322 - }, - "bbh": { - "name": "BBH", - "value": 0.36496861927498697, - "normalized_score": 11.965057260762338 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05060422960725076, - "normalized_score": 5.0604229607250755 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.3643854166666667, - "normalized_score": 5.41484375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.19872007978723405, - "normalized_score": 10.968897754137116 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-03", - "submission_date": "2024-08-05", - "generation": 0, - "base_model": "monsterapi/gemma-2-2b-LoRA-MonsterInstruct", - "hub_license": "gemma", - "hub_hearts": 0, - "params_billions": 2.614, - "co2_cost": 2.6773750979939996 - } - }, - { - "id": "mosaicml/mpt-7b_bfloat16_039e37745f00858f0e01e988383a8c4393b1a4f5_False", - "model": { - "name": "mosaicml/mpt-7b", - "sha": "039e37745f00858f0e01e988383a8c4393b1a4f5", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "MPTForCausalLM", - "average_score": 6.032029339143736, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21519900530592162, - "normalized_score": 21.51990053059216 - }, - "bbh": { - "name": "BBH", - "value": 0.32997415960801324, - "normalized_score": 6.550600790794161 - }, - "math": { - "name": "MATH Level 5", - "value": 0.015861027190332326, - "normalized_score": 1.5861027190332326 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.36723958333333334, - "normalized_score": 2.904947916666668 - }, - "mmlu_pro": { - "name": 
"MMLU-PRO", - "value": 0.12059507978723404, - "normalized_score": 2.288342198581559 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-05-05", - "submission_date": "2024-06-08", - "generation": 0, - "base_model": "mosaicml/mpt-7b", - "hub_license": "apache-2.0", - "hub_hearts": 1167, - "params_billions": 7.0, - "co2_cost": 1.2870068503861511 - } - }, - { - "id": "mosama/Qwen2.5-1.5B-Instruct-CoT-Reflection_bfloat16_0dd9c511521b05a2eb70d1dfb102c1766be3ae26_True", - "model": { - "name": "mosama/Qwen2.5-1.5B-Instruct-CoT-Reflection", - "sha": "0dd9c511521b05a2eb70d1dfb102c1766be3ae26", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 11.862792186560862, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2870394996387363, - "normalized_score": 28.703949963873633 - }, - "bbh": { - "name": "BBH", - "value": 0.41093712633583523, - "normalized_score": 17.973737754110076 - }, - "math": { - "name": "MATH Level 5", - "value": 0.027190332326283987, - "normalized_score": 2.719033232628399 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.3211979166666667, - "normalized_score": 1.8666666666666683 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.26512632978723405, - "normalized_score": 18.34736997635934 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-22", - "submission_date": "2024-12-22", - "generation": 1, - "base_model": "mosama/Qwen2.5-1.5B-Instruct-CoT-Reflection (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 1.544, - "co2_cost": 1.180675835076376 - } - }, - { - "id": "mrdayl/OpenCogito_bfloat16_f2a54023d176e00311001a73e609fb10ef7416fc_False", - "model": { - "name": "mrdayl/OpenCogito", - "sha": "f2a54023d176e00311001a73e609fb10ef7416fc", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 22.158547225443215, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3933773498761065, - "normalized_score": 39.33773498761065 - }, - "bbh": { - "name": "BBH", - "value": 0.47196973414577464, - "normalized_score": 26.332720219789724 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21827794561933533, - "normalized_score": 21.827794561933533 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30033557046979864, - "normalized_score": 6.711409395973152 - }, - "musr": { - "name": "MUSR", - "value": 0.42401041666666667, - "normalized_score": 11.501302083333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3451628989361702, - "normalized_score": 27.24032210401891 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-07", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.086, - "co2_cost": 6.847273411295232 - } - }, - { - 
"id": "mrdayl/OpenCognito_bfloat16_af631967849155d520801331fb1aca9ac5c6055e_False", - "model": { - "name": "mrdayl/OpenCognito", - "sha": "af631967849155d520801331fb1aca9ac5c6055e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 22.28566396506271, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.40621661635571393, - "normalized_score": 40.6216616355714 - }, - "bbh": { - "name": "BBH", - "value": 0.4705607805549634, - "normalized_score": 25.985835789865746 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21148036253776434, - "normalized_score": 21.148036253776432 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.42934374999999997, - "normalized_score": 12.434635416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3443317819148936, - "normalized_score": 27.14797576832151 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-07", - "submission_date": "2025-03-07", - "generation": 1, - "base_model": "Removed", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.086, - "co2_cost": 0.7731262810208155 - } - }, - { - "id": "mrdayl/OpenCognito-r1_bfloat16_1dd62e221fee56966697cc391eeba52beea726f4_False", - "model": { - "name": "mrdayl/OpenCognito-r1", - "sha": "1dd62e221fee56966697cc391eeba52beea726f4", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 22.15261487014885, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42412687225450696, - "normalized_score": 42.412687225450696 - }, - "bbh": { - "name": "BBH", - "value": 0.4673346036303057, - "normalized_score": 25.595543700365017 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1903323262839879, - "normalized_score": 19.033232628398792 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.42407291666666663, - "normalized_score": 11.775781250000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3474900265957447, - "normalized_score": 27.498891843971627 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-08", - "submission_date": "2025-03-11", - "generation": 3, - "base_model": "Qwen/Qwen2.5-3B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.086, - "co2_cost": 3.8373719910619957 - } - }, - { - "id": "mrdayl/OpenCognito-r2_bfloat16_0d3e3cd82084e6b1ff1b68a4b9732c1fb2c2efd3_False", - "model": { - "name": "mrdayl/OpenCognito-r2", - "sha": "0d3e3cd82084e6b1ff1b68a4b9732c1fb2c2efd3", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 21.967469611464537, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3958751667797001, - "normalized_score": 39.58751667797001 - }, - "bbh": { - "name": "BBH", - 
"value": 0.46882818163435913, - "normalized_score": 25.775897615557778 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20241691842900303, - "normalized_score": 20.241691842900302 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3062080536912752, - "normalized_score": 7.494407158836691 - }, - "musr": { - "name": "MUSR", - "value": 0.42016666666666663, - "normalized_score": 11.354166666666671 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34616023936170215, - "normalized_score": 27.351137706855795 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-11", - "submission_date": "2025-03-13", - "generation": 4, - "base_model": "Qwen/Qwen2.5-3B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.086, - "co2_cost": 1.5756295880841664 - } - }, - { - "id": "mrdayl/OpenThink_bfloat16_d23dff000bc8faba0b83a5bec444ff9afc6a000a_False", - "model": { - "name": "mrdayl/OpenThink", - "sha": "d23dff000bc8faba0b83a5bec444ff9afc6a000a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 12.3979226641244, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20540720842919008, - "normalized_score": 20.540720842919008 - }, - "bbh": { - "name": "BBH", - "value": 0.34597850879756104, - "normalized_score": 9.176574989757906 - }, - "math": { - "name": "MATH Level 5", - "value": 0.28851963746223563, - "normalized_score": 28.851963746223564 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.32888541666666665, - "normalized_score": 2.010677083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.18500664893617022, - "normalized_score": 9.445183215130024 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-22", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "mrdayl/OpenThink (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.777, - "co2_cost": 1.7678727607437676 - } - }, - { - "id": "mrm8488/phi-4-14B-grpo-gsm8k-3e_float16_d8874d5f1dc81b1c251ebb9ccd492d95c25a86b5_True", - "model": { - "name": "mrm8488/phi-4-14B-grpo-gsm8k-3e", - "sha": "d8874d5f1dc81b1c251ebb9ccd492d95c25a86b5", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 39.20729001022834, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.688533092195375, - "normalized_score": 68.8533092195375 - }, - "bbh": { - "name": "BBH", - "value": 0.6805415739665394, - "normalized_score": 54.020966104974086 - }, - "math": { - "name": "MATH Level 5", - "value": 0.452416918429003, - "normalized_score": 45.2416918429003 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33557046979865773, - "normalized_score": 11.409395973154364 - }, - "musr": { - "name": "MUSR", - "value": 0.39939583333333334, - "normalized_score": 8.291145833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.526845079787234, - "normalized_score": 47.42723108747045 - } - }, - "features": { - 
"is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-11", - "submission_date": "2025-02-13", - "generation": 2, - "base_model": "microsoft/phi-4", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 0.9518350998409443 - } - }, - { - "id": "mrm8488/phi-4-14B-grpo-limo_float16_68d12df3c3240529fc9f6ce5a226e7c0d2d3d245_True", - "model": { - "name": "mrm8488/phi-4-14B-grpo-limo", - "sha": "68d12df3c3240529fc9f6ce5a226e7c0d2d3d245", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 39.0640880591307, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.681239112222237, - "normalized_score": 68.12391122222371 - }, - "bbh": { - "name": "BBH", - "value": 0.678485424233919, - "normalized_score": 53.67590173579206 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4569486404833837, - "normalized_score": 45.69486404833837 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33640939597315433, - "normalized_score": 11.521252796420578 - }, - "musr": { - "name": "MUSR", - "value": 0.3980625, - "normalized_score": 8.024479166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5260970744680851, - "normalized_score": 47.344119385342786 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-12", - "submission_date": "2025-02-13", - "generation": 2, - "base_model": "microsoft/phi-4", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 0.9483571391292158 - } - }, - { - "id": "mukaj/Llama-3.1-Hawkish-8B_bfloat16_bd4968f565d94e3595e41a260f5550888df3fc85_True", - "model": { - "name": "mukaj/Llama-3.1-Hawkish-8B", - "sha": "bd4968f565d94e3595e41a260f5550888df3fc85", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.581501102977555, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6720468357291984, - "normalized_score": 67.20468357291985 - }, - "bbh": { - "name": "BBH", - "value": 0.4883822828416351, - "normalized_score": 28.13584109991744 - }, - "math": { - "name": "MATH Level 5", - "value": 0.243202416918429, - "normalized_score": 24.3202416918429 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.39672916666666663, - "normalized_score": 8.557812499999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33311170212765956, - "normalized_score": 25.901300236406616 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-26", - "submission_date": "2024-12-18", - "generation": 0, - "base_model": "mukaj/Llama-3.1-Hawkish-8B", - "hub_license": "other", - "hub_hearts": 42, - "params_billions": 8.03, - "co2_cost": 1.364196599849899 - } - }, - { - "id": "natong19/Mistral-Nemo-Instruct-2407-abliterated_bfloat16_9c7087f62e6ab10ec4aeeb268e25cb3d4000696b_True", - "model": { - "name": 
"natong19/Mistral-Nemo-Instruct-2407-abliterated", - "sha": "9c7087f62e6ab10ec4aeeb268e25cb3d4000696b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 25.017625420548132, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6392239258500778, - "normalized_score": 63.92239258500778 - }, - "bbh": { - "name": "BBH", - "value": 0.5048447739625885, - "normalized_score": 29.915044266358993 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13217522658610273, - "normalized_score": 13.217522658610273 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28691275167785235, - "normalized_score": 4.921700223713646 - }, - "musr": { - "name": "MUSR", - "value": 0.4033333333333333, - "normalized_score": 10.149999999999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.351811835106383, - "normalized_score": 27.979092789598102 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-15", - "submission_date": "2024-09-21", - "generation": 0, - "base_model": "natong19/Mistral-Nemo-Instruct-2407-abliterated", - "hub_license": "apache-2.0", - "hub_hearts": 15, - "params_billions": 12.248, - "co2_cost": 2.4757141885799543 - } - }, - { - "id": "natong19/Qwen2-7B-Instruct-abliterated_bfloat16_127962453ae87879719a82a97384ac1859787a25_True", - "model": { - "name": "natong19/Qwen2-7B-Instruct-abliterated", - "sha": "127962453ae87879719a82a97384ac1859787a25", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 28.515341600524966, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5836945970026197, - "normalized_score": 58.36945970026197 - }, - "bbh": { - "name": "BBH", - "value": 0.5553035842403061, - "normalized_score": 37.746833853463094 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2764350453172205, - "normalized_score": 27.64350453172205 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.4034270833333333, - "normalized_score": 8.92838541666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3842253989361702, - "normalized_score": 31.580599881796683 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-14", - "submission_date": "2024-07-29", - "generation": 0, - "base_model": "natong19/Qwen2-7B-Instruct-abliterated", - "hub_license": "apache-2.0", - "hub_hearts": 5, - "params_billions": 7.616, - "co2_cost": 2.1516712075859235 - } - }, - { - "id": "nazimali/Mistral-Nemo-Kurdish_bfloat16_1eb544577a2874d8df0b77ca83ff1c88dd20f481_False", - "model": { - "name": "nazimali/Mistral-Nemo-Kurdish", - "sha": "1eb544577a2874d8df0b77ca83ff1c88dd20f481", - "precision": "bfloat16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.48223781673813, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3401208792670115, - "normalized_score": 34.012087926701156 - }, - "bbh": { - 
"name": "BBH", - "value": 0.5133321102266589, - "normalized_score": 29.855897080399064 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09592145015105741, - "normalized_score": 9.592145015105741 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.4115729166666667, - "normalized_score": 11.77994791666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3234707446808511, - "normalized_score": 24.830082742316783 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-09", - "submission_date": "2024-10-14", - "generation": 1, - "base_model": "nazimali/Mistral-Nemo-Kurdish (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 12.248, - "co2_cost": 3.6994544571417842 - } - }, - { - "id": "nazimali/Mistral-Nemo-Kurdish-Instruct_bfloat16_512140572f11203441e60ca26b5ede2b9979cb1d_True", - "model": { - "name": "nazimali/Mistral-Nemo-Kurdish-Instruct", - "sha": "512140572f11203441e60ca26b5ede2b9979cb1d", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 18.555957634452803, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4963917959901949, - "normalized_score": 49.63917959901949 - }, - "bbh": { - "name": "BBH", - "value": 0.4699417600389813, - "normalized_score": 25.56142263482943 - }, - "math": { - "name": "MATH Level 5", - "value": 0.004531722054380665, - "normalized_score": 0.4531722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.397875, - "normalized_score": 8.40104166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3062666223404255, - "normalized_score": 22.91851359338061 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-09", - "submission_date": "2024-10-14", - "generation": 1, - "base_model": "nazimali/Mistral-Nemo-Kurdish-Instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 12.248, - "co2_cost": 1.7021165130013975 - } - }, - { - "id": "nazimali/Mistral-Nemo-Kurdish-Instruct_float16_512140572f11203441e60ca26b5ede2b9979cb1d_True", - "model": { - "name": "nazimali/Mistral-Nemo-Kurdish-Instruct", - "sha": "512140572f11203441e60ca26b5ede2b9979cb1d", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.94862211656385, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4860004787297703, - "normalized_score": 48.60004787297703 - }, - "bbh": { - "name": "BBH", - "value": 0.47214400722999256, - "normalized_score": 26.02174135407743 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08459214501510574, - "normalized_score": 8.459214501510575 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28439597315436244, - "normalized_score": 4.5861297539149914 - }, - "musr": { - "name": "MUSR", - "value": 0.40057291666666667, - "normalized_score": 8.838281250000003 - }, - "mmlu_pro": { - "name": 
"MMLU-PRO", - "value": 0.30867686170212766, - "normalized_score": 23.186317966903072 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-09", - "submission_date": "2024-10-14", - "generation": 1, - "base_model": "nazimali/Mistral-Nemo-Kurdish-Instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 12.248, - "co2_cost": 3.503441350239854 - } - }, - { - "id": "nbeerbower/BigKartoffel-mistral-nemo-20B_float16_a552090b42c2cb6ed573fc12cf9571eb0faa8174_True", - "model": { - "name": "nbeerbower/BigKartoffel-mistral-nemo-20B", - "sha": "a552090b42c2cb6ed573fc12cf9571eb0faa8174", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 23.763763321614324, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5857181168189294, - "normalized_score": 58.57181168189294 - }, - "bbh": { - "name": "BBH", - "value": 0.55148305168682, - "normalized_score": 35.79864416611619 - }, - "math": { - "name": "MATH Level 5", - "value": 0.026435045317220542, - "normalized_score": 2.643504531722054 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28691275167785235, - "normalized_score": 4.921700223713646 - }, - "musr": { - "name": "MUSR", - "value": 0.42804166666666665, - "normalized_score": 12.538541666666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3529753989361702, - "normalized_score": 28.10837765957446 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-04", - "submission_date": "2025-03-05", - "generation": 1, - "base_model": "nbeerbower/BigKartoffel-mistral-nemo-20B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 20.427, - "co2_cost": 2.457533630709938 - } - }, - { - "id": "nbeerbower/DoppelKartoffel-Mistral-Nemo-23B_bfloat16_c1b2106456bea0b5172d34c8c4b0818bb79a0429_True", - "model": { - "name": "nbeerbower/DoppelKartoffel-Mistral-Nemo-23B", - "sha": "c1b2106456bea0b5172d34c8c4b0818bb79a0429", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.833865427806835, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5191480826429429, - "normalized_score": 51.914808264294294 - }, - "bbh": { - "name": "BBH", - "value": 0.5217926041279988, - "normalized_score": 31.920697385721514 - }, - "math": { - "name": "MATH Level 5", - "value": 0.030966767371601207, - "normalized_score": 3.096676737160121 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2751677852348993, - "normalized_score": 3.355704697986576 - }, - "musr": { - "name": "MUSR", - "value": 0.3794895833333333, - "normalized_score": 5.602864583333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3080119680851064, - "normalized_score": 23.112440898345156 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-15", - "submission_date": "2025-02-15", - "generation": 1, - "base_model": "nbeerbower/DoppelKartoffel-Mistral-Nemo-23B (Merge)", - 
"hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 23.153, - "co2_cost": 3.0603037906120116 - } - }, - { - "id": "nbeerbower/DoublePotato-Mistral-Nemo-13B_bfloat16_7c00ffb0a327260101eb2957a8f5af63443870bf_True", - "model": { - "name": "nbeerbower/DoublePotato-Mistral-Nemo-13B", - "sha": "7c00ffb0a327260101eb2957a8f5af63443870bf", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 26.79626366510183, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6796156420519777, - "normalized_score": 67.96156420519776 - }, - "bbh": { - "name": "BBH", - "value": 0.5437915398770364, - "normalized_score": 35.211852906317226 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04003021148036254, - "normalized_score": 4.003021148036254 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.45997916666666666, - "normalized_score": 17.930729166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.359624335106383, - "normalized_score": 28.847148345153663 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-13", - "submission_date": "2025-02-21", - "generation": 1, - "base_model": "nbeerbower/DoublePotato-Mistral-Nemo-13B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 13.338, - "co2_cost": 1.0616733141864731 - } - }, - { - "id": "nbeerbower/Dumpling-Qwen2.5-1.5B_bfloat16_ce189fd8d1d8daec8ccffdd293bfbda81c34a524_True", - "model": { - "name": "nbeerbower/Dumpling-Qwen2.5-1.5B", - "sha": "ce189fd8d1d8daec8ccffdd293bfbda81c34a524", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 15.625094396198312, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3698963195432563, - "normalized_score": 36.98963195432563 - }, - "bbh": { - "name": "BBH", - "value": 0.4159743091354106, - "normalized_score": 18.17835840988572 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11706948640483383, - "normalized_score": 11.706948640483382 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.37276041666666665, - "normalized_score": 4.728385416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2771775265957447, - "normalized_score": 19.686391843971627 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-29", - "submission_date": "2025-01-29", - "generation": 1, - "base_model": "nbeerbower/Dumpling-Qwen2.5-1.5B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 1.544, - "co2_cost": 1.2052012329738229 - } - }, - { - "id": "nbeerbower/Dumpling-Qwen2.5-14B_bfloat16_4ce3f034a983caaf7a0aa49fd3aab2f531e9f2eb_True", - "model": { - "name": "nbeerbower/Dumpling-Qwen2.5-14B", - "sha": "4ce3f034a983caaf7a0aa49fd3aab2f531e9f2eb", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": 
"Qwen2ForCausalLM", - "average_score": 34.79872132286164, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6064010159709571, - "normalized_score": 60.64010159709571 - }, - "bbh": { - "name": "BBH", - "value": 0.6450644262798378, - "normalized_score": 49.666835922992384 - }, - "math": { - "name": "MATH Level 5", - "value": 0.30966767371601206, - "normalized_score": 30.966767371601208 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.43539583333333337, - "normalized_score": 14.357812499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5170378989361702, - "normalized_score": 46.337544326241144 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-14", - "submission_date": "2025-02-15", - "generation": 1, - "base_model": "nbeerbower/Dumpling-Qwen2.5-14B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 14.77, - "co2_cost": 1.3805834781919855 - } - }, - { - "id": "nbeerbower/Dumpling-Qwen2.5-7B-1k-r16_bfloat16_5295fc4f2ebe7124ce53808dfe8796e2ede7b53c_True", - "model": { - "name": "nbeerbower/Dumpling-Qwen2.5-7B-1k-r16", - "sha": "5295fc4f2ebe7124ce53808dfe8796e2ede7b53c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 25.2956618399845, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4860004787297703, - "normalized_score": 48.60004787297703 - }, - "bbh": { - "name": "BBH", - "value": 0.5214228032573378, - "normalized_score": 32.10172952662796 - }, - "math": { - "name": "MATH Level 5", - "value": 0.236404833836858, - "normalized_score": 23.6404833836858 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.4229895833333333, - "normalized_score": 11.873697916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39586103723404253, - "normalized_score": 32.87344858156028 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-31", - "submission_date": "2025-02-21", - "generation": 1, - "base_model": "nbeerbower/Dumpling-Qwen2.5-7B-1k-r16 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 7.616, - "co2_cost": 0.6338280131013287 - } - }, - { - "id": "nbeerbower/Dumpling-Qwen2.5-7B-1k-r64-2e-5_bfloat16_e8de9ede88cc20ec887b80a9f722f562cc5a065f_True", - "model": { - "name": "nbeerbower/Dumpling-Qwen2.5-7B-1k-r64-2e-5", - "sha": "e8de9ede88cc20ec887b80a9f722f562cc5a065f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 25.052668360308672, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.417906709752346, - "normalized_score": 41.7906709752346 - }, - "bbh": { - "name": "BBH", - "value": 0.5300548108450988, - "normalized_score": 33.86711491322243 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21148036253776434, - "normalized_score": 21.148036253776432 - 
}, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.4486041666666667, - "normalized_score": 16.142187500000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.41215093085106386, - "normalized_score": 34.683436761229316 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-21", - "generation": 1, - "base_model": "nbeerbower/Dumpling-Qwen2.5-7B-1k-r64-2e-5 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.6772306626905065 - } - }, - { - "id": "nbeerbower/EVA-abliterated-TIES-Qwen2.5-1.5B_bfloat16_dc0152d7482236dacce20f1f5a3d184073ff01b6_True", - "model": { - "name": "nbeerbower/EVA-abliterated-TIES-Qwen2.5-1.5B", - "sha": "dc0152d7482236dacce20f1f5a3d184073ff01b6", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 15.375831467238891, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.41148707651254224, - "normalized_score": 41.14870765125423 - }, - "bbh": { - "name": "BBH", - "value": 0.39965589836197535, - "normalized_score": 15.2183657081488 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13746223564954682, - "normalized_score": 13.746223564954683 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.35018750000000004, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27119348404255317, - "normalized_score": 19.02149822695035 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-29", - "submission_date": "2025-01-29", - "generation": 1, - "base_model": "nbeerbower/EVA-abliterated-TIES-Qwen2.5-1.5B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.777, - "co2_cost": 1.230458643725809 - } - }, - { - "id": "nbeerbower/EVA-abliterated-TIES-Qwen2.5-14B_bfloat16_610b00c3b61b7ff606bbd0fb3608d8236c825869_True", - "model": { - "name": "nbeerbower/EVA-abliterated-TIES-Qwen2.5-14B", - "sha": "610b00c3b61b7ff606bbd0fb3608d8236c825869", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 42.16442636538604, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.783554302583811, - "normalized_score": 78.35543025838109 - }, - "bbh": { - "name": "BBH", - "value": 0.6372016353633118, - "normalized_score": 48.522478115727985 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5045317220543807, - "normalized_score": 50.453172205438065 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3548657718120805, - "normalized_score": 13.982102908277403 - }, - "musr": { - "name": "MUSR", - "value": 0.4406666666666667, - "normalized_score": 14.883333333333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5211103723404256, - "normalized_score": 46.790041371158395 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - 
"is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-08", - "submission_date": "2025-02-09", - "generation": 1, - "base_model": "nbeerbower/EVA-abliterated-TIES-Qwen2.5-14B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 14.77, - "co2_cost": 1.395555570529591 - } - }, - { - "id": "nbeerbower/Flammades-Mistral-Nemo-12B_bfloat16_ddc76d1976af06aedc7f06bbffcaa34166c1cbdd_False", - "model": { - "name": "nbeerbower/Flammades-Mistral-Nemo-12B", - "sha": "ddc76d1976af06aedc7f06bbffcaa34166c1cbdd", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.56672421581219, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.38415958545548035, - "normalized_score": 38.41595854554804 - }, - "bbh": { - "name": "BBH", - "value": 0.5299609345270283, - "normalized_score": 32.39377187272594 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0755287009063444, - "normalized_score": 7.552870090634441 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.480625, - "normalized_score": 20.31145833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36610704787234044, - "normalized_score": 29.56744976359338 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-05", - "submission_date": "2024-10-06", - "generation": 1, - "base_model": "nbeerbower/Flammades-Mistral-Nemo-12B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 12.248, - "co2_cost": 3.254839563055808 - } - }, - { - "id": "nbeerbower/Gemma2-Gutenberg-Doppel-9B_bfloat16_f425bc69783891088e89e0afe44ec62b730567ba_False", - "model": { - "name": "nbeerbower/Gemma2-Gutenberg-Doppel-9B", - "sha": "f425bc69783891088e89e0afe44ec62b730567ba", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 32.54244427540016, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7171094917042337, - "normalized_score": 71.71094917042336 - }, - "bbh": { - "name": "BBH", - "value": 0.5870114193661848, - "normalized_score": 41.08306318800703 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19788519637462235, - "normalized_score": 19.788519637462233 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3296979865771812, - "normalized_score": 10.626398210290827 - }, - "musr": { - "name": "MUSR", - "value": 0.46078125, - "normalized_score": 17.29765625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.41273271276595747, - "normalized_score": 34.748079196217496 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-29", - "submission_date": "2024-10-01", - "generation": 1, - "base_model": "nbeerbower/Gemma2-Gutenberg-Doppel-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 4, - "params_billions": 9.242, - "co2_cost": 3.8946312576793183 - } - }, - { - "id": "nbeerbower/Gutensuppe-mistral-nemo-12B_bfloat16_6ee13f347071bc3c4ee95c9dc3488a4093927143_False", - 
"model": { - "name": "nbeerbower/Gutensuppe-mistral-nemo-12B", - "sha": "6ee13f347071bc3c4ee95c9dc3488a4093927143", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.29422199063106, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.29161070404305023, - "normalized_score": 29.161070404305022 - }, - "bbh": { - "name": "BBH", - "value": 0.5486832203098263, - "normalized_score": 35.56934797458052 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13293051359516617, - "normalized_score": 13.293051359516618 - }, - "gpqa": { - "name": "GPQA", - "value": 0.337248322147651, - "normalized_score": 11.633109619686799 - }, - "musr": { - "name": "MUSR", - "value": 0.42903125, - "normalized_score": 14.328906249999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3680186170212766, - "normalized_score": 29.779846335697403 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-23", - "submission_date": "2024-09-03", - "generation": 1, - "base_model": "nbeerbower/Gutensuppe-mistral-nemo-12B (Merge)", - "hub_license": "", - "hub_hearts": 6, - "params_billions": 12.248, - "co2_cost": 3.112498654874653 - } - }, - { - "id": "nbeerbower/Hermes2-Gutenberg2-Mistral-7B_bfloat16_5eec0dfd29999ef1d7775010b7e9c7be9ed89bfd_False", - "model": { - "name": "nbeerbower/Hermes2-Gutenberg2-Mistral-7B", - "sha": "5eec0dfd29999ef1d7775010b7e9c7be9ed89bfd", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.363860758860653, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.37214479802479644, - "normalized_score": 37.214479802479644 - }, - "bbh": { - "name": "BBH", - "value": 0.4981450458280896, - "normalized_score": 28.907334664767347 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05740181268882175, - "normalized_score": 5.740181268882175 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28942953020134227, - "normalized_score": 5.257270693512303 - }, - "musr": { - "name": "MUSR", - "value": 0.46230208333333334, - "normalized_score": 16.92109375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29928523936170215, - "normalized_score": 22.14280437352246 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-30", - "submission_date": "2024-10-01", - "generation": 1, - "base_model": "nbeerbower/Hermes2-Gutenberg2-Mistral-7B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 7.242, - "co2_cost": 1.162242015174426 - } - }, - { - "id": "nbeerbower/Kartoffel-Deepfry-12B_bfloat16_8c4da9fb61da7561424f8a20a6196a8a817b7430_True", - "model": { - "name": "nbeerbower/Kartoffel-Deepfry-12B", - "sha": "8c4da9fb61da7561424f8a20a6196a8a817b7430", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 24.14748758265313, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5021620411618949, - "normalized_score": 
50.21620411618949 - }, - "bbh": { - "name": "BBH", - "value": 0.5365374219062301, - "normalized_score": 33.75497378644494 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06042296072507553, - "normalized_score": 6.042296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.4791666666666667, - "normalized_score": 20.029166666666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3582114361702128, - "normalized_score": 28.690159574468087 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-01", - "submission_date": "2025-03-01", - "generation": 1, - "base_model": "nbeerbower/Kartoffel-Deepfry-12B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 12.248, - "co2_cost": 0.8020515214304915 - } - }, - { - "id": "nbeerbower/Llama-3.1-Nemotron-lorablated-70B_bfloat16_f335a582cdb7fb0e63a7343a908766ebd0ed9882_False", - "model": { - "name": "nbeerbower/Llama-3.1-Nemotron-lorablated-70B", - "sha": "f335a582cdb7fb0e63a7343a908766ebd0ed9882", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 40.87645031021374, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7228797368759337, - "normalized_score": 72.28797368759336 - }, - "bbh": { - "name": "BBH", - "value": 0.6825051293384551, - "normalized_score": 54.18258128894629 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3338368580060423, - "normalized_score": 33.383685800604226 - }, - "gpqa": { - "name": "GPQA", - "value": 0.39093959731543626, - "normalized_score": 18.791946308724835 - }, - "musr": { - "name": "MUSR", - "value": 0.4681666666666667, - "normalized_score": 18.354166666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5343251329787234, - "normalized_score": 48.25834810874704 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-17", - "submission_date": "2024-11-27", - "generation": 1, - "base_model": "nbeerbower/Llama-3.1-Nemotron-lorablated-70B (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 15, - "params_billions": 70.554, - "co2_cost": 70.17863885649874 - } - }, - { - "id": "nbeerbower/Llama3.1-Gutenberg-Doppel-70B_bfloat16_5de156e97f776ce1b88ce5b2e2dc1e7709205a82_True", - "model": { - "name": "nbeerbower/Llama3.1-Gutenberg-Doppel-70B", - "sha": "5de156e97f776ce1b88ce5b2e2dc1e7709205a82", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 36.9233900222014, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7092159913474027, - "normalized_score": 70.92159913474026 - }, - "bbh": { - "name": "BBH", - "value": 0.6660891255994471, - "normalized_score": 52.556778995199046 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2122356495468278, - "normalized_score": 21.22356495468278 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3447986577181208, - "normalized_score": 12.639821029082773 - }, - "musr": { - "name": "MUSR", - "value": 0.48971875, - 
"normalized_score": 22.681510416666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4736535904255319, - "normalized_score": 41.51706560283688 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-11", - "submission_date": "2024-10-12", - "generation": 1, - "base_model": "nbeerbower/Llama3.1-Gutenberg-Doppel-70B (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 5, - "params_billions": 70.554, - "co2_cost": 19.987186070631697 - } - }, - { - "id": "nbeerbower/Lyra-Gutenberg-mistral-nemo-12B_bfloat16_5c506391eb02075e02f4cf5953b443505d646bce_True", - "model": { - "name": "nbeerbower/Lyra-Gutenberg-mistral-nemo-12B", - "sha": "5c506391eb02075e02f4cf5953b443505d646bce", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.867363852612016, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.34948824674086976, - "normalized_score": 34.94882467408698 - }, - "bbh": { - "name": "BBH", - "value": 0.5586245741555749, - "normalized_score": 36.99243243937594 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10120845921450151, - "normalized_score": 10.120845921450151 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3338926174496644, - "normalized_score": 11.185682326621922 - }, - "musr": { - "name": "MUSR", - "value": 0.43566666666666665, - "normalized_score": 14.758333333333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36278257978723405, - "normalized_score": 29.19806442080379 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-23", - "submission_date": "2024-09-03", - "generation": 1, - "base_model": "nbeerbower/Lyra-Gutenberg-mistral-nemo-12B (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 20, - "params_billions": 12.248, - "co2_cost": 3.8372038801713693 - } - }, - { - "id": "nbeerbower/Lyra4-Gutenberg-12B_bfloat16_cb6911be3475da99a810071c04803d6edfb5965b_False", - "model": { - "name": "nbeerbower/Lyra4-Gutenberg-12B", - "sha": "cb6911be3475da99a810071c04803d6edfb5965b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.84411922998086, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2212185888996751, - "normalized_score": 22.12185888996751 - }, - "bbh": { - "name": "BBH", - "value": 0.538669487933139, - "normalized_score": 34.23559275480162 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1299093655589124, - "normalized_score": 12.990936555891238 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3187919463087248, - "normalized_score": 9.172259507829976 - }, - "musr": { - "name": "MUSR", - "value": 0.4037916666666666, - "normalized_score": 11.973958333333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35713098404255317, - "normalized_score": 28.570109338061467 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-09", - "submission_date": "2024-09-12", - "generation": 1, - 
"base_model": "nbeerbower/Lyra4-Gutenberg-12B (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 20, - "params_billions": 12.248, - "co2_cost": 3.381067171584864 - } - }, - { - "id": "nbeerbower/Lyra4-Gutenberg2-12B_bfloat16_6a5f117695cc729de16da87654b979e6df72ed2f_False", - "model": { - "name": "nbeerbower/Lyra4-Gutenberg2-12B", - "sha": "6a5f117695cc729de16da87654b979e6df72ed2f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.944881796280978, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25851296781428834, - "normalized_score": 25.851296781428832 - }, - "bbh": { - "name": "BBH", - "value": 0.5344527944750038, - "normalized_score": 33.73063962440059 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11706948640483383, - "normalized_score": 11.706948640483382 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31291946308724833, - "normalized_score": 8.389261744966444 - }, - "musr": { - "name": "MUSR", - "value": 0.39721874999999995, - "normalized_score": 11.485677083333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35654920212765956, - "normalized_score": 28.50546690307328 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-29", - "submission_date": "2024-10-01", - "generation": 1, - "base_model": "nbeerbower/Lyra4-Gutenberg2-12B (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 12, - "params_billions": 12.248, - "co2_cost": 3.6186791428073777 - } - }, - { - "id": "nbeerbower/Mahou-1.5-mistral-nemo-12B-lorablated_bfloat16_8c9eecaace50659647c7d8b569237ad068a6c837_True", - "model": { - "name": "nbeerbower/Mahou-1.5-mistral-nemo-12B-lorablated", - "sha": "8c9eecaace50659647c7d8b569237ad068a6c837", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 27.050923083044733, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6824880206740338, - "normalized_score": 68.24880206740337 - }, - "bbh": { - "name": "BBH", - "value": 0.5496040380079439, - "normalized_score": 36.077381004161374 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0891238670694864, - "normalized_score": 8.91238670694864 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.45216666666666666, - "normalized_score": 16.554166666666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35738031914893614, - "normalized_score": 28.59781323877068 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-19", - "submission_date": "2024-10-19", - "generation": 1, - "base_model": "nbeerbower/Mahou-1.5-mistral-nemo-12B-lorablated (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 12.248, - "co2_cost": 2.810848990859813 - } - }, - { - "id": "nbeerbower/Mistral-Gutenberg-Doppel-7B-FFT_bfloat16_5735876465b6f2523fdedb73120c3f97d04556d3_True", - "model": { - "name": "nbeerbower/Mistral-Gutenberg-Doppel-7B-FFT", - "sha": "5735876465b6f2523fdedb73120c3f97d04556d3", - 
"precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 18.33827572865676, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5716798095719358, - "normalized_score": 57.16798095719358 - }, - "bbh": { - "name": "BBH", - "value": 0.40762540890255944, - "normalized_score": 17.34657495584369 - }, - "math": { - "name": "MATH Level 5", - "value": 0.024924471299093656, - "normalized_score": 2.492447129909366 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.4059375, - "normalized_score": 9.342187500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2728557180851064, - "normalized_score": 19.206190898345156 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-18", - "submission_date": "2024-11-18", - "generation": 1, - "base_model": "nbeerbower/Mistral-Gutenberg-Doppel-7B-FFT (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 7.242, - "co2_cost": 0.8736325668731033 - } - }, - { - "id": "nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B_bfloat16_0eaaac89d4b53e94d5b78220b24439a026ee29e6_False", - "model": { - "name": "nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B", - "sha": "0eaaac89d4b53e94d5b78220b24439a026ee29e6", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.53798697951247, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3567068711020093, - "normalized_score": 35.67068711020093 - }, - "bbh": { - "name": "BBH", - "value": 0.5274606999473499, - "normalized_score": 32.42152675939865 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1216012084592145, - "normalized_score": 12.16012084592145 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3162751677852349, - "normalized_score": 8.83668903803132 - }, - "musr": { - "name": "MUSR", - "value": 0.41321874999999997, - "normalized_score": 11.485677083333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35787898936170215, - "normalized_score": 28.65322104018913 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-26", - "submission_date": "2024-09-26", - "generation": 1, - "base_model": "nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 5, - "params_billions": 12.248, - "co2_cost": 3.553542590294508 - } - }, - { - "id": "nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B-v2_bfloat16_adc1ccd9d83d24e41bed895f989803af87ea2d2c_True", - "model": { - "name": "nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B-v2", - "sha": "adc1ccd9d83d24e41bed895f989803af87ea2d2c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 25.90126357738714, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6535869271311232, - "normalized_score": 65.35869271311232 - }, - "bbh": { - "name": "BBH", - "value": 
0.5374496172235809, - "normalized_score": 34.35741284991507 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11555891238670694, - "normalized_score": 11.555891238670695 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.42330208333333336, - "normalized_score": 13.046093749999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3546376329787234, - "normalized_score": 28.29307033096927 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-04", - "submission_date": "2024-10-09", - "generation": 1, - "base_model": "nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B-v2 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 9, - "params_billions": 12.248, - "co2_cost": 2.809713066840293 - } - }, - { - "id": "nbeerbower/Mistral-Nemo-Moderne-12B-FFT-experimental_bfloat16_e52f7b7c3ade2a6212f29dd1054332cee21ab85d_True", - "model": { - "name": "nbeerbower/Mistral-Nemo-Moderne-12B-FFT-experimental", - "sha": "e52f7b7c3ade2a6212f29dd1054332cee21ab85d", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 18.119465502148433, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.33522498082864577, - "normalized_score": 33.52249808286457 - }, - "bbh": { - "name": "BBH", - "value": 0.5234089179237257, - "normalized_score": 32.0715397550976 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0770392749244713, - "normalized_score": 7.7039274924471295 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28104026845637586, - "normalized_score": 4.138702460850116 - }, - "musr": { - "name": "MUSR", - "value": 0.3714895833333333, - "normalized_score": 4.002864583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3454953457446808, - "normalized_score": 27.277260638297868 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-19", - "submission_date": "2024-11-26", - "generation": 1, - "base_model": "nbeerbower/Mistral-Nemo-Moderne-12B-FFT-experimental (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 12.248, - "co2_cost": 2.4343148284500358 - } - }, - { - "id": "nbeerbower/Mistral-Nemo-Prism-12B_bfloat16_a39e1c8c083c172aaa3ca81faf8ba3b4799a888f_True", - "model": { - "name": "nbeerbower/Mistral-Nemo-Prism-12B", - "sha": "a39e1c8c083c172aaa3ca81faf8ba3b4799a888f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 27.924007108863105, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6858103166265509, - "normalized_score": 68.58103166265508 - }, - "bbh": { - "name": "BBH", - "value": 0.5475186352291487, - "normalized_score": 35.91800839002647 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08685800604229607, - "normalized_score": 8.685800604229607 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.46261458333333333, - "normalized_score": 
17.960156249999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3581283244680851, - "normalized_score": 28.680924940898343 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-12", - "submission_date": "2024-11-12", - "generation": 1, - "base_model": "nbeerbower/Mistral-Nemo-Prism-12B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 12.248, - "co2_cost": 1.9185760053957552 - } - }, - { - "id": "nbeerbower/Mistral-Nemo-Prism-12B-v2_bfloat16_d7545999274cb56b5f961580b5234e8a647e023a_True", - "model": { - "name": "nbeerbower/Mistral-Nemo-Prism-12B-v2", - "sha": "d7545999274cb56b5f961580b5234e8a647e023a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 28.07046513725069, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6974006746543615, - "normalized_score": 69.74006746543614 - }, - "bbh": { - "name": "BBH", - "value": 0.5491875637377679, - "normalized_score": 36.19978764533613 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0891238670694864, - "normalized_score": 8.91238670694864 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.45997916666666666, - "normalized_score": 17.6640625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3567154255319149, - "normalized_score": 28.52393617021276 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-12", - "submission_date": "2024-11-26", - "generation": 1, - "base_model": "nbeerbower/Mistral-Nemo-Prism-12B-v2 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 12.248, - "co2_cost": 1.8711043736374597 - } - }, - { - "id": "nbeerbower/Mistral-Nemo-Prism-12B-v7_bfloat16_0c9da9f3903be14fda1fcae245c22f873442b86f_True", - "model": { - "name": "nbeerbower/Mistral-Nemo-Prism-12B-v7", - "sha": "0c9da9f3903be14fda1fcae245c22f873442b86f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 28.034788345956486, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6961517662025647, - "normalized_score": 69.61517662025648 - }, - "bbh": { - "name": "BBH", - "value": 0.5521104600038905, - "normalized_score": 36.44001728504705 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08685800604229607, - "normalized_score": 8.685800604229607 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.46388541666666666, - "normalized_score": 18.085677083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35904255319148937, - "normalized_score": 28.782505910165483 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-13", - "submission_date": "2024-11-26", - "generation": 1, - "base_model": "nbeerbower/Mistral-Nemo-Prism-12B-v7 
(Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 6, - "params_billions": 12.248, - "co2_cost": 1.9510222086933136 - } - }, - { - "id": "nbeerbower/Mistral-Small-Drummer-22B_bfloat16_53b21ece0c64ffc8aba81f294ad19e2c06e9852c_False", - "model": { - "name": "nbeerbower/Mistral-Small-Drummer-22B", - "sha": "53b21ece0c64ffc8aba81f294ad19e2c06e9852c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 29.819408797614738, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6331289866443259, - "normalized_score": 63.312898664432595 - }, - "bbh": { - "name": "BBH", - "value": 0.5793201948136216, - "normalized_score": 40.12177010845507 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18882175226586104, - "normalized_score": 18.882175226586103 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34312080536912754, - "normalized_score": 12.416107382550338 - }, - "musr": { - "name": "MUSR", - "value": 0.40636458333333336, - "normalized_score": 9.795572916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.40949135638297873, - "normalized_score": 34.38792848699764 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-26", - "submission_date": "2024-10-01", - "generation": 1, - "base_model": "nbeerbower/Mistral-Small-Drummer-22B (Merge)", - "hub_license": "other", - "hub_hearts": 14, - "params_billions": 22.247, - "co2_cost": 3.22544340715805 - } - }, - { - "id": "nbeerbower/Mistral-Small-Gutenberg-Doppel-22B_bfloat16_d8091aad5f882b714321e4d51f504cc61996ee67_False", - "model": { - "name": "nbeerbower/Mistral-Small-Gutenberg-Doppel-22B", - "sha": "d8091aad5f882b714321e4d51f504cc61996ee67", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 27.972039733086746, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.48932277468228746, - "normalized_score": 48.93227746822875 - }, - "bbh": { - "name": "BBH", - "value": 0.5858932329112819, - "normalized_score": 40.931345197471124 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21827794561933533, - "normalized_score": 21.827794561933533 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3464765100671141, - "normalized_score": 12.863534675615215 - }, - "musr": { - "name": "MUSR", - "value": 0.39706250000000004, - "normalized_score": 8.566145833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.41240026595744683, - "normalized_score": 34.711140661938536 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-25", - "submission_date": "2024-09-25", - "generation": 1, - "base_model": "nbeerbower/Mistral-Small-Gutenberg-Doppel-22B (Merge)", - "hub_license": "other", - "hub_hearts": 11, - "params_billions": 22.247, - "co2_cost": 3.177206465624349 - } - }, - { - "id": "nbeerbower/Nemo-Loony-12B-experimental_bfloat16_7b06f30502a9b58c028ac1079e1b3d2988b76866_True", - "model": { - "name": "nbeerbower/Nemo-Loony-12B-experimental", - "sha": "7b06f30502a9b58c028ac1079e1b3d2988b76866", - "precision": "bfloat16", - "type": 
"basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 10.46956739279853, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.37344357416100393, - "normalized_score": 37.344357416100394 - }, - "bbh": { - "name": "BBH", - "value": 0.38222228797769536, - "normalized_score": 12.974588389942944 - }, - "math": { - "name": "MATH Level 5", - "value": 0.015105740181268883, - "normalized_score": 1.5105740181268883 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.3340625, - "normalized_score": 1.7578124999999993 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1589095744680851, - "normalized_score": 6.545508274231676 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-26", - "submission_date": "2024-11-26", - "generation": 1, - "base_model": "nbeerbower/Nemo-Loony-12B-experimental (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 12.248, - "co2_cost": 2.475163168845021 - } - }, - { - "id": "nbeerbower/Nemoties-ChatML-12B_bfloat16_5e088ff3e7e9d09a868be5db3a8d5a03c2e7dd16_True", - "model": { - "name": "nbeerbower/Nemoties-ChatML-12B", - "sha": "5e088ff3e7e9d09a868be5db3a8d5a03c2e7dd16", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 26.439234908921165, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6381999760635115, - "normalized_score": 63.819997606351144 - }, - "bbh": { - "name": "BBH", - "value": 0.5470252374810588, - "normalized_score": 35.76579510118981 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07854984894259819, - "normalized_score": 7.854984894259818 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.45086458333333335, - "normalized_score": 16.591406249999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3550531914893617, - "normalized_score": 28.33924349881796 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-21", - "submission_date": "2025-02-21", - "generation": 1, - "base_model": "nbeerbower/Nemoties-ChatML-12B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 12.248, - "co2_cost": 1.6053630785359911 - } - }, - { - "id": "nbeerbower/Qwen2.5-Gutenberg-Doppel-14B_bfloat16_11a5060f9e7315ea07241106f086ac4694dded60_True", - "model": { - "name": "nbeerbower/Qwen2.5-Gutenberg-Doppel-14B", - "sha": "11a5060f9e7315ea07241106f086ac4694dded60", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.327794892427, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8090832324897937, - "normalized_score": 80.90832324897937 - }, - "bbh": { - "name": "BBH", - "value": 0.6381735755183319, - "normalized_score": 48.23890863039215 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5415407854984894, - "normalized_score": 
54.15407854984894 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33305369127516776, - "normalized_score": 11.073825503355701 - }, - "musr": { - "name": "MUSR", - "value": 0.4100625, - "normalized_score": 10.024479166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.49210438829787234, - "normalized_score": 43.567154255319146 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-11", - "submission_date": "2024-11-11", - "generation": 1, - "base_model": "nbeerbower/Qwen2.5-Gutenberg-Doppel-14B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 12, - "params_billions": 14.77, - "co2_cost": 3.3812237309009148 - } - }, - { - "id": "nbeerbower/SmolNemo-12B-FFT-experimental_bfloat16_d8d7a90ae9b9cb79cdc0912a685c3cb8d7a25560_True", - "model": { - "name": "nbeerbower/SmolNemo-12B-FFT-experimental", - "sha": "d8d7a90ae9b9cb79cdc0912a685c3cb8d7a25560", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 8.496288137169051, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3348005514257725, - "normalized_score": 33.48005514257725 - }, - "bbh": { - "name": "BBH", - "value": 0.3336088810494464, - "normalized_score": 6.542438534179723 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01283987915407855, - "normalized_score": 1.283987915407855 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.38469791666666664, - "normalized_score": 5.920572916666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12167553191489362, - "normalized_score": 2.40839243498818 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-25", - "submission_date": "2024-11-26", - "generation": 1, - "base_model": "nbeerbower/SmolNemo-12B-FFT-experimental (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 12.248, - "co2_cost": 2.4508295116551655 - } - }, - { - "id": "nbeerbower/Stella-mistral-nemo-12B-v2_bfloat16_b81bab28f7dcb25a0aa0fe4dcf957f3083ee6b43_False", - "model": { - "name": "nbeerbower/Stella-mistral-nemo-12B-v2", - "sha": "b81bab28f7dcb25a0aa0fe4dcf957f3083ee6b43", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.49330971799377, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.32743121584063617, - "normalized_score": 32.743121584063616 - }, - "bbh": { - "name": "BBH", - "value": 0.5483750956495209, - "normalized_score": 35.364516100686416 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1163141993957704, - "normalized_score": 11.63141993957704 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33221476510067116, - "normalized_score": 10.96196868008949 - }, - "musr": { - "name": "MUSR", - "value": 0.4303958333333333, - "normalized_score": 14.432812499999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3684341755319149, - "normalized_score": 29.826019503546092 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": 
false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-07", - "submission_date": "2024-09-14", - "generation": 1, - "base_model": "nbeerbower/Stella-mistral-nemo-12B-v2 (Merge)", - "hub_license": "", - "hub_hearts": 5, - "params_billions": 12.248, - "co2_cost": 3.4817441830156293 - } - }, - { - "id": "nbeerbower/gemma2-gutenberg-27B_bfloat16_d4febe52e8b7b13a98126dbf1716ed1329f48922_False", - "model": { - "name": "nbeerbower/gemma2-gutenberg-27B", - "sha": "d4febe52e8b7b13a98126dbf1716ed1329f48922", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 10.4236638747709, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.29470804133033685, - "normalized_score": 29.47080413303368 - }, - "bbh": { - "name": "BBH", - "value": 0.37965683503451614, - "normalized_score": 13.091524912026523 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0188821752265861, - "normalized_score": 1.8882175226586102 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.3727291666666666, - "normalized_score": 4.1578125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.19822140957446807, - "normalized_score": 10.913489952718674 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-09", - "submission_date": "2024-09-23", - "generation": 1, - "base_model": "nbeerbower/gemma2-gutenberg-27B (Merge)", - "hub_license": "gemma", - "hub_hearts": 6, - "params_billions": 27.227, - "co2_cost": 15.390916539476832 - } - }, - { - "id": "nbeerbower/gemma2-gutenberg-9B_bfloat16_ebdab2d41f257fc9e7c858498653644d13386ce5_False", - "model": { - "name": "nbeerbower/gemma2-gutenberg-9B", - "sha": "ebdab2d41f257fc9e7c858498653644d13386ce5", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 23.719246452807738, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2795948084416016, - "normalized_score": 27.95948084416016 - }, - "bbh": { - "name": "BBH", - "value": 0.5950904001490335, - "normalized_score": 42.35561106809721 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08081570996978851, - "normalized_score": 8.08157099697885 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33808724832214765, - "normalized_score": 11.74496644295302 - }, - "musr": { - "name": "MUSR", - "value": 0.45951041666666664, - "normalized_score": 16.705468749999994 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4192154255319149, - "normalized_score": 35.4683806146572 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-14", - "submission_date": "2024-08-03", - "generation": 1, - "base_model": "nbeerbower/gemma2-gutenberg-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 12, - "params_billions": 9.242, - "co2_cost": 5.619217537698095 - } - }, - { - "id": "nbeerbower/llama-3-gutenberg-8B_bfloat16_4ed3aac5e30c078bee79ae193c2d301d38860b20_False", - "model": { - 
"name": "nbeerbower/llama-3-gutenberg-8B", - "sha": "4ed3aac5e30c078bee79ae193c2d301d38860b20", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.30881705657422, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4371910973993448, - "normalized_score": 43.71910973993448 - }, - "bbh": { - "name": "BBH", - "value": 0.49936002561994197, - "normalized_score": 27.958132724191334 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07854984894259819, - "normalized_score": 7.854984894259818 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.40730208333333334, - "normalized_score": 10.046093749999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.383061835106383, - "normalized_score": 31.451315011820324 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-05", - "submission_date": "2024-07-10", - "generation": 1, - "base_model": "nbeerbower/llama-3-gutenberg-8B (Merge)", - "hub_license": "other", - "hub_hearts": 8, - "params_billions": 8.03, - "co2_cost": 1.7671387810092376 - } - }, - { - "id": "nbeerbower/llama3.1-cc-8B_bfloat16_5269bb26f1afe005f144564f484e7554f185239f_False", - "model": { - "name": "nbeerbower/llama3.1-cc-8B", - "sha": "5269bb26f1afe005f144564f484e7554f185239f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.256041660753873, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5068086011782071, - "normalized_score": 50.680860117820714 - }, - "bbh": { - "name": "BBH", - "value": 0.4871187428614386, - "normalized_score": 26.48381169342659 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07099697885196375, - "normalized_score": 7.099697885196375 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28523489932885904, - "normalized_score": 4.697986577181204 - }, - "musr": { - "name": "MUSR", - "value": 0.38851041666666664, - "normalized_score": 6.497135416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3346908244680851, - "normalized_score": 26.076758274231683 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-18", - "submission_date": "2024-09-14", - "generation": 1, - "base_model": "nbeerbower/llama3.1-cc-8B (Merge)", - "hub_license": "llama3", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.8744749214923342 - } - }, - { - "id": "nbeerbower/llama3.1-kartoffeldes-70B_bfloat16_4377c98f475f2af018fa4fef77f12106001bc1bf_True", - "model": { - "name": "nbeerbower/llama3.1-kartoffeldes-70B", - "sha": "4377c98f475f2af018fa4fef77f12106001bc1bf", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 41.11056038125863, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8230218043679659, - "normalized_score": 82.30218043679659 - }, - "bbh": { - "name": "BBH", - "value": 
0.6893878613110068, - "normalized_score": 55.593932574971824 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3217522658610272, - "normalized_score": 32.17522658610272 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35151006711409394, - "normalized_score": 13.534675615212524 - }, - "musr": { - "name": "MUSR", - "value": 0.46460416666666665, - "normalized_score": 18.742187499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4988364361702128, - "normalized_score": 44.31515957446809 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-19", - "submission_date": "2025-01-19", - "generation": 1, - "base_model": "nbeerbower/llama3.1-kartoffeldes-70B (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 0, - "params_billions": 70.554, - "co2_cost": 23.33503403405791 - } - }, - { - "id": "nbeerbower/mistral-nemo-bophades-12B_bfloat16_252a358e099f77a0a28125e00a57aa3a107b3910_True", - "model": { - "name": "nbeerbower/mistral-nemo-bophades-12B", - "sha": "252a358e099f77a0a28125e00a57aa3a107b3910", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 25.72860228091272, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6794405510711579, - "normalized_score": 67.94405510711579 - }, - "bbh": { - "name": "BBH", - "value": 0.4988471515853883, - "normalized_score": 29.543905352144947 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12311178247734139, - "normalized_score": 12.311178247734139 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28523489932885904, - "normalized_score": 4.697986577181204 - }, - "musr": { - "name": "MUSR", - "value": 0.41778125, - "normalized_score": 12.089322916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35006648936170215, - "normalized_score": 27.785165484633573 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-13", - "submission_date": "2024-09-03", - "generation": 1, - "base_model": "nbeerbower/mistral-nemo-bophades-12B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 9, - "params_billions": 12.248, - "co2_cost": 4.104693009226808 - } - }, - { - "id": "nbeerbower/mistral-nemo-bophades3-12B_bfloat16_0cb218eece0991c7ff585fb31fe82f883fc71a55_True", - "model": { - "name": "nbeerbower/mistral-nemo-bophades3-12B", - "sha": "0cb218eece0991c7ff585fb31fe82f883fc71a55", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 27.16558651541493, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6577835698169745, - "normalized_score": 65.77835698169746 - }, - "bbh": { - "name": "BBH", - "value": 0.544933208169299, - "normalized_score": 35.24465712316055 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08459214501510574, - "normalized_score": 8.459214501510575 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31208053691275167, - "normalized_score": 8.277404921700223 - }, - "musr": { - "name": "MUSR", - "value": 0.4604479166666667, - "normalized_score": 18.889322916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 
0.3371010638297872, - "normalized_score": 26.34456264775413 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-13", - "submission_date": "2025-01-13", - "generation": 1, - "base_model": "nbeerbower/mistral-nemo-bophades3-12B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 12.248, - "co2_cost": 1.8778242145913857 - } - }, - { - "id": "nbeerbower/mistral-nemo-cc-12B_bfloat16_fc32293e0b022d6daef9bfdb0c54d57a5226bf9a_False", - "model": { - "name": "nbeerbower/mistral-nemo-cc-12B", - "sha": "fc32293e0b022d6daef9bfdb0c54d57a5226bf9a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 17.20341027317436, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.14353249378316202, - "normalized_score": 14.3532493783162 - }, - "bbh": { - "name": "BBH", - "value": 0.5399409546487519, - "normalized_score": 34.44654701952267 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0256797583081571, - "normalized_score": 2.56797583081571 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31543624161073824, - "normalized_score": 8.7248322147651 - }, - "musr": { - "name": "MUSR", - "value": 0.44236458333333334, - "normalized_score": 14.262239583333338 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3597905585106383, - "normalized_score": 28.865617612293136 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-18", - "submission_date": "2024-09-14", - "generation": 1, - "base_model": "nbeerbower/mistral-nemo-cc-12B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 12.248, - "co2_cost": 2.9892448416100215 - } - }, - { - "id": "nbeerbower/mistral-nemo-gutades-12B_bfloat16_5689f929808a6165f94ba43f872b944a4bdaaea3_False", - "model": { - "name": "nbeerbower/mistral-nemo-gutades-12B", - "sha": "5689f929808a6165f94ba43f872b944a4bdaaea3", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.075924659452493, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3425189608017837, - "normalized_score": 34.25189608017837 - }, - "bbh": { - "name": "BBH", - "value": 0.5407194259684368, - "normalized_score": 34.57440821872691 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11782477341389729, - "normalized_score": 11.782477341389729 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31543624161073824, - "normalized_score": 8.7248322147651 - }, - "musr": { - "name": "MUSR", - "value": 0.4040416666666667, - "normalized_score": 8.671875000000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3560505319148936, - "normalized_score": 28.45005910165484 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-17", - "submission_date": "2024-09-23", - "generation": 1, - "base_model": "nbeerbower/mistral-nemo-gutades-12B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 
12.248, - "co2_cost": 3.649240112002621 - } - }, - { - "id": "nbeerbower/mistral-nemo-gutenberg-12B_bfloat16_6aeb6f769a53eb111839db8f439b614730e39593_False", - "model": { - "name": "nbeerbower/mistral-nemo-gutenberg-12B", - "sha": "6aeb6f769a53eb111839db8f439b614730e39593", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.024154964814482, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.350386973231027, - "normalized_score": 35.0386973231027 - }, - "bbh": { - "name": "BBH", - "value": 0.5281363707697807, - "normalized_score": 32.43387434197657 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1163141993957704, - "normalized_score": 11.63141993957704 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.41706250000000006, - "normalized_score": 10.966145833333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3562167553191489, - "normalized_score": 28.46852836879432 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-12", - "submission_date": "2024-09-03", - "generation": 1, - "base_model": "nbeerbower/mistral-nemo-gutenberg-12B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 8, - "params_billions": 12.248, - "co2_cost": 3.1496291404937486 - } - }, - { - "id": "nbeerbower/mistral-nemo-gutenberg-12B-v2_bfloat16_86bf9c105ff40835132e41699ac1a76ee0e5b683_True", - "model": { - "name": "nbeerbower/mistral-nemo-gutenberg-12B-v2", - "sha": "86bf9c105ff40835132e41699ac1a76ee0e5b683", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 25.514319837220018, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6203395878491292, - "normalized_score": 62.033958784912926 - }, - "bbh": { - "name": "BBH", - "value": 0.5397203788283472, - "normalized_score": 34.73061633928093 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10876132930513595, - "normalized_score": 10.876132930513595 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.4286979166666667, - "normalized_score": 13.987239583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34990026595744683, - "normalized_score": 27.76669621749409 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-13", - "submission_date": "2024-09-03", - "generation": 1, - "base_model": "nbeerbower/mistral-nemo-gutenberg-12B-v2 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 34, - "params_billions": 12.248, - "co2_cost": 4.211238079190083 - } - }, - { - "id": "nbeerbower/mistral-nemo-gutenberg-12B-v3_bfloat16_3e1a716281f23280abd72e402139c578faca175a_False", - "model": { - "name": "nbeerbower/mistral-nemo-gutenberg-12B-v3", - "sha": "3e1a716281f23280abd72e402139c578faca175a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": 
"MistralForCausalLM", - "average_score": 19.290512133580634, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21827085466562057, - "normalized_score": 21.827085466562057 - }, - "bbh": { - "name": "BBH", - "value": 0.544065799051091, - "normalized_score": 34.95791456295642 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05966767371601209, - "normalized_score": 5.966767371601208 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3145973154362416, - "normalized_score": 8.612975391498878 - }, - "musr": { - "name": "MUSR", - "value": 0.44503125, - "normalized_score": 14.995572916666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3644448138297872, - "normalized_score": 29.38275709219858 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-15", - "submission_date": "2024-09-03", - "generation": 1, - "base_model": "nbeerbower/mistral-nemo-gutenberg-12B-v3 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 11, - "params_billions": 12.248, - "co2_cost": 3.6707380977300472 - } - }, - { - "id": "nbeerbower/mistral-nemo-gutenberg-12B-v4_bfloat16_59409afe585ae6945a588c867f879a9d31e571e6_False", - "model": { - "name": "nbeerbower/mistral-nemo-gutenberg-12B-v4", - "sha": "59409afe585ae6945a588c867f879a9d31e571e6", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.83898144810974, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.237929804031082, - "normalized_score": 23.7929804031082 - }, - "bbh": { - "name": "BBH", - "value": 0.5269028864823667, - "normalized_score": 31.97125827358258 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12613293051359517, - "normalized_score": 12.613293051359516 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3162751677852349, - "normalized_score": 8.83668903803132 - }, - "musr": { - "name": "MUSR", - "value": 0.4104270833333333, - "normalized_score": 13.203385416666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3575465425531915, - "normalized_score": 28.616282505910167 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-22", - "submission_date": "2024-09-03", - "generation": 1, - "base_model": "nbeerbower/mistral-nemo-gutenberg-12B-v4 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 20, - "params_billions": 12.248, - "co2_cost": 3.520922673562402 - } - }, - { - "id": "nbeerbower/mistral-nemo-gutenberg2-12B-test_bfloat16_10da6150b0bedf8fd59206d72c4c0335ac665df3_False", - "model": { - "name": "nbeerbower/mistral-nemo-gutenberg2-12B-test", - "sha": "10da6150b0bedf8fd59206d72c4c0335ac665df3", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.970579950625815, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.33847192116916447, - "normalized_score": 33.847192116916446 - }, - "bbh": { - "name": "BBH", - "value": 0.525477908630255, - "normalized_score": 32.04475928596384 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1163141993957704, - 
"normalized_score": 11.63141993957704 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31711409395973156, - "normalized_score": 8.948545861297541 - }, - "musr": { - "name": "MUSR", - "value": 0.4157291666666667, - "normalized_score": 10.966145833333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35546875, - "normalized_score": 28.385416666666668 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-24", - "submission_date": "2024-09-25", - "generation": 1, - "base_model": "nbeerbower/mistral-nemo-gutenberg2-12B-test (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 12.248, - "co2_cost": 3.350053941716089 - } - }, - { - "id": "nbeerbower/mistral-nemo-kartoffel-12B_bfloat16_4e0cfd2462bd5761ec200a4bd6ebb20e5fa4a9ad_True", - "model": { - "name": "nbeerbower/mistral-nemo-kartoffel-12B", - "sha": "4e0cfd2462bd5761ec200a4bd6ebb20e5fa4a9ad", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 28.21995794267338, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7031709198260616, - "normalized_score": 70.31709198260614 - }, - "bbh": { - "name": "BBH", - "value": 0.5483796436144805, - "normalized_score": 36.052532566869004 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08534743202416918, - "normalized_score": 8.534743202416918 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.46528125000000004, - "normalized_score": 18.426822916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35846077127659576, - "normalized_score": 28.717863475177303 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-12", - "submission_date": "2025-01-12", - "generation": 1, - "base_model": "nbeerbower/mistral-nemo-kartoffel-12B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 12.248, - "co2_cost": 1.5291241048613553 - } - }, - { - "id": "nbeerbower/mistral-nemo-narwhal-12B_bfloat16_9384d7e572a09181c79e19d934ab865f7a7d4efc_True", - "model": { - "name": "nbeerbower/mistral-nemo-narwhal-12B", - "sha": "9384d7e572a09181c79e19d934ab865f7a7d4efc", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.240878416352583, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5549187267561182, - "normalized_score": 55.49187267561182 - }, - "bbh": { - "name": "BBH", - "value": 0.5057374929934754, - "normalized_score": 29.562789508057296 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0581570996978852, - "normalized_score": 5.81570996978852 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.38469791666666664, - "normalized_score": 6.1872395833333345 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34832114361702127, - "normalized_score": 27.59123817966903 - } - }, - "features": { - "is_not_available_on_hub": true, - 
"is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-13", - "submission_date": "2025-01-13", - "generation": 1, - "base_model": "nbeerbower/mistral-nemo-narwhal-12B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 12.248, - "co2_cost": 1.8852501869190186 - } - }, - { - "id": "nbeerbower/mistral-nemo-wissenschaft-12B_bfloat16_2480f9924415c72fe00ae9391bb15a6d05c889eb_True", - "model": { - "name": "nbeerbower/mistral-nemo-wissenschaft-12B", - "sha": "2480f9924415c72fe00ae9391bb15a6d05c889eb", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 25.50992586494861, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6520133246452745, - "normalized_score": 65.20133246452745 - }, - "bbh": { - "name": "BBH", - "value": 0.5040306120993181, - "normalized_score": 29.567999415715217 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1216012084592145, - "normalized_score": 12.16012084592145 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29278523489932884, - "normalized_score": 5.7046979865771785 - }, - "musr": { - "name": "MUSR", - "value": 0.41778125, - "normalized_score": 12.289322916666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35322473404255317, - "normalized_score": 28.13608156028369 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-12", - "submission_date": "2024-08-30", - "generation": 1, - "base_model": "nbeerbower/mistral-nemo-wissenschaft-12B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 8, - "params_billions": 12.248, - "co2_cost": 2.858746838889556 - } - }, - { - "id": "nbrahme/IndusQ_bfloat16_d4224f753e6a2d6e7476752fb927c26c55ec9467_True", - "model": { - "name": "nbrahme/IndusQ", - "sha": "d4224f753e6a2d6e7476752fb927c26c55ec9467", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "GPT2LMHeadModel", - "average_score": 5.636134043635501, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24397487555242311, - "normalized_score": 24.39748755524231 - }, - "bbh": { - "name": "BBH", - "value": 0.30624035198474986, - "normalized_score": 3.7470964959740556 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0007552870090634441, - "normalized_score": 0.0755287009063444 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26510067114093966, - "normalized_score": 2.0134228187919545 - }, - "musr": { - "name": "MUSR", - "value": 0.3366354166666667, - "normalized_score": 2.2460937499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11203457446808511, - "normalized_score": 1.337174940898345 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-16", - "submission_date": "2024-09-18", - "generation": 0, - "base_model": "nbrahme/IndusQ", - "hub_license": "osl-3.0", - "hub_hearts": 0, - "params_billions": 1.176, - "co2_cost": 0.30123373035412043 - } - }, - { - "id": "necva/IE-cont-Llama3.1-8B_float16_7c56e751113c503f77205f2ac70b52bd5918a15d_False", - "model": { - "name": "necva/IE-cont-Llama3.1-8B", 
- "sha": "7c56e751113c503f77205f2ac70b52bd5918a15d", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.093186530380667, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20490742341431845, - "normalized_score": 20.490742341431847 - }, - "bbh": { - "name": "BBH", - "value": 0.2911778102988436, - "normalized_score": 2.3470409575204094 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.35753125, - "normalized_score": 4.524739583333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11668882978723404, - "normalized_score": 1.854314420803781 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-05", - "submission_date": "2025-03-06", - "generation": 1, - "base_model": "necva/IE-cont-Llama3.1-8B (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.732556864105992 - } - }, - { - "id": "necva/replica-IEPile_float16_5f6ea567a0bd2addd2d4eb3c9d17ef40b18aab05_False", - "model": { - "name": "necva/replica-IEPile", - "sha": "5f6ea567a0bd2addd2d4eb3c9d17ef40b18aab05", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.56058539635963, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4677910167245132, - "normalized_score": 46.77910167245132 - }, - "bbh": { - "name": "BBH", - "value": 0.4778579652970231, - "normalized_score": 25.316518746574474 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12386706948640483, - "normalized_score": 12.386706948640484 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3062080536912752, - "normalized_score": 7.494407158836691 - }, - "musr": { - "name": "MUSR", - "value": 0.3997604166666667, - "normalized_score": 8.93671875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3560505319148936, - "normalized_score": 28.45005910165484 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-01", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 4.65, - "co2_cost": 1.203072647367737 - } - }, - { - "id": "neopolita/jessi-v0.1-bf16-falcon3-7b-instruct_bfloat16_da9877089d4975e84e10cbb026125aa5574a4dc7_True", - "model": { - "name": "neopolita/jessi-v0.1-bf16-falcon3-7b-instruct", - "sha": "da9877089d4975e84e10cbb026125aa5574a4dc7", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 34.92479232482063, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7527050448365891, - "normalized_score": 75.27050448365891 - }, - "bbh": { - "name": "BBH", - "value": 0.5516128933222162, - "normalized_score": 36.13467912106424 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3806646525679758, - "normalized_score": 
38.066465256797585 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.48248958333333336, - "normalized_score": 20.544531249999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3923703457446808, - "normalized_score": 32.4855939716312 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-08", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.456, - "co2_cost": 1.2283502198182912 - } - }, - { - "id": "neopolita/jessi-v0.1-falcon3-10b-instruct_bfloat16_f50e54e6bf8af96e770139a2ae67ce51d5c00f14_True", - "model": { - "name": "neopolita/jessi-v0.1-falcon3-10b-instruct", - "sha": "f50e54e6bf8af96e770139a2ae67ce51d5c00f14", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 32.33560766198001, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.755152994055772, - "normalized_score": 75.5152994055772 - }, - "bbh": { - "name": "BBH", - "value": 0.5952883626256132, - "normalized_score": 41.44033637148312 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2001510574018127, - "normalized_score": 20.01510574018127 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3187919463087248, - "normalized_score": 9.172259507829976 - }, - "musr": { - "name": "MUSR", - "value": 0.42785416666666665, - "normalized_score": 12.448437499999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4187998670212766, - "normalized_score": 35.422207446808514 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-19", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 10.306, - "co2_cost": 1.5816365822647414 - } - }, - { - "id": "neopolita/jessi-v0.1-qwen2.5-7b-instruct_bfloat16_e73d1545a57a4c8e20df50d6641b08015fb039a3_False", - "model": { - "name": "neopolita/jessi-v0.1-qwen2.5-7b-instruct", - "sha": "e73d1545a57a4c8e20df50d6641b08015fb039a3", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 32.775632974338876, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7326715337526651, - "normalized_score": 73.2671533752665 - }, - "bbh": { - "name": "BBH", - "value": 0.5292315105257686, - "normalized_score": 33.34225906937014 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4086102719033233, - "normalized_score": 40.86102719033233 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.3913645833333333, - "normalized_score": 7.053906250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.42278922872340424, - "normalized_score": 35.865469858156025 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-08", - 
"submission_date": "2025-01-26", - "generation": 1, - "base_model": "neopolita/jessi-v0.1-qwen2.5-7b-instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 5.5377250785430245 - } - }, - { - "id": "neopolita/jessi-v0.1-virtuoso-small_bfloat16_b7cd58e26d637faf1576bee68bfa35432fa3ce11_True", - "model": { - "name": "neopolita/jessi-v0.1-virtuoso-small", - "sha": "b7cd58e26d637faf1576bee68bfa35432fa3ce11", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.86960435987991, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7959192719761344, - "normalized_score": 79.59192719761344 - }, - "bbh": { - "name": "BBH", - "value": 0.6442861439957068, - "normalized_score": 48.8603145722634 - }, - "math": { - "name": "MATH Level 5", - "value": 0.33987915407854985, - "normalized_score": 33.987915407854985 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33053691275167785, - "normalized_score": 10.738255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.43616666666666665, - "normalized_score": 14.154166666666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5129654255319149, - "normalized_score": 45.885047281323885 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-15", - "submission_date": "2025-01-15", - "generation": 1, - "base_model": "neopolita/jessi-v0.1-virtuoso-small (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 14.77, - "co2_cost": 3.1233827346905625 - } - }, - { - "id": "neopolita/jessi-v0.2-falcon3-10b-instruct_bfloat16_94f3286c5724f51b319ff19ed1befd55341e34c2_True", - "model": { - "name": "neopolita/jessi-v0.2-falcon3-10b-instruct", - "sha": "94f3286c5724f51b319ff19ed1befd55341e34c2", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 34.10450143941555, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7768099753099553, - "normalized_score": 77.68099753099554 - }, - "bbh": { - "name": "BBH", - "value": 0.6204846671314362, - "normalized_score": 45.02184351970056 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2122356495468278, - "normalized_score": 21.22356495468278 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3288590604026846, - "normalized_score": 10.514541387024611 - }, - "musr": { - "name": "MUSR", - "value": 0.42813541666666666, - "normalized_score": 12.916927083333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4354222074468085, - "normalized_score": 37.2691341607565 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-21", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 10.306, - "co2_cost": 1.7127002701328407 - } - }, - { - "id": "neopolita/jessi-v0.2-falcon3-7b-instruct_bfloat16_efd74223ccaf140aab43df0c9a271007e826124b_True", - "model": { - "name": "neopolita/jessi-v0.2-falcon3-7b-instruct", - "sha": "efd74223ccaf140aab43df0c9a271007e826124b", - "precision": "bfloat16", - "type": 
"basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.040362033793667, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5770754930251731, - "normalized_score": 57.70754930251732 - }, - "bbh": { - "name": "BBH", - "value": 0.5363079188886575, - "normalized_score": 34.382892384888216 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2537764350453172, - "normalized_score": 25.377643504531722 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31711409395973156, - "normalized_score": 8.948545861297541 - }, - "musr": { - "name": "MUSR", - "value": 0.44788541666666665, - "normalized_score": 15.552343749999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3904587765957447, - "normalized_score": 32.273197399527184 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-08", - "submission_date": "2025-01-08", - "generation": 1, - "base_model": "neopolita/jessi-v0.2-falcon3-7b-instruct (Merge)", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 7.456, - "co2_cost": 1.6919950892511235 - } - }, - { - "id": "neopolita/jessi-v0.3-falcon3-7b-instruct_bfloat16_6f95f4e365f8f087f6d037e72493ecff68d9298f_True", - "model": { - "name": "neopolita/jessi-v0.3-falcon3-7b-instruct", - "sha": "6f95f4e365f8f087f6d037e72493ecff68d9298f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 31.56157882788928, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7509064836855099, - "normalized_score": 75.090648368551 - }, - "bbh": { - "name": "BBH", - "value": 0.538793502664194, - "normalized_score": 34.56526833956577 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18882175226586104, - "normalized_score": 18.882175226586103 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3196308724832215, - "normalized_score": 9.284116331096197 - }, - "musr": { - "name": "MUSR", - "value": 0.46915625, - "normalized_score": 18.544531249999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3970246010638298, - "normalized_score": 33.00273345153664 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-08", - "submission_date": "2025-01-08", - "generation": 1, - "base_model": "neopolita/jessi-v0.3-falcon3-7b-instruct (Merge)", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 7.456, - "co2_cost": 1.8228880258629983 - } - }, - { - "id": "neopolita/jessi-v0.4-falcon3-7b-instruct_bfloat16_c27fe9b5f1c31a3c7e8a7f19be037a6d1fb1b090_True", - "model": { - "name": "neopolita/jessi-v0.4-falcon3-7b-instruct", - "sha": "c27fe9b5f1c31a3c7e8a7f19be037a6d1fb1b090", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 35.58265268401903, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7603735865281896, - "normalized_score": 76.03735865281897 - }, - "bbh": { - "name": "BBH", - "value": 0.5521668757306609, - "normalized_score": 36.167444072028246 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3768882175226586, - 
"normalized_score": 37.68882175226586 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.49712500000000004, - "normalized_score": 23.17395833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.40043218085106386, - "normalized_score": 33.38135342789598 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-17", - "submission_date": "2025-01-17", - "generation": 1, - "base_model": "neopolita/jessi-v0.4-falcon3-7b-instruct (Merge)", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 7.456, - "co2_cost": 1.321471697143095 - } - }, - { - "id": "neopolita/jessi-v0.5-falcon3-7b-instruct_bfloat16_1b70a742251a75c8a5fd047f0c9cdd5bffc27a43_True", - "model": { - "name": "neopolita/jessi-v0.5-falcon3-7b-instruct", - "sha": "1b70a742251a75c8a5fd047f0c9cdd5bffc27a43", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 35.17362960818127, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7411645544931892, - "normalized_score": 74.11645544931892 - }, - "bbh": { - "name": "BBH", - "value": 0.5589627302276082, - "normalized_score": 37.16806936900627 - }, - "math": { - "name": "MATH Level 5", - "value": 0.37386706948640486, - "normalized_score": 37.38670694864049 - }, - "gpqa": { - "name": "GPQA", - "value": 0.311241610738255, - "normalized_score": 8.165548098434002 - }, - "musr": { - "name": "MUSR", - "value": 0.48652083333333335, - "normalized_score": 21.248437499999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3966090425531915, - "normalized_score": 32.95656028368795 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-09", - "submission_date": "2025-01-09", - "generation": 1, - "base_model": "neopolita/jessi-v0.5-falcon3-7b-instruct (Merge)", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 7.456, - "co2_cost": 1.2662553132853336 - } - }, - { - "id": "neopolita/jessi-v0.6-falcon3-7b-instruct_bfloat16_b0d817e37046e128741e45821f1bc248f7c9d82b_True", - "model": { - "name": "neopolita/jessi-v0.6-falcon3-7b-instruct", - "sha": "b0d817e37046e128741e45821f1bc248f7c9d82b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 34.54828176574102, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7401904723910335, - "normalized_score": 74.01904723910336 - }, - "bbh": { - "name": "BBH", - "value": 0.5508818723957883, - "normalized_score": 35.85132240048754 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3564954682779456, - "normalized_score": 35.64954682779456 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30033557046979864, - "normalized_score": 6.711409395973152 - }, - "musr": { - "name": "MUSR", - "value": 0.49042708333333335, - "normalized_score": 22.203385416666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3956948138297872, - "normalized_score": 32.8549793144208 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": 
false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-20", - "submission_date": "2025-01-20", - "generation": 1, - "base_model": "neopolita/jessi-v0.6-falcon3-7b-instruct (Merge)", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 7.456, - "co2_cost": 1.278937826257381 - } - }, - { - "id": "neopolita/loki-v0.1-virtuoso_bfloat16_4d884bbc57fd00e74772d554449bed7cfccc1c2a_True", - "model": { - "name": "neopolita/loki-v0.1-virtuoso", - "sha": "4d884bbc57fd00e74772d554449bed7cfccc1c2a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.196962121784466, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7819308324135517, - "normalized_score": 78.19308324135517 - }, - "bbh": { - "name": "BBH", - "value": 0.6467251502613163, - "normalized_score": 49.45293175936923 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3391238670694864, - "normalized_score": 33.91238670694864 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35067114093959734, - "normalized_score": 13.422818791946312 - }, - "musr": { - "name": "MUSR", - "value": 0.43753125, - "normalized_score": 14.324739583333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5128823138297872, - "normalized_score": 45.87581264775414 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-22", - "submission_date": "2025-01-23", - "generation": 1, - "base_model": "neopolita/loki-v0.1-virtuoso (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 14.77, - "co2_cost": 3.458282865864776 - } - }, - { - "id": "netcat420/DeepSeek-R1-Distill-Qwen-MFANN-Slerp-7b_float16_12006d09cf7310f40da990ce9107bffcb2b708df_True", - "model": { - "name": "netcat420/DeepSeek-R1-Distill-Qwen-MFANN-Slerp-7b", - "sha": "12006d09cf7310f40da990ce9107bffcb2b708df", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.574293533040167, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.11500596195871399, - "normalized_score": 11.500596195871399 - }, - "bbh": { - "name": "BBH", - "value": 0.28767781029884354, - "normalized_score": 2.015537689546553 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0015105740181268882, - "normalized_score": 0.1510574018126888 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.3723854166666667, - "normalized_score": 4.881510416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10895944148936171, - "normalized_score": 0.9954934988179669 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-23", - "submission_date": "2025-01-23", - "generation": 1, - "base_model": "netcat420/DeepSeek-R1-Distill-Qwen-MFANN-Slerp-7b (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.8742780635509655 - } - }, - { - "id": "netcat420/DeepSeek-R1-MFANN-TIES-unretrained-7b_float16_836a8aa29ce3e9d46c4f1ab2312f20cac9802649_True", - 
"model": { - "name": "netcat420/DeepSeek-R1-MFANN-TIES-unretrained-7b", - "sha": "836a8aa29ce3e9d46c4f1ab2312f20cac9802649", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 5.947003320776368, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2586880587951081, - "normalized_score": 25.86880587951081 - }, - "bbh": { - "name": "BBH", - "value": 0.30859903405301287, - "normalized_score": 4.561587443058031 - }, - "math": { - "name": "MATH Level 5", - "value": 0.012084592145015106, - "normalized_score": 1.2084592145015105 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2550335570469799, - "normalized_score": 0.6711409395973182 - }, - "musr": { - "name": "MUSR", - "value": 0.3527291666666667, - "normalized_score": 1.7578124999999993 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11452792553191489, - "normalized_score": 1.6142139479905429 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-22", - "submission_date": "2025-01-22", - "generation": 1, - "base_model": "netcat420/DeepSeek-R1-MFANN-TIES-unretrained-7b (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.3564000813676969 - } - }, - { - "id": "netcat420/Llama3.1-MFANN-8b_float16_6714fe00996d2679e9325b503ab991f4ecc0273d_False", - "model": { - "name": "netcat420/Llama3.1-MFANN-8b", - "sha": "6714fe00996d2679e9325b503ab991f4ecc0273d", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.117063178518029, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.29695651981187693, - "normalized_score": 29.695651981187694 - }, - "bbh": { - "name": "BBH", - "value": 0.4281154680742545, - "normalized_score": 19.286683760193657 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02945619335347432, - "normalized_score": 2.9456193353474323 - }, - "gpqa": { - "name": "GPQA", - "value": 0.287751677852349, - "normalized_score": 5.033557046979867 - }, - "musr": { - "name": "MUSR", - "value": 0.33790625, - "normalized_score": 2.5716145833333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27252327127659576, - "normalized_score": 19.169252364066196 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-23", - "submission_date": "2024-12-23", - "generation": 1, - "base_model": "netcat420/Llama3.1-MFANN-8b (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.401012415623717 - } - }, - { - "id": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V2_float16_0e649dd355ad7d562f9346c96642c24eff35338e_False", - "model": { - "name": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V2", - "sha": "0e649dd355ad7d562f9346c96642c24eff35338e", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.213728182261345, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4209796672828096, - "normalized_score": 42.097966728280966 - }, - 
"bbh": { - "name": "BBH", - "value": 0.49237606236472237, - "normalized_score": 26.938370096724565 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07628398791540786, - "normalized_score": 7.628398791540786 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.37276041666666665, - "normalized_score": 4.328385416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35222739361702127, - "normalized_score": 28.02526595744681 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-08", - "submission_date": "2024-11-09", - "generation": 0, - "base_model": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V2", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.4082258702683519 - } - }, - { - "id": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V3_float16_381cf003a5e28d2b273226364b568cc60b857b5b_False", - "model": { - "name": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V3", - "sha": "381cf003a5e28d2b273226364b568cc60b857b5b", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.222030340557666, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4238021782204551, - "normalized_score": 42.38021782204551 - }, - "bbh": { - "name": "BBH", - "value": 0.4914021594225444, - "normalized_score": 26.978850946197483 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0755287009063444, - "normalized_score": 7.552870090634441 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.37406249999999996, - "normalized_score": 4.491145833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34898603723404253, - "normalized_score": 27.665115248226947 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-25", - "submission_date": "2024-11-26", - "generation": 1, - "base_model": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V3 (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.4418208917092485 - } - }, - { - "id": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-V4_float16_af160f1cf089ccbcbf00f99b951797a1f3daeb04_False", - "model": { - "name": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-V4", - "sha": "af160f1cf089ccbcbf00f99b951797a1f3daeb04", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.39947102160331, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.41688275996577967, - "normalized_score": 41.68827599657797 - }, - "bbh": { - "name": "BBH", - "value": 0.4908971108837563, - "normalized_score": 26.706074443441803 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06797583081570997, - "normalized_score": 6.797583081570997 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.38209374999999995, - 
"normalized_score": 5.861718750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35164561170212766, - "normalized_score": 27.960623522458622 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-08", - "submission_date": "2024-11-09", - "generation": 0, - "base_model": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-V4", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4449346149215017 - } - }, - { - "id": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-V5_float16_e0502b359816fe3ecd4f7206e5230398604fdfe2_False", - "model": { - "name": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-V5", - "sha": "e0502b359816fe3ecd4f7206e5230398604fdfe2", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.493723327477287, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4328947193446721, - "normalized_score": 43.289471934467215 - }, - "bbh": { - "name": "BBH", - "value": 0.4951892200623516, - "normalized_score": 27.367143487206935 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08157099697885196, - "normalized_score": 8.157099697885197 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2936241610738255, - "normalized_score": 5.8165548098433995 - }, - "musr": { - "name": "MUSR", - "value": 0.378125, - "normalized_score": 5.165625000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3444980053191489, - "normalized_score": 27.16644503546099 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-25", - "submission_date": "2024-11-26", - "generation": 1, - "base_model": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-V5 (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.411251889010262 - } - }, - { - "id": "netcat420/MFANN-Llama3.1-Abliterated-Slerp-TIES_float16_dbe0a3b69206c042de2b0a96fc156feeecaa49c7_False", - "model": { - "name": "netcat420/MFANN-Llama3.1-Abliterated-Slerp-TIES", - "sha": "dbe0a3b69206c042de2b0a96fc156feeecaa49c7", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.248004862747575, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42934746472692453, - "normalized_score": 42.93474647269245 - }, - "bbh": { - "name": "BBH", - "value": 0.49675121796238325, - "normalized_score": 27.59982935067644 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06646525679758308, - "normalized_score": 6.646525679758309 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.3686979166666667, - "normalized_score": 4.587239583333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3531416223404255, - "normalized_score": 28.126846926713938 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-28", - "submission_date": "2024-10-29", - "generation": 1, - "base_model": 
"netcat420/MFANN-Llama3.1-Abliterated-Slerp-TIES (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.5466287967945442 - } - }, - { - "id": "netcat420/MFANN-Llama3.1-Abliterated-Slerp-V3.2_float16_56abb76e65cbf9dc49af662b09894d119d49705a_False", - "model": { - "name": "netcat420/MFANN-Llama3.1-Abliterated-Slerp-V3.2", - "sha": "56abb76e65cbf9dc49af662b09894d119d49705a", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.05371896057877, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.41281134057633745, - "normalized_score": 41.28113405763375 - }, - "bbh": { - "name": "BBH", - "value": 0.49782535474346185, - "normalized_score": 27.774394299037056 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0702416918429003, - "normalized_score": 7.02416918429003 - }, - "gpqa": { - "name": "GPQA", - "value": 0.287751677852349, - "normalized_score": 5.033557046979867 - }, - "musr": { - "name": "MUSR", - "value": 0.37542708333333336, - "normalized_score": 5.128385416666671 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3527260638297872, - "normalized_score": 28.08067375886525 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-28", - "submission_date": "2024-10-29", - "generation": 1, - "base_model": "netcat420/MFANN-Llama3.1-Abliterated-Slerp-V3.2 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.4828376977168216 - } - }, - { - "id": "netcat420/MFANN-SFT_float16_247f2ce5841d38cef59b73a7f8af857627d254bf_False", - "model": { - "name": "netcat420/MFANN-SFT", - "sha": "247f2ce5841d38cef59b73a7f8af857627d254bf", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.932006161848246, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.36822298168858625, - "normalized_score": 36.82229816885863 - }, - "bbh": { - "name": "BBH", - "value": 0.485188719488523, - "normalized_score": 26.20853268810661 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05966767371601209, - "normalized_score": 5.966767371601208 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3162751677852349, - "normalized_score": 8.83668903803132 - }, - "musr": { - "name": "MUSR", - "value": 0.3725416666666666, - "normalized_score": 3.801041666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3336103723404255, - "normalized_score": 25.956708037825056 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-16", - "submission_date": "2024-12-20", - "generation": 1, - "base_model": "netcat420/MFANN-SFT (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.3116005548268403 - } - }, - { - "id": "netcat420/MFANN-abliterated-phi2-merge-unretrained_float16_cfc2d479871655f620dd741d8938b0e4b6df1d3e_True", - "model": { - "name": "netcat420/MFANN-abliterated-phi2-merge-unretrained", - "sha": "cfc2d479871655f620dd741d8938b0e4b6df1d3e", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": 
"Original", - "architecture": "PhiForCausalLM", - "average_score": 9.638779331398394, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3005037744296245, - "normalized_score": 30.05037744296245 - }, - "bbh": { - "name": "BBH", - "value": 0.4104131503721586, - "normalized_score": 17.590366415972785 - }, - "math": { - "name": "MATH Level 5", - "value": 0.028700906344410877, - "normalized_score": 2.8700906344410875 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.31834375, - "normalized_score": 0.5596354166666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.14777260638297873, - "normalized_score": 5.308067375886525 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-15", - "submission_date": "2025-01-15", - "generation": 1, - "base_model": "netcat420/MFANN-abliterated-phi2-merge-unretrained (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 2.775, - "co2_cost": 0.8952711221903973 - } - }, - { - "id": "netcat420/MFANN-llama3.1-Abliterated-SLERP_float16_0c7b2916727e6c28bbca2aa613b8247b66905915_False", - "model": { - "name": "netcat420/MFANN-llama3.1-Abliterated-SLERP", - "sha": "0c7b2916727e6c28bbca2aa613b8247b66905915", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.881633563903852, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25906262051357065, - "normalized_score": 25.906262051357064 - }, - "bbh": { - "name": "BBH", - "value": 0.45744999460878283, - "normalized_score": 22.28062513418979 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04833836858006042, - "normalized_score": 4.833836858006042 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27348993288590606, - "normalized_score": 3.1319910514541416 - }, - "musr": { - "name": "MUSR", - "value": 0.3809166666666666, - "normalized_score": 5.714583333333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2928025265957447, - "normalized_score": 21.42250295508274 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-25", - "submission_date": "2024-10-07", - "generation": 1, - "base_model": "netcat420/MFANN-llama3.1-Abliterated-SLERP (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.547158362175965 - } - }, - { - "id": "netcat420/MFANN-llama3.1-abliterated-SLERP-v3_float16_f90a20024060942826302c30860572c227dd4013_False", - "model": { - "name": "netcat420/MFANN-llama3.1-abliterated-SLERP-v3", - "sha": "f90a20024060942826302c30860572c227dd4013", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.042261525952018, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.37993856301280604, - "normalized_score": 37.9938563012806 - }, - "bbh": { - "name": "BBH", - "value": 0.49305765460927126, - "normalized_score": 27.1872703942033 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06419939577039276, - 
"normalized_score": 6.419939577039275 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.36603125000000003, - "normalized_score": 3.053906250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35305851063829785, - "normalized_score": 28.117612293144205 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-07", - "submission_date": "2024-10-07", - "generation": 1, - "base_model": "netcat420/MFANN-llama3.1-abliterated-SLERP-v3 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.5831393048927547 - } - }, - { - "id": "netcat420/MFANN-llama3.1-abliterated-SLERP-v3.1_float16_6d306eb66466cb8e1456a36f3895890a117e91e4_False", - "model": { - "name": "netcat420/MFANN-llama3.1-abliterated-SLERP-v3.1", - "sha": "6d306eb66466cb8e1456a36f3895890a117e91e4", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.96623306213071, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4201551882338861, - "normalized_score": 42.01551882338861 - }, - "bbh": { - "name": "BBH", - "value": 0.492068920606988, - "normalized_score": 27.026315532744473 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06948640483383686, - "normalized_score": 6.948640483383686 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29278523489932884, - "normalized_score": 5.7046979865771785 - }, - "musr": { - "name": "MUSR", - "value": 0.3686354166666667, - "normalized_score": 3.846093750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3543051861702128, - "normalized_score": 28.25613179669031 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-08", - "submission_date": "2024-10-17", - "generation": 1, - "base_model": "netcat420/MFANN-llama3.1-abliterated-SLERP-v3.1 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 2.661015065343464 - } - }, - { - "id": "netcat420/MFANN-llama3.1-abliterated-v2_float16_3d0a5d3634726e1a63ac84bee561b346960ca1d7_False", - "model": { - "name": "netcat420/MFANN-llama3.1-abliterated-v2", - "sha": "3d0a5d3634726e1a63ac84bee561b346960ca1d7", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.77111132997354, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4429114748866341, - "normalized_score": 44.29114748866341 - }, - "bbh": { - "name": "BBH", - "value": 0.4940829733015402, - "normalized_score": 27.35361826731554 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07401812688821752, - "normalized_score": 7.401812688821751 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29278523489932884, - "normalized_score": 5.7046979865771785 - }, - "musr": { - "name": "MUSR", - "value": 0.3845416666666666, - "normalized_score": 6.201041666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3490691489361702, - "normalized_score": 27.67434988179669 - } - }, - "features": { - "is_not_available_on_hub": false, - 
"is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-04", - "submission_date": "2024-10-07", - "generation": 1, - "base_model": "netcat420/MFANN-llama3.1-abliterated-v2 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.649047154175584 - } - }, - { - "id": "netcat420/MFANN-phigments-slerp-V2_float16_94596dab22ab78f0d2ec00b8e33c8fa98581ad0f_False", - "model": { - "name": "netcat420/MFANN-phigments-slerp-V2", - "sha": "94596dab22ab78f0d2ec00b8e33c8fa98581ad0f", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "PhiForCausalLM", - "average_score": 16.26870816278764, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.32316032571355113, - "normalized_score": 32.316032571355116 - }, - "bbh": { - "name": "BBH", - "value": 0.48272762171598743, - "normalized_score": 26.927491544080876 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03172205438066465, - "normalized_score": 3.1722054380664653 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.40372916666666664, - "normalized_score": 13.099479166666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2716921542553192, - "normalized_score": 19.076906028368796 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-23", - "submission_date": "2024-10-26", - "generation": 1, - "base_model": "netcat420/MFANN-phigments-slerp-V2 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 2.78, - "co2_cost": 0.8162400046189335 - } - }, - { - "id": "netcat420/MFANN-phigments-slerp-V3.2_float16_3fdb0794f6eb757bf2e4a6f378caed1863e9074c_False", - "model": { - "name": "netcat420/MFANN-phigments-slerp-V3.2", - "sha": "3fdb0794f6eb757bf2e4a6f378caed1863e9074c", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "PhiForCausalLM", - "average_score": 16.453301876572546, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.35243598097492435, - "normalized_score": 35.24359809749243 - }, - "bbh": { - "name": "BBH", - "value": 0.4808549324972969, - "normalized_score": 26.91803539968917 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03323262839879154, - "normalized_score": 3.3232628398791544 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.3707708333333333, - "normalized_score": 9.81302083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2705285904255319, - "normalized_score": 18.947621158392433 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-06", - "submission_date": "2025-02-06", - "generation": 1, - "base_model": "netcat420/MFANN-phigments-slerp-V3.2 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 2.78, - "co2_cost": 0.5783076437872776 - } - }, - { - "id": "netcat420/MFANN-phigments-slerp-V3.3_float16_c05613efa47825622aa16e2b4f881549cdbec997_False", - "model": { - 
"name": "netcat420/MFANN-phigments-slerp-V3.3", - "sha": "c05613efa47825622aa16e2b4f881549cdbec997", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "PhiForCausalLM", - "average_score": 17.167734327311486, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.36909732842192056, - "normalized_score": 36.90973284219205 - }, - "bbh": { - "name": "BBH", - "value": 0.48952950463630956, - "normalized_score": 28.170621881092462 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03323262839879154, - "normalized_score": 3.3232628398791544 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2751677852348993, - "normalized_score": 3.355704697986576 - }, - "musr": { - "name": "MUSR", - "value": 0.38921874999999995, - "normalized_score": 11.219010416666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2802526595744681, - "normalized_score": 20.02807328605201 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-06", - "submission_date": "2025-02-06", - "generation": 1, - "base_model": "netcat420/MFANN-phigments-slerp-V3.3 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 2.78, - "co2_cost": 0.59147703430892 - } - }, - { - "id": "netcat420/MFANN3b_float16_ba03f833e89c335a5ee8f523a95892a15d22070e_False", - "model": { - "name": "netcat420/MFANN3b", - "sha": "ba03f833e89c335a5ee8f523a95892a15d22070e", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "PhiForCausalLM", - "average_score": 12.652447668077093, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2524435165361241, - "normalized_score": 25.244351653612416 - }, - "bbh": { - "name": "BBH", - "value": 0.4433128382028508, - "normalized_score": 22.23921095390767 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02190332326283988, - "normalized_score": 2.190332326283988 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.36060416666666667, - "normalized_score": 6.142187500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.23055186170212766, - "normalized_score": 14.505762411347517 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-13", - "submission_date": "2024-12-14", - "generation": 1, - "base_model": "netcat420/MFANN3b (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 2.78, - "co2_cost": 0.751985004082596 - } - }, - { - "id": "netcat420/MFANN3bv0.15_float16_20dbdfb9154cc2f6d43651fc8cea63a120220dc7_False", - "model": { - "name": "netcat420/MFANN3bv0.15", - "sha": "20dbdfb9154cc2f6d43651fc8cea63a120220dc7", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "PhiForCausalLM", - "average_score": 11.92455505796081, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2012105657433388, - "normalized_score": 20.12105657433388 - }, - "bbh": { - "name": "BBH", - "value": 0.453931293669888, - "normalized_score": 23.46934667082661 - }, - "math": { - 
"name": "MATH Level 5", - "value": 0.026435045317220542, - "normalized_score": 2.643504531722054 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2516778523489933, - "normalized_score": 0.22371364653244186 - }, - "musr": { - "name": "MUSR", - "value": 0.3957916666666667, - "normalized_score": 8.773958333333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.24684175531914893, - "normalized_score": 16.315750591016545 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-04", - "submission_date": "2024-07-05", - "generation": 0, - "base_model": "netcat420/MFANN3bv0.15", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 2.78, - "co2_cost": 0.9342759214225443 - } - }, - { - "id": "netcat420/MFANN3bv0.18_float16_3e792e3413217b63ea9caa0e8b8595fbeb236a69_False", - "model": { - "name": "netcat420/MFANN3bv0.18", - "sha": "3e792e3413217b63ea9caa0e8b8595fbeb236a69", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "PhiForCausalLM", - "average_score": 12.649876255790772, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22064455644356973, - "normalized_score": 22.064455644356972 - }, - "bbh": { - "name": "BBH", - "value": 0.4514366169824164, - "normalized_score": 23.07340376774899 - }, - "math": { - "name": "MATH Level 5", - "value": 0.024924471299093656, - "normalized_score": 2.492447129909366 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2575503355704698, - "normalized_score": 1.0067114093959737 - }, - "musr": { - "name": "MUSR", - "value": 0.40236458333333336, - "normalized_score": 10.595572916666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.25, - "normalized_score": 16.666666666666664 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-25", - "submission_date": "2024-07-25", - "generation": 0, - "base_model": "netcat420/MFANN3bv0.18", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 2.78, - "co2_cost": 0.9650273588651338 - } - }, - { - "id": "netcat420/MFANN3bv0.19_float16_073d42274686f5cb6ef6ff9f6ade24eab198e1f2_False", - "model": { - "name": "netcat420/MFANN3bv0.19", - "sha": "073d42274686f5cb6ef6ff9f6ade24eab198e1f2", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "PhiForCausalLM", - "average_score": 12.591488714276778, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22581528123157665, - "normalized_score": 22.581528123157664 - }, - "bbh": { - "name": "BBH", - "value": 0.4515800678058734, - "normalized_score": 22.9070546869096 - }, - "math": { - "name": "MATH Level 5", - "value": 0.022658610271903322, - "normalized_score": 2.2658610271903323 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2575503355704698, - "normalized_score": 1.0067114093959737 - }, - "musr": { - "name": "MUSR", - "value": 0.40239583333333334, - "normalized_score": 9.899479166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.25199468085106386, - "normalized_score": 16.888297872340427 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - 
"is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-04", - "submission_date": "2024-08-08", - "generation": 0, - "base_model": "netcat420/MFANN3bv0.19", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 2.78, - "co2_cost": 0.9729765164528665 - } - }, - { - "id": "netcat420/MFANN3bv0.20_float16_ac8ba24559cbdb5704d77b602580d911c265fdee_False", - "model": { - "name": "netcat420/MFANN3bv0.20", - "sha": "ac8ba24559cbdb5704d77b602580d911c265fdee", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "PhiForCausalLM", - "average_score": 12.5720051522728, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21934578030736224, - "normalized_score": 21.934578030736223 - }, - "bbh": { - "name": "BBH", - "value": 0.4493365019423472, - "normalized_score": 22.790710795250106 - }, - "math": { - "name": "MATH Level 5", - "value": 0.026435045317220542, - "normalized_score": 2.643504531722054 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.4077291666666667, - "normalized_score": 10.166145833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.25, - "normalized_score": 16.666666666666664 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-29", - "submission_date": "2024-08-29", - "generation": 2, - "base_model": "netcat420/MFANN3bv0.19.12 (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 2.78, - "co2_cost": 1.0188364205113325 - } - }, - { - "id": "netcat420/MFANN3bv0.21_float16_8e78416dce916b69247fa03bd587369d0dade5ed_False", - "model": { - "name": "netcat420/MFANN3bv0.21", - "sha": "8e78416dce916b69247fa03bd587369d0dade5ed", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "PhiForCausalLM", - "average_score": 12.00855289802916, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1909189838517356, - "normalized_score": 19.09189838517356 - }, - "bbh": { - "name": "BBH", - "value": 0.44700236898039053, - "normalized_score": 22.583425716572332 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03172205438066465, - "normalized_score": 3.1722054380664653 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.37594791666666666, - "normalized_score": 9.82682291666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.23927859042553193, - "normalized_score": 15.475398936170212 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-23", - "submission_date": "2024-09-24", - "generation": 1, - "base_model": "netcat420/MFANN3bv0.21 (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 2.78, - "co2_cost": 1.9486781620716105 - } - }, - { - "id": "netcat420/MFANN3bv0.22_float16_20c26f267ebe62ef1da037a5b840a304cb8d740b_False", - "model": { - "name": "netcat420/MFANN3bv0.22", - "sha": "20c26f267ebe62ef1da037a5b840a304cb8d740b", - "precision": "float16", - "type": 
"fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "PhiForCausalLM", - "average_score": 12.256506387257199, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1979381374752324, - "normalized_score": 19.793813747523238 - }, - "bbh": { - "name": "BBH", - "value": 0.44851095830051274, - "normalized_score": 22.49153679693963 - }, - "math": { - "name": "MATH Level 5", - "value": 0.026435045317220542, - "normalized_score": 2.643504531722054 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.35213541666666665, - "normalized_score": 10.183593749999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2517453457446808, - "normalized_score": 16.860593971631204 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-25", - "submission_date": "2024-10-26", - "generation": 0, - "base_model": "netcat420/MFANN3bv0.22", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 2.78, - "co2_cost": 0.7913362528706576 - } - }, - { - "id": "netcat420/MFANN3bv0.23_float16_93eacd43dcb307016e22a4d9f9f8deef49cd9111_False", - "model": { - "name": "netcat420/MFANN3bv0.23", - "sha": "93eacd43dcb307016e22a4d9f9f8deef49cd9111", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "PhiForCausalLM", - "average_score": 11.448026000697595, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20480768804549704, - "normalized_score": 20.480768804549704 - }, - "bbh": { - "name": "BBH", - "value": 0.44954178056127364, - "normalized_score": 22.696340563264982 - }, - "math": { - "name": "MATH Level 5", - "value": 0.024924471299093656, - "normalized_score": 2.492447129909366 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2516778523489933, - "normalized_score": 0.22371364653244186 - }, - "musr": { - "name": "MUSR", - "value": 0.3427395833333333, - "normalized_score": 7.042447916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2417719414893617, - "normalized_score": 15.752437943262413 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-06", - "submission_date": "2024-11-07", - "generation": 0, - "base_model": "netcat420/MFANN3bv0.23", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 2.78, - "co2_cost": 0.7763674723624213 - } - }, - { - "id": "netcat420/MFANN3bv0.24_float16_55813c2586488a2e7be5883f7e695396f5629d3e_False", - "model": { - "name": "netcat420/MFANN3bv0.24", - "sha": "55813c2586488a2e7be5883f7e695396f5629d3e", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "PhiForCausalLM", - "average_score": 11.81028360635554, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2200450360598767, - "normalized_score": 22.00450360598767 - }, - "bbh": { - "name": "BBH", - "value": 0.4407346600666096, - "normalized_score": 21.545384724695676 - }, - "math": { - "name": "MATH Level 5", - "value": 0.027945619335347432, - "normalized_score": 2.794561933534743 - }, - "gpqa": { - "name": 
"GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.3520729166666667, - "normalized_score": 8.375781250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.23520611702127658, - "normalized_score": 15.022901891252952 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-21", - "submission_date": "2024-11-22", - "generation": 0, - "base_model": "netcat420/MFANN3bv0.24", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 2.78, - "co2_cost": 0.7690731161151534 - } - }, - { - "id": "netcat420/MFANN3bv1.1_float16_2089ba193df157575eae482f0df4907fd3ea14ae_True", - "model": { - "name": "netcat420/MFANN3bv1.1", - "sha": "2089ba193df157575eae482f0df4907fd3ea14ae", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "PhiForCausalLM", - "average_score": 6.659216615990036, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2506948230694557, - "normalized_score": 25.06948230694557 - }, - "bbh": { - "name": "BBH", - "value": 0.3397086626022651, - "normalized_score": 8.39170874742591 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02039274924471299, - "normalized_score": 2.0392749244712993 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26677852348993286, - "normalized_score": 2.2371364653243813 - }, - "musr": { - "name": "MUSR", - "value": 0.3223125, - "normalized_score": 0.45572916666666624 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11585771276595745, - "normalized_score": 1.7619680851063828 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-03", - "submission_date": "2025-01-03", - "generation": 0, - "base_model": "netcat420/MFANN3bv1.1", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 2.775, - "co2_cost": 0.7763197255061175 - } - }, - { - "id": "netcat420/MFANN3bv1.2_float16_0643ded63b35beb43caf2d2c0bd8003fdb81b0ec_True", - "model": { - "name": "netcat420/MFANN3bv1.2", - "sha": "0643ded63b35beb43caf2d2c0bd8003fdb81b0ec", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "PhiForCausalLM", - "average_score": 8.060474667336623, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2686050789682487, - "normalized_score": 26.86050789682487 - }, - "bbh": { - "name": "BBH", - "value": 0.3659932511014956, - "normalized_score": 11.121791518461544 - }, - "math": { - "name": "MATH Level 5", - "value": 0.026435045317220542, - "normalized_score": 2.643504531722054 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.31555208333333334, - "normalized_score": 0.9440104166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.14502992021276595, - "normalized_score": 5.003324468085105 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-21", - "submission_date": "2025-01-22", - "generation": 1, - 
"base_model": "netcat420/MFANN3bv1.2 (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 2.775, - "co2_cost": 0.7992262592949151 - } - }, - { - "id": "netcat420/MFANN3bv1.3_float16_dab137e547fa2e9f23dbf74ec602acfc6131e5a0_False", - "model": { - "name": "netcat420/MFANN3bv1.3", - "sha": "dab137e547fa2e9f23dbf74ec602acfc6131e5a0", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "PhiForCausalLM", - "average_score": 11.533255279970446, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25466650709007654, - "normalized_score": 25.466650709007656 - }, - "bbh": { - "name": "BBH", - "value": 0.4456312489762861, - "normalized_score": 22.637008666537838 - }, - "math": { - "name": "MATH Level 5", - "value": 0.021148036253776436, - "normalized_score": 2.1148036253776437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2575503355704698, - "normalized_score": 1.0067114093959737 - }, - "musr": { - "name": "MUSR", - "value": 0.329875, - "normalized_score": 3.801041666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.22755984042553193, - "normalized_score": 14.173315602836881 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-31", - "submission_date": "2025-02-01", - "generation": 0, - "base_model": "netcat420/MFANN3bv1.3", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 2.78, - "co2_cost": 0.8240365026004964 - } - }, - { - "id": "netcat420/MFANN3bv1.4_float16_bed48b1506d7f8866f7d23c89faee4ff76690f5a_False", - "model": { - "name": "netcat420/MFANN3bv1.4", - "sha": "bed48b1506d7f8866f7d23c89faee4ff76690f5a", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "PhiForCausalLM", - "average_score": 16.497599656783464, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.35243598097492435, - "normalized_score": 35.24359809749243 - }, - "bbh": { - "name": "BBH", - "value": 0.4808549324972969, - "normalized_score": 26.91803539968917 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03700906344410876, - "normalized_score": 3.7009063444108756 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.3707708333333333, - "normalized_score": 9.81302083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2705285904255319, - "normalized_score": 18.947621158392433 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-06", - "submission_date": "2025-02-06", - "generation": 1, - "base_model": "netcat420/MFANN3bv1.4 (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 2.78, - "co2_cost": 0.6589172532409903 - } - }, - { - "id": "netcat420/MFANNv0.19_float16_af26a25549b7ad291766c479bebda58f15fbff42_False", - "model": { - "name": "netcat420/MFANNv0.19", - "sha": "af26a25549b7ad291766c479bebda58f15fbff42", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.38906583250676, - "has_chat_template": false - }, - 
"evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.30567449921763146, - "normalized_score": 30.567449921763146 - }, - "bbh": { - "name": "BBH", - "value": 0.47313832038755316, - "normalized_score": 24.92410586579366 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04154078549848943, - "normalized_score": 4.1540785498489425 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.35269791666666667, - "normalized_score": 2.720572916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.24725731382978725, - "normalized_score": 16.36192375886525 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-27", - "submission_date": "2024-07-27", - "generation": 0, - "base_model": "netcat420/MFANNv0.19", - "hub_license": "llama3.1", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.9141575270876836 - } - }, - { - "id": "netcat420/MFANNv0.20_float16_e612e57c933870b8990ac2bc217c434f3ffc84bd_False", - "model": { - "name": "netcat420/MFANNv0.20", - "sha": "e612e57c933870b8990ac2bc217c434f3ffc84bd", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.46165681188948, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.34786477657061043, - "normalized_score": 34.78647765706104 - }, - "bbh": { - "name": "BBH", - "value": 0.4574431878198548, - "normalized_score": 22.401696904581673 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04984894259818731, - "normalized_score": 4.984894259818732 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.38739583333333333, - "normalized_score": 6.757812500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32022938829787234, - "normalized_score": 24.469932033096924 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-07", - "submission_date": "2024-08-08", - "generation": 0, - "base_model": "netcat420/MFANNv0.20", - "hub_license": "llama3.1", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.7357681975677204 - } - }, - { - "id": "netcat420/MFANNv0.21_float16_8c71d0eb419f54c489fa1ddf55d4bd18a1fb27d8_False", - "model": { - "name": "netcat420/MFANNv0.21", - "sha": "8c71d0eb419f54c489fa1ddf55d4bd18a1fb27d8", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 15.886167017885262, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3233099287667832, - "normalized_score": 32.33099287667832 - }, - "bbh": { - "name": "BBH", - "value": 0.45763723048372523, - "normalized_score": 22.058431786302453 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05740181268882175, - "normalized_score": 5.740181268882175 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2785234899328859, - "normalized_score": 3.8031319910514525 - }, - "musr": { - "name": "MUSR", - "value": 0.3993333333333333, - "normalized_score": 8.81666666666667 - 
}, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3031083776595745, - "normalized_score": 22.567597517730498 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-31", - "submission_date": "2024-09-02", - "generation": 2, - "base_model": "netcat420/MFANNv0.20.12 (Merge)", - "hub_license": "llama3", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.7588214654326866 - } - }, - { - "id": "netcat420/MFANNv0.22.1_float16_98108142480b802a3e1bb27e3d47075a4ea3a4f1_False", - "model": { - "name": "netcat420/MFANNv0.22.1", - "sha": "98108142480b802a3e1bb27e3d47075a4ea3a4f1", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 15.66737725944644, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3089469274857378, - "normalized_score": 30.894692748573785 - }, - "bbh": { - "name": "BBH", - "value": 0.46608928527824584, - "normalized_score": 23.602792666118614 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05362537764350453, - "normalized_score": 5.362537764350453 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.3753020833333333, - "normalized_score": 4.64609375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33427526595744683, - "normalized_score": 26.03058510638298 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-04", - "submission_date": "2024-10-05", - "generation": 1, - "base_model": "netcat420/MFANNv0.22.1 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.681057346999856 - } - }, - { - "id": "netcat420/MFANNv0.23_float16_cf7fb44a8c858602d7fcba58adcbd514c7e08ba4_False", - "model": { - "name": "netcat420/MFANNv0.23", - "sha": "cf7fb44a8c858602d7fcba58adcbd514c7e08ba4", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.652655864272365, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3127435205255389, - "normalized_score": 31.274352052553894 - }, - "bbh": { - "name": "BBH", - "value": 0.4898102063834755, - "normalized_score": 27.04234546686432 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04984894259818731, - "normalized_score": 4.984894259818732 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28439597315436244, - "normalized_score": 4.5861297539149914 - }, - "musr": { - "name": "MUSR", - "value": 0.3767916666666667, - "normalized_score": 5.498958333333338 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33876329787234044, - "normalized_score": 26.529255319148938 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-27", - "submission_date": "2024-10-29", - "generation": 1, - "base_model": "netcat420/MFANNv0.23 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.6207594233633593 - } - }, - { 
- "id": "netcat420/MFANNv0.24_float16_57ce382fede1adce68bdb95a386255fa363077d7_False", - "model": { - "name": "netcat420/MFANNv0.24", - "sha": "57ce382fede1adce68bdb95a386255fa363077d7", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.398373723349362, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3162409074588758, - "normalized_score": 31.624090745887578 - }, - "bbh": { - "name": "BBH", - "value": 0.479027491915232, - "normalized_score": 25.3517249924116 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06117824773413897, - "normalized_score": 6.117824773413897 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28439597315436244, - "normalized_score": 4.5861297539149914 - }, - "musr": { - "name": "MUSR", - "value": 0.3753958333333333, - "normalized_score": 4.624479166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3347739361702128, - "normalized_score": 26.08599290780142 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-07", - "submission_date": "2024-11-09", - "generation": 1, - "base_model": "netcat420/MFANNv0.24 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.4878053287353832 - } - }, - { - "id": "netcat420/MFANNv0.25_float16_cff1e1772fc7f4f3e68ad53d8589df3f52556e38_False", - "model": { - "name": "netcat420/MFANNv0.25", - "sha": "cff1e1772fc7f4f3e68ad53d8589df3f52556e38", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.59696503760558, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.34666573580322435, - "normalized_score": 34.666573580322435 - }, - "bbh": { - "name": "BBH", - "value": 0.47940650861209216, - "normalized_score": 25.409784264889016 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0581570996978852, - "normalized_score": 5.81570996978852 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2802013422818792, - "normalized_score": 4.026845637583895 - }, - "musr": { - "name": "MUSR", - "value": 0.36879166666666663, - "normalized_score": 3.632291666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33427526595744683, - "normalized_score": 26.03058510638298 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-25", - "submission_date": "2024-11-26", - "generation": 1, - "base_model": "netcat420/MFANNv0.25 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.4245766930388508 - } - }, - { - "id": "netcat420/Qwen2.5-7B-nerd-uncensored-v0.9-MFANN_float16_cc114e017a8d69c0940fe3bdde0f2e1cafeb1078_True", - "model": { - "name": "netcat420/Qwen2.5-7B-nerd-uncensored-v0.9-MFANN", - "sha": "cc114e017a8d69c0940fe3bdde0f2e1cafeb1078", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 28.031122057978823, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5878413720040603, - "normalized_score": 
58.784137200406036 - }, - "bbh": { - "name": "BBH", - "value": 0.5236664966992856, - "normalized_score": 32.26698603219006 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3376132930513595, - "normalized_score": 33.76132930513595 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28104026845637586, - "normalized_score": 4.138702460850116 - }, - "musr": { - "name": "MUSR", - "value": 0.39257291666666666, - "normalized_score": 6.971614583333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.390375664893617, - "normalized_score": 32.263962765957444 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-02", - "generation": 0, - "base_model": "netcat420/Qwen2.5-7B-nerd-uncensored-v0.9-MFANN", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 7.616, - "co2_cost": 1.389124408781623 - } - }, - { - "id": "netcat420/Qwen2.5-7b-MFANN-slerp_float16_7760e13b9b6c654ce6c5509f865e4e54f8a00ef6_True", - "model": { - "name": "netcat420/Qwen2.5-7b-MFANN-slerp", - "sha": "7760e13b9b6c654ce6c5509f865e4e54f8a00ef6", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 27.709222898184922, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6532123654126606, - "normalized_score": 65.32123654126606 - }, - "bbh": { - "name": "BBH", - "value": 0.5088729928004616, - "normalized_score": 30.361031469633605 - }, - "math": { - "name": "MATH Level 5", - "value": 0.28700906344410876, - "normalized_score": 28.700906344410875 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2953020134228188, - "normalized_score": 6.040268456375841 - }, - "musr": { - "name": "MUSR", - "value": 0.40730208333333334, - "normalized_score": 8.979427083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3416722074468085, - "normalized_score": 26.852467494089833 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-25", - "generation": 0, - "base_model": "netcat420/Qwen2.5-7b-MFANN-slerp", - "hub_license": "mit", - "hub_hearts": 2, - "params_billions": 7.616, - "co2_cost": 1.2955584130195623 - } - }, - { - "id": "netcat420/Qwen2.5-7b-nerd-uncensored-MFANN-slerp_float16_c70d4e9569bcde272d74d80e742f7a46ec6d37fc_True", - "model": { - "name": "netcat420/Qwen2.5-7b-nerd-uncensored-MFANN-slerp", - "sha": "c70d4e9569bcde272d74d80e742f7a46ec6d37fc", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.248121471104624, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15644711587476784, - "normalized_score": 15.644711587476785 - }, - "bbh": { - "name": "BBH", - "value": 0.2920111436321769, - "normalized_score": 1.7557228747317382 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.3791770833333333, - "normalized_score": 5.630468750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 
0.11003989361702128, - "normalized_score": 1.1155437352245863 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-25", - "generation": 1, - "base_model": "netcat420/Qwen2.5-7b-nerd-uncensored-MFANN-slerp (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.597223719083247 - } - }, - { - "id": "netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN_float16_f2fdf326b731948216853ada912b94cd0bc71fb9_True", - "model": { - "name": "netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN", - "sha": "f2fdf326b731948216853ada912b94cd0bc71fb9", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 25.396741920049962, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5742274941599401, - "normalized_score": 57.422749415994005 - }, - "bbh": { - "name": "BBH", - "value": 0.5071448530886461, - "normalized_score": 29.98074987281026 - }, - "math": { - "name": "MATH Level 5", - "value": 0.256797583081571, - "normalized_score": 25.6797583081571 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29278523489932884, - "normalized_score": 5.7046979865771785 - }, - "musr": { - "name": "MUSR", - "value": 0.40584375, - "normalized_score": 9.63046875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3156582446808511, - "normalized_score": 23.96202718676123 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-31", - "submission_date": "2025-01-01", - "generation": 0, - "base_model": "netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 7.616, - "co2_cost": 1.3103782032517788 - } - }, - { - "id": "netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN-Slerp-Unretrained_float16_f3e8300e7948b878564f1f4de1d98fc03cc18e32_True", - "model": { - "name": "netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN-Slerp-Unretrained", - "sha": "f3e8300e7948b878564f1f4de1d98fc03cc18e32", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 28.058882677645745, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6486411610083467, - "normalized_score": 64.86411610083468 - }, - "bbh": { - "name": "BBH", - "value": 0.5065573474607916, - "normalized_score": 29.939052758341504 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2990936555891239, - "normalized_score": 29.909365558912388 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2986577181208054, - "normalized_score": 6.487695749440718 - }, - "musr": { - "name": "MUSR", - "value": 0.41520833333333335, - "normalized_score": 10.134374999999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3431682180851064, - "normalized_score": 27.018690898345156 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-18", - "submission_date": "2025-01-19", - "generation": 1, - "base_model": 
"netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN-Slerp-Unretrained (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.2968626346961016 - } - }, - { - "id": "netcat420/Qwen2.5-DeepSeek-R1-MFANN-Slerp-7b_float16_5d4016402d161c445c5a5982e5783c97bd37d3b2_True", - "model": { - "name": "netcat420/Qwen2.5-DeepSeek-R1-MFANN-Slerp-7b", - "sha": "5d4016402d161c445c5a5982e5783c97bd37d3b2", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.630087573410409, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2675556412540947, - "normalized_score": 26.755564125409467 - }, - "bbh": { - "name": "BBH", - "value": 0.37890218644722085, - "normalized_score": 13.455601394009934 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01812688821752266, - "normalized_score": 1.812688821752266 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23238255033557048, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.35279166666666667, - "normalized_score": 2.232291666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16771941489361702, - "normalized_score": 7.524379432624113 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-23", - "submission_date": "2025-01-23", - "generation": 1, - "base_model": "netcat420/Qwen2.5-DeepSeek-R1-MFANN-Slerp-7b (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.4050991694505164 - } - }, - { - "id": "netcat420/Qwen2.5-MFANN-7b_float16_30757ed2483d24b161febb79bb8f6485bba6cb20_True", - "model": { - "name": "netcat420/Qwen2.5-MFANN-7b", - "sha": "30757ed2483d24b161febb79bb8f6485bba6cb20", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 26.18533023472354, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6097233119234742, - "normalized_score": 60.97233119234742 - }, - "bbh": { - "name": "BBH", - "value": 0.5054347004252888, - "normalized_score": 30.29029020592803 - }, - "math": { - "name": "MATH Level 5", - "value": 0.27870090634441086, - "normalized_score": 27.870090634441087 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2860738255033557, - "normalized_score": 4.809843400447425 - }, - "musr": { - "name": "MUSR", - "value": 0.4020625, - "normalized_score": 8.357812500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32330452127659576, - "normalized_score": 24.811613475177303 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-24", - "submission_date": "2025-01-24", - "generation": 0, - "base_model": "netcat420/Qwen2.5-MFANN-7b", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.369620060318377 - } - }, - { - "id": "netcat420/qwen2.5-MFANN-7b-SLERP-V1.2_float16_85cb895126edc0161c332515811fc26aacfb29ba_True", - "model": { - "name": "netcat420/qwen2.5-MFANN-7b-SLERP-V1.2", - "sha": "85cb895126edc0161c332515811fc26aacfb29ba", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - 
"architecture": "Qwen2ForCausalLM", - "average_score": 28.515188837080995, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6606060807546199, - "normalized_score": 66.060608075462 - }, - "bbh": { - "name": "BBH", - "value": 0.5111030308243185, - "normalized_score": 30.830881032801738 - }, - "math": { - "name": "MATH Level 5", - "value": 0.28700906344410876, - "normalized_score": 28.700906344410875 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.4259375, - "normalized_score": 12.1421875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34383311170212766, - "normalized_score": 27.09256796690307 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-09", - "submission_date": "2025-02-09", - "generation": 0, - "base_model": "netcat420/qwen2.5-MFANN-7b-SLERP-V1.2", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.675172916438298 - } - }, - { - "id": "netcat420/qwen2.5-MFANN-7b-SLERPv1.1_float16_62c8ce3c441cc8c0f7aa89189d008689c39935f9_True", - "model": { - "name": "netcat420/qwen2.5-MFANN-7b-SLERPv1.1", - "sha": "62c8ce3c441cc8c0f7aa89189d008689c39935f9", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 27.982247626031953, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6554852236510238, - "normalized_score": 65.54852236510237 - }, - "bbh": { - "name": "BBH", - "value": 0.5074761993537673, - "normalized_score": 29.97691243171087 - }, - "math": { - "name": "MATH Level 5", - "value": 0.29682779456193353, - "normalized_score": 29.68277945619335 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.41263541666666664, - "normalized_score": 10.112760416666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34483045212765956, - "normalized_score": 27.203383569739948 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "netcat420/qwen2.5-MFANN-7b-SLERPv1.1 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.9456545319047678 - } - }, - { - "id": "netcat420/qwen2.5-MFANN-7b-v1.1_float16_7f57ae2d56726223d9ed464abc4f70881eacb701_True", - "model": { - "name": "netcat420/qwen2.5-MFANN-7b-v1.1", - "sha": "7f57ae2d56726223d9ed464abc4f70881eacb701", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 26.134594460492067, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6088489651901399, - "normalized_score": 60.88489651901399 - }, - "bbh": { - "name": "BBH", - "value": 0.49666375554657477, - "normalized_score": 29.471725204280187 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2824773413897281, - "normalized_score": 28.247734138972806 - }, - "gpqa": { - "name": "GPQA", - "value": 
0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.41139583333333335, - "normalized_score": 9.7578125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3248005319148936, - "normalized_score": 24.977836879432623 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-08", - "submission_date": "2025-02-09", - "generation": 0, - "base_model": "netcat420/qwen2.5-MFANN-7b-v1.1", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.6459238625100258 - } - }, - { - "id": "netease-youdao/Confucius-o1-14B_bfloat16_e6c64e53adbcbbdff9e2114b4f61bd4f2aa1602c_True", - "model": { - "name": "netease-youdao/Confucius-o1-14B", - "sha": "e6c64e53adbcbbdff9e2114b4f61bd4f2aa1602c", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.52750121655241, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6378497941018719, - "normalized_score": 63.78497941018719 - }, - "bbh": { - "name": "BBH", - "value": 0.6299772409698484, - "normalized_score": 47.34523316427439 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4312688821752266, - "normalized_score": 43.126888217522655 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3649328859060403, - "normalized_score": 15.324384787472036 - }, - "musr": { - "name": "MUSR", - "value": 0.4338125, - "normalized_score": 14.193229166666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5265126329787234, - "normalized_score": 47.39029255319149 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-20", - "submission_date": "2025-01-27", - "generation": 2, - "base_model": "Qwen/Qwen2.5-14B", - "hub_license": "apache-2.0", - "hub_hearts": 37, - "params_billions": 14.77, - "co2_cost": 3.7832769050877437 - } - }, - { - "id": "newsbang/Homer-7B-v0.1_bfloat16_c953cc313ef5e5029efd057c0d3809a3b8d1cf9f_False", - "model": { - "name": "newsbang/Homer-7B-v0.1", - "sha": "c953cc313ef5e5029efd057c0d3809a3b8d1cf9f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 33.058437903590075, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6108724850064495, - "normalized_score": 61.087248500644954 - }, - "bbh": { - "name": "BBH", - "value": 0.5601389961416444, - "normalized_score": 37.30922654202453 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3859516616314199, - "normalized_score": 38.59516616314199 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32466442953020136, - "normalized_score": 9.955257270693513 - }, - "musr": { - "name": "MUSR", - "value": 0.43569791666666663, - "normalized_score": 12.79557291666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4474734042553192, - "normalized_score": 38.60815602836879 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-14", - "submission_date": "2024-11-14", - "generation": 0, - "base_model": 
"newsbang/Homer-7B-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.3814683906575003 - } - }, - { - "id": "newsbang/Homer-7B-v0.2_bfloat16_50b4ca941657ed362f5660aed8274a59a6b3fe2d_True", - "model": { - "name": "newsbang/Homer-7B-v0.2", - "sha": "50b4ca941657ed362f5660aed8274a59a6b3fe2d", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 33.0139580788785, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7493827488840721, - "normalized_score": 74.93827488840721 - }, - "bbh": { - "name": "BBH", - "value": 0.5517330182832224, - "normalized_score": 36.4034863975643 - }, - "math": { - "name": "MATH Level 5", - "value": 0.24773413897280966, - "normalized_score": 24.773413897280967 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33221476510067116, - "normalized_score": 10.96196868008949 - }, - "musr": { - "name": "MUSR", - "value": 0.42975, - "normalized_score": 13.118750000000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4409906914893617, - "normalized_score": 37.88785460992907 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-15", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.3491966845781551 - } - }, - { - "id": "newsbang/Homer-v0.3-Qwen2.5-7B_bfloat16_4fa38c6c590d8e9bbf2075b2fa9cc37e75cde5d4_True", - "model": { - "name": "newsbang/Homer-v0.3-Qwen2.5-7B", - "sha": "4fa38c6c590d8e9bbf2075b2fa9cc37e75cde5d4", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 31.314788831957532, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5154013572875525, - "normalized_score": 51.54013572875526 - }, - "bbh": { - "name": "BBH", - "value": 0.5480594290467807, - "normalized_score": 36.41367722607503 - }, - "math": { - "name": "MATH Level 5", - "value": 0.30891238670694865, - "normalized_score": 30.891238670694865 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3338926174496644, - "normalized_score": 11.185682326621922 - }, - "musr": { - "name": "MUSR", - "value": 0.47436458333333337, - "normalized_score": 19.462239583333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.445561835106383, - "normalized_score": 38.39575945626477 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-18", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.1712069632647955 - } - }, - { - "id": "newsbang/Homer-v0.4-Qwen2.5-7B_bfloat16_e5b73b06e63de7f77845463f8a11c93e82befd15_True", - "model": { - "name": "newsbang/Homer-v0.4-Qwen2.5-7B", - "sha": "e5b73b06e63de7f77845463f8a11c93e82befd15", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 33.944012752963395, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.799940823681166, - "normalized_score": 
79.9940823681166 - }, - "bbh": { - "name": "BBH", - "value": 0.5533099174800821, - "normalized_score": 36.6037028382434 - }, - "math": { - "name": "MATH Level 5", - "value": 0.27794561933534745, - "normalized_score": 27.794561933534744 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31543624161073824, - "normalized_score": 8.7248322147651 - }, - "musr": { - "name": "MUSR", - "value": 0.4310833333333333, - "normalized_score": 13.185416666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4362533244680851, - "normalized_score": 37.3614804964539 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-18", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.2794408280333436 - } - }, - { - "id": "newsbang/Homer-v0.5-Qwen2.5-7B_bfloat16_9dc7090b2226f9a2217f593518f734e3246001f9_True", - "model": { - "name": "newsbang/Homer-v0.5-Qwen2.5-7B", - "sha": "9dc7090b2226f9a2217f593518f734e3246001f9", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 34.76384451852059, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7880756393037142, - "normalized_score": 78.80756393037142 - }, - "bbh": { - "name": "BBH", - "value": 0.5540181073562815, - "normalized_score": 36.67808911980736 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3723564954682779, - "normalized_score": 37.235649546827794 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.41930208333333335, - "normalized_score": 11.379427083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4369182180851064, - "normalized_score": 37.43535756501182 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": true, - "is_moe": false, - "is_flagged": true, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-20", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.3451683769919567 - } - }, - { - "id": "newsbang/Homer-v1.0-Qwen2.5-72B_bfloat16_c7f3c5c131c046626f8d33eb615c1a0aba19998b_False", - "model": { - "name": "newsbang/Homer-v1.0-Qwen2.5-72B", - "sha": "c7f3c5c131c046626f8d33eb615c1a0aba19998b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 47.464376408361055, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7627716680629618, - "normalized_score": 76.27716680629618 - }, - "bbh": { - "name": "BBH", - "value": 0.7309799550978827, - "normalized_score": 62.27406507872839 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4901812688821752, - "normalized_score": 49.01812688821752 - }, - "gpqa": { - "name": "GPQA", - "value": 0.4161073825503356, - "normalized_score": 22.14765100671141 - }, - "musr": { - "name": "MUSR", - "value": 0.4677291666666667, - "normalized_score": 17.89947916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.6145279255319149, - "normalized_score": 57.16976950354611 - } - }, - "features": { - 
"is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-16", - "submission_date": "2024-12-16", - "generation": 0, - "base_model": "newsbang/Homer-v1.0-Qwen2.5-72B", - "hub_license": "apache-2.0", - "hub_hearts": 6, - "params_billions": 72.706, - "co2_cost": 29.548857722554818 - } - }, - { - "id": "newsbang/Homer-v1.0-Qwen2.5-7B_bfloat16_4795825dff1b68dd2cc02b3bd39598a161c09c66_False", - "model": { - "name": "newsbang/Homer-v1.0-Qwen2.5-7B", - "sha": "4795825dff1b68dd2cc02b3bd39598a161c09c66", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 32.62357604568507, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6392737935344885, - "normalized_score": 63.92737935344884 - }, - "bbh": { - "name": "BBH", - "value": 0.5655254177370223, - "normalized_score": 37.81084749225178 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3323262839879154, - "normalized_score": 33.23262839879154 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3221476510067114, - "normalized_score": 9.61968680089485 - }, - "musr": { - "name": "MUSR", - "value": 0.42782291666666666, - "normalized_score": 11.877864583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.45345744680851063, - "normalized_score": 39.273049645390074 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-04", - "submission_date": "2024-12-04", - "generation": 0, - "base_model": "newsbang/Homer-v1.0-Qwen2.5-7B", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 7.616, - "co2_cost": 1.2785042364113017 - } - }, - { - "id": "nguyentd/FinancialAdvice-Qwen2.5-7B_bfloat16_5c3421d5a980d0b2365b0d704ead30c9e534a019_False", - "model": { - "name": "nguyentd/FinancialAdvice-Qwen2.5-7B", - "sha": "5c3421d5a980d0b2365b0d704ead30c9e534a019", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 21.287932324484498, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.449605934476079, - "normalized_score": 44.960593447607906 - }, - "bbh": { - "name": "BBH", - "value": 0.4730934153895792, - "normalized_score": 25.630435622160334 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1148036253776435, - "normalized_score": 11.48036253776435 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.40248958333333335, - "normalized_score": 9.144531250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.375249335106383, - "normalized_score": 30.583259456264773 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-21", - "submission_date": "2024-11-18", - "generation": 1, - "base_model": "nguyentd/FinancialAdvice-Qwen2.5-7B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.3088900896654123 - } - }, - { - "id": 
"ngxson/MiniThinky-1B-Llama-3.2_float16_a5e5adf4f7e63f7127a72def90ba3a627bae36bf_True", - "model": { - "name": "ngxson/MiniThinky-1B-Llama-3.2", - "sha": "a5e5adf4f7e63f7127a72def90ba3a627bae36bf", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 6.937116033349191, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2771479673931834, - "normalized_score": 27.71479673931834 - }, - "bbh": { - "name": "BBH", - "value": 0.31422650382721545, - "normalized_score": 4.347795393431276 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05740181268882175, - "normalized_score": 5.740181268882175 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23909395973154363, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.34336458333333336, - "normalized_score": 2.187239583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1146941489361702, - "normalized_score": 1.6326832151300221 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-06", - "submission_date": "2025-01-07", - "generation": 1, - "base_model": "ngxson/MiniThinky-1B-Llama-3.2 (Merge)", - "hub_license": "", - "hub_hearts": 4, - "params_billions": 1.236, - "co2_cost": 1.1223291009575076 - } - }, - { - "id": "ngxson/MiniThinky-v2-1B-Llama-3.2_float16_0eb811aca13439292d4151456577a527a2982c46_True", - "model": { - "name": "ngxson/MiniThinky-v2-1B-Llama-3.2", - "sha": "0eb811aca13439292d4151456577a527a2982c46", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 6.55067744157533, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2963071317437732, - "normalized_score": 29.63071317437732 - }, - "bbh": { - "name": "BBH", - "value": 0.32051111358951634, - "normalized_score": 4.893769484250589 - }, - "math": { - "name": "MATH Level 5", - "value": 0.028700906344410877, - "normalized_score": 2.8700906344410875 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23993288590604026, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3356145833333333, - "normalized_score": 0.6184895833333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1116190159574468, - "normalized_score": 1.2910017730496441 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-08", - "submission_date": "2025-01-09", - "generation": 1, - "base_model": "ngxson/MiniThinky-v2-1B-Llama-3.2 (Merge)", - "hub_license": "", - "hub_hearts": 38, - "params_billions": 1.236, - "co2_cost": 1.1046860596842587 - } - }, - { - "id": "nhyha/N3N_Delirium-v1_1030_0227_bfloat16_41eabc719bd611e2bd0094b0842df84916a57a46_True", - "model": { - "name": "nhyha/N3N_Delirium-v1_1030_0227", - "sha": "41eabc719bd611e2bd0094b0842df84916a57a46", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 33.094478522688185, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8022890375315275, - "normalized_score": 80.22890375315274 - 
}, - "bbh": { - "name": "BBH", - "value": 0.5890686677822234, - "normalized_score": 40.77504007448568 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2107250755287009, - "normalized_score": 21.07250755287009 - }, - "gpqa": { - "name": "GPQA", - "value": 0.337248322147651, - "normalized_score": 11.633109619686799 - }, - "musr": { - "name": "MUSR", - "value": 0.40981249999999997, - "normalized_score": 9.859895833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.41497672872340424, - "normalized_score": 34.99741430260047 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-30", - "submission_date": "2024-11-04", - "generation": 2, - "base_model": "unsloth/gemma-2-9b-it", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 10.159, - "co2_cost": 4.294386987361042 - } - }, - { - "id": "nhyha/N3N_Llama-3.1-8B-Instruct_1028_0216_bfloat16_d0715a631898112c9c3b729d0334588a2ff636d8_False", - "model": { - "name": "nhyha/N3N_Llama-3.1-8B-Instruct_1028_0216", - "sha": "d0715a631898112c9c3b729d0334588a2ff636d8", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.479351772868437, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4796063334175543, - "normalized_score": 47.960633341755425 - }, - "bbh": { - "name": "BBH", - "value": 0.5053741309920361, - "normalized_score": 28.980464124810663 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17069486404833836, - "normalized_score": 17.069486404833835 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3062080536912752, - "normalized_score": 7.494407158836691 - }, - "musr": { - "name": "MUSR", - "value": 0.40503125, - "normalized_score": 10.062239583333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36377992021276595, - "normalized_score": 29.308880023640665 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-28", - "submission_date": "2024-11-04", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4914317034052258 - } - }, - { - "id": "nhyha/N3N_gemma-2-9b-it_20241029_1532_bfloat16_6cfc55a717961ef206978b577bd74df97efe1455_False", - "model": { - "name": "nhyha/N3N_gemma-2-9b-it_20241029_1532", - "sha": "6cfc55a717961ef206978b577bd74df97efe1455", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 32.14813042759547, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6751940407008958, - "normalized_score": 67.51940407008958 - }, - "bbh": { - "name": "BBH", - "value": 0.5863124381827675, - "normalized_score": 40.9866677332497 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2122356495468278, - "normalized_score": 21.22356495468278 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34060402684563756, - "normalized_score": 12.080536912751676 - }, - "musr": { - "name": "MUSR", - "value": 0.4593541666666667, - "normalized_score": 16.3859375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - 
"value": 0.4122340425531915, - "normalized_score": 34.692671394799056 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-29", - "submission_date": "2024-11-04", - "generation": 1, - "base_model": "unsloth/gemma-2-9b-it", - "hub_license": "gemma", - "hub_hearts": 2, - "params_billions": 10.159, - "co2_cost": 4.788088397014556 - } - }, - { - "id": "nhyha/N3N_gemma-2-9b-it_20241110_2026_bfloat16_2d4c24278ed9d8b42a4035da16a5aea745797441_True", - "model": { - "name": "nhyha/N3N_gemma-2-9b-it_20241110_2026", - "sha": "2d4c24278ed9d8b42a4035da16a5aea745797441", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 29.119584296103525, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6282829558903709, - "normalized_score": 62.82829558903709 - }, - "bbh": { - "name": "BBH", - "value": 0.5867149609980419, - "normalized_score": 40.944105549057745 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1608761329305136, - "normalized_score": 16.08761329305136 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33640939597315433, - "normalized_score": 11.521252796420578 - }, - "musr": { - "name": "MUSR", - "value": 0.40730208333333334, - "normalized_score": 9.779427083333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.40201130319148937, - "normalized_score": 33.55681146572104 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-12", - "submission_date": "2024-11-12", - "generation": 1, - "base_model": "unsloth/gemma-2-9b-it", - "hub_license": "gemma", - "hub_hearts": 0, - "params_billions": 10.159, - "co2_cost": 5.081099820650457 - } - }, - { - "id": "nhyha/merge_Qwen2.5-7B-Instruct_20241023_0314_bfloat16_4d93f65c1f870556f05c77a1ef4f26819d49daf7_False", - "model": { - "name": "nhyha/merge_Qwen2.5-7B-Instruct_20241023_0314", - "sha": "4d93f65c1f870556f05c77a1ef4f26819d49daf7", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 31.4495497347518, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5694568190179834, - "normalized_score": 56.94568190179834 - }, - "bbh": { - "name": "BBH", - "value": 0.5558529241660143, - "normalized_score": 36.36518528982388 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3542296072507553, - "normalized_score": 35.422960725075534 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3213087248322148, - "normalized_score": 9.507829977628639 - }, - "musr": { - "name": "MUSR", - "value": 0.42506249999999995, - "normalized_score": 11.099479166666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.45420545212765956, - "normalized_score": 39.35616134751773 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-23", - "submission_date": "2024-11-04", - "generation": 3, - "base_model": "Qwen/Qwen2.5-7B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 
1.3954515108393644 - } - }, - { - "id": "nidum/Nidum-Limitless-Gemma-2B_float16_e209e3513d2b34c0e6c433ede26e17604c25cb1a_True", - "model": { - "name": "nidum/Nidum-Limitless-Gemma-2B", - "sha": "e209e3513d2b34c0e6c433ede26e17604c25cb1a", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "GemmaForCausalLM", - "average_score": 6.166007951282463, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24235140538216376, - "normalized_score": 24.235140538216378 - }, - "bbh": { - "name": "BBH", - "value": 0.3078801520076317, - "normalized_score": 3.4510601516101125 - }, - "math": { - "name": "MATH Level 5", - "value": 0.013595166163141994, - "normalized_score": 1.3595166163141994 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.37403125, - "normalized_score": 4.120572916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11735372340425532, - "normalized_score": 1.9281914893617011 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-02", - "submission_date": "2024-08-07", - "generation": 0, - "base_model": "nidum/Nidum-Limitless-Gemma-2B", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 2.506, - "co2_cost": 0.7936273921827564 - } - }, - { - "id": "nisten/franqwenstein-35b_float16_7180aa73e82945a1d2ae0eb304508e21d57e4c27_False", - "model": { - "name": "nisten/franqwenstein-35b", - "sha": "7180aa73e82945a1d2ae0eb304508e21d57e4c27", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.57133162829882, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.37986320740080765, - "normalized_score": 37.986320740080764 - }, - "bbh": { - "name": "BBH", - "value": 0.6646579178049268, - "normalized_score": 52.227468077653526 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3406344410876133, - "normalized_score": 34.06344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.4035234899328859, - "normalized_score": 20.46979865771812 - }, - "musr": { - "name": "MUSR", - "value": 0.49402083333333335, - "normalized_score": 22.119270833333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5730551861702128, - "normalized_score": 52.56168735224587 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-03", - "submission_date": "2024-10-03", - "generation": 1, - "base_model": "nisten/franqwenstein-35b (Merge)", - "hub_license": "mit", - "hub_hearts": 8, - "params_billions": 34.714, - "co2_cost": 10.035539404554576 - } - }, - { - "id": "nisten/franqwenstein-35b_bfloat16_901351a987d664a1cd7f483115a167d3ae5694ec_True", - "model": { - "name": "nisten/franqwenstein-35b", - "sha": "901351a987d664a1cd7f483115a167d3ae5694ec", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 34.451116831224226, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.39135383005979685, - 
"normalized_score": 39.13538300597969 - }, - "bbh": { - "name": "BBH", - "value": 0.6591132598701116, - "normalized_score": 51.68027687329707 - }, - "math": { - "name": "MATH Level 5", - "value": 0.304380664652568, - "normalized_score": 30.438066465256803 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35906040268456374, - "normalized_score": 14.541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.4681041666666667, - "normalized_score": 19.6796875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5610871010638298, - "normalized_score": 51.23190011820331 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-03", - "submission_date": "2024-10-03", - "generation": 1, - "base_model": "nisten/franqwenstein-35b (Merge)", - "hub_license": "mit", - "hub_hearts": 8, - "params_billions": 34.714, - "co2_cost": 6.328604130163559 - } - }, - { - "id": "nisten/tqwendo-36b_bfloat16_c50f38e8421785af4b8596f81e0098a6585b4f05_False", - "model": { - "name": "nisten/tqwendo-36b", - "sha": "c50f38e8421785af4b8596f81e0098a6585b4f05", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 37.04172043230399, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6777672132164878, - "normalized_score": 67.77672132164878 - }, - "bbh": { - "name": "BBH", - "value": 0.6431830832659088, - "normalized_score": 49.41493648187427 - }, - "math": { - "name": "MATH Level 5", - "value": 0.41540785498489424, - "normalized_score": 41.54078549848943 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3313758389261745, - "normalized_score": 10.850111856823268 - }, - "musr": { - "name": "MUSR", - "value": 0.44295833333333334, - "normalized_score": 15.103124999999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4380817819148936, - "normalized_score": 37.56464243498817 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-21", - "submission_date": "2024-12-21", - "generation": 1, - "base_model": "nisten/tqwendo-36b (Merge)", - "hub_license": "mit", - "hub_hearts": 9, - "params_billions": 35.69, - "co2_cost": 18.300782521646237 - } - }, - { - "id": "nlpguy/Lion-Lamarck-v.1.0.8_bfloat16_3f1c2632893f4a7d22ab50a1b87ebee9f054086f_True", - "model": { - "name": "nlpguy/Lion-Lamarck-v.1.0.8", - "sha": "3f1c2632893f4a7d22ab50a1b87ebee9f054086f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.883024988283445, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45090471061228654, - "normalized_score": 45.090471061228655 - }, - "bbh": { - "name": "BBH", - "value": 0.5868930914775694, - "normalized_score": 40.84844139218024 - }, - "math": { - "name": "MATH Level 5", - "value": 0.554380664652568, - "normalized_score": 55.438066465256796 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35822147651006714, - "normalized_score": 14.429530201342287 - }, - "musr": { - "name": "MUSR", - "value": 0.4672708333333333, - "normalized_score": 19.008854166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.46434507978723405, - 
"normalized_score": 40.48278664302601 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-27", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "nlpguy/Lion-Lamarck-v.1.0.8 (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 4.057618841091383 - } - }, - { - "id": "nlpguy/Lion-Lamarck-v.1.0.9_bfloat16_2bb3b70d5eab9fbc39a10452c601fe36d00b9fca_False", - "model": { - "name": "nlpguy/Lion-Lamarck-v.1.0.9", - "sha": "2bb3b70d5eab9fbc39a10452c601fe36d00b9fca", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.365577681666096, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.34089549063152436, - "normalized_score": 34.08954906315243 - }, - "bbh": { - "name": "BBH", - "value": 0.5918237099420903, - "normalized_score": 40.46884807072923 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5641993957703928, - "normalized_score": 56.41993957703928 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3901006711409396, - "normalized_score": 18.680089485458616 - }, - "musr": { - "name": "MUSR", - "value": 0.5299583333333334, - "normalized_score": 27.378124999999994 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.47041223404255317, - "normalized_score": 41.15691489361702 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-28", - "submission_date": "2025-01-28", - "generation": 1, - "base_model": "nlpguy/Lion-Lamarck-v.1.0.9 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 14.766, - "co2_cost": 3.9066220654126314 - } - }, - { - "id": "nlpguy/Lion-Lamarck-v.1.1.0_bfloat16_dedb68b932cb4bbf50d80150419fdca664ba63e5_False", - "model": { - "name": "nlpguy/Lion-Lamarck-v.1.1.0", - "sha": "dedb68b932cb4bbf50d80150419fdca664ba63e5", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 37.03161283098529, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3657750324694034, - "normalized_score": 36.57750324694034 - }, - "bbh": { - "name": "BBH", - "value": 0.5962460968547941, - "normalized_score": 41.16630363693309 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5755287009063444, - "normalized_score": 57.55287009063444 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3926174496644295, - "normalized_score": 19.01565995525727 - }, - "musr": { - "name": "MUSR", - "value": 0.53253125, - "normalized_score": 27.533072916666658 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4630984042553192, - "normalized_score": 40.3442671394799 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-29", - "submission_date": "2025-01-30", - "generation": 1, - "base_model": "nlpguy/Lion-Lamarck-v.1.1.0 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 14.766, - "co2_cost": 3.8879522097473718 - } - }, - { - "id": 
"nlpguy/Miisce-one_bfloat16_c32e07ad0f4d9dcc0f0806c7b6a6dc013e0a5cfe_False", - "model": { - "name": "nlpguy/Miisce-one", - "sha": "c32e07ad0f4d9dcc0f0806c7b6a6dc013e0a5cfe", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.837021350039876, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6065761069517768, - "normalized_score": 60.65761069517769 - }, - "bbh": { - "name": "BBH", - "value": 0.6504562869685913, - "normalized_score": 49.71168185726318 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4169184290030212, - "normalized_score": 41.69184290030212 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3859060402684564, - "normalized_score": 18.120805369127517 - }, - "musr": { - "name": "MUSR", - "value": 0.48198958333333336, - "normalized_score": 19.815364583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5412234042553191, - "normalized_score": 49.02482269503546 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-10", - "submission_date": "2025-02-10", - "generation": 1, - "base_model": "nlpguy/Miisce-one (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 2.0187925477418864 - } - }, - { - "id": "nlpguy/Mistral-NeMo-Minitron-Upscale-v1_bfloat16_9e6d747cbb81e1f25915a0f42802cbeb85b61c3e_False", - "model": { - "name": "nlpguy/Mistral-NeMo-Minitron-Upscale-v1", - "sha": "9e6d747cbb81e1f25915a0f42802cbeb85b61c3e", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 10.990222795976974, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16484040124647048, - "normalized_score": 16.484040124647045 - }, - "bbh": { - "name": "BBH", - "value": 0.44679984097967057, - "normalized_score": 22.06890968577206 - }, - "math": { - "name": "MATH Level 5", - "value": 0.014350453172205438, - "normalized_score": 1.4350453172205437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2802013422818792, - "normalized_score": 4.026845637583895 - }, - "musr": { - "name": "MUSR", - "value": 0.3803541666666667, - "normalized_score": 4.8442708333333355 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2537400265957447, - "normalized_score": 17.082225177304963 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-29", - "submission_date": "2024-09-29", - "generation": 1, - "base_model": "nlpguy/Mistral-NeMo-Minitron-Upscale-v1 (Merge)", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 12.451, - "co2_cost": 5.868611599373479 - } - }, - { - "id": "nlpguy/Mistral-NeMo-Minitron-Upscale-v2_bfloat16_4ac077e496705687fdcbe51f3b915be42e91bf79_False", - "model": { - "name": "nlpguy/Mistral-NeMo-Minitron-Upscale-v2", - "sha": "4ac077e496705687fdcbe51f3b915be42e91bf79", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 8.345444097364796, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15727159492369136, - "normalized_score": 15.727159492369136 
- }, - "bbh": { - "name": "BBH", - "value": 0.3949668154807224, - "normalized_score": 14.382673288078204 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01283987915407855, - "normalized_score": 1.283987915407855 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27348993288590606, - "normalized_score": 3.1319910514541416 - }, - "musr": { - "name": "MUSR", - "value": 0.3790833333333334, - "normalized_score": 5.252083333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1926529255319149, - "normalized_score": 10.2947695035461 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-29", - "submission_date": "2024-09-29", - "generation": 1, - "base_model": "nlpguy/Mistral-NeMo-Minitron-Upscale-v2 (Merge)", - "hub_license": "other", - "hub_hearts": 1, - "params_billions": 12.451, - "co2_cost": 5.849237341350645 - } - }, - { - "id": "nlpguy/Mistral-NeMo-Minitron-Upscale-v3_bfloat16_6703b09d3d78cc020448ee93c53dc727312bcbaf_False", - "model": { - "name": "nlpguy/Mistral-NeMo-Minitron-Upscale-v3", - "sha": "6703b09d3d78cc020448ee93c53dc727312bcbaf", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 5.202259190321894, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.14120976786038822, - "normalized_score": 14.120976786038822 - }, - "bbh": { - "name": "BBH", - "value": 0.30524522602918064, - "normalized_score": 3.3982664477164874 - }, - "math": { - "name": "MATH Level 5", - "value": 0.011329305135951661, - "normalized_score": 1.1329305135951662 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.40984375, - "normalized_score": 9.430468750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11710438829787234, - "normalized_score": 1.9004875886524817 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-04", - "submission_date": "2024-10-04", - "generation": 1, - "base_model": "nlpguy/Mistral-NeMo-Minitron-Upscale-v3 (Merge)", - "hub_license": "other", - "hub_hearts": 1, - "params_billions": 12.451, - "co2_cost": 9.086727159842797 - } - }, - { - "id": "nlpguy/StableProse_bfloat16_4937dc747684705e4b87df27b47eab5429f3a9c1_False", - "model": { - "name": "nlpguy/StableProse", - "sha": "4937dc747684705e4b87df27b47eab5429f3a9c1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 16.623904800931484, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19723888172271792, - "normalized_score": 19.723888172271792 - }, - "bbh": { - "name": "BBH", - "value": 0.5116558625577087, - "normalized_score": 30.180202714185956 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0649546827794562, - "normalized_score": 6.495468277945619 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.4067083333333333, - "normalized_score": 8.871875000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3468251329787234, - 
"normalized_score": 27.425014775413715 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-16", - "submission_date": "2024-08-17", - "generation": 1, - "base_model": "nlpguy/StableProse (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 12.248, - "co2_cost": 3.588726111482876 - } - }, - { - "id": "nlpguy/StarFusion-alpha1_bfloat16_dccad965a710d7bee001b6387c8307e7c320291e_True", - "model": { - "name": "nlpguy/StarFusion-alpha1", - "sha": "dccad965a710d7bee001b6387c8307e7c320291e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.8283235530266, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5660092997690572, - "normalized_score": 56.60092997690572 - }, - "bbh": { - "name": "BBH", - "value": 0.4428694115507034, - "normalized_score": 21.933181635654744 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07175226586102719, - "normalized_score": 7.175226586102719 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2953020134228188, - "normalized_score": 6.040268456375841 - }, - "musr": { - "name": "MUSR", - "value": 0.40810416666666666, - "normalized_score": 8.879687500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3190658244680851, - "normalized_score": 24.34064716312057 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-13", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "nlpguy/StarFusion-alpha1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 1.7569972792351518 - } - }, - { - "id": "noname0202/Llama-3.2-4x3B-Instruct_bfloat16_b7db5c4ec1138be364127e0482adabc8355d0943_True", - "model": { - "name": "noname0202/Llama-3.2-4x3B-Instruct", - "sha": "b7db5c4ec1138be364127e0482adabc8355d0943", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 24.010126992420393, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7067181744438091, - "normalized_score": 70.6718174443809 - }, - "bbh": { - "name": "BBH", - "value": 0.4647311192852755, - "normalized_score": 24.689908534883614 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15861027190332327, - "normalized_score": 15.861027190332328 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.36739583333333337, - "normalized_score": 4.424479166666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3285405585106383, - "normalized_score": 25.39339539007092 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-26", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 9.949, - "co2_cost": 2.384502028042734 - } - }, - { - "id": "noname0202/gemma-2-2b-it-ties_bfloat16_7ab51f4991186f6850d826e4ddc44a053de05f2f_True", - "model": 
{ - "name": "noname0202/gemma-2-2b-it-ties", - "sha": "7ab51f4991186f6850d826e4ddc44a053de05f2f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 10.06382335103593, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.12657083205893696, - "normalized_score": 12.657083205893695 - }, - "bbh": { - "name": "BBH", - "value": 0.42057403060290816, - "normalized_score": 18.13956937900525 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02416918429003021, - "normalized_score": 2.416918429003021 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.39288541666666665, - "normalized_score": 7.144010416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2560671542553192, - "normalized_score": 17.340794917257686 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-29", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 2.614, - "co2_cost": 2.4053951190182237 - } - }, - { - "id": "noname0202/gemma-2-9b-sft-jp-en-zh-v1_bfloat16_06ca7f00ce3ddece15cb50a1292ce0912e19af4e_True", - "model": { - "name": "noname0202/gemma-2-9b-sft-jp-en-zh-v1", - "sha": "06ca7f00ce3ddece15cb50a1292ce0912e19af4e", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 16.85196649387241, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.29880494864736673, - "normalized_score": 29.880494864736676 - }, - "bbh": { - "name": "BBH", - "value": 0.4519290530910057, - "normalized_score": 22.0002402150018 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0891238670694864, - "normalized_score": 8.91238670694864 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.40801041666666665, - "normalized_score": 9.101302083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3125, - "normalized_score": 23.61111111111111 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-05", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 9.242, - "co2_cost": 1.8314716525402794 - } - }, - { - "id": "noname0202/gemma-2-9b-sft-jp-en-zh-v2_bfloat16_b32a4038b8617c7620ee7761609d926ddda8c1fe_True", - "model": { - "name": "noname0202/gemma-2-9b-sft-jp-en-zh-v2", - "sha": "b32a4038b8617c7620ee7761609d926ddda8c1fe", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 19.130819899645495, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3993470657854493, - "normalized_score": 39.93470657854493 - }, - "bbh": { - "name": "BBH", - "value": 0.4515041184509401, - "normalized_score": 22.658058596327027 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1042296072507553, - 
"normalized_score": 10.42296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.287751677852349, - "normalized_score": 5.033557046979867 - }, - "musr": { - "name": "MUSR", - "value": 0.36115625, - "normalized_score": 7.011197916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36751994680851063, - "normalized_score": 29.724438534278963 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-05", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 9.242, - "co2_cost": 1.625544838025149 - } - }, - { - "id": "noname0202/llama-math-1b-r16-0to512tokens-test_bfloat16_df274b741781f3f3ecce2ef86883863aaeb71c58_True", - "model": { - "name": "noname0202/llama-math-1b-r16-0to512tokens-test", - "sha": "df274b741781f3f3ecce2ef86883863aaeb71c58", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.804072106791523, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5469753587148765, - "normalized_score": 54.69753587148765 - }, - "bbh": { - "name": "BBH", - "value": 0.34884166022601404, - "normalized_score": 8.389239359006991 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08157099697885196, - "normalized_score": 8.157099697885197 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26677852348993286, - "normalized_score": 2.2371364653243813 - }, - "musr": { - "name": "MUSR", - "value": 0.3143125, - "normalized_score": 1.255729166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17278922872340424, - "normalized_score": 8.087692080378249 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-25", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 0.7330675322960788 - } - }, - { - "id": "noname0202/llama-math-1b-r32-0to512tokens-test_bfloat16_200ae9a8db697345c6471e12a225f2e7adb953c1_True", - "model": { - "name": "noname0202/llama-math-1b-r32-0to512tokens-test", - "sha": "200ae9a8db697345c6471e12a225f2e7adb953c1", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.371257364913582, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5682577782505973, - "normalized_score": 56.825777825059724 - }, - "bbh": { - "name": "BBH", - "value": 0.3495183139510159, - "normalized_score": 8.191900397270365 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09063444108761329, - "normalized_score": 9.06344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.32094791666666667, - "normalized_score": 1.6851562500000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17603058510638298, - "normalized_score": 8.447842789598107 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - 
"metadata": { - "upload_date": "", - "submission_date": "2025-01-24", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 0.8009289669930701 - } - }, - { - "id": "noname0202/llama-math-1b-r32-test_bfloat16_3b2cd2f41ed1a9894dd1bdd275f45081d5c6caf1_True", - "model": { - "name": "noname0202/llama-math-1b-r32-test", - "sha": "3b2cd2f41ed1a9894dd1bdd275f45081d5c6caf1", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.418127068599686, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5819215237791282, - "normalized_score": 58.192152377912834 - }, - "bbh": { - "name": "BBH", - "value": 0.3485960127764988, - "normalized_score": 8.498754758774203 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07250755287009064, - "normalized_score": 7.250755287009064 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.31564583333333335, - "normalized_score": 2.3223958333333345 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17810837765957446, - "normalized_score": 8.678708628841607 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-24", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 0.7467959408138168 - } - }, - { - "id": "noname0202/llama-math-1b-r8-512tokens-test_bfloat16_ba83c9aca75dd38a66df90eb2dd1cb56db6d3c9a_True", - "model": { - "name": "noname0202/llama-math-1b-r8-512tokens-test", - "sha": "ba83c9aca75dd38a66df90eb2dd1cb56db6d3c9a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.630751727519524, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5791987482103043, - "normalized_score": 57.91987482103042 - }, - "bbh": { - "name": "BBH", - "value": 0.3495762462148306, - "normalized_score": 8.396798396874248 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08157099697885196, - "normalized_score": 8.157099697885197 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.31694791666666666, - "normalized_score": 2.485156250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17528257978723405, - "normalized_score": 8.36473108747045 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-24", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.236, - "co2_cost": 0.7317208362766235 - } - }, - { - "id": "notbdq/Qwen2.5-14B-Instruct-1M-GRPO-Reasoning_bfloat16_8322cabcc11053dca1f6fac6f3ffac4781ec9641_True", - "model": { - "name": "notbdq/Qwen2.5-14B-Instruct-1M-GRPO-Reasoning", - "sha": "8322cabcc11053dca1f6fac6f3ffac4781ec9641", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", 
- "architecture": "Qwen2ForCausalLM", - "average_score": 41.559026792386994, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8413564896696322, - "normalized_score": 84.1356489669632 - }, - "bbh": { - "name": "BBH", - "value": 0.6198222551365405, - "normalized_score": 45.658280600233155 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5302114803625377, - "normalized_score": 53.02114803625378 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34312080536912754, - "normalized_score": 12.416107382550338 - }, - "musr": { - "name": "MUSR", - "value": 0.418, - "normalized_score": 11.350000000000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4849567819148936, - "normalized_score": 42.77297576832151 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-01", - "submission_date": "2025-02-05", - "generation": 0, - "base_model": "notbdq/Qwen2.5-14B-Instruct-1M-GRPO-Reasoning", - "hub_license": "", - "hub_hearts": 4, - "params_billions": 14.77, - "co2_cost": 3.391751948378743 - } - }, - { - "id": "nothingiisreal/L3.1-8B-Celeste-V1.5_bfloat16_e7ea0e3d2727c8cf66c0481ffa251f28cb85429f_True", - "model": { - "name": "nothingiisreal/L3.1-8B-Celeste-V1.5", - "sha": "e7ea0e3d2727c8cf66c0481ffa251f28cb85429f", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.172145831649967, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7326715337526651, - "normalized_score": 73.2671533752665 - }, - "bbh": { - "name": "BBH", - "value": 0.5011796822721141, - "normalized_score": 28.887966925667627 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14652567975830816, - "normalized_score": 14.652567975830816 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28439597315436244, - "normalized_score": 4.5861297539149914 - }, - "musr": { - "name": "MUSR", - "value": 0.37486458333333333, - "normalized_score": 5.5914062499999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37042885638297873, - "normalized_score": 30.04765070921986 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-27", - "submission_date": "2024-12-04", - "generation": 0, - "base_model": "nothingiisreal/L3.1-8B-Celeste-V1.5", - "hub_license": "llama3.1", - "hub_hearts": 39, - "params_billions": 8.03, - "co2_cost": 1.4143282385062868 - } - }, - { - "id": "nothingiisreal/MN-12B-Starcannon-v2_bfloat16_f2ff756e8c32d9107d4f6a3c18c730e3fe0cae88_True", - "model": { - "name": "nothingiisreal/MN-12B-Starcannon-v2", - "sha": "f2ff756e8c32d9107d4f6a3c18c730e3fe0cae88", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 18.181449904707133, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3925273828995953, - "normalized_score": 39.252738289959524 - }, - "bbh": { - "name": "BBH", - "value": 0.5004499888471767, - "normalized_score": 28.424782963573875 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05966767371601209, - "normalized_score": 5.966767371601208 - }, - "gpqa": { - "name": "GPQA", - "value": 
0.2785234899328859, - "normalized_score": 3.8031319910514525 - }, - "musr": { - "name": "MUSR", - "value": 0.39781249999999996, - "normalized_score": 7.993229166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31283244680851063, - "normalized_score": 23.648049645390067 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-13", - "submission_date": "2024-09-03", - "generation": 1, - "base_model": "nothingiisreal/MN-12B-Starcannon-v2 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 7, - "params_billions": 12.248, - "co2_cost": 3.445325569110211 - } - }, - { - "id": "nothingiisreal/MN-12B-Starcannon-v3_bfloat16_169480b62121c4f070e93a05158545c679712644_True", - "model": { - "name": "nothingiisreal/MN-12B-Starcannon-v3", - "sha": "169480b62121c4f070e93a05158545c679712644", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.144471264239638, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.38073755413414184, - "normalized_score": 38.07375541341418 - }, - "bbh": { - "name": "BBH", - "value": 0.5170553444795719, - "normalized_score": 30.873001626388618 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07779456193353475, - "normalized_score": 7.779456193353475 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27348993288590606, - "normalized_score": 3.1319910514541416 - }, - "musr": { - "name": "MUSR", - "value": 0.40463541666666664, - "normalized_score": 9.846093749999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32646276595744683, - "normalized_score": 25.162529550827422 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-13", - "submission_date": "2024-09-03", - "generation": 1, - "base_model": "nothingiisreal/MN-12B-Starcannon-v3 (Merge)", - "hub_license": "", - "hub_hearts": 13, - "params_billions": 12.248, - "co2_cost": 3.4913418617587566 - } - }, - { - "id": "nvidia/AceInstruct-1.5B_bfloat16_1e3d02075fcf988407b436eb5c10a407be86c71f_True", - "model": { - "name": "nvidia/AceInstruct-1.5B", - "sha": "1e3d02075fcf988407b436eb5c10a407be86c71f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 18.11586485792988, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3947758613811354, - "normalized_score": 39.47758613811354 - }, - "bbh": { - "name": "BBH", - "value": 0.3931958135346713, - "normalized_score": 15.468561041610593 - }, - "math": { - "name": "MATH Level 5", - "value": 0.31268882175226587, - "normalized_score": 31.268882175226587 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27181208053691275, - "normalized_score": 2.9082774049216997 - }, - "musr": { - "name": "MUSR", - "value": 0.34600000000000003, - "normalized_score": 2.0833333333333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2573969414893617, - "normalized_score": 17.48854905437352 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": 
"2025-01-15", - "submission_date": "2025-01-24", - "generation": 0, - "base_model": "nvidia/AceInstruct-1.5B", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 18, - "params_billions": 1.777, - "co2_cost": 1.7064082962319682 - } - }, - { - "id": "nvidia/AceInstruct-72B_bfloat16_3c5c88ea8ab5d5067e23a482cc26014a0d23e848_True", - "model": { - "name": "nvidia/AceInstruct-72B", - "sha": "3c5c88ea8ab5d5067e23a482cc26014a0d23e848", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.40502231212503, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.711888899231816, - "normalized_score": 71.1888899231816 - }, - "bbh": { - "name": "BBH", - "value": 0.6139041785911337, - "normalized_score": 44.20381994540313 - }, - "math": { - "name": "MATH Level 5", - "value": 0.6261329305135952, - "normalized_score": 62.61329305135952 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3213087248322148, - "normalized_score": 9.507829977628639 - }, - "musr": { - "name": "MUSR", - "value": 0.42060416666666667, - "normalized_score": 11.875520833333338 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.48736702127659576, - "normalized_score": 43.04078014184397 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2025-01-15", - "submission_date": "2025-01-24", - "generation": 0, - "base_model": "nvidia/AceInstruct-72B", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 14, - "params_billions": 72.706, - "co2_cost": 80.78669497709723 - } - }, - { - "id": "nvidia/AceInstruct-7B_bfloat16_3bbb14f63afd2dc890c7932bfffb4f6dc3bfa1e8_True", - "model": { - "name": "nvidia/AceInstruct-7B", - "sha": "3bbb14f63afd2dc890c7932bfffb4f6dc3bfa1e8", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 33.05654347527337, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5422290633297429, - "normalized_score": 54.22290633297429 - }, - "bbh": { - "name": "BBH", - "value": 0.550118130896558, - "normalized_score": 36.57481399081368 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5294561933534743, - "normalized_score": 52.94561933534743 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.4255, - "normalized_score": 11.6875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.417719414893617, - "normalized_score": 35.30215721040189 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2025-01-15", - "submission_date": "2025-01-24", - "generation": 0, - "base_model": "nvidia/AceInstruct-7B", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 19, - "params_billions": 7.616, - "co2_cost": 1.8501446641433856 - } - }, - { - "id": "nvidia/AceMath-1.5B-Instruct_bfloat16_166818c371eaafb212b243aecadd50b1079fa776_True", - "model": { - "name": "nvidia/AceMath-1.5B-Instruct", - "sha": "166818c371eaafb212b243aecadd50b1079fa776", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - 
"average_score": 20.18985986442293, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.32123654126606294, - "normalized_score": 32.1236541266063 - }, - "bbh": { - "name": "BBH", - "value": 0.4024301274933693, - "normalized_score": 16.762509591549986 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5287009063444109, - "normalized_score": 52.87009063444109 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.3606979166666667, - "normalized_score": 4.320572916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.20636635638297873, - "normalized_score": 11.818484042553191 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2025-01-13", - "submission_date": "2025-01-24", - "generation": 0, - "base_model": "nvidia/AceMath-1.5B-Instruct", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 7, - "params_billions": 1.777, - "co2_cost": 1.663243327095496 - } - }, - { - "id": "nvidia/AceMath-72B-Instruct_bfloat16_9bab369176cddd6cbc38b2002ffbef9a3152aade_True", - "model": { - "name": "nvidia/AceMath-72B-Instruct", - "sha": "9bab369176cddd6cbc38b2002ffbef9a3152aade", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.65560439276686, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.494993284485166, - "normalized_score": 49.49932844851659 - }, - "bbh": { - "name": "BBH", - "value": 0.640215611099268, - "normalized_score": 48.68777244219516 - }, - "math": { - "name": "MATH Level 5", - "value": 0.7145015105740181, - "normalized_score": 71.45015105740181 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.40615625, - "normalized_score": 9.602864583333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44107380319148937, - "normalized_score": 37.89708924349882 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2025-01-14", - "submission_date": "2025-01-24", - "generation": 0, - "base_model": "nvidia/AceMath-72B-Instruct", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 14, - "params_billions": 72.706, - "co2_cost": 88.01123585671756 - } - }, - { - "id": "nvidia/AceMath-72B-RM_bfloat16_bb8cb2e7bd45c1d74894d87a95249d5dd5c19bf4_True", - "model": { - "name": "nvidia/AceMath-72B-RM", - "sha": "bb8cb2e7bd45c1d74894d87a95249d5dd5c19bf4", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForSequenceClassification", - "average_score": 3.4288267214523853, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.14125963554479892, - "normalized_score": 14.12596355447989 - }, - "bbh": { - "name": "BBH", - "value": 0.2717426350897727, - "normalized_score": 1.4035016501209492 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23406040268456377, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3351458333333333, - 
"normalized_score": 3.0598958333333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11785239361702128, - "normalized_score": 1.9835992907801419 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2025-01-14", - "submission_date": "2025-01-24", - "generation": 0, - "base_model": "nvidia/AceMath-72B-RM", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 8, - "params_billions": 71.461, - "co2_cost": 150.00072389582226 - } - }, - { - "id": "nvidia/AceMath-7B-Instruct_bfloat16_f29b4bd5ad5e4fc7bfb52343dca2dd07e948f964_True", - "model": { - "name": "nvidia/AceMath-7B-Instruct", - "sha": "f29b4bd5ad5e4fc7bfb52343dca2dd07e948f964", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 30.327490047445718, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45317756885064964, - "normalized_score": 45.317756885064966 - }, - "bbh": { - "name": "BBH", - "value": 0.49938547326244365, - "normalized_score": 29.99382585790849 - }, - "math": { - "name": "MATH Level 5", - "value": 0.6336858006042296, - "normalized_score": 63.368580060422964 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.4192708333333333, - "normalized_score": 11.208854166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33834773936170215, - "normalized_score": 26.483082151300234 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2025-01-13", - "submission_date": "2025-01-24", - "generation": 0, - "base_model": "nvidia/AceMath-7B-Instruct", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 21, - "params_billions": 7.616, - "co2_cost": 1.9773082959878772 - } - }, - { - "id": "nvidia/AceMath-7B-RM_bfloat16_2a7b81019f94d1a78eec298f7cf5c677ff958f5a_True", - "model": { - "name": "nvidia/AceMath-7B-RM", - "sha": "2a7b81019f94d1a78eec298f7cf5c677ff958f5a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForSequenceClassification", - "average_score": 3.2243900063069617, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.14937809456686035, - "normalized_score": 14.937809456686036 - }, - "bbh": { - "name": "BBH", - "value": 0.2422689292768334, - "normalized_score": 0.25152703505644675 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24580536912751677, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.35800000000000004, - "normalized_score": 2.6166666666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11386303191489362, - "normalized_score": 1.540336879432624 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2025-01-14", - "submission_date": "2025-01-24", - "generation": 0, - "base_model": "nvidia/AceMath-7B-RM", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 6, - "params_billions": 7.071, - "co2_cost": 1.3542793026535587 - } - }, 
- { - "id": "nvidia/Hymba-1.5B-Base_bfloat16_85e5b833d75f26170c7684ba83140f1bf9fedf37_False", - "model": { - "name": "nvidia/Hymba-1.5B-Base", - "sha": "85e5b833d75f26170c7684ba83140f1bf9fedf37", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "HymbaForCausalLM", - "average_score": 8.035282134433706, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2295121389025563, - "normalized_score": 22.95121389025563 - }, - "bbh": { - "name": "BBH", - "value": 0.32564785214182224, - "normalized_score": 7.689941118138137 - }, - "math": { - "name": "MATH Level 5", - "value": 0.013595166163141994, - "normalized_score": 1.3595166163141994 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2558724832214765, - "normalized_score": 0.7829977628635317 - }, - "musr": { - "name": "MUSR", - "value": 0.3566354166666667, - "normalized_score": 5.179427083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.19223736702127658, - "normalized_score": 10.248596335697398 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-10-09", - "submission_date": "2024-12-06", - "generation": 0, - "base_model": "nvidia/Hymba-1.5B-Base", - "hub_license": "other", - "hub_hearts": 139, - "params_billions": 1.523, - "co2_cost": 18.215828757603376 - } - }, - { - "id": "nvidia/Hymba-1.5B-Instruct_bfloat16_ffc758eefef247c0ee4d7ce41636562759027ce6_True", - "model": { - "name": "nvidia/Hymba-1.5B-Instruct", - "sha": "ffc758eefef247c0ee4d7ce41636562759027ce6", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "HymbaForCausalLM", - "average_score": 14.192383567083992, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6009055971488984, - "normalized_score": 60.09055971488983 - }, - "bbh": { - "name": "BBH", - "value": 0.3067133908231881, - "normalized_score": 4.591463615472479 - }, - "math": { - "name": "MATH Level 5", - "value": 0.027190332326283987, - "normalized_score": 2.719033232628399 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28859060402684567, - "normalized_score": 5.145413870246088 - }, - "musr": { - "name": "MUSR", - "value": 0.33158333333333334, - "normalized_score": 1.0479166666666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.20403922872340424, - "normalized_score": 11.55991430260047 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-10-31", - "submission_date": "2024-12-06", - "generation": 1, - "base_model": "nvidia/Hymba-1.5B-Instruct (Merge)", - "hub_license": "other", - "hub_hearts": 224, - "params_billions": 1.523, - "co2_cost": 13.425331528485655 - } - }, - { - "id": "nvidia/Llama-3.1-Minitron-4B-Depth-Base_bfloat16_40d82bc951b4f39e9c9e11176334250c30975098_False", - "model": { - "name": "nvidia/Llama-3.1-Minitron-4B-Depth-Base", - "sha": "40d82bc951b4f39e9c9e11176334250c30975098", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 11.658051143450892, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16069362624502986, - "normalized_score": 16.069362624502986 - 
}, - "bbh": { - "name": "BBH", - "value": 0.4170704193104893, - "normalized_score": 19.44410955550794 - }, - "math": { - "name": "MATH Level 5", - "value": 0.019637462235649546, - "normalized_score": 1.9637462235649545 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.40106250000000004, - "normalized_score": 10.699479166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2798371010638298, - "normalized_score": 19.98190011820331 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-08-13", - "submission_date": "2024-09-25", - "generation": 0, - "base_model": "nvidia/Llama-3.1-Minitron-4B-Depth-Base", - "hub_license": "other", - "hub_hearts": 21, - "params_billions": 4.02, - "co2_cost": 0.9353812716104579 - } - }, - { - "id": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF_bfloat16_250db5cf2323e04a6d2025a2ca2b94a95c439e88_True", - "model": { - "name": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF", - "sha": "250db5cf2323e04a6d2025a2ca2b94a95c439e88", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 36.90717314950126, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7380672172059026, - "normalized_score": 73.80672172059025 - }, - "bbh": { - "name": "BBH", - "value": 0.6316000668895038, - "normalized_score": 47.10953049372728 - }, - "math": { - "name": "MATH Level 5", - "value": 0.42673716012084595, - "normalized_score": 42.6737160120846 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.4327604166666667, - "normalized_score": 13.195052083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.49185505319148937, - "normalized_score": 43.53945035460993 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-10-12", - "submission_date": "2024-10-16", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-70B", - "hub_license": "llama3.1", - "hub_hearts": 2028, - "params_billions": 70.554, - "co2_cost": 27.257495267202522 - } - }, - { - "id": "nvidia/Minitron-4B-Base_bfloat16_d6321f64412982046a32d761701167e752fedc02_False", - "model": { - "name": "nvidia/Minitron-4B-Base", - "sha": "d6321f64412982046a32d761701167e752fedc02", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "NemotronForCausalLM", - "average_score": 11.977737055629914, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2217937295265451, - "normalized_score": 22.17937295265451 - }, - "bbh": { - "name": "BBH", - "value": 0.4083876243992497, - "normalized_score": 17.215600655061085 - }, - "math": { - "name": "MATH Level 5", - "value": 0.019637462235649546, - "normalized_score": 1.9637462235649545 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26929530201342283, - "normalized_score": 2.572706935123044 - }, - "musr": { - "name": "MUSR", - "value": 0.413375, - "normalized_score": 9.938541666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.261968085106383, - "normalized_score": 
17.99645390070922 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-07-19", - "submission_date": "2024-09-25", - "generation": 0, - "base_model": "nvidia/Minitron-4B-Base", - "hub_license": "other", - "hub_hearts": 133, - "params_billions": 4.0, - "co2_cost": 2.3785336263128607 - } - }, - { - "id": "nvidia/Minitron-8B-Base_bfloat16_70fa5997afc42807f41eebd5d481f040556fdf97_False", - "model": { - "name": "nvidia/Minitron-8B-Base", - "sha": "70fa5997afc42807f41eebd5d481f040556fdf97", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "NemotronForCausalLM", - "average_score": 14.21649076588472, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24242676099416216, - "normalized_score": 24.242676099416215 - }, - "bbh": { - "name": "BBH", - "value": 0.43950631883576047, - "normalized_score": 22.04079297000523 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0256797583081571, - "normalized_score": 2.56797583081571 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27348993288590606, - "normalized_score": 3.1319910514541416 - }, - "musr": { - "name": "MUSR", - "value": 0.40255208333333337, - "normalized_score": 9.085677083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31806848404255317, - "normalized_score": 24.229831560283685 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-07-19", - "submission_date": "2024-09-25", - "generation": 0, - "base_model": "nvidia/Minitron-8B-Base", - "hub_license": "other", - "hub_hearts": 64, - "params_billions": 7.22, - "co2_cost": 2.8250414403326536 - } - }, - { - "id": "nvidia/Mistral-NeMo-Minitron-8B-Base_bfloat16_cc94637b669b62c4829b1e0c3b9074fecd883b74_False", - "model": { - "name": "nvidia/Mistral-NeMo-Minitron-8B-Base", - "sha": "cc94637b669b62c4829b1e0c3b9074fecd883b74", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 17.697925857529604, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19456597383830457, - "normalized_score": 19.456597383830456 - }, - "bbh": { - "name": "BBH", - "value": 0.5219098090521418, - "normalized_score": 31.822015157490153 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04833836858006042, - "normalized_score": 4.833836858006042 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32550335570469796, - "normalized_score": 10.067114093959727 - }, - "musr": { - "name": "MUSR", - "value": 0.40915625000000005, - "normalized_score": 8.944531250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37957114361702127, - "normalized_score": 31.063460401891252 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-08-19", - "submission_date": "2024-08-22", - "generation": 0, - "base_model": "nvidia/Mistral-NeMo-Minitron-8B-Base", - "hub_license": "other", - "hub_hearts": 171, - "params_billions": 7.88, - "co2_cost": 5.115428506801799 - } - }, - { - "id": 
"nvidia/Mistral-NeMo-Minitron-8B-Instruct_bfloat16_27964e305f862f9947f577332a943d7013abc30f_True", - "model": { - "name": "nvidia/Mistral-NeMo-Minitron-8B-Instruct", - "sha": "27964e305f862f9947f577332a943d7013abc30f", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 23.57259648330948, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5003889679384035, - "normalized_score": 50.03889679384034 - }, - "bbh": { - "name": "BBH", - "value": 0.5320919605840294, - "normalized_score": 34.126491245346166 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1163141993957704, - "normalized_score": 11.63141993957704 - }, - "gpqa": { - "name": "GPQA", - "value": 0.287751677852349, - "normalized_score": 5.033557046979867 - }, - "musr": { - "name": "MUSR", - "value": 0.38857291666666666, - "normalized_score": 7.37161458333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39910239361702127, - "normalized_score": 33.23359929078014 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-10-02", - "submission_date": "2024-10-04", - "generation": 1, - "base_model": "nvidia/Mistral-NeMo-Minitron-8B-Instruct (Merge)", - "hub_license": "other", - "hub_hearts": 74, - "params_billions": 8.414, - "co2_cost": 3.987795925850581 - } - }, - { - "id": "nvidia/Nemotron-Mini-4B-Instruct_bfloat16_6a417790c444fd65a3da6a5c8821de6afc9654a6_True", - "model": { - "name": "nvidia/Nemotron-Mini-4B-Instruct", - "sha": "6a417790c444fd65a3da6a5c8821de6afc9654a6", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "NemotronForCausalLM", - "average_score": 18.363511312885766, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6668761109411916, - "normalized_score": 66.68761109411916 - }, - "bbh": { - "name": "BBH", - "value": 0.3864840798591535, - "normalized_score": 14.203825178862052 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0256797583081571, - "normalized_score": 2.56797583081571 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2802013422818792, - "normalized_score": 4.026845637583895 - }, - "musr": { - "name": "MUSR", - "value": 0.3767291666666666, - "normalized_score": 4.624479166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.26263297872340424, - "normalized_score": 18.070330969267136 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-09-10", - "submission_date": "2024-09-25", - "generation": 0, - "base_model": "nvidia/Nemotron-Mini-4B-Instruct", - "hub_license": "other", - "hub_hearts": 159, - "params_billions": 4.0, - "co2_cost": 2.2346275759953937 - } - }, - { - "id": "nvidia/OpenMath2-Llama3.1-8B_float16_4187cd28e77e76367261992b3274c77ffcbfd3d5_False", - "model": { - "name": "nvidia/OpenMath2-Llama3.1-8B", - "sha": "4187cd28e77e76367261992b3274c77ffcbfd3d5", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 12.751664730325508, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23305939352030391, - 
"normalized_score": 23.305939352030393 - }, - "bbh": { - "name": "BBH", - "value": 0.40955241401694514, - "normalized_score": 16.294369976218587 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2673716012084592, - "normalized_score": 26.73716012084592 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.34355208333333337, - "normalized_score": 2.010677083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15533577127659576, - "normalized_score": 6.148419030732861 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-09-30", - "submission_date": "2024-12-07", - "generation": 1, - "base_model": "nvidia/OpenMath2-Llama3.1-8B (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 28, - "params_billions": 8.03, - "co2_cost": 2.5522116124404097 - } - }, - { - "id": "nxmwxm/Beast-Soul-new_float16_dd2ae8a96b7d088eb94a1cfa6ff84c3489e8c010_False", - "model": { - "name": "nxmwxm/Beast-Soul-new", - "sha": "dd2ae8a96b7d088eb94a1cfa6ff84c3489e8c010", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.81767341260428, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.48687482546310457, - "normalized_score": 48.68748254631046 - }, - "bbh": { - "name": "BBH", - "value": 0.5227143628884523, - "normalized_score": 33.07275916855207 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07401812688821752, - "normalized_score": 7.401812688821751 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28187919463087246, - "normalized_score": 4.250559284116329 - }, - "musr": { - "name": "MUSR", - "value": 0.4459270833333333, - "normalized_score": 14.140885416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3101728723404255, - "normalized_score": 23.352541371158388 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-07", - "submission_date": "2024-08-07", - "generation": 1, - "base_model": "nxmwxm/Beast-Soul-new (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.3140460927306377 - } - }, - { - "id": "occiglot/occiglot-7b-es-en-instruct_float16_5858f6ee118eef70896f1870fd61052348ff571e_True", - "model": { - "name": "occiglot/occiglot-7b-es-en-instruct", - "sha": "5858f6ee118eef70896f1870fd61052348ff571e", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 12.457903975085708, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3485141646387142, - "normalized_score": 34.851416463871416 - }, - "bbh": { - "name": "BBH", - "value": 0.4110970229781084, - "normalized_score": 17.23541035561212 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02416918429003021, - "normalized_score": 2.416918429003021 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.37375, - "normalized_score": 4.452083333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2310505319148936, - 
"normalized_score": 14.561170212765957 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-03-05", - "submission_date": "2024-09-02", - "generation": 0, - "base_model": "occiglot/occiglot-7b-es-en-instruct", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 7.242, - "co2_cost": 1.3774762414851427 - } - }, - { - "id": "odyssey-labs/Astral-1-10B_float16_00b55cd83fb4b97cd2d83604c04bd0b96da4b26f_True", - "model": { - "name": "odyssey-labs/Astral-1-10B", - "sha": "00b55cd83fb4b97cd2d83604c04bd0b96da4b26f", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.690197061048178, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.38780657544204933, - "normalized_score": 38.78065754420493 - }, - "bbh": { - "name": "BBH", - "value": 0.4872563924334199, - "normalized_score": 28.313232406760367 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03474320241691843, - "normalized_score": 3.474320241691843 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.42797916666666663, - "normalized_score": 12.130729166666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29853723404255317, - "normalized_score": 22.059692671394796 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-28", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 10.732, - "co2_cost": 1.5137985130131593 - } - }, - { - "id": "olabs-ai/reflection_model_float16_a8b0fc584b10e0110e04f9d21c7f10d24391c1d5_False", - "model": { - "name": "olabs-ai/reflection_model", - "sha": "a8b0fc584b10e0110e04f9d21c7f10d24391c1d5", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 14.079165535571102, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15986914719610634, - "normalized_score": 15.986914719610633 - }, - "bbh": { - "name": "BBH", - "value": 0.4712508645838735, - "normalized_score": 25.206881738811884 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0513595166163142, - "normalized_score": 5.13595166163142 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30033557046979864, - "normalized_score": 6.711409395973152 - }, - "musr": { - "name": "MUSR", - "value": 0.35083333333333333, - "normalized_score": 5.754166666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33111702127659576, - "normalized_score": 25.67966903073286 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-08", - "submission_date": "2024-09-08", - "generation": 0, - "base_model": "olabs-ai/reflection_model", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 9.3, - "co2_cost": 4.815086899243573 - } - }, - { - "id": 
"ontocord/Llama_3.2_1b-autoredteam_helpfulness-train_float16_3115f5fa8573b9766a25a0e5e966b99652ecb77c_True", - "model": { - "name": "ontocord/Llama_3.2_1b-autoredteam_helpfulness-train", - "sha": "3115f5fa8573b9766a25a0e5e966b99652ecb77c", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 6.603005260709961, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2765484470094904, - "normalized_score": 27.654844700949038 - }, - "bbh": { - "name": "BBH", - "value": 0.31150775306414563, - "normalized_score": 4.336962243234709 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01661631419939577, - "normalized_score": 1.6616314199395772 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.345875, - "normalized_score": 3.267708333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11319813829787234, - "normalized_score": 1.466459810874704 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-30", - "submission_date": "2025-01-31", - "generation": 0, - "base_model": "ontocord/Llama_3.2_1b-autoredteam_helpfulness-train", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 1.498, - "co2_cost": 0.7629543025609749 - } - }, - { - "id": "ontocord/RedPajama-3B-v1-AutoRedteam_float16_abfffba25b38db573761a30ee5cb2238224d3d35_False", - "model": { - "name": "ontocord/RedPajama-3B-v1-AutoRedteam", - "sha": "abfffba25b38db573761a30ee5cb2238224d3d35", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "GPTNeoXForCausalLM", - "average_score": 3.5632816248649792, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.13434021729012352, - "normalized_score": 13.434021729012354 - }, - "bbh": { - "name": "BBH", - "value": 0.30256825198631376, - "normalized_score": 2.9495223386158114 - }, - "math": { - "name": "MATH Level 5", - "value": 0.00906344410876133, - "normalized_score": 0.906344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2424496644295302, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.36606249999999996, - "normalized_score": 2.891145833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1107878989361702, - "normalized_score": 1.1986554373522447 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-14", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 2.776, - "co2_cost": 0.3539917321904564 - } - }, - { - "id": "ontocord/RedPajama-3B-v1-AutoRedteam-Harmless-only_float16_39445554944ee8b7b135c177d96348b5be4cea11_True", - "model": { - "name": "ontocord/RedPajama-3B-v1-AutoRedteam-Harmless-only", - "sha": "39445554944ee8b7b135c177d96348b5be4cea11", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "GPTNeoXForCausalLM", - "average_score": 3.9505437890141017, - "has_chat_template": true - }, - "evaluations": { - 
"ifeval": { - "name": "IFEval", - "value": 0.152475431854147, - "normalized_score": 15.247543185414699 - }, - "bbh": { - "name": "BBH", - "value": 0.3123669789182832, - "normalized_score": 3.7795558397643085 - }, - "math": { - "name": "MATH Level 5", - "value": 0.006042296072507553, - "normalized_score": 0.6042296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23154362416107382, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.366125, - "normalized_score": 2.9656249999999993 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10995678191489362, - "normalized_score": 1.1063091016548463 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-15", - "submission_date": "2025-01-28", - "generation": 0, - "base_model": "ontocord/RedPajama-3B-v1-AutoRedteam-Harmless-only", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 2.776, - "co2_cost": 0.445491323704418 - } - }, - { - "id": "ontocord/RedPajama3b_v1-autoredteam_helpfulness-train_float16_624ffe9d59c306768a13ae6953be54a04501f272_True", - "model": { - "name": "ontocord/RedPajama3b_v1-autoredteam_helpfulness-train", - "sha": "624ffe9d59c306768a13ae6953be54a04501f272", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "GPTNeoXForCausalLM", - "average_score": 6.038454723335806, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2847666414003732, - "normalized_score": 28.47666414003732 - }, - "bbh": { - "name": "BBH", - "value": 0.30927408550278385, - "normalized_score": 3.372124671371246 - }, - "math": { - "name": "MATH Level 5", - "value": 0.006797583081570997, - "normalized_score": 0.6797583081570997 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24580536912751677, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.35796875, - "normalized_score": 2.512760416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11070478723404255, - "normalized_score": 1.1894208037825047 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-26", - "submission_date": "2025-01-26", - "generation": 0, - "base_model": "ontocord/RedPajama3b_v1-autoredteam_helpfulness-train", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 2.776, - "co2_cost": 0.66425869125511 - } - }, - { - "id": "ontocord/merged_0.2_expert_0.8_float16_dccbe9510988e82eb0025b8c02f6e866a4d90223_False", - "model": { - "name": "ontocord/merged_0.2_expert_0.8", - "sha": "dccbe9510988e82eb0025b8c02f6e866a4d90223", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.808386541486368, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17425763640473943, - "normalized_score": 17.425763640473946 - }, - "bbh": { - "name": "BBH", - "value": 0.3046000784127159, - "normalized_score": 3.2883157460305967 - }, - "math": { - "name": "MATH Level 5", - "value": 0.026435045317220542, - "normalized_score": 2.643504531722054 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - 
"value": 0.36206249999999995, - "normalized_score": 2.6911458333333322 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11112034574468085, - "normalized_score": 1.2355939716312052 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-10", - "submission_date": "2025-03-10", - "generation": 0, - "base_model": "ontocord/merged_0.2_expert_0.8", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.759, - "co2_cost": 0.26600915283702076 - } - }, - { - "id": "ontocord/merged_0.2_expert_0.8-stack_2x_float16_2cf083ad639ee2dfe56225af554af62e3922357a_False", - "model": { - "name": "ontocord/merged_0.2_expert_0.8-stack_2x", - "sha": "2cf083ad639ee2dfe56225af554af62e3922357a", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.697285033470183, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17960345217356613, - "normalized_score": 17.960345217356615 - }, - "bbh": { - "name": "BBH", - "value": 0.30061312694162695, - "normalized_score": 2.8186720352946852 - }, - "math": { - "name": "MATH Level 5", - "value": 0.024924471299093656, - "normalized_score": 2.492447129909366 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2625838926174497, - "normalized_score": 1.6778523489932917 - }, - "musr": { - "name": "MUSR", - "value": 0.3540625, - "normalized_score": 2.091145833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11028922872340426, - "normalized_score": 1.1432476359338057 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-10", - "submission_date": "2025-03-10", - "generation": 0, - "base_model": "ontocord/merged_0.2_expert_0.8-stack_2x", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 6.512, - "co2_cost": 0.48560002672054386 - } - }, - { - "id": "ontocord/merged_0.5_expert_0.5_float16_2cb783ca7d4cdc171c583fbe94348d7417c2ce78_False", - "model": { - "name": "ontocord/merged_0.5_expert_0.5", - "sha": "2cb783ca7d4cdc171c583fbe94348d7417c2ce78", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.592377750754853, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1787291054402319, - "normalized_score": 17.872910544023192 - }, - "bbh": { - "name": "BBH", - "value": 0.3017011118802398, - "normalized_score": 3.1028049707296694 - }, - "math": { - "name": "MATH Level 5", - "value": 0.019637462235649546, - "normalized_score": 1.9637462235649545 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.35424999999999995, - "normalized_score": 1.5145833333333325 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1107878989361702, - "normalized_score": 1.1986554373522447 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-10", - "submission_date": "2025-03-10", - "generation": 0, - "base_model": "ontocord/merged_0.5_expert_0.5", - "hub_license": "", - 
"hub_hearts": 0, - "params_billions": 3.759, - "co2_cost": 0.26981889893684985 - } - }, - { - "id": "ontocord/ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained-autoredteam_helpful-0.25_helpful_float16_b48b9e6ace48c16205a8d09ccda47d9ed7cbe97b_True", - "model": { - "name": "ontocord/ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained-autoredteam_helpful-0.25_helpful", - "sha": "b48b9e6ace48c16205a8d09ccda47d9ed7cbe97b", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.00101442542838, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.13184240038652995, - "normalized_score": 13.184240038652996 - }, - "bbh": { - "name": "BBH", - "value": 0.3004467893724157, - "normalized_score": 2.34885308225961 - }, - "math": { - "name": "MATH Level 5", - "value": 0.010574018126888218, - "normalized_score": 1.0574018126888218 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.36311458333333335, - "normalized_score": 3.489322916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11419547872340426, - "normalized_score": 1.5772754137115832 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-23", - "submission_date": "2025-02-27", - "generation": 0, - "base_model": "ontocord/ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained-autoredteam_helpful-0.25_helpful", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.759, - "co2_cost": 0.2752249678935142 - } - }, - { - "id": "ontocord/ontocord_wide_7b-stacked-stage1_float16_791875c466470bb40c0d90297395a066c04c5029_True", - "model": { - "name": "ontocord/ontocord_wide_7b-stacked-stage1", - "sha": "791875c466470bb40c0d90297395a066c04c5029", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.907682384845091, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.14845388014911545, - "normalized_score": 14.845388014911546 - }, - "bbh": { - "name": "BBH", - "value": 0.28965200351622594, - "normalized_score": 1.5650455535749657 - }, - "math": { - "name": "MATH Level 5", - "value": 0.00906344410876133, - "normalized_score": 0.906344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2533557046979866, - "normalized_score": 0.44742729306487633 - }, - "musr": { - "name": "MUSR", - "value": 0.3603541666666667, - "normalized_score": 4.5109375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11053856382978723, - "normalized_score": 1.1709515366430252 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-02-27", - "generation": 0, - "base_model": "ontocord/ontocord_wide_7b-stacked-stage1", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.888, - "co2_cost": 0.5826571279896559 - } - }, - { - "id": "ontocord/ontocord_wide_7b-stacked-stage1-instruct_float16_a421c77d76f14dcd8add189ccae2bc15d1a63dd0_True", - "model": { - "name": "ontocord/ontocord_wide_7b-stacked-stage1-instruct", - "sha": "a421c77d76f14dcd8add189ccae2bc15d1a63dd0", - 
"precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.6656823104305456, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15302508455342934, - "normalized_score": 15.302508455342934 - }, - "bbh": { - "name": "BBH", - "value": 0.2853913447506418, - "normalized_score": 1.4889344424638544 - }, - "math": { - "name": "MATH Level 5", - "value": 0.006797583081570997, - "normalized_score": 0.6797583081570997 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24664429530201343, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.35378125, - "normalized_score": 3.2226562499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11170212765957446, - "normalized_score": 1.300236406619384 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-02-27", - "generation": 0, - "base_model": "ontocord/ontocord_wide_7b-stacked-stage1-instruct", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.888, - "co2_cost": 0.5963748017116766 - } - }, - { - "id": "ontocord/starcoder2-29b-ls_float16_5218578d74cbd7ca3a573ce2acc8f1d61e061f13_False", - "model": { - "name": "ontocord/starcoder2-29b-ls", - "sha": "5218578d74cbd7ca3a573ce2acc8f1d61e061f13", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Starcoder2ForCausalLM", - "average_score": 8.448973787058975, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21492417895628046, - "normalized_score": 21.492417895628044 - }, - "bbh": { - "name": "BBH", - "value": 0.37349755200329665, - "normalized_score": 10.973636465379014 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0188821752265861, - "normalized_score": 1.8882175226586102 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27348993288590606, - "normalized_score": 3.1319910514541416 - }, - "musr": { - "name": "MUSR", - "value": 0.36999999999999994, - "normalized_score": 3.5499999999999994 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1869182180851064, - "normalized_score": 9.657579787234043 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-12", - "submission_date": "2025-02-12", - "generation": 0, - "base_model": "ontocord/starcoder2-29b-ls", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 29.009, - "co2_cost": 2.5985542920611735 - } - }, - { - "id": "ontocord/starcoder2_3b-AutoRedteam_float16_5369e550124b39b17a6350d3e77a696329460c7b_False", - "model": { - "name": "ontocord/starcoder2_3b-AutoRedteam", - "sha": "5369e550124b39b17a6350d3e77a696329460c7b", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Starcoder2ForCausalLM", - "average_score": 5.416510549136297, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15737133029251277, - "normalized_score": 15.737133029251275 - }, - "bbh": { - "name": "BBH", - "value": 0.3497644619743598, - "normalized_score": 8.637687220647841 - }, - "math": { - "name": "MATH Level 5", - "value": 0.010574018126888218, - 
"normalized_score": 1.0574018126888218 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2516778523489933, - "normalized_score": 0.22371364653244186 - }, - "musr": { - "name": "MUSR", - "value": 0.3645729166666667, - "normalized_score": 3.1049479166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.13364361702127658, - "normalized_score": 3.7381796690307305 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-16", - "submission_date": "2025-01-27", - "generation": 0, - "base_model": "ontocord/starcoder2_3b-AutoRedteam", - "hub_license": "bigscience-openrail-m", - "hub_hearts": 0, - "params_billions": 3.181, - "co2_cost": 0.47940932554391447 - } - }, - { - "id": "ontocord/wide_3b-merge_test_float16_8d49cfe516485cc9dc177c0319736902c2eaa09b_True", - "model": { - "name": "ontocord/wide_3b-merge_test", - "sha": "8d49cfe516485cc9dc177c0319736902c2eaa09b", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.9416429369423778, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17628115622104903, - "normalized_score": 17.628115622104904 - }, - "bbh": { - "name": "BBH", - "value": 0.3011467446788138, - "normalized_score": 2.9348182406841157 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23993288590604026, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.342, - "normalized_score": 2.3499999999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10663231382978723, - "normalized_score": 0.7369237588652473 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-06", - "generation": 0, - "base_model": "ontocord/wide_3b-merge_test", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.759, - "co2_cost": 0.2729645023063523 - } - }, - { - "id": "ontocord/wide_3b-stage1_shuf_sample1_jsonl-pretrained_float16_e92c4a6984f7ef2c36338e8c21b55f0017fb7102_True", - "model": { - "name": "ontocord/wide_3b-stage1_shuf_sample1_jsonl-pretrained", - "sha": "e92c4a6984f7ef2c36338e8c21b55f0017fb7102", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.323686258344847, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.13946107439371977, - "normalized_score": 13.946107439371977 - }, - "bbh": { - "name": "BBH", - "value": 0.30036095049490824, - "normalized_score": 2.5825845687939295 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01661631419939577, - "normalized_score": 1.6616314199395772 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.36320833333333336, - "normalized_score": 4.067708333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11402925531914894, - "normalized_score": 1.5588061465721037 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - 
"upload_date": "2025-02-23", - "submission_date": "2025-02-27", - "generation": 0, - "base_model": "ontocord/wide_3b-stage1_shuf_sample1_jsonl-pretrained", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.759, - "co2_cost": 0.27350878987754895 - } - }, - { - "id": "ontocord/wide_3b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge_float16_e413857a7181e403b1ab0a444d626d3211cfd417_False", - "model": { - "name": "ontocord/wide_3b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge", - "sha": "e413857a7181e403b1ab0a444d626d3211cfd417", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.866410642270219, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16636413604790845, - "normalized_score": 16.636413604790846 - }, - "bbh": { - "name": "BBH", - "value": 0.30309127879396963, - "normalized_score": 3.1835355510761367 - }, - "math": { - "name": "MATH Level 5", - "value": 0.011329305135951661, - "normalized_score": 1.1329305135951662 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.3845416666666666, - "normalized_score": 5.667708333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11112034574468085, - "normalized_score": 1.2355939716312052 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-06", - "generation": 0, - "base_model": "ontocord/wide_3b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.759, - "co2_cost": 0.26813200193851927 - } - }, - { - "id": "ontocord/wide_3b_sft_stag1.2-lyrical_news_software_howto_formattedtext-merge_float16_195789ff05c22b511c05cd851666fad67ccce173_False", - "model": { - "name": "ontocord/wide_3b_sft_stag1.2-lyrical_news_software_howto_formattedtext-merge", - "sha": "195789ff05c22b511c05cd851666fad67ccce173", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.700634702140898, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16973629968483622, - "normalized_score": 16.973629968483625 - }, - "bbh": { - "name": "BBH", - "value": 0.2975125970659158, - "normalized_score": 2.5518024734392255 - }, - "math": { - "name": "MATH Level 5", - "value": 0.013595166163141994, - "normalized_score": 1.3595166163141994 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.37781249999999994, - "normalized_score": 4.593229166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1124501329787234, - "normalized_score": 1.383348108747044 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-06", - "generation": 0, - "base_model": "ontocord/wide_3b_sft_stag1.2-lyrical_news_software_howto_formattedtext-merge", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.759, - 
"co2_cost": 0.26694897301946985 - } - }, - { - "id": "ontocord/wide_3b_sft_stage1.1-ss1-no_redteam_skg_poem.no_issue_float16_10b3121e0ea5a94c5ac712522441d59cdb5beb0b_True", - "model": { - "name": "ontocord/wide_3b_sft_stage1.1-ss1-no_redteam_skg_poem.no_issue", - "sha": "10b3121e0ea5a94c5ac712522441d59cdb5beb0b", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.6361259036379225, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.14800396281865452, - "normalized_score": 14.800396281865451 - }, - "bbh": { - "name": "BBH", - "value": 0.30953444521357315, - "normalized_score": 3.3516775197492783 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02039274924471299, - "normalized_score": 2.0392749244712993 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.3579375, - "normalized_score": 3.7421875000000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1107878989361702, - "normalized_score": 1.1986554373522447 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-06", - "generation": 0, - "base_model": "ontocord/wide_3b_sft_stage1.1-ss1-no_redteam_skg_poem.no_issue", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.759, - "co2_cost": 0.27730493487784447 - } - }, - { - "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr.no_issue_float16_53427374dcdc3b0c6a67daf89e6653e1733d9b51_True", - "model": { - "name": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr.no_issue", - "sha": "53427374dcdc3b0c6a67daf89e6653e1733d9b51", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.124470876297795, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.12367407368005781, - "normalized_score": 12.367407368005782 - }, - "bbh": { - "name": "BBH", - "value": 0.3060091508023586, - "normalized_score": 3.3670533974072723 - }, - "math": { - "name": "MATH Level 5", - "value": 0.010574018126888218, - "normalized_score": 1.0574018126888218 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.3672708333333334, - "normalized_score": 3.475520833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11112034574468085, - "normalized_score": 1.2355939716312052 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-06", - "generation": 0, - "base_model": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr.no_issue", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.759, - "co2_cost": 0.2774853861444414 - } - }, - { - "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math.no_issue_float16_b8e967c6f14b5a16eacdd25d62bead5eb4a34f07_True", - "model": { - "name": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math.no_issue", - "sha": "b8e967c6f14b5a16eacdd25d62bead5eb4a34f07", - "precision": "float16", - "type": "pretrained", - 
"weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.633703566106593, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1191527369601546, - "normalized_score": 11.915273696015461 - }, - "bbh": { - "name": "BBH", - "value": 0.2955590587949957, - "normalized_score": 2.2297571883124263 - }, - "math": { - "name": "MATH Level 5", - "value": 0.006797583081570997, - "normalized_score": 0.6797583081570997 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.35530208333333335, - "normalized_score": 3.0460937500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11826795212765957, - "normalized_score": 2.0297724586288406 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-06", - "generation": 0, - "base_model": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math.no_issue", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.759, - "co2_cost": 0.2787363319062327 - } - }, - { - "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue_bfloat16_14c37892622d92668836b3fdd54af2eed0acf1c1_True", - "model": { - "name": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue", - "sha": "14c37892622d92668836b3fdd54af2eed0acf1c1", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.8641325845274888, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1128328390891723, - "normalized_score": 11.283283908917229 - }, - "bbh": { - "name": "BBH", - "value": 0.3171441625189962, - "normalized_score": 4.415068812630222 - }, - "math": { - "name": "MATH Level 5", - "value": 0.011329305135951661, - "normalized_score": 1.1329305135951662 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26845637583892623, - "normalized_score": 2.460850111856831 - }, - "musr": { - "name": "MUSR", - "value": 0.34603125, - "normalized_score": 2.4539062499999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11294880319148937, - "normalized_score": 1.4387559101654845 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-02", - "submission_date": "2025-03-02", - "generation": 0, - "base_model": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.759, - "co2_cost": 0.31105493523034483 - } - }, - { - "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue_float16_0d15e00653a0c3f9e7994873d1ffbbc7580f051a_True", - "model": { - "name": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue", - "sha": "0d15e00653a0c3f9e7994873d1ffbbc7580f051a", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.7127305352192885, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1161551350416894, - "normalized_score": 11.61551350416894 - }, - "bbh": { - "name": "BBH", - "value": 
0.3184343946486203, - "normalized_score": 4.554520467313206 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0075528700906344415, - "normalized_score": 0.7552870090634441 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.34469791666666666, - "normalized_score": 2.187239583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11236702127659574, - "normalized_score": 1.374113475177304 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-02", - "submission_date": "2025-03-06", - "generation": 0, - "base_model": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.759, - "co2_cost": 0.5560188956870915 - } - }, - { - "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories_no_orig_instr.no_issue_float16_09658cf698c378e6c02afdf867024a8529c53cbc_True", - "model": { - "name": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories_no_orig_instr.no_issue", - "sha": "09658cf698c378e6c02afdf867024a8529c53cbc", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.284721666227881, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.13169279733329786, - "normalized_score": 13.169279733329788 - }, - "bbh": { - "name": "BBH", - "value": 0.30640062669813056, - "normalized_score": 2.93878288661528 - }, - "math": { - "name": "MATH Level 5", - "value": 0.00906344410876133, - "normalized_score": 0.906344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.34460416666666666, - "normalized_score": 5.075520833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11444481382978723, - "normalized_score": 1.6049793144208027 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-06", - "generation": 0, - "base_model": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories_no_orig_instr.no_issue", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.759, - "co2_cost": 0.2798149673720302 - } - }, - { - "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_stories.no_issue_float16_55a5b5212c6be3133e04626dfef338c2c7bd9e52_True", - "model": { - "name": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_stories.no_issue", - "sha": "55a5b5212c6be3133e04626dfef338c2c7bd9e52", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.670762265147779, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.118178654857999, - "normalized_score": 11.8178654857999 - }, - "bbh": { - "name": "BBH", - "value": 0.3037498354512724, - "normalized_score": 2.9977998836734763 - }, - "math": { - "name": "MATH Level 5", - "value": 0.008308157099697885, - "normalized_score": 0.8308157099697886 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 
2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.35669791666666667, - "normalized_score": 2.4539062499999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11619015957446809, - "normalized_score": 1.798906619385342 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-06", - "generation": 0, - "base_model": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_stories.no_issue", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.759, - "co2_cost": 0.27235854321476405 - } - }, - { - "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_math.no_issue_float16_1608275d180108e1b692fbaecae7cd19d0a48445_True", - "model": { - "name": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_math.no_issue", - "sha": "1608275d180108e1b692fbaecae7cd19d0a48445", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.5261580781434336, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.12399876771410967, - "normalized_score": 12.399876771410966 - }, - "bbh": { - "name": "BBH", - "value": 0.30324371251012056, - "normalized_score": 3.275690229364662 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0075528700906344415, - "normalized_score": 0.7552870090634441 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.34869791666666666, - "normalized_score": 2.187239583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11278257978723404, - "normalized_score": 1.4202866430260035 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-06", - "generation": 0, - "base_model": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_math.no_issue", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.759, - "co2_cost": 0.272251793359482 - } - }, - { - "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_math.no_issue_float16_411920369fdde84e168c9821ffb3a9cc1a260d0c_True", - "model": { - "name": "ontocord/wide_3b_sft_stage1.1-ss1-with_math.no_issue", - "sha": "411920369fdde84e168c9821ffb3a9cc1a260d0c", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.767499605566051, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.12981888057022034, - "normalized_score": 12.981888057022035 - }, - "bbh": { - "name": "BBH", - "value": 0.30518984588252307, - "normalized_score": 3.133656346349717 - }, - "math": { - "name": "MATH Level 5", - "value": 0.015861027190332326, - "normalized_score": 1.5861027190332326 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.39276041666666667, - "normalized_score": 7.928385416666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1146941489361702, - "normalized_score": 1.6326832151300221 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - 
"is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-06", - "generation": 0, - "base_model": "ontocord/wide_3b_sft_stage1.1-ss1-with_math.no_issue", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.759, - "co2_cost": 0.27039480744781635 - } - }, - { - "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_r1_generics_intr_math_stories.no_issue_float16_83824d233440e268368bc7b2ce41fb0f2c939574_True", - "model": { - "name": "ontocord/wide_3b_sft_stage1.1-ss1-with_r1_generics_intr_math_stories.no_issue", - "sha": "83824d233440e268368bc7b2ce41fb0f2c939574", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 5.093186530380667, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20490742341431845, - "normalized_score": 20.490742341431847 - }, - "bbh": { - "name": "BBH", - "value": 0.2911778102988436, - "normalized_score": 2.3470409575204094 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.35753125, - "normalized_score": 4.524739583333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11668882978723404, - "normalized_score": 1.854314420803781 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-02", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.759, - "co2_cost": 0.2923910617635221 - } - }, - { - "id": "ontocord/wide_3b_sft_stage1.2-ss1-expert_fictional_lyrical_float16_efe7ecbb26b6e3a02645587382532d5869325506_False", - "model": { - "name": "ontocord/wide_3b_sft_stage1.2-ss1-expert_fictional_lyrical", - "sha": "efe7ecbb26b6e3a02645587382532d5869325506", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.837021332568196, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.146105666298754, - "normalized_score": 14.610566629875397 - }, - "bbh": { - "name": "BBH", - "value": 0.29981162881428614, - "normalized_score": 2.477490056885344 - }, - "math": { - "name": "MATH Level 5", - "value": 0.013595166163141994, - "normalized_score": 1.3595166163141994 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.39257291666666666, - "normalized_score": 7.10494791666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1141123670212766, - "normalized_score": 1.5680407801418434 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-07", - "submission_date": "2025-03-05", - "generation": 0, - "base_model": "ontocord/wide_3b_sft_stage1.2-ss1-expert_fictional_lyrical", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.759, - "co2_cost": 0.2758668885395803 - } - }, - { - "id": "ontocord/wide_3b_sft_stage1.2-ss1-expert_formatted_text_float16_1343c0ad6de8bba0e926a12bc839cdfed9336d2d_True", - "model": { 
- "name": "ontocord/wide_3b_sft_stage1.2-ss1-expert_formatted_text", - "sha": "1343c0ad6de8bba0e926a12bc839cdfed9336d2d", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.304343127317076, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.14872870649875664, - "normalized_score": 14.872870649875662 - }, - "bbh": { - "name": "BBH", - "value": 0.3068950688059236, - "normalized_score": 3.7308056255712647 - }, - "math": { - "name": "MATH Level 5", - "value": 0.012084592145015106, - "normalized_score": 1.2084592145015105 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.34739583333333335, - "normalized_score": 2.824479166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11461103723404255, - "normalized_score": 1.6234485815602824 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-06", - "generation": 0, - "base_model": "ontocord/wide_3b_sft_stage1.2-ss1-expert_formatted_text", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.759, - "co2_cost": 0.2704957114431379 - } - }, - { - "id": "ontocord/wide_3b_sft_stage1.2-ss1-expert_how-to_float16_2a9729fe267aa6a070236ebde081d11b02d4b42b_True", - "model": { - "name": "ontocord/wide_3b_sft_stage1.2-ss1-expert_how-to", - "sha": "2a9729fe267aa6a070236ebde081d11b02d4b42b", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.2152701749908585, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.12454842041339201, - "normalized_score": 12.454842041339202 - }, - "bbh": { - "name": "BBH", - "value": 0.3047398483929371, - "normalized_score": 3.8140871520054684 - }, - "math": { - "name": "MATH Level 5", - "value": 0.014350453172205438, - "normalized_score": 1.4350453172205437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.36581250000000004, - "normalized_score": 4.6598958333333345 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11527593085106383, - "normalized_score": 1.6973256501182026 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-06", - "generation": 0, - "base_model": "ontocord/wide_3b_sft_stage1.2-ss1-expert_how-to", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.759, - "co2_cost": 0.5351948830890698 - } - }, - { - "id": "ontocord/wide_3b_sft_stage1.2-ss1-expert_math_float16_92eb1ef051529df71a66f1c7841781dcf9cbd4e7_False", - "model": { - "name": "ontocord/wide_3b_sft_stage1.2-ss1-expert_math", - "sha": "92eb1ef051529df71a66f1c7841781dcf9cbd4e7", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 5.136738708504239, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19151850423542865, - "normalized_score": 19.151850423542868 
- }, - "bbh": { - "name": "BBH", - "value": 0.3059577262726771, - "normalized_score": 3.1664911884922264 - }, - "math": { - "name": "MATH Level 5", - "value": 0.027945619335347432, - "normalized_score": 2.794561933534743 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.37003125, - "normalized_score": 3.4539062499999993 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10920877659574468, - "normalized_score": 1.0231973995271864 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-06", - "generation": 0, - "base_model": "ontocord/wide_3b_sft_stage1.2-ss1-expert_math", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.759, - "co2_cost": 0.5454428641010042 - } - }, - { - "id": "ontocord/wide_3b_sft_stage1.2-ss1-expert_news_float16_303215f83d50a86121de57540d1285f592bc37ff_False", - "model": { - "name": "ontocord/wide_3b_sft_stage1.2-ss1-expert_news", - "sha": "303215f83d50a86121de57540d1285f592bc37ff", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.449975118096195, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16581448334862608, - "normalized_score": 16.58144833486261 - }, - "bbh": { - "name": "BBH", - "value": 0.2925879483112595, - "normalized_score": 1.9437982768865123 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01661631419939577, - "normalized_score": 1.6616314199395772 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.36209375000000005, - "normalized_score": 2.9283854166666674 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11112034574468085, - "normalized_score": 1.2355939716312052 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-05", - "submission_date": "2025-03-05", - "generation": 0, - "base_model": "ontocord/wide_3b_sft_stage1.2-ss1-expert_news", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.759, - "co2_cost": 0.2639636588021562 - } - }, - { - "id": "ontocord/wide_3b_sft_stage1.2-ss1-expert_software_float16_d3e034d69b18ca2ed506ff262c63ec8e1cf000bc_False", - "model": { - "name": "ontocord/wide_3b_sft_stage1.2-ss1-expert_software", - "sha": "d3e034d69b18ca2ed506ff262c63ec8e1cf000bc", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.290233092582329, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1733832896714052, - "normalized_score": 17.33832896714052 - }, - "bbh": { - "name": "BBH", - "value": 0.2979956844198214, - "normalized_score": 2.4994883234192558 - }, - "math": { - "name": "MATH Level 5", - "value": 0.015861027190332326, - "normalized_score": 1.5861027190332326 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.35685416666666664, - "normalized_score": 1.6401041666666656 - }, - "mmlu_pro": { - 
"name": "MMLU-PRO", - "value": 0.11402925531914894, - "normalized_score": 1.5588061465721037 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-05", - "submission_date": "2025-03-05", - "generation": 0, - "base_model": "ontocord/wide_3b_sft_stage1.2-ss1-expert_software", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.759, - "co2_cost": 0.2774434066210583 - } - }, - { - "id": "ontocord/wide_6.6b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge-stacked_float16_a567e6144b581cdca5917b5d5d9acf9b6023e1a3_False", - "model": { - "name": "ontocord/wide_6.6b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge-stacked", - "sha": "a567e6144b581cdca5917b5d5d9acf9b6023e1a3", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.97781016722033, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.12439881736015992, - "normalized_score": 12.439881736015993 - }, - "bbh": { - "name": "BBH", - "value": 0.30264484636677236, - "normalized_score": 3.014694718783778 - }, - "math": { - "name": "MATH Level 5", - "value": 0.014350453172205438, - "normalized_score": 1.4350453172205437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.3686354166666667, - "normalized_score": 3.579427083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11145279255319149, - "normalized_score": 1.2725325059101646 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-07", - "submission_date": "2025-03-07", - "generation": 0, - "base_model": "ontocord/wide_6.6b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge-stacked", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.888, - "co2_cost": 0.5821794147377032 - } - }, - { - "id": "oobabooga/CodeBooga-34B-v0.1_float16_8a4e1e16ac46333cbd0c17d733d3d70a956071a6_True", - "model": { - "name": "oobabooga/CodeBooga-34B-v0.1", - "sha": "8a4e1e16ac46333cbd0c17d733d3d70a956071a6", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 15.66170616238157, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5250180631834643, - "normalized_score": 52.50180631834643 - }, - "bbh": { - "name": "BBH", - "value": 0.3427441185661722, - "normalized_score": 8.562465862636055 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03927492447129909, - "normalized_score": 3.927492447129909 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25671140939597314, - "normalized_score": 0.8948545861297527 - }, - "musr": { - "name": "MUSR", - "value": 0.43102083333333335, - "normalized_score": 12.977604166666671 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.23595412234042554, - "normalized_score": 15.106013593380615 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-10-19", - "submission_date": 
"2024-07-29", - "generation": 0, - "base_model": "oobabooga/CodeBooga-34B-v0.1", - "hub_license": "llama2", - "hub_hearts": 145, - "params_billions": 33.744, - "co2_cost": 4.174007137499851 - } - }, - { - "id": "oopere/Llama-FinSent-S_float16_0740011dfde2d1a23150dc214e7d74d65512b557_False", - "model": { - "name": "oopere/Llama-FinSent-S", - "sha": "0740011dfde2d1a23150dc214e7d74d65512b557", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.811805794594786, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21187670935340452, - "normalized_score": 21.18767093534045 - }, - "bbh": { - "name": "BBH", - "value": 0.31562055310321474, - "normalized_score": 4.1560152972776905 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01812688821752266, - "normalized_score": 1.812688821752266 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25671140939597314, - "normalized_score": 0.8948545861297527 - }, - "musr": { - "name": "MUSR", - "value": 0.3832395833333333, - "normalized_score": 5.371614583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11303191489361702, - "normalized_score": 1.4479905437352243 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-08", - "generation": 1, - "base_model": "oopere/Llama-FinSent-S (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 5, - "params_billions": 0.914, - "co2_cost": 0.36740712634287537 - } - }, - { - "id": "oopere/Llama-FinSent-S_bfloat16_9d9a76e8910865573e7c25c8d9d250355f2ece86_False", - "model": { - "name": "oopere/Llama-FinSent-S", - "sha": "9d9a76e8910865573e7c25c8d9d250355f2ece86", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.86640376735036, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2163980460733077, - "normalized_score": 21.63980460733077 - }, - "bbh": { - "name": "BBH", - "value": 0.3169254117559263, - "normalized_score": 4.30733068735382 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01283987915407855, - "normalized_score": 1.283987915407855 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.3831770833333333, - "normalized_score": 5.363802083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11336436170212766, - "normalized_score": 1.4849290780141835 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-19", - "generation": 1, - "base_model": "oopere/Llama-FinSent-S (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 5, - "params_billions": 0.914, - "co2_cost": 0.3679716763261966 - } - }, - { - "id": "oopere/pruned10-llama-3.2-3B_float16_5958def83347d0a8f8b95d27e7cdff37329b988c_False", - "model": { - "name": "oopere/pruned10-llama-3.2-3B", - "sha": "5958def83347d0a8f8b95d27e7cdff37329b988c", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - 
"average_score": 6.919943456537481, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17762980004166723, - "normalized_score": 17.76298000416672 - }, - "bbh": { - "name": "BBH", - "value": 0.3340421117164456, - "normalized_score": 7.759477124183007 - }, - "math": { - "name": "MATH Level 5", - "value": 0.019637462235649546, - "normalized_score": 1.9637462235649545 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26677852348993286, - "normalized_score": 2.2371364653243813 - }, - "musr": { - "name": "MUSR", - "value": 0.3721666666666667, - "normalized_score": 4.687499999999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16397938829787234, - "normalized_score": 7.1088209219858145 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-22", - "submission_date": "2024-12-22", - "generation": 1, - "base_model": "oopere/pruned10-llama-3.2-3B (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 3.001, - "co2_cost": 1.3210679670165064 - } - }, - { - "id": "oopere/pruned20-llama-1b_float16_3351c9a062055ce6c16dd2c9f0c229fb5dd7396b_False", - "model": { - "name": "oopere/pruned20-llama-1b", - "sha": "3351c9a062055ce6c16dd2c9f0c229fb5dd7396b", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.989519628599489, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19936213690784896, - "normalized_score": 19.936213690784896 - }, - "bbh": { - "name": "BBH", - "value": 0.30313627830972034, - "normalized_score": 3.185394259848986 - }, - "math": { - "name": "MATH Level 5", - "value": 0.010574018126888218, - "normalized_score": 1.0574018126888218 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.36314583333333333, - "normalized_score": 4.393229166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11228390957446809, - "normalized_score": 1.3648788416075646 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-16", - "submission_date": "2024-11-16", - "generation": 1, - "base_model": "oopere/pruned20-llama-1b (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 1.075, - "co2_cost": 0.8029560920393041 - } - }, - { - "id": "oopere/pruned20-llama-3.2-3b_float16_e92642870b0ad66e589889305608f422ee9be975_False", - "model": { - "name": "oopere/pruned20-llama-3.2-3b", - "sha": "e92642870b0ad66e589889305608f422ee9be975", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.656559779770029, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17887870849346402, - "normalized_score": 17.887870849346402 - }, - "bbh": { - "name": "BBH", - "value": 0.32478483912909756, - "normalized_score": 6.3327454652272905 - }, - "math": { - "name": "MATH Level 5", - "value": 0.015861027190332326, - "normalized_score": 1.5861027190332326 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - 
"value": 0.34184375, - "normalized_score": 2.897135416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12799202127659576, - "normalized_score": 3.1102245862884166 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-12", - "submission_date": "2024-12-12", - "generation": 1, - "base_model": "oopere/pruned20-llama-3.2-3b (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 2.79, - "co2_cost": 1.212079009731279 - } - }, - { - "id": "oopere/pruned40-llama-1b_float16_3de470d9c61cb57cea821e93b43fb250aa14b975_False", - "model": { - "name": "oopere/pruned40-llama-1b", - "sha": "3de470d9c61cb57cea821e93b43fb250aa14b975", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 6.608357202270273, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22843832143157933, - "normalized_score": 22.843832143157933 - }, - "bbh": { - "name": "BBH", - "value": 0.29691563801419935, - "normalized_score": 2.6553089313766187 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0075528700906344415, - "normalized_score": 0.7552870090634441 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24328859060402686, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.4286666666666667, - "normalized_score": 12.483333333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10821143617021277, - "normalized_score": 0.912381796690307 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-16", - "submission_date": "2024-11-26", - "generation": 1, - "base_model": "oopere/pruned40-llama-1b (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 1, - "params_billions": 0.914, - "co2_cost": 0.7532425709708284 - } - }, - { - "id": "oopere/pruned40-llama-3.2-1B_bfloat16_fb1abfc3dedee4f37fdcd465881ffe9fd8d87060_False", - "model": { - "name": "oopere/pruned40-llama-3.2-1B", - "sha": "fb1abfc3dedee4f37fdcd465881ffe9fd8d87060", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 6.877693765654211, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22663976028050017, - "normalized_score": 22.663976028050016 - }, - "bbh": { - "name": "BBH", - "value": 0.2982489713475327, - "normalized_score": 2.7012729836642007 - }, - "math": { - "name": "MATH Level 5", - "value": 0.008308157099697885, - "normalized_score": 0.8308157099697886 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25419463087248323, - "normalized_score": 0.5592841163310973 - }, - "musr": { - "name": "MUSR", - "value": 0.43523958333333335, - "normalized_score": 13.238281250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11145279255319149, - "normalized_score": 1.2725325059101646 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-16", - "submission_date": "2025-02-19", - "generation": 1, - "base_model": "oopere/pruned40-llama-3.2-1B (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 1, - 
"params_billions": 0.914, - "co2_cost": 0.3781415746182163 - } - }, - { - "id": "oopere/pruned40-llama-3.2-3b_float16_ceb2073cda2f21afa10efcbae74583fc9b319d54_False", - "model": { - "name": "oopere/pruned40-llama-3.2-3b", - "sha": "ceb2073cda2f21afa10efcbae74583fc9b319d54", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.371284651830273, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21829634259320824, - "normalized_score": 21.829634259320827 - }, - "bbh": { - "name": "BBH", - "value": 0.31671170280977073, - "normalized_score": 4.740101545945625 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01283987915407855, - "normalized_score": 1.283987915407855 - }, - "gpqa": { - "name": "GPQA", - "value": 0.22986577181208054, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3539375, - "normalized_score": 2.408854166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11768617021276596, - "normalized_score": 1.965130023640662 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-12", - "submission_date": "2024-12-12", - "generation": 1, - "base_model": "oopere/pruned40-llama-3.2-3b (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 2.367, - "co2_cost": 1.1953346374394476 - } - }, - { - "id": "oopere/pruned60-llama-1b_float16_86b157256928b50ee07cc3cf5b3884b70062f2fe_False", - "model": { - "name": "oopere/pruned60-llama-1b", - "sha": "86b157256928b50ee07cc3cf5b3884b70062f2fe", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.46756672622325, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.18285039251408486, - "normalized_score": 18.285039251408485 - }, - "bbh": { - "name": "BBH", - "value": 0.3016193474185398, - "normalized_score": 2.9425264807533544 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0022658610271903325, - "normalized_score": 0.22658610271903326 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24916107382550334, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.40879166666666666, - "normalized_score": 9.432291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11727061170212766, - "normalized_score": 1.9189568557919614 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-16", - "submission_date": "2024-11-25", - "generation": 1, - "base_model": "oopere/pruned60-llama-1b (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 1, - "params_billions": 0.753, - "co2_cost": 0.7649757630301478 - } - }, - { - "id": "oopere/pruned60-llama-3.2-3b_float16_c8c061d55288274a59205fa740b51a951ca93335_False", - "model": { - "name": "oopere/pruned60-llama-3.2-3b", - "sha": "c8c061d55288274a59205fa740b51a951ca93335", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.128680683633621, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1824758307956223, - "normalized_score": 
18.247583079562226 - }, - "bbh": { - "name": "BBH", - "value": 0.31662597093352013, - "normalized_score": 3.9884019153468855 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0037764350453172208, - "normalized_score": 0.3776435045317221 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.3633333333333333, - "normalized_score": 4.016666666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11311502659574468, - "normalized_score": 1.457225177304964 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-12", - "submission_date": "2024-12-13", - "generation": 1, - "base_model": "oopere/pruned60-llama-3.2-3b (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 1.944, - "co2_cost": 1.241768441814752 - } - }, - { - "id": "open-atlas/Atlas-Flash-1.5B-Preview_float16_160bf22e66b286a8ae7887a86fb21d7c49f7473e_True", - "model": { - "name": "open-atlas/Atlas-Flash-1.5B-Preview", - "sha": "160bf22e66b286a8ae7887a86fb21d7c49f7473e", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 11.111374803615627, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3269569187533522, - "normalized_score": 32.69569187533522 - }, - "bbh": { - "name": "BBH", - "value": 0.3215460102660847, - "normalized_score": 5.6543805979986255 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2212990936555891, - "normalized_score": 22.129909365558913 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2525167785234899, - "normalized_score": 0.33557046979865535 - }, - "musr": { - "name": "MUSR", - "value": 0.34879166666666667, - "normalized_score": 1.698958333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.13738364361702127, - "normalized_score": 4.153738179669029 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-26", - "submission_date": "2025-02-01", - "generation": 1, - "base_model": "open-atlas/Atlas-Flash-1.5B-Preview (Merge)", - "hub_license": "mit", - "hub_hearts": 3, - "params_billions": 1.777, - "co2_cost": 1.210487794649369 - } - }, - { - "id": "open-atlas/Atlas-Flash-7B-Preview_float16_d3d9a1e00c9c95e961ec8ec5f8e64e00b2cdb3a9_True", - "model": { - "name": "open-atlas/Atlas-Flash-7B-Preview", - "sha": "d3d9a1e00c9c95e961ec8ec5f8e64e00b2cdb3a9", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 17.496191215913385, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3907543096761038, - "normalized_score": 39.07543096761039 - }, - "bbh": { - "name": "BBH", - "value": 0.3541994356643969, - "normalized_score": 9.39485410104266 - }, - "math": { - "name": "MATH Level 5", - "value": 0.25755287009063443, - "normalized_score": 25.755287009063444 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28859060402684567, - "normalized_score": 5.145413870246088 - }, - "musr": { - "name": "MUSR", - "value": 0.38358333333333333, - "normalized_score": 5.78125 - }, - "mmlu_pro": { - 
"name": "MMLU-PRO", - "value": 0.27842420212765956, - "normalized_score": 19.824911347517727 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-26", - "submission_date": "2025-02-01", - "generation": 1, - "base_model": "open-atlas/Atlas-Flash-7B-Preview (Merge)", - "hub_license": "mit", - "hub_hearts": 4, - "params_billions": 7.616, - "co2_cost": 1.3441327579015 - } - }, - { - "id": "open-neo/Kyro-n1-3B_float16_dc34677fa2a29372519e8e5fb339efd865d7ee76_True", - "model": { - "name": "open-neo/Kyro-n1-3B", - "sha": "dc34677fa2a29372519e8e5fb339efd865d7ee76", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 23.492573830963973, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45949746672163194, - "normalized_score": 45.94974667216319 - }, - "bbh": { - "name": "BBH", - "value": 0.46853756471175373, - "normalized_score": 25.78921982449477 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2854984894259819, - "normalized_score": 28.54984894259819 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28187919463087246, - "normalized_score": 4.250559284116329 - }, - "musr": { - "name": "MUSR", - "value": 0.40879166666666666, - "normalized_score": 9.498958333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34225398936170215, - "normalized_score": 26.917109929078016 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-13", - "submission_date": "2025-03-13", - "generation": 1, - "base_model": "open-neo/Kyro-n1-3B (Merge)", - "hub_license": "other", - "hub_hearts": 12, - "params_billions": 3.086, - "co2_cost": 0.7749305983569827 - } - }, - { - "id": "open-neo/Kyro-n1-7B_float16_86c48c5dc7fbeb7ce3ebf605608fc69985d2b0ee_True", - "model": { - "name": "open-neo/Kyro-n1-7B", - "sha": "86c48c5dc7fbeb7ce3ebf605608fc69985d2b0ee", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 28.918698187681176, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5572669406064796, - "normalized_score": 55.72669406064797 - }, - "bbh": { - "name": "BBH", - "value": 0.5386561160683788, - "normalized_score": 34.40152837379154 - }, - "math": { - "name": "MATH Level 5", - "value": 0.38972809667673713, - "normalized_score": 38.972809667673715 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.38841666666666663, - "normalized_score": 5.91875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.433344414893617, - "normalized_score": 37.038268321512994 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-15", - "submission_date": "2025-03-13", - "generation": 1, - "base_model": "open-neo/Kyro-n1-7B (Merge)", - "hub_license": "mit", - "hub_hearts": 6, - "params_billions": 7.616, - "co2_cost": 0.7370336620638953 - } - }, - { - "id": 
"open-thoughts/OpenThinker-7B_float16_5a931fd3fa8618acda2da8eaec4a3f10ee009739_True", - "model": { - "name": "open-thoughts/OpenThinker-7B", - "sha": "5a931fd3fa8618acda2da8eaec4a3f10ee009739", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 26.578519435108664, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4088895242401273, - "normalized_score": 40.88895242401273 - }, - "bbh": { - "name": "BBH", - "value": 0.5342727589615611, - "normalized_score": 34.50881791580879 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4259818731117825, - "normalized_score": 42.59818731117825 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25671140939597314, - "normalized_score": 0.8948545861297527 - }, - "musr": { - "name": "MUSR", - "value": 0.38199999999999995, - "normalized_score": 5.416666666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.41647273936170215, - "normalized_score": 35.163637706855795 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-28", - "submission_date": "2025-02-14", - "generation": 2, - "base_model": "Qwen/Qwen2.5-7B", - "hub_license": "apache-2.0", - "hub_hearts": 126, - "params_billions": 7.616, - "co2_cost": 0.6851186403453313 - } - }, - { - "id": "openai-community/gpt2_bfloat16_607a30d783dfa663caf39e06633721c8d4cfcd7e_False", - "model": { - "name": "openai-community/gpt2", - "sha": "607a30d783dfa663caf39e06633721c8d4cfcd7e", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "GPT2LMHeadModel", - "average_score": 6.510807087761722, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17925327021192655, - "normalized_score": 17.925327021192658 - }, - "bbh": { - "name": "BBH", - "value": 0.3035711244213359, - "normalized_score": 2.674981367986987 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0022658610271903325, - "normalized_score": 0.22658610271903326 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.44705208333333335, - "normalized_score": 15.348177083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11594082446808511, - "normalized_score": 1.7712027186761226 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2022-03-02", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "openai-community/gpt2", - "hub_license": "mit", - "hub_hearts": 2628, - "params_billions": 0.137, - "co2_cost": 0.0859412568146148 - } - }, - { - "id": "openai-community/gpt2_float16_607a30d783dfa663caf39e06633721c8d4cfcd7e_False", - "model": { - "name": "openai-community/gpt2", - "sha": "607a30d783dfa663caf39e06633721c8d4cfcd7e", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "GPT2LMHeadModel", - "average_score": 6.33423541829189, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17795449407571912, - "normalized_score": 17.795449407571912 - }, - "bbh": { - "name": "BBH", - "value": 0.30165801067653053, - 
"normalized_score": 2.8159113095085133 - }, - "math": { - "name": "MATH Level 5", - "value": 0.005287009063444109, - "normalized_score": 0.5287009063444109 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.43902083333333336, - "normalized_score": 13.910937500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11652260638297872, - "normalized_score": 1.8358451536643017 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2022-03-02", - "submission_date": "2024-08-12", - "generation": 0, - "base_model": "openai-community/gpt2", - "hub_license": "mit", - "hub_hearts": 2628, - "params_billions": 0.137, - "co2_cost": 0.23477379049762206 - } - }, - { - "id": "openai-community/gpt2-large_bfloat16_32b71b12589c2f8d625668d2335a01cac3249519_False", - "model": { - "name": "openai-community/gpt2-large", - "sha": "32b71b12589c2f8d625668d2335a01cac3249519", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "GPT2LMHeadModel", - "average_score": 5.567707192929642, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20478220011790937, - "normalized_score": 20.47822001179094 - }, - "bbh": { - "name": "BBH", - "value": 0.30688418760118824, - "normalized_score": 3.2537905449787403 - }, - "math": { - "name": "MATH Level 5", - "value": 0.012084592145015104, - "normalized_score": 1.2084592145015105 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.3788645833333333, - "normalized_score": 5.658072916666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11419547872340426, - "normalized_score": 1.5772754137115832 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2022-03-02", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "openai-community/gpt2-large", - "hub_license": "mit", - "hub_hearts": 301, - "params_billions": 0.812, - "co2_cost": 0.36092447686323786 - } - }, - { - "id": "openai-community/gpt2-medium_bfloat16_6dcaa7a952f72f9298047fd5137cd6e4f05f41da_False", - "model": { - "name": "openai-community/gpt2-medium", - "sha": "6dcaa7a952f72f9298047fd5137cd6e4f05f41da", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "GPT2LMHeadModel", - "average_score": 5.902340287154445, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22084402718121252, - "normalized_score": 22.08440271812125 - }, - "bbh": { - "name": "BBH", - "value": 0.3050280232176266, - "normalized_score": 2.719972238356244 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0075528700906344415, - "normalized_score": 0.7552870090634441 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2625838926174497, - "normalized_score": 1.6778523489932917 - }, - "musr": { - "name": "MUSR", - "value": 0.3884479166666666, - "normalized_score": 6.1559895833333345 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11818484042553191, - "normalized_score": 2.020537825059101 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": 
false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2022-03-02", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "openai-community/gpt2-medium", - "hub_license": "mit", - "hub_hearts": 171, - "params_billions": 0.38, - "co2_cost": 0.24212383000119167 - } - }, - { - "id": "openai-community/gpt2-xl_bfloat16_15ea56dee5df4983c59b2538573817e1667135e2_False", - "model": { - "name": "openai-community/gpt2-xl", - "sha": "15ea56dee5df4983c59b2538573817e1667135e2", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "GPT2LMHeadModel", - "average_score": 5.093480678758688, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20385798570016445, - "normalized_score": 20.385798570016444 - }, - "bbh": { - "name": "BBH", - "value": 0.30085761123260785, - "normalized_score": 2.580960647452716 - }, - "math": { - "name": "MATH Level 5", - "value": 0.009818731117824773, - "normalized_score": 0.9818731117824773 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.37095833333333333, - "normalized_score": 4.036458333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11311502659574468, - "normalized_score": 1.457225177304964 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2022-03-02", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "openai-community/gpt2-xl", - "hub_license": "mit", - "hub_hearts": 332, - "params_billions": 1.608, - "co2_cost": 0.43062739967696967 - } - }, - { - "id": "openbmb/MiniCPM-S-1B-sft-llama-format_bfloat16_7de07f8895c168a7ee01f624f50c44f6966c9735_True", - "model": { - "name": "openbmb/MiniCPM-S-1B-sft-llama-format", - "sha": "7de07f8895c168a7ee01f624f50c44f6966c9735", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 8.996065699730169, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3328767669782843, - "normalized_score": 33.28767669782843 - }, - "bbh": { - "name": "BBH", - "value": 0.30493136322070497, - "normalized_score": 3.898455214242885 - }, - "math": { - "name": "MATH Level 5", - "value": 0.030966767371601207, - "normalized_score": 3.096676737160121 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.33167708333333334, - "normalized_score": 1.359635416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1858377659574468, - "normalized_score": 9.537529550827422 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-14", - "submission_date": "2024-11-19", - "generation": 0, - "base_model": "openbmb/MiniCPM-S-1B-sft-llama-format", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 1.0, - "co2_cost": 1.0800736949422725 - } - }, - { - "id": "openchat/openchat-3.5-0106_bfloat16_ff058fda49726ecf4ea53dc1635f917cdb8ba36b_True", - "model": { - "name": "openchat/openchat-3.5-0106", - "sha": 
"ff058fda49726ecf4ea53dc1635f917cdb8ba36b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.70925524673515, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5966590867786362, - "normalized_score": 59.66590867786363 - }, - "bbh": { - "name": "BBH", - "value": 0.46169787083960595, - "normalized_score": 24.03871121391158 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07628398791540786, - "normalized_score": 7.628398791540786 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.42543749999999997, - "normalized_score": 11.746354166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3291223404255319, - "normalized_score": 25.458037825059098 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-01-07", - "submission_date": "2024-06-27", - "generation": 1, - "base_model": "mistralai/Mistral-7B-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 354, - "params_billions": 7.242, - "co2_cost": 2.962835689481651 - } - }, - { - "id": "openchat/openchat-3.5-1210_bfloat16_801f5459b7577241500785f11c2b026912badd6e_True", - "model": { - "name": "openchat/openchat-3.5-1210", - "sha": "801f5459b7577241500785f11c2b026912badd6e", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.727849608659103, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.603678240402133, - "normalized_score": 60.3678240402133 - }, - "bbh": { - "name": "BBH", - "value": 0.4535356846447984, - "normalized_score": 23.236296582166464 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07854984894259819, - "normalized_score": 7.854984894259818 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.4414375, - "normalized_score": 14.279687500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3142453457446808, - "normalized_score": 23.805038416075647 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-12-12", - "submission_date": "2024-06-12", - "generation": 1, - "base_model": "mistralai/Mistral-7B-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 272, - "params_billions": 7.242, - "co2_cost": 1.0329021207593168 - } - }, - { - "id": "openchat/openchat-3.6-8b-20240522_bfloat16_2264eb98558978f708e88ae52afb78e43b832801_True", - "model": { - "name": "openchat/openchat-3.6-8b-20240522", - "sha": "2264eb98558978f708e88ae52afb78e43b832801", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.10731592675394, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5343355629729118, - "normalized_score": 53.43355629729119 - }, - "bbh": { - "name": "BBH", - "value": 0.5338412089001999, - "normalized_score": 33.23293691836929 - }, - "math": { - "name": "MATH Level 5", - 
"value": 0.09969788519637462, - "normalized_score": 9.969788519637463 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3179530201342282, - "normalized_score": 9.060402684563762 - }, - "musr": { - "name": "MUSR", - "value": 0.3998541666666667, - "normalized_score": 8.181770833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32288896276595747, - "normalized_score": 24.765440307328607 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-07", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "meta-llama/Meta-Llama-3-8B", - "hub_license": "llama3", - "hub_hearts": 152, - "params_billions": 8.03, - "co2_cost": 4.349911793665565 - } - }, - { - "id": "openchat/openchat_3.5_bfloat16_0fc98e324280bc4bf5d2c30ecf7b97b84fb8a19b_True", - "model": { - "name": "openchat/openchat_3.5", - "sha": "0fc98e324280bc4bf5d2c30ecf7b97b84fb8a19b", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.635827111564595, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5931118321608887, - "normalized_score": 59.31118321608887 - }, - "bbh": { - "name": "BBH", - "value": 0.44263196862832893, - "normalized_score": 21.58216684769999 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07250755287009064, - "normalized_score": 7.250755287009064 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2986577181208054, - "normalized_score": 6.487695749440718 - }, - "musr": { - "name": "MUSR", - "value": 0.4228645833333333, - "normalized_score": 11.258072916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31532579787234044, - "normalized_score": 23.92508865248227 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-10-30", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "openchat/openchat_3.5", - "hub_license": "apache-2.0", - "hub_hearts": 1120, - "params_billions": 7.0, - "co2_cost": 1.002421182361135 - } - }, - { - "id": "openchat/openchat_v3.2_bfloat16_acc7ce92558681e749678648189812f15c1465fe_False", - "model": { - "name": "openchat/openchat_v3.2", - "sha": "acc7ce92558681e749678648189812f15c1465fe", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.833145550526877, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2980558252104416, - "normalized_score": 29.805582521044165 - }, - "bbh": { - "name": "BBH", - "value": 0.4330564283474314, - "normalized_score": 20.32300299720885 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01283987915407855, - "normalized_score": 1.283987915407855 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.433625, - "normalized_score": 13.103125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2421875, - "normalized_score": 15.79861111111111 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": 
"2023-07-30", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "openchat/openchat_v3.2", - "hub_license": "llama2", - "hub_hearts": 42, - "params_billions": 13.0, - "co2_cost": 10.604910103456026 - } - }, - { - "id": "openchat/openchat_v3.2_super_bfloat16_9479cc37d43234a57a33628637d1aca0293d745a_False", - "model": { - "name": "openchat/openchat_v3.2_super", - "sha": "9479cc37d43234a57a33628637d1aca0293d745a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 12.92357500340055, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2861906408329898, - "normalized_score": 28.61906408329898 - }, - "bbh": { - "name": "BBH", - "value": 0.42212089838803973, - "normalized_score": 19.153539587477535 - }, - "math": { - "name": "MATH Level 5", - "value": 0.021148036253776436, - "normalized_score": 2.1148036253776437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.41613541666666665, - "normalized_score": 9.916927083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.24251994680851063, - "normalized_score": 15.83554964539007 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-09-04", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "openchat/openchat_v3.2_super", - "hub_license": "llama2", - "hub_hearts": 35, - "params_billions": 13.0, - "co2_cost": 10.055387243693328 - } - }, - { - "id": "orai-nlp/Llama-eus-8B_bfloat16_75b5645d222047b517a7a9190922ea1b5382c71f_False", - "model": { - "name": "orai-nlp/Llama-eus-8B", - "sha": "75b5645d222047b517a7a9190922ea1b5382c71f", - "precision": "bfloat16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.943754358263163, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21612321972366655, - "normalized_score": 21.612321972366654 - }, - "bbh": { - "name": "BBH", - "value": 0.4418245490788701, - "normalized_score": 20.96137115221118 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04682779456193353, - "normalized_score": 4.682779456193353 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28942953020134227, - "normalized_score": 5.257270693512303 - }, - "musr": { - "name": "MUSR", - "value": 0.3918854166666667, - "normalized_score": 8.285677083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30576795212765956, - "normalized_score": 22.86310579196217 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-04", - "submission_date": "2024-09-30", - "generation": 1, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "", - "hub_hearts": 9, - "params_billions": 8.03, - "co2_cost": 1.7385151629295175 - } - }, - { - "id": "oxyapi/oxy-1-small_bfloat16_0d100cf65c8574b025b499dd787d8bcbcf678418_True", - "model": { - "name": "oxyapi/oxy-1-small", - "sha": "0d100cf65c8574b025b499dd787d8bcbcf678418", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": 
"Qwen2ForCausalLM", - "average_score": 36.10082946641055, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6244608749229821, - "normalized_score": 62.44608749229821 - }, - "bbh": { - "name": "BBH", - "value": 0.5884593784818278, - "normalized_score": 41.1754471888895 - }, - "math": { - "name": "MATH Level 5", - "value": 0.36027190332326287, - "normalized_score": 36.027190332326285 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3716442953020134, - "normalized_score": 16.21923937360179 - }, - "musr": { - "name": "MUSR", - "value": 0.4486666666666667, - "normalized_score": 16.28333333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5000831117021277, - "normalized_score": 44.45367907801419 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-01", - "submission_date": "2024-12-02", - "generation": 1, - "base_model": "oxyapi/oxy-1-small (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 80, - "params_billions": 14.77, - "co2_cost": 2.7738172204911873 - } - }, - { - "id": "ozone-ai/0x-lite_float16_0b8888d3aa74b127e8e33c27306c05c7f0956bd3_True", - "model": { - "name": "ozone-ai/0x-lite", - "sha": "0b8888d3aa74b127e8e33c27306c05c7f0956bd3", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.48460307410717, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7739874643723099, - "normalized_score": 77.39874643723098 - }, - "bbh": { - "name": "BBH", - "value": 0.6340580988016683, - "normalized_score": 47.5284725075208 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5045317220543807, - "normalized_score": 50.453172205438065 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31963087248322153, - "normalized_score": 9.284116331096204 - }, - "musr": { - "name": "MUSR", - "value": 0.4220625, - "normalized_score": 11.7578125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5183676861702128, - "normalized_score": 46.485298463356976 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-28", - "generation": 1, - "base_model": "ozone-ai/0x-lite (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 61, - "params_billions": 14.77, - "co2_cost": 3.4526879760369664 - } - }, - { - "id": "ozone-research/Chirp-01_float16_eae888f412b7088e8d621b1da2d588944236a14b_True", - "model": { - "name": "ozone-research/Chirp-01", - "sha": "eae888f412b7088e8d621b1da2d588944236a14b", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 28.252602808689176, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6347524568145853, - "normalized_score": 63.47524568145853 - }, - "bbh": { - "name": "BBH", - "value": 0.4649560260501419, - "normalized_score": 25.038330362981494 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3466767371601209, - "normalized_score": 34.66767371601209 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2718120805369128, - "normalized_score": 2.908277404921707 - }, - "musr": { - "name": 
"MUSR", - "value": 0.4487291666666667, - "normalized_score": 15.557812499999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3508144946808511, - "normalized_score": 27.868277186761226 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-23", - "submission_date": "2025-02-23", - "generation": 1, - "base_model": "ozone-research/Chirp-01 (Merge)", - "hub_license": "other", - "hub_hearts": 13, - "params_billions": 3.086, - "co2_cost": 0.7157807215491061 - } - }, - { - "id": "paloalma/ECE-TW3-JRGL-V1_float16_2f08c7ab9db03b1b9f455c7beee6a41e99aa910e_False", - "model": { - "name": "paloalma/ECE-TW3-JRGL-V1", - "sha": "2f08c7ab9db03b1b9f455c7beee6a41e99aa910e", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.236001116528566, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5534947273235016, - "normalized_score": 55.349472732350165 - }, - "bbh": { - "name": "BBH", - "value": 0.6283667540784627, - "normalized_score": 46.69713905397109 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13141993957703926, - "normalized_score": 13.141993957703926 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34731543624161076, - "normalized_score": 12.975391498881436 - }, - "musr": { - "name": "MUSR", - "value": 0.46208333333333335, - "normalized_score": 17.460416666666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.422124335106383, - "normalized_score": 35.791592789598106 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-03", - "submission_date": "2024-08-04", - "generation": 0, - "base_model": "paloalma/ECE-TW3-JRGL-V1", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 68.977, - "co2_cost": 12.383388365937348 - } - }, - { - "id": "paloalma/ECE-TW3-JRGL-V2_bfloat16_f2c15045f1a7a7a34540ab18abcee8a566a74ca6_False", - "model": { - "name": "paloalma/ECE-TW3-JRGL-V2", - "sha": "f2c15045f1a7a7a34540ab18abcee8a566a74ca6", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 25.79271499886927, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2254894790267601, - "normalized_score": 22.54894790267601 - }, - "bbh": { - "name": "BBH", - "value": 0.6030988136029874, - "normalized_score": 43.17326773447519 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18504531722054382, - "normalized_score": 18.50453172205438 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3313758389261745, - "normalized_score": 10.850111856823268 - }, - "musr": { - "name": "MUSR", - "value": 0.47932291666666665, - "normalized_score": 19.815364583333327 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4587765957446808, - "normalized_score": 39.86406619385342 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-04", - "submission_date": "2024-09-19", - "generation": 0, - "base_model": "paloalma/ECE-TW3-JRGL-V2", - "hub_license": "apache-2.0", - "hub_hearts": 0, - 
"params_billions": 72.288, - "co2_cost": 25.09249892397867 - } - }, - { - "id": "paloalma/ECE-TW3-JRGL-V5_bfloat16_4061fa10de22945790cad825f7f4dec96d55b204_False", - "model": { - "name": "paloalma/ECE-TW3-JRGL-V5", - "sha": "4061fa10de22945790cad825f7f4dec96d55b204", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.49204681730805, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4552509563513699, - "normalized_score": 45.52509563513699 - }, - "bbh": { - "name": "BBH", - "value": 0.6024712037668832, - "normalized_score": 43.46251365157702 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18353474320241692, - "normalized_score": 18.35347432024169 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3414429530201342, - "normalized_score": 12.192393736017896 - }, - "musr": { - "name": "MUSR", - "value": 0.4620520833333333, - "normalized_score": 16.88984375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.46476063829787234, - "normalized_score": 40.528959810874696 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-11", - "submission_date": "2024-08-30", - "generation": 0, - "base_model": "paloalma/ECE-TW3-JRGL-V5", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 72.289, - "co2_cost": 34.56184207719644 - } - }, - { - "id": "paloalma/Le_Triomphant-ECE-TW3_bfloat16_f72399253bb3e65c0f55e50461488c098f658a49_False", - "model": { - "name": "paloalma/Le_Triomphant-ECE-TW3", - "sha": "f72399253bb3e65c0f55e50461488c098f658a49", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 31.996294194213135, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5402055435134332, - "normalized_score": 54.02055435134332 - }, - "bbh": { - "name": "BBH", - "value": 0.6112057897556996, - "normalized_score": 44.96329362428286 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19486404833836857, - "normalized_score": 19.486404833836858 - }, - "gpqa": { - "name": "GPQA", - "value": 0.348993288590604, - "normalized_score": 13.19910514541387 - }, - "musr": { - "name": "MUSR", - "value": 0.4725, - "normalized_score": 18.495833333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.476313164893617, - "normalized_score": 41.81257387706855 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-01", - "submission_date": "2024-07-25", - "generation": 0, - "base_model": "paloalma/Le_Triomphant-ECE-TW3", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 72.289, - "co2_cost": 20.8367824579458 - } - }, - { - "id": "paloalma/TW3-JRGL-v2_bfloat16_aca3f0ba2bfb90038a9e2cd5b486821d4c181b46_False", - "model": { - "name": "paloalma/TW3-JRGL-v2", - "sha": "aca3f0ba2bfb90038a9e2cd5b486821d4c181b46", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 32.46253888329018, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5316127874040878, - 
"normalized_score": 53.16127874040878 - }, - "bbh": { - "name": "BBH", - "value": 0.6137525505395743, - "normalized_score": 45.61110998256794 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17900302114803626, - "normalized_score": 17.900302114803626 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35906040268456374, - "normalized_score": 14.541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.48583333333333334, - "normalized_score": 20.69583333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4857878989361702, - "normalized_score": 42.86532210401891 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-01", - "submission_date": "2024-08-29", - "generation": 0, - "base_model": "paloalma/TW3-JRGL-v2", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 72.289, - "co2_cost": 31.846913566660287 - } - }, - { - "id": "pankajmathur/Al_Dente_v1_8b_bfloat16_149d70e04085ecd90510a60f916efc55da1294e7_False", - "model": { - "name": "pankajmathur/Al_Dente_v1_8b", - "sha": "149d70e04085ecd90510a60f916efc55da1294e7", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.300059065636294, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3693721547715617, - "normalized_score": 36.93721547715617 - }, - "bbh": { - "name": "BBH", - "value": 0.48347371404380524, - "normalized_score": 27.247898492647995 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04078549848942598, - "normalized_score": 4.078549848942599 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.3987083333333334, - "normalized_score": 8.271875000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2859873670212766, - "normalized_score": 20.665263002364064 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-02", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "pankajmathur/Al_Dente_v1_8b", - "hub_license": "llama3", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.8170640097905246 - } - }, - { - "id": "pankajmathur/model_007_13b_v2_bfloat16_2c6ddf25cdb134f22e2543121b5a36b41342a9e2_False", - "model": { - "name": "pankajmathur/model_007_13b_v2", - "sha": "2c6ddf25cdb134f22e2543121b5a36b41342a9e2", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.00740369607647, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.30564901129004374, - "normalized_score": 30.564901129004376 - }, - "bbh": { - "name": "BBH", - "value": 0.4702292766687601, - "normalized_score": 25.454420185872465 - }, - "math": { - "name": "MATH Level 5", - "value": 0.021148036253776436, - "normalized_score": 2.1148036253776437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.46109375, - "normalized_score": 17.203385416666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - 
"value": 0.24609375, - "normalized_score": 16.232638888888886 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-08-12", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "pankajmathur/model_007_13b_v2", - "hub_license": "llama2", - "hub_hearts": 4, - "params_billions": 13.0, - "co2_cost": 4.363560105318781 - } - }, - { - "id": "pankajmathur/orca_mini_3b_bfloat16_31e1a7bc3f7ea2f247b432d60036d975b8d590e9_False", - "model": { - "name": "pankajmathur/orca_mini_3b", - "sha": "31e1a7bc3f7ea2f247b432d60036d975b8d590e9", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 3.1252754271378507, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.07421419611076388, - "normalized_score": 7.421419611076388 - }, - "bbh": { - "name": "BBH", - "value": 0.3196070040004752, - "normalized_score": 4.6859845437903855 - }, - "math": { - "name": "MATH Level 5", - "value": 0.008308157099697885, - "normalized_score": 0.8308157099697886 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24580536912751677, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3349270833333333, - "normalized_score": 4.199218749999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11452792553191489, - "normalized_score": 1.6142139479905429 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-06-22", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "pankajmathur/orca_mini_3b", - "hub_license": "cc-by-nc-sa-4.0", - "hub_hearts": 161, - "params_billions": 3.426, - "co2_cost": 1.049551149033351 - } - }, - { - "id": "pankajmathur/orca_mini_7b_bfloat16_fec86e316b7b98d7be6cf74e98fb927092077abb_False", - "model": { - "name": "pankajmathur/orca_mini_7b", - "sha": "fec86e316b7b98d7be6cf74e98fb927092077abb", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 3.405696058391802, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.04121619525082337, - "normalized_score": 4.121619525082337 - }, - "bbh": { - "name": "BBH", - "value": 0.3332228472650342, - "normalized_score": 7.81893018360044 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01283987915407855, - "normalized_score": 1.283987915407855 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25419463087248323, - "normalized_score": 0.5592841163310973 - }, - "musr": { - "name": "MUSR", - "value": 0.36975, - "normalized_score": 3.9187499999999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12458444148936171, - "normalized_score": 2.731604609929078 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-06-23", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "pankajmathur/orca_mini_7b", - "hub_license": "cc-by-nc-sa-4.0", - "hub_hearts": 18, - "params_billions": 7.0, - "co2_cost": 0.5214411396948289 - } - }, - { - "id": 
"pankajmathur/orca_mini_phi-4_bfloat16_d060fa835868ce422521daf7054dbc64ad48aee3_True", - "model": { - "name": "pankajmathur/orca_mini_phi-4", - "sha": "d060fa835868ce422521daf7054dbc64ad48aee3", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 40.67628242552744, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7780588837617521, - "normalized_score": 77.80588837617522 - }, - "bbh": { - "name": "BBH", - "value": 0.6856329737542378, - "normalized_score": 54.63137024552032 - }, - "math": { - "name": "MATH Level 5", - "value": 0.29531722054380666, - "normalized_score": 29.531722054380666 - }, - "gpqa": { - "name": "GPQA", - "value": 0.37416107382550334, - "normalized_score": 16.554809843400445 - }, - "musr": { - "name": "MUSR", - "value": 0.47030208333333334, - "normalized_score": 18.254427083333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5255152925531915, - "normalized_score": 47.27947695035461 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-21", - "submission_date": "2025-01-22", - "generation": 1, - "base_model": "pankajmathur/orca_mini_phi-4 (Merge)", - "hub_license": "mit", - "hub_hearts": 8, - "params_billions": 14.66, - "co2_cost": 1.7007696703119202 - } - }, - { - "id": "pankajmathur/orca_mini_v2_7b_bfloat16_66d3f32a4a6bca0a2a261f1bdb54d2582028f75f_False", - "model": { - "name": "pankajmathur/orca_mini_v2_7b", - "sha": "66d3f32a4a6bca0a2a261f1bdb54d2582028f75f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.502368522121576, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.13578859647956312, - "normalized_score": 13.57885964795631 - }, - "bbh": { - "name": "BBH", - "value": 0.35363417847864514, - "normalized_score": 10.199953477088151 - }, - "math": { - "name": "MATH Level 5", - "value": 0.011329305135951661, - "normalized_score": 1.1329305135951662 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24916107382550334, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.35933333333333334, - "normalized_score": 2.0833333333333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1541722074468085, - "normalized_score": 6.019134160756501 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-07-03", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "pankajmathur/orca_mini_v2_7b", - "hub_license": "cc-by-nc-sa-4.0", - "hub_hearts": 37, - "params_billions": 7.0, - "co2_cost": 1.185022969094885 - } - }, - { - "id": "pankajmathur/orca_mini_v3_13b_bfloat16_7d6e567d24ce2f228beaf54e89c17b0e750bfe99_False", - "model": { - "name": "pankajmathur/orca_mini_v3_13b", - "sha": "7d6e567d24ce2f228beaf54e89c17b0e750bfe99", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 15.041296989515962, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.28966253983873896, - "normalized_score": 
28.966253983873898 - }, - "bbh": { - "name": "BBH", - "value": 0.4710970361474938, - "normalized_score": 25.549482064607844 - }, - "math": { - "name": "MATH Level 5", - "value": 0.021148036253776436, - "normalized_score": 2.1148036253776437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.45979166666666665, - "normalized_score": 17.107291666666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.23046875, - "normalized_score": 14.496527777777777 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-08-09", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "pankajmathur/orca_mini_v3_13b", - "hub_license": "other", - "hub_hearts": 31, - "params_billions": 13.0, - "co2_cost": 2.1943587430415645 - } - }, - { - "id": "pankajmathur/orca_mini_v3_70b_bfloat16_e8e856dfb5c737d1906b50f9e65fd3a4f8d77422_False", - "model": { - "name": "pankajmathur/orca_mini_v3_70b", - "sha": "e8e856dfb5c737d1906b50f9e65fd3a4f8d77422", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.29815949268638, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4014703209705803, - "normalized_score": 40.14703209705803 - }, - "bbh": { - "name": "BBH", - "value": 0.5949312065598904, - "normalized_score": 42.975787003923045 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03851963746223565, - "normalized_score": 3.8519637462235647 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3179530201342282, - "normalized_score": 9.060402684563762 - }, - "musr": { - "name": "MUSR", - "value": 0.5078541666666667, - "normalized_score": 25.11510416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3757480053191489, - "normalized_score": 30.63866725768321 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-08-10", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "pankajmathur/orca_mini_v3_70b", - "hub_license": "other", - "hub_hearts": 23, - "params_billions": 70.0, - "co2_cost": 12.813073977150225 - } - }, - { - "id": "pankajmathur/orca_mini_v3_7b_bfloat16_6252eb7ca29da8d951ae7d2bca948bf84e04a2b9_False", - "model": { - "name": "pankajmathur/orca_mini_v3_7b", - "sha": "6252eb7ca29da8d951ae7d2bca948bf84e04a2b9", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.644021205601403, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2820937335159599, - "normalized_score": 28.20937335159599 - }, - "bbh": { - "name": "BBH", - "value": 0.4095332668279368, - "normalized_score": 17.843955571096647 - }, - "math": { - "name": "MATH Level 5", - "value": 0.010574018126888218, - "normalized_score": 1.0574018126888218 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24664429530201343, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.49823958333333335, - "normalized_score": 22.713281249999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 
0.20836103723404256, - "normalized_score": 12.04011524822695 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-08-07", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "pankajmathur/orca_mini_v3_7b", - "hub_license": "other", - "hub_hearts": 40, - "params_billions": 7.0, - "co2_cost": 1.2799002843238059 - } - }, - { - "id": "pankajmathur/orca_mini_v5_8b_bfloat16_f57c84d4cc0b3b74549458c0d38e868bd7fffad1_False", - "model": { - "name": "pankajmathur/orca_mini_v5_8b", - "sha": "f57c84d4cc0b3b74549458c0d38e868bd7fffad1", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.498300732442388, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.48060479527653294, - "normalized_score": 48.06047952765329 - }, - "bbh": { - "name": "BBH", - "value": 0.5064242853619262, - "normalized_score": 29.345795010726494 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09894259818731117, - "normalized_score": 9.894259818731117 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28691275167785235, - "normalized_score": 4.921700223713646 - }, - "musr": { - "name": "MUSR", - "value": 0.4000104166666667, - "normalized_score": 7.701302083333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3075964095744681, - "normalized_score": 23.066267730496453 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-26", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "pankajmathur/orca_mini_v5_8b", - "hub_license": "llama3", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.7579415515670855 - } - }, - { - "id": "pankajmathur/orca_mini_v5_8b_dpo_bfloat16_fdc0d0aaa85a58f1abaf2c24ce0ddca10c08f0f1_False", - "model": { - "name": "pankajmathur/orca_mini_v5_8b_dpo", - "sha": "fdc0d0aaa85a58f1abaf2c24ce0ddca10c08f0f1", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.33420699350285, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.48964746871633935, - "normalized_score": 48.964746871633935 - }, - "bbh": { - "name": "BBH", - "value": 0.5074598658862709, - "normalized_score": 29.605372989233754 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09743202416918428, - "normalized_score": 9.743202416918429 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.389375, - "normalized_score": 6.938541666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31158577127659576, - "normalized_score": 23.50953014184397 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-30", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "pankajmathur/orca_mini_v5_8b_dpo", - "hub_license": "llama3", - "hub_hearts": 3, - "params_billions": 8.0, - "co2_cost": 1.6333793278135202 - } - }, - { - "id": 
"pankajmathur/orca_mini_v5_8b_orpo_bfloat16_4cdc018043ef439f15bd8a09c4f09c6bc528dfc7_False", - "model": { - "name": "pankajmathur/orca_mini_v5_8b_orpo", - "sha": "4cdc018043ef439f15bd8a09c4f09c6bc528dfc7", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 12.99373010020274, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.08243239050164675, - "normalized_score": 8.243239050164673 - }, - "bbh": { - "name": "BBH", - "value": 0.496374377369289, - "normalized_score": 27.877628256858372 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06646525679758308, - "normalized_score": 6.646525679758309 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28439597315436244, - "normalized_score": 4.5861297539149914 - }, - "musr": { - "name": "MUSR", - "value": 0.41312499999999996, - "normalized_score": 8.973958333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2947140957446808, - "normalized_score": 21.634899527186757 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-31", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "pankajmathur/orca_mini_v5_8b_orpo", - "hub_license": "llama3", - "hub_hearts": 1, - "params_billions": 8.0, - "co2_cost": 1.9436995385645837 - } - }, - { - "id": "pankajmathur/orca_mini_v6_8b_bfloat16_e95dc8e4c6b6ca5957b657cc2d905683142eaf3e_True", - "model": { - "name": "pankajmathur/orca_mini_v6_8b", - "sha": "e95dc8e4c6b6ca5957b657cc2d905683142eaf3e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 1.4763387606479779, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.011116060940526692, - "normalized_score": 1.1116060940526693 - }, - "bbh": { - "name": "BBH", - "value": 0.30286959112076134, - "normalized_score": 3.219809856556432 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0037764350453172208, - "normalized_score": 0.3776435045317221 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23825503355704697, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3554583333333334, - "normalized_score": 2.7656250000000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1124501329787234, - "normalized_score": 1.383348108747044 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-02", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "pankajmathur/orca_mini_v6_8b", - "hub_license": "llama3", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 2.432735692418171 - } - }, - { - "id": "pankajmathur/orca_mini_v6_8b_dpo_bfloat16_ebb11b63839d38e8c03c7ecac012e047fcb2346e_False", - "model": { - "name": "pankajmathur/orca_mini_v6_8b_dpo", - "sha": "ebb11b63839d38e8c03c7ecac012e047fcb2346e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.392492362112517, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3882564927725103, - 
"normalized_score": 38.82564927725103 - }, - "bbh": { - "name": "BBH", - "value": 0.520280774453148, - "normalized_score": 32.47882597428379 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06117824773413897, - "normalized_score": 6.117824773413897 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.40903125, - "normalized_score": 9.262239583333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.359624335106383, - "normalized_score": 28.847148345153663 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-21", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "pankajmathur/orca_mini_v6_8b_dpo", - "hub_license": "llama3", - "hub_hearts": 2, - "params_billions": 8.0, - "co2_cost": 1.539647289874839 - } - }, - { - "id": "pankajmathur/orca_mini_v7_72b_bfloat16_447f11912cfa496e32e188a55214043a05760d3a_True", - "model": { - "name": "pankajmathur/orca_mini_v7_72b", - "sha": "447f11912cfa496e32e188a55214043a05760d3a", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.215290911204995, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5929622291076566, - "normalized_score": 59.29622291076566 - }, - "bbh": { - "name": "BBH", - "value": 0.6842301988001044, - "normalized_score": 55.05552307693972 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09365558912386707, - "normalized_score": 9.365558912386707 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3850671140939597, - "normalized_score": 18.008948545861294 - }, - "musr": { - "name": "MUSR", - "value": 0.5070416666666667, - "normalized_score": 24.21354166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5621675531914894, - "normalized_score": 51.35195035460993 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-26", - "submission_date": "2025-01-02", - "generation": 0, - "base_model": "pankajmathur/orca_mini_v7_72b", - "hub_license": "apache-2.0", - "hub_hearts": 11, - "params_billions": 72.706, - "co2_cost": 28.10341439624203 - } - }, - { - "id": "pankajmathur/orca_mini_v7_7b_bfloat16_f5e84ff6ea25fb4585908ea45d1520bac416d803_False", - "model": { - "name": "pankajmathur/orca_mini_v7_7b", - "sha": "f5e84ff6ea25fb4585908ea45d1520bac416d803", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 23.986504270832167, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4387646998851935, - "normalized_score": 43.87646998851935 - }, - "bbh": { - "name": "BBH", - "value": 0.5274909601771501, - "normalized_score": 33.95043410425148 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12084592145015106, - "normalized_score": 12.084592145015106 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.43597916666666664, - "normalized_score": 12.6640625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4167220744680851, - 
"normalized_score": 35.19134160756501 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-20", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "pankajmathur/orca_mini_v7_7b", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 7.616, - "co2_cost": 1.8502188230290761 - } - }, - { - "id": "pankajmathur/orca_mini_v8_1_70b_bfloat16_84663295413819491b08cd3b7e50d04a5eb0bb1a_True", - "model": { - "name": "pankajmathur/orca_mini_v8_1_70b", - "sha": "84663295413819491b08cd3b7e50d04a5eb0bb1a", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 43.191232219399744, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8571434903832941, - "normalized_score": 85.7143490383294 - }, - "bbh": { - "name": "BBH", - "value": 0.6781305630707934, - "normalized_score": 53.51972670972081 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3527190332326284, - "normalized_score": 35.27190332326284 - }, - "gpqa": { - "name": "GPQA", - "value": 0.43288590604026844, - "normalized_score": 24.384787472035793 - }, - "musr": { - "name": "MUSR", - "value": 0.44370833333333337, - "normalized_score": 15.996874999999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.49833776595744683, - "normalized_score": 44.25975177304965 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-12", - "submission_date": "2024-12-21", - "generation": 1, - "base_model": "pankajmathur/orca_mini_v8_1_70b (Merge)", - "hub_license": "llama3.3", - "hub_hearts": 5, - "params_billions": 70.554, - "co2_cost": 54.448936643551214 - } - }, - { - "id": "pankajmathur/orca_mini_v9_0_3B-Instruct_bfloat16_37710875f7841e72c99cd5494cf450bb5bd6c680_True", - "model": { - "name": "pankajmathur/orca_mini_v9_0_3B-Instruct", - "sha": "37710875f7841e72c99cd5494cf450bb5bd6c680", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.660838216117185, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5753766672429155, - "normalized_score": 57.53766672429155 - }, - "bbh": { - "name": "BBH", - "value": 0.4412946064233128, - "normalized_score": 21.368152564693613 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14652567975830816, - "normalized_score": 14.652567975830816 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.36590625000000004, - "normalized_score": 5.771614583333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2603058510638298, - "normalized_score": 17.81176122931442 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "pankajmathur/orca_mini_v9_0_3B-Instruct (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 5, - "params_billions": 3.213, - "co2_cost": 1.2023625178594268 - } - }, - { - "id": 
"pankajmathur/orca_mini_v9_1_1B-Instruct_bfloat16_2c4cc6dacbff82ec76845fcc770322318742e794_True", - "model": { - "name": "pankajmathur/orca_mini_v9_1_1B-Instruct", - "sha": "2c4cc6dacbff82ec76845fcc770322318742e794", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 9.063245765360724, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3629270336041702, - "normalized_score": 36.29270336041702 - }, - "bbh": { - "name": "BBH", - "value": 0.3205118362595434, - "normalized_score": 6.406448543994878 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04607250755287009, - "normalized_score": 4.607250755287009 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25671140939597314, - "normalized_score": 0.8948545861297527 - }, - "musr": { - "name": "MUSR", - "value": 0.3380625, - "normalized_score": 2.024479166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.13738364361702127, - "normalized_score": 4.153738179669029 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "pankajmathur/orca_mini_v9_1_1B-Instruct (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 3, - "params_billions": 1.236, - "co2_cost": 0.7135759715391796 - } - }, - { - "id": "pankajmathur/orca_mini_v9_2_14B_bfloat16_fc8e88751753f1757dc84d5ce0ad2384450645a2_True", - "model": { - "name": "pankajmathur/orca_mini_v9_2_14B", - "sha": "fc8e88751753f1757dc84d5ce0ad2384450645a2", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 40.67628242552744, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7780588837617521, - "normalized_score": 77.80588837617522 - }, - "bbh": { - "name": "BBH", - "value": 0.6856329737542378, - "normalized_score": 54.63137024552032 - }, - "math": { - "name": "MATH Level 5", - "value": 0.29531722054380666, - "normalized_score": 29.531722054380666 - }, - "gpqa": { - "name": "GPQA", - "value": 0.37416107382550334, - "normalized_score": 16.554809843400445 - }, - "musr": { - "name": "MUSR", - "value": 0.47030208333333334, - "normalized_score": 18.254427083333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5255152925531915, - "normalized_score": 47.27947695035461 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-21", - "submission_date": "2025-01-21", - "generation": 1, - "base_model": "pankajmathur/orca_mini_v9_2_14B (Merge)", - "hub_license": "mit", - "hub_hearts": 8, - "params_billions": 14.66, - "co2_cost": 1.8814172421796664 - } - }, - { - "id": "pankajmathur/orca_mini_v9_2_70b_bfloat16_19f021126bc52484fd60fa5daeff59219592e534_True", - "model": { - "name": "pankajmathur/orca_mini_v9_2_70b", - "sha": "19f021126bc52484fd60fa5daeff59219592e534", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 40.72477703174064, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8382591523823455, - "normalized_score": 83.82591523823456 - 
}, - "bbh": { - "name": "BBH", - "value": 0.6744868732778627, - "normalized_score": 53.03309980255676 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2938066465256798, - "normalized_score": 29.38066465256798 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3733221476510067, - "normalized_score": 16.442953020134222 - }, - "musr": { - "name": "MUSR", - "value": 0.47098958333333335, - "normalized_score": 19.207031250000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.48213098404255317, - "normalized_score": 42.45899822695035 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-30", - "submission_date": "2025-01-02", - "generation": 1, - "base_model": "pankajmathur/orca_mini_v9_2_70b (Merge)", - "hub_license": "llama3.3", - "hub_hearts": 4, - "params_billions": 70.554, - "co2_cost": 48.05524024512421 - } - }, - { - "id": "pankajmathur/orca_mini_v9_4_70B_bfloat16_6538f7ad108c90d4aeb317a90eadaf489d687319_True", - "model": { - "name": "pankajmathur/orca_mini_v9_4_70B", - "sha": "6538f7ad108c90d4aeb317a90eadaf489d687319", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 39.32550746750958, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8014645584826039, - "normalized_score": 80.14645584826039 - }, - "bbh": { - "name": "BBH", - "value": 0.6418899297276105, - "normalized_score": 48.69261714198851 - }, - "math": { - "name": "MATH Level 5", - "value": 0.32628398791540786, - "normalized_score": 32.62839879154079 - }, - "gpqa": { - "name": "GPQA", - "value": 0.36577181208053694, - "normalized_score": 15.436241610738257 - }, - "musr": { - "name": "MUSR", - "value": 0.4647291666666667, - "normalized_score": 19.7578125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.45362367021276595, - "normalized_score": 39.29151891252955 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-17", - "submission_date": "2025-01-17", - "generation": 1, - "base_model": "pankajmathur/orca_mini_v9_4_70B (Merge)", - "hub_license": "llama3.3", - "hub_hearts": 2, - "params_billions": 70.554, - "co2_cost": 60.69019593236153 - } - }, - { - "id": "pankajmathur/orca_mini_v9_5_1B-Instruct_bfloat16_eaf758bef610953480309044303c8c15985ac24d_True", - "model": { - "name": "pankajmathur/orca_mini_v9_5_1B-Instruct", - "sha": "eaf758bef610953480309044303c8c15985ac24d", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 10.693501511833993, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.46379384477630464, - "normalized_score": 46.37938447763047 - }, - "bbh": { - "name": "BBH", - "value": 0.3337001077145985, - "normalized_score": 6.6988165369101145 - }, - "math": { - "name": "MATH Level 5", - "value": 0.030211480362537766, - "normalized_score": 3.0211480362537766 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.31815625, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.13696808510638298, - 
"normalized_score": 4.10756501182033 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-02", - "generation": 1, - "base_model": "pankajmathur/orca_mini_v9_5_1B-Instruct (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 4, - "params_billions": 1.236, - "co2_cost": 1.091074704963622 - } - }, - { - "id": "pankajmathur/orca_mini_v9_5_1B-Instruct_preview_bfloat16_7f4581135998269b83f79624a2435cc314f5f45b_True", - "model": { - "name": "pankajmathur/orca_mini_v9_5_1B-Instruct_preview", - "sha": "7f4581135998269b83f79624a2435cc314f5f45b", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 9.541822835013244, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3935768206137493, - "normalized_score": 39.35768206137493 - }, - "bbh": { - "name": "BBH", - "value": 0.32769514793198123, - "normalized_score": 5.582691663791198 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03851963746223565, - "normalized_score": 3.8519637462235647 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.33945833333333336, - "normalized_score": 3.0322916666666675 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.13272938829787234, - "normalized_score": 3.6365986997635926 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-30", - "submission_date": "2024-12-30", - "generation": 1, - "base_model": "pankajmathur/orca_mini_v9_5_1B-Instruct_preview (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 2, - "params_billions": 1.236, - "co2_cost": 0.7155956947297311 - } - }, - { - "id": "pankajmathur/orca_mini_v9_5_3B-Instruct_bfloat16_9d68ed7de708f52e8fa3b173fb7315a941d45b9c_True", - "model": { - "name": "pankajmathur/orca_mini_v9_5_3B-Instruct", - "sha": "9d68ed7de708f52e8fa3b173fb7315a941d45b9c", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.152680776912927, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7207066140063919, - "normalized_score": 72.0706614006392 - }, - "bbh": { - "name": "BBH", - "value": 0.44963802133275826, - "normalized_score": 21.517904353100732 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1321752265861027, - "normalized_score": 13.217522658610271 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2869127516778524, - "normalized_score": 4.921700223713654 - }, - "musr": { - "name": "MUSR", - "value": 0.4269895833333333, - "normalized_score": 12.27369791666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2882313829787234, - "normalized_score": 20.914598108747047 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-01", - "submission_date": "2025-01-01", - "generation": 1, - "base_model": "pankajmathur/orca_mini_v9_5_3B-Instruct (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 6, - "params_billions": 3.213, - 
"co2_cost": 1.1120528502404838 - } - }, - { - "id": "pankajmathur/orca_mini_v9_6_1B-Instruct_bfloat16_6219f36f9cb41a659ca721e74b70364dda0a9a8a_True", - "model": { - "name": "pankajmathur/orca_mini_v9_6_1B-Instruct", - "sha": "6219f36f9cb41a659ca721e74b70364dda0a9a8a", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 15.323670701472977, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6085741388404988, - "normalized_score": 60.857413884049876 - }, - "bbh": { - "name": "BBH", - "value": 0.3561349568441982, - "normalized_score": 9.659037433840986 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0770392749244713, - "normalized_score": 7.7039274924471295 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.33955208333333337, - "normalized_score": 2.2773437500000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.18085106382978725, - "normalized_score": 8.983451536643026 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-06", - "submission_date": "2025-01-06", - "generation": 1, - "base_model": "pankajmathur/orca_mini_v9_6_1B-Instruct (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 6, - "params_billions": 1.236, - "co2_cost": 0.7610068389439243 - } - }, - { - "id": "pankajmathur/orca_mini_v9_6_3B-Instruct_bfloat16_2cd07c6364b883d29036b9c8fe1816221b693d71_True", - "model": { - "name": "pankajmathur/orca_mini_v9_6_3B-Instruct", - "sha": "2cd07c6364b883d29036b9c8fe1816221b693d71", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.086826299328436, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7316475839660989, - "normalized_score": 73.1647583966099 - }, - "bbh": { - "name": "BBH", - "value": 0.45683272658133456, - "normalized_score": 22.86989036357046 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13293051359516617, - "normalized_score": 13.293051359516618 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2936241610738255, - "normalized_score": 5.8165548098433995 - }, - "musr": { - "name": "MUSR", - "value": 0.4067708333333333, - "normalized_score": 8.813020833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28507313829787234, - "normalized_score": 20.563682033096924 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-03", - "submission_date": "2025-01-03", - "generation": 1, - "base_model": "pankajmathur/orca_mini_v9_6_3B-Instruct (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 4, - "params_billions": 3.213, - "co2_cost": 1.6625691395723998 - } - }, - { - "id": "pankajmathur/orca_mini_v9_7_1B-Instruct_bfloat16_b9e52b91802bd4ae941c0d328e9fa7818e0ce504_True", - "model": { - "name": "pankajmathur/orca_mini_v9_7_1B-Instruct", - "sha": "b9e52b91802bd4ae941c0d328e9fa7818e0ce504", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 12.485692413724527, - "has_chat_template": true - }, - "evaluations": 
{ - "ifeval": { - "name": "IFEval", - "value": 0.5610136659618701, - "normalized_score": 56.101366596187006 - }, - "bbh": { - "name": "BBH", - "value": 0.3181526961435924, - "normalized_score": 5.052028049769791 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0445619335347432, - "normalized_score": 4.45619335347432 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.35269791666666667, - "normalized_score": 2.4539062499999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1344747340425532, - "normalized_score": 3.830526004728132 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-04", - "submission_date": "2025-01-05", - "generation": 1, - "base_model": "pankajmathur/orca_mini_v9_7_1B-Instruct (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 4, - "params_billions": 1.236, - "co2_cost": 0.8512147170563129 - } - }, - { - "id": "pankajmathur/orca_mini_v9_7_3B-Instruct_bfloat16_46298c9816c60d18d8b1217a540b75a0a8cf9aab_True", - "model": { - "name": "pankajmathur/orca_mini_v9_7_3B-Instruct", - "sha": "46298c9816c60d18d8b1217a540b75a0a8cf9aab", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.034702793317214, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5618381450107935, - "normalized_score": 56.183814501079354 - }, - "bbh": { - "name": "BBH", - "value": 0.3297133908231881, - "normalized_score": 6.301038778871171 - }, - "math": { - "name": "MATH Level 5", - "value": 0.061933534743202415, - "normalized_score": 6.193353474320242 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.361875, - "normalized_score": 3.801041666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.13746675531914893, - "normalized_score": 4.162972813238769 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-05", - "submission_date": "2025-01-05", - "generation": 1, - "base_model": "pankajmathur/orca_mini_v9_7_3B-Instruct (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 4, - "params_billions": 3.213, - "co2_cost": 1.0710980392937726 - } - }, - { - "id": "paulml/ECE-ILAB-Q1_bfloat16_393bea0ee85e4c752acd5fd77ce07f577fc13bd9_False", - "model": { - "name": "paulml/ECE-ILAB-Q1", - "sha": "393bea0ee85e4c752acd5fd77ce07f577fc13bd9", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 42.50307248816295, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7864521691334547, - "normalized_score": 78.64521691334548 - }, - "bbh": { - "name": "BBH", - "value": 0.6717755530661759, - "normalized_score": 53.70222770817057 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3557401812688822, - "normalized_score": 35.57401812688822 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38674496644295303, - "normalized_score": 18.232662192393736 - }, - "musr": { - "name": "MUSR", - "value": 0.46137500000000004, - 
"normalized_score": 18.805208333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.550531914893617, - "normalized_score": 50.05910165484633 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-06", - "submission_date": "2024-09-16", - "generation": 0, - "base_model": "paulml/ECE-ILAB-Q1", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 72.706, - "co2_cost": 22.83028414132809 - } - }, - { - "id": "pints-ai/1.5-Pints-16K-v0.1_bfloat16_7862a52f250be68fad593f3a4030f00d658ede56_True", - "model": { - "name": "pints-ai/1.5-Pints-16K-v0.1", - "sha": "7862a52f250be68fad593f3a4030f00d658ede56", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.250927888464816, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1635914927946737, - "normalized_score": 16.35914927946737 - }, - "bbh": { - "name": "BBH", - "value": 0.3133077677150869, - "normalized_score": 3.658292060342125 - }, - "math": { - "name": "MATH Level 5", - "value": 0.014350453172205438, - "normalized_score": 1.4350453172205437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23573825503355705, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.357875, - "normalized_score": 2.7343749999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1118683510638298, - "normalized_score": 1.3187056737588652 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-07", - "submission_date": "2024-09-09", - "generation": 0, - "base_model": "pints-ai/1.5-Pints-16K-v0.1", - "hub_license": "mit", - "hub_hearts": 14, - "params_billions": 1.566, - "co2_cost": 0.5598769163544511 - } - }, - { - "id": "pints-ai/1.5-Pints-2K-v0.1_bfloat16_2e865c18669161ebbf5e9ad79ae0502ee0153df0_True", - "model": { - "name": "pints-ai/1.5-Pints-2K-v0.1", - "sha": "2e865c18669161ebbf5e9ad79ae0502ee0153df0", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.044439684591542, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17615593292463996, - "normalized_score": 17.615593292463995 - }, - "bbh": { - "name": "BBH", - "value": 0.29801943389750435, - "normalized_score": 2.3744704635071923 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01283987915407855, - "normalized_score": 1.283987915407855 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2483221476510067, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.35018749999999993, - "normalized_score": 1.8401041666666658 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11037234042553191, - "normalized_score": 1.1524822695035455 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-07", - "submission_date": "2024-09-09", - "generation": 0, - "base_model": "pints-ai/1.5-Pints-2K-v0.1", - "hub_license": "mit", - "hub_hearts": 16, - "params_billions": 1.566, - "co2_cost": 0.5828330308866719 - } - }, - { - "id": 
"piotr25691/thea-3b-25r_bfloat16_4661fb3c8b18bdf2059f703c4f69caea24057151_True", - "model": { - "name": "piotr25691/thea-3b-25r", - "sha": "4661fb3c8b18bdf2059f703c4f69caea24057151", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.996071012635184, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7344202272193336, - "normalized_score": 73.44202272193337 - }, - "bbh": { - "name": "BBH", - "value": 0.44844100293649863, - "normalized_score": 22.54671082396668 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1782477341389728, - "normalized_score": 17.82477341389728 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.33145833333333335, - "normalized_score": 3.565625000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3182347074468085, - "normalized_score": 24.248300827423165 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-11", - "submission_date": "2024-10-12", - "generation": 1, - "base_model": "chuanli11/Llama-3.2-3B-Instruct-uncensored", - "hub_license": "llama3.2", - "hub_hearts": 1, - "params_billions": 3.213, - "co2_cost": 1.38101522973834 - } - }, - { - "id": "piotr25691/thea-c-3b-25r_bfloat16_93a2333a84feda26f020bc8fa92f870462dacd89_True", - "model": { - "name": "piotr25691/thea-c-3b-25r", - "sha": "93a2333a84feda26f020bc8fa92f870462dacd89", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.25479609330286, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7401904723910335, - "normalized_score": 74.01904723910336 - }, - "bbh": { - "name": "BBH", - "value": 0.4532410175874399, - "normalized_score": 22.767850009265825 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15256797583081572, - "normalized_score": 15.256797583081571 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.33148958333333334, - "normalized_score": 1.2695312499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3178191489361702, - "normalized_score": 24.202127659574465 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-14", - "submission_date": "2024-10-17", - "generation": 1, - "base_model": "meta-llama/Llama-3.2-3B-Instruct", - "hub_license": "llama3.2", - "hub_hearts": 1, - "params_billions": 3.213, - "co2_cost": 1.324912414241504 - } - }, - { - "id": "piotr25691/thea-rp-3b-25r_bfloat16_ed4c338e07356f1657cf4d08b768ff866bbf0a68_True", - "model": { - "name": "piotr25691/thea-rp-3b-25r", - "sha": "ed4c338e07356f1657cf4d08b768ff866bbf0a68", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.845382189211346, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6577835698169745, - "normalized_score": 
65.77835698169746 - }, - "bbh": { - "name": "BBH", - "value": 0.4390291036559586, - "normalized_score": 20.007380927568594 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13217522658610273, - "normalized_score": 13.217522658610273 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.381875, - "normalized_score": 5.934375000000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30601728723404253, - "normalized_score": 22.89080969267139 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-13", - "submission_date": "2024-10-16", - "generation": 2, - "base_model": "SicariusSicariiStuff/Impish_LLAMA_3B (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 1, - "params_billions": 3.213, - "co2_cost": 1.3169066995150893 - } - }, - { - "id": "postbot/gpt2-medium-emailgen_bfloat16_a0299eb6760126e3bd04d2f10cd166c4563f82d2_False", - "model": { - "name": "postbot/gpt2-medium-emailgen", - "sha": "a0299eb6760126e3bd04d2f10cd166c4563f82d2", - "precision": "bfloat16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "GPT2LMHeadModel", - "average_score": 4.743048119298616, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1492030035860406, - "normalized_score": 14.92030035860406 - }, - "bbh": { - "name": "BBH", - "value": 0.31304286003933807, - "normalized_score": 3.673700346196318 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.3911145833333333, - "normalized_score": 6.889322916666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1146941489361702, - "normalized_score": 1.6326832151300221 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2022-09-29", - "submission_date": "2024-11-17", - "generation": 0, - "base_model": "postbot/gpt2-medium-emailgen", - "hub_license": "apache-2.0", - "hub_hearts": 6, - "params_billions": 0.38, - "co2_cost": 0.15637270757147537 - } - }, - { - "id": "prince-canuma/Ministral-8B-Instruct-2410-HF_bfloat16_e0a14d7a6a8a1d1e5bef1a77a42e86e8bcae0ee7_True", - "model": { - "name": "prince-canuma/Ministral-8B-Instruct-2410-HF", - "sha": "e0a14d7a6a8a1d1e5bef1a77a42e86e8bcae0ee7", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 23.74474818460179, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5911636679565775, - "normalized_score": 59.11636679565775 - }, - "bbh": { - "name": "BBH", - "value": 0.4585611339334732, - "normalized_score": 23.778464927274356 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19184290030211482, - "normalized_score": 19.184290030211482 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28104026845637586, - "normalized_score": 4.138702460850116 - }, - "musr": { - "name": "MUSR", - "value": 0.41375, - "normalized_score": 10.718750000000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 
0.32978723404255317, - "normalized_score": 25.53191489361702 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-16", - "submission_date": "2024-10-17", - "generation": 1, - "base_model": "prince-canuma/Ministral-8B-Instruct-2410-HF (Merge)", - "hub_license": "other", - "hub_hearts": 10, - "params_billions": 8.02, - "co2_cost": 2.0338692157280276 - } - }, - { - "id": "princeton-nlp/Llama-3-8B-ProLong-512k-Base_bfloat16_51a333f7c99f5052377154b76909dfe63ff7ab83_True", - "model": { - "name": "princeton-nlp/Llama-3-8B-ProLong-512k-Base", - "sha": "51a333f7c99f5052377154b76909dfe63ff7ab83", - "precision": "bfloat16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.679044932010054, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5322123077877808, - "normalized_score": 53.221230778778086 - }, - "bbh": { - "name": "BBH", - "value": 0.5033213133882991, - "normalized_score": 29.847246369144035 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06873111782477341, - "normalized_score": 6.873111782477341 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.4222708333333333, - "normalized_score": 12.683854166666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33294547872340424, - "normalized_score": 25.882830969267133 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-22", - "submission_date": "2024-10-16", - "generation": 1, - "base_model": "princeton-nlp/Llama-3-8B-ProLong-512k-Base (Merge)", - "hub_license": "llama3", - "hub_hearts": 9, - "params_billions": 8.03, - "co2_cost": 1.7573275822291825 - } - }, - { - "id": "princeton-nlp/Llama-3-8B-ProLong-512k-Instruct_bfloat16_eae0626e8597575215276c2b248720f731bc50b8_True", - "model": { - "name": "princeton-nlp/Llama-3-8B-ProLong-512k-Instruct", - "sha": "eae0626e8597575215276c2b248720f731bc50b8", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.942343537569432, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5508218194390884, - "normalized_score": 55.08218194390883 - }, - "bbh": { - "name": "BBH", - "value": 0.5028310716285619, - "normalized_score": 29.151153442911763 - }, - "math": { - "name": "MATH Level 5", - "value": 0.052870090634441085, - "normalized_score": 5.287009063444108 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2860738255033557, - "normalized_score": 4.809843400447425 - }, - "musr": { - "name": "MUSR", - "value": 0.42664583333333334, - "normalized_score": 12.530729166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32313829787234044, - "normalized_score": 24.793144208037827 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-22", - "submission_date": "2024-11-16", - "generation": 1, - "base_model": "princeton-nlp/Llama-3-8B-ProLong-512k-Instruct (Merge)", - "hub_license": "llama3", - 
"hub_hearts": 20, - "params_billions": 8.03, - "co2_cost": 2.344706085397087 - } - }, - { - "id": "princeton-nlp/Llama-3-8B-ProLong-512k-Instruct_float16_bf92e493b7b0ef1db0242bfa97f1d8f92be02e9c_False", - "model": { - "name": "princeton-nlp/Llama-3-8B-ProLong-512k-Instruct", - "sha": "bf92e493b7b0ef1db0242bfa97f1d8f92be02e9c", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.242001937233777, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3977734632996006, - "normalized_score": 39.77734632996006 - }, - "bbh": { - "name": "BBH", - "value": 0.49830327201612584, - "normalized_score": 28.669218583844156 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0581570996978852, - "normalized_score": 5.81570996978852 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28104026845637586, - "normalized_score": 4.138702460850116 - }, - "musr": { - "name": "MUSR", - "value": 0.425, - "normalized_score": 12.091666666666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3246343085106383, - "normalized_score": 24.959367612293143 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-22", - "submission_date": "2024-11-16", - "generation": 1, - "base_model": "princeton-nlp/Llama-3-8B-ProLong-512k-Instruct (Merge)", - "hub_license": "llama3", - "hub_hearts": 20, - "params_billions": 8.03, - "co2_cost": 1.448747104795808 - } - }, - { - "id": "princeton-nlp/Llama-3-8B-ProLong-64k-Base_bfloat16_97994d6918f80162a893e22d5e7bba586551f941_True", - "model": { - "name": "princeton-nlp/Llama-3-8B-ProLong-64k-Base", - "sha": "97994d6918f80162a893e22d5e7bba586551f941", - "precision": "bfloat16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.65219829915355, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5200722970606879, - "normalized_score": 52.007229706068784 - }, - "bbh": { - "name": "BBH", - "value": 0.49271325981523906, - "normalized_score": 28.687899000980178 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0649546827794562, - "normalized_score": 6.495468277945619 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.4340520833333333, - "normalized_score": 14.62317708333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3347739361702128, - "normalized_score": 26.08599290780142 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-22", - "submission_date": "2024-10-16", - "generation": 1, - "base_model": "princeton-nlp/Llama-3-8B-ProLong-64k-Base (Merge)", - "hub_license": "llama3", - "hub_hearts": 5, - "params_billions": 8.03, - "co2_cost": 2.714774969456653 - } - }, - { - "id": "princeton-nlp/Llama-3-8B-ProLong-64k-Instruct_bfloat16_fe55aed18544c5744239e473bb0d3aa0151776d3_True", - "model": { - "name": "princeton-nlp/Llama-3-8B-ProLong-64k-Instruct", - "sha": "fe55aed18544c5744239e473bb0d3aa0151776d3", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - 
"architecture": "LlamaForCausalLM", - "average_score": 23.020991799409, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5563172382611471, - "normalized_score": 55.63172382611471 - }, - "bbh": { - "name": "BBH", - "value": 0.5083040804243396, - "normalized_score": 30.089572165686842 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0649546827794562, - "normalized_score": 6.495468277945619 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2953020134228188, - "normalized_score": 6.040268456375841 - }, - "musr": { - "name": "MUSR", - "value": 0.43969791666666663, - "normalized_score": 14.595572916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32746010638297873, - "normalized_score": 25.2733451536643 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-21", - "submission_date": "2024-10-16", - "generation": 1, - "base_model": "princeton-nlp/Llama-3-8B-ProLong-64k-Instruct (Merge)", - "hub_license": "llama3", - "hub_hearts": 13, - "params_billions": 8.03, - "co2_cost": 2.417880176151947 - } - }, - { - "id": "princeton-nlp/Llama-3-Base-8B-SFT_bfloat16_b622b7d814aa03aa722328bf88feaf1ad480b7fb_True", - "model": { - "name": "princeton-nlp/Llama-3-Base-8B-SFT", - "sha": "b622b7d814aa03aa722328bf88feaf1ad480b7fb", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 15.96420649511064, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.27959591661236627, - "normalized_score": 27.95959166123663 - }, - "bbh": { - "name": "BBH", - "value": 0.464303802632615, - "normalized_score": 24.345967199755464 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04003021148036254, - "normalized_score": 4.003021148036254 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.4117916666666667, - "normalized_score": 9.840625000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3093417553191489, - "normalized_score": 23.26019503546099 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-17", - "submission_date": "2024-10-07", - "generation": 0, - "base_model": "princeton-nlp/Llama-3-Base-8B-SFT", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 2.620909789947218 - } - }, - { - "id": "princeton-nlp/Llama-3-Base-8B-SFT-CPO_bfloat16_536ce7e7beb35175c48538fe46e7e9e100f228c9_True", - "model": { - "name": "princeton-nlp/Llama-3-Base-8B-SFT-CPO", - "sha": "536ce7e7beb35175c48538fe46e7e9e100f228c9", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 15.953789309641664, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.37034623687371726, - "normalized_score": 37.03462368737173 - }, - "bbh": { - "name": "BBH", - "value": 0.4594875922440002, - "normalized_score": 25.474648628373444 - }, - "math": { - "name": "MATH Level 5", - "value": 0.054380664652567974, - "normalized_score": 5.438066465256798 - }, - "gpqa": { - "name": "GPQA", - 
"value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.3608541666666667, - "normalized_score": 2.5734375000000025 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2976230053191489, - "normalized_score": 21.95811170212766 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-06", - "submission_date": "2024-10-07", - "generation": 0, - "base_model": "princeton-nlp/Llama-3-Base-8B-SFT-CPO", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.9356921108847127 - } - }, - { - "id": "princeton-nlp/Llama-3-Base-8B-SFT-DPO_bfloat16_3f5ec47c9beffb37cfbdcd837e76a336a9b1e651_True", - "model": { - "name": "princeton-nlp/Llama-3-Base-8B-SFT-DPO", - "sha": "3f5ec47c9beffb37cfbdcd837e76a336a9b1e651", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.376219112296344, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.41111251479407973, - "normalized_score": 41.11125147940797 - }, - "bbh": { - "name": "BBH", - "value": 0.46658506064913546, - "normalized_score": 26.001873821480995 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04154078549848943, - "normalized_score": 4.1540785498489425 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3104026845637584, - "normalized_score": 8.05369127516779 - }, - "musr": { - "name": "MUSR", - "value": 0.38673958333333336, - "normalized_score": 7.842447916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3078457446808511, - "normalized_score": 23.093971631205672 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-17", - "submission_date": "2024-10-07", - "generation": 0, - "base_model": "princeton-nlp/Llama-3-Base-8B-SFT-DPO", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.8526797331815927 - } - }, - { - "id": "princeton-nlp/Llama-3-Base-8B-SFT-IPO_bfloat16_85055cc4b9c707e0bd1239d20d1f62927a7a54c3_True", - "model": { - "name": "princeton-nlp/Llama-3-Base-8B-SFT-IPO", - "sha": "85055cc4b9c707e0bd1239d20d1f62927a7a54c3", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.722473272112868, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4486562321307464, - "normalized_score": 44.86562321307464 - }, - "bbh": { - "name": "BBH", - "value": 0.4690068582318399, - "normalized_score": 25.705433288023944 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03927492447129909, - "normalized_score": 3.927492447129909 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.3919479166666667, - "normalized_score": 7.960156250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3115026595744681, - "normalized_score": 23.50029550827423 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-17", - "submission_date": 
"2024-10-07", - "generation": 0, - "base_model": "princeton-nlp/Llama-3-Base-8B-SFT-IPO", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.8643816941174216 - } - }, - { - "id": "princeton-nlp/Llama-3-Base-8B-SFT-KTO_bfloat16_49a8c2e5ccc7a28ed7bbedf093e352015fc1eb9b_True", - "model": { - "name": "princeton-nlp/Llama-3-Base-8B-SFT-KTO", - "sha": "49a8c2e5ccc7a28ed7bbedf093e352015fc1eb9b", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.6446163938307, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4522533544329047, - "normalized_score": 45.225335443290476 - }, - "bbh": { - "name": "BBH", - "value": 0.4692852292721417, - "normalized_score": 25.55523001299593 - }, - "math": { - "name": "MATH Level 5", - "value": 0.052870090634441085, - "normalized_score": 5.287009063444108 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.3841979166666667, - "normalized_score": 5.5914062499999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3054355053191489, - "normalized_score": 22.826167257683213 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-17", - "submission_date": "2024-10-07", - "generation": 0, - "base_model": "princeton-nlp/Llama-3-Base-8B-SFT-KTO", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.7237038714780366 - } - }, - { - "id": "princeton-nlp/Llama-3-Base-8B-SFT-ORPO_bfloat16_54d58402e0168faff6503e41621ad6c8274a310a_True", - "model": { - "name": "princeton-nlp/Llama-3-Base-8B-SFT-ORPO", - "sha": "54d58402e0168faff6503e41621ad6c8274a310a", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.268325889820545, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45165383404921167, - "normalized_score": 45.16538340492117 - }, - "bbh": { - "name": "BBH", - "value": 0.47340573024653915, - "normalized_score": 26.48589369385502 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04682779456193353, - "normalized_score": 4.682779456193353 - }, - "gpqa": { - "name": "GPQA", - "value": 0.313758389261745, - "normalized_score": 8.501118568232664 - }, - "musr": { - "name": "MUSR", - "value": 0.3706770833333333, - "normalized_score": 7.634635416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30826130319148937, - "normalized_score": 23.140144799054376 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-17", - "submission_date": "2024-10-07", - "generation": 0, - "base_model": "princeton-nlp/Llama-3-Base-8B-SFT-ORPO", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.8131264465728392 - } - }, - { - "id": "princeton-nlp/Llama-3-Base-8B-SFT-RDPO_bfloat16_b41a964c2135ba34dcc6fa7edf76b6b9ea656949_True", - "model": { - "name": "princeton-nlp/Llama-3-Base-8B-SFT-RDPO", - "sha": "b41a964c2135ba34dcc6fa7edf76b6b9ea656949", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": 
"LlamaForCausalLM", - "average_score": 19.142302231808696, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4480068440626427, - "normalized_score": 44.80068440626427 - }, - "bbh": { - "name": "BBH", - "value": 0.46620140448752295, - "normalized_score": 25.526521127200017 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05740181268882175, - "normalized_score": 5.740181268882175 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3062080536912752, - "normalized_score": 7.494407158836691 - }, - "musr": { - "name": "MUSR", - "value": 0.4027395833333334, - "normalized_score": 8.909114583333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30144614361702127, - "normalized_score": 22.382904846335695 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-17", - "submission_date": "2024-10-07", - "generation": 0, - "base_model": "princeton-nlp/Llama-3-Base-8B-SFT-RDPO", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.8048707562728685 - } - }, - { - "id": "princeton-nlp/Llama-3-Base-8B-SFT-RRHF_bfloat16_aea8c04b3940cebd1f8296a2c76914f0ce70c276_True", - "model": { - "name": "princeton-nlp/Llama-3-Base-8B-SFT-RRHF", - "sha": "aea8c04b3940cebd1f8296a2c76914f0ce70c276", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.28272427355282, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3357247658435174, - "normalized_score": 33.57247658435174 - }, - "bbh": { - "name": "BBH", - "value": 0.4520360167602379, - "normalized_score": 23.659142323042403 - }, - "math": { - "name": "MATH Level 5", - "value": 0.045317220543806644, - "normalized_score": 4.531722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.37222916666666667, - "normalized_score": 7.561979166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2888962765957447, - "normalized_score": 20.988475177304963 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-06", - "submission_date": "2024-10-07", - "generation": 0, - "base_model": "princeton-nlp/Llama-3-Base-8B-SFT-RRHF", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.9029374463777384 - } - }, - { - "id": "princeton-nlp/Llama-3-Base-8B-SFT-SLiC-HF_bfloat16_325092c1eddffc3ca7157be1ff9958128e5753ef_True", - "model": { - "name": "princeton-nlp/Llama-3-Base-8B-SFT-SLiC-HF", - "sha": "325092c1eddffc3ca7157be1ff9958128e5753ef", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.743113491063596, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4890479483326463, - "normalized_score": 48.90479483326463 - }, - "bbh": { - "name": "BBH", - "value": 0.4704075127777334, - "normalized_score": 26.37396261839453 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05060422960725076, - "normalized_score": 5.0604229607250755 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28691275167785235, - 
"normalized_score": 4.921700223713646 - }, - "musr": { - "name": "MUSR", - "value": 0.40909375000000003, - "normalized_score": 10.270052083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30634973404255317, - "normalized_score": 22.92774822695035 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-06", - "submission_date": "2024-10-07", - "generation": 0, - "base_model": "princeton-nlp/Llama-3-Base-8B-SFT-SLiC-HF", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.9209421284151862 - } - }, - { - "id": "princeton-nlp/Llama-3-Base-8B-SFT-SimPO_bfloat16_0a6e518b13b67abe8433bce3f7beee9beb74a794_True", - "model": { - "name": "princeton-nlp/Llama-3-Base-8B-SFT-SimPO", - "sha": "0a6e518b13b67abe8433bce3f7beee9beb74a794", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.85850945722653, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4685401401614383, - "normalized_score": 46.854014016143836 - }, - "bbh": { - "name": "BBH", - "value": 0.47412507033960827, - "normalized_score": 26.39594961870209 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05513595166163142, - "normalized_score": 5.5135951661631415 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28859060402684567, - "normalized_score": 5.145413870246088 - }, - "musr": { - "name": "MUSR", - "value": 0.41268750000000004, - "normalized_score": 11.852604166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31050531914893614, - "normalized_score": 23.389479905437348 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-24", - "submission_date": "2024-09-28", - "generation": 0, - "base_model": "princeton-nlp/Llama-3-Base-8B-SFT-SimPO", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.7231300520645123 - } - }, - { - "id": "princeton-nlp/Llama-3-Instruct-8B-CPO_bfloat16_d4645ae4c3b99892f1c59f60a77330be35567835_True", - "model": { - "name": "princeton-nlp/Llama-3-Instruct-8B-CPO", - "sha": "d4645ae4c3b99892f1c59f60a77330be35567835", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.999076429407367, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7292993701157373, - "normalized_score": 72.92993701157374 - }, - "bbh": { - "name": "BBH", - "value": 0.4998793158888361, - "normalized_score": 28.604298572618475 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09894259818731117, - "normalized_score": 9.894259818731117 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.35139583333333335, - "normalized_score": 1.7578124999999993 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36519281914893614, - "normalized_score": 29.465868794326234 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-06", - "submission_date": "2024-09-28", - 
"generation": 0, - "base_model": "princeton-nlp/Llama-3-Instruct-8B-CPO", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.478832591107401 - } - }, - { - "id": "princeton-nlp/Llama-3-Instruct-8B-CPO-v0.2_bfloat16_5ed83728712693437bd547f4cd32923ac4e1172d_True", - "model": { - "name": "princeton-nlp/Llama-3-Instruct-8B-CPO-v0.2", - "sha": "5ed83728712693437bd547f4cd32923ac4e1172d", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.883954972962346, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7505817896514582, - "normalized_score": 75.05817896514581 - }, - "bbh": { - "name": "BBH", - "value": 0.5026669871217129, - "normalized_score": 29.086406714200834 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10800604229607251, - "normalized_score": 10.80060422960725 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.36190625000000004, - "normalized_score": 2.8382812499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37059507978723405, - "normalized_score": 30.06611997635934 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-06", - "submission_date": "2024-09-28", - "generation": 0, - "base_model": "princeton-nlp/Llama-3-Instruct-8B-CPO-v0.2", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.5457710913976705 - } - }, - { - "id": "princeton-nlp/Llama-3-Instruct-8B-DPO_bfloat16_0afbf4c012ec7507f61c554999151b95a3651db3_True", - "model": { - "name": "princeton-nlp/Llama-3-Instruct-8B-DPO", - "sha": "0afbf4c012ec7507f61c554999151b95a3651db3", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.498239725981037, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6757436934001781, - "normalized_score": 67.57436934001782 - }, - "bbh": { - "name": "BBH", - "value": 0.4991303079139502, - "normalized_score": 28.50739167799402 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08459214501510574, - "normalized_score": 8.459214501510575 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27181208053691275, - "normalized_score": 2.9082774049216997 - }, - "musr": { - "name": "MUSR", - "value": 0.37381250000000005, - "normalized_score": 3.9265625000000015 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36652260638297873, - "normalized_score": 29.613622931442084 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-17", - "submission_date": "2024-09-28", - "generation": 0, - "base_model": "princeton-nlp/Llama-3-Instruct-8B-DPO", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.1297075680762036 - } - }, - { - "id": "princeton-nlp/Llama-3-Instruct-8B-DPO-v0.2_bfloat16_d06275e02abbeaf29d911a3c0cf22922dcca6b0b_True", - "model": { - "name": "princeton-nlp/Llama-3-Instruct-8B-DPO-v0.2", - "sha": "d06275e02abbeaf29d911a3c0cf22922dcca6b0b", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - 
"architecture": "LlamaForCausalLM", - "average_score": 25.208963221170475, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7208063493752133, - "normalized_score": 72.08063493752132 - }, - "bbh": { - "name": "BBH", - "value": 0.505620320855615, - "normalized_score": 28.939587046939987 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08987915407854985, - "normalized_score": 8.987915407854985 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28691275167785235, - "normalized_score": 4.921700223713646 - }, - "musr": { - "name": "MUSR", - "value": 0.3844479166666666, - "normalized_score": 5.555989583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37691156914893614, - "normalized_score": 30.767952127659566 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-06", - "submission_date": "2024-09-28", - "generation": 0, - "base_model": "princeton-nlp/Llama-3-Instruct-8B-DPO-v0.2", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2057596581828283 - } - }, - { - "id": "princeton-nlp/Llama-3-Instruct-8B-KTO_bfloat16_e697908201cbab01e0ca54088bb8cd2fd99b4574_True", - "model": { - "name": "princeton-nlp/Llama-3-Instruct-8B-KTO", - "sha": "e697908201cbab01e0ca54088bb8cd2fd99b4574", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.419046565350595, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6864098370102439, - "normalized_score": 68.6409837010244 - }, - "bbh": { - "name": "BBH", - "value": 0.4981903187457697, - "normalized_score": 28.64965788695442 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07250755287009064, - "normalized_score": 7.250755287009064 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.36984374999999997, - "normalized_score": 3.630468749999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35987367021276595, - "normalized_score": 28.874852245862886 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-17", - "submission_date": "2024-09-28", - "generation": 0, - "base_model": "princeton-nlp/Llama-3-Instruct-8B-KTO", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2050342644520593 - } - }, - { - "id": "princeton-nlp/Llama-3-Instruct-8B-KTO-v0.2_bfloat16_477d33ea62ed57a0429517170612aa1df21c78d6_True", - "model": { - "name": "princeton-nlp/Llama-3-Instruct-8B-KTO-v0.2", - "sha": "477d33ea62ed57a0429517170612aa1df21c78d6", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.659390077356022, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7290245437660962, - "normalized_score": 72.90245437660963 - }, - "bbh": { - "name": "BBH", - "value": 0.5079766897761946, - "normalized_score": 29.648405523209778 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09969788519637462, - "normalized_score": 9.969788519637463 - }, - "gpqa": { - "name": "GPQA", - "value": 
0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.37775, - "normalized_score": 4.4520833333333325 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3667719414893617, - "normalized_score": 29.6413268321513 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-06", - "submission_date": "2024-09-28", - "generation": 0, - "base_model": "princeton-nlp/Llama-3-Instruct-8B-KTO-v0.2", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2610727933649515 - } - }, - { - "id": "princeton-nlp/Llama-3-Instruct-8B-ORPO_bfloat16_4bb3ffcf9ede48cb01a10bf3223eb41b59aa3fef_True", - "model": { - "name": "princeton-nlp/Llama-3-Instruct-8B-ORPO", - "sha": "4bb3ffcf9ede48cb01a10bf3223eb41b59aa3fef", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.622591862806686, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.712813113649561, - "normalized_score": 71.2813113649561 - }, - "bbh": { - "name": "BBH", - "value": 0.5001206199104097, - "normalized_score": 28.839356158957287 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07854984894259819, - "normalized_score": 7.854984894259818 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.35018750000000004, - "normalized_score": 3.240104166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36461103723404253, - "normalized_score": 29.40122635933806 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-17", - "submission_date": "2024-09-28", - "generation": 0, - "base_model": "princeton-nlp/Llama-3-Instruct-8B-ORPO", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2478082307124323 - } - }, - { - "id": "princeton-nlp/Llama-3-Instruct-8B-ORPO-v0.2_bfloat16_3ea5c542a3d8d61f6afb6cdbef5972a501ddf759_True", - "model": { - "name": "princeton-nlp/Llama-3-Instruct-8B-ORPO-v0.2", - "sha": "3ea5c542a3d8d61f6afb6cdbef5972a501ddf759", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.9661449482921, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7633213207622442, - "normalized_score": 76.33213207622441 - }, - "bbh": { - "name": "BBH", - "value": 0.507835231782556, - "normalized_score": 29.60483732707141 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10196374622356495, - "normalized_score": 10.196374622356496 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.37796874999999996, - "normalized_score": 4.8460937500000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37308843085106386, - "normalized_score": 30.343158983451534 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-06", - "submission_date": 
"2024-09-28", - "generation": 0, - "base_model": "princeton-nlp/Llama-3-Instruct-8B-ORPO-v0.2", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.1884649884315808 - } - }, - { - "id": "princeton-nlp/Llama-3-Instruct-8B-RDPO_bfloat16_9497ca226a68981f42df2e5b3a4a1a2ea702a942_True", - "model": { - "name": "princeton-nlp/Llama-3-Instruct-8B-RDPO", - "sha": "9497ca226a68981f42df2e5b3a4a1a2ea702a942", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.603754396673683, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6660017642078574, - "normalized_score": 66.60017642078574 - }, - "bbh": { - "name": "BBH", - "value": 0.5033626077797596, - "normalized_score": 29.032479102136296 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08459214501510574, - "normalized_score": 8.459214501510575 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.3752083333333333, - "normalized_score": 4.201041666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36070478723404253, - "normalized_score": 28.967198581560282 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-17", - "submission_date": "2024-09-28", - "generation": 0, - "base_model": "princeton-nlp/Llama-3-Instruct-8B-RDPO", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.1324999349471547 - } - }, - { - "id": "princeton-nlp/Llama-3-Instruct-8B-RDPO-v0.2_bfloat16_4e5bc9779cba3a2f615379d3f8ef1bbb3ea487f7_True", - "model": { - "name": "princeton-nlp/Llama-3-Instruct-8B-RDPO-v0.2", - "sha": "4e5bc9779cba3a2f615379d3f8ef1bbb3ea487f7", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.03222506807271, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7076922565459647, - "normalized_score": 70.76922565459647 - }, - "bbh": { - "name": "BBH", - "value": 0.5049218189829557, - "normalized_score": 28.85427665062166 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08685800604229607, - "normalized_score": 8.685800604229607 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29278523489932884, - "normalized_score": 5.7046979865771785 - }, - "musr": { - "name": "MUSR", - "value": 0.3804479166666666, - "normalized_score": 5.355989583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37741023936170215, - "normalized_score": 30.82335992907802 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-06", - "submission_date": "2024-09-28", - "generation": 0, - "base_model": "princeton-nlp/Llama-3-Instruct-8B-RDPO-v0.2", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.1158963882127917 - } - }, - { - "id": "princeton-nlp/Llama-3-Instruct-8B-RRHF_bfloat16_73561d9b0fd42b94250246f8d794251fe9f9d2e9_True", - "model": { - "name": "princeton-nlp/Llama-3-Instruct-8B-RRHF", - "sha": "73561d9b0fd42b94250246f8d794251fe9f9d2e9", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": 
"Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.084494106293988, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7274509412802475, - "normalized_score": 72.74509412802476 - }, - "bbh": { - "name": "BBH", - "value": 0.49105468765647214, - "normalized_score": 27.21648494751436 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09667673716012085, - "normalized_score": 9.667673716012084 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2802013422818792, - "normalized_score": 4.026845637583895 - }, - "musr": { - "name": "MUSR", - "value": 0.3475520833333334, - "normalized_score": 1.47734375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36436170212765956, - "normalized_score": 29.373522458628837 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-06", - "submission_date": "2024-10-07", - "generation": 0, - "base_model": "princeton-nlp/Llama-3-Instruct-8B-RRHF", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2784314455881307 - } - }, - { - "id": "princeton-nlp/Llama-3-Instruct-8B-RRHF-v0.2_bfloat16_81191fbb214d17f0a4fec247da5d648f4cb61ef1_True", - "model": { - "name": "princeton-nlp/Llama-3-Instruct-8B-RRHF-v0.2", - "sha": "81191fbb214d17f0a4fec247da5d648f4cb61ef1", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.753750599972534, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.712488419615509, - "normalized_score": 71.24884196155091 - }, - "bbh": { - "name": "BBH", - "value": 0.49838952572927536, - "normalized_score": 28.498723991187727 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08761329305135952, - "normalized_score": 8.761329305135952 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.37378125, - "normalized_score": 5.089322916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3482380319148936, - "normalized_score": 27.582003546099287 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-06", - "submission_date": "2024-10-07", - "generation": 0, - "base_model": "princeton-nlp/Llama-3-Instruct-8B-RRHF-v0.2", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.0117469934821683 - } - }, - { - "id": "princeton-nlp/Llama-3-Instruct-8B-SLiC-HF_bfloat16_7e9001f6f4fe940c363bb7ea1814d33c79b21737_True", - "model": { - "name": "princeton-nlp/Llama-3-Instruct-8B-SLiC-HF", - "sha": "7e9001f6f4fe940c363bb7ea1814d33c79b21737", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.308144085338295, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7399655137258031, - "normalized_score": 73.99655137258031 - }, - "bbh": { - "name": "BBH", - "value": 0.5029422936734547, - "normalized_score": 29.211612180239623 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09743202416918428, - "normalized_score": 9.743202416918429 - }, - "gpqa": { - "name": "GPQA", - "value": 
0.2860738255033557, - "normalized_score": 4.809843400447425 - }, - "musr": { - "name": "MUSR", - "value": 0.3722916666666667, - "normalized_score": 5.369791666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35846077127659576, - "normalized_score": 28.717863475177303 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-06", - "submission_date": "2024-10-07", - "generation": 0, - "base_model": "princeton-nlp/Llama-3-Instruct-8B-SLiC-HF", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4503845366506314 - } - }, - { - "id": "princeton-nlp/Llama-3-Instruct-8B-SLiC-HF-v0.2_bfloat16_1821cc42189d8dab9e157c31b223dc60fc037c2d_True", - "model": { - "name": "princeton-nlp/Llama-3-Instruct-8B-SLiC-HF-v0.2", - "sha": "1821cc42189d8dab9e157c31b223dc60fc037c2d", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.728355019948566, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7109646848140712, - "normalized_score": 71.09646848140711 - }, - "bbh": { - "name": "BBH", - "value": 0.49838952572927536, - "normalized_score": 28.498723991187727 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08761329305135952, - "normalized_score": 8.761329305135952 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.37378125, - "normalized_score": 5.089322916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3482380319148936, - "normalized_score": 27.582003546099287 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-06", - "submission_date": "2024-10-07", - "generation": 0, - "base_model": "princeton-nlp/Llama-3-Instruct-8B-SLiC-HF-v0.2", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.0424785340309333 - } - }, - { - "id": "princeton-nlp/Llama-3-Instruct-8B-SimPO_bfloat16_f700cb6afb4509b10dea43ab72bb0e260e166be4_True", - "model": { - "name": "princeton-nlp/Llama-3-Instruct-8B-SimPO", - "sha": "f700cb6afb4509b10dea43ab72bb0e260e166be4", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.664165370275043, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6503898544750152, - "normalized_score": 65.03898544750152 - }, - "bbh": { - "name": "BBH", - "value": 0.48446848524905367, - "normalized_score": 26.709132779658223 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08610271903323263, - "normalized_score": 8.610271903323262 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2936241610738255, - "normalized_score": 5.8165548098433995 - }, - "musr": { - "name": "MUSR", - "value": 0.39483333333333337, - "normalized_score": 8.154166666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3489029255319149, - "normalized_score": 27.655880614657207 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-17", - 
"submission_date": "2024-09-28", - "generation": 0, - "base_model": "princeton-nlp/Llama-3-Instruct-8B-SimPO", - "hub_license": "", - "hub_hearts": 58, - "params_billions": 8.03, - "co2_cost": 1.0666912046425685 - } - }, - { - "id": "princeton-nlp/Llama-3-Instruct-8B-SimPO-v0.2_bfloat16_9ac0fbee445e7755e50520e9881d67588b4b854c_True", - "model": { - "name": "princeton-nlp/Llama-3-Instruct-8B-SimPO-v0.2", - "sha": "9ac0fbee445e7755e50520e9881d67588b4b854c", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.751539678625036, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6808645505037745, - "normalized_score": 68.08645505037744 - }, - "bbh": { - "name": "BBH", - "value": 0.503833834044343, - "normalized_score": 29.214021710829382 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07401812688821752, - "normalized_score": 7.401812688821751 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.3988020833333334, - "normalized_score": 7.8502604166666705 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36220079787234044, - "normalized_score": 29.1334219858156 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-06", - "submission_date": "2024-09-28", - "generation": 0, - "base_model": "princeton-nlp/Llama-3-Instruct-8B-SimPO-v0.2", - "hub_license": "", - "hub_hearts": 6, - "params_billions": 8.03, - "co2_cost": 1.1599632156488955 - } - }, - { - "id": "princeton-nlp/Mistral-7B-Base-SFT-CPO_bfloat16_7f67394668b94a9ddfb64daff8976b48b135d96c_True", - "model": { - "name": "princeton-nlp/Mistral-7B-Base-SFT-CPO", - "sha": "7f67394668b94a9ddfb64daff8976b48b135d96c", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 17.3989699937128, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.46549267055856236, - "normalized_score": 46.549267055856234 - }, - "bbh": { - "name": "BBH", - "value": 0.43821512506663574, - "normalized_score": 21.857696499882195 - }, - "math": { - "name": "MATH Level 5", - "value": 0.027945619335347432, - "normalized_score": 2.794561933534743 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.4070833333333333, - "normalized_score": 9.252083333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.26512632978723405, - "normalized_score": 18.34736997635934 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-06", - "submission_date": "2024-10-07", - "generation": 0, - "base_model": "princeton-nlp/Mistral-7B-Base-SFT-CPO", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 1.619537565872139 - } - }, - { - "id": "princeton-nlp/Mistral-7B-Base-SFT-DPO_bfloat16_17134fd80cfbf3980353967a30dc6f450f18f78f_True", - "model": { - "name": "princeton-nlp/Mistral-7B-Base-SFT-DPO", - "sha": "17134fd80cfbf3980353967a30dc6f450f18f78f", - "precision": "bfloat16", - "type": "chatmodels", - 
"weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 16.311853642274055, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44033830237104216, - "normalized_score": 44.03383023710421 - }, - "bbh": { - "name": "BBH", - "value": 0.43501123979612694, - "normalized_score": 20.79098038827006 - }, - "math": { - "name": "MATH Level 5", - "value": 0.021148036253776436, - "normalized_score": 2.1148036253776437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.41222916666666665, - "normalized_score": 9.628645833333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.26454454787234044, - "normalized_score": 18.28272754137116 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-17", - "submission_date": "2024-10-07", - "generation": 0, - "base_model": "princeton-nlp/Mistral-7B-Base-SFT-DPO", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.3352393409486394 - } - }, - { - "id": "princeton-nlp/Mistral-7B-Base-SFT-IPO_bfloat16_eea781724e4d2ab8bdda7c13526f042de4cfae41_True", - "model": { - "name": "princeton-nlp/Mistral-7B-Base-SFT-IPO", - "sha": "eea781724e4d2ab8bdda7c13526f042de4cfae41", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 17.273368181499578, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.48295300912689443, - "normalized_score": 48.29530091268944 - }, - "bbh": { - "name": "BBH", - "value": 0.4458024605899282, - "normalized_score": 23.70349052130433 - }, - "math": { - "name": "MATH Level 5", - "value": 0.028700906344410877, - "normalized_score": 2.8700906344410875 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2802013422818792, - "normalized_score": 4.026845637583895 - }, - "musr": { - "name": "MUSR", - "value": 0.37762500000000004, - "normalized_score": 4.836458333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2791722074468085, - "normalized_score": 19.908023049645386 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-17", - "submission_date": "2024-10-07", - "generation": 0, - "base_model": "princeton-nlp/Mistral-7B-Base-SFT-IPO", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.3346689175278992 - } - }, - { - "id": "princeton-nlp/Mistral-7B-Base-SFT-KTO_bfloat16_02148bb9241b0f4bb0c75e93893eed005abe25e8_True", - "model": { - "name": "princeton-nlp/Mistral-7B-Base-SFT-KTO", - "sha": "02148bb9241b0f4bb0c75e93893eed005abe25e8", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.012992284438702, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.478481540091402, - "normalized_score": 47.848154009140195 - }, - "bbh": { - "name": "BBH", - "value": 0.44764334464528677, - "normalized_score": 23.10764227790982 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03927492447129909, - "normalized_score": 3.927492447129909 - }, - "gpqa": { - "name": 
"GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.43678124999999995, - "normalized_score": 13.030989583333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28715093085106386, - "normalized_score": 20.794547872340427 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-17", - "submission_date": "2024-10-07", - "generation": 0, - "base_model": "princeton-nlp/Mistral-7B-Base-SFT-KTO", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.332033050953054 - } - }, - { - "id": "princeton-nlp/Mistral-7B-Base-SFT-RDPO_bfloat16_2a63a6d9e1978c99444e440371268f7c2b7e0375_True", - "model": { - "name": "princeton-nlp/Mistral-7B-Base-SFT-RDPO", - "sha": "2a63a6d9e1978c99444e440371268f7c2b7e0375", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 16.4909336267166, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.46064663980460735, - "normalized_score": 46.064663980460736 - }, - "bbh": { - "name": "BBH", - "value": 0.44395328626924213, - "normalized_score": 22.98200980704625 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02190332326283988, - "normalized_score": 2.190332326283988 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.3579375, - "normalized_score": 4.275520833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27767619680851063, - "normalized_score": 19.741799645390067 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-17", - "submission_date": "2024-10-07", - "generation": 0, - "base_model": "princeton-nlp/Mistral-7B-Base-SFT-RDPO", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.3250104348805782 - } - }, - { - "id": "princeton-nlp/Mistral-7B-Base-SFT-RRHF_bfloat16_0d5861072e9d01f420451bf6a5b108bc8d3a76bc_True", - "model": { - "name": "princeton-nlp/Mistral-7B-Base-SFT-RRHF", - "sha": "0d5861072e9d01f420451bf6a5b108bc8d3a76bc", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 16.18202454301279, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44066299640509404, - "normalized_score": 44.0662996405094 - }, - "bbh": { - "name": "BBH", - "value": 0.42805937403716016, - "normalized_score": 19.59883081662414 - }, - "math": { - "name": "MATH Level 5", - "value": 0.024924471299093656, - "normalized_score": 2.492447129909366 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.4186770833333333, - "normalized_score": 10.034635416666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.23977726063829788, - "normalized_score": 15.530806737588652 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-06", - 
"submission_date": "2024-10-07", - "generation": 0, - "base_model": "princeton-nlp/Mistral-7B-Base-SFT-RRHF", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.3380022734423447 - } - }, - { - "id": "princeton-nlp/Mistral-7B-Base-SFT-SLiC-HF_bfloat16_65d2cc49ad05258da3d982b39682c7f672f5e4ab_True", - "model": { - "name": "princeton-nlp/Mistral-7B-Base-SFT-SLiC-HF", - "sha": "65d2cc49ad05258da3d982b39682c7f672f5e4ab", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.005885681302413, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5127284494031392, - "normalized_score": 51.27284494031392 - }, - "bbh": { - "name": "BBH", - "value": 0.44223991890402176, - "normalized_score": 22.304722895019296 - }, - "math": { - "name": "MATH Level 5", - "value": 0.035498489425981876, - "normalized_score": 3.5498489425981874 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.42608333333333337, - "normalized_score": 11.527083333333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2780917553191489, - "normalized_score": 19.78797281323877 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-06", - "submission_date": "2024-10-07", - "generation": 0, - "base_model": "princeton-nlp/Mistral-7B-Base-SFT-SLiC-HF", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.336883911603808 - } - }, - { - "id": "princeton-nlp/Mistral-7B-Base-SFT-SimPO_bfloat16_9d9e8b8de4f673d45bc826efc4a1444f9d480222_True", - "model": { - "name": "princeton-nlp/Mistral-7B-Base-SFT-SimPO", - "sha": "9d9e8b8de4f673d45bc826efc4a1444f9d480222", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 17.032014558172765, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47006387496287627, - "normalized_score": 47.00638749628763 - }, - "bbh": { - "name": "BBH", - "value": 0.4398050727924064, - "normalized_score": 22.33288648076616 - }, - "math": { - "name": "MATH Level 5", - "value": 0.014350453172205438, - "normalized_score": 1.4350453172205437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.39706250000000004, - "normalized_score": 8.0328125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27019614361702127, - "normalized_score": 18.910682624113473 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-17", - "submission_date": "2024-09-21", - "generation": 0, - "base_model": "princeton-nlp/Mistral-7B-Base-SFT-SimPO", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.271412786584075 - } - }, - { - "id": "princeton-nlp/Mistral-7B-Instruct-CPO_bfloat16_32492f8e5588f06005689ac944c2ea39c394c28e_True", - "model": { - "name": "princeton-nlp/Mistral-7B-Instruct-CPO", - "sha": "32492f8e5588f06005689ac944c2ea39c394c28e", - "precision": "bfloat16", - "type": "chatmodels", - 
"weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 15.540359200506423, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4203047912871182, - "normalized_score": 42.03047912871182 - }, - "bbh": { - "name": "BBH", - "value": 0.406922267565148, - "normalized_score": 17.248538100586853 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02039274924471299, - "normalized_score": 2.0392749244712993 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.41784375, - "normalized_score": 10.89713541666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2701130319148936, - "normalized_score": 18.901447990543733 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-06", - "submission_date": "2024-10-07", - "generation": 0, - "base_model": "princeton-nlp/Mistral-7B-Instruct-CPO", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.291844519171918 - } - }, - { - "id": "princeton-nlp/Mistral-7B-Instruct-DPO_bfloat16_5e96cff70d8db87cf17c616429c17c8dc9352543_True", - "model": { - "name": "princeton-nlp/Mistral-7B-Instruct-DPO", - "sha": "5e96cff70d8db87cf17c616429c17c8dc9352543", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 16.56219551319486, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.517624347841505, - "normalized_score": 51.7624347841505 - }, - "bbh": { - "name": "BBH", - "value": 0.4060358459697702, - "normalized_score": 16.875389341982814 - }, - "math": { - "name": "MATH Level 5", - "value": 0.030966767371601207, - "normalized_score": 3.096676737160121 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.3833333333333333, - "normalized_score": 5.750000000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2748503989361702, - "normalized_score": 19.42782210401891 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-17", - "submission_date": "2024-10-07", - "generation": 0, - "base_model": "princeton-nlp/Mistral-7B-Instruct-DPO", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.2105331772003622 - } - }, - { - "id": "princeton-nlp/Mistral-7B-Instruct-IPO_bfloat16_32ad99c6e7231bbe8ebd9d24b28e084c60848558_True", - "model": { - "name": "princeton-nlp/Mistral-7B-Instruct-IPO", - "sha": "32ad99c6e7231bbe8ebd9d24b28e084c60848558", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 17.71968449150264, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4929198969844457, - "normalized_score": 49.29198969844457 - }, - "bbh": { - "name": "BBH", - "value": 0.4322183023180588, - "normalized_score": 20.094109548877523 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02039274924471299, - "normalized_score": 2.0392749244712993 - }, - "gpqa": { - "name": "GPQA", - "value": 
0.27348993288590606, - "normalized_score": 3.1319910514541416 - }, - "musr": { - "name": "MUSR", - "value": 0.43241666666666667, - "normalized_score": 12.785416666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2707779255319149, - "normalized_score": 18.975325059101653 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-17", - "submission_date": "2024-10-07", - "generation": 0, - "base_model": "princeton-nlp/Mistral-7B-Instruct-IPO", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.2514953431562663 - } - }, - { - "id": "princeton-nlp/Mistral-7B-Instruct-KTO_bfloat16_834422e5b9b9eee6aac2f8d4822b925a6574d628_True", - "model": { - "name": "princeton-nlp/Mistral-7B-Instruct-KTO", - "sha": "834422e5b9b9eee6aac2f8d4822b925a6574d628", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 16.702591779643708, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4907966417993147, - "normalized_score": 49.07966417993147 - }, - "bbh": { - "name": "BBH", - "value": 0.4139586477181159, - "normalized_score": 17.81264785919903 - }, - "math": { - "name": "MATH Level 5", - "value": 0.026435045317220542, - "normalized_score": 2.643504531722054 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27348993288590606, - "normalized_score": 3.1319910514541416 - }, - "musr": { - "name": "MUSR", - "value": 0.3952708333333333, - "normalized_score": 7.408854166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28125, - "normalized_score": 20.138888888888886 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-17", - "submission_date": "2024-10-07", - "generation": 0, - "base_model": "princeton-nlp/Mistral-7B-Instruct-KTO", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.2067562596637862 - } - }, - { - "id": "princeton-nlp/Mistral-7B-Instruct-ORPO_bfloat16_69c0481f4100629a49ae73f760ddbb61d8e98e48_True", - "model": { - "name": "princeton-nlp/Mistral-7B-Instruct-ORPO", - "sha": "69c0481f4100629a49ae73f760ddbb61d8e98e48", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 16.088293109620597, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4719621714827768, - "normalized_score": 47.19621714827768 - }, - "bbh": { - "name": "BBH", - "value": 0.41040615756566107, - "normalized_score": 18.038372836612158 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02945619335347432, - "normalized_score": 2.9456193353474323 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.3912395833333333, - "normalized_score": 6.638281250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2662067819148936, - "normalized_score": 18.467420212765955 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-17", - "submission_date": 
"2024-10-07", - "generation": 0, - "base_model": "princeton-nlp/Mistral-7B-Instruct-ORPO", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.2485932476809387 - } - }, - { - "id": "princeton-nlp/Mistral-7B-Instruct-RDPO_bfloat16_23ec6ab4f996134eb15c19322dabb34d7332d7cd_True", - "model": { - "name": "princeton-nlp/Mistral-7B-Instruct-RDPO", - "sha": "23ec6ab4f996134eb15c19322dabb34d7332d7cd", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 16.433078686509017, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4887232542985944, - "normalized_score": 48.872325429859444 - }, - "bbh": { - "name": "BBH", - "value": 0.40501479745073615, - "normalized_score": 17.04838760964466 - }, - "math": { - "name": "MATH Level 5", - "value": 0.024924471299093656, - "normalized_score": 2.492447129909366 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2802013422818792, - "normalized_score": 4.026845637583895 - }, - "musr": { - "name": "MUSR", - "value": 0.3873333333333333, - "normalized_score": 6.416666666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27767619680851063, - "normalized_score": 19.741799645390067 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-17", - "submission_date": "2024-10-07", - "generation": 0, - "base_model": "princeton-nlp/Mistral-7B-Instruct-RDPO", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.2212311182397022 - } - }, - { - "id": "princeton-nlp/Mistral-7B-Instruct-RRHF_bfloat16_493d3ceb571232fe3b2f55c0bf78692760f4fc7e_True", - "model": { - "name": "princeton-nlp/Mistral-7B-Instruct-RRHF", - "sha": "493d3ceb571232fe3b2f55c0bf78692760f4fc7e", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 16.892023987011328, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49601723427173233, - "normalized_score": 49.60172342717323 - }, - "bbh": { - "name": "BBH", - "value": 0.41897663476657404, - "normalized_score": 19.20655206374787 - }, - "math": { - "name": "MATH Level 5", - "value": 0.027945619335347432, - "normalized_score": 2.794561933534743 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.397875, - "normalized_score": 7.934375000000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.26512632978723405, - "normalized_score": 18.34736997635934 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-06", - "submission_date": "2024-10-07", - "generation": 0, - "base_model": "princeton-nlp/Mistral-7B-Instruct-RRHF", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.1755025923318254 - } - }, - { - "id": "princeton-nlp/Mistral-7B-Instruct-SLiC-HF_bfloat16_3d08c8b7c3e73beb2a3264848f17246b74c3d162_True", - "model": { - "name": "princeton-nlp/Mistral-7B-Instruct-SLiC-HF", - "sha": "3d08c8b7c3e73beb2a3264848f17246b74c3d162", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - 
"architecture": "MistralForCausalLM", - "average_score": 16.389143637193502, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5115294086357531, - "normalized_score": 51.15294086357531 - }, - "bbh": { - "name": "BBH", - "value": 0.4040013641288438, - "normalized_score": 16.653429432655862 - }, - "math": { - "name": "MATH Level 5", - "value": 0.017371601208459216, - "normalized_score": 1.7371601208459215 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.39130208333333333, - "normalized_score": 6.712760416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27152593085106386, - "normalized_score": 19.058436761229316 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-06", - "submission_date": "2024-10-16", - "generation": 0, - "base_model": "princeton-nlp/Mistral-7B-Instruct-SLiC-HF", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.2449057092755338 - } - }, - { - "id": "princeton-nlp/Mistral-7B-Instruct-SimPO_bfloat16_03191ee1e60d21a698d11a515703a037073724f8_True", - "model": { - "name": "princeton-nlp/Mistral-7B-Instruct-SimPO", - "sha": "03191ee1e60d21a698d11a515703a037073724f8", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 17.607315799541798, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4686897432146704, - "normalized_score": 46.868974321467036 - }, - "bbh": { - "name": "BBH", - "value": 0.4507226157033399, - "normalized_score": 22.38227741589404 - }, - "math": { - "name": "MATH Level 5", - "value": 0.028700906344410877, - "normalized_score": 2.8700906344410875 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2785234899328859, - "normalized_score": 3.8031319910514525 - }, - "musr": { - "name": "MUSR", - "value": 0.40978125, - "normalized_score": 9.755989583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2796708776595745, - "normalized_score": 19.963430851063833 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-24", - "submission_date": "2024-09-21", - "generation": 0, - "base_model": "princeton-nlp/Mistral-7B-Instruct-SimPO", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 7.242, - "co2_cost": 1.141124919299414 - } - }, - { - "id": "princeton-nlp/Sheared-LLaMA-1.3B_bfloat16_a4b76938edbf571ea7d7d9904861cbdca08809b4_False", - "model": { - "name": "princeton-nlp/Sheared-LLaMA-1.3B", - "sha": "a4b76938edbf571ea7d7d9904861cbdca08809b4", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.580925572139816, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2197702097102355, - "normalized_score": 21.97702097102355 - }, - "bbh": { - "name": "BBH", - "value": 0.31970467392464424, - "normalized_score": 4.744629874421679 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01283987915407855, - "normalized_score": 1.283987915407855 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23993288590604026, 
- "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3713020833333333, - "normalized_score": 3.5794270833333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11710438829787234, - "normalized_score": 1.9004875886524817 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-10-10", - "submission_date": "2024-07-29", - "generation": 0, - "base_model": "princeton-nlp/Sheared-LLaMA-1.3B", - "hub_license": "apache-2.0", - "hub_hearts": 94, - "params_billions": 1.3, - "co2_cost": 0.7092001641474984 - } - }, - { - "id": "princeton-nlp/Sheared-LLaMA-2.7B_bfloat16_2f157a0306b75d37694ae05f6a4067220254d540_False", - "model": { - "name": "princeton-nlp/Sheared-LLaMA-2.7B", - "sha": "2f157a0306b75d37694ae05f6a4067220254d540", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 6.437920061018112, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24165214962964932, - "normalized_score": 24.16521496296493 - }, - "bbh": { - "name": "BBH", - "value": 0.32586855691245953, - "normalized_score": 5.655521329938437 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01283987915407855, - "normalized_score": 1.283987915407855 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2751677852348993, - "normalized_score": 3.355704697986576 - }, - "musr": { - "name": "MUSR", - "value": 0.3567291666666667, - "normalized_score": 2.091145833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11868351063829788, - "normalized_score": 2.0759456264775418 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-10-10", - "submission_date": "2024-07-29", - "generation": 0, - "base_model": "princeton-nlp/Sheared-LLaMA-2.7B", - "hub_license": "apache-2.0", - "hub_hearts": 60, - "params_billions": 2.7, - "co2_cost": 0.9400995278216221 - } - }, - { - "id": "princeton-nlp/gemma-2-9b-it-DPO_bfloat16_f646c99fc3aa7afc7b22c3c7115fd03a40fc1d22_True", - "model": { - "name": "princeton-nlp/gemma-2-9b-it-DPO", - "sha": "f646c99fc3aa7afc7b22c3c7115fd03a40fc1d22", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 20.818727391139728, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.27687203287277756, - "normalized_score": 27.687203287277757 - }, - "bbh": { - "name": "BBH", - "value": 0.5941444682956648, - "normalized_score": 41.59365445538448 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08308157099697885, - "normalized_score": 8.308157099697885 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33557046979865773, - "normalized_score": 11.409395973154364 - }, - "musr": { - "name": "MUSR", - "value": 0.38203125, - "normalized_score": 5.653906250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3723404255319149, - "normalized_score": 30.26004728132387 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-16", - "submission_date": "2024-09-19", - "generation": 2, - "base_model": 
"google/gemma-2-9b", - "hub_license": "", - "hub_hearts": 9, - "params_billions": 9.242, - "co2_cost": 5.78125394205658 - } - }, - { - "id": "princeton-nlp/gemma-2-9b-it-SimPO_bfloat16_8c87091f412e3aa6f74f66bd86c57fb81cbc3fde_True", - "model": { - "name": "princeton-nlp/gemma-2-9b-it-SimPO", - "sha": "8c87091f412e3aa6f74f66bd86c57fb81cbc3fde", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 22.3449346084354, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3206857803960159, - "normalized_score": 32.06857803960159 - }, - "bbh": { - "name": "BBH", - "value": 0.5839179923162123, - "normalized_score": 40.093429916371655 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07099697885196375, - "normalized_score": 7.099697885196375 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33557046979865773, - "normalized_score": 11.409395973154364 - }, - "musr": { - "name": "MUSR", - "value": 0.41232291666666665, - "normalized_score": 10.340364583333338 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39752327127659576, - "normalized_score": 33.05814125295508 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-16", - "submission_date": "2024-08-10", - "generation": 2, - "base_model": "google/gemma-2-9b", - "hub_license": "mit", - "hub_hearts": 159, - "params_billions": 9.242, - "co2_cost": 5.5380074485080675 - } - }, - { - "id": "prithivMLmods/Bellatrix-1.5B-xElite_bfloat16_4ec39cef1bf7701abb30dda694b4918c517d1c0d_False", - "model": { - "name": "prithivMLmods/Bellatrix-1.5B-xElite", - "sha": "4ec39cef1bf7701abb30dda694b4918c517d1c0d", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 12.228869542511774, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1964144026737944, - "normalized_score": 19.64144026737944 - }, - "bbh": { - "name": "BBH", - "value": 0.35011984799236834, - "normalized_score": 9.486709175278515 - }, - "math": { - "name": "MATH Level 5", - "value": 0.28700906344410876, - "normalized_score": 28.700906344410875 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2785234899328859, - "normalized_score": 3.8031319910514525 - }, - "musr": { - "name": "MUSR", - "value": 0.36190625000000004, - "normalized_score": 4.438281250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1657247340425532, - "normalized_score": 7.302748226950355 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "prithivMLmods/Bellatrix-1.5B-xElite (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 1.777, - "co2_cost": 1.1993278028558003 - } - }, - { - "id": "prithivMLmods/Bellatrix-Tiny-1.5B-R1_bfloat16_db777568b86dc8aebb654b9167497912e004843e_False", - "model": { - "name": "prithivMLmods/Bellatrix-Tiny-1.5B-R1", - "sha": "db777568b86dc8aebb654b9167497912e004843e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - 
"average_score": 14.322564666933152, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.33522498082864577, - "normalized_score": 33.52249808286457 - }, - "bbh": { - "name": "BBH", - "value": 0.40221745714531076, - "normalized_score": 15.857580425391271 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06042296072507553, - "normalized_score": 6.042296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2986577181208054, - "normalized_score": 6.487695749440718 - }, - "musr": { - "name": "MUSR", - "value": 0.3682916666666667, - "normalized_score": 4.569791666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27509973404255317, - "normalized_score": 19.455526004728128 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-31", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "prithivMLmods/Bellatrix-Tiny-1.5B-R1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 1.544, - "co2_cost": 1.1689612409686612 - } - }, - { - "id": "prithivMLmods/Bellatrix-Tiny-1B-v2_bfloat16_d82282c0853688ed16e3b9e121a09d063c566cc5_False", - "model": { - "name": "prithivMLmods/Bellatrix-Tiny-1B-v2", - "sha": "d82282c0853688ed16e3b9e121a09d063c566cc5", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 6.033864136016213, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15095169705270903, - "normalized_score": 15.095169705270905 - }, - "bbh": { - "name": "BBH", - "value": 0.3267684418723903, - "normalized_score": 6.032561968055522 - }, - "math": { - "name": "MATH Level 5", - "value": 0.028700906344410877, - "normalized_score": 2.8700906344410875 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.34302083333333333, - "normalized_score": 3.7109374999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.14926861702127658, - "normalized_score": 5.474290780141842 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-26", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "prithivMLmods/Bellatrix-Tiny-1B-v2 (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 2, - "params_billions": 1.236, - "co2_cost": 0.7737320997953054 - } - }, - { - "id": "prithivMLmods/Blaze-14B-xElite_bfloat16_1795ffecee7322e697edfd0f900c7155ae2878b9_False", - "model": { - "name": "prithivMLmods/Blaze-14B-xElite", - "sha": "1795ffecee7322e697edfd0f900c7155ae2878b9", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 29.122992008208428, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.03632029681245762, - "normalized_score": 3.6320296812457613 - }, - "bbh": { - "name": "BBH", - "value": 0.6627817236091689, - "normalized_score": 51.57326409688974 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3693353474320242, - "normalized_score": 36.933534743202415 - }, - "gpqa": { - "name": "GPQA", - 
"value": 0.39429530201342283, - "normalized_score": 19.239373601789712 - }, - "musr": { - "name": "MUSR", - "value": 0.46248958333333334, - "normalized_score": 17.67786458333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5111369680851063, - "normalized_score": 45.6818853427896 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-28", - "submission_date": "2025-01-28", - "generation": 0, - "base_model": "prithivMLmods/Blaze-14B-xElite", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 14.66, - "co2_cost": 1.8817105453993044 - } - }, - { - "id": "prithivMLmods/COCO-7B-Instruct-1M_float16_a8ccc848bd1db0f05172a4e1c2197a0d3b4f25c5_False", - "model": { - "name": "prithivMLmods/COCO-7B-Instruct-1M", - "sha": "a8ccc848bd1db0f05172a4e1c2197a0d3b4f25c5", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 28.952308445124856, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4743103853331383, - "normalized_score": 47.431038533313824 - }, - "bbh": { - "name": "BBH", - "value": 0.5409956853800891, - "normalized_score": 34.67788338275968 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3496978851963746, - "normalized_score": 34.96978851963746 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.4382395833333333, - "normalized_score": 13.513281250000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.41863364361702127, - "normalized_score": 35.403738179669034 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "prithivMLmods/COCO-7B-Instruct-1M (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 7.616, - "co2_cost": 1.3381043677295874 - } - }, - { - "id": "prithivMLmods/Calcium-Opus-14B-Elite_bfloat16_a8661f82079677c777595e4259dbaf5a72c8f134_False", - "model": { - "name": "prithivMLmods/Calcium-Opus-14B-Elite", - "sha": "a8661f82079677c777595e4259dbaf5a72c8f134", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.07735278337776, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6051521075191603, - "normalized_score": 60.51521075191603 - }, - "bbh": { - "name": "BBH", - "value": 0.6317361472468987, - "normalized_score": 46.93415830006768 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4788519637462236, - "normalized_score": 47.88519637462236 - }, - "gpqa": { - "name": "GPQA", - "value": 0.37416107382550334, - "normalized_score": 16.554809843400445 - }, - "musr": { - "name": "MUSR", - "value": 0.4859583333333333, - "normalized_score": 20.778124999999992 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5301695478723404, - "normalized_score": 47.796616430260045 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": 
{ - "upload_date": "2025-01-23", - "submission_date": "2025-01-23", - "generation": 1, - "base_model": "prithivMLmods/Calcium-Opus-14B-Elite (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 14.766, - "co2_cost": 4.024797067435322 - } - }, - { - "id": "prithivMLmods/Calcium-Opus-14B-Elite_float16_a8661f82079677c777595e4259dbaf5a72c8f134_False", - "model": { - "name": "prithivMLmods/Calcium-Opus-14B-Elite", - "sha": "a8661f82079677c777595e4259dbaf5a72c8f134", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.249365220151496, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6063511482865463, - "normalized_score": 60.635114828654636 - }, - "bbh": { - "name": "BBH", - "value": 0.6295900497885079, - "normalized_score": 46.53280917875986 - }, - "math": { - "name": "MATH Level 5", - "value": 0.37084592145015105, - "normalized_score": 37.08459214501511 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3733221476510067, - "normalized_score": 16.442953020134222 - }, - "musr": { - "name": "MUSR", - "value": 0.48732291666666666, - "normalized_score": 20.948697916666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5306682180851063, - "normalized_score": 47.85202423167848 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-23", - "submission_date": "2025-01-23", - "generation": 1, - "base_model": "prithivMLmods/Calcium-Opus-14B-Elite (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 14.766, - "co2_cost": 2.022332794511404 - } - }, - { - "id": "prithivMLmods/Calcium-Opus-14B-Elite-1M_bfloat16_07f093df0a87d5d13e4325aa54eb62de9322721c_False", - "model": { - "name": "prithivMLmods/Calcium-Opus-14B-Elite-1M", - "sha": "07f093df0a87d5d13e4325aa54eb62de9322721c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 37.61517905195512, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5612884923115112, - "normalized_score": 56.12884923115112 - }, - "bbh": { - "name": "BBH", - "value": 0.6329399079569701, - "normalized_score": 46.93552255298414 - }, - "math": { - "name": "MATH Level 5", - "value": 0.44561933534743203, - "normalized_score": 44.561933534743204 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3523489932885906, - "normalized_score": 13.646532438478745 - }, - "musr": { - "name": "MUSR", - "value": 0.46760416666666665, - "normalized_score": 18.28385416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5152094414893617, - "normalized_score": 46.13438238770685 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "prithivMLmods/Calcium-Opus-14B-Elite-1M (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 14.77, - "co2_cost": 3.893608616767551 - } - }, - { - "id": "prithivMLmods/Calcium-Opus-14B-Elite-Stock_bfloat16_e3b7fa2d20fa3e7a92bb7a99ad05219c9a86a95d_False", - "model": { - "name": 
"prithivMLmods/Calcium-Opus-14B-Elite-Stock", - "sha": "e3b7fa2d20fa3e7a92bb7a99ad05219c9a86a95d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.739834208148515, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.614294516327788, - "normalized_score": 61.4294516327788 - }, - "bbh": { - "name": "BBH", - "value": 0.6328767168557433, - "normalized_score": 46.89789858349237 - }, - "math": { - "name": "MATH Level 5", - "value": 0.46676737160120846, - "normalized_score": 46.676737160120844 - }, - "gpqa": { - "name": "GPQA", - "value": 0.36828859060402686, - "normalized_score": 15.771812080536915 - }, - "musr": { - "name": "MUSR", - "value": 0.48075, - "normalized_score": 20.060416666666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5284242021276596, - "normalized_score": 47.60268912529552 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-25", - "generation": 1, - "base_model": "prithivMLmods/Calcium-Opus-14B-Elite-Stock (Merge)", - "hub_license": "", - "hub_hearts": 3, - "params_billions": 14.766, - "co2_cost": 3.9744594479509305 - } - }, - { - "id": "prithivMLmods/Calcium-Opus-14B-Elite2_bfloat16_0d948a368ff62658c06f90219849d8a6be29b78e_False", - "model": { - "name": "prithivMLmods/Calcium-Opus-14B-Elite2", - "sha": "0d948a368ff62658c06f90219849d8a6be29b78e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.249808935674835, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6176168122803052, - "normalized_score": 61.761681228030525 - }, - "bbh": { - "name": "BBH", - "value": 0.6318256156619112, - "normalized_score": 46.806149932752305 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4690332326283988, - "normalized_score": 46.903323262839876 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3699664429530201, - "normalized_score": 15.99552572706935 - }, - "musr": { - "name": "MUSR", - "value": 0.49395833333333333, - "normalized_score": 22.24479166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5300864361702128, - "normalized_score": 47.78738179669031 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-24", - "submission_date": "2025-01-25", - "generation": 1, - "base_model": "prithivMLmods/Calcium-Opus-14B-Elite2 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 4.025445929856857 - } - }, - { - "id": "prithivMLmods/Calcium-Opus-14B-Elite2-R1_bfloat16_8d57bcd85bdfe2cb41f0e84ceb7beabcdc1e63fb_False", - "model": { - "name": "prithivMLmods/Calcium-Opus-14B-Elite2-R1", - "sha": "8d57bcd85bdfe2cb41f0e84ceb7beabcdc1e63fb", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.56373181629569, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6325793339450436, - "normalized_score": 63.25793339450436 - }, - "bbh": { - 
"name": "BBH", - "value": 0.6362357624539174, - "normalized_score": 47.33709556238046 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3338368580060423, - "normalized_score": 33.383685800604226 - }, - "gpqa": { - "name": "GPQA", - "value": 0.39093959731543626, - "normalized_score": 18.791946308724835 - }, - "musr": { - "name": "MUSR", - "value": 0.48998958333333337, - "normalized_score": 21.41536458333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5247672872340425, - "normalized_score": 47.196365248226954 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-01", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "prithivMLmods/Calcium-Opus-14B-Elite2-R1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 7, - "params_billions": 14.766, - "co2_cost": 3.6780303849911 - } - }, - { - "id": "prithivMLmods/Calcium-Opus-14B-Elite3_bfloat16_6be2c8ea522ff941fa1ed5bec18949ac4c3b5651_False", - "model": { - "name": "prithivMLmods/Calcium-Opus-14B-Elite3", - "sha": "6be2c8ea522ff941fa1ed5bec18949ac4c3b5651", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.80335311719453, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5428285837134359, - "normalized_score": 54.28285837134359 - }, - "bbh": { - "name": "BBH", - "value": 0.6350402275340573, - "normalized_score": 47.07459951045248 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4705438066465257, - "normalized_score": 47.05438066465257 - }, - "gpqa": { - "name": "GPQA", - "value": 0.37080536912751677, - "normalized_score": 16.10738255033557 - }, - "musr": { - "name": "MUSR", - "value": 0.4794791666666667, - "normalized_score": 20.13489583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5334940159574468, - "normalized_score": 48.16600177304965 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-25", - "generation": 1, - "base_model": "prithivMLmods/Calcium-Opus-14B-Elite3 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 4.024630380631692 - } - }, - { - "id": "prithivMLmods/Calcium-Opus-14B-Elite4_bfloat16_59525af6aae57e700ff9cd6ce9c6b3257f422f4c_False", - "model": { - "name": "prithivMLmods/Calcium-Opus-14B-Elite4", - "sha": "59525af6aae57e700ff9cd6ce9c6b3257f422f4c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.74386944728247, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6111971790405014, - "normalized_score": 61.11971790405015 - }, - "bbh": { - "name": "BBH", - "value": 0.6195264951573699, - "normalized_score": 45.20847500962782 - }, - "math": { - "name": "MATH Level 5", - "value": 0.36253776435045315, - "normalized_score": 36.25377643504532 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35570469798657717, - "normalized_score": 14.093959731543624 - }, - "musr": { - "name": "MUSR", - "value": 0.46871875, - "normalized_score": 17.68984375 - }, - "mmlu_pro": { - "name": 
"MMLU-PRO", - "value": 0.514876994680851, - "normalized_score": 46.0974438534279 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-25", - "generation": 1, - "base_model": "prithivMLmods/Calcium-Opus-14B-Elite4 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 14.766, - "co2_cost": 3.9166011940560086 - } - }, - { - "id": "prithivMLmods/Calcium-Opus-14B-Merge_bfloat16_ceb41ff76990a24d2f4ff29f1c342fcd7322948a_False", - "model": { - "name": "prithivMLmods/Calcium-Opus-14B-Merge", - "sha": "ceb41ff76990a24d2f4ff29f1c342fcd7322948a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.01116092312885, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4949434168007554, - "normalized_score": 49.494341680075536 - }, - "bbh": { - "name": "BBH", - "value": 0.6319290054891645, - "normalized_score": 46.76666800624009 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4637462235649547, - "normalized_score": 46.37462235649547 - }, - "gpqa": { - "name": "GPQA", - "value": 0.37080536912751677, - "normalized_score": 16.10738255033557 - }, - "musr": { - "name": "MUSR", - "value": 0.48608333333333337, - "normalized_score": 20.92708333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5355718085106383, - "normalized_score": 48.39686761229315 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-24", - "submission_date": "2025-01-24", - "generation": 1, - "base_model": "prithivMLmods/Calcium-Opus-14B-Merge (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 4.138515371119 - } - }, - { - "id": "prithivMLmods/Calcium-Opus-20B-v1_bfloat16_28395429552eb6f22cd3dc8b54cd03e47c6132c9_False", - "model": { - "name": "prithivMLmods/Calcium-Opus-20B-v1", - "sha": "28395429552eb6f22cd3dc8b54cd03e47c6132c9", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 31.041733901500965, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3092716215197897, - "normalized_score": 30.927162151978973 - }, - "bbh": { - "name": "BBH", - "value": 0.599033246250772, - "normalized_score": 41.805575892348564 - }, - "math": { - "name": "MATH Level 5", - "value": 0.36178247734138974, - "normalized_score": 36.17824773413897 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35318791946308725, - "normalized_score": 13.758389261744966 - }, - "musr": { - "name": "MUSR", - "value": 0.49433333333333335, - "normalized_score": 22.091666666666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4734042553191489, - "normalized_score": 41.48936170212765 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-19", - "submission_date": "2025-01-23", - "generation": 1, - "base_model": "prithivMLmods/Calcium-Opus-20B-v1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - 
"params_billions": 19.173, - "co2_cost": 5.472539882729625 - } - }, - { - "id": "prithivMLmods/Codepy-Deepthink-3B_float16_73551f0560645b098ff8293e70ff633bfc72c125_False", - "model": { - "name": "prithivMLmods/Codepy-Deepthink-3B", - "sha": "73551f0560645b098ff8293e70ff633bfc72c125", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.43076520482992, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43271962836385236, - "normalized_score": 43.27196283638524 - }, - "bbh": { - "name": "BBH", - "value": 0.4259451388094382, - "normalized_score": 18.640887671757213 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11555891238670694, - "normalized_score": 11.555891238670695 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.3310208333333333, - "normalized_score": 3.977604166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3090093085106383, - "normalized_score": 23.223256501182032 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-26", - "submission_date": "2025-01-12", - "generation": 1, - "base_model": "prithivMLmods/Codepy-Deepthink-3B (Merge)", - "hub_license": "creativeml-openrail-m", - "hub_hearts": 3, - "params_billions": 3.213, - "co2_cost": 1.211006964059197 - } - }, - { - "id": "prithivMLmods/Coma-II-14B_bfloat16_8ff81f7007503d74d8a4d7d076c1aaf70a9e8487_False", - "model": { - "name": "prithivMLmods/Coma-II-14B", - "sha": "8ff81f7007503d74d8a4d7d076c1aaf70a9e8487", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.45146855133309, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.416832892281369, - "normalized_score": 41.6832892281369 - }, - "bbh": { - "name": "BBH", - "value": 0.6320713788922736, - "normalized_score": 46.89147136445168 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5513595166163142, - "normalized_score": 55.135951661631424 - }, - "gpqa": { - "name": "GPQA", - "value": 0.4001677852348993, - "normalized_score": 20.022371364653242 - }, - "musr": { - "name": "MUSR", - "value": 0.5351041666666667, - "normalized_score": 28.08802083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5039893617021277, - "normalized_score": 44.88770685579197 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-02", - "submission_date": "2025-03-03", - "generation": 1, - "base_model": "prithivMLmods/Coma-II-14B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 14.766, - "co2_cost": 3.7005834476669515 - } - }, - { - "id": "prithivMLmods/Condor-Opus-14B-Exp_bfloat16_1da0a33b3d6937f6e494c2d856bd85f4ba19c12b_False", - "model": { - "name": "prithivMLmods/Condor-Opus-14B-Exp", - "sha": "1da0a33b3d6937f6e494c2d856bd85f4ba19c12b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 37.617199777677776, - 
"has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.40431831983581346, - "normalized_score": 40.43183198358135 - }, - "bbh": { - "name": "BBH", - "value": 0.6154220154262888, - "normalized_score": 44.077092224840555 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5226586102719033, - "normalized_score": 52.26586102719033 - }, - "gpqa": { - "name": "GPQA", - "value": 0.39177852348993286, - "normalized_score": 18.903803131991047 - }, - "musr": { - "name": "MUSR", - "value": 0.5193854166666667, - "normalized_score": 25.423177083333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5014128989361702, - "normalized_score": 44.60143321513003 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-02", - "submission_date": "2025-03-03", - "generation": 1, - "base_model": "prithivMLmods/Condor-Opus-14B-Exp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.77, - "co2_cost": 1.7127687329836954 - } - }, - { - "id": "prithivMLmods/Cygnus-II-14B_bfloat16_abf660630df7ef04e9e8b4ff74260752bf9501f5_False", - "model": { - "name": "prithivMLmods/Cygnus-II-14B", - "sha": "abf660630df7ef04e9e8b4ff74260752bf9501f5", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.52948756458168, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6184412913292286, - "normalized_score": 61.84412913292286 - }, - "bbh": { - "name": "BBH", - "value": 0.6660565208074918, - "normalized_score": 52.14038233919259 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4395770392749245, - "normalized_score": 43.957703927492446 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3875838926174497, - "normalized_score": 18.34451901565996 - }, - "musr": { - "name": "MUSR", - "value": 0.46884375, - "normalized_score": 18.105468749999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5390625, - "normalized_score": 48.78472222222222 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-02", - "submission_date": "2025-03-03", - "generation": 1, - "base_model": "prithivMLmods/Cygnus-II-14B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 2.001975881015083 - } - }, - { - "id": "prithivMLmods/Deepthink-Llama-3-8B-Preview_float16_9037f9ba590696402412233fabdb0a1d7eb7a714_False", - "model": { - "name": "prithivMLmods/Deepthink-Llama-3-8B-Preview", - "sha": "9037f9ba590696402412233fabdb0a1d7eb7a714", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.957476415581784, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.29553252037926037, - "normalized_score": 29.553252037926036 - }, - "bbh": { - "name": "BBH", - "value": 0.4664510845126107, - "normalized_score": 24.800879823702775 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3549848942598187, - "normalized_score": 35.49848942598187 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3162751677852349, - "normalized_score": 
8.83668903803132 - }, - "musr": { - "name": "MUSR", - "value": 0.37070833333333336, - "normalized_score": 7.738541666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2738530585106383, - "normalized_score": 19.317006501182032 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-18", - "submission_date": "2025-03-12", - "generation": 1, - "base_model": "prithivMLmods/Deepthink-Llama-3-8B-Preview (Merge)", - "hub_license": "llama3", - "hub_hearts": 5, - "params_billions": 8.03, - "co2_cost": 0.7295998014032185 - } - }, - { - "id": "prithivMLmods/Deepthink-Reasoning-14B_bfloat16_08fd00d4ac2bf07766c8bab7e73d17028487d23a_False", - "model": { - "name": "prithivMLmods/Deepthink-Reasoning-14B", - "sha": "08fd00d4ac2bf07766c8bab7e73d17028487d23a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 37.765949130344175, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5423542866261519, - "normalized_score": 54.23542866261519 - }, - "bbh": { - "name": "BBH", - "value": 0.6334054936091441, - "normalized_score": 47.30625659928727 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4229607250755287, - "normalized_score": 42.296072507552864 - }, - "gpqa": { - "name": "GPQA", - "value": 0.36661073825503354, - "normalized_score": 15.548098434004473 - }, - "musr": { - "name": "MUSR", - "value": 0.47315625, - "normalized_score": 19.477864583333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5295877659574468, - "normalized_score": 47.73197399527187 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-20", - "submission_date": "2025-01-22", - "generation": 1, - "base_model": "prithivMLmods/Deepthink-Reasoning-14B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 14.77, - "co2_cost": 3.900782398935849 - } - }, - { - "id": "prithivMLmods/Deepthink-Reasoning-7B_float16_0ccaa3825ded55cf8cfa18f7db53d91848e3733b_False", - "model": { - "name": "prithivMLmods/Deepthink-Reasoning-7B", - "sha": "0ccaa3825ded55cf8cfa18f7db53d91848e3733b", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 29.122241455484666, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.48400244684104843, - "normalized_score": 48.40024468410485 - }, - "bbh": { - "name": "BBH", - "value": 0.5505070216145282, - "normalized_score": 35.623731448580884 - }, - "math": { - "name": "MATH Level 5", - "value": 0.33459214501510576, - "normalized_score": 33.45921450151057 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.4432291666666666, - "normalized_score": 13.436979166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43492353723404253, - "normalized_score": 37.21372635933806 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": 
"2024-12-28", - "submission_date": "2025-01-09", - "generation": 1, - "base_model": "prithivMLmods/Deepthink-Reasoning-7B (Merge)", - "hub_license": "creativeml-openrail-m", - "hub_hearts": 10, - "params_billions": 7.616, - "co2_cost": 1.253995016550189 - } - }, - { - "id": "prithivMLmods/Dinobot-Opus-14B-Exp_bfloat16_f51cbd56106c3caa88882ab45252a2edec321d40_True", - "model": { - "name": "prithivMLmods/Dinobot-Opus-14B-Exp", - "sha": "f51cbd56106c3caa88882ab45252a2edec321d40", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.765081012881176, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8239958864701216, - "normalized_score": 82.39958864701215 - }, - "bbh": { - "name": "BBH", - "value": 0.6370093752306357, - "normalized_score": 48.19594986631396 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5317220543806647, - "normalized_score": 53.17220543806647 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32466442953020136, - "normalized_score": 9.955257270693513 - }, - "musr": { - "name": "MUSR", - "value": 0.42603125000000003, - "normalized_score": 12.65390625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4979222074468085, - "normalized_score": 44.213578605200944 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-12", - "submission_date": "2025-02-15", - "generation": 1, - "base_model": "prithivMLmods/Dinobot-Opus-14B-Exp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 14.77, - "co2_cost": 1.7046318064582169 - } - }, - { - "id": "prithivMLmods/Elita-0.1-Distilled-R1-abliterated_float16_253ce07bed5dcd928325172cd5c0cb4f7e98e8e6_True", - "model": { - "name": "prithivMLmods/Elita-0.1-Distilled-R1-abliterated", - "sha": "253ce07bed5dcd928325172cd5c0cb4f7e98e8e6", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 17.39921663367112, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.35423454212600347, - "normalized_score": 35.42345421260035 - }, - "bbh": { - "name": "BBH", - "value": 0.38277850218543213, - "normalized_score": 13.606416556106325 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3066465256797583, - "normalized_score": 30.664652567975832 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.36596875, - "normalized_score": 3.0460937500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2757646276595745, - "normalized_score": 19.52940307328605 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-09", - "submission_date": "2025-02-09", - "generation": 1, - "base_model": "prithivMLmods/Elita-0.1-Distilled-R1-abliterated (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 7.616, - "co2_cost": 0.7736586005657055 - } - }, - { - "id": "prithivMLmods/Elita-1_bfloat16_a304ec55887200703ebb1d0188e7b0fb0b8173de_False", - "model": { - "name": "prithivMLmods/Elita-1", - "sha": 
"a304ec55887200703ebb1d0188e7b0fb0b8173de", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.545369563806254, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4906470387460826, - "normalized_score": 49.06470387460827 - }, - "bbh": { - "name": "BBH", - "value": 0.6520409113818334, - "normalized_score": 49.928735220527614 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3429003021148036, - "normalized_score": 34.29003021148036 - }, - "gpqa": { - "name": "GPQA", - "value": 0.37583892617449666, - "normalized_score": 16.778523489932887 - }, - "musr": { - "name": "MUSR", - "value": 0.48341666666666666, - "normalized_score": 20.527083333333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5381482712765957, - "normalized_score": 48.68314125295508 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-05", - "submission_date": "2025-02-07", - "generation": 1, - "base_model": "prithivMLmods/Elita-1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 3.843754231777964 - } - }, - { - "id": "prithivMLmods/Epimetheus-14B-Axo_bfloat16_206a58dd4edc1c57011840380014f723100b9620_False", - "model": { - "name": "prithivMLmods/Epimetheus-14B-Axo", - "sha": "206a58dd4edc1c57011840380014f723100b9620", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.08056006814201, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.554643900406477, - "normalized_score": 55.4643900406477 - }, - "bbh": { - "name": "BBH", - "value": 0.6613340892011862, - "normalized_score": 51.455447156499524 - }, - "math": { - "name": "MATH Level 5", - "value": 0.41012084592145015, - "normalized_score": 41.012084592145015 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3926174496644295, - "normalized_score": 19.01565995525727 - }, - "musr": { - "name": "MUSR", - "value": 0.4819583333333333, - "normalized_score": 19.711458333333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5304188829787234, - "normalized_score": 47.82432033096927 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-03", - "submission_date": "2025-03-04", - "generation": 1, - "base_model": "prithivMLmods/Epimetheus-14B-Axo (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 1.9946034537563262 - } - }, - { - "id": "prithivMLmods/Equuleus-Opus-14B-Exp_bfloat16_5c2cacb51ef84468d5fc1de3d786f79f592d0b7c_False", - "model": { - "name": "prithivMLmods/Equuleus-Opus-14B-Exp", - "sha": "5c2cacb51ef84468d5fc1de3d786f79f592d0b7c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 42.199750941616415, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7000735825387749, - "normalized_score": 70.0073582538775 - }, - "bbh": { - "name": "BBH", - "value": 0.6433769213927613, - "normalized_score": 
48.61670086911542 - }, - "math": { - "name": "MATH Level 5", - "value": 0.45845921450151056, - "normalized_score": 45.84592145015105 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38674496644295303, - "normalized_score": 18.232662192393736 - }, - "musr": { - "name": "MUSR", - "value": 0.4951666666666667, - "normalized_score": 21.89583333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5374002659574468, - "normalized_score": 48.60002955082743 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-26", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "prithivMLmods/Equuleus-Opus-14B-Exp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 1.902014927917551 - } - }, - { - "id": "prithivMLmods/Eridanus-Opus-14B-r999_bfloat16_ad0375d28983a476829bc07f61e70dbebbfe6263_False", - "model": { - "name": "prithivMLmods/Eridanus-Opus-14B-r999", - "sha": "ad0375d28983a476829bc07f61e70dbebbfe6263", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.111313344425874, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.638574537781974, - "normalized_score": 63.85745377819741 - }, - "bbh": { - "name": "BBH", - "value": 0.6583918169279829, - "normalized_score": 51.03833480947816 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3859516616314199, - "normalized_score": 38.59516616314199 - }, - "gpqa": { - "name": "GPQA", - "value": 0.39429530201342283, - "normalized_score": 19.239373601789712 - }, - "musr": { - "name": "MUSR", - "value": 0.476875, - "normalized_score": 19.476041666666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5361535904255319, - "normalized_score": 48.46151004728132 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-28", - "submission_date": "2025-03-01", - "generation": 1, - "base_model": "prithivMLmods/Eridanus-Opus-14B-r999 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 14.77, - "co2_cost": 1.9579416118707536 - } - }, - { - "id": "prithivMLmods/Evac-Opus-14B-Exp_bfloat16_8e7f3150f510d948d99f28aa5c5f98fd41e1777f_False", - "model": { - "name": "prithivMLmods/Evac-Opus-14B-Exp", - "sha": "8e7f3150f510d948d99f28aa5c5f98fd41e1777f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.323054993397584, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5916135852870383, - "normalized_score": 59.16135852870384 - }, - "bbh": { - "name": "BBH", - "value": 0.6475440673701862, - "normalized_score": 49.581751069166614 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4214501510574018, - "normalized_score": 42.14501510574018 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3884228187919463, - "normalized_score": 18.456375838926174 - }, - "musr": { - "name": "MUSR", - "value": 0.47278125, - "normalized_score": 18.630989583333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5316655585106383, - "normalized_score": 47.96283983451538 - } - }, 
- "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-12", - "submission_date": "2025-02-16", - "generation": 1, - "base_model": "prithivMLmods/Evac-Opus-14B-Exp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 14.77, - "co2_cost": 1.944293501458255 - } - }, - { - "id": "prithivMLmods/FastThink-0.5B-Tiny_float16_c07fd949ceba096d7c2e405bcfce99e269f7ca39_False", - "model": { - "name": "prithivMLmods/FastThink-0.5B-Tiny", - "sha": "c07fd949ceba096d7c2e405bcfce99e269f7ca39", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.516955448134702, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25798880304259364, - "normalized_score": 25.798880304259363 - }, - "bbh": { - "name": "BBH", - "value": 0.3205583807088257, - "normalized_score": 5.01960978303041 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02039274924471299, - "normalized_score": 2.0392749244712993 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.3566354166666667, - "normalized_score": 3.579427083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16489361702127658, - "normalized_score": 7.210401891252953 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-20", - "submission_date": "2025-01-24", - "generation": 1, - "base_model": "prithivMLmods/FastThink-0.5B-Tiny (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 0.494, - "co2_cost": 1.0750746175098636 - } - }, - { - "id": "prithivMLmods/GWQ-9B-Preview_float16_5a0e00ac0ff885f54ef32e607508895bae864006_False", - "model": { - "name": "prithivMLmods/GWQ-9B-Preview", - "sha": "5a0e00ac0ff885f54ef32e607508895bae864006", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 30.15453648521797, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5065836425129767, - "normalized_score": 50.65836425129767 - }, - "bbh": { - "name": "BBH", - "value": 0.5805745804247511, - "normalized_score": 40.6697225433022 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22658610271903323, - "normalized_score": 22.658610271903324 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33976510067114096, - "normalized_score": 11.968680089485462 - }, - "musr": { - "name": "MUSR", - "value": 0.4951041666666667, - "normalized_score": 21.821354166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39835438829787234, - "normalized_score": 33.150487588652474 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-04", - "submission_date": "2025-01-08", - "generation": 0, - "base_model": "prithivMLmods/GWQ-9B-Preview", - "hub_license": "gemma", - "hub_hearts": 3, - "params_billions": 9.242, - "co2_cost": 4.922323401670357 - } - }, - { - "id": 
"prithivMLmods/GWQ-9B-Preview2_float16_42f5d4f7d19eb59c9408ff70cdbc30459ec1ad3d_False", - "model": { - "name": "prithivMLmods/GWQ-9B-Preview2", - "sha": "42f5d4f7d19eb59c9408ff70cdbc30459ec1ad3d", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 30.047187909173402, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5208967761096114, - "normalized_score": 52.08967761096115 - }, - "bbh": { - "name": "BBH", - "value": 0.5797218710843371, - "normalized_score": 40.184860533159515 - }, - "math": { - "name": "MATH Level 5", - "value": 0.23716012084592145, - "normalized_score": 23.716012084592144 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3263422818791946, - "normalized_score": 10.17897091722595 - }, - "musr": { - "name": "MUSR", - "value": 0.48598958333333336, - "normalized_score": 20.81536458333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3996841755319149, - "normalized_score": 33.298241725768314 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-04", - "submission_date": "2025-01-08", - "generation": 1, - "base_model": "prithivMLmods/GWQ-9B-Preview2 (Merge)", - "hub_license": "creativeml-openrail-m", - "hub_hearts": 10, - "params_billions": 9.242, - "co2_cost": 4.905647704544523 - } - }, - { - "id": "prithivMLmods/GWQ2b_float16_1d2a808ec30008a2cba697b1bb742ab67efb71f0_False", - "model": { - "name": "prithivMLmods/GWQ2b", - "sha": "1d2a808ec30008a2cba697b1bb742ab67efb71f0", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 16.42971150895339, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.41148707651254224, - "normalized_score": 41.14870765125423 - }, - "bbh": { - "name": "BBH", - "value": 0.41433702954085216, - "normalized_score": 17.6803497776584 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06268882175226587, - "normalized_score": 6.268882175226587 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.43111458333333336, - "normalized_score": 12.755989583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.24725731382978725, - "normalized_score": 16.36192375886525 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-09", - "submission_date": "2025-01-12", - "generation": 1, - "base_model": "prithivMLmods/GWQ2b (Merge)", - "hub_license": "gemma", - "hub_hearts": 4, - "params_billions": 2.614, - "co2_cost": 2.408578642384382 - } - }, - { - "id": "prithivMLmods/Gaea-Opus-14B-Exp_bfloat16_3a4b38d8906d4eeafcc31601b08994e73eb75408_False", - "model": { - "name": "prithivMLmods/Gaea-Opus-14B-Exp", - "sha": "3a4b38d8906d4eeafcc31601b08994e73eb75408", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.11380825495326, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5956351369920699, - 
"normalized_score": 59.56351369920699 - }, - "bbh": { - "name": "BBH", - "value": 0.6560465337491567, - "normalized_score": 50.51229448373768 - }, - "math": { - "name": "MATH Level 5", - "value": 0.42749244712990936, - "normalized_score": 42.74924471299094 - }, - "gpqa": { - "name": "GPQA", - "value": 0.39093959731543626, - "normalized_score": 18.791946308724835 - }, - "musr": { - "name": "MUSR", - "value": 0.48589583333333336, - "normalized_score": 20.1703125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5400598404255319, - "normalized_score": 48.8955378250591 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-11", - "submission_date": "2025-03-13", - "generation": 1, - "base_model": "prithivMLmods/Gaea-Opus-14B-Exp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 2.0040682772167764 - } - }, - { - "id": "prithivMLmods/Galactic-Qwen-14B-Exp1_bfloat16_9e988a4a9bb65a420c511b23dbe0f09685c18e1f_False", - "model": { - "name": "prithivMLmods/Galactic-Qwen-14B-Exp1", - "sha": "9e988a4a9bb65a420c511b23dbe0f09685c18e1f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.469504673008466, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5832202999153357, - "normalized_score": 58.322029991533576 - }, - "bbh": { - "name": "BBH", - "value": 0.6582262489447345, - "normalized_score": 50.98957194551118 - }, - "math": { - "name": "MATH Level 5", - "value": 0.40181268882175225, - "normalized_score": 40.181268882175225 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3934563758389262, - "normalized_score": 19.12751677852349 - }, - "musr": { - "name": "MUSR", - "value": 0.4780520833333333, - "normalized_score": 19.356510416666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.539561170212766, - "normalized_score": 48.84013002364066 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-10", - "submission_date": "2025-03-12", - "generation": 1, - "base_model": "prithivMLmods/Galactic-Qwen-14B-Exp1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 2.1030774034920614 - } - }, - { - "id": "prithivMLmods/Galactic-Qwen-14B-Exp2_bfloat16_2afd2e6e33627c1241ae87f6710bfc0880285c7e_False", - "model": { - "name": "prithivMLmods/Galactic-Qwen-14B-Exp2", - "sha": "2afd2e6e33627c1241ae87f6710bfc0880285c7e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 43.56371836153858, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6620300801872365, - "normalized_score": 66.20300801872366 - }, - "bbh": { - "name": "BBH", - "value": 0.7203002699449659, - "normalized_score": 59.91731650130399 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3474320241691843, - "normalized_score": 34.74320241691843 - }, - "gpqa": { - "name": "GPQA", - "value": 0.39932885906040266, - "normalized_score": 19.910514541387023 - }, - "musr": { - "name": "MUSR", - "value": 0.5353854166666667, - "normalized_score": 
28.489843749999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5690658244680851, - "normalized_score": 52.11842494089834 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-10", - "submission_date": "2025-03-13", - "generation": 1, - "base_model": "prithivMLmods/Galactic-Qwen-14B-Exp2 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 14.766, - "co2_cost": 3.180329277429034 - } - }, - { - "id": "prithivMLmods/Gauss-Opus-14B-R999_bfloat16_12db6077af849038d206775339a1dc78df9a14cf_False", - "model": { - "name": "prithivMLmods/Gauss-Opus-14B-R999", - "sha": "12db6077af849038d206775339a1dc78df9a14cf", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.802494751158555, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.39065457430728245, - "normalized_score": 39.065457430728245 - }, - "bbh": { - "name": "BBH", - "value": 0.6227831608555382, - "normalized_score": 44.93611504035883 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5755287009063444, - "normalized_score": 57.55287009063444 - }, - "gpqa": { - "name": "GPQA", - "value": 0.39177852348993286, - "normalized_score": 18.903803131991047 - }, - "musr": { - "name": "MUSR", - "value": 0.5338333333333334, - "normalized_score": 27.829166666666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.500748005319149, - "normalized_score": 44.52755614657211 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-03", - "submission_date": "2025-03-04", - "generation": 1, - "base_model": "prithivMLmods/Gauss-Opus-14B-R999 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 14.77, - "co2_cost": 1.8817541920229752 - } - }, - { - "id": "prithivMLmods/Jolt-v0.1_bfloat16_077b9bf6b346af0a865d4d9b8618c8349a03b9c6_False", - "model": { - "name": "prithivMLmods/Jolt-v0.1", - "sha": "077b9bf6b346af0a865d4d9b8618c8349a03b9c6", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 37.194359554127736, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5092066827129793, - "normalized_score": 50.92066827129793 - }, - "bbh": { - "name": "BBH", - "value": 0.6521408461659391, - "normalized_score": 50.0297418150364 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3564954682779456, - "normalized_score": 35.64954682779456 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3800335570469799, - "normalized_score": 17.337807606263986 - }, - "musr": { - "name": "MUSR", - "value": 0.48471875000000003, - "normalized_score": 20.489843749999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5386469414893617, - "normalized_score": 48.73854905437352 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-05", - "submission_date": "2025-02-07", - "generation": 1, - "base_model": "prithivMLmods/Jolt-v0.1 (Merge)", - "hub_license": 
"apache-2.0", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 3.9516953293416273 - } - }, - { - "id": "prithivMLmods/Lacerta-Opus-14B-Elite8_bfloat16_944dbb7ef1ed67b64bba1078531eb2611268d3f9_False", - "model": { - "name": "prithivMLmods/Lacerta-Opus-14B-Elite8", - "sha": "944dbb7ef1ed67b64bba1078531eb2611268d3f9", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.069826308128185, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.614144913274556, - "normalized_score": 61.41449132745559 - }, - "bbh": { - "name": "BBH", - "value": 0.6401384743047456, - "normalized_score": 48.182387387883786 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3648036253776435, - "normalized_score": 36.48036253776435 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3783557046979866, - "normalized_score": 17.114093959731544 - }, - "musr": { - "name": "MUSR", - "value": 0.4635416666666667, - "normalized_score": 17.209375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5321642287234043, - "normalized_score": 48.01824763593381 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "prithivMLmods/Lacerta-Opus-14B-Elite8 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 2.021558179765879 - } - }, - { - "id": "prithivMLmods/Llama-3.1-5B-Instruct_float16_310ab744cd88aecedc534abd373d2f66a0c82f19_False", - "model": { - "name": "prithivMLmods/Llama-3.1-5B-Instruct", - "sha": "310ab744cd88aecedc534abd373d2f66a0c82f19", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.207173577211278, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.14066011516110588, - "normalized_score": 14.066011516110587 - }, - "bbh": { - "name": "BBH", - "value": 0.3051074188361172, - "normalized_score": 3.109216174639221 - }, - "math": { - "name": "MATH Level 5", - "value": 0.015105740181268883, - "normalized_score": 1.5105740181268883 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.35400000000000004, - "normalized_score": 2.6166666666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11835106382978723, - "normalized_score": 2.0390070921985806 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-04", - "submission_date": "2025-01-12", - "generation": 0, - "base_model": "prithivMLmods/Llama-3.1-5B-Instruct", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 5.413, - "co2_cost": 0.9991042465955755 - } - }, - { - "id": "prithivMLmods/Llama-3.1-8B-Open-SFT_float16_e5d7fa281735f7fcc09fdb5810a2118789040d67_False", - "model": { - "name": "prithivMLmods/Llama-3.1-8B-Open-SFT", - "sha": "e5d7fa281735f7fcc09fdb5810a2118789040d67", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": 
"LlamaForCausalLM", - "average_score": 21.04370403547396, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4122616878770551, - "normalized_score": 41.226168787705504 - }, - "bbh": { - "name": "BBH", - "value": 0.4967982234773378, - "normalized_score": 28.179927919868437 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1216012084592145, - "normalized_score": 12.16012084592145 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.39036458333333335, - "normalized_score": 8.72890625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35222739361702127, - "normalized_score": 28.02526595744681 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-18", - "submission_date": "2025-01-12", - "generation": 1, - "base_model": "prithivMLmods/Llama-3.1-8B-Open-SFT (Merge)", - "hub_license": "creativeml-openrail-m", - "hub_hearts": 6, - "params_billions": 8.03, - "co2_cost": 1.4555876766450873 - } - }, - { - "id": "prithivMLmods/Llama-3.2-3B-Math-Oct_float16_5d72ae9689eb8307a741c6e7a455e427a792cd15_False", - "model": { - "name": "prithivMLmods/Llama-3.2-3B-Math-Oct", - "sha": "5d72ae9689eb8307a741c6e7a455e427a792cd15", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.441953756100272, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4585233846194763, - "normalized_score": 45.85233846194763 - }, - "bbh": { - "name": "BBH", - "value": 0.4371840952508727, - "normalized_score": 19.946749736299825 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11555891238670694, - "normalized_score": 11.555891238670695 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.34698958333333335, - "normalized_score": 4.940364583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2911402925531915, - "normalized_score": 21.237810283687946 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-22", - "submission_date": "2025-01-24", - "generation": 1, - "base_model": "prithivMLmods/Llama-3.2-3B-Math-Oct (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 1, - "params_billions": 3.213, - "co2_cost": 1.1913652082232167 - } - }, - { - "id": "prithivMLmods/Llama-3.2-6B-AlgoCode_float16_e111d34ff9033fe36b4f1c283a17d017b4e4e5c6_False", - "model": { - "name": "prithivMLmods/Llama-3.2-6B-AlgoCode", - "sha": "e111d34ff9033fe36b4f1c283a17d017b4e4e5c6", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 9.301000017101474, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21357553513566227, - "normalized_score": 21.357553513566227 - }, - "bbh": { - "name": "BBH", - "value": 0.37477424449567703, - "normalized_score": 11.602526365445021 - }, - "math": { - "name": "MATH Level 5", - "value": 0.013595166163141994, - "normalized_score": 1.3595166163141994 - 
}, - "gpqa": { - "name": "GPQA", - "value": 0.2869127516778524, - "normalized_score": 4.921700223713654 - }, - "musr": { - "name": "MUSR", - "value": 0.40134374999999994, - "normalized_score": 7.701302083333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17977061170212766, - "normalized_score": 8.863401300236404 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-10", - "submission_date": "2025-01-12", - "generation": 0, - "base_model": "prithivMLmods/Llama-3.2-6B-AlgoCode", - "hub_license": "llama3.2", - "hub_hearts": 1, - "params_billions": 6.339, - "co2_cost": 1.5547206744933495 - } - }, - { - "id": "prithivMLmods/Llama-8B-Distill-CoT_bfloat16_4c2d02c2cd92f4c371547201027202ac42d88a71_False", - "model": { - "name": "prithivMLmods/Llama-8B-Distill-CoT", - "sha": "4c2d02c2cd92f4c371547201027202ac42d88a71", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.756374392650613, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3341511633576688, - "normalized_score": 33.41511633576688 - }, - "bbh": { - "name": "BBH", - "value": 0.4297620873695442, - "normalized_score": 19.59512258030452 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4003021148036254, - "normalized_score": 40.03021148036254 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28942953020134227, - "normalized_score": 5.257270693512303 - }, - "musr": { - "name": "MUSR", - "value": 0.3719791666666667, - "normalized_score": 6.997395833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.273188164893617, - "normalized_score": 19.243129432624112 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-21", - "submission_date": "2025-01-22", - "generation": 1, - "base_model": "prithivMLmods/Llama-8B-Distill-CoT (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 5, - "params_billions": 8.03, - "co2_cost": 1.4350045268373062 - } - }, - { - "id": "prithivMLmods/Llama-Deepsync-1B_float16_03a9a38ffbb49f0f176a901a5fab3e444d6131fe_False", - "model": { - "name": "prithivMLmods/Llama-Deepsync-1B", - "sha": "03a9a38ffbb49f0f176a901a5fab3e444d6131fe", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 10.269419049452202, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3570071853792382, - "normalized_score": 35.70071853792382 - }, - "bbh": { - "name": "BBH", - "value": 0.33856262083940014, - "normalized_score": 7.7638727938116565 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04380664652567976, - "normalized_score": 4.380664652567976 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.35651041666666666, - "normalized_score": 4.230468750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17378656914893617, - "normalized_score": 8.19850768321513 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - 
"is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2025-01-12", - "generation": 1, - "base_model": "prithivMLmods/Llama-Deepsync-1B (Merge)", - "hub_license": "creativeml-openrail-m", - "hub_hearts": 3, - "params_billions": 1.236, - "co2_cost": 0.7542739289022414 - } - }, - { - "id": "prithivMLmods/Llama-Deepsync-3B_float16_9f7c81f997f9a35797b511197e48a64ffb6d046f_False", - "model": { - "name": "prithivMLmods/Llama-Deepsync-3B", - "sha": "9f7c81f997f9a35797b511197e48a64ffb6d046f", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.176506909330996, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4302218114602588, - "normalized_score": 43.02218114602588 - }, - "bbh": { - "name": "BBH", - "value": 0.4291521655271033, - "normalized_score": 18.963664295918168 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11782477341389729, - "normalized_score": 11.782477341389729 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27181208053691275, - "normalized_score": 2.9082774049216997 - }, - "musr": { - "name": "MUSR", - "value": 0.33238541666666666, - "normalized_score": 3.8148437499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3031083776595745, - "normalized_score": 22.567597517730498 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2025-01-12", - "generation": 1, - "base_model": "prithivMLmods/Llama-Deepsync-3B (Merge)", - "hub_license": "creativeml-openrail-m", - "hub_hearts": 9, - "params_billions": 3.213, - "co2_cost": 1.2161541593013407 - } - }, - { - "id": "prithivMLmods/Llama-Express.1-Math_bfloat16_9c32d92f0ef3a4c4935992c9a5074d7a65ea91bc_True", - "model": { - "name": "prithivMLmods/Llama-Express.1-Math", - "sha": "9c32d92f0ef3a4c4935992c9a5074d7a65ea91bc", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 12.170622691501343, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5084320713484665, - "normalized_score": 50.84320713484665 - }, - "bbh": { - "name": "BBH", - "value": 0.33638140090435265, - "normalized_score": 7.199019778023913 - }, - "math": { - "name": "MATH Level 5", - "value": 0.055891238670694864, - "normalized_score": 5.589123867069486 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.31434375, - "normalized_score": 0.8263020833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16098736702127658, - "normalized_score": 6.776374113475176 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-21", - "submission_date": "2025-01-25", - "generation": 1, - "base_model": "prithivMLmods/Llama-Express.1-Math (Merge)", - "hub_license": "llama3.2", - "hub_hearts": 1, - "params_billions": 1.236, - "co2_cost": 0.7120903218166346 - } - }, - { - "id": "prithivMLmods/LwQ-10B-Instruct_bfloat16_3db52014aba9ec7163c28af47aac1f07af8fe0f6_False", - "model": { - "name": 
"prithivMLmods/LwQ-10B-Instruct", - "sha": "3db52014aba9ec7163c28af47aac1f07af8fe0f6", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.967461043098936, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3934770852449279, - "normalized_score": 39.34770852449279 - }, - "bbh": { - "name": "BBH", - "value": 0.5121712029712329, - "normalized_score": 31.59027306507356 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04003021148036254, - "normalized_score": 4.003021148036254 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31208053691275167, - "normalized_score": 8.277404921700223 - }, - "musr": { - "name": "MUSR", - "value": 0.45439583333333333, - "normalized_score": 16.8328125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.331781914893617, - "normalized_score": 25.75354609929078 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-14", - "submission_date": "2025-01-19", - "generation": 1, - "base_model": "prithivMLmods/LwQ-10B-Instruct (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 10.732, - "co2_cost": 1.4503505632541436 - } - }, - { - "id": "prithivMLmods/LwQ-Reasoner-10B_bfloat16_fcd46007bd9f098004843dd79042a99543a22293_False", - "model": { - "name": "prithivMLmods/LwQ-Reasoner-10B", - "sha": "fcd46007bd9f098004843dd79042a99543a22293", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.99437823708219, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.29413400887423147, - "normalized_score": 29.413400887423144 - }, - "bbh": { - "name": "BBH", - "value": 0.5866254169962443, - "normalized_score": 40.337248394622954 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3580060422960725, - "normalized_score": 35.80060422960725 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3464765100671141, - "normalized_score": 12.863534675615215 - }, - "musr": { - "name": "MUSR", - "value": 0.40785416666666663, - "normalized_score": 8.581770833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.41472739361702127, - "normalized_score": 34.96971040189125 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-18", - "submission_date": "2025-01-19", - "generation": 1, - "base_model": "prithivMLmods/LwQ-Reasoner-10B (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 2, - "params_billions": 10.306, - "co2_cost": 1.7891952569981955 - } - }, - { - "id": "prithivMLmods/Magellanic-Opus-14B-Exp_float16_64ae086663b4008fcd263a76f7d4360a50d9e81e_False", - "model": { - "name": "prithivMLmods/Magellanic-Opus-14B-Exp", - "sha": "64ae086663b4008fcd263a76f7d4360a50d9e81e", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.055124365076594, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6866347956754744, - "normalized_score": 68.66347956754744 - }, - "bbh": { - "name": "BBH", - "value": 
0.6382505935140227, - "normalized_score": 48.00332430163465 - }, - "math": { - "name": "MATH Level 5", - "value": 0.37990936555891236, - "normalized_score": 37.99093655589124 - }, - "gpqa": { - "name": "GPQA", - "value": 0.37416107382550334, - "normalized_score": 16.554809843400445 - }, - "musr": { - "name": "MUSR", - "value": 0.49262500000000004, - "normalized_score": 21.644791666666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5272606382978723, - "normalized_score": 47.473404255319146 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-13", - "submission_date": "2025-02-14", - "generation": 1, - "base_model": "prithivMLmods/Magellanic-Opus-14B-Exp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 1.9412251013841815 - } - }, - { - "id": "prithivMLmods/Magellanic-Qwen-25B-R999_bfloat16_63ecd8209d9a194ec6b33c38c695323347b6b542_False", - "model": { - "name": "prithivMLmods/Magellanic-Qwen-25B-R999", - "sha": "63ecd8209d9a194ec6b33c38c695323347b6b542", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.9765268528155335, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.18727199386516663, - "normalized_score": 18.727199386516666 - }, - "bbh": { - "name": "BBH", - "value": 0.26075689808294905, - "normalized_score": 2.00355862547039 - }, - "math": { - "name": "MATH Level 5", - "value": 0.005287009063444109, - "normalized_score": 0.5287009063444109 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25083892617449666, - "normalized_score": 0.11185682326622093 - }, - "musr": { - "name": "MUSR", - "value": 0.3831145833333333, - "normalized_score": 5.155989583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1299867021276596, - "normalized_score": 3.331855791962176 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-04", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 24.962, - "co2_cost": 2.404560147513011 - } - }, - { - "id": "prithivMLmods/Megatron-Corpus-14B-Exp_float16_1d1f2380140d36b460f3ed1d5de0b0edbd48b9a7_False", - "model": { - "name": "prithivMLmods/Megatron-Corpus-14B-Exp", - "sha": "1d1f2380140d36b460f3ed1d5de0b0edbd48b9a7", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.55395685759063, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49826571275327247, - "normalized_score": 49.82657127532725 - }, - "bbh": { - "name": "BBH", - "value": 0.6355171004470184, - "normalized_score": 47.91897959935681 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3429003021148036, - "normalized_score": 34.29003021148036 - }, - "gpqa": { - "name": "GPQA", - "value": 0.36325503355704697, - "normalized_score": 15.100671140939594 - }, - "musr": { - "name": "MUSR", - "value": 0.4766875, - "normalized_score": 18.85260416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5260139627659575, - "normalized_score": 
47.33488475177305 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-04", - "submission_date": "2025-02-13", - "generation": 1, - "base_model": "prithivMLmods/Megatron-Corpus-14B-Exp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 14.766, - "co2_cost": 1.8062468013764732 - } - }, - { - "id": "prithivMLmods/Megatron-Corpus-14B-Exp.v2_bfloat16_5d7501a6e01875c268ad73343befd6a2906ccd14_False", - "model": { - "name": "prithivMLmods/Megatron-Corpus-14B-Exp.v2", - "sha": "5d7501a6e01875c268ad73343befd6a2906ccd14", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 31.898674141338944, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.48704991644392437, - "normalized_score": 48.704991644392436 - }, - "bbh": { - "name": "BBH", - "value": 0.632146083740281, - "normalized_score": 46.78841154317365 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2590634441087613, - "normalized_score": 25.90634441087613 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3422818791946309, - "normalized_score": 12.304250559284117 - }, - "musr": { - "name": "MUSR", - "value": 0.449, - "normalized_score": 15.358333333333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.48096742021276595, - "normalized_score": 42.329713356974 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-06", - "submission_date": "2025-02-07", - "generation": 1, - "base_model": "prithivMLmods/Megatron-Corpus-14B-Exp.v2 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 3.5382793005985596 - } - }, - { - "id": "prithivMLmods/Megatron-Opus-14B-2.0_float16_c3ddba573bd07f5f233b28d1a5d95a8d25ba6e33_True", - "model": { - "name": "prithivMLmods/Megatron-Opus-14B-2.0", - "sha": "c3ddba573bd07f5f233b28d1a5d95a8d25ba6e33", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 36.80518021675177, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6693739278447852, - "normalized_score": 66.93739278447852 - }, - "bbh": { - "name": "BBH", - "value": 0.6870557211788685, - "normalized_score": 54.699622314981035 - }, - "math": { - "name": "MATH Level 5", - "value": 0.27794561933534745, - "normalized_score": 27.794561933534744 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35906040268456374, - "normalized_score": 14.541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.41403125, - "normalized_score": 10.520572916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5170378989361702, - "normalized_score": 46.337544326241144 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-08", - "submission_date": "2025-02-09", - "generation": 1, - "base_model": "prithivMLmods/Megatron-Opus-14B-2.0 (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 3, - "params_billions": 14.66, - "co2_cost": 
0.8824604199255072 - } - }, - { - "id": "prithivMLmods/Megatron-Opus-14B-2.1_float16_d22ca0d0df544e18ba758544016f8cd3c004da92_False", - "model": { - "name": "prithivMLmods/Megatron-Opus-14B-2.1", - "sha": "d22ca0d0df544e18ba758544016f8cd3c004da92", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.50969719045679, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.02455484780382718, - "normalized_score": 2.455484780382718 - }, - "bbh": { - "name": "BBH", - "value": 0.6726960005125086, - "normalized_score": 52.531003009357 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2998489425981873, - "normalized_score": 29.984894259818727 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38338926174496646, - "normalized_score": 17.785234899328863 - }, - "musr": { - "name": "MUSR", - "value": 0.49275, - "normalized_score": 21.92708333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5173703457446809, - "normalized_score": 46.3744828605201 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-18", - "submission_date": "2025-02-21", - "generation": 1, - "base_model": "prithivMLmods/Megatron-Opus-14B-2.1 (Merge)", - "hub_license": "llama3", - "hub_hearts": 4, - "params_billions": 14.66, - "co2_cost": 0.8557331146586711 - } - }, - { - "id": "prithivMLmods/Megatron-Opus-14B-Exp_bfloat16_d6c56465b7610abbebbf6cdedae6fda92087fbfc_False", - "model": { - "name": "prithivMLmods/Megatron-Opus-14B-Exp", - "sha": "d6c56465b7610abbebbf6cdedae6fda92087fbfc", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.964774794825566, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4979410187192206, - "normalized_score": 49.79410187192205 - }, - "bbh": { - "name": "BBH", - "value": 0.6516090109599467, - "normalized_score": 50.00287913113638 - }, - "math": { - "name": "MATH Level 5", - "value": 0.35347432024169184, - "normalized_score": 35.34743202416919 - }, - "gpqa": { - "name": "GPQA", - "value": 0.375, - "normalized_score": 16.666666666666664 - }, - "musr": { - "name": "MUSR", - "value": 0.48865625, - "normalized_score": 21.082031249999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5400598404255319, - "normalized_score": 48.8955378250591 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "prithivMLmods/Megatron-Opus-14B-Exp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 14.766, - "co2_cost": 3.8280680323285137 - } - }, - { - "id": "prithivMLmods/Megatron-Opus-14B-Stock_bfloat16_a9d75a507fb0e9320e70c120b0f1823cc377cea2_False", - "model": { - "name": "prithivMLmods/Megatron-Opus-14B-Stock", - "sha": "a9d75a507fb0e9320e70c120b0f1823cc377cea2", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.31374014966015, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { 
- "name": "IFEval", - "value": 0.5173750094194515, - "normalized_score": 51.73750094194515 - }, - "bbh": { - "name": "BBH", - "value": 0.6411753580495262, - "normalized_score": 48.128851193275885 - }, - "math": { - "name": "MATH Level 5", - "value": 0.33459214501510576, - "normalized_score": 33.45921450151057 - }, - "gpqa": { - "name": "GPQA", - "value": 0.375, - "normalized_score": 16.666666666666664 - }, - "musr": { - "name": "MUSR", - "value": 0.48202083333333334, - "normalized_score": 20.185937499999994 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5293384308510638, - "normalized_score": 47.704270094562645 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "prithivMLmods/Megatron-Opus-14B-Stock (Merge)", - "hub_license": "", - "hub_hearts": 3, - "params_billions": 14.766, - "co2_cost": 3.65052126683003 - } - }, - { - "id": "prithivMLmods/Megatron-Opus-7B-Exp_float16_1856f046b2fe15ccf1baac686aa4595ab4245f86_False", - "model": { - "name": "prithivMLmods/Megatron-Opus-7B-Exp", - "sha": "1856f046b2fe15ccf1baac686aa4595ab4245f86", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 27.617726041323735, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6017300761978217, - "normalized_score": 60.173007619782176 - }, - "bbh": { - "name": "BBH", - "value": 0.5367154102661396, - "normalized_score": 34.37153536149186 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1971299093655589, - "normalized_score": 19.71299093655589 - }, - "gpqa": { - "name": "GPQA", - "value": 0.311241610738255, - "normalized_score": 8.165548098434002 - }, - "musr": { - "name": "MUSR", - "value": 0.4185833333333333, - "normalized_score": 11.056249999999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3900432180851064, - "normalized_score": 32.227024231678485 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-03", - "generation": 0, - "base_model": "prithivMLmods/Megatron-Opus-7B-Exp", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 7.456, - "co2_cost": 1.196807166029562 - } - }, - { - "id": "prithivMLmods/Messier-Opus-14B-Elite7_bfloat16_d8748c313cc0daa3e1112e953b2826404a0e577a_False", - "model": { - "name": "prithivMLmods/Messier-Opus-14B-Elite7", - "sha": "d8748c313cc0daa3e1112e953b2826404a0e577a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.66277236825811, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7113392465325337, - "normalized_score": 71.13392465325336 - }, - "bbh": { - "name": "BBH", - "value": 0.6498611961862557, - "normalized_score": 49.704671266379385 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4070996978851964, - "normalized_score": 40.70996978851964 - }, - "gpqa": { - "name": "GPQA", - "value": 0.39093959731543626, - "normalized_score": 18.791946308724835 - }, - "musr": { - "name": "MUSR", - "value": 0.4885625, - 
"normalized_score": 20.703645833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5403922872340425, - "normalized_score": 48.93247635933806 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-26", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "prithivMLmods/Messier-Opus-14B-Elite7 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 1.7894127463904677 - } - }, - { - "id": "prithivMLmods/Omni-Reasoner-Merged_bfloat16_5c34ad1b2510c510025ac724a16bed7f5ae5f1c3_False", - "model": { - "name": "prithivMLmods/Omni-Reasoner-Merged", - "sha": "5c34ad1b2510c510025ac724a16bed7f5ae5f1c3", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 29.234864632922996, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4599473840520929, - "normalized_score": 45.99473840520929 - }, - "bbh": { - "name": "BBH", - "value": 0.5507848245879011, - "normalized_score": 35.36177667333275 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3330815709969788, - "normalized_score": 33.30815709969788 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.4616458333333333, - "normalized_score": 16.205729166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43641954787234044, - "normalized_score": 37.37994976359338 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-16", - "submission_date": "2025-01-17", - "generation": 1, - "base_model": "prithivMLmods/Omni-Reasoner-Merged (Merge)", - "hub_license": "", - "hub_hearts": 5, - "params_billions": 7.616, - "co2_cost": 1.2628315519731845 - } - }, - { - "id": "prithivMLmods/Omni-Reasoner3-Merged_bfloat16_a8fbe5740e04a78661dedd16597fa4d5a135ad95_False", - "model": { - "name": "prithivMLmods/Omni-Reasoner3-Merged", - "sha": "a8fbe5740e04a78661dedd16597fa4d5a135ad95", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.433737471025804, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.493469549683728, - "normalized_score": 49.346954968372806 - }, - "bbh": { - "name": "BBH", - "value": 0.4387847138827546, - "normalized_score": 20.586521670513378 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10876132930513595, - "normalized_score": 10.876132930513595 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.35222916666666665, - "normalized_score": 6.228645833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29496343085106386, - "normalized_score": 21.66260342789598 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-17", - "submission_date": "2025-01-17", - "generation": 1, - "base_model": 
"prithivMLmods/Omni-Reasoner3-Merged (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 3.213, - "co2_cost": 1.1765997861113684 - } - }, - { - "id": "prithivMLmods/Pegasus-Opus-14B-Exp_bfloat16_7587dd36f6780374f07a0e57433a7f1cd382381e_False", - "model": { - "name": "prithivMLmods/Pegasus-Opus-14B-Exp", - "sha": "7587dd36f6780374f07a0e57433a7f1cd382381e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.62328046376648, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6981752860188744, - "normalized_score": 69.81752860188743 - }, - "bbh": { - "name": "BBH", - "value": 0.6547548394062034, - "normalized_score": 50.306948870287734 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4086102719033233, - "normalized_score": 40.86102719033233 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3951342281879195, - "normalized_score": 19.35123042505593 - }, - "musr": { - "name": "MUSR", - "value": 0.4859583333333333, - "normalized_score": 20.378125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5412234042553191, - "normalized_score": 49.02482269503546 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-26", - "submission_date": "2025-03-01", - "generation": 1, - "base_model": "prithivMLmods/Pegasus-Opus-14B-Exp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 14.766, - "co2_cost": 1.8781152230431404 - } - }, - { - "id": "prithivMLmods/Phi-4-Empathetic_float16_181a87cfc05f0ee538b14cf4a773ad3b816224fe_False", - "model": { - "name": "prithivMLmods/Phi-4-Empathetic", - "sha": "181a87cfc05f0ee538b14cf4a773ad3b816224fe", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.208396720096133, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.049659348306936704, - "normalized_score": 4.965934830693671 - }, - "bbh": { - "name": "BBH", - "value": 0.6726820578371974, - "normalized_score": 52.83893775599707 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2620845921450151, - "normalized_score": 26.208459214501513 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3800335570469799, - "normalized_score": 17.337807606263986 - }, - "musr": { - "name": "MUSR", - "value": 0.49913541666666666, - "normalized_score": 22.725260416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5065658244680851, - "normalized_score": 45.1739804964539 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-10", - "submission_date": "2025-01-12", - "generation": 1, - "base_model": "prithivMLmods/Phi-4-Empathetic (Merge)", - "hub_license": "mit", - "hub_hearts": 8, - "params_billions": 14.66, - "co2_cost": 1.7952762660673902 - } - }, - { - "id": "prithivMLmods/Phi-4-Math-IO_float16_2e3f81b0c1613d33a4b0e216120fa3a3dd9206f8_False", - "model": { - "name": "prithivMLmods/Phi-4-Math-IO", - "sha": "2e3f81b0c1613d33a4b0e216120fa3a3dd9206f8", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - 
"architecture": "LlamaForCausalLM", - "average_score": 31.821782745368306, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.05897684809638426, - "normalized_score": 5.897684809638426 - }, - "bbh": { - "name": "BBH", - "value": 0.6668255086606543, - "normalized_score": 52.093770592369005 - }, - "math": { - "name": "MATH Level 5", - "value": 0.45770392749244715, - "normalized_score": 45.770392749244714 - }, - "gpqa": { - "name": "GPQA", - "value": 0.39848993288590606, - "normalized_score": 19.798657718120808 - }, - "musr": { - "name": "MUSR", - "value": 0.4872916666666667, - "normalized_score": 20.64479166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5205285904255319, - "normalized_score": 46.725398936170215 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-10", - "submission_date": "2025-01-12", - "generation": 1, - "base_model": "prithivMLmods/Phi-4-Math-IO (Merge)", - "hub_license": "mit", - "hub_hearts": 4, - "params_billions": 14.66, - "co2_cost": 1.9338931212866448 - } - }, - { - "id": "prithivMLmods/Phi-4-QwQ_float16_f9d9cc11a7c9e56420b705ac97f06362321dd89a_False", - "model": { - "name": "prithivMLmods/Phi-4-QwQ", - "sha": "f9d9cc11a7c9e56420b705ac97f06362321dd89a", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 31.262674551400632, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.05592937849350833, - "normalized_score": 5.592937849350833 - }, - "bbh": { - "name": "BBH", - "value": 0.6695574237334824, - "normalized_score": 52.28684974505503 - }, - "math": { - "name": "MATH Level 5", - "value": 0.45770392749244715, - "normalized_score": 45.770392749244714 - }, - "gpqa": { - "name": "GPQA", - "value": 0.39093959731543626, - "normalized_score": 18.791946308724835 - }, - "musr": { - "name": "MUSR", - "value": 0.4650625, - "normalized_score": 17.6328125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5275099734042553, - "normalized_score": 47.50110815602837 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-10", - "submission_date": "2025-01-12", - "generation": 1, - "base_model": "prithivMLmods/Phi-4-QwQ (Merge)", - "hub_license": "mit", - "hub_hearts": 8, - "params_billions": 14.66, - "co2_cost": 1.9715288725951658 - } - }, - { - "id": "prithivMLmods/Phi-4-Super_bfloat16_d0632dd9df3d6a8ae4f10f2185d38eeb61cab9d2_False", - "model": { - "name": "prithivMLmods/Phi-4-Super", - "sha": "d0632dd9df3d6a8ae4f10f2185d38eeb61cab9d2", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.387370956217424, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.04813561350549875, - "normalized_score": 4.813561350549874 - }, - "bbh": { - "name": "BBH", - "value": 0.6720116458521787, - "normalized_score": 52.6972954461393 - }, - "math": { - "name": "MATH Level 5", - "value": 0.34894259818731116, - "normalized_score": 34.894259818731115 - }, - "gpqa": { - "name": "GPQA", - "value": 0.39429530201342283, - "normalized_score": 
19.239373601789712 - }, - "musr": { - "name": "MUSR", - "value": 0.504375, - "normalized_score": 23.28020833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.526595744680851, - "normalized_score": 47.399527186761226 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-23", - "submission_date": "2025-01-24", - "generation": 1, - "base_model": "prithivMLmods/Phi-4-Super (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 14.66, - "co2_cost": 1.9252909321013356 - } - }, - { - "id": "prithivMLmods/Phi-4-Super-1_bfloat16_081e3442df878853ab8bd765430c961658ce5024_False", - "model": { - "name": "prithivMLmods/Phi-4-Super-1", - "sha": "081e3442df878853ab8bd765430c961658ce5024", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.230266805569844, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.04176584795010572, - "normalized_score": 4.176584795010572 - }, - "bbh": { - "name": "BBH", - "value": 0.672933647971901, - "normalized_score": 52.905830976180994 - }, - "math": { - "name": "MATH Level 5", - "value": 0.35196374622356497, - "normalized_score": 35.196374622356494 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3934563758389262, - "normalized_score": 19.12751677852349 - }, - "musr": { - "name": "MUSR", - "value": 0.5017395833333333, - "normalized_score": 22.91744791666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5235206117021277, - "normalized_score": 47.05784574468086 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-24", - "submission_date": "2025-01-24", - "generation": 1, - "base_model": "prithivMLmods/Phi-4-Super-1 (Merge)", - "hub_license": "", - "hub_hearts": 6, - "params_billions": 14.66, - "co2_cost": 1.8711961475489614 - } - }, - { - "id": "prithivMLmods/Phi-4-Super-o1_bfloat16_081e3442df878853ab8bd765430c961658ce5024_False", - "model": { - "name": "prithivMLmods/Phi-4-Super-o1", - "sha": "081e3442df878853ab8bd765430c961658ce5024", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.230266805569844, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.04176584795010572, - "normalized_score": 4.176584795010572 - }, - "bbh": { - "name": "BBH", - "value": 0.672933647971901, - "normalized_score": 52.905830976180994 - }, - "math": { - "name": "MATH Level 5", - "value": 0.35196374622356497, - "normalized_score": 35.196374622356494 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3934563758389262, - "normalized_score": 19.12751677852349 - }, - "musr": { - "name": "MUSR", - "value": 0.5017395833333333, - "normalized_score": 22.91744791666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5235206117021277, - "normalized_score": 47.05784574468086 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-24", - "submission_date": "2025-01-24", - "generation": 1, - "base_model": "prithivMLmods/Phi-4-Super-o1 
(Merge)", - "hub_license": "", - "hub_hearts": 6, - "params_billions": 14.66, - "co2_cost": 1.9283047003225893 - } - }, - { - "id": "prithivMLmods/Phi-4-o1_float16_aa2a7571e9dbce0fefe98479fe04f298f2491b8c_False", - "model": { - "name": "prithivMLmods/Phi-4-o1", - "sha": "aa2a7571e9dbce0fefe98479fe04f298f2491b8c", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.20428963456544, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.028976449154908976, - "normalized_score": 2.8976449154908974 - }, - "bbh": { - "name": "BBH", - "value": 0.6688727399756971, - "normalized_score": 52.170861675862824 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3995468277945619, - "normalized_score": 39.95468277945619 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3825503355704698, - "normalized_score": 17.67337807606264 - }, - "musr": { - "name": "MUSR", - "value": 0.49777083333333333, - "normalized_score": 22.154687499999994 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5173703457446809, - "normalized_score": 46.3744828605201 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-08", - "submission_date": "2025-01-09", - "generation": 1, - "base_model": "prithivMLmods/Phi-4-o1 (Merge)", - "hub_license": "mit", - "hub_hearts": 22, - "params_billions": 14.66, - "co2_cost": 1.7381666316866269 - } - }, - { - "id": "prithivMLmods/Phi4-Super_bfloat16_d27188b144a6ac8c2d70f761e8afd8b05c74fd16_False", - "model": { - "name": "prithivMLmods/Phi4-Super", - "sha": "d27188b144a6ac8c2d70f761e8afd8b05c74fd16", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 30.387370956217424, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.04813561350549875, - "normalized_score": 4.813561350549874 - }, - "bbh": { - "name": "BBH", - "value": 0.6720116458521787, - "normalized_score": 52.6972954461393 - }, - "math": { - "name": "MATH Level 5", - "value": 0.34894259818731116, - "normalized_score": 34.894259818731115 - }, - "gpqa": { - "name": "GPQA", - "value": 0.39429530201342283, - "normalized_score": 19.239373601789712 - }, - "musr": { - "name": "MUSR", - "value": 0.504375, - "normalized_score": 23.28020833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.526595744680851, - "normalized_score": 47.399527186761226 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-23", - "submission_date": "2025-01-23", - "generation": 1, - "base_model": "prithivMLmods/Phi4-Super (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 14.66, - "co2_cost": 1.837064032587418 - } - }, - { - "id": "prithivMLmods/Porpoise-Opus-14B-Exp_bfloat16_5b35f2520b75aceb722cc54f2bdc27f70b5fd140_False", - "model": { - "name": "prithivMLmods/Porpoise-Opus-14B-Exp", - "sha": "5b35f2520b75aceb722cc54f2bdc27f70b5fd140", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.769424398512, - "has_chat_template": 
false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7098155117310957, - "normalized_score": 70.98155117310957 - }, - "bbh": { - "name": "BBH", - "value": 0.6518903547146537, - "normalized_score": 49.94661296400096 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4040785498489426, - "normalized_score": 40.40785498489426 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3934563758389262, - "normalized_score": 19.12751677852349 - }, - "musr": { - "name": "MUSR", - "value": 0.4925625, - "normalized_score": 21.303645833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5396442819148937, - "normalized_score": 48.84936465721041 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-26", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "prithivMLmods/Porpoise-Opus-14B-Exp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 1.9288978684249287 - } - }, - { - "id": "prithivMLmods/Primal-Opus-14B-Optimus-v1_float16_240fd09db4f126801d50bd74a74700927918c2d4_False", - "model": { - "name": "prithivMLmods/Primal-Opus-14B-Optimus-v1", - "sha": "240fd09db4f126801d50bd74a74700927918c2d4", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.0644115881976, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5013131823561483, - "normalized_score": 50.131318235614835 - }, - "bbh": { - "name": "BBH", - "value": 0.6419423743359406, - "normalized_score": 48.27170322245717 - }, - "math": { - "name": "MATH Level 5", - "value": 0.338368580060423, - "normalized_score": 33.8368580060423 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3724832214765101, - "normalized_score": 16.33109619686801 - }, - "musr": { - "name": "MUSR", - "value": 0.48471875000000003, - "normalized_score": 20.489843749999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5259308510638298, - "normalized_score": 47.325650118203306 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-03", - "generation": 1, - "base_model": "prithivMLmods/Primal-Opus-14B-Optimus-v1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 14.766, - "co2_cost": 3.9788106585351586 - } - }, - { - "id": "prithivMLmods/Primal-Opus-14B-Optimus-v2_bfloat16_fef7d92fabb736447402b52f0f4ccc932b75fecc_False", - "model": { - "name": "prithivMLmods/Primal-Opus-14B-Optimus-v2", - "sha": "fef7d92fabb736447402b52f0f4ccc932b75fecc", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.91271332337514, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6403730989330532, - "normalized_score": 64.03730989330532 - }, - "bbh": { - "name": "BBH", - "value": 0.6543780845512958, - "normalized_score": 50.1813442955816 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4206948640483384, - "normalized_score": 42.06948640483384 - }, - "gpqa": { - "name": "GPQA", - "value": 0.39177852348993286, - 
"normalized_score": 18.903803131991047 - }, - "musr": { - "name": "MUSR", - "value": 0.48998958333333337, - "normalized_score": 21.14869791666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.542220744680851, - "normalized_score": 49.13563829787233 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-27", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "prithivMLmods/Primal-Opus-14B-Optimus-v2 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 14.766, - "co2_cost": 1.9402900939156489 - } - }, - { - "id": "prithivMLmods/QwQ-LCoT-14B-Conversational_bfloat16_60ef4aa0a2660f9b6f28a3de773729969a1df9ae_False", - "model": { - "name": "prithivMLmods/QwQ-LCoT-14B-Conversational", - "sha": "60ef4aa0a2660f9b6f28a3de773729969a1df9ae", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.68306678902542, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4047427492386867, - "normalized_score": 40.47427492386867 - }, - "bbh": { - "name": "BBH", - "value": 0.6239828933798323, - "normalized_score": 45.626260309654924 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4652567975830816, - "normalized_score": 46.52567975830816 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3498322147651007, - "normalized_score": 13.31096196868009 - }, - "musr": { - "name": "MUSR", - "value": 0.48471875000000003, - "normalized_score": 20.62317708333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.527842420212766, - "normalized_score": 47.538046690307326 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-18", - "submission_date": "2025-01-19", - "generation": 1, - "base_model": "prithivMLmods/QwQ-LCoT-14B-Conversational (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 14.77, - "co2_cost": 3.908904000886237 - } - }, - { - "id": "prithivMLmods/QwQ-LCoT-3B-Instruct_float16_1f47223ac1c6069c3e53b75a45ad496f0fb9a124_False", - "model": { - "name": "prithivMLmods/QwQ-LCoT-3B-Instruct", - "sha": "1f47223ac1c6069c3e53b75a45ad496f0fb9a124", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 24.021306557737763, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4354424039326764, - "normalized_score": 43.54424039326764 - }, - "bbh": { - "name": "BBH", - "value": 0.47629783868435643, - "normalized_score": 26.621187622268376 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2824773413897281, - "normalized_score": 28.247734138972806 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28187919463087246, - "normalized_score": 4.250559284116329 - }, - "musr": { - "name": "MUSR", - "value": 0.43579166666666663, - "normalized_score": 12.773958333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3582114361702128, - "normalized_score": 28.690159574468087 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - 
"metadata": { - "upload_date": "2024-12-12", - "submission_date": "2025-01-12", - "generation": 1, - "base_model": "prithivMLmods/QwQ-LCoT-3B-Instruct (Merge)", - "hub_license": "creativeml-openrail-m", - "hub_hearts": 4, - "params_billions": 3.086, - "co2_cost": 1.530787238010945 - } - }, - { - "id": "prithivMLmods/QwQ-LCoT-7B-Instruct_float16_06f0076fcf5cb72222513e6c76bd33e1ebaa97b7_False", - "model": { - "name": "prithivMLmods/QwQ-LCoT-7B-Instruct", - "sha": "06f0076fcf5cb72222513e6c76bd33e1ebaa97b7", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 30.863799774866248, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4986901421561457, - "normalized_score": 49.86901421561457 - }, - "bbh": { - "name": "BBH", - "value": 0.5466466326018563, - "normalized_score": 34.780933423406445 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3716012084592145, - "normalized_score": 37.160120845921455 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - "musr": { - "name": "MUSR", - "value": 0.4801875, - "normalized_score": 19.390104166666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4334275265957447, - "normalized_score": 37.04750295508275 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-14", - "submission_date": "2025-01-07", - "generation": 1, - "base_model": "prithivMLmods/QwQ-LCoT-7B-Instruct (Merge)", - "hub_license": "creativeml-openrail-m", - "hub_hearts": 17, - "params_billions": 7.616, - "co2_cost": 1.3006094039639227 - } - }, - { - "id": "prithivMLmods/QwQ-LCoT1-Merged_bfloat16_d85a4f359bc568afb7b1a2a6e6503934bb352ab6_False", - "model": { - "name": "prithivMLmods/QwQ-LCoT1-Merged", - "sha": "d85a4f359bc568afb7b1a2a6e6503934bb352ab6", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 30.445290905975668, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47513486438206187, - "normalized_score": 47.513486438206186 - }, - "bbh": { - "name": "BBH", - "value": 0.548095531408024, - "normalized_score": 35.166254489441975 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3731117824773414, - "normalized_score": 37.31117824773414 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.46961458333333334, - "normalized_score": 17.76848958333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4357546542553192, - "normalized_score": 37.30607269503546 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-21", - "submission_date": "2025-01-22", - "generation": 1, - "base_model": "prithivMLmods/QwQ-LCoT1-Merged (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 7.616, - "co2_cost": 1.3107590394812034 - } - }, - { - "id": "prithivMLmods/QwQ-LCoT2-7B-Instruct_bfloat16_f2ea462f6d3f6cf104313b1329909cb15a388841_False", - "model": { - "name": "prithivMLmods/QwQ-LCoT2-7B-Instruct", - "sha": 
"f2ea462f6d3f6cf104313b1329909cb15a388841", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 30.323930375701078, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5561177675235043, - "normalized_score": 55.61177675235044 - }, - "bbh": { - "name": "BBH", - "value": 0.5424862934133593, - "normalized_score": 34.366736926024 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3270392749244713, - "normalized_score": 32.703927492447136 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.4564375, - "normalized_score": 15.754687500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4341755319148936, - "normalized_score": 37.1306146572104 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-20", - "submission_date": "2025-01-24", - "generation": 1, - "base_model": "prithivMLmods/QwQ-LCoT2-7B-Instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 5, - "params_billions": 7.616, - "co2_cost": 2.0511623027956514 - } - }, - { - "id": "prithivMLmods/QwQ-MathOct-7B_float16_d2ff038987cc16a7b317034929dd9ab35265e308_False", - "model": { - "name": "prithivMLmods/QwQ-MathOct-7B", - "sha": "d2ff038987cc16a7b317034929dd9ab35265e308", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 28.497758657229422, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4684404047926169, - "normalized_score": 46.84404047926169 - }, - "bbh": { - "name": "BBH", - "value": 0.5485512215016556, - "normalized_score": 35.254667256728304 - }, - "math": { - "name": "MATH Level 5", - "value": 0.29531722054380666, - "normalized_score": 29.531722054380666 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.4600625, - "normalized_score": 15.307812499999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4330119680851064, - "normalized_score": 37.00132978723404 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-11", - "submission_date": "2025-01-19", - "generation": 1, - "base_model": "prithivMLmods/QwQ-MathOct-7B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 7.616, - "co2_cost": 1.3290464565624656 - } - }, - { - "id": "prithivMLmods/QwQ-R1-Distill-1.5B-CoT_bfloat16_cd1a92a4fffbc923013e2a77d9d7f2c8b2a738ae_False", - "model": { - "name": "prithivMLmods/QwQ-R1-Distill-1.5B-CoT", - "sha": "cd1a92a4fffbc923013e2a77d9d7f2c8b2a738ae", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 13.93165110675273, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21939564799177294, - "normalized_score": 21.939564799177294 - }, - "bbh": { - "name": "BBH", - "value": 0.36662076641982305, - "normalized_score": 
11.476456072950837 - }, - "math": { - "name": "MATH Level 5", - "value": 0.33459214501510576, - "normalized_score": 33.45921450151057 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2860738255033557, - "normalized_score": 4.809843400447425 - }, - "musr": { - "name": "MUSR", - "value": 0.34339583333333334, - "normalized_score": 1.7578124999999993 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.19132313829787234, - "normalized_score": 10.14701536643026 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-21", - "submission_date": "2025-01-22", - "generation": 1, - "base_model": "prithivMLmods/QwQ-R1-Distill-1.5B-CoT (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 1.777, - "co2_cost": 1.1748323105540768 - } - }, - { - "id": "prithivMLmods/QwQ-R1-Distill-7B-CoT_bfloat16_db0c74ffe611d00eb0a5df4413f3eced7fdacb78_False", - "model": { - "name": "prithivMLmods/QwQ-R1-Distill-7B-CoT", - "sha": "db0c74ffe611d00eb0a5df4413f3eced7fdacb78", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 22.192243475184284, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3500378994401522, - "normalized_score": 35.00378994401522 - }, - "bbh": { - "name": "BBH", - "value": 0.438788672517715, - "normalized_score": 20.953831065455415 - }, - "math": { - "name": "MATH Level 5", - "value": 0.46827794561933533, - "normalized_score": 46.82779456193353 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2936241610738255, - "normalized_score": 5.8165548098433995 - }, - "musr": { - "name": "MUSR", - "value": 0.37790624999999994, - "normalized_score": 4.504947916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2804188829787234, - "normalized_score": 20.04654255319149 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-21", - "submission_date": "2025-01-22", - "generation": 1, - "base_model": "prithivMLmods/QwQ-R1-Distill-7B-CoT (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 7.616, - "co2_cost": 1.3422405283085903 - } - }, - { - "id": "prithivMLmods/Qwen-7B-Distill-Reasoner_float16_b83c5c3d748f756927b87ae978f94fdb033c526b_False", - "model": { - "name": "prithivMLmods/Qwen-7B-Distill-Reasoner", - "sha": "b83c5c3d748f756927b87ae978f94fdb033c526b", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 21.484736250167103, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3395712265677292, - "normalized_score": 33.95712265677292 - }, - "bbh": { - "name": "BBH", - "value": 0.4409329229697952, - "normalized_score": 22.175998122509004 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3950151057401813, - "normalized_score": 39.50151057401813 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3271812080536913, - "normalized_score": 10.290827740492169 - }, - "musr": { - "name": "MUSR", - "value": 0.36596874999999995, - "normalized_score": 2.779427083333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2818317819148936, - 
"normalized_score": 20.203531323877066 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-28", - "submission_date": "2025-01-28", - "generation": 1, - "base_model": "prithivMLmods/Qwen-7B-Distill-Reasoner (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 7.616, - "co2_cost": 1.3208078435703543 - } - }, - { - "id": "prithivMLmods/Qwen2.5-1.5B-DeepSeek-R1-Instruct_bfloat16_ca8cf376e59e873d70f8b9dffcb19aecc9d32fab_False", - "model": { - "name": "prithivMLmods/Qwen2.5-1.5B-DeepSeek-R1-Instruct", - "sha": "ca8cf376e59e873d70f8b9dffcb19aecc9d32fab", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.0677302519478, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.13968603305895025, - "normalized_score": 13.968603305895027 - }, - "bbh": { - "name": "BBH", - "value": 0.28243669901671337, - "normalized_score": 1.3610670095980735 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.3723541666666667, - "normalized_score": 4.244270833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11228390957446809, - "normalized_score": 1.3648788416075646 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-29", - "submission_date": "2025-01-29", - "generation": 1, - "base_model": "prithivMLmods/Qwen2.5-1.5B-DeepSeek-R1-Instruct (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 1.777, - "co2_cost": 1.2216157349184233 - } - }, - { - "id": "prithivMLmods/Qwen2.5-14B-DeepSeek-R1-1M_bfloat16_bc7898d09ac620cf86afa3237daa8181f689345b_True", - "model": { - "name": "prithivMLmods/Qwen2.5-14B-DeepSeek-R1-1M", - "sha": "bc7898d09ac620cf86afa3237daa8181f689345b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 34.33386526785314, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4192808415005519, - "normalized_score": 41.928084150055184 - }, - "bbh": { - "name": "BBH", - "value": 0.5934849375153814, - "normalized_score": 40.75990957906041 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5128398791540786, - "normalized_score": 51.283987915407856 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33221476510067116, - "normalized_score": 10.96196868008949 - }, - "musr": { - "name": "MUSR", - "value": 0.4606041666666667, - "normalized_score": 17.7421875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.48994348404255317, - "normalized_score": 43.32705378250591 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-29", - "submission_date": "2025-02-01", - "generation": 1, - "base_model": "prithivMLmods/Qwen2.5-14B-DeepSeek-R1-1M (Merge)", - "hub_license": "", - "hub_hearts": 5, - "params_billions": 14.77, - "co2_cost": 6.192776144696271 - } - 
}, - { - "id": "prithivMLmods/Qwen2.5-7B-DeepSeek-R1-1M_bfloat16_a42acdfe01bd887dde308deeb07d570979976838_False", - "model": { - "name": "prithivMLmods/Qwen2.5-7B-DeepSeek-R1-1M", - "sha": "a42acdfe01bd887dde308deeb07d570979976838", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 5.383076380804261, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.18612282078219125, - "normalized_score": 18.612282078219124 - }, - "bbh": { - "name": "BBH", - "value": 0.3125554204779005, - "normalized_score": 4.665734765589359 - }, - "math": { - "name": "MATH Level 5", - "value": 0.015105740181268883, - "normalized_score": 1.5105740181268883 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.3416875, - "normalized_score": 3.7109374999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12009640957446809, - "normalized_score": 2.23293439716312 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-29", - "submission_date": "2025-01-29", - "generation": 1, - "base_model": "prithivMLmods/Qwen2.5-7B-DeepSeek-R1-1M (Merge)", - "hub_license": "", - "hub_hearts": 3, - "params_billions": 7.616, - "co2_cost": 1.3227817770235382 - } - }, - { - "id": "prithivMLmods/SmolLM2-CoT-360M_float16_474240d772fbb3b8da6f8eb47f32dd34c6b78baf_False", - "model": { - "name": "prithivMLmods/SmolLM2-CoT-360M", - "sha": "474240d772fbb3b8da6f8eb47f32dd34c6b78baf", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.9507483871396545, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22156877086131466, - "normalized_score": 22.156877086131466 - }, - "bbh": { - "name": "BBH", - "value": 0.31352960121180296, - "normalized_score": 4.801205481265894 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02039274924471299, - "normalized_score": 2.0392749244712993 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23657718120805368, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3793958333333333, - "normalized_score": 5.757812500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1085438829787234, - "normalized_score": 0.9493203309692663 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-05", - "submission_date": "2025-01-07", - "generation": 1, - "base_model": "prithivMLmods/SmolLM2-CoT-360M (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 9, - "params_billions": 0.362, - "co2_cost": 0.7755030844116371 - } - }, - { - "id": "prithivMLmods/Sombrero-Opus-14B-Elite5_float16_f38f6b061fe97d1ad8b30aa02a1b6e18a05a7569_True", - "model": { - "name": "prithivMLmods/Sombrero-Opus-14B-Elite5", - "sha": "f38f6b061fe97d1ad8b30aa02a1b6e18a05a7569", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 42.32332845547639, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": 
"IFEval", - "value": 0.7880756393037142, - "normalized_score": 78.80756393037142 - }, - "bbh": { - "name": "BBH", - "value": 0.6501539892126272, - "normalized_score": 50.17464726204971 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5354984894259819, - "normalized_score": 53.54984894259819 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33640939597315433, - "normalized_score": 11.521252796420578 - }, - "musr": { - "name": "MUSR", - "value": 0.4286666666666667, - "normalized_score": 13.216666666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.520029920212766, - "normalized_score": 46.669991134751776 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-12", - "submission_date": "2025-02-13", - "generation": 1, - "base_model": "prithivMLmods/Sombrero-Opus-14B-Elite5 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 14.766, - "co2_cost": 1.5750293066143821 - } - }, - { - "id": "prithivMLmods/Sombrero-Opus-14B-Elite6_bfloat16_bdadcbf346ce8cc10353ba5b8019487e1b982b08_False", - "model": { - "name": "prithivMLmods/Sombrero-Opus-14B-Elite6", - "sha": "bdadcbf346ce8cc10353ba5b8019487e1b982b08", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.88084511903062, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7226049105262924, - "normalized_score": 72.26049105262925 - }, - "bbh": { - "name": "BBH", - "value": 0.6487937804559186, - "normalized_score": 49.59519138828582 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4078549848942598, - "normalized_score": 40.78549848942598 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3934563758389262, - "normalized_score": 19.12751677852349 - }, - "musr": { - "name": "MUSR", - "value": 0.48859375, - "normalized_score": 20.740885416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5389793882978723, - "normalized_score": 48.775487588652474 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-22", - "submission_date": "2025-02-25", - "generation": 1, - "base_model": "prithivMLmods/Sombrero-Opus-14B-Elite6 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 3.4953646297693073 - } - }, - { - "id": "prithivMLmods/Sombrero-Opus-14B-Sm1_bfloat16_ada12c7016fdc48547b14e15f415791afec55f8f_False", - "model": { - "name": "prithivMLmods/Sombrero-Opus-14B-Sm1", - "sha": "ada12c7016fdc48547b14e15f415791afec55f8f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.223819597959995, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3812872068334242, - "normalized_score": 38.12872068334242 - }, - "bbh": { - "name": "BBH", - "value": 0.635462046379832, - "normalized_score": 47.03125392036906 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5664652567975831, - "normalized_score": 56.646525679758305 - }, - "gpqa": { - "name": "GPQA", - "value": 0.4035234899328859, - "normalized_score": 20.46979865771812 - }, - "musr": { - "name": "MUSR", - 
"value": 0.5298958333333333, - "normalized_score": 27.236979166666657 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.512466755319149, - "normalized_score": 45.829639479905445 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-08", - "generation": 1, - "base_model": "prithivMLmods/Sombrero-Opus-14B-Sm1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.77, - "co2_cost": 1.8148194808807956 - } - }, - { - "id": "prithivMLmods/Sombrero-Opus-14B-Sm2_bfloat16_a676ec7bf723584225f276468bd8ebd5633de464_False", - "model": { - "name": "prithivMLmods/Sombrero-Opus-14B-Sm2", - "sha": "a676ec7bf723584225f276468bd8ebd5633de464", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.980475230049855, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4272242095417935, - "normalized_score": 42.722420954179356 - }, - "bbh": { - "name": "BBH", - "value": 0.6609367219259568, - "normalized_score": 51.25185799428797 - }, - "math": { - "name": "MATH Level 5", - "value": 0.486404833836858, - "normalized_score": 48.6404833836858 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3884228187919463, - "normalized_score": 18.456375838926174 - }, - "musr": { - "name": "MUSR", - "value": 0.5088125, - "normalized_score": 24.534895833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5344913563829787, - "normalized_score": 48.27681737588653 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-08", - "generation": 1, - "base_model": "prithivMLmods/Sombrero-Opus-14B-Sm2 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.77, - "co2_cost": 1.7291170921733863 - } - }, - { - "id": "prithivMLmods/Sombrero-Opus-14B-Sm4_bfloat16_ba5412590ee35408c9868c3523f9a96a9f891ceb_False", - "model": { - "name": "prithivMLmods/Sombrero-Opus-14B-Sm4", - "sha": "ba5412590ee35408c9868c3523f9a96a9f891ceb", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.38545109416284, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4346932804957513, - "normalized_score": 43.469328049575125 - }, - "bbh": { - "name": "BBH", - "value": 0.6612776404137711, - "normalized_score": 51.15996014172698 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4879154078549849, - "normalized_score": 48.79154078549849 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3951342281879195, - "normalized_score": 19.35123042505593 - }, - "musr": { - "name": "MUSR", - "value": 0.5191666666666667, - "normalized_score": 25.7625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5300033244680851, - "normalized_score": 47.778147163120565 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-12", - "generation": 1, - "base_model": 
"prithivMLmods/Sombrero-Opus-14B-Sm4 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 14.77, - "co2_cost": 1.7262493749468515 - } - }, - { - "id": "prithivMLmods/Sombrero-Opus-14B-Sm5_bfloat16_f3994069ea18d5000c4f47e3a013bf57c0c9e338_False", - "model": { - "name": "prithivMLmods/Sombrero-Opus-14B-Sm5", - "sha": "f3994069ea18d5000c4f47e3a013bf57c0c9e338", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.113179800786455, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6851609285584471, - "normalized_score": 68.5160928558447 - }, - "bbh": { - "name": "BBH", - "value": 0.6563944936055776, - "normalized_score": 50.59600634041891 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4093655589123867, - "normalized_score": 40.93655589123867 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38674496644295303, - "normalized_score": 18.232662192393736 - }, - "musr": { - "name": "MUSR", - "value": 0.480625, - "normalized_score": 19.511458333333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5399767287234043, - "normalized_score": 48.88630319148937 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-08", - "generation": 1, - "base_model": "prithivMLmods/Sombrero-Opus-14B-Sm5 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 1.9383475014210545 - } - }, - { - "id": "prithivMLmods/Sqweeks-7B-Instruct_float16_3806238f5f13f425ac429c41530adb0148b6881e_False", - "model": { - "name": "prithivMLmods/Sqweeks-7B-Instruct", - "sha": "3806238f5f13f425ac429c41530adb0148b6881e", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 23.920659418100314, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21579852568961466, - "normalized_score": 21.579852568961464 - }, - "bbh": { - "name": "BBH", - "value": 0.4666692459456812, - "normalized_score": 24.982149693508465 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5143504531722054, - "normalized_score": 51.43504531722054 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.44760416666666664, - "normalized_score": 14.2171875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3133311170212766, - "normalized_score": 23.703457446808507 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-05", - "submission_date": "2025-02-07", - "generation": 1, - "base_model": "prithivMLmods/Sqweeks-7B-Instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 7.616, - "co2_cost": 2.0052051096519468 - } - }, - { - "id": "prithivMLmods/Tadpole-Opus-14B-Exp_bfloat16_9520a4148e858334a4f6ca7424307f95b0980b06_False", - "model": { - "name": "prithivMLmods/Tadpole-Opus-14B-Exp", - "sha": "9520a4148e858334a4f6ca7424307f95b0980b06", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - 
"weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.8786925782139, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5749522378400422, - "normalized_score": 57.495223784004224 - }, - "bbh": { - "name": "BBH", - "value": 0.636858708544215, - "normalized_score": 47.77876442051519 - }, - "math": { - "name": "MATH Level 5", - "value": 0.31344410876132933, - "normalized_score": 31.344410876132933 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3859060402684564, - "normalized_score": 18.120805369127517 - }, - "musr": { - "name": "MUSR", - "value": 0.47284375, - "normalized_score": 18.505468750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5322473404255319, - "normalized_score": 48.027482269503544 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-25", - "submission_date": "2025-02-26", - "generation": 1, - "base_model": "prithivMLmods/Tadpole-Opus-14B-Exp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 14.766, - "co2_cost": 2.0833767582436806 - } - }, - { - "id": "prithivMLmods/Taurus-Opus-7B_float16_4b9918fb7ed2a92bdb1beae11deb337a3745d053_False", - "model": { - "name": "prithivMLmods/Taurus-Opus-7B", - "sha": "4b9918fb7ed2a92bdb1beae11deb337a3745d053", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.88865048007519, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42232831110342783, - "normalized_score": 42.23283111034278 - }, - "bbh": { - "name": "BBH", - "value": 0.5367364587851736, - "normalized_score": 34.23401639666226 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21676737160120846, - "normalized_score": 21.676737160120847 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3263422818791946, - "normalized_score": 10.17897091722595 - }, - "musr": { - "name": "MUSR", - "value": 0.43988541666666664, - "normalized_score": 14.219010416666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3951130319148936, - "normalized_score": 32.790336879432616 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-27", - "generation": 1, - "base_model": "prithivMLmods/Taurus-Opus-7B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 7.456, - "co2_cost": 1.3636199514034044 - } - }, - { - "id": "prithivMLmods/Triangulum-10B_float16_d3776fbe6bfc884f1380fe128223759d76214049_False", - "model": { - "name": "prithivMLmods/Triangulum-10B", - "sha": "d3776fbe6bfc884f1380fe128223759d76214049", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.30066565051636, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3229353670483207, - "normalized_score": 32.29353670483207 - }, - "bbh": { - "name": "BBH", - "value": 0.5968023910391113, - "normalized_score": 42.24074650996805 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3549848942598187, - "normalized_score": 35.49848942598187 - }, - 
"gpqa": { - "name": "GPQA", - "value": 0.3540268456375839, - "normalized_score": 13.870246085011187 - }, - "musr": { - "name": "MUSR", - "value": 0.41724999999999995, - "normalized_score": 10.589583333333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4178025265957447, - "normalized_score": 35.311391843971634 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-30", - "submission_date": "2025-01-07", - "generation": 1, - "base_model": "prithivMLmods/Triangulum-10B (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 4, - "params_billions": 10.306, - "co2_cost": 1.7188278372236376 - } - }, - { - "id": "prithivMLmods/Triangulum-5B_float16_55e161fc171b17b3e6c15aef9d5318a51bdb48fb_False", - "model": { - "name": "prithivMLmods/Triangulum-5B", - "sha": "55e161fc171b17b3e6c15aef9d5318a51bdb48fb", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.0117920459317, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1283206336963701, - "normalized_score": 12.832063369637007 - }, - "bbh": { - "name": "BBH", - "value": 0.3124115848614622, - "normalized_score": 4.293501650120951 - }, - "math": { - "name": "MATH Level 5", - "value": 0.010574018126888218, - "normalized_score": 1.0574018126888218 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2550335570469799, - "normalized_score": 0.6711409395973182 - }, - "musr": { - "name": "MUSR", - "value": 0.3445416666666667, - "normalized_score": 2.7343749999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12234042553191489, - "normalized_score": 2.482269503546098 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-31", - "submission_date": "2025-01-07", - "generation": 1, - "base_model": "prithivMLmods/Triangulum-5B (Merge)", - "hub_license": "creativeml-openrail-m", - "hub_hearts": 2, - "params_billions": 5.413, - "co2_cost": 0.9848582640288246 - } - }, - { - "id": "prithivMLmods/Triangulum-v2-10B_bfloat16_a23407caf6f232d305c5cdf1c802dfa430e57915_False", - "model": { - "name": "prithivMLmods/Triangulum-v2-10B", - "sha": "a23407caf6f232d305c5cdf1c802dfa430e57915", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 32.83383929882561, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6705231009277606, - "normalized_score": 67.05231009277605 - }, - "bbh": { - "name": "BBH", - "value": 0.6064531367418446, - "normalized_score": 42.75472604416384 - }, - "math": { - "name": "MATH Level 5", - "value": 0.24471299093655588, - "normalized_score": 24.47129909365559 - }, - "gpqa": { - "name": "GPQA", - "value": 0.337248322147651, - "normalized_score": 11.633109619686799 - }, - "musr": { - "name": "MUSR", - "value": 0.42807291666666664, - "normalized_score": 12.575781249999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44664228723404253, - "normalized_score": 38.5158096926714 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": 
false - }, - "metadata": { - "upload_date": "2025-01-30", - "submission_date": "2025-01-31", - "generation": 0, - "base_model": "prithivMLmods/Triangulum-v2-10B", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 10.306, - "co2_cost": 1.815262165506309 - } - }, - { - "id": "prithivMLmods/Tucana-Opus-14B-r999_bfloat16_b4c9f74143dd04e9eee46b3cae63cb066cce0e69_False", - "model": { - "name": "prithivMLmods/Tucana-Opus-14B-r999", - "sha": "b4c9f74143dd04e9eee46b3cae63cb066cce0e69", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.75066582236797, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.606725710005009, - "normalized_score": 60.672571000500895 - }, - "bbh": { - "name": "BBH", - "value": 0.6556888858891955, - "normalized_score": 50.5867616437716 - }, - "math": { - "name": "MATH Level 5", - "value": 0.40634441087613293, - "normalized_score": 40.6344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.39177852348993286, - "normalized_score": 18.903803131991047 - }, - "musr": { - "name": "MUSR", - "value": 0.47303125, - "normalized_score": 18.99557291666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5383976063829787, - "normalized_score": 48.71084515366431 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-28", - "submission_date": "2025-03-01", - "generation": 1, - "base_model": "prithivMLmods/Tucana-Opus-14B-r999 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 14.77, - "co2_cost": 1.9954226689231802 - } - }, - { - "id": "prithivMLmods/Tulu-MathLingo-8B_float16_0fb551a24dfe1a576e2c5118a7581588d339a2e7_False", - "model": { - "name": "prithivMLmods/Tulu-MathLingo-8B", - "sha": "0fb551a24dfe1a576e2c5118a7581588d339a2e7", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.79779228784994, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5589402784611497, - "normalized_score": 55.89402784611497 - }, - "bbh": { - "name": "BBH", - "value": 0.4658807905856453, - "normalized_score": 24.70335071867501 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14501510574018128, - "normalized_score": 14.501510574018129 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.38642708333333337, - "normalized_score": 7.603385416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.304438164893617, - "normalized_score": 22.715351654846337 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-23", - "submission_date": "2025-01-12", - "generation": 1, - "base_model": "prithivMLmods/Tulu-MathLingo-8B (Merge)", - "hub_license": "creativeml-openrail-m", - "hub_hearts": 3, - "params_billions": 8.03, - "co2_cost": 1.683147873413241 - } - }, - { - "id": "prithivMLmods/Viper-Coder-7B-Elite14_bfloat16_514675679f50691831cd28636c8f862a4004663e_False", - "model": { - "name": "prithivMLmods/Viper-Coder-7B-Elite14", - "sha": 
"514675679f50691831cd28636c8f862a4004663e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.487135883248206, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.14882844186757802, - "normalized_score": 14.882844186757803 - }, - "bbh": { - "name": "BBH", - "value": 0.28285388717732607, - "normalized_score": 1.7889715785304021 - }, - "math": { - "name": "MATH Level 5", - "value": 0.010574018126888218, - "normalized_score": 1.0574018126888218 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2550335570469799, - "normalized_score": 0.6711409395973182 - }, - "musr": { - "name": "MUSR", - "value": 0.34215625, - "normalized_score": 1.5361979166666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10887632978723404, - "normalized_score": 0.9862588652482256 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-13", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 0.6799435661327026 - } - }, - { - "id": "prithivMLmods/Viper-Coder-Hybrid-v1.2_float16_890d476f14cce4a4850f3207ab9fc701e32c11ed_False", - "model": { - "name": "prithivMLmods/Viper-Coder-Hybrid-v1.2", - "sha": "890d476f14cce4a4850f3207ab9fc701e32c11ed", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.82547529143809, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6735705705306365, - "normalized_score": 67.35705705306366 - }, - "bbh": { - "name": "BBH", - "value": 0.6390749226915919, - "normalized_score": 48.286401555658045 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3330815709969788, - "normalized_score": 33.30815709969788 - }, - "gpqa": { - "name": "GPQA", - "value": 0.37416107382550334, - "normalized_score": 16.554809843400445 - }, - "musr": { - "name": "MUSR", - "value": 0.48217708333333337, - "normalized_score": 20.305468749999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5242686170212766, - "normalized_score": 47.140957446808514 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-20", - "submission_date": "2025-02-21", - "generation": 1, - "base_model": "prithivMLmods/Viper-Coder-Hybrid-v1.2 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 1.9555707185530746 - } - }, - { - "id": "prithivMLmods/Viper-Coder-Hybrid-v1.3_float16_a296ce2b8ee219ea185c917fa907560516e18f3e_True", - "model": { - "name": "prithivMLmods/Viper-Coder-Hybrid-v1.3", - "sha": "a296ce2b8ee219ea185c917fa907560516e18f3e", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.41199336479261, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7554776880898239, - "normalized_score": 75.54776880898238 - }, - "bbh": { - "name": "BBH", - "value": 0.6470999423290662, - "normalized_score": 49.614467306055566 - }, - 
"math": { - "name": "MATH Level 5", - "value": 0.4516616314199396, - "normalized_score": 45.16616314199396 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33808724832214765, - "normalized_score": 11.74496644295302 - }, - "musr": { - "name": "MUSR", - "value": 0.4403229166666667, - "normalized_score": 14.873697916666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5097240691489362, - "normalized_score": 45.524896572104026 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-21", - "submission_date": "2025-02-22", - "generation": 1, - "base_model": "prithivMLmods/Viper-Coder-Hybrid-v1.3 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 8, - "params_billions": 14.766, - "co2_cost": 1.7981105929281844 - } - }, - { - "id": "prithivMLmods/Viper-Coder-HybridMini-v1.3_float16_c18d54cc4923f68958b4f32d3970d650b250c9cb_True", - "model": { - "name": "prithivMLmods/Viper-Coder-HybridMini-v1.3", - "sha": "c18d54cc4923f68958b4f32d3970d650b250c9cb", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 33.800748621674735, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.610372699991578, - "normalized_score": 61.037269999157786 - }, - "bbh": { - "name": "BBH", - "value": 0.5365472959273401, - "normalized_score": 33.666954037290026 - }, - "math": { - "name": "MATH Level 5", - "value": 0.46299093655589124, - "normalized_score": 46.299093655589125 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31711409395973156, - "normalized_score": 8.948545861297541 - }, - "musr": { - "name": "MUSR", - "value": 0.45048958333333333, - "normalized_score": 15.611197916666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4351728723404255, - "normalized_score": 37.241430260047274 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-21", - "submission_date": "2025-02-22", - "generation": 1, - "base_model": "prithivMLmods/Viper-Coder-HybridMini-v1.3 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 6, - "params_billions": 7.616, - "co2_cost": 0.6595156291478043 - } - }, - { - "id": "prithivMLmods/Viper-Coder-v0.1_bfloat16_39a88521fbb3b1af13922748004af131e8382c81_False", - "model": { - "name": "prithivMLmods/Viper-Coder-v0.1", - "sha": "39a88521fbb3b1af13922748004af131e8382c81", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 31.99646602214953, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5521460835028835, - "normalized_score": 55.214608350288344 - }, - "bbh": { - "name": "BBH", - "value": 0.6143056870893655, - "normalized_score": 44.62725748233725 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3270392749244713, - "normalized_score": 32.703927492447136 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3540268456375839, - "normalized_score": 13.870246085011187 - }, - "musr": { - "name": "MUSR", - "value": 0.43944791666666666, - "normalized_score": 13.030989583333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3927859042553192, - "normalized_score": 32.5317671394799 
- } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-06", - "submission_date": "2025-02-07", - "generation": 1, - "base_model": "prithivMLmods/Viper-Coder-v0.1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 14.766, - "co2_cost": 3.3489197291447272 - } - }, - { - "id": "prithivMLmods/Viper-Coder-v1.1_bfloat16_7c0e6e2ee4684509ae4de063443259b8102fd979_False", - "model": { - "name": "prithivMLmods/Viper-Coder-v1.1", - "sha": "7c0e6e2ee4684509ae4de063443259b8102fd979", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.26026084781071, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.443236168920686, - "normalized_score": 44.3236168920686 - }, - "bbh": { - "name": "BBH", - "value": 0.6492289468853992, - "normalized_score": 49.268009957854 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5460725075528701, - "normalized_score": 54.607250755287005 - }, - "gpqa": { - "name": "GPQA", - "value": 0.401006711409396, - "normalized_score": 20.134228187919465 - }, - "musr": { - "name": "MUSR", - "value": 0.5219270833333334, - "normalized_score": 26.20755208333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.523188164893617, - "normalized_score": 47.02090721040189 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-13", - "submission_date": "2025-02-14", - "generation": 1, - "base_model": "prithivMLmods/Viper-Coder-v1.1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 7, - "params_billions": 14.77, - "co2_cost": 1.8163804285158245 - } - }, - { - "id": "prithivMLmods/Viper-Coder-v1.6-r999_bfloat16_9ade17c8100f0468211214f8c118e17a67a325fb_False", - "model": { - "name": "prithivMLmods/Viper-Coder-v1.6-r999", - "sha": "9ade17c8100f0468211214f8c118e17a67a325fb", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.588383013145055, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4432860366050967, - "normalized_score": 44.32860366050967 - }, - "bbh": { - "name": "BBH", - "value": 0.6492289468853992, - "normalized_score": 49.268009957854 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5657099697885196, - "normalized_score": 56.57099697885196 - }, - "gpqa": { - "name": "GPQA", - "value": 0.401006711409396, - "normalized_score": 20.134228187919465 - }, - "musr": { - "name": "MUSR", - "value": 0.5219270833333334, - "normalized_score": 26.20755208333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.523188164893617, - "normalized_score": 47.02090721040189 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-01", - "submission_date": "2025-03-01", - "generation": 1, - "base_model": "prithivMLmods/Viper-Coder-v1.6-r999 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 5, - "params_billions": 14.77, - "co2_cost": 1.7538858337264824 - } - }, - { - "id": 
"prithivMLmods/Viper-Coder-v1.7-Vsm6_bfloat16_8ffaf8fc58e0d330bdf65baf290a4512dc7befad_False", - "model": { - "name": "prithivMLmods/Viper-Coder-v1.7-Vsm6", - "sha": "8ffaf8fc58e0d330bdf65baf290a4512dc7befad", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.68288111891892, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5003889679384035, - "normalized_score": 50.03889679384034 - }, - "bbh": { - "name": "BBH", - "value": 0.6502342489348574, - "normalized_score": 49.533250464441856 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4645015105740181, - "normalized_score": 46.45015105740181 - }, - "gpqa": { - "name": "GPQA", - "value": 0.39681208053691275, - "normalized_score": 19.574944071588366 - }, - "musr": { - "name": "MUSR", - "value": 0.47675, - "normalized_score": 18.860416666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5287566489361702, - "normalized_score": 47.63962765957447 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-07", - "generation": 1, - "base_model": "prithivMLmods/Viper-Coder-v1.7-Vsm6 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 14.766, - "co2_cost": 1.631943501432983 - } - }, - { - "id": "prithivMLmods/Viper-OneCoder-UIGEN_bfloat16_bb9bce0cd291dda05c9cbf7dfd66554ac10a608f_False", - "model": { - "name": "prithivMLmods/Viper-OneCoder-UIGEN", - "sha": "bb9bce0cd291dda05c9cbf7dfd66554ac10a608f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 31.347294081128084, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4691895282295421, - "normalized_score": 46.91895282295421 - }, - "bbh": { - "name": "BBH", - "value": 0.6046507657311738, - "normalized_score": 42.73215347452439 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3867069486404834, - "normalized_score": 38.670694864048336 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3422818791946309, - "normalized_score": 12.304250559284117 - }, - "musr": { - "name": "MUSR", - "value": 0.45141666666666663, - "normalized_score": 15.193750000000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.390375664893617, - "normalized_score": 32.263962765957444 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-26", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "prithivMLmods/Viper-OneCoder-UIGEN (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 14.77, - "co2_cost": 1.964400686121954 - } - }, - { - "id": "prithivMLmods/Volans-Opus-14B-Exp_bfloat16_57d238100de4e1ef2456b17181a4b9db54029664_False", - "model": { - "name": "prithivMLmods/Volans-Opus-14B-Exp", - "sha": "57d238100de4e1ef2456b17181a4b9db54029664", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.707074751250644, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - 
"name": "IFEval", - "value": 0.5867675545330834, - "normalized_score": 58.67675545330834 - }, - "bbh": { - "name": "BBH", - "value": 0.6521211711040636, - "normalized_score": 49.914266527494924 - }, - "math": { - "name": "MATH Level 5", - "value": 0.425226586102719, - "normalized_score": 42.522658610271904 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3850671140939597, - "normalized_score": 18.008948545861294 - }, - "musr": { - "name": "MUSR", - "value": 0.4871979166666667, - "normalized_score": 20.399739583333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5384807180851063, - "normalized_score": 48.72007978723404 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-11", - "submission_date": "2025-03-13", - "generation": 1, - "base_model": "prithivMLmods/Volans-Opus-14B-Exp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 2.064902332390804 - } - }, - { - "id": "prithivMLmods/WebMind-7B-v0.1_float16_4016b7b6151142622bab81d805054ff4b4d41ff9_False", - "model": { - "name": "prithivMLmods/WebMind-7B-v0.1", - "sha": "4016b7b6151142622bab81d805054ff4b4d41ff9", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 30.805047322905597, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5278161943642867, - "normalized_score": 52.78161943642867 - }, - "bbh": { - "name": "BBH", - "value": 0.5433559211614739, - "normalized_score": 35.064291045796445 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3648036253776435, - "normalized_score": 36.48036253776435 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31711409395973156, - "normalized_score": 8.948545861297541 - }, - "musr": { - "name": "MUSR", - "value": 0.4537395833333333, - "normalized_score": 15.117447916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4279421542553192, - "normalized_score": 36.4380171394799 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-06", - "submission_date": "2025-02-07", - "generation": 1, - "base_model": "prithivMLmods/WebMind-7B-v0.1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.2121225411059522 - } - }, - { - "id": "pszemraj/Llama-3-6.3b-v0.1_bfloat16_7000b39346162f95f19aa4ca3975242db61902d7_False", - "model": { - "name": "pszemraj/Llama-3-6.3b-v0.1", - "sha": "7000b39346162f95f19aa4ca3975242db61902d7", - "precision": "bfloat16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 10.384306670382523, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.10438968603305895, - "normalized_score": 10.438968603305895 - }, - "bbh": { - "name": "BBH", - "value": 0.41968070468284147, - "normalized_score": 18.67999639960586 - }, - "math": { - "name": "MATH Level 5", - "value": 0.021148036253776436, - "normalized_score": 2.1148036253776437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.3908333333333333, - 
"normalized_score": 6.154166666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2839926861702128, - "normalized_score": 20.44363179669031 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-17", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "meta-llama/Meta-Llama-3-8B", - "hub_license": "llama3", - "hub_hearts": 6, - "params_billions": 6.3, - "co2_cost": 1.6289265744595216 - } - }, - { - "id": "pszemraj/Mistral-v0.3-6B_bfloat16_ae11a699012b83996361f04808f4d45debf3b01c_False", - "model": { - "name": "pszemraj/Mistral-v0.3-6B", - "sha": "ae11a699012b83996361f04808f4d45debf3b01c", - "precision": "bfloat16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 10.122379227914847, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2453744952282167, - "normalized_score": 24.537449522821667 - }, - "bbh": { - "name": "BBH", - "value": 0.3774050646438491, - "normalized_score": 13.515091344549441 - }, - "math": { - "name": "MATH Level 5", - "value": 0.013595166163141994, - "normalized_score": 1.3595166163141994 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.39077083333333335, - "normalized_score": 6.613020833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2142619680851064, - "normalized_score": 12.695774231678486 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-25", - "submission_date": "2024-06-26", - "generation": 2, - "base_model": "pszemraj/Mistral-7B-v0.3-prune6 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 5.939, - "co2_cost": 1.061078907867203 - } - }, - { - "id": "qingy2019/LLaMa_3.2_3B_Catalysts_float16_3f4a318114beb37f32a2c143cbd68b6d15d18164_False", - "model": { - "name": "qingy2019/LLaMa_3.2_3B_Catalysts", - "sha": "3f4a318114beb37f32a2c143cbd68b6d15d18164", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.930930685006384, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.499239794855428, - "normalized_score": 49.9239794855428 - }, - "bbh": { - "name": "BBH", - "value": 0.44681268798954793, - "normalized_score": 21.345400954820075 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12915407854984895, - "normalized_score": 12.915407854984895 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28859060402684567, - "normalized_score": 5.145413870246088 - }, - "musr": { - "name": "MUSR", - "value": 0.37877083333333333, - "normalized_score": 7.946354166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30078125, - "normalized_score": 22.309027777777775 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-19", - "submission_date": "2024-10-29", - "generation": 2, - "base_model": "meta-llama/Llama-3.2-3B-Instruct", - "hub_license": "apache-2.0", - "hub_hearts": 1, 
- "params_billions": 3.0, - "co2_cost": 1.299667653455569 - } - }, - { - "id": "qingy2019/OpenMath2-Llama3.1-8B_float16_38412f988f7688d884c9249b2a4e5cc76f98c1c6_False", - "model": { - "name": "qingy2019/OpenMath2-Llama3.1-8B", - "sha": "38412f988f7688d884c9249b2a4e5cc76f98c1c6", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 12.751664730325508, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23305939352030391, - "normalized_score": 23.305939352030393 - }, - "bbh": { - "name": "BBH", - "value": 0.40955241401694514, - "normalized_score": 16.294369976218587 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2673716012084592, - "normalized_score": 26.73716012084592 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.34355208333333337, - "normalized_score": 2.010677083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15533577127659576, - "normalized_score": 6.148419030732861 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-23", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.0, - "co2_cost": 1.385612947044763 - } - }, - { - "id": "qingy2019/Oracle-14B_float16_0154031aa9306aa98da156a0f3c8e10d9f1377f6_False", - "model": { - "name": "qingy2019/Oracle-14B", - "sha": "0154031aa9306aa98da156a0f3c8e10d9f1377f6", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 13.340250056368257, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23583203677353867, - "normalized_score": 23.583203677353865 - }, - "bbh": { - "name": "BBH", - "value": 0.4611577021562399, - "normalized_score": 23.18463030799016 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06419939577039276, - "normalized_score": 6.419939577039275 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2575503355704698, - "normalized_score": 1.0067114093959737 - }, - "musr": { - "name": "MUSR", - "value": 0.37166666666666665, - "normalized_score": 10.491666666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.23819813829787234, - "normalized_score": 15.355348699763594 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-23", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 13.668, - "co2_cost": 1.3930241047147078 - } - }, - { - "id": "qingy2019/Oracle-14B_bfloat16_0154031aa9306aa98da156a0f3c8e10d9f1377f6_False", - "model": { - "name": "qingy2019/Oracle-14B", - "sha": "0154031aa9306aa98da156a0f3c8e10d9f1377f6", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 13.593017488054064, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24007854714380067, - "normalized_score": 24.007854714380066 - }, - "bbh": { 
- "name": "BBH", - "value": 0.4622299618883472, - "normalized_score": 23.30194605898976 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07250755287009064, - "normalized_score": 7.250755287009064 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.37033333333333335, - "normalized_score": 10.225000000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2378656914893617, - "normalized_score": 15.318410165484634 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-11-24", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 13.668, - "co2_cost": 2.7377749955379205 - } - }, - { - "id": "qingy2019/Qwen2.5-Math-14B-Instruct_float16_025d9637208b862c7b10b7590969fe6870ce01a0_False", - "model": { - "name": "qingy2019/Qwen2.5-Math-14B-Instruct", - "sha": "025d9637208b862c7b10b7590969fe6870ce01a0", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.153923519286515, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6066259746361875, - "normalized_score": 60.66259746361875 - }, - "bbh": { - "name": "BBH", - "value": 0.6350068875885949, - "normalized_score": 47.0170855041099 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3716012084592145, - "normalized_score": 37.160120845921455 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3724832214765101, - "normalized_score": 16.33109619686801 - }, - "musr": { - "name": "MUSR", - "value": 0.4757291666666667, - "normalized_score": 19.632812499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5330784574468085, - "normalized_score": 48.119828605200944 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-01", - "submission_date": "2024-12-01", - "generation": 3, - "base_model": "Qwen/Qwen2.5-14B", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 14.0, - "co2_cost": 3.8656545632811805 - } - }, - { - "id": "qingy2019/Qwen2.5-Math-14B-Instruct_bfloat16_025d9637208b862c7b10b7590969fe6870ce01a0_False", - "model": { - "name": "qingy2019/Qwen2.5-Math-14B-Instruct", - "sha": "025d9637208b862c7b10b7590969fe6870ce01a0", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.380504031909005, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6005310354304356, - "normalized_score": 60.05310354304357 - }, - "bbh": { - "name": "BBH", - "value": 0.6356492397286339, - "normalized_score": 47.06557227198695 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2764350453172205, - "normalized_score": 27.64350453172205 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3691275167785235, - "normalized_score": 15.883668903803136 - }, - "musr": { - "name": "MUSR", - "value": 0.4756666666666667, - "normalized_score": 19.424999999999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5339095744680851, - "normalized_score": 48.21217494089834 
- } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-01", - "submission_date": "2024-12-01", - "generation": 3, - "base_model": "Qwen/Qwen2.5-14B", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 14.0, - "co2_cost": 1.9718926104780867 - } - }, - { - "id": "qingy2019/Qwen2.5-Math-14B-Instruct-Alpha_bfloat16_e24aaa0779b576301bfb62b93789dea24ab10c88_False", - "model": { - "name": "qingy2019/Qwen2.5-Math-14B-Instruct-Alpha", - "sha": "e24aaa0779b576301bfb62b93789dea24ab10c88", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.84070516730529, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5980830862112528, - "normalized_score": 59.80830862112529 - }, - "bbh": { - "name": "BBH", - "value": 0.6375080075350833, - "normalized_score": 47.750107640063504 - }, - "math": { - "name": "MATH Level 5", - "value": 0.31419939577039274, - "normalized_score": 31.419939577039273 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3699664429530201, - "normalized_score": 15.99552572706935 - }, - "musr": { - "name": "MUSR", - "value": 0.4649375, - "normalized_score": 17.95052083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5330784574468085, - "normalized_score": 48.119828605200944 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-03", - "submission_date": "2024-12-03", - "generation": 2, - "base_model": "Qwen/Qwen2.5-14B", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.0, - "co2_cost": 3.7862834271963415 - } - }, - { - "id": "qingy2019/Qwen2.5-Math-14B-Instruct-Pro_bfloat16_295a9ce370c2bfeabe13f76d52c92f57ff6d0308_True", - "model": { - "name": "qingy2019/Qwen2.5-Math-14B-Instruct-Pro", - "sha": "295a9ce370c2bfeabe13f76d52c92f57ff6d0308", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 20.249049077126084, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1921678923035324, - "normalized_score": 19.216789230353243 - }, - "bbh": { - "name": "BBH", - "value": 0.5318689754519911, - "normalized_score": 33.03690414223814 - }, - "math": { - "name": "MATH Level 5", - "value": 0.283987915407855, - "normalized_score": 28.3987915407855 - }, - "gpqa": { - "name": "GPQA", - "value": 0.311241610738255, - "normalized_score": 8.165548098434002 - }, - "musr": { - "name": "MUSR", - "value": 0.37403125, - "normalized_score": 4.25390625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35580119680851063, - "normalized_score": 28.422355200945624 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-03", - "submission_date": "2024-12-03", - "generation": 1, - "base_model": "qingy2019/Qwen2.5-Math-14B-Instruct-Pro (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 3.3191387960289416 - } - }, - { - "id": 
"qingy2019/Qwen2.5-Ultimate-14B-Instruct_bfloat16_3eeba743112bed957ae6dc6a3f880355c8bedb66_True", - "model": { - "name": "qingy2019/Qwen2.5-Ultimate-14B-Instruct", - "sha": "3eeba743112bed957ae6dc6a3f880355c8bedb66", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 29.440181360925095, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.39380177927897975, - "normalized_score": 39.380177927897975 - }, - "bbh": { - "name": "BBH", - "value": 0.5841561592804249, - "normalized_score": 40.58060064469629 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2892749244712991, - "normalized_score": 28.92749244712991 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3565436241610738, - "normalized_score": 14.205816554809845 - }, - "musr": { - "name": "MUSR", - "value": 0.4135, - "normalized_score": 9.887500000000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4929355053191489, - "normalized_score": 43.659500591016545 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-02", - "submission_date": "2024-12-02", - "generation": 1, - "base_model": "qingy2019/Qwen2.5-Ultimate-14B-Instruct (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 14.766, - "co2_cost": 3.9041787942847397 - } - }, - { - "id": "qingy2024/Benchmaxx-Llama-3.2-1B-Instruct_bfloat16_66ec83182f1dfbad634582eb14606e6b64355f91_True", - "model": { - "name": "qingy2024/Benchmaxx-Llama-3.2-1B-Instruct", - "sha": "66ec83182f1dfbad634582eb14606e6b64355f91", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.69666706814397, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20136016879657087, - "normalized_score": 20.136016879657085 - }, - "bbh": { - "name": "BBH", - "value": 0.8269136508088061, - "normalized_score": 76.69996850001888 - }, - "math": { - "name": "MATH Level 5", - "value": 0.48036253776435045, - "normalized_score": 48.036253776435046 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.3446354166666667, - "normalized_score": 3.579427083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11128656914893617, - "normalized_score": 1.2540632387706852 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-22", - "submission_date": "2025-02-22", - "generation": 0, - "base_model": "qingy2024/Benchmaxx-Llama-3.2-1B-Instruct", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 1.236, - "co2_cost": 0.3138860847843751 - } - }, - { - "id": "qingy2024/Eyas-17B-Instruct_bfloat16_afa6aa65deaef3eeb733e80f0fbffcf6d70a863f_True", - "model": { - "name": "qingy2024/Eyas-17B-Instruct", - "sha": "afa6aa65deaef3eeb733e80f0fbffcf6d70a863f", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 32.566804942732524, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6574588757829227, - 
"normalized_score": 65.74588757829227 - }, - "bbh": { - "name": "BBH", - "value": 0.6084550080292097, - "normalized_score": 43.85006553278257 - }, - "math": { - "name": "MATH Level 5", - "value": 0.24697885196374622, - "normalized_score": 24.69788519637462 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3145973154362416, - "normalized_score": 8.612975391498878 - }, - "musr": { - "name": "MUSR", - "value": 0.45216666666666666, - "normalized_score": 15.354166666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43425864361702127, - "normalized_score": 37.13984929078014 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-23", - "submission_date": "2024-12-23", - "generation": 1, - "base_model": "qingy2024/Eyas-17B-Instruct (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 17.431, - "co2_cost": 4.5715217976788916 - } - }, - { - "id": "qingy2024/Falcon3-2x10B-MoE-Instruct_bfloat16_e226b1f0beb60ff1e3770a694af51572b6d95dc5_True", - "model": { - "name": "qingy2024/Falcon3-2x10B-MoE-Instruct", - "sha": "e226b1f0beb60ff1e3770a694af51572b6d95dc5", - "precision": "bfloat16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 35.53368391277467, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7849783020164276, - "normalized_score": 78.49783020164276 - }, - "bbh": { - "name": "BBH", - "value": 0.6184925726037823, - "normalized_score": 45.073852659055916 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2794561933534743, - "normalized_score": 27.945619335347434 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33053691275167785, - "normalized_score": 10.738255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.42835416666666665, - "normalized_score": 12.910937499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44232047872340424, - "normalized_score": 38.03560874704491 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-25", - "submission_date": "2024-12-25", - "generation": 1, - "base_model": "qingy2024/Falcon3-2x10B-MoE-Instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 18.799, - "co2_cost": 4.386573555253281 - } - }, - { - "id": "qingy2024/Fusion-14B-Instruct_bfloat16_2e15219659b919e04ad5b56bef259489cc264f09_True", - "model": { - "name": "qingy2024/Fusion-14B-Instruct", - "sha": "2e15219659b919e04ad5b56bef259489cc264f09", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.09742253326738, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7259770741632203, - "normalized_score": 72.59770741632202 - }, - "bbh": { - "name": "BBH", - "value": 0.6395930812164231, - "normalized_score": 48.57983591380125 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3368580060422961, - "normalized_score": 33.68580060422961 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3548657718120805, - "normalized_score": 13.982102908277403 - }, - "musr": { - "name": "MUSR", - "value": 0.44004166666666666, - "normalized_score": 14.805208333333333 - }, - 
"mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.504404920212766, - "normalized_score": 44.93388002364066 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-05", - "submission_date": "2024-12-05", - "generation": 1, - "base_model": "qingy2024/Fusion-14B-Instruct (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 14.0, - "co2_cost": 3.247361919434426 - } - }, - { - "id": "qingy2024/Fusion2-14B-Instruct_bfloat16_df00288ce3d37ef518189c19e7973e71b47ef214_True", - "model": { - "name": "qingy2024/Fusion2-14B-Instruct", - "sha": "df00288ce3d37ef518189c19e7973e71b47ef214", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.25779713463195, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6064010159709571, - "normalized_score": 60.64010159709571 - }, - "bbh": { - "name": "BBH", - "value": 0.611852372286455, - "normalized_score": 44.76704383085473 - }, - "math": { - "name": "MATH Level 5", - "value": 0.31268882175226587, - "normalized_score": 31.268882175226587 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3447986577181208, - "normalized_score": 12.639821029082773 - }, - "musr": { - "name": "MUSR", - "value": 0.46338541666666666, - "normalized_score": 17.223177083333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5050698138297872, - "normalized_score": 45.00775709219858 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-05", - "submission_date": "2024-12-06", - "generation": 1, - "base_model": "qingy2024/Fusion2-14B-Instruct (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 14.766, - "co2_cost": 3.337331892077327 - } - }, - { - "id": "qingy2024/Fusion4-14B-Instruct_float16_3f3c7178006857d7fdf942ab7e86bd2b0d7b624d_True", - "model": { - "name": "qingy2024/Fusion4-14B-Instruct", - "sha": "3f3c7178006857d7fdf942ab7e86bd2b0d7b624d", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.552180652224024, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7648949232480928, - "normalized_score": 76.48949232480928 - }, - "bbh": { - "name": "BBH", - "value": 0.6542520469477617, - "normalized_score": 50.69585563958964 - }, - "math": { - "name": "MATH Level 5", - "value": 0.38821752265861026, - "normalized_score": 38.82175226586103 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33053691275167785, - "normalized_score": 10.738255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.4325729166666667, - "normalized_score": 13.971614583333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5193650265957447, - "normalized_score": 46.596114066193856 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-25", - "submission_date": "2024-12-25", - "generation": 1, - "base_model": "qingy2024/Fusion4-14B-Instruct (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 3.645661080723154 
- } - }, - { - "id": "qingy2024/OwO-14B-Instruct_bfloat16_0c64ce33086d285d9374f0fb9360d52d0eb1ff92_False", - "model": { - "name": "qingy2024/OwO-14B-Instruct", - "sha": "0c64ce33086d285d9374f0fb9360d52d0eb1ff92", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 29.28644722085275, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1383119013107444, - "normalized_score": 13.83119013107444 - }, - "bbh": { - "name": "BBH", - "value": 0.6164807172760662, - "normalized_score": 44.94845230112603 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4161631419939577, - "normalized_score": 41.616314199395774 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3640939597315436, - "normalized_score": 15.212527964205815 - }, - "musr": { - "name": "MUSR", - "value": 0.44068749999999995, - "normalized_score": 13.652604166666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5181183510638298, - "normalized_score": 46.457594562647756 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-30", - "generation": 2, - "base_model": "Qwen/Qwen2.5-14B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 5.629522484161071 - } - }, - { - "id": "qingy2024/QwEnlarge-16B-Instruct_bfloat16_7b89422d7570b46b8eccd3f2cc33717bfe46bf15_True", - "model": { - "name": "qingy2024/QwEnlarge-16B-Instruct", - "sha": "7b89422d7570b46b8eccd3f2cc33717bfe46bf15", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 37.700136766667036, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7801821389468832, - "normalized_score": 78.01821389468832 - }, - "bbh": { - "name": "BBH", - "value": 0.5949341698087998, - "normalized_score": 42.5954533547226 - }, - "math": { - "name": "MATH Level 5", - "value": 0.45996978851963743, - "normalized_score": 45.996978851963746 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33305369127516776, - "normalized_score": 11.073825503355701 - }, - "musr": { - "name": "MUSR", - "value": 0.410125, - "normalized_score": 9.898958333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44755651595744683, - "normalized_score": 38.617390661938536 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-06", - "generation": 1, - "base_model": "qingy2024/QwEnlarge-16B-Instruct (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 15.871, - "co2_cost": 1.7987979471737134 - } - }, - { - "id": "qingy2024/QwQ-14B-Math-v0.2_float16_308f732e0f2c1ac9e416e9c1e0523c0198ac658c_True", - "model": { - "name": "qingy2024/QwQ-14B-Math-v0.2", - "sha": "308f732e0f2c1ac9e416e9c1e0523c0198ac658c", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 28.935415225800252, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.33909692948044523, - 
"normalized_score": 33.90969294804452 - }, - "bbh": { - "name": "BBH", - "value": 0.573097955260854, - "normalized_score": 39.099213742951825 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4811178247734139, - "normalized_score": 48.11178247734139 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2625838926174497, - "normalized_score": 1.6778523489932917 - }, - "musr": { - "name": "MUSR", - "value": 0.40209374999999997, - "normalized_score": 8.595052083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.47997007978723405, - "normalized_score": 42.21889775413712 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-20", - "submission_date": "2024-12-23", - "generation": 2, - "base_model": "Qwen/Qwen2.5-14B", - "hub_license": "apache-2.0", - "hub_hearts": 18, - "params_billions": 14.77, - "co2_cost": 6.822342589515159 - } - }, - { - "id": "qingy2024/Qwarkstar-4B_bfloat16_c3dd554ec8f344e31b91b0532864388d6151700a_False", - "model": { - "name": "qingy2024/Qwarkstar-4B", - "sha": "c3dd554ec8f344e31b91b0532864388d6151700a", - "precision": "bfloat16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 14.167330731742462, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19941200459225966, - "normalized_score": 19.941200459225964 - }, - "bbh": { - "name": "BBH", - "value": 0.40149118131308104, - "normalized_score": 16.574205111821954 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08610271903323263, - "normalized_score": 8.610271903323262 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32466442953020136, - "normalized_score": 9.955257270693513 - }, - "musr": { - "name": "MUSR", - "value": 0.44283333333333336, - "normalized_score": 14.0875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.24251994680851063, - "normalized_score": 15.83554964539007 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-05", - "submission_date": "2025-01-10", - "generation": 1, - "base_model": "qingy2024/Qwarkstar-4B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 4.473, - "co2_cost": 2.234656387692985 - } - }, - { - "id": "qingy2024/Qwarkstar-4B-Instruct-Preview_bfloat16_cd93b138d949e75eed3c4dba1f4dbdfe92ce255c_True", - "model": { - "name": "qingy2024/Qwarkstar-4B-Instruct-Preview", - "sha": "cd93b138d949e75eed3c4dba1f4dbdfe92ce255c", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 18.873009662500476, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5324372664530114, - "normalized_score": 53.24372664530114 - }, - "bbh": { - "name": "BBH", - "value": 0.43584381808469397, - "normalized_score": 20.23401722066339 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1283987915407855, - "normalized_score": 12.83987915407855 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2802013422818792, - "normalized_score": 4.026845637583895 - }, - "musr": { - "name": "MUSR", - "value": 0.38959374999999996, - "normalized_score": 6.19921875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.250249335106383, - "normalized_score": 
16.694370567375884 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-10", - "submission_date": "2025-01-17", - "generation": 1, - "base_model": "qingy2024/Qwarkstar-4B-Instruct-Preview (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 4.473, - "co2_cost": 1.934410461398533 - } - }, - { - "id": "qingy2024/Qwen2.5-4B_bfloat16_e2736ed3972e1a0b2c1d6357acec2c21369827e1_False", - "model": { - "name": "qingy2024/Qwen2.5-4B", - "sha": "e2736ed3972e1a0b2c1d6357acec2c21369827e1", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 14.27535656814343, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21584839337402537, - "normalized_score": 21.584839337402535 - }, - "bbh": { - "name": "BBH", - "value": 0.4269378314466817, - "normalized_score": 19.977752146023 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0513595166163142, - "normalized_score": 5.13595166163142 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.46103125, - "normalized_score": 16.528906250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2524933510638298, - "normalized_score": 16.943705673758867 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-03", - "submission_date": "2025-01-16", - "generation": 1, - "base_model": "qingy2024/Qwen2.5-4B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 4.168, - "co2_cost": 1.919529501444422 - } - }, - { - "id": "qingy2024/Qwen2.5-Coder-Draft-1.5B-Instruct_bfloat16_890f7a85cdb481969b11dd09c9bbf5bb4a97ee0a_True", - "model": { - "name": "qingy2024/Qwen2.5-Coder-Draft-1.5B-Instruct", - "sha": "890f7a85cdb481969b11dd09c9bbf5bb4a97ee0a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 14.636502505800076, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4125110262991086, - "normalized_score": 41.25110262991086 - }, - "bbh": { - "name": "BBH", - "value": 0.3836795503038973, - "normalized_score": 13.001065842415562 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1578549848942598, - "normalized_score": 15.785498489425981 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.35800000000000004, - "normalized_score": 2.6166666666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.22440159574468085, - "normalized_score": 13.82239952718676 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-31", - "submission_date": "2025-01-31", - "generation": 1, - "base_model": "qingy2024/Qwen2.5-Coder-Draft-1.5B-Instruct (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.544, - "co2_cost": 1.1199516994910768 - } - }, - { - "id": 
"qingy2024/Qwen2.5-Math-14B-Instruct-Alpha_float16_c82727eb404d3d55450759301b80f838e4d3e1fc_True", - "model": { - "name": "qingy2024/Qwen2.5-Math-14B-Instruct-Alpha", - "sha": "c82727eb404d3d55450759301b80f838e4d3e1fc", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.35285681111407, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7704402097545624, - "normalized_score": 77.04402097545623 - }, - "bbh": { - "name": "BBH", - "value": 0.646486159387426, - "normalized_score": 50.1795027462589 - }, - "math": { - "name": "MATH Level 5", - "value": 0.42900302114803623, - "normalized_score": 42.90030211480362 - }, - "gpqa": { - "name": "GPQA", - "value": 0.348993288590604, - "normalized_score": 13.19910514541387 - }, - "musr": { - "name": "MUSR", - "value": 0.40209374999999997, - "normalized_score": 8.728385416666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.49659242021276595, - "normalized_score": 44.065824468085104 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-03", - "submission_date": "2024-12-10", - "generation": 2, - "base_model": "Qwen/Qwen2.5-14B", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.77, - "co2_cost": 3.138693243209943 - } - }, - { - "id": "qingy2024/Qwen2.5-Math-14B-Instruct-Preview_float16_7b9e9b94d69f0de9627f728e9328fb394f7fea14_True", - "model": { - "name": "qingy2024/Qwen2.5-Math-14B-Instruct-Preview", - "sha": "7b9e9b94d69f0de9627f728e9328fb394f7fea14", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.91810674601417, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7825802204816554, - "normalized_score": 78.25802204816554 - }, - "bbh": { - "name": "BBH", - "value": 0.6293942245934432, - "normalized_score": 47.050807568284256 - }, - "math": { - "name": "MATH Level 5", - "value": 0.47583081570996977, - "normalized_score": 47.583081570996974 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34060402684563756, - "normalized_score": 12.080536912751676 - }, - "musr": { - "name": "MUSR", - "value": 0.4114583333333333, - "normalized_score": 10.165625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.49933510638297873, - "normalized_score": 44.37056737588653 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-01", - "submission_date": "2024-12-10", - "generation": 3, - "base_model": "Qwen/Qwen2.5-14B", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 14.77, - "co2_cost": 3.237960654243049 - } - }, - { - "id": "qingy2024/Qwen2.6-14B-Instruct_bfloat16_c21acf3c074e9522c5d0559ccc4ed715c48b8eff_False", - "model": { - "name": "qingy2024/Qwen2.6-14B-Instruct", - "sha": "c21acf3c074e9522c5d0559ccc4ed715c48b8eff", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.25438494517895, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5810970447302047, - 
"normalized_score": 58.10970447302047 - }, - "bbh": { - "name": "BBH", - "value": 0.6394142844483001, - "normalized_score": 48.04794799002712 - }, - "math": { - "name": "MATH Level 5", - "value": 0.30513595166163143, - "normalized_score": 30.513595166163142 - }, - "gpqa": { - "name": "GPQA", - "value": 0.37919463087248323, - "normalized_score": 17.225950782997764 - }, - "musr": { - "name": "MUSR", - "value": 0.4569375, - "normalized_score": 16.017187500000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5285073138297872, - "normalized_score": 47.611923758865245 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-04", - "submission_date": "2024-12-04", - "generation": 1, - "base_model": "qingy2024/Qwen2.6-14B-Instruct (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 14.766, - "co2_cost": 3.5785724883461816 - } - }, - { - "id": "qingy2024/Qwen2.6-Math-14B-Instruct_bfloat16_45bb3f302922fbf185694bba2748a32ca3313a5e_False", - "model": { - "name": "qingy2024/Qwen2.6-Math-14B-Instruct", - "sha": "45bb3f302922fbf185694bba2748a32ca3313a5e", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.19645398109978, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.38623186478543603, - "normalized_score": 38.6231864785436 - }, - "bbh": { - "name": "BBH", - "value": 0.6324437508110833, - "normalized_score": 47.02211721984639 - }, - "math": { - "name": "MATH Level 5", - "value": 0.42900302114803623, - "normalized_score": 42.90030211480362 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3699664429530201, - "normalized_score": 15.99552572706935 - }, - "musr": { - "name": "MUSR", - "value": 0.4758541666666667, - "normalized_score": 19.515104166666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5241023936170213, - "normalized_score": 47.122488179669034 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-04", - "submission_date": "2024-12-04", - "generation": 3, - "base_model": "Qwen/Qwen2.5-14B", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 14.0, - "co2_cost": 3.112143161139537 - } - }, - { - "id": "qq8933/OpenLongCoT-Base-Gemma2-2B_bfloat16_39e5bc941f107ac28142c802aecfd257cc47c1bb_True", - "model": { - "name": "qq8933/OpenLongCoT-Base-Gemma2-2B", - "sha": "39e5bc941f107ac28142c802aecfd257cc47c1bb", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 5.473141918546371, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1965141380426158, - "normalized_score": 19.65141380426158 - }, - "bbh": { - "name": "BBH", - "value": 0.3106362870893106, - "normalized_score": 3.546298466806123 - }, - "math": { - "name": "MATH Level 5", - "value": 0.023413897280966767, - "normalized_score": 2.3413897280966767 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2625838926174497, - "normalized_score": 1.6778523489932917 - }, - "musr": { - "name": "MUSR", - "value": 0.32225, - "normalized_score": 2.114583333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 
0.1315658244680851, - "normalized_score": 3.507313829787232 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-28", - "submission_date": "2024-11-12", - "generation": 2, - "base_model": "google/gemma-2-2b", - "hub_license": "other", - "hub_hearts": 8, - "params_billions": 3.204, - "co2_cost": 3.316973334195197 - } - }, - { - "id": "raphgg/test-2.5-72B_bfloat16_0f34d627ccd451c5bd74f495bcdb8b18787d6f3b_True", - "model": { - "name": "raphgg/test-2.5-72B", - "sha": "0f34d627ccd451c5bd74f495bcdb8b18787d6f3b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 46.73987796559809, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8437047035199936, - "normalized_score": 84.37047035199936 - }, - "bbh": { - "name": "BBH", - "value": 0.7266099425567868, - "normalized_score": 62.1541268705062 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4108761329305136, - "normalized_score": 41.08761329305136 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38926174496644295, - "normalized_score": 18.568232662192393 - }, - "musr": { - "name": "MUSR", - "value": 0.48118750000000005, - "normalized_score": 20.515104166666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5836934840425532, - "normalized_score": 53.74372044917257 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-07-27", - "submission_date": "2024-12-27", - "generation": 0, - "base_model": "raphgg/test-2.5-72B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 72.706, - "co2_cost": 44.86470543487947 - } - }, - { - "id": "rasyosef/Mistral-NeMo-Minitron-8B-Chat_bfloat16_cede47eac8a4e65aa27567d3f087c28185b537d9_True", - "model": { - "name": "rasyosef/Mistral-NeMo-Minitron-8B-Chat", - "sha": "cede47eac8a4e65aa27567d3f087c28185b537d9", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 17.54564856271941, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4451843331249973, - "normalized_score": 44.518433312499724 - }, - "bbh": { - "name": "BBH", - "value": 0.47594353379058535, - "normalized_score": 26.036695387358723 - }, - "math": { - "name": "MATH Level 5", - "value": 0.027190332326283987, - "normalized_score": 2.719033232628399 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.4304270833333333, - "normalized_score": 12.936718749999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2403590425531915, - "normalized_score": 15.595449172576831 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-26", - "submission_date": "2024-08-26", - "generation": 1, - "base_model": "nvidia/Mistral-NeMo-Minitron-8B-Base", - "hub_license": "other", - "hub_hearts": 9, - "params_billions": 8.414, - "co2_cost": 2.9527959021153043 - } - }, - { - "id": 
"rasyosef/Phi-1_5-Instruct-v0.1_bfloat16_f4c405ee4bff5dc1a69383f3fe682342c9c87c77_True", - "model": { - "name": "rasyosef/Phi-1_5-Instruct-v0.1", - "sha": "f4c405ee4bff5dc1a69383f3fe682342c9c87c77", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "PhiForCausalLM", - "average_score": 6.864747864349624, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24022815019703275, - "normalized_score": 24.022815019703277 - }, - "bbh": { - "name": "BBH", - "value": 0.3117898107092894, - "normalized_score": 4.820243721122045 - }, - "math": { - "name": "MATH Level 5", - "value": 0.013595166163141994, - "normalized_score": 1.3595166163141994 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.34215625, - "normalized_score": 3.4028645833333346 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15616688829787234, - "normalized_score": 6.240765366430259 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-24", - "submission_date": "2024-07-25", - "generation": 1, - "base_model": "microsoft/phi-1_5", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 1.415, - "co2_cost": 0.5900438699202877 - } - }, - { - "id": "rasyosef/phi-2-instruct-apo_float16_2d3722d6db77a8c844a50dd32ddc4278fdc89e1f_True", - "model": { - "name": "rasyosef/phi-2-instruct-apo", - "sha": "2d3722d6db77a8c844a50dd32ddc4278fdc89e1f", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "PhiForCausalLM", - "average_score": 12.547052522669906, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.31459194936102874, - "normalized_score": 31.459194936102875 - }, - "bbh": { - "name": "BBH", - "value": 0.44450964630048634, - "normalized_score": 21.672437586715603 - }, - "math": { - "name": "MATH Level 5", - "value": 0.030211480362537766, - "normalized_score": 3.0211480362537766 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.33421875, - "normalized_score": 3.6106770833333353 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.21550864361702127, - "normalized_score": 12.834293735224584 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-15", - "submission_date": "2024-09-17", - "generation": 1, - "base_model": "microsoft/phi-2", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 2.775, - "co2_cost": 0.9901298108291215 - } - }, - { - "id": "rasyosef/phi-2-instruct-v0.1_float16_29aeb3ccf7c79e0169a038fbd0deaf9772a9fefd_True", - "model": { - "name": "rasyosef/phi-2-instruct-v0.1", - "sha": "29aeb3ccf7c79e0169a038fbd0deaf9772a9fefd", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "PhiForCausalLM", - "average_score": 14.218631101919177, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3681476260765879, - "normalized_score": 36.81476260765879 - }, - "bbh": { - "name": "BBH", - "value": 0.47261184292654473, - "normalized_score": 
26.35880186790661 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.3523541666666667, - "normalized_score": 5.044270833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.22465093085106383, - "normalized_score": 13.850103427895979 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-09", - "submission_date": "2024-08-10", - "generation": 1, - "base_model": "microsoft/phi-2", - "hub_license": "mit", - "hub_hearts": 2, - "params_billions": 2.775, - "co2_cost": 0.49272607630116594 - } - }, - { - "id": "realtreetune/rho-1b-sft-MATH_bfloat16_b5f93df6af679a860caac9a9598e0f70c326b4fb_False", - "model": { - "name": "realtreetune/rho-1b-sft-MATH", - "sha": "b5f93df6af679a860caac9a9598e0f70c326b4fb", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.569175419988618, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.212101668018635, - "normalized_score": 21.210166801863497 - }, - "bbh": { - "name": "BBH", - "value": 0.3144153389594046, - "normalized_score": 4.19762318329166 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03474320241691843, - "normalized_score": 3.474320241691843 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2525167785234899, - "normalized_score": 0.33557046979865535 - }, - "musr": { - "name": "MUSR", - "value": 0.34584375, - "normalized_score": 2.897135416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11170212765957446, - "normalized_score": 1.300236406619384 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-06", - "submission_date": "2024-10-05", - "generation": 1, - "base_model": "realtreetune/rho-1b-sft-MATH (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.1, - "co2_cost": 0.5562682251492022 - } - }, - { - "id": "recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp_float16_9048af8616bc62b6efab2bc1bc77ba53c5dfed79_True", - "model": { - "name": "recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp", - "sha": "9048af8616bc62b6efab2bc1bc77ba53c5dfed79", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 29.873991757143997, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7648949232480928, - "normalized_score": 76.48949232480928 - }, - "bbh": { - "name": "BBH", - "value": 0.597438766061506, - "normalized_score": 42.25120987807252 - }, - "math": { - "name": "MATH Level 5", - "value": 0.017371601208459216, - "normalized_score": 1.7371601208459215 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33053691275167785, - "normalized_score": 10.738255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.4244791666666667, - "normalized_score": 12.39322916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4207114361702128, - "normalized_score": 35.63460401891253 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": 
false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-11", - "submission_date": "2024-09-12", - "generation": 0, - "base_model": "recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 10.159, - "co2_cost": 2.114373144767299 - } - }, - { - "id": "recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp_bfloat16_5a4f7299d9f8ea5faad2b1edc68b7bf634dac40b_False", - "model": { - "name": "recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp", - "sha": "5a4f7299d9f8ea5faad2b1edc68b7bf634dac40b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 23.910552802690432, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.28536505361330156, - "normalized_score": 28.536505361330157 - }, - "bbh": { - "name": "BBH", - "value": 0.5983926033872208, - "normalized_score": 42.703797634497924 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10045317220543806, - "normalized_score": 10.045317220543806 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3296979865771812, - "normalized_score": 10.626398210290827 - }, - "musr": { - "name": "MUSR", - "value": 0.46065625, - "normalized_score": 16.415364583333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4162234042553192, - "normalized_score": 35.13593380614657 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-11", - "submission_date": "2024-09-27", - "generation": 0, - "base_model": "recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 10.159, - "co2_cost": 5.939655062300775 - } - }, - { - "id": "recoilme/recoilme-gemma-2-9B-v0.1_float16_6dc0997046db4e9932f87d338ecdc2a4158abbda_True", - "model": { - "name": "recoilme/recoilme-gemma-2-9B-v0.1", - "sha": "6dc0997046db4e9932f87d338ecdc2a4158abbda", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 32.72459862019961, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.751506004069203, - "normalized_score": 75.1506004069203 - }, - "bbh": { - "name": "BBH", - "value": 0.5995309756292291, - "normalized_score": 42.321861031477475 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2039274924471299, - "normalized_score": 20.39274924471299 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3389261744966443, - "normalized_score": 11.85682326621924 - }, - "musr": { - "name": "MUSR", - "value": 0.41914583333333333, - "normalized_score": 11.526562500000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4158909574468085, - "normalized_score": 35.09899527186761 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-18", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 10.159, - "co2_cost": 3.8496174324729506 - } - }, - { - "id": "recoilme/recoilme-gemma-2-9B-v0.2_float16_483116e575fb3a56de25243b14d715c58fe127bc_True", - "model": { - "name": "recoilme/recoilme-gemma-2-9B-v0.2", - "sha": 
"483116e575fb3a56de25243b14d715c58fe127bc", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 30.048864030373213, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7591745457608035, - "normalized_score": 75.91745457608036 - }, - "bbh": { - "name": "BBH", - "value": 0.6025964285724085, - "normalized_score": 43.02796904930725 - }, - "math": { - "name": "MATH Level 5", - "value": 0.052870090634441085, - "normalized_score": 5.287009063444108 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3288590604026846, - "normalized_score": 10.514541387024611 - }, - "musr": { - "name": "MUSR", - "value": 0.409875, - "normalized_score": 10.401041666666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.41630651595744683, - "normalized_score": 35.145168439716315 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-18", - "submission_date": "2024-09-18", - "generation": 0, - "base_model": "recoilme/recoilme-gemma-2-9B-v0.2", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 2, - "params_billions": 10.159, - "co2_cost": 1.9140857936630222 - } - }, - { - "id": "recoilme/recoilme-gemma-2-9B-v0.2_bfloat16_483116e575fb3a56de25243b14d715c58fe127bc_False", - "model": { - "name": "recoilme/recoilme-gemma-2-9B-v0.2", - "sha": "483116e575fb3a56de25243b14d715c58fe127bc", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 23.76285134105471, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2746989100032359, - "normalized_score": 27.469891000323585 - }, - "bbh": { - "name": "BBH", - "value": 0.6030832642626502, - "normalized_score": 43.56058143461737 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08308157099697885, - "normalized_score": 8.308157099697885 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33053691275167785, - "normalized_score": 10.738255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.46859375, - "normalized_score": 17.807552083333327 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4122340425531915, - "normalized_score": 34.692671394799056 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-18", - "submission_date": "2024-09-27", - "generation": 0, - "base_model": "recoilme/recoilme-gemma-2-9B-v0.2", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 2, - "params_billions": 10.159, - "co2_cost": 5.893568841337357 - } - }, - { - "id": "recoilme/recoilme-gemma-2-9B-v0.3_float16_772cab46d9d22cbcc3c574d193021803ce5c444c_True", - "model": { - "name": "recoilme/recoilme-gemma-2-9B-v0.3", - "sha": "772cab46d9d22cbcc3c574d193021803ce5c444c", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 30.2074720895527, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.743937197746424, - "normalized_score": 74.39371977464239 - }, - "bbh": { - "name": "BBH", - "value": 0.5992527878628748, - "normalized_score": 42.026279212829245 - }, - "math": { - "name": "MATH Level 5", - "value": 
0.08761329305135952, - "normalized_score": 8.761329305135952 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3238255033557047, - "normalized_score": 9.843400447427292 - }, - "musr": { - "name": "MUSR", - "value": 0.4203854166666667, - "normalized_score": 12.081510416666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4072473404255319, - "normalized_score": 34.13859338061466 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-18", - "submission_date": "2024-09-18", - "generation": 0, - "base_model": "recoilme/recoilme-gemma-2-9B-v0.3", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 3, - "params_billions": 10.159, - "co2_cost": 1.8766372136690954 - } - }, - { - "id": "recoilme/recoilme-gemma-2-9B-v0.3_bfloat16_76c8fb761660e6eb237c91bb6e6761ee36266bba_False", - "model": { - "name": "recoilme/recoilme-gemma-2-9B-v0.3", - "sha": "76c8fb761660e6eb237c91bb6e6761ee36266bba", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 30.37598877443187, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.57607592299543, - "normalized_score": 57.60759229954299 - }, - "bbh": { - "name": "BBH", - "value": 0.6019827101058847, - "normalized_score": 43.326868296283614 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18882175226586104, - "normalized_score": 18.882175226586103 - }, - "gpqa": { - "name": "GPQA", - "value": 0.337248322147651, - "normalized_score": 11.633109619686799 - }, - "musr": { - "name": "MUSR", - "value": 0.46322916666666664, - "normalized_score": 17.03697916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4039228723404255, - "normalized_score": 33.76920803782505 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-18", - "submission_date": "2024-09-27", - "generation": 0, - "base_model": "recoilme/recoilme-gemma-2-9B-v0.3", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 3, - "params_billions": 10.159, - "co2_cost": 5.110699417105108 - } - }, - { - "id": "recoilme/recoilme-gemma-2-9B-v0.4_bfloat16_2691f2cc8d80072f15d78cb7ae72831e1a12139e_False", - "model": { - "name": "recoilme/recoilme-gemma-2-9B-v0.4", - "sha": "2691f2cc8d80072f15d78cb7ae72831e1a12139e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 24.138127567307112, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2561891337207498, - "normalized_score": 25.618913372074985 - }, - "bbh": { - "name": "BBH", - "value": 0.5967285833554881, - "normalized_score": 42.44248167542507 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08459214501510574, - "normalized_score": 8.459214501510575 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34060402684563756, - "normalized_score": 12.080536912751676 - }, - "musr": { - "name": "MUSR", - "value": 0.4726875, - "normalized_score": 18.3859375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4405751329787234, - "normalized_score": 37.841681442080386 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - 
"is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-18", - "submission_date": "2024-09-19", - "generation": 0, - "base_model": "recoilme/recoilme-gemma-2-9B-v0.4", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 5, - "params_billions": 10.159, - "co2_cost": 5.837820484416759 - } - }, - { - "id": "recoilme/recoilme-gemma-2-9B-v0.5_float16_b4035d3a16486dae4f726eb953be959a4573ea67_True", - "model": { - "name": "recoilme/recoilme-gemma-2-9B-v0.5", - "sha": "b4035d3a16486dae4f726eb953be959a4573ea67", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 33.229967443633754, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7664186580495308, - "normalized_score": 76.64186580495308 - }, - "bbh": { - "name": "BBH", - "value": 0.5981472549925003, - "normalized_score": 42.353355406534206 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21148036253776434, - "normalized_score": 21.148036253776432 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33640939597315433, - "normalized_score": 11.521252796420578 - }, - "musr": { - "name": "MUSR", - "value": 0.4231770833333333, - "normalized_score": 12.163802083333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.41996343085106386, - "normalized_score": 35.551492316784866 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-26", - "submission_date": "2024-11-26", - "generation": 1, - "base_model": "recoilme/recoilme-gemma-2-9B-v0.5 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 10.159, - "co2_cost": 5.791466966555618 - } - }, - { - "id": "redrix/AngelSlayer-12B-Unslop-Mell-RPMax-DARKNESS_bfloat16_1523f26adec368380647e864dd2e9fa79f36fefe_True", - "model": { - "name": "redrix/AngelSlayer-12B-Unslop-Mell-RPMax-DARKNESS", - "sha": "1523f26adec368380647e864dd2e9fa79f36fefe", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.776537692077188, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5359590331431713, - "normalized_score": 53.59590331431713 - }, - "bbh": { - "name": "BBH", - "value": 0.5128840998052852, - "normalized_score": 29.965932310715313 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11329305135951662, - "normalized_score": 11.329305135951662 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31543624161073824, - "normalized_score": 8.7248322147651 - }, - "musr": { - "name": "MUSR", - "value": 0.38178124999999996, - "normalized_score": 8.822656249999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3179853723404255, - "normalized_score": 24.220596926713945 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-05", - "submission_date": "2024-12-12", - "generation": 1, - "base_model": "redrix/AngelSlayer-12B-Unslop-Mell-RPMax-DARKNESS (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 13, - "params_billions": 12.248, - "co2_cost": 2.1916965757123372 - } - }, - { - "id": "redrix/patricide-12B-Unslop-Mell_bfloat16_2f1a849859a24da80bd1f938a2ac6ab627ef75e8_False", - "model": { - "name": 
"redrix/patricide-12B-Unslop-Mell", - "sha": "2f1a849859a24da80bd1f938a2ac6ab627ef75e8", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 23.021830606105613, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.40739016919551235, - "normalized_score": 40.73901691955123 - }, - "bbh": { - "name": "BBH", - "value": 0.5398666865853622, - "normalized_score": 33.98944760745952 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13141993957703926, - "normalized_score": 13.141993957703926 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3238255033557047, - "normalized_score": 9.843400447427292 - }, - "musr": { - "name": "MUSR", - "value": 0.4025833333333333, - "normalized_score": 11.856249999999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3570478723404255, - "normalized_score": 28.560874704491717 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-01", - "submission_date": "2024-12-12", - "generation": 1, - "base_model": "redrix/patricide-12B-Unslop-Mell (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 16, - "params_billions": 12.248, - "co2_cost": 2.05910982261042 - } - }, - { - "id": "refuelai/Llama-3-Refueled_bfloat16_ff6d1c3ba37b31d4af421951c2300f2256fb3691_True", - "model": { - "name": "refuelai/Llama-3-Refueled", - "sha": "ff6d1c3ba37b31d4af421951c2300f2256fb3691", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.18144830627433, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4619952836252255, - "normalized_score": 46.19952836252256 - }, - "bbh": { - "name": "BBH", - "value": 0.5870766201705051, - "normalized_score": 41.721971003391026 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06646525679758308, - "normalized_score": 6.646525679758309 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.44540625, - "normalized_score": 14.642447916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30950797872340424, - "normalized_score": 23.278664302600472 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-03", - "submission_date": "2024-06-12", - "generation": 0, - "base_model": "refuelai/Llama-3-Refueled", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 192, - "params_billions": 8.03, - "co2_cost": 1.751971666755731 - } - }, - { - "id": "rhplus0831/maid-yuzu-v7_bfloat16_a0bd8c707bb80024778da4a0d057917faa53d2f6_True", - "model": { - "name": "rhplus0831/maid-yuzu-v7", - "sha": "a0bd8c707bb80024778da4a0d057917faa53d2f6", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 24.595223175883827, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6462430794735745, - "normalized_score": 64.62430794735747 - }, - "bbh": { - "name": "BBH", - "value": 0.480491692312673, - "normalized_score": 26.8198371046094 - }, - "math": { - "name": "MATH Level 5", - 
"value": 0.10196374622356495, - "normalized_score": 10.196374622356496 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.41362499999999996, - "normalized_score": 9.769791666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35397273936170215, - "normalized_score": 28.219193262411352 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-02-09", - "submission_date": "2024-09-08", - "generation": 1, - "base_model": "rhplus0831/maid-yuzu-v7 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 46.703, - "co2_cost": 8.20857064636299 - } - }, - { - "id": "rhymes-ai/Aria_bfloat16_5cc2703b3afd585f232ec5027e9c039a2001bcec_True", - "model": { - "name": "rhymes-ai/Aria", - "sha": "5cc2703b3afd585f232ec5027e9c039a2001bcec", - "precision": "bfloat16", - "type": "multimodal", - "weight_type": "Original", - "architecture": "AriaForConditionalGeneration", - "average_score": 28.870163995252046, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4773079872516035, - "normalized_score": 47.730798725160355 - }, - "bbh": { - "name": "BBH", - "value": 0.5695312446413633, - "normalized_score": 39.28149335481041 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1933534743202417, - "normalized_score": 19.335347432024168 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3624161073825503, - "normalized_score": 14.988814317673373 - }, - "musr": { - "name": "MUSR", - "value": 0.43375, - "normalized_score": 14.05208333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44049202127659576, - "normalized_score": 37.83244680851063 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-09-26", - "submission_date": "2024-10-10", - "generation": 1, - "base_model": "rhymes-ai/Aria (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 623, - "params_billions": 25.307, - "co2_cost": 15.501419246428505 - } - }, - { - "id": "rhysjones/phi-2-orange-v2_float16_f4085189114accfb65225deb8fbdf15767b7ee56_True", - "model": { - "name": "rhysjones/phi-2-orange-v2", - "sha": "f4085189114accfb65225deb8fbdf15767b7ee56", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "PhiForCausalLM", - "average_score": 15.324185096371076, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3669740732367895, - "normalized_score": 36.697407323678945 - }, - "bbh": { - "name": "BBH", - "value": 0.4770220109816213, - "normalized_score": 25.60654883732465 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04078549848942598, - "normalized_score": 4.078549848942599 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.3629583333333333, - "normalized_score": 6.969791666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.25324135638297873, - "normalized_score": 17.026817375886523 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { 
- "upload_date": "2024-03-04", - "submission_date": "2024-06-28", - "generation": 0, - "base_model": "rhysjones/phi-2-orange-v2", - "hub_license": "mit", - "hub_hearts": 26, - "params_billions": 2.78, - "co2_cost": 0.9418986908083168 - } - }, - { - "id": "riaz/FineLlama-3.1-8B_bfloat16_c4d8f16eb446910edce0c1afd0e6d5f3b06e2e7d_True", - "model": { - "name": "riaz/FineLlama-3.1-8B", - "sha": "c4d8f16eb446910edce0c1afd0e6d5f3b06e2e7d", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.660648060300424, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43734070045257695, - "normalized_score": 43.734070045257695 - }, - "bbh": { - "name": "BBH", - "value": 0.45857296498013483, - "normalized_score": 24.14877809167861 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0513595166163142, - "normalized_score": 5.13595166163142 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2751677852348993, - "normalized_score": 3.355704697986576 - }, - "musr": { - "name": "MUSR", - "value": 0.3762916666666667, - "normalized_score": 7.76979166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29637632978723405, - "normalized_score": 21.81959219858156 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-07", - "submission_date": "2024-10-12", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.8421831610300277 - } - }, - { - "id": "riaz/FineLlama-3.1-8B_float16_c4d8f16eb446910edce0c1afd0e6d5f3b06e2e7d_True", - "model": { - "name": "riaz/FineLlama-3.1-8B", - "sha": "c4d8f16eb446910edce0c1afd0e6d5f3b06e2e7d", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.14751095671923, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.413660199382084, - "normalized_score": 41.366019938208396 - }, - "bbh": { - "name": "BBH", - "value": 0.456451981676995, - "normalized_score": 23.773389590539722 - }, - "math": { - "name": "MATH Level 5", - "value": 0.045317220543806644, - "normalized_score": 4.531722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.37762500000000004, - "normalized_score": 7.76979166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29778922872340424, - "normalized_score": 21.976580969267136 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-07", - "submission_date": "2024-10-12", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 0.9019979013274062 - } - }, - { - "id": "rmdhirr/Gluon-8B_float16_cc949908c60ab7f696e133714222d6cab156e493_False", - "model": { - "name": "rmdhirr/Gluon-8B", - "sha": "cc949908c60ab7f696e133714222d6cab156e493", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": 
"LlamaForCausalLM", - "average_score": 23.97696294457469, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5052848663767692, - "normalized_score": 50.52848663767692 - }, - "bbh": { - "name": "BBH", - "value": 0.5153305292144984, - "normalized_score": 30.34224724618852 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14425981873111782, - "normalized_score": 14.425981873111782 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31208053691275167, - "normalized_score": 8.277404921700223 - }, - "musr": { - "name": "MUSR", - "value": 0.4038854166666667, - "normalized_score": 9.085677083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38081781914893614, - "normalized_score": 31.201979905437344 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-14", - "submission_date": "2024-09-14", - "generation": 1, - "base_model": "rmdhirr/Gluon-8B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.8061565768916727 - } - }, - { - "id": "rombodawg/Rombos-Coder-V2.5-Qwen-14b_bfloat16_00147618f151b8973b4b25f18281625105482af9_True", - "model": { - "name": "rombodawg/Rombos-Coder-V2.5-Qwen-14b", - "sha": "00147618f151b8973b4b25f18281625105482af9", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 32.44560837708102, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7047445223119102, - "normalized_score": 70.47445223119101 - }, - "bbh": { - "name": "BBH", - "value": 0.6165135323666455, - "normalized_score": 44.51949885999458 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3300604229607251, - "normalized_score": 33.00604229607251 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.3914583333333333, - "normalized_score": 6.965625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3939494680851064, - "normalized_score": 32.661052009456256 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-12", - "submission_date": "2025-02-06", - "generation": 1, - "base_model": "rombodawg/Rombos-Coder-V2.5-Qwen-14b (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 6, - "params_billions": 14.77, - "co2_cost": 2.702802751219327 - } - }, - { - "id": "rombodawg/Rombos-Coder-V2.5-Qwen-7b_bfloat16_896a040ead29dd6352ef7fadbf2451ce72baeca9_True", - "model": { - "name": "rombodawg/Rombos-Coder-V2.5-Qwen-7b", - "sha": "896a040ead29dd6352ef7fadbf2451ce72baeca9", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 27.405414768306354, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6210388436016436, - "normalized_score": 62.103884360164365 - }, - "bbh": { - "name": "BBH", - "value": 0.5077090028113894, - "normalized_score": 30.221720429768265 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3338368580060423, - "normalized_score": 33.383685800604226 - }, - "gpqa": { - "name": "GPQA", - 
"value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.3979375, - "normalized_score": 7.608854166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33976063829787234, - "normalized_score": 26.640070921985814 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-28", - "submission_date": "2025-02-06", - "generation": 1, - "base_model": "rombodawg/Rombos-Coder-V2.5-Qwen-7b (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 5, - "params_billions": 7.616, - "co2_cost": 1.2439234860282187 - } - }, - { - "id": "rombodawg/Rombos-LLM-V2.5-Qwen-0.5b_bfloat16_aae2e55548c8090ce357c64ca78e8b9ef6baf118_False", - "model": { - "name": "rombodawg/Rombos-LLM-V2.5-Qwen-0.5b", - "sha": "aae2e55548c8090ce357c64ca78e8b9ef6baf118", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 9.38591999487923, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.28466690603155187, - "normalized_score": 28.466690603155183 - }, - "bbh": { - "name": "BBH", - "value": 0.32936751831436256, - "normalized_score": 8.412218566269734 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06797583081570997, - "normalized_score": 6.797583081570997 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26677852348993286, - "normalized_score": 2.2371364653243813 - }, - "musr": { - "name": "MUSR", - "value": 0.32358333333333333, - "normalized_score": 0.7812499999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.18658577127659576, - "normalized_score": 9.620641252955084 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-06", - "submission_date": "2024-09-29", - "generation": 1, - "base_model": "rombodawg/Rombos-LLM-V2.5-Qwen-0.5b (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 0.63, - "co2_cost": 1.2914136239785579 - } - }, - { - "id": "rombodawg/Rombos-LLM-V2.5-Qwen-1.5b_bfloat16_1f634da015ed671efe7dc574bc2a1954f5b2cc93_False", - "model": { - "name": "rombodawg/Rombos-LLM-V2.5-Qwen-1.5b", - "sha": "1f634da015ed671efe7dc574bc2a1954f5b2cc93", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 16.35438589717866, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3402461025634206, - "normalized_score": 34.02461025634206 - }, - "bbh": { - "name": "BBH", - "value": 0.4256703145864387, - "normalized_score": 18.711343783972325 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08534743202416918, - "normalized_score": 8.534743202416918 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28859060402684567, - "normalized_score": 5.145413870246088 - }, - "musr": { - "name": "MUSR", - "value": 0.4185520833333333, - "normalized_score": 10.352343750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2922207446808511, - "normalized_score": 21.35786052009456 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, 
- "metadata": { - "upload_date": "2024-10-06", - "submission_date": "2024-09-29", - "generation": 1, - "base_model": "rombodawg/Rombos-LLM-V2.5-Qwen-1.5b (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 1.777, - "co2_cost": 1.4807159923900013 - } - }, - { - "id": "rombodawg/Rombos-LLM-V2.5-Qwen-14b_bfloat16_834ddb1712ae6d1b232b2d5b26be658d90d23e43_False", - "model": { - "name": "rombodawg/Rombos-LLM-V2.5-Qwen-14b", - "sha": "834ddb1712ae6d1b232b2d5b26be658d90d23e43", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.50095591766085, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5840447789642593, - "normalized_score": 58.40447789642593 - }, - "bbh": { - "name": "BBH", - "value": 0.6481086261669653, - "normalized_score": 49.38690027144481 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4554380664652568, - "normalized_score": 45.54380664652568 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3716442953020134, - "normalized_score": 16.21923937360179 - }, - "musr": { - "name": "MUSR", - "value": 0.4717291666666667, - "normalized_score": 18.832812499999992 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5375664893617021, - "normalized_score": 48.6184988179669 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-06", - "submission_date": "2024-09-29", - "generation": 1, - "base_model": "rombodawg/Rombos-LLM-V2.5-Qwen-14b (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 10, - "params_billions": 14.77, - "co2_cost": 4.365399723221313 - } - }, - { - "id": "rombodawg/Rombos-LLM-V2.5-Qwen-32b_bfloat16_234abe4b494dbe83ba805b791f74feb33462a33d_False", - "model": { - "name": "rombodawg/Rombos-LLM-V2.5-Qwen-32b", - "sha": "234abe4b494dbe83ba805b791f74feb33462a33d", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 45.83301184834238, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6826631116548536, - "normalized_score": 68.26631116548536 - }, - "bbh": { - "name": "BBH", - "value": 0.7045537070859799, - "normalized_score": 58.261894086787414 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4954682779456193, - "normalized_score": 49.546827794561935 - }, - "gpqa": { - "name": "GPQA", - "value": 0.39681208053691275, - "normalized_score": 19.574944071588366 - }, - "musr": { - "name": "MUSR", - "value": 0.5034166666666667, - "normalized_score": 24.727083333333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5915890957446809, - "normalized_score": 54.621010638297875 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-30", - "submission_date": "2024-10-07", - "generation": 1, - "base_model": "rombodawg/Rombos-LLM-V2.5-Qwen-32b (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 61, - "params_billions": 32.764, - "co2_cost": 35.82537944703887 - } - }, - { - "id": "rombodawg/Rombos-LLM-V2.5-Qwen-3b_bfloat16_26601a8da5afce3b5959d91bdd0faaab6df8bf95_False", - "model": { - "name": "rombodawg/Rombos-LLM-V2.5-Qwen-3b", - 
"sha": "26601a8da5afce3b5959d91bdd0faaab6df8bf95", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 25.921782051813356, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5342358276040905, - "normalized_score": 53.42358276040905 - }, - "bbh": { - "name": "BBH", - "value": 0.4808896246368473, - "normalized_score": 27.213596951125698 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2794561933534743, - "normalized_score": 27.945619335347434 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.4041666666666666, - "normalized_score": 8.554166666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37608045212765956, - "normalized_score": 30.67560579196217 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-06", - "submission_date": "2024-09-29", - "generation": 1, - "base_model": "rombodawg/Rombos-LLM-V2.5-Qwen-3b (Merge)", - "hub_license": "other", - "hub_hearts": 5, - "params_billions": 3.397, - "co2_cost": 2.011588954205996 - } - }, - { - "id": "rombodawg/Rombos-LLM-V2.5-Qwen-72b_bfloat16_5260f182e7859e13d515c4cb3926ac85ad057504_True", - "model": { - "name": "rombodawg/Rombos-LLM-V2.5-Qwen-72b", - "sha": "5260f182e7859e13d515c4cb3926ac85ad057504", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 46.50088713558663, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.715535889218385, - "normalized_score": 71.5535889218385 - }, - "bbh": { - "name": "BBH", - "value": 0.7229589065788488, - "normalized_score": 61.26714504573664 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5422960725075529, - "normalized_score": 54.229607250755286 - }, - "gpqa": { - "name": "GPQA", - "value": 0.39848993288590606, - "normalized_score": 19.798657718120808 - }, - "musr": { - "name": "MUSR", - "value": 0.4599166666666667, - "normalized_score": 17.322916666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.593500664893617, - "normalized_score": 54.83340721040189 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-30", - "submission_date": "2024-12-19", - "generation": 1, - "base_model": "rombodawg/Rombos-LLM-V2.5-Qwen-72b (Merge)", - "hub_license": "other", - "hub_hearts": 37, - "params_billions": 72.706, - "co2_cost": 32.06789125179346 - } - }, - { - "id": "rombodawg/Rombos-LLM-V2.5-Qwen-7b_bfloat16_dbd819e8f765181f774cb5b79812d081669eb302_False", - "model": { - "name": "rombodawg/Rombos-LLM-V2.5-Qwen-7b", - "sha": "dbd819e8f765181f774cb5b79812d081669eb302", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 32.74880353587022, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6237117514860571, - "normalized_score": 62.3711751486057 - }, - "bbh": { - "name": "BBH", - "value": 0.5543885046903589, - "normalized_score": 
36.372350414300634 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3814199395770393, - "normalized_score": 38.14199395770393 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3179530201342282, - "normalized_score": 9.060402684563762 - }, - "musr": { - "name": "MUSR", - "value": 0.42909375, - "normalized_score": 12.003385416666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4468916223404255, - "normalized_score": 38.54351359338061 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-06", - "submission_date": "2024-09-29", - "generation": 1, - "base_model": "rombodawg/Rombos-LLM-V2.5-Qwen-7b (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 17, - "params_billions": 7.616, - "co2_cost": 2.6341681109122317 - } - }, - { - "id": "rombodawg/Rombos-LLM-V2.5.1-Qwen-3b_bfloat16_a3305ce148f4273ab334052ab47d3aebb51d104c_False", - "model": { - "name": "rombodawg/Rombos-LLM-V2.5.1-Qwen-3b", - "sha": "a3305ce148f4273ab334052ab47d3aebb51d104c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 13.357124972053427, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2595125378440316, - "normalized_score": 25.95125378440316 - }, - "bbh": { - "name": "BBH", - "value": 0.3884043024656656, - "normalized_score": 14.881409184451359 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09138972809667675, - "normalized_score": 9.138972809667676 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.39911458333333333, - "normalized_score": 7.822656250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27194148936170215, - "normalized_score": 19.104609929078016 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-08", - "submission_date": "2024-10-08", - "generation": 1, - "base_model": "rombodawg/Rombos-LLM-V2.5.1-Qwen-3b (Merge)", - "hub_license": "other", - "hub_hearts": 1, - "params_billions": 3.397, - "co2_cost": 0.9292438685729816 - } - }, - { - "id": "rombodawg/Rombos-LLM-V2.5.1-Qwen-3b_float16_b65848c13b31f5b9d5d953df95d504d195082a3b_False", - "model": { - "name": "rombodawg/Rombos-LLM-V2.5.1-Qwen-3b", - "sha": "b65848c13b31f5b9d5d953df95d504d195082a3b", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 13.608595258365547, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2566401592219755, - "normalized_score": 25.664015922197546 - }, - "bbh": { - "name": "BBH", - "value": 0.39000839740376536, - "normalized_score": 15.057744482096084 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12084592145015106, - "normalized_score": 12.084592145015106 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2625838926174497, - "normalized_score": 1.6778523489932917 - }, - "musr": { - "name": "MUSR", - "value": 0.39911458333333333, - "normalized_score": 7.822656250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27410239361702127, - "normalized_score": 19.344710401891252 - } - }, 
- "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-08", - "submission_date": "2024-11-14", - "generation": 1, - "base_model": "rombodawg/Rombos-LLM-V2.5.1-Qwen-3b (Merge)", - "hub_license": "other", - "hub_hearts": 1, - "params_billions": 3.397, - "co2_cost": 2.5937231079345815 - } - }, - { - "id": "rombodawg/Rombos-LLM-V2.6-Nemotron-70b_bfloat16_951c9cdf68d6e679c78625d1a1f396eb71cdf746_False", - "model": { - "name": "rombodawg/Rombos-LLM-V2.6-Nemotron-70b", - "sha": "951c9cdf68d6e679c78625d1a1f396eb71cdf746", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 41.94623049260674, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7526551771521784, - "normalized_score": 75.26551771521784 - }, - "bbh": { - "name": "BBH", - "value": 0.6937699482580332, - "normalized_score": 55.80557342514651 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3330815709969788, - "normalized_score": 33.30815709969788 - }, - "gpqa": { - "name": "GPQA", - "value": 0.40604026845637586, - "normalized_score": 20.805369127516784 - }, - "musr": { - "name": "MUSR", - "value": 0.46686458333333336, - "normalized_score": 18.391406250000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5329122340425532, - "normalized_score": 48.101359338061464 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-17", - "submission_date": "2024-10-17", - "generation": 0, - "base_model": "rombodawg/Rombos-LLM-V2.6-Nemotron-70b", - "hub_license": "llama3.1", - "hub_hearts": 3, - "params_billions": 70.554, - "co2_cost": 23.901548933707822 - } - }, - { - "id": "rombodawg/Rombos-LLM-V2.6-Qwen-14b_bfloat16_887910d75a1837b8b8c7c3e50a257517d286ec60_True", - "model": { - "name": "rombodawg/Rombos-LLM-V2.6-Qwen-14b", - "sha": "887910d75a1837b8b8c7c3e50a257517d286ec60", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 42.19934519573359, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8431550508207113, - "normalized_score": 84.31550508207113 - }, - "bbh": { - "name": "BBH", - "value": 0.6442096596344892, - "normalized_score": 49.27851764033085 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5211480362537765, - "normalized_score": 52.11480362537765 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3338926174496644, - "normalized_score": 11.185682326621922 - }, - "musr": { - "name": "MUSR", - "value": 0.4220625, - "normalized_score": 12.291145833333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.49609375, - "normalized_score": 44.01041666666667 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-12", - "submission_date": "2024-12-07", - "generation": 1, - "base_model": "rombodawg/Rombos-LLM-V2.6-Qwen-14b (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 53, - "params_billions": 14.77, - "co2_cost": 5.251570998218114 - } - }, - { - "id": 
"rombodawg/rombos_Replete-Coder-Instruct-8b-Merged_bfloat16_85ad1fb943d73866ba5c8dcfe4a4f2cbfba12d4d_True", - "model": { - "name": "rombodawg/rombos_Replete-Coder-Instruct-8b-Merged", - "sha": "85ad1fb943d73866ba5c8dcfe4a4f2cbfba12d4d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.433823987631932, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5387571643239937, - "normalized_score": 53.87571643239937 - }, - "bbh": { - "name": "BBH", - "value": 0.4461693860075828, - "normalized_score": 21.937706578272657 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07779456193353475, - "normalized_score": 7.779456193353475 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26929530201342283, - "normalized_score": 2.572706935123044 - }, - "musr": { - "name": "MUSR", - "value": 0.36603125, - "normalized_score": 3.4539062499999993 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.18085106382978725, - "normalized_score": 8.983451536643026 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-06", - "submission_date": "2024-10-14", - "generation": 0, - "base_model": "rombodawg/rombos_Replete-Coder-Instruct-8b-Merged", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 8.03, - "co2_cost": 1.9282563866341134 - } - }, - { - "id": "rombodawg/rombos_Replete-Coder-Llama3-8B_bfloat16_938a45789cf94821ef6b12c98dc76622a0fa936a_True", - "model": { - "name": "rombodawg/rombos_Replete-Coder-Llama3-8B", - "sha": "938a45789cf94821ef6b12c98dc76622a0fa936a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 11.971032787867797, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4714125187834945, - "normalized_score": 47.14125187834945 - }, - "bbh": { - "name": "BBH", - "value": 0.32762771025266835, - "normalized_score": 7.087845117845117 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03927492447129909, - "normalized_score": 3.927492447129909 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26677852348993286, - "normalized_score": 2.2371364653243813 - }, - "musr": { - "name": "MUSR", - "value": 0.39663541666666663, - "normalized_score": 7.712760416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.13347739361702127, - "normalized_score": 3.719710401891251 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-06", - "submission_date": "2024-10-14", - "generation": 0, - "base_model": "rombodawg/rombos_Replete-Coder-Llama3-8B", - "hub_license": "other", - "hub_hearts": 3, - "params_billions": 8.03, - "co2_cost": 2.411204186683609 - } - }, - { - "id": "rootxhacker/Apollo-70B_float16_dea3d818bfdab718a2313e3ca023e54e3f4d9a3c_False", - "model": { - "name": "rootxhacker/Apollo-70B", - "sha": "dea3d818bfdab718a2313e3ca023e54e3f4d9a3c", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 43.15901797697317, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", 
- "value": 0.5098560707810831, - "normalized_score": 50.9856070781083 - }, - "bbh": { - "name": "BBH", - "value": 0.6804215148524603, - "normalized_score": 53.528405173016914 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5611782477341389, - "normalized_score": 56.11782477341389 - }, - "gpqa": { - "name": "GPQA", - "value": 0.45721476510067116, - "normalized_score": 27.628635346756152 - }, - "musr": { - "name": "MUSR", - "value": 0.4947708333333333, - "normalized_score": 23.146354166666658 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5279255319148937, - "normalized_score": 47.54728132387708 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-02", - "submission_date": "2025-03-02", - "generation": 1, - "base_model": "rootxhacker/Apollo-70B (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 70.554, - "co2_cost": 15.70960535007151 - } - }, - { - "id": "rootxhacker/Apollo_v2-32B_bfloat16_2ce67ae5de87c736b78110d0e0219f4943406043_True", - "model": { - "name": "rootxhacker/Apollo_v2-32B", - "sha": "2ce67ae5de87c736b78110d0e0219f4943406043", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.81170120905305, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4280486885907171, - "normalized_score": 42.8048688590717 - }, - "bbh": { - "name": "BBH", - "value": 0.7072274795963693, - "normalized_score": 58.274951480798315 - }, - "math": { - "name": "MATH Level 5", - "value": 0.42749244712990936, - "normalized_score": 42.74924471299094 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3783557046979866, - "normalized_score": 17.114093959731544 - }, - "musr": { - "name": "MUSR", - "value": 0.4993854166666667, - "normalized_score": 23.823177083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5869348404255319, - "normalized_score": 54.10387115839244 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-04", - "submission_date": "2025-03-11", - "generation": 1, - "base_model": "rootxhacker/Apollo_v2-32B (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 32.764, - "co2_cost": 62.07832901657737 - } - }, - { - "id": "rootxhacker/apollo-7B_float16_778170316e44277245135f1ed6a6ff7f0ca6725e_False", - "model": { - "name": "rootxhacker/apollo-7B", - "sha": "778170316e44277245135f1ed6a6ff7f0ca6725e", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 10.721175786000847, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.29533304964161755, - "normalized_score": 29.533304964161758 - }, - "bbh": { - "name": "BBH", - "value": 0.3636262699883149, - "normalized_score": 11.072693643924145 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0256797583081571, - "normalized_score": 2.56797583081571 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2785234899328859, - "normalized_score": 3.8031319910514525 - }, - "musr": { - "name": "MUSR", - "value": 0.41312499999999996, - "normalized_score": 9.040625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17478390957446807, - 
"normalized_score": 8.309323286052008 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-02", - "submission_date": "2025-03-02", - "generation": 1, - "base_model": "rootxhacker/apollo-7B (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.4688116118963463 - } - }, - { - "id": "rsh345/mistral-ft-optimized-1218-NeuralHermes-2.5-Mistral-7B_float16_9bd6ed02533f746473c7e8b926379d858e619925_False", - "model": { - "name": "rsh345/mistral-ft-optimized-1218-NeuralHermes-2.5-Mistral-7B", - "sha": "9bd6ed02533f746473c7e8b926379d858e619925", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.02740291549346, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3891807071902552, - "normalized_score": 38.91807071902552 - }, - "bbh": { - "name": "BBH", - "value": 0.5188437309746964, - "normalized_score": 32.78974404613455 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07326283987915408, - "normalized_score": 7.326283987915408 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.46719791666666666, - "normalized_score": 17.26640625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30535239361702127, - "normalized_score": 22.816932624113473 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-27", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.9523481336611325 - } - }, - { - "id": "rubenroy/Geneva-12B-GCv2-5m_bfloat16_857f83b4043a3e28203a6b6bff19f0fad4cc1c83_False", - "model": { - "name": "rubenroy/Geneva-12B-GCv2-5m", - "sha": "857f83b4043a3e28203a6b6bff19f0fad4cc1c83", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 16.956630719456832, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2586381911106974, - "normalized_score": 25.863819111069738 - }, - "bbh": { - "name": "BBH", - "value": 0.5278373390214104, - "normalized_score": 32.64683054938012 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08006042296072508, - "normalized_score": 8.006042296072508 - }, - "gpqa": { - "name": "GPQA", - "value": 0.287751677852349, - "normalized_score": 5.033557046979867 - }, - "musr": { - "name": "MUSR", - "value": 0.3524791666666667, - "normalized_score": 5.193229166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3249667553191489, - "normalized_score": 24.9963061465721 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-01", - "submission_date": "2025-02-27", - "generation": 2, - "base_model": "mistralai/Mistral-Nemo-Base-2407", - "hub_license": "apache-2.0", - "hub_hearts": 12, - "params_billions": 12.248, - "co2_cost": 0.8862036586829911 - } - }, - { - "id": 
"rubenroy/Gilgamesh-72B_float16_5aa7df9b748abcbda03e8eb69b64348e09cd72e3_True", - "model": { - "name": "rubenroy/Gilgamesh-72B", - "sha": "5aa7df9b748abcbda03e8eb69b64348e09cd72e3", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 46.793671661266096, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8486006019583594, - "normalized_score": 84.86006019583593 - }, - "bbh": { - "name": "BBH", - "value": 0.7253327589560739, - "normalized_score": 61.83602130504769 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4380664652567976, - "normalized_score": 43.80664652567976 - }, - "gpqa": { - "name": "GPQA", - "value": 0.39429530201342283, - "normalized_score": 19.239373601789712 - }, - "musr": { - "name": "MUSR", - "value": 0.46264583333333337, - "normalized_score": 17.664062499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5802027925531915, - "normalized_score": 53.3558658392435 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-01", - "submission_date": "2025-02-25", - "generation": 1, - "base_model": "rubenroy/Gilgamesh-72B (Merge)", - "hub_license": "other", - "hub_hearts": 8, - "params_billions": 72.706, - "co2_cost": 51.29178285770202 - } - }, - { - "id": "rubenroy/Zurich-14B-GCv2-5m_bfloat16_08f86f70e83f376f963dd2f21b5a15cc6cf8f17b_False", - "model": { - "name": "rubenroy/Zurich-14B-GCv2-5m", - "sha": "08f86f70e83f376f963dd2f21b5a15cc6cf8f17b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 37.06368897687889, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6163679038285084, - "normalized_score": 61.63679038285083 - }, - "bbh": { - "name": "BBH", - "value": 0.6308359017750411, - "normalized_score": 46.7337399544949 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3074018126888218, - "normalized_score": 30.74018126888218 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3615771812080537, - "normalized_score": 14.876957494407161 - }, - "musr": { - "name": "MUSR", - "value": 0.4874479166666667, - "normalized_score": 21.364322916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5232712765957447, - "normalized_score": 47.030141843971634 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-31", - "submission_date": "2025-02-27", - "generation": 2, - "base_model": "Qwen/Qwen2.5-14B", - "hub_license": "apache-2.0", - "hub_hearts": 12, - "params_billions": 14.77, - "co2_cost": 1.9337617528412068 - } - }, - { - "id": "ruizhe1217/sft-s1-qwen-0.5b_float16_2f8e051a801011cc906efe56c535aab5608aa341_False", - "model": { - "name": "ruizhe1217/sft-s1-qwen-0.5b", - "sha": "2f8e051a801011cc906efe56c535aab5608aa341", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 9.240285567836873, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.27487510915482033, - "normalized_score": 27.48751091548203 - }, - "bbh": { - "name": 
"BBH", - "value": 0.33005365550588683, - "normalized_score": 8.27626354194646 - }, - "math": { - "name": "MATH Level 5", - "value": 0.061933534743202415, - "normalized_score": 6.193353474320242 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27097315436241615, - "normalized_score": 2.796420581655486 - }, - "musr": { - "name": "MUSR", - "value": 0.31958333333333333, - "normalized_score": 0.7812499999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1891622340425532, - "normalized_score": 9.90691489361702 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-26", - "submission_date": "2025-02-27", - "generation": 1, - "base_model": "Qwen/Qwen2.5-0.5B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.494, - "co2_cost": 0.5358550528471566 - } - }, - { - "id": "rwitz/go-bruins-v2_float16_6d9e57d3a36dbad364ec77ca642873d9fc7fd61c_True", - "model": { - "name": "rwitz/go-bruins-v2", - "sha": "6d9e57d3a36dbad364ec77ca642873d9fc7fd61c", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 15.433967198473246, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.40958877999264176, - "normalized_score": 40.958877999264175 - }, - "bbh": { - "name": "BBH", - "value": 0.37988446841089685, - "normalized_score": 12.69326018768569 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06722054380664652, - "normalized_score": 6.722054380664652 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2625838926174497, - "normalized_score": 1.6778523489932917 - }, - "musr": { - "name": "MUSR", - "value": 0.41375, - "normalized_score": 10.985416666666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2760970744680851, - "normalized_score": 19.566341607565015 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.2756394415625334 - } - }, - { - "id": "sabersaleh/Llama2-7B-CPO_bfloat16_cfc39fd915d4cb89283a901f0eed60f268ec8dce_False", - "model": { - "name": "sabersaleh/Llama2-7B-CPO", - "sha": "cfc39fd915d4cb89283a901f0eed60f268ec8dce", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 7.303190047047342, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1545488193548673, - "normalized_score": 15.454881935486728 - }, - "bbh": { - "name": "BBH", - "value": 0.3457919655499851, - "normalized_score": 8.656016246266047 - }, - "math": { - "name": "MATH Level 5", - "value": 0.013595166163141994, - "normalized_score": 1.3595166163141994 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.40482291666666664, - "normalized_score": 9.269531250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1605718085106383, - "normalized_score": 6.730200945626476 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": 
false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-30", - "submission_date": "2024-11-30", - "generation": 1, - "base_model": "sabersaleh/Llama2-7B-CPO (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 7.0, - "co2_cost": 0.9544175459260744 - } - }, - { - "id": "sabersaleh/Llama2-7B-DPO_float16_e07f7224c0ecd95eb8c82ae28e00c32031258942_False", - "model": { - "name": "sabersaleh/Llama2-7B-DPO", - "sha": "e07f7224c0ecd95eb8c82ae28e00c32031258942", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 7.558004575587002, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.14533105493424114, - "normalized_score": 14.533105493424115 - }, - "bbh": { - "name": "BBH", - "value": 0.3512218731420535, - "normalized_score": 9.362230727907603 - }, - "math": { - "name": "MATH Level 5", - "value": 0.015861027190332326, - "normalized_score": 1.5861027190332326 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.4113645833333333, - "normalized_score": 10.453906250000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16256648936170212, - "normalized_score": 6.951832151300234 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-30", - "submission_date": "2024-11-30", - "generation": 1, - "base_model": "sabersaleh/Llama2-7B-DPO (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 7.0, - "co2_cost": 0.8351197963436573 - } - }, - { - "id": "sabersaleh/Llama2-7B-IPO_bfloat16_424beb187852f704718d75cf9f2ac6c63e10d941_False", - "model": { - "name": "sabersaleh/Llama2-7B-IPO", - "sha": "424beb187852f704718d75cf9f2ac6c63e10d941", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 7.804715247914783, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17685518867715438, - "normalized_score": 17.685518867715437 - }, - "bbh": { - "name": "BBH", - "value": 0.3474552716912811, - "normalized_score": 9.019805379879918 - }, - "math": { - "name": "MATH Level 5", - "value": 0.015861027190332326, - "normalized_score": 1.5861027190332326 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.4047604166666667, - "normalized_score": 9.328385416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16173537234042554, - "normalized_score": 6.859485815602836 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-30", - "submission_date": "2024-11-30", - "generation": 1, - "base_model": "sabersaleh/Llama2-7B-IPO (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 7.0, - "co2_cost": 0.8523306948320086 - } - }, - { - "id": "sabersaleh/Llama2-7B-KTO_bfloat16_60ebb9b532251942686b0cd79cbf56e6694f6e0c_False", - "model": { - "name": "sabersaleh/Llama2-7B-KTO", - "sha": 
"60ebb9b532251942686b0cd79cbf56e6694f6e0c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 7.882716013659315, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15284999357260956, - "normalized_score": 15.284999357260956 - }, - "bbh": { - "name": "BBH", - "value": 0.35007577568366255, - "normalized_score": 9.514963942405524 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0188821752265861, - "normalized_score": 1.8882175226586102 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.41669791666666667, - "normalized_score": 11.187239583333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1636469414893617, - "normalized_score": 7.0718823877068555 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-30", - "submission_date": "2024-11-30", - "generation": 1, - "base_model": "sabersaleh/Llama2-7B-KTO (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 7.0, - "co2_cost": 1.2560579107691545 - } - }, - { - "id": "sabersaleh/Llama2-7B-SPO_bfloat16_710558a6e70820a1d2f23823380a4accfed4d9b6_False", - "model": { - "name": "sabersaleh/Llama2-7B-SPO", - "sha": "710558a6e70820a1d2f23823380a4accfed4d9b6", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 7.352632078107196, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15667207453999832, - "normalized_score": 15.667207453999833 - }, - "bbh": { - "name": "BBH", - "value": 0.33834029554844597, - "normalized_score": 7.766130774573562 - }, - "math": { - "name": "MATH Level 5", - "value": 0.019637462235649546, - "normalized_score": 1.9637462235649545 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27684563758389263, - "normalized_score": 3.5794183445190177 - }, - "musr": { - "name": "MUSR", - "value": 0.3874270833333333, - "normalized_score": 6.728385416666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17569813829787234, - "normalized_score": 8.41090425531915 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-30", - "submission_date": "2024-11-30", - "generation": 1, - "base_model": "sabersaleh/Llama2-7B-SPO (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 7.0, - "co2_cost": 0.8157692025792285 - } - }, - { - "id": "sabersaleh/Llama2-7B-SimPO_bfloat16_860de39d93c457d719c3f299e06ba4897aa51f3d_False", - "model": { - "name": "sabersaleh/Llama2-7B-SimPO", - "sha": "860de39d93c457d719c3f299e06ba4897aa51f3d", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 7.610783251808889, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1658643510330368, - "normalized_score": 16.58643510330368 - }, - "bbh": { - "name": "BBH", - "value": 0.34891553101294254, - "normalized_score": 8.98121133440231 - }, - "math": { - "name": "MATH 
Level 5", - "value": 0.015861027190332326, - "normalized_score": 1.5861027190332326 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.40069791666666665, - "normalized_score": 8.587239583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16414561170212766, - "normalized_score": 7.127290189125294 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-30", - "submission_date": "2024-11-30", - "generation": 1, - "base_model": "sabersaleh/Llama2-7B-SimPO (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 7.0, - "co2_cost": 0.9476039241157408 - } - }, - { - "id": "sabersaleh/Llama3_bfloat16_56cdeb0c32b330835c4a88f480066e0308ecf127_False", - "model": { - "name": "sabersaleh/Llama3", - "sha": "56cdeb0c32b330835c4a88f480066e0308ecf127", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.458608830485915, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3320777758569484, - "normalized_score": 33.20777758569484 - }, - "bbh": { - "name": "BBH", - "value": 0.47821899796340944, - "normalized_score": 26.70679448256402 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05664652567975831, - "normalized_score": 5.664652567975831 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3104026845637584, - "normalized_score": 8.05369127516779 - }, - "musr": { - "name": "MUSR", - "value": 0.39334375000000005, - "normalized_score": 7.101302083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.316156914893617, - "normalized_score": 24.017434988179666 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-29", - "submission_date": "2024-11-29", - "generation": 1, - "base_model": "sabersaleh/Llama3 (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3932662824351285 - } - }, - { - "id": "sabersalehk/Llama3-001-300_bfloat16_17152ae8544f09f2fa25ae276d1d56ca3302e631_False", - "model": { - "name": "sabersalehk/Llama3-001-300", - "sha": "17152ae8544f09f2fa25ae276d1d56ca3302e631", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.12107684025811, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3178643776291351, - "normalized_score": 31.786437762913515 - }, - "bbh": { - "name": "BBH", - "value": 0.47445771982516544, - "normalized_score": 25.706819355250058 - }, - "math": { - "name": "MATH Level 5", - "value": 0.052870090634441085, - "normalized_score": 5.287009063444108 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.40639583333333335, - "normalized_score": 9.366145833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3158244680851064, - "normalized_score": 23.98049645390071 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - 
"is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-03", - "submission_date": "2024-12-03", - "generation": 1, - "base_model": "sabersalehk/Llama3-001-300 (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.399065934688029 - } - }, - { - "id": "sabersalehk/Llama3-SimPO_bfloat16_022f6b4f31728945bc031e2cbf1ed461a8148642_False", - "model": { - "name": "sabersalehk/Llama3-SimPO", - "sha": "022f6b4f31728945bc031e2cbf1ed461a8148642", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.716097714176378, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.36420142998355476, - "normalized_score": 36.42014299835547 - }, - "bbh": { - "name": "BBH", - "value": 0.48735382942408356, - "normalized_score": 27.448561942356932 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05740181268882175, - "normalized_score": 5.740181268882175 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.40459375000000003, - "normalized_score": 11.007552083333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3156582446808511, - "normalized_score": 23.96202718676123 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-02", - "submission_date": "2024-12-02", - "generation": 1, - "base_model": "sabersalehk/Llama3-SimPO (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4053791547614594 - } - }, - { - "id": "sabersalehk/Llama3_001_200_bfloat16_f0a8d4ac002abf89f19a43c32d945b415b7bfe5d_False", - "model": { - "name": "sabersalehk/Llama3_001_200", - "sha": "f0a8d4ac002abf89f19a43c32d945b415b7bfe5d", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.287940552816096, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.321836061649756, - "normalized_score": 32.1836061649756 - }, - "bbh": { - "name": "BBH", - "value": 0.4727921518419169, - "normalized_score": 25.625567506925282 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0513595166163142, - "normalized_score": 5.13595166163142 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.4037291666666667, - "normalized_score": 9.366145833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31831781914893614, - "normalized_score": 24.257535460992905 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-03", - "submission_date": "2024-12-03", - "generation": 1, - "base_model": "sabersalehk/Llama3_001_200 (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4521678393056572 - } - }, - { - "id": "sabersalehk/Llama3_01_300_bfloat16_159a25046a6bc0be3706e9a49389de4b72b21707_False", - "model": { - "name": "sabersalehk/Llama3_01_300", - "sha": "159a25046a6bc0be3706e9a49389de4b72b21707", - "precision": 
"bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.69882386756007, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2958827023408999, - "normalized_score": 29.588270234089993 - }, - "bbh": { - "name": "BBH", - "value": 0.4691387139601247, - "normalized_score": 25.1552501785412 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04984894259818731, - "normalized_score": 4.984894259818732 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.40648958333333335, - "normalized_score": 9.144531250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31241688829787234, - "normalized_score": 23.60187647754137 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-03", - "submission_date": "2024-12-03", - "generation": 1, - "base_model": "sabersalehk/Llama3_01_300 (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4485430669659736 - } - }, - { - "id": "saishf/Fimbulvetr-Kuro-Lotus-10.7B_float16_ec1288fd8c06ac408a2a7e503ea62ac300e474e1_True", - "model": { - "name": "saishf/Fimbulvetr-Kuro-Lotus-10.7B", - "sha": "ec1288fd8c06ac408a2a7e503ea62ac300e474e1", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.677867158266114, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49394384677101205, - "normalized_score": 49.39438467710121 - }, - "bbh": { - "name": "BBH", - "value": 0.4342316286386943, - "normalized_score": 19.90882095261725 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05362537764350453, - "normalized_score": 5.362537764350453 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.4445104166666667, - "normalized_score": 16.03046875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33892952127659576, - "normalized_score": 26.547724586288417 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-02-13", - "submission_date": "2024-07-09", - "generation": 1, - "base_model": "saishf/Fimbulvetr-Kuro-Lotus-10.7B (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 18, - "params_billions": 10.732, - "co2_cost": 1.6183371562566071 - } - }, - { - "id": "saishf/Neural-SOVLish-Devil-8B-L3_bfloat16_3df738f6b3512f5f9571f862811717e1fc8c36b6_False", - "model": { - "name": "saishf/Neural-SOVLish-Devil-8B-L3", - "sha": "3df738f6b3512f5f9571f862811717e1fc8c36b6", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.691330731173803, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.41988036188424493, - "normalized_score": 41.98803618842449 - }, - "bbh": { - "name": "BBH", - "value": 0.5141802159065874, - "normalized_score": 30.100237081099607 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0891238670694864, 
- "normalized_score": 8.91238670694864 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.4109583333333333, - "normalized_score": 10.23645833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3807347074468085, - "normalized_score": 31.19274527186761 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-28", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "saishf/Neural-SOVLish-Devil-8B-L3 (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 10, - "params_billions": 8.03, - "co2_cost": 1.3797564443562462 - } - }, - { - "id": "saishshinde15/TethysAI_Base_Reasoning_float16_6c3b2772655a55e5b8e30265b985a9ee84cdb6e8_True", - "model": { - "name": "saishshinde15/TethysAI_Base_Reasoning", - "sha": "6c3b2772655a55e5b8e30265b985a9ee84cdb6e8", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 26.354839025207614, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6368757119997164, - "normalized_score": 63.68757119997163 - }, - "bbh": { - "name": "BBH", - "value": 0.4518558867290183, - "normalized_score": 23.597502964331113 - }, - "math": { - "name": "MATH Level 5", - "value": 0.31419939577039274, - "normalized_score": 31.419939577039273 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2860738255033557, - "normalized_score": 4.809843400447425 - }, - "musr": { - "name": "MUSR", - "value": 0.4074583333333333, - "normalized_score": 9.765625000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3236369680851064, - "normalized_score": 24.848552009456267 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-21", - "submission_date": "2025-02-25", - "generation": 1, - "base_model": "saishshinde15/TethysAI_Base_Reasoning (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 3.086, - "co2_cost": 0.7359453000675985 - } - }, - { - "id": "saishshinde15/TethysAI_Vortex_float16_9209d07a2dd5aa2226e4bde09cfeb30f5ed70c8d_True", - "model": { - "name": "saishshinde15/TethysAI_Vortex", - "sha": "9209d07a2dd5aa2226e4bde09cfeb30f5ed70c8d", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 24.80337280546497, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4297718941297978, - "normalized_score": 42.97718941297978 - }, - "bbh": { - "name": "BBH", - "value": 0.4749261293502527, - "normalized_score": 26.91431404565564 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3149546827794562, - "normalized_score": 31.49546827794562 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.44578125, - "normalized_score": 15.155989583333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3240525265957447, - "normalized_score": 24.894725177304963 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - 
"is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-25", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.086, - "co2_cost": 0.7435275277874127 - } - }, - { - "id": "saishshinde15/TethysAI_Vortex_Reasoning_float16_97e0c62c764fad13fcba5735c2d6564ee01e3951_False", - "model": { - "name": "saishshinde15/TethysAI_Vortex_Reasoning", - "sha": "97e0c62c764fad13fcba5735c2d6564ee01e3951", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 21.791497955306028, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.40211970903868405, - "normalized_score": 40.211970903868405 - }, - "bbh": { - "name": "BBH", - "value": 0.4693805860486275, - "normalized_score": 25.73813775767069 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21450151057401812, - "normalized_score": 21.45015105740181 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.40844791666666663, - "normalized_score": 9.62265625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3380984042553192, - "normalized_score": 26.455378250591018 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-24", - "submission_date": "2025-02-25", - "generation": 1, - "base_model": "saishshinde15/TethysAI_Vortex_Reasoning (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 3.086, - "co2_cost": 0.7884395573169932 - } - }, - { - "id": "sakaltcommunity/novablast-preview_float16_a9cb798ec06f69ca67f4645020ba2d0eee4ffd58_False", - "model": { - "name": "sakaltcommunity/novablast-preview", - "sha": "a9cb798ec06f69ca67f4645020ba2d0eee4ffd58", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.51641804260023, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4530279657974175, - "normalized_score": 45.30279657974175 - }, - "bbh": { - "name": "BBH", - "value": 0.7042765234852668, - "normalized_score": 58.18215998102389 - }, - "math": { - "name": "MATH Level 5", - "value": 0.48942598187311176, - "normalized_score": 48.94259818731118 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38171140939597314, - "normalized_score": 17.561521252796418 - }, - "musr": { - "name": "MUSR", - "value": 0.5021145833333334, - "normalized_score": 24.497656250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5915059840425532, - "normalized_score": 54.61177600472813 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-14", - "submission_date": "2024-12-18", - "generation": 1, - "base_model": "sakaltcommunity/novablast-preview (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 32.764, - "co2_cost": 11.262021345541969 - } - }, - { - "id": "sakaltcommunity/sakaltum-7b_bfloat16_692d1c3efdae68a3ace336d865daceb713b93130_False", - "model": { - "name": "sakaltcommunity/sakaltum-7b", - "sha": 
"692d1c3efdae68a3ace336d865daceb713b93130", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 13.528323522415517, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2603868845773658, - "normalized_score": 26.038688457736583 - }, - "bbh": { - "name": "BBH", - "value": 0.4575213514148995, - "normalized_score": 23.75264450329209 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02945619335347432, - "normalized_score": 2.9456193353474323 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.3775, - "normalized_score": 5.754166666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2769281914893617, - "normalized_score": 19.65868794326241 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-13", - "submission_date": "2024-12-13", - "generation": 1, - "base_model": "sakaltcommunity/sakaltum-7b (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.9443037628768556 - } - }, - { - "id": "sakhan10/quantized_open_llama_3b_v2_float16_e8d51ad5204806edf9c2eeb8c56139a440a70265_False", - "model": { - "name": "sakhan10/quantized_open_llama_3b_v2", - "sha": "e8d51ad5204806edf9c2eeb8c56139a440a70265", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.142500028294101, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.18722212618075595, - "normalized_score": 18.722212618075595 - }, - "bbh": { - "name": "BBH", - "value": 0.3019800780121471, - "normalized_score": 2.805733273363854 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27684563758389263, - "normalized_score": 3.5794183445190177 - }, - "musr": { - "name": "MUSR", - "value": 0.3681666666666667, - "normalized_score": 4.687499999999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10954122340425532, - "normalized_score": 1.0601359338061456 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-23", - "submission_date": "2024-08-28", - "generation": 1, - "base_model": "openlm-research/open_llama_3b_v2", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.0, - "co2_cost": 0.7854007472075212 - } - }, - { - "id": "saltlux/luxia-21.4b-alignment-v1.0_float16_87d5673e6d9f60462f195e9414a0bf6874c89ceb_True", - "model": { - "name": "saltlux/luxia-21.4b-alignment-v1.0", - "sha": "87d5673e6d9f60462f195e9414a0bf6874c89ceb", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.454573936782676, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.36929679915956326, - "normalized_score": 36.92967991595633 - }, - "bbh": { - "name": "BBH", - "value": 0.6373342606775594, - "normalized_score": 48.02111296160791 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09743202416918428, - 
"normalized_score": 9.743202416918429 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.43284374999999997, - "normalized_score": 12.50546875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34034242021276595, - "normalized_score": 26.70471335697399 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-03-12", - "submission_date": "2024-06-29", - "generation": 0, - "base_model": "saltlux/luxia-21.4b-alignment-v1.0", - "hub_license": "apache-2.0", - "hub_hearts": 33, - "params_billions": 21.421, - "co2_cost": 3.4880949208019976 - } - }, - { - "id": "saltlux/luxia-21.4b-alignment-v1.2_bfloat16_eed12b5574fa49cc81e57a88aff24c08c13721c0_True", - "model": { - "name": "saltlux/luxia-21.4b-alignment-v1.2", - "sha": "eed12b5574fa49cc81e57a88aff24c08c13721c0", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.58071047593133, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.41153694419695297, - "normalized_score": 41.1536944196953 - }, - "bbh": { - "name": "BBH", - "value": 0.6371180708112368, - "normalized_score": 47.76916471884749 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08459214501510574, - "normalized_score": 8.459214501510575 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.4458958333333334, - "normalized_score": 14.903645833333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34732380319148937, - "normalized_score": 27.480422576832154 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-27", - "submission_date": "2024-07-30", - "generation": 0, - "base_model": "saltlux/luxia-21.4b-alignment-v1.2", - "hub_license": "apache-2.0", - "hub_hearts": 9, - "params_billions": 21.421, - "co2_cost": 4.091851552468708 - } - }, - { - "id": "sam-paech/Darkest-muse-v1_bfloat16_55f6ba0218e9615d18a76f244a874b941f8c434f_False", - "model": { - "name": "sam-paech/Darkest-muse-v1", - "sha": "55f6ba0218e9615d18a76f244a874b941f8c434f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 33.447324199858144, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7344202272193336, - "normalized_score": 73.44202272193337 - }, - "bbh": { - "name": "BBH", - "value": 0.5968439530708949, - "normalized_score": 42.61173126837064 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21450151057401812, - "normalized_score": 21.45015105740181 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34395973154362414, - "normalized_score": 12.527964205816552 - }, - "musr": { - "name": "MUSR", - "value": 0.4502083333333333, - "normalized_score": 15.276041666666671 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4183843085106383, - "normalized_score": 35.37603427895981 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - 
"metadata": { - "upload_date": "2024-10-22", - "submission_date": "2024-10-26", - "generation": 1, - "base_model": "sam-paech/Darkest-muse-v1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 67, - "params_billions": 10.159, - "co2_cost": 4.413894532223269 - } - }, - { - "id": "sam-paech/Delirium-v1_bfloat16_98dc2dad47af405013c0584d752504ca448bd8eb_False", - "model": { - "name": "sam-paech/Delirium-v1", - "sha": "98dc2dad47af405013c0584d752504ca448bd8eb", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 33.09183474861921, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7207564816908026, - "normalized_score": 72.07564816908027 - }, - "bbh": { - "name": "BBH", - "value": 0.5962113834521733, - "normalized_score": 42.31507908993327 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2107250755287009, - "normalized_score": 21.07250755287009 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34312080536912754, - "normalized_score": 12.416107382550338 - }, - "musr": { - "name": "MUSR", - "value": 0.45144791666666667, - "normalized_score": 15.230989583333338 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4189660904255319, - "normalized_score": 35.44067671394799 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-17", - "submission_date": "2024-10-26", - "generation": 1, - "base_model": "unsloth/gemma-2-9b-it", - "hub_license": "gemma", - "hub_hearts": 17, - "params_billions": 9.242, - "co2_cost": 4.791002491828395 - } - }, - { - "id": "sam-paech/Quill-v1_bfloat16_3cab1cac9d3de0d25b48ea86b4533aa220231f20_False", - "model": { - "name": "sam-paech/Quill-v1", - "sha": "3cab1cac9d3de0d25b48ea86b4533aa220231f20", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 33.06394735308967, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.712213593265868, - "normalized_score": 71.22135932658679 - }, - "bbh": { - "name": "BBH", - "value": 0.5969226347989487, - "normalized_score": 42.59766913903588 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2122356495468278, - "normalized_score": 21.22356495468278 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33976510067114096, - "normalized_score": 11.968680089485462 - }, - "musr": { - "name": "MUSR", - "value": 0.45547916666666666, - "normalized_score": 16.13489583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4171376329787234, - "normalized_score": 35.237514775413715 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-20", - "submission_date": "2024-10-26", - "generation": 1, - "base_model": "sam-paech/Quill-v1 (Merge)", - "hub_license": "", - "hub_hearts": 13, - "params_billions": 9.242, - "co2_cost": 4.626938213438511 - } - }, - { - "id": "sarvamai/OpenHathi-7B-Hi-v0.1-Base_bfloat16_2cb5807b852028defa07c56c96a7ff5c11f8df0e_False", - "model": { - "name": "sarvamai/OpenHathi-7B-Hi-v0.1-Base", - "sha": "2cb5807b852028defa07c56c96a7ff5c11f8df0e", - "precision": "bfloat16", - "type": 
"fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 6.3386943375795655, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.18040244329490196, - "normalized_score": 18.040244329490196 - }, - "bbh": { - "name": "BBH", - "value": 0.33540458231510667, - "normalized_score": 7.645606515056556 - }, - "math": { - "name": "MATH Level 5", - "value": 0.008308157099697885, - "normalized_score": 0.8308157099697886 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2533557046979866, - "normalized_score": 0.44742729306487633 - }, - "musr": { - "name": "MUSR", - "value": 0.36584375, - "normalized_score": 5.030468750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15433843085106383, - "normalized_score": 6.03760342789598 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-12-13", - "submission_date": "2025-02-06", - "generation": 0, - "base_model": "sarvamai/OpenHathi-7B-Hi-v0.1-Base", - "hub_license": "llama2", - "hub_hearts": 109, - "params_billions": 6.87, - "co2_cost": 1.036453945253264 - } - }, - { - "id": "schnapss/testmerge-7b_bfloat16_ff84f5b87ba51db9622b1c553c076533890a8f50_False", - "model": { - "name": "schnapss/testmerge-7b", - "sha": "ff84f5b87ba51db9622b1c553c076533890a8f50", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.913446084822322, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.39222817679313116, - "normalized_score": 39.22281767931311 - }, - "bbh": { - "name": "BBH", - "value": 0.5187478405637375, - "normalized_score": 32.63816624149671 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06873111782477341, - "normalized_score": 6.873111782477341 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.4685625, - "normalized_score": 17.703645833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30601728723404253, - "normalized_score": 22.89080969267139 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-16", - "submission_date": "2024-11-16", - "generation": 1, - "base_model": "schnapss/testmerge-7b (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.9403094349551654 - } - }, - { - "id": "sci-m-wang/Mistral-7B-Instruct-sa-v0.1_bfloat16_2dcff66eac0c01dc50e4c41eea959968232187fe_True", - "model": { - "name": "sci-m-wang/Mistral-7B-Instruct-sa-v0.1", - "sha": "2dcff66eac0c01dc50e4c41eea959968232187fe", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 12.263004871086814, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4335186194851882, - "normalized_score": 43.35186194851882 - }, - "bbh": { - "name": "BBH", - "value": 0.32727821561411724, - "normalized_score": 5.7436460774299505 - }, - "math": { - "name": "MATH Level 5", - "value": 0.014350453172205438, - "normalized_score": 1.4350453172205437 - }, - "gpqa": { - "name": "GPQA", - "value": 
0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.38999999999999996, - "normalized_score": 6.6833333333333345 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2362034574468085, - "normalized_score": 15.133717494089835 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-31", - "submission_date": "2024-06-27", - "generation": 2, - "base_model": "mistralai/Mistral-7B-v0.1", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 14.483, - "co2_cost": 1.5301646187834628 - } - }, - { - "id": "sci-m-wang/Phi-3-mini-4k-instruct-sa-v0.1_bfloat16_5a516f86087853f9d560c95eb9209c1d4ed9ff69_True", - "model": { - "name": "sci-m-wang/Phi-3-mini-4k-instruct-sa-v0.1", - "sha": "5a516f86087853f9d560c95eb9209c1d4ed9ff69", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 25.82414451642259, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5020623057930734, - "normalized_score": 50.206230579307345 - }, - "bbh": { - "name": "BBH", - "value": 0.5502038722383045, - "normalized_score": 36.605419148768114 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14803625377643503, - "normalized_score": 14.803625377643503 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3288590604026846, - "normalized_score": 10.514541387024611 - }, - "musr": { - "name": "MUSR", - "value": 0.40730208333333334, - "normalized_score": 9.64609375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39852061170212766, - "normalized_score": 33.16895685579196 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-01", - "submission_date": "2024-06-27", - "generation": 1, - "base_model": "microsoft/Phi-3-mini-4k-instruct", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 7.642, - "co2_cost": 2.5610053501203294 - } - }, - { - "id": "sci-m-wang/deepseek-llm-7b-chat-sa-v0.1_bfloat16_afbda8b347ec881666061fa67447046fc5164ec8_True", - "model": { - "name": "sci-m-wang/deepseek-llm-7b-chat-sa-v0.1", - "sha": "afbda8b347ec881666061fa67447046fc5164ec8", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 13.208049800984782, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4035935761557113, - "normalized_score": 40.35935761557113 - }, - "bbh": { - "name": "BBH", - "value": 0.37177200995276305, - "normalized_score": 12.05197465522808 - }, - "math": { - "name": "MATH Level 5", - "value": 0.026435045317220542, - "normalized_score": 2.643504531722054 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25671140939597314, - "normalized_score": 0.8948545861297527 - }, - "musr": { - "name": "MUSR", - "value": 0.4173125, - "normalized_score": 9.864062499999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.22091090425531915, - "normalized_score": 13.434544917257682 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-31", - "submission_date": "2024-06-27", - "generation": 1, - 
"base_model": "deepseek-ai/deepseek-llm-7b-chat", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 7.0, - "co2_cost": 1.9831480049623817 - } - }, - { - "id": "securin/Securin-LLM-V2.5-Qwen-1.5B_bfloat16_8d14c68eec2049d59b2f3262b323c6036754864c_False", - "model": { - "name": "securin/Securin-LLM-V2.5-Qwen-1.5B", - "sha": "8d14c68eec2049d59b2f3262b323c6036754864c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 5.2256798816861005, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1492030035860406, - "normalized_score": 14.92030035860406 - }, - "bbh": { - "name": "BBH", - "value": 0.3158416288115425, - "normalized_score": 4.863456136709561 - }, - "math": { - "name": "MATH Level 5", - "value": 0.024924471299093656, - "normalized_score": 2.492447129909366 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3606354166666667, - "normalized_score": 2.2460937499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16148603723404256, - "normalized_score": 6.831781914893617 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-15", - "submission_date": "2024-12-08", - "generation": 1, - "base_model": "securin/Securin-LLM-V2.5-Qwen-1.5B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.543, - "co2_cost": 1.20957081758515 - } - }, - { - "id": "senseable/WestLake-7B-v2_float16_41625004c47628837678859753b94c50c82f3bec_True", - "model": { - "name": "senseable/WestLake-7B-v2", - "sha": "41625004c47628837678859753b94c50c82f3bec", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 16.257065193895503, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4418620371724801, - "normalized_score": 44.18620371724801 - }, - "bbh": { - "name": "BBH", - "value": 0.4073276290688943, - "normalized_score": 17.858141685089326 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04833836858006042, - "normalized_score": 4.833836858006042 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27684563758389263, - "normalized_score": 3.5794183445190177 - }, - "musr": { - "name": "MUSR", - "value": 0.39371874999999995, - "normalized_score": 7.481510416666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27642952127659576, - "normalized_score": 19.60328014184397 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-01-22", - "submission_date": "2024-07-23", - "generation": 0, - "base_model": "senseable/WestLake-7B-v2", - "hub_license": "apache-2.0", - "hub_hearts": 111, - "params_billions": 7.242, - "co2_cost": 1.2620225825867748 - } - }, - { - "id": "sequelbox/Llama3.1-70B-PlumChat_float16_bef139c3f9ee73c32559518b951d0465ab36190c_False", - "model": { - "name": "sequelbox/Llama3.1-70B-PlumChat", - "sha": "bef139c3f9ee73c32559518b951d0465ab36190c", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 
37.409205844366646, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5616131863455631, - "normalized_score": 56.16131863455631 - }, - "bbh": { - "name": "BBH", - "value": 0.6752815345736151, - "normalized_score": 52.81275213512472 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3028700906344411, - "normalized_score": 30.28700906344411 - }, - "gpqa": { - "name": "GPQA", - "value": 0.39093959731543626, - "normalized_score": 18.791946308724835 - }, - "musr": { - "name": "MUSR", - "value": 0.47737500000000005, - "normalized_score": 20.138541666666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.516373005319149, - "normalized_score": 46.263667257683224 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-17", - "submission_date": "2024-11-27", - "generation": 1, - "base_model": "sequelbox/Llama3.1-70B-PlumChat (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 70.554, - "co2_cost": 65.54413523224211 - } - }, - { - "id": "sequelbox/Llama3.1-8B-MOTH_float16_8db363e36b1efc9015ab14648e68bcfba9e8d8a0_True", - "model": { - "name": "sequelbox/Llama3.1-8B-MOTH", - "sha": "8db363e36b1efc9015ab14648e68bcfba9e8d8a0", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.83650360825358, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5244938984117696, - "normalized_score": 52.449389841176966 - }, - "bbh": { - "name": "BBH", - "value": 0.490246673015408, - "normalized_score": 27.916332245365282 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1216012084592145, - "normalized_score": 12.16012084592145 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.3689166666666666, - "normalized_score": 4.047916666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3338597074468085, - "normalized_score": 25.984411938534276 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-01", - "submission_date": "2024-09-19", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 2.9292739771594416 - } - }, - { - "id": "sequelbox/Llama3.1-8B-PlumChat_float16_1afdc9856591f573e4fcb52dba19a9d8da631e0b_True", - "model": { - "name": "sequelbox/Llama3.1-8B-PlumChat", - "sha": "1afdc9856591f573e4fcb52dba19a9d8da631e0b", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.214730095703656, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.42427647530773904, - "normalized_score": 42.427647530773896 - }, - "bbh": { - "name": "BBH", - "value": 0.3873291395699702, - "normalized_score": 13.935991387298124 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03625377643504532, - "normalized_score": 3.625377643504532 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - 
}, - "musr": { - "name": "MUSR", - "value": 0.3754583333333333, - "normalized_score": 4.765625000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.21268284574468085, - "normalized_score": 12.520316193853429 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-02", - "submission_date": "2024-10-03", - "generation": 1, - "base_model": "sequelbox/Llama3.1-8B-PlumChat (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.9785137872324547 - } - }, - { - "id": "sequelbox/Llama3.1-8B-PlumCode_float16_171cd599d574000607491f08e6cf7b7eb199e33d_False", - "model": { - "name": "sequelbox/Llama3.1-8B-PlumCode", - "sha": "171cd599d574000607491f08e6cf7b7eb199e33d", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 9.823999899420981, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20448299401144518, - "normalized_score": 20.448299401144517 - }, - "bbh": { - "name": "BBH", - "value": 0.3368086861425416, - "normalized_score": 8.502927271642019 - }, - "math": { - "name": "MATH Level 5", - "value": 0.027190332326283987, - "normalized_score": 2.719033232628399 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.37734375000000003, - "normalized_score": 8.967968750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.23354388297872342, - "normalized_score": 14.838209219858156 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-02", - "submission_date": "2024-10-03", - "generation": 1, - "base_model": "sequelbox/Llama3.1-8B-PlumCode (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.7813524957950064 - } - }, - { - "id": "sequelbox/Llama3.1-8B-PlumMath_float16_b857c30a626f7c020fcba89df7bece4bb7381ac2_False", - "model": { - "name": "sequelbox/Llama3.1-8B-PlumMath", - "sha": "b857c30a626f7c020fcba89df7bece4bb7381ac2", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.936685074512214, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.224241678745728, - "normalized_score": 22.424167874572802 - }, - "bbh": { - "name": "BBH", - "value": 0.40323023090048143, - "normalized_score": 16.44658382894578 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04758308157099698, - "normalized_score": 4.758308157099698 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3179530201342282, - "normalized_score": 9.060402684563762 - }, - "musr": { - "name": "MUSR", - "value": 0.39185416666666667, - "normalized_score": 8.981770833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29753989361702127, - "normalized_score": 21.948877068557916 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-01", - "submission_date": "2024-10-03", - "generation": 1, - 
"base_model": "sequelbox/Llama3.1-8B-PlumMath (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.7375443764930234 - } - }, - { - "id": "sequelbox/gemma-2-9B-MOTH_float16_8dff98ab82ba0087706afa0d6c69874a45548212_True", - "model": { - "name": "sequelbox/gemma-2-9B-MOTH", - "sha": "8dff98ab82ba0087706afa0d6c69874a45548212", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 4.729557867247653, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20588150551647405, - "normalized_score": 20.588150551647406 - }, - "bbh": { - "name": "BBH", - "value": 0.30797000521562534, - "normalized_score": 3.2122172300496232 - }, - "math": { - "name": "MATH Level 5", - "value": 0.010574018126888218, - "normalized_score": 1.0574018126888218 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.3409479166666667, - "normalized_score": 0.6184895833333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11402925531914894, - "normalized_score": 1.5588061465721037 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-09", - "submission_date": "2024-09-10", - "generation": 2, - "base_model": "google/gemma-2-9b", - "hub_license": "gemma", - "hub_hearts": 0, - "params_billions": 9.242, - "co2_cost": 6.0558978861492285 - } - }, - { - "id": "sethuiyer/Llama-3.1-8B-Experimental-1206-Instruct_bfloat16_5dde2c4f0f907b00cc490a6b1fe492697395eff3_True", - "model": { - "name": "sethuiyer/Llama-3.1-8B-Experimental-1206-Instruct", - "sha": "5dde2c4f0f907b00cc490a6b1fe492697395eff3", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.684511231135144, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6967014189018471, - "normalized_score": 69.67014189018471 - }, - "bbh": { - "name": "BBH", - "value": 0.510381184158217, - "normalized_score": 30.055034247108825 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11178247734138973, - "normalized_score": 11.178247734138973 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.39657291666666666, - "normalized_score": 8.504947916666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35289228723404253, - "normalized_score": 28.09914302600473 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-18", - "submission_date": "2025-01-19", - "generation": 1, - "base_model": "sethuiyer/Llama-3.1-8B-Experimental-1206-Instruct (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.370784422238238 - } - }, - { - "id": "sethuiyer/Llama-3.1-8B-Experimental-1208-Instruct_bfloat16_4afc818bdd3890a71ac8c31bde9e424e43a86bd7_True", - "model": { - "name": "sethuiyer/Llama-3.1-8B-Experimental-1208-Instruct", - "sha": "4afc818bdd3890a71ac8c31bde9e424e43a86bd7", - "precision": "bfloat16", - "type": 
"fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.509819826801674, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6099981382731153, - "normalized_score": 60.99981382731153 - }, - "bbh": { - "name": "BBH", - "value": 0.49642264289263355, - "normalized_score": 29.491580809437377 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0891238670694864, - "normalized_score": 8.91238670694864 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.3789895833333334, - "normalized_score": 7.607031250000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35106382978723405, - "normalized_score": 27.89598108747045 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-18", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.5135607022091058 - } - }, - { - "id": "sethuiyer/LlamaZero-3.1-8B-Experimental-1208_bfloat16_8210bbb6d9284b11e168a184e0d6b68c58e419b0_True", - "model": { - "name": "sethuiyer/LlamaZero-3.1-8B-Experimental-1208", - "sha": "8210bbb6d9284b11e168a184e0d6b68c58e419b0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.958507942267545, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6051022398347496, - "normalized_score": 60.51022398347496 - }, - "bbh": { - "name": "BBH", - "value": 0.49813698712445653, - "normalized_score": 28.61268788658586 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10800604229607251, - "normalized_score": 10.80060422960725 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2684563758389262, - "normalized_score": 2.460850111856823 - }, - "musr": { - "name": "MUSR", - "value": 0.38199999999999995, - "normalized_score": 7.149999999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2999501329787234, - "normalized_score": 22.21668144208038 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-20", - "submission_date": "2025-01-20", - "generation": 1, - "base_model": "sethuiyer/LlamaZero-3.1-8B-Experimental-1208 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.7465946737584666 - } - }, - { - "id": "sethuiyer/Llamaverse-3.1-8B-Instruct_bfloat16_6d81e7054eef74a3aa3f26255d57537a9bb15f19_True", - "model": { - "name": "sethuiyer/Llamaverse-3.1-8B-Instruct", - "sha": "6d81e7054eef74a3aa3f26255d57537a9bb15f19", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.19209991300966, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6185410266980501, - "normalized_score": 61.854102669805016 - }, - "bbh": { - "name": "BBH", - "value": 0.5414159562743479, - "normalized_score": 34.78211812089758 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18580060422960726, - "normalized_score": 
18.580060422960727 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.3761666666666667, - "normalized_score": 8.420833333333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3523105053191489, - "normalized_score": 28.034500591016542 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-14", - "submission_date": "2025-01-14", - "generation": 1, - "base_model": "sethuiyer/Llamaverse-3.1-8B-Instruct (Merge)", - "hub_license": "llama3.1", - "hub_hearts": 5, - "params_billions": 8.03, - "co2_cost": 1.4322133247968183 - } - }, - { - "id": "sethuiyer/Llamazing-3.1-8B-Instruct_bfloat16_2c9c702cbe3fce894de728399efcc1c36d6a81ac_True", - "model": { - "name": "sethuiyer/Llamazing-3.1-8B-Instruct", - "sha": "2c9c702cbe3fce894de728399efcc1c36d6a81ac", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.606046468554243, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5711301568726534, - "normalized_score": 57.113015687265346 - }, - "bbh": { - "name": "BBH", - "value": 0.529106967510303, - "normalized_score": 32.850609039112555 - }, - "math": { - "name": "MATH Level 5", - "value": 0.054380664652567974, - "normalized_score": 5.438066465256798 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31208053691275167, - "normalized_score": 8.277404921700223 - }, - "musr": { - "name": "MUSR", - "value": 0.39759374999999997, - "normalized_score": 8.999218749999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3606216755319149, - "normalized_score": 28.95796394799054 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-19", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 2.620574710742619 - } - }, - { - "id": "sethuiyer/Qwen2.5-7B-Anvita_bfloat16_dc6f8ca6507cc282938e70b23b02c1a3db7b7ddc_True", - "model": { - "name": "sethuiyer/Qwen2.5-7B-Anvita", - "sha": "dc6f8ca6507cc282938e70b23b02c1a3db7b7ddc", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 29.898362120988242, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6480416406246536, - "normalized_score": 64.80416406246536 - }, - "bbh": { - "name": "BBH", - "value": 0.5465860266784314, - "normalized_score": 35.482447523885746 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20166163141993956, - "normalized_score": 20.166163141993955 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3271812080536913, - "normalized_score": 10.290827740492169 - }, - "musr": { - "name": "MUSR", - "value": 0.43365625, - "normalized_score": 13.473697916666671 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4165558510638298, - "normalized_score": 35.172872340425535 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": 
"2024-10-11", - "submission_date": "2024-10-27", - "generation": 1, - "base_model": "sethuiyer/Qwen2.5-7B-Anvita (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 2.1602466244661174 - } - }, - { - "id": "shadowml/BeagSake-7B_bfloat16_b7a3b25a188a4608fd05fc4247ddd504c1f529d1_True", - "model": { - "name": "shadowml/BeagSake-7B", - "sha": "b7a3b25a188a4608fd05fc4247ddd504c1f529d1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.000757229169064, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5215960318621258, - "normalized_score": 52.15960318621258 - }, - "bbh": { - "name": "BBH", - "value": 0.47110342371098474, - "normalized_score": 25.19294464311316 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05060422960725076, - "normalized_score": 5.0604229607250755 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28104026845637586, - "normalized_score": 4.138702460850116 - }, - "musr": { - "name": "MUSR", - "value": 0.41235416666666663, - "normalized_score": 9.844270833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.25847739361702127, - "normalized_score": 17.608599290780138 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-01-31", - "submission_date": "2024-10-29", - "generation": 1, - "base_model": "shadowml/BeagSake-7B (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 2, - "params_billions": 7.242, - "co2_cost": 4.078669899162654 - } - }, - { - "id": "shadowml/Mixolar-4x7b_float16_bb793526b063765e9861cad8834160fb0945e66d_False", - "model": { - "name": "shadowml/Mixolar-4x7b", - "sha": "bb793526b063765e9861cad8834160fb0945e66d", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 20.252696525459793, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3893303102434873, - "normalized_score": 38.93303102434873 - }, - "bbh": { - "name": "BBH", - "value": 0.5215949876221495, - "normalized_score": 32.728963576299655 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0581570996978852, - "normalized_score": 5.81570996978852 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29278523489932884, - "normalized_score": 5.7046979865771785 - }, - "musr": { - "name": "MUSR", - "value": 0.42575, - "normalized_score": 12.718749999999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33053523936170215, - "normalized_score": 25.615026595744684 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-12-30", - "submission_date": "2024-08-05", - "generation": 0, - "base_model": "shadowml/Mixolar-4x7b", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 36.099, - "co2_cost": 4.709455400650262 - } - }, - { - "id": "shastraai/Shastra-LLAMA2-Math-Commonsense-SFT_bfloat16_97a578246d4edecb5fde3dae262a64e4ec9f489a_False", - "model": { - "name": "shastraai/Shastra-LLAMA2-Math-Commonsense-SFT", - "sha": "97a578246d4edecb5fde3dae262a64e4ec9f489a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - 
"architecture": "LlamaForCausalLM", - "average_score": 10.49075907778522, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3041507644161935, - "normalized_score": 30.415076441619348 - }, - "bbh": { - "name": "BBH", - "value": 0.384316753625765, - "normalized_score": 13.659523241343651 - }, - "math": { - "name": "MATH Level 5", - "value": 0.017371601208459216, - "normalized_score": 1.7371601208459215 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.3604479166666667, - "normalized_score": 4.8226562500000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.19971742021276595, - "normalized_score": 11.079713356973993 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-10-27", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 6.738, - "co2_cost": 1.5280844722045834 - } - }, - { - "id": "shivam9980/NEPALI-LLM_bfloat16_5fe146065b53bfd6d8e242cffbe9176bc245551d_False", - "model": { - "name": "shivam9980/NEPALI-LLM", - "sha": "5fe146065b53bfd6d8e242cffbe9176bc245551d", - "precision": "bfloat16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 6.93055339969496, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.041666112581284324, - "normalized_score": 4.166611258128433 - }, - "bbh": { - "name": "BBH", - "value": 0.3828457133787513, - "normalized_score": 13.12524427731519 - }, - "math": { - "name": "MATH Level 5", - "value": 0.00906344410876133, - "normalized_score": 0.906344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.41219791666666666, - "normalized_score": 9.991406250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2064494680851064, - "normalized_score": 11.827718676122931 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-17", - "submission_date": "2024-09-24", - "generation": 2, - "base_model": "google/gemma-2-9b", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 10.273, - "co2_cost": 14.370848641479697 - } - }, - { - "id": "shivam9980/mistral-7b-news-cnn-merged_float16_a0d7029cb00c122843aef3d7ad61d514de334ea3_True", - "model": { - "name": "shivam9980/mistral-7b-news-cnn-merged", - "sha": "a0d7029cb00c122843aef3d7ad61d514de334ea3", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 17.196276123590845, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4634192830578421, - "normalized_score": 46.34192830578421 - }, - "bbh": { - "name": "BBH", - "value": 0.3635484854246454, - "normalized_score": 11.146535574656042 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0188821752265861, - "normalized_score": 1.8882175226586102 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3087248322147651, - "normalized_score": 7.829977628635347 - }, - "musr": { - "name": "MUSR", - 
"value": 0.45226041666666666, - "normalized_score": 15.665885416666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28274601063829785, - "normalized_score": 20.305112293144205 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-03-18", - "submission_date": "2024-09-12", - "generation": 2, - "base_model": "mistralai/mistral-7b-instruct-v0.2", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.723, - "co2_cost": 3.188185330941949 - } - }, - { - "id": "shivank21/mistral_dpo_self_bfloat16_2dcff66eac0c01dc50e4c41eea959968232187fe_True", - "model": { - "name": "shivank21/mistral_dpo_self", - "sha": "2dcff66eac0c01dc50e4c41eea959968232187fe", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "", - "average_score": 9.824435617463962, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.340345837932242, - "normalized_score": 34.0345837932242 - }, - "bbh": { - "name": "BBH", - "value": 0.3216256961597798, - "normalized_score": 5.548411533266127 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02190332326283988, - "normalized_score": 2.190332326283988 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2407718120805369, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.32466666666666666, - "normalized_score": 3.683333333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2214095744680851, - "normalized_score": 13.48995271867612 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-03", - "submission_date": "2025-02-05", - "generation": 0, - "base_model": "shivank21/mistral_dpo_self", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.913, - "co2_cost": 2.2630629848127213 - } - }, - { - "id": "shuttleai/shuttle-3_float16_b48807a86c65e121f31f0ebdb2d1272bdd253a9a_True", - "model": { - "name": "shuttleai/shuttle-3", - "sha": "b48807a86c65e121f31f0ebdb2d1272bdd253a9a", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 46.70460730741495, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.815403130360776, - "normalized_score": 81.5403130360776 - }, - "bbh": { - "name": "BBH", - "value": 0.7420334281529087, - "normalized_score": 64.05301565117443 - }, - "math": { - "name": "MATH Level 5", - "value": 0.45996978851963743, - "normalized_score": 45.996978851963746 - }, - "gpqa": { - "name": "GPQA", - "value": 0.41191275167785235, - "normalized_score": 21.588366890380314 - }, - "musr": { - "name": "MUSR", - "value": 0.4376875, - "normalized_score": 14.64427083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5716422872340425, - "normalized_score": 52.40469858156028 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-09", - "submission_date": "2024-12-04", - "generation": 1, - "base_model": "Qwen/Qwen2.5-72B", - "hub_license": "other", - "hub_hearts": 36, - "params_billions": 72.706, - "co2_cost": 
47.04132341607627 - } - }, - { - "id": "shyamieee/Padma-v7.0_bfloat16_caf70bd6e2f819cc6a18dda8516f2cbdc101fdde_False", - "model": { - "name": "shyamieee/Padma-v7.0", - "sha": "caf70bd6e2f819cc6a18dda8516f2cbdc101fdde", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.75621841010717, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3841097177710696, - "normalized_score": 38.410971777106965 - }, - "bbh": { - "name": "BBH", - "value": 0.5118785631761485, - "normalized_score": 31.657520764874246 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0702416918429003, - "normalized_score": 7.02416918429003 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2860738255033557, - "normalized_score": 4.809843400447425 - }, - "musr": { - "name": "MUSR", - "value": 0.43855208333333334, - "normalized_score": 14.085677083333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3029421542553192, - "normalized_score": 22.549128250591018 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-26", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "shyamieee/Padma-v7.0 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.1797979554990508 - } - }, - { - "id": "silma-ai/SILMA-9B-Instruct-v1.0_bfloat16_25d7b116ab3fb9f97417a297f8df4a7e34e7de68_True", - "model": { - "name": "silma-ai/SILMA-9B-Instruct-v1.0", - "sha": "25d7b116ab3fb9f97417a297f8df4a7e34e7de68", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 26.308011915634108, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5841943820174914, - "normalized_score": 58.419438201749145 - }, - "bbh": { - "name": "BBH", - "value": 0.5219015032853501, - "normalized_score": 30.71300262979214 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1163141993957704, - "normalized_score": 11.63141993957704 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.46369791666666665, - "normalized_score": 17.262239583333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.39195478723404253, - "normalized_score": 32.439420803782504 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-17", - "submission_date": "2024-11-12", - "generation": 0, - "base_model": "silma-ai/SILMA-9B-Instruct-v1.0", - "hub_license": "gemma", - "hub_hearts": 69, - "params_billions": 9.242, - "co2_cost": 2.4919978285857622 - } - }, - { - "id": "silma-ai/SILMA-Kashif-2B-Instruct-v1.0_bfloat16_c13e67581b7d38f79b9bfae90c273f15875d3aef_False", - "model": { - "name": "silma-ai/SILMA-Kashif-2B-Instruct-v1.0", - "sha": "c13e67581b7d38f79b9bfae90c273f15875d3aef", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 8.452456221236272, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - 
"value": 0.11807781131841291, - "normalized_score": 11.807781131841292 - }, - "bbh": { - "name": "BBH", - "value": 0.37932201246317715, - "normalized_score": 12.844188209052897 - }, - "math": { - "name": "MATH Level 5", - "value": 0.011329305135951661, - "normalized_score": 1.1329305135951662 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.4042604166666666, - "normalized_score": 8.265885416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.22581449468085107, - "normalized_score": 13.979388297872342 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-26", - "submission_date": "2025-01-27", - "generation": 0, - "base_model": "silma-ai/SILMA-Kashif-2B-Instruct-v1.0", - "hub_license": "gemma", - "hub_hearts": 14, - "params_billions": 2.614, - "co2_cost": 2.3874880242826424 - } - }, - { - "id": "siqi00/Mistral-7B-DFT_bfloat16_d0ec860cddca6094253d50d474ee78bfe371df2b_True", - "model": { - "name": "siqi00/Mistral-7B-DFT", - "sha": "d0ec860cddca6094253d50d474ee78bfe371df2b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 20.75522180931725, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5568668909604294, - "normalized_score": 55.68668909604294 - }, - "bbh": { - "name": "BBH", - "value": 0.46648773367771273, - "normalized_score": 25.364499387227273 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0377643504531722, - "normalized_score": 3.7764350453172204 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.41911458333333335, - "normalized_score": 10.622656249999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2962932180851064, - "normalized_score": 21.81035756501182 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-05", - "submission_date": "2025-02-05", - "generation": 1, - "base_model": "mistralai/Mistral-7B-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.8755216987960303 - } - }, - { - "id": "siqi00/Mistral-7B-DFT2_bfloat16_77d3f365b9b65ffcdda6ee028fd303d145b117f4_True", - "model": { - "name": "siqi00/Mistral-7B-DFT2", - "sha": "77d3f365b9b65ffcdda6ee028fd303d145b117f4", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.8755968493133, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5803723010501026, - "normalized_score": 58.037230105010266 - }, - "bbh": { - "name": "BBH", - "value": 0.39683798240076246, - "normalized_score": 15.39381048021219 - }, - "math": { - "name": "MATH Level 5", - "value": 0.045317220543806644, - "normalized_score": 4.531722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.44007291666666665, - "normalized_score": 14.109114583333337 - }, - 
"mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28523936170212766, - "normalized_score": 20.582151300236408 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-09", - "submission_date": "2025-02-26", - "generation": 1, - "base_model": "mistralai/Mistral-7B-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.40110483542063136 - } - }, - { - "id": "skumar9/Llama-medx_v2_float16_3c955655894733b2f851de017134c84b0a62f380_False", - "model": { - "name": "skumar9/Llama-medx_v2", - "sha": "3c955655894733b2f851de017134c84b0a62f380", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.88386236667153, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4462337708391512, - "normalized_score": 44.623377083915116 - }, - "bbh": { - "name": "BBH", - "value": 0.4908589512175783, - "normalized_score": 27.423041996880624 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09138972809667674, - "normalized_score": 9.138972809667674 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3053691275167785, - "normalized_score": 7.38255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.36612500000000003, - "normalized_score": 3.3656250000000028 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34632646276595747, - "normalized_score": 27.369606973995275 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-29", - "submission_date": "2025-01-29", - "generation": 0, - "base_model": "skumar9/Llama-medx_v2", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 2.0199599668215353 - } - }, - { - "id": "skymizer/Llama2-7b-sft-chat-custom-template-dpo_bfloat16_22302ebd8c551a5f302fcb8366cc61fdeedf0e00_False", - "model": { - "name": "skymizer/Llama2-7b-sft-chat-custom-template-dpo", - "sha": "22302ebd8c551a5f302fcb8366cc61fdeedf0e00", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 10.140548181946363, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2352823840742563, - "normalized_score": 23.528238407425633 - }, - "bbh": { - "name": "BBH", - "value": 0.36884662302661564, - "normalized_score": 11.238865074478818 - }, - "math": { - "name": "MATH Level 5", - "value": 0.014350453172205438, - "normalized_score": 1.4350453172205437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23909395973154363, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.44286458333333334, - "normalized_score": 14.124739583333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.19464760638297873, - "normalized_score": 10.516400709219859 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-11", - "submission_date": "2024-07-01", - "generation": 1, - "base_model": "Removed", - "hub_license": "llama2", - "hub_hearts": 0, - "params_billions": 6.738, - "co2_cost": 
1.2329408833483786 - } - }, - { - "id": "someon98/qwen-CoMa-0.5b_bfloat16_67336cfb494c0aa1995be0efdeeb9fb0c6a386fe_False", - "model": { - "name": "someon98/qwen-CoMa-0.5b", - "sha": "67336cfb494c0aa1995be0efdeeb9fb0c6a386fe", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 5.858059919414088, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22766371006706648, - "normalized_score": 22.76637100670665 - }, - "bbh": { - "name": "BBH", - "value": 0.29533439538939815, - "normalized_score": 2.1267939195880374 - }, - "math": { - "name": "MATH Level 5", - "value": 0.004531722054380665, - "normalized_score": 0.4531722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23993288590604026, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.40457291666666667, - "normalized_score": 8.704947916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10987367021276596, - "normalized_score": 1.0970744680851066 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "someon98/qwen-CoMa-0.5b (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 0.63, - "co2_cost": 1.0183575110985603 - } - }, - { - "id": "sometimesanotion/ChocoTrio-14B-v1_bfloat16_da10e1b6a7eb22cd4a1736fab5b17e8d026c57e9_False", - "model": { - "name": "sometimesanotion/ChocoTrio-14B-v1", - "sha": "da10e1b6a7eb22cd4a1736fab5b17e8d026c57e9", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.158305672336745, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7088912973133508, - "normalized_score": 70.88912973133507 - }, - "bbh": { - "name": "BBH", - "value": 0.6505840125855428, - "normalized_score": 50.01329228044157 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3972809667673716, - "normalized_score": 39.72809667673716 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3850671140939597, - "normalized_score": 18.008948545861294 - }, - "musr": { - "name": "MUSR", - "value": 0.4820520833333333, - "normalized_score": 19.756510416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5369847074468085, - "normalized_score": 48.55385638297872 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-01", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 1.8457620800763928 - } - }, - { - "id": "sometimesanotion/IF-reasoning-experiment-40_bfloat16_0064fffb67d18b0f946b6e7bf3227ca0c92af3eb_False", - "model": { - "name": "sometimesanotion/IF-reasoning-experiment-40", - "sha": "0064fffb67d18b0f946b6e7bf3227ca0c92af3eb", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.780695768343925, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6329793835910938, - "normalized_score": 
63.29793835910938 - }, - "bbh": { - "name": "BBH", - "value": 0.6111859401994667, - "normalized_score": 44.30640768056546 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3716012084592145, - "normalized_score": 37.160120845921455 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3800335570469799, - "normalized_score": 17.337807606263986 - }, - "musr": { - "name": "MUSR", - "value": 0.5194166666666666, - "normalized_score": 25.860416666666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5024933510638298, - "normalized_score": 44.72148345153664 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-29", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.0, - "co2_cost": 3.809917598536154 - } - }, - { - "id": "sometimesanotion/IF-reasoning-experiment-80_bfloat16_d1441e8bd87f11235fd4c708f6ece69a9973c343_False", - "model": { - "name": "sometimesanotion/IF-reasoning-experiment-80", - "sha": "d1441e8bd87f11235fd4c708f6ece69a9973c343", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 22.64532031474884, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5462761029623622, - "normalized_score": 54.62761029623621 - }, - "bbh": { - "name": "BBH", - "value": 0.42103836132239286, - "normalized_score": 17.482339572802207 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09894259818731117, - "normalized_score": 9.894259818731117 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28439597315436244, - "normalized_score": 4.5861297539149914 - }, - "musr": { - "name": "MUSR", - "value": 0.5024583333333333, - "normalized_score": 22.973958333333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3367686170212766, - "normalized_score": 26.307624113475175 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-29", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.383, - "co2_cost": 3.773964867048081 - } - }, - { - "id": "sometimesanotion/KytheraMix-7B-v0.2_bfloat16_2052860a45a71fa30196077b99596264d1002429_False", - "model": { - "name": "sometimesanotion/KytheraMix-7B-v0.2", - "sha": "2052860a45a71fa30196077b99596264d1002429", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 32.38407922772431, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6128705168951715, - "normalized_score": 61.28705168951715 - }, - "bbh": { - "name": "BBH", - "value": 0.5635202746804572, - "normalized_score": 37.50149964913648 - }, - "math": { - "name": "MATH Level 5", - "value": 0.29229607250755285, - "normalized_score": 29.229607250755286 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33557046979865773, - "normalized_score": 11.409395973154364 - }, - "musr": { - "name": "MUSR", - "value": 0.45941666666666664, - "normalized_score": 15.927083333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.45054853723404253, - "normalized_score": 38.949837470449175 - 
} - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-05", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 1.381778506653468 - } - }, - { - "id": "sometimesanotion/Lamarck-14B-v0.1-experimental_bfloat16_b0600e08e8c97b25d1abca543b997d9927245442_False", - "model": { - "name": "sometimesanotion/Lamarck-14B-v0.1-experimental", - "sha": "b0600e08e8c97b25d1abca543b997d9927245442", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 37.552164138470836, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5353850006870658, - "normalized_score": 53.53850006870659 - }, - "bbh": { - "name": "BBH", - "value": 0.6582539239967329, - "normalized_score": 50.794907669194664 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3580060422960725, - "normalized_score": 35.80060422960725 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38171140939597314, - "normalized_score": 17.561521252796418 - }, - "musr": { - "name": "MUSR", - "value": 0.47284375, - "normalized_score": 18.638802083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5408078457446809, - "normalized_score": 48.97864952718677 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-09", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 3.789386756229194 - } - }, - { - "id": "sometimesanotion/Lamarck-14B-v0.3_bfloat16_781637d1b65766fe933ebde070632e48f91390ab_False", - "model": { - "name": "sometimesanotion/Lamarck-14B-v0.3", - "sha": "781637d1b65766fe933ebde070632e48f91390ab", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.853034263826295, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5031616111916382, - "normalized_score": 50.31616111916382 - }, - "bbh": { - "name": "BBH", - "value": 0.6611400465373158, - "normalized_score": 51.27430858821048 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3406344410876133, - "normalized_score": 34.06344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3884228187919463, - "normalized_score": 18.456375838926174 - }, - "musr": { - "name": "MUSR", - "value": 0.4688125, - "normalized_score": 18.001562500000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5410571808510638, - "normalized_score": 49.006353427895974 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-06", - "submission_date": "2024-12-09", - "generation": 1, - "base_model": "sometimesanotion/Lamarck-14B-v0.3 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 9.590061110558608 - } - }, - { - "id": "sometimesanotion/Lamarck-14B-v0.4-Qwenvergence_bfloat16_add9a151dd5614603bebcf3d3740fa92e5d67632_False", - "model": { - "name": 
"sometimesanotion/Lamarck-14B-v0.4-Qwenvergence", - "sha": "add9a151dd5614603bebcf3d3740fa92e5d67632", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.62014600012022, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4906470387460826, - "normalized_score": 49.06470387460827 - }, - "bbh": { - "name": "BBH", - "value": 0.6535142192324058, - "normalized_score": 50.20804499847927 - }, - "math": { - "name": "MATH Level 5", - "value": 0.33987915407854985, - "normalized_score": 33.987915407854985 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3783557046979866, - "normalized_score": 17.114093959731544 - }, - "musr": { - "name": "MUSR", - "value": 0.4846875, - "normalized_score": 20.385937499999994 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5406416223404256, - "normalized_score": 48.96018026004729 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-12", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 3.4825269957712743 - } - }, - { - "id": "sometimesanotion/Lamarck-14B-v0.6_bfloat16_e9c144208c045fe6954ef3f658a3bda38dbd0d82_False", - "model": { - "name": "sometimesanotion/Lamarck-14B-v0.6", - "sha": "e9c144208c045fe6954ef3f658a3bda38dbd0d82", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.16744391023699, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6972510716011294, - "normalized_score": 69.72510716011294 - }, - "bbh": { - "name": "BBH", - "value": 0.6460312233782931, - "normalized_score": 49.29789462939962 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4040785498489426, - "normalized_score": 40.40785498489426 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38926174496644295, - "normalized_score": 18.568232662192393 - }, - "musr": { - "name": "MUSR", - "value": 0.4846875, - "normalized_score": 20.119270833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5399767287234043, - "normalized_score": 48.88630319148937 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-04", - "submission_date": "2025-01-05", - "generation": 1, - "base_model": "sometimesanotion/Lamarck-14B-v0.6 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 14, - "params_billions": 14.766, - "co2_cost": 3.8447694873403306 - } - }, - { - "id": "sometimesanotion/Lamarck-14B-v0.6-002-model_stock_bfloat16_c2d5adb04b1839aeeca77a3f2a5be08864116da1_False", - "model": { - "name": "sometimesanotion/Lamarck-14B-v0.6-002-model_stock", - "sha": "c2d5adb04b1839aeeca77a3f2a5be08864116da1", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.457579354747615, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.669224324791553, - "normalized_score": 66.9224324791553 - }, - "bbh": { - "name": "BBH", - "value": 0.6143349188724702, - "normalized_score": 
45.00658355961355 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3776435045317221, - "normalized_score": 37.764350453172206 - }, - "gpqa": { - "name": "GPQA", - "value": 0.37416107382550334, - "normalized_score": 16.554809843400445 - }, - "musr": { - "name": "MUSR", - "value": 0.5180208333333333, - "normalized_score": 25.45260416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5054022606382979, - "normalized_score": 45.04469562647754 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-01", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.0, - "co2_cost": 3.775851332590963 - } - }, - { - "id": "sometimesanotion/Lamarck-14B-v0.6-model_stock_bfloat16_4d4227285a889ffd23618ad32ff7b08d1bcfa5ae_False", - "model": { - "name": "sometimesanotion/Lamarck-14B-v0.6-model_stock", - "sha": "4d4227285a889ffd23618ad32ff7b08d1bcfa5ae", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.67608193287934, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6789662539838739, - "normalized_score": 67.89662539838739 - }, - "bbh": { - "name": "BBH", - "value": 0.6269436532753222, - "normalized_score": 46.491326082885486 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4244712990936556, - "normalized_score": 42.44712990936556 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38422818791946306, - "normalized_score": 17.897091722595075 - }, - "musr": { - "name": "MUSR", - "value": 0.50065625, - "normalized_score": 22.682031249999994 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.519780585106383, - "normalized_score": 46.64228723404256 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-31", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.0, - "co2_cost": 3.723278579007773 - } - }, - { - "id": "sometimesanotion/Lamarck-14B-v0.7-Fusion_bfloat16_f2413f4fa9d9fdc6a29b8c28f541875a7a8061df_False", - "model": { - "name": "sometimesanotion/Lamarck-14B-v0.7-Fusion", - "sha": "f2413f4fa9d9fdc6a29b8c28f541875a7a8061df", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.68165196521005, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6821134589555713, - "normalized_score": 68.21134589555712 - }, - "bbh": { - "name": "BBH", - "value": 0.6543636625652262, - "normalized_score": 50.42650008400056 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4040785498489426, - "normalized_score": 40.40785498489426 - }, - "gpqa": { - "name": "GPQA", - "value": 0.401006711409396, - "normalized_score": 20.134228187919465 - }, - "musr": { - "name": "MUSR", - "value": 0.49913541666666666, - "normalized_score": 22.125260416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5390625, - "normalized_score": 48.78472222222222 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": 
false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-23", - "submission_date": "2025-02-23", - "generation": 1, - "base_model": "sometimesanotion/Lamarck-14B-v0.7-Fusion (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 8, - "params_billions": 14.766, - "co2_cost": 5.515341471959302 - } - }, - { - "id": "sometimesanotion/Lamarck-14B-v0.7-rc1_bfloat16_7735f8b60b6cf5728ee26b84e4d7fab846657ac4_False", - "model": { - "name": "sometimesanotion/Lamarck-14B-v0.7-rc1", - "sha": "7735f8b60b6cf5728ee26b84e4d7fab846657ac4", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.14125262846636, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7305482785675341, - "normalized_score": 73.0548278567534 - }, - "bbh": { - "name": "BBH", - "value": 0.6486027992626241, - "normalized_score": 49.50816072696898 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3851963746223565, - "normalized_score": 38.51963746223565 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38926174496644295, - "normalized_score": 18.568232662192393 - }, - "musr": { - "name": "MUSR", - "value": 0.47147916666666667, - "normalized_score": 18.134895833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5415558510638298, - "normalized_score": 49.06176122931441 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-19", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 3.8975613656159886 - } - }, - { - "id": "sometimesanotion/Lamarck-14B-v0.7-rc4_bfloat16_724da952865e5fe0555e7d86bda9168541df0f2e_False", - "model": { - "name": "sometimesanotion/Lamarck-14B-v0.7-rc4", - "sha": "724da952865e5fe0555e7d86bda9168541df0f2e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.79013452973212, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7210811757248545, - "normalized_score": 72.10811757248545 - }, - "bbh": { - "name": "BBH", - "value": 0.6509652911243554, - "normalized_score": 49.854949919143934 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4025679758308157, - "normalized_score": 40.25679758308157 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38926174496644295, - "normalized_score": 18.568232662192393 - }, - "musr": { - "name": "MUSR", - "value": 0.4911979166666667, - "normalized_score": 21.06640625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5399767287234043, - "normalized_score": 48.88630319148937 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-21", - "submission_date": "2025-01-21", - "generation": 1, - "base_model": "sometimesanotion/Lamarck-14B-v0.7-rc4 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 39, - "params_billions": 14.766, - "co2_cost": 3.8324623578695967 - } - }, - { - "id": "sometimesanotion/LamarckInfusion-14B-v1_bfloat16_39236e060b4aae1f882abeb6e2a3672076169c91_False", - "model": { - "name": "sometimesanotion/LamarckInfusion-14B-v1", - "sha": 
"39236e060b4aae1f882abeb6e2a3672076169c91", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 42.05767311482166, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7198322672730577, - "normalized_score": 71.98322672730578 - }, - "bbh": { - "name": "BBH", - "value": 0.6539252513912222, - "normalized_score": 50.34764346772722 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4169184290030212, - "normalized_score": 41.69184290030212 - }, - "gpqa": { - "name": "GPQA", - "value": 0.39093959731543626, - "normalized_score": 18.791946308724835 - }, - "musr": { - "name": "MUSR", - "value": 0.48989583333333336, - "normalized_score": 20.903645833333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5376496010638298, - "normalized_score": 48.62773345153664 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-25", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 1.981689278019317 - } - }, - { - "id": "sometimesanotion/LamarckInfusion-14B-v2_bfloat16_fb7c7f4ae83dcaab6d9e9e6c21af7fe83f584561_False", - "model": { - "name": "sometimesanotion/LamarckInfusion-14B-v2", - "sha": "fb7c7f4ae83dcaab6d9e9e6c21af7fe83f584561", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 42.11094293951407, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6811892445378263, - "normalized_score": 68.11892445378263 - }, - "bbh": { - "name": "BBH", - "value": 0.6564434429766982, - "normalized_score": 50.84149099483824 - }, - "math": { - "name": "MATH Level 5", - "value": 0.438821752265861, - "normalized_score": 43.8821752265861 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3875838926174497, - "normalized_score": 18.34451901565996 - }, - "musr": { - "name": "MUSR", - "value": 0.4992604166666667, - "normalized_score": 22.40755208333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5416389627659575, - "normalized_score": 49.07099586288417 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-01", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 1.8852153316741096 - } - }, - { - "id": "sometimesanotion/LamarckInfusion-14B-v2-hi_bfloat16_291a3e56e35b33172d8eaa574bbb64cdd13e46d3_False", - "model": { - "name": "sometimesanotion/LamarckInfusion-14B-v2-hi", - "sha": "291a3e56e35b33172d8eaa574bbb64cdd13e46d3", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.509609454034056, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.685485622592499, - "normalized_score": 68.5485622592499 - }, - "bbh": { - "name": "BBH", - "value": 0.6555026541798943, - "normalized_score": 50.65842470890095 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4229607250755287, - "normalized_score": 
42.296072507552864 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3884228187919463, - "normalized_score": 18.456375838926174 - }, - "musr": { - "name": "MUSR", - "value": 0.48471875000000003, - "normalized_score": 20.156510416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5404753989361702, - "normalized_score": 48.94171099290781 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-01", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 1.9025659110395232 - } - }, - { - "id": "sometimesanotion/LamarckInfusion-14B-v2-lo_bfloat16_50afe3fe5b5f3ba5455929f301f426a5f4229938_False", - "model": { - "name": "sometimesanotion/LamarckInfusion-14B-v2-lo", - "sha": "50afe3fe5b5f3ba5455929f301f426a5f4229938", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.58407798423975, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6787911630030541, - "normalized_score": 67.87911630030541 - }, - "bbh": { - "name": "BBH", - "value": 0.6528441920403686, - "normalized_score": 50.25299157009956 - }, - "math": { - "name": "MATH Level 5", - "value": 0.42371601208459214, - "normalized_score": 42.37160120845921 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3859060402684564, - "normalized_score": 18.120805369127517 - }, - "musr": { - "name": "MUSR", - "value": 0.4991041666666667, - "normalized_score": 22.02135416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5397273936170213, - "normalized_score": 48.85859929078014 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-03-01", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 1.8154785258852455 - } - }, - { - "id": "sometimesanotion/LamarckInfusion-14B-v3_bfloat16_f2efbc9345e6d1edb59525901226e06dd38d23bf_False", - "model": { - "name": "sometimesanotion/LamarckInfusion-14B-v3", - "sha": "f2efbc9345e6d1edb59525901226e06dd38d23bf", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.58315666578661, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7131378076836128, - "normalized_score": 71.31378076836128 - }, - "bbh": { - "name": "BBH", - "value": 0.6517667892516962, - "normalized_score": 50.091807278816894 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4123867069486405, - "normalized_score": 41.23867069486405 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38674496644295303, - "normalized_score": 18.232662192393736 - }, - "musr": { - "name": "MUSR", - "value": 0.48202083333333334, - "normalized_score": 19.652604166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5407247340425532, - "normalized_score": 48.96941489361702 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - 
"submission_date": "2025-03-01", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 1.948833045177632 - } - }, - { - "id": "sometimesanotion/Qwen-14B-ProseStock-v4_bfloat16_7bbd108559500c0efca1f8925180bb1771425559_False", - "model": { - "name": "sometimesanotion/Qwen-14B-ProseStock-v4", - "sha": "7bbd108559500c0efca1f8925180bb1771425559", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 37.376459859146784, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4942186731206532, - "normalized_score": 49.42186731206532 - }, - "bbh": { - "name": "BBH", - "value": 0.6498268976192769, - "normalized_score": 49.54129977932439 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3640483383685801, - "normalized_score": 36.40483383685801 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3884228187919463, - "normalized_score": 18.456375838926174 - }, - "musr": { - "name": "MUSR", - "value": 0.49383333333333335, - "normalized_score": 21.69583333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5386469414893617, - "normalized_score": 48.73854905437352 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-24", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.0, - "co2_cost": 3.6889640300489233 - } - }, - { - "id": "sometimesanotion/Qwen-2.5-14B-Virmarckeoso_bfloat16__False", - "model": { - "name": "sometimesanotion/Qwen-2.5-14B-Virmarckeoso", - "sha": "", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.63616421583777, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4813295389566351, - "normalized_score": 48.132953895663505 - }, - "bbh": { - "name": "BBH", - "value": 0.6569729950776678, - "normalized_score": 50.65229487013111 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3564954682779456, - "normalized_score": 35.64954682779456 - }, - "gpqa": { - "name": "GPQA", - "value": 0.37919463087248323, - "normalized_score": 17.225950782997764 - }, - "musr": { - "name": "MUSR", - "value": 0.4793541666666667, - "normalized_score": 19.519270833333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5377327127659575, - "normalized_score": 48.63696808510639 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-10", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 4.790309427252899 - } - }, - { - "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso_bfloat16_0865365f6c0b221c08fdd5adf8965f3720645226_False", - "model": { - "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso", - "sha": "0865365f6c0b221c08fdd5adf8965f3720645226", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.05694131448308, - "has_chat_template": false - }, - "evaluations": { - 
"ifeval": { - "name": "IFEval", - "value": 0.45742407922091166, - "normalized_score": 45.74240792209116 - }, - "bbh": { - "name": "BBH", - "value": 0.6446348390056346, - "normalized_score": 49.178956415446315 - }, - "math": { - "name": "MATH Level 5", - "value": 0.338368580060423, - "normalized_score": 33.8368580060423 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3926174496644295, - "normalized_score": 19.01565995525727 - }, - "musr": { - "name": "MUSR", - "value": 0.4858645833333333, - "normalized_score": 20.466406249999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5329122340425532, - "normalized_score": 48.101359338061464 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-11", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 3.1353183809455882 - } - }, - { - "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v2_bfloat16_5768a4448e4e3a95a7f459ac2b106abbf8510840_False", - "model": { - "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v2", - "sha": "5768a4448e4e3a95a7f459ac2b106abbf8510840", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.185823403667726, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4505301488938239, - "normalized_score": 45.053014889382396 - }, - "bbh": { - "name": "BBH", - "value": 0.6550336897572636, - "normalized_score": 50.419625407805114 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3580060422960725, - "normalized_score": 35.80060422960725 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3825503355704698, - "normalized_score": 17.67337807606264 - }, - "musr": { - "name": "MUSR", - "value": 0.48189583333333336, - "normalized_score": 19.503645833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5379820478723404, - "normalized_score": 48.6646719858156 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-26", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.0, - "co2_cost": 3.1650812295035533 - } - }, - { - "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3_bfloat16_e2f4b596010057af0cd8f27ba992bf9d6af48801_False", - "model": { - "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3", - "sha": "e2f4b596010057af0cd8f27ba992bf9d6af48801", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.026521660758924, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7256523801291683, - "normalized_score": 72.56523801291684 - }, - "bbh": { - "name": "BBH", - "value": 0.641460062329604, - "normalized_score": 48.581587256263106 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4003021148036254, - "normalized_score": 40.03021148036254 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3800335570469799, - "normalized_score": 17.337807606263986 - }, - "musr": { - "name": "MUSR", - "value": 0.4806875, - "normalized_score": 19.385937499999997 - }, - "mmlu_pro": { - "name": 
"MMLU-PRO", - "value": 0.5343251329787234, - "normalized_score": 48.25834810874704 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 11, - "params_billions": 14.0, - "co2_cost": 3.857249308247871 - } - }, - { - "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-IF-Variant_bfloat16_246b592926a9351b195650b5bcfe1cba9218a698_False", - "model": { - "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-IF-Variant", - "sha": "246b592926a9351b195650b5bcfe1cba9218a698", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 34.41237931041414, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6412973133507981, - "normalized_score": 64.12973133507981 - }, - "bbh": { - "name": "BBH", - "value": 0.5520788965536542, - "normalized_score": 35.653096662329 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2545317220543807, - "normalized_score": 25.45317220543807 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34731543624161076, - "normalized_score": 12.975391498881436 - }, - "musr": { - "name": "MUSR", - "value": 0.5319166666666667, - "normalized_score": 28.389583333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4588597074468085, - "normalized_score": 39.873300827423165 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-28", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.0, - "co2_cost": 3.934752419774387 - } - }, - { - "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01_bfloat16_3c65fc0b2ffb89149b4c6e984414d3a13000fd7c_False", - "model": { - "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01", - "sha": "3c65fc0b2ffb89149b4c6e984414d3a13000fd7c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.27916996114231, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6872343160591674, - "normalized_score": 68.72343160591674 - }, - "bbh": { - "name": "BBH", - "value": 0.6358769213927613, - "normalized_score": 47.70662461639211 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3995468277945619, - "normalized_score": 39.95468277945619 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38674496644295303, - "normalized_score": 18.232662192393736 - }, - "musr": { - "name": "MUSR", - "value": 0.48071875000000003, - "normalized_score": 19.55651041666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5275099734042553, - "normalized_score": 47.50110815602837 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-30", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.0, - "co2_cost": 3.7879159156412023 - 
} - }, - { - "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock_bfloat16_06ec138247d03a9308c886c8b326f210c18117e4_False", - "model": { - "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock", - "sha": "06ec138247d03a9308c886c8b326f210c18117e4", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.224844265866444, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7161852772864887, - "normalized_score": 71.61852772864887 - }, - "bbh": { - "name": "BBH", - "value": 0.6420915332649074, - "normalized_score": 48.76100556664132 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4244712990936556, - "normalized_score": 42.44712990936556 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3800335570469799, - "normalized_score": 17.337807606263986 - }, - "musr": { - "name": "MUSR", - "value": 0.47811458333333334, - "normalized_score": 19.230989583333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5315824468085106, - "normalized_score": 47.953605200945624 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-27", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.0, - "co2_cost": 3.7699045334073404 - } - }, - { - "id": "sometimesanotion/Qwen2.5-7B-Gordion-v0.1_bfloat16_0f31fa5189c4d5106d374535ced13c9817cb2c8b_True", - "model": { - "name": "sometimesanotion/Qwen2.5-7B-Gordion-v0.1", - "sha": "0f31fa5189c4d5106d374535ced13c9817cb2c8b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 32.15683651529989, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.748183708116686, - "normalized_score": 74.81837081166859 - }, - "bbh": { - "name": "BBH", - "value": 0.5523808037550308, - "normalized_score": 36.01177427952259 - }, - "math": { - "name": "MATH Level 5", - "value": 0.29154078549848944, - "normalized_score": 29.154078549848943 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.40162499999999995, - "normalized_score": 8.569791666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43001994680851063, - "normalized_score": 36.6688829787234 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-07", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 1.3864242985911932 - } - }, - { - "id": "sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Prose_bfloat16_12c9682b5d9e5e738e6b818b01ead86a76364ffc_False", - "model": { - "name": "sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Prose", - "sha": "12c9682b5d9e5e738e6b818b01ead86a76364ffc", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 30.518790394446103, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5347101246913745, - 
"normalized_score": 53.47101246913745 - }, - "bbh": { - "name": "BBH", - "value": 0.5599089581177875, - "normalized_score": 37.44132686992394 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2892749244712991, - "normalized_score": 28.92749244712991 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32046979865771813, - "normalized_score": 9.395973154362418 - }, - "musr": { - "name": "MUSR", - "value": 0.45017708333333334, - "normalized_score": 14.70546875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4525432180851064, - "normalized_score": 39.17146867612293 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-08", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 0.6469028935169414 - } - }, - { - "id": "sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Reason_bfloat16_2a1913c4153e05dfab7194910864f91c9dac3e16_False", - "model": { - "name": "sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Reason", - "sha": "2a1913c4153e05dfab7194910864f91c9dac3e16", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 29.020681749258376, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49172085621705963, - "normalized_score": 49.17208562170596 - }, - "bbh": { - "name": "BBH", - "value": 0.5498169530870823, - "normalized_score": 36.259832032643125 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2620845921450151, - "normalized_score": 26.208459214501513 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34060402684563756, - "normalized_score": 12.080536912751676 - }, - "musr": { - "name": "MUSR", - "value": 0.4434166666666666, - "normalized_score": 13.660416666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4306848404255319, - "normalized_score": 36.74276004728132 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-08", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.613, - "co2_cost": 0.6705605680412755 - } - }, - { - "id": "sometimesanotion/Qwentessential-14B-v1_bfloat16_feea151e26c094b74bd8e76ef99b698854623b78_False", - "model": { - "name": "sometimesanotion/Qwentessential-14B-v1", - "sha": "feea151e26c094b74bd8e76ef99b698854623b78", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.27876153100695, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6279083941719084, - "normalized_score": 62.79083941719083 - }, - "bbh": { - "name": "BBH", - "value": 0.6545165968552056, - "normalized_score": 50.365974711716206 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4070996978851964, - "normalized_score": 40.70996978851964 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3875838926174497, - "normalized_score": 18.34451901565996 - }, - "musr": { - "name": "MUSR", - "value": 0.4872916666666667, - "normalized_score": 20.778124999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5381482712765957, - 
"normalized_score": 48.68314125295508 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-16", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 1.9578394364648848 - } - }, - { - "id": "sometimesanotion/Qwentinuum-14B-v013_bfloat16_2e5ad6d32e76852a803b976078ac0ac2ff0aaaac_False", - "model": { - "name": "sometimesanotion/Qwentinuum-14B-v013", - "sha": "2e5ad6d32e76852a803b976078ac0ac2ff0aaaac", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.63606262403764, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6711226213114536, - "normalized_score": 67.11226213114536 - }, - "bbh": { - "name": "BBH", - "value": 0.6086634082040333, - "normalized_score": 43.96523461481209 - }, - "math": { - "name": "MATH Level 5", - "value": 0.37084592145015105, - "normalized_score": 37.08459214501511 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3573825503355705, - "normalized_score": 14.317673378076066 - }, - "musr": { - "name": "MUSR", - "value": 0.5154166666666666, - "normalized_score": 24.99374999999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.49908577127659576, - "normalized_score": 44.3428634751773 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-26", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.0, - "co2_cost": 3.847378348005329 - } - }, - { - "id": "sometimesanotion/Qwentinuum-14B-v1_bfloat16_cd71c7c9f4e18deed1fe8000ae4784b96c33281f_False", - "model": { - "name": "sometimesanotion/Qwentinuum-14B-v1", - "sha": "cd71c7c9f4e18deed1fe8000ae4784b96c33281f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 37.151308752643104, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5031616111916382, - "normalized_score": 50.31616111916382 - }, - "bbh": { - "name": "BBH", - "value": 0.6572572845221036, - "normalized_score": 50.73749377731295 - }, - "math": { - "name": "MATH Level 5", - "value": 0.36027190332326287, - "normalized_score": 36.027190332326285 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3825503355704698, - "normalized_score": 17.67337807606264 - }, - "musr": { - "name": "MUSR", - "value": 0.4780520833333333, - "normalized_score": 19.156510416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5409740691489362, - "normalized_score": 48.99711879432624 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-21", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.0, - "co2_cost": 3.8363009330752975 - } - }, - { - "id": "sometimesanotion/Qwentinuum-14B-v2_bfloat16_70f5b77f646b5f4cc6f7decf7bd3c7b3bd4cebcf_False", - "model": { - "name": 
"sometimesanotion/Qwentinuum-14B-v2", - "sha": "70f5b77f646b5f4cc6f7decf7bd3c7b3bd4cebcf", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 37.91575826840247, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5378329499062487, - "normalized_score": 53.78329499062488 - }, - "bbh": { - "name": "BBH", - "value": 0.6555355668062347, - "normalized_score": 50.53548026965414 - }, - "math": { - "name": "MATH Level 5", - "value": 0.37537764350453173, - "normalized_score": 37.53776435045317 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3884228187919463, - "normalized_score": 18.456375838926174 - }, - "musr": { - "name": "MUSR", - "value": 0.47141666666666665, - "normalized_score": 18.193750000000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5408909574468085, - "normalized_score": 48.9878841607565 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-21", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.0, - "co2_cost": 4.006593147681782 - } - }, - { - "id": "sometimesanotion/Qwentinuum-14B-v3_bfloat16_2331e2c1afe4e224c9c019f4f03c2ad19bd15465_False", - "model": { - "name": "sometimesanotion/Qwentinuum-14B-v3", - "sha": "2331e2c1afe4e224c9c019f4f03c2ad19bd15465", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.159303783541326, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6157683834448153, - "normalized_score": 61.57683834448153 - }, - "bbh": { - "name": "BBH", - "value": 0.6538645567116264, - "normalized_score": 50.03761140499875 - }, - "math": { - "name": "MATH Level 5", - "value": 0.35347432024169184, - "normalized_score": 35.34743202416919 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3875838926174497, - "normalized_score": 18.34451901565996 - }, - "musr": { - "name": "MUSR", - "value": 0.48598958333333336, - "normalized_score": 20.615364583333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5413065159574468, - "normalized_score": 49.03405732860521 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-22", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.0, - "co2_cost": 3.826016412766472 - } - }, - { - "id": "sometimesanotion/Qwentinuum-14B-v5_bfloat16_8be868ce00f239bf06c859c0c40fcf4c54a9205c_False", - "model": { - "name": "sometimesanotion/Qwentinuum-14B-v5", - "sha": "8be868ce00f239bf06c859c0c40fcf4c54a9205c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.35077790070098, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.628557782240012, - "normalized_score": 62.8557782240012 - }, - "bbh": { - "name": "BBH", - "value": 0.654985060704008, - "normalized_score": 50.28397367122805 - }, - "math": { - "name": "MATH Level 5", - "value": 0.34441087613293053, - 
"normalized_score": 34.44108761329305 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3875838926174497, - "normalized_score": 18.34451901565996 - }, - "musr": { - "name": "MUSR", - "value": 0.4873854166666667, - "normalized_score": 21.089843749999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5418051861702128, - "normalized_score": 49.08946513002365 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-22", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.0, - "co2_cost": 3.767575484919844 - } - }, - { - "id": "sometimesanotion/Qwentinuum-14B-v6_bfloat16_951576b4056fe63d02cdc31a653585d9706beba9_False", - "model": { - "name": "sometimesanotion/Qwentinuum-14B-v6", - "sha": "951576b4056fe63d02cdc31a653585d9706beba9", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.59946867765128, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6304062110755019, - "normalized_score": 63.04062110755019 - }, - "bbh": { - "name": "BBH", - "value": 0.6544517420216159, - "normalized_score": 50.2319102421481 - }, - "math": { - "name": "MATH Level 5", - "value": 0.36027190332326287, - "normalized_score": 36.027190332326285 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38674496644295303, - "normalized_score": 18.232662192393736 - }, - "musr": { - "name": "MUSR", - "value": 0.48995833333333333, - "normalized_score": 21.178124999999994 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5399767287234043, - "normalized_score": 48.88630319148937 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-22", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.0, - "co2_cost": 3.807936642717539 - } - }, - { - "id": "sometimesanotion/Qwentinuum-14B-v6-Prose_bfloat16_fc6086a7732bc8e87505f4c2bc49561a52ad04a9_False", - "model": { - "name": "sometimesanotion/Qwentinuum-14B-v6-Prose", - "sha": "fc6086a7732bc8e87505f4c2bc49561a52ad04a9", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.69645443942003, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5642860942299764, - "normalized_score": 56.428609422997646 - }, - "bbh": { - "name": "BBH", - "value": 0.6545112522796068, - "normalized_score": 50.14060102445929 - }, - "math": { - "name": "MATH Level 5", - "value": 0.37009063444108764, - "normalized_score": 37.00906344410876 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3884228187919463, - "normalized_score": 18.456375838926174 - }, - "musr": { - "name": "MUSR", - "value": 0.4912604166666667, - "normalized_score": 21.340885416666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5392287234042553, - "normalized_score": 48.8031914893617 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - 
"submission_date": "2024-12-26", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.0, - "co2_cost": 3.927070295037354 - } - }, - { - "id": "sometimesanotion/Qwentinuum-14B-v7_bfloat16_e9505b4931323752ebb0c901494c050835f0e4d8_False", - "model": { - "name": "sometimesanotion/Qwentinuum-14B-v7", - "sha": "e9505b4931323752ebb0c901494c050835f0e4d8", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 39.15035648193577, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6109223526908603, - "normalized_score": 61.092235269086025 - }, - "bbh": { - "name": "BBH", - "value": 0.6551430222697051, - "normalized_score": 50.347065074109956 - }, - "math": { - "name": "MATH Level 5", - "value": 0.35725075528700906, - "normalized_score": 35.725075528700906 - }, - "gpqa": { - "name": "GPQA", - "value": 0.39093959731543626, - "normalized_score": 18.791946308724835 - }, - "musr": { - "name": "MUSR", - "value": 0.48198958333333336, - "normalized_score": 19.94869791666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5409740691489362, - "normalized_score": 48.99711879432624 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-22", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.0, - "co2_cost": 3.795123715981153 - } - }, - { - "id": "sometimesanotion/Qwentinuum-14B-v8_bfloat16_a856d3095937fd39f829824d6c6d9950cf56dc1d_False", - "model": { - "name": "sometimesanotion/Qwentinuum-14B-v8", - "sha": "a856d3095937fd39f829824d6c6d9950cf56dc1d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.48492951696213, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5411552458587658, - "normalized_score": 54.115524585876585 - }, - "bbh": { - "name": "BBH", - "value": 0.6534258495008117, - "normalized_score": 50.11142993537886 - }, - "math": { - "name": "MATH Level 5", - "value": 0.39123867069486407, - "normalized_score": 39.12386706948641 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38338926174496646, - "normalized_score": 17.785234899328863 - }, - "musr": { - "name": "MUSR", - "value": 0.48732291666666666, - "normalized_score": 20.74869791666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5412234042553191, - "normalized_score": 49.02482269503546 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-22", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.0, - "co2_cost": 3.955932580216315 - } - }, - { - "id": "sometimesanotion/Qwentinuum-14B-v9_bfloat16_3109d6342d8740336dc83569def5b3d80abfac38_False", - "model": { - "name": "sometimesanotion/Qwentinuum-14B-v9", - "sha": "3109d6342d8740336dc83569def5b3d80abfac38", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 37.21744200410536, - 
"has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5107304175144174, - "normalized_score": 51.073041751441735 - }, - "bbh": { - "name": "BBH", - "value": 0.6580257842849174, - "normalized_score": 50.80134720526892 - }, - "math": { - "name": "MATH Level 5", - "value": 0.34818731117824775, - "normalized_score": 34.818731117824775 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3859060402684564, - "normalized_score": 18.120805369127517 - }, - "musr": { - "name": "MUSR", - "value": 0.47811458333333334, - "normalized_score": 19.364322916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5421376329787234, - "normalized_score": 49.1264036643026 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-22", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.0, - "co2_cost": 3.8053819230052954 - } - }, - { - "id": "sometimesanotion/Qwenvergence-14B-qv256_bfloat16_13e8b600da0b78b23481738858b7ed2d533ee6e5_False", - "model": { - "name": "sometimesanotion/Qwenvergence-14B-qv256", - "sha": "13e8b600da0b78b23481738858b7ed2d533ee6e5", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.12038665513343, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7006232352380573, - "normalized_score": 70.06232352380573 - }, - "bbh": { - "name": "BBH", - "value": 0.6312084721949004, - "normalized_score": 47.07821800122079 - }, - "math": { - "name": "MATH Level 5", - "value": 0.38972809667673713, - "normalized_score": 38.972809667673715 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3783557046979866, - "normalized_score": 17.114093959731544 - }, - "musr": { - "name": "MUSR", - "value": 0.49259375, - "normalized_score": 21.07421875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5177859042553191, - "normalized_score": 46.42065602836879 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-01", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.0, - "co2_cost": 3.927284904836732 - } - }, - { - "id": "sometimesanotion/Qwenvergence-14B-v0.6-004-model_stock_bfloat16_1fa94759545d9b591bcbbe93a2c90f2a346f9580_False", - "model": { - "name": "sometimesanotion/Qwenvergence-14B-v0.6-004-model_stock", - "sha": "1fa94759545d9b591bcbbe93a2c90f2a346f9580", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.603760450568224, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6859854076073706, - "normalized_score": 68.59854076073707 - }, - "bbh": { - "name": "BBH", - "value": 0.6249338707540049, - "normalized_score": 46.366653802814 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4093655589123867, - "normalized_score": 40.93655589123867 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38338926174496646, - "normalized_score": 17.785234899328863 - }, - "musr": { - "name": "MUSR", - "value": 0.5033229166666667, - 
"normalized_score": 23.348697916666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.519281914893617, - "normalized_score": 46.58687943262411 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-01", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.0, - "co2_cost": 3.819502496328202 - } - }, - { - "id": "sometimesanotion/Qwenvergence-14B-v10_bfloat16_49b05dd6652ff43233ced904b0b49775c06abf75_False", - "model": { - "name": "sometimesanotion/Qwenvergence-14B-v10", - "sha": "49b05dd6652ff43233ced904b0b49775c06abf75", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.47601045691917, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6756938257157675, - "normalized_score": 67.56938257157675 - }, - "bbh": { - "name": "BBH", - "value": 0.6316425399409628, - "normalized_score": 46.74625368312197 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4788519637462236, - "normalized_score": 47.88519637462236 - }, - "gpqa": { - "name": "GPQA", - "value": 0.37919463087248323, - "normalized_score": 17.225950782997764 - }, - "musr": { - "name": "MUSR", - "value": 0.49913541666666666, - "normalized_score": 22.325260416666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.523936170212766, - "normalized_score": 47.10401891252955 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-01-22", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 4.020614495063573 - } - }, - { - "id": "sometimesanotion/Qwenvergence-14B-v11_bfloat16_1bac3b52bdcbba680213f3771451d32ea86f3d28_False", - "model": { - "name": "sometimesanotion/Qwenvergence-14B-v11", - "sha": "1bac3b52bdcbba680213f3771451d32ea86f3d28", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.51678135973019, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7192327468893647, - "normalized_score": 71.92327468893646 - }, - "bbh": { - "name": "BBH", - "value": 0.6367548394062034, - "normalized_score": 47.54895322758622 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4645015105740181, - "normalized_score": 46.45015105740181 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3724832214765101, - "normalized_score": 16.33109619686801 - }, - "musr": { - "name": "MUSR", - "value": 0.4754479166666667, - "normalized_score": 18.764322916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5327460106382979, - "normalized_score": 48.082890070921984 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-29", - "submission_date": "2025-01-29", - "generation": 1, - "base_model": "sometimesanotion/Qwenvergence-14B-v11 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 14.766, - "co2_cost": 
3.710329520646223 - } - }, - { - "id": "sometimesanotion/Qwenvergence-14B-v12-Prose_bfloat16_9e84960b3d2b763ac5b3a3316340af39a4130ba9_False", - "model": { - "name": "sometimesanotion/Qwenvergence-14B-v12-Prose", - "sha": "9e84960b3d2b763ac5b3a3316340af39a4130ba9", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.05247832908145, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5412051135431766, - "normalized_score": 54.12051135431766 - }, - "bbh": { - "name": "BBH", - "value": 0.6504247508173936, - "normalized_score": 49.672529400653026 - }, - "math": { - "name": "MATH Level 5", - "value": 0.35347432024169184, - "normalized_score": 35.34743202416919 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38674496644295303, - "normalized_score": 18.232662192393736 - }, - "musr": { - "name": "MUSR", - "value": 0.49913541666666666, - "normalized_score": 22.258593749999992 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5381482712765957, - "normalized_score": 48.68314125295508 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-29", - "submission_date": "2025-01-30", - "generation": 1, - "base_model": "sometimesanotion/Qwenvergence-14B-v12-Prose (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 3.5109968121064616 - } - }, - { - "id": "sometimesanotion/Qwenvergence-14B-v12-Prose-DS_bfloat16_51d945881cab30d74de0c8f91a8dda4ea7ed9dc4_False", - "model": { - "name": "sometimesanotion/Qwenvergence-14B-v12-Prose-DS", - "sha": "51d945881cab30d74de0c8f91a8dda4ea7ed9dc4", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.20324803823761, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6173419859306639, - "normalized_score": 61.734198593066395 - }, - "bbh": { - "name": "BBH", - "value": 0.6506726813719318, - "normalized_score": 49.86582018521094 - }, - "math": { - "name": "MATH Level 5", - "value": 0.43051359516616317, - "normalized_score": 43.051359516616316 - }, - "gpqa": { - "name": "GPQA", - "value": 0.39429530201342283, - "normalized_score": 19.239373601789712 - }, - "musr": { - "name": "MUSR", - "value": 0.5150729166666667, - "normalized_score": 24.78411458333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5369015957446809, - "normalized_score": 48.54462174940899 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-30", - "submission_date": "2025-01-30", - "generation": 1, - "base_model": "sometimesanotion/Qwenvergence-14B-v12-Prose-DS (Merge)", - "hub_license": "", - "hub_hearts": 7, - "params_billions": 14.766, - "co2_cost": 3.3611769809371155 - } - }, - { - "id": "sometimesanotion/Qwenvergence-14B-v13-Prose-DS_bfloat16_f2c9340915c7a0e49ba980baf66391bc1d568695_False", - "model": { - "name": "sometimesanotion/Qwenvergence-14B-v13-Prose-DS", - "sha": "f2c9340915c7a0e49ba980baf66391bc1d568695", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 
41.078775784940596, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.717808747456748, - "normalized_score": 71.78087474567481 - }, - "bbh": { - "name": "BBH", - "value": 0.6405077084802886, - "normalized_score": 48.43969044109607 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3859516616314199, - "normalized_score": 38.59516616314199 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38338926174496646, - "normalized_score": 17.785234899328863 - }, - "musr": { - "name": "MUSR", - "value": 0.49265625, - "normalized_score": 21.548697916666658 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.534906914893617, - "normalized_score": 48.32299054373522 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-01", - "submission_date": "2025-02-05", - "generation": 1, - "base_model": "sometimesanotion/Qwenvergence-14B-v13-Prose-DS (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 8, - "params_billions": 14.766, - "co2_cost": 5.834947059251281 - } - }, - { - "id": "sometimesanotion/Qwenvergence-14B-v15-Prose-MS_bfloat16__False", - "model": { - "name": "sometimesanotion/Qwenvergence-14B-v15-Prose-MS", - "sha": "", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 37.71173844405559, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5032114788760489, - "normalized_score": 50.32114788760489 - }, - "bbh": { - "name": "BBH", - "value": 0.6550130348108012, - "normalized_score": 50.27819609278966 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3632930513595166, - "normalized_score": 36.329305135951664 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3951342281879195, - "normalized_score": 19.35123042505593 - }, - "musr": { - "name": "MUSR", - "value": 0.4912916666666667, - "normalized_score": 21.178124999999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.539311835106383, - "normalized_score": 48.81242612293145 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2025-02-26", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.766, - "co2_cost": 1.9454235428550473 - } - }, - { - "id": "sometimesanotion/Qwenvergence-14B-v2-Prose_bfloat16_503b367e07a8ed3ce532d03ea35d40d8f17d6e35_False", - "model": { - "name": "sometimesanotion/Qwenvergence-14B-v2-Prose", - "sha": "503b367e07a8ed3ce532d03ea35d40d8f17d6e35", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.95506362229142, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47048830436574957, - "normalized_score": 47.04883043657496 - }, - "bbh": { - "name": "BBH", - "value": 0.6518830473518972, - "normalized_score": 49.93347199164365 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3557401812688822, - "normalized_score": 35.57401812688822 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3934563758389262, - "normalized_score": 19.12751677852349 - }, - "musr": { - "name": "MUSR", - "value": 0.49259375, - 
"normalized_score": 21.47421874999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5371509308510638, - "normalized_score": 48.5723256501182 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-15", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.0, - "co2_cost": 3.3695775044675425 - } - }, - { - "id": "sometimesanotion/Qwenvergence-14B-v3_bfloat16_40c489fd71724f2fa3f7154e4874c6d00700c6c0_False", - "model": { - "name": "sometimesanotion/Qwenvergence-14B-v3", - "sha": "40c489fd71724f2fa3f7154e4874c6d00700c6c0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 37.51730006937248, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.504410519643435, - "normalized_score": 50.4410519643435 - }, - "bbh": { - "name": "BBH", - "value": 0.654823836148701, - "normalized_score": 50.352687515053724 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3693353474320242, - "normalized_score": 36.933534743202415 - }, - "gpqa": { - "name": "GPQA", - "value": 0.38422818791946306, - "normalized_score": 17.897091722595075 - }, - "musr": { - "name": "MUSR", - "value": 0.48859375, - "normalized_score": 20.740885416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5386469414893617, - "normalized_score": 48.73854905437352 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-12-21", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 14.0, - "co2_cost": 3.8056301463694058 - } - }, - { - "id": "sometimesanotion/Qwenvergence-14B-v3-Prose_bfloat16_15e4222295ef31aee17c2e5b6e7a31ffd21e3c7b_False", - "model": { - "name": "sometimesanotion/Qwenvergence-14B-v3-Prose", - "sha": "15e4222295ef31aee17c2e5b6e7a31ffd21e3c7b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 37.52186681948957, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49177072390147036, - "normalized_score": 49.17707239014703 - }, - "bbh": { - "name": "BBH", - "value": 0.6512913170949324, - "normalized_score": 49.79836668099142 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3648036253776435, - "normalized_score": 36.48036253776435 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3951342281879195, - "normalized_score": 19.35123042505593 - }, - "musr": { - "name": "MUSR", - "value": 0.49389583333333337, - "normalized_score": 21.7703125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5369847074468085, - "normalized_score": 48.55385638297872 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-21", - "submission_date": "2024-12-21", - "generation": 1, - "base_model": "sometimesanotion/Qwenvergence-14B-v3-Prose (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 5, - "params_billions": 14.766, - "co2_cost": 
3.4226827050371154
- }
- },
- {
- "id": "sometimesanotion/Qwenvergence-14B-v3-Reason_float16_1e613b0e6bfdb08e7c21a3e6ba3b84e361cf8350_False",
- "model": {
- "name": "sometimesanotion/Qwenvergence-14B-v3-Reason",
- "sha": "1e613b0e6bfdb08e7c21a3e6ba3b84e361cf8350",
- "precision": "float16",
- "type": "fine-tunedondomain-specificdatasets",
- "weight_type": "Original",
- "architecture": "Qwen2ForCausalLM",
- "average_score": 37.613265274956085,
- "has_chat_template": false
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.5366837768232734,
- "normalized_score": 53.66837768232734
- },
- "bbh": {
- "name": "BBH",
- "value": 0.6561283957466177,
- "normalized_score": 50.69444798867054
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.3580060422960725,
- "normalized_score": 35.80060422960725
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.38674496644295303,
- "normalized_score": 18.232662192393736
- },
- "musr": {
- "name": "MUSR",
- "value": 0.47402083333333334,
- "normalized_score": 18.45260416666667
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.5394780585106383,
- "normalized_score": 48.83089539007093
- }
- },
- "features": {
- "is_not_available_on_hub": false,
- "is_merged": false,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": false
- },
- "metadata": {
- "upload_date": "",
- "submission_date": "2024-12-21",
- "generation": 0,
- "base_model": "Removed",
- "hub_license": "",
- "hub_hearts": 0,
- "params_billions": 14.0,
- "co2_cost": 3.791207955566846
- }
- },
- {
- "id": "sometimesanotion/Qwenvergence-14B-v3-Reason_bfloat16_6acf3cbc9c36b19d66ac683f073e32a9bf86d56e_False",
- "model": {
- "name": "sometimesanotion/Qwenvergence-14B-v3-Reason",
- "sha": "6acf3cbc9c36b19d66ac683f073e32a9bf86d56e",
- "precision": "bfloat16",
- "type": "basemergesandmoerges",
- "weight_type": "Original",
- "architecture": "Qwen2ForCausalLM",
- "average_score": 36.71404812686041,
- "has_chat_template": false
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.5278161943642867,
- "normalized_score": 52.78161943642867
- },
- "bbh": {
- "name": "BBH",
- "value": 0.6557437566824342,
- "normalized_score": 50.635776137274746
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.3119335347432024,
- "normalized_score": 31.19335347432024
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.38422818791946306,
- "normalized_score": 17.897091722595075
- },
- "musr": {
- "name": "MUSR",
- "value": 0.47541666666666665,
- "normalized_score": 18.927083333333332
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.5396442819148937,
- "normalized_score": 48.84936465721041
- }
- },
- "features": {
- "is_not_available_on_hub": false,
- "is_merged": false,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": false
- },
- "metadata": {
- "upload_date": "",
- "submission_date": "2024-12-21",
- "generation": 0,
- "base_model": "Removed",
- "hub_license": "",
- "hub_hearts": 0,
- "params_billions": 14.0,
- "co2_cost": 1.926171913330613
- }
- },
- {
- "id": "sometimesanotion/Qwenvergence-14B-v6-Prose_bfloat16_bbb6b0900b630a3120d036d3434ca0fa508ed559_False",
- "model": {
- "name": "sometimesanotion/Qwenvergence-14B-v6-Prose",
- "sha": "bbb6b0900b630a3120d036d3434ca0fa508ed559",
- "precision": "bfloat16",
- "type": "basemergesandmoerges",
- "weight_type": "Original",
- "architecture": "Qwen2ForCausalLM",
- "average_score": 38.950847394921304,
- "has_chat_template": false
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.5990073006289978,
- "normalized_score": 59.900730062899775
- },
- "bbh": {
- "name": "BBH",
- "value": 0.6543750230807198,
- "normalized_score": 50.11997604002554
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.3564954682779456,
- "normalized_score": 35.64954682779456
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.3884228187919463,
- "normalized_score": 18.456375838926174
- },
- "musr": {
- "name": "MUSR",
- "value": 0.48865625,
- "normalized_score": 21.015364583333326
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.5370678191489362,
- "normalized_score": 48.56309101654846
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": true,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": false
- },
- "metadata": {
- "upload_date": "2024-12-26",
- "submission_date": "2024-12-26",
- "generation": 1,
- "base_model": "sometimesanotion/Qwenvergence-14B-v6-Prose (Merge)",
- "hub_license": "apache-2.0",
- "hub_hearts": 0,
- "params_billions": 14.0,
- "co2_cost": 3.87052828006981
- }
- },
- {
- "id": "sometimesanotion/Qwenvergence-14B-v6-Prose-model_stock_bfloat16__False",
- "model": {
- "name": "sometimesanotion/Qwenvergence-14B-v6-Prose-model_stock",
- "sha": "",
- "precision": "bfloat16",
- "type": "basemergesandmoerges",
- "weight_type": "Original",
- "architecture": "Qwen2ForCausalLM",
- "average_score": 37.16067561259897,
- "has_chat_template": false
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.48110458029140457,
- "normalized_score": 48.11045802914046
- },
- "bbh": {
- "name": "BBH",
- "value": 0.6530441861690175,
- "normalized_score": 49.91412568099364
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.36027190332326287,
- "normalized_score": 36.027190332326285
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.3934563758389262,
- "normalized_score": 19.12751677852349
- },
- "musr": {
- "name": "MUSR",
- "value": 0.48989583333333336,
- "normalized_score": 21.036979166666658
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.5387300531914894,
- "normalized_score": 48.74778368794326
- }
- },
- "features": {
- "is_not_available_on_hub": false,
- "is_merged": false,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": false
- },
- "metadata": {
- "upload_date": "",
- "submission_date": "2024-12-26",
- "generation": 0,
- "base_model": "Removed",
- "hub_license": "",
- "hub_hearts": 0,
- "params_billions": 14.0,
- "co2_cost": 1.9661217268137188
- }
- },
- {
- "id": "sometimesanotion/Qwenvergence-14B-v8_bfloat16_2153b2ba874e99887b255967bb803222dc7d5c77_False",
- "model": {
- "name": "sometimesanotion/Qwenvergence-14B-v8",
- "sha": "2153b2ba874e99887b255967bb803222dc7d5c77",
- "precision": "bfloat16",
- "type": "basemergesandmoerges",
- "weight_type": "Original",
- "architecture": "Qwen2ForCausalLM",
- "average_score": 39.212221774327226,
- "has_chat_template": false
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.5913387589373973,
- "normalized_score": 59.13387589373972
- },
- "bbh": {
- "name": "BBH",
- "value": 0.6522455361956444,
- "normalized_score": 49.83459258547442
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.40483383685800606,
- "normalized_score": 40.483383685800604
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.3808724832214765,
- "normalized_score": 17.4496644295302
- },
- "musr": {
- "name": "MUSR",
- "value": 0.47678125,
- "normalized_score": 19.09765625
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.543467420212766,
- "normalized_score": 49.27415780141844
- }
- },
- "features": {
- "is_not_available_on_hub": false,
- "is_merged": false,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": false
- },
- "metadata": {
- "upload_date": "",
- "submission_date": "2025-01-16",
- "generation": 0,
- "base_model": "Removed",
- "hub_license": "",
- "hub_hearts": 0,
- "params_billions": 14.766,
- "co2_cost": 4.021208808906965
- }
- },
- {
- "id": "sometimesanotion/Qwenvergence-14B-v9_bfloat16_68f4f5d82011dec96bccec481788dd7e591a6d75_False",
- "model": {
- "name": "sometimesanotion/Qwenvergence-14B-v9",
- "sha": "68f4f5d82011dec96bccec481788dd7e591a6d75",
- "precision": "bfloat16",
- "type": "basemergesandmoerges",
- "weight_type": "Original",
- "architecture": "Qwen2ForCausalLM",
- "average_score": 39.81487420685213,
- "has_chat_template": false
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.6598070896332842,
- "normalized_score": 65.98070896332841
- },
- "bbh": {
- "name": "BBH",
- "value": 0.6165623747365094,
- "normalized_score": 44.843355808017556
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.41389728096676737,
- "normalized_score": 41.389728096676734
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.36828859060402686,
- "normalized_score": 15.771812080536915
- },
- "musr": {
- "name": "MUSR",
- "value": 0.5141145833333334,
- "normalized_score": 25.23098958333333
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.5110538563829787,
- "normalized_score": 45.67265070921986
- }
- },
- "features": {
- "is_not_available_on_hub": false,
- "is_merged": false,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": false
- },
- "metadata": {
- "upload_date": "",
- "submission_date": "2025-01-17",
- "generation": 0,
- "base_model": "Removed",
- "hub_license": "",
- "hub_hearts": 0,
- "params_billions": 14.766,
- "co2_cost": 3.8307809399176147
- }
- },
- {
- "id": "sometimesanotion/lamarck-14b-prose-model_stock_bfloat16_d71942f5b5471fca97914ea26a9f66bb5866693e_False",
- "model": {
- "name": "sometimesanotion/lamarck-14b-prose-model_stock",
- "sha": "d71942f5b5471fca97914ea26a9f66bb5866693e",
- "precision": "bfloat16",
- "type": "basemergesandmoerges",
- "weight_type": "Original",
- "architecture": "Qwen2ForCausalLM",
- "average_score": 35.677974423595586,
- "has_chat_template": false
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.4276486389446668,
- "normalized_score": 42.76486389446668
- },
- "bbh": {
- "name": "BBH",
- "value": 0.6487621585665343,
- "normalized_score": 49.383875963761994
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.3413897280966767,
- "normalized_score": 34.13897280966767
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.3934563758389262,
- "normalized_score": 19.12751677852349
- },
- "musr": {
- "name": "MUSR",
- "value": 0.48459375,
- "normalized_score": 20.274218749999992
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.535405585106383,
- "normalized_score": 48.37839834515367
- }
- },
- "features": {
- "is_not_available_on_hub": false,
- "is_merged": false,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": false
- },
- "metadata": {
- "upload_date": "",
- "submission_date": "2024-12-09",
- "generation": 0,
- "base_model": "Removed",
- "hub_license": "",
- "hub_hearts": 0,
- "params_billions": 14.766,
- "co2_cost": 3.1150389719662797
- }
- },
- {
- "id": "sometimesanotion/lamarck-14b-reason-model_stock_bfloat16_0f1d7f04b9219ffe3bc26aa3146380fba249d61a_False",
- "model": {
- "name": "sometimesanotion/lamarck-14b-reason-model_stock",
- "sha": "0f1d7f04b9219ffe3bc26aa3146380fba249d61a",
- "precision": "bfloat16",
- "type": "basemergesandmoerges",
- "weight_type": "Original",
- "architecture": "Qwen2ForCausalLM",
- "average_score": 36.96126205505086,
- "has_chat_template": false
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.49646715160219335,
- "normalized_score": 49.64671516021933
- },
- "bbh": {
- "name": "BBH",
- "value": 0.6568898541408251,
- "normalized_score": 50.715404125684955
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.3580060422960725,
- "normalized_score": 35.80060422960725
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.38422818791946306,
- "normalized_score": 17.897091722595075
- },
- "musr": {
- "name": "MUSR",
- "value": 0.47408333333333336,
- "normalized_score": 18.793749999999992
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.5402260638297872,
- "normalized_score": 48.91400709219858
- }
- },
- "features": {
- "is_not_available_on_hub": false,
- "is_merged": false,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": false
- },
- "metadata": {
- "upload_date": "",
- "submission_date": "2024-12-09",
- "generation": 0,
- "base_model": "Removed",
- "hub_license": "",
- "hub_hearts": 0,
- "params_billions": 14.766,
- "co2_cost": 9.97910462331388
- }
- },
- {
- "id": "sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-161415_float16_467eff1ac1c3395c130929bbe1f34a8194715e7c_True",
- "model": {
- "name": "sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-161415",
- "sha": "467eff1ac1c3395c130929bbe1f34a8194715e7c",
- "precision": "float16",
- "type": "chatmodels",
- "weight_type": "Adapter",
- "architecture": "?",
- "average_score": 8.889814609614424,
- "has_chat_template": true
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.28933784580468713,
- "normalized_score": 28.93378458046871
- },
- "bbh": {
- "name": "BBH",
- "value": 0.38041816886828617,
- "normalized_score": 12.789212309485556
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.011329305135951661,
- "normalized_score": 1.1329305135951662
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.24664429530201343,
- "normalized_score": 0.0
- },
- "musr": {
- "name": "MUSR",
- "value": 0.3860625,
- "normalized_score": 6.024479166666666
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.14012632978723405,
- "normalized_score": 4.4584810874704495
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": false,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": false
- },
- "metadata": {
- "upload_date": "2024-10-15",
- "submission_date": "2024-10-16",
- "generation": 1,
- "base_model": "unsloth/zephyr-sft-bnb-4bit",
- "hub_license": "apache-2.0",
- "hub_hearts": 0,
- "params_billions": 7.723,
- "co2_cost": 3.255423129392052
- }
- },
- {
- "id": "sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-164205_float16_467eff1ac1c3395c130929bbe1f34a8194715e7c_True",
- "model": {
- "name": "sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-164205",
- "sha": "467eff1ac1c3395c130929bbe1f34a8194715e7c",
- "precision": "float16",
- "type": "chatmodels",
- "weight_type": "Adapter",
- "architecture": "?",
- "average_score": 12.932104434694521,
- "has_chat_template": true
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.3199377651298555,
- "normalized_score": 31.993776512985548
- },
- "bbh": {
- "name": "BBH",
- "value": 0.39586243698929185,
- "normalized_score": 16.710725154148115
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.008308157099697885,
- "normalized_score": 0.8308157099697886
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.276006711409396,
- "normalized_score": 3.467561521252797
- },
- "musr": {
- "name": "MUSR",
- "value": 0.4271770833333333,
- "normalized_score": 12.097135416666667
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.21243351063829788,
- "normalized_score": 12.492612293144207
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": false,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": false
- },
- "metadata": {
- "upload_date": "2024-10-15",
- "submission_date": "2024-10-16",
- "generation": 1,
- "base_model": "unsloth/zephyr-sft-bnb-4bit",
- "hub_license": "apache-2.0",
- "hub_hearts": 0,
- "params_billions": 7.723,
- "co2_cost": 3.177996296525118
- }
- },
- {
- "id": "sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-170522_float16_467eff1ac1c3395c130929bbe1f34a8194715e7c_True",
- "model": {
- "name": "sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-170522",
- "sha": "467eff1ac1c3395c130929bbe1f34a8194715e7c",
- "precision": "float16",
- "type": "chatmodels",
- "weight_type": "Adapter",
- "architecture": "?",
- "average_score": 13.424509088727826,
- "has_chat_template": true
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.37644117607946914,
- "normalized_score": 37.64411760794691
- },
- "bbh": {
- "name": "BBH",
- "value": 0.3828367247244511,
- "normalized_score": 14.138281987896178
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.00906344410876133,
- "normalized_score": 0.906344410876133
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.2651006711409396,
- "normalized_score": 2.0134228187919474
- },
- "musr": {
- "name": "MUSR",
- "value": 0.4404166666666667,
- "normalized_score": 14.11875
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.20553523936170212,
- "normalized_score": 11.726137706855791
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": false,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": false
- },
- "metadata": {
- "upload_date": "2024-10-15",
- "submission_date": "2024-10-16",
- "generation": 1,
- "base_model": "unsloth/zephyr-sft-bnb-4bit",
- "hub_license": "apache-2.0",
- "hub_hearts": 0,
- "params_billions": 7.723,
- "co2_cost": 3.229396078207106
- }
- },
- {
- "id": "sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbc-213steps_bfloat16_4ae2af48b6ac53f14e153b91309624100ae3d7c2_True",
- "model": {
- "name": "sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbc-213steps",
- "sha": "4ae2af48b6ac53f14e153b91309624100ae3d7c2",
- "precision": "bfloat16",
- "type": "chatmodels",
- "weight_type": "Original",
- "architecture": "MistralForCausalLM",
- "average_score": 15.853792634957259,
- "has_chat_template": true
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.4275489035758454,
- "normalized_score": 42.75489035758454
- },
- "bbh": {
- "name": "BBH",
- "value": 0.4197290890050172,
- "normalized_score": 19.669907319611504
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.0256797583081571,
- "normalized_score": 2.56797583081571
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.26174496644295303,
- "normalized_score": 1.5659955257270708
- },
- "musr": {
- "name": "MUSR",
- "value": 0.40863541666666664,
- "normalized_score": 9.579427083333336
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.27086103723404253,
- "normalized_score": 18.98455969267139
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": false,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": false
- },
- "metadata": {
- "upload_date": "2024-10-02",
- "submission_date": "2024-10-03",
- "generation": 0,
- "base_model": "sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbc-213steps",
- "hub_license": "apache-2.0",
- "hub_hearts": 0,
- "params_billions": 7.242,
- "co2_cost": 1.3976205387596685
- }
- },
- {
- "id": "sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbo-180steps_bfloat16_0393baf362e29cf51867596fb64746b5edafa6ed_True",
- "model": {
- "name": "sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbo-180steps",
- "sha": "0393baf362e29cf51867596fb64746b5edafa6ed",
- "precision": "bfloat16",
- "type": "chatmodels",
- "weight_type": "Original",
- "architecture": "MistralForCausalLM",
- "average_score": 15.602364526823635,
- "has_chat_template": true
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.40871443325930756,
- "normalized_score": 40.871443325930755
- },
- "bbh": {
- "name": "BBH",
- "value": 0.4322585223071556,
- "normalized_score": 21.35140303187909
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.023413897280966767,
- "normalized_score": 2.3413897280966767
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.276006711409396,
- "normalized_score": 3.467561521252797
- },
- "musr": {
- "name": "MUSR",
- "value": 0.38851041666666664,
- "normalized_score": 6.1638020833333345
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.27476728723404253,
- "normalized_score": 19.418587470449168
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": false,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": false
- },
- "metadata": {
- "upload_date": "2024-10-02",
- "submission_date": "2024-10-03",
- "generation": 0,
- "base_model": "sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbo-180steps",
- "hub_license": "apache-2.0",
- "hub_hearts": 0,
- "params_billions": 7.242,
- "co2_cost": 1.3513696384081009
- }
- },
- {
- "id": "sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbr-180steps_bfloat16_c4ee848caf14649f9260166653d4cdb30bcfc52a_True",
- "model": {
- "name": "sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbr-180steps",
- "sha": "c4ee848caf14649f9260166653d4cdb30bcfc52a",
- "precision": "bfloat16",
- "type": "chatmodels",
- "weight_type": "Original",
- "architecture": "MistralForCausalLM",
- "average_score": 16.475407146329456,
- "has_chat_template": true
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.4032190144372487,
- "normalized_score": 40.32190144372487
- },
- "bbh": {
- "name": "BBH",
- "value": 0.43053552565190517,
- "normalized_score": 21.21356840671135
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.024924471299093656,
- "normalized_score": 2.492447129909366
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.2802013422818792,
- "normalized_score": 4.026845637583895
- },
- "musr": {
- "name": "MUSR",
- "value": 0.42575,
- "normalized_score": 11.785416666666665
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.2711103723404255,
- "normalized_score": 19.01226359338061
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": false,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": false
- },
- "metadata": {
- "upload_date": "2024-10-02",
- "submission_date": "2024-10-03",
- "generation": 0,
- "base_model": "sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbr-180steps",
- "hub_license": "apache-2.0",
- "hub_hearts": 0,
- "params_billions": 7.242,
- "co2_cost": 1.3684496812307692
- }
- },
- {
- "id": "sophosympatheia/Midnight-Miqu-70B-v1.5_float16_f6062ca8ccba38ce91eef16f85138e279160b9b9_True",
- "model": {
- "name": "sophosympatheia/Midnight-Miqu-70B-v1.5",
- "sha": "f6062ca8ccba38ce91eef16f85138e279160b9b9",
- "precision": "float16",
- "type": "basemergesandmoerges",
- "weight_type": "Original",
- "architecture": "LlamaForCausalLM",
- "average_score": 25.99019477918401,
- "has_chat_template": true
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.6118465671086051,
- "normalized_score": 61.18465671086051
- },
- "bbh": {
- "name": "BBH",
- "value": 0.5606228371685053,
- "normalized_score": 38.541461590145985
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.0702416918429003,
- "normalized_score": 7.02416918429003
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.2961409395973154,
- "normalized_score": 6.152125279642054
- },
- "musr": {
- "name": "MUSR",
- "value": 0.42441666666666666,
- "normalized_score": 11.652083333333332
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.38248005319148937,
- "normalized_score": 31.386672576832154
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": true,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": false
- },
- "metadata": {
- "upload_date": "2024-03-11",
- "submission_date": "2024-10-22",
- "generation": 1,
- "base_model": "sophosympatheia/Midnight-Miqu-70B-v1.5 (Merge)",
- "hub_license": "other",
- "hub_hearts": 196,
- "params_billions": 68.977,
- "co2_cost": 12.90593416636188
- }
- },
- {
- "id": "speakleash/Bielik-11B-v2_bfloat16_a620588280793e605d1e0b125fe2a663030206ab_False",
- "model": {
- "name": "speakleash/Bielik-11B-v2",
- "sha": "a620588280793e605d1e0b125fe2a663030206ab",
- "precision": "bfloat16",
- "type": "continuouslypretrained",
- "weight_type": "Original",
- "architecture": "MistralForCausalLM",
- "average_score": 15.989069166924798,
- "has_chat_template": false
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.23810489501190177,
- "normalized_score": 23.810489501190176
- },
- "bbh": {
- "name": "BBH",
- "value": 0.49308409091594996,
- "normalized_score": 27.817906537862154
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.07854984894259819,
- "normalized_score": 7.854984894259818
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.28859060402684567,
- "normalized_score": 5.145413870246088
- },
- "musr": {
- "name": "MUSR",
- "value": 0.39244791666666673,
- "normalized_score": 7.555989583333338
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.3137466755319149,
- "normalized_score": 23.749630614657207
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": false,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": true
- },
- "metadata": {
- "upload_date": "2024-08-26",
- "submission_date": "2024-10-16",
- "generation": 0,
- "base_model": "speakleash/Bielik-11B-v2",
- "hub_license": "apache-2.0",
- "hub_hearts": 39,
- "params_billions": 11.169,
- "co2_cost": 1.837466067264166
- }
- },
- {
- "id": "speakleash/Bielik-11B-v2.0-Instruct_bfloat16_e4721e2af1152bad2e077c34375911a28aa1b8dc_True",
- "model": {
- "name": "speakleash/Bielik-11B-v2.0-Instruct",
- "sha": "e4721e2af1152bad2e077c34375911a28aa1b8dc",
- "precision": "bfloat16",
- "type": "chatmodels",
- "weight_type": "Original",
- "architecture": "MistralForCausalLM",
- "average_score": 24.661167243528862,
- "has_chat_template": true
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.5252430218486948,
- "normalized_score": 52.524302184869484
- },
- "bbh": {
- "name": "BBH",
- "value": 0.5361579931173499,
- "normalized_score": 33.774676263963016
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.11858006042296072,
- "normalized_score": 11.858006042296072
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.31711409395973156,
- "normalized_score": 8.948545861297541
- },
- "musr": {
- "name": "MUSR",
- "value": 0.4467083333333333,
- "normalized_score": 14.738541666666668
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.3351063829787234,
- "normalized_score": 26.12293144208038
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": false,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": true
- },
- "metadata": {
- "upload_date": "2024-08-26",
- "submission_date": "2024-10-16",
- "generation": 1,
- "base_model": "speakleash/Bielik-11B-v2",
- "hub_license": "apache-2.0",
- "hub_hearts": 4,
- "params_billions": 11.169,
- "co2_cost": 1.7768492123292696
- }
- },
- {
- "id": "speakleash/Bielik-11B-v2.1-Instruct_bfloat16_c91776047eb235f51238a9e42f80f19e3ed114e7_True",
- "model": {
- "name": "speakleash/Bielik-11B-v2.1-Instruct",
- "sha": "c91776047eb235f51238a9e42f80f19e3ed114e7",
- "precision": "bfloat16",
- "type": "chatmodels",
- "weight_type": "Original",
- "architecture": "MistralForCausalLM",
- "average_score": 27.19716415968224,
- "has_chat_template": true
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.5089817240477489,
- "normalized_score": 50.89817240477488
- },
- "bbh": {
- "name": "BBH",
- "value": 0.5530119844151298,
- "normalized_score": 36.29005304442506
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.26661631419939574,
- "normalized_score": 26.661631419939575
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.337248322147651,
- "normalized_score": 11.633109619686799
- },
- "musr": {
- "name": "MUSR",
- "value": 0.4185208333333333,
- "normalized_score": 10.515104166666669
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.34466422872340424,
- "normalized_score": 27.184914302600472
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": false,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": true
- },
- "metadata": {
- "upload_date": "2024-08-26",
- "submission_date": "2024-10-16",
- "generation": 1,
- "base_model": "speakleash/Bielik-11B-v2",
- "hub_license": "apache-2.0",
- "hub_hearts": 3,
- "params_billions": 11.169,
- "co2_cost": 2.6112462322667813
- }
- },
- {
- "id": "speakleash/Bielik-11B-v2.2-Instruct_bfloat16_b5502dab61fcc5e087e72c8a120057dea78082ad_True",
- "model": {
- "name": "speakleash/Bielik-11B-v2.2-Instruct",
- "sha": "b5502dab61fcc5e087e72c8a120057dea78082ad",
- "precision": "bfloat16",
- "type": "chatmodels",
- "weight_type": "Original",
- "architecture": "MistralForCausalLM",
- "average_score": 27.9792775947469,
- "has_chat_template": true
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.5551935531057595,
- "normalized_score": 55.519355310575946
- },
- "bbh": {
- "name": "BBH",
- "value": 0.5596561190863629,
- "normalized_score": 36.95804119871526
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.2681268882175227,
- "normalized_score": 26.812688821752268
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.3313758389261745,
- "normalized_score": 10.850111856823268
- },
- "musr": {
- "name": "MUSR",
- "value": 0.41712499999999997,
- "normalized_score": 10.107291666666667
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.3486535904255319,
- "normalized_score": 27.628176713947987
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": false,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": true
- },
- "metadata": {
- "upload_date": "2024-08-26",
- "submission_date": "2024-10-16",
- "generation": 1,
- "base_model": "speakleash/Bielik-11B-v2",
- "hub_license": "apache-2.0",
- "hub_hearts": 59,
- "params_billions": 11.169,
- "co2_cost": 2.9218505367238095
- }
- },
- {
- "id": "speakleash/Bielik-11B-v2.3-Instruct_float16_7494fdc4d648707ea12b908d40b0ae708989b329_True",
- "model": {
- "name": "speakleash/Bielik-11B-v2.3-Instruct",
- "sha": "7494fdc4d648707ea12b908d40b0ae708989b329",
- "precision": "float16",
- "type": "chatmodels",
- "weight_type": "Original",
- "architecture": "MistralForCausalLM",
- "average_score": 28.331123935725582,
- "has_chat_template": true
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.558290890393046,
- "normalized_score": 55.829089039304606
- },
- "bbh": {
- "name": "BBH",
- "value": 0.5662699020280031,
- "normalized_score": 38.062787893588194
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.2084592145015106,
- "normalized_score": 20.84592145015106
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.34060402684563756,
- "normalized_score": 12.080536912751676
- },
- "musr": {
- "name": "MUSR",
- "value": 0.4518229166666667,
- "normalized_score": 16.011197916666664
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.34441489361702127,
- "normalized_score": 27.157210401891252
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": true,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": true
- },
- "metadata": {
- "upload_date": "2024-08-30",
- "submission_date": "2024-10-16",
- "generation": 1,
- "base_model": "speakleash/Bielik-11B-v2.3-Instruct (Merge)",
- "hub_license": "apache-2.0",
- "hub_hearts": 47,
- "params_billions": 11.169,
- "co2_cost": 1.8122839514567146
- }
- },
- {
- "id": "spmurrayzzz/Mistral-Syndicate-7B_bfloat16_c74379dd6055ef4a70339b105ea315cebec23d24_False",
- "model": {
- "name": "spmurrayzzz/Mistral-Syndicate-7B",
- "sha": "c74379dd6055ef4a70339b105ea315cebec23d24",
- "precision": "bfloat16",
- "type": "fine-tunedondomain-specificdatasets",
- "weight_type": "Original",
- "architecture": "MistralForCausalLM",
- "average_score": 14.012817664482597,
- "has_chat_template": false
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.249595517670891,
- "normalized_score": 24.9595517670891
- },
- "bbh": {
- "name": "BBH",
- "value": 0.42450570755678535,
- "normalized_score": 20.50625197041595
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.033987915407854986,
- "normalized_score": 3.3987915407854987
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.276006711409396,
- "normalized_score": 3.467561521252797
- },
- "musr": {
- "name": "MUSR",
- "value": 0.43855208333333334,
- "normalized_score": 13.619010416666669
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.2631316489361702,
- "normalized_score": 18.125738770685576
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": false,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": false
- },
- "metadata": {
- "upload_date": "2023-12-30",
- "submission_date": "2024-06-27",
- "generation": 1,
- "base_model": "mistralai/Mistral-7B-v0.1",
- "hub_license": "apache-2.0",
- "hub_hearts": 0,
- "params_billions": 7.242,
- "co2_cost": 1.1597182125585273
- }
- },
- {
- "id": "spow12/ChatWaifu_12B_v2.0_bfloat16_1fb38700b2e2a66d4ff32636817df76285cea5f1_True",
- "model": {
- "name": "spow12/ChatWaifu_12B_v2.0",
- "sha": "1fb38700b2e2a66d4ff32636817df76285cea5f1",
- "precision": "bfloat16",
- "type": "fine-tunedondomain-specificdatasets",
- "weight_type": "Original",
- "architecture": "MistralForCausalLM",
- "average_score": 21.979985555353448,
- "has_chat_template": true
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.47675833455232114,
- "normalized_score": 47.67583345523211
- },
- "bbh": {
- "name": "BBH",
- "value": 0.5207681738205238,
- "normalized_score": 31.165239578024238
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.07099697885196375,
- "normalized_score": 7.099697885196375
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.27684563758389263,
- "normalized_score": 3.5794183445190177
- },
- "musr": {
- "name": "MUSR",
- "value": 0.44317708333333333,
- "normalized_score": 15.83046875
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.33876329787234044,
- "normalized_score": 26.529255319148938
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": true,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": false
- },
- "metadata": {
- "upload_date": "2024-10-10",
- "submission_date": "2024-10-14",
- "generation": 1,
- "base_model": "spow12/ChatWaifu_12B_v2.0 (Merge)",
- "hub_license": "cc-by-nc-4.0",
- "hub_hearts": 20,
- "params_billions": 12.248,
- "co2_cost": 5.1788552099315455
- }
- },
- {
- "id": "spow12/ChatWaifu_22B_v2.0_preview_bfloat16_36af7ec06bc85405e8641986ad45c6d21353b114_True",
- "model": {
- "name": "spow12/ChatWaifu_22B_v2.0_preview",
- "sha": "36af7ec06bc85405e8641986ad45c6d21353b114",
- "precision": "bfloat16",
- "type": "fine-tunedondomain-specificdatasets",
- "weight_type": "Original",
- "architecture": "MistralForCausalLM",
- "average_score": 29.545969322343282,
- "has_chat_template": true
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.6744947849483814,
- "normalized_score": 67.44947849483815
- },
- "bbh": {
- "name": "BBH",
- "value": 0.6170153091362338,
- "normalized_score": 45.48829424136917
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.18882175226586104,
- "normalized_score": 18.882175226586103
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.31543624161073824,
- "normalized_score": 8.7248322147651
- },
- "musr": {
- "name": "MUSR",
- "value": 0.3685416666666667,
- "normalized_score": 3.5343750000000003
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.39876994680851063,
- "normalized_score": 33.19666075650118
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": true,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": false
- },
- "metadata": {
- "upload_date": "2024-09-23",
- "submission_date": "2024-09-24",
- "generation": 1,
- "base_model": "spow12/ChatWaifu_22B_v2.0_preview (Merge)",
- "hub_license": "cc-by-nc-4.0",
- "hub_hearts": 6,
- "params_billions": 22.247,
- "co2_cost": 2.9884076630530747
- }
- },
- {
- "id": "spow12/ChatWaifu_v1.4_bfloat16_c5b2b30a8e9fa23722b6e30aa2ca1dab7fe1c2b5_True",
- "model": {
- "name": "spow12/ChatWaifu_v1.4",
- "sha": "c5b2b30a8e9fa23722b6e30aa2ca1dab7fe1c2b5",
- "precision": "bfloat16",
- "type": "fine-tunedondomain-specificdatasets",
- "weight_type": "Original",
- "architecture": "MistralForCausalLM",
- "average_score": 25.706734115105316,
- "has_chat_template": true
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.5690567693719332,
- "normalized_score": 56.90567693719332
- },
- "bbh": {
- "name": "BBH",
- "value": 0.5176247229970669,
- "normalized_score": 31.63055380047582
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.10574018126888217,
- "normalized_score": 10.574018126888216
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.3070469798657718,
- "normalized_score": 7.606263982102905
- },
- "musr": {
- "name": "MUSR",
- "value": 0.47433333333333333,
- "normalized_score": 20.025000000000002
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.3474900265957447,
- "normalized_score": 27.498891843971627
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": true,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": false
- },
- "metadata": {
- "upload_date": "2024-09-03",
- "submission_date": "2024-09-05",
- "generation": 1,
- "base_model": "spow12/ChatWaifu_v1.4 (Merge)",
- "hub_license": "cc-by-nc-4.0",
- "hub_hearts": 19,
- "params_billions": 12.248,
- "co2_cost": 2.8842853759884393
- }
- },
- {
- "id": "spow12/ChatWaifu_v2.0_22B_float16_54771319920ed791ba3f0262b036f37a92b880f2_True",
- "model": {
- "name": "spow12/ChatWaifu_v2.0_22B",
- "sha": "54771319920ed791ba3f0262b036f37a92b880f2",
- "precision": "float16",
- "type": "fine-tunedondomain-specificdatasets",
- "weight_type": "Original",
- "architecture": "MistralForCausalLM",
- "average_score": 28.838097623831434,
- "has_chat_template": true
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.6510891102275296,
- "normalized_score": 65.10891102275296
- },
- "bbh": {
- "name": "BBH",
- "value": 0.592630190761292,
- "normalized_score": 42.28622796334265
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.18580060422960726,
- "normalized_score": 18.580060422960727
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.32466442953020136,
- "normalized_score": 9.955257270693513
- },
- "musr": {
- "name": "MUSR",
- "value": 0.3841979166666667,
- "normalized_score": 5.5914062499999995
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.3835605053191489,
- "normalized_score": 31.506722813238763
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": true,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": false
- },
- "metadata": {
- "upload_date": "2024-10-11",
- "submission_date": "2024-10-11",
- "generation": 1,
- "base_model": "spow12/ChatWaifu_v2.0_22B (Merge)",
- "hub_license": "cc-by-nc-4.0",
- "hub_hearts": 10,
- "params_billions": 22.247,
- "co2_cost": 2.7398346907612154
- }
- },
- {
- "id": "spow12/ChatWaifu_v2.0_22B_bfloat16_a6e7c206d9af77d3f85faf0ce4a711d62815b2ab_True",
- "model": {
- "name": "spow12/ChatWaifu_v2.0_22B",
- "sha": "a6e7c206d9af77d3f85faf0ce4a711d62815b2ab",
- "precision": "bfloat16",
- "type": "fine-tunedondomain-specificdatasets",
- "weight_type": "Original",
- "architecture": "MistralForCausalLM",
- "average_score": 29.03230470609974,
- "has_chat_template": true
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.6517384982956334,
- "normalized_score": 65.17384982956334
- },
- "bbh": {
- "name": "BBH",
- "value": 0.5908050619550995,
- "normalized_score": 42.01979809251511
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.20317220543806647,
- "normalized_score": 20.31722054380665
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.3238255033557047,
- "normalized_score": 9.843400447427292
- },
- "musr": {
- "name": "MUSR",
- "value": 0.3841979166666667,
- "normalized_score": 5.5914062499999995
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.3812333776595745,
- "normalized_score": 31.24815307328605
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": true,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": false
- },
- "metadata": {
- "upload_date": "2024-10-11",
- "submission_date": "2024-10-14",
- "generation": 1,
- "base_model": "spow12/ChatWaifu_v2.0_22B (Merge)",
- "hub_license": "cc-by-nc-4.0",
- "hub_hearts": 10,
- "params_billions": 22.247,
- "co2_cost": 2.791720231086567
- }
- },
- {
- "id": "ssmits/Qwen2.5-95B-Instruct_bfloat16_9c0e7df57a4fcf4d364efd916a0fc0abdd2d20a3_True",
- "model": {
- "name": "ssmits/Qwen2.5-95B-Instruct",
- "sha": "9c0e7df57a4fcf4d364efd916a0fc0abdd2d20a3",
- "precision": "bfloat16",
- "type": "chatmodels",
- "weight_type": "Original",
- "architecture": "Qwen2ForCausalLM",
- "average_score": 45.257345532181205,
- "has_chat_template": true
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.8431051831363006,
- "normalized_score": 84.31051831363006
- },
- "bbh": {
- "name": "BBH",
- "value": 0.7037799697488242,
- "normalized_score": 58.530351322851054
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.5302114803625377,
- "normalized_score": 53.02114803625378
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.3640939597315436,
- "normalized_score": 15.212527964205815
- },
- "musr": {
- "name": "MUSR",
- "value": 0.4283854166666667,
- "normalized_score": 13.61484375
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.5216921542553191,
- "normalized_score": 46.85468380614657
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": false,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": false
- },
- "metadata": {
- "upload_date": "2024-09-24",
- "submission_date": "2024-09-26",
- "generation": 1,
- "base_model": "ssmits/Qwen2.5-95B-Instruct (Merge)",
- "hub_license": "other",
- "hub_hearts": 3,
- "params_billions": 94.648,
- "co2_cost": 38.466989605628804
- }
- },
- {
- "id": "stabilityai/StableBeluga2_bfloat16_cb47d3db70ea3ddc2cabdeb358c303b328f65900_False",
- "model": {
- "name": "stabilityai/StableBeluga2",
- "sha": "cb47d3db70ea3ddc2cabdeb358c303b328f65900",
- "precision": "bfloat16",
- "type": "fine-tunedondomain-specificdatasets",
- "weight_type": "Original",
- "architecture": "LlamaForCausalLM",
- "average_score": 22.808722961321305,
- "has_chat_template": false
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.37871403431783224,
- "normalized_score": 37.87140343178322
- },
- "bbh": {
- "name": "BBH",
- "value": 0.5824128134553807,
- "normalized_score": 41.26326112722379
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.04380664652567976,
- "normalized_score": 4.380664652567976
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.3162751677852349,
- "normalized_score": 8.83668903803132
- },
- "musr": {
- "name": "MUSR",
- "value": 0.47296875,
- "normalized_score": 18.654427083333335
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.3326130319148936,
- "normalized_score": 25.845892434988176
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": false,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": true
- },
- "metadata": {
- "upload_date": "2023-07-20",
- "submission_date": "2024-06-13",
- "generation": 0,
- "base_model": "stabilityai/StableBeluga2",
- "hub_license": "",
- "hub_hearts": 885,
- "params_billions": 68.977,
- "co2_cost": 12.509347312088027
- }
- },
- {
- "id": "stabilityai/stablelm-2-12b_bfloat16_fead13ddbf4492970666650c3cd6f85f485411ec_False",
- "model": {
- "name": "stabilityai/stablelm-2-12b",
- "sha": "fead13ddbf4492970666650c3cd6f85f485411ec",
- "precision": "bfloat16",
- "type": "pretrained",
- "weight_type": "Original",
- "architecture": "StableLmForCausalLM",
- "average_score": 13.998663061157224,
- "has_chat_template": false
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.1569214129620518,
- "normalized_score": 15.69214129620518
- },
- "bbh": {
- "name": "BBH",
- "value": 0.4508654171114765,
- "normalized_score": 22.685797482043984
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.04305135951661632,
- "normalized_score": 4.305135951661631
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.2785234899328859,
- "normalized_score": 3.8031319910514525
- },
- "musr": {
- "name": "MUSR",
- "value": 0.44788541666666665,
- "normalized_score": 14.485677083333334
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.3071808510638298,
- "normalized_score": 23.020094562647756
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": false,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": true
- },
- "metadata": {
- "upload_date": "2024-03-21",
- "submission_date": "2024-06-12",
- "generation": 0,
- "base_model": "stabilityai/stablelm-2-12b",
- "hub_license": "other",
- "hub_hearts": 120,
- "params_billions": 12.143,
- "co2_cost": 2.946558448523118
- }
- },
- {
- "id": "stabilityai/stablelm-2-12b-chat_bfloat16_b6b62cd451b84e848514c00fafa66d9ead9297c5_True",
- "model": {
- "name": "stabilityai/stablelm-2-12b-chat",
- "sha": "b6b62cd451b84e848514c00fafa66d9ead9297c5",
- "precision": "bfloat16",
- "type": "chatmodels",
- "weight_type": "Original",
- "architecture": "StableLmForCausalLM",
- "average_score": 16.778178021081665,
- "has_chat_template": true
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.4081647805600252,
- "normalized_score": 40.81647805600252
- },
- "bbh": {
- "name": "BBH",
- "value": 0.4672024731282805,
- "normalized_score": 25.253697090812636
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.05362537764350453,
- "normalized_score": 5.362537764350453
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.26677852348993286,
- "normalized_score": 2.2371364653243813
- },
- "musr": {
- "name": "MUSR",
- "value": 0.3914270833333333,
- "normalized_score": 7.7283854166666694
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.2734375,
- "normalized_score": 19.270833333333332
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": false,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": true
- },
- "metadata": {
- "upload_date": "2024-04-04",
- "submission_date": "2024-06-12",
- "generation": 0,
- "base_model": "stabilityai/stablelm-2-12b-chat",
- "hub_license": "other",
- "hub_hearts": 88,
- "params_billions": 12.143,
- "co2_cost": 2.1761933718736635
- }
- },
- {
- "id": "stabilityai/stablelm-2-1_6b_float16_8879812cccd176fbbe9ceb747b815bcc7d6499f8_False",
- "model": {
- "name": "stabilityai/stablelm-2-1_6b",
- "sha": "8879812cccd176fbbe9ceb747b815bcc7d6499f8",
- "precision": "float16",
- "type": "pretrained",
- "weight_type": "Original",
- "architecture": "StableLmForCausalLM",
- "average_score": 5.316831473392678,
- "has_chat_template": false
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.11570521771122844,
- "normalized_score": 11.570521771122843
- },
- "bbh": {
- "name": "BBH",
- "value": 0.338457720511071,
- "normalized_score": 8.632695204968835
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.0075528700906344415,
- "normalized_score": 0.7552870090634441
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.2483221476510067,
- "normalized_score": 0.0
- },
- "musr": {
- "name": "MUSR",
- "value": 0.38819791666666664,
- "normalized_score": 5.791406249999999
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.1463597074468085,
- "normalized_score": 5.1510786052009445
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": false,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": true
- },
- "metadata": {
- "upload_date": "2024-01-18",
- "submission_date": "2024-06-12",
- "generation": 0,
- "base_model": "stabilityai/stablelm-2-1_6b",
- "hub_license": "other",
- "hub_hearts": 189,
- "params_billions": 1.645,
- "co2_cost": 1.0997436553514433
- }
- },
- {
- "id": "stabilityai/stablelm-2-1_6b-chat_bfloat16_f3fe67057c2789ae1bb1fe42b038da99840d4f13_True",
- "model": {
- "name": "stabilityai/stablelm-2-1_6b-chat",
- "sha": "f3fe67057c2789ae1bb1fe42b038da99840d4f13",
- "precision": "bfloat16",
- "type": "chatmodels",
- "weight_type": "Original",
- "architecture": "StableLmForCausalLM",
- "average_score": 8.867360692101089,
- "has_chat_template": true
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.30599919325168334,
- "normalized_score": 30.59991932516833
- },
- "bbh": {
- "name": "BBH",
- "value": 0.3390172395486522,
- "normalized_score": 7.493378297410634
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.024924471299093656,
- "normalized_score": 2.492447129909366
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.24748322147651006,
- "normalized_score": 0.0
- },
- "musr": {
- "name": "MUSR",
- "value": 0.35796875,
- "normalized_score": 5.712760416666669
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.16215093085106383,
- "normalized_score": 6.905658983451536
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": false,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": true
- },
- "metadata": {
- "upload_date": "2024-04-08",
- "submission_date": "2024-06-12",
- "generation": 0,
- "base_model": "stabilityai/stablelm-2-1_6b-chat",
- "hub_license": "other",
- "hub_hearts": 33,
- "params_billions": 1.645,
- "co2_cost": 0.9908530170377292
- }
- },
- {
- "id": "stabilityai/stablelm-2-zephyr-1_6b_float16_2f275b1127d59fc31e4f7c7426d528768ada9ea4_True",
- "model": {
- "name": "stabilityai/stablelm-2-zephyr-1_6b",
- "sha": "2f275b1127d59fc31e4f7c7426d528768ada9ea4",
- "precision": "float16",
- "type": "chatmodels",
- "weight_type": "Original",
- "architecture": "StableLmForCausalLM",
- "average_score": 9.458167591621253,
- "has_chat_template": true
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.32793100085550786,
- "normalized_score": 32.79310008555078
- },
- "bbh": {
- "name": "BBH",
- "value": 0.3351608706280727,
- "normalized_score": 6.708710147938231
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.03323262839879154,
- "normalized_score": 3.3232628398791544
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.24328859060402686,
- "normalized_score": 0.0
- },
- "musr": {
- "name": "MUSR",
- "value": 0.3511458333333333,
- "normalized_score": 5.993229166666668
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.17137632978723405,
- "normalized_score": 7.930703309692672
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": false,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": true
- },
- "metadata": {
- "upload_date": "2024-01-19",
- "submission_date": "2024-06-12",
- "generation": 0,
- "base_model": "stabilityai/stablelm-2-zephyr-1_6b",
- "hub_license": "other",
- "hub_hearts": 182,
- "params_billions": 1.645,
- "co2_cost": 0.9461773254818848
- }
- },
- {
- "id": "stabilityai/stablelm-3b-4e1t_bfloat16_fa4a6a92fca83c3b4223a3c9bf792887090ebfba_False",
- "model": {
- "name": "stabilityai/stablelm-3b-4e1t",
- "sha": "fa4a6a92fca83c3b4223a3c9bf792887090ebfba",
- "precision": "bfloat16",
- "type": "pretrained",
- "weight_type": "Original",
- "architecture": "StableLmForCausalLM",
- "average_score": 7.3261912916856,
- "has_chat_template": false
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.22031986240951784,
- "normalized_score": 22.031986240951785
- },
- "bbh": {
- "name": "BBH",
- "value": 0.3504211415826912,
- "normalized_score": 9.013070349546279
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.010574018126888218,
- "normalized_score": 1.0574018126888218
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.23741610738255034,
- "normalized_score": 0.0
- },
- "musr": {
- "name": "MUSR",
- "value": 0.37778124999999996,
- "normalized_score": 4.422656249999999
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.1668882978723404,
- "normalized_score": 7.432033096926712
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": false,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": true
- },
- "metadata": {
- "upload_date": "2023-09-29",
- "submission_date": "2024-08-10",
- "generation": 0,
- "base_model": "stabilityai/stablelm-3b-4e1t",
- "hub_license": "cc-by-sa-4.0",
- "hub_hearts": 310,
- "params_billions": 2.795,
- "co2_cost": 0.8685302527863359
- }
- },
- {
- "id": "stabilityai/stablelm-zephyr-3b_bfloat16_a14f62d95754d96aea2be6e24c0f6966636797b9_True",
- "model": {
- "name": "stabilityai/stablelm-zephyr-3b",
- "sha": "a14f62d95754d96aea2be6e24c0f6966636797b9",
- "precision": "bfloat16",
- "type": "chatmodels",
- "weight_type": "Original",
- "architecture": "StableLmForCausalLM",
- "average_score": 12.369206962303688,
- "has_chat_template": true
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.36832271705740766,
- "normalized_score": 36.832271705740766
- },
- "bbh": {
- "name": "BBH",
- "value": 0.3866361442837871,
- "normalized_score": 14.7591192080273
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.04305135951661632,
- "normalized_score": 4.305135951661631
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.23909395973154363,
- "normalized_score": 0.0
- },
- "musr": {
- "name": "MUSR",
- "value": 0.4183020833333333,
- "normalized_score": 9.787760416666666
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.17677859042553193,
- "normalized_score": 8.530954491725769
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": false,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": true
- },
- "metadata": {
- "upload_date": "2023-11-21",
- "submission_date": "2024-06-12",
- "generation": 0,
- "base_model": "stabilityai/stablelm-zephyr-3b",
- "hub_license": "other",
- "hub_hearts": 253,
- "params_billions": 2.795,
- "co2_cost": 0.7680472629918603
- }
- },
- {
- "id": "sthenno/tempesthenno-0120_bfloat16_89ddd0c32c5fdc31060cd50b3cbaf52dd4ffae8a_False",
- "model": {
- "name": "sthenno/tempesthenno-0120",
- "sha": "89ddd0c32c5fdc31060cd50b3cbaf52dd4ffae8a",
- "precision": "bfloat16",
- "type": "fine-tunedondomain-specificdatasets",
- "weight_type": "Original",
- "architecture": "Qwen2ForCausalLM",
- "average_score": 36.46706051009951,
- "has_chat_template": false
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.5390319906736348,
- "normalized_score": 53.90319906736349
- },
- "bbh": {
- "name": "BBH",
- "value": 0.6373174111347703,
- "normalized_score": 47.90901771207657
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.33534743202416917,
- "normalized_score": 33.53474320241692
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.39429530201342283,
- "normalized_score": 19.239373601789712
- },
- "musr": {
- "name": "MUSR",
- "value": 0.46332291666666664,
- "normalized_score": 16.548697916666665
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.5290059840425532,
- "normalized_score": 47.667331560283685
- }
- },
- "features": {
- "is_not_available_on_hub": false,
- "is_merged": false,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": false
- },
- "metadata": {
- "upload_date": "",
- "submission_date": "2025-01-29",
- "generation": 0,
- "base_model": "Removed",
- "hub_license": "",
- "hub_hearts": 0,
- "params_billions": 14.766,
- "co2_cost": 5.683677828224274
- }
- },
- {
- "id": "sthenno/tempesthenno-fusion-0309_bfloat16_1b89614a5b377efc9da0c320cbd2dbb8322e6c2a_True",
- "model": {
- "name": "sthenno/tempesthenno-fusion-0309",
- "sha": "1b89614a5b377efc9da0c320cbd2dbb8322e6c2a",
- "precision": "bfloat16",
- "type": "fine-tunedondomain-specificdatasets",
- "weight_type": "Original",
- "architecture": "Qwen2ForCausalLM",
- "average_score": 42.138889973963565,
- "has_chat_template": true
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.7691913013027656,
- "normalized_score": 76.91913013027656
- },
- "bbh": {
- "name": "BBH",
- "value": 0.6580880569586895,
- "normalized_score": 50.97985614656526
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.47658610271903323,
- "normalized_score": 47.65861027190332
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.3699664429530201,
- "normalized_score": 15.99552572706935
- },
- "musr": {
- "name": "MUSR",
- "value": 0.4325104166666667,
- "normalized_score": 13.963802083333333
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.5258477393617021,
- "normalized_score": 47.31641548463357
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": true,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": false
- },
- "metadata": {
- "upload_date": "2025-03-08",
- "submission_date": "2025-03-08",
- "generation": 1,
- "base_model": "sthenno/tempesthenno-fusion-0309 (Merge)",
- "hub_license": "apache-2.0",
- "hub_hearts": 2,
- "params_billions": 14.766,
- "co2_cost": 1.6217801348510528
- }
- },
- {
- "id": "sthenno/tempesthenno-kto-0205-ckpt80_bfloat16_9ed4d0238da8de732203da2d07e342e56c2538dd_False",
- "model": {
- "name": "sthenno/tempesthenno-kto-0205-ckpt80",
- "sha": "9ed4d0238da8de732203da2d07e342e56c2538dd",
- "precision": "bfloat16",
- "type": "fine-tunedondomain-specificdatasets",
- "weight_type": "Original",
- "architecture": "Qwen2ForCausalLM",
- "average_score": 41.7909439445551,
- "has_chat_template": false
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.8054362425032248,
- "normalized_score": 80.54362425032248
- },
- "bbh": {
- "name": "BBH",
- "value": 0.654273895095419,
- "normalized_score": 50.64379713470144
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.459214501510574,
- "normalized_score": 45.9214501510574
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.34815436241610737,
- "normalized_score": 13.087248322147648
- },
- "musr": {
- "name": "MUSR",
- "value": 0.4247604166666667,
- "normalized_score": 12.928385416666666
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.5285904255319149,
- "normalized_score": 47.62115839243499
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": false,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": false
- },
- "metadata": {
- "upload_date": "2025-02-05",
- "submission_date": "2025-02-05",
- "generation": 1,
- "base_model": "sthenno/tempesthenno-kto-0205-ckpt80 (Merge)",
- "hub_license": "apache-2.0",
- "hub_hearts": 3,
- "params_billions": 14.766,
- "co2_cost": 5.002422600889499
- }
- },
- {
- "id": "sthenno/tempesthenno-nuslerp-001_bfloat16_d507c25ccad162616c5d6d8fee3612324ee521f4_True",
- "model": {
- "name": "sthenno/tempesthenno-nuslerp-001",
- "sha": "d507c25ccad162616c5d6d8fee3612324ee521f4",
- "precision": "bfloat16",
- "type": "fine-tunedondomain-specificdatasets",
- "weight_type": "Original",
- "architecture": "Qwen2ForCausalLM",
- "average_score": 42.58615155936238,
- "has_chat_template": true
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.7926468437080281,
- "normalized_score": 79.2646843708028
- },
- "bbh": {
- "name": "BBH",
- "value": 0.6577675676172494,
- "normalized_score": 51.04491084341287
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.47583081570996977,
- "normalized_score": 47.583081570996974
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.3733221476510067,
- "normalized_score": 16.442953020134222
- },
- "musr": {
- "name": "MUSR",
- "value": 0.43,
- "normalized_score": 13.883333333333331
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.5256815159574468,
- "normalized_score": 47.29794621749409
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": true,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": false
- },
- "metadata": {
- "upload_date": "2025-01-11",
- "submission_date": "2025-01-27",
- "generation": 1,
- "base_model": "sthenno/tempesthenno-nuslerp-001 (Merge)",
- "hub_license": "apache-2.0",
- "hub_hearts": 4,
- "params_billions": 14.766,
- "co2_cost": 2.9542203017547903
- }
- },
- {
- "id": "sthenno/tempesthenno-nuslerp-0124_bfloat16_9769b900fbc116b28cb618e7f16c92552b78b5ff_False",
- "model": {
- "name": "sthenno/tempesthenno-nuslerp-0124",
- "sha": "9769b900fbc116b28cb618e7f16c92552b78b5ff",
- "precision": "bfloat16",
- "type": "fine-tunedondomain-specificdatasets",
- "weight_type": "Original",
- "architecture": "Qwen2ForCausalLM",
- "average_score": 41.287889272020685,
- "has_chat_template": false
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.7003982765728267,
- "normalized_score": 70.03982765728267
- },
- "bbh": {
- "name": "BBH",
- "value": 0.6468547741903091,
- "normalized_score": 49.276795334077576
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.411631419939577,
- "normalized_score": 41.1631419939577
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.3901006711409396,
- "normalized_score": 18.680089485458616
- },
- "musr": {
- "name": "MUSR",
- "value": 0.48592708333333334,
- "normalized_score": 20.207552083333336
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.5352393617021277,
- "normalized_score": 48.35992907801419
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": true,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": false
- },
- "metadata": {
- "upload_date": "2025-01-27",
- "submission_date": "2025-01-29",
- "generation": 1,
- "base_model": "sthenno/tempesthenno-nuslerp-0124 (Merge)",
- "hub_license": "apache-2.0",
- "hub_hearts": 4,
- "params_billions": 14.766,
- "co2_cost": 5.399205437411366
- }
- },
- {
- "id": "sthenno/tempesthenno-ppo-ckpt40_bfloat16_c7e00f975d12b48394474908d0596e4be2957e05_True",
- "model": {
- "name": "sthenno/tempesthenno-ppo-ckpt40",
- "sha": "c7e00f975d12b48394474908d0596e4be2957e05",
- "precision": "bfloat16",
- "type": "fine-tunedondomain-specificdatasets",
- "weight_type": "Original",
- "architecture": "Qwen2ForCausalLM",
- "average_score": 42.73562035742862,
- "has_chat_template": true
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.7923221496739761,
- "normalized_score": 79.23221496739761
- },
- "bbh": {
- "name": "BBH",
- "value": 0.6549600322869433,
- "normalized_score": 50.57317166167434
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.4735649546827795,
- "normalized_score": 47.35649546827795
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.3775167785234899,
- "normalized_score": 17.00223713646532
- },
- "musr": {
- "name": "MUSR",
- "value": 0.4351770833333333,
- "normalized_score": 14.56380208333333
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.5291722074468085,
- "normalized_score": 47.685800827423165
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": false,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": false
- },
- "metadata": {
- "upload_date": "2025-01-16",
- "submission_date": "2025-01-27",
- "generation": 1,
- "base_model": "sthenno/tempesthenno-ppo-ckpt40 (Merge)",
- "hub_license": "apache-2.0",
- "hub_hearts": 4,
- "params_billions": 14.766,
- "co2_cost": 5.8322570455957
- }
- },
- {
- "id": "sthenno/tempesthenno-sft-0309-ckpt10_bfloat16_e13c4281c3cccf9fded2ec8c3b2ef6d24c906403_True",
- "model": {
- "name": "sthenno/tempesthenno-sft-0309-ckpt10",
- "sha": "e13c4281c3cccf9fded2ec8c3b2ef6d24c906403",
- "precision": "bfloat16",
- "type": "fine-tunedondomain-specificdatasets",
- "weight_type": "Original",
- "architecture": "Qwen2ForCausalLM",
- "average_score": 42.192396685999725,
- "has_chat_template": true
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.7743620260907724,
- "normalized_score": 77.43620260907724
- },
- "bbh": {
- "name": "BBH",
- "value": 0.6551647758995857,
- "normalized_score": 50.60090254912357
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.47205438066465255,
- "normalized_score": 47.205438066465256
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.3716442953020134,
- "normalized_score": 16.21923937360179
- },
- "musr": {
- "name": "MUSR",
- "value": 0.4364166666666667,
- "normalized_score": 14.385416666666663
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.5257646276595744,
- "normalized_score": 47.307180851063826
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": false,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": false
- },
- "metadata": {
- "upload_date": "2025-03-08",
- "submission_date": "2025-03-08",
- "generation": 1,
- "base_model": "sthenno/tempesthenno-sft-0309-ckpt10 (Merge)",
- "hub_license": "apache-2.0",
- "hub_hearts": 2,
- "params_billions": 14.766,
- "co2_cost": 1.550085406181084
- }
- },
- {
- "id": "sthenno/tempesthenno-sft-0314-stage1-ckpt50_bfloat16_d82f4a9e3272a0776f5461664ec0cca123f21495_True",
- "model": {
- "name": "sthenno/tempesthenno-sft-0314-stage1-ckpt50",
- "sha": "d82f4a9e3272a0776f5461664ec0cca123f21495",
- "precision": "bfloat16",
- "type": "fine-tunedondomain-specificdatasets",
- "weight_type": "Original",
- "architecture": "Qwen2ForCausalLM",
- "average_score": 41.88689161687085,
- "has_chat_template": true
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.7393659933421101,
- "normalized_score": 73.93659933421101
- },
- "bbh": {
- "name": "BBH",
- "value": 0.6601015847983588,
- "normalized_score": 51.25931343801958
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.46827794561933533,
- "normalized_score": 46.82779456193353
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.3733221476510067,
- "normalized_score": 16.442953020134222
- },
- "musr": {
- "name": "MUSR",
- "value": 0.44286458333333334,
- "normalized_score": 15.058072916666665
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.5301695478723404,
- "normalized_score": 47.796616430260045
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": false,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": false
- },
- "metadata": {
- "upload_date": "2025-03-13",
- "submission_date": "2025-03-13",
- "generation": 1,
- "base_model": "sthenno/tempesthenno-sft-0314-stage1-ckpt50 (Merge)",
- "hub_license": "apache-2.0",
- "hub_hearts": 3,
- "params_billions": 14.766,
- "co2_cost": 1.5142965828921595
- }
- },
- {
- "id": "sthenno/tempestissimo-14b-0309_bfloat16_cff092839c0ce638f754c4ab743c3cd1bdc69f16_True",
- "model": {
- "name": "sthenno/tempestissimo-14b-0309",
- "sha": "cff092839c0ce638f754c4ab743c3cd1bdc69f16",
- "precision": "bfloat16",
- "type": "fine-tunedondomain-specificdatasets",
- "weight_type": "Original",
- "architecture": "Qwen2ForCausalLM",
- "average_score": 41.88723985978792,
- "has_chat_template": true
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.7548781677061308,
- "normalized_score": 75.48781677061308
- },
- "bbh": {
- "name": "BBH",
- "value": 0.6587329699954757,
- "normalized_score": 50.92276703756472
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.479607250755287,
- "normalized_score": 47.9607250755287
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.36661073825503354,
- "normalized_score": 15.548098434004473
- },
- "musr": {
- "name": "MUSR",
- "value": 0.43123958333333334,
- "normalized_score": 13.83828125
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.528091755319149,
- "normalized_score": 47.56575059101655
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": true,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": false
- },
- "metadata": {
- "upload_date": "2025-03-08",
- "submission_date": "2025-03-08",
- "generation": 1,
- "base_model": "sthenno/tempestissimo-14b-0309 (Merge)",
- "hub_license": "apache-2.0",
- "hub_hearts": 4,
- "params_billions": 14.766,
- "co2_cost": 1.5719156377988648
- }
- },
- {
- "id": "sthenno-com/miscii-14b-0130_bfloat16_df4b3c169aeab40831f87751076bc67c32209fe8_False",
- "model": {
- "name": "sthenno-com/miscii-14b-0130",
- "sha": "df4b3c169aeab40831f87751076bc67c32209fe8",
- "precision": "bfloat16",
- "type": "chatmodels",
- "weight_type": "Original",
- "architecture": "Qwen2ForCausalLM",
- "average_score": 41.085925833196264,
- "has_chat_template": false
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.6647029880716498,
- "normalized_score": 66.47029880716498
- },
- "bbh": {
- "name": "BBH",
- "value": 0.6505409113818335,
- "normalized_score": 49.838838706366396
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.43202416918429004,
- "normalized_score": 43.202416918429
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.38171140939597314,
- "normalized_score": 17.561521252796418
- },
- "musr": {
- "name": "MUSR",
- "value": 0.4911666666666667,
- "normalized_score": 20.962500000000002
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.5363198138297872,
- "normalized_score": 48.4799793144208
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": true,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": false
- },
- "metadata": {
- "upload_date": "2025-01-30",
- "submission_date": "2025-01-30",
- "generation": 1,
- "base_model": "sthenno-com/miscii-14b-0130 (Merge)",
- "hub_license": "apache-2.0",
- "hub_hearts": 8,
- "params_billions": 14.766,
- "co2_cost": 3.8863293871608944
- }
- },
- {
- "id": "sthenno-com/miscii-14b-0218_bfloat16_9fba75f9b793d0e79e1b0174f54c6919bbc66d67_True",
- "model": {
- "name": "sthenno-com/miscii-14b-0218",
- "sha": "9fba75f9b793d0e79e1b0174f54c6919bbc66d67",
- "precision": "bfloat16",
- "type": "chatmodels",
- "weight_type": "Original",
- "architecture": "Qwen2ForCausalLM",
- "average_score": 42.89726019720522,
- "has_chat_template": true
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.7655941790006073,
- "normalized_score": 76.55941790006072
- },
- "bbh": {
- "name": "BBH",
- "value": 0.6558708629267258,
- "normalized_score": 50.6445656375432
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.5143504531722054,
- "normalized_score": 51.43504531722054
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.38338926174496646,
- "normalized_score": 17.785234899328863
- },
- "musr": {
- "name": "MUSR",
- "value": 0.4272708333333333,
- "normalized_score": 13.208854166666663
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.5297539893617021,
- "normalized_score": 47.75044326241135
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": true,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": false
- },
- "metadata": {
- "upload_date": "2025-02-19",
- "submission_date": "2025-02-19",
- "generation": 1,
- "base_model": "sthenno-com/miscii-14b-0218 (Merge)",
- "hub_license": "apache-2.0",
- "hub_hearts": 20,
- "params_billions": 14.766,
- "co2_cost": 1.5469423212307571
- }
- },
- {
- "id": "sthenno-com/miscii-14b-1028_bfloat16_a60c866621ee35d04e84cf366e972f2466d617b1_True",
- "model": {
- "name": "sthenno-com/miscii-14b-1028",
- "sha": "a60c866621ee35d04e84cf366e972f2466d617b1",
- "precision": "bfloat16",
- "type": "chatmodels",
- "weight_type": "Original",
- "architecture": "Qwen2ForCausalLM",
- "average_score": 42.38069997703781,
- "has_chat_template": true
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.8236711924360696,
- "normalized_score": 82.36711924360696
- },
- "bbh": {
- "name": "BBH",
- "value": 0.64483340535341,
- "normalized_score": 49.262667655574724
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.5030211480362538,
- "normalized_score": 50.30211480362537
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.3565436241610738,
- "normalized_score": 14.205816554809845
- },
- "musr": {
- "name": "MUSR",
- "value": 0.41815625,
- "normalized_score": 12.002864583333329
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.5152925531914894,
- "normalized_score": 46.1436170212766
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": false,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": false
- },
- "metadata": {
- "upload_date": "2024-11-12",
- "submission_date": "2024-11-17",
- "generation": 1,
- "base_model": "sthenno-com/miscii-14b-1028 (Merge)",
- "hub_license": "apache-2.0",
- "hub_hearts": 18,
- "params_billions": 14.77,
- "co2_cost": 3.067456578018677
- }
- },
- {
- "id": "sthenno-com/miscii-14b-1225_bfloat16_3d26f676424307cc2496c6b11710bbfa35275685_True",
- "model": {
- "name": "sthenno-com/miscii-14b-1225",
- "sha": "3d26f676424307cc2496c6b11710bbfa35275685",
- "precision": "bfloat16",
- "type": "chatmodels",
- "weight_type": "Original",
- "architecture": "Qwen2ForCausalLM",
- "average_score": 42.34951191764194,
- "has_chat_template": true
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.787800812954073,
- "normalized_score": 78.7800812954073
- },
- "bbh": {
- "name": "BBH",
- "value": 0.6571708988407374,
- "normalized_score": 50.91280572690238
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.4516616314199396,
- "normalized_score": 45.16616314199396
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.3775167785234899,
- "normalized_score": 17.00223713646532
- },
- "musr": {
- "name": "MUSR",
- "value": 0.4365729166666667,
- "normalized_score": 14.771614583333331
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.5271775265957447,
- "normalized_score": 47.464169621749406
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": true,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": false
- },
- "metadata": {
- "upload_date": "2024-12-24",
- "submission_date": "2024-12-24",
- "generation": 1,
- "base_model": "sthenno-com/miscii-14b-1225 (Merge)",
- "hub_license": "apache-2.0",
- "hub_hearts": 25,
- "params_billions": 14.766,
- "co2_cost": 2.896994492005911
- }
- },
- {
- "id": "streamerbtw1002/Nexuim-R1-7B-Instruct_bfloat16_f53f6fa3ec8ec90cd2d62b8ee232ad49695c323a_True",
- "model": {
- "name": "streamerbtw1002/Nexuim-R1-7B-Instruct",
- "sha": "f53f6fa3ec8ec90cd2d62b8ee232ad49695c323a",
- "precision": "bfloat16",
- "type": "fine-tunedondomain-specificdatasets",
- "weight_type": "Original",
- "architecture": "Qwen2ForCausalLM",
- "average_score": 30.443047255620304,
- "has_chat_template": true
- },
- "evaluations": {
- "ifeval": {
- "name": "IFEval",
- "value": 0.6934289906337407,
- "normalized_score": 69.34289906337406
- },
- "bbh": {
- "name": "BBH",
- "value": 0.5175174748142363,
- "normalized_score": 31.4442193637187
- },
- "math": {
- "name": "MATH Level 5",
- "value": 0.44561933534743203,
- "normalized_score": 44.561933534743204
- },
- "gpqa": {
- "name": "GPQA",
- "value": 0.25922818791946306,
- "normalized_score": 1.230425055928408
- },
- "musr": {
- "name": "MUSR",
- "value": 0.33555208333333336,
- "normalized_score": 1.2106770833333331
- },
- "mmlu_pro": {
- "name": "MMLU-PRO",
- "value": 0.413813164893617,
- "normalized_score": 34.86812943262411
- }
- },
- "features": {
- "is_not_available_on_hub": true,
- "is_merged": false,
- "is_moe": false,
- "is_flagged": false,
- "is_official_provider": false
- },
- "metadata": {
- "upload_date": "2025-03-11",
- "submission_date": "2025-03-12",
- "generation": 2,
- "base_model": "Qwen/Qwen2.5-7B",
- "hub_license": "apache-2.0",
- "hub_hearts": 0,
- "params_billions": 7.616,
- "co2_cost": 0.670074015460606
- }
- },
- {
- "id": 
"stupidity-ai/Llama-3-8B-Instruct-MultiMoose_bfloat16_2aff10399de6ed9206a59a48c49bd704962cca1a_True", - "model": { - "name": "stupidity-ai/Llama-3-8B-Instruct-MultiMoose", - "sha": "2aff10399de6ed9206a59a48c49bd704962cca1a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.768701692265338, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23181048506850713, - "normalized_score": 23.181048506850715 - }, - "bbh": { - "name": "BBH", - "value": 0.2822965317600308, - "normalized_score": 1.2076926870097697 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2533557046979866, - "normalized_score": 0.44742729306487633 - }, - "musr": { - "name": "MUSR", - "value": 0.3485416666666667, - "normalized_score": 2.7343749999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.109375, - "normalized_score": 1.041666666666666 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-17", - "submission_date": "2025-03-07", - "generation": 1, - "base_model": "stupidity-ai/Llama-3-8B-Instruct-MultiMoose (Merge)", - "hub_license": "llama3", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.7775533202619482 - } - }, - { - "id": "suayptalha/Clarus-7B-v0.1_bfloat16_0d2982fbacb05c10a97af807f0649fcad7a82479_True", - "model": { - "name": "suayptalha/Clarus-7B-v0.1", - "sha": "0d2982fbacb05c10a97af807f0649fcad7a82479", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.705259716178944, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7454110648634512, - "normalized_score": 74.54110648634511 - }, - "bbh": { - "name": "BBH", - "value": 0.5496611433440965, - "normalized_score": 36.03116366747631 - }, - "math": { - "name": "MATH Level 5", - "value": 0.49244712990936557, - "normalized_score": 49.244712990936556 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3070469798657718, - "normalized_score": 7.606263982102905 - }, - "musr": { - "name": "MUSR", - "value": 0.44295833333333334, - "normalized_score": 15.169791666666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4387466755319149, - "normalized_score": 37.63851950354609 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-24", - "submission_date": "2025-02-24", - "generation": 1, - "base_model": "suayptalha/Clarus-7B-v0.1 (Merge)", - "hub_license": "mit", - "hub_hearts": 5, - "params_billions": 7.616, - "co2_cost": 0.6465909529861601 - } - }, - { - "id": "suayptalha/Clarus-7B-v0.2_bfloat16_63712049fb59216dae1f2e2f5c993e235b21b6c7_True", - "model": { - "name": "suayptalha/Clarus-7B-v0.2", - "sha": "63712049fb59216dae1f2e2f5c993e235b21b6c7", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.860642918566676, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7679423928509688, - "normalized_score": 76.79423928509688 - }, - "bbh": { - 
"name": "BBH", - "value": 0.5490057426751466, - "normalized_score": 36.01880357725931 - }, - "math": { - "name": "MATH Level 5", - "value": 0.48564954682779454, - "normalized_score": 48.56495468277945 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - "musr": { - "name": "MUSR", - "value": 0.44165625000000003, - "normalized_score": 15.073697916666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4399933510638298, - "normalized_score": 37.77703900709219 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-24", - "submission_date": "2025-02-24", - "generation": 1, - "base_model": "suayptalha/Clarus-7B-v0.2 (Merge)", - "hub_license": "mit", - "hub_hearts": 4, - "params_billions": 7.613, - "co2_cost": 0.6742416859737426 - } - }, - { - "id": "suayptalha/Clarus-7B-v0.3_bfloat16_e1e28ebc8cb7da944cc22aa9e65a322bff2731ef_True", - "model": { - "name": "suayptalha/Clarus-7B-v0.3", - "sha": "e1e28ebc8cb7da944cc22aa9e65a322bff2731ef", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 36.776154141235224, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7509064836855099, - "normalized_score": 75.090648368551 - }, - "bbh": { - "name": "BBH", - "value": 0.5525985716155296, - "normalized_score": 36.45786933549141 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4879154078549849, - "normalized_score": 48.79154078549849 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31208053691275167, - "normalized_score": 8.277404921700223 - }, - "musr": { - "name": "MUSR", - "value": 0.44022916666666667, - "normalized_score": 14.428645833333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4384973404255319, - "normalized_score": 37.61081560283688 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-28", - "submission_date": "2025-02-28", - "generation": 1, - "base_model": "suayptalha/Clarus-7B-v0.3 (Merge)", - "hub_license": "mit", - "hub_hearts": 4, - "params_billions": 7.616, - "co2_cost": 0.6434001187386952 - } - }, - { - "id": "suayptalha/DeepSeek-R1-Distill-Llama-3B_float16_5980166d03fa0d2a63f6dfbf59fe6b5abc7005e0_True", - "model": { - "name": "suayptalha/DeepSeek-R1-Distill-Llama-3B", - "sha": "5980166d03fa0d2a63f6dfbf59fe6b5abc7005e0", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.27368245692195, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7092658590318134, - "normalized_score": 70.92658590318135 - }, - "bbh": { - "name": "BBH", - "value": 0.44517853159705956, - "normalized_score": 21.44875568223163 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20921450151057402, - "normalized_score": 20.921450151057403 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.33958333333333335, - "normalized_score": 2.9145833333333346 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.29778922872340424, - "normalized_score": 
21.976580969267136 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-23", - "submission_date": "2025-02-24", - "generation": 1, - "base_model": "suayptalha/DeepSeek-R1-Distill-Llama-3B (Merge)", - "hub_license": "mit", - "hub_hearts": 11, - "params_billions": 3.213, - "co2_cost": 1.2163133549070893 - } - }, - { - "id": "suayptalha/Falcon3-Jessi-v0.4-7B-Slerp_bfloat16_bf21816f8fbfcaab7cfc811c9ffd13b25988514b_True", - "model": { - "name": "suayptalha/Falcon3-Jessi-v0.4-7B-Slerp", - "sha": "bf21816f8fbfcaab7cfc811c9ffd13b25988514b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 36.077231715414364, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7676176988169169, - "normalized_score": 76.76176988169169 - }, - "bbh": { - "name": "BBH", - "value": 0.5590927389495824, - "normalized_score": 37.28589655286154 - }, - "math": { - "name": "MATH Level 5", - "value": 0.39652567975830816, - "normalized_score": 39.65256797583081 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31208053691275167, - "normalized_score": 8.277404921700223 - }, - "musr": { - "name": "MUSR", - "value": 0.48121875000000003, - "normalized_score": 20.485677083333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.406000664893617, - "normalized_score": 34.00007387706855 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-20", - "submission_date": "2025-01-20", - "generation": 1, - "base_model": "suayptalha/Falcon3-Jessi-v0.4-7B-Slerp (Merge)", - "hub_license": "other", - "hub_hearts": 9, - "params_billions": 7.456, - "co2_cost": 1.4557438278499164 - } - }, - { - "id": "suayptalha/HomerCreativeAnvita-Mix-Qw7B_bfloat16_5be9b48b59652687d3e5b88f9e935b51869756ad_True", - "model": { - "name": "suayptalha/HomerCreativeAnvita-Mix-Qw7B", - "sha": "5be9b48b59652687d3e5b88f9e935b51869756ad", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.464381527434064, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7807816593305763, - "normalized_score": 78.07816593305762 - }, - "bbh": { - "name": "BBH", - "value": 0.5564653181490319, - "normalized_score": 36.98416750362379 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3610271903323263, - "normalized_score": 36.102719033232624 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3145973154362416, - "normalized_score": 8.612975391498878 - }, - "musr": { - "name": "MUSR", - "value": 0.44159375, - "normalized_score": 14.732552083333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4444813829787234, - "normalized_score": 38.27570921985816 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-22", - "submission_date": "2024-11-24", - "generation": 1, - "base_model": "suayptalha/HomerCreativeAnvita-Mix-Qw7B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 12, - "params_billions": 7.616, - "co2_cost": 1.2997613805312176 - } - }, - { - "id": 
"suayptalha/Komodo-Llama-3.2-3B-v2-fp16_float16_1ff4b55d952597429c249ca71dc08b823eba17c0_True", - "model": { - "name": "suayptalha/Komodo-Llama-3.2-3B-v2-fp16", - "sha": "1ff4b55d952597429c249ca71dc08b823eba17c0", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.317372826484497, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6340532010620709, - "normalized_score": 63.405320106207085 - }, - "bbh": { - "name": "BBH", - "value": 0.43549964909074995, - "normalized_score": 20.204328973550357 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10649546827794562, - "normalized_score": 10.649546827794563 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.34057291666666667, - "normalized_score": 3.3716145833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28523936170212766, - "normalized_score": 20.582151300236408 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-19", - "submission_date": "2024-11-19", - "generation": 1, - "base_model": "suayptalha/Komodo-Llama-3.2-3B-v2-fp16 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 6, - "params_billions": 3.0, - "co2_cost": 1.1961295271944847 - } - }, - { - "id": "suayptalha/Lamarckvergence-14B_bfloat16_7a1463829bbb7f8f7ad4b92e96260a3a27997bbe_True", - "model": { - "name": "suayptalha/Lamarckvergence-14B", - "sha": "7a1463829bbb7f8f7ad4b92e96260a3a27997bbe", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 43.320333136542786, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7655941790006073, - "normalized_score": 76.55941790006072 - }, - "bbh": { - "name": "BBH", - "value": 0.651698573892736, - "normalized_score": 50.32923622182769 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5400302114803626, - "normalized_score": 54.003021148036254 - }, - "gpqa": { - "name": "GPQA", - "value": 0.36325503355704697, - "normalized_score": 15.100671140939594 - }, - "musr": { - "name": "MUSR", - "value": 0.44215625000000003, - "normalized_score": 16.336197916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5283410904255319, - "normalized_score": 47.593454491725765 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-06", - "submission_date": "2025-02-06", - "generation": 1, - "base_model": "suayptalha/Lamarckvergence-14B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 19, - "params_billions": 14.766, - "co2_cost": 3.161000268078386 - } - }, - { - "id": "suayptalha/Lix-14B-v0.1_bfloat16_058e2f097fec3761d7383464673e0dda25192f7e_True", - "model": { - "name": "suayptalha/Lix-14B-v0.1", - "sha": "058e2f097fec3761d7383464673e0dda25192f7e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 43.31763225045196, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 
0.7813313120298586, - "normalized_score": 78.13313120298585 - }, - "bbh": { - "name": "BBH", - "value": 0.6607910825152539, - "normalized_score": 51.473725053502925 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5294561933534743, - "normalized_score": 52.94561933534743 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3699664429530201, - "normalized_score": 15.99552572706935 - }, - "musr": { - "name": "MUSR", - "value": 0.43378125, - "normalized_score": 13.422656250000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5314162234042553, - "normalized_score": 47.93513593380615 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-06", - "generation": 1, - "base_model": "suayptalha/Lix-14B-v0.1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 8, - "params_billions": 14.766, - "co2_cost": 1.61743506178036 - } - }, - { - "id": "suayptalha/Luminis-phi-4_bfloat16_8415367af0b7dfa4b2c3aaf0a4fd281b350b011f_True", - "model": { - "name": "suayptalha/Luminis-phi-4", - "sha": "8415367af0b7dfa4b2c3aaf0a4fd281b350b011f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 41.757466085959734, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6900069593124022, - "normalized_score": 69.00069593124022 - }, - "bbh": { - "name": "BBH", - "value": 0.6920213038130584, - "normalized_score": 55.80283046446502 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4637462235649547, - "normalized_score": 46.37462235649547 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35151006711409394, - "normalized_score": 13.534675615212524 - }, - "musr": { - "name": "MUSR", - "value": 0.45715625, - "normalized_score": 16.677864583333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5423869680851063, - "normalized_score": 49.15410756501182 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-06", - "submission_date": "2025-02-06", - "generation": 1, - "base_model": "suayptalha/Luminis-phi-4 (Merge)", - "hub_license": "mit", - "hub_hearts": 11, - "params_billions": 14.66, - "co2_cost": 1.8387885590178514 - } - }, - { - "id": "suayptalha/Maestro-10B_float16_d37a2f19d52242ceb836466635982921f33a69b0_True", - "model": { - "name": "suayptalha/Maestro-10B", - "sha": "d37a2f19d52242ceb836466635982921f33a69b0", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 32.83184082460664, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7767601076255447, - "normalized_score": 77.67601076255447 - }, - "bbh": { - "name": "BBH", - "value": 0.5746090622656775, - "normalized_score": 39.54498059710542 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19108761329305135, - "normalized_score": 19.108761329305135 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33305369127516776, - "normalized_score": 11.073825503355701 - }, - "musr": { - "name": "MUSR", - "value": 0.43972916666666667, - "normalized_score": 13.832812500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 
0.42179188829787234, - "normalized_score": 35.754654255319146 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-31", - "submission_date": "2025-01-31", - "generation": 1, - "base_model": "suayptalha/Maestro-10B (Merge)", - "hub_license": "other", - "hub_hearts": 7, - "params_billions": 10.306, - "co2_cost": 1.8131166227010422 - } - }, - { - "id": "suayptalha/Rombos-2.5-T.E-8.1_bfloat16_c0ee2950b07377e1d0e01fc013a0f200b0306ea2_True", - "model": { - "name": "suayptalha/Rombos-2.5-T.E-8.1", - "sha": "c0ee2950b07377e1d0e01fc013a0f200b0306ea2", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.40416180893529, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6925047762159957, - "normalized_score": 69.25047762159957 - }, - "bbh": { - "name": "BBH", - "value": 0.5514641249478369, - "normalized_score": 36.499861205880386 - }, - "math": { - "name": "MATH Level 5", - "value": 0.49244712990936557, - "normalized_score": 49.244712990936556 - }, - "gpqa": { - "name": "GPQA", - "value": 0.311241610738255, - "normalized_score": 8.165548098434002 - }, - "musr": { - "name": "MUSR", - "value": 0.41663541666666665, - "normalized_score": 10.979427083333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4445644946808511, - "normalized_score": 38.2849438534279 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-16", - "submission_date": "2024-11-16", - "generation": 1, - "base_model": "suayptalha/Rombos-2.5-T.E-8.1 (Merge)", - "hub_license": "cc-by-nc-sa-4.0", - "hub_hearts": 7, - "params_billions": 7.616, - "co2_cost": 1.3720311227158388 - } - }, - { - "id": "sumink/Qmerft_bfloat16_dd12d37190a97eaff0c8180a1c679097a9aaa393_False", - "model": { - "name": "sumink/Qmerft", - "sha": "dd12d37190a97eaff0c8180a1c679097a9aaa393", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.9704632098131207, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15639724819035714, - "normalized_score": 15.639724819035713 - }, - "bbh": { - "name": "BBH", - "value": 0.29390930175643937, - "normalized_score": 1.9490136326917558 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0022658610271903325, - "normalized_score": 0.22658610271903326 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2525167785234899, - "normalized_score": 0.33557046979865535 - }, - "musr": { - "name": "MUSR", - "value": 0.36876041666666665, - "normalized_score": 3.9283854166666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11569148936170212, - "normalized_score": 1.7434988179669018 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-15", - "submission_date": "2025-01-15", - "generation": 1, - "base_model": "sumink/Qmerft (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.777, - "co2_cost": 1.2209124695691955 - } - }, - { - "id": 
"sumink/Qwenftmodel_float16_7fe96b05b36aaa1be229c436b4fe3b476be9e2dd_False", - "model": { - "name": "sumink/Qwenftmodel", - "sha": "7fe96b05b36aaa1be229c436b4fe3b476be9e2dd", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 10.104913951155993, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17290899258412123, - "normalized_score": 17.290899258412125 - }, - "bbh": { - "name": "BBH", - "value": 0.38226970256668574, - "normalized_score": 14.041351651308327 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0891238670694864, - "normalized_score": 8.91238670694864 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25671140939597314, - "normalized_score": 0.8948545861297527 - }, - "musr": { - "name": "MUSR", - "value": 0.36171875000000003, - "normalized_score": 4.61484375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.23387632978723405, - "normalized_score": 14.875147754137116 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-05", - "submission_date": "2024-12-05", - "generation": 0, - "base_model": "sumink/Qwenftmodel", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 1.544, - "co2_cost": 2.0288285053678234 - } - }, - { - "id": "sumink/Qwenmplus_float16_2f6d29692e18a32bc179e81d09d4ecdefefb85d8_False", - "model": { - "name": "sumink/Qwenmplus", - "sha": "2f6d29692e18a32bc179e81d09d4ecdefefb85d8", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 9.390911516269796, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20403307668098425, - "normalized_score": 20.403307668098424 - }, - "bbh": { - "name": "BBH", - "value": 0.3675511408391697, - "normalized_score": 12.70658883354089 - }, - "math": { - "name": "MATH Level 5", - "value": 0.024924471299093656, - "normalized_score": 2.492447129909366 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28523489932885904, - "normalized_score": 4.697986577181204 - }, - "musr": { - "name": "MUSR", - "value": 0.38283333333333336, - "normalized_score": 5.020833333333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.19921875, - "normalized_score": 11.024305555555555 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-03", - "submission_date": "2025-01-03", - "generation": 0, - "base_model": "sumink/Qwenmplus", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 1.543, - "co2_cost": 2.1917080289432085 - } - }, - { - "id": "sumink/Qwensci_float16_5cfce5a410358536c582e79a8484600ae384991a_False", - "model": { - "name": "sumink/Qwensci", - "sha": "5cfce5a410358536c582e79a8484600ae384991a", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 5.5625396368925974, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17398281005509825, - "normalized_score": 17.398281005509826 - }, - "bbh": { - "name": "BBH", - "value": 0.3281870591856875, - 
"normalized_score": 6.3198431947642675 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02039274924471299, - "normalized_score": 2.0392749244712993 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.3608854166666667, - "normalized_score": 3.6106770833333353 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12599734042553193, - "normalized_score": 2.8885933806146578 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-03", - "submission_date": "2025-01-03", - "generation": 0, - "base_model": "sumink/Qwensci", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 1.543, - "co2_cost": 2.0854593464503473 - } - }, - { - "id": "sumink/bbhqwen_float16_0e0815e15549c966e25dcf7e1bbd84d998878ba7_False", - "model": { - "name": "sumink/bbhqwen", - "sha": "0e0815e15549c966e25dcf7e1bbd84d998878ba7", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 7.833827413484496, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.18085236062536292, - "normalized_score": 18.085236062536293 - }, - "bbh": { - "name": "BBH", - "value": 0.3388245916050106, - "normalized_score": 6.631749430919456 - }, - "math": { - "name": "MATH Level 5", - "value": 0.010574018126888218, - "normalized_score": 1.0574018126888218 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2575503355704698, - "normalized_score": 1.0067114093959737 - }, - "musr": { - "name": "MUSR", - "value": 0.43523958333333335, - "normalized_score": 13.371614583333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16165226063829788, - "normalized_score": 6.850251182033097 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-25", - "submission_date": "2025-02-25", - "generation": 0, - "base_model": "sumink/bbhqwen", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.086, - "co2_cost": 0.7892143760544216 - } - }, - { - "id": "sumink/bbhqwen2_float16_407c3fba41610ba6a42409221f58ae1bc758626d_False", - "model": { - "name": "sumink/bbhqwen2", - "sha": "407c3fba41610ba6a42409221f58ae1bc758626d", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 6.258601237356344, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15329991090307052, - "normalized_score": 15.329991090307054 - }, - "bbh": { - "name": "BBH", - "value": 0.30663248168563745, - "normalized_score": 3.8643035117477176 - }, - "math": { - "name": "MATH Level 5", - "value": 0.006042296072507553, - "normalized_score": 0.6042296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2625838926174497, - "normalized_score": 1.6778523489932917 - }, - "musr": { - "name": "MUSR", - "value": 0.44305208333333335, - "normalized_score": 14.414843750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1149434840425532, - "normalized_score": 1.6603871158392434 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - 
"is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-25", - "submission_date": "2025-02-25", - "generation": 0, - "base_model": "sumink/bbhqwen2", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.086, - "co2_cost": 0.7929371277340248 - } - }, - { - "id": "sumink/bbhqwen3_float16_d8075190856fdab9a14a4aa56f2a962dda6b8436_False", - "model": { - "name": "sumink/bbhqwen3", - "sha": "d8075190856fdab9a14a4aa56f2a962dda6b8436", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.947842576914937, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1942911474886634, - "normalized_score": 19.42911474886634 - }, - "bbh": { - "name": "BBH", - "value": 0.2950842029929075, - "normalized_score": 2.187659932659932 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2575503355704698, - "normalized_score": 1.0067114093959737 - }, - "musr": { - "name": "MUSR", - "value": 0.3796145833333333, - "normalized_score": 5.218489583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11660571808510638, - "normalized_score": 1.8450797872340412 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-25", - "submission_date": "2025-02-25", - "generation": 0, - "base_model": "sumink/bbhqwen3", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.086, - "co2_cost": 0.7695893917829426 - } - }, - { - "id": "sumink/bbhqwen4_float16_44934bb91bbfec64cef43bc0a987937bdbba960a_False", - "model": { - "name": "sumink/bbhqwen4", - "sha": "44934bb91bbfec64cef43bc0a987937bdbba960a", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 5.656083792655539, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.14485675784695717, - "normalized_score": 14.485675784695719 - }, - "bbh": { - "name": "BBH", - "value": 0.3199395559502713, - "normalized_score": 4.8923014124501245 - }, - "math": { - "name": "MATH Level 5", - "value": 0.006042296072507553, - "normalized_score": 0.6042296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24412751677852348, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.4028958333333333, - "normalized_score": 8.295312499999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.15093085106382978, - "normalized_score": 5.658983451536641 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-25", - "submission_date": "2025-02-25", - "generation": 0, - "base_model": "sumink/bbhqwen4", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.086, - "co2_cost": 0.7885846459457425 - } - }, - { - "id": "sumink/bbhqwen5_float16_81acc4a3dcb973997691073e2395e3c43acd5620_False", - "model": { - "name": "sumink/bbhqwen5", - "sha": "81acc4a3dcb973997691073e2395e3c43acd5620", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 
5.199436501746352, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1521507378200951, - "normalized_score": 15.215073782009512 - }, - "bbh": { - "name": "BBH", - "value": 0.29130964476405813, - "normalized_score": 2.8132645692499794 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0022658610271903325, - "normalized_score": 0.22658610271903326 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.4019375, - "normalized_score": 10.142187499999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11311502659574468, - "normalized_score": 1.457225177304964 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-25", - "submission_date": "2025-02-25", - "generation": 0, - "base_model": "sumink/bbhqwen5", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.086, - "co2_cost": 0.763493256291797 - } - }, - { - "id": "sumink/bbhqwen6_float16_1b75baaea3ec9cabe8998ea5f3ee2cdb60d3de27_False", - "model": { - "name": "sumink/bbhqwen6", - "sha": "1b75baaea3ec9cabe8998ea5f3ee2cdb60d3de27", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.3661285339650675, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.18929551368147626, - "normalized_score": 18.929551368147628 - }, - "bbh": { - "name": "BBH", - "value": 0.2782242419852629, - "normalized_score": 2.129703501956035 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0007552870090634441, - "normalized_score": 0.0755287009063444 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.35796875, - "normalized_score": 2.2460937499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11527593085106383, - "normalized_score": 1.6973256501182026 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-25", - "submission_date": "2025-02-25", - "generation": 0, - "base_model": "sumink/bbhqwen6", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.086, - "co2_cost": 0.7747034439706267 - } - }, - { - "id": "sumink/flflmillama_bfloat16_e6e15070ab0783d5d75f6a67a57b26d86c989079_False", - "model": { - "name": "sumink/flflmillama", - "sha": "e6e15070ab0783d5d75f6a67a57b26d86c989079", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 9.04335499884608, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16756317681529453, - "normalized_score": 16.756317681529453 - }, - "bbh": { - "name": "BBH", - "value": 0.38511286094747693, - "normalized_score": 13.745933922898066 - }, - "math": { - "name": "MATH Level 5", - "value": 0.019637462235649546, - "normalized_score": 1.9637462235649545 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.35911458333333335, - "normalized_score": 
4.02265625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.20960771276595744, - "normalized_score": 12.178634751773048 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-05", - "submission_date": "2025-02-05", - "generation": 0, - "base_model": "sumink/flflmillama", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 1.1867441515549952 - } - }, - { - "id": "sumink/ftgpt_bfloat16_fea7c59fff2443a73a7fd11a78b1d80eb5f0c4e6_False", - "model": { - "name": "sumink/ftgpt", - "sha": "fea7c59fff2443a73a7fd11a78b1d80eb5f0c4e6", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "GPT2LMHeadModel", - "average_score": 3.951784139825086, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.0787100449030794, - "normalized_score": 7.871004490307939 - }, - "bbh": { - "name": "BBH", - "value": 0.29190853217047663, - "normalized_score": 1.9312767142279632 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.41384375, - "normalized_score": 10.097135416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1171875, - "normalized_score": 1.9097222222222217 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-06", - "submission_date": "2024-11-20", - "generation": 0, - "base_model": "sumink/ftgpt", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 0.124, - "co2_cost": 0.10563503907928061 - } - }, - { - "id": "sumink/llamaft_bfloat16_99b36b73b173c63decde2e4f8ef49f78d04ea568_False", - "model": { - "name": "sumink/llamaft", - "sha": "99b36b73b173c63decde2e4f8ef49f78d04ea568", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 8.156199769325905, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16086871722584964, - "normalized_score": 16.086871722584963 - }, - "bbh": { - "name": "BBH", - "value": 0.3762775648269859, - "normalized_score": 12.950582368134747 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01661631419939577, - "normalized_score": 1.6616314199395772 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.3498125, - "normalized_score": 3.0598958333333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.21143617021276595, - "normalized_score": 12.381796690307327 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-05", - "submission_date": "2025-02-05", - "generation": 0, - "base_model": "sumink/llamaft", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 1.1911480280954392 - } - }, - { - "id": "sumink/llamamerge_float16_ab032bb4dd5e7fe4950e00517afb641f3a0c26a6_False", - "model": { - "name": 
"sumink/llamamerge", - "sha": "ab032bb4dd5e7fe4950e00517afb641f3a0c26a6", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.736806871689032, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.26718107953563214, - "normalized_score": 26.718107953563212 - }, - "bbh": { - "name": "BBH", - "value": 0.46316160070587903, - "normalized_score": 24.376393916804787 - }, - "math": { - "name": "MATH Level 5", - "value": 0.015105740181268883, - "normalized_score": 1.5105740181268883 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2986577181208054, - "normalized_score": 6.487695749440718 - }, - "musr": { - "name": "MUSR", - "value": 0.42397916666666663, - "normalized_score": 11.664062500000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2589760638297872, - "normalized_score": 17.664007092198577 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-14", - "submission_date": "2025-01-14", - "generation": 0, - "base_model": "sumink/llamamerge", - "hub_license": "llama3", - "hub_hearts": 0, - "params_billions": 13.016, - "co2_cost": 1.8166414393378985 - } - }, - { - "id": "sumink/llftfl7_bfloat16_fba0f95abad2633bd64b4d4cedfd1910716b2025_False", - "model": { - "name": "sumink/llftfl7", - "sha": "fba0f95abad2633bd64b4d4cedfd1910716b2025", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 7.811247007957387, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17143512546709397, - "normalized_score": 17.143512546709395 - }, - "bbh": { - "name": "BBH", - "value": 0.37864273336631166, - "normalized_score": 13.272908076195748 - }, - "math": { - "name": "MATH Level 5", - "value": 0.010574018126888218, - "normalized_score": 1.0574018126888218 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28104026845637586, - "normalized_score": 4.138702460850116 - }, - "musr": { - "name": "MUSR", - "value": 0.36320833333333336, - "normalized_score": 3.001041666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.17428523936170212, - "normalized_score": 8.253915484633568 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-05", - "submission_date": "2025-02-05", - "generation": 0, - "base_model": "sumink/llftfl7", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 1.2173806002200136 - } - }, - { - "id": "sumink/llmer_float16_c73859c891e2db1e79e0e32d43fd685418f0c2fc_False", - "model": { - "name": "sumink/llmer", - "sha": "c73859c891e2db1e79e0e32d43fd685418f0c2fc", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.983709841194923, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3191132860809319, - "normalized_score": 31.911328608093193 - }, - "bbh": { - "name": "BBH", - "value": 0.4884590875207178, - "normalized_score": 26.830896875618176 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0649546827794562, - "normalized_score": 
6.495468277945619 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.4039166666666667, - "normalized_score": 8.189583333333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35289228723404253, - "normalized_score": 28.09914302600473 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-15", - "submission_date": "2025-01-15", - "generation": 1, - "base_model": "sumink/llmer (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.4185286218364976 - } - }, - { - "id": "sumink/qwft_float16_75279ae06c78ca9f3957ace3780f11dd95435b2b_False", - "model": { - "name": "sumink/qwft", - "sha": "75279ae06c78ca9f3957ace3780f11dd95435b2b", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 3.1061410184104887, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.11965252197502627, - "normalized_score": 11.965252197502625 - }, - "bbh": { - "name": "BBH", - "value": 0.30021752093452153, - "normalized_score": 2.872788366329503 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2525167785234899, - "normalized_score": 0.33557046979865535 - }, - "musr": { - "name": "MUSR", - "value": 0.3580625, - "normalized_score": 2.024479166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11294880319148937, - "normalized_score": 1.4387559101654845 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-14", - "submission_date": "2025-01-14", - "generation": 0, - "base_model": "sumink/qwft", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.3540079479977694 - } - }, - { - "id": "sumink/qwmer_float16_18b1f1a5a4be93ed37fc75c3d832e23324ea4993_False", - "model": { - "name": "sumink/qwmer", - "sha": "18b1f1a5a4be93ed37fc75c3d832e23324ea4993", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 11.672277450870531, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22124407682726277, - "normalized_score": 22.12440768272628 - }, - "bbh": { - "name": "BBH", - "value": 0.4298800979582788, - "normalized_score": 20.382371995631058 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0007552870090634441, - "normalized_score": 0.0755287009063444 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28691275167785235, - "normalized_score": 4.921700223713646 - }, - "musr": { - "name": "MUSR", - "value": 0.4031770833333333, - "normalized_score": 9.030468750000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.22149268617021275, - "normalized_score": 13.499187352245862 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-15", - "submission_date": "2025-01-15", - "generation": 1, - "base_model": "sumink/qwmer (Merge)", - 
"hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.3534062474193447 - } - }, - { - "id": "sumink/solarmer3_bfloat16_cb294a464c44adf0e4c23ecd50ff1a65be3040e0_False", - "model": { - "name": "sumink/solarmer3", - "sha": "cb294a464c44adf0e4c23ecd50ff1a65be3040e0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.50211421532019, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3741428299135183, - "normalized_score": 37.41428299135184 - }, - "bbh": { - "name": "BBH", - "value": 0.5265990319952963, - "normalized_score": 33.44249367336016 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0581570996978852, - "normalized_score": 5.81570996978852 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.44013541666666667, - "normalized_score": 15.050260416666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.332280585106383, - "normalized_score": 25.80895390070922 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-23", - "submission_date": "2025-01-23", - "generation": 1, - "base_model": "sumink/solarmer3 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 10.732, - "co2_cost": 1.419555202086502 - } - }, - { - "id": "sumink/somer_float16_b9b7857618e91e4f16ceeb02546f129f7cead152_False", - "model": { - "name": "sumink/somer", - "sha": "b9b7857618e91e4f16ceeb02546f129f7cead152", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.64702871989658, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.29902990731259727, - "normalized_score": 29.902990731259727 - }, - "bbh": { - "name": "BBH", - "value": 0.519370328606347, - "normalized_score": 31.718258352659863 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04154078549848943, - "normalized_score": 4.1540785498489425 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2986577181208054, - "normalized_score": 6.487695749440718 - }, - "musr": { - "name": "MUSR", - "value": 0.465, - "normalized_score": 18.424999999999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3447473404255319, - "normalized_score": 27.19414893617021 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-15", - "submission_date": "2025-01-15", - "generation": 1, - "base_model": "sumink/somer (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 10.732, - "co2_cost": 1.439342359392516 - } - }, - { - "id": "sumink/somer2_bfloat16_4c274ca4914b34c26772de9dc2af0c7192529c0b_False", - "model": { - "name": "sumink/somer2", - "sha": "4c274ca4914b34c26772de9dc2af0c7192529c0b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.03735671593734, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3132433055404106, - "normalized_score": 31.324330554041055 - }, - "bbh": { - "name": 
"BBH", - "value": 0.5166793474130525, - "normalized_score": 31.375840981103366 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04682779456193353, - "normalized_score": 4.682779456193353 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.46630208333333334, - "normalized_score": 18.654427083333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34325132978723405, - "normalized_score": 27.027925531914892 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-15", - "submission_date": "2025-01-15", - "generation": 1, - "base_model": "sumink/somer2 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 10.732, - "co2_cost": 1.5175328638862864 - } - }, - { - "id": "sumink/somerft_bfloat16_41944985c2aa6f7f704c5680859f6f154d931734_False", - "model": { - "name": "sumink/somerft", - "sha": "41944985c2aa6f7f704c5680859f6f154d931734", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 4.941854251868015, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.14305819669587805, - "normalized_score": 14.305819669587805 - }, - "bbh": { - "name": "BBH", - "value": 0.3093455213252133, - "normalized_score": 3.6167949511136865 - }, - "math": { - "name": "MATH Level 5", - "value": 0.014350453172205438, - "normalized_score": 1.4350453172205437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2483221476510067, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.40447916666666667, - "normalized_score": 8.99322916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11170212765957446, - "normalized_score": 1.300236406619384 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-17", - "submission_date": "2025-01-17", - "generation": 0, - "base_model": "sumink/somerft", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.543, - "co2_cost": 1.2931926679579668 - } - }, - { - "id": "sunbaby/BrainCog-8B-0.1-Instruct_bfloat16_6c03cb7af723c7f7785df9eee5d5838247619bee_True", - "model": { - "name": "sunbaby/BrainCog-8B-0.1-Instruct", - "sha": "6c03cb7af723c7f7785df9eee5d5838247619bee", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.380632683786093, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4253004250943053, - "normalized_score": 42.53004250943053 - }, - "bbh": { - "name": "BBH", - "value": 0.46182179983247446, - "normalized_score": 24.283467839476657 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09667673716012085, - "normalized_score": 9.667673716012084 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.36559375, - "normalized_score": 6.332552083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28582114361702127, - "normalized_score": 20.646793735224584 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - 
"is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-31", - "submission_date": "2024-08-27", - "generation": 1, - "base_model": "meta-llama/Meta-Llama-3-8B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.669108790177119 - } - }, - { - "id": "swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA_bfloat16_2b6e46e4c9d341dc8bf8350a167492c880116b66_False", - "model": { - "name": "swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA", - "sha": "2b6e46e4c9d341dc8bf8350a167492c880116b66", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.82755308278925, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4815046299374548, - "normalized_score": 48.15046299374548 - }, - "bbh": { - "name": "BBH", - "value": 0.4935698792285044, - "normalized_score": 27.990827697552746 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04833836858006042, - "normalized_score": 4.833836858006042 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2986577181208054, - "normalized_score": 6.487695749440718 - }, - "musr": { - "name": "MUSR", - "value": 0.43873958333333335, - "normalized_score": 13.242447916666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3723404255319149, - "normalized_score": 30.26004728132387 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-29", - "submission_date": "2024-10-25", - "generation": 1, - "base_model": "meta-llama/Meta-Llama-3-8B-Instruct", - "hub_license": "llama3", - "hub_hearts": 24, - "params_billions": 8.03, - "co2_cost": 1.6332776227685775 - } - }, - { - "id": "synergetic/FrankenQwen2.5-14B_bfloat16_24e41619569b50aa44698e0afabbbee30af998bd_True", - "model": { - "name": "synergetic/FrankenQwen2.5-14B", - "sha": "24e41619569b50aa44698e0afabbbee30af998bd", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 18.126546386620493, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1869472998311148, - "normalized_score": 18.69472998311148 - }, - "bbh": { - "name": "BBH", - "value": 0.6047748435655343, - "normalized_score": 44.273555426330965 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.3842604166666666, - "normalized_score": 5.532552083333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43816489361702127, - "normalized_score": 37.57387706855792 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-30", - "submission_date": "2024-11-30", - "generation": 1, - "base_model": "synergetic/FrankenQwen2.5-14B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 16.972, - "co2_cost": 4.517963472170647 - } - }, - { - "id": "talha2001/Beast-Soul-new_float16_e6cf8caa60264a3005df2ff4b9d967f684519d4b_False", - "model": { - "name": "talha2001/Beast-Soul-new", - "sha": 
"e6cf8caa60264a3005df2ff4b9d967f684519d4b", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.79227783258031, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4853510906616666, - "normalized_score": 48.53510906616666 - }, - "bbh": { - "name": "BBH", - "value": 0.5227143628884523, - "normalized_score": 33.07275916855207 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07401812688821752, - "normalized_score": 7.401812688821751 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28187919463087246, - "normalized_score": 4.250559284116329 - }, - "musr": { - "name": "MUSR", - "value": 0.4459270833333333, - "normalized_score": 14.140885416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3101728723404255, - "normalized_score": 23.352541371158388 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-07", - "submission_date": "2024-08-07", - "generation": 1, - "base_model": "talha2001/Beast-Soul-new (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.2857659556667733 - } - }, - { - "id": "tangledgroup/tangled-llama-pints-1.5b-v0.1-instruct_bfloat16_3e1429f20007740877c51e44ed63b870a57a2e17_True", - "model": { - "name": "tangledgroup/tangled-llama-pints-1.5b-v0.1-instruct", - "sha": "3e1429f20007740877c51e44ed63b870a57a2e17", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.366498008182751, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15090182936829835, - "normalized_score": 15.090182936829834 - }, - "bbh": { - "name": "BBH", - "value": 0.31434444692284963, - "normalized_score": 3.8421954101765152 - }, - "math": { - "name": "MATH Level 5", - "value": 0.012084592145015106, - "normalized_score": 1.2084592145015105 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23993288590604026, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.37613541666666667, - "normalized_score": 4.850260416666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11087101063829788, - "normalized_score": 1.2078900709219857 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-27", - "submission_date": "2024-08-29", - "generation": 1, - "base_model": "pints-ai/1.5-Pints-16K-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.5, - "co2_cost": 0.5908680116894098 - } - }, - { - "id": "tangledgroup/tangled-llama-pints-1.5b-v0.2-instruct_bfloat16_5c229e26f3ab3d0f0f613ed242f3f0f57c930155_True", - "model": { - "name": "tangledgroup/tangled-llama-pints-1.5b-v0.2-instruct", - "sha": "5c229e26f3ab3d0f0f613ed242f3f0f57c930155", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.745857038769314, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1724092075692496, - "normalized_score": 17.24092075692496 - }, - "bbh": { - "name": "BBH", - "value": 
0.3158349391752727, - "normalized_score": 4.0802054869970155 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01283987915407855, - "normalized_score": 1.283987915407855 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24161073825503357, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3642916666666667, - "normalized_score": 4.569791666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11170212765957446, - "normalized_score": 1.300236406619384 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-14", - "submission_date": "2024-09-15", - "generation": 1, - "base_model": "pints-ai/1.5-Pints-16K-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.5, - "co2_cost": 0.5956229144427482 - } - }, - { - "id": "tanliboy/lambda-gemma-2-9b-dpo_float16_b141471308bc41ffe15180a6668c735396c3949b_True", - "model": { - "name": "tanliboy/lambda-gemma-2-9b-dpo", - "sha": "b141471308bc41ffe15180a6668c735396c3949b", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 22.91040441700333, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45008023156336296, - "normalized_score": 45.0080231563363 - }, - "bbh": { - "name": "BBH", - "value": 0.547172399190412, - "normalized_score": 35.554545346782085 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09441087613293052, - "normalized_score": 9.441087613293051 - }, - "gpqa": { - "name": "GPQA", - "value": 0.313758389261745, - "normalized_score": 8.501118568232664 - }, - "musr": { - "name": "MUSR", - "value": 0.40165625, - "normalized_score": 7.940364583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.379155585106383, - "normalized_score": 31.017287234042552 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-24", - "submission_date": "2024-09-18", - "generation": 2, - "base_model": "google/gemma-2-9b", - "hub_license": "gemma", - "hub_hearts": 1, - "params_billions": 9.242, - "co2_cost": 4.4831744963862326 - } - }, - { - "id": "tanliboy/lambda-gemma-2-9b-dpo_bfloat16_b141471308bc41ffe15180a6668c735396c3949b_True", - "model": { - "name": "tanliboy/lambda-gemma-2-9b-dpo", - "sha": "b141471308bc41ffe15180a6668c735396c3949b", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 16.97010860262216, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.18292463995531855, - "normalized_score": 18.292463995531854 - }, - "bbh": { - "name": "BBH", - "value": 0.5487911206515993, - "normalized_score": 35.73966330720827 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3104026845637584, - "normalized_score": 8.05369127516779 - }, - "musr": { - "name": "MUSR", - "value": 0.40562499999999996, - "normalized_score": 8.569791666666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3804853723404255, - "normalized_score": 31.165041371158388 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": 
false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-24", - "submission_date": "2024-09-18", - "generation": 2, - "base_model": "google/gemma-2-9b", - "hub_license": "gemma", - "hub_hearts": 1, - "params_billions": 9.242, - "co2_cost": 2.9035758929213187 - } - }, - { - "id": "tanliboy/lambda-qwen2.5-14b-dpo-test_bfloat16_96607eea3c67f14f73e576580610dba7530c5dd9_True", - "model": { - "name": "tanliboy/lambda-qwen2.5-14b-dpo-test", - "sha": "96607eea3c67f14f73e576580610dba7530c5dd9", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 42.617400826626636, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8231215397367873, - "normalized_score": 82.31215397367873 - }, - "bbh": { - "name": "BBH", - "value": 0.6393505282981286, - "normalized_score": 48.454439828605324 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5460725075528701, - "normalized_score": 54.607250755287005 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3624161073825503, - "normalized_score": 14.988814317673373 - }, - "musr": { - "name": "MUSR", - "value": 0.42603125000000003, - "normalized_score": 12.587239583333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4847905585106383, - "normalized_score": 42.75450650118203 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-20", - "submission_date": "2024-09-20", - "generation": 2, - "base_model": "Qwen/Qwen2.5-14B", - "hub_license": "apache-2.0", - "hub_hearts": 9, - "params_billions": 14.77, - "co2_cost": 3.6014865819313373 - } - }, - { - "id": "tanliboy/lambda-qwen2.5-32b-dpo-test_bfloat16_675b60d6e859455a6139e6e284bbe1844b8ddf46_True", - "model": { - "name": "tanliboy/lambda-qwen2.5-32b-dpo-test", - "sha": "675b60d6e859455a6139e6e284bbe1844b8ddf46", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 45.924592588188716, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8083839767372794, - "normalized_score": 80.83839767372794 - }, - "bbh": { - "name": "BBH", - "value": 0.6763904009446838, - "normalized_score": 54.40796058706255 - }, - "math": { - "name": "MATH Level 5", - "value": 0.6102719033232629, - "normalized_score": 61.027190332326285 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3565436241610738, - "normalized_score": 14.205816554809845 - }, - "musr": { - "name": "MUSR", - "value": 0.42742708333333335, - "normalized_score": 13.328385416666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.565658244680851, - "normalized_score": 51.739804964539005 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-22", - "submission_date": "2024-09-30", - "generation": 2, - "base_model": "Qwen/Qwen2.5-32B", - "hub_license": "apache-2.0", - "hub_hearts": 5, - "params_billions": 32.764, - "co2_cost": 10.99860689314531 - } - }, - { - "id": "tannedbum/Ellaria-9B_float16_087b263326da56de637912814bc7073b83b8d59a_True", - "model": { - "name": "tannedbum/Ellaria-9B", - "sha": "087b263326da56de637912814bc7073b83b8d59a", - "precision": "float16", - "type": "basemergesandmoerges", 
- "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 33.04997199099277, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7825802204816554, - "normalized_score": 78.25802204816554 - }, - "bbh": { - "name": "BBH", - "value": 0.5942102115140485, - "normalized_score": 41.72156106008432 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20770392749244712, - "normalized_score": 20.770392749244714 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33305369127516776, - "normalized_score": 11.073825503355701 - }, - "musr": { - "name": "MUSR", - "value": 0.4151458333333333, - "normalized_score": 10.859895833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.42054521276595747, - "normalized_score": 35.61613475177305 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-04", - "submission_date": "2025-01-07", - "generation": 1, - "base_model": "tannedbum/Ellaria-9B (Merge)", - "hub_license": "", - "hub_hearts": 17, - "params_billions": 10.159, - "co2_cost": 3.7142266237103225 - } - }, - { - "id": "tannedbum/L3-Nymeria-Maid-8B_float16_17cf2c77399d63638254353ac86adf5692b79c62_True", - "model": { - "name": "tannedbum/L3-Nymeria-Maid-8B", - "sha": "17cf2c77399d63638254353ac86adf5692b79c62", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.043175935330492, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7250029920610646, - "normalized_score": 72.50029920610646 - }, - "bbh": { - "name": "BBH", - "value": 0.5146055785516804, - "normalized_score": 31.240944775904463 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09365558912386707, - "normalized_score": 9.365558912386707 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.37505208333333334, - "normalized_score": 6.481510416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37466755319148937, - "normalized_score": 30.5186170212766 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-21", - "submission_date": "2025-01-07", - "generation": 1, - "base_model": "tannedbum/L3-Nymeria-Maid-8B (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 12, - "params_billions": 8.03, - "co2_cost": 0.9115967213637292 - } - }, - { - "id": "tannedbum/L3-Nymeria-v2-8B_float16_6f0f2526cc89c9d749b850c3e1c3484db92e5c3b_True", - "model": { - "name": "tannedbum/L3-Nymeria-v2-8B", - "sha": "6f0f2526cc89c9d749b850c3e1c3484db92e5c3b", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.709418052441055, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7168346653545925, - "normalized_score": 71.68346653545925 - }, - "bbh": { - "name": "BBH", - "value": 0.5224198261531375, - "normalized_score": 32.262543662000034 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09214501510574018, - "normalized_score": 9.214501510574017 - }, - "gpqa": { - "name": "GPQA", - "value": 
0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.369875, - "normalized_score": 5.134375000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37533244680851063, - "normalized_score": 30.592494089834517 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-29", - "submission_date": "2025-01-07", - "generation": 1, - "base_model": "tannedbum/L3-Nymeria-v2-8B (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 15, - "params_billions": 8.03, - "co2_cost": 0.9849768481433717 - } - }, - { - "id": "tannedbum/L3-Rhaenys-8B_float16_a159e2aabf9d6ef31444dc46c3dce9fdadca77d9_True", - "model": { - "name": "tannedbum/L3-Rhaenys-8B", - "sha": "a159e2aabf9d6ef31444dc46c3dce9fdadca77d9", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.454823240339877, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7362686560548235, - "normalized_score": 73.62686560548235 - }, - "bbh": { - "name": "BBH", - "value": 0.5299209893116719, - "normalized_score": 33.137944169076256 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08761329305135952, - "normalized_score": 8.761329305135952 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2978187919463087, - "normalized_score": 6.375838926174497 - }, - "musr": { - "name": "MUSR", - "value": 0.3724791666666667, - "normalized_score": 5.726562500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3799035904255319, - "normalized_score": 31.100398936170215 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-31", - "submission_date": "2025-01-07", - "generation": 1, - "base_model": "tannedbum/L3-Rhaenys-8B (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 5, - "params_billions": 8.03, - "co2_cost": 1.095765884407275 - } - }, - { - "id": "teknium/CollectiveCognition-v1.1-Mistral-7B_float16_5f57f70ec99450c70da2540e94dd7fd67be4b23c_False", - "model": { - "name": "teknium/CollectiveCognition-v1.1-Mistral-7B", - "sha": "5f57f70ec99450c70da2540e94dd7fd67be4b23c", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 14.256397052482129, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.27904626391308396, - "normalized_score": 27.904626391308398 - }, - "bbh": { - "name": "BBH", - "value": 0.4493426704276236, - "normalized_score": 23.47613361696592 - }, - "math": { - "name": "MATH Level 5", - "value": 0.030966767371601207, - "normalized_score": 3.096676737160121 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28691275167785235, - "normalized_score": 4.921700223713646 - }, - "musr": { - "name": "MUSR", - "value": 0.3869270833333333, - "normalized_score": 5.732552083333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28366023936170215, - "normalized_score": 20.40669326241135 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-10-04", - 
"submission_date": "2024-06-12", - "generation": 1, - "base_model": "mistralai/Mistral-7B-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 79, - "params_billions": 7.0, - "co2_cost": 0.8586361225619709 - } - }, - { - "id": "teknium/OpenHermes-13B_bfloat16_bcad6fff9f8591e091d2d57356a3f102197e8c5f_False", - "model": { - "name": "teknium/OpenHermes-13B", - "sha": "bcad6fff9f8591e091d2d57356a3f102197e8c5f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 12.182264325006635, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2668065178171696, - "normalized_score": 26.680651781716954 - }, - "bbh": { - "name": "BBH", - "value": 0.42064384521911524, - "normalized_score": 18.21332824040884 - }, - "math": { - "name": "MATH Level 5", - "value": 0.012084592145015106, - "normalized_score": 1.2084592145015105 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2726510067114094, - "normalized_score": 3.0201342281879207 - }, - "musr": { - "name": "MUSR", - "value": 0.4042604166666666, - "normalized_score": 8.532552083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.23894614361702127, - "normalized_score": 15.438460401891252 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-09-06", - "submission_date": "2024-06-12", - "generation": 1, - "base_model": "NousResearch/Llama-2-13b-hf", - "hub_license": "mit", - "hub_hearts": 55, - "params_billions": 13.0, - "co2_cost": 62.23823339896335 - } - }, - { - "id": "teknium/OpenHermes-2-Mistral-7B_bfloat16_4c6e34123b140ce773a8433cae5410949289102c_True", - "model": { - "name": "teknium/OpenHermes-2-Mistral-7B", - "sha": "4c6e34123b140ce773a8433cae5410949289102c", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.44047612236278, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5286151854856226, - "normalized_score": 52.86151854856226 - }, - "bbh": { - "name": "BBH", - "value": 0.4947516371878204, - "normalized_score": 29.251839211223313 - }, - "math": { - "name": "MATH Level 5", - "value": 0.045317220543806644, - "normalized_score": 4.531722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.45197916666666665, - "normalized_score": 16.064062499999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2931349734042553, - "normalized_score": 21.4594414893617 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-10-12", - "submission_date": "2024-06-12", - "generation": 1, - "base_model": "mistralai/Mistral-7B-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 255, - "params_billions": 7.0, - "co2_cost": 0.9500596701618965 - } - }, - { - "id": "teknium/OpenHermes-2.5-Mistral-7B_bfloat16_24c0bea14d53e6f67f1fbe2eca5bfe7cae389b33_True", - "model": { - "name": "teknium/OpenHermes-2.5-Mistral-7B", - "sha": "24c0bea14d53e6f67f1fbe2eca5bfe7cae389b33", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": 
"MistralForCausalLM", - "average_score": 21.317189027423062, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5571417173100706, - "normalized_score": 55.71417173100706 - }, - "bbh": { - "name": "BBH", - "value": 0.4870013259924984, - "normalized_score": 27.770026367807578 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05060422960725076, - "normalized_score": 5.0604229607250755 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.4241979166666667, - "normalized_score": 12.058072916666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3054355053191489, - "normalized_score": 22.826167257683213 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-10-29", - "submission_date": "2024-06-12", - "generation": 1, - "base_model": "mistralai/Mistral-7B-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 830, - "params_billions": 7.242, - "co2_cost": 0.9455666006211318 - } - }, - { - "id": "teknium/OpenHermes-7B_float16_9f55d6eb15f1edd52ee1fd863a220aa682e78a00_False", - "model": { - "name": "teknium/OpenHermes-7B", - "sha": "9f55d6eb15f1edd52ee1fd863a220aa682e78a00", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 9.569248719177576, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1812513021006485, - "normalized_score": 18.12513021006485 - }, - "bbh": { - "name": "BBH", - "value": 0.362033648602934, - "normalized_score": 12.08139546207365 - }, - "math": { - "name": "MATH Level 5", - "value": 0.015861027190332326, - "normalized_score": 1.5861027190332326 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26929530201342283, - "normalized_score": 2.572706935123044 - }, - "musr": { - "name": "MUSR", - "value": 0.4323854166666667, - "normalized_score": 12.681510416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.19331781914893617, - "normalized_score": 10.368646572104018 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-09-14", - "submission_date": "2024-06-12", - "generation": 1, - "base_model": "NousResearch/Llama-2-7b-hf", - "hub_license": "mit", - "hub_hearts": 13, - "params_billions": 7.0, - "co2_cost": 4.966179505229206 - } - }, - { - "id": "tensopolis/falcon3-10b-tensopolis-v1_bfloat16_39f61a967cedf5db5de6229284eeffde9b4ede83_True", - "model": { - "name": "tensopolis/falcon3-10b-tensopolis-v1", - "sha": "39f61a967cedf5db5de6229284eeffde9b4ede83", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 35.588967109820835, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7816560060639104, - "normalized_score": 78.16560060639105 - }, - "bbh": { - "name": "BBH", - "value": 0.618226655000786, - "normalized_score": 45.059280249567784 - }, - "math": { - "name": "MATH Level 5", - "value": 0.27492447129909364, - "normalized_score": 27.492447129909365 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3296979865771812, - "normalized_score": 
10.626398210290827 - }, - "musr": { - "name": "MUSR", - "value": 0.43753125, - "normalized_score": 14.191406249999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4419880319148936, - "normalized_score": 37.99867021276595 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-29", - "submission_date": "2025-03-08", - "generation": 1, - "base_model": "tensopolis/falcon3-10b-tensopolis-v1 (Merge)", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 10.306, - "co2_cost": 10.0902986546538 - } - }, - { - "id": "tensopolis/falcon3-10b-tensopolis-v2_bfloat16_acff1f3390161a0ffe72ba89b59ee47a04622419_True", - "model": { - "name": "tensopolis/falcon3-10b-tensopolis-v2", - "sha": "acff1f3390161a0ffe72ba89b59ee47a04622419", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 35.190439950096184, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7792080568447275, - "normalized_score": 77.92080568447275 - }, - "bbh": { - "name": "BBH", - "value": 0.618226655000786, - "normalized_score": 45.046927308391304 - }, - "math": { - "name": "MATH Level 5", - "value": 0.26661631419939574, - "normalized_score": 26.661631419939575 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3271812080536913, - "normalized_score": 10.290827740492169 - }, - "musr": { - "name": "MUSR", - "value": 0.4296875, - "normalized_score": 13.177604166666661 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4424035904255319, - "normalized_score": 38.04484338061466 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-08", - "submission_date": "2025-03-08", - "generation": 1, - "base_model": "tensopolis/falcon3-10b-tensopolis-v2 (Merge)", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 10.306, - "co2_cost": 0.8575561467328316 - } - }, - { - "id": "tensopolis/lamarckvergence-14b-tensopolis-v1_bfloat16_4125c5592b0131d408a994b0c6a13ce857bc8951_True", - "model": { - "name": "tensopolis/lamarckvergence-14b-tensopolis-v1", - "sha": "4125c5592b0131d408a994b0c6a13ce857bc8951", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 42.91732437905944, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7603735865281896, - "normalized_score": 76.03735865281897 - }, - "bbh": { - "name": "BBH", - "value": 0.6561154329558933, - "normalized_score": 50.983494714854295 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5166163141993958, - "normalized_score": 51.66163141993958 - }, - "gpqa": { - "name": "GPQA", - "value": 0.36073825503355705, - "normalized_score": 14.76510067114094 - }, - "musr": { - "name": "MUSR", - "value": 0.44745833333333335, - "normalized_score": 16.832291666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5250166223404256, - "normalized_score": 47.224069148936174 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-09", - "submission_date": "2025-03-10", 
- "generation": 1, - "base_model": "tensopolis/lamarckvergence-14b-tensopolis-v1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 14.766, - "co2_cost": 3.1100675280897367 - } - }, - { - "id": "tensopolis/mistral-small-2501-tensopolis-v1_bfloat16_c5e5dec7262c0174932627029df32225ddfa77f4_True", - "model": { - "name": "tensopolis/mistral-small-2501-tensopolis-v1", - "sha": "c5e5dec7262c0174932627029df32225ddfa77f4", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 39.245150466059876, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7762104549262623, - "normalized_score": 77.62104549262622 - }, - "bbh": { - "name": "BBH", - "value": 0.6474735931872574, - "normalized_score": 48.693238200527865 - }, - "math": { - "name": "MATH Level 5", - "value": 0.44410876132930516, - "normalized_score": 44.41087613293052 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3573825503355705, - "normalized_score": 14.317673378076066 - }, - "musr": { - "name": "MUSR", - "value": 0.42797916666666663, - "normalized_score": 11.930729166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4464760638297872, - "normalized_score": 38.49734042553191 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-08", - "submission_date": "2025-02-08", - "generation": 1, - "base_model": "tensopolis/mistral-small-2501-tensopolis-v1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 23.572, - "co2_cost": 1.3811453118741102 - } - }, - { - "id": "tensopolis/mistral-small-r1-tensopolis_bfloat16_d7256a2ea0f80a2294177ef53d8ad596b8cf3d68_False", - "model": { - "name": "tensopolis/mistral-small-r1-tensopolis", - "sha": "d7256a2ea0f80a2294177ef53d8ad596b8cf3d68", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 25.876977987240537, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.462220242290456, - "normalized_score": 46.2220242290456 - }, - "bbh": { - "name": "BBH", - "value": 0.5435969591888976, - "normalized_score": 34.60228302469567 - }, - "math": { - "name": "MATH Level 5", - "value": 0.290785498489426, - "normalized_score": 29.078549848942597 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28187919463087246, - "normalized_score": 4.250559284116329 - }, - "musr": { - "name": "MUSR", - "value": 0.37375, - "normalized_score": 7.385416666666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4035073138297872, - "normalized_score": 33.72303486997636 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-08", - "submission_date": "2025-02-24", - "generation": 2, - "base_model": "mistralai/Mistral-Small-24B-Instruct-2501 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 23.572, - "co2_cost": 1.4434002419959506 - } - }, - { - "id": "tensopolis/phi-4-tensopolis-v1_bfloat16_5890f8b5f7e4a040b8dd5fbcfcc45f4311ac2873_True", - "model": { - "name": "tensopolis/phi-4-tensopolis-v1", - "sha": "5890f8b5f7e4a040b8dd5fbcfcc45f4311ac2873", - 
"precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 40.45533321662008, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6766679078179231, - "normalized_score": 67.66679078179232 - }, - "bbh": { - "name": "BBH", - "value": 0.6871833310149728, - "normalized_score": 55.036574654959814 - }, - "math": { - "name": "MATH Level 5", - "value": 0.49395770392749244, - "normalized_score": 49.39577039274924 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3347315436241611, - "normalized_score": 11.297539149888143 - }, - "musr": { - "name": "MUSR", - "value": 0.4140625, - "normalized_score": 10.624479166666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5383976063829787, - "normalized_score": 48.71084515366431 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-07", - "submission_date": "2025-03-09", - "generation": 1, - "base_model": "tensopolis/phi-4-tensopolis-v1 (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 14.66, - "co2_cost": 2.7062156886278963 - } - }, - { - "id": "tensopolis/qwen2.5-14b-tensopolis-v1_bfloat16_34f20381d4b43bbbd288f9a1f81a26bcbdcbd3c8_True", - "model": { - "name": "tensopolis/qwen2.5-14b-tensopolis-v1", - "sha": "34f20381d4b43bbbd288f9a1f81a26bcbdcbd3c8", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.14159022263494, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7990166092634211, - "normalized_score": 79.90166092634212 - }, - "bbh": { - "name": "BBH", - "value": 0.6363595324538928, - "normalized_score": 47.96504993841635 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5294561933534743, - "normalized_score": 52.94561933534743 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3347315436241611, - "normalized_score": 11.297539149888143 - }, - "musr": { - "name": "MUSR", - "value": 0.41933333333333334, - "normalized_score": 11.283333333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.49110704787234044, - "normalized_score": 43.456338652482266 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-01", - "submission_date": "2025-03-09", - "generation": 1, - "base_model": "tensopolis/qwen2.5-14b-tensopolis-v1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 14.77, - "co2_cost": 11.845475420900796 - } - }, - { - "id": "tensopolis/qwen2.5-3b-or1-tensopolis_bfloat16_3f7cee86465d2518298e835f93b1b904a5a799d7_False", - "model": { - "name": "tensopolis/qwen2.5-3b-or1-tensopolis", - "sha": "3f7cee86465d2518298e835f93b1b904a5a799d7", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 18.28025106583868, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.35400958346077294, - "normalized_score": 35.400958346077296 - }, - "bbh": { - "name": "BBH", - "value": 0.44214988544006467, - "normalized_score": 22.108987593280478 - }, - "math": { - "name": "MATH Level 5", - 
"value": 0.1729607250755287, - "normalized_score": 17.29607250755287 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.37492708333333336, - "normalized_score": 4.532552083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3197307180851064, - "normalized_score": 24.414524231678485 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-15", - "submission_date": "2025-02-24", - "generation": 3, - "base_model": "Qwen/Qwen2.5-3B", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 3.086, - "co2_cost": 1.5197455737492314 - } - }, - { - "id": "tensopolis/qwen2.5-7b-tensopolis-v1_bfloat16_2cbc0765ff33ec948d54f305640f4db262221493_True", - "model": { - "name": "tensopolis/qwen2.5-7b-tensopolis-v1", - "sha": "2cbc0765ff33ec948d54f305640f4db262221493", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.49167672948769, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7660939640154789, - "normalized_score": 76.60939640154788 - }, - "bbh": { - "name": "BBH", - "value": 0.5378740884658956, - "normalized_score": 34.78352811189759 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4561933534743202, - "normalized_score": 45.61933534743202 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.433875, - "normalized_score": 13.467708333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.42686170212765956, - "normalized_score": 36.31796690307328 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-24", - "submission_date": "2025-03-07", - "generation": 1, - "base_model": "tensopolis/qwen2.5-7b-tensopolis-v1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 8.295151060589886 - } - }, - { - "id": "tensopolis/qwen2.5-7b-tensopolis-v2_bfloat16_d81d4112a0bebd9d1262e94136a330beac86299b_True", - "model": { - "name": "tensopolis/qwen2.5-7b-tensopolis-v2", - "sha": "d81d4112a0bebd9d1262e94136a330beac86299b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.37814971872235, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.752105524452896, - "normalized_score": 75.21055244528961 - }, - "bbh": { - "name": "BBH", - "value": 0.5414622323974015, - "normalized_score": 35.224120159606855 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4818731117824773, - "normalized_score": 48.18731117824773 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2902684563758389, - "normalized_score": 5.369127516778524 - }, - "musr": { - "name": "MUSR", - "value": 0.42463541666666665, - "normalized_score": 12.24609375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.42428523936170215, - "normalized_score": 36.03169326241135 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - 
"is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-07", - "submission_date": "2025-03-08", - "generation": 1, - "base_model": "tensopolis/qwen2.5-7b-tensopolis-v2 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 4.501444779713065 - } - }, - { - "id": "tensopolis/virtuoso-lite-tensopolis-v1_bfloat16_84293adf3f10f19f1c6e735633f0cc45b457ac62_True", - "model": { - "name": "tensopolis/virtuoso-lite-tensopolis-v1", - "sha": "84293adf3f10f19f1c6e735633f0cc45b457ac62", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 36.38947458816892, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.806910109620252, - "normalized_score": 80.6910109620252 - }, - "bbh": { - "name": "BBH", - "value": 0.610185430846048, - "normalized_score": 43.94133513211286 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2545317220543807, - "normalized_score": 25.45317220543807 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3447986577181208, - "normalized_score": 12.639821029082773 - }, - "musr": { - "name": "MUSR", - "value": 0.4582395833333333, - "normalized_score": 17.446614583333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4434840425531915, - "normalized_score": 38.16489361702128 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-05", - "submission_date": "2025-03-08", - "generation": 1, - "base_model": "tensopolis/virtuoso-lite-tensopolis-v1 (Merge)", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 10.306, - "co2_cost": 6.852792974488292 - } - }, - { - "id": "tensopolis/virtuoso-lite-tensopolis-v2_bfloat16_427b560bfaa1d5c1993bcc743d9e4b8695b1a053_True", - "model": { - "name": "tensopolis/virtuoso-lite-tensopolis-v2", - "sha": "427b560bfaa1d5c1993bcc743d9e4b8695b1a053", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 36.256178767362854, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8029384255996312, - "normalized_score": 80.29384255996311 - }, - "bbh": { - "name": "BBH", - "value": 0.6100187641793813, - "normalized_score": 43.93077957655731 - }, - "math": { - "name": "MATH Level 5", - "value": 0.25, - "normalized_score": 25.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34312080536912754, - "normalized_score": 12.416107382550338 - }, - "musr": { - "name": "MUSR", - "value": 0.4595416666666667, - "normalized_score": 17.676041666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44398271276595747, - "normalized_score": 38.22030141843972 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-06", - "submission_date": "2025-03-09", - "generation": 1, - "base_model": "tensopolis/virtuoso-lite-tensopolis-v2 (Merge)", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 10.306, - "co2_cost": 6.034670836864486 - } - }, - { - "id": "tensopolis/virtuoso-small-tensopolis-v1_bfloat16_aae1313e07e91002106d55f893978669f9b19389_True", - "model": { - "name": "tensopolis/virtuoso-small-tensopolis-v1", - "sha": 
"aae1313e07e91002106d55f893978669f9b19389", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 38.413589685550576, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7856276900845313, - "normalized_score": 78.56276900845313 - }, - "bbh": { - "name": "BBH", - "value": 0.6415395136436205, - "normalized_score": 48.17122576570145 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3527190332326284, - "normalized_score": 35.27190332326284 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32802013422818793, - "normalized_score": 10.402684563758392 - }, - "musr": { - "name": "MUSR", - "value": 0.43263541666666666, - "normalized_score": 13.979427083333329 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4968417553191489, - "normalized_score": 44.093528368794324 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-28", - "submission_date": "2025-01-28", - "generation": 1, - "base_model": "tensopolis/virtuoso-small-tensopolis-v1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 4.667430653436983 - } - }, - { - "id": "tensopolis/virtuoso-small-tensopolis-v2_bfloat16_425502ec46cc5752412693ee7eea7f049088f904_True", - "model": { - "name": "tensopolis/virtuoso-small-tensopolis-v2", - "sha": "425502ec46cc5752412693ee7eea7f049088f904", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.113839873543775, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8020142111818863, - "normalized_score": 80.20142111818863 - }, - "bbh": { - "name": "BBH", - "value": 0.6515835977499008, - "normalized_score": 50.22972026624839 - }, - "math": { - "name": "MATH Level 5", - "value": 0.38746223564954685, - "normalized_score": 38.74622356495468 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3288590604026846, - "normalized_score": 10.514541387024611 - }, - "musr": { - "name": "MUSR", - "value": 0.43523958333333335, - "normalized_score": 14.83828125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.515375664893617, - "normalized_score": 46.15285165484633 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-29", - "submission_date": "2025-02-17", - "generation": 1, - "base_model": "tensopolis/virtuoso-small-tensopolis-v2 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.77, - "co2_cost": 4.714171092623901 - } - }, - { - "id": "tensopolis/virtuoso-small-v2-tensopolis-v1_bfloat16_71706588945ec9ad97f9d39e529e00d96d0119b3_True", - "model": { - "name": "tensopolis/virtuoso-small-v2-tensopolis-v1", - "sha": "71706588945ec9ad97f9d39e529e00d96d0119b3", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 42.6978958480128, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8419061423689145, - "normalized_score": 84.19061423689145 - }, - "bbh": { - "name": "BBH", - "value": 0.6544753426578069, - "normalized_score": 
50.96603012167689 - }, - "math": { - "name": "MATH Level 5", - "value": 0.452416918429003, - "normalized_score": 45.2416918429003 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3464765100671141, - "normalized_score": 12.863534675615215 - }, - "musr": { - "name": "MUSR", - "value": 0.45092708333333337, - "normalized_score": 16.532552083333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5175365691489362, - "normalized_score": 46.39295212765958 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-01", - "submission_date": "2025-03-09", - "generation": 1, - "base_model": "tensopolis/virtuoso-small-v2-tensopolis-v1 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 14.766, - "co2_cost": 28.932246678908278 - } - }, - { - "id": "tensoropera/Fox-1-1.6B_bfloat16_6389dde4d7e52aa1200ad954c565f03c7fdcf8db_False", - "model": { - "name": "tensoropera/Fox-1-1.6B", - "sha": "6389dde4d7e52aa1200ad954c565f03c7fdcf8db", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 7.764365648440015, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.27659831469390106, - "normalized_score": 27.659831469390106 - }, - "bbh": { - "name": "BBH", - "value": 0.3307369914593792, - "normalized_score": 7.399760932518088 - }, - "math": { - "name": "MATH Level 5", - "value": 0.017371601208459216, - "normalized_score": 1.7371601208459215 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.35498958333333336, - "normalized_score": 3.873697916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1371343085106383, - "normalized_score": 4.12603427895981 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-13", - "submission_date": "2024-06-29", - "generation": 0, - "base_model": "tensoropera/Fox-1-1.6B", - "hub_license": "apache-2.0", - "hub_hearts": 31, - "params_billions": 1.665, - "co2_cost": 2.6856405963989034 - } - }, - { - "id": "tenyx/Llama3-TenyxChat-70B_bfloat16_a85d31e3af8fcc847cc9169f1144cf02f5351fab_True", - "model": { - "name": "tenyx/Llama3-TenyxChat-70B", - "sha": "a85d31e3af8fcc847cc9169f1144cf02f5351fab", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 36.69601460825378, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8087086707713311, - "normalized_score": 80.87086707713311 - }, - "bbh": { - "name": "BBH", - "value": 0.6511486901811531, - "normalized_score": 49.61562001611543 - }, - "math": { - "name": "MATH Level 5", - "value": 0.23564954682779457, - "normalized_score": 23.564954682779458 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.42603125000000003, - "normalized_score": 12.520572916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5210272606382979, - "normalized_score": 46.780806737588655 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - 
"is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-26", - "submission_date": "2024-08-04", - "generation": 0, - "base_model": "tenyx/Llama3-TenyxChat-70B", - "hub_license": "llama3", - "hub_hearts": 64, - "params_billions": 70.554, - "co2_cost": 18.734013036098165 - } - }, - { - "id": "theo77186/Qwen2.5-Coder-7B-Instruct-20241106_bfloat16_3e2c48344212ca7a3c71b020bc785dd9f0919a7f_True", - "model": { - "name": "theo77186/Qwen2.5-Coder-7B-Instruct-20241106", - "sha": "3e2c48344212ca7a3c71b020bc785dd9f0919a7f", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 28.330798858289842, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6101477413263474, - "normalized_score": 61.01477413263474 - }, - "bbh": { - "name": "BBH", - "value": 0.5007976986224548, - "normalized_score": 28.938504045379137 - }, - "math": { - "name": "MATH Level 5", - "value": 0.38821752265861026, - "normalized_score": 38.82175226586103 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29194630872483224, - "normalized_score": 5.592841163310966 - }, - "musr": { - "name": "MUSR", - "value": 0.4072708333333333, - "normalized_score": 9.475520833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33527260638297873, - "normalized_score": 26.14140070921986 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-08", - "submission_date": "2025-01-19", - "generation": 1, - "base_model": "theo77186/Qwen2.5-Coder-7B-Instruct-20241106 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 7.616, - "co2_cost": 1.3240648984274381 - } - }, - { - "id": "theprint/Boptruth-Agatha-7B_float16_ef7c7570be29a58f4a8358a6d4c75f59a5282191_False", - "model": { - "name": "theprint/Boptruth-Agatha-7B", - "sha": "ef7c7570be29a58f4a8358a6d4c75f59a5282191", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 17.51238060668224, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.312418826491487, - "normalized_score": 31.241882649148703 - }, - "bbh": { - "name": "BBH", - "value": 0.4983936045348778, - "normalized_score": 29.286422282807493 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05513595166163142, - "normalized_score": 5.5135951661631415 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.42766666666666664, - "normalized_score": 11.758333333333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28607047872340424, - "normalized_score": 20.674497635933804 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-11", - "submission_date": "2024-09-30", - "generation": 0, - "base_model": "theprint/Boptruth-Agatha-7B", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.776210317612061 - } - }, - { - "id": "theprint/CleverBoi-7B-v2_bfloat16_1d82629c1e6778cf8568b532a3c09b668805b15a_False", - "model": { - "name": 
"theprint/CleverBoi-7B-v2", - "sha": "1d82629c1e6778cf8568b532a3c09b668805b15a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 15.095914857389625, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21699756645700075, - "normalized_score": 21.699756645700074 - }, - "bbh": { - "name": "BBH", - "value": 0.45317253321634526, - "normalized_score": 23.44418148733149 - }, - "math": { - "name": "MATH Level 5", - "value": 0.026435045317220542, - "normalized_score": 2.643504531722054 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28859060402684567, - "normalized_score": 5.145413870246088 - }, - "musr": { - "name": "MUSR", - "value": 0.46953125, - "normalized_score": 18.65807291666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27086103723404253, - "normalized_score": 18.98455969267139 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-12", - "submission_date": "2024-09-13", - "generation": 2, - "base_model": "mistralai/Mistral-7B-v0.3", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.736, - "co2_cost": 3.0447950669431973 - } - }, - { - "id": "theprint/CleverBoi-7B-v3_float16_1d82629c1e6778cf8568b532a3c09b668805b15a_False", - "model": { - "name": "theprint/CleverBoi-7B-v3", - "sha": "1d82629c1e6778cf8568b532a3c09b668805b15a", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 13.690467425790892, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23823011830831084, - "normalized_score": 23.823011830831085 - }, - "bbh": { - "name": "BBH", - "value": 0.4414430902840938, - "normalized_score": 21.93674717909172 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04003021148036254, - "normalized_score": 4.003021148036254 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26593959731543626, - "normalized_score": 2.1252796420581683 - }, - "musr": { - "name": "MUSR", - "value": 0.4071770833333333, - "normalized_score": 9.497135416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28681848404255317, - "normalized_score": 20.75760933806146 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-14", - "submission_date": "2024-09-22", - "generation": 2, - "base_model": "mistralai/Mistral-7B-v0.3", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.736, - "co2_cost": 3.205779635795906 - } - }, - { - "id": "theprint/CleverBoi-Llama-3.1-8B-Instruct_bfloat16_3514c510ea4ba4d650522f467d4d0cef7de4a43c_False", - "model": { - "name": "theprint/CleverBoi-Llama-3.1-8B-Instruct", - "sha": "3514c510ea4ba4d650522f467d4d0cef7de4a43c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 13.970394725475051, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16816269719898758, - "normalized_score": 16.816269719898756 - }, - "bbh": { - "name": "BBH", - "value": 0.4559618469185147, - "normalized_score": 24.048603081139294 - }, - "math": { - 
"name": "MATH Level 5", - "value": 0.04909365558912387, - "normalized_score": 4.909365558912387 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30033557046979864, - "normalized_score": 6.711409395973152 - }, - "musr": { - "name": "MUSR", - "value": 0.40143750000000006, - "normalized_score": 8.279687500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30751329787234044, - "normalized_score": 23.057033096926716 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-27", - "submission_date": "2024-09-13", - "generation": 3, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 16.061, - "co2_cost": 3.7404451902810583 - } - }, - { - "id": "theprint/CleverBoi-Llama-3.1-8B-v2_float16_a8b0fc584b10e0110e04f9d21c7f10d24391c1d5_False", - "model": { - "name": "theprint/CleverBoi-Llama-3.1-8B-v2", - "sha": "a8b0fc584b10e0110e04f9d21c7f10d24391c1d5", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 14.145587569893708, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.19613957632415324, - "normalized_score": 19.613957632415325 - }, - "bbh": { - "name": "BBH", - "value": 0.46678160110644784, - "normalized_score": 24.132844977310707 - }, - "math": { - "name": "MATH Level 5", - "value": 0.052870090634441085, - "normalized_score": 5.287009063444108 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2860738255033557, - "normalized_score": 4.809843400447425 - }, - "musr": { - "name": "MUSR", - "value": 0.37346875, - "normalized_score": 6.716927083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31881648936170215, - "normalized_score": 24.31294326241135 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-15", - "submission_date": "2024-09-22", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 9.3, - "co2_cost": 5.042758552185006 - } - }, - { - "id": "theprint/CleverBoi-Nemo-12B-v2_bfloat16_cd1f9ee1c484f857bb0e5ae6aac37dc434911f10_False", - "model": { - "name": "theprint/CleverBoi-Nemo-12B-v2", - "sha": "cd1f9ee1c484f857bb0e5ae6aac37dc434911f10", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 17.858393307746045, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2045827293802666, - "normalized_score": 20.45827293802666 - }, - "bbh": { - "name": "BBH", - "value": 0.5241085887165254, - "normalized_score": 31.65269522562221 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10347432024169184, - "normalized_score": 10.347432024169184 - }, - "gpqa": { - "name": "GPQA", - "value": 0.313758389261745, - "normalized_score": 8.501118568232664 - }, - "musr": { - "name": "MUSR", - "value": 0.4186770833333333, - "normalized_score": 11.434635416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3228058510638298, - "normalized_score": 24.756205673758867 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - 
"is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-16", - "submission_date": "2024-09-24", - "generation": 1, - "base_model": "unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 13.933, - "co2_cost": 7.011026557852458 - } - }, - { - "id": "theprint/Code-Llama-Bagel-8B_bfloat16_7fa415f3f758ab7930d7e1df27b2d16207513125_False", - "model": { - "name": "theprint/Code-Llama-Bagel-8B", - "sha": "7fa415f3f758ab7930d7e1df27b2d16207513125", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.665251333761097, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2529676813078188, - "normalized_score": 25.29676813078188 - }, - "bbh": { - "name": "BBH", - "value": 0.46974200049001086, - "normalized_score": 25.33815455229531 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06117824773413897, - "normalized_score": 6.117824773413897 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.3679791666666667, - "normalized_score": 7.530729166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28216422872340424, - "normalized_score": 20.240469858156025 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-21", - "submission_date": "2024-09-13", - "generation": 1, - "base_model": "theprint/Code-Llama-Bagel-8B (Merge)", - "hub_license": "llama3", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.6360934056784031 - } - }, - { - "id": "theprint/Conversely-Mistral-7B_bfloat16_d8cadc02ac76bd617a919d50b092e59d2d110aff_False", - "model": { - "name": "theprint/Conversely-Mistral-7B", - "sha": "d8cadc02ac76bd617a919d50b092e59d2d110aff", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 15.032655723513159, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2608113139802391, - "normalized_score": 26.08113139802391 - }, - "bbh": { - "name": "BBH", - "value": 0.4672348146697077, - "normalized_score": 25.70696557300103 - }, - "math": { - "name": "MATH Level 5", - "value": 0.027945619335347432, - "normalized_score": 2.794561933534743 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28523489932885904, - "normalized_score": 4.697986577181204 - }, - "musr": { - "name": "MUSR", - "value": 0.4188958333333333, - "normalized_score": 10.628645833333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28257978723404253, - "normalized_score": 20.286643026004725 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-05", - "submission_date": "2024-12-07", - "generation": 2, - "base_model": "mistralai/Mistral-7B-v0.3", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.496, - "co2_cost": 2.0739591416532064 - } - }, - { - "id": "theprint/Llama-3.2-3B-VanRossum_bfloat16_7048abecd492a1f5d53981cb175431ec01bbced0_False", - "model": { - "name": "theprint/Llama-3.2-3B-VanRossum", - "sha": 
"7048abecd492a1f5d53981cb175431ec01bbced0", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 17.58480896190996, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4782820693537591, - "normalized_score": 47.82820693537591 - }, - "bbh": { - "name": "BBH", - "value": 0.42787418229776697, - "normalized_score": 19.366361887075996 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09743202416918428, - "normalized_score": 9.743202416918429 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.3441666666666667, - "normalized_score": 6.554166666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27701130319148937, - "normalized_score": 19.66792257683215 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-14", - "submission_date": "2024-11-14", - "generation": 2, - "base_model": "meta-llama/Llama-3.2-3B-Instruct", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.696, - "co2_cost": 3.7091767505923166 - } - }, - { - "id": "theprint/ReWiz-7B_bfloat16_d9f28e67d52181d1478e7788e3edf252f5bf32a8_False", - "model": { - "name": "theprint/ReWiz-7B", - "sha": "d9f28e67d52181d1478e7788e3edf252f5bf32a8", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 17.7870406487212, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.40479261692309737, - "normalized_score": 40.479261692309734 - }, - "bbh": { - "name": "BBH", - "value": 0.4564215411912313, - "normalized_score": 23.50442985462492 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04078549848942598, - "normalized_score": 4.078549848942599 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2751677852348993, - "normalized_score": 3.355704697986576 - }, - "musr": { - "name": "MUSR", - "value": 0.46115625, - "normalized_score": 16.744531249999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2670378989361702, - "normalized_score": 18.559766548463354 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-08", - "submission_date": "2024-10-08", - "generation": 3, - "base_model": "mistralai/Mistral-7B-v0.3", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.736, - "co2_cost": 2.8908113661516803 - } - }, - { - "id": "theprint/ReWiz-Llama-3.1-8B-v2_float16_a8b0fc584b10e0110e04f9d21c7f10d24391c1d5_False", - "model": { - "name": "theprint/ReWiz-Llama-3.1-8B-v2", - "sha": "a8b0fc584b10e0110e04f9d21c7f10d24391c1d5", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 15.893327785890378, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23790542427425895, - "normalized_score": 23.790542427425898 - }, - "bbh": { - "name": "BBH", - "value": 0.46324275457450953, - "normalized_score": 23.773287089432596 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05740181268882175, - 
"normalized_score": 5.740181268882175 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3028523489932886, - "normalized_score": 7.046979865771815 - }, - "musr": { - "name": "MUSR", - "value": 0.381375, - "normalized_score": 9.33854166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3310339095744681, - "normalized_score": 25.67043439716312 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-02", - "submission_date": "2024-11-03", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 9.3, - "co2_cost": 4.70618045737013 - } - }, - { - "id": "theprint/ReWiz-Llama-3.2-3B_float16_e6aed95ad8f104f105b8423cd5f87c75705a828c_False", - "model": { - "name": "theprint/ReWiz-Llama-3.2-3B", - "sha": "e6aed95ad8f104f105b8423cd5f87c75705a828c", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.186254213130717, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4648931501748693, - "normalized_score": 46.48931501748693 - }, - "bbh": { - "name": "BBH", - "value": 0.4343257577815292, - "normalized_score": 19.293728122396512 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1095166163141994, - "normalized_score": 10.95166163141994 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.361375, - "normalized_score": 6.938541666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28873005319148937, - "normalized_score": 20.970005910165483 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-18", - "submission_date": "2024-10-28", - "generation": 1, - "base_model": "theprint/ReWiz-Llama-3.2-3B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 3.213, - "co2_cost": 1.9638951510232394 - } - }, - { - "id": "theprint/ReWiz-Nemo-12B-Instruct_float16_6f8ea24f8d19b48850d68bef1b5c50837d37761b_False", - "model": { - "name": "theprint/ReWiz-Nemo-12B-Instruct", - "sha": "6f8ea24f8d19b48850d68bef1b5c50837d37761b", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 16.173142405102812, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.10623811486854878, - "normalized_score": 10.623811486854878 - }, - "bbh": { - "name": "BBH", - "value": 0.5092407647626753, - "normalized_score": 29.926389365821837 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1042296072507553, - "normalized_score": 10.42296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3238255033557047, - "normalized_score": 9.843400447427292 - }, - "musr": { - "name": "MUSR", - "value": 0.4095625, - "normalized_score": 10.22864583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33394281914893614, - "normalized_score": 25.993646572104012 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, 
- "metadata": { - "upload_date": "2024-10-31", - "submission_date": "2024-11-02", - "generation": 1, - "base_model": "unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 12.248, - "co2_cost": 2.3501045504302636 - } - }, - { - "id": "theprint/ReWiz-Qwen-2.5-14B_bfloat16_e5524628f15c30d7542427c53a565e6e2d3ff760_False", - "model": { - "name": "theprint/ReWiz-Qwen-2.5-14B", - "sha": "e5524628f15c30d7542427c53a565e6e2d3ff760", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 30.03173400795292, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.27854647889821227, - "normalized_score": 27.85464788982123 - }, - "bbh": { - "name": "BBH", - "value": 0.6179492756426455, - "normalized_score": 44.86187336352475 - }, - "math": { - "name": "MATH Level 5", - "value": 0.29229607250755285, - "normalized_score": 29.229607250755286 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3800335570469799, - "normalized_score": 17.337807606263986 - }, - "musr": { - "name": "MUSR", - "value": 0.45389583333333333, - "normalized_score": 15.436979166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5092253989361702, - "normalized_score": 45.46948877068559 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-05", - "submission_date": "2024-11-10", - "generation": 2, - "base_model": "Qwen/Qwen2.5-14B", - "hub_license": "apache-2.0", - "hub_hearts": 5, - "params_billions": 16.743, - "co2_cost": 11.856532712090587 - } - }, - { - "id": "theprint/ReWiz-Worldbuilder-7B_float16_e88c715097d824f115f59a97e612d662ffb1031f_False", - "model": { - "name": "theprint/ReWiz-Worldbuilder-7B", - "sha": "e88c715097d824f115f59a97e612d662ffb1031f", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 15.790699918990597, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.25101951710350756, - "normalized_score": 25.101951710350757 - }, - "bbh": { - "name": "BBH", - "value": 0.46361558385510165, - "normalized_score": 25.07634729001636 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03700906344410876, - "normalized_score": 3.7009063444108756 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26929530201342283, - "normalized_score": 2.572706935123044 - }, - "musr": { - "name": "MUSR", - "value": 0.45725, - "normalized_score": 16.389583333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.297124335106383, - "normalized_score": 21.90270390070922 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-28", - "submission_date": "2024-10-28", - "generation": 1, - "base_model": "theprint/ReWiz-Worldbuilder-7B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.248, - "co2_cost": 1.2217343468651976 - } - }, - { - "id": "theprint/RuDolph-Hermes-7B_bfloat16_e07aea56963bbfe5c6753d1056566a56acc30d4a_False", - "model": { - "name": "theprint/RuDolph-Hermes-7B", - "sha": "e07aea56963bbfe5c6753d1056566a56acc30d4a", - "precision": "bfloat16", - "type": 
"fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.03701308822906, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3604292167005767, - "normalized_score": 36.042921670057666 - }, - "bbh": { - "name": "BBH", - "value": 0.5052928613425586, - "normalized_score": 30.709648163100884 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0513595166163142, - "normalized_score": 5.13595166163142 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31208053691275167, - "normalized_score": 8.277404921700223 - }, - "musr": { - "name": "MUSR", - "value": 0.4226145833333333, - "normalized_score": 11.026822916666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30726396276595747, - "normalized_score": 23.029329196217496 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-10", - "submission_date": "2024-11-10", - "generation": 1, - "base_model": "theprint/RuDolph-Hermes-7B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.0041344395875431 - } - }, - { - "id": "theprint/WorldBuilder-12B_bfloat16_20cfd0e98fb2628b00867147b2c6f423d27f3561_False", - "model": { - "name": "theprint/WorldBuilder-12B", - "sha": "20cfd0e98fb2628b00867147b2c6f423d27f3561", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 14.516406576626443, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.13743755457741016, - "normalized_score": 13.743755457741015 - }, - "bbh": { - "name": "BBH", - "value": 0.5010100641541125, - "normalized_score": 29.277996282041656 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0445619335347432, - "normalized_score": 4.45619335347432 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.4066458333333334, - "normalized_score": 8.997395833333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31923204787234044, - "normalized_score": 24.35911643026005 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-27", - "submission_date": "2024-11-18", - "generation": 1, - "base_model": "unsloth/mistral-nemo-base-2407-bnb-4bit", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 13.933, - "co2_cost": 5.662550205199193 - } - }, - { - "id": "theprint/phi-3-mini-4k-python_bfloat16_81453e5718775630581ab9950e6c0ccf0d7a4177_False", - "model": { - "name": "theprint/phi-3-mini-4k-python", - "sha": "81453e5718775630581ab9950e6c0ccf0d7a4177", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 17.728138144118287, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24087753826513653, - "normalized_score": 24.08775382651365 - }, - "bbh": { - "name": "BBH", - "value": 0.493759004635898, - "normalized_score": 28.44601616578647 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10498489425981873, - "normalized_score": 10.498489425981873 - }, 
- "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.3921666666666666, - "normalized_score": 9.22083333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35771276595744683, - "normalized_score": 28.63475177304965 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-03", - "submission_date": "2024-09-13", - "generation": 1, - "base_model": "unsloth/Phi-3-mini-4k-instruct-bnb-4bit", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 4.132, - "co2_cost": 2.751101854174533 - } - }, - { - "id": "thinkcoder/llama3-8b-instruct-lora-8-sft_bfloat16_b76d81a09b15d92f92a8a22711983775ac999383_True", - "model": { - "name": "thinkcoder/llama3-8b-instruct-lora-8-sft", - "sha": "b76d81a09b15d92f92a8a22711983775ac999383", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.36364436394473, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6480416406246536, - "normalized_score": 64.80416406246536 - }, - "bbh": { - "name": "BBH", - "value": 0.4865011845587858, - "normalized_score": 27.203772889314084 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10196374622356495, - "normalized_score": 10.196374622356496 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26677852348993286, - "normalized_score": 2.2371364653243813 - }, - "musr": { - "name": "MUSR", - "value": 0.32345833333333335, - "normalized_score": 2.232291666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34757313829787234, - "normalized_score": 27.508126477541374 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-10", - "submission_date": "2025-03-10", - "generation": 0, - "base_model": "thinkcoder/llama3-8b-instruct-lora-8-sft", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.7149804779098244 - } - }, - { - "id": "thirdeyeai/elevate360m_float16_f4321ba8704e732769d328952d217bdb564e1824_False", - "model": { - "name": "thirdeyeai/elevate360m", - "sha": "f4321ba8704e732769d328952d217bdb564e1824", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 1.918188276637857, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.04448862351892978, - "normalized_score": 4.448862351892978 - }, - "bbh": { - "name": "BBH", - "value": 0.2962583602962783, - "normalized_score": 2.339846843629065 - }, - "math": { - "name": "MATH Level 5", - "value": 0.015861027190332326, - "normalized_score": 1.5861027190332326 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2407718120805369, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.34621875, - "normalized_score": 2.2773437500000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1077127659574468, - "normalized_score": 0.8569739952718666 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - 
"upload_date": "2025-01-28", - "submission_date": "2025-01-29", - "generation": 0, - "base_model": "thirdeyeai/elevate360m", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.362, - "co2_cost": 0.7375654899500736 - } - }, - { - "id": "thomas-yanxin/XinYuan-Qwen2-1_5B_float16_a01b362887832bea08d686737861ac3d5b437a32_True", - "model": { - "name": "thomas-yanxin/XinYuan-Qwen2-1_5B", - "sha": "a01b362887832bea08d686737861ac3d5b437a32", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 11.515091263493494, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2985556102253133, - "normalized_score": 29.855561022531326 - }, - "bbh": { - "name": "BBH", - "value": 0.3635491993150823, - "normalized_score": 12.125579560037659 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06722054380664652, - "normalized_score": 6.722054380664652 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.36339583333333336, - "normalized_score": 2.6244791666666685 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.23570478723404256, - "normalized_score": 15.078309692671397 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-25", - "submission_date": "2024-09-04", - "generation": 1, - "base_model": "Removed", - "hub_license": "other", - "hub_hearts": 1, - "params_billions": 1.777, - "co2_cost": 2.704728139077506 - } - }, - { - "id": "thomas-yanxin/XinYuan-Qwen2-7B_float16_c62d83eee2f4812ac17fc17d307f4aa1a77c5359_True", - "model": { - "name": "thomas-yanxin/XinYuan-Qwen2-7B", - "sha": "c62d83eee2f4812ac17fc17d307f4aa1a77c5359", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 22.431711657583364, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.44376033369238066, - "normalized_score": 44.376033369238066 - }, - "bbh": { - "name": "BBH", - "value": 0.4936629157238895, - "normalized_score": 28.40148852275863 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14577039274924472, - "normalized_score": 14.577039274924472 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.40581249999999996, - "normalized_score": 9.259895833333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3924534574468085, - "normalized_score": 32.494828605200944 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-21", - "submission_date": "2024-09-03", - "generation": 0, - "base_model": "thomas-yanxin/XinYuan-Qwen2-7B", - "hub_license": "other", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 4.905125785403986 - } - }, - { - "id": "thomas-yanxin/XinYuan-Qwen2-7B-0917_float16_6cee1b155fca9ae1f558f434953dfdadb9596af0_True", - "model": { - "name": "thomas-yanxin/XinYuan-Qwen2-7B-0917", - "sha": "6cee1b155fca9ae1f558f434953dfdadb9596af0", - "precision": "float16", - "type": 
"fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 24.546893470710334, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.37191983935956596, - "normalized_score": 37.19198393595659 - }, - "bbh": { - "name": "BBH", - "value": 0.5169215573786009, - "normalized_score": 32.61993813582105 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19788519637462235, - "normalized_score": 19.788519637462233 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.4401041666666667, - "normalized_score": 13.6796875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4245345744680851, - "normalized_score": 36.059397163120565 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-17", - "submission_date": "2024-09-17", - "generation": 0, - "base_model": "thomas-yanxin/XinYuan-Qwen2-7B-0917", - "hub_license": "other", - "hub_hearts": 4, - "params_billions": 7.616, - "co2_cost": 2.9711289872712614 - } - }, - { - "id": "thomas-yanxin/XinYuan-Qwen2.5-7B-0917_float16_bbbeafd1003c4d5e13f09b7223671957384b961a_True", - "model": { - "name": "thomas-yanxin/XinYuan-Qwen2.5-7B-0917", - "sha": "bbbeafd1003c4d5e13f09b7223671957384b961a", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 21.39759488657262, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.35770644113175265, - "normalized_score": 35.770644113175265 - }, - "bbh": { - "name": "BBH", - "value": 0.5184106116987492, - "normalized_score": 33.43966927024198 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1933534743202417, - "normalized_score": 19.335347432024168 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28104026845637586, - "normalized_score": 4.138702460850116 - }, - "musr": { - "name": "MUSR", - "value": 0.3675520833333333, - "normalized_score": 3.677343750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38821476063829785, - "normalized_score": 32.023862293144205 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-17", - "submission_date": "2024-09-24", - "generation": 0, - "base_model": "thomas-yanxin/XinYuan-Qwen2.5-7B-0917", - "hub_license": "other", - "hub_hearts": 4, - "params_billions": 7.616, - "co2_cost": 1.9424504667015179 - } - }, - { - "id": "tianyil1/MistralForCausalLM_Cal_DPO_float16_642d91baa32a7806a11bc66e0d65870fbcd15e6c_True", - "model": { - "name": "tianyil1/MistralForCausalLM_Cal_DPO", - "sha": "642d91baa32a7806a11bc66e0d65870fbcd15e6c", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "MistralForCausalLM", - "average_score": 18.088644447193257, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5327619604870633, - "normalized_score": 53.276196048706325 - }, - "bbh": { - "name": "BBH", - "value": 0.43814239617517153, - "normalized_score": 21.78360838715177 - }, - "math": { - "name": "MATH Level 5", - "value": 0.028700906344410877, - 
"normalized_score": 2.8700906344410875 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.39765625, - "normalized_score": 7.540364583333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2763464095744681, - "normalized_score": 19.59404550827423 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-25", - "submission_date": "2025-01-25", - "generation": 2, - "base_model": "mistralai/Mistral-7B-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 1.3582537551579863 - } - }, - { - "id": "tiiuae/Falcon3-10B-Base_bfloat16_0b20cceec08ec598ed2de7a6dfbeb208f1eae656_False", - "model": { - "name": "tiiuae/Falcon3-10B-Base", - "sha": "0b20cceec08ec598ed2de7a6dfbeb208f1eae656", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 27.617850879493677, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3647754624396601, - "normalized_score": 36.47754624396601 - }, - "bbh": { - "name": "BBH", - "value": 0.595004253437141, - "normalized_score": 41.37546218651794 - }, - "math": { - "name": "MATH Level 5", - "value": 0.24924471299093656, - "normalized_score": 24.924471299093657 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34563758389261745, - "normalized_score": 12.751677852348994 - }, - "musr": { - "name": "MUSR", - "value": 0.43979166666666664, - "normalized_score": 14.173958333333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4240359042553192, - "normalized_score": 36.003989361702125 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-12-03", - "submission_date": "2024-12-12", - "generation": 0, - "base_model": "tiiuae/Falcon3-10B-Base", - "hub_license": "other", - "hub_hearts": 36, - "params_billions": 10.306, - "co2_cost": 1.620778879963983 - } - }, - { - "id": "tiiuae/Falcon3-10B-Instruct_bfloat16_9be8471432d7c4f35f72505fa2ca4101f0a2ed6d_True", - "model": { - "name": "tiiuae/Falcon3-10B-Instruct", - "sha": "9be8471432d7c4f35f72505fa2ca4101f0a2ed6d", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 35.47541146366702, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7816560060639104, - "normalized_score": 78.16560060639105 - }, - "bbh": { - "name": "BBH", - "value": 0.6170469398052084, - "normalized_score": 44.82153982483132 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2764350453172205, - "normalized_score": 27.64350453172205 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3288590604026846, - "normalized_score": 10.514541387024611 - }, - "musr": { - "name": "MUSR", - "value": 0.43232291666666667, - "normalized_score": 13.607031249999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44290226063829785, - "normalized_score": 38.1002511820331 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-12-14", - 
"submission_date": "2024-12-16", - "generation": 1, - "base_model": "tiiuae/Falcon3-10B-Base", - "hub_license": "other", - "hub_hearts": 97, - "params_billions": 10.306, - "co2_cost": 1.680822286122655 - } - }, - { - "id": "tiiuae/Falcon3-1B-Base_bfloat16_cc56a5a7c3923821312ad14f52c5a7c3fa835cbc_False", - "model": { - "name": "tiiuae/Falcon3-1B-Base", - "sha": "cc56a5a7c3923821312ad14f52c5a7c3fa835cbc", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 9.8880961034972, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24280132271262472, - "normalized_score": 24.280132271262474 - }, - "bbh": { - "name": "BBH", - "value": 0.3571153918015637, - "normalized_score": 11.343173265854867 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03323262839879154, - "normalized_score": 3.3232628398791544 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.41473958333333333, - "normalized_score": 9.709114583333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16082114361702127, - "normalized_score": 6.757904846335696 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-12-13", - "submission_date": "2024-12-16", - "generation": 0, - "base_model": "tiiuae/Falcon3-1B-Base", - "hub_license": "other", - "hub_hearts": 23, - "params_billions": 1.669, - "co2_cost": 0.8027389735646433 - } - }, - { - "id": "tiiuae/Falcon3-1B-Instruct_bfloat16_27dd70ccb22fd3cc71c5adbc95eb670455afff3d_True", - "model": { - "name": "tiiuae/Falcon3-1B-Instruct", - "sha": "27dd70ccb22fd3cc71c5adbc95eb670455afff3d", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.164597322515025, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5556678501930433, - "normalized_score": 55.566785019304334 - }, - "bbh": { - "name": "BBH", - "value": 0.3744535691366672, - "normalized_score": 12.961374062508185 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0634441087613293, - "normalized_score": 6.3444108761329305 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26677852348993286, - "normalized_score": 2.2371364653243813 - }, - "musr": { - "name": "MUSR", - "value": 0.4188958333333333, - "normalized_score": 10.561979166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.18384308510638298, - "normalized_score": 9.315898345153663 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-12-14", - "submission_date": "2024-12-16", - "generation": 1, - "base_model": "tiiuae/Falcon3-1B-Base", - "hub_license": "other", - "hub_hearts": 34, - "params_billions": 1.669, - "co2_cost": 0.7940407279002907 - } - }, - { - "id": "tiiuae/Falcon3-3B-Base_bfloat16_3d49753006a0fa5384031a737c60fbcd0f60b7f2_False", - "model": { - "name": "tiiuae/Falcon3-3B-Base", - "sha": "3d49753006a0fa5384031a737c60fbcd0f60b7f2", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 15.738743193619, - "has_chat_template": 
false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2764985793250797, - "normalized_score": 27.64985793250797 - }, - "bbh": { - "name": "BBH", - "value": 0.4421367825874385, - "normalized_score": 21.584784293773264 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11782477341389729, - "normalized_score": 11.782477341389729 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.3749895833333334, - "normalized_score": 6.273697916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2878989361702128, - "normalized_score": 20.877659574468087 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-12-13", - "submission_date": "2024-12-13", - "generation": 0, - "base_model": "tiiuae/Falcon3-3B-Base", - "hub_license": "other", - "hub_hearts": 16, - "params_billions": 3.228, - "co2_cost": 0.9624329512252895 - } - }, - { - "id": "tiiuae/Falcon3-3B-Instruct_bfloat16_552213004cecf9bb6ce332f46da0d4324c8347f1_True", - "model": { - "name": "tiiuae/Falcon3-3B-Instruct", - "sha": "552213004cecf9bb6ce332f46da0d4324c8347f1", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.60234489991349, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6976755010040027, - "normalized_score": 69.76755010040026 - }, - "bbh": { - "name": "BBH", - "value": 0.4754430332167569, - "normalized_score": 26.287229468432695 - }, - "math": { - "name": "MATH Level 5", - "value": 0.25, - "normalized_score": 25.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28859060402684567, - "normalized_score": 5.145413870246088 - }, - "musr": { - "name": "MUSR", - "value": 0.41359375, - "normalized_score": 11.13255208333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.300531914893617, - "normalized_score": 22.281323877068555 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-12-14", - "submission_date": "2024-12-16", - "generation": 0, - "base_model": "tiiuae/Falcon3-3B-Instruct", - "hub_license": "other", - "hub_hearts": 25, - "params_billions": 3.228, - "co2_cost": 0.9609272919904578 - } - }, - { - "id": "tiiuae/Falcon3-7B-Base_bfloat16_a1cf49eb7a53210fc2ee82f3876bbc7efb2244fd_False", - "model": { - "name": "tiiuae/Falcon3-7B-Base", - "sha": "a1cf49eb7a53210fc2ee82f3876bbc7efb2244fd", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.745725360383613, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.34159474638403875, - "normalized_score": 34.15947463840388 - }, - "bbh": { - "name": "BBH", - "value": 0.5098880466426711, - "normalized_score": 31.55991854750336 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19410876132930513, - "normalized_score": 19.410876132930515 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3464765100671141, - "normalized_score": 12.863534675615215 - }, - "musr": { - "name": "MUSR", - "value": 0.47020833333333334, - "normalized_score": 18.142708333333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 
0.3910405585106383, - "normalized_score": 32.337839834515364 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-11-21", - "submission_date": "2024-12-12", - "generation": 0, - "base_model": "tiiuae/Falcon3-7B-Base", - "hub_license": "other", - "hub_hearts": 26, - "params_billions": 7.456, - "co2_cost": 1.2187440946071069 - } - }, - { - "id": "tiiuae/Falcon3-7B-Instruct_bfloat16_7aae4f3953f3dbfaa81aeecbb404a6bbba0e0c06_True", - "model": { - "name": "tiiuae/Falcon3-7B-Instruct", - "sha": "7aae4f3953f3dbfaa81aeecbb404a6bbba0e0c06", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 36.404684964282325, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7612479332615238, - "normalized_score": 76.1247933261524 - }, - "bbh": { - "name": "BBH", - "value": 0.563244278519333, - "normalized_score": 37.91581245917148 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4086102719033233, - "normalized_score": 40.86102719033233 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3104026845637584, - "normalized_score": 8.05369127516779 - }, - "musr": { - "name": "MUSR", - "value": 0.48267708333333337, - "normalized_score": 21.167968749999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4087433510638298, - "normalized_score": 34.30481678486997 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-11-29", - "submission_date": "2024-12-16", - "generation": 1, - "base_model": "tiiuae/Falcon3-7B-Base", - "hub_license": "other", - "hub_hearts": 64, - "params_billions": 7.456, - "co2_cost": 1.237521337225013 - } - }, - { - "id": "tiiuae/Falcon3-Mamba-7B-Base_bfloat16_f08d14145ce86c32dd04f18bacb3f12b247042e2_False", - "model": { - "name": "tiiuae/Falcon3-Mamba-7B-Base", - "sha": "f08d14145ce86c32dd04f18bacb3f12b247042e2", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "FalconMambaForCausalLM", - "average_score": 18.138791975781, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.28911288713945665, - "normalized_score": 28.911288713945666 - }, - "bbh": { - "name": "BBH", - "value": 0.4699280188827039, - "normalized_score": 25.534048802953162 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19410876132930513, - "normalized_score": 19.410876132930515 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.3431458333333333, - "normalized_score": 4.393229166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.30377327127659576, - "normalized_score": 22.641474586288414 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-12-11", - "submission_date": "2024-12-12", - "generation": 0, - "base_model": "tiiuae/Falcon3-Mamba-7B-Base", - "hub_license": "other", - "hub_hearts": 21, - "params_billions": 7.273, - "co2_cost": 1.6726362873383394 - } - }, - { - "id": 
"tiiuae/Falcon3-Mamba-7B-Instruct_bfloat16_382561849d1509b5f1a4d7a38bb286b3c4f46fbd_True", - "model": { - "name": "tiiuae/Falcon3-Mamba-7B-Instruct", - "sha": "382561849d1509b5f1a4d7a38bb286b3c4f46fbd", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "FalconMambaForCausalLM", - "average_score": 28.109654708582763, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7165099713205406, - "normalized_score": 71.65099713205407 - }, - "bbh": { - "name": "BBH", - "value": 0.4678957688410694, - "normalized_score": 25.203505172398078 - }, - "math": { - "name": "MATH Level 5", - "value": 0.30060422960725075, - "normalized_score": 30.060422960725074 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3036912751677852, - "normalized_score": 7.158836689038028 - }, - "musr": { - "name": "MUSR", - "value": 0.38686458333333335, - "normalized_score": 8.258072916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3369348404255319, - "normalized_score": 26.326093380614658 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-12-13", - "submission_date": "2024-12-13", - "generation": 1, - "base_model": "tiiuae/Falcon3-Mamba-7B-Instruct (Merge)", - "hub_license": "other", - "hub_hearts": 27, - "params_billions": 7.273, - "co2_cost": 1.6569950132433542 - } - }, - { - "id": "tiiuae/falcon-11B_bfloat16_066e3bf4e2d9aaeefa129af0a6d39727d27816b3_False", - "model": { - "name": "tiiuae/falcon-11B", - "sha": "066e3bf4e2d9aaeefa129af0a6d39727d27816b3", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "FalconForCausalLM", - "average_score": 13.851902586180215, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3261324397044287, - "normalized_score": 32.613243970442866 - }, - "bbh": { - "name": "BBH", - "value": 0.43916370355493844, - "normalized_score": 21.937999462890275 - }, - "math": { - "name": "MATH Level 5", - "value": 0.027945619335347432, - "normalized_score": 2.794561933534743 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2709731543624161, - "normalized_score": 2.796420581655479 - }, - "musr": { - "name": "MUSR", - "value": 0.39864583333333337, - "normalized_score": 7.530729166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.23894614361702127, - "normalized_score": 15.438460401891252 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2024-05-09", - "submission_date": "2024-06-09", - "generation": 0, - "base_model": "tiiuae/falcon-11B", - "hub_license": "unknown", - "hub_hearts": 212, - "params_billions": 11.103, - "co2_cost": 2.165741982835327 - } - }, - { - "id": "tiiuae/falcon-40b_bfloat16_4a70170c215b36a3cce4b4253f6d0612bb7d4146_False", - "model": { - "name": "tiiuae/falcon-40b", - "sha": "4a70170c215b36a3cce4b4253f6d0612bb7d4146", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "FalconForCausalLM", - "average_score": 11.40130446230009, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24964538535530173, - "normalized_score": 24.964538535530174 - }, - "bbh": { - "name": "BBH", - "value": 0.4018532495595801, - 
"normalized_score": 16.583304730312175 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01812688821752266, - "normalized_score": 1.812688821752266 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27348993288590606, - "normalized_score": 3.1319910514541416 - }, - "musr": { - "name": "MUSR", - "value": 0.36314583333333333, - "normalized_score": 5.193229166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.25049867021276595, - "normalized_score": 16.722074468085104 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-05-24", - "submission_date": "2024-06-09", - "generation": 0, - "base_model": "tiiuae/falcon-40b", - "hub_license": "apache-2.0", - "hub_hearts": 2422, - "params_billions": 40.0, - "co2_cost": 43.58716780286837 - } - }, - { - "id": "tiiuae/falcon-40b-instruct_bfloat16_ecb78d97ac356d098e79f0db222c9ce7c5d9ee5f_False", - "model": { - "name": "tiiuae/falcon-40b-instruct", - "sha": "ecb78d97ac356d098e79f0db222c9ce7c5d9ee5f", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "FalconForCausalLM", - "average_score": 10.484506782098748, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24544874266945038, - "normalized_score": 24.54487426694504 - }, - "bbh": { - "name": "BBH", - "value": 0.40538675151591974, - "normalized_score": 17.220114203264526 - }, - "math": { - "name": "MATH Level 5", - "value": 0.019637462235649546, - "normalized_score": 1.9637462235649545 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.37622916666666667, - "normalized_score": 5.161979166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2261469414893617, - "normalized_score": 14.016326832151298 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": true - }, - "metadata": { - "upload_date": "2023-05-25", - "submission_date": "2024-06-09", - "generation": 0, - "base_model": "tiiuae/falcon-40b-instruct", - "hub_license": "apache-2.0", - "hub_hearts": 1176, - "params_billions": 40.0, - "co2_cost": 39.466490974353526 - } - }, - { - "id": "tiiuae/falcon-7b_bfloat16_898df1396f35e447d5fe44e0a3ccaaaa69f30d36_False", - "model": { - "name": "tiiuae/falcon-7b", - "sha": "898df1396f35e447d5fe44e0a3ccaaaa69f30d36", - "precision": "bfloat16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "FalconForCausalLM", - "average_score": 5.1734447203194796, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.182051401392749, - "normalized_score": 18.205140139274903 - }, - "bbh": { - "name": "BBH", - "value": 0.32852446117322215, - "normalized_score": 5.963936911876051 - }, - "math": { - "name": "MATH Level 5", - "value": 0.009818731117824773, - "normalized_score": 0.9818731117824773 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24496644295302014, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.37784375, - "normalized_score": 4.497135416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11253324468085106, - "normalized_score": 1.392582742316784 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": 
true
-    },
-    "metadata": {
-      "upload_date": "2023-04-24",
-      "submission_date": "2024-06-09",
-      "generation": 0,
-      "base_model": "tiiuae/falcon-7b",
-      "hub_license": "apache-2.0",
-      "hub_hearts": 1083,
-      "params_billions": 7.0,
-      "co2_cost": 1.5716824748415301
-    }
-  },
[... the deletion hunk continues with one removed JSON record of identical shape for each of the following entries: tiiuae/falcon-7b-instruct, tiiuae/falcon-mamba-7b, tinycompany/BiBo-v0.3, tinycompany/BiBo-v0.7, tinycompany/ShawtyIsBad-bgem3, tinycompany/ShawtyIsBad-e5-large, tinycompany/ShawtyIsBad-ib, tinycompany/ShawtyIsBad-nomic-moe, tinycompany/ShawtyIsBad-nomic1.5, tinycompany/SigmaBoi-base, tinycompany/SigmaBoi-bge-m3, tinycompany/SigmaBoi-bgem3, tinycompany/SigmaBoi-ib, tinycompany/SigmaBoi-nomic-moe, tinycompany/SigmaBoi-nomic1.5, tinycompany/SigmaBoi-nomic1.5-fp32, tinycompany/Tamed-Shawty, tklohj/WindyFloLLM, togethercomputer/GPT-JT-6B-v1, togethercomputer/GPT-NeoXT-Chat-Base-20B, togethercomputer/LLaMA-2-7B-32K, togethercomputer/Llama-2-7B-32K-Instruct, togethercomputer/RedPajama-INCITE-7B-Base, togethercomputer/RedPajama-INCITE-7B-Chat, togethercomputer/RedPajama-INCITE-7B-Instruct, togethercomputer/RedPajama-INCITE-Base-3B-v1, togethercomputer/RedPajama-INCITE-Chat-3B-v1, togethercomputer/RedPajama-INCITE-Instruct-3B-v1, tokyotech-llm/Llama-3-Swallow-8B-Instruct-v0.1, tomasmcm/sky-t1-coder-32b-flash, trthminh1112/autotrain-llama32-1b-finetune, tugstugi/Qwen2.5-7B-Instruct-QwQ-v0.1, universalml/NepaliGPT-2.0, unsloth/Llama-3.2-1B-Instruct, unsloth/Llama-3.2-1B-Instruct-no-system-message, unsloth/Phi-3-mini-4k-instruct, unsloth/phi-4, unsloth/phi-4-bnb-4bit, unsloth/phi-4-unsloth-bnb-4bit, upstage/SOLAR-10.7B-Instruct-v1.0, upstage/SOLAR-10.7B-v1.0, upstage/solar-pro-preview-instruct, utkmst/chimera-beta-test2-lora-merged, uukuguy/speechless-code-mistral-7b-v1.0, uukuguy/speechless-codellama-34b-v2.0, and uukuguy/speechless-coder-ds-6.7b; it then opens the record for uukuguy/speechless-instruct-mistral-7b-v0.2 (sha 87a4d214f7d028d61c3dc013a7410b3c34a24072, MistralForCausalLM, bfloat16, average_score 18.10671348498029), whose deletion lines, from its IFEval entry through the opening of MUSR, continue beyond this excerpt. Every removed record carries "id"; "model" ("name", "sha", "precision", "type", "weight_type", "architecture", "average_score", "has_chat_template"); "evaluations" ("ifeval", "bbh", "math", "gpqa", "musr", "mmlu_pro", each with "name", "value", "normalized_score"); "features" ("is_not_available_on_hub", "is_merged", "is_moe", "is_flagged", "is_official_provider"); and "metadata" ("upload_date", "submission_date", "generation", "base_model", "hub_license", "hub_hearts", "params_billions", "co2_cost") ...]
"value": 0.4901770833333334, - "normalized_score": 21.172135416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2902260638297872, - "normalized_score": 21.136229314420802 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-22", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "uukuguy/speechless-instruct-mistral-7b-v0.2", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.2352409646090445 - } - }, - { - "id": "uukuguy/speechless-llama2-hermes-orca-platypus-wizardlm-13b_bfloat16_954cc87b0ed5fa280126de546daf648861031512_False", - "model": { - "name": "uukuguy/speechless-llama2-hermes-orca-platypus-wizardlm-13b", - "sha": "954cc87b0ed5fa280126de546daf648861031512", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.701595895131856, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.45617517076911485, - "normalized_score": 45.61751707691148 - }, - "bbh": { - "name": "BBH", - "value": 0.48455373040676664, - "normalized_score": 26.79172729423422 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02039274924471299, - "normalized_score": 2.0392749244712993 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2701342281879195, - "normalized_score": 2.684563758389265 - }, - "musr": { - "name": "MUSR", - "value": 0.4655, - "normalized_score": 17.754166666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.25590093085106386, - "normalized_score": 17.322325650118206 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-09-01", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "uukuguy/speechless-llama2-hermes-orca-platypus-wizardlm-13b", - "hub_license": "", - "hub_hearts": 32, - "params_billions": 13.016, - "co2_cost": 1.9590486657886075 - } - }, - { - "id": "uukuguy/speechless-mistral-dolphin-orca-platypus-samantha-7b_bfloat16_b1de043468a15198b55a6509293a4ee585139043_False", - "model": { - "name": "uukuguy/speechless-mistral-dolphin-orca-platypus-samantha-7b", - "sha": "b1de043468a15198b55a6509293a4ee585139043", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 18.340089485864258, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.37002154283966543, - "normalized_score": 37.00215428396654 - }, - "bbh": { - "name": "BBH", - "value": 0.4982774952761688, - "normalized_score": 29.65312947574292 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02945619335347432, - "normalized_score": 2.9456193353474323 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.43613541666666666, - "normalized_score": 13.850260416666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2990359042553192, - "normalized_score": 22.11510047281324 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - 
}, - "metadata": { - "upload_date": "2023-10-13", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "uukuguy/speechless-mistral-dolphin-orca-platypus-samantha-7b", - "hub_license": "llama2", - "hub_hearts": 17, - "params_billions": 7.242, - "co2_cost": 1.311437449926933 - } - }, - { - "id": "uukuguy/speechless-zephyr-code-functionary-7b_bfloat16_d66fc775ece679966e352195c42444e9c70af7fa_False", - "model": { - "name": "uukuguy/speechless-zephyr-code-functionary-7b", - "sha": "d66fc775ece679966e352195c42444e9c70af7fa", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 16.460834340340238, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2695791610704043, - "normalized_score": 26.957916107040433 - }, - "bbh": { - "name": "BBH", - "value": 0.46642753957194555, - "normalized_score": 25.983622785908505 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04229607250755287, - "normalized_score": 4.229607250755287 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30033557046979864, - "normalized_score": 6.711409395973152 - }, - "musr": { - "name": "MUSR", - "value": 0.4267708333333333, - "normalized_score": 11.613020833333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3094248670212766, - "normalized_score": 23.269429669030732 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-01-23", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "uukuguy/speechless-zephyr-code-functionary-7b", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 7.242, - "co2_cost": 1.2679992531547388 - } - }, - { - "id": "v000000/L3-8B-Stheno-v3.2-abliterated_bfloat16_ddb17f127a1c068b105b79aadd76632615743f68_True", - "model": { - "name": "v000000/L3-8B-Stheno-v3.2-abliterated", - "sha": "ddb17f127a1c068b105b79aadd76632615743f68", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.620611328256683, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6717720093795574, - "normalized_score": 67.17720093795573 - }, - "bbh": { - "name": "BBH", - "value": 0.5141439214918061, - "normalized_score": 30.74630496568445 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06948640483383686, - "normalized_score": 6.948640483383686 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30956375838926176, - "normalized_score": 7.941834451901568 - }, - "musr": { - "name": "MUSR", - "value": 0.36196875, - "normalized_score": 5.979427083333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3603723404255319, - "normalized_score": 28.930260047281326 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-09", - "submission_date": "2025-01-07", - "generation": 1, - "base_model": "v000000/L3-8B-Stheno-v3.2-abliterated (Merge)", - "hub_license": "", - "hub_hearts": 8, - "params_billions": 8.03, - "co2_cost": 1.0007103084614668 - } - }, - { - "id": "v000000/L3.1-Niitorm-8B-DPO-t0.0001_float16_a34150b5f63de4bc83d79b1de127faff3750289f_True", - "model": { - "name": 
"v000000/L3.1-Niitorm-8B-DPO-t0.0001", - "sha": "a34150b5f63de4bc83d79b1de127faff3750289f", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.113230470994733, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7688666072687137, - "normalized_score": 76.88666072687137 - }, - "bbh": { - "name": "BBH", - "value": 0.5134234526726582, - "normalized_score": 30.51317301580421 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1623867069486405, - "normalized_score": 16.238670694864048 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.3879791666666667, - "normalized_score": 7.2640625000000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.38663563829787234, - "normalized_score": 31.848404255319146 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-19", - "submission_date": "2024-09-19", - "generation": 1, - "base_model": "v000000/L3.1-Niitorm-8B-DPO-t0.0001 (Merge)", - "hub_license": "", - "hub_hearts": 8, - "params_billions": 8.03, - "co2_cost": 1.7562181593962325 - } - }, - { - "id": "v000000/L3.1-Storniitova-8B_bfloat16_05b126857f43d1b1383e50f8c97d214ceb199723_True", - "model": { - "name": "v000000/L3.1-Storniitova-8B", - "sha": "05b126857f43d1b1383e50f8c97d214ceb199723", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.28170680403471, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7816560060639104, - "normalized_score": 78.16560060639105 - }, - "bbh": { - "name": "BBH", - "value": 0.5151452004311876, - "normalized_score": 30.810993185589904 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14652567975830816, - "normalized_score": 14.652567975830816 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28942953020134227, - "normalized_score": 5.257270693512303 - }, - "musr": { - "name": "MUSR", - "value": 0.4028958333333333, - "normalized_score": 9.961979166666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37757646276595747, - "normalized_score": 30.841829196217496 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-12", - "submission_date": "2024-09-18", - "generation": 1, - "base_model": "v000000/L3.1-Storniitova-8B (Merge)", - "hub_license": "", - "hub_hearts": 7, - "params_billions": 8.03, - "co2_cost": 1.6270799995011005 - } - }, - { - "id": "v000000/Qwen2.5-14B-Gutenberg-1e-Delta_bfloat16_f624854b4380e01322e752ce4daadd49ac86580f_True", - "model": { - "name": "v000000/Qwen2.5-14B-Gutenberg-1e-Delta", - "sha": "f624854b4380e01322e752ce4daadd49ac86580f", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 40.87901381911441, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8045120280854798, - "normalized_score": 80.45120280854799 - }, - "bbh": { - "name": "BBH", - "value": 0.639849930188539, - "normalized_score": 48.6166718794722 - }, - "math": { - 
"name": "MATH Level 5", - "value": 0.5264350453172205, - "normalized_score": 52.64350453172205 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3288590604026846, - "normalized_score": 10.514541387024611 - }, - "musr": { - "name": "MUSR", - "value": 0.40730208333333334, - "normalized_score": 9.379427083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4930186170212766, - "normalized_score": 43.668735224586285 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-20", - "submission_date": "2024-09-28", - "generation": 1, - "base_model": "v000000/Qwen2.5-14B-Gutenberg-1e-Delta (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 4, - "params_billions": 14.77, - "co2_cost": 3.604773014380301 - } - }, - { - "id": "v000000/Qwen2.5-14B-Gutenberg-Instruct-Slerpeno_bfloat16_1069abb4c25855e67ffaefa08a0befbb376e7ca7_True", - "model": { - "name": "v000000/Qwen2.5-14B-Gutenberg-Instruct-Slerpeno", - "sha": "1069abb4c25855e67ffaefa08a0befbb376e7ca7", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.36227864094851, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8197493760998595, - "normalized_score": 81.97493760998594 - }, - "bbh": { - "name": "BBH", - "value": 0.639010174859259, - "normalized_score": 48.45212383388064 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5324773413897281, - "normalized_score": 53.24773413897282 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3313758389261745, - "normalized_score": 10.850111856823268 - }, - "musr": { - "name": "MUSR", - "value": 0.4113645833333333, - "normalized_score": 10.053906250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4923537234042553, - "normalized_score": 43.59485815602837 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-20", - "submission_date": "2024-12-07", - "generation": 1, - "base_model": "v000000/Qwen2.5-14B-Gutenberg-Instruct-Slerpeno (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 6, - "params_billions": 14.77, - "co2_cost": 5.344519821833167 - } - }, - { - "id": "v000000/Qwen2.5-Lumen-14B_bfloat16_fbb1d184ed01dac52d307737893ebb6b0ace444c_True", - "model": { - "name": "v000000/Qwen2.5-Lumen-14B", - "sha": "fbb1d184ed01dac52d307737893ebb6b0ace444c", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 41.137851148922614, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8063604569209697, - "normalized_score": 80.63604569209697 - }, - "bbh": { - "name": "BBH", - "value": 0.6390809511149668, - "normalized_score": 48.50786084405761 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5362537764350453, - "normalized_score": 53.625377643504535 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32802013422818793, - "normalized_score": 10.402684563758392 - }, - "musr": { - "name": "MUSR", - "value": 0.41139583333333335, - "normalized_score": 10.291145833333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.49027593085106386, - "normalized_score": 43.363992316784866 - } - }, - "features": { - 
"is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-20", - "submission_date": "2024-09-20", - "generation": 1, - "base_model": "v000000/Qwen2.5-Lumen-14B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 19, - "params_billions": 14.77, - "co2_cost": 3.673385087937254 - } - }, - { - "id": "vhab10/Llama-3.1-8B-Base-Instruct-SLERP_bfloat16_eccb4bde0dc91f586954109ecdce7c94f47e2625_False", - "model": { - "name": "vhab10/Llama-3.1-8B-Base-Instruct-SLERP", - "sha": "eccb4bde0dc91f586954109ecdce7c94f47e2625", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.27479354795199, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.290711977552893, - "normalized_score": 29.0711977552893 - }, - "bbh": { - "name": "BBH", - "value": 0.5057443268070797, - "normalized_score": 29.926041623092612 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12009063444108761, - "normalized_score": 12.009063444108762 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2961409395973154, - "normalized_score": 6.152125279642054 - }, - "musr": { - "name": "MUSR", - "value": 0.40106250000000004, - "normalized_score": 9.366145833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3621176861702128, - "normalized_score": 29.12418735224587 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-16", - "submission_date": "2024-09-29", - "generation": 1, - "base_model": "vhab10/Llama-3.1-8B-Base-Instruct-SLERP (Merge)", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 8.03, - "co2_cost": 1.6134415622278524 - } - }, - { - "id": "vhab10/Llama-3.2-Instruct-3B-TIES_bfloat16_0e8661730f40a6a279bd273cfe9fe46bbd0507dd_False", - "model": { - "name": "vhab10/Llama-3.2-Instruct-3B-TIES", - "sha": "0e8661730f40a6a279bd273cfe9fe46bbd0507dd", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.33432617767801, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4727367828472896, - "normalized_score": 47.27367828472896 - }, - "bbh": { - "name": "BBH", - "value": 0.43323649966514094, - "normalized_score": 19.183159360187968 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09818731117824774, - "normalized_score": 9.818731117824774 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26929530201342283, - "normalized_score": 2.572706935123044 - }, - "musr": { - "name": "MUSR", - "value": 0.34965625, - "normalized_score": 3.873697916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2915558510638298, - "normalized_score": 21.283983451536646 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-06", - "submission_date": "2024-11-23", - "generation": 1, - "base_model": "vhab10/Llama-3.2-Instruct-3B-TIES (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 1.848, - "co2_cost": 2.245852928376529 - } - }, - { - "id": 
"vhab10/llama-3-8b-merged-linear_float16_c37e7671b5ccfadbf3065fa5b48af05cd4f13292_True", - "model": { - "name": "vhab10/llama-3-8b-merged-linear", - "sha": "c37e7671b5ccfadbf3065fa5b48af05cd4f13292", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.91136833689406, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5916634529714491, - "normalized_score": 59.166345297144915 - }, - "bbh": { - "name": "BBH", - "value": 0.49370937443498536, - "normalized_score": 27.816051327740798 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08157099697885196, - "normalized_score": 8.157099697885197 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.4190520833333333, - "normalized_score": 11.681510416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37042885638297873, - "normalized_score": 30.04765070921986 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-26", - "submission_date": "2024-09-26", - "generation": 1, - "base_model": "vhab10/llama-3-8b-merged-linear (Merge)", - "hub_license": "mit", - "hub_hearts": 0, - "params_billions": 4.65, - "co2_cost": 2.609886668917451 - } - }, - { - "id": "vicgalle/CarbonBeagle-11B_float16_3fe9bf5327606d013b182fed17a472f5f043759b_True", - "model": { - "name": "vicgalle/CarbonBeagle-11B", - "sha": "3fe9bf5327606d013b182fed17a472f5f043759b", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.470185912589034, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5415298075772285, - "normalized_score": 54.152980757722844 - }, - "bbh": { - "name": "BBH", - "value": 0.5293652486530874, - "normalized_score": 33.06060419684841 - }, - "math": { - "name": "MATH Level 5", - "value": 0.061933534743202415, - "normalized_score": 6.193353474320242 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - "musr": { - "name": "MUSR", - "value": 0.40203125, - "normalized_score": 9.187239583333339 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32762632978723405, - "normalized_score": 25.29181442080378 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-01-21", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "vicgalle/CarbonBeagle-11B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 9, - "params_billions": 10.732, - "co2_cost": 1.8307574022339386 - } - }, - { - "id": "vicgalle/CarbonBeagle-11B-truthy_float16_476cd2a6d938bddb38dfbeb4cb21e3e34303413d_True", - "model": { - "name": "vicgalle/CarbonBeagle-11B-truthy", - "sha": "476cd2a6d938bddb38dfbeb4cb21e3e34303413d", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.31996250525653, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5212214701436633, - "normalized_score": 52.12214701436632 - }, - "bbh": { - "name": 
"BBH", - "value": 0.5348420085288232, - "normalized_score": 33.98837559181831 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04909365558912387, - "normalized_score": 4.909365558912387 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29949664429530204, - "normalized_score": 6.599552572706939 - }, - "musr": { - "name": "MUSR", - "value": 0.37396874999999996, - "normalized_score": 4.112760416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.335688164893617, - "normalized_score": 26.18757387706856 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-02-10", - "submission_date": "2024-07-13", - "generation": 0, - "base_model": "vicgalle/CarbonBeagle-11B-truthy", - "hub_license": "apache-2.0", - "hub_hearts": 10, - "params_billions": 10.732, - "co2_cost": 1.814546619696793 - } - }, - { - "id": "vicgalle/Configurable-Hermes-2-Pro-Llama-3-8B_float16_3cb5792509966a963645be24fdbeb2e7dc6cac15_True", - "model": { - "name": "vicgalle/Configurable-Hermes-2-Pro-Llama-3-8B", - "sha": "3cb5792509966a963645be24fdbeb2e7dc6cac15", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.56595247127864, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5762510139762497, - "normalized_score": 57.62510139762497 - }, - "bbh": { - "name": "BBH", - "value": 0.5054841203275775, - "normalized_score": 30.50962474895478 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07628398791540786, - "normalized_score": 7.628398791540786 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29697986577181207, - "normalized_score": 6.263982102908276 - }, - "musr": { - "name": "MUSR", - "value": 0.4183645833333333, - "normalized_score": 10.062239583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3097573138297872, - "normalized_score": 23.30636820330969 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-02", - "submission_date": "2024-07-24", - "generation": 2, - "base_model": "NousResearch/Meta-Llama-3-8B", - "hub_license": "apache-2.0", - "hub_hearts": 6, - "params_billions": 8.031, - "co2_cost": 1.4978545713733988 - } - }, - { - "id": "vicgalle/Configurable-Llama-3.1-8B-Instruct_float16_133b3ab1a5385ff9b3d17da2addfe3fc1fd6f733_True", - "model": { - "name": "vicgalle/Configurable-Llama-3.1-8B-Instruct", - "sha": "133b3ab1a5385ff9b3d17da2addfe3fc1fd6f733", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.01011138792457, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8312399987588488, - "normalized_score": 83.12399987588486 - }, - "bbh": { - "name": "BBH", - "value": 0.5044756225072481, - "normalized_score": 29.661397892084384 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1729607250755287, - "normalized_score": 17.29607250755287 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.3845416666666666, - "normalized_score": 5.934375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3592087765957447, 
- "normalized_score": 28.800975177304966 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-24", - "submission_date": "2024-08-05", - "generation": 0, - "base_model": "vicgalle/Configurable-Llama-3.1-8B-Instruct", - "hub_license": "apache-2.0", - "hub_hearts": 16, - "params_billions": 8.03, - "co2_cost": 1.5932191079420261 - } - }, - { - "id": "vicgalle/Configurable-Yi-1.5-9B-Chat_float16_992cb2232caae78eff6a836b2e0642f7cbf6018e_True", - "model": { - "name": "vicgalle/Configurable-Yi-1.5-9B-Chat", - "sha": "992cb2232caae78eff6a836b2e0642f7cbf6018e", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.162899498605693, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.43234506664538974, - "normalized_score": 43.234506664538976 - }, - "bbh": { - "name": "BBH", - "value": 0.5452196737175008, - "normalized_score": 35.33444508462291 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20468277945619334, - "normalized_score": 20.468277945619334 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34312080536912754, - "normalized_score": 12.416107382550338 - }, - "musr": { - "name": "MUSR", - "value": 0.42711458333333335, - "normalized_score": 12.022656249999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4015126329787234, - "normalized_score": 33.5014036643026 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-12", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "vicgalle/Configurable-Yi-1.5-9B-Chat", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 8.829, - "co2_cost": 1.8838176236566786 - } - }, - { - "id": "vicgalle/ConfigurableBeagle-11B_float16_bbc16dbf94b8e8a99bb3e2ada6755faf9c2990dd_True", - "model": { - "name": "vicgalle/ConfigurableBeagle-11B", - "sha": "bbc16dbf94b8e8a99bb3e2ada6755faf9c2990dd", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.622956003400244, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5834452585805663, - "normalized_score": 58.34452585805663 - }, - "bbh": { - "name": "BBH", - "value": 0.5286592318626696, - "normalized_score": 32.392022902811185 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04305135951661632, - "normalized_score": 4.305135951661631 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - "musr": { - "name": "MUSR", - "value": 0.39530208333333333, - "normalized_score": 7.379427083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33743351063829785, - "normalized_score": 26.381501182033094 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-02-17", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "vicgalle/ConfigurableBeagle-11B", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 10.732, - "co2_cost": 1.7597130454120933 - } - }, - { - "id": 
"vicgalle/ConfigurableHermes-7B_float16_1333a88eaf6591836b2d9825d1eaec7260f336c9_True", - "model": { - "name": "vicgalle/ConfigurableHermes-7B", - "sha": "1333a88eaf6591836b2d9825d1eaec7260f336c9", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 19.536295414907375, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5410798902467675, - "normalized_score": 54.10798902467674 - }, - "bbh": { - "name": "BBH", - "value": 0.4572969627830424, - "normalized_score": 23.158164380406475 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04758308157099698, - "normalized_score": 4.758308157099698 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27684563758389263, - "normalized_score": 3.5794183445190177 - }, - "musr": { - "name": "MUSR", - "value": 0.4056875, - "normalized_score": 9.110937500000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3025265957446808, - "normalized_score": 22.50295508274231 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-02-17", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "vicgalle/ConfigurableHermes-7B", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 7.242, - "co2_cost": 1.2345637627211006 - } - }, - { - "id": "vicgalle/ConfigurableSOLAR-10.7B_float16_9d9baad88ea9dbaa61881f15e4f0d16e931033b4_True", - "model": { - "name": "vicgalle/ConfigurableSOLAR-10.7B", - "sha": "9d9baad88ea9dbaa61881f15e4f0d16e931033b4", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.153450201779847, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5099558061499045, - "normalized_score": 50.995580614990445 - }, - "bbh": { - "name": "BBH", - "value": 0.48668100977360457, - "normalized_score": 27.450950141666922 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06646525679758308, - "normalized_score": 6.646525679758309 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2986577181208054, - "normalized_score": 6.487695749440718 - }, - "musr": { - "name": "MUSR", - "value": 0.38047916666666665, - "normalized_score": 5.193229166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31732047872340424, - "normalized_score": 24.146719858156025 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-03-10", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "vicgalle/ConfigurableSOLAR-10.7B", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 10.732, - "co2_cost": 1.3553626295511634 - } - }, - { - "id": "vicgalle/Humanish-RP-Llama-3.1-8B_float16_d27aa731db1d390a8d17b0a4565c9231ee5ae8b9_True", - "model": { - "name": "vicgalle/Humanish-RP-Llama-3.1-8B", - "sha": "d27aa731db1d390a8d17b0a4565c9231ee5ae8b9", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 25.423199454688373, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6669259786256023, - "normalized_score": 66.69259786256023 - }, - "bbh": { - "name": 
"BBH", - "value": 0.5100385476143247, - "normalized_score": 29.958560315236678 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15181268882175228, - "normalized_score": 15.181268882175228 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28691275167785235, - "normalized_score": 4.921700223713646 - }, - "musr": { - "name": "MUSR", - "value": 0.39520833333333333, - "normalized_score": 8.26770833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34765625, - "normalized_score": 27.51736111111111 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-03", - "submission_date": "2024-08-03", - "generation": 0, - "base_model": "vicgalle/Humanish-RP-Llama-3.1-8B", - "hub_license": "apache-2.0", - "hub_hearts": 10, - "params_billions": 8.03, - "co2_cost": 1.5069010760109964 - } - }, - { - "id": "vicgalle/Merge-Mistral-Prometheus-7B_bfloat16_a7083581b508ce83c74f9267f07024bd462e7161_True", - "model": { - "name": "vicgalle/Merge-Mistral-Prometheus-7B", - "sha": "a7083581b508ce83c74f9267f07024bd462e7161", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 16.58664234759141, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.48480143796238423, - "normalized_score": 48.48014379623842 - }, - "bbh": { - "name": "BBH", - "value": 0.420139773821292, - "normalized_score": 18.41040626692948 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01812688821752266, - "normalized_score": 1.812688821752266 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.41, - "normalized_score": 9.950000000000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2716921542553192, - "normalized_score": 19.076906028368796 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-04", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "vicgalle/Merge-Mistral-Prometheus-7B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 7.242, - "co2_cost": 1.2607111502563213 - } - }, - { - "id": "vicgalle/Merge-Mixtral-Prometheus-8x7B_bfloat16_ba53ee5b52a81e56b01e919c069a0d045cfd4e83_True", - "model": { - "name": "vicgalle/Merge-Mixtral-Prometheus-8x7B", - "sha": "ba53ee5b52a81e56b01e919c069a0d045cfd4e83", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 24.768981526162975, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5744025851407598, - "normalized_score": 57.44025851407598 - }, - "bbh": { - "name": "BBH", - "value": 0.5351498071096573, - "normalized_score": 34.65142126614313 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09290030211480363, - "normalized_score": 9.290030211480364 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3087248322147651, - "normalized_score": 7.829977628635347 - }, - "musr": { - "name": "MUSR", - "value": 0.40975, - "normalized_score": 9.585416666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3683510638297872, - "normalized_score": 
29.81678486997636 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-04", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "vicgalle/Merge-Mixtral-Prometheus-8x7B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 46.703, - "co2_cost": 7.348018298275145 - } - }, - { - "id": "vicgalle/Roleplay-Llama-3-8B_float16_57297eb57dcc2c116f061d9dda341094203da01b_True", - "model": { - "name": "vicgalle/Roleplay-Llama-3-8B", - "sha": "57297eb57dcc2c116f061d9dda341094203da01b", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.020182936148974, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7320221456845614, - "normalized_score": 73.20221456845613 - }, - "bbh": { - "name": "BBH", - "value": 0.5012318206922323, - "normalized_score": 28.554603909240623 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09138972809667674, - "normalized_score": 9.138972809667674 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.3528854166666666, - "normalized_score": 1.6773437499999992 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.370844414893617, - "normalized_score": 30.093823877068555 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-19", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "vicgalle/Roleplay-Llama-3-8B", - "hub_license": "apache-2.0", - "hub_hearts": 37, - "params_billions": 8.03, - "co2_cost": 2.252317027783774 - } - }, - { - "id": "viettelsecurity-ai/security-llama3.2-3b_float16_a33cd2c208d3cefef12601f7dc9a290a218fafa3_True", - "model": { - "name": "viettelsecurity-ai/security-llama3.2-3b", - "sha": "a33cd2c208d3cefef12601f7dc9a290a218fafa3", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.977384437548874, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5908888416069362, - "normalized_score": 59.088884160693624 - }, - "bbh": { - "name": "BBH", - "value": 0.44005776161052806, - "normalized_score": 20.597405725871987 - }, - "math": { - "name": "MATH Level 5", - "value": 0.12613293051359517, - "normalized_score": 12.613293051359516 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.33790625, - "normalized_score": 3.9049479166666674 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2837433510638298, - "normalized_score": 20.41592789598109 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-03", - "submission_date": "2025-03-04", - "generation": 1, - "base_model": "viettelsecurity-ai/security-llama3.2-3b (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 0.606916288481658 - } - }, - { - "id": 
"vihangd/smart-dan-sft-v0.1_4bit_924b4a09153d4061fa9d58f03b10cd7cde7e3084_False", - "model": { - "name": "vihangd/smart-dan-sft-v0.1", - "sha": "924b4a09153d4061fa9d58f03b10cd7cde7e3084", - "precision": "4bit", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 3.871212537831616, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15764615664215392, - "normalized_score": 15.764615664215393 - }, - "bbh": { - "name": "BBH", - "value": 0.30617689187138886, - "normalized_score": 3.1255992643495936 - }, - "math": { - "name": "MATH Level 5", - "value": 0.009818731117824773, - "normalized_score": 0.9818731117824773 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2550335570469799, - "normalized_score": 0.6711409395973182 - }, - "musr": { - "name": "MUSR", - "value": 0.35018750000000004, - "normalized_score": 1.1067708333333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11419547872340426, - "normalized_score": 1.5772754137115832 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-09", - "submission_date": "2024-08-20", - "generation": 0, - "base_model": "vihangd/smart-dan-sft-v0.1", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.379, - "co2_cost": 0.7220493353549226 - } - }, - { - "id": "voidful/smol-360m-ft_float16_3889a38fc79d2400997e01bf1d00c8059d72fead_True", - "model": { - "name": "voidful/smol-360m-ft", - "sha": "3889a38fc79d2400997e01bf1d00c8059d72fead", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.7899302409628595, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2013103011121602, - "normalized_score": 20.131030111216017 - }, - "bbh": { - "name": "BBH", - "value": 0.3011946898842932, - "normalized_score": 3.0227064431492727 - }, - "math": { - "name": "MATH Level 5", - "value": 0.008308157099697885, - "normalized_score": 0.8308157099697886 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24580536912751677, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3713645833333333, - "normalized_score": 3.787239583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10871010638297872, - "normalized_score": 0.9677895981087459 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-24", - "submission_date": "2024-11-28", - "generation": 1, - "base_model": "voidful/smol-360m-ft (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 0.362, - "co2_cost": 0.7634589764630502 - } - }, - { - "id": "vonjack/MobileLLM-125M-HF_float16_7664f5e1b91faa04fac545f64db84c26316c7e63_False", - "model": { - "name": "vonjack/MobileLLM-125M-HF", - "sha": "7664f5e1b91faa04fac545f64db84c26316c7e63", - "precision": "float16", - "type": "pretrained", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.565351724961825, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.21072753627042912, - "normalized_score": 21.072753627042914 - }, - "bbh": { - "name": "BBH", - 
"value": 0.30272988561565645, - "normalized_score": 3.146583712799116 - }, - "math": { - "name": "MATH Level 5", - "value": 0.00906344410876133, - "normalized_score": 0.906344410876133 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.37818749999999995, - "normalized_score": 5.106770833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1163563829787234, - "normalized_score": 1.8173758865248217 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-15", - "submission_date": "2024-11-15", - "generation": 0, - "base_model": "vonjack/MobileLLM-125M-HF", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 0, - "params_billions": 0.125, - "co2_cost": 0.34362271680811607 - } - }, - { - "id": "vonjack/Phi-3-mini-4k-instruct-LLaMAfied_float16_96a48b8ea6f661f71ade001a0a2232b66ac38481_True", - "model": { - "name": "vonjack/Phi-3-mini-4k-instruct-LLaMAfied", - "sha": "96a48b8ea6f661f71ade001a0a2232b66ac38481", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 26.968080141379957, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5787488308798432, - "normalized_score": 57.87488308798432 - }, - "bbh": { - "name": "BBH", - "value": 0.5740684031598843, - "normalized_score": 40.20185213345416 - }, - "math": { - "name": "MATH Level 5", - "value": 0.13821752265861026, - "normalized_score": 13.821752265861026 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33053691275167785, - "normalized_score": 10.738255033557047 - }, - "musr": { - "name": "MUSR", - "value": 0.3923541666666666, - "normalized_score": 7.110937500000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3885472074468085, - "normalized_score": 32.06080082742317 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-24", - "submission_date": "2025-01-03", - "generation": 0, - "base_model": "vonjack/Phi-3-mini-4k-instruct-LLaMAfied", - "hub_license": "mit", - "hub_hearts": 11, - "params_billions": 3.821, - "co2_cost": 0.9022300106035944 - } - }, - { - "id": "vonjack/Phi-3.5-mini-instruct-hermes-fc-json_float16_4cacfb35723647d408f0414886d0dfe67404a14f_True", - "model": { - "name": "vonjack/Phi-3.5-mini-instruct-hermes-fc-json", - "sha": "4cacfb35723647d408f0414886d0dfe67404a14f", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Adapter", - "architecture": "?", - "average_score": 4.642406244437133, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.14158432957885078, - "normalized_score": 14.15843295788508 - }, - "bbh": { - "name": "BBH", - "value": 0.29747555432824196, - "normalized_score": 2.390836087243887 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0075528700906344415, - "normalized_score": 0.7552870090634441 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25419463087248323, - "normalized_score": 0.5592841163310973 - }, - "musr": { - "name": "MUSR", - "value": 0.40413541666666664, - "normalized_score": 8.450260416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 
0.11386303191489362, - "normalized_score": 1.540336879432624 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-05", - "submission_date": "2024-11-05", - "generation": 1, - "base_model": "vonjack/Phi-3.5-mini-instruct-hermes-fc-json (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 4.132, - "co2_cost": 2.5703771540987903 - } - }, - { - "id": "vonjack/Qwen2.5-Coder-0.5B-Merged_bfloat16_38e4789c0fc5fad359de2f7bafdb65c3ae26b95b_True", - "model": { - "name": "vonjack/Qwen2.5-Coder-0.5B-Merged", - "sha": "38e4789c0fc5fad359de2f7bafdb65c3ae26b95b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 6.979693269720411, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.30997087727230416, - "normalized_score": 30.997087727230415 - }, - "bbh": { - "name": "BBH", - "value": 0.3076017752057237, - "normalized_score": 3.5887384386437557 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0377643504531722, - "normalized_score": 3.7764350453172204 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2533557046979866, - "normalized_score": 0.44742729306487633 - }, - "musr": { - "name": "MUSR", - "value": 0.33034375, - "normalized_score": 0.8263020833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.12017952127659574, - "normalized_score": 2.24216903073286 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-19", - "submission_date": "2024-11-19", - "generation": 1, - "base_model": "vonjack/Qwen2.5-Coder-0.5B-Merged (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.63, - "co2_cost": 0.9935574479094091 - } - }, - { - "id": "vonjack/SmolLM2-1.7B-Merged_bfloat16_232d54a335220b0d83d6036f6d8df3971d3e79bb_True", - "model": { - "name": "vonjack/SmolLM2-1.7B-Merged", - "sha": "232d54a335220b0d83d6036f6d8df3971d3e79bb", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 12.23422955589429, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.36979658417443495, - "normalized_score": 36.979658417443495 - }, - "bbh": { - "name": "BBH", - "value": 0.3586553457965105, - "normalized_score": 10.766530256983183 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06268882175226587, - "normalized_score": 6.268882175226587 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.34079166666666666, - "normalized_score": 3.832291666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2047872340425532, - "normalized_score": 11.643026004728133 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-18", - "submission_date": "2024-11-18", - "generation": 1, - "base_model": "vonjack/SmolLM2-1.7B-Merged (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 1.711, - "co2_cost": 
0.6226545348609049 - } - }, - { - "id": "vonjack/SmolLM2-135M-Merged_bfloat16_a1700ca913a87ad713edfe57a2030a9d7c088970_True", - "model": { - "name": "vonjack/SmolLM2-135M-Merged", - "sha": "a1700ca913a87ad713edfe57a2030a9d7c088970", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 5.872429779192967, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.24829674153468353, - "normalized_score": 24.829674153468353 - }, - "bbh": { - "name": "BBH", - "value": 0.3099931265410582, - "normalized_score": 4.587041236226676 - }, - "math": { - "name": "MATH Level 5", - "value": 0.011329305135951661, - "normalized_score": 1.1329305135951662 - }, - "gpqa": { - "name": "GPQA", - "value": 0.23825503355704697, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.36618749999999994, - "normalized_score": 3.4401041666666674 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.11120345744680851, - "normalized_score": 1.2448286052009452 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-15", - "submission_date": "2024-11-15", - "generation": 1, - "base_model": "vonjack/SmolLM2-135M-Merged (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.135, - "co2_cost": 0.6910205312158826 - } - }, - { - "id": "vonjack/SmolLM2-360M-Merged_bfloat16_32bceedf56b29a4a9fdd459a36fbc7fae5e274c8_True", - "model": { - "name": "vonjack/SmolLM2-360M-Merged", - "sha": "32bceedf56b29a4a9fdd459a36fbc7fae5e274c8", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 7.294376590792457, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.32058715319795916, - "normalized_score": 32.05871531979592 - }, - "bbh": { - "name": "BBH", - "value": 0.31548533684955926, - "normalized_score": 4.741734006734007 - }, - "math": { - "name": "MATH Level 5", - "value": 0.017371601208459216, - "normalized_score": 1.7371601208459215 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2558724832214765, - "normalized_score": 0.7829977628635317 - }, - "musr": { - "name": "MUSR", - "value": 0.3527291666666667, - "normalized_score": 3.357812500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10979055851063829, - "normalized_score": 1.087839834515365 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-15", - "submission_date": "2024-11-15", - "generation": 1, - "base_model": "vonjack/SmolLM2-360M-Merged (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 0.362, - "co2_cost": 0.7714837221663239 - } - }, - { - "id": "w4r10ck/SOLAR-10.7B-Instruct-v1.0-uncensored_float16_baa7b3899e85af4b2f02b01fd93f203872140d27_False", - "model": { - "name": "w4r10ck/SOLAR-10.7B-Instruct-v1.0-uncensored", - "sha": "baa7b3899e85af4b2f02b01fd93f203872140d27", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.621994458789136, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - 
"name": "IFEval", - "value": 0.38840609582574237, - "normalized_score": 38.84060958257423 - }, - "bbh": { - "name": "BBH", - "value": 0.5301525050503222, - "normalized_score": 33.858639234912964 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06570996978851963, - "normalized_score": 6.570996978851963 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.4639479166666667, - "normalized_score": 18.493489583333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3343583776595745, - "normalized_score": 26.03981973995272 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2023-12-14", - "submission_date": "2024-10-11", - "generation": 0, - "base_model": "w4r10ck/SOLAR-10.7B-Instruct-v1.0-uncensored", - "hub_license": "apache-2.0", - "hub_hearts": 36, - "params_billions": 10.732, - "co2_cost": 1.6039422180157412 - } - }, - { - "id": "wanlige/li-14b-v0.4_bfloat16_28003038d56fc3a65f3d807e8c4a527b437075dc_True", - "model": { - "name": "wanlige/li-14b-v0.4", - "sha": "28003038d56fc3a65f3d807e8c4a527b437075dc", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 43.65996249118637, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.813279875175645, - "normalized_score": 81.32798751756451 - }, - "bbh": { - "name": "BBH", - "value": 0.6544457993364277, - "normalized_score": 50.384177102490106 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5574018126888217, - "normalized_score": 55.740181268882175 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3389261744966443, - "normalized_score": 11.85682326621924 - }, - "musr": { - "name": "MUSR", - "value": 0.446, - "normalized_score": 16.349999999999998 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5167054521276596, - "normalized_score": 46.30060579196218 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-22", - "submission_date": "2025-02-26", - "generation": 1, - "base_model": "wanlige/li-14b-v0.4 (Merge)", - "hub_license": "", - "hub_hearts": 15, - "params_billions": 14.77, - "co2_cost": 4.999544646290045 - } - }, - { - "id": "wanlige/li-14b-v0.4-slerp_float16_7ce44a61559fd66cb2eeace825f22321fc9ce269_False", - "model": { - "name": "wanlige/li-14b-v0.4-slerp", - "sha": "7ce44a61559fd66cb2eeace825f22321fc9ce269", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 37.79262489578497, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4605967721201967, - "normalized_score": 46.059677212019665 - }, - "bbh": { - "name": "BBH", - "value": 0.6587180444175935, - "normalized_score": 51.046628177994485 - }, - "math": { - "name": "MATH Level 5", - "value": 0.41918429003021146, - "normalized_score": 41.918429003021146 - }, - "gpqa": { - "name": "GPQA", - "value": 0.4001677852348993, - "normalized_score": 20.022371364653242 - }, - "musr": { - "name": "MUSR", - "value": 0.47675, - "normalized_score": 19.127083333333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 
0.5372340425531915, - "normalized_score": 48.58156028368795 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-24", - "submission_date": "2025-02-26", - "generation": 1, - "base_model": "wanlige/li-14b-v0.4-slerp (Merge)", - "hub_license": "", - "hub_hearts": 6, - "params_billions": 14.766, - "co2_cost": 1.9885832311078941 - } - }, - { - "id": "wanlige/li-14b-v0.4-slerp0.1_bfloat16_7ce44a61559fd66cb2eeace825f22321fc9ce269_True", - "model": { - "name": "wanlige/li-14b-v0.4-slerp0.1", - "sha": "7ce44a61559fd66cb2eeace825f22321fc9ce269", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 42.906109293180656, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7922722819895655, - "normalized_score": 79.22722819895654 - }, - "bbh": { - "name": "BBH", - "value": 0.6571741435852609, - "normalized_score": 50.88127296750732 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5332326283987915, - "normalized_score": 53.32326283987915 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35906040268456374, - "normalized_score": 14.541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.4206666666666667, - "normalized_score": 11.750000000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5294215425531915, - "normalized_score": 47.71350472813239 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-24", - "submission_date": "2025-02-26", - "generation": 1, - "base_model": "wanlige/li-14b-v0.4-slerp0.1 (Merge)", - "hub_license": "", - "hub_hearts": 6, - "params_billions": 14.766, - "co2_cost": 1.5949348623708084 - } - }, - { - "id": "wannaphong/KhanomTanLLM-Instruct_bfloat16_351239c92c0ff3304d1dd98fdf4ac054a8c1acc3_True", - "model": { - "name": "wannaphong/KhanomTanLLM-Instruct", - "sha": "351239c92c0ff3304d1dd98fdf4ac054a8c1acc3", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 4.8192843205392455, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16211762567764643, - "normalized_score": 16.211762567764644 - }, - "bbh": { - "name": "BBH", - "value": 0.30931233392513263, - "normalized_score": 3.9448660598049243 - }, - "math": { - "name": "MATH Level 5", - "value": 0.013595166163141994, - "normalized_score": 1.3595166163141994 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2634228187919463, - "normalized_score": 1.7897091722595053 - }, - "musr": { - "name": "MUSR", - "value": 0.37006249999999996, - "normalized_score": 4.291145833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1118683510638298, - "normalized_score": 1.3187056737588652 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-24", - "submission_date": "2024-08-29", - "generation": 0, - "base_model": "wannaphong/KhanomTanLLM-Instruct", - "hub_license": "apache-2.0", - "hub_hearts": 3, - "params_billions": 3.447, - "co2_cost": 0.8034612656682864 - } - }, - { - "id": 
"waqasali1707/Beast-Soul-new_bfloat16_a23d68c4556d91a129de3f8fd8b9e0ff0890f4cc_False", - "model": { - "name": "waqasali1707/Beast-Soul-new", - "sha": "a23d68c4556d91a129de3f8fd8b9e0ff0890f4cc", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 22.10838822353402, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5029865202108184, - "normalized_score": 50.298652021081836 - }, - "bbh": { - "name": "BBH", - "value": 0.522494907014536, - "normalized_score": 33.044262388969805 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0702416918429003, - "normalized_score": 7.02416918429003 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.4485625, - "normalized_score": 14.503645833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3107546542553192, - "normalized_score": 23.417183806146575 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-07", - "submission_date": "2024-08-07", - "generation": 1, - "base_model": "waqasali1707/Beast-Soul-new (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 1.2737761309085094 - } - }, - { - "id": "wave-on-discord/qwent-7b_bfloat16_40000e76d2a4d0ad054aff9fe873c5beb0e4925e_False", - "model": { - "name": "wave-on-discord/qwent-7b", - "sha": "40000e76d2a4d0ad054aff9fe873c5beb0e4925e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 8.797033099146908, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.20148539209297997, - "normalized_score": 20.148539209297997 - }, - "bbh": { - "name": "BBH", - "value": 0.4228103286118343, - "normalized_score": 18.066398100675865 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0037764350453172208, - "normalized_score": 0.3776435045317221 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2651006711409396, - "normalized_score": 2.0134228187919474 - }, - "musr": { - "name": "MUSR", - "value": 0.38165625000000003, - "normalized_score": 5.473697916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.16032247340425532, - "normalized_score": 6.702497044917257 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-30", - "submission_date": "2024-09-30", - "generation": 1, - "base_model": "wave-on-discord/qwent-7b (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 2.646991877058367 - } - }, - { - "id": "weathermanj/Menda-3B-500_float16_aff308a2ed453aa67e059bdf16a9eba2c72f2497_True", - "model": { - "name": "weathermanj/Menda-3B-500", - "sha": "aff308a2ed453aa67e059bdf16a9eba2c72f2497", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 27.910059821789428, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6353021095138676, - "normalized_score": 63.53021095138676 - }, - "bbh": { - "name": "BBH", 
- "value": 0.4766312519942703, - "normalized_score": 26.596424541570496 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3723564954682779, - "normalized_score": 37.235649546827794 - }, - "gpqa": { - "name": "GPQA", - "value": 0.287751677852349, - "normalized_score": 5.033557046979867 - }, - "musr": { - "name": "MUSR", - "value": 0.39679166666666665, - "normalized_score": 7.565625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3474900265957447, - "normalized_score": 27.498891843971627 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-10", - "submission_date": "2025-03-10", - "generation": 0, - "base_model": "weathermanj/Menda-3B-500", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 3.086, - "co2_cost": 0.7291662157880165 - } - }, - { - "id": "weathermanj/Menda-3b-750_float16_0aeddae1b5f658ff21023e134438c030e90955de_True", - "model": { - "name": "weathermanj/Menda-3b-750", - "sha": "0aeddae1b5f658ff21023e134438c030e90955de", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 27.833449062404906, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6335035483627884, - "normalized_score": 63.350354836278854 - }, - "bbh": { - "name": "BBH", - "value": 0.4736825577251204, - "normalized_score": 26.375984192530563 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3716012084592145, - "normalized_score": 37.160120845921455 - }, - "gpqa": { - "name": "GPQA", - "value": 0.287751677852349, - "normalized_score": 5.033557046979867 - }, - "musr": { - "name": "MUSR", - "value": 0.39418749999999997, - "normalized_score": 7.240104166666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3505651595744681, - "normalized_score": 27.840573286052013 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-09", - "submission_date": "2025-03-09", - "generation": 0, - "base_model": "weathermanj/Menda-3b-750", - "hub_license": "other", - "hub_hearts": 1, - "params_billions": 3.086, - "co2_cost": 0.7407846285793263 - } - }, - { - "id": "weathermanj/Menda-3b-Optim-100_float16_fdf027643cafe4c8b88368928eab13a366a4c546_True", - "model": { - "name": "weathermanj/Menda-3b-Optim-100", - "sha": "fdf027643cafe4c8b88368928eab13a366a4c546", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 27.957516413922587, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6398234462337709, - "normalized_score": 63.98234462337709 - }, - "bbh": { - "name": "BBH", - "value": 0.47348022177793836, - "normalized_score": 26.024032164105297 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3716012084592145, - "normalized_score": 37.160120845921455 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28942953020134227, - "normalized_score": 5.257270693512303 - }, - "musr": { - "name": "MUSR", - "value": 0.39930208333333334, - "normalized_score": 7.979427083333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3460771276595745, - "normalized_score": 27.341903073286055 - } - }, - "features": { 
- "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-10", - "submission_date": "2025-03-10", - "generation": 0, - "base_model": "weathermanj/Menda-3b-Optim-100", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 3.086, - "co2_cost": 0.7290687191951919 - } - }, - { - "id": "weathermanj/Menda-3b-Optim-200_float16_3328a697d8dc4d1421119475f5c56bd3ede751d4_True", - "model": { - "name": "weathermanj/Menda-3b-Optim-200", - "sha": "3328a697d8dc4d1421119475f5c56bd3ede751d4", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 27.967747014863694, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6374752323834094, - "normalized_score": 63.74752323834093 - }, - "bbh": { - "name": "BBH", - "value": 0.47460604908284837, - "normalized_score": 26.07213126581908 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3731117824773414, - "normalized_score": 37.31117824773414 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2827181208053691, - "normalized_score": 4.36241610738255 - }, - "musr": { - "name": "MUSR", - "value": 0.40330208333333334, - "normalized_score": 8.712760416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3484042553191489, - "normalized_score": 27.60047281323877 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-10", - "submission_date": "2025-03-10", - "generation": 0, - "base_model": "weathermanj/Menda-3b-Optim-200", - "hub_license": "other", - "hub_hearts": 0, - "params_billions": 3.086, - "co2_cost": 0.694796800217475 - } - }, - { - "id": "win10/ArliAI-RPMax-v1.3-merge-13.3B_bfloat16_4d3ed351827f1afc1652e13aafeb1eae79b8f562_True", - "model": { - "name": "win10/ArliAI-RPMax-v1.3-merge-13.3B", - "sha": "4d3ed351827f1afc1652e13aafeb1eae79b8f562", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.531629526309775, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.3038260703821416, - "normalized_score": 30.38260703821416 - }, - "bbh": { - "name": "BBH", - "value": 0.4581388671914119, - "normalized_score": 23.029799582073213 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03927492447129909, - "normalized_score": 3.927492447129909 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.4325104166666667, - "normalized_score": 14.16380208333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.31998005319148937, - "normalized_score": 24.442228132387704 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-16", - "submission_date": "2024-11-17", - "generation": 1, - "base_model": "win10/ArliAI-RPMax-v1.3-merge-13.3B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 13.265, - "co2_cost": 2.9026103780280716 - } - }, - { - "id": 
"win10/Breeze-13B-32k-Instruct-v1_0_bfloat16_220c957cf5d9c534a4ef75c11a18221c461de40a_True", - "model": { - "name": "win10/Breeze-13B-32k-Instruct-v1_0", - "sha": "220c957cf5d9c534a4ef75c11a18221c461de40a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 15.461558427858103, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.35843118481185476, - "normalized_score": 35.84311848118548 - }, - "bbh": { - "name": "BBH", - "value": 0.46112304746712934, - "normalized_score": 25.258698638977545 - }, - "math": { - "name": "MATH Level 5", - "value": 0.01283987915407855, - "normalized_score": 1.283987915407855 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26426174496644295, - "normalized_score": 1.9015659955257262 - }, - "musr": { - "name": "MUSR", - "value": 0.42019791666666667, - "normalized_score": 11.058072916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2568151595744681, - "normalized_score": 17.423906619385342 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-26", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "win10/Breeze-13B-32k-Instruct-v1_0", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 12.726, - "co2_cost": 2.897622042858872 - } - }, - { - "id": "win10/EVA-Norns-Qwen2.5-v0.1_bfloat16_90c3ca66e700b4a7d2c509634f9b9748a2e4c3ab_True", - "model": { - "name": "win10/EVA-Norns-Qwen2.5-v0.1", - "sha": "90c3ca66e700b4a7d2c509634f9b9748a2e4c3ab", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 26.432796717883942, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6219630580193884, - "normalized_score": 62.19630580193885 - }, - "bbh": { - "name": "BBH", - "value": 0.507240838017382, - "normalized_score": 30.060941501467855 - }, - "math": { - "name": "MATH Level 5", - "value": 0.26132930513595165, - "normalized_score": 26.132930513595166 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28523489932885904, - "normalized_score": 4.697986577181204 - }, - "musr": { - "name": "MUSR", - "value": 0.40451041666666665, - "normalized_score": 8.563802083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3425033244680851, - "normalized_score": 26.944813829787233 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-17", - "submission_date": "2024-11-18", - "generation": 1, - "base_model": "win10/EVA-Norns-Qwen2.5-v0.1 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 7.616, - "co2_cost": 1.313321814515779 - } - }, - { - "id": "win10/Llama-3.2-3B-Instruct-24-9-29_float16_4defb10e2415111abb873d695dd40c387c1d6d57_True", - "model": { - "name": "win10/Llama-3.2-3B-Instruct-24-9-29", - "sha": "4defb10e2415111abb873d695dd40c387c1d6d57", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.004698090064014, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7332211864519476, - "normalized_score": 
73.32211864519475 - }, - "bbh": { - "name": "BBH", - "value": 0.4614234982167829, - "normalized_score": 24.196425775209622 - }, - "math": { - "name": "MATH Level 5", - "value": 0.17069486404833836, - "normalized_score": 17.069486404833835 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.35552083333333334, - "normalized_score": 1.4401041666666685 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3228058510638298, - "normalized_score": 24.756205673758867 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-29", - "submission_date": "2024-10-11", - "generation": 2, - "base_model": "meta-llama/Llama-3.2-3B-Instruct", - "hub_license": "llama3.2", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 1.4272112484883925 - } - }, - { - "id": "win10/Norns-Qwen2.5-12B_bfloat16_464793295c8633a95e6faedad24dfa8f0fd35663_True", - "model": { - "name": "win10/Norns-Qwen2.5-12B", - "sha": "464793295c8633a95e6faedad24dfa8f0fd35663", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 17.708127562863925, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.48969733640074997, - "normalized_score": 48.969733640075 - }, - "bbh": { - "name": "BBH", - "value": 0.46189201103923744, - "normalized_score": 23.769257476895735 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08383685800604229, - "normalized_score": 8.38368580060423 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.3554895833333333, - "normalized_score": 2.2028645833333327 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2660405585106383, - "normalized_score": 18.44895094562648 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-17", - "submission_date": "2024-11-17", - "generation": 1, - "base_model": "win10/Norns-Qwen2.5-12B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 12.277, - "co2_cost": 3.2459439428137693 - } - }, - { - "id": "win10/Norns-Qwen2.5-7B_bfloat16_148d9156f734a8050812892879cf13d1ca01f137_True", - "model": { - "name": "win10/Norns-Qwen2.5-7B", - "sha": "148d9156f734a8050812892879cf13d1ca01f137", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 26.380789509358355, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6122211288270678, - "normalized_score": 61.22211288270678 - }, - "bbh": { - "name": "BBH", - "value": 0.5072887832228614, - "normalized_score": 30.250415044309648 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2628398791540785, - "normalized_score": 26.283987915407852 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28439597315436244, - "normalized_score": 4.5861297539149914 - }, - "musr": { - "name": "MUSR", - "value": 0.40847916666666667, - "normalized_score": 9.1265625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34133976063829785, - "normalized_score": 26.81552895981087 
- } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-17", - "submission_date": "2024-11-18", - "generation": 1, - "base_model": "win10/Norns-Qwen2.5-7B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 7.616, - "co2_cost": 1.299827184284067 - } - }, - { - "id": "win10/Qwen2.5-2B-Instruct_bfloat16_6cc7fca3447d50772978d2d7dec255abdc73d54b_False", - "model": { - "name": "win10/Qwen2.5-2B-Instruct", - "sha": "6cc7fca3447d50772978d2d7dec255abdc73d54b", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 10.57067746533664, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22728914834860392, - "normalized_score": 22.728914834860394 - }, - "bbh": { - "name": "BBH", - "value": 0.3705905854806977, - "normalized_score": 12.07194568570475 - }, - "math": { - "name": "MATH Level 5", - "value": 0.022658610271903322, - "normalized_score": 2.2658610271903323 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2676174496644295, - "normalized_score": 2.348993288590602 - }, - "musr": { - "name": "MUSR", - "value": 0.43784375000000003, - "normalized_score": 13.63046875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.19340093085106383, - "normalized_score": 10.37788120567376 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-11", - "submission_date": "2024-12-20", - "generation": 1, - "base_model": "win10/Qwen2.5-2B-Instruct (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 2.9, - "co2_cost": 2.052183070259754 - } - }, - { - "id": "win10/llama3-13.45b-Instruct_bfloat16_94cc0f415e355c6d3d47168a6ff5239ca586904a_True", - "model": { - "name": "win10/llama3-13.45b-Instruct", - "sha": "94cc0f415e355c6d3d47168a6ff5239ca586904a", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.340222099927356, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4144348107465968, - "normalized_score": 41.44348107465968 - }, - "bbh": { - "name": "BBH", - "value": 0.486541523346714, - "normalized_score": 26.67569043948038 - }, - "math": { - "name": "MATH Level 5", - "value": 0.02416918429003021, - "normalized_score": 2.416918429003021 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.38476041666666666, - "normalized_score": 6.3283854166666655 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3345246010638298, - "normalized_score": 26.0582890070922 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-09", - "submission_date": "2024-06-26", - "generation": 1, - "base_model": "win10/llama3-13.45b-Instruct (Merge)", - "hub_license": "llama3", - "hub_hearts": 1, - "params_billions": 13.265, - "co2_cost": 4.273069289198686 - } - }, - { - "id": "win10/miscii-14b-1M-0128_bfloat16_dfe9d4fbb26228489f18691f045ac9ef309dc3bd_False", - "model": { - 
"name": "win10/miscii-14b-1M-0128", - "sha": "dfe9d4fbb26228489f18691f045ac9ef309dc3bd", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 35.33959615681345, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4180818007331658, - "normalized_score": 41.80818007331658 - }, - "bbh": { - "name": "BBH", - "value": 0.5741994518517665, - "normalized_score": 37.27434361892823 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4773413897280967, - "normalized_score": 47.73413897280967 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3825503355704698, - "normalized_score": 17.67337807606264 - }, - "musr": { - "name": "MUSR", - "value": 0.5431041666666667, - "normalized_score": 28.7546875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44913563829787234, - "normalized_score": 38.79284869976359 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-28", - "submission_date": "2025-01-30", - "generation": 1, - "base_model": "win10/miscii-14b-1M-0128 (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 2, - "params_billions": 14.766, - "co2_cost": 3.7769663221798067 - } - }, - { - "id": "winglian/Llama-3-8b-64k-PoSE_bfloat16_5481d9b74a3ec5a95789673e194c8ff86e2bc2bc_True", - "model": { - "name": "winglian/Llama-3-8b-64k-PoSE", - "sha": "5481d9b74a3ec5a95789673e194c8ff86e2bc2bc", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 11.143207450567283, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.28569085581811815, - "normalized_score": 28.569085581811816 - }, - "bbh": { - "name": "BBH", - "value": 0.37021796005121793, - "normalized_score": 13.30731679540503 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04154078549848943, - "normalized_score": 4.1540785498489425 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.33955208333333337, - "normalized_score": 3.077343750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2466755319148936, - "normalized_score": 16.297281323877066 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-24", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "winglian/Llama-3-8b-64k-PoSE", - "hub_license": "", - "hub_hearts": 75, - "params_billions": 8.03, - "co2_cost": 1.8220424320327848 - } - }, - { - "id": "winglian/llama-3-8b-256k-PoSE_bfloat16_93e7b0b6433c96583ffcef3bc47203e6fdcbbe8b_True", - "model": { - "name": "winglian/llama-3-8b-256k-PoSE", - "sha": "93e7b0b6433c96583ffcef3bc47203e6fdcbbe8b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 6.633243929125161, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2909114482905358, - "normalized_score": 29.091144829053576 - }, - "bbh": { - "name": "BBH", - "value": 0.3156583397739859, - "normalized_score": 
5.502848923020156 - }, - "math": { - "name": "MATH Level 5", - "value": 0.019637462235649546, - "normalized_score": 1.9637462235649545 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2575503355704698, - "normalized_score": 1.0067114093959737 - }, - "musr": { - "name": "MUSR", - "value": 0.33155208333333336, - "normalized_score": 0.9440104166666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.1116190159574468, - "normalized_score": 1.2910017730496441 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-26", - "submission_date": "2024-06-26", - "generation": 0, - "base_model": "winglian/llama-3-8b-256k-PoSE", - "hub_license": "", - "hub_hearts": 42, - "params_billions": 8.03, - "co2_cost": 2.101445621651442 - } - }, - { - "id": "wzhouad/gemma-2-9b-it-WPO-HB_bfloat16_5934cb2faf589341e96e2e79aec82b2d4b7be252_True", - "model": { - "name": "wzhouad/gemma-2-9b-it-WPO-HB", - "sha": "5934cb2faf589341e96e2e79aec82b2d4b7be252", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 24.97756909058171, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5437029304467702, - "normalized_score": 54.37029304467701 - }, - "bbh": { - "name": "BBH", - "value": 0.5628624376751974, - "normalized_score": 36.66169583479774 - }, - "math": { - "name": "MATH Level 5", - "value": 0.15332326283987915, - "normalized_score": 15.332326283987916 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3498322147651007, - "normalized_score": 13.31096196868009 - }, - "musr": { - "name": "MUSR", - "value": 0.3674583333333333, - "normalized_score": 3.9656250000000006 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.33602061170212766, - "normalized_score": 26.22451241134752 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-08", - "submission_date": "2025-01-07", - "generation": 2, - "base_model": "google/gemma-2-9b", - "hub_license": "", - "hub_hearts": 34, - "params_billions": 9.242, - "co2_cost": 5.179894574744527 - } - }, - { - "id": "x0000001/Deepseek-Lumen-R1-Qwen2.5-14B_bfloat16_a5dfd03848e2d1accf4e3de52fa565d27f4bcf99_True", - "model": { - "name": "x0000001/Deepseek-Lumen-R1-Qwen2.5-14B", - "sha": "a5dfd03848e2d1accf4e3de52fa565d27f4bcf99", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 26.028524707214856, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4436107306391486, - "normalized_score": 44.36107306391486 - }, - "bbh": { - "name": "BBH", - "value": 0.45690468424066283, - "normalized_score": 22.725259750809624 - }, - "math": { - "name": "MATH Level 5", - "value": 0.27794561933534745, - "normalized_score": 27.794561933534744 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28523489932885904, - "normalized_score": 4.697986577181204 - }, - "musr": { - "name": "MUSR", - "value": 0.47396875000000005, - "normalized_score": 19.04609375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4379155585106383, - "normalized_score": 37.54617316784869 - } - }, - "features": { - "is_not_available_on_hub": 
true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-28", - "submission_date": "2025-01-29", - "generation": 1, - "base_model": "x0000001/Deepseek-Lumen-R1-Qwen2.5-14B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 14.77, - "co2_cost": 4.47491141011125 - } - }, - { - "id": "xMaulana/FinMatcha-3B-Instruct_float16_be2c0c04fc4dc3fb93631e3c663721da92fea8fc_True", - "model": { - "name": "xMaulana/FinMatcha-3B-Instruct", - "sha": "be2c0c04fc4dc3fb93631e3c663721da92fea8fc", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.14212432324268, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7548283000217202, - "normalized_score": 75.48283000217202 - }, - "bbh": { - "name": "BBH", - "value": 0.453555265188897, - "normalized_score": 23.191022969435476 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14350453172205438, - "normalized_score": 14.350453172205437 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26929530201342283, - "normalized_score": 2.572706935123044 - }, - "musr": { - "name": "MUSR", - "value": 0.36333333333333334, - "normalized_score": 5.016666666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3181515957446808, - "normalized_score": 24.23906619385342 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-29", - "submission_date": "2024-10-22", - "generation": 1, - "base_model": "xMaulana/FinMatcha-3B-Instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 3.213, - "co2_cost": 7.3131482898189555 - } - }, - { - "id": "xinchen9/Llama3.1_8B_Instruct_CoT_bfloat16_cab1b33ddff08de11c5daea8ae079d126d503d8b_False", - "model": { - "name": "xinchen9/Llama3.1_8B_Instruct_CoT", - "sha": "cab1b33ddff08de11c5daea8ae079d126d503d8b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 16.316624597778073, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2973565694579272, - "normalized_score": 29.73565694579272 - }, - "bbh": { - "name": "BBH", - "value": 0.4398206147249642, - "normalized_score": 21.14286611806116 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06042296072507553, - "normalized_score": 6.042296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - "musr": { - "name": "MUSR", - "value": 0.43706249999999996, - "normalized_score": 13.166145833333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2878989361702128, - "normalized_score": 20.877659574468087 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-16", - "submission_date": "2024-09-19", - "generation": 0, - "base_model": "xinchen9/Llama3.1_8B_Instruct_CoT", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 2.8041996739094426 - } - }, - { - "id": 
"xinchen9/Llama3.1_CoT_bfloat16_3cb467f51a59ff163bb942fcde3ef60573c12b79_True", - "model": { - "name": "xinchen9/Llama3.1_CoT", - "sha": "3cb467f51a59ff163bb942fcde3ef60573c12b79", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.741515006547855, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.22461624046419057, - "normalized_score": 22.461624046419058 - }, - "bbh": { - "name": "BBH", - "value": 0.43410143664277245, - "normalized_score": 19.899123541883053 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03851963746223565, - "normalized_score": 3.8519637462235647 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28859060402684567, - "normalized_score": 5.145413870246088 - }, - "musr": { - "name": "MUSR", - "value": 0.43045833333333333, - "normalized_score": 11.773958333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2738530585106383, - "normalized_score": 19.317006501182032 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-04", - "submission_date": "2024-09-06", - "generation": 0, - "base_model": "xinchen9/Llama3.1_CoT", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.9001976883287475 - } - }, - { - "id": "xinchen9/Llama3.1_CoT_V1_bfloat16_c5ed4b8bfc364ebae1843af14799818551f5251f_False", - "model": { - "name": "xinchen9/Llama3.1_CoT_V1", - "sha": "c5ed4b8bfc364ebae1843af14799818551f5251f", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 14.734826092960331, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2452991396162183, - "normalized_score": 24.52991396162183 - }, - "bbh": { - "name": "BBH", - "value": 0.4376001847280673, - "normalized_score": 20.166003338515427 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03323262839879154, - "normalized_score": 3.3232628398791544 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.45721875, - "normalized_score": 16.419010416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2805019946808511, - "normalized_score": 20.05577718676123 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-06", - "submission_date": "2024-09-07", - "generation": 0, - "base_model": "xinchen9/Llama3.1_CoT_V1", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 2.8211828454337167 - } - }, - { - "id": "xinchen9/Mistral-7B-CoT_bfloat16_9a3c8103dac20d5497d1b8fc041bb5125ff4dc00_False", - "model": { - "name": "xinchen9/Mistral-7B-CoT", - "sha": "9a3c8103dac20d5497d1b8fc041bb5125ff4dc00", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 11.26567589705759, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2783470081605695, - "normalized_score": 27.83470081605695 - }, - 
"bbh": { - "name": "BBH", - "value": 0.38726762098069667, - "normalized_score": 14.80619341451162 - }, - "math": { - "name": "MATH Level 5", - "value": 0.024924471299093656, - "normalized_score": 2.492447129909366 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24916107382550334, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.3994270833333333, - "normalized_score": 8.195052083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2283909574468085, - "normalized_score": 14.265661938534278 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-09", - "submission_date": "2024-09-23", - "generation": 0, - "base_model": "xinchen9/Mistral-7B-CoT", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 2.515921799473043 - } - }, - { - "id": "xinchen9/llama3-b8-ft-dis_float16_e4da730f28f79543262de37908943c35f8df81fe_False", - "model": { - "name": "xinchen9/llama3-b8-ft-dis", - "sha": "e4da730f28f79543262de37908943c35f8df81fe", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 13.97349198756153, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.154598687039278, - "normalized_score": 15.459868703927802 - }, - "bbh": { - "name": "BBH", - "value": 0.4625789691224553, - "normalized_score": 24.72745698442778 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03927492447129909, - "normalized_score": 3.927492447129909 - }, - "gpqa": { - "name": "GPQA", - "value": 0.31291946308724833, - "normalized_score": 8.389261744966444 - }, - "musr": { - "name": "MUSR", - "value": 0.365375, - "normalized_score": 6.405208333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3243849734042553, - "normalized_score": 24.931663711583923 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-06-28", - "submission_date": "2024-07-11", - "generation": 0, - "base_model": "xinchen9/llama3-b8-ft-dis", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 2.1246535608556596 - } - }, - { - "id": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_2b-table_bfloat16_c083d6796f54f66b4cec2261657a02801c761093_True", - "model": { - "name": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_2b-table", - "sha": "c083d6796f54f66b4cec2261657a02801c761093", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.823848851733704, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6374752323834094, - "normalized_score": 63.74752323834093 - }, - "bbh": { - "name": "BBH", - "value": 0.4912273915261041, - "normalized_score": 27.42282120153998 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09214501510574018, - "normalized_score": 9.214501510574017 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.38199999999999995, - "normalized_score": 5.483333333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3686003989361702, - "normalized_score": 
29.844488770685572 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-30", - "submission_date": "2024-10-01", - "generation": 0, - "base_model": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_2b-table", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2484614630480797 - } - }, - { - "id": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_8b-table_bfloat16_5416d34b5243559914a377ee9d95ce4830bf8dba_True", - "model": { - "name": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_8b-table", - "sha": "5416d34b5243559914a377ee9d95ce4830bf8dba", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.502405108853072, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7274509412802475, - "normalized_score": 72.74509412802476 - }, - "bbh": { - "name": "BBH", - "value": 0.5056858683165713, - "normalized_score": 29.398353220629613 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08459214501510574, - "normalized_score": 8.459214501510575 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.38190624999999995, - "normalized_score": 5.104947916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3696808510638298, - "normalized_score": 29.964539007092196 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-30", - "submission_date": "2024-10-01", - "generation": 0, - "base_model": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_8b-table", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.5005271922689942 - } - }, - { - "id": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_2b-table_bfloat16_235204157d7fac0d64fa609d5aee3cebb49ccd11_True", - "model": { - "name": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_2b-table", - "sha": "235204157d7fac0d64fa609d5aee3cebb49ccd11", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.639173242532337, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6568593553992297, - "normalized_score": 65.68593553992297 - }, - "bbh": { - "name": "BBH", - "value": 0.49518319163897667, - "normalized_score": 27.695199510550037 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0891238670694864, - "normalized_score": 8.91238670694864 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.35939583333333336, - "normalized_score": 2.291145833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37017952127659576, - "normalized_score": 30.019946808510632 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-30", - "submission_date": "2024-09-30", - "generation": 0, - "base_model": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_2b-table", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3434827058247885 - } - 
}, - { - "id": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_8b-table_bfloat16_9db00cbbba84453b18956fcc76f264f94a205955_True", - "model": { - "name": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_8b-table", - "sha": "9db00cbbba84453b18956fcc76f264f94a205955", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.073734600438332, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6620799478716473, - "normalized_score": 66.20799478716472 - }, - "bbh": { - "name": "BBH", - "value": 0.500449109241973, - "normalized_score": 28.508587310114308 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08610271903323263, - "normalized_score": 8.610271903323262 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.3805416666666666, - "normalized_score": 5.001041666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3599567819148936, - "normalized_score": 28.88408687943262 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-30", - "submission_date": "2024-09-30", - "generation": 0, - "base_model": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_8b-table", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.438456591582074 - } - }, - { - "id": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_2b-table-0.001_bfloat16_1062757826de031a4ae82277e6e737e19e82e514_True", - "model": { - "name": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_2b-table-0.001", - "sha": "1062757826de031a4ae82277e6e737e19e82e514", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.42453458288846, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6042278931014153, - "normalized_score": 60.42278931014154 - }, - "bbh": { - "name": "BBH", - "value": 0.4936062924421171, - "normalized_score": 27.61371406788812 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09969788519637462, - "normalized_score": 9.969788519637463 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.3793333333333333, - "normalized_score": 5.216666666666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.370844414893617, - "normalized_score": 30.093823877068555 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-30", - "submission_date": "2024-10-01", - "generation": 0, - "base_model": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_2b-table-0.001", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2300050028863556 - } - }, - { - "id": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_8b-table-0.002_bfloat16_e5d2f179b4a7bd851dcf2b7db6358b13001bf1af_True", - "model": { - "name": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_8b-table-0.002", - "sha": "e5d2f179b4a7bd851dcf2b7db6358b13001bf1af", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 
24.203175690452312, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7131876753680235, - "normalized_score": 71.31876753680235 - }, - "bbh": { - "name": "BBH", - "value": 0.4996376240562969, - "normalized_score": 28.574878539626724 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08534743202416918, - "normalized_score": 8.534743202416918 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.3872083333333333, - "normalized_score": 6.067708333333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3664394946808511, - "normalized_score": 29.604388297872337 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-30", - "submission_date": "2024-10-01", - "generation": 0, - "base_model": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_8b-table-0.002", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.6829369566351955 - } - }, - { - "id": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_2b-table-0.001_bfloat16_0e319ad47ed2b2636b72d07ee9b32657e1e50412_True", - "model": { - "name": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_2b-table-0.001", - "sha": "0e319ad47ed2b2636b72d07ee9b32657e1e50412", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.791088842864806, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.594710922574325, - "normalized_score": 59.4710922574325 - }, - "bbh": { - "name": "BBH", - "value": 0.48992211803775065, - "normalized_score": 26.943904089240508 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10725075528700906, - "normalized_score": 10.725075528700906 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.35809374999999993, - "normalized_score": 2.328385416666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37042885638297873, - "normalized_score": 30.04765070921986 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-30", - "submission_date": "2024-09-30", - "generation": 0, - "base_model": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_2b-table-0.001", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.359682794746034 - } - }, - { - "id": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_8b-table-0.002_bfloat16_0877f2458ea667edcf9213383df41294c788190f_True", - "model": { - "name": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_8b-table-0.002", - "sha": "0877f2458ea667edcf9213383df41294c788190f", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.12157632033239, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6453188650558297, - "normalized_score": 64.53188650558296 - }, - "bbh": { - "name": "BBH", - "value": 0.4951075713814987, - "normalized_score": 28.046977965255564 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09365558912386707, - "normalized_score": 
9.365558912386707 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.393875, - "normalized_score": 7.334375000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3529753989361702, - "normalized_score": 28.10837765957446 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-30", - "submission_date": "2024-10-01", - "generation": 0, - "base_model": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_8b-table-0.002", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.5382385227104545 - } - }, - { - "id": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_2b-table_bfloat16_d2b87100e5ba3215fddbd308bb17b7bf12fe6c9e_True", - "model": { - "name": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_2b-table", - "sha": "d2b87100e5ba3215fddbd308bb17b7bf12fe6c9e", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 21.357658866973082, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.575601625908146, - "normalized_score": 57.5601625908146 - }, - "bbh": { - "name": "BBH", - "value": 0.4901206199104098, - "normalized_score": 26.866404089240515 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09969788519637462, - "normalized_score": 9.969788519637463 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.36596874999999995, - "normalized_score": 2.9794270833333325 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36585771276595747, - "normalized_score": 29.539745862884164 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-28", - "submission_date": "2024-09-29", - "generation": 0, - "base_model": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_2b-table", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.9728607299994527 - } - }, - { - "id": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_8b-table_bfloat16_19a48ccf5ea463afbbbc61d650b8fb63ff2d94c7_True", - "model": { - "name": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_8b-table", - "sha": "19a48ccf5ea463afbbbc61d650b8fb63ff2d94c7", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.132871091204738, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7034457461757027, - "normalized_score": 70.34457461757027 - }, - "bbh": { - "name": "BBH", - "value": 0.5091868512191421, - "normalized_score": 29.73123940180749 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09667673716012085, - "normalized_score": 9.667673716012084 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.37390624999999994, - "normalized_score": 3.904947916666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3692652925531915, - "normalized_score": 29.918365839243506 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - 
"is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-28", - "submission_date": "2024-09-29", - "generation": 0, - "base_model": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_8b-table", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.1803061762843055 - } - }, - { - "id": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_2b-table_bfloat16_0fe230b3432fb2b0f89942d7926291a4dbeb2820_True", - "model": { - "name": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_2b-table", - "sha": "0fe230b3432fb2b0f89942d7926291a4dbeb2820", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.08358104722241, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6023794642659255, - "normalized_score": 60.23794642659256 - }, - "bbh": { - "name": "BBH", - "value": 0.49695315361511977, - "normalized_score": 27.892403263090213 - }, - "math": { - "name": "MATH Level 5", - "value": 0.1042296072507553, - "normalized_score": 10.42296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.36736458333333327, - "normalized_score": 3.187239583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3657746010638298, - "normalized_score": 29.53051122931442 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-28", - "submission_date": "2024-09-29", - "generation": 0, - "base_model": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_2b-table", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3310428701238795 - } - }, - { - "id": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_8b-table_bfloat16_d1e19da1029f2d4d45de015754bc52dcb1ea5570_True", - "model": { - "name": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_8b-table", - "sha": "d1e19da1029f2d4d45de015754bc52dcb1ea5570", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.235948102250973, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6620300801872365, - "normalized_score": 66.20300801872366 - }, - "bbh": { - "name": "BBH", - "value": 0.49999369392208165, - "normalized_score": 28.43982384277912 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09365558912386707, - "normalized_score": 9.365558912386707 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.38181249999999994, - "normalized_score": 5.1265624999999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3614527925531915, - "normalized_score": 29.050310283687946 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-28", - "submission_date": "2024-09-29", - "generation": 0, - "base_model": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_8b-table", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.1768382510919406 - } - }, - { - "id": 
"xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_2b-table-0.001_bfloat16_a478aa202c59773eba615ae37feb4cc750757695_True", - "model": { - "name": "xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_2b-table-0.001", - "sha": "a478aa202c59773eba615ae37feb4cc750757695", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.90534117256461, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5336363072203975, - "normalized_score": 53.363630722039744 - }, - "bbh": { - "name": "BBH", - "value": 0.49148727192613517, - "normalized_score": 27.145373836403248 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09818731117824774, - "normalized_score": 9.818731117824774 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.37796874999999996, - "normalized_score": 4.712760416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3624501329787234, - "normalized_score": 29.161125886524825 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-28", - "submission_date": "2024-09-29", - "generation": 0, - "base_model": "xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_2b-table-0.001", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.1728857969296893 - } - }, - { - "id": "xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_8b-table-0.002_float16_8ef9ef7e2bf522e707a7b090af55f2ec1eafd4b9_True", - "model": { - "name": "xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_8b-table-0.002", - "sha": "8ef9ef7e2bf522e707a7b090af55f2ec1eafd4b9", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.550849013748405, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6851609285584471, - "normalized_score": 68.5160928558447 - }, - "bbh": { - "name": "BBH", - "value": 0.507516320435292, - "normalized_score": 29.740550305634912 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07175226586102719, - "normalized_score": 7.175226586102719 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25838926174496646, - "normalized_score": 1.1185682326621946 - }, - "musr": { - "name": "MUSR", - "value": 0.3831770833333333, - "normalized_score": 5.630468750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3621176861702128, - "normalized_score": 29.12418735224587 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-28", - "submission_date": "2024-09-29", - "generation": 0, - "base_model": "xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_8b-table-0.002", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.7389475842988935 - } - }, - { - "id": "xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_2b-table-0.001_bfloat16_86673872245ad902f8d466bdc20edae9c115b965_True", - "model": { - "name": "xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_2b-table-0.001", - "sha": "86673872245ad902f8d466bdc20edae9c115b965", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - 
"average_score": 20.7748680596979, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5482242671666733, - "normalized_score": 54.82242671666733 - }, - "bbh": { - "name": "BBH", - "value": 0.48871746894288526, - "normalized_score": 26.839803365680336 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0891238670694864, - "normalized_score": 8.91238670694864 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, - "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.3632708333333334, - "normalized_score": 2.9421874999999997 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36710438829787234, - "normalized_score": 29.678265366430256 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-28", - "submission_date": "2024-09-29", - "generation": 0, - "base_model": "xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_2b-table-0.001", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3501876436837452 - } - }, - { - "id": "xukp20/llama-3-8b-instruct-sppo-iter1-gp-2b-tau01-table_float16_abb3afe2b0398b24ed823b0124c8a72d354487bd_True", - "model": { - "name": "xukp20/llama-3-8b-instruct-sppo-iter1-gp-2b-tau01-table", - "sha": "abb3afe2b0398b24ed823b0124c8a72d354487bd", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.684961068627572, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6900069593124022, - "normalized_score": 69.00069593124022 - }, - "bbh": { - "name": "BBH", - "value": 0.4978456981516493, - "normalized_score": 28.11988708608538 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10498489425981873, - "normalized_score": 10.498489425981873 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.3673333333333333, - "normalized_score": 3.0833333333333326 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37159242021276595, - "normalized_score": 30.17693557919622 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-22", - "submission_date": "2024-09-23", - "generation": 0, - "base_model": "xukp20/llama-3-8b-instruct-sppo-iter1-gp-2b-tau01-table", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 2.0678831259876036 - } - }, - { - "id": "xwen-team/Xwen-7B-Chat_bfloat16_d7318b170105d022ab3c5a5d56e385a838f1fae9_True", - "model": { - "name": "xwen-team/Xwen-7B-Chat", - "sha": "d7318b170105d022ab3c5a5d56e385a838f1fae9", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 31.576751501854343, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6864098370102439, - "normalized_score": 68.6409837010244 - }, - "bbh": { - "name": "BBH", - "value": 0.506762793166296, - "normalized_score": 30.9216327073333 - }, - "math": { - "name": "MATH Level 5", - "value": 0.4509063444108761, - "normalized_score": 45.090634441087616 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2609060402684564, 
- "normalized_score": 1.4541387024608499 - }, - "musr": { - "name": "MUSR", - "value": 0.3914270833333333, - "normalized_score": 6.795052083333332 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.42902260638297873, - "normalized_score": 36.55806737588653 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-31", - "submission_date": "2025-02-13", - "generation": 1, - "base_model": "Qwen/Qwen2.5-7B", - "hub_license": "apache-2.0", - "hub_hearts": 32, - "params_billions": 7.616, - "co2_cost": 2.0556345468545887 - } - }, - { - "id": "xxx777xxxASD/L3.1-ClaudeMaid-4x8B_bfloat16_2a98d9cb91c7aa775acbf5bfe7bb91beb2faf682_True", - "model": { - "name": "xxx777xxxASD/L3.1-ClaudeMaid-4x8B", - "sha": "2a98d9cb91c7aa775acbf5bfe7bb91beb2faf682", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 26.404880965062546, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6696487541944263, - "normalized_score": 66.96487541944263 - }, - "bbh": { - "name": "BBH", - "value": 0.5070848048063867, - "normalized_score": 29.437347820739546 - }, - "math": { - "name": "MATH Level 5", - "value": 0.14123867069486404, - "normalized_score": 14.123867069486403 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2911073825503356, - "normalized_score": 5.480984340044745 - }, - "musr": { - "name": "MUSR", - "value": 0.42893749999999997, - "normalized_score": 13.750520833333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35804521276595747, - "normalized_score": 28.671690307328607 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-07-27", - "submission_date": "2024-07-28", - "generation": 0, - "base_model": "xxx777xxxASD/L3.1-ClaudeMaid-4x8B", - "hub_license": "llama3.1", - "hub_hearts": 7, - "params_billions": 24.942, - "co2_cost": 4.752369138298316 - } - }, - { - "id": "yam-peleg/Hebrew-Gemma-11B-Instruct_float16_a40259d1efbcac4829ed44d3b589716f615ed362_True", - "model": { - "name": "yam-peleg/Hebrew-Gemma-11B-Instruct", - "sha": "a40259d1efbcac4829ed44d3b589716f615ed362", - "precision": "float16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "GemmaForCausalLM", - "average_score": 14.058232008864843, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.30207737691547315, - "normalized_score": 30.207737691547315 - }, - "bbh": { - "name": "BBH", - "value": 0.40357843109818686, - "normalized_score": 16.86274051283721 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06570996978851963, - "normalized_score": 6.570996978851963 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.4088541666666667, - "normalized_score": 9.973437500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.25540226063829785, - "normalized_score": 17.266917848699762 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-03-06", - "submission_date": "2024-07-31", - "generation": 0, - 
"base_model": "yam-peleg/Hebrew-Gemma-11B-Instruct", - "hub_license": "other", - "hub_hearts": 23, - "params_billions": 10.475, - "co2_cost": 3.874534345261228 - } - }, - { - "id": "yam-peleg/Hebrew-Mistral-7B_bfloat16_3d32134b5959492fd7efbbf16395352594bc89f7_False", - "model": { - "name": "yam-peleg/Hebrew-Mistral-7B", - "sha": "3d32134b5959492fd7efbbf16395352594bc89f7", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 13.302117179699644, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.23283443485507344, - "normalized_score": 23.28344348550734 - }, - "bbh": { - "name": "BBH", - "value": 0.43340366992362034, - "normalized_score": 20.176940422218426 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04984894259818731, - "normalized_score": 4.984894259818732 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27936241610738255, - "normalized_score": 3.9149888143176734 - }, - "musr": { - "name": "MUSR", - "value": 0.39765625, - "normalized_score": 7.673697916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.27800864361702127, - "normalized_score": 19.778738179669027 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-04-26", - "submission_date": "2024-07-11", - "generation": 0, - "base_model": "yam-peleg/Hebrew-Mistral-7B", - "hub_license": "apache-2.0", - "hub_hearts": 69, - "params_billions": 7.504, - "co2_cost": 2.0942131773492205 - } - }, - { - "id": "yam-peleg/Hebrew-Mistral-7B-200K_float16_7b51c7b31e3d9e29ea964c579a45233cfad255fe_False", - "model": { - "name": "yam-peleg/Hebrew-Mistral-7B-200K", - "sha": "7b51c7b31e3d9e29ea964c579a45233cfad255fe", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 10.64429135893812, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1855731680829089, - "normalized_score": 18.557316808290892 - }, - "bbh": { - "name": "BBH", - "value": 0.4149272793394017, - "normalized_score": 17.49360317518456 - }, - "math": { - "name": "MATH Level 5", - "value": 0.023413897280966767, - "normalized_score": 2.3413897280966767 - }, - "gpqa": { - "name": "GPQA", - "value": 0.276006711409396, - "normalized_score": 3.467561521252797 - }, - "musr": { - "name": "MUSR", - "value": 0.3764791666666667, - "normalized_score": 4.5265625000000025 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.25731382978723405, - "normalized_score": 17.47931442080378 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-05", - "submission_date": "2024-07-11", - "generation": 0, - "base_model": "yam-peleg/Hebrew-Mistral-7B-200K", - "hub_license": "apache-2.0", - "hub_hearts": 15, - "params_billions": 7.504, - "co2_cost": 0.7353117338467755 - } - }, - { - "id": "yam-peleg/Hebrew-Mistral-7B-200K_bfloat16_7b51c7b31e3d9e29ea964c579a45233cfad255fe_True", - "model": { - "name": "yam-peleg/Hebrew-Mistral-7B-200K", - "sha": "7b51c7b31e3d9e29ea964c579a45233cfad255fe", - "precision": "bfloat16", - "type": "continuouslypretrained", - "weight_type": "Original", - "architecture": 
"MistralForCausalLM", - "average_score": 8.386669075864491, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17698041197356346, - "normalized_score": 17.698041197356346 - }, - "bbh": { - "name": "BBH", - "value": 0.3410500846818921, - "normalized_score": 7.671323719331375 - }, - "math": { - "name": "MATH Level 5", - "value": 0.030966767371601207, - "normalized_score": 3.096676737160121 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2533557046979866, - "normalized_score": 0.44742729306487633 - }, - "musr": { - "name": "MUSR", - "value": 0.37399999999999994, - "normalized_score": 4.416666666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2529089095744681, - "normalized_score": 16.989878841607567 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-05-05", - "submission_date": "2024-08-06", - "generation": 0, - "base_model": "yam-peleg/Hebrew-Mistral-7B-200K", - "hub_license": "apache-2.0", - "hub_hearts": 15, - "params_billions": 7.504, - "co2_cost": 2.6696632132469658 - } - }, - { - "id": "yanng1242/Marcoro14-7B-slerp_float16_187c9df776cb1191a30a2f09737160316f56e875_False", - "model": { - "name": "yanng1242/Marcoro14-7B-slerp", - "sha": "187c9df776cb1191a30a2f09737160316f56e875", - "precision": "float16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "MistralForCausalLM", - "average_score": 21.933478090702422, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4059916576904835, - "normalized_score": 40.59916576904835 - }, - "bbh": { - "name": "BBH", - "value": 0.5251655292981787, - "normalized_score": 32.97528227053496 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07477341389728097, - "normalized_score": 7.477341389728097 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3145973154362416, - "normalized_score": 8.612975391498878 - }, - "musr": { - "name": "MUSR", - "value": 0.468625, - "normalized_score": 17.844791666666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3168218085106383, - "normalized_score": 24.09131205673759 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-19", - "submission_date": "2025-01-19", - "generation": 0, - "base_model": "yanng1242/Marcoro14-7B-slerp", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 7.242, - "co2_cost": 0.846102654796972 - } - }, - { - "id": "yasserrmd/Coder-GRPO-3B_float16_1a6217ef44d7eeefdaa10290457502d68233a989_True", - "model": { - "name": "yasserrmd/Coder-GRPO-3B", - "sha": "1a6217ef44d7eeefdaa10290457502d68233a989", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 25.914051108530288, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6207640172520024, - "normalized_score": 62.07640172520024 - }, - "bbh": { - "name": "BBH", - "value": 0.4469120364616385, - "normalized_score": 22.912311342227735 - }, - "math": { - "name": "MATH Level 5", - "value": 0.3202416918429003, - "normalized_score": 32.02416918429003 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 
3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.4114583333333333, - "normalized_score": 10.365625000000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3197307180851064, - "normalized_score": 24.414524231678485 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-08", - "submission_date": "2025-03-05", - "generation": 1, - "base_model": "yasserrmd/Coder-GRPO-3B (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 1, - "params_billions": 3.086, - "co2_cost": 0.7510040579702157 - } - }, - { - "id": "yasserrmd/Text2SQL-1.5B_bfloat16_aeef22ad5852dcf530b5e012a935a948d46d0e96_False", - "model": { - "name": "yasserrmd/Text2SQL-1.5B", - "sha": "aeef22ad5852dcf530b5e012a935a948d46d0e96", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 13.233972018030462, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2857407235025289, - "normalized_score": 28.574072350252884 - }, - "bbh": { - "name": "BBH", - "value": 0.38577157961565695, - "normalized_score": 13.675719585052784 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06797583081570997, - "normalized_score": 6.797583081570997 - }, - "gpqa": { - "name": "GPQA", - "value": 0.287751677852349, - "normalized_score": 5.033557046979867 - }, - "musr": { - "name": "MUSR", - "value": 0.39423958333333337, - "normalized_score": 10.179947916666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.23628656914893617, - "normalized_score": 15.142952127659573 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-06", - "submission_date": "2025-03-06", - "generation": 3, - "base_model": "Qwen/Qwen2.5-Coder-1.5B-Instruct (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 5, - "params_billions": 1.544, - "co2_cost": 0.5655182145088332 - } - }, - { - "id": "ycros/BagelMIsteryTour-v2-8x7B_float16_98a8b319707be3dab1659594da69a37ed8f8c148_True", - "model": { - "name": "ycros/BagelMIsteryTour-v2-8x7B", - "sha": "98a8b319707be3dab1659594da69a37ed8f8c148", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 24.258614269254906, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.599431730031871, - "normalized_score": 59.9431730031871 - }, - "bbh": { - "name": "BBH", - "value": 0.515923595752544, - "normalized_score": 31.69928662894613 - }, - "math": { - "name": "MATH Level 5", - "value": 0.07854984894259819, - "normalized_score": 7.854984894259818 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30453020134228187, - "normalized_score": 7.270693512304249 - }, - "musr": { - "name": "MUSR", - "value": 0.4202916666666667, - "normalized_score": 11.303125000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.34732380319148937, - "normalized_score": 27.480422576832154 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-01-19", - "submission_date": "2024-06-28", - 
"generation": 1, - "base_model": "ycros/BagelMIsteryTour-v2-8x7B (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 16, - "params_billions": 46.703, - "co2_cost": 3.64913185802464 - } - }, - { - "id": "ycros/BagelMIsteryTour-v2-8x7B_bfloat16_98a8b319707be3dab1659594da69a37ed8f8c148_True", - "model": { - "name": "ycros/BagelMIsteryTour-v2-8x7B", - "sha": "98a8b319707be3dab1659594da69a37ed8f8c148", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 24.82550730859936, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6262095683896506, - "normalized_score": 62.62095683896506 - }, - "bbh": { - "name": "BBH", - "value": 0.5141943573573103, - "normalized_score": 31.366123015915477 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09365558912386707, - "normalized_score": 9.365558912386707 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30788590604026844, - "normalized_score": 7.718120805369126 - }, - "musr": { - "name": "MUSR", - "value": 0.41375, - "normalized_score": 10.31875 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3480718085106383, - "normalized_score": 27.56353427895981 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-01-19", - "submission_date": "2024-08-04", - "generation": 1, - "base_model": "ycros/BagelMIsteryTour-v2-8x7B (Merge)", - "hub_license": "cc-by-nc-4.0", - "hub_hearts": 16, - "params_billions": 46.703, - "co2_cost": 7.238673064292047 - } - }, - { - "id": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_2b-table_bfloat16_97b2d0e790a6fcdf39c34a2043f0818368c7dcb3_True", - "model": { - "name": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_2b-table", - "sha": "97b2d0e790a6fcdf39c34a2043f0818368c7dcb3", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.61656457033402, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6708976626462231, - "normalized_score": 67.08976626462231 - }, - "bbh": { - "name": "BBH", - "value": 0.49866134349131935, - "normalized_score": 28.170106538118223 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11178247734138973, - "normalized_score": 11.178247734138973 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.37269791666666663, - "normalized_score": 3.85390625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37159242021276595, - "normalized_score": 30.17693557919622 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-29", - "submission_date": "2024-09-30", - "generation": 0, - "base_model": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_2b-table", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2365060445940472 - } - }, - { - "id": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_8b-table_bfloat16_e8786291c206d5cd1b01d29466e3b397278f4e2b_True", - "model": { - "name": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_8b-table", - "sha": "e8786291c206d5cd1b01d29466e3b397278f4e2b", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": 
"Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.978481283429158, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7332710541363582, - "normalized_score": 73.32710541363582 - }, - "bbh": { - "name": "BBH", - "value": 0.5080359954971677, - "normalized_score": 29.308127928492553 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10347432024169184, - "normalized_score": 10.347432024169184 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.38060416666666663, - "normalized_score": 5.008854166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3748337765957447, - "normalized_score": 30.537086288416077 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-29", - "submission_date": "2024-09-30", - "generation": 0, - "base_model": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_8b-table", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2813256734746974 - } - }, - { - "id": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_2b-table_bfloat16_0d9cb29aa87b0c17ed011ffbc83803f3f6dd18e7_True", - "model": { - "name": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_2b-table", - "sha": "0d9cb29aa87b0c17ed011ffbc83803f3f6dd18e7", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.457640244691195, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6784664689690023, - "normalized_score": 67.84664689690022 - }, - "bbh": { - "name": "BBH", - "value": 0.49412091896520455, - "normalized_score": 27.469588233937547 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11253776435045318, - "normalized_score": 11.253776435045317 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.3646666666666667, - "normalized_score": 2.7500000000000018 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37175864361702127, - "normalized_score": 30.1954048463357 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-29", - "submission_date": "2024-09-29", - "generation": 0, - "base_model": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_2b-table", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3591089095296276 - } - }, - { - "id": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_8b-table_bfloat16_7a326a956e6169b287a04ef93cdc0342a0f3311a_True", - "model": { - "name": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_8b-table", - "sha": "7a326a956e6169b287a04ef93cdc0342a0f3311a", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.089793367665113, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7131876753680235, - "normalized_score": 71.31876753680235 - }, - "bbh": { - "name": "BBH", - "value": 0.5025359954971677, - "normalized_score": 28.604424224788854 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09894259818731117, - "normalized_score": 
9.894259818731117 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.3713333333333333, - "normalized_score": 3.683333333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.36826795212765956, - "normalized_score": 29.807550236406616 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-29", - "submission_date": "2024-09-29", - "generation": 0, - "base_model": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_8b-table", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2963682534332266 - } - }, - { - "id": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_2b-table-0.001_bfloat16_e5c8baadbf6ce17b344596ad42bd3546f66e253e_True", - "model": { - "name": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_2b-table-0.001", - "sha": "e5c8baadbf6ce17b344596ad42bd3546f66e253e", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.246034978048385, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6495653754260917, - "normalized_score": 64.95653754260917 - }, - "bbh": { - "name": "BBH", - "value": 0.4979459532536201, - "normalized_score": 28.09919885125768 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10120845921450151, - "normalized_score": 10.120845921450151 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.37796874999999996, - "normalized_score": 4.8460937500000005 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.37200797872340424, - "normalized_score": 30.223108747044915 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-29", - "submission_date": "2024-09-30", - "generation": 0, - "base_model": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_2b-table-0.001", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.1644703259402496 - } - }, - { - "id": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_8b-table-0.002_bfloat16_064e237b850151938caf171a4c8c7e34c93e580e_True", - "model": { - "name": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_8b-table-0.002", - "sha": "064e237b850151938caf171a4c8c7e34c93e580e", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 24.470596545324664, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7196073086078272, - "normalized_score": 71.96073086078272 - }, - "bbh": { - "name": "BBH", - "value": 0.5045147424411157, - "normalized_score": 28.785910542437534 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08761329305135952, - "normalized_score": 8.761329305135952 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2600671140939597, - "normalized_score": 1.342281879194629 - }, - "musr": { - "name": "MUSR", - "value": 0.3831458333333333, - "normalized_score": 5.593229166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3734208776595745, - "normalized_score": 30.38009751773049 - } - }, - "features": { - "is_not_available_on_hub": 
false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-29", - "submission_date": "2024-09-30", - "generation": 0, - "base_model": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_8b-table-0.002", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2120430270932958 - } - }, - { - "id": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_2b-table-0.001_bfloat16_b685b90063258e05f8b4930fdbce2e565f13f620_True", - "model": { - "name": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_2b-table-0.001", - "sha": "b685b90063258e05f8b4930fdbce2e565f13f620", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.724716099715508, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6504397221594258, - "normalized_score": 65.04397221594259 - }, - "bbh": { - "name": "BBH", - "value": 0.49578758563187125, - "normalized_score": 27.82525272195498 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09365558912386707, - "normalized_score": 9.365558912386707 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.36603125, - "normalized_score": 2.853906249999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3702626329787234, - "normalized_score": 30.029181442080382 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-29", - "submission_date": "2024-09-29", - "generation": 0, - "base_model": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_2b-table-0.001", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2981844362850772 - } - }, - { - "id": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_8b-table-0.002_bfloat16_5ab3f2cfc96bdda3b5a629ab4a81adf7394ba90a_True", - "model": { - "name": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_8b-table-0.002", - "sha": "5ab3f2cfc96bdda3b5a629ab4a81adf7394ba90a", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 23.749108318905765, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7015973173402128, - "normalized_score": 70.15973173402128 - }, - "bbh": { - "name": "BBH", - "value": 0.4991547169583548, - "normalized_score": 28.12061516996449 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08685800604229607, - "normalized_score": 8.685800604229607 - }, - "gpqa": { - "name": "GPQA", - "value": 0.25922818791946306, - "normalized_score": 1.230425055928408 - }, - "musr": { - "name": "MUSR", - "value": 0.37790624999999994, - "normalized_score": 4.6382812499999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.366938164893617, - "normalized_score": 29.659796099290777 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-29", - "submission_date": "2024-09-29", - "generation": 0, - "base_model": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_8b-table-0.002", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.2153799016409517 - } - }, - 
{ - "id": "yifAI/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_8b-table-0.002_bfloat16_7a046b74179225d6055dd8aa601b5234f817b1e5_True", - "model": { - "name": "yifAI/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_8b-table-0.002", - "sha": "7a046b74179225d6055dd8aa601b5234f817b1e5", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 22.73807535003498, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6489658550423987, - "normalized_score": 64.89658550423985 - }, - "bbh": { - "name": "BBH", - "value": 0.49145217071254876, - "normalized_score": 27.28106392287093 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0755287009063444, - "normalized_score": 7.552870090634441 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, - "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.38987499999999997, - "normalized_score": 7.134375000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3519780585106383, - "normalized_score": 27.997562056737586 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-09-30", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.3440313590094741 - } - }, - { - "id": "ylalain/ECE-PRYMMAL-YL-1B-SLERP-V8_bfloat16_2c00dbc74e55d42fbc8b08f474fb9568f820edb9_False", - "model": { - "name": "ylalain/ECE-PRYMMAL-YL-1B-SLERP-V8", - "sha": "2c00dbc74e55d42fbc8b08f474fb9568f820edb9", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 9.679667500944214, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15052726764983576, - "normalized_score": 15.052726764983579 - }, - "bbh": { - "name": "BBH", - "value": 0.3975573100103517, - "normalized_score": 15.175392374828268 - }, - "math": { - "name": "MATH Level 5", - "value": 0.004531722054380665, - "normalized_score": 0.4531722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28942953020134227, - "normalized_score": 5.257270693512303 - }, - "musr": { - "name": "MUSR", - "value": 0.3874583333333333, - "normalized_score": 6.765625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.23836436170212766, - "normalized_score": 15.373817966903072 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-13", - "submission_date": "2024-11-13", - "generation": 0, - "base_model": "ylalain/ECE-PRYMMAL-YL-1B-SLERP-V8", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 1.357, - "co2_cost": 1.0968554918483069 - } - }, - { - "id": "ymcki/Llama-3.1-8B-GRPO-Instruct_bfloat16_ae73ec53fb75499a33a506b354b55b29d02392b9_True", - "model": { - "name": "ymcki/Llama-3.1-8B-GRPO-Instruct", - "sha": "ae73ec53fb75499a33a506b354b55b29d02392b9", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.168375624484387, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 
0.744536718130117, - "normalized_score": 74.45367181301171 - }, - "bbh": { - "name": "BBH", - "value": 0.5131586337530801, - "normalized_score": 30.353176522305507 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20241691842900303, - "normalized_score": 20.241691842900302 - }, - "gpqa": { - "name": "GPQA", - "value": 0.29446308724832215, - "normalized_score": 5.92841163310962 - }, - "musr": { - "name": "MUSR", - "value": 0.38165625000000003, - "normalized_score": 7.607031250000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.3738364361702128, - "normalized_score": 30.426270685579198 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-20", - "submission_date": "2025-02-20", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "llama3.1", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.6567906669597681 - } - }, - { - "id": "ymcki/Llama-3.1-8B-SFT-GRPO-Instruct_bfloat16_9d8bfc910b2be95b38a3738a938c7abf575892ac_True", - "model": { - "name": "ymcki/Llama-3.1-8B-SFT-GRPO-Instruct", - "sha": "9d8bfc910b2be95b38a3738a938c7abf575892ac", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 7.6591553865170505, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.33540007180946557, - "normalized_score": 33.54000718094656 - }, - "bbh": { - "name": "BBH", - "value": 0.3126261967336083, - "normalized_score": 4.467782695872583 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04003021148036254, - "normalized_score": 4.003021148036254 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2533557046979866, - "normalized_score": 0.44742729306487633 - }, - "musr": { - "name": "MUSR", - "value": 0.35260416666666666, - "normalized_score": 2.408854166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10979055851063829, - "normalized_score": 1.087839834515365 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-03-12", - "submission_date": "2025-03-12", - "generation": 2, - "base_model": "meta-llama/Meta-Llama-3.1-8B", - "hub_license": "llama3.1", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 0.7521236769130275 - } - }, - { - "id": "ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18_bfloat16_aed2a9061ffa21beaec0d617a9605e160136aab4_True", - "model": { - "name": "ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18", - "sha": "aed2a9061ffa21beaec0d617a9605e160136aab4", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 15.288362728327092, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4630945890237902, - "normalized_score": 46.30945890237902 - }, - "bbh": { - "name": "BBH", - "value": 0.4052902505118913, - "normalized_score": 16.30199203988385 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04305135951661632, - "normalized_score": 4.305135951661631 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28859060402684567, - "normalized_score": 5.145413870246088 - }, - "musr": { - "name": "MUSR", - "value": 0.3754270833333333, - 
"normalized_score": 4.728385416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.23445811170212766, - "normalized_score": 14.939790189125294 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-30", - "submission_date": "2024-11-16", - "generation": 3, - "base_model": "google/gemma-2-2b", - "hub_license": "gemma", - "hub_hearts": 0, - "params_billions": 2.614, - "co2_cost": 7.227142161386585 - } - }, - { - "id": "ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18-merge_bfloat16_b72be0a7879f0d82cb2024cfc1d02c370ce3efe8_True", - "model": { - "name": "ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18-merge", - "sha": "b72be0a7879f0d82cb2024cfc1d02c370ce3efe8", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 16.505537805625195, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5218209905273563, - "normalized_score": 52.18209905273564 - }, - "bbh": { - "name": "BBH", - "value": 0.414688942270627, - "normalized_score": 17.34833699622109 - }, - "math": { - "name": "MATH Level 5", - "value": 0.054380664652567974, - "normalized_score": 5.438066465256798 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2835570469798658, - "normalized_score": 4.4742729306487705 - }, - "musr": { - "name": "MUSR", - "value": 0.35139583333333335, - "normalized_score": 3.357812500000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.24609375, - "normalized_score": 16.232638888888886 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-30", - "submission_date": "2024-11-16", - "generation": 3, - "base_model": "google/gemma-2-2b", - "hub_license": "gemma", - "hub_hearts": 0, - "params_billions": 2.614, - "co2_cost": 2.7865524740093672 - } - }, - { - "id": "ymcki/gemma-2-2b-jpn-it-abliterated-17_bfloat16_e6f82b93dae0b8207aa3252ab4157182e2610787_True", - "model": { - "name": "ymcki/gemma-2-2b-jpn-it-abliterated-17", - "sha": "e6f82b93dae0b8207aa3252ab4157182e2610787", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 15.644976187098441, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5081572449988254, - "normalized_score": 50.815724499882535 - }, - "bbh": { - "name": "BBH", - "value": 0.40762664531580056, - "normalized_score": 16.23474918432881 - }, - "math": { - "name": "MATH Level 5", - "value": 0.03851963746223565, - "normalized_score": 3.8519637462235647 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27181208053691275, - "normalized_score": 2.9082774049216997 - }, - "musr": { - "name": "MUSR", - "value": 0.37006249999999996, - "normalized_score": 3.891145833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2455119680851064, - "normalized_score": 16.16799645390071 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-16", - "submission_date": "2024-10-18", - "generation": 3, - "base_model": "google/gemma-2-2b", - "hub_license": "gemma", - 
"hub_hearts": 1, - "params_billions": 2.614, - "co2_cost": 1.9889121882743397 - } - }, - { - "id": "ymcki/gemma-2-2b-jpn-it-abliterated-17-18-24_bfloat16_38f56fcb99bd64278a1d90dd23aea527036329a0_True", - "model": { - "name": "ymcki/gemma-2-2b-jpn-it-abliterated-17-18-24", - "sha": "38f56fcb99bd64278a1d90dd23aea527036329a0", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 14.44776069045957, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.505484337114412, - "normalized_score": 50.54843371144119 - }, - "bbh": { - "name": "BBH", - "value": 0.38123590457353557, - "normalized_score": 13.114728218255616 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0256797583081571, - "normalized_score": 2.56797583081571 - }, - "gpqa": { - "name": "GPQA", - "value": 0.28104026845637586, - "normalized_score": 4.138702460850116 - }, - "musr": { - "name": "MUSR", - "value": 0.35015625, - "normalized_score": 2.0695312500000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2282247340425532, - "normalized_score": 14.247192671394798 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-06", - "submission_date": "2024-11-06", - "generation": 3, - "base_model": "google/gemma-2-2b", - "hub_license": "gemma", - "hub_hearts": 0, - "params_billions": 2.614, - "co2_cost": 1.4097172171633234 - } - }, - { - "id": "ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO_bfloat16_531b2e2043285cb40cd0433f5ad43441f8ac6b6c_True", - "model": { - "name": "ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO", - "sha": "531b2e2043285cb40cd0433f5ad43441f8ac6b6c", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 14.844141845651569, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47478468242042227, - "normalized_score": 47.478468242042226 - }, - "bbh": { - "name": "BBH", - "value": 0.38979797271028965, - "normalized_score": 14.389413087436518 - }, - "math": { - "name": "MATH Level 5", - "value": 0.061933534743202415, - "normalized_score": 6.193353474320242 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27432885906040266, - "normalized_score": 3.243847874720355 - }, - "musr": { - "name": "MUSR", - "value": 0.37676041666666665, - "normalized_score": 4.5283854166666675 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.21908244680851063, - "normalized_score": 13.231382978723403 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-18", - "submission_date": "2024-10-27", - "generation": 3, - "base_model": "google/gemma-2-2b", - "hub_license": "gemma", - "hub_hearts": 1, - "params_billions": 2.614, - "co2_cost": 11.290834446830782 - } - }, - { - "id": "ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO-alpaca_bfloat16_5503b5e892be463fa4b1d265b8ba9ba4304af012_True", - "model": { - "name": "ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO-alpaca", - "sha": "5503b5e892be463fa4b1d265b8ba9ba4304af012", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": 
"Gemma2ForCausalLM", - "average_score": 12.53043213387293, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.30647349033896726, - "normalized_score": 30.647349033896724 - }, - "bbh": { - "name": "BBH", - "value": 0.40715971926711275, - "normalized_score": 16.922412033306475 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0324773413897281, - "normalized_score": 3.2477341389728096 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26929530201342283, - "normalized_score": 2.572706935123044 - }, - "musr": { - "name": "MUSR", - "value": 0.39691666666666664, - "normalized_score": 7.9145833333333355 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2249002659574468, - "normalized_score": 13.877807328605199 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "", - "submission_date": "2024-10-27", - "generation": 0, - "base_model": "Removed", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 2.614, - "co2_cost": 2.3693327538000957 - } - }, - { - "id": "ymcki/gemma-2-2b-jpn-it-abliterated-18_bfloat16_c50b85f9b60b444f85fe230b8d77fcbc7b18ef91_True", - "model": { - "name": "ymcki/gemma-2-2b-jpn-it-abliterated-18", - "sha": "c50b85f9b60b444f85fe230b8d77fcbc7b18ef91", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 16.245944384743186, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5175246124726836, - "normalized_score": 51.75246124726836 - }, - "bbh": { - "name": "BBH", - "value": 0.4132188791645781, - "normalized_score": 17.143414938177205 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0445619335347432, - "normalized_score": 4.45619335347432 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27348993288590606, - "normalized_score": 3.1319910514541416 - }, - "musr": { - "name": "MUSR", - "value": 0.37415624999999997, - "normalized_score": 4.269531250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.25049867021276595, - "normalized_score": 16.722074468085104 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-15", - "submission_date": "2024-10-18", - "generation": 3, - "base_model": "google/gemma-2-2b", - "hub_license": "gemma", - "hub_hearts": 1, - "params_billions": 2.614, - "co2_cost": 1.906288848838165 - } - }, - { - "id": "ymcki/gemma-2-2b-jpn-it-abliterated-18-ORPO_bfloat16_b9f41f53827b8a5a600546b41f63023bf84617a3_True", - "model": { - "name": "ymcki/gemma-2-2b-jpn-it-abliterated-18-ORPO", - "sha": "b9f41f53827b8a5a600546b41f63023bf84617a3", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 15.132293957042885, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.47423502972113984, - "normalized_score": 47.42350297211399 - }, - "bbh": { - "name": "BBH", - "value": 0.40389353402379324, - "normalized_score": 16.538078577820993 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04682779456193353, - "normalized_score": 4.682779456193353 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26174496644295303, 
- "normalized_score": 1.5659955257270708 - }, - "musr": { - "name": "MUSR", - "value": 0.3953333333333333, - "normalized_score": 7.416666666666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.21850066489361702, - "normalized_score": 13.166740543735225 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-22", - "submission_date": "2024-10-22", - "generation": 3, - "base_model": "google/gemma-2-2b", - "hub_license": "gemma", - "hub_hearts": 0, - "params_billions": 2.614, - "co2_cost": 3.220754256066701 - } - }, - { - "id": "ymcki/gemma-2-2b-jpn-it-abliterated-24_bfloat16_06c129ba5261ee88e32035c88f90ca11d835175d_True", - "model": { - "name": "ymcki/gemma-2-2b-jpn-it-abliterated-24", - "sha": "06c129ba5261ee88e32035c88f90ca11d835175d", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 16.334186539569448, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.49786566310722213, - "normalized_score": 49.786566310722215 - }, - "bbh": { - "name": "BBH", - "value": 0.41096027770392857, - "normalized_score": 16.772590130572933 - }, - "math": { - "name": "MATH Level 5", - "value": 0.04380664652567976, - "normalized_score": 4.380664652567976 - }, - "gpqa": { - "name": "GPQA", - "value": 0.27768456375838924, - "normalized_score": 3.6912751677852316 - }, - "musr": { - "name": "MUSR", - "value": 0.39148958333333334, - "normalized_score": 7.002864583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2473404255319149, - "normalized_score": 16.37115839243499 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-24", - "submission_date": "2024-10-25", - "generation": 3, - "base_model": "google/gemma-2-2b", - "hub_license": "gemma", - "hub_hearts": 0, - "params_billions": 2.614, - "co2_cost": 1.6208845800460434 - } - }, - { - "id": "yuchenxie/ArlowGPT-3B-Multilingual_float16_336f9084b4718be34ec7348e8082670539aebb4c_True", - "model": { - "name": "yuchenxie/ArlowGPT-3B-Multilingual", - "sha": "336f9084b4718be34ec7348e8082670539aebb4c", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 20.501174920235083, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.6395486198841297, - "normalized_score": 63.95486198841297 - }, - "bbh": { - "name": "BBH", - "value": 0.4301403132173714, - "normalized_score": 19.503170070297383 - }, - "math": { - "name": "MATH Level 5", - "value": 0.11253776435045318, - "normalized_score": 11.253776435045317 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2802013422818792, - "normalized_score": 4.026845637583895 - }, - "musr": { - "name": "MUSR", - "value": 0.37266666666666665, - "normalized_score": 4.083333333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2816655585106383, - "normalized_score": 20.18506205673759 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-03", - "submission_date": 
"2025-01-12", - "generation": 1, - "base_model": "yuchenxie/ArlowGPT-3B-Multilingual (Merge)", - "hub_license": "mit", - "hub_hearts": 1, - "params_billions": 3.213, - "co2_cost": 1.2236435031470276 - } - }, - { - "id": "yuchenxie/ArlowGPT-8B_float16_f7d0149059f1324a7725676b6ab67df59cd4c599_True", - "model": { - "name": "yuchenxie/ArlowGPT-8B", - "sha": "f7d0149059f1324a7725676b6ab67df59cd4c599", - "precision": "float16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 28.973572026793594, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7846536079823756, - "normalized_score": 78.46536079823757 - }, - "bbh": { - "name": "BBH", - "value": 0.5080162816130412, - "normalized_score": 29.842908708676827 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2039274924471299, - "normalized_score": 20.39274924471299 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2936241610738255, - "normalized_score": 5.8165548098433995 - }, - "musr": { - "name": "MUSR", - "value": 0.3882291666666667, - "normalized_score": 8.36197916666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.378656914893617, - "normalized_score": 30.961879432624112 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-05", - "submission_date": "2025-01-12", - "generation": 1, - "base_model": "yuchenxie/ArlowGPT-8B (Merge)", - "hub_license": "mit", - "hub_hearts": 3, - "params_billions": 8.03, - "co2_cost": 1.4335595564415415 - } - }, - { - "id": "yuvraj17/Llama3-8B-SuperNova-Spectrum-Hermes-DPO_bfloat16_0da9f780f7dd94ed1e10c8d3e082472ff2922177_True", - "model": { - "name": "yuvraj17/Llama3-8B-SuperNova-Spectrum-Hermes-DPO", - "sha": "0da9f780f7dd94ed1e10c8d3e082472ff2922177", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 18.088167586242665, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4690897928607206, - "normalized_score": 46.90897928607207 - }, - "bbh": { - "name": "BBH", - "value": 0.4399870586095269, - "normalized_score": 21.238562899271304 - }, - "math": { - "name": "MATH Level 5", - "value": 0.05664652567975831, - "normalized_score": 5.664652567975831 - }, - "gpqa": { - "name": "GPQA", - "value": 0.30201342281879195, - "normalized_score": 6.935123042505594 - }, - "musr": { - "name": "MUSR", - "value": 0.40121875, - "normalized_score": 9.619010416666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.2634640957446808, - "normalized_score": 18.162677304964536 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-24", - "submission_date": "2024-09-30", - "generation": 0, - "base_model": "yuvraj17/Llama3-8B-SuperNova-Spectrum-Hermes-DPO", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.9440590206951107 - } - }, - { - "id": "yuvraj17/Llama3-8B-SuperNova-Spectrum-dare_ties_bfloat16_998d15b32900bc230727c8a7984e005f611723e9_False", - "model": { - "name": "yuvraj17/Llama3-8B-SuperNova-Spectrum-dare_ties", - "sha": "998d15b32900bc230727c8a7984e005f611723e9", - "precision": "bfloat16", - "type": 
"basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 19.17256512798472, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4012708502329375, - "normalized_score": 40.127085023293745 - }, - "bbh": { - "name": "BBH", - "value": 0.4615794426716074, - "normalized_score": 23.492187889680057 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08459214501510574, - "normalized_score": 8.459214501510575 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2751677852348993, - "normalized_score": 3.355704697986576 - }, - "musr": { - "name": "MUSR", - "value": 0.42109375, - "normalized_score": 11.003385416666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.35738031914893614, - "normalized_score": 28.59781323877068 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-22", - "submission_date": "2024-09-23", - "generation": 1, - "base_model": "yuvraj17/Llama3-8B-SuperNova-Spectrum-dare_ties (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.828288143464655 - } - }, - { - "id": "yuvraj17/Llama3-8B-abliterated-Spectrum-slerp_bfloat16_28789950975ecf5aac846c3f2c0a5d6841651ee6_False", - "model": { - "name": "yuvraj17/Llama3-8B-abliterated-Spectrum-slerp", - "sha": "28789950975ecf5aac846c3f2c0a5d6841651ee6", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "LlamaForCausalLM", - "average_score": 17.72531595014861, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.2884878788281759, - "normalized_score": 28.84878788281759 - }, - "bbh": { - "name": "BBH", - "value": 0.4977912063897858, - "normalized_score": 28.54692976096071 - }, - "math": { - "name": "MATH Level 5", - "value": 0.06042296072507553, - "normalized_score": 6.042296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3011744966442953, - "normalized_score": 6.823266219239373 - }, - "musr": { - "name": "MUSR", - "value": 0.39982291666666664, - "normalized_score": 11.011197916666662 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.32571476063829785, - "normalized_score": 25.07941784869976 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-09-22", - "submission_date": "2024-09-23", - "generation": 1, - "base_model": "yuvraj17/Llama3-8B-abliterated-Spectrum-slerp (Merge)", - "hub_license": "apache-2.0", - "hub_hearts": 0, - "params_billions": 8.03, - "co2_cost": 1.6533204310572152 - } - }, - { - "id": "zake7749/gemma-2-2b-it-chinese-kyara-dpo_bfloat16_bbc011dae0416c1664a0287f3a7a0f9563deac91_False", - "model": { - "name": "zake7749/gemma-2-2b-it-chinese-kyara-dpo", - "sha": "bbc011dae0416c1664a0287f3a7a0f9563deac91", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 19.62411186744714, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.5382075116247114, - "normalized_score": 53.82075116247113 - }, - "bbh": { - "name": "BBH", - "value": 0.4257464897414603, - "normalized_score": 19.061804188812648 - }, - "math": { - "name": 
"MATH Level 5", - "value": 0.08383685800604229, - "normalized_score": 8.38368580060423 - }, - "gpqa": { - "name": "GPQA", - "value": 0.26677852348993286, - "normalized_score": 2.2371364653243813 - }, - "musr": { - "name": "MUSR", - "value": 0.45756250000000004, - "normalized_score": 16.761979166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.25731382978723405, - "normalized_score": 17.47931442080378 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-08-18", - "submission_date": "2024-10-17", - "generation": 1, - "base_model": "zake7749/gemma-2-2b-it-chinese-kyara-dpo (Merge)", - "hub_license": "gemma", - "hub_hearts": 11, - "params_billions": 2.614, - "co2_cost": 2.558618143456469 - } - }, - { - "id": "zake7749/gemma-2-9b-it-chinese-kyara_bfloat16_6f440abe1e2fde914e6607e2b6c5b04cc69c51f4_True", - "model": { - "name": "zake7749/gemma-2-9b-it-chinese-kyara", - "sha": "6f440abe1e2fde914e6607e2b6c5b04cc69c51f4", - "precision": "bfloat16", - "type": "chatmodels", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 21.38318153545784, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17642965110351644, - "normalized_score": 17.642965110351646 - }, - "bbh": { - "name": "BBH", - "value": 0.5953692987878404, - "normalized_score": 41.100635505919165 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10498489425981873, - "normalized_score": 10.498489425981873 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33808724832214765, - "normalized_score": 11.74496644295302 - }, - "musr": { - "name": "MUSR", - "value": 0.4241979166666667, - "normalized_score": 11.991406250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.41788563829787234, - "normalized_score": 35.32062647754137 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-17", - "submission_date": "2025-02-24", - "generation": 1, - "base_model": "zake7749/gemma-2-9b-it-chinese-kyara (Merge)", - "hub_license": "gemma", - "hub_hearts": 0, - "params_billions": 9.242, - "co2_cost": 2.0677900362682515 - } - }, - { - "id": "zelk12/Gemma-2-TM-9B_bfloat16_42366d605e6bdad354a5632547e37d34d300ff7a_True", - "model": { - "name": "zelk12/Gemma-2-TM-9B", - "sha": "42366d605e6bdad354a5632547e37d34d300ff7a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 33.52554437747302, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8044621604010691, - "normalized_score": 80.44621604010692 - }, - "bbh": { - "name": "BBH", - "value": 0.5986592993557701, - "normalized_score": 42.04949103777593 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20241691842900303, - "normalized_score": 20.241691842900302 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3464765100671141, - "normalized_score": 12.863534675615215 - }, - "musr": { - "name": "MUSR", - "value": 0.41523958333333333, - "normalized_score": 11.238281249999995 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.40882646276595747, - "normalized_score": 34.314051418439725 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": 
false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-06", - "submission_date": "2024-11-06", - "generation": 1, - "base_model": "zelk12/Gemma-2-TM-9B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 10.159, - "co2_cost": 3.9357853459010563 - } - }, - { - "id": "zelk12/MT-Gen1-gemma-2-9B_bfloat16_b78f8883614cbbdf182ebb4acf8a8c124bc782ae_True", - "model": { - "name": "zelk12/MT-Gen1-gemma-2-9B", - "sha": "b78f8883614cbbdf182ebb4acf8a8c124bc782ae", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.51416587215041, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7886252920029965, - "normalized_score": 78.86252920029965 - }, - "bbh": { - "name": "BBH", - "value": 0.6099997385328262, - "normalized_score": 44.01124668886745 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22205438066465258, - "normalized_score": 22.20543806646526 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3464765100671141, - "normalized_score": 12.863534675615215 - }, - "musr": { - "name": "MUSR", - "value": 0.4216875, - "normalized_score": 11.577604166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4380817819148936, - "normalized_score": 37.56464243498817 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-23", - "submission_date": "2024-10-23", - "generation": 1, - "base_model": "zelk12/MT-Gen1-gemma-2-9B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 10.159, - "co2_cost": 6.725492826278124 - } - }, - { - "id": "zelk12/MT-Gen2-GI-gemma-2-9B_bfloat16_e970fbcbf974f4626dcc6db7d2b02d4f24c72744_True", - "model": { - "name": "zelk12/MT-Gen2-GI-gemma-2-9B", - "sha": "e970fbcbf974f4626dcc6db7d2b02d4f24c72744", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.7634808902102, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7913979352562313, - "normalized_score": 79.13979352562313 - }, - "bbh": { - "name": "BBH", - "value": 0.6095558882654465, - "normalized_score": 44.00259101447648 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22054380664652568, - "normalized_score": 22.054380664652566 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35067114093959734, - "normalized_score": 13.422818791946312 - }, - "musr": { - "name": "MUSR", - "value": 0.42832291666666666, - "normalized_score": 12.673697916666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43558843085106386, - "normalized_score": 37.28760342789598 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-10", - "submission_date": "2024-11-28", - "generation": 1, - "base_model": "zelk12/MT-Gen2-GI-gemma-2-9B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 10.159, - "co2_cost": 3.737011711006484 - } - }, - { - "id": "zelk12/MT-Gen2-gemma-2-9B_bfloat16_c723f8b9b7334fddd1eb8b6e5230b76fb18139a5_True", - "model": { - "name": "zelk12/MT-Gen2-gemma-2-9B", - "sha": "c723f8b9b7334fddd1eb8b6e5230b76fb18139a5", - "precision": "bfloat16", 
- "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.81518978196506, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7907485471881275, - "normalized_score": 79.07485471881274 - }, - "bbh": { - "name": "BBH", - "value": 0.6100494662695, - "normalized_score": 44.10778153097642 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2190332326283988, - "normalized_score": 21.90332326283988 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3464765100671141, - "normalized_score": 12.863534675615215 - }, - "musr": { - "name": "MUSR", - "value": 0.4322916666666667, - "normalized_score": 13.303125000000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4387466755319149, - "normalized_score": 37.63851950354609 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-10", - "submission_date": "2024-11-10", - "generation": 1, - "base_model": "zelk12/MT-Gen2-gemma-2-9B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 10.159, - "co2_cost": 3.9788952495086507 - } - }, - { - "id": "zelk12/MT-Gen3-gemma-2-9B_bfloat16_84627594655776ce67f1e01233113b658333fa71_True", - "model": { - "name": "zelk12/MT-Gen3-gemma-2-9B", - "sha": "84627594655776ce67f1e01233113b658333fa71", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.862851201023894, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8020142111818863, - "normalized_score": 80.20142111818863 - }, - "bbh": { - "name": "BBH", - "value": 0.6097112889343964, - "normalized_score": 43.95064827244946 - }, - "math": { - "name": "MATH Level 5", - "value": 0.229607250755287, - "normalized_score": 22.9607250755287 - }, - "gpqa": { - "name": "GPQA", - "value": 0.348993288590604, - "normalized_score": 13.19910514541387 - }, - "musr": { - "name": "MUSR", - "value": 0.4216875, - "normalized_score": 11.577604166666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43558843085106386, - "normalized_score": 37.28760342789598 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-28", - "submission_date": "2024-11-30", - "generation": 1, - "base_model": "zelk12/MT-Gen3-gemma-2-9B (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 10.159, - "co2_cost": 3.6264962573562407 - } - }, - { - "id": "zelk12/MT-Gen4-gemma-2-9B_bfloat16_d44beca936d18a5b4b65799487504c1097ae1cb2_True", - "model": { - "name": "zelk12/MT-Gen4-gemma-2-9B", - "sha": "d44beca936d18a5b4b65799487504c1097ae1cb2", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.69186259861687, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7883005979689446, - "normalized_score": 78.83005979689446 - }, - "bbh": { - "name": "BBH", - "value": 0.6109884725351095, - "normalized_score": 43.96040416470536 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22356495468277945, - "normalized_score": 22.356495468277945 - }, - "gpqa": { - "name": "GPQA", - "value": 
0.3548657718120805, - "normalized_score": 13.982102908277403 - }, - "musr": { - "name": "MUSR", - "value": 0.4228020833333333, - "normalized_score": 11.383593750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4387466755319149, - "normalized_score": 37.63851950354609 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-13", - "submission_date": "2024-12-13", - "generation": 1, - "base_model": "zelk12/MT-Gen4-gemma-2-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 1, - "params_billions": 10.159, - "co2_cost": 3.5633668079338543 - } - }, - { - "id": "zelk12/MT-Gen5-gemma-2-9B_bfloat16_aef27049b2a3c52138016e9602280150f70eae32_True", - "model": { - "name": "zelk12/MT-Gen5-gemma-2-9B", - "sha": "aef27049b2a3c52138016e9602280150f70eae32", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.56384281127872, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7923221496739761, - "normalized_score": 79.23221496739761 - }, - "bbh": { - "name": "BBH", - "value": 0.6132787046647334, - "normalized_score": 44.39824361895261 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21525679758308158, - "normalized_score": 21.525679758308158 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35151006711409394, - "normalized_score": 13.534675615212524 - }, - "musr": { - "name": "MUSR", - "value": 0.42016666666666663, - "normalized_score": 10.887500000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4402426861702128, - "normalized_score": 37.80474290780142 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-22", - "submission_date": "2024-12-22", - "generation": 1, - "base_model": "zelk12/MT-Gen5-gemma-2-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 1, - "params_billions": 10.159, - "co2_cost": 3.5748551883605884 - } - }, - { - "id": "zelk12/MT-Gen6-gemma-2-9B_bfloat16_bd348fb1c1524e0d7d625200a292e46387b04da2_True", - "model": { - "name": "zelk12/MT-Gen6-gemma-2-9B", - "sha": "bd348fb1c1524e0d7d625200a292e46387b04da2", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 19.816468872767587, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1615668648075994, - "normalized_score": 16.15668648075994 - }, - "bbh": { - "name": "BBH", - "value": 0.5844669261858688, - "normalized_score": 39.39691509660613 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0823262839879154, - "normalized_score": 8.23262839879154 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33305369127516776, - "normalized_score": 11.073825503355701 - }, - "musr": { - "name": "MUSR", - "value": 0.40692708333333333, - "normalized_score": 8.86588541666667 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4165558510638298, - "normalized_score": 35.172872340425535 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-23", - "submission_date": "2025-01-23", - "generation": 1, - 
"base_model": "zelk12/MT-Gen6-gemma-2-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 1, - "params_billions": 10.159, - "co2_cost": 4.016062111190977 - } - }, - { - "id": "zelk12/MT-Gen6fix-gemma-2-9B_bfloat16_f733983a7f923b19fb6d1cbc9f1cdffe788984ef_True", - "model": { - "name": "zelk12/MT-Gen6fix-gemma-2-9B", - "sha": "f733983a7f923b19fb6d1cbc9f1cdffe788984ef", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 20.06441030320394, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.15759518078697854, - "normalized_score": 15.759518078697855 - }, - "bbh": { - "name": "BBH", - "value": 0.5917309697578781, - "normalized_score": 40.78635026219728 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08157099697885196, - "normalized_score": 8.157099697885197 - }, - "gpqa": { - "name": "GPQA", - "value": 0.337248322147651, - "normalized_score": 11.633109619686799 - }, - "musr": { - "name": "MUSR", - "value": 0.40841666666666665, - "normalized_score": 9.385416666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4119847074468085, - "normalized_score": 34.66496749408983 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-02", - "submission_date": "2025-02-02", - "generation": 1, - "base_model": "zelk12/MT-Gen6fix-gemma-2-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 1, - "params_billions": 10.159, - "co2_cost": 3.8012175211404218 - } - }, - { - "id": "zelk12/MT-Gen7-gemma-2-9B_bfloat16_b9316aea6888346724d9631e1987327e103529eb_True", - "model": { - "name": "zelk12/MT-Gen7-gemma-2-9B", - "sha": "b9316aea6888346724d9631e1987327e103529eb", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 20.391800709714477, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16641289556155447, - "normalized_score": 16.641289556155446 - }, - "bbh": { - "name": "BBH", - "value": 0.5935242633580781, - "normalized_score": 40.939071043896035 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0891238670694864, - "normalized_score": 8.91238670694864 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33557046979865773, - "normalized_score": 11.409395973154364 - }, - "musr": { - "name": "MUSR", - "value": 0.40978125, - "normalized_score": 9.755989583333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4122340425531915, - "normalized_score": 34.692671394799056 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-15", - "submission_date": "2025-02-15", - "generation": 1, - "base_model": "zelk12/MT-Gen7-gemma-2-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 2, - "params_billions": 10.159, - "co2_cost": 2.058312500916082 - } - }, - { - "id": "zelk12/MT-Max-Merge_02012025163610-gemma-2-9B_bfloat16_2f279c5c648c22e77327d0c0098f90b69312afd3_True", - "model": { - "name": "zelk12/MT-Max-Merge_02012025163610-gemma-2-9B", - "sha": "2f279c5c648c22e77327d0c0098f90b69312afd3", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - 
"average_score": 34.703708309941995, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7907485471881275, - "normalized_score": 79.07485471881274 - }, - "bbh": { - "name": "BBH", - "value": 0.6142243374633075, - "normalized_score": 44.5016839041576 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2212990936555891, - "normalized_score": 22.129909365558913 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35151006711409394, - "normalized_score": 13.534675615212524 - }, - "musr": { - "name": "MUSR", - "value": 0.4228020833333333, - "normalized_score": 11.250260416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4395777925531915, - "normalized_score": 37.730865839243506 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-02", - "submission_date": "2025-01-02", - "generation": 1, - "base_model": "zelk12/MT-Max-Merge_02012025163610-gemma-2-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 1, - "params_billions": 10.159, - "co2_cost": 3.69223875048688 - } - }, - { - "id": "zelk12/MT-Merge-gemma-2-9B_bfloat16_f4c3b001bc8692bcbbd7005b6f8db048e651aa46_True", - "model": { - "name": "zelk12/MT-Merge-gemma-2-9B", - "sha": "f4c3b001bc8692bcbbd7005b6f8db048e651aa46", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.87860604484159, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8035379459833243, - "normalized_score": 80.35379459833243 - }, - "bbh": { - "name": "BBH", - "value": 0.6118379158679297, - "normalized_score": 44.32084182103274 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22054380664652568, - "normalized_score": 22.054380664652566 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34815436241610737, - "normalized_score": 13.087248322147648 - }, - "musr": { - "name": "MUSR", - "value": 0.425625, - "normalized_score": 12.103125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43617021276595747, - "normalized_score": 37.35224586288417 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-22", - "submission_date": "2024-10-22", - "generation": 1, - "base_model": "zelk12/MT-Merge-gemma-2-9B (Merge)", - "hub_license": "", - "hub_hearts": 3, - "params_billions": 10.159, - "co2_cost": 6.438111674016961 - } - }, - { - "id": "zelk12/MT-Merge1-gemma-2-9B_bfloat16_71bb4577c877715f3f6646a224b184544639c856_True", - "model": { - "name": "zelk12/MT-Merge1-gemma-2-9B", - "sha": "71bb4577c877715f3f6646a224b184544639c856", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.85532711993079, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7901490268044344, - "normalized_score": 79.01490268044344 - }, - "bbh": { - "name": "BBH", - "value": 0.6099997385328262, - "normalized_score": 44.05824559954283 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22885196374622357, - "normalized_score": 22.885196374622357 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35151006711409394, - "normalized_score": 13.534675615212524 - }, - "musr": { - 
"name": "MUSR", - "value": 0.4243854166666667, - "normalized_score": 12.148177083333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43741688829787234, - "normalized_score": 37.49076536643025 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-07", - "submission_date": "2024-11-07", - "generation": 1, - "base_model": "zelk12/MT-Merge1-gemma-2-9B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 10.159, - "co2_cost": 6.05468073156111 - } - }, - { - "id": "zelk12/MT-Merge2-MU-gemma-2-MTg2MT1g2-9B_bfloat16_6d73ec2204800f7978c376567d3c6361c0a072cd_True", - "model": { - "name": "zelk12/MT-Merge2-MU-gemma-2-MTg2MT1g2-9B", - "sha": "6d73ec2204800f7978c376567d3c6361c0a072cd", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.891868837941765, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7955945779420825, - "normalized_score": 79.55945779420827 - }, - "bbh": { - "name": "BBH", - "value": 0.60838922159878, - "normalized_score": 43.840199946938355 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21827794561933533, - "normalized_score": 21.827794561933533 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35067114093959734, - "normalized_score": 13.422818791946312 - }, - "musr": { - "name": "MUSR", - "value": 0.43222916666666666, - "normalized_score": 13.228645833333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.437250664893617, - "normalized_score": 37.47229609929077 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-25", - "submission_date": "2024-11-28", - "generation": 1, - "base_model": "zelk12/MT-Merge2-MU-gemma-2-MTg2MT1g2-9B (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 10.159, - "co2_cost": 3.6897704411880747 - } - }, - { - "id": "zelk12/MT-Merge2-gemma-2-9B_bfloat16_a695e722e6fab77852f9fe59bbc4d69fe23c4208_True", - "model": { - "name": "zelk12/MT-Merge2-gemma-2-9B", - "sha": "a695e722e6fab77852f9fe59bbc4d69fe23c4208", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.82072732965434, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7877010775852515, - "normalized_score": 78.77010775852516 - }, - "bbh": { - "name": "BBH", - "value": 0.6106681877306871, - "normalized_score": 44.157196877023516 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2348942598187311, - "normalized_score": 23.48942598187311 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35067114093959734, - "normalized_score": 13.422818791946312 - }, - "musr": { - "name": "MUSR", - "value": 0.4216875, - "normalized_score": 11.510937500000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43816489361702127, - "normalized_score": 37.57387706855792 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-25", - "submission_date": "2024-11-25", - "generation": 1, - "base_model": "zelk12/MT-Merge2-gemma-2-9B 
(Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 10.159, - "co2_cost": 3.701582383026738 - } - }, - { - "id": "zelk12/MT-Merge3-gemma-2-9B_bfloat16_3f02f5e76d3aade3340307eb34b15bc9dd5a2023_True", - "model": { - "name": "zelk12/MT-Merge3-gemma-2-9B", - "sha": "3f02f5e76d3aade3340307eb34b15bc9dd5a2023", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.63973964338121, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7858526487497617, - "normalized_score": 78.58526487497618 - }, - "bbh": { - "name": "BBH", - "value": 0.6102112889343964, - "normalized_score": 44.06607310905077 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22054380664652568, - "normalized_score": 22.054380664652566 - }, - "gpqa": { - "name": "GPQA", - "value": 0.348993288590604, - "normalized_score": 13.19910514541387 - }, - "musr": { - "name": "MUSR", - "value": 0.42575, - "normalized_score": 12.452083333333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4373337765957447, - "normalized_score": 37.48153073286053 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-11", - "submission_date": "2024-12-11", - "generation": 1, - "base_model": "zelk12/MT-Merge3-gemma-2-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 0, - "params_billions": 10.159, - "co2_cost": 3.7620410574299785 - } - }, - { - "id": "zelk12/MT-Merge4-gemma-2-9B_bfloat16_5f076ad8a3f3c403840a1cd572a6018bea34e889_True", - "model": { - "name": "zelk12/MT-Merge4-gemma-2-9B", - "sha": "5f076ad8a3f3c403840a1cd572a6018bea34e889", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.59930867791183, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7807317916461656, - "normalized_score": 78.07317916461656 - }, - "bbh": { - "name": "BBH", - "value": 0.6118218058684427, - "normalized_score": 44.05349239999947 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21676737160120846, - "normalized_score": 21.676737160120847 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3523489932885906, - "normalized_score": 13.646532438478745 - }, - "musr": { - "name": "MUSR", - "value": 0.42943749999999997, - "normalized_score": 12.479687500000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43899601063829785, - "normalized_score": 37.66622340425532 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-21", - "submission_date": "2024-12-21", - "generation": 1, - "base_model": "zelk12/MT-Merge4-gemma-2-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 1, - "params_billions": 10.159, - "co2_cost": 3.74677027237328 - } - }, - { - "id": "zelk12/MT-Merge5-gemma-2-9B_bfloat16_d8adfc6c5395baaeb3f5e0b50c585ed3f662c4d9_True", - "model": { - "name": "zelk12/MT-Merge5-gemma-2-9B", - "sha": "d8adfc6c5395baaeb3f5e0b50c585ed3f662c4d9", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.6922400282852, - "has_chat_template": true - }, - "evaluations": { - 
"ifeval": { - "name": "IFEval", - "value": 0.7843787816327346, - "normalized_score": 78.43787816327344 - }, - "bbh": { - "name": "BBH", - "value": 0.6122674386670167, - "normalized_score": 44.240598262546506 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21827794561933533, - "normalized_score": 21.827794561933533 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35318791946308725, - "normalized_score": 13.758389261744966 - }, - "musr": { - "name": "MUSR", - "value": 0.42813541666666666, - "normalized_score": 12.250260416666668 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4387466755319149, - "normalized_score": 37.63851950354609 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-30", - "submission_date": "2024-12-30", - "generation": 1, - "base_model": "zelk12/MT-Merge5-gemma-2-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 2, - "params_billions": 10.159, - "co2_cost": 3.601579747299585 - } - }, - { - "id": "zelk12/MT-Merge6-gemma-2-9B_bfloat16_ce24f52c594decba760d14f77cc4d978a2b8f0dd_True", - "model": { - "name": "zelk12/MT-Merge6-gemma-2-9B", - "sha": "ce24f52c594decba760d14f77cc4d978a2b8f0dd", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 20.20346619991928, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16946036516443036, - "normalized_score": 16.946036516443037 - }, - "bbh": { - "name": "BBH", - "value": 0.5949106849534558, - "normalized_score": 41.321961057304115 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08006042296072508, - "normalized_score": 8.006042296072508 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3288590604026846, - "normalized_score": 10.514541387024611 - }, - "musr": { - "name": "MUSR", - "value": 0.40978125, - "normalized_score": 9.822656250000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.41148603723404253, - "normalized_score": 34.6095596926714 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-13", - "submission_date": "2025-02-13", - "generation": 1, - "base_model": "zelk12/MT-Merge6-gemma-2-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 2, - "params_billions": 10.159, - "co2_cost": 2.0751509617498924 - } - }, - { - "id": "zelk12/MT-gemma-2-9B_bfloat16_24e1f894517b86dd866c1a5999ced4a5924dcd90_True", - "model": { - "name": "zelk12/MT-gemma-2-9B", - "sha": "24e1f894517b86dd866c1a5999ced4a5924dcd90", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 33.61322699458912, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7968434863938794, - "normalized_score": 79.68434863938793 - }, - "bbh": { - "name": "BBH", - "value": 0.6063604478633632, - "normalized_score": 43.32424255563143 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2054380664652568, - "normalized_score": 20.54380664652568 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34563758389261745, - "normalized_score": 12.751677852348994 - }, - "musr": { - "name": "MUSR", - "value": 0.40711458333333334, - "normalized_score": 9.555989583333337 - }, - "mmlu_pro": 
{ - "name": "MMLU-PRO", - "value": 0.42237367021276595, - "normalized_score": 35.819296690307326 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-11", - "submission_date": "2024-10-11", - "generation": 1, - "base_model": "zelk12/MT-gemma-2-9B (Merge)", - "hub_license": "", - "hub_hearts": 3, - "params_billions": 10.159, - "co2_cost": 6.046797818737478 - } - }, - { - "id": "zelk12/MT1-Gen1-gemma-2-9B_bfloat16_939ac6c12059a18fc1117cdb3861f46816eff2fb_True", - "model": { - "name": "zelk12/MT1-Gen1-gemma-2-9B", - "sha": "939ac6c12059a18fc1117cdb3861f46816eff2fb", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.93165507194035, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7974430067775724, - "normalized_score": 79.74430067775725 - }, - "bbh": { - "name": "BBH", - "value": 0.6117787046647335, - "normalized_score": 44.27328174531427 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2243202416918429, - "normalized_score": 22.432024169184288 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34395973154362414, - "normalized_score": 12.527964205816552 - }, - "musr": { - "name": "MUSR", - "value": 0.43095833333333333, - "normalized_score": 13.103125000000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43758311170212766, - "normalized_score": 37.50923463356973 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-23", - "submission_date": "2024-10-24", - "generation": 1, - "base_model": "zelk12/MT1-Gen1-gemma-2-9B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 10.159, - "co2_cost": 6.724969438917603 - } - }, - { - "id": "zelk12/MT1-Gen2-gemma-2-9B_bfloat16_aeaca7dc7d50a425a5d3c38d7c4a7daf1c772ad4_True", - "model": { - "name": "zelk12/MT1-Gen2-gemma-2-9B", - "sha": "aeaca7dc7d50a425a5d3c38d7c4a7daf1c772ad4", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 35.005439602301884, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7983672211953173, - "normalized_score": 79.83672211953174 - }, - "bbh": { - "name": "BBH", - "value": 0.6095989894691557, - "normalized_score": 43.91919055805058 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22507552870090636, - "normalized_score": 22.507552870090635 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3523489932885906, - "normalized_score": 13.646532438478745 - }, - "musr": { - "name": "MUSR", - "value": 0.42835416666666665, - "normalized_score": 12.844270833333335 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43550531914893614, - "normalized_score": 37.278368794326234 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-11", - "submission_date": "2024-11-11", - "generation": 1, - "base_model": "zelk12/MT1-Gen2-gemma-2-9B (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 10.159, - "co2_cost": 3.991990303629385 - } - }, - { - "id": 
"zelk12/MT1-Gen3-gemma-2-9B_bfloat16_5cc4ee1c70f08a5b1a195d43f044d9bf6fca29f5_True", - "model": { - "name": "zelk12/MT1-Gen3-gemma-2-9B", - "sha": "5cc4ee1c70f08a5b1a195d43f044d9bf6fca29f5", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.739851169246734, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.795969139660545, - "normalized_score": 79.5969139660545 - }, - "bbh": { - "name": "BBH", - "value": 0.6101551392017761, - "normalized_score": 43.99030612548967 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2243202416918429, - "normalized_score": 22.432024169184288 - }, - "gpqa": { - "name": "GPQA", - "value": 0.348993288590604, - "normalized_score": 13.19910514541387 - }, - "musr": { - "name": "MUSR", - "value": 0.42432291666666666, - "normalized_score": 12.007031250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43492353723404253, - "normalized_score": 37.21372635933806 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-01", - "submission_date": "2024-12-01", - "generation": 1, - "base_model": "zelk12/MT1-Gen3-gemma-2-9B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 10.159, - "co2_cost": 3.889753245978531 - } - }, - { - "id": "zelk12/MT1-Gen4-gemma-2-9B_bfloat16_5eaf1ef67f32805c6fbc0b51418a8caf866661a2_True", - "model": { - "name": "zelk12/MT1-Gen4-gemma-2-9B", - "sha": "5eaf1ef67f32805c6fbc0b51418a8caf866661a2", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.28920938772912, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7941207108250552, - "normalized_score": 79.41207108250552 - }, - "bbh": { - "name": "BBH", - "value": 0.6057567677609054, - "normalized_score": 43.145368161068774 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21601208459214502, - "normalized_score": 21.6012084592145 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34731543624161076, - "normalized_score": 12.975391498881436 - }, - "musr": { - "name": "MUSR", - "value": 0.42311458333333335, - "normalized_score": 12.089322916666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.42860704787234044, - "normalized_score": 36.51189420803782 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-14", - "submission_date": "2024-12-14", - "generation": 1, - "base_model": "zelk12/MT1-Gen4-gemma-2-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 1, - "params_billions": 10.159, - "co2_cost": 3.484124550367983 - } - }, - { - "id": "zelk12/MT1-Gen5-IF-gemma-2-S2DMv1-9B_bfloat16_53a780fd3a2d42709a0f517cac019234d7d71267_True", - "model": { - "name": "zelk12/MT1-Gen5-IF-gemma-2-S2DMv1-9B", - "sha": "53a780fd3a2d42709a0f517cac019234d7d71267", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 33.781043838460555, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7929216700576691, - "normalized_score": 79.29216700576691 - }, - 
"bbh": { - "name": "BBH", - "value": 0.6000001533684681, - "normalized_score": 42.20102785338743 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20317220543806647, - "normalized_score": 20.31722054380665 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34395973154362414, - "normalized_score": 12.527964205816552 - }, - "musr": { - "name": "MUSR", - "value": 0.4244791666666667, - "normalized_score": 12.593229166666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.42179188829787234, - "normalized_score": 35.754654255319146 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-24", - "submission_date": "2024-12-31", - "generation": 1, - "base_model": "zelk12/MT1-Gen5-IF-gemma-2-S2DMv1-9B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 10.159, - "co2_cost": 3.4142540478852608 - } - }, - { - "id": "zelk12/MT1-Gen5-gemma-2-9B_bfloat16_4eb54f9a0a9f482537b0e79000ffe7fb9d024c38_True", - "model": { - "name": "zelk12/MT1-Gen5-gemma-2-9B", - "sha": "4eb54f9a0a9f482537b0e79000ffe7fb9d024c38", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 33.556617199966034, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7794828831943688, - "normalized_score": 77.94828831943687 - }, - "bbh": { - "name": "BBH", - "value": 0.6017455017631886, - "normalized_score": 42.49676419899819 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20770392749244712, - "normalized_score": 20.770392749244714 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3464765100671141, - "normalized_score": 12.863534675615215 - }, - "musr": { - "name": "MUSR", - "value": 0.41914583333333333, - "normalized_score": 11.459895833333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.42220744680851063, - "normalized_score": 35.80082742316785 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-24", - "submission_date": "2024-12-24", - "generation": 1, - "base_model": "zelk12/MT1-Gen5-gemma-2-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 1, - "params_billions": 10.159, - "co2_cost": 3.5104606731262233 - } - }, - { - "id": "zelk12/MT1-Gen6-gemma-2-9B_bfloat16_7834a5b83bf7a9a75a0f7d75603cc166627f1e26_True", - "model": { - "name": "zelk12/MT1-Gen6-gemma-2-9B", - "sha": "7834a5b83bf7a9a75a0f7d75603cc166627f1e26", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 19.919694219754245, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16336542595867853, - "normalized_score": 16.33654259586785 - }, - "bbh": { - "name": "BBH", - "value": 0.5943545352208355, - "normalized_score": 41.26198928071469 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08081570996978851, - "normalized_score": 8.08157099697885 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32802013422818793, - "normalized_score": 10.402684563758392 - }, - "musr": { - "name": "MUSR", - "value": 0.40444791666666663, - "normalized_score": 8.62265625 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4133144946808511, - "normalized_score": 
34.812721631205676 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-04", - "submission_date": "2025-02-04", - "generation": 1, - "base_model": "zelk12/MT1-Gen6-gemma-2-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 2, - "params_billions": 10.159, - "co2_cost": 3.846159834892288 - } - }, - { - "id": "zelk12/MT1-Gen7-gemma-2-9B_bfloat16_41b009dd08c26b26d2cf4df3bc67d822a9e6f38e_True", - "model": { - "name": "zelk12/MT1-Gen7-gemma-2-9B", - "sha": "41b009dd08c26b26d2cf4df3bc67d822a9e6f38e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 20.19902054572098, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16336542595867853, - "normalized_score": 16.33654259586785 - }, - "bbh": { - "name": "BBH", - "value": 0.5937953240176393, - "normalized_score": 41.18207626381973 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08308157099697885, - "normalized_score": 8.308157099697885 - }, - "gpqa": { - "name": "GPQA", - "value": 0.32802013422818793, - "normalized_score": 10.402684563758392 - }, - "musr": { - "name": "MUSR", - "value": 0.41111458333333334, - "normalized_score": 10.022656250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4144780585106383, - "normalized_score": 34.94200650118203 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-19", - "submission_date": "2025-02-19", - "generation": 1, - "base_model": "zelk12/MT1-Gen7-gemma-2-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 2, - "params_billions": 10.159, - "co2_cost": 4.130285589661694 - } - }, - { - "id": "zelk12/MT1-Max-Merge_02012025163610-gemma-2-9B_bfloat16_e9177c45a9dc1ff2ace378d4809ea92ff6e477c4_True", - "model": { - "name": "zelk12/MT1-Max-Merge_02012025163610-gemma-2-9B", - "sha": "e9177c45a9dc1ff2ace378d4809ea92ff6e477c4", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.87300068519695, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7928718023732585, - "normalized_score": 79.28718023732586 - }, - "bbh": { - "name": "BBH", - "value": 0.6122674386670167, - "normalized_score": 44.2263771296489 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22280966767371602, - "normalized_score": 22.280966767371602 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3548657718120805, - "normalized_score": 13.982102908277403 - }, - "musr": { - "name": "MUSR", - "value": 0.4255, - "normalized_score": 11.887500000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43816489361702127, - "normalized_score": 37.57387706855792 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-04", - "submission_date": "2025-01-04", - "generation": 1, - "base_model": "zelk12/MT1-Max-Merge_02012025163610-gemma-2-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 1, - "params_billions": 10.159, - "co2_cost": 3.7559596618826125 - } - }, - { - "id": 
"zelk12/MT1-gemma-2-9B_bfloat16_3a5e77518ca9c3c8ea2edac4c03bc220ee91f3ed_True", - "model": { - "name": "zelk12/MT1-gemma-2-9B", - "sha": "3a5e77518ca9c3c8ea2edac4c03bc220ee91f3ed", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.86746462523027, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7946703635243377, - "normalized_score": 79.46703635243378 - }, - "bbh": { - "name": "BBH", - "value": 0.6108745950756924, - "normalized_score": 44.16152621661877 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22356495468277945, - "normalized_score": 22.356495468277945 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34563758389261745, - "normalized_score": 12.751677852348994 - }, - "musr": { - "name": "MUSR", - "value": 0.43222916666666666, - "normalized_score": 13.161979166666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4357546542553192, - "normalized_score": 37.30607269503546 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-12", - "submission_date": "2024-10-14", - "generation": 1, - "base_model": "zelk12/MT1-gemma-2-9B (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 10.159, - "co2_cost": 6.6914387804832085 - } - }, - { - "id": "zelk12/MT2-Gen1-gemma-2-9B_bfloat16_167abf8eb4ea01fecd42dc32ad68160c51a8685a_True", - "model": { - "name": "zelk12/MT2-Gen1-gemma-2-9B", - "sha": "167abf8eb4ea01fecd42dc32ad68160c51a8685a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.46173377708927, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7855778224001206, - "normalized_score": 78.55778224001206 - }, - "bbh": { - "name": "BBH", - "value": 0.6100802027920743, - "normalized_score": 44.141103157274806 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2212990936555891, - "normalized_score": 22.129909365558913 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34312080536912754, - "normalized_score": 12.416107382550338 - }, - "musr": { - "name": "MUSR", - "value": 0.42432291666666666, - "normalized_score": 12.007031250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4376662234042553, - "normalized_score": 37.51846926713948 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-24", - "submission_date": "2024-10-27", - "generation": 1, - "base_model": "zelk12/MT2-Gen1-gemma-2-9B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 10.159, - "co2_cost": 6.766419553650529 - } - }, - { - "id": "zelk12/MT2-Gen2-gemma-2-9B_bfloat16_24c487499b5833424ffb9932eed838bb254f61b4_True", - "model": { - "name": "zelk12/MT2-Gen2-gemma-2-9B", - "sha": "24c487499b5833424ffb9932eed838bb254f61b4", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.64186698980044, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7889001183526376, - "normalized_score": 78.89001183526376 - }, - "bbh": { - "name": "BBH", - "value": 
0.6092917531936446, - "normalized_score": 44.04450256220759 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21827794561933533, - "normalized_score": 21.827794561933533 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3464765100671141, - "normalized_score": 12.863534675615215 - }, - "musr": { - "name": "MUSR", - "value": 0.42702083333333335, - "normalized_score": 12.577604166666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43882978723404253, - "normalized_score": 37.64775413711584 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-12", - "submission_date": "2024-11-12", - "generation": 1, - "base_model": "zelk12/MT2-Gen2-gemma-2-9B (Merge)", - "hub_license": "", - "hub_hearts": 3, - "params_billions": 10.159, - "co2_cost": 4.074882610785808 - } - }, - { - "id": "zelk12/MT2-Gen3-gemma-2-9B_bfloat16_bb750c2b76328c6dbc9adf9ae3d09551f3723758_True", - "model": { - "name": "zelk12/MT2-Gen3-gemma-2-9B", - "sha": "bb750c2b76328c6dbc9adf9ae3d09551f3723758", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.26447113111225, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7810066179958066, - "normalized_score": 78.10066179958066 - }, - "bbh": { - "name": "BBH", - "value": 0.6104772065373926, - "normalized_score": 44.007274058843926 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2107250755287009, - "normalized_score": 21.07250755287009 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3464765100671141, - "normalized_score": 12.863534675615215 - }, - "musr": { - "name": "MUSR", - "value": 0.4230833333333333, - "normalized_score": 12.052083333333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43741688829787234, - "normalized_score": 37.49076536643025 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-04", - "submission_date": "2024-12-04", - "generation": 1, - "base_model": "zelk12/MT2-Gen3-gemma-2-9B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 10.159, - "co2_cost": 3.848753509583258 - } - }, - { - "id": "zelk12/MT2-Gen4-gemma-2-9B_bfloat16_7a07de3719c3b8b8e90e79a65798bcc4ef454fc6_True", - "model": { - "name": "zelk12/MT2-Gen4-gemma-2-9B", - "sha": "7a07de3719c3b8b8e90e79a65798bcc4ef454fc6", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.202321593914604, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7895993741051521, - "normalized_score": 78.95993741051521 - }, - "bbh": { - "name": "BBH", - "value": 0.609655139201776, - "normalized_score": 43.778361681045226 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22356495468277945, - "normalized_score": 22.356495468277945 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34563758389261745, - "normalized_score": 12.751677852348994 - }, - "musr": { - "name": "MUSR", - "value": 0.41254166666666664, - "normalized_score": 10.467708333333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43209773936170215, - "normalized_score": 36.89974881796691 - } - }, - "features": { - 
"is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-15", - "submission_date": "2024-12-15", - "generation": 1, - "base_model": "zelk12/MT2-Gen4-gemma-2-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 1, - "params_billions": 10.159, - "co2_cost": 3.572871445220205 - } - }, - { - "id": "zelk12/MT2-Gen5-gemma-2-9B_bfloat16_94711cc263eab1464fa6b01c28ee5171b4467d84_True", - "model": { - "name": "zelk12/MT2-Gen5-gemma-2-9B", - "sha": "94711cc263eab1464fa6b01c28ee5171b4467d84", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.04923355550058, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7749116787900548, - "normalized_score": 77.49116787900549 - }, - "bbh": { - "name": "BBH", - "value": 0.6063933817527739, - "normalized_score": 43.12428137338583 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2107250755287009, - "normalized_score": 21.07250755287009 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35151006711409394, - "normalized_score": 13.534675615212524 - }, - "musr": { - "name": "MUSR", - "value": 0.42441666666666666, - "normalized_score": 12.385416666666663 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43018617021276595, - "normalized_score": 36.68735224586289 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-25", - "submission_date": "2024-12-25", - "generation": 1, - "base_model": "zelk12/MT2-Gen5-gemma-2-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 0, - "params_billions": 10.159, - "co2_cost": 3.5242714236898025 - } - }, - { - "id": "zelk12/MT2-Gen6-gemma-2-9B_bfloat16_a1b1a2009841dd0b5bf00ca65b631bc771146a65_True", - "model": { - "name": "zelk12/MT2-Gen6-gemma-2-9B", - "sha": "a1b1a2009841dd0b5bf00ca65b631bc771146a65", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 20.837841635914646, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.16641289556155447, - "normalized_score": 16.641289556155446 - }, - "bbh": { - "name": "BBH", - "value": 0.595964957637105, - "normalized_score": 41.771094311913785 - }, - "math": { - "name": "MATH Level 5", - "value": 0.08459214501510574, - "normalized_score": 8.459214501510575 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33808724832214765, - "normalized_score": 11.74496644295302 - }, - "musr": { - "name": "MUSR", - "value": 0.41371874999999997, - "normalized_score": 10.748177083333331 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.42096077127659576, - "normalized_score": 35.662307919621746 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-05", - "submission_date": "2025-02-05", - "generation": 1, - "base_model": "zelk12/MT2-Gen6-gemma-2-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 1, - "params_billions": 10.159, - "co2_cost": 3.8988147594218003 - } - }, - { - "id": "zelk12/MT2-Gen7-gemma-2-9B_bfloat16_ccefd8eab76f7a2a25e2974e9545ba176078fe8f_True", - "model": { - "name": 
"zelk12/MT2-Gen7-gemma-2-9B", - "sha": "ccefd8eab76f7a2a25e2974e9545ba176078fe8f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 22.28290945432224, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17615482475387528, - "normalized_score": 17.615482475387527 - }, - "bbh": { - "name": "BBH", - "value": 0.6078922830693557, - "normalized_score": 43.57419892144867 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10196374622356495, - "normalized_score": 10.196374622356496 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3548657718120805, - "normalized_score": 13.982102908277403 - }, - "musr": { - "name": "MUSR", - "value": 0.42032291666666666, - "normalized_score": 11.540364583333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4311003989361702, - "normalized_score": 36.788933215130015 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-22", - "submission_date": "2025-02-22", - "generation": 1, - "base_model": "zelk12/MT2-Gen7-gemma-2-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 1, - "params_billions": 10.159, - "co2_cost": 2.0336772099521174 - } - }, - { - "id": "zelk12/MT2-Max-Merge_02012025163610-gemma-2-9B_bfloat16_76d8a9cc371af30b5843fb69edc25ff767d6741f_True", - "model": { - "name": "zelk12/MT2-Max-Merge_02012025163610-gemma-2-9B", - "sha": "76d8a9cc371af30b5843fb69edc25ff767d6741f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.675340724715895, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7901490268044344, - "normalized_score": 79.01490268044344 - }, - "bbh": { - "name": "BBH", - "value": 0.6108461203950706, - "normalized_score": 44.04081717896338 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2243202416918429, - "normalized_score": 22.432024169184288 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35151006711409394, - "normalized_score": 13.534675615212524 - }, - "musr": { - "name": "MUSR", - "value": 0.42283333333333334, - "normalized_score": 11.354166666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4390791223404255, - "normalized_score": 37.67545803782505 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-07", - "submission_date": "2025-01-07", - "generation": 1, - "base_model": "zelk12/MT2-Max-Merge_02012025163610-gemma-2-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 0, - "params_billions": 10.159, - "co2_cost": 3.6579498577309617 - } - }, - { - "id": "zelk12/MT2-gemma-2-9B_bfloat16_d20d7169ce0f53d586504c50b4b7dc470bf8a781_True", - "model": { - "name": "zelk12/MT2-gemma-2-9B", - "sha": "d20d7169ce0f53d586504c50b4b7dc470bf8a781", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.516135363494435, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7885754243185858, - "normalized_score": 78.85754243185858 - }, - "bbh": { - "name": "BBH", - "value": 0.611511004530543, - 
"normalized_score": 44.16748136989228 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2212990936555891, - "normalized_score": 22.129909365558913 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34731543624161076, - "normalized_score": 12.975391498881436 - }, - "musr": { - "name": "MUSR", - "value": 0.42165625, - "normalized_score": 11.540364583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43683510638297873, - "normalized_score": 37.42612293144209 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-14", - "submission_date": "2024-10-15", - "generation": 1, - "base_model": "zelk12/MT2-gemma-2-9B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 10.159, - "co2_cost": 6.3882197422474745 - } - }, - { - "id": "zelk12/MT3-Gen1-gemma-2-9B_bfloat16_cd78df9e67e2e710d8d305f5a03a92c01b1b425d_True", - "model": { - "name": "zelk12/MT3-Gen1-gemma-2-9B", - "sha": "cd78df9e67e2e710d8d305f5a03a92c01b1b425d", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.08858123026874, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7837792612490415, - "normalized_score": 78.37792612490415 - }, - "bbh": { - "name": "BBH", - "value": 0.6106760932030332, - "normalized_score": 44.119494687402835 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21450151057401812, - "normalized_score": 21.45015105740181 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3464765100671141, - "normalized_score": 12.863534675615215 - }, - "musr": { - "name": "MUSR", - "value": 0.41511458333333334, - "normalized_score": 10.755989583333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43267952127659576, - "normalized_score": 36.96439125295508 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-24", - "submission_date": "2024-10-28", - "generation": 1, - "base_model": "zelk12/MT3-Gen1-gemma-2-9B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 10.159, - "co2_cost": 6.2273318034282115 - } - }, - { - "id": "zelk12/MT3-Gen2-gemma-2-9B_bfloat16_e4ef057d20751d89934025e9088ba98d89b921b5_True", - "model": { - "name": "zelk12/MT3-Gen2-gemma-2-9B", - "sha": "e4ef057d20751d89934025e9088ba98d89b921b5", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.34982932139744, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7843289139483238, - "normalized_score": 78.43289139483238 - }, - "bbh": { - "name": "BBH", - "value": 0.6091473194676166, - "normalized_score": 43.94022574925496 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22356495468277945, - "normalized_score": 22.356495468277945 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3573825503355705, - "normalized_score": 14.317673378076066 - }, - "musr": { - "name": "MUSR", - "value": 0.41111458333333334, - "normalized_score": 10.022656250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43326130319148937, - "normalized_score": 37.02903368794326 - } - }, - "features": { - "is_not_available_on_hub": false, - 
"is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-20", - "submission_date": "2024-11-20", - "generation": 1, - "base_model": "zelk12/MT3-Gen2-gemma-2-9B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 10.159, - "co2_cost": 3.8382167971966807 - } - }, - { - "id": "zelk12/MT3-Gen3-gemma-2-9B_bfloat16_4ad54d6295f6364aa87f7aaa2a7bd112fb92ec00_True", - "model": { - "name": "zelk12/MT3-Gen3-gemma-2-9B", - "sha": "4ad54d6295f6364aa87f7aaa2a7bd112fb92ec00", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.43703358555928, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7856276900845313, - "normalized_score": 78.56276900845313 - }, - "bbh": { - "name": "BBH", - "value": 0.6088892215987798, - "normalized_score": 43.78374025194924 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21525679758308158, - "normalized_score": 21.525679758308158 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35151006711409394, - "normalized_score": 13.534675615212524 - }, - "musr": { - "name": "MUSR", - "value": 0.42575, - "normalized_score": 12.518750000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4302692819148936, - "normalized_score": 36.696586879432616 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-07", - "submission_date": "2024-12-07", - "generation": 1, - "base_model": "zelk12/MT3-Gen3-gemma-2-9B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 10.159, - "co2_cost": 3.8089251355109486 - } - }, - { - "id": "zelk12/MT3-Gen4-gemma-2-9B_bfloat16_22066f5a275797ae870d2c58e8c75ac933ee1adf_True", - "model": { - "name": "zelk12/MT3-Gen4-gemma-2-9B", - "sha": "22066f5a275797ae870d2c58e8c75ac933ee1adf", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.517532467321914, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7737126380226687, - "normalized_score": 77.37126380226687 - }, - "bbh": { - "name": "BBH", - "value": 0.6100843629460684, - "normalized_score": 43.77959090180507 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20619335347432025, - "normalized_score": 20.619335347432024 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34731543624161076, - "normalized_score": 12.975391498881436 - }, - "musr": { - "name": "MUSR", - "value": 0.4476354166666667, - "normalized_score": 14.721093750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4387466755319149, - "normalized_score": 37.63851950354609 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-16", - "submission_date": "2024-12-16", - "generation": 1, - "base_model": "zelk12/MT3-Gen4-gemma-2-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 4, - "params_billions": 10.159, - "co2_cost": 3.6417066861965695 - } - }, - { - "id": "zelk12/MT3-Gen5-gemma-2-9B_bfloat16_b02315782a4719734b159220ca1eef0770d022a5_True", - "model": { - "name": "zelk12/MT3-Gen5-gemma-2-9B", - "sha": 
"b02315782a4719734b159220ca1eef0770d022a5", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.7576818343369, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7990166092634211, - "normalized_score": 79.90166092634212 - }, - "bbh": { - "name": "BBH", - "value": 0.6098615465467813, - "normalized_score": 43.951198645912804 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22658610271903323, - "normalized_score": 22.658610271903324 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35318791946308725, - "normalized_score": 13.758389261744966 - }, - "musr": { - "name": "MUSR", - "value": 0.41911458333333335, - "normalized_score": 11.422656250000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43168218085106386, - "normalized_score": 36.8535756501182 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-26", - "submission_date": "2024-12-26", - "generation": 1, - "base_model": "zelk12/MT3-Gen5-gemma-2-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 1, - "params_billions": 10.159, - "co2_cost": 3.8812542327353103 - } - }, - { - "id": "zelk12/MT3-Gen5-gemma-2-9B_v1_bfloat16_40bfcc25ff421ff83d67a9c46474a0b40abf4f4a_True", - "model": { - "name": "zelk12/MT3-Gen5-gemma-2-9B_v1", - "sha": "40bfcc25ff421ff83d67a9c46474a0b40abf4f4a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.734556510838615, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7996161296471141, - "normalized_score": 79.9616129647114 - }, - "bbh": { - "name": "BBH", - "value": 0.6113330718661595, - "normalized_score": 44.15960180869316 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22280966767371602, - "normalized_score": 22.280966767371602 - }, - "gpqa": { - "name": "GPQA", - "value": 0.348993288590604, - "normalized_score": 13.19910514541387 - }, - "musr": { - "name": "MUSR", - "value": 0.4203854166666667, - "normalized_score": 11.481510416666671 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4359208776595745, - "normalized_score": 37.32454196217494 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-27", - "submission_date": "2024-12-27", - "generation": 1, - "base_model": "zelk12/MT3-Gen5-gemma-2-9B_v1 (Merge)", - "hub_license": "gemma", - "hub_hearts": 2, - "params_billions": 10.159, - "co2_cost": 3.8563037332700265 - } - }, - { - "id": "zelk12/MT3-Gen6-gemma-2-9B_bfloat16_a8f0594d31040041bcfa0ab1e9521543d9b91040_True", - "model": { - "name": "zelk12/MT3-Gen6-gemma-2-9B", - "sha": "a8f0594d31040041bcfa0ab1e9521543d9b91040", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 21.10285538804693, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17615482475387528, - "normalized_score": 17.615482475387527 - }, - "bbh": { - "name": "BBH", - "value": 0.6020072592121909, - "normalized_score": 42.63936302517612 - }, - "math": { - "name": "MATH Level 5", - "value": 
0.08836858006042296, - "normalized_score": 8.836858006042297 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34312080536912754, - "normalized_score": 12.416107382550338 - }, - "musr": { - "name": "MUSR", - "value": 0.4125729166666667, - "normalized_score": 10.638281250000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.41023936170212766, - "normalized_score": 34.47104018912529 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-02-12", - "submission_date": "2025-02-12", - "generation": 1, - "base_model": "zelk12/MT3-Gen6-gemma-2-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 1, - "params_billions": 10.159, - "co2_cost": 2.047318577948042 - } - }, - { - "id": "zelk12/MT3-Max-Merge_02012025163610-gemma-2-9B_bfloat16_6499758258ac6278e7fdc4ba6719ab28f35709e8_True", - "model": { - "name": "zelk12/MT3-Max-Merge_02012025163610-gemma-2-9B", - "sha": "6499758258ac6278e7fdc4ba6719ab28f35709e8", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 22.46770825046867, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17615482475387528, - "normalized_score": 17.615482475387527 - }, - "bbh": { - "name": "BBH", - "value": 0.6123461203950705, - "normalized_score": 44.206519793342466 - }, - "math": { - "name": "MATH Level 5", - "value": 0.10120845921450151, - "normalized_score": 10.120845921450151 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35067114093959734, - "normalized_score": 13.422818791946312 - }, - "musr": { - "name": "MUSR", - "value": 0.42546875, - "normalized_score": 11.78359375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4389128989361702, - "normalized_score": 37.65698877068557 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-09", - "submission_date": "2025-01-09", - "generation": 1, - "base_model": "zelk12/MT3-Max-Merge_02012025163610-gemma-2-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 0, - "params_billions": 10.159, - "co2_cost": 3.8925640601125973 - } - }, - { - "id": "zelk12/MT3-gemma-2-9B_bfloat16_d501b6ea59896fac3dc0a623501a5493b3573cde_True", - "model": { - "name": "zelk12/MT3-gemma-2-9B", - "sha": "d501b6ea59896fac3dc0a623501a5493b3573cde", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.21556543403697, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7786085364610345, - "normalized_score": 77.86085364610345 - }, - "bbh": { - "name": "BBH", - "value": 0.61307842026088, - "normalized_score": 44.24846451595969 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21676737160120846, - "normalized_score": 21.676737160120847 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3447986577181208, - "normalized_score": 12.639821029082773 - }, - "musr": { - "name": "MUSR", - "value": 0.4242916666666667, - "normalized_score": 11.903125000000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43267952127659576, - "normalized_score": 36.96439125295508 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - 
"is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-15", - "submission_date": "2024-10-16", - "generation": 1, - "base_model": "zelk12/MT3-gemma-2-9B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 10.159, - "co2_cost": 6.273305775537166 - } - }, - { - "id": "zelk12/MT4-Gen1-gemma-2-9B_bfloat16_6ed2c66246c7f354decfd3579acb534dc4b0b48c_True", - "model": { - "name": "zelk12/MT4-Gen1-gemma-2-9B", - "sha": "6ed2c66246c7f354decfd3579acb534dc4b0b48c", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.703100960707964, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7894996387363307, - "normalized_score": 78.94996387363307 - }, - "bbh": { - "name": "BBH", - "value": 0.6093827996028333, - "normalized_score": 44.0095244503664 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21978851963746224, - "normalized_score": 21.978851963746223 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34395973154362414, - "normalized_score": 12.527964205816552 - }, - "musr": { - "name": "MUSR", - "value": 0.43222916666666666, - "normalized_score": 13.0953125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4389128989361702, - "normalized_score": 37.65698877068557 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-25", - "submission_date": "2024-10-29", - "generation": 1, - "base_model": "zelk12/MT4-Gen1-gemma-2-9B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 10.159, - "co2_cost": 4.2071216016304085 - } - }, - { - "id": "zelk12/MT4-Gen2-gemma-2-9B_bfloat16_4d61a5799b11641a24e8b0f3eda0e987ff392089_True", - "model": { - "name": "zelk12/MT4-Gen2-gemma-2-9B", - "sha": "4d61a5799b11641a24e8b0f3eda0e987ff392089", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 35.05354383653148, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8050616807847621, - "normalized_score": 80.50616807847622 - }, - "bbh": { - "name": "BBH", - "value": 0.6108348543973539, - "normalized_score": 44.17665766133724 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2326283987915408, - "normalized_score": 23.26283987915408 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34563758389261745, - "normalized_score": 12.751677852348994 - }, - "musr": { - "name": "MUSR", - "value": 0.42565625, - "normalized_score": 12.207031250000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4367519946808511, - "normalized_score": 37.41688829787233 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-22", - "submission_date": "2024-11-22", - "generation": 1, - "base_model": "zelk12/MT4-Gen2-gemma-2-9B (Merge)", - "hub_license": "", - "hub_hearts": 4, - "params_billions": 10.159, - "co2_cost": 3.954094529653287 - } - }, - { - "id": "zelk12/MT4-Gen3-gemma-2-9B_bfloat16_f93026d28ca1707e8c21620be8558eed6be43b1c_True", - "model": { - "name": "zelk12/MT4-Gen3-gemma-2-9B", - "sha": "f93026d28ca1707e8c21620be8558eed6be43b1c", - "precision": "bfloat16", - "type": 
"basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.3726822248935, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7840540875986826, - "normalized_score": 78.40540875986827 - }, - "bbh": { - "name": "BBH", - "value": 0.6087112889343964, - "normalized_score": 43.894390102514826 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2190332326283988, - "normalized_score": 21.90332326283988 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34395973154362414, - "normalized_score": 12.527964205816552 - }, - "musr": { - "name": "MUSR", - "value": 0.42432291666666666, - "normalized_score": 11.940364583333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4380817819148936, - "normalized_score": 37.56464243498817 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-08", - "submission_date": "2024-12-08", - "generation": 1, - "base_model": "zelk12/MT4-Gen3-gemma-2-9B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 10.159, - "co2_cost": 3.9174025312379754 - } - }, - { - "id": "zelk12/MT4-Gen4-gemma-2-9B_bfloat16_51f3deb0aba90d82fc3f21894b3171fa5afbffa5_True", - "model": { - "name": "zelk12/MT4-Gen4-gemma-2-9B", - "sha": "51f3deb0aba90d82fc3f21894b3171fa5afbffa5", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.381140380385126, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7874262512356104, - "normalized_score": 78.74262512356104 - }, - "bbh": { - "name": "BBH", - "value": 0.6076031496231499, - "normalized_score": 43.475810110859705 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21450151057401812, - "normalized_score": 21.45015105740181 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3523489932885906, - "normalized_score": 13.646532438478745 - }, - "musr": { - "name": "MUSR", - "value": 0.42435416666666664, - "normalized_score": 12.044270833333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4323470744680851, - "normalized_score": 36.927452718676115 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-19", - "submission_date": "2024-12-19", - "generation": 1, - "base_model": "zelk12/MT4-Gen4-gemma-2-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 0, - "params_billions": 10.159, - "co2_cost": 3.587482458465672 - } - }, - { - "id": "zelk12/MT4-Gen5-gemma-2-9B_bfloat16_59681ccdc7e6f1991cc5663464806665bc3bf4c8_True", - "model": { - "name": "zelk12/MT4-Gen5-gemma-2-9B", - "sha": "59681ccdc7e6f1991cc5663464806665bc3bf4c8", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.72051149657049, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7788833628106757, - "normalized_score": 77.88833628106758 - }, - "bbh": { - "name": "BBH", - "value": 0.6106664051994928, - "normalized_score": 43.94789156904174 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22658610271903323, - "normalized_score": 22.658610271903324 - }, - "gpqa": { - "name": "GPQA", - 
"value": 0.3565436241610738, - "normalized_score": 14.205816554809845 - }, - "musr": { - "name": "MUSR", - "value": 0.42683333333333334, - "normalized_score": 12.020833333333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43841422872340424, - "normalized_score": 37.60158096926713 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-28", - "submission_date": "2024-12-28", - "generation": 1, - "base_model": "zelk12/MT4-Gen5-gemma-2-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 2, - "params_billions": 10.159, - "co2_cost": 3.6947234173841594 - } - }, - { - "id": "zelk12/MT4-Max-Merge_02012025163610-gemma-2-9B_bfloat16_25e64938f38ed3db0113007a2814b069fd2952b0_True", - "model": { - "name": "zelk12/MT4-Max-Merge_02012025163610-gemma-2-9B", - "sha": "25e64938f38ed3db0113007a2814b069fd2952b0", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 22.332038216896635, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1770790391716202, - "normalized_score": 17.70790391716202 - }, - "bbh": { - "name": "BBH", - "value": 0.6120127870617372, - "normalized_score": 44.17398166698081 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09516616314199396, - "normalized_score": 9.516616314199396 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35151006711409394, - "normalized_score": 13.534675615212524 - }, - "musr": { - "name": "MUSR", - "value": 0.4228020833333333, - "normalized_score": 11.383593750000001 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4390791223404255, - "normalized_score": 37.67545803782505 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-11", - "submission_date": "2025-01-11", - "generation": 1, - "base_model": "zelk12/MT4-Max-Merge_02012025163610-gemma-2-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 0, - "params_billions": 10.159, - "co2_cost": 3.916799717428739 - } - }, - { - "id": "zelk12/MT4-gemma-2-9B_bfloat16_2167ea02baf9145a697a7d828a17c75b86e5e282_True", - "model": { - "name": "zelk12/MT4-gemma-2-9B", - "sha": "2167ea02baf9145a697a7d828a17c75b86e5e282", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.02640198293455, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7761605872418517, - "normalized_score": 77.61605872418517 - }, - "bbh": { - "name": "BBH", - "value": 0.607313601341302, - "normalized_score": 43.55382749958519 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2084592145015106, - "normalized_score": 20.84592145015106 - }, - "gpqa": { - "name": "GPQA", - "value": 0.33808724832214765, - "normalized_score": 11.74496644295302 - }, - "musr": { - "name": "MUSR", - "value": 0.43092708333333335, - "normalized_score": 12.999218750000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43658577127659576, - "normalized_score": 37.39841903073286 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": 
"2024-10-16", - "submission_date": "2024-10-20", - "generation": 1, - "base_model": "zelk12/MT4-gemma-2-9B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 10.159, - "co2_cost": 6.310517143448776 - } - }, - { - "id": "zelk12/MT5-Gen1-gemma-2-9B_bfloat16_0291b776e80f38381788cd8f1fb2c3435ad891b5_True", - "model": { - "name": "zelk12/MT5-Gen1-gemma-2-9B", - "sha": "0291b776e80f38381788cd8f1fb2c3435ad891b5", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.44043157912841, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7831298731809377, - "normalized_score": 78.31298731809378 - }, - "bbh": { - "name": "BBH", - "value": 0.6110476837383056, - "normalized_score": 44.18333461079421 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2212990936555891, - "normalized_score": 22.129909365558913 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34731543624161076, - "normalized_score": 12.975391498881436 - }, - "musr": { - "name": "MUSR", - "value": 0.4203854166666667, - "normalized_score": 11.614843750000004 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43683510638297873, - "normalized_score": 37.42612293144209 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-25", - "submission_date": "2024-10-31", - "generation": 1, - "base_model": "zelk12/MT5-Gen1-gemma-2-9B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 10.159, - "co2_cost": 4.03450573615006 - } - }, - { - "id": "zelk12/MT5-Gen2-gemma-2-9B_bfloat16_3ee2822fcba6708bd9032b79249a2789e5996b6a_True", - "model": { - "name": "zelk12/MT5-Gen2-gemma-2-9B", - "sha": "3ee2822fcba6708bd9032b79249a2789e5996b6a", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.55154984197392, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7962439660101863, - "normalized_score": 79.62439660101863 - }, - "bbh": { - "name": "BBH", - "value": 0.610541261742359, - "normalized_score": 44.11321466977781 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22054380664652568, - "normalized_score": 22.054380664652566 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35151006711409394, - "normalized_score": 13.534675615212524 - }, - "musr": { - "name": "MUSR", - "value": 0.41629166666666667, - "normalized_score": 10.436458333333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4379155585106383, - "normalized_score": 37.54617316784869 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-11-23", - "submission_date": "2024-11-23", - "generation": 1, - "base_model": "zelk12/MT5-Gen2-gemma-2-9B (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 10.159, - "co2_cost": 3.7167614366335604 - } - }, - { - "id": "zelk12/MT5-Gen3-gemma-2-9B_bfloat16_4b3811c689fec5c9cc483bb1ed696734e5e88fcf_True", - "model": { - "name": "zelk12/MT5-Gen3-gemma-2-9B", - "sha": "4b3811c689fec5c9cc483bb1ed696734e5e88fcf", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": 
"Gemma2ForCausalLM", - "average_score": 34.48864530182997, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7825303527972447, - "normalized_score": 78.25303527972446 - }, - "bbh": { - "name": "BBH", - "value": 0.6090494662695, - "normalized_score": 43.88591333925529 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21676737160120846, - "normalized_score": 21.676737160120847 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35151006711409394, - "normalized_score": 13.534675615212524 - }, - "musr": { - "name": "MUSR", - "value": 0.42305208333333333, - "normalized_score": 12.081510416666669 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4375, - "normalized_score": 37.5 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-08", - "submission_date": "2024-12-08", - "generation": 1, - "base_model": "zelk12/MT5-Gen3-gemma-2-9B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 10.159, - "co2_cost": 3.874666248807024 - } - }, - { - "id": "zelk12/MT5-Gen4-gemma-2-9B_bfloat16_2f826d76460a5b7f150622a57f2d5419adfc464f_True", - "model": { - "name": "zelk12/MT5-Gen4-gemma-2-9B", - "sha": "2f826d76460a5b7f150622a57f2d5419adfc464f", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.658891352635635, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7834545672149895, - "normalized_score": 78.34545672149895 - }, - "bbh": { - "name": "BBH", - "value": 0.6131056160021203, - "normalized_score": 44.32321082390572 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2243202416918429, - "normalized_score": 22.432024169184288 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35318791946308725, - "normalized_score": 13.758389261744966 - }, - "musr": { - "name": "MUSR", - "value": 0.42283333333333334, - "normalized_score": 11.354166666666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4396609042553192, - "normalized_score": 37.74010047281324 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-20", - "submission_date": "2024-12-20", - "generation": 1, - "base_model": "zelk12/MT5-Gen4-gemma-2-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 0, - "params_billions": 10.159, - "co2_cost": 3.6434405353962145 - } - }, - { - "id": "zelk12/MT5-Gen5-gemma-2-9B_bfloat16_d1f68652d7dda810da8207a371d26376c6a6e847_True", - "model": { - "name": "zelk12/MT5-Gen5-gemma-2-9B", - "sha": "d1f68652d7dda810da8207a371d26376c6a6e847", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.634253143757086, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7947202312087482, - "normalized_score": 79.47202312087482 - }, - "bbh": { - "name": "BBH", - "value": 0.6111664051994928, - "normalized_score": 44.1150811115254 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2258308157099698, - "normalized_score": 22.58308157099698 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34815436241610737, - "normalized_score": 13.087248322147648 - }, - "musr": { - "name": 
"MUSR", - "value": 0.41911458333333335, - "normalized_score": 11.555989583333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43292885638297873, - "normalized_score": 36.99209515366431 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-29", - "submission_date": "2024-12-29", - "generation": 1, - "base_model": "zelk12/MT5-Gen5-gemma-2-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 2, - "params_billions": 10.159, - "co2_cost": 3.7832908239042484 - } - }, - { - "id": "zelk12/MT5-Max-Merge_02012025163610-gemma-2-9B_bfloat16_a90f9ca13af28c72695fabc56da4ddd8e3a8e173_True", - "model": { - "name": "zelk12/MT5-Max-Merge_02012025163610-gemma-2-9B", - "sha": "a90f9ca13af28c72695fabc56da4ddd8e3a8e173", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 22.353756658265933, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17615482475387528, - "normalized_score": 17.615482475387527 - }, - "bbh": { - "name": "BBH", - "value": 0.6126794537284038, - "normalized_score": 44.27440650358212 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09818731117824774, - "normalized_score": 9.818731117824774 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35151006711409394, - "normalized_score": 13.534675615212524 - }, - "musr": { - "name": "MUSR", - "value": 0.4227708333333333, - "normalized_score": 11.213020833333333 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43899601063829785, - "normalized_score": 37.66622340425532 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-14", - "submission_date": "2025-01-14", - "generation": 1, - "base_model": "zelk12/MT5-Max-Merge_02012025163610-gemma-2-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 0, - "params_billions": 10.159, - "co2_cost": 4.124418721495757 - } - }, - { - "id": "zelk12/MT5-gemma-2-9B_bfloat16_b627ae7d796b1ae85b59c55e0e043b8d3ae73d83_True", - "model": { - "name": "zelk12/MT5-gemma-2-9B", - "sha": "b627ae7d796b1ae85b59c55e0e043b8d3ae73d83", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.77304917830787, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8047868544351211, - "normalized_score": 80.4786854435121 - }, - "bbh": { - "name": "BBH", - "value": 0.6112225549321132, - "normalized_score": 44.27125659181852 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2258308157099698, - "normalized_score": 22.58308157099698 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34312080536912754, - "normalized_score": 12.416107382550338 - }, - "musr": { - "name": "MUSR", - "value": 0.4203854166666667, - "normalized_score": 11.481510416666671 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4366688829787234, - "normalized_score": 37.4076536643026 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-19", - "submission_date": "2024-10-21", - "generation": 1, - "base_model": 
"zelk12/MT5-gemma-2-9B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 10.159, - "co2_cost": 6.539660404219592 - } - }, - { - "id": "zelk12/MTM-Merge-gemma-2-9B_bfloat16_843f23c68cf50f5bdc0206f93e72ce0f9feeca6e_True", - "model": { - "name": "zelk12/MTM-Merge-gemma-2-9B", - "sha": "843f23c68cf50f5bdc0206f93e72ce0f9feeca6e", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.61498534418492, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7798075772284205, - "normalized_score": 77.98075772284206 - }, - "bbh": { - "name": "BBH", - "value": 0.6133348543973538, - "normalized_score": 44.380677269180374 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2175226586102719, - "normalized_score": 21.75226586102719 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3548657718120805, - "normalized_score": 13.982102908277403 - }, - "musr": { - "name": "MUSR", - "value": 0.4267708333333333, - "normalized_score": 11.946354166666666 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43882978723404253, - "normalized_score": 37.64775413711584 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-01", - "submission_date": "2025-01-01", - "generation": 1, - "base_model": "zelk12/MTM-Merge-gemma-2-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 2, - "params_billions": 10.159, - "co2_cost": 3.586691793579361 - } - }, - { - "id": "zelk12/MTMaMe-Merge_02012025163610-gemma-2-9B_bfloat16_ce68b2468bcba0c5dcde79bbf5346db81f069b12_True", - "model": { - "name": "zelk12/MTMaMe-Merge_02012025163610-gemma-2-9B", - "sha": "ce68b2468bcba0c5dcde79bbf5346db81f069b12", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 22.385497094651754, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.17860277397305815, - "normalized_score": 17.860277397305815 - }, - "bbh": { - "name": "BBH", - "value": 0.6116794537284039, - "normalized_score": 44.16046314846229 - }, - "math": { - "name": "MATH Level 5", - "value": 0.09592145015105741, - "normalized_score": 9.592145015105741 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3523489932885906, - "normalized_score": 13.646532438478745 - }, - "musr": { - "name": "MUSR", - "value": 0.42410416666666667, - "normalized_score": 11.479687499999999 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43816489361702127, - "normalized_score": 37.57387706855792 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-16", - "submission_date": "2025-01-16", - "generation": 1, - "base_model": "zelk12/MTMaMe-Merge_02012025163610-gemma-2-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 0, - "params_billions": 10.159, - "co2_cost": 3.7956793552339922 - } - }, - { - "id": "zelk12/Rv0.4DMv1t0.25-gemma-2-9B_bfloat16_23e7337dabbf023177c25ded4923286a2e3936fc_True", - "model": { - "name": "zelk12/Rv0.4DMv1t0.25-gemma-2-9B", - "sha": "23e7337dabbf023177c25ded4923286a2e3936fc", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": 
"Gemma2ForCausalLM", - "average_score": 34.11401781690754, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7496575752337131, - "normalized_score": 74.96575752337131 - }, - "bbh": { - "name": "BBH", - "value": 0.6069712638522043, - "normalized_score": 43.66476423073275 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2258308157099698, - "normalized_score": 22.58308157099698 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34563758389261745, - "normalized_score": 12.751677852348994 - }, - "musr": { - "name": "MUSR", - "value": 0.43092708333333335, - "normalized_score": 12.932552083333334 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44007646276595747, - "normalized_score": 37.786273640661946 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-31", - "submission_date": "2024-12-31", - "generation": 1, - "base_model": "zelk12/Rv0.4DMv1t0.25-gemma-2-9B (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 10.159, - "co2_cost": 3.8372893374940626 - } - }, - { - "id": "zelk12/Rv0.4DMv1t0.25Tt0.25-gemma-2-9B_bfloat16_28fbcc2fa23f46aaaed327984784251527c78815_True", - "model": { - "name": "zelk12/Rv0.4DMv1t0.25Tt0.25-gemma-2-9B", - "sha": "28fbcc2fa23f46aaaed327984784251527c78815", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 33.87751505283704, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7646200968984517, - "normalized_score": 76.46200968984516 - }, - "bbh": { - "name": "BBH", - "value": 0.6097862253440982, - "normalized_score": 43.91481856092577 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20694864048338368, - "normalized_score": 20.694864048338367 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3422818791946309, - "normalized_score": 12.304250559284117 - }, - "musr": { - "name": "MUSR", - "value": 0.4282916666666667, - "normalized_score": 12.703125000000002 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43467420212765956, - "normalized_score": 37.186022458628834 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-31", - "submission_date": "2024-12-31", - "generation": 1, - "base_model": "zelk12/Rv0.4DMv1t0.25Tt0.25-gemma-2-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 0, - "params_billions": 10.159, - "co2_cost": 3.825165974191258 - } - }, - { - "id": "zelk12/Rv0.4MT4g2-gemma-2-9B_bfloat16_ef595241d2c62203c27d84e6643d384a7cf99bd4_True", - "model": { - "name": "zelk12/Rv0.4MT4g2-gemma-2-9B", - "sha": "ef595241d2c62203c27d84e6643d384a7cf99bd4", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 33.255961795847774, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7320221456845614, - "normalized_score": 73.20221456845613 - }, - "bbh": { - "name": "BBH", - "value": 0.604119644415618, - "normalized_score": 43.19904579899195 - }, - "math": { - "name": "MATH Level 5", - "value": 0.19486404833836857, - "normalized_score": 19.486404833836858 - }, - "gpqa": { - "name": "GPQA", - "value": 
0.35318791946308725, - "normalized_score": 13.758389261744966 - }, - "musr": { - "name": "MUSR", - "value": 0.4230833333333333, - "normalized_score": 11.918750000000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.44173869680851063, - "normalized_score": 37.97096631205674 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-04", - "submission_date": "2025-01-04", - "generation": 1, - "base_model": "zelk12/Rv0.4MT4g2-gemma-2-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 1, - "params_billions": 10.159, - "co2_cost": 3.706506822326515 - } - }, - { - "id": "zelk12/T31122024203920-gemma-2-9B_bfloat16_25cb58c73a3adf43cee33b50238b1d332b5ccc13_True", - "model": { - "name": "zelk12/T31122024203920-gemma-2-9B", - "sha": "25cb58c73a3adf43cee33b50238b1d332b5ccc13", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.20907135232025, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7676176988169169, - "normalized_score": 76.76176988169169 - }, - "bbh": { - "name": "BBH", - "value": 0.6095634089448112, - "normalized_score": 43.72899711113373 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2054380664652568, - "normalized_score": 20.54380664652568 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35067114093959734, - "normalized_score": 13.422818791946312 - }, - "musr": { - "name": "MUSR", - "value": 0.4321979166666667, - "normalized_score": 13.324739583333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.437250664893617, - "normalized_score": 37.47229609929077 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-31", - "submission_date": "2024-12-31", - "generation": 1, - "base_model": "zelk12/T31122024203920-gemma-2-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 0, - "params_billions": 10.159, - "co2_cost": 3.7327384775668433 - } - }, - { - "id": "zelk12/Test01012025155054_bfloat16_c607186b0b079975e3305e0223e0a55f0cbc19e5_True", - "model": { - "name": "zelk12/Test01012025155054", - "sha": "c607186b0b079975e3305e0223e0a55f0cbc19e5", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 3.591417057729718, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1555229014570229, - "normalized_score": 15.552290145702292 - }, - "bbh": { - "name": "BBH", - "value": 0.28295044895258115, - "normalized_score": 1.2805465682883084 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24161073825503357, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.36702083333333335, - "normalized_score": 3.7109374999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10904255319148937, - "normalized_score": 1.0047281323877066 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-01", - "submission_date": "2025-01-01", - "generation": 1, - "base_model": 
"zelk12/Test01012025155054 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.817, - "co2_cost": 1.4009482075272097 - } - }, - { - "id": "zelk12/Test01012025155054t0.5_gemma-2_bfloat16_14fcae0d420d303df84bd9b9c8744a6f0fa147fb_True", - "model": { - "name": "zelk12/Test01012025155054t0.5_gemma-2", - "sha": "14fcae0d420d303df84bd9b9c8744a6f0fa147fb", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 3.591417057729718, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.1555229014570229, - "normalized_score": 15.552290145702292 - }, - "bbh": { - "name": "BBH", - "value": 0.28295044895258115, - "normalized_score": 1.2805465682883084 - }, - "math": { - "name": "MATH Level 5", - "value": 0.0, - "normalized_score": 0.0 - }, - "gpqa": { - "name": "GPQA", - "value": 0.24161073825503357, - "normalized_score": 0.0 - }, - "musr": { - "name": "MUSR", - "value": 0.36702083333333335, - "normalized_score": 3.7109374999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.10904255319148937, - "normalized_score": 1.0047281323877066 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2025-01-01", - "submission_date": "2025-01-01", - "generation": 1, - "base_model": "zelk12/Test01012025155054t0.5_gemma-2 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 3.817, - "co2_cost": 1.3959284921606305 - } - }, - { - "id": "zelk12/gemma-2-S2MTM-9B_bfloat16_fd6860743943114eeca6fc2e800e27c87873bcc5_True", - "model": { - "name": "zelk12/gemma-2-S2MTM-9B", - "sha": "fd6860743943114eeca6fc2e800e27c87873bcc5", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 33.89283041556518, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7822555264476034, - "normalized_score": 78.22555264476034 - }, - "bbh": { - "name": "BBH", - "value": 0.6060836790982922, - "normalized_score": 43.11572752288462 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20468277945619334, - "normalized_score": 20.468277945619334 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34563758389261745, - "normalized_score": 12.751677852348994 - }, - "musr": { - "name": "MUSR", - "value": 0.42184375, - "normalized_score": 12.163802083333328 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4296875, - "normalized_score": 36.63194444444445 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": true, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-12-11", - "submission_date": "2024-12-11", - "generation": 1, - "base_model": "zelk12/gemma-2-S2MTM-9B (Merge)", - "hub_license": "gemma", - "hub_hearts": 0, - "params_billions": 10.159, - "co2_cost": 3.5302052373960766 - } - }, - { - "id": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1_bfloat16_b4208ddf6c741884c16c77b9433d9ead8f216354_True", - "model": { - "name": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1", - "sha": "b4208ddf6c741884c16c77b9433d9ead8f216354", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 33.91991864588516, - "has_chat_template": true - }, - 
"evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7648949232480928, - "normalized_score": 76.48949232480928 - }, - "bbh": { - "name": "BBH", - "value": 0.6074511952177571, - "normalized_score": 43.706516090138706 - }, - "math": { - "name": "MATH Level 5", - "value": 0.2280966767371601, - "normalized_score": 22.80966767371601 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3498322147651007, - "normalized_score": 13.31096196868009 - }, - "musr": { - "name": "MUSR", - "value": 0.41362499999999996, - "normalized_score": 10.303125000000003 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43209773936170215, - "normalized_score": 36.89974881796691 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-03", - "submission_date": "2024-10-03", - "generation": 1, - "base_model": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1 (Merge)", - "hub_license": "", - "hub_hearts": 2, - "params_billions": 10.159, - "co2_cost": 6.8863826958517835 - } - }, - { - "id": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.25_bfloat16_e652c9e07265526851dad994f4640aa265b9ab56_True", - "model": { - "name": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.25", - "sha": "e652c9e07265526851dad994f4640aa265b9ab56", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.28211941982605, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7706651684197928, - "normalized_score": 77.06651684197928 - }, - "bbh": { - "name": "BBH", - "value": 0.6075432245295168, - "normalized_score": 43.85035014659934 - }, - "math": { - "name": "MATH Level 5", - "value": 0.21450151057401812, - "normalized_score": 21.45015105740181 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34312080536912754, - "normalized_score": 12.416107382550338 - }, - "musr": { - "name": "MUSR", - "value": 0.43226041666666665, - "normalized_score": 13.132552083333337 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4399933510638298, - "normalized_score": 37.77703900709219 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-04", - "submission_date": "2024-10-04", - "generation": 1, - "base_model": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.25 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 10.159, - "co2_cost": 6.389981437107325 - } - }, - { - "id": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.75_bfloat16_eb0e589291630ba20328db650f74af949d217a97_True", - "model": { - "name": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.75", - "sha": "eb0e589291630ba20328db650f74af949d217a97", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 31.782789348821325, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7208063493752133, - "normalized_score": 72.08063493752132 - }, - "bbh": { - "name": "BBH", - "value": 0.5995203934792884, - "normalized_score": 42.487153128065906 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20166163141993956, - "normalized_score": 20.166163141993955 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3498322147651007, - "normalized_score": 
13.31096196868009 - }, - "musr": { - "name": "MUSR", - "value": 0.3951145833333333, - "normalized_score": 7.755989583333336 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4140625, - "normalized_score": 34.895833333333336 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-04", - "submission_date": "2024-10-04", - "generation": 1, - "base_model": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.75 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 10.159, - "co2_cost": 7.50290559085358 - } - }, - { - "id": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.2_bfloat16_76f56b25bf6d8704282f8c77bfda28ca384883bc_True", - "model": { - "name": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.2", - "sha": "76f56b25bf6d8704282f8c77bfda28ca384883bc", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 33.62606373114868, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.759999024809727, - "normalized_score": 75.99990248097271 - }, - "bbh": { - "name": "BBH", - "value": 0.6066260664115647, - "normalized_score": 43.63358839796041 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22280966767371602, - "normalized_score": 22.280966767371602 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34815436241610737, - "normalized_score": 13.087248322147648 - }, - "musr": { - "name": "MUSR", - "value": 0.4109583333333333, - "normalized_score": 9.836458333333338 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.43226396276595747, - "normalized_score": 36.91821808510639 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-07", - "submission_date": "2024-10-11", - "generation": 1, - "base_model": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.2 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 10.159, - "co2_cost": 6.8273508762924 - } - }, - { - "id": "zelk12/recoilme-gemma-2-Gutenberg-Doppel-9B-v0.1_bfloat16_1e3e623e9f0b386bfd967c629dd39c87daef5bed_True", - "model": { - "name": "zelk12/recoilme-gemma-2-Gutenberg-Doppel-9B-v0.1", - "sha": "1e3e623e9f0b386bfd967c629dd39c87daef5bed", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 33.904825121052134, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7615227596111651, - "normalized_score": 76.15227596111652 - }, - "bbh": { - "name": "BBH", - "value": 0.6098779556010631, - "normalized_score": 43.94125829423596 - }, - "math": { - "name": "MATH Level 5", - "value": 0.20996978851963746, - "normalized_score": 20.996978851963746 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3414429530201342, - "normalized_score": 12.192393736017896 - }, - "musr": { - "name": "MUSR", - "value": 0.43102083333333335, - "normalized_score": 13.310937499999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4315159574468085, - "normalized_score": 36.83510638297872 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-07", - 
"submission_date": "2024-10-07", - "generation": 1, - "base_model": "zelk12/recoilme-gemma-2-Gutenberg-Doppel-9B-v0.1 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 10.159, - "co2_cost": 9.698969799762018 - } - }, - { - "id": "zelk12/recoilme-gemma-2-Ifable-9B-v0.1_bfloat16_8af6620b39c9a36239879b6b2bd88f66e9e9d930_True", - "model": { - "name": "zelk12/recoilme-gemma-2-Ifable-9B-v0.1", - "sha": "8af6620b39c9a36239879b6b2bd88f66e9e9d930", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 34.40699088269145, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7943955371746965, - "normalized_score": 79.43955371746965 - }, - "bbh": { - "name": "BBH", - "value": 0.6064399292200404, - "normalized_score": 43.39057008013784 - }, - "math": { - "name": "MATH Level 5", - "value": 0.22054380664652568, - "normalized_score": 22.054380664652566 - }, - "gpqa": { - "name": "GPQA", - "value": 0.35151006711409394, - "normalized_score": 13.534675615212524 - }, - "musr": { - "name": "MUSR", - "value": 0.42022916666666665, - "normalized_score": 11.0953125 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.4323470744680851, - "normalized_score": 36.927452718676115 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-07", - "submission_date": "2024-10-07", - "generation": 1, - "base_model": "zelk12/recoilme-gemma-2-Ifable-9B-v0.1 (Merge)", - "hub_license": "", - "hub_hearts": 0, - "params_billions": 10.159, - "co2_cost": 9.808855629370424 - } - }, - { - "id": "zelk12/recoilme-gemma-2-psy10k-mental_healt-9B-v0.1_bfloat16_ced039b03be6f65ac0f713efcee76c6534e65639_True", - "model": { - "name": "zelk12/recoilme-gemma-2-psy10k-mental_healt-9B-v0.1", - "sha": "ced039b03be6f65ac0f713efcee76c6534e65639", - "precision": "bfloat16", - "type": "basemergesandmoerges", - "weight_type": "Original", - "architecture": "Gemma2ForCausalLM", - "average_score": 32.586530614782504, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.744536718130117, - "normalized_score": 74.45367181301171 - }, - "bbh": { - "name": "BBH", - "value": 0.597759349920723, - "normalized_score": 42.1326829485998 - }, - "math": { - "name": "MATH Level 5", - "value": 0.18882175226586104, - "normalized_score": 18.882175226586103 - }, - "gpqa": { - "name": "GPQA", - "value": 0.34395973154362414, - "normalized_score": 12.527964205816552 - }, - "musr": { - "name": "MUSR", - "value": 0.42946875, - "normalized_score": 12.18359375 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.41805186170212766, - "normalized_score": 35.33909574468085 - } - }, - "features": { - "is_not_available_on_hub": false, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-07", - "submission_date": "2024-10-07", - "generation": 1, - "base_model": "zelk12/recoilme-gemma-2-psy10k-mental_healt-9B-v0.1 (Merge)", - "hub_license": "", - "hub_hearts": 1, - "params_billions": 10.159, - "co2_cost": 6.264440877177395 - } - }, - { - "id": "zetasepic/Qwen2.5-32B-Instruct-abliterated-v2_bfloat16_5894fbf0a900e682dfc0ed794db337093bd8d26b_True", - "model": { - "name": "zetasepic/Qwen2.5-32B-Instruct-abliterated-v2", - "sha": 
"5894fbf0a900e682dfc0ed794db337093bd8d26b", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 46.88867299768722, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.8334131216283904, - "normalized_score": 83.34131216283905 - }, - "bbh": { - "name": "BBH", - "value": 0.6934020817780425, - "normalized_score": 56.53381848053764 - }, - "math": { - "name": "MATH Level 5", - "value": 0.595166163141994, - "normalized_score": 59.516616314199396 - }, - "gpqa": { - "name": "GPQA", - "value": 0.3674496644295302, - "normalized_score": 15.659955257270694 - }, - "musr": { - "name": "MUSR", - "value": 0.43542708333333335, - "normalized_score": 14.928385416666664 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5621675531914894, - "normalized_score": 51.35195035460993 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-11", - "submission_date": "2024-12-07", - "generation": 2, - "base_model": "Qwen/Qwen2.5-32B", - "hub_license": "apache-2.0", - "hub_hearts": 14, - "params_billions": 32.764, - "co2_cost": 13.489578110184901 - } - }, - { - "id": "zetasepic/Qwen2.5-72B-Instruct-abliterated_bfloat16_af94b3c05c9857dbac73afb1cbce00e4833ec9ef_False", - "model": { - "name": "zetasepic/Qwen2.5-72B-Instruct-abliterated", - "sha": "af94b3c05c9857dbac73afb1cbce00e4833ec9ef", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "Qwen2ForCausalLM", - "average_score": 46.33795303791254, - "has_chat_template": false - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.7152610628687439, - "normalized_score": 71.52610628687438 - }, - "bbh": { - "name": "BBH", - "value": 0.7152257183282452, - "normalized_score": 59.912975835046495 - }, - "math": { - "name": "MATH Level 5", - "value": 0.5241691842900302, - "normalized_score": 52.416918429003026 - }, - "gpqa": { - "name": "GPQA", - "value": 0.40687919463087246, - "normalized_score": 20.917225950782996 - }, - "musr": { - "name": "MUSR", - "value": 0.4719166666666667, - "normalized_score": 19.122916666666665 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.5871841755319149, - "normalized_score": 54.13157505910166 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": false, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-10-01", - "submission_date": "2024-11-08", - "generation": 2, - "base_model": "Qwen/Qwen2.5-72B", - "hub_license": "other", - "hub_hearts": 28, - "params_billions": 72.706, - "co2_cost": 37.618363240455366 - } - }, - { - "id": "zhengr/MixTAO-7Bx2-MoE-v8.1_bfloat16_828e963abf2db0f5af9ed0d4034e538fc1cf5f40_True", - "model": { - "name": "zhengr/MixTAO-7Bx2-MoE-v8.1", - "sha": "828e963abf2db0f5af9ed0d4034e538fc1cf5f40", - "precision": "bfloat16", - "type": "fine-tunedondomain-specificdatasets", - "weight_type": "Original", - "architecture": "MixtralForCausalLM", - "average_score": 17.067606418207944, - "has_chat_template": true - }, - "evaluations": { - "ifeval": { - "name": "IFEval", - "value": 0.4187810564856802, - "normalized_score": 41.878105648568024 - }, - "bbh": { - "name": "BBH", - "value": 0.42019437560239653, - "normalized_score": 19.17690717348315 - }, - "math": 
{ - "name": "MATH Level 5", - "value": 0.06042296072507553, - "normalized_score": 6.042296072507553 - }, - "gpqa": { - "name": "GPQA", - "value": 0.2986577181208054, - "normalized_score": 6.487695749440718 - }, - "musr": { - "name": "MUSR", - "value": 0.39762499999999995, - "normalized_score": 8.303124999999996 - }, - "mmlu_pro": { - "name": "MMLU-PRO", - "value": 0.28465757978723405, - "normalized_score": 20.517508865248228 - } - }, - "features": { - "is_not_available_on_hub": true, - "is_merged": false, - "is_moe": true, - "is_flagged": false, - "is_official_provider": false - }, - "metadata": { - "upload_date": "2024-02-26", - "submission_date": "2024-06-27", - "generation": 0, - "base_model": "zhengr/MixTAO-7Bx2-MoE-v8.1", - "hub_license": "apache-2.0", - "hub_hearts": 55, - "params_billions": 12.879, - "co2_cost": 1.8547801756431757 - } - } -] \ No newline at end of file diff --git a/data/global-mmlu-lite/Alibaba/Qwen-3-235B-A22B-Instruct/d1b63dce-9740-4347-b7b2-01099fa8b9e7.json b/data/global-mmlu-lite/Alibaba/Qwen-3-235B-A22B-Instruct/d1b63dce-9740-4347-b7b2-01099fa8b9e7.json deleted file mode 100644 index 3db64feb3..000000000 --- a/data/global-mmlu-lite/Alibaba/Qwen-3-235B-A22B-Instruct/d1b63dce-9740-4347-b7b2-01099fa8b9e7.json +++ /dev/null @@ -1,353 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "global-mmlu-lite/alibaba_qwen-3-235b-a22b-instruct/1764290503.995032", - "retrieved_timestamp": "1764290503.995032", - "source_data": [ - "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - ], - "source_metadata": { - "source_organization_name": "Cohere Labs", - "source_organization_url": "https://www.kaggle.com/organizations/cohere-labs", - "evaluator_relationship": "third_party", - "source_type": "documentation", - "source_name": "Kaggle Global MMLU Lite Leaderboard" - }, - "model_info": { - "name": "Qwen 3 235B A22B Instruct", - "id": "alibaba/qwen-3-235b-a22b-instruct", - "developer": "Alibaba", - "inference_platform": "Kaggle" - }, - "evaluation_results": [ - { - "evaluation_name": "Global MMLU Lite", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite" - }, - "score_details": { - "score": 0.8798437499999998 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Sensitive", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Sensitive" - }, - "score_details": { - "score": 0.8521875 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Agnostic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Agnostic" - }, - "score_details": { - "score": 0.9075 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Arabic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Arabic" 
- }, - "score_details": { - "score": 0.88, - "details": { - "confidence_interval": 0.031846, - "stddev": 0.031846 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "English", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task English" - }, - "score_details": { - "score": 0.89, - "details": { - "confidence_interval": 0.030663, - "stddev": 0.030663 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Bengali", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Bengali" - }, - "score_details": { - "score": 0.8875, - "details": { - "confidence_interval": 0.030966, - "stddev": 0.030966 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "German", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task German" - }, - "score_details": { - "score": 0.885, - "details": { - "confidence_interval": 0.031264, - "stddev": 0.031264 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "French", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task French" - }, - "score_details": { - "score": 0.88, - "details": { - "confidence_interval": 0.031846, - "stddev": 0.031846 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Hindi", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Hindi" - }, - "score_details": { - "score": 0.8775, - "details": { - "confidence_interval": 0.03213, - "stddev": 0.03213 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Indonesian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Indonesian" - }, - "score_details": { - "score": 0.88, - "details": { - "confidence_interval": 0.031846, - "stddev": 0.031846 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Italian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Italian" - }, - "score_details": { - "score": 0.88, - "details": { - "confidence_interval": 0.031846, - "stddev": 0.031846 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Japanese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Japanese" - }, - "score_details": { - "score": 0.88, - "details": { - "confidence_interval": 0.031846, - "stddev": 0.031846 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Korean", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Korean" - }, - "score_details": { - "score": 0.875, - "details": { - "confidence_interval": 0.03241, - "stddev": 0.03241 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Portuguese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Portuguese" - }, - "score_details": { - "score": 0.8875, - "details": { - "confidence_interval": 0.030966, - "stddev": 0.030966 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Spanish", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Spanish" - }, - "score_details": { - "score": 0.875, - "details": { - "confidence_interval": 0.03241, - "stddev": 0.03241 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Swahili", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Swahili" - }, - "score_details": { - "score": 0.87, - "details": { - "confidence_interval": 0.032957, - "stddev": 0.032957 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Yoruba", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Yoruba" - }, - "score_details": { - "score": 0.8725, - "details": { - "confidence_interval": 0.032686, - "stddev": 0.032686 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Chinese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Chinese" - }, - "score_details": { - "score": 0.8775, - "details": { - "confidence_interval": 0.03213, - "stddev": 0.03213 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Burmese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Burmese" - }, - "score_details": { - "score": 0.88, - "details": { - "confidence_interval": 0.031846, - "stddev": 0.031846 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - } - ] 
-} \ No newline at end of file diff --git a/data/global-mmlu-lite/Anthropic/Claude-3.5-Haiku/b34d5c62-d44a-44ce-9d14-f97445a407a3.json b/data/global-mmlu-lite/Anthropic/Claude-3.5-Haiku/b34d5c62-d44a-44ce-9d14-f97445a407a3.json deleted file mode 100644 index 0a8366480..000000000 --- a/data/global-mmlu-lite/Anthropic/Claude-3.5-Haiku/b34d5c62-d44a-44ce-9d14-f97445a407a3.json +++ /dev/null @@ -1,353 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "global-mmlu-lite/anthropic_claude-35-haiku/1764290504.010163", - "retrieved_timestamp": "1764290504.010163", - "source_data": [ - "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - ], - "source_metadata": { - "source_organization_name": "Cohere Labs", - "source_organization_url": "https://www.kaggle.com/organizations/cohere-labs", - "evaluator_relationship": "third_party", - "source_type": "documentation", - "source_name": "Kaggle Global MMLU Lite Leaderboard" - }, - "model_info": { - "name": "Claude 3.5 Haiku", - "id": "anthropic/claude-35-haiku", - "developer": "Anthropic", - "inference_platform": "Kaggle" - }, - "evaluation_results": [ - { - "evaluation_name": "Global MMLU Lite", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite" - }, - "score_details": { - "score": 0.6114062499999999 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Sensitive", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Sensitive" - }, - "score_details": { - "score": 0.5834375 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Agnostic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Agnostic" - }, - "score_details": { - "score": 0.6393749999999999 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Arabic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Arabic" - }, - "score_details": { - "score": 0.695, - "details": { - "confidence_interval": 0.045119, - "stddev": 0.045119 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "English", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task English" - }, - "score_details": { - "score": 0.485, - "details": { - "confidence_interval": 0.048977, - "stddev": 0.048977 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Bengali", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU 
Lite accuracy for task Bengali" - }, - "score_details": { - "score": 0.675, - "details": { - "confidence_interval": 0.0459, - "stddev": 0.0459 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "German", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task German" - }, - "score_details": { - "score": 0.565, - "details": { - "confidence_interval": 0.048583, - "stddev": 0.048583 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "French", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task French" - }, - "score_details": { - "score": 0.61, - "details": { - "confidence_interval": 0.047799, - "stddev": 0.047799 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Hindi", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Hindi" - }, - "score_details": { - "score": 0.6575, - "details": { - "confidence_interval": 0.046505, - "stddev": 0.046505 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Indonesian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Indonesian" - }, - "score_details": { - "score": 0.5475, - "details": { - "confidence_interval": 0.048777, - "stddev": 0.048777 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Italian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Italian" - }, - "score_details": { - "score": 0.48, - "details": { - "confidence_interval": 0.04896, - "stddev": 0.04896 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Japanese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Japanese" - }, - "score_details": { - "score": 0.655, - "details": { - "confidence_interval": 0.046585, - "stddev": 0.046585 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Korean", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Korean" - }, - "score_details": { - "score": 0.6575, - "details": { - "confidence_interval": 0.046505, - "stddev": 0.046505 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Portuguese", - "metric_config": { - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Portuguese" - }, - "score_details": { - "score": 0.5225, - "details": { - "confidence_interval": 0.048949, - "stddev": 0.048949 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Spanish", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Spanish" - }, - "score_details": { - "score": 0.485, - "details": { - "confidence_interval": 0.048977, - "stddev": 0.048977 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Swahili", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Swahili" - }, - "score_details": { - "score": 0.69, - "details": { - "confidence_interval": 0.045324, - "stddev": 0.045324 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Yoruba", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Yoruba" - }, - "score_details": { - "score": 0.6675, - "details": { - "confidence_interval": 0.046168, - "stddev": 0.046168 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Chinese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Chinese" - }, - "score_details": { - "score": 0.69, - "details": { - "confidence_interval": 0.045324, - "stddev": 0.045324 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Burmese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Burmese" - }, - "score_details": { - "score": 0.7, - "details": { - "confidence_interval": 0.044908, - "stddev": 0.044908 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - } - ] -} \ No newline at end of file diff --git a/data/global-mmlu-lite/Anthropic/Claude-3.7-Sonnet/462fd172-5786-45a9-a938-361fef294d8b.json b/data/global-mmlu-lite/Anthropic/Claude-3.7-Sonnet/462fd172-5786-45a9-a938-361fef294d8b.json deleted file mode 100644 index ad6e04396..000000000 --- a/data/global-mmlu-lite/Anthropic/Claude-3.7-Sonnet/462fd172-5786-45a9-a938-361fef294d8b.json +++ /dev/null @@ -1,353 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "global-mmlu-lite/anthropic_claude-37-sonnet/1764290503.997274", - "retrieved_timestamp": "1764290503.997274", - "source_data": [ - "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - ], - "source_metadata": { - "source_organization_name": "Cohere Labs", - "source_organization_url": "https://www.kaggle.com/organizations/cohere-labs", - "evaluator_relationship": "third_party", - "source_type": 
"documentation", - "source_name": "Kaggle Global MMLU Lite Leaderboard" - }, - "model_info": { - "name": "Claude 3.7 Sonnet", - "id": "anthropic/claude-37-sonnet", - "developer": "Anthropic", - "inference_platform": "Kaggle" - }, - "evaluation_results": [ - { - "evaluation_name": "Global MMLU Lite", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite" - }, - "score_details": { - "score": 0.8078124999999998 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Sensitive", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Sensitive" - }, - "score_details": { - "score": 0.779375 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Agnostic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Agnostic" - }, - "score_details": { - "score": 0.8362499999999999 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Arabic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Arabic" - }, - "score_details": { - "score": 0.7925, - "details": { - "confidence_interval": 0.03974, - "stddev": 0.03974 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "English", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task English" - }, - "score_details": { - "score": 0.7625, - "details": { - "confidence_interval": 0.041703, - "stddev": 0.041703 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Bengali", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Bengali" - }, - "score_details": { - "score": 0.825, - "details": { - "confidence_interval": 0.037236, - "stddev": 0.037236 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "German", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task German" - }, - "score_details": { - "score": 0.8125, - "details": { - "confidence_interval": 0.03825, - "stddev": 0.03825 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "French", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, 
- "evaluation_description": "Global MMLU Lite accuracy for task French" - }, - "score_details": { - "score": 0.7675, - "details": { - "confidence_interval": 0.041397, - "stddev": 0.041397 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Hindi", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Hindi" - }, - "score_details": { - "score": 0.805, - "details": { - "confidence_interval": 0.038827, - "stddev": 0.038827 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Indonesian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Indonesian" - }, - "score_details": { - "score": 0.8175, - "details": { - "confidence_interval": 0.037852, - "stddev": 0.037852 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Italian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Italian" - }, - "score_details": { - "score": 0.8225, - "details": { - "confidence_interval": 0.037444, - "stddev": 0.037444 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Japanese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Japanese" - }, - "score_details": { - "score": 0.8425, - "details": { - "confidence_interval": 0.035698, - "stddev": 0.035698 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Korean", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Korean" - }, - "score_details": { - "score": 0.83, - "details": { - "confidence_interval": 0.036811, - "stddev": 0.036811 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Portuguese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Portuguese" - }, - "score_details": { - "score": 0.77, - "details": { - "confidence_interval": 0.041241, - "stddev": 0.041241 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Spanish", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Spanish" - }, - "score_details": { - "score": 0.8075, - "details": { - "confidence_interval": 0.038637, - "stddev": 0.038637 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": 
"Swahili", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Swahili" - }, - "score_details": { - "score": 0.8125, - "details": { - "confidence_interval": 0.03825, - "stddev": 0.03825 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Yoruba", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Yoruba" - }, - "score_details": { - "score": 0.81, - "details": { - "confidence_interval": 0.038445, - "stddev": 0.038445 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Chinese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Chinese" - }, - "score_details": { - "score": 0.835, - "details": { - "confidence_interval": 0.036375, - "stddev": 0.036375 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Burmese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Burmese" - }, - "score_details": { - "score": 0.8125, - "details": { - "confidence_interval": 0.03825, - "stddev": 0.03825 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - } - ] -} \ No newline at end of file diff --git a/data/global-mmlu-lite/Anthropic/Claude-Opus-4.1/562a23d0-d80a-4564-a68b-6b478817fa0e.json b/data/global-mmlu-lite/Anthropic/Claude-Opus-4.1/562a23d0-d80a-4564-a68b-6b478817fa0e.json deleted file mode 100644 index 1ea305e4d..000000000 --- a/data/global-mmlu-lite/Anthropic/Claude-Opus-4.1/562a23d0-d80a-4564-a68b-6b478817fa0e.json +++ /dev/null @@ -1,353 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "global-mmlu-lite/anthropic_claude-opus-41/1764290503.991577", - "retrieved_timestamp": "1764290503.991577", - "source_data": [ - "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - ], - "source_metadata": { - "source_organization_name": "Cohere Labs", - "source_organization_url": "https://www.kaggle.com/organizations/cohere-labs", - "evaluator_relationship": "third_party", - "source_type": "documentation", - "source_name": "Kaggle Global MMLU Lite Leaderboard" - }, - "model_info": { - "name": "Claude Opus 4.1", - "id": "anthropic/claude-opus-41", - "developer": "Anthropic", - "inference_platform": "Kaggle" - }, - "evaluation_results": [ - { - "evaluation_name": "Global MMLU Lite", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite" - }, - "score_details": { - "score": 0.94296875 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Sensitive", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global 
MMLU Lite accuracy for task Global MMLU Lite Culturally Sensitive" - }, - "score_details": { - "score": 0.933125 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Agnostic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Agnostic" - }, - "score_details": { - "score": 0.9528125 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Arabic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Arabic" - }, - "score_details": { - "score": 0.945, - "details": { - "confidence_interval": 0.022342, - "stddev": 0.022342 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "English", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task English" - }, - "score_details": { - "score": 0.9475, - "details": { - "confidence_interval": 0.021857, - "stddev": 0.021857 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Bengali", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Bengali" - }, - "score_details": { - "score": 0.9425, - "details": { - "confidence_interval": 0.022814, - "stddev": 0.022814 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "German", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task German" - }, - "score_details": { - "score": 0.94, - "details": { - "confidence_interval": 0.023273, - "stddev": 0.023273 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "French", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task French" - }, - "score_details": { - "score": 0.945, - "details": { - "confidence_interval": 0.022342, - "stddev": 0.022342 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Hindi", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Hindi" - }, - "score_details": { - "score": 0.9475, - "details": { - "confidence_interval": 0.021857, - "stddev": 0.021857 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Indonesian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, 
- "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Indonesian" - }, - "score_details": { - "score": 0.9425, - "details": { - "confidence_interval": 0.022814, - "stddev": 0.022814 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Italian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Italian" - }, - "score_details": { - "score": 0.94, - "details": { - "confidence_interval": 0.023273, - "stddev": 0.023273 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Japanese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Japanese" - }, - "score_details": { - "score": 0.94, - "details": { - "confidence_interval": 0.023273, - "stddev": 0.023273 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Korean", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Korean" - }, - "score_details": { - "score": 0.95, - "details": { - "confidence_interval": 0.021358, - "stddev": 0.021358 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Portuguese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Portuguese" - }, - "score_details": { - "score": 0.945, - "details": { - "confidence_interval": 0.022342, - "stddev": 0.022342 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Spanish", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Spanish" - }, - "score_details": { - "score": 0.945, - "details": { - "confidence_interval": 0.022342, - "stddev": 0.022342 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Swahili", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Swahili" - }, - "score_details": { - "score": 0.93, - "details": { - "confidence_interval": 0.025004, - "stddev": 0.025004 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Yoruba", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Yoruba" - }, - "score_details": { - "score": 0.9375, - "details": { - "confidence_interval": 0.023722, - "stddev": 0.023722 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - 
"evaluation_name": "Chinese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Chinese" - }, - "score_details": { - "score": 0.945, - "details": { - "confidence_interval": 0.022342, - "stddev": 0.022342 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Burmese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Burmese" - }, - "score_details": { - "score": 0.945, - "details": { - "confidence_interval": 0.022342, - "stddev": 0.022342 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - } - ] -} \ No newline at end of file diff --git a/data/global-mmlu-lite/Anthropic/Claude-Sonnet-4/0e7e63be-9a07-48fd-a525-7378f6d0477f.json b/data/global-mmlu-lite/Anthropic/Claude-Sonnet-4/0e7e63be-9a07-48fd-a525-7378f6d0477f.json deleted file mode 100644 index c7ab57be5..000000000 --- a/data/global-mmlu-lite/Anthropic/Claude-Sonnet-4/0e7e63be-9a07-48fd-a525-7378f6d0477f.json +++ /dev/null @@ -1,353 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "global-mmlu-lite/anthropic_claude-sonnet-4/1764290503.993483", - "retrieved_timestamp": "1764290503.993483", - "source_data": [ - "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - ], - "source_metadata": { - "source_organization_name": "Cohere Labs", - "source_organization_url": "https://www.kaggle.com/organizations/cohere-labs", - "evaluator_relationship": "third_party", - "source_type": "documentation", - "source_name": "Kaggle Global MMLU Lite Leaderboard" - }, - "model_info": { - "name": "Claude Sonnet 4", - "id": "anthropic/claude-sonnet-4", - "developer": "Anthropic", - "inference_platform": "Kaggle" - }, - "evaluation_results": [ - { - "evaluation_name": "Global MMLU Lite", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite" - }, - "score_details": { - "score": 0.90578125 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Sensitive", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Sensitive" - }, - "score_details": { - "score": 0.8912500000000001 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Agnostic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Agnostic" - }, - "score_details": { - "score": 0.9203125 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Arabic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for 
task Arabic" - }, - "score_details": { - "score": 0.9125, - "details": { - "confidence_interval": 0.027691, - "stddev": 0.027691 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "English", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task English" - }, - "score_details": { - "score": 0.905, - "details": { - "confidence_interval": 0.028735, - "stddev": 0.028735 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Bengali", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Bengali" - }, - "score_details": { - "score": 0.9075, - "details": { - "confidence_interval": 0.028393, - "stddev": 0.028393 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "German", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task German" - }, - "score_details": { - "score": 0.9125, - "details": { - "confidence_interval": 0.027691, - "stddev": 0.027691 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "French", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task French" - }, - "score_details": { - "score": 0.91, - "details": { - "confidence_interval": 0.028045, - "stddev": 0.028045 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Hindi", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Hindi" - }, - "score_details": { - "score": 0.9, - "details": { - "confidence_interval": 0.029399, - "stddev": 0.029399 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Indonesian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Indonesian" - }, - "score_details": { - "score": 0.9025, - "details": { - "confidence_interval": 0.02907, - "stddev": 0.02907 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Italian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Italian" - }, - "score_details": { - "score": 0.9075, - "details": { - "confidence_interval": 0.028393, - "stddev": 0.028393 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Japanese", - "metric_config": { - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Japanese" - }, - "score_details": { - "score": 0.9, - "details": { - "confidence_interval": 0.029399, - "stddev": 0.029399 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Korean", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Korean" - }, - "score_details": { - "score": 0.9125, - "details": { - "confidence_interval": 0.027691, - "stddev": 0.027691 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Portuguese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Portuguese" - }, - "score_details": { - "score": 0.91, - "details": { - "confidence_interval": 0.028045, - "stddev": 0.028045 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Spanish", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Spanish" - }, - "score_details": { - "score": 0.9075, - "details": { - "confidence_interval": 0.028393, - "stddev": 0.028393 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Swahili", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Swahili" - }, - "score_details": { - "score": 0.8975, - "details": { - "confidence_interval": 0.029723, - "stddev": 0.029723 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Yoruba", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Yoruba" - }, - "score_details": { - "score": 0.8975, - "details": { - "confidence_interval": 0.029723, - "stddev": 0.029723 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Chinese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Chinese" - }, - "score_details": { - "score": 0.9175, - "details": { - "confidence_interval": 0.026962, - "stddev": 0.026962 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Burmese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Burmese" - }, - "score_details": { - "score": 0.8925, - "details": { - "confidence_interval": 0.030355, - "stddev": 0.030355 - } - }, - "detailed_evaluation_results_url": 
"https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - } - ] -} \ No newline at end of file diff --git a/data/global-mmlu-lite/Cohere/Aya-Expanse-32B/33c55930-eba4-45d1-a214-bfb0338812b3.json b/data/global-mmlu-lite/Cohere/Aya-Expanse-32B/33c55930-eba4-45d1-a214-bfb0338812b3.json deleted file mode 100644 index 2d9b76f92..000000000 --- a/data/global-mmlu-lite/Cohere/Aya-Expanse-32B/33c55930-eba4-45d1-a214-bfb0338812b3.json +++ /dev/null @@ -1,353 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "global-mmlu-lite/cohere_aya-expanse-32b/1764290504.007959", - "retrieved_timestamp": "1764290504.007959", - "source_data": [ - "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - ], - "source_metadata": { - "source_organization_name": "Cohere Labs", - "source_organization_url": "https://www.kaggle.com/organizations/cohere-labs", - "evaluator_relationship": "third_party", - "source_type": "documentation", - "source_name": "Kaggle Global MMLU Lite Leaderboard" - }, - "model_info": { - "name": "Aya Expanse 32B", - "id": "cohere/aya-expanse-32b", - "developer": "Cohere", - "inference_platform": "Kaggle" - }, - "evaluation_results": [ - { - "evaluation_name": "Global MMLU Lite", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite" - }, - "score_details": { - "score": 0.7353330772982066 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Sensitive", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Sensitive" - }, - "score_details": { - "score": 0.6890979334287393 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Agnostic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Agnostic" - }, - "score_details": { - "score": 0.7815399940651198 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Arabic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Arabic" - }, - "score_details": { - "score": 0.7425, - "details": { - "confidence_interval": 0.04285, - "stddev": 0.04285 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "English", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task English" - }, - "score_details": { - "score": 0.754386, - "details": { - "confidence_interval": 0.042236, - "stddev": 0.042236 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Bengali", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - 
"max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Bengali" - }, - "score_details": { - "score": 0.734336, - "details": { - "confidence_interval": 0.043339, - "stddev": 0.043339 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "German", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task German" - }, - "score_details": { - "score": 0.7425, - "details": { - "confidence_interval": 0.04285, - "stddev": 0.04285 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "French", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task French" - }, - "score_details": { - "score": 0.7325, - "details": { - "confidence_interval": 0.043379, - "stddev": 0.043379 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Hindi", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Hindi" - }, - "score_details": { - "score": 0.7375, - "details": { - "confidence_interval": 0.043119, - "stddev": 0.043119 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Indonesian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Indonesian" - }, - "score_details": { - "score": 0.759398, - "details": { - "confidence_interval": 0.041942, - "stddev": 0.041942 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Italian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Italian" - }, - "score_details": { - "score": 0.730479, - "details": { - "confidence_interval": 0.043647, - "stddev": 0.043647 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Japanese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Japanese" - }, - "score_details": { - "score": 0.741855, - "details": { - "confidence_interval": 0.042939, - "stddev": 0.042939 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Korean", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Korean" - }, - "score_details": { - "score": 0.7525, - "details": { - "confidence_interval": 0.042292, - "stddev": 0.042292 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - 
"evaluation_name": "Portuguese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Portuguese" - }, - "score_details": { - "score": 0.754386, - "details": { - "confidence_interval": 0.042236, - "stddev": 0.042236 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Spanish", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Spanish" - }, - "score_details": { - "score": 0.736181, - "details": { - "confidence_interval": 0.043296, - "stddev": 0.043296 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Swahili", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Swahili" - }, - "score_details": { - "score": 0.707071, - "details": { - "confidence_interval": 0.044824, - "stddev": 0.044824 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Yoruba", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Yoruba" - }, - "score_details": { - "score": 0.694236, - "details": { - "confidence_interval": 0.045207, - "stddev": 0.045207 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Chinese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Chinese" - }, - "score_details": { - "score": 0.743003, - "details": { - "confidence_interval": 0.043203, - "stddev": 0.043203 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Burmese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Burmese" - }, - "score_details": { - "score": 0.7025, - "details": { - "confidence_interval": 0.044801, - "stddev": 0.044801 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - } - ] -} \ No newline at end of file diff --git a/data/global-mmlu-lite/Cohere/Command-A/ba5f478e-7484-4c7d-9691-1c4da2aa39a1.json b/data/global-mmlu-lite/Cohere/Command-A/ba5f478e-7484-4c7d-9691-1c4da2aa39a1.json deleted file mode 100644 index 75fcf23d9..000000000 --- a/data/global-mmlu-lite/Cohere/Command-A/ba5f478e-7484-4c7d-9691-1c4da2aa39a1.json +++ /dev/null @@ -1,353 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "global-mmlu-lite/cohere_command-a/1764290503.996772", - "retrieved_timestamp": "1764290503.996772", - "source_data": [ - "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - ], - "source_metadata": { - "source_organization_name": "Cohere Labs", - "source_organization_url": "https://www.kaggle.com/organizations/cohere-labs", - 
"evaluator_relationship": "third_party", - "source_type": "documentation", - "source_name": "Kaggle Global MMLU Lite Leaderboard" - }, - "model_info": { - "name": "Command A", - "id": "cohere/command-a", - "developer": "Cohere", - "inference_platform": "Kaggle" - }, - "evaluation_results": [ - { - "evaluation_name": "Global MMLU Lite", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite" - }, - "score_details": { - "score": 0.838546365914787 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Sensitive", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Sensitive" - }, - "score_details": { - "score": 0.7993200376884423 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Agnostic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Agnostic" - }, - "score_details": { - "score": 0.8777732412060302 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Arabic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Arabic" - }, - "score_details": { - "score": 0.8425, - "details": { - "confidence_interval": 0.035698, - "stddev": 0.035698 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "English", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task English" - }, - "score_details": { - "score": 0.855, - "details": { - "confidence_interval": 0.034505, - "stddev": 0.034505 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Bengali", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Bengali" - }, - "score_details": { - "score": 0.8225, - "details": { - "confidence_interval": 0.037444, - "stddev": 0.037444 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "German", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task German" - }, - "score_details": { - "score": 0.8425, - "details": { - "confidence_interval": 0.035698, - "stddev": 0.035698 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "French", - "metric_config": { - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task French" - }, - "score_details": { - "score": 0.8375, - "details": { - "confidence_interval": 0.036152, - "stddev": 0.036152 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Hindi", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Hindi" - }, - "score_details": { - "score": 0.842105, - "details": { - "confidence_interval": 0.035779, - "stddev": 0.035779 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Indonesian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Indonesian" - }, - "score_details": { - "score": 0.854637, - "details": { - "confidence_interval": 0.034584, - "stddev": 0.034584 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Italian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Italian" - }, - "score_details": { - "score": 0.8375, - "details": { - "confidence_interval": 0.036152, - "stddev": 0.036152 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Japanese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Japanese" - }, - "score_details": { - "score": 0.845, - "details": { - "confidence_interval": 0.035466, - "stddev": 0.035466 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Korean", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Korean" - }, - "score_details": { - "score": 0.85, - "details": { - "confidence_interval": 0.034992, - "stddev": 0.034992 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Portuguese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Portuguese" - }, - "score_details": { - "score": 0.84, - "details": { - "confidence_interval": 0.035927, - "stddev": 0.035927 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Spanish", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Spanish" - }, - "score_details": { - "score": 0.8525, - "details": { - "confidence_interval": 0.034751, - "stddev": 0.034751 - } - }, - "detailed_evaluation_results_url": 
"https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Swahili", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Swahili" - }, - "score_details": { - "score": 0.8275, - "details": { - "confidence_interval": 0.037025, - "stddev": 0.037025 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Yoruba", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Yoruba" - }, - "score_details": { - "score": 0.815, - "details": { - "confidence_interval": 0.038052, - "stddev": 0.038052 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Chinese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Chinese" - }, - "score_details": { - "score": 0.835, - "details": { - "confidence_interval": 0.036375, - "stddev": 0.036375 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Burmese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Burmese" - }, - "score_details": { - "score": 0.8175, - "details": { - "confidence_interval": 0.037852, - "stddev": 0.037852 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - } - ] -} \ No newline at end of file diff --git a/data/global-mmlu-lite/DeepSeek/DeepSeek-R1/a00e87b5-bb92-4ff5-aea7-b4e8357663c2.json b/data/global-mmlu-lite/DeepSeek/DeepSeek-R1/a00e87b5-bb92-4ff5-aea7-b4e8357663c2.json deleted file mode 100644 index 5b642255d..000000000 --- a/data/global-mmlu-lite/DeepSeek/DeepSeek-R1/a00e87b5-bb92-4ff5-aea7-b4e8357663c2.json +++ /dev/null @@ -1,353 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "global-mmlu-lite/deepseek_deepseek-r1/1764290504.0088", - "retrieved_timestamp": "1764290504.0088", - "source_data": [ - "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - ], - "source_metadata": { - "source_organization_name": "Cohere Labs", - "source_organization_url": "https://www.kaggle.com/organizations/cohere-labs", - "evaluator_relationship": "third_party", - "source_type": "documentation", - "source_name": "Kaggle Global MMLU Lite Leaderboard" - }, - "model_info": { - "name": "DeepSeek-R1", - "id": "deepseek/deepseek-r1", - "developer": "DeepSeek", - "inference_platform": "Kaggle" - }, - "evaluation_results": [ - { - "evaluation_name": "Global MMLU Lite", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite" - }, - "score_details": { - "score": 0.674375 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Sensitive", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Sensitive" - }, - "score_details": { - "score": 0.6671875 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Agnostic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Agnostic" - }, - "score_details": { - "score": 0.6815625000000001 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Arabic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Arabic" - }, - "score_details": { - "score": 0.6825, - "details": { - "confidence_interval": 0.045619, - "stddev": 0.045619 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "English", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task English" - }, - "score_details": { - "score": 0.715, - "details": { - "confidence_interval": 0.044238, - "stddev": 0.044238 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Bengali", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Bengali" - }, - "score_details": { - "score": 0.655, - "details": { - "confidence_interval": 0.046585, - "stddev": 0.046585 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "German", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task German" - }, - "score_details": { - "score": 0.6375, - "details": { - "confidence_interval": 0.04711, - "stddev": 0.04711 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "French", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task French" - }, - "score_details": { - "score": 0.6925, - "details": { - "confidence_interval": 0.045222, - "stddev": 0.045222 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Hindi", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Hindi" - }, - "score_details": { - "score": 0.6475, - "details": { - "confidence_interval": 0.046819, - "stddev": 0.046819 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Indonesian", - 
"metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Indonesian" - }, - "score_details": { - "score": 0.655, - "details": { - "confidence_interval": 0.046585, - "stddev": 0.046585 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Italian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Italian" - }, - "score_details": { - "score": 0.6775, - "details": { - "confidence_interval": 0.045808, - "stddev": 0.045808 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Japanese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Japanese" - }, - "score_details": { - "score": 0.7725, - "details": { - "confidence_interval": 0.041083, - "stddev": 0.041083 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Korean", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Korean" - }, - "score_details": { - "score": 0.6575, - "details": { - "confidence_interval": 0.046505, - "stddev": 0.046505 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Portuguese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Portuguese" - }, - "score_details": { - "score": 0.635, - "details": { - "confidence_interval": 0.047179, - "stddev": 0.047179 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Spanish", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Spanish" - }, - "score_details": { - "score": 0.7175, - "details": { - "confidence_interval": 0.04412, - "stddev": 0.04412 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Swahili", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Swahili" - }, - "score_details": { - "score": 0.6775, - "details": { - "confidence_interval": 0.045808, - "stddev": 0.045808 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Yoruba", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Yoruba" - }, - "score_details": { - "score": 0.77, - "details": { - "confidence_interval": 0.041241, - "stddev": 0.041241 - } - }, - 
"detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Chinese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Chinese" - }, - "score_details": { - "score": 0.5075, - "details": { - "confidence_interval": 0.048994, - "stddev": 0.048994 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Burmese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Burmese" - }, - "score_details": { - "score": 0.69, - "details": { - "confidence_interval": 0.045324, - "stddev": 0.045324 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - } - ] -} \ No newline at end of file diff --git a/data/global-mmlu-lite/DeepSeek/Deepseek-V3.1/a1dabd04-29d3-4170-88f7-ee899b26c24a.json b/data/global-mmlu-lite/DeepSeek/Deepseek-V3.1/a1dabd04-29d3-4170-88f7-ee899b26c24a.json deleted file mode 100644 index e51c4f590..000000000 --- a/data/global-mmlu-lite/DeepSeek/Deepseek-V3.1/a1dabd04-29d3-4170-88f7-ee899b26c24a.json +++ /dev/null @@ -1,353 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "global-mmlu-lite/deepseek_deepseek-v31/1764290503.997821", - "retrieved_timestamp": "1764290503.997821", - "source_data": [ - "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - ], - "source_metadata": { - "source_organization_name": "Cohere Labs", - "source_organization_url": "https://www.kaggle.com/organizations/cohere-labs", - "evaluator_relationship": "third_party", - "source_type": "documentation", - "source_name": "Kaggle Global MMLU Lite Leaderboard" - }, - "model_info": { - "name": "Deepseek V3.1", - "id": "deepseek/deepseek-v31", - "developer": "DeepSeek", - "inference_platform": "Kaggle" - }, - "evaluation_results": [ - { - "evaluation_name": "Global MMLU Lite", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite" - }, - "score_details": { - "score": 0.8043661366877002 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Sensitive", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Sensitive" - }, - "score_details": { - "score": 0.7793102525957433 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Agnostic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Agnostic" - }, - "score_details": { - "score": 0.8294756436687251 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Arabic", - "metric_config": { - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Arabic" - }, - "score_details": { - "score": 0.805, - "details": { - "confidence_interval": 0.038827, - "stddev": 0.038827 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "English", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task English" - }, - "score_details": { - "score": 0.825, - "details": { - "confidence_interval": 0.037236, - "stddev": 0.037236 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Bengali", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Bengali" - }, - "score_details": { - "score": 0.815657, - "details": { - "confidence_interval": 0.038192, - "stddev": 0.038192 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "German", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task German" - }, - "score_details": { - "score": 0.7925, - "details": { - "confidence_interval": 0.03974, - "stddev": 0.03974 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "French", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task French" - }, - "score_details": { - "score": 0.8175, - "details": { - "confidence_interval": 0.037852, - "stddev": 0.037852 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Hindi", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Hindi" - }, - "score_details": { - "score": 0.756892, - "details": { - "confidence_interval": 0.04209, - "stddev": 0.04209 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Indonesian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Indonesian" - }, - "score_details": { - "score": 0.776382, - "details": { - "confidence_interval": 0.040935, - "stddev": 0.040935 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Italian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Italian" - }, - "score_details": { - "score": 0.8075, - "details": { - "confidence_interval": 0.038637, - "stddev": 0.038637 - } - }, - "detailed_evaluation_results_url": 
"https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Japanese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Japanese" - }, - "score_details": { - "score": 0.831169, - "details": { - "confidence_interval": 0.037419, - "stddev": 0.037419 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Korean", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Korean" - }, - "score_details": { - "score": 0.8125, - "details": { - "confidence_interval": 0.03825, - "stddev": 0.03825 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Portuguese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Portuguese" - }, - "score_details": { - "score": 0.824561, - "details": { - "confidence_interval": 0.037319, - "stddev": 0.037319 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Spanish", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Spanish" - }, - "score_details": { - "score": 0.8125, - "details": { - "confidence_interval": 0.03825, - "stddev": 0.03825 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Swahili", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Swahili" - }, - "score_details": { - "score": 0.801008, - "details": { - "confidence_interval": 0.039273, - "stddev": 0.039273 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Yoruba", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Yoruba" - }, - "score_details": { - "score": 0.783069, - "details": { - "confidence_interval": 0.041549, - "stddev": 0.041549 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Chinese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Chinese" - }, - "score_details": { - "score": 0.816121, - "details": { - "confidence_interval": 0.038106, - "stddev": 0.038106 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Burmese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Burmese" - }, - "score_details": { 
- "score": 0.7925, - "details": { - "confidence_interval": 0.03974, - "stddev": 0.03974 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - } - ] -} \ No newline at end of file diff --git a/data/global-mmlu-lite/Google/Gemini-2.5-Flash-Preview/ea9d0ff1-0801-4de7-a99a-febdcde420fa.json b/data/global-mmlu-lite/Google/Gemini-2.5-Flash-Preview/ea9d0ff1-0801-4de7-a99a-febdcde420fa.json deleted file mode 100644 index 4c96e18bb..000000000 --- a/data/global-mmlu-lite/Google/Gemini-2.5-Flash-Preview/ea9d0ff1-0801-4de7-a99a-febdcde420fa.json +++ /dev/null @@ -1,353 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "global-mmlu-lite/google_gemini-25-flash-preview/1764290503.993019", - "retrieved_timestamp": "1764290503.993019", - "source_data": [ - "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - ], - "source_metadata": { - "source_organization_name": "Cohere Labs", - "source_organization_url": "https://www.kaggle.com/organizations/cohere-labs", - "evaluator_relationship": "third_party", - "source_type": "documentation", - "source_name": "Kaggle Global MMLU Lite Leaderboard" - }, - "model_info": { - "name": "Gemini 2.5 Flash Preview", - "id": "google/gemini-25-flash-preview", - "developer": "Google", - "inference_platform": "Kaggle" - }, - "evaluation_results": [ - { - "evaluation_name": "Global MMLU Lite", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite" - }, - "score_details": { - "score": 0.90921875 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Sensitive", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Sensitive" - }, - "score_details": { - "score": 0.8925000000000001 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Agnostic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Agnostic" - }, - "score_details": { - "score": 0.9259375 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Arabic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Arabic" - }, - "score_details": { - "score": 0.905, - "details": { - "confidence_interval": 0.028735, - "stddev": 0.028735 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "English", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task English" - }, - "score_details": { - "score": 0.9225, - "details": { - "confidence_interval": 0.026203, - "stddev": 0.026203 - } - }, - "detailed_evaluation_results_url": 
"https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Bengali", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Bengali" - }, - "score_details": { - "score": 0.91, - "details": { - "confidence_interval": 0.028045, - "stddev": 0.028045 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "German", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task German" - }, - "score_details": { - "score": 0.905, - "details": { - "confidence_interval": 0.028735, - "stddev": 0.028735 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "French", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task French" - }, - "score_details": { - "score": 0.925, - "details": { - "confidence_interval": 0.025812, - "stddev": 0.025812 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Hindi", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Hindi" - }, - "score_details": { - "score": 0.9125, - "details": { - "confidence_interval": 0.027691, - "stddev": 0.027691 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Indonesian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Indonesian" - }, - "score_details": { - "score": 0.9075, - "details": { - "confidence_interval": 0.028393, - "stddev": 0.028393 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Italian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Italian" - }, - "score_details": { - "score": 0.89, - "details": { - "confidence_interval": 0.030663, - "stddev": 0.030663 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Japanese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Japanese" - }, - "score_details": { - "score": 0.9125, - "details": { - "confidence_interval": 0.027691, - "stddev": 0.027691 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Korean", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Korean" - }, - "score_details": { - "score": 0.9075, 
- "details": { - "confidence_interval": 0.028393, - "stddev": 0.028393 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Portuguese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Portuguese" - }, - "score_details": { - "score": 0.915, - "details": { - "confidence_interval": 0.02733, - "stddev": 0.02733 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Spanish", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Spanish" - }, - "score_details": { - "score": 0.915, - "details": { - "confidence_interval": 0.02733, - "stddev": 0.02733 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Swahili", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Swahili" - }, - "score_details": { - "score": 0.905, - "details": { - "confidence_interval": 0.028735, - "stddev": 0.028735 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Yoruba", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Yoruba" - }, - "score_details": { - "score": 0.8825, - "details": { - "confidence_interval": 0.031557, - "stddev": 0.031557 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Chinese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Chinese" - }, - "score_details": { - "score": 0.93, - "details": { - "confidence_interval": 0.025004, - "stddev": 0.025004 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Burmese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Burmese" - }, - "score_details": { - "score": 0.9025, - "details": { - "confidence_interval": 0.02907, - "stddev": 0.02907 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - } - ] -} \ No newline at end of file diff --git a/data/global-mmlu-lite/Google/Gemini-2.5-Flash/32612d44-2a0e-44b6-9f23-bdbf8bccb714.json b/data/global-mmlu-lite/Google/Gemini-2.5-Flash/32612d44-2a0e-44b6-9f23-bdbf8bccb714.json deleted file mode 100644 index 917b42cf9..000000000 --- a/data/global-mmlu-lite/Google/Gemini-2.5-Flash/32612d44-2a0e-44b6-9f23-bdbf8bccb714.json +++ /dev/null @@ -1,353 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "global-mmlu-lite/google_gemini-25-flash/1764290503.992547", - "retrieved_timestamp": "1764290503.992547", - "source_data": [ - 
"https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - ], - "source_metadata": { - "source_organization_name": "Cohere Labs", - "source_organization_url": "https://www.kaggle.com/organizations/cohere-labs", - "evaluator_relationship": "third_party", - "source_type": "documentation", - "source_name": "Kaggle Global MMLU Lite Leaderboard" - }, - "model_info": { - "name": "Gemini 2.5 Flash", - "id": "google/gemini-25-flash", - "developer": "Google", - "inference_platform": "Kaggle" - }, - "evaluation_results": [ - { - "evaluation_name": "Global MMLU Lite", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite" - }, - "score_details": { - "score": 0.91453125 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Sensitive", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Sensitive" - }, - "score_details": { - "score": 0.9 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Agnostic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Agnostic" - }, - "score_details": { - "score": 0.9290625 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Arabic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Arabic" - }, - "score_details": { - "score": 0.9125, - "details": { - "confidence_interval": 0.027691, - "stddev": 0.027691 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "English", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task English" - }, - "score_details": { - "score": 0.9325, - "details": { - "confidence_interval": 0.024586, - "stddev": 0.024586 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Bengali", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Bengali" - }, - "score_details": { - "score": 0.91, - "details": { - "confidence_interval": 0.028045, - "stddev": 0.028045 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "German", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task German" - }, - "score_details": { - "score": 0.9025, - "details": { - "confidence_interval": 0.02907, - "stddev": 0.02907 - } - }, - 
"detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "French", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task French" - }, - "score_details": { - "score": 0.91, - "details": { - "confidence_interval": 0.028045, - "stddev": 0.028045 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Hindi", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Hindi" - }, - "score_details": { - "score": 0.925, - "details": { - "confidence_interval": 0.025812, - "stddev": 0.025812 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Indonesian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Indonesian" - }, - "score_details": { - "score": 0.9075, - "details": { - "confidence_interval": 0.028393, - "stddev": 0.028393 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Italian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Italian" - }, - "score_details": { - "score": 0.9225, - "details": { - "confidence_interval": 0.026203, - "stddev": 0.026203 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Japanese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Japanese" - }, - "score_details": { - "score": 0.9125, - "details": { - "confidence_interval": 0.027691, - "stddev": 0.027691 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Korean", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Korean" - }, - "score_details": { - "score": 0.915, - "details": { - "confidence_interval": 0.02733, - "stddev": 0.02733 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Portuguese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Portuguese" - }, - "score_details": { - "score": 0.9125, - "details": { - "confidence_interval": 0.027691, - "stddev": 0.027691 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Spanish", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Spanish" - 
}, - "score_details": { - "score": 0.9175, - "details": { - "confidence_interval": 0.026962, - "stddev": 0.026962 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Swahili", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Swahili" - }, - "score_details": { - "score": 0.915, - "details": { - "confidence_interval": 0.02733, - "stddev": 0.02733 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Yoruba", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Yoruba" - }, - "score_details": { - "score": 0.9075, - "details": { - "confidence_interval": 0.028393, - "stddev": 0.028393 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Chinese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Chinese" - }, - "score_details": { - "score": 0.915, - "details": { - "confidence_interval": 0.02733, - "stddev": 0.02733 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Burmese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Burmese" - }, - "score_details": { - "score": 0.915, - "details": { - "confidence_interval": 0.02733, - "stddev": 0.02733 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - } - ] -} \ No newline at end of file diff --git a/data/global-mmlu-lite/Google/Gemini-2.5-Pro/4d20140d-a955-4927-9140-49fe597519c6.json b/data/global-mmlu-lite/Google/Gemini-2.5-Pro/4d20140d-a955-4927-9140-49fe597519c6.json deleted file mode 100644 index 0bed5c410..000000000 --- a/data/global-mmlu-lite/Google/Gemini-2.5-Pro/4d20140d-a955-4927-9140-49fe597519c6.json +++ /dev/null @@ -1,353 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "global-mmlu-lite/google_gemini-25-pro/1764290503.992078", - "retrieved_timestamp": "1764290503.992078", - "source_data": [ - "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - ], - "source_metadata": { - "source_organization_name": "Cohere Labs", - "source_organization_url": "https://www.kaggle.com/organizations/cohere-labs", - "evaluator_relationship": "third_party", - "source_type": "documentation", - "source_name": "Kaggle Global MMLU Lite Leaderboard" - }, - "model_info": { - "name": "Gemini 2.5 Pro", - "id": "google/gemini-25-pro", - "developer": "Google", - "inference_platform": "Kaggle" - }, - "evaluation_results": [ - { - "evaluation_name": "Global MMLU Lite", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite" - }, - "score_details": { - "score": 0.93234375 - }, - "detailed_evaluation_results_url": 
"https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Sensitive", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Sensitive" - }, - "score_details": { - "score": 0.9240625 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Agnostic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Agnostic" - }, - "score_details": { - "score": 0.940625 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Arabic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Arabic" - }, - "score_details": { - "score": 0.9475, - "details": { - "confidence_interval": 0.021857, - "stddev": 0.021857 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "English", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task English" - }, - "score_details": { - "score": 0.9275, - "details": { - "confidence_interval": 0.025412, - "stddev": 0.025412 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Bengali", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Bengali" - }, - "score_details": { - "score": 0.9275, - "details": { - "confidence_interval": 0.025412, - "stddev": 0.025412 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "German", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task German" - }, - "score_details": { - "score": 0.93, - "details": { - "confidence_interval": 0.025004, - "stddev": 0.025004 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "French", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task French" - }, - "score_details": { - "score": 0.9425, - "details": { - "confidence_interval": 0.022814, - "stddev": 0.022814 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Hindi", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Hindi" - }, - "score_details": { - "score": 0.9275, - "details": { - 
"confidence_interval": 0.025412, - "stddev": 0.025412 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Indonesian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Indonesian" - }, - "score_details": { - "score": 0.925, - "details": { - "confidence_interval": 0.025812, - "stddev": 0.025812 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Italian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Italian" - }, - "score_details": { - "score": 0.935, - "details": { - "confidence_interval": 0.024159, - "stddev": 0.024159 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Japanese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Japanese" - }, - "score_details": { - "score": 0.9375, - "details": { - "confidence_interval": 0.023722, - "stddev": 0.023722 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Korean", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Korean" - }, - "score_details": { - "score": 0.9275, - "details": { - "confidence_interval": 0.025412, - "stddev": 0.025412 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Portuguese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Portuguese" - }, - "score_details": { - "score": 0.93, - "details": { - "confidence_interval": 0.025004, - "stddev": 0.025004 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Spanish", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Spanish" - }, - "score_details": { - "score": 0.94, - "details": { - "confidence_interval": 0.023273, - "stddev": 0.023273 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Swahili", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Swahili" - }, - "score_details": { - "score": 0.9375, - "details": { - "confidence_interval": 0.023722, - "stddev": 0.023722 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Yoruba", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - 
"evaluation_description": "Global MMLU Lite accuracy for task Yoruba" - }, - "score_details": { - "score": 0.925, - "details": { - "confidence_interval": 0.025812, - "stddev": 0.025812 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Chinese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Chinese" - }, - "score_details": { - "score": 0.9275, - "details": { - "confidence_interval": 0.025412, - "stddev": 0.025412 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Burmese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Burmese" - }, - "score_details": { - "score": 0.93, - "details": { - "confidence_interval": 0.025004, - "stddev": 0.025004 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - } - ] -} \ No newline at end of file diff --git a/data/global-mmlu-lite/Google/Gemini-3-Pro-Preview/658d3005-8fe2-4560-acb9-e2e271b72cea.json b/data/global-mmlu-lite/Google/Gemini-3-Pro-Preview/658d3005-8fe2-4560-acb9-e2e271b72cea.json deleted file mode 100644 index 7658e46a9..000000000 --- a/data/global-mmlu-lite/Google/Gemini-3-Pro-Preview/658d3005-8fe2-4560-acb9-e2e271b72cea.json +++ /dev/null @@ -1,353 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "global-mmlu-lite/google_gemini-3-pro-preview/1764290503.990892", - "retrieved_timestamp": "1764290503.990892", - "source_data": [ - "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - ], - "source_metadata": { - "source_organization_name": "Cohere Labs", - "source_organization_url": "https://www.kaggle.com/organizations/cohere-labs", - "evaluator_relationship": "third_party", - "source_type": "documentation", - "source_name": "Kaggle Global MMLU Lite Leaderboard" - }, - "model_info": { - "name": "Gemini 3 Pro Preview", - "id": "google/gemini-3-pro-preview", - "developer": "Google", - "inference_platform": "Kaggle" - }, - "evaluation_results": [ - { - "evaluation_name": "Global MMLU Lite", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite" - }, - "score_details": { - "score": 0.9453125 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Sensitive", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Sensitive" - }, - "score_details": { - "score": 0.939688 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Agnostic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Agnostic" - }, - "score_details": { - "score": 0.950937 - }, - 
"detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Arabic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Arabic" - }, - "score_details": { - "score": 0.9475, - "details": { - "confidence_interval": 0.021857, - "stddev": 0.021857 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "English", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task English" - }, - "score_details": { - "score": 0.9425, - "details": { - "confidence_interval": 0.022814, - "stddev": 0.022814 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Bengali", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Bengali" - }, - "score_details": { - "score": 0.9425, - "details": { - "confidence_interval": 0.022814, - "stddev": 0.022814 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "German", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task German" - }, - "score_details": { - "score": 0.94, - "details": { - "confidence_interval": 0.023273, - "stddev": 0.023273 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "French", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task French" - }, - "score_details": { - "score": 0.9575, - "details": { - "confidence_interval": 0.019769, - "stddev": 0.019769 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Hindi", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Hindi" - }, - "score_details": { - "score": 0.9425, - "details": { - "confidence_interval": 0.022814, - "stddev": 0.022814 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Indonesian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Indonesian" - }, - "score_details": { - "score": 0.955, - "details": { - "confidence_interval": 0.020315, - "stddev": 0.020315 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Italian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Italian" - }, - 
"score_details": { - "score": 0.955, - "details": { - "confidence_interval": 0.020315, - "stddev": 0.020315 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Japanese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Japanese" - }, - "score_details": { - "score": 0.94, - "details": { - "confidence_interval": 0.023273, - "stddev": 0.023273 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Korean", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Korean" - }, - "score_details": { - "score": 0.94, - "details": { - "confidence_interval": 0.023273, - "stddev": 0.023273 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Portuguese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Portuguese" - }, - "score_details": { - "score": 0.9425, - "details": { - "confidence_interval": 0.022814, - "stddev": 0.022814 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Spanish", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Spanish" - }, - "score_details": { - "score": 0.9475, - "details": { - "confidence_interval": 0.021857, - "stddev": 0.021857 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Swahili", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Swahili" - }, - "score_details": { - "score": 0.94, - "details": { - "confidence_interval": 0.023273, - "stddev": 0.023273 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Yoruba", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Yoruba" - }, - "score_details": { - "score": 0.9425, - "details": { - "confidence_interval": 0.022814, - "stddev": 0.022814 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Chinese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Chinese" - }, - "score_details": { - "score": 0.9475, - "details": { - "confidence_interval": 0.021857, - "stddev": 0.021857 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Burmese", - "metric_config": { - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Burmese" - }, - "score_details": { - "score": 0.9425, - "details": { - "confidence_interval": 0.022814, - "stddev": 0.022814 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - } - ] -} \ No newline at end of file diff --git a/data/global-mmlu-lite/Google/Gemma-3-27B/21e8fec0-ea47-4375-9c99-c5a3811296e9.json b/data/global-mmlu-lite/Google/Gemma-3-27B/21e8fec0-ea47-4375-9c99-c5a3811296e9.json deleted file mode 100644 index c2ed48e62..000000000 --- a/data/global-mmlu-lite/Google/Gemma-3-27B/21e8fec0-ea47-4375-9c99-c5a3811296e9.json +++ /dev/null @@ -1,353 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "global-mmlu-lite/google_gemma-3-27b/1764290504.001706", - "retrieved_timestamp": "1764290504.001706", - "source_data": [ - "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - ], - "source_metadata": { - "source_organization_name": "Cohere Labs", - "source_organization_url": "https://www.kaggle.com/organizations/cohere-labs", - "evaluator_relationship": "third_party", - "source_type": "documentation", - "source_name": "Kaggle Global MMLU Lite Leaderboard" - }, - "model_info": { - "name": "Gemma 3 27B", - "id": "google/gemma-3-27b", - "developer": "Google", - "inference_platform": "Kaggle" - }, - "evaluation_results": [ - { - "evaluation_name": "Global MMLU Lite", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite" - }, - "score_details": { - "score": 0.7630186674677049 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Sensitive", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Sensitive" - }, - "score_details": { - "score": 0.7527856328378291 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Agnostic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Agnostic" - }, - "score_details": { - "score": 0.7732575382793239 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Arabic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Arabic" - }, - "score_details": { - "score": 0.78, - "details": { - "confidence_interval": 0.040595, - "stddev": 0.040595 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "English", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task English" - }, - "score_details": { - "score": 0.733668, - "details": { - "confidence_interval": 0.043428, - 
"stddev": 0.043428 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Bengali", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Bengali" - }, - "score_details": { - "score": 0.75, - "details": { - "confidence_interval": 0.042648, - "stddev": 0.042648 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "German", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task German" - }, - "score_details": { - "score": 0.775, - "details": { - "confidence_interval": 0.040922, - "stddev": 0.040922 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "French", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task French" - }, - "score_details": { - "score": 0.748092, - "details": { - "confidence_interval": 0.042919, - "stddev": 0.042919 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Hindi", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Hindi" - }, - "score_details": { - "score": 0.733503, - "details": { - "confidence_interval": 0.043656, - "stddev": 0.043656 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Indonesian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Indonesian" - }, - "score_details": { - "score": 0.756281, - "details": { - "confidence_interval": 0.042179, - "stddev": 0.042179 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Italian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Italian" - }, - "score_details": { - "score": 0.75, - "details": { - "confidence_interval": 0.042434, - "stddev": 0.042434 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Japanese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Japanese" - }, - "score_details": { - "score": 0.7925, - "details": { - "confidence_interval": 0.03974, - "stddev": 0.03974 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Korean", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite 
accuracy for task Korean" - }, - "score_details": { - "score": 0.79798, - "details": { - "confidence_interval": 0.039545, - "stddev": 0.039545 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Portuguese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Portuguese" - }, - "score_details": { - "score": 0.748111, - "details": { - "confidence_interval": 0.042701, - "stddev": 0.042701 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Spanish", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Spanish" - }, - "score_details": { - "score": 0.749373, - "details": { - "confidence_interval": 0.042523, - "stddev": 0.042523 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Swahili", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Swahili" - }, - "score_details": { - "score": 0.785, - "details": { - "confidence_interval": 0.04026, - "stddev": 0.04026 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Yoruba", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Yoruba" - }, - "score_details": { - "score": 0.744361, - "details": { - "confidence_interval": 0.042802, - "stddev": 0.042802 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Chinese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Chinese" - }, - "score_details": { - "score": 0.7925, - "details": { - "confidence_interval": 0.03974, - "stddev": 0.03974 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Burmese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Burmese" - }, - "score_details": { - "score": 0.77193, - "details": { - "confidence_interval": 0.04117, - "stddev": 0.04117 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - } - ] -} \ No newline at end of file diff --git a/data/global-mmlu-lite/Google/Gemma-3-4B/dd08c6cc-919b-414d-a97e-025a7b485987.json b/data/global-mmlu-lite/Google/Gemma-3-4B/dd08c6cc-919b-414d-a97e-025a7b485987.json deleted file mode 100644 index 4c8c5380d..000000000 --- a/data/global-mmlu-lite/Google/Gemma-3-4B/dd08c6cc-919b-414d-a97e-025a7b485987.json +++ /dev/null @@ -1,353 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "global-mmlu-lite/google_gemma-3-4b/1764290504.009719", - "retrieved_timestamp": 
"1764290504.009719", - "source_data": [ - "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - ], - "source_metadata": { - "source_organization_name": "Cohere Labs", - "source_organization_url": "https://www.kaggle.com/organizations/cohere-labs", - "evaluator_relationship": "third_party", - "source_type": "documentation", - "source_name": "Kaggle Global MMLU Lite Leaderboard" - }, - "model_info": { - "name": "Gemma 3 4B", - "id": "google/gemma-3-4b", - "developer": "Google", - "inference_platform": "Kaggle" - }, - "evaluation_results": [ - { - "evaluation_name": "Global MMLU Lite", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite" - }, - "score_details": { - "score": 0.6510937500000001 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Sensitive", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Sensitive" - }, - "score_details": { - "score": 0.6115625 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Agnostic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Agnostic" - }, - "score_details": { - "score": 0.690625 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Arabic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Arabic" - }, - "score_details": { - "score": 0.6525, - "details": { - "confidence_interval": 0.046664, - "stddev": 0.046664 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "English", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task English" - }, - "score_details": { - "score": 0.67, - "details": { - "confidence_interval": 0.04608, - "stddev": 0.04608 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Bengali", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Bengali" - }, - "score_details": { - "score": 0.68, - "details": { - "confidence_interval": 0.045714, - "stddev": 0.045714 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "German", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task German" - }, - "score_details": { - "score": 0.6525, - "details": { - "confidence_interval": 
0.046664, - "stddev": 0.046664 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "French", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task French" - }, - "score_details": { - "score": 0.6575, - "details": { - "confidence_interval": 0.046505, - "stddev": 0.046505 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Hindi", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Hindi" - }, - "score_details": { - "score": 0.6475, - "details": { - "confidence_interval": 0.046819, - "stddev": 0.046819 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Indonesian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Indonesian" - }, - "score_details": { - "score": 0.6775, - "details": { - "confidence_interval": 0.045808, - "stddev": 0.045808 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Italian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Italian" - }, - "score_details": { - "score": 0.6675, - "details": { - "confidence_interval": 0.046168, - "stddev": 0.046168 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Japanese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Japanese" - }, - "score_details": { - "score": 0.6325, - "details": { - "confidence_interval": 0.047247, - "stddev": 0.047247 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Korean", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Korean" - }, - "score_details": { - "score": 0.66, - "details": { - "confidence_interval": 0.046423, - "stddev": 0.046423 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Portuguese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Portuguese" - }, - "score_details": { - "score": 0.68, - "details": { - "confidence_interval": 0.045714, - "stddev": 0.045714 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Spanish", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": 
"Global MMLU Lite accuracy for task Spanish" - }, - "score_details": { - "score": 0.6725, - "details": { - "confidence_interval": 0.045991, - "stddev": 0.045991 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Swahili", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Swahili" - }, - "score_details": { - "score": 0.6075, - "details": { - "confidence_interval": 0.047853, - "stddev": 0.047853 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Yoruba", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Yoruba" - }, - "score_details": { - "score": 0.5825, - "details": { - "confidence_interval": 0.048327, - "stddev": 0.048327 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Chinese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Chinese" - }, - "score_details": { - "score": 0.6475, - "details": { - "confidence_interval": 0.046819, - "stddev": 0.046819 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Burmese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Burmese" - }, - "score_details": { - "score": 0.63, - "details": { - "confidence_interval": 0.047314, - "stddev": 0.047314 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - } - ] -} \ No newline at end of file diff --git a/data/global-mmlu-lite/IBM/Granite-4.0-Small/75bb331f-e492-4dfd-9f1b-b83cad2f04d9.json b/data/global-mmlu-lite/IBM/Granite-4.0-Small/75bb331f-e492-4dfd-9f1b-b83cad2f04d9.json deleted file mode 100644 index 79d640997..000000000 --- a/data/global-mmlu-lite/IBM/Granite-4.0-Small/75bb331f-e492-4dfd-9f1b-b83cad2f04d9.json +++ /dev/null @@ -1,353 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "global-mmlu-lite/ibm_granite-40-small/1764290504.007292", - "retrieved_timestamp": "1764290504.007292", - "source_data": [ - "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - ], - "source_metadata": { - "source_organization_name": "Cohere Labs", - "source_organization_url": "https://www.kaggle.com/organizations/cohere-labs", - "evaluator_relationship": "third_party", - "source_type": "documentation", - "source_name": "Kaggle Global MMLU Lite Leaderboard" - }, - "model_info": { - "name": "Granite 4.0 Small", - "id": "ibm/granite-40-small", - "developer": "IBM", - "inference_platform": "Kaggle" - }, - "evaluation_results": [ - { - "evaluation_name": "Global MMLU Lite", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite" - }, - "score_details": { - "score": 0.7503477705089479 - }, - "detailed_evaluation_results_url": 
"https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Sensitive", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Sensitive" - }, - "score_details": { - "score": 0.7181731581654567 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Agnostic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Agnostic" - }, - "score_details": { - "score": 0.7825538827013044 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Arabic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Arabic" - }, - "score_details": { - "score": 0.761307, - "details": { - "confidence_interval": 0.04188, - "stddev": 0.04188 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "English", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task English" - }, - "score_details": { - "score": 0.77, - "details": { - "confidence_interval": 0.041241, - "stddev": 0.041241 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Bengali", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Bengali" - }, - "score_details": { - "score": 0.761307, - "details": { - "confidence_interval": 0.04188, - "stddev": 0.04188 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "German", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task German" - }, - "score_details": { - "score": 0.755, - "details": { - "confidence_interval": 0.042148, - "stddev": 0.042148 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "French", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task French" - }, - "score_details": { - "score": 0.759398, - "details": { - "confidence_interval": 0.041942, - "stddev": 0.041942 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Hindi", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Hindi" - }, - "score_details": { - "score": 0.7575, - "details": 
{ - "confidence_interval": 0.042002, - "stddev": 0.042002 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Indonesian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Indonesian" - }, - "score_details": { - "score": 0.761421, - "details": { - "confidence_interval": 0.042085, - "stddev": 0.042085 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Italian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Italian" - }, - "score_details": { - "score": 0.7525, - "details": { - "confidence_interval": 0.042292, - "stddev": 0.042292 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Japanese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Japanese" - }, - "score_details": { - "score": 0.740554, - "details": { - "confidence_interval": 0.043118, - "stddev": 0.043118 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Korean", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Korean" - }, - "score_details": { - "score": 0.7525, - "details": { - "confidence_interval": 0.042292, - "stddev": 0.042292 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Portuguese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Portuguese" - }, - "score_details": { - "score": 0.756962, - "details": { - "confidence_interval": 0.042298, - "stddev": 0.042298 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Spanish", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Spanish" - }, - "score_details": { - "score": 0.763819, - "details": { - "confidence_interval": 0.041728, - "stddev": 0.041728 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Swahili", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Swahili" - }, - "score_details": { - "score": 0.73183, - "details": { - "confidence_interval": 0.043468, - "stddev": 0.043468 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Yoruba", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - 
"max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Yoruba" - }, - "score_details": { - "score": 0.692112, - "details": { - "confidence_interval": 0.045639, - "stddev": 0.045639 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Chinese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Chinese" - }, - "score_details": { - "score": 0.7475, - "details": { - "confidence_interval": 0.042575, - "stddev": 0.042575 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Burmese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Burmese" - }, - "score_details": { - "score": 0.741855, - "details": { - "confidence_interval": 0.042939, - "stddev": 0.042939 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - } - ] -} \ No newline at end of file diff --git a/data/global-mmlu-lite/Mistral-AI/Mistral-Medium-3/b08417e3-22f1-40e7-a621-f25531972052.json b/data/global-mmlu-lite/Mistral-AI/Mistral-Medium-3/b08417e3-22f1-40e7-a621-f25531972052.json deleted file mode 100644 index 65ece62ea..000000000 --- a/data/global-mmlu-lite/Mistral-AI/Mistral-Medium-3/b08417e3-22f1-40e7-a621-f25531972052.json +++ /dev/null @@ -1,353 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "global-mmlu-lite/mistral-ai_mistral-medium-3/1764290504.01061", - "retrieved_timestamp": "1764290504.01061", - "source_data": [ - "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - ], - "source_metadata": { - "source_organization_name": "Cohere Labs", - "source_organization_url": "https://www.kaggle.com/organizations/cohere-labs", - "evaluator_relationship": "third_party", - "source_type": "documentation", - "source_name": "Kaggle Global MMLU Lite Leaderboard" - }, - "model_info": { - "name": "Mistral Medium 3", - "id": "mistral-ai/mistral-medium-3", - "developer": "Mistral AI", - "inference_platform": "Kaggle" - }, - "evaluation_results": [ - { - "evaluation_name": "Global MMLU Lite", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite" - }, - "score_details": { - "score": 0.55109375 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Sensitive", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Sensitive" - }, - "score_details": { - "score": 0.5390625 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Agnostic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Agnostic" - }, - "score_details": { - "score": 
0.5631250000000001 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Arabic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Arabic" - }, - "score_details": { - "score": 0.455, - "details": { - "confidence_interval": 0.0488, - "stddev": 0.0488 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "English", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task English" - }, - "score_details": { - "score": 0.38, - "details": { - "confidence_interval": 0.047567, - "stddev": 0.047567 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Bengali", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Bengali" - }, - "score_details": { - "score": 0.5175, - "details": { - "confidence_interval": 0.048969, - "stddev": 0.048969 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "German", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task German" - }, - "score_details": { - "score": 0.4775, - "details": { - "confidence_interval": 0.048949, - "stddev": 0.048949 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "French", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task French" - }, - "score_details": { - "score": 0.41, - "details": { - "confidence_interval": 0.048199, - "stddev": 0.048199 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Hindi", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Hindi" - }, - "score_details": { - "score": 0.555, - "details": { - "confidence_interval": 0.048702, - "stddev": 0.048702 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Indonesian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Indonesian" - }, - "score_details": { - "score": 0.515, - "details": { - "confidence_interval": 0.048977, - "stddev": 0.048977 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Italian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task 
Italian" - }, - "score_details": { - "score": 0.535, - "details": { - "confidence_interval": 0.048879, - "stddev": 0.048879 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Japanese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Japanese" - }, - "score_details": { - "score": 0.58, - "details": { - "confidence_interval": 0.048368, - "stddev": 0.048368 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Korean", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Korean" - }, - "score_details": { - "score": 0.595, - "details": { - "confidence_interval": 0.048107, - "stddev": 0.048107 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Portuguese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Portuguese" - }, - "score_details": { - "score": 0.5175, - "details": { - "confidence_interval": 0.048969, - "stddev": 0.048969 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Spanish", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Spanish" - }, - "score_details": { - "score": 0.5375, - "details": { - "confidence_interval": 0.048861, - "stddev": 0.048861 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Swahili", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Swahili" - }, - "score_details": { - "score": 0.7075, - "details": { - "confidence_interval": 0.04458, - "stddev": 0.04458 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Yoruba", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Yoruba" - }, - "score_details": { - "score": 0.7675, - "details": { - "confidence_interval": 0.041397, - "stddev": 0.041397 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Chinese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Chinese" - }, - "score_details": { - "score": 0.535, - "details": { - "confidence_interval": 0.048879, - "stddev": 0.048879 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Burmese", - "metric_config": { - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Burmese" - }, - "score_details": { - "score": 0.7325, - "details": { - "confidence_interval": 0.043379, - "stddev": 0.043379 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - } - ] -} \ No newline at end of file diff --git a/data/global-mmlu-lite/Mistral-AI/Mistral-Small-3.1/f5efe093-1cec-4e7f-8413-05039461ed27.json b/data/global-mmlu-lite/Mistral-AI/Mistral-Small-3.1/f5efe093-1cec-4e7f-8413-05039461ed27.json deleted file mode 100644 index b785a18fb..000000000 --- a/data/global-mmlu-lite/Mistral-AI/Mistral-Small-3.1/f5efe093-1cec-4e7f-8413-05039461ed27.json +++ /dev/null @@ -1,353 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "global-mmlu-lite/mistral-ai_mistral-small-31/1764290503.99838", - "retrieved_timestamp": "1764290503.99838", - "source_data": [ - "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - ], - "source_metadata": { - "source_organization_name": "Cohere Labs", - "source_organization_url": "https://www.kaggle.com/organizations/cohere-labs", - "evaluator_relationship": "third_party", - "source_type": "documentation", - "source_name": "Kaggle Global MMLU Lite Leaderboard" - }, - "model_info": { - "name": "Mistral Small 3.1", - "id": "mistral-ai/mistral-small-31", - "developer": "Mistral AI", - "inference_platform": "Kaggle" - }, - "evaluation_results": [ - { - "evaluation_name": "Global MMLU Lite", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite" - }, - "score_details": { - "score": 0.78515625 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Sensitive", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Sensitive" - }, - "score_details": { - "score": 0.7537499999999999 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Agnostic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Agnostic" - }, - "score_details": { - "score": 0.8165625 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Arabic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Arabic" - }, - "score_details": { - "score": 0.7875, - "details": { - "confidence_interval": 0.040089, - "stddev": 0.040089 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "English", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task English" - }, - "score_details": { - "score": 0.8, - 
"details": { - "confidence_interval": 0.039199, - "stddev": 0.039199 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Bengali", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Bengali" - }, - "score_details": { - "score": 0.7725, - "details": { - "confidence_interval": 0.041083, - "stddev": 0.041083 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "German", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task German" - }, - "score_details": { - "score": 0.7975, - "details": { - "confidence_interval": 0.039382, - "stddev": 0.039382 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "French", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task French" - }, - "score_details": { - "score": 0.8, - "details": { - "confidence_interval": 0.039199, - "stddev": 0.039199 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Hindi", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Hindi" - }, - "score_details": { - "score": 0.795, - "details": { - "confidence_interval": 0.039562, - "stddev": 0.039562 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Indonesian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Indonesian" - }, - "score_details": { - "score": 0.785, - "details": { - "confidence_interval": 0.04026, - "stddev": 0.04026 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Italian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Italian" - }, - "score_details": { - "score": 0.805, - "details": { - "confidence_interval": 0.038827, - "stddev": 0.038827 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Japanese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Japanese" - }, - "score_details": { - "score": 0.77, - "details": { - "confidence_interval": 0.041241, - "stddev": 0.041241 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Korean", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - 
"evaluation_description": "Global MMLU Lite accuracy for task Korean" - }, - "score_details": { - "score": 0.79, - "details": { - "confidence_interval": 0.039915, - "stddev": 0.039915 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Portuguese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Portuguese" - }, - "score_details": { - "score": 0.7925, - "details": { - "confidence_interval": 0.03974, - "stddev": 0.03974 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Spanish", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Spanish" - }, - "score_details": { - "score": 0.7825, - "details": { - "confidence_interval": 0.040429, - "stddev": 0.040429 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Swahili", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Swahili" - }, - "score_details": { - "score": 0.775, - "details": { - "confidence_interval": 0.040922, - "stddev": 0.040922 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Yoruba", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Yoruba" - }, - "score_details": { - "score": 0.735, - "details": { - "confidence_interval": 0.04325, - "stddev": 0.04325 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Chinese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Chinese" - }, - "score_details": { - "score": 0.7925, - "details": { - "confidence_interval": 0.03974, - "stddev": 0.03974 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Burmese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Burmese" - }, - "score_details": { - "score": 0.7825, - "details": { - "confidence_interval": 0.040429, - "stddev": 0.040429 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - } - ] -} \ No newline at end of file diff --git a/data/global-mmlu-lite/OpenAI/GPT-4.1/ab0d8833-09d3-4d42-b1f4-e0d3e410ea7f.json b/data/global-mmlu-lite/OpenAI/GPT-4.1/ab0d8833-09d3-4d42-b1f4-e0d3e410ea7f.json deleted file mode 100644 index 1941b09e8..000000000 --- a/data/global-mmlu-lite/OpenAI/GPT-4.1/ab0d8833-09d3-4d42-b1f4-e0d3e410ea7f.json +++ /dev/null @@ -1,353 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "global-mmlu-lite/openai_gpt-41/1764290503.995484", - 
"retrieved_timestamp": "1764290503.995484", - "source_data": [ - "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - ], - "source_metadata": { - "source_organization_name": "Cohere Labs", - "source_organization_url": "https://www.kaggle.com/organizations/cohere-labs", - "evaluator_relationship": "third_party", - "source_type": "documentation", - "source_name": "Kaggle Global MMLU Lite Leaderboard" - }, - "model_info": { - "name": "GPT-4.1", - "id": "openai/gpt-41", - "developer": "OpenAI", - "inference_platform": "Kaggle" - }, - "evaluation_results": [ - { - "evaluation_name": "Global MMLU Lite", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite" - }, - "score_details": { - "score": 0.8754687499999999 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Sensitive", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Sensitive" - }, - "score_details": { - "score": 0.8540625 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Agnostic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Agnostic" - }, - "score_details": { - "score": 0.896875 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Arabic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Arabic" - }, - "score_details": { - "score": 0.88, - "details": { - "confidence_interval": 0.031846, - "stddev": 0.031846 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "English", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task English" - }, - "score_details": { - "score": 0.8825, - "details": { - "confidence_interval": 0.031557, - "stddev": 0.031557 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Bengali", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Bengali" - }, - "score_details": { - "score": 0.8625, - "details": { - "confidence_interval": 0.033748, - "stddev": 0.033748 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "German", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task German" - }, - "score_details": { - "score": 0.875, - "details": { - 
"confidence_interval": 0.03241, - "stddev": 0.03241 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "French", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task French" - }, - "score_details": { - "score": 0.8875, - "details": { - "confidence_interval": 0.030966, - "stddev": 0.030966 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Hindi", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Hindi" - }, - "score_details": { - "score": 0.8775, - "details": { - "confidence_interval": 0.03213, - "stddev": 0.03213 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Indonesian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Indonesian" - }, - "score_details": { - "score": 0.885, - "details": { - "confidence_interval": 0.031264, - "stddev": 0.031264 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Italian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Italian" - }, - "score_details": { - "score": 0.88, - "details": { - "confidence_interval": 0.031846, - "stddev": 0.031846 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Japanese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Japanese" - }, - "score_details": { - "score": 0.8725, - "details": { - "confidence_interval": 0.032686, - "stddev": 0.032686 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Korean", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Korean" - }, - "score_details": { - "score": 0.87, - "details": { - "confidence_interval": 0.032957, - "stddev": 0.032957 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Portuguese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Portuguese" - }, - "score_details": { - "score": 0.875, - "details": { - "confidence_interval": 0.03241, - "stddev": 0.03241 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Spanish", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - 
"evaluation_description": "Global MMLU Lite accuracy for task Spanish" - }, - "score_details": { - "score": 0.885, - "details": { - "confidence_interval": 0.031264, - "stddev": 0.031264 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Swahili", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Swahili" - }, - "score_details": { - "score": 0.8725, - "details": { - "confidence_interval": 0.032686, - "stddev": 0.032686 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Yoruba", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Yoruba" - }, - "score_details": { - "score": 0.875, - "details": { - "confidence_interval": 0.03241, - "stddev": 0.03241 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Chinese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Chinese" - }, - "score_details": { - "score": 0.87, - "details": { - "confidence_interval": 0.032957, - "stddev": 0.032957 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Burmese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Burmese" - }, - "score_details": { - "score": 0.8575, - "details": { - "confidence_interval": 0.034256, - "stddev": 0.034256 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - } - ] -} \ No newline at end of file diff --git a/data/global-mmlu-lite/OpenAI/GPT-5/744ce6aa-57ad-4f39-ac32-6ccce3fb727c.json b/data/global-mmlu-lite/OpenAI/GPT-5/744ce6aa-57ad-4f39-ac32-6ccce3fb727c.json deleted file mode 100644 index fec4c936c..000000000 --- a/data/global-mmlu-lite/OpenAI/GPT-5/744ce6aa-57ad-4f39-ac32-6ccce3fb727c.json +++ /dev/null @@ -1,353 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "global-mmlu-lite/openai_gpt-5/1764290503.993994", - "retrieved_timestamp": "1764290503.993994", - "source_data": [ - "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - ], - "source_metadata": { - "source_organization_name": "Cohere Labs", - "source_organization_url": "https://www.kaggle.com/organizations/cohere-labs", - "evaluator_relationship": "third_party", - "source_type": "documentation", - "source_name": "Kaggle Global MMLU Lite Leaderboard" - }, - "model_info": { - "name": "GPT-5", - "id": "openai/gpt-5", - "developer": "OpenAI", - "inference_platform": "Kaggle" - }, - "evaluation_results": [ - { - "evaluation_name": "Global MMLU Lite", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite" - }, - "score_details": { - "score": 0.8895312499999999 - }, - "detailed_evaluation_results_url": 
"https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Sensitive", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Sensitive" - }, - "score_details": { - "score": 0.8912500000000001 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Agnostic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Agnostic" - }, - "score_details": { - "score": 0.8878125000000001 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Arabic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Arabic" - }, - "score_details": { - "score": 0.8925, - "details": { - "confidence_interval": 0.030355, - "stddev": 0.030355 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "English", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task English" - }, - "score_details": { - "score": 0.8725, - "details": { - "confidence_interval": 0.032686, - "stddev": 0.032686 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Bengali", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Bengali" - }, - "score_details": { - "score": 0.9, - "details": { - "confidence_interval": 0.029399, - "stddev": 0.029399 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "German", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task German" - }, - "score_details": { - "score": 0.91, - "details": { - "confidence_interval": 0.028045, - "stddev": 0.028045 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "French", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task French" - }, - "score_details": { - "score": 0.9075, - "details": { - "confidence_interval": 0.028393, - "stddev": 0.028393 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Hindi", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Hindi" - }, - "score_details": { - "score": 0.865, - "details": { - 
"confidence_interval": 0.033488, - "stddev": 0.033488 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Indonesian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Indonesian" - }, - "score_details": { - "score": 0.795, - "details": { - "confidence_interval": 0.039562, - "stddev": 0.039562 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Italian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Italian" - }, - "score_details": { - "score": 0.9075, - "details": { - "confidence_interval": 0.028393, - "stddev": 0.028393 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Japanese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Japanese" - }, - "score_details": { - "score": 0.8875, - "details": { - "confidence_interval": 0.030966, - "stddev": 0.030966 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Korean", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Korean" - }, - "score_details": { - "score": 0.915, - "details": { - "confidence_interval": 0.02733, - "stddev": 0.02733 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Portuguese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Portuguese" - }, - "score_details": { - "score": 0.8875, - "details": { - "confidence_interval": 0.030966, - "stddev": 0.030966 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Spanish", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Spanish" - }, - "score_details": { - "score": 0.905, - "details": { - "confidence_interval": 0.028735, - "stddev": 0.028735 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Swahili", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Swahili" - }, - "score_details": { - "score": 0.865, - "details": { - "confidence_interval": 0.033488, - "stddev": 0.033488 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Yoruba", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - 
"evaluation_description": "Global MMLU Lite accuracy for task Yoruba" - }, - "score_details": { - "score": 0.9125, - "details": { - "confidence_interval": 0.027691, - "stddev": 0.027691 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Chinese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Chinese" - }, - "score_details": { - "score": 0.895, - "details": { - "confidence_interval": 0.030042, - "stddev": 0.030042 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Burmese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Burmese" - }, - "score_details": { - "score": 0.915, - "details": { - "confidence_interval": 0.02733, - "stddev": 0.02733 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - } - ] -} \ No newline at end of file diff --git a/data/global-mmlu-lite/OpenAI/o3-mini/c38e906d-d904-4515-8312-76c1082343c3.json b/data/global-mmlu-lite/OpenAI/o3-mini/c38e906d-d904-4515-8312-76c1082343c3.json deleted file mode 100644 index 522d9eccb..000000000 --- a/data/global-mmlu-lite/OpenAI/o3-mini/c38e906d-d904-4515-8312-76c1082343c3.json +++ /dev/null @@ -1,353 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "global-mmlu-lite/openai_o3-mini/1764290504.001088", - "retrieved_timestamp": "1764290504.001088", - "source_data": [ - "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - ], - "source_metadata": { - "source_organization_name": "Cohere Labs", - "source_organization_url": "https://www.kaggle.com/organizations/cohere-labs", - "evaluator_relationship": "third_party", - "source_type": "documentation", - "source_name": "Kaggle Global MMLU Lite Leaderboard" - }, - "model_info": { - "name": "o3 mini", - "id": "openai/o3-mini", - "developer": "OpenAI", - "inference_platform": "Kaggle" - }, - "evaluation_results": [ - { - "evaluation_name": "Global MMLU Lite", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite" - }, - "score_details": { - "score": 0.7799999999999999 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Sensitive", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Sensitive" - }, - "score_details": { - "score": 0.7650000000000001 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Agnostic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Agnostic" - }, - "score_details": { - "score": 0.795 - }, - "detailed_evaluation_results_url": 
"https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Arabic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Arabic" - }, - "score_details": { - "score": 0.7725, - "details": { - "confidence_interval": 0.041083, - "stddev": 0.041083 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "English", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task English" - }, - "score_details": { - "score": 0.8025, - "details": { - "confidence_interval": 0.039014, - "stddev": 0.039014 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Bengali", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Bengali" - }, - "score_details": { - "score": 0.77, - "details": { - "confidence_interval": 0.041241, - "stddev": 0.041241 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "German", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task German" - }, - "score_details": { - "score": 0.7525, - "details": { - "confidence_interval": 0.042292, - "stddev": 0.042292 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "French", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task French" - }, - "score_details": { - "score": 0.74, - "details": { - "confidence_interval": 0.042985, - "stddev": 0.042985 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Hindi", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Hindi" - }, - "score_details": { - "score": 0.7525, - "details": { - "confidence_interval": 0.042292, - "stddev": 0.042292 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Indonesian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Indonesian" - }, - "score_details": { - "score": 0.7425, - "details": { - "confidence_interval": 0.04285, - "stddev": 0.04285 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Italian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Italian" - }, - "score_details": { - "score": 0.8, - 
"details": { - "confidence_interval": 0.039199, - "stddev": 0.039199 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Japanese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Japanese" - }, - "score_details": { - "score": 0.81, - "details": { - "confidence_interval": 0.038445, - "stddev": 0.038445 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Korean", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Korean" - }, - "score_details": { - "score": 0.8075, - "details": { - "confidence_interval": 0.038637, - "stddev": 0.038637 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Portuguese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Portuguese" - }, - "score_details": { - "score": 0.7975, - "details": { - "confidence_interval": 0.039382, - "stddev": 0.039382 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Spanish", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Spanish" - }, - "score_details": { - "score": 0.775, - "details": { - "confidence_interval": 0.040922, - "stddev": 0.040922 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Swahili", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Swahili" - }, - "score_details": { - "score": 0.765, - "details": { - "confidence_interval": 0.041551, - "stddev": 0.041551 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Yoruba", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Yoruba" - }, - "score_details": { - "score": 0.7725, - "details": { - "confidence_interval": 0.041083, - "stddev": 0.041083 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Chinese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Chinese" - }, - "score_details": { - "score": 0.8125, - "details": { - "confidence_interval": 0.03825, - "stddev": 0.03825 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Burmese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 
1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Burmese" - }, - "score_details": { - "score": 0.8075, - "details": { - "confidence_interval": 0.038637, - "stddev": 0.038637 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - } - ] -} \ No newline at end of file diff --git a/data/global-mmlu-lite/OpenAI/o4-mini/16f3cc58-7107-4443-b872-c8515feb67ef.json b/data/global-mmlu-lite/OpenAI/o4-mini/16f3cc58-7107-4443-b872-c8515feb67ef.json deleted file mode 100644 index 960d40b95..000000000 --- a/data/global-mmlu-lite/OpenAI/o4-mini/16f3cc58-7107-4443-b872-c8515feb67ef.json +++ /dev/null @@ -1,353 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "global-mmlu-lite/openai_o4-mini/1764290503.995967", - "retrieved_timestamp": "1764290503.995967", - "source_data": [ - "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - ], - "source_metadata": { - "source_organization_name": "Cohere Labs", - "source_organization_url": "https://www.kaggle.com/organizations/cohere-labs", - "evaluator_relationship": "third_party", - "source_type": "documentation", - "source_name": "Kaggle Global MMLU Lite Leaderboard" - }, - "model_info": { - "name": "o4 mini", - "id": "openai/o4-mini", - "developer": "OpenAI", - "inference_platform": "Kaggle" - }, - "evaluation_results": [ - { - "evaluation_name": "Global MMLU Lite", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite" - }, - "score_details": { - "score": 0.87046875 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Sensitive", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Sensitive" - }, - "score_details": { - "score": 0.8503125 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Agnostic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Agnostic" - }, - "score_details": { - "score": 0.890625 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Arabic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Arabic" - }, - "score_details": { - "score": 0.865, - "details": { - "confidence_interval": 0.033488, - "stddev": 0.033488 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "English", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task English" - }, - "score_details": { - "score": 0.8675, - "details": { - "confidence_interval": 0.033225, - "stddev": 0.033225 - } - }, - "detailed_evaluation_results_url": 
"https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Bengali", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Bengali" - }, - "score_details": { - "score": 0.8875, - "details": { - "confidence_interval": 0.030966, - "stddev": 0.030966 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "German", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task German" - }, - "score_details": { - "score": 0.8775, - "details": { - "confidence_interval": 0.03213, - "stddev": 0.03213 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "French", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task French" - }, - "score_details": { - "score": 0.87, - "details": { - "confidence_interval": 0.032957, - "stddev": 0.032957 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Hindi", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Hindi" - }, - "score_details": { - "score": 0.87, - "details": { - "confidence_interval": 0.032957, - "stddev": 0.032957 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Indonesian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Indonesian" - }, - "score_details": { - "score": 0.8675, - "details": { - "confidence_interval": 0.033225, - "stddev": 0.033225 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Italian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Italian" - }, - "score_details": { - "score": 0.855, - "details": { - "confidence_interval": 0.034505, - "stddev": 0.034505 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Japanese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Japanese" - }, - "score_details": { - "score": 0.885, - "details": { - "confidence_interval": 0.031264, - "stddev": 0.031264 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Korean", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Korean" - }, - "score_details": { - "score": 0.88, - 
"details": { - "confidence_interval": 0.031846, - "stddev": 0.031846 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Portuguese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Portuguese" - }, - "score_details": { - "score": 0.88, - "details": { - "confidence_interval": 0.031846, - "stddev": 0.031846 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Spanish", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Spanish" - }, - "score_details": { - "score": 0.855, - "details": { - "confidence_interval": 0.034505, - "stddev": 0.034505 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Swahili", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Swahili" - }, - "score_details": { - "score": 0.8525, - "details": { - "confidence_interval": 0.034751, - "stddev": 0.034751 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Yoruba", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Yoruba" - }, - "score_details": { - "score": 0.8525, - "details": { - "confidence_interval": 0.034751, - "stddev": 0.034751 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Chinese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Chinese" - }, - "score_details": { - "score": 0.89, - "details": { - "confidence_interval": 0.030663, - "stddev": 0.030663 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Burmese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Burmese" - }, - "score_details": { - "score": 0.8725, - "details": { - "confidence_interval": 0.032686, - "stddev": 0.032686 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - } - ] -} \ No newline at end of file diff --git a/data/global-mmlu-lite/alibaba/qwen3-235b-a22b-instruct-2507/2d0a09db-e97e-4ef7-9987-ef7c933ad721.json b/data/global-mmlu-lite/alibaba/qwen3-235b-a22b-instruct-2507/2d0a09db-e97e-4ef7-9987-ef7c933ad721.json new file mode 100644 index 000000000..a4dc797fb --- /dev/null +++ b/data/global-mmlu-lite/alibaba/qwen3-235b-a22b-instruct-2507/2d0a09db-e97e-4ef7-9987-ef7c933ad721.json @@ -0,0 +1,451 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "global-mmlu-lite/alibaba_qwen3-235b-a22b-instruct-2507/1770682039.8556428", + "retrieved_timestamp": 
"1770682039.8556428", + "source_metadata": { + "source_name": "Global MMLU Lite", + "source_type": "documentation", + "source_organization_name": "Cohere Labs", + "source_organization_url": "https://cohere.com", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwen3-235b-a22b-instruct-2507", + "id": "alibaba/qwen3-235b-a22b-instruct-2507", + "developer": "alibaba", + "inference_platform": "unknown", + "additional_details": { + "display_name": "Qwen 3 235B A22B Instruct 2506" + } + }, + "evaluation_results": [ + { + "evaluation_name": "Global MMLU Lite", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Global MMLU Lite", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8798 + } + }, + { + "evaluation_name": "Culturally Sensitive", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Sensitive", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8522 + } + }, + { + "evaluation_name": "Culturally Agnostic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Agnostic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9075 + } + }, + { + "evaluation_name": "Arabic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Arabic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.88, + "details": { + "confidence_interval": 0.0318456453642134 + } + } + }, + { + "evaluation_name": "English", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - English", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.89, + "details": { + "confidence_interval": 0.0306626327370121 + } + } + }, + { + "evaluation_name": "Bengali", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Bengali", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8875, + "details": { + "confidence_interval": 0.0309655314070612 + } + } + }, + { + "evaluation_name": "German", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + 
"https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - German", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.885, + "details": { + "confidence_interval": 0.0312635759101603 + } + } + }, + { + "evaluation_name": "French", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - French", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.88, + "details": { + "confidence_interval": 0.0318456453642134 + } + } + }, + { + "evaluation_name": "Hindi", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Hindi", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8775, + "details": { + "confidence_interval": 0.0321299242960121 + } + } + }, + { + "evaluation_name": "Indonesian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Indonesian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.88, + "details": { + "confidence_interval": 0.0318456453642134 + } + } + }, + { + "evaluation_name": "Italian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Italian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.88, + "details": { + "confidence_interval": 0.0318456453642134 + } + } + }, + { + "evaluation_name": "Japanese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Japanese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.88, + "details": { + "confidence_interval": 0.0318456453642134 + } + } + }, + { + "evaluation_name": "Korean", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Korean", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.875, + "details": { + "confidence_interval": 0.0324098580108514 + } + } + }, + { + "evaluation_name": "Portuguese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + 
"evaluation_description": "Global MMLU Lite - Portuguese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8875, + "details": { + "confidence_interval": 0.0309655314070612 + } + } + }, + { + "evaluation_name": "Spanish", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Spanish", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.875, + "details": { + "confidence_interval": 0.0324098580108514 + } + } + }, + { + "evaluation_name": "Swahili", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Swahili", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.87, + "details": { + "confidence_interval": 0.0329571309666248 + } + } + }, + { + "evaluation_name": "Yoruba", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Yoruba", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8725, + "details": { + "confidence_interval": 0.0326855581520567 + } + } + }, + { + "evaluation_name": "Chinese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Chinese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8775, + "details": { + "confidence_interval": 0.0321299242960121 + } + } + }, + { + "evaluation_name": "Burmese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Burmese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.88, + "details": { + "confidence_interval": 0.0318456453642134 + } + } + } + ] +} \ No newline at end of file diff --git a/data/global-mmlu-lite/anthropic/claude-3-5-haiku-20241022/7af30210-b021-49d5-932c-75a9a42a2d08.json b/data/global-mmlu-lite/anthropic/claude-3-5-haiku-20241022/7af30210-b021-49d5-932c-75a9a42a2d08.json new file mode 100644 index 000000000..6c40a8d5c --- /dev/null +++ b/data/global-mmlu-lite/anthropic/claude-3-5-haiku-20241022/7af30210-b021-49d5-932c-75a9a42a2d08.json @@ -0,0 +1,451 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "global-mmlu-lite/anthropic_claude-3-5-haiku-20241022/1770682039.8556428", + "retrieved_timestamp": "1770682039.8556428", + "source_metadata": { + "source_name": "Global MMLU Lite", + "source_type": "documentation", + "source_organization_name": "Cohere Labs", + "source_organization_url": "https://cohere.com", + 
"evaluator_relationship": "third_party" + }, + "model_info": { + "name": "claude-3-5-haiku-20241022", + "id": "anthropic/claude-3-5-haiku-20241022", + "developer": "anthropic", + "inference_platform": "unknown", + "additional_details": { + "display_name": "Claude 3.5 Haiku" + } + }, + "evaluation_results": [ + { + "evaluation_name": "Global MMLU Lite", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Global MMLU Lite", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6114 + } + }, + { + "evaluation_name": "Culturally Sensitive", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Sensitive", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5834 + } + }, + { + "evaluation_name": "Culturally Agnostic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Agnostic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6394 + } + }, + { + "evaluation_name": "Arabic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Arabic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.695, + "details": { + "confidence_interval": 0.045119098880536 + } + } + }, + { + "evaluation_name": "English", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - English", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.485, + "details": { + "confidence_interval": 0.0489770450552826 + } + } + }, + { + "evaluation_name": "Bengali", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Bengali", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.675, + "details": { + "confidence_interval": 0.0458998918514459 + } + } + }, + { + "evaluation_name": "German", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - German", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.565, + "details": { + "confidence_interval": 0.0485832929528273 + } + } + }, + { + "evaluation_name": "French", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - French", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.61, + "details": { + "confidence_interval": 0.0477986153942541 + } + } + }, + { + "evaluation_name": "Hindi", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Hindi", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6575, + "details": { + "confidence_interval": 0.0465046373306654 + } + } + }, + { + "evaluation_name": "Indonesian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Indonesian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5475, + "details": { + "confidence_interval": 0.048777490036628 + } + } + }, + { + "evaluation_name": "Italian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Italian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.48, + "details": { + "confidence_interval": 0.0489598846415423 + } + } + }, + { + "evaluation_name": "Japanese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Japanese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.655, + "details": { + "confidence_interval": 0.0465852352416072 + } + } + }, + { + "evaluation_name": "Korean", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Korean", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6575, + "details": { + "confidence_interval": 0.0465046373306654 + } + } + }, + { + "evaluation_name": "Portuguese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Portuguese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5225, + "details": { + "confidence_interval": 0.048949462883814 + } + } + }, + 
{ + "evaluation_name": "Spanish", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Spanish", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.485, + "details": { + "confidence_interval": 0.0489770450552826 + } + } + }, + { + "evaluation_name": "Swahili", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Swahili", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.69, + "details": { + "confidence_interval": 0.0453235049876571 + } + } + }, + { + "evaluation_name": "Yoruba", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Yoruba", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6675, + "details": { + "confidence_interval": 0.0461678398924898 + } + } + }, + { + "evaluation_name": "Chinese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Chinese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.69, + "details": { + "confidence_interval": 0.0453235049876571 + } + } + }, + { + "evaluation_name": "Burmese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Burmese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7, + "details": { + "confidence_interval": 0.0449084165927102 + } + } + } + ] +} \ No newline at end of file diff --git a/data/global-mmlu-lite/anthropic/claude-3-7-sonnet-20250219/4291c294-8155-4664-aec4-272445cc8862.json b/data/global-mmlu-lite/anthropic/claude-3-7-sonnet-20250219/4291c294-8155-4664-aec4-272445cc8862.json new file mode 100644 index 000000000..c33d9b0ec --- /dev/null +++ b/data/global-mmlu-lite/anthropic/claude-3-7-sonnet-20250219/4291c294-8155-4664-aec4-272445cc8862.json @@ -0,0 +1,451 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "global-mmlu-lite/anthropic_claude-3-7-sonnet-20250219/1770682039.8556428", + "retrieved_timestamp": "1770682039.8556428", + "source_metadata": { + "source_name": "Global MMLU Lite", + "source_type": "documentation", + "source_organization_name": "Cohere Labs", + "source_organization_url": "https://cohere.com", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "claude-3-7-sonnet-20250219", + "id": "anthropic/claude-3-7-sonnet-20250219", + "developer": "anthropic", + "inference_platform": "unknown", + "additional_details": { + "display_name": "Claude 3.7 Sonnet" + } + }, 
+ "evaluation_results": [ + { + "evaluation_name": "Global MMLU Lite", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Global MMLU Lite", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8078 + } + }, + { + "evaluation_name": "Culturally Sensitive", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Sensitive", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7794 + } + }, + { + "evaluation_name": "Culturally Agnostic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Agnostic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8362 + } + }, + { + "evaluation_name": "Arabic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Arabic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7925, + "details": { + "confidence_interval": 0.039739901042451 + } + } + }, + { + "evaluation_name": "English", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - English", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7625, + "details": { + "confidence_interval": 0.0417032427788918 + } + } + }, + { + "evaluation_name": "Bengali", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Bengali", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.825, + "details": { + "confidence_interval": 0.0372360919417476 + } + } + }, + { + "evaluation_name": "German", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - German", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8125, + "details": { + "confidence_interval": 0.0382499098762049 + } + } + }, + { + "evaluation_name": "French", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + 
] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - French", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7675, + "details": { + "confidence_interval": 0.0413969901513152 + } + } + }, + { + "evaluation_name": "Hindi", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Hindi", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.805, + "details": { + "confidence_interval": 0.0388269557903546 + } + } + }, + { + "evaluation_name": "Indonesian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Indonesian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8175, + "details": { + "confidence_interval": 0.037852399096026 + } + } + }, + { + "evaluation_name": "Italian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Italian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8225, + "details": { + "confidence_interval": 0.0374442578609762 + } + } + }, + { + "evaluation_name": "Japanese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Japanese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8425, + "details": { + "confidence_interval": 0.0356979542967269 + } + } + }, + { + "evaluation_name": "Korean", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Korean", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.83, + "details": { + "confidence_interval": 0.036811337913744 + } + } + }, + { + "evaluation_name": "Portuguese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Portuguese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.77, + "details": { + "confidence_interval": 0.0412408279846843 + } + } + }, + { + "evaluation_name": "Spanish", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Spanish", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8075, + "details": { + "confidence_interval": 0.0386371183112584 + } + } + }, + { + "evaluation_name": "Swahili", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Swahili", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8125, + "details": { + "confidence_interval": 0.0382499098762049 + } + } + }, + { + "evaluation_name": "Yoruba", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Yoruba", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.81, + "details": { + "confidence_interval": 0.0384447822371523 + } + } + }, + { + "evaluation_name": "Chinese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Chinese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.835, + "details": { + "confidence_interval": 0.0363750253959063 + } + } + }, + { + "evaluation_name": "Burmese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Burmese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8125, + "details": { + "confidence_interval": 0.0382499098762049 + } + } + } + ] +} \ No newline at end of file diff --git a/data/global-mmlu-lite/anthropic/claude-opus-4-1-20250805/911db593-5c95-41e9-9264-b130be6a9fb1.json b/data/global-mmlu-lite/anthropic/claude-opus-4-1-20250805/911db593-5c95-41e9-9264-b130be6a9fb1.json new file mode 100644 index 000000000..9cd771388 --- /dev/null +++ b/data/global-mmlu-lite/anthropic/claude-opus-4-1-20250805/911db593-5c95-41e9-9264-b130be6a9fb1.json @@ -0,0 +1,451 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "global-mmlu-lite/anthropic_claude-opus-4-1-20250805/1770682039.8556428", + "retrieved_timestamp": "1770682039.8556428", + "source_metadata": { + "source_name": "Global MMLU Lite", + "source_type": "documentation", + "source_organization_name": "Cohere Labs", + "source_organization_url": "https://cohere.com", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "claude-opus-4-1-20250805", + "id": "anthropic/claude-opus-4-1-20250805", + "developer": "anthropic", + "inference_platform": "unknown", + "additional_details": { + "display_name": "Claude Opus 4.1" + } + }, + "evaluation_results": [ + { + "evaluation_name": "Global MMLU Lite", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": 
"Global MMLU Lite - Global MMLU Lite", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.943 + } + }, + { + "evaluation_name": "Culturally Sensitive", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Sensitive", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9331 + } + }, + { + "evaluation_name": "Culturally Agnostic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Agnostic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9528 + } + }, + { + "evaluation_name": "Arabic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Arabic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.945, + "details": { + "confidence_interval": 0.0223416551650486 + } + } + }, + { + "evaluation_name": "English", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - English", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9475, + "details": { + "confidence_interval": 0.0218568391591684 + } + } + }, + { + "evaluation_name": "Bengali", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Bengali", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9425, + "details": { + "confidence_interval": 0.0228135408783901 + } + } + }, + { + "evaluation_name": "German", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - German", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.94, + "details": { + "confidence_interval": 0.0232732828307025 + } + } + }, + { + "evaluation_name": "French", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - French", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.945, + "details": { + "confidence_interval": 0.0223416551650486 + 
} + } + }, + { + "evaluation_name": "Hindi", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Hindi", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9475, + "details": { + "confidence_interval": 0.0218568391591684 + } + } + }, + { + "evaluation_name": "Indonesian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Indonesian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9425, + "details": { + "confidence_interval": 0.0228135408783901 + } + } + }, + { + "evaluation_name": "Italian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Italian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.94, + "details": { + "confidence_interval": 0.0232732828307025 + } + } + }, + { + "evaluation_name": "Japanese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Japanese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.94, + "details": { + "confidence_interval": 0.0232732828307025 + } + } + }, + { + "evaluation_name": "Korean", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Korean", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.95, + "details": { + "confidence_interval": 0.0213582123539735 + } + } + }, + { + "evaluation_name": "Portuguese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Portuguese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.945, + "details": { + "confidence_interval": 0.0223416551650486 + } + } + }, + { + "evaluation_name": "Spanish", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Spanish", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.945, + "details": { + "confidence_interval": 0.0223416551650486 + } + } + }, + { + "evaluation_name": "Swahili", + "source_data": { + "dataset_name": 
"global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Swahili", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.93, + "details": { + "confidence_interval": 0.0250039481496016 + } + } + }, + { + "evaluation_name": "Yoruba", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Yoruba", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9375, + "details": { + "confidence_interval": 0.0237215870977811 + } + } + }, + { + "evaluation_name": "Chinese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Chinese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.945, + "details": { + "confidence_interval": 0.0223416551650486 + } + } + }, + { + "evaluation_name": "Burmese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Burmese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.945, + "details": { + "confidence_interval": 0.0223416551650486 + } + } + } + ] +} \ No newline at end of file diff --git a/data/global-mmlu-lite/anthropic/claude-sonnet-4-20250514/51465d80-23e2-4328-8845-70b373408d65.json b/data/global-mmlu-lite/anthropic/claude-sonnet-4-20250514/51465d80-23e2-4328-8845-70b373408d65.json new file mode 100644 index 000000000..6fd34931c --- /dev/null +++ b/data/global-mmlu-lite/anthropic/claude-sonnet-4-20250514/51465d80-23e2-4328-8845-70b373408d65.json @@ -0,0 +1,451 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "global-mmlu-lite/anthropic_claude-sonnet-4-20250514/1770682039.8556428", + "retrieved_timestamp": "1770682039.8556428", + "source_metadata": { + "source_name": "Global MMLU Lite", + "source_type": "documentation", + "source_organization_name": "Cohere Labs", + "source_organization_url": "https://cohere.com", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "claude-sonnet-4-20250514", + "id": "anthropic/claude-sonnet-4-20250514", + "developer": "anthropic", + "inference_platform": "unknown", + "additional_details": { + "display_name": "Claude Sonnet 4" + } + }, + "evaluation_results": [ + { + "evaluation_name": "Global MMLU Lite", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Global MMLU Lite", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9058 + } + }, + { + "evaluation_name": "Culturally Sensitive", + "source_data": { + "dataset_name": 
"global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Sensitive", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8913 + } + }, + { + "evaluation_name": "Culturally Agnostic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Agnostic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9203 + } + }, + { + "evaluation_name": "Arabic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Arabic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9125, + "details": { + "confidence_interval": 0.0276909948229923 + } + } + }, + { + "evaluation_name": "English", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - English", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.905, + "details": { + "confidence_interval": 0.0287345359327925 + } + } + }, + { + "evaluation_name": "Bengali", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Bengali", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9075, + "details": { + "confidence_interval": 0.0283930651251164 + } + } + }, + { + "evaluation_name": "German", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - German", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9125, + "details": { + "confidence_interval": 0.0276909948229923 + } + } + }, + { + "evaluation_name": "French", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - French", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.91, + "details": { + "confidence_interval": 0.0280452971732717 + } + } + }, + { + "evaluation_name": "Hindi", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU 
Lite - Hindi", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9, + "details": { + "confidence_interval": 0.0293994597681008 + } + } + }, + { + "evaluation_name": "Indonesian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Indonesian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9025, + "details": { + "confidence_interval": 0.0290699315059157 + } + } + }, + { + "evaluation_name": "Italian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Italian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9075, + "details": { + "confidence_interval": 0.0283930651251164 + } + } + }, + { + "evaluation_name": "Japanese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Japanese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9, + "details": { + "confidence_interval": 0.0293994597681008 + } + } + }, + { + "evaluation_name": "Korean", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Korean", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9125, + "details": { + "confidence_interval": 0.0276909948229923 + } + } + }, + { + "evaluation_name": "Portuguese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Portuguese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.91, + "details": { + "confidence_interval": 0.0280452971732717 + } + } + }, + { + "evaluation_name": "Spanish", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Spanish", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9075, + "details": { + "confidence_interval": 0.0283930651251164 + } + } + }, + { + "evaluation_name": "Swahili", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Swahili", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8975, + "details": { + "confidence_interval": 0.0297233158642432 + } + } + }, + { + "evaluation_name": "Yoruba", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Yoruba", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8975, + "details": { + "confidence_interval": 0.0297233158642432 + } + } + }, + { + "evaluation_name": "Chinese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Chinese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9175, + "details": { + "confidence_interval": 0.0269617517795541 + } + } + }, + { + "evaluation_name": "Burmese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Burmese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8925, + "details": { + "confidence_interval": 0.0303547345865505 + } + } + } + ] +} \ No newline at end of file diff --git a/data/global-mmlu-lite/cohere/command-a-03-2025/12a16399-1aff-4173-9677-58d0d9e23ea2.json b/data/global-mmlu-lite/cohere/command-a-03-2025/12a16399-1aff-4173-9677-58d0d9e23ea2.json new file mode 100644 index 000000000..da92ce8b3 --- /dev/null +++ b/data/global-mmlu-lite/cohere/command-a-03-2025/12a16399-1aff-4173-9677-58d0d9e23ea2.json @@ -0,0 +1,451 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "global-mmlu-lite/cohere_command-a-03-2025/1770682039.8556428", + "retrieved_timestamp": "1770682039.8556428", + "source_metadata": { + "source_name": "Global MMLU Lite", + "source_type": "documentation", + "source_organization_name": "Cohere Labs", + "source_organization_url": "https://cohere.com", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "command-a-03-2025", + "id": "cohere/command-a-03-2025", + "developer": "cohere", + "inference_platform": "unknown", + "additional_details": { + "display_name": "Command A " + } + }, + "evaluation_results": [ + { + "evaluation_name": "Global MMLU Lite", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Global MMLU Lite", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8385 + } + }, + { + "evaluation_name": "Culturally Sensitive", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Sensitive", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { 
+ "score": 0.7993 + } + }, + { + "evaluation_name": "Culturally Agnostic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Agnostic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8778 + } + }, + { + "evaluation_name": "Arabic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Arabic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8425, + "details": { + "confidence_interval": 0.0356979542967269 + } + } + }, + { + "evaluation_name": "English", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - English", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.855, + "details": { + "confidence_interval": 0.034505248053577 + } + } + }, + { + "evaluation_name": "Bengali", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Bengali", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8225, + "details": { + "confidence_interval": 0.0374442578609762 + } + } + }, + { + "evaluation_name": "German", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - German", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8425, + "details": { + "confidence_interval": 0.0356979542967269 + } + } + }, + { + "evaluation_name": "French", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - French", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8375, + "details": { + "confidence_interval": 0.0361524043591446 + } + } + }, + { + "evaluation_name": "Hindi", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Hindi", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8421, + "details": { + "confidence_interval": 0.0357790381242715 + } + } + }, + { + "evaluation_name": "Indonesian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": 
"url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Indonesian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8546, + "details": { + "confidence_interval": 0.0345843751705089 + } + } + }, + { + "evaluation_name": "Italian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Italian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8375, + "details": { + "confidence_interval": 0.0361524043591446 + } + } + }, + { + "evaluation_name": "Japanese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Japanese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.845, + "details": { + "confidence_interval": 0.0354660072830454 + } + } + }, + { + "evaluation_name": "Korean", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Korean", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.85, + "details": { + "confidence_interval": 0.0349923562952861 + } + } + }, + { + "evaluation_name": "Portuguese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Portuguese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.84, + "details": { + "confidence_interval": 0.0359267332741682 + } + } + }, + { + "evaluation_name": "Spanish", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Spanish", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8525, + "details": { + "confidence_interval": 0.0347505193336969 + } + } + }, + { + "evaluation_name": "Swahili", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Swahili", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8275, + "details": { + "confidence_interval": 0.0370251346228631 + } + } + }, + { + "evaluation_name": "Yoruba", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] 
+ }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Yoruba", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.815, + "details": { + "confidence_interval": 0.0380524622623213 + } + } + }, + { + "evaluation_name": "Chinese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Chinese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.835, + "details": { + "confidence_interval": 0.0363750253959063 + } + } + }, + { + "evaluation_name": "Burmese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Burmese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8175, + "details": { + "confidence_interval": 0.037852399096026 + } + } + } + ] +} \ No newline at end of file diff --git a/data/global-mmlu-lite/deepseek/deepseek-r1-0528/aeaab8dd-70cd-484c-a550-18ce9f1dbad7.json b/data/global-mmlu-lite/deepseek/deepseek-r1-0528/aeaab8dd-70cd-484c-a550-18ce9f1dbad7.json new file mode 100644 index 000000000..9c1f481e7 --- /dev/null +++ b/data/global-mmlu-lite/deepseek/deepseek-r1-0528/aeaab8dd-70cd-484c-a550-18ce9f1dbad7.json @@ -0,0 +1,451 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "global-mmlu-lite/deepseek_deepseek-r1-0528/1770682039.8556428", + "retrieved_timestamp": "1770682039.8556428", + "source_metadata": { + "source_name": "Global MMLU Lite", + "source_type": "documentation", + "source_organization_name": "Cohere Labs", + "source_organization_url": "https://cohere.com", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "deepseek-r1-0528", + "id": "deepseek/deepseek-r1-0528", + "developer": "deepseek", + "inference_platform": "unknown", + "additional_details": { + "display_name": "DeepSeek-R1" + } + }, + "evaluation_results": [ + { + "evaluation_name": "Global MMLU Lite", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Global MMLU Lite", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6744 + } + }, + { + "evaluation_name": "Culturally Sensitive", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Sensitive", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6672 + } + }, + { + "evaluation_name": "Culturally Agnostic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Agnostic", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6816 + } + }, + { + "evaluation_name": "Arabic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Arabic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6825, + "details": { + "confidence_interval": 0.0456185301529649 + } + } + }, + { + "evaluation_name": "English", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - English", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.715, + "details": { + "confidence_interval": 0.0442378025897236 + } + } + }, + { + "evaluation_name": "Bengali", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Bengali", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.655, + "details": { + "confidence_interval": 0.0465852352416072 + } + } + }, + { + "evaluation_name": "German", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - German", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6375, + "details": { + "confidence_interval": 0.0471099014100216 + } + } + }, + { + "evaluation_name": "French", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - French", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6925, + "details": { + "confidence_interval": 0.0452220810763167 + } + } + }, + { + "evaluation_name": "Hindi", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Hindi", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6475, + "details": { + "confidence_interval": 0.046818505067596 + } + } + }, + { + "evaluation_name": "Indonesian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Indonesian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.655, + "details": { + 
"confidence_interval": 0.0465852352416072 + } + } + }, + { + "evaluation_name": "Italian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Italian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6775, + "details": { + "confidence_interval": 0.0458076069884696 + } + } + }, + { + "evaluation_name": "Japanese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Japanese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7725, + "details": { + "confidence_interval": 0.0410826112430601 + } + } + }, + { + "evaluation_name": "Korean", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Korean", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6575, + "details": { + "confidence_interval": 0.0465046373306654 + } + } + }, + { + "evaluation_name": "Portuguese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Portuguese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.635, + "details": { + "confidence_interval": 0.0471792888396587 + } + } + }, + { + "evaluation_name": "Spanish", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Spanish", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7175, + "details": { + "confidence_interval": 0.0441202814428089 + } + } + }, + { + "evaluation_name": "Swahili", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Swahili", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6775, + "details": { + "confidence_interval": 0.0458076069884696 + } + } + }, + { + "evaluation_name": "Yoruba", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Yoruba", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.77, + "details": { + "confidence_interval": 0.0412408279846843 + } + } + }, + { + "evaluation_name": 
"Chinese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Chinese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5075, + "details": { + "confidence_interval": 0.0489935869046875 + } + } + }, + { + "evaluation_name": "Burmese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Burmese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.69, + "details": { + "confidence_interval": 0.0453235049876571 + } + } + } + ] +} \ No newline at end of file diff --git a/data/global-mmlu-lite/deepseek/deepseek-v3.1/803ce795-80f9-49a1-92a5-8d81c8d0ff4b.json b/data/global-mmlu-lite/deepseek/deepseek-v3.1/803ce795-80f9-49a1-92a5-8d81c8d0ff4b.json new file mode 100644 index 000000000..48717a932 --- /dev/null +++ b/data/global-mmlu-lite/deepseek/deepseek-v3.1/803ce795-80f9-49a1-92a5-8d81c8d0ff4b.json @@ -0,0 +1,448 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "global-mmlu-lite/deepseek_deepseek-v3.1/1770682039.8556428", + "retrieved_timestamp": "1770682039.8556428", + "source_metadata": { + "source_name": "Global MMLU Lite", + "source_type": "documentation", + "source_organization_name": "Cohere Labs", + "source_organization_url": "https://cohere.com", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "deepseek-v3.1", + "id": "deepseek/deepseek-v3.1", + "developer": "deepseek", + "inference_platform": "unknown" + }, + "evaluation_results": [ + { + "evaluation_name": "Global MMLU Lite", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Global MMLU Lite", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8044 + } + }, + { + "evaluation_name": "Culturally Sensitive", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Sensitive", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7793 + } + }, + { + "evaluation_name": "Culturally Agnostic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Agnostic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8295 + } + }, + { + "evaluation_name": "Arabic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU 
Lite - Arabic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.805, + "details": { + "confidence_interval": 0.0388269557903546 + } + } + }, + { + "evaluation_name": "English", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - English", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.825, + "details": { + "confidence_interval": 0.0372360919417476 + } + } + }, + { + "evaluation_name": "Bengali", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Bengali", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8157, + "details": { + "confidence_interval": 0.0381916132135631 + } + } + }, + { + "evaluation_name": "German", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - German", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7925, + "details": { + "confidence_interval": 0.039739901042451 + } + } + }, + { + "evaluation_name": "French", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - French", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8175, + "details": { + "confidence_interval": 0.037852399096026 + } + } + }, + { + "evaluation_name": "Hindi", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Hindi", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7569, + "details": { + "confidence_interval": 0.0420899186250792 + } + } + }, + { + "evaluation_name": "Indonesian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Indonesian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7764, + "details": { + "confidence_interval": 0.0409352762868413 + } + } + }, + { + "evaluation_name": "Italian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Italian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.8075, + "details": { + "confidence_interval": 0.0386371183112584 + } + } + }, + { + "evaluation_name": "Japanese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Japanese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8312, + "details": { + "confidence_interval": 0.0374186973347394 + } + } + }, + { + "evaluation_name": "Korean", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Korean", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8125, + "details": { + "confidence_interval": 0.0382499098762049 + } + } + }, + { + "evaluation_name": "Portuguese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Portuguese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8246, + "details": { + "confidence_interval": 0.0373194914033146 + } + } + }, + { + "evaluation_name": "Spanish", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Spanish", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8125, + "details": { + "confidence_interval": 0.0382499098762049 + } + } + }, + { + "evaluation_name": "Swahili", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Swahili", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.801, + "details": { + "confidence_interval": 0.0392725803132057 + } + } + }, + { + "evaluation_name": "Yoruba", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Yoruba", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7831, + "details": { + "confidence_interval": 0.0415492484871426 + } + } + }, + { + "evaluation_name": "Chinese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Chinese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8161, + "details": { + 
"confidence_interval": 0.0381062547094242 + } + } + }, + { + "evaluation_name": "Burmese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Burmese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7925, + "details": { + "confidence_interval": 0.039739901042451 + } + } + } + ] +} \ No newline at end of file diff --git a/data/global-mmlu-lite/google/gemini-2.5-flash-preview-05-20/3796f2e5-ee3f-4598-911f-92e8efac92c3.json b/data/global-mmlu-lite/google/gemini-2.5-flash-preview-05-20/3796f2e5-ee3f-4598-911f-92e8efac92c3.json new file mode 100644 index 000000000..7e5476dde --- /dev/null +++ b/data/global-mmlu-lite/google/gemini-2.5-flash-preview-05-20/3796f2e5-ee3f-4598-911f-92e8efac92c3.json @@ -0,0 +1,451 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "global-mmlu-lite/google_gemini-2.5-flash-preview-05-20/1770682039.8556428", + "retrieved_timestamp": "1770682039.8556428", + "source_metadata": { + "source_name": "Global MMLU Lite", + "source_type": "documentation", + "source_organization_name": "Cohere Labs", + "source_organization_url": "https://cohere.com", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemini-2.5-flash-preview-05-20", + "id": "google/gemini-2.5-flash-preview-05-20", + "developer": "google", + "inference_platform": "unknown", + "additional_details": { + "display_name": "Gemini 2.5 Flash Preview" + } + }, + "evaluation_results": [ + { + "evaluation_name": "Global MMLU Lite", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Global MMLU Lite", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9092 + } + }, + { + "evaluation_name": "Culturally Sensitive", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Sensitive", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8925 + } + }, + { + "evaluation_name": "Culturally Agnostic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Agnostic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9259 + } + }, + { + "evaluation_name": "Arabic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Arabic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.905, + "details": { + "confidence_interval": 0.0287345359327925 + } + } + }, + { + 
"evaluation_name": "English", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - English", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9225, + "details": { + "confidence_interval": 0.0262030674045044 + } + } + }, + { + "evaluation_name": "Bengali", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Bengali", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.91, + "details": { + "confidence_interval": 0.0280452971732717 + } + } + }, + { + "evaluation_name": "German", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - German", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.905, + "details": { + "confidence_interval": 0.0287345359327925 + } + } + }, + { + "evaluation_name": "French", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - French", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.925, + "details": { + "confidence_interval": 0.0258118773864695 + } + } + }, + { + "evaluation_name": "Hindi", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Hindi", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9125, + "details": { + "confidence_interval": 0.0276909948229923 + } + } + }, + { + "evaluation_name": "Indonesian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Indonesian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9075, + "details": { + "confidence_interval": 0.0283930651251164 + } + } + }, + { + "evaluation_name": "Italian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Italian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.89, + "details": { + "confidence_interval": 0.0306626327370121 + } + } + }, + { + "evaluation_name": "Japanese", + "source_data": { + "dataset_name": "global-mmlu-lite", + 
"source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Japanese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9125, + "details": { + "confidence_interval": 0.0276909948229923 + } + } + }, + { + "evaluation_name": "Korean", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Korean", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9075, + "details": { + "confidence_interval": 0.0283930651251164 + } + } + }, + { + "evaluation_name": "Portuguese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Portuguese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.915, + "details": { + "confidence_interval": 0.0273299039414468 + } + } + }, + { + "evaluation_name": "Spanish", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Spanish", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.915, + "details": { + "confidence_interval": 0.0273299039414468 + } + } + }, + { + "evaluation_name": "Swahili", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Swahili", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.905, + "details": { + "confidence_interval": 0.0287345359327925 + } + } + }, + { + "evaluation_name": "Yoruba", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Yoruba", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8825, + "details": { + "confidence_interval": 0.0315569037846059 + } + } + }, + { + "evaluation_name": "Chinese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Chinese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.93, + "details": { + "confidence_interval": 0.0250039481496016 + } + } + }, + { + "evaluation_name": "Burmese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + 
"https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Burmese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9025, + "details": { + "confidence_interval": 0.0290699315059157 + } + } + } + ] +} \ No newline at end of file diff --git a/data/global-mmlu-lite/google/gemini-2.5-flash/b225eef0-9698-4340-bc6d-cece877c8863.json b/data/global-mmlu-lite/google/gemini-2.5-flash/b225eef0-9698-4340-bc6d-cece877c8863.json new file mode 100644 index 000000000..13616c5b7 --- /dev/null +++ b/data/global-mmlu-lite/google/gemini-2.5-flash/b225eef0-9698-4340-bc6d-cece877c8863.json @@ -0,0 +1,451 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "global-mmlu-lite/google_gemini-2.5-flash/1770682039.8556428", + "retrieved_timestamp": "1770682039.8556428", + "source_metadata": { + "source_name": "Global MMLU Lite", + "source_type": "documentation", + "source_organization_name": "Cohere Labs", + "source_organization_url": "https://cohere.com", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemini-2.5-flash", + "id": "google/gemini-2.5-flash", + "developer": "google", + "inference_platform": "unknown", + "additional_details": { + "display_name": "Gemini 2.5 Flash" + } + }, + "evaluation_results": [ + { + "evaluation_name": "Global MMLU Lite", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Global MMLU Lite", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9145 + } + }, + { + "evaluation_name": "Culturally Sensitive", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Sensitive", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9 + } + }, + { + "evaluation_name": "Culturally Agnostic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Agnostic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9291 + } + }, + { + "evaluation_name": "Arabic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Arabic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9125, + "details": { + "confidence_interval": 0.0276909948229923 + } + } + }, + { + "evaluation_name": "English", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - English", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9325, + "details": { + "confidence_interval": 0.0245863693763976 + } + } + }, + { + "evaluation_name": "Bengali", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Bengali", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.91, + "details": { + "confidence_interval": 0.0280452971732717 + } + } + }, + { + "evaluation_name": "German", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - German", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9025, + "details": { + "confidence_interval": 0.0290699315059157 + } + } + }, + { + "evaluation_name": "French", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - French", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.91, + "details": { + "confidence_interval": 0.0280452971732717 + } + } + }, + { + "evaluation_name": "Hindi", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Hindi", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.925, + "details": { + "confidence_interval": 0.0258118773864695 + } + } + }, + { + "evaluation_name": "Indonesian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Indonesian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9075, + "details": { + "confidence_interval": 0.0283930651251164 + } + } + }, + { + "evaluation_name": "Italian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Italian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9225, + "details": { + "confidence_interval": 0.0262030674045044 + } + } + }, + { + "evaluation_name": "Japanese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Japanese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 
+ }, + "score_details": { + "score": 0.9125, + "details": { + "confidence_interval": 0.0276909948229923 + } + } + }, + { + "evaluation_name": "Korean", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Korean", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.915, + "details": { + "confidence_interval": 0.0273299039414468 + } + } + }, + { + "evaluation_name": "Portuguese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Portuguese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9125, + "details": { + "confidence_interval": 0.0276909948229923 + } + } + }, + { + "evaluation_name": "Spanish", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Spanish", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9175, + "details": { + "confidence_interval": 0.0269617517795541 + } + } + }, + { + "evaluation_name": "Swahili", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Swahili", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.915, + "details": { + "confidence_interval": 0.0273299039414468 + } + } + }, + { + "evaluation_name": "Yoruba", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Yoruba", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9075, + "details": { + "confidence_interval": 0.0283930651251164 + } + } + }, + { + "evaluation_name": "Chinese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Chinese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.915, + "details": { + "confidence_interval": 0.0273299039414468 + } + } + }, + { + "evaluation_name": "Burmese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Burmese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.915, + "details": { + "confidence_interval": 
0.0273299039414468 + } + } + } + ] +} \ No newline at end of file diff --git a/data/global-mmlu-lite/google/gemini-2.5-pro/531fe0ba-1f29-4409-abdb-daad56918fcc.json b/data/global-mmlu-lite/google/gemini-2.5-pro/531fe0ba-1f29-4409-abdb-daad56918fcc.json new file mode 100644 index 000000000..e386875a5 --- /dev/null +++ b/data/global-mmlu-lite/google/gemini-2.5-pro/531fe0ba-1f29-4409-abdb-daad56918fcc.json @@ -0,0 +1,451 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "global-mmlu-lite/google_gemini-2.5-pro/1770682039.8556428", + "retrieved_timestamp": "1770682039.8556428", + "source_metadata": { + "source_name": "Global MMLU Lite", + "source_type": "documentation", + "source_organization_name": "Cohere Labs", + "source_organization_url": "https://cohere.com", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemini-2.5-pro", + "id": "google/gemini-2.5-pro", + "developer": "google", + "inference_platform": "unknown", + "additional_details": { + "display_name": "Gemini 2.5 Pro" + } + }, + "evaluation_results": [ + { + "evaluation_name": "Global MMLU Lite", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Global MMLU Lite", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9323 + } + }, + { + "evaluation_name": "Culturally Sensitive", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Sensitive", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9241 + } + }, + { + "evaluation_name": "Culturally Agnostic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Agnostic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9406 + } + }, + { + "evaluation_name": "Arabic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Arabic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9475, + "details": { + "confidence_interval": 0.0218568391591684 + } + } + }, + { + "evaluation_name": "English", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - English", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9275, + "details": { + "confidence_interval": 0.0254123049217328 + } + } + }, + { + "evaluation_name": "Bengali", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + 
"https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Bengali", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9275, + "details": { + "confidence_interval": 0.0254123049217328 + } + } + }, + { + "evaluation_name": "German", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - German", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.93, + "details": { + "confidence_interval": 0.0250039481496016 + } + } + }, + { + "evaluation_name": "French", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - French", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9425, + "details": { + "confidence_interval": 0.0228135408783901 + } + } + }, + { + "evaluation_name": "Hindi", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Hindi", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9275, + "details": { + "confidence_interval": 0.0254123049217328 + } + } + }, + { + "evaluation_name": "Indonesian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Indonesian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.925, + "details": { + "confidence_interval": 0.0258118773864695 + } + } + }, + { + "evaluation_name": "Italian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Italian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.935, + "details": { + "confidence_interval": 0.0241590904127041 + } + } + }, + { + "evaluation_name": "Japanese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Japanese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9375, + "details": { + "confidence_interval": 0.0237215870977811 + } + } + }, + { + "evaluation_name": "Korean", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + 
"evaluation_description": "Global MMLU Lite - Korean", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9275, + "details": { + "confidence_interval": 0.0254123049217328 + } + } + }, + { + "evaluation_name": "Portuguese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Portuguese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.93, + "details": { + "confidence_interval": 0.0250039481496016 + } + } + }, + { + "evaluation_name": "Spanish", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Spanish", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.94, + "details": { + "confidence_interval": 0.0232732828307025 + } + } + }, + { + "evaluation_name": "Swahili", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Swahili", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9375, + "details": { + "confidence_interval": 0.0237215870977811 + } + } + }, + { + "evaluation_name": "Yoruba", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Yoruba", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.925, + "details": { + "confidence_interval": 0.0258118773864695 + } + } + }, + { + "evaluation_name": "Chinese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Chinese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9275, + "details": { + "confidence_interval": 0.0254123049217328 + } + } + }, + { + "evaluation_name": "Burmese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Burmese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.93, + "details": { + "confidence_interval": 0.0250039481496016 + } + } + } + ] +} \ No newline at end of file diff --git a/data/global-mmlu-lite/google/gemini-3-pro-preview/9193adbe-0c95-4b5e-a179-4c14e749a75c.json b/data/global-mmlu-lite/google/gemini-3-pro-preview/9193adbe-0c95-4b5e-a179-4c14e749a75c.json new file mode 100644 index 000000000..358a13ce5 --- /dev/null +++ 
b/data/global-mmlu-lite/google/gemini-3-pro-preview/9193adbe-0c95-4b5e-a179-4c14e749a75c.json @@ -0,0 +1,451 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "global-mmlu-lite/google_gemini-3-pro-preview/1770682039.8556428", + "retrieved_timestamp": "1770682039.8556428", + "source_metadata": { + "source_name": "Global MMLU Lite", + "source_type": "documentation", + "source_organization_name": "Cohere Labs", + "source_organization_url": "https://cohere.com", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemini-3-pro-preview", + "id": "google/gemini-3-pro-preview", + "developer": "google", + "inference_platform": "unknown", + "additional_details": { + "display_name": "Gemini 3 Pro Preview" + } + }, + "evaluation_results": [ + { + "evaluation_name": "Global MMLU Lite", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Global MMLU Lite", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9453 + } + }, + { + "evaluation_name": "Culturally Sensitive", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Sensitive", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9397 + } + }, + { + "evaluation_name": "Culturally Agnostic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Agnostic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9509 + } + }, + { + "evaluation_name": "Arabic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Arabic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9475, + "details": { + "confidence_interval": 0.02185683916 + } + } + }, + { + "evaluation_name": "English", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - English", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9425, + "details": { + "confidence_interval": 0.02281354088 + } + } + }, + { + "evaluation_name": "Bengali", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Bengali", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9425, + "details": { + 
"confidence_interval": 0.02281354088 + } + } + }, + { + "evaluation_name": "German", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - German", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.94, + "details": { + "confidence_interval": 0.02327328283 + } + } + }, + { + "evaluation_name": "French", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - French", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9575, + "details": { + "confidence_interval": 0.01976887483 + } + } + }, + { + "evaluation_name": "Hindi", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Hindi", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9425, + "details": { + "confidence_interval": 0.02281354088 + } + } + }, + { + "evaluation_name": "Indonesian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Indonesian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.955, + "details": { + "confidence_interval": 0.02031543089 + } + } + }, + { + "evaluation_name": "Italian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Italian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.955, + "details": { + "confidence_interval": 0.02031543089 + } + } + }, + { + "evaluation_name": "Japanese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Japanese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.94, + "details": { + "confidence_interval": 0.02327328283 + } + } + }, + { + "evaluation_name": "Korean", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Korean", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.94, + "details": { + "confidence_interval": 0.02327328283 + } + } + }, + { + "evaluation_name": "Portuguese", + "source_data": { + "dataset_name": 
"global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Portuguese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9425, + "details": { + "confidence_interval": 0.02281354088 + } + } + }, + { + "evaluation_name": "Spanish", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Spanish", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9475, + "details": { + "confidence_interval": 0.02185683916 + } + } + }, + { + "evaluation_name": "Swahili", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Swahili", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.94, + "details": { + "confidence_interval": 0.02327328283 + } + } + }, + { + "evaluation_name": "Yoruba", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Yoruba", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9425, + "details": { + "confidence_interval": 0.02281354088 + } + } + }, + { + "evaluation_name": "Chinese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Chinese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9475, + "details": { + "confidence_interval": 0.02185683916 + } + } + }, + { + "evaluation_name": "Burmese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Burmese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9425, + "details": { + "confidence_interval": 0.02281354088 + } + } + } + ] +} \ No newline at end of file diff --git a/data/global-mmlu-lite/google/gemma-3-27b-it/7a0bdc36-cff9-4a01-aa5c-750882aeccd4.json b/data/global-mmlu-lite/google/gemma-3-27b-it/7a0bdc36-cff9-4a01-aa5c-750882aeccd4.json new file mode 100644 index 000000000..0fda04c23 --- /dev/null +++ b/data/global-mmlu-lite/google/gemma-3-27b-it/7a0bdc36-cff9-4a01-aa5c-750882aeccd4.json @@ -0,0 +1,451 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "global-mmlu-lite/google_gemma-3-27b-it/1770682039.8556428", + "retrieved_timestamp": "1770682039.8556428", + "source_metadata": { + "source_name": "Global MMLU Lite", + "source_type": "documentation", + "source_organization_name": "Cohere 
Labs", + "source_organization_url": "https://cohere.com", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-3-27b-it", + "id": "google/gemma-3-27b-it", + "developer": "google", + "inference_platform": "unknown", + "additional_details": { + "display_name": "Gemma 3 27B" + } + }, + "evaluation_results": [ + { + "evaluation_name": "Global MMLU Lite", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Global MMLU Lite", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.763 + } + }, + { + "evaluation_name": "Culturally Sensitive", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Sensitive", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7528 + } + }, + { + "evaluation_name": "Culturally Agnostic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Agnostic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7733 + } + }, + { + "evaluation_name": "Arabic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Arabic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.78, + "details": { + "confidence_interval": 0.0405953917837699 + } + } + }, + { + "evaluation_name": "English", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - English", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7337, + "details": { + "confidence_interval": 0.0434278012181211 + } + } + }, + { + "evaluation_name": "Bengali", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Bengali", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.75, + "details": { + "confidence_interval": 0.0426482420232902 + } + } + }, + { + "evaluation_name": "German", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - German", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + 
}, + "score_details": { + "score": 0.775, + "details": { + "confidence_interval": 0.0409223160958216 + } + } + }, + { + "evaluation_name": "French", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - French", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7481, + "details": { + "confidence_interval": 0.0429190922512267 + } + } + }, + { + "evaluation_name": "Hindi", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Hindi", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7335, + "details": { + "confidence_interval": 0.0436563406109071 + } + } + }, + { + "evaluation_name": "Indonesian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Indonesian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7563, + "details": { + "confidence_interval": 0.0421786424783666 + } + } + }, + { + "evaluation_name": "Italian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Italian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.75, + "details": { + "confidence_interval": 0.0424344650278564 + } + } + }, + { + "evaluation_name": "Japanese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Japanese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7925, + "details": { + "confidence_interval": 0.039739901042451 + } + } + }, + { + "evaluation_name": "Korean", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Korean", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.798, + "details": { + "confidence_interval": 0.0395452064293286 + } + } + }, + { + "evaluation_name": "Portuguese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Portuguese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7481, + "details": { + "confidence_interval": 
0.0427012467707135 + } + } + }, + { + "evaluation_name": "Spanish", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Spanish", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7494, + "details": { + "confidence_interval": 0.0425230435928108 + } + } + }, + { + "evaluation_name": "Swahili", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Swahili", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.785, + "details": { + "confidence_interval": 0.0402598501134396 + } + } + }, + { + "evaluation_name": "Yoruba", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Yoruba", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7444, + "details": { + "confidence_interval": 0.0428022952090576 + } + } + }, + { + "evaluation_name": "Chinese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Chinese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7925, + "details": { + "confidence_interval": 0.039739901042451 + } + } + }, + { + "evaluation_name": "Burmese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Burmese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7719, + "details": { + "confidence_interval": 0.0411703730204029 + } + } + } + ] +} \ No newline at end of file diff --git a/data/global-mmlu-lite/google/gemma-3-4b-it/548f8e83-3e97-4cb4-83ac-8da11d8f3e5f.json b/data/global-mmlu-lite/google/gemma-3-4b-it/548f8e83-3e97-4cb4-83ac-8da11d8f3e5f.json new file mode 100644 index 000000000..291f3c560 --- /dev/null +++ b/data/global-mmlu-lite/google/gemma-3-4b-it/548f8e83-3e97-4cb4-83ac-8da11d8f3e5f.json @@ -0,0 +1,451 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "global-mmlu-lite/google_gemma-3-4b-it/1770682039.8556428", + "retrieved_timestamp": "1770682039.8556428", + "source_metadata": { + "source_name": "Global MMLU Lite", + "source_type": "documentation", + "source_organization_name": "Cohere Labs", + "source_organization_url": "https://cohere.com", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-3-4b-it", + "id": "google/gemma-3-4b-it", + "developer": "google", + "inference_platform": "unknown", + "additional_details": { + "display_name": "Gemma 3 4B" + } + }, + "evaluation_results": [ + { + "evaluation_name": "Global MMLU 
Lite", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Global MMLU Lite", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6511 + } + }, + { + "evaluation_name": "Culturally Sensitive", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Sensitive", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6116 + } + }, + { + "evaluation_name": "Culturally Agnostic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Agnostic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6906 + } + }, + { + "evaluation_name": "Arabic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Arabic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6525, + "details": { + "confidence_interval": 0.0466644077020903 + } + } + }, + { + "evaluation_name": "English", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - English", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.67, + "details": { + "confidence_interval": 0.046079999600029 + } + } + }, + { + "evaluation_name": "Bengali", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Bengali", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.68, + "details": { + "confidence_interval": 0.0457138228379294 + } + } + }, + { + "evaluation_name": "German", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - German", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6525, + "details": { + "confidence_interval": 0.0466644077020903 + } + } + }, + { + "evaluation_name": "French", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global 
MMLU Lite - French", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6575, + "details": { + "confidence_interval": 0.0465046373306654 + } + } + }, + { + "evaluation_name": "Hindi", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Hindi", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6475, + "details": { + "confidence_interval": 0.046818505067596 + } + } + }, + { + "evaluation_name": "Indonesian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Indonesian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6775, + "details": { + "confidence_interval": 0.0458076069884696 + } + } + }, + { + "evaluation_name": "Italian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Italian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6675, + "details": { + "confidence_interval": 0.0461678398924898 + } + } + }, + { + "evaluation_name": "Japanese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Japanese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6325, + "details": { + "confidence_interval": 0.0472473039906172 + } + } + }, + { + "evaluation_name": "Korean", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Korean", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.66, + "details": { + "confidence_interval": 0.0464226065447579 + } + } + }, + { + "evaluation_name": "Portuguese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Portuguese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.68, + "details": { + "confidence_interval": 0.0457138228379294 + } + } + }, + { + "evaluation_name": "Spanish", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Spanish", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6725, + "details": { + "confidence_interval": 0.0459906864522658 + } + } + }, + { + "evaluation_name": "Swahili", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Swahili", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6075, + "details": { + "confidence_interval": 0.0478532090532308 + } + } + }, + { + "evaluation_name": "Yoruba", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Yoruba", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5825, + "details": { + "confidence_interval": 0.0483274967299978 + } + } + }, + { + "evaluation_name": "Chinese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Chinese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6475, + "details": { + "confidence_interval": 0.046818505067596 + } + } + }, + { + "evaluation_name": "Burmese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Burmese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.63, + "details": { + "confidence_interval": 0.0473139527809662 + } + } + } + ] +} \ No newline at end of file diff --git a/data/global-mmlu-lite/mistralai/mistral-medium-3/1e2e51d0-42e8-4564-a42c-31819f89f459.json b/data/global-mmlu-lite/mistralai/mistral-medium-3/1e2e51d0-42e8-4564-a42c-31819f89f459.json new file mode 100644 index 000000000..8c936e6ff --- /dev/null +++ b/data/global-mmlu-lite/mistralai/mistral-medium-3/1e2e51d0-42e8-4564-a42c-31819f89f459.json @@ -0,0 +1,451 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "global-mmlu-lite/mistralai_mistral-medium-3/1770682039.8556428", + "retrieved_timestamp": "1770682039.8556428", + "source_metadata": { + "source_name": "Global MMLU Lite", + "source_type": "documentation", + "source_organization_name": "Cohere Labs", + "source_organization_url": "https://cohere.com", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistral-medium-3", + "id": "mistralai/mistral-medium-3", + "developer": "mistralai", + "inference_platform": "unknown", + "additional_details": { + "display_name": "Mistral Medium 3" + } + }, + "evaluation_results": [ + { + "evaluation_name": "Global MMLU Lite", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Global MMLU Lite", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5511 + } + }, + { + "evaluation_name": "Culturally Sensitive", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Sensitive", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5391 + } + }, + { + "evaluation_name": "Culturally Agnostic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Agnostic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5631 + } + }, + { + "evaluation_name": "Arabic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Arabic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.455, + "details": { + "confidence_interval": 0.0488002497704065 + } + } + }, + { + "evaluation_name": "English", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - English", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.38, + "details": { + "confidence_interval": 0.0475669974392838 + } + } + }, + { + "evaluation_name": "Bengali", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Bengali", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5175, + "details": { + "confidence_interval": 0.0489690784681949 + } + } + }, + { + "evaluation_name": "German", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - German", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4775, + "details": { + "confidence_interval": 0.048949462883814 + } + } + }, + { + "evaluation_name": "French", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - French", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.41, + "details": { + "confidence_interval": 0.0481987782191081 + } + } + }, + { + "evaluation_name": "Hindi", + "source_data": { + "dataset_name": 
"global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Hindi", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.555, + "details": { + "confidence_interval": 0.0487017528493824 + } + } + }, + { + "evaluation_name": "Indonesian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Indonesian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.515, + "details": { + "confidence_interval": 0.0489770450552826 + } + } + }, + { + "evaluation_name": "Italian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Italian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.535, + "details": { + "confidence_interval": 0.0488789043994999 + } + } + }, + { + "evaluation_name": "Japanese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Japanese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.58, + "details": { + "confidence_interval": 0.0483678449158397 + } + } + }, + { + "evaluation_name": "Korean", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Korean", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.595, + "details": { + "confidence_interval": 0.0481065364404039 + } + } + }, + { + "evaluation_name": "Portuguese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Portuguese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5175, + "details": { + "confidence_interval": 0.0489690784681949 + } + } + }, + { + "evaluation_name": "Spanish", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Spanish", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5375, + "details": { + "confidence_interval": 0.0488610953035984 + } + } + }, + { + "evaluation_name": "Swahili", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + 
"https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Swahili", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7075, + "details": { + "confidence_interval": 0.0445804299504003 + } + } + }, + { + "evaluation_name": "Yoruba", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Yoruba", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7675, + "details": { + "confidence_interval": 0.0413969901513152 + } + } + }, + { + "evaluation_name": "Chinese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Chinese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.535, + "details": { + "confidence_interval": 0.0488789043994999 + } + } + }, + { + "evaluation_name": "Burmese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Burmese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7325, + "details": { + "confidence_interval": 0.0433794261948387 + } + } + } + ] +} \ No newline at end of file diff --git a/data/global-mmlu-lite/mistralai/mistral-small-2503/85822e81-7478-4f63-b7f3-89a78e75c6d9.json b/data/global-mmlu-lite/mistralai/mistral-small-2503/85822e81-7478-4f63-b7f3-89a78e75c6d9.json new file mode 100644 index 000000000..31a088393 --- /dev/null +++ b/data/global-mmlu-lite/mistralai/mistral-small-2503/85822e81-7478-4f63-b7f3-89a78e75c6d9.json @@ -0,0 +1,451 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "global-mmlu-lite/mistralai_mistral-small-2503/1770682039.8556428", + "retrieved_timestamp": "1770682039.8556428", + "source_metadata": { + "source_name": "Global MMLU Lite", + "source_type": "documentation", + "source_organization_name": "Cohere Labs", + "source_organization_url": "https://cohere.com", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistral-small-2503", + "id": "mistralai/mistral-small-2503", + "developer": "mistralai", + "inference_platform": "unknown", + "additional_details": { + "display_name": "Mistral Small 3.1" + } + }, + "evaluation_results": [ + { + "evaluation_name": "Global MMLU Lite", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Global MMLU Lite", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7852 + } + }, + { + "evaluation_name": "Culturally Sensitive", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + 
"https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Sensitive", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7537 + } + }, + { + "evaluation_name": "Culturally Agnostic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Agnostic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8166 + } + }, + { + "evaluation_name": "Arabic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Arabic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7875, + "details": { + "confidence_interval": 0.0400887803670033 + } + } + }, + { + "evaluation_name": "English", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - English", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8, + "details": { + "confidence_interval": 0.039199279690801 + } + } + }, + { + "evaluation_name": "Bengali", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Bengali", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7725, + "details": { + "confidence_interval": 0.0410826112430601 + } + } + }, + { + "evaluation_name": "German", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - German", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7975, + "details": { + "confidence_interval": 0.0393818356106108 + } + } + }, + { + "evaluation_name": "French", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - French", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8, + "details": { + "confidence_interval": 0.039199279690801 + } + } + }, + { + "evaluation_name": "Hindi", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Hindi", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.795, + "details": { + "confidence_interval": 0.0395620320289107 + } + } + }, + { + "evaluation_name": "Indonesian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Indonesian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.785, + "details": { + "confidence_interval": 0.0402598501134396 + } + } + }, + { + "evaluation_name": "Italian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Italian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.805, + "details": { + "confidence_interval": 0.0388269557903546 + } + } + }, + { + "evaluation_name": "Japanese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Japanese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.77, + "details": { + "confidence_interval": 0.0412408279846843 + } + } + }, + { + "evaluation_name": "Korean", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Korean", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.79, + "details": { + "confidence_interval": 0.039915473764981 + } + } + }, + { + "evaluation_name": "Portuguese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Portuguese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7925, + "details": { + "confidence_interval": 0.039739901042451 + } + } + }, + { + "evaluation_name": "Spanish", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Spanish", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7825, + "details": { + "confidence_interval": 0.0404287113993418 + } + } + }, + { + "evaluation_name": "Swahili", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Swahili", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.775, + "details": { + "confidence_interval": 0.0409223160958216 + } + } + }, + { + "evaluation_name": "Yoruba", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Yoruba", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.735, + "details": { + "confidence_interval": 0.0432498595893876 + } + } + }, + { + "evaluation_name": "Chinese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Chinese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7925, + "details": { + "confidence_interval": 0.039739901042451 + } + } + }, + { + "evaluation_name": "Burmese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Burmese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7825, + "details": { + "confidence_interval": 0.0404287113993418 + } + } + } + ] +} \ No newline at end of file diff --git a/data/global-mmlu-lite/openai/gpt-4.1-2025-04-14/c4b48f92-4f10-4831-86a9-3ede0512bf7b.json b/data/global-mmlu-lite/openai/gpt-4.1-2025-04-14/c4b48f92-4f10-4831-86a9-3ede0512bf7b.json new file mode 100644 index 000000000..9e6900ce9 --- /dev/null +++ b/data/global-mmlu-lite/openai/gpt-4.1-2025-04-14/c4b48f92-4f10-4831-86a9-3ede0512bf7b.json @@ -0,0 +1,451 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "global-mmlu-lite/openai_gpt-4.1-2025-04-14/1770682039.8556428", + "retrieved_timestamp": "1770682039.8556428", + "source_metadata": { + "source_name": "Global MMLU Lite", + "source_type": "documentation", + "source_organization_name": "Cohere Labs", + "source_organization_url": "https://cohere.com", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gpt-4.1-2025-04-14", + "id": "openai/gpt-4.1-2025-04-14", + "developer": "openai", + "inference_platform": "unknown", + "additional_details": { + "display_name": "GPT-4.1" + } + }, + "evaluation_results": [ + { + "evaluation_name": "Global MMLU Lite", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Global MMLU Lite", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8755 + } + }, + { + "evaluation_name": "Culturally Sensitive", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Sensitive", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8541 + } + }, + { + "evaluation_name": "Culturally 
Agnostic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Agnostic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8969 + } + }, + { + "evaluation_name": "Arabic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Arabic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.88, + "details": { + "confidence_interval": 0.0318456453642134 + } + } + }, + { + "evaluation_name": "English", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - English", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8825, + "details": { + "confidence_interval": 0.0315569037846059 + } + } + }, + { + "evaluation_name": "Bengali", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Bengali", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8625, + "details": { + "confidence_interval": 0.0337480742790123 + } + } + }, + { + "evaluation_name": "German", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - German", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.875, + "details": { + "confidence_interval": 0.0324098580108514 + } + } + }, + { + "evaluation_name": "French", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - French", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8875, + "details": { + "confidence_interval": 0.0309655314070612 + } + } + }, + { + "evaluation_name": "Hindi", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Hindi", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8775, + "details": { + "confidence_interval": 0.0321299242960121 + } + } + }, + { + "evaluation_name": "Indonesian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + 
"https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Indonesian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.885, + "details": { + "confidence_interval": 0.0312635759101603 + } + } + }, + { + "evaluation_name": "Italian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Italian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.88, + "details": { + "confidence_interval": 0.0318456453642134 + } + } + }, + { + "evaluation_name": "Japanese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Japanese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8725, + "details": { + "confidence_interval": 0.0326855581520567 + } + } + }, + { + "evaluation_name": "Korean", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Korean", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.87, + "details": { + "confidence_interval": 0.0329571309666248 + } + } + }, + { + "evaluation_name": "Portuguese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Portuguese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.875, + "details": { + "confidence_interval": 0.0324098580108514 + } + } + }, + { + "evaluation_name": "Spanish", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Spanish", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.885, + "details": { + "confidence_interval": 0.0312635759101603 + } + } + }, + { + "evaluation_name": "Swahili", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Swahili", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8725, + "details": { + "confidence_interval": 0.0326855581520567 + } + } + }, + { + "evaluation_name": "Yoruba", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + 
"metric_config": { + "evaluation_description": "Global MMLU Lite - Yoruba", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.875, + "details": { + "confidence_interval": 0.0324098580108514 + } + } + }, + { + "evaluation_name": "Chinese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Chinese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.87, + "details": { + "confidence_interval": 0.0329571309666248 + } + } + }, + { + "evaluation_name": "Burmese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Burmese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8575, + "details": { + "confidence_interval": 0.0342564686873586 + } + } + } + ] +} \ No newline at end of file diff --git a/data/global-mmlu-lite/openai/gpt-5-2025-08-07/07d59b47-f0d1-48a4-87fb-b931bbb21ac7.json b/data/global-mmlu-lite/openai/gpt-5-2025-08-07/07d59b47-f0d1-48a4-87fb-b931bbb21ac7.json new file mode 100644 index 000000000..a1689b336 --- /dev/null +++ b/data/global-mmlu-lite/openai/gpt-5-2025-08-07/07d59b47-f0d1-48a4-87fb-b931bbb21ac7.json @@ -0,0 +1,451 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "global-mmlu-lite/openai_gpt-5-2025-08-07/1770682039.8556428", + "retrieved_timestamp": "1770682039.8556428", + "source_metadata": { + "source_name": "Global MMLU Lite", + "source_type": "documentation", + "source_organization_name": "Cohere Labs", + "source_organization_url": "https://cohere.com", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gpt-5-2025-08-07", + "id": "openai/gpt-5-2025-08-07", + "developer": "openai", + "inference_platform": "unknown", + "additional_details": { + "display_name": "GPT-5" + } + }, + "evaluation_results": [ + { + "evaluation_name": "Global MMLU Lite", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Global MMLU Lite", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8895 + } + }, + { + "evaluation_name": "Culturally Sensitive", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Sensitive", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8913 + } + }, + { + "evaluation_name": "Culturally Agnostic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Agnostic", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8878 + } + }, + { + "evaluation_name": "Arabic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Arabic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8925, + "details": { + "confidence_interval": 0.0303547345865505 + } + } + }, + { + "evaluation_name": "English", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - English", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8725, + "details": { + "confidence_interval": 0.0326855581520567 + } + } + }, + { + "evaluation_name": "Bengali", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Bengali", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9, + "details": { + "confidence_interval": 0.0293994597681008 + } + } + }, + { + "evaluation_name": "German", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - German", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.91, + "details": { + "confidence_interval": 0.0280452971732717 + } + } + }, + { + "evaluation_name": "French", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - French", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9075, + "details": { + "confidence_interval": 0.0283930651251164 + } + } + }, + { + "evaluation_name": "Hindi", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Hindi", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.865, + "details": { + "confidence_interval": 0.0334882947381079 + } + } + }, + { + "evaluation_name": "Indonesian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Indonesian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.795, + "details": { + "confidence_interval": 
0.0395620320289107 + } + } + }, + { + "evaluation_name": "Italian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Italian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9075, + "details": { + "confidence_interval": 0.0283930651251164 + } + } + }, + { + "evaluation_name": "Japanese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Japanese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8875, + "details": { + "confidence_interval": 0.0309655314070612 + } + } + }, + { + "evaluation_name": "Korean", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Korean", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.915, + "details": { + "confidence_interval": 0.0273299039414468 + } + } + }, + { + "evaluation_name": "Portuguese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Portuguese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8875, + "details": { + "confidence_interval": 0.0309655314070612 + } + } + }, + { + "evaluation_name": "Spanish", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Spanish", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.905, + "details": { + "confidence_interval": 0.0287345359327925 + } + } + }, + { + "evaluation_name": "Swahili", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Swahili", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.865, + "details": { + "confidence_interval": 0.0334882947381079 + } + } + }, + { + "evaluation_name": "Yoruba", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Yoruba", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9125, + "details": { + "confidence_interval": 0.0276909948229923 + } + } + }, + { + "evaluation_name": "Chinese", + "source_data": { + 
"dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Chinese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.895, + "details": { + "confidence_interval": 0.0300416832365769 + } + } + }, + { + "evaluation_name": "Burmese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Burmese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.915, + "details": { + "confidence_interval": 0.0273299039414468 + } + } + } + ] +} \ No newline at end of file diff --git a/data/global-mmlu-lite/openai/o3-mini-2025-01-31/bfc75f7b-ebc2-4833-acb2-a9b48bd02d79.json b/data/global-mmlu-lite/openai/o3-mini-2025-01-31/bfc75f7b-ebc2-4833-acb2-a9b48bd02d79.json new file mode 100644 index 000000000..f888f274b --- /dev/null +++ b/data/global-mmlu-lite/openai/o3-mini-2025-01-31/bfc75f7b-ebc2-4833-acb2-a9b48bd02d79.json @@ -0,0 +1,451 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "global-mmlu-lite/openai_o3-mini-2025-01-31/1770682039.8556428", + "retrieved_timestamp": "1770682039.8556428", + "source_metadata": { + "source_name": "Global MMLU Lite", + "source_type": "documentation", + "source_organization_name": "Cohere Labs", + "source_organization_url": "https://cohere.com", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "o3-mini-2025-01-31", + "id": "openai/o3-mini-2025-01-31", + "developer": "openai", + "inference_platform": "unknown", + "additional_details": { + "display_name": "o3 mini" + } + }, + "evaluation_results": [ + { + "evaluation_name": "Global MMLU Lite", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Global MMLU Lite", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.78 + } + }, + { + "evaluation_name": "Culturally Sensitive", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Sensitive", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.765 + } + }, + { + "evaluation_name": "Culturally Agnostic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Agnostic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.795 + } + }, + { + "evaluation_name": "Arabic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + 
"evaluation_description": "Global MMLU Lite - Arabic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7725, + "details": { + "confidence_interval": 0.0410826112430601 + } + } + }, + { + "evaluation_name": "English", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - English", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8025, + "details": { + "confidence_interval": 0.0390143311477458 + } + } + }, + { + "evaluation_name": "Bengali", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Bengali", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.77, + "details": { + "confidence_interval": 0.0412408279846843 + } + } + }, + { + "evaluation_name": "German", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - German", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7525, + "details": { + "confidence_interval": 0.0422920706585954 + } + } + }, + { + "evaluation_name": "French", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - French", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.74, + "details": { + "confidence_interval": 0.0429853660302419 + } + } + }, + { + "evaluation_name": "Hindi", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Hindi", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7525, + "details": { + "confidence_interval": 0.0422920706585954 + } + } + }, + { + "evaluation_name": "Indonesian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Indonesian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7425, + "details": { + "confidence_interval": 0.042850405989882 + } + } + }, + { + "evaluation_name": "Italian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Italian", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8, + "details": { + "confidence_interval": 0.039199279690801 + } + } + }, + { + "evaluation_name": "Japanese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Japanese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.81, + "details": { + "confidence_interval": 0.0384447822371523 + } + } + }, + { + "evaluation_name": "Korean", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Korean", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8075, + "details": { + "confidence_interval": 0.0386371183112584 + } + } + }, + { + "evaluation_name": "Portuguese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Portuguese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7975, + "details": { + "confidence_interval": 0.0393818356106108 + } + } + }, + { + "evaluation_name": "Spanish", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Spanish", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.775, + "details": { + "confidence_interval": 0.0409223160958216 + } + } + }, + { + "evaluation_name": "Swahili", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Swahili", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.765, + "details": { + "confidence_interval": 0.0415511209081742 + } + } + }, + { + "evaluation_name": "Yoruba", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Yoruba", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7725, + "details": { + "confidence_interval": 0.0410826112430601 + } + } + }, + { + "evaluation_name": "Chinese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Chinese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { 
+ "score": 0.8125, + "details": { + "confidence_interval": 0.0382499098762049 + } + } + }, + { + "evaluation_name": "Burmese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Burmese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8075, + "details": { + "confidence_interval": 0.0386371183112584 + } + } + } + ] +} \ No newline at end of file diff --git a/data/global-mmlu-lite/unknown/aya-expanse-32b/1b07b6c2-9eb1-4e90-9e29-91c2b8258b4d.json b/data/global-mmlu-lite/unknown/aya-expanse-32b/1b07b6c2-9eb1-4e90-9e29-91c2b8258b4d.json new file mode 100644 index 000000000..438f36e4d --- /dev/null +++ b/data/global-mmlu-lite/unknown/aya-expanse-32b/1b07b6c2-9eb1-4e90-9e29-91c2b8258b4d.json @@ -0,0 +1,451 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "global-mmlu-lite/unknown_aya-expanse-32b/1770682039.8556428", + "retrieved_timestamp": "1770682039.8556428", + "source_metadata": { + "source_name": "Global MMLU Lite", + "source_type": "documentation", + "source_organization_name": "Cohere Labs", + "source_organization_url": "https://cohere.com", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "aya-expanse-32b", + "id": "unknown/aya-expanse-32b", + "developer": "unknown", + "inference_platform": "unknown", + "additional_details": { + "display_name": "Aya Expanse 32B" + } + }, + "evaluation_results": [ + { + "evaluation_name": "Global MMLU Lite", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Global MMLU Lite", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7353 + } + }, + { + "evaluation_name": "Culturally Sensitive", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Sensitive", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6891 + } + }, + { + "evaluation_name": "Culturally Agnostic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Agnostic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7815 + } + }, + { + "evaluation_name": "Arabic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Arabic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7425, + "details": { + "confidence_interval": 0.042850405989882 + } + } + }, + { + "evaluation_name": "English", + "source_data": { + "dataset_name": 
"global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - English", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7544, + "details": { + "confidence_interval": 0.0422362190048598 + } + } + }, + { + "evaluation_name": "Bengali", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Bengali", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7343, + "details": { + "confidence_interval": 0.0433386611155747 + } + } + }, + { + "evaluation_name": "German", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - German", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7425, + "details": { + "confidence_interval": 0.042850405989882 + } + } + }, + { + "evaluation_name": "French", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - French", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7325, + "details": { + "confidence_interval": 0.0433794261948387 + } + } + }, + { + "evaluation_name": "Hindi", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Hindi", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7375, + "details": { + "confidence_interval": 0.043118511644326 + } + } + }, + { + "evaluation_name": "Indonesian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Indonesian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7594, + "details": { + "confidence_interval": 0.0419416660786681 + } + } + }, + { + "evaluation_name": "Italian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Italian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7305, + "details": { + "confidence_interval": 0.0436468814160691 + } + } + }, + { + "evaluation_name": "Japanese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + 
"https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Japanese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7419, + "details": { + "confidence_interval": 0.0429391274814829 + } + } + }, + { + "evaluation_name": "Korean", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Korean", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7525, + "details": { + "confidence_interval": 0.0422920706585954 + } + } + }, + { + "evaluation_name": "Portuguese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Portuguese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7544, + "details": { + "confidence_interval": 0.0422362190048598 + } + } + }, + { + "evaluation_name": "Spanish", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Spanish", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7362, + "details": { + "confidence_interval": 0.0432964154917688 + } + } + }, + { + "evaluation_name": "Swahili", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Swahili", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7071, + "details": { + "confidence_interval": 0.044824280008073 + } + } + }, + { + "evaluation_name": "Yoruba", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Yoruba", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6942, + "details": { + "confidence_interval": 0.0452072976819525 + } + } + }, + { + "evaluation_name": "Chinese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Chinese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.743, + "details": { + "confidence_interval": 0.0432027486149424 + } + } + }, + { + "evaluation_name": "Burmese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + 
"metric_config": { + "evaluation_description": "Global MMLU Lite - Burmese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7025, + "details": { + "confidence_interval": 0.0448006943140238 + } + } + } + ] +} \ No newline at end of file diff --git a/data/global-mmlu-lite/unknown/granite-4.0-h-small/96569c98-0d02-4b32-b915-87b707102913.json b/data/global-mmlu-lite/unknown/granite-4.0-h-small/96569c98-0d02-4b32-b915-87b707102913.json new file mode 100644 index 000000000..a8365f097 --- /dev/null +++ b/data/global-mmlu-lite/unknown/granite-4.0-h-small/96569c98-0d02-4b32-b915-87b707102913.json @@ -0,0 +1,451 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "global-mmlu-lite/unknown_granite-4.0-h-small/1770682039.8556428", + "retrieved_timestamp": "1770682039.8556428", + "source_metadata": { + "source_name": "Global MMLU Lite", + "source_type": "documentation", + "source_organization_name": "Cohere Labs", + "source_organization_url": "https://cohere.com", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "granite-4.0-h-small", + "id": "unknown/granite-4.0-h-small", + "developer": "unknown", + "inference_platform": "unknown", + "additional_details": { + "display_name": "Granite 4.0 Small" + } + }, + "evaluation_results": [ + { + "evaluation_name": "Global MMLU Lite", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Global MMLU Lite", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7503 + } + }, + { + "evaluation_name": "Culturally Sensitive", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Sensitive", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7182 + } + }, + { + "evaluation_name": "Culturally Agnostic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Agnostic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7826 + } + }, + { + "evaluation_name": "Arabic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Arabic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7613, + "details": { + "confidence_interval": 0.0418799929410023 + } + } + }, + { + "evaluation_name": "English", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - English", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.77, + "details": { + "confidence_interval": 0.0412408279846843 + } + } + }, + { + "evaluation_name": "Bengali", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Bengali", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7613, + "details": { + "confidence_interval": 0.0418799929410023 + } + } + }, + { + "evaluation_name": "German", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - German", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.755, + "details": { + "confidence_interval": 0.0421477711557175 + } + } + }, + { + "evaluation_name": "French", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - French", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7594, + "details": { + "confidence_interval": 0.0419416660786681 + } + } + }, + { + "evaluation_name": "Hindi", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Hindi", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7575, + "details": { + "confidence_interval": 0.0420015468835339 + } + } + }, + { + "evaluation_name": "Indonesian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Indonesian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7614, + "details": { + "confidence_interval": 0.0420850950563913 + } + } + }, + { + "evaluation_name": "Italian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Italian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7525, + "details": { + "confidence_interval": 0.0422920706585954 + } + } + }, + { + "evaluation_name": "Japanese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Japanese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7406, + 
"details": { + "confidence_interval": 0.0431176028953377 + } + } + }, + { + "evaluation_name": "Korean", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Korean", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7525, + "details": { + "confidence_interval": 0.0422920706585954 + } + } + }, + { + "evaluation_name": "Portuguese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Portuguese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.757, + "details": { + "confidence_interval": 0.0422983725746105 + } + } + }, + { + "evaluation_name": "Spanish", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Spanish", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7638, + "details": { + "confidence_interval": 0.0417276763767606 + } + } + }, + { + "evaluation_name": "Swahili", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Swahili", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7318, + "details": { + "confidence_interval": 0.0434682405808651 + } + } + }, + { + "evaluation_name": "Yoruba", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Yoruba", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6921, + "details": { + "confidence_interval": 0.0456390297025301 + } + } + }, + { + "evaluation_name": "Chinese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Chinese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7475, + "details": { + "confidence_interval": 0.0425749733789813 + } + } + }, + { + "evaluation_name": "Burmese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Burmese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7419, + "details": { + "confidence_interval": 0.0429391274814829 + } + } + } + ] +} \ No newline 
at end of file diff --git a/data/global-mmlu-lite/unknown/o4-mini-2025-04-16/f1955ca9-b8ef-4565-9c4c-2e7aaba6a9db.json b/data/global-mmlu-lite/unknown/o4-mini-2025-04-16/f1955ca9-b8ef-4565-9c4c-2e7aaba6a9db.json new file mode 100644 index 000000000..8cd940025 --- /dev/null +++ b/data/global-mmlu-lite/unknown/o4-mini-2025-04-16/f1955ca9-b8ef-4565-9c4c-2e7aaba6a9db.json @@ -0,0 +1,451 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "global-mmlu-lite/unknown_o4-mini-2025-04-16/1770682039.8556428", + "retrieved_timestamp": "1770682039.8556428", + "source_metadata": { + "source_name": "Global MMLU Lite", + "source_type": "documentation", + "source_organization_name": "Cohere Labs", + "source_organization_url": "https://cohere.com", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "o4-mini-2025-04-16", + "id": "unknown/o4-mini-2025-04-16", + "developer": "unknown", + "inference_platform": "unknown", + "additional_details": { + "display_name": "o4 mini" + } + }, + "evaluation_results": [ + { + "evaluation_name": "Global MMLU Lite", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Global MMLU Lite", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8705 + } + }, + { + "evaluation_name": "Culturally Sensitive", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Sensitive", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8503 + } + }, + { + "evaluation_name": "Culturally Agnostic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Agnostic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8906 + } + }, + { + "evaluation_name": "Arabic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Arabic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.865, + "details": { + "confidence_interval": 0.0334882947381079 + } + } + }, + { + "evaluation_name": "English", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - English", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8675, + "details": { + "confidence_interval": 0.0332246776628893 + } + } + }, + { + "evaluation_name": "Bengali", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + 
"https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Bengali", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8875, + "details": { + "confidence_interval": 0.0309655314070612 + } + } + }, + { + "evaluation_name": "German", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - German", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8775, + "details": { + "confidence_interval": 0.0321299242960121 + } + } + }, + { + "evaluation_name": "French", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - French", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.87, + "details": { + "confidence_interval": 0.0329571309666248 + } + } + }, + { + "evaluation_name": "Hindi", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Hindi", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.87, + "details": { + "confidence_interval": 0.0329571309666248 + } + } + }, + { + "evaluation_name": "Indonesian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Indonesian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8675, + "details": { + "confidence_interval": 0.0332246776628893 + } + } + }, + { + "evaluation_name": "Italian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Italian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.855, + "details": { + "confidence_interval": 0.034505248053577 + } + } + }, + { + "evaluation_name": "Japanese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Japanese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.885, + "details": { + "confidence_interval": 0.0312635759101603 + } + } + }, + { + "evaluation_name": "Korean", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + 
"evaluation_description": "Global MMLU Lite - Korean", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.88, + "details": { + "confidence_interval": 0.0318456453642134 + } + } + }, + { + "evaluation_name": "Portuguese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Portuguese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.88, + "details": { + "confidence_interval": 0.0318456453642134 + } + } + }, + { + "evaluation_name": "Spanish", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Spanish", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.855, + "details": { + "confidence_interval": 0.034505248053577 + } + } + }, + { + "evaluation_name": "Swahili", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Swahili", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8525, + "details": { + "confidence_interval": 0.0347505193336969 + } + } + }, + { + "evaluation_name": "Yoruba", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Yoruba", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8525, + "details": { + "confidence_interval": 0.0347505193336969 + } + } + }, + { + "evaluation_name": "Chinese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Chinese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.89, + "details": { + "confidence_interval": 0.0306626327370121 + } + } + }, + { + "evaluation_name": "Burmese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Burmese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8725, + "details": { + "confidence_interval": 0.0326855581520567 + } + } + } + ] +} \ No newline at end of file diff --git a/data/global-mmlu-lite/xAI/Grok-3-Mini/d3dd93e4-0cfe-4141-a835-3921fb80ed27.json b/data/global-mmlu-lite/xAI/Grok-3-Mini/d3dd93e4-0cfe-4141-a835-3921fb80ed27.json deleted file mode 100644 index 6d41f0fad..000000000 --- 
a/data/global-mmlu-lite/xAI/Grok-3-Mini/d3dd93e4-0cfe-4141-a835-3921fb80ed27.json +++ /dev/null @@ -1,353 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "global-mmlu-lite/xai_grok-3-mini/1764290504.009271", - "retrieved_timestamp": "1764290504.009271", - "source_data": [ - "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - ], - "source_metadata": { - "source_organization_name": "Cohere Labs", - "source_organization_url": "https://www.kaggle.com/organizations/cohere-labs", - "evaluator_relationship": "third_party", - "source_type": "documentation", - "source_name": "Kaggle Global MMLU Lite Leaderboard" - }, - "model_info": { - "name": "Grok 3 Mini", - "id": "xai/grok-3-mini", - "developer": "xAI", - "inference_platform": "Kaggle" - }, - "evaluation_results": [ - { - "evaluation_name": "Global MMLU Lite", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite" - }, - "score_details": { - "score": 0.673028486744793 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Sensitive", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Sensitive" - }, - "score_details": { - "score": 0.6717123654890614 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Agnostic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Agnostic" - }, - "score_details": { - "score": 0.674347939190904 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Arabic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Arabic" - }, - "score_details": { - "score": 0.755, - "details": { - "confidence_interval": 0.042148, - "stddev": 0.042148 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "English", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task English" - }, - "score_details": { - "score": 0.5075, - "details": { - "confidence_interval": 0.048994, - "stddev": 0.048994 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Bengali", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Bengali" - }, - "score_details": { - "score": 0.735516, - "details": { - "confidence_interval": 0.043386, - "stddev": 0.043386 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "German", - "metric_config": { 
- "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task German" - }, - "score_details": { - "score": 0.659148, - "details": { - "confidence_interval": 0.046509, - "stddev": 0.046509 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "French", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task French" - }, - "score_details": { - "score": 0.485, - "details": { - "confidence_interval": 0.048977, - "stddev": 0.048977 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Hindi", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Hindi" - }, - "score_details": { - "score": 0.56, - "details": { - "confidence_interval": 0.048645, - "stddev": 0.048645 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Indonesian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Indonesian" - }, - "score_details": { - "score": 0.725, - "details": { - "confidence_interval": 0.043758, - "stddev": 0.043758 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Italian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Italian" - }, - "score_details": { - "score": 0.69598, - "details": { - "confidence_interval": 0.045191, - "stddev": 0.045191 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Japanese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Japanese" - }, - "score_details": { - "score": 0.6575, - "details": { - "confidence_interval": 0.046505, - "stddev": 0.046505 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Korean", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Korean" - }, - "score_details": { - "score": 0.7325, - "details": { - "confidence_interval": 0.043379, - "stddev": 0.043379 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Portuguese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Portuguese" - }, - "score_details": { - "score": 0.6275, - "details": { - "confidence_interval": 0.047379, - "stddev": 0.047379 - } - }, - 
"detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Spanish", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Spanish" - }, - "score_details": { - "score": 0.61, - "details": { - "confidence_interval": 0.047799, - "stddev": 0.047799 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Swahili", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Swahili" - }, - "score_details": { - "score": 0.7625, - "details": { - "confidence_interval": 0.041703, - "stddev": 0.041703 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Yoruba", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Yoruba" - }, - "score_details": { - "score": 0.829574, - "details": { - "confidence_interval": 0.036894, - "stddev": 0.036894 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Chinese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Chinese" - }, - "score_details": { - "score": 0.556391, - "details": { - "confidence_interval": 0.048747, - "stddev": 0.048747 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Burmese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Burmese" - }, - "score_details": { - "score": 0.869347, - "details": { - "confidence_interval": 0.03311, - "stddev": 0.03311 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - } - ] -} \ No newline at end of file diff --git a/data/global-mmlu-lite/xAI/Grok-4/96c76d71-942b-452b-919b-ad13bd1614d6.json b/data/global-mmlu-lite/xAI/Grok-4/96c76d71-942b-452b-919b-ad13bd1614d6.json deleted file mode 100644 index a0bafc029..000000000 --- a/data/global-mmlu-lite/xAI/Grok-4/96c76d71-942b-452b-919b-ad13bd1614d6.json +++ /dev/null @@ -1,353 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "global-mmlu-lite/xai_grok-4/1764290503.994506", - "retrieved_timestamp": "1764290503.994506", - "source_data": [ - "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - ], - "source_metadata": { - "source_organization_name": "Cohere Labs", - "source_organization_url": "https://www.kaggle.com/organizations/cohere-labs", - "evaluator_relationship": "third_party", - "source_type": "documentation", - "source_name": "Kaggle Global MMLU Lite Leaderboard" - }, - "model_info": { - "name": "Grok 4", - "id": "xai/grok-4", - "developer": "xAI", - "inference_platform": "Kaggle" - }, - "evaluation_results": [ - { - "evaluation_name": "Global MMLU Lite", - "metric_config": { - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite" - }, - "score_details": { - "score": 0.8881249999999999 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Sensitive", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Sensitive" - }, - "score_details": { - "score": 0.88625 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Global MMLU Lite Culturally Agnostic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Global MMLU Lite Culturally Agnostic" - }, - "score_details": { - "score": 0.8900000000000001 - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Arabic", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Arabic" - }, - "score_details": { - "score": 0.885, - "details": { - "confidence_interval": 0.031264, - "stddev": 0.031264 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "English", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task English" - }, - "score_details": { - "score": 0.905, - "details": { - "confidence_interval": 0.028735, - "stddev": 0.028735 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Bengali", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Bengali" - }, - "score_details": { - "score": 0.8925, - "details": { - "confidence_interval": 0.030355, - "stddev": 0.030355 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "German", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task German" - }, - "score_details": { - "score": 0.8725, - "details": { - "confidence_interval": 0.032686, - "stddev": 0.032686 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "French", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task French" - }, - "score_details": { - "score": 0.875, - "details": { - "confidence_interval": 0.03241, - "stddev": 0.03241 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Hindi", - "metric_config": { - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Hindi" - }, - "score_details": { - "score": 0.8675, - "details": { - "confidence_interval": 0.033225, - "stddev": 0.033225 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Indonesian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Indonesian" - }, - "score_details": { - "score": 0.89, - "details": { - "confidence_interval": 0.030663, - "stddev": 0.030663 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Italian", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Italian" - }, - "score_details": { - "score": 0.9025, - "details": { - "confidence_interval": 0.02907, - "stddev": 0.02907 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Japanese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Japanese" - }, - "score_details": { - "score": 0.87, - "details": { - "confidence_interval": 0.032957, - "stddev": 0.032957 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Korean", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Korean" - }, - "score_details": { - "score": 0.895, - "details": { - "confidence_interval": 0.030042, - "stddev": 0.030042 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Portuguese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Portuguese" - }, - "score_details": { - "score": 0.8725, - "details": { - "confidence_interval": 0.032686, - "stddev": 0.032686 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Spanish", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Spanish" - }, - "score_details": { - "score": 0.9075, - "details": { - "confidence_interval": 0.028393, - "stddev": 0.028393 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Swahili", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Swahili" - }, - "score_details": { - "score": 0.91, - "details": { - "confidence_interval": 0.028045, - "stddev": 0.028045 - } - }, - "detailed_evaluation_results_url": 
"https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Yoruba", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Yoruba" - }, - "score_details": { - "score": 0.905, - "details": { - "confidence_interval": 0.028735, - "stddev": 0.028735 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Chinese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Chinese" - }, - "score_details": { - "score": 0.8525, - "details": { - "confidence_interval": 0.034751, - "stddev": 0.034751 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - }, - { - "evaluation_name": "Burmese", - "metric_config": { - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0, - "evaluation_description": "Global MMLU Lite accuracy for task Burmese" - }, - "score_details": { - "score": 0.9075, - "details": { - "confidence_interval": 0.028393, - "stddev": 0.028393 - } - }, - "detailed_evaluation_results_url": "https://www.kaggle.com/benchmarks/cohere-labs/global-mmlu-lite" - } - ] -} \ No newline at end of file diff --git a/data/global-mmlu-lite/xai/grok-3-mini/143d53e6-b34e-4fa8-af3f-8019cef29abb.json b/data/global-mmlu-lite/xai/grok-3-mini/143d53e6-b34e-4fa8-af3f-8019cef29abb.json new file mode 100644 index 000000000..18e2509ab --- /dev/null +++ b/data/global-mmlu-lite/xai/grok-3-mini/143d53e6-b34e-4fa8-af3f-8019cef29abb.json @@ -0,0 +1,451 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "global-mmlu-lite/xai_grok-3-mini/1770682039.8556428", + "retrieved_timestamp": "1770682039.8556428", + "source_metadata": { + "source_name": "Global MMLU Lite", + "source_type": "documentation", + "source_organization_name": "Cohere Labs", + "source_organization_url": "https://cohere.com", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "grok-3-mini", + "id": "xai/grok-3-mini", + "developer": "xai", + "inference_platform": "unknown", + "additional_details": { + "display_name": "Grok 3 Mini" + } + }, + "evaluation_results": [ + { + "evaluation_name": "Global MMLU Lite", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Global MMLU Lite", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.673 + } + }, + { + "evaluation_name": "Culturally Sensitive", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Sensitive", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6717 + } + }, + { + "evaluation_name": "Culturally Agnostic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + 
"metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Agnostic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6743 + } + }, + { + "evaluation_name": "Arabic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Arabic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.755, + "details": { + "confidence_interval": 0.0421477711557175 + } + } + }, + { + "evaluation_name": "English", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - English", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5075, + "details": { + "confidence_interval": 0.0489935869046875 + } + } + }, + { + "evaluation_name": "Bengali", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Bengali", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7355, + "details": { + "confidence_interval": 0.0433858795425096 + } + } + }, + { + "evaluation_name": "German", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - German", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6591, + "details": { + "confidence_interval": 0.0465089008517938 + } + } + }, + { + "evaluation_name": "French", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - French", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.485, + "details": { + "confidence_interval": 0.0489770450552826 + } + } + }, + { + "evaluation_name": "Hindi", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Hindi", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.56, + "details": { + "confidence_interval": 0.0486450268120758 + } + } + }, + { + "evaluation_name": "Indonesian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Indonesian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 
0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.725, + "details": { + "confidence_interval": 0.0437575951229009 + } + } + }, + { + "evaluation_name": "Italian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Italian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.696, + "details": { + "confidence_interval": 0.0451914267324277 + } + } + }, + { + "evaluation_name": "Japanese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Japanese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6575, + "details": { + "confidence_interval": 0.0465046373306654 + } + } + }, + { + "evaluation_name": "Korean", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Korean", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7325, + "details": { + "confidence_interval": 0.0433794261948387 + } + } + }, + { + "evaluation_name": "Portuguese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Portuguese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6275, + "details": { + "confidence_interval": 0.04737924097692 + } + } + }, + { + "evaluation_name": "Spanish", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Spanish", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.61, + "details": { + "confidence_interval": 0.0477986153942541 + } + } + }, + { + "evaluation_name": "Swahili", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Swahili", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7625, + "details": { + "confidence_interval": 0.0417032427788918 + } + } + }, + { + "evaluation_name": "Yoruba", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Yoruba", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8296, + "details": { + 
"confidence_interval": 0.0368941238003664 + } + } + }, + { + "evaluation_name": "Chinese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Chinese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5564, + "details": { + "confidence_interval": 0.0487474461160897 + } + } + }, + { + "evaluation_name": "Burmese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Burmese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8693, + "details": { + "confidence_interval": 0.0331103067375873 + } + } + } + ] +} \ No newline at end of file diff --git a/data/global-mmlu-lite/xai/grok-4-0709/bcfa473c-1686-42af-8d07-4c8b92c3d864.json b/data/global-mmlu-lite/xai/grok-4-0709/bcfa473c-1686-42af-8d07-4c8b92c3d864.json new file mode 100644 index 000000000..0a731d29e --- /dev/null +++ b/data/global-mmlu-lite/xai/grok-4-0709/bcfa473c-1686-42af-8d07-4c8b92c3d864.json @@ -0,0 +1,451 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "global-mmlu-lite/xai_grok-4-0709/1770682039.8556428", + "retrieved_timestamp": "1770682039.8556428", + "source_metadata": { + "source_name": "Global MMLU Lite", + "source_type": "documentation", + "source_organization_name": "Cohere Labs", + "source_organization_url": "https://cohere.com", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "grok-4-0709", + "id": "xai/grok-4-0709", + "developer": "xai", + "inference_platform": "unknown", + "additional_details": { + "display_name": "Grok 4" + } + }, + "evaluation_results": [ + { + "evaluation_name": "Global MMLU Lite", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Global MMLU Lite", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8881 + } + }, + { + "evaluation_name": "Culturally Sensitive", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Sensitive", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8862 + } + }, + { + "evaluation_name": "Culturally Agnostic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Culturally Agnostic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.89 + } + }, + { + "evaluation_name": "Arabic", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + 
"https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Arabic", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.885, + "details": { + "confidence_interval": 0.0312635759101603 + } + } + }, + { + "evaluation_name": "English", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - English", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.905, + "details": { + "confidence_interval": 0.0287345359327925 + } + } + }, + { + "evaluation_name": "Bengali", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Bengali", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8925, + "details": { + "confidence_interval": 0.0303547345865505 + } + } + }, + { + "evaluation_name": "German", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - German", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8725, + "details": { + "confidence_interval": 0.0326855581520567 + } + } + }, + { + "evaluation_name": "French", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - French", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.875, + "details": { + "confidence_interval": 0.0324098580108514 + } + } + }, + { + "evaluation_name": "Hindi", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Hindi", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8675, + "details": { + "confidence_interval": 0.0332246776628893 + } + } + }, + { + "evaluation_name": "Indonesian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Indonesian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.89, + "details": { + "confidence_interval": 0.0306626327370121 + } + } + }, + { + "evaluation_name": "Italian", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + 
"evaluation_description": "Global MMLU Lite - Italian", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9025, + "details": { + "confidence_interval": 0.0290699315059157 + } + } + }, + { + "evaluation_name": "Japanese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Japanese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.87, + "details": { + "confidence_interval": 0.0329571309666248 + } + } + }, + { + "evaluation_name": "Korean", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Korean", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.895, + "details": { + "confidence_interval": 0.0300416832365769 + } + } + }, + { + "evaluation_name": "Portuguese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Portuguese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8725, + "details": { + "confidence_interval": 0.0326855581520567 + } + } + }, + { + "evaluation_name": "Spanish", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Spanish", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9075, + "details": { + "confidence_interval": 0.0283930651251164 + } + } + }, + { + "evaluation_name": "Swahili", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Swahili", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.91, + "details": { + "confidence_interval": 0.0280452971732717 + } + } + }, + { + "evaluation_name": "Yoruba", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Yoruba", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.905, + "details": { + "confidence_interval": 0.0287345359327925 + } + } + }, + { + "evaluation_name": "Chinese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Chinese", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8525, + "details": { + "confidence_interval": 0.0347505193336969 + } + } + }, + { + "evaluation_name": "Burmese", + "source_data": { + "dataset_name": "global-mmlu-lite", + "source_type": "url", + "url": [ + "https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite" + ] + }, + "metric_config": { + "evaluation_description": "Global MMLU Lite - Burmese", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9075, + "details": { + "confidence_interval": 0.0283930651251164 + } + } + } + ] +} \ No newline at end of file diff --git a/data/hellaswag/eleutherai/pythia-1b-v0/a796664d-51fa-49b0-ae93-b446171f5521.json b/data/hellaswag/eleutherai/pythia-1b-v0/a796664d-51fa-49b0-ae93-b446171f5521.json deleted file mode 100644 index 3f264f0c6..000000000 --- a/data/hellaswag/eleutherai/pythia-1b-v0/a796664d-51fa-49b0-ae93-b446171f5521.json +++ /dev/null @@ -1,1343 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "helm/eleutherai/pythia-1b-v0/hellaswag/1751729998", - "retrieved_timestamp": "1751729998", - "source_data": { - "dataset_name": "hellaswag", - "samples_number": 10, - "sample_ids": [ - "id44874", - "id47299", - "id45277", - "id41992", - "id49438", - "id42841", - "id46128", - "id47975", - "id41468", - "id44284" - ], - "additional_details": { - "scenario_name": "helm.benchmark.scenarios.commonsense_scenario.HellaSwagScenario", - "scenario_args": {} - } - }, - "source_metadata": { - "source_name": "helm", - "source_type": "evaluation_run", - "source_organization_name": "Unknown", - "evaluator_relationship": "other" - }, - "model_info": { - "name": "eleutherai/pythia-1b-v0", - "id": "eleutherai/pythia-1b-v0", - "developer": "eleutherai", - "inference_platform": "huggingface" - }, - "evaluation_results": [ - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1751729998", - "metric_config": { - "evaluation_description": "exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.3, - "details": { - "count": 1, - "sum": 0.3, - "sum_squared": 0.09, - "min": 0.3, - "max": 0.3, - "mean": 0.3, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": null - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about common sense.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1751729998", - "metric_config": { - "evaluation_description": "exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.3, - "details": { - "count": 1, - "sum": 0.3, - "sum_squared": 0.09, - "min": 0.3, - "max": 0.3, - "mean": 0.3, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": null - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about common sense.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - 
"output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1751729998", - "metric_config": { - "evaluation_description": "exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.3, - "details": { - "count": 1, - "sum": 0.3, - "sum_squared": 0.09, - "min": 0.3, - "max": 0.3, - "mean": 0.3, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "robustness", - "robustness": true, - "fairness": false, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about common sense.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1751729998", - "metric_config": { - "evaluation_description": "exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.3, - "details": { - "count": 1, - "sum": 0.3, - "sum_squared": 0.09, - "min": 0.3, - "max": 0.3, - "mean": 0.3, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "fairness", - "robustness": false, - "fairness": true, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about common sense.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1751729998", - "metric_config": { - "evaluation_description": "exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.3, - "details": { - "count": 1, - "sum": 0.3, - "sum_squared": 0.09, - "min": 0.3, - "max": 0.3, - "mean": 0.3, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "robustness", - "robustness": true, - "fairness": false, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about common sense.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1751729998", - "metric_config": { - "evaluation_description": "exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.3, - "details": { - "count": 1, - "sum": 0.3, - "sum_squared": 0.09, - "min": 0.3, - "max": 0.3, - "mean": 0.3, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "fairness", - "robustness": false, - "fairness": true, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - 
"max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about common sense.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1751729998", - "metric_config": { - "evaluation_description": "quasi_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.3, - "details": { - "count": 1, - "sum": 0.3, - "sum_squared": 0.09, - "min": 0.3, - "max": 0.3, - "mean": 0.3, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": null - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about common sense.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1751729998", - "metric_config": { - "evaluation_description": "quasi_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.3, - "details": { - "count": 1, - "sum": 0.3, - "sum_squared": 0.09, - "min": 0.3, - "max": 0.3, - "mean": 0.3, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": null - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about common sense.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1751729998", - "metric_config": { - "evaluation_description": "quasi_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.3, - "details": { - "count": 1, - "sum": 0.3, - "sum_squared": 0.09, - "min": 0.3, - "max": 0.3, - "mean": 0.3, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "robustness", - "robustness": true, - "fairness": false, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about common sense.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1751729998", - "metric_config": { - "evaluation_description": "quasi_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.3, - "details": { - "count": 1, - "sum": 0.3, - "sum_squared": 0.09, - "min": 0.3, - "max": 0.3, - "mean": 0.3, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "fairness", - "robustness": false, - "fairness": true, - "computed_on": "worst", - "seed": null - } - } - }, - 
"generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about common sense.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1751729998", - "metric_config": { - "evaluation_description": "quasi_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.3, - "details": { - "count": 1, - "sum": 0.3, - "sum_squared": 0.09, - "min": 0.3, - "max": 0.3, - "mean": 0.3, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "robustness", - "robustness": true, - "fairness": false, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about common sense.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1751729998", - "metric_config": { - "evaluation_description": "quasi_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.3, - "details": { - "count": 1, - "sum": 0.3, - "sum_squared": 0.09, - "min": 0.3, - "max": 0.3, - "mean": 0.3, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "fairness", - "robustness": false, - "fairness": true, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about common sense.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1751729998", - "metric_config": { - "evaluation_description": "prefix_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.3, - "details": { - "count": 1, - "sum": 0.3, - "sum_squared": 0.09, - "min": 0.3, - "max": 0.3, - "mean": 0.3, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": null - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about common sense.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1751729998", - "metric_config": { - "evaluation_description": "prefix_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.3, - "details": { - "count": 1, - "sum": 0.3, - "sum_squared": 0.09, - "min": 0.3, - "max": 0.3, - "mean": 0.3, - "variance": 0.0, - "stddev": 0.0, - "split": 
"valid", - "perturbation": null - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about common sense.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1751729998", - "metric_config": { - "evaluation_description": "prefix_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.3, - "details": { - "count": 1, - "sum": 0.3, - "sum_squared": 0.09, - "min": 0.3, - "max": 0.3, - "mean": 0.3, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "robustness", - "robustness": true, - "fairness": false, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about common sense.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1751729998", - "metric_config": { - "evaluation_description": "prefix_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.3, - "details": { - "count": 1, - "sum": 0.3, - "sum_squared": 0.09, - "min": 0.3, - "max": 0.3, - "mean": 0.3, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "fairness", - "robustness": false, - "fairness": true, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about common sense.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1751729998", - "metric_config": { - "evaluation_description": "prefix_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.3, - "details": { - "count": 1, - "sum": 0.3, - "sum_squared": 0.09, - "min": 0.3, - "max": 0.3, - "mean": 0.3, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "robustness", - "robustness": true, - "fairness": false, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about common sense.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1751729998", - "metric_config": { - "evaluation_description": "prefix_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.3, - 
"details": { - "count": 1, - "sum": 0.3, - "sum_squared": 0.09, - "min": 0.3, - "max": 0.3, - "mean": 0.3, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "fairness", - "robustness": false, - "fairness": true, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about common sense.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1751729998", - "metric_config": { - "evaluation_description": "quasi_prefix_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.3, - "details": { - "count": 1, - "sum": 0.3, - "sum_squared": 0.09, - "min": 0.3, - "max": 0.3, - "mean": 0.3, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": null - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about common sense.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1751729998", - "metric_config": { - "evaluation_description": "quasi_prefix_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.3, - "details": { - "count": 1, - "sum": 0.3, - "sum_squared": 0.09, - "min": 0.3, - "max": 0.3, - "mean": 0.3, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": null - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about common sense.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1751729998", - "metric_config": { - "evaluation_description": "quasi_prefix_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.3, - "details": { - "count": 1, - "sum": 0.3, - "sum_squared": 0.09, - "min": 0.3, - "max": 0.3, - "mean": 0.3, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "robustness", - "robustness": true, - "fairness": false, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about common sense.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1751729998", - "metric_config": { - "evaluation_description": "quasi_prefix_exact_match", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.3, - "details": { - "count": 1, - "sum": 0.3, - "sum_squared": 0.09, - "min": 0.3, - "max": 0.3, - "mean": 0.3, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "fairness", - "robustness": false, - "fairness": true, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about common sense.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1751729998", - "metric_config": { - "evaluation_description": "quasi_prefix_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.3, - "details": { - "count": 1, - "sum": 0.3, - "sum_squared": 0.09, - "min": 0.3, - "max": 0.3, - "mean": 0.3, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "robustness", - "robustness": true, - "fairness": false, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about common sense.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1751729998", - "metric_config": { - "evaluation_description": "quasi_prefix_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.3, - "details": { - "count": 1, - "sum": 0.3, - "sum_squared": 0.09, - "min": 0.3, - "max": 0.3, - "mean": 0.3, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "fairness", - "robustness": false, - "fairness": true, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about common sense.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - } - ], - "detailed_evaluation_results_per_samples": [ - { - "sample_id": "id44874", - "input": "Personal Care and Style: [header] How to dye your hair with semi permanent hair dye [title] Find the color you want. [step] There are many popular brands and hundreds of different colors to choose from. Semi-permanent dyes can be found in a variety of places, ranging from grocery stores to specialized fashion shops, with the biggest selection at beauty supply stores. ", - "prompt": "The following are multiple choice questions (with answers) about common sense.\n\nQuestion: Putting on makeup: The words how to apply mascara appear. A woman with long black hair\nA. presents a bow and ribbon, followed by painted eye shadow, and a gift wrap.\nB. appears on a black screen with white lines, soap, and information at the bottom.\nC. appears in the large window.\nD. 
is talking to the camera.\nAnswer: D\n\nQuestion: Polishing shoes: The man uses the polish on his shoes. The man brushes his shoes with occasional blowing. The man shines his shoes. The man\nA. replaces the frame and arm strap on the shoes.\nB. shines balls of polish onto his shoes.\nC. puts the shoes on the stand to style them.\nD. talks to the camera.\nAnswer: D\n\nQuestion: Laying tile: The floor is swept, cleaned, and prepared for the process. The men begin to lay the vinyl flooring across the floor one piece at a time. The final result\nA. is shown on the screen.\nB. of the carpet is shown.\nC. is displayed in the male success rate.\nD. is seen with one ran through the floor.\nAnswer: A\n\nQuestion: Waterskiing: A person is water skiing behind a boat. They are going back and forth behind the boat. Words\nA. are on the screen.\nB. come onto the screen at the end.\nC. appear on the screen.\nD. are shown on the screen.\nAnswer: D\n\nQuestion: Ping-pong: We see an instructional title screen. The man demonstrates hitting a ball and we see him in play hitting the ball. We\nA. see the man beating a bag over a net.\nB. see the ending title screen.\nC. see a disc fly from 2 people.\nD. see the ending title screen again.\nAnswer: B\n\nQuestion: Personal Care and Style: [header] How to dye your hair with semi permanent hair dye [title] Find the color you want. [step] There are many popular brands and hundreds of different colors to choose from. Semi-permanent dyes can be found in a variety of places, ranging from grocery stores to specialized fashion shops, with the biggest selection at beauty supply stores. \nA. It is important to select the color that represents your hair type when you register your hair color. [substeps] Traditional semi-permanent dyes will generally not be available for hair color, like blow-dryers, curling irons, and appliances.\nB. If you're not planning on dying your hair, there are other coloration measures you can take to dye your hair. [step] Photoshop hd darkers work well, but don't lack the style that can be coupled with it.\nC. Pick the color that's your favorite, matches your wardrobe best, and/or is most flattering for your eye color and skin tone. Semi-permanent dyes work on all hair colors, but show up brightest on light hair.\nD. However, you can also take your color, added color, and texture into account when deciding what to dye, and what you will use it for. [substeps] Consider adding your hair dye to your hair if you have it long or curly.\nAnswer:", - "ground_truth": [ - "C" - ], - "response": "B", - "choices": [ - [ - "A", - "It is important to select the color that represents your hair type when you register your hair color. [substeps] Traditional semi-permanent dyes will generally not be available for hair color, like blow-dryers, curling irons, and appliances." - ], - [ - "B", - "If you're not planning on dying your hair, there are other coloration measures you can take to dye your hair. [step] Photoshop hd darkers work well, but don't lack the style that can be coupled with it." - ], - [ - "C", - "Pick the color that's your favorite, matches your wardrobe best, and/or is most flattering for your eye color and skin tone. Semi-permanent dyes work on all hair colors, but show up brightest on light hair." - ], - [ - "D", - "However, you can also take your color, added color, and texture into account when deciding what to dye, and what you will use it for. [substeps] Consider adding your hair dye to your hair if you have it long or curly." 
- ] - ] - }, - { - "sample_id": "id47299", - "input": "Home and Garden: [header] How to grow grape vines [title] Choose a type of grape. [step] As with any plant, certain types of grapes grow better in different areas and offer up different flavors and appearances. There are three general types of grapes: american, european, and muscadine grapes. ", - "prompt": "The following are multiple choice questions (with answers) about common sense.\n\nQuestion: Putting on makeup: The words how to apply mascara appear. A woman with long black hair\nA. presents a bow and ribbon, followed by painted eye shadow, and a gift wrap.\nB. appears on a black screen with white lines, soap, and information at the bottom.\nC. appears in the large window.\nD. is talking to the camera.\nAnswer: D\n\nQuestion: Polishing shoes: The man uses the polish on his shoes. The man brushes his shoes with occasional blowing. The man shines his shoes. The man\nA. replaces the frame and arm strap on the shoes.\nB. shines balls of polish onto his shoes.\nC. puts the shoes on the stand to style them.\nD. talks to the camera.\nAnswer: D\n\nQuestion: Laying tile: The floor is swept, cleaned, and prepared for the process. The men begin to lay the vinyl flooring across the floor one piece at a time. The final result\nA. is shown on the screen.\nB. of the carpet is shown.\nC. is displayed in the male success rate.\nD. is seen with one ran through the floor.\nAnswer: A\n\nQuestion: Waterskiing: A person is water skiing behind a boat. They are going back and forth behind the boat. Words\nA. are on the screen.\nB. come onto the screen at the end.\nC. appear on the screen.\nD. are shown on the screen.\nAnswer: D\n\nQuestion: Ping-pong: We see an instructional title screen. The man demonstrates hitting a ball and we see him in play hitting the ball. We\nA. see the man beating a bag over a net.\nB. see the ending title screen.\nC. see a disc fly from 2 people.\nD. see the ending title screen again.\nAnswer: B\n\nQuestion: Home and Garden: [header] How to grow grape vines [title] Choose a type of grape. [step] As with any plant, certain types of grapes grow better in different areas and offer up different flavors and appearances. There are three general types of grapes: american, european, and muscadine grapes. \nA. The' bat' variety is quite dark, with a hectic shape and a bit of texture. Popular grapes grow quickly for the same years and are often planted with white grapes and a rose color.\nB. [substeps] Traditional grape grapes are made of rich grapes and have light yellow and orange coloring. If the grapes are not from the wild and you are looking for a more bitter flavor, look for grapes grown in known regions that are not based on the wild grape.\nC. [substeps] American grapes are naturally sweet and plump, with skins that are slightly crisped. European grapes grow best in warm, dry conditions with ripened fruits on a stalk.\nD. American grapes grow best in warm, sunny climates like that of central california. European grapes are common in europe and northern parts of the us, and muscadine grapes are commonly found in the southern us.\nAnswer:", - "ground_truth": [ - "D" - ], - "response": "B", - "choices": [ - [ - "A", - "The' bat' variety is quite dark, with a hectic shape and a bit of texture. Popular grapes grow quickly for the same years and are often planted with white grapes and a rose color." - ], - [ - "B", - "[substeps] Traditional grape grapes are made of rich grapes and have light yellow and orange coloring. 
If the grapes are not from the wild and you are looking for a more bitter flavor, look for grapes grown in known regions that are not based on the wild grape." - ], - [ - "C", - "[substeps] American grapes are naturally sweet and plump, with skins that are slightly crisped. European grapes grow best in warm, dry conditions with ripened fruits on a stalk." - ], - [ - "D", - "American grapes grow best in warm, sunny climates like that of central california. European grapes are common in europe and northern parts of the us, and muscadine grapes are commonly found in the southern us." - ] - ] - }, - { - "sample_id": "id45277", - "input": "Personal Care and Style: [header] How to do edwardian hairstyles [title] Backcomb or tease your hair to create volume. [step] Scrunch some volumizing powder or mousse into your roots, then grab a teasing brush or boar bristle flat brush. Starting at your hairline, take a section of hair about the size of your brush and hold the ends up in the air. ", - "prompt": "The following are multiple choice questions (with answers) about common sense.\n\nQuestion: Putting on makeup: The words how to apply mascara appear. A woman with long black hair\nA. presents a bow and ribbon, followed by painted eye shadow, and a gift wrap.\nB. appears on a black screen with white lines, soap, and information at the bottom.\nC. appears in the large window.\nD. is talking to the camera.\nAnswer: D\n\nQuestion: Polishing shoes: The man uses the polish on his shoes. The man brushes his shoes with occasional blowing. The man shines his shoes. The man\nA. replaces the frame and arm strap on the shoes.\nB. shines balls of polish onto his shoes.\nC. puts the shoes on the stand to style them.\nD. talks to the camera.\nAnswer: D\n\nQuestion: Laying tile: The floor is swept, cleaned, and prepared for the process. The men begin to lay the vinyl flooring across the floor one piece at a time. The final result\nA. is shown on the screen.\nB. of the carpet is shown.\nC. is displayed in the male success rate.\nD. is seen with one ran through the floor.\nAnswer: A\n\nQuestion: Waterskiing: A person is water skiing behind a boat. They are going back and forth behind the boat. Words\nA. are on the screen.\nB. come onto the screen at the end.\nC. appear on the screen.\nD. are shown on the screen.\nAnswer: D\n\nQuestion: Ping-pong: We see an instructional title screen. The man demonstrates hitting a ball and we see him in play hitting the ball. We\nA. see the man beating a bag over a net.\nB. see the ending title screen.\nC. see a disc fly from 2 people.\nD. see the ending title screen again.\nAnswer: B\n\nQuestion: Personal Care and Style: [header] How to do edwardian hairstyles [title] Backcomb or tease your hair to create volume. [step] Scrunch some volumizing powder or mousse into your roots, then grab a teasing brush or boar bristle flat brush. Starting at your hairline, take a section of hair about the size of your brush and hold the ends up in the air. \nA. Gently pull the ends up towards the roots. [title] Flatten your hairstyle to create natural streaks, and then repeat the process for extra volume.\nB. Lightly brush the hair back towards the root for 2-3 strokes. Work towards the back of your hair, repeating the process until all of your hair is teased.\nC. Sweep the styling iron from your ear (teasing the ends slightly in the direction toward your root) to create a teasing mist. Move around so the entire section is teased.\nD. Pin the sections above or below your roots so your hair lays flat. 
Lift a section of hair up and pull it down.\nAnswer:", - "ground_truth": [ - "B" - ], - "response": "B", - "choices": [ - [ - "A", - "Gently pull the ends up towards the roots. [title] Flatten your hairstyle to create natural streaks, and then repeat the process for extra volume." - ], - [ - "B", - "Lightly brush the hair back towards the root for 2-3 strokes. Work towards the back of your hair, repeating the process until all of your hair is teased." - ], - [ - "C", - "Sweep the styling iron from your ear (teasing the ends slightly in the direction toward your root) to create a teasing mist. Move around so the entire section is teased." - ], - [ - "D", - "Pin the sections above or below your roots so your hair lays flat. Lift a section of hair up and pull it down." - ] - ] - }, - { - "sample_id": "id41992", - "input": "Having an ice cream: A young child is seen holding an ice cream cone and speaking to the camera while smiling. She", - "prompt": "The following are multiple choice questions (with answers) about common sense.\n\nQuestion: Putting on makeup: The words how to apply mascara appear. A woman with long black hair\nA. presents a bow and ribbon, followed by painted eye shadow, and a gift wrap.\nB. appears on a black screen with white lines, soap, and information at the bottom.\nC. appears in the large window.\nD. is talking to the camera.\nAnswer: D\n\nQuestion: Polishing shoes: The man uses the polish on his shoes. The man brushes his shoes with occasional blowing. The man shines his shoes. The man\nA. replaces the frame and arm strap on the shoes.\nB. shines balls of polish onto his shoes.\nC. puts the shoes on the stand to style them.\nD. talks to the camera.\nAnswer: D\n\nQuestion: Laying tile: The floor is swept, cleaned, and prepared for the process. The men begin to lay the vinyl flooring across the floor one piece at a time. The final result\nA. is shown on the screen.\nB. of the carpet is shown.\nC. is displayed in the male success rate.\nD. is seen with one ran through the floor.\nAnswer: A\n\nQuestion: Waterskiing: A person is water skiing behind a boat. They are going back and forth behind the boat. Words\nA. are on the screen.\nB. come onto the screen at the end.\nC. appear on the screen.\nD. are shown on the screen.\nAnswer: D\n\nQuestion: Ping-pong: We see an instructional title screen. The man demonstrates hitting a ball and we see him in play hitting the ball. We\nA. see the man beating a bag over a net.\nB. see the ending title screen.\nC. see a disc fly from 2 people.\nD. see the ending title screen again.\nAnswer: B\n\nQuestion: Having an ice cream: A young child is seen holding an ice cream cone and speaking to the camera while smiling. She\nA. continues speaking while using her mouth and pointing to the camera.\nB. continues speaking more and picking up ice cream and taking a chunk.\nC. licks the ice cream cone and continues eating around her toy.\nD. speaks to the camera man holding a doll in the other hand, then sits down on the ground.\nAnswer:", - "ground_truth": [ - "D" - ], - "response": "D", - "choices": [ - [ - "A", - "continues speaking while using her mouth and pointing to the camera." - ], - [ - "B", - "continues speaking more and picking up ice cream and taking a chunk." - ], - [ - "C", - "licks the ice cream cone and continues eating around her toy." - ], - [ - "D", - "speaks to the camera man holding a doll in the other hand, then sits down on the ground." 
- ] - ] - }, - { - "sample_id": "id49438", - "input": "Personal Care and Style: [header] How to use a credit card system [title] Open a merchant account. [step] A merchant account is a bank account, but it is different from a business checking account. A merchant account communicates with your customers' credit card issuers. ", - "prompt": "The following are multiple choice questions (with answers) about common sense.\n\nQuestion: Putting on makeup: The words how to apply mascara appear. A woman with long black hair\nA. presents a bow and ribbon, followed by painted eye shadow, and a gift wrap.\nB. appears on a black screen with white lines, soap, and information at the bottom.\nC. appears in the large window.\nD. is talking to the camera.\nAnswer: D\n\nQuestion: Polishing shoes: The man uses the polish on his shoes. The man brushes his shoes with occasional blowing. The man shines his shoes. The man\nA. replaces the frame and arm strap on the shoes.\nB. shines balls of polish onto his shoes.\nC. puts the shoes on the stand to style them.\nD. talks to the camera.\nAnswer: D\n\nQuestion: Laying tile: The floor is swept, cleaned, and prepared for the process. The men begin to lay the vinyl flooring across the floor one piece at a time. The final result\nA. is shown on the screen.\nB. of the carpet is shown.\nC. is displayed in the male success rate.\nD. is seen with one ran through the floor.\nAnswer: A\n\nQuestion: Waterskiing: A person is water skiing behind a boat. They are going back and forth behind the boat. Words\nA. are on the screen.\nB. come onto the screen at the end.\nC. appear on the screen.\nD. are shown on the screen.\nAnswer: D\n\nQuestion: Ping-pong: We see an instructional title screen. The man demonstrates hitting a ball and we see him in play hitting the ball. We\nA. see the man beating a bag over a net.\nB. see the ending title screen.\nC. see a disc fly from 2 people.\nD. see the ending title screen again.\nAnswer: B\n\nQuestion: Personal Care and Style: [header] How to use a credit card system [title] Open a merchant account. [step] A merchant account is a bank account, but it is different from a business checking account. A merchant account communicates with your customers' credit card issuers. \nA. You fill out credits and other credit forms into your merchant account. You can do any transactions you'd like on a merchant account.\nB. It takes the customer's credit card information and verifies and approves the sale. Your merchant account communicates with the card issuer to authenticate the user, confirming that the card is not stolen and that the customer has enough of a balance to pay for the sale.\nC. Credit cards handle the transactions linked to your account. [title] Determine your business's credit score.\nD. Credit card companies offer a variety of services : [substeps] Subscription. Credit card issuers charge a fee for a card and fill out forms to use.\nAnswer:", - "ground_truth": [ - "B" - ], - "response": "D", - "choices": [ - [ - "A", - "You fill out credits and other credit forms into your merchant account. You can do any transactions you'd like on a merchant account." - ], - [ - "B", - "It takes the customer's credit card information and verifies and approves the sale. Your merchant account communicates with the card issuer to authenticate the user, confirming that the card is not stolen and that the customer has enough of a balance to pay for the sale." - ], - [ - "C", - "Credit cards handle the transactions linked to your account. 
[title] Determine your business's credit score." - ], - [ - "D", - "Credit card companies offer a variety of services : [substeps] Subscription. Credit card issuers charge a fee for a card and fill out forms to use." - ] - ] - }, - { - "sample_id": "id42841", - "input": "Playing accordion: The fingers are pressing the black keys. One finger is pressing one key. The fingers", - "prompt": "The following are multiple choice questions (with answers) about common sense.\n\nQuestion: Putting on makeup: The words how to apply mascara appear. A woman with long black hair\nA. presents a bow and ribbon, followed by painted eye shadow, and a gift wrap.\nB. appears on a black screen with white lines, soap, and information at the bottom.\nC. appears in the large window.\nD. is talking to the camera.\nAnswer: D\n\nQuestion: Polishing shoes: The man uses the polish on his shoes. The man brushes his shoes with occasional blowing. The man shines his shoes. The man\nA. replaces the frame and arm strap on the shoes.\nB. shines balls of polish onto his shoes.\nC. puts the shoes on the stand to style them.\nD. talks to the camera.\nAnswer: D\n\nQuestion: Laying tile: The floor is swept, cleaned, and prepared for the process. The men begin to lay the vinyl flooring across the floor one piece at a time. The final result\nA. is shown on the screen.\nB. of the carpet is shown.\nC. is displayed in the male success rate.\nD. is seen with one ran through the floor.\nAnswer: A\n\nQuestion: Waterskiing: A person is water skiing behind a boat. They are going back and forth behind the boat. Words\nA. are on the screen.\nB. come onto the screen at the end.\nC. appear on the screen.\nD. are shown on the screen.\nAnswer: D\n\nQuestion: Ping-pong: We see an instructional title screen. The man demonstrates hitting a ball and we see him in play hitting the ball. We\nA. see the man beating a bag over a net.\nB. see the ending title screen.\nC. see a disc fly from 2 people.\nD. see the ending title screen again.\nAnswer: B\n\nQuestion: Playing accordion: The fingers are pressing the black keys. One finger is pressing one key. The fingers\nA. are moving to different keys and pressing them.\nB. are pumping the hand.\nC. stopped playing and stop at the bottom.\nD. flay the black keys in the middle of the keyboard.\nAnswer:", - "ground_truth": [ - "A" - ], - "response": "D", - "choices": [ - [ - "A", - "are moving to different keys and pressing them." - ], - [ - "B", - "are pumping the hand." - ], - [ - "C", - "stopped playing and stop at the bottom." - ], - [ - "D", - "flay the black keys in the middle of the keyboard." - ] - ] - }, - { - "sample_id": "id46128", - "input": "Personal Care and Style: [header] How to wear a denim dress [title] Consider the wash of the dress. [step] Denim comes in a variety of colors, but most denim dresses are usually some shade of blue. However, you still have options when it comes to the depth of the blue. ", - "prompt": "The following are multiple choice questions (with answers) about common sense.\n\nQuestion: Putting on makeup: The words how to apply mascara appear. A woman with long black hair\nA. presents a bow and ribbon, followed by painted eye shadow, and a gift wrap.\nB. appears on a black screen with white lines, soap, and information at the bottom.\nC. appears in the large window.\nD. is talking to the camera.\nAnswer: D\n\nQuestion: Polishing shoes: The man uses the polish on his shoes. The man brushes his shoes with occasional blowing. The man shines his shoes. The man\nA. 
replaces the frame and arm strap on the shoes.\nB. shines balls of polish onto his shoes.\nC. puts the shoes on the stand to style them.\nD. talks to the camera.\nAnswer: D\n\nQuestion: Laying tile: The floor is swept, cleaned, and prepared for the process. The men begin to lay the vinyl flooring across the floor one piece at a time. The final result\nA. is shown on the screen.\nB. of the carpet is shown.\nC. is displayed in the male success rate.\nD. is seen with one ran through the floor.\nAnswer: A\n\nQuestion: Waterskiing: A person is water skiing behind a boat. They are going back and forth behind the boat. Words\nA. are on the screen.\nB. come onto the screen at the end.\nC. appear on the screen.\nD. are shown on the screen.\nAnswer: D\n\nQuestion: Ping-pong: We see an instructional title screen. The man demonstrates hitting a ball and we see him in play hitting the ball. We\nA. see the man beating a bag over a net.\nB. see the ending title screen.\nC. see a disc fly from 2 people.\nD. see the ending title screen again.\nAnswer: B\n\nQuestion: Personal Care and Style: [header] How to wear a denim dress [title] Consider the wash of the dress. [step] Denim comes in a variety of colors, but most denim dresses are usually some shade of blue. However, you still have options when it comes to the depth of the blue. \nA. Some denim washes work best for more polished looks, while others are ideal for a relaxed outfit. [substeps] Dark wash denim tends to have a more dressed up look, so they work well for work or an evening out.\nB. Depending on your dress's style, some of the colors will show up darker, and others won't. [substeps] In general, the darker the shade of the denim, the darker the dress.\nC. [substeps] Jeans tend to look better when washed and ironed on both sides. Likewise, jeans can also be washed and ironed on both sides of the dress, but with less work involved.\nD. [substeps] Look for dresses made of the same material or a lighter blue. Choose washes made of the same material, usually denim colors such as fuchsia, navy, and tan.\nAnswer:", - "ground_truth": [ - "A" - ], - "response": "B", - "choices": [ - [ - "A", - "Some denim washes work best for more polished looks, while others are ideal for a relaxed outfit. [substeps] Dark wash denim tends to have a more dressed up look, so they work well for work or an evening out." - ], - [ - "B", - "Depending on your dress's style, some of the colors will show up darker, and others won't. [substeps] In general, the darker the shade of the denim, the darker the dress." - ], - [ - "C", - "[substeps] Jeans tend to look better when washed and ironed on both sides. Likewise, jeans can also be washed and ironed on both sides of the dress, but with less work involved." - ], - [ - "D", - "[substeps] Look for dresses made of the same material or a lighter blue. Choose washes made of the same material, usually denim colors such as fuchsia, navy, and tan." - ] - ] - }, - { - "sample_id": "id47975", - "input": "Family Life: [header] How to adopt a russian baby [title] Fill out form i-600a, application for advance processing of orphan petition. [step] This form is available through the u.s. citizenship and immigration services, and it is the necessary first step in obtaining an immigrant visa for your adoptive child. You do not need to have a specific child in mind to complete this form. 
", - "prompt": "The following are multiple choice questions (with answers) about common sense.\n\nQuestion: Putting on makeup: The words how to apply mascara appear. A woman with long black hair\nA. presents a bow and ribbon, followed by painted eye shadow, and a gift wrap.\nB. appears on a black screen with white lines, soap, and information at the bottom.\nC. appears in the large window.\nD. is talking to the camera.\nAnswer: D\n\nQuestion: Polishing shoes: The man uses the polish on his shoes. The man brushes his shoes with occasional blowing. The man shines his shoes. The man\nA. replaces the frame and arm strap on the shoes.\nB. shines balls of polish onto his shoes.\nC. puts the shoes on the stand to style them.\nD. talks to the camera.\nAnswer: D\n\nQuestion: Laying tile: The floor is swept, cleaned, and prepared for the process. The men begin to lay the vinyl flooring across the floor one piece at a time. The final result\nA. is shown on the screen.\nB. of the carpet is shown.\nC. is displayed in the male success rate.\nD. is seen with one ran through the floor.\nAnswer: A\n\nQuestion: Waterskiing: A person is water skiing behind a boat. They are going back and forth behind the boat. Words\nA. are on the screen.\nB. come onto the screen at the end.\nC. appear on the screen.\nD. are shown on the screen.\nAnswer: D\n\nQuestion: Ping-pong: We see an instructional title screen. The man demonstrates hitting a ball and we see him in play hitting the ball. We\nA. see the man beating a bag over a net.\nB. see the ending title screen.\nC. see a disc fly from 2 people.\nD. see the ending title screen again.\nAnswer: B\n\nQuestion: Family Life: [header] How to adopt a russian baby [title] Fill out form i-600a, application for advance processing of orphan petition. [step] This form is available through the u.s. citizenship and immigration services, and it is the necessary first step in obtaining an immigrant visa for your adoptive child. You do not need to have a specific child in mind to complete this form. \nA. [substeps] The form needs to include information about the child and the country they are from. Don't forget to include all the information that you know: \" parent, child, next of kin.\nB. [title] Contact the uscis or a state-licensed adoption agency to request a home study. [step] Home studies must be performed in order to determine the fitness of you and the home environment you intend to bring a child into.\nC. [title] Complete section ii of form ii of visa application, application for advance processing of a russian baby. [step] This form is available through the u.s.\nD. The form is brief to provide basic information about yourself and your adoptive child, as well as what you are looking for and need to bring with you. [title] Submit the completed form via the mail or by fax.\nAnswer:", - "ground_truth": [ - "B" - ], - "response": "A", - "choices": [ - [ - "A", - "[substeps] The form needs to include information about the child and the country they are from. Don't forget to include all the information that you know: \" parent, child, next of kin." - ], - [ - "B", - "[title] Contact the uscis or a state-licensed adoption agency to request a home study. [step] Home studies must be performed in order to determine the fitness of you and the home environment you intend to bring a child into." - ], - [ - "C", - "[title] Complete section ii of form ii of visa application, application for advance processing of a russian baby. [step] This form is available through the u.s." 
- ], - [ - "D", - "The form is brief to provide basic information about yourself and your adoptive child, as well as what you are looking for and need to bring with you. [title] Submit the completed form via the mail or by fax." - ] - ] - }, - { - "sample_id": "id41468", - "input": "Running a marathon: There are pictures of male participants shown dominating the scene where she was the only female participant. She talks about her experience as she shows more pictures of her participation against all odds. She", - "prompt": "The following are multiple choice questions (with answers) about common sense.\n\nQuestion: Putting on makeup: The words how to apply mascara appear. A woman with long black hair\nA. presents a bow and ribbon, followed by painted eye shadow, and a gift wrap.\nB. appears on a black screen with white lines, soap, and information at the bottom.\nC. appears in the large window.\nD. is talking to the camera.\nAnswer: D\n\nQuestion: Polishing shoes: The man uses the polish on his shoes. The man brushes his shoes with occasional blowing. The man shines his shoes. The man\nA. replaces the frame and arm strap on the shoes.\nB. shines balls of polish onto his shoes.\nC. puts the shoes on the stand to style them.\nD. talks to the camera.\nAnswer: D\n\nQuestion: Laying tile: The floor is swept, cleaned, and prepared for the process. The men begin to lay the vinyl flooring across the floor one piece at a time. The final result\nA. is shown on the screen.\nB. of the carpet is shown.\nC. is displayed in the male success rate.\nD. is seen with one ran through the floor.\nAnswer: A\n\nQuestion: Waterskiing: A person is water skiing behind a boat. They are going back and forth behind the boat. Words\nA. are on the screen.\nB. come onto the screen at the end.\nC. appear on the screen.\nD. are shown on the screen.\nAnswer: D\n\nQuestion: Ping-pong: We see an instructional title screen. The man demonstrates hitting a ball and we see him in play hitting the ball. We\nA. see the man beating a bag over a net.\nB. see the ending title screen.\nC. see a disc fly from 2 people.\nD. see the ending title screen again.\nAnswer: B\n\nQuestion: Running a marathon: There are pictures of male participants shown dominating the scene where she was the only female participant. She talks about her experience as she shows more pictures of her participation against all odds. She\nA. eventually walks off the set.\nB. also shares information on how things changed later when she was finally allowed to participate freely.\nC. talks about her experience again in waking and reading.\nD. challenges her opponents with her stance and high kick, doing incredible falls on the mat in the process.\nAnswer:", - "ground_truth": [ - "B" - ], - "response": "D", - "choices": [ - [ - "A", - "eventually walks off the set." - ], - [ - "B", - "also shares information on how things changed later when she was finally allowed to participate freely." - ], - [ - "C", - "talks about her experience again in waking and reading." - ], - [ - "D", - "challenges her opponents with her stance and high kick, doing incredible falls on the mat in the process." - ] - ] - }, - { - "sample_id": "id44284", - "input": "Personal Care and Style: [header] How to keep hair from curling with humidity [title] Lock in moisture with conditioner. [step] Dry hair, especially curly hair, needs lots of moisture. This is because curly hair tends to be on the drier end of the spectrum compared to straight hair. 
", - "prompt": "The following are multiple choice questions (with answers) about common sense.\n\nQuestion: Putting on makeup: The words how to apply mascara appear. A woman with long black hair\nA. presents a bow and ribbon, followed by painted eye shadow, and a gift wrap.\nB. appears on a black screen with white lines, soap, and information at the bottom.\nC. appears in the large window.\nD. is talking to the camera.\nAnswer: D\n\nQuestion: Polishing shoes: The man uses the polish on his shoes. The man brushes his shoes with occasional blowing. The man shines his shoes. The man\nA. replaces the frame and arm strap on the shoes.\nB. shines balls of polish onto his shoes.\nC. puts the shoes on the stand to style them.\nD. talks to the camera.\nAnswer: D\n\nQuestion: Laying tile: The floor is swept, cleaned, and prepared for the process. The men begin to lay the vinyl flooring across the floor one piece at a time. The final result\nA. is shown on the screen.\nB. of the carpet is shown.\nC. is displayed in the male success rate.\nD. is seen with one ran through the floor.\nAnswer: A\n\nQuestion: Waterskiing: A person is water skiing behind a boat. They are going back and forth behind the boat. Words\nA. are on the screen.\nB. come onto the screen at the end.\nC. appear on the screen.\nD. are shown on the screen.\nAnswer: D\n\nQuestion: Ping-pong: We see an instructional title screen. The man demonstrates hitting a ball and we see him in play hitting the ball. We\nA. see the man beating a bag over a net.\nB. see the ending title screen.\nC. see a disc fly from 2 people.\nD. see the ending title screen again.\nAnswer: B\n\nQuestion: Personal Care and Style: [header] How to keep hair from curling with humidity [title] Lock in moisture with conditioner. [step] Dry hair, especially curly hair, needs lots of moisture. This is because curly hair tends to be on the drier end of the spectrum compared to straight hair. \nA. Instead of styling damp hair, work in conditioner to restore moisture to your hair. [substeps] In severe weather, it's a good idea to mist your hair twice a day, once in the morning and once at night.\nB. If you have curly hair or textured hair, moisturizing is the key to preventing frizz when it's humid. [substeps] Choose a shampoo and conditioner that is made for curly hair.\nC. As such, this may make your hair release moisture into the air instead of in wet curls. [substeps] Use a steam wand or mini-cooler to draw moisture from your hair, such as a hair dryer or a head dryer.\nD. Set your hair loose with a spritz of water as needed. If it tends to curl less, wipe away the excess moisture with a silk scarf.\nAnswer:", - "ground_truth": [ - "B" - ], - "response": "B", - "choices": [ - [ - "A", - "Instead of styling damp hair, work in conditioner to restore moisture to your hair. [substeps] In severe weather, it's a good idea to mist your hair twice a day, once in the morning and once at night." - ], - [ - "B", - "If you have curly hair or textured hair, moisturizing is the key to preventing frizz when it's humid. [substeps] Choose a shampoo and conditioner that is made for curly hair." - ], - [ - "C", - "As such, this may make your hair release moisture into the air instead of in wet curls. [substeps] Use a steam wand or mini-cooler to draw moisture from your hair, such as a hair dryer or a head dryer." - ], - [ - "D", - "Set your hair loose with a spritz of water as needed. If it tends to curl less, wipe away the excess moisture with a silk scarf." 
-          ]
-        ]
-      }
-    ]
-}
\ No newline at end of file
diff --git a/data/hfopenllm_v2/0-hero/Matter-0.2-7B-DPO/0d7928c3-c769-474e-8249-7a5c70c4c559.json b/data/hfopenllm_v2/0-hero/Matter-0.2-7B-DPO/0d7928c3-c769-474e-8249-7a5c70c4c559.json
new file mode 100644
index 000000000..f776710f3
--- /dev/null
+++ b/data/hfopenllm_v2/0-hero/Matter-0.2-7B-DPO/0d7928c3-c769-474e-8249-7a5c70c4c559.json
@@ -0,0 +1,132 @@
+{
+  "schema_version": "0.2.0",
+  "evaluation_id": "hfopenllm_v2/0-hero_Matter-0.2-7B-DPO/1770682486.623709",
+  "retrieved_timestamp": "1770682486.623709",
+  "source_metadata": {
+    "source_name": "HF Open LLM v2",
+    "source_type": "documentation",
+    "source_organization_name": "Hugging Face",
+    "evaluator_relationship": "third_party"
+  },
+  "model_info": {
+    "name": "Matter-0.2-7B-DPO",
+    "id": "0-hero/Matter-0.2-7B-DPO",
+    "developer": "0-hero",
+    "inference_platform": "unknown",
+    "additional_details": {
+      "precision": "bfloat16",
+      "architecture": "MistralForCausalLM",
+      "params_billions": 7.242
+    }
+  },
+  "evaluation_results": [
+    {
+      "evaluation_name": "IFEval",
+      "source_data": {
+        "dataset_name": "IFEval",
+        "source_type": "hf_dataset",
+        "hf_repo": "google/IFEval"
+      },
+      "metric_config": {
+        "evaluation_description": "Accuracy on IFEval",
+        "lower_is_better": false,
+        "score_type": "continuous",
+        "min_score": 0.0,
+        "max_score": 1.0
+      },
+      "score_details": {
+        "score": 0.3303
+      }
+    },
+    {
+      "evaluation_name": "BBH",
+      "source_data": {
+        "dataset_name": "BBH",
+        "source_type": "hf_dataset",
+        "hf_repo": "SaylorTwift/bbh"
+      },
+      "metric_config": {
+        "evaluation_description": "Accuracy on BBH",
+        "lower_is_better": false,
+        "score_type": "continuous",
+        "min_score": 0.0,
+        "max_score": 1.0
+      },
+      "score_details": {
+        "score": 0.3596
+      }
+    },
+    {
+      "evaluation_name": "MATH Level 5",
+      "source_data": {
+        "dataset_name": "MATH Level 5",
+        "source_type": "hf_dataset",
+        "hf_repo": "DigitalLearningGmbH/MATH-lighteval"
+      },
+      "metric_config": {
+        "evaluation_description": "Exact Match on MATH Level 5",
+        "lower_is_better": false,
+        "score_type": "continuous",
+        "min_score": 0.0,
+        "max_score": 1.0
+      },
+      "score_details": {
+        "score": 0.0144
+      }
+    },
+    {
+      "evaluation_name": "GPQA",
+      "source_data": {
+        "dataset_name": "GPQA",
+        "source_type": "hf_dataset",
+        "hf_repo": "Idavidrein/gpqa"
+      },
+      "metric_config": {
+        "evaluation_description": "Accuracy on GPQA",
+        "lower_is_better": false,
+        "score_type": "continuous",
+        "min_score": 0.0,
+        "max_score": 1.0
+      },
+      "score_details": {
+        "score": 0.2592
+      }
+    },
+    {
+      "evaluation_name": "MUSR",
+      "source_data": {
+        "dataset_name": "MUSR",
+        "source_type": "hf_dataset",
+        "hf_repo": "TAUR-Lab/MuSR"
+      },
+      "metric_config": {
+        "evaluation_description": "Accuracy on MUSR",
+        "lower_is_better": false,
+        "score_type": "continuous",
+        "min_score": 0.0,
+        "max_score": 1.0
+      },
+      "score_details": {
+        "score": 0.3814
+      }
+    },
+    {
+      "evaluation_name": "MMLU-PRO",
+      "source_data": {
+        "dataset_name": "MMLU-PRO",
+        "source_type": "hf_dataset",
+        "hf_repo": "TIGER-Lab/MMLU-Pro"
+      },
+      "metric_config": {
+        "evaluation_description": "Accuracy on MMLU-PRO",
+        "lower_is_better": false,
+        "score_type": "continuous",
+        "min_score": 0.0,
+        "max_score": 1.0
+      },
+      "score_details": {
+        "score": 0.1164
+      }
+    }
+  ]
+}
\ No newline at end of file
diff --git a/data/hfopenllm_v2/0-hero/Matter-0.2-7B-DPO/40e80d5e-db72-46b7-bd14-b7d005df4be8.json b/data/hfopenllm_v2/0-hero/Matter-0.2-7B-DPO/40e80d5e-db72-46b7-bd14-b7d005df4be8.json
deleted file mode 100644
index 402a462d4..000000000
--- a/data/hfopenllm_v2/0-hero/Matter-0.2-7B-DPO/40e80d5e-db72-46b7-bd14-b7d005df4be8.json
+++ /dev/null
@@ -1,105 +0,0 @@
-{
-  "schema_version": "0.1.0",
-  "evaluation_id": "hfopenllm_v2/0-hero_Matter-0.2-7B-DPO/1762652579.4626381",
-  "retrieved_timestamp": "1762652579.462642",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party",
-    "source_name": "HF Open LLM v2",
-    "source_type": "documentation"
-  },
-  "model_info": {
-    "name": "0-hero/Matter-0.2-7B-DPO",
-    "developer": "0-hero",
-    "inference_platform": "unknown",
-    "id": "0-hero/Matter-0.2-7B-DPO",
-    "additional_details": {
-      "precision": "bfloat16",
-      "architecture": "MistralForCausalLM",
-      "params_billions": 7.242
-    }
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.3302792147058693
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.3596254301656297
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.014350453172205438
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.25922818791946306
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.381375
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.1163563829787234
-      }
-    }
-  ]
-}
diff --git a/data/hfopenllm_v2/01-ai/Yi-1.5-34B-32K/0d91a153-1b6b-4891-8722-a5c7e372ba64.json b/data/hfopenllm_v2/01-ai/Yi-1.5-34B-32K/0d91a153-1b6b-4891-8722-a5c7e372ba64.json
deleted file mode 100644
index 3e3805028..000000000
--- a/data/hfopenllm_v2/01-ai/Yi-1.5-34B-32K/0d91a153-1b6b-4891-8722-a5c7e372ba64.json
+++ /dev/null
@@ -1,105 +0,0 @@
-{
-  "schema_version": "0.1.0",
-  "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-34B-32K/1762652579.463656",
-  "retrieved_timestamp": "1762652579.463657",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party",
-    "source_name": "HF Open LLM v2",
-    "source_type": "documentation"
-  },
-  "model_info": {
-    "name": "01-ai/Yi-1.5-34B-32K",
-    "developer": "01-ai",
-    "inference_platform": "unknown",
-    "id": "01-ai/Yi-1.5-34B-32K",
-    "additional_details": {
-      "precision": "bfloat16",
-      "architecture": "LlamaForCausalLM",
-      "params_billions": 34.389
- } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3118691737922047 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6015685776542417 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1540785498489426 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36325503355704697 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4398229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4709109042553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/01-ai/Yi-1.5-34B-32K/f63536ed-752b-4538-9b92-2514a617a4bf.json b/data/hfopenllm_v2/01-ai/Yi-1.5-34B-32K/f63536ed-752b-4538-9b92-2514a617a4bf.json new file mode 100644 index 000000000..7d0d73c85 --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-1.5-34B-32K/f63536ed-752b-4538-9b92-2514a617a4bf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-34B-32K/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yi-1.5-34B-32K", + "id": "01-ai/Yi-1.5-34B-32K", + "developer": "01-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3119 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6016 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1541 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3633 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4398 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4709 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/01-ai/Yi-1.5-34B-Chat-16K/2192007d-1f6e-4f74-b518-7448ef3a896e.json b/data/hfopenllm_v2/01-ai/Yi-1.5-34B-Chat-16K/2192007d-1f6e-4f74-b518-7448ef3a896e.json deleted file mode 100644 index 104d552b4..000000000 --- a/data/hfopenllm_v2/01-ai/Yi-1.5-34B-Chat-16K/2192007d-1f6e-4f74-b518-7448ef3a896e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-34B-Chat-16K/1762652579.464125", - "retrieved_timestamp": "1762652579.4641259", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "01-ai/Yi-1.5-34B-Chat-16K", - "developer": "01-ai", - "inference_platform": "unknown", - "id": "01-ai/Yi-1.5-34B-Chat-16K", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.456449997118756 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6100218256499571 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21374622356495468 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33808724832214765 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43976041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45445478723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/01-ai/Yi-1.5-34B-Chat-16K/8ff13de2-ea43-4392-992f-ba70b6023e96.json b/data/hfopenllm_v2/01-ai/Yi-1.5-34B-Chat-16K/8ff13de2-ea43-4392-992f-ba70b6023e96.json new file mode 100644 index 000000000..8682b3811 --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-1.5-34B-Chat-16K/8ff13de2-ea43-4392-992f-ba70b6023e96.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-34B-Chat-16K/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yi-1.5-34B-Chat-16K", + "id": "01-ai/Yi-1.5-34B-Chat-16K", + "developer": "01-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4564 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.61 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2137 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3381 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4398 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4545 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/01-ai/Yi-1.5-34B-Chat/02bac8a7-bd09-4e73-979a-7dbaa7a8ed75.json b/data/hfopenllm_v2/01-ai/Yi-1.5-34B-Chat/02bac8a7-bd09-4e73-979a-7dbaa7a8ed75.json new file mode 100644 index 000000000..1a02c9bdc --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-1.5-34B-Chat/02bac8a7-bd09-4e73-979a-7dbaa7a8ed75.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-34B-Chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yi-1.5-34B-Chat", + "id": "01-ai/Yi-1.5-34B-Chat", + "developer": "01-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6067 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6084 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2772 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3649 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4282 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.452 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/01-ai/Yi-1.5-34B-Chat/e335874b-9b3e-4966-a7e0-22e9d16f8324.json b/data/hfopenllm_v2/01-ai/Yi-1.5-34B-Chat/e335874b-9b3e-4966-a7e0-22e9d16f8324.json deleted file mode 100644 index 4c019d7c6..000000000 --- a/data/hfopenllm_v2/01-ai/Yi-1.5-34B-Chat/e335874b-9b3e-4966-a7e0-22e9d16f8324.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-34B-Chat/1762652579.463886", - "retrieved_timestamp": "1762652579.4638872", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "01-ai/Yi-1.5-34B-Chat", - "developer": "01-ai", - "inference_platform": "unknown", - "id": "01-ai/Yi-1.5-34B-Chat", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6066758423205982 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6083748310271819 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.277190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3649328859060403 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4281979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45204454787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/01-ai/Yi-1.5-34B/74e4406d-b2b6-4c3f-b059-f52cccf1fff4.json b/data/hfopenllm_v2/01-ai/Yi-1.5-34B/74e4406d-b2b6-4c3f-b059-f52cccf1fff4.json new file mode 100644 index 000000000..948057bc5 --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-1.5-34B/74e4406d-b2b6-4c3f-b059-f52cccf1fff4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-34B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yi-1.5-34B", + "id": "01-ai/Yi-1.5-34B", + "developer": "01-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": 
"LlamaForCausalLM", + "params_billions": 34.389 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2841 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5976 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1533 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3658 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4236 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4666 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/01-ai/Yi-1.5-34B/8409c158-ef12-4e6c-8a1d-7be2084b3446.json b/data/hfopenllm_v2/01-ai/Yi-1.5-34B/8409c158-ef12-4e6c-8a1d-7be2084b3446.json deleted file mode 100644 index b8a794c63..000000000 --- a/data/hfopenllm_v2/01-ai/Yi-1.5-34B/8409c158-ef12-4e6c-8a1d-7be2084b3446.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-34B/1762652579.4633532", - "retrieved_timestamp": "1762652579.463354", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "01-ai/Yi-1.5-34B", - "developer": "01-ai", - "inference_platform": "unknown", - "id": "01-ai/Yi-1.5-34B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2841172533322695 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5976391706360018 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15332326283987915 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36577181208053694 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4236041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4665890957446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/01-ai/Yi-1.5-6B-Chat/3452e57f-3023-4e2e-ad84-b09e409fe334.json b/data/hfopenllm_v2/01-ai/Yi-1.5-6B-Chat/3452e57f-3023-4e2e-ad84-b09e409fe334.json deleted file mode 100644 index 7361137b1..000000000 --- a/data/hfopenllm_v2/01-ai/Yi-1.5-6B-Chat/3452e57f-3023-4e2e-ad84-b09e409fe334.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-6B-Chat/1762652579.464571", - "retrieved_timestamp": "1762652579.464572", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "01-ai/Yi-1.5-6B-Chat", - "developer": "01-ai", - "inference_platform": "unknown", - "id": "01-ai/Yi-1.5-6B-Chat", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.061 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5145270105542183 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4571311331954389 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1623867069486405 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43917708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3193151595744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/01-ai/Yi-1.5-6B-Chat/ec8a6d6c-b8ea-48a3-9af6-d357e0057ec1.json b/data/hfopenllm_v2/01-ai/Yi-1.5-6B-Chat/ec8a6d6c-b8ea-48a3-9af6-d357e0057ec1.json new file mode 100644 index 000000000..3a37bdc49 --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-1.5-6B-Chat/ec8a6d6c-b8ea-48a3-9af6-d357e0057ec1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-6B-Chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yi-1.5-6B-Chat", + "id": "01-ai/Yi-1.5-6B-Chat", + "developer": "01-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.061 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5145 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4571 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1624 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4392 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", 
+ "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3193 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/01-ai/Yi-1.5-6B/05307b41-d832-4533-99bd-c8608bf8e64c.json b/data/hfopenllm_v2/01-ai/Yi-1.5-6B/05307b41-d832-4533-99bd-c8608bf8e64c.json new file mode 100644 index 000000000..8abcdb009 --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-1.5-6B/05307b41-d832-4533-99bd-c8608bf8e64c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-6B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yi-1.5-6B", + "id": "01-ai/Yi-1.5-6B", + "developer": "01-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.061 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4493 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0665 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3138 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4374 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3144 + } + } + ] +} \ No newline at end of file diff 
--git a/data/hfopenllm_v2/01-ai/Yi-1.5-6B/1a1f1263-96b6-4e32-a2c8-6c0d6b47dff9.json b/data/hfopenllm_v2/01-ai/Yi-1.5-6B/1a1f1263-96b6-4e32-a2c8-6c0d6b47dff9.json deleted file mode 100644 index 464db170c..000000000 --- a/data/hfopenllm_v2/01-ai/Yi-1.5-6B/1a1f1263-96b6-4e32-a2c8-6c0d6b47dff9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-6B/1762652579.464354", - "retrieved_timestamp": "1762652579.464355", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "01-ai/Yi-1.5-6B", - "developer": "01-ai", - "inference_platform": "unknown", - "id": "01-ai/Yi-1.5-6B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.061 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26166017278598563 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44925820198929056 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43740625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31441156914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/01-ai/Yi-1.5-9B-32K/c09bd9b0-6f85-4120-94a9-b628c68bccb7.json b/data/hfopenllm_v2/01-ai/Yi-1.5-9B-32K/c09bd9b0-6f85-4120-94a9-b628c68bccb7.json new file mode 100644 index 000000000..510b97e1d --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-1.5-9B-32K/c09bd9b0-6f85-4120-94a9-b628c68bccb7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-9B-32K/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yi-1.5-9B-32K", + "id": "01-ai/Yi-1.5-9B-32K", + "developer": "01-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + 
"params_billions": 8.829 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2303 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4963 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.108 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3591 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4186 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3765 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/01-ai/Yi-1.5-9B-32K/df9d9d44-daa1-4e61-9b46-192380043889.json b/data/hfopenllm_v2/01-ai/Yi-1.5-9B-32K/df9d9d44-daa1-4e61-9b46-192380043889.json deleted file mode 100644 index 130724922..000000000 --- a/data/hfopenllm_v2/01-ai/Yi-1.5-9B-32K/df9d9d44-daa1-4e61-9b46-192380043889.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-9B-32K/1762652579.4649951", - "retrieved_timestamp": "1762652579.464996", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "01-ai/Yi-1.5-9B-32K", - "developer": "01-ai", - "inference_platform": "unknown", - "id": "01-ai/Yi-1.5-9B-32K", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.829 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23031113002389217 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.496332115988265 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10800604229607251 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35906040268456374 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4186145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37649601063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/01-ai/Yi-1.5-9B-Chat-16K/090c9691-4b7e-4a98-b9a2-644e21797be4.json b/data/hfopenllm_v2/01-ai/Yi-1.5-9B-Chat-16K/090c9691-4b7e-4a98-b9a2-644e21797be4.json deleted file mode 100644 index 592ac34a4..000000000 --- a/data/hfopenllm_v2/01-ai/Yi-1.5-9B-Chat-16K/090c9691-4b7e-4a98-b9a2-644e21797be4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-9B-Chat-16K/1762652579.465471", - "retrieved_timestamp": "1762652579.465471", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "01-ai/Yi-1.5-9B-Chat-16K", - "developer": "01-ai", - "inference_platform": "unknown", - "id": "01-ai/Yi-1.5-9B-Chat-16K", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.829 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4214040966856829 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5153383364651778 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1782477341389728 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40990624999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39935172872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/01-ai/Yi-1.5-9B-Chat-16K/9f971385-1146-4436-91a6-0e52d4db1f07.json b/data/hfopenllm_v2/01-ai/Yi-1.5-9B-Chat-16K/9f971385-1146-4436-91a6-0e52d4db1f07.json new file mode 100644 index 000000000..67dbc9f74 --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-1.5-9B-Chat-16K/9f971385-1146-4436-91a6-0e52d4db1f07.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-9B-Chat-16K/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yi-1.5-9B-Chat-16K", + "id": "01-ai/Yi-1.5-9B-Chat-16K", + "developer": "01-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.829 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4214 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5153 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1782 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4099 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + 
"dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3994 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/01-ai/Yi-1.5-9B-Chat/80ed14ca-b4cd-4ceb-8fdb-24705e47bd0e.json b/data/hfopenllm_v2/01-ai/Yi-1.5-9B-Chat/80ed14ca-b4cd-4ceb-8fdb-24705e47bd0e.json new file mode 100644 index 000000000..9ac18fdab --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-1.5-9B-Chat/80ed14ca-b4cd-4ceb-8fdb-24705e47bd0e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-9B-Chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yi-1.5-9B-Chat", + "id": "01-ai/Yi-1.5-9B-Chat", + "developer": "01-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.829 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6046 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5559 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2258 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3347 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4259 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.3975 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/01-ai/Yi-1.5-9B-Chat/9256c32b-d956-418f-97da-ea78e3ad9e48.json b/data/hfopenllm_v2/01-ai/Yi-1.5-9B-Chat/9256c32b-d956-418f-97da-ea78e3ad9e48.json deleted file mode 100644 index b58ae19b7..000000000 --- a/data/hfopenllm_v2/01-ai/Yi-1.5-9B-Chat/9256c32b-d956-418f-97da-ea78e3ad9e48.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-9B-Chat/1762652579.465226", - "retrieved_timestamp": "1762652579.465226", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "01-ai/Yi-1.5-9B-Chat", - "developer": "01-ai", - "inference_platform": "unknown", - "id": "01-ai/Yi-1.5-9B-Chat", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.829 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6045525871354672 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.555906430281685 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2258308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3347315436241611 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42590625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39752327127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/01-ai/Yi-1.5-9B/904d1f91-3153-49d5-afd3-9921bfc086f1.json b/data/hfopenllm_v2/01-ai/Yi-1.5-9B/904d1f91-3153-49d5-afd3-9921bfc086f1.json deleted file mode 100644 index 2f07e388c..000000000 --- a/data/hfopenllm_v2/01-ai/Yi-1.5-9B/904d1f91-3153-49d5-afd3-9921bfc086f1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-9B/1762652579.464781", - "retrieved_timestamp": "1762652579.464782", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"01-ai/Yi-1.5-9B", - "developer": "01-ai", - "inference_platform": "unknown", - "id": "01-ai/Yi-1.5-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.829 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29358435617494916 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.514294179104191 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11404833836858005 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37919463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43278124999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3916223404255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/01-ai/Yi-1.5-9B/db88e3f5-58a9-4783-9093-a6df96483342.json b/data/hfopenllm_v2/01-ai/Yi-1.5-9B/db88e3f5-58a9-4783-9093-a6df96483342.json new file mode 100644 index 000000000..465841ce2 --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-1.5-9B/db88e3f5-58a9-4783-9093-a6df96483342.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yi-1.5-9B", + "id": "01-ai/Yi-1.5-9B", + "developer": "01-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.829 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5143 + } + }, + { + "evaluation_name": 
"MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.114 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3792 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4328 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3916 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/01-ai/Yi-34B-200K/8cd90f8a-d8dc-469b-95b9-260fcef804d2.json b/data/hfopenllm_v2/01-ai/Yi-34B-200K/8cd90f8a-d8dc-469b-95b9-260fcef804d2.json new file mode 100644 index 000000000..ba5c90e1f --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-34B-200K/8cd90f8a-d8dc-469b-95b9-260fcef804d2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-34B-200K/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yi-34B-200K", + "id": "01-ai/Yi-34B-200K", + "developer": "01-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1542 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5442 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.0574 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3565 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3817 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4535 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/01-ai/Yi-34B-200K/fb2ebd9a-f5b8-42a2-9b58-e6f0e7d9b98a.json b/data/hfopenllm_v2/01-ai/Yi-34B-200K/fb2ebd9a-f5b8-42a2-9b58-e6f0e7d9b98a.json deleted file mode 100644 index e04d1051a..000000000 --- a/data/hfopenllm_v2/01-ai/Yi-34B-200K/fb2ebd9a-f5b8-42a2-9b58-e6f0e7d9b98a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/01-ai_Yi-34B-200K/1762652579.465893", - "retrieved_timestamp": "1762652579.465894", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "01-ai/Yi-34B-200K", - "developer": "01-ai", - "inference_platform": "unknown", - "id": "01-ai/Yi-34B-200K", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15424850507763843 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5441817925289527 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3565436241610738 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.38171874999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45345744680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/01-ai/Yi-34B-Chat/5d9b9217-874b-426d-8af4-5105a3b1b3ad.json b/data/hfopenllm_v2/01-ai/Yi-34B-Chat/5d9b9217-874b-426d-8af4-5105a3b1b3ad.json deleted file mode 100644 index aaa10e627..000000000 --- a/data/hfopenllm_v2/01-ai/Yi-34B-Chat/5d9b9217-874b-426d-8af4-5105a3b1b3ad.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/01-ai_Yi-34B-Chat/1762652579.466115", - "retrieved_timestamp": "1762652579.4661162", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "01-ai/Yi-34B-Chat", - "developer": "01-ai", - "inference_platform": "unknown", - "id": "01-ai/Yi-34B-Chat", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4698887839820565 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5560872910766164 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06268882175226587 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39784375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4093251329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/01-ai/Yi-34B-Chat/b2c82703-2b5c-407d-b84f-a8f8261ac894.json b/data/hfopenllm_v2/01-ai/Yi-34B-Chat/b2c82703-2b5c-407d-b84f-a8f8261ac894.json new file mode 100644 index 000000000..a62cda89d --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-34B-Chat/b2c82703-2b5c-407d-b84f-a8f8261ac894.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-34B-Chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + 
"source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yi-34B-Chat", + "id": "01-ai/Yi-34B-Chat", + "developer": "01-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4699 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5561 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0627 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3381 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3978 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4093 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/01-ai/Yi-34B/3ebcbf3d-cb2d-4332-bb8a-1db104033391.json b/data/hfopenllm_v2/01-ai/Yi-34B/3ebcbf3d-cb2d-4332-bb8a-1db104033391.json deleted file mode 100644 index 9cc4cb961..000000000 --- a/data/hfopenllm_v2/01-ai/Yi-34B/3ebcbf3d-cb2d-4332-bb8a-1db104033391.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/01-ai_Yi-34B/1762652579.4656792", - "retrieved_timestamp": "1762652579.46568", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "01-ai/Yi-34B", - "developer": "01-ai", - "inference_platform": 
"unknown", - "id": "01-ai/Yi-34B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3045751938190667 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5457099951794562 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36661073825503354 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4118541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.441156914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/01-ai/Yi-34B/55462e67-5eca-4e9d-9095-51fcf12de5fa.json b/data/hfopenllm_v2/01-ai/Yi-34B/55462e67-5eca-4e9d-9095-51fcf12de5fa.json new file mode 100644 index 000000000..1781d005a --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-34B/55462e67-5eca-4e9d-9095-51fcf12de5fa.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-34B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yi-34B", + "id": "01-ai/Yi-34B", + "developer": "01-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3046 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5457 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": 
"hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0514 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3666 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4119 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4412 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/01-ai/Yi-6B-200K/25a119f0-5eaa-4fa9-8cd4-e0f437ada456.json b/data/hfopenllm_v2/01-ai/Yi-6B-200K/25a119f0-5eaa-4fa9-8cd4-e0f437ada456.json new file mode 100644 index 000000000..8ffbe3e70 --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-6B-200K/25a119f0-5eaa-4fa9-8cd4-e0f437ada456.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-6B-200K/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yi-6B-200K", + "id": "01-ai/Yi-6B-200K", + "developer": "01-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.061 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0843 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4289 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0181 + } + }, + { + "evaluation_name": 
"GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2819 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4587 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2844 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/01-ai/Yi-6B-200K/6b720e8b-aab8-4ba4-9bce-e7a1de3cfb86.json b/data/hfopenllm_v2/01-ai/Yi-6B-200K/6b720e8b-aab8-4ba4-9bce-e7a1de3cfb86.json deleted file mode 100644 index 0945f8955..000000000 --- a/data/hfopenllm_v2/01-ai/Yi-6B-200K/6b720e8b-aab8-4ba4-9bce-e7a1de3cfb86.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/01-ai_Yi-6B-200K/1762652579.4665558", - "retrieved_timestamp": "1762652579.466557", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "01-ai/Yi-6B-200K", - "developer": "01-ai", - "inference_platform": "unknown", - "id": "01-ai/Yi-6B-200K", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.061 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08433068702154728 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42892948109603307 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01812688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45873958333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2844082446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/01-ai/Yi-6B-Chat/1120c801-7736-4d9d-b23d-08eeedb34186.json b/data/hfopenllm_v2/01-ai/Yi-6B-Chat/1120c801-7736-4d9d-b23d-08eeedb34186.json deleted file mode 100644 index ad5be32a4..000000000 --- a/data/hfopenllm_v2/01-ai/Yi-6B-Chat/1120c801-7736-4d9d-b23d-08eeedb34186.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/01-ai_Yi-6B-Chat/1762652579.466805", - "retrieved_timestamp": "1762652579.466806", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "01-ai/Yi-6B-Chat", - "developer": "01-ai", - "inference_platform": "unknown", - "id": "01-ai/Yi-6B-Chat", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.061 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33952135888331847 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41326019207548687 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36879166666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3061003989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/01-ai/Yi-6B-Chat/efc036b6-d8de-4393-87a1-d4f86fb44d91.json b/data/hfopenllm_v2/01-ai/Yi-6B-Chat/efc036b6-d8de-4393-87a1-d4f86fb44d91.json new file mode 100644 index 000000000..a454f4866 --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-6B-Chat/efc036b6-d8de-4393-87a1-d4f86fb44d91.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-6B-Chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": 
"third_party" + }, + "model_info": { + "name": "Yi-6B-Chat", + "id": "01-ai/Yi-6B-Chat", + "developer": "01-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.061 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3395 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4133 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0136 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3688 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3061 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/01-ai/Yi-6B/297419fa-855c-4eae-ad7c-3cf4a0262450.json b/data/hfopenllm_v2/01-ai/Yi-6B/297419fa-855c-4eae-ad7c-3cf4a0262450.json deleted file mode 100644 index e435c0939..000000000 --- a/data/hfopenllm_v2/01-ai/Yi-6B/297419fa-855c-4eae-ad7c-3cf4a0262450.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/01-ai_Yi-6B/1762652579.4663382", - "retrieved_timestamp": "1762652579.4663382", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "01-ai/Yi-6B", - "developer": "01-ai", - "inference_platform": "unknown", - "id": "01-ai/Yi-6B", - "additional_details": { - "precision": "bfloat16", - "architecture": 
"LlamaForCausalLM", - "params_billions": 6.061 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28933784580468713 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4309230591000865 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39368749999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29911901595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/01-ai/Yi-6B/a5144406-eb85-43b2-a49d-be6b06d6b04a.json b/data/hfopenllm_v2/01-ai/Yi-6B/a5144406-eb85-43b2-a49d-be6b06d6b04a.json new file mode 100644 index 000000000..b7fc1616d --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-6B/a5144406-eb85-43b2-a49d-be6b06d6b04a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-6B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yi-6B", + "id": "01-ai/Yi-6B", + "developer": "01-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.061 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2893 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4309 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0159 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2693 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3937 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2991 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/01-ai/Yi-9B-200K/4299df04-495a-4687-b143-96b1b562d5e8.json b/data/hfopenllm_v2/01-ai/Yi-9B-200K/4299df04-495a-4687-b143-96b1b562d5e8.json deleted file mode 100644 index 2fda970ce..000000000 --- a/data/hfopenllm_v2/01-ai/Yi-9B-200K/4299df04-495a-4687-b143-96b1b562d5e8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/01-ai_Yi-9B-200K/1762652579.467233", - "retrieved_timestamp": "1762652579.467233", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "01-ai/Yi-9B-200K", - "developer": "01-ai", - "inference_platform": "unknown", - "id": "01-ai/Yi-9B-200K", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.829 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23270921155866434 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4793302602023641 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42940625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36220079787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/01-ai/Yi-9B-200K/900184ad-656d-416b-956f-5f6e3a991d1b.json b/data/hfopenllm_v2/01-ai/Yi-9B-200K/900184ad-656d-416b-956f-5f6e3a991d1b.json new file mode 100644 index 000000000..de7d6ef80 --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-9B-200K/900184ad-656d-416b-956f-5f6e3a991d1b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-9B-200K/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yi-9B-200K", + "id": "01-ai/Yi-9B-200K", + "developer": "01-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.829 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2327 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4793 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0665 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3154 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4294 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3622 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/01-ai/Yi-9B/0ec59add-f9a9-4dbd-8a83-c6aec0b8ad21.json b/data/hfopenllm_v2/01-ai/Yi-9B/0ec59add-f9a9-4dbd-8a83-c6aec0b8ad21.json deleted file mode 100644 index d8dec6521..000000000 --- a/data/hfopenllm_v2/01-ai/Yi-9B/0ec59add-f9a9-4dbd-8a83-c6aec0b8ad21.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/01-ai_Yi-9B/1762652579.46702", - "retrieved_timestamp": "1762652579.4670231", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "01-ai/Yi-9B", - "developer": "01-ai", - "inference_platform": "unknown", - "id": "01-ai/Yi-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.829 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2708779372066118 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49396075125308075 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.055891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40540624999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35738031914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/01-ai/Yi-9B/7a58954a-5d7d-4640-99fd-773249640237.json b/data/hfopenllm_v2/01-ai/Yi-9B/7a58954a-5d7d-4640-99fd-773249640237.json new file mode 100644 index 000000000..f8eee73fd --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-9B/7a58954a-5d7d-4640-99fd-773249640237.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yi-9B", + "id": "01-ai/Yi-9B", + "developer": "01-ai", + "inference_platform": 
"unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.829 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2709 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.494 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0559 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.318 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4054 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3574 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/01-ai/Yi-Coder-9B-Chat/4ea3146c-b912-424a-b0a9-7c37348348c8.json b/data/hfopenllm_v2/01-ai/Yi-Coder-9B-Chat/4ea3146c-b912-424a-b0a9-7c37348348c8.json new file mode 100644 index 000000000..aa071d68a --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-Coder-9B-Chat/4ea3146c-b912-424a-b0a9-7c37348348c8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-Coder-9B-Chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yi-Coder-9B-Chat", + "id": "01-ai/Yi-Coder-9B-Chat", + "developer": "01-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.829 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": 
"google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4817 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4814 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.04 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2475 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3992 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2425 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/01-ai/Yi-Coder-9B-Chat/ef0cc3a5-0d62-4a45-b0c7-28a6f7dfdac4.json b/data/hfopenllm_v2/01-ai/Yi-Coder-9B-Chat/ef0cc3a5-0d62-4a45-b0c7-28a6f7dfdac4.json deleted file mode 100644 index ed39aac8f..000000000 --- a/data/hfopenllm_v2/01-ai/Yi-Coder-9B-Chat/ef0cc3a5-0d62-4a45-b0c7-28a6f7dfdac4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/01-ai_Yi-Coder-9B-Chat/1762652579.4674509", - "retrieved_timestamp": "1762652579.4674518", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "01-ai/Yi-Coder-9B-Chat", - "developer": "01-ai", - "inference_platform": "unknown", - "id": "01-ai/Yi-Coder-9B-Chat", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.829 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4817041006750976 - } - }, - { - "evaluation_name": "BBH", 
- "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48142000339111674 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24748322147651006 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3991770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24251994680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/1-800-LLMs/Qwen-2.5-14B-Hindi-Custom-Instruct/a48b0864-76b7-4860-a448-942a8d74f68e.json b/data/hfopenllm_v2/1-800-LLMs/Qwen-2.5-14B-Hindi-Custom-Instruct/a48b0864-76b7-4860-a448-942a8d74f68e.json deleted file mode 100644 index 1761c187a..000000000 --- a/data/hfopenllm_v2/1-800-LLMs/Qwen-2.5-14B-Hindi-Custom-Instruct/a48b0864-76b7-4860-a448-942a8d74f68e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/1-800-LLMs_Qwen-2.5-14B-Hindi-Custom-Instruct/1762652579.468073", - "retrieved_timestamp": "1762652579.468074", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "1-800-LLMs/Qwen-2.5-14B-Hindi-Custom-Instruct", - "developer": "1-800-LLMs", - "inference_platform": "unknown", - "id": "1-800-LLMs/Qwen-2.5-14B-Hindi-Custom-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30774677854758703 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6284322714967584 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3699664429530201 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4490625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.516373005319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/1-800-LLMs/Qwen-2.5-14B-Hindi-Custom-Instruct/b0276278-6d86-49c0-a246-cd9110ac1deb.json b/data/hfopenllm_v2/1-800-LLMs/Qwen-2.5-14B-Hindi-Custom-Instruct/b0276278-6d86-49c0-a246-cd9110ac1deb.json new file mode 100644 index 000000000..14f396bdb --- /dev/null +++ b/data/hfopenllm_v2/1-800-LLMs/Qwen-2.5-14B-Hindi-Custom-Instruct/b0276278-6d86-49c0-a246-cd9110ac1deb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/1-800-LLMs_Qwen-2.5-14B-Hindi-Custom-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen-2.5-14B-Hindi-Custom-Instruct", + "id": "1-800-LLMs/Qwen-2.5-14B-Hindi-Custom-Instruct", + "developer": "1-800-LLMs", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3077 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6284 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3112 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.37 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.4491 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5164 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/1-800-LLMs/Qwen-2.5-14B-Hindi/04216f67-1385-43bf-b7de-5bae7a60f379.json b/data/hfopenllm_v2/1-800-LLMs/Qwen-2.5-14B-Hindi/04216f67-1385-43bf-b7de-5bae7a60f379.json new file mode 100644 index 000000000..96c0dfd9a --- /dev/null +++ b/data/hfopenllm_v2/1-800-LLMs/Qwen-2.5-14B-Hindi/04216f67-1385-43bf-b7de-5bae7a60f379.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/1-800-LLMs_Qwen-2.5-14B-Hindi/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen-2.5-14B-Hindi", + "id": "1-800-LLMs/Qwen-2.5-14B-Hindi", + "developer": "1-800-LLMs", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5826 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6524 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3331 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3624 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4489 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5263 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/1024m/PHI-4-Hindi/fbf7b76b-7ced-4217-8e14-1d02184e271c.json b/data/hfopenllm_v2/1024m/PHI-4-Hindi/fbf7b76b-7ced-4217-8e14-1d02184e271c.json new file mode 100644 index 000000000..0b0fe7e6b --- /dev/null +++ b/data/hfopenllm_v2/1024m/PHI-4-Hindi/fbf7b76b-7ced-4217-8e14-1d02184e271c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/1024m_PHI-4-Hindi/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PHI-4-Hindi", + "id": "1024m/PHI-4-Hindi", + "developer": "1024m", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0082 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.671 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2334 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3977 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4914 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5239 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/1024m/QWEN-14B-B100/74ac8aba-6dfb-464c-81b5-d02a9192b9cc.json 
b/data/hfopenllm_v2/1024m/QWEN-14B-B100/74ac8aba-6dfb-464c-81b5-d02a9192b9cc.json new file mode 100644 index 000000000..21260063a --- /dev/null +++ b/data/hfopenllm_v2/1024m/QWEN-14B-B100/74ac8aba-6dfb-464c-81b5-d02a9192b9cc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/1024m_QWEN-14B-B100/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QWEN-14B-B100", + "id": "1024m/QWEN-14B-B100", + "developer": "1024m", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7762 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6533 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5438 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3507 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.41 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5179 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/152334H/miqu-1-70b-sf/295938e1-ade2-4d36-beca-3cbe506b5b90.json b/data/hfopenllm_v2/152334H/miqu-1-70b-sf/295938e1-ade2-4d36-beca-3cbe506b5b90.json new file mode 100644 index 000000000..133dae7a4 --- /dev/null +++ b/data/hfopenllm_v2/152334H/miqu-1-70b-sf/295938e1-ade2-4d36-beca-3cbe506b5b90.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/152334H_miqu-1-70b-sf/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "miqu-1-70b-sf", + "id": "152334H/miqu-1-70b-sf", + "developer": "152334H", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 68.977 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5182 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6102 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1246 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3507 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4582 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4228 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/152334H/miqu-1-70b-sf/f57d7b8d-85d5-4e0b-8dec-31e2931487dd.json b/data/hfopenllm_v2/152334H/miqu-1-70b-sf/f57d7b8d-85d5-4e0b-8dec-31e2931487dd.json deleted file mode 100644 index 93595795a..000000000 --- a/data/hfopenllm_v2/152334H/miqu-1-70b-sf/f57d7b8d-85d5-4e0b-8dec-31e2931487dd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/152334H_miqu-1-70b-sf/1762652579.469194", - "retrieved_timestamp": "1762652579.469195", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "152334H/miqu-1-70b-sf", - "developer": "152334H", - "inference_platform": "unknown", - "id": "152334H/miqu-1-70b-sf", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 68.977 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5181740005407873 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6102361685099691 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12462235649546828 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35067114093959734 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45820833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42278922872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/1TuanPham/T-VisStar-7B-v0.1/1347cd1b-2ebc-4223-900f-7c2479e228a3.json b/data/hfopenllm_v2/1TuanPham/T-VisStar-7B-v0.1/1347cd1b-2ebc-4223-900f-7c2479e228a3.json deleted file mode 100644 index 630c3e805..000000000 --- a/data/hfopenllm_v2/1TuanPham/T-VisStar-7B-v0.1/1347cd1b-2ebc-4223-900f-7c2479e228a3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/1TuanPham_T-VisStar-7B-v0.1/1762652579.469481", - "retrieved_timestamp": "1762652579.469482", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "1TuanPham/T-VisStar-7B-v0.1", - "developer": "1TuanPham", - "inference_platform": "unknown", - "id": "1TuanPham/T-VisStar-7B-v0.1", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.294 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36070404305021786 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5052203113352468 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3210605053191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/1TuanPham/T-VisStar-7B-v0.1/f331782f-ea09-41bd-8c6a-e964c88d7e09.json b/data/hfopenllm_v2/1TuanPham/T-VisStar-7B-v0.1/f331782f-ea09-41bd-8c6a-e964c88d7e09.json new file mode 100644 index 000000000..95ed6976e --- /dev/null +++ b/data/hfopenllm_v2/1TuanPham/T-VisStar-7B-v0.1/f331782f-ea09-41bd-8c6a-e964c88d7e09.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/1TuanPham_T-VisStar-7B-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T-VisStar-7B-v0.1", + "id": "1TuanPham/T-VisStar-7B-v0.1", + "developer": "1TuanPham", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.294 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3607 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5052 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0574 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on 
GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3211 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/1TuanPham/T-VisStar-v0.1/b2926dd6-628c-4274-b0e8-1efc64269bb2.json b/data/hfopenllm_v2/1TuanPham/T-VisStar-v0.1/b2926dd6-628c-4274-b0e8-1efc64269bb2.json deleted file mode 100644 index d21431fc2..000000000 --- a/data/hfopenllm_v2/1TuanPham/T-VisStar-v0.1/b2926dd6-628c-4274-b0e8-1efc64269bb2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/1TuanPham_T-VisStar-v0.1/1762652579.469921", - "retrieved_timestamp": "1762652579.469923", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "1TuanPham/T-VisStar-v0.1", - "developer": "1TuanPham", - "inference_platform": "unknown", - "id": "1TuanPham/T-VisStar-v0.1", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.294 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36070404305021786 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5052203113352468 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.3210605053191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/1TuanPham/T-VisStar-v0.1/e4e3d79a-1de9-43be-a029-0be4f60e472b.json b/data/hfopenllm_v2/1TuanPham/T-VisStar-v0.1/e4e3d79a-1de9-43be-a029-0be4f60e472b.json new file mode 100644 index 000000000..32ac26ea2 --- /dev/null +++ b/data/hfopenllm_v2/1TuanPham/T-VisStar-v0.1/e4e3d79a-1de9-43be-a029-0be4f60e472b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/1TuanPham_T-VisStar-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T-VisStar-v0.1", + "id": "1TuanPham/T-VisStar-v0.1", + "developer": "1TuanPham", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.294 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3607 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5052 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0574 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3211 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/3rd-Degree-Burn/L-3.1-Science-Writer-8B/0c4fd071-b5c9-4bf1-a1d5-d658be1a3258.json 
b/data/hfopenllm_v2/3rd-Degree-Burn/L-3.1-Science-Writer-8B/0c4fd071-b5c9-4bf1-a1d5-d658be1a3258.json deleted file mode 100644 index f1d053d4e..000000000 --- a/data/hfopenllm_v2/3rd-Degree-Burn/L-3.1-Science-Writer-8B/0c4fd071-b5c9-4bf1-a1d5-d658be1a3258.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/3rd-Degree-Burn_L-3.1-Science-Writer-8B/1762652579.470164", - "retrieved_timestamp": "1762652579.470165", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "3rd-Degree-Burn/L-3.1-Science-Writer-8B", - "developer": "3rd-Degree-Burn", - "inference_platform": "unknown", - "id": "3rd-Degree-Burn/L-3.1-Science-Writer-8B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42625012743963797 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5041306326216103 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10347432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3959479166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36494348404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/3rd-Degree-Burn/L-3.1-Science-Writer-8B/6914ac28-b543-4f36-81f1-f7491c018e3b.json b/data/hfopenllm_v2/3rd-Degree-Burn/L-3.1-Science-Writer-8B/6914ac28-b543-4f36-81f1-f7491c018e3b.json new file mode 100644 index 000000000..c548f8a55 --- /dev/null +++ b/data/hfopenllm_v2/3rd-Degree-Burn/L-3.1-Science-Writer-8B/6914ac28-b543-4f36-81f1-f7491c018e3b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/3rd-Degree-Burn_L-3.1-Science-Writer-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L-3.1-Science-Writer-8B", + "id": 
"3rd-Degree-Burn/L-3.1-Science-Writer-8B", + "developer": "3rd-Degree-Burn", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4263 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5041 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1035 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3959 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3649 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/3rd-Degree-Burn/Llama-3.1-8B-Squareroot-v1/b7378f41-46ab-41af-94cc-e7fb10738658.json b/data/hfopenllm_v2/3rd-Degree-Burn/Llama-3.1-8B-Squareroot-v1/b7378f41-46ab-41af-94cc-e7fb10738658.json new file mode 100644 index 000000000..44e2bc2aa --- /dev/null +++ b/data/hfopenllm_v2/3rd-Degree-Burn/Llama-3.1-8B-Squareroot-v1/b7378f41-46ab-41af-94cc-e7fb10738658.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/3rd-Degree-Burn_Llama-3.1-8B-Squareroot-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8B-Squareroot-v1", + "id": "3rd-Degree-Burn/Llama-3.1-8B-Squareroot-v1", + "developer": "3rd-Degree-Burn", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + 
"architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2892 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3343 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0884 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2559 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3341 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1127 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/3rd-Degree-Burn/Llama-3.1-8B-Squareroot/acedae59-6192-4ac4-a354-d520ecd6ba36.json b/data/hfopenllm_v2/3rd-Degree-Burn/Llama-3.1-8B-Squareroot/acedae59-6192-4ac4-a354-d520ecd6ba36.json new file mode 100644 index 000000000..56cd9d2ea --- /dev/null +++ b/data/hfopenllm_v2/3rd-Degree-Burn/Llama-3.1-8B-Squareroot/acedae59-6192-4ac4-a354-d520ecd6ba36.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/3rd-Degree-Burn_Llama-3.1-8B-Squareroot/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8B-Squareroot", + "id": "3rd-Degree-Burn/Llama-3.1-8B-Squareroot", + "developer": "3rd-Degree-Burn", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + 
"source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2213 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3461 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2567 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3089 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.175 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/3rd-Degree-Burn/Llama-Squared-8B/ff105961-761d-4261-8a44-20acf2e7f440.json b/data/hfopenllm_v2/3rd-Degree-Burn/Llama-Squared-8B/ff105961-761d-4261-8a44-20acf2e7f440.json new file mode 100644 index 000000000..4375cd5c9 --- /dev/null +++ b/data/hfopenllm_v2/3rd-Degree-Burn/Llama-Squared-8B/ff105961-761d-4261-8a44-20acf2e7f440.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/3rd-Degree-Burn_Llama-Squared-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-Squared-8B", + "id": "3rd-Degree-Burn/Llama-Squared-8B", + "developer": "3rd-Degree-Burn", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, 
+ "score_details": { + "score": 0.2755 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4431 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0574 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2718 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3089 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2366 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/4season/final_model_test_v2/74973e37-cd82-4e8a-816a-02b035fabff4.json b/data/hfopenllm_v2/4season/final_model_test_v2/74973e37-cd82-4e8a-816a-02b035fabff4.json deleted file mode 100644 index f8594dea2..000000000 --- a/data/hfopenllm_v2/4season/final_model_test_v2/74973e37-cd82-4e8a-816a-02b035fabff4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/4season_final_model_test_v2/1762652579.4714398", - "retrieved_timestamp": "1762652579.4714408", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "4season/final_model_test_v2", - "developer": "4season", - "inference_platform": "unknown", - "id": "4season/final_model_test_v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 21.421 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3191132860809319 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 
1 - }, - "score_details": { - "score": 0.6342049783295018 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08383685800604229 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271812080536913 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4314479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3528091755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/4season/final_model_test_v2/fa0901f6-514e-44ae-84dc-0b793f26169e.json b/data/hfopenllm_v2/4season/final_model_test_v2/fa0901f6-514e-44ae-84dc-0b793f26169e.json new file mode 100644 index 000000000..2c3cdad71 --- /dev/null +++ b/data/hfopenllm_v2/4season/final_model_test_v2/fa0901f6-514e-44ae-84dc-0b793f26169e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/4season_final_model_test_v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "final_model_test_v2", + "id": "4season/final_model_test_v2", + "developer": "4season", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 21.421 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3191 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6342 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0838 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3272 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4314 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3528 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AALF/FuseChat-Llama-3.1-8B-Instruct-preview/3766e8a0-99ad-4733-a01b-ced446b15eda.json b/data/hfopenllm_v2/AALF/FuseChat-Llama-3.1-8B-Instruct-preview/3766e8a0-99ad-4733-a01b-ced446b15eda.json deleted file mode 100644 index db98ee338..000000000 --- a/data/hfopenllm_v2/AALF/FuseChat-Llama-3.1-8B-Instruct-preview/3766e8a0-99ad-4733-a01b-ced446b15eda.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/AALF_FuseChat-Llama-3.1-8B-Instruct-preview/1762652579.471838", - "retrieved_timestamp": "1762652579.471839", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "AALF/FuseChat-Llama-3.1-8B-Instruct-preview", - "developer": "AALF", - "inference_platform": "unknown", - "id": "AALF/FuseChat-Llama-3.1-8B-Instruct-preview", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7189579205397235 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5119887898349903 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24773413897280966 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38200000000000006 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3732546542553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/AALF/FuseChat-Llama-3.1-8B-Instruct-preview/d2dff5df-343b-40f3-85de-14eb72dab050.json b/data/hfopenllm_v2/AALF/FuseChat-Llama-3.1-8B-Instruct-preview/d2dff5df-343b-40f3-85de-14eb72dab050.json new file mode 100644 index 000000000..6aa520b9d --- /dev/null +++ b/data/hfopenllm_v2/AALF/FuseChat-Llama-3.1-8B-Instruct-preview/d2dff5df-343b-40f3-85de-14eb72dab050.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/AALF_FuseChat-Llama-3.1-8B-Instruct-preview/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FuseChat-Llama-3.1-8B-Instruct-preview", + "id": "AALF/FuseChat-Llama-3.1-8B-Instruct-preview", + "developer": "AALF", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.719 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.512 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2477 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.382 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3733 + } + } + ] +} \ No newline at end of file diff 
--git a/data/hfopenllm_v2/AALF/FuseChat-Llama-3.1-8B-SFT-preview/342ac912-805f-4166-b8f4-10f0503fa892.json b/data/hfopenllm_v2/AALF/FuseChat-Llama-3.1-8B-SFT-preview/342ac912-805f-4166-b8f4-10f0503fa892.json deleted file mode 100644 index 8f67e9b7a..000000000 --- a/data/hfopenllm_v2/AALF/FuseChat-Llama-3.1-8B-SFT-preview/342ac912-805f-4166-b8f4-10f0503fa892.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/AALF_FuseChat-Llama-3.1-8B-SFT-preview/1762652579.472149", - "retrieved_timestamp": "1762652579.47215", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "AALF/FuseChat-Llama-3.1-8B-SFT-preview", - "developer": "AALF", - "inference_platform": "unknown", - "id": "AALF/FuseChat-Llama-3.1-8B-SFT-preview", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7280504616639405 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5240303130445233 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22507552870090636 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40199999999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37433510638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/AALF/FuseChat-Llama-3.1-8B-SFT-preview/8fa3010f-b7a1-4fc1-9156-ba70453add86.json b/data/hfopenllm_v2/AALF/FuseChat-Llama-3.1-8B-SFT-preview/8fa3010f-b7a1-4fc1-9156-ba70453add86.json new file mode 100644 index 000000000..5b5e4c7c1 --- /dev/null +++ b/data/hfopenllm_v2/AALF/FuseChat-Llama-3.1-8B-SFT-preview/8fa3010f-b7a1-4fc1-9156-ba70453add86.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/AALF_FuseChat-Llama-3.1-8B-SFT-preview/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + 
"model_info": { + "name": "FuseChat-Llama-3.1-8B-SFT-preview", + "id": "AALF/FuseChat-Llama-3.1-8B-SFT-preview", + "developer": "AALF", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7281 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.524 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2251 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.402 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3743 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AALF/gemma-2-27b-it-SimPO-37K-100steps/58034f99-3b01-46d6-aea9-90c75d073bb0.json b/data/hfopenllm_v2/AALF/gemma-2-27b-it-SimPO-37K-100steps/58034f99-3b01-46d6-aea9-90c75d073bb0.json new file mode 100644 index 000000000..410931148 --- /dev/null +++ b/data/hfopenllm_v2/AALF/gemma-2-27b-it-SimPO-37K-100steps/58034f99-3b01-46d6-aea9-90c75d073bb0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/AALF_gemma-2-27b-it-SimPO-37K-100steps/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-27b-it-SimPO-37K-100steps", + "id": "AALF/gemma-2-27b-it-SimPO-37K-100steps", + "developer": "AALF", + "inference_platform": "unknown", + "additional_details": { + 
"precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2568 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3931 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0211 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2886 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3329 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2125 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AALF/gemma-2-27b-it-SimPO-37K/e6c08c9c-6d01-45c7-8a24-219b756b8632.json b/data/hfopenllm_v2/AALF/gemma-2-27b-it-SimPO-37K/e6c08c9c-6d01-45c7-8a24-219b756b8632.json new file mode 100644 index 000000000..3cc0a4181 --- /dev/null +++ b/data/hfopenllm_v2/AALF/gemma-2-27b-it-SimPO-37K/e6c08c9c-6d01-45c7-8a24-219b756b8632.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/AALF_gemma-2-27b-it-SimPO-37K/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-27b-it-SimPO-37K", + "id": "AALF/gemma-2-27b-it-SimPO-37K", + "developer": "AALF", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + 
"hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2407 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3911 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0128 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3488 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1971 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AELLM/gemma-2-aeria-infinity-9b/cd97ad01-1d20-4cbd-a9bb-2acf3d9fdcc7.json b/data/hfopenllm_v2/AELLM/gemma-2-aeria-infinity-9b/cd97ad01-1d20-4cbd-a9bb-2acf3d9fdcc7.json new file mode 100644 index 000000000..4e8d23811 --- /dev/null +++ b/data/hfopenllm_v2/AELLM/gemma-2-aeria-infinity-9b/cd97ad01-1d20-4cbd-a9bb-2acf3d9fdcc7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/AELLM_gemma-2-aeria-infinity-9b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-aeria-infinity-9b", + "id": "AELLM/gemma-2-aeria-infinity-9b", + "developer": "AELLM", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.7594 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5983 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2145 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3339 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.402 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3862 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AELLM/gemma-2-lyco-infinity-9b/95f44ef8-e5ba-4bdc-97a7-2c5a678b07be.json b/data/hfopenllm_v2/AELLM/gemma-2-lyco-infinity-9b/95f44ef8-e5ba-4bdc-97a7-2c5a678b07be.json new file mode 100644 index 000000000..8c6280d2f --- /dev/null +++ b/data/hfopenllm_v2/AELLM/gemma-2-lyco-infinity-9b/95f44ef8-e5ba-4bdc-97a7-2c5a678b07be.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/AELLM_gemma-2-lyco-infinity-9b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-lyco-infinity-9b", + "id": "AELLM/gemma-2-lyco-infinity-9b", + "developer": "AELLM", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7316 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.584 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1707 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.328 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4006 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3787 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AGI-0/Art-v0-3B/082f25f0-994c-438a-8086-b1e439aca466.json b/data/hfopenllm_v2/AGI-0/Art-v0-3B/082f25f0-994c-438a-8086-b1e439aca466.json new file mode 100644 index 000000000..521160bae --- /dev/null +++ b/data/hfopenllm_v2/AGI-0/Art-v0-3B/082f25f0-994c-438a-8086-b1e439aca466.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/AGI-0_Art-v0-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Art-v0-3B", + "id": "AGI-0/Art-v0-3B", + "developer": "AGI-0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3192 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3401 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + 
"metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2462 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3768 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1179 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AGI-0/Art-v0-3B/162b6d5f-f983-4989-9603-f6baea26b633.json b/data/hfopenllm_v2/AGI-0/Art-v0-3B/162b6d5f-f983-4989-9603-f6baea26b633.json deleted file mode 100644 index 1e3cd0456..000000000 --- a/data/hfopenllm_v2/AGI-0/Art-v0-3B/162b6d5f-f983-4989-9603-f6baea26b633.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/AGI-0_Art-v0-3B/1762652579.473539", - "retrieved_timestamp": "1762652579.47354", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "AGI-0/Art-v0-3B", - "developer": "AGI-0", - "inference_platform": "unknown", - "id": "AGI-0/Art-v0-3B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.319238509377341 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3400959483013824 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24622356495468278 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3768229166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11785239361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/AGI-0/Artificium-llama3.1-8B-001/31423cbd-08cd-4079-b1c5-ba412acf1b51.json b/data/hfopenllm_v2/AGI-0/Artificium-llama3.1-8B-001/31423cbd-08cd-4079-b1c5-ba412acf1b51.json new file mode 100644 index 000000000..39747f5fd --- /dev/null +++ b/data/hfopenllm_v2/AGI-0/Artificium-llama3.1-8B-001/31423cbd-08cd-4079-b1c5-ba412acf1b51.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/AGI-0_Artificium-llama3.1-8B-001/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Artificium-llama3.1-8B-001", + "id": "AGI-0/Artificium-llama3.1-8B-001", + "developer": "AGI-0", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5248 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4256 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.136 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3795 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3182 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AGI-0/smartllama3.1-8B-001/2669bd86-da65-4d87-8464-bfa8c741ce0b.json b/data/hfopenllm_v2/AGI-0/smartllama3.1-8B-001/2669bd86-da65-4d87-8464-bfa8c741ce0b.json new file mode 100644 index 000000000..2a80fe730 --- /dev/null +++ b/data/hfopenllm_v2/AGI-0/smartllama3.1-8B-001/2669bd86-da65-4d87-8464-bfa8c741ce0b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/AGI-0_smartllama3.1-8B-001/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "smartllama3.1-8B-001", + "id": "AGI-0/smartllama3.1-8B-001", + "developer": "AGI-0", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3518 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.467 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1299 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4386 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3487 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/AI-MO/NuminaMath-7B-CoT/9ac2ba3c-9a21-46b2-a21c-4909cfae6315.json b/data/hfopenllm_v2/AI-MO/NuminaMath-7B-CoT/9ac2ba3c-9a21-46b2-a21c-4909cfae6315.json deleted file mode 100644 index 21e6b5be2..000000000 --- a/data/hfopenllm_v2/AI-MO/NuminaMath-7B-CoT/9ac2ba3c-9a21-46b2-a21c-4909cfae6315.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/AI-MO_NuminaMath-7B-CoT/1762652579.474318", - "retrieved_timestamp": "1762652579.4743192", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "AI-MO/NuminaMath-7B-CoT", - "developer": "AI-MO", - "inference_platform": "unknown", - "id": "AI-MO/NuminaMath-7B-CoT", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.91 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2688544173903022 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4314193495860012 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26963746223564955 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33034375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28681848404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/AI-MO/NuminaMath-7B-CoT/ab2c19ff-5671-446f-b09e-731e2ae515ca.json b/data/hfopenllm_v2/AI-MO/NuminaMath-7B-CoT/ab2c19ff-5671-446f-b09e-731e2ae515ca.json new file mode 100644 index 000000000..885230cbc --- /dev/null +++ b/data/hfopenllm_v2/AI-MO/NuminaMath-7B-CoT/ab2c19ff-5671-446f-b09e-731e2ae515ca.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/AI-MO_NuminaMath-7B-CoT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NuminaMath-7B-CoT", + "id": "AI-MO/NuminaMath-7B-CoT", + "developer": "AI-MO", + "inference_platform": "unknown", + "additional_details": { + 
"precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.91 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2689 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4314 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2696 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3303 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2868 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AI-MO/NuminaMath-7B-TIR/0ffa78d4-fe45-4639-bcd1-eb19ab168a35.json b/data/hfopenllm_v2/AI-MO/NuminaMath-7B-TIR/0ffa78d4-fe45-4639-bcd1-eb19ab168a35.json deleted file mode 100644 index 27ba37142..000000000 --- a/data/hfopenllm_v2/AI-MO/NuminaMath-7B-TIR/0ffa78d4-fe45-4639-bcd1-eb19ab168a35.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/AI-MO_NuminaMath-7B-TIR/1762652579.474566", - "retrieved_timestamp": "1762652579.474567", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "AI-MO/NuminaMath-7B-TIR", - "developer": "AI-MO", - "inference_platform": "unknown", - "id": "AI-MO/NuminaMath-7B-TIR", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.91 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27562423259174545 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41436913375897894 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1608761329305136 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35092708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2732712765957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/AI-MO/NuminaMath-7B-TIR/36250dc3-cb51-43be-8ab0-6788eb5bda7c.json b/data/hfopenllm_v2/AI-MO/NuminaMath-7B-TIR/36250dc3-cb51-43be-8ab0-6788eb5bda7c.json new file mode 100644 index 000000000..d1e8152de --- /dev/null +++ b/data/hfopenllm_v2/AI-MO/NuminaMath-7B-TIR/36250dc3-cb51-43be-8ab0-6788eb5bda7c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/AI-MO_NuminaMath-7B-TIR/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NuminaMath-7B-TIR", + "id": "AI-MO/NuminaMath-7B-TIR", + "developer": "AI-MO", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.91 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2756 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4144 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1609 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3509 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2733 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AI-Sweden-Models/Llama-3-8B-instruct/1d68bd2e-de6e-4327-a8f1-33322eba537e.json b/data/hfopenllm_v2/AI-Sweden-Models/Llama-3-8B-instruct/1d68bd2e-de6e-4327-a8f1-33322eba537e.json deleted file mode 100644 index 0a0d4a1bd..000000000 --- a/data/hfopenllm_v2/AI-Sweden-Models/Llama-3-8B-instruct/1d68bd2e-de6e-4327-a8f1-33322eba537e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/AI-Sweden-Models_Llama-3-8B-instruct/1762652579.474785", - "retrieved_timestamp": "1762652579.474786", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "AI-Sweden-Models/Llama-3-8B-instruct", - "developer": "AI-Sweden-Models", - "inference_platform": "unknown", - "id": "AI-Sweden-Models/Llama-3-8B-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24012841482821137 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4173460154515302 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { 
- "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47709375000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25972406914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/AI-Sweden-Models/Llama-3-8B-instruct/cd616d6a-151f-4aaa-93b5-9c4a758f95b5.json b/data/hfopenllm_v2/AI-Sweden-Models/Llama-3-8B-instruct/cd616d6a-151f-4aaa-93b5-9c4a758f95b5.json new file mode 100644 index 000000000..4c342ae03 --- /dev/null +++ b/data/hfopenllm_v2/AI-Sweden-Models/Llama-3-8B-instruct/cd616d6a-151f-4aaa-93b5-9c4a758f95b5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/AI-Sweden-Models_Llama-3-8B-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-instruct", + "id": "AI-Sweden-Models/Llama-3-8B-instruct", + "developer": "AI-Sweden-Models", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2401 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4173 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0385 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4771 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + 
"source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2597 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AI-Sweden-Models/gpt-sw3-40b/9cb09cae-9b1b-43b1-afbf-f44b0a44053c.json b/data/hfopenllm_v2/AI-Sweden-Models/gpt-sw3-40b/9cb09cae-9b1b-43b1-afbf-f44b0a44053c.json new file mode 100644 index 000000000..d5848d50e --- /dev/null +++ b/data/hfopenllm_v2/AI-Sweden-Models/gpt-sw3-40b/9cb09cae-9b1b-43b1-afbf-f44b0a44053c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/AI-Sweden-Models_gpt-sw3-40b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gpt-sw3-40b", + "id": "AI-Sweden-Models/gpt-sw3-40b", + "developer": "AI-Sweden-Models", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GPT2LMHeadModel", + "params_billions": 39.927 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.147 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3268 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0174 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2349 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3632 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.1276 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AI4free/Dhanishtha/038c32da-add5-4299-ac17-df6ef3fdea58.json b/data/hfopenllm_v2/AI4free/Dhanishtha/038c32da-add5-4299-ac17-df6ef3fdea58.json new file mode 100644 index 000000000..ead44ad6b --- /dev/null +++ b/data/hfopenllm_v2/AI4free/Dhanishtha/038c32da-add5-4299-ac17-df6ef3fdea58.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/AI4free_Dhanishtha/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dhanishtha", + "id": "AI4free/Dhanishtha", + "developer": "AI4free", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2451 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3404 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.256 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2525 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3569 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1643 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AI4free/Dhanishtha/a554a3eb-943c-4135-966b-929129ef025d.json b/data/hfopenllm_v2/AI4free/Dhanishtha/a554a3eb-943c-4135-966b-929129ef025d.json deleted file mode 100644 index 2ded5b849..000000000 --- 
a/data/hfopenllm_v2/AI4free/Dhanishtha/a554a3eb-943c-4135-966b-929129ef025d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/AI4free_Dhanishtha/1762652579.475332", - "retrieved_timestamp": "1762652579.475332", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "AI4free/Dhanishtha", - "developer": "AI4free", - "inference_platform": "unknown", - "id": "AI4free/Dhanishtha", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2451240486353985 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34039444943326375 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25604229607250756 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35694791666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16431183510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/AI4free/t2/25eb4bdf-beb4-4ad2-a5e9-3a2f31c46cb5.json b/data/hfopenllm_v2/AI4free/t2/25eb4bdf-beb4-4ad2-a5e9-3a2f31c46cb5.json new file mode 100644 index 000000000..f06b40189 --- /dev/null +++ b/data/hfopenllm_v2/AI4free/t2/25eb4bdf-beb4-4ad2-a5e9-3a2f31c46cb5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/AI4free_t2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "t2", + "id": "AI4free/t2", + "developer": "AI4free", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3867 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.291 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1896 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3846 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1144 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AI4free/t2/332ccdb5-faf5-47c6-afeb-a91d2148adf0.json b/data/hfopenllm_v2/AI4free/t2/332ccdb5-faf5-47c6-afeb-a91d2148adf0.json deleted file mode 100644 index 8b8a21147..000000000 --- a/data/hfopenllm_v2/AI4free/t2/332ccdb5-faf5-47c6-afeb-a91d2148adf0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/AI4free_t2/1762652579.475577", - "retrieved_timestamp": "1762652579.475578", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "AI4free/t2", - "developer": "AI4free", - "inference_platform": "unknown", - "id": "AI4free/t2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3866828902866616 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2910111436321769 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18957703927492447 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3846354166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11436170212765957 - } - } - ] -} diff --git a/data/hfopenllm_v2/AIDC-AI/Marco-o1/17f7398f-675d-4b38-b233-64fc106737c3.json b/data/hfopenllm_v2/AIDC-AI/Marco-o1/17f7398f-675d-4b38-b233-64fc106737c3.json deleted file mode 100644 index aad9194d0..000000000 --- a/data/hfopenllm_v2/AIDC-AI/Marco-o1/17f7398f-675d-4b38-b233-64fc106737c3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/AIDC-AI_Marco-o1/1762652579.47579", - "retrieved_timestamp": "1762652579.4757912", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "AIDC-AI/Marco-o1", - "developer": "AIDC-AI", - "inference_platform": "unknown", - "id": "AIDC-AI/Marco-o1", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.477083028586373 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5364362696398749 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37462235649546827 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.41384375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41165226063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/AIDC-AI/Marco-o1/77655d60-872f-468a-acc6-d584ef5bf46a.json b/data/hfopenllm_v2/AIDC-AI/Marco-o1/77655d60-872f-468a-acc6-d584ef5bf46a.json new file mode 100644 index 000000000..a71a6c124 --- /dev/null +++ b/data/hfopenllm_v2/AIDC-AI/Marco-o1/77655d60-872f-468a-acc6-d584ef5bf46a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/AIDC-AI_Marco-o1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Marco-o1", + "id": "AIDC-AI/Marco-o1", + "developer": "AIDC-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4771 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5364 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3746 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4138 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4117 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/Aashraf995/Creative-7B-nerd/4de378c8-ccf6-4f0b-8287-3d138a8645b9.json b/data/hfopenllm_v2/Aashraf995/Creative-7B-nerd/4de378c8-ccf6-4f0b-8287-3d138a8645b9.json new file mode 100644 index 000000000..90198bb55 --- /dev/null +++ b/data/hfopenllm_v2/Aashraf995/Creative-7B-nerd/4de378c8-ccf6-4f0b-8287-3d138a8645b9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Aashraf995_Creative-7B-nerd/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Creative-7B-nerd", + "id": "Aashraf995/Creative-7B-nerd", + "developer": "Aashraf995", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4722 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5607 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3165 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3263 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4515 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4492 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Aashraf995/Creative-7B-nerd/7ea9f4db-5b52-40a5-904e-785e43302934.json b/data/hfopenllm_v2/Aashraf995/Creative-7B-nerd/7ea9f4db-5b52-40a5-904e-785e43302934.json deleted file mode 100644 index a161150a8..000000000 --- 
a/data/hfopenllm_v2/Aashraf995/Creative-7B-nerd/7ea9f4db-5b52-40a5-904e-785e43302934.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Aashraf995_Creative-7B-nerd/1762652579.476046", - "retrieved_timestamp": "1762652579.476046", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Aashraf995/Creative-7B-nerd", - "developer": "Aashraf995", - "inference_platform": "unknown", - "id": "Aashraf995/Creative-7B-nerd", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4721871301480073 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5606785565640195 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3164652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3263422818791946 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4515416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44921875 - } - } - ] -} diff --git a/data/hfopenllm_v2/Aashraf995/Gemma-Evo-10B/8039cadf-6644-44e7-8452-90e9c8069e28.json b/data/hfopenllm_v2/Aashraf995/Gemma-Evo-10B/8039cadf-6644-44e7-8452-90e9c8069e28.json new file mode 100644 index 000000000..915792a80 --- /dev/null +++ b/data/hfopenllm_v2/Aashraf995/Gemma-Evo-10B/8039cadf-6644-44e7-8452-90e9c8069e28.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Aashraf995_Gemma-Evo-10B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma-Evo-10B", + "id": "Aashraf995/Gemma-Evo-10B", + "developer": "Aashraf995", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": 
"IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7332 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6044 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2228 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.354 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4595 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4275 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Aashraf995/Qwen-Evo-7B/8914d89d-c873-4704-998e-dc807e96030b.json b/data/hfopenllm_v2/Aashraf995/Qwen-Evo-7B/8914d89d-c873-4704-998e-dc807e96030b.json new file mode 100644 index 000000000..b958767ce --- /dev/null +++ b/data/hfopenllm_v2/Aashraf995/Qwen-Evo-7B/8914d89d-c873-4704-998e-dc807e96030b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Aashraf995_Qwen-Evo-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen-Evo-7B", + "id": "Aashraf995/Qwen-Evo-7B", + "developer": "Aashraf995", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4757 + } + 
}, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5709 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3142 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3255 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4541 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4462 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Aashraf995/QwenStock-14B/c2e9fc29-db07-4b49-a98a-084158831ac4.json b/data/hfopenllm_v2/Aashraf995/QwenStock-14B/c2e9fc29-db07-4b49-a98a-084158831ac4.json new file mode 100644 index 000000000..2181091a7 --- /dev/null +++ b/data/hfopenllm_v2/Aashraf995/QwenStock-14B/c2e9fc29-db07-4b49-a98a-084158831ac4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Aashraf995_QwenStock-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwenStock-14B", + "id": "Aashraf995/QwenStock-14B", + "developer": "Aashraf995", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5009 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.655 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3573 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3893 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4793 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5382 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AbacusResearch/Jallabi-34B/58724539-6fc5-40d9-ba43-87410959894d.json b/data/hfopenllm_v2/AbacusResearch/Jallabi-34B/58724539-6fc5-40d9-ba43-87410959894d.json new file mode 100644 index 000000000..51b7d1999 --- /dev/null +++ b/data/hfopenllm_v2/AbacusResearch/Jallabi-34B/58724539-6fc5-40d9-ba43-87410959894d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/AbacusResearch_Jallabi-34B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Jallabi-34B", + "id": "AbacusResearch/Jallabi-34B", + "developer": "AbacusResearch", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3529 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6023 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, 
+ "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0521 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3389 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4822 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4682 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AbacusResearch/Jallabi-34B/76397277-901a-4ad0-9dae-0351ca875ec6.json b/data/hfopenllm_v2/AbacusResearch/Jallabi-34B/76397277-901a-4ad0-9dae-0351ca875ec6.json deleted file mode 100644 index 2f471d1fc..000000000 --- a/data/hfopenllm_v2/AbacusResearch/Jallabi-34B/76397277-901a-4ad0-9dae-0351ca875ec6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/AbacusResearch_Jallabi-34B/1762652579.477037", - "retrieved_timestamp": "1762652579.4770381", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "AbacusResearch/Jallabi-34B", - "developer": "AbacusResearch", - "inference_platform": "unknown", - "id": "AbacusResearch/Jallabi-34B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3528604103777976 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6023380603196266 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05211480362537765 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3389261744966443 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48217708333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4681682180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/Ahdoot/StructuredThinker-v0.3-MoreStructure/81a5aafb-2cf7-490d-b619-ce638fcc8b38.json b/data/hfopenllm_v2/Ahdoot/StructuredThinker-v0.3-MoreStructure/81a5aafb-2cf7-490d-b619-ce638fcc8b38.json deleted file mode 100644 index 720216ee7..000000000 --- a/data/hfopenllm_v2/Ahdoot/StructuredThinker-v0.3-MoreStructure/81a5aafb-2cf7-490d-b619-ce638fcc8b38.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Ahdoot_StructuredThinker-v0.3-MoreStructure/1762652579.4772868", - "retrieved_timestamp": "1762652579.477288", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Ahdoot/StructuredThinker-v0.3-MoreStructure", - "developer": "Ahdoot", - "inference_platform": "unknown", - "id": "Ahdoot/StructuredThinker-v0.3-MoreStructure", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4192808415005519 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48376906494893984 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.290785498489426 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41582291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36103723404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/Ahdoot/StructuredThinker-v0.3-MoreStructure/b13324cf-f6f5-4bf1-9cf3-c196120c4bcf.json 
b/data/hfopenllm_v2/Ahdoot/StructuredThinker-v0.3-MoreStructure/b13324cf-f6f5-4bf1-9cf3-c196120c4bcf.json new file mode 100644 index 000000000..a962961f2 --- /dev/null +++ b/data/hfopenllm_v2/Ahdoot/StructuredThinker-v0.3-MoreStructure/b13324cf-f6f5-4bf1-9cf3-c196120c4bcf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Ahdoot_StructuredThinker-v0.3-MoreStructure/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "StructuredThinker-v0.3-MoreStructure", + "id": "Ahdoot/StructuredThinker-v0.3-MoreStructure", + "developer": "Ahdoot", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4193 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4838 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2908 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4158 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.361 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Ahdoot/Test_StealthThinker/43c907eb-3e43-47ff-b38d-f912ba6ef46c.json b/data/hfopenllm_v2/Ahdoot/Test_StealthThinker/43c907eb-3e43-47ff-b38d-f912ba6ef46c.json deleted file mode 100644 index 5f94888cd..000000000 --- 
a/data/hfopenllm_v2/Ahdoot/Test_StealthThinker/43c907eb-3e43-47ff-b38d-f912ba6ef46c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Ahdoot_Test_StealthThinker/1762652579.4775438", - "retrieved_timestamp": "1762652579.4775438", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Ahdoot/Test_StealthThinker", - "developer": "Ahdoot", - "inference_platform": "unknown", - "id": "Ahdoot/Test_StealthThinker", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42200361706937595 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46466398134666304 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17900302114803626 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42804166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35970744680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/Ahdoot/Test_StealthThinker/782b2df0-d1b3-414c-a4bd-59052a4441a9.json b/data/hfopenllm_v2/Ahdoot/Test_StealthThinker/782b2df0-d1b3-414c-a4bd-59052a4441a9.json new file mode 100644 index 000000000..717c7bcb8 --- /dev/null +++ b/data/hfopenllm_v2/Ahdoot/Test_StealthThinker/782b2df0-d1b3-414c-a4bd-59052a4441a9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Ahdoot_Test_StealthThinker/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Test_StealthThinker", + "id": "Ahdoot/Test_StealthThinker", + "developer": "Ahdoot", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + 
"dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.422 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4647 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.179 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.428 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3597 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V0-Coder/48732edf-8baf-438e-8a5c-763eee6c0c18.json b/data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V0-Coder/48732edf-8baf-438e-8a5c-763eee6c0c18.json deleted file mode 100644 index ed2a26d4f..000000000 --- a/data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V0-Coder/48732edf-8baf-438e-8a5c-763eee6c0c18.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/AicoresSecurity_Cybernet-Sec-3B-R1-V0-Coder/1762652579.478028", - "retrieved_timestamp": "1762652579.478029", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "AicoresSecurity/Cybernet-Sec-3B-R1-V0-Coder", - "developer": "AicoresSecurity", - "inference_platform": "unknown", - "id": "AicoresSecurity/Cybernet-Sec-3B-R1-V0-Coder", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy 
on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7097656440466851 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4477501104993749 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1487915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34079166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3178191489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V0-Coder/b508e41e-0f1c-49ce-8b80-5e7ec82b8f15.json b/data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V0-Coder/b508e41e-0f1c-49ce-8b80-5e7ec82b8f15.json new file mode 100644 index 000000000..8c4cb6a4c --- /dev/null +++ b/data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V0-Coder/b508e41e-0f1c-49ce-8b80-5e7ec82b8f15.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/AicoresSecurity_Cybernet-Sec-3B-R1-V0-Coder/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Cybernet-Sec-3B-R1-V0-Coder", + "id": "AicoresSecurity/Cybernet-Sec-3B-R1-V0-Coder", + "developer": "AicoresSecurity", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7098 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4478 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1488 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2718 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3408 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3178 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V0/2824e8d4-2749-4b18-a3a1-b987ed215ac6.json b/data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V0/2824e8d4-2749-4b18-a3a1-b987ed215ac6.json new file mode 100644 index 000000000..5acadd460 --- /dev/null +++ b/data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V0/2824e8d4-2749-4b18-a3a1-b987ed215ac6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/AicoresSecurity_Cybernet-Sec-3B-R1-V0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Cybernet-Sec-3B-R1-V0", + "id": "AicoresSecurity/Cybernet-Sec-3B-R1-V0", + "developer": "AicoresSecurity", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6358 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4497 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1156 + } + 
}, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3314 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.301 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V0/38f169f0-e939-4b12-8f78-b2a27fb90de0.json b/data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V0/38f169f0-e939-4b12-8f78-b2a27fb90de0.json deleted file mode 100644 index 134a985bb..000000000 --- a/data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V0/38f169f0-e939-4b12-8f78-b2a27fb90de0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/AicoresSecurity_Cybernet-Sec-3B-R1-V0/1762652579.4777558", - "retrieved_timestamp": "1762652579.477757", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "AicoresSecurity/Cybernet-Sec-3B-R1-V0", - "developer": "AicoresSecurity", - "inference_platform": "unknown", - "id": "AicoresSecurity/Cybernet-Sec-3B-R1-V0", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6358018945287394 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4497434194912941 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11555891238670694 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33136458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.301030585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V1.1/53176984-ba93-4a64-b81e-21f6e0f65bcd.json b/data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V1.1/53176984-ba93-4a64-b81e-21f6e0f65bcd.json new file mode 100644 index 000000000..6634b986e --- /dev/null +++ b/data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V1.1/53176984-ba93-4a64-b81e-21f6e0f65bcd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/AicoresSecurity_Cybernet-Sec-3B-R1-V1.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Cybernet-Sec-3B-R1-V1.1", + "id": "AicoresSecurity/Cybernet-Sec-3B-R1-V1.1", + "developer": "AicoresSecurity", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.673 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4392 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.176 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3541 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3088 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V1.1/e8c63728-a1f5-432f-bf9f-204b0f4041aa.json b/data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V1.1/e8c63728-a1f5-432f-bf9f-204b0f4041aa.json deleted file mode 100644 index 4bd0400b8..000000000 --- a/data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V1.1/e8c63728-a1f5-432f-bf9f-204b0f4041aa.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/AicoresSecurity_Cybernet-Sec-3B-R1-V1.1/1762652579.478466", - "retrieved_timestamp": "1762652579.478467", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "AicoresSecurity/Cybernet-Sec-3B-R1-V1.1", - "developer": "AicoresSecurity", - "inference_platform": "unknown", - "id": "AicoresSecurity/Cybernet-Sec-3B-R1-V1.1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6730209178313542 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4391775517124728 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17598187311178248 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35409375000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.308843085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V1/53252698-7d17-4f2a-9106-3b744ae7a985.json b/data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V1/53252698-7d17-4f2a-9106-3b744ae7a985.json new file mode 100644 index 000000000..0a69dcaf4 --- /dev/null +++ b/data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V1/53252698-7d17-4f2a-9106-3b744ae7a985.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/AicoresSecurity_Cybernet-Sec-3B-R1-V1/1770682486.623709", + 
"retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Cybernet-Sec-3B-R1-V1", + "id": "AicoresSecurity/Cybernet-Sec-3B-R1-V1", + "developer": "AicoresSecurity", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6146 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4282 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1518 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3287 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2876 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V1/b613ecbe-7b2b-4b03-ab2c-163f9988a8fc.json b/data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V1/b613ecbe-7b2b-4b03-ab2c-163f9988a8fc.json deleted file mode 100644 index 7c9561532..000000000 --- a/data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V1/b613ecbe-7b2b-4b03-ab2c-163f9988a8fc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/AicoresSecurity_Cybernet-Sec-3B-R1-V1/1762652579.478252", - "retrieved_timestamp": "1762652579.4782531", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "AicoresSecurity/Cybernet-Sec-3B-R1-V1", - "developer": "AicoresSecurity", - "inference_platform": "unknown", - "id": "AicoresSecurity/Cybernet-Sec-3B-R1-V1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6145693426774292 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4282342020189216 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15181268882175228 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32869791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2876496010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/Alepach/notHumpback-M0/1a4477f7-c414-41ab-bbcb-593f4a86031a.json b/data/hfopenllm_v2/Alepach/notHumpback-M0/1a4477f7-c414-41ab-bbcb-593f4a86031a.json deleted file mode 100644 index 3a7dd8faf..000000000 --- a/data/hfopenllm_v2/Alepach/notHumpback-M0/1a4477f7-c414-41ab-bbcb-593f4a86031a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Alepach_notHumpback-M0/1762652579.4786859", - "retrieved_timestamp": "1762652579.478687", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Alepach/notHumpback-M0", - "developer": "Alepach", - "inference_platform": "unknown", - "id": "Alepach/notHumpback-M0", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23500755772461512 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": 
"Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27849287879199425 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0188821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24916107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35523958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1118683510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/Alepach/notHumpback-M0/6dd0f3a2-27ee-48f1-9d97-ef6954d298c8.json b/data/hfopenllm_v2/Alepach/notHumpback-M0/6dd0f3a2-27ee-48f1-9d97-ef6954d298c8.json new file mode 100644 index 000000000..c2db43af7 --- /dev/null +++ b/data/hfopenllm_v2/Alepach/notHumpback-M0/6dd0f3a2-27ee-48f1-9d97-ef6954d298c8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Alepach_notHumpback-M0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "notHumpback-M0", + "id": "Alepach/notHumpback-M0", + "developer": "Alepach", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.235 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2785 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0189 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2492 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3552 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1119 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Alepach/notHumpback-M1-v2/27c6c36d-6bd5-439b-bdc8-1bd0f8f4c9ea.json b/data/hfopenllm_v2/Alepach/notHumpback-M1-v2/27c6c36d-6bd5-439b-bdc8-1bd0f8f4c9ea.json deleted file mode 100644 index 51f414a65..000000000 --- a/data/hfopenllm_v2/Alepach/notHumpback-M1-v2/27c6c36d-6bd5-439b-bdc8-1bd0f8f4c9ea.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Alepach_notHumpback-M1-v2/1762652579.4791439", - "retrieved_timestamp": "1762652579.479145", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Alepach/notHumpback-M1-v2", - "developer": "Alepach", - "inference_platform": "unknown", - "id": "Alepach/notHumpback-M1-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2277135777514772 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2775640398406834 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3473333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1118683510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/Alepach/notHumpback-M1-v2/35f11d5e-88c4-4a95-8d06-a40bee648b00.json b/data/hfopenllm_v2/Alepach/notHumpback-M1-v2/35f11d5e-88c4-4a95-8d06-a40bee648b00.json new file mode 100644 index 000000000..63b777f1c --- /dev/null +++ b/data/hfopenllm_v2/Alepach/notHumpback-M1-v2/35f11d5e-88c4-4a95-8d06-a40bee648b00.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Alepach_notHumpback-M1-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "notHumpback-M1-v2", + "id": "Alepach/notHumpback-M1-v2", + "developer": "Alepach", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2277 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2776 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0219 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3473 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1119 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Alepach/notHumpback-M1/030f17b0-036f-4021-90da-6c1d38da659d.json 
b/data/hfopenllm_v2/Alepach/notHumpback-M1/030f17b0-036f-4021-90da-6c1d38da659d.json deleted file mode 100644 index 2b0d336ba..000000000 --- a/data/hfopenllm_v2/Alepach/notHumpback-M1/030f17b0-036f-4021-90da-6c1d38da659d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Alepach_notHumpback-M1/1762652579.478936", - "retrieved_timestamp": "1762652579.4789371", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Alepach/notHumpback-M1", - "developer": "Alepach", - "inference_platform": "unknown", - "id": "Alepach/notHumpback-M1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2206944241279804 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28824720129981835 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23741610738255034 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.342 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10912566489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/Alepach/notHumpback-M1/ba1193c0-42b8-487d-b9fd-ddbc1fd15359.json b/data/hfopenllm_v2/Alepach/notHumpback-M1/ba1193c0-42b8-487d-b9fd-ddbc1fd15359.json new file mode 100644 index 000000000..80d910c4c --- /dev/null +++ b/data/hfopenllm_v2/Alepach/notHumpback-M1/ba1193c0-42b8-487d-b9fd-ddbc1fd15359.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Alepach_notHumpback-M1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "notHumpback-M1", + "id": "Alepach/notHumpback-M1", + "developer": "Alepach", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + 
}, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2207 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2882 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0159 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2374 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.342 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1091 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Alibaba-NLP/gte-Qwen2-7B-instruct/39ea9329-5ed7-46ea-bcc4-30679a63b405.json b/data/hfopenllm_v2/Alibaba-NLP/gte-Qwen2-7B-instruct/39ea9329-5ed7-46ea-bcc4-30679a63b405.json deleted file mode 100644 index 03f19aa78..000000000 --- a/data/hfopenllm_v2/Alibaba-NLP/gte-Qwen2-7B-instruct/39ea9329-5ed7-46ea-bcc4-30679a63b405.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Alibaba-NLP_gte-Qwen2-7B-instruct/1762652579.479603", - "retrieved_timestamp": "1762652579.479604", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Alibaba-NLP/gte-Qwen2-7B-instruct", - "developer": "Alibaba-NLP", - "inference_platform": "unknown", - "id": "Alibaba-NLP/gte-Qwen2-7B-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22554045488193547 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4495144990818469 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06419939577039276 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24496644295302014 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35585416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33211436170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/Alibaba-NLP/gte-Qwen2-7B-instruct/95733620-e1e7-4442-b9c3-a699165df5e7.json b/data/hfopenllm_v2/Alibaba-NLP/gte-Qwen2-7B-instruct/95733620-e1e7-4442-b9c3-a699165df5e7.json new file mode 100644 index 000000000..2205af9c9 --- /dev/null +++ b/data/hfopenllm_v2/Alibaba-NLP/gte-Qwen2-7B-instruct/95733620-e1e7-4442-b9c3-a699165df5e7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Alibaba-NLP_gte-Qwen2-7B-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gte-Qwen2-7B-instruct", + "id": "Alibaba-NLP/gte-Qwen2-7B-instruct", + "developer": "Alibaba-NLP", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2255 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4495 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": 
"Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0642 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.245 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3559 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3321 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Alsebay/Qwen2.5-7B-test-novelist/cacfce0d-f5f1-4101-8065-f5f02eaab1fb.json b/data/hfopenllm_v2/Alsebay/Qwen2.5-7B-test-novelist/cacfce0d-f5f1-4101-8065-f5f02eaab1fb.json new file mode 100644 index 000000000..ea8074956 --- /dev/null +++ b/data/hfopenllm_v2/Alsebay/Qwen2.5-7B-test-novelist/cacfce0d-f5f1-4101-8065-f5f02eaab1fb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Alsebay_Qwen2.5-7B-test-novelist/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-test-novelist", + "id": "Alsebay/Qwen2.5-7B-test-novelist", + "developer": "Alsebay", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5352 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5151 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2349 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + 
"dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4749 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3866 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Amaorynho/BBAI2006/72be5537-198a-43e9-9840-a803083158d3.json b/data/hfopenllm_v2/Amaorynho/BBAI2006/72be5537-198a-43e9-9840-a803083158d3.json new file mode 100644 index 000000000..0d3a4cdd2 --- /dev/null +++ b/data/hfopenllm_v2/Amaorynho/BBAI2006/72be5537-198a-43e9-9840-a803083158d3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Amaorynho_BBAI2006/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BBAI2006", + "id": "Amaorynho/BBAI2006", + "developer": "Amaorynho", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.09 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1467 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2704 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2525 + } + }, + { + 
"evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3605 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1123 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Amaorynho/BBAI2006/ef37c096-a089-4d3e-9fad-c0f959a18bb3.json b/data/hfopenllm_v2/Amaorynho/BBAI2006/ef37c096-a089-4d3e-9fad-c0f959a18bb3.json deleted file mode 100644 index 5a16e6985..000000000 --- a/data/hfopenllm_v2/Amaorynho/BBAI2006/ef37c096-a089-4d3e-9fad-c0f959a18bb3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Amaorynho_BBAI2006/1762652579.480136", - "retrieved_timestamp": "1762652579.4801369", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Amaorynho/BBAI2006", - "developer": "Amaorynho", - "inference_platform": "unknown", - "id": "Amaorynho/BBAI2006", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.09 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14670518668244703 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2704366990167133 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3605416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11228390957446809 - } - } - ] -} diff --git a/data/hfopenllm_v2/Amaorynho/BBAI270V4/183313de-d526-42a9-a35d-a4e71466e546.json 
b/data/hfopenllm_v2/Amaorynho/BBAI270V4/183313de-d526-42a9-a35d-a4e71466e546.json deleted file mode 100644 index c71c7a534..000000000 --- a/data/hfopenllm_v2/Amaorynho/BBAI270V4/183313de-d526-42a9-a35d-a4e71466e546.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Amaorynho_BBAI270V4/1762652579.4803882", - "retrieved_timestamp": "1762652579.4803882", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Amaorynho/BBAI270V4", - "developer": "Amaorynho", - "inference_platform": "unknown", - "id": "Amaorynho/BBAI270V4", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1990374428737971 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30712046736502824 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24580536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33139583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11136968085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/Amaorynho/BBAI270V4/2e9a3443-970d-4f37-a356-277a11c81754.json b/data/hfopenllm_v2/Amaorynho/BBAI270V4/2e9a3443-970d-4f37-a356-277a11c81754.json new file mode 100644 index 000000000..2399e7c15 --- /dev/null +++ b/data/hfopenllm_v2/Amaorynho/BBAI270V4/2e9a3443-970d-4f37-a356-277a11c81754.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Amaorynho_BBAI270V4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BBAI270V4", + "id": "Amaorynho/BBAI270V4", + "developer": "Amaorynho", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + 
"evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.199 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3071 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0083 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2458 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3314 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1114 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Amaorynho/BBAIIFEV1/1188402f-aa1c-4306-b031-c92ff0a5dd64.json b/data/hfopenllm_v2/Amaorynho/BBAIIFEV1/1188402f-aa1c-4306-b031-c92ff0a5dd64.json new file mode 100644 index 000000000..1e9f99354 --- /dev/null +++ b/data/hfopenllm_v2/Amaorynho/BBAIIFEV1/1188402f-aa1c-4306-b031-c92ff0a5dd64.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Amaorynho_BBAIIFEV1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BBAIIFEV1", + "id": "Amaorynho/BBAIIFEV1", + "developer": "Amaorynho", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8047 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5292 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1934 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4185 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3857 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Amaorynho/BBAIIFEV1/7c0342a3-5bd4-47b0-b238-d5dcb0f6236e.json b/data/hfopenllm_v2/Amaorynho/BBAIIFEV1/7c0342a3-5bd4-47b0-b238-d5dcb0f6236e.json deleted file mode 100644 index 3327e5433..000000000 --- a/data/hfopenllm_v2/Amaorynho/BBAIIFEV1/7c0342a3-5bd4-47b0-b238-d5dcb0f6236e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Amaorynho_BBAIIFEV1/1762652579.480599", - "retrieved_timestamp": "1762652579.4806", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Amaorynho/BBAIIFEV1", - "developer": "Amaorynho", - "inference_platform": "unknown", - "id": "Amaorynho/BBAIIFEV1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8047369867507104 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.5292462038560509 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1933534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4184895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3857214095744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/Amaorynho/BBAI_375/ad4b6e40-883c-47c5-ba33-6c112c2c6b09.json b/data/hfopenllm_v2/Amaorynho/BBAI_375/ad4b6e40-883c-47c5-ba33-6c112c2c6b09.json deleted file mode 100644 index 9a38e99d5..000000000 --- a/data/hfopenllm_v2/Amaorynho/BBAI_375/ad4b6e40-883c-47c5-ba33-6c112c2c6b09.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Amaorynho_BBAI_375/1762652579.480799", - "retrieved_timestamp": "1762652579.480799", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Amaorynho/BBAI_375", - "developer": "Amaorynho", - "inference_platform": "unknown", - "id": "Amaorynho/BBAI_375", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.09 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14670518668244703 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2704366990167133 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3605416666666667 - } - }, - { - "evaluation_name": 
"MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11228390957446809 - } - } - ] -} diff --git a/data/hfopenllm_v2/Amaorynho/BBAI_375/ee2f567a-6403-46d5-9a6b-bd029f81d660.json b/data/hfopenllm_v2/Amaorynho/BBAI_375/ee2f567a-6403-46d5-9a6b-bd029f81d660.json new file mode 100644 index 000000000..e92063a97 --- /dev/null +++ b/data/hfopenllm_v2/Amaorynho/BBAI_375/ee2f567a-6403-46d5-9a6b-bd029f81d660.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Amaorynho_BBAI_375/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BBAI_375", + "id": "Amaorynho/BBAI_375", + "developer": "Amaorynho", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.09 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1467 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2704 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2525 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3605 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1123 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/Amu/t1-1.5B/3e967795-680c-4bfc-906b-eadb969cf2bd.json b/data/hfopenllm_v2/Amu/t1-1.5B/3e967795-680c-4bfc-906b-eadb969cf2bd.json deleted file mode 100644 index 09ea5d0e5..000000000 --- a/data/hfopenllm_v2/Amu/t1-1.5B/3e967795-680c-4bfc-906b-eadb969cf2bd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Amu_t1-1.5B/1762652579.481014", - "retrieved_timestamp": "1762652579.481015", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Amu/t1-1.5B", - "developer": "Amu", - "inference_platform": "unknown", - "id": "Amu/t1-1.5B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3393717558300864 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4007606984109216 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24328859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3517083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2566489361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/Amu/t1-1.5B/d809fdff-f5ff-44f5-afc7-7e8af9ce2f93.json b/data/hfopenllm_v2/Amu/t1-1.5B/d809fdff-f5ff-44f5-afc7-7e8af9ce2f93.json new file mode 100644 index 000000000..c7e1777b9 --- /dev/null +++ b/data/hfopenllm_v2/Amu/t1-1.5B/d809fdff-f5ff-44f5-afc7-7e8af9ce2f93.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Amu_t1-1.5B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "t1-1.5B", + "id": "Amu/t1-1.5B", + "developer": "Amu", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + 
"evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3394 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4008 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0514 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2433 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3517 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2566 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Amu/t1-3B/87d66efc-173f-4c14-b76c-d8b7e00d575d.json b/data/hfopenllm_v2/Amu/t1-3B/87d66efc-173f-4c14-b76c-d8b7e00d575d.json new file mode 100644 index 000000000..8c8a648ec --- /dev/null +++ b/data/hfopenllm_v2/Amu/t1-3B/87d66efc-173f-4c14-b76c-d8b7e00d575d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Amu_t1-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "t1-3B", + "id": "Amu/t1-3B", + "developer": "Amu", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3328 + } + }, + { + 
"evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3999 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1375 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2408 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3435 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1284 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Amu/t1-3B/c0b7e3e6-4160-4482-af4f-038ae79c7578.json b/data/hfopenllm_v2/Amu/t1-3B/c0b7e3e6-4160-4482-af4f-038ae79c7578.json deleted file mode 100644 index 4a27d3c46..000000000 --- a/data/hfopenllm_v2/Amu/t1-3B/c0b7e3e6-4160-4482-af4f-038ae79c7578.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Amu_t1-3B/1762652579.481272", - "retrieved_timestamp": "1762652579.4812732", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Amu/t1-3B", - "developer": "Amu", - "inference_platform": "unknown", - "id": "Amu/t1-3B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33277703160946287 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39989750143834385 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact 
Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13746223564954682 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2407718120805369 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34348958333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12840757978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/ArliAI/ArliAI-RPMax-12B-v1.1/47f62378-c3cc-408f-a0d1-71eb3f522f57.json b/data/hfopenllm_v2/ArliAI/ArliAI-RPMax-12B-v1.1/47f62378-c3cc-408f-a0d1-71eb3f522f57.json new file mode 100644 index 000000000..6c27ae339 --- /dev/null +++ b/data/hfopenllm_v2/ArliAI/ArliAI-RPMax-12B-v1.1/47f62378-c3cc-408f-a0d1-71eb3f522f57.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ArliAI_ArliAI-RPMax-12B-v1.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ArliAI-RPMax-12B-v1.1", + "id": "ArliAI/ArliAI-RPMax-12B-v1.1", + "developer": "ArliAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5349 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4752 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1125 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2819 + } + }, + { + "evaluation_name": "MUSR", + "source_data": 
{ + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3618 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3384 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ArliAI/ArliAI-RPMax-12B-v1.1/63fc1679-8504-41a0-98d5-2d23aad57b81.json b/data/hfopenllm_v2/ArliAI/ArliAI-RPMax-12B-v1.1/63fc1679-8504-41a0-98d5-2d23aad57b81.json deleted file mode 100644 index 3573938bf..000000000 --- a/data/hfopenllm_v2/ArliAI/ArliAI-RPMax-12B-v1.1/63fc1679-8504-41a0-98d5-2d23aad57b81.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ArliAI_ArliAI-RPMax-12B-v1.1/1762652579.481497", - "retrieved_timestamp": "1762652579.481498", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ArliAI/ArliAI-RPMax-12B-v1.1", - "developer": "ArliAI", - "inference_platform": "unknown", - "id": "ArliAI/ArliAI-RPMax-12B-v1.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5348852156721942 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.475181760840119 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11253776435045318 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36184375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3384308510638298 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/ArliAI/Llama-3.1-8B-ArliAI-RPMax-v1.1/dba8c12c-388d-4f8b-8ce8-83acfc4920c7.json b/data/hfopenllm_v2/ArliAI/Llama-3.1-8B-ArliAI-RPMax-v1.1/dba8c12c-388d-4f8b-8ce8-83acfc4920c7.json new file mode 100644 index 000000000..c8b199daf --- /dev/null +++ b/data/hfopenllm_v2/ArliAI/Llama-3.1-8B-ArliAI-RPMax-v1.1/dba8c12c-388d-4f8b-8ce8-83acfc4920c7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ArliAI_Llama-3.1-8B-ArliAI-RPMax-v1.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8B-ArliAI-RPMax-v1.1", + "id": "ArliAI/Llama-3.1-8B-ArliAI-RPMax-v1.1", + "developer": "ArliAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6359 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5016 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1314 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3577 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3551 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Arthur-LAGACHERIE/Precis-1B-Instruct/d93c70b5-cb3b-4647-aa47-15c2401f5ebf.json b/data/hfopenllm_v2/Arthur-LAGACHERIE/Precis-1B-Instruct/d93c70b5-cb3b-4647-aa47-15c2401f5ebf.json 
deleted file mode 100644 index 46a118b27..000000000 --- a/data/hfopenllm_v2/Arthur-LAGACHERIE/Precis-1B-Instruct/d93c70b5-cb3b-4647-aa47-15c2401f5ebf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Arthur-LAGACHERIE_Precis-1B-Instruct/1762652579.482005", - "retrieved_timestamp": "1762652579.482006", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Arthur-LAGACHERIE/Precis-1B-Instruct", - "developer": "Arthur-LAGACHERIE", - "inference_platform": "unknown", - "id": "Arthur-LAGACHERIE/Precis-1B-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3670738086056109 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3223614510687368 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0037764350453172208 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34355208333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14261968085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/Arthur-LAGACHERIE/Precis-1B-Instruct/e4087285-1d1a-465e-ac88-91310e939710.json b/data/hfopenllm_v2/Arthur-LAGACHERIE/Precis-1B-Instruct/e4087285-1d1a-465e-ac88-91310e939710.json new file mode 100644 index 000000000..d6fc53fc3 --- /dev/null +++ b/data/hfopenllm_v2/Arthur-LAGACHERIE/Precis-1B-Instruct/e4087285-1d1a-465e-ac88-91310e939710.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Arthur-LAGACHERIE_Precis-1B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Precis-1B-Instruct", + "id": "Arthur-LAGACHERIE/Precis-1B-Instruct", + "developer": "Arthur-LAGACHERIE", + "inference_platform": "unknown", + "additional_details": { + "precision": 
"float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3671 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3224 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0038 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3436 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1426 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Artples/L-MChat-7b/09f189d9-74fd-47bb-b5fb-7994cba56ae2.json b/data/hfopenllm_v2/Artples/L-MChat-7b/09f189d9-74fd-47bb-b5fb-7994cba56ae2.json new file mode 100644 index 000000000..68e074cae --- /dev/null +++ b/data/hfopenllm_v2/Artples/L-MChat-7b/09f189d9-74fd-47bb-b5fb-7994cba56ae2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Artples_L-MChat-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L-MChat-7b", + "id": "Artples/L-MChat-7b", + "developer": "Artples", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5297 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.46 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0921 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4029 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3299 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Artples/L-MChat-7b/7aeaf034-1c02-4da7-b7b4-9a27ce759601.json b/data/hfopenllm_v2/Artples/L-MChat-7b/7aeaf034-1c02-4da7-b7b4-9a27ce759601.json deleted file mode 100644 index a1f4d092e..000000000 --- a/data/hfopenllm_v2/Artples/L-MChat-7b/7aeaf034-1c02-4da7-b7b4-9a27ce759601.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Artples_L-MChat-7b/1762652579.482251", - "retrieved_timestamp": "1762652579.482251", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Artples/L-MChat-7b", - "developer": "Artples", - "inference_platform": "unknown", - "id": "Artples/L-MChat-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5296646231997766 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46003301674679414 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09214501510574018 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4028645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3298703457446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/Artples/L-MChat-Small/0e5a84e3-b90f-4c20-ad58-4d1cf3517f28.json b/data/hfopenllm_v2/Artples/L-MChat-Small/0e5a84e3-b90f-4c20-ad58-4d1cf3517f28.json deleted file mode 100644 index 64a06c784..000000000 --- a/data/hfopenllm_v2/Artples/L-MChat-Small/0e5a84e3-b90f-4c20-ad58-4d1cf3517f28.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Artples_L-MChat-Small/1762652579.4824991", - "retrieved_timestamp": "1762652579.4825", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Artples/L-MChat-Small", - "developer": "Artples", - "inference_platform": "unknown", - "id": "Artples/L-MChat-Small", - "additional_details": { - "precision": "bfloat16", - "architecture": "PhiForCausalLM", - "params_billions": 2.78 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32870561222002065 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48225627665257265 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0377643504531722 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.36959375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24642619680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/Artples/L-MChat-Small/5754c262-6ddf-4f54-9722-22ff20a8d76f.json b/data/hfopenllm_v2/Artples/L-MChat-Small/5754c262-6ddf-4f54-9722-22ff20a8d76f.json new file mode 100644 index 000000000..fd60828c3 --- /dev/null +++ b/data/hfopenllm_v2/Artples/L-MChat-Small/5754c262-6ddf-4f54-9722-22ff20a8d76f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Artples_L-MChat-Small/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L-MChat-Small", + "id": "Artples/L-MChat-Small", + "developer": "Artples", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3287 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4823 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0378 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3696 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.2464 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Aryanne/QwentileSwap/cc1bd811-ec88-4514-8b47-4140ded4f03d.json b/data/hfopenllm_v2/Aryanne/QwentileSwap/cc1bd811-ec88-4514-8b47-4140ded4f03d.json new file mode 100644 index 000000000..cd69fff0a --- /dev/null +++ b/data/hfopenllm_v2/Aryanne/QwentileSwap/cc1bd811-ec88-4514-8b47-4140ded4f03d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Aryanne_QwentileSwap/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwentileSwap", + "id": "Aryanne/QwentileSwap", + "developer": "Aryanne", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7378 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7008 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4222 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3674 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.464 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5946 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Aryanne/SHBA/3f08155d-8551-4472-86fe-7988cd6df78b.json b/data/hfopenllm_v2/Aryanne/SHBA/3f08155d-8551-4472-86fe-7988cd6df78b.json new file mode 100644 index 000000000..5001e5134 --- /dev/null +++ 
b/data/hfopenllm_v2/Aryanne/SHBA/3f08155d-8551-4472-86fe-7988cd6df78b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Aryanne_SHBA/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SHBA", + "id": "Aryanne/SHBA", + "developer": "Aryanne", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7817 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5233 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1798 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4161 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3892 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Aryanne/SHBA/a1c56b87-d8d4-4570-9c33-b84dd066d92f.json b/data/hfopenllm_v2/Aryanne/SHBA/a1c56b87-d8d4-4570-9c33-b84dd066d92f.json deleted file mode 100644 index 2db159ad2..000000000 --- a/data/hfopenllm_v2/Aryanne/SHBA/a1c56b87-d8d4-4570-9c33-b84dd066d92f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Aryanne_SHBA/1762652579.482961", - "retrieved_timestamp": "1762652579.482962", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], 
- "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Aryanne/SHBA", - "developer": "Aryanne", - "inference_platform": "unknown", - "id": "Aryanne/SHBA", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7816560060639104 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5233174837035715 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41613541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3892121010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/Aryanne/SuperHeart/339e12fb-b4a4-4a4b-bb40-899b4ad833f9.json b/data/hfopenllm_v2/Aryanne/SuperHeart/339e12fb-b4a4-4a4b-bb40-899b4ad833f9.json new file mode 100644 index 000000000..9df9c1198 --- /dev/null +++ b/data/hfopenllm_v2/Aryanne/SuperHeart/339e12fb-b4a4-4a4b-bb40-899b4ad833f9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Aryanne_SuperHeart/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SuperHeart", + "id": "Aryanne/SuperHeart", + "developer": "Aryanne", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5192 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5215 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1563 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4436 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3912 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Aryanne/SuperHeart/c6fae489-9bf8-40e5-a602-1c6ce9000537.json b/data/hfopenllm_v2/Aryanne/SuperHeart/c6fae489-9bf8-40e5-a602-1c6ce9000537.json deleted file mode 100644 index e22a523a0..000000000 --- a/data/hfopenllm_v2/Aryanne/SuperHeart/c6fae489-9bf8-40e5-a602-1c6ce9000537.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Aryanne_SuperHeart/1762652579.483199", - "retrieved_timestamp": "1762652579.4832", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Aryanne/SuperHeart", - "developer": "Aryanne", - "inference_platform": "unknown", - "id": "Aryanne/SuperHeart", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5192234382549413 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5215375046264326 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.15634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44357291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3912067819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/AtAndDev/Qwen2.5-1.5B-continuous-learnt/4fd60e9c-5c90-492a-b24d-7ca6d1e91eae.json b/data/hfopenllm_v2/AtAndDev/Qwen2.5-1.5B-continuous-learnt/4fd60e9c-5c90-492a-b24d-7ca6d1e91eae.json new file mode 100644 index 000000000..c1dcb320d --- /dev/null +++ b/data/hfopenllm_v2/AtAndDev/Qwen2.5-1.5B-continuous-learnt/4fd60e9c-5c90-492a-b24d-7ca6d1e91eae.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/AtAndDev_Qwen2.5-1.5B-continuous-learnt/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-1.5B-continuous-learnt", + "id": "AtAndDev/Qwen2.5-1.5B-continuous-learnt", + "developer": "AtAndDev", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4605 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4258 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0748 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + 
"source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3636 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2812 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AtAndDev/Qwen2.5-1.5B-continuous-learnt/7f8d935e-3782-4769-8bd0-ee8a0ce91cd6.json b/data/hfopenllm_v2/AtAndDev/Qwen2.5-1.5B-continuous-learnt/7f8d935e-3782-4769-8bd0-ee8a0ce91cd6.json new file mode 100644 index 000000000..cd9fc36f7 --- /dev/null +++ b/data/hfopenllm_v2/AtAndDev/Qwen2.5-1.5B-continuous-learnt/7f8d935e-3782-4769-8bd0-ee8a0ce91cd6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/AtAndDev_Qwen2.5-1.5B-continuous-learnt/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-1.5B-continuous-learnt", + "id": "AtAndDev/Qwen2.5-1.5B-continuous-learnt", + "developer": "AtAndDev", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4511 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4275 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1473 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3623 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2806 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Ateron/Glowing-Forest-12B/13716fd0-049a-4e9a-90ca-af9db59c1703.json b/data/hfopenllm_v2/Ateron/Glowing-Forest-12B/13716fd0-049a-4e9a-90ca-af9db59c1703.json deleted file mode 100644 index e2c0d2793..000000000 --- a/data/hfopenllm_v2/Ateron/Glowing-Forest-12B/13716fd0-049a-4e9a-90ca-af9db59c1703.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Ateron_Glowing-Forest-12B/1762652579.484101", - "retrieved_timestamp": "1762652579.4841018", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Ateron/Glowing-Forest-12B", - "developer": "Ateron", - "inference_platform": "unknown", - "id": "Ateron/Glowing-Forest-12B", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3591803082487799 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.549176294722067 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07779456193353475 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33305369127516776 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44490625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37175864361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/Ateron/Glowing-Forest-12B/6fa07e60-9f82-4abc-aa45-4dfc0bcf9b8d.json b/data/hfopenllm_v2/Ateron/Glowing-Forest-12B/6fa07e60-9f82-4abc-aa45-4dfc0bcf9b8d.json new file mode 100644 index 000000000..d15a6561f --- /dev/null +++ 
b/data/hfopenllm_v2/Ateron/Glowing-Forest-12B/6fa07e60-9f82-4abc-aa45-4dfc0bcf9b8d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Ateron_Glowing-Forest-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Glowing-Forest-12B", + "id": "Ateron/Glowing-Forest-12B", + "developer": "Ateron", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3592 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5492 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0778 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3331 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4449 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3718 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Ateron/Lotus-Magpic/99a0022b-3fe7-4612-9cbb-cf082c1f6b70.json b/data/hfopenllm_v2/Ateron/Lotus-Magpic/99a0022b-3fe7-4612-9cbb-cf082c1f6b70.json new file mode 100644 index 000000000..63c101443 --- /dev/null +++ b/data/hfopenllm_v2/Ateron/Lotus-Magpic/99a0022b-3fe7-4612-9cbb-cf082c1f6b70.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Ateron_Lotus-Magpic/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + 
"source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lotus-Magpic", + "id": "Ateron/Lotus-Magpic", + "developer": "Ateron", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6286 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5254 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0997 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4332 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3491 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Ateron/Lotus-Magpic/bedab846-a6b2-4c51-9690-27deb7a76fe7.json b/data/hfopenllm_v2/Ateron/Lotus-Magpic/bedab846-a6b2-4c51-9690-27deb7a76fe7.json deleted file mode 100644 index f5e8c3acd..000000000 --- a/data/hfopenllm_v2/Ateron/Lotus-Magpic/bedab846-a6b2-4c51-9690-27deb7a76fe7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Ateron_Lotus-Magpic/1762652579.484373", - "retrieved_timestamp": "1762652579.484374", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"Ateron/Lotus-Magpic", - "developer": "Ateron", - "inference_platform": "unknown", - "id": "Ateron/Lotus-Magpic", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6286076499244228 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5253514950133299 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09969788519637462 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4331875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3490691489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/Ateron/Way_of_MagPicaro/0a5e585d-1a90-4849-9df5-670a56b9f161.json b/data/hfopenllm_v2/Ateron/Way_of_MagPicaro/0a5e585d-1a90-4849-9df5-670a56b9f161.json deleted file mode 100644 index 4c7e73273..000000000 --- a/data/hfopenllm_v2/Ateron/Way_of_MagPicaro/0a5e585d-1a90-4849-9df5-670a56b9f161.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Ateron_Way_of_MagPicaro/1762652579.484595", - "retrieved_timestamp": "1762652579.484596", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Ateron/Way_of_MagPicaro", - "developer": "Ateron", - "inference_platform": "unknown", - "id": "Ateron/Way_of_MagPicaro", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2637091805298829 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5427386861946704 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { 
- "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3338926174496644 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46490625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35355718085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/Ateron/Way_of_MagPicaro/b1153714-d6fe-4ff9-ab8c-85b677d57f8f.json b/data/hfopenllm_v2/Ateron/Way_of_MagPicaro/b1153714-d6fe-4ff9-ab8c-85b677d57f8f.json new file mode 100644 index 000000000..7a1c07042 --- /dev/null +++ b/data/hfopenllm_v2/Ateron/Way_of_MagPicaro/b1153714-d6fe-4ff9-ab8c-85b677d57f8f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Ateron_Way_of_MagPicaro/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Way_of_MagPicaro", + "id": "Ateron/Way_of_MagPicaro", + "developer": "Ateron", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2637 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5427 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0589 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3339 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + 
"dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4649 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3536 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AuraIndustries/Aura-4B/5fe88e89-1055-4357-9394-004dd4635e58.json b/data/hfopenllm_v2/AuraIndustries/Aura-4B/5fe88e89-1055-4357-9394-004dd4635e58.json deleted file mode 100644 index 85b1a7ec5..000000000 --- a/data/hfopenllm_v2/AuraIndustries/Aura-4B/5fe88e89-1055-4357-9394-004dd4635e58.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/AuraIndustries_Aura-4B/1762652579.484812", - "retrieved_timestamp": "1762652579.484813", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "AuraIndustries/Aura-4B", - "developer": "AuraIndustries", - "inference_platform": "unknown", - "id": "AuraIndustries/Aura-4B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.513 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38156203318306536 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4490409465001946 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04229607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39384375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27061170212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/AuraIndustries/Aura-4B/c3d39b6c-02af-410d-8a5c-224495b04572.json 
b/data/hfopenllm_v2/AuraIndustries/Aura-4B/c3d39b6c-02af-410d-8a5c-224495b04572.json new file mode 100644 index 000000000..259ade02b --- /dev/null +++ b/data/hfopenllm_v2/AuraIndustries/Aura-4B/c3d39b6c-02af-410d-8a5c-224495b04572.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/AuraIndustries_Aura-4B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Aura-4B", + "id": "AuraIndustries/Aura-4B", + "developer": "AuraIndustries", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.513 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3816 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.449 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0423 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2878 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3938 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2706 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AuraIndustries/Aura-8B/0426fcba-3db4-492d-b622-e34ab8d3fc8f.json b/data/hfopenllm_v2/AuraIndustries/Aura-8B/0426fcba-3db4-492d-b622-e34ab8d3fc8f.json new file mode 100644 index 000000000..2bec15124 --- /dev/null +++ b/data/hfopenllm_v2/AuraIndustries/Aura-8B/0426fcba-3db4-492d-b622-e34ab8d3fc8f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + 
"evaluation_id": "hfopenllm_v2/AuraIndustries_Aura-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Aura-8B", + "id": "AuraIndustries/Aura-8B", + "developer": "AuraIndustries", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7205 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5131 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1518 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2861 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3874 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AuraIndustries/Aura-8B/39e029ad-b385-4b26-9a02-b40c90cd8ad8.json b/data/hfopenllm_v2/AuraIndustries/Aura-8B/39e029ad-b385-4b26-9a02-b40c90cd8ad8.json deleted file mode 100644 index 125321046..000000000 --- a/data/hfopenllm_v2/AuraIndustries/Aura-8B/39e029ad-b385-4b26-9a02-b40c90cd8ad8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/AuraIndustries_Aura-8B/1762652579.485057", - "retrieved_timestamp": "1762652579.485057", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": 
"Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "AuraIndustries/Aura-8B", - "developer": "AuraIndustries", - "inference_platform": "unknown", - "id": "AuraIndustries/Aura-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7205315230255722 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5131231419849063 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15181268882175228 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4004479166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38738364361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/AuraIndustries/Aura-MoE-2x4B-v2/3402882b-af4e-4509-9d57-32efa5d8c495.json b/data/hfopenllm_v2/AuraIndustries/Aura-MoE-2x4B-v2/3402882b-af4e-4509-9d57-32efa5d8c495.json deleted file mode 100644 index 6c2a32b2a..000000000 --- a/data/hfopenllm_v2/AuraIndustries/Aura-MoE-2x4B-v2/3402882b-af4e-4509-9d57-32efa5d8c495.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/AuraIndustries_Aura-MoE-2x4B-v2/1762652579.4855082", - "retrieved_timestamp": "1762652579.4855092", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "AuraIndustries/Aura-MoE-2x4B-v2", - "developer": "AuraIndustries", - "inference_platform": "unknown", - "id": "AuraIndustries/Aura-MoE-2x4B-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 7.231 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4777822843388875 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": 
"Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43152444292813597 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03172205438066465 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4100625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609707446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/AuraIndustries/Aura-MoE-2x4B-v2/aa099cfe-ac9a-42dd-8357-f4d8115133ca.json b/data/hfopenllm_v2/AuraIndustries/Aura-MoE-2x4B-v2/aa099cfe-ac9a-42dd-8357-f4d8115133ca.json new file mode 100644 index 000000000..2a673ad65 --- /dev/null +++ b/data/hfopenllm_v2/AuraIndustries/Aura-MoE-2x4B-v2/aa099cfe-ac9a-42dd-8357-f4d8115133ca.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/AuraIndustries_Aura-MoE-2x4B-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Aura-MoE-2x4B-v2", + "id": "AuraIndustries/Aura-MoE-2x4B-v2", + "developer": "AuraIndustries", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 7.231 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4778 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4315 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0317 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": 
"Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2878 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4101 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.261 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AuraIndustries/Aura-MoE-2x4B/8239ffac-3fca-4eab-86d4-78bab22dc420.json b/data/hfopenllm_v2/AuraIndustries/Aura-MoE-2x4B/8239ffac-3fca-4eab-86d4-78bab22dc420.json deleted file mode 100644 index a09085905..000000000 --- a/data/hfopenllm_v2/AuraIndustries/Aura-MoE-2x4B/8239ffac-3fca-4eab-86d4-78bab22dc420.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/AuraIndustries_Aura-MoE-2x4B/1762652579.48526", - "retrieved_timestamp": "1762652579.485261", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "AuraIndustries/Aura-MoE-2x4B", - "developer": "AuraIndustries", - "inference_platform": "unknown", - "id": "AuraIndustries/Aura-MoE-2x4B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 7.231 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.460096987105325 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43385067041774666 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030966767371601207 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40851041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26496010638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/AuraIndustries/Aura-MoE-2x4B/ccbc8a5e-9a97-452a-b023-cc996ffe31f1.json b/data/hfopenllm_v2/AuraIndustries/Aura-MoE-2x4B/ccbc8a5e-9a97-452a-b023-cc996ffe31f1.json new file mode 100644 index 000000000..460a41061 --- /dev/null +++ b/data/hfopenllm_v2/AuraIndustries/Aura-MoE-2x4B/ccbc8a5e-9a97-452a-b023-cc996ffe31f1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/AuraIndustries_Aura-MoE-2x4B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Aura-MoE-2x4B", + "id": "AuraIndustries/Aura-MoE-2x4B", + "developer": "AuraIndustries", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 7.231 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4601 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4339 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.031 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2718 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4085 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.265 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/Aurel9/testmerge-7b/b359a7a3-cf2c-4952-b308-333672dadcec.json b/data/hfopenllm_v2/Aurel9/testmerge-7b/b359a7a3-cf2c-4952-b308-333672dadcec.json new file mode 100644 index 000000000..a8574b05b --- /dev/null +++ b/data/hfopenllm_v2/Aurel9/testmerge-7b/b359a7a3-cf2c-4952-b308-333672dadcec.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Aurel9_testmerge-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "testmerge-7b", + "id": "Aurel9/testmerge-7b", + "developer": "Aurel9", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.398 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.519 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0657 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4659 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3053 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Aurel9/testmerge-7b/eb45737a-74bc-482d-9d7f-d2bd1d876c77.json b/data/hfopenllm_v2/Aurel9/testmerge-7b/eb45737a-74bc-482d-9d7f-d2bd1d876c77.json deleted file mode 100644 index 320228d2b..000000000 --- a/data/hfopenllm_v2/Aurel9/testmerge-7b/eb45737a-74bc-482d-9d7f-d2bd1d876c77.json +++ 
/dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Aurel9_testmerge-7b/1762652579.485724", - "retrieved_timestamp": "1762652579.485725", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Aurel9/testmerge-7b", - "developer": "Aurel9", - "inference_platform": "unknown", - "id": "Aurel9/testmerge-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3979984219648311 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5189590919105128 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06570996978851963 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4658645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3052692819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/Ayush-Singh/Llama1B-sft-2/0864d5cf-d6fe-42bc-9059-9f2e5ff06b60.json b/data/hfopenllm_v2/Ayush-Singh/Llama1B-sft-2/0864d5cf-d6fe-42bc-9059-9f2e5ff06b60.json new file mode 100644 index 000000000..cf11b9c2a --- /dev/null +++ b/data/hfopenllm_v2/Ayush-Singh/Llama1B-sft-2/0864d5cf-d6fe-42bc-9059-9f2e5ff06b60.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Ayush-Singh_Llama1B-sft-2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama1B-sft-2", + "id": "Ayush-Singh/Llama1B-sft-2", + "developer": "Ayush-Singh", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1374 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2834 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2458 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3552 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1117 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Ayush-Singh/Llama1B-sft-2/678cad7f-854b-4dc3-91cc-2d1774ef7faf.json b/data/hfopenllm_v2/Ayush-Singh/Llama1B-sft-2/678cad7f-854b-4dc3-91cc-2d1774ef7faf.json deleted file mode 100644 index 03fe5e627..000000000 --- a/data/hfopenllm_v2/Ayush-Singh/Llama1B-sft-2/678cad7f-854b-4dc3-91cc-2d1774ef7faf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Ayush-Singh_Llama1B-sft-2/1762652579.4859679", - "retrieved_timestamp": "1762652579.4859688", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Ayush-Singh/Llama1B-sft-2", - "developer": "Ayush-Singh", - "inference_platform": "unknown", - "id": "Ayush-Singh/Llama1B-sft-2", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13743755457741016 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { 
- "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.283428204214368 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24580536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35520833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11170212765957446 - } - } - ] -} diff --git a/data/hfopenllm_v2/Azure99/Blossom-V6-14B/24ce59a5-c351-4ed8-8944-8ec5db739da8.json b/data/hfopenllm_v2/Azure99/Blossom-V6-14B/24ce59a5-c351-4ed8-8944-8ec5db739da8.json deleted file mode 100644 index 745e029dc..000000000 --- a/data/hfopenllm_v2/Azure99/Blossom-V6-14B/24ce59a5-c351-4ed8-8944-8ec5db739da8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Azure99_Blossom-V6-14B/1762652579.486225", - "retrieved_timestamp": "1762652579.4862258", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Azure99/Blossom-V6-14B", - "developer": "Azure99", - "inference_platform": "unknown", - "id": "Azure99/Blossom-V6-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6395486198841297 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5068726694646123 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.525679758308157 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40352083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4543716755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/Azure99/Blossom-V6-14B/e6ef2559-8a63-43e3-a60b-0d2b7256ad3d.json b/data/hfopenllm_v2/Azure99/Blossom-V6-14B/e6ef2559-8a63-43e3-a60b-0d2b7256ad3d.json new file mode 100644 index 000000000..374b26fce --- /dev/null +++ b/data/hfopenllm_v2/Azure99/Blossom-V6-14B/e6ef2559-8a63-43e3-a60b-0d2b7256ad3d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Azure99_Blossom-V6-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Blossom-V6-14B", + "id": "Azure99/Blossom-V6-14B", + "developer": "Azure99", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6395 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5069 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5257 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2626 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4035 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4544 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Azure99/Blossom-V6-7B/35949fb3-8c01-45cf-b4db-bbe983b15ac6.json b/data/hfopenllm_v2/Azure99/Blossom-V6-7B/35949fb3-8c01-45cf-b4db-bbe983b15ac6.json deleted file mode 100644 index ca27fc4fb..000000000 --- a/data/hfopenllm_v2/Azure99/Blossom-V6-7B/35949fb3-8c01-45cf-b4db-bbe983b15ac6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Azure99_Blossom-V6-7B/1762652579.486468", - "retrieved_timestamp": "1762652579.486469", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Azure99/Blossom-V6-7B", - "developer": "Azure99", - "inference_platform": "unknown", - "id": "Azure99/Blossom-V6-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5538194213575536 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49736683240887 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45845921450151056 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43009375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41439494680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/Azure99/Blossom-V6-7B/45d019ab-b23c-4fc3-baf5-d57576e9945c.json b/data/hfopenllm_v2/Azure99/Blossom-V6-7B/45d019ab-b23c-4fc3-baf5-d57576e9945c.json new file mode 100644 index 000000000..e807205a9 --- /dev/null +++ b/data/hfopenllm_v2/Azure99/Blossom-V6-7B/45d019ab-b23c-4fc3-baf5-d57576e9945c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Azure99_Blossom-V6-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"Blossom-V6-7B", + "id": "Azure99/Blossom-V6-7B", + "developer": "Azure99", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5538 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4974 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4585 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4301 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4144 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Azure99/blossom-v5-32b/6adfe39d-f2c2-4101-8f0f-7496d55397cd.json b/data/hfopenllm_v2/Azure99/blossom-v5-32b/6adfe39d-f2c2-4101-8f0f-7496d55397cd.json deleted file mode 100644 index f5a6cfb39..000000000 --- a/data/hfopenllm_v2/Azure99/blossom-v5-32b/6adfe39d-f2c2-4101-8f0f-7496d55397cd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Azure99_blossom-v5-32b/1762652579.4866729", - "retrieved_timestamp": "1762652579.4866738", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Azure99/blossom-v5-32b", - "developer": "Azure99", - "inference_platform": "unknown", - "id": "Azure99/blossom-v5-32b", - "additional_details": { - "precision": "bfloat16", - 
"architecture": "Qwen2ForCausalLM", - "params_billions": 32.512 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5235441960664371 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5954545257004673 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1865558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40199999999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4234541223404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/Azure99/blossom-v5-32b/e3cd7c32-e5a1-4cd6-a9dc-95364a8abe75.json b/data/hfopenllm_v2/Azure99/blossom-v5-32b/e3cd7c32-e5a1-4cd6-a9dc-95364a8abe75.json new file mode 100644 index 000000000..c1499a18c --- /dev/null +++ b/data/hfopenllm_v2/Azure99/blossom-v5-32b/e3cd7c32-e5a1-4cd6-a9dc-95364a8abe75.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Azure99_blossom-v5-32b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "blossom-v5-32b", + "id": "Azure99/blossom-v5-32b", + "developer": "Azure99", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.512 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5235 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5955 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1866 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3112 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.402 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4235 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Azure99/blossom-v5-llama3-8b/9be442e8-4b77-43e0-a981-887338e59b78.json b/data/hfopenllm_v2/Azure99/blossom-v5-llama3-8b/9be442e8-4b77-43e0-a981-887338e59b78.json new file mode 100644 index 000000000..6b1528202 --- /dev/null +++ b/data/hfopenllm_v2/Azure99/blossom-v5-llama3-8b/9be442e8-4b77-43e0-a981-887338e59b78.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Azure99_blossom-v5-llama3-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "blossom-v5-llama3-8b", + "id": "Azure99/blossom-v5-llama3-8b", + "developer": "Azure99", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4343 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4185 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.0514 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.367 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2206 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Azure99/blossom-v5.1-34b/a07b6326-f393-490e-b696-d8b45f593d4b.json b/data/hfopenllm_v2/Azure99/blossom-v5.1-34b/a07b6326-f393-490e-b696-d8b45f593d4b.json new file mode 100644 index 000000000..108854be5 --- /dev/null +++ b/data/hfopenllm_v2/Azure99/blossom-v5.1-34b/a07b6326-f393-490e-b696-d8b45f593d4b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Azure99_blossom-v5.1-34b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "blossom-v5.1-34b", + "id": "Azure99/blossom-v5.1-34b", + "developer": "Azure99", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5697 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6109 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2591 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3928 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4558 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Azure99/blossom-v5.1-34b/d2342413-1b55-4da5-a6e5-da6274f309ad.json b/data/hfopenllm_v2/Azure99/blossom-v5.1-34b/d2342413-1b55-4da5-a6e5-da6274f309ad.json deleted file mode 100644 index a40b85708..000000000 --- a/data/hfopenllm_v2/Azure99/blossom-v5.1-34b/d2342413-1b55-4da5-a6e5-da6274f309ad.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Azure99_blossom-v5.1-34b/1762652579.4871309", - "retrieved_timestamp": "1762652579.4871309", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Azure99/blossom-v5.1-34b", - "developer": "Azure99", - "inference_platform": "unknown", - "id": "Azure99/blossom-v5.1-34b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5696562897556262 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6109110096611161 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2590634441087613 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39279166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": 
{ - "score": 0.4557845744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/Azure99/blossom-v5.1-9b/8eb55323-b0d7-4419-aec6-03de8bcd472e.json b/data/hfopenllm_v2/Azure99/blossom-v5.1-9b/8eb55323-b0d7-4419-aec6-03de8bcd472e.json deleted file mode 100644 index 6f14a09ab..000000000 --- a/data/hfopenllm_v2/Azure99/blossom-v5.1-9b/8eb55323-b0d7-4419-aec6-03de8bcd472e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Azure99_blossom-v5.1-9b/1762652579.487347", - "retrieved_timestamp": "1762652579.487348", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Azure99/blossom-v5.1-9b", - "developer": "Azure99", - "inference_platform": "unknown", - "id": "Azure99/blossom-v5.1-9b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.829 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5085816744016985 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5343292377916368 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2122356495468278 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33557046979865773 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39939583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39793882978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/Azure99/blossom-v5.1-9b/b66ed91a-98d5-407c-9896-9c2e2a31e9da.json b/data/hfopenllm_v2/Azure99/blossom-v5.1-9b/b66ed91a-98d5-407c-9896-9c2e2a31e9da.json new file mode 100644 index 000000000..812870f1b --- /dev/null +++ b/data/hfopenllm_v2/Azure99/blossom-v5.1-9b/b66ed91a-98d5-407c-9896-9c2e2a31e9da.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Azure99_blossom-v5.1-9b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "blossom-v5.1-9b", + "id": "Azure99/blossom-v5.1-9b", + "developer": 
"Azure99", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.829 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5086 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5343 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2122 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3356 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3979 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BAAI/Gemma2-9B-IT-Simpo-Infinity-Preference/9c70921d-956b-4727-9201-1addbd01bb8b.json b/data/hfopenllm_v2/BAAI/Gemma2-9B-IT-Simpo-Infinity-Preference/9c70921d-956b-4727-9201-1addbd01bb8b.json new file mode 100644 index 000000000..212643c5b --- /dev/null +++ b/data/hfopenllm_v2/BAAI/Gemma2-9B-IT-Simpo-Infinity-Preference/9c70921d-956b-4727-9201-1addbd01bb8b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BAAI_Gemma2-9B-IT-Simpo-Infinity-Preference/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma2-9B-IT-Simpo-Infinity-Preference", + "id": "BAAI/Gemma2-9B-IT-Simpo-Infinity-Preference", + "developer": "BAAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + 
} + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3176 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5979 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0974 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3398 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3966 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3869 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0613-Llama3-70B/4ba6d51e-314a-4db4-9552-568a4093e01a.json b/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0613-Llama3-70B/4ba6d51e-314a-4db4-9552-568a4093e01a.json new file mode 100644 index 000000000..3e94d0dd0 --- /dev/null +++ b/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0613-Llama3-70B/4ba6d51e-314a-4db4-9552-568a4093e01a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-3M-0613-Llama3-70B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Infinity-Instruct-3M-0613-Llama3-70B", + "id": "BAAI/Infinity-Instruct-3M-0613-Llama3-70B", + "developer": "BAAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + 
}, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6821 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6642 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2153 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3582 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4523 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.473 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0613-Llama3-70B/69cea95c-c167-42f4-a233-f7739f86f6a7.json b/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0613-Llama3-70B/69cea95c-c167-42f4-a233-f7739f86f6a7.json deleted file mode 100644 index b376225a6..000000000 --- a/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0613-Llama3-70B/69cea95c-c167-42f4-a233-f7739f86f6a7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-3M-0613-Llama3-70B/1762652579.487831", - "retrieved_timestamp": "1762652579.487832", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BAAI/Infinity-Instruct-3M-0613-Llama3-70B", - "developer": "BAAI", - "inference_platform": "unknown", - "id": "BAAI/Infinity-Instruct-3M-0613-Llama3-70B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.6821134589555713 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6641614484348598 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21525679758308158 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35822147651006714 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45226041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47298869680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0613-Mistral-7B/835f5056-56bf-4a6c-886f-fbe6f263ac07.json b/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0613-Mistral-7B/835f5056-56bf-4a6c-886f-fbe6f263ac07.json new file mode 100644 index 000000000..9053fed79 --- /dev/null +++ b/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0613-Mistral-7B/835f5056-56bf-4a6c-886f-fbe6f263ac07.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-3M-0613-Mistral-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Infinity-Instruct-3M-0613-Mistral-7B", + "id": "BAAI/Infinity-Instruct-3M-0613-Mistral-7B", + "developer": "BAAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.532 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4958 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0816 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4351 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3161 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0613-Mistral-7B/9d9ac91a-f339-41a4-ae91-3dba41b06382.json b/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0613-Mistral-7B/9d9ac91a-f339-41a4-ae91-3dba41b06382.json deleted file mode 100644 index c11c239e6..000000000 --- a/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0613-Mistral-7B/9d9ac91a-f339-41a4-ae91-3dba41b06382.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-3M-0613-Mistral-7B/1762652579.48831", - "retrieved_timestamp": "1762652579.4883142", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BAAI/Infinity-Instruct-3M-0613-Mistral-7B", - "developer": "BAAI", - "inference_platform": "unknown", - "id": "BAAI/Infinity-Instruct-3M-0613-Mistral-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5319873491225504 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49582333763258896 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08157099697885196 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4350833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31607380319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Llama3-70B/73eb53bc-a090-4415-8fdc-a767a2e00188.json b/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Llama3-70B/73eb53bc-a090-4415-8fdc-a767a2e00188.json deleted file mode 100644 index 0af08bb81..000000000 --- a/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Llama3-70B/73eb53bc-a090-4415-8fdc-a767a2e00188.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-3M-0625-Llama3-70B/1762652579.4887528", - "retrieved_timestamp": "1762652579.488755", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BAAI/Infinity-Instruct-3M-0625-Llama3-70B", - "developer": "BAAI", - "inference_platform": "unknown", - "id": "BAAI/Infinity-Instruct-3M-0625-Llama3-70B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7442120240960651 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6670337872930245 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22507552870090636 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3573825503355705 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46165625000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4586103723404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Llama3-70B/c2a63afa-9d25-41dc-b25f-848f5a640501.json b/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Llama3-70B/c2a63afa-9d25-41dc-b25f-848f5a640501.json new file mode 
100644 index 000000000..090eaf46c --- /dev/null +++ b/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Llama3-70B/c2a63afa-9d25-41dc-b25f-848f5a640501.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-3M-0625-Llama3-70B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Infinity-Instruct-3M-0625-Llama3-70B", + "id": "BAAI/Infinity-Instruct-3M-0625-Llama3-70B", + "developer": "BAAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7442 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.667 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2251 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3574 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4617 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4586 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Llama3-8B/00d87824-732a-4746-8d9f-ce7b1f45c0ae.json b/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Llama3-8B/00d87824-732a-4746-8d9f-ce7b1f45c0ae.json deleted file mode 100644 index 8fcf5281f..000000000 --- a/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Llama3-8B/00d87824-732a-4746-8d9f-ce7b1f45c0ae.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - 
"schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-3M-0625-Llama3-8B/1762652579.4890082", - "retrieved_timestamp": "1762652579.489009", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BAAI/Infinity-Instruct-3M-0625-Llama3-8B", - "developer": "BAAI", - "inference_platform": "unknown", - "id": "BAAI/Infinity-Instruct-3M-0625-Llama3-8B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6050268842227512 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4954985723563075 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08836858006042296 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37120833333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3252160904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Llama3-8B/f64f9d24-e448-4bb6-89c3-edb66499bac9.json b/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Llama3-8B/f64f9d24-e448-4bb6-89c3-edb66499bac9.json new file mode 100644 index 000000000..4f8d75121 --- /dev/null +++ b/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Llama3-8B/f64f9d24-e448-4bb6-89c3-edb66499bac9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-3M-0625-Llama3-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Infinity-Instruct-3M-0625-Llama3-8B", + "id": "BAAI/Infinity-Instruct-3M-0625-Llama3-8B", + "developer": "BAAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + 
"dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.605 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4955 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0884 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2752 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3712 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3252 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Mistral-7B/2de14bfb-844a-4711-815e-8f63487a78fd.json b/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Mistral-7B/2de14bfb-844a-4711-815e-8f63487a78fd.json new file mode 100644 index 000000000..ea49688f6 --- /dev/null +++ b/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Mistral-7B/2de14bfb-844a-4711-815e-8f63487a78fd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-3M-0625-Mistral-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Infinity-Instruct-3M-0625-Mistral-7B", + "id": "BAAI/Infinity-Instruct-3M-0625-Mistral-7B", + "developer": "BAAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5867 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.494 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0763 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4272 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.323 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Mistral-7B/be3423f2-98f0-414a-b0c3-efd0d60d4cb3.json b/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Mistral-7B/be3423f2-98f0-414a-b0c3-efd0d60d4cb3.json deleted file mode 100644 index 0e567e344..000000000 --- a/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Mistral-7B/be3423f2-98f0-414a-b0c3-efd0d60d4cb3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-3M-0625-Mistral-7B/1762652579.489246", - "retrieved_timestamp": "1762652579.489247", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BAAI/Infinity-Instruct-3M-0625-Mistral-7B", - "developer": "BAAI", - "inference_platform": "unknown", - "id": "BAAI/Infinity-Instruct-3M-0625-Mistral-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5867420666054957 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4939670574681802 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07628398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42723958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3229720744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Qwen2-7B/2390d668-3273-4f58-a0fd-b13b9d9b1651.json b/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Qwen2-7B/2390d668-3273-4f58-a0fd-b13b9d9b1651.json deleted file mode 100644 index 4c5b160d7..000000000 --- a/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Qwen2-7B/2390d668-3273-4f58-a0fd-b13b9d9b1651.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-3M-0625-Qwen2-7B/1762652579.489471", - "retrieved_timestamp": "1762652579.489472", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BAAI/Infinity-Instruct-3M-0625-Qwen2-7B", - "developer": "BAAI", - "inference_platform": "unknown", - "id": "BAAI/Infinity-Instruct-3M-0625-Qwen2-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5553930238434022 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5345911997776569 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19259818731117825 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38876041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39602726063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Qwen2-7B/f953e0e2-ddca-42a2-a0f6-752a137bc6b5.json b/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Qwen2-7B/f953e0e2-ddca-42a2-a0f6-752a137bc6b5.json new file mode 100644 index 000000000..ff2b44e64 --- /dev/null +++ b/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Qwen2-7B/f953e0e2-ddca-42a2-a0f6-752a137bc6b5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-3M-0625-Qwen2-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Infinity-Instruct-3M-0625-Qwen2-7B", + "id": "BAAI/Infinity-Instruct-3M-0625-Qwen2-7B", + "developer": "BAAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5554 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5346 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1926 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3129 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3888 + } + }, + { + 
"evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.396 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Yi-1.5-9B/8a2d5e9c-7d41-4638-8b8c-58d08fc0912b.json b/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Yi-1.5-9B/8a2d5e9c-7d41-4638-8b8c-58d08fc0912b.json deleted file mode 100644 index 0581e807d..000000000 --- a/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Yi-1.5-9B/8a2d5e9c-7d41-4638-8b8c-58d08fc0912b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-3M-0625-Yi-1.5-9B/1762652579.489686", - "retrieved_timestamp": "1762652579.489687", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BAAI/Infinity-Instruct-3M-0625-Yi-1.5-9B", - "developer": "BAAI", - "inference_platform": "unknown", - "id": "BAAI/Infinity-Instruct-3M-0625-Yi-1.5-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.829 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5185984299436606 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5509115146247398 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16389728096676737 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3540268456375839 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45753125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41181848404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Yi-1.5-9B/98187b98-0cc8-4756-9cb7-c53deb998f90.json b/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Yi-1.5-9B/98187b98-0cc8-4756-9cb7-c53deb998f90.json new file mode 100644 index 000000000..5553e65e0 --- /dev/null +++ 
b/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Yi-1.5-9B/98187b98-0cc8-4756-9cb7-c53deb998f90.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-3M-0625-Yi-1.5-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Infinity-Instruct-3M-0625-Yi-1.5-9B", + "id": "BAAI/Infinity-Instruct-3M-0625-Yi-1.5-9B", + "developer": "BAAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.829 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5186 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5509 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1639 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.354 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4575 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4118 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-0729-Llama3_1-8B/8c79c60d-ebf4-4409-be4f-928a54cedd1d.json b/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-0729-Llama3_1-8B/8c79c60d-ebf4-4409-be4f-928a54cedd1d.json new file mode 100644 index 000000000..de7d61cf3 --- /dev/null +++ b/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-0729-Llama3_1-8B/8c79c60d-ebf4-4409-be4f-928a54cedd1d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/BAAI_Infinity-Instruct-7M-0729-Llama3_1-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Infinity-Instruct-7M-0729-Llama3_1-8B", + "id": "BAAI/Infinity-Instruct-7M-0729-Llama3_1-8B", + "developer": "BAAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6132 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5077 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1276 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3578 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3224 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-0729-Llama3_1-8B/eace7f56-b853-436d-a744-bfdb9e227993.json b/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-0729-Llama3_1-8B/eace7f56-b853-436d-a744-bfdb9e227993.json deleted file mode 100644 index 5356bf60b..000000000 --- a/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-0729-Llama3_1-8B/eace7f56-b853-436d-a744-bfdb9e227993.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-7M-0729-Llama3_1-8B/1762652579.489912", - "retrieved_timestamp": "1762652579.489913", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BAAI/Infinity-Instruct-7M-0729-Llama3_1-8B", - "developer": "BAAI", - "inference_platform": "unknown", - "id": "BAAI/Infinity-Instruct-7M-0729-Llama3_1-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6131952109292234 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5077335431381055 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12764350453172205 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35784375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3223902925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-0729-mistral-7B/25477dff-04c5-4cb8-9ad9-3a13448a2a7d.json b/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-0729-mistral-7B/25477dff-04c5-4cb8-9ad9-3a13448a2a7d.json deleted file mode 100644 index 9ed6cbbc7..000000000 --- a/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-0729-mistral-7B/25477dff-04c5-4cb8-9ad9-3a13448a2a7d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-7M-0729-mistral-7B/1762652579.490131", - "retrieved_timestamp": "1762652579.490131", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BAAI/Infinity-Instruct-7M-0729-mistral-7B", - "developer": "BAAI", - "inference_platform": "unknown", - "id": "BAAI/Infinity-Instruct-7M-0729-mistral-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6161928128476886 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4963813586525743 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4061875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3273769946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-0729-mistral-7B/5d5cebeb-faf0-4fdf-8749-6307080e82f2.json b/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-0729-mistral-7B/5d5cebeb-faf0-4fdf-8749-6307080e82f2.json new file mode 100644 index 000000000..27fce40f9 --- /dev/null +++ b/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-0729-mistral-7B/5d5cebeb-faf0-4fdf-8749-6307080e82f2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-7M-0729-mistral-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Infinity-Instruct-7M-0729-mistral-7B", + "id": "BAAI/Infinity-Instruct-7M-0729-mistral-7B", + "developer": "BAAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6162 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4964 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0831 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4062 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3274 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-Gen-Llama3_1-70B/b04b4e4d-2f15-446b-974f-21f72fd80fe0.json b/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-Gen-Llama3_1-70B/b04b4e4d-2f15-446b-974f-21f72fd80fe0.json deleted file mode 100644 index 8b67ae6f4..000000000 --- a/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-Gen-Llama3_1-70B/b04b4e4d-2f15-446b-974f-21f72fd80fe0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-7M-Gen-Llama3_1-70B/1762652579.490346", - "retrieved_timestamp": "1762652579.490347", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BAAI/Infinity-Instruct-7M-Gen-Llama3_1-70B", - "developer": "BAAI", - "inference_platform": "unknown", - "id": "BAAI/Infinity-Instruct-7M-Gen-Llama3_1-70B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7335458804859993 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6695200461367471 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25226586102719034 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.37583892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45390625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.460688164893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-Gen-Llama3_1-70B/e926ce8f-45bb-4f3d-b579-ecadb3df6468.json b/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-Gen-Llama3_1-70B/e926ce8f-45bb-4f3d-b579-ecadb3df6468.json new file mode 100644 index 000000000..c05f8ef93 --- /dev/null +++ b/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-Gen-Llama3_1-70B/e926ce8f-45bb-4f3d-b579-ecadb3df6468.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-7M-Gen-Llama3_1-70B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Infinity-Instruct-7M-Gen-Llama3_1-70B", + "id": "BAAI/Infinity-Instruct-7M-Gen-Llama3_1-70B", + "developer": "BAAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7335 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6695 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2523 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3758 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4539 + } + }, + { + "evaluation_name": 
"MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4607 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-Gen-Llama3_1-8B/070609d6-5f41-4712-9ad7-e215b1a6bb81.json b/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-Gen-Llama3_1-8B/070609d6-5f41-4712-9ad7-e215b1a6bb81.json new file mode 100644 index 000000000..e21c1ef5d --- /dev/null +++ b/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-Gen-Llama3_1-8B/070609d6-5f41-4712-9ad7-e215b1a6bb81.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-7M-Gen-Llama3_1-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Infinity-Instruct-7M-Gen-Llama3_1-8B", + "id": "BAAI/Infinity-Instruct-7M-Gen-Llama3_1-8B", + "developer": "BAAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6132 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5077 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1276 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3578 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3224 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-Gen-Llama3_1-8B/84f2027c-3e68-489e-902b-2fec6ec8f850.json b/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-Gen-Llama3_1-8B/84f2027c-3e68-489e-902b-2fec6ec8f850.json deleted file mode 100644 index 3ca2d0a38..000000000 --- a/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-Gen-Llama3_1-8B/84f2027c-3e68-489e-902b-2fec6ec8f850.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-7M-Gen-Llama3_1-8B/1762652579.4905548", - "retrieved_timestamp": "1762652579.490556", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BAAI/Infinity-Instruct-7M-Gen-Llama3_1-8B", - "developer": "BAAI", - "inference_platform": "unknown", - "id": "BAAI/Infinity-Instruct-7M-Gen-Llama3_1-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6131952109292234 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5077335431381055 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12764350453172205 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35784375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3223902925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-Gen-mistral-7B/51daf5e7-1d4e-4753-b24b-79273e6f9370.json b/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-Gen-mistral-7B/51daf5e7-1d4e-4753-b24b-79273e6f9370.json deleted file mode 100644 index dc9c56196..000000000 --- a/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-Gen-mistral-7B/51daf5e7-1d4e-4753-b24b-79273e6f9370.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-7M-Gen-mistral-7B/1762652579.490771", - 
"retrieved_timestamp": "1762652579.490772", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BAAI/Infinity-Instruct-7M-Gen-mistral-7B", - "developer": "BAAI", - "inference_platform": "unknown", - "id": "BAAI/Infinity-Instruct-7M-Gen-mistral-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6146690780462506 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4963813586525743 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4061875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3273769946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-Gen-mistral-7B/8d2909c7-37f2-4198-a1e2-4bf2ebc1444d.json b/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-Gen-mistral-7B/8d2909c7-37f2-4198-a1e2-4bf2ebc1444d.json new file mode 100644 index 000000000..4f2f9d23d --- /dev/null +++ b/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-Gen-mistral-7B/8d2909c7-37f2-4198-a1e2-4bf2ebc1444d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-7M-Gen-mistral-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Infinity-Instruct-7M-Gen-mistral-7B", + "id": "BAAI/Infinity-Instruct-7M-Gen-mistral-7B", + "developer": "BAAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6147 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4964 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0831 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4062 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3274 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BAAI/OPI-Llama-3.1-8B-Instruct/53587959-25f9-43aa-a34b-f274d8bc93af.json b/data/hfopenllm_v2/BAAI/OPI-Llama-3.1-8B-Instruct/53587959-25f9-43aa-a34b-f274d8bc93af.json new file mode 100644 index 000000000..e76dc81cd --- /dev/null +++ b/data/hfopenllm_v2/BAAI/OPI-Llama-3.1-8B-Instruct/53587959-25f9-43aa-a34b-f274d8bc93af.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BAAI_OPI-Llama-3.1-8B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OPI-Llama-3.1-8B-Instruct", + "id": "BAAI/OPI-Llama-3.1-8B-Instruct", + "developer": "BAAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2075 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": 
"BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3551 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0136 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3233 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2124 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BAAI/OPI-Llama-3.1-8B-Instruct/567f27f3-3f64-4054-aa67-684c29e4d71a.json b/data/hfopenllm_v2/BAAI/OPI-Llama-3.1-8B-Instruct/567f27f3-3f64-4054-aa67-684c29e4d71a.json deleted file mode 100644 index b23070326..000000000 --- a/data/hfopenllm_v2/BAAI/OPI-Llama-3.1-8B-Instruct/567f27f3-3f64-4054-aa67-684c29e4d71a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BAAI_OPI-Llama-3.1-8B-Instruct/1762652579.490996", - "retrieved_timestamp": "1762652579.490996", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BAAI/OPI-Llama-3.1-8B-Instruct", - "developer": "BAAI", - "inference_platform": "unknown", - "id": "BAAI/OPI-Llama-3.1-8B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20745510800232272 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3551224419497605 - } - }, - { - "evaluation_name": "MATH Level 
5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3233020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21243351063829788 - } - } - ] -} diff --git a/data/hfopenllm_v2/BEE-spoke-data/Meta-Llama-3-8Bee/2a7f80ed-d404-4c81-b000-b65c83069121.json b/data/hfopenllm_v2/BEE-spoke-data/Meta-Llama-3-8Bee/2a7f80ed-d404-4c81-b000-b65c83069121.json new file mode 100644 index 000000000..3edbb6f6f --- /dev/null +++ b/data/hfopenllm_v2/BEE-spoke-data/Meta-Llama-3-8Bee/2a7f80ed-d404-4c81-b000-b65c83069121.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BEE-spoke-data_Meta-Llama-3-8Bee/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Meta-Llama-3-8Bee", + "id": "BEE-spoke-data/Meta-Llama-3-8Bee", + "developer": "BEE-spoke-data", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1951 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4626 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0483 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.3138 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3654 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.322 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BEE-spoke-data/smol_llama-101M-GQA/f0983645-4adb-4ddb-bf2f-33480cb7f421.json b/data/hfopenllm_v2/BEE-spoke-data/smol_llama-101M-GQA/f0983645-4adb-4ddb-bf2f-33480cb7f421.json new file mode 100644 index 000000000..1bb779c1b --- /dev/null +++ b/data/hfopenllm_v2/BEE-spoke-data/smol_llama-101M-GQA/f0983645-4adb-4ddb-bf2f-33480cb7f421.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BEE-spoke-data_smol_llama-101M-GQA/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "smol_llama-101M-GQA", + "id": "BEE-spoke-data/smol_llama-101M-GQA", + "developer": "BEE-spoke-data", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.101 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1384 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3018 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.006 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3713 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1107 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BEE-spoke-data/smol_llama-220M-GQA-fineweb_edu/161dadfe-4983-4f56-8a7d-9b97f1c5a3c7.json b/data/hfopenllm_v2/BEE-spoke-data/smol_llama-220M-GQA-fineweb_edu/161dadfe-4983-4f56-8a7d-9b97f1c5a3c7.json new file mode 100644 index 000000000..d7af3cb33 --- /dev/null +++ b/data/hfopenllm_v2/BEE-spoke-data/smol_llama-220M-GQA-fineweb_edu/161dadfe-4983-4f56-8a7d-9b97f1c5a3c7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BEE-spoke-data_smol_llama-220M-GQA-fineweb_edu/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "smol_llama-220M-GQA-fineweb_edu", + "id": "BEE-spoke-data/smol_llama-220M-GQA-fineweb_edu", + "developer": "BEE-spoke-data", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.218 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1988 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2929 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0068 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.4368 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1127 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BEE-spoke-data/smol_llama-220M-GQA/694a02f9-4729-4d0b-97ce-80adaef29be2.json b/data/hfopenllm_v2/BEE-spoke-data/smol_llama-220M-GQA/694a02f9-4729-4d0b-97ce-80adaef29be2.json new file mode 100644 index 000000000..aa2c6a93b --- /dev/null +++ b/data/hfopenllm_v2/BEE-spoke-data/smol_llama-220M-GQA/694a02f9-4729-4d0b-97ce-80adaef29be2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BEE-spoke-data_smol_llama-220M-GQA/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "smol_llama-220M-GQA", + "id": "BEE-spoke-data/smol_llama-220M-GQA", + "developer": "BEE-spoke-data", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.218 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2386 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3032 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0106 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2559 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4059 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": 
{ + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1149 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BEE-spoke-data/smol_llama-220M-openhermes/0521f51d-22c1-4821-8f04-23c533411668.json b/data/hfopenllm_v2/BEE-spoke-data/smol_llama-220M-openhermes/0521f51d-22c1-4821-8f04-23c533411668.json new file mode 100644 index 000000000..7df19fae4 --- /dev/null +++ b/data/hfopenllm_v2/BEE-spoke-data/smol_llama-220M-openhermes/0521f51d-22c1-4821-8f04-23c533411668.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BEE-spoke-data_smol_llama-220M-openhermes/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "smol_llama-220M-openhermes", + "id": "BEE-spoke-data/smol_llama-220M-openhermes", + "developer": "BEE-spoke-data", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.218 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1555 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3028 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0106 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3847 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.112 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024/112be4bf-bfac-470f-bde8-c1e4d7282667.json b/data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024/112be4bf-bfac-470f-bde8-c1e4d7282667.json deleted file mode 100644 index cb33ad0b9..000000000 --- a/data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024/112be4bf-bfac-470f-bde8-c1e4d7282667.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BEE-spoke-data_tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024/1762652579.492853", - "retrieved_timestamp": "1762652579.492853", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BEE-spoke-data/tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024", - "developer": "BEE-spoke-data", - "inference_platform": "unknown", - "id": "BEE-spoke-data/tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024", - "additional_details": { - "precision": "bfloat16", - "architecture": "T5ForConditionalGeneration", - "params_billions": 0.887 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13206735905176042 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3137786304497592 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43927083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12367021276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024/8fdea71b-5e68-4a78-aefc-8a00650464c4.json b/data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024/8fdea71b-5e68-4a78-aefc-8a00650464c4.json new file mode 100644 index 000000000..18703f8e9 --- /dev/null +++ 
b/data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024/8fdea71b-5e68-4a78-aefc-8a00650464c4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BEE-spoke-data_tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024", + "id": "BEE-spoke-data/tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024", + "developer": "BEE-spoke-data", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "T5ForConditionalGeneration", + "params_billions": 0.887 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1321 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3138 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0106 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2542 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4393 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1237 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-e16-d32-flan/cdf0ce69-4697-4f16-a769-80691cc08b27.json b/data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-e16-d32-flan/cdf0ce69-4697-4f16-a769-80691cc08b27.json deleted file mode 100644 index d3f25d4a3..000000000 --- 
a/data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-e16-d32-flan/cdf0ce69-4697-4f16-a769-80691cc08b27.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BEE-spoke-data_tFINE-900m-e16-d32-flan/1762652579.492592", - "retrieved_timestamp": "1762652579.492592", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BEE-spoke-data/tFINE-900m-e16-d32-flan", - "developer": "BEE-spoke-data", - "inference_platform": "unknown", - "id": "BEE-spoke-data/tFINE-900m-e16-d32-flan", - "additional_details": { - "precision": "bfloat16", - "architecture": "T5ForConditionalGeneration", - "params_billions": 0.887 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15057713533424646 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30280434847620613 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2332214765100671 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3724166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1307347074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-e16-d32-flan/e2ba5674-9251-4a4e-9eb8-046c834da400.json b/data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-e16-d32-flan/e2ba5674-9251-4a4e-9eb8-046c834da400.json new file mode 100644 index 000000000..c330cd864 --- /dev/null +++ b/data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-e16-d32-flan/e2ba5674-9251-4a4e-9eb8-046c834da400.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BEE-spoke-data_tFINE-900m-e16-d32-flan/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tFINE-900m-e16-d32-flan", + "id": "BEE-spoke-data/tFINE-900m-e16-d32-flan", + "developer": "BEE-spoke-data", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": 
"T5ForConditionalGeneration", + "params_billions": 0.887 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1506 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3028 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0098 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2332 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3724 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1307 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-e16-d32-instruct_2e/4caafdb2-3065-40d4-b5a7-9deb41e1d8a7.json b/data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-e16-d32-instruct_2e/4caafdb2-3065-40d4-b5a7-9deb41e1d8a7.json new file mode 100644 index 000000000..2509ffeaa --- /dev/null +++ b/data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-e16-d32-instruct_2e/4caafdb2-3065-40d4-b5a7-9deb41e1d8a7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BEE-spoke-data_tFINE-900m-e16-d32-instruct_2e/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tFINE-900m-e16-d32-instruct_2e", + "id": "BEE-spoke-data/tFINE-900m-e16-d32-instruct_2e", + "developer": "BEE-spoke-data", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "T5ForConditionalGeneration", + "params_billions": 0.887 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { 
+ "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1403 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3135 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0136 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4207 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1237 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-e16-d32-instruct_2e/7b1574ca-4106-42c0-9336-27df4f0851aa.json b/data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-e16-d32-instruct_2e/7b1574ca-4106-42c0-9336-27df4f0851aa.json deleted file mode 100644 index 3322b612b..000000000 --- a/data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-e16-d32-instruct_2e/7b1574ca-4106-42c0-9336-27df4f0851aa.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BEE-spoke-data_tFINE-900m-e16-d32-instruct_2e/1762652579.493063", - "retrieved_timestamp": "1762652579.493064", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BEE-spoke-data/tFINE-900m-e16-d32-instruct_2e", - "developer": "BEE-spoke-data", - "inference_platform": "unknown", - "id": "BEE-spoke-data/tFINE-900m-e16-d32-instruct_2e", - "additional_details": { - "precision": "bfloat16", - "architecture": "T5ForConditionalGeneration", - "params_billions": 0.887 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1402855534426433 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31345674638809023 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42069791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12367021276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-instruct-orpo/886e0b8b-b2dc-434f-a299-50f668006241.json b/data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-instruct-orpo/886e0b8b-b2dc-434f-a299-50f668006241.json new file mode 100644 index 000000000..2061ed658 --- /dev/null +++ b/data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-instruct-orpo/886e0b8b-b2dc-434f-a299-50f668006241.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BEE-spoke-data_tFINE-900m-instruct-orpo/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tFINE-900m-instruct-orpo", + "id": "BEE-spoke-data/tFINE-900m-instruct-orpo", + "developer": "BEE-spoke-data", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "T5ForConditionalGeneration", + "params_billions": 0.887 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.133 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3022 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + 
"metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0159 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3409 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1152 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-instruct-orpo/e91b6749-3103-4cfa-bf16-86126ee2086e.json b/data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-instruct-orpo/e91b6749-3103-4cfa-bf16-86126ee2086e.json deleted file mode 100644 index 8ae7a04df..000000000 --- a/data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-instruct-orpo/e91b6749-3103-4cfa-bf16-86126ee2086e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BEE-spoke-data_tFINE-900m-instruct-orpo/1762652579.493278", - "retrieved_timestamp": "1762652579.493279", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BEE-spoke-data/tFINE-900m-instruct-orpo", - "developer": "BEE-spoke-data", - "inference_platform": "unknown", - "id": "BEE-spoke-data/tFINE-900m-instruct-orpo", - "additional_details": { - "precision": "bfloat16", - "architecture": "T5ForConditionalGeneration", - "params_billions": 0.887 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13299157346950535 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30220933767045094 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3408541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11519281914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/BSC-LT/salamandra-7b-instruct/2eb60f3a-53f4-478a-8292-aa5e210a8cdf.json b/data/hfopenllm_v2/BSC-LT/salamandra-7b-instruct/2eb60f3a-53f4-478a-8292-aa5e210a8cdf.json deleted file mode 100644 index 0636ea4cf..000000000 --- a/data/hfopenllm_v2/BSC-LT/salamandra-7b-instruct/2eb60f3a-53f4-478a-8292-aa5e210a8cdf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BSC-LT_salamandra-7b-instruct/1762652579.493781", - "retrieved_timestamp": "1762652579.493781", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BSC-LT/salamandra-7b-instruct", - "developer": "BSC-LT", - "inference_platform": "unknown", - "id": "BSC-LT/salamandra-7b-instruct", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.768 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24507418095098782 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3851324290080956 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41343749999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18051861702127658 - } - } - ] -} diff --git a/data/hfopenllm_v2/BSC-LT/salamandra-7b-instruct/7a6a9443-f331-4dfa-acf9-6aa30049bade.json 
b/data/hfopenllm_v2/BSC-LT/salamandra-7b-instruct/7a6a9443-f331-4dfa-acf9-6aa30049bade.json new file mode 100644 index 000000000..83c4bafbf --- /dev/null +++ b/data/hfopenllm_v2/BSC-LT/salamandra-7b-instruct/7a6a9443-f331-4dfa-acf9-6aa30049bade.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BSC-LT_salamandra-7b-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "salamandra-7b-instruct", + "id": "BSC-LT/salamandra-7b-instruct", + "developer": "BSC-LT", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.768 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2451 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3851 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0083 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4134 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1805 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BSC-LT/salamandra-7b/36d2d3af-60aa-4624-b414-e249d06b6ee1.json b/data/hfopenllm_v2/BSC-LT/salamandra-7b/36d2d3af-60aa-4624-b414-e249d06b6ee1.json deleted file mode 100644 index d7674d616..000000000 --- a/data/hfopenllm_v2/BSC-LT/salamandra-7b/36d2d3af-60aa-4624-b414-e249d06b6ee1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - 
"schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BSC-LT_salamandra-7b/1762652579.493503", - "retrieved_timestamp": "1762652579.493503", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BSC-LT/salamandra-7b", - "developer": "BSC-LT", - "inference_platform": "unknown", - "id": "BSC-LT/salamandra-7b", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.768 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13673829882489574 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3516612209885983 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0037764350453172208 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35009375000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14926861702127658 - } - } - ] -} diff --git a/data/hfopenllm_v2/BSC-LT/salamandra-7b/6d523da4-ec4a-405b-a25d-afc7b1b5aefd.json b/data/hfopenllm_v2/BSC-LT/salamandra-7b/6d523da4-ec4a-405b-a25d-afc7b1b5aefd.json new file mode 100644 index 000000000..c5d56580f --- /dev/null +++ b/data/hfopenllm_v2/BSC-LT/salamandra-7b/6d523da4-ec4a-405b-a25d-afc7b1b5aefd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BSC-LT_salamandra-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "salamandra-7b", + "id": "BSC-LT/salamandra-7b", + "developer": "BSC-LT", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.768 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1367 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3517 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0038 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3501 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1493 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Ba2han/Llama-Phi-3_DoRA/cfecfce3-090d-4c2e-826c-03c0c5337e98.json b/data/hfopenllm_v2/Ba2han/Llama-Phi-3_DoRA/cfecfce3-090d-4c2e-826c-03c0c5337e98.json new file mode 100644 index 000000000..a299a3968 --- /dev/null +++ b/data/hfopenllm_v2/Ba2han/Llama-Phi-3_DoRA/cfecfce3-090d-4c2e-826c-03c0c5337e98.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Ba2han_Llama-Phi-3_DoRA/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-Phi-3_DoRA", + "id": "Ba2han/Llama-Phi-3_DoRA", + "developer": "Ba2han", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5131 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5515 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1216 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3263 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4069 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3915 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Baptiste-HUVELLE-10/LeTriomphant2.2_ECE_iLAB/5aa124dc-4abd-4c5f-b40a-a8d81af922eb.json b/data/hfopenllm_v2/Baptiste-HUVELLE-10/LeTriomphant2.2_ECE_iLAB/5aa124dc-4abd-4c5f-b40a-a8d81af922eb.json new file mode 100644 index 000000000..b23494890 --- /dev/null +++ b/data/hfopenllm_v2/Baptiste-HUVELLE-10/LeTriomphant2.2_ECE_iLAB/5aa124dc-4abd-4c5f-b40a-a8d81af922eb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Baptiste-HUVELLE-10_LeTriomphant2.2_ECE_iLAB/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeTriomphant2.2_ECE_iLAB", + "id": "Baptiste-HUVELLE-10/LeTriomphant2.2_ECE_iLAB", + "developer": "Baptiste-HUVELLE-10", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5076 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 
1.0 + }, + "score_details": { + "score": 0.7256 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4449 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3993 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4626 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5851 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Baptiste-HUVELLE-10/LeTriomphant2.2_ECE_iLAB/b1632b15-fa00-4476-b3f4-05aba95df664.json b/data/hfopenllm_v2/Baptiste-HUVELLE-10/LeTriomphant2.2_ECE_iLAB/b1632b15-fa00-4476-b3f4-05aba95df664.json deleted file mode 100644 index 925d04173..000000000 --- a/data/hfopenllm_v2/Baptiste-HUVELLE-10/LeTriomphant2.2_ECE_iLAB/b1632b15-fa00-4476-b3f4-05aba95df664.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Baptiste-HUVELLE-10_LeTriomphant2.2_ECE_iLAB/1762652579.4943", - "retrieved_timestamp": "1762652579.4943008", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Baptiste-HUVELLE-10/LeTriomphant2.2_ECE_iLAB", - "developer": "Baptiste-HUVELLE-10", - "inference_platform": "unknown", - "id": "Baptiste-HUVELLE-10/LeTriomphant2.2_ECE_iLAB", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5076330802271307 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7256319952414622 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44486404833836857 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39932885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46255208333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5851063829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/BenevolenceMessiah/Qwen2.5-72B-2x-Instruct-TIES-v1.0/ad8e3029-612c-434e-a92b-f5c481476e25.json b/data/hfopenllm_v2/BenevolenceMessiah/Qwen2.5-72B-2x-Instruct-TIES-v1.0/ad8e3029-612c-434e-a92b-f5c481476e25.json deleted file mode 100644 index a52b1123b..000000000 --- a/data/hfopenllm_v2/BenevolenceMessiah/Qwen2.5-72B-2x-Instruct-TIES-v1.0/ad8e3029-612c-434e-a92b-f5c481476e25.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BenevolenceMessiah_Qwen2.5-72B-2x-Instruct-TIES-v1.0/1762652579.4945831", - "retrieved_timestamp": "1762652579.494584", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BenevolenceMessiah/Qwen2.5-72B-2x-Instruct-TIES-v1.0", - "developer": "BenevolenceMessiah", - "inference_platform": "unknown", - "id": "BenevolenceMessiah/Qwen2.5-72B-2x-Instruct-TIES-v1.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.7 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5473499204333391 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.727311411382245 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5785498489425982 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3674496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4206666666666667 - } - }, - { - "evaluation_name": 
"MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5628324468085106 - } - } - ] -} diff --git a/data/hfopenllm_v2/BenevolenceMessiah/Qwen2.5-72B-2x-Instruct-TIES-v1.0/ec91b122-c8f5-4dfb-94fd-336ef78c3e14.json b/data/hfopenllm_v2/BenevolenceMessiah/Qwen2.5-72B-2x-Instruct-TIES-v1.0/ec91b122-c8f5-4dfb-94fd-336ef78c3e14.json new file mode 100644 index 000000000..7d796c600 --- /dev/null +++ b/data/hfopenllm_v2/BenevolenceMessiah/Qwen2.5-72B-2x-Instruct-TIES-v1.0/ec91b122-c8f5-4dfb-94fd-336ef78c3e14.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BenevolenceMessiah_Qwen2.5-72B-2x-Instruct-TIES-v1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-72B-2x-Instruct-TIES-v1.0", + "id": "BenevolenceMessiah/Qwen2.5-72B-2x-Instruct-TIES-v1.0", + "developer": "BenevolenceMessiah", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.7 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5473 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7273 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5785 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3674 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4207 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5628 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BenevolenceMessiah/Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0/114f246a-6049-40bf-ad86-9a822d13cf74.json b/data/hfopenllm_v2/BenevolenceMessiah/Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0/114f246a-6049-40bf-ad86-9a822d13cf74.json new file mode 100644 index 000000000..49b763b11 --- /dev/null +++ b/data/hfopenllm_v2/BenevolenceMessiah/Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0/114f246a-6049-40bf-ad86-9a822d13cf74.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BenevolenceMessiah_Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0", + "id": "BenevolenceMessiah/Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0", + "developer": "BenevolenceMessiah", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 28.309 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4909 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0415 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2626 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.408 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.268 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BenevolenceMessiah/Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0/129ba653-ec88-46f2-8828-77e320b922c6.json b/data/hfopenllm_v2/BenevolenceMessiah/Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0/129ba653-ec88-46f2-8828-77e320b922c6.json deleted file mode 100644 index 9adece7da..000000000 --- a/data/hfopenllm_v2/BenevolenceMessiah/Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0/129ba653-ec88-46f2-8828-77e320b922c6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BenevolenceMessiah_Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0/1762652579.4948769", - "retrieved_timestamp": "1762652579.494878", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BenevolenceMessiah/Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0", - "developer": "BenevolenceMessiah", - "inference_platform": "unknown", - "id": "BenevolenceMessiah/Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 28.309 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011531624977283 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4908666248538678 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04154078549848943 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4079791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26803523936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/BlackBeenie/Bloslain-8B-v0.2/160fb625-9c1c-40c1-ab93-7d9f7a2220d2.json b/data/hfopenllm_v2/BlackBeenie/Bloslain-8B-v0.2/160fb625-9c1c-40c1-ab93-7d9f7a2220d2.json deleted file mode 100644 index d1ab9bfda..000000000 --- a/data/hfopenllm_v2/BlackBeenie/Bloslain-8B-v0.2/160fb625-9c1c-40c1-ab93-7d9f7a2220d2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BlackBeenie_Bloslain-8B-v0.2/1762652579.495104", - "retrieved_timestamp": "1762652579.495104", - 
"source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BlackBeenie/Bloslain-8B-v0.2", - "developer": "BlackBeenie", - "inference_platform": "unknown", - "id": "BlackBeenie/Bloslain-8B-v0.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5023371321427147 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.511087946253543 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14501510574018128 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4075729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3653590425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/BlackBeenie/Bloslain-8B-v0.2/82d28a3a-44f2-463f-a1b8-7e9079ec47b7.json b/data/hfopenllm_v2/BlackBeenie/Bloslain-8B-v0.2/82d28a3a-44f2-463f-a1b8-7e9079ec47b7.json new file mode 100644 index 000000000..40f3caa9f --- /dev/null +++ b/data/hfopenllm_v2/BlackBeenie/Bloslain-8B-v0.2/82d28a3a-44f2-463f-a1b8-7e9079ec47b7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BlackBeenie_Bloslain-8B-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Bloslain-8B-v0.2", + "id": "BlackBeenie/Bloslain-8B-v0.2", + "developer": "BlackBeenie", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.5023 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5111 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.145 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4076 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3654 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BlackBeenie/Llama-3.1-8B-OpenO1-SFT-v0.1/b298e0fc-f4fb-4464-beb8-45f8b5f35653.json b/data/hfopenllm_v2/BlackBeenie/Llama-3.1-8B-OpenO1-SFT-v0.1/b298e0fc-f4fb-4464-beb8-45f8b5f35653.json deleted file mode 100644 index 189742bf4..000000000 --- a/data/hfopenllm_v2/BlackBeenie/Llama-3.1-8B-OpenO1-SFT-v0.1/b298e0fc-f4fb-4464-beb8-45f8b5f35653.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BlackBeenie_Llama-3.1-8B-OpenO1-SFT-v0.1/1762652579.495378", - "retrieved_timestamp": "1762652579.495378", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BlackBeenie/Llama-3.1-8B-OpenO1-SFT-v0.1", - "developer": "BlackBeenie", - "inference_platform": "unknown", - "id": "BlackBeenie/Llama-3.1-8B-OpenO1-SFT-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5124037553690873 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4787448361604986 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15256797583081572 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36181250000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34915226063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/BlackBeenie/Llama-3.1-8B-OpenO1-SFT-v0.1/ed3c1349-a154-4866-890f-2b115ffaf127.json b/data/hfopenllm_v2/BlackBeenie/Llama-3.1-8B-OpenO1-SFT-v0.1/ed3c1349-a154-4866-890f-2b115ffaf127.json new file mode 100644 index 000000000..ce32e3e07 --- /dev/null +++ b/data/hfopenllm_v2/BlackBeenie/Llama-3.1-8B-OpenO1-SFT-v0.1/ed3c1349-a154-4866-890f-2b115ffaf127.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BlackBeenie_Llama-3.1-8B-OpenO1-SFT-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8B-OpenO1-SFT-v0.1", + "id": "BlackBeenie/Llama-3.1-8B-OpenO1-SFT-v0.1", + "developer": "BlackBeenie", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5124 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4787 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1526 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + 
"source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3618 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3492 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BlackBeenie/Llama-3.1-8B-pythonic-passthrough-merge/47942c55-5ddb-4fda-9c5b-34676ae2046a.json b/data/hfopenllm_v2/BlackBeenie/Llama-3.1-8B-pythonic-passthrough-merge/47942c55-5ddb-4fda-9c5b-34676ae2046a.json new file mode 100644 index 000000000..3447f8e90 --- /dev/null +++ b/data/hfopenllm_v2/BlackBeenie/Llama-3.1-8B-pythonic-passthrough-merge/47942c55-5ddb-4fda-9c5b-34676ae2046a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BlackBeenie_Llama-3.1-8B-pythonic-passthrough-merge/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8B-pythonic-passthrough-merge", + "id": "BlackBeenie/Llama-3.1-8B-pythonic-passthrough-merge", + "developer": "BlackBeenie", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 20.245 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2316 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3454 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0113 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3778 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1332 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BlackBeenie/Neos-Gemma-2-9b/d860210b-4c8a-4d15-ad3a-4e39905f91ed.json b/data/hfopenllm_v2/BlackBeenie/Neos-Gemma-2-9b/d860210b-4c8a-4d15-ad3a-4e39905f91ed.json new file mode 100644 index 000000000..7ae80464f --- /dev/null +++ b/data/hfopenllm_v2/BlackBeenie/Neos-Gemma-2-9b/d860210b-4c8a-4d15-ad3a-4e39905f91ed.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BlackBeenie_Neos-Gemma-2-9b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Neos-Gemma-2-9b", + "id": "BlackBeenie/Neos-Gemma-2-9b", + "developer": "BlackBeenie", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5876 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5503 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0982 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.323 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": 
"hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3618 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3981 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BlackBeenie/Neos-Llama-3.1-8B/d137f429-2b65-4ee9-9d66-3f619b270fad.json b/data/hfopenllm_v2/BlackBeenie/Neos-Llama-3.1-8B/d137f429-2b65-4ee9-9d66-3f619b270fad.json new file mode 100644 index 000000000..d7f22bbd1 --- /dev/null +++ b/data/hfopenllm_v2/BlackBeenie/Neos-Llama-3.1-8B/d137f429-2b65-4ee9-9d66-3f619b270fad.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BlackBeenie_Neos-Llama-3.1-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Neos-Llama-3.1-8B", + "id": "BlackBeenie/Neos-Llama-3.1-8B", + "developer": "BlackBeenie", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4944 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4425 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1322 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3262 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BlackBeenie/Neos-Llama-3.1-base/1da10dfe-b0a3-4cb8-aaa3-e16d48f3aab4.json b/data/hfopenllm_v2/BlackBeenie/Neos-Llama-3.1-base/1da10dfe-b0a3-4cb8-aaa3-e16d48f3aab4.json new file mode 100644 index 000000000..4db1b8828 --- /dev/null +++ b/data/hfopenllm_v2/BlackBeenie/Neos-Llama-3.1-base/1da10dfe-b0a3-4cb8-aaa3-e16d48f3aab4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BlackBeenie_Neos-Llama-3.1-base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Neos-Llama-3.1-base", + "id": "BlackBeenie/Neos-Llama-3.1-base", + "developer": "BlackBeenie", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.65 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1751 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.293 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2374 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3499 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", 
+ "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1112 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BlackBeenie/Neos-Phi-3-14B-v0.1/6156a0d2-4c32-40b2-9624-ef0c7a6a95bb.json b/data/hfopenllm_v2/BlackBeenie/Neos-Phi-3-14B-v0.1/6156a0d2-4c32-40b2-9624-ef0c7a6a95bb.json new file mode 100644 index 000000000..5db8afdb2 --- /dev/null +++ b/data/hfopenllm_v2/BlackBeenie/Neos-Phi-3-14B-v0.1/6156a0d2-4c32-40b2-9624-ef0c7a6a95bb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BlackBeenie_Neos-Phi-3-14B-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Neos-Phi-3-14B-v0.1", + "id": "BlackBeenie/Neos-Phi-3-14B-v0.1", + "developer": "BlackBeenie", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 13.96 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4022 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6212 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1782 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4564 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/BlackBeenie/llama-3-luminous-merged/676342d2-f37a-4b6a-967d-3ac750243470.json b/data/hfopenllm_v2/BlackBeenie/llama-3-luminous-merged/676342d2-f37a-4b6a-967d-3ac750243470.json new file mode 100644 index 000000000..2c979d2a9 --- /dev/null +++ b/data/hfopenllm_v2/BlackBeenie/llama-3-luminous-merged/676342d2-f37a-4b6a-967d-3ac750243470.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BlackBeenie_llama-3-luminous-merged/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-3-luminous-merged", + "id": "BlackBeenie/llama-3-luminous-merged", + "developer": "BlackBeenie", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4323 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5154 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0869 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4149 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3773 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BlackBeenie/llama-3.1-8B-Galore-openassistant-guanaco/950b7108-0192-4875-b4e9-c3e43ab71e08.json 
b/data/hfopenllm_v2/BlackBeenie/llama-3.1-8B-Galore-openassistant-guanaco/950b7108-0192-4875-b4e9-c3e43ab71e08.json new file mode 100644 index 000000000..776b081c0 --- /dev/null +++ b/data/hfopenllm_v2/BlackBeenie/llama-3.1-8B-Galore-openassistant-guanaco/950b7108-0192-4875-b4e9-c3e43ab71e08.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BlackBeenie_llama-3.1-8B-Galore-openassistant-guanaco/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-3.1-8B-Galore-openassistant-guanaco", + "id": "BlackBeenie/llama-3.1-8B-Galore-openassistant-guanaco", + "developer": "BlackBeenie", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2635 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5213 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0665 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4406 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3206 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Bllossom/llama-3.2-Korean-Bllossom-AICA-5B/85672df5-2f35-43be-8648-9937c66872dc.json b/data/hfopenllm_v2/Bllossom/llama-3.2-Korean-Bllossom-AICA-5B/85672df5-2f35-43be-8648-9937c66872dc.json new file mode 
100644 index 000000000..2f5bfbf50 --- /dev/null +++ b/data/hfopenllm_v2/Bllossom/llama-3.2-Korean-Bllossom-AICA-5B/85672df5-2f35-43be-8648-9937c66872dc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Bllossom_llama-3.2-Korean-Bllossom-AICA-5B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-3.2-Korean-Bllossom-AICA-5B", + "id": "Bllossom/llama-3.2-Korean-Bllossom-AICA-5B", + "developer": "Bllossom", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MllamaForConditionalGeneration", + "params_billions": 5.199 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5172 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4293 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1239 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3834 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BoltMonkey/DreadMix/051c5642-3b23-4879-9d10-639d1b3127d7.json b/data/hfopenllm_v2/BoltMonkey/DreadMix/051c5642-3b23-4879-9d10-639d1b3127d7.json new file mode 100644 index 000000000..7ccd4a48e --- /dev/null +++ b/data/hfopenllm_v2/BoltMonkey/DreadMix/051c5642-3b23-4879-9d10-639d1b3127d7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/BoltMonkey_DreadMix/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadMix", + "id": "BoltMonkey/DreadMix", + "developer": "BoltMonkey", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7095 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5435 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1556 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4212 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.379 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BoltMonkey/DreadMix/e6b5e728-28a4-444a-8b6b-89d29b7b5225.json b/data/hfopenllm_v2/BoltMonkey/DreadMix/e6b5e728-28a4-444a-8b6b-89d29b7b5225.json deleted file mode 100644 index 94510270e..000000000 --- a/data/hfopenllm_v2/BoltMonkey/DreadMix/e6b5e728-28a4-444a-8b6b-89d29b7b5225.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BoltMonkey_DreadMix/1762652579.497959", - "retrieved_timestamp": "1762652579.497961", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": 
"third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BoltMonkey/DreadMix", - "developer": "BoltMonkey", - "inference_platform": "unknown", - "id": "BoltMonkey/DreadMix", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7094908176970438 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5435097438362475 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1555891238670695 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42121875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37898936170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/2acf0d12-7e0c-46dc-a079-ebc48a8818d3.json b/data/hfopenllm_v2/BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/2acf0d12-7e0c-46dc-a079-ebc48a8818d3.json new file mode 100644 index 000000000..7730b26d1 --- /dev/null +++ b/data/hfopenllm_v2/BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/2acf0d12-7e0c-46dc-a079-ebc48a8818d3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BoltMonkey_NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated", + "id": "BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated", + "developer": "BoltMonkey", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": 
{ + "score": 0.7999 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5152 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1193 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4019 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3733 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/8ce42090-006e-4e08-8d3f-5b1eb0b8da0b.json b/data/hfopenllm_v2/BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/8ce42090-006e-4e08-8d3f-5b1eb0b8da0b.json new file mode 100644 index 000000000..0ea3c09c9 --- /dev/null +++ b/data/hfopenllm_v2/BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/8ce42090-006e-4e08-8d3f-5b1eb0b8da0b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BoltMonkey_NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated", + "id": "BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated", + "developer": "BoltMonkey", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.459 + } + }, + { + 
"evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5185 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0937 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4083 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3631 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/d9e3bd73-cd7e-46d4-9e62-0cfac178f62a.json b/data/hfopenllm_v2/BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/d9e3bd73-cd7e-46d4-9e62-0cfac178f62a.json deleted file mode 100644 index 0375cdb01..000000000 --- a/data/hfopenllm_v2/BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/d9e3bd73-cd7e-46d4-9e62-0cfac178f62a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BoltMonkey_NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/1762652579.498452", - "retrieved_timestamp": "1762652579.498454", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated", - "developer": "BoltMonkey", - "inference_platform": "unknown", - "id": "BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7998909559967553 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5151987922850448 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11933534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.401875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37333776595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/f83a5d67-b967-47c8-b76e-b58c445a3634.json b/data/hfopenllm_v2/BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/f83a5d67-b967-47c8-b76e-b58c445a3634.json deleted file mode 100644 index a2cb9def6..000000000 --- a/data/hfopenllm_v2/BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/f83a5d67-b967-47c8-b76e-b58c445a3634.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BoltMonkey_NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/1762652579.498964", - "retrieved_timestamp": "1762652579.498965", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated", - "developer": "BoltMonkey", - "inference_platform": "unknown", - "id": "BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45902316963434797 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5185441912447182 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09365558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4082604166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3631150265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/BoltMonkey/SuperNeuralDreadDevil-8b/2ad0eebb-31e3-4f28-aba6-073f33d5cbed.json b/data/hfopenllm_v2/BoltMonkey/SuperNeuralDreadDevil-8b/2ad0eebb-31e3-4f28-aba6-073f33d5cbed.json deleted file mode 100644 index c04530c2b..000000000 --- a/data/hfopenllm_v2/BoltMonkey/SuperNeuralDreadDevil-8b/2ad0eebb-31e3-4f28-aba6-073f33d5cbed.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BoltMonkey_SuperNeuralDreadDevil-8b/1762652579.499188", - "retrieved_timestamp": "1762652579.499189", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BoltMonkey/SuperNeuralDreadDevil-8b", - "developer": "BoltMonkey", - "inference_platform": "unknown", - "id": "BoltMonkey/SuperNeuralDreadDevil-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7709898624538447 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5286196012035721 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09290030211480363 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39768749999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36785239361702127 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/BoltMonkey/SuperNeuralDreadDevil-8b/703df6c3-dae4-437f-9379-f8c264797adc.json b/data/hfopenllm_v2/BoltMonkey/SuperNeuralDreadDevil-8b/703df6c3-dae4-437f-9379-f8c264797adc.json new file mode 100644 index 000000000..00d308164 --- /dev/null +++ b/data/hfopenllm_v2/BoltMonkey/SuperNeuralDreadDevil-8b/703df6c3-dae4-437f-9379-f8c264797adc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BoltMonkey_SuperNeuralDreadDevil-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SuperNeuralDreadDevil-8b", + "id": "BoltMonkey/SuperNeuralDreadDevil-8b", + "developer": "BoltMonkey", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.771 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5286 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0929 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3977 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3679 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BrainWave-ML/llama3.2-3B-maths-orpo/1e349ad3-d29b-4a4b-97e7-b82055e41b07.json b/data/hfopenllm_v2/BrainWave-ML/llama3.2-3B-maths-orpo/1e349ad3-d29b-4a4b-97e7-b82055e41b07.json new file mode 100644 
index 000000000..4d4697ad4 --- /dev/null +++ b/data/hfopenllm_v2/BrainWave-ML/llama3.2-3B-maths-orpo/1e349ad3-d29b-4a4b-97e7-b82055e41b07.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BrainWave-ML_llama3.2-3B-maths-orpo/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama3.2-3B-maths-orpo", + "id": "BrainWave-ML/llama3.2-3B-maths-orpo", + "developer": "BrainWave-ML", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2049 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2912 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3575 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1168 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BramVanroy/GEITje-7B-ultra/8f677a76-932c-4c35-9708-4b723226aa19.json b/data/hfopenllm_v2/BramVanroy/GEITje-7B-ultra/8f677a76-932c-4c35-9708-4b723226aa19.json new file mode 100644 index 000000000..876af8a9c --- /dev/null +++ b/data/hfopenllm_v2/BramVanroy/GEITje-7B-ultra/8f677a76-932c-4c35-9708-4b723226aa19.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/BramVanroy_GEITje-7B-ultra/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "GEITje-7B-ultra", + "id": "BramVanroy/GEITje-7B-ultra", + "developer": "BramVanroy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3723 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3776 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0159 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2626 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.329 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2011 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BramVanroy/GEITje-7B-ultra/efcc28d3-ca6a-4100-afd2-75f9925354ba.json b/data/hfopenllm_v2/BramVanroy/GEITje-7B-ultra/efcc28d3-ca6a-4100-afd2-75f9925354ba.json deleted file mode 100644 index 34227e08b..000000000 --- a/data/hfopenllm_v2/BramVanroy/GEITje-7B-ultra/efcc28d3-ca6a-4100-afd2-75f9925354ba.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BramVanroy_GEITje-7B-ultra/1762652579.499682", - "retrieved_timestamp": "1762652579.4996831", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BramVanroy/GEITje-7B-ultra", - "developer": "BramVanroy", - "inference_platform": "unknown", - "id": "BramVanroy/GEITje-7B-ultra", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3723442687624392 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37761612997305494 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32897916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20113031914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/BramVanroy/fietje-2-chat/ebfe625f-ff1f-45f9-826c-9351ea4134e1.json b/data/hfopenllm_v2/BramVanroy/fietje-2-chat/ebfe625f-ff1f-45f9-826c-9351ea4134e1.json new file mode 100644 index 000000000..3c12f59b5 --- /dev/null +++ b/data/hfopenllm_v2/BramVanroy/fietje-2-chat/ebfe625f-ff1f-45f9-826c-9351ea4134e1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BramVanroy_fietje-2-chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "fietje-2-chat", + "id": "BramVanroy/fietje-2-chat", + "developer": "BramVanroy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "PhiForCausalLM", + "params_billions": 2.775 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2917 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + 
"hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.415 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0189 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2399 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3528 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2055 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BramVanroy/fietje-2-chat/faf20d1a-5a92-49b2-be69-903cafb9460a.json b/data/hfopenllm_v2/BramVanroy/fietje-2-chat/faf20d1a-5a92-49b2-be69-903cafb9460a.json deleted file mode 100644 index 4e5c37ecb..000000000 --- a/data/hfopenllm_v2/BramVanroy/fietje-2-chat/faf20d1a-5a92-49b2-be69-903cafb9460a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BramVanroy_fietje-2-chat/1762652579.500146", - "retrieved_timestamp": "1762652579.5001469", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BramVanroy/fietje-2-chat", - "developer": "BramVanroy", - "inference_platform": "unknown", - "id": "BramVanroy/fietje-2-chat", - "additional_details": { - "precision": "bfloat16", - "architecture": "PhiForCausalLM", - "params_billions": 2.775 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2917359273394593 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4149753717401999 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH 
Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0188821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23993288590604026 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3527604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20545212765957446 - } - } - ] -} diff --git a/data/hfopenllm_v2/BramVanroy/fietje-2-instruct/03e122da-30cc-4c2e-9b44-8261c3f2a934.json b/data/hfopenllm_v2/BramVanroy/fietje-2-instruct/03e122da-30cc-4c2e-9b44-8261c3f2a934.json deleted file mode 100644 index c9a14a26e..000000000 --- a/data/hfopenllm_v2/BramVanroy/fietje-2-instruct/03e122da-30cc-4c2e-9b44-8261c3f2a934.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BramVanroy_fietje-2-instruct/1762652579.500353", - "retrieved_timestamp": "1762652579.500354", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BramVanroy/fietje-2-instruct", - "developer": "BramVanroy", - "inference_platform": "unknown", - "id": "BramVanroy/fietje-2-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "PhiForCausalLM", - "params_billions": 2.775 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2789963962286732 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41360714173029806 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.022658610271903322 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2332214765100671 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3369166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2103557180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/BramVanroy/fietje-2-instruct/66e6a757-ac22-47f3-82ce-81af45e1d3cf.json b/data/hfopenllm_v2/BramVanroy/fietje-2-instruct/66e6a757-ac22-47f3-82ce-81af45e1d3cf.json new file mode 100644 index 000000000..667f588bf --- /dev/null +++ b/data/hfopenllm_v2/BramVanroy/fietje-2-instruct/66e6a757-ac22-47f3-82ce-81af45e1d3cf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BramVanroy_fietje-2-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "fietje-2-instruct", + "id": "BramVanroy/fietje-2-instruct", + "developer": "BramVanroy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "PhiForCausalLM", + "params_billions": 2.775 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.279 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4136 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0227 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2332 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3369 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2104 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BramVanroy/fietje-2/1cd840c7-d432-495c-a3df-af1fa6264259.json 
b/data/hfopenllm_v2/BramVanroy/fietje-2/1cd840c7-d432-495c-a3df-af1fa6264259.json new file mode 100644 index 000000000..9d714f7c1 --- /dev/null +++ b/data/hfopenllm_v2/BramVanroy/fietje-2/1cd840c7-d432-495c-a3df-af1fa6264259.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/BramVanroy_fietje-2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "fietje-2", + "id": "BramVanroy/fietje-2", + "developer": "BramVanroy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2098 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4036 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0159 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2542 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3696 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1986 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BramVanroy/fietje-2/3712e2c3-0ed1-4dc9-95fc-4be0bec18675.json b/data/hfopenllm_v2/BramVanroy/fietje-2/3712e2c3-0ed1-4dc9-95fc-4be0bec18675.json deleted file mode 100644 index 2603bb106..000000000 --- a/data/hfopenllm_v2/BramVanroy/fietje-2/3712e2c3-0ed1-4dc9-95fc-4be0bec18675.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/BramVanroy_fietje-2/1762652579.499938", - "retrieved_timestamp": "1762652579.499939", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BramVanroy/fietje-2", - "developer": "BramVanroy", - "inference_platform": "unknown", - "id": "BramVanroy/fietje-2", - "additional_details": { - "precision": "bfloat16", - "architecture": "PhiForCausalLM", - "params_billions": 2.78 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20980332185268422 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40356695178386187 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3695625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19855385638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/CYFRAGOVPL/Llama-PLLuM-8B-base/066f520f-9a64-4564-abfc-6435732c3585.json b/data/hfopenllm_v2/CYFRAGOVPL/Llama-PLLuM-8B-base/066f520f-9a64-4564-abfc-6435732c3585.json new file mode 100644 index 000000000..72e310100 --- /dev/null +++ b/data/hfopenllm_v2/CYFRAGOVPL/Llama-PLLuM-8B-base/066f520f-9a64-4564-abfc-6435732c3585.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CYFRAGOVPL_Llama-PLLuM-8B-base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-PLLuM-8B-base", + "id": "CYFRAGOVPL/Llama-PLLuM-8B-base", + "developer": "CYFRAGOVPL", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2899 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.432 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0363 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.397 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2757 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CYFRAGOVPL/Llama-PLLuM-8B-chat/aced5181-040a-48c0-bc5f-78d0de3afae8.json b/data/hfopenllm_v2/CYFRAGOVPL/Llama-PLLuM-8B-chat/aced5181-040a-48c0-bc5f-78d0de3afae8.json new file mode 100644 index 000000000..8c1a7d263 --- /dev/null +++ b/data/hfopenllm_v2/CYFRAGOVPL/Llama-PLLuM-8B-chat/aced5181-040a-48c0-bc5f-78d0de3afae8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CYFRAGOVPL_Llama-PLLuM-8B-chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-PLLuM-8B-chat", + "id": "CYFRAGOVPL/Llama-PLLuM-8B-chat", + "developer": "CYFRAGOVPL", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3515 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": 
"SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4077 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.034 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4199 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2719 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CYFRAGOVPL/Llama-PLLuM-8B-chat/cb833a8b-81d7-41a6-bff2-9d0927703113.json b/data/hfopenllm_v2/CYFRAGOVPL/Llama-PLLuM-8B-chat/cb833a8b-81d7-41a6-bff2-9d0927703113.json deleted file mode 100644 index 4868734ae..000000000 --- a/data/hfopenllm_v2/CYFRAGOVPL/Llama-PLLuM-8B-chat/cb833a8b-81d7-41a6-bff2-9d0927703113.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CYFRAGOVPL_Llama-PLLuM-8B-chat/1762652579.5008068", - "retrieved_timestamp": "1762652579.500808", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CYFRAGOVPL/Llama-PLLuM-8B-chat", - "developer": "CYFRAGOVPL", - "inference_platform": "unknown", - "id": "CYFRAGOVPL/Llama-PLLuM-8B-chat", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3514862786295917 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40770722535589576 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.033987915407854986 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41991666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27194148936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-base/76833817-781e-4292-9fe8-5e8a1da7f962.json b/data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-base/76833817-781e-4292-9fe8-5e8a1da7f962.json deleted file mode 100644 index 3838a9961..000000000 --- a/data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-base/76833817-781e-4292-9fe8-5e8a1da7f962.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CYFRAGOVPL_PLLuM-12B-base/1762652579.501051", - "retrieved_timestamp": "1762652579.501052", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CYFRAGOVPL/PLLuM-12B-base", - "developer": "CYFRAGOVPL", - "inference_platform": "unknown", - "id": "CYFRAGOVPL/PLLuM-12B-base", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2820937335159599 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4390596143784447 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4142395833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": 
"Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2740192819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-base/a4889a38-84d2-4ae1-b8a9-297b4400602d.json b/data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-base/a4889a38-84d2-4ae1-b8a9-297b4400602d.json new file mode 100644 index 000000000..ec8ff80a8 --- /dev/null +++ b/data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-base/a4889a38-84d2-4ae1-b8a9-297b4400602d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CYFRAGOVPL_PLLuM-12B-base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PLLuM-12B-base", + "id": "CYFRAGOVPL/PLLuM-12B-base", + "developer": "CYFRAGOVPL", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2821 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4391 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0287 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4142 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.274 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-chat/6e325f0f-b5db-4773-8179-7e949bd3f5f2.json b/data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-chat/6e325f0f-b5db-4773-8179-7e949bd3f5f2.json deleted file mode 100644 index 822d066ac..000000000 --- a/data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-chat/6e325f0f-b5db-4773-8179-7e949bd3f5f2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CYFRAGOVPL_PLLuM-12B-chat/1762652579.501271", - "retrieved_timestamp": "1762652579.501272", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CYFRAGOVPL/PLLuM-12B-chat", - "developer": "CYFRAGOVPL", - "inference_platform": "unknown", - "id": "CYFRAGOVPL/PLLuM-12B-chat", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32143601200370575 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44458000333075703 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01812688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4114791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2872340425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-chat/d540505a-c67b-4b72-a53a-c03aa6f8d3e7.json b/data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-chat/d540505a-c67b-4b72-a53a-c03aa6f8d3e7.json new file mode 100644 index 000000000..b607ed69c --- /dev/null +++ b/data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-chat/d540505a-c67b-4b72-a53a-c03aa6f8d3e7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CYFRAGOVPL_PLLuM-12B-chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PLLuM-12B-chat", + "id": "CYFRAGOVPL/PLLuM-12B-chat", + "developer": "CYFRAGOVPL", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3214 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4446 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0181 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4115 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2872 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-nc-base/9859afee-02ca-4c48-acc8-acfd20c37e4e.json b/data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-nc-base/9859afee-02ca-4c48-acc8-acfd20c37e4e.json new file mode 100644 index 000000000..d200117b7 --- /dev/null +++ b/data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-nc-base/9859afee-02ca-4c48-acc8-acfd20c37e4e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CYFRAGOVPL_PLLuM-12B-nc-base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PLLuM-12B-nc-base", + "id": "CYFRAGOVPL/PLLuM-12B-nc-base", + "developer": "CYFRAGOVPL", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + 
"dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2405 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4277 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0219 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3645 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2559 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-nc-base/e9b90a3b-09c6-4d3b-9aa3-6279ea3cccb5.json b/data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-nc-base/e9b90a3b-09c6-4d3b-9aa3-6279ea3cccb5.json deleted file mode 100644 index d1f838bb0..000000000 --- a/data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-nc-base/e9b90a3b-09c6-4d3b-9aa3-6279ea3cccb5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CYFRAGOVPL_PLLuM-12B-nc-base/1762652579.501493", - "retrieved_timestamp": "1762652579.501494", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CYFRAGOVPL/PLLuM-12B-nc-base", - "developer": "CYFRAGOVPL", - "inference_platform": "unknown", - "id": "CYFRAGOVPL/PLLuM-12B-nc-base", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, 
- "max_score": 1 - }, - "score_details": { - "score": 0.24045310886226323 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42767589675970014 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36451041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25590093085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-nc-chat/e222d12b-c796-4890-a584-cd689bae7ea6.json b/data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-nc-chat/e222d12b-c796-4890-a584-cd689bae7ea6.json new file mode 100644 index 000000000..2eaeab787 --- /dev/null +++ b/data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-nc-chat/e222d12b-c796-4890-a584-cd689bae7ea6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CYFRAGOVPL_PLLuM-12B-nc-chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PLLuM-12B-nc-chat", + "id": "CYFRAGOVPL/PLLuM-12B-nc-chat", + "developer": "CYFRAGOVPL", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2834 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4576 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.0121 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4354 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2597 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-nc-chat/fd19dada-5945-45d5-8a84-122404b8dd57.json b/data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-nc-chat/fd19dada-5945-45d5-8a84-122404b8dd57.json deleted file mode 100644 index 96541c70c..000000000 --- a/data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-nc-chat/fd19dada-5945-45d5-8a84-122404b8dd57.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CYFRAGOVPL_PLLuM-12B-nc-chat/1762652579.501705", - "retrieved_timestamp": "1762652579.501706", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CYFRAGOVPL/PLLuM-12B-nc-chat", - "developer": "CYFRAGOVPL", - "inference_platform": "unknown", - "id": "CYFRAGOVPL/PLLuM-12B-nc-chat", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28344237733657807 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45764328318815456 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.4353541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25972406914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct-2412/41809335-e00c-4911-bc08-6edd71891585.json b/data/hfopenllm_v2/CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct-2412/41809335-e00c-4911-bc08-6edd71891585.json deleted file mode 100644 index 31f534951..000000000 --- a/data/hfopenllm_v2/CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct-2412/41809335-e00c-4911-bc08-6edd71891585.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CarrotAI_Llama-3.2-Rabbit-Ko-3B-Instruct-2412/1762652579.5021691", - "retrieved_timestamp": "1762652579.50217", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct-2412", - "developer": "CarrotAI", - "inference_platform": "unknown", - "id": "CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct-2412", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47818233398493776 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43577246498246686 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17598187311178248 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3872083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31341422872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct-2412/c16850f8-0b80-4455-8f38-8ec453cd1d41.json b/data/hfopenllm_v2/CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct-2412/c16850f8-0b80-4455-8f38-8ec453cd1d41.json new file mode 100644 index 000000000..49e80d94c --- /dev/null +++ 
b/data/hfopenllm_v2/CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct-2412/c16850f8-0b80-4455-8f38-8ec453cd1d41.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CarrotAI_Llama-3.2-Rabbit-Ko-3B-Instruct-2412/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-Rabbit-Ko-3B-Instruct-2412", + "id": "CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct-2412", + "developer": "CarrotAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4782 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4358 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.176 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3872 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3134 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct/0d400b0f-cc82-4c86-b600-93a31b133f9d.json b/data/hfopenllm_v2/CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct/0d400b0f-cc82-4c86-b600-93a31b133f9d.json new file mode 100644 index 000000000..727b90c2f --- /dev/null +++ b/data/hfopenllm_v2/CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct/0d400b0f-cc82-4c86-b600-93a31b133f9d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + 
"evaluation_id": "hfopenllm_v2/CarrotAI_Llama-3.2-Rabbit-Ko-3B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-Rabbit-Ko-3B-Instruct", + "id": "CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct", + "developer": "CarrotAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7199 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4427 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2054 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3649 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2822 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct/8c56b973-d5cb-48b6-a43e-ad50769b1f40.json b/data/hfopenllm_v2/CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct/8c56b973-d5cb-48b6-a43e-ad50769b1f40.json deleted file mode 100644 index fc8b80948..000000000 --- a/data/hfopenllm_v2/CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct/8c56b973-d5cb-48b6-a43e-ad50769b1f40.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CarrotAI_Llama-3.2-Rabbit-Ko-3B-Instruct/1762652579.501917", - "retrieved_timestamp": "1762652579.5019178", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct", - "developer": "CarrotAI", - "inference_platform": "unknown", - "id": "CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7198821349574684 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4426719080820793 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3649166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2822473404255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/Casual-Autopsy/L3-Umbral-Mind-RP-v2.0-8B/90f6f8f1-02fc-425a-8499-e9b43ae8ac59.json b/data/hfopenllm_v2/Casual-Autopsy/L3-Umbral-Mind-RP-v2.0-8B/90f6f8f1-02fc-425a-8499-e9b43ae8ac59.json new file mode 100644 index 000000000..ea628405b --- /dev/null +++ b/data/hfopenllm_v2/Casual-Autopsy/L3-Umbral-Mind-RP-v2.0-8B/90f6f8f1-02fc-425a-8499-e9b43ae8ac59.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Casual-Autopsy_L3-Umbral-Mind-RP-v2.0-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3-Umbral-Mind-RP-v2.0-8B", + "id": "Casual-Autopsy/L3-Umbral-Mind-RP-v2.0-8B", + "developer": "Casual-Autopsy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7123 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5262 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1095 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3687 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3723 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Casual-Autopsy/L3-Umbral-Mind-RP-v2.0-8B/da5c1edf-bd74-48a3-ad76-a4bd89539b7f.json b/data/hfopenllm_v2/Casual-Autopsy/L3-Umbral-Mind-RP-v2.0-8B/da5c1edf-bd74-48a3-ad76-a4bd89539b7f.json deleted file mode 100644 index f85e8a505..000000000 --- a/data/hfopenllm_v2/Casual-Autopsy/L3-Umbral-Mind-RP-v2.0-8B/da5c1edf-bd74-48a3-ad76-a4bd89539b7f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Casual-Autopsy_L3-Umbral-Mind-RP-v2.0-8B/1762652579.502389", - "retrieved_timestamp": "1762652579.502389", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Casual-Autopsy/L3-Umbral-Mind-RP-v2.0-8B", - "developer": "Casual-Autopsy", - "inference_platform": "unknown", - "id": "Casual-Autopsy/L3-Umbral-Mind-RP-v2.0-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7122634609502786 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5262406145493724 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1095166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3686666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3723404255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/CausalLM/14B/6704d6bc-6d38-4c59-87a4-81d3eacde3b1.json b/data/hfopenllm_v2/CausalLM/14B/6704d6bc-6d38-4c59-87a4-81d3eacde3b1.json new file mode 100644 index 000000000..aaa13fb97 --- /dev/null +++ b/data/hfopenllm_v2/CausalLM/14B/6704d6bc-6d38-4c59-87a4-81d3eacde3b1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CausalLM_14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "14B", + "id": "CausalLM/14B", + "developer": "CausalLM", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2788 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.47 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0755 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4155 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3221 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CausalLM/14B/c4376867-854d-44fa-9215-b9c1af7612a4.json b/data/hfopenllm_v2/CausalLM/14B/c4376867-854d-44fa-9215-b9c1af7612a4.json deleted file mode 100644 index 3613dbfcf..000000000 --- a/data/hfopenllm_v2/CausalLM/14B/c4376867-854d-44fa-9215-b9c1af7612a4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CausalLM_14B/1762652579.502646", - "retrieved_timestamp": "1762652579.502647", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CausalLM/14B", - "developer": "CausalLM", - "inference_platform": "unknown", - "id": "CausalLM/14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2788213052478535 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4700462397700626 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0755287009063444 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4154791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3221409574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/CausalLM/34b-beta/cc482ca4-031a-4c22-90c2-68322184125b.json b/data/hfopenllm_v2/CausalLM/34b-beta/cc482ca4-031a-4c22-90c2-68322184125b.json deleted file mode 100644 index 1e9f0e336..000000000 --- a/data/hfopenllm_v2/CausalLM/34b-beta/cc482ca4-031a-4c22-90c2-68322184125b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CausalLM_34b-beta/1762652579.502916", - "retrieved_timestamp": "1762652579.502916", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CausalLM/34b-beta", - "developer": "CausalLM", - "inference_platform": "unknown", - "id": "CausalLM/34b-beta", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3043247472262486 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5590996102136266 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04833836858006042 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3464765100671141 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37486458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5324966755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/CausalLM/34b-beta/e8ad6ce4-7efc-499e-a2c9-9e0df898fbb9.json b/data/hfopenllm_v2/CausalLM/34b-beta/e8ad6ce4-7efc-499e-a2c9-9e0df898fbb9.json new file mode 100644 index 000000000..19e23bb71 --- /dev/null +++ b/data/hfopenllm_v2/CausalLM/34b-beta/e8ad6ce4-7efc-499e-a2c9-9e0df898fbb9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CausalLM_34b-beta/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "34b-beta", + "id": "CausalLM/34b-beta", + "developer": "CausalLM", + "inference_platform": "unknown", + "additional_details": { + "precision": 
"float16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3043 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5591 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0483 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3465 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3749 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5325 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CausalLM/preview-1-hf/5e9c1273-536d-4280-8fff-9931f46dc968.json b/data/hfopenllm_v2/CausalLM/preview-1-hf/5e9c1273-536d-4280-8fff-9931f46dc968.json new file mode 100644 index 000000000..571a2c125 --- /dev/null +++ b/data/hfopenllm_v2/CausalLM/preview-1-hf/5e9c1273-536d-4280-8fff-9931f46dc968.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CausalLM_preview-1-hf/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "preview-1-hf", + "id": "CausalLM/preview-1-hf", + "developer": "CausalLM", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GlmForCausalLM", + "params_billions": 9.543 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5559 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3615 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0302 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3422 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3597 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CausalLM/preview-1-hf/e9fcf09c-14e2-4226-b1e5-b5752ac1a753.json b/data/hfopenllm_v2/CausalLM/preview-1-hf/e9fcf09c-14e2-4226-b1e5-b5752ac1a753.json deleted file mode 100644 index 621589078..000000000 --- a/data/hfopenllm_v2/CausalLM/preview-1-hf/e9fcf09c-14e2-4226-b1e5-b5752ac1a753.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CausalLM_preview-1-hf/1762652579.503128", - "retrieved_timestamp": "1762652579.503129", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CausalLM/preview-1-hf", - "developer": "CausalLM", - "inference_platform": "unknown", - "id": "CausalLM/preview-1-hf", - "additional_details": { - "precision": "bfloat16", - "architecture": "GlmForCausalLM", - "params_billions": 9.543 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5558928088582737 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": 
"Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3614567463880903 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030211480362537766 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34218750000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35970744680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/Changgil/K2S3-14b-v0.2/460ca160-ac34-4091-ba2d-986b53532b55.json b/data/hfopenllm_v2/Changgil/K2S3-14b-v0.2/460ca160-ac34-4091-ba2d-986b53532b55.json new file mode 100644 index 000000000..c087ab2dc --- /dev/null +++ b/data/hfopenllm_v2/Changgil/K2S3-14b-v0.2/460ca160-ac34-4091-ba2d-986b53532b55.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Changgil_K2S3-14b-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "K2S3-14b-v0.2", + "id": "Changgil/K2S3-14b-v0.2", + "developer": "Changgil", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 14.352 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3243 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4613 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0574 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3923 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2644 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Changgil/K2S3-14b-v0.2/4dfe2d3c-7fc3-4b57-8acd-02b0808ccdb1.json b/data/hfopenllm_v2/Changgil/K2S3-14b-v0.2/4dfe2d3c-7fc3-4b57-8acd-02b0808ccdb1.json deleted file mode 100644 index 3d9c9169c..000000000 --- a/data/hfopenllm_v2/Changgil/K2S3-14b-v0.2/4dfe2d3c-7fc3-4b57-8acd-02b0808ccdb1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Changgil_K2S3-14b-v0.2/1762652579.503338", - "retrieved_timestamp": "1762652579.503339", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Changgil/K2S3-14b-v0.2", - "developer": "Changgil", - "inference_platform": "unknown", - "id": "Changgil/K2S3-14b-v0.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 14.352 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3242840108689389 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4613311786298187 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3922604166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2643783244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/Changgil/K2S3-v0.1/225bc36b-4bfb-4818-8601-903e7f9decb3.json b/data/hfopenllm_v2/Changgil/K2S3-v0.1/225bc36b-4bfb-4818-8601-903e7f9decb3.json deleted file mode 100644 index ce8452ec7..000000000 --- a/data/hfopenllm_v2/Changgil/K2S3-v0.1/225bc36b-4bfb-4818-8601-903e7f9decb3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Changgil_K2S3-v0.1/1762652579.503593", - "retrieved_timestamp": "1762652579.503594", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Changgil/K2S3-v0.1", - "developer": "Changgil", - "inference_platform": "unknown", - "id": "Changgil/K2S3-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 14.352 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32765617450586665 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46554920672286154 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04607250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40140624999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2562333776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/Changgil/K2S3-v0.1/ef9d2fab-07a2-44e2-aae2-ede5a2ff31d9.json b/data/hfopenllm_v2/Changgil/K2S3-v0.1/ef9d2fab-07a2-44e2-aae2-ede5a2ff31d9.json new file mode 100644 index 000000000..efefe2dc0 --- /dev/null +++ b/data/hfopenllm_v2/Changgil/K2S3-v0.1/ef9d2fab-07a2-44e2-aae2-ede5a2ff31d9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Changgil_K2S3-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "K2S3-v0.1", + "id": 
"Changgil/K2S3-v0.1", + "developer": "Changgil", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 14.352 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3277 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4655 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0461 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4014 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2562 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ClaudioItaly/Albacus/0be5437b-2489-4107-8c38-d0cd198a2d8c.json b/data/hfopenllm_v2/ClaudioItaly/Albacus/0be5437b-2489-4107-8c38-d0cd198a2d8c.json deleted file mode 100644 index 5683de60d..000000000 --- a/data/hfopenllm_v2/ClaudioItaly/Albacus/0be5437b-2489-4107-8c38-d0cd198a2d8c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ClaudioItaly_Albacus/1762652579.503804", - "retrieved_timestamp": "1762652579.503805", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ClaudioItaly/Albacus", - "developer": "ClaudioItaly", - "inference_platform": "unknown", - "id": "ClaudioItaly/Albacus", - "additional_details": { - "precision": "bfloat16", - "architecture": 
"MistralForCausalLM", - "params_billions": 8.987 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4667415790103592 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5113043406568835 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07099697885196375 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41353124999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31648936170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/ClaudioItaly/Albacus/a29a69d3-d64e-4463-aa52-0a9d6d012c98.json b/data/hfopenllm_v2/ClaudioItaly/Albacus/a29a69d3-d64e-4463-aa52-0a9d6d012c98.json new file mode 100644 index 000000000..8b6f83423 --- /dev/null +++ b/data/hfopenllm_v2/ClaudioItaly/Albacus/a29a69d3-d64e-4463-aa52-0a9d6d012c98.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ClaudioItaly_Albacus/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Albacus", + "id": "ClaudioItaly/Albacus", + "developer": "ClaudioItaly", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 8.987 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4667 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5113 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.071 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2718 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4135 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3165 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ClaudioItaly/Book-Gut12B/4539c16e-1ac6-47f4-88eb-a09842497330.json b/data/hfopenllm_v2/ClaudioItaly/Book-Gut12B/4539c16e-1ac6-47f4-88eb-a09842497330.json new file mode 100644 index 000000000..c0c13e7aa --- /dev/null +++ b/data/hfopenllm_v2/ClaudioItaly/Book-Gut12B/4539c16e-1ac6-47f4-88eb-a09842497330.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ClaudioItaly_Book-Gut12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Book-Gut12B", + "id": "ClaudioItaly/Book-Gut12B", + "developer": "ClaudioItaly", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3998 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5417 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.102 + } + }, + { + 
"evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4635 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.367 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ClaudioItaly/Book-Gut12B/b2bdf337-9065-4a67-aa1a-5ba8751d5438.json b/data/hfopenllm_v2/ClaudioItaly/Book-Gut12B/b2bdf337-9065-4a67-aa1a-5ba8751d5438.json deleted file mode 100644 index bfb375ac3..000000000 --- a/data/hfopenllm_v2/ClaudioItaly/Book-Gut12B/b2bdf337-9065-4a67-aa1a-5ba8751d5438.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ClaudioItaly_Book-Gut12B/1762652579.504094", - "retrieved_timestamp": "1762652579.504095", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ClaudioItaly/Book-Gut12B", - "developer": "ClaudioItaly", - "inference_platform": "unknown", - "id": "ClaudioItaly/Book-Gut12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39984685080032095 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5417370194443233 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10196374622356495 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.4635416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3670212765957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/ClaudioItaly/Evolutionstory-7B-v2.2/2ff33c55-1236-4c57-8809-2d3076e43cc7.json b/data/hfopenllm_v2/ClaudioItaly/Evolutionstory-7B-v2.2/2ff33c55-1236-4c57-8809-2d3076e43cc7.json new file mode 100644 index 000000000..da6fa6915 --- /dev/null +++ b/data/hfopenllm_v2/ClaudioItaly/Evolutionstory-7B-v2.2/2ff33c55-1236-4c57-8809-2d3076e43cc7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ClaudioItaly_Evolutionstory-7B-v2.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Evolutionstory-7B-v2.2", + "id": "ClaudioItaly/Evolutionstory-7B-v2.2", + "developer": "ClaudioItaly", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4814 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5108 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.071 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2752 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4135 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 
+ }, + "score_details": { + "score": 0.3159 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ClaudioItaly/Evolutionstory-7B-v2.2/e06c19ce-9247-473b-b5db-8686fee5e785.json b/data/hfopenllm_v2/ClaudioItaly/Evolutionstory-7B-v2.2/e06c19ce-9247-473b-b5db-8686fee5e785.json deleted file mode 100644 index 3891f95b9..000000000 --- a/data/hfopenllm_v2/ClaudioItaly/Evolutionstory-7B-v2.2/e06c19ce-9247-473b-b5db-8686fee5e785.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ClaudioItaly_Evolutionstory-7B-v2.2/1762652579.504309", - "retrieved_timestamp": "1762652579.504309", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ClaudioItaly/Evolutionstory-7B-v2.2", - "developer": "ClaudioItaly", - "inference_platform": "unknown", - "id": "ClaudioItaly/Evolutionstory-7B-v2.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4813794066410457 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5108043406568835 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07099697885196375 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41353124999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31590757978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/ClaudioItaly/intelligence-cod-rag-7b-v3/281ba822-49a2-4746-bc04-8de046439508.json b/data/hfopenllm_v2/ClaudioItaly/intelligence-cod-rag-7b-v3/281ba822-49a2-4746-bc04-8de046439508.json new file mode 100644 index 000000000..5635777d0 --- /dev/null +++ b/data/hfopenllm_v2/ClaudioItaly/intelligence-cod-rag-7b-v3/281ba822-49a2-4746-bc04-8de046439508.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ClaudioItaly_intelligence-cod-rag-7b-v3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "intelligence-cod-rag-7b-v3", + "id": "ClaudioItaly/intelligence-cod-rag-7b-v3", + "developer": "ClaudioItaly", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6898 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5366 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3807 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4153 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4195 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ClaudioItaly/intelligence-cod-rag-7b-v3/51559a6d-1262-41e2-8092-008dc8f53974.json b/data/hfopenllm_v2/ClaudioItaly/intelligence-cod-rag-7b-v3/51559a6d-1262-41e2-8092-008dc8f53974.json deleted file mode 100644 index 035e58fa4..000000000 --- a/data/hfopenllm_v2/ClaudioItaly/intelligence-cod-rag-7b-v3/51559a6d-1262-41e2-8092-008dc8f53974.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ClaudioItaly_intelligence-cod-rag-7b-v3/1762652579.504531", - "retrieved_timestamp": "1762652579.504531", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": 
"documentation" - }, - "model_info": { - "name": "ClaudioItaly/intelligence-cod-rag-7b-v3", - "developer": "ClaudioItaly", - "inference_platform": "unknown", - "id": "ClaudioItaly/intelligence-cod-rag-7b-v3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6897820006471718 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5366339718839108 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3806646525679758 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4152708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4195478723404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/CohereForAI/aya-23-35B/0606d916-95ea-4318-af0c-3942329071c6.json b/data/hfopenllm_v2/CohereForAI/aya-23-35B/0606d916-95ea-4318-af0c-3942329071c6.json new file mode 100644 index 000000000..3eb7bd011 --- /dev/null +++ b/data/hfopenllm_v2/CohereForAI/aya-23-35B/0606d916-95ea-4318-af0c-3942329071c6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CohereForAI_aya-23-35B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "aya-23-35B", + "id": "CohereForAI/aya-23-35B", + "developer": "CohereForAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "CohereForCausalLM", + "params_billions": 34.981 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6462 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.54 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0347 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.431 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3356 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CohereForAI/aya-23-35B/9c77aa3f-080c-4dd6-8a9d-50d18657de35.json b/data/hfopenllm_v2/CohereForAI/aya-23-35B/9c77aa3f-080c-4dd6-8a9d-50d18657de35.json deleted file mode 100644 index 0320eee2d..000000000 --- a/data/hfopenllm_v2/CohereForAI/aya-23-35B/9c77aa3f-080c-4dd6-8a9d-50d18657de35.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CohereForAI_aya-23-35B/1762652579.5047522", - "retrieved_timestamp": "1762652579.5047529", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CohereForAI/aya-23-35B", - "developer": "CohereForAI", - "inference_platform": "unknown", - "id": "CohereForAI/aya-23-35B", - "additional_details": { - "precision": "float16", - "architecture": "CohereForCausalLM", - "params_billions": 34.981 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6461932117891638 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5399551450731271 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.03474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4309895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33560505319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/CohereForAI/aya-23-8B/005159f0-da68-480d-972c-c160d145a682.json b/data/hfopenllm_v2/CohereForAI/aya-23-8B/005159f0-da68-480d-972c-c160d145a682.json new file mode 100644 index 000000000..ff20417ff --- /dev/null +++ b/data/hfopenllm_v2/CohereForAI/aya-23-8B/005159f0-da68-480d-972c-c160d145a682.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CohereForAI_aya-23-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "aya-23-8B", + "id": "CohereForAI/aya-23-8B", + "developer": "CohereForAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "CohereForCausalLM", + "params_billions": 8.028 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4699 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4296 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0166 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2844 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3941 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2278 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CohereForAI/aya-23-8B/2ff655cd-9123-4577-832b-3f0b04f7d466.json b/data/hfopenllm_v2/CohereForAI/aya-23-8B/2ff655cd-9123-4577-832b-3f0b04f7d466.json deleted file mode 100644 index 1dbc154e0..000000000 --- a/data/hfopenllm_v2/CohereForAI/aya-23-8B/2ff655cd-9123-4577-832b-3f0b04f7d466.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CohereForAI_aya-23-8B/1762652579.5050838", - "retrieved_timestamp": "1762652579.505085", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CohereForAI/aya-23-8B", - "developer": "CohereForAI", - "inference_platform": "unknown", - "id": "CohereForAI/aya-23-8B", - "additional_details": { - "precision": "float16", - "architecture": "CohereForCausalLM", - "params_billions": 8.028 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4698887839820565 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4296161519220307 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3940625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2278091755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/CohereForAI/aya-expanse-32b/2f6abb5d-52b3-44b0-b960-115793485fb1.json b/data/hfopenllm_v2/CohereForAI/aya-expanse-32b/2f6abb5d-52b3-44b0-b960-115793485fb1.json new file mode 100644 index 000000000..398bb4dca --- /dev/null +++ 
b/data/hfopenllm_v2/CohereForAI/aya-expanse-32b/2f6abb5d-52b3-44b0-b960-115793485fb1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CohereForAI_aya-expanse-32b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "aya-expanse-32b", + "id": "CohereForAI/aya-expanse-32b", + "developer": "CohereForAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "CohereForCausalLM", + "params_billions": 32.296 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7302 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5649 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1533 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3255 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3873 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.413 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CohereForAI/aya-expanse-32b/ebbe9a61-6dff-467a-b77c-7c125a043832.json b/data/hfopenllm_v2/CohereForAI/aya-expanse-32b/ebbe9a61-6dff-467a-b77c-7c125a043832.json deleted file mode 100644 index be4b6d379..000000000 --- a/data/hfopenllm_v2/CohereForAI/aya-expanse-32b/ebbe9a61-6dff-467a-b77c-7c125a043832.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CohereForAI_aya-expanse-32b/1762652579.505483", - "retrieved_timestamp": 
"1762652579.505484", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CohereForAI/aya-expanse-32b", - "developer": "CohereForAI", - "inference_platform": "unknown", - "id": "CohereForAI/aya-expanse-32b", - "additional_details": { - "precision": "float16", - "architecture": "CohereForCausalLM", - "params_billions": 32.296 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7301737168490716 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5648670099212114 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15332326283987915 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32550335570469796 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3872708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41298204787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/CohereForAI/aya-expanse-8b/3d54299c-ae39-45f4-b31c-c0667dcbe9f4.json b/data/hfopenllm_v2/CohereForAI/aya-expanse-8b/3d54299c-ae39-45f4-b31c-c0667dcbe9f4.json deleted file mode 100644 index 08555721e..000000000 --- a/data/hfopenllm_v2/CohereForAI/aya-expanse-8b/3d54299c-ae39-45f4-b31c-c0667dcbe9f4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CohereForAI_aya-expanse-8b/1762652579.505729", - "retrieved_timestamp": "1762652579.5057302", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CohereForAI/aya-expanse-8b", - "developer": "CohereForAI", - "inference_platform": "unknown", - "id": "CohereForAI/aya-expanse-8b", - "additional_details": { - "precision": "float16", - "architecture": "CohereForCausalLM", - "params_billions": 8.028 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.6358517622131501 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4977203055736406 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08610271903323263 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37288541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3003656914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/CohereForAI/aya-expanse-8b/6ffacad9-1a4d-472e-bbbf-0d64d068dd0d.json b/data/hfopenllm_v2/CohereForAI/aya-expanse-8b/6ffacad9-1a4d-472e-bbbf-0d64d068dd0d.json new file mode 100644 index 000000000..724fe03bd --- /dev/null +++ b/data/hfopenllm_v2/CohereForAI/aya-expanse-8b/6ffacad9-1a4d-472e-bbbf-0d64d068dd0d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CohereForAI_aya-expanse-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "aya-expanse-8b", + "id": "CohereForAI/aya-expanse-8b", + "developer": "CohereForAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "CohereForCausalLM", + "params_billions": 8.028 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6359 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4977 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0861 + } 
+ }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3729 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3004 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CohereForAI/c4ai-command-r-plus-08-2024/26eadaf8-bfb8-4aad-a8a4-90699b6f0fcd.json b/data/hfopenllm_v2/CohereForAI/c4ai-command-r-plus-08-2024/26eadaf8-bfb8-4aad-a8a4-90699b6f0fcd.json new file mode 100644 index 000000000..294a861d5 --- /dev/null +++ b/data/hfopenllm_v2/CohereForAI/c4ai-command-r-plus-08-2024/26eadaf8-bfb8-4aad-a8a4-90699b6f0fcd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CohereForAI_c4ai-command-r-plus-08-2024/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "c4ai-command-r-plus-08-2024", + "id": "CohereForAI/c4ai-command-r-plus-08-2024", + "developer": "CohereForAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "CohereForCausalLM", + "params_billions": 103.811 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.754 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5996 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1239 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3507 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4829 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4421 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CohereForAI/c4ai-command-r-plus-08-2024/f1ef3dda-1b62-4ec9-9c88-a8e60b8a8f6d.json b/data/hfopenllm_v2/CohereForAI/c4ai-command-r-plus-08-2024/f1ef3dda-1b62-4ec9-9c88-a8e60b8a8f6d.json deleted file mode 100644 index f359c2b4b..000000000 --- a/data/hfopenllm_v2/CohereForAI/c4ai-command-r-plus-08-2024/f1ef3dda-1b62-4ec9-9c88-a8e60b8a8f6d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CohereForAI_c4ai-command-r-plus-08-2024/1762652579.506166", - "retrieved_timestamp": "1762652579.506167", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CohereForAI/c4ai-command-r-plus-08-2024", - "developer": "CohereForAI", - "inference_platform": "unknown", - "id": "CohereForAI/c4ai-command-r-plus-08-2024", - "additional_details": { - "precision": "float16", - "architecture": "CohereForCausalLM", - "params_billions": 103.811 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7539539532883859 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5995999913027185 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12386706948640483 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35067114093959734 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48294791666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44207114361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/CohereForAI/c4ai-command-r-plus/c5326cd1-8e73-4f84-8efb-49b3be5c50e7.json b/data/hfopenllm_v2/CohereForAI/c4ai-command-r-plus/c5326cd1-8e73-4f84-8efb-49b3be5c50e7.json deleted file mode 100644 index 2701b0ba7..000000000 --- a/data/hfopenllm_v2/CohereForAI/c4ai-command-r-plus/c5326cd1-8e73-4f84-8efb-49b3be5c50e7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CohereForAI_c4ai-command-r-plus/1762652579.50595", - "retrieved_timestamp": "1762652579.505951", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CohereForAI/c4ai-command-r-plus", - "developer": "CohereForAI", - "inference_platform": "unknown", - "id": "CohereForAI/c4ai-command-r-plus", - "additional_details": { - "precision": "float16", - "architecture": "CohereForCausalLM", - "params_billions": 103.811 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7664186580495308 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.581542357407793 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08006042296072508 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48071875000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3991855053191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/CohereForAI/c4ai-command-r-plus/d4536913-5708-45e4-a024-45ae37fdae13.json b/data/hfopenllm_v2/CohereForAI/c4ai-command-r-plus/d4536913-5708-45e4-a024-45ae37fdae13.json new file mode 100644 index 000000000..e568ee283 --- /dev/null +++ b/data/hfopenllm_v2/CohereForAI/c4ai-command-r-plus/d4536913-5708-45e4-a024-45ae37fdae13.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CohereForAI_c4ai-command-r-plus/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + 
"source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "c4ai-command-r-plus", + "id": "CohereForAI/c4ai-command-r-plus", + "developer": "CohereForAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "CohereForCausalLM", + "params_billions": 103.811 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7664 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5815 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0801 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4807 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3992 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CohereForAI/c4ai-command-r-v01/848860aa-7de3-4fae-afca-ac11224b96c5.json b/data/hfopenllm_v2/CohereForAI/c4ai-command-r-v01/848860aa-7de3-4fae-afca-ac11224b96c5.json new file mode 100644 index 000000000..5b2083324 --- /dev/null +++ b/data/hfopenllm_v2/CohereForAI/c4ai-command-r-v01/848860aa-7de3-4fae-afca-ac11224b96c5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CohereForAI_c4ai-command-r-v01/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "c4ai-command-r-v01", + "id": 
"CohereForAI/c4ai-command-r-v01", + "developer": "CohereForAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "CohereForCausalLM", + "params_billions": 34.981 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6748 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5406 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0347 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4517 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3369 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CohereForAI/c4ai-command-r-v01/cd24b18c-faff-44e1-87d6-735bcb9ab465.json b/data/hfopenllm_v2/CohereForAI/c4ai-command-r-v01/cd24b18c-faff-44e1-87d6-735bcb9ab465.json deleted file mode 100644 index 6fac83b95..000000000 --- a/data/hfopenllm_v2/CohereForAI/c4ai-command-r-v01/cd24b18c-faff-44e1-87d6-735bcb9ab465.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CohereForAI_c4ai-command-r-v01/1762652579.506387", - "retrieved_timestamp": "1762652579.506388", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CohereForAI/c4ai-command-r-v01", - "developer": "CohereForAI", - "inference_platform": "unknown", - "id": "CohereForAI/c4ai-command-r-v01", - 
"additional_details": { - "precision": "float16", - "architecture": "CohereForCausalLM", - "params_billions": 34.981 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6748194789824333 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5406415512767856 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45169791666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3369348404255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/CohereForAI/c4ai-command-r7b-12-2024/0241a8e3-d6e5-4ba5-afb9-862bde2ba851.json b/data/hfopenllm_v2/CohereForAI/c4ai-command-r7b-12-2024/0241a8e3-d6e5-4ba5-afb9-862bde2ba851.json new file mode 100644 index 000000000..0cc392d32 --- /dev/null +++ b/data/hfopenllm_v2/CohereForAI/c4ai-command-r7b-12-2024/0241a8e3-d6e5-4ba5-afb9-862bde2ba851.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CohereForAI_c4ai-command-r7b-12-2024/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "c4ai-command-r7b-12-2024", + "id": "CohereForAI/c4ai-command-r7b-12-2024", + "developer": "CohereForAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Cohere2ForCausalLM", + "params_billions": 8.028 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7713 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5503 + } + }, + { + 
"evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2991 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3572 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CohereForAI/c4ai-command-r7b-12-2024/85fa7edb-df5c-4baa-a0f1-c520db55c08c.json b/data/hfopenllm_v2/CohereForAI/c4ai-command-r7b-12-2024/85fa7edb-df5c-4baa-a0f1-c520db55c08c.json deleted file mode 100644 index d9188d83d..000000000 --- a/data/hfopenllm_v2/CohereForAI/c4ai-command-r7b-12-2024/85fa7edb-df5c-4baa-a0f1-c520db55c08c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CohereForAI_c4ai-command-r7b-12-2024/1762652579.5066051", - "retrieved_timestamp": "1762652579.506606", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CohereForAI/c4ai-command-r7b-12-2024", - "developer": "CohereForAI", - "inference_platform": "unknown", - "id": "CohereForAI/c4ai-command-r7b-12-2024", - "additional_details": { - "precision": "bfloat16", - "architecture": "Cohere2ForCausalLM", - "params_billions": 8.028 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7713145564878965 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5502642151855635 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2990936555891239 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41251041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3572140957446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/Columbia-NLP/LION-Gemma-2b-dpo-v1.0/20b69120-d476-4e34-b3c6-8cef11d6ee78.json b/data/hfopenllm_v2/Columbia-NLP/LION-Gemma-2b-dpo-v1.0/20b69120-d476-4e34-b3c6-8cef11d6ee78.json new file mode 100644 index 000000000..047b3a3d1 --- /dev/null +++ b/data/hfopenllm_v2/Columbia-NLP/LION-Gemma-2b-dpo-v1.0/20b69120-d476-4e34-b3c6-8cef11d6ee78.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Columbia-NLP_LION-Gemma-2b-dpo-v1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LION-Gemma-2b-dpo-v1.0", + "id": "Columbia-NLP/LION-Gemma-2b-dpo-v1.0", + "developer": "Columbia-NLP", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 2.506 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3102 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3881 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0536 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2534 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy 
on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4081 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1665 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Columbia-NLP/LION-Gemma-2b-dpo-v1.0/696bbbfc-49dd-444e-a90b-76821845a726.json b/data/hfopenllm_v2/Columbia-NLP/LION-Gemma-2b-dpo-v1.0/696bbbfc-49dd-444e-a90b-76821845a726.json new file mode 100644 index 000000000..9deccf82b --- /dev/null +++ b/data/hfopenllm_v2/Columbia-NLP/LION-Gemma-2b-dpo-v1.0/696bbbfc-49dd-444e-a90b-76821845a726.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Columbia-NLP_LION-Gemma-2b-dpo-v1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LION-Gemma-2b-dpo-v1.0", + "id": "Columbia-NLP/LION-Gemma-2b-dpo-v1.0", + "developer": "Columbia-NLP", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GemmaForCausalLM", + "params_billions": 2.506 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3278 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.392 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0431 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2492 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.412 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + 
"dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1666 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Columbia-NLP/LION-Gemma-2b-dpo-v1.0/76f198aa-0aa5-4c98-8d86-20410582d3a5.json b/data/hfopenllm_v2/Columbia-NLP/LION-Gemma-2b-dpo-v1.0/76f198aa-0aa5-4c98-8d86-20410582d3a5.json deleted file mode 100644 index 2451e9bd5..000000000 --- a/data/hfopenllm_v2/Columbia-NLP/LION-Gemma-2b-dpo-v1.0/76f198aa-0aa5-4c98-8d86-20410582d3a5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Columbia-NLP_LION-Gemma-2b-dpo-v1.0/1762652579.506829", - "retrieved_timestamp": "1762652579.50683", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Columbia-NLP/LION-Gemma-2b-dpo-v1.0", - "developer": "Columbia-NLP", - "inference_platform": "unknown", - "id": "Columbia-NLP/LION-Gemma-2b-dpo-v1.0", - "additional_details": { - "precision": "float16", - "architecture": "GemmaForCausalLM", - "params_billions": 2.506 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3278312654866864 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39199563613207467 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24916107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41201041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16655585106382978 - } - } - ] -} diff --git a/data/hfopenllm_v2/Columbia-NLP/LION-Gemma-2b-dpo-v1.0/f39ad9a4-b02a-415e-b83a-53d705b6bea2.json b/data/hfopenllm_v2/Columbia-NLP/LION-Gemma-2b-dpo-v1.0/f39ad9a4-b02a-415e-b83a-53d705b6bea2.json deleted file mode 100644 index 1747da663..000000000 --- a/data/hfopenllm_v2/Columbia-NLP/LION-Gemma-2b-dpo-v1.0/f39ad9a4-b02a-415e-b83a-53d705b6bea2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - 
"schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Columbia-NLP_LION-Gemma-2b-dpo-v1.0/1762652579.507083", - "retrieved_timestamp": "1762652579.507083", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Columbia-NLP/LION-Gemma-2b-dpo-v1.0", - "developer": "Columbia-NLP", - "inference_platform": "unknown", - "id": "Columbia-NLP/LION-Gemma-2b-dpo-v1.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 2.506 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3102457036219453 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38810309159554507 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4080729166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16647273936170212 - } - } - ] -} diff --git a/data/hfopenllm_v2/Columbia-NLP/LION-Gemma-2b-odpo-v1.0/e6d974d3-467e-4fe7-bd84-79fc7c72cde2.json b/data/hfopenllm_v2/Columbia-NLP/LION-Gemma-2b-odpo-v1.0/e6d974d3-467e-4fe7-bd84-79fc7c72cde2.json new file mode 100644 index 000000000..7b19f887e --- /dev/null +++ b/data/hfopenllm_v2/Columbia-NLP/LION-Gemma-2b-odpo-v1.0/e6d974d3-467e-4fe7-bd84-79fc7c72cde2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Columbia-NLP_LION-Gemma-2b-odpo-v1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LION-Gemma-2b-odpo-v1.0", + "id": "Columbia-NLP/LION-Gemma-2b-odpo-v1.0", + "developer": "Columbia-NLP", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 2.506 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + 
"source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3066 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3896 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0695 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2424 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4279 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1692 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Columbia-NLP/LION-Gemma-2b-sft-v1.0/0cb84d3d-4f5d-4afc-9c49-de567f2ffbcb.json b/data/hfopenllm_v2/Columbia-NLP/LION-Gemma-2b-sft-v1.0/0cb84d3d-4f5d-4afc-9c49-de567f2ffbcb.json deleted file mode 100644 index d874b8ff0..000000000 --- a/data/hfopenllm_v2/Columbia-NLP/LION-Gemma-2b-sft-v1.0/0cb84d3d-4f5d-4afc-9c49-de567f2ffbcb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Columbia-NLP_LION-Gemma-2b-sft-v1.0/1762652579.507553", - "retrieved_timestamp": "1762652579.507553", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Columbia-NLP/LION-Gemma-2b-sft-v1.0", - "developer": "Columbia-NLP", - "inference_platform": "unknown", - "id": "Columbia-NLP/LION-Gemma-2b-sft-v1.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 2.506 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3692469314751526 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.387877927616119 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4027395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17819148936170212 - } - } - ] -} diff --git a/data/hfopenllm_v2/Columbia-NLP/LION-Gemma-2b-sft-v1.0/b26ba2b7-1365-4b1c-a1be-35d588e02d36.json b/data/hfopenllm_v2/Columbia-NLP/LION-Gemma-2b-sft-v1.0/b26ba2b7-1365-4b1c-a1be-35d588e02d36.json new file mode 100644 index 000000000..2d900597c --- /dev/null +++ b/data/hfopenllm_v2/Columbia-NLP/LION-Gemma-2b-sft-v1.0/b26ba2b7-1365-4b1c-a1be-35d588e02d36.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Columbia-NLP_LION-Gemma-2b-sft-v1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LION-Gemma-2b-sft-v1.0", + "id": "Columbia-NLP/LION-Gemma-2b-sft-v1.0", + "developer": "Columbia-NLP", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 2.506 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3692 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3879 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 
0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.068 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2559 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4027 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1782 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Columbia-NLP/LION-LLaMA-3-8b-dpo-v1.0/64bd755d-ba4b-4559-ad8e-f56c697b1ae6.json b/data/hfopenllm_v2/Columbia-NLP/LION-LLaMA-3-8b-dpo-v1.0/64bd755d-ba4b-4559-ad8e-f56c697b1ae6.json new file mode 100644 index 000000000..f22d9e63b --- /dev/null +++ b/data/hfopenllm_v2/Columbia-NLP/LION-LLaMA-3-8b-dpo-v1.0/64bd755d-ba4b-4559-ad8e-f56c697b1ae6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Columbia-NLP_LION-LLaMA-3-8b-dpo-v1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LION-LLaMA-3-8b-dpo-v1.0", + "id": "Columbia-NLP/LION-LLaMA-3-8b-dpo-v1.0", + "developer": "Columbia-NLP", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4957 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5028 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1171 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": 
"Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4097 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3219 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Columbia-NLP/LION-LLaMA-3-8b-dpo-v1.0/bf83f2be-f684-4ba7-b244-c5cb10f8f0b1.json b/data/hfopenllm_v2/Columbia-NLP/LION-LLaMA-3-8b-dpo-v1.0/bf83f2be-f684-4ba7-b244-c5cb10f8f0b1.json deleted file mode 100644 index 73a9575d0..000000000 --- a/data/hfopenllm_v2/Columbia-NLP/LION-LLaMA-3-8b-dpo-v1.0/bf83f2be-f684-4ba7-b244-c5cb10f8f0b1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Columbia-NLP_LION-LLaMA-3-8b-dpo-v1.0/1762652579.5077918", - "retrieved_timestamp": "1762652579.507793", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Columbia-NLP/LION-LLaMA-3-8b-dpo-v1.0", - "developer": "Columbia-NLP", - "inference_platform": "unknown", - "id": "Columbia-NLP/LION-LLaMA-3-8b-dpo-v1.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4957424079220912 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5028481044452986 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11706948640483383 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40971874999999996 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3218916223404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/Columbia-NLP/LION-LLaMA-3-8b-odpo-v1.0/c4e572cb-1d12-4baf-a4d8-a55422692207.json b/data/hfopenllm_v2/Columbia-NLP/LION-LLaMA-3-8b-odpo-v1.0/c4e572cb-1d12-4baf-a4d8-a55422692207.json new file mode 100644 index 000000000..6e2795510 --- /dev/null +++ b/data/hfopenllm_v2/Columbia-NLP/LION-LLaMA-3-8b-odpo-v1.0/c4e572cb-1d12-4baf-a4d8-a55422692207.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Columbia-NLP_LION-LLaMA-3-8b-odpo-v1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LION-LLaMA-3-8b-odpo-v1.0", + "id": "Columbia-NLP/LION-LLaMA-3-8b-odpo-v1.0", + "developer": "Columbia-NLP", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3968 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5024 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1065 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4057 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.3152 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Columbia-NLP/LION-LLaMA-3-8b-sft-v1.0/23c9a71d-3504-497d-a0e2-6a5e299346e5.json b/data/hfopenllm_v2/Columbia-NLP/LION-LLaMA-3-8b-sft-v1.0/23c9a71d-3504-497d-a0e2-6a5e299346e5.json deleted file mode 100644 index 8dc809929..000000000 --- a/data/hfopenllm_v2/Columbia-NLP/LION-LLaMA-3-8b-sft-v1.0/23c9a71d-3504-497d-a0e2-6a5e299346e5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Columbia-NLP_LION-LLaMA-3-8b-sft-v1.0/1762652579.5082712", - "retrieved_timestamp": "1762652579.5082722", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Columbia-NLP/LION-LLaMA-3-8b-sft-v1.0", - "developer": "Columbia-NLP", - "inference_platform": "unknown", - "id": "Columbia-NLP/LION-LLaMA-3-8b-sft-v1.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38171163623629745 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5087766443418147 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11404833836858005 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45027083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32372007978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/Columbia-NLP/LION-LLaMA-3-8b-sft-v1.0/c6123e10-b1f9-49dc-888b-083881e6ef09.json b/data/hfopenllm_v2/Columbia-NLP/LION-LLaMA-3-8b-sft-v1.0/c6123e10-b1f9-49dc-888b-083881e6ef09.json new file mode 100644 index 000000000..1377d56ef --- /dev/null +++ b/data/hfopenllm_v2/Columbia-NLP/LION-LLaMA-3-8b-sft-v1.0/c6123e10-b1f9-49dc-888b-083881e6ef09.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Columbia-NLP_LION-LLaMA-3-8b-sft-v1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LION-LLaMA-3-8b-sft-v1.0", + "id": "Columbia-NLP/LION-LLaMA-3-8b-sft-v1.0", + "developer": "Columbia-NLP", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3817 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5088 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.114 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4503 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3237 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CombinHorizon/Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES/e1647f10-fec5-463d-b8e5-6b2b880bd687.json b/data/hfopenllm_v2/CombinHorizon/Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES/e1647f10-fec5-463d-b8e5-6b2b880bd687.json new file mode 100644 index 000000000..739ac6a43 --- /dev/null +++ b/data/hfopenllm_v2/CombinHorizon/Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES/e1647f10-fec5-463d-b8e5-6b2b880bd687.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CombinHorizon_Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": 
"third_party" + }, + "model_info": { + "name": "Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES", + "id": "CombinHorizon/Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES", + "developer": "CombinHorizon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.824 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.637 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5317 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3247 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.426 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4979 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CombinHorizon/Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES/6d5fa235-8d69-456e-9f23-0f702760baf4.json b/data/hfopenllm_v2/CombinHorizon/Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES/6d5fa235-8d69-456e-9f23-0f702760baf4.json new file mode 100644 index 000000000..e1668a135 --- /dev/null +++ b/data/hfopenllm_v2/CombinHorizon/Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES/6d5fa235-8d69-456e-9f23-0f702760baf4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CombinHorizon_Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES", + "id": "CombinHorizon/Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES", + "developer": "CombinHorizon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7564 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5402 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4932 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4033 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4342 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CombinHorizon/YiSM-blossom5.1-34B-SLERP/91ec4ba1-6948-48e8-8db0-a335b982c560.json b/data/hfopenllm_v2/CombinHorizon/YiSM-blossom5.1-34B-SLERP/91ec4ba1-6948-48e8-8db0-a335b982c560.json deleted file mode 100644 index e80a271dd..000000000 --- a/data/hfopenllm_v2/CombinHorizon/YiSM-blossom5.1-34B-SLERP/91ec4ba1-6948-48e8-8db0-a335b982c560.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CombinHorizon_YiSM-blossom5.1-34B-SLERP/1762652579.508977", - "retrieved_timestamp": "1762652579.508977", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CombinHorizon/YiSM-blossom5.1-34B-SLERP", - 
"developer": "CombinHorizon", - "inference_platform": "unknown", - "id": "CombinHorizon/YiSM-blossom5.1-34B-SLERP", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5033112142448702 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6207548093635428 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21525679758308158 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35570469798657717 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44134375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4740691489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/CombinHorizon/YiSM-blossom5.1-34B-SLERP/e8709a6a-a2b8-4b09-9342-d1aeae89de1f.json b/data/hfopenllm_v2/CombinHorizon/YiSM-blossom5.1-34B-SLERP/e8709a6a-a2b8-4b09-9342-d1aeae89de1f.json new file mode 100644 index 000000000..0da86caa5 --- /dev/null +++ b/data/hfopenllm_v2/CombinHorizon/YiSM-blossom5.1-34B-SLERP/e8709a6a-a2b8-4b09-9342-d1aeae89de1f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CombinHorizon_YiSM-blossom5.1-34B-SLERP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "YiSM-blossom5.1-34B-SLERP", + "id": "CombinHorizon/YiSM-blossom5.1-34B-SLERP", + "developer": "CombinHorizon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5033 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6208 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2153 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3557 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4413 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4741 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CombinHorizon/huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES/603e95c9-7e7f-4892-93f7-92f92b256865.json b/data/hfopenllm_v2/CombinHorizon/huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES/603e95c9-7e7f-4892-93f7-92f92b256865.json new file mode 100644 index 000000000..f4ca1a794 --- /dev/null +++ b/data/hfopenllm_v2/CombinHorizon/huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES/603e95c9-7e7f-4892-93f7-92f92b256865.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CombinHorizon_huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES", + "id": "CombinHorizon/huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES", + "developer": "CombinHorizon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8206 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6929 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5944 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3389 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4207 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5721 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CombinHorizon/huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES/3e2fd38a-186e-49aa-915c-7eb3cde50562.json b/data/hfopenllm_v2/CombinHorizon/huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES/3e2fd38a-186e-49aa-915c-7eb3cde50562.json new file mode 100644 index 000000000..859925816 --- /dev/null +++ b/data/hfopenllm_v2/CombinHorizon/huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES/3e2fd38a-186e-49aa-915c-7eb3cde50562.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CombinHorizon_huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES", + "id": "CombinHorizon/huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES", + "developer": "CombinHorizon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8176 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6336 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5476 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3146 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.426 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.491 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CombinHorizon/zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES/16d55e66-9015-4d72-81e4-3f14c42b0368.json b/data/hfopenllm_v2/CombinHorizon/zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES/16d55e66-9015-4d72-81e4-3f14c42b0368.json new file mode 100644 index 000000000..f7f8dee1c --- /dev/null +++ b/data/hfopenllm_v2/CombinHorizon/zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES/16d55e66-9015-4d72-81e4-3f14c42b0368.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CombinHorizon_zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES", + "id": "CombinHorizon/zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES", + "developer": "CombinHorizon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8328 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6955 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5853 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3674 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4314 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5685 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ContactDoctor/Bio-Medical-3B-CoT-012025/4ad50c15-9b6d-40c8-b8ce-74253ecfe258.json b/data/hfopenllm_v2/ContactDoctor/Bio-Medical-3B-CoT-012025/4ad50c15-9b6d-40c8-b8ce-74253ecfe258.json deleted file mode 100644 index caad52e43..000000000 --- a/data/hfopenllm_v2/ContactDoctor/Bio-Medical-3B-CoT-012025/4ad50c15-9b6d-40c8-b8ce-74253ecfe258.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ContactDoctor_Bio-Medical-3B-CoT-012025/1762652579.509939", - "retrieved_timestamp": "1762652579.509939", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ContactDoctor/Bio-Medical-3B-CoT-012025", - "developer": "ContactDoctor", - "inference_platform": "unknown", - "id": "ContactDoctor/Bio-Medical-3B-CoT-012025", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.085 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.360379349016166 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.438315337642466 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2212990936555891 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3367604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2933843085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/ContactDoctor/Bio-Medical-3B-CoT-012025/696644b9-bd40-4047-bb85-0cb19510a96c.json b/data/hfopenllm_v2/ContactDoctor/Bio-Medical-3B-CoT-012025/696644b9-bd40-4047-bb85-0cb19510a96c.json new file mode 100644 index 000000000..ea7e83334 --- /dev/null +++ b/data/hfopenllm_v2/ContactDoctor/Bio-Medical-3B-CoT-012025/696644b9-bd40-4047-bb85-0cb19510a96c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ContactDoctor_Bio-Medical-3B-CoT-012025/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Bio-Medical-3B-CoT-012025", + "id": "ContactDoctor/Bio-Medical-3B-CoT-012025", + "developer": "ContactDoctor", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.085 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3604 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4383 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2213 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + 
"dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3368 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2934 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ContactDoctor/Bio-Medical-Llama-3-8B/cbae8c39-0aec-4859-98bc-3b2d065833ad.json b/data/hfopenllm_v2/ContactDoctor/Bio-Medical-Llama-3-8B/cbae8c39-0aec-4859-98bc-3b2d065833ad.json new file mode 100644 index 000000000..0ff3b9020 --- /dev/null +++ b/data/hfopenllm_v2/ContactDoctor/Bio-Medical-Llama-3-8B/cbae8c39-0aec-4859-98bc-3b2d065833ad.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ContactDoctor_Bio-Medical-Llama-3-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Bio-Medical-Llama-3-8B", + "id": "ContactDoctor/Bio-Medical-Llama-3-8B", + "developer": "ContactDoctor", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4422 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4863 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0672 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3339 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3514 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3648 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CoolSpring/Qwen2-0.5B-Abyme-merge2/15fb3cc7-1ba5-4ba5-ba02-8e8a9d2029d0.json b/data/hfopenllm_v2/CoolSpring/Qwen2-0.5B-Abyme-merge2/15fb3cc7-1ba5-4ba5-ba02-8e8a9d2029d0.json new file mode 100644 index 000000000..23736286b --- /dev/null +++ b/data/hfopenllm_v2/CoolSpring/Qwen2-0.5B-Abyme-merge2/15fb3cc7-1ba5-4ba5-ba02-8e8a9d2029d0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CoolSpring_Qwen2-0.5B-Abyme-merge2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2-0.5B-Abyme-merge2", + "id": "CoolSpring/Qwen2-0.5B-Abyme-merge2", + "developer": "CoolSpring", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2022 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2994 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0332 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3687 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", 
+ "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1489 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CoolSpring/Qwen2-0.5B-Abyme-merge3/357f6051-b880-48bb-8e68-e4b0a7a0cbcc.json b/data/hfopenllm_v2/CoolSpring/Qwen2-0.5B-Abyme-merge3/357f6051-b880-48bb-8e68-e4b0a7a0cbcc.json new file mode 100644 index 000000000..a155e6c18 --- /dev/null +++ b/data/hfopenllm_v2/CoolSpring/Qwen2-0.5B-Abyme-merge3/357f6051-b880-48bb-8e68-e4b0a7a0cbcc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CoolSpring_Qwen2-0.5B-Abyme-merge3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2-0.5B-Abyme-merge3", + "id": "CoolSpring/Qwen2-0.5B-Abyme-merge3", + "developer": "CoolSpring", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2386 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0317 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3501 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.15 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CoolSpring/Qwen2-0.5B-Abyme/a50a542b-668e-47b1-a37e-805a58eea3d1.json b/data/hfopenllm_v2/CoolSpring/Qwen2-0.5B-Abyme/a50a542b-668e-47b1-a37e-805a58eea3d1.json new file mode 100644 index 000000000..43ce6bc2b --- /dev/null +++ b/data/hfopenllm_v2/CoolSpring/Qwen2-0.5B-Abyme/a50a542b-668e-47b1-a37e-805a58eea3d1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CoolSpring_Qwen2-0.5B-Abyme/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2-0.5B-Abyme", + "id": "CoolSpring/Qwen2-0.5B-Abyme", + "developer": "CoolSpring", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1915 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2862 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0295 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2534 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3542 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1333 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Corianas/Neural-Mistral-7B/00f7bd51-0b31-446d-be8c-1e0dc0d82e54.json 
b/data/hfopenllm_v2/Corianas/Neural-Mistral-7B/00f7bd51-0b31-446d-be8c-1e0dc0d82e54.json new file mode 100644 index 000000000..002cf8d4c --- /dev/null +++ b/data/hfopenllm_v2/Corianas/Neural-Mistral-7B/00f7bd51-0b31-446d-be8c-1e0dc0d82e54.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Corianas_Neural-Mistral-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Neural-Mistral-7B", + "id": "Corianas/Neural-Mistral-7B", + "developer": "Corianas", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5489 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4428 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0189 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3873 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2738 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Corianas/Quokka_2.7b/26782941-b918-44c5-a7f6-5f770e47c3d6.json b/data/hfopenllm_v2/Corianas/Quokka_2.7b/26782941-b918-44c5-a7f6-5f770e47c3d6.json new file mode 100644 index 000000000..04534ef20 --- /dev/null +++ b/data/hfopenllm_v2/Corianas/Quokka_2.7b/26782941-b918-44c5-a7f6-5f770e47c3d6.json @@ -0,0 +1,132 @@ +{ + "schema_version": 
"0.2.0", + "evaluation_id": "hfopenllm_v2/Corianas_Quokka_2.7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quokka_2.7b", + "id": "Corianas/Quokka_2.7b", + "developer": "Corianas", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GPT2LMHeadModel", + "params_billions": 2.786 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1749 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3055 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0083 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2559 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3908 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1145 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Corianas/Quokka_2.7b/54015982-408c-469b-86da-6642f5708180.json b/data/hfopenllm_v2/Corianas/Quokka_2.7b/54015982-408c-469b-86da-6642f5708180.json deleted file mode 100644 index dd979ef03..000000000 --- a/data/hfopenllm_v2/Corianas/Quokka_2.7b/54015982-408c-469b-86da-6642f5708180.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Corianas_Quokka_2.7b/1762652579.5120142", - "retrieved_timestamp": "1762652579.512015", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": 
"Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Corianas/Quokka_2.7b", - "developer": "Corianas", - "inference_platform": "unknown", - "id": "Corianas/Quokka_2.7b", - "additional_details": { - "precision": "float16", - "architecture": "GPT2LMHeadModel", - "params_billions": 2.786 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17490702447284318 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3055474937424842 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3908333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11452792553191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/Corianas/llama-3-reactor/5547ddaf-8fbb-4259-8b88-e946fc3d2404.json b/data/hfopenllm_v2/Corianas/llama-3-reactor/5547ddaf-8fbb-4259-8b88-e946fc3d2404.json new file mode 100644 index 000000000..0f76b0c3e --- /dev/null +++ b/data/hfopenllm_v2/Corianas/llama-3-reactor/5547ddaf-8fbb-4259-8b88-e946fc3d2404.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Corianas_llama-3-reactor/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-3-reactor", + "id": "Corianas/llama-3-reactor", + "developer": "Corianas", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": -1.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.23 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4457 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0468 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3977 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2801 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CortexLM/btlm-7b-base-v0.2/aded7428-1283-4ed8-b068-cc1a5ea92dca.json b/data/hfopenllm_v2/CortexLM/btlm-7b-base-v0.2/aded7428-1283-4ed8-b068-cc1a5ea92dca.json deleted file mode 100644 index a0a6c1184..000000000 --- a/data/hfopenllm_v2/CortexLM/btlm-7b-base-v0.2/aded7428-1283-4ed8-b068-cc1a5ea92dca.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CortexLM_btlm-7b-base-v0.2/1762652579.512528", - "retrieved_timestamp": "1762652579.512528", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CortexLM/btlm-7b-base-v0.2", - "developer": "CortexLM", - "inference_platform": "unknown", - "id": "CortexLM/btlm-7b-base-v0.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.885 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14832865685270635 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4006411985841813 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38460416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2349567819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/CortexLM/btlm-7b-base-v0.2/bee5ea59-b97a-4783-b763-b6bd432d4558.json b/data/hfopenllm_v2/CortexLM/btlm-7b-base-v0.2/bee5ea59-b97a-4783-b763-b6bd432d4558.json new file mode 100644 index 000000000..9f5b604cb --- /dev/null +++ b/data/hfopenllm_v2/CortexLM/btlm-7b-base-v0.2/bee5ea59-b97a-4783-b763-b6bd432d4558.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CortexLM_btlm-7b-base-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "btlm-7b-base-v0.2", + "id": "CortexLM/btlm-7b-base-v0.2", + "developer": "CortexLM", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.885 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1483 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4006 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0151 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2534 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + 
"hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3846 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.235 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Cran-May/SCE-2-24B/8150333f-8e79-4230-af8b-7ddb1d5eeb21.json b/data/hfopenllm_v2/Cran-May/SCE-2-24B/8150333f-8e79-4230-af8b-7ddb1d5eeb21.json new file mode 100644 index 000000000..d8282b048 --- /dev/null +++ b/data/hfopenllm_v2/Cran-May/SCE-2-24B/8150333f-8e79-4230-af8b-7ddb1d5eeb21.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Cran-May_SCE-2-24B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SCE-2-24B", + "id": "Cran-May/SCE-2-24B", + "developer": "Cran-May", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5866 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6265 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1896 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3372 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4528 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + 
"dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4612 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Cran-May/SCE-2-24B/f4ff02eb-7763-41bc-8a86-adbb051603af.json b/data/hfopenllm_v2/Cran-May/SCE-2-24B/f4ff02eb-7763-41bc-8a86-adbb051603af.json deleted file mode 100644 index b8fe727a1..000000000 --- a/data/hfopenllm_v2/Cran-May/SCE-2-24B/f4ff02eb-7763-41bc-8a86-adbb051603af.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Cran-May_SCE-2-24B/1762652579.512776", - "retrieved_timestamp": "1762652579.5127769", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Cran-May/SCE-2-24B", - "developer": "Cran-May", - "inference_platform": "unknown", - "id": "Cran-May/SCE-2-24B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5865924635522636 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6264692798019763 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18957703927492447 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.337248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4528125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.461186835106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/Cran-May/SCE-3-24B/2d7b9092-a9ad-4f47-b186-db1e1ce7cd6c.json b/data/hfopenllm_v2/Cran-May/SCE-3-24B/2d7b9092-a9ad-4f47-b186-db1e1ce7cd6c.json deleted file mode 100644 index a952b16ec..000000000 --- a/data/hfopenllm_v2/Cran-May/SCE-3-24B/2d7b9092-a9ad-4f47-b186-db1e1ce7cd6c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Cran-May_SCE-3-24B/1762652579.513022", - "retrieved_timestamp": "1762652579.513023", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Cran-May/SCE-3-24B", - "developer": "Cran-May", - "inference_platform": "unknown", - "id": "Cran-May/SCE-3-24B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5465254413844156 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.597283045074691 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18806646525679757 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3464765100671141 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44347916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4646775265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/Cran-May/SCE-3-24B/be8510a9-ecd4-4ac7-9930-3200cacb7b50.json b/data/hfopenllm_v2/Cran-May/SCE-3-24B/be8510a9-ecd4-4ac7-9930-3200cacb7b50.json new file mode 100644 index 000000000..17424927b --- /dev/null +++ b/data/hfopenllm_v2/Cran-May/SCE-3-24B/be8510a9-ecd4-4ac7-9930-3200cacb7b50.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Cran-May_SCE-3-24B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SCE-3-24B", + "id": "Cran-May/SCE-3-24B", + "developer": "Cran-May", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5465 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + 
"dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5973 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1881 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3465 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4435 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4647 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Cran-May/T.E-8.1/887e4574-f876-4e75-afb8-e543bcb30020.json b/data/hfopenllm_v2/Cran-May/T.E-8.1/887e4574-f876-4e75-afb8-e543bcb30020.json new file mode 100644 index 000000000..18db9bb98 --- /dev/null +++ b/data/hfopenllm_v2/Cran-May/T.E-8.1/887e4574-f876-4e75-afb8-e543bcb30020.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Cran-May_T.E-8.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T.E-8.1", + "id": "Cran-May/T.E-8.1", + "developer": "Cran-May", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7077 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5582 + } + }, + { + 
"evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4456 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3129 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4505 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4432 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Cran-May/T.E-8.1/9c9e0887-5561-4789-9521-a3a78e7cfd99.json b/data/hfopenllm_v2/Cran-May/T.E-8.1/9c9e0887-5561-4789-9521-a3a78e7cfd99.json deleted file mode 100644 index 87a5016d1..000000000 --- a/data/hfopenllm_v2/Cran-May/T.E-8.1/9c9e0887-5561-4789-9521-a3a78e7cfd99.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Cran-May_T.E-8.1/1762652579.513231", - "retrieved_timestamp": "1762652579.513231", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Cran-May/T.E-8.1", - "developer": "Cran-May", - "inference_platform": "unknown", - "id": "Cran-May/T.E-8.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7076922565459647 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5581754708123893 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44561933534743203 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4505208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4432347074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/Cran-May/merge_model_20250308_2/c457473c-6c40-4930-94b8-993d3b1e8937.json b/data/hfopenllm_v2/Cran-May/merge_model_20250308_2/c457473c-6c40-4930-94b8-993d3b1e8937.json deleted file mode 100644 index ef62911f4..000000000 --- a/data/hfopenllm_v2/Cran-May/merge_model_20250308_2/c457473c-6c40-4930-94b8-993d3b1e8937.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Cran-May_merge_model_20250308_2/1762652579.51357", - "retrieved_timestamp": "1762652579.5135732", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Cran-May/merge_model_20250308_2", - "developer": "Cran-May", - "inference_platform": "unknown", - "id": "Cran-May/merge_model_20250308_2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5932370554572978 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6585311075974459 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39093959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4793541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5419714095744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/Cran-May/merge_model_20250308_2/fd21d8bd-28cf-4b91-8075-c38a61f5f32a.json 
b/data/hfopenllm_v2/Cran-May/merge_model_20250308_2/fd21d8bd-28cf-4b91-8075-c38a61f5f32a.json new file mode 100644 index 000000000..e21b806dc --- /dev/null +++ b/data/hfopenllm_v2/Cran-May/merge_model_20250308_2/fd21d8bd-28cf-4b91-8075-c38a61f5f32a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Cran-May_merge_model_20250308_2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "merge_model_20250308_2", + "id": "Cran-May/merge_model_20250308_2", + "developer": "Cran-May", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5932 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6585 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4381 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3909 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4794 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.542 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Cran-May/merge_model_20250308_3/5448dbb6-9874-4734-8252-369c7b0189d7.json b/data/hfopenllm_v2/Cran-May/merge_model_20250308_3/5448dbb6-9874-4734-8252-369c7b0189d7.json deleted file mode 100644 index f7460b2f8..000000000 --- 
a/data/hfopenllm_v2/Cran-May/merge_model_20250308_3/5448dbb6-9874-4734-8252-369c7b0189d7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Cran-May_merge_model_20250308_3/1762652579.513911", - "retrieved_timestamp": "1762652579.513912", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Cran-May/merge_model_20250308_3", - "developer": "Cran-May", - "inference_platform": "unknown", - "id": "Cran-May/merge_model_20250308_3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6017799438822324 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6271459892225041 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2545317220543807 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221476510067114 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43204166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49617686170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/Cran-May/merge_model_20250308_3/c0f05e38-6592-478a-9c46-26567f24ff85.json b/data/hfopenllm_v2/Cran-May/merge_model_20250308_3/c0f05e38-6592-478a-9c46-26567f24ff85.json new file mode 100644 index 000000000..2aca4048b --- /dev/null +++ b/data/hfopenllm_v2/Cran-May/merge_model_20250308_3/c0f05e38-6592-478a-9c46-26567f24ff85.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Cran-May_merge_model_20250308_3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "merge_model_20250308_3", + "id": "Cran-May/merge_model_20250308_3", + "developer": "Cran-May", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + 
"evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6018 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6271 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2545 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3221 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.432 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4962 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Cran-May/merge_model_20250308_4/06cc2913-8e05-44bf-a128-9a7c4aeff536.json b/data/hfopenllm_v2/Cran-May/merge_model_20250308_4/06cc2913-8e05-44bf-a128-9a7c4aeff536.json new file mode 100644 index 000000000..9d92eeeaa --- /dev/null +++ b/data/hfopenllm_v2/Cran-May/merge_model_20250308_4/06cc2913-8e05-44bf-a128-9a7c4aeff536.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Cran-May_merge_model_20250308_4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "merge_model_20250308_4", + "id": "Cran-May/merge_model_20250308_4", + "developer": "Cran-May", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, 
+ "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.454 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6664 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4199 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3977 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4688 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5367 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Cran-May/merge_model_20250308_4/45531924-35ad-4baf-9994-5d5fa3bafd02.json b/data/hfopenllm_v2/Cran-May/merge_model_20250308_4/45531924-35ad-4baf-9994-5d5fa3bafd02.json deleted file mode 100644 index 5579d01f2..000000000 --- a/data/hfopenllm_v2/Cran-May/merge_model_20250308_4/45531924-35ad-4baf-9994-5d5fa3bafd02.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Cran-May_merge_model_20250308_4/1762652579.514166", - "retrieved_timestamp": "1762652579.514167", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Cran-May/merge_model_20250308_4", - "developer": "Cran-May", - "inference_platform": "unknown", - "id": "Cran-May/merge_model_20250308_4", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4539521802151624 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on 
BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.666435217186487 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4199395770392749 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3976510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4688125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5366522606382979 - } - } - ] -} diff --git a/data/hfopenllm_v2/Cran-May/tempmotacilla-cinerea-0308/5e5e70f4-c597-415c-ab74-17aaf55b7b28.json b/data/hfopenllm_v2/Cran-May/tempmotacilla-cinerea-0308/5e5e70f4-c597-415c-ab74-17aaf55b7b28.json deleted file mode 100644 index 609ccbb1f..000000000 --- a/data/hfopenllm_v2/Cran-May/tempmotacilla-cinerea-0308/5e5e70f4-c597-415c-ab74-17aaf55b7b28.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Cran-May_tempmotacilla-cinerea-0308/1762652579.514418", - "retrieved_timestamp": "1762652579.5144188", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Cran-May/tempmotacilla-cinerea-0308", - "developer": "Cran-May", - "inference_platform": "unknown", - "id": "Cran-May/tempmotacilla-cinerea-0308", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8084837121061007 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6550960569488126 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5551359516616314 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3624161073825503 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42082291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5250166223404256 - } - } - ] -} diff --git a/data/hfopenllm_v2/Cran-May/tempmotacilla-cinerea-0308/86368d5b-0509-4b52-b988-58bcf7e1043e.json b/data/hfopenllm_v2/Cran-May/tempmotacilla-cinerea-0308/86368d5b-0509-4b52-b988-58bcf7e1043e.json new file mode 100644 index 000000000..4adc26fd6 --- /dev/null +++ b/data/hfopenllm_v2/Cran-May/tempmotacilla-cinerea-0308/86368d5b-0509-4b52-b988-58bcf7e1043e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Cran-May_tempmotacilla-cinerea-0308/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tempmotacilla-cinerea-0308", + "id": "Cran-May/tempmotacilla-cinerea-0308", + "developer": "Cran-May", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8085 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6551 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5551 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3624 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4208 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": 
"TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.525 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CreitinGameplays/Llama-3.1-8B-R1-v0.1/77b89fe6-464b-4017-a77f-8750e2668a82.json b/data/hfopenllm_v2/CreitinGameplays/Llama-3.1-8B-R1-v0.1/77b89fe6-464b-4017-a77f-8750e2668a82.json new file mode 100644 index 000000000..213246109 --- /dev/null +++ b/data/hfopenllm_v2/CreitinGameplays/Llama-3.1-8B-R1-v0.1/77b89fe6-464b-4017-a77f-8750e2668a82.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CreitinGameplays_Llama-3.1-8B-R1-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8B-R1-v0.1", + "id": "CreitinGameplays/Llama-3.1-8B-R1-v0.1", + "developer": "CreitinGameplays", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3235 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3057 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1813 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3622 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.1252 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Broca/d2e47d86-23dd-4c95-a7fb-99518615d09f.json b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Broca/d2e47d86-23dd-4c95-a7fb-99518615d09f.json new file mode 100644 index 000000000..b78a75058 --- /dev/null +++ b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Broca/d2e47d86-23dd-4c95-a7fb-99518615d09f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Broca/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-Broca", + "id": "CultriX/Qwen2.5-14B-Broca", + "developer": "CultriX", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5604 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6527 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.358 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3867 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4767 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5364 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CultriX/Qwen2.5-14B-BrocaV9/0a09891e-ac97-4c3a-8364-7106a851f1a8.json b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-BrocaV9/0a09891e-ac97-4c3a-8364-7106a851f1a8.json new 
file mode 100644 index 000000000..31c50cf81 --- /dev/null +++ b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-BrocaV9/0a09891e-ac97-4c3a-8364-7106a851f1a8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-BrocaV9/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-BrocaV9", + "id": "CultriX/Qwen2.5-14B-BrocaV9", + "developer": "CultriX", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6763 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6391 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3814 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3641 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.469 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5331 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Brocav3/eb41fe62-ac46-4630-bb2d-6b907f271737.json b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Brocav3/eb41fe62-ac46-4630-bb2d-6b907f271737.json new file mode 100644 index 000000000..e98b2f9be --- /dev/null +++ b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Brocav3/eb41fe62-ac46-4630-bb2d-6b907f271737.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/CultriX_Qwen2.5-14B-Brocav3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-Brocav3", + "id": "CultriX/Qwen2.5-14B-Brocav3", + "developer": "CultriX", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6952 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6452 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3875 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3591 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4756 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5317 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Brocav6/d540a6c8-e9ec-4413-b9d2-dee68533c377.json b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Brocav6/d540a6c8-e9ec-4413-b9d2-dee68533c377.json new file mode 100644 index 000000000..9a923a074 --- /dev/null +++ b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Brocav6/d540a6c8-e9ec-4413-b9d2-dee68533c377.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Brocav6/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + 
"evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-Brocav6", + "id": "CultriX/Qwen2.5-14B-Brocav6", + "developer": "CultriX", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6995 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6389 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3875 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3674 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4742 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5319 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Brocav7/5b1f413a-05c4-43be-bdbc-9de5728e8d0a.json b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Brocav7/5b1f413a-05c4-43be-bdbc-9de5728e8d0a.json new file mode 100644 index 000000000..3819754c9 --- /dev/null +++ b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Brocav7/5b1f413a-05c4-43be-bdbc-9de5728e8d0a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Brocav7/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-Brocav7", + "id": "CultriX/Qwen2.5-14B-Brocav7", + "developer": "CultriX", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + 
"architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6724 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6444 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3844 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3674 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4796 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5258 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Emerged/6701738c-27e4-4bbd-b614-fbc297c3164f.json b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Emerged/6701738c-27e4-4bbd-b614-fbc297c3164f.json new file mode 100644 index 000000000..84b4c7784 --- /dev/null +++ b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Emerged/6701738c-27e4-4bbd-b614-fbc297c3164f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Emerged/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-Emerged", + "id": "CultriX/Qwen2.5-14B-Emerged", + "developer": "CultriX", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.626 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3248 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3574 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4691 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5186 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Emergedv3/7f4563b4-0b25-49e7-ac1c-afaa28b0eda2.json b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Emergedv3/7f4563b4-0b25-49e7-ac1c-afaa28b0eda2.json new file mode 100644 index 000000000..0ba7935ad --- /dev/null +++ b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Emergedv3/7f4563b4-0b25-49e7-ac1c-afaa28b0eda2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Emergedv3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-Emergedv3", + "id": "CultriX/Qwen2.5-14B-Emergedv3", + "developer": "CultriX", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6388 + } + }, + { + "evaluation_name": "BBH", + 
"source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6191 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4358 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3607 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4728 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5174 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CultriX/Qwen2.5-14B-FinalMerge/32b6e4af-69ba-49b7-9367-dfafe3e390e8.json b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-FinalMerge/32b6e4af-69ba-49b7-9367-dfafe3e390e8.json new file mode 100644 index 000000000..481bd01ec --- /dev/null +++ b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-FinalMerge/32b6e4af-69ba-49b7-9367-dfafe3e390e8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-FinalMerge/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-FinalMerge", + "id": "CultriX/Qwen2.5-14B-FinalMerge", + "developer": "CultriX", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4891 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5715 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3814 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3549 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4379 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4574 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Hyper/e16deaf7-da55-40ba-ac18-860fa3f14d34.json b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Hyper/e16deaf7-da55-40ba-ac18-860fa3f14d34.json new file mode 100644 index 000000000..e702043d7 --- /dev/null +++ b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Hyper/e16deaf7-da55-40ba-ac18-860fa3f14d34.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Hyper/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-Hyper", + "id": "CultriX/Qwen2.5-14B-Hyper", + "developer": "CultriX", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5391 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6507 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + 
"metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3437 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3918 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4898 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5374 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CultriX/Qwen2.5-14B-HyperMarck-dl/8a7a5886-0618-4615-9cdf-46f5d19a29fe.json b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-HyperMarck-dl/8a7a5886-0618-4615-9cdf-46f5d19a29fe.json new file mode 100644 index 000000000..33af3ceb9 --- /dev/null +++ b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-HyperMarck-dl/8a7a5886-0618-4615-9cdf-46f5d19a29fe.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-HyperMarck-dl/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-HyperMarck-dl", + "id": "CultriX/Qwen2.5-14B-HyperMarck-dl", + "developer": "CultriX", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.665 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6096 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5287 + } + }, 
+ { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3674 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4416 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5091 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Hyperionv3/66d18e5b-9ebc-4ab6-94fb-6d5c23c58672.json b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Hyperionv3/66d18e5b-9ebc-4ab6-94fb-6d5c23c58672.json new file mode 100644 index 000000000..ad906fa69 --- /dev/null +++ b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Hyperionv3/66d18e5b-9ebc-4ab6-94fb-6d5c23c58672.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Hyperionv3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-Hyperionv3", + "id": "CultriX/Qwen2.5-14B-Hyperionv3", + "developer": "CultriX", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6836 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6522 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3701 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3708 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.473 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.534 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Hyperionv4/a36aaaf6-2478-4b98-ad0c-2b06ddb8c308.json b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Hyperionv4/a36aaaf6-2478-4b98-ad0c-2b06ddb8c308.json new file mode 100644 index 000000000..2baa36524 --- /dev/null +++ b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Hyperionv4/a36aaaf6-2478-4b98-ad0c-2b06ddb8c308.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Hyperionv4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-Hyperionv4", + "id": "CultriX/Qwen2.5-14B-Hyperionv4", + "developer": "CultriX", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5416 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6472 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3474 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3977 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": 
"TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4832 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5364 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Hyperionv5/4a6237a7-019c-4310-971e-84b08d1b5067.json b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Hyperionv5/4a6237a7-019c-4310-971e-84b08d1b5067.json new file mode 100644 index 000000000..bdfbba563 --- /dev/null +++ b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Hyperionv5/4a6237a7-019c-4310-971e-84b08d1b5067.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Hyperionv5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-Hyperionv5", + "id": "CultriX/Qwen2.5-14B-Hyperionv5", + "developer": "CultriX", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6729 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6443 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3822 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3716 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4795 + } + }, + { 
+ "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5302 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CultriX/Qwen2.5-14B-MegaMerge-pt2/996e781e-5939-41ac-b347-95c99037c34a.json b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-MegaMerge-pt2/996e781e-5939-41ac-b347-95c99037c34a.json new file mode 100644 index 000000000..0815bb180 --- /dev/null +++ b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-MegaMerge-pt2/996e781e-5939-41ac-b347-95c99037c34a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-MegaMerge-pt2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-MegaMerge-pt2", + "id": "CultriX/Qwen2.5-14B-MegaMerge-pt2", + "developer": "CultriX", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5683 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6578 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3995 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3792 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4729 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5421 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CultriX/Qwen2.5-14B-MergeStock/e880fa0e-ae49-4398-91bd-eadf8695425f.json b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-MergeStock/e880fa0e-ae49-4398-91bd-eadf8695425f.json new file mode 100644 index 000000000..64f818a07 --- /dev/null +++ b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-MergeStock/e880fa0e-ae49-4398-91bd-eadf8695425f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-MergeStock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-MergeStock", + "id": "CultriX/Qwen2.5-14B-MergeStock", + "developer": "CultriX", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5685 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6579 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4147 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3733 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4676 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5396 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/CultriX/Qwen2.5-14B-ReasoningMerge/da04ff51-fbeb-41a8-ae5e-8ddf5925b792.json b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-ReasoningMerge/da04ff51-fbeb-41a8-ae5e-8ddf5925b792.json new file mode 100644 index 000000000..a2f6c4850 --- /dev/null +++ b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-ReasoningMerge/da04ff51-fbeb-41a8-ae5e-8ddf5925b792.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-ReasoningMerge/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-ReasoningMerge", + "id": "CultriX/Qwen2.5-14B-ReasoningMerge", + "developer": "CultriX", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4605 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6578 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5204 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4077 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5166 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5345 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Ultimav2/6d709396-1ae1-4e5c-a03c-13c1e9425202.json b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Ultimav2/6d709396-1ae1-4e5c-a03c-13c1e9425202.json new file mode 100644 index 
000000000..0693a1c39 --- /dev/null +++ b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Ultimav2/6d709396-1ae1-4e5c-a03c-13c1e9425202.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Ultimav2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-Ultimav2", + "id": "CultriX/Qwen2.5-14B-Ultimav2", + "developer": "CultriX", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.55 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6555 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3844 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3851 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4966 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5417 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Unity/5b616df9-e15a-4f84-98b4-c2cb532c1b95.json b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Unity/5b616df9-e15a-4f84-98b4-c2cb532c1b95.json new file mode 100644 index 000000000..5dd3c6ef1 --- /dev/null +++ b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Unity/5b616df9-e15a-4f84-98b4-c2cb532c1b95.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Unity/1770682486.623709", + 
"retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-Unity", + "id": "CultriX/Qwen2.5-14B-Unity", + "developer": "CultriX", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6739 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.602 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4313 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3473 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4679 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5076 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Wernicke-SFT/0f6552d9-3cbe-447e-909b-068e5ceed4c9.json b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Wernicke-SFT/0f6552d9-3cbe-447e-909b-068e5ceed4c9.json new file mode 100644 index 000000000..05fdd29f4 --- /dev/null +++ b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Wernicke-SFT/0f6552d9-3cbe-447e-909b-068e5ceed4c9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Wernicke-SFT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { 
+ "name": "Qwen2.5-14B-Wernicke-SFT", + "id": "CultriX/Qwen2.5-14B-Wernicke-SFT", + "developer": "CultriX", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4937 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6461 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3595 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.354 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.39 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.507 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Wernicke-SFT/84bc884e-29be-40b5-bfe2-6147bec90a78.json b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Wernicke-SFT/84bc884e-29be-40b5-bfe2-6147bec90a78.json deleted file mode 100644 index 70d4c38ac..000000000 --- a/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Wernicke-SFT/84bc884e-29be-40b5-bfe2-6147bec90a78.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Wernicke-SFT/1762652579.520046", - "retrieved_timestamp": "1762652579.5200472", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-Wernicke-SFT", - "developer": "CultriX", - "inference_platform": "unknown", - "id": 
"CultriX/Qwen2.5-14B-Wernicke-SFT", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4937443760333692 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6460586236565512 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3540268456375839 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38999999999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5069813829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Wernicke-SLERP/2861aae0-d2ec-48f5-bd20-9e7bcaf8dabd.json b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Wernicke-SLERP/2861aae0-d2ec-48f5-bd20-9e7bcaf8dabd.json new file mode 100644 index 000000000..19347fafa --- /dev/null +++ b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Wernicke-SLERP/2861aae0-d2ec-48f5-bd20-9e7bcaf8dabd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Wernicke-SLERP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-Wernicke-SLERP", + "id": "CultriX/Qwen2.5-14B-Wernicke-SLERP", + "developer": "CultriX", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.491 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5589 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.6441 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4486 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.344 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.414 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5094 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Wernicke/51a64f37-256c-4fe7-b28c-6117520f04ec.json b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Wernicke/51a64f37-256c-4fe7-b28c-6117520f04ec.json new file mode 100644 index 000000000..62b9abb48 --- /dev/null +++ b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Wernicke/51a64f37-256c-4fe7-b28c-6117520f04ec.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Wernicke/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-Wernicke", + "id": "CultriX/Qwen2.5-14B-Wernicke", + "developer": "CultriX", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5235 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6568 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match 
on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3814 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3935 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4689 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5424 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Wernickev3/03ce9c1d-38e8-4a6c-b293-57428a9d7c0e.json b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Wernickev3/03ce9c1d-38e8-4a6c-b293-57428a9d7c0e.json new file mode 100644 index 000000000..ece044f89 --- /dev/null +++ b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Wernickev3/03ce9c1d-38e8-4a6c-b293-57428a9d7c0e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Wernickev3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-Wernickev3", + "id": "CultriX/Qwen2.5-14B-Wernickev3", + "developer": "CultriX", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7048 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6184 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3542 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": 
"GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3624 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4717 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5151 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CultriX/Qwen2.5-14B-partialmergept1/3b0f5dea-db9b-4657-9807-6b3e56d38823.json b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-partialmergept1/3b0f5dea-db9b-4657-9807-6b3e56d38823.json new file mode 100644 index 000000000..a8c01a5f1 --- /dev/null +++ b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-partialmergept1/3b0f5dea-db9b-4657-9807-6b3e56d38823.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-partialmergept1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-partialmergept1", + "id": "CultriX/Qwen2.5-14B-partialmergept1", + "developer": "CultriX", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6337 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6151 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4539 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3616 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4757 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5208 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CultriX/Qwenfinity-2.5-14B/2d19e9ff-e331-4171-ae90-47e44f3f8885.json b/data/hfopenllm_v2/CultriX/Qwenfinity-2.5-14B/2d19e9ff-e331-4171-ae90-47e44f3f8885.json new file mode 100644 index 000000000..4b70d0648 --- /dev/null +++ b/data/hfopenllm_v2/CultriX/Qwenfinity-2.5-14B/2d19e9ff-e331-4171-ae90-47e44f3f8885.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CultriX_Qwenfinity-2.5-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwenfinity-2.5-14B", + "id": "CultriX/Qwenfinity-2.5-14B", + "developer": "CultriX", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4814 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5655 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4101 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.349 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4506 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4498 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CultriX/Qwestion-14B/6bfb8b24-1abd-405b-b01d-7d7111705dbb.json b/data/hfopenllm_v2/CultriX/Qwestion-14B/6bfb8b24-1abd-405b-b01d-7d7111705dbb.json new file mode 100644 index 000000000..27242621d --- /dev/null +++ b/data/hfopenllm_v2/CultriX/Qwestion-14B/6bfb8b24-1abd-405b-b01d-7d7111705dbb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CultriX_Qwestion-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwestion-14B", + "id": "CultriX/Qwestion-14B", + "developer": "CultriX", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6318 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.645 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3724 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3683 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4636 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": 
"hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5422 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CultriX/Qwestion-14B/c6ad96f2-fcb9-47c5-8106-936436b6ad1b.json b/data/hfopenllm_v2/CultriX/Qwestion-14B/c6ad96f2-fcb9-47c5-8106-936436b6ad1b.json deleted file mode 100644 index 241b3800d..000000000 --- a/data/hfopenllm_v2/CultriX/Qwestion-14B/c6ad96f2-fcb9-47c5-8106-936436b6ad1b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CultriX_Qwestion-14B/1762652579.521322", - "retrieved_timestamp": "1762652579.521322", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CultriX/Qwestion-14B", - "developer": "CultriX", - "inference_platform": "unknown", - "id": "CultriX/Qwestion-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6317803428237078 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6450104739140539 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3723564954682779 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36828859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46360416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.542220744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/CultriX/SeQwence-14B-EvolMerge/c83e6b6c-c8be-4d97-9c65-2d883f88f37f.json b/data/hfopenllm_v2/CultriX/SeQwence-14B-EvolMerge/c83e6b6c-c8be-4d97-9c65-2d883f88f37f.json new file mode 100644 index 000000000..2ff8194ea --- /dev/null +++ b/data/hfopenllm_v2/CultriX/SeQwence-14B-EvolMerge/c83e6b6c-c8be-4d97-9c65-2d883f88f37f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CultriX_SeQwence-14B-EvolMerge/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + 
"source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SeQwence-14B-EvolMerge", + "id": "CultriX/SeQwence-14B-EvolMerge", + "developer": "CultriX", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5382 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6572 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3671 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3809 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4821 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5419 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CultriX/SeQwence-14B-EvolMergev1/72569796-1b11-48cc-ada7-e8c09522dd54.json b/data/hfopenllm_v2/CultriX/SeQwence-14B-EvolMergev1/72569796-1b11-48cc-ada7-e8c09522dd54.json new file mode 100644 index 000000000..eb95ee2a0 --- /dev/null +++ b/data/hfopenllm_v2/CultriX/SeQwence-14B-EvolMergev1/72569796-1b11-48cc-ada7-e8c09522dd54.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CultriX_SeQwence-14B-EvolMergev1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"SeQwence-14B-EvolMergev1", + "id": "CultriX/SeQwence-14B-EvolMergev1", + "developer": "CultriX", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5555 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6546 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4215 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3767 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4623 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5393 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CultriX/SeQwence-14B-v5/58403e30-bd2b-4f4c-ad41-daa890c77d40.json b/data/hfopenllm_v2/CultriX/SeQwence-14B-v5/58403e30-bd2b-4f4c-ad41-daa890c77d40.json new file mode 100644 index 000000000..3cbe2fbd3 --- /dev/null +++ b/data/hfopenllm_v2/CultriX/SeQwence-14B-v5/58403e30-bd2b-4f4c-ad41-daa890c77d40.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CultriX_SeQwence-14B-v5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SeQwence-14B-v5", + "id": "CultriX/SeQwence-14B-v5", + "developer": "CultriX", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ 
+ { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.592 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6517 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3308 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.37 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4714 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5415 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CultriX/SeQwence-14B/eb8e1f1d-c6b3-407c-b172-d240553d2f89.json b/data/hfopenllm_v2/CultriX/SeQwence-14B/eb8e1f1d-c6b3-407c-b172-d240553d2f89.json new file mode 100644 index 000000000..c20b9bd39 --- /dev/null +++ b/data/hfopenllm_v2/CultriX/SeQwence-14B/eb8e1f1d-c6b3-407c-b172-d240553d2f89.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CultriX_SeQwence-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SeQwence-14B", + "id": "CultriX/SeQwence-14B", + "developer": "CultriX", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.5352 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6506 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3535 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3607 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5419 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CultriX/SeQwence-14Bv1/356d75a0-6520-46c1-afa9-7dbb2596a5c1.json b/data/hfopenllm_v2/CultriX/SeQwence-14Bv1/356d75a0-6520-46c1-afa9-7dbb2596a5c1.json new file mode 100644 index 000000000..4bc3ce623 --- /dev/null +++ b/data/hfopenllm_v2/CultriX/SeQwence-14Bv1/356d75a0-6520-46c1-afa9-7dbb2596a5c1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CultriX_SeQwence-14Bv1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SeQwence-14Bv1", + "id": "CultriX/SeQwence-14Bv1", + "developer": "CultriX", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6678 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6345 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.361 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3616 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4704 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.532 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CultriX/SeQwence-14Bv2/78681e0c-5fe2-4920-af7b-99345cea3efe.json b/data/hfopenllm_v2/CultriX/SeQwence-14Bv2/78681e0c-5fe2-4920-af7b-99345cea3efe.json new file mode 100644 index 000000000..a0e28a509 --- /dev/null +++ b/data/hfopenllm_v2/CultriX/SeQwence-14Bv2/78681e0c-5fe2-4920-af7b-99345cea3efe.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CultriX_SeQwence-14Bv2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SeQwence-14Bv2", + "id": "CultriX/SeQwence-14Bv2", + "developer": "CultriX", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5786 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6305 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4758 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3607 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4601 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5334 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CultriX/SeQwence-14Bv3/ba0ee5b4-070a-461d-a3d2-cd4036387cc9.json b/data/hfopenllm_v2/CultriX/SeQwence-14Bv3/ba0ee5b4-070a-461d-a3d2-cd4036387cc9.json new file mode 100644 index 000000000..ff855fb3e --- /dev/null +++ b/data/hfopenllm_v2/CultriX/SeQwence-14Bv3/ba0ee5b4-070a-461d-a3d2-cd4036387cc9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/CultriX_SeQwence-14Bv3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SeQwence-14Bv3", + "id": "CultriX/SeQwence-14Bv3", + "developer": "CultriX", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5719 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6302 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4766 + } + }, + { + 
"evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3649 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4624 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5335 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DRXD1000/Atlas-7B/17d0d377-bca4-411c-be11-6c5cfce07798.json b/data/hfopenllm_v2/DRXD1000/Atlas-7B/17d0d377-bca4-411c-be11-6c5cfce07798.json new file mode 100644 index 000000000..3ae174b80 --- /dev/null +++ b/data/hfopenllm_v2/DRXD1000/Atlas-7B/17d0d377-bca4-411c-be11-6c5cfce07798.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DRXD1000_Atlas-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Atlas-7B", + "id": "DRXD1000/Atlas-7B", + "developer": "DRXD1000", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.768 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3704 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3302 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0189 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3342 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1401 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DRXD1000/Atlas-7B/1f223500-a1d6-471f-b3cf-2575ab5a52c8.json b/data/hfopenllm_v2/DRXD1000/Atlas-7B/1f223500-a1d6-471f-b3cf-2575ab5a52c8.json deleted file mode 100644 index c1ad0efca..000000000 --- a/data/hfopenllm_v2/DRXD1000/Atlas-7B/1f223500-a1d6-471f-b3cf-2575ab5a52c8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DRXD1000_Atlas-7B/1762652579.5232708", - "retrieved_timestamp": "1762652579.523272", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DRXD1000/Atlas-7B", - "developer": "DRXD1000", - "inference_platform": "unknown", - "id": "DRXD1000/Atlas-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.768 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3704459722425387 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3302176697760134 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0188821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33425 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14012632978723405 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/DRXD1000/Phoenix-7B/bff80553-e91f-470e-923c-7f8103d37fca.json b/data/hfopenllm_v2/DRXD1000/Phoenix-7B/bff80553-e91f-470e-923c-7f8103d37fca.json deleted file mode 100644 index 2901a08d6..000000000 --- a/data/hfopenllm_v2/DRXD1000/Phoenix-7B/bff80553-e91f-470e-923c-7f8103d37fca.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DRXD1000_Phoenix-7B/1762652579.5236301", - "retrieved_timestamp": "1762652579.523632", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DRXD1000/Phoenix-7B", - "developer": "DRXD1000", - "inference_platform": "unknown", - "id": "DRXD1000/Phoenix-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3209617149164218 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3931566034728218 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38494791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23429188829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/DRXD1000/Phoenix-7B/d01a56a1-1eb9-4ccf-8c09-348b6ba5480b.json b/data/hfopenllm_v2/DRXD1000/Phoenix-7B/d01a56a1-1eb9-4ccf-8c09-348b6ba5480b.json new file mode 100644 index 000000000..9ea88c590 --- /dev/null +++ b/data/hfopenllm_v2/DRXD1000/Phoenix-7B/d01a56a1-1eb9-4ccf-8c09-348b6ba5480b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DRXD1000_Phoenix-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phoenix-7B", + "id": "DRXD1000/Phoenix-7B", + "developer": "DRXD1000", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + 
"architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.321 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3932 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0166 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2785 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3849 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2343 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DUAL-GPO/zephyr-7b-ipo-0k-15k-i1/389821ff-d8e2-4d1d-8fb2-57a689867ac5.json b/data/hfopenllm_v2/DUAL-GPO/zephyr-7b-ipo-0k-15k-i1/389821ff-d8e2-4d1d-8fb2-57a689867ac5.json new file mode 100644 index 000000000..1cd2505fb --- /dev/null +++ b/data/hfopenllm_v2/DUAL-GPO/zephyr-7b-ipo-0k-15k-i1/389821ff-d8e2-4d1d-8fb2-57a689867ac5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DUAL-GPO_zephyr-7b-ipo-0k-15k-i1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zephyr-7b-ipo-0k-15k-i1", + "id": "DUAL-GPO/zephyr-7b-ipo-0k-15k-i1", + "developer": "DUAL-GPO", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 14.483 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2756 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4473 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0302 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4173 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.313 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DUAL-GPO/zephyr-7b-ipo-0k-15k-i1/a4cd4144-75d5-4c48-a936-96d70f052a66.json b/data/hfopenllm_v2/DUAL-GPO/zephyr-7b-ipo-0k-15k-i1/a4cd4144-75d5-4c48-a936-96d70f052a66.json deleted file mode 100644 index d88f58de8..000000000 --- a/data/hfopenllm_v2/DUAL-GPO/zephyr-7b-ipo-0k-15k-i1/a4cd4144-75d5-4c48-a936-96d70f052a66.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DUAL-GPO_zephyr-7b-ipo-0k-15k-i1/1762652579.523929", - "retrieved_timestamp": "1762652579.52393", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DUAL-GPO/zephyr-7b-ipo-0k-15k-i1", - "developer": "DUAL-GPO", - "inference_platform": "unknown", - "id": "DUAL-GPO/zephyr-7b-ipo-0k-15k-i1", - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 14.483 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27562423259174545 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4472712447565954 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030211480362537766 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41734374999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31299867021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/DZgas/GIGABATEMAN-7B/180be3a9-1d8e-4705-bda4-032bc66768c6.json b/data/hfopenllm_v2/DZgas/GIGABATEMAN-7B/180be3a9-1d8e-4705-bda4-032bc66768c6.json deleted file mode 100644 index 3134fb82d..000000000 --- a/data/hfopenllm_v2/DZgas/GIGABATEMAN-7B/180be3a9-1d8e-4705-bda4-032bc66768c6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DZgas_GIGABATEMAN-7B/1762652579.524226", - "retrieved_timestamp": "1762652579.5242271", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DZgas/GIGABATEMAN-7B", - "developer": "DZgas", - "inference_platform": "unknown", - "id": "DZgas/GIGABATEMAN-7B", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46074637517342876 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5032184342862756 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43284374999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3176529255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/DZgas/GIGABATEMAN-7B/7913f782-29b0-48bd-bc62-37da9a5ac7d9.json b/data/hfopenllm_v2/DZgas/GIGABATEMAN-7B/7913f782-29b0-48bd-bc62-37da9a5ac7d9.json new file mode 100644 index 000000000..150aa45c9 --- /dev/null +++ b/data/hfopenllm_v2/DZgas/GIGABATEMAN-7B/7913f782-29b0-48bd-bc62-37da9a5ac7d9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DZgas_GIGABATEMAN-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "GIGABATEMAN-7B", + "id": "DZgas/GIGABATEMAN-7B", + "developer": "DZgas", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4607 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5032 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0551 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4328 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3177 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/AetherDrake-SFT/843cbaa0-5d9d-47a8-ae69-fe38a5812136.json b/data/hfopenllm_v2/Daemontatox/AetherDrake-SFT/843cbaa0-5d9d-47a8-ae69-fe38a5812136.json deleted file mode 100644 index e22fb8432..000000000 --- a/data/hfopenllm_v2/Daemontatox/AetherDrake-SFT/843cbaa0-5d9d-47a8-ae69-fe38a5812136.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Daemontatox_AetherDrake-SFT/1762652579.524555", - "retrieved_timestamp": "1762652579.524556", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Daemontatox/AetherDrake-SFT", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/AetherDrake-SFT", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4812796712722244 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48720075507220245 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1510574018126888 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40884375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34990026595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/Daemontatox/AetherDrake-SFT/b0930974-999e-4372-9d21-b9790e0bad4c.json b/data/hfopenllm_v2/Daemontatox/AetherDrake-SFT/b0930974-999e-4372-9d21-b9790e0bad4c.json new file mode 100644 index 000000000..e4aedebfc --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/AetherDrake-SFT/b0930974-999e-4372-9d21-b9790e0bad4c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Daemontatox_AetherDrake-SFT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AetherDrake-SFT", + "id": "Daemontatox/AetherDrake-SFT", + "developer": "Daemontatox", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4813 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4872 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1511 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3205 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4088 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3499 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/AetherSett/791a8f9f-5c85-42e5-a06d-270118b0c7c2.json b/data/hfopenllm_v2/Daemontatox/AetherSett/791a8f9f-5c85-42e5-a06d-270118b0c7c2.json deleted file mode 100644 index b5c1fcf8b..000000000 --- a/data/hfopenllm_v2/Daemontatox/AetherSett/791a8f9f-5c85-42e5-a06d-270118b0c7c2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Daemontatox_AetherSett/1762652579.524883", - "retrieved_timestamp": "1762652579.524884", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Daemontatox/AetherSett", - "developer": 
"Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/AetherSett", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5369586031729146 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5451624435465484 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3972809667673716 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46031249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4278590425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/Daemontatox/AetherSett/8265f577-f504-4a56-9cf0-42c34766559a.json b/data/hfopenllm_v2/Daemontatox/AetherSett/8265f577-f504-4a56-9cf0-42c34766559a.json new file mode 100644 index 000000000..66d790f66 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/AetherSett/8265f577-f504-4a56-9cf0-42c34766559a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Daemontatox_AetherSett/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AetherSett", + "id": "Daemontatox/AetherSett", + "developer": "Daemontatox", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.537 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5452 + } + }, + { + 
"evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3973 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4603 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4279 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/AetherTOT/82044cd2-1a46-406e-bc68-397ce41b29ea.json b/data/hfopenllm_v2/Daemontatox/AetherTOT/82044cd2-1a46-406e-bc68-397ce41b29ea.json new file mode 100644 index 000000000..859888bd9 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/AetherTOT/82044cd2-1a46-406e-bc68-397ce41b29ea.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Daemontatox_AetherTOT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AetherTOT", + "id": "Daemontatox/AetherTOT", + "developer": "Daemontatox", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MllamaForConditionalGeneration", + "params_billions": 10.67 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4383 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5034 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1443 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3238 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4052 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3778 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/AetherTOT/8ac4547d-2b57-4227-a63d-05da4f3ccbc7.json b/data/hfopenllm_v2/Daemontatox/AetherTOT/8ac4547d-2b57-4227-a63d-05da4f3ccbc7.json deleted file mode 100644 index b1ecd647b..000000000 --- a/data/hfopenllm_v2/Daemontatox/AetherTOT/8ac4547d-2b57-4227-a63d-05da4f3ccbc7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Daemontatox_AetherTOT/1762652579.5251331", - "retrieved_timestamp": "1762652579.5251389", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Daemontatox/AetherTOT", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/AetherTOT", - "additional_details": { - "precision": "float16", - "architecture": "MllamaForConditionalGeneration", - "params_billions": 10.67 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4397642699149368 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5066056342472064 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1487915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4078541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38040226063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/Daemontatox/AetherTOT/de09e323-8cf1-4aa9-9537-e8ad30a8c297.json b/data/hfopenllm_v2/Daemontatox/AetherTOT/de09e323-8cf1-4aa9-9537-e8ad30a8c297.json new file mode 100644 index 000000000..e4d9830af --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/AetherTOT/de09e323-8cf1-4aa9-9537-e8ad30a8c297.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Daemontatox_AetherTOT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AetherTOT", + "id": "Daemontatox/AetherTOT", + "developer": "Daemontatox", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MllamaForConditionalGeneration", + "params_billions": 10.67 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4398 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5066 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1488 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3238 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4079 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3804 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/AetherTOT/fa9282c6-7820-49dd-9893-9559c5a984a9.json b/data/hfopenllm_v2/Daemontatox/AetherTOT/fa9282c6-7820-49dd-9893-9559c5a984a9.json deleted file mode 100644 index 340a7babe..000000000 --- a/data/hfopenllm_v2/Daemontatox/AetherTOT/fa9282c6-7820-49dd-9893-9559c5a984a9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Daemontatox_AetherTOT/1762652579.5253801", - "retrieved_timestamp": "1762652579.525381", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Daemontatox/AetherTOT", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/AetherTOT", - "additional_details": { - "precision": "bfloat16", - "architecture": "MllamaForConditionalGeneration", - "params_billions": 10.67 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43829040279790954 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5034307630533988 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14425981873111782 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40518750000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37782579787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/Daemontatox/AetherUncensored/574d79eb-94ae-4b79-8763-77267d300670.json b/data/hfopenllm_v2/Daemontatox/AetherUncensored/574d79eb-94ae-4b79-8763-77267d300670.json deleted file mode 100644 index 7868e30f2..000000000 --- a/data/hfopenllm_v2/Daemontatox/AetherUncensored/574d79eb-94ae-4b79-8763-77267d300670.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Daemontatox_AetherUncensored/1762652579.525634", - "retrieved_timestamp": "1762652579.5256362", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Daemontatox/AetherUncensored", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/AetherUncensored", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40419309653940433 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44631282805144945 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14501510574018128 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3746770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27102726063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/Daemontatox/AetherUncensored/bfe543b4-ec38-488e-ae04-125cd358b61f.json b/data/hfopenllm_v2/Daemontatox/AetherUncensored/bfe543b4-ec38-488e-ae04-125cd358b61f.json new file mode 100644 index 000000000..a8cb84de3 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/AetherUncensored/bfe543b4-ec38-488e-ae04-125cd358b61f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Daemontatox_AetherUncensored/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AetherUncensored", + "id": "Daemontatox/AetherUncensored", + "developer": "Daemontatox", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4042 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + 
"source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4463 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.145 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2886 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3747 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/Cogito-MIS/822268e0-8f66-4bb3-9d01-52c684ca281f.json b/data/hfopenllm_v2/Daemontatox/Cogito-MIS/822268e0-8f66-4bb3-9d01-52c684ca281f.json deleted file mode 100644 index 6f6de8dfc..000000000 --- a/data/hfopenllm_v2/Daemontatox/Cogito-MIS/822268e0-8f66-4bb3-9d01-52c684ca281f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Daemontatox_Cogito-MIS/1762652579.525943", - "retrieved_timestamp": "1762652579.5259452", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Daemontatox/Cogito-MIS", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/Cogito-MIS", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18145188100905596 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5059981143086196 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08610271903323263 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37676041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14353390957446807 - } - } - ] -} diff --git a/data/hfopenllm_v2/Daemontatox/Cogito-MIS/be36d8ae-b81c-4b4e-aa2f-5999c7582237.json b/data/hfopenllm_v2/Daemontatox/Cogito-MIS/be36d8ae-b81c-4b4e-aa2f-5999c7582237.json new file mode 100644 index 000000000..8a0c7af29 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/Cogito-MIS/be36d8ae-b81c-4b4e-aa2f-5999c7582237.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Daemontatox_Cogito-MIS/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Cogito-MIS", + "id": "Daemontatox/Cogito-MIS", + "developer": "Daemontatox", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1815 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.506 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0861 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2567 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + 
"dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3768 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1435 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/CogitoDistil/342b435f-89e9-48ad-ab0f-2c1f52f4571a.json b/data/hfopenllm_v2/Daemontatox/CogitoDistil/342b435f-89e9-48ad-ab0f-2c1f52f4571a.json new file mode 100644 index 000000000..c37151cbd --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/CogitoDistil/342b435f-89e9-48ad-ab0f-2c1f52f4571a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Daemontatox_CogitoDistil/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CogitoDistil", + "id": "Daemontatox/CogitoDistil", + "developer": "Daemontatox", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2776 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3677 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3927 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { 
+ "score": 0.3755 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2625 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/CogitoDistil/f39e1ca4-2a0f-4650-886b-4160760daee5.json b/data/hfopenllm_v2/Daemontatox/CogitoDistil/f39e1ca4-2a0f-4650-886b-4160760daee5.json deleted file mode 100644 index edc6fe053..000000000 --- a/data/hfopenllm_v2/Daemontatox/CogitoDistil/f39e1ca4-2a0f-4650-886b-4160760daee5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Daemontatox_CogitoDistil/1762652579.526295", - "retrieved_timestamp": "1762652579.5262961", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Daemontatox/CogitoDistil", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/CogitoDistil", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27764775240805506 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36767660461416857 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39274924471299094 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3754895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625498670212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/Daemontatox/CogitoZ/5e08074c-32bd-4ce6-a09f-7b5832cba288.json b/data/hfopenllm_v2/Daemontatox/CogitoZ/5e08074c-32bd-4ce6-a09f-7b5832cba288.json deleted file mode 100644 index 8aa75ccc4..000000000 --- a/data/hfopenllm_v2/Daemontatox/CogitoZ/5e08074c-32bd-4ce6-a09f-7b5832cba288.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - 
"evaluation_id": "hfopenllm_v2/Daemontatox_CogitoZ/1762652579.5265448", - "retrieved_timestamp": "1762652579.526546", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Daemontatox/CogitoZ", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/CogitoZ", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3967240255854466 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6734487392645502 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5241691842900302 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3951342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4792604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5592586436170213 - } - } - ] -} diff --git a/data/hfopenllm_v2/Daemontatox/CogitoZ/b0c8737d-d838-4da1-909b-b218e22119dc.json b/data/hfopenllm_v2/Daemontatox/CogitoZ/b0c8737d-d838-4da1-909b-b218e22119dc.json new file mode 100644 index 000000000..f78c0c39c --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/CogitoZ/b0c8737d-d838-4da1-909b-b218e22119dc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Daemontatox_CogitoZ/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CogitoZ", + "id": "Daemontatox/CogitoZ", + "developer": "Daemontatox", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", 
+ "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3967 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6734 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5242 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3951 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4793 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5593 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/CogitoZ14/024f23d8-66b0-4a7b-be01-fd68f0ab295e.json b/data/hfopenllm_v2/Daemontatox/CogitoZ14/024f23d8-66b0-4a7b-be01-fd68f0ab295e.json deleted file mode 100644 index d776a4386..000000000 --- a/data/hfopenllm_v2/Daemontatox/CogitoZ14/024f23d8-66b0-4a7b-be01-fd68f0ab295e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Daemontatox_CogitoZ14/1762652579.526777", - "retrieved_timestamp": "1762652579.5267782", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Daemontatox/CogitoZ14", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/CogitoZ14", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6637034180419066 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.6297514788808327 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42220543806646527 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.405875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39993351063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/Daemontatox/CogitoZ14/4cd40f28-842f-44d5-9eb2-86238077fc55.json b/data/hfopenllm_v2/Daemontatox/CogitoZ14/4cd40f28-842f-44d5-9eb2-86238077fc55.json new file mode 100644 index 000000000..e16a58952 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/CogitoZ14/4cd40f28-842f-44d5-9eb2-86238077fc55.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Daemontatox_CogitoZ14/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CogitoZ14", + "id": "Daemontatox/CogitoZ14", + "developer": "Daemontatox", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6637 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6298 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4222 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.3163 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4059 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3999 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/DocumentCogito/0758051c-2d75-402e-af0e-769096cbb17c.json b/data/hfopenllm_v2/Daemontatox/DocumentCogito/0758051c-2d75-402e-af0e-769096cbb17c.json new file mode 100644 index 000000000..b59d83df4 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/DocumentCogito/0758051c-2d75-402e-af0e-769096cbb17c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Daemontatox_DocumentCogito/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DocumentCogito", + "id": "Daemontatox/DocumentCogito", + "developer": "Daemontatox", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MllamaForConditionalGeneration", + "params_billions": 10.67 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.777 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5187 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2198 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3911 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3738 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/DocumentCogito/6d2a742b-adde-4b6d-90d4-ebefbb2b61be.json b/data/hfopenllm_v2/Daemontatox/DocumentCogito/6d2a742b-adde-4b6d-90d4-ebefbb2b61be.json deleted file mode 100644 index df3c3ac1c..000000000 --- a/data/hfopenllm_v2/Daemontatox/DocumentCogito/6d2a742b-adde-4b6d-90d4-ebefbb2b61be.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Daemontatox_DocumentCogito/1762652579.5270069", - "retrieved_timestamp": "1762652579.527008", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Daemontatox/DocumentCogito", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/DocumentCogito", - "additional_details": { - "precision": "bfloat16", - "architecture": "MllamaForConditionalGeneration", - "params_billions": 10.67 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5064340394597445 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5111563719111275 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16314199395770393 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3973125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38023603723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/Daemontatox/DocumentCogito/9a638bb6-f16f-496b-a974-d97dbb6cd626.json b/data/hfopenllm_v2/Daemontatox/DocumentCogito/9a638bb6-f16f-496b-a974-d97dbb6cd626.json deleted file mode 
100644 index 3fd2dc59c..000000000 --- a/data/hfopenllm_v2/Daemontatox/DocumentCogito/9a638bb6-f16f-496b-a974-d97dbb6cd626.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Daemontatox_DocumentCogito/1762652579.527227", - "retrieved_timestamp": "1762652579.5272279", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Daemontatox/DocumentCogito", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/DocumentCogito", - "additional_details": { - "precision": "float16", - "architecture": "MllamaForConditionalGeneration", - "params_billions": 10.67 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7770349339751859 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5186726621665779 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21978851963746224 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39105208333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3737533244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/Daemontatox/DocumentCogito/c93f610b-fb97-4ad1-b8af-fc41c6d8da33.json b/data/hfopenllm_v2/Daemontatox/DocumentCogito/c93f610b-fb97-4ad1-b8af-fc41c6d8da33.json new file mode 100644 index 000000000..4744e7468 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/DocumentCogito/c93f610b-fb97-4ad1-b8af-fc41c6d8da33.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Daemontatox_DocumentCogito/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DocumentCogito", + "id": "Daemontatox/DocumentCogito", + "developer": "Daemontatox", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MllamaForConditionalGeneration", + "params_billions": 10.67 + } + }, + 
"evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5064 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5112 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1631 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3163 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3973 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3802 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/Llama3.3-70B-CogniLink/b8467118-d895-41fa-81c7-89892e1844d5.json b/data/hfopenllm_v2/Daemontatox/Llama3.3-70B-CogniLink/b8467118-d895-41fa-81c7-89892e1844d5.json new file mode 100644 index 000000000..7fa8b968c --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/Llama3.3-70B-CogniLink/b8467118-d895-41fa-81c7-89892e1844d5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Daemontatox_Llama3.3-70B-CogniLink/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.3-70B-CogniLink", + "id": "Daemontatox/Llama3.3-70B-CogniLink", + "developer": "Daemontatox", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6931 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6668 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4139 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4455 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4877 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5173 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/Llama_cot/30d867bb-63c6-48d1-8d43-6c24f4cf44ba.json b/data/hfopenllm_v2/Daemontatox/Llama_cot/30d867bb-63c6-48d1-8d43-6c24f4cf44ba.json new file mode 100644 index 000000000..e654aab90 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/Llama_cot/30d867bb-63c6-48d1-8d43-6c24f4cf44ba.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Daemontatox_Llama_cot/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_cot", + "id": "Daemontatox/Llama_cot", + "developer": "Daemontatox", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MllamaForConditionalGeneration", + "params_billions": 10.67 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7549 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + 
"hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4838 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2024 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3872 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3518 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/MawaredT1/1e87d1ea-59df-4c1a-96da-31e12e27dab2.json b/data/hfopenllm_v2/Daemontatox/MawaredT1/1e87d1ea-59df-4c1a-96da-31e12e27dab2.json deleted file mode 100644 index e25cc8d90..000000000 --- a/data/hfopenllm_v2/Daemontatox/MawaredT1/1e87d1ea-59df-4c1a-96da-31e12e27dab2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Daemontatox_MawaredT1/1762652579.527918", - "retrieved_timestamp": "1762652579.527919", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Daemontatox/MawaredT1", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/MawaredT1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41988036188424493 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5214815439293661 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3021148036253776 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3347315436241611 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47020833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4718251329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/Daemontatox/MawaredT1/89b92cda-c5b6-45ed-a534-361c9d34794a.json b/data/hfopenllm_v2/Daemontatox/MawaredT1/89b92cda-c5b6-45ed-a534-361c9d34794a.json new file mode 100644 index 000000000..9ac085f5f --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/MawaredT1/89b92cda-c5b6-45ed-a534-361c9d34794a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Daemontatox_MawaredT1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MawaredT1", + "id": "Daemontatox/MawaredT1", + "developer": "Daemontatox", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4199 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5215 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3021 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3347 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": 
"TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4702 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4718 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/Mini_QwQ/48cdf76a-886d-41ec-8580-00ed4232b601.json b/data/hfopenllm_v2/Daemontatox/Mini_QwQ/48cdf76a-886d-41ec-8580-00ed4232b601.json new file mode 100644 index 000000000..e1b1e912d --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/Mini_QwQ/48cdf76a-886d-41ec-8580-00ed4232b601.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Daemontatox_Mini_QwQ/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mini_QwQ", + "id": "Daemontatox/Mini_QwQ", + "developer": "Daemontatox", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4497 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5549 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4192 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4682 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + 
"dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4373 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/Mini_QwQ/7d5c59eb-c6fb-414a-9e4e-44d1d56f7401.json b/data/hfopenllm_v2/Daemontatox/Mini_QwQ/7d5c59eb-c6fb-414a-9e4e-44d1d56f7401.json deleted file mode 100644 index 420e60009..000000000 --- a/data/hfopenllm_v2/Daemontatox/Mini_QwQ/7d5c59eb-c6fb-414a-9e4e-44d1d56f7401.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Daemontatox_Mini_QwQ/1762652579.528199", - "retrieved_timestamp": "1762652579.5282", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Daemontatox/Mini_QwQ", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/Mini_QwQ", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44970566984490046 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.554898906584336 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41918429003021146 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46825 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.437250664893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/Daemontatox/NemoR/116272d4-d25d-49cb-80cb-ff26a0fb3cf4.json b/data/hfopenllm_v2/Daemontatox/NemoR/116272d4-d25d-49cb-80cb-ff26a0fb3cf4.json new file mode 100644 index 000000000..450d53bc4 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/NemoR/116272d4-d25d-49cb-80cb-ff26a0fb3cf4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Daemontatox_NemoR/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + 
"source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NemoR", + "id": "Daemontatox/NemoR", + "developer": "Daemontatox", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 6.124 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2287 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5194 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0831 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3272 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3908 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.329 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/NemoR/a2da90e0-5f59-4c89-b819-316d2cc318be.json b/data/hfopenllm_v2/Daemontatox/NemoR/a2da90e0-5f59-4c89-b819-316d2cc318be.json deleted file mode 100644 index 7dccc9c8f..000000000 --- a/data/hfopenllm_v2/Daemontatox/NemoR/a2da90e0-5f59-4c89-b819-316d2cc318be.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Daemontatox_NemoR/1762652579.528459", - "retrieved_timestamp": "1762652579.528459", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"Daemontatox/NemoR", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/NemoR", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 6.124 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2287375275380435 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5194067688446361 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271812080536913 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39080208333333327 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32903922872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/Daemontatox/PathFinderAI2.0/274ab6b9-5fd7-41df-9076-b16c52947640.json b/data/hfopenllm_v2/Daemontatox/PathFinderAI2.0/274ab6b9-5fd7-41df-9076-b16c52947640.json deleted file mode 100644 index 8d0cf6b57..000000000 --- a/data/hfopenllm_v2/Daemontatox/PathFinderAI2.0/274ab6b9-5fd7-41df-9076-b16c52947640.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Daemontatox_PathFinderAI2.0/1762652579.528686", - "retrieved_timestamp": "1762652579.528686", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Daemontatox/PathFinderAI2.0", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/PathFinderAI2.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45410178326839457 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.665823006477417 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5075528700906344 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4215625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5546875 - } - } - ] -} diff --git a/data/hfopenllm_v2/Daemontatox/PathFinderAI2.0/bb103828-70fe-4767-9302-6750d839129e.json b/data/hfopenllm_v2/Daemontatox/PathFinderAI2.0/bb103828-70fe-4767-9302-6750d839129e.json new file mode 100644 index 000000000..32487f192 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/PathFinderAI2.0/bb103828-70fe-4767-9302-6750d839129e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Daemontatox_PathFinderAI2.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PathFinderAI2.0", + "id": "Daemontatox/PathFinderAI2.0", + "developer": "Daemontatox", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4541 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6658 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5076 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4216 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5547 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/PathFinderAi3.0/7b58ab54-239b-4e49-93f1-c3940df61474.json b/data/hfopenllm_v2/Daemontatox/PathFinderAi3.0/7b58ab54-239b-4e49-93f1-c3940df61474.json new file mode 100644 index 000000000..1fd11dd1d --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/PathFinderAi3.0/7b58ab54-239b-4e49-93f1-c3940df61474.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Daemontatox_PathFinderAi3.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PathFinderAi3.0", + "id": "Daemontatox/PathFinderAi3.0", + "developer": "Daemontatox", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4271 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6884 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5045 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4086 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4807 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5757 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/PathFinderAi3.0/ba3924c6-f913-4094-a56a-1699f07f103c.json b/data/hfopenllm_v2/Daemontatox/PathFinderAi3.0/ba3924c6-f913-4094-a56a-1699f07f103c.json deleted file mode 100644 index b32b14169..000000000 --- a/data/hfopenllm_v2/Daemontatox/PathFinderAi3.0/ba3924c6-f913-4094-a56a-1699f07f103c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Daemontatox_PathFinderAi3.0/1762652579.5289202", - "retrieved_timestamp": "1762652579.5289202", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Daemontatox/PathFinderAi3.0", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/PathFinderAi3.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42709898624538445 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6884221416328996 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5045317220543807 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4085570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4806875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5757147606382979 - } - } - ] -} diff --git a/data/hfopenllm_v2/Daemontatox/PathfinderAI/445f2c79-2c47-465c-ace7-73b3fa491454.json b/data/hfopenllm_v2/Daemontatox/PathfinderAI/445f2c79-2c47-465c-ace7-73b3fa491454.json deleted file mode 100644 index 29e2bb12a..000000000 --- 
a/data/hfopenllm_v2/Daemontatox/PathfinderAI/445f2c79-2c47-465c-ace7-73b3fa491454.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Daemontatox_PathfinderAI/1762652579.529176", - "retrieved_timestamp": "1762652579.5291772", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Daemontatox/PathfinderAI", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/PathfinderAI", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37451739163198094 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6667854331232542 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47583081570996977 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39429530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48583333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.559341755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/Daemontatox/PathfinderAI/559067a2-816c-4091-893e-b1c7860171ec.json b/data/hfopenllm_v2/Daemontatox/PathfinderAI/559067a2-816c-4091-893e-b1c7860171ec.json new file mode 100644 index 000000000..a2410c22f --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/PathfinderAI/559067a2-816c-4091-893e-b1c7860171ec.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Daemontatox_PathfinderAI/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PathfinderAI", + "id": "Daemontatox/PathfinderAI", + "developer": "Daemontatox", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": 
"IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4855 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6627 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4841 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4256 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5542 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/PathfinderAI/c07f2943-f3f4-46be-993e-be56dadcb561.json b/data/hfopenllm_v2/Daemontatox/PathfinderAI/c07f2943-f3f4-46be-993e-be56dadcb561.json deleted file mode 100644 index af2587e02..000000000 --- a/data/hfopenllm_v2/Daemontatox/PathfinderAI/c07f2943-f3f4-46be-993e-be56dadcb561.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Daemontatox_PathfinderAI/1762652579.5294342", - "retrieved_timestamp": "1762652579.529435", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Daemontatox/PathfinderAI", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/PathfinderAI", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": 
{ - "score": 0.4855006937148987 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6627335380624046 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42559375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.554188829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/Daemontatox/PathfinderAI/ec502619-880b-4b7c-acfe-c43cf6514e3f.json b/data/hfopenllm_v2/Daemontatox/PathfinderAI/ec502619-880b-4b7c-acfe-c43cf6514e3f.json new file mode 100644 index 000000000..02616b562 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/PathfinderAI/ec502619-880b-4b7c-acfe-c43cf6514e3f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Daemontatox_PathfinderAI/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PathfinderAI", + "id": "Daemontatox/PathfinderAI", + "developer": "Daemontatox", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3745 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6668 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4758 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + 
"dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3943 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4858 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5593 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/Phi-4-COT/6941a5dd-2a70-4846-a5f6-b16ef2d56a03.json b/data/hfopenllm_v2/Daemontatox/Phi-4-COT/6941a5dd-2a70-4846-a5f6-b16ef2d56a03.json new file mode 100644 index 000000000..d40331320 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/Phi-4-COT/6941a5dd-2a70-4846-a5f6-b16ef2d56a03.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Daemontatox_Phi-4-COT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-4-COT", + "id": "Daemontatox/Phi-4-COT", + "developer": "Daemontatox", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1793 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6173 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2243 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.3356 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.453 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5005 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/PixelParse_AI/29459932-a7a5-458f-9778-e236cc4ea985.json b/data/hfopenllm_v2/Daemontatox/PixelParse_AI/29459932-a7a5-458f-9778-e236cc4ea985.json deleted file mode 100644 index be080ec8f..000000000 --- a/data/hfopenllm_v2/Daemontatox/PixelParse_AI/29459932-a7a5-458f-9778-e236cc4ea985.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Daemontatox_PixelParse_AI/1762652579.529871", - "retrieved_timestamp": "1762652579.529872", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Daemontatox/PixelParse_AI", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/PixelParse_AI", - "additional_details": { - "precision": "bfloat16", - "architecture": "MllamaForConditionalGeneration", - "params_billions": 10.67 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43829040279790954 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5034307630533988 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1472809667673716 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40518750000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37782579787234044 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/Daemontatox/PixelParse_AI/636e2f93-3242-491c-9df5-003aa1dacecf.json b/data/hfopenllm_v2/Daemontatox/PixelParse_AI/636e2f93-3242-491c-9df5-003aa1dacecf.json new file mode 100644 index 000000000..c6518eaf3 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/PixelParse_AI/636e2f93-3242-491c-9df5-003aa1dacecf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Daemontatox_PixelParse_AI/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PixelParse_AI", + "id": "Daemontatox/PixelParse_AI", + "developer": "Daemontatox", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MllamaForConditionalGeneration", + "params_billions": 10.67 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4383 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5034 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1473 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3238 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4052 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3778 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/RA2.0/1f4efa23-816d-49be-8659-feb003f4b3ef.json b/data/hfopenllm_v2/Daemontatox/RA2.0/1f4efa23-816d-49be-8659-feb003f4b3ef.json new file mode 100644 index 000000000..cdbe9004f --- /dev/null +++ 
b/data/hfopenllm_v2/Daemontatox/RA2.0/1f4efa23-816d-49be-8659-feb003f4b3ef.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Daemontatox_RA2.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RA2.0", + "id": "Daemontatox/RA2.0", + "developer": "Daemontatox", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3784 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4889 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3837 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4091 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2616 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/RA2.0/3baf9882-5625-47eb-a88b-b172dfc9a330.json b/data/hfopenllm_v2/Daemontatox/RA2.0/3baf9882-5625-47eb-a88b-b172dfc9a330.json deleted file mode 100644 index ec61fff2c..000000000 --- a/data/hfopenllm_v2/Daemontatox/RA2.0/3baf9882-5625-47eb-a88b-b172dfc9a330.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Daemontatox_RA2.0/1762652579.53008", - "retrieved_timestamp": "1762652579.530081", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Daemontatox/RA2.0", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/RA2.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37838934028378035 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4888687006782508 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38368580060422963 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40912499999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26163563829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/Daemontatox/RA_Reasoner/ab74d5ca-6c80-44de-96e9-af61861090b6.json b/data/hfopenllm_v2/Daemontatox/RA_Reasoner/ab74d5ca-6c80-44de-96e9-af61861090b6.json deleted file mode 100644 index 192552964..000000000 --- a/data/hfopenllm_v2/Daemontatox/RA_Reasoner/ab74d5ca-6c80-44de-96e9-af61861090b6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Daemontatox_RA_Reasoner/1762652579.530283", - "retrieved_timestamp": "1762652579.530284", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Daemontatox/RA_Reasoner", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/RA_Reasoner", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.559215104810791 - } - }, - 
{ - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6053692417205033 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2122356495468278 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3963541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43001994680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/Daemontatox/RA_Reasoner/d05be1e4-bcac-4b4a-bbde-8b17a5a71243.json b/data/hfopenllm_v2/Daemontatox/RA_Reasoner/d05be1e4-bcac-4b4a-bbde-8b17a5a71243.json new file mode 100644 index 000000000..01700c90a --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/RA_Reasoner/d05be1e4-bcac-4b4a-bbde-8b17a5a71243.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Daemontatox_RA_Reasoner/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RA_Reasoner", + "id": "Daemontatox/RA_Reasoner", + "developer": "Daemontatox", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5592 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6054 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2122 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": 
"hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3314 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3964 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.43 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/RA_Reasoner2.0/5cf9872a-6d67-4b42-bfe4-abad05bdd9cf.json b/data/hfopenllm_v2/Daemontatox/RA_Reasoner2.0/5cf9872a-6d67-4b42-bfe4-abad05bdd9cf.json deleted file mode 100644 index 7945bf89a..000000000 --- a/data/hfopenllm_v2/Daemontatox/RA_Reasoner2.0/5cf9872a-6d67-4b42-bfe4-abad05bdd9cf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Daemontatox_RA_Reasoner2.0/1762652579.530484", - "retrieved_timestamp": "1762652579.530485", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Daemontatox/RA_Reasoner2.0", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/RA_Reasoner2.0", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5366339091388627 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6062469551969276 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2311178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3883541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4353390957446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/Daemontatox/RA_Reasoner2.0/9ab53055-86f5-4a88-976f-015dd9c9e832.json b/data/hfopenllm_v2/Daemontatox/RA_Reasoner2.0/9ab53055-86f5-4a88-976f-015dd9c9e832.json new file mode 100644 index 000000000..beab11efc --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/RA_Reasoner2.0/9ab53055-86f5-4a88-976f-015dd9c9e832.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Daemontatox_RA_Reasoner2.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RA_Reasoner2.0", + "id": "Daemontatox/RA_Reasoner2.0", + "developer": "Daemontatox", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5366 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6062 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2311 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3247 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3884 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4353 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/Daemontatox/ReasonTest/39d481bf-ea86-42a7-a6f1-ce38ce9dce30.json b/data/hfopenllm_v2/Daemontatox/ReasonTest/39d481bf-ea86-42a7-a6f1-ce38ce9dce30.json deleted file mode 100644 index 8a1acb2b6..000000000 --- a/data/hfopenllm_v2/Daemontatox/ReasonTest/39d481bf-ea86-42a7-a6f1-ce38ce9dce30.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Daemontatox_ReasonTest/1762652579.530685", - "retrieved_timestamp": "1762652579.530686", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Daemontatox/ReasonTest", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/ReasonTest", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.808 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4079653098223824 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.543526397621609 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21374622356495468 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43154166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4271941489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/Daemontatox/ReasonTest/ba34083a-9b13-46d9-8f36-aa3ddd586711.json b/data/hfopenllm_v2/Daemontatox/ReasonTest/ba34083a-9b13-46d9-8f36-aa3ddd586711.json new file mode 100644 index 000000000..60c753137 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/ReasonTest/ba34083a-9b13-46d9-8f36-aa3ddd586711.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Daemontatox_ReasonTest/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ReasonTest", + "id": "Daemontatox/ReasonTest", + "developer": "Daemontatox", + "inference_platform": "unknown", + "additional_details": { + 
"precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.808 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.408 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5435 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2137 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3188 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4315 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4272 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/Research_PathfinderAI/6a39d734-ad73-4c4a-9583-3563e336d4b3.json b/data/hfopenllm_v2/Daemontatox/Research_PathfinderAI/6a39d734-ad73-4c4a-9583-3563e336d4b3.json new file mode 100644 index 000000000..fbb52784d --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/Research_PathfinderAI/6a39d734-ad73-4c4a-9583-3563e336d4b3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Daemontatox_Research_PathfinderAI/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Research_PathfinderAI", + "id": "Daemontatox/Research_PathfinderAI", + "developer": "Daemontatox", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": 
"hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3457 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2872 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1699 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2408 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3394 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.113 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/Research_PathfinderAI/900e5686-083d-460c-918f-06a39936810c.json b/data/hfopenllm_v2/Daemontatox/Research_PathfinderAI/900e5686-083d-460c-918f-06a39936810c.json deleted file mode 100644 index e380d3a77..000000000 --- a/data/hfopenllm_v2/Daemontatox/Research_PathfinderAI/900e5686-083d-460c-918f-06a39936810c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Daemontatox_Research_PathfinderAI/1762652579.530894", - "retrieved_timestamp": "1762652579.530895", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Daemontatox/Research_PathfinderAI", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/Research_PathfinderAI", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 
- }, - "score_details": { - "score": 0.3456916537010687 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287225755504323 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16993957703927492 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2407718120805369 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33939583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11303191489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/Daemontatox/SphinX/2af71e88-4931-4359-b92a-c64fa33df802.json b/data/hfopenllm_v2/Daemontatox/SphinX/2af71e88-4931-4359-b92a-c64fa33df802.json new file mode 100644 index 000000000..4195d2bc9 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/SphinX/2af71e88-4931-4359-b92a-c64fa33df802.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Daemontatox_SphinX/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SphinX", + "id": "Daemontatox/SphinX", + "developer": "Daemontatox", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5725 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5441 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3082 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + 
"dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4405 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4366 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/Sphinx2.0/bf9336a7-a7c4-420a-9dd0-68d8e0c815c4.json b/data/hfopenllm_v2/Daemontatox/Sphinx2.0/bf9336a7-a7c4-420a-9dd0-68d8e0c815c4.json new file mode 100644 index 000000000..ab374a609 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/Sphinx2.0/bf9336a7-a7c4-420a-9dd0-68d8e0c815c4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Daemontatox_Sphinx2.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sphinx2.0", + "id": "Daemontatox/Sphinx2.0", + "developer": "Daemontatox", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7123 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6473 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4018 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.426 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5184 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/TinySphinx/2de872b2-10c7-44dd-91c3-f20205207da6.json b/data/hfopenllm_v2/Daemontatox/TinySphinx/2de872b2-10c7-44dd-91c3-f20205207da6.json new file mode 100644 index 000000000..8e4831568 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/TinySphinx/2de872b2-10c7-44dd-91c3-f20205207da6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Daemontatox_TinySphinx/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TinySphinx", + "id": "Daemontatox/TinySphinx", + "developer": "Daemontatox", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.247 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2567 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.331 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0431 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2735 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3328 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1698 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/TinySphinx2.0/5cabed09-d8ea-46c2-bb78-012dac954d6b.json b/data/hfopenllm_v2/Daemontatox/TinySphinx2.0/5cabed09-d8ea-46c2-bb78-012dac954d6b.json new file mode 100644 index 000000000..7a83a9405 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/TinySphinx2.0/5cabed09-d8ea-46c2-bb78-012dac954d6b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Daemontatox_TinySphinx2.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TinySphinx2.0", + "id": "Daemontatox/TinySphinx2.0", + "developer": "Daemontatox", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.247 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2535 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3168 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0325 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3382 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1731 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/Zirel-7B-Math/460de6c8-d706-420b-9c0a-a108ddb11e5f.json b/data/hfopenllm_v2/Daemontatox/Zirel-7B-Math/460de6c8-d706-420b-9c0a-a108ddb11e5f.json deleted file mode 100644 index 32f1fd780..000000000 --- a/data/hfopenllm_v2/Daemontatox/Zirel-7B-Math/460de6c8-d706-420b-9c0a-a108ddb11e5f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Daemontatox_Zirel-7B-Math/1762652579.531958", - "retrieved_timestamp": "1762652579.531959", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Daemontatox/Zirel-7B-Math", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/Zirel-7B-Math", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6638785090227264 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5447698777469486 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19788519637462235 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3263422818791946 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47891666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4237034574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/Daemontatox/Zirel-7B-Math/8236db6a-ff8a-4237-af5a-03bb258f8e59.json b/data/hfopenllm_v2/Daemontatox/Zirel-7B-Math/8236db6a-ff8a-4237-af5a-03bb258f8e59.json new file mode 100644 index 000000000..00c47b014 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/Zirel-7B-Math/8236db6a-ff8a-4237-af5a-03bb258f8e59.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Daemontatox_Zirel-7B-Math/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": 
"documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Zirel-7B-Math", + "id": "Daemontatox/Zirel-7B-Math", + "developer": "Daemontatox", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6639 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5448 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1979 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3263 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4789 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4237 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/Zirel_1.5/1a7b078e-bc1f-400f-a0cd-f7b535548f23.json b/data/hfopenllm_v2/Daemontatox/Zirel_1.5/1a7b078e-bc1f-400f-a0cd-f7b535548f23.json new file mode 100644 index 000000000..160e661a4 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/Zirel_1.5/1a7b078e-bc1f-400f-a0cd-f7b535548f23.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Daemontatox_Zirel_1.5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Zirel_1.5", + "id": "Daemontatox/Zirel_1.5", + "developer": "Daemontatox", + "inference_platform": "unknown", + "additional_details": { + "precision": 
"bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4168 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3985 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1133 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3658 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2143 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/Zirel_1.5/661e2393-2560-4d25-a6f3-f0d680052e8e.json b/data/hfopenllm_v2/Daemontatox/Zirel_1.5/661e2393-2560-4d25-a6f3-f0d680052e8e.json deleted file mode 100644 index bdd8ccb5c..000000000 --- a/data/hfopenllm_v2/Daemontatox/Zirel_1.5/661e2393-2560-4d25-a6f3-f0d680052e8e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Daemontatox_Zirel_1.5/1762652579.532257", - "retrieved_timestamp": "1762652579.532258", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Daemontatox/Zirel_1.5", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/Zirel_1.5", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4167575366693706 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3984669254999634 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11329305135951662 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36581250000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21434507978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/Daemontatox/mini-Cogito-R1/faac8ed1-1042-42dc-9762-3f90161fb34f.json b/data/hfopenllm_v2/Daemontatox/mini-Cogito-R1/faac8ed1-1042-42dc-9762-3f90161fb34f.json deleted file mode 100644 index a790dd843..000000000 --- a/data/hfopenllm_v2/Daemontatox/mini-Cogito-R1/faac8ed1-1042-42dc-9762-3f90161fb34f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Daemontatox_mini-Cogito-R1/1762652579.532486", - "retrieved_timestamp": "1762652579.532487", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Daemontatox/mini-Cogito-R1", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/mini-Cogito-R1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2298368329366082 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3280491875175077 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27492447129909364 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34469791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14818816489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/Daemontatox/mini-Cogito-R1/fdaf561c-567c-416d-a74a-ac3c07c5be5b.json b/data/hfopenllm_v2/Daemontatox/mini-Cogito-R1/fdaf561c-567c-416d-a74a-ac3c07c5be5b.json new file mode 100644 index 000000000..a9a68a873 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/mini-Cogito-R1/fdaf561c-567c-416d-a74a-ac3c07c5be5b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Daemontatox_mini-Cogito-R1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mini-Cogito-R1", + "id": "Daemontatox/mini-Cogito-R1", + "developer": "Daemontatox", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2298 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.328 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2749 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 
1.0 + }, + "score_details": { + "score": 0.3447 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1482 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/mini_Pathfinder/58900b3b-303b-49c8-b807-7b8d06601568.json b/data/hfopenllm_v2/Daemontatox/mini_Pathfinder/58900b3b-303b-49c8-b807-7b8d06601568.json new file mode 100644 index 000000000..08611e986 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/mini_Pathfinder/58900b3b-303b-49c8-b807-7b8d06601568.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Daemontatox_mini_Pathfinder/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mini_Pathfinder", + "id": "Daemontatox/mini_Pathfinder", + "developer": "Daemontatox", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2962 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3956 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4751 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3781 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2809 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/mini_Pathfinder/a9afd0b3-8189-47e0-9e33-d60540679e20.json b/data/hfopenllm_v2/Daemontatox/mini_Pathfinder/a9afd0b3-8189-47e0-9e33-d60540679e20.json deleted file mode 100644 index d46b915da..000000000 --- a/data/hfopenllm_v2/Daemontatox/mini_Pathfinder/a9afd0b3-8189-47e0-9e33-d60540679e20.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Daemontatox_mini_Pathfinder/1762652579.53272", - "retrieved_timestamp": "1762652579.5327208", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Daemontatox/mini_Pathfinder", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/mini_Pathfinder", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29615752869054107 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39556911910803755 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47507552870090636 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37809374999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28091755319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/Dampfinchen/Llama-3.1-8B-Ultra-Instruct/2a0d23aa-47ae-4974-ac64-5371097a1b0f.json b/data/hfopenllm_v2/Dampfinchen/Llama-3.1-8B-Ultra-Instruct/2a0d23aa-47ae-4974-ac64-5371097a1b0f.json deleted file mode 100644 index 90bf7e482..000000000 --- a/data/hfopenllm_v2/Dampfinchen/Llama-3.1-8B-Ultra-Instruct/2a0d23aa-47ae-4974-ac64-5371097a1b0f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Dampfinchen_Llama-3.1-8B-Ultra-Instruct/1762652579.532935", - "retrieved_timestamp": "1762652579.532935", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Dampfinchen/Llama-3.1-8B-Ultra-Instruct", - "developer": "Dampfinchen", - "inference_platform": "unknown", - "id": "Dampfinchen/Llama-3.1-8B-Ultra-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8081091503876381 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5257532452246574 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40032291666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.382563164893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/Dampfinchen/Llama-3.1-8B-Ultra-Instruct/7ac5a45a-7b41-4f63-8556-8737638a00ea.json b/data/hfopenllm_v2/Dampfinchen/Llama-3.1-8B-Ultra-Instruct/7ac5a45a-7b41-4f63-8556-8737638a00ea.json new file mode 100644 index 000000000..d6ac101ed --- /dev/null +++ b/data/hfopenllm_v2/Dampfinchen/Llama-3.1-8B-Ultra-Instruct/7ac5a45a-7b41-4f63-8556-8737638a00ea.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Dampfinchen_Llama-3.1-8B-Ultra-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8B-Ultra-Instruct", + "id": "Dampfinchen/Llama-3.1-8B-Ultra-Instruct", + "developer": "Dampfinchen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8081 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5258 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2205 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3826 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Danielbrdz/Barcenas-10b/3cb55475-30c8-43c8-8d7d-394450fdc117.json b/data/hfopenllm_v2/Danielbrdz/Barcenas-10b/3cb55475-30c8-43c8-8d7d-394450fdc117.json new file mode 100644 index 000000000..104efe008 --- /dev/null +++ b/data/hfopenllm_v2/Danielbrdz/Barcenas-10b/3cb55475-30c8-43c8-8d7d-394450fdc117.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Danielbrdz_Barcenas-10b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Barcenas-10b", + "id": "Danielbrdz/Barcenas-10b", + "developer": "Danielbrdz", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6608 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6121 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2153 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3414 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4135 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4361 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Danielbrdz/Barcenas-10b/acdaefdc-b28c-4081-bf72-517d6c70595e.json b/data/hfopenllm_v2/Danielbrdz/Barcenas-10b/acdaefdc-b28c-4081-bf72-517d6c70595e.json deleted file mode 100644 index 90fd01bbe..000000000 --- a/data/hfopenllm_v2/Danielbrdz/Barcenas-10b/acdaefdc-b28c-4081-bf72-517d6c70595e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Danielbrdz_Barcenas-10b/1762652579.533203", - "retrieved_timestamp": "1762652579.533203", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Danielbrdz/Barcenas-10b", - "developer": "Danielbrdz", - "inference_platform": "unknown", - "id": "Danielbrdz/Barcenas-10b", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6607811717354397 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6120828494270083 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21525679758308158 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3414429530201342 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41346875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4360871010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/Danielbrdz/Barcenas-14b-Phi-3-medium-ORPO/f5e140ff-0c0e-4769-8116-63cf50255773.json b/data/hfopenllm_v2/Danielbrdz/Barcenas-14b-Phi-3-medium-ORPO/f5e140ff-0c0e-4769-8116-63cf50255773.json new file mode 100644 index 000000000..4dc6150c8 --- /dev/null +++ b/data/hfopenllm_v2/Danielbrdz/Barcenas-14b-Phi-3-medium-ORPO/f5e140ff-0c0e-4769-8116-63cf50255773.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Danielbrdz_Barcenas-14b-Phi-3-medium-ORPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Barcenas-14b-Phi-3-medium-ORPO", + "id": "Danielbrdz/Barcenas-14b-Phi-3-medium-ORPO", + "developer": "Danielbrdz", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 13.96 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4799 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6536 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2024 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3263 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { 
+ "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4808 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4723 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Danielbrdz/Barcenas-14b-phi-4-v2/df85ec6e-1325-40ce-8087-d960a1d767dd.json b/data/hfopenllm_v2/Danielbrdz/Barcenas-14b-phi-4-v2/df85ec6e-1325-40ce-8087-d960a1d767dd.json new file mode 100644 index 000000000..604f4b71a --- /dev/null +++ b/data/hfopenllm_v2/Danielbrdz/Barcenas-14b-phi-4-v2/df85ec6e-1325-40ce-8087-d960a1d767dd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Danielbrdz_Barcenas-14b-phi-4-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Barcenas-14b-phi-4-v2", + "id": "Danielbrdz/Barcenas-14b-phi-4-v2", + "developer": "Danielbrdz", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2775 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6573 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3218 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3784 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 
0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4399 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5244 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Danielbrdz/Barcenas-14b-phi-4/a7bd3fff-f01e-46ca-af85-5b4ac6ae7320.json b/data/hfopenllm_v2/Danielbrdz/Barcenas-14b-phi-4/a7bd3fff-f01e-46ca-af85-5b4ac6ae7320.json new file mode 100644 index 000000000..e976af74e --- /dev/null +++ b/data/hfopenllm_v2/Danielbrdz/Barcenas-14b-phi-4/a7bd3fff-f01e-46ca-af85-5b4ac6ae7320.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Danielbrdz_Barcenas-14b-phi-4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Barcenas-14b-phi-4", + "id": "Danielbrdz/Barcenas-14b-phi-4", + "developer": "Danielbrdz", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0498 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6769 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2583 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3834 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5097 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": 
{ + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5175 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Danielbrdz/Barcenas-3b-GRPO/11842dd9-0572-41ef-aaa0-8d19f3420efc.json b/data/hfopenllm_v2/Danielbrdz/Barcenas-3b-GRPO/11842dd9-0572-41ef-aaa0-8d19f3420efc.json new file mode 100644 index 000000000..85744ceae --- /dev/null +++ b/data/hfopenllm_v2/Danielbrdz/Barcenas-3b-GRPO/11842dd9-0572-41ef-aaa0-8d19f3420efc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Danielbrdz_Barcenas-3b-GRPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Barcenas-3b-GRPO", + "id": "Danielbrdz/Barcenas-3b-GRPO", + "developer": "Danielbrdz", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5444 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4414 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1375 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3576 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/Danielbrdz/Barcenas-3b-GRPO/88a3b40a-3ba2-4f13-bd8c-110872d807c7.json b/data/hfopenllm_v2/Danielbrdz/Barcenas-3b-GRPO/88a3b40a-3ba2-4f13-bd8c-110872d807c7.json deleted file mode 100644 index 80452a47d..000000000 --- a/data/hfopenllm_v2/Danielbrdz/Barcenas-3b-GRPO/88a3b40a-3ba2-4f13-bd8c-110872d807c7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Danielbrdz_Barcenas-3b-GRPO/1762652579.534181", - "retrieved_timestamp": "1762652579.5341818", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Danielbrdz/Barcenas-3b-GRPO", - "developer": "Danielbrdz", - "inference_platform": "unknown", - "id": "Danielbrdz/Barcenas-3b-GRPO", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5444276741268723 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44143515175110304 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13746223564954682 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35759375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036901595744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/Danielbrdz/Barcenas-Llama3-8b-ORPO/01abccec-1cea-4060-89be-289987d0a2ce.json b/data/hfopenllm_v2/Danielbrdz/Barcenas-Llama3-8b-ORPO/01abccec-1cea-4060-89be-289987d0a2ce.json new file mode 100644 index 000000000..a208f17df --- /dev/null +++ b/data/hfopenllm_v2/Danielbrdz/Barcenas-Llama3-8b-ORPO/01abccec-1cea-4060-89be-289987d0a2ce.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Danielbrdz_Barcenas-Llama3-8b-ORPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Barcenas-Llama3-8b-ORPO", + "id": 
"Danielbrdz/Barcenas-Llama3-8b-ORPO", + "developer": "Danielbrdz", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7372 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4987 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0657 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.419 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.383 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Danielbrdz/Barcenas-R1-Qwen-1.5b/dce8226c-57bd-4255-b813-8a70494f0a1a.json b/data/hfopenllm_v2/Danielbrdz/Barcenas-R1-Qwen-1.5b/dce8226c-57bd-4255-b813-8a70494f0a1a.json new file mode 100644 index 000000000..03948309d --- /dev/null +++ b/data/hfopenllm_v2/Danielbrdz/Barcenas-R1-Qwen-1.5b/dce8226c-57bd-4255-b813-8a70494f0a1a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Danielbrdz_Barcenas-R1-Qwen-1.5b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Barcenas-R1-Qwen-1.5b", + "id": "Danielbrdz/Barcenas-R1-Qwen-1.5b", + "developer": "Danielbrdz", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + 
"evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2428 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3587 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3497 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3541 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1909 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Dans-DiscountModels/12b-mn-dans-reasoning-test-2/7f80e69c-eec6-49ac-a088-6248ee25f736.json b/data/hfopenllm_v2/Dans-DiscountModels/12b-mn-dans-reasoning-test-2/7f80e69c-eec6-49ac-a088-6248ee25f736.json new file mode 100644 index 000000000..f352222ea --- /dev/null +++ b/data/hfopenllm_v2/Dans-DiscountModels/12b-mn-dans-reasoning-test-2/7f80e69c-eec6-49ac-a088-6248ee25f736.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_12b-mn-dans-reasoning-test-2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "12b-mn-dans-reasoning-test-2", + "id": "Dans-DiscountModels/12b-mn-dans-reasoning-test-2", + "developer": "Dans-DiscountModels", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": 
"hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3711 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4807 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0634 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2735 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3702 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2507 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Dans-DiscountModels/12b-mn-dans-reasoning-test-2/bd21f54f-6b0c-4db9-bb46-7a4c60f960ae.json b/data/hfopenllm_v2/Dans-DiscountModels/12b-mn-dans-reasoning-test-2/bd21f54f-6b0c-4db9-bb46-7a4c60f960ae.json deleted file mode 100644 index 717cffeac..000000000 --- a/data/hfopenllm_v2/Dans-DiscountModels/12b-mn-dans-reasoning-test-2/bd21f54f-6b0c-4db9-bb46-7a4c60f960ae.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_12b-mn-dans-reasoning-test-2/1762652579.534956", - "retrieved_timestamp": "1762652579.5349572", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Dans-DiscountModels/12b-mn-dans-reasoning-test-2", - "developer": "Dans-DiscountModels", - "inference_platform": "unknown", - "id": "Dans-DiscountModels/12b-mn-dans-reasoning-test-2", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy 
on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3710953603106424 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48070333147041405 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37021875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2507480053191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/Dans-DiscountModels/12b-mn-dans-reasoning-test-3/c9dedad4-65d4-479e-b465-912cd8885e32.json b/data/hfopenllm_v2/Dans-DiscountModels/12b-mn-dans-reasoning-test-3/c9dedad4-65d4-479e-b465-912cd8885e32.json deleted file mode 100644 index ae74b914c..000000000 --- a/data/hfopenllm_v2/Dans-DiscountModels/12b-mn-dans-reasoning-test-3/c9dedad4-65d4-479e-b465-912cd8885e32.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_12b-mn-dans-reasoning-test-3/1762652579.535208", - "retrieved_timestamp": "1762652579.535209", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Dans-DiscountModels/12b-mn-dans-reasoning-test-3", - "developer": "Dans-DiscountModels", - "inference_platform": "unknown", - "id": "Dans-DiscountModels/12b-mn-dans-reasoning-test-3", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5052593784491815 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48388753289945696 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.07779456193353475 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4167604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2515791223404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/Dans-DiscountModels/12b-mn-dans-reasoning-test-3/e0267a2c-dfc5-456e-864d-b5b0ad1fa508.json b/data/hfopenllm_v2/Dans-DiscountModels/12b-mn-dans-reasoning-test-3/e0267a2c-dfc5-456e-864d-b5b0ad1fa508.json new file mode 100644 index 000000000..570947dbd --- /dev/null +++ b/data/hfopenllm_v2/Dans-DiscountModels/12b-mn-dans-reasoning-test-3/e0267a2c-dfc5-456e-864d-b5b0ad1fa508.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_12b-mn-dans-reasoning-test-3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "12b-mn-dans-reasoning-test-3", + "id": "Dans-DiscountModels/12b-mn-dans-reasoning-test-3", + "developer": "Dans-DiscountModels", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5053 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4839 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0778 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + 
"source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4168 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2516 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-CoreCurriculum-12b-ChatML/6b61018c-249d-482b-a787-06f1e6514f29.json b/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-CoreCurriculum-12b-ChatML/6b61018c-249d-482b-a787-06f1e6514f29.json deleted file mode 100644 index ddb9080ca..000000000 --- a/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-CoreCurriculum-12b-ChatML/6b61018c-249d-482b-a787-06f1e6514f29.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_Dans-Instruct-CoreCurriculum-12b-ChatML/1762652579.535429", - "retrieved_timestamp": "1762652579.53543", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Dans-DiscountModels/Dans-Instruct-CoreCurriculum-12b-ChatML", - "developer": "Dans-DiscountModels", - "inference_platform": "unknown", - "id": "Dans-DiscountModels/Dans-Instruct-CoreCurriculum-12b-ChatML", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21110209798889168 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4791864789096407 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3606354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.2805019946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-CoreCurriculum-12b-ChatML/e6ad37be-28f4-43b4-9df1-b7b47d31232e.json b/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-CoreCurriculum-12b-ChatML/e6ad37be-28f4-43b4-9df1-b7b47d31232e.json new file mode 100644 index 000000000..fc2a36d87 --- /dev/null +++ b/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-CoreCurriculum-12b-ChatML/e6ad37be-28f4-43b4-9df1-b7b47d31232e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_Dans-Instruct-CoreCurriculum-12b-ChatML/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dans-Instruct-CoreCurriculum-12b-ChatML", + "id": "Dans-DiscountModels/Dans-Instruct-CoreCurriculum-12b-ChatML", + "developer": "Dans-DiscountModels", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2111 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4792 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0431 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3606 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2805 + } + } + ] +} \ No 
newline at end of file diff --git a/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.0/5514368a-1f7d-4cd0-b7f7-d116b753f975.json b/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.0/5514368a-1f7d-4cd0-b7f7-d116b753f975.json new file mode 100644 index 000000000..83903ba4f --- /dev/null +++ b/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.0/5514368a-1f7d-4cd0-b7f7-d116b753f975.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML-V0.1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dans-Instruct-Mix-8b-ChatML-V0.1.0", + "id": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.0", + "developer": "Dans-DiscountModels", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0668 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4775 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0672 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2861 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3786 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3284 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.0/9873b58d-1ffd-44a7-bb93-15038986419a.json b/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.0/9873b58d-1ffd-44a7-bb93-15038986419a.json deleted file mode 100644 index 08a1ef48f..000000000 --- a/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.0/9873b58d-1ffd-44a7-bb93-15038986419a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML-V0.1.0/1762652579.5358772", - "retrieved_timestamp": "1762652579.535878", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.0", - "developer": "Dans-DiscountModels", - "inference_platform": "unknown", - "id": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06682048076880455 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47747656219777285 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06722054380664652 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3785833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.328374335106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.1/71656625-cd85-49a6-a8df-abc0b9c0ae5d.json b/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.1/71656625-cd85-49a6-a8df-abc0b9c0ae5d.json deleted file mode 100644 index 9fb501141..000000000 --- a/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.1/71656625-cd85-49a6-a8df-abc0b9c0ae5d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML-V0.1.1/1762652579.5360918", - "retrieved_timestamp": "1762652579.5360918", - "source_data": [ 
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.1", - "developer": "Dans-DiscountModels", - "inference_platform": "unknown", - "id": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09105063453857985 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4748653313732898 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05966767371601209 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3824895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.327875664893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.1/c0e29cf8-897f-4e07-abb4-71c801d34301.json b/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.1/c0e29cf8-897f-4e07-abb4-71c801d34301.json new file mode 100644 index 000000000..4a453f833 --- /dev/null +++ b/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.1/c0e29cf8-897f-4e07-abb4-71c801d34301.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML-V0.1.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dans-Instruct-Mix-8b-ChatML-V0.1.1", + "id": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.1", + "developer": "Dans-DiscountModels", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": 
"google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0911 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4749 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0597 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3825 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3279 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.2.0/68310379-65b2-482d-892b-f76547bce2b0.json b/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.2.0/68310379-65b2-482d-892b-f76547bce2b0.json new file mode 100644 index 000000000..a2a7418c6 --- /dev/null +++ b/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.2.0/68310379-65b2-482d-892b-f76547bce2b0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML-V0.2.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dans-Instruct-Mix-8b-ChatML-V0.2.0", + "id": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.2.0", + "developer": "Dans-DiscountModels", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5064 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4624 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0733 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3644 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.2.0/d47dc284-0ed6-4853-8a54-b87b4b529150.json b/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.2.0/d47dc284-0ed6-4853-8a54-b87b4b529150.json deleted file mode 100644 index 1c42ecd18..000000000 --- a/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.2.0/d47dc284-0ed6-4853-8a54-b87b4b529150.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML-V0.2.0/1762652579.536302", - "retrieved_timestamp": "1762652579.536303", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.2.0", - "developer": "Dans-DiscountModels", - "inference_platform": "unknown", - "id": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.2.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.5064085515321569 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4624263551503409 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07326283987915408 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3644479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2999501329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML/60db255b-d34c-4f33-91a4-279a9ccc6791.json b/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML/60db255b-d34c-4f33-91a4-279a9ccc6791.json deleted file mode 100644 index 5b5606802..000000000 --- a/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML/60db255b-d34c-4f33-91a4-279a9ccc6791.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML/1762652579.5356538", - "retrieved_timestamp": "1762652579.535655", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML", - "developer": "Dans-DiscountModels", - "inference_platform": "unknown", - "id": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08250774611364513 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4738171816307924 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3918229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32878989361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML/a034c4ec-d4cd-439b-8dbd-e67685ea7616.json b/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML/a034c4ec-d4cd-439b-8dbd-e67685ea7616.json new file mode 100644 index 000000000..d03dd14df --- /dev/null +++ b/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML/a034c4ec-d4cd-439b-8dbd-e67685ea7616.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dans-Instruct-Mix-8b-ChatML", + "id": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML", + "developer": "Dans-DiscountModels", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0825 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4738 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0551 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3918 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3288 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Dans-DiscountModels/Mistral-7b-v0.3-Test-E0.7/e4b761d3-bb84-4433-b9fb-4c92ecae6279.json b/data/hfopenllm_v2/Dans-DiscountModels/Mistral-7b-v0.3-Test-E0.7/e4b761d3-bb84-4433-b9fb-4c92ecae6279.json new file mode 100644 index 000000000..17f92f2fb --- /dev/null +++ b/data/hfopenllm_v2/Dans-DiscountModels/Mistral-7b-v0.3-Test-E0.7/e4b761d3-bb84-4433-b9fb-4c92ecae6279.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_Mistral-7b-v0.3-Test-E0.7/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-7b-v0.3-Test-E0.7", + "id": "Dans-DiscountModels/Mistral-7b-v0.3-Test-E0.7", + "developer": "Dans-DiscountModels", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5124 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.475 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.034 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.4005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2744 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Dans-DiscountModels/mistral-7b-test-merged/38d78d30-be6d-476c-a3aa-d9a40f570a56.json b/data/hfopenllm_v2/Dans-DiscountModels/mistral-7b-test-merged/38d78d30-be6d-476c-a3aa-d9a40f570a56.json new file mode 100644 index 000000000..530b81d04 --- /dev/null +++ b/data/hfopenllm_v2/Dans-DiscountModels/mistral-7b-test-merged/38d78d30-be6d-476c-a3aa-d9a40f570a56.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_mistral-7b-test-merged/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistral-7b-test-merged", + "id": "Dans-DiscountModels/mistral-7b-test-merged", + "developer": "Dans-DiscountModels", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6678 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4898 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0446 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3754 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + 
"hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Darkknight535/OpenCrystal-12B-L3/36e60f6c-60f7-4b17-88fe-82810e195fc7.json b/data/hfopenllm_v2/Darkknight535/OpenCrystal-12B-L3/36e60f6c-60f7-4b17-88fe-82810e195fc7.json new file mode 100644 index 000000000..62f99b5b5 --- /dev/null +++ b/data/hfopenllm_v2/Darkknight535/OpenCrystal-12B-L3/36e60f6c-60f7-4b17-88fe-82810e195fc7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Darkknight535_OpenCrystal-12B-L3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenCrystal-12B-L3", + "id": "Darkknight535/OpenCrystal-12B-L3", + "developer": "Darkknight535", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 11.52 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4071 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5223 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0899 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3657 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { 
+ "score": 0.364 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Darkknight535/OpenCrystal-12B-L3/8edb0a0d-994b-4b97-b9a7-7f46ba0e7365.json b/data/hfopenllm_v2/Darkknight535/OpenCrystal-12B-L3/8edb0a0d-994b-4b97-b9a7-7f46ba0e7365.json deleted file mode 100644 index 0c6d391c4..000000000 --- a/data/hfopenllm_v2/Darkknight535/OpenCrystal-12B-L3/8edb0a0d-994b-4b97-b9a7-7f46ba0e7365.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Darkknight535_OpenCrystal-12B-L3/1762652579.5369642", - "retrieved_timestamp": "1762652579.5369651", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Darkknight535/OpenCrystal-12B-L3", - "developer": "Darkknight535", - "inference_platform": "unknown", - "id": "Darkknight535/OpenCrystal-12B-L3", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 11.52 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4070909630890482 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5222598504945516 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08987915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36565625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3640292553191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/DavidAU/DeepHermes-3-Llama-3-8B-Preview-16.5B-Brainstorm/a6c647e8-ed24-4150-8563-dd9b20e21498.json b/data/hfopenllm_v2/DavidAU/DeepHermes-3-Llama-3-8B-Preview-16.5B-Brainstorm/a6c647e8-ed24-4150-8563-dd9b20e21498.json new file mode 100644 index 000000000..61ffee2d2 --- /dev/null +++ b/data/hfopenllm_v2/DavidAU/DeepHermes-3-Llama-3-8B-Preview-16.5B-Brainstorm/a6c647e8-ed24-4150-8563-dd9b20e21498.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DavidAU_DeepHermes-3-Llama-3-8B-Preview-16.5B-Brainstorm/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": 
"documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepHermes-3-Llama-3-8B-Preview-16.5B-Brainstorm", + "id": "DavidAU/DeepHermes-3-Llama-3-8B-Preview-16.5B-Brainstorm", + "developer": "DavidAU", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 16.537 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3136 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4762 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1057 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3138 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3928 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3209 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavidAU/DeepSeek-BlackRoot-R1-Distill-Llama-3.1-8B/b5a366ac-d736-4447-a2f1-98d0b84ba3bd.json b/data/hfopenllm_v2/DavidAU/DeepSeek-BlackRoot-R1-Distill-Llama-3.1-8B/b5a366ac-d736-4447-a2f1-98d0b84ba3bd.json new file mode 100644 index 000000000..408f2c43a --- /dev/null +++ b/data/hfopenllm_v2/DavidAU/DeepSeek-BlackRoot-R1-Distill-Llama-3.1-8B/b5a366ac-d736-4447-a2f1-98d0b84ba3bd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DavidAU_DeepSeek-BlackRoot-R1-Distill-Llama-3.1-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + 
"model_info": { + "name": "DeepSeek-BlackRoot-R1-Distill-Llama-3.1-8B", + "id": "DavidAU/DeepSeek-BlackRoot-R1-Distill-Llama-3.1-8B", + "developer": "DavidAU", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3685 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4887 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0657 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.318 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.432 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2976 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavidAU/DeepSeek-Grand-Horror-SMB-R1-Distill-Llama-3.1-16B/5d098dc6-8124-4d26-86ec-d54e6e09c3a6.json b/data/hfopenllm_v2/DavidAU/DeepSeek-Grand-Horror-SMB-R1-Distill-Llama-3.1-16B/5d098dc6-8124-4d26-86ec-d54e6e09c3a6.json new file mode 100644 index 000000000..ba72ae399 --- /dev/null +++ b/data/hfopenllm_v2/DavidAU/DeepSeek-Grand-Horror-SMB-R1-Distill-Llama-3.1-16B/5d098dc6-8124-4d26-86ec-d54e6e09c3a6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DavidAU_DeepSeek-Grand-Horror-SMB-R1-Distill-Llama-3.1-16B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-Grand-Horror-SMB-R1-Distill-Llama-3.1-16B", + "id": 
"DavidAU/DeepSeek-Grand-Horror-SMB-R1-Distill-Llama-3.1-16B", + "developer": "DavidAU", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 15.664 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2507 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4488 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0295 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3138 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4164 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2709 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Deep-Thinker-Uncensored-24B/1137cbc4-d80b-4e21-bfeb-feab41dc80b2.json b/data/hfopenllm_v2/DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Deep-Thinker-Uncensored-24B/1137cbc4-d80b-4e21-bfeb-feab41dc80b2.json new file mode 100644 index 000000000..55cc9cff5 --- /dev/null +++ b/data/hfopenllm_v2/DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Deep-Thinker-Uncensored-24B/1137cbc4-d80b-4e21-bfeb-feab41dc80b2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DavidAU_DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Deep-Thinker-Uncensored-24B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Deep-Thinker-Uncensored-24B", + 
"id": "DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Deep-Thinker-Uncensored-24B", + "developer": "DavidAU", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.942 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3883 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4886 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0816 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.323 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3024 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Mad-Scientist-24B/097bbfbc-0ccd-4fd4-9e0c-9c192cba9e8b.json b/data/hfopenllm_v2/DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Mad-Scientist-24B/097bbfbc-0ccd-4fd4-9e0c-9c192cba9e8b.json new file mode 100644 index 000000000..e4618c8df --- /dev/null +++ b/data/hfopenllm_v2/DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Mad-Scientist-24B/097bbfbc-0ccd-4fd4-9e0c-9c192cba9e8b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DavidAU_DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Mad-Scientist-24B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Mad-Scientist-24B", + "id": 
"DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Mad-Scientist-24B", + "developer": "DavidAU", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.942 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3436 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4769 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0755 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3372 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4231 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavidAU/DeepSeek-R1-Distill-Qwen-25.5B-Brainstorm/db8c6169-bfc1-48bb-be53-fa93c673f051.json b/data/hfopenllm_v2/DavidAU/DeepSeek-R1-Distill-Qwen-25.5B-Brainstorm/db8c6169-bfc1-48bb-be53-fa93c673f051.json new file mode 100644 index 000000000..a94c29f09 --- /dev/null +++ b/data/hfopenllm_v2/DavidAU/DeepSeek-R1-Distill-Qwen-25.5B-Brainstorm/db8c6169-bfc1-48bb-be53-fa93c673f051.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DavidAU_DeepSeek-R1-Distill-Qwen-25.5B-Brainstorm/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-R1-Distill-Qwen-25.5B-Brainstorm", + "id": "DavidAU/DeepSeek-R1-Distill-Qwen-25.5B-Brainstorm", + "developer": "DavidAU", + "inference_platform": "unknown", 
+ "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 25.506 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3416 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5807 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5536 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3859 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5155 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4624 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavidAU/DeepSeek-V2-Grand-Horror-SMB-R1-Distill-Llama-3.1-Uncensored-16.5B/41437fc9-6d48-4317-a8de-ab4e63b2cf46.json b/data/hfopenllm_v2/DavidAU/DeepSeek-V2-Grand-Horror-SMB-R1-Distill-Llama-3.1-Uncensored-16.5B/41437fc9-6d48-4317-a8de-ab4e63b2cf46.json new file mode 100644 index 000000000..0f3796fae --- /dev/null +++ b/data/hfopenllm_v2/DavidAU/DeepSeek-V2-Grand-Horror-SMB-R1-Distill-Llama-3.1-Uncensored-16.5B/41437fc9-6d48-4317-a8de-ab4e63b2cf46.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DavidAU_DeepSeek-V2-Grand-Horror-SMB-R1-Distill-Llama-3.1-Uncensored-16.5B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-V2-Grand-Horror-SMB-R1-Distill-Llama-3.1-Uncensored-16.5B", + "id": "DavidAU/DeepSeek-V2-Grand-Horror-SMB-R1-Distill-Llama-3.1-Uncensored-16.5B", + "developer": "DavidAU", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 16.537 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2853 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4462 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0174 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4179 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2778 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavidAU/DeepThought-MOE-8X3B-R1-Llama-3.2-Reasoning-18B/e075f4fe-95e0-48f4-94c4-f6ebd3f4edaa.json b/data/hfopenllm_v2/DavidAU/DeepThought-MOE-8X3B-R1-Llama-3.2-Reasoning-18B/e075f4fe-95e0-48f4-94c4-f6ebd3f4edaa.json new file mode 100644 index 000000000..774de1d5e --- /dev/null +++ b/data/hfopenllm_v2/DavidAU/DeepThought-MOE-8X3B-R1-Llama-3.2-Reasoning-18B/e075f4fe-95e0-48f4-94c4-f6ebd3f4edaa.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DavidAU_DeepThought-MOE-8X3B-R1-Llama-3.2-Reasoning-18B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepThought-MOE-8X3B-R1-Llama-3.2-Reasoning-18B", + "id": "DavidAU/DeepThought-MOE-8X3B-R1-Llama-3.2-Reasoning-18B", + "developer": "DavidAU", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + 
"architecture": "MixtralForCausalLM", + "params_billions": 18.405 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3793 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4232 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.108 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.356 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.272 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavidAU/Gemma-The-Writer-9B/3349d66c-e12b-49c1-a406-e0e77b697458.json b/data/hfopenllm_v2/DavidAU/Gemma-The-Writer-9B/3349d66c-e12b-49c1-a406-e0e77b697458.json new file mode 100644 index 000000000..1978b8349 --- /dev/null +++ b/data/hfopenllm_v2/DavidAU/Gemma-The-Writer-9B/3349d66c-e12b-49c1-a406-e0e77b697458.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DavidAU_Gemma-The-Writer-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma-The-Writer-9B", + "id": "DavidAU/Gemma-The-Writer-9B", + "developer": "DavidAU", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.174 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5905 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0876 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3456 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4099 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3979 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavidAU/Gemma-The-Writer-DEADLINE-10B/7aa0ff6b-11a9-4554-a27f-e477a0ff77c7.json b/data/hfopenllm_v2/DavidAU/Gemma-The-Writer-DEADLINE-10B/7aa0ff6b-11a9-4554-a27f-e477a0ff77c7.json new file mode 100644 index 000000000..5af39ed94 --- /dev/null +++ b/data/hfopenllm_v2/DavidAU/Gemma-The-Writer-DEADLINE-10B/7aa0ff6b-11a9-4554-a27f-e477a0ff77c7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DavidAU_Gemma-The-Writer-DEADLINE-10B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma-The-Writer-DEADLINE-10B", + "id": "DavidAU/Gemma-The-Writer-DEADLINE-10B", + "developer": "DavidAU", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.952 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.2332 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5896 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0989 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3423 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4189 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3946 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavidAU/Gemma-The-Writer-J.GutenBerg-10B/ac749485-df6d-485e-8fa7-63bdfd744167.json b/data/hfopenllm_v2/DavidAU/Gemma-The-Writer-J.GutenBerg-10B/ac749485-df6d-485e-8fa7-63bdfd744167.json new file mode 100644 index 000000000..f0e5db406 --- /dev/null +++ b/data/hfopenllm_v2/DavidAU/Gemma-The-Writer-J.GutenBerg-10B/ac749485-df6d-485e-8fa7-63bdfd744167.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DavidAU_Gemma-The-Writer-J.GutenBerg-10B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma-The-Writer-J.GutenBerg-10B", + "id": "DavidAU/Gemma-The-Writer-J.GutenBerg-10B", + "developer": "DavidAU", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.034 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2858 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5909 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0921 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3381 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4176 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3947 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavidAU/Gemma-The-Writer-Mighty-Sword-9B/54363a4b-312b-4035-a1c3-b5321311cec4.json b/data/hfopenllm_v2/DavidAU/Gemma-The-Writer-Mighty-Sword-9B/54363a4b-312b-4035-a1c3-b5321311cec4.json new file mode 100644 index 000000000..fdb1da8cf --- /dev/null +++ b/data/hfopenllm_v2/DavidAU/Gemma-The-Writer-Mighty-Sword-9B/54363a4b-312b-4035-a1c3-b5321311cec4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DavidAU_Gemma-The-Writer-Mighty-Sword-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma-The-Writer-Mighty-Sword-9B", + "id": "DavidAU/Gemma-The-Writer-Mighty-Sword-9B", + "developer": "DavidAU", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7528 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.5912 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1911 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3482 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4112 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3968 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavidAU/Gemma-The-Writer-N-Restless-Quill-10B-Uncensored/aa9e2b9e-cd25-4492-9801-eba7d40b4365.json b/data/hfopenllm_v2/DavidAU/Gemma-The-Writer-N-Restless-Quill-10B-Uncensored/aa9e2b9e-cd25-4492-9801-eba7d40b4365.json new file mode 100644 index 000000000..949b96b5e --- /dev/null +++ b/data/hfopenllm_v2/DavidAU/Gemma-The-Writer-N-Restless-Quill-10B-Uncensored/aa9e2b9e-cd25-4492-9801-eba7d40b4365.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DavidAU_Gemma-The-Writer-N-Restless-Quill-10B-Uncensored/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma-The-Writer-N-Restless-Quill-10B-Uncensored", + "id": "DavidAU/Gemma-The-Writer-N-Restless-Quill-10B-Uncensored", + "developer": "DavidAU", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.034 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7071 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5922 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + 
"dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2296 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3414 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4163 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3966 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavidAU/L3-DARKEST-PLANET-16.5B/2c317db5-86fa-41fd-8f1e-3cf08ba91cde.json b/data/hfopenllm_v2/DavidAU/L3-DARKEST-PLANET-16.5B/2c317db5-86fa-41fd-8f1e-3cf08ba91cde.json deleted file mode 100644 index 854871616..000000000 --- a/data/hfopenllm_v2/DavidAU/L3-DARKEST-PLANET-16.5B/2c317db5-86fa-41fd-8f1e-3cf08ba91cde.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DavidAU_L3-DARKEST-PLANET-16.5B/1762652579.540939", - "retrieved_timestamp": "1762652579.54094", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DavidAU/L3-DARKEST-PLANET-16.5B", - "developer": "DavidAU", - "inference_platform": "unknown", - "id": "DavidAU/L3-DARKEST-PLANET-16.5B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 16.537 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6230623634179533 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5230436906708896 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08987915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3753645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.363031914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/DavidAU/L3-DARKEST-PLANET-16.5B/c6b484b8-f6f3-4516-aff5-c2f6438c9047.json b/data/hfopenllm_v2/DavidAU/L3-DARKEST-PLANET-16.5B/c6b484b8-f6f3-4516-aff5-c2f6438c9047.json new file mode 100644 index 000000000..adcdba506 --- /dev/null +++ b/data/hfopenllm_v2/DavidAU/L3-DARKEST-PLANET-16.5B/c6b484b8-f6f3-4516-aff5-c2f6438c9047.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DavidAU_L3-DARKEST-PLANET-16.5B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3-DARKEST-PLANET-16.5B", + "id": "DavidAU/L3-DARKEST-PLANET-16.5B", + "developer": "DavidAU", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 16.537 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6231 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.523 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0899 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.3754 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.363 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavidAU/L3-Dark-Planet-8B/c6c760c9-a345-4e25-b333-b403bf6db389.json b/data/hfopenllm_v2/DavidAU/L3-Dark-Planet-8B/c6c760c9-a345-4e25-b333-b403bf6db389.json new file mode 100644 index 000000000..2857bf738 --- /dev/null +++ b/data/hfopenllm_v2/DavidAU/L3-Dark-Planet-8B/c6c760c9-a345-4e25-b333-b403bf6db389.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DavidAU_L3-Dark-Planet-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3-Dark-Planet-8B", + "id": "DavidAU/L3-Dark-Planet-8B", + "developer": "DavidAU", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4134 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5084 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0823 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3616 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3737 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavidAU/L3-Dark-Planet-8B/f5c2a2cc-392e-4337-aad9-72d65ba87aab.json b/data/hfopenllm_v2/DavidAU/L3-Dark-Planet-8B/f5c2a2cc-392e-4337-aad9-72d65ba87aab.json deleted file mode 100644 index ee2591587..000000000 --- a/data/hfopenllm_v2/DavidAU/L3-Dark-Planet-8B/f5c2a2cc-392e-4337-aad9-72d65ba87aab.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DavidAU_L3-Dark-Planet-8B/1762652579.5412621", - "retrieved_timestamp": "1762652579.541263", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DavidAU/L3-Dark-Planet-8B", - "developer": "DavidAU", - "inference_platform": "unknown", - "id": "DavidAU/L3-Dark-Planet-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4134108609600305 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5084081453197787 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0823262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36159375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37367021276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/DavidAU/L3-Jamet-12.2B-MK.V-Blackroot-Instruct/65b2aa58-2c04-48f2-9ea3-c8fd97cb9dde.json b/data/hfopenllm_v2/DavidAU/L3-Jamet-12.2B-MK.V-Blackroot-Instruct/65b2aa58-2c04-48f2-9ea3-c8fd97cb9dde.json new file mode 100644 index 000000000..d2d3fcadd --- /dev/null +++ b/data/hfopenllm_v2/DavidAU/L3-Jamet-12.2B-MK.V-Blackroot-Instruct/65b2aa58-2c04-48f2-9ea3-c8fd97cb9dde.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DavidAU_L3-Jamet-12.2B-MK.V-Blackroot-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", 
+ "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3-Jamet-12.2B-MK.V-Blackroot-Instruct", + "id": "DavidAU/L3-Jamet-12.2B-MK.V-Blackroot-Instruct", + "developer": "DavidAU", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 12.174 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3962 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4766 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0408 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2785 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.402 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3291 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavidAU/L3-Jamet-12.2B-MK.V-Blackroot-Instruct/85a1ef3f-7d68-4324-876d-b52cfa71317d.json b/data/hfopenllm_v2/DavidAU/L3-Jamet-12.2B-MK.V-Blackroot-Instruct/85a1ef3f-7d68-4324-876d-b52cfa71317d.json deleted file mode 100644 index 11587b0a9..000000000 --- a/data/hfopenllm_v2/DavidAU/L3-Jamet-12.2B-MK.V-Blackroot-Instruct/85a1ef3f-7d68-4324-876d-b52cfa71317d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DavidAU_L3-Jamet-12.2B-MK.V-Blackroot-Instruct/1762652579.541475", - "retrieved_timestamp": "1762652579.541475", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DavidAU/L3-Jamet-12.2B-MK.V-Blackroot-Instruct", - "developer": "DavidAU", - "inference_platform": "unknown", - "id": "DavidAU/L3-Jamet-12.2B-MK.V-Blackroot-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 12.174 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3961998608137519 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4765717717789398 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40196875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3291223404255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/DavidAU/L3-Lumimaid-12.2B-v0.1-OAS-Instruct/92903344-0dde-4f5a-a7d2-749a1ffe9cd3.json b/data/hfopenllm_v2/DavidAU/L3-Lumimaid-12.2B-v0.1-OAS-Instruct/92903344-0dde-4f5a-a7d2-749a1ffe9cd3.json new file mode 100644 index 000000000..0b5fe1f21 --- /dev/null +++ b/data/hfopenllm_v2/DavidAU/L3-Lumimaid-12.2B-v0.1-OAS-Instruct/92903344-0dde-4f5a-a7d2-749a1ffe9cd3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DavidAU_L3-Lumimaid-12.2B-v0.1-OAS-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3-Lumimaid-12.2B-v0.1-OAS-Instruct", + "id": "DavidAU/L3-Lumimaid-12.2B-v0.1-OAS-Instruct", + "developer": "DavidAU", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 12.174 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3924 + } + }, + { + "evaluation_name": 
"BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4693 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0461 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2768 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4194 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3142 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavidAU/L3-Lumimaid-12.2B-v0.1-OAS-Instruct/a8fe768d-f988-4fba-be80-2f5cc22dfd9d.json b/data/hfopenllm_v2/DavidAU/L3-Lumimaid-12.2B-v0.1-OAS-Instruct/a8fe768d-f988-4fba-be80-2f5cc22dfd9d.json deleted file mode 100644 index 048adf326..000000000 --- a/data/hfopenllm_v2/DavidAU/L3-Lumimaid-12.2B-v0.1-OAS-Instruct/a8fe768d-f988-4fba-be80-2f5cc22dfd9d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DavidAU_L3-Lumimaid-12.2B-v0.1-OAS-Instruct/1762652579.541698", - "retrieved_timestamp": "1762652579.5416992", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DavidAU/L3-Lumimaid-12.2B-v0.1-OAS-Instruct", - "developer": "DavidAU", - "inference_platform": "unknown", - "id": "DavidAU/L3-Lumimaid-12.2B-v0.1-OAS-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 12.174 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3924032677739509 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46930207579694677 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04607250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41942708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31416223404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/DavidAU/L3-SMB-Instruct-12.2B-F32/59ddd478-c1cd-4bd8-80c3-fdebe762414a.json b/data/hfopenllm_v2/DavidAU/L3-SMB-Instruct-12.2B-F32/59ddd478-c1cd-4bd8-80c3-fdebe762414a.json new file mode 100644 index 000000000..39a911ded --- /dev/null +++ b/data/hfopenllm_v2/DavidAU/L3-SMB-Instruct-12.2B-F32/59ddd478-c1cd-4bd8-80c3-fdebe762414a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DavidAU_L3-SMB-Instruct-12.2B-F32/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3-SMB-Instruct-12.2B-F32", + "id": "DavidAU/L3-SMB-Instruct-12.2B-F32", + "developer": "DavidAU", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 12.174 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4303 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4786 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0468 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2819 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4087 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3312 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavidAU/L3-SMB-Instruct-12.2B-F32/970cfd49-b72c-4cf5-af05-1ecfc57c94d8.json b/data/hfopenllm_v2/DavidAU/L3-SMB-Instruct-12.2B-F32/970cfd49-b72c-4cf5-af05-1ecfc57c94d8.json deleted file mode 100644 index f595ce849..000000000 --- a/data/hfopenllm_v2/DavidAU/L3-SMB-Instruct-12.2B-F32/970cfd49-b72c-4cf5-af05-1ecfc57c94d8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DavidAU_L3-SMB-Instruct-12.2B-F32/1762652579.541919", - "retrieved_timestamp": "1762652579.54192", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DavidAU/L3-SMB-Instruct-12.2B-F32", - "developer": "DavidAU", - "inference_platform": "unknown", - "id": "DavidAU/L3-SMB-Instruct-12.2B-F32", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 12.174 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4303215468290802 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4786412360346213 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40872916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": 
"Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3312001329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B/02f63fc6-9376-4fb5-b067-63493238cc27.json b/data/hfopenllm_v2/DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B/02f63fc6-9376-4fb5-b067-63493238cc27.json new file mode 100644 index 000000000..abc99f61c --- /dev/null +++ b/data/hfopenllm_v2/DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B/02f63fc6-9376-4fb5-b067-63493238cc27.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DavidAU_L3-Stheno-Maid-Blackroot-Grand-HORROR-16B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3-Stheno-Maid-Blackroot-Grand-HORROR-16B", + "id": "DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B", + "developer": "DavidAU", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 16.537 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3439 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4736 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0219 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4031 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.357 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B/9dbf220a-cbe9-40da-814f-951205c3abbe.json b/data/hfopenllm_v2/DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B/9dbf220a-cbe9-40da-814f-951205c3abbe.json deleted file mode 100644 index 73ebbc10d..000000000 --- a/data/hfopenllm_v2/DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B/9dbf220a-cbe9-40da-814f-951205c3abbe.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DavidAU_L3-Stheno-Maid-Blackroot-Grand-HORROR-16B/1762652579.542142", - "retrieved_timestamp": "1762652579.5421429", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B", - "developer": "DavidAU", - "inference_platform": "unknown", - "id": "DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 16.537 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34389309254998957 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4736328900737677 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40311458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3570478723404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/DavidAU/L3-Stheno-v3.2-12.2B-Instruct/51566db6-56e4-40bd-a248-6c968f2b83e8.json b/data/hfopenllm_v2/DavidAU/L3-Stheno-v3.2-12.2B-Instruct/51566db6-56e4-40bd-a248-6c968f2b83e8.json deleted file mode 100644 index 7041e5b7c..000000000 --- a/data/hfopenllm_v2/DavidAU/L3-Stheno-v3.2-12.2B-Instruct/51566db6-56e4-40bd-a248-6c968f2b83e8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DavidAU_L3-Stheno-v3.2-12.2B-Instruct/1762652579.542359", - "retrieved_timestamp": "1762652579.54236", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DavidAU/L3-Stheno-v3.2-12.2B-Instruct", - "developer": "DavidAU", - "inference_platform": "unknown", - "id": "DavidAU/L3-Stheno-v3.2-12.2B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 12.174 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4027945850343755 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4845980190500647 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41025 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3345246010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/DavidAU/L3-Stheno-v3.2-12.2B-Instruct/dd7597fd-27f5-4e77-a44f-b01d0db82719.json b/data/hfopenllm_v2/DavidAU/L3-Stheno-v3.2-12.2B-Instruct/dd7597fd-27f5-4e77-a44f-b01d0db82719.json new file mode 100644 index 000000000..e2f7047b8 --- /dev/null +++ b/data/hfopenllm_v2/DavidAU/L3-Stheno-v3.2-12.2B-Instruct/dd7597fd-27f5-4e77-a44f-b01d0db82719.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DavidAU_L3-Stheno-v3.2-12.2B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3-Stheno-v3.2-12.2B-Instruct", + "id": "DavidAU/L3-Stheno-v3.2-12.2B-Instruct", + "developer": "DavidAU", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 12.174 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4028 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4846 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0506 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2752 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4103 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3345 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavidAU/L3.1-Dark-Planet-SpinFire-Uncensored-8B/0982d599-57c7-4eeb-bd47-844879bb79a5.json b/data/hfopenllm_v2/DavidAU/L3.1-Dark-Planet-SpinFire-Uncensored-8B/0982d599-57c7-4eeb-bd47-844879bb79a5.json deleted file mode 100644 index f89c604dd..000000000 --- a/data/hfopenllm_v2/DavidAU/L3.1-Dark-Planet-SpinFire-Uncensored-8B/0982d599-57c7-4eeb-bd47-844879bb79a5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DavidAU_L3.1-Dark-Planet-SpinFire-Uncensored-8B/1762652579.542578", - "retrieved_timestamp": "1762652579.542578", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DavidAU/L3.1-Dark-Planet-SpinFire-Uncensored-8B", - "developer": "DavidAU", - "inference_platform": "unknown", - "id": "DavidAU/L3.1-Dark-Planet-SpinFire-Uncensored-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7042702252246262 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5260910165037093 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09290030211480363 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.354125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3670212765957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/DavidAU/L3.1-Dark-Planet-SpinFire-Uncensored-8B/20cd0d60-eb0d-41bd-b37f-910a03dd7f82.json b/data/hfopenllm_v2/DavidAU/L3.1-Dark-Planet-SpinFire-Uncensored-8B/20cd0d60-eb0d-41bd-b37f-910a03dd7f82.json new file mode 100644 index 000000000..c256c420f --- /dev/null +++ b/data/hfopenllm_v2/DavidAU/L3.1-Dark-Planet-SpinFire-Uncensored-8B/20cd0d60-eb0d-41bd-b37f-910a03dd7f82.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DavidAU_L3.1-Dark-Planet-SpinFire-Uncensored-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3.1-Dark-Planet-SpinFire-Uncensored-8B", + "id": "DavidAU/L3.1-Dark-Planet-SpinFire-Uncensored-8B", + "developer": "DavidAU", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7043 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5261 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0929 + } 
+ }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3541 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.367 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavidAU/L3.1-MOE-2X8B-Deepseek-DeepHermes-e32-uncensored-abliterated-13.7B/a7df9a84-fa29-4c8e-8413-4542b5eafb63.json b/data/hfopenllm_v2/DavidAU/L3.1-MOE-2X8B-Deepseek-DeepHermes-e32-uncensored-abliterated-13.7B/a7df9a84-fa29-4c8e-8413-4542b5eafb63.json deleted file mode 100644 index 91e233037..000000000 --- a/data/hfopenllm_v2/DavidAU/L3.1-MOE-2X8B-Deepseek-DeepHermes-e32-uncensored-abliterated-13.7B/a7df9a84-fa29-4c8e-8413-4542b5eafb63.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DavidAU_L3.1-MOE-2X8B-Deepseek-DeepHermes-e32-uncensored-abliterated-13.7B/1762652579.542795", - "retrieved_timestamp": "1762652579.5427961", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DavidAU/L3.1-MOE-2X8B-Deepseek-DeepHermes-e32-uncensored-abliterated-13.7B", - "developer": "DavidAU", - "inference_platform": "unknown", - "id": "DavidAU/L3.1-MOE-2X8B-Deepseek-DeepHermes-e32-uncensored-abliterated-13.7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 13.668 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3345257250761313 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4420822344441435 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26057401812688824 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37486458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2892287234042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/DavidAU/L3.1-MOE-2X8B-Deepseek-DeepHermes-e32-uncensored-abliterated-13.7B/c4e9d045-3769-4828-a2ca-7fa508873089.json b/data/hfopenllm_v2/DavidAU/L3.1-MOE-2X8B-Deepseek-DeepHermes-e32-uncensored-abliterated-13.7B/c4e9d045-3769-4828-a2ca-7fa508873089.json new file mode 100644 index 000000000..0420b014e --- /dev/null +++ b/data/hfopenllm_v2/DavidAU/L3.1-MOE-2X8B-Deepseek-DeepHermes-e32-uncensored-abliterated-13.7B/c4e9d045-3769-4828-a2ca-7fa508873089.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DavidAU_L3.1-MOE-2X8B-Deepseek-DeepHermes-e32-uncensored-abliterated-13.7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3.1-MOE-2X8B-Deepseek-DeepHermes-e32-uncensored-abliterated-13.7B", + "id": "DavidAU/L3.1-MOE-2X8B-Deepseek-DeepHermes-e32-uncensored-abliterated-13.7B", + "developer": "DavidAU", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 13.668 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3345 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4421 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2606 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3138 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + 
}, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3749 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2892 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavidAU/Qwen2.5-MOE-2X1.5B-DeepSeek-Uncensored-Censored-4B/0a0501ec-4ecd-47c1-914b-d473f795cef2.json b/data/hfopenllm_v2/DavidAU/Qwen2.5-MOE-2X1.5B-DeepSeek-Uncensored-Censored-4B/0a0501ec-4ecd-47c1-914b-d473f795cef2.json new file mode 100644 index 000000000..0a0b5cf10 --- /dev/null +++ b/data/hfopenllm_v2/DavidAU/Qwen2.5-MOE-2X1.5B-DeepSeek-Uncensored-Censored-4B/0a0501ec-4ecd-47c1-914b-d473f795cef2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DavidAU_Qwen2.5-MOE-2X1.5B-DeepSeek-Uncensored-Censored-4B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-MOE-2X1.5B-DeepSeek-Uncensored-Censored-4B", + "id": "DavidAU/Qwen2.5-MOE-2X1.5B-DeepSeek-Uncensored-Censored-4B", + "developer": "DavidAU", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2MoeForCausalLM", + "params_billions": 4.089 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1783 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3033 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0249 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3715 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1142 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavidAU/Qwen2.5-MOE-2X7B-DeepSeek-Abliterated-Censored-19B/beca755f-203f-4bc8-b5cf-f9a9e3f8bd8f.json b/data/hfopenllm_v2/DavidAU/Qwen2.5-MOE-2X7B-DeepSeek-Abliterated-Censored-19B/beca755f-203f-4bc8-b5cf-f9a9e3f8bd8f.json new file mode 100644 index 000000000..3d03444da --- /dev/null +++ b/data/hfopenllm_v2/DavidAU/Qwen2.5-MOE-2X7B-DeepSeek-Abliterated-Censored-19B/beca755f-203f-4bc8-b5cf-f9a9e3f8bd8f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DavidAU_Qwen2.5-MOE-2X7B-DeepSeek-Abliterated-Censored-19B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-MOE-2X7B-DeepSeek-Abliterated-Censored-19B", + "id": "DavidAU/Qwen2.5-MOE-2X7B-DeepSeek-Abliterated-Censored-19B", + "developer": "DavidAU", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2MoeForCausalLM", + "params_billions": 19.022 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2835 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3592 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2417 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 
0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3847 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1636 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavidAU/Qwen2.5-MOE-6x1.5B-DeepSeek-Reasoning-e32/79e1e1c6-cbe0-43a9-a593-8e2119baaf77.json b/data/hfopenllm_v2/DavidAU/Qwen2.5-MOE-6x1.5B-DeepSeek-Reasoning-e32/79e1e1c6-cbe0-43a9-a593-8e2119baaf77.json new file mode 100644 index 000000000..60721bfc8 --- /dev/null +++ b/data/hfopenllm_v2/DavidAU/Qwen2.5-MOE-6x1.5B-DeepSeek-Reasoning-e32/79e1e1c6-cbe0-43a9-a593-8e2119baaf77.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DavidAU_Qwen2.5-MOE-6x1.5B-DeepSeek-Reasoning-e32/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-MOE-6x1.5B-DeepSeek-Reasoning-e32", + "id": "DavidAU/Qwen2.5-MOE-6x1.5B-DeepSeek-Reasoning-e32", + "developer": "DavidAU", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2MoeForCausalLM", + "params_billions": 8.714 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2107 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3286 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0665 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2475 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3404 + } + }, + { + "evaluation_name": "MMLU-PRO", + 
"source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1122 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Davidsv/SUONG-1/097e6cbe-88cd-4d61-bb4c-0b8ddb537abe.json b/data/hfopenllm_v2/Davidsv/SUONG-1/097e6cbe-88cd-4d61-bb4c-0b8ddb537abe.json deleted file mode 100644 index 40c040e7a..000000000 --- a/data/hfopenllm_v2/Davidsv/SUONG-1/097e6cbe-88cd-4d61-bb4c-0b8ddb537abe.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Davidsv_SUONG-1/1762652579.5439382", - "retrieved_timestamp": "1762652579.54394", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Davidsv/SUONG-1", - "developer": "Davidsv", - "inference_platform": "unknown", - "id": "Davidsv/SUONG-1", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 2.879 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2497207409673001 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28171339082318814 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24412751677852348 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35775 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1085438829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/Davidsv/SUONG-1/def80b44-3d9a-46ba-bf5f-ffc81e50af2e.json b/data/hfopenllm_v2/Davidsv/SUONG-1/def80b44-3d9a-46ba-bf5f-ffc81e50af2e.json new file mode 100644 index 000000000..392a9563b --- /dev/null +++ b/data/hfopenllm_v2/Davidsv/SUONG-1/def80b44-3d9a-46ba-bf5f-ffc81e50af2e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Davidsv_SUONG-1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + 
"source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SUONG-1", + "id": "Davidsv/SUONG-1", + "developer": "Davidsv", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 2.879 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2497 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2817 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2441 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3578 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1085 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavieLion/Llama-3.2-1B-SPIN-iter0/5e1aa809-ef20-445e-a05b-eccd585d5991.json b/data/hfopenllm_v2/DavieLion/Llama-3.2-1B-SPIN-iter0/5e1aa809-ef20-445e-a05b-eccd585d5991.json new file mode 100644 index 000000000..a70497e8f --- /dev/null +++ b/data/hfopenllm_v2/DavieLion/Llama-3.2-1B-SPIN-iter0/5e1aa809-ef20-445e-a05b-eccd585d5991.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DavieLion_Llama-3.2-1B-SPIN-iter0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-1B-SPIN-iter0", + "id": "DavieLion/Llama-3.2-1B-SPIN-iter0", + "developer": "DavieLion", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1507 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.293 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2534 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3565 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1125 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavieLion/Llama-3.2-1B-SPIN-iter0/7c2be651-ca56-4285-afc7-1bfe1c8ce11e.json b/data/hfopenllm_v2/DavieLion/Llama-3.2-1B-SPIN-iter0/7c2be651-ca56-4285-afc7-1bfe1c8ce11e.json new file mode 100644 index 000000000..ff2f7e65f --- /dev/null +++ b/data/hfopenllm_v2/DavieLion/Llama-3.2-1B-SPIN-iter0/7c2be651-ca56-4285-afc7-1bfe1c8ce11e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DavieLion_Llama-3.2-1B-SPIN-iter0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-1B-SPIN-iter0", + "id": "DavieLion/Llama-3.2-1B-SPIN-iter0", + "developer": "DavieLion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + 
"source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1549 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2937 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.006 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3565 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1128 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavieLion/Llama-3.2-1B-SPIN-iter1/cfe4ea72-ddb9-49b5-9599-99f215e112e5.json b/data/hfopenllm_v2/DavieLion/Llama-3.2-1B-SPIN-iter1/cfe4ea72-ddb9-49b5-9599-99f215e112e5.json new file mode 100644 index 000000000..6cf1f5440 --- /dev/null +++ b/data/hfopenllm_v2/DavieLion/Llama-3.2-1B-SPIN-iter1/cfe4ea72-ddb9-49b5-9599-99f215e112e5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DavieLion_Llama-3.2-1B-SPIN-iter1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-1B-SPIN-iter1", + "id": "DavieLion/Llama-3.2-1B-SPIN-iter1", + "developer": "DavieLion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1575 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.294 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0023 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2508 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3646 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1118 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavieLion/Llama-3.2-1B-SPIN-iter2/81d63d8e-88dd-4b16-b9b8-d07604878f8f.json b/data/hfopenllm_v2/DavieLion/Llama-3.2-1B-SPIN-iter2/81d63d8e-88dd-4b16-b9b8-d07604878f8f.json new file mode 100644 index 000000000..b5a6f68d3 --- /dev/null +++ b/data/hfopenllm_v2/DavieLion/Llama-3.2-1B-SPIN-iter2/81d63d8e-88dd-4b16-b9b8-d07604878f8f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DavieLion_Llama-3.2-1B-SPIN-iter2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-1B-SPIN-iter2", + "id": "DavieLion/Llama-3.2-1B-SPIN-iter2", + "developer": "DavieLion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1376 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": 
"SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.298 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0053 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2542 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3553 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1129 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavieLion/Llama-3.2-1B-SPIN-iter3/81f8208b-f7e7-4685-bb84-321d9e097470.json b/data/hfopenllm_v2/DavieLion/Llama-3.2-1B-SPIN-iter3/81f8208b-f7e7-4685-bb84-321d9e097470.json new file mode 100644 index 000000000..fe2d84160 --- /dev/null +++ b/data/hfopenllm_v2/DavieLion/Llama-3.2-1B-SPIN-iter3/81f8208b-f7e7-4685-bb84-321d9e097470.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DavieLion_Llama-3.2-1B-SPIN-iter3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-1B-SPIN-iter3", + "id": "DavieLion/Llama-3.2-1B-SPIN-iter3", + "developer": "DavieLion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1324 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.2972 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3527 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1129 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavieLion/Llama-3.2-1B-SPIN-iter3/a0c9a434-9b8c-47c5-b511-9daac7901686.json b/data/hfopenllm_v2/DavieLion/Llama-3.2-1B-SPIN-iter3/a0c9a434-9b8c-47c5-b511-9daac7901686.json new file mode 100644 index 000000000..e22bf8212 --- /dev/null +++ b/data/hfopenllm_v2/DavieLion/Llama-3.2-1B-SPIN-iter3/a0c9a434-9b8c-47c5-b511-9daac7901686.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DavieLion_Llama-3.2-1B-SPIN-iter3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-1B-SPIN-iter3", + "id": "DavieLion/Llama-3.2-1B-SPIN-iter3", + "developer": "DavieLion", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1336 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2975 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0068 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2534 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.35 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1128 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavieLion/Lllma-3.2-1B/274ed35b-4abe-4f20-bd18-7e386a7fdaa5.json b/data/hfopenllm_v2/DavieLion/Lllma-3.2-1B/274ed35b-4abe-4f20-bd18-7e386a7fdaa5.json deleted file mode 100644 index a8c3c184d..000000000 --- a/data/hfopenllm_v2/DavieLion/Lllma-3.2-1B/274ed35b-4abe-4f20-bd18-7e386a7fdaa5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DavieLion_Lllma-3.2-1B/1762652579.5458188", - "retrieved_timestamp": "1762652579.54582", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DavieLion/Lllma-3.2-1B", - "developer": "DavieLion", - "inference_platform": "unknown", - "id": "DavieLion/Lllma-3.2-1B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1601439735457475 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2964692268500723 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24412751677852348 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35781250000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11261635638297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/DavieLion/Lllma-3.2-1B/28b60eae-1b38-4404-8db1-3fb2997583f4.json b/data/hfopenllm_v2/DavieLion/Lllma-3.2-1B/28b60eae-1b38-4404-8db1-3fb2997583f4.json new file mode 100644 index 000000000..9df57bb74 --- /dev/null +++ b/data/hfopenllm_v2/DavieLion/Lllma-3.2-1B/28b60eae-1b38-4404-8db1-3fb2997583f4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DavieLion_Lllma-3.2-1B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lllma-3.2-1B", + "id": "DavieLion/Lllma-3.2-1B", + "developer": "DavieLion", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1601 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2965 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0068 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2441 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3578 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1126 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DebateLabKIT/Llama-3.1-Argunaut-1-8B-SFT/746862a2-a90c-4612-91d0-f989b9eed1a5.json b/data/hfopenllm_v2/DebateLabKIT/Llama-3.1-Argunaut-1-8B-SFT/746862a2-a90c-4612-91d0-f989b9eed1a5.json new file mode 100644 index 000000000..64711247e --- /dev/null +++ b/data/hfopenllm_v2/DebateLabKIT/Llama-3.1-Argunaut-1-8B-SFT/746862a2-a90c-4612-91d0-f989b9eed1a5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DebateLabKIT_Llama-3.1-Argunaut-1-8B-SFT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-Argunaut-1-8B-SFT", + "id": "DebateLabKIT/Llama-3.1-Argunaut-1-8B-SFT", + "developer": "DebateLabKIT", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5519 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4824 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.145 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4503 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3472 + } + } + ] +} \ No 
newline at end of file diff --git a/data/hfopenllm_v2/DebateLabKIT/Llama-3.1-Argunaut-1-8B-SFT/ea40f65f-60a8-4efa-aa8d-e2a64ef5999f.json b/data/hfopenllm_v2/DebateLabKIT/Llama-3.1-Argunaut-1-8B-SFT/ea40f65f-60a8-4efa-aa8d-e2a64ef5999f.json deleted file mode 100644 index f371adbac..000000000 --- a/data/hfopenllm_v2/DebateLabKIT/Llama-3.1-Argunaut-1-8B-SFT/ea40f65f-60a8-4efa-aa8d-e2a64ef5999f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DebateLabKIT_Llama-3.1-Argunaut-1-8B-SFT/1762652579.546083", - "retrieved_timestamp": "1762652579.5460842", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DebateLabKIT/Llama-3.1-Argunaut-1-8B-SFT", - "developer": "DebateLabKIT", - "inference_platform": "unknown", - "id": "DebateLabKIT/Llama-3.1-Argunaut-1-8B-SFT", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.551921124837653 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48238301936695316 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14501510574018128 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4503020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3472406914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/Deci/DeciLM-7B-instruct/1b3a2041-d14f-44d1-9efd-dbeceaa67ee6.json b/data/hfopenllm_v2/Deci/DeciLM-7B-instruct/1b3a2041-d14f-44d1-9efd-dbeceaa67ee6.json deleted file mode 100644 index 8805ad5d9..000000000 --- a/data/hfopenllm_v2/Deci/DeciLM-7B-instruct/1b3a2041-d14f-44d1-9efd-dbeceaa67ee6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Deci_DeciLM-7B-instruct/1762652579.546672", - "retrieved_timestamp": "1762652579.546672", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Deci/DeciLM-7B-instruct", - "developer": "Deci", - "inference_platform": "unknown", - "id": "Deci/DeciLM-7B-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "DeciLMForCausalLM", - "params_billions": 7.044 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4880239985460799 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4589748654047652 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030211480362537766 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38841666666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26080452127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/Deci/DeciLM-7B-instruct/715ee057-9c9a-4e04-991c-7040b1eef65b.json b/data/hfopenllm_v2/Deci/DeciLM-7B-instruct/715ee057-9c9a-4e04-991c-7040b1eef65b.json new file mode 100644 index 000000000..19a0bd337 --- /dev/null +++ b/data/hfopenllm_v2/Deci/DeciLM-7B-instruct/715ee057-9c9a-4e04-991c-7040b1eef65b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Deci_DeciLM-7B-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeciLM-7B-instruct", + "id": "Deci/DeciLM-7B-instruct", + "developer": "Deci", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "DeciLMForCausalLM", + "params_billions": 7.044 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.488 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.459 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0302 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3884 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2608 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Deci/DeciLM-7B/4dc1d103-3458-4b8c-9e63-b98effd69667.json b/data/hfopenllm_v2/Deci/DeciLM-7B/4dc1d103-3458-4b8c-9e63-b98effd69667.json new file mode 100644 index 000000000..7a095aa90 --- /dev/null +++ b/data/hfopenllm_v2/Deci/DeciLM-7B/4dc1d103-3458-4b8c-9e63-b98effd69667.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Deci_DeciLM-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeciLM-7B", + "id": "Deci/DeciLM-7B", + "developer": "Deci", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "DeciLMForCausalLM", + "params_billions": 7.044 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2813 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4423 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0287 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4359 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2692 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Deci/DeciLM-7B/f9d2408b-03dd-4cf8-851e-51a15ff13be9.json b/data/hfopenllm_v2/Deci/DeciLM-7B/f9d2408b-03dd-4cf8-851e-51a15ff13be9.json deleted file mode 100644 index d83521422..000000000 --- a/data/hfopenllm_v2/Deci/DeciLM-7B/f9d2408b-03dd-4cf8-851e-51a15ff13be9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Deci_DeciLM-7B/1762652579.5463831", - "retrieved_timestamp": "1762652579.5463839", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Deci/DeciLM-7B", - "developer": "Deci", - "inference_platform": "unknown", - "id": "Deci/DeciLM-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "DeciLMForCausalLM", - "params_billions": 7.044 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28129474239462404 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44228566674266495 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43585416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26919880319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/DeepAutoAI/Explore_Llama-3.1-8B-Inst/070ff2a5-9a5d-48cf-8517-1ad9b6642d59.json b/data/hfopenllm_v2/DeepAutoAI/Explore_Llama-3.1-8B-Inst/070ff2a5-9a5d-48cf-8517-1ad9b6642d59.json new file mode 100644 index 000000000..693d2501c --- /dev/null +++ b/data/hfopenllm_v2/DeepAutoAI/Explore_Llama-3.1-8B-Inst/070ff2a5-9a5d-48cf-8517-1ad9b6642d59.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DeepAutoAI_Explore_Llama-3.1-8B-Inst/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Explore_Llama-3.1-8B-Inst", + "id": "DeepAutoAI/Explore_Llama-3.1-8B-Inst", + "developer": "DeepAutoAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7795 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5117 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2009 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.391 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + 
"source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3792 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DeepAutoAI/Explore_Llama-3.2-1B-Inst/8406a5b8-a87d-489b-b75b-00e9f675f09f.json b/data/hfopenllm_v2/DeepAutoAI/Explore_Llama-3.2-1B-Inst/8406a5b8-a87d-489b-b75b-00e9f675f09f.json new file mode 100644 index 000000000..2d0dd9463 --- /dev/null +++ b/data/hfopenllm_v2/DeepAutoAI/Explore_Llama-3.2-1B-Inst/8406a5b8-a87d-489b-b75b-00e9f675f09f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DeepAutoAI_Explore_Llama-3.2-1B-Inst/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Explore_Llama-3.2-1B-Inst", + "id": "DeepAutoAI/Explore_Llama-3.2-1B-Inst", + "developer": "DeepAutoAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5649 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3505 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0748 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2559 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3183 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1809 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DeepAutoAI/Explore_Llama-3.2-1B-Inst_v0/11e8f9b6-32ab-4b83-a601-e5644c0b2c39.json b/data/hfopenllm_v2/DeepAutoAI/Explore_Llama-3.2-1B-Inst_v0/11e8f9b6-32ab-4b83-a601-e5644c0b2c39.json new file mode 100644 index 000000000..6773c3179 --- /dev/null +++ b/data/hfopenllm_v2/DeepAutoAI/Explore_Llama-3.2-1B-Inst_v0/11e8f9b6-32ab-4b83-a601-e5644c0b2c39.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DeepAutoAI_Explore_Llama-3.2-1B-Inst_v0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Explore_Llama-3.2-1B-Inst_v0", + "id": "DeepAutoAI/Explore_Llama-3.2-1B-Inst_v0", + "developer": "DeepAutoAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5597 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3365 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0597 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3103 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1804 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1.1/6b542f5a-ea62-45ce-8e98-436a4d058877.json b/data/hfopenllm_v2/DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1.1/6b542f5a-ea62-45ce-8e98-436a4d058877.json new file mode 100644 index 000000000..a3c9bf2aa --- /dev/null +++ b/data/hfopenllm_v2/DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1.1/6b542f5a-ea62-45ce-8e98-436a4d058877.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DeepAutoAI_Explore_Llama-3.2-1B-Inst_v1.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Explore_Llama-3.2-1B-Inst_v1.1", + "id": "DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1.1", + "developer": "DeepAutoAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5844 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3513 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0718 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2626 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3117 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1818 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1/9b280640-bfee-4730-acc3-386a54b2434c.json 
b/data/hfopenllm_v2/DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1/9b280640-bfee-4730-acc3-386a54b2434c.json new file mode 100644 index 000000000..0e0834213 --- /dev/null +++ b/data/hfopenllm_v2/DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1/9b280640-bfee-4730-acc3-386a54b2434c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DeepAutoAI_Explore_Llama-3.2-1B-Inst_v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Explore_Llama-3.2-1B-Inst_v1", + "id": "DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1", + "developer": "DeepAutoAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4999 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3141 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.031 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.245 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3781 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1269 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DeepAutoAI/causal_gpt2/eff5171b-6119-4013-8aa8-8a4f0215b045.json b/data/hfopenllm_v2/DeepAutoAI/causal_gpt2/eff5171b-6119-4013-8aa8-8a4f0215b045.json new file mode 100644 index 000000000..f4355b69c --- /dev/null +++ 
b/data/hfopenllm_v2/DeepAutoAI/causal_gpt2/eff5171b-6119-4013-8aa8-8a4f0215b045.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DeepAutoAI_causal_gpt2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "causal_gpt2", + "id": "DeepAutoAI/causal_gpt2", + "developer": "DeepAutoAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GPT2LMHeadModel", + "params_billions": 0.124 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1813 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3026 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0053 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.427 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1131 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DeepAutoAI/d2nwg_Llama-3.1-8B-Instruct-v0.0/471c5fed-f155-4521-9d9c-b5370ca91bec.json b/data/hfopenllm_v2/DeepAutoAI/d2nwg_Llama-3.1-8B-Instruct-v0.0/471c5fed-f155-4521-9d9c-b5370ca91bec.json new file mode 100644 index 000000000..db64d4ff1 --- /dev/null +++ b/data/hfopenllm_v2/DeepAutoAI/d2nwg_Llama-3.1-8B-Instruct-v0.0/471c5fed-f155-4521-9d9c-b5370ca91bec.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DeepAutoAI_d2nwg_Llama-3.1-8B-Instruct-v0.0/1770682486.623709", 
+ "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "d2nwg_Llama-3.1-8B-Instruct-v0.0", + "id": "DeepAutoAI/d2nwg_Llama-3.1-8B-Instruct-v0.0", + "developer": "DeepAutoAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7893 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.508 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1805 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4135 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3877 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DeepAutoAI/d2nwg_Llama-3.1-8B-Instruct-v0.0/d5d73b84-4436-47bf-967e-c9be94898189.json b/data/hfopenllm_v2/DeepAutoAI/d2nwg_Llama-3.1-8B-Instruct-v0.0/d5d73b84-4436-47bf-967e-c9be94898189.json deleted file mode 100644 index 5791f38b0..000000000 --- a/data/hfopenllm_v2/DeepAutoAI/d2nwg_Llama-3.1-8B-Instruct-v0.0/d5d73b84-4436-47bf-967e-c9be94898189.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DeepAutoAI_d2nwg_Llama-3.1-8B-Instruct-v0.0/1762652579.548984", - "retrieved_timestamp": "1762652579.548985", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DeepAutoAI/d2nwg_Llama-3.1-8B-Instruct-v0.0", - "developer": "DeepAutoAI", - "inference_platform": "unknown", - "id": "DeepAutoAI/d2nwg_Llama-3.1-8B-Instruct-v0.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7892746800711002 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5080411642065981 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18051359516616314 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41346875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3877160904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/DeepAutoAI/d2nwg_causal_gpt2/690be099-3ace-484f-b01f-2fe6b324d12a.json b/data/hfopenllm_v2/DeepAutoAI/d2nwg_causal_gpt2/690be099-3ace-484f-b01f-2fe6b324d12a.json new file mode 100644 index 000000000..e1f3baeef --- /dev/null +++ b/data/hfopenllm_v2/DeepAutoAI/d2nwg_causal_gpt2/690be099-3ace-484f-b01f-2fe6b324d12a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DeepAutoAI_d2nwg_causal_gpt2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "d2nwg_causal_gpt2", + "id": "DeepAutoAI/d2nwg_causal_gpt2", + "developer": "DeepAutoAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GPT2LMHeadModel", + "params_billions": 0.124 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1916 + } + }, + { + "evaluation_name": "BBH", + "source_data": { 
+ "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3027 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0045 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4297 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1151 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DeepAutoAI/d2nwg_causal_gpt2_v1/71fbd15f-5eec-40d9-84e8-07323f3ffac6.json b/data/hfopenllm_v2/DeepAutoAI/d2nwg_causal_gpt2_v1/71fbd15f-5eec-40d9-84e8-07323f3ffac6.json new file mode 100644 index 000000000..97cbbb48b --- /dev/null +++ b/data/hfopenllm_v2/DeepAutoAI/d2nwg_causal_gpt2_v1/71fbd15f-5eec-40d9-84e8-07323f3ffac6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DeepAutoAI_d2nwg_causal_gpt2_v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "d2nwg_causal_gpt2_v1", + "id": "DeepAutoAI/d2nwg_causal_gpt2_v1", + "developer": "DeepAutoAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GPT2LMHeadModel", + "params_billions": 0.124 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1989 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.2992 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0038 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1135 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DeepAutoAI/ldm_soup_Llama-3.1-8B-Inst/eb93dd3e-3d13-4234-bb66-f6177648aa2b.json b/data/hfopenllm_v2/DeepAutoAI/ldm_soup_Llama-3.1-8B-Inst/eb93dd3e-3d13-4234-bb66-f6177648aa2b.json new file mode 100644 index 000000000..02684506f --- /dev/null +++ b/data/hfopenllm_v2/DeepAutoAI/ldm_soup_Llama-3.1-8B-Inst/eb93dd3e-3d13-4234-bb66-f6177648aa2b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DeepAutoAI_ldm_soup_Llama-3.1-8B-Inst/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ldm_soup_Llama-3.1-8B-Inst", + "id": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Inst", + "developer": "DeepAutoAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8033 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5121 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + 
"hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1888 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4161 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3886 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.0/f7ec1ed7-cc30-4879-8ab1-4909011553d5.json b/data/hfopenllm_v2/DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.0/f7ec1ed7-cc30-4879-8ab1-4909011553d5.json new file mode 100644 index 000000000..78c4abb58 --- /dev/null +++ b/data/hfopenllm_v2/DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.0/f7ec1ed7-cc30-4879-8ab1-4909011553d5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DeepAutoAI_ldm_soup_Llama-3.1-8B-Instruct-v0.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ldm_soup_Llama-3.1-8B-Instruct-v0.0", + "id": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.0", + "developer": "DeepAutoAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7889 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5125 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1918 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4121 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3895 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.0/fb8eb882-26a9-4008-9226-90d44d38b54f.json b/data/hfopenllm_v2/DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.0/fb8eb882-26a9-4008-9226-90d44d38b54f.json deleted file mode 100644 index b40a4a296..000000000 --- a/data/hfopenllm_v2/DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.0/fb8eb882-26a9-4008-9226-90d44d38b54f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DeepAutoAI_ldm_soup_Llama-3.1-8B-Instruct-v0.0/1762652579.5500422", - "retrieved_timestamp": "1762652579.5500429", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.0", - "developer": "DeepAutoAI", - "inference_platform": "unknown", - "id": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7889499860370484 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5125175335277464 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19184290030211482 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41213541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38954454787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.1/3e100704-dbd3-4d05-b325-5bb4bc90e51c.json b/data/hfopenllm_v2/DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.1/3e100704-dbd3-4d05-b325-5bb4bc90e51c.json new file mode 100644 index 000000000..dbebcb08a --- /dev/null +++ b/data/hfopenllm_v2/DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.1/3e100704-dbd3-4d05-b325-5bb4bc90e51c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DeepAutoAI_ldm_soup_Llama-3.1-8B-Instruct-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ldm_soup_Llama-3.1-8B-Instruct-v0.1", + "id": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.1", + "developer": "DeepAutoAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7889 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5125 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1918 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.4121 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3895 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.1/a7ba1534-464f-45ba-834f-5f501b155c20.json b/data/hfopenllm_v2/DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.1/a7ba1534-464f-45ba-834f-5f501b155c20.json deleted file mode 100644 index d74fbe18b..000000000 --- a/data/hfopenllm_v2/DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.1/a7ba1534-464f-45ba-834f-5f501b155c20.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DeepAutoAI_ldm_soup_Llama-3.1-8B-Instruct-v0.1/1762652579.550273", - "retrieved_timestamp": "1762652579.5502741", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.1", - "developer": "DeepAutoAI", - "inference_platform": "unknown", - "id": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7889499860370484 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5125175335277464 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19184290030211482 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41213541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38954454787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/DeepMount00/Lexora-Lite-3B/12f003ef-1098-4d3f-aed7-7343034157bc.json b/data/hfopenllm_v2/DeepMount00/Lexora-Lite-3B/12f003ef-1098-4d3f-aed7-7343034157bc.json new file mode 100644 index 000000000..d3e936e6c --- 
/dev/null +++ b/data/hfopenllm_v2/DeepMount00/Lexora-Lite-3B/12f003ef-1098-4d3f-aed7-7343034157bc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DeepMount00_Lexora-Lite-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lexora-Lite-3B", + "id": "DeepMount00/Lexora-Lite-3B", + "developer": "DeepMount00", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5776 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4873 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2304 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3966 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3602 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DeepMount00/Lexora-Lite-3B/5eb28bbd-8428-4385-b078-13e8a868e9f0.json b/data/hfopenllm_v2/DeepMount00/Lexora-Lite-3B/5eb28bbd-8428-4385-b078-13e8a868e9f0.json deleted file mode 100644 index 4c4deb104..000000000 --- a/data/hfopenllm_v2/DeepMount00/Lexora-Lite-3B/5eb28bbd-8428-4385-b078-13e8a868e9f0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DeepMount00_Lexora-Lite-3B/1762652579.550504", - "retrieved_timestamp": 
"1762652579.550505", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DeepMount00/Lexora-Lite-3B", - "developer": "DeepMount00", - "inference_platform": "unknown", - "id": "DeepMount00/Lexora-Lite-3B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5775996577968678 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4873392373334518 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23036253776435045 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39660416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3602061170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/DeepMount00/Lexora-Lite-3B_v2/9de2e564-3a30-4f1c-80da-6432a245a64f.json b/data/hfopenllm_v2/DeepMount00/Lexora-Lite-3B_v2/9de2e564-3a30-4f1c-80da-6432a245a64f.json new file mode 100644 index 000000000..6c7c5b9aa --- /dev/null +++ b/data/hfopenllm_v2/DeepMount00/Lexora-Lite-3B_v2/9de2e564-3a30-4f1c-80da-6432a245a64f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DeepMount00_Lexora-Lite-3B_v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lexora-Lite-3B_v2", + "id": "DeepMount00/Lexora-Lite-3B_v2", + "developer": "DeepMount00", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.4943 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4812 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2281 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3822 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3544 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DeepMount00/Lexora-Lite-3B_v2/bf38278f-6375-41a6-9744-04fb4a32ed72.json b/data/hfopenllm_v2/DeepMount00/Lexora-Lite-3B_v2/bf38278f-6375-41a6-9744-04fb4a32ed72.json deleted file mode 100644 index 2e21df44d..000000000 --- a/data/hfopenllm_v2/DeepMount00/Lexora-Lite-3B_v2/bf38278f-6375-41a6-9744-04fb4a32ed72.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DeepMount00_Lexora-Lite-3B_v2/1762652579.550789", - "retrieved_timestamp": "1762652579.550789", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DeepMount00/Lexora-Lite-3B_v2", - "developer": "DeepMount00", - "inference_platform": "unknown", - "id": "DeepMount00/Lexora-Lite-3B_v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49431840848947456 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48117654754683153 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2280966767371601 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38215625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35438829787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/DeepMount00/Lexora-Medium-7B/cc8f594a-e2f7-49e3-8654-57f1b397797f.json b/data/hfopenllm_v2/DeepMount00/Lexora-Medium-7B/cc8f594a-e2f7-49e3-8654-57f1b397797f.json deleted file mode 100644 index df672c1ee..000000000 --- a/data/hfopenllm_v2/DeepMount00/Lexora-Medium-7B/cc8f594a-e2f7-49e3-8654-57f1b397797f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DeepMount00_Lexora-Medium-7B/1762652579.551008", - "retrieved_timestamp": "1762652579.551009", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DeepMount00/Lexora-Medium-7B", - "developer": "DeepMount00", - "inference_platform": "unknown", - "id": "DeepMount00/Lexora-Medium-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4103379034295669 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5144844494250328 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22205438066465258 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44394791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43251329787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/DeepMount00/Lexora-Medium-7B/dd5aaa3f-b24b-4a5b-852b-b80f4a6bf366.json b/data/hfopenllm_v2/DeepMount00/Lexora-Medium-7B/dd5aaa3f-b24b-4a5b-852b-b80f4a6bf366.json new file mode 100644 index 000000000..65cb6dbf1 --- /dev/null +++ b/data/hfopenllm_v2/DeepMount00/Lexora-Medium-7B/dd5aaa3f-b24b-4a5b-852b-b80f4a6bf366.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DeepMount00_Lexora-Medium-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lexora-Medium-7B", + "id": "DeepMount00/Lexora-Medium-7B", + "developer": "DeepMount00", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4103 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5145 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2221 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4439 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4325 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DeepMount00/Llama-3-8b-Ita/8d8b9fd2-43f6-4edc-8340-44d20824a7e7.json b/data/hfopenllm_v2/DeepMount00/Llama-3-8b-Ita/8d8b9fd2-43f6-4edc-8340-44d20824a7e7.json new file mode 100644 index 000000000..c5cf4ed7b --- /dev/null +++ b/data/hfopenllm_v2/DeepMount00/Llama-3-8b-Ita/8d8b9fd2-43f6-4edc-8340-44d20824a7e7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DeepMount00_Llama-3-8b-Ita/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8b-Ita", + "id": "DeepMount00/Llama-3-8b-Ita", + "developer": "DeepMount00", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.753 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4936 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0665 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4268 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3852 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DeepMount00/Llama-3.1-8b-ITA/7fe45c20-a2c0-4acf-9425-651a1ec3b0d0.json 
b/data/hfopenllm_v2/DeepMount00/Llama-3.1-8b-ITA/7fe45c20-a2c0-4acf-9425-651a1ec3b0d0.json new file mode 100644 index 000000000..3e1809697 --- /dev/null +++ b/data/hfopenllm_v2/DeepMount00/Llama-3.1-8b-ITA/7fe45c20-a2c0-4acf-9425-651a1ec3b0d0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DeepMount00_Llama-3.1-8b-Ita/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8b-Ita", + "id": "DeepMount00/Llama-3.1-8b-Ita", + "developer": "DeepMount00", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Unknown", + "params_billions": 0.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5365 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.517 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1707 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4487 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.396 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DeepMount00/Llama-3.1-8b-ITA/baf93ef6-56f3-4809-93f6-32dcf4730388.json b/data/hfopenllm_v2/DeepMount00/Llama-3.1-8b-ITA/baf93ef6-56f3-4809-93f6-32dcf4730388.json new file mode 100644 index 000000000..3d7b72221 --- /dev/null +++ b/data/hfopenllm_v2/DeepMount00/Llama-3.1-8b-ITA/baf93ef6-56f3-4809-93f6-32dcf4730388.json @@ -0,0 +1,132 @@ +{ + 
"schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DeepMount00_Llama-3.1-8b-ITA/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8b-ITA", + "id": "DeepMount00/Llama-3.1-8b-ITA", + "developer": "DeepMount00", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7917 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5109 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1088 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2878 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4136 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3876 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DeepMount00/Llama-3.1-Distilled/f6df14bd-207c-4fea-b789-c9f9aef749b3.json b/data/hfopenllm_v2/DeepMount00/Llama-3.1-Distilled/f6df14bd-207c-4fea-b789-c9f9aef749b3.json new file mode 100644 index 000000000..5d76d499b --- /dev/null +++ b/data/hfopenllm_v2/DeepMount00/Llama-3.1-Distilled/f6df14bd-207c-4fea-b789-c9f9aef749b3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DeepMount00_Llama-3.1-Distilled/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", 
+ "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-Distilled", + "id": "DeepMount00/Llama-3.1-Distilled", + "developer": "DeepMount00", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7844 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5101 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2032 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4058 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3782 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DeepMount00/Qwen2-1.5B-Ita/97766a7f-cf5b-46ae-b51e-5c5702ae000b.json b/data/hfopenllm_v2/DeepMount00/Qwen2-1.5B-Ita/97766a7f-cf5b-46ae-b51e-5c5702ae000b.json new file mode 100644 index 000000000..dfa22cee3 --- /dev/null +++ b/data/hfopenllm_v2/DeepMount00/Qwen2-1.5B-Ita/97766a7f-cf5b-46ae-b51e-5c5702ae000b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DeepMount00_Qwen2-1.5B-Ita/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2-1.5B-Ita", + "id": "DeepMount00/Qwen2-1.5B-Ita", + "developer": "DeepMount00", + "inference_platform": "unknown", + 
"additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5173 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3981 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.114 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2626 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3504 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2772 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DeepMount00/Qwen2-1.5B-Ita_v2/d5cd2a1b-3def-4b33-a8fe-4b02e090db27.json b/data/hfopenllm_v2/DeepMount00/Qwen2-1.5B-Ita_v2/d5cd2a1b-3def-4b33-a8fe-4b02e090db27.json new file mode 100644 index 000000000..28635f94d --- /dev/null +++ b/data/hfopenllm_v2/DeepMount00/Qwen2-1.5B-Ita_v2/d5cd2a1b-3def-4b33-a8fe-4b02e090db27.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DeepMount00_Qwen2-1.5B-Ita_v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2-1.5B-Ita_v2", + "id": "DeepMount00/Qwen2-1.5B-Ita_v2", + "developer": "DeepMount00", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": 
"hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3954 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0967 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3702 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3032 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DeepMount00/Qwen2-1.5B-Ita_v3/275d4bf0-566c-4b50-86b9-38c7f45df143.json b/data/hfopenllm_v2/DeepMount00/Qwen2-1.5B-Ita_v3/275d4bf0-566c-4b50-86b9-38c7f45df143.json new file mode 100644 index 000000000..4d8e80e4f --- /dev/null +++ b/data/hfopenllm_v2/DeepMount00/Qwen2-1.5B-Ita_v3/275d4bf0-566c-4b50-86b9-38c7f45df143.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DeepMount00_Qwen2-1.5B-Ita_v3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2-1.5B-Ita_v3", + "id": "DeepMount00/Qwen2-1.5B-Ita_v3", + "developer": "DeepMount00", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.489 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3948 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1042 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2534 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3742 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3018 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DeepMount00/Qwen2-1.5B-Ita_v5/aa504db9-81f3-424f-b7d9-683ebe31f5d8.json b/data/hfopenllm_v2/DeepMount00/Qwen2-1.5B-Ita_v5/aa504db9-81f3-424f-b7d9-683ebe31f5d8.json new file mode 100644 index 000000000..d9344d807 --- /dev/null +++ b/data/hfopenllm_v2/DeepMount00/Qwen2-1.5B-Ita_v5/aa504db9-81f3-424f-b7d9-683ebe31f5d8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DeepMount00_Qwen2-1.5B-Ita_v5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2-1.5B-Ita_v5", + "id": "DeepMount00/Qwen2-1.5B-Ita_v5", + "developer": "DeepMount00", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4987 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4032 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1178 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2542 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3422 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2943 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DeepMount00/Qwen2-1.5B-Ita_v6/2cc209b7-ef10-435d-a840-b904ab741491.json b/data/hfopenllm_v2/DeepMount00/Qwen2-1.5B-Ita_v6/2cc209b7-ef10-435d-a840-b904ab741491.json new file mode 100644 index 000000000..a299046f1 --- /dev/null +++ b/data/hfopenllm_v2/DeepMount00/Qwen2-1.5B-Ita_v6/2cc209b7-ef10-435d-a840-b904ab741491.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DeepMount00_Qwen2-1.5B-Ita_v6/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2-1.5B-Ita_v6", + "id": "DeepMount00/Qwen2-1.5B-Ita_v6", + "developer": "DeepMount00", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.497 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2999 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4249 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": 
"hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0846 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3755 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2872 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DeepMount00/Qwen2.5-7B-Instruct-MathCoder/9b9390ac-fd65-4a58-9834-5352aa340cdc.json b/data/hfopenllm_v2/DeepMount00/Qwen2.5-7B-Instruct-MathCoder/9b9390ac-fd65-4a58-9834-5352aa340cdc.json new file mode 100644 index 000000000..b0e72b4e2 --- /dev/null +++ b/data/hfopenllm_v2/DeepMount00/Qwen2.5-7B-Instruct-MathCoder/9b9390ac-fd65-4a58-9834-5352aa340cdc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DeepMount00_Qwen2.5-7B-Instruct-MathCoder/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-Instruct-MathCoder", + "id": "DeepMount00/Qwen2.5-7B-Instruct-MathCoder", + "developer": "DeepMount00", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.153 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2998 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0008 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2626 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3806 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1118 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DeepMount00/Qwen2.5-7B-Instruct-MathCoder/ea1a36fb-66c0-4b1a-bdac-7ec2602a7c65.json b/data/hfopenllm_v2/DeepMount00/Qwen2.5-7B-Instruct-MathCoder/ea1a36fb-66c0-4b1a-bdac-7ec2602a7c65.json deleted file mode 100644 index 1df519897..000000000 --- a/data/hfopenllm_v2/DeepMount00/Qwen2.5-7B-Instruct-MathCoder/ea1a36fb-66c0-4b1a-bdac-7ec2602a7c65.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DeepMount00_Qwen2.5-7B-Instruct-MathCoder/1762652579.55323", - "retrieved_timestamp": "1762652579.553231", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DeepMount00/Qwen2.5-7B-Instruct-MathCoder", - "developer": "DeepMount00", - "inference_platform": "unknown", - "id": "DeepMount00/Qwen2.5-7B-Instruct-MathCoder", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15302508455342934 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2998444769655102 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3806354166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11178523936170212 - } - } - ] -} diff --git a/data/hfopenllm_v2/DeepMount00/mergekit-ties-okvgjfz/34350829-d42d-4e67-b23f-171044428c1f.json b/data/hfopenllm_v2/DeepMount00/mergekit-ties-okvgjfz/34350829-d42d-4e67-b23f-171044428c1f.json deleted file mode 100644 index 50ccd99ad..000000000 --- a/data/hfopenllm_v2/DeepMount00/mergekit-ties-okvgjfz/34350829-d42d-4e67-b23f-171044428c1f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DeepMount00_mergekit-ties-okvgjfz/1762652579.5535848", - "retrieved_timestamp": "1762652579.553586", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DeepMount00/mergekit-ties-okvgjfz", - "developer": "DeepMount00", - "inference_platform": "unknown", - "id": "DeepMount00/mergekit-ties-okvgjfz", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15302508455342934 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2998444769655102 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3806354166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11178523936170212 - } - } - ] -} diff --git a/data/hfopenllm_v2/DeepMount00/mergekit-ties-okvgjfz/4efe5cd4-6b8a-4951-a63a-4c7dc390bbec.json b/data/hfopenllm_v2/DeepMount00/mergekit-ties-okvgjfz/4efe5cd4-6b8a-4951-a63a-4c7dc390bbec.json new file mode 100644 index 
000000000..9daeaca32 --- /dev/null +++ b/data/hfopenllm_v2/DeepMount00/mergekit-ties-okvgjfz/4efe5cd4-6b8a-4951-a63a-4c7dc390bbec.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DeepMount00_mergekit-ties-okvgjfz/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mergekit-ties-okvgjfz", + "id": "DeepMount00/mergekit-ties-okvgjfz", + "developer": "DeepMount00", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.153 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2998 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0008 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2626 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3806 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1118 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Delta-Vector/Baldur-8B/4bc5a0db-1c88-4c61-9343-1d340305ecc5.json b/data/hfopenllm_v2/Delta-Vector/Baldur-8B/4bc5a0db-1c88-4c61-9343-1d340305ecc5.json new file mode 100644 index 000000000..4eaef8e92 --- /dev/null +++ b/data/hfopenllm_v2/Delta-Vector/Baldur-8B/4bc5a0db-1c88-4c61-9343-1d340305ecc5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Delta-Vector_Baldur-8B/1770682486.623709", 
+ "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Baldur-8B", + "id": "Delta-Vector/Baldur-8B", + "developer": "Delta-Vector", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4782 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5306 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1435 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4372 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3654 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Delta-Vector/Baldur-8B/6267c5c6-abd3-4eb0-94ca-5c569414e7a9.json b/data/hfopenllm_v2/Delta-Vector/Baldur-8B/6267c5c6-abd3-4eb0-94ca-5c569414e7a9.json deleted file mode 100644 index e19464978..000000000 --- a/data/hfopenllm_v2/Delta-Vector/Baldur-8B/6267c5c6-abd3-4eb0-94ca-5c569414e7a9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Delta-Vector_Baldur-8B/1762652579.5538838", - "retrieved_timestamp": "1762652579.553885", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF 
Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Delta-Vector/Baldur-8B", - "developer": "Delta-Vector", - "inference_platform": "unknown", - "id": "Delta-Vector/Baldur-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47818233398493776 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5305842954529679 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43715624999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3654421542553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/Delta-Vector/Control-8B-V1.1/20796a87-8691-44b9-9b60-85ad3c7f4b7b.json b/data/hfopenllm_v2/Delta-Vector/Control-8B-V1.1/20796a87-8691-44b9-9b60-85ad3c7f4b7b.json deleted file mode 100644 index e59ce6104..000000000 --- a/data/hfopenllm_v2/Delta-Vector/Control-8B-V1.1/20796a87-8691-44b9-9b60-85ad3c7f4b7b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Delta-Vector_Control-8B-V1.1/1762652579.5543838", - "retrieved_timestamp": "1762652579.554385", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Delta-Vector/Control-8B-V1.1", - "developer": "Delta-Vector", - "inference_platform": "unknown", - "id": "Delta-Vector/Control-8B-V1.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5696562897556262 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.49928406748541837 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12764350453172205 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42372916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37450132978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/Delta-Vector/Control-8B-V1.1/74527f51-dcec-4b82-8ba8-075c933404f5.json b/data/hfopenllm_v2/Delta-Vector/Control-8B-V1.1/74527f51-dcec-4b82-8ba8-075c933404f5.json new file mode 100644 index 000000000..5f8e589b6 --- /dev/null +++ b/data/hfopenllm_v2/Delta-Vector/Control-8B-V1.1/74527f51-dcec-4b82-8ba8-075c933404f5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Delta-Vector_Control-8B-V1.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Control-8B-V1.1", + "id": "Delta-Vector/Control-8B-V1.1", + "developer": "Delta-Vector", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5697 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4993 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1276 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4237 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3745 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Delta-Vector/Control-8B/26dc4843-56a7-45b5-a61a-386e260574a2.json b/data/hfopenllm_v2/Delta-Vector/Control-8B/26dc4843-56a7-45b5-a61a-386e260574a2.json deleted file mode 100644 index c1f89d015..000000000 --- a/data/hfopenllm_v2/Delta-Vector/Control-8B/26dc4843-56a7-45b5-a61a-386e260574a2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Delta-Vector_Control-8B/1762652579.554166", - "retrieved_timestamp": "1762652579.554166", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Delta-Vector/Control-8B", - "developer": "Delta-Vector", - "inference_platform": "unknown", - "id": "Delta-Vector/Control-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5489733906035985 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5041458754993735 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13897280966767372 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43554166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3731715425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/Delta-Vector/Control-8B/ac31bc90-3854-4d38-925d-ef8dc7e75d24.json b/data/hfopenllm_v2/Delta-Vector/Control-8B/ac31bc90-3854-4d38-925d-ef8dc7e75d24.json new file mode 100644 index 000000000..e9f8fccf8 --- /dev/null +++ b/data/hfopenllm_v2/Delta-Vector/Control-8B/ac31bc90-3854-4d38-925d-ef8dc7e75d24.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Delta-Vector_Control-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Control-8B", + "id": "Delta-Vector/Control-8B", + "developer": "Delta-Vector", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.549 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5041 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.139 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3163 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4355 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3732 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Delta-Vector/Darkens-8B/88583cff-1adc-4b1b-8e68-07f0074d0ae2.json b/data/hfopenllm_v2/Delta-Vector/Darkens-8B/88583cff-1adc-4b1b-8e68-07f0074d0ae2.json new file mode 100644 index 
000000000..da2477729 --- /dev/null +++ b/data/hfopenllm_v2/Delta-Vector/Darkens-8B/88583cff-1adc-4b1b-8e68-07f0074d0ae2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Delta-Vector_Darkens-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Darkens-8B", + "id": "Delta-Vector/Darkens-8B", + "developer": "Delta-Vector", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 8.414 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2548 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5251 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0589 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3247 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4106 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3736 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Delta-Vector/Darkens-8B/a1689935-8ccb-49a8-8c2a-8dbf32b7ac02.json b/data/hfopenllm_v2/Delta-Vector/Darkens-8B/a1689935-8ccb-49a8-8c2a-8dbf32b7ac02.json deleted file mode 100644 index f5395d084..000000000 --- a/data/hfopenllm_v2/Delta-Vector/Darkens-8B/a1689935-8ccb-49a8-8c2a-8dbf32b7ac02.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Delta-Vector_Darkens-8B/1762652579.5545971", - "retrieved_timestamp": 
"1762652579.5545971", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Delta-Vector/Darkens-8B", - "developer": "Delta-Vector", - "inference_platform": "unknown", - "id": "Delta-Vector/Darkens-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 8.414 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25476624245889795 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5250590567372793 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4105520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3735871010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/Delta-Vector/Henbane-7b-attempt2/73f9a017-15ac-42e6-9600-69b411de4086.json b/data/hfopenllm_v2/Delta-Vector/Henbane-7b-attempt2/73f9a017-15ac-42e6-9600-69b411de4086.json deleted file mode 100644 index 439ab555c..000000000 --- a/data/hfopenllm_v2/Delta-Vector/Henbane-7b-attempt2/73f9a017-15ac-42e6-9600-69b411de4086.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Delta-Vector_Henbane-7b-attempt2/1762652579.55481", - "retrieved_timestamp": "1762652579.55481", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Delta-Vector/Henbane-7b-attempt2", - "developer": "Delta-Vector", - "inference_platform": "unknown", - "id": "Delta-Vector/Henbane-7b-attempt2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4157335868828043 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5061177974093075 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22734138972809667 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39734375000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4027593085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/Delta-Vector/Henbane-7b-attempt2/fadbac9e-7224-41d1-abfa-7039cbcba9f6.json b/data/hfopenllm_v2/Delta-Vector/Henbane-7b-attempt2/fadbac9e-7224-41d1-abfa-7039cbcba9f6.json new file mode 100644 index 000000000..18bdc7508 --- /dev/null +++ b/data/hfopenllm_v2/Delta-Vector/Henbane-7b-attempt2/fadbac9e-7224-41d1-abfa-7039cbcba9f6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Delta-Vector_Henbane-7b-attempt2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Henbane-7b-attempt2", + "id": "Delta-Vector/Henbane-7b-attempt2", + "developer": "Delta-Vector", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4157 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5061 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, 
+ "max_score": 1.0 + }, + "score_details": { + "score": 0.2273 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3973 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4028 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Delta-Vector/Odin-9B/1fb90540-0fa0-44ca-ad67-1e3503f6b729.json b/data/hfopenllm_v2/Delta-Vector/Odin-9B/1fb90540-0fa0-44ca-ad67-1e3503f6b729.json new file mode 100644 index 000000000..e6b6d563d --- /dev/null +++ b/data/hfopenllm_v2/Delta-Vector/Odin-9B/1fb90540-0fa0-44ca-ad67-1e3503f6b729.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Delta-Vector_Odin-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Odin-9B", + "id": "Delta-Vector/Odin-9B", + "developer": "Delta-Vector", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3692 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.544 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.145 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3414 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4648 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4047 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Delta-Vector/Odin-9B/586d4e20-c1f4-466a-8488-07ac18ad6253.json b/data/hfopenllm_v2/Delta-Vector/Odin-9B/586d4e20-c1f4-466a-8488-07ac18ad6253.json deleted file mode 100644 index 84fcf9dd3..000000000 --- a/data/hfopenllm_v2/Delta-Vector/Odin-9B/586d4e20-c1f4-466a-8488-07ac18ad6253.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Delta-Vector_Odin-9B/1762652579.555037", - "retrieved_timestamp": "1762652579.555038", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Delta-Vector/Odin-9B", - "developer": "Delta-Vector", - "inference_platform": "unknown", - "id": "Delta-Vector/Odin-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3691970637907419 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5440253444823155 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14501510574018128 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3414429530201342 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46478125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.4046708776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/Delta-Vector/Tor-8B/047784e2-c1ee-40d9-a60d-e43504825801.json b/data/hfopenllm_v2/Delta-Vector/Tor-8B/047784e2-c1ee-40d9-a60d-e43504825801.json new file mode 100644 index 000000000..226245e5e --- /dev/null +++ b/data/hfopenllm_v2/Delta-Vector/Tor-8B/047784e2-c1ee-40d9-a60d-e43504825801.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Delta-Vector_Tor-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Tor-8B", + "id": "Delta-Vector/Tor-8B", + "developer": "Delta-Vector", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 8.414 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2382 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5209 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0589 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3238 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4092 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.373 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Delta-Vector/Tor-8B/ce7e8e58-e323-4704-b6f3-7fa6c5c3b7f2.json b/data/hfopenllm_v2/Delta-Vector/Tor-8B/ce7e8e58-e323-4704-b6f3-7fa6c5c3b7f2.json deleted file mode 100644 index 42791ce57..000000000 --- 
a/data/hfopenllm_v2/Delta-Vector/Tor-8B/ce7e8e58-e323-4704-b6f3-7fa6c5c3b7f2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Delta-Vector_Tor-8B/1762652579.555239", - "retrieved_timestamp": "1762652579.55524", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Delta-Vector/Tor-8B", - "developer": "Delta-Vector", - "inference_platform": "unknown", - "id": "Delta-Vector/Tor-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 8.414 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23815476269631244 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5209108776928992 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40921874999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37300531914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/DevQuasar/DevQuasar-R1-Uncensored-Llama-8B/ee60453d-2d51-46f7-8a18-c651d590f0e7.json b/data/hfopenllm_v2/DevQuasar/DevQuasar-R1-Uncensored-Llama-8B/ee60453d-2d51-46f7-8a18-c651d590f0e7.json new file mode 100644 index 000000000..0c6a93423 --- /dev/null +++ b/data/hfopenllm_v2/DevQuasar/DevQuasar-R1-Uncensored-Llama-8B/ee60453d-2d51-46f7-8a18-c651d590f0e7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DevQuasar_DevQuasar-R1-Uncensored-Llama-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DevQuasar-R1-Uncensored-Llama-8B", + "id": "DevQuasar/DevQuasar-R1-Uncensored-Llama-8B", + "developer": "DevQuasar", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + 
"evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3849 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5118 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3308 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3473 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4436 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3615 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Dongwei/DeepSeek-R1-Distill-Qwen-7B-GRPO/b0ac4b11-f7b4-4753-baae-310a92f08259.json b/data/hfopenllm_v2/Dongwei/DeepSeek-R1-Distill-Qwen-7B-GRPO/b0ac4b11-f7b4-4753-baae-310a92f08259.json new file mode 100644 index 000000000..c68c27f32 --- /dev/null +++ b/data/hfopenllm_v2/Dongwei/DeepSeek-R1-Distill-Qwen-7B-GRPO/b0ac4b11-f7b4-4753-baae-310a92f08259.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Dongwei_DeepSeek-R1-Distill-Qwen-7B-GRPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-R1-Distill-Qwen-7B-GRPO", + "id": "Dongwei/DeepSeek-R1-Distill-Qwen-7B-GRPO", + "developer": "Dongwei", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4038 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3443 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1956 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2322 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/L3-8B-R1-WolfCore-V1.5-test/324db8b3-38c7-4a2c-82e8-7bebfa38e760.json b/data/hfopenllm_v2/DoppelReflEx/L3-8B-R1-WolfCore-V1.5-test/324db8b3-38c7-4a2c-82e8-7bebfa38e760.json new file mode 100644 index 000000000..735b5e369 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/L3-8B-R1-WolfCore-V1.5-test/324db8b3-38c7-4a2c-82e8-7bebfa38e760.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_L3-8B-R1-WolfCore-V1.5-test/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3-8B-R1-WolfCore-V1.5-test", + "id": "DoppelReflEx/L3-8B-R1-WolfCore-V1.5-test", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.3955 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5315 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1231 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3263 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3841 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3728 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/L3-8B-R1-WolfCore-V1.5-test/3c4058cd-238b-4b01-870d-8693f5ce1b8f.json b/data/hfopenllm_v2/DoppelReflEx/L3-8B-R1-WolfCore-V1.5-test/3c4058cd-238b-4b01-870d-8693f5ce1b8f.json deleted file mode 100644 index 0f91665f5..000000000 --- a/data/hfopenllm_v2/DoppelReflEx/L3-8B-R1-WolfCore-V1.5-test/3c4058cd-238b-4b01-870d-8693f5ce1b8f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_L3-8B-R1-WolfCore-V1.5-test/1762652579.556192", - "retrieved_timestamp": "1762652579.556193", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DoppelReflEx/L3-8B-R1-WolfCore-V1.5-test", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/L3-8B-R1-WolfCore-V1.5-test", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3955006050612375 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5314954163679548 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12311178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3263422818791946 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3840729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37275598404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/DoppelReflEx/L3-8B-R1-WolfCore/54dd9033-61b9-4f26-9cde-e04c7136524b.json b/data/hfopenllm_v2/DoppelReflEx/L3-8B-R1-WolfCore/54dd9033-61b9-4f26-9cde-e04c7136524b.json new file mode 100644 index 000000000..806bf87b7 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/L3-8B-R1-WolfCore/54dd9033-61b9-4f26-9cde-e04c7136524b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_L3-8B-R1-WolfCore/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3-8B-R1-WolfCore", + "id": "DoppelReflEx/L3-8B-R1-WolfCore", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3775 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5318 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1631 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3289 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4277 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3717 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/L3-8B-R1-WolfCore/6d8d63c0-ad69-4224-8250-b1664f6abbcf.json b/data/hfopenllm_v2/DoppelReflEx/L3-8B-R1-WolfCore/6d8d63c0-ad69-4224-8250-b1664f6abbcf.json deleted file mode 100644 index 12b2bb198..000000000 --- a/data/hfopenllm_v2/DoppelReflEx/L3-8B-R1-WolfCore/6d8d63c0-ad69-4224-8250-b1664f6abbcf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_L3-8B-R1-WolfCore/1762652579.555949", - "retrieved_timestamp": "1762652579.5559502", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DoppelReflEx/L3-8B-R1-WolfCore", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/L3-8B-R1-WolfCore", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3775404814780339 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.531794652653343 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16314199395770393 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42766666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3716755319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/DoppelReflEx/L3-8B-WolfCore/c6771d5c-acaf-4b17-96b4-abf3b75bc68f.json b/data/hfopenllm_v2/DoppelReflEx/L3-8B-WolfCore/c6771d5c-acaf-4b17-96b4-abf3b75bc68f.json deleted file mode 100644 index b1bec9b9f..000000000 --- a/data/hfopenllm_v2/DoppelReflEx/L3-8B-WolfCore/c6771d5c-acaf-4b17-96b4-abf3b75bc68f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_L3-8B-WolfCore/1762652579.556399", - "retrieved_timestamp": "1762652579.5564", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DoppelReflEx/L3-8B-WolfCore", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/L3-8B-WolfCore", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4021950646506824 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5181980783946081 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09818731117824774 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39728125000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3705119680851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/DoppelReflEx/L3-8B-WolfCore/d0973d6c-373c-41cd-9e62-52470c044dac.json b/data/hfopenllm_v2/DoppelReflEx/L3-8B-WolfCore/d0973d6c-373c-41cd-9e62-52470c044dac.json new file mode 100644 index 000000000..1553f5b9c --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/L3-8B-WolfCore/d0973d6c-373c-41cd-9e62-52470c044dac.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_L3-8B-WolfCore/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": 
"documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3-8B-WolfCore", + "id": "DoppelReflEx/L3-8B-WolfCore", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4022 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5182 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0982 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3973 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3705 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-FoxFrame-test/da15da67-b316-4c2e-86a5-c1f88eece9cb.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-FoxFrame-test/da15da67-b316-4c2e-86a5-c1f88eece9cb.json new file mode 100644 index 000000000..49166bb43 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-FoxFrame-test/da15da67-b316-4c2e-86a5-c1f88eece9cb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-FoxFrame-test/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MN-12B-FoxFrame-test", + "id": "DoppelReflEx/MN-12B-FoxFrame-test", + "developer": "DoppelReflEx", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4222 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5456 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1397 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4254 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3503 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-FoxFrame-test/ef5bb4eb-0875-4cc5-8e27-b59ffbd2e477.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-FoxFrame-test/ef5bb4eb-0875-4cc5-8e27-b59ffbd2e477.json deleted file mode 100644 index cc91284f6..000000000 --- a/data/hfopenllm_v2/DoppelReflEx/MN-12B-FoxFrame-test/ef5bb4eb-0875-4cc5-8e27-b59ffbd2e477.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-FoxFrame-test/1762652579.556618", - "retrieved_timestamp": "1762652579.556619", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-FoxFrame-test", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-FoxFrame-test", - "additional_details": { - "precision": "bfloat16", - 
"architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42220308780701876 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5456376527271466 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13972809667673716 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42540625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3503158244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-FoxFrame2-test/b0c34174-bfd0-4556-a3bf-92ec0ddf5ec4.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-FoxFrame2-test/b0c34174-bfd0-4556-a3bf-92ec0ddf5ec4.json new file mode 100644 index 000000000..b9e5d4976 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-FoxFrame2-test/b0c34174-bfd0-4556-a3bf-92ec0ddf5ec4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-FoxFrame2-test/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MN-12B-FoxFrame2-test", + "id": "DoppelReflEx/MN-12B-FoxFrame2-test", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4319 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5485 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + 
"dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1405 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3146 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4252 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3569 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-FoxFrame2-test/e46698de-8b2d-4b3c-b482-8cc8a3665eac.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-FoxFrame2-test/e46698de-8b2d-4b3c-b482-8cc8a3665eac.json deleted file mode 100644 index 5bce8409c..000000000 --- a/data/hfopenllm_v2/DoppelReflEx/MN-12B-FoxFrame2-test/e46698de-8b2d-4b3c-b482-8cc8a3665eac.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-FoxFrame2-test/1762652579.556837", - "retrieved_timestamp": "1762652579.5568378", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-FoxFrame2-test", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-FoxFrame2-test", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43189514931492884 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5484795753806021 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1404833836858006 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4251875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3568816489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-FoxFrame3-test/35351894-ea9d-456b-ab9a-c98686948e6b.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-FoxFrame3-test/35351894-ea9d-456b-ab9a-c98686948e6b.json deleted file mode 100644 index f9d1bb765..000000000 --- a/data/hfopenllm_v2/DoppelReflEx/MN-12B-FoxFrame3-test/35351894-ea9d-456b-ab9a-c98686948e6b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-FoxFrame3-test/1762652579.557049", - "retrieved_timestamp": "1762652579.5570502", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-FoxFrame3-test", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-FoxFrame3-test", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43231957871780213 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5394764281718397 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13217522658610273 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45976041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35289228723404253 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/DoppelReflEx/MN-12B-FoxFrame3-test/bce7b15d-1670-46db-bdff-24fb38bc3fd9.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-FoxFrame3-test/bce7b15d-1670-46db-bdff-24fb38bc3fd9.json new file mode 100644 index 000000000..9a33bd885 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-FoxFrame3-test/bce7b15d-1670-46db-bdff-24fb38bc3fd9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-FoxFrame3-test/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MN-12B-FoxFrame3-test", + "id": "DoppelReflEx/MN-12B-FoxFrame3-test", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4323 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5395 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1322 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4598 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3529 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Kakigori/15e5e02f-27b9-4063-b601-42c2b17180f9.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Kakigori/15e5e02f-27b9-4063-b601-42c2b17180f9.json new file mode 100644 index 
000000000..14634831b --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Kakigori/15e5e02f-27b9-4063-b601-42c2b17180f9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Kakigori/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MN-12B-Kakigori", + "id": "DoppelReflEx/MN-12B-Kakigori", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3593 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5416 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1193 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3247 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4052 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3581 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Kakigori/2f19082b-8377-4f63-8c5f-1aa25071a240.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Kakigori/2f19082b-8377-4f63-8c5f-1aa25071a240.json deleted file mode 100644 index 5dbffaa15..000000000 --- a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Kakigori/2f19082b-8377-4f63-8c5f-1aa25071a240.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/DoppelReflEx_MN-12B-Kakigori/1762652579.5572648", - "retrieved_timestamp": "1762652579.557266", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-Kakigori", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-Kakigori", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.359329911302012 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5415529337961275 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11933534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40521875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3581283244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame-Experiment-2/51b0c546-0dde-4668-a8b8-3b9753a31aa0.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame-Experiment-2/51b0c546-0dde-4668-a8b8-3b9753a31aa0.json new file mode 100644 index 000000000..91323e70b --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame-Experiment-2/51b0c546-0dde-4668-a8b8-3b9753a31aa0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-LilithFrame-Experiment-2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MN-12B-LilithFrame-Experiment-2", + "id": "DoppelReflEx/MN-12B-LilithFrame-Experiment-2", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + 
"hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4299 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4983 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1073 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3255 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3804 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3276 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame-Experiment-2/630c100f-c88d-42a7-9614-bd9a958eab2b.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame-Experiment-2/630c100f-c88d-42a7-9614-bd9a958eab2b.json deleted file mode 100644 index 95574cd7b..000000000 --- a/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame-Experiment-2/630c100f-c88d-42a7-9614-bd9a958eab2b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-LilithFrame-Experiment-2/1762652579.5578592", - "retrieved_timestamp": "1762652579.5578601", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-LilithFrame-Experiment-2", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-LilithFrame-Experiment-2", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4299469851106176 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4982672766561394 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10725075528700906 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32550335570469796 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3804479166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32762632978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame-Experiment-3/37292ca7-9e82-4c80-bc6e-bc7e1be7a95e.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame-Experiment-3/37292ca7-9e82-4c80-bc6e-bc7e1be7a95e.json deleted file mode 100644 index 908836600..000000000 --- a/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame-Experiment-3/37292ca7-9e82-4c80-bc6e-bc7e1be7a95e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-LilithFrame-Experiment-3/1762652579.558079", - "retrieved_timestamp": "1762652579.558079", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-LilithFrame-Experiment-3", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-LilithFrame-Experiment-3", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4127858526487498 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5468080647121653 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13444108761329304 - } - }, - { - "evaluation_name": "GPQA", - 
"metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4038541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3603723404255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame-Experiment-3/45842b1c-cf68-44a7-928f-2da454cdd13f.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame-Experiment-3/45842b1c-cf68-44a7-928f-2da454cdd13f.json new file mode 100644 index 000000000..e8a0610db --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame-Experiment-3/45842b1c-cf68-44a7-928f-2da454cdd13f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-LilithFrame-Experiment-3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MN-12B-LilithFrame-Experiment-3", + "id": "DoppelReflEx/MN-12B-LilithFrame-Experiment-3", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4128 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5468 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1344 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.328 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4039 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3604 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame-Experiment-4/c15cdefd-dbe3-432e-aab0-3c43540cd320.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame-Experiment-4/c15cdefd-dbe3-432e-aab0-3c43540cd320.json new file mode 100644 index 000000000..61d3a73e2 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame-Experiment-4/c15cdefd-dbe3-432e-aab0-3c43540cd320.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-LilithFrame-Experiment-4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MN-12B-LilithFrame-Experiment-4", + "id": "DoppelReflEx/MN-12B-LilithFrame-Experiment-4", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3981 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5534 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1224 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3171 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.4371 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3649 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame-Experiment-4/ecc18f9c-c495-4ae6-8fd8-b2f84fb453ac.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame-Experiment-4/ecc18f9c-c495-4ae6-8fd8-b2f84fb453ac.json deleted file mode 100644 index 4ec203b1b..000000000 --- a/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame-Experiment-4/ecc18f9c-c495-4ae6-8fd8-b2f84fb453ac.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-LilithFrame-Experiment-4/1762652579.5582879", - "retrieved_timestamp": "1762652579.5582888", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-LilithFrame-Experiment-4", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-LilithFrame-Experiment-4", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3981480250180632 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5534370722864824 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12235649546827794 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43706249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3648603723404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame/1f489afa-a01d-40f3-836a-9e386c502d1d.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame/1f489afa-a01d-40f3-836a-9e386c502d1d.json new file mode 100644 index 000000000..87c8dd012 
--- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame/1f489afa-a01d-40f3-836a-9e386c502d1d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-LilithFrame/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MN-12B-LilithFrame", + "id": "DoppelReflEx/MN-12B-LilithFrame", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.451 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4944 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1156 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3196 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3896 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3256 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame/3d68e2fb-06cc-43b9-830b-f1cd02f12166.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame/3d68e2fb-06cc-43b9-830b-f1cd02f12166.json deleted file mode 100644 index f355cb9cf..000000000 --- a/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame/3d68e2fb-06cc-43b9-830b-f1cd02f12166.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/DoppelReflEx_MN-12B-LilithFrame/1762652579.557674", - "retrieved_timestamp": "1762652579.5576751", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-LilithFrame", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-LilithFrame", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43604192431636946 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4956125598349656 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3842604166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32372007978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame/94bcc87e-eb06-4321-9b72-2f99168cf92a.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame/94bcc87e-eb06-4321-9b72-2f99168cf92a.json new file mode 100644 index 000000000..8a84d871c --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame/94bcc87e-eb06-4321-9b72-2f99168cf92a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-LilithFrame/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MN-12B-LilithFrame", + "id": "DoppelReflEx/MN-12B-LilithFrame", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.436 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4956 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0589 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3205 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3843 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3237 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame/a04a8775-8b4d-4608-9692-47af9f7ed5a7.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame/a04a8775-8b4d-4608-9692-47af9f7ed5a7.json deleted file mode 100644 index 1a023bf0d..000000000 --- a/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame/a04a8775-8b4d-4608-9692-47af9f7ed5a7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-LilithFrame/1762652579.557468", - "retrieved_timestamp": "1762652579.557469", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-LilithFrame", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-LilithFrame", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4509545782966972 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4944264226434414 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11555891238670694 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3895625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3256316489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-GreenSnake/9b9eb072-4120-4a6a-a565-27136e617f10.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-GreenSnake/9b9eb072-4120-4a6a-a565-27136e617f10.json deleted file mode 100644 index 46ca3ca27..000000000 --- a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-GreenSnake/9b9eb072-4120-4a6a-a565-27136e617f10.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-GreenSnake/1762652579.5585039", - "retrieved_timestamp": "1762652579.558505", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-Mimicore-GreenSnake", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-Mimicore-GreenSnake", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47800724300411795 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5480509710089697 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13897280966767372 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4305833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3651097074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-GreenSnake/c0bc9811-4d7c-412f-a12b-3e6eab2e5a6f.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-GreenSnake/c0bc9811-4d7c-412f-a12b-3e6eab2e5a6f.json new file mode 100644 index 000000000..c66fd0b26 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-GreenSnake/c0bc9811-4d7c-412f-a12b-3e6eab2e5a6f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-GreenSnake/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MN-12B-Mimicore-GreenSnake", + "id": "DoppelReflEx/MN-12B-Mimicore-GreenSnake", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.478 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5481 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.139 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3247 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4306 + } + }, + { + 
"evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3651 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Nocturne/6a21892f-1d11-4c59-8894-8800822b2e72.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Nocturne/6a21892f-1d11-4c59-8894-8800822b2e72.json deleted file mode 100644 index 89d912757..000000000 --- a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Nocturne/6a21892f-1d11-4c59-8894-8800822b2e72.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-Nocturne/1762652579.558723", - "retrieved_timestamp": "1762652579.5587242", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-Mimicore-Nocturne", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-Mimicore-Nocturne", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3956502081144696 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5703329773483826 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10574018126888217 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45690625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36336436170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Nocturne/b5a8b278-69e9-41ba-89ee-8fd6b2d90a1c.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Nocturne/b5a8b278-69e9-41ba-89ee-8fd6b2d90a1c.json new file mode 100644 index 000000000..8af1a665e --- /dev/null +++ 
b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Nocturne/b5a8b278-69e9-41ba-89ee-8fd6b2d90a1c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-Nocturne/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MN-12B-Mimicore-Nocturne", + "id": "DoppelReflEx/MN-12B-Mimicore-Nocturne", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3957 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5703 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1057 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3196 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4569 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3634 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi-v2-Experiment/a3ad7f0f-64bd-42a1-bc7d-d7d4cbbd80fd.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi-v2-Experiment/a3ad7f0f-64bd-42a1-bc7d-d7d4cbbd80fd.json new file mode 100644 index 000000000..eb3e1d5ef --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi-v2-Experiment/a3ad7f0f-64bd-42a1-bc7d-d7d4cbbd80fd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + 
"evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-Orochi-v2-Experiment/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MN-12B-Mimicore-Orochi-v2-Experiment", + "id": "DoppelReflEx/MN-12B-Mimicore-Orochi-v2-Experiment", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2842 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5323 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0612 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4574 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3423 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi-v2-Experiment/db8eedcc-1dcf-47af-9c2b-a72da97146ca.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi-v2-Experiment/db8eedcc-1dcf-47af-9c2b-a72da97146ca.json deleted file mode 100644 index deb24f416..000000000 --- a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi-v2-Experiment/db8eedcc-1dcf-47af-9c2b-a72da97146ca.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-Orochi-v2-Experiment/1762652579.5591779", - 
"retrieved_timestamp": "1762652579.559179", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-Mimicore-Orochi-v2-Experiment", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-Mimicore-Orochi-v2-Experiment", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2842413684579139 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5322525988273211 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45737500000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3423371010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi-v3-Experiment/8198ab16-4a8b-4da9-8e8a-d1e3beb02839.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi-v3-Experiment/8198ab16-4a8b-4da9-8e8a-d1e3beb02839.json deleted file mode 100644 index 4e072570b..000000000 --- a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi-v3-Experiment/8198ab16-4a8b-4da9-8e8a-d1e3beb02839.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-Orochi-v3-Experiment/1762652579.559391", - "retrieved_timestamp": "1762652579.559392", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-Mimicore-Orochi-v3-Experiment", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-Mimicore-Orochi-v3-Experiment", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, 
- "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4101628124487471 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5437817873983797 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1216012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44379166666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.339594414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi-v3-Experiment/f07c3a4a-2a8e-45c4-a726-be95726df2db.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi-v3-Experiment/f07c3a4a-2a8e-45c4-a726-be95726df2db.json new file mode 100644 index 000000000..368728f64 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi-v3-Experiment/f07c3a4a-2a8e-45c4-a726-be95726df2db.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-Orochi-v3-Experiment/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MN-12B-Mimicore-Orochi-v3-Experiment", + "id": "DoppelReflEx/MN-12B-Mimicore-Orochi-v3-Experiment", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4102 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5438 + } + }, + { + "evaluation_name": "MATH Level 5", + 
"source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1216 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4438 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3396 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi-v4-Experiment/e4e71999-6f83-4745-8a9d-66e711e39ac3.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi-v4-Experiment/e4e71999-6f83-4745-8a9d-66e711e39ac3.json deleted file mode 100644 index 481ca3d50..000000000 --- a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi-v4-Experiment/e4e71999-6f83-4745-8a9d-66e711e39ac3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-Orochi-v4-Experiment/1762652579.559606", - "retrieved_timestamp": "1762652579.559606", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-Mimicore-Orochi-v4-Experiment", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-Mimicore-Orochi-v4-Experiment", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4320702402957486 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5462502212045214 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.12084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4449375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3519780585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi-v4-Experiment/f36d56b8-cd77-4d69-a51d-39025bcfcdfd.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi-v4-Experiment/f36d56b8-cd77-4d69-a51d-39025bcfcdfd.json new file mode 100644 index 000000000..e36c363a4 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi-v4-Experiment/f36d56b8-cd77-4d69-a51d-39025bcfcdfd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-Orochi-v4-Experiment/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MN-12B-Mimicore-Orochi-v4-Experiment", + "id": "DoppelReflEx/MN-12B-Mimicore-Orochi-v4-Experiment", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4321 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5463 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1208 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + 
"source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4449 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.352 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi/65acabdc-ea5f-426c-820b-2b79f2b20b44.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi/65acabdc-ea5f-426c-820b-2b79f2b20b44.json new file mode 100644 index 000000000..443a4c901 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi/65acabdc-ea5f-426c-820b-2b79f2b20b44.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-Orochi/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MN-12B-Mimicore-Orochi", + "id": "DoppelReflEx/MN-12B-Mimicore-Orochi", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.462 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5498 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.136 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3129 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.4546 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3447 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi/f1bfef73-3586-4f9d-80ca-71b0fb00aadd.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi/f1bfef73-3586-4f9d-80ca-71b0fb00aadd.json deleted file mode 100644 index 654ef6b33..000000000 --- a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi/f1bfef73-3586-4f9d-80ca-71b0fb00aadd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-Orochi/1762652579.558937", - "retrieved_timestamp": "1762652579.558938", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-Mimicore-Orochi", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-Mimicore-Orochi", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4620451513096362 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.54977394640115 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13595166163141995 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45458333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34466422872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-1/96b00cfa-1383-4b36-a043-17eb39678ffc.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-1/96b00cfa-1383-4b36-a043-17eb39678ffc.json new file mode 100644 index 
000000000..995274f5e --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-1/96b00cfa-1383-4b36-a043-17eb39678ffc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MN-12B-Mimicore-WhiteSnake-v2-Experiment-1", + "id": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-1", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3909 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4866 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0785 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.379 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3114 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-1/aa2478d9-59bd-458b-abee-5669aa6280df.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-1/aa2478d9-59bd-458b-abee-5669aa6280df.json deleted file mode 100644 index 14c64323c..000000000 --- 
a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-1/aa2478d9-59bd-458b-abee-5669aa6280df.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-1/1762652579.5600362", - "retrieved_timestamp": "1762652579.5600362", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-1", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-1", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39090391272933595 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48656395204478037 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07854984894259819 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3789583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31141954787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-2/3b8a796e-6bde-4506-8335-bd3cc72482e1.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-2/3b8a796e-6bde-4506-8335-bd3cc72482e1.json new file mode 100644 index 000000000..037fde141 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-2/3b8a796e-6bde-4506-8335-bd3cc72482e1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MN-12B-Mimicore-WhiteSnake-v2-Experiment-2", + "id": 
"DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-2", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3124 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5126 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1125 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3975 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3314 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-2/66bd7a21-6f85-49b5-bc01-3f52ed8d1c64.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-2/66bd7a21-6f85-49b5-bc01-3f52ed8d1c64.json deleted file mode 100644 index 657c890f0..000000000 --- a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-2/66bd7a21-6f85-49b5-bc01-3f52ed8d1c64.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-2/1762652579.560246", - "retrieved_timestamp": "1762652579.560246", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-2", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-2", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31239333856389934 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5126398500939828 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11253776435045318 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39746875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33136635638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-3/1a3eefa6-7b3d-4541-93b0-8fe86f6bf038.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-3/1a3eefa6-7b3d-4541-93b0-8fe86f6bf038.json deleted file mode 100644 index b6c3baf74..000000000 --- a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-3/1a3eefa6-7b3d-4541-93b0-8fe86f6bf038.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-3/1762652579.56046", - "retrieved_timestamp": "1762652579.560461", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-3", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-3", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4302218114602588 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4811798810475259 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08987915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3684166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31981382978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-3/a93e99e2-ca13-4cdc-9904-7ae5cc82c623.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-3/a93e99e2-ca13-4cdc-9904-7ae5cc82c623.json new file mode 100644 index 000000000..acc3b57f9 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-3/a93e99e2-ca13-4cdc-9904-7ae5cc82c623.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MN-12B-Mimicore-WhiteSnake-v2-Experiment-3", + "id": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-3", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4302 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4812 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.0899 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3684 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3198 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-4/65d9e237-2757-459e-94e7-e382213e4eeb.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-4/65d9e237-2757-459e-94e7-e382213e4eeb.json new file mode 100644 index 000000000..bd2328696 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-4/65d9e237-2757-459e-94e7-e382213e4eeb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MN-12B-Mimicore-WhiteSnake-v2-Experiment-4", + "id": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-4", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4241 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5185 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.114 + } + }, + { + "evaluation_name": 
"GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4002 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3342 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-4/d7303703-f33e-430b-813d-998c95dbdb67.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-4/d7303703-f33e-430b-813d-998c95dbdb67.json deleted file mode 100644 index f67d2ecef..000000000 --- a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-4/d7303703-f33e-430b-813d-998c95dbdb67.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-4/1762652579.560668", - "retrieved_timestamp": "1762652579.560668", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-4", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-4", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42405151664250856 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5184748714407336 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11404833836858005 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40019791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3341921542553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake/8aa34df4-8347-4f2d-98a0-7ec58bd62e43.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake/8aa34df4-8347-4f2d-98a0-7ec58bd62e43.json deleted file mode 100644 index 09efa749d..000000000 --- a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake/8aa34df4-8347-4f2d-98a0-7ec58bd62e43.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-WhiteSnake/1762652579.55982", - "retrieved_timestamp": "1762652579.5598211", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44376033369238066 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5604605871844869 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13141993957703926 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.456875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3657746010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake/c3f44524-4c75-4cd0-9f5d-79c8b08f6f77.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake/c3f44524-4c75-4cd0-9f5d-79c8b08f6f77.json new file mode 100644 index 000000000..2aafdb196 --- 
/dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake/c3f44524-4c75-4cd0-9f5d-79c8b08f6f77.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-WhiteSnake/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MN-12B-Mimicore-WhiteSnake", + "id": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4438 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5605 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1314 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.318 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4569 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3658 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Unleashed-Twilight/00f0fe96-4a06-46e7-88d8-368b86bcdb06.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Unleashed-Twilight/00f0fe96-4a06-46e7-88d8-368b86bcdb06.json deleted file mode 100644 index a5634a043..000000000 --- a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Unleashed-Twilight/00f0fe96-4a06-46e7-88d8-368b86bcdb06.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/DoppelReflEx_MN-12B-Unleashed-Twilight/1762652579.560919", - "retrieved_timestamp": "1762652579.56092", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-Unleashed-Twilight", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-Unleashed-Twilight", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3505121965274361 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5520627163174447 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09592145015105741 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4383958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3677692819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Unleashed-Twilight/2e7d3674-d0b0-4b87-8bd8-8202114b7665.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Unleashed-Twilight/2e7d3674-d0b0-4b87-8bd8-8202114b7665.json new file mode 100644 index 000000000..c22d0071e --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Unleashed-Twilight/2e7d3674-d0b0-4b87-8bd8-8202114b7665.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Unleashed-Twilight/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MN-12B-Unleashed-Twilight", + "id": "DoppelReflEx/MN-12B-Unleashed-Twilight", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + 
"hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3505 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5521 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0959 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3289 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4384 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3678 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-WolFrame/30d21295-beb1-4179-8c6f-7bac79b29474.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-WolFrame/30d21295-beb1-4179-8c6f-7bac79b29474.json new file mode 100644 index 000000000..4c6187607 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-WolFrame/30d21295-beb1-4179-8c6f-7bac79b29474.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-WolFrame/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MN-12B-WolFrame", + "id": "DoppelReflEx/MN-12B-WolFrame", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4397 + } + }, + 
{ + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5117 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1314 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4015 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3393 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-WolFrame/3bb96e7a-6c09-4b9e-8f2b-0b525c2ebeb3.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-WolFrame/3bb96e7a-6c09-4b9e-8f2b-0b525c2ebeb3.json deleted file mode 100644 index b7aa76c5f..000000000 --- a/data/hfopenllm_v2/DoppelReflEx/MN-12B-WolFrame/3bb96e7a-6c09-4b9e-8f2b-0b525c2ebeb3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-WolFrame/1762652579.5611808", - "retrieved_timestamp": "1762652579.561182", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-WolFrame", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-WolFrame", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4397387819873491 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.511681287565329 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13141993957703926 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40146875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33934507978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-test/e2fc95de-b9d9-4043-b55c-aa2819d4f52f.json b/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-test/e2fc95de-b9d9-4043-b55c-aa2819d4f52f.json new file mode 100644 index 000000000..3848f175e --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-test/e2fc95de-b9d9-4043-b55c-aa2819d4f52f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MiniusLight-24B-test/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MiniusLight-24B-test", + "id": "DoppelReflEx/MiniusLight-24B-test", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0394 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6334 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0257 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3683 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4093 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5182 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-test/e6031abf-1ae2-431c-8247-3124fff41d17.json b/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-test/e6031abf-1ae2-431c-8247-3124fff41d17.json deleted file mode 100644 index aef3f2cc6..000000000 --- a/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-test/e6031abf-1ae2-431c-8247-3124fff41d17.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MiniusLight-24B-test/1762652579.5616372", - "retrieved_timestamp": "1762652579.5616379", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DoppelReflEx/MiniusLight-24B-test", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MiniusLight-24B-test", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03936776641533354 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6333927323374534 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0256797583081571 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36828859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40925000000000006 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5182014627659575 - } - } - ] -} diff --git a/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-v1b-test/2917ef74-c8cb-4255-8bda-76280fbe7c64.json b/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-v1b-test/2917ef74-c8cb-4255-8bda-76280fbe7c64.json deleted file mode 100644 index f24cd3c36..000000000 --- a/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-v1b-test/2917ef74-c8cb-4255-8bda-76280fbe7c64.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MiniusLight-24B-v1b-test/1762652579.561931", - "retrieved_timestamp": "1762652579.561932", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DoppelReflEx/MiniusLight-24B-v1b-test", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MiniusLight-24B-v1b-test", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37911408396388246 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6617145681113757 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2394259818731118 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37919463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4557291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5364860372340425 - } - } - ] -} diff --git a/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-v1b-test/7fbd7f97-baf9-4acd-ba0c-90ffbf0c47a5.json b/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-v1b-test/7fbd7f97-baf9-4acd-ba0c-90ffbf0c47a5.json new file mode 100644 index 000000000..fa3042ed0 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-v1b-test/7fbd7f97-baf9-4acd-ba0c-90ffbf0c47a5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MiniusLight-24B-v1b-test/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + 
"source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MiniusLight-24B-v1b-test", + "id": "DoppelReflEx/MiniusLight-24B-v1b-test", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3791 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6617 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2394 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3792 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4557 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5365 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-v1c-test/23a21492-0897-44b4-a046-cf93fa8c2a64.json b/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-v1c-test/23a21492-0897-44b4-a046-cf93fa8c2a64.json deleted file mode 100644 index 85c457a38..000000000 --- a/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-v1c-test/23a21492-0897-44b4-a046-cf93fa8c2a64.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MiniusLight-24B-v1c-test/1762652579.562173", - "retrieved_timestamp": "1762652579.5621738", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM 
v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DoppelReflEx/MiniusLight-24B-v1c-test", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MiniusLight-24B-v1c-test", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37858881102142317 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6752681657268389 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3951342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46341666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5487034574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-v1c-test/336effcd-d8fc-4477-846f-70fc40bdc111.json b/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-v1c-test/336effcd-d8fc-4477-846f-70fc40bdc111.json new file mode 100644 index 000000000..c424357f5 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-v1c-test/336effcd-d8fc-4477-846f-70fc40bdc111.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MiniusLight-24B-v1c-test/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MiniusLight-24B-v1c-test", + "id": "DoppelReflEx/MiniusLight-24B-v1c-test", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3786 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": 
"SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6753 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2968 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3951 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4634 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5487 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-v1d-test/28f87820-d587-498e-b713-7c0af0cdc324.json b/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-v1d-test/28f87820-d587-498e-b713-7c0af0cdc324.json new file mode 100644 index 000000000..f4fb232bb --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-v1d-test/28f87820-d587-498e-b713-7c0af0cdc324.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MiniusLight-24B-v1d-test/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MiniusLight-24B-v1d-test", + "id": "DoppelReflEx/MiniusLight-24B-v1d-test", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4032 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.6712 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2946 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3951 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4621 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5489 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-v1d-test/af67712e-7436-4703-ac22-9878dd8e190a.json b/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-v1d-test/af67712e-7436-4703-ac22-9878dd8e190a.json deleted file mode 100644 index d69442fd4..000000000 --- a/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-v1d-test/af67712e-7436-4703-ac22-9878dd8e190a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MiniusLight-24B-v1d-test/1762652579.5624058", - "retrieved_timestamp": "1762652579.5624058", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DoppelReflEx/MiniusLight-24B-v1d-test", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MiniusLight-24B-v1d-test", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40324339419407174 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6712025325276962 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.2945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3951342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46208333333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5488696808510638 - } - } - ] -} diff --git a/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B/2ec36e2e-0fba-4c6a-b9d0-fe57e7d708ef.json b/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B/2ec36e2e-0fba-4c6a-b9d0-fe57e7d708ef.json deleted file mode 100644 index 4b264bfff..000000000 --- a/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B/2ec36e2e-0fba-4c6a-b9d0-fe57e7d708ef.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MiniusLight-24B/1762652579.561418", - "retrieved_timestamp": "1762652579.561419", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DoppelReflEx/MiniusLight-24B", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MiniusLight-24B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25766410900854175 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6256461050033514 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12613293051359517 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35822147651006714 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43191666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.5091422872340425 - } - } - ] -} diff --git a/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B/f1b671ab-ebb3-43ec-86fa-832982d04cc1.json b/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B/f1b671ab-ebb3-43ec-86fa-832982d04cc1.json new file mode 100644 index 000000000..5cc2e4360 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B/f1b671ab-ebb3-43ec-86fa-832982d04cc1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MiniusLight-24B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MiniusLight-24B", + "id": "DoppelReflEx/MiniusLight-24B", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2577 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6256 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1261 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3582 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4319 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5091 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Again-8B-Model_Stock/327cde83-d107-4455-bc03-7e03026c52e6.json 
b/data/hfopenllm_v2/DreadPoor/Again-8B-Model_Stock/327cde83-d107-4455-bc03-7e03026c52e6.json new file mode 100644 index 000000000..e2c3518ef --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Again-8B-Model_Stock/327cde83-d107-4455-bc03-7e03026c52e6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Again-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Again-8B-Model_Stock", + "id": "DreadPoor/Again-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6724 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.531 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1201 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3987 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3518 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Again-8B-Model_Stock/cd2de45f-874a-4d63-bb6d-0afe5e687964.json b/data/hfopenllm_v2/DreadPoor/Again-8B-Model_Stock/cd2de45f-874a-4d63-bb6d-0afe5e687964.json deleted file mode 100644 index ce716a342..000000000 --- a/data/hfopenllm_v2/DreadPoor/Again-8B-Model_Stock/cd2de45f-874a-4d63-bb6d-0afe5e687964.json +++ 
/dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Again-8B-Model_Stock/1762652579.562616", - "retrieved_timestamp": "1762652579.562617", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Again-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Again-8B-Model_Stock", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6724213974476612 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5309801059970912 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39867708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.351811835106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Alita99-8B-LINEAR/570c991f-06bc-45d1-8409-d779a07df9a6.json b/data/hfopenllm_v2/DreadPoor/Alita99-8B-LINEAR/570c991f-06bc-45d1-8409-d779a07df9a6.json deleted file mode 100644 index 4af0d7b61..000000000 --- a/data/hfopenllm_v2/DreadPoor/Alita99-8B-LINEAR/570c991f-06bc-45d1-8409-d779a07df9a6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Alita99-8B-LINEAR/1762652579.562879", - "retrieved_timestamp": "1762652579.56288", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Alita99-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Alita99-8B-LINEAR", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7190077882241341 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5441767095577089 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42664583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38090093085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Alita99-8B-LINEAR/7497b8fb-9a7d-46dc-868e-1a2bbcdc7860.json b/data/hfopenllm_v2/DreadPoor/Alita99-8B-LINEAR/7497b8fb-9a7d-46dc-868e-1a2bbcdc7860.json new file mode 100644 index 000000000..de70affaf --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Alita99-8B-LINEAR/7497b8fb-9a7d-46dc-868e-1a2bbcdc7860.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Alita99-8B-LINEAR/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Alita99-8B-LINEAR", + "id": "DreadPoor/Alita99-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.719 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5442 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1647 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3163 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4266 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3809 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/AnotherTest/81ec7c1a-8874-44c3-b482-8a8ecfb2ae72.json b/data/hfopenllm_v2/DreadPoor/AnotherTest/81ec7c1a-8874-44c3-b482-8a8ecfb2ae72.json deleted file mode 100644 index f266ccda0..000000000 --- a/data/hfopenllm_v2/DreadPoor/AnotherTest/81ec7c1a-8874-44c3-b482-8a8ecfb2ae72.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_AnotherTest/1762652579.563089", - "retrieved_timestamp": "1762652579.563089", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/AnotherTest", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/AnotherTest", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47006387496287627 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46834113564549334 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.061933534743202415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42128125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2874833776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/AnotherTest/92c8afbe-7735-40c8-af0e-29da687c2070.json b/data/hfopenllm_v2/DreadPoor/AnotherTest/92c8afbe-7735-40c8-af0e-29da687c2070.json new file mode 100644 index 000000000..c4e25da29 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/AnotherTest/92c8afbe-7735-40c8-af0e-29da687c2070.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_AnotherTest/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AnotherTest", + "id": "DreadPoor/AnotherTest", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4701 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4683 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0619 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4213 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on 
MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2875 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Aspire-8B-model_stock/28bd44a9-d916-4a0b-b0ae-c6a4cb5d727d.json b/data/hfopenllm_v2/DreadPoor/Aspire-8B-model_stock/28bd44a9-d916-4a0b-b0ae-c6a4cb5d727d.json deleted file mode 100644 index 7b265a912..000000000 --- a/data/hfopenllm_v2/DreadPoor/Aspire-8B-model_stock/28bd44a9-d916-4a0b-b0ae-c6a4cb5d727d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire-8B-model_stock/1762652579.5633001", - "retrieved_timestamp": "1762652579.563301", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Aspire-8B-model_stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Aspire-8B-model_stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7140620221013578 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5278251846388996 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14954682779456194 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42124999999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37632978723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Aspire-8B-model_stock/bca052ac-6556-49d8-94e3-f4bda560a5d3.json b/data/hfopenllm_v2/DreadPoor/Aspire-8B-model_stock/bca052ac-6556-49d8-94e3-f4bda560a5d3.json new file mode 100644 index 000000000..6e0e74802 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Aspire-8B-model_stock/bca052ac-6556-49d8-94e3-f4bda560a5d3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire-8B-model_stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", 
+ "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Aspire-8B-model_stock", + "id": "DreadPoor/Aspire-8B-model_stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7141 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5278 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1495 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3146 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4212 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3763 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Aspire_1.3-8B_model-stock/5f74fe6e-8575-4cea-959b-e6ba03c7e273.json b/data/hfopenllm_v2/DreadPoor/Aspire_1.3-8B_model-stock/5f74fe6e-8575-4cea-959b-e6ba03c7e273.json new file mode 100644 index 000000000..91197dc03 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Aspire_1.3-8B_model-stock/5f74fe6e-8575-4cea-959b-e6ba03c7e273.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_1.3-8B_model-stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Aspire_1.3-8B_model-stock", + "id": 
"DreadPoor/Aspire_1.3-8B_model-stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7062 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5302 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1692 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4105 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3716 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Aspire_1.3-8B_model-stock/917a9361-af08-4e12-a93a-01321629b31f.json b/data/hfopenllm_v2/DreadPoor/Aspire_1.3-8B_model-stock/917a9361-af08-4e12-a93a-01321629b31f.json deleted file mode 100644 index 7ddb36122..000000000 --- a/data/hfopenllm_v2/DreadPoor/Aspire_1.3-8B_model-stock/917a9361-af08-4e12-a93a-01321629b31f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_1.3-8B_model-stock/1762652579.563606", - "retrieved_timestamp": "1762652579.563607", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Aspire_1.3-8B_model-stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": 
"DreadPoor/Aspire_1.3-8B_model-stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7061685217445268 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5301644606574212 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1691842900302115 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4104583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37159242021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Aspire_V2-8B-Model_Stock/677221cd-f218-4982-8363-d969913d7a22.json b/data/hfopenllm_v2/DreadPoor/Aspire_V2-8B-Model_Stock/677221cd-f218-4982-8363-d969913d7a22.json deleted file mode 100644 index e24604f18..000000000 --- a/data/hfopenllm_v2/DreadPoor/Aspire_V2-8B-Model_Stock/677221cd-f218-4982-8363-d969913d7a22.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_V2-8B-Model_Stock/1762652579.56384", - "retrieved_timestamp": "1762652579.563841", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Aspire_V2-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Aspire_V2-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7371430027881576 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5329650089428358 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17598187311178248 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38937499999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3696808510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Aspire_V2-8B-Model_Stock/b0f696f5-ed70-4293-999d-a9121192c137.json b/data/hfopenllm_v2/DreadPoor/Aspire_V2-8B-Model_Stock/b0f696f5-ed70-4293-999d-a9121192c137.json new file mode 100644 index 000000000..206f0a344 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Aspire_V2-8B-Model_Stock/b0f696f5-ed70-4293-999d-a9121192c137.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_V2-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Aspire_V2-8B-Model_Stock", + "id": "DreadPoor/Aspire_V2-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7371 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.533 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.176 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.3205 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3894 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3697 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Aspire_V2.1-8B-Model_Stock/18751a6f-062c-4915-bbe0-ae222cf9ae0b.json b/data/hfopenllm_v2/DreadPoor/Aspire_V2.1-8B-Model_Stock/18751a6f-062c-4915-bbe0-ae222cf9ae0b.json new file mode 100644 index 000000000..2e1a6a761 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Aspire_V2.1-8B-Model_Stock/18751a6f-062c-4915-bbe0-ae222cf9ae0b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_V2.1-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Aspire_V2.1-8B-Model_Stock", + "id": "DreadPoor/Aspire_V2.1-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7238 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5236 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1767 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4136 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3801 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Aspire_V2.1-8B-Model_Stock/292e77cb-e6e6-4d10-9956-1e09369e9669.json b/data/hfopenllm_v2/DreadPoor/Aspire_V2.1-8B-Model_Stock/292e77cb-e6e6-4d10-9956-1e09369e9669.json deleted file mode 100644 index e64712219..000000000 --- a/data/hfopenllm_v2/DreadPoor/Aspire_V2.1-8B-Model_Stock/292e77cb-e6e6-4d10-9956-1e09369e9669.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_V2.1-8B-Model_Stock/1762652579.564126", - "retrieved_timestamp": "1762652579.564127", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Aspire_V2.1-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Aspire_V2.1-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7237540836092679 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5236395810818485 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17673716012084592 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41359375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3800698138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Aspire_V2_ALT-8B-Model_Stock/398ebe04-638f-4a11-b99d-6778ff3ff97b.json 
b/data/hfopenllm_v2/DreadPoor/Aspire_V2_ALT-8B-Model_Stock/398ebe04-638f-4a11-b99d-6778ff3ff97b.json new file mode 100644 index 000000000..98689999d --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Aspire_V2_ALT-8B-Model_Stock/398ebe04-638f-4a11-b99d-6778ff3ff97b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_V2_ALT-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Aspire_V2_ALT-8B-Model_Stock", + "id": "DreadPoor/Aspire_V2_ALT-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7381 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5266 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.173 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3247 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3975 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3727 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Aspire_V2_ALT-8B-Model_Stock/62414bde-98c1-4cae-af6d-18d3b0ecd50a.json b/data/hfopenllm_v2/DreadPoor/Aspire_V2_ALT-8B-Model_Stock/62414bde-98c1-4cae-af6d-18d3b0ecd50a.json deleted file mode 100644 index 05a478445..000000000 --- 
a/data/hfopenllm_v2/DreadPoor/Aspire_V2_ALT-8B-Model_Stock/62414bde-98c1-4cae-af6d-18d3b0ecd50a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_V2_ALT-8B-Model_Stock/1762652579.5643399", - "retrieved_timestamp": "1762652579.564341", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Aspire_V2_ALT-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Aspire_V2_ALT-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7381170848903134 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5265819478728287 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1729607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39749999999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3726728723404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Aspire_V2_ALT_ROW-8B-Model_Stock/3258c5c6-d12d-4e09-8404-22b6aaf82e87.json b/data/hfopenllm_v2/DreadPoor/Aspire_V2_ALT_ROW-8B-Model_Stock/3258c5c6-d12d-4e09-8404-22b6aaf82e87.json deleted file mode 100644 index 8f5a49119..000000000 --- a/data/hfopenllm_v2/DreadPoor/Aspire_V2_ALT_ROW-8B-Model_Stock/3258c5c6-d12d-4e09-8404-22b6aaf82e87.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_V2_ALT_ROW-8B-Model_Stock/1762652579.564561", - "retrieved_timestamp": "1762652579.5645618", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Aspire_V2_ALT_ROW-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": 
"unknown", - "id": "DreadPoor/Aspire_V2_ALT_ROW-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7381170848903134 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5265819478728287 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1729607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39749999999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3726728723404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Aspire_V2_ALT_ROW-8B-Model_Stock/b4f197f2-3456-4221-b222-10dfbbb50f56.json b/data/hfopenllm_v2/DreadPoor/Aspire_V2_ALT_ROW-8B-Model_Stock/b4f197f2-3456-4221-b222-10dfbbb50f56.json new file mode 100644 index 000000000..2a86794fd --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Aspire_V2_ALT_ROW-8B-Model_Stock/b4f197f2-3456-4221-b222-10dfbbb50f56.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_V2_ALT_ROW-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Aspire_V2_ALT_ROW-8B-Model_Stock", + "id": "DreadPoor/Aspire_V2_ALT_ROW-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7381 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5266 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.173 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3247 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3975 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3727 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Aspire_V3-8B-Model_Stock/0a2fa86a-f9b3-4a49-b215-4cd3ee9b4c22.json b/data/hfopenllm_v2/DreadPoor/Aspire_V3-8B-Model_Stock/0a2fa86a-f9b3-4a49-b215-4cd3ee9b4c22.json new file mode 100644 index 000000000..f208a5c89 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Aspire_V3-8B-Model_Stock/0a2fa86a-f9b3-4a49-b215-4cd3ee9b4c22.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_V3-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Aspire_V3-8B-Model_Stock", + "id": "DreadPoor/Aspire_V3-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5119 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5268 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + 
"hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1858 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4015 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3642 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Aspire_V3-8B-Model_Stock/3cc8c02f-87a8-428a-8991-a0d52500d927.json b/data/hfopenllm_v2/DreadPoor/Aspire_V3-8B-Model_Stock/3cc8c02f-87a8-428a-8991-a0d52500d927.json deleted file mode 100644 index 28fe3faee..000000000 --- a/data/hfopenllm_v2/DreadPoor/Aspire_V3-8B-Model_Stock/3cc8c02f-87a8-428a-8991-a0d52500d927.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_V3-8B-Model_Stock/1762652579.5648441", - "retrieved_timestamp": "1762652579.564845", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Aspire_V3-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Aspire_V3-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5118795905973927 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5267958758971987 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18580060422960726 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40149999999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36419547872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Aspire_V4-8B-Model_Stock/1561ec50-1cb9-47ce-9db1-09efe9c3fc61.json b/data/hfopenllm_v2/DreadPoor/Aspire_V4-8B-Model_Stock/1561ec50-1cb9-47ce-9db1-09efe9c3fc61.json new file mode 100644 index 000000000..b90acd330 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Aspire_V4-8B-Model_Stock/1561ec50-1cb9-47ce-9db1-09efe9c3fc61.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_V4-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Aspire_V4-8B-Model_Stock", + "id": "DreadPoor/Aspire_V4-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7694 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5314 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1926 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.3867 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3708 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Aspire_V4-8B-Model_Stock/692e0ff5-0607-4aae-8996-45bbbc4d2288.json b/data/hfopenllm_v2/DreadPoor/Aspire_V4-8B-Model_Stock/692e0ff5-0607-4aae-8996-45bbbc4d2288.json deleted file mode 100644 index 1b5dbd0b5..000000000 --- a/data/hfopenllm_v2/DreadPoor/Aspire_V4-8B-Model_Stock/692e0ff5-0607-4aae-8996-45bbbc4d2288.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_V4-8B-Model_Stock/1762652579.565063", - "retrieved_timestamp": "1762652579.565064", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Aspire_V4-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Aspire_V4-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.769416259967996 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5314037161536506 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19259818731117825 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3867395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.370844414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Aspire_V4_ALT-8B-Model_Stock/496525ff-394a-4b7b-9d93-f5b38d2a1ee3.json b/data/hfopenllm_v2/DreadPoor/Aspire_V4_ALT-8B-Model_Stock/496525ff-394a-4b7b-9d93-f5b38d2a1ee3.json new file mode 100644 index 000000000..d418f3c08 --- /dev/null +++ 
b/data/hfopenllm_v2/DreadPoor/Aspire_V4_ALT-8B-Model_Stock/496525ff-394a-4b7b-9d93-f5b38d2a1ee3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_V4_ALT-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Aspire_V4_ALT-8B-Model_Stock", + "id": "DreadPoor/Aspire_V4_ALT-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7366 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5268 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1813 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3205 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.392 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3682 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Aspire_V4_ALT-8B-Model_Stock/7b634b21-8d89-4656-89d7-3590fc8a883a.json b/data/hfopenllm_v2/DreadPoor/Aspire_V4_ALT-8B-Model_Stock/7b634b21-8d89-4656-89d7-3590fc8a883a.json deleted file mode 100644 index e7849cb6a..000000000 --- a/data/hfopenllm_v2/DreadPoor/Aspire_V4_ALT-8B-Model_Stock/7b634b21-8d89-4656-89d7-3590fc8a883a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/DreadPoor_Aspire_V4_ALT-8B-Model_Stock/1762652579.565274", - "retrieved_timestamp": "1762652579.565275", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Aspire_V4_ALT-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Aspire_V4_ALT-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7365933500888753 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5268232518944024 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18126888217522658 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3920416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3681848404255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Asymmetric_Linearity-8B-Model_Stock/37071760-d24c-43cc-9965-d8c7873c0ee8.json b/data/hfopenllm_v2/DreadPoor/Asymmetric_Linearity-8B-Model_Stock/37071760-d24c-43cc-9965-d8c7873c0ee8.json new file mode 100644 index 000000000..77c551bb3 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Asymmetric_Linearity-8B-Model_Stock/37071760-d24c-43cc-9965-d8c7873c0ee8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Asymmetric_Linearity-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Asymmetric_Linearity-8B-Model_Stock", + "id": "DreadPoor/Asymmetric_Linearity-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + 
"source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7174 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5465 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1647 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3146 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4199 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3844 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Asymmetric_Linearity-8B-Model_Stock/ad58e69a-0917-4375-9e83-5db2ad50d0ca.json b/data/hfopenllm_v2/DreadPoor/Asymmetric_Linearity-8B-Model_Stock/ad58e69a-0917-4375-9e83-5db2ad50d0ca.json deleted file mode 100644 index 1bf5416e0..000000000 --- a/data/hfopenllm_v2/DreadPoor/Asymmetric_Linearity-8B-Model_Stock/ad58e69a-0917-4375-9e83-5db2ad50d0ca.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Asymmetric_Linearity-8B-Model_Stock/1762652579.5654871", - "retrieved_timestamp": "1762652579.565488", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Asymmetric_Linearity-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Asymmetric_Linearity-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7174341857382855 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.546535755155883 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41994791666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3843916223404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Aurora_faustus-8B-LINEAR/91a71a49-5dd4-43b1-9e1c-fd9492236712.json b/data/hfopenllm_v2/DreadPoor/Aurora_faustus-8B-LINEAR/91a71a49-5dd4-43b1-9e1c-fd9492236712.json new file mode 100644 index 000000000..ea3d38b72 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Aurora_faustus-8B-LINEAR/91a71a49-5dd4-43b1-9e1c-fd9492236712.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Aurora_faustus-8B-LINEAR/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Aurora_faustus-8B-LINEAR", + "id": "DreadPoor/Aurora_faustus-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7281 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5516 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1707 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4146 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3842 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Aurora_faustus-8B-LINEAR/c8b72a17-837a-45ed-b285-bf472a4f6d45.json b/data/hfopenllm_v2/DreadPoor/Aurora_faustus-8B-LINEAR/c8b72a17-837a-45ed-b285-bf472a4f6d45.json deleted file mode 100644 index c707a7d77..000000000 --- a/data/hfopenllm_v2/DreadPoor/Aurora_faustus-8B-LINEAR/c8b72a17-837a-45ed-b285-bf472a4f6d45.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Aurora_faustus-8B-LINEAR/1762652579.565701", - "retrieved_timestamp": "1762652579.565702", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Aurora_faustus-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Aurora_faustus-8B-LINEAR", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7281003293483512 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5515538279425277 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17069486404833836 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4145833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3842253989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Aurora_faustus-8B-LORABLATED/05707286-d03b-4cb2-9a0f-48245c867cc7.json b/data/hfopenllm_v2/DreadPoor/Aurora_faustus-8B-LORABLATED/05707286-d03b-4cb2-9a0f-48245c867cc7.json deleted file mode 100644 index c0b240439..000000000 --- a/data/hfopenllm_v2/DreadPoor/Aurora_faustus-8B-LORABLATED/05707286-d03b-4cb2-9a0f-48245c867cc7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Aurora_faustus-8B-LORABLATED/1762652579.565921", - "retrieved_timestamp": "1762652579.565921", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Aurora_faustus-8B-LORABLATED", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Aurora_faustus-8B-LORABLATED", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7527050448365891 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.539159616655651 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1487915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42385416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36727061170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Aurora_faustus-8B-LORABLATED/d1d48abb-6dcf-4905-958f-c3a3e75feac6.json b/data/hfopenllm_v2/DreadPoor/Aurora_faustus-8B-LORABLATED/d1d48abb-6dcf-4905-958f-c3a3e75feac6.json new file mode 100644 index 
000000000..1abc59610 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Aurora_faustus-8B-LORABLATED/d1d48abb-6dcf-4905-958f-c3a3e75feac6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Aurora_faustus-8B-LORABLATED/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Aurora_faustus-8B-LORABLATED", + "id": "DreadPoor/Aurora_faustus-8B-LORABLATED", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7527 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5392 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1488 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4239 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3673 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Aurora_faustus-8B-LORABLATED_ALT/2b644863-f52f-487a-85d1-3fc3ce973d90.json b/data/hfopenllm_v2/DreadPoor/Aurora_faustus-8B-LORABLATED_ALT/2b644863-f52f-487a-85d1-3fc3ce973d90.json deleted file mode 100644 index 0a6157e91..000000000 --- a/data/hfopenllm_v2/DreadPoor/Aurora_faustus-8B-LORABLATED_ALT/2b644863-f52f-487a-85d1-3fc3ce973d90.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": 
"0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Aurora_faustus-8B-LORABLATED_ALT/1762652579.566129", - "retrieved_timestamp": "1762652579.56613", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Aurora_faustus-8B-LORABLATED_ALT", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Aurora_faustus-8B-LORABLATED_ALT", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7377923908562614 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5387670721191214 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15861027190332327 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4225208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36943151595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Aurora_faustus-8B-LORABLATED_ALT/68282f29-f56f-420b-bd1e-9cc54783c1a5.json b/data/hfopenllm_v2/DreadPoor/Aurora_faustus-8B-LORABLATED_ALT/68282f29-f56f-420b-bd1e-9cc54783c1a5.json new file mode 100644 index 000000000..12017ff39 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Aurora_faustus-8B-LORABLATED_ALT/68282f29-f56f-420b-bd1e-9cc54783c1a5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Aurora_faustus-8B-LORABLATED_ALT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Aurora_faustus-8B-LORABLATED_ALT", + "id": "DreadPoor/Aurora_faustus-8B-LORABLATED_ALT", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + 
"dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7378 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5388 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1586 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4225 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3694 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Autumn_Dawn-8B-LINEAR/4f1d1b68-311f-4409-bf5b-41629a889da3.json b/data/hfopenllm_v2/DreadPoor/Autumn_Dawn-8B-LINEAR/4f1d1b68-311f-4409-bf5b-41629a889da3.json deleted file mode 100644 index b9656bb5a..000000000 --- a/data/hfopenllm_v2/DreadPoor/Autumn_Dawn-8B-LINEAR/4f1d1b68-311f-4409-bf5b-41629a889da3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Autumn_Dawn-8B-LINEAR/1762652579.566346", - "retrieved_timestamp": "1762652579.5663471", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Autumn_Dawn-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Autumn_Dawn-8B-LINEAR", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7292993701157373 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5459436958014627 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18580060422960726 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4185520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39677526595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Autumn_Dawn-8B-LINEAR/cd1c84dc-6c6e-4789-add7-0e3ca783b0ea.json b/data/hfopenllm_v2/DreadPoor/Autumn_Dawn-8B-LINEAR/cd1c84dc-6c6e-4789-add7-0e3ca783b0ea.json new file mode 100644 index 000000000..ab92e181e --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Autumn_Dawn-8B-LINEAR/cd1c84dc-6c6e-4789-add7-0e3ca783b0ea.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Autumn_Dawn-8B-LINEAR/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Autumn_Dawn-8B-LINEAR", + "id": "DreadPoor/Autumn_Dawn-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7293 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5459 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + 
}, + "score_details": { + "score": 0.1858 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4186 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3968 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/BaeZel-8B-LINEAR/22a9d3b8-ac45-4433-8926-5d28681af922.json b/data/hfopenllm_v2/DreadPoor/BaeZel-8B-LINEAR/22a9d3b8-ac45-4433-8926-5d28681af922.json new file mode 100644 index 000000000..52019043a --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/BaeZel-8B-LINEAR/22a9d3b8-ac45-4433-8926-5d28681af922.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_BaeZel-8B-LINEAR/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BaeZel-8B-LINEAR", + "id": "DreadPoor/BaeZel-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7378 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5464 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1813 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3213 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4227 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3861 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/BaeZel-8B-LINEAR/f3af4295-9508-4a3e-ba5a-6336a560fd6c.json b/data/hfopenllm_v2/DreadPoor/BaeZel-8B-LINEAR/f3af4295-9508-4a3e-ba5a-6336a560fd6c.json deleted file mode 100644 index abfa99fc3..000000000 --- a/data/hfopenllm_v2/DreadPoor/BaeZel-8B-LINEAR/f3af4295-9508-4a3e-ba5a-6336a560fd6c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_BaeZel-8B-LINEAR/1762652579.56655", - "retrieved_timestamp": "1762652579.566551", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/BaeZel-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/BaeZel-8B-LINEAR", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7377923908562614 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5463800554321383 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18126888217522658 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4227083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3861369680851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/BaeZel-8B-Model_Stock/31395ff6-82da-4585-85d6-459fcac9408f.json b/data/hfopenllm_v2/DreadPoor/BaeZel-8B-Model_Stock/31395ff6-82da-4585-85d6-459fcac9408f.json deleted file mode 100644 index e09c0c427..000000000 --- a/data/hfopenllm_v2/DreadPoor/BaeZel-8B-Model_Stock/31395ff6-82da-4585-85d6-459fcac9408f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_BaeZel-8B-Model_Stock/1762652579.566763", - "retrieved_timestamp": "1762652579.566764", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/BaeZel-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/BaeZel-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7713145564878965 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5407680550216925 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16389728096676737 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41991666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38804853723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/BaeZel-8B-Model_Stock/57c4b9eb-dffd-4623-a2d5-b2374d3c9109.json b/data/hfopenllm_v2/DreadPoor/BaeZel-8B-Model_Stock/57c4b9eb-dffd-4623-a2d5-b2374d3c9109.json new file mode 100644 index 000000000..0d62d9248 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/BaeZel-8B-Model_Stock/57c4b9eb-dffd-4623-a2d5-b2374d3c9109.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_BaeZel-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + 
"evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BaeZel-8B-Model_Stock", + "id": "DreadPoor/BaeZel-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7713 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5408 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1639 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3138 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4199 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.388 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/BaeZel_V2-8B-Model_Stock/24adbd8c-df3a-4b58-94e6-61a3dfa6828e.json b/data/hfopenllm_v2/DreadPoor/BaeZel_V2-8B-Model_Stock/24adbd8c-df3a-4b58-94e6-61a3dfa6828e.json new file mode 100644 index 000000000..92150ff2e --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/BaeZel_V2-8B-Model_Stock/24adbd8c-df3a-4b58-94e6-61a3dfa6828e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_BaeZel_V2-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BaeZel_V2-8B-Model_Stock", + "id": "DreadPoor/BaeZel_V2-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + 
"additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7677 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5374 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1798 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4186 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3947 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/BaeZel_V2-8B-Model_Stock/cdacd0e9-fa22-4053-b16d-d3bac8541829.json b/data/hfopenllm_v2/DreadPoor/BaeZel_V2-8B-Model_Stock/cdacd0e9-fa22-4053-b16d-d3bac8541829.json deleted file mode 100644 index 6cf48369d..000000000 --- a/data/hfopenllm_v2/DreadPoor/BaeZel_V2-8B-Model_Stock/cdacd0e9-fa22-4053-b16d-d3bac8541829.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_BaeZel_V2-8B-Model_Stock/1762652579.566977", - "retrieved_timestamp": "1762652579.566978", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/BaeZel_V2-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/BaeZel_V2-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - 
"params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7676675665013276 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5373871612758611 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4185833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3946974734042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/BaeZel_V2_ALT-8B-Model_Stock/08ac7c80-0f13-43c9-a538-683eb6927b59.json b/data/hfopenllm_v2/DreadPoor/BaeZel_V2_ALT-8B-Model_Stock/08ac7c80-0f13-43c9-a538-683eb6927b59.json deleted file mode 100644 index 8e4c6bd7e..000000000 --- a/data/hfopenllm_v2/DreadPoor/BaeZel_V2_ALT-8B-Model_Stock/08ac7c80-0f13-43c9-a538-683eb6927b59.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_BaeZel_V2_ALT-8B-Model_Stock/1762652579.567195", - "retrieved_timestamp": "1762652579.567196", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/BaeZel_V2_ALT-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/BaeZel_V2_ALT-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7676675665013276 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5373871612758611 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4185833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3946974734042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/BaeZel_V2_ALT-8B-Model_Stock/6ed62f64-c2be-4bca-b17d-bd0184a3d498.json b/data/hfopenllm_v2/DreadPoor/BaeZel_V2_ALT-8B-Model_Stock/6ed62f64-c2be-4bca-b17d-bd0184a3d498.json new file mode 100644 index 000000000..4fbb404d5 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/BaeZel_V2_ALT-8B-Model_Stock/6ed62f64-c2be-4bca-b17d-bd0184a3d498.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_BaeZel_V2_ALT-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BaeZel_V2_ALT-8B-Model_Stock", + "id": "DreadPoor/BaeZel_V2_ALT-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7677 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5374 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1798 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + 
"dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4186 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3947 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/BaeZel_V3-8B-Model_Stock/91ec0c61-73ca-463f-b3be-3386293e4fc0.json b/data/hfopenllm_v2/DreadPoor/BaeZel_V3-8B-Model_Stock/91ec0c61-73ca-463f-b3be-3386293e4fc0.json deleted file mode 100644 index b189f9081..000000000 --- a/data/hfopenllm_v2/DreadPoor/BaeZel_V3-8B-Model_Stock/91ec0c61-73ca-463f-b3be-3386293e4fc0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_BaeZel_V3-8B-Model_Stock/1762652579.5674188", - "retrieved_timestamp": "1762652579.56742", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/BaeZel_V3-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/BaeZel_V3-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7831797408653485 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.539231076759135 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18957703927492447 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41743749999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3887965425531915 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/DreadPoor/BaeZel_V3-8B-Model_Stock/db9e4d03-03a8-4a10-8739-16bbcfbb06d4.json b/data/hfopenllm_v2/DreadPoor/BaeZel_V3-8B-Model_Stock/db9e4d03-03a8-4a10-8739-16bbcfbb06d4.json new file mode 100644 index 000000000..6a2b2bd8f --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/BaeZel_V3-8B-Model_Stock/db9e4d03-03a8-4a10-8739-16bbcfbb06d4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_BaeZel_V3-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BaeZel_V3-8B-Model_Stock", + "id": "DreadPoor/BaeZel_V3-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7832 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5392 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1896 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3205 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4174 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3888 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Blunt_Edge-8B-SLERP/35807c64-beed-4022-a4ba-1284c5f6124f.json b/data/hfopenllm_v2/DreadPoor/Blunt_Edge-8B-SLERP/35807c64-beed-4022-a4ba-1284c5f6124f.json deleted file mode 100644 index 
f5471ea1c..000000000 --- a/data/hfopenllm_v2/DreadPoor/Blunt_Edge-8B-SLERP/35807c64-beed-4022-a4ba-1284c5f6124f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Blunt_Edge-8B-SLERP/1762652579.567633", - "retrieved_timestamp": "1762652579.5676339", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Blunt_Edge-8B-SLERP", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Blunt_Edge-8B-SLERP", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7496575752337131 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5389470863694941 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18580060422960726 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.417375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37666223404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Blunt_Edge-8B-SLERP/7b0fc4fe-51c8-4f01-b07b-5bca05b40859.json b/data/hfopenllm_v2/DreadPoor/Blunt_Edge-8B-SLERP/7b0fc4fe-51c8-4f01-b07b-5bca05b40859.json new file mode 100644 index 000000000..c3a3e4356 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Blunt_Edge-8B-SLERP/7b0fc4fe-51c8-4f01-b07b-5bca05b40859.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Blunt_Edge-8B-SLERP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Blunt_Edge-8B-SLERP", + "id": "DreadPoor/Blunt_Edge-8B-SLERP", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + 
"evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7497 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5389 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1858 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3112 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4174 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3767 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/BulkUp/3c2e7750-3257-4012-8b43-44387707170c.json b/data/hfopenllm_v2/DreadPoor/BulkUp/3c2e7750-3257-4012-8b43-44387707170c.json deleted file mode 100644 index f131cd31b..000000000 --- a/data/hfopenllm_v2/DreadPoor/BulkUp/3c2e7750-3257-4012-8b43-44387707170c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_BulkUp/1762652579.567868", - "retrieved_timestamp": "1762652579.567869", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/BulkUp", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/BulkUp", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.177804891022487 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28698602947692575 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24748322147651006 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3446666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11095412234042554 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/BulkUp/6f286418-d8e3-4c11-8941-cfe5a18b1037.json b/data/hfopenllm_v2/DreadPoor/BulkUp/6f286418-d8e3-4c11-8941-cfe5a18b1037.json new file mode 100644 index 000000000..dfd176ab1 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/BulkUp/6f286418-d8e3-4c11-8941-cfe5a18b1037.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_BulkUp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BulkUp", + "id": "DreadPoor/BulkUp", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1778 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.287 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": 
"hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2475 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3447 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.111 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Cadence-8B-LINEAR/8be55d6b-7fe0-41cf-86a6-66327dd88003.json b/data/hfopenllm_v2/DreadPoor/Cadence-8B-LINEAR/8be55d6b-7fe0-41cf-86a6-66327dd88003.json deleted file mode 100644 index 62b98656a..000000000 --- a/data/hfopenllm_v2/DreadPoor/Cadence-8B-LINEAR/8be55d6b-7fe0-41cf-86a6-66327dd88003.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Cadence-8B-LINEAR/1762652579.568077", - "retrieved_timestamp": "1762652579.568078", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Cadence-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Cadence-8B-LINEAR", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7682172192006099 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5433358555450108 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16767371601208458 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41734374999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3803191489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Cadence-8B-LINEAR/b0a83b1f-3af2-45e8-9d88-d7302a529112.json b/data/hfopenllm_v2/DreadPoor/Cadence-8B-LINEAR/b0a83b1f-3af2-45e8-9d88-d7302a529112.json new file mode 100644 index 000000000..6b7ee9836 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Cadence-8B-LINEAR/b0a83b1f-3af2-45e8-9d88-d7302a529112.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Cadence-8B-LINEAR/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Cadence-8B-LINEAR", + "id": "DreadPoor/Cadence-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7682 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5433 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1677 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4173 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3803 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/DreadPoor/Caelid-8B-Model_Stock/0462fce1-51b4-48d8-8278-a90048ffd637.json b/data/hfopenllm_v2/DreadPoor/Caelid-8B-Model_Stock/0462fce1-51b4-48d8-8278-a90048ffd637.json new file mode 100644 index 000000000..2d3dba8f9 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Caelid-8B-Model_Stock/0462fce1-51b4-48d8-8278-a90048ffd637.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Caelid-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Caelid-8B-Model_Stock", + "id": "DreadPoor/Caelid-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7247 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.546 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1511 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4001 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3816 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Caelid-8B-Model_Stock/8b15f9a3-6f39-4210-b48f-4dc5569114e2.json b/data/hfopenllm_v2/DreadPoor/Caelid-8B-Model_Stock/8b15f9a3-6f39-4210-b48f-4dc5569114e2.json deleted file mode 100644 index 5e33d053c..000000000 
--- a/data/hfopenllm_v2/DreadPoor/Caelid-8B-Model_Stock/8b15f9a3-6f39-4210-b48f-4dc5569114e2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Caelid-8B-Model_Stock/1762652579.5682912", - "retrieved_timestamp": "1762652579.5682921", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Caelid-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Caelid-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7247281657114235 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5459605196913864 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1510574018126888 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4001041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3816489361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Casuar-9B-Model_Stock/7c5c8fd8-2fbb-41f3-88f3-92a544200204.json b/data/hfopenllm_v2/DreadPoor/Casuar-9B-Model_Stock/7c5c8fd8-2fbb-41f3-88f3-92a544200204.json deleted file mode 100644 index 1b3ce8a6f..000000000 --- a/data/hfopenllm_v2/DreadPoor/Casuar-9B-Model_Stock/7c5c8fd8-2fbb-41f3-88f3-92a544200204.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Casuar-9B-Model_Stock/1762652579.5685189", - "retrieved_timestamp": "1762652579.5685189", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Casuar-9B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Casuar-9B-Model_Stock", - "additional_details": { - 
"precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7764852812759035 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6106681877306871 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3447986577181208 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41654166666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4156416223404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Casuar-9B-Model_Stock/e02f597c-c368-4223-ac90-c99d82c90634.json b/data/hfopenllm_v2/DreadPoor/Casuar-9B-Model_Stock/e02f597c-c368-4223-ac90-c99d82c90634.json new file mode 100644 index 000000000..c97dcca00 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Casuar-9B-Model_Stock/e02f597c-c368-4223-ac90-c99d82c90634.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Casuar-9B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Casuar-9B-Model_Stock", + "id": "DreadPoor/Casuar-9B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7765 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6107 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + 
"dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.213 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3448 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4165 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4156 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Condensed_Milk-8B-Model_Stock/32e63ffc-c64e-4562-ba99-14873f5bac2e.json b/data/hfopenllm_v2/DreadPoor/Condensed_Milk-8B-Model_Stock/32e63ffc-c64e-4562-ba99-14873f5bac2e.json new file mode 100644 index 000000000..3cd2321bc --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Condensed_Milk-8B-Model_Stock/32e63ffc-c64e-4562-ba99-14873f5bac2e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Condensed_Milk-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Condensed_Milk-8B-Model_Stock", + "id": "DreadPoor/Condensed_Milk-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7536 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5435 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 
5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1745 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3213 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.416 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3876 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Condensed_Milk-8B-Model_Stock/58573d8e-602a-4088-8dec-a738b7e55e9c.json b/data/hfopenllm_v2/DreadPoor/Condensed_Milk-8B-Model_Stock/58573d8e-602a-4088-8dec-a738b7e55e9c.json deleted file mode 100644 index 64ffeb572..000000000 --- a/data/hfopenllm_v2/DreadPoor/Condensed_Milk-8B-Model_Stock/58573d8e-602a-4088-8dec-a738b7e55e9c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Condensed_Milk-8B-Model_Stock/1762652579.568758", - "retrieved_timestamp": "1762652579.568759", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Condensed_Milk-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Condensed_Milk-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7536292592543341 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5434864122121906 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17447129909365558 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41601041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38763297872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/CoolerCoder-8B-LINEAR/6af4faad-05c2-488b-9685-e11ae4e1cbf0.json b/data/hfopenllm_v2/DreadPoor/CoolerCoder-8B-LINEAR/6af4faad-05c2-488b-9685-e11ae4e1cbf0.json new file mode 100644 index 000000000..c49036cd8 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/CoolerCoder-8B-LINEAR/6af4faad-05c2-488b-9685-e11ae4e1cbf0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_CoolerCoder-8B-LINEAR/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CoolerCoder-8B-LINEAR", + "id": "DreadPoor/CoolerCoder-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4519 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4762 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0793 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3964 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + 
"source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3159 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/CoolerCoder-8B-LINEAR/b3bc4e42-5850-45bd-a0a1-ff6779c04fce.json b/data/hfopenllm_v2/DreadPoor/CoolerCoder-8B-LINEAR/b3bc4e42-5850-45bd-a0a1-ff6779c04fce.json deleted file mode 100644 index f278993a8..000000000 --- a/data/hfopenllm_v2/DreadPoor/CoolerCoder-8B-LINEAR/b3bc4e42-5850-45bd-a0a1-ff6779c04fce.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_CoolerCoder-8B-LINEAR/1762652579.568993", - "retrieved_timestamp": "1762652579.568993", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/CoolerCoder-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/CoolerCoder-8B-LINEAR", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4519286603988528 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4761504835496542 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07930513595166164 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3963541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31590757978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Damasteel-8B-LINEAR/8aa7701b-7019-44a0-851f-cfc9108fdfbd.json b/data/hfopenllm_v2/DreadPoor/Damasteel-8B-LINEAR/8aa7701b-7019-44a0-851f-cfc9108fdfbd.json new file mode 100644 index 000000000..7160aa1de --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Damasteel-8B-LINEAR/8aa7701b-7019-44a0-851f-cfc9108fdfbd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/DreadPoor_Damasteel-8B-LINEAR/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Damasteel-8B-LINEAR", + "id": "DreadPoor/Damasteel-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7384 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5388 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1669 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4212 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3779 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Damasteel-8B-LINEAR/b0a2ef10-8705-4eae-892d-51f3633dcd87.json b/data/hfopenllm_v2/DreadPoor/Damasteel-8B-LINEAR/b0a2ef10-8705-4eae-892d-51f3633dcd87.json deleted file mode 100644 index beeb6ee56..000000000 --- a/data/hfopenllm_v2/DreadPoor/Damasteel-8B-LINEAR/b0a2ef10-8705-4eae-892d-51f3633dcd87.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Damasteel-8B-LINEAR/1762652579.569221", - "retrieved_timestamp": "1762652579.569222", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Damasteel-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Damasteel-8B-LINEAR", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7384417789243651 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5388142176959776 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16691842900302115 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42124999999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3779089095744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Dearly_Beloved-8B-TIES/3d46ee0f-8ec0-4723-ac8d-fe88db7053c1.json b/data/hfopenllm_v2/DreadPoor/Dearly_Beloved-8B-TIES/3d46ee0f-8ec0-4723-ac8d-fe88db7053c1.json deleted file mode 100644 index 247974d43..000000000 --- a/data/hfopenllm_v2/DreadPoor/Dearly_Beloved-8B-TIES/3d46ee0f-8ec0-4723-ac8d-fe88db7053c1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Dearly_Beloved-8B-TIES/1762652579.569437", - "retrieved_timestamp": "1762652579.569438", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Dearly_Beloved-8B-TIES", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Dearly_Beloved-8B-TIES", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8266687943545348 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4049833102731906 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21148036253776434 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41746875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2826628989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Dearly_Beloved-8B-TIES/a2f95fad-5ab5-47d0-b9aa-33358c673caf.json b/data/hfopenllm_v2/DreadPoor/Dearly_Beloved-8B-TIES/a2f95fad-5ab5-47d0-b9aa-33358c673caf.json new file mode 100644 index 000000000..a39f8ede9 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Dearly_Beloved-8B-TIES/a2f95fad-5ab5-47d0-b9aa-33358c673caf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Dearly_Beloved-8B-TIES/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dearly_Beloved-8B-TIES", + "id": "DreadPoor/Dearly_Beloved-8B-TIES", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8267 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.405 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2115 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + 
"source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4175 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Decayed-8B-LINEAR/5658866d-fd86-4203-b14f-84f9a4784028.json b/data/hfopenllm_v2/DreadPoor/Decayed-8B-LINEAR/5658866d-fd86-4203-b14f-84f9a4784028.json deleted file mode 100644 index 1731b863f..000000000 --- a/data/hfopenllm_v2/DreadPoor/Decayed-8B-LINEAR/5658866d-fd86-4203-b14f-84f9a4784028.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Decayed-8B-LINEAR/1762652579.569654", - "retrieved_timestamp": "1762652579.569655", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Decayed-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Decayed-8B-LINEAR", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7676176988169169 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5417014088773181 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1714501510574018 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4186145833333333 - } - }, - { - "evaluation_name": 
"MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37632978723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Decayed-8B-LINEAR/aef73a77-9df7-4d4f-89ef-50905d326198.json b/data/hfopenllm_v2/DreadPoor/Decayed-8B-LINEAR/aef73a77-9df7-4d4f-89ef-50905d326198.json new file mode 100644 index 000000000..28e908428 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Decayed-8B-LINEAR/aef73a77-9df7-4d4f-89ef-50905d326198.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Decayed-8B-LINEAR/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Decayed-8B-LINEAR", + "id": "DreadPoor/Decayed-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7676 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5417 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1715 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4186 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3763 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/DreadPoor/Derivative-8B-Model_Stock/9ef7e716-8638-46ac-a455-f601c1cfddc1.json b/data/hfopenllm_v2/DreadPoor/Derivative-8B-Model_Stock/9ef7e716-8638-46ac-a455-f601c1cfddc1.json deleted file mode 100644 index 034625812..000000000 --- a/data/hfopenllm_v2/DreadPoor/Derivative-8B-Model_Stock/9ef7e716-8638-46ac-a455-f601c1cfddc1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Derivative-8B-Model_Stock/1762652579.569859", - "retrieved_timestamp": "1762652579.56986", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Derivative-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Derivative-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7667433520835827 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5395493987763994 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17900302114803626 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42004166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3810671542553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Derivative-8B-Model_Stock/e9ffdfb6-6f91-4bac-89d2-40b1eb43f3ee.json b/data/hfopenllm_v2/DreadPoor/Derivative-8B-Model_Stock/e9ffdfb6-6f91-4bac-89d2-40b1eb43f3ee.json new file mode 100644 index 000000000..5b83a9506 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Derivative-8B-Model_Stock/e9ffdfb6-6f91-4bac-89d2-40b1eb43f3ee.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Derivative-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"Derivative-8B-Model_Stock", + "id": "DreadPoor/Derivative-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7667 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5395 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.179 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3171 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.42 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3811 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Derivative_V2-8B-Model_Stock/3320dceb-b5ef-4267-81d3-b6fe2a415eee.json b/data/hfopenllm_v2/DreadPoor/Derivative_V2-8B-Model_Stock/3320dceb-b5ef-4267-81d3-b6fe2a415eee.json deleted file mode 100644 index 8d61d283a..000000000 --- a/data/hfopenllm_v2/DreadPoor/Derivative_V2-8B-Model_Stock/3320dceb-b5ef-4267-81d3-b6fe2a415eee.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Derivative_V2-8B-Model_Stock/1762652579.5701172", - "retrieved_timestamp": "1762652579.570118", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Derivative_V2-8B-Model_Stock", - "developer": "DreadPoor", - 
"inference_platform": "unknown", - "id": "DreadPoor/Derivative_V2-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7536791269387447 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5392643954415269 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41229166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38563829787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Derivative_V2-8B-Model_Stock/8ff39438-907c-465f-ac7a-5a25cfd8d824.json b/data/hfopenllm_v2/DreadPoor/Derivative_V2-8B-Model_Stock/8ff39438-907c-465f-ac7a-5a25cfd8d824.json new file mode 100644 index 000000000..332287ece --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Derivative_V2-8B-Model_Stock/8ff39438-907c-465f-ac7a-5a25cfd8d824.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Derivative_V2-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Derivative_V2-8B-Model_Stock", + "id": "DreadPoor/Derivative_V2-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7537 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5393 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1798 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4123 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3856 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Derivative_V2_ALT-8B-Model_Stock/83d831c5-a74f-4699-9961-664a7a51b7b8.json b/data/hfopenllm_v2/DreadPoor/Derivative_V2_ALT-8B-Model_Stock/83d831c5-a74f-4699-9961-664a7a51b7b8.json new file mode 100644 index 000000000..2b2d015d1 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Derivative_V2_ALT-8B-Model_Stock/83d831c5-a74f-4699-9961-664a7a51b7b8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Derivative_V2_ALT-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Derivative_V2_ALT-8B-Model_Stock", + "id": "DreadPoor/Derivative_V2_ALT-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.772 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5365 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH 
Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1881 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3112 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4135 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3882 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Derivative_V2_ALT-8B-Model_Stock/ac19b0a8-1955-4bab-b7ae-451a84dc09c6.json b/data/hfopenllm_v2/DreadPoor/Derivative_V2_ALT-8B-Model_Stock/ac19b0a8-1955-4bab-b7ae-451a84dc09c6.json deleted file mode 100644 index d5a69254f..000000000 --- a/data/hfopenllm_v2/DreadPoor/Derivative_V2_ALT-8B-Model_Stock/ac19b0a8-1955-4bab-b7ae-451a84dc09c6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Derivative_V2_ALT-8B-Model_Stock/1762652579.570343", - "retrieved_timestamp": "1762652579.570344", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Derivative_V2_ALT-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Derivative_V2_ALT-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7719639445560003 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5365351570462934 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18806646525679757 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41346875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38821476063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Derivative_V3-8B-Model_Stock/54f51897-7b47-4e95-9c1a-58ecd64caa96.json b/data/hfopenllm_v2/DreadPoor/Derivative_V3-8B-Model_Stock/54f51897-7b47-4e95-9c1a-58ecd64caa96.json deleted file mode 100644 index 6dc9e976c..000000000 --- a/data/hfopenllm_v2/DreadPoor/Derivative_V3-8B-Model_Stock/54f51897-7b47-4e95-9c1a-58ecd64caa96.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Derivative_V3-8B-Model_Stock/1762652579.570688", - "retrieved_timestamp": "1762652579.570689", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Derivative_V3-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Derivative_V3-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6963767248677952 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.524319745545524 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14652567975830816 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4149895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35023271276595747 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/DreadPoor/Derivative_V3-8B-Model_Stock/83fb88ec-f640-4c1e-b71c-53a123fc4c2e.json b/data/hfopenllm_v2/DreadPoor/Derivative_V3-8B-Model_Stock/83fb88ec-f640-4c1e-b71c-53a123fc4c2e.json new file mode 100644 index 000000000..cfc2e036f --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Derivative_V3-8B-Model_Stock/83fb88ec-f640-4c1e-b71c-53a123fc4c2e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Derivative_V3-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Derivative_V3-8B-Model_Stock", + "id": "DreadPoor/Derivative_V3-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6964 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5243 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1465 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.415 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3502 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Elusive_Dragon_Heart-8B-LINEAR/3811cc34-45cb-4932-b862-39bf042331e0.json 
b/data/hfopenllm_v2/DreadPoor/Elusive_Dragon_Heart-8B-LINEAR/3811cc34-45cb-4932-b862-39bf042331e0.json new file mode 100644 index 000000000..cf8031e7d --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Elusive_Dragon_Heart-8B-LINEAR/3811cc34-45cb-4932-b862-39bf042331e0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Elusive_Dragon_Heart-8B-LINEAR/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Elusive_Dragon_Heart-8B-LINEAR", + "id": "DreadPoor/Elusive_Dragon_Heart-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7131 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5456 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.148 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4146 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3814 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Elusive_Dragon_Heart-8B-LINEAR/fbc53f61-cb3b-4f85-a724-fc07c6912c22.json b/data/hfopenllm_v2/DreadPoor/Elusive_Dragon_Heart-8B-LINEAR/fbc53f61-cb3b-4f85-a724-fc07c6912c22.json deleted file mode 100644 index 1322cee85..000000000 --- 
a/data/hfopenllm_v2/DreadPoor/Elusive_Dragon_Heart-8B-LINEAR/fbc53f61-cb3b-4f85-a724-fc07c6912c22.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Elusive_Dragon_Heart-8B-LINEAR/1762652579.570945", - "retrieved_timestamp": "1762652579.570946", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Elusive_Dragon_Heart-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Elusive_Dragon_Heart-8B-LINEAR", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7131378076836128 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5456414280881592 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14803625377643503 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4145520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3813996010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Emu_Eggs-9B-Model_Stock/5b2a16a1-7a2a-40b7-add6-b99378b6af00.json b/data/hfopenllm_v2/DreadPoor/Emu_Eggs-9B-Model_Stock/5b2a16a1-7a2a-40b7-add6-b99378b6af00.json new file mode 100644 index 000000000..34eb53856 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Emu_Eggs-9B-Model_Stock/5b2a16a1-7a2a-40b7-add6-b99378b6af00.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Emu_Eggs-9B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Emu_Eggs-9B-Model_Stock", + "id": "DreadPoor/Emu_Eggs-9B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 
9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7607 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6052 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.21 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3331 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4071 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4227 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Emu_Eggs-9B-Model_Stock/9343177e-5432-47c7-9fb6-90f2dc9125e5.json b/data/hfopenllm_v2/DreadPoor/Emu_Eggs-9B-Model_Stock/9343177e-5432-47c7-9fb6-90f2dc9125e5.json deleted file mode 100644 index 73eb124c8..000000000 --- a/data/hfopenllm_v2/DreadPoor/Emu_Eggs-9B-Model_Stock/9343177e-5432-47c7-9fb6-90f2dc9125e5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Emu_Eggs-9B-Model_Stock/1762652579.571181", - "retrieved_timestamp": "1762652579.571182", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Emu_Eggs-9B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Emu_Eggs-9B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7606982805622415 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6051657213517168 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20996978851963746 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33305369127516776 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4070833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4227061170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Eunoia_Vespera-8B-LINEAR/1dc2a5bb-40b6-401e-8f1c-6110cb4c0f0d.json b/data/hfopenllm_v2/DreadPoor/Eunoia_Vespera-8B-LINEAR/1dc2a5bb-40b6-401e-8f1c-6110cb4c0f0d.json new file mode 100644 index 000000000..be6c2967a --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Eunoia_Vespera-8B-LINEAR/1dc2a5bb-40b6-401e-8f1c-6110cb4c0f0d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Eunoia_Vespera-8B-LINEAR/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Eunoia_Vespera-8B-LINEAR", + "id": "DreadPoor/Eunoia_Vespera-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7235 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5399 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": 
"Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1541 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4185 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3839 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Eunoia_Vespera-8B-LINEAR/5a835cef-3db8-40c9-8ae3-022d0719c89e.json b/data/hfopenllm_v2/DreadPoor/Eunoia_Vespera-8B-LINEAR/5a835cef-3db8-40c9-8ae3-022d0719c89e.json deleted file mode 100644 index 86497226b..000000000 --- a/data/hfopenllm_v2/DreadPoor/Eunoia_Vespera-8B-LINEAR/5a835cef-3db8-40c9-8ae3-022d0719c89e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Eunoia_Vespera-8B-LINEAR/1762652579.571407", - "retrieved_timestamp": "1762652579.571407", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Eunoia_Vespera-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Eunoia_Vespera-8B-LINEAR", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7235291249440374 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5399310621081937 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1540785498489426 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - 
} - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4184895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38389295212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Fu_sion_HA-8B-SLERP/5d6eb91b-518c-41ae-9e52-bb741b005601.json b/data/hfopenllm_v2/DreadPoor/Fu_sion_HA-8B-SLERP/5d6eb91b-518c-41ae-9e52-bb741b005601.json deleted file mode 100644 index c7edd31e2..000000000 --- a/data/hfopenllm_v2/DreadPoor/Fu_sion_HA-8B-SLERP/5d6eb91b-518c-41ae-9e52-bb741b005601.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Fu_sion_HA-8B-SLERP/1762652579.57162", - "retrieved_timestamp": "1762652579.5716212", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Fu_sion_HA-8B-SLERP", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Fu_sion_HA-8B-SLERP", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7609232392274721 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5372804197028272 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17522658610271905 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41601041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38248005319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Fu_sion_HA-8B-SLERP/742e0a1c-7496-4076-bdbf-ada0a8e528c2.json b/data/hfopenllm_v2/DreadPoor/Fu_sion_HA-8B-SLERP/742e0a1c-7496-4076-bdbf-ada0a8e528c2.json new file mode 100644 index 000000000..cc65f1f79 --- /dev/null +++ 
b/data/hfopenllm_v2/DreadPoor/Fu_sion_HA-8B-SLERP/742e0a1c-7496-4076-bdbf-ada0a8e528c2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Fu_sion_HA-8B-SLERP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fu_sion_HA-8B-SLERP", + "id": "DreadPoor/Fu_sion_HA-8B-SLERP", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7609 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5373 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1752 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.323 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.416 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3825 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/HOT_STINKING_GARBAGE/70471d77-adb1-49df-ab72-8f43f379ab23.json b/data/hfopenllm_v2/DreadPoor/HOT_STINKING_GARBAGE/70471d77-adb1-49df-ab72-8f43f379ab23.json deleted file mode 100644 index 3ae33671b..000000000 --- a/data/hfopenllm_v2/DreadPoor/HOT_STINKING_GARBAGE/70471d77-adb1-49df-ab72-8f43f379ab23.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_HOT_STINKING_GARBAGE/1762652579.571834", - 
"retrieved_timestamp": "1762652579.5718348", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/HOT_STINKING_GARBAGE", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/HOT_STINKING_GARBAGE", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5754265349273262 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4884000866161456 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06722054380664652 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42500000000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30169547872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/HOT_STINKING_GARBAGE/f0664035-3256-444c-b848-ef603e0d46b5.json b/data/hfopenllm_v2/DreadPoor/HOT_STINKING_GARBAGE/f0664035-3256-444c-b848-ef603e0d46b5.json new file mode 100644 index 000000000..0bd391e53 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/HOT_STINKING_GARBAGE/f0664035-3256-444c-b848-ef603e0d46b5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_HOT_STINKING_GARBAGE/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HOT_STINKING_GARBAGE", + "id": "DreadPoor/HOT_STINKING_GARBAGE", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5754 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4884 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0672 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2752 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.425 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3017 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/H_the_eighth-8B-LINEAR/2bbec710-ce13-4fa3-861b-fce8eee26b3b.json b/data/hfopenllm_v2/DreadPoor/H_the_eighth-8B-LINEAR/2bbec710-ce13-4fa3-861b-fce8eee26b3b.json deleted file mode 100644 index e2b3ea364..000000000 --- a/data/hfopenllm_v2/DreadPoor/H_the_eighth-8B-LINEAR/2bbec710-ce13-4fa3-861b-fce8eee26b3b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_H_the_eighth-8B-LINEAR/1762652579.572039", - "retrieved_timestamp": "1762652579.5720398", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/H_the_eighth-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/H_the_eighth-8B-LINEAR", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7469347996648892 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5383752114303682 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17749244712990936 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41728125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3823969414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/H_the_eighth-8B-LINEAR/9159aaa6-8663-491f-901a-74da4c343d20.json b/data/hfopenllm_v2/DreadPoor/H_the_eighth-8B-LINEAR/9159aaa6-8663-491f-901a-74da4c343d20.json new file mode 100644 index 000000000..506db7e4d --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/H_the_eighth-8B-LINEAR/9159aaa6-8663-491f-901a-74da4c343d20.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_H_the_eighth-8B-LINEAR/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "H_the_eighth-8B-LINEAR", + "id": "DreadPoor/H_the_eighth-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7469 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5384 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1775 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.328 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4173 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3824 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Happy_New_Year-8B-Model_Stock/170808e4-7506-44c9-8bb7-5dd92037a347.json b/data/hfopenllm_v2/DreadPoor/Happy_New_Year-8B-Model_Stock/170808e4-7506-44c9-8bb7-5dd92037a347.json deleted file mode 100644 index 17432e904..000000000 --- a/data/hfopenllm_v2/DreadPoor/Happy_New_Year-8B-Model_Stock/170808e4-7506-44c9-8bb7-5dd92037a347.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Happy_New_Year-8B-Model_Stock/1762652579.572258", - "retrieved_timestamp": "1762652579.5722592", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Happy_New_Year-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Happy_New_Year-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7615726272955757 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5367913866457493 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1593655589123867 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4185520833333333 - } - }, - { - "evaluation_name": 
"MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3878823138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Happy_New_Year-8B-Model_Stock/5179b145-9fdb-4ab5-8cca-87966ecf6519.json b/data/hfopenllm_v2/DreadPoor/Happy_New_Year-8B-Model_Stock/5179b145-9fdb-4ab5-8cca-87966ecf6519.json new file mode 100644 index 000000000..c37605ceb --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Happy_New_Year-8B-Model_Stock/5179b145-9fdb-4ab5-8cca-87966ecf6519.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Happy_New_Year-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Happy_New_Year-8B-Model_Stock", + "id": "DreadPoor/Happy_New_Year-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7616 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5368 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1594 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3138 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4186 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.3879 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Heart_Stolen-8B-Model_Stock/86b9c040-4c5e-413d-ac23-1603c499b5de.json b/data/hfopenllm_v2/DreadPoor/Heart_Stolen-8B-Model_Stock/86b9c040-4c5e-413d-ac23-1603c499b5de.json deleted file mode 100644 index 3972d0ad2..000000000 --- a/data/hfopenllm_v2/DreadPoor/Heart_Stolen-8B-Model_Stock/86b9c040-4c5e-413d-ac23-1603c499b5de.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Heart_Stolen-8B-Model_Stock/1762652579.572714", - "retrieved_timestamp": "1762652579.5727181", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Heart_Stolen-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Heart_Stolen-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7244533393617822 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5395443745186658 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17220543806646527 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41622916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37940492021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Heart_Stolen-8B-Model_Stock/da872193-1d25-4e8e-bc22-9138a9d121ba.json b/data/hfopenllm_v2/DreadPoor/Heart_Stolen-8B-Model_Stock/da872193-1d25-4e8e-bc22-9138a9d121ba.json new file mode 100644 index 000000000..5afe7e574 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Heart_Stolen-8B-Model_Stock/da872193-1d25-4e8e-bc22-9138a9d121ba.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Heart_Stolen-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging 
Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Heart_Stolen-8B-Model_Stock", + "id": "DreadPoor/Heart_Stolen-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7245 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5395 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1722 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3171 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4162 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3794 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Heart_Stolen-ALT-8B-Model_Stock/141d8908-50cb-4457-a0f0-93d55d1c705b.json b/data/hfopenllm_v2/DreadPoor/Heart_Stolen-ALT-8B-Model_Stock/141d8908-50cb-4457-a0f0-93d55d1c705b.json deleted file mode 100644 index 9906942f3..000000000 --- a/data/hfopenllm_v2/DreadPoor/Heart_Stolen-ALT-8B-Model_Stock/141d8908-50cb-4457-a0f0-93d55d1c705b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Heart_Stolen-ALT-8B-Model_Stock/1762652579.573096", - "retrieved_timestamp": "1762652579.573097", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { 
- "name": "DreadPoor/Heart_Stolen-ALT-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Heart_Stolen-ALT-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7183584001560305 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.526338467747489 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40549999999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37724401595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Heart_Stolen-ALT-8B-Model_Stock/967fdd26-1f8a-40d6-8f7d-ca731c7ef2e3.json b/data/hfopenllm_v2/DreadPoor/Heart_Stolen-ALT-8B-Model_Stock/967fdd26-1f8a-40d6-8f7d-ca731c7ef2e3.json new file mode 100644 index 000000000..1724bbb8b --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Heart_Stolen-ALT-8B-Model_Stock/967fdd26-1f8a-40d6-8f7d-ca731c7ef2e3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Heart_Stolen-ALT-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Heart_Stolen-ALT-8B-Model_Stock", + "id": "DreadPoor/Heart_Stolen-ALT-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7184 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5263 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1563 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4055 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3772 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Here_We_Go_Again-8B-SLERP/1c21cfd2-2b01-44d3-8daa-41493a743a75.json b/data/hfopenllm_v2/DreadPoor/Here_We_Go_Again-8B-SLERP/1c21cfd2-2b01-44d3-8daa-41493a743a75.json deleted file mode 100644 index e85dacdd1..000000000 --- a/data/hfopenllm_v2/DreadPoor/Here_We_Go_Again-8B-SLERP/1c21cfd2-2b01-44d3-8daa-41493a743a75.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Here_We_Go_Again-8B-SLERP/1762652579.573366", - "retrieved_timestamp": "1762652579.573367", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Here_We_Go_Again-8B-SLERP", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Here_We_Go_Again-8B-SLERP", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7442120240960651 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5460182474181831 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact 
Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1729607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4186770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3873005319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Here_We_Go_Again-8B-SLERP/dd615b4c-189e-4361-bcf4-879fd59b28a2.json b/data/hfopenllm_v2/DreadPoor/Here_We_Go_Again-8B-SLERP/dd615b4c-189e-4361-bcf4-879fd59b28a2.json new file mode 100644 index 000000000..040701af7 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Here_We_Go_Again-8B-SLERP/dd615b4c-189e-4361-bcf4-879fd59b28a2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Here_We_Go_Again-8B-SLERP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Here_We_Go_Again-8B-SLERP", + "id": "DreadPoor/Here_We_Go_Again-8B-SLERP", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7442 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.546 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.173 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3188 + } + }, + { + 
"evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4187 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3873 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Howdy-8B-LINEAR/0aeee3e8-00ce-4f95-bbd9-307d93a194a4.json b/data/hfopenllm_v2/DreadPoor/Howdy-8B-LINEAR/0aeee3e8-00ce-4f95-bbd9-307d93a194a4.json new file mode 100644 index 000000000..6da88e2f6 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Howdy-8B-LINEAR/0aeee3e8-00ce-4f95-bbd9-307d93a194a4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Howdy-8B-LINEAR/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Howdy-8B-LINEAR", + "id": "DreadPoor/Howdy-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7378 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5384 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1775 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3146 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4121 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3807 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Howdy-8B-LINEAR/88df4a25-089c-4f21-b403-a1f5dad112b3.json b/data/hfopenllm_v2/DreadPoor/Howdy-8B-LINEAR/88df4a25-089c-4f21-b403-a1f5dad112b3.json deleted file mode 100644 index 6aca3f6d2..000000000 --- a/data/hfopenllm_v2/DreadPoor/Howdy-8B-LINEAR/88df4a25-089c-4f21-b403-a1f5dad112b3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Howdy-8B-LINEAR/1762652579.573699", - "retrieved_timestamp": "1762652579.5737002", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Howdy-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Howdy-8B-LINEAR", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7377923908562614 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5383981582614435 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17749244712990936 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41213541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3806515957446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Incidental-8B-Model_Stock/102ed90e-cbe3-4219-b9c6-cec82c78941f.json b/data/hfopenllm_v2/DreadPoor/Incidental-8B-Model_Stock/102ed90e-cbe3-4219-b9c6-cec82c78941f.json deleted file mode 100644 index 5882cb191..000000000 --- 
a/data/hfopenllm_v2/DreadPoor/Incidental-8B-Model_Stock/102ed90e-cbe3-4219-b9c6-cec82c78941f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Incidental-8B-Model_Stock/1762652579.573979", - "retrieved_timestamp": "1762652579.5739799", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Incidental-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Incidental-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.748183708116686 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5452070612873019 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16163141993957703 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42401041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3873005319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Incidental-8B-Model_Stock/8c583b51-4349-48af-98d9-8eaaf43d60b6.json b/data/hfopenllm_v2/DreadPoor/Incidental-8B-Model_Stock/8c583b51-4349-48af-98d9-8eaaf43d60b6.json new file mode 100644 index 000000000..f986ac603 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Incidental-8B-Model_Stock/8c583b51-4349-48af-98d9-8eaaf43d60b6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Incidental-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Incidental-8B-Model_Stock", + "id": "DreadPoor/Incidental-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, 
+ "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7482 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5452 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1616 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.424 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3873 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Irina-8B-model_stock/34aab556-5e97-4ea2-9ada-d17dc3624be2.json b/data/hfopenllm_v2/DreadPoor/Irina-8B-model_stock/34aab556-5e97-4ea2-9ada-d17dc3624be2.json new file mode 100644 index 000000000..b23dc6544 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Irina-8B-model_stock/34aab556-5e97-4ea2-9ada-d17dc3624be2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Irina-8B-model_stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Irina-8B-model_stock", + "id": "DreadPoor/Irina-8B-model_stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6799 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5237 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.102 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2844 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3574 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Irina-8B-model_stock/60aebc6f-b3ee-4b32-8b89-4359c990fb23.json b/data/hfopenllm_v2/DreadPoor/Irina-8B-model_stock/60aebc6f-b3ee-4b32-8b89-4359c990fb23.json deleted file mode 100644 index 74db9f0a8..000000000 --- a/data/hfopenllm_v2/DreadPoor/Irina-8B-model_stock/60aebc6f-b3ee-4b32-8b89-4359c990fb23.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Irina-8B-model_stock/1762652579.574285", - "retrieved_timestamp": "1762652579.574286", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Irina-8B-model_stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Irina-8B-model_stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6799403360860294 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5236638956084764 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10196374622356495 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40029166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35738031914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Kindling-8B-Model_Stock/8ee9ad54-c6ca-4afc-931b-ffe1fd1d5971.json b/data/hfopenllm_v2/DreadPoor/Kindling-8B-Model_Stock/8ee9ad54-c6ca-4afc-931b-ffe1fd1d5971.json deleted file mode 100644 index 328193130..000000000 --- a/data/hfopenllm_v2/DreadPoor/Kindling-8B-Model_Stock/8ee9ad54-c6ca-4afc-931b-ffe1fd1d5971.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Kindling-8B-Model_Stock/1762652579.57468", - "retrieved_timestamp": "1762652579.574682", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Kindling-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Kindling-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7308231049171753 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5492054832931256 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17522658610271905 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4068333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3829787234042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Kindling-8B-Model_Stock/fbd9d5e3-15f7-45ce-92fb-368b3bfcc526.json b/data/hfopenllm_v2/DreadPoor/Kindling-8B-Model_Stock/fbd9d5e3-15f7-45ce-92fb-368b3bfcc526.json new file mode 100644 index 000000000..26f843f70 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Kindling-8B-Model_Stock/fbd9d5e3-15f7-45ce-92fb-368b3bfcc526.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Kindling-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kindling-8B-Model_Stock", + "id": "DreadPoor/Kindling-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7308 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5492 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1752 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.318 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4068 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": 
"TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.383 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/L3.1-BaeZel-8B-Della/6c7dfbaf-648e-4c4a-907f-8639ab1c7312.json b/data/hfopenllm_v2/DreadPoor/L3.1-BaeZel-8B-Della/6c7dfbaf-648e-4c4a-907f-8639ab1c7312.json deleted file mode 100644 index 06826813d..000000000 --- a/data/hfopenllm_v2/DreadPoor/L3.1-BaeZel-8B-Della/6c7dfbaf-648e-4c4a-907f-8639ab1c7312.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_L3.1-BaeZel-8B-Della/1762652579.575009", - "retrieved_timestamp": "1762652579.57501", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/L3.1-BaeZel-8B-Della", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/L3.1-BaeZel-8B-Della", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5180243974875552 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5448449542185521 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17447129909365558 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4199791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3902094414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/L3.1-BaeZel-8B-Della/b177e329-ce6b-4bc6-aeac-1c01306e6b1f.json b/data/hfopenllm_v2/DreadPoor/L3.1-BaeZel-8B-Della/b177e329-ce6b-4bc6-aeac-1c01306e6b1f.json new file mode 100644 index 000000000..5d86d24a4 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/L3.1-BaeZel-8B-Della/b177e329-ce6b-4bc6-aeac-1c01306e6b1f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_L3.1-BaeZel-8B-Della/1770682486.623709", + "retrieved_timestamp": 
"1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3.1-BaeZel-8B-Della", + "id": "DreadPoor/L3.1-BaeZel-8B-Della", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.518 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5448 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1745 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3196 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.42 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3902 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Laughing_Stock-8B-Model_Stock/7f371c11-e8f0-4233-b359-aac39c0a1110.json b/data/hfopenllm_v2/DreadPoor/Laughing_Stock-8B-Model_Stock/7f371c11-e8f0-4233-b359-aac39c0a1110.json new file mode 100644 index 000000000..aec03b176 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Laughing_Stock-8B-Model_Stock/7f371c11-e8f0-4233-b359-aac39c0a1110.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Laughing_Stock-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + 
"model_info": { + "name": "Laughing_Stock-8B-Model_Stock", + "id": "DreadPoor/Laughing_Stock-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.719 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5449 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1579 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4146 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3764 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Laughing_Stock-8B-Model_Stock/cf1b2ab2-d18b-44c1-b0ed-476dba32c034.json b/data/hfopenllm_v2/DreadPoor/Laughing_Stock-8B-Model_Stock/cf1b2ab2-d18b-44c1-b0ed-476dba32c034.json deleted file mode 100644 index 2173ca660..000000000 --- a/data/hfopenllm_v2/DreadPoor/Laughing_Stock-8B-Model_Stock/cf1b2ab2-d18b-44c1-b0ed-476dba32c034.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Laughing_Stock-8B-Model_Stock/1762652579.5752351", - "retrieved_timestamp": "1762652579.575236", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Laughing_Stock-8B-Model_Stock", - 
"developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Laughing_Stock-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7189579205397235 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5449429262155 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1578549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4145520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3764128989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Lava_Lamp-8B-SLERP/26d89e91-7f52-4913-a4e0-3275cca1d8d7.json b/data/hfopenllm_v2/DreadPoor/Lava_Lamp-8B-SLERP/26d89e91-7f52-4913-a4e0-3275cca1d8d7.json deleted file mode 100644 index c31aef3a9..000000000 --- a/data/hfopenllm_v2/DreadPoor/Lava_Lamp-8B-SLERP/26d89e91-7f52-4913-a4e0-3275cca1d8d7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Lava_Lamp-8B-SLERP/1762652579.575455", - "retrieved_timestamp": "1762652579.575455", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Lava_Lamp-8B-SLERP", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Lava_Lamp-8B-SLERP", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7381170848903134 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5367586873360172 - } - }, - { - "evaluation_name": "MATH 
Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17371601208459214 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4187083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.375 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Lava_Lamp-8B-SLERP/9f758d4e-d121-4688-8ece-8dc67a499811.json b/data/hfopenllm_v2/DreadPoor/Lava_Lamp-8B-SLERP/9f758d4e-d121-4688-8ece-8dc67a499811.json new file mode 100644 index 000000000..e0d95bfba --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Lava_Lamp-8B-SLERP/9f758d4e-d121-4688-8ece-8dc67a499811.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Lava_Lamp-8B-SLERP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lava_Lamp-8B-SLERP", + "id": "DreadPoor/Lava_Lamp-8B-SLERP", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7381 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5368 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1737 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { 
+ "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4187 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.375 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/LemonP-8B-Model_Stock/903b8c71-d54d-4ce4-9845-71eb8ca8733a.json b/data/hfopenllm_v2/DreadPoor/LemonP-8B-Model_Stock/903b8c71-d54d-4ce4-9845-71eb8ca8733a.json new file mode 100644 index 000000000..bd0f0baf1 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/LemonP-8B-Model_Stock/903b8c71-d54d-4ce4-9845-71eb8ca8733a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_LemonP-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LemonP-8B-Model_Stock", + "id": "DreadPoor/LemonP-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7676 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5439 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1767 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4081 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4004 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/LemonP-8B-Model_Stock/f13fb9a9-f53c-4c7e-9e29-fabb010a617b.json b/data/hfopenllm_v2/DreadPoor/LemonP-8B-Model_Stock/f13fb9a9-f53c-4c7e-9e29-fabb010a617b.json deleted file mode 100644 index c959f7964..000000000 --- a/data/hfopenllm_v2/DreadPoor/LemonP-8B-Model_Stock/f13fb9a9-f53c-4c7e-9e29-fabb010a617b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_LemonP-8B-Model_Stock/1762652579.575685", - "retrieved_timestamp": "1762652579.575686", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/LemonP-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/LemonP-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7676176988169169 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5439348074265458 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17673716012084592 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40810416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40043218085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Lydia_of_Whiterun-8B-LINEAR/9bdc17bf-7b81-49c8-81f5-c6dfa31b449b.json b/data/hfopenllm_v2/DreadPoor/Lydia_of_Whiterun-8B-LINEAR/9bdc17bf-7b81-49c8-81f5-c6dfa31b449b.json new file mode 100644 index 000000000..d1ecbf772 
--- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Lydia_of_Whiterun-8B-LINEAR/9bdc17bf-7b81-49c8-81f5-c6dfa31b449b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Lydia_of_Whiterun-8B-LINEAR/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lydia_of_Whiterun-8B-LINEAR", + "id": "DreadPoor/Lydia_of_Whiterun-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7603 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.538 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1767 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3163 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4251 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3801 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Lydia_of_Whiterun-8B-LINEAR/cee29aba-b6c1-42a2-88d0-a92080b3c083.json b/data/hfopenllm_v2/DreadPoor/Lydia_of_Whiterun-8B-LINEAR/cee29aba-b6c1-42a2-88d0-a92080b3c083.json deleted file mode 100644 index 5813f9c5d..000000000 --- a/data/hfopenllm_v2/DreadPoor/Lydia_of_Whiterun-8B-LINEAR/cee29aba-b6c1-42a2-88d0-a92080b3c083.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/DreadPoor_Lydia_of_Whiterun-8B-LINEAR/1762652579.575901", - "retrieved_timestamp": "1762652579.575901", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Lydia_of_Whiterun-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Lydia_of_Whiterun-8B-LINEAR", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.760323718843779 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5379527944750039 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17673716012084592 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42506249999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3800698138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Matryoshka-8B-LINEAR/28109e00-87c1-4809-a4fc-dddebba52621.json b/data/hfopenllm_v2/DreadPoor/Matryoshka-8B-LINEAR/28109e00-87c1-4809-a4fc-dddebba52621.json new file mode 100644 index 000000000..81c216cdf --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Matryoshka-8B-LINEAR/28109e00-87c1-4809-a4fc-dddebba52621.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Matryoshka-8B-LINEAR/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Matryoshka-8B-LINEAR", + "id": "DreadPoor/Matryoshka-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7263 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5444 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1752 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3205 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4252 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3866 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Matryoshka-8B-LINEAR/2f8ce822-9278-49e5-878a-69439e794623.json b/data/hfopenllm_v2/DreadPoor/Matryoshka-8B-LINEAR/2f8ce822-9278-49e5-878a-69439e794623.json deleted file mode 100644 index 6ee95c40a..000000000 --- a/data/hfopenllm_v2/DreadPoor/Matryoshka-8B-LINEAR/2f8ce822-9278-49e5-878a-69439e794623.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Matryoshka-8B-LINEAR/1762652579.576119", - "retrieved_timestamp": "1762652579.5761201", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Matryoshka-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Matryoshka-8B-LINEAR", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7262519005128614 - } - }, - { - "evaluation_name": 
"BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5444280006376178 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17522658610271905 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42524999999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3865525265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Mercury_In_Retrograde-8b-Model-Stock/6a21381b-426d-4a5d-ad6d-2aeb57ed14c5.json b/data/hfopenllm_v2/DreadPoor/Mercury_In_Retrograde-8b-Model-Stock/6a21381b-426d-4a5d-ad6d-2aeb57ed14c5.json new file mode 100644 index 000000000..84fcc3ee6 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Mercury_In_Retrograde-8b-Model-Stock/6a21381b-426d-4a5d-ad6d-2aeb57ed14c5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Mercury_In_Retrograde-8b-Model-Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mercury_In_Retrograde-8b-Model-Stock", + "id": "DreadPoor/Mercury_In_Retrograde-8b-Model-Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7296 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5391 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.1647 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3163 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4199 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3829 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Mercury_In_Retrograde-8b-Model-Stock/eff11f37-ec26-4866-8109-0ee6dcac7fec.json b/data/hfopenllm_v2/DreadPoor/Mercury_In_Retrograde-8b-Model-Stock/eff11f37-ec26-4866-8109-0ee6dcac7fec.json deleted file mode 100644 index 9bfb792d3..000000000 --- a/data/hfopenllm_v2/DreadPoor/Mercury_In_Retrograde-8b-Model-Stock/eff11f37-ec26-4866-8109-0ee6dcac7fec.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Mercury_In_Retrograde-8b-Model-Stock/1762652579.576331", - "retrieved_timestamp": "1762652579.576332", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Mercury_In_Retrograde-8b-Model-Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Mercury_In_Retrograde-8b-Model-Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7296240641497892 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5390507664719518 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4198854166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38289561170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Minthy-8B-Model_Stock/03a8091c-473e-4fbe-af70-35f791a23a0f.json b/data/hfopenllm_v2/DreadPoor/Minthy-8B-Model_Stock/03a8091c-473e-4fbe-af70-35f791a23a0f.json new file mode 100644 index 000000000..8bace15e3 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Minthy-8B-Model_Stock/03a8091c-473e-4fbe-af70-35f791a23a0f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Minthy-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Minthy-8B-Model_Stock", + "id": "DreadPoor/Minthy-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7658 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5353 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1918 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4094 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": 
{ + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3993 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Minthy-8B-Model_Stock/394ac507-8bdb-4d06-bf6e-87911443ec2b.json b/data/hfopenllm_v2/DreadPoor/Minthy-8B-Model_Stock/394ac507-8bdb-4d06-bf6e-87911443ec2b.json deleted file mode 100644 index c8e6800f0..000000000 --- a/data/hfopenllm_v2/DreadPoor/Minthy-8B-Model_Stock/394ac507-8bdb-4d06-bf6e-87911443ec2b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Minthy-8B-Model_Stock/1762652579.5765939", - "retrieved_timestamp": "1762652579.5765948", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Minthy-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Minthy-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.765769269981427 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5352951319641014 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19184290030211482 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40940624999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3992686170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Minthy_ALT-8B-Model_Stock/709e429f-0a98-4ae6-b10f-f0546ef2d9b5.json b/data/hfopenllm_v2/DreadPoor/Minthy_ALT-8B-Model_Stock/709e429f-0a98-4ae6-b10f-f0546ef2d9b5.json deleted file mode 100644 index 1f3b348da..000000000 --- a/data/hfopenllm_v2/DreadPoor/Minthy_ALT-8B-Model_Stock/709e429f-0a98-4ae6-b10f-f0546ef2d9b5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Minthy_ALT-8B-Model_Stock/1762652579.57681", - "retrieved_timestamp": 
"1762652579.576811", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Minthy_ALT-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Minthy_ALT-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6991992358054406 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5374800202589046 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17598187311178248 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4225208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3673537234042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Minthy_ALT-8B-Model_Stock/ed75e9ed-841b-4783-a201-bc72651afd0a.json b/data/hfopenllm_v2/DreadPoor/Minthy_ALT-8B-Model_Stock/ed75e9ed-841b-4783-a201-bc72651afd0a.json new file mode 100644 index 000000000..bb6a90a55 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Minthy_ALT-8B-Model_Stock/ed75e9ed-841b-4783-a201-bc72651afd0a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Minthy_ALT-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Minthy_ALT-8B-Model_Stock", + "id": "DreadPoor/Minthy_ALT-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6992 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5375 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.176 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4225 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3674 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Minthy_V2-8B-Model_Stock/38cd418c-9770-49d2-8b30-ac47e445cee3.json b/data/hfopenllm_v2/DreadPoor/Minthy_V2-8B-Model_Stock/38cd418c-9770-49d2-8b30-ac47e445cee3.json new file mode 100644 index 000000000..f602119ec --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Minthy_V2-8B-Model_Stock/38cd418c-9770-49d2-8b30-ac47e445cee3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Minthy_V2-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Minthy_V2-8B-Model_Stock", + "id": "DreadPoor/Minthy_V2-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7126 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": 
"hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5491 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1594 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4199 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3737 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Minthy_V2-8B-Model_Stock/3f8011c6-6826-4788-b848-ec6938eefa7f.json b/data/hfopenllm_v2/DreadPoor/Minthy_V2-8B-Model_Stock/3f8011c6-6826-4788-b848-ec6938eefa7f.json deleted file mode 100644 index 270ea9a78..000000000 --- a/data/hfopenllm_v2/DreadPoor/Minthy_V2-8B-Model_Stock/3f8011c6-6826-4788-b848-ec6938eefa7f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Minthy_V2-8B-Model_Stock/1762652579.5770218", - "retrieved_timestamp": "1762652579.577023", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Minthy_V2-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Minthy_V2-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7125881549843305 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5491095928821667 - } - }, - { - "evaluation_name": "MATH 
Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1593655589123867 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4198854166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37367021276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Minus_Penus-8B-Model_Stock/395b9855-e394-46c9-b95a-75203399aed4.json b/data/hfopenllm_v2/DreadPoor/Minus_Penus-8B-Model_Stock/395b9855-e394-46c9-b95a-75203399aed4.json deleted file mode 100644 index d012c6bfc..000000000 --- a/data/hfopenllm_v2/DreadPoor/Minus_Penus-8B-Model_Stock/395b9855-e394-46c9-b95a-75203399aed4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Minus_Penus-8B-Model_Stock/1762652579.577236", - "retrieved_timestamp": "1762652579.577237", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Minus_Penus-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Minus_Penus-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7311477989512272 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5343781571200968 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2001510574018127 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40190624999999996 - } 
- }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3751662234042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Minus_Penus-8B-Model_Stock/d49b6a48-ae81-467d-87c5-b17f9ca306f8.json b/data/hfopenllm_v2/DreadPoor/Minus_Penus-8B-Model_Stock/d49b6a48-ae81-467d-87c5-b17f9ca306f8.json new file mode 100644 index 000000000..214eb3bc5 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Minus_Penus-8B-Model_Stock/d49b6a48-ae81-467d-87c5-b17f9ca306f8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Minus_Penus-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Minus_Penus-8B-Model_Stock", + "id": "DreadPoor/Minus_Penus-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7311 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5344 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2002 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4019 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.3752 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Morphing-8B-Model_Stock/39b7e250-9f71-4833-941e-85692a48b6e6.json b/data/hfopenllm_v2/DreadPoor/Morphing-8B-Model_Stock/39b7e250-9f71-4833-941e-85692a48b6e6.json new file mode 100644 index 000000000..0295f0f91 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Morphing-8B-Model_Stock/39b7e250-9f71-4833-941e-85692a48b6e6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Morphing-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Morphing-8B-Model_Stock", + "id": "DreadPoor/Morphing-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7445 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5397 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1888 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4069 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3852 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Not_Even_My_Final_Form-8B-Model_Stock/bc85d435-a537-4ed0-bf4e-02d9c30b5fa3.json 
b/data/hfopenllm_v2/DreadPoor/Not_Even_My_Final_Form-8B-Model_Stock/bc85d435-a537-4ed0-bf4e-02d9c30b5fa3.json deleted file mode 100644 index 11ced8def..000000000 --- a/data/hfopenllm_v2/DreadPoor/Not_Even_My_Final_Form-8B-Model_Stock/bc85d435-a537-4ed0-bf4e-02d9c30b5fa3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Not_Even_My_Final_Form-8B-Model_Stock/1762652579.577775", - "retrieved_timestamp": "1762652579.5777762", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Not_Even_My_Final_Form-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Not_Even_My_Final_Form-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7721889032212308 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5350849793007441 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17598187311178248 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41473958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3839760638297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Not_Even_My_Final_Form-8B-Model_Stock/c0d102a2-ff8c-45ac-a825-31472b98b871.json b/data/hfopenllm_v2/DreadPoor/Not_Even_My_Final_Form-8B-Model_Stock/c0d102a2-ff8c-45ac-a825-31472b98b871.json new file mode 100644 index 000000000..c07ee195d --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Not_Even_My_Final_Form-8B-Model_Stock/c0d102a2-ff8c-45ac-a825-31472b98b871.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Not_Even_My_Final_Form-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"Not_Even_My_Final_Form-8B-Model_Stock", + "id": "DreadPoor/Not_Even_My_Final_Form-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7722 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5351 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.176 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4147 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.384 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Nother_One-8B-Model_Stock/464f363d-ab94-4cac-8846-fbcf25be3dec.json b/data/hfopenllm_v2/DreadPoor/Nother_One-8B-Model_Stock/464f363d-ab94-4cac-8846-fbcf25be3dec.json deleted file mode 100644 index 49d9dc846..000000000 --- a/data/hfopenllm_v2/DreadPoor/Nother_One-8B-Model_Stock/464f363d-ab94-4cac-8846-fbcf25be3dec.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Nother_One-8B-Model_Stock/1762652579.578036", - "retrieved_timestamp": "1762652579.578037", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Nother_One-8B-Model_Stock", - "developer": "DreadPoor", - 
"inference_platform": "unknown", - "id": "DreadPoor/Nother_One-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6863101016414226 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5204527600425481 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15181268882175228 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38702083333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35945811170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Nother_One-8B-Model_Stock/7c5674a8-6a1c-483e-be9c-b0a6d00d3ac4.json b/data/hfopenllm_v2/DreadPoor/Nother_One-8B-Model_Stock/7c5674a8-6a1c-483e-be9c-b0a6d00d3ac4.json new file mode 100644 index 000000000..3a365d7fc --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Nother_One-8B-Model_Stock/7c5674a8-6a1c-483e-be9c-b0a6d00d3ac4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Nother_One-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nother_One-8B-Model_Stock", + "id": "DreadPoor/Nother_One-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6863 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.5205 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1518 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.387 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3595 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Noxis-8B-LINEAR/8778fbef-d0f0-4a47-8adb-8e8f594d9195.json b/data/hfopenllm_v2/DreadPoor/Noxis-8B-LINEAR/8778fbef-d0f0-4a47-8adb-8e8f594d9195.json deleted file mode 100644 index 2965274f6..000000000 --- a/data/hfopenllm_v2/DreadPoor/Noxis-8B-LINEAR/8778fbef-d0f0-4a47-8adb-8e8f594d9195.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Noxis-8B-LINEAR/1762652579.578263", - "retrieved_timestamp": "1762652579.578263", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Noxis-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Noxis-8B-LINEAR", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6913057354486096 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5420956502068554 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19788519637462235 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4230833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3660239361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Noxis-8B-LINEAR/d34b899e-b067-4c9c-9fa2-439f8b2d589d.json b/data/hfopenllm_v2/DreadPoor/Noxis-8B-LINEAR/d34b899e-b067-4c9c-9fa2-439f8b2d589d.json new file mode 100644 index 000000000..73cd484d6 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Noxis-8B-LINEAR/d34b899e-b067-4c9c-9fa2-439f8b2d589d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Noxis-8B-LINEAR/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Noxis-8B-LINEAR", + "id": "DreadPoor/Noxis-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6913 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5421 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1979 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3188 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4231 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.366 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Nullsworn-12B-LINEAR/3f92cd91-57b4-46eb-864b-2e4870b920fc.json b/data/hfopenllm_v2/DreadPoor/Nullsworn-12B-LINEAR/3f92cd91-57b4-46eb-864b-2e4870b920fc.json deleted file mode 100644 index 973544f29..000000000 --- a/data/hfopenllm_v2/DreadPoor/Nullsworn-12B-LINEAR/3f92cd91-57b4-46eb-864b-2e4870b920fc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Nullsworn-12B-LINEAR/1762652579.578492", - "retrieved_timestamp": "1762652579.5784929", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Nullsworn-12B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Nullsworn-12B-LINEAR", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44356086295473784 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5483045026677609 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11253776435045318 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43495833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3645279255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Nullsworn-12B-LINEAR/8c7b2332-510b-42d3-bcbb-e177c35d27d5.json b/data/hfopenllm_v2/DreadPoor/Nullsworn-12B-LINEAR/8c7b2332-510b-42d3-bcbb-e177c35d27d5.json new file mode 100644 index 000000000..4c95be2fd --- /dev/null +++ 
b/data/hfopenllm_v2/DreadPoor/Nullsworn-12B-LINEAR/8c7b2332-510b-42d3-bcbb-e177c35d27d5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Nullsworn-12B-LINEAR/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nullsworn-12B-LINEAR", + "id": "DreadPoor/Nullsworn-12B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4436 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5483 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1125 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.435 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3645 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Nwah-8B-Model_Stock/34dec14e-846a-4037-8dbd-f1d1599d5adf.json b/data/hfopenllm_v2/DreadPoor/Nwah-8B-Model_Stock/34dec14e-846a-4037-8dbd-f1d1599d5adf.json deleted file mode 100644 index 35d1271e2..000000000 --- a/data/hfopenllm_v2/DreadPoor/Nwah-8B-Model_Stock/34dec14e-846a-4037-8dbd-f1d1599d5adf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Nwah-8B-Model_Stock/1762652579.578718", - 
"retrieved_timestamp": "1762652579.578719", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Nwah-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Nwah-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7715893828375378 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5384269019541996 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4039479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3807347074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Nwah-8B-Model_Stock/685f107f-e431-4dba-a117-8d6f1dd2c296.json b/data/hfopenllm_v2/DreadPoor/Nwah-8B-Model_Stock/685f107f-e431-4dba-a117-8d6f1dd2c296.json new file mode 100644 index 000000000..ed00a7392 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Nwah-8B-Model_Stock/685f107f-e431-4dba-a117-8d6f1dd2c296.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Nwah-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nwah-8B-Model_Stock", + "id": "DreadPoor/Nwah-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", 
+ "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7716 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5384 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1798 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4039 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3807 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/ONeil-model_stock-8B/7f5fa4e0-e28c-46df-acbd-22e7b010a407.json b/data/hfopenllm_v2/DreadPoor/ONeil-model_stock-8B/7f5fa4e0-e28c-46df-acbd-22e7b010a407.json deleted file mode 100644 index d8cbfdf7f..000000000 --- a/data/hfopenllm_v2/DreadPoor/ONeil-model_stock-8B/7f5fa4e0-e28c-46df-acbd-22e7b010a407.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_ONeil-model_stock-8B/1762652579.578939", - "retrieved_timestamp": "1762652579.57894", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/ONeil-model_stock-8B", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/ONeil-model_stock-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6785662043378236 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5548337982400763 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10120845921450151 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41734374999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35987367021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/ONeil-model_stock-8B/e1570804-85b6-4518-a099-5f21ab27d12c.json b/data/hfopenllm_v2/DreadPoor/ONeil-model_stock-8B/e1570804-85b6-4518-a099-5f21ab27d12c.json new file mode 100644 index 000000000..43888a5d9 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/ONeil-model_stock-8B/e1570804-85b6-4518-a099-5f21ab27d12c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_ONeil-model_stock-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ONeil-model_stock-8B", + "id": "DreadPoor/ONeil-model_stock-8B", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6786 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5548 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1012 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4173 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3599 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Oh_Boy-8B-LINEAR/393ad85d-6b8b-466d-99e0-6a89bf0ce66e.json b/data/hfopenllm_v2/DreadPoor/Oh_Boy-8B-LINEAR/393ad85d-6b8b-466d-99e0-6a89bf0ce66e.json deleted file mode 100644 index aa40b10ec..000000000 --- a/data/hfopenllm_v2/DreadPoor/Oh_Boy-8B-LINEAR/393ad85d-6b8b-466d-99e0-6a89bf0ce66e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Oh_Boy-8B-LINEAR/1762652579.5791628", - "retrieved_timestamp": "1762652579.5791638", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Oh_Boy-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Oh_Boy-8B-LINEAR", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7503069633018169 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5375114406292553 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1782477341389728 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4107708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3848902925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Oh_Boy-8B-LINEAR/a779ebec-76ab-4a1e-aa4f-d1a6adfe2d5c.json b/data/hfopenllm_v2/DreadPoor/Oh_Boy-8B-LINEAR/a779ebec-76ab-4a1e-aa4f-d1a6adfe2d5c.json new file mode 100644 index 000000000..a4b064bc6 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Oh_Boy-8B-LINEAR/a779ebec-76ab-4a1e-aa4f-d1a6adfe2d5c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Oh_Boy-8B-LINEAR/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Oh_Boy-8B-LINEAR", + "id": "DreadPoor/Oh_Boy-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7503 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5375 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1782 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4108 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3849 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/OrangeJ-8B-Model_Stock/1ed7f6ed-d04d-4cfc-a36a-1ef0f72d4814.json 
b/data/hfopenllm_v2/DreadPoor/OrangeJ-8B-Model_Stock/1ed7f6ed-d04d-4cfc-a36a-1ef0f72d4814.json new file mode 100644 index 000000000..009d6a3f3 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/OrangeJ-8B-Model_Stock/1ed7f6ed-d04d-4cfc-a36a-1ef0f72d4814.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_OrangeJ-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OrangeJ-8B-Model_Stock", + "id": "DreadPoor/OrangeJ-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7841 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5413 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.176 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4028 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3969 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/OrangeJ-8B-Model_Stock/d436f2a4-ebd5-4712-871a-0616f491bda4.json b/data/hfopenllm_v2/DreadPoor/OrangeJ-8B-Model_Stock/d436f2a4-ebd5-4712-871a-0616f491bda4.json deleted file mode 100644 index ed4c33826..000000000 --- 
a/data/hfopenllm_v2/DreadPoor/OrangeJ-8B-Model_Stock/d436f2a4-ebd5-4712-871a-0616f491bda4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_OrangeJ-8B-Model_Stock/1762652579.57939", - "retrieved_timestamp": "1762652579.579391", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/OrangeJ-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/OrangeJ-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7841039552830933 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5413478053905038 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17598187311178248 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4027708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3968583776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Promissum_Mane-8B-LINEAR-lorablated/827c075e-78a2-4e4b-a561-b95728cdf2b2.json b/data/hfopenllm_v2/DreadPoor/Promissum_Mane-8B-LINEAR-lorablated/827c075e-78a2-4e4b-a561-b95728cdf2b2.json deleted file mode 100644 index 7f6a4b75d..000000000 --- a/data/hfopenllm_v2/DreadPoor/Promissum_Mane-8B-LINEAR-lorablated/827c075e-78a2-4e4b-a561-b95728cdf2b2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Promissum_Mane-8B-LINEAR-lorablated/1762652579.579823", - "retrieved_timestamp": "1762652579.5798242", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Promissum_Mane-8B-LINEAR-lorablated", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": 
"DreadPoor/Promissum_Mane-8B-LINEAR-lorablated", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7156356245872064 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5435183631990302 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15332326283987915 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4197916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37391954787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Promissum_Mane-8B-LINEAR-lorablated/c901a9ee-069a-4e3e-ac52-3017d67d8800.json b/data/hfopenllm_v2/DreadPoor/Promissum_Mane-8B-LINEAR-lorablated/c901a9ee-069a-4e3e-ac52-3017d67d8800.json new file mode 100644 index 000000000..fbaecf746 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Promissum_Mane-8B-LINEAR-lorablated/c901a9ee-069a-4e3e-ac52-3017d67d8800.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Promissum_Mane-8B-LINEAR-lorablated/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Promissum_Mane-8B-LINEAR-lorablated", + "id": "DreadPoor/Promissum_Mane-8B-LINEAR-lorablated", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7156 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", 
+ "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5435 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1533 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4198 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3739 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Promissum_Mane-8B-LINEAR/08317b59-ff74-43c8-bea5-2a266c38816e.json b/data/hfopenllm_v2/DreadPoor/Promissum_Mane-8B-LINEAR/08317b59-ff74-43c8-bea5-2a266c38816e.json new file mode 100644 index 000000000..c14c68ca6 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Promissum_Mane-8B-LINEAR/08317b59-ff74-43c8-bea5-2a266c38816e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Promissum_Mane-8B-LINEAR/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Promissum_Mane-8B-LINEAR", + "id": "DreadPoor/Promissum_Mane-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.715 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5458 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + 
"hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1556 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.42 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3851 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Promissum_Mane-8B-LINEAR/d44a7888-1463-4492-9359-f8287a8f7f01.json b/data/hfopenllm_v2/DreadPoor/Promissum_Mane-8B-LINEAR/d44a7888-1463-4492-9359-f8287a8f7f01.json deleted file mode 100644 index ba17a32d5..000000000 --- a/data/hfopenllm_v2/DreadPoor/Promissum_Mane-8B-LINEAR/d44a7888-1463-4492-9359-f8287a8f7f01.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Promissum_Mane-8B-LINEAR/1762652579.5796108", - "retrieved_timestamp": "1762652579.579612", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Promissum_Mane-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Promissum_Mane-8B-LINEAR", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7150361042035134 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5457684398146738 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1555891238670695 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42004166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38505651595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/RPMash-8B-Model_Stock/4106d4d3-344a-4c1f-b9ce-a3140d435013.json b/data/hfopenllm_v2/DreadPoor/RPMash-8B-Model_Stock/4106d4d3-344a-4c1f-b9ce-a3140d435013.json new file mode 100644 index 000000000..4c1772a72 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/RPMash-8B-Model_Stock/4106d4d3-344a-4c1f-b9ce-a3140d435013.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_RPMash-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RPMash-8B-Model_Stock", + "id": "DreadPoor/RPMash-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4564 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5169 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.108 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4054 + } + }, + { 
+ "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3604 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/RPMash-8B-Model_Stock/aa8e7299-0c36-4f27-b8c9-e9a5e4da8c97.json b/data/hfopenllm_v2/DreadPoor/RPMash-8B-Model_Stock/aa8e7299-0c36-4f27-b8c9-e9a5e4da8c97.json deleted file mode 100644 index dcdb7147e..000000000 --- a/data/hfopenllm_v2/DreadPoor/RPMash-8B-Model_Stock/aa8e7299-0c36-4f27-b8c9-e9a5e4da8c97.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_RPMash-8B-Model_Stock/1762652579.5800488", - "retrieved_timestamp": "1762652579.58005", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/RPMash-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/RPMash-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4563502617499346 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5169088291675549 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10800604229607251 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.405375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3603723404255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/RPMash_V3-8B-Model_Stock/2b308fad-8494-4056-8b84-82733cd2710a.json b/data/hfopenllm_v2/DreadPoor/RPMash_V3-8B-Model_Stock/2b308fad-8494-4056-8b84-82733cd2710a.json new file mode 100644 index 000000000..1c26373d3 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/RPMash_V3-8B-Model_Stock/2b308fad-8494-4056-8b84-82733cd2710a.json @@ -0,0 +1,132 @@ +{ + 
"schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_RPMash_V3-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RPMash_V3-8B-Model_Stock", + "id": "DreadPoor/RPMash_V3-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7049 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5217 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1042 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3778 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3614 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/RPMash_V3-8B-Model_Stock/c7e0c75d-f0c1-4a44-b540-607e99c69e92.json b/data/hfopenllm_v2/DreadPoor/RPMash_V3-8B-Model_Stock/c7e0c75d-f0c1-4a44-b540-607e99c69e92.json deleted file mode 100644 index 9cadb69ef..000000000 --- a/data/hfopenllm_v2/DreadPoor/RPMash_V3-8B-Model_Stock/c7e0c75d-f0c1-4a44-b540-607e99c69e92.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_RPMash_V3-8B-Model_Stock/1762652579.580262", - "retrieved_timestamp": "1762652579.580263", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/RPMash_V3-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/RPMash_V3-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.70491961329273 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5217453397523113 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37775000000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36136968085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Rusted_Gold-8B-LINEAR/70f7842f-1111-4c6a-914d-35e48537d1fc.json b/data/hfopenllm_v2/DreadPoor/Rusted_Gold-8B-LINEAR/70f7842f-1111-4c6a-914d-35e48537d1fc.json deleted file mode 100644 index d95458c4f..000000000 --- a/data/hfopenllm_v2/DreadPoor/Rusted_Gold-8B-LINEAR/70f7842f-1111-4c6a-914d-35e48537d1fc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Rusted_Gold-8B-LINEAR/1762652579.58047", - "retrieved_timestamp": "1762652579.580471", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Rusted_Gold-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Rusted_Gold-8B-LINEAR", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.7296240641497892 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5386646439313688 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1933534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41775 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37799202127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Rusted_Gold-8B-LINEAR/93c867d0-4f10-440c-838c-91d1633fe584.json b/data/hfopenllm_v2/DreadPoor/Rusted_Gold-8B-LINEAR/93c867d0-4f10-440c-838c-91d1633fe584.json new file mode 100644 index 000000000..f8b506d6b --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Rusted_Gold-8B-LINEAR/93c867d0-4f10-440c-838c-91d1633fe584.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Rusted_Gold-8B-LINEAR/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Rusted_Gold-8B-LINEAR", + "id": "DreadPoor/Rusted_Gold-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7296 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5387 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.1934 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4178 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.378 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Rusted_Platinum-8B-LINEAR/1a4a69c5-4acc-4ad9-adb2-bd9cf0fa2875.json b/data/hfopenllm_v2/DreadPoor/Rusted_Platinum-8B-LINEAR/1a4a69c5-4acc-4ad9-adb2-bd9cf0fa2875.json new file mode 100644 index 000000000..928a50b08 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Rusted_Platinum-8B-LINEAR/1a4a69c5-4acc-4ad9-adb2-bd9cf0fa2875.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Rusted_Platinum-8B-LINEAR/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Rusted_Platinum-8B-LINEAR", + "id": "DreadPoor/Rusted_Platinum-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.718 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5428 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1722 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3967 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.373 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Rusted_Platinum-8B-LINEAR/4b9a1e5a-dc99-44d9-b4f4-6bef1eb285ca.json b/data/hfopenllm_v2/DreadPoor/Rusted_Platinum-8B-LINEAR/4b9a1e5a-dc99-44d9-b4f4-6bef1eb285ca.json deleted file mode 100644 index e24b7ac76..000000000 --- a/data/hfopenllm_v2/DreadPoor/Rusted_Platinum-8B-LINEAR/4b9a1e5a-dc99-44d9-b4f4-6bef1eb285ca.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Rusted_Platinum-8B-LINEAR/1762652579.580692", - "retrieved_timestamp": "1762652579.580693", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Rusted_Platinum-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Rusted_Platinum-8B-LINEAR", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7179838384375679 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5427868416987739 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17220543806646527 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39666666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on 
MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37300531914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Rusted_Platinum-8B-Model_Stock/151226ba-9744-45bc-b923-30df57f7aa3e.json b/data/hfopenllm_v2/DreadPoor/Rusted_Platinum-8B-Model_Stock/151226ba-9744-45bc-b923-30df57f7aa3e.json new file mode 100644 index 000000000..f71f0f094 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Rusted_Platinum-8B-Model_Stock/151226ba-9744-45bc-b923-30df57f7aa3e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Rusted_Platinum-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Rusted_Platinum-8B-Model_Stock", + "id": "DreadPoor/Rusted_Platinum-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4408 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5243 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.102 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3741 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3546 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/DreadPoor/Rusted_Platinum-8B-Model_Stock/219e3183-8d9c-4188-a550-72d7f20ff1ec.json b/data/hfopenllm_v2/DreadPoor/Rusted_Platinum-8B-Model_Stock/219e3183-8d9c-4188-a550-72d7f20ff1ec.json deleted file mode 100644 index 6825a52a3..000000000 --- a/data/hfopenllm_v2/DreadPoor/Rusted_Platinum-8B-Model_Stock/219e3183-8d9c-4188-a550-72d7f20ff1ec.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Rusted_Platinum-8B-Model_Stock/1762652579.580914", - "retrieved_timestamp": "1762652579.580915", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Rusted_Platinum-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Rusted_Platinum-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44078821970150317 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5242840148078765 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10196374622356495 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37406249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3546376329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Sellen-8B-model_stock/45e281e8-f28c-40a5-92e4-c16b627adb32.json b/data/hfopenllm_v2/DreadPoor/Sellen-8B-model_stock/45e281e8-f28c-40a5-92e4-c16b627adb32.json deleted file mode 100644 index 9c769e2d0..000000000 --- a/data/hfopenllm_v2/DreadPoor/Sellen-8B-model_stock/45e281e8-f28c-40a5-92e4-c16b627adb32.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Sellen-8B-model_stock/1762652579.5811431", - "retrieved_timestamp": "1762652579.581144", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Sellen-8B-model_stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Sellen-8B-model_stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7112893788481229 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5231680557624704 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1336858006042296 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3960416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35696476063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Sellen-8B-model_stock/98363657-0793-4eb3-94de-28961afc92ea.json b/data/hfopenllm_v2/DreadPoor/Sellen-8B-model_stock/98363657-0793-4eb3-94de-28961afc92ea.json new file mode 100644 index 000000000..1c2ffb709 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Sellen-8B-model_stock/98363657-0793-4eb3-94de-28961afc92ea.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Sellen-8B-model_stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sellen-8B-model_stock", + "id": "DreadPoor/Sellen-8B-model_stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7113 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + 
"hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5232 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1337 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.396 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.357 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Something-8B-Model_Stock/1d1bf908-44fb-4b87-b52d-845a1cdafc08.json b/data/hfopenllm_v2/DreadPoor/Something-8B-Model_Stock/1d1bf908-44fb-4b87-b52d-845a1cdafc08.json deleted file mode 100644 index e35a1fe2a..000000000 --- a/data/hfopenllm_v2/DreadPoor/Something-8B-Model_Stock/1d1bf908-44fb-4b87-b52d-845a1cdafc08.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Something-8B-Model_Stock/1762652579.5815392", - "retrieved_timestamp": "1762652579.58154", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Something-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Something-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5043107842746135 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5395029370473196 - } - }, - { - "evaluation_name": "MATH Level 5", - 
"metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41873958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3885472074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Something-8B-Model_Stock/a32b4ded-6bff-441e-afbd-736e6d8cce5c.json b/data/hfopenllm_v2/DreadPoor/Something-8B-Model_Stock/a32b4ded-6bff-441e-afbd-736e6d8cce5c.json new file mode 100644 index 000000000..7c20e3cb1 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Something-8B-Model_Stock/a32b4ded-6bff-441e-afbd-736e6d8cce5c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Something-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Something-8B-Model_Stock", + "id": "DreadPoor/Something-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5043 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5395 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1798 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.3171 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4187 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3885 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Spring_Dusk-8B-SCE/326bcf4a-02e9-4218-8bf2-55a94a79435e.json b/data/hfopenllm_v2/DreadPoor/Spring_Dusk-8B-SCE/326bcf4a-02e9-4218-8bf2-55a94a79435e.json new file mode 100644 index 000000000..decc0df8c --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Spring_Dusk-8B-SCE/326bcf4a-02e9-4218-8bf2-55a94a79435e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Spring_Dusk-8B-SCE/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Spring_Dusk-8B-SCE", + "id": "DreadPoor/Spring_Dusk-8B-SCE", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6515 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5635 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0763 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2878 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", 
+ "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.46 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3436 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Spring_Dusk-8B-SCE/e9124a70-037d-41ed-becb-953382a3f43a.json b/data/hfopenllm_v2/DreadPoor/Spring_Dusk-8B-SCE/e9124a70-037d-41ed-becb-953382a3f43a.json deleted file mode 100644 index e02332e67..000000000 --- a/data/hfopenllm_v2/DreadPoor/Spring_Dusk-8B-SCE/e9124a70-037d-41ed-becb-953382a3f43a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Spring_Dusk-8B-SCE/1762652579.581773", - "retrieved_timestamp": "1762652579.581774", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Spring_Dusk-8B-SCE", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Spring_Dusk-8B-SCE", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6514636719459922 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5635271357931001 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07628398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45997916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3435837765957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Summer_Dawn-8B-SCE/145facc2-ab11-4c68-b841-762e0ad9bd5a.json b/data/hfopenllm_v2/DreadPoor/Summer_Dawn-8B-SCE/145facc2-ab11-4c68-b841-762e0ad9bd5a.json new file mode 100644 index 000000000..4227020d6 --- 
/dev/null +++ b/data/hfopenllm_v2/DreadPoor/Summer_Dawn-8B-SCE/145facc2-ab11-4c68-b841-762e0ad9bd5a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Summer_Dawn-8B-SCE/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Summer_Dawn-8B-SCE", + "id": "DreadPoor/Summer_Dawn-8B-SCE", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6642 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5391 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1722 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.412 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3753 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Summer_Dawn-8B-SCE/7d7eefa4-193a-4158-a903-9a8484b36e9a.json b/data/hfopenllm_v2/DreadPoor/Summer_Dawn-8B-SCE/7d7eefa4-193a-4158-a903-9a8484b36e9a.json deleted file mode 100644 index 463551b09..000000000 --- a/data/hfopenllm_v2/DreadPoor/Summer_Dawn-8B-SCE/7d7eefa4-193a-4158-a903-9a8484b36e9a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Summer_Dawn-8B-SCE/1762652579.581994", - 
"retrieved_timestamp": "1762652579.581994", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Summer_Dawn-8B-SCE", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Summer_Dawn-8B-SCE", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6642032030567783 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.539111375413361 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17220543806646527 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41204166666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37533244680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Summer_Dusk-8B-TIES/a2cad434-61a0-40be-8740-6c6a8e3cea25.json b/data/hfopenllm_v2/DreadPoor/Summer_Dusk-8B-TIES/a2cad434-61a0-40be-8740-6c6a8e3cea25.json deleted file mode 100644 index fa122f843..000000000 --- a/data/hfopenllm_v2/DreadPoor/Summer_Dusk-8B-TIES/a2cad434-61a0-40be-8740-6c6a8e3cea25.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Summer_Dusk-8B-TIES/1762652579.582258", - "retrieved_timestamp": "1762652579.582258", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Summer_Dusk-8B-TIES", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Summer_Dusk-8B-TIES", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4922206412319312 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5359662578395569 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18051359516616314 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4266770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3855551861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Summer_Dusk-8B-TIES/d3e6aae6-9284-4309-8d8c-02c9e797a58b.json b/data/hfopenllm_v2/DreadPoor/Summer_Dusk-8B-TIES/d3e6aae6-9284-4309-8d8c-02c9e797a58b.json new file mode 100644 index 000000000..563ad7232 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Summer_Dusk-8B-TIES/d3e6aae6-9284-4309-8d8c-02c9e797a58b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Summer_Dusk-8B-TIES/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Summer_Dusk-8B-TIES", + "id": "DreadPoor/Summer_Dusk-8B-TIES", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4922 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.536 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.1805 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4267 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3856 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Summer_Rain-8B-SCE/6ee8537c-90e8-4455-83ca-c8c375a5ead7.json b/data/hfopenllm_v2/DreadPoor/Summer_Rain-8B-SCE/6ee8537c-90e8-4455-83ca-c8c375a5ead7.json new file mode 100644 index 000000000..9e3634f9a --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Summer_Rain-8B-SCE/6ee8537c-90e8-4455-83ca-c8c375a5ead7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Summer_Rain-8B-SCE/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Summer_Rain-8B-SCE", + "id": "DreadPoor/Summer_Rain-8B-SCE", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5459 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5846 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0702 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4477 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3551 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Summer_Rain-8B-SCE/9f4730ec-a162-455c-83ef-c8fa9ebd036c.json b/data/hfopenllm_v2/DreadPoor/Summer_Rain-8B-SCE/9f4730ec-a162-455c-83ef-c8fa9ebd036c.json deleted file mode 100644 index 00d197578..000000000 --- a/data/hfopenllm_v2/DreadPoor/Summer_Rain-8B-SCE/9f4730ec-a162-455c-83ef-c8fa9ebd036c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Summer_Rain-8B-SCE/1762652579.582465", - "retrieved_timestamp": "1762652579.5824661", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Summer_Rain-8B-SCE", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Summer_Rain-8B-SCE", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5459259210007226 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5845948417986419 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0702416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4477291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3550531914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Summer_Rain-8B-TIES/1704c33f-e00e-4fbb-be4c-3d1fe85d635f.json b/data/hfopenllm_v2/DreadPoor/Summer_Rain-8B-TIES/1704c33f-e00e-4fbb-be4c-3d1fe85d635f.json deleted file mode 100644 index dcd3e1795..000000000 --- a/data/hfopenllm_v2/DreadPoor/Summer_Rain-8B-TIES/1704c33f-e00e-4fbb-be4c-3d1fe85d635f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Summer_Rain-8B-TIES/1762652579.582679", - "retrieved_timestamp": "1762652579.582679", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Summer_Rain-8B-TIES", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Summer_Rain-8B-TIES", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5444021861992845 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5845948417986419 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0702416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4477291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3550531914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Summer_Rain-8B-TIES/6efbfb38-57e5-46c7-b765-f7d0356afb97.json b/data/hfopenllm_v2/DreadPoor/Summer_Rain-8B-TIES/6efbfb38-57e5-46c7-b765-f7d0356afb97.json new file mode 100644 index 000000000..2e8e86b23 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Summer_Rain-8B-TIES/6efbfb38-57e5-46c7-b765-f7d0356afb97.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Summer_Rain-8B-TIES/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Summer_Rain-8B-TIES", + "id": "DreadPoor/Summer_Rain-8B-TIES", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5444 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5846 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0702 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4477 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3551 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Sun-8B-Model_Stock/13b16b8d-533f-4323-a75a-e16df96b8351.json b/data/hfopenllm_v2/DreadPoor/Sun-8B-Model_Stock/13b16b8d-533f-4323-a75a-e16df96b8351.json deleted file mode 100644 index e7ee042e1..000000000 --- a/data/hfopenllm_v2/DreadPoor/Sun-8B-Model_Stock/13b16b8d-533f-4323-a75a-e16df96b8351.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Sun-8B-Model_Stock/1762652579.58288", - "retrieved_timestamp": "1762652579.58288", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"DreadPoor/Sun-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Sun-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7758358932077998 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5263511014407583 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20996978851963746 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40975 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38347739361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Sun-8B-Model_Stock/f4d418d9-1089-452d-9c7f-4cc4712e6ac7.json b/data/hfopenllm_v2/DreadPoor/Sun-8B-Model_Stock/f4d418d9-1089-452d-9c7f-4cc4712e6ac7.json new file mode 100644 index 000000000..0c4fbb59f --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Sun-8B-Model_Stock/f4d418d9-1089-452d-9c7f-4cc4712e6ac7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Sun-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sun-8B-Model_Stock", + "id": "DreadPoor/Sun-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7758 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.5264 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.21 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4098 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3835 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Sweetened_Condensed_Milk-8B-Model_Stock/1c9b325b-92b3-499a-a3ea-026269c63c88.json b/data/hfopenllm_v2/DreadPoor/Sweetened_Condensed_Milk-8B-Model_Stock/1c9b325b-92b3-499a-a3ea-026269c63c88.json new file mode 100644 index 000000000..28f4c0025 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Sweetened_Condensed_Milk-8B-Model_Stock/1c9b325b-92b3-499a-a3ea-026269c63c88.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Sweetened_Condensed_Milk-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sweetened_Condensed_Milk-8B-Model_Stock", + "id": "DreadPoor/Sweetened_Condensed_Milk-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7417 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5406 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + 
"dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1873 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4107 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3848 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Sweetened_Condensed_Milk-8B-Model_Stock/d0461daa-d106-44ce-9d9c-03a6fef37b45.json b/data/hfopenllm_v2/DreadPoor/Sweetened_Condensed_Milk-8B-Model_Stock/d0461daa-d106-44ce-9d9c-03a6fef37b45.json deleted file mode 100644 index 743f28c4f..000000000 --- a/data/hfopenllm_v2/DreadPoor/Sweetened_Condensed_Milk-8B-Model_Stock/d0461daa-d106-44ce-9d9c-03a6fef37b45.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Sweetened_Condensed_Milk-8B-Model_Stock/1762652579.5830941", - "retrieved_timestamp": "1762652579.583095", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Sweetened_Condensed_Milk-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Sweetened_Condensed_Milk-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7417142071924716 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5406287643522295 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18731117824773413 - 
} - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4106770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38480718085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/TEST02-Ignore/414bb880-e2b2-43fb-ad9b-f51d7c4b7ad4.json b/data/hfopenllm_v2/DreadPoor/TEST02-Ignore/414bb880-e2b2-43fb-ad9b-f51d7c4b7ad4.json deleted file mode 100644 index 594353017..000000000 --- a/data/hfopenllm_v2/DreadPoor/TEST02-Ignore/414bb880-e2b2-43fb-ad9b-f51d7c4b7ad4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_TEST02-Ignore/1762652579.583313", - "retrieved_timestamp": "1762652579.583314", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/TEST02-Ignore", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/TEST02-Ignore", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6118964347930158 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5601644306147606 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08685800604229607 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41985416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3468251329787234 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/DreadPoor/TEST02-Ignore/c546ccde-cef3-4de2-a49f-24517d76dde5.json b/data/hfopenllm_v2/DreadPoor/TEST02-Ignore/c546ccde-cef3-4de2-a49f-24517d76dde5.json new file mode 100644 index 000000000..875b97c44 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/TEST02-Ignore/c546ccde-cef3-4de2-a49f-24517d76dde5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_TEST02-Ignore/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TEST02-Ignore", + "id": "DreadPoor/TEST02-Ignore", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6119 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5602 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0869 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2844 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4199 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3468 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/TEST03-ignore/ceba83fe-89b2-4b8a-ba7d-ed1ad9acb070.json b/data/hfopenllm_v2/DreadPoor/TEST03-ignore/ceba83fe-89b2-4b8a-ba7d-ed1ad9acb070.json deleted file mode 100644 index 71f201cfd..000000000 --- 
a/data/hfopenllm_v2/DreadPoor/TEST03-ignore/ceba83fe-89b2-4b8a-ba7d-ed1ad9acb070.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_TEST03-ignore/1762652579.583565", - "retrieved_timestamp": "1762652579.5835662", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/TEST03-ignore", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/TEST03-ignore", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6967014189018471 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5383414134372179 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16540785498489427 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4186145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37890625 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/TEST03-ignore/e85d3ccf-f48d-4e5c-b893-771a107773d4.json b/data/hfopenllm_v2/DreadPoor/TEST03-ignore/e85d3ccf-f48d-4e5c-b893-771a107773d4.json new file mode 100644 index 000000000..f463020e8 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/TEST03-ignore/e85d3ccf-f48d-4e5c-b893-771a107773d4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_TEST03-ignore/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TEST03-ignore", + "id": "DreadPoor/TEST03-ignore", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": 
"hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6967 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5383 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1654 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4186 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3789 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/TEST06-ignore/15dbba84-b177-4bcd-8874-0153152f0015.json b/data/hfopenllm_v2/DreadPoor/TEST06-ignore/15dbba84-b177-4bcd-8874-0153152f0015.json deleted file mode 100644 index 87383d764..000000000 --- a/data/hfopenllm_v2/DreadPoor/TEST06-ignore/15dbba84-b177-4bcd-8874-0153152f0015.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_TEST06-ignore/1762652579.583824", - "retrieved_timestamp": "1762652579.5838249", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/TEST06-ignore", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/TEST06-ignore", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7322969720342026 - } 
- }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5509060880148441 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11782477341389729 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4224895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3615359042553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/TEST06-ignore/b8d22ade-874e-4ff3-9fcd-dbe14220d48b.json b/data/hfopenllm_v2/DreadPoor/TEST06-ignore/b8d22ade-874e-4ff3-9fcd-dbe14220d48b.json new file mode 100644 index 000000000..b39933b07 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/TEST06-ignore/b8d22ade-874e-4ff3-9fcd-dbe14220d48b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_TEST06-ignore/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TEST06-ignore", + "id": "DreadPoor/TEST06-ignore", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7323 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5509 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1178 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + 
"source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4225 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3615 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/TEST07-ignore/39b77252-2729-429b-b220-3b19ca0b6a6c.json b/data/hfopenllm_v2/DreadPoor/TEST07-ignore/39b77252-2729-429b-b220-3b19ca0b6a6c.json deleted file mode 100644 index b9b471108..000000000 --- a/data/hfopenllm_v2/DreadPoor/TEST07-ignore/39b77252-2729-429b-b220-3b19ca0b6a6c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_TEST07-ignore/1762652579.5841951", - "retrieved_timestamp": "1762652579.584198", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/TEST07-ignore", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/TEST07-ignore", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7399655137258031 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5561275711510345 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40937500000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { 
- "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3879654255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/TEST07-ignore/97e8e7e2-74a4-42a5-a0b1-250e47d3c3e6.json b/data/hfopenllm_v2/DreadPoor/TEST07-ignore/97e8e7e2-74a4-42a5-a0b1-250e47d3c3e6.json new file mode 100644 index 000000000..42a699d62 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/TEST07-ignore/97e8e7e2-74a4-42a5-a0b1-250e47d3c3e6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_TEST07-ignore/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TEST07-ignore", + "id": "DreadPoor/TEST07-ignore", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.74 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5561 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1662 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4094 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.388 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/DreadPoor/TEST08-ignore/79b7bdb6-82a7-466f-8d9a-b26211f4ee73.json b/data/hfopenllm_v2/DreadPoor/TEST08-ignore/79b7bdb6-82a7-466f-8d9a-b26211f4ee73.json deleted file mode 100644 index 873020d50..000000000 --- a/data/hfopenllm_v2/DreadPoor/TEST08-ignore/79b7bdb6-82a7-466f-8d9a-b26211f4ee73.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_TEST08-ignore/1762652579.5845299", - "retrieved_timestamp": "1762652579.5845308", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/TEST08-ignore", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/TEST08-ignore", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7466599733152479 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5453519655444978 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18202416918429004 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40810416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3853058510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/TEST08-ignore/b2d56bb6-a726-4e47-8bc6-c016a51aac5c.json b/data/hfopenllm_v2/DreadPoor/TEST08-ignore/b2d56bb6-a726-4e47-8bc6-c016a51aac5c.json new file mode 100644 index 000000000..9477b8a75 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/TEST08-ignore/b2d56bb6-a726-4e47-8bc6-c016a51aac5c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_TEST08-ignore/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TEST08-ignore", + "id": "DreadPoor/TEST08-ignore", + "developer": "DreadPoor", + "inference_platform": "unknown", + 
"additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7467 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5454 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.182 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3129 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4081 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3853 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Trinas_Nectar-8B-model_stock/3366f6d8-41bc-4c2c-a72c-bc0fd7dc8dd2.json b/data/hfopenllm_v2/DreadPoor/Trinas_Nectar-8B-model_stock/3366f6d8-41bc-4c2c-a72c-bc0fd7dc8dd2.json new file mode 100644 index 000000000..bf826442f --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Trinas_Nectar-8B-model_stock/3366f6d8-41bc-4c2c-a72c-bc0fd7dc8dd2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Trinas_Nectar-8B-model_stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Trinas_Nectar-8B-model_stock", + "id": "DreadPoor/Trinas_Nectar-8B-model_stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + 
"source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7259 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5256 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1526 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2861 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4068 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3618 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Trinas_Nectar-8B-model_stock/922fec6c-cfec-47cf-a374-5676635a5b40.json b/data/hfopenllm_v2/DreadPoor/Trinas_Nectar-8B-model_stock/922fec6c-cfec-47cf-a374-5676635a5b40.json deleted file mode 100644 index d0c7ead5a..000000000 --- a/data/hfopenllm_v2/DreadPoor/Trinas_Nectar-8B-model_stock/922fec6c-cfec-47cf-a374-5676635a5b40.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Trinas_Nectar-8B-model_stock/1762652579.58478", - "retrieved_timestamp": "1762652579.5847821", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Trinas_Nectar-8B-model_stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Trinas_Nectar-8B-model_stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7259272064788096 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5256123853406084 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15256797583081572 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4067708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36178523936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/UNTESTED-VENN_1.2-8B-Model_Stock/5945660f-40e1-4c49-8f28-581f06b51e59.json b/data/hfopenllm_v2/DreadPoor/UNTESTED-VENN_1.2-8B-Model_Stock/5945660f-40e1-4c49-8f28-581f06b51e59.json deleted file mode 100644 index 3419d42d8..000000000 --- a/data/hfopenllm_v2/DreadPoor/UNTESTED-VENN_1.2-8B-Model_Stock/5945660f-40e1-4c49-8f28-581f06b51e59.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_UNTESTED-VENN_1.2-8B-Model_Stock/1762652579.585024", - "retrieved_timestamp": "1762652579.585025", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/UNTESTED-VENN_1.2-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/UNTESTED-VENN_1.2-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47176270074513404 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5475027267486955 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1540785498489426 - } - }, - { - "evaluation_name": "GPQA", 
- "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4449375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.378656914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/UNTESTED-VENN_1.2-8B-Model_Stock/7ba52efb-3890-4691-8740-9f051f1f645e.json b/data/hfopenllm_v2/DreadPoor/UNTESTED-VENN_1.2-8B-Model_Stock/7ba52efb-3890-4691-8740-9f051f1f645e.json new file mode 100644 index 000000000..44c3145e9 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/UNTESTED-VENN_1.2-8B-Model_Stock/7ba52efb-3890-4691-8740-9f051f1f645e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_UNTESTED-VENN_1.2-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "UNTESTED-VENN_1.2-8B-Model_Stock", + "id": "DreadPoor/UNTESTED-VENN_1.2-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4718 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5475 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1541 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3154 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4449 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3787 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/VENN_1.2-8B-Model_Stock/0adfce8d-0070-4375-be96-a34466851101.json b/data/hfopenllm_v2/DreadPoor/VENN_1.2-8B-Model_Stock/0adfce8d-0070-4375-be96-a34466851101.json deleted file mode 100644 index 9568d074d..000000000 --- a/data/hfopenllm_v2/DreadPoor/VENN_1.2-8B-Model_Stock/0adfce8d-0070-4375-be96-a34466851101.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_VENN_1.2-8B-Model_Stock/1762652579.5852559", - "retrieved_timestamp": "1762652579.585257", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/VENN_1.2-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/VENN_1.2-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7226049105262924 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5458812486333333 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17069486404833836 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42001041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3720910904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/VENN_1.2-8B-Model_Stock/7b192b49-057e-418a-b47d-44b0ec82a6b6.json 
b/data/hfopenllm_v2/DreadPoor/VENN_1.2-8B-Model_Stock/7b192b49-057e-418a-b47d-44b0ec82a6b6.json new file mode 100644 index 000000000..77da6fb17 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/VENN_1.2-8B-Model_Stock/7b192b49-057e-418a-b47d-44b0ec82a6b6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_VENN_1.2-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "VENN_1.2-8B-Model_Stock", + "id": "DreadPoor/VENN_1.2-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7226 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5459 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1707 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.42 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3721 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/WIP-Acacia-8B-Model_Stock/d28bdd9d-53bb-498f-84cb-7d482f41d005.json b/data/hfopenllm_v2/DreadPoor/WIP-Acacia-8B-Model_Stock/d28bdd9d-53bb-498f-84cb-7d482f41d005.json deleted file mode 100644 index 872d5bc66..000000000 --- 
a/data/hfopenllm_v2/DreadPoor/WIP-Acacia-8B-Model_Stock/d28bdd9d-53bb-498f-84cb-7d482f41d005.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_WIP-Acacia-8B-Model_Stock/1762652579.5854762", - "retrieved_timestamp": "1762652579.585477", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/WIP-Acacia-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/WIP-Acacia-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6246359659038019 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5194665568943516 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16691842900302115 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4225833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37367021276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/WIP-Acacia-8B-Model_Stock/f2120d53-bef6-44d6-84a6-a6f8e3537188.json b/data/hfopenllm_v2/DreadPoor/WIP-Acacia-8B-Model_Stock/f2120d53-bef6-44d6-84a6-a6f8e3537188.json new file mode 100644 index 000000000..aadd0095c --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/WIP-Acacia-8B-Model_Stock/f2120d53-bef6-44d6-84a6-a6f8e3537188.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_WIP-Acacia-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "WIP-Acacia-8B-Model_Stock", + "id": "DreadPoor/WIP-Acacia-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + 
}, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6246 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5195 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1669 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4226 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3737 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/WIP_Damascus-8B-TIES/38e5b086-4a73-4ffa-9b32-eb80405fecb5.json b/data/hfopenllm_v2/DreadPoor/WIP_Damascus-8B-TIES/38e5b086-4a73-4ffa-9b32-eb80405fecb5.json deleted file mode 100644 index 25a3a8821..000000000 --- a/data/hfopenllm_v2/DreadPoor/WIP_Damascus-8B-TIES/38e5b086-4a73-4ffa-9b32-eb80405fecb5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_WIP_Damascus-8B-TIES/1762652579.5856981", - "retrieved_timestamp": "1762652579.5856981", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/WIP_Damascus-8B-TIES", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/WIP_Damascus-8B-TIES", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": 
"Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4776326812856554 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5410672913070808 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16540785498489427 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41185416666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37608045212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/WIP_Damascus-8B-TIES/f5408aa9-85c8-46e5-b225-0480b2e18e97.json b/data/hfopenllm_v2/DreadPoor/WIP_Damascus-8B-TIES/f5408aa9-85c8-46e5-b225-0480b2e18e97.json new file mode 100644 index 000000000..e05a5b321 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/WIP_Damascus-8B-TIES/f5408aa9-85c8-46e5-b225-0480b2e18e97.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_WIP_Damascus-8B-TIES/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "WIP_Damascus-8B-TIES", + "id": "DreadPoor/WIP_Damascus-8B-TIES", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4776 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5411 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1654 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4119 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3761 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Wannabe-8B-Model_Stock/c1918f55-286c-4b29-ac53-2ee8f9d36d9e.json b/data/hfopenllm_v2/DreadPoor/Wannabe-8B-Model_Stock/c1918f55-286c-4b29-ac53-2ee8f9d36d9e.json new file mode 100644 index 000000000..f551e73cc --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Wannabe-8B-Model_Stock/c1918f55-286c-4b29-ac53-2ee8f9d36d9e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Wannabe-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Wannabe-8B-Model_Stock", + "id": "DreadPoor/Wannabe-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7205 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.539 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1775 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": 
"hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4135 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3831 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Wannabe-8B-Model_Stock/fafc0425-a4f0-4c5b-8328-5dfca7d6402f.json b/data/hfopenllm_v2/DreadPoor/Wannabe-8B-Model_Stock/fafc0425-a4f0-4c5b-8328-5dfca7d6402f.json deleted file mode 100644 index 7ecb2dc3d..000000000 --- a/data/hfopenllm_v2/DreadPoor/Wannabe-8B-Model_Stock/fafc0425-a4f0-4c5b-8328-5dfca7d6402f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Wannabe-8B-Model_Stock/1762652579.585919", - "retrieved_timestamp": "1762652579.58592", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Wannabe-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Wannabe-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7204816553411615 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5389637944785705 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17749244712990936 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41346875 - } - }, - { - "evaluation_name": 
"MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.383061835106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/What_A_Thrill-8B-Model_Stock/52659d37-67f8-45b8-88e4-11917dc90488.json b/data/hfopenllm_v2/DreadPoor/What_A_Thrill-8B-Model_Stock/52659d37-67f8-45b8-88e4-11917dc90488.json new file mode 100644 index 000000000..00ab3ed08 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/What_A_Thrill-8B-Model_Stock/52659d37-67f8-45b8-88e4-11917dc90488.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_What_A_Thrill-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "What_A_Thrill-8B-Model_Stock", + "id": "DreadPoor/What_A_Thrill-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7064 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5311 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.182 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.408 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.3615 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/What_A_Thrill-8B-Model_Stock/b9fadd79-8220-4023-b92a-c38b07a90e8f.json b/data/hfopenllm_v2/DreadPoor/What_A_Thrill-8B-Model_Stock/b9fadd79-8220-4023-b92a-c38b07a90e8f.json deleted file mode 100644 index 42bd599a4..000000000 --- a/data/hfopenllm_v2/DreadPoor/What_A_Thrill-8B-Model_Stock/b9fadd79-8220-4023-b92a-c38b07a90e8f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_What_A_Thrill-8B-Model_Stock/1762652579.5861409", - "retrieved_timestamp": "1762652579.586142", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/What_A_Thrill-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/What_A_Thrill-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7064433480941679 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.531144904394377 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18202416918429004 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40804166666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3615359042553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Winter-8B-SCE/556ae77c-effe-44ab-ac4a-1ad7cbd7c363.json b/data/hfopenllm_v2/DreadPoor/Winter-8B-SCE/556ae77c-effe-44ab-ac4a-1ad7cbd7c363.json new file mode 100644 index 000000000..b9a6a545d --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Winter-8B-SCE/556ae77c-effe-44ab-ac4a-1ad7cbd7c363.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Winter-8B-SCE/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + 
"model_info": { + "name": "Winter-8B-SCE", + "id": "DreadPoor/Winter-8B-SCE", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7536 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5262 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1918 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4071 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3839 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Winter-8B-SCE/b351842a-aa2a-494a-8159-c732f071c7c6.json b/data/hfopenllm_v2/DreadPoor/Winter-8B-SCE/b351842a-aa2a-494a-8159-c732f071c7c6.json deleted file mode 100644 index a0b6e4597..000000000 --- a/data/hfopenllm_v2/DreadPoor/Winter-8B-SCE/b351842a-aa2a-494a-8159-c732f071c7c6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Winter-8B-SCE/1762652579.586359", - "retrieved_timestamp": "1762652579.58636", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Winter-8B-SCE", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Winter-8B-SCE", - 
"additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7536292592543341 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5261733490323383 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19184290030211482 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4070833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38389295212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Winter_Dawn-8B-TIES/048fc971-3baf-4740-a132-2f9476d01b7a.json b/data/hfopenllm_v2/DreadPoor/Winter_Dawn-8B-TIES/048fc971-3baf-4740-a132-2f9476d01b7a.json new file mode 100644 index 000000000..f48ac4b48 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Winter_Dawn-8B-TIES/048fc971-3baf-4740-a132-2f9476d01b7a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Winter_Dawn-8B-TIES/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Winter_Dawn-8B-TIES", + "id": "DreadPoor/Winter_Dawn-8B-TIES", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5496 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5309 + } + }, + { + "evaluation_name": "MATH Level 5", + 
"source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1858 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4279 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.391 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Winter_Dawn-8B-TIES/21947721-9f9a-4cc2-aa88-e1853f488167.json b/data/hfopenllm_v2/DreadPoor/Winter_Dawn-8B-TIES/21947721-9f9a-4cc2-aa88-e1853f488167.json deleted file mode 100644 index b2e517e49..000000000 --- a/data/hfopenllm_v2/DreadPoor/Winter_Dawn-8B-TIES/21947721-9f9a-4cc2-aa88-e1853f488167.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Winter_Dawn-8B-TIES/1762652579.586569", - "retrieved_timestamp": "1762652579.58657", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Winter_Dawn-8B-TIES", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Winter_Dawn-8B-TIES", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5496482665992899 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5309416142154736 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18580060422960726 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on 
GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42785416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3910405585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Winter_Dusk-8B-TIES/abd28d25-01e0-474d-be35-08d816d281f5.json b/data/hfopenllm_v2/DreadPoor/Winter_Dusk-8B-TIES/abd28d25-01e0-474d-be35-08d816d281f5.json new file mode 100644 index 000000000..1470af900 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Winter_Dusk-8B-TIES/abd28d25-01e0-474d-be35-08d816d281f5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Winter_Dusk-8B-TIES/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Winter_Dusk-8B-TIES", + "id": "DreadPoor/Winter_Dusk-8B-TIES", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7153 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4952 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0718 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.3688 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3478 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Winter_Dusk-8B-TIES/cdc03c25-5bfb-4185-8e29-40e1af2ef253.json b/data/hfopenllm_v2/DreadPoor/Winter_Dusk-8B-TIES/cdc03c25-5bfb-4185-8e29-40e1af2ef253.json deleted file mode 100644 index bc114b11b..000000000 --- a/data/hfopenllm_v2/DreadPoor/Winter_Dusk-8B-TIES/cdc03c25-5bfb-4185-8e29-40e1af2ef253.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Winter_Dusk-8B-TIES/1762652579.586781", - "retrieved_timestamp": "1762652579.586782", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Winter_Dusk-8B-TIES", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Winter_Dusk-8B-TIES", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7152610628687439 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4951882158967103 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3688229166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3478224734042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Winter_Night-8B-Model_Stock/17f49724-6553-4baa-b354-45ffd0f2c844.json b/data/hfopenllm_v2/DreadPoor/Winter_Night-8B-Model_Stock/17f49724-6553-4baa-b354-45ffd0f2c844.json new file mode 100644 index 000000000..2969a6c18 --- /dev/null +++ 
b/data/hfopenllm_v2/DreadPoor/Winter_Night-8B-Model_Stock/17f49724-6553-4baa-b354-45ffd0f2c844.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Winter_Night-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Winter_Night-8B-Model_Stock", + "id": "DreadPoor/Winter_Night-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.704 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5185 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1458 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3914 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3666 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Winter_Night-8B-Model_Stock/49d98c73-75d8-4629-8cc2-a03592b0f551.json b/data/hfopenllm_v2/DreadPoor/Winter_Night-8B-Model_Stock/49d98c73-75d8-4629-8cc2-a03592b0f551.json deleted file mode 100644 index d4f0f5cc0..000000000 --- a/data/hfopenllm_v2/DreadPoor/Winter_Night-8B-Model_Stock/49d98c73-75d8-4629-8cc2-a03592b0f551.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/DreadPoor_Winter_Night-8B-Model_Stock/1762652579.587023", - "retrieved_timestamp": "1762652579.587024", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Winter_Night-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Winter_Night-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7040452665593957 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5184968441488284 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14577039274924472 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3914270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3666057180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Yafune-8B-Model_Stock/3e60d982-d7d5-432b-962e-b7734cc90534.json b/data/hfopenllm_v2/DreadPoor/Yafune-8B-Model_Stock/3e60d982-d7d5-432b-962e-b7734cc90534.json new file mode 100644 index 000000000..25b9ce545 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Yafune-8B-Model_Stock/3e60d982-d7d5-432b-962e-b7734cc90534.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Yafune-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yafune-8B-Model_Stock", + "id": "DreadPoor/Yafune-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7533 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5467 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1662 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3272 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4173 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3851 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Yafune-8B-Model_Stock/edaf2deb-16a3-4109-84e0-e65498e09d1f.json b/data/hfopenllm_v2/DreadPoor/Yafune-8B-Model_Stock/edaf2deb-16a3-4109-84e0-e65498e09d1f.json deleted file mode 100644 index 54210d573..000000000 --- a/data/hfopenllm_v2/DreadPoor/Yafune-8B-Model_Stock/edaf2deb-16a3-4109-84e0-e65498e09d1f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Yafune-8B-Model_Stock/1762652579.587391", - "retrieved_timestamp": "1762652579.587392", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Yafune-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Yafune-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7533045652202822 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5466719512941253 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271812080536913 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41728125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38505651595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Yearn_V3-8B-Model_Stock/763eec85-4395-43b6-aa79-9ecb024eb7af.json b/data/hfopenllm_v2/DreadPoor/Yearn_V3-8B-Model_Stock/763eec85-4395-43b6-aa79-9ecb024eb7af.json deleted file mode 100644 index a9be9144d..000000000 --- a/data/hfopenllm_v2/DreadPoor/Yearn_V3-8B-Model_Stock/763eec85-4395-43b6-aa79-9ecb024eb7af.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Yearn_V3-8B-Model_Stock/1762652579.587668", - "retrieved_timestamp": "1762652579.587669", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Yearn_V3-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Yearn_V3-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7289746760816855 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5322019394938072 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18957703927492447 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, 
- { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3908958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3801529255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Yearn_V3-8B-Model_Stock/79a0fdf3-b432-4598-be62-f9eb57fa5a43.json b/data/hfopenllm_v2/DreadPoor/Yearn_V3-8B-Model_Stock/79a0fdf3-b432-4598-be62-f9eb57fa5a43.json new file mode 100644 index 000000000..74619217a --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Yearn_V3-8B-Model_Stock/79a0fdf3-b432-4598-be62-f9eb57fa5a43.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Yearn_V3-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yearn_V3-8B-Model_Stock", + "id": "DreadPoor/Yearn_V3-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.729 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5322 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1896 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3909 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": 
"hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3802 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/ZEUS-8B-V17-Abliterated_ALT/538f74e4-2587-43d7-a3fb-7826f3995ad9.json b/data/hfopenllm_v2/DreadPoor/ZEUS-8B-V17-Abliterated_ALT/538f74e4-2587-43d7-a3fb-7826f3995ad9.json deleted file mode 100644 index d67332266..000000000 --- a/data/hfopenllm_v2/DreadPoor/ZEUS-8B-V17-Abliterated_ALT/538f74e4-2587-43d7-a3fb-7826f3995ad9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_ZEUS-8B-V17-Abliterated_ALT/1762652579.587883", - "retrieved_timestamp": "1762652579.587884", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/ZEUS-8B-V17-Abliterated_ALT", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/ZEUS-8B-V17-Abliterated_ALT", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5511221337163171 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5231075970343642 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1903323262839879 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41492708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3890458776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/ZEUS-8B-V17-Abliterated_ALT/662566e0-2af3-40d6-90de-9b361bcae355.json b/data/hfopenllm_v2/DreadPoor/ZEUS-8B-V17-Abliterated_ALT/662566e0-2af3-40d6-90de-9b361bcae355.json new file mode 100644 index 000000000..02d4db078 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/ZEUS-8B-V17-Abliterated_ALT/662566e0-2af3-40d6-90de-9b361bcae355.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/DreadPoor_ZEUS-8B-V17-Abliterated_ALT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZEUS-8B-V17-Abliterated_ALT", + "id": "DreadPoor/ZEUS-8B-V17-Abliterated_ALT", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5511 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5231 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1903 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4149 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.389 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Zelus-8B-Model_Stock/2a1d9c9c-b3e4-49d8-96cb-720e53184db6.json b/data/hfopenllm_v2/DreadPoor/Zelus-8B-Model_Stock/2a1d9c9c-b3e4-49d8-96cb-720e53184db6.json deleted file mode 100644 index 635b7e91a..000000000 --- a/data/hfopenllm_v2/DreadPoor/Zelus-8B-Model_Stock/2a1d9c9c-b3e4-49d8-96cb-720e53184db6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Zelus-8B-Model_Stock/1762652579.5881522", - "retrieved_timestamp": "1762652579.5881522", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - 
], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Zelus-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Zelus-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.778833495126265 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5307011398651839 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42140625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38414228723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/Zelus-8B-Model_Stock/d81c0035-a0b1-426c-9080-8ccbf745642b.json b/data/hfopenllm_v2/DreadPoor/Zelus-8B-Model_Stock/d81c0035-a0b1-426c-9080-8ccbf745642b.json new file mode 100644 index 000000000..03e62bfa1 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Zelus-8B-Model_Stock/d81c0035-a0b1-426c-9080-8ccbf745642b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Zelus-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Zelus-8B-Model_Stock", + "id": "DreadPoor/Zelus-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7788 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + 
"dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5307 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1647 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4214 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3841 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Zelus_V2-8B-Model_Stock/100bc243-158c-4e5c-918b-1439bf26fee8.json b/data/hfopenllm_v2/DreadPoor/Zelus_V2-8B-Model_Stock/100bc243-158c-4e5c-918b-1439bf26fee8.json new file mode 100644 index 000000000..059e73847 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Zelus_V2-8B-Model_Stock/100bc243-158c-4e5c-918b-1439bf26fee8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_Zelus_V2-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Zelus_V2-8B-Model_Stock", + "id": "DreadPoor/Zelus_V2-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7898 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5345 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2054 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3961 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3833 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Zelus_V2-8B-Model_Stock/b385729e-27f8-4bf2-b2c6-674504fcd75b.json b/data/hfopenllm_v2/DreadPoor/Zelus_V2-8B-Model_Stock/b385729e-27f8-4bf2-b2c6-674504fcd75b.json deleted file mode 100644 index 9881ffb3d..000000000 --- a/data/hfopenllm_v2/DreadPoor/Zelus_V2-8B-Model_Stock/b385729e-27f8-4bf2-b2c6-674504fcd75b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Zelus_V2-8B-Model_Stock/1762652579.588366", - "retrieved_timestamp": "1762652579.5883808", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Zelus_V2-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Zelus_V2-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7898243327703826 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5344816839912676 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.2054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3960729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38331117021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/felix_dies-mistral-7B-model_stock/45e32080-1464-40e0-a232-310fdda967eb.json b/data/hfopenllm_v2/DreadPoor/felix_dies-mistral-7B-model_stock/45e32080-1464-40e0-a232-310fdda967eb.json new file mode 100644 index 000000000..c0877447b --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/felix_dies-mistral-7B-model_stock/45e32080-1464-40e0-a232-310fdda967eb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_felix_dies-mistral-7B-model_stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "felix_dies-mistral-7B-model_stock", + "id": "DreadPoor/felix_dies-mistral-7B-model_stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3008 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4901 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0536 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + 
"source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4518 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3109 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/hakuchido-8B-MODEL_STOCK/a9d24835-302c-445b-b1fd-89d41e3e7878.json b/data/hfopenllm_v2/DreadPoor/hakuchido-8B-MODEL_STOCK/a9d24835-302c-445b-b1fd-89d41e3e7878.json deleted file mode 100644 index b21d12fdf..000000000 --- a/data/hfopenllm_v2/DreadPoor/hakuchido-8B-MODEL_STOCK/a9d24835-302c-445b-b1fd-89d41e3e7878.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_hakuchido-8B-MODEL_STOCK/1762652579.589018", - "retrieved_timestamp": "1762652579.589018", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/hakuchido-8B-MODEL_STOCK", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/hakuchido-8B-MODEL_STOCK", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7375175645066203 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5398373390214104 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19486404833836857 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41746875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3781582446808511 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/DreadPoor/hakuchido-8B-MODEL_STOCK/e89b279f-d548-4aa8-b5e5-0bffdd98b840.json b/data/hfopenllm_v2/DreadPoor/hakuchido-8B-MODEL_STOCK/e89b279f-d548-4aa8-b5e5-0bffdd98b840.json new file mode 100644 index 000000000..81722a6a0 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/hakuchido-8B-MODEL_STOCK/e89b279f-d548-4aa8-b5e5-0bffdd98b840.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_hakuchido-8B-MODEL_STOCK/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hakuchido-8B-MODEL_STOCK", + "id": "DreadPoor/hakuchido-8B-MODEL_STOCK", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7375 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5398 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1949 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4175 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3782 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/ichor-8B-Model_Stock/777a53f9-891c-4f9e-99a8-bb1988f61f19.json b/data/hfopenllm_v2/DreadPoor/ichor-8B-Model_Stock/777a53f9-891c-4f9e-99a8-bb1988f61f19.json new file mode 100644 index 
000000000..c35d0a556 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/ichor-8B-Model_Stock/777a53f9-891c-4f9e-99a8-bb1988f61f19.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_ichor-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ichor-8B-Model_Stock", + "id": "DreadPoor/ichor-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5386 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5084 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1088 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3238 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4212 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3151 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/ichor-8B-Model_Stock/b1b0d419-e025-488a-a367-6769edfdf8ff.json b/data/hfopenllm_v2/DreadPoor/ichor-8B-Model_Stock/b1b0d419-e025-488a-a367-6769edfdf8ff.json deleted file mode 100644 index fde1fad2c..000000000 --- a/data/hfopenllm_v2/DreadPoor/ichor-8B-Model_Stock/b1b0d419-e025-488a-a367-6769edfdf8ff.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/DreadPoor_ichor-8B-Model_Stock/1762652579.589237", - "retrieved_timestamp": "1762652579.589238", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/ichor-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/ichor-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5386319410275846 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5084222037759372 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10876132930513595 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42121875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31507646276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/ichor_1.1-8B-Model_Stock/64afccfe-af45-4c26-878a-eb01b56f3524.json b/data/hfopenllm_v2/DreadPoor/ichor_1.1-8B-Model_Stock/64afccfe-af45-4c26-878a-eb01b56f3524.json deleted file mode 100644 index 94aa7aabd..000000000 --- a/data/hfopenllm_v2/DreadPoor/ichor_1.1-8B-Model_Stock/64afccfe-af45-4c26-878a-eb01b56f3524.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_ichor_1.1-8B-Model_Stock/1762652579.589439", - "retrieved_timestamp": "1762652579.589439", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/ichor_1.1-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/ichor_1.1-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { 
- "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8096328851890761 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.528067770617839 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17749244712990936 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4067708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3855551861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/ichor_1.1-8B-Model_Stock/f15846b1-8eaa-411b-88f7-25064161af4e.json b/data/hfopenllm_v2/DreadPoor/ichor_1.1-8B-Model_Stock/f15846b1-8eaa-411b-88f7-25064161af4e.json new file mode 100644 index 000000000..07bce712f --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/ichor_1.1-8B-Model_Stock/f15846b1-8eaa-411b-88f7-25064161af4e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_ichor_1.1-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ichor_1.1-8B-Model_Stock", + "id": "DreadPoor/ichor_1.1-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8096 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5281 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": 
"Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1775 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4068 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3856 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/inexpertus-8B-Model_Stock/1f0112d0-46b4-4a2c-9ccc-4872ccbae7a5.json b/data/hfopenllm_v2/DreadPoor/inexpertus-8B-Model_Stock/1f0112d0-46b4-4a2c-9ccc-4872ccbae7a5.json deleted file mode 100644 index 5cde2a283..000000000 --- a/data/hfopenllm_v2/DreadPoor/inexpertus-8B-Model_Stock/1f0112d0-46b4-4a2c-9ccc-4872ccbae7a5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_inexpertus-8B-Model_Stock/1762652579.589726", - "retrieved_timestamp": "1762652579.589729", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/inexpertus-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/inexpertus-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7795327508787795 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5280190470468065 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17069486404833836 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41182291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3790724734042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/inexpertus-8B-Model_Stock/e803fc85-fb98-4db8-aab0-a63100dcd5fc.json b/data/hfopenllm_v2/DreadPoor/inexpertus-8B-Model_Stock/e803fc85-fb98-4db8-aab0-a63100dcd5fc.json new file mode 100644 index 000000000..dd5b49bf4 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/inexpertus-8B-Model_Stock/e803fc85-fb98-4db8-aab0-a63100dcd5fc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_inexpertus-8B-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "inexpertus-8B-Model_Stock", + "id": "DreadPoor/inexpertus-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7795 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.528 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1707 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4118 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + 
"dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3791 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/inexpertus_1.1-8B-LINEAR/50620749-5ecf-41eb-a131-611675560e07.json b/data/hfopenllm_v2/DreadPoor/inexpertus_1.1-8B-LINEAR/50620749-5ecf-41eb-a131-611675560e07.json new file mode 100644 index 000000000..0f7b2d86a --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/inexpertus_1.1-8B-LINEAR/50620749-5ecf-41eb-a131-611675560e07.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_inexpertus_1.1-8B-LINEAR/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "inexpertus_1.1-8B-LINEAR", + "id": "DreadPoor/inexpertus_1.1-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7527 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5525 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.173 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4173 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3827 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/inexpertus_1.1-8B-LINEAR/86f45b60-19d1-41fa-8538-3d22ea28a98f.json b/data/hfopenllm_v2/DreadPoor/inexpertus_1.1-8B-LINEAR/86f45b60-19d1-41fa-8538-3d22ea28a98f.json deleted file mode 100644 index 3d4821c1a..000000000 --- a/data/hfopenllm_v2/DreadPoor/inexpertus_1.1-8B-LINEAR/86f45b60-19d1-41fa-8538-3d22ea28a98f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_inexpertus_1.1-8B-LINEAR/1762652579.59006", - "retrieved_timestamp": "1762652579.590061", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/inexpertus_1.1-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/inexpertus_1.1-8B-LINEAR", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7527050448365891 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5524638802167572 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1729607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41734374999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38272938829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/inexpertus_1.2-8B-LINEAR/2d40a551-6440-4d71-87e4-639d486c1c5e.json b/data/hfopenllm_v2/DreadPoor/inexpertus_1.2-8B-LINEAR/2d40a551-6440-4d71-87e4-639d486c1c5e.json new file mode 100644 index 000000000..9628c12f8 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/inexpertus_1.2-8B-LINEAR/2d40a551-6440-4d71-87e4-639d486c1c5e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_inexpertus_1.2-8B-LINEAR/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": 
"documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "inexpertus_1.2-8B-LINEAR", + "id": "DreadPoor/inexpertus_1.2-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7348 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5523 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1586 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4133 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3788 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/inexpertus_1.2-8B-LINEAR/c2465654-27c4-4cad-94fa-3b0bff1fd242.json b/data/hfopenllm_v2/DreadPoor/inexpertus_1.2-8B-LINEAR/c2465654-27c4-4cad-94fa-3b0bff1fd242.json deleted file mode 100644 index 5752670a9..000000000 --- a/data/hfopenllm_v2/DreadPoor/inexpertus_1.2-8B-LINEAR/c2465654-27c4-4cad-94fa-3b0bff1fd242.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_inexpertus_1.2-8B-LINEAR/1762652579.590318", - "retrieved_timestamp": "1762652579.5903192", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" 
- }, - "model_info": { - "name": "DreadPoor/inexpertus_1.2-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/inexpertus_1.2-8B-LINEAR", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7347947889377962 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5523440600721518 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15861027190332325 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41334374999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37882313829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/mergekit-nuslerp-nqzkedi/22235942-2e3e-4ef4-b7a0-5800f507571a.json b/data/hfopenllm_v2/DreadPoor/mergekit-nuslerp-nqzkedi/22235942-2e3e-4ef4-b7a0-5800f507571a.json new file mode 100644 index 000000000..0f3089a37 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/mergekit-nuslerp-nqzkedi/22235942-2e3e-4ef4-b7a0-5800f507571a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_mergekit-nuslerp-nqzkedi/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mergekit-nuslerp-nqzkedi", + "id": "DreadPoor/mergekit-nuslerp-nqzkedi", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7765 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy 
on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5362 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1881 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4225 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3919 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/mergekit-nuslerp-nqzkedi/c1bff8a8-6159-4fe6-a9bd-846846d0e633.json b/data/hfopenllm_v2/DreadPoor/mergekit-nuslerp-nqzkedi/c1bff8a8-6159-4fe6-a9bd-846846d0e633.json deleted file mode 100644 index 8e62737af..000000000 --- a/data/hfopenllm_v2/DreadPoor/mergekit-nuslerp-nqzkedi/c1bff8a8-6159-4fe6-a9bd-846846d0e633.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_mergekit-nuslerp-nqzkedi/1762652579.590566", - "retrieved_timestamp": "1762652579.590566", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/mergekit-nuslerp-nqzkedi", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/mergekit-nuslerp-nqzkedi", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7764852812759035 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5361918366546249 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18806646525679757 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4224583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3918716755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/remember_to_breathe-8b-Model-Stock/76309e63-a135-45cf-9f06-b091215726d0.json b/data/hfopenllm_v2/DreadPoor/remember_to_breathe-8b-Model-Stock/76309e63-a135-45cf-9f06-b091215726d0.json deleted file mode 100644 index 59c06d2ab..000000000 --- a/data/hfopenllm_v2/DreadPoor/remember_to_breathe-8b-Model-Stock/76309e63-a135-45cf-9f06-b091215726d0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_remember_to_breathe-8b-Model-Stock/1762652579.5907981", - "retrieved_timestamp": "1762652579.590799", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/remember_to_breathe-8b-Model-Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/remember_to_breathe-8b-Model-Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7104150321147887 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5411654435599922 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1487915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4144583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37608045212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/remember_to_breathe-8b-Model-Stock/ac06867d-3a34-42f6-9e2e-226cf86748f6.json b/data/hfopenllm_v2/DreadPoor/remember_to_breathe-8b-Model-Stock/ac06867d-3a34-42f6-9e2e-226cf86748f6.json new file mode 100644 index 000000000..08420f429 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/remember_to_breathe-8b-Model-Stock/ac06867d-3a34-42f6-9e2e-226cf86748f6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_remember_to_breathe-8b-Model-Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "remember_to_breathe-8b-Model-Stock", + "id": "DreadPoor/remember_to_breathe-8b-Model-Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7104 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5412 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1488 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4145 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.3761 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/test/394f1fc8-dc2c-4ff9-9ad0-7b3a8a8ddeb3.json b/data/hfopenllm_v2/DreadPoor/test/394f1fc8-dc2c-4ff9-9ad0-7b3a8a8ddeb3.json new file mode 100644 index 000000000..4d6f24d90 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/test/394f1fc8-dc2c-4ff9-9ad0-7b3a8a8ddeb3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_test/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "test", + "id": "DreadPoor/test", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4937 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5372 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1934 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4351 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3647 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/test/a4f14e1c-4c16-4fb8-9753-f05a6c5f2836.json b/data/hfopenllm_v2/DreadPoor/test/a4f14e1c-4c16-4fb8-9753-f05a6c5f2836.json deleted file mode 100644 index 9f6fe4d9b..000000000 --- 
a/data/hfopenllm_v2/DreadPoor/test/a4f14e1c-4c16-4fb8-9753-f05a6c5f2836.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_test/1762652579.5910451", - "retrieved_timestamp": "1762652579.5910459", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/test", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/test", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49369450834895856 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5371873804638203 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1933534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4350833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3646941489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/test_ALT/03e52d4f-78d7-453c-9685-844dd1636904.json b/data/hfopenllm_v2/DreadPoor/test_ALT/03e52d4f-78d7-453c-9685-844dd1636904.json new file mode 100644 index 000000000..14344c072 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/test_ALT/03e52d4f-78d7-453c-9685-844dd1636904.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_test_ALT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "test_ALT", + "id": "DreadPoor/test_ALT", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4997 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.537 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1707 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2693 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4363 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3492 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/test_ALT/1ca8f31a-4df9-4eb5-8ded-506d80246cdd.json b/data/hfopenllm_v2/DreadPoor/test_ALT/1ca8f31a-4df9-4eb5-8ded-506d80246cdd.json deleted file mode 100644 index a1e300847..000000000 --- a/data/hfopenllm_v2/DreadPoor/test_ALT/1ca8f31a-4df9-4eb5-8ded-506d80246cdd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_test_ALT/1762652579.591327", - "retrieved_timestamp": "1762652579.591328", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/test_ALT", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/test_ALT", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.499689712185889 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5370433315307738 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17069486404833836 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4362916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3492353723404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/DreadPoor/tests_pending-do_not_use_yet/3ce136d5-be81-4b8c-a7dc-4e1346935d35.json b/data/hfopenllm_v2/DreadPoor/tests_pending-do_not_use_yet/3ce136d5-be81-4b8c-a7dc-4e1346935d35.json new file mode 100644 index 000000000..e83a16a2c --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/tests_pending-do_not_use_yet/3ce136d5-be81-4b8c-a7dc-4e1346935d35.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/DreadPoor_tests_pending-do_not_use_yet/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tests_pending-do_not_use_yet", + "id": "DreadPoor/tests_pending-do_not_use_yet", + "developer": "DreadPoor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7691 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5408 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1979 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + 
"dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3827 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/tests_pending-do_not_use_yet/de113d87-7875-4f5c-89eb-48a59797b19b.json b/data/hfopenllm_v2/DreadPoor/tests_pending-do_not_use_yet/de113d87-7875-4f5c-89eb-48a59797b19b.json deleted file mode 100644 index b5d04c90c..000000000 --- a/data/hfopenllm_v2/DreadPoor/tests_pending-do_not_use_yet/de113d87-7875-4f5c-89eb-48a59797b19b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_tests_pending-do_not_use_yet/1762652579.591608", - "retrieved_timestamp": "1762652579.591609", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/tests_pending-do_not_use_yet", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/tests_pending-do_not_use_yet", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7691414336183549 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5407897873885027 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19788519637462235 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.40047916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38272938829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/ECE-ILAB-PRYMMAL/ILAB-Merging-3B-V2/cbdf2130-1b6a-43ae-a503-4fc7acf14a76.json b/data/hfopenllm_v2/ECE-ILAB-PRYMMAL/ILAB-Merging-3B-V2/cbdf2130-1b6a-43ae-a503-4fc7acf14a76.json deleted file mode 100644 index da2e0ccf9..000000000 --- a/data/hfopenllm_v2/ECE-ILAB-PRYMMAL/ILAB-Merging-3B-V2/cbdf2130-1b6a-43ae-a503-4fc7acf14a76.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ECE-ILAB-PRYMMAL_ILAB-Merging-3B-V2/1762652579.5918348", - "retrieved_timestamp": "1762652579.591836", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ECE-ILAB-PRYMMAL/ILAB-Merging-3B-V2", - "developer": "ECE-ILAB-PRYMMAL", - "inference_platform": "unknown", - "id": "ECE-ILAB-PRYMMAL/ILAB-Merging-3B-V2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40289432040319684 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5401935891431586 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15181268882175228 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43321875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38605385638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/ECE-ILAB-PRYMMAL/ILAB-Merging-3B-V2/fb35accf-0c5d-4f72-8d73-ba366a41a76d.json b/data/hfopenllm_v2/ECE-ILAB-PRYMMAL/ILAB-Merging-3B-V2/fb35accf-0c5d-4f72-8d73-ba366a41a76d.json new file mode 100644 index 000000000..2a680a23b --- /dev/null +++ b/data/hfopenllm_v2/ECE-ILAB-PRYMMAL/ILAB-Merging-3B-V2/fb35accf-0c5d-4f72-8d73-ba366a41a76d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + 
"evaluation_id": "hfopenllm_v2/ECE-ILAB-PRYMMAL_ILAB-Merging-3B-V2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ILAB-Merging-3B-V2", + "id": "ECE-ILAB-PRYMMAL/ILAB-Merging-3B-V2", + "developer": "ECE-ILAB-PRYMMAL", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4029 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5402 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1518 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4332 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3861 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2/75e5ca5d-cce1-4463-b398-553399ce6833.json b/data/hfopenllm_v2/EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2/75e5ca5d-cce1-4463-b398-553399ce6833.json new file mode 100644 index 000000000..12b4347c9 --- /dev/null +++ b/data/hfopenllm_v2/EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2/75e5ca5d-cce1-4463-b398-553399ce6833.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EVA-UNIT-01_EVA-Qwen2.5-14B-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EVA-Qwen2.5-14B-v0.2", + "id": "EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2", + "developer": "EVA-UNIT-01", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4038 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.609 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3406 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3943 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4794 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5135 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EVA-UNIT-01/EVA-Qwen2.5-72B-v0.2/c426bae7-b98d-4343-b419-ac8206196a95.json b/data/hfopenllm_v2/EVA-UNIT-01/EVA-Qwen2.5-72B-v0.2/c426bae7-b98d-4343-b419-ac8206196a95.json new file mode 100644 index 000000000..557addc60 --- /dev/null +++ b/data/hfopenllm_v2/EVA-UNIT-01/EVA-Qwen2.5-72B-v0.2/c426bae7-b98d-4343-b419-ac8206196a95.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EVA-UNIT-01_EVA-Qwen2.5-72B-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EVA-Qwen2.5-72B-v0.2", + "id": "EVA-UNIT-01/EVA-Qwen2.5-72B-v0.2", + "developer": "EVA-UNIT-01", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6879 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7088 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4313 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4086 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.472 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5813 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Edgerunners/meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16/1e2cd0e7-ce74-4eac-86fb-64412d1d2094.json b/data/hfopenllm_v2/Edgerunners/meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16/1e2cd0e7-ce74-4eac-86fb-64412d1d2094.json deleted file mode 100644 index 93ac36ccd..000000000 --- a/data/hfopenllm_v2/Edgerunners/meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16/1e2cd0e7-ce74-4eac-86fb-64412d1d2094.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Edgerunners_meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16/1762652579.592541", - "retrieved_timestamp": "1762652579.592542", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"Edgerunners/meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16", - "developer": "Edgerunners", - "inference_platform": "unknown", - "id": "Edgerunners/meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7147114101694614 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4979908369885237 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09063444108761329 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33415625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36361369680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/Edgerunners/meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16/b17de9f2-6f94-49f6-b908-fa983e8f8f9b.json b/data/hfopenllm_v2/Edgerunners/meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16/b17de9f2-6f94-49f6-b908-fa983e8f8f9b.json new file mode 100644 index 000000000..548eedb0d --- /dev/null +++ b/data/hfopenllm_v2/Edgerunners/meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16/b17de9f2-6f94-49f6-b908-fa983e8f8f9b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Edgerunners_meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16", + "id": "Edgerunners/meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16", + "developer": "Edgerunners", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 
0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7147 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.498 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0906 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3342 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3636 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EleutherAI/gpt-j-6b/58ba7ca1-8cca-4668-836b-824491d9cf01.json b/data/hfopenllm_v2/EleutherAI/gpt-j-6b/58ba7ca1-8cca-4668-836b-824491d9cf01.json new file mode 100644 index 000000000..3f4b4bd5e --- /dev/null +++ b/data/hfopenllm_v2/EleutherAI/gpt-j-6b/58ba7ca1-8cca-4668-836b-824491d9cf01.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EleutherAI_gpt-j-6b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gpt-j-6b", + "id": "EleutherAI/gpt-j-6b", + "developer": "EleutherAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GPTJForCausalLM", + "params_billions": 6.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2522 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, 
+ "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3191 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0136 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2458 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3658 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1241 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EleutherAI/gpt-neo-1.3B/23da100a-13b9-42a7-ba79-234be551d0e4.json b/data/hfopenllm_v2/EleutherAI/gpt-neo-1.3B/23da100a-13b9-42a7-ba79-234be551d0e4.json new file mode 100644 index 000000000..14e41e155 --- /dev/null +++ b/data/hfopenllm_v2/EleutherAI/gpt-neo-1.3B/23da100a-13b9-42a7-ba79-234be551d0e4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EleutherAI_gpt-neo-1.3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gpt-neo-1.3B", + "id": "EleutherAI/gpt-neo-1.3B", + "developer": "EleutherAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GPTNeoForCausalLM", + "params_billions": 1.366 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2079 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3039 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0106 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2559 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3817 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1164 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EleutherAI/gpt-neo-125m/2d0c12b9-cff8-4366-a3ce-7772e4c098c9.json b/data/hfopenllm_v2/EleutherAI/gpt-neo-125m/2d0c12b9-cff8-4366-a3ce-7772e4c098c9.json new file mode 100644 index 000000000..58ad6f3da --- /dev/null +++ b/data/hfopenllm_v2/EleutherAI/gpt-neo-125m/2d0c12b9-cff8-4366-a3ce-7772e4c098c9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EleutherAI_gpt-neo-125m/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gpt-neo-125m", + "id": "EleutherAI/gpt-neo-125m", + "developer": "EleutherAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GPTNeoForCausalLM", + "params_billions": 0.15 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1905 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3115 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.006 + } + }, + { + 
"evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2534 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3593 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1026 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EleutherAI/gpt-neo-2.7B/4b87eea2-169c-411e-9d15-caf6b7826590.json b/data/hfopenllm_v2/EleutherAI/gpt-neo-2.7B/4b87eea2-169c-411e-9d15-caf6b7826590.json new file mode 100644 index 000000000..409e0b82a --- /dev/null +++ b/data/hfopenllm_v2/EleutherAI/gpt-neo-2.7B/4b87eea2-169c-411e-9d15-caf6b7826590.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EleutherAI_gpt-neo-2.7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gpt-neo-2.7B", + "id": "EleutherAI/gpt-neo-2.7B", + "developer": "EleutherAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GPTNeoForCausalLM", + "params_billions": 2.718 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.259 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.314 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0106 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3554 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1163 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EleutherAI/gpt-neox-20b/62a3cce2-4ff5-4dc9-beab-a06001fd82d9.json b/data/hfopenllm_v2/EleutherAI/gpt-neox-20b/62a3cce2-4ff5-4dc9-beab-a06001fd82d9.json new file mode 100644 index 000000000..8d26f484b --- /dev/null +++ b/data/hfopenllm_v2/EleutherAI/gpt-neox-20b/62a3cce2-4ff5-4dc9-beab-a06001fd82d9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EleutherAI_gpt-neox-20b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gpt-neox-20b", + "id": "EleutherAI/gpt-neox-20b", + "developer": "EleutherAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 20.739 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2587 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3165 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0136 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2433 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3647 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1155 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EleutherAI/pythia-1.4b/0e5961e1-af27-4eee-8b9b-c82ee4ab61b1.json b/data/hfopenllm_v2/EleutherAI/pythia-1.4b/0e5961e1-af27-4eee-8b9b-c82ee4ab61b1.json new file mode 100644 index 000000000..0e25cd321 --- /dev/null +++ b/data/hfopenllm_v2/EleutherAI/pythia-1.4b/0e5961e1-af27-4eee-8b9b-c82ee4ab61b1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EleutherAI_pythia-1.4b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pythia-1.4b", + "id": "EleutherAI/pythia-1.4b", + "developer": "EleutherAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 1.515 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2371 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.315 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0151 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3538 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": 
"TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1123 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EleutherAI/pythia-1.4b/e268be37-589d-41f2-af98-a85bb412eb44.json b/data/hfopenllm_v2/EleutherAI/pythia-1.4b/e268be37-589d-41f2-af98-a85bb412eb44.json deleted file mode 100644 index 4a24b9e40..000000000 --- a/data/hfopenllm_v2/EleutherAI/pythia-1.4b/e268be37-589d-41f2-af98-a85bb412eb44.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EleutherAI_pythia-1.4b/1762652579.593903", - "retrieved_timestamp": "1762652579.593904", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EleutherAI/pythia-1.4b", - "developer": "EleutherAI", - "inference_platform": "unknown", - "id": "EleutherAI/pythia-1.4b", - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 1.515 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23708094522533543 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.315042649740714 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35378125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11228390957446809 - } - } - ] -} diff --git a/data/hfopenllm_v2/EleutherAI/pythia-12b/4df16bb2-996f-473f-9096-a8a8e152ca9b.json b/data/hfopenllm_v2/EleutherAI/pythia-12b/4df16bb2-996f-473f-9096-a8a8e152ca9b.json deleted file mode 100644 index 3864c5b6c..000000000 --- a/data/hfopenllm_v2/EleutherAI/pythia-12b/4df16bb2-996f-473f-9096-a8a8e152ca9b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EleutherAI_pythia-12b/1762652579.5942001", - "retrieved_timestamp": "1762652579.594201", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EleutherAI/pythia-12b", - "developer": "EleutherAI", - "inference_platform": "unknown", - "id": "EleutherAI/pythia-12b", - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 12.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24714756845170813 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179653957935337 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24664429530201343 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3646979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11087101063829788 - } - } - ] -} diff --git a/data/hfopenllm_v2/EleutherAI/pythia-12b/b62352d4-e3b0-4b4d-8d68-e2d973d820c1.json b/data/hfopenllm_v2/EleutherAI/pythia-12b/b62352d4-e3b0-4b4d-8d68-e2d973d820c1.json new file mode 100644 index 000000000..c70b94beb --- /dev/null +++ b/data/hfopenllm_v2/EleutherAI/pythia-12b/b62352d4-e3b0-4b4d-8d68-e2d973d820c1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EleutherAI_pythia-12b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pythia-12b", + "id": "EleutherAI/pythia-12b", + "developer": "EleutherAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 12.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2471 + } + }, + { + "evaluation_name": 
"BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.318 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0166 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2466 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3647 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1109 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EleutherAI/pythia-160m/7fadc486-767e-45ef-979d-74ecb858cb99.json b/data/hfopenllm_v2/EleutherAI/pythia-160m/7fadc486-767e-45ef-979d-74ecb858cb99.json new file mode 100644 index 000000000..fc912f961 --- /dev/null +++ b/data/hfopenllm_v2/EleutherAI/pythia-160m/7fadc486-767e-45ef-979d-74ecb858cb99.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EleutherAI_pythia-160m/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pythia-160m", + "id": "EleutherAI/pythia-160m", + "developer": "EleutherAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 0.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1816 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0091 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4179 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.112 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EleutherAI/pythia-160m/d59ad4b0-e58e-48d6-90eb-93398c46251a.json b/data/hfopenllm_v2/EleutherAI/pythia-160m/d59ad4b0-e58e-48d6-90eb-93398c46251a.json deleted file mode 100644 index dbf025ed1..000000000 --- a/data/hfopenllm_v2/EleutherAI/pythia-160m/d59ad4b0-e58e-48d6-90eb-93398c46251a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EleutherAI_pythia-160m/1762652579.5944068", - "retrieved_timestamp": "1762652579.594408", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EleutherAI/pythia-160m", - "developer": "EleutherAI", - "inference_platform": "unknown", - "id": "EleutherAI/pythia-160m", - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 0.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18155161637787737 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2970437484241321 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - 
"metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4179375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11195146276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/EleutherAI/pythia-1b/a21cc55c-e9df-46ef-beed-b67a1750ddb7.json b/data/hfopenllm_v2/EleutherAI/pythia-1b/a21cc55c-e9df-46ef-beed-b67a1750ddb7.json deleted file mode 100644 index 145faf327..000000000 --- a/data/hfopenllm_v2/EleutherAI/pythia-1b/a21cc55c-e9df-46ef-beed-b67a1750ddb7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EleutherAI_pythia-1b/1762652579.594618", - "retrieved_timestamp": "1762652579.594618", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EleutherAI/pythia-1b", - "developer": "EleutherAI", - "inference_platform": "unknown", - "id": "EleutherAI/pythia-1b", - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 1.079 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2207941594968018 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3004093017564394 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35520833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11361369680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/EleutherAI/pythia-1b/d0628e6f-a6f3-42eb-b9fc-e880ae8c0688.json 
b/data/hfopenllm_v2/EleutherAI/pythia-1b/d0628e6f-a6f3-42eb-b9fc-e880ae8c0688.json new file mode 100644 index 000000000..daea284b8 --- /dev/null +++ b/data/hfopenllm_v2/EleutherAI/pythia-1b/d0628e6f-a6f3-42eb-b9fc-e880ae8c0688.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EleutherAI_pythia-1b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pythia-1b", + "id": "EleutherAI/pythia-1b", + "developer": "EleutherAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 1.079 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2208 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3004 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0091 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2567 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3552 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1136 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EleutherAI/pythia-2.8b/0999a066-1151-4445-b130-00d8fe4a516e.json b/data/hfopenllm_v2/EleutherAI/pythia-2.8b/0999a066-1151-4445-b130-00d8fe4a516e.json new file mode 100644 index 000000000..7e836076c --- /dev/null +++ b/data/hfopenllm_v2/EleutherAI/pythia-2.8b/0999a066-1151-4445-b130-00d8fe4a516e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + 
"evaluation_id": "hfopenllm_v2/EleutherAI_pythia-2.8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pythia-2.8b", + "id": "EleutherAI/pythia-2.8b", + "developer": "EleutherAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 2.909 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2173 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3224 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0136 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3486 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1137 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EleutherAI/pythia-2.8b/0afcbde6-b822-4264-8733-bc255ea73314.json b/data/hfopenllm_v2/EleutherAI/pythia-2.8b/0afcbde6-b822-4264-8733-bc255ea73314.json deleted file mode 100644 index e1cfcd562..000000000 --- a/data/hfopenllm_v2/EleutherAI/pythia-2.8b/0afcbde6-b822-4264-8733-bc255ea73314.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EleutherAI_pythia-2.8b/1762652579.594833", - "retrieved_timestamp": "1762652579.5948339", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": 
"Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EleutherAI/pythia-2.8b", - "developer": "EleutherAI", - "inference_platform": "unknown", - "id": "EleutherAI/pythia-2.8b", - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 2.909 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21732226049105263 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3224085936276087 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3485729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11369680851063829 - } - } - ] -} diff --git a/data/hfopenllm_v2/EleutherAI/pythia-410m/1efc09d8-6a5c-4d48-b76e-2e04ef97b676.json b/data/hfopenllm_v2/EleutherAI/pythia-410m/1efc09d8-6a5c-4d48-b76e-2e04ef97b676.json new file mode 100644 index 000000000..070f7c573 --- /dev/null +++ b/data/hfopenllm_v2/EleutherAI/pythia-410m/1efc09d8-6a5c-4d48-b76e-2e04ef97b676.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EleutherAI_pythia-410m/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pythia-410m", + "id": "EleutherAI/pythia-410m", + "developer": "EleutherAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 0.506 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2195 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3028 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0098 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3578 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1128 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EleutherAI/pythia-410m/c9db5f06-9aac-4678-bfe0-65773ece4558.json b/data/hfopenllm_v2/EleutherAI/pythia-410m/c9db5f06-9aac-4678-bfe0-65773ece4558.json deleted file mode 100644 index 47d4b368c..000000000 --- a/data/hfopenllm_v2/EleutherAI/pythia-410m/c9db5f06-9aac-4678-bfe0-65773ece4558.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EleutherAI_pythia-410m/1762652579.5950441", - "retrieved_timestamp": "1762652579.595045", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EleutherAI/pythia-410m", - "developer": "EleutherAI", - "inference_platform": "unknown", - "id": "EleutherAI/pythia-410m", - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 0.506 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21954525104500505 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.302813387064426 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35781250000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11278257978723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/EleutherAI/pythia-6.9b/1a59412f-fe78-4ecf-8951-8f2996dd374f.json b/data/hfopenllm_v2/EleutherAI/pythia-6.9b/1a59412f-fe78-4ecf-8951-8f2996dd374f.json new file mode 100644 index 000000000..6f0eb37e2 --- /dev/null +++ b/data/hfopenllm_v2/EleutherAI/pythia-6.9b/1a59412f-fe78-4ecf-8951-8f2996dd374f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EleutherAI_pythia-6.9b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pythia-6.9b", + "id": "EleutherAI/pythia-6.9b", + "developer": "EleutherAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 6.9 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2281 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3232 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0144 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2517 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3591 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1147 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EleutherAI/pythia-6.9b/6ae207e3-2596-4b28-b058-d47d07465192.json b/data/hfopenllm_v2/EleutherAI/pythia-6.9b/6ae207e3-2596-4b28-b058-d47d07465192.json deleted file mode 100644 index d5ede0110..000000000 --- a/data/hfopenllm_v2/EleutherAI/pythia-6.9b/6ae207e3-2596-4b28-b058-d47d07465192.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EleutherAI_pythia-6.9b/1762652579.595358", - "retrieved_timestamp": "1762652579.595359", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EleutherAI/pythia-6.9b", - "developer": "EleutherAI", - "inference_platform": "unknown", - "id": "EleutherAI/pythia-6.9b", - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 6.9 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22811362739752744 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3232287869322383 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3590520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1146941489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/Enno-Ai/EnnoAi-Pro-French-Llama-3-8B-v0.4/b5403311-2069-488d-af98-27da14496c15.json 
b/data/hfopenllm_v2/Enno-Ai/EnnoAi-Pro-French-Llama-3-8B-v0.4/b5403311-2069-488d-af98-27da14496c15.json new file mode 100644 index 000000000..43c0cbc2f --- /dev/null +++ b/data/hfopenllm_v2/Enno-Ai/EnnoAi-Pro-French-Llama-3-8B-v0.4/b5403311-2069-488d-af98-27da14496c15.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Enno-Ai_EnnoAi-Pro-French-Llama-3-8B-v0.4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EnnoAi-Pro-French-Llama-3-8B-v0.4", + "id": "Enno-Ai/EnnoAi-Pro-French-Llama-3-8B-v0.4", + "developer": "Enno-Ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.031 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4189 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4075 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0363 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.417 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2635 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Enno-Ai/EnnoAi-Pro-Llama-3-8B-v0.3/6c10c176-b2b6-4216-91c0-1444944612f7.json b/data/hfopenllm_v2/Enno-Ai/EnnoAi-Pro-Llama-3-8B-v0.3/6c10c176-b2b6-4216-91c0-1444944612f7.json new file mode 100644 index 000000000..3c0a9c0a6 --- /dev/null +++ 
b/data/hfopenllm_v2/Enno-Ai/EnnoAi-Pro-Llama-3-8B-v0.3/6c10c176-b2b6-4216-91c0-1444944612f7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Enno-Ai_EnnoAi-Pro-Llama-3-8B-v0.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EnnoAi-Pro-Llama-3-8B-v0.3", + "id": "Enno-Ai/EnnoAi-Pro-Llama-3-8B-v0.3", + "developer": "Enno-Ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5083 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4101 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0483 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4236 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.299 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Enno-Ai/EnnoAi-Pro-Llama-3-8B/80ebd92e-d9b6-46ce-b77e-973c3f3f6051.json b/data/hfopenllm_v2/Enno-Ai/EnnoAi-Pro-Llama-3-8B/80ebd92e-d9b6-46ce-b77e-973c3f3f6051.json new file mode 100644 index 000000000..ae3f57ad1 --- /dev/null +++ b/data/hfopenllm_v2/Enno-Ai/EnnoAi-Pro-Llama-3-8B/80ebd92e-d9b6-46ce-b77e-973c3f3f6051.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Enno-Ai_EnnoAi-Pro-Llama-3-8B/1770682486.623709", + 
"retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EnnoAi-Pro-Llama-3-8B", + "id": "Enno-Ai/EnnoAi-Pro-Llama-3-8B", + "developer": "Enno-Ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.031 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3195 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4152 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0219 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4071 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2151 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Enno-Ai/EnnoAi-Pro-Llama-3.1-8B-v0.9/0418e36f-17ea-46a2-bfeb-91cc0ff719bf.json b/data/hfopenllm_v2/Enno-Ai/EnnoAi-Pro-Llama-3.1-8B-v0.9/0418e36f-17ea-46a2-bfeb-91cc0ff719bf.json new file mode 100644 index 000000000..145d529e4 --- /dev/null +++ b/data/hfopenllm_v2/Enno-Ai/EnnoAi-Pro-Llama-3.1-8B-v0.9/0418e36f-17ea-46a2-bfeb-91cc0ff719bf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Enno-Ai_EnnoAi-Pro-Llama-3.1-8B-v0.9/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" 
+ }, + "model_info": { + "name": "EnnoAi-Pro-Llama-3.1-8B-v0.9", + "id": "Enno-Ai/EnnoAi-Pro-Llama-3.1-8B-v0.9", + "developer": "Enno-Ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4689 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.416 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0378 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3832 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2596 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EnnoAi/EnnoAi-7B-French-Instruct-202502/4f5ba3fc-694a-45b1-ae9d-2c7d33e41519.json b/data/hfopenllm_v2/EnnoAi/EnnoAi-7B-French-Instruct-202502/4f5ba3fc-694a-45b1-ae9d-2c7d33e41519.json new file mode 100644 index 000000000..28ac26d3c --- /dev/null +++ b/data/hfopenllm_v2/EnnoAi/EnnoAi-7B-French-Instruct-202502/4f5ba3fc-694a-45b1-ae9d-2c7d33e41519.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EnnoAi_EnnoAi-7B-French-Instruct-202502/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EnnoAi-7B-French-Instruct-202502", + "id": "EnnoAi/EnnoAi-7B-French-Instruct-202502", + "developer": "EnnoAi", + "inference_platform": "unknown", + 
"additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5564 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5575 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3724 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.46 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4013 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EnnoAi/EnnoAi-7B-French-Instruct-202502/75939d35-c0ca-4256-b667-fe6042ca5979.json b/data/hfopenllm_v2/EnnoAi/EnnoAi-7B-French-Instruct-202502/75939d35-c0ca-4256-b667-fe6042ca5979.json deleted file mode 100644 index b1fb54519..000000000 --- a/data/hfopenllm_v2/EnnoAi/EnnoAi-7B-French-Instruct-202502/75939d35-c0ca-4256-b667-fe6042ca5979.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EnnoAi_EnnoAi-7B-French-Instruct-202502/1762652579.596549", - "retrieved_timestamp": "1762652579.59655", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EnnoAi/EnnoAi-7B-French-Instruct-202502", - "developer": "EnnoAi", - "inference_platform": "unknown", - "id": "EnnoAi/EnnoAi-7B-French-Instruct-202502", - "additional_details": { - "precision": "bfloat16", - "architecture": 
"LlamaForCausalLM", - "params_billions": 7.456 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5564424615575562 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5574545199388612 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3723564954682779 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45997916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4013464095744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/EnnoAi/EnnoAi-Pro-Llama-3.1-8B-v1.0/8b0d1556-bbd5-49e3-b881-32224bc1aa9a.json b/data/hfopenllm_v2/EnnoAi/EnnoAi-Pro-Llama-3.1-8B-v1.0/8b0d1556-bbd5-49e3-b881-32224bc1aa9a.json new file mode 100644 index 000000000..610b4bd7f --- /dev/null +++ b/data/hfopenllm_v2/EnnoAi/EnnoAi-Pro-Llama-3.1-8B-v1.0/8b0d1556-bbd5-49e3-b881-32224bc1aa9a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EnnoAi_EnnoAi-Pro-Llama-3.1-8B-v1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EnnoAi-Pro-Llama-3.1-8B-v1.0", + "id": "EnnoAi/EnnoAi-Pro-Llama-3.1-8B-v1.0", + "developer": "EnnoAi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4704 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.416 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH 
Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0378 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3832 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2596 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Epiculous/Azure_Dusk-v0.2/524e634f-280c-4f3a-9f1f-bdda19fad740.json b/data/hfopenllm_v2/Epiculous/Azure_Dusk-v0.2/524e634f-280c-4f3a-9f1f-bdda19fad740.json new file mode 100644 index 000000000..db3809f7b --- /dev/null +++ b/data/hfopenllm_v2/Epiculous/Azure_Dusk-v0.2/524e634f-280c-4f3a-9f1f-bdda19fad740.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Epiculous_Azure_Dusk-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Azure_Dusk-v0.2", + "id": "Epiculous/Azure_Dusk-v0.2", + "developer": "Epiculous", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3467 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.412 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 
+ }, + "score_details": { + "score": 0.0295 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3835 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3034 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Epiculous/Azure_Dusk-v0.2/79790560-846a-48fb-b37a-462162eb0e97.json b/data/hfopenllm_v2/Epiculous/Azure_Dusk-v0.2/79790560-846a-48fb-b37a-462162eb0e97.json deleted file mode 100644 index 0cd5e77a2..000000000 --- a/data/hfopenllm_v2/Epiculous/Azure_Dusk-v0.2/79790560-846a-48fb-b37a-462162eb0e97.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Epiculous_Azure_Dusk-v0.2/1762652579.5970619", - "retrieved_timestamp": "1762652579.5970628", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Epiculous/Azure_Dusk-v0.2", - "developer": "Epiculous", - "inference_platform": "unknown", - "id": "Epiculous/Azure_Dusk-v0.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.346715603487635 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4119721873553597 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3834583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3034408244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/Epiculous/Crimson_Dawn-v0.2/91b7917e-a908-4281-9a4d-a2c1e7558105.json b/data/hfopenllm_v2/Epiculous/Crimson_Dawn-v0.2/91b7917e-a908-4281-9a4d-a2c1e7558105.json deleted file mode 100644 index 8d2f56136..000000000 --- a/data/hfopenllm_v2/Epiculous/Crimson_Dawn-v0.2/91b7917e-a908-4281-9a4d-a2c1e7558105.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Epiculous_Crimson_Dawn-v0.2/1762652579.5973198", - "retrieved_timestamp": "1762652579.5973198", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Epiculous/Crimson_Dawn-v0.2", - "developer": "Epiculous", - "inference_platform": "unknown", - "id": "Epiculous/Crimson_Dawn-v0.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3103454389907667 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44823796489645434 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4151770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27210771276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/Epiculous/Crimson_Dawn-v0.2/cb82e92b-f207-4fbd-9bfe-43184769cdbd.json b/data/hfopenllm_v2/Epiculous/Crimson_Dawn-v0.2/cb82e92b-f207-4fbd-9bfe-43184769cdbd.json new file mode 100644 index 000000000..0f0cfc70f --- /dev/null +++ b/data/hfopenllm_v2/Epiculous/Crimson_Dawn-v0.2/cb82e92b-f207-4fbd-9bfe-43184769cdbd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/Epiculous_Crimson_Dawn-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Crimson_Dawn-v0.2", + "id": "Epiculous/Crimson_Dawn-v0.2", + "developer": "Epiculous", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3103 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4482 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0431 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4152 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2721 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Epiculous/NovaSpark/0b674103-4e55-41f4-accb-b7be73671801.json b/data/hfopenllm_v2/Epiculous/NovaSpark/0b674103-4e55-41f4-accb-b7be73671801.json new file mode 100644 index 000000000..74391a8d3 --- /dev/null +++ b/data/hfopenllm_v2/Epiculous/NovaSpark/0b674103-4e55-41f4-accb-b7be73671801.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Epiculous_NovaSpark/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, 
+ "model_info": { + "name": "NovaSpark", + "id": "Epiculous/NovaSpark", + "developer": "Epiculous", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6408 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5064 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1518 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3882 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3649 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Epiculous/NovaSpark/9270e697-84b1-46c5-afcc-481065f2be8f.json b/data/hfopenllm_v2/Epiculous/NovaSpark/9270e697-84b1-46c5-afcc-481065f2be8f.json deleted file mode 100644 index 1c42a82c4..000000000 --- a/data/hfopenllm_v2/Epiculous/NovaSpark/9270e697-84b1-46c5-afcc-481065f2be8f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Epiculous_NovaSpark/1762652579.597535", - "retrieved_timestamp": "1762652579.597536", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Epiculous/NovaSpark", - "developer": "Epiculous", - "inference_platform": "unknown", - "id": "Epiculous/NovaSpark", - "additional_details": { - "precision": 
"bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6408473960203371 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5063958663768304 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15181268882175228 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3881979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3648603723404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/Epiculous/Violet_Twilight-v0.2/83990950-a34c-463f-9a1a-d9371910da6f.json b/data/hfopenllm_v2/Epiculous/Violet_Twilight-v0.2/83990950-a34c-463f-9a1a-d9371910da6f.json deleted file mode 100644 index 80d963b22..000000000 --- a/data/hfopenllm_v2/Epiculous/Violet_Twilight-v0.2/83990950-a34c-463f-9a1a-d9371910da6f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Epiculous_Violet_Twilight-v0.2/1762652579.597749", - "retrieved_timestamp": "1762652579.59775", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Epiculous/Violet_Twilight-v0.2", - "developer": "Epiculous", - "inference_platform": "unknown", - "id": "Epiculous/Violet_Twilight-v0.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45317756885064964 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4614552476845888 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02870090634441088 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42993750000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3110871010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/Epiculous/Violet_Twilight-v0.2/fa0290e0-723f-4502-90b6-c77007fffc1f.json b/data/hfopenllm_v2/Epiculous/Violet_Twilight-v0.2/fa0290e0-723f-4502-90b6-c77007fffc1f.json new file mode 100644 index 000000000..c08d3f9d1 --- /dev/null +++ b/data/hfopenllm_v2/Epiculous/Violet_Twilight-v0.2/fa0290e0-723f-4502-90b6-c77007fffc1f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Epiculous_Violet_Twilight-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Violet_Twilight-v0.2", + "id": "Epiculous/Violet_Twilight-v0.2", + "developer": "Epiculous", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4532 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4615 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0287 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": 
"hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4299 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3111 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Alpaca-Llama3.1-8B/c3827ecd-d02a-4464-a098-110f4fb54516.json b/data/hfopenllm_v2/EpistemeAI/Alpaca-Llama3.1-8B/c3827ecd-d02a-4464-a098-110f4fb54516.json new file mode 100644 index 000000000..5e88a9d80 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Alpaca-Llama3.1-8B/c3827ecd-d02a-4464-a098-110f4fb54516.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Alpaca-Llama3.1-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Alpaca-Llama3.1-8B", + "id": "EpistemeAI/Alpaca-Llama3.1-8B", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1599 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4755 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0506 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.3403 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3246 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Athena-gemma-2-2b-it-Philos/af9700fe-20c0-4b7c-9f3a-c4d78fab7911.json b/data/hfopenllm_v2/EpistemeAI/Athena-gemma-2-2b-it-Philos/af9700fe-20c0-4b7c-9f3a-c4d78fab7911.json new file mode 100644 index 000000000..109e4335c --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Athena-gemma-2-2b-it-Philos/af9700fe-20c0-4b7c-9f3a-c4d78fab7911.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Athena-gemma-2-2b-it-Philos/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Athena-gemma-2-2b-it-Philos", + "id": "EpistemeAI/Athena-gemma-2-2b-it-Philos", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4621 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3795 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.037 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4314 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2248 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Athena-gemma-2-2b-it/959a4e4d-211c-4e45-94f1-f8f877e0b36f.json b/data/hfopenllm_v2/EpistemeAI/Athena-gemma-2-2b-it/959a4e4d-211c-4e45-94f1-f8f877e0b36f.json new file mode 100644 index 000000000..6c1214fb7 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Athena-gemma-2-2b-it/959a4e4d-211c-4e45-94f1-f8f877e0b36f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Athena-gemma-2-2b-it/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Athena-gemma-2-2b-it", + "id": "EpistemeAI/Athena-gemma-2-2b-it", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3134 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4264 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0491 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4351 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2422 + } + } + ] +} \ No newline at end of file diff 
--git a/data/hfopenllm_v2/EpistemeAI/Athene-codegemma-2-7b-it-alpaca-v1.3/96a8b3c0-d6bc-41fe-8967-0d798669aa8e.json b/data/hfopenllm_v2/EpistemeAI/Athene-codegemma-2-7b-it-alpaca-v1.3/96a8b3c0-d6bc-41fe-8967-0d798669aa8e.json new file mode 100644 index 000000000..18bd5723e --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Athene-codegemma-2-7b-it-alpaca-v1.3/96a8b3c0-d6bc-41fe-8967-0d798669aa8e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Athene-codegemma-2-7b-it-alpaca-v1.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Athene-codegemma-2-7b-it-alpaca-v1.3", + "id": "EpistemeAI/Athene-codegemma-2-7b-it-alpaca-v1.3", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GemmaForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.403 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4332 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0619 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4503 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2587 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/DeepPhi-3.5-mini-instruct/b367fb18-f302-41ec-a5f9-7d47766ca6f3.json 
b/data/hfopenllm_v2/EpistemeAI/DeepPhi-3.5-mini-instruct/b367fb18-f302-41ec-a5f9-7d47766ca6f3.json deleted file mode 100644 index 18f2886f5..000000000 --- a/data/hfopenllm_v2/EpistemeAI/DeepPhi-3.5-mini-instruct/b367fb18-f302-41ec-a5f9-7d47766ca6f3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_DeepPhi-3.5-mini-instruct/1762652579.5991712", - "retrieved_timestamp": "1762652579.599172", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/DeepPhi-3.5-mini-instruct", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/DeepPhi-3.5-mini-instruct", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1325915238234551 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28822860667627487 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2332214765100671 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36562500000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11028922872340426 - } - } - ] -} diff --git a/data/hfopenllm_v2/EpistemeAI/DeepPhi-3.5-mini-instruct/ed5d2ca8-d551-493d-8877-348204ef91cc.json b/data/hfopenllm_v2/EpistemeAI/DeepPhi-3.5-mini-instruct/ed5d2ca8-d551-493d-8877-348204ef91cc.json new file mode 100644 index 000000000..378a472d1 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/DeepPhi-3.5-mini-instruct/ed5d2ca8-d551-493d-8877-348204ef91cc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_DeepPhi-3.5-mini-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepPhi-3.5-mini-instruct", + "id": "EpistemeAI/DeepPhi-3.5-mini-instruct", + "developer": 
"EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1326 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2882 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0068 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2332 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3656 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1103 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/DeepThinkers-Phi4/04e20a14-8346-4801-8515-189861c857cb.json b/data/hfopenllm_v2/EpistemeAI/DeepThinkers-Phi4/04e20a14-8346-4801-8515-189861c857cb.json new file mode 100644 index 000000000..f41ea35a4 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/DeepThinkers-Phi4/04e20a14-8346-4801-8515-189861c857cb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_DeepThinkers-Phi4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepThinkers-Phi4", + "id": "EpistemeAI/DeepThinkers-Phi4", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + 
"dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.694 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.679 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4585 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3406 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3981 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5258 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/FineLlama3.1-8B-Instruct/a99828d9-a521-4b46-bd81-e791fae7bcf8.json b/data/hfopenllm_v2/EpistemeAI/FineLlama3.1-8B-Instruct/a99828d9-a521-4b46-bd81-e791fae7bcf8.json deleted file mode 100644 index 0294bc96a..000000000 --- a/data/hfopenllm_v2/EpistemeAI/FineLlama3.1-8B-Instruct/a99828d9-a521-4b46-bd81-e791fae7bcf8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_FineLlama3.1-8B-Instruct/1762652579.5997", - "retrieved_timestamp": "1762652579.599701", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/FineLlama3.1-8B-Instruct", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/FineLlama3.1-8B-Instruct", - "additional_details": { - "precision": "4bit", - "architecture": "?", - "params_billions": 14.483 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08000992921005155 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45573635384163325 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3481666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3112533244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/EpistemeAI/FineLlama3.1-8B-Instruct/eec2da56-ba0a-418f-afe1-8a46882b9839.json b/data/hfopenllm_v2/EpistemeAI/FineLlama3.1-8B-Instruct/eec2da56-ba0a-418f-afe1-8a46882b9839.json new file mode 100644 index 000000000..7a0548790 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/FineLlama3.1-8B-Instruct/eec2da56-ba0a-418f-afe1-8a46882b9839.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_FineLlama3.1-8B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FineLlama3.1-8B-Instruct", + "id": "EpistemeAI/FineLlama3.1-8B-Instruct", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "4bit", + "architecture": "?", + "params_billions": 14.483 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.08 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4557 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 
1.0 + }, + "score_details": { + "score": 0.0347 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3482 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3113 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-12B-v1.13a-philosophers/321cf68b-9220-4ada-89da-061341a20a9d.json b/data/hfopenllm_v2/EpistemeAI/Fireball-12B-v1.13a-philosophers/321cf68b-9220-4ada-89da-061341a20a9d.json new file mode 100644 index 000000000..2dc46794a --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Fireball-12B-v1.13a-philosophers/321cf68b-9220-4ada-89da-061341a20a9d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-12B-v1.13a-philosophers/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fireball-12B-v1.13a-philosophers", + "id": "EpistemeAI/Fireball-12B-v1.13a-philosophers", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0876 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5103 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0461 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", 
+ "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4081 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3367 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-12B/86fda025-2345-4a40-9094-223b96b21f13.json b/data/hfopenllm_v2/EpistemeAI/Fireball-12B/86fda025-2345-4a40-9094-223b96b21f13.json new file mode 100644 index 000000000..0d7c7fd33 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Fireball-12B/86fda025-2345-4a40-9094-223b96b21f13.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fireball-12B", + "id": "EpistemeAI/Fireball-12B", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1834 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5111 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0408 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": 
"MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4236 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3344 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-12B/bdb69cfa-cce7-4813-babb-b6f987be90de.json b/data/hfopenllm_v2/EpistemeAI/Fireball-12B/bdb69cfa-cce7-4813-babb-b6f987be90de.json deleted file mode 100644 index 35a1eefe4..000000000 --- a/data/hfopenllm_v2/EpistemeAI/Fireball-12B/bdb69cfa-cce7-4813-babb-b6f987be90de.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-12B/1762652579.59992", - "retrieved_timestamp": "1762652579.59992", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/Fireball-12B", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Fireball-12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1833501775289565 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5110893652548262 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42363541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3343583776595745 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/EpistemeAI/Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200/3c734233-9868-4ba6-83c0-2b63f2ce8980.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200/3c734233-9868-4ba6-83c0-2b63f2ce8980.json new file mode 100644 index 000000000..185221b63 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200/3c734233-9868-4ba6-83c0-2b63f2ce8980.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200", + "id": "EpistemeAI/Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4577 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4838 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1231 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3945 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3583 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/EpistemeAI/Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200/627a984d-8a4b-4a10-ac9e-05ccdbcc1835.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200/627a984d-8a4b-4a10-ac9e-05ccdbcc1835.json deleted file mode 100644 index 2936b1298..000000000 --- a/data/hfopenllm_v2/EpistemeAI/Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200/627a984d-8a4b-4a10-ac9e-05ccdbcc1835.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200/1762652579.600397", - "retrieved_timestamp": "1762652579.600397", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4577243934981405 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4838398624677178 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12311178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39445833333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35829454787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math-KTO-beta/7f5eca48-0ab9-4ef2-85c2-a7f1fe713afe.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math-KTO-beta/7f5eca48-0ab9-4ef2-85c2-a7f1fe713afe.json new file mode 100644 index 000000000..a5c0fc181 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math-KTO-beta/7f5eca48-0ab9-4ef2-85c2-a7f1fe713afe.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Alpaca-Llama3.1.07-8B-Philos-Math-KTO-beta/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + 
"source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fireball-Alpaca-Llama3.1.07-8B-Philos-Math-KTO-beta", + "id": "EpistemeAI/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math-KTO-beta", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7274 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4865 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1526 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3619 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3543 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R2/f5e0e809-08b8-43dd-a44d-875f365610c3.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R2/f5e0e809-08b8-43dd-a44d-875f365610c3.json new file mode 100644 index 000000000..5554b76ed --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R2/f5e0e809-08b8-43dd-a44d-875f365610c3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R2", + "id": "EpistemeAI/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R2", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4673 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4932 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1239 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2861 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4624 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3352 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto/8d267135-a7e6-4ec5-ae09-66478804bb66.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto/8d267135-a7e6-4ec5-ae09-66478804bb66.json new file mode 100644 index 000000000..e11cd048a --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto/8d267135-a7e6-4ec5-ae09-66478804bb66.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": 
"third_party" + }, + "model_info": { + "name": "Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto", + "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4432 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4824 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1329 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4066 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3516 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto/b8b22223-7ef6-4fec-9928-68de2ce516e6.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto/b8b22223-7ef6-4fec-9928-68de2ce516e6.json deleted file mode 100644 index 33f5b26b9..000000000 --- a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto/b8b22223-7ef6-4fec-9928-68de2ce516e6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto/1762652579.601048", - "retrieved_timestamp": "1762652579.6010492", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": 
"third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44318630123627534 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4823644760491404 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13293051359516617 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4066458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3515625 - } - } - ] -} diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/4940ed0e-2c1e-4408-9806-49ceed30a69e.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/4940ed0e-2c1e-4408-9806-49ceed30a69e.json new file mode 100644 index 000000000..afd2c84cc --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/4940ed0e-2c1e-4408-9806-49ceed30a69e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto", + "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7305 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4649 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1397 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3209 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.348 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/5f6f7b7c-ef6a-4468-aae5-d7dfc25c5659.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/5f6f7b7c-ef6a-4468-aae5-d7dfc25c5659.json new file mode 100644 index 000000000..df9550996 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/5f6f7b7c-ef6a-4468-aae5-d7dfc25c5659.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto", + "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" 
+ }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7207 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.461 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1314 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3432 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3354 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/7268e623-7dc3-4a79-b410-3f2efdbb6b1b.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/7268e623-7dc3-4a79-b410-3f2efdbb6b1b.json deleted file mode 100644 index 76a3ed14e..000000000 --- a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/7268e623-7dc3-4a79-b410-3f2efdbb6b1b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/1762652579.6022642", - "retrieved_timestamp": "1762652579.6022651", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7207066140063919 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4610092915501656 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13141993957703926 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3432395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3353557180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/ba8d6727-fe89-4bab-95a2-5f70d77034dc.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/ba8d6727-fe89-4bab-95a2-5f70d77034dc.json deleted file mode 100644 index acc70a297..000000000 --- a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/ba8d6727-fe89-4bab-95a2-5f70d77034dc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/1762652579.601946", - "retrieved_timestamp": "1762652579.6019468", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7304984108831234 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.46492466713692354 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13972809667673716 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32088541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34798869680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds/1ad587be-8544-4c37-bb8c-e21ad685039c.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds/1ad587be-8544-4c37-bb8c-e21ad685039c.json deleted file mode 100644 index ef3075a24..000000000 --- a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds/1ad587be-8544-4c37-bb8c-e21ad685039c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds/1762652579.60172", - "retrieved_timestamp": "1762652579.601721", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.669099101495144 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4668070143164938 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1336858006042296 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34178125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33892952127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds/5244ee3c-7d65-434a-acfe-cdb277ff5264.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds/5244ee3c-7d65-434a-acfe-cdb277ff5264.json new file mode 100644 index 000000000..1b365ab87 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds/5244ee3c-7d65-434a-acfe-cdb277ff5264.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds", + "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6691 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4668 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1337 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3418 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3389 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code/5f40e687-560e-4846-bbc1-4c2300680d4b.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code/5f40e687-560e-4846-bbc1-4c2300680d4b.json deleted file mode 100644 index e2a036d8b..000000000 --- a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code/5f40e687-560e-4846-bbc1-4c2300680d4b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code/1762652579.601493", - "retrieved_timestamp": "1762652579.601493", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5975334335119704 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4904191122627008 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1336858006042296 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40103125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34225398936170215 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code/eba4644f-d455-4a23-a16f-8ecb038ffe7f.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code/eba4644f-d455-4a23-a16f-8ecb038ffe7f.json new file mode 100644 index 000000000..647383065 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code/eba4644f-d455-4a23-a16f-8ecb038ffe7f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code", + "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5975 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4904 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1337 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.401 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3423 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K/839b6ee8-2f25-4b53-abec-a0a9dd198f04.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K/839b6ee8-2f25-4b53-abec-a0a9dd198f04.json deleted file mode 100644 index b40cab675..000000000 --- a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K/839b6ee8-2f25-4b53-abec-a0a9dd198f04.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K/1762652579.6012669", - "retrieved_timestamp": "1762652579.601268", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4457339858242796 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48973199216860547 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37622916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3543051861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K/fb270319-7010-4946-b60c-409aebe41aaa.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K/fb270319-7010-4946-b60c-409aebe41aaa.json new file mode 100644 index 000000000..19697de71 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K/fb270319-7010-4946-b60c-409aebe41aaa.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K", + "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4457 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4897 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1208 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3762 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3543 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT/6f29d957-8b65-4ee7-96dd-da2477023403.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT/6f29d957-8b65-4ee7-96dd-da2477023403.json deleted file mode 100644 index 1bdf7d3fc..000000000 --- a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT/6f29d957-8b65-4ee7-96dd-da2477023403.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT/1762652579.6025012", - "retrieved_timestamp": "1762652579.6025019", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4578241288669619 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4760520079608936 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13821752265861026 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3881354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3470744680851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT/d57bd77a-11cc-497c-b0bb-31c1ffa63dc2.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT/d57bd77a-11cc-497c-b0bb-31c1ffa63dc2.json new file mode 100644 index 000000000..b052f7972 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT/d57bd77a-11cc-497c-b0bb-31c1ffa63dc2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT", + "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT", + "developer": "EpistemeAI", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4578 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4761 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1382 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3881 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3471 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto/0220984e-fe8c-4e72-bc3e-92b949ffe769.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto/0220984e-fe8c-4e72-bc3e-92b949ffe769.json new file mode 100644 index 000000000..f8d75adfa --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto/0220984e-fe8c-4e72-bc3e-92b949ffe769.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto", + "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto", + 
"developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7205 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4818 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1435 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2483 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.33 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3548 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto/c39007d8-b4b8-485a-88af-39d18a6007c3.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto/c39007d8-b4b8-485a-88af-39d18a6007c3.json deleted file mode 100644 index 45a991768..000000000 --- a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto/c39007d8-b4b8-485a-88af-39d18a6007c3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto/1762652579.602742", - "retrieved_timestamp": "1762652579.6027431", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7204816553411615 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4817795525811035 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2483221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35480385638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Math/16482634-ec03-463a-9deb-2230ee955800.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Math/16482634-ec03-463a-9deb-2230ee955800.json new file mode 100644 index 000000000..af0d2a1e2 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Math/16482634-ec03-463a-9deb-2230ee955800.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Math/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fireball-Meta-Llama-3.1-8B-Instruct-Math", + "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Math", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4623 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + 
"dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4983 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.108 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3641 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3331 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Math/506bb9ca-e322-4ee3-b2d6-96e334a99473.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Math/506bb9ca-e322-4ee3-b2d6-96e334a99473.json deleted file mode 100644 index 553675979..000000000 --- a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Math/506bb9ca-e322-4ee3-b2d6-96e334a99473.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Math/1762652579.602981", - "retrieved_timestamp": "1762652579.6029818", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Math", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Math", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46229559790245434 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49829504320793055 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10800604229607251 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3640729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33311170212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO/4c1db32d-96fc-4a66-b083-530a3e75ad6d.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO/4c1db32d-96fc-4a66-b083-530a3e75ad6d.json new file mode 100644 index 000000000..a7e78720e --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO/4c1db32d-96fc-4a66-b083-530a3e75ad6d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO", + "id": "EpistemeAI/Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4611 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4801 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.1254 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3998 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3521 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO/e351aba3-7a05-400b-abbf-d09c1fe333e3.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO/e351aba3-7a05-400b-abbf-d09c1fe333e3.json deleted file mode 100644 index cc91de217..000000000 --- a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO/e351aba3-7a05-400b-abbf-d09c1fe333e3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO/1762652579.60321", - "retrieved_timestamp": "1762652579.603211", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46109655713506825 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48010141537970213 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12537764350453173 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3998229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35206117021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-Mistral-Nemo-Base-2407-v1-DPO2/6a0cc28d-d7bc-454d-ab7c-93c823256f30.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Mistral-Nemo-Base-2407-v1-DPO2/6a0cc28d-d7bc-454d-ab7c-93c823256f30.json deleted file mode 100644 index 67c7ae680..000000000 --- a/data/hfopenllm_v2/EpistemeAI/Fireball-Mistral-Nemo-Base-2407-v1-DPO2/6a0cc28d-d7bc-454d-ab7c-93c823256f30.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Mistral-Nemo-Base-2407-v1-DPO2/1762652579.603439", - "retrieved_timestamp": "1762652579.60344", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/Fireball-Mistral-Nemo-Base-2407-v1-DPO2", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Fireball-Mistral-Nemo-Base-2407-v1-DPO2", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18607295309778055 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49677687590350894 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03625377643504532 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4040104166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33527260638297873 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/EpistemeAI/Fireball-Mistral-Nemo-Base-2407-v1-DPO2/c0c5c846-395a-47ac-9e8e-e598939f317d.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Mistral-Nemo-Base-2407-v1-DPO2/c0c5c846-395a-47ac-9e8e-e598939f317d.json new file mode 100644 index 000000000..d0ad3364c --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Fireball-Mistral-Nemo-Base-2407-v1-DPO2/c0c5c846-395a-47ac-9e8e-e598939f317d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Mistral-Nemo-Base-2407-v1-DPO2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fireball-Mistral-Nemo-Base-2407-v1-DPO2", + "id": "EpistemeAI/Fireball-Mistral-Nemo-Base-2407-v1-DPO2", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1861 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4968 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0363 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.404 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3353 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-R1-Llama-3.1-8B-Medical-COT/6b3f6b59-a8eb-48c2-acbc-92e8f34b2dd6.json 
b/data/hfopenllm_v2/EpistemeAI/Fireball-R1-Llama-3.1-8B-Medical-COT/6b3f6b59-a8eb-48c2-acbc-92e8f34b2dd6.json new file mode 100644 index 000000000..4e38880fc --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Fireball-R1-Llama-3.1-8B-Medical-COT/6b3f6b59-a8eb-48c2-acbc-92e8f34b2dd6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-R1-Llama-3.1-8B-Medical-COT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fireball-R1-Llama-3.1-8B-Medical-COT", + "id": "EpistemeAI/Fireball-R1-Llama-3.1-8B-Medical-COT", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3216 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3716 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.327 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3114 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1402 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-R1-Llama-3.1-8B/d017e3bf-2abe-4b84-810e-e0eaf973adc3.json b/data/hfopenllm_v2/EpistemeAI/Fireball-R1-Llama-3.1-8B/d017e3bf-2abe-4b84-810e-e0eaf973adc3.json new file mode 100644 index 000000000..694f4c5d2 --- 
/dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Fireball-R1-Llama-3.1-8B/d017e3bf-2abe-4b84-810e-e0eaf973adc3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-R1-Llama-3.1-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fireball-R1-Llama-3.1-8B", + "id": "EpistemeAI/Fireball-R1-Llama-3.1-8B", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4427 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3643 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3112 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2483 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3288 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1115 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-R1.1-Llama-3.1-8B/62a3ecb8-f6d1-429c-807f-5545b2a5897f.json b/data/hfopenllm_v2/EpistemeAI/Fireball-R1.1-Llama-3.1-8B/62a3ecb8-f6d1-429c-807f-5545b2a5897f.json new file mode 100644 index 000000000..81219a0cf --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Fireball-R1.1-Llama-3.1-8B/62a3ecb8-f6d1-429c-807f-5545b2a5897f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/EpistemeAI_Fireball-R1.1-Llama-3.1-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fireball-R1.1-Llama-3.1-8B", + "id": "EpistemeAI/Fireball-R1.1-Llama-3.1-8B", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3676 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3326 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1382 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2517 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3419 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1115 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Llama-3.2-3B-Agent007-Coder/748557ce-1a49-4b3a-9c38-9007dc04aafb.json b/data/hfopenllm_v2/EpistemeAI/Llama-3.2-3B-Agent007-Coder/748557ce-1a49-4b3a-9c38-9007dc04aafb.json new file mode 100644 index 000000000..c5b57ce3d --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Llama-3.2-3B-Agent007-Coder/748557ce-1a49-4b3a-9c38-9007dc04aafb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Llama-3.2-3B-Agent007-Coder/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": 
"documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-3B-Agent007-Coder", + "id": "EpistemeAI/Llama-3.2-3B-Agent007-Coder", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.54 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4304 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.111 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3668 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Mistral-Nemo-Instruct-12B-Philosophy-Math/95d43d01-a75e-4af4-a2cc-b60f832071d3.json b/data/hfopenllm_v2/EpistemeAI/Mistral-Nemo-Instruct-12B-Philosophy-Math/95d43d01-a75e-4af4-a2cc-b60f832071d3.json new file mode 100644 index 000000000..8cad61e00 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Mistral-Nemo-Instruct-12B-Philosophy-Math/95d43d01-a75e-4af4-a2cc-b60f832071d3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Mistral-Nemo-Instruct-12B-Philosophy-Math/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"Mistral-Nemo-Instruct-12B-Philosophy-Math", + "id": "EpistemeAI/Mistral-Nemo-Instruct-12B-Philosophy-Math", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0695 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5365 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0959 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3314 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4292 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3296 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Mistral-Nemo-Instruct-12B-Philosophy-Math/ee2ab45a-4a93-4942-8510-aef93b39b7e3.json b/data/hfopenllm_v2/EpistemeAI/Mistral-Nemo-Instruct-12B-Philosophy-Math/ee2ab45a-4a93-4942-8510-aef93b39b7e3.json deleted file mode 100644 index d5231fe79..000000000 --- a/data/hfopenllm_v2/EpistemeAI/Mistral-Nemo-Instruct-12B-Philosophy-Math/ee2ab45a-4a93-4942-8510-aef93b39b7e3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Mistral-Nemo-Instruct-12B-Philosophy-Math/1762652579.6045282", - "retrieved_timestamp": "1762652579.604529", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - 
"name": "EpistemeAI/Mistral-Nemo-Instruct-12B-Philosophy-Math", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Mistral-Nemo-Instruct-12B-Philosophy-Math", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06946790072563022 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5364928342081372 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09592145015105741 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42921875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32962101063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/EpistemeAI/OpenReasoner-Llama-3.2-3B-rs1.0/4dc7c889-7839-4047-b48c-33be5b688e72.json b/data/hfopenllm_v2/EpistemeAI/OpenReasoner-Llama-3.2-3B-rs1.0/4dc7c889-7839-4047-b48c-33be5b688e72.json new file mode 100644 index 000000000..62e0df3f5 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/OpenReasoner-Llama-3.2-3B-rs1.0/4dc7c889-7839-4047-b48c-33be5b688e72.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_OpenReasoner-Llama-3.2-3B-rs1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenReasoner-Llama-3.2-3B-rs1.0", + "id": "EpistemeAI/OpenReasoner-Llama-3.2-3B-rs1.0", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7274 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4519 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1344 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2718 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3461 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3134 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy/644cdea0-49f2-43b9-b94d-55d31c0e0d54.json b/data/hfopenllm_v2/EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy/644cdea0-49f2-43b9-b94d-55d31c0e0d54.json deleted file mode 100644 index 987cb5aa0..000000000 --- a/data/hfopenllm_v2/EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy/644cdea0-49f2-43b9-b94d-55d31c0e0d54.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy/1762652579.6049678", - "retrieved_timestamp": "1762652579.6049678", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7100903380807368 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46279874531423665 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13972809667673716 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3194895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33111702127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy/751851c8-9a7f-4135-a106-eab4efbd0734.json b/data/hfopenllm_v2/EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy/751851c8-9a7f-4135-a106-eab4efbd0734.json new file mode 100644 index 000000000..afaf74d49 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy/751851c8-9a7f-4135-a106-eab4efbd0734.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy", + "id": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7101 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4628 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1397 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2768 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3195 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3311 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic/2930e30c-9f2e-4248-ae3b-ed7ffbd12f8c.json b/data/hfopenllm_v2/EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic/2930e30c-9f2e-4248-ae3b-ed7ffbd12f8c.json new file mode 100644 index 000000000..9a4c900d1 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic/2930e30c-9f2e-4248-ae3b-ed7ffbd12f8c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic", + "id": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7122 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4566 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1246 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2844 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3235 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.335 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic/e2422bfe-8569-4181-8ec1-955086bbb8bb.json b/data/hfopenllm_v2/EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic/e2422bfe-8569-4181-8ec1-955086bbb8bb.json deleted file mode 100644 index a6e0bb7bd..000000000 --- a/data/hfopenllm_v2/EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic/e2422bfe-8569-4181-8ec1-955086bbb8bb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic/1762652579.605414", - "retrieved_timestamp": "1762652579.6054149", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.712213593265868 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45659361690861294 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12462235649546828 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32348958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33502327127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent/98c2fc89-acc4-4740-9d24-c9e9c2cd9ad7.json b/data/hfopenllm_v2/EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent/98c2fc89-acc4-4740-9d24-c9e9c2cd9ad7.json deleted file mode 100644 index eb0cdb5ac..000000000 --- a/data/hfopenllm_v2/EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent/98c2fc89-acc4-4740-9d24-c9e9c2cd9ad7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent/1762652579.605665", - "retrieved_timestamp": "1762652579.6056662", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6915306941138402 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4524732961901791 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12915407854984895 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35775 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32903922872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent/c1acc460-aeb8-4a99-8ca5-376ab60fb74a.json b/data/hfopenllm_v2/EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent/c1acc460-aeb8-4a99-8ca5-376ab60fb74a.json new file mode 100644 index 000000000..f3d4c197b --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent/c1acc460-aeb8-4a99-8ca5-376ab60fb74a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent", + "id": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6915 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4525 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1292 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 
0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3578 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.329 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT-V2-ORPO/33b8b64f-7da5-45aa-bf80-7145ef704229.json b/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT-V2-ORPO/33b8b64f-7da5-45aa-bf80-7145ef704229.json new file mode 100644 index 000000000..40ecf34e1 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT-V2-ORPO/33b8b64f-7da5-45aa-bf80-7145ef704229.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Reasoning-Llama-3.1-CoT-RE1-NMT-V2-ORPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Reasoning-Llama-3.1-CoT-RE1-NMT-V2-ORPO", + "id": "EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT-V2-ORPO", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4553 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4804 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1292 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3931 + } + }, + { + "evaluation_name": "MMLU-PRO", + 
"source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3598 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT/2662d257-49e2-430d-b44f-b0b347c61271.json b/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT/2662d257-49e2-430d-b44f-b0b347c61271.json new file mode 100644 index 000000000..c9e3844b1 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT/2662d257-49e2-430d-b44f-b0b347c61271.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Reasoning-Llama-3.1-CoT-RE1-NMT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Reasoning-Llama-3.1-CoT-RE1-NMT", + "id": "EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4829 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4736 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1299 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3182 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3343 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.2/3e1fd9a0-a037-4278-baaa-b444d3723557.json b/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.2/3e1fd9a0-a037-4278-baaa-b444d3723557.json deleted file mode 100644 index b80d39adf..000000000 --- a/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.2/3e1fd9a0-a037-4278-baaa-b444d3723557.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Reasoning-Llama-3.2-1B-Instruct-v1.2/1762652579.606377", - "retrieved_timestamp": "1762652579.606377", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.2", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.2", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40871443325930756 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3324495305251265 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11785239361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.2/870b639b-ee7a-4b13-872b-52657539c836.json b/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.2/870b639b-ee7a-4b13-872b-52657539c836.json new file mode 100644 index 000000000..2e9d69e3c --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.2/870b639b-ee7a-4b13-872b-52657539c836.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/EpistemeAI_Reasoning-Llama-3.2-1B-Instruct-v1.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Reasoning-Llama-3.2-1B-Instruct-v1.2", + "id": "EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.2", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4087 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3324 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0506 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3222 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1179 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.3/6ff20678-a335-4fa8-8126-9f96ce247f34.json b/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.3/6ff20678-a335-4fa8-8126-9f96ce247f34.json new file mode 100644 index 000000000..a23d5fb0b --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.3/6ff20678-a335-4fa8-8126-9f96ce247f34.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Reasoning-Llama-3.2-1B-Instruct-v1.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + 
"source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Reasoning-Llama-3.2-1B-Instruct-v1.3", + "id": "EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.3", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3273 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3263 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0506 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.326 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1173 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.3/9c141030-9c3f-4e80-8b97-9297f3d81df6.json b/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.3/9c141030-9c3f-4e80-8b97-9297f3d81df6.json deleted file mode 100644 index e75d20542..000000000 --- a/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.3/9c141030-9c3f-4e80-8b97-9297f3d81df6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Reasoning-Llama-3.2-1B-Instruct-v1.3/1762652579.606596", - "retrieved_timestamp": "1762652579.6065972", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.3", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.3", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3272816127874041 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3262818751942827 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.326 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11727061170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO/19c4ea89-896a-4577-a386-c2470eaf743f.json b/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO/19c4ea89-896a-4577-a386-c2470eaf743f.json new file mode 100644 index 000000000..3b21e4f59 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO/19c4ea89-896a-4577-a386-c2470eaf743f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO", + "id": "EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.729 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4518 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1533 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2735 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3487 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.31 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO/d09af70f-bb55-40e8-88f2-a78f20c90b8e.json b/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO/d09af70f-bb55-40e8-88f2-a78f20c90b8e.json deleted file mode 100644 index 415036425..000000000 --- a/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO/d09af70f-bb55-40e8-88f2-a78f20c90b8e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO/1762652579.6070201", - "retrieved_timestamp": "1762652579.607021", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { 
- "score": 0.7289746760816855 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45181862491313 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15332326283987915 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3486666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3100066489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1/099d3be6-bd40-416f-90a1-582f66049c54.json b/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1/099d3be6-bd40-416f-90a1-582f66049c54.json deleted file mode 100644 index 5f2186554..000000000 --- a/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1/099d3be6-bd40-416f-90a1-582f66049c54.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Reasoning-Llama-3.2-3B-Math-Instruct-RE1/1762652579.606812", - "retrieved_timestamp": "1762652579.606813", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5119538380386264 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43810846923178864 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10800604229607251 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on 
GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34352083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2789228723404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1/22eb2479-16ff-4a56-b9e4-e8835da7ca0e.json b/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1/22eb2479-16ff-4a56-b9e4-e8835da7ca0e.json new file mode 100644 index 000000000..ceb6ab2db --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1/22eb2479-16ff-4a56-b9e4-e8835da7ca0e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Reasoning-Llama-3.2-3B-Math-Instruct-RE1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Reasoning-Llama-3.2-3B-Math-Instruct-RE1", + "id": "EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.512 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4381 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.108 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3435 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2789 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math/03d616a2-9a52-4014-8ecf-94dc93a5b4d2.json b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math/03d616a2-9a52-4014-8ecf-94dc93a5b4d2.json deleted file mode 100644 index 07c4a89e3..000000000 --- a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math/03d616a2-9a52-4014-8ecf-94dc93a5b4d2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math/1762652579.60724", - "retrieved_timestamp": "1762652579.607241", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5902893212232432 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.436379591348482 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14803625377643503 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3314270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28233045212765956 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math/aca3f1fd-9c46-47f6-81c6-dc56a702c1de.json b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math/aca3f1fd-9c46-47f6-81c6-dc56a702c1de.json new file mode 100644 index 000000000..3c0a3ed67 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math/aca3f1fd-9c46-47f6-81c6-dc56a702c1de.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math", + "id": "EpistemeAI/ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5903 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4364 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.148 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3314 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2823 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-0/071ca686-5950-4af4-80f2-969b1008e370.json b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-0/071ca686-5950-4af4-80f2-969b1008e370.json new file mode 100644 index 000000000..da61c8f6c --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-0/071ca686-5950-4af4-80f2-969b1008e370.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ReasoningCore-3B-0", + "id": "EpistemeAI/ReasoningCore-3B-0", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7341 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4446 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1586 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3554 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3172 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-0/9835468b-c049-4562-8633-864d29c7bb75.json b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-0/9835468b-c049-4562-8633-864d29c7bb75.json deleted file mode 100644 index b37e72087..000000000 --- 
a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-0/9835468b-c049-4562-8633-864d29c7bb75.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-0/1762652579.60745", - "retrieved_timestamp": "1762652579.60745", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/ReasoningCore-3B-0", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/ReasoningCore-3B-0", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7341454008696924 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44460707451155984 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15861027190332325 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35539583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3172373670212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-Instruct-r01-Reflect/78977c34-33f8-4037-86e0-dfce1d01c3f8.json b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-Instruct-r01-Reflect/78977c34-33f8-4037-86e0-dfce1d01c3f8.json new file mode 100644 index 000000000..99bcceb57 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-Instruct-r01-Reflect/78977c34-33f8-4037-86e0-dfce1d01c3f8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-Instruct-r01-Reflect/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ReasoningCore-3B-Instruct-r01-Reflect", + "id": "EpistemeAI/ReasoningCore-3B-Instruct-r01-Reflect", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": 
"LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7335 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.445 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1541 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2735 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3527 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3144 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-Instruct-r01-Reflect/b3efb02e-5312-48cf-b9e9-e90d3d5d9a7d.json b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-Instruct-r01-Reflect/b3efb02e-5312-48cf-b9e9-e90d3d5d9a7d.json deleted file mode 100644 index abc3d326f..000000000 --- a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-Instruct-r01-Reflect/b3efb02e-5312-48cf-b9e9-e90d3d5d9a7d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-Instruct-r01-Reflect/1762652579.607657", - "retrieved_timestamp": "1762652579.607658", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/ReasoningCore-3B-Instruct-r01-Reflect", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/ReasoningCore-3B-Instruct-r01-Reflect", - "additional_details": { - "precision": "bfloat16", - "architecture": 
"LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7334960128015887 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44496323889512146 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1540785498489426 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3527291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31441156914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-R01/480e4294-c8d9-4088-9b8c-7a239d57f683.json b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-R01/480e4294-c8d9-4088-9b8c-7a239d57f683.json new file mode 100644 index 000000000..78ed2cc8e --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-R01/480e4294-c8d9-4088-9b8c-7a239d57f683.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-R01/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ReasoningCore-3B-R01", + "id": "EpistemeAI/ReasoningCore-3B-R01", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2976 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4373 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + 
"source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1299 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3195 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2591 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-R01/5b06f64a-5c31-457e-a414-00e35888a6b2.json b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-R01/5b06f64a-5c31-457e-a414-00e35888a6b2.json deleted file mode 100644 index 37f4ad1d2..000000000 --- a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-R01/5b06f64a-5c31-457e-a414-00e35888a6b2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-R01/1762652579.607871", - "retrieved_timestamp": "1762652579.607872", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/ReasoningCore-3B-R01", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/ReasoningCore-3B-R01", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29760590787998065 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43725189001258497 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31945833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25914228723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2/83b3c488-c210-4ce7-8f7f-75d0d04d5b02.json b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2/83b3c488-c210-4ce7-8f7f-75d0d04d5b02.json deleted file mode 100644 index aefb4a3cb..000000000 --- a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2/83b3c488-c210-4ce7-8f7f-75d0d04d5b02.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-RE1-V2/1762652579.6080902", - "retrieved_timestamp": "1762652579.6080909", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/ReasoningCore-3B-RE1-V2", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/ReasoningCore-3B-RE1-V2", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7393161256576994 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44623884450165807 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3540625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31806848404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2/be9b21e8-90ce-451a-bcaf-2ebc7c72bc34.json 
b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2/be9b21e8-90ce-451a-bcaf-2ebc7c72bc34.json new file mode 100644 index 000000000..a387e5176 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2/be9b21e8-90ce-451a-bcaf-2ebc7c72bc34.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-RE1-V2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ReasoningCore-3B-RE1-V2", + "id": "EpistemeAI/ReasoningCore-3B-RE1-V2", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7393 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4462 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1563 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2735 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3541 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3181 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2A/512a09c1-6c1c-4120-a659-91809607393a.json b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2A/512a09c1-6c1c-4120-a659-91809607393a.json deleted file mode 100644 index e37c93585..000000000 --- 
a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2A/512a09c1-6c1c-4120-a659-91809607393a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-RE1-V2A/1762652579.608308", - "retrieved_timestamp": "1762652579.608309", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/ReasoningCore-3B-RE1-V2A", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/ReasoningCore-3B-RE1-V2A", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5732534120577845 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4189899823502799 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09290030211480363 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33520833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2736037234042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2A/b0054dd8-e62c-4d0c-9b18-090851c3a7e2.json b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2A/b0054dd8-e62c-4d0c-9b18-090851c3a7e2.json new file mode 100644 index 000000000..6f4873443 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2A/b0054dd8-e62c-4d0c-9b18-090851c3a7e2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-RE1-V2A/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ReasoningCore-3B-RE1-V2A", + "id": "EpistemeAI/ReasoningCore-3B-RE1-V2A", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } 
+ }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5733 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.419 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0929 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3352 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2736 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2B/985e479b-658a-4548-9b5e-c9c04b8838c1.json b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2B/985e479b-658a-4548-9b5e-c9c04b8838c1.json new file mode 100644 index 000000000..21879ed27 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2B/985e479b-658a-4548-9b5e-c9c04b8838c1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-RE1-V2B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ReasoningCore-3B-RE1-V2B", + "id": "EpistemeAI/ReasoningCore-3B-RE1-V2B", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5051 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4168 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1073 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3448 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2673 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2B/f92ef151-aa21-4240-8de6-1ff04bec55d9.json b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2B/f92ef151-aa21-4240-8de6-1ff04bec55d9.json deleted file mode 100644 index d31bfbb8d..000000000 --- a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2B/f92ef151-aa21-4240-8de6-1ff04bec55d9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-RE1-V2B/1762652579.60862", - "retrieved_timestamp": "1762652579.6086211", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/ReasoningCore-3B-RE1-V2B", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/ReasoningCore-3B-RE1-V2B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5051097753959495 - } 
- }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41678877951897175 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10725075528700906 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3448229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26728723404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2C/88cb3df4-7cbb-440a-87d4-9b2a89f3572c.json b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2C/88cb3df4-7cbb-440a-87d4-9b2a89f3572c.json deleted file mode 100644 index 10c9d18ca..000000000 --- a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2C/88cb3df4-7cbb-440a-87d4-9b2a89f3572c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-RE1-V2C/1762652579.608856", - "retrieved_timestamp": "1762652579.6088572", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/ReasoningCore-3B-RE1-V2C", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/ReasoningCore-3B-RE1-V2C", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5057092957796425 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41774567831526244 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09743202416918428 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { 
- "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34215625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2691156914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2C/d0ef8af4-156d-456d-9e33-b2cdb3f8c04e.json b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2C/d0ef8af4-156d-456d-9e33-b2cdb3f8c04e.json new file mode 100644 index 000000000..abce8ee39 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2C/d0ef8af4-156d-456d-9e33-b2cdb3f8c04e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-RE1-V2C/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ReasoningCore-3B-RE1-V2C", + "id": "EpistemeAI/ReasoningCore-3B-RE1-V2C", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5057 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4177 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0974 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3422 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + 
"dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2691 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-T1-V1/5050c787-2f95-4a17-a4b0-c094860627b5.json b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-T1-V1/5050c787-2f95-4a17-a4b0-c094860627b5.json new file mode 100644 index 000000000..e1a337bdd --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-T1-V1/5050c787-2f95-4a17-a4b0-c094860627b5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-T1-V1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ReasoningCore-3B-T1-V1", + "id": "EpistemeAI/ReasoningCore-3B-T1-V1", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7208 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4517 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1458 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.354 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.312 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-T1-V1/ec3846e6-d111-4c77-93fb-8d1d8106271a.json b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-T1-V1/ec3846e6-d111-4c77-93fb-8d1d8106271a.json deleted file mode 100644 index 68556657c..000000000 --- a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-T1-V1/ec3846e6-d111-4c77-93fb-8d1d8106271a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-T1-V1/1762652579.609117", - "retrieved_timestamp": "1762652579.609117", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/ReasoningCore-3B-T1-V1", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/ReasoningCore-3B-T1-V1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7207564816908026 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4516908992961786 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14577039274924472 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35403125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31200132978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-T1_1/bb5c8274-4324-47f2-94c5-d0c831ce0de7.json b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-T1_1/bb5c8274-4324-47f2-94c5-d0c831ce0de7.json new file mode 100644 index 000000000..a3f8f7216 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-T1_1/bb5c8274-4324-47f2-94c5-d0c831ce0de7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-T1_1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": 
"Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ReasoningCore-3B-T1_1", + "id": "EpistemeAI/ReasoningCore-3B-T1_1", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7275 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4524 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1541 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3554 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3117 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-T1_1/ce5a0509-e68c-40f4-8b7b-c56ba90c0e10.json b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-T1_1/ce5a0509-e68c-40f4-8b7b-c56ba90c0e10.json deleted file mode 100644 index b3ce5d61d..000000000 --- a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-T1_1/ce5a0509-e68c-40f4-8b7b-c56ba90c0e10.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-T1_1/1762652579.609335", - "retrieved_timestamp": "1762652579.6093361", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"EpistemeAI/ReasoningCore-3B-T1_1", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/ReasoningCore-3B-T1_1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7274509412802475 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45239424517060806 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1540785498489426 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3553645833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3116688829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/EpistemeAI2/Athene-codegemma-2-7b-it-alpaca-v1.2/8113a26a-5941-4f3d-872a-bdde5456ad97.json b/data/hfopenllm_v2/EpistemeAI2/Athene-codegemma-2-7b-it-alpaca-v1.2/8113a26a-5941-4f3d-872a-bdde5456ad97.json new file mode 100644 index 000000000..18f691dc4 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI2/Athene-codegemma-2-7b-it-alpaca-v1.2/8113a26a-5941-4f3d-872a-bdde5456ad97.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI2_Athene-codegemma-2-7b-it-alpaca-v1.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Athene-codegemma-2-7b-it-alpaca-v1.2", + "id": "EpistemeAI2/Athene-codegemma-2-7b-it-alpaca-v1.2", + "developer": "EpistemeAI2", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4351 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4175 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0423 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.417 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2297 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI2/Fireball-12B-v1.2/5b60047b-2e85-4a47-a31f-4c07f4bd2c30.json b/data/hfopenllm_v2/EpistemeAI2/Fireball-12B-v1.2/5b60047b-2e85-4a47-a31f-4c07f4bd2c30.json new file mode 100644 index 000000000..8a14100ea --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI2/Fireball-12B-v1.2/5b60047b-2e85-4a47-a31f-4c07f4bd2c30.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-12B-v1.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fireball-12B-v1.2", + "id": "EpistemeAI2/Fireball-12B-v1.2", + "developer": "EpistemeAI2", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1355 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5019 + } + }, + { + "evaluation_name": "MATH Level 
5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0415 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4173 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3337 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI2/Fireball-12B-v1.2/de86ca37-ffcb-41df-a0d1-68cb545ec1de.json b/data/hfopenllm_v2/EpistemeAI2/Fireball-12B-v1.2/de86ca37-ffcb-41df-a0d1-68cb545ec1de.json deleted file mode 100644 index 213b9399a..000000000 --- a/data/hfopenllm_v2/EpistemeAI2/Fireball-12B-v1.2/de86ca37-ffcb-41df-a0d1-68cb545ec1de.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-12B-v1.2/1762652579.609813", - "retrieved_timestamp": "1762652579.609814", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI2/Fireball-12B-v1.2", - "developer": "EpistemeAI2", - "inference_platform": "unknown", - "id": "EpistemeAI2/Fireball-12B-v1.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13553925805750963 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5018583230653281 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04154078549848943 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": 
"Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4173125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33369348404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1-8B-Philos/88d79858-3a35-43eb-8da6-95b80b5deef6.json b/data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1-8B-Philos/88d79858-3a35-43eb-8da6-95b80b5deef6.json new file mode 100644 index 000000000..1fef1a4b8 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1-8B-Philos/88d79858-3a35-43eb-8da6-95b80b5deef6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Alpaca-Llama3.1-8B-Philos/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fireball-Alpaca-Llama3.1-8B-Philos", + "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1-8B-Philos", + "developer": "EpistemeAI2", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4986 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4978 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1186 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4277 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3406 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1.01-8B-Philos/63266a49-01ea-40f1-83ef-778f391aff2b.json b/data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1.01-8B-Philos/63266a49-01ea-40f1-83ef-778f391aff2b.json new file mode 100644 index 000000000..81024bca2 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1.01-8B-Philos/63266a49-01ea-40f1-83ef-778f391aff2b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Alpaca-Llama3.1.01-8B-Philos/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fireball-Alpaca-Llama3.1.01-8B-Philos", + "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.01-8B-Philos", + "developer": "EpistemeAI2", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4212 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4956 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.136 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2886 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.4371 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3383 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1.03-8B-Philos/f0da069a-833f-489a-a923-c79542a3a9a6.json b/data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1.03-8B-Philos/f0da069a-833f-489a-a923-c79542a3a9a6.json new file mode 100644 index 000000000..b979e6ad4 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1.03-8B-Philos/f0da069a-833f-489a-a923-c79542a3a9a6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Alpaca-Llama3.1.03-8B-Philos/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fireball-Alpaca-Llama3.1.03-8B-Philos", + "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.03-8B-Philos", + "developer": "EpistemeAI2", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3881 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4951 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1284 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2785 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.428 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + 
"hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3355 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1.04-8B-Philos/205b9da8-d561-41ec-946e-1d2f9a43e437.json b/data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1.04-8B-Philos/205b9da8-d561-41ec-946e-1d2f9a43e437.json new file mode 100644 index 000000000..d4d82fcc6 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1.04-8B-Philos/205b9da8-d561-41ec-946e-1d2f9a43e437.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Alpaca-Llama3.1.04-8B-Philos/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fireball-Alpaca-Llama3.1.04-8B-Philos", + "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.04-8B-Philos", + "developer": "EpistemeAI2", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4084 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.493 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1201 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4372 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3403 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo/2ea4da56-4b95-4222-a4e2-f57c73e0ee4e.json b/data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo/2ea4da56-4b95-4222-a4e2-f57c73e0ee4e.json new file mode 100644 index 000000000..e5118311f --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo/2ea4da56-4b95-4222-a4e2-f57c73e0ee4e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo", + "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo", + "developer": "EpistemeAI2", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4866 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4881 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1307 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3932 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.3615 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo/7e03e547-5324-4c5d-b364-413014fad7eb.json b/data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo/7e03e547-5324-4c5d-b364-413014fad7eb.json deleted file mode 100644 index 6bc7475c7..000000000 --- a/data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo/7e03e547-5324-4c5d-b364-413014fad7eb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo/1762652579.610973", - "retrieved_timestamp": "1762652579.6109738", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI2/Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo", - "developer": "EpistemeAI2", - "inference_platform": "unknown", - "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4865756193566404 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48807730539009225 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13066465256797583 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3931875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3614527925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math/c086f693-cef1-4212-9c17-669b210f4caa.json b/data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math/c086f693-cef1-4212-9c17-669b210f4caa.json new file mode 100644 index 000000000..a1b7ae3fb --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math/c086f693-cef1-4212-9c17-669b210f4caa.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Alpaca-Llama3.1.07-8B-Philos-Math/1770682486.623709", + "retrieved_timestamp": 
"1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fireball-Alpaca-Llama3.1.07-8B-Philos-Math", + "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math", + "developer": "EpistemeAI2", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5079 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4847 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1201 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4063 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3531 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-C-R1-KTO-Reflection/290995f2-9982-4f29-ac74-dc646905206c.json b/data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-C-R1-KTO-Reflection/290995f2-9982-4f29-ac74-dc646905206c.json new file mode 100644 index 000000000..83634a0a2 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-C-R1-KTO-Reflection/290995f2-9982-4f29-ac74-dc646905206c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Alpaca-Llama3.1.08-8B-C-R1-KTO-Reflection/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open 
LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fireball-Alpaca-Llama3.1.08-8B-C-R1-KTO-Reflection", + "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-C-R1-KTO-Reflection", + "developer": "EpistemeAI2", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3952 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4955 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1246 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4048 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3593 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R1/c60e65e6-d771-4c53-80d0-c1e09aa39377.json b/data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R1/c60e65e6-d771-4c53-80d0-c1e09aa39377.json new file mode 100644 index 000000000..caf84f694 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R1/c60e65e6-d771-4c53-80d0-c1e09aa39377.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", 
+ "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R1", + "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R1", + "developer": "EpistemeAI2", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5316 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4828 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1239 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4103 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3523 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI2/Fireball-Llama-3.1-8B-Philos-Reflection/fcff202d-3b4f-4ba9-b3f6-1122d8abcac1.json b/data/hfopenllm_v2/EpistemeAI2/Fireball-Llama-3.1-8B-Philos-Reflection/fcff202d-3b4f-4ba9-b3f6-1122d8abcac1.json new file mode 100644 index 000000000..cbd4f8177 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI2/Fireball-Llama-3.1-8B-Philos-Reflection/fcff202d-3b4f-4ba9-b3f6-1122d8abcac1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Llama-3.1-8B-Philos-Reflection/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fireball-Llama-3.1-8B-Philos-Reflection", 
+ "id": "EpistemeAI2/Fireball-Llama-3.1-8B-Philos-Reflection", + "developer": "EpistemeAI2", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3596 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4898 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1284 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3957 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3551 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI2/Fireball-MathMistral-Nemo-Base-2407-v2dpo/5f0fa37a-e829-402b-b2ab-c68ffa248b6e.json b/data/hfopenllm_v2/EpistemeAI2/Fireball-MathMistral-Nemo-Base-2407-v2dpo/5f0fa37a-e829-402b-b2ab-c68ffa248b6e.json new file mode 100644 index 000000000..a3f2cd9b8 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI2/Fireball-MathMistral-Nemo-Base-2407-v2dpo/5f0fa37a-e829-402b-b2ab-c68ffa248b6e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-MathMistral-Nemo-Base-2407-v2dpo/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fireball-MathMistral-Nemo-Base-2407-v2dpo", + "id": "EpistemeAI2/Fireball-MathMistral-Nemo-Base-2407-v2dpo", + "developer": "EpistemeAI2", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 11.58 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3097 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4328 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.037 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.403 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1148 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math/0115907a-a473-4f12-8f0b-5dafd729fc44.json b/data/hfopenllm_v2/EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math/0115907a-a473-4f12-8f0b-5dafd729fc44.json deleted file mode 100644 index 5d197337c..000000000 --- a/data/hfopenllm_v2/EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math/0115907a-a473-4f12-8f0b-5dafd729fc44.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math/1762652579.61236", - "retrieved_timestamp": "1762652579.612361", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math", - "developer": "EpistemeAI2", - "inference_platform": "unknown", - "id": "EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5515465631191904 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48075580310342053 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1351963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36925 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3420046542553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math/a0b4a345-3530-4da2-8403-87259bbd1405.json b/data/hfopenllm_v2/EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math/a0b4a345-3530-4da2-8403-87259bbd1405.json new file mode 100644 index 000000000..fe44332dd --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math/a0b4a345-3530-4da2-8403-87259bbd1405.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math", + "id": "EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math", + "developer": "EpistemeAI2", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.5515 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4808 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1352 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3693 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.342 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT/3548f0ea-f3ab-4a0e-9c77-5ae62014ed44.json b/data/hfopenllm_v2/EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT/3548f0ea-f3ab-4a0e-9c77-5ae62014ed44.json new file mode 100644 index 000000000..0ddafa271 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT/3548f0ea-f3ab-4a0e-9c77-5ae62014ed44.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT", + "id": "EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT", + "developer": "EpistemeAI2", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4633 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4791 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1171 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3774 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3565 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT/63b6d34d-1a59-40b6-b663-1d81544867f2.json b/data/hfopenllm_v2/EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT/63b6d34d-1a59-40b6-b663-1d81544867f2.json deleted file mode 100644 index a936a91fd..000000000 --- a/data/hfopenllm_v2/EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT/63b6d34d-1a59-40b6-b663-1d81544867f2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT/1762652579.6125782", - "retrieved_timestamp": "1762652579.612579", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT", - "developer": "EpistemeAI2", - "inference_platform": "unknown", - "id": "EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4633195476890207 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4790834283312441 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11706948640483383 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37743750000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3564660904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/EpistemeAI2/Fireball-Phi-3-medium-4k-inst-Philos/707270e3-334b-4eba-84c0-2795ae53d79a.json b/data/hfopenllm_v2/EpistemeAI2/Fireball-Phi-3-medium-4k-inst-Philos/707270e3-334b-4eba-84c0-2795ae53d79a.json new file mode 100644 index 000000000..6b5e01456 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI2/Fireball-Phi-3-medium-4k-inst-Philos/707270e3-334b-4eba-84c0-2795ae53d79a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Phi-3-medium-4k-inst-Philos/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fireball-Phi-3-medium-4k-inst-Philos", + "id": "EpistemeAI2/Fireball-Phi-3-medium-4k-inst-Philos", + "developer": "EpistemeAI2", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 13.96 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5313 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6178 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1707 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3322 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4139 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4599 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Eric111/CatunaMayo-DPO/c827bee3-a181-42bc-9387-ca132d59c8ba.json b/data/hfopenllm_v2/Eric111/CatunaMayo-DPO/c827bee3-a181-42bc-9387-ca132d59c8ba.json new file mode 100644 index 000000000..f742caff8 --- /dev/null +++ b/data/hfopenllm_v2/Eric111/CatunaMayo-DPO/c827bee3-a181-42bc-9387-ca132d59c8ba.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Eric111_CatunaMayo-DPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CatunaMayo-DPO", + "id": "Eric111/CatunaMayo-DPO", + "developer": "Eric111", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4215 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5224 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0816 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + 
"source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.445 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.317 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Eric111/CatunaMayo-DPO/ef63850d-6acf-4d04-ac01-7ac407bf3b89.json b/data/hfopenllm_v2/Eric111/CatunaMayo-DPO/ef63850d-6acf-4d04-ac01-7ac407bf3b89.json deleted file mode 100644 index 449172282..000000000 --- a/data/hfopenllm_v2/Eric111/CatunaMayo-DPO/ef63850d-6acf-4d04-ac01-7ac407bf3b89.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Eric111_CatunaMayo-DPO/1762652579.613287", - "retrieved_timestamp": "1762652579.613288", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Eric111/CatunaMayo-DPO", - "developer": "Eric111", - "inference_platform": "unknown", - "id": "Eric111/CatunaMayo-DPO", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4214539643700936 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5223991323844243 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08157099697885196 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44503125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3169880319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/Eric111/CatunaMayo/9c2ab331-44f5-4306-a57c-5ddb0154ba63.json b/data/hfopenllm_v2/Eric111/CatunaMayo/9c2ab331-44f5-4306-a57c-5ddb0154ba63.json deleted file mode 100644 index 08af3256d..000000000 --- a/data/hfopenllm_v2/Eric111/CatunaMayo/9c2ab331-44f5-4306-a57c-5ddb0154ba63.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Eric111_CatunaMayo/1762652579.613048", - "retrieved_timestamp": "1762652579.613049", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Eric111/CatunaMayo", - "developer": "Eric111", - "inference_platform": "unknown", - "id": "Eric111/CatunaMayo", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4074156571231 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5243635518600797 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45398958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3178191489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/Eric111/CatunaMayo/d3e8949b-f6f8-459f-891b-f4900ff806cd.json b/data/hfopenllm_v2/Eric111/CatunaMayo/d3e8949b-f6f8-459f-891b-f4900ff806cd.json new file mode 100644 index 000000000..5cbeab7ab --- /dev/null +++ b/data/hfopenllm_v2/Eric111/CatunaMayo/d3e8949b-f6f8-459f-891b-f4900ff806cd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Eric111_CatunaMayo/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": 
"third_party" + }, + "model_info": { + "name": "CatunaMayo", + "id": "Eric111/CatunaMayo", + "developer": "Eric111", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4074 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5244 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0846 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.454 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3178 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties-v2/35d5f5e3-74eb-4eea-9f78-b7b8969830a2.json b/data/hfopenllm_v2/Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties-v2/35d5f5e3-74eb-4eea-9f78-b7b8969830a2.json new file mode 100644 index 000000000..fb25872ce --- /dev/null +++ b/data/hfopenllm_v2/Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties-v2/35d5f5e3-74eb-4eea-9f78-b7b8969830a2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Etherll_Chocolatine-3B-Instruct-DPO-Revised-Ties-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Chocolatine-3B-Instruct-DPO-Revised-Ties-v2", + "id": "Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties-v2", + "developer": "Etherll", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.374 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5411 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1631 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3238 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4649 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3978 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties-v2/80ff60c0-820c-425d-8b32-44fc61128c9f.json b/data/hfopenllm_v2/Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties-v2/80ff60c0-820c-425d-8b32-44fc61128c9f.json deleted file mode 100644 index b800fb7b3..000000000 --- a/data/hfopenllm_v2/Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties-v2/80ff60c0-820c-425d-8b32-44fc61128c9f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Etherll_Chocolatine-3B-Instruct-DPO-Revised-Ties-v2/1762652579.613742", - "retrieved_timestamp": "1762652579.613743", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties-v2", - "developer": "Etherll", - "inference_platform": "unknown", - "id": 
"Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37399322686028624 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5410649663618229 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16314199395770393 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4649375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39777260638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties/4cf4479a-622a-4bc2-86f2-aa526216f24c.json b/data/hfopenllm_v2/Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties/4cf4479a-622a-4bc2-86f2-aa526216f24c.json new file mode 100644 index 000000000..34efe21cd --- /dev/null +++ b/data/hfopenllm_v2/Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties/4cf4479a-622a-4bc2-86f2-aa526216f24c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Etherll_Chocolatine-3B-Instruct-DPO-Revised-Ties/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Chocolatine-3B-Instruct-DPO-Revised-Ties", + "id": "Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties", + "developer": "Etherll", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3725 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5411 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1631 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3238 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4649 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3978 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties/d3b94b8e-8612-4928-bdba-81226af143b2.json b/data/hfopenllm_v2/Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties/d3b94b8e-8612-4928-bdba-81226af143b2.json deleted file mode 100644 index bc1183e49..000000000 --- a/data/hfopenllm_v2/Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties/d3b94b8e-8612-4928-bdba-81226af143b2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Etherll_Chocolatine-3B-Instruct-DPO-Revised-Ties/1762652579.613493", - "retrieved_timestamp": "1762652579.613494", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties", - "developer": "Etherll", - "inference_platform": "unknown", - "id": "Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3724694920588483 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5410649663618229 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact 
Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16314199395770393 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4649375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39777260638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/Etherll/Herplete-LLM-Llama-3.1-8b-Ties/6ed27890-3e61-4c7d-8c94-a78c0b34ba32.json b/data/hfopenllm_v2/Etherll/Herplete-LLM-Llama-3.1-8b-Ties/6ed27890-3e61-4c7d-8c94-a78c0b34ba32.json new file mode 100644 index 000000000..f9d679b28 --- /dev/null +++ b/data/hfopenllm_v2/Etherll/Herplete-LLM-Llama-3.1-8b-Ties/6ed27890-3e61-4c7d-8c94-a78c0b34ba32.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Etherll_Herplete-LLM-Llama-3.1-8b-Ties/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Herplete-LLM-Llama-3.1-8b-Ties", + "id": "Etherll/Herplete-LLM-Llama-3.1-8b-Ties", + "developer": "Etherll", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6164 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5338 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1601 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3171 + } + }, + { 
+ "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4017 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3752 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Etherll/Herplete-LLM-Llama-3.1-8b/87b5e360-7867-4edd-b45e-e7bb92a91b69.json b/data/hfopenllm_v2/Etherll/Herplete-LLM-Llama-3.1-8b/87b5e360-7867-4edd-b45e-e7bb92a91b69.json new file mode 100644 index 000000000..c264e2240 --- /dev/null +++ b/data/hfopenllm_v2/Etherll/Herplete-LLM-Llama-3.1-8b/87b5e360-7867-4edd-b45e-e7bb92a91b69.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Etherll_Herplete-LLM-Llama-3.1-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Herplete-LLM-Llama-3.1-8b", + "id": "Etherll/Herplete-LLM-Llama-3.1-8b", + "developer": "Etherll", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4672 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5013 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0279 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2861 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.386 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3482 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Etherll/Herplete-LLM-Llama-3.1-8b/d93116b8-28ff-41ea-8273-56f7ae11cf18.json b/data/hfopenllm_v2/Etherll/Herplete-LLM-Llama-3.1-8b/d93116b8-28ff-41ea-8273-56f7ae11cf18.json new file mode 100644 index 000000000..e86363c53 --- /dev/null +++ b/data/hfopenllm_v2/Etherll/Herplete-LLM-Llama-3.1-8b/d93116b8-28ff-41ea-8273-56f7ae11cf18.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Etherll_Herplete-LLM-Llama-3.1-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Herplete-LLM-Llama-3.1-8b", + "id": "Etherll/Herplete-LLM-Llama-3.1-8b", + "developer": "Etherll", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6106 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5347 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1548 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3146 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3991 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + 
"source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3752 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Etherll/Qwen2.5-7B-della-test/ba8c2c17-64f6-4cdb-b3b9-8977ce1bdbe2.json b/data/hfopenllm_v2/Etherll/Qwen2.5-7B-della-test/ba8c2c17-64f6-4cdb-b3b9-8977ce1bdbe2.json new file mode 100644 index 000000000..dffbcaa3e --- /dev/null +++ b/data/hfopenllm_v2/Etherll/Qwen2.5-7B-della-test/ba8c2c17-64f6-4cdb-b3b9-8977ce1bdbe2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Etherll_Qwen2.5-7B-della-test/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-della-test", + "id": "Etherll/Qwen2.5-7B-della-test", + "developer": "Etherll", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7625 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5447 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4894 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4047 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.4361 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Etherll/Qwen2.5-Coder-7B-Instruct-Ties/5e5602cc-b4de-4247-aa6d-940817fc849b.json b/data/hfopenllm_v2/Etherll/Qwen2.5-Coder-7B-Instruct-Ties/5e5602cc-b4de-4247-aa6d-940817fc849b.json new file mode 100644 index 000000000..89117f4d7 --- /dev/null +++ b/data/hfopenllm_v2/Etherll/Qwen2.5-Coder-7B-Instruct-Ties/5e5602cc-b4de-4247-aa6d-940817fc849b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Etherll_Qwen2.5-Coder-7B-Instruct-Ties/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-Coder-7B-Instruct-Ties", + "id": "Etherll/Qwen2.5-Coder-7B-Instruct-Ties", + "developer": "Etherll", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5005 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4895 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2915 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4373 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3503 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Etherll/Qwen2.5-Coder-7B-Instruct-Ties/ea9f32e5-431d-4573-9ac9-25ebfa9c2c9e.json 
b/data/hfopenllm_v2/Etherll/Qwen2.5-Coder-7B-Instruct-Ties/ea9f32e5-431d-4573-9ac9-25ebfa9c2c9e.json deleted file mode 100644 index 154e0af36..000000000 --- a/data/hfopenllm_v2/Etherll/Qwen2.5-Coder-7B-Instruct-Ties/ea9f32e5-431d-4573-9ac9-25ebfa9c2c9e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Etherll_Qwen2.5-Coder-7B-Instruct-Ties/1762652579.61485", - "retrieved_timestamp": "1762652579.614851", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Etherll/Qwen2.5-Coder-7B-Instruct-Ties", - "developer": "Etherll", - "inference_platform": "unknown", - "id": "Etherll/Qwen2.5-Coder-7B-Instruct-Ties", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5005385709916355 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4895144464043051 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29154078549848944 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3296979865771812 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43728125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3503158244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/Etherll/Replete-LLM-V3-Llama-3.1-8b/cc5f27f5-36d8-49bb-9c9d-7879598bfe71.json b/data/hfopenllm_v2/Etherll/Replete-LLM-V3-Llama-3.1-8b/cc5f27f5-36d8-49bb-9c9d-7879598bfe71.json new file mode 100644 index 000000000..66e068f9c --- /dev/null +++ b/data/hfopenllm_v2/Etherll/Replete-LLM-V3-Llama-3.1-8b/cc5f27f5-36d8-49bb-9c9d-7879598bfe71.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Etherll_Replete-LLM-V3-Llama-3.1-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Replete-LLM-V3-Llama-3.1-8b", + "id": "Etherll/Replete-LLM-V3-Llama-3.1-8b", + "developer": "Etherll", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5263 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4543 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2273 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3516 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.347 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Etherll/SuperHermes/a641d61c-aa42-4bce-afc0-ba7639f0a24e.json b/data/hfopenllm_v2/Etherll/SuperHermes/a641d61c-aa42-4bce-afc0-ba7639f0a24e.json deleted file mode 100644 index 9fc51c393..000000000 --- a/data/hfopenllm_v2/Etherll/SuperHermes/a641d61c-aa42-4bce-afc0-ba7639f0a24e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Etherll_SuperHermes/1762652579.615286", - "retrieved_timestamp": "1762652579.615287", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Etherll/SuperHermes", - "developer": "Etherll", - "inference_platform": "unknown", - "id": "Etherll/SuperHermes", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { 
- "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5459015412438996 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5289531792679852 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16540785498489427 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44004166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39486369680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/Etherll/SuperHermes/aec03bd9-808a-4c3f-bbde-40bcac5775fb.json b/data/hfopenllm_v2/Etherll/SuperHermes/aec03bd9-808a-4c3f-bbde-40bcac5775fb.json new file mode 100644 index 000000000..688d993d6 --- /dev/null +++ b/data/hfopenllm_v2/Etherll/SuperHermes/aec03bd9-808a-4c3f-bbde-40bcac5775fb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Etherll_SuperHermes/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SuperHermes", + "id": "Etherll/SuperHermes", + "developer": "Etherll", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5459 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.529 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1654 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3238 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.44 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3949 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Eurdem/Defne-llama3.1-8B/b4ae6f0b-8a6b-4c60-8eb2-3e202877bcf5.json b/data/hfopenllm_v2/Eurdem/Defne-llama3.1-8B/b4ae6f0b-8a6b-4c60-8eb2-3e202877bcf5.json new file mode 100644 index 000000000..39493bbf0 --- /dev/null +++ b/data/hfopenllm_v2/Eurdem/Defne-llama3.1-8B/b4ae6f0b-8a6b-4c60-8eb2-3e202877bcf5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Eurdem_Defne-llama3.1-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Defne-llama3.1-8B", + "id": "Eurdem/Defne-llama3.1-8B", + "developer": "Eurdem", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5036 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5321 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1601 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": 
"Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4331 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3866 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FINGU-AI/Chocolatine-Fusion-14B/5d5a7561-8a41-48ea-ae1c-e986ac666f19.json b/data/hfopenllm_v2/FINGU-AI/Chocolatine-Fusion-14B/5d5a7561-8a41-48ea-ae1c-e986ac666f19.json deleted file mode 100644 index 21ce4669c..000000000 --- a/data/hfopenllm_v2/FINGU-AI/Chocolatine-Fusion-14B/5d5a7561-8a41-48ea-ae1c-e986ac666f19.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FINGU-AI_Chocolatine-Fusion-14B/1762652579.615752", - "retrieved_timestamp": "1762652579.615752", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FINGU-AI/Chocolatine-Fusion-14B", - "developer": "FINGU-AI", - "inference_platform": "unknown", - "id": "FINGU-AI/Chocolatine-Fusion-14B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 8.367 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6949028577507679 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.64132285324613 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3716442953020134 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49402083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": 
{ - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5261801861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/FINGU-AI/Chocolatine-Fusion-14B/c68deb4d-73a8-40ab-b4e5-1773b7ec4ed8.json b/data/hfopenllm_v2/FINGU-AI/Chocolatine-Fusion-14B/c68deb4d-73a8-40ab-b4e5-1773b7ec4ed8.json new file mode 100644 index 000000000..35a5b2bfe --- /dev/null +++ b/data/hfopenllm_v2/FINGU-AI/Chocolatine-Fusion-14B/c68deb4d-73a8-40ab-b4e5-1773b7ec4ed8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FINGU-AI_Chocolatine-Fusion-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Chocolatine-Fusion-14B", + "id": "FINGU-AI/Chocolatine-Fusion-14B", + "developer": "FINGU-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 8.367 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6949 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6413 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3852 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3716 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.494 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5262 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/FINGU-AI/L3-8B/a93c5674-599b-429c-a322-3c6bc7248f45.json b/data/hfopenllm_v2/FINGU-AI/L3-8B/a93c5674-599b-429c-a322-3c6bc7248f45.json new file mode 100644 index 000000000..c9e5228a0 --- /dev/null +++ b/data/hfopenllm_v2/FINGU-AI/L3-8B/a93c5674-599b-429c-a322-3c6bc7248f45.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FINGU-AI_L3-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3-8B", + "id": "FINGU-AI/L3-8B", + "developer": "FINGU-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7517 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4986 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2545 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3828 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3639 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FINGU-AI/L3-8B/f2a0c2ff-40a4-4a75-93ca-b611c4314dd5.json b/data/hfopenllm_v2/FINGU-AI/L3-8B/f2a0c2ff-40a4-4a75-93ca-b611c4314dd5.json deleted file mode 100644 index e210e0b9e..000000000 --- a/data/hfopenllm_v2/FINGU-AI/L3-8B/f2a0c2ff-40a4-4a75-93ca-b611c4314dd5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": 
"0.1.0", - "evaluation_id": "hfopenllm_v2/FINGU-AI_L3-8B/1762652579.615993", - "retrieved_timestamp": "1762652579.615993", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FINGU-AI/L3-8B", - "developer": "FINGU-AI", - "inference_platform": "unknown", - "id": "FINGU-AI/L3-8B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7517309627344335 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4985585187130108 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2545317220543807 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38283333333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36394614361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/FINGU-AI/Phi-4-RRStock/5e6374a6-56bd-4bd9-b04b-30ec9cf234bc.json b/data/hfopenllm_v2/FINGU-AI/Phi-4-RRStock/5e6374a6-56bd-4bd9-b04b-30ec9cf234bc.json new file mode 100644 index 000000000..e93edbe24 --- /dev/null +++ b/data/hfopenllm_v2/FINGU-AI/Phi-4-RRStock/5e6374a6-56bd-4bd9-b04b-30ec9cf234bc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FINGU-AI_Phi-4-RRStock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-4-RRStock", + "id": "FINGU-AI/Phi-4-RRStock", + "developer": "FINGU-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.652 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2855 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6443 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0582 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.38 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4479 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4883 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FINGU-AI/Q-Small-3B/11d9d5ea-29f2-412e-af48-858626ebeec5.json b/data/hfopenllm_v2/FINGU-AI/Q-Small-3B/11d9d5ea-29f2-412e-af48-858626ebeec5.json deleted file mode 100644 index 5954b0d54..000000000 --- a/data/hfopenllm_v2/FINGU-AI/Q-Small-3B/11d9d5ea-29f2-412e-af48-858626ebeec5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FINGU-AI_Q-Small-3B/1762652579.616768", - "retrieved_timestamp": "1762652579.61677", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FINGU-AI/Q-Small-3B", - "developer": "FINGU-AI", - "inference_platform": "unknown", - "id": "FINGU-AI/Q-Small-3B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4145345461154182 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.43185314557630744 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40054166666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27900598404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/FINGU-AI/Q-Small-3B/c3d2fc86-a5c4-4e92-bcf9-26096ca32ad4.json b/data/hfopenllm_v2/FINGU-AI/Q-Small-3B/c3d2fc86-a5c4-4e92-bcf9-26096ca32ad4.json new file mode 100644 index 000000000..61898827c --- /dev/null +++ b/data/hfopenllm_v2/FINGU-AI/Q-Small-3B/c3d2fc86-a5c4-4e92-bcf9-26096ca32ad4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FINGU-AI_Q-Small-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Q-Small-3B", + "id": "FINGU-AI/Q-Small-3B", + "developer": "FINGU-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4145 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4319 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0831 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.279 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FINGU-AI/QwQ-Buddy-32B-Alpha/1b49cb06-3ee1-4945-aaed-12c868d9e45e.json b/data/hfopenllm_v2/FINGU-AI/QwQ-Buddy-32B-Alpha/1b49cb06-3ee1-4945-aaed-12c868d9e45e.json new file mode 100644 index 000000000..a93048eac --- /dev/null +++ b/data/hfopenllm_v2/FINGU-AI/QwQ-Buddy-32B-Alpha/1b49cb06-3ee1-4945-aaed-12c868d9e45e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FINGU-AI_QwQ-Buddy-32B-Alpha/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwQ-Buddy-32B-Alpha", + "id": "FINGU-AI/QwQ-Buddy-32B-Alpha", + "developer": "FINGU-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 19.662 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3446 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6424 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3852 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3792 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.506 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5294 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FINGU-AI/QwQ-Buddy-32B-Alpha/32836e5d-d413-4e40-8c9c-4cb8c3daa23a.json b/data/hfopenllm_v2/FINGU-AI/QwQ-Buddy-32B-Alpha/32836e5d-d413-4e40-8c9c-4cb8c3daa23a.json deleted file mode 100644 index ae1ebccb9..000000000 --- a/data/hfopenllm_v2/FINGU-AI/QwQ-Buddy-32B-Alpha/32836e5d-d413-4e40-8c9c-4cb8c3daa23a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FINGU-AI_QwQ-Buddy-32B-Alpha/1762652579.617035", - "retrieved_timestamp": "1762652579.617036", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FINGU-AI/QwQ-Buddy-32B-Alpha", - "developer": "FINGU-AI", - "inference_platform": "unknown", - "id": "FINGU-AI/QwQ-Buddy-32B-Alpha", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 19.662 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34464221598691475 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.642442234274039 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37919463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5059895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5294215425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/FINGU-AI/RomboUltima-32B/65853bb5-ff3e-4880-8c32-ce9aabcadd7b.json b/data/hfopenllm_v2/FINGU-AI/RomboUltima-32B/65853bb5-ff3e-4880-8c32-ce9aabcadd7b.json new file mode 100644 
index 000000000..05c3a1ad9 --- /dev/null +++ b/data/hfopenllm_v2/FINGU-AI/RomboUltima-32B/65853bb5-ff3e-4880-8c32-ce9aabcadd7b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FINGU-AI_RomboUltima-32B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RomboUltima-32B", + "id": "FINGU-AI/RomboUltima-32B", + "developer": "FINGU-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 17.645 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6672 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6938 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5385 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3716 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4836 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5789 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FINGU-AI/RomboUltima-32B/65c5a05d-0b24-4767-88ff-24984fa0f988.json b/data/hfopenllm_v2/FINGU-AI/RomboUltima-32B/65c5a05d-0b24-4767-88ff-24984fa0f988.json deleted file mode 100644 index 3a43b80cc..000000000 --- a/data/hfopenllm_v2/FINGU-AI/RomboUltima-32B/65c5a05d-0b24-4767-88ff-24984fa0f988.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FINGU-AI_RomboUltima-32B/1762652579.6173398", - 
"retrieved_timestamp": "1762652579.617341", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FINGU-AI/RomboUltima-32B", - "developer": "FINGU-AI", - "inference_platform": "unknown", - "id": "FINGU-AI/RomboUltima-32B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 17.645 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6671509372908327 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6938448333620042 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5385196374622356 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3716442953020134 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4836354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.578873005319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/FINGU-AI/Ultimos-32B/7fecc176-debf-4bf7-b3f3-479d05678a1e.json b/data/hfopenllm_v2/FINGU-AI/Ultimos-32B/7fecc176-debf-4bf7-b3f3-479d05678a1e.json new file mode 100644 index 000000000..d9153beab --- /dev/null +++ b/data/hfopenllm_v2/FINGU-AI/Ultimos-32B/7fecc176-debf-4bf7-b3f3-479d05678a1e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FINGU-AI_Ultimos-32B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ultimos-32B", + "id": "FINGU-AI/Ultimos-32B", + "developer": "FINGU-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 9.604 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { 
+ "score": 0.1592 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2906 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2492 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3286 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1111 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FINGU-AI/Ultimos-32B/fa69d78a-e112-45ff-80c3-b4eb30d83ed9.json b/data/hfopenllm_v2/FINGU-AI/Ultimos-32B/fa69d78a-e112-45ff-80c3-b4eb30d83ed9.json deleted file mode 100644 index a71d7cebc..000000000 --- a/data/hfopenllm_v2/FINGU-AI/Ultimos-32B/fa69d78a-e112-45ff-80c3-b4eb30d83ed9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FINGU-AI_Ultimos-32B/1762652579.617578", - "retrieved_timestamp": "1762652579.617579", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FINGU-AI/Ultimos-32B", - "developer": "FINGU-AI", - "inference_platform": "unknown", - "id": "FINGU-AI/Ultimos-32B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 9.604 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1592197591280026 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2905531373728777 - } - }, - 
{ - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24916107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32860416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11112034574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/FallenMerick/Chewy-Lemon-Cookie-11B/3c965626-a264-40db-93e1-cd7659d0662e.json b/data/hfopenllm_v2/FallenMerick/Chewy-Lemon-Cookie-11B/3c965626-a264-40db-93e1-cd7659d0662e.json new file mode 100644 index 000000000..21887b781 --- /dev/null +++ b/data/hfopenllm_v2/FallenMerick/Chewy-Lemon-Cookie-11B/3c965626-a264-40db-93e1-cd7659d0662e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FallenMerick_Chewy-Lemon-Cookie-11B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Chewy-Lemon-Cookie-11B", + "id": "FallenMerick/Chewy-Lemon-Cookie-11B", + "developer": "FallenMerick", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 10.732 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4875 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5251 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0544 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, 
+ "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4546 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3267 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FallenMerick/Chewy-Lemon-Cookie-11B/f4f2289c-5b3c-4040-9e34-ac20352f45d7.json b/data/hfopenllm_v2/FallenMerick/Chewy-Lemon-Cookie-11B/f4f2289c-5b3c-4040-9e34-ac20352f45d7.json deleted file mode 100644 index 4b38d28f2..000000000 --- a/data/hfopenllm_v2/FallenMerick/Chewy-Lemon-Cookie-11B/f4f2289c-5b3c-4040-9e34-ac20352f45d7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FallenMerick_Chewy-Lemon-Cookie-11B/1762652579.6178062", - "retrieved_timestamp": "1762652579.6178071", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FallenMerick/Chewy-Lemon-Cookie-11B", - "developer": "FallenMerick", - "inference_platform": "unknown", - "id": "FallenMerick/Chewy-Lemon-Cookie-11B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 10.732 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4875242135312083 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5251122307375103 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45455208333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, 
- "max_score": 1 - }, - "score_details": { - "score": 0.3267121010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/Felladrin/Llama-160M-Chat-v1/0885ef86-d7ef-4261-8ccd-f0391c42ffe4.json b/data/hfopenllm_v2/Felladrin/Llama-160M-Chat-v1/0885ef86-d7ef-4261-8ccd-f0391c42ffe4.json deleted file mode 100644 index 3360b005a..000000000 --- a/data/hfopenllm_v2/Felladrin/Llama-160M-Chat-v1/0885ef86-d7ef-4261-8ccd-f0391c42ffe4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Felladrin_Llama-160M-Chat-v1/1762652579.618279", - "retrieved_timestamp": "1762652579.61828", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Felladrin/Llama-160M-Chat-v1", - "developer": "Felladrin", - "inference_platform": "unknown", - "id": "Felladrin/Llama-160M-Chat-v1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.162 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15754642127333254 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30360811146348365 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.366125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11361369680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/Felladrin/Llama-160M-Chat-v1/50fa6f0c-d689-4380-b619-253209b5badc.json b/data/hfopenllm_v2/Felladrin/Llama-160M-Chat-v1/50fa6f0c-d689-4380-b619-253209b5badc.json new file mode 100644 index 000000000..bb5ec6c1c --- /dev/null +++ b/data/hfopenllm_v2/Felladrin/Llama-160M-Chat-v1/50fa6f0c-d689-4380-b619-253209b5badc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Felladrin_Llama-160M-Chat-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + 
"name": "Llama-160M-Chat-v1", + "id": "Felladrin/Llama-160M-Chat-v1", + "developer": "Felladrin", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.162 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1575 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3036 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.006 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3661 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1136 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Felladrin/Minueza-32M-UltraChat/44324409-5cb3-438a-9751-9ee868b35233.json b/data/hfopenllm_v2/Felladrin/Minueza-32M-UltraChat/44324409-5cb3-438a-9751-9ee868b35233.json deleted file mode 100644 index b257697c0..000000000 --- a/data/hfopenllm_v2/Felladrin/Minueza-32M-UltraChat/44324409-5cb3-438a-9751-9ee868b35233.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Felladrin_Minueza-32M-UltraChat/1762652579.6187", - "retrieved_timestamp": "1762652579.6187022", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Felladrin/Minueza-32M-UltraChat", - "developer": "Felladrin", - "inference_platform": "unknown", - "id": 
"Felladrin/Minueza-32M-UltraChat", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 0.033 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13756277787381924 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2941478734048925 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.004531722054380665 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37418749999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11328125 - } - } - ] -} diff --git a/data/hfopenllm_v2/Felladrin/Minueza-32M-UltraChat/adb25c88-6113-4307-bbf0-d377f757bc18.json b/data/hfopenllm_v2/Felladrin/Minueza-32M-UltraChat/adb25c88-6113-4307-bbf0-d377f757bc18.json new file mode 100644 index 000000000..43e58afa9 --- /dev/null +++ b/data/hfopenllm_v2/Felladrin/Minueza-32M-UltraChat/adb25c88-6113-4307-bbf0-d377f757bc18.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Felladrin_Minueza-32M-UltraChat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Minueza-32M-UltraChat", + "id": "Felladrin/Minueza-32M-UltraChat", + "developer": "Felladrin", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 0.033 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1376 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2941 + } + }, + { + 
"evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0045 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2559 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3742 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1133 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/b9ac5e03-c878-4e46-a89c-1906f3b91dce.json b/data/hfopenllm_v2/FlofloB/100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/b9ac5e03-c878-4e46-a89c-1906f3b91dce.json new file mode 100644 index 000000000..1fd7e8ac3 --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/b9ac5e03-c878-4e46-a89c-1906f3b91dce.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FlofloB_100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", + "id": "FlofloB/100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", + "developer": "FlofloB", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.5 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3083 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 
0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3323 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0408 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2693 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3302 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1498 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/d37d499c-74cc-4fbb-9a3c-80776ebf2b82.json b/data/hfopenllm_v2/FlofloB/100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/d37d499c-74cc-4fbb-9a3c-80776ebf2b82.json deleted file mode 100644 index 7bbc36e69..000000000 --- a/data/hfopenllm_v2/FlofloB/100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/d37d499c-74cc-4fbb-9a3c-80776ebf2b82.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FlofloB_100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/1762652579.618947", - "retrieved_timestamp": "1762652579.618948", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FlofloB/100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.5 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30832191917445706 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.3323387445789459 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33021875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14976728723404256 - } - } - ] -} diff --git a/data/hfopenllm_v2/FlofloB/10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/d6a6badf-4472-44b5-af9e-4282e4406a8e.json b/data/hfopenllm_v2/FlofloB/10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/d6a6badf-4472-44b5-af9e-4282e4406a8e.json new file mode 100644 index 000000000..648617711 --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/d6a6badf-4472-44b5-af9e-4282e4406a8e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FlofloB_10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit", + "id": "FlofloB/10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit", + "developer": "FlofloB", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 16.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5097 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5215 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.0974 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.431 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3769 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/fc62bbce-e2e4-4b41-b632-a09eb8b0a4d6.json b/data/hfopenllm_v2/FlofloB/10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/fc62bbce-e2e4-4b41-b632-a09eb8b0a4d6.json deleted file mode 100644 index 3e59d854b..000000000 --- a/data/hfopenllm_v2/FlofloB/10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/fc62bbce-e2e4-4b41-b632-a09eb8b0a4d6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FlofloB_10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/1762652579.619225", - "retrieved_timestamp": "1762652579.6192262", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FlofloB/10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 16.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.509730847484674 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5214989784123593 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09743202416918428 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, 
- "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43095833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37691156914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/FlofloB/10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/157d1e12-ced4-4b48-a651-5671a2b85ee6.json b/data/hfopenllm_v2/FlofloB/10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/157d1e12-ced4-4b48-a651-5671a2b85ee6.json deleted file mode 100644 index f1022aa47..000000000 --- a/data/hfopenllm_v2/FlofloB/10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/157d1e12-ced4-4b48-a651-5671a2b85ee6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FlofloB_10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/1762652579.619448", - "retrieved_timestamp": "1762652579.6194491", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FlofloB/10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.5 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28154408081667753 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3305518729746925 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030966767371601207 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33021875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15408909574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/FlofloB/10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/92e62d3a-3091-4538-b6da-ba705e11687a.json b/data/hfopenllm_v2/FlofloB/10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/92e62d3a-3091-4538-b6da-ba705e11687a.json new file mode 100644 index 000000000..7f912b4b9 --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/92e62d3a-3091-4538-b6da-ba705e11687a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FlofloB_10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", + "id": "FlofloB/10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", + "developer": "FlofloB", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.5 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2815 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3306 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.031 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3302 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1541 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/04f5fdc6-f1cd-4b2d-947a-86fee67b3b62.json b/data/hfopenllm_v2/FlofloB/40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/04f5fdc6-f1cd-4b2d-947a-86fee67b3b62.json new file mode 100644 index 000000000..cbbb13eef --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/04f5fdc6-f1cd-4b2d-947a-86fee67b3b62.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FlofloB_40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", + "id": "FlofloB/40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", + "developer": "FlofloB", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.5 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3016 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3325 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0332 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3408 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on 
MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1485 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/5450695c-a1fd-431f-9201-19d858e48867.json b/data/hfopenllm_v2/FlofloB/40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/5450695c-a1fd-431f-9201-19d858e48867.json deleted file mode 100644 index 4589534f1..000000000 --- a/data/hfopenllm_v2/FlofloB/40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/5450695c-a1fd-431f-9201-19d858e48867.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FlofloB_40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/1762652579.619661", - "retrieved_timestamp": "1762652579.619661", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FlofloB/40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.5 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3015775919006015 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33246082656550385 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03323262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3408229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14852061170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/FlofloB/83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/5013ccfc-6bc5-4862-898c-1ca781f92572.json b/data/hfopenllm_v2/FlofloB/83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/5013ccfc-6bc5-4862-898c-1ca781f92572.json new file mode 100644 index 
000000000..b055b4a00 --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/5013ccfc-6bc5-4862-898c-1ca781f92572.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FlofloB_83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", + "id": "FlofloB/83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", + "developer": "FlofloB", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.5 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3347 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0302 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2735 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3289 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1555 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/d780dd37-3e71-400a-93be-f9512ad77d3e.json b/data/hfopenllm_v2/FlofloB/83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/d780dd37-3e71-400a-93be-f9512ad77d3e.json 
deleted file mode 100644 index 814388506..000000000 --- a/data/hfopenllm_v2/FlofloB/83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/d780dd37-3e71-400a-93be-f9512ad77d3e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FlofloB_83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/1762652579.619875", - "retrieved_timestamp": "1762652579.6198761", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FlofloB/83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.5 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28693976426991497 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33465340701604496 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030211480362537766 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3289479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15550199468085107 - } - } - ] -} diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1000k_fineweb/38fff98c-72b1-453c-a2cf-cf077dd19d10.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1000k_fineweb/38fff98c-72b1-453c-a2cf-cf077dd19d10.json new file mode 100644 index 000000000..fe9ff6a34 --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1000k_fineweb/38fff98c-72b1-453c-a2cf-cf077dd19d10.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1000k_fineweb/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"smollm2-135M_pretrained_1000k_fineweb", + "id": "FlofloB/smollm2-135M_pretrained_1000k_fineweb", + "developer": "FlofloB", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1485 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2918 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0091 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2626 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3581 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1164 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1000k_fineweb/4ba295dd-43f3-45d6-8abe-58cd6fb11eee.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1000k_fineweb/4ba295dd-43f3-45d6-8abe-58cd6fb11eee.json deleted file mode 100644 index 6b46acaf7..000000000 --- a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1000k_fineweb/4ba295dd-43f3-45d6-8abe-58cd6fb11eee.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1000k_fineweb/1762652579.620099", - "retrieved_timestamp": "1762652579.6201", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"FlofloB/smollm2-135M_pretrained_1000k_fineweb", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_1000k_fineweb", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14845388014911545 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2917939408206228 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35806249999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1163563829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed/42911928-ef64-474b-828a-02ce3383773e.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed/42911928-ef64-474b-828a-02ce3383773e.json new file mode 100644 index 000000000..1a53297cc --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed/42911928-ef64-474b-828a-02ce3383773e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed", + "id": "FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed", + "developer": "FlofloB", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1554 + } + }, + { + 
"evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3066 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.006 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2508 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.358 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1143 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed/7d967a13-3d40-4a9c-ac1d-956c2b2b6b98.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed/7d967a13-3d40-4a9c-ac1d-956c2b2b6b98.json deleted file mode 100644 index 398d574c0..000000000 --- a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed/7d967a13-3d40-4a9c-ac1d-956c2b2b6b98.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed/1762652579.620331", - "retrieved_timestamp": "1762652579.620332", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15537329840379083 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3066426145674803 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25083892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35803125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11427859042553191 - } - } - ] -} diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_selected/7989d7d3-c5e9-43c6-80a1-6de51533f9bf.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_selected/7989d7d3-c5e9-43c6-80a1-6de51533f9bf.json new file mode 100644 index 000000000..36ed45fb4 --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_selected/7989d7d3-c5e9-43c6-80a1-6de51533f9bf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1000k_fineweb_uncovai_selected/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "smollm2-135M_pretrained_1000k_fineweb_uncovai_selected", + "id": "FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_selected", + "developer": "FlofloB", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1468 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2932 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0068 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4048 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1157 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_selected/93f69ae3-c779-4f6b-8ac9-9bd8478e7eb2.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_selected/93f69ae3-c779-4f6b-8ac9-9bd8478e7eb2.json deleted file mode 100644 index aa2e41bd9..000000000 --- a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_selected/93f69ae3-c779-4f6b-8ac9-9bd8478e7eb2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1000k_fineweb_uncovai_selected/1762652579.62055", - "retrieved_timestamp": "1762652579.6205509", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_selected", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_selected", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14678054229444543 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29317781029884354 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4047604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11569148936170212 - } - } - ] -} diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1200k_fineweb/3b102085-a3f6-4da6-abdf-f906f0b37f3c.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1200k_fineweb/3b102085-a3f6-4da6-abdf-f906f0b37f3c.json deleted file mode 100644 index d7f19d6b0..000000000 --- a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1200k_fineweb/3b102085-a3f6-4da6-abdf-f906f0b37f3c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1200k_fineweb/1762652579.620773", - "retrieved_timestamp": "1762652579.620773", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_1200k_fineweb", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_1200k_fineweb", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15809607397261488 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29409841468035297 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3713645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10762965425531915 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1200k_fineweb/5b9acd52-7eb6-4099-98be-ecd6cae07835.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1200k_fineweb/5b9acd52-7eb6-4099-98be-ecd6cae07835.json new file mode 100644 index 000000000..0446e5349 --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1200k_fineweb/5b9acd52-7eb6-4099-98be-ecd6cae07835.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1200k_fineweb/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "smollm2-135M_pretrained_1200k_fineweb", + "id": "FlofloB/smollm2-135M_pretrained_1200k_fineweb", + "developer": "FlofloB", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1581 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2941 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0068 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3714 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1076 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed/666bef5a-2d62-4743-bff1-07365716ab19.json 
b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed/666bef5a-2d62-4743-bff1-07365716ab19.json new file mode 100644 index 000000000..197afa5d3 --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed/666bef5a-2d62-4743-bff1-07365716ab19.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed", + "id": "FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed", + "developer": "FlofloB", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1578 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.295 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0008 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.37 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1139 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed/c8e1bfa5-d1dc-4bcb-9b91-397302006b1d.json 
b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed/c8e1bfa5-d1dc-4bcb-9b91-397302006b1d.json deleted file mode 100644 index 5a6eda002..000000000 --- a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed/c8e1bfa5-d1dc-4bcb-9b91-397302006b1d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed/1762652579.6209762", - "retrieved_timestamp": "1762652579.620977", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.157771379938563 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29496212100634955 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36999999999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11394614361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_selected/85de411c-2308-4824-bd6e-3327eeb6fe3e.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_selected/85de411c-2308-4824-bd6e-3327eeb6fe3e.json new file mode 100644 index 000000000..bc92a7eb5 --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_selected/85de411c-2308-4824-bd6e-3327eeb6fe3e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1200k_fineweb_uncovai_selected/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM 
v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "smollm2-135M_pretrained_1200k_fineweb_uncovai_selected", + "id": "FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_selected", + "developer": "FlofloB", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1585 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.296 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0076 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3567 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1164 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_selected/d4dabe47-4bc9-46fe-8c2d-206d5ed8874a.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_selected/d4dabe47-4bc9-46fe-8c2d-206d5ed8874a.json deleted file mode 100644 index 09e90f1a5..000000000 --- a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_selected/d4dabe47-4bc9-46fe-8c2d-206d5ed8874a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1200k_fineweb_uncovai_selected/1762652579.6211882", - "retrieved_timestamp": "1762652579.6211882", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_selected", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_selected", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15847063569107744 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29604672415652145 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0075528700906344415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3567291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11643949468085106 - } - } - ] -} diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1400k_fineweb/c5cb1709-7ba4-438c-8af7-d96cb4ab4ad0.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1400k_fineweb/c5cb1709-7ba4-438c-8af7-d96cb4ab4ad0.json deleted file mode 100644 index bbb766c22..000000000 --- a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1400k_fineweb/c5cb1709-7ba4-438c-8af7-d96cb4ab4ad0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1400k_fineweb/1762652579.6213892", - "retrieved_timestamp": "1762652579.62139", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_1400k_fineweb", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_1400k_fineweb", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17638089158987041 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2921781950918249 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.011329305135951661 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3873333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1079621010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1400k_fineweb/df28c4c2-d6a4-4ab0-a1ac-faf00a93de99.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1400k_fineweb/df28c4c2-d6a4-4ab0-a1ac-faf00a93de99.json new file mode 100644 index 000000000..86c5be30e --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1400k_fineweb/df28c4c2-d6a4-4ab0-a1ac-faf00a93de99.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1400k_fineweb/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "smollm2-135M_pretrained_1400k_fineweb", + "id": "FlofloB/smollm2-135M_pretrained_1400k_fineweb", + "developer": "FlofloB", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1764 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2922 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0113 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3873 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.108 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed/6fb37ad0-b41b-4ad7-91a2-79bbb835d445.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed/6fb37ad0-b41b-4ad7-91a2-79bbb835d445.json new file mode 100644 index 000000000..4fd364781 --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed/6fb37ad0-b41b-4ad7-91a2-79bbb835d445.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed", + "id": "FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed", + "developer": "FlofloB", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1707 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2992 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0106 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3939 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1105 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed/75cbe3a2-cbfa-482b-8c35-b74caf046df8.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed/75cbe3a2-cbfa-482b-8c35-b74caf046df8.json deleted file mode 100644 index 20a46133e..000000000 --- a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed/75cbe3a2-cbfa-482b-8c35-b74caf046df8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed/1762652579.621598", - "retrieved_timestamp": "1762652579.621599", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17066051410258115 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2992388897714206 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3939375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11045545212765957 - } - } - ] -} diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_selected/062fa044-0fd4-49ea-988d-f477c7930496.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_selected/062fa044-0fd4-49ea-988d-f477c7930496.json deleted file mode 100644 index 121697d20..000000000 --- a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_selected/062fa044-0fd4-49ea-988d-f477c7930496.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1400k_fineweb_uncovai_selected/1762652579.621813", - "retrieved_timestamp": "1762652579.621814", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_selected", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_selected", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15384956360235286 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.291672957517483 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37406249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { 
- "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11369680851063829 - } - } - ] -} diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_selected/c41df02e-5aff-4de6-a1c4-d45b5585e29d.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_selected/c41df02e-5aff-4de6-a1c4-d45b5585e29d.json new file mode 100644 index 000000000..c93f56f5d --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_selected/c41df02e-5aff-4de6-a1c4-d45b5585e29d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1400k_fineweb_uncovai_selected/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "smollm2-135M_pretrained_1400k_fineweb_uncovai_selected", + "id": "FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_selected", + "developer": "FlofloB", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1538 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2917 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0106 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3741 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1137 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed/aa587b4a-9c19-4231-ba72-9b66446460f9.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed/aa587b4a-9c19-4231-ba72-9b66446460f9.json new file mode 100644 index 000000000..c51687eca --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed/aa587b4a-9c19-4231-ba72-9b66446460f9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed", + "id": "FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed", + "developer": "FlofloB", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1475 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0038 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3578 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.112 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed/af001f63-a060-49ec-9bd3-f06b2ad96dc8.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed/af001f63-a060-49ec-9bd3-f06b2ad96dc8.json deleted file mode 100644 index 94eda8b53..000000000 --- a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed/af001f63-a060-49ec-9bd3-f06b2ad96dc8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed/1762652579.622025", - "retrieved_timestamp": "1762652579.622026", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14747979804695985 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30287372123209483 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0037764350453172208 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35784375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11195146276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_selected/556e1124-135e-473f-9e62-852f095b3118.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_selected/556e1124-135e-473f-9e62-852f095b3118.json deleted file mode 100644 index 6fb925664..000000000 --- a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_selected/556e1124-135e-473f-9e62-852f095b3118.json +++ 
/dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_200k_fineweb_uncovai_selected/1762652579.622248", - "retrieved_timestamp": "1762652579.622248", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_selected", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_selected", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13451530827094332 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2927186496606003 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0075528700906344415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25083892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36603125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11311502659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_selected/be14e75e-4fb1-41aa-b168-1ec23eb305e0.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_selected/be14e75e-4fb1-41aa-b168-1ec23eb305e0.json new file mode 100644 index 000000000..6785da5dc --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_selected/be14e75e-4fb1-41aa-b168-1ec23eb305e0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_200k_fineweb_uncovai_selected/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "smollm2-135M_pretrained_200k_fineweb_uncovai_selected", + "id": "FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_selected", + "developer": "FlofloB", + "inference_platform": "unknown", 
+ "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1345 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2927 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0076 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2508 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.366 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1131 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_400k_fineweb/73be4a2b-28c9-4208-8107-3734fea25008.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_400k_fineweb/73be4a2b-28c9-4208-8107-3734fea25008.json new file mode 100644 index 000000000..ec2adafe9 --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_400k_fineweb/73be4a2b-28c9-4208-8107-3734fea25008.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_400k_fineweb/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "smollm2-135M_pretrained_400k_fineweb", + "id": "FlofloB/smollm2-135M_pretrained_400k_fineweb", + "developer": "FlofloB", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } + }, + "evaluation_results": [ + { + 
"evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1511 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2972 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0121 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2525 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3794 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1163 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_400k_fineweb/982d6727-aa6c-41fe-abe7-47811ad3c9da.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_400k_fineweb/982d6727-aa6c-41fe-abe7-47811ad3c9da.json deleted file mode 100644 index bcd730774..000000000 --- a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_400k_fineweb/982d6727-aa6c-41fe-abe7-47811ad3c9da.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_400k_fineweb/1762652579.62247", - "retrieved_timestamp": "1762652579.62247", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_400k_fineweb", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_400k_fineweb", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1511267880335288 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29723404576965046 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3794270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11627327127659574 - } - } - ] -} diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed/0bf2fa4e-3bcb-46ff-a068-f4c796123c6d.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed/0bf2fa4e-3bcb-46ff-a068-f4c796123c6d.json new file mode 100644 index 000000000..f405d336a --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed/0bf2fa4e-3bcb-46ff-a068-f4c796123c6d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed", + "id": "FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed", + "developer": "FlofloB", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1556 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3049 + } + }, + { + 
"evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0091 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.255 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.386 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1138 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed/7b8f532b-c3a5-48fe-9d3f-e9c8b6f6897d.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed/7b8f532b-c3a5-48fe-9d3f-e9c8b6f6897d.json deleted file mode 100644 index 90451b957..000000000 --- a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed/7b8f532b-c3a5-48fe-9d3f-e9c8b6f6897d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed/1762652579.622689", - "retrieved_timestamp": "1762652579.62269", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.155648124753432 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3048804422828362 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2550335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38599999999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11377992021276596 - } - } - ] -} diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_selected/1ce9e40f-5613-4d95-b451-a34f3feb961e.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_selected/1ce9e40f-5613-4d95-b451-a34f3feb961e.json deleted file mode 100644 index e68f8d6be..000000000 --- a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_selected/1ce9e40f-5613-4d95-b451-a34f3feb961e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_400k_fineweb_uncovai_selected/1762652579.62291", - "retrieved_timestamp": "1762652579.622911", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_selected", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_selected", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15842076800666677 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2925171720555518 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.38199999999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1157746010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_selected/9f8fc05a-8658-4ed3-994a-965e6882d242.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_selected/9f8fc05a-8658-4ed3-994a-965e6882d242.json new file mode 100644 index 000000000..2de316a22 --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_selected/9f8fc05a-8658-4ed3-994a-965e6882d242.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_400k_fineweb_uncovai_selected/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "smollm2-135M_pretrained_400k_fineweb_uncovai_selected", + "id": "FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_selected", + "developer": "FlofloB", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1584 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2925 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0068 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2542 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.382 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": 
"TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1158 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_600k_fineweb/bf6d3042-aa42-45b5-8bb1-49a8c5e2fd50.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_600k_fineweb/bf6d3042-aa42-45b5-8bb1-49a8c5e2fd50.json deleted file mode 100644 index f04594ab4..000000000 --- a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_600k_fineweb/bf6d3042-aa42-45b5-8bb1-49a8c5e2fd50.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_600k_fineweb/1762652579.623165", - "retrieved_timestamp": "1762652579.6231658", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_600k_fineweb", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_600k_fineweb", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16391618682872555 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3013718229200533 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38085416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11261635638297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_600k_fineweb/ced11f6e-490d-42e9-8f3e-00e22cfc2910.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_600k_fineweb/ced11f6e-490d-42e9-8f3e-00e22cfc2910.json new file mode 100644 index 000000000..13f664341 --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_600k_fineweb/ced11f6e-490d-42e9-8f3e-00e22cfc2910.json @@ -0,0 +1,132 @@ +{ + 
"schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_600k_fineweb/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "smollm2-135M_pretrained_600k_fineweb", + "id": "FlofloB/smollm2-135M_pretrained_600k_fineweb", + "developer": "FlofloB", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1639 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3014 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.006 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3809 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1126 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed/4446e0a4-abdc-48a4-83f7-cc3d4aeede78.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed/4446e0a4-abdc-48a4-83f7-cc3d4aeede78.json deleted file mode 100644 index cc5f45be5..000000000 --- a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed/4446e0a4-abdc-48a4-83f7-cc3d4aeede78.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/FlofloB_smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed/1762652579.623383", - "retrieved_timestamp": "1762652579.623384", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16414114549395603 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30001678726257036 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3793333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1146941489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed/70ba788b-fe8c-4667-a859-0fb122de22b9.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed/70ba788b-fe8c-4667-a859-0fb122de22b9.json new file mode 100644 index 000000000..3978e6a37 --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed/70ba788b-fe8c-4667-a859-0fb122de22b9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed", + "id": "FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed", + "developer": "FlofloB", + "inference_platform": "unknown", + "additional_details": { + 
"precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1641 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0091 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2626 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3793 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1147 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_selected/52f63809-1390-4a66-8ae2-8f150425d2d9.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_selected/52f63809-1390-4a66-8ae2-8f150425d2d9.json deleted file mode 100644 index e8cb37404..000000000 --- a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_selected/52f63809-1390-4a66-8ae2-8f150425d2d9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_600k_fineweb_uncovai_selected/1762652579.623598", - "retrieved_timestamp": "1762652579.623599", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_selected", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": 
"FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_selected", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16059389087620846 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2983444769655102 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0075528700906344415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3846354166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11619015957446809 - } - } - ] -} diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_selected/e93f2d5f-7ffc-44b8-b2dc-d07b73de44ab.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_selected/e93f2d5f-7ffc-44b8-b2dc-d07b73de44ab.json new file mode 100644 index 000000000..4fb6c3205 --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_selected/e93f2d5f-7ffc-44b8-b2dc-d07b73de44ab.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_600k_fineweb_uncovai_selected/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "smollm2-135M_pretrained_600k_fineweb_uncovai_selected", + "id": "FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_selected", + "developer": "FlofloB", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1606 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2983 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0076 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3846 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1162 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_800k_fineweb/15cacfe0-bdfb-4b87-a813-bfa70ff71984.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_800k_fineweb/15cacfe0-bdfb-4b87-a813-bfa70ff71984.json new file mode 100644 index 000000000..3d0b3cb65 --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_800k_fineweb/15cacfe0-bdfb-4b87-a813-bfa70ff71984.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_800k_fineweb/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "smollm2-135M_pretrained_800k_fineweb", + "id": "FlofloB/smollm2-135M_pretrained_800k_fineweb", + "developer": "FlofloB", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1641 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 
+ }, + "score_details": { + "score": 0.2959 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0083 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2492 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3701 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1152 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_800k_fineweb/6b7b5025-01c0-470b-8856-b628b11f4e6c.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_800k_fineweb/6b7b5025-01c0-470b-8856-b628b11f4e6c.json deleted file mode 100644 index 4242730b2..000000000 --- a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_800k_fineweb/6b7b5025-01c0-470b-8856-b628b11f4e6c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_800k_fineweb/1762652579.623817", - "retrieved_timestamp": "1762652579.623818", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_800k_fineweb", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_800k_fineweb", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16414114549395603 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29594449748780255 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24916107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.370125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11519281914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed/b85e5d55-dbdd-4383-ac86-75c83648c522.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed/b85e5d55-dbdd-4383-ac86-75c83648c522.json deleted file mode 100644 index a49d62010..000000000 --- a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed/b85e5d55-dbdd-4383-ac86-75c83648c522.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed/1762652579.62404", - "retrieved_timestamp": "1762652579.6240408", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1622927166584662 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3038096660271284 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3992708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11377992021276596 - } - } - ] -} diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed/cff00e2a-41e3-40d2-aab3-4bb3bd7d0d0e.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed/cff00e2a-41e3-40d2-aab3-4bb3bd7d0d0e.json new file mode 100644 index 000000000..ee1ff5fea --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed/cff00e2a-41e3-40d2-aab3-4bb3bd7d0d0e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed", + "id": "FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed", + "developer": "FlofloB", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1623 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3038 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0068 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2525 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3993 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": 
"TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1138 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_selected/dcddcf2f-f3fe-4f45-8c42-e95b1ac99d88.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_selected/dcddcf2f-f3fe-4f45-8c42-e95b1ac99d88.json deleted file mode 100644 index 8eb0f5d83..000000000 --- a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_selected/dcddcf2f-f3fe-4f45-8c42-e95b1ac99d88.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_800k_fineweb_uncovai_selected/1762652579.624255", - "retrieved_timestamp": "1762652579.624256", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_selected", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_selected", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14742993036254914 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2942808065535252 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.004531722054380665 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3766354166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11303191489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_selected/e1eab0cf-2c6d-44b2-8aaf-a75347741529.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_selected/e1eab0cf-2c6d-44b2-8aaf-a75347741529.json new file mode 100644 index 000000000..c0e2e5309 --- /dev/null +++ 
b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_selected/e1eab0cf-2c6d-44b2-8aaf-a75347741529.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_800k_fineweb_uncovai_selected/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "smollm2-135M_pretrained_800k_fineweb_uncovai_selected", + "id": "FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_selected", + "developer": "FlofloB", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1474 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2943 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0045 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3766 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.113 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2_pretrained_200k_fineweb/3d10ce78-6474-48c0-8eb3-c5b7146d3e06.json b/data/hfopenllm_v2/FlofloB/smollm2_pretrained_200k_fineweb/3d10ce78-6474-48c0-8eb3-c5b7146d3e06.json deleted file mode 100644 index d086df090..000000000 --- a/data/hfopenllm_v2/FlofloB/smollm2_pretrained_200k_fineweb/3d10ce78-6474-48c0-8eb3-c5b7146d3e06.json +++ /dev/null 
@@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2_pretrained_200k_fineweb/1762652579.624471", - "retrieved_timestamp": "1762652579.624471", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FlofloB/smollm2_pretrained_200k_fineweb", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2_pretrained_200k_fineweb", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15270039051937748 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.299468427221449 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0037764350453172208 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24748322147651006 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3699375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11594082446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/FlofloB/smollm2_pretrained_200k_fineweb/ed221db8-cf81-4257-8785-db9381eec5b7.json b/data/hfopenllm_v2/FlofloB/smollm2_pretrained_200k_fineweb/ed221db8-cf81-4257-8785-db9381eec5b7.json new file mode 100644 index 000000000..efa15e0a4 --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2_pretrained_200k_fineweb/ed221db8-cf81-4257-8785-db9381eec5b7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2_pretrained_200k_fineweb/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "smollm2_pretrained_200k_fineweb", + "id": "FlofloB/smollm2_pretrained_200k_fineweb", + "developer": "FlofloB", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + 
"dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1527 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0038 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2475 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3699 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1159 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/41e2bd81-2369-416a-9287-021872efd931.json b/data/hfopenllm_v2/FlofloB/test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/41e2bd81-2369-416a-9287-021872efd931.json deleted file mode 100644 index 3184471c3..000000000 --- a/data/hfopenllm_v2/FlofloB/test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/41e2bd81-2369-416a-9287-021872efd931.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FlofloB_test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/1762652579.6246889", - "retrieved_timestamp": "1762652579.6246898", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FlofloB/test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit", - "additional_details": { - 
"precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 16.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.521546164177715 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5240829189778252 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11027190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42441666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3720910904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/FlofloB/test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/b314468b-401a-4318-b022-c966bf3366aa.json b/data/hfopenllm_v2/FlofloB/test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/b314468b-401a-4318-b022-c966bf3366aa.json new file mode 100644 index 000000000..0db2c1292 --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/b314468b-401a-4318-b022-c966bf3366aa.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FlofloB_test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit", + "id": "FlofloB/test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit", + "developer": "FlofloB", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 16.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5215 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5241 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1103 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3112 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4244 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3721 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FuJhen/ft-openhermes-25-mistral-7b-irca-dpo-pairs/a0dbb2eb-66c7-48a3-a85c-725b49141edf.json b/data/hfopenllm_v2/FuJhen/ft-openhermes-25-mistral-7b-irca-dpo-pairs/a0dbb2eb-66c7-48a3-a85c-725b49141edf.json new file mode 100644 index 000000000..09f22c033 --- /dev/null +++ b/data/hfopenllm_v2/FuJhen/ft-openhermes-25-mistral-7b-irca-dpo-pairs/a0dbb2eb-66c7-48a3-a85c-725b49141edf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FuJhen_ft-openhermes-25-mistral-7b-irca-dpo-pairs/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ft-openhermes-25-mistral-7b-irca-dpo-pairs", + "id": "FuJhen/ft-openhermes-25-mistral-7b-irca-dpo-pairs", + "developer": "FuJhen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 14.483 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.542 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.4773 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0483 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2785 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4174 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2956 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FuJhen/ft-openhermes-25-mistral-7b-irca-dpo-pairs/bfaec047-518f-42a0-93a1-c6bda3589c26.json b/data/hfopenllm_v2/FuJhen/ft-openhermes-25-mistral-7b-irca-dpo-pairs/bfaec047-518f-42a0-93a1-c6bda3589c26.json deleted file mode 100644 index 205ab4408..000000000 --- a/data/hfopenllm_v2/FuJhen/ft-openhermes-25-mistral-7b-irca-dpo-pairs/bfaec047-518f-42a0-93a1-c6bda3589c26.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FuJhen_ft-openhermes-25-mistral-7b-irca-dpo-pairs/1762652579.624908", - "retrieved_timestamp": "1762652579.6249092", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FuJhen/ft-openhermes-25-mistral-7b-irca-dpo-pairs", - "developer": "FuJhen", - "inference_platform": "unknown", - "id": "FuJhen/ft-openhermes-25-mistral-7b-irca-dpo-pairs", - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 14.483 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5420041046645123 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47730323895548116 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04833836858006042 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.417375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2956283244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/FuJhen/mistral-instruct-7B-DPO/5f79d177-3ca8-4c95-83bb-2abb0e803e72.json b/data/hfopenllm_v2/FuJhen/mistral-instruct-7B-DPO/5f79d177-3ca8-4c95-83bb-2abb0e803e72.json deleted file mode 100644 index bed416543..000000000 --- a/data/hfopenllm_v2/FuJhen/mistral-instruct-7B-DPO/5f79d177-3ca8-4c95-83bb-2abb0e803e72.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FuJhen_mistral-instruct-7B-DPO/1762652579.625171", - "retrieved_timestamp": "1762652579.625172", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FuJhen/mistral-instruct-7B-DPO", - "developer": "FuJhen", - "inference_platform": "unknown", - "id": "FuJhen/mistral-instruct-7B-DPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 14.496 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49684171332065585 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46239050561386214 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4015625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.30335771276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/FuJhen/mistral-instruct-7B-DPO/812a36ec-4928-40a9-9aa8-ee39d7bb02f5.json b/data/hfopenllm_v2/FuJhen/mistral-instruct-7B-DPO/812a36ec-4928-40a9-9aa8-ee39d7bb02f5.json new file mode 100644 index 000000000..a76f20076 --- /dev/null +++ b/data/hfopenllm_v2/FuJhen/mistral-instruct-7B-DPO/812a36ec-4928-40a9-9aa8-ee39d7bb02f5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FuJhen_mistral-instruct-7B-DPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistral-instruct-7B-DPO", + "id": "FuJhen/mistral-instruct-7B-DPO", + "developer": "FuJhen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 14.496 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4968 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4624 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0385 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4016 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3034 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FuJhen/mistral_7b_v0.1_structedData_e2e/77af2424-0a23-49f3-97b0-316d04a33547.json 
b/data/hfopenllm_v2/FuJhen/mistral_7b_v0.1_structedData_e2e/77af2424-0a23-49f3-97b0-316d04a33547.json new file mode 100644 index 000000000..fd9de9edd --- /dev/null +++ b/data/hfopenllm_v2/FuJhen/mistral_7b_v0.1_structedData_e2e/77af2424-0a23-49f3-97b0-316d04a33547.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FuJhen_mistral_7b_v0.1_structedData_e2e/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistral_7b_v0.1_structedData_e2e", + "id": "FuJhen/mistral_7b_v0.1_structedData_e2e", + "developer": "FuJhen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1727 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4114 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0045 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3723 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2811 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FuJhen/mistral_7b_v0.1_structedData_viggo/6f422676-2d7e-40ed-a5e3-4afc25564cfc.json b/data/hfopenllm_v2/FuJhen/mistral_7b_v0.1_structedData_viggo/6f422676-2d7e-40ed-a5e3-4afc25564cfc.json new file mode 100644 index 000000000..2e924bc72 --- /dev/null +++ 
b/data/hfopenllm_v2/FuJhen/mistral_7b_v0.1_structedData_viggo/6f422676-2d7e-40ed-a5e3-4afc25564cfc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FuJhen_mistral_7b_v0.1_structedData_viggo/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistral_7b_v0.1_structedData_viggo", + "id": "FuJhen/mistral_7b_v0.1_structedData_viggo", + "developer": "FuJhen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 14.483 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1783 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4524 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0287 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3738 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2942 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FuseAI/FuseChat-7B-v2.0/26ca0085-db25-4664-823a-f56e08081dc4.json b/data/hfopenllm_v2/FuseAI/FuseChat-7B-v2.0/26ca0085-db25-4664-823a-f56e08081dc4.json deleted file mode 100644 index 41377ff04..000000000 --- a/data/hfopenllm_v2/FuseAI/FuseChat-7B-v2.0/26ca0085-db25-4664-823a-f56e08081dc4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FuseAI_FuseChat-7B-v2.0/1762652579.625878", - 
"retrieved_timestamp": "1762652579.625879", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FuseAI/FuseChat-7B-v2.0", - "developer": "FuseAI", - "inference_platform": "unknown", - "id": "FuseAI/FuseChat-7B-v2.0", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3423194900641409 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4954212795868764 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4796666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162400265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/FuseAI/FuseChat-7B-v2.0/43923dd6-838a-4259-a938-7766dfd9c07e.json b/data/hfopenllm_v2/FuseAI/FuseChat-7B-v2.0/43923dd6-838a-4259-a938-7766dfd9c07e.json new file mode 100644 index 000000000..c01a38f0d --- /dev/null +++ b/data/hfopenllm_v2/FuseAI/FuseChat-7B-v2.0/43923dd6-838a-4259-a938-7766dfd9c07e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FuseAI_FuseChat-7B-v2.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FuseChat-7B-v2.0", + "id": "FuseAI/FuseChat-7B-v2.0", + "developer": "FuseAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.3423 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4954 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0612 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4797 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3162 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FuseAI/FuseChat-Llama-3.1-8B-Instruct/dba94a49-02b0-4e92-bd6c-c6bfc9be3cfb.json b/data/hfopenllm_v2/FuseAI/FuseChat-Llama-3.1-8B-Instruct/dba94a49-02b0-4e92-bd6c-c6bfc9be3cfb.json new file mode 100644 index 000000000..b8ec7de60 --- /dev/null +++ b/data/hfopenllm_v2/FuseAI/FuseChat-Llama-3.1-8B-Instruct/dba94a49-02b0-4e92-bd6c-c6bfc9be3cfb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FuseAI_FuseChat-Llama-3.1-8B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FuseChat-Llama-3.1-8B-Instruct", + "id": "FuseAI/FuseChat-Llama-3.1-8B-Instruct", + "developer": "FuseAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7205 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.512 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2477 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.382 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3733 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FuseAI/FuseChat-Llama-3.1-8B-Instruct/fdc9ea4d-acf8-4f2c-b727-482f464eb925.json b/data/hfopenllm_v2/FuseAI/FuseChat-Llama-3.1-8B-Instruct/fdc9ea4d-acf8-4f2c-b727-482f464eb925.json deleted file mode 100644 index ec9b0e671..000000000 --- a/data/hfopenllm_v2/FuseAI/FuseChat-Llama-3.1-8B-Instruct/fdc9ea4d-acf8-4f2c-b727-482f464eb925.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FuseAI_FuseChat-Llama-3.1-8B-Instruct/1762652579.626143", - "retrieved_timestamp": "1762652579.626144", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FuseAI/FuseChat-Llama-3.1-8B-Instruct", - "developer": "FuseAI", - "inference_platform": "unknown", - "id": "FuseAI/FuseChat-Llama-3.1-8B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7204816553411615 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5119887898349903 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24773413897280966 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38200000000000006 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37333776595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/FuseAI/FuseChat-Llama-3.2-3B-Instruct/16a782dc-0795-4281-aad6-4f664a0940ab.json b/data/hfopenllm_v2/FuseAI/FuseChat-Llama-3.2-3B-Instruct/16a782dc-0795-4281-aad6-4f664a0940ab.json new file mode 100644 index 000000000..879cf24a5 --- /dev/null +++ b/data/hfopenllm_v2/FuseAI/FuseChat-Llama-3.2-3B-Instruct/16a782dc-0795-4281-aad6-4f664a0940ab.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FuseAI_FuseChat-Llama-3.2-3B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FuseChat-Llama-3.2-3B-Instruct", + "id": "FuseAI/FuseChat-Llama-3.2-3B-Instruct", + "developer": "FuseAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6849 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4658 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2424 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3914 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3132 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FuseAI/FuseChat-Llama-3.2-3B-Instruct/e39160a3-8332-467d-900f-52bb7d1446c1.json b/data/hfopenllm_v2/FuseAI/FuseChat-Llama-3.2-3B-Instruct/e39160a3-8332-467d-900f-52bb7d1446c1.json deleted file mode 100644 index 731de2043..000000000 --- a/data/hfopenllm_v2/FuseAI/FuseChat-Llama-3.2-3B-Instruct/e39160a3-8332-467d-900f-52bb7d1446c1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FuseAI_FuseChat-Llama-3.2-3B-Instruct/1762652579.626356", - "retrieved_timestamp": "1762652579.626357", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FuseAI/FuseChat-Llama-3.2-3B-Instruct", - "developer": "FuseAI", - "inference_platform": "unknown", - "id": "FuseAI/FuseChat-Llama-3.2-3B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.684886102208806 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46583679221755164 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24244712990936557 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39139583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.31316489361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/FuseAI/FuseChat-Qwen-2.5-7B-Instruct/1bae6b5e-47b0-4fe2-847a-8aec0a36342e.json b/data/hfopenllm_v2/FuseAI/FuseChat-Qwen-2.5-7B-Instruct/1bae6b5e-47b0-4fe2-847a-8aec0a36342e.json deleted file mode 100644 index 98c38689e..000000000 --- a/data/hfopenllm_v2/FuseAI/FuseChat-Qwen-2.5-7B-Instruct/1bae6b5e-47b0-4fe2-847a-8aec0a36342e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FuseAI_FuseChat-Qwen-2.5-7B-Instruct/1762652579.626579", - "retrieved_timestamp": "1762652579.626579", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FuseAI/FuseChat-Qwen-2.5-7B-Instruct", - "developer": "FuseAI", - "inference_platform": "unknown", - "id": "FuseAI/FuseChat-Qwen-2.5-7B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5905641475728844 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.552599883615556 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4561933534743202 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3873645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41181848404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/FuseAI/FuseChat-Qwen-2.5-7B-Instruct/5d24d4ad-9f37-4634-ba23-74fbc74fd298.json b/data/hfopenllm_v2/FuseAI/FuseChat-Qwen-2.5-7B-Instruct/5d24d4ad-9f37-4634-ba23-74fbc74fd298.json new file mode 100644 index 000000000..0a5c32d0b --- /dev/null +++ b/data/hfopenllm_v2/FuseAI/FuseChat-Qwen-2.5-7B-Instruct/5d24d4ad-9f37-4634-ba23-74fbc74fd298.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/FuseAI_FuseChat-Qwen-2.5-7B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + 
"evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FuseChat-Qwen-2.5-7B-Instruct", + "id": "FuseAI/FuseChat-Qwen-2.5-7B-Instruct", + "developer": "FuseAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5906 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5526 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4562 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3874 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4118 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/GalrionSoftworks/MN-LooseCannon-12B-v1/043cd315-fcb7-4871-ae79-dee3fdefaef0.json b/data/hfopenllm_v2/GalrionSoftworks/MN-LooseCannon-12B-v1/043cd315-fcb7-4871-ae79-dee3fdefaef0.json new file mode 100644 index 000000000..a45337ab5 --- /dev/null +++ b/data/hfopenllm_v2/GalrionSoftworks/MN-LooseCannon-12B-v1/043cd315-fcb7-4871-ae79-dee3fdefaef0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/GalrionSoftworks_MN-LooseCannon-12B-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MN-LooseCannon-12B-v1", + "id": "GalrionSoftworks/MN-LooseCannon-12B-v1", + "developer": "GalrionSoftworks", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5418 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5128 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0853 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4138 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3196 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/GalrionSoftworks/MN-LooseCannon-12B-v1/eb76e049-3a5d-4786-9724-800b719a6113.json b/data/hfopenllm_v2/GalrionSoftworks/MN-LooseCannon-12B-v1/eb76e049-3a5d-4786-9724-800b719a6113.json deleted file mode 100644 index 20c446713..000000000 --- a/data/hfopenllm_v2/GalrionSoftworks/MN-LooseCannon-12B-v1/eb76e049-3a5d-4786-9724-800b719a6113.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/GalrionSoftworks_MN-LooseCannon-12B-v1/1762652579.626794", - "retrieved_timestamp": "1762652579.626794", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "GalrionSoftworks/MN-LooseCannon-12B-v1", - "developer": "GalrionSoftworks", - "inference_platform": "unknown", - "id": "GalrionSoftworks/MN-LooseCannon-12B-v1", - "additional_details": { - 
"precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5417791459992819 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5128183808679557 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08534743202416918 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41384375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3195644946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/GalrionSoftworks/MagnusIntellectus-12B-v1/3c377d7e-14bc-4c82-9ada-7560552abbe4.json b/data/hfopenllm_v2/GalrionSoftworks/MagnusIntellectus-12B-v1/3c377d7e-14bc-4c82-9ada-7560552abbe4.json new file mode 100644 index 000000000..19eeb69d1 --- /dev/null +++ b/data/hfopenllm_v2/GalrionSoftworks/MagnusIntellectus-12B-v1/3c377d7e-14bc-4c82-9ada-7560552abbe4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/GalrionSoftworks_MagnusIntellectus-12B-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MagnusIntellectus-12B-v1", + "id": "GalrionSoftworks/MagnusIntellectus-12B-v1", + "developer": "GalrionSoftworks", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4421 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5323 + } + }, + { + 
"evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.065 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2844 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4428 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3421 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/GalrionSoftworks/MagnusIntellectus-12B-v1/99a948ab-cc5b-4f3a-aae0-684cbfb6ffb3.json b/data/hfopenllm_v2/GalrionSoftworks/MagnusIntellectus-12B-v1/99a948ab-cc5b-4f3a-aae0-684cbfb6ffb3.json deleted file mode 100644 index a283d4fd7..000000000 --- a/data/hfopenllm_v2/GalrionSoftworks/MagnusIntellectus-12B-v1/99a948ab-cc5b-4f3a-aae0-684cbfb6ffb3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/GalrionSoftworks_MagnusIntellectus-12B-v1/1762652579.62705", - "retrieved_timestamp": "1762652579.627051", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "GalrionSoftworks/MagnusIntellectus-12B-v1", - "developer": "GalrionSoftworks", - "inference_platform": "unknown", - "id": "GalrionSoftworks/MagnusIntellectus-12B-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4421368635221213 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5323010476246133 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.0649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4428020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34208776595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/GenVRadmin/AryaBhatta-GemmaOrca-2-Merged/43bb650b-8bb7-41b4-866a-cb2dad1499d6.json b/data/hfopenllm_v2/GenVRadmin/AryaBhatta-GemmaOrca-2-Merged/43bb650b-8bb7-41b4-866a-cb2dad1499d6.json new file mode 100644 index 000000000..ce87f2d44 --- /dev/null +++ b/data/hfopenllm_v2/GenVRadmin/AryaBhatta-GemmaOrca-2-Merged/43bb650b-8bb7-41b4-866a-cb2dad1499d6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/GenVRadmin_AryaBhatta-GemmaOrca-2-Merged/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AryaBhatta-GemmaOrca-2-Merged", + "id": "GenVRadmin/AryaBhatta-GemmaOrca-2-Merged", + "developer": "GenVRadmin", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 8.538 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3064 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3887 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0498 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": 
"TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.455 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2384 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/GenVRadmin/AryaBhatta-GemmaOrca-Merged/bdf8f907-37ca-41ca-9a4e-f4dd446f895f.json b/data/hfopenllm_v2/GenVRadmin/AryaBhatta-GemmaOrca-Merged/bdf8f907-37ca-41ca-9a4e-f4dd446f895f.json new file mode 100644 index 000000000..54f086875 --- /dev/null +++ b/data/hfopenllm_v2/GenVRadmin/AryaBhatta-GemmaOrca-Merged/bdf8f907-37ca-41ca-9a4e-f4dd446f895f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/GenVRadmin_AryaBhatta-GemmaOrca-Merged/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AryaBhatta-GemmaOrca-Merged", + "id": "GenVRadmin/AryaBhatta-GemmaOrca-Merged", + "developer": "GenVRadmin", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 8.538 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3064 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4131 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0514 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2559 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.3524 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2228 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/GenVRadmin/AryaBhatta-GemmaUltra-Merged/14a1872c-7afd-4cd4-ad87-853e4fc0847e.json b/data/hfopenllm_v2/GenVRadmin/AryaBhatta-GemmaUltra-Merged/14a1872c-7afd-4cd4-ad87-853e4fc0847e.json new file mode 100644 index 000000000..88dd8d560 --- /dev/null +++ b/data/hfopenllm_v2/GenVRadmin/AryaBhatta-GemmaUltra-Merged/14a1872c-7afd-4cd4-ad87-853e4fc0847e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/GenVRadmin_AryaBhatta-GemmaUltra-Merged/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AryaBhatta-GemmaUltra-Merged", + "id": "GenVRadmin/AryaBhatta-GemmaUltra-Merged", + "developer": "GenVRadmin", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 8.538 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3021 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4141 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0536 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2534 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4279 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": 
"TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2266 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/GenVRadmin/llama38bGenZ_Vikas-Merged/887e4ca9-ed48-4b33-b933-f8534a8d0377.json b/data/hfopenllm_v2/GenVRadmin/llama38bGenZ_Vikas-Merged/887e4ca9-ed48-4b33-b933-f8534a8d0377.json new file mode 100644 index 000000000..0c72024f8 --- /dev/null +++ b/data/hfopenllm_v2/GenVRadmin/llama38bGenZ_Vikas-Merged/887e4ca9-ed48-4b33-b933-f8534a8d0377.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/GenVRadmin_llama38bGenZ_Vikas-Merged/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama38bGenZ_Vikas-Merged", + "id": "GenVRadmin/llama38bGenZ_Vikas-Merged", + "developer": "GenVRadmin", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4536 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0574 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4402 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.2622 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/GoToCompany/gemma2-9b-cpt-sahabatai-v1-instruct/68ff0a5c-9e76-410b-a4e3-4b7de0e7fe35.json b/data/hfopenllm_v2/GoToCompany/gemma2-9b-cpt-sahabatai-v1-instruct/68ff0a5c-9e76-410b-a4e3-4b7de0e7fe35.json deleted file mode 100644 index 9d09b4d5f..000000000 --- a/data/hfopenllm_v2/GoToCompany/gemma2-9b-cpt-sahabatai-v1-instruct/68ff0a5c-9e76-410b-a4e3-4b7de0e7fe35.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/GoToCompany_gemma2-9b-cpt-sahabatai-v1-instruct/1762652579.628178", - "retrieved_timestamp": "1762652579.628178", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "GoToCompany/gemma2-9b-cpt-sahabatai-v1-instruct", - "developer": "GoToCompany", - "inference_platform": "unknown", - "id": "GoToCompany/gemma2-9b-cpt-sahabatai-v1-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6550607942481504 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5954551751157878 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3347315436241611 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4778645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4263630319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/GoToCompany/gemma2-9b-cpt-sahabatai-v1-instruct/c585488d-4043-482f-b1fa-4a61e96f7f0f.json b/data/hfopenllm_v2/GoToCompany/gemma2-9b-cpt-sahabatai-v1-instruct/c585488d-4043-482f-b1fa-4a61e96f7f0f.json new file mode 100644 index 000000000..99a5c1561 --- /dev/null +++ b/data/hfopenllm_v2/GoToCompany/gemma2-9b-cpt-sahabatai-v1-instruct/c585488d-4043-482f-b1fa-4a61e96f7f0f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/GoToCompany_gemma2-9b-cpt-sahabatai-v1-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + 
"source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma2-9b-cpt-sahabatai-v1-instruct", + "id": "GoToCompany/gemma2-9b-cpt-sahabatai-v1-instruct", + "developer": "GoToCompany", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6551 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5955 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2054 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3347 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4779 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4264 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/GoToCompany/llama3-8b-cpt-sahabatai-v1-instruct/aa363693-a300-4545-b7f3-05492646c202.json b/data/hfopenllm_v2/GoToCompany/llama3-8b-cpt-sahabatai-v1-instruct/aa363693-a300-4545-b7f3-05492646c202.json deleted file mode 100644 index a20ac8cc1..000000000 --- a/data/hfopenllm_v2/GoToCompany/llama3-8b-cpt-sahabatai-v1-instruct/aa363693-a300-4545-b7f3-05492646c202.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/GoToCompany_llama3-8b-cpt-sahabatai-v1-instruct/1762652579.628486", - "retrieved_timestamp": "1762652579.628489", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "GoToCompany/llama3-8b-cpt-sahabatai-v1-instruct", - "developer": "GoToCompany", - "inference_platform": "unknown", - "id": "GoToCompany/llama3-8b-cpt-sahabatai-v1-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.523844510343666 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4951292004509417 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12764350453172205 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44884375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3453291223404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/GoToCompany/llama3-8b-cpt-sahabatai-v1-instruct/d64541f6-19ef-4f04-a991-93efec6fe24f.json b/data/hfopenllm_v2/GoToCompany/llama3-8b-cpt-sahabatai-v1-instruct/d64541f6-19ef-4f04-a991-93efec6fe24f.json new file mode 100644 index 000000000..e5f54c812 --- /dev/null +++ b/data/hfopenllm_v2/GoToCompany/llama3-8b-cpt-sahabatai-v1-instruct/d64541f6-19ef-4f04-a991-93efec6fe24f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/GoToCompany_llama3-8b-cpt-sahabatai-v1-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama3-8b-cpt-sahabatai-v1-instruct", + "id": "GoToCompany/llama3-8b-cpt-sahabatai-v1-instruct", + "developer": "GoToCompany", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, 
+ "score_details": { + "score": 0.5238 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4951 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1276 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4488 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3453 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/1b9a4b84-1766-49ca-bd11-17a2340b9736.json b/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/1b9a4b84-1766-49ca-bd11-17a2340b9736.json deleted file mode 100644 index 82bce9580..000000000 --- a/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/1b9a4b84-1766-49ca-bd11-17a2340b9736.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/1762652579.6293938", - "retrieved_timestamp": "1762652579.629396", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1", - "developer": "Goekdeniz-Guelmez", - "inference_platform": "unknown", - "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.3416944817528602 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32921013057720044 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.002265861027190332 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3249166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16381316489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/1c13e194-8bee-4456-a249-f71e7e34b0eb.json b/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/1c13e194-8bee-4456-a249-f71e7e34b0eb.json new file mode 100644 index 000000000..db3383a91 --- /dev/null +++ b/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/1c13e194-8bee-4456-a249-f71e7e34b0eb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1", + "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1", + "developer": "Goekdeniz-Guelmez", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3417 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3292 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0023 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3249 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1638 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/1d3db737-20e7-4da1-a311-e60de0b41c93.json b/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/1d3db737-20e7-4da1-a311-e60de0b41c93.json new file mode 100644 index 000000000..a0e2c50db --- /dev/null +++ b/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/1d3db737-20e7-4da1-a311-e60de0b41c93.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1", + "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1", + "developer": "Goekdeniz-Guelmez", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3472 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3268 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on 
MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0891 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2517 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3262 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1641 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/235adbd2-8128-4428-af57-8d8e310ba56f.json b/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/235adbd2-8128-4428-af57-8d8e310ba56f.json deleted file mode 100644 index 01ae949de..000000000 --- a/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/235adbd2-8128-4428-af57-8d8e310ba56f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/1762652579.629041", - "retrieved_timestamp": "1762652579.629042", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1", - "developer": "Goekdeniz-Guelmez", - "inference_platform": "unknown", - "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.347189900574919 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32683063456958195 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16414561170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1/7b73d50e-358b-4961-8b58-63765ce5a82a.json b/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1/7b73d50e-358b-4961-8b58-63765ce5a82a.json new file mode 100644 index 000000000..6da2363a1 --- /dev/null +++ b/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1/7b73d50e-358b-4961-8b58-63765ce5a82a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1", + "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1", + "developer": "Goekdeniz-Guelmez", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4769 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4186 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2085 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2433 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + 
"source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3675 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2783 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1/a82acc9c-4093-4e0d-a862-7d6eb3cb7146.json b/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1/a82acc9c-4093-4e0d-a862-7d6eb3cb7146.json deleted file mode 100644 index 484b2e4f9..000000000 --- a/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1/a82acc9c-4093-4e0d-a862-7d6eb3cb7146.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1/1762652579.629639", - "retrieved_timestamp": "1762652579.6296399", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1", - "developer": "Goekdeniz-Guelmez", - "inference_platform": "unknown", - "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47685806992114255 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.418600731531926 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24328859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3674895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27825797872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2/81dfd69c-cf01-4114-8157-fd09af6f490c.json b/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2/81dfd69c-cf01-4114-8157-fd09af6f490c.json new file mode 100644 index 000000000..5793c4f9c --- /dev/null +++ b/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2/81dfd69c-cf01-4114-8157-fd09af6f490c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2", + "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2", + "developer": "Goekdeniz-Guelmez", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4216 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4042 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1269 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2399 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3769 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.2562 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2/baae7cee-8b76-456f-96dc-5ac900a9a36e.json b/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2/baae7cee-8b76-456f-96dc-5ac900a9a36e.json deleted file mode 100644 index c2d402255..000000000 --- a/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2/baae7cee-8b76-456f-96dc-5ac900a9a36e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2/1762652579.629877", - "retrieved_timestamp": "1762652579.629878", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2", - "developer": "Goekdeniz-Guelmez", - "inference_platform": "unknown", - "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.421553699738915 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40418921704436744 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1268882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23993288590604026 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37685416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25615026595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3/9363a90d-6ec7-4de2-af17-a3e3e25de7d9.json b/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3/9363a90d-6ec7-4de2-af17-a3e3e25de7d9.json deleted file mode 100644 index 21b718022..000000000 --- a/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3/9363a90d-6ec7-4de2-af17-a3e3e25de7d9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": 
"0.1.0", - "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3/1762652579.630181", - "retrieved_timestamp": "1762652579.6301818", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3", - "developer": "Goekdeniz-Guelmez", - "inference_platform": "unknown", - "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42525055740989465 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4053446177133173 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13066465256797583 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24328859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37018749999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25556848404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3/f38240ab-35e4-431e-b4d5-b1b0e1d57c5f.json b/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3/f38240ab-35e4-431e-b4d5-b1b0e1d57c5f.json new file mode 100644 index 000000000..c4deee9bd --- /dev/null +++ b/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3/f38240ab-35e4-431e-b4d5-b1b0e1d57c5f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3", + "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3", + "developer": "Goekdeniz-Guelmez", + "inference_platform": "unknown", + 
"additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4253 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4053 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1307 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2433 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3702 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2556 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-14B-Instruct-abliterated-v4/01863b4f-9550-49c3-ad83-74c0bb535eb9.json b/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-14B-Instruct-abliterated-v4/01863b4f-9550-49c3-ad83-74c0bb535eb9.json new file mode 100644 index 000000000..42f6949ab --- /dev/null +++ b/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-14B-Instruct-abliterated-v4/01863b4f-9550-49c3-ad83-74c0bb535eb9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_Josiefied-Qwen2.5-14B-Instruct-abliterated-v4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Josiefied-Qwen2.5-14B-Instruct-abliterated-v4", + "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-14B-Instruct-abliterated-v4", + "developer": "Goekdeniz-Guelmez", + "inference_platform": "unknown", + "additional_details": { + "precision": 
"bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8292 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6356 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5423 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3423 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4287 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5018 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-14B-Instruct-abliterated-v4/af440c67-78de-4053-98d8-8cded9657860.json b/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-14B-Instruct-abliterated-v4/af440c67-78de-4053-98d8-8cded9657860.json deleted file mode 100644 index 0c17b12de..000000000 --- a/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-14B-Instruct-abliterated-v4/af440c67-78de-4053-98d8-8cded9657860.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_Josiefied-Qwen2.5-14B-Instruct-abliterated-v4/1762652579.6304152", - "retrieved_timestamp": "1762652579.630416", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-14B-Instruct-abliterated-v4", - "developer": "Goekdeniz-Guelmez", - "inference_platform": "unknown", - "id": 
"Goekdeniz-Guelmez/Josiefied-Qwen2.5-14B-Instruct-abliterated-v4", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8291666112581284 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6355637424320617 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5422960725075529 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3422818791946309 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4286666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5018284574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/9c443687-99df-4cd9-8e19-d40cd83b30bc.json b/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/9c443687-99df-4cd9-8e19-d40cd83b30bc.json deleted file mode 100644 index c374d65ef..000000000 --- a/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/9c443687-99df-4cd9-8e19-d40cd83b30bc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/1762652579.630644", - "retrieved_timestamp": "1762652579.630645", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2", - "developer": "Goekdeniz-Guelmez", - "inference_platform": "unknown", - "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7813811797142693 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5309672164610734 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45317220543806647 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43539583333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4119847074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/edd25437-38bc-443c-9da3-bc041270447e.json b/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/edd25437-38bc-443c-9da3-bc041270447e.json new file mode 100644 index 000000000..0afe4d3d1 --- /dev/null +++ b/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/edd25437-38bc-443c-9da3-bc041270447e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Josiefied-Qwen2.5-7B-Instruct-abliterated-v2", + "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2", + "developer": "Goekdeniz-Guelmez", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7814 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.531 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.4532 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4354 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.412 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Goekdeniz-Guelmez/j.o.s.i.e.v4o-1.5b-dpo-stage1-v1/31836d43-5022-488f-ba9e-379195809069.json b/data/hfopenllm_v2/Goekdeniz-Guelmez/j.o.s.i.e.v4o-1.5b-dpo-stage1-v1/31836d43-5022-488f-ba9e-379195809069.json new file mode 100644 index 000000000..85cf3c248 --- /dev/null +++ b/data/hfopenllm_v2/Goekdeniz-Guelmez/j.o.s.i.e.v4o-1.5b-dpo-stage1-v1/31836d43-5022-488f-ba9e-379195809069.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_j.o.s.i.e.v4o-1.5b-dpo-stage1-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "j.o.s.i.e.v4o-1.5b-dpo-stage1-v1", + "id": "Goekdeniz-Guelmez/j.o.s.i.e.v4o-1.5b-dpo-stage1-v1", + "developer": "Goekdeniz-Guelmez", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4188 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4124 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1201 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + 
"source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2508 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3529 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2555 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Goekdeniz-Guelmez/j.o.s.i.e.v4o-1.5b-dpo-stage1-v1/b6bf7c36-006c-4256-a315-1de70e2540c3.json b/data/hfopenllm_v2/Goekdeniz-Guelmez/j.o.s.i.e.v4o-1.5b-dpo-stage1-v1/b6bf7c36-006c-4256-a315-1de70e2540c3.json deleted file mode 100644 index 99963b087..000000000 --- a/data/hfopenllm_v2/Goekdeniz-Guelmez/j.o.s.i.e.v4o-1.5b-dpo-stage1-v1/b6bf7c36-006c-4256-a315-1de70e2540c3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_j.o.s.i.e.v4o-1.5b-dpo-stage1-v1/1762652579.631213", - "retrieved_timestamp": "1762652579.631215", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Goekdeniz-Guelmez/j.o.s.i.e.v4o-1.5b-dpo-stage1-v1", - "developer": "Goekdeniz-Guelmez", - "inference_platform": "unknown", - "id": "Goekdeniz-Guelmez/j.o.s.i.e.v4o-1.5b-dpo-stage1-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41883092417009093 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41242101633634826 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25083892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3528541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2554853723404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/Goekdeniz-Guelmez/josie-3b-v6.0/2a5a3ed6-7137-49e2-a141-497ceba88757.json b/data/hfopenllm_v2/Goekdeniz-Guelmez/josie-3b-v6.0/2a5a3ed6-7137-49e2-a141-497ceba88757.json new file mode 100644 index 000000000..492ac7214 --- /dev/null +++ b/data/hfopenllm_v2/Goekdeniz-Guelmez/josie-3b-v6.0/2a5a3ed6-7137-49e2-a141-497ceba88757.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_josie-3b-v6.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "josie-3b-v6.0", + "id": "Goekdeniz-Guelmez/josie-3b-v6.0", + "developer": "Goekdeniz-Guelmez", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.601 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4496 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2938 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3861 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.322 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Goekdeniz-Guelmez/josie-3b-v6.0/89947a58-5e39-468e-bbbc-2f3556a1c8f1.json b/data/hfopenllm_v2/Goekdeniz-Guelmez/josie-3b-v6.0/89947a58-5e39-468e-bbbc-2f3556a1c8f1.json deleted file mode 100644 index b275c5d18..000000000 --- a/data/hfopenllm_v2/Goekdeniz-Guelmez/josie-3b-v6.0/89947a58-5e39-468e-bbbc-2f3556a1c8f1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_josie-3b-v6.0/1762652579.631514", - "retrieved_timestamp": "1762652579.6315148", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Goekdeniz-Guelmez/josie-3b-v6.0", - "developer": "Goekdeniz-Guelmez", - "inference_platform": "unknown", - "id": "Goekdeniz-Guelmez/josie-3b-v6.0", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6009554648333089 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4496147842264783 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2938066465256798 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.386125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32197473404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/Goekdeniz-Guelmez/josie-7b-v6.0-step2000/0b1c6aa6-b94e-4400-9b0d-c39aa1bcd808.json b/data/hfopenllm_v2/Goekdeniz-Guelmez/josie-7b-v6.0-step2000/0b1c6aa6-b94e-4400-9b0d-c39aa1bcd808.json new file mode 100644 index 000000000..55f207668 --- /dev/null +++ b/data/hfopenllm_v2/Goekdeniz-Guelmez/josie-7b-v6.0-step2000/0b1c6aa6-b94e-4400-9b0d-c39aa1bcd808.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_josie-7b-v6.0-step2000/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", 
+ "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "josie-7b-v6.0-step2000", + "id": "Goekdeniz-Guelmez/josie-7b-v6.0-step2000", + "developer": "Goekdeniz-Guelmez", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7598 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5107 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4237 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2768 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4539 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4012 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Goekdeniz-Guelmez/josie-7b-v6.0-step2000/69423132-adc9-4b97-b799-15f37de1d7e5.json b/data/hfopenllm_v2/Goekdeniz-Guelmez/josie-7b-v6.0-step2000/69423132-adc9-4b97-b799-15f37de1d7e5.json new file mode 100644 index 000000000..8561cb840 --- /dev/null +++ b/data/hfopenllm_v2/Goekdeniz-Guelmez/josie-7b-v6.0-step2000/69423132-adc9-4b97-b799-15f37de1d7e5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_josie-7b-v6.0-step2000/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "josie-7b-v6.0-step2000", + 
"id": "Goekdeniz-Guelmez/josie-7b-v6.0-step2000", + "developer": "Goekdeniz-Guelmez", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7628 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5098 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4579 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4033 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Goekdeniz-Guelmez/josie-7b-v6.0-step2000/7c2cc003-fab3-4fc9-a6b6-fb7075261e50.json b/data/hfopenllm_v2/Goekdeniz-Guelmez/josie-7b-v6.0-step2000/7c2cc003-fab3-4fc9-a6b6-fb7075261e50.json deleted file mode 100644 index 816b2eae7..000000000 --- a/data/hfopenllm_v2/Goekdeniz-Guelmez/josie-7b-v6.0-step2000/7c2cc003-fab3-4fc9-a6b6-fb7075261e50.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_josie-7b-v6.0-step2000/1762652579.6322381", - "retrieved_timestamp": "1762652579.632239", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Goekdeniz-Guelmez/josie-7b-v6.0-step2000", - "developer": "Goekdeniz-Guelmez", - 
"inference_platform": "unknown", - "id": "Goekdeniz-Guelmez/josie-7b-v6.0-step2000", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7597740661444966 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.510712680636641 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42371601208459214 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45393750000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4011801861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/Goekdeniz-Guelmez/josie-7b-v6.0-step2000/90d4e4e1-2185-4d21-8730-f1a4bf413157.json b/data/hfopenllm_v2/Goekdeniz-Guelmez/josie-7b-v6.0-step2000/90d4e4e1-2185-4d21-8730-f1a4bf413157.json deleted file mode 100644 index c35e9d253..000000000 --- a/data/hfopenllm_v2/Goekdeniz-Guelmez/josie-7b-v6.0-step2000/90d4e4e1-2185-4d21-8730-f1a4bf413157.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_josie-7b-v6.0-step2000/1762652579.632", - "retrieved_timestamp": "1762652579.632001", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Goekdeniz-Guelmez/josie-7b-v6.0-step2000", - "developer": "Goekdeniz-Guelmez", - "inference_platform": "unknown", - "id": "Goekdeniz-Guelmez/josie-7b-v6.0-step2000", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7627716680629618 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.5097811950503962 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45793750000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40325797872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/Goekdeniz-Guelmez/josie-7b-v6.0/54d5bf0f-7c4c-40b1-bca6-5484ef8e2a04.json b/data/hfopenllm_v2/Goekdeniz-Guelmez/josie-7b-v6.0/54d5bf0f-7c4c-40b1-bca6-5484ef8e2a04.json new file mode 100644 index 000000000..359a9fc74 --- /dev/null +++ b/data/hfopenllm_v2/Goekdeniz-Guelmez/josie-7b-v6.0/54d5bf0f-7c4c-40b1-bca6-5484ef8e2a04.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_josie-7b-v6.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "josie-7b-v6.0", + "id": "Goekdeniz-Guelmez/josie-7b-v6.0", + "developer": "Goekdeniz-Guelmez", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7412 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5105 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4358 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, 
+ "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4154 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3807 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Goekdeniz-Guelmez/josie-7b-v6.0/aa158f5d-94a5-4f40-8a65-87fe9605abc1.json b/data/hfopenllm_v2/Goekdeniz-Guelmez/josie-7b-v6.0/aa158f5d-94a5-4f40-8a65-87fe9605abc1.json deleted file mode 100644 index 1dcbc126e..000000000 --- a/data/hfopenllm_v2/Goekdeniz-Guelmez/josie-7b-v6.0/aa158f5d-94a5-4f40-8a65-87fe9605abc1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_josie-7b-v6.0/1762652579.631763", - "retrieved_timestamp": "1762652579.631764", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Goekdeniz-Guelmez/josie-7b-v6.0", - "developer": "Goekdeniz-Guelmez", - "inference_platform": "unknown", - "id": "Goekdeniz-Guelmez/josie-7b-v6.0", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7411645544931892 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5104855208094123 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43580060422960726 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41539583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3806515957446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/GreenNode/GreenNode-small-9B-it/cfe8f9c7-e9bf-4a17-afa0-d5b8f46d24e7.json b/data/hfopenllm_v2/GreenNode/GreenNode-small-9B-it/cfe8f9c7-e9bf-4a17-afa0-d5b8f46d24e7.json new file mode 100644 index 000000000..847a2aed3 --- /dev/null +++ b/data/hfopenllm_v2/GreenNode/GreenNode-small-9B-it/cfe8f9c7-e9bf-4a17-afa0-d5b8f46d24e7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/GreenNode_GreenNode-small-9B-it/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "GreenNode-small-9B-it", + "id": "GreenNode/GreenNode-small-9B-it", + "developer": "GreenNode", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7436 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5994 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1745 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3196 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4204 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3927 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/GreenNode/GreenNode-small-9B-it/d13def83-5ff8-4cde-aef5-b3c268c40c16.json 
b/data/hfopenllm_v2/GreenNode/GreenNode-small-9B-it/d13def83-5ff8-4cde-aef5-b3c268c40c16.json deleted file mode 100644 index 142863316..000000000 --- a/data/hfopenllm_v2/GreenNode/GreenNode-small-9B-it/d13def83-5ff8-4cde-aef5-b3c268c40c16.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/GreenNode_GreenNode-small-9B-it/1762652579.6324449", - "retrieved_timestamp": "1762652579.632446", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "GreenNode/GreenNode-small-9B-it", - "developer": "GreenNode", - "inference_platform": "unknown", - "id": "GreenNode/GreenNode-small-9B-it", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7436125037123721 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.599383874005197 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17447129909365558 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42041666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3927027925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/GritLM/GritLM-7B-KTO/6d7f26d7-2336-4def-9d17-09d30a89e02d.json b/data/hfopenllm_v2/GritLM/GritLM-7B-KTO/6d7f26d7-2336-4def-9d17-09d30a89e02d.json deleted file mode 100644 index c495ce349..000000000 --- a/data/hfopenllm_v2/GritLM/GritLM-7B-KTO/6d7f26d7-2336-4def-9d17-09d30a89e02d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/GritLM_GritLM-7B-KTO/1762652579.632807", - "retrieved_timestamp": "1762652579.632808", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "GritLM/GritLM-7B-KTO", - "developer": "GritLM", - "inference_platform": 
"unknown", - "id": "GritLM/GritLM-7B-KTO", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5310132670203948 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.485293719684692 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027190332326283987 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37102083333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26803523936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/GritLM/GritLM-7B-KTO/7fbc0323-1c78-46b6-a08a-6e5870c64e53.json b/data/hfopenllm_v2/GritLM/GritLM-7B-KTO/7fbc0323-1c78-46b6-a08a-6e5870c64e53.json new file mode 100644 index 000000000..054831436 --- /dev/null +++ b/data/hfopenllm_v2/GritLM/GritLM-7B-KTO/7fbc0323-1c78-46b6-a08a-6e5870c64e53.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/GritLM_GritLM-7B-KTO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "GritLM-7B-KTO", + "id": "GritLM/GritLM-7B-KTO", + "developer": "GritLM", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.531 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4853 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": 
{ + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0272 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.371 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.268 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/GritLM/GritLM-8x7B-KTO/1c769f0d-b99d-4b82-a529-f5264f7b3349.json b/data/hfopenllm_v2/GritLM/GritLM-8x7B-KTO/1c769f0d-b99d-4b82-a529-f5264f7b3349.json new file mode 100644 index 000000000..9c52b15ad --- /dev/null +++ b/data/hfopenllm_v2/GritLM/GritLM-8x7B-KTO/1c769f0d-b99d-4b82-a529-f5264f7b3349.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/GritLM_GritLM-8x7B-KTO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "GritLM-8x7B-KTO", + "id": "GritLM/GritLM-8x7B-KTO", + "developer": "GritLM", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 46.703 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5714 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.582 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.1224 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4217 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3648 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/GritLM/GritLM-8x7B-KTO/de98eb82-0606-46b8-bbfb-d054a0f6ef2c.json b/data/hfopenllm_v2/GritLM/GritLM-8x7B-KTO/de98eb82-0606-46b8-bbfb-d054a0f6ef2c.json deleted file mode 100644 index af47ac2e7..000000000 --- a/data/hfopenllm_v2/GritLM/GritLM-8x7B-KTO/de98eb82-0606-46b8-bbfb-d054a0f6ef2c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/GritLM_GritLM-8x7B-KTO/1762652579.633089", - "retrieved_timestamp": "1762652579.633089", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "GritLM/GritLM-8x7B-KTO", - "developer": "GritLM", - "inference_platform": "unknown", - "id": "GritLM/GritLM-8x7B-KTO", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 46.703 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5714049832222946 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5820304362331497 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12235649546827794 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42165625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36477726063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/Groq/Llama-3-Groq-8B-Tool-Use/a9365685-e299-48e2-931a-c63e123a9e00.json b/data/hfopenllm_v2/Groq/Llama-3-Groq-8B-Tool-Use/a9365685-e299-48e2-931a-c63e123a9e00.json new file mode 100644 index 000000000..1407a69b5 --- /dev/null +++ b/data/hfopenllm_v2/Groq/Llama-3-Groq-8B-Tool-Use/a9365685-e299-48e2-931a-c63e123a9e00.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Groq_Llama-3-Groq-8B-Tool-Use/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Groq-8B-Tool-Use", + "id": "Groq/Llama-3-Groq-8B-Tool-Use", + "developer": "Groq", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6098 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4863 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0604 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.366 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 
0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3399 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Gryphe/Pantheon-RP-1.0-8b-Llama-3/bdf2d61a-daa1-4b1f-9245-43ff263540fb.json b/data/hfopenllm_v2/Gryphe/Pantheon-RP-1.0-8b-Llama-3/bdf2d61a-daa1-4b1f-9245-43ff263540fb.json new file mode 100644 index 000000000..7c3f4b4a5 --- /dev/null +++ b/data/hfopenllm_v2/Gryphe/Pantheon-RP-1.0-8b-Llama-3/bdf2d61a-daa1-4b1f-9245-43ff263540fb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Gryphe_Pantheon-RP-1.0-8b-Llama-3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Pantheon-RP-1.0-8b-Llama-3", + "id": "Gryphe/Pantheon-RP-1.0-8b-Llama-3", + "developer": "Gryphe", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3933 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4539 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0634 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3832 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3067 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Gryphe/Pantheon-RP-1.5-12b-Nemo/f0b4eef9-dab2-48e2-87f8-ad83ec33ec23.json 
b/data/hfopenllm_v2/Gryphe/Pantheon-RP-1.5-12b-Nemo/f0b4eef9-dab2-48e2-87f8-ad83ec33ec23.json new file mode 100644 index 000000000..5c8283525 --- /dev/null +++ b/data/hfopenllm_v2/Gryphe/Pantheon-RP-1.5-12b-Nemo/f0b4eef9-dab2-48e2-87f8-ad83ec33ec23.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Gryphe_Pantheon-RP-1.5-12b-Nemo/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Pantheon-RP-1.5-12b-Nemo", + "id": "Gryphe/Pantheon-RP-1.5-12b-Nemo", + "developer": "Gryphe", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4763 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5196 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0491 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.442 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3302 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Gryphe/Pantheon-RP-1.5-12b-Nemo/f9ed0b0f-6fa9-4450-97fe-204f6dc8d88a.json b/data/hfopenllm_v2/Gryphe/Pantheon-RP-1.5-12b-Nemo/f9ed0b0f-6fa9-4450-97fe-204f6dc8d88a.json deleted file mode 100644 index 5e0ade0fc..000000000 --- 
a/data/hfopenllm_v2/Gryphe/Pantheon-RP-1.5-12b-Nemo/f9ed0b0f-6fa9-4450-97fe-204f6dc8d88a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Gryphe_Pantheon-RP-1.5-12b-Nemo/1762652579.633812", - "retrieved_timestamp": "1762652579.633813", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Gryphe/Pantheon-RP-1.5-12b-Nemo", - "developer": "Gryphe", - "inference_platform": "unknown", - "id": "Gryphe/Pantheon-RP-1.5-12b-Nemo", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47630841722186024 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.519582216884963 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04909365558912387 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44203125000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3302027925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/Gryphe/Pantheon-RP-1.6-12b-Nemo-KTO/29e10491-8c34-4b7a-a0bd-77f6ca0dc54c.json b/data/hfopenllm_v2/Gryphe/Pantheon-RP-1.6-12b-Nemo-KTO/29e10491-8c34-4b7a-a0bd-77f6ca0dc54c.json new file mode 100644 index 000000000..2a9bc0b72 --- /dev/null +++ b/data/hfopenllm_v2/Gryphe/Pantheon-RP-1.6-12b-Nemo-KTO/29e10491-8c34-4b7a-a0bd-77f6ca0dc54c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Gryphe_Pantheon-RP-1.6-12b-Nemo-KTO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Pantheon-RP-1.6-12b-Nemo-KTO", + "id": "Gryphe/Pantheon-RP-1.6-12b-Nemo-KTO", + "developer": "Gryphe", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + 
"evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4636 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5277 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0529 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4248 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3382 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Gryphe/Pantheon-RP-1.6-12b-Nemo-KTO/a2445d2d-b8a2-44e4-9c74-7401e7afde75.json b/data/hfopenllm_v2/Gryphe/Pantheon-RP-1.6-12b-Nemo-KTO/a2445d2d-b8a2-44e4-9c74-7401e7afde75.json deleted file mode 100644 index d6110f90a..000000000 --- a/data/hfopenllm_v2/Gryphe/Pantheon-RP-1.6-12b-Nemo-KTO/a2445d2d-b8a2-44e4-9c74-7401e7afde75.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Gryphe_Pantheon-RP-1.6-12b-Nemo-KTO/1762652579.634284", - "retrieved_timestamp": "1762652579.634285", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Gryphe/Pantheon-RP-1.6-12b-Nemo-KTO", - "developer": "Gryphe", - "inference_platform": "unknown", - "id": "Gryphe/Pantheon-RP-1.6-12b-Nemo-KTO", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4636187537954849 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5276980814125921 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.052870090634441085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4247916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33818151595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/Gryphe/Pantheon-RP-1.6-12b-Nemo/9a2ca2e5-a2e9-460f-b4dc-a6293ca13003.json b/data/hfopenllm_v2/Gryphe/Pantheon-RP-1.6-12b-Nemo/9a2ca2e5-a2e9-460f-b4dc-a6293ca13003.json deleted file mode 100644 index bae9b1cff..000000000 --- a/data/hfopenllm_v2/Gryphe/Pantheon-RP-1.6-12b-Nemo/9a2ca2e5-a2e9-460f-b4dc-a6293ca13003.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Gryphe_Pantheon-RP-1.6-12b-Nemo/1762652579.634059", - "retrieved_timestamp": "1762652579.6340601", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Gryphe/Pantheon-RP-1.6-12b-Nemo", - "developer": "Gryphe", - "inference_platform": "unknown", - "id": "Gryphe/Pantheon-RP-1.6-12b-Nemo", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44805671174705336 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5204007434392454 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04607250755287009 - } - }, - { - "evaluation_name": "GPQA", - 
"metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4287604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33111702127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/Gryphe/Pantheon-RP-1.6-12b-Nemo/c588d86a-80c4-46d1-93e0-b7fa8491f3b3.json b/data/hfopenllm_v2/Gryphe/Pantheon-RP-1.6-12b-Nemo/c588d86a-80c4-46d1-93e0-b7fa8491f3b3.json new file mode 100644 index 000000000..36f59a731 --- /dev/null +++ b/data/hfopenllm_v2/Gryphe/Pantheon-RP-1.6-12b-Nemo/c588d86a-80c4-46d1-93e0-b7fa8491f3b3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Gryphe_Pantheon-RP-1.6-12b-Nemo/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Pantheon-RP-1.6-12b-Nemo", + "id": "Gryphe/Pantheon-RP-1.6-12b-Nemo", + "developer": "Gryphe", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4481 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5204 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0461 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4288 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3311 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Gryphe/Pantheon-RP-Pure-1.6.2-22b-Small/0b11eb9a-61c8-4af1-8335-24bef2597e5d.json b/data/hfopenllm_v2/Gryphe/Pantheon-RP-Pure-1.6.2-22b-Small/0b11eb9a-61c8-4af1-8335-24bef2597e5d.json new file mode 100644 index 000000000..672a5af34 --- /dev/null +++ b/data/hfopenllm_v2/Gryphe/Pantheon-RP-Pure-1.6.2-22b-Small/0b11eb9a-61c8-4af1-8335-24bef2597e5d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Gryphe_Pantheon-RP-Pure-1.6.2-22b-Small/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Pantheon-RP-Pure-1.6.2-22b-Small", + "id": "Gryphe/Pantheon-RP-Pure-1.6.2-22b-Small", + "developer": "Gryphe", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6931 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5305 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2024 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3289 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3765 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + 
"dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3942 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Gryphe/Pantheon-RP-Pure-1.6.2-22b-Small/f5f73aa0-2223-49c0-a2ad-df38ee33355b.json b/data/hfopenllm_v2/Gryphe/Pantheon-RP-Pure-1.6.2-22b-Small/f5f73aa0-2223-49c0-a2ad-df38ee33355b.json deleted file mode 100644 index 7a5d165a2..000000000 --- a/data/hfopenllm_v2/Gryphe/Pantheon-RP-Pure-1.6.2-22b-Small/f5f73aa0-2223-49c0-a2ad-df38ee33355b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Gryphe_Pantheon-RP-Pure-1.6.2-22b-Small/1762652579.6344929", - "retrieved_timestamp": "1762652579.6344929", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Gryphe/Pantheon-RP-Pure-1.6.2-22b-Small", - "developer": "Gryphe", - "inference_platform": "unknown", - "id": "Gryphe/Pantheon-RP-Pure-1.6.2-22b-Small", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 22.247 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6931042965996888 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5304537230538597 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20241691842900303 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37647916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39419880319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall/5aa1bdc6-4b8f-411f-9150-41217a94ec5e.json b/data/hfopenllm_v2/GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall/5aa1bdc6-4b8f-411f-9150-41217a94ec5e.json deleted file mode 100644 index 3dd710ad5..000000000 --- 
a/data/hfopenllm_v2/GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall/5aa1bdc6-4b8f-411f-9150-41217a94ec5e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/GuilhermeNaturaUmana_Nature-Reason-1.2-reallysmall/1762652579.63471", - "retrieved_timestamp": "1762652579.634711", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall", - "developer": "GuilhermeNaturaUmana", - "inference_platform": "unknown", - "id": "GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4985405391029136 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5644838945274894 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25755287009063443 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43728125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44290226063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall/7d31e5fd-700a-42a8-bea8-8989e8c52603.json b/data/hfopenllm_v2/GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall/7d31e5fd-700a-42a8-bea8-8989e8c52603.json new file mode 100644 index 000000000..51a98d10f --- /dev/null +++ b/data/hfopenllm_v2/GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall/7d31e5fd-700a-42a8-bea8-8989e8c52603.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/GuilhermeNaturaUmana_Nature-Reason-1.2-reallysmall/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nature-Reason-1.2-reallysmall", + "id": "GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall", + "developer": "GuilhermeNaturaUmana", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4791 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5649 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4439 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4408 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall/9ddf874c-16a9-4f66-a3c5-140f10bc4787.json b/data/hfopenllm_v2/GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall/9ddf874c-16a9-4f66-a3c5-140f10bc4787.json deleted file mode 100644 index c8f80151c..000000000 --- a/data/hfopenllm_v2/GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall/9ddf874c-16a9-4f66-a3c5-140f10bc4787.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/GuilhermeNaturaUmana_Nature-Reason-1.2-reallysmall/1762652579.634963", - "retrieved_timestamp": "1762652579.634964", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall", - "developer": "GuilhermeNaturaUmana", - "inference_platform": "unknown", - "id": 
"GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47910654840268263 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5648715950622487 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4439166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4408244680851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall/f993880a-3c7c-4af9-a3ce-3c27207b9a3c.json b/data/hfopenllm_v2/GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall/f993880a-3c7c-4af9-a3ce-3c27207b9a3c.json new file mode 100644 index 000000000..36f7c0fb0 --- /dev/null +++ b/data/hfopenllm_v2/GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall/f993880a-3c7c-4af9-a3ce-3c27207b9a3c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/GuilhermeNaturaUmana_Nature-Reason-1.2-reallysmall/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nature-Reason-1.2-reallysmall", + "id": "GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall", + "developer": "GuilhermeNaturaUmana", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4985 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5645 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4373 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4429 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Gunulhona/Gemma-Ko-Merge-PEFT/2fae7e4a-8c28-4be8-9391-ca79077e32c2.json b/data/hfopenllm_v2/Gunulhona/Gemma-Ko-Merge-PEFT/2fae7e4a-8c28-4be8-9391-ca79077e32c2.json new file mode 100644 index 000000000..3d2506966 --- /dev/null +++ b/data/hfopenllm_v2/Gunulhona/Gemma-Ko-Merge-PEFT/2fae7e4a-8c28-4be8-9391-ca79077e32c2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Gunulhona_Gemma-Ko-Merge-PEFT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma-Ko-Merge-PEFT", + "id": "Gunulhona/Gemma-Ko-Merge-PEFT", + "developer": "Gunulhona", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 20.318 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.288 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5154 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3247 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.408 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3817 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Gunulhona/Gemma-Ko-Merge-PEFT/436e651e-6f04-44ff-ab3d-db8ed0d639bd.json b/data/hfopenllm_v2/Gunulhona/Gemma-Ko-Merge-PEFT/436e651e-6f04-44ff-ab3d-db8ed0d639bd.json new file mode 100644 index 000000000..d5f8fff6e --- /dev/null +++ b/data/hfopenllm_v2/Gunulhona/Gemma-Ko-Merge-PEFT/436e651e-6f04-44ff-ab3d-db8ed0d639bd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Gunulhona_Gemma-Ko-Merge-PEFT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma-Ko-Merge-PEFT", + "id": "Gunulhona/Gemma-Ko-Merge-PEFT", + "developer": "Gunulhona", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 20.318 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4441 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4863 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + 
{ + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3986 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3098 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Gunulhona/Gemma-Ko-Merge/9fbccac2-c840-494e-a24d-a6f0c9a07b88.json b/data/hfopenllm_v2/Gunulhona/Gemma-Ko-Merge/9fbccac2-c840-494e-a24d-a6f0c9a07b88.json new file mode 100644 index 000000000..6f57f8828 --- /dev/null +++ b/data/hfopenllm_v2/Gunulhona/Gemma-Ko-Merge/9fbccac2-c840-494e-a24d-a6f0c9a07b88.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Gunulhona_Gemma-Ko-Merge/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma-Ko-Merge", + "id": "Gunulhona/Gemma-Ko-Merge", + "developer": "Gunulhona", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6416 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5813 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1881 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3356 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4047 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3879 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HPAI-BSC/Llama3-Aloe-8B-Alpha/a4ee6a33-df51-4a4e-a13d-45488a094fd7.json b/data/hfopenllm_v2/HPAI-BSC/Llama3-Aloe-8B-Alpha/a4ee6a33-df51-4a4e-a13d-45488a094fd7.json new file mode 100644 index 000000000..97b6f883b --- /dev/null +++ b/data/hfopenllm_v2/HPAI-BSC/Llama3-Aloe-8B-Alpha/a4ee6a33-df51-4a4e-a13d-45488a094fd7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/HPAI-BSC_Llama3-Aloe-8B-Alpha/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3-Aloe-8B-Alpha", + "id": "HPAI-BSC/Llama3-Aloe-8B-Alpha", + "developer": "HPAI-BSC", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5081 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4831 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0612 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": 
{ + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3673 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3295 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HPAI-BSC/Llama3.1-Aloe-Beta-8B/a3923f10-e64c-4556-9616-4fe7072eff60.json b/data/hfopenllm_v2/HPAI-BSC/Llama3.1-Aloe-Beta-8B/a3923f10-e64c-4556-9616-4fe7072eff60.json new file mode 100644 index 000000000..00bf8e2ef --- /dev/null +++ b/data/hfopenllm_v2/HPAI-BSC/Llama3.1-Aloe-Beta-8B/a3923f10-e64c-4556-9616-4fe7072eff60.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/HPAI-BSC_Llama3.1-Aloe-Beta-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.1-Aloe-Beta-8B", + "id": "HPAI-BSC/Llama3.1-Aloe-Beta-8B", + "developer": "HPAI-BSC", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7253 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5093 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1828 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3835 + } + }, + { + "evaluation_name": "MMLU-PRO", + 
"source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.358 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HPAI-BSC/Qwen2.5-Aloe-Beta-7B/ca15d972-9075-42df-884b-5d069f6ff425.json b/data/hfopenllm_v2/HPAI-BSC/Qwen2.5-Aloe-Beta-7B/ca15d972-9075-42df-884b-5d069f6ff425.json new file mode 100644 index 000000000..4e7791b20 --- /dev/null +++ b/data/hfopenllm_v2/HPAI-BSC/Qwen2.5-Aloe-Beta-7B/ca15d972-9075-42df-884b-5d069f6ff425.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/HPAI-BSC_Qwen2.5-Aloe-Beta-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-Aloe-Beta-7B", + "id": "HPAI-BSC/Qwen2.5-Aloe-Beta-7B", + "developer": "HPAI-BSC", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4554 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5049 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3542 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.426 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4354 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HarbingerX/Zeitgeist-3b-V1.2/37dad0cc-36d1-4a4c-8d9c-0f5246889a0c.json b/data/hfopenllm_v2/HarbingerX/Zeitgeist-3b-V1.2/37dad0cc-36d1-4a4c-8d9c-0f5246889a0c.json deleted file mode 100644 index 20a4f40aa..000000000 --- a/data/hfopenllm_v2/HarbingerX/Zeitgeist-3b-V1.2/37dad0cc-36d1-4a4c-8d9c-0f5246889a0c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/HarbingerX_Zeitgeist-3b-V1.2/1762652579.6374269", - "retrieved_timestamp": "1762652579.637428", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "HarbingerX/Zeitgeist-3b-V1.2", - "developer": "HarbingerX", - "inference_platform": "unknown", - "id": "HarbingerX/Zeitgeist-3b-V1.2", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6754189993661264 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4440650477102142 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10120845921450151 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35790625000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30560172872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/HarbingerX/Zeitgeist-3b-V1.2/905909a5-abef-46bf-9392-c97873e229df.json b/data/hfopenllm_v2/HarbingerX/Zeitgeist-3b-V1.2/905909a5-abef-46bf-9392-c97873e229df.json new file mode 100644 index 000000000..e6b388131 --- /dev/null +++ b/data/hfopenllm_v2/HarbingerX/Zeitgeist-3b-V1.2/905909a5-abef-46bf-9392-c97873e229df.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/HarbingerX_Zeitgeist-3b-V1.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + 
"evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Zeitgeist-3b-V1.2", + "id": "HarbingerX/Zeitgeist-3b-V1.2", + "developer": "HarbingerX", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6754 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4441 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1012 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3579 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3056 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HarbingerX/Zeitgeist-3b-V1/3bc34460-661d-404b-bb1c-5b2fe395b897.json b/data/hfopenllm_v2/HarbingerX/Zeitgeist-3b-V1/3bc34460-661d-404b-bb1c-5b2fe395b897.json deleted file mode 100644 index 89c8e3e9e..000000000 --- a/data/hfopenllm_v2/HarbingerX/Zeitgeist-3b-V1/3bc34460-661d-404b-bb1c-5b2fe395b897.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/HarbingerX_Zeitgeist-3b-V1/1762652579.637166", - "retrieved_timestamp": "1762652579.6371672", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "HarbingerX/Zeitgeist-3b-V1", - "developer": "HarbingerX", - 
"inference_platform": "unknown", - "id": "HarbingerX/Zeitgeist-3b-V1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6711724889958643 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4440790761237121 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10347432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3579375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3009474734042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/HarbingerX/Zeitgeist-3b-V1/95bd05cf-8f59-409d-a99e-d249bad6c561.json b/data/hfopenllm_v2/HarbingerX/Zeitgeist-3b-V1/95bd05cf-8f59-409d-a99e-d249bad6c561.json new file mode 100644 index 000000000..e04145e0b --- /dev/null +++ b/data/hfopenllm_v2/HarbingerX/Zeitgeist-3b-V1/95bd05cf-8f59-409d-a99e-d249bad6c561.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/HarbingerX_Zeitgeist-3b-V1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Zeitgeist-3b-V1", + "id": "HarbingerX/Zeitgeist-3b-V1", + "developer": "HarbingerX", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6712 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4441 + } + }, + { + 
"evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1035 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2819 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3579 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3009 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Hastagaras/L3.2-JametMini-3B-MK.III/76b12246-33f6-4992-a0ab-38704dcf6345.json b/data/hfopenllm_v2/Hastagaras/L3.2-JametMini-3B-MK.III/76b12246-33f6-4992-a0ab-38704dcf6345.json new file mode 100644 index 000000000..9a45e8545 --- /dev/null +++ b/data/hfopenllm_v2/Hastagaras/L3.2-JametMini-3B-MK.III/76b12246-33f6-4992-a0ab-38704dcf6345.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Hastagaras_L3.2-JametMini-3B-MK.III/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3.2-JametMini-3B-MK.III", + "id": "Hastagaras/L3.2-JametMini-3B-MK.III", + "developer": "Hastagaras", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6183 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4539 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1458 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3686 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2983 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Hastagaras/L3.2-JametMini-3B-MK.III/cf208ef7-8a9b-4633-8161-dae0825c380e.json b/data/hfopenllm_v2/Hastagaras/L3.2-JametMini-3B-MK.III/cf208ef7-8a9b-4633-8161-dae0825c380e.json deleted file mode 100644 index 7cd076d1a..000000000 --- a/data/hfopenllm_v2/Hastagaras/L3.2-JametMini-3B-MK.III/cf208ef7-8a9b-4633-8161-dae0825c380e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Hastagaras_L3.2-JametMini-3B-MK.III/1762652579.6376362", - "retrieved_timestamp": "1762652579.6376371", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Hastagaras/L3.2-JametMini-3B-MK.III", - "developer": "Hastagaras", - "inference_platform": "unknown", - "id": "Hastagaras/L3.2-JametMini-3B-MK.III", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6182662003484088 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45385245294894094 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14577039274924472 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3686041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2982878989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/Hastagaras/Llama-3.1-Jamet-8B-MK.I/e4415806-0ec0-465a-b28f-9c8741436fb4.json b/data/hfopenllm_v2/Hastagaras/Llama-3.1-Jamet-8B-MK.I/e4415806-0ec0-465a-b28f-9c8741436fb4.json new file mode 100644 index 000000000..680f1f7a0 --- /dev/null +++ b/data/hfopenllm_v2/Hastagaras/Llama-3.1-Jamet-8B-MK.I/e4415806-0ec0-465a-b28f-9c8741436fb4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Hastagaras_Llama-3.1-Jamet-8B-MK.I/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-Jamet-8B-MK.I", + "id": "Hastagaras/Llama-3.1-Jamet-8B-MK.I", + "developer": "Hastagaras", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7338 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5049 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1269 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3726 + } + }, + { + "evaluation_name": "MMLU-PRO", + 
"source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3482 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Hastagaras/Zabuza-8B-Llama-3.1/98e62ab5-d35a-42dd-904b-bed9c50f3745.json b/data/hfopenllm_v2/Hastagaras/Zabuza-8B-Llama-3.1/98e62ab5-d35a-42dd-904b-bed9c50f3745.json new file mode 100644 index 000000000..a06055349 --- /dev/null +++ b/data/hfopenllm_v2/Hastagaras/Zabuza-8B-Llama-3.1/98e62ab5-d35a-42dd-904b-bed9c50f3745.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Hastagaras_Zabuza-8B-Llama-3.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Zabuza-8B-Llama-3.1", + "id": "Hastagaras/Zabuza-8B-Llama-3.1", + "developer": "Hastagaras", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6265 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4539 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0551 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3568 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2923 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HelpingAI/Cipher-20B/21f72176-cf3b-43ae-aa6e-51d9fe5a6e90.json b/data/hfopenllm_v2/HelpingAI/Cipher-20B/21f72176-cf3b-43ae-aa6e-51d9fe5a6e90.json deleted file mode 100644 index 3894da742..000000000 --- a/data/hfopenllm_v2/HelpingAI/Cipher-20B/21f72176-cf3b-43ae-aa6e-51d9fe5a6e90.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/HelpingAI_Cipher-20B/1762652579.638349", - "retrieved_timestamp": "1762652579.63835", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "HelpingAI/Cipher-20B", - "developer": "HelpingAI", - "inference_platform": "unknown", - "id": "HelpingAI/Cipher-20B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 20.551 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5377575942942504 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6032432743536918 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19939577039274925 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40029166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3744182180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/HelpingAI/Cipher-20B/8fb3596e-224e-492b-bdb6-a95a16656eb0.json b/data/hfopenllm_v2/HelpingAI/Cipher-20B/8fb3596e-224e-492b-bdb6-a95a16656eb0.json new file mode 100644 index 000000000..a8c33014c --- /dev/null +++ b/data/hfopenllm_v2/HelpingAI/Cipher-20B/8fb3596e-224e-492b-bdb6-a95a16656eb0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/HelpingAI_Cipher-20B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"Cipher-20B", + "id": "HelpingAI/Cipher-20B", + "developer": "HelpingAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 20.551 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5378 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6032 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1994 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3744 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HelpingAI/Dhanishtha-Large/154203c4-d86e-4c36-806b-c45c5cc568ce.json b/data/hfopenllm_v2/HelpingAI/Dhanishtha-Large/154203c4-d86e-4c36-806b-c45c5cc568ce.json new file mode 100644 index 000000000..d0d75bfd3 --- /dev/null +++ b/data/hfopenllm_v2/HelpingAI/Dhanishtha-Large/154203c4-d86e-4c36-806b-c45c5cc568ce.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/HelpingAI_Dhanishtha-Large/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dhanishtha-Large", + "id": "HelpingAI/Dhanishtha-Large", + "developer": "HelpingAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + 
"evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2457 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4604 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3852 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3845 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2755 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HelpingAI/Dhanishtha-Large/e097ccca-ab91-4f16-bbfa-ca97c91fdb77.json b/data/hfopenllm_v2/HelpingAI/Dhanishtha-Large/e097ccca-ab91-4f16-bbfa-ca97c91fdb77.json deleted file mode 100644 index 3fa1ec0c2..000000000 --- a/data/hfopenllm_v2/HelpingAI/Dhanishtha-Large/e097ccca-ab91-4f16-bbfa-ca97c91fdb77.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/HelpingAI_Dhanishtha-Large/1762652579.638597", - "retrieved_timestamp": "1762652579.638598", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "HelpingAI/Dhanishtha-Large", - "developer": "HelpingAI", - "inference_platform": "unknown", - "id": "HelpingAI/Dhanishtha-Large", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24567370133468086 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46036539145861094 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38451041666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2755152925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/HelpingAI/Priya-10B/94aca944-b0a9-46ec-bdab-53bb5cbe3b78.json b/data/hfopenllm_v2/HelpingAI/Priya-10B/94aca944-b0a9-46ec-bdab-53bb5cbe3b78.json deleted file mode 100644 index 95a750750..000000000 --- a/data/hfopenllm_v2/HelpingAI/Priya-10B/94aca944-b0a9-46ec-bdab-53bb5cbe3b78.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/HelpingAI_Priya-10B/1762652579.638817", - "retrieved_timestamp": "1762652579.638818", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "HelpingAI/Priya-10B", - "developer": "HelpingAI", - "inference_platform": "unknown", - "id": "HelpingAI/Priya-10B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.211 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40429283190822574 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4441457310476767 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0188821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3792708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24925199468085107 - } - } - ] -} diff --git a/data/hfopenllm_v2/HelpingAI/Priya-10B/e42c01f7-2869-4103-bbfd-81aa5a15c140.json b/data/hfopenllm_v2/HelpingAI/Priya-10B/e42c01f7-2869-4103-bbfd-81aa5a15c140.json new file mode 100644 index 000000000..5c591c97d --- /dev/null +++ b/data/hfopenllm_v2/HelpingAI/Priya-10B/e42c01f7-2869-4103-bbfd-81aa5a15c140.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/HelpingAI_Priya-10B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Priya-10B", + "id": "HelpingAI/Priya-10B", + "developer": "HelpingAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.211 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4043 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4441 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0189 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2559 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3793 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + 
"hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2493 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HelpingAI/Priya-3B/323d2f94-5e04-4627-9f74-129217f53eea.json b/data/hfopenllm_v2/HelpingAI/Priya-3B/323d2f94-5e04-4627-9f74-129217f53eea.json new file mode 100644 index 000000000..ab36912a7 --- /dev/null +++ b/data/hfopenllm_v2/HelpingAI/Priya-3B/323d2f94-5e04-4627-9f74-129217f53eea.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/HelpingAI_Priya-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Priya-3B", + "id": "HelpingAI/Priya-3B", + "developer": "HelpingAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 2.81 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4526 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3961 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0144 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2567 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3713 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2339 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/HelpingAI/Priya-3B/f709afd7-3220-41b0-909a-74d9086c7dd9.json b/data/hfopenllm_v2/HelpingAI/Priya-3B/f709afd7-3220-41b0-909a-74d9086c7dd9.json deleted file mode 100644 index d23fdec71..000000000 --- a/data/hfopenllm_v2/HelpingAI/Priya-3B/f709afd7-3220-41b0-909a-74d9086c7dd9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/HelpingAI_Priya-3B/1762652579.639023", - "retrieved_timestamp": "1762652579.639024", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "HelpingAI/Priya-3B", - "developer": "HelpingAI", - "inference_platform": "unknown", - "id": "HelpingAI/Priya-3B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 2.81 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4525780484669566 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3961184863327844 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3713020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23387632978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/HeraiHench/DeepSeek-R1-Qwen-Coder-8B/6bcc284b-8973-47d5-b5b1-1abb7a3242ee.json b/data/hfopenllm_v2/HeraiHench/DeepSeek-R1-Qwen-Coder-8B/6bcc284b-8973-47d5-b5b1-1abb7a3242ee.json new file mode 100644 index 000000000..83225ff07 --- /dev/null +++ b/data/hfopenllm_v2/HeraiHench/DeepSeek-R1-Qwen-Coder-8B/6bcc284b-8973-47d5-b5b1-1abb7a3242ee.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/HeraiHench_DeepSeek-R1-Qwen-Coder-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-R1-Qwen-Coder-8B", + "id": "HeraiHench/DeepSeek-R1-Qwen-Coder-8B", + "developer": "HeraiHench", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 8.164 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1869 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2913 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3738 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1123 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HeraiHench/Double-Down-Qwen-Math-7B/691cace3-5316-4f5b-8693-67efb24a0a06.json b/data/hfopenllm_v2/HeraiHench/Double-Down-Qwen-Math-7B/691cace3-5316-4f5b-8693-67efb24a0a06.json new file mode 100644 index 000000000..6515c0ca8 --- /dev/null +++ b/data/hfopenllm_v2/HeraiHench/Double-Down-Qwen-Math-7B/691cace3-5316-4f5b-8693-67efb24a0a06.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/HeraiHench_Double-Down-Qwen-Math-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Double-Down-Qwen-Math-7B", + "id": "HeraiHench/Double-Down-Qwen-Math-7B", + "developer": "HeraiHench", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", 
+ "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.167 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2845 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0008 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3737 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1112 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HeraiHench/Marge-Qwen-Math-7B/d387b3dc-9e76-44a6-9a9f-132a4fd762b4.json b/data/hfopenllm_v2/HeraiHench/Marge-Qwen-Math-7B/d387b3dc-9e76-44a6-9a9f-132a4fd762b4.json new file mode 100644 index 000000000..c9043860f --- /dev/null +++ b/data/hfopenllm_v2/HeraiHench/Marge-Qwen-Math-7B/d387b3dc-9e76-44a6-9a9f-132a4fd762b4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/HeraiHench_Marge-Qwen-Math-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Marge-Qwen-Math-7B", + "id": "HeraiHench/Marge-Qwen-Math-7B", + "developer": "HeraiHench", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1262 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3069 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0053 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2391 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3939 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1056 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HeraiHench/Phi-4-slerp-ReasoningRP-14B/f6f515d3-f5e9-4362-be51-bb8fc05527e6.json b/data/hfopenllm_v2/HeraiHench/Phi-4-slerp-ReasoningRP-14B/f6f515d3-f5e9-4362-be51-bb8fc05527e6.json new file mode 100644 index 000000000..287a631b7 --- /dev/null +++ b/data/hfopenllm_v2/HeraiHench/Phi-4-slerp-ReasoningRP-14B/f6f515d3-f5e9-4362-be51-bb8fc05527e6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/HeraiHench_Phi-4-slerp-ReasoningRP-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-4-slerp-ReasoningRP-14B", + "id": "HeraiHench/Phi-4-slerp-ReasoningRP-14B", + "developer": "HeraiHench", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 9.207 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1575 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + 
"hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4196 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3116 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.19 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HiroseKoichi/Llama-Salad-4x8B-V3/2e1e215f-b622-439f-a13f-531441e25ae3.json b/data/hfopenllm_v2/HiroseKoichi/Llama-Salad-4x8B-V3/2e1e215f-b622-439f-a13f-531441e25ae3.json new file mode 100644 index 000000000..68ae14b6f --- /dev/null +++ b/data/hfopenllm_v2/HiroseKoichi/Llama-Salad-4x8B-V3/2e1e215f-b622-439f-a13f-531441e25ae3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/HiroseKoichi_Llama-Salad-4x8B-V3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-Salad-4x8B-V3", + "id": "HiroseKoichi/Llama-Salad-4x8B-V3", + "developer": "HiroseKoichi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.942 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6654 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.5245 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0959 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.374 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3518 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HoangHa/Pensez-Llama3.1-8B/d50d66a9-a0c4-4b82-922c-9d012f1b50a1.json b/data/hfopenllm_v2/HoangHa/Pensez-Llama3.1-8B/d50d66a9-a0c4-4b82-922c-9d012f1b50a1.json new file mode 100644 index 000000000..bdc98fec4 --- /dev/null +++ b/data/hfopenllm_v2/HoangHa/Pensez-Llama3.1-8B/d50d66a9-a0c4-4b82-922c-9d012f1b50a1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/HoangHa_Pensez-Llama3.1-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Pensez-Llama3.1-8B", + "id": "HoangHa/Pensez-Llama3.1-8B", + "developer": "HoangHa", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3887 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4669 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 
5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1148 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2886 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3597 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3126 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HuggingFaceH4/zephyr-7b-alpha/2029aa96-40b2-4af8-a7fa-8ae968b20502.json b/data/hfopenllm_v2/HuggingFaceH4/zephyr-7b-alpha/2029aa96-40b2-4af8-a7fa-8ae968b20502.json deleted file mode 100644 index 2a44dfd0c..000000000 --- a/data/hfopenllm_v2/HuggingFaceH4/zephyr-7b-alpha/2029aa96-40b2-4af8-a7fa-8ae968b20502.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/HuggingFaceH4_zephyr-7b-alpha/1762652579.640769", - "retrieved_timestamp": "1762652579.64077", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "HuggingFaceH4/zephyr-7b-alpha", - "developer": "HuggingFaceH4", - "inference_platform": "unknown", - "id": "HuggingFaceH4/zephyr-7b-alpha", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5191480826429429 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45828635059044115 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3949583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2795046542553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/HuggingFaceH4/zephyr-7b-alpha/ea7292a8-3f07-47be-b8ae-7d352ed1ecb6.json b/data/hfopenllm_v2/HuggingFaceH4/zephyr-7b-alpha/ea7292a8-3f07-47be-b8ae-7d352ed1ecb6.json new file mode 100644 index 000000000..6f52aa9ae --- /dev/null +++ b/data/hfopenllm_v2/HuggingFaceH4/zephyr-7b-alpha/ea7292a8-3f07-47be-b8ae-7d352ed1ecb6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/HuggingFaceH4_zephyr-7b-alpha/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zephyr-7b-alpha", + "id": "HuggingFaceH4/zephyr-7b-alpha", + "developer": "HuggingFaceH4", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5191 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4583 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0196 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.395 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2795 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HuggingFaceH4/zephyr-7b-beta/3b9d5166-4144-4222-a39d-3d1d3956a6e8.json b/data/hfopenllm_v2/HuggingFaceH4/zephyr-7b-beta/3b9d5166-4144-4222-a39d-3d1d3956a6e8.json deleted file mode 100644 index f2955c7fb..000000000 --- a/data/hfopenllm_v2/HuggingFaceH4/zephyr-7b-beta/3b9d5166-4144-4222-a39d-3d1d3956a6e8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/HuggingFaceH4_zephyr-7b-beta/1762652579.641025", - "retrieved_timestamp": "1762652579.641026", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "HuggingFaceH4/zephyr-7b-beta", - "developer": "HuggingFaceH4", - "inference_platform": "unknown", - "id": "HuggingFaceH4/zephyr-7b-beta", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49504315216957673 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.431582191918003 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3925416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2780917553191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/HuggingFaceH4/zephyr-7b-beta/4eedd6d4-279f-4660-8d71-708a27bb53e0.json b/data/hfopenllm_v2/HuggingFaceH4/zephyr-7b-beta/4eedd6d4-279f-4660-8d71-708a27bb53e0.json new file mode 100644 index 000000000..243694134 --- /dev/null +++ b/data/hfopenllm_v2/HuggingFaceH4/zephyr-7b-beta/4eedd6d4-279f-4660-8d71-708a27bb53e0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/HuggingFaceH4_zephyr-7b-beta/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { 
+ "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zephyr-7b-beta", + "id": "HuggingFaceH4/zephyr-7b-beta", + "developer": "HuggingFaceH4", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.495 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4316 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0287 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3925 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2781 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HuggingFaceH4/zephyr-7b-gemma-v0.1/9c0f67d1-f95d-4ca0-a234-2e09ac788f55.json b/data/hfopenllm_v2/HuggingFaceH4/zephyr-7b-gemma-v0.1/9c0f67d1-f95d-4ca0-a234-2e09ac788f55.json new file mode 100644 index 000000000..46badaeea --- /dev/null +++ b/data/hfopenllm_v2/HuggingFaceH4/zephyr-7b-gemma-v0.1/9c0f67d1-f95d-4ca0-a234-2e09ac788f55.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/HuggingFaceH4_zephyr-7b-gemma-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zephyr-7b-gemma-v0.1", + "id": 
"HuggingFaceH4/zephyr-7b-gemma-v0.1", + "developer": "HuggingFaceH4", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 8.538 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3364 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4624 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0816 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.374 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2847 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1/8b347bb4-9f6d-4c82-bd5d-2fb5f7c8f881.json b/data/hfopenllm_v2/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1/8b347bb4-9f6d-4c82-bd5d-2fb5f7c8f881.json deleted file mode 100644 index 275a79f5a..000000000 --- a/data/hfopenllm_v2/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1/8b347bb4-9f6d-4c82-bd5d-2fb5f7c8f881.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/HuggingFaceH4_zephyr-orpo-141b-A35b-v0.1/1762652579.641484", - "retrieved_timestamp": "1762652579.641485", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1", - "developer": "HuggingFaceH4", - "inference_platform": "unknown", - 
"id": "HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1", - "additional_details": { - "precision": "float16", - "architecture": "MixtralForCausalLM", - "params_billions": 140.621 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6510891102275296 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6290439728524093 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20468277945619334 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3783557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4465208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4586103723404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1/e5c0fbc9-f424-4b04-839a-8335adaf89cc.json b/data/hfopenllm_v2/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1/e5c0fbc9-f424-4b04-839a-8335adaf89cc.json new file mode 100644 index 000000000..24a491daf --- /dev/null +++ b/data/hfopenllm_v2/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1/e5c0fbc9-f424-4b04-839a-8335adaf89cc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/HuggingFaceH4_zephyr-orpo-141b-A35b-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zephyr-orpo-141b-A35b-v0.1", + "id": "HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1", + "developer": "HuggingFaceH4", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MixtralForCausalLM", + "params_billions": 140.621 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6511 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.629 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2047 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3784 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4465 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4586 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM-1.7B-Instruct/690a5844-000e-4949-bbf9-8bd1ff2cb1bd.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM-1.7B-Instruct/690a5844-000e-4949-bbf9-8bd1ff2cb1bd.json deleted file mode 100644 index 2f4296c00..000000000 --- a/data/hfopenllm_v2/HuggingFaceTB/SmolLM-1.7B-Instruct/690a5844-000e-4949-bbf9-8bd1ff2cb1bd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM-1.7B-Instruct/1762652579.641991", - "retrieved_timestamp": "1762652579.641991", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "HuggingFaceTB/SmolLM-1.7B-Instruct", - "developer": "HuggingFaceTB", - "inference_platform": "unknown", - "id": "HuggingFaceTB/SmolLM-1.7B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.71 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23478259905938464 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28851114363217695 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.021148036253776436 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3486666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11660571808510638 - } - } - ] -} diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM-1.7B-Instruct/d91107fa-eb8d-4d01-90a2-fc9831f337b2.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM-1.7B-Instruct/d91107fa-eb8d-4d01-90a2-fc9831f337b2.json new file mode 100644 index 000000000..0268b6bbe --- /dev/null +++ b/data/hfopenllm_v2/HuggingFaceTB/SmolLM-1.7B-Instruct/d91107fa-eb8d-4d01-90a2-fc9831f337b2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM-1.7B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SmolLM-1.7B-Instruct", + "id": "HuggingFaceTB/SmolLM-1.7B-Instruct", + "developer": "HuggingFaceTB", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.71 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2348 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2885 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0211 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, 
+ "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3487 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1166 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM-1.7B/926999bf-1ba6-4321-82b2-fcced4336739.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM-1.7B/926999bf-1ba6-4321-82b2-fcced4336739.json new file mode 100644 index 000000000..e9cd807d2 --- /dev/null +++ b/data/hfopenllm_v2/HuggingFaceTB/SmolLM-1.7B/926999bf-1ba6-4321-82b2-fcced4336739.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM-1.7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SmolLM-1.7B", + "id": "HuggingFaceTB/SmolLM-1.7B", + "developer": "HuggingFaceTB", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.71 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2362 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3181 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0166 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2416 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3421 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + 
"dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1148 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM-1.7B/e1b7c18a-bff1-44a3-b589-95bcb0f88e36.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM-1.7B/e1b7c18a-bff1-44a3-b589-95bcb0f88e36.json deleted file mode 100644 index 5ace40b6a..000000000 --- a/data/hfopenllm_v2/HuggingFaceTB/SmolLM-1.7B/e1b7c18a-bff1-44a3-b589-95bcb0f88e36.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM-1.7B/1762652579.6417458", - "retrieved_timestamp": "1762652579.6417458", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "HuggingFaceTB/SmolLM-1.7B", - "developer": "HuggingFaceTB", - "inference_platform": "unknown", - "id": "HuggingFaceTB/SmolLM-1.7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.71 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23615673080759053 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3180516538964782 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24161073825503357 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34209375000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11477726063829788 - } - } - ] -} diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM-135M-Instruct/57d481bf-0db9-4208-afda-dcd20df13964.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM-135M-Instruct/57d481bf-0db9-4208-afda-dcd20df13964.json new file mode 100644 index 000000000..745d7547b --- /dev/null +++ b/data/hfopenllm_v2/HuggingFaceTB/SmolLM-135M-Instruct/57d481bf-0db9-4208-afda-dcd20df13964.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/HuggingFaceTB_SmolLM-135M-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SmolLM-135M-Instruct", + "id": "HuggingFaceTB/SmolLM-135M-Instruct", + "developer": "HuggingFaceTB", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1214 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3015 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0053 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3635 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1176 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM-135M-Instruct/adff7af4-9bae-420a-9751-9f68ab81bf99.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM-135M-Instruct/adff7af4-9bae-420a-9751-9f68ab81bf99.json deleted file mode 100644 index 70cd2688d..000000000 --- a/data/hfopenllm_v2/HuggingFaceTB/SmolLM-135M-Instruct/adff7af4-9bae-420a-9751-9f68ab81bf99.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM-135M-Instruct/1762652579.642397", - "retrieved_timestamp": "1762652579.6423979", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "HuggingFaceTB/SmolLM-135M-Instruct", - "developer": "HuggingFaceTB", - "inference_platform": "unknown", - "id": "HuggingFaceTB/SmolLM-135M-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12140121544169469 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30150816789978757 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.005287009063444109 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36345833333333327 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11760305851063829 - } - } - ] -} diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM-135M/8cd60e42-3429-4938-b43e-9c951a57ca9f.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM-135M/8cd60e42-3429-4938-b43e-9c951a57ca9f.json deleted file mode 100644 index 6f8359347..000000000 --- a/data/hfopenllm_v2/HuggingFaceTB/SmolLM-135M/8cd60e42-3429-4938-b43e-9c951a57ca9f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM-135M/1762652579.642195", - "retrieved_timestamp": "1762652579.642196", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "HuggingFaceTB/SmolLM-135M", - "developer": "HuggingFaceTB", - "inference_platform": "unknown", - "id": "HuggingFaceTB/SmolLM-135M", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.13 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.21247622973709757 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3046054260062988 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4366041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11220079787234043 - } - } - ] -} diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM-135M/eb417e47-fe63-4dc5-b3e5-28782f3782da.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM-135M/eb417e47-fe63-4dc5-b3e5-28782f3782da.json new file mode 100644 index 000000000..b15e9c58d --- /dev/null +++ b/data/hfopenllm_v2/HuggingFaceTB/SmolLM-135M/eb417e47-fe63-4dc5-b3e5-28782f3782da.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM-135M/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SmolLM-135M", + "id": "HuggingFaceTB/SmolLM-135M", + "developer": "HuggingFaceTB", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.13 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2125 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3046 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0136 + } + }, + { + 
"evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4366 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1122 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM-360M-Instruct/b0f516dd-7185-4906-87a5-3c6f019894d0.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM-360M-Instruct/b0f516dd-7185-4906-87a5-3c6f019894d0.json new file mode 100644 index 000000000..c5826e27d --- /dev/null +++ b/data/hfopenllm_v2/HuggingFaceTB/SmolLM-360M-Instruct/b0f516dd-7185-4906-87a5-3c6f019894d0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM-360M-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SmolLM-360M-Instruct", + "id": "HuggingFaceTB/SmolLM-360M-Instruct", + "developer": "HuggingFaceTB", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.362 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1952 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2885 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0181 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3472 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1166 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM-360M-Instruct/ec13c105-c846-4420-91af-d42e98b7a818.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM-360M-Instruct/ec13c105-c846-4420-91af-d42e98b7a818.json deleted file mode 100644 index 88ec8292b..000000000 --- a/data/hfopenllm_v2/HuggingFaceTB/SmolLM-360M-Instruct/ec13c105-c846-4420-91af-d42e98b7a818.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM-360M-Instruct/1762652579.642821", - "retrieved_timestamp": "1762652579.642821", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "HuggingFaceTB/SmolLM-360M-Instruct", - "developer": "HuggingFaceTB", - "inference_platform": "unknown", - "id": "HuggingFaceTB/SmolLM-360M-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.362 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19516549422199764 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28851114363217695 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01812688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34717708333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11660571808510638 - } - } - ] -} diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM-360M/1e562944-a205-4ef7-aff1-3776595d131c.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM-360M/1e562944-a205-4ef7-aff1-3776595d131c.json new file mode 100644 index 000000000..3e49abf4e --- /dev/null +++ b/data/hfopenllm_v2/HuggingFaceTB/SmolLM-360M/1e562944-a205-4ef7-aff1-3776595d131c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM-360M/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SmolLM-360M", + "id": "HuggingFaceTB/SmolLM-360M", + "developer": "HuggingFaceTB", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.36 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2134 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3065 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0113 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4018 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1124 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM-360M/236f7bdd-be50-4287-82b7-6efddc9dd3f4.json 
b/data/hfopenllm_v2/HuggingFaceTB/SmolLM-360M/236f7bdd-be50-4287-82b7-6efddc9dd3f4.json deleted file mode 100644 index 1124bd15e..000000000 --- a/data/hfopenllm_v2/HuggingFaceTB/SmolLM-360M/236f7bdd-be50-4287-82b7-6efddc9dd3f4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM-360M/1762652579.642613", - "retrieved_timestamp": "1762652579.6426141", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "HuggingFaceTB/SmolLM-360M", - "developer": "HuggingFaceTB", - "inference_platform": "unknown", - "id": "HuggingFaceTB/SmolLM-360M", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.36 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2133505764704318 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30645160333152527 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.011329305135951661 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40178125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11236702127659574 - } - } - ] -} diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-1.7B-Instruct/09b81183-8ff2-44d5-a515-63cddc3e55c6.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-1.7B-Instruct/09b81183-8ff2-44d5-a515-63cddc3e55c6.json deleted file mode 100644 index 02509d2d9..000000000 --- a/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-1.7B-Instruct/09b81183-8ff2-44d5-a515-63cddc3e55c6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-1.7B-Instruct/1762652579.643299", - "retrieved_timestamp": "1762652579.6433", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "HuggingFaceTB/SmolLM2-1.7B-Instruct", - 
"developer": "HuggingFaceTB", - "inference_platform": "unknown", - "id": "HuggingFaceTB/SmolLM2-1.7B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.711 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5367835121920947 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3598617531415158 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0581570996978852 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.342125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2053690159574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-1.7B-Instruct/6ccaf08d-1b0a-4ca9-941e-a71e2dce5cb4.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-1.7B-Instruct/6ccaf08d-1b0a-4ca9-941e-a71e2dce5cb4.json new file mode 100644 index 000000000..7fcd1bd5f --- /dev/null +++ b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-1.7B-Instruct/6ccaf08d-1b0a-4ca9-941e-a71e2dce5cb4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-1.7B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SmolLM2-1.7B-Instruct", + "id": "HuggingFaceTB/SmolLM2-1.7B-Instruct", + "developer": "HuggingFaceTB", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.711 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5368 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3599 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0582 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3421 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2054 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-1.7B/2064938d-9f05-4740-a4d4-2a2da0eac21d.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-1.7B/2064938d-9f05-4740-a4d4-2a2da0eac21d.json new file mode 100644 index 000000000..c33283bc1 --- /dev/null +++ b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-1.7B/2064938d-9f05-4740-a4d4-2a2da0eac21d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-1.7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SmolLM2-1.7B", + "id": "HuggingFaceTB/SmolLM2-1.7B", + "developer": "HuggingFaceTB", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.71 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.244 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3453 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + 
"metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0264 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3485 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2138 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-1.7B/db57503c-bfe7-4691-983e-68af941e8b1e.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-1.7B/db57503c-bfe7-4691-983e-68af941e8b1e.json deleted file mode 100644 index 8801904f3..000000000 --- a/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-1.7B/db57503c-bfe7-4691-983e-68af941e8b1e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-1.7B/1762652579.6430368", - "retrieved_timestamp": "1762652579.643038", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "HuggingFaceTB/SmolLM2-1.7B", - "developer": "HuggingFaceTB", - "inference_platform": "unknown", - "id": "HuggingFaceTB/SmolLM2-1.7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.71 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2440003634800108 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3452594377166261 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3485416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2137632978723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-135M-Instruct/43240184-8245-43ff-a971-678523918fe0.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-135M-Instruct/43240184-8245-43ff-a971-678523918fe0.json new file mode 100644 index 000000000..a41b862d1 --- /dev/null +++ b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-135M-Instruct/43240184-8245-43ff-a971-678523918fe0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-135M-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SmolLM2-135M-Instruct", + "id": "HuggingFaceTB/SmolLM2-135M-Instruct", + "developer": "HuggingFaceTB", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0593 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3135 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0144 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2341 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3871 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + 
"dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1092 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-135M-Instruct/9a9fb17d-49ae-4a82-95c8-c8b55923d72f.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-135M-Instruct/9a9fb17d-49ae-4a82-95c8-c8b55923d72f.json deleted file mode 100644 index 82273b309..000000000 --- a/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-135M-Instruct/9a9fb17d-49ae-4a82-95c8-c8b55923d72f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-135M-Instruct/1762652579.644038", - "retrieved_timestamp": "1762652579.644039", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "HuggingFaceTB/SmolLM2-135M-Instruct", - "developer": "HuggingFaceTB", - "inference_platform": "unknown", - "id": "HuggingFaceTB/SmolLM2-135M-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05925167444602544 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31347502947335903 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23406040268456377 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3871458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10920877659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-135M-Instruct/b3b854b6-700c-4297-b335-6acc3c385f84.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-135M-Instruct/b3b854b6-700c-4297-b335-6acc3c385f84.json new file mode 100644 index 000000000..5a55c0fe8 --- /dev/null +++ b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-135M-Instruct/b3b854b6-700c-4297-b335-6acc3c385f84.json @@ -0,0 +1,132 @@ +{ + 
"schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-135M-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SmolLM2-135M-Instruct", + "id": "HuggingFaceTB/SmolLM2-135M-Instruct", + "developer": "HuggingFaceTB", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2883 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3124 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.003 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2357 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3662 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1115 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-135M-Instruct/df60b16b-184c-43d9-ac79-8627f09d265b.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-135M-Instruct/df60b16b-184c-43d9-ac79-8627f09d265b.json deleted file mode 100644 index 2294d7173..000000000 --- a/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-135M-Instruct/df60b16b-184c-43d9-ac79-8627f09d265b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-135M-Instruct/1762652579.643796", - "retrieved_timestamp": "1762652579.643796", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "HuggingFaceTB/SmolLM2-135M-Instruct", - "developer": "HuggingFaceTB", - "inference_platform": "unknown", - "id": "HuggingFaceTB/SmolLM2-135M-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2883138960181208 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3124321328066677 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0030211480362537764 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23573825503355705 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36621875000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11145279255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-135M/1761caca-524f-4d59-81dd-631e3e24e0e5.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-135M/1761caca-524f-4d59-81dd-631e3e24e0e5.json deleted file mode 100644 index fdd40b31e..000000000 --- a/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-135M/1761caca-524f-4d59-81dd-631e3e24e0e5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-135M/1762652579.643546", - "retrieved_timestamp": "1762652579.6435468", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "HuggingFaceTB/SmolLM2-135M", - "developer": "HuggingFaceTB", - "inference_platform": "unknown", - "id": "HuggingFaceTB/SmolLM2-135M", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, 
- "score_details": { - "score": 0.18177657504310785 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3044234246877141 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2483221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4111770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10945811170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-135M/a9d79c6a-f99a-4b60-8e37-ee2cdfe75f30.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-135M/a9d79c6a-f99a-4b60-8e37-ee2cdfe75f30.json new file mode 100644 index 000000000..08375897a --- /dev/null +++ b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-135M/a9d79c6a-f99a-4b60-8e37-ee2cdfe75f30.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-135M/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SmolLM2-135M", + "id": "HuggingFaceTB/SmolLM2-135M", + "developer": "HuggingFaceTB", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1818 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3044 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0121 + } + }, + { + 
"evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2483 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4112 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1095 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-360M-Instruct/06409b6c-9d26-4bee-af75-16e6edb87a93.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-360M-Instruct/06409b6c-9d26-4bee-af75-16e6edb87a93.json deleted file mode 100644 index 47cad94b8..000000000 --- a/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-360M-Instruct/06409b6c-9d26-4bee-af75-16e6edb87a93.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-360M-Instruct/1762652579.644474", - "retrieved_timestamp": "1762652579.644475", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "HuggingFaceTB/SmolLM2-360M-Instruct", - "developer": "HuggingFaceTB", - "inference_platform": "unknown", - "id": "HuggingFaceTB/SmolLM2-360M-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.362 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08303191088533979 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3052703401844317 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34228125000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11261635638297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-360M-Instruct/09ba6e80-5ab4-4c8c-b7ad-c1497413c207.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-360M-Instruct/09ba6e80-5ab4-4c8c-b7ad-c1497413c207.json deleted file mode 100644 index d101ddd11..000000000 --- a/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-360M-Instruct/09ba6e80-5ab4-4c8c-b7ad-c1497413c207.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-360M-Instruct/1762652579.6446972", - "retrieved_timestamp": "1762652579.6446981", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "HuggingFaceTB/SmolLM2-360M-Instruct", - "developer": "HuggingFaceTB", - "inference_platform": "unknown", - "id": "HuggingFaceTB/SmolLM2-360M-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.36 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38415958545548035 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31435050538888504 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2550335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.346125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11170212765957446 - } - } - ] -} diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-360M-Instruct/88e1dd78-d3bc-401b-88e9-d963bac181db.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-360M-Instruct/88e1dd78-d3bc-401b-88e9-d963bac181db.json new file mode 100644 index 000000000..bfc21511e --- /dev/null +++ b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-360M-Instruct/88e1dd78-d3bc-401b-88e9-d963bac181db.json @@ -0,0 +1,132 @@ +{ + 
"schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-360M-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SmolLM2-360M-Instruct", + "id": "HuggingFaceTB/SmolLM2-360M-Instruct", + "developer": "HuggingFaceTB", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.36 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3842 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3144 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0151 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.255 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3461 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1117 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-360M-Instruct/a41bd607-f319-4063-a6e4-813f43e40568.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-360M-Instruct/a41bd607-f319-4063-a6e4-813f43e40568.json new file mode 100644 index 000000000..96d89080d --- /dev/null +++ b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-360M-Instruct/a41bd607-f319-4063-a6e4-813f43e40568.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-360M-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM 
v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SmolLM2-360M-Instruct", + "id": "HuggingFaceTB/SmolLM2-360M-Instruct", + "developer": "HuggingFaceTB", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.362 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.083 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3053 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0083 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3423 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1126 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-360M/7751b65d-2bba-465c-9a1e-5ae51d94fcf6.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-360M/7751b65d-2bba-465c-9a1e-5ae51d94fcf6.json deleted file mode 100644 index 680159fa7..000000000 --- a/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-360M/7751b65d-2bba-465c-9a1e-5ae51d94fcf6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-360M/1762652579.6442492", - "retrieved_timestamp": "1762652579.6442502", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - 
"model_info": { - "name": "HuggingFaceTB/SmolLM2-360M", - "developer": "HuggingFaceTB", - "inference_platform": "unknown", - "id": "HuggingFaceTB/SmolLM2-360M", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.36 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21145227995053123 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3233478044302361 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24580536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3954270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11693816489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-360M/8629aef1-c673-4b17-a9cc-b361a53bdaa7.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-360M/8629aef1-c673-4b17-a9cc-b361a53bdaa7.json new file mode 100644 index 000000000..95ae8687c --- /dev/null +++ b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-360M/8629aef1-c673-4b17-a9cc-b361a53bdaa7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-360M/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SmolLM2-360M", + "id": "HuggingFaceTB/SmolLM2-360M", + "developer": "HuggingFaceTB", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.36 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2115 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3233 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0121 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2458 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3954 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1169 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HumanLLMs/Humanish-LLama3-8B-Instruct/532c927a-dc0c-4e65-8ab0-7b9ddd889d89.json b/data/hfopenllm_v2/HumanLLMs/Humanish-LLama3-8B-Instruct/532c927a-dc0c-4e65-8ab0-7b9ddd889d89.json new file mode 100644 index 000000000..c3eae323f --- /dev/null +++ b/data/hfopenllm_v2/HumanLLMs/Humanish-LLama3-8B-Instruct/532c927a-dc0c-4e65-8ab0-7b9ddd889d89.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/HumanLLMs_Humanish-LLama3-8B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Humanish-LLama3-8B-Instruct", + "id": "HumanLLMs/Humanish-LLama3-8B-Instruct", + "developer": "HumanLLMs", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6498 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4968 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + 
"source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1027 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2559 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3582 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3702 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HumanLLMs/Humanish-LLama3-8B-Instruct/e69e4e90-8177-44f5-8497-0a45ca9155ea.json b/data/hfopenllm_v2/HumanLLMs/Humanish-LLama3-8B-Instruct/e69e4e90-8177-44f5-8497-0a45ca9155ea.json deleted file mode 100644 index ca45ad7f0..000000000 --- a/data/hfopenllm_v2/HumanLLMs/Humanish-LLama3-8B-Instruct/e69e4e90-8177-44f5-8497-0a45ca9155ea.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/HumanLLMs_Humanish-LLama3-8B-Instruct/1762652579.6448839", - "retrieved_timestamp": "1762652579.644885", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "HumanLLMs/Humanish-LLama3-8B-Instruct", - "developer": "HumanLLMs", - "inference_platform": "unknown", - "id": "HumanLLMs/Humanish-LLama3-8B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6497903340913221 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49677096627896544 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1027190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on 
GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35815624999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37017952127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/HumanLLMs/Humanish-Mistral-Nemo-Instruct-2407/843f9927-9865-4066-9cc0-f0522d3b914f.json b/data/hfopenllm_v2/HumanLLMs/Humanish-Mistral-Nemo-Instruct-2407/843f9927-9865-4066-9cc0-f0522d3b914f.json new file mode 100644 index 000000000..b2ce02227 --- /dev/null +++ b/data/hfopenllm_v2/HumanLLMs/Humanish-Mistral-Nemo-Instruct-2407/843f9927-9865-4066-9cc0-f0522d3b914f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/HumanLLMs_Humanish-Mistral-Nemo-Instruct-2407/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Humanish-Mistral-Nemo-Instruct-2407", + "id": "HumanLLMs/Humanish-Mistral-Nemo-Instruct-2407", + "developer": "HumanLLMs", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5451 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5262 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1367 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2878 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3968 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3521 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HumanLLMs/Humanish-Mistral-Nemo-Instruct-2407/de0dbc50-5d26-4005-967c-3dcbde3a1282.json b/data/hfopenllm_v2/HumanLLMs/Humanish-Mistral-Nemo-Instruct-2407/de0dbc50-5d26-4005-967c-3dcbde3a1282.json deleted file mode 100644 index 591a2b8aa..000000000 --- a/data/hfopenllm_v2/HumanLLMs/Humanish-Mistral-Nemo-Instruct-2407/de0dbc50-5d26-4005-967c-3dcbde3a1282.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/HumanLLMs_Humanish-Mistral-Nemo-Instruct-2407/1762652579.6451478", - "retrieved_timestamp": "1762652579.645149", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "HumanLLMs/Humanish-Mistral-Nemo-Instruct-2407", - "developer": "HumanLLMs", - "inference_platform": "unknown", - "id": "HumanLLMs/Humanish-Mistral-Nemo-Instruct-2407", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5451269298793867 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5261780772532613 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13670694864048338 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39676041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35206117021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/HumanLLMs/Humanish-Qwen2.5-7B-Instruct/df720663-5e82-4de7-9a19-88287bb5f56a.json 
b/data/hfopenllm_v2/HumanLLMs/Humanish-Qwen2.5-7B-Instruct/df720663-5e82-4de7-9a19-88287bb5f56a.json deleted file mode 100644 index d6c9f96a7..000000000 --- a/data/hfopenllm_v2/HumanLLMs/Humanish-Qwen2.5-7B-Instruct/df720663-5e82-4de7-9a19-88287bb5f56a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/HumanLLMs_Humanish-Qwen2.5-7B-Instruct/1762652579.645365", - "retrieved_timestamp": "1762652579.645366", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "HumanLLMs/Humanish-Qwen2.5-7B-Instruct", - "developer": "HumanLLMs", - "inference_platform": "unknown", - "id": "HumanLLMs/Humanish-Qwen2.5-7B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7284250233824031 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5363681457807072 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3980625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4398271276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/HumanLLMs/Humanish-Qwen2.5-7B-Instruct/eeecb2cb-e286-443f-84aa-d825702a4ad8.json b/data/hfopenllm_v2/HumanLLMs/Humanish-Qwen2.5-7B-Instruct/eeecb2cb-e286-443f-84aa-d825702a4ad8.json new file mode 100644 index 000000000..899bd0d00 --- /dev/null +++ b/data/hfopenllm_v2/HumanLLMs/Humanish-Qwen2.5-7B-Instruct/eeecb2cb-e286-443f-84aa-d825702a4ad8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/HumanLLMs_Humanish-Qwen2.5-7B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Humanish-Qwen2.5-7B-Instruct", + "id": "HumanLLMs/Humanish-Qwen2.5-7B-Instruct", + "developer": 
"HumanLLMs", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7284 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5364 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3981 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4398 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/IDEA-CCNL/Ziya-LLaMA-13B-v1/36ab4f5a-b2cf-4d01-8283-9eaf2c90928f.json b/data/hfopenllm_v2/IDEA-CCNL/Ziya-LLaMA-13B-v1/36ab4f5a-b2cf-4d01-8283-9eaf2c90928f.json new file mode 100644 index 000000000..27a4a8e2c --- /dev/null +++ b/data/hfopenllm_v2/IDEA-CCNL/Ziya-LLaMA-13B-v1/36ab4f5a-b2cf-4d01-8283-9eaf2c90928f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/IDEA-CCNL_Ziya-LLaMA-13B-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ziya-LLaMA-13B-v1", + "id": "IDEA-CCNL/Ziya-LLaMA-13B-v1", + "developer": "IDEA-CCNL", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + 
"dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1697 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2877 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2492 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3751 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1101 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/INSAIT-Institute/BgGPT-Gemma-2-27B-IT-v1.0/c4e810f1-ffb3-4ece-b445-64e339761530.json b/data/hfopenllm_v2/INSAIT-Institute/BgGPT-Gemma-2-27B-IT-v1.0/c4e810f1-ffb3-4ece-b445-64e339761530.json new file mode 100644 index 000000000..225b86c3c --- /dev/null +++ b/data/hfopenllm_v2/INSAIT-Institute/BgGPT-Gemma-2-27B-IT-v1.0/c4e810f1-ffb3-4ece-b445-64e339761530.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/INSAIT-Institute_BgGPT-Gemma-2-27B-IT-v1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BgGPT-Gemma-2-27B-IT-v1.0", + "id": "INSAIT-Institute/BgGPT-Gemma-2-27B-IT-v1.0", + "developer": "INSAIT-Institute", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2912 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3575 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1167 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/IlyaGusev/gemma-2-2b-it-abliterated/025725b6-0034-48c0-a720-5fc210e5e24b.json b/data/hfopenllm_v2/IlyaGusev/gemma-2-2b-it-abliterated/025725b6-0034-48c0-a720-5fc210e5e24b.json new file mode 100644 index 000000000..1daa4500b --- /dev/null +++ b/data/hfopenllm_v2/IlyaGusev/gemma-2-2b-it-abliterated/025725b6-0034-48c0-a720-5fc210e5e24b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/IlyaGusev_gemma-2-2b-it-abliterated/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-2b-it-abliterated", + "id": "IlyaGusev/gemma-2-2b-it-abliterated", + "developer": "IlyaGusev", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5331 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": 
"BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4119 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0612 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3782 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2538 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/IlyaGusev/gemma-2-9b-it-abliterated/7bdd8928-c336-494e-9c87-de9ecc2749b8.json b/data/hfopenllm_v2/IlyaGusev/gemma-2-9b-it-abliterated/7bdd8928-c336-494e-9c87-de9ecc2749b8.json new file mode 100644 index 000000000..b601a539f --- /dev/null +++ b/data/hfopenllm_v2/IlyaGusev/gemma-2-9b-it-abliterated/7bdd8928-c336-494e-9c87-de9ecc2749b8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/IlyaGusev_gemma-2-9b-it-abliterated/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-9b-it-abliterated", + "id": "IlyaGusev/gemma-2-9b-it-abliterated", + "developer": "IlyaGusev", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7473 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 
0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5906 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1775 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3456 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4034 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3915 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Infinirc/Infinirc-Llama3-8B-2G-Release-v1.0/ff7369dc-3ff2-424b-80b0-e06a141b54f3.json b/data/hfopenllm_v2/Infinirc/Infinirc-Llama3-8B-2G-Release-v1.0/ff7369dc-3ff2-424b-80b0-e06a141b54f3.json new file mode 100644 index 000000000..dde7937ec --- /dev/null +++ b/data/hfopenllm_v2/Infinirc/Infinirc-Llama3-8B-2G-Release-v1.0/ff7369dc-3ff2-424b-80b0-e06a141b54f3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Infinirc_Infinirc-Llama3-8B-2G-Release-v1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Infinirc-Llama3-8B-2G-Release-v1.0", + "id": "Infinirc/Infinirc-Llama3-8B-2G-Release-v1.0", + "developer": "Infinirc", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2024 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4351 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 
5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0166 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4609 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.216 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Intel/neural-chat-7b-v3-1/23b6bf8e-c79a-4620-9e15-2742f45130af.json b/data/hfopenllm_v2/Intel/neural-chat-7b-v3-1/23b6bf8e-c79a-4620-9e15-2742f45130af.json deleted file mode 100644 index e2b864619..000000000 --- a/data/hfopenllm_v2/Intel/neural-chat-7b-v3-1/23b6bf8e-c79a-4620-9e15-2742f45130af.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Intel_neural-chat-7b-v3-1/1762652579.6473012", - "retrieved_timestamp": "1762652579.647302", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Intel/neural-chat-7b-v3-1", - "developer": "Intel", - "inference_platform": "unknown", - "id": "Intel/neural-chat-7b-v3-1", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4686897432146704 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5051565464054848 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.035498489425981876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49789583333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2677859042553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/Intel/neural-chat-7b-v3-1/a6dc7253-75fd-4897-be85-8ac89fc11f8e.json b/data/hfopenllm_v2/Intel/neural-chat-7b-v3-1/a6dc7253-75fd-4897-be85-8ac89fc11f8e.json new file mode 100644 index 000000000..c13200f68 --- /dev/null +++ b/data/hfopenllm_v2/Intel/neural-chat-7b-v3-1/a6dc7253-75fd-4897-be85-8ac89fc11f8e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Intel_neural-chat-7b-v3-1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "neural-chat-7b-v3-1", + "id": "Intel/neural-chat-7b-v3-1", + "developer": "Intel", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4687 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5052 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0355 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4979 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": 
{ + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2678 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Intel/neural-chat-7b-v3-2/296ceacc-542a-4000-bf9b-ae59b33a53ce.json b/data/hfopenllm_v2/Intel/neural-chat-7b-v3-2/296ceacc-542a-4000-bf9b-ae59b33a53ce.json new file mode 100644 index 000000000..050bc7fae --- /dev/null +++ b/data/hfopenllm_v2/Intel/neural-chat-7b-v3-2/296ceacc-542a-4000-bf9b-ae59b33a53ce.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Intel_neural-chat-7b-v3-2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "neural-chat-7b-v3-2", + "id": "Intel/neural-chat-7b-v3-2", + "developer": "Intel", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4988 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5032 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0476 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4895 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, 
+ "score_details": { + "score": 0.2667 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Intel/neural-chat-7b-v3-2/f8842523-53de-4197-9cf4-979780cbe127.json b/data/hfopenllm_v2/Intel/neural-chat-7b-v3-2/f8842523-53de-4197-9cf4-979780cbe127.json deleted file mode 100644 index 93038f9da..000000000 --- a/data/hfopenllm_v2/Intel/neural-chat-7b-v3-2/f8842523-53de-4197-9cf4-979780cbe127.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Intel_neural-chat-7b-v3-2/1762652579.647583", - "retrieved_timestamp": "1762652579.647584", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Intel/neural-chat-7b-v3-2", - "developer": "Intel", - "inference_platform": "unknown", - "id": "Intel/neural-chat-7b-v3-2", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4988397452093778 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5032226831964403 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48952083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26670545212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/Intel/neural-chat-7b-v3-3/0bec0f9a-863b-42f5-96eb-7263eb1c8a61.json b/data/hfopenllm_v2/Intel/neural-chat-7b-v3-3/0bec0f9a-863b-42f5-96eb-7263eb1c8a61.json deleted file mode 100644 index 911daab6f..000000000 --- a/data/hfopenllm_v2/Intel/neural-chat-7b-v3-3/0bec0f9a-863b-42f5-96eb-7263eb1c8a61.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Intel_neural-chat-7b-v3-3/1762652579.6477928", - "retrieved_timestamp": "1762652579.647794", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - 
"source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Intel/neural-chat-7b-v3-3", - "developer": "Intel", - "inference_platform": "unknown", - "id": "Intel/neural-chat-7b-v3-3", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4762585495374495 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48766180524289693 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4859583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2624667553191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/Intel/neural-chat-7b-v3-3/13870577-7579-48b4-9c92-202318ca6ecc.json b/data/hfopenllm_v2/Intel/neural-chat-7b-v3-3/13870577-7579-48b4-9c92-202318ca6ecc.json new file mode 100644 index 000000000..f546afc25 --- /dev/null +++ b/data/hfopenllm_v2/Intel/neural-chat-7b-v3-3/13870577-7579-48b4-9c92-202318ca6ecc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Intel_neural-chat-7b-v3-3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "neural-chat-7b-v3-3", + "id": "Intel/neural-chat-7b-v3-3", + "developer": "Intel", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4763 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4877 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0408 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.486 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2625 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Intel/neural-chat-7b-v3/617dbd41-3ca3-46d8-8fd2-491d6be39554.json b/data/hfopenllm_v2/Intel/neural-chat-7b-v3/617dbd41-3ca3-46d8-8fd2-491d6be39554.json deleted file mode 100644 index 22031e71d..000000000 --- a/data/hfopenllm_v2/Intel/neural-chat-7b-v3/617dbd41-3ca3-46d8-8fd2-491d6be39554.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Intel_neural-chat-7b-v3/1762652579.646828", - "retrieved_timestamp": "1762652579.6468291", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Intel/neural-chat-7b-v3", - "developer": "Intel", - "inference_platform": "unknown", - "id": "Intel/neural-chat-7b-v3", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27779735546128714 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5048316221363103 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.02945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5054895833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26986369680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/Intel/neural-chat-7b-v3/6ebd2806-2623-4773-93bd-1036ff01cb8c.json b/data/hfopenllm_v2/Intel/neural-chat-7b-v3/6ebd2806-2623-4773-93bd-1036ff01cb8c.json new file mode 100644 index 000000000..937a77afe --- /dev/null +++ b/data/hfopenllm_v2/Intel/neural-chat-7b-v3/6ebd2806-2623-4773-93bd-1036ff01cb8c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Intel_neural-chat-7b-v3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "neural-chat-7b-v3", + "id": "Intel/neural-chat-7b-v3", + "developer": "Intel", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2778 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5048 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0295 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on 
MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5055 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2699 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/IntervitensInc/internlm2_5-20b-llamafied/99d6a44b-d556-4674-8ade-a5b30cf99255.json b/data/hfopenllm_v2/IntervitensInc/internlm2_5-20b-llamafied/99d6a44b-d556-4674-8ade-a5b30cf99255.json new file mode 100644 index 000000000..9f01570fc --- /dev/null +++ b/data/hfopenllm_v2/IntervitensInc/internlm2_5-20b-llamafied/99d6a44b-d556-4674-8ade-a5b30cf99255.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/IntervitensInc_internlm2_5-20b-llamafied/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "internlm2_5-20b-llamafied", + "id": "IntervitensInc/internlm2_5-20b-llamafied", + "developer": "IntervitensInc", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 19.861 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.341 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7478 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1715 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3381 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4475 + } + }, + { + "evaluation_name": 
"MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4051 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Invalid-Null/PeiYangMe-0.5/605118a3-316a-46b5-9719-f596e361a2a8.json b/data/hfopenllm_v2/Invalid-Null/PeiYangMe-0.5/605118a3-316a-46b5-9719-f596e361a2a8.json new file mode 100644 index 000000000..cb284ea11 --- /dev/null +++ b/data/hfopenllm_v2/Invalid-Null/PeiYangMe-0.5/605118a3-316a-46b5-9719-f596e361a2a8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Invalid-Null_PeiYangMe-0.5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PeiYangMe-0.5", + "id": "Invalid-Null/PeiYangMe-0.5", + "developer": "Invalid-Null", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.061 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1409 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2791 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2441 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3738 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1109 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Invalid-Null/PeiYangMe-0.5/c645a252-366a-4890-a16b-bf687bfbb593.json b/data/hfopenllm_v2/Invalid-Null/PeiYangMe-0.5/c645a252-366a-4890-a16b-bf687bfbb593.json deleted file mode 100644 index bfbc81d92..000000000 --- a/data/hfopenllm_v2/Invalid-Null/PeiYangMe-0.5/c645a252-366a-4890-a16b-bf687bfbb593.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Invalid-Null_PeiYangMe-0.5/1762652579.648252", - "retrieved_timestamp": "1762652579.648252", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Invalid-Null/PeiYangMe-0.5", - "developer": "Invalid-Null", - "inference_platform": "unknown", - "id": "Invalid-Null/PeiYangMe-0.5", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.061 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14088507382633633 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27907748194216614 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24412751677852348 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37381249999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11087101063829788 - } - } - ] -} diff --git a/data/hfopenllm_v2/Invalid-Null/PeiYangMe-0.7/271d2829-fbd4-438e-9f09-59539af68c8b.json b/data/hfopenllm_v2/Invalid-Null/PeiYangMe-0.7/271d2829-fbd4-438e-9f09-59539af68c8b.json new file mode 100644 index 000000000..b5d375fc5 --- /dev/null +++ b/data/hfopenllm_v2/Invalid-Null/PeiYangMe-0.7/271d2829-fbd4-438e-9f09-59539af68c8b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Invalid-Null_PeiYangMe-0.7/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + 
"model_info": { + "name": "PeiYangMe-0.7", + "id": "Invalid-Null/PeiYangMe-0.7", + "developer": "Invalid-Null", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.061 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1491 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3028 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0113 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2332 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3857 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1101 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Invalid-Null/PeiYangMe-0.7/294c1745-38cb-4b1e-aae6-e2878ab9065a.json b/data/hfopenllm_v2/Invalid-Null/PeiYangMe-0.7/294c1745-38cb-4b1e-aae6-e2878ab9065a.json deleted file mode 100644 index befc1198e..000000000 --- a/data/hfopenllm_v2/Invalid-Null/PeiYangMe-0.7/294c1745-38cb-4b1e-aae6-e2878ab9065a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Invalid-Null_PeiYangMe-0.7/1762652579.648521", - "retrieved_timestamp": "1762652579.648522", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Invalid-Null/PeiYangMe-0.7", - "developer": "Invalid-Null", - "inference_platform": "unknown", - "id": 
"Invalid-Null/PeiYangMe-0.7", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.061 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1491032682172192 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30275310145886614 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.011329305135951661 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2332214765100671 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38571874999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11012300531914894 - } - } - ] -} diff --git a/data/hfopenllm_v2/Isaak-Carter/JOSIEv4o-8b-stage1-v4/107bc549-75c1-4272-b567-f8ab9f6cd675.json b/data/hfopenllm_v2/Isaak-Carter/JOSIEv4o-8b-stage1-v4/107bc549-75c1-4272-b567-f8ab9f6cd675.json new file mode 100644 index 000000000..ffdfacf77 --- /dev/null +++ b/data/hfopenllm_v2/Isaak-Carter/JOSIEv4o-8b-stage1-v4/107bc549-75c1-4272-b567-f8ab9f6cd675.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Isaak-Carter_JOSIEv4o-8b-stage1-v4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JOSIEv4o-8b-stage1-v4", + "id": "Isaak-Carter/JOSIEv4o-8b-stage1-v4", + "developer": "Isaak-Carter", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2477 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.4758 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0453 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3641 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3292 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Isaak-Carter/JOSIEv4o-8b-stage1-v4/dfb451e9-c1c1-45a1-8082-155763366129.json b/data/hfopenllm_v2/Isaak-Carter/JOSIEv4o-8b-stage1-v4/dfb451e9-c1c1-45a1-8082-155763366129.json new file mode 100644 index 000000000..9fc986679 --- /dev/null +++ b/data/hfopenllm_v2/Isaak-Carter/JOSIEv4o-8b-stage1-v4/dfb451e9-c1c1-45a1-8082-155763366129.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Isaak-Carter_JOSIEv4o-8b-stage1-v4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JOSIEv4o-8b-stage1-v4", + "id": "Isaak-Carter/JOSIEv4o-8b-stage1-v4", + "developer": "Isaak-Carter", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2553 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4725 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0529 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3654 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3316 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Isaak-Carter/JOSIEv4o-8b-stage1-v4/e8bdfeef-9795-4b00-adec-6ac41c6718f7.json b/data/hfopenllm_v2/Isaak-Carter/JOSIEv4o-8b-stage1-v4/e8bdfeef-9795-4b00-adec-6ac41c6718f7.json deleted file mode 100644 index fe9728949..000000000 --- a/data/hfopenllm_v2/Isaak-Carter/JOSIEv4o-8b-stage1-v4/e8bdfeef-9795-4b00-adec-6ac41c6718f7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Isaak-Carter_JOSIEv4o-8b-stage1-v4/1762652579.648735", - "retrieved_timestamp": "1762652579.648736", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Isaak-Carter/JOSIEv4o-8b-stage1-v4", - "developer": "Isaak-Carter", - "inference_platform": "unknown", - "id": "Isaak-Carter/JOSIEv4o-8b-stage1-v4", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2552660274737696 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4724973116620121 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.052870090634441085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3654375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3316156914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/Isaak-Carter/JOSIEv4o-8b-stage1-v4/f28b57ba-103a-41bb-93b0-7b25fd155351.json b/data/hfopenllm_v2/Isaak-Carter/JOSIEv4o-8b-stage1-v4/f28b57ba-103a-41bb-93b0-7b25fd155351.json deleted file mode 100644 index feafa4522..000000000 --- a/data/hfopenllm_v2/Isaak-Carter/JOSIEv4o-8b-stage1-v4/f28b57ba-103a-41bb-93b0-7b25fd155351.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Isaak-Carter_JOSIEv4o-8b-stage1-v4/1762652579.6489909", - "retrieved_timestamp": "1762652579.648992", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Isaak-Carter/JOSIEv4o-8b-stage1-v4", - "developer": "Isaak-Carter", - "inference_platform": "unknown", - "id": "Isaak-Carter/JOSIEv4o-8b-stage1-v4", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2476972211509905 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4758066295235124 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3641041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32920545212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/817eb9e1-bd7d-4033-b0ea-bc7df58dc087.json 
b/data/hfopenllm_v2/Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/817eb9e1-bd7d-4033-b0ea-bc7df58dc087.json deleted file mode 100644 index 18e99d617..000000000 --- a/data/hfopenllm_v2/Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/817eb9e1-bd7d-4033-b0ea-bc7df58dc087.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Isaak-Carter_Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/1762652579.649409", - "retrieved_timestamp": "1762652579.64941", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2", - "developer": "Isaak-Carter", - "inference_platform": "unknown", - "id": "Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7841039552830933 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5310923599182072 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47205438066465255 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43539583333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4128158244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/b2d80977-d079-42ec-b057-5aac530b9d70.json b/data/hfopenllm_v2/Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/b2d80977-d079-42ec-b057-5aac530b9d70.json new file mode 100644 index 000000000..839f443ca --- /dev/null +++ b/data/hfopenllm_v2/Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/b2d80977-d079-42ec-b057-5aac530b9d70.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Isaak-Carter_Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": 
"Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Josiefied-Qwen2.5-7B-Instruct-abliterated-v2", + "id": "Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2", + "developer": "Isaak-Carter", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7841 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5311 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4721 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4354 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4128 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated/16b33b80-3b4b-4edb-b89f-3d93dca8969c.json b/data/hfopenllm_v2/Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated/16b33b80-3b4b-4edb-b89f-3d93dca8969c.json new file mode 100644 index 000000000..7fb690341 --- /dev/null +++ b/data/hfopenllm_v2/Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated/16b33b80-3b4b-4edb-b89f-3d93dca8969c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Isaak-Carter_Josiefied-Qwen2.5-7B-Instruct-abliterated/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"Josiefied-Qwen2.5-7B-Instruct-abliterated", + "id": "Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated", + "developer": "Isaak-Carter", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7317 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5396 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4924 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4087 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4276 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated/2013b3a9-3644-4f66-9941-b5d2ba6e7b81.json b/data/hfopenllm_v2/Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated/2013b3a9-3644-4f66-9941-b5d2ba6e7b81.json deleted file mode 100644 index 21e509a95..000000000 --- a/data/hfopenllm_v2/Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated/2013b3a9-3644-4f66-9941-b5d2ba6e7b81.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Isaak-Carter_Josiefied-Qwen2.5-7B-Instruct-abliterated/1762652579.6491818", - "retrieved_timestamp": "1762652579.649183", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - 
"model_info": { - "name": "Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated", - "developer": "Isaak-Carter", - "inference_platform": "unknown", - "id": "Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7317473193349202 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5396376284460921 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49244712990936557 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4086666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4276097074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/J-LAB/Thynk_orpo/3565fba3-e63d-49f8-9e8f-deef83531eb9.json b/data/hfopenllm_v2/J-LAB/Thynk_orpo/3565fba3-e63d-49f8-9e8f-deef83531eb9.json deleted file mode 100644 index 36962f3e7..000000000 --- a/data/hfopenllm_v2/J-LAB/Thynk_orpo/3565fba3-e63d-49f8-9e8f-deef83531eb9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/J-LAB_Thynk_orpo/1762652579.649622", - "retrieved_timestamp": "1762652579.6496232", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "J-LAB/Thynk_orpo", - "developer": "J-LAB", - "inference_platform": "unknown", - "id": "J-LAB/Thynk_orpo", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21017788357114678 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44631138778709606 - } 
- }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14803625377643503 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45147916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32313829787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/J-LAB/Thynk_orpo/63c94e0a-4572-4b8a-bfe0-7f88bb847d7f.json b/data/hfopenllm_v2/J-LAB/Thynk_orpo/63c94e0a-4572-4b8a-bfe0-7f88bb847d7f.json new file mode 100644 index 000000000..9f7d195c4 --- /dev/null +++ b/data/hfopenllm_v2/J-LAB/Thynk_orpo/63c94e0a-4572-4b8a-bfe0-7f88bb847d7f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/J-LAB_Thynk_orpo/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Thynk_orpo", + "id": "J-LAB/Thynk_orpo", + "developer": "J-LAB", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2102 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4463 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.148 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": 
"MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4515 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3231 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JackFram/llama-160m/538f2b43-328c-456d-8a40-ff2b37924453.json b/data/hfopenllm_v2/JackFram/llama-160m/538f2b43-328c-456d-8a40-ff2b37924453.json new file mode 100644 index 000000000..94fe29570 --- /dev/null +++ b/data/hfopenllm_v2/JackFram/llama-160m/538f2b43-328c-456d-8a40-ff2b37924453.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JackFram_llama-160m/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-160m", + "id": "JackFram/llama-160m", + "developer": "JackFram", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.162 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1791 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2888 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0083 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.3792 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1128 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JackFram/llama-68m/fb7a68e6-716e-48c6-96c0-d227735f9a7c.json b/data/hfopenllm_v2/JackFram/llama-68m/fb7a68e6-716e-48c6-96c0-d227735f9a7c.json new file mode 100644 index 000000000..620583267 --- /dev/null +++ b/data/hfopenllm_v2/JackFram/llama-68m/fb7a68e6-716e-48c6-96c0-d227735f9a7c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JackFram_llama-68m/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-68m", + "id": "JackFram/llama-68m", + "developer": "JackFram", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.068 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1726 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.006 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.391 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1144 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Jacoby746/Casual-Magnum-34B/0b9358f8-1e27-448f-9932-1f2c6feac036.json b/data/hfopenllm_v2/Jacoby746/Casual-Magnum-34B/0b9358f8-1e27-448f-9932-1f2c6feac036.json deleted file mode 100644 index 8b6b45456..000000000 --- a/data/hfopenllm_v2/Jacoby746/Casual-Magnum-34B/0b9358f8-1e27-448f-9932-1f2c6feac036.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Jacoby746_Casual-Magnum-34B/1762652579.65033", - "retrieved_timestamp": "1762652579.6503308", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Jacoby746/Casual-Magnum-34B", - "developer": "Jacoby746", - "inference_platform": "unknown", - "id": "Jacoby746/Casual-Magnum-34B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19301675110927893 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6032046880542974 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09214501510574018 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3724832214765101 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4077604166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5183676861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/Jacoby746/Casual-Magnum-34B/3593d4b8-5602-4cca-935f-a76e342f060a.json b/data/hfopenllm_v2/Jacoby746/Casual-Magnum-34B/3593d4b8-5602-4cca-935f-a76e342f060a.json new file mode 100644 index 000000000..b0ceabd5f --- /dev/null +++ b/data/hfopenllm_v2/Jacoby746/Casual-Magnum-34B/3593d4b8-5602-4cca-935f-a76e342f060a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Jacoby746_Casual-Magnum-34B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": 
"third_party" + }, + "model_info": { + "name": "Casual-Magnum-34B", + "id": "Jacoby746/Casual-Magnum-34B", + "developer": "Jacoby746", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.193 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6032 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0921 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3725 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4078 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5184 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Jacoby746/Inf-Silent-Kunoichi-v0.1-2x7B/72d503fc-b221-498e-811a-a806769175d6.json b/data/hfopenllm_v2/Jacoby746/Inf-Silent-Kunoichi-v0.1-2x7B/72d503fc-b221-498e-811a-a806769175d6.json new file mode 100644 index 000000000..eeb08788f --- /dev/null +++ b/data/hfopenllm_v2/Jacoby746/Inf-Silent-Kunoichi-v0.1-2x7B/72d503fc-b221-498e-811a-a806769175d6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Jacoby746_Inf-Silent-Kunoichi-v0.1-2x7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Inf-Silent-Kunoichi-v0.1-2x7B", + "id": "Jacoby746/Inf-Silent-Kunoichi-v0.1-2x7B", + "developer": "Jacoby746", + "inference_platform": "unknown", + 
"additional_details": { + "precision": "float16", + "architecture": "MixtralForCausalLM", + "params_billions": 12.879 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.388 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5185 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.071 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.428 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3271 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Jacoby746/Inf-Silent-Kunoichi-v0.1-2x7B/d1fa6abf-be2b-4ea6-bcbe-066ac37aa54f.json b/data/hfopenllm_v2/Jacoby746/Inf-Silent-Kunoichi-v0.1-2x7B/d1fa6abf-be2b-4ea6-bcbe-066ac37aa54f.json deleted file mode 100644 index d704fb134..000000000 --- a/data/hfopenllm_v2/Jacoby746/Inf-Silent-Kunoichi-v0.1-2x7B/d1fa6abf-be2b-4ea6-bcbe-066ac37aa54f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Jacoby746_Inf-Silent-Kunoichi-v0.1-2x7B/1762652579.6505952", - "retrieved_timestamp": "1762652579.6505961", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Jacoby746/Inf-Silent-Kunoichi-v0.1-2x7B", - "developer": "Jacoby746", - "inference_platform": "unknown", - "id": "Jacoby746/Inf-Silent-Kunoichi-v0.1-2x7B", - "additional_details": { - "precision": "float16", - 
"architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38798166642286913 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.518546209727402 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07099697885196375 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42804166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271276595744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/Jacoby746/Inf-Silent-Kunoichi-v0.2-2x7B/ad7d9698-d9e6-4f2d-9767-987835626c8c.json b/data/hfopenllm_v2/Jacoby746/Inf-Silent-Kunoichi-v0.2-2x7B/ad7d9698-d9e6-4f2d-9767-987835626c8c.json new file mode 100644 index 000000000..1870540b9 --- /dev/null +++ b/data/hfopenllm_v2/Jacoby746/Inf-Silent-Kunoichi-v0.2-2x7B/ad7d9698-d9e6-4f2d-9767-987835626c8c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Jacoby746_Inf-Silent-Kunoichi-v0.2-2x7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Inf-Silent-Kunoichi-v0.2-2x7B", + "id": "Jacoby746/Inf-Silent-Kunoichi-v0.2-2x7B", + "developer": "Jacoby746", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MixtralForCausalLM", + "params_billions": 12.879 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3636 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5209 + } + }, + { + "evaluation_name": "MATH Level 
5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0627 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.432 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3272 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Jacoby746/Inf-Silent-Kunoichi-v0.2-2x7B/f611991b-11c1-4232-bc63-8cf2942605ae.json b/data/hfopenllm_v2/Jacoby746/Inf-Silent-Kunoichi-v0.2-2x7B/f611991b-11c1-4232-bc63-8cf2942605ae.json deleted file mode 100644 index 72ab2175a..000000000 --- a/data/hfopenllm_v2/Jacoby746/Inf-Silent-Kunoichi-v0.2-2x7B/f611991b-11c1-4232-bc63-8cf2942605ae.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Jacoby746_Inf-Silent-Kunoichi-v0.2-2x7B/1762652579.650832", - "retrieved_timestamp": "1762652579.650833", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Jacoby746/Inf-Silent-Kunoichi-v0.2-2x7B", - "developer": "Jacoby746", - "inference_platform": "unknown", - "id": "Jacoby746/Inf-Silent-Kunoichi-v0.2-2x7B", - "additional_details": { - "precision": "float16", - "architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3636019095998617 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5209417299963208 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06268882175226587 - } - }, - { - "evaluation_name": 
"GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43197916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32721077127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/Jacoby746/Proto-Athena-4x7B/27d9d5c2-39d8-45e5-9614-a343144f05d8.json b/data/hfopenllm_v2/Jacoby746/Proto-Athena-4x7B/27d9d5c2-39d8-45e5-9614-a343144f05d8.json deleted file mode 100644 index 0d1301887..000000000 --- a/data/hfopenllm_v2/Jacoby746/Proto-Athena-4x7B/27d9d5c2-39d8-45e5-9614-a343144f05d8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Jacoby746_Proto-Athena-4x7B/1762652579.651071", - "retrieved_timestamp": "1762652579.651072", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Jacoby746/Proto-Athena-4x7B", - "developer": "Jacoby746", - "inference_platform": "unknown", - "id": "Jacoby746/Proto-Athena-4x7B", - "additional_details": { - "precision": "float16", - "architecture": "MixtralForCausalLM", - "params_billions": 24.154 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37029636918930664 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5106547638742905 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43477083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32064494680851063 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/Jacoby746/Proto-Athena-4x7B/98899942-fcf0-41de-8587-44d7429bea47.json b/data/hfopenllm_v2/Jacoby746/Proto-Athena-4x7B/98899942-fcf0-41de-8587-44d7429bea47.json new file mode 100644 index 000000000..290b886ee --- /dev/null +++ b/data/hfopenllm_v2/Jacoby746/Proto-Athena-4x7B/98899942-fcf0-41de-8587-44d7429bea47.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Jacoby746_Proto-Athena-4x7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Proto-Athena-4x7B", + "id": "Jacoby746/Proto-Athena-4x7B", + "developer": "Jacoby746", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.154 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3703 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5107 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.065 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4348 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3206 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Jacoby746/Proto-Athena-v0.2-4x7B/060feab1-4ce6-44a9-8ae2-c06468dd4dc9.json b/data/hfopenllm_v2/Jacoby746/Proto-Athena-v0.2-4x7B/060feab1-4ce6-44a9-8ae2-c06468dd4dc9.json deleted file mode 100644 index de17cdf46..000000000 --- 
a/data/hfopenllm_v2/Jacoby746/Proto-Athena-v0.2-4x7B/060feab1-4ce6-44a9-8ae2-c06468dd4dc9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Jacoby746_Proto-Athena-v0.2-4x7B/1762652579.651291", - "retrieved_timestamp": "1762652579.6512918", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Jacoby746/Proto-Athena-v0.2-4x7B", - "developer": "Jacoby746", - "inference_platform": "unknown", - "id": "Jacoby746/Proto-Athena-v0.2-4x7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 24.154 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37524213531208306 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5067731005424964 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42128125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3197307180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/Jacoby746/Proto-Athena-v0.2-4x7B/bb51eb59-88f6-49c2-814a-11b2c80313d0.json b/data/hfopenllm_v2/Jacoby746/Proto-Athena-v0.2-4x7B/bb51eb59-88f6-49c2-814a-11b2c80313d0.json new file mode 100644 index 000000000..93635570d --- /dev/null +++ b/data/hfopenllm_v2/Jacoby746/Proto-Athena-v0.2-4x7B/bb51eb59-88f6-49c2-814a-11b2c80313d0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Jacoby746_Proto-Athena-v0.2-4x7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Proto-Athena-v0.2-4x7B", + "id": "Jacoby746/Proto-Athena-v0.2-4x7B", + "developer": "Jacoby746", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.154 + } + }, + "evaluation_results": [ + { 
+ "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3752 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5068 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0634 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4213 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3197 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Jacoby746/Proto-Harpy-Blazing-Light-v0.1-2x7B/d8563f36-e299-4186-a5dc-9dae51824e1f.json b/data/hfopenllm_v2/Jacoby746/Proto-Harpy-Blazing-Light-v0.1-2x7B/d8563f36-e299-4186-a5dc-9dae51824e1f.json new file mode 100644 index 000000000..38b06bee1 --- /dev/null +++ b/data/hfopenllm_v2/Jacoby746/Proto-Harpy-Blazing-Light-v0.1-2x7B/d8563f36-e299-4186-a5dc-9dae51824e1f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Jacoby746_Proto-Harpy-Blazing-Light-v0.1-2x7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Proto-Harpy-Blazing-Light-v0.1-2x7B", + "id": "Jacoby746/Proto-Harpy-Blazing-Light-v0.1-2x7B", + "developer": "Jacoby746", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MixtralForCausalLM", + "params_billions": 12.879 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4905 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5187 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0748 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.445 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3301 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Jacoby746/Proto-Harpy-Blazing-Light-v0.1-2x7B/f7455f30-e04e-4bc6-9d71-e33272d4577c.json b/data/hfopenllm_v2/Jacoby746/Proto-Harpy-Blazing-Light-v0.1-2x7B/f7455f30-e04e-4bc6-9d71-e33272d4577c.json deleted file mode 100644 index fffa52b34..000000000 --- a/data/hfopenllm_v2/Jacoby746/Proto-Harpy-Blazing-Light-v0.1-2x7B/f7455f30-e04e-4bc6-9d71-e33272d4577c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Jacoby746_Proto-Harpy-Blazing-Light-v0.1-2x7B/1762652579.651509", - "retrieved_timestamp": "1762652579.65151", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Jacoby746/Proto-Harpy-Blazing-Light-v0.1-2x7B", - "developer": "Jacoby746", - "inference_platform": "unknown", - "id": "Jacoby746/Proto-Harpy-Blazing-Light-v0.1-2x7B", - "additional_details": { - "precision": "float16", - "architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4904719477652628 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5186849053052595 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07477341389728097 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44496874999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33011968085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/Jacoby746/Proto-Harpy-Spark-v0.1-7B/420cf07c-f043-49db-a62d-91e0c21aff2f.json b/data/hfopenllm_v2/Jacoby746/Proto-Harpy-Spark-v0.1-7B/420cf07c-f043-49db-a62d-91e0c21aff2f.json deleted file mode 100644 index b33085744..000000000 --- a/data/hfopenllm_v2/Jacoby746/Proto-Harpy-Spark-v0.1-7B/420cf07c-f043-49db-a62d-91e0c21aff2f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Jacoby746_Proto-Harpy-Spark-v0.1-7B/1762652579.651721", - "retrieved_timestamp": "1762652579.651722", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Jacoby746/Proto-Harpy-Spark-v0.1-7B", - "developer": "Jacoby746", - "inference_platform": "unknown", - "id": "Jacoby746/Proto-Harpy-Spark-v0.1-7B", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43326928106313467 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4735771808296548 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.061933534743202415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, 
- "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43166666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30693151595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/Jacoby746/Proto-Harpy-Spark-v0.1-7B/43bc0528-7bc5-4eac-8848-c9995079450f.json b/data/hfopenllm_v2/Jacoby746/Proto-Harpy-Spark-v0.1-7B/43bc0528-7bc5-4eac-8848-c9995079450f.json new file mode 100644 index 000000000..220166198 --- /dev/null +++ b/data/hfopenllm_v2/Jacoby746/Proto-Harpy-Spark-v0.1-7B/43bc0528-7bc5-4eac-8848-c9995079450f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Jacoby746_Proto-Harpy-Spark-v0.1-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Proto-Harpy-Spark-v0.1-7B", + "id": "Jacoby746/Proto-Harpy-Spark-v0.1-7B", + "developer": "Jacoby746", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4333 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4736 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0619 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.4317 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3069 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen-0.5B-DPO-1epoch/7da8cc7e-791f-420d-9004-b29ddf54e381.json b/data/hfopenllm_v2/JayHyeon/Qwen-0.5B-DPO-1epoch/7da8cc7e-791f-420d-9004-b29ddf54e381.json deleted file mode 100644 index 70278daab..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen-0.5B-DPO-1epoch/7da8cc7e-791f-420d-9004-b29ddf54e381.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen-0.5B-DPO-1epoch/1762652579.651926", - "retrieved_timestamp": "1762652579.651926", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen-0.5B-DPO-1epoch", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen-0.5B-DPO-1epoch", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26473313031644924 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31907502434278595 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33517708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15575132978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen-0.5B-DPO-1epoch/ce19893b-a7e1-4f8e-96f2-eb9cee2afeac.json b/data/hfopenllm_v2/JayHyeon/Qwen-0.5B-DPO-1epoch/ce19893b-a7e1-4f8e-96f2-eb9cee2afeac.json new file mode 100644 index 000000000..b932687e9 --- /dev/null +++ 
b/data/hfopenllm_v2/JayHyeon/Qwen-0.5B-DPO-1epoch/ce19893b-a7e1-4f8e-96f2-eb9cee2afeac.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen-0.5B-DPO-1epoch/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen-0.5B-DPO-1epoch", + "id": "JayHyeon/Qwen-0.5B-DPO-1epoch", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2647 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3191 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0287 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2525 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3352 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1558 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen-0.5B-DPO-5epoch/24629e14-d197-4a5b-adff-7840af652f22.json b/data/hfopenllm_v2/JayHyeon/Qwen-0.5B-DPO-5epoch/24629e14-d197-4a5b-adff-7840af652f22.json new file mode 100644 index 000000000..ab62482c6 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen-0.5B-DPO-5epoch/24629e14-d197-4a5b-adff-7840af652f22.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen-0.5B-DPO-5epoch/1770682486.623709", + "retrieved_timestamp": 
"1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen-0.5B-DPO-5epoch", + "id": "JayHyeon/Qwen-0.5B-DPO-5epoch", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.257 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3112 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.04 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2433 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.338 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1533 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen-0.5B-DPO-5epoch/42960491-549f-42bb-9669-5231ca0c436b.json b/data/hfopenllm_v2/JayHyeon/Qwen-0.5B-DPO-5epoch/42960491-549f-42bb-9669-5231ca0c436b.json deleted file mode 100644 index 1e9f04e21..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen-0.5B-DPO-5epoch/42960491-549f-42bb-9669-5231ca0c436b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen-0.5B-DPO-5epoch/1762652579.65218", - "retrieved_timestamp": "1762652579.652181", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - 
"source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen-0.5B-DPO-5epoch", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen-0.5B-DPO-5epoch", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25701472094043804 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3112109544868782 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24328859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33796875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15325797872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen-0.5B-IRPO-1epoch/9c3ea35c-2cf7-4c31-8b83-c69df3cd9448.json b/data/hfopenllm_v2/JayHyeon/Qwen-0.5B-IRPO-1epoch/9c3ea35c-2cf7-4c31-8b83-c69df3cd9448.json new file mode 100644 index 000000000..bd901d9fc --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen-0.5B-IRPO-1epoch/9c3ea35c-2cf7-4c31-8b83-c69df3cd9448.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen-0.5B-IRPO-1epoch/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen-0.5B-IRPO-1epoch", + "id": "JayHyeon/Qwen-0.5B-IRPO-1epoch", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2589 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3164 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0317 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2466 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3286 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.15 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen-0.5B-IRPO-5epoch/46548403-6eb5-4f7a-874c-1327420f4cab.json b/data/hfopenllm_v2/JayHyeon/Qwen-0.5B-IRPO-5epoch/46548403-6eb5-4f7a-874c-1327420f4cab.json new file mode 100644 index 000000000..296d7cd98 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen-0.5B-IRPO-5epoch/46548403-6eb5-4f7a-874c-1327420f4cab.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen-0.5B-IRPO-5epoch/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen-0.5B-IRPO-5epoch", + "id": "JayHyeon/Qwen-0.5B-IRPO-5epoch", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2487 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3189 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": 
{ + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0325 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2399 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3287 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1507 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen-0.5B-eDPO-1epoch/0bd9c061-b7ee-4bc2-9deb-ea7eea012c49.json b/data/hfopenllm_v2/JayHyeon/Qwen-0.5B-eDPO-1epoch/0bd9c061-b7ee-4bc2-9deb-ea7eea012c49.json new file mode 100644 index 000000000..e9415599f --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen-0.5B-eDPO-1epoch/0bd9c061-b7ee-4bc2-9deb-ea7eea012c49.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen-0.5B-eDPO-1epoch/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen-0.5B-eDPO-1epoch", + "id": "JayHyeon/Qwen-0.5B-eDPO-1epoch", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2623 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3181 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0347 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2424 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3327 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1553 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen-0.5B-eDPO-5epoch/aa2fe858-111c-45e8-b0d4-0048d7fc7ef7.json b/data/hfopenllm_v2/JayHyeon/Qwen-0.5B-eDPO-5epoch/aa2fe858-111c-45e8-b0d4-0048d7fc7ef7.json new file mode 100644 index 000000000..c6919cc60 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen-0.5B-eDPO-5epoch/aa2fe858-111c-45e8-b0d4-0048d7fc7ef7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen-0.5B-eDPO-5epoch/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen-0.5B-eDPO-5epoch", + "id": "JayHyeon/Qwen-0.5B-eDPO-5epoch", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2477 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0234 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, 
+ "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2492 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3326 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1523 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1/46c6ab7f-33a0-4e72-9a63-b24da3f9c4d6.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1/46c6ab7f-33a0-4e72-9a63-b24da3f9c4d6.json deleted file mode 100644 index 11512db01..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1/46c6ab7f-33a0-4e72-9a63-b24da3f9c4d6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1/1762652579.653574", - "retrieved_timestamp": "1762652579.653575", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24687274210206694 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3260313037664168 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06495468277945618 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.34336458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1574966755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1/ad03cae6-b126-4157-a225-9576e4d651d0.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1/ad03cae6-b126-4157-a225-9576e4d651d0.json new file mode 100644 index 000000000..4ad70853d --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1/ad03cae6-b126-4157-a225-9576e4d651d0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1", + "id": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2469 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.326 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.065 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3434 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1575 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1/0d57b65d-3dd4-4185-b8cf-531105e94b5e.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1/0d57b65d-3dd4-4185-b8cf-531105e94b5e.json new file mode 100644 index 000000000..b71ae80d9 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1/0d57b65d-3dd4-4185-b8cf-531105e94b5e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1", + "id": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2606 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3308 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0498 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3288 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1626 + } + } + ] +} \ 
No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1/1ff4251b-d01a-4ced-8868-776210e1ecb6.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1/1ff4251b-d01a-4ced-8868-776210e1ecb6.json deleted file mode 100644 index 63c28a489..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1/1ff4251b-d01a-4ced-8868-776210e1ecb6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1/1762652579.6538298", - "retrieved_timestamp": "1762652579.6538298", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2605863553150086 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3308028437367363 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04984894259818731 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16256648936170212 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1/c3c5cb61-3c4f-4796-9d3c-493618db0f91.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1/c3c5cb61-3c4f-4796-9d3c-493618db0f91.json deleted file mode 100644 index 935dc82d1..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1/c3c5cb61-3c4f-4796-9d3c-493618db0f91.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1/1762652579.654063", - "retrieved_timestamp": "1762652579.6540642", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2529178136234081 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3261949089625076 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.330125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15757978723404256 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1/f8882044-6e71-4788-b2ee-f51f85e67ecc.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1/f8882044-6e71-4788-b2ee-f51f85e67ecc.json new file mode 100644 index 000000000..f890e44af --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1/f8882044-6e71-4788-b2ee-f51f85e67ecc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1", + "id": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2529 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3262 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0566 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3301 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1576 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT/3c8f96c5-af91-4f41-a0b4-6e1b7d55d8ad.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT/3c8f96c5-af91-4f41-a0b4-6e1b7d55d8ad.json new file mode 100644 index 000000000..4ed5a1209 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT/3c8f96c5-af91-4f41-a0b4-6e1b7d55d8ad.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-Instruct-SFT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-Instruct-SFT", + "id": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2768 + } + }, + { + "evaluation_name": "BBH", + "source_data": { 
+ "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3254 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0393 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3342 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.152 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT/48e6f9aa-5034-4653-8832-b0a16bf01079.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT/48e6f9aa-5034-4653-8832-b0a16bf01079.json deleted file mode 100644 index edb97838f..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT/48e6f9aa-5034-4653-8832-b0a16bf01079.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-Instruct-SFT/1762652579.65331", - "retrieved_timestamp": "1762652579.653311", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27677340567472086 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3253697801563151 - } 
- }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33415625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15201130319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4-2ep/00efca13-0d04-4700-a90f-bd621a971555.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4-2ep/00efca13-0d04-4700-a90f-bd621a971555.json deleted file mode 100644 index 6547dea6a..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4-2ep/00efca13-0d04-4700-a90f-bd621a971555.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-4-2ep/1762652579.654743", - "retrieved_timestamp": "1762652579.6547441", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4-2ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4-2ep", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2140498322229462 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3172227797719337 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24664429530201343 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.34727083333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15367353723404256 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4-2ep/e26743b9-4caf-46f8-bd5a-7e4445c850b1.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4-2ep/e26743b9-4caf-46f8-bd5a-7e4445c850b1.json new file mode 100644 index 000000000..a9d17a4e6 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4-2ep/e26743b9-4caf-46f8-bd5a-7e4445c850b1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-4-2ep/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-1e-4-2ep", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4-2ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.214 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3172 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0264 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2466 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3473 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.1537 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4-3ep/f357f4eb-1837-4ab2-ad4b-9cc8a9054517.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4-3ep/f357f4eb-1837-4ab2-ad4b-9cc8a9054517.json deleted file mode 100644 index 15e42cb05..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4-3ep/f357f4eb-1837-4ab2-ad4b-9cc8a9054517.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-4-3ep/1762652579.6549618", - "retrieved_timestamp": "1762652579.654963", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4-3ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4-3ep", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22573992561957826 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3064261556890236 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2483221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36606249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15317486702127658 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4-3ep/febd4016-3a30-4b26-93e5-f7b556781b9b.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4-3ep/febd4016-3a30-4b26-93e5-f7b556781b9b.json new file mode 100644 index 000000000..24b629e3d --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4-3ep/febd4016-3a30-4b26-93e5-f7b556781b9b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-4-3ep/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + 
"evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-1e-4-3ep", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4-3ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2257 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3064 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0264 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2483 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3661 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1532 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4-5ep/206c756e-1edc-491f-9f86-7e00c7ab7085.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4-5ep/206c756e-1edc-491f-9f86-7e00c7ab7085.json deleted file mode 100644 index 806a27ea5..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4-5ep/206c756e-1edc-491f-9f86-7e00c7ab7085.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-4-5ep/1762652579.655172", - "retrieved_timestamp": "1762652579.655173", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"JayHyeon/Qwen2.5-0.5B-SFT-1e-4-5ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4-5ep", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19868726091215752 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31044747322019184 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3406666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15575132978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4-5ep/ae82125e-94ac-48ca-8240-807e4b7ef9a0.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4-5ep/ae82125e-94ac-48ca-8240-807e4b7ef9a0.json new file mode 100644 index 000000000..3a2866ea3 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4-5ep/ae82125e-94ac-48ca-8240-807e4b7ef9a0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-4-5ep/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-1e-4-5ep", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4-5ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1987 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0196 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2534 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3407 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1558 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4/5321fa0b-b010-4e1d-9f20-a97b56f4f937.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4/5321fa0b-b010-4e1d-9f20-a97b56f4f937.json new file mode 100644 index 000000000..844a39283 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4/5321fa0b-b010-4e1d-9f20-a97b56f4f937.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-1e-4", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.202 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3017 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": 
"hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0189 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2508 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3446 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1619 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4/7d591ed9-5802-43a3-bb38-ec45b69adb08.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4/7d591ed9-5802-43a3-bb38-ec45b69adb08.json deleted file mode 100644 index 32479964c..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4/7d591ed9-5802-43a3-bb38-ec45b69adb08.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-4/1762652579.654527", - "retrieved_timestamp": "1762652579.654527", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2019596891802639 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3017092819749249 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0188821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", 
- "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25083892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3446354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16190159574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5-2ep/d25a4602-ea50-4a53-952c-112ba250123b.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5-2ep/d25a4602-ea50-4a53-952c-112ba250123b.json new file mode 100644 index 000000000..4799c0aa3 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5-2ep/d25a4602-ea50-4a53-952c-112ba250123b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-5-2ep/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-1e-5-2ep", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5-2ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1971 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3225 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0529 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2693 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3368 + } + }, 
+ { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1651 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5-2ep/fde79985-6832-4315-8650-fdcf9ad68087.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5-2ep/fde79985-6832-4315-8650-fdcf9ad68087.json deleted file mode 100644 index 29bc0faca..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5-2ep/fde79985-6832-4315-8650-fdcf9ad68087.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-5-2ep/1762652579.655605", - "retrieved_timestamp": "1762652579.655606", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5-2ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5-2ep", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19706379074189817 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3224699194774388 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.052870090634441085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3367604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1651429521276596 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5-3ep/232e3fc4-5cd2-4515-9e15-acd7d56bc34d.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5-3ep/232e3fc4-5cd2-4515-9e15-acd7d56bc34d.json new file mode 100644 index 000000000..3305c120d --- /dev/null +++ 
b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5-3ep/232e3fc4-5cd2-4515-9e15-acd7d56bc34d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-5-3ep/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-1e-5-3ep", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5-3ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2241 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3247 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0536 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3353 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1689 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5-3ep/aef8fd41-ac51-4fb5-b8ae-78ebca9b4215.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5-3ep/aef8fd41-ac51-4fb5-b8ae-78ebca9b4215.json deleted file mode 100644 index 090d07e57..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5-3ep/aef8fd41-ac51-4fb5-b8ae-78ebca9b4215.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-5-3ep/1762652579.655815", - "retrieved_timestamp": "1762652579.6558158", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5-3ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5-3ep", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2241164554493189 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32468117082421427 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3353333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16888297872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5-5ep/975f54fe-a581-4ce1-b0c1-7becb7605f09.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5-5ep/975f54fe-a581-4ce1-b0c1-7becb7605f09.json new file mode 100644 index 000000000..925baa726 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5-5ep/975f54fe-a581-4ce1-b0c1-7becb7605f09.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-5-5ep/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-1e-5-5ep", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5-5ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2292 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3259 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0521 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3235 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1688 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5-5ep/b5cdb9c2-d81a-4e0b-817a-3e101d122e7a.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5-5ep/b5cdb9c2-d81a-4e0b-817a-3e101d122e7a.json deleted file mode 100644 index e5fdc6a26..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5-5ep/b5cdb9c2-d81a-4e0b-817a-3e101d122e7a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-5-5ep/1762652579.656047", - "retrieved_timestamp": "1762652579.656048", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5-5ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5-5ep", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.22918744486850445 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3259343389530942 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05211480362537765 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3235208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16879986702127658 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5/3eac4497-66af-4fc6-bf89-459631e4a418.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5/3eac4497-66af-4fc6-bf89-459631e4a418.json deleted file mode 100644 index d8c67b732..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5/3eac4497-66af-4fc6-bf89-459631e4a418.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-5/1762652579.6553931", - "retrieved_timestamp": "1762652579.655394", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1985875255433361 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3139860294769257 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0377643504531722 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34603125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1697972074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5/92ae4461-48bc-47fe-a3ad-ea4c3452d395.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5/92ae4461-48bc-47fe-a3ad-ea4c3452d395.json new file mode 100644 index 000000000..36e1434b4 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5/92ae4461-48bc-47fe-a3ad-ea4c3452d395.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-1e-5", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1986 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.314 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0378 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.346 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": 
"hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1698 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4-2ep/638e1cc0-9baf-4555-a278-4b21c46af86f.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4-2ep/638e1cc0-9baf-4555-a278-4b21c46af86f.json new file mode 100644 index 000000000..b6a076686 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4-2ep/638e1cc0-9baf-4555-a278-4b21c46af86f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-4-2ep/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-4-2ep", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4-2ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1831 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2984 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0249 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2424 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3568 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 
+ }, + "score_details": { + "score": 0.1484 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4-2ep/9d58433f-a74c-4345-bd47-a8f2c4e2361e.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4-2ep/9d58433f-a74c-4345-bd47-a8f2c4e2361e.json deleted file mode 100644 index e77a176e1..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4-2ep/9d58433f-a74c-4345-bd47-a8f2c4e2361e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-4-2ep/1762652579.656457", - "retrieved_timestamp": "1762652579.656457", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4-2ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4-2ep", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18307535117931534 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29839616748934167 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2424496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3567604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1484375 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4-3ep/cef4161a-4e1c-4a92-bca8-b07f957a13b1.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4-3ep/cef4161a-4e1c-4a92-bca8-b07f957a13b1.json new file mode 100644 index 000000000..ae6218e48 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4-3ep/cef4161a-4e1c-4a92-bca8-b07f957a13b1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-4-3ep/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + 
"evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-4-3ep", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4-3ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.199 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.311 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0151 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3449 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1416 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4-3ep/e8109e5c-6276-4935-bfa0-fc969f118d3b.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4-3ep/e8109e5c-6276-4935-bfa0-fc969f118d3b.json deleted file mode 100644 index 601fc4bac..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4-3ep/e8109e5c-6276-4935-bfa0-fc969f118d3b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-4-3ep/1762652579.656671", - "retrieved_timestamp": "1762652579.656672", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"JayHyeon/Qwen2.5-0.5B-SFT-2e-4-3ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4-3ep", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1989620872617987 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3109875129533253 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3449479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14162234042553193 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4-5ep/715b556b-2bc0-4864-b4b1-b7413a5d45bc.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4-5ep/715b556b-2bc0-4864-b4b1-b7413a5d45bc.json new file mode 100644 index 000000000..e5bad0533 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4-5ep/715b556b-2bc0-4864-b4b1-b7413a5d45bc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-4-5ep/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-4-5ep", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4-5ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1897 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0181 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2693 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3874 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1336 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4-5ep/9d6b36c5-c0ec-4ab1-a12b-47efc34ebfc8.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4-5ep/9d6b36c5-c0ec-4ab1-a12b-47efc34ebfc8.json deleted file mode 100644 index 47e8f5d6a..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4-5ep/9d6b36c5-c0ec-4ab1-a12b-47efc34ebfc8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-4-5ep/1762652579.656877", - "retrieved_timestamp": "1762652579.656878", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4-5ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4-5ep", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18971994308434953 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936418449815176 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01812688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38739583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13364361702127658 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4/5e307ea5-70da-476a-8d9e-1d488385565f.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4/5e307ea5-70da-476a-8d9e-1d488385565f.json deleted file mode 100644 index 0ec9bf3ad..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4/5e307ea5-70da-476a-8d9e-1d488385565f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-4/1762652579.656255", - "retrieved_timestamp": "1762652579.656256", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2034335562972912 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2935549587263229 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02416918429003021 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3434270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14128989361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4/7552ad5c-5d1f-478b-a931-036083b2954e.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4/7552ad5c-5d1f-478b-a931-036083b2954e.json new file mode 100644 index 000000000..43c9ef63c --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4/7552ad5c-5d1f-478b-a931-036083b2954e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-4", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2034 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0242 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3434 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1413 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_3e-7-3ep_0alp_5lam/343b7db1-8f96-4998-a6fb-5eb0aa1b6b21.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_3e-7-3ep_0alp_5lam/343b7db1-8f96-4998-a6fb-5eb0aa1b6b21.json deleted file mode 100644 index 1fc7263b5..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_3e-7-3ep_0alp_5lam/343b7db1-8f96-4998-a6fb-5eb0aa1b6b21.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_3e-7-3ep_0alp_5lam/1762652579.6580968", - "retrieved_timestamp": "1762652579.658098", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_3e-7-3ep_0alp_5lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_3e-7-3ep_0alp_5lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24105262924595627 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31671815484837784 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.330125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15625 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_3e-7-3ep_0alp_5lam/7bb3ae9f-9bb3-4bf2-9d97-d7f4f30697ac.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_3e-7-3ep_0alp_5lam/7bb3ae9f-9bb3-4bf2-9d97-d7f4f30697ac.json new file mode 100644 index 000000000..a44e27ca9 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_3e-7-3ep_0alp_5lam/7bb3ae9f-9bb3-4bf2-9d97-d7f4f30697ac.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_3e-7-3ep_0alp_5lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + 
"source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_3e-7-3ep_0alp_5lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_3e-7-3ep_0alp_5lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2411 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3167 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0347 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3301 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1562 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-1ep_0alp_5lam/821d67e5-da8d-4383-8825-3bfa72a91fc9.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-1ep_0alp_5lam/821d67e5-da8d-4383-8825-3bfa72a91fc9.json new file mode 100644 index 000000000..def874bd2 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-1ep_0alp_5lam/821d67e5-da8d-4383-8825-3bfa72a91fc9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-1ep_0alp_5lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": 
"documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-1ep_0alp_5lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-1ep_0alp_5lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2369 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.326 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0453 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3355 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.157 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-1ep_0alp_5lam/bfa11262-d7bd-44b3-8b8b-81013f1e0c24.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-1ep_0alp_5lam/bfa11262-d7bd-44b3-8b8b-81013f1e0c24.json deleted file mode 100644 index 3cc76e849..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-1ep_0alp_5lam/bfa11262-d7bd-44b3-8b8b-81013f1e0c24.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-1ep_0alp_5lam/1762652579.658342", - "retrieved_timestamp": "1762652579.6583428", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-1ep_0alp_5lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-1ep_0alp_5lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23685598656010498 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3260038632940968 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3355208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15699800531914893 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-2ep_0alp_5lam/902849f8-dc58-4e01-ba30-ff95412272d3.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-2ep_0alp_5lam/902849f8-dc58-4e01-ba30-ff95412272d3.json deleted file mode 100644 index d6e0d49b4..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-2ep_0alp_5lam/902849f8-dc58-4e01-ba30-ff95412272d3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-2ep_0alp_5lam/1762652579.6585789", - "retrieved_timestamp": "1762652579.65858", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-2ep_0alp_5lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-2ep_0alp_5lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22623971063444992 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3261540051256346 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3408229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15408909574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-2ep_0alp_5lam/c5bddcba-4a40-4fbb-93e8-aebd06a70a66.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-2ep_0alp_5lam/c5bddcba-4a40-4fbb-93e8-aebd06a70a66.json new file mode 100644 index 000000000..48999ddc5 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-2ep_0alp_5lam/c5bddcba-4a40-4fbb-93e8-aebd06a70a66.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-2ep_0alp_5lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-2ep_0alp_5lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-2ep_0alp_5lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2262 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3262 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0347 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3408 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1541 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-3ep_0alp_5lam/4c5cace1-70ce-48f3-aad1-d141924c24de.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-3ep_0alp_5lam/4c5cace1-70ce-48f3-aad1-d141924c24de.json deleted file mode 100644 index 1907f43d4..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-3ep_0alp_5lam/4c5cace1-70ce-48f3-aad1-d141924c24de.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-3ep_0alp_5lam/1762652579.6588218", - "retrieved_timestamp": "1762652579.658823", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-3ep_0alp_5lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-3ep_0alp_5lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25079455843827714 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3199331515135054 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33545833333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15550199468085107 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-3ep_0alp_5lam/dc35237c-606d-4609-927a-566bea767312.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-3ep_0alp_5lam/dc35237c-606d-4609-927a-566bea767312.json new file mode 100644 index 000000000..4dd0076c1 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-3ep_0alp_5lam/dc35237c-606d-4609-927a-566bea767312.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-3ep_0alp_5lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-3ep_0alp_5lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-3ep_0alp_5lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2508 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3199 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0408 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + 
"dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3355 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1555 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-1ep_0alp_5lam/3924d1af-e167-4186-a34b-d9b4b8c26d59.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-1ep_0alp_5lam/3924d1af-e167-4186-a34b-d9b4b8c26d59.json new file mode 100644 index 000000000..a8a95addb --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-1ep_0alp_5lam/3924d1af-e167-4186-a34b-d9b4b8c26d59.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-1ep_0alp_5lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-1ep_0alp_5lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-1ep_0alp_5lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.239 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3182 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.04 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + 
}, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3328 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.156 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-1ep_0alp_5lam/e42051f2-90f2-4fbe-a4bd-623482abf10f.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-1ep_0alp_5lam/e42051f2-90f2-4fbe-a4bd-623482abf10f.json deleted file mode 100644 index 7f7eb7dfd..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-1ep_0alp_5lam/e42051f2-90f2-4fbe-a4bd-623482abf10f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-1ep_0alp_5lam/1762652579.6591082", - "retrieved_timestamp": "1762652579.659109", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-1ep_0alp_5lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-1ep_0alp_5lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.238979241745236 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31816042712158116 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33279166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15600066489361702 - } - } - ] -} 
diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-2ep_0alp_5lam/e70423b6-5a7d-4745-b5a3-968f363a3b7a.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-2ep_0alp_5lam/e70423b6-5a7d-4745-b5a3-968f363a3b7a.json deleted file mode 100644 index 49484ee2f..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-2ep_0alp_5lam/e70423b6-5a7d-4745-b5a3-968f363a3b7a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-2ep_0alp_5lam/1762652579.6593359", - "retrieved_timestamp": "1762652579.659337", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-2ep_0alp_5lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-2ep_0alp_5lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2423015376977531 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3154080373582542 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33279166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15475398936170212 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-2ep_0alp_5lam/f733c4cc-90fc-4b31-bed3-c57dba6d4b6a.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-2ep_0alp_5lam/f733c4cc-90fc-4b31-bed3-c57dba6d4b6a.json new file mode 100644 index 000000000..8e804555f --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-2ep_0alp_5lam/f733c4cc-90fc-4b31-bed3-c57dba6d4b6a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-2ep_0alp_5lam/1770682486.623709", + "retrieved_timestamp": 
"1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-2ep_0alp_5lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-2ep_0alp_5lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2423 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3154 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0347 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3328 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1548 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-3ep_0alp_5lam/08f933a0-b096-4271-890e-0df7e20d1d20.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-3ep_0alp_5lam/08f933a0-b096-4271-890e-0df7e20d1d20.json new file mode 100644 index 000000000..254d8727c --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-3ep_0alp_5lam/08f933a0-b096-4271-890e-0df7e20d1d20.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-3ep_0alp_5lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + 
"source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-3ep_0alp_5lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-3ep_0alp_5lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2493 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.319 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0438 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3341 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1561 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-3ep_0alp_5lam/2a7b8fa7-5c16-414b-968e-ec7b06e8143c.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-3ep_0alp_5lam/2a7b8fa7-5c16-414b-968e-ec7b06e8143c.json deleted file mode 100644 index abf56d19c..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-3ep_0alp_5lam/2a7b8fa7-5c16-414b-968e-ec7b06e8143c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-3ep_0alp_5lam/1762652579.6595562", - "retrieved_timestamp": "1762652579.659557", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": 
{ - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-3ep_0alp_5lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-3ep_0alp_5lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24932069132124984 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3189717077702392 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.334125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15608377659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-1ep_0alp_0lam/8434e448-ed77-45f2-9c31-39128912f842.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-1ep_0alp_0lam/8434e448-ed77-45f2-9c31-39128912f842.json new file mode 100644 index 000000000..2af5272ab --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-1ep_0alp_0lam/8434e448-ed77-45f2-9c31-39128912f842.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-1ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-1ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-1ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2542 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3167 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0408 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2718 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3289 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.158 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-1ep_0alp_0lam/dfa1b391-4b18-4ac0-a397-a983070647a7.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-1ep_0alp_0lam/dfa1b391-4b18-4ac0-a397-a983070647a7.json deleted file mode 100644 index e2637bf90..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-1ep_0alp_0lam/dfa1b391-4b18-4ac0-a397-a983070647a7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-1ep_0alp_0lam/1762652579.660001", - "retrieved_timestamp": "1762652579.660005", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-1ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-1ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, 
- "score_details": { - "score": 0.2541667220752049 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31671883869615397 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32885416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15799534574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-2ep_0alp_0lam/96d31674-0011-4621-9131-31b5f6ede223.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-2ep_0alp_0lam/96d31674-0011-4621-9131-31b5f6ede223.json deleted file mode 100644 index 0eaa14394..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-2ep_0alp_0lam/96d31674-0011-4621-9131-31b5f6ede223.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-2ep_0alp_0lam/1762652579.660342", - "retrieved_timestamp": "1762652579.660342", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-2ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-2ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24507418095098782 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3159533058861391 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - 
"metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3301875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15608377659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-2ep_0alp_0lam/d801037b-1eb0-4058-9096-429e5237e015.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-2ep_0alp_0lam/d801037b-1eb0-4058-9096-429e5237e015.json new file mode 100644 index 000000000..b5090ee5f --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-2ep_0alp_0lam/d801037b-1eb0-4058-9096-429e5237e015.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-2ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-2ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-2ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2451 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.316 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0408 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + 
"hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3302 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1561 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-3ep_0alp_0lam/d8663966-a5f5-40e6-a327-1255f7c3395f.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-3ep_0alp_0lam/d8663966-a5f5-40e6-a327-1255f7c3395f.json deleted file mode 100644 index 416025aef..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-3ep_0alp_0lam/d8663966-a5f5-40e6-a327-1255f7c3395f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-3ep_0alp_0lam/1762652579.6605709", - "retrieved_timestamp": "1762652579.6605718", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25574032456105356 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31419826948787827 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3315208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.1574966755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-3ep_0alp_0lam/e0c46f18-598e-402f-8955-68e71fab67cd.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-3ep_0alp_0lam/e0c46f18-598e-402f-8955-68e71fab67cd.json new file mode 100644 index 000000000..eb8631855 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-3ep_0alp_0lam/e0c46f18-598e-402f-8955-68e71fab67cd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-3ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2557 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3142 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.04 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3315 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1575 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-2ep_0alp_0lam/4b987cb5-cf7c-4866-8cf0-9926f78c2de9.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-2ep_0alp_0lam/4b987cb5-cf7c-4866-8cf0-9926f78c2de9.json new file mode 100644 index 000000000..a7ae82558 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-2ep_0alp_0lam/4b987cb5-cf7c-4866-8cf0-9926f78c2de9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-2ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-2ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-2ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2605 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3167 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0363 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3341 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1577 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-2ep_0alp_0lam/a1fadf30-c543-4b73-bf28-0cb9cb2fc91f.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-2ep_0alp_0lam/a1fadf30-c543-4b73-bf28-0cb9cb2fc91f.json deleted file mode 100644 index c7fdd7a02..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-2ep_0alp_0lam/a1fadf30-c543-4b73-bf28-0cb9cb2fc91f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-2ep_0alp_0lam/1762652579.660821", - "retrieved_timestamp": "1762652579.660822", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-2ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-2ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26053648763059795 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3166968072745491 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03625377643504532 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.334125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15766289893617022 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-3ep_0alp_0lam/57b69bd0-73f6-42e0-bd9e-984bb1e6a553.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-3ep_0alp_0lam/57b69bd0-73f6-42e0-bd9e-984bb1e6a553.json deleted file mode 100644 index 969f44cb8..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-3ep_0alp_0lam/57b69bd0-73f6-42e0-bd9e-984bb1e6a553.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-3ep_0alp_0lam/1762652579.661046", - "retrieved_timestamp": "1762652579.661047", - "source_data": 
[ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25781371206177384 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31732037273750646 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.035498489425981876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32879166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1583277925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-3ep_0alp_0lam/ec658058-1075-4918-9dc9-fc79d0dcf897.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-3ep_0alp_0lam/ec658058-1075-4918-9dc9-fc79d0dcf897.json new file mode 100644 index 000000000..090ea8c8f --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-3ep_0alp_0lam/ec658058-1075-4918-9dc9-fc79d0dcf897.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-3ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": 
"hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2578 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3173 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0355 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3288 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1583 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-1ep_0alp_0lam/93597efa-6da8-4074-8049-6ec66f499cbf.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-1ep_0alp_0lam/93597efa-6da8-4074-8049-6ec66f499cbf.json deleted file mode 100644 index 50046b7de..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-1ep_0alp_0lam/93597efa-6da8-4074-8049-6ec66f499cbf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-1ep_0alp_0lam/1762652579.661258", - "retrieved_timestamp": "1762652579.661259", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-1ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-1ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23353369060758786 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3197619098572027 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348994 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32755208333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1580784574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-1ep_0alp_0lam/b68baa86-3e1a-4888-98ba-2ecede79b4a7.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-1ep_0alp_0lam/b68baa86-3e1a-4888-98ba-2ecede79b4a7.json new file mode 100644 index 000000000..0b37ef072 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-1ep_0alp_0lam/b68baa86-3e1a-4888-98ba-2ecede79b4a7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-1ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-1ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-1ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2335 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3198 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + 
"source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0385 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2752 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3276 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1581 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-2ep_0alp_0lam/00a5dc4a-6ffb-4e6a-9547-416ff29e0ded.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-2ep_0alp_0lam/00a5dc4a-6ffb-4e6a-9547-416ff29e0ded.json deleted file mode 100644 index 540185e08..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-2ep_0alp_0lam/00a5dc4a-6ffb-4e6a-9547-416ff29e0ded.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-2ep_0alp_0lam/1762652579.6614761", - "retrieved_timestamp": "1762652579.6614761", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-2ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-2ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24719743613611883 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32262707839652854 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32621875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15375664893617022 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-2ep_0alp_0lam/0b11c8ab-2cfa-425d-9d81-d999f94401db.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-2ep_0alp_0lam/0b11c8ab-2cfa-425d-9d81-d999f94401db.json new file mode 100644 index 000000000..8cc0efa5c --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-2ep_0alp_0lam/0b11c8ab-2cfa-425d-9d81-d999f94401db.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-2ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-2ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-2ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2472 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3226 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0506 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": 
{ + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3262 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1538 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-3ep_0alp_0lam/13cf92c4-fbeb-445a-85d6-bf71ce2e68c9.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-3ep_0alp_0lam/13cf92c4-fbeb-445a-85d6-bf71ce2e68c9.json deleted file mode 100644 index f41caa9b0..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-3ep_0alp_0lam/13cf92c4-fbeb-445a-85d6-bf71ce2e68c9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-3ep_0alp_0lam/1762652579.661691", - "retrieved_timestamp": "1762652579.661692", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2474223948013493 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32291208173140107 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04154078549848943 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32748958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.15392287234042554 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-3ep_0alp_0lam/a3e48db8-3679-4f19-853d-82a73ef49400.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-3ep_0alp_0lam/a3e48db8-3679-4f19-853d-82a73ef49400.json new file mode 100644 index 000000000..9df394a48 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-3ep_0alp_0lam/a3e48db8-3679-4f19-853d-82a73ef49400.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-3ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2474 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3229 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0415 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3275 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1539 + } + } + ] +} \ No newline at end of 
file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-1ep_0alp_0lam/14a173b6-4d56-4d22-a888-57ea46d72e67.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-1ep_0alp_0lam/14a173b6-4d56-4d22-a888-57ea46d72e67.json deleted file mode 100644 index 8a9a726a0..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-1ep_0alp_0lam/14a173b6-4d56-4d22-a888-57ea46d72e67.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-1ep_0alp_0lam/1762652579.6619039", - "retrieved_timestamp": "1762652579.6619048", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-1ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-1ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24027801788144343 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32453683161596314 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32621875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1573304521276596 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-1ep_0alp_0lam/7dbf35b2-80c1-4181-80f9-850ea51cead2.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-1ep_0alp_0lam/7dbf35b2-80c1-4181-80f9-850ea51cead2.json new file mode 100644 index 000000000..839bdc602 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-1ep_0alp_0lam/7dbf35b2-80c1-4181-80f9-850ea51cead2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-1ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", 
+ "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-1ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-1ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2403 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3245 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0431 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2819 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3262 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1573 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-2ep_0alp_0lam/231f47db-1662-4313-9ff4-f32883f5615c.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-2ep_0alp_0lam/231f47db-1662-4313-9ff4-f32883f5615c.json new file mode 100644 index 000000000..33c170571 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-2ep_0alp_0lam/231f47db-1662-4313-9ff4-f32883f5615c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-2ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": 
"documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-2ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-2ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2368 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3224 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0461 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3355 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1516 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-2ep_0alp_0lam/f46cc7cb-27e8-4723-9ecf-cbeef9789b25.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-2ep_0alp_0lam/f46cc7cb-27e8-4723-9ecf-cbeef9789b25.json deleted file mode 100644 index ba646184f..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-2ep_0alp_0lam/f46cc7cb-27e8-4723-9ecf-cbeef9789b25.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-2ep_0alp_0lam/1762652579.662116", - "retrieved_timestamp": "1762652579.662117", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-2ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-2ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23680611887569425 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3224293761524927 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04607250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33548958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15159574468085107 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-3ep_0alp_0lam/c79df898-14c6-4f00-9f65-0d01cd34ed61.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-3ep_0alp_0lam/c79df898-14c6-4f00-9f65-0d01cd34ed61.json new file mode 100644 index 000000000..f02cc4dee --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-3ep_0alp_0lam/c79df898-14c6-4f00-9f65-0d01cd34ed61.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-3ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2372 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3248 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0476 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3394 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.155 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-3ep_0alp_0lam/de200bef-71a2-4efb-bc34-02f69385b636.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-3ep_0alp_0lam/de200bef-71a2-4efb-bc34-02f69385b636.json deleted file mode 100644 index 9b38546da..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-3ep_0alp_0lam/de200bef-71a2-4efb-bc34-02f69385b636.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-3ep_0alp_0lam/1762652579.662327", - "retrieved_timestamp": "1762652579.662327", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.23718068059415687 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32477052921998556 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3394270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1550033244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-7-3ep_0alp_0lam/2c52917f-c396-410d-bc78-c93c433797fc.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-7-3ep_0alp_0lam/2c52917f-c396-410d-bc78-c93c433797fc.json new file mode 100644 index 000000000..1ff95edd2 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-7-3ep_0alp_0lam/2c52917f-c396-410d-bc78-c93c433797fc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-7-3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-7-3ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-7-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2499 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3181 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0415 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3288 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1574 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-7-3ep_0alp_0lam/7ed1ff6a-fe4d-4f78-bbc6-c5e64a7fbfc1.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-7-3ep_0alp_0lam/7ed1ff6a-fe4d-4f78-bbc6-c5e64a7fbfc1.json deleted file mode 100644 index 7141e0100..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-7-3ep_0alp_0lam/7ed1ff6a-fe4d-4f78-bbc6-c5e64a7fbfc1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-7-3ep_0alp_0lam/1762652579.6625469", - "retrieved_timestamp": "1762652579.662548", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-7-3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-7-3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24992021170494289 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31806007750183346 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04154078549848943 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": 
"Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15741356382978725 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-1ep_0alp_0lam/0f1d2925-4e1c-495b-94be-f3515fbd53d7.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-1ep_0alp_0lam/0f1d2925-4e1c-495b-94be-f3515fbd53d7.json new file mode 100644 index 000000000..69688309d --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-1ep_0alp_0lam/0f1d2925-4e1c-495b-94be-f3515fbd53d7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-1ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-1ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-1ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2381 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3242 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0498 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3328 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1572 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-1ep_0alp_0lam/82d38084-32b1-4224-810c-b66dd337b3fe.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-1ep_0alp_0lam/82d38084-32b1-4224-810c-b66dd337b3fe.json deleted file mode 100644 index a12b3bcc1..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-1ep_0alp_0lam/82d38084-32b1-4224-810c-b66dd337b3fe.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-1ep_0alp_0lam/1762652579.662755", - "retrieved_timestamp": "1762652579.662755", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-1ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-1ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23810489501190177 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32421844512358233 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04984894259818731 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3328229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15724734042553193 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-2ep_0alp_0lam/5cbb1972-9895-4689-9f6f-7e0037829a78.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-2ep_0alp_0lam/5cbb1972-9895-4689-9f6f-7e0037829a78.json new file mode 100644 index 000000000..98d643888 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-2ep_0alp_0lam/5cbb1972-9895-4689-9f6f-7e0037829a78.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-2ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-2ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-2ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2421 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3225 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.04 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3408 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1496 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-2ep_0alp_0lam/972e0d76-63bb-431b-9d9b-68dd6b738447.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-2ep_0alp_0lam/972e0d76-63bb-431b-9d9b-68dd6b738447.json deleted file mode 100644 index 7f7cd2fd9..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-2ep_0alp_0lam/972e0d76-63bb-431b-9d9b-68dd6b738447.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-2ep_0alp_0lam/1762652579.662969", - "retrieved_timestamp": "1762652579.662969", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-2ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-2ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2420765790325226 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3224798177796032 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3408229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14960106382978725 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-3ep_0alp_0lam/6bc42e37-1f31-47cb-97e4-9d0b28b53691.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-3ep_0alp_0lam/6bc42e37-1f31-47cb-97e4-9d0b28b53691.json new file mode 100644 index 000000000..146ac953b --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-3ep_0alp_0lam/6bc42e37-1f31-47cb-97e4-9d0b28b53691.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + 
"source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-3ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2381 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3265 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0446 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3408 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1499 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-3ep_0alp_0lam/7337bc31-54b6-43b9-bb26-63f2273ffc7e.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-3ep_0alp_0lam/7337bc31-54b6-43b9-bb26-63f2273ffc7e.json deleted file mode 100644 index e55a0ccf9..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-3ep_0alp_0lam/7337bc31-54b6-43b9-bb26-63f2273ffc7e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-3ep_0alp_0lam/1762652579.663178", - "retrieved_timestamp": "1762652579.663179", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23805502732749106 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32652003776870003 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0445619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34079166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14985039893617022 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_1ep_0alp_0lam/a1573b95-59e6-4ae0-bc12-6ef6fee90b76.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_1ep_0alp_0lam/a1573b95-59e6-4ae0-bc12-6ef6fee90b76.json new file mode 100644 index 000000000..23710b611 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_1ep_0alp_0lam/a1573b95-59e6-4ae0-bc12-6ef6fee90b76.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_1ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_1ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_1ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + 
"hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2526 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3177 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0438 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2735 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3342 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1572 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_1ep_0alp_0lam/c2e14e90-6c18-4a9f-9d68-a9d98960dd32.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_1ep_0alp_0lam/c2e14e90-6c18-4a9f-9d68-a9d98960dd32.json deleted file mode 100644 index 636fa8a42..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_1ep_0alp_0lam/c2e14e90-6c18-4a9f-9d68-a9d98960dd32.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_1ep_0alp_0lam/1762652579.663386", - "retrieved_timestamp": "1762652579.663386", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_1ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_1ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25264298727376694 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3176911636441555 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33415625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15724734042553193 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_2ep_0alp_0lam/78c61b39-3c76-4af9-8d5e-fcd67d6c8779.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_2ep_0alp_0lam/78c61b39-3c76-4af9-8d5e-fcd67d6c8779.json new file mode 100644 index 000000000..f02becf07 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_2ep_0alp_0lam/78c61b39-3c76-4af9-8d5e-fcd67d6c8779.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_2ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_2ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_2ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2457 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.316 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + 
"source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0446 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3302 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1572 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_2ep_0alp_0lam/972d45c5-acd1-4e54-8310-9ff56c5fb061.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_2ep_0alp_0lam/972d45c5-acd1-4e54-8310-9ff56c5fb061.json deleted file mode 100644 index 07a2dbc78..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_2ep_0alp_0lam/972d45c5-acd1-4e54-8310-9ff56c5fb061.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_2ep_0alp_0lam/1762652579.6636329", - "retrieved_timestamp": "1762652579.6636338", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_2ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_2ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24572356901909154 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.316045450978746 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.0445619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33015625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15716422872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_3ep_0alp_0lam/2faf738f-64f4-4e14-8011-9e00a4e2dd6a.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_3ep_0alp_0lam/2faf738f-64f4-4e14-8011-9e00a4e2dd6a.json deleted file mode 100644 index b46668cc9..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_3ep_0alp_0lam/2faf738f-64f4-4e14-8011-9e00a4e2dd6a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_3ep_0alp_0lam/1762652579.663875", - "retrieved_timestamp": "1762652579.663876", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2441998342176536 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3193544697854515 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04833836858006042 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33148958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on 
MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1566655585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_3ep_0alp_0lam/e4c06400-da86-4448-b421-23476f50bdb3.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_3ep_0alp_0lam/e4c06400-da86-4448-b421-23476f50bdb3.json new file mode 100644 index 000000000..77c01e879 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_3ep_0alp_0lam/e4c06400-da86-4448-b421-23476f50bdb3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_3ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2442 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3194 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0483 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2735 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3315 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + 
}, + "score_details": { + "score": 0.1567 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_1ep_0alp_0lam/15b28d99-e02a-4021-899b-adef87dfe96a.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_1ep_0alp_0lam/15b28d99-e02a-4021-899b-adef87dfe96a.json deleted file mode 100644 index efcfa87db..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_1ep_0alp_0lam/15b28d99-e02a-4021-899b-adef87dfe96a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_1ep_0alp_0lam/1762652579.6641018", - "retrieved_timestamp": "1762652579.664103", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_1ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_1ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26036139664977814 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31784656431310543 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.035498489425981876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15674867021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_1ep_0alp_0lam/48f4c2a7-e819-4789-92ea-e02c5e92d3e4.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_1ep_0alp_0lam/48f4c2a7-e819-4789-92ea-e02c5e92d3e4.json new file mode 100644 index 000000000..4b4f5231f --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_1ep_0alp_0lam/48f4c2a7-e819-4789-92ea-e02c5e92d3e4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_1ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_1ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_1ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2604 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3178 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0355 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3288 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1567 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_2ep_0alp_0lam/b643171e-adaa-4f6e-8860-542950810578.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_2ep_0alp_0lam/b643171e-adaa-4f6e-8860-542950810578.json deleted file mode 100644 index 9b64379e3..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_2ep_0alp_0lam/b643171e-adaa-4f6e-8860-542950810578.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_2ep_0alp_0lam/1762652579.664332", - "retrieved_timestamp": "1762652579.664333", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_2ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_2ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24899599728719796 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3172899997448431 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3301875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15691489361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_2ep_0alp_0lam/cd9cbbac-f1ca-4193-88cc-e5968cc1bb62.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_2ep_0alp_0lam/cd9cbbac-f1ca-4193-88cc-e5968cc1bb62.json new file mode 100644 index 000000000..d5f2c4e6c --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_2ep_0alp_0lam/cd9cbbac-f1ca-4193-88cc-e5968cc1bb62.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_2ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_2ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_2ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + 
"evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.249 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3173 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0393 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3302 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1569 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_3ep_0alp_0lam/a26204c0-90c5-44fd-8814-d69c6e4f4585.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_3ep_0alp_0lam/a26204c0-90c5-44fd-8814-d69c6e4f4585.json deleted file mode 100644 index 7b3ff3da1..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_3ep_0alp_0lam/a26204c0-90c5-44fd-8814-d69c6e4f4585.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_3ep_0alp_0lam/1762652579.6645608", - "retrieved_timestamp": "1762652579.664562", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - 
"params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26036139664977814 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3149566664115098 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0377643504531722 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3341875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15658244680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_3ep_0alp_0lam/ab3685ab-1795-4a0e-8ee4-4f509616d1b8.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_3ep_0alp_0lam/ab3685ab-1795-4a0e-8ee4-4f509616d1b8.json new file mode 100644 index 000000000..03d43d960 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_3ep_0alp_0lam/ab3685ab-1795-4a0e-8ee4-4f509616d1b8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_3ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2604 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.315 
+ } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0378 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3342 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1566 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-1ep_1alp_0lam/9018f443-a63f-4e07-b10b-272f66d1eb0d.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-1ep_1alp_0lam/9018f443-a63f-4e07-b10b-272f66d1eb0d.json new file mode 100644 index 000000000..b053ce369 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-1ep_1alp_0lam/9018f443-a63f-4e07-b10b-272f66d1eb0d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-1ep_1alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-1ep_1alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-1ep_1alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.255 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3211 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + 
"dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0491 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3288 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1571 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-1ep_1alp_0lam/bc45fc30-c472-471a-b0c8-f68b9397d844.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-1ep_1alp_0lam/bc45fc30-c472-471a-b0c8-f68b9397d844.json deleted file mode 100644 index 58d94a45e..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-1ep_1alp_0lam/bc45fc30-c472-471a-b0c8-f68b9397d844.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-1ep_1alp_0lam/1762652579.664829", - "retrieved_timestamp": "1762652579.66483", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-1ep_1alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-1ep_1alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2550410688085391 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3211026993947845 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.04909365558912387 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32876041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15708111702127658 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-2ep_1alp_0lam/548d1536-b941-43a9-a60b-ae5448b70933.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-2ep_1alp_0lam/548d1536-b941-43a9-a60b-ae5448b70933.json new file mode 100644 index 000000000..85329973f --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-2ep_1alp_0lam/548d1536-b941-43a9-a60b-ae5448b70933.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-2ep_1alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-2ep_1alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-2ep_1alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2478 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3198 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0423 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { 
+ "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3315 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1587 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-2ep_1alp_0lam/dff1ec0f-99a6-493d-9f2c-a6a523455b7e.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-2ep_1alp_0lam/dff1ec0f-99a6-493d-9f2c-a6a523455b7e.json deleted file mode 100644 index 06d592d1d..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-2ep_1alp_0lam/dff1ec0f-99a6-493d-9f2c-a6a523455b7e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-2ep_1alp_0lam/1762652579.665046", - "retrieved_timestamp": "1762652579.665047", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-2ep_1alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-2ep_1alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24779695651981187 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3197773660515741 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04229607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33145833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15866023936170212 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-3ep_1alp_0lam/99853109-17d9-46fa-a502-e4c977c1fb8f.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-3ep_1alp_0lam/99853109-17d9-46fa-a502-e4c977c1fb8f.json new file mode 100644 index 000000000..76daaf914 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-3ep_1alp_0lam/99853109-17d9-46fa-a502-e4c977c1fb8f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-3ep_1alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-3ep_1alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-3ep_1alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2475 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3225 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.04 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3301 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.1556 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-3ep_1alp_0lam/a6385d82-407e-44b2-9148-9cbf8f353557.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-3ep_1alp_0lam/a6385d82-407e-44b2-9148-9cbf8f353557.json deleted file mode 100644 index ac256ac27..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-3ep_1alp_0lam/a6385d82-407e-44b2-9148-9cbf8f353557.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-3ep_1alp_0lam/1762652579.6652648", - "retrieved_timestamp": "1762652579.665266", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-3ep_1alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-3ep_1alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24747226248576 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32246983072126806 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.330125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15558510638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_3e-7-3ep_1alp_0lam/17fb5411-3dc6-44b7-971b-8a080ed93de0.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_3e-7-3ep_1alp_0lam/17fb5411-3dc6-44b7-971b-8a080ed93de0.json deleted file mode 100644 index 62be69b4f..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_3e-7-3ep_1alp_0lam/17fb5411-3dc6-44b7-971b-8a080ed93de0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_3e-7-3ep_1alp_0lam/1762652579.665471", - "retrieved_timestamp": "1762652579.665472", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_3e-7-3ep_1alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_3e-7-3ep_1alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2590127528291599 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3185132309797721 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03625377643504532 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3275208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15857712765957446 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_3e-7-3ep_1alp_0lam/e171a0a0-f46d-404f-84e8-539155284e17.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_3e-7-3ep_1alp_0lam/e171a0a0-f46d-404f-84e8-539155284e17.json new file mode 100644 index 000000000..b9590f6bc --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_3e-7-3ep_1alp_0lam/e171a0a0-f46d-404f-84e8-539155284e17.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_3e-7-3ep_1alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_3e-7-3ep_1alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_3e-7-3ep_1alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + 
"params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.259 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3185 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0363 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3275 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1586 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-1ep_1alp_0lam/670b89a5-2a83-480e-a33b-6903609a10dc.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-1ep_1alp_0lam/670b89a5-2a83-480e-a33b-6903609a10dc.json deleted file mode 100644 index 6a2de2cfc..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-1ep_1alp_0lam/670b89a5-2a83-480e-a33b-6903609a10dc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-1ep_1alp_0lam/1762652579.665683", - "retrieved_timestamp": "1762652579.665684", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-1ep_1alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-1ep_1alp_0lam", - "additional_details": { - "precision": 
"bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23233464984020177 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179474145066817 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15475398936170212 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-1ep_1alp_0lam/eadd93e5-5770-4d4a-a1b2-6e732a82ce34.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-1ep_1alp_0lam/eadd93e5-5770-4d4a-a1b2-6e732a82ce34.json new file mode 100644 index 000000000..70f5c52d6 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-1ep_1alp_0lam/eadd93e5-5770-4d4a-a1b2-6e732a82ce34.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-1ep_1alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-1ep_1alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-1ep_1alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2323 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.3179 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0453 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3262 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1548 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-2ep_1alp_0lam/151cb8c4-0a7d-4886-80ea-560902e1f932.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-2ep_1alp_0lam/151cb8c4-0a7d-4886-80ea-560902e1f932.json new file mode 100644 index 000000000..6d9e81842 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-2ep_1alp_0lam/151cb8c4-0a7d-4886-80ea-560902e1f932.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-2ep_1alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-2ep_1alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-2ep_1alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2315 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.326 + } + }, + { + 
"evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0415 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3383 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1521 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-2ep_1alp_0lam/e660922f-847b-4993-91a4-b96809ff1e85.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-2ep_1alp_0lam/e660922f-847b-4993-91a4-b96809ff1e85.json deleted file mode 100644 index ba56ec10d..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-2ep_1alp_0lam/e660922f-847b-4993-91a4-b96809ff1e85.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-2ep_1alp_0lam/1762652579.665889", - "retrieved_timestamp": "1762652579.66589", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-2ep_1alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-2ep_1alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23151017079127825 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3259705145690442 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04154078549848943 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3383125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15209441489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-3ep_1alp_0lam/1acb97c4-a9d2-4ec8-9486-77eb6857646c.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-3ep_1alp_0lam/1acb97c4-a9d2-4ec8-9486-77eb6857646c.json new file mode 100644 index 000000000..95db25ea7 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-3ep_1alp_0lam/1acb97c4-a9d2-4ec8-9486-77eb6857646c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-3ep_1alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-3ep_1alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-3ep_1alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2298 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.332 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0431 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3329 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1567 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-3ep_1alp_0lam/41d18fa1-d19e-47cf-8fec-b04725ff097f.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-3ep_1alp_0lam/41d18fa1-d19e-47cf-8fec-b04725ff097f.json deleted file mode 100644 index 2699b5ad6..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-3ep_1alp_0lam/41d18fa1-d19e-47cf-8fec-b04725ff097f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-3ep_1alp_0lam/1762652579.666097", - "retrieved_timestamp": "1762652579.6660979", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-3ep_1alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-3ep_1alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2298368329366082 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33204616486918276 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33288541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15674867021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-1ep_1alp_0lam/122a997d-f452-4511-96f3-f31ecb5d8d7b.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-1ep_1alp_0lam/122a997d-f452-4511-96f3-f31ecb5d8d7b.json deleted file mode 100644 index 147f05fc4..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-1ep_1alp_0lam/122a997d-f452-4511-96f3-f31ecb5d8d7b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-1ep_1alp_0lam/1762652579.666312", - "retrieved_timestamp": "1762652579.666313", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-1ep_1alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-1ep_1alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24687274210206694 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3178544697854515 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04154078549848943 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33015625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1574966755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-1ep_1alp_0lam/1d803ac5-3ca6-4cb0-bcd1-779eaea1562d.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-1ep_1alp_0lam/1d803ac5-3ca6-4cb0-bcd1-779eaea1562d.json new file mode 100644 index 000000000..5f441e414 --- /dev/null +++ 
b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-1ep_1alp_0lam/1d803ac5-3ca6-4cb0-bcd1-779eaea1562d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-1ep_1alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-1ep_1alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-1ep_1alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2469 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3179 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0415 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3302 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1575 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-2ep_1alp_0lam/81562e50-23c5-4ef1-b98c-b40625f3b8c6.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-2ep_1alp_0lam/81562e50-23c5-4ef1-b98c-b40625f3b8c6.json new file mode 100644 index 000000000..1cd8d7c85 --- /dev/null +++ 
b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-2ep_1alp_0lam/81562e50-23c5-4ef1-b98c-b40625f3b8c6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-2ep_1alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-2ep_1alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-2ep_1alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.252 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3168 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.037 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2752 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3328 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1576 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-2ep_1alp_0lam/c0d7514b-6809-49d7-9193-38e9c9ad03be.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-2ep_1alp_0lam/c0d7514b-6809-49d7-9193-38e9c9ad03be.json deleted file mode 100644 index db59c93bb..000000000 --- 
a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-2ep_1alp_0lam/c0d7514b-6809-49d7-9193-38e9c9ad03be.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-2ep_1alp_0lam/1762652579.666527", - "retrieved_timestamp": "1762652579.666527", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-2ep_1alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-2ep_1alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2520434668900739 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3167822100533442 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03700906344410876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3328229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15757978723404256 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-3ep_1alp_0lam/923f6446-f9fb-47ae-b585-ac131d75c107.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-3ep_1alp_0lam/923f6446-f9fb-47ae-b585-ac131d75c107.json deleted file mode 100644 index a7f8bcab3..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-3ep_1alp_0lam/923f6446-f9fb-47ae-b585-ac131d75c107.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-3ep_1alp_0lam/1762652579.6667368", - "retrieved_timestamp": "1762652579.666738", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - 
}, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-3ep_1alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-3ep_1alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2665815591519391 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3190675981811982 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32885416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1566655585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-3ep_1alp_0lam/95fa292a-ee64-4844-9646-ce3cc7f730d2.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-3ep_1alp_0lam/95fa292a-ee64-4844-9646-ce3cc7f730d2.json new file mode 100644 index 000000000..361a9000e --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-3ep_1alp_0lam/95fa292a-ee64-4844-9646-ce3cc7f730d2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-3ep_1alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-3ep_1alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-3ep_1alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2666 + } + }, + { + 
"evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3191 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0347 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2718 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3289 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1567 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-1ep_0alp_0lam/4d14c584-b5a1-41cd-9605-78088dfebd7f.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-1ep_0alp_0lam/4d14c584-b5a1-41cd-9605-78088dfebd7f.json new file mode 100644 index 000000000..a144afa1e --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-1ep_0alp_0lam/4d14c584-b5a1-41cd-9605-78088dfebd7f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-1ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-1ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-1ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2499 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + 
"dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3178 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.037 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3341 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1562 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-1ep_0alp_0lam/da330322-f144-44bb-833a-7b92c11f3888.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-1ep_0alp_0lam/da330322-f144-44bb-833a-7b92c11f3888.json deleted file mode 100644 index 0264dafd4..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-1ep_0alp_0lam/da330322-f144-44bb-833a-7b92c11f3888.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-1ep_0alp_0lam/1762652579.667231", - "retrieved_timestamp": "1762652579.667236", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-1ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-1ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24992021170494289 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy 
on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31779941873624934 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03700906344410876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.334125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15625 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-2ep_0alp_0lam/10014f98-cae2-435b-b6e7-17064bb079a5.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-2ep_0alp_0lam/10014f98-cae2-435b-b6e7-17064bb079a5.json deleted file mode 100644 index 35beb5e61..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-2ep_0alp_0lam/10014f98-cae2-435b-b6e7-17064bb079a5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-2ep_0alp_0lam/1762652579.6676302", - "retrieved_timestamp": "1762652579.6676311", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-2ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-2ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24170201731406002 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3178391594145879 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33279166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1574966755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-2ep_0alp_0lam/1415d3d9-d7f8-48ef-8a2f-aa675c4c14db.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-2ep_0alp_0lam/1415d3d9-d7f8-48ef-8a2f-aa675c4c14db.json new file mode 100644 index 000000000..fa1682646 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-2ep_0alp_0lam/1415d3d9-d7f8-48ef-8a2f-aa675c4c14db.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-2ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-2ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-2ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2417 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3178 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.04 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3328 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1575 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-3ep_0alp_0lam/4b0ab369-e72f-4229-b449-3a21ee9d2c95.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-3ep_0alp_0lam/4b0ab369-e72f-4229-b449-3a21ee9d2c95.json new file mode 100644 index 000000000..cbb23bb4c --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-3ep_0alp_0lam/4b0ab369-e72f-4229-b449-3a21ee9d2c95.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-3ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2562 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.319 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0423 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", 
+ "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3341 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1576 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-3ep_0alp_0lam/c6d4f510-abc8-4524-99b0-e6d98c6e9aa9.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-3ep_0alp_0lam/c6d4f510-abc8-4524-99b0-e6d98c6e9aa9.json deleted file mode 100644 index 5fa90df9f..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-3ep_0alp_0lam/c6d4f510-abc8-4524-99b0-e6d98c6e9aa9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-3ep_0alp_0lam/1762652579.66787", - "retrieved_timestamp": "1762652579.667871", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2562401095759252 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31904280434381205 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04229607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.334125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15757978723404256 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6-3ep_0alp_0lam/478b6c1f-3329-4c9b-9d90-59b8b551c1af.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6-3ep_0alp_0lam/478b6c1f-3329-4c9b-9d90-59b8b551c1af.json new file mode 100644 index 000000000..fc688f9c5 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6-3ep_0alp_0lam/478b6c1f-3329-4c9b-9d90-59b8b551c1af.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6-3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6-3ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2408 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3165 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0431 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2735 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3315 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1557 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6-3ep_0alp_0lam/b4d7f827-d1cb-46c6-9eea-248867fdc07f.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6-3ep_0alp_0lam/b4d7f827-d1cb-46c6-9eea-248867fdc07f.json deleted file mode 100644 index 3903ad889..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6-3ep_0alp_0lam/b4d7f827-d1cb-46c6-9eea-248867fdc07f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6-3ep_0alp_0lam/1762652579.6680949", - "retrieved_timestamp": "1762652579.6680949", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6-3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6-3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2408276705807258 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31647277641099675 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3315208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1556682180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_1ep_0alp_0lam/212f8dd2-3c61-45bd-a3de-2326334feb73.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_1ep_0alp_0lam/212f8dd2-3c61-45bd-a3de-2326334feb73.json new file mode 100644 index 000000000..4c5ef642e --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_1ep_0alp_0lam/212f8dd2-3c61-45bd-a3de-2326334feb73.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_1ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": 
"1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_1ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_1ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2481 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3204 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0476 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3302 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1592 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_1ep_0alp_0lam/d1d2f75d-ddd8-42cb-9de8-1f327479eb9b.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_1ep_0alp_0lam/d1d2f75d-ddd8-42cb-9de8-1f327479eb9b.json deleted file mode 100644 index 77e7b36e4..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_1ep_0alp_0lam/d1d2f75d-ddd8-42cb-9de8-1f327479eb9b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_1ep_0alp_0lam/1762652579.668304", - "retrieved_timestamp": "1762652579.668305", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_1ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_1ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24812165055386376 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3204166266783764 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3301875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15915890957446807 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_2ep_0alp_0lam/9251282e-f72f-406e-a2cf-e7063516f624.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_2ep_0alp_0lam/9251282e-f72f-406e-a2cf-e7063516f624.json new file mode 100644 index 000000000..98dde13cb --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_2ep_0alp_0lam/9251282e-f72f-406e-a2cf-e7063516f624.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_2ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_2ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_2ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": 
"google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2545 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3186 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0498 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2718 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3289 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1561 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_2ep_0alp_0lam/9df1e491-fa9d-41c7-ae46-8cc70a47a60f.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_2ep_0alp_0lam/9df1e491-fa9d-41c7-ae46-8cc70a47a60f.json deleted file mode 100644 index 039f02a70..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_2ep_0alp_0lam/9df1e491-fa9d-41c7-ae46-8cc70a47a60f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_2ep_0alp_0lam/1762652579.668525", - "retrieved_timestamp": "1762652579.6685262", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_2ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_2ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2544914161092568 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3185709286639082 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04984894259818731 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32885416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15608377659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6-3ep_0alp_0lam/6c070a2b-9f5e-46cd-b8ba-b6220509b85d.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6-3ep_0alp_0lam/6c070a2b-9f5e-46cd-b8ba-b6220509b85d.json deleted file mode 100644 index f5dd6d4b2..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6-3ep_0alp_0lam/6c070a2b-9f5e-46cd-b8ba-b6220509b85d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6-3ep_0alp_0lam/1762652579.668755", - "retrieved_timestamp": "1762652579.668756", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6-3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6-3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2519935992056632 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.320368681472897 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32615625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15375664893617022 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6-3ep_0alp_0lam/91a3c739-7e16-4d21-8879-bb2fd4d4c6ad.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6-3ep_0alp_0lam/91a3c739-7e16-4d21-8879-bb2fd4d4c6ad.json new file mode 100644 index 000000000..b16eabb54 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6-3ep_0alp_0lam/91a3c739-7e16-4d21-8879-bb2fd4d4c6ad.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6-3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6-3ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.252 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3204 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0393 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, 
+ "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3262 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1538 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_1ep_0alp_0lam/4496da44-d4bd-40a8-8f91-56b2cb2fa766.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_1ep_0alp_0lam/4496da44-d4bd-40a8-8f91-56b2cb2fa766.json deleted file mode 100644 index c1c6fdb0c..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_1ep_0alp_0lam/4496da44-d4bd-40a8-8f91-56b2cb2fa766.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_1ep_0alp_0lam/1762652579.6689868", - "retrieved_timestamp": "1762652579.668988", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_1ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_1ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23146030310686755 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32128474090743103 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32221875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": 
"Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15824468085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_1ep_0alp_0lam/aaa78d8f-6050-4b5d-bb67-da6c9d1ee065.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_1ep_0alp_0lam/aaa78d8f-6050-4b5d-bb67-da6c9d1ee065.json new file mode 100644 index 000000000..2f7a799fb --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_1ep_0alp_0lam/aaa78d8f-6050-4b5d-bb67-da6c9d1ee065.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_1ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_1ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_1ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2315 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3213 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0453 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3222 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, 
+ "max_score": 1.0 + }, + "score_details": { + "score": 0.1582 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_2ep_0alp_0lam/1f0430fe-24ff-4ef6-8577-ee5bfa74f18b.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_2ep_0alp_0lam/1f0430fe-24ff-4ef6-8577-ee5bfa74f18b.json new file mode 100644 index 000000000..928372798 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_2ep_0alp_0lam/1f0430fe-24ff-4ef6-8577-ee5bfa74f18b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_2ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_2ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_2ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2515 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3187 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0431 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2718 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3289 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1539 + } + } + ] +} \ No 
newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_2ep_0alp_0lam/69c6593c-6e84-498f-8d68-62c1809a4606.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_2ep_0alp_0lam/69c6593c-6e84-498f-8d68-62c1809a4606.json deleted file mode 100644 index 8215a109e..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_2ep_0alp_0lam/69c6593c-6e84-498f-8d68-62c1809a4606.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_2ep_0alp_0lam/1762652579.669204", - "retrieved_timestamp": "1762652579.669204", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_2ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_2ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25149381419079153 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31867127828365593 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32888541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15392287234042554 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-1ep_0alp_0lam/b1c0f775-987a-4da5-9451-09bf295b16ba.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-1ep_0alp_0lam/b1c0f775-987a-4da5-9451-09bf295b16ba.json deleted file mode 100644 index 6295717d8..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-1ep_0alp_0lam/b1c0f775-987a-4da5-9451-09bf295b16ba.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-1ep_0alp_0lam/1762652579.669419", - 
"retrieved_timestamp": "1762652579.66942", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-1ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-1ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24719743613611883 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213274785812292 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3261875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15882646276595744 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-1ep_0alp_0lam/f374772b-2685-41e2-a455-9002e48e3739.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-1ep_0alp_0lam/f374772b-2685-41e2-a455-9002e48e3739.json new file mode 100644 index 000000000..feb9ce218 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-1ep_0alp_0lam/f374772b-2685-41e2-a455-9002e48e3739.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-1ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-1ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-1ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + 
"dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2472 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3213 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0347 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3262 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1588 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-2ep_0alp_0lam/6db801f8-5253-47c0-b87e-6779bff42f6b.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-2ep_0alp_0lam/6db801f8-5253-47c0-b87e-6779bff42f6b.json new file mode 100644 index 000000000..70dc4e5f1 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-2ep_0alp_0lam/6db801f8-5253-47c0-b87e-6779bff42f6b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-2ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-2ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-2ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": 
"google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.246 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3234 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0378 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3302 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1533 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-2ep_0alp_0lam/c589d3d6-9d8b-45e3-a6c6-60f25d44349b.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-2ep_0alp_0lam/c589d3d6-9d8b-45e3-a6c6-60f25d44349b.json deleted file mode 100644 index 932339112..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-2ep_0alp_0lam/c589d3d6-9d8b-45e3-a6c6-60f25d44349b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-2ep_0alp_0lam/1762652579.6696231", - "retrieved_timestamp": "1762652579.669624", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-2ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-2ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": 
"Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24599839536873275 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32337658694524307 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0377643504531722 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33021875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15334109042553193 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-3ep_0alp_0lam/0d704671-c0b6-4296-85b5-eaf972d6be6a.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-3ep_0alp_0lam/0d704671-c0b6-4296-85b5-eaf972d6be6a.json new file mode 100644 index 000000000..7fff16277 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-3ep_0alp_0lam/0d704671-c0b6-4296-85b5-eaf972d6be6a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-3ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2524 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3256 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": 
"hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0536 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3368 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1531 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-3ep_0alp_0lam/1e76e5ee-1728-4756-8f13-d68ce1ca3a5e.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-3ep_0alp_0lam/1e76e5ee-1728-4756-8f13-d68ce1ca3a5e.json deleted file mode 100644 index 0b4880a8b..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-3ep_0alp_0lam/1e76e5ee-1728-4756-8f13-d68ce1ca3a5e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-3ep_0alp_0lam/1762652579.669835", - "retrieved_timestamp": "1762652579.669836", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25236816092412573 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3255638228201855 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.05362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33679166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15309175531914893 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-1ep_0alp_0lam/7e31545f-0865-4843-914b-a71f8a84314f.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-1ep_0alp_0lam/7e31545f-0865-4843-914b-a71f8a84314f.json new file mode 100644 index 000000000..b0805fce1 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-1ep_0alp_0lam/7e31545f-0865-4843-914b-a71f8a84314f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-1ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-1ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-1ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2265 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3252 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0476 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2735 + } + }, + { + "evaluation_name": "MUSR", 
+ "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3262 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1568 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-1ep_0alp_0lam/a44985f9-2255-421b-93b9-fcb5761e17b8.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-1ep_0alp_0lam/a44985f9-2255-421b-93b9-fcb5761e17b8.json deleted file mode 100644 index 30c2f461f..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-1ep_0alp_0lam/a44985f9-2255-421b-93b9-fcb5761e17b8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-1ep_0alp_0lam/1762652579.670048", - "retrieved_timestamp": "1762652579.670049", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-1ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-1ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2264646692996804 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3252098558034601 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32615625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1568317819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-2ep_0alp_0lam/431c7130-5a19-4a71-8a92-fea9726769ac.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-2ep_0alp_0lam/431c7130-5a19-4a71-8a92-fea9726769ac.json new file mode 100644 index 000000000..a3d21ddaa --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-2ep_0alp_0lam/431c7130-5a19-4a71-8a92-fea9726769ac.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-2ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-2ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-2ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2302 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3224 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0438 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2768 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3408 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.15 + } + } + ] +} \ No 
newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-2ep_0alp_0lam/ad59cc80-784d-41bf-9a3e-9d9f286667d2.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-2ep_0alp_0lam/ad59cc80-784d-41bf-9a3e-9d9f286667d2.json deleted file mode 100644 index a659f90df..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-2ep_0alp_0lam/ad59cc80-784d-41bf-9a3e-9d9f286667d2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-2ep_0alp_0lam/1762652579.6702561", - "retrieved_timestamp": "1762652579.6702569", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-2ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-2ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23016152697066006 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3224479825736107 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34079166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15001662234042554 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-3ep_0alp_0lam/0b72d3c8-aaff-4eca-854d-07d132e9aa25.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-3ep_0alp_0lam/0b72d3c8-aaff-4eca-854d-07d132e9aa25.json deleted file mode 100644 index 9ae9af03e..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-3ep_0alp_0lam/0b72d3c8-aaff-4eca-854d-07d132e9aa25.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-3ep_0alp_0lam/1762652579.670511", - 
"retrieved_timestamp": "1762652579.6705122", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25236816092412573 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3278027492189594 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33945833333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15209441489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-3ep_0alp_0lam/ca850c4a-14d0-4145-9977-0d33e6e3e362.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-3ep_0alp_0lam/ca850c4a-14d0-4145-9977-0d33e6e3e362.json new file mode 100644 index 000000000..38c8a17c7 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-3ep_0alp_0lam/ca850c4a-14d0-4145-9977-0d33e6e3e362.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-3ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + 
"source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2524 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3278 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0408 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3395 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1521 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7-3ep_0alp_0lam/021eca20-1a26-4eba-9006-fb005e91696d.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7-3ep_0alp_0lam/021eca20-1a26-4eba-9006-fb005e91696d.json deleted file mode 100644 index 4efd56ef3..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7-3ep_0alp_0lam/021eca20-1a26-4eba-9006-fb005e91696d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7-3ep_0alp_0lam/1762652579.67072", - "retrieved_timestamp": "1762652579.67072", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7-3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7-3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2657570801030156 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31752113645211816 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03625377643504532 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3301875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1574966755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7-3ep_0alp_0lam/7389caa3-6d8f-43e3-b3f2-d9320e56f621.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7-3ep_0alp_0lam/7389caa3-6d8f-43e3-b3f2-d9320e56f621.json new file mode 100644 index 000000000..a5198f971 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7-3ep_0alp_0lam/7389caa3-6d8f-43e3-b3f2-d9320e56f621.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7-3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7-3ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2658 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3175 + } + }, + { + 
"evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0363 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3302 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1575 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_1ep_0alp_0lam/1e822b0f-0d80-4613-983b-ebd2e6fbfcd6.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_1ep_0alp_0lam/1e822b0f-0d80-4613-983b-ebd2e6fbfcd6.json new file mode 100644 index 000000000..1707361ce --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_1ep_0alp_0lam/1e822b0f-0d80-4613-983b-ebd2e6fbfcd6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_1ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_1ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_1ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2487 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3189 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 
5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0378 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2718 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3275 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1595 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_1ep_0alp_0lam/8662faaa-8964-468a-991b-43b2f0449d48.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_1ep_0alp_0lam/8662faaa-8964-468a-991b-43b2f0449d48.json deleted file mode 100644 index c1396859b..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_1ep_0alp_0lam/8662faaa-8964-468a-991b-43b2f0449d48.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_1ep_0alp_0lam/1762652579.6709208", - "retrieved_timestamp": "1762652579.6709208", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_1ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_1ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2487211709375568 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3189091360416723 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.0377643504531722 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3275208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15949135638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_2ep_0alp_0lam/1206f592-e6f7-4e7d-83cd-cbe82b37ec58.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_2ep_0alp_0lam/1206f592-e6f7-4e7d-83cd-cbe82b37ec58.json new file mode 100644 index 000000000..375805022 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_2ep_0alp_0lam/1206f592-e6f7-4e7d-83cd-cbe82b37ec58.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_2ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_2ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_2ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.256 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3159 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0378 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2768 + } + }, + { + "evaluation_name": "MUSR", + 
"source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3275 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1562 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_2ep_0alp_0lam/56cad8c7-566f-46e5-9692-3c11f4408921.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_2ep_0alp_0lam/56cad8c7-566f-46e5-9692-3c11f4408921.json deleted file mode 100644 index 168e1b92c..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_2ep_0alp_0lam/56cad8c7-566f-46e5-9692-3c11f4408921.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_2ep_0alp_0lam/1762652579.671123", - "retrieved_timestamp": "1762652579.671123", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_2ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_2ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2560151509106947 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3158776856286612 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0377643504531722 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3275208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15616688829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7-3ep_0alp_0lam/e4085c6a-bc16-4328-a724-4b9838b55faa.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7-3ep_0alp_0lam/e4085c6a-bc16-4328-a724-4b9838b55faa.json new file mode 100644 index 000000000..67bf5ff22 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7-3ep_0alp_0lam/e4085c6a-bc16-4328-a724-4b9838b55faa.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7-3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7-3ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2499 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3156 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.04 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3302 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1556 + } + } + ] +} \ 
No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7-3ep_0alp_0lam/f86fb81b-29b8-425f-8129-ea054108a214.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7-3ep_0alp_0lam/f86fb81b-29b8-425f-8129-ea054108a214.json deleted file mode 100644 index 57648d4d8..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7-3ep_0alp_0lam/f86fb81b-29b8-425f-8129-ea054108a214.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7-3ep_0alp_0lam/1762652579.671335", - "retrieved_timestamp": "1762652579.671336", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7-3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7-3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2498703440205322 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31561997255280577 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3301875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15558510638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_1ep_0alp_0lam/3c5ff9bc-b33a-4557-9c76-ccc041de985c.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_1ep_0alp_0lam/3c5ff9bc-b33a-4557-9c76-ccc041de985c.json deleted file mode 100644 index 0c6c5f078..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_1ep_0alp_0lam/3c5ff9bc-b33a-4557-9c76-ccc041de985c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_1ep_0alp_0lam/1762652579.671542", - 
"retrieved_timestamp": "1762652579.6715431", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_1ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_1ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.249595517670891 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31774285416798703 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33148958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1566655585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_1ep_0alp_0lam/b929b955-1fbb-43d0-add1-4d58fdc4097c.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_1ep_0alp_0lam/b929b955-1fbb-43d0-add1-4d58fdc4097c.json new file mode 100644 index 000000000..0387f9f97 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_1ep_0alp_0lam/b929b955-1fbb-43d0-add1-4d58fdc4097c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_1ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_1ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_1ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": 
{ + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2496 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3177 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0453 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2626 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3315 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1567 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_2ep_0alp_0lam/64e0c863-f33c-44d7-b244-e5288e5018fb.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_2ep_0alp_0lam/64e0c863-f33c-44d7-b244-e5288e5018fb.json deleted file mode 100644 index 7b7f76cca..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_2ep_0alp_0lam/64e0c863-f33c-44d7-b244-e5288e5018fb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_2ep_0alp_0lam/1762652579.6717582", - "retrieved_timestamp": "1762652579.6717582", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_2ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_2ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25149381419079153 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3172338500122228 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3275208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15533577127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_2ep_0alp_0lam/df723a0f-9a32-42f3-9421-780159f7d821.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_2ep_0alp_0lam/df723a0f-9a32-42f3-9421-780159f7d821.json new file mode 100644 index 000000000..03229f6c3 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_2ep_0alp_0lam/df723a0f-9a32-42f3-9421-780159f7d821.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_2ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_2ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_2ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2515 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3172 + } + }, + { + "evaluation_name": "MATH Level 5", 
+ "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0438 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3275 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1553 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep/09f59d70-2948-4eb6-a14e-2550c97b5542.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep/09f59d70-2948-4eb6-a14e-2550c97b5542.json deleted file mode 100644 index a1a1e76ff..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep/09f59d70-2948-4eb6-a14e-2550c97b5542.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep/1762652579.6576698", - "retrieved_timestamp": "1762652579.657671", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2201447714286981 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3217197270809481 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33669791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17096077127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep/c1046d2c-0b5b-4ab7-b173-8d5b5ecbc07d.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep/c1046d2c-0b5b-4ab7-b173-8d5b5ecbc07d.json new file mode 100644 index 000000000..d40e539a7 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep/c1046d2c-0b5b-4ab7-b173-8d5b5ecbc07d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-2ep", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2201 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3217 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0408 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3367 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.171 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-3ep/57d9c59d-8cd8-4253-a076-8b16becc740e.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-3ep/57d9c59d-8cd8-4253-a076-8b16becc740e.json deleted file mode 100644 index 4142084f6..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-3ep/57d9c59d-8cd8-4253-a076-8b16becc740e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-3ep/1762652579.671975", - "retrieved_timestamp": "1762652579.671975", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-3ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-3ep", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22808813946993975 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3239538094779519 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.330125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17461768617021275 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-3ep/60c02070-7554-4764-8a02-841ca75a0d5c.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-3ep/60c02070-7554-4764-8a02-841ca75a0d5c.json new file mode 100644 index 000000000..c536b41df --- /dev/null 
+++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-3ep/60c02070-7554-4764-8a02-841ca75a0d5c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-3ep/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-3ep", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-3ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2281 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.324 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0453 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3301 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1746 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam/5fb209a6-3d82-4017-8e44-3615d7c50218.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam/5fb209a6-3d82-4017-8e44-3615d7c50218.json deleted file mode 100644 index 3e4c0ca29..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam/5fb209a6-3d82-4017-8e44-3615d7c50218.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": 
"0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam/1762652579.672395", - "retrieved_timestamp": "1762652579.672396", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25259311958935626 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.323809171214906 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3528229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15741356382978725 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam/d243f226-149b-4824-837e-e80ab68bae9d.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam/d243f226-149b-4824-837e-e80ab68bae9d.json new file mode 100644 index 000000000..a45e23e39 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam/d243f226-149b-4824-837e-e80ab68bae9d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": 
"Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2526 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3238 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0393 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3528 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1574 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_1ep/2ccd9994-1d9c-40c4-85d0-c74af7544b6d.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_1ep/2ccd9994-1d9c-40c4-85d0-c74af7544b6d.json deleted file mode 100644 index 7ce581266..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_1ep/2ccd9994-1d9c-40c4-85d0-c74af7544b6d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_1ep/1762652579.672603", - "retrieved_timestamp": "1762652579.6726038", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_1ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": 
"JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_1ep", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24812165055386376 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31748404240871353 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34752083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15965757978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_1ep/4f9361d0-2ad9-44da-a1d9-876d43451ae6.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_1ep/4f9361d0-2ad9-44da-a1d9-876d43451ae6.json new file mode 100644 index 000000000..d516e44ee --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_1ep/4f9361d0-2ad9-44da-a1d9-876d43451ae6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_1ep/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_1ep", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_1ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2481 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3175 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0385 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2626 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3475 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1597 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_2ep/1f1f5c3d-4ee4-4ed8-adeb-9e83942a7e32.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_2ep/1f1f5c3d-4ee4-4ed8-adeb-9e83942a7e32.json deleted file mode 100644 index 88d976e3a..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_2ep/1f1f5c3d-4ee4-4ed8-adeb-9e83942a7e32.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_2ep/1762652579.672818", - "retrieved_timestamp": "1762652579.672818", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_2ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_2ep", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25476624245889795 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.3199073234678175 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34348958333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15616688829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_2ep/6c6e9ebc-f83d-48d5-b69f-be43d4167a0e.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_2ep/6c6e9ebc-f83d-48d5-b69f-be43d4167a0e.json new file mode 100644 index 000000000..fc27247a4 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_2ep/6c6e9ebc-f83d-48d5-b69f-be43d4167a0e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_2ep/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_2ep", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_2ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2548 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3199 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0385 + } + }, + { + "evaluation_name": "GPQA", + 
"source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3435 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1562 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam/7cd2c0da-15b8-4ad6-8cad-feb68631c079.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam/7cd2c0da-15b8-4ad6-8cad-feb68631c079.json new file mode 100644 index 000000000..60222a1b0 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam/7cd2c0da-15b8-4ad6-8cad-feb68631c079.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2423 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3219 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.034 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + 
"hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3515 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1563 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam/f9c4db8f-b56e-41cd-9c87-ba2d4b36520a.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam/f9c4db8f-b56e-41cd-9c87-ba2d4b36520a.json deleted file mode 100644 index 37b7f7b12..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam/f9c4db8f-b56e-41cd-9c87-ba2d4b36520a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam/1762652579.673032", - "retrieved_timestamp": "1762652579.673033", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2423015376977531 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32193163799444524 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.033987915407854986 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35152083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15633311170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_1ep/36b84cf2-d221-4e9a-b728-37dc2bf7e1d6.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_1ep/36b84cf2-d221-4e9a-b728-37dc2bf7e1d6.json new file mode 100644 index 000000000..97123425b --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_1ep/36b84cf2-d221-4e9a-b728-37dc2bf7e1d6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_1ep/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_1ep", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_1ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2493 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3191 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0393 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3475 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + 
"dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1592 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_1ep/d1ae295e-1364-442c-a3e4-ac2ad9884a78.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_1ep/d1ae295e-1364-442c-a3e4-ac2ad9884a78.json deleted file mode 100644 index 9030b2f2d..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_1ep/d1ae295e-1364-442c-a3e4-ac2ad9884a78.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_1ep/1762652579.673239", - "retrieved_timestamp": "1762652579.67324", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_1ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_1ep", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24927082363683917 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3190945593427599 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34752083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15915890957446807 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_2ep/1fd0d1db-1d75-4b10-bae8-33023c2c7466.json 
b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_2ep/1fd0d1db-1d75-4b10-bae8-33023c2c7466.json new file mode 100644 index 000000000..591bed95d --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_2ep/1fd0d1db-1d75-4b10-bae8-33023c2c7466.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_2ep/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_2ep", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_2ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2478 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3218 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0415 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2693 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3515 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1556 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_2ep/86c29317-7d5f-42c2-a156-615d3c4a259d.json 
b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_2ep/86c29317-7d5f-42c2-a156-615d3c4a259d.json deleted file mode 100644 index 24425d4b0..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_2ep/86c29317-7d5f-42c2-a156-615d3c4a259d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_2ep/1762652579.673455", - "retrieved_timestamp": "1762652579.6734562", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_2ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_2ep", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24779695651981187 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3218405915852565 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04154078549848943 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35152083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15558510638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep/70a5a5fb-9dd6-4b1c-a7ac-11155d5ef837.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep/70a5a5fb-9dd6-4b1c-a7ac-11155d5ef837.json deleted file mode 100644 index 7447ea482..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep/70a5a5fb-9dd6-4b1c-a7ac-11155d5ef837.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep/1762652579.6721878", - "retrieved_timestamp": "1762652579.6721878", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23478259905938464 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33076056644270485 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34088541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16954787234042554 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep/c6c02512-6c91-4818-a084-c48915fd83de.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep/c6c02512-6c91-4818-a084-c48915fd83de.json new file mode 100644 index 000000000..6bd7dcd08 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep/c6c02512-6c91-4818-a084-c48915fd83de.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5-5ep", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2348 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + 
"source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3308 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0506 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3409 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1695 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5/047ed340-ddb8-40ca-b1ee-10f12b182e43.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5/047ed340-ddb8-40ca-b1ee-10f12b182e43.json deleted file mode 100644 index ce31cd29e..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5/047ed340-ddb8-40ca-b1ee-10f12b182e43.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5/1762652579.65739", - "retrieved_timestamp": "1762652579.657391", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2067558522498083 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3203968601167082 - } - }, - { - "evaluation_name": "MATH Level 5", - 
"metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03700906344410876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3486666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16780252659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5/326affa2-9ea4-4fc9-b60f-d2abeb7493c3.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5/326affa2-9ea4-4fc9-b60f-d2abeb7493c3.json new file mode 100644 index 000000000..02a1ed3dd --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5/326affa2-9ea4-4fc9-b60f-d2abeb7493c3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-2e-5", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2068 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3204 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.037 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.2693 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3487 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1678 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5-2ep/94b65c53-7e0c-4506-bd19-82d23709d269.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5-2ep/94b65c53-7e0c-4506-bd19-82d23709d269.json deleted file mode 100644 index 9952cd229..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5-2ep/94b65c53-7e0c-4506-bd19-82d23709d269.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-5e-5-2ep/1762652579.673873", - "retrieved_timestamp": "1762652579.673873", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5-2ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5-2ep", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21747186354428472 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179879277889672 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0377643504531722 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33679166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16273271276595744 
- } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5-2ep/b3a190d1-5b86-4439-a21e-1f118239db82.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5-2ep/b3a190d1-5b86-4439-a21e-1f118239db82.json new file mode 100644 index 000000000..3ce9e8679 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5-2ep/b3a190d1-5b86-4439-a21e-1f118239db82.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-5e-5-2ep/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-5e-5-2ep", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5-2ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2175 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.318 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0378 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3368 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1627 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5-3ep/1c779874-5568-462e-9e6e-0e3fd42d023e.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5-3ep/1c779874-5568-462e-9e6e-0e3fd42d023e.json 
deleted file mode 100644 index 021da02ec..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5-3ep/1c779874-5568-462e-9e6e-0e3fd42d023e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-5e-5-3ep/1762652579.674078", - "retrieved_timestamp": "1762652579.674078", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5-3ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5-3ep", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2198699450790569 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32974820176156994 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030211480362537766 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35933333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1651429521276596 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5-3ep/b37a7db5-b26f-4a82-b27c-6c3a2ba72fda.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5-3ep/b37a7db5-b26f-4a82-b27c-6c3a2ba72fda.json new file mode 100644 index 000000000..ce21c0bef --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5-3ep/b37a7db5-b26f-4a82-b27c-6c3a2ba72fda.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-5e-5-3ep/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-5e-5-3ep", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5-3ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": 
"Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2199 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3297 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0302 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2534 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3593 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1651 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5-5ep/05a59445-b816-4982-9b1a-1c2394ffbaa9.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5-5ep/05a59445-b816-4982-9b1a-1c2394ffbaa9.json new file mode 100644 index 000000000..8649fd379 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5-5ep/05a59445-b816-4982-9b1a-1c2394ffbaa9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-5e-5-5ep/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-5e-5-5ep", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5-5ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" 
+ }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2077 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3276 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0272 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3766 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1587 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5-5ep/f562a3e4-6afe-4c1d-a597-6265af34f925.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5-5ep/f562a3e4-6afe-4c1d-a597-6265af34f925.json deleted file mode 100644 index aa11fd153..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5-5ep/f562a3e4-6afe-4c1d-a597-6265af34f925.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-5e-5-5ep/1762652579.674291", - "retrieved_timestamp": "1762652579.6742918", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5-5ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5-5ep", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.2077299343519639 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3275980298873716 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027190332326283987 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3766354166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15866023936170212 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5/cdbbfad9-85e8-4c8b-b70c-708c08a62798.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5/cdbbfad9-85e8-4c8b-b70c-708c08a62798.json deleted file mode 100644 index 746b8d738..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5/cdbbfad9-85e8-4c8b-b70c-708c08a62798.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-5e-5/1762652579.673672", - "retrieved_timestamp": "1762652579.673672", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2009856070781083 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31093810553451656 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.033987915407854986 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33809375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16722074468085107 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5/ff952579-e92d-4af8-9497-f49fed5efba0.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5/ff952579-e92d-4af8-9497-f49fed5efba0.json new file mode 100644 index 000000000..c0f65b58b --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5/ff952579-e92d-4af8-9497-f49fed5efba0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-5e-5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-5e-5", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.201 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3109 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.034 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3381 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": 
"hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1672 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5-2ep/9cf15d33-3624-4161-bdad-069b09ab2290.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5-2ep/9cf15d33-3624-4161-bdad-069b09ab2290.json deleted file mode 100644 index c980472e1..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5-2ep/9cf15d33-3624-4161-bdad-069b09ab2290.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-7e-5-2ep/1762652579.674706", - "retrieved_timestamp": "1762652579.674707", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5-2ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5-2ep", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2156234347087949 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3100411318318588 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2424496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3367291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15674867021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5-2ep/b541ede0-6de9-4557-8280-43567fd3dd96.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5-2ep/b541ede0-6de9-4557-8280-43567fd3dd96.json new file mode 100644 index 000000000..b702f2d51 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5-2ep/b541ede0-6de9-4557-8280-43567fd3dd96.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-7e-5-2ep/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-7e-5-2ep", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5-2ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2156 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.31 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0393 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2424 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3367 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1567 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5-3ep/658df4b3-084f-479f-b507-3a4247683651.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5-3ep/658df4b3-084f-479f-b507-3a4247683651.json deleted file mode 100644 index 3efdf42d8..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5-3ep/658df4b3-084f-479f-b507-3a4247683651.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-7e-5-3ep/1762652579.674919", - "retrieved_timestamp": "1762652579.674919", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5-3ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5-3ep", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23805502732749106 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3199313632207049 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03323262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23657718120805368 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3553645833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15217752659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5-3ep/8514f601-0bb2-4639-90cc-29e96088e7de.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5-3ep/8514f601-0bb2-4639-90cc-29e96088e7de.json new file mode 100644 index 000000000..686bff7aa --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5-3ep/8514f601-0bb2-4639-90cc-29e96088e7de.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-7e-5-3ep/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-7e-5-3ep", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5-3ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.2381 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3199 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0332 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2366 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3554 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1522 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5-5ep/4e72cc33-538b-4fa7-8038-89794fed6511.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5-5ep/4e72cc33-538b-4fa7-8038-89794fed6511.json deleted file mode 100644 index ff156f257..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5-5ep/4e72cc33-538b-4fa7-8038-89794fed6511.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-7e-5-5ep/1762652579.6751308", - "retrieved_timestamp": "1762652579.6751318", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5-5ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5-5ep", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21197644472222593 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, 
- "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32002953673668666 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24580536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37127083333333327 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1628158244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5-5ep/57e6d0cf-943a-4b83-a1f4-4f03b5066523.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5-5ep/57e6d0cf-943a-4b83-a1f4-4f03b5066523.json new file mode 100644 index 000000000..138ef3aec --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5-5ep/57e6d0cf-943a-4b83-a1f4-4f03b5066523.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-7e-5-5ep/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-7e-5-5ep", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5-5ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.212 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.32 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0219 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2458 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3713 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1628 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5/891bb442-c054-4941-9bd1-8352139f143e.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5/891bb442-c054-4941-9bd1-8352139f143e.json deleted file mode 100644 index 7e2d9b4bc..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5/891bb442-c054-4941-9bd1-8352139f143e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-7e-5/1762652579.6744971", - "retrieved_timestamp": "1762652579.674498", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20925366915340185 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3158179005969299 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030211480362537766 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33669791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1622340425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5/ec205127-21c0-4edf-bb3a-ec8ccac4fcdb.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5/ec205127-21c0-4edf-bb3a-ec8ccac4fcdb.json new file mode 100644 index 000000000..0c78b2864 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5/ec205127-21c0-4edf-bb3a-ec8ccac4fcdb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-7e-5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-7e-5", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2093 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3158 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0302 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2567 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3367 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1622 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-DPO-1epoch_v1/14b260e6-4300-43ec-b7af-587a2f5b03fb.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-DPO-1epoch_v1/14b260e6-4300-43ec-b7af-587a2f5b03fb.json new file mode 100644 index 000000000..0329a0fc0 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-DPO-1epoch_v1/14b260e6-4300-43ec-b7af-587a2f5b03fb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-DPO-1epoch_v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-DPO-1epoch_v1", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-DPO-1epoch_v1", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2025 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3268 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0363 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3209 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.133 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-DPO-1epoch_v1/ac94a989-668a-49e6-9975-9169d7394574.json 
b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-DPO-1epoch_v1/ac94a989-668a-49e6-9975-9169d7394574.json deleted file mode 100644 index 0564dc2cd..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-DPO-1epoch_v1/ac94a989-668a-49e6-9975-9169d7394574.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-DPO-1epoch_v1/1762652579.67534", - "retrieved_timestamp": "1762652579.6753411", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-DPO-1epoch_v1", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-DPO-1epoch_v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20245947419513555 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.326814314271471 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03625377643504532 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3209166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13297872340425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-MDPO-1epoch_v1/53de1fc9-7097-4103-b731-588a7bf39f80.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-MDPO-1epoch_v1/53de1fc9-7097-4103-b731-588a7bf39f80.json new file mode 100644 index 000000000..2d3c81041 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-MDPO-1epoch_v1/53de1fc9-7097-4103-b731-588a7bf39f80.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-MDPO-1epoch_v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT-MDPO-1epoch_v1", + "id": 
"JayHyeon/Qwen2.5-0.5B-SFT-MDPO-1epoch_v1", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1964 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3293 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0468 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3262 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1337 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-MDPO-1epoch_v1/6961b682-04e5-45af-bd2b-8ad6546503e7.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-MDPO-1epoch_v1/6961b682-04e5-45af-bd2b-8ad6546503e7.json deleted file mode 100644 index d970be2a4..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-MDPO-1epoch_v1/6961b682-04e5-45af-bd2b-8ad6546503e7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-MDPO-1epoch_v1/1762652579.675586", - "retrieved_timestamp": "1762652579.6755872", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-MDPO-1epoch_v1", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": 
"JayHyeon/Qwen2.5-0.5B-SFT-MDPO-1epoch_v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1964144026737944 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32925816453885065 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32615625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13372672872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT/1a1031c5-3ec2-4d12-93eb-e0a3b0448ed4.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT/1a1031c5-3ec2-4d12-93eb-e0a3b0448ed4.json new file mode 100644 index 000000000..2c42031ee --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT/1a1031c5-3ec2-4d12-93eb-e0a3b0448ed4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-SFT", + "id": "JayHyeon/Qwen2.5-0.5B-SFT", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1964 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "MATH Level 5", + 
"source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0272 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2785 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3394 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1673 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT/eb0f4662-54f5-48ca-b871-726e34bbf540.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT/eb0f4662-54f5-48ca-b871-726e34bbf540.json deleted file mode 100644 index f66b463ab..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT/eb0f4662-54f5-48ca-b871-726e34bbf540.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT/1762652579.654298", - "retrieved_timestamp": "1762652579.6542988", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19636453498938372 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31207478976310743 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027190332326283987 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3394270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16730385638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_1e-6-3ep_0alp_5lam/51b62d59-f39c-49ca-af0a-73df6440e29d.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_1e-6-3ep_0alp_5lam/51b62d59-f39c-49ca-af0a-73df6440e29d.json new file mode 100644 index 000000000..b18776596 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_1e-6-3ep_0alp_5lam/51b62d59-f39c-49ca-af0a-73df6440e29d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_1e-6-3ep_0alp_5lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-DPOP_1e-6-3ep_0alp_5lam", + "id": "JayHyeon/Qwen_0.5-DPOP_1e-6-3ep_0alp_5lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2532 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.314 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0491 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3315 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1566 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_1e-6-3ep_0alp_5lam/e4e00595-e1ed-42c9-a518-ff104253cad9.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_1e-6-3ep_0alp_5lam/e4e00595-e1ed-42c9-a518-ff104253cad9.json deleted file mode 100644 index dab966edb..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_1e-6-3ep_0alp_5lam/e4e00595-e1ed-42c9-a518-ff104253cad9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_1e-6-3ep_0alp_5lam/1762652579.675801", - "retrieved_timestamp": "1762652579.675801", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPOP_1e-6-3ep_0alp_5lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPOP_1e-6-3ep_0alp_5lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25324250765746 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3140431891367934 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04909365558912387 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33145833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15658244680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_1e-7-3ep_0alp_5lam/3a7a5a89-0ab8-47cd-95c6-14a6186e05b9.json 
b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_1e-7-3ep_0alp_5lam/3a7a5a89-0ab8-47cd-95c6-14a6186e05b9.json deleted file mode 100644 index 84455742f..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_1e-7-3ep_0alp_5lam/3a7a5a89-0ab8-47cd-95c6-14a6186e05b9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_1e-7-3ep_0alp_5lam/1762652579.676018", - "retrieved_timestamp": "1762652579.676018", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPOP_1e-7-3ep_0alp_5lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPOP_1e-7-3ep_0alp_5lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26695612087040166 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3188575312560274 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32879166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15625 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_1e-7-3ep_0alp_5lam/622a0ae1-0eb5-49f0-bc44-d396c7233e27.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_1e-7-3ep_0alp_5lam/622a0ae1-0eb5-49f0-bc44-d396c7233e27.json new file mode 100644 index 000000000..8a1f01917 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_1e-7-3ep_0alp_5lam/622a0ae1-0eb5-49f0-bc44-d396c7233e27.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_1e-7-3ep_0alp_5lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-DPOP_1e-7-3ep_0alp_5lam", + "id": 
"JayHyeon/Qwen_0.5-DPOP_1e-7-3ep_0alp_5lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.267 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3189 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0408 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3288 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1562 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-6-1ep_0alp_5lam/71291a41-283e-42ca-b192-7b759e3c3712.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-6-1ep_0alp_5lam/71291a41-283e-42ca-b192-7b759e3c3712.json new file mode 100644 index 000000000..9c3e33edf --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-6-1ep_0alp_5lam/71291a41-283e-42ca-b192-7b759e3c3712.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_3e-6-1ep_0alp_5lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-DPOP_3e-6-1ep_0alp_5lam", + "id": "JayHyeon/Qwen_0.5-DPOP_3e-6-1ep_0alp_5lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": 
"Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2481 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3261 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0438 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3368 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1565 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-6-1ep_0alp_5lam/f78ac837-d5f4-48f1-8a9e-1549b0020160.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-6-1ep_0alp_5lam/f78ac837-d5f4-48f1-8a9e-1549b0020160.json deleted file mode 100644 index a8baf4c46..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-6-1ep_0alp_5lam/f78ac837-d5f4-48f1-8a9e-1549b0020160.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_3e-6-1ep_0alp_5lam/1762652579.6762261", - "retrieved_timestamp": "1762652579.6762261", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPOP_3e-6-1ep_0alp_5lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPOP_3e-6-1ep_0alp_5lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24807178286945303 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32608064671010917 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3368229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15649933510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-6-2ep_0alp_5lam/2ae9cee5-8f3c-4303-802f-481a03edaf9f.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-6-2ep_0alp_5lam/2ae9cee5-8f3c-4303-802f-481a03edaf9f.json deleted file mode 100644 index fd6f4b285..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-6-2ep_0alp_5lam/2ae9cee5-8f3c-4303-802f-481a03edaf9f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_3e-6-2ep_0alp_5lam/1762652579.67643", - "retrieved_timestamp": "1762652579.6764312", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPOP_3e-6-2ep_0alp_5lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPOP_3e-6-2ep_0alp_5lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23832985367713222 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32184656431310543 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3341875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15034906914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-6-2ep_0alp_5lam/7e504fef-b304-4c1a-856d-06e56a8869d7.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-6-2ep_0alp_5lam/7e504fef-b304-4c1a-856d-06e56a8869d7.json new file mode 100644 index 000000000..1841fefbe --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-6-2ep_0alp_5lam/7e504fef-b304-4c1a-856d-06e56a8869d7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_3e-6-2ep_0alp_5lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-DPOP_3e-6-2ep_0alp_5lam", + "id": "JayHyeon/Qwen_0.5-DPOP_3e-6-2ep_0alp_5lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2383 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3218 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0431 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": 
"MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3342 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1503 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-6-3ep_0alp_5lam/654b55d0-940c-43bd-9478-0bd67bb7b0d8.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-6-3ep_0alp_5lam/654b55d0-940c-43bd-9478-0bd67bb7b0d8.json deleted file mode 100644 index 6c45d4640..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-6-3ep_0alp_5lam/654b55d0-940c-43bd-9478-0bd67bb7b0d8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_3e-6-3ep_0alp_5lam/1762652579.676642", - "retrieved_timestamp": "1762652579.6766431", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPOP_3e-6-3ep_0alp_5lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPOP_3e-6-3ep_0alp_5lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24714756845170813 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32244323308961736 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33276041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15334109042553193 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-6-3ep_0alp_5lam/f8258f5e-8826-4fe1-b9d3-61708e79d4ab.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-6-3ep_0alp_5lam/f8258f5e-8826-4fe1-b9d3-61708e79d4ab.json new file mode 100644 index 000000000..0f50c78cd --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-6-3ep_0alp_5lam/f8258f5e-8826-4fe1-b9d3-61708e79d4ab.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_3e-6-3ep_0alp_5lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-DPOP_3e-6-3ep_0alp_5lam", + "id": "JayHyeon/Qwen_0.5-DPOP_3e-6-3ep_0alp_5lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2471 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3224 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.04 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3328 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1533 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-7-1ep_0alp_5lam/099ce031-1e11-4a07-bac1-03bef9b915d6.json 
b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-7-1ep_0alp_5lam/099ce031-1e11-4a07-bac1-03bef9b915d6.json new file mode 100644 index 000000000..0b464aa1e --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-7-1ep_0alp_5lam/099ce031-1e11-4a07-bac1-03bef9b915d6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_3e-7-1ep_0alp_5lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-DPOP_3e-7-1ep_0alp_5lam", + "id": "JayHyeon/Qwen_0.5-DPOP_3e-7-1ep_0alp_5lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2447 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3181 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0438 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3341 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1565 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-7-1ep_0alp_5lam/c23f1072-c7be-4eab-b866-16c6429071e4.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-7-1ep_0alp_5lam/c23f1072-c7be-4eab-b866-16c6429071e4.json deleted file mode 100644 index 3a7d00493..000000000 --- 
a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-7-1ep_0alp_5lam/c23f1072-c7be-4eab-b866-16c6429071e4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_3e-7-1ep_0alp_5lam/1762652579.6768441", - "retrieved_timestamp": "1762652579.676845", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPOP_3e-7-1ep_0alp_5lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPOP_3e-7-1ep_0alp_5lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24474948691693596 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3181429193838813 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.334125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15649933510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-7-2ep_0alp_5lam/75ff25fd-e5f7-4380-b192-cbc8a8ee95aa.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-7-2ep_0alp_5lam/75ff25fd-e5f7-4380-b192-cbc8a8ee95aa.json new file mode 100644 index 000000000..c6cea6b5c --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-7-2ep_0alp_5lam/75ff25fd-e5f7-4380-b192-cbc8a8ee95aa.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_3e-7-2ep_0alp_5lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-DPOP_3e-7-2ep_0alp_5lam", + "id": "JayHyeon/Qwen_0.5-DPOP_3e-7-2ep_0alp_5lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": 
"Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2551 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3194 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0446 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3262 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1567 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-7-2ep_0alp_5lam/c02ad005-8e12-46d9-8bb3-090f62c6a946.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-7-2ep_0alp_5lam/c02ad005-8e12-46d9-8bb3-090f62c6a946.json deleted file mode 100644 index 631dc3606..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-7-2ep_0alp_5lam/c02ad005-8e12-46d9-8bb3-090f62c6a946.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_3e-7-2ep_0alp_5lam/1762652579.677048", - "retrieved_timestamp": "1762652579.6770492", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPOP_3e-7-2ep_0alp_5lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPOP_3e-7-2ep_0alp_5lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2551408041773605 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3194064593640778 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0445619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32615625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1566655585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-7-3ep_0alp_5lam/cbc43c7a-d8ac-4b03-a383-703f7fa51757.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-7-3ep_0alp_5lam/cbc43c7a-d8ac-4b03-a383-703f7fa51757.json new file mode 100644 index 000000000..6baf33d2a --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-7-3ep_0alp_5lam/cbc43c7a-d8ac-4b03-a383-703f7fa51757.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_3e-7-3ep_0alp_5lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-DPOP_3e-7-3ep_0alp_5lam", + "id": "JayHyeon/Qwen_0.5-DPOP_3e-7-3ep_0alp_5lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2538 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3153 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": 
"hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0415 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3261 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1583 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-7-3ep_0alp_5lam/e1d1dd0d-ef8e-44e1-aca1-f10c53f5aa84.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-7-3ep_0alp_5lam/e1d1dd0d-ef8e-44e1-aca1-f10c53f5aa84.json deleted file mode 100644 index 71a820116..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-7-3ep_0alp_5lam/e1d1dd0d-ef8e-44e1-aca1-f10c53f5aa84.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_3e-7-3ep_0alp_5lam/1762652579.677404", - "retrieved_timestamp": "1762652579.677407", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPOP_3e-7-3ep_0alp_5lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPOP_3e-7-3ep_0alp_5lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25379216035674235 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31530652457997205 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04154078549848943 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": 
"Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.326125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1583277925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_5e-7-1ep_0alp_5lam/02c4e0de-4a4e-44b7-bc4c-44c92ade94ec.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_5e-7-1ep_0alp_5lam/02c4e0de-4a4e-44b7-bc4c-44c92ade94ec.json deleted file mode 100644 index a85c0e1d2..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_5e-7-1ep_0alp_5lam/02c4e0de-4a4e-44b7-bc4c-44c92ade94ec.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_5e-7-1ep_0alp_5lam/1762652579.677789", - "retrieved_timestamp": "1762652579.67779", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPOP_5e-7-1ep_0alp_5lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPOP_5e-7-1ep_0alp_5lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24022815019703275 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3168335157841944 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0377643504531722 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33279166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1568317819148936 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_5e-7-1ep_0alp_5lam/72d7f252-1bff-40ad-9ec8-1ac2a2e02a8e.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_5e-7-1ep_0alp_5lam/72d7f252-1bff-40ad-9ec8-1ac2a2e02a8e.json new file mode 100644 index 000000000..b8795c78b --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_5e-7-1ep_0alp_5lam/72d7f252-1bff-40ad-9ec8-1ac2a2e02a8e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_5e-7-1ep_0alp_5lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-DPOP_5e-7-1ep_0alp_5lam", + "id": "JayHyeon/Qwen_0.5-DPOP_5e-7-1ep_0alp_5lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2402 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3168 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0378 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2718 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3328 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1568 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_5e-7-2ep_0alp_5lam/4e38a2db-c67e-4f2a-84a0-f9afa7d32bd5.json 
b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_5e-7-2ep_0alp_5lam/4e38a2db-c67e-4f2a-84a0-f9afa7d32bd5.json deleted file mode 100644 index 41cf247f3..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_5e-7-2ep_0alp_5lam/4e38a2db-c67e-4f2a-84a0-f9afa7d32bd5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_5e-7-2ep_0alp_5lam/1762652579.678058", - "retrieved_timestamp": "1762652579.67806", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPOP_5e-7-2ep_0alp_5lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPOP_5e-7-2ep_0alp_5lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24839647690350491 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3210570160312575 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1573304521276596 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_5e-7-2ep_0alp_5lam/5eb10878-11e6-43ad-9bb5-658a3495129c.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_5e-7-2ep_0alp_5lam/5eb10878-11e6-43ad-9bb5-658a3495129c.json new file mode 100644 index 000000000..6f4423d45 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_5e-7-2ep_0alp_5lam/5eb10878-11e6-43ad-9bb5-658a3495129c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_5e-7-2ep_0alp_5lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-DPOP_5e-7-2ep_0alp_5lam", + "id": 
"JayHyeon/Qwen_0.5-DPOP_5e-7-2ep_0alp_5lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2484 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3211 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0438 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3288 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1573 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_5e-7-3ep_0alp_5lam/23b29cd4-cfd0-49f1-8959-c3aa8be9722f.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_5e-7-3ep_0alp_5lam/23b29cd4-cfd0-49f1-8959-c3aa8be9722f.json new file mode 100644 index 000000000..e97c3a4f4 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_5e-7-3ep_0alp_5lam/23b29cd4-cfd0-49f1-8959-c3aa8be9722f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_5e-7-3ep_0alp_5lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-DPOP_5e-7-3ep_0alp_5lam", + "id": "JayHyeon/Qwen_0.5-DPOP_5e-7-3ep_0alp_5lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": 
"Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2578 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3203 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0423 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3289 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1583 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_5e-7-3ep_0alp_5lam/77255cfb-3e18-4a3b-98a8-b0072aacb669.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_5e-7-3ep_0alp_5lam/77255cfb-3e18-4a3b-98a8-b0072aacb669.json deleted file mode 100644 index 287c9a1f3..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_5e-7-3ep_0alp_5lam/77255cfb-3e18-4a3b-98a8-b0072aacb669.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_5e-7-3ep_0alp_5lam/1762652579.6783109", - "retrieved_timestamp": "1762652579.6783118", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPOP_5e-7-3ep_0alp_5lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPOP_5e-7-3ep_0alp_5lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - 
}, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25781371206177384 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32030958605054793 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04229607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32885416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1583277925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_1e-6-3ep_0alp_0lam/03db2532-f8e0-41e9-ac0c-ff2913f4b12a.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_1e-6-3ep_0alp_0lam/03db2532-f8e0-41e9-ac0c-ff2913f4b12a.json new file mode 100644 index 000000000..5542366cc --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_1e-6-3ep_0alp_0lam/03db2532-f8e0-41e9-ac0c-ff2913f4b12a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_1e-6-3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-DPO_1e-6-3ep_0alp_0lam", + "id": "JayHyeon/Qwen_0.5-DPO_1e-6-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2316 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3258 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + 
"source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0529 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2693 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3221 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.158 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_1e-6-3ep_0alp_0lam/be9afede-e624-43e6-99dd-52e0d2b413ac.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_1e-6-3ep_0alp_0lam/be9afede-e624-43e6-99dd-52e0d2b413ac.json deleted file mode 100644 index af7e2179f..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_1e-6-3ep_0alp_0lam/be9afede-e624-43e6-99dd-52e0d2b413ac.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_1e-6-3ep_0alp_0lam/1762652579.678605", - "retrieved_timestamp": "1762652579.678606", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPO_1e-6-3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPO_1e-6-3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23163539408768735 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3258499805340021 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.052870090634441085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": 
"Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.322125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15799534574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_1e-7-3ep_0alp_0lam/273f0d50-aa4e-4469-8360-2ce0a2e1a850.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_1e-7-3ep_0alp_0lam/273f0d50-aa4e-4469-8360-2ce0a2e1a850.json new file mode 100644 index 000000000..fcfa337b4 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_1e-7-3ep_0alp_0lam/273f0d50-aa4e-4469-8360-2ce0a2e1a850.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_1e-7-3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-DPO_1e-7-3ep_0alp_0lam", + "id": "JayHyeon/Qwen_0.5-DPO_1e-7-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.236 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3225 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0438 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3222 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1596 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_1e-7-3ep_0alp_0lam/9632892a-a6b2-4f17-827e-bfef9a712985.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_1e-7-3ep_0alp_0lam/9632892a-a6b2-4f17-827e-bfef9a712985.json deleted file mode 100644 index f8fea8c74..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_1e-7-3ep_0alp_0lam/9632892a-a6b2-4f17-827e-bfef9a712985.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_1e-7-3ep_0alp_0lam/1762652579.678855", - "retrieved_timestamp": "1762652579.678856", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPO_1e-7-3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPO_1e-7-3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23598163982677073 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3225125170893353 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32221875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1595744680851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-6-1ep_0alp_0lam/79a48e79-d59b-4f86-a8f4-3af174a9ee0b.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-6-1ep_0alp_0lam/79a48e79-d59b-4f86-a8f4-3af174a9ee0b.json new file mode 
100644 index 000000000..8cad12a77 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-6-1ep_0alp_0lam/79a48e79-d59b-4f86-a8f4-3af174a9ee0b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_3e-6-1ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-DPO_3e-6-1ep_0alp_0lam", + "id": "JayHyeon/Qwen_0.5-DPO_3e-6-1ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2337 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3132 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0347 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3235 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1533 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-6-1ep_0alp_0lam/a690910a-388f-4a51-98a2-fc1e1bb327e2.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-6-1ep_0alp_0lam/a690910a-388f-4a51-98a2-fc1e1bb327e2.json deleted file mode 100644 index db9bddc76..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-6-1ep_0alp_0lam/a690910a-388f-4a51-98a2-fc1e1bb327e2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - 
"schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_3e-6-1ep_0alp_0lam/1762652579.679086", - "retrieved_timestamp": "1762652579.679086", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPO_3e-6-1ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPO_3e-6-1ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23370878158840763 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3132229900705577 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3235208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15325797872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-6-2ep_0alp_0lam/8c8eafcc-bb0f-4483-93ff-1379158a5d10.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-6-2ep_0alp_0lam/8c8eafcc-bb0f-4483-93ff-1379158a5d10.json deleted file mode 100644 index c6f9f4643..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-6-2ep_0alp_0lam/8c8eafcc-bb0f-4483-93ff-1379158a5d10.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_3e-6-2ep_0alp_0lam/1762652579.6792939", - "retrieved_timestamp": "1762652579.679295", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPO_3e-6-2ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPO_3e-6-2ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - 
"params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25693936532843964 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32760017293049276 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3155833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15649933510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-6-2ep_0alp_0lam/9da9a0e6-257a-41f6-b3a3-e3279a4924db.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-6-2ep_0alp_0lam/9da9a0e6-257a-41f6-b3a3-e3279a4924db.json new file mode 100644 index 000000000..8fcff6407 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-6-2ep_0alp_0lam/9da9a0e6-257a-41f6-b3a3-e3279a4924db.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_3e-6-2ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-DPO_3e-6-2ep_0alp_0lam", + "id": "JayHyeon/Qwen_0.5-DPO_3e-6-2ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2569 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3276 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": 
"MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0544 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2718 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3156 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1565 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-6-3ep_0alp_0lam/6c009b93-145d-4630-bda1-fb24bf764e7a.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-6-3ep_0alp_0lam/6c009b93-145d-4630-bda1-fb24bf764e7a.json deleted file mode 100644 index 0fdee1f86..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-6-3ep_0alp_0lam/6c009b93-145d-4630-bda1-fb24bf764e7a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_3e-6-3ep_0alp_0lam/1762652579.679507", - "retrieved_timestamp": "1762652579.679507", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPO_3e-6-3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPO_3e-6-3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24599839536873275 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32674094707635526 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3209166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15433843085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-6-3ep_0alp_0lam/dfed058c-48b2-4e1e-9a29-624771e3e9dd.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-6-3ep_0alp_0lam/dfed058c-48b2-4e1e-9a29-624771e3e9dd.json new file mode 100644 index 000000000..1408d7de5 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-6-3ep_0alp_0lam/dfed058c-48b2-4e1e-9a29-624771e3e9dd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_3e-6-3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-DPO_3e-6-3ep_0alp_0lam", + "id": "JayHyeon/Qwen_0.5-DPO_3e-6-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.246 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3267 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0431 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3209 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1543 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-7-1ep_0alp_0lam/1b4ccc58-920c-4089-b8ca-af3c71c5c3be.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-7-1ep_0alp_0lam/1b4ccc58-920c-4089-b8ca-af3c71c5c3be.json deleted file mode 100644 index 9d40d91e6..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-7-1ep_0alp_0lam/1b4ccc58-920c-4089-b8ca-af3c71c5c3be.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_3e-7-1ep_0alp_0lam/1762652579.679712", - "retrieved_timestamp": "1762652579.679712", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPO_3e-7-1ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPO_3e-7-1ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2529178136234081 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32292563083414066 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3195208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15965757978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-7-1ep_0alp_0lam/bcb53a8a-1670-400c-aab6-bd8ed2ebcdf4.json 
b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-7-1ep_0alp_0lam/bcb53a8a-1670-400c-aab6-bd8ed2ebcdf4.json new file mode 100644 index 000000000..2ee2160cf --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-7-1ep_0alp_0lam/bcb53a8a-1670-400c-aab6-bd8ed2ebcdf4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_3e-7-1ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-DPO_3e-7-1ep_0alp_0lam", + "id": "JayHyeon/Qwen_0.5-DPO_3e-7-1ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2529 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3229 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0551 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3195 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1597 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-7-2ep_0alp_0lam/4d278257-d64b-4da7-bcd6-0d3fbee80dd8.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-7-2ep_0alp_0lam/4d278257-d64b-4da7-bcd6-0d3fbee80dd8.json deleted file mode 100644 index f9da2a839..000000000 --- 
a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-7-2ep_0alp_0lam/4d278257-d64b-4da7-bcd6-0d3fbee80dd8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_3e-7-2ep_0alp_0lam/1762652579.6799219", - "retrieved_timestamp": "1762652579.679923", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPO_3e-7-2ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPO_3e-7-2ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25046986440422525 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3255735108237258 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3194895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15990691489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-7-2ep_0alp_0lam/8438a108-0d5d-48b6-b73a-981d13329daa.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-7-2ep_0alp_0lam/8438a108-0d5d-48b6-b73a-981d13329daa.json new file mode 100644 index 000000000..404b429a1 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-7-2ep_0alp_0lam/8438a108-0d5d-48b6-b73a-981d13329daa.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_3e-7-2ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-DPO_3e-7-2ep_0alp_0lam", + "id": "JayHyeon/Qwen_0.5-DPO_3e-7-2ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": 
"Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2505 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3256 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0476 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2718 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3195 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1599 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-7-3ep_0alp_0lam/3650d718-e20a-4310-a248-3897f7713e93.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-7-3ep_0alp_0lam/3650d718-e20a-4310-a248-3897f7713e93.json deleted file mode 100644 index f0cff3589..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-7-3ep_0alp_0lam/3650d718-e20a-4310-a248-3897f7713e93.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_3e-7-3ep_0alp_0lam/1762652579.680135", - "retrieved_timestamp": "1762652579.680136", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPO_3e-7-3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPO_3e-7-3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2387044153955948 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3258394284267221 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0445619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31685416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1589095744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-7-3ep_0alp_0lam/88616292-1e38-4481-af30-6b60e28fb097.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-7-3ep_0alp_0lam/88616292-1e38-4481-af30-6b60e28fb097.json new file mode 100644 index 000000000..6805ba8e2 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-7-3ep_0alp_0lam/88616292-1e38-4481-af30-6b60e28fb097.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_3e-7-3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-DPO_3e-7-3ep_0alp_0lam", + "id": "JayHyeon/Qwen_0.5-DPO_3e-7-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2387 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3258 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": 
"hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0446 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3169 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1589 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_5e-7-1ep_0alp_0lam/44094907-0b09-4706-a117-116a7e10a6e5.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_5e-7-1ep_0alp_0lam/44094907-0b09-4706-a117-116a7e10a6e5.json new file mode 100644 index 000000000..7ed0e9876 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_5e-7-1ep_0alp_0lam/44094907-0b09-4706-a117-116a7e10a6e5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_5e-7-1ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-DPO_5e-7-1ep_0alp_0lam", + "id": "JayHyeon/Qwen_0.5-DPO_5e-7-1ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2532 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3218 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0634 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3209 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1593 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_5e-7-1ep_0alp_0lam/6e224cd8-7f12-42a0-968e-311450d24e58.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_5e-7-1ep_0alp_0lam/6e224cd8-7f12-42a0-968e-311450d24e58.json deleted file mode 100644 index 1666b5689..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_5e-7-1ep_0alp_0lam/6e224cd8-7f12-42a0-968e-311450d24e58.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_5e-7-1ep_0alp_0lam/1762652579.6803432", - "retrieved_timestamp": "1762652579.6803432", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPO_5e-7-1ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPO_5e-7-1ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25324250765746 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32182747858122923 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32085416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15932513297872342 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_5e-7-2ep_0alp_0lam/1f17dbf3-f498-41cb-8ec0-5dabb2d9655e.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_5e-7-2ep_0alp_0lam/1f17dbf3-f498-41cb-8ec0-5dabb2d9655e.json deleted file mode 100644 index a9ded6222..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_5e-7-2ep_0alp_0lam/1f17dbf3-f498-41cb-8ec0-5dabb2d9655e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_5e-7-2ep_0alp_0lam/1762652579.680558", - "retrieved_timestamp": "1762652579.6805592", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPO_5e-7-2ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPO_5e-7-2ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24562383365027018 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3299192088381941 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.318125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16015625 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_5e-7-2ep_0alp_0lam/d19e8078-87e9-4760-9b91-6b5f478820e1.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_5e-7-2ep_0alp_0lam/d19e8078-87e9-4760-9b91-6b5f478820e1.json new file mode 
100644 index 000000000..97e1295f7 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_5e-7-2ep_0alp_0lam/d19e8078-87e9-4760-9b91-6b5f478820e1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_5e-7-2ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-DPO_5e-7-2ep_0alp_0lam", + "id": "JayHyeon/Qwen_0.5-DPO_5e-7-2ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2456 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3299 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0536 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3181 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1602 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_5e-7-3ep_0alp_0lam/896464f1-01bc-4370-8d90-3368323b2908.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_5e-7-3ep_0alp_0lam/896464f1-01bc-4370-8d90-3368323b2908.json new file mode 100644 index 000000000..b8bf7d1e7 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_5e-7-3ep_0alp_0lam/896464f1-01bc-4370-8d90-3368323b2908.json @@ -0,0 +1,132 @@ +{ + 
"schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_5e-7-3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-DPO_5e-7-3ep_0alp_0lam", + "id": "JayHyeon/Qwen_0.5-DPO_5e-7-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2423 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3271 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0514 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3181 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1595 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_5e-7-3ep_0alp_0lam/c5829ba8-e45c-4242-b308-9455f832cb58.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_5e-7-3ep_0alp_0lam/c5829ba8-e45c-4242-b308-9455f832cb58.json deleted file mode 100644 index 639e6ec92..000000000 --- a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_5e-7-3ep_0alp_0lam/c5829ba8-e45c-4242-b308-9455f832cb58.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_5e-7-3ep_0alp_0lam/1762652579.680775", - "retrieved_timestamp": "1762652579.680775", - 
"source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPO_5e-7-3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPO_5e-7-3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24225167001334236 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32712145602920534 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.318125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15949135638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-IPO_5e-7-1ep_0alp_0lam/9889f0b9-9051-485c-bd44-32b1e56b865c.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-IPO_5e-7-1ep_0alp_0lam/9889f0b9-9051-485c-bd44-32b1e56b865c.json new file mode 100644 index 000000000..71c059874 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-IPO_5e-7-1ep_0alp_0lam/9889f0b9-9051-485c-bd44-32b1e56b865c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IPO_5e-7-1ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-IPO_5e-7-1ep_0alp_0lam", + "id": "JayHyeon/Qwen_0.5-IPO_5e-7-1ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2574 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3279 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0559 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2693 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3169 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1651 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-IPO_5e-7-3ep_0alp_0lam/6563ce79-6df4-4c78-89e2-064f1250d898.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-IPO_5e-7-3ep_0alp_0lam/6563ce79-6df4-4c78-89e2-064f1250d898.json new file mode 100644 index 000000000..04b6f52a6 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-IPO_5e-7-3ep_0alp_0lam/6563ce79-6df4-4c78-89e2-064f1250d898.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IPO_5e-7-3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-IPO_5e-7-3ep_0alp_0lam", + "id": "JayHyeon/Qwen_0.5-IPO_5e-7-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3072 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + 
"dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3264 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0582 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2567 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3156 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1624 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_1e-6-3ep_1alp_0lam/b1778755-e6e6-47e2-925d-44d786c4ff62.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_1e-6-3ep_1alp_0lam/b1778755-e6e6-47e2-925d-44d786c4ff62.json new file mode 100644 index 000000000..0810520c3 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_1e-6-3ep_1alp_0lam/b1778755-e6e6-47e2-925d-44d786c4ff62.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_1e-6-3ep_1alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-IRPO_1e-6-3ep_1alp_0lam", + "id": "JayHyeon/Qwen_0.5-IRPO_1e-6-3ep_1alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2551 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3242 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0468 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3182 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1574 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_1e-7-3ep_1alp_0lam/3ae923b8-e9f4-472e-8d5e-54fa5f42ce01.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_1e-7-3ep_1alp_0lam/3ae923b8-e9f4-472e-8d5e-54fa5f42ce01.json new file mode 100644 index 000000000..c28c2cd0f --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_1e-7-3ep_1alp_0lam/3ae923b8-e9f4-472e-8d5e-54fa5f42ce01.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_1e-7-3ep_1alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-IRPO_1e-7-3ep_1alp_0lam", + "id": "JayHyeon/Qwen_0.5-IRPO_1e-7-3ep_1alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2636 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3198 + } + }, + { + "evaluation_name": "MATH Level 5", + 
"source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0514 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3262 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1586 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_3e-6-1ep_1alp_0lam/40831e23-0a9e-4bdc-a365-9399b6b82ff9.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_3e-6-1ep_1alp_0lam/40831e23-0a9e-4bdc-a365-9399b6b82ff9.json new file mode 100644 index 000000000..69728b3a1 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_3e-6-1ep_1alp_0lam/40831e23-0a9e-4bdc-a365-9399b6b82ff9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_3e-6-1ep_1alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-IRPO_3e-6-1ep_1alp_0lam", + "id": "JayHyeon/Qwen_0.5-IRPO_3e-6-1ep_1alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2323 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3255 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": 
"Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.037 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2508 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3169 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1612 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_3e-6-2ep_1alp_0lam/4a60fa82-34dc-4b0c-9102-65adac5039e4.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_3e-6-2ep_1alp_0lam/4a60fa82-34dc-4b0c-9102-65adac5039e4.json new file mode 100644 index 000000000..0bf9d0574 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_3e-6-2ep_1alp_0lam/4a60fa82-34dc-4b0c-9102-65adac5039e4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_3e-6-2ep_1alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-IRPO_3e-6-2ep_1alp_0lam", + "id": "JayHyeon/Qwen_0.5-IRPO_3e-6-2ep_1alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2414 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3314 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0347 + } + }, + { + 
"evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2517 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3342 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1532 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_3e-6-3ep_1alp_0lam/75ff2c43-dd19-48ae-9ba3-f99cdbadda1c.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_3e-6-3ep_1alp_0lam/75ff2c43-dd19-48ae-9ba3-f99cdbadda1c.json new file mode 100644 index 000000000..493b1d463 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_3e-6-3ep_1alp_0lam/75ff2c43-dd19-48ae-9ba3-f99cdbadda1c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_3e-6-3ep_1alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-IRPO_3e-6-3ep_1alp_0lam", + "id": "JayHyeon/Qwen_0.5-IRPO_3e-6-3ep_1alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2678 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3362 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0514 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2542 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3382 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1561 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_3e-7-1ep_1alp_0lam/d7962833-660a-4b9b-9836-8a2f3251f38e.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_3e-7-1ep_1alp_0lam/d7962833-660a-4b9b-9836-8a2f3251f38e.json new file mode 100644 index 000000000..0290bbad0 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_3e-7-1ep_1alp_0lam/d7962833-660a-4b9b-9836-8a2f3251f38e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_3e-7-1ep_1alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-IRPO_3e-7-1ep_1alp_0lam", + "id": "JayHyeon/Qwen_0.5-IRPO_3e-7-1ep_1alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2561 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3231 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0536 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.2718 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3196 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1589 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_3e-7-3ep_1alp_0lam/ad8ecabf-a868-496e-892b-582efb54fa6a.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_3e-7-3ep_1alp_0lam/ad8ecabf-a868-496e-892b-582efb54fa6a.json new file mode 100644 index 000000000..5edc518ea --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_3e-7-3ep_1alp_0lam/ad8ecabf-a868-496e-892b-582efb54fa6a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_3e-7-3ep_1alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-IRPO_3e-7-3ep_1alp_0lam", + "id": "JayHyeon/Qwen_0.5-IRPO_3e-7-3ep_1alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2639 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3257 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0476 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { 
+ "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3209 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1587 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_5e-7-1ep_1alp_0lam/49f25d3d-80c9-4723-8fa9-1501d44d70aa.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_5e-7-1ep_1alp_0lam/49f25d3d-80c9-4723-8fa9-1501d44d70aa.json new file mode 100644 index 000000000..2d9b9aac9 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_5e-7-1ep_1alp_0lam/49f25d3d-80c9-4723-8fa9-1501d44d70aa.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_5e-7-1ep_1alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-IRPO_5e-7-1ep_1alp_0lam", + "id": "JayHyeon/Qwen_0.5-IRPO_5e-7-1ep_1alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2518 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3214 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0574 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2735 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.3169 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1585 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_5e-7-2ep_1alp_0lam/70ea520c-3e0c-4412-9dbe-40a00801335c.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_5e-7-2ep_1alp_0lam/70ea520c-3e0c-4412-9dbe-40a00801335c.json new file mode 100644 index 000000000..47326edc5 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_5e-7-2ep_1alp_0lam/70ea520c-3e0c-4412-9dbe-40a00801335c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_5e-7-2ep_1alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-IRPO_5e-7-2ep_1alp_0lam", + "id": "JayHyeon/Qwen_0.5-IRPO_5e-7-2ep_1alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2438 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3266 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0619 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3196 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1554 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_5e-7-3ep_1alp_0lam/8e7f8bad-812b-4f6c-8dea-1cf44584c300.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_5e-7-3ep_1alp_0lam/8e7f8bad-812b-4f6c-8dea-1cf44584c300.json new file mode 100644 index 000000000..2ecc04e03 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-IRPO_5e-7-3ep_1alp_0lam/8e7f8bad-812b-4f6c-8dea-1cf44584c300.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_5e-7-3ep_1alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-IRPO_5e-7-3ep_1alp_0lam", + "id": "JayHyeon/Qwen_0.5-IRPO_5e-7-3ep_1alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2465 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3246 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0529 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2718 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3182 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.1563 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.1_3e-6-3ep_0alp_0lam/3b39a8f0-c5ba-4f74-9d27-bf5b389e038c.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.1_3e-6-3ep_0alp_0lam/3b39a8f0-c5ba-4f74-9d27-bf5b389e038c.json new file mode 100644 index 000000000..fdc8cfb2c --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.1_3e-6-3ep_0alp_0lam/3b39a8f0-c5ba-4f74-9d27-bf5b389e038c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.1_3e-6-3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-MDPO_0.1_3e-6-3ep_0alp_0lam", + "id": "JayHyeon/Qwen_0.5-MDPO_0.1_3e-6-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2506 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3261 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0498 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2819 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3382 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1522 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.1_5e-7-3ep_0alp_0lam/702a14d5-a7fd-4926-ab26-e4c3b7f5eda7.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.1_5e-7-3ep_0alp_0lam/702a14d5-a7fd-4926-ab26-e4c3b7f5eda7.json new file mode 100644 index 000000000..ed6db3b8d --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.1_5e-7-3ep_0alp_0lam/702a14d5-a7fd-4926-ab26-e4c3b7f5eda7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.1_5e-7-3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-MDPO_0.1_5e-7-3ep_0alp_0lam", + "id": "JayHyeon/Qwen_0.5-MDPO_0.1_5e-7-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2457 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.318 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0347 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3315 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1566 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.3_3e-6-3ep_0alp_0lam/20e5d087-7b20-4a39-81da-7334354b61f0.json 
b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.3_3e-6-3ep_0alp_0lam/20e5d087-7b20-4a39-81da-7334354b61f0.json new file mode 100644 index 000000000..2b13ae37e --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.3_3e-6-3ep_0alp_0lam/20e5d087-7b20-4a39-81da-7334354b61f0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.3_3e-6-3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-MDPO_0.3_3e-6-3ep_0alp_0lam", + "id": "JayHyeon/Qwen_0.5-MDPO_0.3_3e-6-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2454 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3216 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0506 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3382 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1544 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.3_5e-7-3ep_0alp_0lam/4c5a769c-0472-402c-8e97-d24e5b302bac.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.3_5e-7-3ep_0alp_0lam/4c5a769c-0472-402c-8e97-d24e5b302bac.json new file mode 100644 index 000000000..455e40c44 
--- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.3_5e-7-3ep_0alp_0lam/4c5a769c-0472-402c-8e97-d24e5b302bac.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.3_5e-7-3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-MDPO_0.3_5e-7-3ep_0alp_0lam", + "id": "JayHyeon/Qwen_0.5-MDPO_0.3_5e-7-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2342 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3189 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.04 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3302 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.158 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.5_1e-5-3ep_0alp_0lam/96166735-ed03-4931-81c9-d3daed1913d9.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.5_1e-5-3ep_0alp_0lam/96166735-ed03-4931-81c9-d3daed1913d9.json new file mode 100644 index 000000000..4438ad84e --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.5_1e-5-3ep_0alp_0lam/96166735-ed03-4931-81c9-d3daed1913d9.json @@ -0,0 +1,132 @@ +{ + 
"schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_1e-5-3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-MDPO_0.5_1e-5-3ep_0alp_0lam", + "id": "JayHyeon/Qwen_0.5-MDPO_0.5_1e-5-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.232 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3234 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0393 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3369 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1543 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-1ep_0alp_0lam/06d9b1e3-d054-4fa5-bf1f-9d6149e5111c.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-1ep_0alp_0lam/06d9b1e3-d054-4fa5-bf1f-9d6149e5111c.json new file mode 100644 index 000000000..9827aba86 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-1ep_0alp_0lam/06d9b1e3-d054-4fa5-bf1f-9d6149e5111c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_3e-7-1ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": 
"1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-MDPO_0.5_3e-7-1ep_0alp_0lam", + "id": "JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-1ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2418 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3175 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0423 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2626 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3288 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.158 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-2ep_0alp_0lam/776fd8d8-9846-4359-97d4-2340425d1315.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-2ep_0alp_0lam/776fd8d8-9846-4359-97d4-2340425d1315.json new file mode 100644 index 000000000..9a8c26353 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-2ep_0alp_0lam/776fd8d8-9846-4359-97d4-2340425d1315.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_3e-7-2ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + 
"evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-MDPO_0.5_3e-7-2ep_0alp_0lam", + "id": "JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-2ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2493 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3197 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0423 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3315 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1571 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-3ep_0alp_0lam/197ae1c5-c9b1-4912-91a3-8ccacddc1be6.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-3ep_0alp_0lam/197ae1c5-c9b1-4912-91a3-8ccacddc1be6.json new file mode 100644 index 000000000..ecb5d143e --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-3ep_0alp_0lam/197ae1c5-c9b1-4912-91a3-8ccacddc1be6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_3e-7-3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-MDPO_0.5_3e-7-3ep_0alp_0lam", + "id": 
"JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.252 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3198 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0423 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3262 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1551 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.5_4e-6-3ep_0alp_0lam/1fffd3d9-1c6b-4965-84e6-980bb0a13af3.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.5_4e-6-3ep_0alp_0lam/1fffd3d9-1c6b-4965-84e6-980bb0a13af3.json new file mode 100644 index 000000000..0e4125637 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.5_4e-6-3ep_0alp_0lam/1fffd3d9-1c6b-4965-84e6-980bb0a13af3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_4e-6-3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-MDPO_0.5_4e-6-3ep_0alp_0lam", + "id": "JayHyeon/Qwen_0.5-MDPO_0.5_4e-6-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": 
"bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.258 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3248 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0476 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2752 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3422 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1539 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.5_6e-6-3ep_0alp_0lam/57e8aaf0-f10b-4024-9f93-7b7f13f3ab10.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.5_6e-6-3ep_0alp_0lam/57e8aaf0-f10b-4024-9f93-7b7f13f3ab10.json new file mode 100644 index 000000000..a66e2fd4a --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.5_6e-6-3ep_0alp_0lam/57e8aaf0-f10b-4024-9f93-7b7f13f3ab10.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_6e-6-3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-MDPO_0.5_6e-6-3ep_0alp_0lam", + "id": "JayHyeon/Qwen_0.5-MDPO_0.5_6e-6-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + 
"source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.232 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3265 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0385 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3395 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1537 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.5_7e-6-3ep_0alp_0lam/304d5bee-df2d-40fc-b4a0-e3d99178f4bd.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.5_7e-6-3ep_0alp_0lam/304d5bee-df2d-40fc-b4a0-e3d99178f4bd.json new file mode 100644 index 000000000..a651a40c3 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.5_7e-6-3ep_0alp_0lam/304d5bee-df2d-40fc-b4a0-e3d99178f4bd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_7e-6-3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-MDPO_0.5_7e-6-3ep_0alp_0lam", + "id": "JayHyeon/Qwen_0.5-MDPO_0.5_7e-6-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2488 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3273 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0461 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2718 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3342 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1531 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.5_7e-7-3ep_0alp_0lam/6126d30d-e2dd-4b8b-9cb3-acdc76084bbb.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.5_7e-7-3ep_0alp_0lam/6126d30d-e2dd-4b8b-9cb3-acdc76084bbb.json new file mode 100644 index 000000000..19c99bb89 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.5_7e-7-3ep_0alp_0lam/6126d30d-e2dd-4b8b-9cb3-acdc76084bbb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_7e-7-3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-MDPO_0.5_7e-7-3ep_0alp_0lam", + "id": "JayHyeon/Qwen_0.5-MDPO_0.5_7e-7-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.2524 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.313 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0446 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3289 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1564 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.7_3e-6-3ep_0alp_0lam/fc7284d9-a73f-4562-a781-5cb87247183f.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.7_3e-6-3ep_0alp_0lam/fc7284d9-a73f-4562-a781-5cb87247183f.json new file mode 100644 index 000000000..4c3d00e8d --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.7_3e-6-3ep_0alp_0lam/fc7284d9-a73f-4562-a781-5cb87247183f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.7_3e-6-3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-MDPO_0.7_3e-6-3ep_0alp_0lam", + "id": "JayHyeon/Qwen_0.5-MDPO_0.7_3e-6-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2514 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, 
+ "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3221 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0438 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2752 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3315 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1538 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.7_5e-7-3ep_0alp_0lam/26ab447c-a850-4197-983a-a0dca4532029.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.7_5e-7-3ep_0alp_0lam/26ab447c-a850-4197-983a-a0dca4532029.json new file mode 100644 index 000000000..c06a3ff06 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.7_5e-7-3ep_0alp_0lam/26ab447c-a850-4197-983a-a0dca4532029.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.7_5e-7-3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-MDPO_0.7_5e-7-3ep_0alp_0lam", + "id": "JayHyeon/Qwen_0.5-MDPO_0.7_5e-7-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2457 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.318 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0385 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3275 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1572 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.9_5e-7-3ep_0alp_0lam/ee9e2131-aa99-49e1-9814-f0664614354b.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.9_5e-7-3ep_0alp_0lam/ee9e2131-aa99-49e1-9814-f0664614354b.json new file mode 100644 index 000000000..56da94f33 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-MDPO_0.9_5e-7-3ep_0alp_0lam/ee9e2131-aa99-49e1-9814-f0664614354b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.9_5e-7-3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-MDPO_0.9_5e-7-3ep_0alp_0lam", + "id": "JayHyeon/Qwen_0.5-MDPO_0.9_5e-7-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2636 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3181 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH 
Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0476 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3235 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1574 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VDPO_3e-6-1ep_3vpo_const/23c472f7-f060-4a69-8f72-12490675825a.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VDPO_3e-6-1ep_3vpo_const/23c472f7-f060-4a69-8f72-12490675825a.json new file mode 100644 index 000000000..070354a8d --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VDPO_3e-6-1ep_3vpo_const/23c472f7-f060-4a69-8f72-12490675825a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_3e-6-1ep_3vpo_const/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-VDPO_3e-6-1ep_3vpo_const", + "id": "JayHyeon/Qwen_0.5-VDPO_3e-6-1ep_3vpo_const", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2483 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3174 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 
5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0378 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2542 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3328 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1558 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_0alp_0lam/04172bef-c06b-4c08-b2af-9e1fe4d97664.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_0alp_0lam/04172bef-c06b-4c08-b2af-9e1fe4d97664.json new file mode 100644 index 000000000..c722b726e --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_0alp_0lam/04172bef-c06b-4c08-b2af-9e1fe4d97664.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-VDPO_5e-7-1ep_0alp_0lam", + "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2518 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3218 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0529 + } + }, + { + "evaluation_name": 
"GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2718 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3235 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1595 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_10vpo_const/3436355a-d2fe-411f-a764-4cb8284deb4c.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_10vpo_const/3436355a-d2fe-411f-a764-4cb8284deb4c.json new file mode 100644 index 000000000..7ac21a2a4 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_10vpo_const/3436355a-d2fe-411f-a764-4cb8284deb4c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_10vpo_const/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-VDPO_5e-7-1ep_10vpo_const", + "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_10vpo_const", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2536 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3234 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0491 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3236 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1597 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_1vpo_const/265655c0-2ead-4dd7-8c7e-4bee69d51bce.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_1vpo_const/265655c0-2ead-4dd7-8c7e-4bee69d51bce.json new file mode 100644 index 000000000..d1101d0a7 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_1vpo_const/265655c0-2ead-4dd7-8c7e-4bee69d51bce.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_1vpo_const/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-VDPO_5e-7-1ep_1vpo_const", + "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_1vpo_const", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2448 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.324 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0604 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2752 + } + }, + { + 
"evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3249 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1587 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_3vpo_const/645cae82-9e7b-4d1b-b944-e3783089c1c1.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_3vpo_const/645cae82-9e7b-4d1b-b944-e3783089c1c1.json new file mode 100644 index 000000000..f0b7804e2 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_3vpo_const/645cae82-9e7b-4d1b-b944-e3783089c1c1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_3vpo_const/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-VDPO_5e-7-1ep_3vpo_const", + "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_3vpo_const", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2505 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3227 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0468 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3209 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1589 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_0alp_0lam/ab658117-7c6b-428f-8f60-bf88a1d8a5bc.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_0alp_0lam/ab658117-7c6b-428f-8f60-bf88a1d8a5bc.json new file mode 100644 index 000000000..a26877761 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_0alp_0lam/ab658117-7c6b-428f-8f60-bf88a1d8a5bc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_5e-7-3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-VDPO_5e-7-3ep_0alp_0lam", + "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2472 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3255 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0498 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2752 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.3208 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1587 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_1vpo_const/03c4b5ce-3b22-4d9f-bf60-b626b52a114b.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_1vpo_const/03c4b5ce-3b22-4d9f-bf60-b626b52a114b.json new file mode 100644 index 000000000..5b2d375d1 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_1vpo_const/03c4b5ce-3b22-4d9f-bf60-b626b52a114b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_5e-7-3ep_1vpo_const/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-VDPO_5e-7-3ep_1vpo_const", + "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_1vpo_const", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2417 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3256 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0582 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3275 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1562 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_3vpo_const/ce7e3a31-c65b-4521-b685-fcbd067c75d9.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_3vpo_const/ce7e3a31-c65b-4521-b685-fcbd067c75d9.json new file mode 100644 index 000000000..271b74801 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_3vpo_const/ce7e3a31-c65b-4521-b685-fcbd067c75d9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_5e-7-3ep_3vpo_const/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-VDPO_5e-7-3ep_3vpo_const", + "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_3vpo_const", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2527 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3235 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0536 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2785 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3235 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.158 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_0alp_0lam/adb53e2c-5dee-4840-8eae-e0186c6e103f.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_0alp_0lam/adb53e2c-5dee-4840-8eae-e0186c6e103f.json new file mode 100644 index 000000000..67643af45 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_0alp_0lam/adb53e2c-5dee-4840-8eae-e0186c6e103f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-VIPO_5e-7-1ep_0alp_0lam", + "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2669 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3314 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.071 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3168 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1634 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_10vpo_const/ba89563d-f53a-4bf0-91e1-92ac950523d8.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_10vpo_const/ba89563d-f53a-4bf0-91e1-92ac950523d8.json new file mode 100644 index 000000000..fc2edfe9a --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_10vpo_const/ba89563d-f53a-4bf0-91e1-92ac950523d8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_10vpo_const/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-VIPO_5e-7-1ep_10vpo_const", + "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_10vpo_const", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2702 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.33 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.074 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2752 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3208 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1635 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_1vpo_const/3fc0ad8d-4bb2-401a-9baf-b94b39b7e1aa.json 
b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_1vpo_const/3fc0ad8d-4bb2-401a-9baf-b94b39b7e1aa.json new file mode 100644 index 000000000..21e635518 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_1vpo_const/3fc0ad8d-4bb2-401a-9baf-b94b39b7e1aa.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_1vpo_const/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-VIPO_5e-7-1ep_1vpo_const", + "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_1vpo_const", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.248 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3309 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.068 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3208 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1649 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_30vpo_const/ed816bcb-bbe9-48ae-a6ac-3603779a985f.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_30vpo_const/ed816bcb-bbe9-48ae-a6ac-3603779a985f.json new file mode 100644 index 000000000..a5f23395a --- /dev/null +++ 
b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_30vpo_const/ed816bcb-bbe9-48ae-a6ac-3603779a985f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_30vpo_const/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-VIPO_5e-7-1ep_30vpo_const", + "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_30vpo_const", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2622 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3282 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.074 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2693 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3221 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1634 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_3vpo_const/f347ed24-066a-4cba-8478-f03628cb2b5b.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_3vpo_const/f347ed24-066a-4cba-8478-f03628cb2b5b.json new file mode 100644 index 000000000..1a374d513 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_3vpo_const/f347ed24-066a-4cba-8478-f03628cb2b5b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + 
"evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_3vpo_const/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-VIPO_5e-7-1ep_3vpo_const", + "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_3vpo_const", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3298 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.065 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3168 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1651 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_0alp_0lam/ffddfea0-d17e-44e7-8931-a9601e9cb26b.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_0alp_0lam/ffddfea0-d17e-44e7-8931-a9601e9cb26b.json new file mode 100644 index 000000000..bf35e8693 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_0alp_0lam/ffddfea0-d17e-44e7-8931-a9601e9cb26b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_0alp_0lam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": 
"HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-VIPO_5e-7-3ep_0alp_0lam", + "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.293 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.322 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0627 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3116 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1591 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_10vpo_const/ec351fa1-78c2-48c6-83f0-7c2a9b2f0731.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_10vpo_const/ec351fa1-78c2-48c6-83f0-7c2a9b2f0731.json new file mode 100644 index 000000000..45a06565e --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_10vpo_const/ec351fa1-78c2-48c6-83f0-7c2a9b2f0731.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_10vpo_const/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"Qwen_0.5-VIPO_5e-7-3ep_10vpo_const", + "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_10vpo_const", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2881 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3255 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0725 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2752 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3102 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1582 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_1vpo_const/a0038c34-130b-49dc-a93f-94706a3dad50.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_1vpo_const/a0038c34-130b-49dc-a93f-94706a3dad50.json new file mode 100644 index 000000000..8033e9669 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_1vpo_const/a0038c34-130b-49dc-a93f-94706a3dad50.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_1vpo_const/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-VIPO_5e-7-3ep_1vpo_const", + "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_1vpo_const", + "developer": "JayHyeon", + "inference_platform": "unknown", + 
"additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2887 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3237 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0748 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3142 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1609 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_30vpo_const/cbd5ea42-1e5b-4984-bdcf-e60fbfb9d692.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_30vpo_const/cbd5ea42-1e5b-4984-bdcf-e60fbfb9d692.json new file mode 100644 index 000000000..85441076b --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_30vpo_const/cbd5ea42-1e5b-4984-bdcf-e60fbfb9d692.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_30vpo_const/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-VIPO_5e-7-3ep_30vpo_const", + "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_30vpo_const", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + 
"evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2905 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3254 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.077 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2735 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3129 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1574 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_3vpo_const/b902e2b2-a0b3-4467-b076-b98717c40d74.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_3vpo_const/b902e2b2-a0b3-4467-b076-b98717c40d74.json new file mode 100644 index 000000000..cb1e7dc31 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_3vpo_const/b902e2b2-a0b3-4467-b076-b98717c40d74.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_3vpo_const/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-VIPO_5e-7-3ep_3vpo_const", + "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_3vpo_const", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2905 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3238 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0702 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2735 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3089 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1592 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.1/4c749665-59ff-49df-a193-0262f66e6003.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.1/4c749665-59ff-49df-a193-0262f66e6003.json new file mode 100644 index 000000000..7f560d07a --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.1/4c749665-59ff-49df-a193-0262f66e6003.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.1", + "id": "JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.1", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, 
+ "score_details": { + "score": 0.2393 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3244 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0514 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3222 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1573 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.3/c99899c6-95e1-4dea-ac12-f8df49728a3b.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.3/c99899c6-95e1-4dea-ac12-f8df49728a3b.json new file mode 100644 index 000000000..52430793e --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.3/c99899c6-95e1-4dea-ac12-f8df49728a3b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.3", + "id": "JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.3", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2475 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": 
"hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3209 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0461 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3275 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1567 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-rDPO_3e-6-1ep_0vpo_const_0.1/13deca9f-073e-444b-bf79-35e816f7c312.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-rDPO_3e-6-1ep_0vpo_const_0.1/13deca9f-073e-444b-bf79-35e816f7c312.json new file mode 100644 index 000000000..d31366b10 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-rDPO_3e-6-1ep_0vpo_const_0.1/13deca9f-073e-444b-bf79-35e816f7c312.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-rDPO_3e-6-1ep_0vpo_const_0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-rDPO_3e-6-1ep_0vpo_const_0.1", + "id": "JayHyeon/Qwen_0.5-rDPO_3e-6-1ep_0vpo_const_0.1", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2321 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3278 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0476 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3022 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1496 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.1/c8adc0a5-f4bf-4f88-984c-aba506eae6a9.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.1/c8adc0a5-f4bf-4f88-984c-aba506eae6a9.json new file mode 100644 index 000000000..9fb481bab --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.1/c8adc0a5-f4bf-4f88-984c-aba506eae6a9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.1", + "id": "JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.1", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2542 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3253 + } + }, + { + "evaluation_name": "MATH 
Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0529 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3181 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1609 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.3/b146daaf-ce1f-4520-bc19-21ce8679b220.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.3/b146daaf-ce1f-4520-bc19-21ce8679b220.json new file mode 100644 index 000000000..345fd539c --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.3/b146daaf-ce1f-4520-bc19-21ce8679b220.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.3", + "id": "JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.3", + "developer": "JayHyeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2739 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3245 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + 
"metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0461 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2508 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3089 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1597 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Jimmy19991222/Llama-3-Instruct-8B-SimPO-v0.2/45e1d037-1ed0-472c-a311-c651fde270fc.json b/data/hfopenllm_v2/Jimmy19991222/Llama-3-Instruct-8B-SimPO-v0.2/45e1d037-1ed0-472c-a311-c651fde270fc.json new file mode 100644 index 000000000..36efeda0f --- /dev/null +++ b/data/hfopenllm_v2/Jimmy19991222/Llama-3-Instruct-8B-SimPO-v0.2/45e1d037-1ed0-472c-a311-c651fde270fc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Jimmy19991222_Llama-3-Instruct-8B-SimPO-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Instruct-8B-SimPO-v0.2", + "id": "Jimmy19991222/Llama-3-Instruct-8B-SimPO-v0.2", + "developer": "Jimmy19991222", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.654 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4984 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.0619 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3146 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4013 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3686 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Jimmy19991222/Llama-3-Instruct-8B-SimPO-v0.2/4d7428e8-41a2-4834-900e-e43b05f4d131.json b/data/hfopenllm_v2/Jimmy19991222/Llama-3-Instruct-8B-SimPO-v0.2/4d7428e8-41a2-4834-900e-e43b05f4d131.json deleted file mode 100644 index 8cd110aea..000000000 --- a/data/hfopenllm_v2/Jimmy19991222/Llama-3-Instruct-8B-SimPO-v0.2/4d7428e8-41a2-4834-900e-e43b05f4d131.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Jimmy19991222_Llama-3-Instruct-8B-SimPO-v0.2/1762652579.692669", - "retrieved_timestamp": "1762652579.692669", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Jimmy19991222/Llama-3-Instruct-8B-SimPO-v0.2", - "developer": "Jimmy19991222", - "inference_platform": "unknown", - "id": "Jimmy19991222/Llama-3-Instruct-8B-SimPO-v0.2", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6540368444615842 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.498371102582105 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.061933534743202415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40125000000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3686003989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun/3f4ce54a-01f3-4c23-a4ba-22d47e0344dc.json b/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun/3f4ce54a-01f3-4c23-a4ba-22d47e0344dc.json new file mode 100644 index 000000000..ea821d05a --- /dev/null +++ b/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun/3f4ce54a-01f3-4c23-a4ba-22d47e0344dc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun", + "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun", + "developer": "Jimmy19991222", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6717 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.488 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0604 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4041 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3634 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun/9e8f395c-f481-4a64-86ee-053961b17c42.json b/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun/9e8f395c-f481-4a64-86ee-053961b17c42.json deleted file mode 100644 index 4d8f535aa..000000000 --- a/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun/9e8f395c-f481-4a64-86ee-053961b17c42.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun/1762652579.6929338", - "retrieved_timestamp": "1762652579.692935", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun", - "developer": "Jimmy19991222", - "inference_platform": "unknown", - "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6717221416951467 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48797965672899357 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4040729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36336436170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log/470d52be-9dbd-4714-b004-f65cc82d245f.json b/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log/470d52be-9dbd-4714-b004-f65cc82d245f.json new file mode 100644 index 000000000..49a213fd9 --- /dev/null +++ b/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log/470d52be-9dbd-4714-b004-f65cc82d245f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log", + "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log", + "developer": "Jimmy19991222", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6556 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4935 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0544 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": 
{ + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3658 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log/913d1072-8ea3-4e0d-9d72-d30ae186dc7d.json b/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log/913d1072-8ea3-4e0d-9d72-d30ae186dc7d.json deleted file mode 100644 index 3819287d9..000000000 --- a/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log/913d1072-8ea3-4e0d-9d72-d30ae186dc7d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log/1762652579.6931531", - "retrieved_timestamp": "1762652579.693154", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log", - "developer": "Jimmy19991222", - "inference_platform": "unknown", - "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6555605792630221 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49345840367294164 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4000104166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3657746010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log/55baee54-fb05-49a1-962d-145a93de91a8.json 
b/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log/55baee54-fb05-49a1-962d-145a93de91a8.json deleted file mode 100644 index 8249bb5c7..000000000 --- a/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log/55baee54-fb05-49a1-962d-145a93de91a8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log/1762652579.693368", - "retrieved_timestamp": "1762652579.6933692", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log", - "developer": "Jimmy19991222", - "inference_platform": "unknown", - "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6315055164740666 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4916414793938901 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3935 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3611203457446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log/c836fd05-1969-439c-91e1-fd0cab816f6c.json b/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log/c836fd05-1969-439c-91e1-fd0cab816f6c.json new file mode 100644 index 000000000..539534c00 --- /dev/null +++ b/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log/c836fd05-1969-439c-91e1-fd0cab816f6c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log", + "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log", + "developer": "Jimmy19991222", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6315 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4916 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.065 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2861 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3935 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3611 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bleu-beta0.1-no-length-scale-gamma0.4/14774c6b-eb03-4abc-92df-1e7a196ca8a4.json b/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bleu-beta0.1-no-length-scale-gamma0.4/14774c6b-eb03-4abc-92df-1e7a196ca8a4.json new file mode 100644 index 000000000..80d5fde9f --- /dev/null +++ b/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bleu-beta0.1-no-length-scale-gamma0.4/14774c6b-eb03-4abc-92df-1e7a196ca8a4.json @@ -0,0 
+1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bleu-beta0.1-no-length-scale-gamma0.4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-3-8b-instruct-gapo-v2-bleu-beta0.1-no-length-scale-gamma0.4", + "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bleu-beta0.1-no-length-scale-gamma0.4", + "developer": "Jimmy19991222", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6285 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4986 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0514 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4014 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3545 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bleu-beta0.1-no-length-scale-gamma0.4/601e250a-5c2f-4947-9ea3-0f903b2823ec.json b/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bleu-beta0.1-no-length-scale-gamma0.4/601e250a-5c2f-4947-9ea3-0f903b2823ec.json deleted file mode 100644 index b1f9930aa..000000000 --- 
a/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bleu-beta0.1-no-length-scale-gamma0.4/601e250a-5c2f-4947-9ea3-0f903b2823ec.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bleu-beta0.1-no-length-scale-gamma0.4/1762652579.69359", - "retrieved_timestamp": "1762652579.693591", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bleu-beta0.1-no-length-scale-gamma0.4", - "developer": "Jimmy19991222", - "inference_platform": "unknown", - "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bleu-beta0.1-no-length-scale-gamma0.4", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6284580468711907 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4986088445592742 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40137500000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3544714095744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun/5293ae0c-8022-44d4-b2f5-4f5390dff93e.json b/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun/5293ae0c-8022-44d4-b2f5-4f5390dff93e.json new file mode 100644 index 000000000..ce4d8a93a --- /dev/null +++ b/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun/5293ae0c-8022-44d4-b2f5-4f5390dff93e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Jimmy19991222_llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging 
Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun", + "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun", + "developer": "Jimmy19991222", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6678 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.494 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0612 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3987 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3658 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun/8ab1619c-6edf-457e-9834-0e9dc127d6a4.json b/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun/8ab1619c-6edf-457e-9834-0e9dc127d6a4.json deleted file mode 100644 index 7042a2436..000000000 --- a/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun/8ab1619c-6edf-457e-9834-0e9dc127d6a4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Jimmy19991222_llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun/1762652579.69381", - "retrieved_timestamp": "1762652579.693811", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun", - "developer": "Jimmy19991222", - "inference_platform": "unknown", - "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6677504576745258 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4940463886115545 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3987083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3657746010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log/5f6d2c1e-1c66-4b1c-beed-a730d93d997f.json b/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log/5f6d2c1e-1c66-4b1c-beed-a730d93d997f.json deleted file mode 100644 index a8f92be29..000000000 --- a/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log/5f6d2c1e-1c66-4b1c-beed-a730d93d997f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Jimmy19991222_llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log/1762652579.69404", - "retrieved_timestamp": "1762652579.694041", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log", - "developer": "Jimmy19991222", - "inference_platform": "unknown", - "id": 
"Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6605063453857986 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49160075581298046 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06570996978851963 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4000416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3664394946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log/9020f91f-a8f0-447d-af68-247aa81a25c6.json b/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log/9020f91f-a8f0-447d-af68-247aa81a25c6.json new file mode 100644 index 000000000..4e211ab7c --- /dev/null +++ b/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log/9020f91f-a8f0-447d-af68-247aa81a25c6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Jimmy19991222_llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log", + "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log", + "developer": "Jimmy19991222", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6605 + } + }, + 
{ + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4916 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0657 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3664 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log/0cd6837a-8c3f-4529-9ea0-8755e1725467.json b/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log/0cd6837a-8c3f-4529-9ea0-8755e1725467.json new file mode 100644 index 000000000..b0cd90258 --- /dev/null +++ b/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log/0cd6837a-8c3f-4529-9ea0-8755e1725467.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Jimmy19991222_llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log", + "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log", + "developer": "Jimmy19991222", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.6492 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4952 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0642 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3961 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3711 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log/6621f47a-13c7-421c-b054-cc9116a04e4e.json b/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log/6621f47a-13c7-421c-b054-cc9116a04e4e.json deleted file mode 100644 index d65334ff4..000000000 --- a/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log/6621f47a-13c7-421c-b054-cc9116a04e4e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Jimmy19991222_llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log/1762652579.694266", - "retrieved_timestamp": "1762652579.6942668", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log", - "developer": "Jimmy19991222", - "inference_platform": "unknown", - "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy 
on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.649190813707629 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4952489348573605 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06419939577039276 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3961354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37109375 - } - } - ] -} diff --git a/data/hfopenllm_v2/Joseph717171/Hermes-3-Llama-3.1-8B_TIES_with_Base_Embeds_Initialized_to_Special_Instruct_Toks_dtypeF32/7cb17011-cf77-4e86-b67f-84e6ff4b8086.json b/data/hfopenllm_v2/Joseph717171/Hermes-3-Llama-3.1-8B_TIES_with_Base_Embeds_Initialized_to_Special_Instruct_Toks_dtypeF32/7cb17011-cf77-4e86-b67f-84e6ff4b8086.json new file mode 100644 index 000000000..25c8b3965 --- /dev/null +++ b/data/hfopenllm_v2/Joseph717171/Hermes-3-Llama-3.1-8B_TIES_with_Base_Embeds_Initialized_to_Special_Instruct_Toks_dtypeF32/7cb17011-cf77-4e86-b67f-84e6ff4b8086.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Joseph717171_Hermes-3-Llama-3.1-8B_TIES_with_Base_Embeds_Initialized_to_Special_Instruct_Toks_dtypeF32/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Hermes-3-Llama-3.1-8B_TIES_with_Base_Embeds_Initialized_to_Special_Instruct_Toks_dtypeF32", + "id": "Joseph717171/Hermes-3-Llama-3.1-8B_TIES_with_Base_Embeds_Initialized_to_Special_Instruct_Toks_dtypeF32", + "developer": "Joseph717171", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6185 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5177 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0514 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4369 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3144 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Joseph717171/Llama-3.1-SuperNova-8B-Lite_TIES_with_Base/086831f9-c677-428b-a997-4da58733633c.json b/data/hfopenllm_v2/Joseph717171/Llama-3.1-SuperNova-8B-Lite_TIES_with_Base/086831f9-c677-428b-a997-4da58733633c.json new file mode 100644 index 000000000..eb6e8c12a --- /dev/null +++ b/data/hfopenllm_v2/Joseph717171/Llama-3.1-SuperNova-8B-Lite_TIES_with_Base/086831f9-c677-428b-a997-4da58733633c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Joseph717171_Llama-3.1-SuperNova-8B-Lite_TIES_with_Base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-SuperNova-8B-Lite_TIES_with_Base", + "id": "Joseph717171/Llama-3.1-SuperNova-8B-Lite_TIES_with_Base", + "developer": "Joseph717171", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8096 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + 
}, + "score_details": { + "score": 0.5147 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1835 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.411 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.388 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Josephgflowers/Cinder-Phi-2-V1-F16-gguf/d71893b8-b82c-490b-a700-b579d64e0610.json b/data/hfopenllm_v2/Josephgflowers/Cinder-Phi-2-V1-F16-gguf/d71893b8-b82c-490b-a700-b579d64e0610.json new file mode 100644 index 000000000..536e2fe85 --- /dev/null +++ b/data/hfopenllm_v2/Josephgflowers/Cinder-Phi-2-V1-F16-gguf/d71893b8-b82c-490b-a700-b579d64e0610.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Josephgflowers_Cinder-Phi-2-V1-F16-gguf/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Cinder-Phi-2-V1-F16-gguf", + "id": "Josephgflowers/Cinder-Phi-2-V1-F16-gguf", + "developer": "Josephgflowers", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2357 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4397 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0242 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2819 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3435 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2161 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Josephgflowers/Differential-Attention-Liquid-Metal-Tinyllama/9893689f-c27d-4148-a27f-cd07b07e98b7.json b/data/hfopenllm_v2/Josephgflowers/Differential-Attention-Liquid-Metal-Tinyllama/9893689f-c27d-4148-a27f-cd07b07e98b7.json new file mode 100644 index 000000000..dab2ee4a6 --- /dev/null +++ b/data/hfopenllm_v2/Josephgflowers/Differential-Attention-Liquid-Metal-Tinyllama/9893689f-c27d-4148-a27f-cd07b07e98b7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Josephgflowers_Differential-Attention-Liquid-Metal-Tinyllama/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Differential-Attention-Liquid-Metal-Tinyllama", + "id": "Josephgflowers/Differential-Attention-Liquid-Metal-Tinyllama", + "developer": "Josephgflowers", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.1 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2227 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2926 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0325 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2508 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3356 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1214 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Josephgflowers/TinyLlama-Cinder-Agent-v1/90f2df23-a9ec-44be-ade5-89b59cb7368a.json b/data/hfopenllm_v2/Josephgflowers/TinyLlama-Cinder-Agent-v1/90f2df23-a9ec-44be-ade5-89b59cb7368a.json new file mode 100644 index 000000000..7c3cb09c4 --- /dev/null +++ b/data/hfopenllm_v2/Josephgflowers/TinyLlama-Cinder-Agent-v1/90f2df23-a9ec-44be-ade5-89b59cb7368a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Josephgflowers_TinyLlama-Cinder-Agent-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TinyLlama-Cinder-Agent-v1", + "id": "Josephgflowers/TinyLlama-Cinder-Agent-v1", + "developer": "Josephgflowers", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.1 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.267 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3116 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.0347 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2441 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3395 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1161 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Josephgflowers/TinyLlama-v1.1-Cinders-World/afd545da-390a-478a-b0f5-ea819f088f27.json b/data/hfopenllm_v2/Josephgflowers/TinyLlama-v1.1-Cinders-World/afd545da-390a-478a-b0f5-ea819f088f27.json new file mode 100644 index 000000000..3ac65a5c2 --- /dev/null +++ b/data/hfopenllm_v2/Josephgflowers/TinyLlama-v1.1-Cinders-World/afd545da-390a-478a-b0f5-ea819f088f27.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Josephgflowers_TinyLlama-v1.1-Cinders-World/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TinyLlama-v1.1-Cinders-World", + "id": "Josephgflowers/TinyLlama-v1.1-Cinders-World", + "developer": "Josephgflowers", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.1 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2469 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2998 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0347 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, 
+ "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2441 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3356 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1198 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Josephgflowers/TinyLlama_v1.1_math_code-world-test-1/ce776f68-856f-4aee-b7e4-e55d15e8d714.json b/data/hfopenllm_v2/Josephgflowers/TinyLlama_v1.1_math_code-world-test-1/ce776f68-856f-4aee-b7e4-e55d15e8d714.json new file mode 100644 index 000000000..489ca17d9 --- /dev/null +++ b/data/hfopenllm_v2/Josephgflowers/TinyLlama_v1.1_math_code-world-test-1/ce776f68-856f-4aee-b7e4-e55d15e8d714.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Josephgflowers_TinyLlama_v1.1_math_code-world-test-1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TinyLlama_v1.1_math_code-world-test-1", + "id": "Josephgflowers/TinyLlama_v1.1_math_code-world-test-1", + "developer": "Josephgflowers", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.1 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0078 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3146 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0196 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2341 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3499 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1132 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Josephgflowers/Tinyllama-STEM-Cinder-Agent-v1/9b015729-524c-44f3-9c2c-c42981d7a61e.json b/data/hfopenllm_v2/Josephgflowers/Tinyllama-STEM-Cinder-Agent-v1/9b015729-524c-44f3-9c2c-c42981d7a61e.json new file mode 100644 index 000000000..586b23dba --- /dev/null +++ b/data/hfopenllm_v2/Josephgflowers/Tinyllama-STEM-Cinder-Agent-v1/9b015729-524c-44f3-9c2c-c42981d7a61e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Josephgflowers_Tinyllama-STEM-Cinder-Agent-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Tinyllama-STEM-Cinder-Agent-v1", + "id": "Josephgflowers/Tinyllama-STEM-Cinder-Agent-v1", + "developer": "Josephgflowers", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.1 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2126 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3084 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0672 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2349 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { 
+ "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3341 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1086 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Josephgflowers/Tinyllama-r1/56a54ffc-4692-496c-95df-8e4ad19d4d95.json b/data/hfopenllm_v2/Josephgflowers/Tinyllama-r1/56a54ffc-4692-496c-95df-8e4ad19d4d95.json new file mode 100644 index 000000000..5e761ffc2 --- /dev/null +++ b/data/hfopenllm_v2/Josephgflowers/Tinyllama-r1/56a54ffc-4692-496c-95df-8e4ad19d4d95.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Josephgflowers_Tinyllama-r1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Tinyllama-r1", + "id": "Josephgflowers/Tinyllama-r1", + "developer": "Josephgflowers", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.1 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2119 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3015 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0325 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2567 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.3315 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1134 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JungZoona/T3Q-Qwen2.5-14B-Instruct-1M-e3/464673ee-0238-40b4-9c15-1a1551b9f65c.json b/data/hfopenllm_v2/JungZoona/T3Q-Qwen2.5-14B-Instruct-1M-e3/464673ee-0238-40b4-9c15-1a1551b9f65c.json deleted file mode 100644 index cef212781..000000000 --- a/data/hfopenllm_v2/JungZoona/T3Q-Qwen2.5-14B-Instruct-1M-e3/464673ee-0238-40b4-9c15-1a1551b9f65c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JungZoona_T3Q-Qwen2.5-14B-Instruct-1M-e3/1762652579.696794", - "retrieved_timestamp": "1762652579.696794", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JungZoona/T3Q-Qwen2.5-14B-Instruct-1M-e3", - "developer": "JungZoona", - "inference_platform": "unknown", - "id": "JungZoona/T3Q-Qwen2.5-14B-Instruct-1M-e3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Unknown", - "params_billions": 0.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.732396707403024 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7585971930826706 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2862537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41694630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5911041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5884308510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/JungZoona/T3Q-Qwen2.5-14B-Instruct-1M-e3/4b105969-2ce5-4c62-89ef-efd392c2ca89.json b/data/hfopenllm_v2/JungZoona/T3Q-Qwen2.5-14B-Instruct-1M-e3/4b105969-2ce5-4c62-89ef-efd392c2ca89.json new file mode 100644 index 000000000..6864f9d79 --- /dev/null +++ 
b/data/hfopenllm_v2/JungZoona/T3Q-Qwen2.5-14B-Instruct-1M-e3/4b105969-2ce5-4c62-89ef-efd392c2ca89.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/JungZoona_T3Q-Qwen2.5-14B-Instruct-1M-e3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T3Q-Qwen2.5-14B-Instruct-1M-e3", + "id": "JungZoona/T3Q-Qwen2.5-14B-Instruct-1M-e3", + "developer": "JungZoona", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Unknown", + "params_billions": 0.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7324 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7586 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2863 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4169 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5911 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5884 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JungZoona/T3Q-qwen2.5-14b-v1.0-e3/31af79b1-48c1-4399-9d16-8582c92996ee.json b/data/hfopenllm_v2/JungZoona/T3Q-qwen2.5-14b-v1.0-e3/31af79b1-48c1-4399-9d16-8582c92996ee.json new file mode 100644 index 000000000..ebb5f9b2d --- /dev/null +++ b/data/hfopenllm_v2/JungZoona/T3Q-qwen2.5-14b-v1.0-e3/31af79b1-48c1-4399-9d16-8582c92996ee.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/JungZoona_T3Q-qwen2.5-14b-v1.0-e3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T3Q-qwen2.5-14b-v1.0-e3", + "id": "JungZoona/T3Q-qwen2.5-14b-v1.0-e3", + "developer": "JungZoona", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7324 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7586 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2863 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4169 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5911 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5884 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Junhoee/Qwen-Megumin/59a67f29-cb7d-497c-b7bb-1764a665ae33.json b/data/hfopenllm_v2/Junhoee/Qwen-Megumin/59a67f29-cb7d-497c-b7bb-1764a665ae33.json new file mode 100644 index 000000000..8f5c2b802 --- /dev/null +++ b/data/hfopenllm_v2/Junhoee/Qwen-Megumin/59a67f29-cb7d-497c-b7bb-1764a665ae33.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Junhoee_Qwen-Megumin/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": 
"third_party" + }, + "model_info": { + "name": "Qwen-Megumin", + "id": "Junhoee/Qwen-Megumin", + "developer": "Junhoee", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 15.231 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7141 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5285 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4902 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.398 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4199 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/KSU-HW-SEC/Llama3-70b-SVA-FT-1415/08fcda98-72e9-4338-b2a2-6db924a47288.json b/data/hfopenllm_v2/KSU-HW-SEC/Llama3-70b-SVA-FT-1415/08fcda98-72e9-4338-b2a2-6db924a47288.json deleted file mode 100644 index 317b41b4a..000000000 --- a/data/hfopenllm_v2/KSU-HW-SEC/Llama3-70b-SVA-FT-1415/08fcda98-72e9-4338-b2a2-6db924a47288.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/KSU-HW-SEC_Llama3-70b-SVA-FT-1415/1762652579.6977122", - "retrieved_timestamp": "1762652579.697713", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "KSU-HW-SEC/Llama3-70b-SVA-FT-1415", - "developer": "KSU-HW-SEC", - "inference_platform": "unknown", - 
"id": "KSU-HW-SEC/Llama3-70b-SVA-FT-1415", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6179913739987677 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6650146340680478 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21978851963746224 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.375 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4565416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5242686170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/KSU-HW-SEC/Llama3-70b-SVA-FT-1415/fe57367c-74b7-483e-af54-4f404cbea75b.json b/data/hfopenllm_v2/KSU-HW-SEC/Llama3-70b-SVA-FT-1415/fe57367c-74b7-483e-af54-4f404cbea75b.json new file mode 100644 index 000000000..d5244c3c9 --- /dev/null +++ b/data/hfopenllm_v2/KSU-HW-SEC/Llama3-70b-SVA-FT-1415/fe57367c-74b7-483e-af54-4f404cbea75b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/KSU-HW-SEC_Llama3-70b-SVA-FT-1415/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3-70b-SVA-FT-1415", + "id": "KSU-HW-SEC/Llama3-70b-SVA-FT-1415", + "developer": "KSU-HW-SEC", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.618 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.665 + } + 
}, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2198 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.375 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4565 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5243 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/KSU-HW-SEC/Llama3-70b-SVA-FT-500/4282c191-344e-4326-a80e-49b712687e7c.json b/data/hfopenllm_v2/KSU-HW-SEC/Llama3-70b-SVA-FT-500/4282c191-344e-4326-a80e-49b712687e7c.json deleted file mode 100644 index 9e4077331..000000000 --- a/data/hfopenllm_v2/KSU-HW-SEC/Llama3-70b-SVA-FT-500/4282c191-344e-4326-a80e-49b712687e7c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/KSU-HW-SEC_Llama3-70b-SVA-FT-500/1762652579.6980212", - "retrieved_timestamp": "1762652579.698022", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "KSU-HW-SEC/Llama3-70b-SVA-FT-500", - "developer": "KSU-HW-SEC", - "inference_platform": "unknown", - "id": "KSU-HW-SEC/Llama3-70b-SVA-FT-500", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6105223030448099 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6692236023098005 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21374622356495468 - } - }, - { - "evaluation_name": 
"GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45114583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.522689494680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/KSU-HW-SEC/Llama3-70b-SVA-FT-500/fda2277b-1513-416e-b586-ed05920a0bb4.json b/data/hfopenllm_v2/KSU-HW-SEC/Llama3-70b-SVA-FT-500/fda2277b-1513-416e-b586-ed05920a0bb4.json new file mode 100644 index 000000000..12fd56cb7 --- /dev/null +++ b/data/hfopenllm_v2/KSU-HW-SEC/Llama3-70b-SVA-FT-500/fda2277b-1513-416e-b586-ed05920a0bb4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/KSU-HW-SEC_Llama3-70b-SVA-FT-500/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3-70b-SVA-FT-500", + "id": "KSU-HW-SEC/Llama3-70b-SVA-FT-500", + "developer": "KSU-HW-SEC", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6105 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6692 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2137 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3809 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4511 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5227 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/KSU-HW-SEC/Llama3-70b-SVA-FT-final/58fe6545-2f0c-44de-a29b-2da839b141a4.json b/data/hfopenllm_v2/KSU-HW-SEC/Llama3-70b-SVA-FT-final/58fe6545-2f0c-44de-a29b-2da839b141a4.json deleted file mode 100644 index 259f810a5..000000000 --- a/data/hfopenllm_v2/KSU-HW-SEC/Llama3-70b-SVA-FT-final/58fe6545-2f0c-44de-a29b-2da839b141a4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/KSU-HW-SEC_Llama3-70b-SVA-FT-final/1762652579.698244", - "retrieved_timestamp": "1762652579.698245", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "KSU-HW-SEC/Llama3-70b-SVA-FT-final", - "developer": "KSU-HW-SEC", - "inference_platform": "unknown", - "id": "KSU-HW-SEC/Llama3-70b-SVA-FT-final", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6164676391973297 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6650146340680478 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21978851963746224 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.375 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4565416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5242686170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/KSU-HW-SEC/Llama3-70b-SVA-FT-final/b3dde216-f80a-4664-aadc-b5f5dd3e5895.json b/data/hfopenllm_v2/KSU-HW-SEC/Llama3-70b-SVA-FT-final/b3dde216-f80a-4664-aadc-b5f5dd3e5895.json new file mode 100644 index 
000000000..54dad4185 --- /dev/null +++ b/data/hfopenllm_v2/KSU-HW-SEC/Llama3-70b-SVA-FT-final/b3dde216-f80a-4664-aadc-b5f5dd3e5895.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/KSU-HW-SEC_Llama3-70b-SVA-FT-final/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3-70b-SVA-FT-final", + "id": "KSU-HW-SEC/Llama3-70b-SVA-FT-final", + "developer": "KSU-HW-SEC", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6165 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.665 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2198 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.375 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4565 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5243 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/KSU-HW-SEC/Llama3.1-70b-SVA-FT-1000step/07ed6241-fd1a-46eb-91fd-92a4a8f6bd15.json b/data/hfopenllm_v2/KSU-HW-SEC/Llama3.1-70b-SVA-FT-1000step/07ed6241-fd1a-46eb-91fd-92a4a8f6bd15.json new file mode 100644 index 000000000..39cc4b352 --- /dev/null +++ b/data/hfopenllm_v2/KSU-HW-SEC/Llama3.1-70b-SVA-FT-1000step/07ed6241-fd1a-46eb-91fd-92a4a8f6bd15.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/KSU-HW-SEC_Llama3.1-70b-SVA-FT-1000step/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.1-70b-SVA-FT-1000step", + "id": "KSU-HW-SEC/Llama3.1-70b-SVA-FT-1000step", + "developer": "KSU-HW-SEC", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7238 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6903 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.321 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.396 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4592 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5252 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/KSU-HW-SEC/Llama3.1-70b-SVA-FT-1000step/fe896cef-7667-482d-b7f1-5361fc66ccce.json b/data/hfopenllm_v2/KSU-HW-SEC/Llama3.1-70b-SVA-FT-1000step/fe896cef-7667-482d-b7f1-5361fc66ccce.json deleted file mode 100644 index 6664c79ab..000000000 --- a/data/hfopenllm_v2/KSU-HW-SEC/Llama3.1-70b-SVA-FT-1000step/fe896cef-7667-482d-b7f1-5361fc66ccce.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/KSU-HW-SEC_Llama3.1-70b-SVA-FT-1000step/1762652579.698519", - "retrieved_timestamp": "1762652579.69852", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "KSU-HW-SEC/Llama3.1-70b-SVA-FT-1000step", - "developer": "KSU-HW-SEC", - "inference_platform": "unknown", - "id": "KSU-HW-SEC/Llama3.1-70b-SVA-FT-1000step", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7238039512936785 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6903120365165111 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32099697885196377 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3959731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45917708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5251828457446809 - } - } - ] -} diff --git a/data/hfopenllm_v2/Khetterman/DarkAtom-12B-v3/64802b86-879e-4072-b5ad-aab17d7251f0.json b/data/hfopenllm_v2/Khetterman/DarkAtom-12B-v3/64802b86-879e-4072-b5ad-aab17d7251f0.json deleted file mode 100644 index a8670958c..000000000 --- a/data/hfopenllm_v2/Khetterman/DarkAtom-12B-v3/64802b86-879e-4072-b5ad-aab17d7251f0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Khetterman_DarkAtom-12B-v3/1762652579.6987362", - "retrieved_timestamp": "1762652579.698737", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Khetterman/DarkAtom-12B-v3", - "developer": "Khetterman", - "inference_platform": "unknown", - "id": "Khetterman/DarkAtom-12B-v3", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.6173419859306639 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5153709655381875 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11102719033232629 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4468020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3546376329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/Khetterman/DarkAtom-12B-v3/ba76c356-cd6a-4636-8ab1-18bb9df69881.json b/data/hfopenllm_v2/Khetterman/DarkAtom-12B-v3/ba76c356-cd6a-4636-8ab1-18bb9df69881.json new file mode 100644 index 000000000..45d9e2c04 --- /dev/null +++ b/data/hfopenllm_v2/Khetterman/DarkAtom-12B-v3/ba76c356-cd6a-4636-8ab1-18bb9df69881.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Khetterman_DarkAtom-12B-v3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DarkAtom-12B-v3", + "id": "Khetterman/DarkAtom-12B-v3", + "developer": "Khetterman", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6173 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5154 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.111 + } + }, + { + 
"evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4468 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3546 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Khetterman/Kosmos-8B-v1/936cbaa1-e55b-46b8-9610-a5a8faaf4434.json b/data/hfopenllm_v2/Khetterman/Kosmos-8B-v1/936cbaa1-e55b-46b8-9610-a5a8faaf4434.json deleted file mode 100644 index 0545f1ad0..000000000 --- a/data/hfopenllm_v2/Khetterman/Kosmos-8B-v1/936cbaa1-e55b-46b8-9610-a5a8faaf4434.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Khetterman_Kosmos-8B-v1/1762652579.6990001", - "retrieved_timestamp": "1762652579.699001", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Khetterman/Kosmos-8B-v1", - "developer": "Khetterman", - "inference_platform": "unknown", - "id": "Khetterman/Kosmos-8B-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41291107594515886 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5233522858623628 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09894259818731117 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3918854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.366938164893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/Khetterman/Kosmos-8B-v1/c6ae54a1-2821-48d1-b689-bbb85aaa70a6.json b/data/hfopenllm_v2/Khetterman/Kosmos-8B-v1/c6ae54a1-2821-48d1-b689-bbb85aaa70a6.json new file mode 100644 index 000000000..9f41fea54 --- /dev/null +++ b/data/hfopenllm_v2/Khetterman/Kosmos-8B-v1/c6ae54a1-2821-48d1-b689-bbb85aaa70a6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Khetterman_Kosmos-8B-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-8B-v1", + "id": "Khetterman/Kosmos-8B-v1", + "developer": "Khetterman", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4129 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5234 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0989 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3919 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3669 + } + } + ] +} \ No newline at 
end of file diff --git a/data/hfopenllm_v2/Kimargin/GPT-NEO-1.3B-wiki/6f296f0e-80ca-49b7-94e7-cb45b795c715.json b/data/hfopenllm_v2/Kimargin/GPT-NEO-1.3B-wiki/6f296f0e-80ca-49b7-94e7-cb45b795c715.json new file mode 100644 index 000000000..2495d87be --- /dev/null +++ b/data/hfopenllm_v2/Kimargin/GPT-NEO-1.3B-wiki/6f296f0e-80ca-49b7-94e7-cb45b795c715.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Kimargin_GPT-NEO-1.3B-wiki/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "GPT-NEO-1.3B-wiki", + "id": "Kimargin/GPT-NEO-1.3B-wiki", + "developer": "Kimargin", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoForCausalLM", + "params_billions": 1.316 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1921 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3026 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0144 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.245 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3883 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1099 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/KingNish/Qwen2.5-0.5b-Test-ft/b5509e11-820a-4ad4-8c6a-0294762502a8.json b/data/hfopenllm_v2/KingNish/Qwen2.5-0.5b-Test-ft/b5509e11-820a-4ad4-8c6a-0294762502a8.json new file mode 100644 index 000000000..c989aeb0c --- /dev/null 
+++ b/data/hfopenllm_v2/KingNish/Qwen2.5-0.5b-Test-ft/b5509e11-820a-4ad4-8c6a-0294762502a8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/KingNish_Qwen2.5-0.5b-Test-ft/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5b-Test-ft", + "id": "KingNish/Qwen2.5-0.5b-Test-ft", + "developer": "KingNish", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2671 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3232 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0355 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3421 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1689 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/KingNish/Reasoning-0.5b/90d73665-8d83-4e74-ab7d-29b1d3b6181b.json b/data/hfopenllm_v2/KingNish/Reasoning-0.5b/90d73665-8d83-4e74-ab7d-29b1d3b6181b.json new file mode 100644 index 000000000..57493bf65 --- /dev/null +++ b/data/hfopenllm_v2/KingNish/Reasoning-0.5b/90d73665-8d83-4e74-ab7d-29b1d3b6181b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/KingNish_Reasoning-0.5b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + 
"source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Reasoning-0.5b", + "id": "KingNish/Reasoning-0.5b", + "developer": "KingNish", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2174 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3354 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0219 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3513 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1641 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/KingNish/Reasoning-0.5b/98f5e59e-0bdb-405b-a18e-3addd8920951.json b/data/hfopenllm_v2/KingNish/Reasoning-0.5b/98f5e59e-0bdb-405b-a18e-3addd8920951.json deleted file mode 100644 index 2d95dc10d..000000000 --- a/data/hfopenllm_v2/KingNish/Reasoning-0.5b/98f5e59e-0bdb-405b-a18e-3addd8920951.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/KingNish_Reasoning-0.5b/1762652579.6997252", - "retrieved_timestamp": "1762652579.699726", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": 
"documentation" - }, - "model_info": { - "name": "KingNish/Reasoning-0.5b", - "developer": "KingNish", - "inference_platform": "unknown", - "id": "KingNish/Reasoning-0.5b", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.217421995859874 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33536255853174524 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35133333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16414561170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/KingNish/Reasoning-Llama-3b-v0.1/72387647-cbac-4b72-9c22-db7029a39457.json b/data/hfopenllm_v2/KingNish/Reasoning-Llama-3b-v0.1/72387647-cbac-4b72-9c22-db7029a39457.json new file mode 100644 index 000000000..e30cf7c55 --- /dev/null +++ b/data/hfopenllm_v2/KingNish/Reasoning-Llama-3b-v0.1/72387647-cbac-4b72-9c22-db7029a39457.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/KingNish_Reasoning-Llama-3b-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Reasoning-Llama-3b-v0.1", + "id": "KingNish/Reasoning-Llama-3b-v0.1", + "developer": "KingNish", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6225 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4343 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1299 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3168 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/KingNish/qwen-1b-continued-v2.1/6219ec01-4b6a-4acd-aee1-96c3e8e48643.json b/data/hfopenllm_v2/KingNish/qwen-1b-continued-v2.1/6219ec01-4b6a-4acd-aee1-96c3e8e48643.json new file mode 100644 index 000000000..143abb8e4 --- /dev/null +++ b/data/hfopenllm_v2/KingNish/qwen-1b-continued-v2.1/6219ec01-4b6a-4acd-aee1-96c3e8e48643.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/KingNish_qwen-1b-continued-v2.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwen-1b-continued-v2.1", + "id": "KingNish/qwen-1b-continued-v2.1", + "developer": "KingNish", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.277 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1127 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3042 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 
5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0091 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4154 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1278 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/KingNish/qwen-1b-continued-v2.2/5c323d7c-25cd-4718-8a1f-54d986cadaf2.json b/data/hfopenllm_v2/KingNish/qwen-1b-continued-v2.2/5c323d7c-25cd-4718-8a1f-54d986cadaf2.json new file mode 100644 index 000000000..04f12a579 --- /dev/null +++ b/data/hfopenllm_v2/KingNish/qwen-1b-continued-v2.2/5c323d7c-25cd-4718-8a1f-54d986cadaf2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/KingNish_qwen-1b-continued-v2.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwen-1b-continued-v2.2", + "id": "KingNish/qwen-1b-continued-v2.2", + "developer": "KingNish", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.277 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1413 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3059 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0151 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2567 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3513 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1262 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/KingNish/qwen-1b-continued-v2/adfab21a-941b-4efc-8b63-fdfb3074ba9b.json b/data/hfopenllm_v2/KingNish/qwen-1b-continued-v2/adfab21a-941b-4efc-8b63-fdfb3074ba9b.json new file mode 100644 index 000000000..7b2c2ff2f --- /dev/null +++ b/data/hfopenllm_v2/KingNish/qwen-1b-continued-v2/adfab21a-941b-4efc-8b63-fdfb3074ba9b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/KingNish_qwen-1b-continued-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwen-1b-continued-v2", + "id": "KingNish/qwen-1b-continued-v2", + "developer": "KingNish", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.277 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1579 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3119 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0106 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3393 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1193 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/KingNish/qwen-1b-continued/350d00a4-7501-4130-a069-323530bc9729.json b/data/hfopenllm_v2/KingNish/qwen-1b-continued/350d00a4-7501-4130-a069-323530bc9729.json new file mode 100644 index 000000000..7d5687eaf --- /dev/null +++ b/data/hfopenllm_v2/KingNish/qwen-1b-continued/350d00a4-7501-4130-a069-323530bc9729.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/KingNish_qwen-1b-continued/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwen-1b-continued", + "id": "KingNish/qwen-1b-continued", + "developer": "KingNish", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.277 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1255 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2991 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0091 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + 
"dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3859 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1261 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Kquant03/CognitiveFusion2-4x7B-BF16/66f84aee-5d79-4fec-9fff-799ac874d165.json b/data/hfopenllm_v2/Kquant03/CognitiveFusion2-4x7B-BF16/66f84aee-5d79-4fec-9fff-799ac874d165.json deleted file mode 100644 index 70200399c..000000000 --- a/data/hfopenllm_v2/Kquant03/CognitiveFusion2-4x7B-BF16/66f84aee-5d79-4fec-9fff-799ac874d165.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Kquant03_CognitiveFusion2-4x7B-BF16/1762652579.701032", - "retrieved_timestamp": "1762652579.7010329", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Kquant03/CognitiveFusion2-4x7B-BF16", - "developer": "Kquant03", - "inference_platform": "unknown", - "id": "Kquant03/CognitiveFusion2-4x7B-BF16", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 24.154 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35665700341759865 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41078286111483786 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4145520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27925531914893614 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/Kquant03/CognitiveFusion2-4x7B-BF16/ea809d28-178e-4a0b-ab5a-34739077c5ff.json b/data/hfopenllm_v2/Kquant03/CognitiveFusion2-4x7B-BF16/ea809d28-178e-4a0b-ab5a-34739077c5ff.json new file mode 100644 index 000000000..49d973710 --- /dev/null +++ b/data/hfopenllm_v2/Kquant03/CognitiveFusion2-4x7B-BF16/ea809d28-178e-4a0b-ab5a-34739077c5ff.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Kquant03_CognitiveFusion2-4x7B-BF16/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CognitiveFusion2-4x7B-BF16", + "id": "Kquant03/CognitiveFusion2-4x7B-BF16", + "developer": "Kquant03", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.154 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3567 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4108 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0574 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2861 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4146 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2793 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Kquant03/L3-Pneuma-8B/243d5ccd-58f3-4da5-8718-553f3f456490.json b/data/hfopenllm_v2/Kquant03/L3-Pneuma-8B/243d5ccd-58f3-4da5-8718-553f3f456490.json new file mode 100644 index 
000000000..408f75c77 --- /dev/null +++ b/data/hfopenllm_v2/Kquant03/L3-Pneuma-8B/243d5ccd-58f3-4da5-8718-553f3f456490.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Kquant03_L3-Pneuma-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3-Pneuma-8B", + "id": "Kquant03/L3-Pneuma-8B", + "developer": "Kquant03", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2374 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4955 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0506 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4172 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3184 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Kquant03/L3-Pneuma-8B/5420d88b-bc26-4d04-9812-ffce8a3564e6.json b/data/hfopenllm_v2/Kquant03/L3-Pneuma-8B/5420d88b-bc26-4d04-9812-ffce8a3564e6.json deleted file mode 100644 index 89e4c6447..000000000 --- a/data/hfopenllm_v2/Kquant03/L3-Pneuma-8B/5420d88b-bc26-4d04-9812-ffce8a3564e6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Kquant03_L3-Pneuma-8B/1762652579.701272", - "retrieved_timestamp": "1762652579.7012732", - 
"source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Kquant03/L3-Pneuma-8B", - "developer": "Kquant03", - "inference_platform": "unknown", - "id": "Kquant03/L3-Pneuma-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2374056392593873 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49550433176754827 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41715624999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31840093085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/Krystalan/DRT-o1-14B/a45537a7-76a6-4855-b83b-abe965f13460.json b/data/hfopenllm_v2/Krystalan/DRT-o1-14B/a45537a7-76a6-4855-b83b-abe965f13460.json new file mode 100644 index 000000000..970337ee9 --- /dev/null +++ b/data/hfopenllm_v2/Krystalan/DRT-o1-14B/a45537a7-76a6-4855-b83b-abe965f13460.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Krystalan_DRT-o1-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DRT-o1-14B", + "id": "Krystalan/DRT-o1-14B", + "developer": "Krystalan", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4068 + } + }, + { + 
"evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6379 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4826 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3523 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4795 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5179 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Krystalan/DRT-o1-14B/dbd87f5e-e5ba-447b-8416-b6413c3dab09.json b/data/hfopenllm_v2/Krystalan/DRT-o1-14B/dbd87f5e-e5ba-447b-8416-b6413c3dab09.json deleted file mode 100644 index 3c0303eb4..000000000 --- a/data/hfopenllm_v2/Krystalan/DRT-o1-14B/dbd87f5e-e5ba-447b-8416-b6413c3dab09.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Krystalan_DRT-o1-14B/1762652579.70148", - "retrieved_timestamp": "1762652579.7014809", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Krystalan/DRT-o1-14B", - "developer": "Krystalan", - "inference_platform": "unknown", - "id": "Krystalan/DRT-o1-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4067662690549963 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.637927537514229 - } - }, - { - "evaluation_name": "MATH 
Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4826283987915408 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3523489932885906 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47951041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5178690159574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/Krystalan/DRT-o1-7B/9be911b6-b9f4-47b1-849d-62eb20c9e944.json b/data/hfopenllm_v2/Krystalan/DRT-o1-7B/9be911b6-b9f4-47b1-849d-62eb20c9e944.json new file mode 100644 index 000000000..e93c9a10d --- /dev/null +++ b/data/hfopenllm_v2/Krystalan/DRT-o1-7B/9be911b6-b9f4-47b1-849d-62eb20c9e944.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Krystalan_DRT-o1-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DRT-o1-7B", + "id": "Krystalan/DRT-o1-7B", + "developer": "Krystalan", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3928 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5468 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4479 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3213 + } + }, + { + "evaluation_name": "MUSR", + 
"source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5087 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4151 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Krystalan/DRT-o1-7B/acb8e4cc-41b2-47ef-b819-d480189c618c.json b/data/hfopenllm_v2/Krystalan/DRT-o1-7B/acb8e4cc-41b2-47ef-b819-d480189c618c.json deleted file mode 100644 index cd3be8819..000000000 --- a/data/hfopenllm_v2/Krystalan/DRT-o1-7B/acb8e4cc-41b2-47ef-b819-d480189c618c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Krystalan_DRT-o1-7B/1762652579.701715", - "retrieved_timestamp": "1762652579.701716", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Krystalan/DRT-o1-7B", - "developer": "Krystalan", - "inference_platform": "unknown", - "id": "Krystalan/DRT-o1-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3928276971768242 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5467693339610741 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4478851963746224 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.50865625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41514295212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/Kukedlc/NeuralExperiment-7b-MagicCoder-v7.5/33d7d5f0-cbee-4a26-b5e8-48bdd12492cf.json 
b/data/hfopenllm_v2/Kukedlc/NeuralExperiment-7b-MagicCoder-v7.5/33d7d5f0-cbee-4a26-b5e8-48bdd12492cf.json new file mode 100644 index 000000000..0cf70abae --- /dev/null +++ b/data/hfopenllm_v2/Kukedlc/NeuralExperiment-7b-MagicCoder-v7.5/33d7d5f0-cbee-4a26-b5e8-48bdd12492cf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Kukedlc_NeuralExperiment-7b-MagicCoder-v7.5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NeuralExperiment-7b-MagicCoder-v7.5", + "id": "Kukedlc/NeuralExperiment-7b-MagicCoder-v7.5", + "developer": "Kukedlc", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4553 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3988 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0665 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4282 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2824 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Kukedlc/NeuralExperiment-7b-MagicCoder-v7.5/4775e169-e3a7-41b6-bf1e-a7e8e0edb4fc.json b/data/hfopenllm_v2/Kukedlc/NeuralExperiment-7b-MagicCoder-v7.5/4775e169-e3a7-41b6-bf1e-a7e8e0edb4fc.json deleted file mode 100644 index bdfde41b6..000000000 --- 
a/data/hfopenllm_v2/Kukedlc/NeuralExperiment-7b-MagicCoder-v7.5/4775e169-e3a7-41b6-bf1e-a7e8e0edb4fc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Kukedlc_NeuralExperiment-7b-MagicCoder-v7.5/1762652579.701928", - "retrieved_timestamp": "1762652579.7019289", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Kukedlc/NeuralExperiment-7b-MagicCoder-v7.5", - "developer": "Kukedlc", - "inference_platform": "unknown", - "id": "Kukedlc/NeuralExperiment-7b-MagicCoder-v7.5", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4552509563513699 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3988446544778517 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4281979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2824135638297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/Kukedlc/NeuralLLaMa-3-8b-DT-v0.1/4355fbdd-ac72-4f26-8e07-b7e8d774d238.json b/data/hfopenllm_v2/Kukedlc/NeuralLLaMa-3-8b-DT-v0.1/4355fbdd-ac72-4f26-8e07-b7e8d774d238.json new file mode 100644 index 000000000..cdfa1a3cb --- /dev/null +++ b/data/hfopenllm_v2/Kukedlc/NeuralLLaMa-3-8b-DT-v0.1/4355fbdd-ac72-4f26-8e07-b7e8d774d238.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Kukedlc_NeuralLLaMa-3-8b-DT-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NeuralLLaMa-3-8b-DT-v0.1", + "id": "Kukedlc/NeuralLLaMa-3-8b-DT-v0.1", + "developer": "Kukedlc", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 
8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4371 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4987 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0808 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4071 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3792 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Kukedlc/NeuralLLaMa-3-8b-ORPO-v0.3/4bffc633-e20c-4874-b7db-d1b7dabb8070.json b/data/hfopenllm_v2/Kukedlc/NeuralLLaMa-3-8b-ORPO-v0.3/4bffc633-e20c-4874-b7db-d1b7dabb8070.json new file mode 100644 index 000000000..76e148b2f --- /dev/null +++ b/data/hfopenllm_v2/Kukedlc/NeuralLLaMa-3-8b-ORPO-v0.3/4bffc633-e20c-4874-b7db-d1b7dabb8070.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Kukedlc_NeuralLLaMa-3-8b-ORPO-v0.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NeuralLLaMa-3-8b-ORPO-v0.3", + "id": "Kukedlc/NeuralLLaMa-3-8b-ORPO-v0.3", + "developer": "Kukedlc", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5276 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4557 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0483 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2391 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.37 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3057 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Kukedlc/NeuralSynthesis-7B-v0.1/2d5c844d-d950-4254-bac2-0a986659c541.json b/data/hfopenllm_v2/Kukedlc/NeuralSynthesis-7B-v0.1/2d5c844d-d950-4254-bac2-0a986659c541.json new file mode 100644 index 000000000..79df21f7a --- /dev/null +++ b/data/hfopenllm_v2/Kukedlc/NeuralSynthesis-7B-v0.1/2d5c844d-d950-4254-bac2-0a986659c541.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Kukedlc_NeuralSynthesis-7B-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NeuralSynthesis-7B-v0.1", + "id": "Kukedlc/NeuralSynthesis-7B-v0.1", + "developer": "Kukedlc", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4185 + } + }, + { + "evaluation_name": "BBH", + 
"source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5145 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0634 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3049 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Kukedlc/NeuralSynthesis-7B-v0.1/3d2603e3-d556-48e8-ba94-555faf9f1807.json b/data/hfopenllm_v2/Kukedlc/NeuralSynthesis-7B-v0.1/3d2603e3-d556-48e8-ba94-555faf9f1807.json deleted file mode 100644 index d147ab4e6..000000000 --- a/data/hfopenllm_v2/Kukedlc/NeuralSynthesis-7B-v0.1/3d2603e3-d556-48e8-ba94-555faf9f1807.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Kukedlc_NeuralSynthesis-7B-v0.1/1762652579.7026482", - "retrieved_timestamp": "1762652579.702649", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Kukedlc/NeuralSynthesis-7B-v0.1", - "developer": "Kukedlc", - "inference_platform": "unknown", - "id": "Kukedlc/NeuralSynthesis-7B-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4184563624516283 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5144745481048844 - 
} - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43328125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.304936835106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/Kukedlc/NeuralSynthesis-7B-v0.3/b3412f38-d0bc-47c9-a750-14bdbf4e65d8.json b/data/hfopenllm_v2/Kukedlc/NeuralSynthesis-7B-v0.3/b3412f38-d0bc-47c9-a750-14bdbf4e65d8.json deleted file mode 100644 index 4205af851..000000000 --- a/data/hfopenllm_v2/Kukedlc/NeuralSynthesis-7B-v0.3/b3412f38-d0bc-47c9-a750-14bdbf4e65d8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Kukedlc_NeuralSynthesis-7B-v0.3/1762652579.702864", - "retrieved_timestamp": "1762652579.702865", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Kukedlc/NeuralSynthesis-7B-v0.3", - "developer": "Kukedlc", - "inference_platform": "unknown", - "id": "Kukedlc/NeuralSynthesis-7B-v0.3", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4078400865259733 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5138078814382175 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07779456193353475 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4345833333333333 - } - 
}, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30501994680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/Kukedlc/NeuralSynthesis-7B-v0.3/f6e74b3c-9ee4-40c3-bf92-35d965503a04.json b/data/hfopenllm_v2/Kukedlc/NeuralSynthesis-7B-v0.3/f6e74b3c-9ee4-40c3-bf92-35d965503a04.json new file mode 100644 index 000000000..869b589d4 --- /dev/null +++ b/data/hfopenllm_v2/Kukedlc/NeuralSynthesis-7B-v0.3/f6e74b3c-9ee4-40c3-bf92-35d965503a04.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Kukedlc_NeuralSynthesis-7B-v0.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NeuralSynthesis-7B-v0.3", + "id": "Kukedlc/NeuralSynthesis-7B-v0.3", + "developer": "Kukedlc", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4078 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5138 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0778 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4346 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.305 + } 
+ } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Kukedlc/NeuralSynthesis-7b-v0.4-slerp/4e30bf00-f6b7-4c28-8cf8-dc64427fb958.json b/data/hfopenllm_v2/Kukedlc/NeuralSynthesis-7b-v0.4-slerp/4e30bf00-f6b7-4c28-8cf8-dc64427fb958.json deleted file mode 100644 index e109a69d9..000000000 --- a/data/hfopenllm_v2/Kukedlc/NeuralSynthesis-7b-v0.4-slerp/4e30bf00-f6b7-4c28-8cf8-dc64427fb958.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Kukedlc_NeuralSynthesis-7b-v0.4-slerp/1762652579.7030761", - "retrieved_timestamp": "1762652579.703077", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Kukedlc/NeuralSynthesis-7b-v0.4-slerp", - "developer": "Kukedlc", - "inference_platform": "unknown", - "id": "Kukedlc/NeuralSynthesis-7b-v0.4-slerp", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3947259936967247 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5142932549151301 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06268882175226587 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43324999999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3042719414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/Kukedlc/NeuralSynthesis-7b-v0.4-slerp/8f1d2600-7347-48b8-9759-11570598459d.json b/data/hfopenllm_v2/Kukedlc/NeuralSynthesis-7b-v0.4-slerp/8f1d2600-7347-48b8-9759-11570598459d.json new file mode 100644 index 000000000..950dab5d0 --- /dev/null +++ b/data/hfopenllm_v2/Kukedlc/NeuralSynthesis-7b-v0.4-slerp/8f1d2600-7347-48b8-9759-11570598459d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Kukedlc_NeuralSynthesis-7b-v0.4-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + 
"evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NeuralSynthesis-7b-v0.4-slerp", + "id": "Kukedlc/NeuralSynthesis-7b-v0.4-slerp", + "developer": "Kukedlc", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3947 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5143 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0627 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4332 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3043 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Kukedlc/Qwen-2.5-7b-Spanish-o1-CoT/cd653bfd-2c06-4224-aeeb-bf591995a69e.json b/data/hfopenllm_v2/Kukedlc/Qwen-2.5-7b-Spanish-o1-CoT/cd653bfd-2c06-4224-aeeb-bf591995a69e.json new file mode 100644 index 000000000..21324748e --- /dev/null +++ b/data/hfopenllm_v2/Kukedlc/Qwen-2.5-7b-Spanish-o1-CoT/cd653bfd-2c06-4224-aeeb-bf591995a69e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Kukedlc_Qwen-2.5-7b-Spanish-o1-CoT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen-2.5-7b-Spanish-o1-CoT", + "id": "Kukedlc/Qwen-2.5-7b-Spanish-o1-CoT", + "developer": "Kukedlc", + "inference_platform": "unknown", + 
"additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.421 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5602 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3205 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4777 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4363 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Kumar955/Hemanth-llm/0787e240-a1f4-444a-b3dd-7ef1a1d394b4.json b/data/hfopenllm_v2/Kumar955/Hemanth-llm/0787e240-a1f4-444a-b3dd-7ef1a1d394b4.json deleted file mode 100644 index cf9069419..000000000 --- a/data/hfopenllm_v2/Kumar955/Hemanth-llm/0787e240-a1f4-444a-b3dd-7ef1a1d394b4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Kumar955_Hemanth-llm/1762652579.703545", - "retrieved_timestamp": "1762652579.703546", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Kumar955/Hemanth-llm", - "developer": "Kumar955", - "inference_platform": "unknown", - "id": "Kumar955/Hemanth-llm", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": 
"IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5045102550122564 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.522494907014536 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0702416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4485625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3112533244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/Kumar955/Hemanth-llm/cdf1fcc7-429d-44bd-b76c-d26ee743f6fe.json b/data/hfopenllm_v2/Kumar955/Hemanth-llm/cdf1fcc7-429d-44bd-b76c-d26ee743f6fe.json new file mode 100644 index 000000000..c93b19301 --- /dev/null +++ b/data/hfopenllm_v2/Kumar955/Hemanth-llm/cdf1fcc7-429d-44bd-b76c-d26ee743f6fe.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Kumar955_Hemanth-llm/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Hemanth-llm", + "id": "Kumar955/Hemanth-llm", + "developer": "Kumar955", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5045 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5225 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0702 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4486 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3113 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/L-RAGE/3_PRYMMAL-ECE-7B-SLERP-V1/02fee4d1-8899-4a93-b6f1-a1a8d251cedd.json b/data/hfopenllm_v2/L-RAGE/3_PRYMMAL-ECE-7B-SLERP-V1/02fee4d1-8899-4a93-b6f1-a1a8d251cedd.json deleted file mode 100644 index 90a59227d..000000000 --- a/data/hfopenllm_v2/L-RAGE/3_PRYMMAL-ECE-7B-SLERP-V1/02fee4d1-8899-4a93-b6f1-a1a8d251cedd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/L-RAGE_3_PRYMMAL-ECE-7B-SLERP-V1/1762652579.703805", - "retrieved_timestamp": "1762652579.703806", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "L-RAGE/3_PRYMMAL-ECE-7B-SLERP-V1", - "developer": "L-RAGE", - "inference_platform": "unknown", - "id": "L-RAGE/3_PRYMMAL-ECE-7B-SLERP-V1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27422572108671656 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.422793974567173 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10800604229607251 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3841354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29247007978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/L-RAGE/3_PRYMMAL-ECE-7B-SLERP-V1/4828bd36-5453-4383-8985-08d04a7ebecd.json b/data/hfopenllm_v2/L-RAGE/3_PRYMMAL-ECE-7B-SLERP-V1/4828bd36-5453-4383-8985-08d04a7ebecd.json new file mode 100644 index 000000000..22101047a --- /dev/null +++ b/data/hfopenllm_v2/L-RAGE/3_PRYMMAL-ECE-7B-SLERP-V1/4828bd36-5453-4383-8985-08d04a7ebecd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/L-RAGE_3_PRYMMAL-ECE-7B-SLERP-V1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "3_PRYMMAL-ECE-7B-SLERP-V1", + "id": "L-RAGE/3_PRYMMAL-ECE-7B-SLERP-V1", + "developer": "L-RAGE", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2742 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4228 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.108 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2819 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3841 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2925 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LEESM/llama-2-7b-hf-lora-oki100p/4c2baa59-c2f1-4779-9d21-1f69c0821968.json b/data/hfopenllm_v2/LEESM/llama-2-7b-hf-lora-oki100p/4c2baa59-c2f1-4779-9d21-1f69c0821968.json new file mode 100644 index 000000000..6b1aef1c0 --- /dev/null +++ b/data/hfopenllm_v2/LEESM/llama-2-7b-hf-lora-oki100p/4c2baa59-c2f1-4779-9d21-1f69c0821968.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LEESM_llama-2-7b-hf-lora-oki100p/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-2-7b-hf-lora-oki100p", + "id": "LEESM/llama-2-7b-hf-lora-oki100p", + "developer": "LEESM", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.738 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2513 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3492 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0166 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2693 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3687 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1856 + } + } + ] +} \ No 
newline at end of file diff --git a/data/hfopenllm_v2/LEESM/llama-2-7b-hf-lora-oki10p/555c1079-c4d0-4b9e-9d2d-769e7ba32429.json b/data/hfopenllm_v2/LEESM/llama-2-7b-hf-lora-oki10p/555c1079-c4d0-4b9e-9d2d-769e7ba32429.json new file mode 100644 index 000000000..a80706c7c --- /dev/null +++ b/data/hfopenllm_v2/LEESM/llama-2-7b-hf-lora-oki10p/555c1079-c4d0-4b9e-9d2d-769e7ba32429.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LEESM_llama-2-7b-hf-lora-oki10p/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-2-7b-hf-lora-oki10p", + "id": "LEESM/llama-2-7b-hf-lora-oki10p", + "developer": "LEESM", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.738 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.227 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3531 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0166 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2542 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3475 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1679 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LEESM/llama-3-8b-bnb-4b-kowiki231101/58a4a1c6-0ee4-4524-9ca1-b40870f1d600.json b/data/hfopenllm_v2/LEESM/llama-3-8b-bnb-4b-kowiki231101/58a4a1c6-0ee4-4524-9ca1-b40870f1d600.json new file 
mode 100644 index 000000000..87aab7f15 --- /dev/null +++ b/data/hfopenllm_v2/LEESM/llama-3-8b-bnb-4b-kowiki231101/58a4a1c6-0ee4-4524-9ca1-b40870f1d600.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LEESM_llama-3-8b-bnb-4b-kowiki231101/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-3-8b-bnb-4b-kowiki231101", + "id": "LEESM/llama-3-8b-bnb-4b-kowiki231101", + "developer": "LEESM", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1685 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4131 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0136 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3551 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2425 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LEESM/llama-3-Korean-Bllossom-8B-trexlab-oki10p/eea2a38a-4f1b-48d0-894c-09974894f264.json b/data/hfopenllm_v2/LEESM/llama-3-Korean-Bllossom-8B-trexlab-oki10p/eea2a38a-4f1b-48d0-894c-09974894f264.json new file mode 100644 index 000000000..65d39674d --- /dev/null +++ b/data/hfopenllm_v2/LEESM/llama-3-Korean-Bllossom-8B-trexlab-oki10p/eea2a38a-4f1b-48d0-894c-09974894f264.json @@ -0,0 +1,132 @@ +{ + 
"schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LEESM_llama-3-Korean-Bllossom-8B-trexlab-oki10p/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-3-Korean-Bllossom-8B-trexlab-oki10p", + "id": "LEESM/llama-3-Korean-Bllossom-8B-trexlab-oki10p", + "developer": "LEESM", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2137 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4343 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0468 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2752 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3869 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3177 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/3d8063ab-0ad5-43e4-83ff-90b46dee766f.json b/data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/3d8063ab-0ad5-43e4-83ff-90b46dee766f.json new file mode 100644 index 000000000..53d2d42f2 --- /dev/null +++ b/data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/3d8063ab-0ad5-43e4-83ff-90b46dee766f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LGAI-EXAONE_EXAONE-3.0-7.8B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + 
"source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EXAONE-3.0-7.8B-Instruct", + "id": "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct", + "developer": "LGAI-EXAONE", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "ExaoneForCausalLM", + "params_billions": 7.8 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7193 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4174 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3044 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3661 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3577 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/97f7c73d-6d69-4c04-9cff-4914253003b0.json b/data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/97f7c73d-6d69-4c04-9cff-4914253003b0.json deleted file mode 100644 index 366806516..000000000 --- a/data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/97f7c73d-6d69-4c04-9cff-4914253003b0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LGAI-EXAONE_EXAONE-3.0-7.8B-Instruct/1762652579.705025", - "retrieved_timestamp": "1762652579.705025", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct", - "developer": "LGAI-EXAONE", - "inference_platform": "unknown", - "id": "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "ExaoneForCausalLM", - "params_billions": 7.8 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7192826145737754 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4174432647784512 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30438066465256797 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.366125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35771276595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct/da5e0284-7c44-42d4-a110-a23880de277f.json b/data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct/da5e0284-7c44-42d4-a110-a23880de277f.json new file mode 100644 index 000000000..ae4eb863d --- /dev/null +++ b/data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct/da5e0284-7c44-42d4-a110-a23880de277f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LGAI-EXAONE_EXAONE-3.5-2.4B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EXAONE-3.5-2.4B-Instruct", + "id": "LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct", + "developer": "LGAI-EXAONE", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "ExaoneForCausalLM", + "params_billions": 2.405 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.795 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + 
"source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4092 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3678 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3661 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.328 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct/e2a2d764-ba6b-450d-8f94-abf2af95e793.json b/data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct/e2a2d764-ba6b-450d-8f94-abf2af95e793.json deleted file mode 100644 index 2325e6d6d..000000000 --- a/data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct/e2a2d764-ba6b-450d-8f94-abf2af95e793.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LGAI-EXAONE_EXAONE-3.5-2.4B-Instruct/1762652579.705282", - "retrieved_timestamp": "1762652579.7052832", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct", - "developer": "LGAI-EXAONE", - "inference_platform": "unknown", - "id": "LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "ExaoneForCausalLM", - "params_billions": 2.405 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7950449252428002 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4092347113723405 - } - }, - { 
- "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3678247734138973 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.366125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32804188829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.5-32B-Instruct/a172b1d1-6d6e-4cd9-9a85-78cb4f71661e.json b/data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.5-32B-Instruct/a172b1d1-6d6e-4cd9-9a85-78cb4f71661e.json deleted file mode 100644 index f2196526f..000000000 --- a/data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.5-32B-Instruct/a172b1d1-6d6e-4cd9-9a85-78cb4f71661e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LGAI-EXAONE_EXAONE-3.5-32B-Instruct/1762652579.705488", - "retrieved_timestamp": "1762652579.705489", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LGAI-EXAONE/EXAONE-3.5-32B-Instruct", - "developer": "LGAI-EXAONE", - "inference_platform": "unknown", - "id": "LGAI-EXAONE/EXAONE-3.5-32B-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "ExaoneForCausalLM", - "params_billions": 32.003 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8391833668000904 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5760913742720142 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5128398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.38066666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4636801861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.5-32B-Instruct/bef017bb-47b1-48e4-93c4-3b222a16af7a.json b/data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.5-32B-Instruct/bef017bb-47b1-48e4-93c4-3b222a16af7a.json new file mode 100644 index 000000000..5bbd9c37d --- /dev/null +++ b/data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.5-32B-Instruct/bef017bb-47b1-48e4-93c4-3b222a16af7a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LGAI-EXAONE_EXAONE-3.5-32B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EXAONE-3.5-32B-Instruct", + "id": "LGAI-EXAONE/EXAONE-3.5-32B-Instruct", + "developer": "LGAI-EXAONE", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "ExaoneForCausalLM", + "params_billions": 32.003 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8392 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5761 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5128 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2878 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3807 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 
1.0 + }, + "score_details": { + "score": 0.4637 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct/401c83b0-b7d2-4987-9e46-f127fdbb595f.json b/data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct/401c83b0-b7d2-4987-9e46-f127fdbb595f.json new file mode 100644 index 000000000..48cb86a10 --- /dev/null +++ b/data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct/401c83b0-b7d2-4987-9e46-f127fdbb595f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LGAI-EXAONE_EXAONE-3.5-7.8B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EXAONE-3.5-7.8B-Instruct", + "id": "LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct", + "developer": "LGAI-EXAONE", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "ExaoneForCausalLM", + "params_billions": 7.818 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8136 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4728 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4751 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3779 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4133 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct/7fa474fb-4aa1-4855-9759-a28056c7a5e7.json 
b/data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct/7fa474fb-4aa1-4855-9759-a28056c7a5e7.json deleted file mode 100644 index 02e460cbc..000000000 --- a/data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct/7fa474fb-4aa1-4855-9759-a28056c7a5e7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LGAI-EXAONE_EXAONE-3.5-7.8B-Instruct/1762652579.705873", - "retrieved_timestamp": "1762652579.705875", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct", - "developer": "LGAI-EXAONE", - "inference_platform": "unknown", - "id": "LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "ExaoneForCausalLM", - "params_billions": 7.818 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8136045692096969 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4727592304359862 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47507552870090636 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3779375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4133144946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/LLM360/K2-Chat/c6fde59b-73ed-4179-a907-076be068b262.json b/data/hfopenllm_v2/LLM360/K2-Chat/c6fde59b-73ed-4179-a907-076be068b262.json new file mode 100644 index 000000000..6ec8b2598 --- /dev/null +++ b/data/hfopenllm_v2/LLM360/K2-Chat/c6fde59b-73ed-4179-a907-076be068b262.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LLM360_K2-Chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "K2-Chat", + "id": "LLM360/K2-Chat", + "developer": "LLM360", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + 
"params_billions": 65.286 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5152 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5358 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1035 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.457 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3371 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LLM360/K2-Chat/f7e7c296-74f4-49fa-946d-142341749355.json b/data/hfopenllm_v2/LLM360/K2-Chat/f7e7c296-74f4-49fa-946d-142341749355.json deleted file mode 100644 index 8f3008c80..000000000 --- a/data/hfopenllm_v2/LLM360/K2-Chat/f7e7c296-74f4-49fa-946d-142341749355.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LLM360_K2-Chat/1762652579.706591", - "retrieved_timestamp": "1762652579.706592", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LLM360/K2-Chat", - "developer": "LLM360", - "inference_platform": "unknown", - "id": "LLM360/K2-Chat", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 65.286 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5151763986223221 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5358099630242067 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10347432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.457 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3371010638297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/LLM360/K2/4b1e267f-90c4-403a-a7cd-5c006153408b.json b/data/hfopenllm_v2/LLM360/K2/4b1e267f-90c4-403a-a7cd-5c006153408b.json deleted file mode 100644 index f64e69e5c..000000000 --- a/data/hfopenllm_v2/LLM360/K2/4b1e267f-90c4-403a-a7cd-5c006153408b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LLM360_K2/1762652579.706215", - "retrieved_timestamp": "1762652579.7062159", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LLM360/K2", - "developer": "LLM360", - "inference_platform": "unknown", - "id": "LLM360/K2", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 65.286 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2252157608478836 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4971835676523677 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027190332326283987 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": 
"MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39799999999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30044880319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/LLM360/K2/90997fea-6c67-493e-bd8e-5327cfb33ea4.json b/data/hfopenllm_v2/LLM360/K2/90997fea-6c67-493e-bd8e-5327cfb33ea4.json new file mode 100644 index 000000000..c8a7cd176 --- /dev/null +++ b/data/hfopenllm_v2/LLM360/K2/90997fea-6c67-493e-bd8e-5327cfb33ea4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LLM360_K2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "K2", + "id": "LLM360/K2", + "developer": "LLM360", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 65.286 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2252 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4972 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0272 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2768 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.398 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3004 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LLM4Binary/llm4decompile-1.3b-v2/08957d63-7462-44ff-9dd8-060a5801a31b.json b/data/hfopenllm_v2/LLM4Binary/llm4decompile-1.3b-v2/08957d63-7462-44ff-9dd8-060a5801a31b.json new file mode 100644 index 000000000..85cc8161c --- /dev/null +++ b/data/hfopenllm_v2/LLM4Binary/llm4decompile-1.3b-v2/08957d63-7462-44ff-9dd8-060a5801a31b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LLM4Binary_llm4decompile-1.3b-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llm4decompile-1.3b-v2", + "id": "LLM4Binary/llm4decompile-1.3b-v2", + "developer": "LLM4Binary", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.346 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2268 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3272 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0128 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2357 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4072 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1209 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/LLM4Binary/llm4decompile-1.3b-v2/86f0a81b-69da-4f36-a6b0-8a36f79d5c1c.json b/data/hfopenllm_v2/LLM4Binary/llm4decompile-1.3b-v2/86f0a81b-69da-4f36-a6b0-8a36f79d5c1c.json deleted file mode 100644 index 76ef11f2a..000000000 --- a/data/hfopenllm_v2/LLM4Binary/llm4decompile-1.3b-v2/86f0a81b-69da-4f36-a6b0-8a36f79d5c1c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LLM4Binary_llm4decompile-1.3b-v2/1762652579.7068748", - "retrieved_timestamp": "1762652579.706877", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LLM4Binary/llm4decompile-1.3b-v2", - "developer": "LLM4Binary", - "inference_platform": "unknown", - "id": "LLM4Binary/llm4decompile-1.3b-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.346 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22678936333373229 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271808417267589 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23573825503355705 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4071770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12092752659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/Lambent/qwen2.5-reinstruct-alternate-lumen-14B/a434f569-e7d6-4464-afa8-6104be43fa06.json b/data/hfopenllm_v2/Lambent/qwen2.5-reinstruct-alternate-lumen-14B/a434f569-e7d6-4464-afa8-6104be43fa06.json new file mode 100644 index 000000000..105590f52 --- /dev/null +++ b/data/hfopenllm_v2/Lambent/qwen2.5-reinstruct-alternate-lumen-14B/a434f569-e7d6-4464-afa8-6104be43fa06.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lambent_qwen2.5-reinstruct-alternate-lumen-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + 
"model_info": { + "name": "qwen2.5-reinstruct-alternate-lumen-14B", + "id": "Lambent/qwen2.5-reinstruct-alternate-lumen-14B", + "developer": "Lambent", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4794 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6459 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4622 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3767 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.477 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5388 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Langboat/Mengzi3-8B-Chat/13e12b5c-d3bb-4634-967d-e5741e623be1.json b/data/hfopenllm_v2/Langboat/Mengzi3-8B-Chat/13e12b5c-d3bb-4634-967d-e5741e623be1.json deleted file mode 100644 index 8ff900bbf..000000000 --- a/data/hfopenllm_v2/Langboat/Mengzi3-8B-Chat/13e12b5c-d3bb-4634-967d-e5741e623be1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Langboat_Mengzi3-8B-Chat/1762652579.707526", - "retrieved_timestamp": "1762652579.707527", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Langboat/Mengzi3-8B-Chat", - "developer": "Langboat", - "inference_platform": "unknown", - 
"id": "Langboat/Mengzi3-8B-Chat", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.513977357854936 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4683725003203179 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09063444108761329 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4077916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31416223404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/Langboat/Mengzi3-8B-Chat/e32ed251-e817-409f-b4c3-8f168f1ff822.json b/data/hfopenllm_v2/Langboat/Mengzi3-8B-Chat/e32ed251-e817-409f-b4c3-8f168f1ff822.json new file mode 100644 index 000000000..1c7e4d12f --- /dev/null +++ b/data/hfopenllm_v2/Langboat/Mengzi3-8B-Chat/e32ed251-e817-409f-b4c3-8f168f1ff822.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Langboat_Mengzi3-8B-Chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mengzi3-8B-Chat", + "id": "Langboat/Mengzi3-8B-Chat", + "developer": "Langboat", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.514 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4684 + } + }, + { + "evaluation_name": "MATH Level 5", + 
"source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0906 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4078 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3142 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lawnakk/BBA100/1d9a65a3-d2bb-48a7-8a00-8e4a79c36db2.json b/data/hfopenllm_v2/Lawnakk/BBA100/1d9a65a3-d2bb-48a7-8a00-8e4a79c36db2.json new file mode 100644 index 000000000..9cbc0affe --- /dev/null +++ b/data/hfopenllm_v2/Lawnakk/BBA100/1d9a65a3-d2bb-48a7-8a00-8e4a79c36db2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lawnakk_BBA100/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BBA100", + "id": "Lawnakk/BBA100", + "developer": "Lawnakk", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2076 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2826 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.0098 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2441 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.402 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1122 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lawnakk/BBA100/745591e3-3c6a-473a-9e51-4bffe1c86fa7.json b/data/hfopenllm_v2/Lawnakk/BBA100/745591e3-3c6a-473a-9e51-4bffe1c86fa7.json deleted file mode 100644 index 8fef4d078..000000000 --- a/data/hfopenllm_v2/Lawnakk/BBA100/745591e3-3c6a-473a-9e51-4bffe1c86fa7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lawnakk_BBA100/1762652579.707814", - "retrieved_timestamp": "1762652579.707815", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lawnakk/BBA100", - "developer": "Lawnakk", - "inference_platform": "unknown", - "id": "Lawnakk/BBA100", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2075803312987318 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2825701502983552 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24412751677852348 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40196875 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11220079787234043 - } - } - ] -} diff --git a/data/hfopenllm_v2/Lawnakk/BBALAW1.0/608398da-ae2a-4be2-aaf9-6ec8899aa63d.json b/data/hfopenllm_v2/Lawnakk/BBALAW1.0/608398da-ae2a-4be2-aaf9-6ec8899aa63d.json new file mode 100644 index 000000000..11de02d19 --- /dev/null +++ b/data/hfopenllm_v2/Lawnakk/BBALAW1.0/608398da-ae2a-4be2-aaf9-6ec8899aa63d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BBALAW1.0", + "id": "Lawnakk/BBALAW1.0", + "developer": "Lawnakk", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 4.353 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1351 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2828 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2559 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3526 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1128 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/Lawnakk/BBALAW1.0/61739e6e-92b0-4577-acd2-8c58ffc612a4.json b/data/hfopenllm_v2/Lawnakk/BBALAW1.0/61739e6e-92b0-4577-acd2-8c58ffc612a4.json deleted file mode 100644 index d69606f01..000000000 --- a/data/hfopenllm_v2/Lawnakk/BBALAW1.0/61739e6e-92b0-4577-acd2-8c58ffc612a4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.0/1762652579.708328", - "retrieved_timestamp": "1762652579.708329", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lawnakk/BBALAW1.0", - "developer": "Lawnakk", - "inference_platform": "unknown", - "id": "Lawnakk/BBALAW1.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 4.353 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13511482865463637 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28276697965906106 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3525729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11278257978723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/Lawnakk/BBALAW1.2/80e04641-be7d-4351-a4f6-1318981ef834.json b/data/hfopenllm_v2/Lawnakk/BBALAW1.2/80e04641-be7d-4351-a4f6-1318981ef834.json new file mode 100644 index 000000000..0b4de2bab --- /dev/null +++ b/data/hfopenllm_v2/Lawnakk/BBALAW1.2/80e04641-be7d-4351-a4f6-1318981ef834.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BBALAW1.2", + "id": "Lawnakk/BBALAW1.2", + "developer": "Lawnakk", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + 
"params_billions": 4.353 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1354 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2811 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3579 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1105 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lawnakk/BBALAW1.2/917081cc-ee33-4c1f-85b0-9256ef57f6b3.json b/data/hfopenllm_v2/Lawnakk/BBALAW1.2/917081cc-ee33-4c1f-85b0-9256ef57f6b3.json deleted file mode 100644 index bfcb9f421..000000000 --- a/data/hfopenllm_v2/Lawnakk/BBALAW1.2/917081cc-ee33-4c1f-85b0-9256ef57f6b3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.2/1762652579.708597", - "retrieved_timestamp": "1762652579.708598", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lawnakk/BBALAW1.2", - "developer": "Lawnakk", - "inference_platform": "unknown", - "id": "Lawnakk/BBALAW1.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 4.353 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13543952268868825 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28112730419661675 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35790625000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11053856382978723 - } - } - ] -} diff --git a/data/hfopenllm_v2/Lawnakk/BBALAW1.3/60fa19b9-bf1d-4f39-b421-cb59379f5206.json b/data/hfopenllm_v2/Lawnakk/BBALAW1.3/60fa19b9-bf1d-4f39-b421-cb59379f5206.json deleted file mode 100644 index 63a71eebf..000000000 --- a/data/hfopenllm_v2/Lawnakk/BBALAW1.3/60fa19b9-bf1d-4f39-b421-cb59379f5206.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.3/1762652579.70884", - "retrieved_timestamp": "1762652579.7088408", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lawnakk/BBALAW1.3", - "developer": "Lawnakk", - "inference_platform": "unknown", - "id": "Lawnakk/BBALAW1.3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 4.353 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13543952268868825 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28269808045232453 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36190625000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.109375 - } - } - ] -} diff --git a/data/hfopenllm_v2/Lawnakk/BBALAW1.3/e74222c6-636c-4075-8d4d-30c73fa70fda.json b/data/hfopenllm_v2/Lawnakk/BBALAW1.3/e74222c6-636c-4075-8d4d-30c73fa70fda.json new file mode 100644 index 000000000..a9204af31 --- /dev/null +++ b/data/hfopenllm_v2/Lawnakk/BBALAW1.3/e74222c6-636c-4075-8d4d-30c73fa70fda.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BBALAW1.3", + "id": "Lawnakk/BBALAW1.3", + "developer": "Lawnakk", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 4.353 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1354 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3619 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1094 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lawnakk/BBALAW1.6/684962b9-d734-4a10-a0cb-45bc4d957c2c.json b/data/hfopenllm_v2/Lawnakk/BBALAW1.6/684962b9-d734-4a10-a0cb-45bc4d957c2c.json deleted file mode 100644 index 1cc37b498..000000000 --- a/data/hfopenllm_v2/Lawnakk/BBALAW1.6/684962b9-d734-4a10-a0cb-45bc4d957c2c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.6/1762652579.7090619", - "retrieved_timestamp": "1762652579.7090628", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lawnakk/BBALAW1.6", - "developer": "Lawnakk", - "inference_platform": "unknown", - "id": "Lawnakk/BBALAW1.6", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5245437660961804 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.555356284691385 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36027190332326287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43684375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45071476063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/Lawnakk/BBALAW1.6/aed80361-9304-44a0-934a-52976d7f1bf3.json b/data/hfopenllm_v2/Lawnakk/BBALAW1.6/aed80361-9304-44a0-934a-52976d7f1bf3.json new file mode 100644 index 000000000..02391ea7b --- /dev/null +++ b/data/hfopenllm_v2/Lawnakk/BBALAW1.6/aed80361-9304-44a0-934a-52976d7f1bf3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.6/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": 
"third_party" + }, + "model_info": { + "name": "BBALAW1.6", + "id": "Lawnakk/BBALAW1.6", + "developer": "Lawnakk", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5245 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5554 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3603 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3238 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4368 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4507 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lawnakk/BBALAW1.61/709bd280-b03e-4908-808f-34566bc968f4.json b/data/hfopenllm_v2/Lawnakk/BBALAW1.61/709bd280-b03e-4908-808f-34566bc968f4.json new file mode 100644 index 000000000..d518940c4 --- /dev/null +++ b/data/hfopenllm_v2/Lawnakk/BBALAW1.61/709bd280-b03e-4908-808f-34566bc968f4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.61/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BBALAW1.61", + "id": "Lawnakk/BBALAW1.61", + "developer": "Lawnakk", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + 
"evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5771 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5549 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3663 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3171 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4355 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4471 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lawnakk/BBALAW1.61/af87bb98-cc36-4c8d-9694-7e7428a899ac.json b/data/hfopenllm_v2/Lawnakk/BBALAW1.61/af87bb98-cc36-4c8d-9694-7e7428a899ac.json deleted file mode 100644 index 3e9eac2eb..000000000 --- a/data/hfopenllm_v2/Lawnakk/BBALAW1.61/af87bb98-cc36-4c8d-9694-7e7428a899ac.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.61/1762652579.709277", - "retrieved_timestamp": "1762652579.7092779", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lawnakk/BBALAW1.61", - "developer": "Lawnakk", - "inference_platform": "unknown", - "id": "Lawnakk/BBALAW1.61", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 
1 - }, - "score_details": { - "score": 0.5771253607095839 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5548582474785428 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36631419939577037 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4355104166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4470578457446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/Lawnakk/BBALAW1.62/5dc300f1-e908-4d71-addc-2717e3702b12.json b/data/hfopenllm_v2/Lawnakk/BBALAW1.62/5dc300f1-e908-4d71-addc-2717e3702b12.json deleted file mode 100644 index 24598dda3..000000000 --- a/data/hfopenllm_v2/Lawnakk/BBALAW1.62/5dc300f1-e908-4d71-addc-2717e3702b12.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.62/1762652579.709492", - "retrieved_timestamp": "1762652579.709493", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lawnakk/BBALAW1.62", - "developer": "Lawnakk", - "inference_platform": "unknown", - "id": "Lawnakk/BBALAW1.62", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5046099903810778 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5580519941056026 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2824773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4343333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45445478723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/Lawnakk/BBALAW1.62/66c495b3-4b09-42ad-b742-4d753c3bde7a.json b/data/hfopenllm_v2/Lawnakk/BBALAW1.62/66c495b3-4b09-42ad-b742-4d753c3bde7a.json new file mode 100644 index 000000000..0ca80591d --- /dev/null +++ b/data/hfopenllm_v2/Lawnakk/BBALAW1.62/66c495b3-4b09-42ad-b742-4d753c3bde7a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.62/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BBALAW1.62", + "id": "Lawnakk/BBALAW1.62", + "developer": "Lawnakk", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5046 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5581 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2825 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3196 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4343 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4545 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lawnakk/BBALAW1.63/6005fc02-9f02-436a-a535-ec68a3c6dbc6.json b/data/hfopenllm_v2/Lawnakk/BBALAW1.63/6005fc02-9f02-436a-a535-ec68a3c6dbc6.json deleted file mode 100644 index d60e0d31d..000000000 --- a/data/hfopenllm_v2/Lawnakk/BBALAW1.63/6005fc02-9f02-436a-a535-ec68a3c6dbc6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.63/1762652579.709696", - "retrieved_timestamp": "1762652579.709697", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lawnakk/BBALAW1.63", - "developer": "Lawnakk", - "inference_platform": "unknown", - "id": "Lawnakk/BBALAW1.63", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44073835201709244 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5540633758841665 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37009063444108764 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4303333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4470578457446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/Lawnakk/BBALAW1.63/e24f7be6-3051-4990-8b93-121aec5402eb.json b/data/hfopenllm_v2/Lawnakk/BBALAW1.63/e24f7be6-3051-4990-8b93-121aec5402eb.json new file mode 100644 index 000000000..cb01d3410 --- /dev/null +++ b/data/hfopenllm_v2/Lawnakk/BBALAW1.63/e24f7be6-3051-4990-8b93-121aec5402eb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.63/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + 
"evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BBALAW1.63", + "id": "Lawnakk/BBALAW1.63", + "developer": "Lawnakk", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4407 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5541 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3701 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4303 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4471 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lawnakk/BBALAW1.64/0321571b-4246-4490-bd6c-7b106eb8e15a.json b/data/hfopenllm_v2/Lawnakk/BBALAW1.64/0321571b-4246-4490-bd6c-7b106eb8e15a.json new file mode 100644 index 000000000..227047148 --- /dev/null +++ b/data/hfopenllm_v2/Lawnakk/BBALAW1.64/0321571b-4246-4490-bd6c-7b106eb8e15a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.64/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BBALAW1.64", + "id": "Lawnakk/BBALAW1.64", + "developer": "Lawnakk", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + 
"evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1395 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2779 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2483 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3447 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1115 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lawnakk/BBALAW1.64/4a4ce0f8-c41f-469e-b7c7-a4e3d857377e.json b/data/hfopenllm_v2/Lawnakk/BBALAW1.64/4a4ce0f8-c41f-469e-b7c7-a4e3d857377e.json deleted file mode 100644 index 45f2c3e2b..000000000 --- a/data/hfopenllm_v2/Lawnakk/BBALAW1.64/4a4ce0f8-c41f-469e-b7c7-a4e3d857377e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.64/1762652579.709901", - "retrieved_timestamp": "1762652579.709902", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lawnakk/BBALAW1.64", - "developer": "Lawnakk", - "inference_platform": "unknown", - "id": "Lawnakk/BBALAW1.64", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13946107439371977 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27790701865141654 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2483221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3446666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11153590425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/Lawnakk/BBALAW1/54dbf947-ab18-40dd-9cd7-a496289b2e72.json b/data/hfopenllm_v2/Lawnakk/BBALAW1/54dbf947-ab18-40dd-9cd7-a496289b2e72.json new file mode 100644 index 000000000..f7c7740c8 --- /dev/null +++ b/data/hfopenllm_v2/Lawnakk/BBALAW1/54dbf947-ab18-40dd-9cd7-a496289b2e72.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BBALAW1", + "id": "Lawnakk/BBALAW1", + "developer": "Lawnakk", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1905 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2872 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0098 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + 
"dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2433 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4153 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1121 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lawnakk/BBALAW1/59b40f56-c27f-4b15-9288-b7033e2e4f26.json b/data/hfopenllm_v2/Lawnakk/BBALAW1/59b40f56-c27f-4b15-9288-b7033e2e4f26.json deleted file mode 100644 index 7938844e6..000000000 --- a/data/hfopenllm_v2/Lawnakk/BBALAW1/59b40f56-c27f-4b15-9288-b7033e2e4f26.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1/1762652579.708089", - "retrieved_timestamp": "1762652579.70809", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lawnakk/BBALAW1", - "developer": "Lawnakk", - "inference_platform": "unknown", - "id": "Lawnakk/BBALAW1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19054442213327305 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28723681696502185 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24328859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4152708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11211768617021277 - } - } - ] -} diff --git a/data/hfopenllm_v2/LenguajeNaturalAI/leniachat-gemma-2b-v0/d841e204-ed6a-439d-8408-d5cfb3b38dae.json b/data/hfopenllm_v2/LenguajeNaturalAI/leniachat-gemma-2b-v0/d841e204-ed6a-439d-8408-d5cfb3b38dae.json new file mode 100644 index 000000000..6394ebbd7 --- /dev/null +++ b/data/hfopenllm_v2/LenguajeNaturalAI/leniachat-gemma-2b-v0/d841e204-ed6a-439d-8408-d5cfb3b38dae.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LenguajeNaturalAI_leniachat-gemma-2b-v0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "leniachat-gemma-2b-v0", + "id": "LenguajeNaturalAI/leniachat-gemma-2b-v0", + "developer": "LenguajeNaturalAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 2.506 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.215 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3074 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0113 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3659 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.117 + } + } + ] +} \ No 
newline at end of file diff --git a/data/hfopenllm_v2/LenguajeNaturalAI/leniachat-qwen2-1.5B-v0/96b57891-83e3-4948-ad48-64a2a370e166.json b/data/hfopenllm_v2/LenguajeNaturalAI/leniachat-qwen2-1.5B-v0/96b57891-83e3-4948-ad48-64a2a370e166.json new file mode 100644 index 000000000..1f7ec34a2 --- /dev/null +++ b/data/hfopenllm_v2/LenguajeNaturalAI/leniachat-qwen2-1.5B-v0/96b57891-83e3-4948-ad48-64a2a370e166.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LenguajeNaturalAI_leniachat-qwen2-1.5B-v0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "leniachat-qwen2-1.5B-v0", + "id": "LenguajeNaturalAI/leniachat-qwen2-1.5B-v0", + "developer": "LenguajeNaturalAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.543 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2221 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3684 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0128 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.188 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/CheckPoint_A/30301818-6dad-45f9-acfb-a68ccc7c0609.json 
b/data/hfopenllm_v2/LeroyDyer/CheckPoint_A/30301818-6dad-45f9-acfb-a68ccc7c0609.json new file mode 100644 index 000000000..009c8048b --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/CheckPoint_A/30301818-6dad-45f9-acfb-a68ccc7c0609.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer_CheckPoint_A/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CheckPoint_A", + "id": "LeroyDyer/CheckPoint_A", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4513 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4748 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0589 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4231 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.288 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/CheckPoint_A/771366a5-e227-4ff8-b60f-744020994bec.json b/data/hfopenllm_v2/LeroyDyer/CheckPoint_A/771366a5-e227-4ff8-b60f-744020994bec.json deleted file mode 100644 index b00336018..000000000 --- a/data/hfopenllm_v2/LeroyDyer/CheckPoint_A/771366a5-e227-4ff8-b60f-744020994bec.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - 
"evaluation_id": "hfopenllm_v2/LeroyDyer_CheckPoint_A/1762652579.714355", - "retrieved_timestamp": "1762652579.714355", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/CheckPoint_A", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/CheckPoint_A", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45127927233074905 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4747699745968042 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4230833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28798204787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/CheckPoint_B/4e44fd55-9538-4065-8763-5d1c3d00be5d.json b/data/hfopenllm_v2/LeroyDyer/CheckPoint_B/4e44fd55-9538-4065-8763-5d1c3d00be5d.json deleted file mode 100644 index a4c8589b1..000000000 --- a/data/hfopenllm_v2/LeroyDyer/CheckPoint_B/4e44fd55-9538-4065-8763-5d1c3d00be5d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer_CheckPoint_B/1762652579.7146208", - "retrieved_timestamp": "1762652579.714622", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/CheckPoint_B", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/CheckPoint_B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4439852923576111 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47799475378324896 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38984375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29072473404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/CheckPoint_B/50743107-30de-4c5d-bf83-cc003af8a5db.json b/data/hfopenllm_v2/LeroyDyer/CheckPoint_B/50743107-30de-4c5d-bf83-cc003af8a5db.json new file mode 100644 index 000000000..c87445cfa --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/CheckPoint_B/50743107-30de-4c5d-bf83-cc003af8a5db.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer_CheckPoint_B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CheckPoint_B", + "id": "LeroyDyer/CheckPoint_B", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.444 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.478 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + 
}, + "score_details": { + "score": 0.0718 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3898 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2907 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/CheckPoint_C/625ee1b3-e0a1-4a86-83a4-6e66b380f864.json b/data/hfopenllm_v2/LeroyDyer/CheckPoint_C/625ee1b3-e0a1-4a86-83a4-6e66b380f864.json new file mode 100644 index 000000000..27af86b1d --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/CheckPoint_C/625ee1b3-e0a1-4a86-83a4-6e66b380f864.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer_CheckPoint_C/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CheckPoint_C", + "id": "LeroyDyer/CheckPoint_C", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3477 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4586 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0551 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4346 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3021 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/CheckPoint_C/a4fe370d-1722-4fdf-bf75-8416baeaba19.json b/data/hfopenllm_v2/LeroyDyer/CheckPoint_C/a4fe370d-1722-4fdf-bf75-8416baeaba19.json deleted file mode 100644 index 801c921e6..000000000 --- a/data/hfopenllm_v2/LeroyDyer/CheckPoint_C/a4fe370d-1722-4fdf-bf75-8416baeaba19.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer_CheckPoint_C/1762652579.714836", - "retrieved_timestamp": "1762652579.714837", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/CheckPoint_C", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/CheckPoint_C", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34768968558979063 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45864215446207585 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4346145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.30211103723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/CheckPoint_R1/7eba2aef-5c97-4526-92a8-d62bd5b59b6f.json b/data/hfopenllm_v2/LeroyDyer/CheckPoint_R1/7eba2aef-5c97-4526-92a8-d62bd5b59b6f.json deleted file mode 100644 index 9c62a0f94..000000000 --- a/data/hfopenllm_v2/LeroyDyer/CheckPoint_R1/7eba2aef-5c97-4526-92a8-d62bd5b59b6f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer_CheckPoint_R1/1762652579.715039", - "retrieved_timestamp": "1762652579.71504", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/CheckPoint_R1", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/CheckPoint_R1", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17278376928771216 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4225419506658359 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4031458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22049534574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/CheckPoint_R1/89fda762-1989-4850-837c-f79ef538c58c.json b/data/hfopenllm_v2/LeroyDyer/CheckPoint_R1/89fda762-1989-4850-837c-f79ef538c58c.json new file mode 100644 index 000000000..5131c24dd --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/CheckPoint_R1/89fda762-1989-4850-837c-f79ef538c58c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer_CheckPoint_R1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CheckPoint_R1", + "id": "LeroyDyer/CheckPoint_R1", 
+ "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1728 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4225 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0431 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4031 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2205 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/LCARS_AI_001/1de1f906-0e36-4f79-b159-16ef8ee33ab3.json b/data/hfopenllm_v2/LeroyDyer/LCARS_AI_001/1de1f906-0e36-4f79-b159-16ef8ee33ab3.json new file mode 100644 index 000000000..f13e5d9d1 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/LCARS_AI_001/1de1f906-0e36-4f79-b159-16ef8ee33ab3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer_LCARS_AI_001/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LCARS_AI_001", + "id": "LeroyDyer/LCARS_AI_001", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": 
"IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3109 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4258 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0234 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4384 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.267 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/LCARS_AI_001/f6b84bde-67aa-4c50-a46e-1f80605037de.json b/data/hfopenllm_v2/LeroyDyer/LCARS_AI_001/f6b84bde-67aa-4c50-a46e-1f80605037de.json deleted file mode 100644 index c84073928..000000000 --- a/data/hfopenllm_v2/LeroyDyer/LCARS_AI_001/f6b84bde-67aa-4c50-a46e-1f80605037de.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer_LCARS_AI_001/1762652579.7152472", - "retrieved_timestamp": "1762652579.715248", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/LCARS_AI_001", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/LCARS_AI_001", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.31094495937445976 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42578875825590146 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.023413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43836458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2670378989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/LCARS_AI_1x4_003_SuperAI/d8588222-9e4b-47c1-9f86-92f47c9c8e38.json b/data/hfopenllm_v2/LeroyDyer/LCARS_AI_1x4_003_SuperAI/d8588222-9e4b-47c1-9f86-92f47c9c8e38.json new file mode 100644 index 000000000..a513ad18e --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/LCARS_AI_1x4_003_SuperAI/d8588222-9e4b-47c1-9f86-92f47c9c8e38.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer_LCARS_AI_1x4_003_SuperAI/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LCARS_AI_1x4_003_SuperAI", + "id": "LeroyDyer/LCARS_AI_1x4_003_SuperAI", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.154 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4111 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.492 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0574 + } 
+ }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4506 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2972 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/LCARS_AI_1x4_003_SuperAI/db8614eb-2b53-460c-a80b-dceb47a9703f.json b/data/hfopenllm_v2/LeroyDyer/LCARS_AI_1x4_003_SuperAI/db8614eb-2b53-460c-a80b-dceb47a9703f.json deleted file mode 100644 index 5aa81e6c1..000000000 --- a/data/hfopenllm_v2/LeroyDyer/LCARS_AI_1x4_003_SuperAI/db8614eb-2b53-460c-a80b-dceb47a9703f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer_LCARS_AI_1x4_003_SuperAI/1762652579.7154438", - "retrieved_timestamp": "1762652579.715445", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/LCARS_AI_1x4_003_SuperAI", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/LCARS_AI_1x4_003_SuperAI", - "additional_details": { - "precision": "float16", - "architecture": "MixtralForCausalLM", - "params_billions": 24.154 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41111251479407973 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49198503573704794 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4506145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29720744680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/LCARS_AI_StarTrek_Computer/15e6e6e6-39fa-424f-ba12-5f209cd4b2cc.json b/data/hfopenllm_v2/LeroyDyer/LCARS_AI_StarTrek_Computer/15e6e6e6-39fa-424f-ba12-5f209cd4b2cc.json new file mode 100644 index 000000000..c31ece219 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/LCARS_AI_StarTrek_Computer/15e6e6e6-39fa-424f-ba12-5f209cd4b2cc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer_LCARS_AI_StarTrek_Computer/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LCARS_AI_StarTrek_Computer", + "id": "LeroyDyer/LCARS_AI_StarTrek_Computer", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3583 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4446 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0408 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.395 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2458 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/LCARS_AI_StarTrek_Computer/a3e19823-43ac-44ac-9dee-960a98139fa8.json b/data/hfopenllm_v2/LeroyDyer/LCARS_AI_StarTrek_Computer/a3e19823-43ac-44ac-9dee-960a98139fa8.json deleted file mode 100644 index 578fb254f..000000000 --- a/data/hfopenllm_v2/LeroyDyer/LCARS_AI_StarTrek_Computer/a3e19823-43ac-44ac-9dee-960a98139fa8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer_LCARS_AI_StarTrek_Computer/1762652579.7157388", - "retrieved_timestamp": "1762652579.715741", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/LCARS_AI_StarTrek_Computer", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/LCARS_AI_StarTrek_Computer", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35825609383103496 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4446191188748297 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3950208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24584441489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/LCARS_TOP_SCORE/04631aa2-f1fd-4aea-ba88-53b474c71fe8.json b/data/hfopenllm_v2/LeroyDyer/LCARS_TOP_SCORE/04631aa2-f1fd-4aea-ba88-53b474c71fe8.json deleted file mode 100644 index e88a29783..000000000 --- a/data/hfopenllm_v2/LeroyDyer/LCARS_TOP_SCORE/04631aa2-f1fd-4aea-ba88-53b474c71fe8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer_LCARS_TOP_SCORE/1762652579.716028", - "retrieved_timestamp": "1762652579.716029", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/LCARS_TOP_SCORE", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/LCARS_TOP_SCORE", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43706587410293574 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5127371051825098 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06722054380664652 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42928125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3031083776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/LCARS_TOP_SCORE/81225b85-1523-49c1-b770-897112d2e6ae.json b/data/hfopenllm_v2/LeroyDyer/LCARS_TOP_SCORE/81225b85-1523-49c1-b770-897112d2e6ae.json new file mode 100644 index 000000000..6e727e47a --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/LCARS_TOP_SCORE/81225b85-1523-49c1-b770-897112d2e6ae.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer_LCARS_TOP_SCORE/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LCARS_TOP_SCORE", + "id": "LeroyDyer/LCARS_TOP_SCORE", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4371 + } + }, + { + 
"evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5127 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0672 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2861 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4293 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3031 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/Mixtral_AI_SwahiliTron_7b/254deaf7-a253-4d41-a10d-1143f86b288c.json b/data/hfopenllm_v2/LeroyDyer/Mixtral_AI_SwahiliTron_7b/254deaf7-a253-4d41-a10d-1143f86b288c.json new file mode 100644 index 000000000..3ffc415fc --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/Mixtral_AI_SwahiliTron_7b/254deaf7-a253-4d41-a10d-1143f86b288c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer_Mixtral_AI_SwahiliTron_7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mixtral_AI_SwahiliTron_7b", + "id": "LeroyDyer/Mixtral_AI_SwahiliTron_7b", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1534 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3055 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0136 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.342 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1208 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/Mixtral_AI_SwahiliTron_7b/4f5fadb6-5fad-4b82-a027-1d4f497dc476.json b/data/hfopenllm_v2/LeroyDyer/Mixtral_AI_SwahiliTron_7b/4f5fadb6-5fad-4b82-a027-1d4f497dc476.json deleted file mode 100644 index e8d73b227..000000000 --- a/data/hfopenllm_v2/LeroyDyer/Mixtral_AI_SwahiliTron_7b/4f5fadb6-5fad-4b82-a027-1d4f497dc476.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer_Mixtral_AI_SwahiliTron_7b/1762652579.716297", - "retrieved_timestamp": "1762652579.716299", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/Mixtral_AI_SwahiliTron_7b", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/Mixtral_AI_SwahiliTron_7b", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1533996462718919 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3055092453201354 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34203125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12076130319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWebAI_Human_AGI/8e1f811e-3e86-4440-a5dd-bf607aa02ad6.json b/data/hfopenllm_v2/LeroyDyer/SpydazWebAI_Human_AGI/8e1f811e-3e86-4440-a5dd-bf607aa02ad6.json deleted file mode 100644 index 93897c70e..000000000 --- a/data/hfopenllm_v2/LeroyDyer/SpydazWebAI_Human_AGI/8e1f811e-3e86-4440-a5dd-bf607aa02ad6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWebAI_Human_AGI/1762652579.7166212", - "retrieved_timestamp": "1762652579.716622", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/SpydazWebAI_Human_AGI", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWebAI_Human_AGI", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3388221031308041 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3374862127508733 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39663541666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, 
- "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1478557180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWebAI_Human_AGI/ba0b66f5-724a-4a6b-ac20-a36d530a8b4b.json b/data/hfopenllm_v2/LeroyDyer/SpydazWebAI_Human_AGI/ba0b66f5-724a-4a6b-ac20-a36d530a8b4b.json new file mode 100644 index 000000000..93ca501b0 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWebAI_Human_AGI/ba0b66f5-724a-4a6b-ac20-a36d530a8b4b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWebAI_Human_AGI/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SpydazWebAI_Human_AGI", + "id": "LeroyDyer/SpydazWebAI_Human_AGI", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3388 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3375 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0144 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3966 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1479 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/LeroyDyer/SpydazWebAI_Human_AGI_001/a4c9a905-1a7c-406a-ab38-6a5e71ed0bf5.json b/data/hfopenllm_v2/LeroyDyer/SpydazWebAI_Human_AGI_001/a4c9a905-1a7c-406a-ab38-6a5e71ed0bf5.json deleted file mode 100644 index 3c4236180..000000000 --- a/data/hfopenllm_v2/LeroyDyer/SpydazWebAI_Human_AGI_001/a4c9a905-1a7c-406a-ab38-6a5e71ed0bf5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWebAI_Human_AGI_001/1762652579.716855", - "retrieved_timestamp": "1762652579.716856", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/SpydazWebAI_Human_AGI_001", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWebAI_Human_AGI_001", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31181930610779396 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3433421938604874 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39939583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14261968085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWebAI_Human_AGI_001/eed0b3b4-e277-49ee-aed5-f3599b2d5653.json b/data/hfopenllm_v2/LeroyDyer/SpydazWebAI_Human_AGI_001/eed0b3b4-e277-49ee-aed5-f3599b2d5653.json new file mode 100644 index 000000000..03d018a11 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWebAI_Human_AGI_001/eed0b3b4-e277-49ee-aed5-f3599b2d5653.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWebAI_Human_AGI_001/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"SpydazWebAI_Human_AGI_001", + "id": "LeroyDyer/SpydazWebAI_Human_AGI_001", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3118 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3433 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0196 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1426 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_CyberTron_Ultra_7b/96a21b6e-ed47-40fb-85cd-15924330e60d.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_CyberTron_Ultra_7b/96a21b6e-ed47-40fb-85cd-15924330e60d.json new file mode 100644 index 000000000..faca52fe7 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_CyberTron_Ultra_7b/96a21b6e-ed47-40fb-85cd-15924330e60d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_CyberTron_Ultra_7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SpydazWeb_AI_CyberTron_Ultra_7b", + "id": "LeroyDyer/SpydazWeb_AI_CyberTron_Ultra_7b", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + 
"precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1556 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4811 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0136 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4136 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2866 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_CyberTron_Ultra_7b/e8b992b8-9f0a-4bfb-ab53-3b07ca1ca117.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_CyberTron_Ultra_7b/e8b992b8-9f0a-4bfb-ab53-3b07ca1ca117.json deleted file mode 100644 index d7b1e2b5a..000000000 --- a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_CyberTron_Ultra_7b/e8b992b8-9f0a-4bfb-ab53-3b07ca1ca117.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_CyberTron_Ultra_7b/1762652579.71707", - "retrieved_timestamp": "1762652579.717071", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_AI_CyberTron_Ultra_7b", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_AI_CyberTron_Ultra_7b", - "additional_details": { - "precision": "float16", - "architecture": 
"MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15557276914143361 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48107736108561827 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41362499999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2865691489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAGI_001_M2/daa704a9-2eed-4549-a847-3606c9e8a733.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAGI_001_M2/daa704a9-2eed-4549-a847-3606c9e8a733.json deleted file mode 100644 index 3e54b39ec..000000000 --- a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAGI_001_M2/daa704a9-2eed-4549-a847-3606c9e8a733.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAGI_001_M2/1762652579.71728", - "retrieved_timestamp": "1762652579.717281", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAGI_001_M2", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_AI_HumanAGI_001_M2", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39395138233221183 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4888172059118469 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4503020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.300531914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAGI_001_M2/f41f5471-6384-4510-85d2-41f236082583.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAGI_001_M2/f41f5471-6384-4510-85d2-41f236082583.json new file mode 100644 index 000000000..0b361a56c --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAGI_001_M2/f41f5471-6384-4510-85d2-41f236082583.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAGI_001_M2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SpydazWeb_AI_HumanAGI_001_M2", + "id": "LeroyDyer/SpydazWeb_AI_HumanAGI_001_M2", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.394 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4888 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0385 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + }, + { + 
"evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4503 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3005 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAGI_002/2728eccc-525f-4350-901b-dbc352c78014.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAGI_002/2728eccc-525f-4350-901b-dbc352c78014.json new file mode 100644 index 000000000..0eb676741 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAGI_002/2728eccc-525f-4350-901b-dbc352c78014.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAGI_002/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SpydazWeb_AI_HumanAGI_002", + "id": "LeroyDyer/SpydazWeb_AI_HumanAGI_002", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4088 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5044 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0665 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4865 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3059 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAGI_002/3a6cfbae-80c1-4ec6-9c14-1ddeeb6e7138.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAGI_002/3a6cfbae-80c1-4ec6-9c14-1ddeeb6e7138.json deleted file mode 100644 index 9223cce85..000000000 --- a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAGI_002/3a6cfbae-80c1-4ec6-9c14-1ddeeb6e7138.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAGI_002/1762652579.71767", - "retrieved_timestamp": "1762652579.7176719", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAGI_002", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_AI_HumanAGI_002", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40876430094371824 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5043871825389313 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48648958333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3058510638297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_001/3e7ae935-46c3-427c-8713-41c659c1828a.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_001/3e7ae935-46c3-427c-8713-41c659c1828a.json 
new file mode 100644 index 000000000..f6494cb5b --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_001/3e7ae935-46c3-427c-8713-41c659c1828a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_001/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SpydazWeb_AI_HumanAI_001", + "id": "LeroyDyer/SpydazWeb_AI_HumanAI_001", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2252 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3344 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0166 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2886 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.386 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1271 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_001/f177b7f7-7143-4f72-9f9d-54fe2bc9797b.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_001/f177b7f7-7143-4f72-9f9d-54fe2bc9797b.json deleted file mode 100644 index 5d943268c..000000000 --- a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_001/f177b7f7-7143-4f72-9f9d-54fe2bc9797b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - 
"evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_001/1762652579.717986", - "retrieved_timestamp": "1762652579.717987", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_001", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_001", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22516589316347294 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33440360243051986 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38603125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1270777925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_006/66782676-c942-4aff-b754-b96cd96cf1f9.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_006/66782676-c942-4aff-b754-b96cd96cf1f9.json new file mode 100644 index 000000000..9408883d4 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_006/66782676-c942-4aff-b754-b96cd96cf1f9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_006/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SpydazWeb_AI_HumanAI_006", + "id": "LeroyDyer/SpydazWeb_AI_HumanAI_006", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, 
+ "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.143 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3302 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0106 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3568 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1135 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_006/cdbebbea-4749-472b-8cec-5da5ffa96d65.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_006/cdbebbea-4749-472b-8cec-5da5ffa96d65.json deleted file mode 100644 index 125a57771..000000000 --- a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_006/cdbebbea-4749-472b-8cec-5da5ffa96d65.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_006/1762652579.718229", - "retrieved_timestamp": "1762652579.71823", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_006", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_006", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.14300832901146734 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3301800420981355 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3567916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11353058510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_007/3143a635-10da-4cb5-9c2f-eae2988d9e60.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_007/3143a635-10da-4cb5-9c2f-eae2988d9e60.json deleted file mode 100644 index 5441e16d6..000000000 --- a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_007/3143a635-10da-4cb5-9c2f-eae2988d9e60.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_007/1762652579.718461", - "retrieved_timestamp": "1762652579.718461", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_007", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_007", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3351751131442351 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3415665794743605 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.022658610271903322 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40962499999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13522273936170212 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_007/941a9e27-2ac4-4dab-a6d0-cb9319c79a27.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_007/941a9e27-2ac4-4dab-a6d0-cb9319c79a27.json new file mode 100644 index 000000000..fd4a7fc65 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_007/941a9e27-2ac4-4dab-a6d0-cb9319c79a27.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_007/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SpydazWeb_AI_HumanAI_007", + "id": "LeroyDyer/SpydazWeb_AI_HumanAI_007", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3352 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3416 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0227 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2886 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4096 + } + }, + { + "evaluation_name": "MMLU-PRO", 
+ "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1352 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_009_CHAT/a6d3b7b1-8834-4b74-8849-6d80381c46f5.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_009_CHAT/a6d3b7b1-8834-4b74-8849-6d80381c46f5.json deleted file mode 100644 index bcb8f07eb..000000000 --- a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_009_CHAT/a6d3b7b1-8834-4b74-8849-6d80381c46f5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_009_CHAT/1762652579.718692", - "retrieved_timestamp": "1762652579.718693", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_009_CHAT", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_009_CHAT", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2973310815303395 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3306728717792965 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1432845744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_009_CHAT/caf93f75-530e-4f4d-9cc0-2cf9b0a7f2ff.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_009_CHAT/caf93f75-530e-4f4d-9cc0-2cf9b0a7f2ff.json new file mode 100644 index 000000000..67f16d3b2 --- /dev/null +++ 
b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_009_CHAT/caf93f75-530e-4f4d-9cc0-2cf9b0a7f2ff.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_009_CHAT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SpydazWeb_AI_HumanAI_009_CHAT", + "id": "LeroyDyer/SpydazWeb_AI_HumanAI_009_CHAT", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2973 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3307 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0166 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4138 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1433 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_010_CHAT/7f53cef7-fba6-4802-93a2-b54f82a32d74.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_010_CHAT/7f53cef7-fba6-4802-93a2-b54f82a32d74.json deleted file mode 100644 index ade547392..000000000 --- a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_010_CHAT/7f53cef7-fba6-4802-93a2-b54f82a32d74.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_010_CHAT/1762652579.7189271", - "retrieved_timestamp": "1762652579.7189288", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_010_CHAT", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_010_CHAT", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2506948230694557 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33363164762455844 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01812688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41371874999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14303523936170212 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_010_CHAT/d3ca0458-ee97-4a4c-a6a9-066880ffefb5.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_010_CHAT/d3ca0458-ee97-4a4c-a6a9-066880ffefb5.json new file mode 100644 index 000000000..3fc4a754a --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_010_CHAT/d3ca0458-ee97-4a4c-a6a9-066880ffefb5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_010_CHAT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SpydazWeb_AI_HumanAI_010_CHAT", + "id": "LeroyDyer/SpydazWeb_AI_HumanAI_010_CHAT", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": 
"hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2507 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3336 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0181 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4137 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.143 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT/615bf89b-9357-46f4-82ed-f49b0021da01.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT/615bf89b-9357-46f4-82ed-f49b0021da01.json new file mode 100644 index 000000000..877f891f4 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT/615bf89b-9357-46f4-82ed-f49b0021da01.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_011_INSTRUCT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SpydazWeb_AI_HumanAI_011_INSTRUCT", + "id": "LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3149 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3523 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0144 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3831 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1595 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT/bc7bf4d0-45e9-4b37-8e5f-edc92fb1bd66.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT/bc7bf4d0-45e9-4b37-8e5f-edc92fb1bd66.json deleted file mode 100644 index def47f5f4..000000000 --- a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT/bc7bf4d0-45e9-4b37-8e5f-edc92fb1bd66.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_011_INSTRUCT/1762652579.719242", - "retrieved_timestamp": "1762652579.719243", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3148667757106699 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3522609512356862 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3831458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15949135638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML/06398630-23ad-4000-8ea2-fcca230568d7.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML/06398630-23ad-4000-8ea2-fcca230568d7.json new file mode 100644 index 000000000..5534cf616 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML/06398630-23ad-4000-8ea2-fcca230568d7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_011_INSTRUCT_ML/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SpydazWeb_AI_HumanAI_011_INSTRUCT_ML", + "id": "LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3752 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3984 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.0257 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4239 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2019 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML/fbd83964-530c-4d0e-a305-9f8451affb23.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML/fbd83964-530c-4d0e-a305-9f8451affb23.json deleted file mode 100644 index 17444d216..000000000 --- a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML/fbd83964-530c-4d0e-a305-9f8451affb23.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_011_INSTRUCT_ML/1762652579.719551", - "retrieved_timestamp": "1762652579.719552", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37524213531208306 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39840187861283577 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0256797583081571 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42391666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2018783244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML_r1/10d76569-edca-47db-abf2-1d0fd73df198.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML_r1/10d76569-edca-47db-abf2-1d0fd73df198.json deleted file mode 100644 index 37f7f0c71..000000000 --- a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML_r1/10d76569-edca-47db-abf2-1d0fd73df198.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_011_INSTRUCT_ML_r1/1762652579.7198021", - "retrieved_timestamp": "1762652579.7198029", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML_r1", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML_r1", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4049677079039171 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48583341042911066 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3921354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2956283244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML_r1/bdfa30f8-da0f-418f-adaf-caafda4c81a5.json 
b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML_r1/bdfa30f8-da0f-418f-adaf-caafda4c81a5.json new file mode 100644 index 000000000..9e0ca9e6d --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML_r1/bdfa30f8-da0f-418f-adaf-caafda4c81a5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_011_INSTRUCT_ML_r1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SpydazWeb_AI_HumanAI_011_INSTRUCT_ML_r1", + "id": "LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML_r1", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.405 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4858 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0551 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3921 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2956 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/431f8459-3c12-4260-a158-c58ec910590d.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/431f8459-3c12-4260-a158-c58ec910590d.json deleted file mode 100644 
index 7c50d7aa9..000000000 --- a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/431f8459-3c12-4260-a158-c58ec910590d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/1762652579.720226", - "retrieved_timestamp": "1762652579.720227", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30664858131978706 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45768864760562744 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0445619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42540625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23179853723404256 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/bcd8c141-d286-4567-bb06-934e546a5c7c.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/bcd8c141-d286-4567-bb06-934e546a5c7c.json deleted file mode 100644 index 290912f51..000000000 --- a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/bcd8c141-d286-4567-bb06-934e546a5c7c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/1762652579.720018", - "retrieved_timestamp": "1762652579.7200189", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30355124403250044 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4575107149412439 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0445619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42534374999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23287898936170212 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/bd5e550c-5355-4e01-bafc-2ca89899253a.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/bd5e550c-5355-4e01-bafc-2ca89899253a.json new file mode 100644 index 000000000..05508c5e5 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/bd5e550c-5355-4e01-bafc-2ca89899253a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SpydazWeb_AI_HumanAI_012_INSTRUCT_IA", + "id": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3066 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": 
"SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4577 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0446 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4254 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2318 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/f842ad5b-24f0-419b-9d65-5a6ff1f5e04b.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/f842ad5b-24f0-419b-9d65-5a6ff1f5e04b.json new file mode 100644 index 000000000..6bafa117f --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/f842ad5b-24f0-419b-9d65-5a6ff1f5e04b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SpydazWeb_AI_HumanAI_012_INSTRUCT_IA", + "id": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3036 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", 
+ "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4575 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0446 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4253 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2329 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_MX/3a09590f-28f3-4161-8a93-d42cec62aa90.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_MX/3a09590f-28f3-4161-8a93-d42cec62aa90.json new file mode 100644 index 000000000..fad8789d2 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_MX/3a09590f-28f3-4161-8a93-d42cec62aa90.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_MX/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SpydazWeb_AI_HumanAI_012_INSTRUCT_MX", + "id": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_MX", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3066 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3158 + } + }, + { + "evaluation_name": "MATH Level 5", + 
"source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0151 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3444 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1107 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_MX/9cc77018-d090-4202-bcf5-d0031097b84e.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_MX/9cc77018-d090-4202-bcf5-d0031097b84e.json deleted file mode 100644 index b5b76c0a1..000000000 --- a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_MX/9cc77018-d090-4202-bcf5-d0031097b84e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_MX/1762652579.7204201", - "retrieved_timestamp": "1762652579.720421", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_MX", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_MX", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3065987136353764 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3158421938604874 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34438541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11070478723404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/0b365c44-3cc2-4149-8614-7de6b6c2581d.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/0b365c44-3cc2-4149-8614-7de6b6c2581d.json deleted file mode 100644 index 8a03bab0f..000000000 --- a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/0b365c44-3cc2-4149-8614-7de6b6c2581d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/1762652579.72064", - "retrieved_timestamp": "1762652579.7206411", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35788153211257245 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4476544560399054 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04229607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41340625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23761635638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/0f6b76ca-c4b8-40b2-a3af-2ea1c3650933.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/0f6b76ca-c4b8-40b2-a3af-2ea1c3650933.json new file mode 100644 index 000000000..33826b74e --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/0f6b76ca-c4b8-40b2-a3af-2ea1c3650933.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SpydazWeb_AI_HumanAI_012_INSTRUCT_XA", + "id": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3579 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4477 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0423 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4134 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2376 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/dc90b971-313a-4a76-b042-350adf37a43c.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/dc90b971-313a-4a76-b042-350adf37a43c.json deleted file mode 100644 index cba8c5e62..000000000 --- a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/dc90b971-313a-4a76-b042-350adf37a43c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/1762652579.720855", - "retrieved_timestamp": "1762652579.720855", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37976347203198624 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44827466097749213 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4148020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2388630319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/f276ad54-4e3b-4718-ae1f-0479565e4565.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/f276ad54-4e3b-4718-ae1f-0479565e4565.json new file mode 100644 index 000000000..4d6c638c9 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/f276ad54-4e3b-4718-ae1f-0479565e4565.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SpydazWeb_AI_HumanAI_012_INSTRUCT_XA", + "id": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3798 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4483 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.04 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3129 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4148 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2389 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_RP/a4a38b96-036f-40db-8a0b-024a36f004f5.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_RP/a4a38b96-036f-40db-8a0b-024a36f004f5.json deleted file mode 100644 index 0fb38b40e..000000000 --- a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_RP/a4a38b96-036f-40db-8a0b-024a36f004f5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_RP/1762652579.721039", - "retrieved_timestamp": "1762652579.7210398", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": 
"documentation" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_RP", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2541168543907942 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33230179059744286 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3882604166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1323969414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_RP/dec20396-6555-4773-bf02-2cd1fcedda89.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_RP/dec20396-6555-4773-bf02-2cd1fcedda89.json new file mode 100644 index 000000000..35280576b --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_RP/dec20396-6555-4773-bf02-2cd1fcedda89.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SpydazWeb_AI_HumanAI_RP", + "id": "LeroyDyer/SpydazWeb_AI_HumanAI_RP", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2541 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3323 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0128 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2752 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3883 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1324 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_TextVision/558a0ed7-a667-421e-bbab-094b46274239.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_TextVision/558a0ed7-a667-421e-bbab-094b46274239.json deleted file mode 100644 index a5aec11da..000000000 --- a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_TextVision/558a0ed7-a667-421e-bbab-094b46274239.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_TextVision/1762652579.7212439", - "retrieved_timestamp": "1762652579.7212448", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_TextVision", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_TextVision", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062740196013245 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33536617928965984 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": 
{ - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39384375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13871343085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_TextVision/eebc33e1-0016-4adf-815a-72653a34c01b.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_TextVision/eebc33e1-0016-4adf-815a-72653a34c01b.json new file mode 100644 index 000000000..b478f0a27 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_TextVision/eebc33e1-0016-4adf-815a-72653a34c01b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_TextVision/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SpydazWeb_AI_HumanAI_TextVision", + "id": "LeroyDyer/SpydazWeb_AI_HumanAI_TextVision", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3063 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3354 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0144 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 
1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3938 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1387 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_HumanAI_M1/803c3898-c1a6-4832-ac3a-a86139489810.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_HumanAI_M1/803c3898-c1a6-4832-ac3a-a86139489810.json new file mode 100644 index 000000000..0bd353a92 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_HumanAI_M1/803c3898-c1a6-4832-ac3a-a86139489810.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_HumanAI_M1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SpydazWeb_HumanAI_M1", + "id": "LeroyDyer/SpydazWeb_HumanAI_M1", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3582 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3563 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0249 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3671 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1663 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_HumanAI_M1/ee856df0-01ea-4f06-9323-951144c9e82f.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_HumanAI_M1/ee856df0-01ea-4f06-9323-951144c9e82f.json deleted file mode 100644 index ec5d7961d..000000000 --- a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_HumanAI_M1/ee856df0-01ea-4f06-9323-951144c9e82f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_HumanAI_M1/1762652579.721453", - "retrieved_timestamp": "1762652579.721453", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_HumanAI_M1", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_HumanAI_M1", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3582062261466243 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35632705798398107 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36711458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1663065159574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_HumanAI_M2/4ea0436d-6ec9-40db-af56-2f7f1b0317df.json 
b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_HumanAI_M2/4ea0436d-6ec9-40db-af56-2f7f1b0317df.json deleted file mode 100644 index e492255dc..000000000 --- a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_HumanAI_M2/4ea0436d-6ec9-40db-af56-2f7f1b0317df.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_HumanAI_M2/1762652579.7216609", - "retrieved_timestamp": "1762652579.721662", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_HumanAI_M2", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_HumanAI_M2", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3750171766468526 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39308772552915555 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3751458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2010472074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_HumanAI_M2/bfaa3d3e-66fd-4477-85af-4b83f13ff05b.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_HumanAI_M2/bfaa3d3e-66fd-4477-85af-4b83f13ff05b.json new file mode 100644 index 000000000..3df076af2 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_HumanAI_M2/bfaa3d3e-66fd-4477-85af-4b83f13ff05b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_HumanAI_M2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SpydazWeb_HumanAI_M2", + "id": "LeroyDyer/SpydazWeb_HumanAI_M2", + "developer": "LeroyDyer", + "inference_platform": "unknown", + 
"additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.375 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3931 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0287 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3751 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.201 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_HumanAI_M3/99debdd2-1dea-4eb6-be5c-c144656cfe20.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_HumanAI_M3/99debdd2-1dea-4eb6-be5c-c144656cfe20.json new file mode 100644 index 000000000..5211d3064 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_HumanAI_M3/99debdd2-1dea-4eb6-be5c-c144656cfe20.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_HumanAI_M3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SpydazWeb_HumanAI_M3", + "id": "LeroyDyer/SpydazWeb_HumanAI_M3", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + 
"source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1579 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3127 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0091 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3914 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1149 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_HumanAI_M3/d5dd0be3-e7a7-4636-b513-3c1d5532807f.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_HumanAI_M3/d5dd0be3-e7a7-4636-b513-3c1d5532807f.json deleted file mode 100644 index 517a7bac2..000000000 --- a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_HumanAI_M3/d5dd0be3-e7a7-4636-b513-3c1d5532807f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_HumanAI_M3/1762652579.721856", - "retrieved_timestamp": "1762652579.721857", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_HumanAI_M3", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_HumanAI_M3", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.1578711153073844 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31272572546166244 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3914270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11486037234042554 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_12/ad67bb88-7f74-4eb4-b771-0b3b60be4416.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_12/ad67bb88-7f74-4eb4-b771-0b3b60be4416.json new file mode 100644 index 000000000..8fb0a66ae --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_12/ad67bb88-7f74-4eb4-b771-0b3b60be4416.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_12/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "_Spydaz_Web_AI_12", + "id": "LeroyDyer/_Spydaz_Web_AI_12", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2765 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3163 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0136 + } + }, + { 
+ "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3582 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1137 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_12/b4b57280-49db-4a07-929f-dbe2f222250c.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_12/b4b57280-49db-4a07-929f-dbe2f222250c.json deleted file mode 100644 index 97ade8581..000000000 --- a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_12/b4b57280-49db-4a07-929f-dbe2f222250c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_12/1762652579.722054", - "retrieved_timestamp": "1762652579.722055", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_12", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_12", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2764985793250797 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31633960292107943 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.35815624999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11369680851063829 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_14/6233aac6-0ce3-4f3c-8ee0-87d2482d3ea2.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_14/6233aac6-0ce3-4f3c-8ee0-87d2482d3ea2.json deleted file mode 100644 index 96293b9ca..000000000 --- a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_14/6233aac6-0ce3-4f3c-8ee0-87d2482d3ea2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_14/1762652579.722256", - "retrieved_timestamp": "1762652579.722257", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_14", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_14", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1811770546594148 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2988848127354542 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3395208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11394614361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_14/af2f579d-1e8a-47d8-8e44-a599bee83e37.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_14/af2f579d-1e8a-47d8-8e44-a599bee83e37.json new file mode 100644 index 000000000..6d3423046 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_14/af2f579d-1e8a-47d8-8e44-a599bee83e37.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_14/1770682486.623709", + 
"retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "_Spydaz_Web_AI_14", + "id": "LeroyDyer/_Spydaz_Web_AI_14", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1812 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2989 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0121 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3395 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1139 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_001/51d4724b-c85c-4ad4-a4bd-9be93cd99a2a.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_001/51d4724b-c85c-4ad4-a4bd-9be93cd99a2a.json deleted file mode 100644 index 68c0ecdb1..000000000 --- a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_001/51d4724b-c85c-4ad4-a4bd-9be93cd99a2a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_001/1762652579.72245", - "retrieved_timestamp": "1762652579.722451", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_001", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_001", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4505046609662362 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4609124425176902 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42559375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2734375 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_001/763c840e-ea73-453e-8e54-5f4fd6fda9cd.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_001/763c840e-ea73-453e-8e54-5f4fd6fda9cd.json new file mode 100644 index 000000000..5e467d10a --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_001/763c840e-ea73-453e-8e54-5f4fd6fda9cd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_001/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "_Spydaz_Web_AI_AGI_R1_001", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_001", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4505 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": 
"hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4609 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0634 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4256 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2734 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_002/4fb40ac4-a637-4b9a-b69d-ba551c0f0938.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_002/4fb40ac4-a637-4b9a-b69d-ba551c0f0938.json new file mode 100644 index 000000000..bedce984a --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_002/4fb40ac4-a637-4b9a-b69d-ba551c0f0938.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_002/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "_Spydaz_Web_AI_AGI_R1_002", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_002", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5307 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 
+ }, + "score_details": { + "score": 0.4683 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0582 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4255 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_002/86e8ff02-0dd2-4023-ab18-359d24a8a4fd.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_002/86e8ff02-0dd2-4023-ab18-359d24a8a4fd.json deleted file mode 100644 index 70c59eb0e..000000000 --- a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_002/86e8ff02-0dd2-4023-ab18-359d24a8a4fd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_002/1762652579.7226508", - "retrieved_timestamp": "1762652579.7226508", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_002", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_002", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5306885729863429 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4682582050072746 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { 
- "score": 0.0581570996978852 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42546875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28939494680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_MUSR/285688d5-c7ad-437b-a54c-9e6108d85267.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_MUSR/285688d5-c7ad-437b-a54c-9e6108d85267.json deleted file mode 100644 index a16fcf294..000000000 --- a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_MUSR/285688d5-c7ad-437b-a54c-9e6108d85267.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_MUSR/1762652579.722848", - "retrieved_timestamp": "1762652579.7228491", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_MUSR", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_MUSR", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.478606763387811 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4671769411194033 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48689583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, 
- "score_details": { - "score": 0.2828291223404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_MUSR/ffc4ef41-4a28-4816-be54-8ffd8e153073.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_MUSR/ffc4ef41-4a28-4816-be54-8ffd8e153073.json new file mode 100644 index 000000000..ccb933fa1 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_MUSR/ffc4ef41-4a28-4816-be54-8ffd8e153073.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_MUSR/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "_Spydaz_Web_AI_AGI_R1_MUSR", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_MUSR", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4786 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4672 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0604 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2844 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4869 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2828 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_MasterCoder/85ce2909-a5f9-413a-8719-cd0a66874535.json 
b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_MasterCoder/85ce2909-a5f9-413a-8719-cd0a66874535.json deleted file mode 100644 index dd0ed2f7f..000000000 --- a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_MasterCoder/85ce2909-a5f9-413a-8719-cd0a66874535.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_MasterCoder/1762652579.723048", - "retrieved_timestamp": "1762652579.723048", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_MasterCoder", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_MasterCoder", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.414259719765777 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4689417813020516 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47197916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27194148936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_MasterCoder/f75fe902-f1c7-4e6c-87d6-128688db8d94.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_MasterCoder/f75fe902-f1c7-4e6c-87d6-128688db8d94.json new file mode 100644 index 000000000..b1dc3496d --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_MasterCoder/f75fe902-f1c7-4e6c-87d6-128688db8d94.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_MasterCoder/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "_Spydaz_Web_AI_AGI_R1_MasterCoder", 
+ "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_MasterCoder", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4143 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4689 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0612 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.472 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2719 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_001/8a7df636-f1bb-4a74-bb7f-8a412edf6bd1.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_001/8a7df636-f1bb-4a74-bb7f-8a412edf6bd1.json deleted file mode 100644 index d06e90a31..000000000 --- a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_001/8a7df636-f1bb-4a74-bb7f-8a412edf6bd1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_001/1762652579.723258", - "retrieved_timestamp": "1762652579.723258", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_001", - "developer": "LeroyDyer", - "inference_platform": 
"unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_001", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4571492528712705 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48178882135920675 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06948640483383686 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47784375000000007 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2681183510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_001/dbd3098b-4532-441b-a81c-072c52579be6.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_001/dbd3098b-4532-441b-a81c-072c52579be6.json new file mode 100644 index 000000000..96985ae3e --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_001/dbd3098b-4532-441b-a81c-072c52579be6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_001/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "_Spydaz_Web_AI_AGI_R1_Math_001", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_001", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4571 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4818 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0695 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2768 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4778 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2681 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_003/438e4aa3-5e02-446e-bd3a-07ef724d24ff.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_003/438e4aa3-5e02-446e-bd3a-07ef724d24ff.json new file mode 100644 index 000000000..d2edae906 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_003/438e4aa3-5e02-446e-bd3a-07ef724d24ff.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_003/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "_Spydaz_Web_AI_AGI_R1_Math_003", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_003", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.62 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4756 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + 
"source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0695 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4202 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2999 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_003/79336acd-d465-4938-af7f-f7a688f46fd4.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_003/79336acd-d465-4938-af7f-f7a688f46fd4.json deleted file mode 100644 index 9527058ea..000000000 --- a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_003/79336acd-d465-4938-af7f-f7a688f46fd4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_003/1762652579.723467", - "retrieved_timestamp": "1762652579.723468", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_003", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_003", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6200148938150774 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4755509035158693 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06948640483383686 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42019791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29986702127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_AdvancedStudent/027fdc55-61eb-416c-b6ad-4408912d151b.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_AdvancedStudent/027fdc55-61eb-416c-b6ad-4408912d151b.json new file mode 100644 index 000000000..e3bc05a1b --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_AdvancedStudent/027fdc55-61eb-416c-b6ad-4408912d151b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_AdvancedStudent/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "_Spydaz_Web_AI_AGI_R1_Math_AdvancedStudent", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_AdvancedStudent", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5951 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4927 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0544 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5198 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_AdvancedStudent/ed000ee0-4193-46c4-8114-2ea3dbfec9f7.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_AdvancedStudent/ed000ee0-4193-46c4-8114-2ea3dbfec9f7.json deleted file mode 100644 index 764d89509..000000000 --- a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_AdvancedStudent/ed000ee0-4193-46c4-8114-2ea3dbfec9f7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_AdvancedStudent/1762652579.7236722", - "retrieved_timestamp": "1762652579.7236722", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_AdvancedStudent", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_AdvancedStudent", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5950854842927876 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4927473238025393 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5198229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2999501329787234 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Student/37a4895d-def5-494d-9b62-d8c97ba9350b.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Student/37a4895d-def5-494d-9b62-d8c97ba9350b.json new file mode 100644 index 000000000..d67104ff1 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Student/37a4895d-def5-494d-9b62-d8c97ba9350b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_Student/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "_Spydaz_Web_AI_AGI_R1_Math_Student", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Student", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5736 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4881 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0514 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5098 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2927 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Student/89f92d24-19c1-4021-819d-9c7ed717046c.json 
b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Student/89f92d24-19c1-4021-819d-9c7ed717046c.json deleted file mode 100644 index 082a3205c..000000000 --- a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Student/89f92d24-19c1-4021-819d-9c7ed717046c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_Student/1762652579.723874", - "retrieved_timestamp": "1762652579.723874", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Student", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Student", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5735781060918363 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48808115770970123 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.50975 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.292719414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Teacher/0d53c27e-962c-428f-b540-35ab027883a8.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Teacher/0d53c27e-962c-428f-b540-35ab027883a8.json new file mode 100644 index 000000000..173bb8257 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Teacher/0d53c27e-962c-428f-b540-35ab027883a8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_Teacher/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "_Spydaz_Web_AI_AGI_R1_Math_Teacher", + 
"id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Teacher", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5772 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4805 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0544 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2861 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5222 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2956 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Teacher/24fa44cb-86d9-4e67-be8f-42f7fc574d52.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Teacher/24fa44cb-86d9-4e67-be8f-42f7fc574d52.json deleted file mode 100644 index d2ac5567d..000000000 --- a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Teacher/24fa44cb-86d9-4e67-be8f-42f7fc574d52.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_Teacher/1762652579.7241092", - "retrieved_timestamp": "1762652579.7241101", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Teacher", - "developer": "LeroyDyer", - 
"inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Teacher", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5772250960784053 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4805094960871836 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5222395833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2956283244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_001/6f7b2d91-24d6-442c-93a5-9afc88e9a308.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_001/6f7b2d91-24d6-442c-93a5-9afc88e9a308.json new file mode 100644 index 000000000..a4540e902 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_001/6f7b2d91-24d6-442c-93a5-9afc88e9a308.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_001/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "_Spydaz_Web_AI_AGI_R1_OmG_001", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_001", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5818 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4908 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0506 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4486 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2906 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_001/b13652e3-43f1-4670-94f7-1a0bbf622f33.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_001/b13652e3-43f1-4670-94f7-1a0bbf622f33.json deleted file mode 100644 index 2b150b98b..000000000 --- a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_001/b13652e3-43f1-4670-94f7-1a0bbf622f33.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_001/1762652579.72431", - "retrieved_timestamp": "1762652579.724311", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_001", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_001", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5817963004827191 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4907982146977475 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4486041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29055851063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_002/21793520-7d1a-4040-bb96-fa7fe98ae580.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_002/21793520-7d1a-4040-bb96-fa7fe98ae580.json new file mode 100644 index 000000000..c42060e34 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_002/21793520-7d1a-4040-bb96-fa7fe98ae580.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_002/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "_Spydaz_Web_AI_AGI_R1_OmG_002", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_002", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5462 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4655 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0498 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2785 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + 
"dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4511 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2867 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_002/8201723e-92fb-4207-afa8-df7db794c889.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_002/8201723e-92fb-4207-afa8-df7db794c889.json deleted file mode 100644 index 563bbfc83..000000000 --- a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_002/8201723e-92fb-4207-afa8-df7db794c889.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_002/1762652579.7245262", - "retrieved_timestamp": "1762652579.7245262", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_002", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_002", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.546150879665953 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4655028607746287 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04984894259818731 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45108333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28665226063829785 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Coder/59d53c40-5b16-4a70-a693-5fb554cf7614.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Coder/59d53c40-5b16-4a70-a693-5fb554cf7614.json new file mode 100644 index 000000000..3df532609 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Coder/59d53c40-5b16-4a70-a693-5fb554cf7614.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_Coder/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "_Spydaz_Web_AI_AGI_R1_OmG_Coder", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Coder", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4924 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4638 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0544 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2735 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.289 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Coder/e166fa17-c285-466e-ab2e-1eb106ebd271.json 
b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Coder/e166fa17-c285-466e-ab2e-1eb106ebd271.json deleted file mode 100644 index 0f21e4796..000000000 --- a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Coder/e166fa17-c285-466e-ab2e-1eb106ebd271.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_Coder/1762652579.724742", - "retrieved_timestamp": "1762652579.724742", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Coder", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Coder", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4923702442851634 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46376531085099754 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5624583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28897938829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Math/983323f2-7caa-42cb-8838-8ea041303a70.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Math/983323f2-7caa-42cb-8838-8ea041303a70.json deleted file mode 100644 index 6861cca0c..000000000 --- a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Math/983323f2-7caa-42cb-8838-8ea041303a70.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_Math/1762652579.7249558", - "retrieved_timestamp": "1762652579.724957", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - 
"source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Math", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Math", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5033112142448702 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4676503002757066 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4325729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29130651595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Math/b28a569c-6bdf-4547-a2ce-c3e224764be3.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Math/b28a569c-6bdf-4547-a2ce-c3e224764be3.json new file mode 100644 index 000000000..481f7a4c2 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Math/b28a569c-6bdf-4547-a2ce-c3e224764be3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_Math/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "_Spydaz_Web_AI_AGI_R1_OmG_Math", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Math", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5033 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + 
"hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4677 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0476 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4326 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2913 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_MathMaster/2de129c8-2259-4367-a619-85d9e8f61e06.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_MathMaster/2de129c8-2259-4367-a619-85d9e8f61e06.json new file mode 100644 index 000000000..b5ee95324 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_MathMaster/2de129c8-2259-4367-a619-85d9e8f61e06.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_MathMaster/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "_Spydaz_Web_AI_AGI_R1_OmG_MathMaster", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_MathMaster", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5558 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4742 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0536 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2878 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.451 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2672 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_MathMaster/a79378f7-01b3-4bf0-8b76-2e670d2a7366.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_MathMaster/a79378f7-01b3-4bf0-8b76-2e670d2a7366.json deleted file mode 100644 index d97a9f425..000000000 --- a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_MathMaster/a79378f7-01b3-4bf0-8b76-2e670d2a7366.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_MathMaster/1762652579.7251709", - "retrieved_timestamp": "1762652579.7251709", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_MathMaster", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_MathMaster", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5558429411738631 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47422312505675873 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 
5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45098958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2672041223404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Student_Coder/1e7531fc-9f12-4c7c-8bf5-44511c37c23b.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Student_Coder/1e7531fc-9f12-4c7c-8bf5-44511c37c23b.json deleted file mode 100644 index 12c1c3c1e..000000000 --- a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Student_Coder/1e7531fc-9f12-4c7c-8bf5-44511c37c23b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Student_Coder/1762652579.725384", - "retrieved_timestamp": "1762652579.725385", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Student_Coder", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Student_Coder", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5449518388985669 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4650844324968853 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06570996978851963 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43883333333333335 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684507978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Student_Coder/c242030f-fb2b-42dc-a5d1-687273b17282.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Student_Coder/c242030f-fb2b-42dc-a5d1-687273b17282.json new file mode 100644 index 000000000..d5484f88e --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Student_Coder/c242030f-fb2b-42dc-a5d1-687273b17282.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Student_Coder/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "_Spydaz_Web_AI_AGI_R1_Student_Coder", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Student_Coder", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.545 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4651 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0657 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2844 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4388 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2768 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Teacher_Coder/3b3fdb16-b6e1-40c8-9ac0-02f1f2207eb7.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Teacher_Coder/3b3fdb16-b6e1-40c8-9ac0-02f1f2207eb7.json new file mode 100644 index 000000000..f0fe06eb1 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Teacher_Coder/3b3fdb16-b6e1-40c8-9ac0-02f1f2207eb7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Teacher_Coder/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "_Spydaz_Web_AI_AGI_R1_Teacher_Coder", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Teacher_Coder", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5082 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4797 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.065 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4338 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2845 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Teacher_Coder/64c0088b-f9e7-4a9a-b449-3e1b514370ff.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Teacher_Coder/64c0088b-f9e7-4a9a-b449-3e1b514370ff.json deleted file mode 100644 index c4ea1fe33..000000000 --- a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Teacher_Coder/64c0088b-f9e7-4a9a-b449-3e1b514370ff.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Teacher_Coder/1762652579.7256", - "retrieved_timestamp": "1762652579.725601", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Teacher_Coder", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Teacher_Coder", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5081572449988254 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47965526444811907 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4338125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28449135638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Top_Student/d652c8f6-d5b4-482f-91c7-5eb9529765c1.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Top_Student/d652c8f6-d5b4-482f-91c7-5eb9529765c1.json deleted file mode 100644 index 9e42a8560..000000000 --- a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Top_Student/d652c8f6-d5b4-482f-91c7-5eb9529765c1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Top_Student/1762652579.725811", - "retrieved_timestamp": "1762652579.725811", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Top_Student", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Top_Student", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6039530667517742 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49877449828070924 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07250755287009064 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5397916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30244348404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Top_Student/ef6e8e0d-7ba4-45ea-aaf7-617f68f2e97c.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Top_Student/ef6e8e0d-7ba4-45ea-aaf7-617f68f2e97c.json new file mode 100644 index 000000000..bf973d06a --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Top_Student/ef6e8e0d-7ba4-45ea-aaf7-617f68f2e97c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Top_Student/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "_Spydaz_Web_AI_AGI_R1_Top_Student", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Top_Student", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": 
{ + "score": 0.604 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4988 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0725 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5398 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3024 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_X1/7c72e837-92fd-4f3b-9c4f-205ffc93ac70.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_X1/7c72e837-92fd-4f3b-9c4f-205ffc93ac70.json deleted file mode 100644 index b85f5de91..000000000 --- a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_X1/7c72e837-92fd-4f3b-9c4f-205ffc93ac70.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_X1/1762652579.7260191", - "retrieved_timestamp": "1762652579.72602", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_X1", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_X1", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.427323944910615 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47589342126093026 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4231770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2890625 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_X1/f8c131a4-1fee-4694-8753-88853418ef4b.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_X1/f8c131a4-1fee-4694-8753-88853418ef4b.json new file mode 100644 index 000000000..6d959d14f --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_X1/f8c131a4-1fee-4694-8753-88853418ef4b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_X1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "_Spydaz_Web_AI_AGI_R1_X1", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_X1", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4273 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4759 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0566 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4232 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2891 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_X2/169fe3b3-527a-408f-9442-5bc3616cc320.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_X2/169fe3b3-527a-408f-9442-5bc3616cc320.json deleted file mode 100644 index ab5ffa5cf..000000000 --- a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_X2/169fe3b3-527a-408f-9442-5bc3616cc320.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_X2/1762652579.7262201", - "retrieved_timestamp": "1762652579.726221", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_X2", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_X2", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5433782364127182 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4785559277736029 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46953125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29205452127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_X2/27dec9ff-fb18-43dd-949f-7c0587a5858f.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_X2/27dec9ff-fb18-43dd-949f-7c0587a5858f.json new file mode 100644 index 000000000..0a0055521 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_X2/27dec9ff-fb18-43dd-949f-7c0587a5858f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_X2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "_Spydaz_Web_AI_AGI_R1_X2", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_X2", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5434 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4786 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0612 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4695 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2921 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_RP_R1/060df34d-ab67-43e1-bd56-ebaceb77abd3.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_RP_R1/060df34d-ab67-43e1-bd56-ebaceb77abd3.json new file mode 100644 index 000000000..b431b930c --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_RP_R1/060df34d-ab67-43e1-bd56-ebaceb77abd3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_RP_R1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "_Spydaz_Web_AI_AGI_RP_R1", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_RP_R1", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5426 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4701 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0604 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2693 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4201 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_RP_R1/fd4405cf-9849-4606-a01c-a20459198853.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_RP_R1/fd4405cf-9849-4606-a01c-a20459198853.json deleted file mode 100644 
index 7957c9c2b..000000000 --- a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_RP_R1/fd4405cf-9849-4606-a01c-a20459198853.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_RP_R1/1762652579.726439", - "retrieved_timestamp": "1762652579.72644", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_RP_R1", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_RP_R1", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5426036250482054 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4701061648636955 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42013541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28939494680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_BIBLE_002/060f29d1-8b1d-4651-808d-b1419bd76cd9.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_BIBLE_002/060f29d1-8b1d-4651-808d-b1419bd76cd9.json deleted file mode 100644 index e5fcdadb6..000000000 --- a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_BIBLE_002/060f29d1-8b1d-4651-808d-b1419bd76cd9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_BIBLE_002/1762652579.72666", - "retrieved_timestamp": "1762652579.7266612", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_BIBLE_002", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": 
"LeroyDyer/_Spydaz_Web_AI_BIBLE_002", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21949538336059432 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3289070186514165 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.017371601208459216 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34069791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13680186170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_BIBLE_002/a6357673-3daa-4593-8593-2b65a7d5477e.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_BIBLE_002/a6357673-3daa-4593-8593-2b65a7d5477e.json new file mode 100644 index 000000000..a62a6bb3e --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_BIBLE_002/a6357673-3daa-4593-8593-2b65a7d5477e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_BIBLE_002/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "_Spydaz_Web_AI_BIBLE_002", + "id": "LeroyDyer/_Spydaz_Web_AI_BIBLE_002", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2195 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.3289 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0174 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2844 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3407 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1368 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_ChatML_002/07981f28-b019-42f8-b14b-44ab73ebaa0a.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_ChatML_002/07981f28-b019-42f8-b14b-44ab73ebaa0a.json deleted file mode 100644 index 3ea8e767e..000000000 --- a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_ChatML_002/07981f28-b019-42f8-b14b-44ab73ebaa0a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_ChatML_002/1762652579.7268748", - "retrieved_timestamp": "1762652579.726876", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_ChatML_002", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_ChatML_002", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24122772022677608 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3106383598957094 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.011329305135951661 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3623125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10945811170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_ChatML_002/121d4877-1955-48db-a23a-6b0ad0623b9e.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_ChatML_002/121d4877-1955-48db-a23a-6b0ad0623b9e.json new file mode 100644 index 000000000..d97ec510d --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_ChatML_002/121d4877-1955-48db-a23a-6b0ad0623b9e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_ChatML_002/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "_Spydaz_Web_AI_ChatML_002", + "id": "LeroyDyer/_Spydaz_Web_AI_ChatML_002", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2412 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3106 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0113 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3623 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1095 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_ChatQA/1f1eab02-219e-4ad8-af50-e103541e1c9d.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_ChatQA/1f1eab02-219e-4ad8-af50-e103541e1c9d.json new file mode 100644 index 000000000..21baf209e --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_ChatQA/1f1eab02-219e-4ad8-af50-e103541e1c9d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_ChatQA/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "_Spydaz_Web_AI_ChatQA", + "id": "LeroyDyer/_Spydaz_Web_AI_ChatQA", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1415 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3236 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0098 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3447 + } + }, + { + "evaluation_name": "MMLU-PRO", + 
"source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1475 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_ChatQA/4e72d3b7-4ebb-470d-8f86-66d6cb28095f.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_ChatQA/4e72d3b7-4ebb-470d-8f86-66d6cb28095f.json deleted file mode 100644 index 3c49e408b..000000000 --- a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_ChatQA/4e72d3b7-4ebb-470d-8f86-66d6cb28095f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_ChatQA/1762652579.727107", - "retrieved_timestamp": "1762652579.727108", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_ChatQA", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_ChatQA", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1414591062824417 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32359493837413505 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3447291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14752327127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_ChatQA_003/471aac2a-5c4b-4b1b-a56b-490fafc444d8.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_ChatQA_003/471aac2a-5c4b-4b1b-a56b-490fafc444d8.json deleted file mode 100644 index 8d7ade596..000000000 --- a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_ChatQA_003/471aac2a-5c4b-4b1b-a56b-490fafc444d8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": 
"0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_ChatQA_003/1762652579.727351", - "retrieved_timestamp": "1762652579.7273521", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_ChatQA_003", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_ChatQA_003", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22091938279321088 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3171811407815537 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38184375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11328125 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_ChatQA_003/b4cccfb3-1c17-48a3-a211-a26c44de757f.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_ChatQA_003/b4cccfb3-1c17-48a3-a211-a26c44de757f.json new file mode 100644 index 000000000..5a698b13e --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_ChatQA_003/b4cccfb3-1c17-48a3-a211-a26c44de757f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_ChatQA_003/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "_Spydaz_Web_AI_ChatQA_003", + "id": "LeroyDyer/_Spydaz_Web_AI_ChatQA_003", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": 
"google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2209 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3172 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0106 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3818 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1133 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_TEMP_/05e97a86-681d-42a2-8a47-beade25d8fc9.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_TEMP_/05e97a86-681d-42a2-8a47-beade25d8fc9.json new file mode 100644 index 000000000..09c825d37 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_TEMP_/05e97a86-681d-42a2-8a47-beade25d8fc9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_TEMP_/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "_Spydaz_Web_AI_TEMP_", + "id": "LeroyDyer/_Spydaz_Web_AI_TEMP_", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4795 + } + }, + { 
+ "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4957 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1239 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4218 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_TEMP_/f44f513c-0814-4f3b-94a4-9e28318da40e.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_TEMP_/f44f513c-0814-4f3b-94a4-9e28318da40e.json deleted file mode 100644 index 195d77997..000000000 --- a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_TEMP_/f44f513c-0814-4f3b-94a4-9e28318da40e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_TEMP_/1762652579.7275891", - "retrieved_timestamp": "1762652579.7275898", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_TEMP_", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_TEMP_", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47953097780555587 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.495695749059555 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12386706948640483 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42175 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3120844414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_Top_Teacher_/6c0899b4-f066-45f6-827d-11c535ef0634.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_Top_Teacher_/6c0899b4-f066-45f6-827d-11c535ef0634.json new file mode 100644 index 000000000..c36cff39f --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_Top_Teacher_/6c0899b4-f066-45f6-827d-11c535ef0634.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_Top_Teacher_/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "_Spydaz_Web_AI_Top_Teacher_", + "id": "LeroyDyer/_Spydaz_Web_AI_Top_Teacher_", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4404 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4891 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1156 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, 
+ "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4366 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.315 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_Top_Teacher_/a4beba0f-b860-4d7d-b1c3-0f569ba59171.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_Top_Teacher_/a4beba0f-b860-4d7d-b1c3-0f569ba59171.json deleted file mode 100644 index 6fcf26bf2..000000000 --- a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_Top_Teacher_/a4beba0f-b860-4d7d-b1c3-0f569ba59171.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_Top_Teacher_/1762652579.728002", - "retrieved_timestamp": "1762652579.728004", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_Top_Teacher_", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_Top_Teacher_", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44038817005545283 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48909617780536035 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11555891238670694 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4366041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3149933510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/LightningRodLabs/Flashlight-v1.0/cd4408c3-d966-4195-bcf2-5bc80eca1501.json b/data/hfopenllm_v2/LightningRodLabs/Flashlight-v1.0/cd4408c3-d966-4195-bcf2-5bc80eca1501.json deleted file mode 100644 index afb4f6f99..000000000 --- a/data/hfopenllm_v2/LightningRodLabs/Flashlight-v1.0/cd4408c3-d966-4195-bcf2-5bc80eca1501.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LightningRodLabs_Flashlight-v1.0/1762652579.7282822", - "retrieved_timestamp": "1762652579.728283", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LightningRodLabs/Flashlight-v1.0", - "developer": "LightningRodLabs", - "inference_platform": "unknown", - "id": "LightningRodLabs/Flashlight-v1.0", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6745446526327921 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6876833310149727 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49697885196374625 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3422818791946309 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41009375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5402260638297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/LightningRodLabs/Flashlight-v1.0/f9660557-b9f6-4ecc-b260-c245f0e62b5b.json b/data/hfopenllm_v2/LightningRodLabs/Flashlight-v1.0/f9660557-b9f6-4ecc-b260-c245f0e62b5b.json new file mode 100644 index 000000000..b9c99ac4d --- /dev/null +++ b/data/hfopenllm_v2/LightningRodLabs/Flashlight-v1.0/f9660557-b9f6-4ecc-b260-c245f0e62b5b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LightningRodLabs_Flashlight-v1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Flashlight-v1.0", + "id": "LightningRodLabs/Flashlight-v1.0", + "developer": "LightningRodLabs", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6745 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6877 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.497 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3423 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4101 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5402 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LightningRodLabs/Flashlight-v1.1/64c75370-981d-43ae-9823-d4fb0696d468.json b/data/hfopenllm_v2/LightningRodLabs/Flashlight-v1.1/64c75370-981d-43ae-9823-d4fb0696d468.json deleted file mode 100644 index 6d8a56a1a..000000000 --- a/data/hfopenllm_v2/LightningRodLabs/Flashlight-v1.1/64c75370-981d-43ae-9823-d4fb0696d468.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LightningRodLabs_Flashlight-v1.1/1762652579.728596", - "retrieved_timestamp": "1762652579.728597", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"LightningRodLabs/Flashlight-v1.1", - "developer": "LightningRodLabs", - "inference_platform": "unknown", - "id": "LightningRodLabs/Flashlight-v1.1", - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6720967034136092 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6901141327534415 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5324773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33976510067114096 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4047604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5415558510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/LightningRodLabs/Flashlight-v1.1/89168032-5840-4c2c-821e-b3d717ade46f.json b/data/hfopenllm_v2/LightningRodLabs/Flashlight-v1.1/89168032-5840-4c2c-821e-b3d717ade46f.json new file mode 100644 index 000000000..4c4c25448 --- /dev/null +++ b/data/hfopenllm_v2/LightningRodLabs/Flashlight-v1.1/89168032-5840-4c2c-821e-b3d717ade46f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LightningRodLabs_Flashlight-v1.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Flashlight-v1.1", + "id": "LightningRodLabs/Flashlight-v1.1", + "developer": "LightningRodLabs", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6721 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6901 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5325 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3398 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4048 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5416 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LightningRodLabs/Flashlight-v1.2/10d0aa63-67d9-4dba-9bdc-db7ab3b4547d.json b/data/hfopenllm_v2/LightningRodLabs/Flashlight-v1.2/10d0aa63-67d9-4dba-9bdc-db7ab3b4547d.json new file mode 100644 index 000000000..5dd6485a7 --- /dev/null +++ b/data/hfopenllm_v2/LightningRodLabs/Flashlight-v1.2/10d0aa63-67d9-4dba-9bdc-db7ab3b4547d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LightningRodLabs_Flashlight-v1.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Flashlight-v1.2", + "id": "LightningRodLabs/Flashlight-v1.2", + "developer": "LightningRodLabs", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.436 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3265 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": 
"hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1556 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2357 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4554 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2485 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LightningRodLabs/Flashlight-v1.2/404afbae-0393-48e6-874c-e1cb28e9a1eb.json b/data/hfopenllm_v2/LightningRodLabs/Flashlight-v1.2/404afbae-0393-48e6-874c-e1cb28e9a1eb.json deleted file mode 100644 index 758dc0d88..000000000 --- a/data/hfopenllm_v2/LightningRodLabs/Flashlight-v1.2/404afbae-0393-48e6-874c-e1cb28e9a1eb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LightningRodLabs_Flashlight-v1.2/1762652579.728818", - "retrieved_timestamp": "1762652579.728819", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LightningRodLabs/Flashlight-v1.2", - "developer": "LightningRodLabs", - "inference_platform": "unknown", - "id": "LightningRodLabs/Flashlight-v1.2", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4359920566319587 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3264526807518731 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1555891238670695 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23573825503355705 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45536458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24850398936170212 - } - } - ] -} diff --git a/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-2B-SLERP-V1/6f66ae5b-8cb6-4263-98a4-4a1eddfaca10.json b/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-2B-SLERP-V1/6f66ae5b-8cb6-4263-98a4-4a1eddfaca10.json new file mode 100644 index 000000000..9f4103403 --- /dev/null +++ b/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-2B-SLERP-V1/6f66ae5b-8cb6-4263-98a4-4a1eddfaca10.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lil-R_2_PRYMMAL-ECE-2B-SLERP-V1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "2_PRYMMAL-ECE-2B-SLERP-V1", + "id": "Lil-R/2_PRYMMAL-ECE-2B-SLERP-V1", + "developer": "Lil-R", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5823 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4287 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0914 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.4375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2678 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-2B-SLERP-V1/d53a7070-911a-4a5e-ba0c-766c4f39b3f5.json b/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-2B-SLERP-V1/d53a7070-911a-4a5e-ba0c-766c4f39b3f5.json deleted file mode 100644 index 49c305794..000000000 --- a/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-2B-SLERP-V1/d53a7070-911a-4a5e-ba0c-766c4f39b3f5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lil-R_2_PRYMMAL-ECE-2B-SLERP-V1/1762652579.7290292", - "retrieved_timestamp": "1762652579.72903", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lil-R/2_PRYMMAL-ECE-2B-SLERP-V1", - "developer": "Lil-R", - "inference_platform": "unknown", - "id": "Lil-R/2_PRYMMAL-ECE-2B-SLERP-V1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5823459531820016 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4287069505821554 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09138972809667674 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43746875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2677859042553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-2B-SLERP-V2/25368664-1f32-4d69-9afc-91d58efd01e2.json b/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-2B-SLERP-V2/25368664-1f32-4d69-9afc-91d58efd01e2.json deleted file mode 100644 index e3b438aeb..000000000 --- a/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-2B-SLERP-V2/25368664-1f32-4d69-9afc-91d58efd01e2.json +++ /dev/null @@ -1,105 
+0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lil-R_2_PRYMMAL-ECE-2B-SLERP-V2/1762652579.729285", - "retrieved_timestamp": "1762652579.729285", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lil-R/2_PRYMMAL-ECE-2B-SLERP-V2", - "developer": "Lil-R", - "inference_platform": "unknown", - "id": "Lil-R/2_PRYMMAL-ECE-2B-SLERP-V2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5542693386880144 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43764741906109417 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09441087613293052 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44816666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2744348404255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-2B-SLERP-V2/5e715199-7030-47b4-89c6-83ba0968c07c.json b/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-2B-SLERP-V2/5e715199-7030-47b4-89c6-83ba0968c07c.json new file mode 100644 index 000000000..343a94755 --- /dev/null +++ b/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-2B-SLERP-V2/5e715199-7030-47b4-89c6-83ba0968c07c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lil-R_2_PRYMMAL-ECE-2B-SLERP-V2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "2_PRYMMAL-ECE-2B-SLERP-V2", + "id": "Lil-R/2_PRYMMAL-ECE-2B-SLERP-V2", + "developer": "Lil-R", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": 
"google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5543 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4376 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0944 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4482 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2744 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP-V1/3fca39e8-443d-47da-a858-83a68c18eec9.json b/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP-V1/3fca39e8-443d-47da-a858-83a68c18eec9.json new file mode 100644 index 000000000..e5d8c93e6 --- /dev/null +++ b/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP-V1/3fca39e8-443d-47da-a858-83a68c18eec9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lil-R_2_PRYMMAL-ECE-7B-SLERP-V1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "2_PRYMMAL-ECE-7B-SLERP-V1", + "id": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V1", + "developer": "Lil-R", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1073 + } + 
}, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3053 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0008 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2508 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3911 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1124 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP-V1/dcadbfb3-fbeb-4108-bc27-7ccfc7ba1e3a.json b/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP-V1/dcadbfb3-fbeb-4108-bc27-7ccfc7ba1e3a.json deleted file mode 100644 index 315069798..000000000 --- a/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP-V1/dcadbfb3-fbeb-4108-bc27-7ccfc7ba1e3a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lil-R_2_PRYMMAL-ECE-7B-SLERP-V1/1762652579.7297568", - "retrieved_timestamp": "1762652579.7297568", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V1", - "developer": "Lil-R", - "inference_platform": "unknown", - "id": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10733742026711349 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.30525797550329686 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25083892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3910833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11236702127659574 - } - } - ] -} diff --git a/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP-V2/41c47381-66d5-4d3a-8bfb-4269cb882385.json b/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP-V2/41c47381-66d5-4d3a-8bfb-4269cb882385.json deleted file mode 100644 index cce2de56e..000000000 --- a/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP-V2/41c47381-66d5-4d3a-8bfb-4269cb882385.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lil-R_2_PRYMMAL-ECE-7B-SLERP-V2/1762652579.729984", - "retrieved_timestamp": "1762652579.729985", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V2", - "developer": "Lil-R", - "inference_platform": "unknown", - "id": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10733742026711349 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30525797550329686 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25083892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.3910833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11236702127659574 - } - } - ] -} diff --git a/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP-V2/b7518bd2-d3af-49e6-823a-f8d507e8e60f.json b/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP-V2/b7518bd2-d3af-49e6-823a-f8d507e8e60f.json new file mode 100644 index 000000000..6709db764 --- /dev/null +++ b/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP-V2/b7518bd2-d3af-49e6-823a-f8d507e8e60f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lil-R_2_PRYMMAL-ECE-7B-SLERP-V2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "2_PRYMMAL-ECE-7B-SLERP-V2", + "id": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V2", + "developer": "Lil-R", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1073 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3053 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0008 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2508 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3911 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1124 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP-V3/0c21359f-8f0b-44a8-813e-a5f612f13658.json b/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP-V3/0c21359f-8f0b-44a8-813e-a5f612f13658.json deleted file mode 100644 index 17876b9ba..000000000 --- a/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP-V3/0c21359f-8f0b-44a8-813e-a5f612f13658.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lil-R_2_PRYMMAL-ECE-7B-SLERP-V3/1762652579.730203", - "retrieved_timestamp": "1762652579.730203", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V3", - "developer": "Lil-R", - "inference_platform": "unknown", - "id": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22346706738121516 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.357839880712804 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4107083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18168218085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP-V3/fa399f16-1652-430c-be19-afaf5ab96be1.json b/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP-V3/fa399f16-1652-430c-be19-afaf5ab96be1.json new file mode 100644 index 000000000..04700dc51 --- /dev/null +++ b/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP-V3/fa399f16-1652-430c-be19-afaf5ab96be1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lil-R_2_PRYMMAL-ECE-7B-SLERP-V3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging 
Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "2_PRYMMAL-ECE-7B-SLERP-V3", + "id": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V3", + "developer": "Lil-R", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2235 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3578 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.006 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2567 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4107 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1817 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP/aa396cb3-10aa-4777-a185-fcb38ffc5ec3.json b/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP/aa396cb3-10aa-4777-a185-fcb38ffc5ec3.json deleted file mode 100644 index 4f2a878b7..000000000 --- a/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP/aa396cb3-10aa-4777-a185-fcb38ffc5ec3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lil-R_2_PRYMMAL-ECE-7B-SLERP/1762652579.7294989", - "retrieved_timestamp": "1762652579.7294998", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lil-R/2_PRYMMAL-ECE-7B-SLERP", - "developer": 
"Lil-R", - "inference_platform": "unknown", - "id": "Lil-R/2_PRYMMAL-ECE-7B-SLERP", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5577412376937636 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5556642048146725 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3632930513595166 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43960416666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45071476063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP/cbe5032b-122c-4a0b-a099-50e998a4bc77.json b/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP/cbe5032b-122c-4a0b-a099-50e998a4bc77.json new file mode 100644 index 000000000..ad0887f0c --- /dev/null +++ b/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP/cbe5032b-122c-4a0b-a099-50e998a4bc77.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lil-R_2_PRYMMAL-ECE-7B-SLERP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "2_PRYMMAL-ECE-7B-SLERP", + "id": "Lil-R/2_PRYMMAL-ECE-7B-SLERP", + "developer": "Lil-R", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5577 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { 
+ "score": 0.5557 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3633 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4396 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4507 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lil-R/PRYMMAL-ECE-1B-SLERP-V1/a863e655-ee86-4f39-ae1a-0a65992f7eb4.json b/data/hfopenllm_v2/Lil-R/PRYMMAL-ECE-1B-SLERP-V1/a863e655-ee86-4f39-ae1a-0a65992f7eb4.json deleted file mode 100644 index dd77dbcf3..000000000 --- a/data/hfopenllm_v2/Lil-R/PRYMMAL-ECE-1B-SLERP-V1/a863e655-ee86-4f39-ae1a-0a65992f7eb4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lil-R_PRYMMAL-ECE-1B-SLERP-V1/1762652579.7304142", - "retrieved_timestamp": "1762652579.730415", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lil-R/PRYMMAL-ECE-1B-SLERP-V1", - "developer": "Lil-R", - "inference_platform": "unknown", - "id": "Lil-R/PRYMMAL-ECE-1B-SLERP-V1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2874395492847866 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41904526564708194 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10347432024169184 - } - }, - { - "evaluation_name": 
"GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39743749999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2925531914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/Lil-R/PRYMMAL-ECE-1B-SLERP-V1/fd8c3209-dcc0-4d27-a3aa-d0f76ef86f8d.json b/data/hfopenllm_v2/Lil-R/PRYMMAL-ECE-1B-SLERP-V1/fd8c3209-dcc0-4d27-a3aa-d0f76ef86f8d.json new file mode 100644 index 000000000..82de476ba --- /dev/null +++ b/data/hfopenllm_v2/Lil-R/PRYMMAL-ECE-1B-SLERP-V1/fd8c3209-dcc0-4d27-a3aa-d0f76ef86f8d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lil-R_PRYMMAL-ECE-1B-SLERP-V1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PRYMMAL-ECE-1B-SLERP-V1", + "id": "Lil-R/PRYMMAL-ECE-1B-SLERP-V1", + "developer": "Lil-R", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2874 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.419 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1035 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3974 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2926 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lil-R/PRYMMAL-ECE-7B-SLERP-V8/1a18d49c-ad7b-4823-abbc-7191e9d659cd.json b/data/hfopenllm_v2/Lil-R/PRYMMAL-ECE-7B-SLERP-V8/1a18d49c-ad7b-4823-abbc-7191e9d659cd.json new file mode 100644 index 000000000..1bab75b49 --- /dev/null +++ b/data/hfopenllm_v2/Lil-R/PRYMMAL-ECE-7B-SLERP-V8/1a18d49c-ad7b-4823-abbc-7191e9d659cd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lil-R_PRYMMAL-ECE-7B-SLERP-V8/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PRYMMAL-ECE-7B-SLERP-V8", + "id": "Lil-R/PRYMMAL-ECE-7B-SLERP-V8", + "developer": "Lil-R", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1258 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2955 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0098 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3631 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": 
"TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1128 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lil-R/PRYMMAL-ECE-7B-SLERP-V8/6a81c514-57b9-4a45-9a1a-0378e7554d04.json b/data/hfopenllm_v2/Lil-R/PRYMMAL-ECE-7B-SLERP-V8/6a81c514-57b9-4a45-9a1a-0378e7554d04.json deleted file mode 100644 index 2ae0f36d5..000000000 --- a/data/hfopenllm_v2/Lil-R/PRYMMAL-ECE-7B-SLERP-V8/6a81c514-57b9-4a45-9a1a-0378e7554d04.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lil-R_PRYMMAL-ECE-7B-SLERP-V8/1762652579.7306318", - "retrieved_timestamp": "1762652579.730633", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lil-R/PRYMMAL-ECE-7B-SLERP-V8", - "developer": "Lil-R", - "inference_platform": "unknown", - "id": "Lil-R/PRYMMAL-ECE-7B-SLERP-V8", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1258471965495995 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2955092966258663 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36314583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11278257978723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/LilRg/10PRYMMAL-3B-slerp/9e2c614e-1104-43a6-9e8f-b7851562e01a.json b/data/hfopenllm_v2/LilRg/10PRYMMAL-3B-slerp/9e2c614e-1104-43a6-9e8f-b7851562e01a.json new file mode 100644 index 000000000..3ab3feb1c --- /dev/null +++ b/data/hfopenllm_v2/LilRg/10PRYMMAL-3B-slerp/9e2c614e-1104-43a6-9e8f-b7851562e01a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LilRg_10PRYMMAL-3B-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + 
"source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "10PRYMMAL-3B-slerp", + "id": "LilRg/10PRYMMAL-3B-slerp", + "developer": "LilRg", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1946 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.532 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1495 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3213 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4529 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3881 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LilRg/10PRYMMAL-3B-slerp/e9371530-675d-48d1-9145-7ea15c893833.json b/data/hfopenllm_v2/LilRg/10PRYMMAL-3B-slerp/e9371530-675d-48d1-9145-7ea15c893833.json deleted file mode 100644 index f161199d4..000000000 --- a/data/hfopenllm_v2/LilRg/10PRYMMAL-3B-slerp/e9371530-675d-48d1-9145-7ea15c893833.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LilRg_10PRYMMAL-3B-slerp/1762652579.7308428", - "retrieved_timestamp": "1762652579.7308428", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - 
"model_info": { - "name": "LilRg/10PRYMMAL-3B-slerp", - "developer": "LilRg", - "inference_platform": "unknown", - "id": "LilRg/10PRYMMAL-3B-slerp", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1945903535951276 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5320377091634505 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14954682779456194 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45290625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3881316489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/LilRg/ECE-1B-merge-PRYMMAL/3fefac8e-d5aa-4998-ab60-6e3dcc49f77f.json b/data/hfopenllm_v2/LilRg/ECE-1B-merge-PRYMMAL/3fefac8e-d5aa-4998-ab60-6e3dcc49f77f.json deleted file mode 100644 index 2eaf78e3d..000000000 --- a/data/hfopenllm_v2/LilRg/ECE-1B-merge-PRYMMAL/3fefac8e-d5aa-4998-ab60-6e3dcc49f77f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LilRg_ECE-1B-merge-PRYMMAL/1762652579.7310941", - "retrieved_timestamp": "1762652579.731095", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LilRg/ECE-1B-merge-PRYMMAL", - "developer": "LilRg", - "inference_platform": "unknown", - "id": "LilRg/ECE-1B-merge-PRYMMAL", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27122811916825135 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42345600176908743 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10120845921450151 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3801041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2906416223404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/LilRg/ECE-1B-merge-PRYMMAL/7d4b83ab-9c9d-46e5-8cbf-b8afcf781230.json b/data/hfopenllm_v2/LilRg/ECE-1B-merge-PRYMMAL/7d4b83ab-9c9d-46e5-8cbf-b8afcf781230.json new file mode 100644 index 000000000..93bc8cf86 --- /dev/null +++ b/data/hfopenllm_v2/LilRg/ECE-1B-merge-PRYMMAL/7d4b83ab-9c9d-46e5-8cbf-b8afcf781230.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LilRg_ECE-1B-merge-PRYMMAL/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-1B-merge-PRYMMAL", + "id": "LilRg/ECE-1B-merge-PRYMMAL", + "developer": "LilRg", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2712 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4235 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1012 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.281 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3801 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2906 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LilRg/ECE_Finetunning/a42b5d7e-be7f-4cde-aaf0-001e2cf05a44.json b/data/hfopenllm_v2/LilRg/ECE_Finetunning/a42b5d7e-be7f-4cde-aaf0-001e2cf05a44.json new file mode 100644 index 000000000..fd422f5f3 --- /dev/null +++ b/data/hfopenllm_v2/LilRg/ECE_Finetunning/a42b5d7e-be7f-4cde-aaf0-001e2cf05a44.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LilRg_ECE_Finetunning/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE_Finetunning", + "id": "LilRg/ECE_Finetunning", + "developer": "LilRg", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 16.061 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0445 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4732 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0453 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, 
+ "max_score": 1.0 + }, + "score_details": { + "score": 0.3839 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3191 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LilRg/ECE_Finetunning/f20fd926-d690-4fe2-80a4-3e79dc37f03f.json b/data/hfopenllm_v2/LilRg/ECE_Finetunning/f20fd926-d690-4fe2-80a4-3e79dc37f03f.json deleted file mode 100644 index e055de7dd..000000000 --- a/data/hfopenllm_v2/LilRg/ECE_Finetunning/f20fd926-d690-4fe2-80a4-3e79dc37f03f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LilRg_ECE_Finetunning/1762652579.731307", - "retrieved_timestamp": "1762652579.731308", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LilRg/ECE_Finetunning", - "developer": "LilRg", - "inference_platform": "unknown", - "id": "LilRg/ECE_Finetunning", - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 16.061 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04453849120334047 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47321596790730514 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38394791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3191489361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/LilRg/PRYMMAL-6B-slerp/21f6688c-be52-4352-9c95-d37c0a5f6c94.json b/data/hfopenllm_v2/LilRg/PRYMMAL-6B-slerp/21f6688c-be52-4352-9c95-d37c0a5f6c94.json new file mode 100644 index 000000000..ecbfdcf37 --- /dev/null +++ b/data/hfopenllm_v2/LilRg/PRYMMAL-6B-slerp/21f6688c-be52-4352-9c95-d37c0a5f6c94.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + 
"evaluation_id": "hfopenllm_v2/LilRg_PRYMMAL-6B-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PRYMMAL-6B-slerp", + "id": "LilRg/PRYMMAL-6B-slerp", + "developer": "LilRg", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.293 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1153 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2868 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2458 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3698 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1108 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LilRg/PRYMMAL-6B-slerp/8fedde0a-96fe-4a6f-9e0f-87832cfd418e.json b/data/hfopenllm_v2/LilRg/PRYMMAL-6B-slerp/8fedde0a-96fe-4a6f-9e0f-87832cfd418e.json deleted file mode 100644 index 992a03e72..000000000 --- a/data/hfopenllm_v2/LilRg/PRYMMAL-6B-slerp/8fedde0a-96fe-4a6f-9e0f-87832cfd418e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LilRg_PRYMMAL-6B-slerp/1762652579.731526", - "retrieved_timestamp": "1762652579.7315269", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": 
"Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LilRg/PRYMMAL-6B-slerp", - "developer": "LilRg", - "inference_platform": "unknown", - "id": "LilRg/PRYMMAL-6B-slerp", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.293 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11533065599276586 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28676215692036117 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24580536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36975 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1107878989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V3/a656eacf-8134-446c-8417-e1c3c54fe941.json b/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V3/a656eacf-8134-446c-8417-e1c3c54fe941.json deleted file mode 100644 index 4f3b3aedb..000000000 --- a/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V3/a656eacf-8134-446c-8417-e1c3c54fe941.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LilRg_PRYMMAL-ECE-7B-SLERP-V3/1762652579.731744", - "retrieved_timestamp": "1762652579.731745", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LilRg/PRYMMAL-ECE-7B-SLERP-V3", - "developer": "LilRg", - "inference_platform": "unknown", - "id": "LilRg/PRYMMAL-ECE-7B-SLERP-V3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12432346174816154 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2957239084980124 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36714583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11269946808510638 - } - } - ] -} diff --git a/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V3/e92ba586-7bee-4a9b-b388-e35efde3d36f.json b/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V3/e92ba586-7bee-4a9b-b388-e35efde3d36f.json new file mode 100644 index 000000000..e89c0598f --- /dev/null +++ b/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V3/e92ba586-7bee-4a9b-b388-e35efde3d36f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LilRg_PRYMMAL-ECE-7B-SLERP-V3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PRYMMAL-ECE-7B-SLERP-V3", + "id": "LilRg/PRYMMAL-ECE-7B-SLERP-V3", + "developer": "LilRg", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1243 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2957 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0098 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2567 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3671 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1127 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V4/0d276bd3-a338-4383-88b0-9e653ae01387.json b/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V4/0d276bd3-a338-4383-88b0-9e653ae01387.json deleted file mode 100644 index e43222e2d..000000000 --- a/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V4/0d276bd3-a338-4383-88b0-9e653ae01387.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LilRg_PRYMMAL-ECE-7B-SLERP-V4/1762652579.731953", - "retrieved_timestamp": "1762652579.7319539", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LilRg/PRYMMAL-ECE-7B-SLERP-V4", - "developer": "LilRg", - "inference_platform": "unknown", - "id": "LilRg/PRYMMAL-ECE-7B-SLERP-V4", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12492298213185458 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2957239084980124 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36714583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on 
MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11269946808510638 - } - } - ] -} diff --git a/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V4/45ed0bb3-efbf-4a32-9735-d814aa08790a.json b/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V4/45ed0bb3-efbf-4a32-9735-d814aa08790a.json new file mode 100644 index 000000000..caaf555af --- /dev/null +++ b/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V4/45ed0bb3-efbf-4a32-9735-d814aa08790a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LilRg_PRYMMAL-ECE-7B-SLERP-V4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PRYMMAL-ECE-7B-SLERP-V4", + "id": "LilRg/PRYMMAL-ECE-7B-SLERP-V4", + "developer": "LilRg", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1249 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2957 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0098 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2567 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3671 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1127 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V5/150d0730-e194-4d2b-96e1-54f914b5fe28.json b/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V5/150d0730-e194-4d2b-96e1-54f914b5fe28.json deleted file mode 100644 index 458cf371d..000000000 --- a/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V5/150d0730-e194-4d2b-96e1-54f914b5fe28.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LilRg_PRYMMAL-ECE-7B-SLERP-V5/1762652579.7321632", - "retrieved_timestamp": "1762652579.7321641", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LilRg/PRYMMAL-ECE-7B-SLERP-V5", - "developer": "LilRg", - "inference_platform": "unknown", - "id": "LilRg/PRYMMAL-ECE-7B-SLERP-V5", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12492298213185458 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2957239084980124 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36714583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11269946808510638 - } - } - ] -} diff --git a/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V5/eff28375-89a7-4970-9342-428b07d0c6f4.json b/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V5/eff28375-89a7-4970-9342-428b07d0c6f4.json new file mode 100644 index 000000000..17c5c7e82 --- /dev/null +++ b/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V5/eff28375-89a7-4970-9342-428b07d0c6f4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LilRg_PRYMMAL-ECE-7B-SLERP-V5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PRYMMAL-ECE-7B-SLERP-V5", + "id": "LilRg/PRYMMAL-ECE-7B-SLERP-V5", + 
"developer": "LilRg", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1249 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2957 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0098 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2567 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3671 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1127 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V6/23877e30-b8fb-45ea-a803-47df757ea909.json b/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V6/23877e30-b8fb-45ea-a803-47df757ea909.json new file mode 100644 index 000000000..ec37cc975 --- /dev/null +++ b/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V6/23877e30-b8fb-45ea-a803-47df757ea909.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LilRg_PRYMMAL-ECE-7B-SLERP-V6/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PRYMMAL-ECE-7B-SLERP-V6", + "id": "LilRg/PRYMMAL-ECE-7B-SLERP-V6", + "developer": "LilRg", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + 
"source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1243 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2957 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0098 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2567 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3671 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1127 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V6/b23913b9-f774-4927-be16-874d8e146218.json b/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V6/b23913b9-f774-4927-be16-874d8e146218.json deleted file mode 100644 index 91b240219..000000000 --- a/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V6/b23913b9-f774-4927-be16-874d8e146218.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LilRg_PRYMMAL-ECE-7B-SLERP-V6/1762652579.732379", - "retrieved_timestamp": "1762652579.732379", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LilRg/PRYMMAL-ECE-7B-SLERP-V6", - "developer": "LilRg", - "inference_platform": "unknown", - "id": "LilRg/PRYMMAL-ECE-7B-SLERP-V6", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12432346174816154 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2957239084980124 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36714583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11269946808510638 - } - } - ] -} diff --git a/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V7/8bc25d04-9cc5-4551-a9c5-ce185c7ad974.json b/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V7/8bc25d04-9cc5-4551-a9c5-ce185c7ad974.json new file mode 100644 index 000000000..06e5be7ee --- /dev/null +++ b/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V7/8bc25d04-9cc5-4551-a9c5-ce185c7ad974.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LilRg_PRYMMAL-ECE-7B-SLERP-V7/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PRYMMAL-ECE-7B-SLERP-V7", + "id": "LilRg/PRYMMAL-ECE-7B-SLERP-V7", + "developer": "LilRg", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1249 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2957 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.0098 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2567 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3671 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1127 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V7/dd12d7df-9b32-4d2a-ae9a-40304cf4bfd7.json b/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V7/dd12d7df-9b32-4d2a-ae9a-40304cf4bfd7.json deleted file mode 100644 index 433631425..000000000 --- a/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V7/dd12d7df-9b32-4d2a-ae9a-40304cf4bfd7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LilRg_PRYMMAL-ECE-7B-SLERP-V7/1762652579.732605", - "retrieved_timestamp": "1762652579.732606", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LilRg/PRYMMAL-ECE-7B-SLERP-V7", - "developer": "LilRg", - "inference_platform": "unknown", - "id": "LilRg/PRYMMAL-ECE-7B-SLERP-V7", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12492298213185458 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2957239084980124 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36714583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11269946808510638 - } - } - ] -} diff --git a/data/hfopenllm_v2/LilRg/PRYMMAL-slerp-Merge/9574abe0-00e3-4e38-bda0-b217f002a480.json b/data/hfopenllm_v2/LilRg/PRYMMAL-slerp-Merge/9574abe0-00e3-4e38-bda0-b217f002a480.json deleted file mode 100644 index 674d8b65f..000000000 --- a/data/hfopenllm_v2/LilRg/PRYMMAL-slerp-Merge/9574abe0-00e3-4e38-bda0-b217f002a480.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LilRg_PRYMMAL-slerp-Merge/1762652579.732816", - "retrieved_timestamp": "1762652579.732817", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LilRg/PRYMMAL-slerp-Merge", - "developer": "LilRg", - "inference_platform": "unknown", - "id": "LilRg/PRYMMAL-slerp-Merge", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.304400102838247 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5364156271768925 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16163141993957703 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46347916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3863031914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/LilRg/PRYMMAL-slerp-Merge/d2d4b5a5-109d-4d26-a166-3d97b341584e.json b/data/hfopenllm_v2/LilRg/PRYMMAL-slerp-Merge/d2d4b5a5-109d-4d26-a166-3d97b341584e.json new file mode 100644 index 000000000..3ff38766a --- /dev/null +++ b/data/hfopenllm_v2/LilRg/PRYMMAL-slerp-Merge/d2d4b5a5-109d-4d26-a166-3d97b341584e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/LilRg_PRYMMAL-slerp-Merge/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PRYMMAL-slerp-Merge", + "id": "LilRg/PRYMMAL-slerp-Merge", + "developer": "LilRg", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3044 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5364 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1616 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3205 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4635 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3863 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LimYeri/CodeMind-Llama3-8B-unsloth_v2-merged/ac404d92-7a06-4758-ab1d-fcf840c2b995.json b/data/hfopenllm_v2/LimYeri/CodeMind-Llama3-8B-unsloth_v2-merged/ac404d92-7a06-4758-ab1d-fcf840c2b995.json new file mode 100644 index 000000000..c0faabec4 --- /dev/null +++ b/data/hfopenllm_v2/LimYeri/CodeMind-Llama3-8B-unsloth_v2-merged/ac404d92-7a06-4758-ab1d-fcf840c2b995.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LimYeri_CodeMind-Llama3-8B-unsloth_v2-merged/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CodeMind-Llama3-8B-unsloth_v2-merged", + "id": "LimYeri/CodeMind-Llama3-8B-unsloth_v2-merged", + "developer": "LimYeri", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6946 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.486 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0665 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3316 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3506 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LimYeri/CodeMind-Llama3-8B-unsloth_v3-merged/95ea7fbf-d3f2-4fc1-ba17-05549f6e4d25.json b/data/hfopenllm_v2/LimYeri/CodeMind-Llama3-8B-unsloth_v3-merged/95ea7fbf-d3f2-4fc1-ba17-05549f6e4d25.json new file mode 100644 index 000000000..535530ceb --- /dev/null +++ b/data/hfopenllm_v2/LimYeri/CodeMind-Llama3-8B-unsloth_v3-merged/95ea7fbf-d3f2-4fc1-ba17-05549f6e4d25.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LimYeri_CodeMind-Llama3-8B-unsloth_v3-merged/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CodeMind-Llama3-8B-unsloth_v3-merged", + 
"id": "LimYeri/CodeMind-Llama3-8B-unsloth_v3-merged", + "developer": "LimYeri", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6763 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4908 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.068 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3356 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3496 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged/c101e272-24d2-44db-9b0f-2ed4d17cec41.json b/data/hfopenllm_v2/LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged/c101e272-24d2-44db-9b0f-2ed4d17cec41.json new file mode 100644 index 000000000..c1155f027 --- /dev/null +++ b/data/hfopenllm_v2/LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged/c101e272-24d2-44db-9b0f-2ed4d17cec41.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LimYeri_CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged", + "id": "LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged", + "developer": "LimYeri", + "inference_platform": "unknown", 
+ "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6492 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4853 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.068 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3608 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3354 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged/d020a655-1cc0-49e9-9db1-f8b871babd5c.json b/data/hfopenllm_v2/LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged/d020a655-1cc0-49e9-9db1-f8b871babd5c.json deleted file mode 100644 index 3b81edd79..000000000 --- a/data/hfopenllm_v2/LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged/d020a655-1cc0-49e9-9db1-f8b871babd5c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LimYeri_CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged/1762652579.733827", - "retrieved_timestamp": "1762652579.733829", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged", - "developer": "LimYeri", - "inference_platform": "unknown", - "id": 
"LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6492406813920397 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48526582322240047 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3607916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3353557180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-merged/2cb789c7-dddf-42b2-8fdf-4cbd5132946c.json b/data/hfopenllm_v2/LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-merged/2cb789c7-dddf-42b2-8fdf-4cbd5132946c.json new file mode 100644 index 000000000..14803d71b --- /dev/null +++ b/data/hfopenllm_v2/LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-merged/2cb789c7-dddf-42b2-8fdf-4cbd5132946c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LimYeri_CodeMind-Llama3-8B-unsloth_v4-one-merged/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CodeMind-Llama3-8B-unsloth_v4-one-merged", + "id": "LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-merged", + "developer": "LimYeri", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3211 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4739 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0551 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4069 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3353 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LimYeri/CodeMind-Llama3.1-8B-unsloth-merged/a414aefd-ce24-49a9-b431-0c6014ebfbd8.json b/data/hfopenllm_v2/LimYeri/CodeMind-Llama3.1-8B-unsloth-merged/a414aefd-ce24-49a9-b431-0c6014ebfbd8.json new file mode 100644 index 000000000..323a4d3be --- /dev/null +++ b/data/hfopenllm_v2/LimYeri/CodeMind-Llama3.1-8B-unsloth-merged/a414aefd-ce24-49a9-b431-0c6014ebfbd8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/LimYeri_CodeMind-Llama3.1-8B-unsloth-merged/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CodeMind-Llama3.1-8B-unsloth-merged", + "id": "LimYeri/CodeMind-Llama3.1-8B-unsloth-merged", + "developer": "LimYeri", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.649 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4695 + } + }, + { + "evaluation_name": "MATH Level 5", + 
"source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1088 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3752 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.334 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Locutusque/CollectiveLM-Falcon-3-7B/44737b7e-4942-4496-a818-fddce66da4d6.json b/data/hfopenllm_v2/Locutusque/CollectiveLM-Falcon-3-7B/44737b7e-4942-4496-a818-fddce66da4d6.json deleted file mode 100644 index 7d4ff5c2f..000000000 --- a/data/hfopenllm_v2/Locutusque/CollectiveLM-Falcon-3-7B/44737b7e-4942-4496-a818-fddce66da4d6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Locutusque_CollectiveLM-Falcon-3-7B/1762652579.734693", - "retrieved_timestamp": "1762652579.734694", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Locutusque/CollectiveLM-Falcon-3-7B", - "developer": "Locutusque", - "inference_platform": "unknown", - "id": "Locutusque/CollectiveLM-Falcon-3-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3918281271470808 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5105131374222629 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21827794561933533 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32550335570469796 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3887291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35987367021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/Locutusque/CollectiveLM-Falcon-3-7B/91fcb6a3-d351-48c8-87e8-e2a06642e925.json b/data/hfopenllm_v2/Locutusque/CollectiveLM-Falcon-3-7B/91fcb6a3-d351-48c8-87e8-e2a06642e925.json new file mode 100644 index 000000000..95e472469 --- /dev/null +++ b/data/hfopenllm_v2/Locutusque/CollectiveLM-Falcon-3-7B/91fcb6a3-d351-48c8-87e8-e2a06642e925.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Locutusque_CollectiveLM-Falcon-3-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CollectiveLM-Falcon-3-7B", + "id": "Locutusque/CollectiveLM-Falcon-3-7B", + "developer": "Locutusque", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3918 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5105 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2183 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3255 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3887 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3599 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Locutusque/Hercules-6.0-Llama-3.1-8B/3cd90efa-ddf0-43c4-884c-84337ded14b2.json b/data/hfopenllm_v2/Locutusque/Hercules-6.0-Llama-3.1-8B/3cd90efa-ddf0-43c4-884c-84337ded14b2.json new file mode 100644 index 000000000..200257158 --- /dev/null +++ b/data/hfopenllm_v2/Locutusque/Hercules-6.0-Llama-3.1-8B/3cd90efa-ddf0-43c4-884c-84337ded14b2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Locutusque_Hercules-6.0-Llama-3.1-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Hercules-6.0-Llama-3.1-8B", + "id": "Locutusque/Hercules-6.0-Llama-3.1-8B", + "developer": "Locutusque", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.663 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4813 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1669 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3621 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + 
"source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3615 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Locutusque/Hercules-6.1-Llama-3.1-8B/c66c21e9-a332-40f9-ae87-bdd78a25d753.json b/data/hfopenllm_v2/Locutusque/Hercules-6.1-Llama-3.1-8B/c66c21e9-a332-40f9-ae87-bdd78a25d753.json new file mode 100644 index 000000000..c70788d79 --- /dev/null +++ b/data/hfopenllm_v2/Locutusque/Hercules-6.1-Llama-3.1-8B/c66c21e9-a332-40f9-ae87-bdd78a25d753.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Locutusque_Hercules-6.1-Llama-3.1-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Hercules-6.1-Llama-3.1-8B", + "id": "Locutusque/Hercules-6.1-Llama-3.1-8B", + "developer": "Locutusque", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6007 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4656 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.176 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3553 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3669 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Locutusque/Llama-3-NeuralHercules-5.0-8B/0b4def91-29df-45d9-8dd4-c4097ec47ba3.json b/data/hfopenllm_v2/Locutusque/Llama-3-NeuralHercules-5.0-8B/0b4def91-29df-45d9-8dd4-c4097ec47ba3.json new file mode 100644 index 000000000..289875aa0 --- /dev/null +++ b/data/hfopenllm_v2/Locutusque/Llama-3-NeuralHercules-5.0-8B/0b4def91-29df-45d9-8dd4-c4097ec47ba3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Locutusque_Llama-3-NeuralHercules-5.0-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-NeuralHercules-5.0-8B", + "id": "Locutusque/Llama-3-NeuralHercules-5.0-8B", + "developer": "Locutusque", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4489 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.394 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0431 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3881 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2933 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/Locutusque/Llama-3-Yggdrasil-2.0-8B/2cbf258c-369e-4b1c-863f-43cf97c3a7a4.json b/data/hfopenllm_v2/Locutusque/Llama-3-Yggdrasil-2.0-8B/2cbf258c-369e-4b1c-863f-43cf97c3a7a4.json new file mode 100644 index 000000000..04066b41d --- /dev/null +++ b/data/hfopenllm_v2/Locutusque/Llama-3-Yggdrasil-2.0-8B/2cbf258c-369e-4b1c-863f-43cf97c3a7a4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Locutusque_Llama-3-Yggdrasil-2.0-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Yggdrasil-2.0-8B", + "id": "Locutusque/Llama-3-Yggdrasil-2.0-8B", + "developer": "Locutusque", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5371 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4772 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0831 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2626 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3977 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3167 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Locutusque/TinyMistral-248M-v2.5/8372889e-f9cd-4cf7-aec0-8e18d5c627e3.json b/data/hfopenllm_v2/Locutusque/TinyMistral-248M-v2.5/8372889e-f9cd-4cf7-aec0-8e18d5c627e3.json new file mode 100644 index 
000000000..f44380bf2 --- /dev/null +++ b/data/hfopenllm_v2/Locutusque/TinyMistral-248M-v2.5/8372889e-f9cd-4cf7-aec0-8e18d5c627e3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Locutusque_TinyMistral-248M-v2.5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TinyMistral-248M-v2.5", + "id": "Locutusque/TinyMistral-248M-v2.5", + "developer": "Locutusque", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 0.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1336 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3039 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0098 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2508 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3782 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1135 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Luni/StarDust-12b-v1/ce4cc270-57da-4d08-9130-62508b409cb2.json b/data/hfopenllm_v2/Luni/StarDust-12b-v1/ce4cc270-57da-4d08-9130-62508b409cb2.json new file mode 100644 index 000000000..cba3a4a90 --- /dev/null +++ b/data/hfopenllm_v2/Luni/StarDust-12b-v1/ce4cc270-57da-4d08-9130-62508b409cb2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Luni_StarDust-12b-v1/1770682486.623709", + 
"retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "StarDust-12b-v1", + "id": "Luni/StarDust-12b-v1", + "developer": "Luni", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5459 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5366 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0763 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4324 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3412 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Luni/StarDust-12b-v1/fa64b745-6b4b-4fee-b77e-d744e54a17d6.json b/data/hfopenllm_v2/Luni/StarDust-12b-v1/fa64b745-6b4b-4fee-b77e-d744e54a17d6.json deleted file mode 100644 index fa696a87f..000000000 --- a/data/hfopenllm_v2/Luni/StarDust-12b-v1/fa64b745-6b4b-4fee-b77e-d744e54a17d6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Luni_StarDust-12b-v1/1762652579.736537", - "retrieved_timestamp": "1762652579.7365382", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM 
v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Luni/StarDust-12b-v1", - "developer": "Luni", - "inference_platform": "unknown", - "id": "Luni/StarDust-12b-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5459259210007226 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5366139363101082 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07628398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43244791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34117353723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/Luni/StarDust-12b-v2/401f6afc-9a2a-4bfe-87b2-daa6df848424.json b/data/hfopenllm_v2/Luni/StarDust-12b-v2/401f6afc-9a2a-4bfe-87b2-daa6df848424.json deleted file mode 100644 index c0c85098b..000000000 --- a/data/hfopenllm_v2/Luni/StarDust-12b-v2/401f6afc-9a2a-4bfe-87b2-daa6df848424.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Luni_StarDust-12b-v2/1762652579.736784", - "retrieved_timestamp": "1762652579.736785", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Luni/StarDust-12b-v2", - "developer": "Luni", - "inference_platform": "unknown", - "id": "Luni/StarDust-12b-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5628620947973599 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5419479534912178 - } - 
}, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06873111782477341 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4338125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3439162234042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/Luni/StarDust-12b-v2/4cfedb8f-0e47-4008-9bc5-fb15e4afa607.json b/data/hfopenllm_v2/Luni/StarDust-12b-v2/4cfedb8f-0e47-4008-9bc5-fb15e4afa607.json new file mode 100644 index 000000000..b3c1d3a2d --- /dev/null +++ b/data/hfopenllm_v2/Luni/StarDust-12b-v2/4cfedb8f-0e47-4008-9bc5-fb15e4afa607.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Luni_StarDust-12b-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "StarDust-12b-v2", + "id": "Luni/StarDust-12b-v2", + "developer": "Luni", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5629 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5419 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0687 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + 
"evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4338 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3439 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v3/de3c949d-bab5-4430-bdd1-48e1b7860934.json b/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v3/de3c949d-bab5-4430-bdd1-48e1b7860934.json new file mode 100644 index 000000000..a9ed180e4 --- /dev/null +++ b/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v3/de3c949d-bab5-4430-bdd1-48e1b7860934.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NQLSG-Qwen2.5-14B-MegaFusion-v3", + "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v3", + "developer": "Lunzima", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7049 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6478 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4162 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3817 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4808 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5394 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v4/011e53cd-409f-479b-9c3d-bfce75a1277b.json b/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v4/011e53cd-409f-479b-9c3d-bfce75a1277b.json new file mode 100644 index 000000000..77effc1ca --- /dev/null +++ b/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v4/011e53cd-409f-479b-9c3d-bfce75a1277b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NQLSG-Qwen2.5-14B-MegaFusion-v4", + "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v4", + "developer": "Lunzima", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6943 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.642 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3467 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3716 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4769 + } + }, + { + 
"evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5252 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v5/1ff40e45-5be4-4625-9f66-5599a829903d.json b/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v5/1ff40e45-5be4-4625-9f66-5599a829903d.json new file mode 100644 index 000000000..5eaad0555 --- /dev/null +++ b/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v5/1ff40e45-5be4-4625-9f66-5599a829903d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NQLSG-Qwen2.5-14B-MegaFusion-v5", + "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v5", + "developer": "Lunzima", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7485 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6467 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4358 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3624 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4473 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.514 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6-cpt/fed97d94-2949-4383-8f25-fa79bd413508.json b/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6-cpt/fed97d94-2949-4383-8f25-fa79bd413508.json new file mode 100644 index 000000000..264dd2b3d --- /dev/null +++ b/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6-cpt/fed97d94-2949-4383-8f25-fa79bd413508.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v6-cpt/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NQLSG-Qwen2.5-14B-MegaFusion-v6-cpt", + "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6-cpt", + "developer": "Lunzima", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4663 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6215 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3316 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3758 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4937 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.5204 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6/f4820bc8-7dfd-4439-af95-21b6cc9367ac.json b/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6/f4820bc8-7dfd-4439-af95-21b6cc9367ac.json new file mode 100644 index 000000000..37d866ec4 --- /dev/null +++ b/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6/f4820bc8-7dfd-4439-af95-21b6cc9367ac.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v6/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NQLSG-Qwen2.5-14B-MegaFusion-v6", + "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6", + "developer": "Lunzima", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7043 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6458 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3958 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3775 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4768 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5392 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7-rebase/36e576bb-de50-49ec-a91f-f134c11bbe38.json b/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7-rebase/36e576bb-de50-49ec-a91f-f134c11bbe38.json new file mode 100644 index 000000000..fbc0a8b94 --- /dev/null +++ b/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7-rebase/36e576bb-de50-49ec-a91f-f134c11bbe38.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v7-rebase/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NQLSG-Qwen2.5-14B-MegaFusion-v7-rebase", + "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7-rebase", + "developer": "Lunzima", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6931 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6423 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3406 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.375 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4888 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5277 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7/0edd388b-7a1b-4334-9b72-52d84653ff67.json 
b/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7/0edd388b-7a1b-4334-9b72-52d84653ff67.json new file mode 100644 index 000000000..a93457aeb --- /dev/null +++ b/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7/0edd388b-7a1b-4334-9b72-52d84653ff67.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v7/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NQLSG-Qwen2.5-14B-MegaFusion-v7", + "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7", + "developer": "Lunzima", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6794 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6531 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4101 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3792 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4834 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5376 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.5/b3199674-328e-41a0-9aa4-bf39aec735bc.json b/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.5/b3199674-328e-41a0-9aa4-bf39aec735bc.json new file mode 100644 index 000000000..948f8741b --- /dev/null +++ 
b/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.5/b3199674-328e-41a0-9aa4-bf39aec735bc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NQLSG-Qwen2.5-14B-MegaFusion-v8.5", + "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.5", + "developer": "Lunzima", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5929 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6451 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3656 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.38 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.477 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.529 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.6/52db4d79-7040-4525-934e-0f33e4acec63.json b/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.6/52db4d79-7040-4525-934e-0f33e4acec63.json new file mode 100644 index 000000000..0b2f0fd81 --- /dev/null +++ b/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.6/52db4d79-7040-4525-934e-0f33e4acec63.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.6/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NQLSG-Qwen2.5-14B-MegaFusion-v8.6", + "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.6", + "developer": "Lunzima", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5919 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6457 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4071 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3842 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4953 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.54 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.7/ee34821e-9182-433f-a8b0-745711e23738.json b/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.7/ee34821e-9182-433f-a8b0-745711e23738.json new file mode 100644 index 000000000..5a366fbb8 --- /dev/null +++ b/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.7/ee34821e-9182-433f-a8b0-745711e23738.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.7/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", 
+ "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NQLSG-Qwen2.5-14B-MegaFusion-v8.7", + "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.7", + "developer": "Lunzima", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7875 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6483 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5408 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3515 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4381 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5242 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.8/10ef0990-5356-432f-b24c-dd107188ec5f.json b/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.8/10ef0990-5356-432f-b24c-dd107188ec5f.json new file mode 100644 index 000000000..be4b45ebc --- /dev/null +++ b/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.8/10ef0990-5356-432f-b24c-dd107188ec5f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.8/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"NQLSG-Qwen2.5-14B-MegaFusion-v8.8", + "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.8", + "developer": "Lunzima", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7028 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6566 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4237 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3758 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4912 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5323 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.9/47de680d-33b1-4441-92da-4b97a5fc513f.json b/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.9/47de680d-33b1-4441-92da-4b97a5fc513f.json new file mode 100644 index 000000000..f10ff96d2 --- /dev/null +++ b/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.9/47de680d-33b1-4441-92da-4b97a5fc513f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.9/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NQLSG-Qwen2.5-14B-MegaFusion-v8.9", + "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.9", + "developer": "Lunzima", + "inference_platform": "unknown", + "additional_details": { + 
"precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7993 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6483 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.537 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4328 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5199 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8/96ac0351-2ade-4d76-bcf9-bc0f633f8694.json b/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8/96ac0351-2ade-4d76-bcf9-bc0f633f8694.json new file mode 100644 index 000000000..e0b9d6239 --- /dev/null +++ b/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8/96ac0351-2ade-4d76-bcf9-bc0f633f8694.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NQLSG-Qwen2.5-14B-MegaFusion-v8", + "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8", + "developer": "Lunzima", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + 
"dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7875 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6419 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5559 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3356 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4394 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5206 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9-stock/31aae266-c14b-451f-8bab-62ee7d5d382e.json b/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9-stock/31aae266-c14b-451f-8bab-62ee7d5d382e.json new file mode 100644 index 000000000..0e46ee73a --- /dev/null +++ b/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9-stock/31aae266-c14b-451f-8bab-62ee7d5d382e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9-stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NQLSG-Qwen2.5-14B-MegaFusion-v9-stock", + "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9-stock", + "developer": "Lunzima", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on 
IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6514 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6571 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4184 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3842 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.482 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5412 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.1/f6edb102-e867-46d1-afdc-3c45166bd510.json b/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.1/f6edb102-e867-46d1-afdc-3c45166bd510.json new file mode 100644 index 000000000..45123ecdc --- /dev/null +++ b/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.1/f6edb102-e867-46d1-afdc-3c45166bd510.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NQLSG-Qwen2.5-14B-MegaFusion-v9.1", + "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.1", + "developer": "Lunzima", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8003 + } + }, + { + 
"evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6555 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5468 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3431 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4354 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5251 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.2/8b7756cc-9af3-4f98-84ac-7fef4c1bdaa0.json b/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.2/8b7756cc-9af3-4f98-84ac-7fef4c1bdaa0.json new file mode 100644 index 000000000..3c1854b85 --- /dev/null +++ b/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.2/8b7756cc-9af3-4f98-84ac-7fef4c1bdaa0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NQLSG-Qwen2.5-14B-MegaFusion-v9.2", + "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.2", + "developer": "Lunzima", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7862 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6538 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5332 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3557 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4381 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5283 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9/dcf33a22-5e57-4476-a2cb-ebd60407a920.json b/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9/dcf33a22-5e57-4476-a2cb-ebd60407a920.json new file mode 100644 index 000000000..f8f18e3db --- /dev/null +++ b/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9/dcf33a22-5e57-4476-a2cb-ebd60407a920.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NQLSG-Qwen2.5-14B-MegaFusion-v9", + "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9", + "developer": "Lunzima", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5235 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6546 + } 
+ }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4366 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3884 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4806 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5422 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-OriginalFusion/15659480-be0b-41c8-a463-873be444b194.json b/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-OriginalFusion/15659480-be0b-41c8-a463-873be444b194.json new file mode 100644 index 000000000..179cbfc2b --- /dev/null +++ b/data/hfopenllm_v2/Lunzima/NQLSG-Qwen2.5-14B-OriginalFusion/15659480-be0b-41c8-a463-873be444b194.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-OriginalFusion/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NQLSG-Qwen2.5-14B-OriginalFusion", + "id": "Lunzima/NQLSG-Qwen2.5-14B-OriginalFusion", + "developer": "Lunzima", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6142 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6592 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, 
+ "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4275 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3809 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5122 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5239 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lyte/Llama-3.1-8B-Instruct-Reasoner-1o1_v0.3/0444c1bf-a3d3-4d23-bc6c-0a98c4dc1e9d.json b/data/hfopenllm_v2/Lyte/Llama-3.1-8B-Instruct-Reasoner-1o1_v0.3/0444c1bf-a3d3-4d23-bc6c-0a98c4dc1e9d.json new file mode 100644 index 000000000..3b3f7cc5e --- /dev/null +++ b/data/hfopenllm_v2/Lyte/Llama-3.1-8B-Instruct-Reasoner-1o1_v0.3/0444c1bf-a3d3-4d23-bc6c-0a98c4dc1e9d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lyte_Llama-3.1-8B-Instruct-Reasoner-1o1_v0.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8B-Instruct-Reasoner-1o1_v0.3", + "id": "Lyte/Llama-3.1-8B-Instruct-Reasoner-1o1_v0.3", + "developer": "Lyte", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7098 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.495 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.1903 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3461 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3618 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lyte/Llama-3.1-8B-Instruct-Reasoner-1o1_v0.3/8fdc62c0-215c-4502-8f56-188455fe2d9e.json b/data/hfopenllm_v2/Lyte/Llama-3.1-8B-Instruct-Reasoner-1o1_v0.3/8fdc62c0-215c-4502-8f56-188455fe2d9e.json deleted file mode 100644 index 1f8ad178b..000000000 --- a/data/hfopenllm_v2/Lyte/Llama-3.1-8B-Instruct-Reasoner-1o1_v0.3/8fdc62c0-215c-4502-8f56-188455fe2d9e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lyte_Llama-3.1-8B-Instruct-Reasoner-1o1_v0.3/1762652579.74142", - "retrieved_timestamp": "1762652579.74142", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lyte/Llama-3.1-8B-Instruct-Reasoner-1o1_v0.3", - "developer": "Lyte", - "inference_platform": "unknown", - "id": "Lyte/Llama-3.1-8B-Instruct-Reasoner-1o1_v0.3", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7098155117310957 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4949521619329585 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1903323262839879 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": 
{ - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.346125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36178523936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/Lyte/Llama-3.2-1B-Instruct-COT-RL-Expriement1-EP04/93aa3a13-5069-410f-a1df-6944e0231e0e.json b/data/hfopenllm_v2/Lyte/Llama-3.2-1B-Instruct-COT-RL-Expriement1-EP04/93aa3a13-5069-410f-a1df-6944e0231e0e.json new file mode 100644 index 000000000..1d73f7a54 --- /dev/null +++ b/data/hfopenllm_v2/Lyte/Llama-3.2-1B-Instruct-COT-RL-Expriement1-EP04/93aa3a13-5069-410f-a1df-6944e0231e0e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lyte_Llama-3.2-1B-Instruct-COT-RL-Expriement1-EP04/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-1B-Instruct-COT-RL-Expriement1-EP04", + "id": "Lyte/Llama-3.2-1B-Instruct-COT-RL-Expriement1-EP04", + "developer": "Lyte", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5774 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3515 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0801 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3236 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + 
"dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1843 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lyte/Llama-3.2-1B-Instruct-COT-RL-Expriement1-EP04/ea928079-f00f-41b1-a628-c1539b41e63d.json b/data/hfopenllm_v2/Lyte/Llama-3.2-1B-Instruct-COT-RL-Expriement1-EP04/ea928079-f00f-41b1-a628-c1539b41e63d.json deleted file mode 100644 index c80a7bb91..000000000 --- a/data/hfopenllm_v2/Lyte/Llama-3.2-1B-Instruct-COT-RL-Expriement1-EP04/ea928079-f00f-41b1-a628-c1539b41e63d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lyte_Llama-3.2-1B-Instruct-COT-RL-Expriement1-EP04/1762652579.7416818", - "retrieved_timestamp": "1762652579.741683", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lyte/Llama-3.2-1B-Instruct-COT-RL-Expriement1-EP04", - "developer": "Lyte", - "inference_platform": "unknown", - "id": "Lyte/Llama-3.2-1B-Instruct-COT-RL-Expriement1-EP04", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5773503193748144 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3515036874279285 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08006042296072508 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32355208333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18425864361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/Lyte/Llama-3.2-3B-Overthinker/427ea7d0-c1f1-4cfe-b6a7-555262a7a317.json b/data/hfopenllm_v2/Lyte/Llama-3.2-3B-Overthinker/427ea7d0-c1f1-4cfe-b6a7-555262a7a317.json new file mode 100644 index 000000000..4e1649e92 --- /dev/null +++ 
b/data/hfopenllm_v2/Lyte/Llama-3.2-3B-Overthinker/427ea7d0-c1f1-4cfe-b6a7-555262a7a317.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Lyte_Llama-3.2-3B-Overthinker/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-3B-Overthinker", + "id": "Lyte/Llama-3.2-3B-Overthinker", + "developer": "Lyte", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6408 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.432 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1563 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3419 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2985 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/M4-ai/TinyMistral-248M-v3/c6dbe372-7a3c-487c-87c0-fb324c39f8c9.json b/data/hfopenllm_v2/M4-ai/TinyMistral-248M-v3/c6dbe372-7a3c-487c-87c0-fb324c39f8c9.json new file mode 100644 index 000000000..cbe484e9b --- /dev/null +++ b/data/hfopenllm_v2/M4-ai/TinyMistral-248M-v3/c6dbe372-7a3c-487c-87c0-fb324c39f8c9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/M4-ai_TinyMistral-248M-v3/1770682486.623709", + "retrieved_timestamp": 
"1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TinyMistral-248M-v3", + "id": "M4-ai/TinyMistral-248M-v3", + "developer": "M4-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 0.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1639 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2885 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0045 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2408 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3793 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1132 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MEscriva/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/ab59c1cb-ac90-4fe1-b782-2e038734366e.json b/data/hfopenllm_v2/MEscriva/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/ab59c1cb-ac90-4fe1-b782-2e038734366e.json deleted file mode 100644 index 83b5f999e..000000000 --- a/data/hfopenllm_v2/MEscriva/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/ab59c1cb-ac90-4fe1-b782-2e038734366e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MEscriva_ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/1762652579.7424488", - "retrieved_timestamp": "1762652579.7424488", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging 
Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MEscriva/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis", - "developer": "MEscriva", - "inference_platform": "unknown", - "id": "MEscriva/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis", - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08662903318749807 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.305728612437881 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888216 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40171874999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11544215425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/MEscriva/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/cf8d99c8-8790-4bdf-bfc2-1a6d1fe35916.json b/data/hfopenllm_v2/MEscriva/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/cf8d99c8-8790-4bdf-bfc2-1a6d1fe35916.json new file mode 100644 index 000000000..6ab96529f --- /dev/null +++ b/data/hfopenllm_v2/MEscriva/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/cf8d99c8-8790-4bdf-bfc2-1a6d1fe35916.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MEscriva_ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis", + "id": "MEscriva/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis", + "developer": "MEscriva", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0866 + } + }, + { + "evaluation_name": "BBH", + 
"source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3057 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0106 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2517 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4017 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1154 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MLP-KTLim/llama-3-Korean-Bllossom-8B/5b5d42d7-8012-46f1-826f-32d839806048.json b/data/hfopenllm_v2/MLP-KTLim/llama-3-Korean-Bllossom-8B/5b5d42d7-8012-46f1-826f-32d839806048.json new file mode 100644 index 000000000..a8760b3f6 --- /dev/null +++ b/data/hfopenllm_v2/MLP-KTLim/llama-3-Korean-Bllossom-8B/5b5d42d7-8012-46f1-826f-32d839806048.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MLP-KTLim_llama-3-Korean-Bllossom-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-3-Korean-Bllossom-8B", + "id": "MLP-KTLim/llama-3-Korean-Bllossom-8B", + "developer": "MLP-KTLim", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5113 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.49 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.102 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2626 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3675 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3594 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MTSAIR/Cotype-Nano/5e1bf2cb-55c4-4806-89af-cb9953c7c1b1.json b/data/hfopenllm_v2/MTSAIR/Cotype-Nano/5e1bf2cb-55c4-4806-89af-cb9953c7c1b1.json new file mode 100644 index 000000000..fb6c77edb --- /dev/null +++ b/data/hfopenllm_v2/MTSAIR/Cotype-Nano/5e1bf2cb-55c4-4806-89af-cb9953c7c1b1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MTSAIR_Cotype-Nano/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Cotype-Nano", + "id": "MTSAIR/Cotype-Nano", + "developer": "MTSAIR", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3748 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3865 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { 
+ "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0974 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3289 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2477 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MTSAIR/Cotype-Nano/b5fa19ff-9b05-4d71-9d79-54f8dfe4a8ab.json b/data/hfopenllm_v2/MTSAIR/Cotype-Nano/b5fa19ff-9b05-4d71-9d79-54f8dfe4a8ab.json deleted file mode 100644 index 9faa439af..000000000 --- a/data/hfopenllm_v2/MTSAIR/Cotype-Nano/b5fa19ff-9b05-4d71-9d79-54f8dfe4a8ab.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MTSAIR_Cotype-Nano/1762652579.742943", - "retrieved_timestamp": "1762652579.742944", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MTSAIR/Cotype-Nano", - "developer": "MTSAIR", - "inference_platform": "unknown", - "id": "MTSAIR/Cotype-Nano", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3747922179816221 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3864940969601492 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09743202416918428 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3289166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24767287234042554 - } - } - ] -} diff --git a/data/hfopenllm_v2/MTSAIR/MultiVerse_70B/21ee4b33-9829-4cca-9603-c30fd4a1f7ff.json b/data/hfopenllm_v2/MTSAIR/MultiVerse_70B/21ee4b33-9829-4cca-9603-c30fd4a1f7ff.json new file mode 100644 index 000000000..4f7b2b0b1 --- /dev/null +++ b/data/hfopenllm_v2/MTSAIR/MultiVerse_70B/21ee4b33-9829-4cca-9603-c30fd4a1f7ff.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MTSAIR_MultiVerse_70B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MultiVerse_70B", + "id": "MTSAIR/MultiVerse_70B", + "developer": "MTSAIR", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 72.289 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5249 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6183 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1926 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.354 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.474 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.486 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MTSAIR/MultiVerse_70B/a713dba7-110a-40a0-9d89-d48567d423af.json b/data/hfopenllm_v2/MTSAIR/MultiVerse_70B/a713dba7-110a-40a0-9d89-d48567d423af.json deleted file mode 100644 index bae61cf4c..000000000 --- a/data/hfopenllm_v2/MTSAIR/MultiVerse_70B/a713dba7-110a-40a0-9d89-d48567d423af.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MTSAIR_MultiVerse_70B/1762652579.743202", - "retrieved_timestamp": "1762652579.7432032", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MTSAIR/MultiVerse_70B", - "developer": "MTSAIR", - "inference_platform": "unknown", - "id": "MTSAIR/MultiVerse_70B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 72.289 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5249183278146429 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6183134284931178 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19259818731117825 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3540268456375839 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47398958333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48603723404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.1/c6c14a8b-0e9f-4b97-b9f3-27c7250fb8f2.json b/data/hfopenllm_v2/Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.1/c6c14a8b-0e9f-4b97-b9f3-27c7250fb8f2.json new file mode 100644 index 000000000..115db7b96 --- /dev/null +++ b/data/hfopenllm_v2/Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.1/c6c14a8b-0e9f-4b97-b9f3-27c7250fb8f2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Magpie-Align_Llama-3-8B-Magpie-Align-SFT-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": 
"documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Magpie-Align-SFT-v0.1", + "id": "Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.1", + "developer": "Magpie-Align", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4361 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4615 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0574 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2626 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3277 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2863 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.1/f3024d7f-f25f-4220-973a-b0e19ecb5e1d.json b/data/hfopenllm_v2/Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.1/f3024d7f-f25f-4220-973a-b0e19ecb5e1d.json deleted file mode 100644 index 5fe17199b..000000000 --- a/data/hfopenllm_v2/Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.1/f3024d7f-f25f-4220-973a-b0e19ecb5e1d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Magpie-Align_Llama-3-8B-Magpie-Align-SFT-v0.1/1762652579.743415", - "retrieved_timestamp": "1762652579.743416", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - 
"source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.1", - "developer": "Magpie-Align", - "inference_platform": "unknown", - "id": "Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4361416596851908 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4615102744527366 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32773958333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2863198138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.3/4756be0b-fd98-467f-a256-73aabba09c97.json b/data/hfopenllm_v2/Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.3/4756be0b-fd98-467f-a256-73aabba09c97.json deleted file mode 100644 index 968c1431f..000000000 --- a/data/hfopenllm_v2/Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.3/4756be0b-fd98-467f-a256-73aabba09c97.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Magpie-Align_Llama-3-8B-Magpie-Align-SFT-v0.3/1762652579.743664", - "retrieved_timestamp": "1762652579.743665", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.3", - "developer": "Magpie-Align", - "inference_platform": "unknown", - "id": "Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.3", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5063586838477463 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45715808996720547 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07326283987915408 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34237500000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902260638297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.3/6586fa94-9f43-4814-8c8a-8ed244ac94e7.json b/data/hfopenllm_v2/Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.3/6586fa94-9f43-4814-8c8a-8ed244ac94e7.json new file mode 100644 index 000000000..20e5b11c3 --- /dev/null +++ b/data/hfopenllm_v2/Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.3/6586fa94-9f43-4814-8c8a-8ed244ac94e7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Magpie-Align_Llama-3-8B-Magpie-Align-SFT-v0.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Magpie-Align-SFT-v0.3", + "id": "Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.3", + "developer": "Magpie-Align", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5064 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4572 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.0733 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3424 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2902 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Magpie-Align/Llama-3-8B-Magpie-Align-v0.1/df7d7db2-867e-47f0-9abf-d71b79e97630.json b/data/hfopenllm_v2/Magpie-Align/Llama-3-8B-Magpie-Align-v0.1/df7d7db2-867e-47f0-9abf-d71b79e97630.json new file mode 100644 index 000000000..bbdb79de2 --- /dev/null +++ b/data/hfopenllm_v2/Magpie-Align/Llama-3-8B-Magpie-Align-v0.1/df7d7db2-867e-47f0-9abf-d71b79e97630.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Magpie-Align_Llama-3-8B-Magpie-Align-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Magpie-Align-v0.1", + "id": "Magpie-Align/Llama-3-8B-Magpie-Align-v0.1", + "developer": "Magpie-Align", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4118 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4811 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.034 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": 
"Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2752 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3047 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3006 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Magpie-Align/Llama-3-8B-Magpie-Align-v0.1/e2502e7e-3a10-49f3-b5c6-b20496fed998.json b/data/hfopenllm_v2/Magpie-Align/Llama-3-8B-Magpie-Align-v0.1/e2502e7e-3a10-49f3-b5c6-b20496fed998.json new file mode 100644 index 000000000..5f345d139 --- /dev/null +++ b/data/hfopenllm_v2/Magpie-Align/Llama-3-8B-Magpie-Align-v0.1/e2502e7e-3a10-49f3-b5c6-b20496fed998.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Magpie-Align_Llama-3-8B-Magpie-Align-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Magpie-Align-v0.1", + "id": "Magpie-Align/Llama-3-8B-Magpie-Align-v0.1", + "developer": "Magpie-Align", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4027 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4789 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0461 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.2768 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3001 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Magpie-Align/Llama-3-8B-Magpie-Align-v0.3/51cde18f-09b0-4b66-a962-811ee49e192f.json b/data/hfopenllm_v2/Magpie-Align/Llama-3-8B-Magpie-Align-v0.3/51cde18f-09b0-4b66-a962-811ee49e192f.json new file mode 100644 index 000000000..f30e78827 --- /dev/null +++ b/data/hfopenllm_v2/Magpie-Align/Llama-3-8B-Magpie-Align-v0.3/51cde18f-09b0-4b66-a962-811ee49e192f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Magpie-Align_Llama-3-8B-Magpie-Align-v0.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Magpie-Align-v0.3", + "id": "Magpie-Align/Llama-3-8B-Magpie-Align-v0.3", + "developer": "Magpie-Align", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4497 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.457 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0566 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + 
"hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3406 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3134 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Magpie-Align/Llama-3.1-8B-Magpie-Align-SFT-v0.1/43d2e788-e186-485d-8c34-10bdfd7a6b65.json b/data/hfopenllm_v2/Magpie-Align/Llama-3.1-8B-Magpie-Align-SFT-v0.1/43d2e788-e186-485d-8c34-10bdfd7a6b65.json deleted file mode 100644 index 50003cc19..000000000 --- a/data/hfopenllm_v2/Magpie-Align/Llama-3.1-8B-Magpie-Align-SFT-v0.1/43d2e788-e186-485d-8c34-10bdfd7a6b65.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Magpie-Align_Llama-3.1-8B-Magpie-Align-SFT-v0.1/1762652579.744527", - "retrieved_timestamp": "1762652579.744527", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Magpie-Align/Llama-3.1-8B-Magpie-Align-SFT-v0.1", - "developer": "Magpie-Align", - "inference_platform": "unknown", - "id": "Magpie-Align/Llama-3.1-8B-Magpie-Align-SFT-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47820671374176077 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4764157817799906 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08987915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3397395833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29429853723404253 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/Magpie-Align/Llama-3.1-8B-Magpie-Align-SFT-v0.1/4ea48b42-8026-4799-b35d-46757fd2753f.json b/data/hfopenllm_v2/Magpie-Align/Llama-3.1-8B-Magpie-Align-SFT-v0.1/4ea48b42-8026-4799-b35d-46757fd2753f.json new file mode 100644 index 000000000..aeb4cabcb --- /dev/null +++ b/data/hfopenllm_v2/Magpie-Align/Llama-3.1-8B-Magpie-Align-SFT-v0.1/4ea48b42-8026-4799-b35d-46757fd2753f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Magpie-Align_Llama-3.1-8B-Magpie-Align-SFT-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8B-Magpie-Align-SFT-v0.1", + "id": "Magpie-Align/Llama-3.1-8B-Magpie-Align-SFT-v0.1", + "developer": "Magpie-Align", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4782 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4764 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0899 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3397 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2943 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Magpie-Align/Llama-3.1-8B-Magpie-Align-v0.1/52e9b4ae-9119-4f26-87e4-6532d1148ecd.json 
b/data/hfopenllm_v2/Magpie-Align/Llama-3.1-8B-Magpie-Align-v0.1/52e9b4ae-9119-4f26-87e4-6532d1148ecd.json new file mode 100644 index 000000000..a67ee6b2e --- /dev/null +++ b/data/hfopenllm_v2/Magpie-Align/Llama-3.1-8B-Magpie-Align-v0.1/52e9b4ae-9119-4f26-87e4-6532d1148ecd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Magpie-Align_Llama-3.1-8B-Magpie-Align-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8B-Magpie-Align-v0.1", + "id": "Magpie-Align/Llama-3.1-8B-Magpie-Align-v0.1", + "developer": "Magpie-Align", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4458 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4622 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0665 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3141 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3262 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Magpie-Align/MagpieLM-8B-Chat-v0.1/4bda68c0-cc09-4945-961b-48776b7b5fc8.json b/data/hfopenllm_v2/Magpie-Align/MagpieLM-8B-Chat-v0.1/4bda68c0-cc09-4945-961b-48776b7b5fc8.json new file mode 100644 index 000000000..590475654 --- /dev/null +++ 
b/data/hfopenllm_v2/Magpie-Align/MagpieLM-8B-Chat-v0.1/4bda68c0-cc09-4945-961b-48776b7b5fc8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Magpie-Align_MagpieLM-8B-Chat-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MagpieLM-8B-Chat-v0.1", + "id": "Magpie-Align/MagpieLM-8B-Chat-v0.1", + "developer": "Magpie-Align", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3701 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4172 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0612 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3501 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3195 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Magpie-Align/MagpieLM-8B-Chat-v0.1/b14fcc84-7caf-4aa8-b728-8a1287a5c04a.json b/data/hfopenllm_v2/Magpie-Align/MagpieLM-8B-Chat-v0.1/b14fcc84-7caf-4aa8-b728-8a1287a5c04a.json deleted file mode 100644 index ac9f526a7..000000000 --- a/data/hfopenllm_v2/Magpie-Align/MagpieLM-8B-Chat-v0.1/b14fcc84-7caf-4aa8-b728-8a1287a5c04a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/Magpie-Align_MagpieLM-8B-Chat-v0.1/1762652579.744951", - "retrieved_timestamp": "1762652579.744951", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Magpie-Align/MagpieLM-8B-Chat-v0.1", - "developer": "Magpie-Align", - "inference_platform": "unknown", - "id": "Magpie-Align/MagpieLM-8B-Chat-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3700714105240761 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4172338260055306 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3500625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3194813829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/Magpie-Align/MagpieLM-8B-SFT-v0.1/18ea0ad0-a216-4906-a96c-c8b040398dbd.json b/data/hfopenllm_v2/Magpie-Align/MagpieLM-8B-SFT-v0.1/18ea0ad0-a216-4906-a96c-c8b040398dbd.json new file mode 100644 index 000000000..3ffe20bca --- /dev/null +++ b/data/hfopenllm_v2/Magpie-Align/MagpieLM-8B-SFT-v0.1/18ea0ad0-a216-4906-a96c-c8b040398dbd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Magpie-Align_MagpieLM-8B-SFT-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MagpieLM-8B-SFT-v0.1", + "id": "Magpie-Align/MagpieLM-8B-SFT-v0.1", + "developer": "Magpie-Align", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4721 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4553 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0755 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3649 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.299 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Magpie-Align/MagpieLM-8B-SFT-v0.1/eb307f58-db7e-44b3-bf03-7264a39bed69.json b/data/hfopenllm_v2/Magpie-Align/MagpieLM-8B-SFT-v0.1/eb307f58-db7e-44b3-bf03-7264a39bed69.json deleted file mode 100644 index 50d4ad417..000000000 --- a/data/hfopenllm_v2/Magpie-Align/MagpieLM-8B-SFT-v0.1/eb307f58-db7e-44b3-bf03-7264a39bed69.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Magpie-Align_MagpieLM-8B-SFT-v0.1/1762652579.7451751", - "retrieved_timestamp": "1762652579.7451751", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Magpie-Align/MagpieLM-8B-SFT-v0.1", - "developer": "Magpie-Align", - "inference_platform": "unknown", - "id": "Magpie-Align/MagpieLM-8B-SFT-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4720619068515982 - } - }, - { 
- "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45528501595553356 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0755287009063444 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3648854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2989527925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/MagusCorp/grpo_lora_enem_llama3_7b/1e2321f6-93bd-4acf-9f5b-c82807a40233.json b/data/hfopenllm_v2/MagusCorp/grpo_lora_enem_llama3_7b/1e2321f6-93bd-4acf-9f5b-c82807a40233.json new file mode 100644 index 000000000..7178d1d47 --- /dev/null +++ b/data/hfopenllm_v2/MagusCorp/grpo_lora_enem_llama3_7b/1e2321f6-93bd-4acf-9f5b-c82807a40233.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MagusCorp_grpo_lora_enem_llama3_7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "grpo_lora_enem_llama3_7b", + "id": "MagusCorp/grpo_lora_enem_llama3_7b", + "developer": "MagusCorp", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4724 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4801 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1216 + } + }, + { + "evaluation_name": "GPQA", + 
"source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3971 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3574 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ManoloPueblo/ContentCuisine_1-7B-slerp/13032961-52a1-43cf-b69d-1802c43e1bcc.json b/data/hfopenllm_v2/ManoloPueblo/ContentCuisine_1-7B-slerp/13032961-52a1-43cf-b69d-1802c43e1bcc.json new file mode 100644 index 000000000..6a60d0dd4 --- /dev/null +++ b/data/hfopenllm_v2/ManoloPueblo/ContentCuisine_1-7B-slerp/13032961-52a1-43cf-b69d-1802c43e1bcc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ManoloPueblo_ContentCuisine_1-7B-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ContentCuisine_1-7B-slerp", + "id": "ManoloPueblo/ContentCuisine_1-7B-slerp", + "developer": "ManoloPueblo", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3907 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5188 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0733 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4672 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ManoloPueblo/ContentCuisine_1-7B-slerp/74d2724e-9d5d-4142-9cff-3fd40c931882.json b/data/hfopenllm_v2/ManoloPueblo/ContentCuisine_1-7B-slerp/74d2724e-9d5d-4142-9cff-3fd40c931882.json deleted file mode 100644 index ab324c120..000000000 --- a/data/hfopenllm_v2/ManoloPueblo/ContentCuisine_1-7B-slerp/74d2724e-9d5d-4142-9cff-3fd40c931882.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ManoloPueblo_ContentCuisine_1-7B-slerp/1762652579.745631", - "retrieved_timestamp": "1762652579.745632", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ManoloPueblo/ContentCuisine_1-7B-slerp", - "developer": "ManoloPueblo", - "inference_platform": "unknown", - "id": "ManoloPueblo/ContentCuisine_1-7B-slerp", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3907044419916932 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5188437309746964 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07326283987915408 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46719791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on 
MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30535239361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/ManoloPueblo/LLM_MERGE_CC2/9d444061-2c29-499a-8906-77ef58aba34d.json b/data/hfopenllm_v2/ManoloPueblo/LLM_MERGE_CC2/9d444061-2c29-499a-8906-77ef58aba34d.json new file mode 100644 index 000000000..2ebddc2a3 --- /dev/null +++ b/data/hfopenllm_v2/ManoloPueblo/LLM_MERGE_CC2/9d444061-2c29-499a-8906-77ef58aba34d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ManoloPueblo_LLM_MERGE_CC2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LLM_MERGE_CC2", + "id": "ManoloPueblo/LLM_MERGE_CC2", + "developer": "ManoloPueblo", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3853 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5209 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0642 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4593 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3032 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/ManoloPueblo/LLM_MERGE_CC2/f7ca7fb6-b02c-4c27-afef-662bb62cd054.json b/data/hfopenllm_v2/ManoloPueblo/LLM_MERGE_CC2/f7ca7fb6-b02c-4c27-afef-662bb62cd054.json deleted file mode 100644 index 769e9dfc3..000000000 --- a/data/hfopenllm_v2/ManoloPueblo/LLM_MERGE_CC2/f7ca7fb6-b02c-4c27-afef-662bb62cd054.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ManoloPueblo_LLM_MERGE_CC2/1762652579.745891", - "retrieved_timestamp": "1762652579.745892", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ManoloPueblo/LLM_MERGE_CC2", - "developer": "ManoloPueblo", - "inference_platform": "unknown", - "id": "ManoloPueblo/LLM_MERGE_CC2", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3853087585384557 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5209367401710429 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06419939577039276 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45929166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30319148936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/ManoloPueblo/LLM_MERGE_CC3/1c3dfe6a-28e7-4125-a802-1898336b1beb.json b/data/hfopenllm_v2/ManoloPueblo/LLM_MERGE_CC3/1c3dfe6a-28e7-4125-a802-1898336b1beb.json deleted file mode 100644 index b4d6be0c7..000000000 --- a/data/hfopenllm_v2/ManoloPueblo/LLM_MERGE_CC3/1c3dfe6a-28e7-4125-a802-1898336b1beb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ManoloPueblo_LLM_MERGE_CC3/1762652579.7460978", - "retrieved_timestamp": "1762652579.746099", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - 
"model_info": { - "name": "ManoloPueblo/LLM_MERGE_CC3", - "developer": "ManoloPueblo", - "inference_platform": "unknown", - "id": "ManoloPueblo/LLM_MERGE_CC3", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3958751667797001 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5246290546274339 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07930513595166164 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4671666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3155751329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/ManoloPueblo/LLM_MERGE_CC3/1ffdf6b0-b3a3-432a-a0e4-69b4d447bb76.json b/data/hfopenllm_v2/ManoloPueblo/LLM_MERGE_CC3/1ffdf6b0-b3a3-432a-a0e4-69b4d447bb76.json new file mode 100644 index 000000000..7e1671f54 --- /dev/null +++ b/data/hfopenllm_v2/ManoloPueblo/LLM_MERGE_CC3/1ffdf6b0-b3a3-432a-a0e4-69b4d447bb76.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ManoloPueblo_LLM_MERGE_CC3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LLM_MERGE_CC3", + "id": "ManoloPueblo/LLM_MERGE_CC3", + "developer": "ManoloPueblo", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3959 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5246 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0793 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4672 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3156 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MarinaraSpaghetti/NemoReRemix-12B/8ce733ea-e6e9-4f9b-ab28-f93202507265.json b/data/hfopenllm_v2/MarinaraSpaghetti/NemoReRemix-12B/8ce733ea-e6e9-4f9b-ab28-f93202507265.json new file mode 100644 index 000000000..805297be2 --- /dev/null +++ b/data/hfopenllm_v2/MarinaraSpaghetti/NemoReRemix-12B/8ce733ea-e6e9-4f9b-ab28-f93202507265.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MarinaraSpaghetti_NemoReRemix-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NemoReRemix-12B", + "id": "MarinaraSpaghetti/NemoReRemix-12B", + "developer": "MarinaraSpaghetti", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3343 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5537 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": 
"hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0906 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.318 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4501 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3598 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MarinaraSpaghetti/NemoReRemix-12B/ac67a9d9-0f5a-4891-a9e5-2a924fbf4f72.json b/data/hfopenllm_v2/MarinaraSpaghetti/NemoReRemix-12B/ac67a9d9-0f5a-4891-a9e5-2a924fbf4f72.json deleted file mode 100644 index f3708f4f3..000000000 --- a/data/hfopenllm_v2/MarinaraSpaghetti/NemoReRemix-12B/ac67a9d9-0f5a-4891-a9e5-2a924fbf4f72.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MarinaraSpaghetti_NemoReRemix-12B/1762652579.7463942", - "retrieved_timestamp": "1762652579.746399", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MarinaraSpaghetti/NemoReRemix-12B", - "developer": "MarinaraSpaghetti", - "inference_platform": "unknown", - "id": "MarinaraSpaghetti/NemoReRemix-12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33425089872649016 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5536511805668158 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09063444108761329 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4501458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3597905585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/MarinaraSpaghetti/Nemomix-v4.0-12B/0e88aa91-609c-4d2d-9296-25b06eeb0342.json b/data/hfopenllm_v2/MarinaraSpaghetti/Nemomix-v4.0-12B/0e88aa91-609c-4d2d-9296-25b06eeb0342.json new file mode 100644 index 000000000..6824fedbe --- /dev/null +++ b/data/hfopenllm_v2/MarinaraSpaghetti/Nemomix-v4.0-12B/0e88aa91-609c-4d2d-9296-25b06eeb0342.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MarinaraSpaghetti_Nemomix-v4.0-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nemomix-v4.0-12B", + "id": "MarinaraSpaghetti/Nemomix-v4.0-12B", + "developer": "MarinaraSpaghetti", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5575 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5275 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.108 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.4244 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3613 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MarinaraSpaghetti/Nemomix-v4.0-12B/aeac3ed0-e93b-4fb2-bdd5-1fd06ccd3338.json b/data/hfopenllm_v2/MarinaraSpaghetti/Nemomix-v4.0-12B/aeac3ed0-e93b-4fb2-bdd5-1fd06ccd3338.json deleted file mode 100644 index fdb443581..000000000 --- a/data/hfopenllm_v2/MarinaraSpaghetti/Nemomix-v4.0-12B/aeac3ed0-e93b-4fb2-bdd5-1fd06ccd3338.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MarinaraSpaghetti_Nemomix-v4.0-12B/1762652579.746819", - "retrieved_timestamp": "1762652579.7468212", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MarinaraSpaghetti/Nemomix-v4.0-12B", - "developer": "MarinaraSpaghetti", - "inference_platform": "unknown", - "id": "MarinaraSpaghetti/Nemomix-v4.0-12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5574664113441224 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5274986611124783 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10800604229607251 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42444791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36128656914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/Marsouuu/MiniMathExpert-2_61B-ECE-PRYMMAL-Martial/2c99d2a7-7a5f-4357-ad92-745d8a718ee3.json b/data/hfopenllm_v2/Marsouuu/MiniMathExpert-2_61B-ECE-PRYMMAL-Martial/2c99d2a7-7a5f-4357-ad92-745d8a718ee3.json deleted file mode 100644 index aae1413ce..000000000 --- 
a/data/hfopenllm_v2/Marsouuu/MiniMathExpert-2_61B-ECE-PRYMMAL-Martial/2c99d2a7-7a5f-4357-ad92-745d8a718ee3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Marsouuu_MiniMathExpert-2_61B-ECE-PRYMMAL-Martial/1762652579.747071", - "retrieved_timestamp": "1762652579.747073", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Marsouuu/MiniMathExpert-2_61B-ECE-PRYMMAL-Martial", - "developer": "Marsouuu", - "inference_platform": "unknown", - "id": "Marsouuu/MiniMathExpert-2_61B-ECE-PRYMMAL-Martial", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25484159807089635 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3952730330493959 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07401812688821752 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40832291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22739361702127658 - } - } - ] -} diff --git a/data/hfopenllm_v2/Marsouuu/MiniMathExpert-2_61B-ECE-PRYMMAL-Martial/3e235ea0-3f04-4d99-9db2-7cafcbdbac6f.json b/data/hfopenllm_v2/Marsouuu/MiniMathExpert-2_61B-ECE-PRYMMAL-Martial/3e235ea0-3f04-4d99-9db2-7cafcbdbac6f.json new file mode 100644 index 000000000..a4acc5d5f --- /dev/null +++ b/data/hfopenllm_v2/Marsouuu/MiniMathExpert-2_61B-ECE-PRYMMAL-Martial/3e235ea0-3f04-4d99-9db2-7cafcbdbac6f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Marsouuu_MiniMathExpert-2_61B-ECE-PRYMMAL-Martial/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MiniMathExpert-2_61B-ECE-PRYMMAL-Martial", + "id": "Marsouuu/MiniMathExpert-2_61B-ECE-PRYMMAL-Martial", + "developer": "Marsouuu", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2548 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3953 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.074 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2752 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4083 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2274 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Marsouuu/MiniQwenMathExpert-ECE-PRYMMAL-Martial/5e31a55c-f222-4192-b031-27bb40ba56fa.json b/data/hfopenllm_v2/Marsouuu/MiniQwenMathExpert-ECE-PRYMMAL-Martial/5e31a55c-f222-4192-b031-27bb40ba56fa.json new file mode 100644 index 000000000..fe44d4d77 --- /dev/null +++ b/data/hfopenllm_v2/Marsouuu/MiniQwenMathExpert-ECE-PRYMMAL-Martial/5e31a55c-f222-4192-b031-27bb40ba56fa.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Marsouuu_MiniQwenMathExpert-ECE-PRYMMAL-Martial/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MiniQwenMathExpert-ECE-PRYMMAL-Martial", + "id": "Marsouuu/MiniQwenMathExpert-ECE-PRYMMAL-Martial", + "developer": "Marsouuu", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + 
"params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2795 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.423 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.114 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2819 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3867 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2922 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Marsouuu/MistralBase-4x7B-MoE-ECE-PRYMMAL-Martial/11fd4b70-4ea7-4bee-8caf-8921d4c89f24.json b/data/hfopenllm_v2/Marsouuu/MistralBase-4x7B-MoE-ECE-PRYMMAL-Martial/11fd4b70-4ea7-4bee-8caf-8921d4c89f24.json new file mode 100644 index 000000000..8cb29fb63 --- /dev/null +++ b/data/hfopenllm_v2/Marsouuu/MistralBase-4x7B-MoE-ECE-PRYMMAL-Martial/11fd4b70-4ea7-4bee-8caf-8921d4c89f24.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Marsouuu_MistralBase-4x7B-MoE-ECE-PRYMMAL-Martial/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MistralBase-4x7B-MoE-ECE-PRYMMAL-Martial", + "id": "Marsouuu/MistralBase-4x7B-MoE-ECE-PRYMMAL-Martial", + "developer": "Marsouuu", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.16 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": 
"IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1697 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3464 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0144 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3991 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1379 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Marsouuu/general3B-ECE-PRYMMAL-Martial/6f36320a-dcfd-4e93-87b2-53763dde5c57.json b/data/hfopenllm_v2/Marsouuu/general3B-ECE-PRYMMAL-Martial/6f36320a-dcfd-4e93-87b2-53763dde5c57.json deleted file mode 100644 index 7a2c76d1e..000000000 --- a/data/hfopenllm_v2/Marsouuu/general3B-ECE-PRYMMAL-Martial/6f36320a-dcfd-4e93-87b2-53763dde5c57.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Marsouuu_general3B-ECE-PRYMMAL-Martial/1762652579.748109", - "retrieved_timestamp": "1762652579.74811", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Marsouuu/general3B-ECE-PRYMMAL-Martial", - "developer": "Marsouuu", - "inference_platform": "unknown", - "id": "Marsouuu/general3B-ECE-PRYMMAL-Martial", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27222658102722996 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5394350977017502 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15483383685800603 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4700520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38763297872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/Marsouuu/general3B-ECE-PRYMMAL-Martial/8e721067-898d-45ca-b4f5-9f523c4ce3d3.json b/data/hfopenllm_v2/Marsouuu/general3B-ECE-PRYMMAL-Martial/8e721067-898d-45ca-b4f5-9f523c4ce3d3.json new file mode 100644 index 000000000..7e5d5f9c9 --- /dev/null +++ b/data/hfopenllm_v2/Marsouuu/general3B-ECE-PRYMMAL-Martial/8e721067-898d-45ca-b4f5-9f523c4ce3d3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Marsouuu_general3B-ECE-PRYMMAL-Martial/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "general3B-ECE-PRYMMAL-Martial", + "id": "Marsouuu/general3B-ECE-PRYMMAL-Martial", + "developer": "Marsouuu", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2722 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5394 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1548 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3196 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4701 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3876 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Marsouuu/general3Bv2-ECE-PRYMMAL-Martial/716552b2-6343-4339-b9f5-a573fa47c384.json b/data/hfopenllm_v2/Marsouuu/general3Bv2-ECE-PRYMMAL-Martial/716552b2-6343-4339-b9f5-a573fa47c384.json deleted file mode 100644 index e3b7f0413..000000000 --- a/data/hfopenllm_v2/Marsouuu/general3Bv2-ECE-PRYMMAL-Martial/716552b2-6343-4339-b9f5-a573fa47c384.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Marsouuu_general3Bv2-ECE-PRYMMAL-Martial/1762652579.748472", - "retrieved_timestamp": "1762652579.7484732", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Marsouuu/general3Bv2-ECE-PRYMMAL-Martial", - "developer": "Marsouuu", - "inference_platform": "unknown", - "id": "Marsouuu/general3Bv2-ECE-PRYMMAL-Martial", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5692817280371636 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5636569831901026 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36706948640483383 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43960416666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4498005319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/Marsouuu/general3Bv2-ECE-PRYMMAL-Martial/be5d5480-ce4c-4ade-8c6a-c08cd2826909.json b/data/hfopenllm_v2/Marsouuu/general3Bv2-ECE-PRYMMAL-Martial/be5d5480-ce4c-4ade-8c6a-c08cd2826909.json new file mode 100644 index 000000000..2888af590 --- /dev/null +++ b/data/hfopenllm_v2/Marsouuu/general3Bv2-ECE-PRYMMAL-Martial/be5d5480-ce4c-4ade-8c6a-c08cd2826909.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Marsouuu_general3Bv2-ECE-PRYMMAL-Martial/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "general3Bv2-ECE-PRYMMAL-Martial", + "id": "Marsouuu/general3Bv2-ECE-PRYMMAL-Martial", + "developer": "Marsouuu", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5693 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5637 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3671 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4396 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + 
"dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4498 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Marsouuu/lareneg1_78B-ECE-PRYMMAL-Martial/49532386-7e9b-4719-9c24-5d463dea6cfc.json b/data/hfopenllm_v2/Marsouuu/lareneg1_78B-ECE-PRYMMAL-Martial/49532386-7e9b-4719-9c24-5d463dea6cfc.json deleted file mode 100644 index be8ba9846..000000000 --- a/data/hfopenllm_v2/Marsouuu/lareneg1_78B-ECE-PRYMMAL-Martial/49532386-7e9b-4719-9c24-5d463dea6cfc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Marsouuu_lareneg1_78B-ECE-PRYMMAL-Martial/1762652579.7487411", - "retrieved_timestamp": "1762652579.7487419", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Marsouuu/lareneg1_78B-ECE-PRYMMAL-Martial", - "developer": "Marsouuu", - "inference_platform": "unknown", - "id": "Marsouuu/lareneg1_78B-ECE-PRYMMAL-Martial", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2794961812435449 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42301343044108936 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11404833836858005 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38673958333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2922207446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/Marsouuu/lareneg1_78B-ECE-PRYMMAL-Martial/54dec074-29f8-4863-be37-2c08f6f2c3cb.json b/data/hfopenllm_v2/Marsouuu/lareneg1_78B-ECE-PRYMMAL-Martial/54dec074-29f8-4863-be37-2c08f6f2c3cb.json new file mode 100644 index 000000000..21ecaf0b1 --- /dev/null +++ 
b/data/hfopenllm_v2/Marsouuu/lareneg1_78B-ECE-PRYMMAL-Martial/54dec074-29f8-4863-be37-2c08f6f2c3cb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Marsouuu_lareneg1_78B-ECE-PRYMMAL-Martial/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lareneg1_78B-ECE-PRYMMAL-Martial", + "id": "Marsouuu/lareneg1_78B-ECE-PRYMMAL-Martial", + "developer": "Marsouuu", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2795 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.423 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.114 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2819 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3867 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2922 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Marsouuu/lareneg3B-ECE-PRYMMAL-Martial/88a15025-556b-469d-be77-c773f2c61038.json b/data/hfopenllm_v2/Marsouuu/lareneg3B-ECE-PRYMMAL-Martial/88a15025-556b-469d-be77-c773f2c61038.json new file mode 100644 index 000000000..f634cf074 --- /dev/null +++ b/data/hfopenllm_v2/Marsouuu/lareneg3B-ECE-PRYMMAL-Martial/88a15025-556b-469d-be77-c773f2c61038.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/Marsouuu_lareneg3B-ECE-PRYMMAL-Martial/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lareneg3B-ECE-PRYMMAL-Martial", + "id": "Marsouuu/lareneg3B-ECE-PRYMMAL-Martial", + "developer": "Marsouuu", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3303 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5453 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1518 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3247 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4725 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3767 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Marsouuu/lareneg3B-ECE-PRYMMAL-Martial/8d0e995d-2859-461b-8be7-60d2b2690d6b.json b/data/hfopenllm_v2/Marsouuu/lareneg3B-ECE-PRYMMAL-Martial/8d0e995d-2859-461b-8be7-60d2b2690d6b.json deleted file mode 100644 index a57785213..000000000 --- a/data/hfopenllm_v2/Marsouuu/lareneg3B-ECE-PRYMMAL-Martial/8d0e995d-2859-461b-8be7-60d2b2690d6b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Marsouuu_lareneg3B-ECE-PRYMMAL-Martial/1762652579.748992", - "retrieved_timestamp": "1762652579.748993", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Marsouuu/lareneg3B-ECE-PRYMMAL-Martial", - "developer": "Marsouuu", - "inference_platform": "unknown", - "id": "Marsouuu/lareneg3B-ECE-PRYMMAL-Martial", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33032908239028 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5453325807578268 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15181268882175228 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47246875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37666223404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/Marsouuu/lareneg3Bv2-ECE-PRYMMAL-Martial/09b5771f-9ee2-4f4f-9fa9-e0280c33b00f.json b/data/hfopenllm_v2/Marsouuu/lareneg3Bv2-ECE-PRYMMAL-Martial/09b5771f-9ee2-4f4f-9fa9-e0280c33b00f.json deleted file mode 100644 index 82bf49a49..000000000 --- a/data/hfopenllm_v2/Marsouuu/lareneg3Bv2-ECE-PRYMMAL-Martial/09b5771f-9ee2-4f4f-9fa9-e0280c33b00f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Marsouuu_lareneg3Bv2-ECE-PRYMMAL-Martial/1762652579.749232", - "retrieved_timestamp": "1762652579.749232", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Marsouuu/lareneg3Bv2-ECE-PRYMMAL-Martial", - "developer": "Marsouuu", - "inference_platform": "unknown", - "id": "Marsouuu/lareneg3Bv2-ECE-PRYMMAL-Martial", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5753267995585047 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.562336014537904 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36555891238670696 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4369375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45113031914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/Marsouuu/lareneg3Bv2-ECE-PRYMMAL-Martial/b4f4596b-17e5-40bf-ae60-0b17492ba9f8.json b/data/hfopenllm_v2/Marsouuu/lareneg3Bv2-ECE-PRYMMAL-Martial/b4f4596b-17e5-40bf-ae60-0b17492ba9f8.json new file mode 100644 index 000000000..829e669c7 --- /dev/null +++ b/data/hfopenllm_v2/Marsouuu/lareneg3Bv2-ECE-PRYMMAL-Martial/b4f4596b-17e5-40bf-ae60-0b17492ba9f8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Marsouuu_lareneg3Bv2-ECE-PRYMMAL-Martial/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lareneg3Bv2-ECE-PRYMMAL-Martial", + "id": "Marsouuu/lareneg3Bv2-ECE-PRYMMAL-Martial", + "developer": "Marsouuu", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5753 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5623 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3656 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3196 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4369 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4511 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/Calme-4x7B-MoE-v0.1/97ce858e-a64f-4881-b6d0-0a2c0814336d.json b/data/hfopenllm_v2/MaziyarPanahi/Calme-4x7B-MoE-v0.1/97ce858e-a64f-4881-b6d0-0a2c0814336d.json new file mode 100644 index 000000000..e5c5e6482 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/Calme-4x7B-MoE-v0.1/97ce858e-a64f-4881-b6d0-0a2c0814336d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Calme-4x7B-MoE-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Calme-4x7B-MoE-v0.1", + "id": "MaziyarPanahi/Calme-4x7B-MoE-v0.1", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.154 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4315 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5103 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0801 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": 
"hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2819 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4199 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3057 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/Calme-4x7B-MoE-v0.1/f4512664-c531-4b13-b76e-e96c2b03febf.json b/data/hfopenllm_v2/MaziyarPanahi/Calme-4x7B-MoE-v0.1/f4512664-c531-4b13-b76e-e96c2b03febf.json deleted file mode 100644 index 66233acba..000000000 --- a/data/hfopenllm_v2/MaziyarPanahi/Calme-4x7B-MoE-v0.1/f4512664-c531-4b13-b76e-e96c2b03febf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Calme-4x7B-MoE-v0.1/1762652579.7495291", - "retrieved_timestamp": "1762652579.74953", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/Calme-4x7B-MoE-v0.1", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": "MaziyarPanahi/Calme-4x7B-MoE-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 24.154 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4315205875964663 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5102819889174134 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08006042296072508 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4198854166666666 - } - }, - { 
- "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3056848404255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/MaziyarPanahi/Calme-4x7B-MoE-v0.2/1becd83e-e9b8-49c1-a137-80c5a8dbdf0d.json b/data/hfopenllm_v2/MaziyarPanahi/Calme-4x7B-MoE-v0.2/1becd83e-e9b8-49c1-a137-80c5a8dbdf0d.json new file mode 100644 index 000000000..d772d36c9 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/Calme-4x7B-MoE-v0.2/1becd83e-e9b8-49c1-a137-80c5a8dbdf0d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Calme-4x7B-MoE-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Calme-4x7B-MoE-v0.2", + "id": "MaziyarPanahi/Calme-4x7B-MoE-v0.2", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.154 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4294 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5111 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.074 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4318 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.3058 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/Calme-4x7B-MoE-v0.2/ca2df1c9-79b2-453b-9cd1-b607e48f5dd7.json b/data/hfopenllm_v2/MaziyarPanahi/Calme-4x7B-MoE-v0.2/ca2df1c9-79b2-453b-9cd1-b607e48f5dd7.json deleted file mode 100644 index 9dfb10278..000000000 --- a/data/hfopenllm_v2/MaziyarPanahi/Calme-4x7B-MoE-v0.2/ca2df1c9-79b2-453b-9cd1-b607e48f5dd7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Calme-4x7B-MoE-v0.2/1762652579.7498329", - "retrieved_timestamp": "1762652579.749834", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/Calme-4x7B-MoE-v0.2", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": "MaziyarPanahi/Calme-4x7B-MoE-v0.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 24.154 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.429447200095746 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5110766802558263 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07401812688821752 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43176041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30576795212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/MaziyarPanahi/Llama-3-70B-Instruct-v0.1/1e2759fa-3e87-447b-b0ca-5a4e2e293589.json b/data/hfopenllm_v2/MaziyarPanahi/Llama-3-70B-Instruct-v0.1/1e2759fa-3e87-447b-b0ca-5a4e2e293589.json deleted file mode 100644 index 1372a47ec..000000000 --- a/data/hfopenllm_v2/MaziyarPanahi/Llama-3-70B-Instruct-v0.1/1e2759fa-3e87-447b-b0ca-5a4e2e293589.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Llama-3-70B-Instruct-v0.1/1762652579.750048", - "retrieved_timestamp": "1762652579.750049", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/Llama-3-70B-Instruct-v0.1", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": "MaziyarPanahi/Llama-3-70B-Instruct-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47143800671108216 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5366257615951637 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18051359516616314 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4433020833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4617686170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/MaziyarPanahi/Llama-3-70B-Instruct-v0.1/337bb321-9c6e-4751-9c9b-d8ba0120dd07.json b/data/hfopenllm_v2/MaziyarPanahi/Llama-3-70B-Instruct-v0.1/337bb321-9c6e-4751-9c9b-d8ba0120dd07.json new file mode 100644 index 000000000..b12b4dd7c --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/Llama-3-70B-Instruct-v0.1/337bb321-9c6e-4751-9c9b-d8ba0120dd07.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Llama-3-70B-Instruct-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-70B-Instruct-v0.1", + "id": "MaziyarPanahi/Llama-3-70B-Instruct-v0.1", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4714 + } + }, 
+ { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5366 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1805 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2844 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4433 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4618 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/Llama-3-8B-Instruct-v0.10/19143059-07d5-44b2-b599-193147f6196a.json b/data/hfopenllm_v2/MaziyarPanahi/Llama-3-8B-Instruct-v0.10/19143059-07d5-44b2-b599-193147f6196a.json deleted file mode 100644 index 18594390a..000000000 --- a/data/hfopenllm_v2/MaziyarPanahi/Llama-3-8B-Instruct-v0.10/19143059-07d5-44b2-b599-193147f6196a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Llama-3-8B-Instruct-v0.10/1762652579.750272", - "retrieved_timestamp": "1762652579.750272", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/Llama-3-8B-Instruct-v0.10", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": "MaziyarPanahi/Llama-3-8B-Instruct-v0.10", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7667433520835827 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4924311866686311 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42143749999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38622007978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/MaziyarPanahi/Llama-3-8B-Instruct-v0.10/cfa95cc9-5bb1-4921-97c7-078f2f929a2f.json b/data/hfopenllm_v2/MaziyarPanahi/Llama-3-8B-Instruct-v0.10/cfa95cc9-5bb1-4921-97c7-078f2f929a2f.json new file mode 100644 index 000000000..2084bb4eb --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/Llama-3-8B-Instruct-v0.10/cfa95cc9-5bb1-4921-97c7-078f2f929a2f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Llama-3-8B-Instruct-v0.10/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct-v0.10", + "id": "MaziyarPanahi/Llama-3-8B-Instruct-v0.10", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7667 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4924 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0574 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4214 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3862 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/Llama-3-8B-Instruct-v0.8/6d5ba3c4-a0c2-40cd-9766-68d36d21c5b6.json b/data/hfopenllm_v2/MaziyarPanahi/Llama-3-8B-Instruct-v0.8/6d5ba3c4-a0c2-40cd-9766-68d36d21c5b6.json new file mode 100644 index 000000000..9ec03b55c --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/Llama-3-8B-Instruct-v0.8/6d5ba3c4-a0c2-40cd-9766-68d36d21c5b6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Llama-3-8B-Instruct-v0.8/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct-v0.8", + "id": "MaziyarPanahi/Llama-3-8B-Instruct-v0.8", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7528 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4963 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0778 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4202 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3853 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/Llama-3-8B-Instruct-v0.8/c68859dd-6db0-4bdc-a031-92ac7d1d2585.json b/data/hfopenllm_v2/MaziyarPanahi/Llama-3-8B-Instruct-v0.8/c68859dd-6db0-4bdc-a031-92ac7d1d2585.json deleted file mode 100644 index 2ec4f91db..000000000 --- a/data/hfopenllm_v2/MaziyarPanahi/Llama-3-8B-Instruct-v0.8/c68859dd-6db0-4bdc-a031-92ac7d1d2585.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Llama-3-8B-Instruct-v0.8/1762652579.750486", - "retrieved_timestamp": "1762652579.750487", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/Llama-3-8B-Instruct-v0.8", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": "MaziyarPanahi/Llama-3-8B-Instruct-v0.8", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7527549125209998 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49627836815949883 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07779456193353475 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42019791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3853058510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/MaziyarPanahi/Llama-3-8B-Instruct-v0.9/1fb0056b-4f66-404b-89ac-a58185747ce2.json b/data/hfopenllm_v2/MaziyarPanahi/Llama-3-8B-Instruct-v0.9/1fb0056b-4f66-404b-89ac-a58185747ce2.json deleted file mode 100644 index 62f8534a7..000000000 --- a/data/hfopenllm_v2/MaziyarPanahi/Llama-3-8B-Instruct-v0.9/1fb0056b-4f66-404b-89ac-a58185747ce2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Llama-3-8B-Instruct-v0.9/1762652579.750697", - "retrieved_timestamp": "1762652579.750697", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/Llama-3-8B-Instruct-v0.9", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": "MaziyarPanahi/Llama-3-8B-Instruct-v0.9", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.763046494412603 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4936132794870085 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07326283987915408 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4148020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3845578457446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/MaziyarPanahi/Llama-3-8B-Instruct-v0.9/6cc4404a-f3e1-47b9-b56b-34e4269e1261.json b/data/hfopenllm_v2/MaziyarPanahi/Llama-3-8B-Instruct-v0.9/6cc4404a-f3e1-47b9-b56b-34e4269e1261.json new file mode 100644 index 000000000..9730bfb53 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/Llama-3-8B-Instruct-v0.9/6cc4404a-f3e1-47b9-b56b-34e4269e1261.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Llama-3-8B-Instruct-v0.9/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct-v0.9", + "id": "MaziyarPanahi/Llama-3-8B-Instruct-v0.9", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.763 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4936 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0733 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4148 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3846 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/Qwen1.5-MoE-A2.7B-Wikihow/8d820e43-ff42-4247-9ad0-4ed8e70672b4.json b/data/hfopenllm_v2/MaziyarPanahi/Qwen1.5-MoE-A2.7B-Wikihow/8d820e43-ff42-4247-9ad0-4ed8e70672b4.json new file mode 100644 index 000000000..25b553bee --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/Qwen1.5-MoE-A2.7B-Wikihow/8d820e43-ff42-4247-9ad0-4ed8e70672b4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Qwen1.5-MoE-A2.7B-Wikihow/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen1.5-MoE-A2.7B-Wikihow", + "id": 
"MaziyarPanahi/Qwen1.5-MoE-A2.7B-Wikihow", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2MoeForCausalLM", + "params_billions": 14.316 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2954 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.392 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0823 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2752 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3502 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.238 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/Qwen2-7B-Instruct-v0.1/ce4ee4fe-8a38-467b-b189-b25311c23c4e.json b/data/hfopenllm_v2/MaziyarPanahi/Qwen2-7B-Instruct-v0.1/ce4ee4fe-8a38-467b-b189-b25311c23c4e.json deleted file mode 100644 index b73549c5b..000000000 --- a/data/hfopenllm_v2/MaziyarPanahi/Qwen2-7B-Instruct-v0.1/ce4ee4fe-8a38-467b-b189-b25311c23c4e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Qwen2-7B-Instruct-v0.1/1762652579.7511811", - "retrieved_timestamp": "1762652579.751182", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/Qwen2-7B-Instruct-v0.1", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": 
"MaziyarPanahi/Qwen2-7B-Instruct-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33522498082864577 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5123061019250074 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2212990936555891 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44347916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3857214095744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/MaziyarPanahi/Qwen2-7B-Instruct-v0.1/d858ce8e-6a4b-46b1-8d51-03ebc2d8aaec.json b/data/hfopenllm_v2/MaziyarPanahi/Qwen2-7B-Instruct-v0.1/d858ce8e-6a4b-46b1-8d51-03ebc2d8aaec.json new file mode 100644 index 000000000..1ec344d87 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/Qwen2-7B-Instruct-v0.1/d858ce8e-6a4b-46b1-8d51-03ebc2d8aaec.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Qwen2-7B-Instruct-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2-7B-Instruct-v0.1", + "id": "MaziyarPanahi/Qwen2-7B-Instruct-v0.1", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3352 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.5123 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2213 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4435 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3857 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/Qwen2-7B-Instruct-v0.8/9813dd88-ff70-4d9e-86c5-9b73444275c5.json b/data/hfopenllm_v2/MaziyarPanahi/Qwen2-7B-Instruct-v0.8/9813dd88-ff70-4d9e-86c5-9b73444275c5.json new file mode 100644 index 000000000..1b9f32279 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/Qwen2-7B-Instruct-v0.8/9813dd88-ff70-4d9e-86c5-9b73444275c5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Qwen2-7B-Instruct-v0.8/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2-7B-Instruct-v0.8", + "id": "MaziyarPanahi/Qwen2-7B-Instruct-v0.8", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2775 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4637 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1767 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4293 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3566 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/Qwen2-7B-Instruct-v0.8/a65af628-f518-4da7-afc5-7cba4234415b.json b/data/hfopenllm_v2/MaziyarPanahi/Qwen2-7B-Instruct-v0.8/a65af628-f518-4da7-afc5-7cba4234415b.json deleted file mode 100644 index d385c0e08..000000000 --- a/data/hfopenllm_v2/MaziyarPanahi/Qwen2-7B-Instruct-v0.8/a65af628-f518-4da7-afc5-7cba4234415b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Qwen2-7B-Instruct-v0.8/1762652579.751401", - "retrieved_timestamp": "1762652579.751402", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/Qwen2-7B-Instruct-v0.8", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": "MaziyarPanahi/Qwen2-7B-Instruct-v0.8", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27747266142723526 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4637108491317945 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17673716012084592 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4293125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3566323138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-2.1-llama3.1-70b/ac677432-e7d1-4439-9c05-426059c285ef.json b/data/hfopenllm_v2/MaziyarPanahi/calme-2.1-llama3.1-70b/ac677432-e7d1-4439-9c05-426059c285ef.json new file mode 100644 index 000000000..3872599b8 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-2.1-llama3.1-70b/ac677432-e7d1-4439-9c05-426059c285ef.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.1-llama3.1-70b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "calme-2.1-llama3.1-70b", + "id": "MaziyarPanahi/calme-2.1-llama3.1-70b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8434 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6448 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4101 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.328 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.438 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5283 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-2.1-phi3-4b/018f270f-3cfe-403c-a236-483038a0b04e.json b/data/hfopenllm_v2/MaziyarPanahi/calme-2.1-phi3-4b/018f270f-3cfe-403c-a236-483038a0b04e.json new file mode 100644 index 000000000..d1bb5a3bb --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-2.1-phi3-4b/018f270f-3cfe-403c-a236-483038a0b04e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.1-phi3-4b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "calme-2.1-phi3-4b", + "id": "MaziyarPanahi/calme-2.1-phi3-4b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5525 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5595 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1314 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4015 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3746 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-2.1-phi3.5-4b/718a40ea-26b1-4cf4-9584-57be798640ae.json b/data/hfopenllm_v2/MaziyarPanahi/calme-2.1-phi3.5-4b/718a40ea-26b1-4cf4-9584-57be798640ae.json new file mode 100644 index 000000000..6ca32abe5 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-2.1-phi3.5-4b/718a40ea-26b1-4cf4-9584-57be798640ae.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.1-phi3.5-4b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "calme-2.1-phi3.5-4b", + "id": "MaziyarPanahi/calme-2.1-phi3.5-4b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5659 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5484 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2039 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.344 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3935 + } + } + ] +} \ No newline at end of 
file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-2.1-qwen2-72b/207a28a9-ae24-4a31-be95-96296b2e466d.json b/data/hfopenllm_v2/MaziyarPanahi/calme-2.1-qwen2-72b/207a28a9-ae24-4a31-be95-96296b2e466d.json new file mode 100644 index 000000000..847508496 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-2.1-qwen2-72b/207a28a9-ae24-4a31-be95-96296b2e466d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.1-qwen2-72b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "calme-2.1-qwen2-72b", + "id": "MaziyarPanahi/calme-2.1-qwen2-72b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.699 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8163 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6966 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4079 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3809 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4732 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5415 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-2.1-qwen2-7b/72efedb8-d456-41ed-b1ae-4887cb6c18f8.json b/data/hfopenllm_v2/MaziyarPanahi/calme-2.1-qwen2-7b/72efedb8-d456-41ed-b1ae-4887cb6c18f8.json new file mode 100644 
index 000000000..e64c522fa --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-2.1-qwen2-7b/72efedb8-d456-41ed-b1ae-4887cb6c18f8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.1-qwen2-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "calme-2.1-qwen2-7b", + "id": "MaziyarPanahi/calme-2.1-qwen2-7b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3816 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5046 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2311 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4437 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3693 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-2.1-qwen2.5-72b/ac91fb37-5742-4a3d-b93a-86c63b90cad5.json b/data/hfopenllm_v2/MaziyarPanahi/calme-2.1-qwen2.5-72b/ac91fb37-5742-4a3d-b93a-86c63b90cad5.json new file mode 100644 index 000000000..d23d575cb --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-2.1-qwen2.5-72b/ac91fb37-5742-4a3d-b93a-86c63b90cad5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/MaziyarPanahi_calme-2.1-qwen2.5-72b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "calme-2.1-qwen2.5-72b", + "id": "MaziyarPanahi/calme-2.1-qwen2.5-72b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.7 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8662 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7262 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5914 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3633 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4298 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5619 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-2.1-rys-78b/387000a4-7ef5-46c6-9b5e-9bfe7c2cfc18.json b/data/hfopenllm_v2/MaziyarPanahi/calme-2.1-rys-78b/387000a4-7ef5-46c6-9b5e-9bfe7c2cfc18.json deleted file mode 100644 index 879908f72..000000000 --- a/data/hfopenllm_v2/MaziyarPanahi/calme-2.1-rys-78b/387000a4-7ef5-46c6-9b5e-9bfe7c2cfc18.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.1-rys-78b/1762652579.752971", - "retrieved_timestamp": "1762652579.752971", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], 
- "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.1-rys-78b", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.1-rys-78b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 77.965 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8135547015252862 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7097861139530462 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3942598187311178 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39429530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4693125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5443816489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-2.1-rys-78b/c71d025d-e954-4420-b397-e07c3644d1f4.json b/data/hfopenllm_v2/MaziyarPanahi/calme-2.1-rys-78b/c71d025d-e954-4420-b397-e07c3644d1f4.json new file mode 100644 index 000000000..d2b0fca22 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-2.1-rys-78b/c71d025d-e954-4420-b397-e07c3644d1f4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.1-rys-78b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "calme-2.1-rys-78b", + "id": "MaziyarPanahi/calme-2.1-rys-78b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 77.965 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8136 + } + }, + { + "evaluation_name": "BBH", + "source_data": 
{ + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7098 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3943 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3943 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4693 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5444 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-2.2-llama3-70b/968c3759-de5f-4255-ba95-cafc7a3c70a7.json b/data/hfopenllm_v2/MaziyarPanahi/calme-2.2-llama3-70b/968c3759-de5f-4255-ba95-cafc7a3c70a7.json new file mode 100644 index 000000000..6a8c1b645 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-2.2-llama3-70b/968c3759-de5f-4255-ba95-cafc7a3c70a7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.2-llama3-70b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "calme-2.2-llama3-70b", + "id": "MaziyarPanahi/calme-2.2-llama3-70b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8208 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", 
+ "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6435 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2394 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3414 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4446 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5207 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-2.2-llama3.1-70b/5e23b2f7-33f7-4e49-b73a-a02b8650ee0d.json b/data/hfopenllm_v2/MaziyarPanahi/calme-2.2-llama3.1-70b/5e23b2f7-33f7-4e49-b73a-a02b8650ee0d.json new file mode 100644 index 000000000..27dee1772 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-2.2-llama3.1-70b/5e23b2f7-33f7-4e49-b73a-a02b8650ee0d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.2-llama3.1-70b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "calme-2.2-llama3.1-70b", + "id": "MaziyarPanahi/calme-2.2-llama3.1-70b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8593 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6793 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": 
"hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4366 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3247 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4542 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5415 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-2.2-phi3-4b/1b6c64f6-acf8-4cff-bcae-6e8b3725c6f1.json b/data/hfopenllm_v2/MaziyarPanahi/calme-2.2-phi3-4b/1b6c64f6-acf8-4cff-bcae-6e8b3725c6f1.json new file mode 100644 index 000000000..76e64261b --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-2.2-phi3-4b/1b6c64f6-acf8-4cff-bcae-6e8b3725c6f1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.2-phi3-4b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "calme-2.2-phi3-4b", + "id": "MaziyarPanahi/calme-2.2-phi3-4b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5069 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.553 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 
1.0 + }, + "score_details": { + "score": 0.145 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3213 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3976 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3814 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-2.2-qwen2-72b/7908f572-8886-4add-ae84-b4ec0ec17c26.json b/data/hfopenllm_v2/MaziyarPanahi/calme-2.2-qwen2-72b/7908f572-8886-4add-ae84-b4ec0ec17c26.json new file mode 100644 index 000000000..ac04e14a8 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-2.2-qwen2-72b/7908f572-8886-4add-ae84-b4ec0ec17c26.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.2-qwen2-72b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "calme-2.2-qwen2-72b", + "id": "MaziyarPanahi/calme-2.2-qwen2-72b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8008 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.694 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4532 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3742 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4508 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5435 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-2.2-qwen2-7b/9e04ec5c-2208-4569-9b63-4768ed4262b9.json b/data/hfopenllm_v2/MaziyarPanahi/calme-2.2-qwen2-7b/9e04ec5c-2208-4569-9b63-4768ed4262b9.json new file mode 100644 index 000000000..1a6f153fa --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-2.2-qwen2-7b/9e04ec5c-2208-4569-9b63-4768ed4262b9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.2-qwen2-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "calme-2.2-qwen2-7b", + "id": "MaziyarPanahi/calme-2.2-qwen2-7b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3597 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5215 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2145 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + 
"source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4358 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3899 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-2.2-qwen2.5-72b/ee2c8beb-6566-4b19-91d0-8e48c12a3fdf.json b/data/hfopenllm_v2/MaziyarPanahi/calme-2.2-qwen2.5-72b/ee2c8beb-6566-4b19-91d0-8e48c12a3fdf.json new file mode 100644 index 000000000..6b2655c95 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-2.2-qwen2.5-72b/ee2c8beb-6566-4b19-91d0-8e48c12a3fdf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.2-qwen2.5-72b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "calme-2.2-qwen2.5-72b", + "id": "MaziyarPanahi/calme-2.2-qwen2.5-72b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.7 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8477 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7276 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5891 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3591 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4207 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5618 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-2.2-rys-78b/c7579616-0c21-443a-a149-0c51a0ae92ac.json b/data/hfopenllm_v2/MaziyarPanahi/calme-2.2-rys-78b/c7579616-0c21-443a-a149-0c51a0ae92ac.json new file mode 100644 index 000000000..554ca11d3 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-2.2-rys-78b/c7579616-0c21-443a-a149-0c51a0ae92ac.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.2-rys-78b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "calme-2.2-rys-78b", + "id": "MaziyarPanahi/calme-2.2-rys-78b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 77.965 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7986 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7081 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4071 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4069 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4536 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + 
"hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5386 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-2.2-rys-78b/cfaafe4c-50a1-4cde-b092-fdbaeea86fb3.json b/data/hfopenllm_v2/MaziyarPanahi/calme-2.2-rys-78b/cfaafe4c-50a1-4cde-b092-fdbaeea86fb3.json deleted file mode 100644 index eccf14cde..000000000 --- a/data/hfopenllm_v2/MaziyarPanahi/calme-2.2-rys-78b/cfaafe4c-50a1-4cde-b092-fdbaeea86fb3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.2-rys-78b/1762652579.754511", - "retrieved_timestamp": "1762652579.754511", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.2-rys-78b", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.2-rys-78b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 77.965 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7986420475449585 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7081014602379213 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4070996978851964 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40687919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45356250000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.538563829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-2.3-llama3-70b/ef7a1429-db2f-433b-a606-339a9d868e7a.json b/data/hfopenllm_v2/MaziyarPanahi/calme-2.3-llama3-70b/ef7a1429-db2f-433b-a606-339a9d868e7a.json new file mode 100644 index 000000000..84b0cfd4e --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-2.3-llama3-70b/ef7a1429-db2f-433b-a606-339a9d868e7a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/MaziyarPanahi_calme-2.3-llama3-70b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "calme-2.3-llama3-70b", + "id": "MaziyarPanahi/calme-2.3-llama3-70b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.801 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6399 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2326 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3381 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4261 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5204 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-2.3-llama3.1-70b/f531e13c-79ed-45da-a246-857fd2c884c1.json b/data/hfopenllm_v2/MaziyarPanahi/calme-2.3-llama3.1-70b/f531e13c-79ed-45da-a246-857fd2c884c1.json new file mode 100644 index 000000000..9e97146e6 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-2.3-llama3.1-70b/f531e13c-79ed-45da-a246-857fd2c884c1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.3-llama3.1-70b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "calme-2.3-llama3.1-70b", + "id": "MaziyarPanahi/calme-2.3-llama3.1-70b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8605 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6872 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3927 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.344 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4568 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5363 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-2.3-phi3-4b/0f525d93-663a-442c-9a51-1ad3a5054172.json b/data/hfopenllm_v2/MaziyarPanahi/calme-2.3-phi3-4b/0f525d93-663a-442c-9a51-1ad3a5054172.json new file mode 100644 index 000000000..bc2d4c497 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-2.3-phi3-4b/0f525d93-663a-442c-9a51-1ad3a5054172.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.3-phi3-4b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "calme-2.3-phi3-4b", + "id": "MaziyarPanahi/calme-2.3-phi3-4b", + "developer": "MaziyarPanahi", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4926 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5538 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1473 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.318 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3988 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3828 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-2.3-qwen2-72b/15af21e1-3193-47fa-a3fc-1f087216d4d9.json b/data/hfopenllm_v2/MaziyarPanahi/calme-2.3-qwen2-72b/15af21e1-3193-47fa-a3fc-1f087216d4d9.json new file mode 100644 index 000000000..f937b7720 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-2.3-qwen2-72b/15af21e1-3193-47fa-a3fc-1f087216d4d9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.3-qwen2-72b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "calme-2.3-qwen2-72b", + "id": "MaziyarPanahi/calme-2.3-qwen2-72b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + 
"source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.385 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6576 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3172 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3716 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4112 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5419 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-2.3-qwen2-7b/67b270d9-3422-4770-9957-7bde65acca0a.json b/data/hfopenllm_v2/MaziyarPanahi/calme-2.3-qwen2-7b/67b270d9-3422-4770-9957-7bde65acca0a.json new file mode 100644 index 000000000..ed4ee9992 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-2.3-qwen2-7b/67b270d9-3422-4770-9957-7bde65acca0a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.3-qwen2-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "calme-2.3-qwen2-7b", + "id": "MaziyarPanahi/calme-2.3-qwen2-7b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3825 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5064 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2069 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4422 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3611 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-2.3-rys-78b/33a06134-e58d-4bc7-8421-c5ae2f0dcd1f.json b/data/hfopenllm_v2/MaziyarPanahi/calme-2.3-rys-78b/33a06134-e58d-4bc7-8421-c5ae2f0dcd1f.json deleted file mode 100644 index 3524efa0f..000000000 --- a/data/hfopenllm_v2/MaziyarPanahi/calme-2.3-rys-78b/33a06134-e58d-4bc7-8421-c5ae2f0dcd1f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.3-rys-78b/1762652579.7562392", - "retrieved_timestamp": "1762652579.7562408", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.3-rys-78b", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.3-rys-78b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 77.965 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8065854155862002 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7107763314317289 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39803625377643503 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40436241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45492708333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5475398936170213 - } - } - ] -} diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-2.3-rys-78b/e2d38bcc-9133-4051-82d0-4e4fd66e00f8.json b/data/hfopenllm_v2/MaziyarPanahi/calme-2.3-rys-78b/e2d38bcc-9133-4051-82d0-4e4fd66e00f8.json new file mode 100644 index 000000000..90960df8c --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-2.3-rys-78b/e2d38bcc-9133-4051-82d0-4e4fd66e00f8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.3-rys-78b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "calme-2.3-rys-78b", + "id": "MaziyarPanahi/calme-2.3-rys-78b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 77.965 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8066 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7108 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.398 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4044 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4549 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5475 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-2.4-llama3-70b/4ff256af-73c7-4a5a-96da-19546a786c59.json b/data/hfopenllm_v2/MaziyarPanahi/calme-2.4-llama3-70b/4ff256af-73c7-4a5a-96da-19546a786c59.json new file mode 100644 index 000000000..5cac03e59 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-2.4-llama3-70b/4ff256af-73c7-4a5a-96da-19546a786c59.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.4-llama3-70b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "calme-2.4-llama3-70b", + "id": "MaziyarPanahi/calme-2.4-llama3-70b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5027 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6418 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2447 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3398 + } + }, + { 
+ "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4288 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5204 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-2.4-qwen2-7b/225cbeef-1d0d-40fc-949d-4ba6696fb690.json b/data/hfopenllm_v2/MaziyarPanahi/calme-2.4-qwen2-7b/225cbeef-1d0d-40fc-949d-4ba6696fb690.json new file mode 100644 index 000000000..b84becf6e --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-2.4-qwen2-7b/225cbeef-1d0d-40fc-949d-4ba6696fb690.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.4-qwen2-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "calme-2.4-qwen2-7b", + "id": "MaziyarPanahi/calme-2.4-qwen2-7b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.33 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5101 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2032 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, 
+ "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4453 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3977 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-2.4-rys-78b/24fcd662-5abb-4bf8-b8df-1c21b048cd92.json b/data/hfopenllm_v2/MaziyarPanahi/calme-2.4-rys-78b/24fcd662-5abb-4bf8-b8df-1c21b048cd92.json new file mode 100644 index 000000000..6535c029d --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-2.4-rys-78b/24fcd662-5abb-4bf8-b8df-1c21b048cd92.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.4-rys-78b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "calme-2.4-rys-78b", + "id": "MaziyarPanahi/calme-2.4-rys-78b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 77.965 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8011 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.728 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4071 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4027 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5771 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", 
+ "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7002 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-2.4-rys-78b/48433dc8-40ff-4e36-8c6a-ced33bc22e4f.json b/data/hfopenllm_v2/MaziyarPanahi/calme-2.4-rys-78b/48433dc8-40ff-4e36-8c6a-ced33bc22e4f.json deleted file mode 100644 index bb5d28d32..000000000 --- a/data/hfopenllm_v2/MaziyarPanahi/calme-2.4-rys-78b/48433dc8-40ff-4e36-8c6a-ced33bc22e4f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.4-rys-78b/1762652579.7570088", - "retrieved_timestamp": "1762652579.75701", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.4-rys-78b", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.4-rys-78b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 77.965 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8010899967641414 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7279510956242796 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4070996978851964 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40268456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5770624999999999 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7002160904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-2.5-qwen2-7b/7badcb45-7826-4fd1-b964-c697fbda76cc.json b/data/hfopenllm_v2/MaziyarPanahi/calme-2.5-qwen2-7b/7badcb45-7826-4fd1-b964-c697fbda76cc.json new file mode 100644 index 000000000..76077262d --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-2.5-qwen2-7b/7badcb45-7826-4fd1-b964-c697fbda76cc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/MaziyarPanahi_calme-2.5-qwen2-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "calme-2.5-qwen2-7b", + "id": "MaziyarPanahi/calme-2.5-qwen2-7b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3145 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4887 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2258 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4565 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3682 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-2.6-qwen2-7b/bfb532f1-3319-46ff-80ae-0ca783a18bb6.json b/data/hfopenllm_v2/MaziyarPanahi/calme-2.6-qwen2-7b/bfb532f1-3319-46ff-80ae-0ca783a18bb6.json new file mode 100644 index 000000000..901ba7227 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-2.6-qwen2-7b/bfb532f1-3319-46ff-80ae-0ca783a18bb6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.6-qwen2-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "calme-2.6-qwen2-7b", + "id": "MaziyarPanahi/calme-2.6-qwen2-7b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3443 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.493 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1216 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2844 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4586 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3732 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-2.7-qwen2-7b/ea304515-b41f-4e96-a0ec-78c897ebf9a4.json b/data/hfopenllm_v2/MaziyarPanahi/calme-2.7-qwen2-7b/ea304515-b41f-4e96-a0ec-78c897ebf9a4.json new file mode 100644 index 000000000..4bb85f9fc --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-2.7-qwen2-7b/ea304515-b41f-4e96-a0ec-78c897ebf9a4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.7-qwen2-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "calme-2.7-qwen2-7b", + "id": "MaziyarPanahi/calme-2.7-qwen2-7b", + "developer": "MaziyarPanahi", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3592 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4883 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1382 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4824 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3705 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-3.1-baguette-3b/1fe79ea5-1922-4a5e-8857-1c832353b0a6.json b/data/hfopenllm_v2/MaziyarPanahi/calme-3.1-baguette-3b/1fe79ea5-1922-4a5e-8857-1c832353b0a6.json new file mode 100644 index 000000000..1217267f3 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-3.1-baguette-3b/1fe79ea5-1922-4a5e-8857-1c832353b0a6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.1-baguette-3b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "calme-3.1-baguette-3b", + "id": "MaziyarPanahi/calme-3.1-baguette-3b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.085 + } + }, + "evaluation_results": [ + { + "evaluation_name": 
"IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6234 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4683 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.256 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2861 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4008 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3399 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-3.1-baguette-3b/8f0a6518-d153-43ec-b426-02136a2bc367.json b/data/hfopenllm_v2/MaziyarPanahi/calme-3.1-baguette-3b/8f0a6518-d153-43ec-b426-02136a2bc367.json deleted file mode 100644 index 9d7270193..000000000 --- a/data/hfopenllm_v2/MaziyarPanahi/calme-3.1-baguette-3b/8f0a6518-d153-43ec-b426-02136a2bc367.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.1-baguette-3b/1762652579.7580318", - "retrieved_timestamp": "1762652579.7580328", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/calme-3.1-baguette-3b", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-3.1-baguette-3b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.085 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6234369251364158 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46833341042911075 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25604229607250756 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40079166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33992686170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-3.1-instruct-3b/67915bce-0b54-4996-90f6-cec6def9bbba.json b/data/hfopenllm_v2/MaziyarPanahi/calme-3.1-instruct-3b/67915bce-0b54-4996-90f6-cec6def9bbba.json deleted file mode 100644 index 3f78ec62d..000000000 --- a/data/hfopenllm_v2/MaziyarPanahi/calme-3.1-instruct-3b/67915bce-0b54-4996-90f6-cec6def9bbba.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.1-instruct-3b/1762652579.758249", - "retrieved_timestamp": "1762652579.75825", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/calme-3.1-instruct-3b", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-3.1-instruct-3b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.085 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43359397509718656 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4812730148043098 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17749244712990936 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39520833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.355718085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-3.1-instruct-3b/9098d70f-cbcd-4f6c-bcba-0b1da743396e.json b/data/hfopenllm_v2/MaziyarPanahi/calme-3.1-instruct-3b/9098d70f-cbcd-4f6c-bcba-0b1da743396e.json new file mode 100644 index 000000000..dfeeb7050 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-3.1-instruct-3b/9098d70f-cbcd-4f6c-bcba-0b1da743396e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.1-instruct-3b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "calme-3.1-instruct-3b", + "id": "MaziyarPanahi/calme-3.1-instruct-3b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.085 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4336 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4813 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1775 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2861 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3952 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3557 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-3.1-instruct-78b/898e5e91-c4c0-4494-baad-37c2bfd1931b.json b/data/hfopenllm_v2/MaziyarPanahi/calme-3.1-instruct-78b/898e5e91-c4c0-4494-baad-37c2bfd1931b.json deleted file mode 100644 index cbdf264f9..000000000 --- a/data/hfopenllm_v2/MaziyarPanahi/calme-3.1-instruct-78b/898e5e91-c4c0-4494-baad-37c2bfd1931b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.1-instruct-78b/1762652579.7584739", - "retrieved_timestamp": "1762652579.758475", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/calme-3.1-instruct-78b", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-3.1-instruct-78b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 77.965 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8135547015252862 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7305154498840408 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39274924471299094 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3959731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5890624999999999 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.718500664893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-3.1-instruct-78b/df4ed9e0-30bc-4a3f-b7a2-8955cbb38d31.json b/data/hfopenllm_v2/MaziyarPanahi/calme-3.1-instruct-78b/df4ed9e0-30bc-4a3f-b7a2-8955cbb38d31.json new file mode 
100644 index 000000000..8dfc528f4 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-3.1-instruct-78b/df4ed9e0-30bc-4a3f-b7a2-8955cbb38d31.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.1-instruct-78b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "calme-3.1-instruct-78b", + "id": "MaziyarPanahi/calme-3.1-instruct-78b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 77.965 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8136 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7305 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3927 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.396 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5891 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7185 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-3.1-llamaloi-3b/f68957d5-20a1-438f-9931-6a787aaed467.json b/data/hfopenllm_v2/MaziyarPanahi/calme-3.1-llamaloi-3b/f68957d5-20a1-438f-9931-6a787aaed467.json new file mode 100644 index 000000000..d3b477ce7 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-3.1-llamaloi-3b/f68957d5-20a1-438f-9931-6a787aaed467.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + 
"evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.1-llamaloi-3b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "calme-3.1-llamaloi-3b", + "id": "MaziyarPanahi/calme-3.1-llamaloi-3b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7375 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4587 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.173 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3515 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3205 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-3.2-baguette-3b/416e0c04-9119-4230-ba71-b0f47e2d4997.json b/data/hfopenllm_v2/MaziyarPanahi/calme-3.2-baguette-3b/416e0c04-9119-4230-ba71-b0f47e2d4997.json new file mode 100644 index 000000000..339284c10 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-3.2-baguette-3b/416e0c04-9119-4230-ba71-b0f47e2d4997.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.2-baguette-3b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": 
"documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "calme-3.2-baguette-3b", + "id": "MaziyarPanahi/calme-3.2-baguette-3b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.085 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6338 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4709 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2825 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4021 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3338 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-3.2-baguette-3b/e49441f3-99a5-4cdb-bff1-79cc21711bab.json b/data/hfopenllm_v2/MaziyarPanahi/calme-3.2-baguette-3b/e49441f3-99a5-4cdb-bff1-79cc21711bab.json deleted file mode 100644 index 88c61a675..000000000 --- a/data/hfopenllm_v2/MaziyarPanahi/calme-3.2-baguette-3b/e49441f3-99a5-4cdb-bff1-79cc21711bab.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.2-baguette-3b/1762652579.75889", - "retrieved_timestamp": "1762652579.758891", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": 
"documentation" - }, - "model_info": { - "name": "MaziyarPanahi/calme-3.2-baguette-3b", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-3.2-baguette-3b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.085 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6338282423968404 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.470862269902714 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2824773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40209374999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3337765957446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-3.2-instruct-3b/83e46bac-5266-4f65-a4dd-76240b297adc.json b/data/hfopenllm_v2/MaziyarPanahi/calme-3.2-instruct-3b/83e46bac-5266-4f65-a4dd-76240b297adc.json deleted file mode 100644 index f2f8b281d..000000000 --- a/data/hfopenllm_v2/MaziyarPanahi/calme-3.2-instruct-3b/83e46bac-5266-4f65-a4dd-76240b297adc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.2-instruct-3b/1762652579.759095", - "retrieved_timestamp": "1762652579.7590961", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/calme-3.2-instruct-3b", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-3.2-instruct-3b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5533196363426819 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4865641110376735 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21676737160120846 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40469791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36527593085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-3.2-instruct-3b/d57780e2-154e-437d-ac2f-0007e1f9140e.json b/data/hfopenllm_v2/MaziyarPanahi/calme-3.2-instruct-3b/d57780e2-154e-437d-ac2f-0007e1f9140e.json new file mode 100644 index 000000000..b0efacce9 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-3.2-instruct-3b/d57780e2-154e-437d-ac2f-0007e1f9140e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.2-instruct-3b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "calme-3.2-instruct-3b", + "id": "MaziyarPanahi/calme-3.2-instruct-3b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5533 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4866 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2168 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": 
{ + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4047 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3653 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-3.2-instruct-78b/027d464b-1375-4de7-aa57-e1473d16ba89.json b/data/hfopenllm_v2/MaziyarPanahi/calme-3.2-instruct-78b/027d464b-1375-4de7-aa57-e1473d16ba89.json new file mode 100644 index 000000000..41995ee2f --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-3.2-instruct-78b/027d464b-1375-4de7-aa57-e1473d16ba89.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.2-instruct-78b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "calme-3.2-instruct-78b", + "id": "MaziyarPanahi/calme-3.2-instruct-78b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 77.965 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8063 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7319 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4033 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4027 + } + }, + { + 
"evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6024 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7303 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-3.2-instruct-78b/77cc280c-b794-4a9a-addc-e2eb0a1af896.json b/data/hfopenllm_v2/MaziyarPanahi/calme-3.2-instruct-78b/77cc280c-b794-4a9a-addc-e2eb0a1af896.json deleted file mode 100644 index 4c51c0869..000000000 --- a/data/hfopenllm_v2/MaziyarPanahi/calme-3.2-instruct-78b/77cc280c-b794-4a9a-addc-e2eb0a1af896.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.2-instruct-78b/1762652579.759298", - "retrieved_timestamp": "1762652579.759299", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/calme-3.2-instruct-78b", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-3.2-instruct-78b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 77.965 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8062607215521482 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7318616272092674 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4033232628398791 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40268456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6023645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7303025265957447 - } - 
} - ] -} diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-3.3-baguette-3b/22cbbb6d-1014-42af-96cf-1636fcb40679.json b/data/hfopenllm_v2/MaziyarPanahi/calme-3.3-baguette-3b/22cbbb6d-1014-42af-96cf-1636fcb40679.json deleted file mode 100644 index 6f1f9de48..000000000 --- a/data/hfopenllm_v2/MaziyarPanahi/calme-3.3-baguette-3b/22cbbb6d-1014-42af-96cf-1636fcb40679.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.3-baguette-3b/1762652579.759511", - "retrieved_timestamp": "1762652579.759511", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/calme-3.3-baguette-3b", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-3.3-baguette-3b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6359514975819713 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4678217295957521 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3806646525679758 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39282291666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3341921542553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-3.3-baguette-3b/a81f20fa-57e8-498c-a162-6d8a9be09ee6.json b/data/hfopenllm_v2/MaziyarPanahi/calme-3.3-baguette-3b/a81f20fa-57e8-498c-a162-6d8a9be09ee6.json new file mode 100644 index 000000000..3d87a48e4 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-3.3-baguette-3b/a81f20fa-57e8-498c-a162-6d8a9be09ee6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.3-baguette-3b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": 
{ + "name": "calme-3.3-baguette-3b", + "id": "MaziyarPanahi/calme-3.3-baguette-3b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.636 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4678 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3807 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3928 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3342 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-3.3-instruct-3b/8aa85bd2-eab2-491b-95a3-ac6321cbe298.json b/data/hfopenllm_v2/MaziyarPanahi/calme-3.3-instruct-3b/8aa85bd2-eab2-491b-95a3-ac6321cbe298.json deleted file mode 100644 index ba276805e..000000000 --- a/data/hfopenllm_v2/MaziyarPanahi/calme-3.3-instruct-3b/8aa85bd2-eab2-491b-95a3-ac6321cbe298.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.3-instruct-3b/1762652579.759784", - "retrieved_timestamp": "1762652579.759785", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/calme-3.3-instruct-3b", - "developer": "MaziyarPanahi", - 
"inference_platform": "unknown", - "id": "MaziyarPanahi/calme-3.3-instruct-3b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6423212631373645 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46933409427688694 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37386706948640486 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40742708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33053523936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-3.3-instruct-3b/d72ddbff-8ff7-446f-a74a-10a46bce6e3e.json b/data/hfopenllm_v2/MaziyarPanahi/calme-3.3-instruct-3b/d72ddbff-8ff7-446f-a74a-10a46bce6e3e.json new file mode 100644 index 000000000..49ca1c4b5 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-3.3-instruct-3b/d72ddbff-8ff7-446f-a74a-10a46bce6e3e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.3-instruct-3b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "calme-3.3-instruct-3b", + "id": "MaziyarPanahi/calme-3.3-instruct-3b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6423 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.4693 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3739 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4074 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3305 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Minami-su/Amara-o1-7B-Qwen/f681d612-f574-4641-b34e-95b6de97f9e8.json b/data/hfopenllm_v2/Minami-su/Amara-o1-7B-Qwen/f681d612-f574-4641-b34e-95b6de97f9e8.json new file mode 100644 index 000000000..ad3d67280 --- /dev/null +++ b/data/hfopenllm_v2/Minami-su/Amara-o1-7B-Qwen/f681d612-f574-4641-b34e-95b6de97f9e8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Minami-su_Amara-o1-7B-Qwen/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Amara-o1-7B-Qwen", + "id": "Minami-su/Amara-o1-7B-Qwen", + "developer": "Minami-su", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.739 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5199 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { 
+ "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5181 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4007 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4083 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Minami-su/Amara-o2-7B-Qwen/cae1adaf-e424-4dcd-943b-5bbb708aca57.json b/data/hfopenllm_v2/Minami-su/Amara-o2-7B-Qwen/cae1adaf-e424-4dcd-943b-5bbb708aca57.json new file mode 100644 index 000000000..d8fef8df3 --- /dev/null +++ b/data/hfopenllm_v2/Minami-su/Amara-o2-7B-Qwen/cae1adaf-e424-4dcd-943b-5bbb708aca57.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Minami-su_Amara-o2-7B-Qwen/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Amara-o2-7B-Qwen", + "id": "Minami-su/Amara-o2-7B-Qwen", + "developer": "Minami-su", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7147 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5173 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4086 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + 
"dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3781 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4165 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Minami-su/test-7B-00/969ac825-92f2-448c-899a-226e69dee377.json b/data/hfopenllm_v2/Minami-su/test-7B-00/969ac825-92f2-448c-899a-226e69dee377.json new file mode 100644 index 000000000..813aba2e0 --- /dev/null +++ b/data/hfopenllm_v2/Minami-su/test-7B-00/969ac825-92f2-448c-899a-226e69dee377.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Minami-su_test-7B-00/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "test-7B-00", + "id": "Minami-su/test-7B-00", + "developer": "Minami-su", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.669 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4466 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4517 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + 
}, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4126 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3588 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Minami-su/test-7B-00/ba9ead4a-3d47-4a51-bc39-dbf72d7ff3af.json b/data/hfopenllm_v2/Minami-su/test-7B-00/ba9ead4a-3d47-4a51-bc39-dbf72d7ff3af.json deleted file mode 100644 index 6ea4f9716..000000000 --- a/data/hfopenllm_v2/Minami-su/test-7B-00/ba9ead4a-3d47-4a51-bc39-dbf72d7ff3af.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Minami-su_test-7B-00/1762652579.7606468", - "retrieved_timestamp": "1762652579.76065", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Minami-su/test-7B-00", - "developer": "Minami-su", - "inference_platform": "unknown", - "id": "Minami-su/test-7B-00", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6690492338107332 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44661237656101793 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4516616314199396 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41260416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3587932180851064 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/Minami-su/test-7B-01/2918f03e-3fd5-4183-be8d-2911e0204e8d.json b/data/hfopenllm_v2/Minami-su/test-7B-01/2918f03e-3fd5-4183-be8d-2911e0204e8d.json deleted file mode 100644 index 09f0a29df..000000000 --- a/data/hfopenllm_v2/Minami-su/test-7B-01/2918f03e-3fd5-4183-be8d-2911e0204e8d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Minami-su_test-7B-01/1762652579.761029", - "retrieved_timestamp": "1762652579.76103", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Minami-su/test-7B-01", - "developer": "Minami-su", - "inference_platform": "unknown", - "id": "Minami-su/test-7B-01", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6736204382150472 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4422359420239754 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4554380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41530208333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35355718085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/Minami-su/test-7B-01/e108ad28-c155-4162-852c-0f588a136bdc.json b/data/hfopenllm_v2/Minami-su/test-7B-01/e108ad28-c155-4162-852c-0f588a136bdc.json new file mode 100644 index 000000000..878626d0b --- /dev/null +++ b/data/hfopenllm_v2/Minami-su/test-7B-01/e108ad28-c155-4162-852c-0f588a136bdc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Minami-su_test-7B-01/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "test-7B-01", + "id": "Minami-su/test-7B-01", + "developer": "Minami-su", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", 
+ "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6736 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4422 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4554 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4153 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3536 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Minami-su/test-v2-7B-00/93cfeba9-7d31-45b4-a6e2-99a5f318f5b3.json b/data/hfopenllm_v2/Minami-su/test-v2-7B-00/93cfeba9-7d31-45b4-a6e2-99a5f318f5b3.json new file mode 100644 index 000000000..0b0afc074 --- /dev/null +++ b/data/hfopenllm_v2/Minami-su/test-v2-7B-00/93cfeba9-7d31-45b4-a6e2-99a5f318f5b3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Minami-su_test-v2-7B-00/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "test-v2-7B-00", + "id": "Minami-su/test-v2-7B-00", + "developer": "Minami-su", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6747 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4416 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4418 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4154 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3472 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Minami-su/test-v2-7B-00/95abd2ea-1fb7-4ef8-b186-bfe67148e486.json b/data/hfopenllm_v2/Minami-su/test-v2-7B-00/95abd2ea-1fb7-4ef8-b186-bfe67148e486.json deleted file mode 100644 index 6b19254ea..000000000 --- a/data/hfopenllm_v2/Minami-su/test-v2-7B-00/95abd2ea-1fb7-4ef8-b186-bfe67148e486.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Minami-su_test-v2-7B-00/1762652579.76127", - "retrieved_timestamp": "1762652579.761271", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Minami-su/test-v2-7B-00", - "developer": "Minami-su", - "inference_platform": "unknown", - "id": "Minami-su/test-v2-7B-00", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6747197436136119 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4415989344595353 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4418429003021148 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41542708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3472406914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/ModelCloud/Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1/4a68c55f-ac3d-4173-a1cc-8bb97a2b8466.json b/data/hfopenllm_v2/ModelCloud/Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1/4a68c55f-ac3d-4173-a1cc-8bb97a2b8466.json deleted file mode 100644 index b854ec2fa..000000000 --- a/data/hfopenllm_v2/ModelCloud/Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1/4a68c55f-ac3d-4173-a1cc-8bb97a2b8466.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ModelCloud_Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1/1762652579.761516", - "retrieved_timestamp": "1762652579.761517", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ModelCloud/Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1", - "developer": "ModelCloud", - "inference_platform": "unknown", - "id": "ModelCloud/Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 5.453 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5268919799465418 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3252726665015006 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3249166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17644614361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/ModelCloud/Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1/c1b16b84-9392-48f3-b483-0a9786925506.json b/data/hfopenllm_v2/ModelCloud/Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1/c1b16b84-9392-48f3-b483-0a9786925506.json new file mode 100644 index 000000000..0a18b9f48 --- /dev/null +++ b/data/hfopenllm_v2/ModelCloud/Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1/c1b16b84-9392-48f3-b483-0a9786925506.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ModelCloud_Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1", + "id": "ModelCloud/Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1", + "developer": "ModelCloud", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 5.453 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5269 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3253 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0604 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2534 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on 
MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3249 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1764 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ModelSpace/GemmaX2-28-9B-v0.1/b0c6e08d-b426-49d5-8a66-ee3d70131b62.json b/data/hfopenllm_v2/ModelSpace/GemmaX2-28-9B-v0.1/b0c6e08d-b426-49d5-8a66-ee3d70131b62.json new file mode 100644 index 000000000..3ce7f3cf6 --- /dev/null +++ b/data/hfopenllm_v2/ModelSpace/GemmaX2-28-9B-v0.1/b0c6e08d-b426-49d5-8a66-ee3d70131b62.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ModelSpace_GemmaX2-28-9B-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "GemmaX2-28-9B-v0.1", + "id": "ModelSpace/GemmaX2-28-9B-v0.1", + "developer": "ModelSpace", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0039 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3687 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0272 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2768 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3537 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + 
"source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2231 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MoonRide/Llama-3.2-3B-Khelavaster/6a6651a3-b34e-404d-ac25-42c151fb9ba3.json b/data/hfopenllm_v2/MoonRide/Llama-3.2-3B-Khelavaster/6a6651a3-b34e-404d-ac25-42c151fb9ba3.json new file mode 100644 index 000000000..7575cd4c1 --- /dev/null +++ b/data/hfopenllm_v2/MoonRide/Llama-3.2-3B-Khelavaster/6a6651a3-b34e-404d-ac25-42c151fb9ba3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MoonRide_Llama-3.2-3B-Khelavaster/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-3B-Khelavaster", + "id": "MoonRide/Llama-3.2-3B-Khelavaster", + "developer": "MoonRide", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.607 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4925 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4516 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1616 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3699 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.3122 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Mostafa8Mehrabi/llama-3.2-1b-Insomnia-ChatBot-merged/940d1360-047b-4c12-a7e5-cd002675c69c.json b/data/hfopenllm_v2/Mostafa8Mehrabi/llama-3.2-1b-Insomnia-ChatBot-merged/940d1360-047b-4c12-a7e5-cd002675c69c.json deleted file mode 100644 index a8e0aa760..000000000 --- a/data/hfopenllm_v2/Mostafa8Mehrabi/llama-3.2-1b-Insomnia-ChatBot-merged/940d1360-047b-4c12-a7e5-cd002675c69c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Mostafa8Mehrabi_llama-3.2-1b-Insomnia-ChatBot-merged/1762652579.7624152", - "retrieved_timestamp": "1762652579.7624161", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Mostafa8Mehrabi/llama-3.2-1b-Insomnia-ChatBot-merged", - "developer": "Mostafa8Mehrabi", - "inference_platform": "unknown", - "id": "Mostafa8Mehrabi/llama-3.2-1b-Insomnia-ChatBot-merged", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13206735905176042 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3003508901818665 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0075528700906344415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23657718120805368 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33815625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11311502659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/Mostafa8Mehrabi/llama-3.2-1b-Insomnia-ChatBot-merged/da63b789-5571-4ed8-976e-146d385b18e2.json b/data/hfopenllm_v2/Mostafa8Mehrabi/llama-3.2-1b-Insomnia-ChatBot-merged/da63b789-5571-4ed8-976e-146d385b18e2.json new file mode 100644 index 000000000..47726cb82 --- /dev/null +++ b/data/hfopenllm_v2/Mostafa8Mehrabi/llama-3.2-1b-Insomnia-ChatBot-merged/da63b789-5571-4ed8-976e-146d385b18e2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/Mostafa8Mehrabi_llama-3.2-1b-Insomnia-ChatBot-merged/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-3.2-1b-Insomnia-ChatBot-merged", + "id": "Mostafa8Mehrabi/llama-3.2-1b-Insomnia-ChatBot-merged", + "developer": "Mostafa8Mehrabi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1321 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3004 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0076 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2366 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3382 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1131 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MrRobotoAI/MrRoboto-ProLong-8b-v4i/7c100a09-f34e-4bd7-b201-3779ee5a769d.json b/data/hfopenllm_v2/MrRobotoAI/MrRoboto-ProLong-8b-v4i/7c100a09-f34e-4bd7-b201-3779ee5a769d.json deleted file mode 100644 index 7d9e5d5f8..000000000 --- a/data/hfopenllm_v2/MrRobotoAI/MrRoboto-ProLong-8b-v4i/7c100a09-f34e-4bd7-b201-3779ee5a769d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MrRobotoAI_MrRoboto-ProLong-8b-v4i/1762652579.762677", - "retrieved_timestamp": "1762652579.762678", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MrRobotoAI/MrRoboto-ProLong-8b-v4i", - "developer": "MrRobotoAI", - "inference_platform": "unknown", - "id": "MrRobotoAI/MrRoboto-ProLong-8b-v4i", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3834603297029659 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.458548650453507 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.401375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3068484042553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/MrRobotoAI/MrRoboto-ProLong-8b-v4i/87b900e7-3bab-4e60-b0ef-349667cb2656.json b/data/hfopenllm_v2/MrRobotoAI/MrRoboto-ProLong-8b-v4i/87b900e7-3bab-4e60-b0ef-349667cb2656.json new file mode 100644 index 000000000..87434c005 --- /dev/null +++ b/data/hfopenllm_v2/MrRobotoAI/MrRoboto-ProLong-8b-v4i/87b900e7-3bab-4e60-b0ef-349667cb2656.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MrRobotoAI_MrRoboto-ProLong-8b-v4i/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MrRoboto-ProLong-8b-v4i", + "id": "MrRobotoAI/MrRoboto-ProLong-8b-v4i", + "developer": "MrRobotoAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.3835 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4585 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0551 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4014 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3068 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MrRobotoAI/MrRoboto-ProLongBASE-pt8-unaligned-8b/4c54b609-0af6-4116-b62f-1c8a4d68f06b.json b/data/hfopenllm_v2/MrRobotoAI/MrRoboto-ProLongBASE-pt8-unaligned-8b/4c54b609-0af6-4116-b62f-1c8a4d68f06b.json deleted file mode 100644 index 35b7888cb..000000000 --- a/data/hfopenllm_v2/MrRobotoAI/MrRoboto-ProLongBASE-pt8-unaligned-8b/4c54b609-0af6-4116-b62f-1c8a4d68f06b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MrRobotoAI_MrRoboto-ProLongBASE-pt8-unaligned-8b/1762652579.762937", - "retrieved_timestamp": "1762652579.762937", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MrRobotoAI/MrRoboto-ProLongBASE-pt8-unaligned-8b", - "developer": "MrRobotoAI", - "inference_platform": "unknown", - "id": "MrRobotoAI/MrRoboto-ProLongBASE-pt8-unaligned-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34754008253655855 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4515254903058233 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04229607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42788541666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2565658244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/MrRobotoAI/MrRoboto-ProLongBASE-pt8-unaligned-8b/c9fd4740-4990-4174-b782-9b63c34d6407.json b/data/hfopenllm_v2/MrRobotoAI/MrRoboto-ProLongBASE-pt8-unaligned-8b/c9fd4740-4990-4174-b782-9b63c34d6407.json new file mode 100644 index 000000000..91f97c6f4 --- /dev/null +++ b/data/hfopenllm_v2/MrRobotoAI/MrRoboto-ProLongBASE-pt8-unaligned-8b/c9fd4740-4990-4174-b782-9b63c34d6407.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MrRobotoAI_MrRoboto-ProLongBASE-pt8-unaligned-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MrRoboto-ProLongBASE-pt8-unaligned-8b", + "id": "MrRobotoAI/MrRoboto-ProLongBASE-pt8-unaligned-8b", + "developer": "MrRobotoAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3475 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4515 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.0423 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4279 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2566 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1211-3B/2582a049-e940-408b-b2d9-7a7bdf470e49.json b/data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1211-3B/2582a049-e940-408b-b2d9-7a7bdf470e49.json new file mode 100644 index 000000000..5001bc314 --- /dev/null +++ b/data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1211-3B/2582a049-e940-408b-b2d9-7a7bdf470e49.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MultivexAI_Gladiator-Mini-Exp-1211-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gladiator-Mini-Exp-1211-3B", + "id": "MultivexAI/Gladiator-Mini-Exp-1211-3B", + "developer": "MultivexAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6876 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4484 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1375 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.326 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3152 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1211-3B/2cc4a013-ff0c-44b0-b2e1-66e103606e12.json b/data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1211-3B/2cc4a013-ff0c-44b0-b2e1-66e103606e12.json deleted file mode 100644 index c8bdf4294..000000000 --- a/data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1211-3B/2cc4a013-ff0c-44b0-b2e1-66e103606e12.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MultivexAI_Gladiator-Mini-Exp-1211-3B/1762652579.763158", - "retrieved_timestamp": "1762652579.763159", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MultivexAI/Gladiator-Mini-Exp-1211-3B", - "developer": "MultivexAI", - "inference_platform": "unknown", - "id": "MultivexAI/Gladiator-Mini-Exp-1211-3B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.68760887777763 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44843752663028075 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13746223564954682 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.326 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3151595744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct-V2/99310118-d2ec-4647-85db-fcc22aee9161.json b/data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct-V2/99310118-d2ec-4647-85db-fcc22aee9161.json new file mode 100644 index 000000000..22f18ce11 --- /dev/null +++ b/data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct-V2/99310118-d2ec-4647-85db-fcc22aee9161.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MultivexAI_Gladiator-Mini-Exp-1221-3B-Instruct-V2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gladiator-Mini-Exp-1221-3B-Instruct-V2", + "id": "MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct-V2", + "developer": "MultivexAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6215 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4389 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1412 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3008 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, 
+ "score_details": { + "score": 0.3025 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct-V2/a152be8c-a542-4a73-8164-a43e1f04c595.json b/data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct-V2/a152be8c-a542-4a73-8164-a43e1f04c595.json deleted file mode 100644 index 2be875013..000000000 --- a/data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct-V2/a152be8c-a542-4a73-8164-a43e1f04c595.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MultivexAI_Gladiator-Mini-Exp-1221-3B-Instruct-V2/1762652579.763629", - "retrieved_timestamp": "1762652579.7636302", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct-V2", - "developer": "MultivexAI", - "inference_platform": "unknown", - "id": "MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct-V2", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6215386286165153 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.438883390990549 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14123867069486404 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30082291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3025265957446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct/bedd12e4-da18-4ca6-ba51-6d13e1c80bae.json b/data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct/bedd12e4-da18-4ca6-ba51-6d13e1c80bae.json new file mode 100644 index 000000000..b849a1f53 --- /dev/null +++ b/data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct/bedd12e4-da18-4ca6-ba51-6d13e1c80bae.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MultivexAI_Gladiator-Mini-Exp-1221-3B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + 
"source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gladiator-Mini-Exp-1221-3B-Instruct", + "id": "MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct", + "developer": "MultivexAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6079 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.437 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1352 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3115 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3049 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct/ebfb99cd-9672-4c30-9540-46e4035a0d43.json b/data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct/ebfb99cd-9672-4c30-9540-46e4035a0d43.json deleted file mode 100644 index 527525888..000000000 --- a/data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct/ebfb99cd-9672-4c30-9540-46e4035a0d43.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MultivexAI_Gladiator-Mini-Exp-1221-3B-Instruct/1762652579.763424", - "retrieved_timestamp": "1762652579.763425", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct", - "developer": "MultivexAI", - "inference_platform": "unknown", - "id": "MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6078748830879843 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4369766992416903 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1351963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31145833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3048537234042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1222-3B-Instruct/6767e14a-bbfa-4a0d-8120-1f48a565474e.json b/data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1222-3B-Instruct/6767e14a-bbfa-4a0d-8120-1f48a565474e.json new file mode 100644 index 000000000..37852b3f6 --- /dev/null +++ b/data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1222-3B-Instruct/6767e14a-bbfa-4a0d-8120-1f48a565474e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MultivexAI_Gladiator-Mini-Exp-1222-3B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gladiator-Mini-Exp-1222-3B-Instruct", + "id": "MultivexAI/Gladiator-Mini-Exp-1222-3B-Instruct", + "developer": "MultivexAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, 
+ "score_details": { + "score": 0.6163 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4373 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1412 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3128 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3017 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1222-3B-Instruct/990d6877-4045-49ef-ae23-f5a6302185d6.json b/data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1222-3B-Instruct/990d6877-4045-49ef-ae23-f5a6302185d6.json deleted file mode 100644 index d84d50dc1..000000000 --- a/data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1222-3B-Instruct/990d6877-4045-49ef-ae23-f5a6302185d6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MultivexAI_Gladiator-Mini-Exp-1222-3B-Instruct/1762652579.763836", - "retrieved_timestamp": "1762652579.7638369", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MultivexAI/Gladiator-Mini-Exp-1222-3B-Instruct", - "developer": "MultivexAI", - "inference_platform": "unknown", - "id": "MultivexAI/Gladiator-Mini-Exp-1222-3B-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6163180361440976 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4373182371021645 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14123867069486404 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31276041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30169547872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/MultivexAI/Phi-3.5-Mini-Instruct-MultiVex-v0.25-GGUF/70260aac-1bbf-4913-9dcc-58633d055314.json b/data/hfopenllm_v2/MultivexAI/Phi-3.5-Mini-Instruct-MultiVex-v0.25-GGUF/70260aac-1bbf-4913-9dcc-58633d055314.json new file mode 100644 index 000000000..f8ab724bd --- /dev/null +++ b/data/hfopenllm_v2/MultivexAI/Phi-3.5-Mini-Instruct-MultiVex-v0.25-GGUF/70260aac-1bbf-4913-9dcc-58633d055314.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/MultivexAI_Phi-3.5-Mini-Instruct-MultiVex-v0.25-GGUF/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-3.5-Mini-Instruct-MultiVex-v0.25-GGUF", + "id": "MultivexAI/Phi-3.5-Mini-Instruct-MultiVex-v0.25-GGUF", + "developer": "MultivexAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.144 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2908 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.006 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.255 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3642 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1109 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MultivexAI/Phi-3.5-Mini-Instruct-MultiVex-v0.25-GGUF/c14766b4-5339-4c6e-87d9-fc2bb953e176.json b/data/hfopenllm_v2/MultivexAI/Phi-3.5-Mini-Instruct-MultiVex-v0.25-GGUF/c14766b4-5339-4c6e-87d9-fc2bb953e176.json deleted file mode 100644 index 71ab9cbcc..000000000 --- a/data/hfopenllm_v2/MultivexAI/Phi-3.5-Mini-Instruct-MultiVex-v0.25-GGUF/c14766b4-5339-4c6e-87d9-fc2bb953e176.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MultivexAI_Phi-3.5-Mini-Instruct-MultiVex-v0.25-GGUF/1762652579.764051", - "retrieved_timestamp": "1762652579.764052", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MultivexAI/Phi-3.5-Mini-Instruct-MultiVex-v0.25-GGUF", - "developer": "MultivexAI", - "inference_platform": "unknown", - "id": "MultivexAI/Phi-3.5-Mini-Instruct-MultiVex-v0.25-GGUF", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14398241111362298 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29077474506950557 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2550335570469799 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3641979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11087101063829788 - } - } - ] -} diff --git a/data/hfopenllm_v2/Mxode/NanoLM-0.3B-Instruct-v1.1/0f9eeb32-85fb-4778-8618-436aa4f891ad.json b/data/hfopenllm_v2/Mxode/NanoLM-0.3B-Instruct-v1.1/0f9eeb32-85fb-4778-8618-436aa4f891ad.json deleted file mode 100644 index 0f9495395..000000000 --- a/data/hfopenllm_v2/Mxode/NanoLM-0.3B-Instruct-v1.1/0f9eeb32-85fb-4778-8618-436aa4f891ad.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Mxode_NanoLM-0.3B-Instruct-v1.1/1762652579.764531", - "retrieved_timestamp": "1762652579.764531", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Mxode/NanoLM-0.3B-Instruct-v1.1", - "developer": "Mxode", - "inference_platform": "unknown", - "id": "Mxode/NanoLM-0.3B-Instruct-v1.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.315 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17827918810977095 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3014403673764691 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42733333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11211768617021277 - } - } - ] -} diff --git a/data/hfopenllm_v2/Mxode/NanoLM-0.3B-Instruct-v1.1/fba6e1a2-c197-4731-91ea-f6d059ba8b16.json b/data/hfopenllm_v2/Mxode/NanoLM-0.3B-Instruct-v1.1/fba6e1a2-c197-4731-91ea-f6d059ba8b16.json new file mode 100644 index 000000000..9fe318e68 --- /dev/null +++ 
b/data/hfopenllm_v2/Mxode/NanoLM-0.3B-Instruct-v1.1/fba6e1a2-c197-4731-91ea-f6d059ba8b16.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Mxode_NanoLM-0.3B-Instruct-v1.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NanoLM-0.3B-Instruct-v1.1", + "id": "Mxode/NanoLM-0.3B-Instruct-v1.1", + "developer": "Mxode", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.315 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1783 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3014 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0136 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4273 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1121 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Mxode/NanoLM-0.3B-Instruct-v1/22e74d0c-70d6-43c5-be4d-62842d93fedf.json b/data/hfopenllm_v2/Mxode/NanoLM-0.3B-Instruct-v1/22e74d0c-70d6-43c5-be4d-62842d93fedf.json new file mode 100644 index 000000000..fd4f80330 --- /dev/null +++ b/data/hfopenllm_v2/Mxode/NanoLM-0.3B-Instruct-v1/22e74d0c-70d6-43c5-be4d-62842d93fedf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Mxode_NanoLM-0.3B-Instruct-v1/1770682486.623709", + 
"retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NanoLM-0.3B-Instruct-v1", + "id": "Mxode/NanoLM-0.3B-Instruct-v1", + "developer": "Mxode", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.315 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1537 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3028 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0144 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2718 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4155 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1105 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Mxode/NanoLM-0.3B-Instruct-v1/3c08189e-294e-4682-a7e0-e73a8d498fb2.json b/data/hfopenllm_v2/Mxode/NanoLM-0.3B-Instruct-v1/3c08189e-294e-4682-a7e0-e73a8d498fb2.json deleted file mode 100644 index f6b0486a0..000000000 --- a/data/hfopenllm_v2/Mxode/NanoLM-0.3B-Instruct-v1/3c08189e-294e-4682-a7e0-e73a8d498fb2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Mxode_NanoLM-0.3B-Instruct-v1/1762652579.764268", - "retrieved_timestamp": "1762652579.764269", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Mxode/NanoLM-0.3B-Instruct-v1", - "developer": "Mxode", - "inference_platform": "unknown", - "id": "Mxode/NanoLM-0.3B-Instruct-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.315 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1536744726215331 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30282462164767127 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41552083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11053856382978723 - } - } - ] -} diff --git a/data/hfopenllm_v2/Mxode/NanoLM-0.3B-Instruct-v2/43ce0bee-e8ee-417d-be0d-841d6e26b330.json b/data/hfopenllm_v2/Mxode/NanoLM-0.3B-Instruct-v2/43ce0bee-e8ee-417d-be0d-841d6e26b330.json deleted file mode 100644 index 9ef91167e..000000000 --- a/data/hfopenllm_v2/Mxode/NanoLM-0.3B-Instruct-v2/43ce0bee-e8ee-417d-be0d-841d6e26b330.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Mxode_NanoLM-0.3B-Instruct-v2/1762652579.7647529", - "retrieved_timestamp": "1762652579.7647538", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Mxode/NanoLM-0.3B-Instruct-v2", - "developer": "Mxode", - "inference_platform": "unknown", - "id": "Mxode/NanoLM-0.3B-Instruct-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.315 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1667885654507817 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29211039456850646 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3954583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11344747340425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/Mxode/NanoLM-0.3B-Instruct-v2/f7c33065-1da1-4da4-81c7-f2c9307b6e9b.json b/data/hfopenllm_v2/Mxode/NanoLM-0.3B-Instruct-v2/f7c33065-1da1-4da4-81c7-f2c9307b6e9b.json new file mode 100644 index 000000000..f92c110e8 --- /dev/null +++ b/data/hfopenllm_v2/Mxode/NanoLM-0.3B-Instruct-v2/f7c33065-1da1-4da4-81c7-f2c9307b6e9b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Mxode_NanoLM-0.3B-Instruct-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NanoLM-0.3B-Instruct-v2", + "id": "Mxode/NanoLM-0.3B-Instruct-v2", + "developer": "Mxode", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.315 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1668 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2921 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0068 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3955 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1134 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Mxode/NanoLM-1B-Instruct-v1.1/2e482de2-60ca-4758-9de8-4482e42a5b7a.json b/data/hfopenllm_v2/Mxode/NanoLM-1B-Instruct-v1.1/2e482de2-60ca-4758-9de8-4482e42a5b7a.json deleted file mode 100644 index 70e3ca118..000000000 --- a/data/hfopenllm_v2/Mxode/NanoLM-1B-Instruct-v1.1/2e482de2-60ca-4758-9de8-4482e42a5b7a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Mxode_NanoLM-1B-Instruct-v1.1/1762652579.764964", - "retrieved_timestamp": "1762652579.764964", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Mxode/NanoLM-1B-Instruct-v1.1", - "developer": "Mxode", - "inference_platform": "unknown", - "id": "Mxode/NanoLM-1B-Instruct-v1.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.076 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23952889444451833 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31835012059590373 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03625377643504532 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34327083333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": 
"Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12150930851063829 - } - } - ] -} diff --git a/data/hfopenllm_v2/Mxode/NanoLM-1B-Instruct-v1.1/ecdb4661-426a-46be-aefc-7e04483cebc0.json b/data/hfopenllm_v2/Mxode/NanoLM-1B-Instruct-v1.1/ecdb4661-426a-46be-aefc-7e04483cebc0.json new file mode 100644 index 000000000..a4f75b709 --- /dev/null +++ b/data/hfopenllm_v2/Mxode/NanoLM-1B-Instruct-v1.1/ecdb4661-426a-46be-aefc-7e04483cebc0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Mxode_NanoLM-1B-Instruct-v1.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NanoLM-1B-Instruct-v1.1", + "id": "Mxode/NanoLM-1B-Instruct-v1.1", + "developer": "Mxode", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.076 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2395 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3184 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0363 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3433 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1215 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/Mxode/NanoLM-1B-Instruct-v2/236976b3-af46-45ac-a8a5-f5897e3468a1.json b/data/hfopenllm_v2/Mxode/NanoLM-1B-Instruct-v2/236976b3-af46-45ac-a8a5-f5897e3468a1.json new file mode 100644 index 000000000..3c6004de5 --- /dev/null +++ b/data/hfopenllm_v2/Mxode/NanoLM-1B-Instruct-v2/236976b3-af46-45ac-a8a5-f5897e3468a1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Mxode_NanoLM-1B-Instruct-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NanoLM-1B-Instruct-v2", + "id": "Mxode/NanoLM-1B-Instruct-v2", + "developer": "Mxode", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.076 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.263 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3123 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0415 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3552 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1238 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Mxode/NanoLM-1B-Instruct-v2/d7d1e48d-86af-4f65-803b-30fff69c78b5.json b/data/hfopenllm_v2/Mxode/NanoLM-1B-Instruct-v2/d7d1e48d-86af-4f65-803b-30fff69c78b5.json deleted file mode 100644 index 130e7bd8c..000000000 --- 
a/data/hfopenllm_v2/Mxode/NanoLM-1B-Instruct-v2/d7d1e48d-86af-4f65-803b-30fff69c78b5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Mxode_NanoLM-1B-Instruct-v2/1762652579.765177", - "retrieved_timestamp": "1762652579.7651782", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Mxode/NanoLM-1B-Instruct-v2", - "developer": "Mxode", - "inference_platform": "unknown", - "id": "Mxode/NanoLM-1B-Instruct-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.076 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2629844368497808 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3123145400715591 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04154078549848943 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35520833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12375332446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/NAPS-ai/naps-gemma-2-27b-v-0.1.0/fd175296-a5f6-4914-80e9-b8b75bc659de.json b/data/hfopenllm_v2/NAPS-ai/naps-gemma-2-27b-v-0.1.0/fd175296-a5f6-4914-80e9-b8b75bc659de.json new file mode 100644 index 000000000..55309810d --- /dev/null +++ b/data/hfopenllm_v2/NAPS-ai/naps-gemma-2-27b-v-0.1.0/fd175296-a5f6-4914-80e9-b8b75bc659de.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NAPS-ai_naps-gemma-2-27b-v-0.1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "naps-gemma-2-27b-v-0.1.0", + "id": "NAPS-ai/naps-gemma-2-27b-v-0.1.0", + "developer": "NAPS-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } + }, + "evaluation_results": [ + { + 
"evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2912 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3575 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1168 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NAPS-ai/naps-gemma-2-27b-v0.1.0/d910bbaa-d55c-4b00-9320-856a8a6713c0.json b/data/hfopenllm_v2/NAPS-ai/naps-gemma-2-27b-v0.1.0/d910bbaa-d55c-4b00-9320-856a8a6713c0.json new file mode 100644 index 000000000..8fed04ca8 --- /dev/null +++ b/data/hfopenllm_v2/NAPS-ai/naps-gemma-2-27b-v0.1.0/d910bbaa-d55c-4b00-9320-856a8a6713c0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NAPS-ai_naps-gemma-2-27b-v0.1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "naps-gemma-2-27b-v0.1.0", + "id": "NAPS-ai/naps-gemma-2-27b-v0.1.0", + "developer": "NAPS-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2912 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3575 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1168 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NAPS-ai/naps-llama-3_1-8b-instruct-v0.3/99a5f123-5d2e-469b-884e-c9a64c6bc197.json b/data/hfopenllm_v2/NAPS-ai/naps-llama-3_1-8b-instruct-v0.3/99a5f123-5d2e-469b-884e-c9a64c6bc197.json new file mode 100644 index 000000000..027340ffc --- /dev/null +++ b/data/hfopenllm_v2/NAPS-ai/naps-llama-3_1-8b-instruct-v0.3/99a5f123-5d2e-469b-884e-c9a64c6bc197.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NAPS-ai_naps-llama-3_1-8b-instruct-v0.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "naps-llama-3_1-8b-instruct-v0.3", + "id": "NAPS-ai/naps-llama-3_1-8b-instruct-v0.3", + "developer": "NAPS-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5391 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + 
"source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4901 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1903 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3787 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3398 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NAPS-ai/naps-llama-3_1-8b-instruct-v0.3/d0ce5c14-28fa-4fde-901e-6670db6943de.json b/data/hfopenllm_v2/NAPS-ai/naps-llama-3_1-8b-instruct-v0.3/d0ce5c14-28fa-4fde-901e-6670db6943de.json deleted file mode 100644 index bd2243e4e..000000000 --- a/data/hfopenllm_v2/NAPS-ai/naps-llama-3_1-8b-instruct-v0.3/d0ce5c14-28fa-4fde-901e-6670db6943de.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/NAPS-ai_naps-llama-3_1-8b-instruct-v0.3/1762652579.765912", - "retrieved_timestamp": "1762652579.765913", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NAPS-ai/naps-llama-3_1-8b-instruct-v0.3", - "developer": "NAPS-ai", - "inference_platform": "unknown", - "id": "NAPS-ai/naps-llama-3_1-8b-instruct-v0.3", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5390818583580456 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4900525115527062 - 
} - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1903323262839879 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37870833333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33984375 - } - } - ] -} diff --git a/data/hfopenllm_v2/NAPS-ai/naps-llama-3_1-8b-instruct-v0.4/467a9428-e85d-489d-be59-91842b389732.json b/data/hfopenllm_v2/NAPS-ai/naps-llama-3_1-8b-instruct-v0.4/467a9428-e85d-489d-be59-91842b389732.json deleted file mode 100644 index 99fdd4de5..000000000 --- a/data/hfopenllm_v2/NAPS-ai/naps-llama-3_1-8b-instruct-v0.4/467a9428-e85d-489d-be59-91842b389732.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/NAPS-ai_naps-llama-3_1-8b-instruct-v0.4/1762652579.766172", - "retrieved_timestamp": "1762652579.766173", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NAPS-ai/naps-llama-3_1-8b-instruct-v0.4", - "developer": "NAPS-ai", - "inference_platform": "unknown", - "id": "NAPS-ai/naps-llama-3_1-8b-instruct-v0.4", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7344202272193336 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4861833360906734 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19637462235649547 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.4421145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3474900265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/NAPS-ai/naps-llama-3_1-8b-instruct-v0.4/ed17a715-f0ae-461c-9618-ac952c450ec5.json b/data/hfopenllm_v2/NAPS-ai/naps-llama-3_1-8b-instruct-v0.4/ed17a715-f0ae-461c-9618-ac952c450ec5.json new file mode 100644 index 000000000..778af4ae2 --- /dev/null +++ b/data/hfopenllm_v2/NAPS-ai/naps-llama-3_1-8b-instruct-v0.4/ed17a715-f0ae-461c-9618-ac952c450ec5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NAPS-ai_naps-llama-3_1-8b-instruct-v0.4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "naps-llama-3_1-8b-instruct-v0.4", + "id": "NAPS-ai/naps-llama-3_1-8b-instruct-v0.4", + "developer": "NAPS-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7344 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4862 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1964 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4421 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3475 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NAPS-ai/naps-llama-3_1-instruct-v0.5.0/3dd2a474-9ea8-4e26-8986-5bcc67c78c39.json b/data/hfopenllm_v2/NAPS-ai/naps-llama-3_1-instruct-v0.5.0/3dd2a474-9ea8-4e26-8986-5bcc67c78c39.json new file mode 100644 index 000000000..3ae07feca --- /dev/null +++ b/data/hfopenllm_v2/NAPS-ai/naps-llama-3_1-instruct-v0.5.0/3dd2a474-9ea8-4e26-8986-5bcc67c78c39.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NAPS-ai_naps-llama-3_1-instruct-v0.5.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "naps-llama-3_1-instruct-v0.5.0", + "id": "NAPS-ai/naps-llama-3_1-instruct-v0.5.0", + "developer": "NAPS-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.502 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4148 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0363 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3713 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2614 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/NAPS-ai/naps-llama-3_1-instruct-v0.5.0/5553fa1d-6bf9-469d-b870-590dd4965209.json b/data/hfopenllm_v2/NAPS-ai/naps-llama-3_1-instruct-v0.5.0/5553fa1d-6bf9-469d-b870-590dd4965209.json deleted file mode 100644 index 46f8c148a..000000000 --- a/data/hfopenllm_v2/NAPS-ai/naps-llama-3_1-instruct-v0.5.0/5553fa1d-6bf9-469d-b870-590dd4965209.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/NAPS-ai_naps-llama-3_1-instruct-v0.5.0/1762652579.766381", - "retrieved_timestamp": "1762652579.766382", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NAPS-ai/naps-llama-3_1-instruct-v0.5.0", - "developer": "NAPS-ai", - "inference_platform": "unknown", - "id": "NAPS-ai/naps-llama-3_1-instruct-v0.5.0", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5020124381086628 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4147584365689691 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03625377643504532 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37127083333333327 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26138630319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/NAPS-ai/naps-llama-3_1_instruct-v0.6.0/b39e14a6-c05f-4e88-b2d4-63a199aa61a1.json b/data/hfopenllm_v2/NAPS-ai/naps-llama-3_1_instruct-v0.6.0/b39e14a6-c05f-4e88-b2d4-63a199aa61a1.json new file mode 100644 index 000000000..5d319d532 --- /dev/null +++ b/data/hfopenllm_v2/NAPS-ai/naps-llama-3_1_instruct-v0.6.0/b39e14a6-c05f-4e88-b2d4-63a199aa61a1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NAPS-ai_naps-llama-3_1_instruct-v0.6.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + 
"model_info": { + "name": "naps-llama-3_1_instruct-v0.6.0", + "id": "NAPS-ai/naps-llama-3_1_instruct-v0.6.0", + "developer": "NAPS-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.328 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4528 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0642 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2819 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3739 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3241 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NAPS-ai/naps-llama3.1-70B-v0.2-fp16/39893637-552a-48d8-9b83-433415eb26c3.json b/data/hfopenllm_v2/NAPS-ai/naps-llama3.1-70B-v0.2-fp16/39893637-552a-48d8-9b83-433415eb26c3.json new file mode 100644 index 000000000..fa9e2a0a2 --- /dev/null +++ b/data/hfopenllm_v2/NAPS-ai/naps-llama3.1-70B-v0.2-fp16/39893637-552a-48d8-9b83-433415eb26c3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NAPS-ai_naps-llama3.1-70B-v0.2-fp16/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "naps-llama3.1-70B-v0.2-fp16", + "id": "NAPS-ai/naps-llama3.1-70B-v0.2-fp16", + "developer": "NAPS-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": 
"float16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.761 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1845 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3041 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2391 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3486 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1099 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NCSOFT/Llama-VARCO-8B-Instruct/38876858-0585-4edb-a4af-e4c71530429c.json b/data/hfopenllm_v2/NCSOFT/Llama-VARCO-8B-Instruct/38876858-0585-4edb-a4af-e4c71530429c.json deleted file mode 100644 index 8ff2adbdf..000000000 --- a/data/hfopenllm_v2/NCSOFT/Llama-VARCO-8B-Instruct/38876858-0585-4edb-a4af-e4c71530429c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/NCSOFT_Llama-VARCO-8B-Instruct/1762652579.767406", - "retrieved_timestamp": "1762652579.7674072", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NCSOFT/Llama-VARCO-8B-Instruct", - "developer": "NCSOFT", - "inference_platform": "unknown", - "id": "NCSOFT/Llama-VARCO-8B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4470327619604871 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5022879316026018 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3840729166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31898271276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/NCSOFT/Llama-VARCO-8B-Instruct/f9549713-f487-4e26-bfeb-ec6d394b7014.json b/data/hfopenllm_v2/NCSOFT/Llama-VARCO-8B-Instruct/f9549713-f487-4e26-bfeb-ec6d394b7014.json new file mode 100644 index 000000000..515a08f92 --- /dev/null +++ b/data/hfopenllm_v2/NCSOFT/Llama-VARCO-8B-Instruct/f9549713-f487-4e26-bfeb-ec6d394b7014.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NCSOFT_Llama-VARCO-8B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-VARCO-8B-Instruct", + "id": "NCSOFT/Llama-VARCO-8B-Instruct", + "developer": "NCSOFT", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.447 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5023 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1065 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3841 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.319 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NJS26/NJS_777/02579c41-f117-4412-9c00-ee7db3e9ab97.json b/data/hfopenllm_v2/NJS26/NJS_777/02579c41-f117-4412-9c00-ee7db3e9ab97.json new file mode 100644 index 000000000..bf0dad5d2 --- /dev/null +++ b/data/hfopenllm_v2/NJS26/NJS_777/02579c41-f117-4412-9c00-ee7db3e9ab97.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NJS26_NJS_777/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NJS_777", + "id": "NJS26/NJS_777", + "developer": "NJS26", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 10.362 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1881 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2178 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, 
+ "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2064 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3538 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1163 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NJS26/NJS_777/211449c7-9b14-4d20-a599-58718e9c5e4b.json b/data/hfopenllm_v2/NJS26/NJS_777/211449c7-9b14-4d20-a599-58718e9c5e4b.json deleted file mode 100644 index 4454c1a67..000000000 --- a/data/hfopenllm_v2/NJS26/NJS_777/211449c7-9b14-4d20-a599-58718e9c5e4b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/NJS26_NJS_777/1762652579.76769", - "retrieved_timestamp": "1762652579.76769", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NJS26/NJS_777", - "developer": "NJS26", - "inference_platform": "unknown", - "id": "NJS26/NJS_777", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 10.362 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18809647291409015 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21782097894078087 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2063758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35378125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.11627327127659574 - } - } - ] -} diff --git a/data/hfopenllm_v2/NLPark/AnFeng_v3.1-Avocet/17b3cc41-69ac-48a2-9371-a5d1368dfeb9.json b/data/hfopenllm_v2/NLPark/AnFeng_v3.1-Avocet/17b3cc41-69ac-48a2-9371-a5d1368dfeb9.json deleted file mode 100644 index f05d78969..000000000 --- a/data/hfopenllm_v2/NLPark/AnFeng_v3.1-Avocet/17b3cc41-69ac-48a2-9371-a5d1368dfeb9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/NLPark_AnFeng_v3.1-Avocet/1762652579.76799", - "retrieved_timestamp": "1762652579.767991", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NLPark/AnFeng_v3.1-Avocet", - "developer": "NLPark", - "inference_platform": "unknown", - "id": "NLPark/AnFeng_v3.1-Avocet", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.393 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5096311121158525 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.582852329074409 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1593655589123867 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44757291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44381648936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/NLPark/AnFeng_v3.1-Avocet/bfa1d761-00aa-4438-a5de-972d934c63d5.json b/data/hfopenllm_v2/NLPark/AnFeng_v3.1-Avocet/bfa1d761-00aa-4438-a5de-972d934c63d5.json new file mode 100644 index 000000000..d4c3c3a62 --- /dev/null +++ b/data/hfopenllm_v2/NLPark/AnFeng_v3.1-Avocet/bfa1d761-00aa-4438-a5de-972d934c63d5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NLPark_AnFeng_v3.1-Avocet/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AnFeng_v3.1-Avocet", + "id": 
"NLPark/AnFeng_v3.1-Avocet", + "developer": "NLPark", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.393 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5096 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5829 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1594 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3247 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4476 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4438 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NLPark/B-and-W_Flycatcher-3AD1E/20a84d88-05c2-4e02-8c84-2afa84cc659f.json b/data/hfopenllm_v2/NLPark/B-and-W_Flycatcher-3AD1E/20a84d88-05c2-4e02-8c84-2afa84cc659f.json new file mode 100644 index 000000000..17941cc70 --- /dev/null +++ b/data/hfopenllm_v2/NLPark/B-and-W_Flycatcher-3AD1E/20a84d88-05c2-4e02-8c84-2afa84cc659f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NLPark_B-and-W_Flycatcher-3AD1E/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "B-and-W_Flycatcher-3AD1E", + "id": "NLPark/B-and-W_Flycatcher-3AD1E", + "developer": "NLPark", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": 
[ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4908 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6065 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2379 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3305 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4423 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4741 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NLPark/B-and-W_Flycatcher-3AD1E/95b94fcb-7aba-4473-b88f-36dddcd646c1.json b/data/hfopenllm_v2/NLPark/B-and-W_Flycatcher-3AD1E/95b94fcb-7aba-4473-b88f-36dddcd646c1.json deleted file mode 100644 index d540e9f52..000000000 --- a/data/hfopenllm_v2/NLPark/B-and-W_Flycatcher-3AD1E/95b94fcb-7aba-4473-b88f-36dddcd646c1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/NLPark_B-and-W_Flycatcher-3AD1E/1762652579.7682638", - "retrieved_timestamp": "1762652579.768265", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NLPark/B-and-W_Flycatcher-3AD1E", - "developer": "NLPark", - "inference_platform": "unknown", - "id": "NLPark/B-and-W_Flycatcher-3AD1E", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49084650948372543 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6065117528534355 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23791540785498488 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44227083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4740691489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/NLPark/Shi-Ci-Robin-Test_3AD80/0fa6785d-8db5-40f9-b259-3368ffb547d4.json b/data/hfopenllm_v2/NLPark/Shi-Ci-Robin-Test_3AD80/0fa6785d-8db5-40f9-b259-3368ffb547d4.json deleted file mode 100644 index 1acc7d865..000000000 --- a/data/hfopenllm_v2/NLPark/Shi-Ci-Robin-Test_3AD80/0fa6785d-8db5-40f9-b259-3368ffb547d4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/NLPark_Shi-Ci-Robin-Test_3AD80/1762652579.768489", - "retrieved_timestamp": "1762652579.76849", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NLPark/Shi-Ci-Robin-Test_3AD80", - "developer": "NLPark", - "inference_platform": "unknown", - "id": "NLPark/Shi-Ci-Robin-Test_3AD80", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7226547782107031 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6704805157570325 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3157099697885196 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3598993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46959375000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5120511968085106 - } - } - ] -} diff --git a/data/hfopenllm_v2/NLPark/Shi-Ci-Robin-Test_3AD80/84eedce3-3a93-4630-b914-aa281fd2efda.json b/data/hfopenllm_v2/NLPark/Shi-Ci-Robin-Test_3AD80/84eedce3-3a93-4630-b914-aa281fd2efda.json new file mode 100644 index 000000000..b7883ce1e --- /dev/null +++ b/data/hfopenllm_v2/NLPark/Shi-Ci-Robin-Test_3AD80/84eedce3-3a93-4630-b914-aa281fd2efda.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NLPark_Shi-Ci-Robin-Test_3AD80/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Shi-Ci-Robin-Test_3AD80", + "id": "NLPark/Shi-Ci-Robin-Test_3AD80", + "developer": "NLPark", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7227 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6705 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3157 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3599 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.4696 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5121 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NTQAI/NxMobileLM-1.5B-SFT/7a295af9-fb47-484f-8748-af3ee245d2c5.json b/data/hfopenllm_v2/NTQAI/NxMobileLM-1.5B-SFT/7a295af9-fb47-484f-8748-af3ee245d2c5.json deleted file mode 100644 index a29de3706..000000000 --- a/data/hfopenllm_v2/NTQAI/NxMobileLM-1.5B-SFT/7a295af9-fb47-484f-8748-af3ee245d2c5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/NTQAI_NxMobileLM-1.5B-SFT/1762652579.768717", - "retrieved_timestamp": "1762652579.768718", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NTQAI/NxMobileLM-1.5B-SFT", - "developer": "NTQAI", - "inference_platform": "unknown", - "id": "NTQAI/NxMobileLM-1.5B-SFT", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6392239258500778 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39571778048116 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35552083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28174867021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/NTQAI/NxMobileLM-1.5B-SFT/b3b7b62f-ac82-4ef9-9634-afb81645ec19.json b/data/hfopenllm_v2/NTQAI/NxMobileLM-1.5B-SFT/b3b7b62f-ac82-4ef9-9634-afb81645ec19.json new file mode 100644 index 000000000..ddf2ce282 --- /dev/null +++ b/data/hfopenllm_v2/NTQAI/NxMobileLM-1.5B-SFT/b3b7b62f-ac82-4ef9-9634-afb81645ec19.json @@ -0,0 +1,132 @@ +{ + 
"schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NTQAI_NxMobileLM-1.5B-SFT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NxMobileLM-1.5B-SFT", + "id": "NTQAI/NxMobileLM-1.5B-SFT", + "developer": "NTQAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6392 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3957 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0846 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3555 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2817 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NTQAI/Nxcode-CQ-7B-orpo/1c020e50-fe68-40c9-a36a-7bec201f409a.json b/data/hfopenllm_v2/NTQAI/Nxcode-CQ-7B-orpo/1c020e50-fe68-40c9-a36a-7bec201f409a.json deleted file mode 100644 index 48c687b1f..000000000 --- a/data/hfopenllm_v2/NTQAI/Nxcode-CQ-7B-orpo/1c020e50-fe68-40c9-a36a-7bec201f409a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/NTQAI_Nxcode-CQ-7B-orpo/1762652579.769034", - "retrieved_timestamp": "1762652579.769035", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NTQAI/Nxcode-CQ-7B-orpo", - "developer": "NTQAI", - "inference_platform": "unknown", - "id": "NTQAI/Nxcode-CQ-7B-orpo", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.25 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40072119753365515 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4143023249178217 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39396875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16115359042553193 - } - } - ] -} diff --git a/data/hfopenllm_v2/NTQAI/Nxcode-CQ-7B-orpo/283c5166-b9c5-4d20-9653-0cd0346d87c1.json b/data/hfopenllm_v2/NTQAI/Nxcode-CQ-7B-orpo/283c5166-b9c5-4d20-9653-0cd0346d87c1.json new file mode 100644 index 000000000..61343896e --- /dev/null +++ b/data/hfopenllm_v2/NTQAI/Nxcode-CQ-7B-orpo/283c5166-b9c5-4d20-9653-0cd0346d87c1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NTQAI_Nxcode-CQ-7B-orpo/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nxcode-CQ-7B-orpo", + "id": "NTQAI/Nxcode-CQ-7B-orpo", + "developer": "NTQAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.25 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4007 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": 
"SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4143 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0219 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2542 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.394 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1612 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NYTK/PULI-GPTrio/478b54cd-6410-41e5-8a53-4e46bcd9d7af.json b/data/hfopenllm_v2/NYTK/PULI-GPTrio/478b54cd-6410-41e5-8a53-4e46bcd9d7af.json new file mode 100644 index 000000000..513daf9dc --- /dev/null +++ b/data/hfopenllm_v2/NYTK/PULI-GPTrio/478b54cd-6410-41e5-8a53-4e46bcd9d7af.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NYTK_PULI-GPTrio/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PULI-GPTrio", + "id": "NYTK/PULI-GPTrio", + "developer": "NYTK", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 7.673 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.218 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.306 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 
5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0121 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3819 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1137 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NYTK/PULI-LlumiX-32K/7230c1f3-d7f6-4a96-8308-b2d5895a0a0a.json b/data/hfopenllm_v2/NYTK/PULI-LlumiX-32K/7230c1f3-d7f6-4a96-8308-b2d5895a0a0a.json deleted file mode 100644 index fb80b36c2..000000000 --- a/data/hfopenllm_v2/NYTK/PULI-LlumiX-32K/7230c1f3-d7f6-4a96-8308-b2d5895a0a0a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/NYTK_PULI-LlumiX-32K/1762652579.76952", - "retrieved_timestamp": "1762652579.769521", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NYTK/PULI-LlumiX-32K", - "developer": "NYTK", - "inference_platform": "unknown", - "id": "NYTK/PULI-LlumiX-32K", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.738 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1699612583500667 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31893582242949375 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39641666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16805186170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/NYTK/PULI-LlumiX-32K/de2ae7a9-93eb-4149-b3ff-b5b7dfba29c4.json b/data/hfopenllm_v2/NYTK/PULI-LlumiX-32K/de2ae7a9-93eb-4149-b3ff-b5b7dfba29c4.json new file mode 100644 index 000000000..373fe82fa --- /dev/null +++ b/data/hfopenllm_v2/NYTK/PULI-LlumiX-32K/de2ae7a9-93eb-4149-b3ff-b5b7dfba29c4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NYTK_PULI-LlumiX-32K/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PULI-LlumiX-32K", + "id": "NYTK/PULI-LlumiX-32K", + "developer": "NYTK", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.738 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.17 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3189 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0128 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2534 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3964 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + 
"hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1681 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Naveenpoliasetty/llama3-8B-V2/ef5aa9db-804b-4a53-9c22-9c99f6c69eeb.json b/data/hfopenllm_v2/Naveenpoliasetty/llama3-8B-V2/ef5aa9db-804b-4a53-9c22-9c99f6c69eeb.json new file mode 100644 index 000000000..6dbee699d --- /dev/null +++ b/data/hfopenllm_v2/Naveenpoliasetty/llama3-8B-V2/ef5aa9db-804b-4a53-9c22-9c99f6c69eeb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Naveenpoliasetty_llama3-8B-V2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama3-8B-V2", + "id": "Naveenpoliasetty/llama3-8B-V2", + "developer": "Naveenpoliasetty", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4123 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5189 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0785 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4081 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3738 + 
} + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NbAiLab/nb-llama-3.1-8B-Instruct/553fd36d-08dd-46a3-ab04-77b9039e7921.json b/data/hfopenllm_v2/NbAiLab/nb-llama-3.1-8B-Instruct/553fd36d-08dd-46a3-ab04-77b9039e7921.json new file mode 100644 index 000000000..b291ca255 --- /dev/null +++ b/data/hfopenllm_v2/NbAiLab/nb-llama-3.1-8B-Instruct/553fd36d-08dd-46a3-ab04-77b9039e7921.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NbAiLab_nb-llama-3.1-8B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nb-llama-3.1-8B-Instruct", + "id": "NbAiLab/nb-llama-3.1-8B-Instruct", + "developer": "NbAiLab", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3625 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3247 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0227 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2735 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3208 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1197 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NbAiLab/nb-llama-3.1-8B-Instruct/b0f68843-2f49-4d2a-91ab-ad8d07791125.json 
b/data/hfopenllm_v2/NbAiLab/nb-llama-3.1-8B-Instruct/b0f68843-2f49-4d2a-91ab-ad8d07791125.json deleted file mode 100644 index 2ad03daab..000000000 --- a/data/hfopenllm_v2/NbAiLab/nb-llama-3.1-8B-Instruct/b0f68843-2f49-4d2a-91ab-ad8d07791125.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/NbAiLab_nb-llama-3.1-8B-Instruct/1762652579.7700322", - "retrieved_timestamp": "1762652579.770033", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NbAiLab/nb-llama-3.1-8B-Instruct", - "developer": "NbAiLab", - "inference_platform": "unknown", - "id": "NbAiLab/nb-llama-3.1-8B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.362502604201297 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466553135589526 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.022658610271903322 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32076041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1196808510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/NbAiLab/nb-llama-3.1-8B-sft/e2bae853-cc0f-456a-a635-98d5f87ac47c.json b/data/hfopenllm_v2/NbAiLab/nb-llama-3.1-8B-sft/e2bae853-cc0f-456a-a635-98d5f87ac47c.json new file mode 100644 index 000000000..c5be98399 --- /dev/null +++ b/data/hfopenllm_v2/NbAiLab/nb-llama-3.1-8B-sft/e2bae853-cc0f-456a-a635-98d5f87ac47c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NbAiLab_nb-llama-3.1-8B-sft/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nb-llama-3.1-8B-sft", + "id": "NbAiLab/nb-llama-3.1-8B-sft", + "developer": "NbAiLab", + "inference_platform": "unknown", + "additional_details": { + 
"precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3616 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3282 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0219 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2542 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3287 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1222 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NbAiLab/nb-llama-3.1-8B-sft/e8313b88-13ee-4926-90f8-696b0604c7b9.json b/data/hfopenllm_v2/NbAiLab/nb-llama-3.1-8B-sft/e8313b88-13ee-4926-90f8-696b0604c7b9.json deleted file mode 100644 index d9ed96f64..000000000 --- a/data/hfopenllm_v2/NbAiLab/nb-llama-3.1-8B-sft/e8313b88-13ee-4926-90f8-696b0604c7b9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/NbAiLab_nb-llama-3.1-8B-sft/1762652579.7703218", - "retrieved_timestamp": "1762652579.770323", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NbAiLab/nb-llama-3.1-8B-sft", - "developer": "NbAiLab", - "inference_platform": "unknown", - "id": "NbAiLab/nb-llama-3.1-8B-sft", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36157838978355206 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3281509048328078 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3287291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12217420212765957 - } - } - ] -} diff --git a/data/hfopenllm_v2/Nekochu/Llama-3.1-8B-German-ORPO/d6c5f196-c97b-4a0a-81b0-59143ec4b10e.json b/data/hfopenllm_v2/Nekochu/Llama-3.1-8B-German-ORPO/d6c5f196-c97b-4a0a-81b0-59143ec4b10e.json new file mode 100644 index 000000000..95ea21ef4 --- /dev/null +++ b/data/hfopenllm_v2/Nekochu/Llama-3.1-8B-German-ORPO/d6c5f196-c97b-4a0a-81b0-59143ec4b10e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nekochu_Llama-3.1-8B-German-ORPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8B-German-ORPO", + "id": "Nekochu/Llama-3.1-8B-German-ORPO", + "developer": "Nekochu", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4611 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4983 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + 
"metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1171 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3163 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4647 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3393 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nekochu/Llama-3.1-8B-french-DPO/5d92e02f-b590-4b6b-8c64-30690f79e916.json b/data/hfopenllm_v2/Nekochu/Llama-3.1-8B-french-DPO/5d92e02f-b590-4b6b-8c64-30690f79e916.json new file mode 100644 index 000000000..ccbbfa608 --- /dev/null +++ b/data/hfopenllm_v2/Nekochu/Llama-3.1-8B-french-DPO/5d92e02f-b590-4b6b-8c64-30690f79e916.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nekochu_Llama-3.1-8B-french-DPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8B-french-DPO", + "id": "Nekochu/Llama-3.1-8B-french-DPO", + "developer": "Nekochu", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4656 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5111 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0974 + } + }, + { + 
"evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4216 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3414 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nekochu/Llama-3.1-8B-french-DPO/ebc2a3b7-30e9-4608-a8c0-ea90a308c0e5.json b/data/hfopenllm_v2/Nekochu/Llama-3.1-8B-french-DPO/ebc2a3b7-30e9-4608-a8c0-ea90a308c0e5.json deleted file mode 100644 index 5fa49583f..000000000 --- a/data/hfopenllm_v2/Nekochu/Llama-3.1-8B-french-DPO/ebc2a3b7-30e9-4608-a8c0-ea90a308c0e5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nekochu_Llama-3.1-8B-french-DPO/1762652579.770777", - "retrieved_timestamp": "1762652579.7707782", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nekochu/Llama-3.1-8B-french-DPO", - "developer": "Nekochu", - "inference_platform": "unknown", - "id": "Nekochu/Llama-3.1-8B-french-DPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46564227361179444 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5110888403999433 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09743202416918428 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.4215625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3414228723404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/Nekochu/Luminia-13B-v3/172f121a-3843-4b01-94e1-a95001909bb8.json b/data/hfopenllm_v2/Nekochu/Luminia-13B-v3/172f121a-3843-4b01-94e1-a95001909bb8.json deleted file mode 100644 index b495eae6e..000000000 --- a/data/hfopenllm_v2/Nekochu/Luminia-13B-v3/172f121a-3843-4b01-94e1-a95001909bb8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nekochu_Luminia-13B-v3/1762652579.771023", - "retrieved_timestamp": "1762652579.771023", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nekochu/Luminia-13B-v3", - "developer": "Nekochu", - "inference_platform": "unknown", - "id": "Nekochu/Luminia-13B-v3", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.016 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25231829323971505 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41121515510929624 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01812688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3983333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22149268617021275 - } - } - ] -} diff --git a/data/hfopenllm_v2/Nekochu/Luminia-13B-v3/e10f38df-b5d5-47c6-924f-563c6f8a6616.json b/data/hfopenllm_v2/Nekochu/Luminia-13B-v3/e10f38df-b5d5-47c6-924f-563c6f8a6616.json new file mode 100644 index 000000000..5012f1af5 --- /dev/null +++ b/data/hfopenllm_v2/Nekochu/Luminia-13B-v3/e10f38df-b5d5-47c6-924f-563c6f8a6616.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nekochu_Luminia-13B-v3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + 
"source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Luminia-13B-v3", + "id": "Nekochu/Luminia-13B-v3", + "developer": "Nekochu", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.016 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2523 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4112 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0181 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3983 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2215 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nekochu/Luminia-8B-RP/27257dc9-750c-4673-8865-986434bc5c0e.json b/data/hfopenllm_v2/Nekochu/Luminia-8B-RP/27257dc9-750c-4673-8865-986434bc5c0e.json new file mode 100644 index 000000000..1ed86080e --- /dev/null +++ b/data/hfopenllm_v2/Nekochu/Luminia-8B-RP/27257dc9-750c-4673-8865-986434bc5c0e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nekochu_Luminia-8B-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Luminia-8B-RP", + "id": "Nekochu/Luminia-8B-RP", + "developer": "Nekochu", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5574 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5218 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.136 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3998 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3631 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nekochu/Luminia-8B-RP/fd23ba4a-a0ce-474b-9aa4-b5295d872028.json b/data/hfopenllm_v2/Nekochu/Luminia-8B-RP/fd23ba4a-a0ce-474b-9aa4-b5295d872028.json deleted file mode 100644 index 21465e6e9..000000000 --- a/data/hfopenllm_v2/Nekochu/Luminia-8B-RP/fd23ba4a-a0ce-474b-9aa4-b5295d872028.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nekochu_Luminia-8B-RP/1762652579.7713962", - "retrieved_timestamp": "1762652579.7713978", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nekochu/Luminia-8B-RP", - "developer": "Nekochu", - "inference_platform": "unknown", - "id": "Nekochu/Luminia-8B-RP", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5574165436597118 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5218151030627874 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13595166163141995 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3997604166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3631150265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/NeverSleep/Lumimaid-v0.2-12B/cee1293c-54fb-4275-b5a9-0215e5f9a4c0.json b/data/hfopenllm_v2/NeverSleep/Lumimaid-v0.2-12B/cee1293c-54fb-4275-b5a9-0215e5f9a4c0.json deleted file mode 100644 index 8e210edb4..000000000 --- a/data/hfopenllm_v2/NeverSleep/Lumimaid-v0.2-12B/cee1293c-54fb-4275-b5a9-0215e5f9a4c0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/NeverSleep_Lumimaid-v0.2-12B/1762652579.771668", - "retrieved_timestamp": "1762652579.771669", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NeverSleep/Lumimaid-v0.2-12B", - "developer": "NeverSleep", - "inference_platform": "unknown", - "id": "NeverSleep/Lumimaid-v0.2-12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10993497253952846 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5395610525850818 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48211458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3511469414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/NeverSleep/Lumimaid-v0.2-12B/e599f3f8-e5eb-4bfe-a102-efc5a967434d.json b/data/hfopenllm_v2/NeverSleep/Lumimaid-v0.2-12B/e599f3f8-e5eb-4bfe-a102-efc5a967434d.json new file mode 100644 index 000000000..69ad8d773 --- /dev/null +++ b/data/hfopenllm_v2/NeverSleep/Lumimaid-v0.2-12B/e599f3f8-e5eb-4bfe-a102-efc5a967434d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NeverSleep_Lumimaid-v0.2-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lumimaid-v0.2-12B", + "id": "NeverSleep/Lumimaid-v0.2-12B", + "developer": "NeverSleep", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1099 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5396 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0566 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3146 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on 
MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4821 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3511 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NeverSleep/Lumimaid-v0.2-8B/6d7f1ac9-66c8-4700-87a9-0e413fc8878e.json b/data/hfopenllm_v2/NeverSleep/Lumimaid-v0.2-8B/6d7f1ac9-66c8-4700-87a9-0e413fc8878e.json deleted file mode 100644 index 76fce9bb1..000000000 --- a/data/hfopenllm_v2/NeverSleep/Lumimaid-v0.2-8B/6d7f1ac9-66c8-4700-87a9-0e413fc8878e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/NeverSleep_Lumimaid-v0.2-8B/1762652579.771939", - "retrieved_timestamp": "1762652579.771939", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NeverSleep/Lumimaid-v0.2-8B", - "developer": "NeverSleep", - "inference_platform": "unknown", - "id": "NeverSleep/Lumimaid-v0.2-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5038109992597419 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5237767601226618 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4303020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36361369680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/NeverSleep/Lumimaid-v0.2-8B/8e56f2dd-49d0-4eff-beea-53d01cd96f0e.json b/data/hfopenllm_v2/NeverSleep/Lumimaid-v0.2-8B/8e56f2dd-49d0-4eff-beea-53d01cd96f0e.json new file mode 100644 index 000000000..e40a35f70 --- 
/dev/null +++ b/data/hfopenllm_v2/NeverSleep/Lumimaid-v0.2-8B/8e56f2dd-49d0-4eff-beea-53d01cd96f0e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NeverSleep_Lumimaid-v0.2-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lumimaid-v0.2-8B", + "id": "NeverSleep/Lumimaid-v0.2-8B", + "developer": "NeverSleep", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5038 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5238 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1435 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3112 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4303 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3636 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Dolphin3.0-Llama3.1-1B-abliterated/f1a2b5d0-2c8a-4bbc-8bc5-0484485c2dad.json b/data/hfopenllm_v2/Nexesenex/Dolphin3.0-Llama3.1-1B-abliterated/f1a2b5d0-2c8a-4bbc-8bc5-0484485c2dad.json new file mode 100644 index 000000000..f3573a1fa --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Dolphin3.0-Llama3.1-1B-abliterated/f1a2b5d0-2c8a-4bbc-8bc5-0484485c2dad.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/Nexesenex_Dolphin3.0-Llama3.1-1B-abliterated/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dolphin3.0-Llama3.1-1B-abliterated", + "id": "Nexesenex/Dolphin3.0-Llama3.1-1B-abliterated", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5312 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3241 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0385 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2408 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3237 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1373 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DeepDive_3_Prev_v1.0/2c12ee67-0c77-4cb2-9e88-1c731ed55c3f.json b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DeepDive_3_Prev_v1.0/2c12ee67-0c77-4cb2-9e88-1c731ed55c3f.json new file mode 100644 index 000000000..cd74ed8ee --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DeepDive_3_Prev_v1.0/2c12ee67-0c77-4cb2-9e88-1c731ed55c3f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DeepDive_3_Prev_v1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": 
"HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.1_8b_DeepDive_3_Prev_v1.0", + "id": "Nexesenex/Llama_3.1_8b_DeepDive_3_Prev_v1.0", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6809 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5155 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1866 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3438 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DeepDive_3_R1_Prev_v1.0/567f8f54-225f-4d9b-be06-f24091adc1e6.json b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DeepDive_3_R1_Prev_v1.0/567f8f54-225f-4d9b-be06-f24091adc1e6.json new file mode 100644 index 000000000..7e171d5ad --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DeepDive_3_R1_Prev_v1.0/567f8f54-225f-4d9b-be06-f24091adc1e6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DeepDive_3_R1_Prev_v1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + 
"model_info": { + "name": "Llama_3.1_8b_DeepDive_3_R1_Prev_v1.0", + "id": "Nexesenex/Llama_3.1_8b_DeepDive_3_R1_Prev_v1.0", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7101 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.512 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1926 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3758 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3441 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DobHerWild_R1_v1.1R/ebb59730-9522-4c45-8f42-c0d941fd728c.json b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DobHerWild_R1_v1.1R/ebb59730-9522-4c45-8f42-c0d941fd728c.json new file mode 100644 index 000000000..2ed4134e3 --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DobHerWild_R1_v1.1R/ebb59730-9522-4c45-8f42-c0d941fd728c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DobHerWild_R1_v1.1R/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.1_8b_DobHerWild_R1_v1.1R", + "id": "Nexesenex/Llama_3.1_8b_DobHerWild_R1_v1.1R", + "developer": "Nexesenex", + "inference_platform": 
"unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.76 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5257 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2319 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3852 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3688 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DoberWild_v2.01/2c44fa8c-ebd3-4ea6-8578-61da38965c09.json b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DoberWild_v2.01/2c44fa8c-ebd3-4ea6-8578-61da38965c09.json new file mode 100644 index 000000000..d6626e33b --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DoberWild_v2.01/2c44fa8c-ebd3-4ea6-8578-61da38965c09.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DoberWild_v2.01/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.1_8b_DoberWild_v2.01", + "id": "Nexesenex/Llama_3.1_8b_DoberWild_v2.01", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.031 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + 
"source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7996 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5251 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2002 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4012 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3791 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DoberWild_v2.02/3ef26b8c-6bfb-457b-a160-a65c3cc8b0c6.json b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DoberWild_v2.02/3ef26b8c-6bfb-457b-a160-a65c3cc8b0c6.json new file mode 100644 index 000000000..f57ee9701 --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DoberWild_v2.02/3ef26b8c-6bfb-457b-a160-a65c3cc8b0c6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DoberWild_v2.02/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.1_8b_DoberWild_v2.02", + "id": "Nexesenex/Llama_3.1_8b_DoberWild_v2.02", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7746 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5313 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1994 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3946 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3764 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DoberWild_v2.03/0ab721ba-fbda-44ca-a349-1d3abfaabe62.json b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DoberWild_v2.03/0ab721ba-fbda-44ca-a349-1d3abfaabe62.json new file mode 100644 index 000000000..7306b4374 --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DoberWild_v2.03/0ab721ba-fbda-44ca-a349-1d3abfaabe62.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DoberWild_v2.03/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.1_8b_DoberWild_v2.03", + "id": "Nexesenex/Llama_3.1_8b_DoberWild_v2.03", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7764 + } + }, + { + "evaluation_name": "BBH", + "source_data": { 
+ "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5294 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2077 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3906 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3722 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DodoWild_v2.01/2fea1128-4f0c-40d8-be87-72c42c0648fb.json b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DodoWild_v2.01/2fea1128-4f0c-40d8-be87-72c42c0648fb.json new file mode 100644 index 000000000..a603278e4 --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DodoWild_v2.01/2fea1128-4f0c-40d8-be87-72c42c0648fb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DodoWild_v2.01/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.1_8b_DodoWild_v2.01", + "id": "Nexesenex/Llama_3.1_8b_DodoWild_v2.01", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.031 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7978 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5253 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1986 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.409 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3738 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DodoWild_v2.02/db9dc9d2-4aa2-43d0-9f2e-15fbd05af62c.json b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DodoWild_v2.02/db9dc9d2-4aa2-43d0-9f2e-15fbd05af62c.json new file mode 100644 index 000000000..afa87e2bb --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DodoWild_v2.02/db9dc9d2-4aa2-43d0-9f2e-15fbd05af62c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DodoWild_v2.02/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.1_8b_DodoWild_v2.02", + "id": "Nexesenex/Llama_3.1_8b_DodoWild_v2.02", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8017 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5262 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + 
"source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2273 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3971 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3761 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DodoWild_v2.03/28399fd0-840c-49d3-8179-407ed83d3bfc.json b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DodoWild_v2.03/28399fd0-840c-49d3-8179-407ed83d3bfc.json new file mode 100644 index 000000000..7503f6ebc --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DodoWild_v2.03/28399fd0-840c-49d3-8179-407ed83d3bfc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DodoWild_v2.03/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.1_8b_DodoWild_v2.03", + "id": "Nexesenex/Llama_3.1_8b_DodoWild_v2.03", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7941 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5308 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2221 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3959 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3786 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DodoWild_v2.10/d7108c13-e14a-4366-9a39-204f853b1bee.json b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DodoWild_v2.10/d7108c13-e14a-4366-9a39-204f853b1bee.json new file mode 100644 index 000000000..cda2e5b7a --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_DodoWild_v2.10/d7108c13-e14a-4366-9a39-204f853b1bee.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DodoWild_v2.10/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.1_8b_DodoWild_v2.10", + "id": "Nexesenex/Llama_3.1_8b_DodoWild_v2.10", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8054 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5278 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1971 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + 
"source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4157 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3855 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.01/56152d05-9273-4701-8c0a-723e2cab618d.json b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.01/56152d05-9273-4701-8c0a-723e2cab618d.json new file mode 100644 index 000000000..9d16585a7 --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.01/56152d05-9273-4701-8c0a-723e2cab618d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Dolermed_R1_V1.01/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.1_8b_Dolermed_R1_V1.01", + "id": "Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.01", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7534 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5312 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2017 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3747 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3733 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.03/55d2f23d-cb6c-42d2-8b57-837451d3c6df.json b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.03/55d2f23d-cb6c-42d2-8b57-837451d3c6df.json new file mode 100644 index 000000000..d30924818 --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.03/55d2f23d-cb6c-42d2-8b57-837451d3c6df.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Dolermed_R1_V1.03/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.1_8b_Dolermed_R1_V1.03", + "id": "Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.03", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7564 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5316 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2092 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.318 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + 
"source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.38 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.372 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Dolermed_V1.01/7479ae87-e795-4e20-848a-291614176def.json b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Dolermed_V1.01/7479ae87-e795-4e20-848a-291614176def.json new file mode 100644 index 000000000..3796fa305 --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Dolermed_V1.01/7479ae87-e795-4e20-848a-291614176def.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Dolermed_V1.01/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.1_8b_Dolermed_V1.01", + "id": "Nexesenex/Llama_3.1_8b_Dolermed_V1.01", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.031 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5087 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5194 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1344 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 
0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3945 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.357 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Dolerstormed_V1.04/04ceb40e-bde8-487b-9d29-dc8f681af9be.json b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Dolerstormed_V1.04/04ceb40e-bde8-487b-9d29-dc8f681af9be.json new file mode 100644 index 000000000..baf40f42c --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Dolerstormed_V1.04/04ceb40e-bde8-487b-9d29-dc8f681af9be.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Dolerstormed_V1.04/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.1_8b_Dolerstormed_V1.04", + "id": "Nexesenex/Llama_3.1_8b_Dolerstormed_V1.04", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7889 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5195 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1926 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3221 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.403 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": 
"hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3889 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Hermedash_R1_V1.04/e26b00b0-d9df-4ce2-a649-b19f8957b8ce.json b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Hermedash_R1_V1.04/e26b00b0-d9df-4ce2-a649-b19f8957b8ce.json new file mode 100644 index 000000000..59b222af2 --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Hermedash_R1_V1.04/e26b00b0-d9df-4ce2-a649-b19f8957b8ce.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Hermedash_R1_V1.04/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.1_8b_Hermedash_R1_V1.04", + "id": "Nexesenex/Llama_3.1_8b_Hermedash_R1_V1.04", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7872 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5192 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1866 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.323 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4111 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3882 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.01/9954194c-69b5-4eb4-8b32-859845548cb0.json b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.01/9954194c-69b5-4eb4-8b32-859845548cb0.json new file mode 100644 index 000000000..db5b6578a --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.01/9954194c-69b5-4eb4-8b32-859845548cb0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Hermedive_R1_V1.01/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.1_8b_Hermedive_R1_V1.01", + "id": "Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.01", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5001 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5171 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1775 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4008 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3427 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.03/2afbc279-242a-4276-85f0-facd29c2d89b.json b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.03/2afbc279-242a-4276-85f0-facd29c2d89b.json new file mode 100644 index 000000000..18a502512 --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.03/2afbc279-242a-4276-85f0-facd29c2d89b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Hermedive_R1_V1.03/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.1_8b_Hermedive_R1_V1.03", + "id": "Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.03", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6648 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5141 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1858 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3613 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3488 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Hermedive_V1.01/ba307ad4-3647-4785-9bf1-cd4dacf3c71f.json 
b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Hermedive_V1.01/ba307ad4-3647-4785-9bf1-cd4dacf3c71f.json new file mode 100644 index 000000000..fba2698e9 --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Hermedive_V1.01/ba307ad4-3647-4785-9bf1-cd4dacf3c71f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Hermedive_V1.01/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.1_8b_Hermedive_V1.01", + "id": "Nexesenex/Llama_3.1_8b_Hermedive_V1.01", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.031 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5062 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4918 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1647 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3697 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3551 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Mediver_V1.01/d03c73ca-7364-4517-aea4-f0ac564c49df.json b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Mediver_V1.01/d03c73ca-7364-4517-aea4-f0ac564c49df.json new file mode 100644 index 000000000..9a356ff72 --- /dev/null +++ 
b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Mediver_V1.01/d03c73ca-7364-4517-aea4-f0ac564c49df.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Mediver_V1.01/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.1_8b_Mediver_V1.01", + "id": "Nexesenex/Llama_3.1_8b_Mediver_V1.01", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.031 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1885 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4415 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0015 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3898 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2994 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Medusa_v1.01/1dd4b82a-ca80-4c9c-8800-f97ab2b9cbe7.json b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Medusa_v1.01/1dd4b82a-ca80-4c9c-8800-f97ab2b9cbe7.json new file mode 100644 index 000000000..08ab1831d --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Medusa_v1.01/1dd4b82a-ca80-4c9c-8800-f97ab2b9cbe7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/Nexesenex_Llama_3.1_8b_Medusa_v1.01/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.1_8b_Medusa_v1.01", + "id": "Nexesenex/Llama_3.1_8b_Medusa_v1.01", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.031 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7685 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5018 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1465 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4067 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3531 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Smarteaz_0.2_R1/f2363099-c39a-4874-bf77-ccc0fa087680.json b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Smarteaz_0.2_R1/f2363099-c39a-4874-bf77-ccc0fa087680.json new file mode 100644 index 000000000..8f98cf96c --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Smarteaz_0.2_R1/f2363099-c39a-4874-bf77-ccc0fa087680.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Smarteaz_0.2_R1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": 
"documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.1_8b_Smarteaz_0.2_R1", + "id": "Nexesenex/Llama_3.1_8b_Smarteaz_0.2_R1", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6346 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5113 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2606 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4188 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3645 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Smarteaz_V1.01/596eeee8-3600-4f8a-8888-978b610eb2ca.json b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Smarteaz_V1.01/596eeee8-3600-4f8a-8888-978b610eb2ca.json new file mode 100644 index 000000000..c7e67d274 --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Smarteaz_V1.01/596eeee8-3600-4f8a-8888-978b610eb2ca.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Smarteaz_V1.01/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.1_8b_Smarteaz_V1.01", + "id": 
"Nexesenex/Llama_3.1_8b_Smarteaz_V1.01", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8151 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5241 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2341 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3789 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3736 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Stormeder_v1.04/595ddba1-c450-4b69-85b7-0e3118c8c6c7.json b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Stormeder_v1.04/595ddba1-c450-4b69-85b7-0e3118c8c6c7.json new file mode 100644 index 000000000..0c3c8b8b1 --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Stormeder_v1.04/595ddba1-c450-4b69-85b7-0e3118c8c6c7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Stormeder_v1.04/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.1_8b_Stormeder_v1.04", + "id": "Nexesenex/Llama_3.1_8b_Stormeder_v1.04", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + 
"params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7853 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5207 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.185 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3205 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3949 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3852 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Typhoon_v1.03/64890314-bba0-4fb2-8c21-38b413cff4c8.json b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Typhoon_v1.03/64890314-bba0-4fb2-8c21-38b413cff4c8.json new file mode 100644 index 000000000..30c9c3431 --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.1_8b_Typhoon_v1.03/64890314-bba0-4fb2-8c21-38b413cff4c8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Typhoon_v1.03/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.1_8b_Typhoon_v1.03", + "id": "Nexesenex/Llama_3.1_8b_Typhoon_v1.03", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8078 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5314 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2273 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3815 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3842 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_AquaSyn_0.1/470b8b0d-fbaf-408c-a28e-57d1b294f8a8.json b/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_AquaSyn_0.1/470b8b0d-fbaf-408c-a28e-57d1b294f8a8.json new file mode 100644 index 000000000..421f9e1fb --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_AquaSyn_0.1/470b8b0d-fbaf-408c-a28e-57d1b294f8a8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_AquaSyn_0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.2_1b_AquaSyn_0.1", + "id": "Nexesenex/Llama_3.2_1b_AquaSyn_0.1", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2741 + } + }, + { 
+ "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3284 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0219 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2483 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.346 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1378 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_AquaSyn_0.11/00a1579e-8636-4eca-9a63-c0b067a5f3dc.json b/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_AquaSyn_0.11/00a1579e-8636-4eca-9a63-c0b067a5f3dc.json new file mode 100644 index 000000000..9ccdfaea8 --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_AquaSyn_0.11/00a1579e-8636-4eca-9a63-c0b067a5f3dc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_AquaSyn_0.11/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.2_1b_AquaSyn_0.11", + "id": "Nexesenex/Llama_3.2_1b_AquaSyn_0.11", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2431 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3112 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0234 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3368 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1116 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_Dolto_0.1/a52cc4c9-6d60-4083-ac77-591e247d86c9.json b/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_Dolto_0.1/a52cc4c9-6d60-4083-ac77-591e247d86c9.json new file mode 100644 index 000000000..8e04a3f3b --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_Dolto_0.1/a52cc4c9-6d60-4083-ac77-591e247d86c9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_Dolto_0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.2_1b_Dolto_0.1", + "id": "Nexesenex/Llama_3.2_1b_Dolto_0.1", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5434 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.335 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH 
Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.037 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2374 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3421 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1364 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_Odyssea_V1.01/ac5c321a-d35a-4e0f-a1be-bcc0b7109f91.json b/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_Odyssea_V1.01/ac5c321a-d35a-4e0f-a1be-bcc0b7109f91.json new file mode 100644 index 000000000..d44a7f415 --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_Odyssea_V1.01/ac5c321a-d35a-4e0f-a1be-bcc0b7109f91.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_Odyssea_V1.01/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.2_1b_Odyssea_V1.01", + "id": "Nexesenex/Llama_3.2_1b_Odyssea_V1.01", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2495 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0174 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2559 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.342 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1152 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_Odyssea_V1/c4d11b01-ae5b-4198-b102-07160f100a41.json b/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_Odyssea_V1/c4d11b01-ae5b-4198-b102-07160f100a41.json new file mode 100644 index 000000000..4efe276e5 --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_Odyssea_V1/c4d11b01-ae5b-4198-b102-07160f100a41.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_Odyssea_V1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.2_1b_Odyssea_V1", + "id": "Nexesenex/Llama_3.2_1b_Odyssea_V1", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2553 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.301 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0144 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", 
+ "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3394 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1153 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_OpenTree_R1_0.1/19405ead-2263-4613-8053-43beeafb4bfc.json b/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_OpenTree_R1_0.1/19405ead-2263-4613-8053-43beeafb4bfc.json new file mode 100644 index 000000000..d058404ed --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_OpenTree_R1_0.1/19405ead-2263-4613-8053-43beeafb4bfc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_OpenTree_R1_0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.2_1b_OpenTree_R1_0.1", + "id": "Nexesenex/Llama_3.2_1b_OpenTree_R1_0.1", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5366 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.328 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0476 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 
1.0 + }, + "score_details": { + "score": 0.2525 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3131 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1675 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_OrcaSun_V1/6c698a60-a813-4be7-b55f-b684029b492d.json b/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_OrcaSun_V1/6c698a60-a813-4be7-b55f-b684029b492d.json new file mode 100644 index 000000000..377ee7131 --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_OrcaSun_V1/6c698a60-a813-4be7-b55f-b684029b492d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_OrcaSun_V1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.2_1b_OrcaSun_V1", + "id": "Nexesenex/Llama_3.2_1b_OrcaSun_V1", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5949 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.355 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0597 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2366 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.338 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1904 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_RandomLego_RP_R1_0.1/b67c4a44-7787-45e2-b88c-5d7e8e496fa3.json b/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_RandomLego_RP_R1_0.1/b67c4a44-7787-45e2-b88c-5d7e8e496fa3.json new file mode 100644 index 000000000..16d96b077 --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_RandomLego_RP_R1_0.1/b67c4a44-7787-45e2-b88c-5d7e8e496fa3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_RandomLego_RP_R1_0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.2_1b_RandomLego_RP_R1_0.1", + "id": "Nexesenex/Llama_3.2_1b_RandomLego_RP_R1_0.1", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5543 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3428 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0566 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.3249 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1563 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_SunOrca_V1/a20a529e-c52e-41b7-a8ee-909167048bfb.json b/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_SunOrca_V1/a20a529e-c52e-41b7-a8ee-909167048bfb.json new file mode 100644 index 000000000..2c9dd24ef --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_SunOrca_V1/a20a529e-c52e-41b7-a8ee-909167048bfb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_SunOrca_V1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.2_1b_SunOrca_V1", + "id": "Nexesenex/Llama_3.2_1b_SunOrca_V1", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.543 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3431 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0672 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3262 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1884 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_Sydonia_0.1/2735e6f4-839f-4ab1-8ede-3447891b1b26.json b/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_Sydonia_0.1/2735e6f4-839f-4ab1-8ede-3447891b1b26.json new file mode 100644 index 000000000..794afd099 --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_Sydonia_0.1/2735e6f4-839f-4ab1-8ede-3447891b1b26.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_Sydonia_0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.2_1b_Sydonia_0.1", + "id": "Nexesenex/Llama_3.2_1b_Sydonia_0.1", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2197 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0204 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2282 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3382 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1224 + } + } + ] +} \ No newline at 
end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_Syneridol_0.2/e74e7e7f-8550-4cba-97cd-2626c82d6b29.json b/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_Syneridol_0.2/e74e7e7f-8550-4cba-97cd-2626c82d6b29.json new file mode 100644 index 000000000..b39cc7c79 --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_Syneridol_0.2/e74e7e7f-8550-4cba-97cd-2626c82d6b29.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_Syneridol_0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.2_1b_Syneridol_0.2", + "id": "Nexesenex/Llama_3.2_1b_Syneridol_0.2", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2157 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3139 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0219 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2349 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3343 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1227 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_Synopsys_0.1/14f4c00d-8915-413d-8e85-79f395127682.json 
b/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_Synopsys_0.1/14f4c00d-8915-413d-8e85-79f395127682.json new file mode 100644 index 000000000..0b4dbadaf --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_Synopsys_0.1/14f4c00d-8915-413d-8e85-79f395127682.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_Synopsys_0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.2_1b_Synopsys_0.1", + "id": "Nexesenex/Llama_3.2_1b_Synopsys_0.1", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1764 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3162 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0166 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2391 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3461 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1231 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_Synopsys_0.11/9119b586-d3b2-4ce0-a243-d584e2087184.json b/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_Synopsys_0.11/9119b586-d3b2-4ce0-a243-d584e2087184.json new file mode 100644 index 000000000..ff16ce048 --- /dev/null +++ 
b/data/hfopenllm_v2/Nexesenex/Llama_3.2_1b_Synopsys_0.11/9119b586-d3b2-4ce0-a243-d584e2087184.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_Synopsys_0.11/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.2_1b_Synopsys_0.11", + "id": "Nexesenex/Llama_3.2_1b_Synopsys_0.11", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2842 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3102 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0128 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2626 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3513 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1123 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.2_3b_Kermes_v1/629f3f1a-f8ee-4d1b-b604-7bbd35c6517b.json b/data/hfopenllm_v2/Nexesenex/Llama_3.2_3b_Kermes_v1/629f3f1a-f8ee-4d1b-b604-7bbd35c6517b.json new file mode 100644 index 000000000..66193208d --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.2_3b_Kermes_v1/629f3f1a-f8ee-4d1b-b604-7bbd35c6517b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/Nexesenex_Llama_3.2_3b_Kermes_v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.2_3b_Kermes_v1", + "id": "Nexesenex/Llama_3.2_3b_Kermes_v1", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4852 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.441 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.031 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2735 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.407 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2547 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.2_3b_Kermes_v2.1/a6ac828c-904b-413a-a5fa-a5ed06a28143.json b/data/hfopenllm_v2/Nexesenex/Llama_3.2_3b_Kermes_v2.1/a6ac828c-904b-413a-a5fa-a5ed06a28143.json new file mode 100644 index 000000000..feb5d98bb --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.2_3b_Kermes_v2.1/a6ac828c-904b-413a-a5fa-a5ed06a28143.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_3b_Kermes_v2.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.2_3b_Kermes_v2.1", + "id": "Nexesenex/Llama_3.2_3b_Kermes_v2.1", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5584 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4464 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0521 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3964 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2692 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Llama_3.2_3b_Kermes_v2/251a3ef9-c7ae-4d79-8a60-4bc021a3f001.json b/data/hfopenllm_v2/Nexesenex/Llama_3.2_3b_Kermes_v2/251a3ef9-c7ae-4d79-8a60-4bc021a3f001.json new file mode 100644 index 000000000..294b9bac9 --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Llama_3.2_3b_Kermes_v2/251a3ef9-c7ae-4d79-8a60-4bc021a3f001.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_3b_Kermes_v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.2_3b_Kermes_v2", + "id": "Nexesenex/Llama_3.2_3b_Kermes_v2", + "developer": "Nexesenex", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5754 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4455 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0544 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3778 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2734 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Nemotron_W_4b_Halo_0.1/243b045a-8442-41fd-a483-e4e25b771048.json b/data/hfopenllm_v2/Nexesenex/Nemotron_W_4b_Halo_0.1/243b045a-8442-41fd-a483-e4e25b771048.json deleted file mode 100644 index b06dc96a9..000000000 --- a/data/hfopenllm_v2/Nexesenex/Nemotron_W_4b_Halo_0.1/243b045a-8442-41fd-a483-e4e25b771048.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nexesenex_Nemotron_W_4b_Halo_0.1/1762652579.78175", - "retrieved_timestamp": "1762652579.7817512", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nexesenex/Nemotron_W_4b_Halo_0.1", - "developer": "Nexesenex", - "inference_platform": "unknown", - "id": "Nexesenex/Nemotron_W_4b_Halo_0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": 
"LlamaForCausalLM", - "params_billions": 4.513 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3627275628665275 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4135101667655742 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04229607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28020134228187926 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41651041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25049867021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/Nexesenex/Nemotron_W_4b_Halo_0.1/962b48a3-23d7-4104-b34d-4e5c2af31d58.json b/data/hfopenllm_v2/Nexesenex/Nemotron_W_4b_Halo_0.1/962b48a3-23d7-4104-b34d-4e5c2af31d58.json new file mode 100644 index 000000000..d9a752984 --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Nemotron_W_4b_Halo_0.1/962b48a3-23d7-4104-b34d-4e5c2af31d58.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Nemotron_W_4b_Halo_0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nemotron_W_4b_Halo_0.1", + "id": "Nexesenex/Nemotron_W_4b_Halo_0.1", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.513 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3627 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4135 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + 
"source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0423 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4165 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2505 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Nemotron_W_4b_MagLight_0.1/2f3f0dcb-a62d-44bd-b86d-c1f403d5b833.json b/data/hfopenllm_v2/Nexesenex/Nemotron_W_4b_MagLight_0.1/2f3f0dcb-a62d-44bd-b86d-c1f403d5b833.json deleted file mode 100644 index 9f56c199d..000000000 --- a/data/hfopenllm_v2/Nexesenex/Nemotron_W_4b_MagLight_0.1/2f3f0dcb-a62d-44bd-b86d-c1f403d5b833.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nexesenex_Nemotron_W_4b_MagLight_0.1/1762652579.781992", - "retrieved_timestamp": "1762652579.781993", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nexesenex/Nemotron_W_4b_MagLight_0.1", - "developer": "Nexesenex", - "inference_platform": "unknown", - "id": "Nexesenex/Nemotron_W_4b_MagLight_0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.513 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4230275668559422 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42314083807225433 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41120833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2544880319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/Nexesenex/Nemotron_W_4b_MagLight_0.1/e4b0be31-6f9a-4a57-b433-e561da9bd827.json b/data/hfopenllm_v2/Nexesenex/Nemotron_W_4b_MagLight_0.1/e4b0be31-6f9a-4a57-b433-e561da9bd827.json new file mode 100644 index 000000000..7186f9001 --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Nemotron_W_4b_MagLight_0.1/e4b0be31-6f9a-4a57-b433-e561da9bd827.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Nemotron_W_4b_MagLight_0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nemotron_W_4b_MagLight_0.1", + "id": "Nexesenex/Nemotron_W_4b_MagLight_0.1", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.513 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.423 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4231 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.04 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.4112 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2545 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Qwen_2.5_3b_Smarteaz_0.01a/9a31f208-b7d8-4baa-b96e-99926ecb35af.json b/data/hfopenllm_v2/Nexesenex/Qwen_2.5_3b_Smarteaz_0.01a/9a31f208-b7d8-4baa-b96e-99926ecb35af.json new file mode 100644 index 000000000..8ea2f8444 --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Qwen_2.5_3b_Smarteaz_0.01a/9a31f208-b7d8-4baa-b96e-99926ecb35af.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_Qwen_2.5_3b_Smarteaz_0.01a/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen_2.5_3b_Smarteaz_0.01a", + "id": "Nexesenex/Qwen_2.5_3b_Smarteaz_0.01a", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.085 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4012 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4637 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1805 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.432 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": 
"TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.286 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL/318afc06-f294-4253-b1c9-173a7f56083b.json b/data/hfopenllm_v2/Nexesenex/pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL/318afc06-f294-4253-b1c9-173a7f56083b.json deleted file mode 100644 index a81f62a4c..000000000 --- a/data/hfopenllm_v2/Nexesenex/pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL/318afc06-f294-4253-b1c9-173a7f56083b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nexesenex_pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL/1762652579.7826922", - "retrieved_timestamp": "1762652579.7826939", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nexesenex/pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL", - "developer": "Nexesenex", - "inference_platform": "unknown", - "id": "Nexesenex/pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5889905450870357 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3562492190965966 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07477341389728095 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33955208333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1802692819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/Nexesenex/pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL/8d933df1-60cb-471d-bfc3-b11c93150203.json b/data/hfopenllm_v2/Nexesenex/pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL/8d933df1-60cb-471d-bfc3-b11c93150203.json new file mode 100644 index 
000000000..494bbaf5c --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL/8d933df1-60cb-471d-bfc3-b11c93150203.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexesenex_pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL", + "id": "Nexesenex/pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL", + "developer": "Nexesenex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.589 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3562 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0748 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3396 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1803 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexusflow/NexusRaven-V2-13B/35315c3a-ec06-433a-b3fa-ae7a4a59b7ea.json b/data/hfopenllm_v2/Nexusflow/NexusRaven-V2-13B/35315c3a-ec06-433a-b3fa-ae7a4a59b7ea.json new file mode 100644 index 000000000..f72a2ef92 --- /dev/null +++ 
b/data/hfopenllm_v2/Nexusflow/NexusRaven-V2-13B/35315c3a-ec06-433a-b3fa-ae7a4a59b7ea.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nexusflow_NexusRaven-V2-13B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NexusRaven-V2-13B", + "id": "Nexusflow/NexusRaven-V2-13B", + "developer": "Nexusflow", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1791 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3949 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0295 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3737 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1872 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexusflow/NexusRaven-V2-13B/f5e5662e-803e-4f1f-82e7-14a2a189ed6d.json b/data/hfopenllm_v2/Nexusflow/NexusRaven-V2-13B/f5e5662e-803e-4f1f-82e7-14a2a189ed6d.json deleted file mode 100644 index ca8a7eea7..000000000 --- a/data/hfopenllm_v2/Nexusflow/NexusRaven-V2-13B/f5e5662e-803e-4f1f-82e7-14a2a189ed6d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nexusflow_NexusRaven-V2-13B/1762652579.782948", - "retrieved_timestamp": 
"1762652579.7829492", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nexusflow/NexusRaven-V2-13B", - "developer": "Nexusflow", - "inference_platform": "unknown", - "id": "Nexusflow/NexusRaven-V2-13B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1790781792311068 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39488604640507335 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3736875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18716755319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/NikolaSigmoid/AceMath-1.5B-Instruct-1epoch/0cf3db2f-9b23-4602-ac92-265bafd36410.json b/data/hfopenllm_v2/NikolaSigmoid/AceMath-1.5B-Instruct-1epoch/0cf3db2f-9b23-4602-ac92-265bafd36410.json deleted file mode 100644 index fe9498669..000000000 --- a/data/hfopenllm_v2/NikolaSigmoid/AceMath-1.5B-Instruct-1epoch/0cf3db2f-9b23-4602-ac92-265bafd36410.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/NikolaSigmoid_AceMath-1.5B-Instruct-1epoch/1762652579.783191", - "retrieved_timestamp": "1762652579.7831922", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NikolaSigmoid/AceMath-1.5B-Instruct-1epoch", - "developer": "NikolaSigmoid", - "inference_platform": "unknown", - "id": "NikolaSigmoid/AceMath-1.5B-Instruct-1epoch", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.791 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on 
IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2848918646967823 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.426284784119477 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30513595166163143 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39251041666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23761635638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/NikolaSigmoid/AceMath-1.5B-Instruct-1epoch/3530db9a-0d61-4cf8-9fff-b15f6488c845.json b/data/hfopenllm_v2/NikolaSigmoid/AceMath-1.5B-Instruct-1epoch/3530db9a-0d61-4cf8-9fff-b15f6488c845.json new file mode 100644 index 000000000..24a1eb210 --- /dev/null +++ b/data/hfopenllm_v2/NikolaSigmoid/AceMath-1.5B-Instruct-1epoch/3530db9a-0d61-4cf8-9fff-b15f6488c845.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NikolaSigmoid_AceMath-1.5B-Instruct-1epoch/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AceMath-1.5B-Instruct-1epoch", + "id": "NikolaSigmoid/AceMath-1.5B-Instruct-1epoch", + "developer": "NikolaSigmoid", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.791 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2849 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4263 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3051 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3925 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2376 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NikolaSigmoid/AceMath-1.5B-Instruct-dolphin-r1-200/7d9901e0-eafe-4d49-a5bb-fab059708bcb.json b/data/hfopenllm_v2/NikolaSigmoid/AceMath-1.5B-Instruct-dolphin-r1-200/7d9901e0-eafe-4d49-a5bb-fab059708bcb.json new file mode 100644 index 000000000..797f0d872 --- /dev/null +++ b/data/hfopenllm_v2/NikolaSigmoid/AceMath-1.5B-Instruct-dolphin-r1-200/7d9901e0-eafe-4d49-a5bb-fab059708bcb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NikolaSigmoid_AceMath-1.5B-Instruct-dolphin-r1-200/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AceMath-1.5B-Instruct-dolphin-r1-200", + "id": "NikolaSigmoid/AceMath-1.5B-Instruct-dolphin-r1-200", + "developer": "NikolaSigmoid", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.928 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1808 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2815 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2559 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1143 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NikolaSigmoid/AceMath-1.5B-Instruct-dolphin-r1-200/93f56942-30d8-4a0f-af8d-901fb264436c.json b/data/hfopenllm_v2/NikolaSigmoid/AceMath-1.5B-Instruct-dolphin-r1-200/93f56942-30d8-4a0f-af8d-901fb264436c.json deleted file mode 100644 index ca66c2d72..000000000 --- a/data/hfopenllm_v2/NikolaSigmoid/AceMath-1.5B-Instruct-dolphin-r1-200/93f56942-30d8-4a0f-af8d-901fb264436c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/NikolaSigmoid_AceMath-1.5B-Instruct-dolphin-r1-200/1762652579.783446", - "retrieved_timestamp": "1762652579.783447", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NikolaSigmoid/AceMath-1.5B-Instruct-dolphin-r1-200", - "developer": "NikolaSigmoid", - "inference_platform": "unknown", - "id": "NikolaSigmoid/AceMath-1.5B-Instruct-dolphin-r1-200", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.928 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18080249294095221 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28148007801214714 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, 
- { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37495833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11427859042553191 - } - } - ] -} diff --git a/data/hfopenllm_v2/NikolaSigmoid/DeepSeek-R1-Distill-Qwen-1.5B-500/ee7f9025-bb2c-4902-b8e2-bfac2b63d2fd.json b/data/hfopenllm_v2/NikolaSigmoid/DeepSeek-R1-Distill-Qwen-1.5B-500/ee7f9025-bb2c-4902-b8e2-bfac2b63d2fd.json new file mode 100644 index 000000000..149206bb9 --- /dev/null +++ b/data/hfopenllm_v2/NikolaSigmoid/DeepSeek-R1-Distill-Qwen-1.5B-500/ee7f9025-bb2c-4902-b8e2-bfac2b63d2fd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NikolaSigmoid_DeepSeek-R1-Distill-Qwen-1.5B-500/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-R1-Distill-Qwen-1.5B-500", + "id": "NikolaSigmoid/DeepSeek-R1-Distill-Qwen-1.5B-500", + "developer": "NikolaSigmoid", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.157 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1749 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2602 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2458 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.338 + } + }, + { + "evaluation_name": 
"MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1125 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NikolaSigmoid/acemath-200/4414a96e-0664-4531-9c0f-3eb4a062fbe2.json b/data/hfopenllm_v2/NikolaSigmoid/acemath-200/4414a96e-0664-4531-9c0f-3eb4a062fbe2.json deleted file mode 100644 index 52041e0e7..000000000 --- a/data/hfopenllm_v2/NikolaSigmoid/acemath-200/4414a96e-0664-4531-9c0f-3eb4a062fbe2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/NikolaSigmoid_acemath-200/1762652579.783974", - "retrieved_timestamp": "1762652579.783974", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NikolaSigmoid/acemath-200", - "developer": "NikolaSigmoid", - "inference_platform": "unknown", - "id": "NikolaSigmoid/acemath-200", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.791 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2848918646967823 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.426284784119477 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30513595166163143 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39251041666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23761635638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/NikolaSigmoid/acemath-200/6157f79e-2673-4ad6-99d7-e5cf5e4e1db2.json b/data/hfopenllm_v2/NikolaSigmoid/acemath-200/6157f79e-2673-4ad6-99d7-e5cf5e4e1db2.json new file mode 100644 index 000000000..947e95059 --- /dev/null +++ b/data/hfopenllm_v2/NikolaSigmoid/acemath-200/6157f79e-2673-4ad6-99d7-e5cf5e4e1db2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/NikolaSigmoid_acemath-200/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "acemath-200", + "id": "NikolaSigmoid/acemath-200", + "developer": "NikolaSigmoid", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.791 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2849 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4263 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3051 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3925 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2376 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NikolaSigmoid/phi-4-14b/0aa7572c-1aa6-4997-a2a2-3b557fbde639.json b/data/hfopenllm_v2/NikolaSigmoid/phi-4-14b/0aa7572c-1aa6-4997-a2a2-3b557fbde639.json new file mode 100644 index 000000000..61e7f3fc3 --- /dev/null +++ b/data/hfopenllm_v2/NikolaSigmoid/phi-4-14b/0aa7572c-1aa6-4997-a2a2-3b557fbde639.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NikolaSigmoid_phi-4-14b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": 
"third_party" + }, + "model_info": { + "name": "phi-4-14b", + "id": "NikolaSigmoid/phi-4-14b", + "developer": "NikolaSigmoid", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "", + "params_billions": 14.704 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0561 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6695 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2938 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4035 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5047 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5278 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NikolaSigmoid/phi-4-1steps/6f5df760-2d3e-47b1-b55e-4031a5f11d41.json b/data/hfopenllm_v2/NikolaSigmoid/phi-4-1steps/6f5df760-2d3e-47b1-b55e-4031a5f11d41.json new file mode 100644 index 000000000..eba7dc221 --- /dev/null +++ b/data/hfopenllm_v2/NikolaSigmoid/phi-4-1steps/6f5df760-2d3e-47b1-b55e-4031a5f11d41.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NikolaSigmoid_phi-4-1steps/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "phi-4-1steps", + "id": "NikolaSigmoid/phi-4-1steps", + "developer": "NikolaSigmoid", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "", + "params_billions": 14.704 + } + }, + 
"evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0528 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6707 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2983 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4018 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5021 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5273 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NikolaSigmoid/phi-4-300steps/ac676b03-c3ce-4ff1-83fc-5c8db82f1497.json b/data/hfopenllm_v2/NikolaSigmoid/phi-4-300steps/ac676b03-c3ce-4ff1-83fc-5c8db82f1497.json new file mode 100644 index 000000000..d245536d7 --- /dev/null +++ b/data/hfopenllm_v2/NikolaSigmoid/phi-4-300steps/ac676b03-c3ce-4ff1-83fc-5c8db82f1497.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NikolaSigmoid_phi-4-300steps/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "phi-4-300steps", + "id": "NikolaSigmoid/phi-4-300steps", + "developer": "NikolaSigmoid", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "", + "params_billions": 14.704 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0561 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6701 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2946 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4052 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5034 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5288 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nitral-AI/Captain-Eris-BMO_Violent-GRPO-v0.420/2229cdf8-3ecb-4f11-8824-9c3bfbf6f968.json b/data/hfopenllm_v2/Nitral-AI/Captain-Eris-BMO_Violent-GRPO-v0.420/2229cdf8-3ecb-4f11-8824-9c3bfbf6f968.json new file mode 100644 index 000000000..021208aeb --- /dev/null +++ b/data/hfopenllm_v2/Nitral-AI/Captain-Eris-BMO_Violent-GRPO-v0.420/2229cdf8-3ecb-4f11-8824-9c3bfbf6f968.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nitral-AI_Captain-Eris-BMO_Violent-GRPO-v0.420/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Captain-Eris-BMO_Violent-GRPO-v0.420", + "id": "Nitral-AI/Captain-Eris-BMO_Violent-GRPO-v0.420", + "developer": "Nitral-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6313 + } + }, + { + "evaluation_name": 
"BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5079 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1314 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4228 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3596 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nitral-AI/Captain-Eris-BMO_Violent-GRPO-v0.420/e841483e-042b-4a2a-8dbc-9ed7529f7618.json b/data/hfopenllm_v2/Nitral-AI/Captain-Eris-BMO_Violent-GRPO-v0.420/e841483e-042b-4a2a-8dbc-9ed7529f7618.json deleted file mode 100644 index 77766be56..000000000 --- a/data/hfopenllm_v2/Nitral-AI/Captain-Eris-BMO_Violent-GRPO-v0.420/e841483e-042b-4a2a-8dbc-9ed7529f7618.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nitral-AI_Captain-Eris-BMO_Violent-GRPO-v0.420/1762652579.784868", - "retrieved_timestamp": "1762652579.7848692", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nitral-AI/Captain-Eris-BMO_Violent-GRPO-v0.420", - "developer": "Nitral-AI", - "inference_platform": "unknown", - "id": "Nitral-AI/Captain-Eris-BMO_Violent-GRPO-v0.420", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6312805578088361 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5078530730075063 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13141993957703926 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4228020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.359624335106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/Nitral-AI/Captain-Eris_BMO-Violent-12B/95ebc5b8-a541-4fca-9e7c-692720e73362.json b/data/hfopenllm_v2/Nitral-AI/Captain-Eris_BMO-Violent-12B/95ebc5b8-a541-4fca-9e7c-692720e73362.json new file mode 100644 index 000000000..52a63c6ba --- /dev/null +++ b/data/hfopenllm_v2/Nitral-AI/Captain-Eris_BMO-Violent-12B/95ebc5b8-a541-4fca-9e7c-692720e73362.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nitral-AI_Captain-Eris_BMO-Violent-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Captain-Eris_BMO-Violent-12B", + "id": "Nitral-AI/Captain-Eris_BMO-Violent-12B", + "developer": "Nitral-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6152 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5104 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1367 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4255 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3571 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nitral-AI/Captain-Eris_BMO-Violent-12B/ebcd5d63-5c91-41d1-b9e2-0bafe7170000.json b/data/hfopenllm_v2/Nitral-AI/Captain-Eris_BMO-Violent-12B/ebcd5d63-5c91-41d1-b9e2-0bafe7170000.json deleted file mode 100644 index 79c3fa9cc..000000000 --- a/data/hfopenllm_v2/Nitral-AI/Captain-Eris_BMO-Violent-12B/ebcd5d63-5c91-41d1-b9e2-0bafe7170000.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nitral-AI_Captain-Eris_BMO-Violent-12B/1762652579.785123", - "retrieved_timestamp": "1762652579.785124", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nitral-AI/Captain-Eris_BMO-Violent-12B", - "developer": "Nitral-AI", - "inference_platform": "unknown", - "id": "Nitral-AI/Captain-Eris_BMO-Violent-12B", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.615218730745533 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5104372825851065 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13670694864048338 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42553124999999997 - } - }, - { - "evaluation_name": 
"MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35713098404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/Nitral-AI/Captain-Eris_Violet-GRPO-v0.420/09a2508d-a171-493f-9ff2-e7f375815c91.json b/data/hfopenllm_v2/Nitral-AI/Captain-Eris_Violet-GRPO-v0.420/09a2508d-a171-493f-9ff2-e7f375815c91.json new file mode 100644 index 000000000..4d63cdc2b --- /dev/null +++ b/data/hfopenllm_v2/Nitral-AI/Captain-Eris_Violet-GRPO-v0.420/09a2508d-a171-493f-9ff2-e7f375815c91.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nitral-AI_Captain-Eris_Violet-GRPO-v0.420/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Captain-Eris_Violet-GRPO-v0.420", + "id": "Nitral-AI/Captain-Eris_Violet-GRPO-v0.420", + "developer": "Nitral-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6262 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5159 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.108 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4279 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.3535 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nitral-AI/Captain-Eris_Violet-GRPO-v0.420/cf030461-1234-48ce-a025-ba0f52cdf191.json b/data/hfopenllm_v2/Nitral-AI/Captain-Eris_Violet-GRPO-v0.420/cf030461-1234-48ce-a025-ba0f52cdf191.json deleted file mode 100644 index 84fd3c6e9..000000000 --- a/data/hfopenllm_v2/Nitral-AI/Captain-Eris_Violet-GRPO-v0.420/cf030461-1234-48ce-a025-ba0f52cdf191.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nitral-AI_Captain-Eris_Violet-GRPO-v0.420/1762652579.785343", - "retrieved_timestamp": "1762652579.785344", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nitral-AI/Captain-Eris_Violet-GRPO-v0.420", - "developer": "Nitral-AI", - "inference_platform": "unknown", - "id": "Nitral-AI/Captain-Eris_Violet-GRPO-v0.420", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6261597007052399 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.515921407165298 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10800604229607251 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42791666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35347406914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/Nitral-AI/Captain-Eris_Violet-V0.420-12B/12a4a921-5859-4fd6-9d64-677a7d8ef696.json b/data/hfopenllm_v2/Nitral-AI/Captain-Eris_Violet-V0.420-12B/12a4a921-5859-4fd6-9d64-677a7d8ef696.json new file mode 100644 index 000000000..8679f45f6 --- /dev/null +++ b/data/hfopenllm_v2/Nitral-AI/Captain-Eris_Violet-V0.420-12B/12a4a921-5859-4fd6-9d64-677a7d8ef696.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nitral-AI_Captain-Eris_Violet-V0.420-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": 
"documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Captain-Eris_Violet-V0.420-12B", + "id": "Nitral-AI/Captain-Eris_Violet-V0.420-12B", + "developer": "Nitral-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4339 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5478 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1073 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3112 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4331 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3723 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nitral-AI/Captain-Eris_Violet-V0.420-12B/ad87ba77-99a9-463f-aea3-1d29fc0317b0.json b/data/hfopenllm_v2/Nitral-AI/Captain-Eris_Violet-V0.420-12B/ad87ba77-99a9-463f-aea3-1d29fc0317b0.json deleted file mode 100644 index eccb3161a..000000000 --- a/data/hfopenllm_v2/Nitral-AI/Captain-Eris_Violet-V0.420-12B/ad87ba77-99a9-463f-aea3-1d29fc0317b0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nitral-AI_Captain-Eris_Violet-V0.420-12B/1762652579.785556", - "retrieved_timestamp": "1762652579.785557", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open 
LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nitral-AI/Captain-Eris_Violet-V0.420-12B", - "developer": "Nitral-AI", - "inference_platform": "unknown", - "id": "Nitral-AI/Captain-Eris_Violet-V0.420-12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43391866913123844 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5478099417611365 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10725075528700906 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43306249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3722573138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/Nitral-AI/Captain_BMO-12B/6fed7e5b-9692-40f7-913e-fc3b57b8c72a.json b/data/hfopenllm_v2/Nitral-AI/Captain_BMO-12B/6fed7e5b-9692-40f7-913e-fc3b57b8c72a.json deleted file mode 100644 index e7d604542..000000000 --- a/data/hfopenllm_v2/Nitral-AI/Captain_BMO-12B/6fed7e5b-9692-40f7-913e-fc3b57b8c72a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nitral-AI_Captain_BMO-12B/1762652579.7857668", - "retrieved_timestamp": "1762652579.7857668", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nitral-AI/Captain_BMO-12B", - "developer": "Nitral-AI", - "inference_platform": "unknown", - "id": "Nitral-AI/Captain_BMO-12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4750595087700634 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5285960650424973 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13972809667673716 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37480208333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3568816489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/Nitral-AI/Captain_BMO-12B/b79f12d0-cdfc-4c9d-a88b-40612dcbf64d.json b/data/hfopenllm_v2/Nitral-AI/Captain_BMO-12B/b79f12d0-cdfc-4c9d-a88b-40612dcbf64d.json new file mode 100644 index 000000000..ab08d38f3 --- /dev/null +++ b/data/hfopenllm_v2/Nitral-AI/Captain_BMO-12B/b79f12d0-cdfc-4c9d-a88b-40612dcbf64d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nitral-AI_Captain_BMO-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Captain_BMO-12B", + "id": "Nitral-AI/Captain_BMO-12B", + "developer": "Nitral-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4751 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5286 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1397 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3196 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3748 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3569 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nitral-AI/Hathor_Stable-v0.2-L3-8B/2bb06e2f-9aee-4ac4-b9a6-fe537c2c9890.json b/data/hfopenllm_v2/Nitral-AI/Hathor_Stable-v0.2-L3-8B/2bb06e2f-9aee-4ac4-b9a6-fe537c2c9890.json deleted file mode 100644 index 242fec305..000000000 --- a/data/hfopenllm_v2/Nitral-AI/Hathor_Stable-v0.2-L3-8B/2bb06e2f-9aee-4ac4-b9a6-fe537c2c9890.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nitral-AI_Hathor_Stable-v0.2-L3-8B/1762652579.7859662", - "retrieved_timestamp": "1762652579.785967", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nitral-AI/Hathor_Stable-v0.2-L3-8B", - "developer": "Nitral-AI", - "inference_platform": "unknown", - "id": "Nitral-AI/Hathor_Stable-v0.2-L3-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7174840534226963 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5285819178301682 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10498489425981873 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3780625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36959773936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/Nitral-AI/Hathor_Stable-v0.2-L3-8B/d162cf7c-3ef4-420f-aab4-789a98b1195a.json b/data/hfopenllm_v2/Nitral-AI/Hathor_Stable-v0.2-L3-8B/d162cf7c-3ef4-420f-aab4-789a98b1195a.json new file mode 100644 index 000000000..74bd3f7c0 --- /dev/null +++ b/data/hfopenllm_v2/Nitral-AI/Hathor_Stable-v0.2-L3-8B/d162cf7c-3ef4-420f-aab4-789a98b1195a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nitral-AI_Hathor_Stable-v0.2-L3-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Hathor_Stable-v0.2-L3-8B", + "id": "Nitral-AI/Hathor_Stable-v0.2-L3-8B", + "developer": "Nitral-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7175 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5286 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.105 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3781 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3696 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/Nitral-AI/Hathor_Tahsin-L3-8B-v0.85/7e49018e-5e2d-4cdb-be5b-2ac04ec84bf5.json b/data/hfopenllm_v2/Nitral-AI/Hathor_Tahsin-L3-8B-v0.85/7e49018e-5e2d-4cdb-be5b-2ac04ec84bf5.json new file mode 100644 index 000000000..b03d999be --- /dev/null +++ b/data/hfopenllm_v2/Nitral-AI/Hathor_Tahsin-L3-8B-v0.85/7e49018e-5e2d-4cdb-be5b-2ac04ec84bf5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nitral-AI_Hathor_Tahsin-L3-8B-v0.85/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Hathor_Tahsin-L3-8B-v0.85", + "id": "Nitral-AI/Hathor_Tahsin-L3-8B-v0.85", + "developer": "Nitral-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.711 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5279 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1005 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3647 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.372 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nitral-AI/Hathor_Tahsin-L3-8B-v0.85/a73461e6-a1f4-43c9-9a0f-f03c9be46276.json b/data/hfopenllm_v2/Nitral-AI/Hathor_Tahsin-L3-8B-v0.85/a73461e6-a1f4-43c9-9a0f-f03c9be46276.json deleted file mode 
100644 index a91c92ff6..000000000 --- a/data/hfopenllm_v2/Nitral-AI/Hathor_Tahsin-L3-8B-v0.85/a73461e6-a1f4-43c9-9a0f-f03c9be46276.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nitral-AI_Hathor_Tahsin-L3-8B-v0.85/1762652579.786179", - "retrieved_timestamp": "1762652579.78618", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nitral-AI/Hathor_Tahsin-L3-8B-v0.85", - "developer": "Nitral-AI", - "inference_platform": "unknown", - "id": "Nitral-AI/Hathor_Tahsin-L3-8B-v0.85", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7110145524984818 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5279036861109899 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10045317220543806 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3646666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37200797872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/Nitral-AI/Nera_Noctis-12B/24677f2a-ea89-4289-bcb6-13699de9782f.json b/data/hfopenllm_v2/Nitral-AI/Nera_Noctis-12B/24677f2a-ea89-4289-bcb6-13699de9782f.json new file mode 100644 index 000000000..2cbdf842f --- /dev/null +++ b/data/hfopenllm_v2/Nitral-AI/Nera_Noctis-12B/24677f2a-ea89-4289-bcb6-13699de9782f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nitral-AI_Nera_Noctis-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nera_Noctis-12B", + "id": "Nitral-AI/Nera_Noctis-12B", + "developer": "Nitral-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + 
"evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4562 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5194 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0876 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3979 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3468 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nitral-AI/Nera_Noctis-12B/2f5caa38-56e9-4740-baca-22fb02e57150.json b/data/hfopenllm_v2/Nitral-AI/Nera_Noctis-12B/2f5caa38-56e9-4740-baca-22fb02e57150.json deleted file mode 100644 index 010662639..000000000 --- a/data/hfopenllm_v2/Nitral-AI/Nera_Noctis-12B/2f5caa38-56e9-4740-baca-22fb02e57150.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nitral-AI_Nera_Noctis-12B/1762652579.786392", - "retrieved_timestamp": "1762652579.7863932", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nitral-AI/Nera_Noctis-12B", - "developer": "Nitral-AI", - "inference_platform": "unknown", - "id": "Nitral-AI/Nera_Noctis-12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45617517076911485 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5193675192746302 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08761329305135952 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39790624999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3468251329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/Nohobby/MS-Schisandra-22B-v0.1/3e09df3c-2224-4a29-8e55-18a485db2b25.json b/data/hfopenllm_v2/Nohobby/MS-Schisandra-22B-v0.1/3e09df3c-2224-4a29-8e55-18a485db2b25.json new file mode 100644 index 000000000..a6a01b8ab --- /dev/null +++ b/data/hfopenllm_v2/Nohobby/MS-Schisandra-22B-v0.1/3e09df3c-2224-4a29-8e55-18a485db2b25.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nohobby_MS-Schisandra-22B-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MS-Schisandra-22B-v0.1", + "id": "Nohobby/MS-Schisandra-22B-v0.1", + "developer": "Nohobby", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6331 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.579 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2228 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3322 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3928 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4096 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nohobby/MS-Schisandra-22B-v0.1/9836e2c7-30df-421d-bf02-d4434f97d990.json b/data/hfopenllm_v2/Nohobby/MS-Schisandra-22B-v0.1/9836e2c7-30df-421d-bf02-d4434f97d990.json deleted file mode 100644 index 65237560d..000000000 --- a/data/hfopenllm_v2/Nohobby/MS-Schisandra-22B-v0.1/9836e2c7-30df-421d-bf02-d4434f97d990.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nohobby_MS-Schisandra-22B-v0.1/1762652579.786606", - "retrieved_timestamp": "1762652579.786607", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nohobby/MS-Schisandra-22B-v0.1", - "developer": "Nohobby", - "inference_platform": "unknown", - "id": "Nohobby/MS-Schisandra-22B-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 22.247 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6331289866443259 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5789949714896523 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22280966767371602 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33221476510067116 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39284375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4095744680851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/Nohobby/MS-Schisandra-22B-v0.2/9a263094-fb31-43b9-9307-6ae5f64f82c0.json b/data/hfopenllm_v2/Nohobby/MS-Schisandra-22B-v0.2/9a263094-fb31-43b9-9307-6ae5f64f82c0.json deleted file mode 100644 index 61c7e3c36..000000000 --- a/data/hfopenllm_v2/Nohobby/MS-Schisandra-22B-v0.2/9a263094-fb31-43b9-9307-6ae5f64f82c0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nohobby_MS-Schisandra-22B-v0.2/1762652579.78686", - "retrieved_timestamp": "1762652579.786861", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nohobby/MS-Schisandra-22B-v0.2", - "developer": "Nohobby", - "inference_platform": "unknown", - "id": "Nohobby/MS-Schisandra-22B-v0.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 22.247 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6382997114323329 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5841215984231857 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20317220543806647 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33557046979865773 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40747916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4136469414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/Nohobby/MS-Schisandra-22B-v0.2/cc0bd236-8fc4-43d3-a18f-4b2afb112946.json b/data/hfopenllm_v2/Nohobby/MS-Schisandra-22B-v0.2/cc0bd236-8fc4-43d3-a18f-4b2afb112946.json new file mode 100644 index 000000000..c97104188 --- /dev/null +++ 
b/data/hfopenllm_v2/Nohobby/MS-Schisandra-22B-v0.2/cc0bd236-8fc4-43d3-a18f-4b2afb112946.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Nohobby_MS-Schisandra-22B-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MS-Schisandra-22B-v0.2", + "id": "Nohobby/MS-Schisandra-22B-v0.2", + "developer": "Nohobby", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6383 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5841 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2032 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3356 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4075 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4136 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Norquinal/Alpha/5afd4c0f-b61d-452f-8c48-d298780d91d5.json b/data/hfopenllm_v2/Norquinal/Alpha/5afd4c0f-b61d-452f-8c48-d298780d91d5.json new file mode 100644 index 000000000..a242e53cd --- /dev/null +++ b/data/hfopenllm_v2/Norquinal/Alpha/5afd4c0f-b61d-452f-8c48-d298780d91d5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Norquinal_Alpha/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + 
"source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Alpha", + "id": "Norquinal/Alpha", + "developer": "Norquinal", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2803 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3374 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0574 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3631 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Norquinal/Alpha/6ce53368-e6b5-45a1-a997-ca5468f27c13.json b/data/hfopenllm_v2/Norquinal/Alpha/6ce53368-e6b5-45a1-a997-ca5468f27c13.json deleted file mode 100644 index c7aef5c34..000000000 --- a/data/hfopenllm_v2/Norquinal/Alpha/6ce53368-e6b5-45a1-a997-ca5468f27c13.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Norquinal_Alpha/1762652579.787071", - "retrieved_timestamp": "1762652579.787072", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Norquinal/Alpha", - 
"developer": "Norquinal", - "inference_platform": "unknown", - "id": "Norquinal/Alpha", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802951723648808 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3373652507108038 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36308333333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30028257978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/Norquinal/Bravo/dbdae48e-5023-453f-b15f-cf779068e030.json b/data/hfopenllm_v2/Norquinal/Bravo/dbdae48e-5023-453f-b15f-cf779068e030.json deleted file mode 100644 index afc8f15da..000000000 --- a/data/hfopenllm_v2/Norquinal/Bravo/dbdae48e-5023-453f-b15f-cf779068e030.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Norquinal_Bravo/1762652579.787321", - "retrieved_timestamp": "1762652579.787322", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Norquinal/Bravo", - "developer": "Norquinal", - "inference_platform": "unknown", - "id": "Norquinal/Bravo", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3024519386339357 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3558431980261287 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38686458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.312749335106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/Norquinal/Bravo/eac52141-4fd8-4e21-9c78-920ab8933e5a.json b/data/hfopenllm_v2/Norquinal/Bravo/eac52141-4fd8-4e21-9c78-920ab8933e5a.json new file mode 100644 index 000000000..4a140030a --- /dev/null +++ b/data/hfopenllm_v2/Norquinal/Bravo/eac52141-4fd8-4e21-9c78-920ab8933e5a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Norquinal_Bravo/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Bravo", + "id": "Norquinal/Bravo", + "developer": "Norquinal", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3025 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3558 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0574 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2819 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3869 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3127 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Norquinal/Charlie/31f784e4-bded-48d8-b7a6-7936b5d21d9e.json b/data/hfopenllm_v2/Norquinal/Charlie/31f784e4-bded-48d8-b7a6-7936b5d21d9e.json deleted file mode 100644 index ad49384f8..000000000 --- a/data/hfopenllm_v2/Norquinal/Charlie/31f784e4-bded-48d8-b7a6-7936b5d21d9e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Norquinal_Charlie/1762652579.787528", - "retrieved_timestamp": "1762652579.787528", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Norquinal/Charlie", - "developer": "Norquinal", - "inference_platform": "unknown", - "id": "Norquinal/Charlie", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3060989286205047 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3515288346438244 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0581570996978852 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3736875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30925864361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/Norquinal/Charlie/8449837f-64ac-4293-b1f8-210e62779202.json b/data/hfopenllm_v2/Norquinal/Charlie/8449837f-64ac-4293-b1f8-210e62779202.json new file mode 100644 index 000000000..eb7553fa4 --- /dev/null +++ 
b/data/hfopenllm_v2/Norquinal/Charlie/8449837f-64ac-4293-b1f8-210e62779202.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Norquinal_Charlie/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Charlie", + "id": "Norquinal/Charlie", + "developer": "Norquinal", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3061 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3515 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0582 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3737 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3093 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Norquinal/Delta/684a3a6e-c74d-456f-b80e-c099b8c9973c.json b/data/hfopenllm_v2/Norquinal/Delta/684a3a6e-c74d-456f-b80e-c099b8c9973c.json deleted file mode 100644 index be8764cf3..000000000 --- a/data/hfopenllm_v2/Norquinal/Delta/684a3a6e-c74d-456f-b80e-c099b8c9973c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Norquinal_Delta/1762652579.78773", - "retrieved_timestamp": "1762652579.787731", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Norquinal/Delta", - "developer": "Norquinal", - "inference_platform": "unknown", - "id": "Norquinal/Delta", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.253842028041153 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3434783285415976 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3776875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2958776595744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/Norquinal/Delta/ab8a665c-8234-484f-a8a9-8ee79d73edff.json b/data/hfopenllm_v2/Norquinal/Delta/ab8a665c-8234-484f-a8a9-8ee79d73edff.json new file mode 100644 index 000000000..bbcdf56d1 --- /dev/null +++ b/data/hfopenllm_v2/Norquinal/Delta/ab8a665c-8234-484f-a8a9-8ee79d73edff.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Norquinal_Delta/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Delta", + "id": "Norquinal/Delta", + "developer": "Norquinal", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2538 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": 
"hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3435 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0612 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3777 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2959 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Norquinal/Echo/a954242f-41a6-49d7-a71d-3bfe940cdb92.json b/data/hfopenllm_v2/Norquinal/Echo/a954242f-41a6-49d7-a71d-3bfe940cdb92.json new file mode 100644 index 000000000..b0482210d --- /dev/null +++ b/data/hfopenllm_v2/Norquinal/Echo/a954242f-41a6-49d7-a71d-3bfe940cdb92.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Norquinal_Echo/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Echo", + "id": "Norquinal/Echo", + "developer": "Norquinal", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3158 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.353 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + 
"dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0574 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3804 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3095 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Norquinal/Echo/f2f250f7-8cb0-4076-b2f0-7cf8ee911532.json b/data/hfopenllm_v2/Norquinal/Echo/f2f250f7-8cb0-4076-b2f0-7cf8ee911532.json deleted file mode 100644 index 26ebdea26..000000000 --- a/data/hfopenllm_v2/Norquinal/Echo/f2f250f7-8cb0-4076-b2f0-7cf8ee911532.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Norquinal_Echo/1762652579.787929", - "retrieved_timestamp": "1762652579.787929", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Norquinal/Echo", - "developer": "Norquinal", - "inference_platform": "unknown", - "id": "Norquinal/Echo", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31579099012841483 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35304654390055795 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3804479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30950797872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/Norquinal/Foxtrot/2a4428d4-a6c9-427c-ba67-72f08b590b8e.json b/data/hfopenllm_v2/Norquinal/Foxtrot/2a4428d4-a6c9-427c-ba67-72f08b590b8e.json deleted file mode 100644 index d7f2466e2..000000000 --- a/data/hfopenllm_v2/Norquinal/Foxtrot/2a4428d4-a6c9-427c-ba67-72f08b590b8e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Norquinal_Foxtrot/1762652579.788121", - "retrieved_timestamp": "1762652579.788121", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Norquinal/Foxtrot", - "developer": "Norquinal", - "inference_platform": "unknown", - "id": "Norquinal/Foxtrot", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011531624977283 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3558026577191667 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0581570996978852 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3804166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30501994680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/Norquinal/Foxtrot/6d1c518f-3f42-49eb-9208-b30e27e7e87e.json b/data/hfopenllm_v2/Norquinal/Foxtrot/6d1c518f-3f42-49eb-9208-b30e27e7e87e.json new file mode 100644 index 000000000..eb075ffce --- /dev/null +++ 
b/data/hfopenllm_v2/Norquinal/Foxtrot/6d1c518f-3f42-49eb-9208-b30e27e7e87e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Norquinal_Foxtrot/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Foxtrot", + "id": "Norquinal/Foxtrot", + "developer": "Norquinal", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3558 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0582 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3804 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.305 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Norquinal/Golf/87931db7-42a4-48df-b5a5-8bd934061dbe.json b/data/hfopenllm_v2/Norquinal/Golf/87931db7-42a4-48df-b5a5-8bd934061dbe.json new file mode 100644 index 000000000..a262d2157 --- /dev/null +++ b/data/hfopenllm_v2/Norquinal/Golf/87931db7-42a4-48df-b5a5-8bd934061dbe.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Norquinal_Golf/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", 
+ "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Golf", + "id": "Norquinal/Golf", + "developer": "Norquinal", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3534 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3533 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0536 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.338 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3056 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Norquinal/Golf/dfdcfbfa-c023-40bf-b5e3-632b45f28aab.json b/data/hfopenllm_v2/Norquinal/Golf/dfdcfbfa-c023-40bf-b5e3-632b45f28aab.json deleted file mode 100644 index 7b93483e4..000000000 --- a/data/hfopenllm_v2/Norquinal/Golf/dfdcfbfa-c023-40bf-b5e3-632b45f28aab.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Norquinal_Golf/1762652579.788314", - "retrieved_timestamp": "1762652579.7883148", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Norquinal/Golf", - "developer": "Norquinal", - "inference_platform": "unknown", - "id": 
"Norquinal/Golf", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3533601953926692 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35332648991705207 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.338 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30560172872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/Norquinal/Hotel/54088dbc-04cc-4b35-b4e1-e495b7cfd47f.json b/data/hfopenllm_v2/Norquinal/Hotel/54088dbc-04cc-4b35-b4e1-e495b7cfd47f.json new file mode 100644 index 000000000..21264522c --- /dev/null +++ b/data/hfopenllm_v2/Norquinal/Hotel/54088dbc-04cc-4b35-b4e1-e495b7cfd47f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Norquinal_Hotel/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Hotel", + "id": "Norquinal/Hotel", + "developer": "Norquinal", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3215 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3679 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + 
"hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0529 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3288 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3157 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Norquinal/Hotel/f91abb9a-6690-4fec-b1a7-f519dfe66d24.json b/data/hfopenllm_v2/Norquinal/Hotel/f91abb9a-6690-4fec-b1a7-f519dfe66d24.json deleted file mode 100644 index a078dccc8..000000000 --- a/data/hfopenllm_v2/Norquinal/Hotel/f91abb9a-6690-4fec-b1a7-f519dfe66d24.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Norquinal_Hotel/1762652579.788509", - "retrieved_timestamp": "1762652579.7885098", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Norquinal/Hotel", - "developer": "Norquinal", - "inference_platform": "unknown", - "id": "Norquinal/Hotel", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3215113676157041 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36785702492059275 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.052870090634441085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, 
- { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3156582446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/NotASI/FineTome-Llama3.2-1B-0929/7129efad-8ab2-4f7a-b6ed-055989b3e131.json b/data/hfopenllm_v2/NotASI/FineTome-Llama3.2-1B-0929/7129efad-8ab2-4f7a-b6ed-055989b3e131.json new file mode 100644 index 000000000..0d3997068 --- /dev/null +++ b/data/hfopenllm_v2/NotASI/FineTome-Llama3.2-1B-0929/7129efad-8ab2-4f7a-b6ed-055989b3e131.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NotASI_FineTome-Llama3.2-1B-0929/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FineTome-Llama3.2-1B-0929", + "id": "NotASI/FineTome-Llama3.2-1B-0929", + "developer": "NotASI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3991 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3246 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0363 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3488 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": 
"hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1429 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NotASI/FineTome-Llama3.2-3B-1002/cfc6f85f-e4b6-4164-b7eb-4efb888e1ba5.json b/data/hfopenllm_v2/NotASI/FineTome-Llama3.2-3B-1002/cfc6f85f-e4b6-4164-b7eb-4efb888e1ba5.json new file mode 100644 index 000000000..acaee36c4 --- /dev/null +++ b/data/hfopenllm_v2/NotASI/FineTome-Llama3.2-3B-1002/cfc6f85f-e4b6-4164-b7eb-4efb888e1ba5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NotASI_FineTome-Llama3.2-3B-1002/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FineTome-Llama3.2-3B-1002", + "id": "NotASI/FineTome-Llama3.2-3B-1002", + "developer": "NotASI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5474 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4319 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0627 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2508 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3685 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.2437 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NotASI/FineTome-v1.5-Llama3.2-1B-1007/0f053a45-cd79-4e51-9b4c-ae5c51006c17.json b/data/hfopenllm_v2/NotASI/FineTome-v1.5-Llama3.2-1B-1007/0f053a45-cd79-4e51-9b4c-ae5c51006c17.json new file mode 100644 index 000000000..eb26e5748 --- /dev/null +++ b/data/hfopenllm_v2/NotASI/FineTome-v1.5-Llama3.2-1B-1007/0f053a45-cd79-4e51-9b4c-ae5c51006c17.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NotASI_FineTome-v1.5-Llama3.2-1B-1007/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FineTome-v1.5-Llama3.2-1B-1007", + "id": "NotASI/FineTome-v1.5-Llama3.2-1B-1007", + "developer": "NotASI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3924 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3241 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0317 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3475 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1427 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NotASI/FineTome-v1.5-Llama3.2-3B-1007/d8002b35-1454-4635-a31e-b419c7000b53.json 
b/data/hfopenllm_v2/NotASI/FineTome-v1.5-Llama3.2-3B-1007/d8002b35-1454-4635-a31e-b419c7000b53.json new file mode 100644 index 000000000..d99453b0a --- /dev/null +++ b/data/hfopenllm_v2/NotASI/FineTome-v1.5-Llama3.2-3B-1007/d8002b35-1454-4635-a31e-b419c7000b53.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NotASI_FineTome-v1.5-Llama3.2-3B-1007/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FineTome-v1.5-Llama3.2-3B-1007", + "id": "NotASI/FineTome-v1.5-Llama3.2-3B-1007", + "developer": "NotASI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5508 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4312 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0642 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3645 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2448 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NousResearch/DeepHermes-3-Mistral-24B-Preview/4c08530e-d529-49a1-a3fe-2351c422981a.json b/data/hfopenllm_v2/NousResearch/DeepHermes-3-Mistral-24B-Preview/4c08530e-d529-49a1-a3fe-2351c422981a.json new file mode 100644 index 000000000..df49d3613 --- /dev/null +++ 
b/data/hfopenllm_v2/NousResearch/DeepHermes-3-Mistral-24B-Preview/4c08530e-d529-49a1-a3fe-2351c422981a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NousResearch_DeepHermes-3-Mistral-24B-Preview/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepHermes-3-Mistral-24B-Preview", + "id": "NousResearch/DeepHermes-3-Mistral-24B-Preview", + "developer": "NousResearch", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4536 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6488 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.37 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4503 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.459 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NousResearch/Hermes-2-Pro-Llama-3-8B/d16879dc-7ed7-49c4-aca6-4c9cd3b3a350.json b/data/hfopenllm_v2/NousResearch/Hermes-2-Pro-Llama-3-8B/d16879dc-7ed7-49c4-aca6-4c9cd3b3a350.json new file mode 100644 index 000000000..bbff9c46a --- /dev/null +++ b/data/hfopenllm_v2/NousResearch/Hermes-2-Pro-Llama-3-8B/d16879dc-7ed7-49c4-aca6-4c9cd3b3a350.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/NousResearch_Hermes-2-Pro-Llama-3-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Hermes-2-Pro-Llama-3-8B", + "id": "NousResearch/Hermes-2-Pro-Llama-3-8B", + "developer": "NousResearch", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.031 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5362 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5071 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0838 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4262 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3052 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NousResearch/Hermes-2-Pro-Mistral-7B/70656b13-e0a2-4ef4-af43-0d9995d57af6.json b/data/hfopenllm_v2/NousResearch/Hermes-2-Pro-Mistral-7B/70656b13-e0a2-4ef4-af43-0d9995d57af6.json new file mode 100644 index 000000000..ec8e4de73 --- /dev/null +++ b/data/hfopenllm_v2/NousResearch/Hermes-2-Pro-Mistral-7B/70656b13-e0a2-4ef4-af43-0d9995d57af6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NousResearch_Hermes-2-Pro-Mistral-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Hermes-2-Pro-Mistral-7B", + "id": "NousResearch/Hermes-2-Pro-Mistral-7B", + "developer": "NousResearch", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5668 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4995 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0604 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2735 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4376 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2946 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NousResearch/Hermes-2-Theta-Llama-3-8B/6544f1ca-02a6-4e58-98f0-e19cc6082682.json b/data/hfopenllm_v2/NousResearch/Hermes-2-Theta-Llama-3-8B/6544f1ca-02a6-4e58-98f0-e19cc6082682.json new file mode 100644 index 000000000..b3a330363 --- /dev/null +++ b/data/hfopenllm_v2/NousResearch/Hermes-2-Theta-Llama-3-8B/6544f1ca-02a6-4e58-98f0-e19cc6082682.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NousResearch_Hermes-2-Theta-Llama-3-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Hermes-2-Theta-Llama-3-8B", + "id": "NousResearch/Hermes-2-Theta-Llama-3-8B", + 
"developer": "NousResearch", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6518 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5207 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0967 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3949 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3369 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NousResearch/Hermes-3-Llama-3.1-70B/5cd3796f-fb31-49c1-a974-019c5c5b20ae.json b/data/hfopenllm_v2/NousResearch/Hermes-3-Llama-3.1-70B/5cd3796f-fb31-49c1-a974-019c5c5b20ae.json new file mode 100644 index 000000000..75935fc92 --- /dev/null +++ b/data/hfopenllm_v2/NousResearch/Hermes-3-Llama-3.1-70B/5cd3796f-fb31-49c1-a974-019c5c5b20ae.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NousResearch_Hermes-3-Llama-3.1-70B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Hermes-3-Llama-3.1-70B", + "id": "NousResearch/Hermes-3-Llama-3.1-70B", + "developer": "NousResearch", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": 
[ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7661 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6756 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.21 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3616 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4949 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4727 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NousResearch/Hermes-3-Llama-3.1-8B/49eff9ad-90c9-43b1-a1f5-cf371ac4b39b.json b/data/hfopenllm_v2/NousResearch/Hermes-3-Llama-3.1-8B/49eff9ad-90c9-43b1-a1f5-cf371ac4b39b.json new file mode 100644 index 000000000..0ebe2c0ab --- /dev/null +++ b/data/hfopenllm_v2/NousResearch/Hermes-3-Llama-3.1-8B/49eff9ad-90c9-43b1-a1f5-cf371ac4b39b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NousResearch_Hermes-3-Llama-3.1-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Hermes-3-Llama-3.1-8B", + "id": "NousResearch/Hermes-3-Llama-3.1-8B", + "developer": "NousResearch", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.617 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5177 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0476 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4369 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3139 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NousResearch/Hermes-3-Llama-3.2-3B/59720f7e-7e09-483f-8332-8dc7aa19ae78.json b/data/hfopenllm_v2/NousResearch/Hermes-3-Llama-3.2-3B/59720f7e-7e09-483f-8332-8dc7aa19ae78.json new file mode 100644 index 000000000..248fd1575 --- /dev/null +++ b/data/hfopenllm_v2/NousResearch/Hermes-3-Llama-3.2-3B/59720f7e-7e09-483f-8332-8dc7aa19ae78.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NousResearch_Hermes-3-Llama-3.2-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Hermes-3-Llama-3.2-3B", + "id": "NousResearch/Hermes-3-Llama-3.2-3B", + "developer": "NousResearch", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3825 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": 
"BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4352 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0393 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2752 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.403 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2544 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NousResearch/Nous-Hermes-2-Mistral-7B-DPO/877421ae-8135-485f-805e-489ed70dc886.json b/data/hfopenllm_v2/NousResearch/Nous-Hermes-2-Mistral-7B-DPO/877421ae-8135-485f-805e-489ed70dc886.json deleted file mode 100644 index 7b0331266..000000000 --- a/data/hfopenllm_v2/NousResearch/Nous-Hermes-2-Mistral-7B-DPO/877421ae-8135-485f-805e-489ed70dc886.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/NousResearch_Nous-Hermes-2-Mistral-7B-DPO/1762652579.7912042", - "retrieved_timestamp": "1762652579.7912052", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NousResearch/Nous-Hermes-2-Mistral-7B-DPO", - "developer": "NousResearch", - "inference_platform": "unknown", - "id": "NousResearch/Nous-Hermes-2-Mistral-7B-DPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5762510139762497 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { 
- "score": 0.48526536654652347 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099697 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3999791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3015292553191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/NousResearch/Nous-Hermes-2-Mistral-7B-DPO/a3a89e4a-0589-4776-a1da-227552482e94.json b/data/hfopenllm_v2/NousResearch/Nous-Hermes-2-Mistral-7B-DPO/a3a89e4a-0589-4776-a1da-227552482e94.json new file mode 100644 index 000000000..c982b95bd --- /dev/null +++ b/data/hfopenllm_v2/NousResearch/Nous-Hermes-2-Mistral-7B-DPO/a3a89e4a-0589-4776-a1da-227552482e94.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NousResearch_Nous-Hermes-2-Mistral-7B-DPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nous-Hermes-2-Mistral-7B-DPO", + "id": "NousResearch/Nous-Hermes-2-Mistral-7B-DPO", + "developer": "NousResearch", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5763 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4853 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0476 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3015 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO/b3c04d1f-80e3-4d86-9779-c5e4bbce6f35.json b/data/hfopenllm_v2/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO/b3c04d1f-80e3-4d86-9779-c5e4bbce6f35.json new file mode 100644 index 000000000..44ac522b4 --- /dev/null +++ b/data/hfopenllm_v2/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO/b3c04d1f-80e3-4d86-9779-c5e4bbce6f35.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NousResearch_Nous-Hermes-2-Mixtral-8x7B-DPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nous-Hermes-2-Mixtral-8x7B-DPO", + "id": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", + "developer": "NousResearch", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 46.703 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5897 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5539 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1224 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3213 + } + 
}, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4595 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3666 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO/bc2d14fe-000a-40ce-a57c-c00fe584a7e4.json b/data/hfopenllm_v2/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO/bc2d14fe-000a-40ce-a57c-c00fe584a7e4.json deleted file mode 100644 index deecc3b86..000000000 --- a/data/hfopenllm_v2/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO/bc2d14fe-000a-40ce-a57c-c00fe584a7e4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/NousResearch_Nous-Hermes-2-Mixtral-8x7B-DPO/1762652579.791439", - "retrieved_timestamp": "1762652579.7914398", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", - "developer": "NousResearch", - "inference_platform": "unknown", - "id": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 46.703 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5896898008395501 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5538851384033822 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12235649546827794 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4595416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3666057180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT/3c196d70-44ad-419c-8c4c-80fc7f184687.json b/data/hfopenllm_v2/NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT/3c196d70-44ad-419c-8c4c-80fc7f184687.json deleted file mode 100644 index da5ff3116..000000000 --- a/data/hfopenllm_v2/NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT/3c196d70-44ad-419c-8c4c-80fc7f184687.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/NousResearch_Nous-Hermes-2-Mixtral-8x7B-SFT/1762652579.791643", - "retrieved_timestamp": "1762652579.7916439", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT", - "developer": "NousResearch", - "inference_platform": "unknown", - "id": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 46.703 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5730783210769648 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5057868454026635 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.021148036253776436 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.421375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30659906914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT/448fda35-bfdc-42ae-90f9-d44383e0a454.json b/data/hfopenllm_v2/NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT/448fda35-bfdc-42ae-90f9-d44383e0a454.json new file mode 100644 index 000000000..d14264647 --- /dev/null +++ b/data/hfopenllm_v2/NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT/448fda35-bfdc-42ae-90f9-d44383e0a454.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NousResearch_Nous-Hermes-2-Mixtral-8x7B-SFT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + 
"source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nous-Hermes-2-Mixtral-8x7B-SFT", + "id": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT", + "developer": "NousResearch", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 46.703 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5731 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5058 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0211 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4214 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3066 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NousResearch/Nous-Hermes-2-SOLAR-10.7B/0d97542e-82b6-4f27-9822-62b67e7690c2.json b/data/hfopenllm_v2/NousResearch/Nous-Hermes-2-SOLAR-10.7B/0d97542e-82b6-4f27-9822-62b67e7690c2.json new file mode 100644 index 000000000..d0f799596 --- /dev/null +++ b/data/hfopenllm_v2/NousResearch/Nous-Hermes-2-SOLAR-10.7B/0d97542e-82b6-4f27-9822-62b67e7690c2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NousResearch_Nous-Hermes-2-SOLAR-10.7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nous-Hermes-2-SOLAR-10.7B", + 
"id": "NousResearch/Nous-Hermes-2-SOLAR-10.7B", + "developer": "NousResearch", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5279 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5414 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0574 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4373 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3458 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NousResearch/Nous-Hermes-2-SOLAR-10.7B/80a7b60b-77f7-4dbf-96c8-071c56179fec.json b/data/hfopenllm_v2/NousResearch/Nous-Hermes-2-SOLAR-10.7B/80a7b60b-77f7-4dbf-96c8-071c56179fec.json deleted file mode 100644 index ef062714e..000000000 --- a/data/hfopenllm_v2/NousResearch/Nous-Hermes-2-SOLAR-10.7B/80a7b60b-77f7-4dbf-96c8-071c56179fec.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/NousResearch_Nous-Hermes-2-SOLAR-10.7B/1762652579.791853", - "retrieved_timestamp": "1762652579.7918541", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NousResearch/Nous-Hermes-2-SOLAR-10.7B", - "developer": "NousResearch", - "inference_platform": "unknown", - 
"id": "NousResearch/Nous-Hermes-2-SOLAR-10.7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5278660620486975 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5414294841140173 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43728125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3458277925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/NousResearch/Nous-Hermes-llama-2-7b/2725bd69-839d-4427-8e05-0e289fff70de.json b/data/hfopenllm_v2/NousResearch/Nous-Hermes-llama-2-7b/2725bd69-839d-4427-8e05-0e289fff70de.json new file mode 100644 index 000000000..07b68eec5 --- /dev/null +++ b/data/hfopenllm_v2/NousResearch/Nous-Hermes-llama-2-7b/2725bd69-839d-4427-8e05-0e289fff70de.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NousResearch_Nous-Hermes-llama-2-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nous-Hermes-llama-2-7b", + "id": "NousResearch/Nous-Hermes-llama-2-7b", + "developer": "NousResearch", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.738 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1729 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.3824 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0091 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4257 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.194 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NousResearch/Yarn-Llama-2-13b-128k/adb71488-adb8-4848-bf1d-aecd04cb6718.json b/data/hfopenllm_v2/NousResearch/Yarn-Llama-2-13b-128k/adb71488-adb8-4848-bf1d-aecd04cb6718.json new file mode 100644 index 000000000..d612c4d8c --- /dev/null +++ b/data/hfopenllm_v2/NousResearch/Yarn-Llama-2-13b-128k/adb71488-adb8-4848-bf1d-aecd04cb6718.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NousResearch_Yarn-Llama-2-13b-128k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yarn-Llama-2-13b-128k", + "id": "NousResearch/Yarn-Llama-2-13b-128k", + "developer": "NousResearch", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1655 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3827 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": 
{ + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0174 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3458 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.232 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NousResearch/Yarn-Llama-2-7b-128k/c7736577-c4c3-4233-9308-a4bb9b2dbb89.json b/data/hfopenllm_v2/NousResearch/Yarn-Llama-2-7b-128k/c7736577-c4c3-4233-9308-a4bb9b2dbb89.json new file mode 100644 index 000000000..ca3a9ae2e --- /dev/null +++ b/data/hfopenllm_v2/NousResearch/Yarn-Llama-2-7b-128k/c7736577-c4c3-4233-9308-a4bb9b2dbb89.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NousResearch_Yarn-Llama-2-7b-128k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yarn-Llama-2-7b-128k", + "id": "NousResearch/Yarn-Llama-2-7b-128k", + "developer": "NousResearch", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1485 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3248 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0151 + } + }, + { + 
"evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3967 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1791 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NousResearch/Yarn-Llama-2-7b-64k/76fe52f4-9fa5-4ccb-8c92-7bd9eb9886ee.json b/data/hfopenllm_v2/NousResearch/Yarn-Llama-2-7b-64k/76fe52f4-9fa5-4ccb-8c92-7bd9eb9886ee.json new file mode 100644 index 000000000..a38d9e7b9 --- /dev/null +++ b/data/hfopenllm_v2/NousResearch/Yarn-Llama-2-7b-64k/76fe52f4-9fa5-4ccb-8c92-7bd9eb9886ee.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NousResearch_Yarn-Llama-2-7b-64k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yarn-Llama-2-7b-64k", + "id": "NousResearch/Yarn-Llama-2-7b-64k", + "developer": "NousResearch", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.17 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3326 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0159 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3939 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1799 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NousResearch/Yarn-Mistral-7b-128k/1d92e45f-c5a5-4dd6-a61f-8e0f7246117a.json b/data/hfopenllm_v2/NousResearch/Yarn-Mistral-7b-128k/1d92e45f-c5a5-4dd6-a61f-8e0f7246117a.json new file mode 100644 index 000000000..8725d08a6 --- /dev/null +++ b/data/hfopenllm_v2/NousResearch/Yarn-Mistral-7b-128k/1d92e45f-c5a5-4dd6-a61f-8e0f7246117a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NousResearch_Yarn-Mistral-7b-128k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yarn-Mistral-7b-128k", + "id": "NousResearch/Yarn-Mistral-7b-128k", + "developer": "NousResearch", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1934 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4314 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0317 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": 
"hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4071 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2893 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NousResearch/Yarn-Mistral-7b-64k/5e1513f1-4375-4380-85fa-b96a419c013b.json b/data/hfopenllm_v2/NousResearch/Yarn-Mistral-7b-64k/5e1513f1-4375-4380-85fa-b96a419c013b.json new file mode 100644 index 000000000..6002e3e56 --- /dev/null +++ b/data/hfopenllm_v2/NousResearch/Yarn-Mistral-7b-64k/5e1513f1-4375-4380-85fa-b96a419c013b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NousResearch_Yarn-Mistral-7b-64k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yarn-Mistral-7b-64k", + "id": "NousResearch/Yarn-Mistral-7b-64k", + "developer": "NousResearch", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.208 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4293 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.037 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.4124 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2914 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NousResearch/Yarn-Solar-10b-32k/a18a259d-1795-4848-94fd-3b9c3abfb9da.json b/data/hfopenllm_v2/NousResearch/Yarn-Solar-10b-32k/a18a259d-1795-4848-94fd-3b9c3abfb9da.json deleted file mode 100644 index 79ec67a34..000000000 --- a/data/hfopenllm_v2/NousResearch/Yarn-Solar-10b-32k/a18a259d-1795-4848-94fd-3b9c3abfb9da.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/NousResearch_Yarn-Solar-10b-32k/1762652579.793437", - "retrieved_timestamp": "1762652579.793438", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NousResearch/Yarn-Solar-10b-32k", - "developer": "NousResearch", - "inference_platform": "unknown", - "id": "NousResearch/Yarn-Solar-10b-32k", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19421579187666504 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4986859152325069 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030211480362537766 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4146458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32721077127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/NousResearch/Yarn-Solar-10b-32k/fadbf3b2-283a-4f8e-9acf-463d75924b97.json b/data/hfopenllm_v2/NousResearch/Yarn-Solar-10b-32k/fadbf3b2-283a-4f8e-9acf-463d75924b97.json new file mode 100644 index 000000000..a3f6cc8a8 --- /dev/null +++ 
b/data/hfopenllm_v2/NousResearch/Yarn-Solar-10b-32k/fadbf3b2-283a-4f8e-9acf-463d75924b97.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NousResearch_Yarn-Solar-10b-32k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yarn-Solar-10b-32k", + "id": "NousResearch/Yarn-Solar-10b-32k", + "developer": "NousResearch", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1942 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4987 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0302 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4146 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3272 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NousResearch/Yarn-Solar-10b-64k/1904c811-34ae-4f52-9978-622bc6dd6f2e.json b/data/hfopenllm_v2/NousResearch/Yarn-Solar-10b-64k/1904c811-34ae-4f52-9978-622bc6dd6f2e.json deleted file mode 100644 index 8a3c5b4fb..000000000 --- a/data/hfopenllm_v2/NousResearch/Yarn-Solar-10b-64k/1904c811-34ae-4f52-9978-622bc6dd6f2e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/NousResearch_Yarn-Solar-10b-64k/1762652579.793644", - 
"retrieved_timestamp": "1762652579.7936451", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NousResearch/Yarn-Solar-10b-64k", - "developer": "NousResearch", - "inference_platform": "unknown", - "id": "NousResearch/Yarn-Solar-10b-64k", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1988867316498003 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49219907954226505 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40143750000000006 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3148271276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/NousResearch/Yarn-Solar-10b-64k/c04ffe5b-c313-4249-83bb-bbe07ad6fc69.json b/data/hfopenllm_v2/NousResearch/Yarn-Solar-10b-64k/c04ffe5b-c313-4249-83bb-bbe07ad6fc69.json new file mode 100644 index 000000000..6b8acf7a6 --- /dev/null +++ b/data/hfopenllm_v2/NousResearch/Yarn-Solar-10b-64k/c04ffe5b-c313-4249-83bb-bbe07ad6fc69.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NousResearch_Yarn-Solar-10b-64k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yarn-Solar-10b-64k", + "id": "NousResearch/Yarn-Solar-10b-64k", + "developer": "NousResearch", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1989 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4922 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0287 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4014 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3148 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Novaciano/ASTAROTH-3.2-1B/a9aa164e-386b-4987-9f49-2dde64ade45c.json b/data/hfopenllm_v2/Novaciano/ASTAROTH-3.2-1B/a9aa164e-386b-4987-9f49-2dde64ade45c.json new file mode 100644 index 000000000..da6642839 --- /dev/null +++ b/data/hfopenllm_v2/Novaciano/ASTAROTH-3.2-1B/a9aa164e-386b-4987-9f49-2dde64ade45c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Novaciano_ASTAROTH-3.2-1B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ASTAROTH-3.2-1B", + "id": "Novaciano/ASTAROTH-3.2-1B", + "developer": "Novaciano", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5613 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3543 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0733 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2559 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3142 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1909 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Novaciano/ASTAROTH-3.2-1B/e454276c-3113-49f8-9397-9c1ad5e7bcc5.json b/data/hfopenllm_v2/Novaciano/ASTAROTH-3.2-1B/e454276c-3113-49f8-9397-9c1ad5e7bcc5.json deleted file mode 100644 index 49f6c6384..000000000 --- a/data/hfopenllm_v2/Novaciano/ASTAROTH-3.2-1B/e454276c-3113-49f8-9397-9c1ad5e7bcc5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Novaciano_ASTAROTH-3.2-1B/1762652579.7938519", - "retrieved_timestamp": "1762652579.793853", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Novaciano/ASTAROTH-3.2-1B", - "developer": "Novaciano", - "inference_platform": "unknown", - "id": "Novaciano/ASTAROTH-3.2-1B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5612884923115112 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3542962056805596 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07326283987915408 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31421875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19090757978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/Novaciano/BLAST_PROCESSING-3.2-1B/61173be4-9a87-4dfa-812d-b414b4d2bccb.json b/data/hfopenllm_v2/Novaciano/BLAST_PROCESSING-3.2-1B/61173be4-9a87-4dfa-812d-b414b4d2bccb.json deleted file mode 100644 index 17a0aa521..000000000 --- a/data/hfopenllm_v2/Novaciano/BLAST_PROCESSING-3.2-1B/61173be4-9a87-4dfa-812d-b414b4d2bccb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Novaciano_BLAST_PROCESSING-3.2-1B/1762652579.794129", - "retrieved_timestamp": "1762652579.7941298", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Novaciano/BLAST_PROCESSING-3.2-1B", - "developer": "Novaciano", - "inference_platform": "unknown", - "id": "Novaciano/BLAST_PROCESSING-3.2-1B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3921783091087204 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3460318843168258 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07477341389728097 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3351458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", 
- "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19414893617021275 - } - } - ] -} diff --git a/data/hfopenllm_v2/Novaciano/BLAST_PROCESSING-3.2-1B/e4c1b3ef-e1db-4eca-b818-f3b1680cc5f0.json b/data/hfopenllm_v2/Novaciano/BLAST_PROCESSING-3.2-1B/e4c1b3ef-e1db-4eca-b818-f3b1680cc5f0.json new file mode 100644 index 000000000..3f01feccd --- /dev/null +++ b/data/hfopenllm_v2/Novaciano/BLAST_PROCESSING-3.2-1B/e4c1b3ef-e1db-4eca-b818-f3b1680cc5f0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Novaciano_BLAST_PROCESSING-3.2-1B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BLAST_PROCESSING-3.2-1B", + "id": "Novaciano/BLAST_PROCESSING-3.2-1B", + "developer": "Novaciano", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3922 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.346 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0748 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3351 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1941 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/Novaciano/Cerberus-3.2-1B/1ab95edc-ea3c-4d3f-9f59-dc7f7468adb9.json b/data/hfopenllm_v2/Novaciano/Cerberus-3.2-1B/1ab95edc-ea3c-4d3f-9f59-dc7f7468adb9.json new file mode 100644 index 000000000..850ea6a63 --- /dev/null +++ b/data/hfopenllm_v2/Novaciano/Cerberus-3.2-1B/1ab95edc-ea3c-4d3f-9f59-dc7f7468adb9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Novaciano_Cerberus-3.2-1B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Cerberus-3.2-1B", + "id": "Novaciano/Cerberus-3.2-1B", + "developer": "Novaciano", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5017 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4165 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0582 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3289 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1663 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Novaciano/Cerberus-3.2-1B/2d6ff76b-9d81-45a7-8768-6a240b5395ab.json b/data/hfopenllm_v2/Novaciano/Cerberus-3.2-1B/2d6ff76b-9d81-45a7-8768-6a240b5395ab.json deleted file mode 100644 index f50bb245f..000000000 --- 
a/data/hfopenllm_v2/Novaciano/Cerberus-3.2-1B/2d6ff76b-9d81-45a7-8768-6a240b5395ab.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Novaciano_Cerberus-3.2-1B/1762652579.7945569", - "retrieved_timestamp": "1762652579.794559", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Novaciano/Cerberus-3.2-1B", - "developer": "Novaciano", - "inference_platform": "unknown", - "id": "Novaciano/Cerberus-3.2-1B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5016877440746109 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4164937678626939 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0581570996978852 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32888541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1663065159574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/Novaciano/Cultist-3.2-1B/3dc51dce-222f-455b-b61a-04904c7fc855.json b/data/hfopenllm_v2/Novaciano/Cultist-3.2-1B/3dc51dce-222f-455b-b61a-04904c7fc855.json deleted file mode 100644 index 6964286fd..000000000 --- a/data/hfopenllm_v2/Novaciano/Cultist-3.2-1B/3dc51dce-222f-455b-b61a-04904c7fc855.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Novaciano_Cultist-3.2-1B/1762652579.7949288", - "retrieved_timestamp": "1762652579.79493", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Novaciano/Cultist-3.2-1B", - "developer": "Novaciano", - "inference_platform": "unknown", - "id": "Novaciano/Cultist-3.2-1B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - 
"params_billions": 1.498 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5294895322189568 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3399311286410264 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3330104166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17137632978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/Novaciano/Cultist-3.2-1B/80a81bbc-6edf-48b9-afb7-e4e0a03753d8.json b/data/hfopenllm_v2/Novaciano/Cultist-3.2-1B/80a81bbc-6edf-48b9-afb7-e4e0a03753d8.json new file mode 100644 index 000000000..25acd6b95 --- /dev/null +++ b/data/hfopenllm_v2/Novaciano/Cultist-3.2-1B/80a81bbc-6edf-48b9-afb7-e4e0a03753d8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Novaciano_Cultist-3.2-1B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Cultist-3.2-1B", + "id": "Novaciano/Cultist-3.2-1B", + "developer": "Novaciano", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5295 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3399 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0589 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1714 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Novaciano/FuseChat-3.2-1B-GRPO_Creative_RP/16a8882c-12f5-46d0-8e1f-88b22aa8f08c.json b/data/hfopenllm_v2/Novaciano/FuseChat-3.2-1B-GRPO_Creative_RP/16a8882c-12f5-46d0-8e1f-88b22aa8f08c.json deleted file mode 100644 index 93efd1d7d..000000000 --- a/data/hfopenllm_v2/Novaciano/FuseChat-3.2-1B-GRPO_Creative_RP/16a8882c-12f5-46d0-8e1f-88b22aa8f08c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Novaciano_FuseChat-3.2-1B-GRPO_Creative_RP/1762652579.795153", - "retrieved_timestamp": "1762652579.795153", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Novaciano/FuseChat-3.2-1B-GRPO_Creative_RP", - "developer": "Novaciano", - "inference_platform": "unknown", - "id": "Novaciano/FuseChat-3.2-1B-GRPO_Creative_RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.559814625194484 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3487816706572648 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08006042296072508 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33288541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17345412234042554 - } - } - ] -} diff --git a/data/hfopenllm_v2/Novaciano/FuseChat-3.2-1B-GRPO_Creative_RP/afb24bf8-3c47-4278-9b84-19b05017745b.json b/data/hfopenllm_v2/Novaciano/FuseChat-3.2-1B-GRPO_Creative_RP/afb24bf8-3c47-4278-9b84-19b05017745b.json new file mode 100644 index 000000000..cfdd2f4d5 --- /dev/null +++ b/data/hfopenllm_v2/Novaciano/FuseChat-3.2-1B-GRPO_Creative_RP/afb24bf8-3c47-4278-9b84-19b05017745b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Novaciano_FuseChat-3.2-1B-GRPO_Creative_RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FuseChat-3.2-1B-GRPO_Creative_RP", + "id": "Novaciano/FuseChat-3.2-1B-GRPO_Creative_RP", + "developer": "Novaciano", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5598 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3488 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0801 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2559 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3329 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1735 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Novaciano/Fusetrix-3.2-1B-GRPO_RP_Creative/4f8cda4d-959b-41ab-a79d-d2b35968eb89.json b/data/hfopenllm_v2/Novaciano/Fusetrix-3.2-1B-GRPO_RP_Creative/4f8cda4d-959b-41ab-a79d-d2b35968eb89.json new file mode 100644 index 000000000..7acb2bdcc --- /dev/null +++ b/data/hfopenllm_v2/Novaciano/Fusetrix-3.2-1B-GRPO_RP_Creative/4f8cda4d-959b-41ab-a79d-d2b35968eb89.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Novaciano_Fusetrix-3.2-1B-GRPO_RP_Creative/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fusetrix-3.2-1B-GRPO_RP_Creative", + "id": "Novaciano/Fusetrix-3.2-1B-GRPO_RP_Creative", + "developer": "Novaciano", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5366 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3435 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1148 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3209 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + 
"dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1758 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Novaciano/Fusetrix-3.2-1B-GRPO_RP_Creative/7fe4c32b-0bbd-49c0-9e4f-43306457aae8.json b/data/hfopenllm_v2/Novaciano/Fusetrix-3.2-1B-GRPO_RP_Creative/7fe4c32b-0bbd-49c0-9e4f-43306457aae8.json deleted file mode 100644 index 9132eb941..000000000 --- a/data/hfopenllm_v2/Novaciano/Fusetrix-3.2-1B-GRPO_RP_Creative/7fe4c32b-0bbd-49c0-9e4f-43306457aae8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Novaciano_Fusetrix-3.2-1B-GRPO_RP_Creative/1762652579.795362", - "retrieved_timestamp": "1762652579.795362", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Novaciano/Fusetrix-3.2-1B-GRPO_RP_Creative", - "developer": "Novaciano", - "inference_platform": "unknown", - "id": "Novaciano/Fusetrix-3.2-1B-GRPO_RP_Creative", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5366339091388627 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3434595088038714 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1148036253776435 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3209166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17578125 - } - } - ] -} diff --git a/data/hfopenllm_v2/Novaciano/Fusetrix-Dolphin-3.2-1B-GRPO_Creative_RP/2818aa8c-5c73-4de9-bcbe-fd8f68e8bc6b.json b/data/hfopenllm_v2/Novaciano/Fusetrix-Dolphin-3.2-1B-GRPO_Creative_RP/2818aa8c-5c73-4de9-bcbe-fd8f68e8bc6b.json new file mode 100644 index 000000000..4b1fc2436 --- /dev/null +++ 
b/data/hfopenllm_v2/Novaciano/Fusetrix-Dolphin-3.2-1B-GRPO_Creative_RP/2818aa8c-5c73-4de9-bcbe-fd8f68e8bc6b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Novaciano_Fusetrix-Dolphin-3.2-1B-GRPO_Creative_RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fusetrix-Dolphin-3.2-1B-GRPO_Creative_RP", + "id": "Novaciano/Fusetrix-Dolphin-3.2-1B-GRPO_Creative_RP", + "developer": "Novaciano", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5343 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3502 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.105 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3183 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1823 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Novaciano/HarmfulProject-3.2-1B/6a683ead-0f3e-449b-9ae1-8afc9f1ab33d.json b/data/hfopenllm_v2/Novaciano/HarmfulProject-3.2-1B/6a683ead-0f3e-449b-9ae1-8afc9f1ab33d.json new file mode 100644 index 000000000..2b69d3c05 --- /dev/null +++ b/data/hfopenllm_v2/Novaciano/HarmfulProject-3.2-1B/6a683ead-0f3e-449b-9ae1-8afc9f1ab33d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/Novaciano_HarmfulProject-3.2-1B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HarmfulProject-3.2-1B", + "id": "Novaciano/HarmfulProject-3.2-1B", + "developer": "Novaciano", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3874 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3274 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0476 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3419 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1823 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Novaciano/HarmfulProject-3.2-1B/99b31db9-55f8-41c2-9eb9-f21511deccf0.json b/data/hfopenllm_v2/Novaciano/HarmfulProject-3.2-1B/99b31db9-55f8-41c2-9eb9-f21511deccf0.json deleted file mode 100644 index 022a445d7..000000000 --- a/data/hfopenllm_v2/Novaciano/HarmfulProject-3.2-1B/99b31db9-55f8-41c2-9eb9-f21511deccf0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Novaciano_HarmfulProject-3.2-1B/1762652579.7958348", - "retrieved_timestamp": "1762652579.795836", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Novaciano/HarmfulProject-3.2-1B", - "developer": "Novaciano", - "inference_platform": "unknown", - "id": "Novaciano/HarmfulProject-3.2-1B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3873821460391761 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32744993658117816 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.341875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18226396276595744 - } - } - ] -} diff --git a/data/hfopenllm_v2/Novaciano/LEWD-Mental-Cultist-3.2-1B/1bce579e-9fac-46a9-92ef-48080832abbb.json b/data/hfopenllm_v2/Novaciano/LEWD-Mental-Cultist-3.2-1B/1bce579e-9fac-46a9-92ef-48080832abbb.json deleted file mode 100644 index a9e55fb83..000000000 --- a/data/hfopenllm_v2/Novaciano/LEWD-Mental-Cultist-3.2-1B/1bce579e-9fac-46a9-92ef-48080832abbb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Novaciano_LEWD-Mental-Cultist-3.2-1B/1762652579.796045", - "retrieved_timestamp": "1762652579.796046", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Novaciano/LEWD-Mental-Cultist-3.2-1B", - "developer": "Novaciano", - "inference_platform": "unknown", - "id": "Novaciano/LEWD-Mental-Cultist-3.2-1B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5308636639671627 - } - }, - 
{ - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35127188813594756 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.052870090634441085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32228125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1768617021276596 - } - } - ] -} diff --git a/data/hfopenllm_v2/Novaciano/LEWD-Mental-Cultist-3.2-1B/38cb02a8-862d-40e1-922a-e65f537df87e.json b/data/hfopenllm_v2/Novaciano/LEWD-Mental-Cultist-3.2-1B/38cb02a8-862d-40e1-922a-e65f537df87e.json new file mode 100644 index 000000000..c5ba6cc4d --- /dev/null +++ b/data/hfopenllm_v2/Novaciano/LEWD-Mental-Cultist-3.2-1B/38cb02a8-862d-40e1-922a-e65f537df87e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Novaciano_LEWD-Mental-Cultist-3.2-1B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LEWD-Mental-Cultist-3.2-1B", + "id": "Novaciano/LEWD-Mental-Cultist-3.2-1B", + "developer": "Novaciano", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5309 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3513 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0529 + } + }, + { + 
"evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2567 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3223 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1769 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Novaciano/La_Mejor_Mezcla-3.2-1B/49fef1c9-bf18-465c-acdb-b8f17e93dbad.json b/data/hfopenllm_v2/Novaciano/La_Mejor_Mezcla-3.2-1B/49fef1c9-bf18-465c-acdb-b8f17e93dbad.json deleted file mode 100644 index a9491b1d2..000000000 --- a/data/hfopenllm_v2/Novaciano/La_Mejor_Mezcla-3.2-1B/49fef1c9-bf18-465c-acdb-b8f17e93dbad.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Novaciano_La_Mejor_Mezcla-3.2-1B/1762652579.79625", - "retrieved_timestamp": "1762652579.7962508", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Novaciano/La_Mejor_Mezcla-3.2-1B", - "developer": "Novaciano", - "inference_platform": "unknown", - "id": "Novaciano/La_Mejor_Mezcla-3.2-1B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5509969104199081 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34879364478381225 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08987915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.3196145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18292885638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/Novaciano/La_Mejor_Mezcla-3.2-1B/f816e2a7-2629-4abe-9ed0-3d1299e95194.json b/data/hfopenllm_v2/Novaciano/La_Mejor_Mezcla-3.2-1B/f816e2a7-2629-4abe-9ed0-3d1299e95194.json new file mode 100644 index 000000000..6d6061a90 --- /dev/null +++ b/data/hfopenllm_v2/Novaciano/La_Mejor_Mezcla-3.2-1B/f816e2a7-2629-4abe-9ed0-3d1299e95194.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Novaciano_La_Mejor_Mezcla-3.2-1B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "La_Mejor_Mezcla-3.2-1B", + "id": "Novaciano/La_Mejor_Mezcla-3.2-1B", + "developer": "Novaciano", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.551 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3488 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0899 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3196 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1829 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Novaciano/Sigil-Of-Satan-3.2-1B/286fae5b-544a-4033-9092-d633fc80f47b.json b/data/hfopenllm_v2/Novaciano/Sigil-Of-Satan-3.2-1B/286fae5b-544a-4033-9092-d633fc80f47b.json new file mode 100644 index 000000000..b801b00b1 --- /dev/null +++ b/data/hfopenllm_v2/Novaciano/Sigil-Of-Satan-3.2-1B/286fae5b-544a-4033-9092-d633fc80f47b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Novaciano_Sigil-Of-Satan-3.2-1B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sigil-Of-Satan-3.2-1B", + "id": "Novaciano/Sigil-Of-Satan-3.2-1B", + "developer": "Novaciano", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5494 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3546 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0544 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3276 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1855 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Novaciano/Sigil-Of-Satan-3.2-1B/ae9ceba0-8e8a-431f-a762-7bb6c55b4757.json 
b/data/hfopenllm_v2/Novaciano/Sigil-Of-Satan-3.2-1B/ae9ceba0-8e8a-431f-a762-7bb6c55b4757.json deleted file mode 100644 index 329dd1f38..000000000 --- a/data/hfopenllm_v2/Novaciano/Sigil-Of-Satan-3.2-1B/ae9ceba0-8e8a-431f-a762-7bb6c55b4757.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Novaciano_Sigil-Of-Satan-3.2-1B/1762652579.7964501", - "retrieved_timestamp": "1762652579.7964501", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Novaciano/Sigil-Of-Satan-3.2-1B", - "developer": "Novaciano", - "inference_platform": "unknown", - "id": "Novaciano/Sigil-Of-Satan-3.2-1B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5494233079340594 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3545862332731657 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3276145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18550531914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/NucleusAI/nucleus-22B-token-500B/93477bf6-ea00-418b-8a2f-975a9554263e.json b/data/hfopenllm_v2/NucleusAI/nucleus-22B-token-500B/93477bf6-ea00-418b-8a2f-975a9554263e.json new file mode 100644 index 000000000..9e4f3e608 --- /dev/null +++ b/data/hfopenllm_v2/NucleusAI/nucleus-22B-token-500B/93477bf6-ea00-418b-8a2f-975a9554263e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NucleusAI_nucleus-22B-token-500B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nucleus-22B-token-500B", + "id": "NucleusAI/nucleus-22B-token-500B", + "developer": "NucleusAI", + "inference_platform": "unknown", + 
"additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 21.828 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0257 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.292 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3511 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1162 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NucleusAI/nucleus-22B-token-500B/f18c51de-f5eb-4986-8c44-35bd71db5e8b.json b/data/hfopenllm_v2/NucleusAI/nucleus-22B-token-500B/f18c51de-f5eb-4986-8c44-35bd71db5e8b.json deleted file mode 100644 index b02331a06..000000000 --- a/data/hfopenllm_v2/NucleusAI/nucleus-22B-token-500B/f18c51de-f5eb-4986-8c44-35bd71db5e8b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/NucleusAI_nucleus-22B-token-500B/1762652579.7966561", - "retrieved_timestamp": "1762652579.7966561", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NucleusAI/nucleus-22B-token-500B", - "developer": "NucleusAI", - "inference_platform": "unknown", - "id": "NucleusAI/nucleus-22B-token-500B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 
21.828 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.025654153202391873 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29198007801214715 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3510520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11619015957446809 - } - } - ] -} diff --git a/data/hfopenllm_v2/NyxKrage/Microsoft_Phi-4/3d7c6576-f99c-4bb3-94fa-4f713e2898f6.json b/data/hfopenllm_v2/NyxKrage/Microsoft_Phi-4/3d7c6576-f99c-4bb3-94fa-4f713e2898f6.json new file mode 100644 index 000000000..a36e190d6 --- /dev/null +++ b/data/hfopenllm_v2/NyxKrage/Microsoft_Phi-4/3d7c6576-f99c-4bb3-94fa-4f713e2898f6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/NyxKrage_Microsoft_Phi-4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Microsoft_Phi-4", + "id": "NyxKrage/Microsoft_Phi-4", + "developer": "NyxKrage", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0585 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6691 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2991 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.406 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5034 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5287 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OEvortex/Emotional-llama-8B/d1e9a242-941f-4461-b75b-7043c2c01ef7.json b/data/hfopenllm_v2/OEvortex/Emotional-llama-8B/d1e9a242-941f-4461-b75b-7043c2c01ef7.json new file mode 100644 index 000000000..66b1bbf40 --- /dev/null +++ b/data/hfopenllm_v2/OEvortex/Emotional-llama-8B/d1e9a242-941f-4461-b75b-7043c2c01ef7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/OEvortex_Emotional-llama-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Emotional-llama-8B", + "id": "OEvortex/Emotional-llama-8B", + "developer": "OEvortex", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3516 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4839 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0816 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + 
"dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3659 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3535 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OEvortex/HelpingAI-15B/4ffdc303-b5e4-45f0-839c-432f04dc5d57.json b/data/hfopenllm_v2/OEvortex/HelpingAI-15B/4ffdc303-b5e4-45f0-839c-432f04dc5d57.json deleted file mode 100644 index f82bc8107..000000000 --- a/data/hfopenllm_v2/OEvortex/HelpingAI-15B/4ffdc303-b5e4-45f0-839c-432f04dc5d57.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/OEvortex_HelpingAI-15B/1762652579.797408", - "retrieved_timestamp": "1762652579.797409", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "OEvortex/HelpingAI-15B", - "developer": "OEvortex", - "inference_platform": "unknown", - "id": "OEvortex/HelpingAI-15B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 15.323 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2030091268944179 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936006977853758 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.361875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11112034574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/OEvortex/HelpingAI-15B/e39661af-ad93-41d7-8892-1230064f1a1c.json b/data/hfopenllm_v2/OEvortex/HelpingAI-15B/e39661af-ad93-41d7-8892-1230064f1a1c.json new file mode 100644 index 000000000..58003624d --- /dev/null +++ b/data/hfopenllm_v2/OEvortex/HelpingAI-15B/e39661af-ad93-41d7-8892-1230064f1a1c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/OEvortex_HelpingAI-15B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HelpingAI-15B", + "id": "OEvortex/HelpingAI-15B", + "developer": "OEvortex", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 15.323 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.203 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3619 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1111 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/OEvortex/HelpingAI-3B-reloaded/595b61b2-5220-48f6-91a0-3aa0d37c63d8.json b/data/hfopenllm_v2/OEvortex/HelpingAI-3B-reloaded/595b61b2-5220-48f6-91a0-3aa0d37c63d8.json new file mode 100644 index 000000000..c50a519bb --- /dev/null +++ b/data/hfopenllm_v2/OEvortex/HelpingAI-3B-reloaded/595b61b2-5220-48f6-91a0-3aa0d37c63d8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/OEvortex_HelpingAI-3B-reloaded/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HelpingAI-3B-reloaded", + "id": "OEvortex/HelpingAI-3B-reloaded", + "developer": "OEvortex", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 2.81 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4647 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4129 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0136 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3524 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2595 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OEvortex/HelpingAI-3B-reloaded/628026b2-efc1-4592-a85b-f5d2ea1dc1dd.json b/data/hfopenllm_v2/OEvortex/HelpingAI-3B-reloaded/628026b2-efc1-4592-a85b-f5d2ea1dc1dd.json deleted file mode 100644 index c04125a61..000000000 --- 
a/data/hfopenllm_v2/OEvortex/HelpingAI-3B-reloaded/628026b2-efc1-4592-a85b-f5d2ea1dc1dd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/OEvortex_HelpingAI-3B-reloaded/1762652579.797647", - "retrieved_timestamp": "1762652579.797647", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "OEvortex/HelpingAI-3B-reloaded", - "developer": "OEvortex", - "inference_platform": "unknown", - "id": "OEvortex/HelpingAI-3B-reloaded", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 2.81 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46466819150963884 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4128512897904065 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3524479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25947473404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/OEvortex/HelpingAI2-9B/3173263e-2a42-4e8d-956e-8175ef464e76.json b/data/hfopenllm_v2/OEvortex/HelpingAI2-9B/3173263e-2a42-4e8d-956e-8175ef464e76.json new file mode 100644 index 000000000..daf836ff0 --- /dev/null +++ b/data/hfopenllm_v2/OEvortex/HelpingAI2-9B/3173263e-2a42-4e8d-956e-8175ef464e76.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/OEvortex_HelpingAI2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HelpingAI2-9B", + "id": "OEvortex/HelpingAI2-9B", + "developer": "OEvortex", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.903 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": 
"IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4413 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4845 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0589 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3711 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.29 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OEvortex/HelpingAI2-9B/d04d6474-5784-4492-8347-a2bc03eca6ba.json b/data/hfopenllm_v2/OEvortex/HelpingAI2-9B/d04d6474-5784-4492-8347-a2bc03eca6ba.json deleted file mode 100644 index f0031870b..000000000 --- a/data/hfopenllm_v2/OEvortex/HelpingAI2-9B/d04d6474-5784-4492-8347-a2bc03eca6ba.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/OEvortex_HelpingAI2-9B/1762652579.797843", - "retrieved_timestamp": "1762652579.797844", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "OEvortex/HelpingAI2-9B", - "developer": "OEvortex", - "inference_platform": "unknown", - "id": "OEvortex/HelpingAI2-9B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.903 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.44131238447319776 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4844617641983123 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3710833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28997672872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/OEvortex/HelpingAI2.5-10B/6a41fcba-f13d-4839-8a91-ff3f18de5114.json b/data/hfopenllm_v2/OEvortex/HelpingAI2.5-10B/6a41fcba-f13d-4839-8a91-ff3f18de5114.json deleted file mode 100644 index a229c39fb..000000000 --- a/data/hfopenllm_v2/OEvortex/HelpingAI2.5-10B/6a41fcba-f13d-4839-8a91-ff3f18de5114.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/OEvortex_HelpingAI2.5-10B/1762652579.798051", - "retrieved_timestamp": "1762652579.798051", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "OEvortex/HelpingAI2.5-10B", - "developer": "OEvortex", - "inference_platform": "unknown", - "id": "OEvortex/HelpingAI2.5-10B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.211 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32765617450586665 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4495657491171711 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, 
- { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37381250000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25748005319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/OEvortex/HelpingAI2.5-10B/f77f8291-1573-4fb6-a984-1cc099c09621.json b/data/hfopenllm_v2/OEvortex/HelpingAI2.5-10B/f77f8291-1573-4fb6-a984-1cc099c09621.json new file mode 100644 index 000000000..e0efa3c20 --- /dev/null +++ b/data/hfopenllm_v2/OEvortex/HelpingAI2.5-10B/f77f8291-1573-4fb6-a984-1cc099c09621.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/OEvortex_HelpingAI2.5-10B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HelpingAI2.5-10B", + "id": "OEvortex/HelpingAI2.5-10B", + "developer": "OEvortex", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.211 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3277 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4496 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0204 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2693 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3738 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + 
}, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2575 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OliveiraJLT/Sagui-7B-Instruct-v0.1/c4681e14-513c-4e5e-af8c-88ca11849176.json b/data/hfopenllm_v2/OliveiraJLT/Sagui-7B-Instruct-v0.1/c4681e14-513c-4e5e-af8c-88ca11849176.json new file mode 100644 index 000000000..1f0b31152 --- /dev/null +++ b/data/hfopenllm_v2/OliveiraJLT/Sagui-7B-Instruct-v0.1/c4681e14-513c-4e5e-af8c-88ca11849176.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/OliveiraJLT_Sagui-7B-Instruct-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sagui-7B-Instruct-v0.1", + "id": "OliveiraJLT/Sagui-7B-Instruct-v0.1", + "developer": "OliveiraJLT", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.738 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2892 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3111 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0151 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2424 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4191 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1485 + } 
+ } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OliveiraJLT/Sagui-7B-Instruct-v0.1/d5135349-0757-469d-8ad3-80ef56d1f7de.json b/data/hfopenllm_v2/OliveiraJLT/Sagui-7B-Instruct-v0.1/d5135349-0757-469d-8ad3-80ef56d1f7de.json deleted file mode 100644 index ad1f9f1a2..000000000 --- a/data/hfopenllm_v2/OliveiraJLT/Sagui-7B-Instruct-v0.1/d5135349-0757-469d-8ad3-80ef56d1f7de.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/OliveiraJLT_Sagui-7B-Instruct-v0.1/1762652579.798249", - "retrieved_timestamp": "1762652579.798249", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "OliveiraJLT/Sagui-7B-Instruct-v0.1", - "developer": "OliveiraJLT", - "inference_platform": "unknown", - "id": "OliveiraJLT/Sagui-7B-Instruct-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.738 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28916275482386733 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3110678914743868 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2424496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4190520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14852061170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/Omkar1102/code-yi/0c220edd-2563-4fec-99a4-ef8c210ca5ce.json b/data/hfopenllm_v2/Omkar1102/code-yi/0c220edd-2563-4fec-99a4-ef8c210ca5ce.json new file mode 100644 index 000000000..032bf1e35 --- /dev/null +++ b/data/hfopenllm_v2/Omkar1102/code-yi/0c220edd-2563-4fec-99a4-ef8c210ca5ce.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Omkar1102_code-yi/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "code-yi", + "id": 
"Omkar1102/code-yi", + "developer": "Omkar1102", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 2.084 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2254 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.275 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3762 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1123 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Omkar1102/code-yi/2609af14-3cff-4b19-9741-e1caca56f58a.json b/data/hfopenllm_v2/Omkar1102/code-yi/2609af14-3cff-4b19-9741-e1caca56f58a.json deleted file mode 100644 index 7666b7efb..000000000 --- a/data/hfopenllm_v2/Omkar1102/code-yi/2609af14-3cff-4b19-9741-e1caca56f58a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Omkar1102_code-yi/1762652579.79849", - "retrieved_timestamp": "1762652579.7984908", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Omkar1102/code-yi", - "developer": "Omkar1102", - "inference_platform": "unknown", - "id": "Omkar1102/code-yi", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 2.084 
- } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21477457590304835 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2760062695877461 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25083892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3802291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11261635638297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/Omkar1102/code-yi/3edef2ec-9fad-45ba-8fde-4af5c4f24d69.json b/data/hfopenllm_v2/Omkar1102/code-yi/3edef2ec-9fad-45ba-8fde-4af5c4f24d69.json deleted file mode 100644 index f91fd42be..000000000 --- a/data/hfopenllm_v2/Omkar1102/code-yi/3edef2ec-9fad-45ba-8fde-4af5c4f24d69.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Omkar1102_code-yi/1762652579.798722", - "retrieved_timestamp": "1762652579.798723", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Omkar1102/code-yi", - "developer": "Omkar1102", - "inference_platform": "unknown", - "id": "Omkar1102/code-yi", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 2.084 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2254407195131141 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2750025242693941 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3761979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11228390957446809 - } - } - ] -} diff --git a/data/hfopenllm_v2/Omkar1102/code-yi/bd7ef5a7-aa75-4eb4-8860-aec63f8bf9d1.json b/data/hfopenllm_v2/Omkar1102/code-yi/bd7ef5a7-aa75-4eb4-8860-aec63f8bf9d1.json new file mode 100644 index 000000000..aa34a77d8 --- /dev/null +++ b/data/hfopenllm_v2/Omkar1102/code-yi/bd7ef5a7-aa75-4eb4-8860-aec63f8bf9d1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Omkar1102_code-yi/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "code-yi", + "id": "Omkar1102/code-yi", + "developer": "Omkar1102", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 2.084 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2148 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2508 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3802 + } + }, + 
{ + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1126 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OmnicromsBrain/NeuralStar_FusionWriter_4x7b/65ba6556-712c-42cc-817b-ad8c2014dc4c.json b/data/hfopenllm_v2/OmnicromsBrain/NeuralStar_FusionWriter_4x7b/65ba6556-712c-42cc-817b-ad8c2014dc4c.json deleted file mode 100644 index 239d9ce53..000000000 --- a/data/hfopenllm_v2/OmnicromsBrain/NeuralStar_FusionWriter_4x7b/65ba6556-712c-42cc-817b-ad8c2014dc4c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/OmnicromsBrain_NeuralStar_FusionWriter_4x7b/1762652579.7988968", - "retrieved_timestamp": "1762652579.798898", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "OmnicromsBrain/NeuralStar_FusionWriter_4x7b", - "developer": "OmnicromsBrain", - "inference_platform": "unknown", - "id": "OmnicromsBrain/NeuralStar_FusionWriter_4x7b", - "additional_details": { - "precision": "float16", - "architecture": "MixtralForCausalLM", - "params_billions": 24.154 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5963842604289951 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47762434766958123 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04909365558912387 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.401875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2605551861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/OmnicromsBrain/NeuralStar_FusionWriter_4x7b/85c20522-03c0-4dac-a1c8-2945e4bf0e0e.json b/data/hfopenllm_v2/OmnicromsBrain/NeuralStar_FusionWriter_4x7b/85c20522-03c0-4dac-a1c8-2945e4bf0e0e.json new file mode 100644 index 000000000..0f861c536 --- /dev/null +++ 
b/data/hfopenllm_v2/OmnicromsBrain/NeuralStar_FusionWriter_4x7b/85c20522-03c0-4dac-a1c8-2945e4bf0e0e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/OmnicromsBrain_NeuralStar_FusionWriter_4x7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NeuralStar_FusionWriter_4x7b", + "id": "OmnicromsBrain/NeuralStar_FusionWriter_4x7b", + "developer": "OmnicromsBrain", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.154 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5964 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4776 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0491 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2785 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4019 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2606 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OnlyCheeini/greesychat-turbo/f180fddd-077f-43f9-b2d9-38c5f33be44d.json b/data/hfopenllm_v2/OnlyCheeini/greesychat-turbo/f180fddd-077f-43f9-b2d9-38c5f33be44d.json new file mode 100644 index 000000000..f63187b2d --- /dev/null +++ b/data/hfopenllm_v2/OnlyCheeini/greesychat-turbo/f180fddd-077f-43f9-b2d9-38c5f33be44d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/OnlyCheeini_greesychat-turbo/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "greesychat-turbo", + "id": "OnlyCheeini/greesychat-turbo", + "developer": "OnlyCheeini", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0233 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3092 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3314 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1138 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OnlyCheeini/greesychat-turbo/f3a7f01c-2893-4887-a210-d126d9135edf.json b/data/hfopenllm_v2/OnlyCheeini/greesychat-turbo/f3a7f01c-2893-4887-a210-d126d9135edf.json deleted file mode 100644 index 13048d7e9..000000000 --- a/data/hfopenllm_v2/OnlyCheeini/greesychat-turbo/f3a7f01c-2893-4887-a210-d126d9135edf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/OnlyCheeini_greesychat-turbo/1762652579.7991328", - "retrieved_timestamp": "1762652579.799134", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "OnlyCheeini/greesychat-turbo", - "developer": "OnlyCheeini", - "inference_platform": "unknown", - "id": "OnlyCheeini/greesychat-turbo", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.023256071667619692 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30921339082318816 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3314270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11377992021276596 - } - } - ] -} diff --git a/data/hfopenllm_v2/Open-Orca/Mistral-7B-OpenOrca/ef384329-8406-4767-ac1a-3eba3131f726.json b/data/hfopenllm_v2/Open-Orca/Mistral-7B-OpenOrca/ef384329-8406-4767-ac1a-3eba3131f726.json new file mode 100644 index 000000000..db0c0f414 --- /dev/null +++ b/data/hfopenllm_v2/Open-Orca/Mistral-7B-OpenOrca/ef384329-8406-4767-ac1a-3eba3131f726.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Open-Orca_Mistral-7B-OpenOrca/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-7B-OpenOrca", + "id": "Open-Orca/Mistral-7B-OpenOrca", + "developer": "Open-Orca", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4978 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": 
"hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4768 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0355 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2718 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3858 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2653 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenAssistant/oasst-sft-1-pythia-12b/2ddeae27-77d3-413c-a6e1-9de0f3980c4e.json b/data/hfopenllm_v2/OpenAssistant/oasst-sft-1-pythia-12b/2ddeae27-77d3-413c-a6e1-9de0f3980c4e.json new file mode 100644 index 000000000..db24eb0bb --- /dev/null +++ b/data/hfopenllm_v2/OpenAssistant/oasst-sft-1-pythia-12b/2ddeae27-77d3-413c-a6e1-9de0f3980c4e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/OpenAssistant_oasst-sft-1-pythia-12b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "oasst-sft-1-pythia-12b", + "id": "OpenAssistant/oasst-sft-1-pythia-12b", + "developer": "OpenAssistant", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 12.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1055 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.3147 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0151 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3327 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1113 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenAssistant/oasst-sft-1-pythia-12b/ba1129fd-f158-47ad-b194-7cff794b9ef2.json b/data/hfopenllm_v2/OpenAssistant/oasst-sft-1-pythia-12b/ba1129fd-f158-47ad-b194-7cff794b9ef2.json deleted file mode 100644 index d45a90b6f..000000000 --- a/data/hfopenllm_v2/OpenAssistant/oasst-sft-1-pythia-12b/ba1129fd-f158-47ad-b194-7cff794b9ef2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/OpenAssistant_oasst-sft-1-pythia-12b/1762652579.799746", - "retrieved_timestamp": "1762652579.799747", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "OpenAssistant/oasst-sft-1-pythia-12b", - "developer": "OpenAssistant", - "inference_platform": "unknown", - "id": "OpenAssistant/oasst-sft-1-pythia-12b", - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 12.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10553885911603435 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.314662875941371 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33269791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11128656914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-falcon3-10b-v24.2-131k/19bba814-812c-49c2-acf1-9d056fd7d62d.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-falcon3-10b-v24.2-131k/19bba814-812c-49c2-acf1-9d056fd7d62d.json deleted file mode 100644 index 920fd5d52..000000000 --- a/data/hfopenllm_v2/OpenBuddy/openbuddy-falcon3-10b-v24.2-131k/19bba814-812c-49c2-acf1-9d056fd7d62d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-falcon3-10b-v24.2-131k/1762652579.800029", - "retrieved_timestamp": "1762652579.80003", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-falcon3-10b-v24.2-131k", - "developer": "OpenBuddy", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-falcon3-10b-v24.2-131k", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.34 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5086315420861093 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6003725722032135 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41864583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3833942819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-falcon3-10b-v24.2-131k/38b2dbbe-be86-4ef0-a39b-89841f662141.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-falcon3-10b-v24.2-131k/38b2dbbe-be86-4ef0-a39b-89841f662141.json new file mode 100644 index 000000000..133f23b3d --- /dev/null +++ b/data/hfopenllm_v2/OpenBuddy/openbuddy-falcon3-10b-v24.2-131k/38b2dbbe-be86-4ef0-a39b-89841f662141.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-falcon3-10b-v24.2-131k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openbuddy-falcon3-10b-v24.2-131k", + "id": "OpenBuddy/openbuddy-falcon3-10b-v24.2-131k", + "developer": "OpenBuddy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.34 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5086 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6004 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.213 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4186 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3834 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/OpenBuddy/openbuddy-llama3-70b-v21.2-32k/999a8091-22bd-4c08-bee1-772202e7edde.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-llama3-70b-v21.2-32k/999a8091-22bd-4c08-bee1-772202e7edde.json new file mode 100644 index 000000000..e76e849ca --- /dev/null +++ b/data/hfopenllm_v2/OpenBuddy/openbuddy-llama3-70b-v21.2-32k/999a8091-22bd-4c08-bee1-772202e7edde.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-llama3-70b-v21.2-32k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openbuddy-llama3-70b-v21.2-32k", + "id": "OpenBuddy/openbuddy-llama3-70b-v21.2-32k", + "developer": "OpenBuddy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.701 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6507 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2032 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3423 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.458 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4832 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-llama3-8b-v21.1-8k/fda91d98-d259-430c-929b-78852cab64ec.json 
b/data/hfopenllm_v2/OpenBuddy/openbuddy-llama3-8b-v21.1-8k/fda91d98-d259-430c-929b-78852cab64ec.json new file mode 100644 index 000000000..38eedcf18 --- /dev/null +++ b/data/hfopenllm_v2/OpenBuddy/openbuddy-llama3-8b-v21.1-8k/fda91d98-d259-430c-929b-78852cab64ec.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-llama3-8b-v21.1-8k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openbuddy-llama3-8b-v21.1-8k", + "id": "OpenBuddy/openbuddy-llama3-8b-v21.1-8k", + "developer": "OpenBuddy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.557 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4788 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0431 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3988 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2955 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-llama3-8b-v21.2-32k/535bfa4f-ab63-4832-9f17-7b245ff2b2af.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-llama3-8b-v21.2-32k/535bfa4f-ab63-4832-9f17-7b245ff2b2af.json new file mode 100644 index 000000000..abbc1f40c --- /dev/null +++ 
b/data/hfopenllm_v2/OpenBuddy/openbuddy-llama3-8b-v21.2-32k/535bfa4f-ab63-4832-9f17-7b245ff2b2af.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-llama3-8b-v21.2-32k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openbuddy-llama3-8b-v21.2-32k", + "id": "OpenBuddy/openbuddy-llama3-8b-v21.2-32k", + "developer": "OpenBuddy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6192 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4856 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0785 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3779 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3299 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-llama3.1-70b-v22.1-131k/681a6cc5-5519-4b13-8b50-93adcab4a3f7.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-llama3.1-70b-v22.1-131k/681a6cc5-5519-4b13-8b50-93adcab4a3f7.json new file mode 100644 index 000000000..e126a1c30 --- /dev/null +++ b/data/hfopenllm_v2/OpenBuddy/openbuddy-llama3.1-70b-v22.1-131k/681a6cc5-5519-4b13-8b50-93adcab4a3f7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/OpenBuddy_openbuddy-llama3.1-70b-v22.1-131k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openbuddy-llama3.1-70b-v22.1-131k", + "id": "OpenBuddy/openbuddy-llama3.1-70b-v22.1-131k", + "developer": "OpenBuddy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7333 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6698 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.395 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.375 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.463 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5304 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-llama3.1-8b-v22.2-131k/141dd12c-6901-4a96-a051-f35647ddcc73.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-llama3.1-8b-v22.2-131k/141dd12c-6901-4a96-a051-f35647ddcc73.json new file mode 100644 index 000000000..c11010c3b --- /dev/null +++ b/data/hfopenllm_v2/OpenBuddy/openbuddy-llama3.1-8b-v22.2-131k/141dd12c-6901-4a96-a051-f35647ddcc73.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-llama3.1-8b-v22.2-131k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open 
LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openbuddy-llama3.1-8b-v22.2-131k", + "id": "OpenBuddy/openbuddy-llama3.1-8b-v22.2-131k", + "developer": "OpenBuddy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6657 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5007 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1148 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4081 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.331 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-llama3.1-8b-v22.3-131k/5b095779-aacc-41f3-9a3f-83f64a1c0d4c.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-llama3.1-8b-v22.3-131k/5b095779-aacc-41f3-9a3f-83f64a1c0d4c.json new file mode 100644 index 000000000..e33abbdfc --- /dev/null +++ b/data/hfopenllm_v2/OpenBuddy/openbuddy-llama3.1-8b-v22.3-131k/5b095779-aacc-41f3-9a3f-83f64a1c0d4c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-llama3.1-8b-v22.3-131k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"openbuddy-llama3.1-8b-v22.3-131k", + "id": "OpenBuddy/openbuddy-llama3.1-8b-v22.3-131k", + "developer": "OpenBuddy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5997 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5066 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1208 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4015 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3277 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-llama3.2-1b-v23.1-131k/7a88c95a-b253-4f36-8fde-1b0158bbf0b6.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-llama3.2-1b-v23.1-131k/7a88c95a-b253-4f36-8fde-1b0158bbf0b6.json new file mode 100644 index 000000000..55a086e2b --- /dev/null +++ b/data/hfopenllm_v2/OpenBuddy/openbuddy-llama3.2-1b-v23.1-131k/7a88c95a-b253-4f36-8fde-1b0158bbf0b6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-llama3.2-1b-v23.1-131k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openbuddy-llama3.2-1b-v23.1-131k", + "id": "OpenBuddy/openbuddy-llama3.2-1b-v23.1-131k", + "developer": "OpenBuddy", + "inference_platform": "unknown", + 
"additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.359 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3267 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0249 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3342 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.184 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-llama3.2-3b-v23.2-131k/7938a00e-4e11-4223-a900-fa53df168ab7.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-llama3.2-3b-v23.2-131k/7938a00e-4e11-4223-a900-fa53df168ab7.json new file mode 100644 index 000000000..c20150989 --- /dev/null +++ b/data/hfopenllm_v2/OpenBuddy/openbuddy-llama3.2-3b-v23.2-131k/7938a00e-4e11-4223-a900-fa53df168ab7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-llama3.2-3b-v23.2-131k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openbuddy-llama3.2-3b-v23.2-131k", + "id": "OpenBuddy/openbuddy-llama3.2-3b-v23.2-131k", + "developer": "OpenBuddy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.607 + } + }, + "evaluation_results": [ + { + "evaluation_name": 
"IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4319 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4073 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0264 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3263 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2479 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-llama3.3-70b-v24.1-131k/8f966b4e-1baf-445f-9f10-4ba6b47aaf9b.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-llama3.3-70b-v24.1-131k/8f966b4e-1baf-445f-9f10-4ba6b47aaf9b.json new file mode 100644 index 000000000..97d54e843 --- /dev/null +++ b/data/hfopenllm_v2/OpenBuddy/openbuddy-llama3.3-70b-v24.1-131k/8f966b4e-1baf-445f-9f10-4ba6b47aaf9b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-llama3.3-70b-v24.1-131k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openbuddy-llama3.3-70b-v24.1-131k", + "id": "OpenBuddy/openbuddy-llama3.3-70b-v24.1-131k", + "developer": "OpenBuddy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8121 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6858 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4411 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4346 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4869 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5327 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-mixtral-7bx8-v18.1-32k/247ee47c-e441-4020-97e3-14e3ed8d22c9.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-mixtral-7bx8-v18.1-32k/247ee47c-e441-4020-97e3-14e3ed8d22c9.json deleted file mode 100644 index 3948cb4fb..000000000 --- a/data/hfopenllm_v2/OpenBuddy/openbuddy-mixtral-7bx8-v18.1-32k/247ee47c-e441-4020-97e3-14e3ed8d22c9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-mixtral-7bx8-v18.1-32k/1762652579.803262", - "retrieved_timestamp": "1762652579.803263", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-mixtral-7bx8-v18.1-32k", - "developer": "OpenBuddy", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-mixtral-7bx8-v18.1-32k", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 46.741 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.549347952322061 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46561770563515265 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10800604229607251 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3830520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38040226063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-mixtral-7bx8-v18.1-32k/a334d998-21a5-4108-96e3-9935507a9f8f.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-mixtral-7bx8-v18.1-32k/a334d998-21a5-4108-96e3-9935507a9f8f.json new file mode 100644 index 000000000..25e129f68 --- /dev/null +++ b/data/hfopenllm_v2/OpenBuddy/openbuddy-mixtral-7bx8-v18.1-32k/a334d998-21a5-4108-96e3-9935507a9f8f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-mixtral-7bx8-v18.1-32k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openbuddy-mixtral-7bx8-v18.1-32k", + "id": "OpenBuddy/openbuddy-mixtral-7bx8-v18.1-32k", + "developer": "OpenBuddy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 46.741 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5493 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4656 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.108 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3831 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3804 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-nemotron-70b-v23.1-131k/941e27c6-81da-4ce1-b1c8-544c1426cd11.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-nemotron-70b-v23.1-131k/941e27c6-81da-4ce1-b1c8-544c1426cd11.json new file mode 100644 index 000000000..83c193063 --- /dev/null +++ b/data/hfopenllm_v2/OpenBuddy/openbuddy-nemotron-70b-v23.1-131k/941e27c6-81da-4ce1-b1c8-544c1426cd11.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-nemotron-70b-v23.1-131k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openbuddy-nemotron-70b-v23.1-131k", + "id": "OpenBuddy/openbuddy-nemotron-70b-v23.1-131k", + "developer": "OpenBuddy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7555 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6749 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.321 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", 
+ "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3633 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4538 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5175 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-nemotron-70b-v23.1-131k/e4e4d8f4-7e49-4b08-8a08-97e4e2c28616.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-nemotron-70b-v23.1-131k/e4e4d8f4-7e49-4b08-8a08-97e4e2c28616.json deleted file mode 100644 index e78e895e4..000000000 --- a/data/hfopenllm_v2/OpenBuddy/openbuddy-nemotron-70b-v23.1-131k/e4e4d8f4-7e49-4b08-8a08-97e4e2c28616.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-nemotron-70b-v23.1-131k/1762652579.803536", - "retrieved_timestamp": "1762652579.803537", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-nemotron-70b-v23.1-131k", - "developer": "OpenBuddy", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-nemotron-70b-v23.1-131k", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7555275557742346 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6749472828128272 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32099697885196377 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36325503355704697 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 
1 - }, - "score_details": { - "score": 0.45375000000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5174534574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-nemotron-70b-v23.2-131k/b34ca7d7-6049-4f4f-a2e3-db736009fa4d.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-nemotron-70b-v23.2-131k/b34ca7d7-6049-4f4f-a2e3-db736009fa4d.json deleted file mode 100644 index 337319d78..000000000 --- a/data/hfopenllm_v2/OpenBuddy/openbuddy-nemotron-70b-v23.2-131k/b34ca7d7-6049-4f4f-a2e3-db736009fa4d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-nemotron-70b-v23.2-131k/1762652579.803802", - "retrieved_timestamp": "1762652579.803806", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-nemotron-70b-v23.2-131k", - "developer": "OpenBuddy", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-nemotron-70b-v23.2-131k", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7226547782107031 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6704805157570325 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3157099697885196 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3598993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46959375000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5120511968085106 - } - } - ] -} diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-nemotron-70b-v23.2-131k/e409a374-685b-482d-82e4-2436dca37309.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-nemotron-70b-v23.2-131k/e409a374-685b-482d-82e4-2436dca37309.json new file mode 100644 index 000000000..d6fb1a342 --- /dev/null +++ 
b/data/hfopenllm_v2/OpenBuddy/openbuddy-nemotron-70b-v23.2-131k/e409a374-685b-482d-82e4-2436dca37309.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-nemotron-70b-v23.2-131k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openbuddy-nemotron-70b-v23.2-131k", + "id": "OpenBuddy/openbuddy-nemotron-70b-v23.2-131k", + "developer": "OpenBuddy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7227 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6705 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3157 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3599 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4696 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5121 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.1-200k/84713625-97b6-4fad-982d-41b5c500d73a.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.1-200k/84713625-97b6-4fad-982d-41b5c500d73a.json new file mode 100644 index 000000000..d8cf89afa --- /dev/null +++ b/data/hfopenllm_v2/OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.1-200k/84713625-97b6-4fad-982d-41b5c500d73a.json @@ -0,0 +1,132 @@ +{ + 
"schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-qwen2.5llamaify-14b-v23.1-200k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openbuddy-qwen2.5llamaify-14b-v23.1-200k", + "id": "OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.1-200k", + "developer": "OpenBuddy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6309 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6013 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2538 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3331 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.424 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4673 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.3-200k/b7edd9ab-a018-4b2f-9b01-b56cbe98abda.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.3-200k/b7edd9ab-a018-4b2f-9b01-b56cbe98abda.json new file mode 100644 index 000000000..e24023046 --- /dev/null +++ b/data/hfopenllm_v2/OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.3-200k/b7edd9ab-a018-4b2f-9b01-b56cbe98abda.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/OpenBuddy_openbuddy-qwen2.5llamaify-14b-v23.3-200k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openbuddy-qwen2.5llamaify-14b-v23.3-200k", + "id": "OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.3-200k", + "developer": "OpenBuddy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6131 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6081 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2311 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3272 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4346 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4795 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-qwen2.5llamaify-7b-v23.1-200k/ec896115-21ef-4337-9fdd-32a04c574a05.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-qwen2.5llamaify-7b-v23.1-200k/ec896115-21ef-4337-9fdd-32a04c574a05.json new file mode 100644 index 000000000..56c75d371 --- /dev/null +++ b/data/hfopenllm_v2/OpenBuddy/openbuddy-qwen2.5llamaify-7b-v23.1-200k/ec896115-21ef-4337-9fdd-32a04c574a05.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-qwen2.5llamaify-7b-v23.1-200k/1770682486.623709", + "retrieved_timestamp": 
"1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openbuddy-qwen2.5llamaify-7b-v23.1-200k", + "id": "OpenBuddy/openbuddy-qwen2.5llamaify-7b-v23.1-200k", + "developer": "OpenBuddy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.615 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5673 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5509 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1888 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3146 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4363 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3948 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-qwq-32b-v24.1-200k/a2b990cd-e692-44fc-8b39-ac91eab85cef.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-qwq-32b-v24.1-200k/a2b990cd-e692-44fc-8b39-ac91eab85cef.json deleted file mode 100644 index c6e141c00..000000000 --- a/data/hfopenllm_v2/OpenBuddy/openbuddy-qwq-32b-v24.1-200k/a2b990cd-e692-44fc-8b39-ac91eab85cef.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-qwq-32b-v24.1-200k/1762652579.804893", - "retrieved_timestamp": "1762652579.804894", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-qwq-32b-v24.1-200k", - "developer": "OpenBuddy", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-qwq-32b-v24.1-200k", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.593661484860171 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6798496773637743 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37386706948640486 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.484875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5490359042553191 - } - } - ] -} diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-qwq-32b-v24.1-200k/d8e5f49b-7bf3-41d4-a91e-c566219609f6.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-qwq-32b-v24.1-200k/d8e5f49b-7bf3-41d4-a91e-c566219609f6.json new file mode 100644 index 000000000..45a21bb0f --- /dev/null +++ b/data/hfopenllm_v2/OpenBuddy/openbuddy-qwq-32b-v24.1-200k/d8e5f49b-7bf3-41d4-a91e-c566219609f6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-qwq-32b-v24.1-200k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openbuddy-qwq-32b-v24.1-200k", + "id": "OpenBuddy/openbuddy-qwq-32b-v24.1-200k", + "developer": "OpenBuddy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5937 + } + }, + { + "evaluation_name": 
"BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6798 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3739 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3809 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4849 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.549 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-qwq-32b-v24.2-200k/24684939-5eb8-40b1-99dd-1ebe693680fc.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-qwq-32b-v24.2-200k/24684939-5eb8-40b1-99dd-1ebe693680fc.json deleted file mode 100644 index fe88d9d40..000000000 --- a/data/hfopenllm_v2/OpenBuddy/openbuddy-qwq-32b-v24.2-200k/24684939-5eb8-40b1-99dd-1ebe693680fc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-qwq-32b-v24.2-200k/1762652579.8051221", - "retrieved_timestamp": "1762652579.8051221", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-qwq-32b-v24.2-200k", - "developer": "OpenBuddy", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-qwq-32b-v24.2-200k", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5969837808126881 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, 
- "score_details": { - "score": 0.6771537576509328 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3776435045317221 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3766778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47179166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5446309840425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-qwq-32b-v24.2-200k/ce1a92a3-6bec-410f-ab42-c567c5d23856.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-qwq-32b-v24.2-200k/ce1a92a3-6bec-410f-ab42-c567c5d23856.json new file mode 100644 index 000000000..74da68126 --- /dev/null +++ b/data/hfopenllm_v2/OpenBuddy/openbuddy-qwq-32b-v24.2-200k/ce1a92a3-6bec-410f-ab42-c567c5d23856.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-qwq-32b-v24.2-200k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openbuddy-qwq-32b-v24.2-200k", + "id": "OpenBuddy/openbuddy-qwq-32b-v24.2-200k", + "developer": "OpenBuddy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.597 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6772 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3776 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3767 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4718 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5446 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-yi1.5-34b-v21.3-32k/0a125470-b50f-4ca0-90dc-1f6b69c3ccd4.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-yi1.5-34b-v21.3-32k/0a125470-b50f-4ca0-90dc-1f6b69c3ccd4.json new file mode 100644 index 000000000..ee9bc7580 --- /dev/null +++ b/data/hfopenllm_v2/OpenBuddy/openbuddy-yi1.5-34b-v21.3-32k/0a125470-b50f-4ca0-90dc-1f6b69c3ccd4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-yi1.5-34b-v21.3-32k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openbuddy-yi1.5-34b-v21.3-32k", + "id": "OpenBuddy/openbuddy-yi1.5-34b-v21.3-32k", + "developer": "OpenBuddy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.407 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.542 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6163 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1782 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.349 + } + }, + { + 
"evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4439 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4599 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-yi1.5-34b-v21.3-32k/f6a36220-0b31-4b0d-9262-7e0e508e64db.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-yi1.5-34b-v21.3-32k/f6a36220-0b31-4b0d-9262-7e0e508e64db.json deleted file mode 100644 index 6ec632247..000000000 --- a/data/hfopenllm_v2/OpenBuddy/openbuddy-yi1.5-34b-v21.3-32k/f6a36220-0b31-4b0d-9262-7e0e508e64db.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-yi1.5-34b-v21.3-32k/1762652579.8053398", - "retrieved_timestamp": "1762652579.805341", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-yi1.5-34b-v21.3-32k", - "developer": "OpenBuddy", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-yi1.5-34b-v21.3-32k", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.407 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5420041046645123 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6162574860411373 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1782477341389728 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.348993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44394791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.4599401595744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-zero-14b-v22.3-32k/0e288116-902d-4fef-9020-a3a4dc80e698.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-zero-14b-v22.3-32k/0e288116-902d-4fef-9020-a3a4dc80e698.json deleted file mode 100644 index 534d7cb4a..000000000 --- a/data/hfopenllm_v2/OpenBuddy/openbuddy-zero-14b-v22.3-32k/0e288116-902d-4fef-9020-a3a4dc80e698.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-zero-14b-v22.3-32k/1762652579.805548", - "retrieved_timestamp": "1762652579.8055491", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-zero-14b-v22.3-32k", - "developer": "OpenBuddy", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-zero-14b-v22.3-32k", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.022 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37529200299649373 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4859759816473639 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09365558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41660416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187333776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-zero-14b-v22.3-32k/aeee0165-ac7e-4da6-8102-ba60f43587de.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-zero-14b-v22.3-32k/aeee0165-ac7e-4da6-8102-ba60f43587de.json new file mode 100644 index 000000000..fe9fb2d74 --- /dev/null +++ b/data/hfopenllm_v2/OpenBuddy/openbuddy-zero-14b-v22.3-32k/aeee0165-ac7e-4da6-8102-ba60f43587de.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-zero-14b-v22.3-32k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + 
"evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openbuddy-zero-14b-v22.3-32k", + "id": "OpenBuddy/openbuddy-zero-14b-v22.3-32k", + "developer": "OpenBuddy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.022 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3753 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.486 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0937 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4166 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3187 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-zero-3b-v21.2-32k/9d135662-43d6-4b05-90cb-5d2c856b0b89.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-zero-3b-v21.2-32k/9d135662-43d6-4b05-90cb-5d2c856b0b89.json deleted file mode 100644 index a18249901..000000000 --- a/data/hfopenllm_v2/OpenBuddy/openbuddy-zero-3b-v21.2-32k/9d135662-43d6-4b05-90cb-5d2c856b0b89.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-zero-3b-v21.2-32k/1762652579.8057752", - "retrieved_timestamp": "1762652579.8057752", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"OpenBuddy/openbuddy-zero-3b-v21.2-32k", - "developer": "OpenBuddy", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-zero-3b-v21.2-32k", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.769 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3802377691192702 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3934791831798414 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0188821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3566354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20337433510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-zero-3b-v21.2-32k/b47b8666-2556-45df-ba5b-9a5e94186784.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-zero-3b-v21.2-32k/b47b8666-2556-45df-ba5b-9a5e94186784.json new file mode 100644 index 000000000..8ba48e7a5 --- /dev/null +++ b/data/hfopenllm_v2/OpenBuddy/openbuddy-zero-3b-v21.2-32k/b47b8666-2556-45df-ba5b-9a5e94186784.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-zero-3b-v21.2-32k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openbuddy-zero-3b-v21.2-32k", + "id": "OpenBuddy/openbuddy-zero-3b-v21.2-32k", + "developer": "OpenBuddy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.769 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3802 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", 
+ "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3935 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0189 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3566 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2034 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-zero-56b-v21.2-32k/0bde5d57-39be-4497-a2a8-d08d3c8d65f4.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-zero-56b-v21.2-32k/0bde5d57-39be-4497-a2a8-d08d3c8d65f4.json new file mode 100644 index 000000000..bcede16a0 --- /dev/null +++ b/data/hfopenllm_v2/OpenBuddy/openbuddy-zero-56b-v21.2-32k/0bde5d57-39be-4497-a2a8-d08d3c8d65f4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-zero-56b-v21.2-32k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openbuddy-zero-56b-v21.2-32k", + "id": "OpenBuddy/openbuddy-zero-56b-v21.2-32k", + "developer": "OpenBuddy", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 56.707 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5057 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6128 + } + }, + { + "evaluation_name": "MATH Level 5", + 
"source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1624 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.318 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4305 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4399 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-zero-56b-v21.2-32k/7636a893-1404-4257-9778-653f3cfb601b.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-zero-56b-v21.2-32k/7636a893-1404-4257-9778-653f3cfb601b.json deleted file mode 100644 index ce7659b30..000000000 --- a/data/hfopenllm_v2/OpenBuddy/openbuddy-zero-56b-v21.2-32k/7636a893-1404-4257-9778-653f3cfb601b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-zero-56b-v21.2-32k/1762652579.8059928", - "retrieved_timestamp": "1762652579.805994", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-zero-56b-v21.2-32k", - "developer": "OpenBuddy", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-zero-56b-v21.2-32k", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 56.707 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5057092957796425 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6128345897750148 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16238670694864046 - } - }, - { - "evaluation_name": "GPQA", - 
"metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4305208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43991023936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/OpenGenerativeAI/Bifrost-14B/86599961-3ec2-4837-89a4-809f1dd7226c.json b/data/hfopenllm_v2/OpenGenerativeAI/Bifrost-14B/86599961-3ec2-4837-89a4-809f1dd7226c.json new file mode 100644 index 000000000..e930a3e87 --- /dev/null +++ b/data/hfopenllm_v2/OpenGenerativeAI/Bifrost-14B/86599961-3ec2-4837-89a4-809f1dd7226c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/OpenGenerativeAI_Bifrost-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Bifrost-14B", + "id": "OpenGenerativeAI/Bifrost-14B", + "developer": "OpenGenerativeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6615 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6845 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2356 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3792 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4624 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5074 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenGenerativeAI/Bifrost-14B/cde00174-ac52-42da-9641-0866739232e4.json b/data/hfopenllm_v2/OpenGenerativeAI/Bifrost-14B/cde00174-ac52-42da-9641-0866739232e4.json deleted file mode 100644 index 9ea89d92a..000000000 --- a/data/hfopenllm_v2/OpenGenerativeAI/Bifrost-14B/cde00174-ac52-42da-9641-0866739232e4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/OpenGenerativeAI_Bifrost-14B/1762652579.806474", - "retrieved_timestamp": "1762652579.806475", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "OpenGenerativeAI/Bifrost-14B", - "developer": "OpenGenerativeAI", - "inference_platform": "unknown", - "id": "OpenGenerativeAI/Bifrost-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6615302951723648 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6844897889249308 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23564954682779457 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37919463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46239583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5073969414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/OpenGenerativeAI/Bifrost/cef8e01a-071d-4ee4-997b-44679ef5b56e.json b/data/hfopenllm_v2/OpenGenerativeAI/Bifrost/cef8e01a-071d-4ee4-997b-44679ef5b56e.json deleted file mode 100644 index 1a4187ef0..000000000 --- 
a/data/hfopenllm_v2/OpenGenerativeAI/Bifrost/cef8e01a-071d-4ee4-997b-44679ef5b56e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/OpenGenerativeAI_Bifrost/1762652579.8062131", - "retrieved_timestamp": "1762652579.8062139", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "OpenGenerativeAI/Bifrost", - "developer": "OpenGenerativeAI", - "inference_platform": "unknown", - "id": "OpenGenerativeAI/Bifrost", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6347524568145853 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6849273974523276 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2545317220543807 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36828859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45976041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5159574468085106 - } - } - ] -} diff --git a/data/hfopenllm_v2/OpenGenerativeAI/Bifrost/dc3ca25e-41b2-4206-afaa-7d2d10fd27a7.json b/data/hfopenllm_v2/OpenGenerativeAI/Bifrost/dc3ca25e-41b2-4206-afaa-7d2d10fd27a7.json new file mode 100644 index 000000000..9269d76e2 --- /dev/null +++ b/data/hfopenllm_v2/OpenGenerativeAI/Bifrost/dc3ca25e-41b2-4206-afaa-7d2d10fd27a7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/OpenGenerativeAI_Bifrost/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Bifrost", + "id": "OpenGenerativeAI/Bifrost", + "developer": "OpenGenerativeAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": 
"IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6348 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6849 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2545 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3683 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4598 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.516 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenLLM-France/Lucie-7B-Instruct-human-data/26787f2b-8f30-4cc8-b39e-447b8c53aa85.json b/data/hfopenllm_v2/OpenLLM-France/Lucie-7B-Instruct-human-data/26787f2b-8f30-4cc8-b39e-447b8c53aa85.json deleted file mode 100644 index f56cb745f..000000000 --- a/data/hfopenllm_v2/OpenLLM-France/Lucie-7B-Instruct-human-data/26787f2b-8f30-4cc8-b39e-447b8c53aa85.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/OpenLLM-France_Lucie-7B-Instruct-human-data/1762652579.8072178", - "retrieved_timestamp": "1762652579.807219", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "OpenLLM-France/Lucie-7B-Instruct-human-data", - "developer": "OpenLLM-France", - "inference_platform": "unknown", - "id": "OpenLLM-France/Lucie-7B-Instruct-human-data", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.707 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29460830596151544 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32842533479733 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37285416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14295212765957446 - } - } - ] -} diff --git a/data/hfopenllm_v2/OpenLLM-France/Lucie-7B-Instruct-human-data/cd77d407-3be3-4b84-8a73-34a15744de93.json b/data/hfopenllm_v2/OpenLLM-France/Lucie-7B-Instruct-human-data/cd77d407-3be3-4b84-8a73-34a15744de93.json new file mode 100644 index 000000000..1f47bfd03 --- /dev/null +++ b/data/hfopenllm_v2/OpenLLM-France/Lucie-7B-Instruct-human-data/cd77d407-3be3-4b84-8a73-34a15744de93.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/OpenLLM-France_Lucie-7B-Instruct-human-data/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lucie-7B-Instruct-human-data", + "id": "OpenLLM-France/Lucie-7B-Instruct-human-data", + "developer": "OpenLLM-France", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.707 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2946 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3284 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0219 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2752 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3729 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.143 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenLLM-France/Lucie-7B-Instruct-v1.1/1cd20db5-0225-4724-b1f9-7c32eae456e1.json b/data/hfopenllm_v2/OpenLLM-France/Lucie-7B-Instruct-v1.1/1cd20db5-0225-4724-b1f9-7c32eae456e1.json new file mode 100644 index 000000000..0a5903536 --- /dev/null +++ b/data/hfopenllm_v2/OpenLLM-France/Lucie-7B-Instruct-v1.1/1cd20db5-0225-4724-b1f9-7c32eae456e1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/OpenLLM-France_Lucie-7B-Instruct-v1.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lucie-7B-Instruct-v1.1", + "id": "OpenLLM-France/Lucie-7B-Instruct-v1.1", + "developer": "OpenLLM-France", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.707 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3039 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3816 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0317 + } + 
}, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2819 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1864 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenLLM-France/Lucie-7B-Instruct-v1.1/e94a0550-93fa-448a-a4a4-187fd1b7d24e.json b/data/hfopenllm_v2/OpenLLM-France/Lucie-7B-Instruct-v1.1/e94a0550-93fa-448a-a4a4-187fd1b7d24e.json deleted file mode 100644 index 8546653f3..000000000 --- a/data/hfopenllm_v2/OpenLLM-France/Lucie-7B-Instruct-v1.1/e94a0550-93fa-448a-a4a4-187fd1b7d24e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/OpenLLM-France_Lucie-7B-Instruct-v1.1/1762652579.807442", - "retrieved_timestamp": "1762652579.807442", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "OpenLLM-France/Lucie-7B-Instruct-v1.1", - "developer": "OpenLLM-France", - "inference_platform": "unknown", - "id": "OpenLLM-France/Lucie-7B-Instruct-v1.1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.707 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3038759380665523 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38158765227444885 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03172205438066465 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37502083333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1864195478723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/OpenLLM-France/Lucie-7B-Instruct/af17be77-0ae3-4b90-ba85-a4886450cd43.json b/data/hfopenllm_v2/OpenLLM-France/Lucie-7B-Instruct/af17be77-0ae3-4b90-ba85-a4886450cd43.json deleted file mode 100644 index c3c2bb7a1..000000000 --- a/data/hfopenllm_v2/OpenLLM-France/Lucie-7B-Instruct/af17be77-0ae3-4b90-ba85-a4886450cd43.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/OpenLLM-France_Lucie-7B-Instruct/1762652579.806944", - "retrieved_timestamp": "1762652579.806945", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "OpenLLM-France/Lucie-7B-Instruct", - "developer": "OpenLLM-France", - "inference_platform": "unknown", - "id": "OpenLLM-France/Lucie-7B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.707 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.279645784296777 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3254036581260458 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36621875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15558510638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/OpenLLM-France/Lucie-7B-Instruct/dfc45dc3-51e6-454b-aee9-ea6b0714f0ca.json b/data/hfopenllm_v2/OpenLLM-France/Lucie-7B-Instruct/dfc45dc3-51e6-454b-aee9-ea6b0714f0ca.json new file mode 100644 index 000000000..16a3cc3c6 --- /dev/null +++ b/data/hfopenllm_v2/OpenLLM-France/Lucie-7B-Instruct/dfc45dc3-51e6-454b-aee9-ea6b0714f0ca.json @@ -0,0 +1,132 @@ +{ + 
"schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/OpenLLM-France_Lucie-7B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lucie-7B-Instruct", + "id": "OpenLLM-France/Lucie-7B-Instruct", + "developer": "OpenLLM-France", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.707 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2796 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3254 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0166 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3662 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1556 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenLLM-France/Lucie-7B/01e4cd19-4f1f-4c30-b80f-e1d287d5d7c2.json b/data/hfopenllm_v2/OpenLLM-France/Lucie-7B/01e4cd19-4f1f-4c30-b80f-e1d287d5d7c2.json deleted file mode 100644 index c51f2c44e..000000000 --- a/data/hfopenllm_v2/OpenLLM-France/Lucie-7B/01e4cd19-4f1f-4c30-b80f-e1d287d5d7c2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/OpenLLM-France_Lucie-7B/1762652579.806693", - "retrieved_timestamp": "1762652579.8066938", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "OpenLLM-France/Lucie-7B", - "developer": "OpenLLM-France", - "inference_platform": "unknown", - "id": "OpenLLM-France/Lucie-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.707 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24964538535530173 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3492469872973046 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39232291666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14976728723404256 - } - } - ] -} diff --git a/data/hfopenllm_v2/OpenLLM-France/Lucie-7B/3da2a408-672c-47b8-be32-61f56a15e9f3.json b/data/hfopenllm_v2/OpenLLM-France/Lucie-7B/3da2a408-672c-47b8-be32-61f56a15e9f3.json new file mode 100644 index 000000000..be1189595 --- /dev/null +++ b/data/hfopenllm_v2/OpenLLM-France/Lucie-7B/3da2a408-672c-47b8-be32-61f56a15e9f3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/OpenLLM-France_Lucie-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lucie-7B", + "id": "OpenLLM-France/Lucie-7B", + "developer": "OpenLLM-France", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.707 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2496 + } + }, + { + 
"evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3492 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0144 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3923 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1498 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenLeecher/llama3-8b-lima/94700c3c-f18d-4f96-a794-65bcf483fca9.json b/data/hfopenllm_v2/OpenLeecher/llama3-8b-lima/94700c3c-f18d-4f96-a794-65bcf483fca9.json new file mode 100644 index 000000000..63e1ec99a --- /dev/null +++ b/data/hfopenllm_v2/OpenLeecher/llama3-8b-lima/94700c3c-f18d-4f96-a794-65bcf483fca9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/OpenLeecher_llama3-8b-lima/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama3-8b-lima", + "id": "OpenLeecher/llama3-8b-lima", + "developer": "OpenLeecher", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4371 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4296 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0506 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2383 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3713 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2626 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenScholar/Llama-3.1_OpenScholar-8B/6f3481d4-076f-45bd-8564-d485109c7a63.json b/data/hfopenllm_v2/OpenScholar/Llama-3.1_OpenScholar-8B/6f3481d4-076f-45bd-8564-d485109c7a63.json new file mode 100644 index 000000000..38d1da240 --- /dev/null +++ b/data/hfopenllm_v2/OpenScholar/Llama-3.1_OpenScholar-8B/6f3481d4-076f-45bd-8564-d485109c7a63.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/OpenScholar_Llama-3.1_OpenScholar-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1_OpenScholar-8B", + "id": "OpenScholar/Llama-3.1_OpenScholar-8B", + "developer": "OpenScholar", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6064 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5208 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": 
"hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1654 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2819 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4275 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3708 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2/9f5ca3b2-747a-4fd0-b382-bf7ef503ba25.json b/data/hfopenllm_v2/Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2/9f5ca3b2-747a-4fd0-b382-bf7ef503ba25.json new file mode 100644 index 000000000..3461ecd6d --- /dev/null +++ b/data/hfopenllm_v2/Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2/9f5ca3b2-747a-4fd0-b382-bf7ef503ba25.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Orenguteng_Llama-3.1-8B-Lexi-Uncensored-V2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8B-Lexi-Uncensored-V2", + "id": "Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2", + "developer": "Orenguteng", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7792 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5084 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1971 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3843 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3781 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Orenguteng/Llama-3.1-8B-Lexi-Uncensored/f1932041-263a-4841-9c8b-c6cc9fa50c21.json b/data/hfopenllm_v2/Orenguteng/Llama-3.1-8B-Lexi-Uncensored/f1932041-263a-4841-9c8b-c6cc9fa50c21.json new file mode 100644 index 000000000..e8f2ea31c --- /dev/null +++ b/data/hfopenllm_v2/Orenguteng/Llama-3.1-8B-Lexi-Uncensored/f1932041-263a-4841-9c8b-c6cc9fa50c21.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Orenguteng_Llama-3.1-8B-Lexi-Uncensored/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8B-Lexi-Uncensored", + "id": "Orenguteng/Llama-3.1-8B-Lexi-Uncensored", + "developer": "Orenguteng", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7777 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5057 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1571 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + 
"dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2718 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3871 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.379 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Orion-zhen/Qwen2.5-7B-Instruct-Uncensored/141239bb-c7e3-4c38-b289-12cd59f592d2.json b/data/hfopenllm_v2/Orion-zhen/Qwen2.5-7B-Instruct-Uncensored/141239bb-c7e3-4c38-b289-12cd59f592d2.json deleted file mode 100644 index c456e1b50..000000000 --- a/data/hfopenllm_v2/Orion-zhen/Qwen2.5-7B-Instruct-Uncensored/141239bb-c7e3-4c38-b289-12cd59f592d2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Orion-zhen_Qwen2.5-7B-Instruct-Uncensored/1762652579.808624", - "retrieved_timestamp": "1762652579.808625", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Orion-zhen/Qwen2.5-7B-Instruct-Uncensored", - "developer": "Orion-zhen", - "inference_platform": "unknown", - "id": "Orion-zhen/Qwen2.5-7B-Instruct-Uncensored", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7204317876567508 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5473918652157296 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4773413897280967 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.43613541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4426529255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/Orion-zhen/Qwen2.5-7B-Instruct-Uncensored/691bef38-bc9e-4f8d-b774-9d7c62eec72b.json b/data/hfopenllm_v2/Orion-zhen/Qwen2.5-7B-Instruct-Uncensored/691bef38-bc9e-4f8d-b774-9d7c62eec72b.json new file mode 100644 index 000000000..1eff33056 --- /dev/null +++ b/data/hfopenllm_v2/Orion-zhen/Qwen2.5-7B-Instruct-Uncensored/691bef38-bc9e-4f8d-b774-9d7c62eec72b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Orion-zhen_Qwen2.5-7B-Instruct-Uncensored/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-Instruct-Uncensored", + "id": "Orion-zhen/Qwen2.5-7B-Instruct-Uncensored", + "developer": "Orion-zhen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7204 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5474 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4773 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4361 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4427 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Orion-zhen/phi-4-abliterated/5795f693-9ebc-47c6-9d2c-185dd0d32044.json b/data/hfopenllm_v2/Orion-zhen/phi-4-abliterated/5795f693-9ebc-47c6-9d2c-185dd0d32044.json new file mode 100644 index 000000000..93b48af5e --- /dev/null +++ b/data/hfopenllm_v2/Orion-zhen/phi-4-abliterated/5795f693-9ebc-47c6-9d2c-185dd0d32044.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Orion-zhen_phi-4-abliterated/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "phi-4-abliterated", + "id": "Orion-zhen/phi-4-abliterated", + "developer": "Orion-zhen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0576 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6698 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3021 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4044 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5006 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5292 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/P0x0/Astra-v1-12B/349ae5f5-55d0-4486-a6dc-2b5644fac045.json b/data/hfopenllm_v2/P0x0/Astra-v1-12B/349ae5f5-55d0-4486-a6dc-2b5644fac045.json deleted file mode 100644 index aaf4a92a5..000000000 --- a/data/hfopenllm_v2/P0x0/Astra-v1-12B/349ae5f5-55d0-4486-a6dc-2b5644fac045.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/P0x0_Astra-v1-12B/1762652579.8091059", - "retrieved_timestamp": "1762652579.8091059", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "P0x0/Astra-v1-12B", - "developer": "P0x0", - "inference_platform": "unknown", - "id": "P0x0/Astra-v1-12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28059437847134494 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5214506484138984 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11329305135951662 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4051875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3460771276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/P0x0/Astra-v1-12B/eb83f474-0d3d-488c-bc0f-93e5d1dfb2f3.json b/data/hfopenllm_v2/P0x0/Astra-v1-12B/eb83f474-0d3d-488c-bc0f-93e5d1dfb2f3.json new file mode 100644 index 000000000..0751e8835 --- /dev/null +++ b/data/hfopenllm_v2/P0x0/Astra-v1-12B/eb83f474-0d3d-488c-bc0f-93e5d1dfb2f3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/P0x0_Astra-v1-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Astra-v1-12B", + "id": "P0x0/Astra-v1-12B", + "developer": "P0x0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + 
"params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2806 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5215 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1133 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3138 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4052 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3461 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PJMixers-Dev/L3.2-Instruct-Thinking-v0.1-1B/3c942d2f-0b53-498e-ab05-71d5075cb974.json b/data/hfopenllm_v2/PJMixers-Dev/L3.2-Instruct-Thinking-v0.1-1B/3c942d2f-0b53-498e-ab05-71d5075cb974.json deleted file mode 100644 index ca7658046..000000000 --- a/data/hfopenllm_v2/PJMixers-Dev/L3.2-Instruct-Thinking-v0.1-1B/3c942d2f-0b53-498e-ab05-71d5075cb974.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/PJMixers-Dev_L3.2-Instruct-Thinking-v0.1-1B/1762652579.8095942", - "retrieved_timestamp": "1762652579.8095949", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "PJMixers-Dev/L3.2-Instruct-Thinking-v0.1-1B", - "developer": "PJMixers-Dev", - "inference_platform": "unknown", - "id": "PJMixers-Dev/L3.2-Instruct-Thinking-v0.1-1B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, 
- "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46276989498973836 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33018063718974094 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32621875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14827127659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/PJMixers-Dev/L3.2-Instruct-Thinking-v0.1-1B/f93b2053-11c4-4868-860f-90fbfe8288fc.json b/data/hfopenllm_v2/PJMixers-Dev/L3.2-Instruct-Thinking-v0.1-1B/f93b2053-11c4-4868-860f-90fbfe8288fc.json new file mode 100644 index 000000000..b26fc54c0 --- /dev/null +++ b/data/hfopenllm_v2/PJMixers-Dev/L3.2-Instruct-Thinking-v0.1-1B/f93b2053-11c4-4868-860f-90fbfe8288fc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/PJMixers-Dev_L3.2-Instruct-Thinking-v0.1-1B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3.2-Instruct-Thinking-v0.1-1B", + "id": "PJMixers-Dev/L3.2-Instruct-Thinking-v0.1-1B", + "developer": "PJMixers-Dev", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4628 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3302 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + 
"source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0544 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3262 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1483 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.1-Instruct-Interleaved-Zeroed-13B/8984fe95-9fd3-48ff-aa5f-18df63ecd6bb.json b/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.1-Instruct-Interleaved-Zeroed-13B/8984fe95-9fd3-48ff-aa5f-18df63ecd6bb.json new file mode 100644 index 000000000..a3d8dd0e7 --- /dev/null +++ b/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.1-Instruct-Interleaved-Zeroed-13B/8984fe95-9fd3-48ff-aa5f-18df63ecd6bb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/PJMixers-Dev_LLaMa-3.1-Instruct-Interleaved-Zeroed-13B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LLaMa-3.1-Instruct-Interleaved-Zeroed-13B", + "id": "PJMixers-Dev/LLaMa-3.1-Instruct-Interleaved-Zeroed-13B", + "developer": "PJMixers-Dev", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.047 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7871 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5073 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + 
"metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2002 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.387 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3767 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.1-Instruct-Interleaved-Zeroed-13B/fb66b283-bfd6-4437-95b7-d74a0d8d2814.json b/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.1-Instruct-Interleaved-Zeroed-13B/fb66b283-bfd6-4437-95b7-d74a0d8d2814.json deleted file mode 100644 index f330828c3..000000000 --- a/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.1-Instruct-Interleaved-Zeroed-13B/fb66b283-bfd6-4437-95b7-d74a0d8d2814.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/PJMixers-Dev_LLaMa-3.1-Instruct-Interleaved-Zeroed-13B/1762652579.809847", - "retrieved_timestamp": "1762652579.809848", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "PJMixers-Dev/LLaMa-3.1-Instruct-Interleaved-Zeroed-13B", - "developer": "PJMixers-Dev", - "inference_platform": "unknown", - "id": "PJMixers-Dev/LLaMa-3.1-Instruct-Interleaved-Zeroed-13B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.047 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7871015572015585 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5073267838961463 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2001510574018127 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": 
"Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3869895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3767453457446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.1-RomboTiesTest-8B/a0f6f5de-578c-4290-85b5-c51aed985074.json b/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.1-RomboTiesTest-8B/a0f6f5de-578c-4290-85b5-c51aed985074.json new file mode 100644 index 000000000..1df7617e1 --- /dev/null +++ b/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.1-RomboTiesTest-8B/a0f6f5de-578c-4290-85b5-c51aed985074.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/PJMixers-Dev_LLaMa-3.1-RomboTiesTest-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LLaMa-3.1-RomboTiesTest-8B", + "id": "PJMixers-Dev/LLaMa-3.1-RomboTiesTest-8B", + "developer": "PJMixers-Dev", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7825 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5073 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2002 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.387 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3767 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.1-RomboTiesTest2-8B/8ccc76ff-25c9-4706-b6a8-31b49f8be813.json b/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.1-RomboTiesTest2-8B/8ccc76ff-25c9-4706-b6a8-31b49f8be813.json new file mode 100644 index 000000000..3d4794050 --- /dev/null +++ b/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.1-RomboTiesTest2-8B/8ccc76ff-25c9-4706-b6a8-31b49f8be813.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/PJMixers-Dev_LLaMa-3.1-RomboTiesTest2-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LLaMa-3.1-RomboTiesTest2-8B", + "id": "PJMixers-Dev/LLaMa-3.1-RomboTiesTest2-8B", + "developer": "PJMixers-Dev", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7825 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5073 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2002 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.387 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + 
"dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3767 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B/1d91cdce-0bdb-4567-9296-6225db3aa0bc.json b/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B/1d91cdce-0bdb-4567-9296-6225db3aa0bc.json deleted file mode 100644 index c7c4d3d90..000000000 --- a/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B/1d91cdce-0bdb-4567-9296-6225db3aa0bc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/PJMixers-Dev_LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B/1762652579.8105159", - "retrieved_timestamp": "1762652579.810517", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B", - "developer": "PJMixers-Dev", - "inference_platform": "unknown", - "id": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.693054428915278 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4556166737589294 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1216012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37003125000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.312749335106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B/924f8b31-506d-4df2-8a7b-d0cd66d55f6d.json b/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B/924f8b31-506d-4df2-8a7b-d0cd66d55f6d.json new file mode 100644 index 000000000..e62bf8d2d --- /dev/null +++ 
b/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B/924f8b31-506d-4df2-8a7b-d0cd66d55f6d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/PJMixers-Dev_LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B", + "id": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B", + "developer": "PJMixers-Dev", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6931 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4556 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1216 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.37 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3127 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B/8e7dfd9f-350d-406c-811d-453f1744dd53.json b/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B/8e7dfd9f-350d-406c-811d-453f1744dd53.json new file mode 100644 index 000000000..ddc4092b0 --- /dev/null +++ b/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B/8e7dfd9f-350d-406c-811d-453f1744dd53.json @@ -0,0 
+1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/PJMixers-Dev_LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B", + "id": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B", + "developer": "PJMixers-Dev", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6292 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4581 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1299 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3659 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3115 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B/d1875dfd-05ab-4a49-8c7f-02cddf35a695.json b/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B/d1875dfd-05ab-4a49-8c7f-02cddf35a695.json deleted file mode 100644 index c6bdb2dd3..000000000 --- a/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B/d1875dfd-05ab-4a49-8c7f-02cddf35a695.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/PJMixers-Dev_LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B/1762652579.810729", - "retrieved_timestamp": "1762652579.81073", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B", - "developer": "PJMixers-Dev", - "inference_platform": "unknown", - "id": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6291573026237051 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45814952191015346 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.365875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3115026595744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B/62b12d95-1da2-407c-8552-8c5e951c5c85.json b/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B/62b12d95-1da2-407c-8552-8c5e951c5c85.json deleted file mode 100644 index c2ae4ec92..000000000 --- a/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B/62b12d95-1da2-407c-8552-8c5e951c5c85.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/PJMixers-Dev_LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B/1762652579.8109388", - "retrieved_timestamp": "1762652579.8109398", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B", - "developer": "PJMixers-Dev", - "inference_platform": "unknown", - "id": 
"PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6503898544750152 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45107942950222196 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12613293051359517 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3687291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3107546542553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B/b713d1d2-351f-43a1-b77d-27723e1d4267.json b/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B/b713d1d2-351f-43a1-b77d-27723e1d4267.json new file mode 100644 index 000000000..68fb66922 --- /dev/null +++ b/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B/b713d1d2-351f-43a1-b77d-27723e1d4267.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/PJMixers-Dev_LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B", + "id": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B", + "developer": "PJMixers-Dev", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6504 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": 
"hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4511 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1261 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2718 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3687 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3108 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMixBread-v0.1-3B/322a9442-174f-4223-b839-6f8f9664d5e5.json b/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMixBread-v0.1-3B/322a9442-174f-4223-b839-6f8f9664d5e5.json new file mode 100644 index 000000000..b3614926d --- /dev/null +++ b/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMixBread-v0.1-3B/322a9442-174f-4223-b839-6f8f9664d5e5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/PJMixers-Dev_LLaMa-3.2-Instruct-JankMixBread-v0.1-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LLaMa-3.2-Instruct-JankMixBread-v0.1-3B", + "id": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMixBread-v0.1-3B", + "developer": "PJMixers-Dev", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5041 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on 
BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4483 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1307 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3516 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3083 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMixBread-v0.1-3B/56f36430-4bb1-425d-ac4b-30d85237667c.json b/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMixBread-v0.1-3B/56f36430-4bb1-425d-ac4b-30d85237667c.json deleted file mode 100644 index 2da13762b..000000000 --- a/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMixBread-v0.1-3B/56f36430-4bb1-425d-ac4b-30d85237667c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/PJMixers-Dev_LLaMa-3.2-Instruct-JankMixBread-v0.1-3B/1762652579.8111491", - "retrieved_timestamp": "1762652579.81115", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMixBread-v0.1-3B", - "developer": "PJMixers-Dev", - "inference_platform": "unknown", - "id": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMixBread-v0.1-3B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5040858256093831 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4483158594793648 - } - }, - { - "evaluation_name": "MATH 
Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13066465256797583 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3515520833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.308344414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/PJMixers-Dev/Qwen2.5-RomboTiesTest-7B/b12e71d1-c435-4172-a28f-38e26791dadb.json b/data/hfopenllm_v2/PJMixers-Dev/Qwen2.5-RomboTiesTest-7B/b12e71d1-c435-4172-a28f-38e26791dadb.json new file mode 100644 index 000000000..16e94042b --- /dev/null +++ b/data/hfopenllm_v2/PJMixers-Dev/Qwen2.5-RomboTiesTest-7B/b12e71d1-c435-4172-a28f-38e26791dadb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/PJMixers-Dev_Qwen2.5-RomboTiesTest-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-RomboTiesTest-7B", + "id": "PJMixers-Dev/Qwen2.5-RomboTiesTest-7B", + "developer": "PJMixers-Dev", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.808 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7558 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5399 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4962 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4034 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4285 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PJMixers/LLaMa-3-CursedStock-v2.0-8B/ad33b0e8-39c8-4118-81bd-bc86b482f122.json b/data/hfopenllm_v2/PJMixers/LLaMa-3-CursedStock-v2.0-8B/ad33b0e8-39c8-4118-81bd-bc86b482f122.json new file mode 100644 index 000000000..4d593a418 --- /dev/null +++ b/data/hfopenllm_v2/PJMixers/LLaMa-3-CursedStock-v2.0-8B/ad33b0e8-39c8-4118-81bd-bc86b482f122.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/PJMixers_LLaMa-3-CursedStock-v2.0-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LLaMa-3-CursedStock-v2.0-8B", + "id": "PJMixers/LLaMa-3-CursedStock-v2.0-8B", + "developer": "PJMixers", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6331 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5271 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0944 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + 
}, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3856 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3556 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Parissa3/test-model/53cb44c7-f7bc-40fa-88e7-511b9dfab004.json b/data/hfopenllm_v2/Parissa3/test-model/53cb44c7-f7bc-40fa-88e7-511b9dfab004.json deleted file mode 100644 index bcd69e86a..000000000 --- a/data/hfopenllm_v2/Parissa3/test-model/53cb44c7-f7bc-40fa-88e7-511b9dfab004.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Parissa3_test-model/1762652579.811859", - "retrieved_timestamp": "1762652579.81186", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Parissa3/test-model", - "developer": "Parissa3", - "inference_platform": "unknown", - "id": "Parissa3/test-model", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3882564927725103 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5193916761801759 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46853125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3056848404255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/Parissa3/test-model/db8a7864-293b-45e9-995b-5301071c902d.json b/data/hfopenllm_v2/Parissa3/test-model/db8a7864-293b-45e9-995b-5301071c902d.json new file mode 100644 index 000000000..f98fab749 --- /dev/null 
+++ b/data/hfopenllm_v2/Parissa3/test-model/db8a7864-293b-45e9-995b-5301071c902d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Parissa3_test-model/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "test-model", + "id": "Parissa3/test-model", + "developer": "Parissa3", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3883 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5194 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.065 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4685 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3057 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Pinkstack/PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B/31e3beea-28dc-4b47-a5e9-5fafc89226db.json b/data/hfopenllm_v2/Pinkstack/PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B/31e3beea-28dc-4b47-a5e9-5fafc89226db.json new file mode 100644 index 000000000..d4a800079 --- /dev/null +++ b/data/hfopenllm_v2/Pinkstack/PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B/31e3beea-28dc-4b47-a5e9-5fafc89226db.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Pinkstack_PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B/1770682486.623709", + 
"retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B", + "id": "Pinkstack/PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B", + "developer": "Pinkstack", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5085 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4711 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1692 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4479 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3511 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Pinkstack/SuperThoughts-CoT-14B-16k-o1-QwQ/49315a95-394f-4508-8e6c-7c1d5547c257.json b/data/hfopenllm_v2/Pinkstack/SuperThoughts-CoT-14B-16k-o1-QwQ/49315a95-394f-4508-8e6c-7c1d5547c257.json new file mode 100644 index 000000000..13d84a54a --- /dev/null +++ b/data/hfopenllm_v2/Pinkstack/SuperThoughts-CoT-14B-16k-o1-QwQ/49315a95-394f-4508-8e6c-7c1d5547c257.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Pinkstack_SuperThoughts-CoT-14B-16k-o1-QwQ/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": 
"Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SuperThoughts-CoT-14B-16k-o1-QwQ", + "id": "Pinkstack/SuperThoughts-CoT-14B-16k-o1-QwQ", + "developer": "Pinkstack", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0515 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.672 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4199 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3926 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4914 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5268 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Pinkstack/SuperThoughts-CoT-14B-16k-o1-QwQ/c604f0fb-517d-45db-9e1c-6c911bce43e7.json b/data/hfopenllm_v2/Pinkstack/SuperThoughts-CoT-14B-16k-o1-QwQ/c604f0fb-517d-45db-9e1c-6c911bce43e7.json deleted file mode 100644 index 565cbd6a3..000000000 --- a/data/hfopenllm_v2/Pinkstack/SuperThoughts-CoT-14B-16k-o1-QwQ/c604f0fb-517d-45db-9e1c-6c911bce43e7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Pinkstack_SuperThoughts-CoT-14B-16k-o1-QwQ/1762652579.812447", - "retrieved_timestamp": "1762652579.812449", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" 
- }, - "model_info": { - "name": "Pinkstack/SuperThoughts-CoT-14B-16k-o1-QwQ", - "developer": "Pinkstack", - "inference_platform": "unknown", - "id": "Pinkstack/SuperThoughts-CoT-14B-16k-o1-QwQ", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.051457909458015844 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6719989821162488 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4199395770392749 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3926174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4913541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.526845079787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/Pinkstack/Superthoughts-lite-1.8B-experimental-o1/375d3a94-97af-47ef-82af-afd7581663d4.json b/data/hfopenllm_v2/Pinkstack/Superthoughts-lite-1.8B-experimental-o1/375d3a94-97af-47ef-82af-afd7581663d4.json new file mode 100644 index 000000000..c0e931e91 --- /dev/null +++ b/data/hfopenllm_v2/Pinkstack/Superthoughts-lite-1.8B-experimental-o1/375d3a94-97af-47ef-82af-afd7581663d4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Pinkstack_Superthoughts-lite-1.8B-experimental-o1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Superthoughts-lite-1.8B-experimental-o1", + "id": "Pinkstack/Superthoughts-lite-1.8B-experimental-o1", + "developer": "Pinkstack", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.812 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0375 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": 
"hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3435 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0317 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2752 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3354 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1851 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Pinkstack/Superthoughts-lite-1.8B-experimental-o1/fba2ce2f-6c30-4af9-ae3a-d23f39f3f963.json b/data/hfopenllm_v2/Pinkstack/Superthoughts-lite-1.8B-experimental-o1/fba2ce2f-6c30-4af9-ae3a-d23f39f3f963.json deleted file mode 100644 index 4696e09c0..000000000 --- a/data/hfopenllm_v2/Pinkstack/Superthoughts-lite-1.8B-experimental-o1/fba2ce2f-6c30-4af9-ae3a-d23f39f3f963.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Pinkstack_Superthoughts-lite-1.8B-experimental-o1/1762652579.81273", - "retrieved_timestamp": "1762652579.81273", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Pinkstack/Superthoughts-lite-1.8B-experimental-o1", - "developer": "Pinkstack", - "inference_platform": "unknown", - "id": "Pinkstack/Superthoughts-lite-1.8B-experimental-o1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.812 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0375193375798437 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3434736647957908 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03172205438066465 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33539583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18508976063829788 - } - } - ] -} diff --git a/data/hfopenllm_v2/Pinkstack/Superthoughts-lite-v1/77cfe896-4aa1-4bcd-a39a-f437c3f7e738.json b/data/hfopenllm_v2/Pinkstack/Superthoughts-lite-v1/77cfe896-4aa1-4bcd-a39a-f437c3f7e738.json new file mode 100644 index 000000000..7b240a43f --- /dev/null +++ b/data/hfopenllm_v2/Pinkstack/Superthoughts-lite-v1/77cfe896-4aa1-4bcd-a39a-f437c3f7e738.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Pinkstack_Superthoughts-lite-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Superthoughts-lite-v1", + "id": "Pinkstack/Superthoughts-lite-v1", + "developer": "Pinkstack", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.711 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1659 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3466 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0295 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3672 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1755 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Pinkstack/Superthoughts-lite-v1/ff308837-dc35-4257-a4cd-de463feb733e.json b/data/hfopenllm_v2/Pinkstack/Superthoughts-lite-v1/ff308837-dc35-4257-a4cd-de463feb733e.json deleted file mode 100644 index f54746c97..000000000 --- a/data/hfopenllm_v2/Pinkstack/Superthoughts-lite-v1/ff308837-dc35-4257-a4cd-de463feb733e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Pinkstack_Superthoughts-lite-v1/1762652579.812961", - "retrieved_timestamp": "1762652579.812962", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Pinkstack/Superthoughts-lite-v1", - "developer": "Pinkstack", - "inference_platform": "unknown", - "id": "Pinkstack/Superthoughts-lite-v1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.711 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1658643510330368 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3465571905256149 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3671770833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17553191489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/PocketDoc/Dans-Instruct-CoreCurriculum-12b/3d69ec7d-9999-4e16-8dc9-99fad35e156e.json b/data/hfopenllm_v2/PocketDoc/Dans-Instruct-CoreCurriculum-12b/3d69ec7d-9999-4e16-8dc9-99fad35e156e.json new file mode 100644 index 000000000..cb170495e --- /dev/null +++ b/data/hfopenllm_v2/PocketDoc/Dans-Instruct-CoreCurriculum-12b/3d69ec7d-9999-4e16-8dc9-99fad35e156e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/PocketDoc_Dans-Instruct-CoreCurriculum-12b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dans-Instruct-CoreCurriculum-12b", + "id": "PocketDoc/Dans-Instruct-CoreCurriculum-12b", + "developer": "PocketDoc", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2191 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3789 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0544 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4096 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1219 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/PocketDoc/Dans-Instruct-CoreCurriculum-12b/d8145a39-f1d0-4b6e-958b-a96585eeec9f.json b/data/hfopenllm_v2/PocketDoc/Dans-Instruct-CoreCurriculum-12b/d8145a39-f1d0-4b6e-958b-a96585eeec9f.json deleted file mode 100644 index 7e1be8ae4..000000000 --- a/data/hfopenllm_v2/PocketDoc/Dans-Instruct-CoreCurriculum-12b/d8145a39-f1d0-4b6e-958b-a96585eeec9f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/PocketDoc_Dans-Instruct-CoreCurriculum-12b/1762652579.81328", - "retrieved_timestamp": "1762652579.813282", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "PocketDoc/Dans-Instruct-CoreCurriculum-12b", - "developer": "PocketDoc", - "inference_platform": "unknown", - "id": "PocketDoc/Dans-Instruct-CoreCurriculum-12b", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21914520139895477 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3788739075240266 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4095625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1219248670212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/PocketDoc/Dans-PersonalityEngine-V1.1.0-12b/c005ab13-1d42-4e28-802e-12438aab35a4.json b/data/hfopenllm_v2/PocketDoc/Dans-PersonalityEngine-V1.1.0-12b/c005ab13-1d42-4e28-802e-12438aab35a4.json deleted file mode 100644 index 885ed543f..000000000 --- a/data/hfopenllm_v2/PocketDoc/Dans-PersonalityEngine-V1.1.0-12b/c005ab13-1d42-4e28-802e-12438aab35a4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/PocketDoc_Dans-PersonalityEngine-V1.1.0-12b/1762652579.813654", - "retrieved_timestamp": "1762652579.8136551", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "PocketDoc/Dans-PersonalityEngine-V1.1.0-12b", - "developer": "PocketDoc", - "inference_platform": "unknown", - "id": "PocketDoc/Dans-PersonalityEngine-V1.1.0-12b", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7074672978807343 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5361046243199591 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10498489425981873 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45867708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32621343085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/PocketDoc/Dans-PersonalityEngine-V1.1.0-12b/d2a7459b-8a12-4529-b978-c7237979f16b.json b/data/hfopenllm_v2/PocketDoc/Dans-PersonalityEngine-V1.1.0-12b/d2a7459b-8a12-4529-b978-c7237979f16b.json new file mode 100644 index 000000000..54790f749 --- /dev/null +++ b/data/hfopenllm_v2/PocketDoc/Dans-PersonalityEngine-V1.1.0-12b/d2a7459b-8a12-4529-b978-c7237979f16b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/PocketDoc_Dans-PersonalityEngine-V1.1.0-12b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dans-PersonalityEngine-V1.1.0-12b", + "id": "PocketDoc/Dans-PersonalityEngine-V1.1.0-12b", + "developer": "PocketDoc", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.7075 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5361 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.105 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4587 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3262 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PocketDoc/Dans-PersonalityEngine-V1.2.0-24b/38dd1b21-b357-4daf-94b3-c4a28809e56c.json b/data/hfopenllm_v2/PocketDoc/Dans-PersonalityEngine-V1.2.0-24b/38dd1b21-b357-4daf-94b3-c4a28809e56c.json deleted file mode 100644 index ec19a74f5..000000000 --- a/data/hfopenllm_v2/PocketDoc/Dans-PersonalityEngine-V1.2.0-24b/38dd1b21-b357-4daf-94b3-c4a28809e56c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/PocketDoc_Dans-PersonalityEngine-V1.2.0-24b/1762652579.813962", - "retrieved_timestamp": "1762652579.813962", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "PocketDoc/Dans-PersonalityEngine-V1.2.0-24b", - "developer": "PocketDoc", - "inference_platform": "unknown", - "id": "PocketDoc/Dans-PersonalityEngine-V1.2.0-24b", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7886252920029965 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on 
BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6421213844206719 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24546827794561935 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42996875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5025764627659575 - } - } - ] -} diff --git a/data/hfopenllm_v2/PocketDoc/Dans-PersonalityEngine-V1.2.0-24b/e7a228ad-69de-471a-9f31-6bdc7221999c.json b/data/hfopenllm_v2/PocketDoc/Dans-PersonalityEngine-V1.2.0-24b/e7a228ad-69de-471a-9f31-6bdc7221999c.json new file mode 100644 index 000000000..55de50f7e --- /dev/null +++ b/data/hfopenllm_v2/PocketDoc/Dans-PersonalityEngine-V1.2.0-24b/e7a228ad-69de-471a-9f31-6bdc7221999c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/PocketDoc_Dans-PersonalityEngine-V1.2.0-24b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dans-PersonalityEngine-V1.2.0-24b", + "id": "PocketDoc/Dans-PersonalityEngine-V1.2.0-24b", + "developer": "PocketDoc", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7886 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6421 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2455 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": 
"GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3188 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.43 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5026 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PocketDoc/Dans-PersonalityEngine-v1.0.0-8b/9196ae39-adb0-4d53-8399-0ccd4d628065.json b/data/hfopenllm_v2/PocketDoc/Dans-PersonalityEngine-v1.0.0-8b/9196ae39-adb0-4d53-8399-0ccd4d628065.json new file mode 100644 index 000000000..28534a7f1 --- /dev/null +++ b/data/hfopenllm_v2/PocketDoc/Dans-PersonalityEngine-v1.0.0-8b/9196ae39-adb0-4d53-8399-0ccd4d628065.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/PocketDoc_Dans-PersonalityEngine-v1.0.0-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dans-PersonalityEngine-v1.0.0-8b", + "id": "PocketDoc/Dans-PersonalityEngine-v1.0.0-8b", + "developer": "PocketDoc", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4982 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4733 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0816 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3542 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3065 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PocketDoc/Dans-PersonalityEngine-v1.0.0-8b/f3623b9f-3e3f-4b7b-a9f5-f0a15bf26f48.json b/data/hfopenllm_v2/PocketDoc/Dans-PersonalityEngine-v1.0.0-8b/f3623b9f-3e3f-4b7b-a9f5-f0a15bf26f48.json deleted file mode 100644 index 3c96a2899..000000000 --- a/data/hfopenllm_v2/PocketDoc/Dans-PersonalityEngine-v1.0.0-8b/f3623b9f-3e3f-4b7b-a9f5-f0a15bf26f48.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/PocketDoc_Dans-PersonalityEngine-v1.0.0-8b/1762652579.814201", - "retrieved_timestamp": "1762652579.814202", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "PocketDoc/Dans-PersonalityEngine-v1.0.0-8b", - "developer": "PocketDoc", - "inference_platform": "unknown", - "id": "PocketDoc/Dans-PersonalityEngine-v1.0.0-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.498190357141274 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47325544259149366 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08157099697885196 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35415625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3065159574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/PocketDoc/Dans-SakuraKaze-V1.0.0-12b/b78ef40e-91b1-401d-9576-1ac2f600b32a.json b/data/hfopenllm_v2/PocketDoc/Dans-SakuraKaze-V1.0.0-12b/b78ef40e-91b1-401d-9576-1ac2f600b32a.json deleted file mode 100644 index 1c383102e..000000000 --- a/data/hfopenllm_v2/PocketDoc/Dans-SakuraKaze-V1.0.0-12b/b78ef40e-91b1-401d-9576-1ac2f600b32a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/PocketDoc_Dans-SakuraKaze-V1.0.0-12b/1762652579.81442", - "retrieved_timestamp": "1762652579.81442", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "PocketDoc/Dans-SakuraKaze-V1.0.0-12b", - "developer": "PocketDoc", - "inference_platform": "unknown", - "id": "PocketDoc/Dans-SakuraKaze-V1.0.0-12b", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6520133246452745 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5405357251132225 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09290030211480363 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47452083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35596742021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/PocketDoc/Dans-SakuraKaze-V1.0.0-12b/ea318f99-a1ab-41ed-ae5d-39c62ac40e1b.json b/data/hfopenllm_v2/PocketDoc/Dans-SakuraKaze-V1.0.0-12b/ea318f99-a1ab-41ed-ae5d-39c62ac40e1b.json new file mode 100644 index 000000000..4fbe90b2e --- /dev/null +++ b/data/hfopenllm_v2/PocketDoc/Dans-SakuraKaze-V1.0.0-12b/ea318f99-a1ab-41ed-ae5d-39c62ac40e1b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/PocketDoc_Dans-SakuraKaze-V1.0.0-12b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": 
"HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dans-SakuraKaze-V1.0.0-12b", + "id": "PocketDoc/Dans-SakuraKaze-V1.0.0-12b", + "developer": "PocketDoc", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.652 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5405 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0929 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4745 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.356 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PowerInfer/SmallThinker-3B-Preview/05f69fd6-a77e-478d-ad86-3e83e615e892.json b/data/hfopenllm_v2/PowerInfer/SmallThinker-3B-Preview/05f69fd6-a77e-478d-ad86-3e83e615e892.json new file mode 100644 index 000000000..d49ad061e --- /dev/null +++ b/data/hfopenllm_v2/PowerInfer/SmallThinker-3B-Preview/05f69fd6-a77e-478d-ad86-3e83e615e892.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/PowerInfer_SmallThinker-3B-Preview/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SmallThinker-3B-Preview", + "id": 
"PowerInfer/SmallThinker-3B-Preview", + "developer": "PowerInfer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.62 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4495 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2779 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3525 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3018 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PowerInfer/SmallThinker-3B-Preview/6613aff7-8f26-4b74-b08b-37fbd7990e42.json b/data/hfopenllm_v2/PowerInfer/SmallThinker-3B-Preview/6613aff7-8f26-4b74-b08b-37fbd7990e42.json deleted file mode 100644 index 946caa91d..000000000 --- a/data/hfopenllm_v2/PowerInfer/SmallThinker-3B-Preview/6613aff7-8f26-4b74-b08b-37fbd7990e42.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/PowerInfer_SmallThinker-3B-Preview/1762652579.814635", - "retrieved_timestamp": "1762652579.814636", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "PowerInfer/SmallThinker-3B-Preview", - "developer": "PowerInfer", - "inference_platform": "unknown", - "id": 
"PowerInfer/SmallThinker-3B-Preview", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6199650261306666 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4494922016660919 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27794561933534745 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3524791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3017785904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/PranavHarshan/LaMistral-V4/5b8e9508-befb-4674-bd84-9c722a0864ce.json b/data/hfopenllm_v2/PranavHarshan/LaMistral-V4/5b8e9508-befb-4674-bd84-9c722a0864ce.json new file mode 100644 index 000000000..8281de999 --- /dev/null +++ b/data/hfopenllm_v2/PranavHarshan/LaMistral-V4/5b8e9508-befb-4674-bd84-9c722a0864ce.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/PranavHarshan_LaMistral-V4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LaMistral-V4", + "id": "PranavHarshan/LaMistral-V4", + "developer": "PranavHarshan", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6239 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5184 + } + }, + { + "evaluation_name": "MATH 
Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0687 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.328 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3643 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3599 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PranavHarshan/MedNarra-X1/86023703-88e2-4219-b38b-4c871e2ee381.json b/data/hfopenllm_v2/PranavHarshan/MedNarra-X1/86023703-88e2-4219-b38b-4c871e2ee381.json deleted file mode 100644 index afff89abc..000000000 --- a/data/hfopenllm_v2/PranavHarshan/MedNarra-X1/86023703-88e2-4219-b38b-4c871e2ee381.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/PranavHarshan_MedNarra-X1/1762652579.815135", - "retrieved_timestamp": "1762652579.815136", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "PranavHarshan/MedNarra-X1", - "developer": "PranavHarshan", - "inference_platform": "unknown", - "id": "PranavHarshan/MedNarra-X1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43384331351924005 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46371668179774184 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", 
- "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35403125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34308510638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/PranavHarshan/MedNarra-X1/8beb3730-23e8-4b89-933d-2d3f1a1d1365.json b/data/hfopenllm_v2/PranavHarshan/MedNarra-X1/8beb3730-23e8-4b89-933d-2d3f1a1d1365.json new file mode 100644 index 000000000..3cf872d63 --- /dev/null +++ b/data/hfopenllm_v2/PranavHarshan/MedNarra-X1/8beb3730-23e8-4b89-933d-2d3f1a1d1365.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/PranavHarshan_MedNarra-X1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MedNarra-X1", + "id": "PranavHarshan/MedNarra-X1", + "developer": "PranavHarshan", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4338 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4637 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0438 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.354 + } + }, + { 
+ "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3431 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Appended/07417712-1933-4920-8964-67ba74bf6d01.json b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Appended/07417712-1933-4920-8964-67ba74bf6d01.json new file mode 100644 index 000000000..d98817118 --- /dev/null +++ b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Appended/07417712-1933-4920-8964-67ba74bf6d01.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_10.7B_48Layers-Appended/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenChat-3.5-0106_10.7B_48Layers-Appended", + "id": "Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Appended", + "developer": "Pretergeek", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 10.732 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5961 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.462 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0793 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4254 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", 
+ "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.329 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Appended/eca9180f-20d5-4bcd-9a74-e2f69c4ea4ad.json b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Appended/eca9180f-20d5-4bcd-9a74-e2f69c4ea4ad.json deleted file mode 100644 index a8f49e288..000000000 --- a/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Appended/eca9180f-20d5-4bcd-9a74-e2f69c4ea4ad.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_10.7B_48Layers-Appended/1762652579.815407", - "retrieved_timestamp": "1762652579.815407", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Appended", - "developer": "Pretergeek", - "inference_platform": "unknown", - "id": "Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Appended", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 10.732 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5960595663949432 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4619637884426022 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07930513595166164 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42540625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3289561170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Interleaved/65d32305-4f23-4041-a107-8625822c1322.json b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Interleaved/65d32305-4f23-4041-a107-8625822c1322.json deleted file mode 100644 index 0f695d9c3..000000000 --- 
a/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Interleaved/65d32305-4f23-4041-a107-8625822c1322.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_10.7B_48Layers-Interleaved/1762652579.81567", - "retrieved_timestamp": "1762652579.815671", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Interleaved", - "developer": "Pretergeek", - "inference_platform": "unknown", - "id": "Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Interleaved", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 10.732 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5960595663949432 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4619637884426022 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07779456193353475 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42540625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3298703457446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Interleaved/ae4cc05d-a65a-4f18-a99c-f133603686d1.json b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Interleaved/ae4cc05d-a65a-4f18-a99c-f133603686d1.json new file mode 100644 index 000000000..80b91addb --- /dev/null +++ b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Interleaved/ae4cc05d-a65a-4f18-a99c-f133603686d1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_10.7B_48Layers-Interleaved/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenChat-3.5-0106_10.7B_48Layers-Interleaved", + "id": 
"Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Interleaved", + "developer": "Pretergeek", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 10.732 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5961 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.462 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0778 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4254 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3299 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_32K-PoSE/195acbac-1db7-47ed-907f-98e312fc8921.json b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_32K-PoSE/195acbac-1db7-47ed-907f-98e312fc8921.json deleted file mode 100644 index eafdd5014..000000000 --- a/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_32K-PoSE/195acbac-1db7-47ed-907f-98e312fc8921.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_32K-PoSE/1762652579.815889", - "retrieved_timestamp": "1762652579.8158898", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Pretergeek/OpenChat-3.5-0106_32K-PoSE", - "developer": "Pretergeek", - "inference_platform": 
"unknown", - "id": "Pretergeek/OpenChat-3.5-0106_32K-PoSE", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3968991165662664 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3471309425137119 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42054166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.203125 - } - } - ] -} diff --git a/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_32K-PoSE/54df4d3e-0ef0-4e30-aa46-b47a4589a34c.json b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_32K-PoSE/54df4d3e-0ef0-4e30-aa46-b47a4589a34c.json new file mode 100644 index 000000000..f50ca0914 --- /dev/null +++ b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_32K-PoSE/54df4d3e-0ef0-4e30-aa46-b47a4589a34c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_32K-PoSE/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenChat-3.5-0106_32K-PoSE", + "id": "Pretergeek/OpenChat-3.5-0106_32K-PoSE", + "developer": "Pretergeek", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3969 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + 
}, + "score_details": { + "score": 0.3471 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0264 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4205 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2031 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Appended/349bccfd-1816-4845-a1b9-2d9f4936adea.json b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Appended/349bccfd-1816-4845-a1b9-2d9f4936adea.json deleted file mode 100644 index 9de90f022..000000000 --- a/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Appended/349bccfd-1816-4845-a1b9-2d9f4936adea.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_8.11B_36Layers-Appended/1762652579.8160908", - "retrieved_timestamp": "1762652579.8160908", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Appended", - "developer": "Pretergeek", - "inference_platform": "unknown", - "id": "Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Appended", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 8.114 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5975833011963811 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4619637884426022 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07930513595166164 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42540625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3289561170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Appended/a717d466-9157-4991-8459-f39847d914a2.json b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Appended/a717d466-9157-4991-8459-f39847d914a2.json new file mode 100644 index 000000000..a32aae183 --- /dev/null +++ b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Appended/a717d466-9157-4991-8459-f39847d914a2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_8.11B_36Layers-Appended/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenChat-3.5-0106_8.11B_36Layers-Appended", + "id": "Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Appended", + "developer": "Pretergeek", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 8.114 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5976 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.462 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0793 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 
1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4254 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.329 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Interleaved/15a8789b-27de-49d1-b3e5-9b1fc9b5694e.json b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Interleaved/15a8789b-27de-49d1-b3e5-9b1fc9b5694e.json new file mode 100644 index 000000000..eea5d5d83 --- /dev/null +++ b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Interleaved/15a8789b-27de-49d1-b3e5-9b1fc9b5694e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_8.11B_36Layers-Interleaved/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenChat-3.5-0106_8.11B_36Layers-Interleaved", + "id": "Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Interleaved", + "developer": "Pretergeek", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 8.114 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5961 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4621 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0778 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + 
"source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4241 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3299 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Interleaved/c2e26b8a-3a12-4cb8-888e-96affc8cbac9.json b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Interleaved/c2e26b8a-3a12-4cb8-888e-96affc8cbac9.json deleted file mode 100644 index 251cf30f3..000000000 --- a/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Interleaved/c2e26b8a-3a12-4cb8-888e-96affc8cbac9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_8.11B_36Layers-Interleaved/1762652579.8163", - "retrieved_timestamp": "1762652579.8163", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Interleaved", - "developer": "Pretergeek", - "inference_platform": "unknown", - "id": "Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Interleaved", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 8.114 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5960595663949432 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46213045510926887 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07779456193353475 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42407291666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.3298703457446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Appended/921562fe-cc21-4ff3-93de-a62e1d4bf7e7.json b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Appended/921562fe-cc21-4ff3-93de-a62e1d4bf7e7.json new file mode 100644 index 000000000..8ecc037ff --- /dev/null +++ b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Appended/921562fe-cc21-4ff3-93de-a62e1d4bf7e7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_8.99B_40Layers-Appended/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenChat-3.5-0106_8.99B_40Layers-Appended", + "id": "Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Appended", + "developer": "Pretergeek", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 8.987 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5961 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.462 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0793 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4254 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.329 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Appended/a70222dc-0589-4f09-ac8c-3ff4fa72328f.json b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Appended/a70222dc-0589-4f09-ac8c-3ff4fa72328f.json deleted file mode 100644 index fc610749b..000000000 --- a/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Appended/a70222dc-0589-4f09-ac8c-3ff4fa72328f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_8.99B_40Layers-Appended/1762652579.81651", - "retrieved_timestamp": "1762652579.816511", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Appended", - "developer": "Pretergeek", - "inference_platform": "unknown", - "id": "Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Appended", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 8.987 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5960595663949432 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4619637884426022 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07930513595166164 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42540625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3289561170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Interleaved/19eb8f3a-ca9d-4da4-8e7e-96eebfd33576.json b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Interleaved/19eb8f3a-ca9d-4da4-8e7e-96eebfd33576.json deleted file mode 100644 index 96ce3b4eb..000000000 --- a/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Interleaved/19eb8f3a-ca9d-4da4-8e7e-96eebfd33576.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_8.99B_40Layers-Interleaved/1762652579.816719", - "retrieved_timestamp": "1762652579.816719", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Interleaved", - "developer": "Pretergeek", - "inference_platform": "unknown", - "id": "Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Interleaved", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 8.987 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5975833011963811 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46213045510926887 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07779456193353475 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42407291666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3298703457446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Interleaved/863969d9-e567-43cc-a0a9-7f80eaba374a.json b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Interleaved/863969d9-e567-43cc-a0a9-7f80eaba374a.json new file mode 100644 index 000000000..dde7d977d --- /dev/null +++ b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Interleaved/863969d9-e567-43cc-a0a9-7f80eaba374a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_8.99B_40Layers-Interleaved/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenChat-3.5-0106_8.99B_40Layers-Interleaved", + "id": "Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Interleaved", + "developer": "Pretergeek", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 8.987 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + 
"hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5976 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4621 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0778 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4241 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3299 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_9.86B_44Layers-Appended/2987fa45-363e-4a07-8e9f-db01586a135b.json b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_9.86B_44Layers-Appended/2987fa45-363e-4a07-8e9f-db01586a135b.json new file mode 100644 index 000000000..465c9e212 --- /dev/null +++ b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_9.86B_44Layers-Appended/2987fa45-363e-4a07-8e9f-db01586a135b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_9.86B_44Layers-Appended/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenChat-3.5-0106_9.86B_44Layers-Appended", + "id": "Pretergeek/OpenChat-3.5-0106_9.86B_44Layers-Appended", + "developer": "Pretergeek", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 9.859 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5961 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.462 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0793 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4254 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.329 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_9.86B_44Layers-Appended/e44eddb9-9764-4bc9-be85-ec7995846da0.json b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_9.86B_44Layers-Appended/e44eddb9-9764-4bc9-be85-ec7995846da0.json deleted file mode 100644 index 5ce059edd..000000000 --- a/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_9.86B_44Layers-Appended/e44eddb9-9764-4bc9-be85-ec7995846da0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_9.86B_44Layers-Appended/1762652579.816936", - "retrieved_timestamp": "1762652579.816937", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Pretergeek/OpenChat-3.5-0106_9.86B_44Layers-Appended", - "developer": "Pretergeek", - "inference_platform": "unknown", - "id": "Pretergeek/OpenChat-3.5-0106_9.86B_44Layers-Appended", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 9.859 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.5960595663949432 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4619637884426022 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07930513595166164 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42540625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3289561170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/Pretergeek/openchat-3.5-0106_Rebased_Mistral-7B-v0.2/3488de21-d9a6-49e8-ba8f-d9beee9bdabe.json b/data/hfopenllm_v2/Pretergeek/openchat-3.5-0106_Rebased_Mistral-7B-v0.2/3488de21-d9a6-49e8-ba8f-d9beee9bdabe.json new file mode 100644 index 000000000..cf3bb48e6 --- /dev/null +++ b/data/hfopenllm_v2/Pretergeek/openchat-3.5-0106_Rebased_Mistral-7B-v0.2/3488de21-d9a6-49e8-ba8f-d9beee9bdabe.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Pretergeek_openchat-3.5-0106_Rebased_Mistral-7B-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openchat-3.5-0106_Rebased_Mistral-7B-v0.2", + "id": "Pretergeek/openchat-3.5-0106_Rebased_Mistral-7B-v0.2", + "developer": "Pretergeek", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3706 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3627 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0453 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2718 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.484 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.283 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PrimeIntellect/INTELLECT-1-Instruct/0cacf042-6b62-4b67-8821-97cd703788d0.json b/data/hfopenllm_v2/PrimeIntellect/INTELLECT-1-Instruct/0cacf042-6b62-4b67-8821-97cd703788d0.json new file mode 100644 index 000000000..10fb00e0e --- /dev/null +++ b/data/hfopenllm_v2/PrimeIntellect/INTELLECT-1-Instruct/0cacf042-6b62-4b67-8821-97cd703788d0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/PrimeIntellect_INTELLECT-1-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "INTELLECT-1-Instruct", + "id": "PrimeIntellect/INTELLECT-1-Instruct", + "developer": "PrimeIntellect", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.211 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.287 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0227 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": 
"GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2483 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3577 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1064 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PrimeIntellect/INTELLECT-1-Instruct/ea823c15-3c92-4a67-a4fd-7826a9dd9e41.json b/data/hfopenllm_v2/PrimeIntellect/INTELLECT-1-Instruct/ea823c15-3c92-4a67-a4fd-7826a9dd9e41.json deleted file mode 100644 index d91a460cd..000000000 --- a/data/hfopenllm_v2/PrimeIntellect/INTELLECT-1-Instruct/ea823c15-3c92-4a67-a4fd-7826a9dd9e41.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/PrimeIntellect_INTELLECT-1-Instruct/1762652579.817848", - "retrieved_timestamp": "1762652579.8178492", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "PrimeIntellect/INTELLECT-1-Instruct", - "developer": "PrimeIntellect", - "inference_platform": "unknown", - "id": "PrimeIntellect/INTELLECT-1-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.211 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28698007801214714 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.022658610271903322 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2483221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3576875 
- } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10638297872340426 - } - } - ] -} diff --git a/data/hfopenllm_v2/PrimeIntellect/INTELLECT-1/9f0dfceb-1332-447a-bf6f-6c6c40686a6f.json b/data/hfopenllm_v2/PrimeIntellect/INTELLECT-1/9f0dfceb-1332-447a-bf6f-6c6c40686a6f.json new file mode 100644 index 000000000..978dc1bf9 --- /dev/null +++ b/data/hfopenllm_v2/PrimeIntellect/INTELLECT-1/9f0dfceb-1332-447a-bf6f-6c6c40686a6f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/PrimeIntellect_INTELLECT-1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "INTELLECT-1", + "id": "PrimeIntellect/INTELLECT-1", + "developer": "PrimeIntellect", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.211 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1757 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2534 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3339 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1123 + } + } + ] +} \ No newline at 
end of file diff --git a/data/hfopenllm_v2/PrimeIntellect/INTELLECT-1/bfffc240-22ab-4cc0-97c8-466ddf472ac4.json b/data/hfopenllm_v2/PrimeIntellect/INTELLECT-1/bfffc240-22ab-4cc0-97c8-466ddf472ac4.json deleted file mode 100644 index dddbdaff1..000000000 --- a/data/hfopenllm_v2/PrimeIntellect/INTELLECT-1/bfffc240-22ab-4cc0-97c8-466ddf472ac4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/PrimeIntellect_INTELLECT-1/1762652579.8176599", - "retrieved_timestamp": "1762652579.817661", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "PrimeIntellect/INTELLECT-1", - "developer": "PrimeIntellect", - "inference_platform": "unknown", - "id": "PrimeIntellect/INTELLECT-1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.211 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1757315035217667 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27398007801214713 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3752708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11203457446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/PrimeIntellect/INTELLECT-1/c1308f95-6d55-4ff6-b14e-1bd09b467d99.json b/data/hfopenllm_v2/PrimeIntellect/INTELLECT-1/c1308f95-6d55-4ff6-b14e-1bd09b467d99.json new file mode 100644 index 000000000..5993f7b41 --- /dev/null +++ b/data/hfopenllm_v2/PrimeIntellect/INTELLECT-1/c1308f95-6d55-4ff6-b14e-1bd09b467d99.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/PrimeIntellect_INTELLECT-1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "INTELLECT-1", + "id": "PrimeIntellect/INTELLECT-1", + "developer": "PrimeIntellect", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.211 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1757 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.274 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3753 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.112 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PrimeIntellect/INTELLECT-1/fee7966f-3e1b-43d9-b129-b0c23aac53b5.json b/data/hfopenllm_v2/PrimeIntellect/INTELLECT-1/fee7966f-3e1b-43d9-b129-b0c23aac53b5.json deleted file mode 100644 index 848e605d7..000000000 --- a/data/hfopenllm_v2/PrimeIntellect/INTELLECT-1/fee7966f-3e1b-43d9-b129-b0c23aac53b5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/PrimeIntellect_INTELLECT-1/1762652579.817406", - "retrieved_timestamp": "1762652579.817406", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "PrimeIntellect/INTELLECT-1", - "developer": "PrimeIntellect", - "inference_platform": "unknown", - "id": "PrimeIntellect/INTELLECT-1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 
10.211 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1757315035217667 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27598007801214713 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3339375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11228390957446809 - } - } - ] -} diff --git a/data/hfopenllm_v2/PuxAI/LUA_model/05dc0500-be97-456f-9d12-12192626ea39.json b/data/hfopenllm_v2/PuxAI/LUA_model/05dc0500-be97-456f-9d12-12192626ea39.json deleted file mode 100644 index 078276895..000000000 --- a/data/hfopenllm_v2/PuxAI/LUA_model/05dc0500-be97-456f-9d12-12192626ea39.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/PuxAI_LUA_model/1762652579.818059", - "retrieved_timestamp": "1762652579.818059", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "PuxAI/LUA_model", - "developer": "PuxAI", - "inference_platform": "unknown", - "id": "PuxAI/LUA_model", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.386 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22821336276634885 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2876778102988436 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": 
"Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34838541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11228390957446809 - } - } - ] -} diff --git a/data/hfopenllm_v2/PuxAI/LUA_model/4ab16120-8d39-4dea-aa76-5c249506848d.json b/data/hfopenllm_v2/PuxAI/LUA_model/4ab16120-8d39-4dea-aa76-5c249506848d.json new file mode 100644 index 000000000..31a3fa054 --- /dev/null +++ b/data/hfopenllm_v2/PuxAI/LUA_model/4ab16120-8d39-4dea-aa76-5c249506848d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/PuxAI_LUA_model/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LUA_model", + "id": "PuxAI/LUA_model", + "developer": "PuxAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.386 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2282 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2877 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3484 + } + }, + { + "evaluation_name": "MMLU-PRO", 
+ "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1123 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PygmalionAI/pygmalion-6b/7cdfef58-c871-4158-b97d-ed843f7d667b.json b/data/hfopenllm_v2/PygmalionAI/pygmalion-6b/7cdfef58-c871-4158-b97d-ed843f7d667b.json deleted file mode 100644 index 4b3c3663e..000000000 --- a/data/hfopenllm_v2/PygmalionAI/pygmalion-6b/7cdfef58-c871-4158-b97d-ed843f7d667b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/PygmalionAI_pygmalion-6b/1762652579.818316", - "retrieved_timestamp": "1762652579.8183172", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "PygmalionAI/pygmalion-6b", - "developer": "PygmalionAI", - "inference_platform": "unknown", - "id": "PygmalionAI/pygmalion-6b", - "additional_details": { - "precision": "float16", - "architecture": "GPTJForCausalLM", - "params_billions": 6.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20910406610016974 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31988944643860034 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24916107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3683541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11835106382978723 - } - } - ] -} diff --git a/data/hfopenllm_v2/PygmalionAI/pygmalion-6b/f9647ea0-6464-4aa0-b1ea-a994a7bcca3c.json b/data/hfopenllm_v2/PygmalionAI/pygmalion-6b/f9647ea0-6464-4aa0-b1ea-a994a7bcca3c.json new file mode 100644 index 000000000..e8e7d6daf --- /dev/null +++ b/data/hfopenllm_v2/PygmalionAI/pygmalion-6b/f9647ea0-6464-4aa0-b1ea-a994a7bcca3c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/PygmalionAI_pygmalion-6b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pygmalion-6b", + "id": "PygmalionAI/pygmalion-6b", + "developer": "PygmalionAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GPTJForCausalLM", + "params_billions": 6.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2091 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3199 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0083 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2492 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3684 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1184 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Q-bert/MetaMath-1B/713b1c64-9637-4d83-aee9-f81988fec0b5.json b/data/hfopenllm_v2/Q-bert/MetaMath-1B/713b1c64-9637-4d83-aee9-f81988fec0b5.json deleted file mode 100644 index 3e08f98d7..000000000 --- a/data/hfopenllm_v2/Q-bert/MetaMath-1B/713b1c64-9637-4d83-aee9-f81988fec0b5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Q-bert_MetaMath-1B/1762652579.8185658", - "retrieved_timestamp": "1762652579.8185658", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Q-bert/MetaMath-1B", - "developer": "Q-bert", - "inference_platform": "unknown", - "id": "Q-bert/MetaMath-1B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5300391849182392 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34506863677929517 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06268882175226587 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3289166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1495179521276596 - } - } - ] -} diff --git a/data/hfopenllm_v2/Q-bert/MetaMath-1B/c5ef47ab-2e73-43d6-b9ea-1ee7e50d9df8.json b/data/hfopenllm_v2/Q-bert/MetaMath-1B/c5ef47ab-2e73-43d6-b9ea-1ee7e50d9df8.json new file mode 100644 index 000000000..c76a87d90 --- /dev/null +++ b/data/hfopenllm_v2/Q-bert/MetaMath-1B/c5ef47ab-2e73-43d6-b9ea-1ee7e50d9df8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Q-bert_MetaMath-1B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MetaMath-1B", + "id": "Q-bert/MetaMath-1B", + "developer": "Q-bert", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.53 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3451 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0627 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2517 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3289 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1495 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/1up-14b/9ef7a4a0-b751-45ff-ab1f-d50687a3f4c3.json b/data/hfopenllm_v2/Quazim0t0/1up-14b/9ef7a4a0-b751-45ff-ab1f-d50687a3f4c3.json new file mode 100644 index 000000000..3622e1d5d --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/1up-14b/9ef7a4a0-b751-45ff-ab1f-d50687a3f4c3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_1up-14b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "1up-14b", + "id": "Quazim0t0/1up-14b", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6888 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6921 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" 
+ }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4162 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3624 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4583 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5406 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/1up-14b/c315527d-ea14-42a8-a002-4bb67c085fc0.json b/data/hfopenllm_v2/Quazim0t0/1up-14b/c315527d-ea14-42a8-a002-4bb67c085fc0.json deleted file mode 100644 index e9bf48e93..000000000 --- a/data/hfopenllm_v2/Quazim0t0/1up-14b/c315527d-ea14-42a8-a002-4bb67c085fc0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_1up-14b/1762652579.818811", - "retrieved_timestamp": "1762652579.818812", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/1up-14b", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/1up-14b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6888079185450161 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6920935635451656 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4161631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3624161073825503 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4583333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5406416223404256 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/Adamant-14B-sce/7ed9dcc6-7915-4a7e-a190-07e067d2fd79.json b/data/hfopenllm_v2/Quazim0t0/Adamant-14B-sce/7ed9dcc6-7915-4a7e-a190-07e067d2fd79.json deleted file mode 100644 index 8457e76f4..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Adamant-14B-sce/7ed9dcc6-7915-4a7e-a190-07e067d2fd79.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Adamant-14B-sce/1762652579.819103", - "retrieved_timestamp": "1762652579.819104", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Adamant-14B-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Adamant-14B-sce", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6857604489421402 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6858943778247303 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3987915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35067114093959734 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45579166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5371509308510638 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/Adamant-14B-sce/8b303795-557b-4fa1-bbc6-d36bd77ee739.json b/data/hfopenllm_v2/Quazim0t0/Adamant-14B-sce/8b303795-557b-4fa1-bbc6-d36bd77ee739.json new file mode 100644 index 000000000..620dc27f3 --- /dev/null +++ 
b/data/hfopenllm_v2/Quazim0t0/Adamant-14B-sce/8b303795-557b-4fa1-bbc6-d36bd77ee739.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Adamant-14B-sce/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Adamant-14B-sce", + "id": "Quazim0t0/Adamant-14B-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6858 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6859 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3988 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3507 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4558 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5372 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Alice-14B/3dd99496-1274-439f-b7c2-1fd731745753.json b/data/hfopenllm_v2/Quazim0t0/Alice-14B/3dd99496-1274-439f-b7c2-1fd731745753.json deleted file mode 100644 index fd4e5189d..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Alice-14B/3dd99496-1274-439f-b7c2-1fd731745753.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Alice-14B/1762652579.819317", - "retrieved_timestamp": "1762652579.819317", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Alice-14B", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Alice-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6836371937570092 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6937748567349198 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4569486404833837 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35151006711409394 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44794791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5418882978723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/Alice-14B/7fec288e-0b0d-45c0-b0e6-17b905cd7ea3.json b/data/hfopenllm_v2/Quazim0t0/Alice-14B/7fec288e-0b0d-45c0-b0e6-17b905cd7ea3.json new file mode 100644 index 000000000..bb5caa0bb --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Alice-14B/7fec288e-0b0d-45c0-b0e6-17b905cd7ea3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Alice-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Alice-14B", + "id": "Quazim0t0/Alice-14B", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6836 + } + }, + { + "evaluation_name": "BBH", + "source_data": 
{ + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6938 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4569 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3515 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4479 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5419 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Alien-CoT-14B-sce/5a09783b-82da-43ae-a607-2cfea550d931.json b/data/hfopenllm_v2/Quazim0t0/Alien-CoT-14B-sce/5a09783b-82da-43ae-a607-2cfea550d931.json new file mode 100644 index 000000000..e5578e0bc --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Alien-CoT-14B-sce/5a09783b-82da-43ae-a607-2cfea550d931.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Alien-CoT-14B-sce/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Alien-CoT-14B-sce", + "id": "Quazim0t0/Alien-CoT-14B-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0749 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, 
+ "score_details": { + "score": 0.6395 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5204 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3918 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4785 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.517 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Alien-CoT-14B-sce/dc89616f-c86d-41d0-9945-12703dc8f905.json b/data/hfopenllm_v2/Quazim0t0/Alien-CoT-14B-sce/dc89616f-c86d-41d0-9945-12703dc8f905.json deleted file mode 100644 index ed1ffed1f..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Alien-CoT-14B-sce/dc89616f-c86d-41d0-9945-12703dc8f905.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Alien-CoT-14B-sce/1762652579.819517", - "retrieved_timestamp": "1762652579.8195179", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Alien-CoT-14B-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Alien-CoT-14B-sce", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07486358417886763 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6395487523790632 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.520392749244713 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39177852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47852083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5170378989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/Aura-8B-Linear/2d22ab53-547d-41bb-8700-12bc5b16c97d.json b/data/hfopenllm_v2/Quazim0t0/Aura-8B-Linear/2d22ab53-547d-41bb-8700-12bc5b16c97d.json deleted file mode 100644 index f5a52b223..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Aura-8B-Linear/2d22ab53-547d-41bb-8700-12bc5b16c97d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Aura-8B-Linear/1762652579.819725", - "retrieved_timestamp": "1762652579.819726", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Aura-8B-Linear", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Aura-8B-Linear", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.794770098893159 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5074298101934884 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18051359516616314 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3686979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3800698138297872 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/Quazim0t0/Aura-8B-Linear/6c2d191a-a2d1-459c-b2e2-5766bec62ce7.json b/data/hfopenllm_v2/Quazim0t0/Aura-8B-Linear/6c2d191a-a2d1-459c-b2e2-5766bec62ce7.json new file mode 100644 index 000000000..3a532220d --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Aura-8B-Linear/6c2d191a-a2d1-459c-b2e2-5766bec62ce7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Aura-8B-Linear/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Aura-8B-Linear", + "id": "Quazim0t0/Aura-8B-Linear", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7948 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5074 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1805 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2693 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3687 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3801 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Casa-14b-sce/09bbb732-62d8-4cec-972a-273b728df1f4.json b/data/hfopenllm_v2/Quazim0t0/Casa-14b-sce/09bbb732-62d8-4cec-972a-273b728df1f4.json deleted file mode 100644 index 16a988c64..000000000 --- 
a/data/hfopenllm_v2/Quazim0t0/Casa-14b-sce/09bbb732-62d8-4cec-972a-273b728df1f4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Casa-14b-sce/1762652579.8199282", - "retrieved_timestamp": "1762652579.8199282", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Casa-14b-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Casa-14b-sce", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6653523761397536 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6901033460664828 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4697885196374622 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33305369127516776 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43102083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5425531914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/Casa-14b-sce/121cb5fc-2fa2-4718-b325-c40014802e40.json b/data/hfopenllm_v2/Quazim0t0/Casa-14b-sce/121cb5fc-2fa2-4718-b325-c40014802e40.json new file mode 100644 index 000000000..90b774872 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Casa-14b-sce/121cb5fc-2fa2-4718-b325-c40014802e40.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Casa-14b-sce/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Casa-14b-sce", + "id": "Quazim0t0/Casa-14b-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": 
"hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6718 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6891 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4985 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3339 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4323 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5408 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Casa-14b-sce/8bbfa040-b16e-4116-ad3e-b3e4e58a7de6.json b/data/hfopenllm_v2/Quazim0t0/Casa-14b-sce/8bbfa040-b16e-4116-ad3e-b3e4e58a7de6.json new file mode 100644 index 000000000..1cd19c932 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Casa-14b-sce/8bbfa040-b16e-4116-ad3e-b3e4e58a7de6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Casa-14b-sce/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Casa-14b-sce", + "id": "Quazim0t0/Casa-14b-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6654 + } + }, + { + "evaluation_name": 
"BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6901 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4698 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3331 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.431 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5426 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Casa-14b-sce/a0dde1eb-a763-4568-8122-1b280dedb2ce.json b/data/hfopenllm_v2/Quazim0t0/Casa-14b-sce/a0dde1eb-a763-4568-8122-1b280dedb2ce.json deleted file mode 100644 index 5a8aecf83..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Casa-14b-sce/a0dde1eb-a763-4568-8122-1b280dedb2ce.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Casa-14b-sce/1762652579.820149", - "retrieved_timestamp": "1762652579.820149", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Casa-14b-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Casa-14b-sce", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6718218770639681 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6891400252742456 - } - }, - { - "evaluation_name": "MATH Level 
5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4984894259818731 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3338926174496644 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4322916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5408078457446809 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/Charlie-8B-Linear/c56d7463-dad2-4c9c-8823-a4b6faa5aeb9.json b/data/hfopenllm_v2/Quazim0t0/Charlie-8B-Linear/c56d7463-dad2-4c9c-8823-a4b6faa5aeb9.json deleted file mode 100644 index 81b957dc6..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Charlie-8B-Linear/c56d7463-dad2-4c9c-8823-a4b6faa5aeb9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Charlie-8B-Linear/1762652579.820338", - "retrieved_timestamp": "1762652579.820339", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Charlie-8B-Linear", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Charlie-8B-Linear", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7380672172059026 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5141359215016831 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26510574018126887 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3485416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { 
- "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3572972074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/Charlie-8B-Linear/c8891914-c9fb-4b4d-9592-826f04520e7b.json b/data/hfopenllm_v2/Quazim0t0/Charlie-8B-Linear/c8891914-c9fb-4b4d-9592-826f04520e7b.json new file mode 100644 index 000000000..4db9f876c --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Charlie-8B-Linear/c8891914-c9fb-4b4d-9592-826f04520e7b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Charlie-8B-Linear/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Charlie-8B-Linear", + "id": "Quazim0t0/Charlie-8B-Linear", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7381 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5141 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3485 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3573 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/Quazim0t0/Chromatic-8b-sce/e77ffcb3-c7d8-4700-b4ea-fe4e5ba94223.json b/data/hfopenllm_v2/Quazim0t0/Chromatic-8b-sce/e77ffcb3-c7d8-4700-b4ea-fe4e5ba94223.json new file mode 100644 index 000000000..3005f0449 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Chromatic-8b-sce/e77ffcb3-c7d8-4700-b4ea-fe4e5ba94223.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Chromatic-8b-sce/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Chromatic-8b-sce", + "id": "Quazim0t0/Chromatic-8b-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5085 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5063 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1556 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3196 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4051 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3755 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Chromatic-8b-sce/f626897d-5003-40fa-8020-c100748a847f.json b/data/hfopenllm_v2/Quazim0t0/Chromatic-8b-sce/f626897d-5003-40fa-8020-c100748a847f.json deleted file mode 100644 index 1cfb57c5b..000000000 --- 
a/data/hfopenllm_v2/Quazim0t0/Chromatic-8b-sce/f626897d-5003-40fa-8020-c100748a847f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Chromatic-8b-sce/1762652579.8205519", - "retrieved_timestamp": "1762652579.820553", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Chromatic-8b-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Chromatic-8b-sce", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5085074269604649 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5063171816307924 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1555891238670695 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.405125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37549867021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/CoT_Phi/da237415-f34e-4cbb-9a94-3ff621f3df8d.json b/data/hfopenllm_v2/Quazim0t0/CoT_Phi/da237415-f34e-4cbb-9a94-3ff621f3df8d.json new file mode 100644 index 000000000..c2af11b27 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/CoT_Phi/da237415-f34e-4cbb-9a94-3ff621f3df8d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_CoT_Phi/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CoT_Phi", + "id": "Quazim0t0/CoT_Phi", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": 
"google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6159 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6751 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3308 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3582 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4244 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4901 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Dyson-14b/35c401bd-ed12-475e-afbc-e664243d90d5.json b/data/hfopenllm_v2/Quazim0t0/Dyson-14b/35c401bd-ed12-475e-afbc-e664243d90d5.json deleted file mode 100644 index 1f4d01aac..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Dyson-14b/35c401bd-ed12-475e-afbc-e664243d90d5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Dyson-14b/1762652579.821013", - "retrieved_timestamp": "1762652579.821014", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Dyson-14b", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Dyson-14b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5856682491345186 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6862902828866305 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5392749244712991 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4259375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5398936170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/Dyson-14b/479f3bfa-d614-46a9-88c7-9891852b0d8c.json b/data/hfopenllm_v2/Quazim0t0/Dyson-14b/479f3bfa-d614-46a9-88c7-9891852b0d8c.json new file mode 100644 index 000000000..05cb9e800 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Dyson-14b/479f3bfa-d614-46a9-88c7-9891852b0d8c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Dyson-14b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dyson-14b", + "id": "Quazim0t0/Dyson-14b", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5857 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6863 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5393 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": 
{ + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3138 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4259 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5399 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Edu-14B-Linear/a70e7642-3cc7-4719-bc22-68182baa3857.json b/data/hfopenllm_v2/Quazim0t0/Edu-14B-Linear/a70e7642-3cc7-4719-bc22-68182baa3857.json deleted file mode 100644 index dfab67e46..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Edu-14B-Linear/a70e7642-3cc7-4719-bc22-68182baa3857.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Edu-14B-Linear/1762652579.821216", - "retrieved_timestamp": "1762652579.821216", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Edu-14B-Linear", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Edu-14B-Linear", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6158182511292261 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6757820996225599 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24471299093655588 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43775000000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.508560505319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/Edu-14B-Linear/f5f0c7da-fb03-4023-81a7-801b0729a19d.json b/data/hfopenllm_v2/Quazim0t0/Edu-14B-Linear/f5f0c7da-fb03-4023-81a7-801b0729a19d.json new file mode 100644 index 000000000..ce336e83e --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Edu-14B-Linear/f5f0c7da-fb03-4023-81a7-801b0729a19d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Edu-14B-Linear/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Edu-14B-Linear", + "id": "Quazim0t0/Edu-14B-Linear", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6158 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6758 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2447 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3171 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4378 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5086 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Fugazi14b/40f51424-2922-498d-bbbc-d500667a8554.json 
b/data/hfopenllm_v2/Quazim0t0/Fugazi14b/40f51424-2922-498d-bbbc-d500667a8554.json new file mode 100644 index 000000000..d3ba7ceca --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Fugazi14b/40f51424-2922-498d-bbbc-d500667a8554.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Fugazi14b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fugazi14b", + "id": "Quazim0t0/Fugazi14b", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6998 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6941 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4653 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3515 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4546 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5417 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Fugazi14b/ee38e1c3-7a6b-4357-94ac-b309da33d14b.json b/data/hfopenllm_v2/Quazim0t0/Fugazi14b/ee38e1c3-7a6b-4357-94ac-b309da33d14b.json deleted file mode 100644 index b14868217..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Fugazi14b/ee38e1c3-7a6b-4357-94ac-b309da33d14b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/Quazim0t0_Fugazi14b/1762652579.8215911", - "retrieved_timestamp": "1762652579.821592", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Fugazi14b", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Fugazi14b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6997987561891337 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6941017680723065 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4652567975830816 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35151006711409394 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45455208333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5417220744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/GZA-14B-sce/4f25d177-6bcf-4864-87a4-1beb21a7373d.json b/data/hfopenllm_v2/Quazim0t0/GZA-14B-sce/4f25d177-6bcf-4864-87a4-1beb21a7373d.json new file mode 100644 index 000000000..f440931a3 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/GZA-14B-sce/4f25d177-6bcf-4864-87a4-1beb21a7373d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_GZA-14B-sce/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "GZA-14B-sce", + "id": "Quazim0t0/GZA-14B-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6274 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6687 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4721 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4285 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5232 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/GZA-14B-sce/cfb61ec3-ab7e-4697-892e-a8dd62518f39.json b/data/hfopenllm_v2/Quazim0t0/GZA-14B-sce/cfb61ec3-ab7e-4697-892e-a8dd62518f39.json deleted file mode 100644 index 9ac6a2eda..000000000 --- a/data/hfopenllm_v2/Quazim0t0/GZA-14B-sce/cfb61ec3-ab7e-4697-892e-a8dd62518f39.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_GZA-14B-sce/1762652579.821823", - "retrieved_timestamp": "1762652579.821824", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/GZA-14B-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/GZA-14B-sce", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6274086091570367 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.6686539892126272 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47205438066465255 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4284791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.523188164893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/Geedorah-14B/b160ab1f-be6b-4dfa-8fa9-36fc65a64782.json b/data/hfopenllm_v2/Quazim0t0/Geedorah-14B/b160ab1f-be6b-4dfa-8fa9-36fc65a64782.json new file mode 100644 index 000000000..36ede1fd0 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Geedorah-14B/b160ab1f-be6b-4dfa-8fa9-36fc65a64782.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Geedorah-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Geedorah-14B", + "id": "Quazim0t0/Geedorah-14B", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6873 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6964 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4449 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3473 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4547 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5421 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Geedorah-14B/c4a79914-b049-436b-9de6-640cc3e119ee.json b/data/hfopenllm_v2/Quazim0t0/Geedorah-14B/c4a79914-b049-436b-9de6-640cc3e119ee.json deleted file mode 100644 index 4ae52e323..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Geedorah-14B/c4a79914-b049-436b-9de6-640cc3e119ee.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Geedorah-14B/1762652579.822031", - "retrieved_timestamp": "1762652579.822032", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Geedorah-14B", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Geedorah-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6872841837435781 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6964189914061528 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44486404833836857 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34731543624161076 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45467708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.5421376329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/GivingTree-8b-sce/9b753075-a150-4bc3-9425-2371010daf8b.json b/data/hfopenllm_v2/Quazim0t0/GivingTree-8b-sce/9b753075-a150-4bc3-9425-2371010daf8b.json deleted file mode 100644 index 26386ccc9..000000000 --- a/data/hfopenllm_v2/Quazim0t0/GivingTree-8b-sce/9b753075-a150-4bc3-9425-2371010daf8b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_GivingTree-8b-sce/1762652579.8222332", - "retrieved_timestamp": "1762652579.8222342", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/GivingTree-8b-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/GivingTree-8b-sce", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5006139266036339 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5040482025572203 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15256797583081572 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221476510067114 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.405125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37608045212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/GivingTree-8b-sce/d497a7e3-11c2-4e0c-8788-091caabede56.json b/data/hfopenllm_v2/Quazim0t0/GivingTree-8b-sce/d497a7e3-11c2-4e0c-8788-091caabede56.json new file mode 100644 index 000000000..cab765743 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/GivingTree-8b-sce/d497a7e3-11c2-4e0c-8788-091caabede56.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_GivingTree-8b-sce/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "GivingTree-8b-sce", + "id": 
"Quazim0t0/GivingTree-8b-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5006 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.504 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1526 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3221 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4051 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3761 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/GuiltySpark-14B-ties/2b50b73e-9734-4502-b088-8d4936291aaa.json b/data/hfopenllm_v2/Quazim0t0/GuiltySpark-14B-ties/2b50b73e-9734-4502-b088-8d4936291aaa.json deleted file mode 100644 index 559a23549..000000000 --- a/data/hfopenllm_v2/Quazim0t0/GuiltySpark-14B-ties/2b50b73e-9734-4502-b088-8d4936291aaa.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_GuiltySpark-14B-ties/1762652579.822431", - "retrieved_timestamp": "1762652579.822432", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/GuiltySpark-14B-ties", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/GuiltySpark-14B-ties", - "additional_details": { 
- "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6854357549080883 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6914302574038697 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38368580060422963 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3649328859060403 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4557291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5399767287234043 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/GuiltySpark-14B-ties/4a55bcf2-e1c1-4fce-8f79-472dae869b26.json b/data/hfopenllm_v2/Quazim0t0/GuiltySpark-14B-ties/4a55bcf2-e1c1-4fce-8f79-472dae869b26.json new file mode 100644 index 000000000..00575bbe7 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/GuiltySpark-14B-ties/4a55bcf2-e1c1-4fce-8f79-472dae869b26.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_GuiltySpark-14B-ties/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "GuiltySpark-14B-ties", + "id": "Quazim0t0/GuiltySpark-14B-ties", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6854 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6914 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + 
"dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3837 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3649 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4557 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.54 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Halo-14B-sce/156424f1-2a1e-4e61-b081-bb066ee3958d.json b/data/hfopenllm_v2/Quazim0t0/Halo-14B-sce/156424f1-2a1e-4e61-b081-bb066ee3958d.json deleted file mode 100644 index ef44c3cdd..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Halo-14B-sce/156424f1-2a1e-4e61-b081-bb066ee3958d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Halo-14B-sce/1762652579.822633", - "retrieved_timestamp": "1762652579.822633", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Halo-14B-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Halo-14B-sce", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6753691316817156 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6875692490185378 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42900302114803623 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34731543624161076 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44007291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5376496010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/Halo-14B-sce/5b00dd5e-0ad3-4ea0-aa0d-2327d610e6a6.json b/data/hfopenllm_v2/Quazim0t0/Halo-14B-sce/5b00dd5e-0ad3-4ea0-aa0d-2327d610e6a6.json new file mode 100644 index 000000000..8957c0b18 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Halo-14B-sce/5b00dd5e-0ad3-4ea0-aa0d-2327d610e6a6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Halo-14B-sce/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Halo-14B-sce", + "id": "Quazim0t0/Halo-14B-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6754 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6876 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.429 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3473 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4401 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + 
"dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5376 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Heretic1.5b/1c80d383-1ccb-4f32-a63d-dd3954fe5f6b.json b/data/hfopenllm_v2/Quazim0t0/Heretic1.5b/1c80d383-1ccb-4f32-a63d-dd3954fe5f6b.json new file mode 100644 index 000000000..171487262 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Heretic1.5b/1c80d383-1ccb-4f32-a63d-dd3954fe5f6b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Heretic1.5b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Heretic1.5b", + "id": "Quazim0t0/Heretic1.5b", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.73 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2062 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3529 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.244 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3511 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.1728 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Heretic1.5b/e3d7453d-0ba6-4980-be81-827122149bb6.json b/data/hfopenllm_v2/Quazim0t0/Heretic1.5b/e3d7453d-0ba6-4980-be81-827122149bb6.json deleted file mode 100644 index 6a01a3f3e..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Heretic1.5b/e3d7453d-0ba6-4980-be81-827122149bb6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Heretic1.5b/1762652579.8228369", - "retrieved_timestamp": "1762652579.8228369", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Heretic1.5b", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Heretic1.5b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.73 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20615633186611523 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3529180801121154 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24395770392749244 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3511458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17278922872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/Hyde-14b-sce/75065074-7ef6-41ac-be7c-496cc458640a.json b/data/hfopenllm_v2/Quazim0t0/Hyde-14b-sce/75065074-7ef6-41ac-be7c-496cc458640a.json new file mode 100644 index 000000000..305107133 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Hyde-14b-sce/75065074-7ef6-41ac-be7c-496cc458640a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Hyde-14b-sce/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Hyde-14b-sce", + "id": "Quazim0t0/Hyde-14b-sce", + "developer": 
"Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6715 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6885 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2734 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3414 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4141 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.53 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Hyde-14b-sce/814ce716-6f61-4980-a8f6-7918c7b0eea5.json b/data/hfopenllm_v2/Quazim0t0/Hyde-14b-sce/814ce716-6f61-4980-a8f6-7918c7b0eea5.json deleted file mode 100644 index 838f31a59..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Hyde-14b-sce/814ce716-6f61-4980-a8f6-7918c7b0eea5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Hyde-14b-sce/1762652579.823039", - "retrieved_timestamp": "1762652579.823039", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Hyde-14b-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Hyde-14b-sce", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - 
} - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6715470507143269 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6885164810743584 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27341389728096677 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3414429530201342 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41409375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5300033244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/Imagine-v0.5-16bit/49a0287b-48d7-44db-bf20-a084919d332f.json b/data/hfopenllm_v2/Quazim0t0/Imagine-v0.5-16bit/49a0287b-48d7-44db-bf20-a084919d332f.json new file mode 100644 index 000000000..f52a4474a --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Imagine-v0.5-16bit/49a0287b-48d7-44db-bf20-a084919d332f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Imagine-v0.5-16bit/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Imagine-v0.5-16bit", + "id": "Quazim0t0/Imagine-v0.5-16bit", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2759 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6769 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + 
"metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1397 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3649 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4349 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5354 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Imagine-v0.5-16bit/ccb33ad4-98f5-4980-a442-1a1772fab792.json b/data/hfopenllm_v2/Quazim0t0/Imagine-v0.5-16bit/ccb33ad4-98f5-4980-a442-1a1772fab792.json deleted file mode 100644 index 50af25e46..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Imagine-v0.5-16bit/ccb33ad4-98f5-4980-a442-1a1772fab792.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Imagine-v0.5-16bit/1762652579.823242", - "retrieved_timestamp": "1762652579.823243", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Imagine-v0.5-16bit", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Imagine-v0.5-16bit", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2758990589413866 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6769135492947932 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13972809667673716 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3649328859060403 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43492708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.535405585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/Imbue-14b/7b2861ee-58f9-4ac9-99ee-2ec663e1b157.json b/data/hfopenllm_v2/Quazim0t0/Imbue-14b/7b2861ee-58f9-4ac9-99ee-2ec663e1b157.json new file mode 100644 index 000000000..8fbf439a2 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Imbue-14b/7b2861ee-58f9-4ac9-99ee-2ec663e1b157.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Imbue-14b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Imbue-14b", + "id": "Quazim0t0/Imbue-14b", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.52 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6845 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5317 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3129 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4167 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5402 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Imbue-14b/c50c07fc-b529-43c9-9f3d-0f1ff174b905.json b/data/hfopenllm_v2/Quazim0t0/Imbue-14b/c50c07fc-b529-43c9-9f3d-0f1ff174b905.json deleted file mode 100644 index 9248c5b58..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Imbue-14b/c50c07fc-b529-43c9-9f3d-0f1ff174b905.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Imbue-14b/1762652579.8234398", - "retrieved_timestamp": "1762652579.8234408", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Imbue-14b", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Imbue-14b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5199725616918665 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6845292092854045 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5317220543806647 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41672916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5402260638297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/Insom/51f419c6-1107-41c9-896b-fadbbde4f5e9.json b/data/hfopenllm_v2/Quazim0t0/Insom/51f419c6-1107-41c9-896b-fadbbde4f5e9.json deleted file mode 100644 index 8153fed23..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Insom/51f419c6-1107-41c9-896b-fadbbde4f5e9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Insom/1762652579.823634", - "retrieved_timestamp": "1762652579.8236349", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Insom", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Insom", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.68183863260593 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6881456689046391 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3498322147651007 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43114583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5352393617021277 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/Insom/628542f9-fac6-42a7-8ec5-5cd93f977a7e.json b/data/hfopenllm_v2/Quazim0t0/Insom/628542f9-fac6-42a7-8ec5-5cd93f977a7e.json new file mode 100644 index 000000000..b3398ce09 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Insom/628542f9-fac6-42a7-8ec5-5cd93f977a7e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Insom/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Insom", + "id": "Quazim0t0/Insom", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6818 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6881 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3852 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3498 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4311 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5352 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/InspectorDeck-14B-sce/1ac547e3-1b29-462a-aa08-1e9ef9e3f409.json b/data/hfopenllm_v2/Quazim0t0/InspectorDeck-14B-sce/1ac547e3-1b29-462a-aa08-1e9ef9e3f409.json deleted file mode 100644 index 1ca5e506d..000000000 --- a/data/hfopenllm_v2/Quazim0t0/InspectorDeck-14B-sce/1ac547e3-1b29-462a-aa08-1e9ef9e3f409.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_InspectorDeck-14B-sce/1762652579.8238342", - "retrieved_timestamp": "1762652579.8238342", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/InspectorDeck-14B-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/InspectorDeck-14B-sce", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32408454013129606 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6668480318764974 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", 
- "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3164652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39815625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5260970744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/InspectorDeck-14B-sce/5b0924ae-cf52-4245-a687-91e4b1742c16.json b/data/hfopenllm_v2/Quazim0t0/InspectorDeck-14B-sce/5b0924ae-cf52-4245-a687-91e4b1742c16.json new file mode 100644 index 000000000..6da062e91 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/InspectorDeck-14B-sce/5b0924ae-cf52-4245-a687-91e4b1742c16.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_InspectorDeck-14B-sce/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "InspectorDeck-14B-sce", + "id": "Quazim0t0/InspectorDeck-14B-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3241 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6668 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3165 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": 
"MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3982 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5261 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Jekyl-8b-sce/459c2b98-c3af-4334-a4bc-13334efe49b8.json b/data/hfopenllm_v2/Quazim0t0/Jekyl-8b-sce/459c2b98-c3af-4334-a4bc-13334efe49b8.json new file mode 100644 index 000000000..6119a139c --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Jekyl-8b-sce/459c2b98-c3af-4334-a4bc-13334efe49b8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Jekyl-8b-sce/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Jekyl-8b-sce", + "id": "Quazim0t0/Jekyl-8b-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4697 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4994 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1616 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3381 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4197 + } + }, + 
{ + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3686 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Jekyl-8b-sce/dc6a9e35-c130-4edc-93bc-5f0b6ac0e05d.json b/data/hfopenllm_v2/Quazim0t0/Jekyl-8b-sce/dc6a9e35-c130-4edc-93bc-5f0b6ac0e05d.json deleted file mode 100644 index d53cbe29a..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Jekyl-8b-sce/dc6a9e35-c130-4edc-93bc-5f0b6ac0e05d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Jekyl-8b-sce/1762652579.82404", - "retrieved_timestamp": "1762652579.824041", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Jekyl-8b-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Jekyl-8b-sce", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46968931324441365 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4993588236391566 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16163141993957703 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41966666666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3686003989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/Jigsaw-14B-Linear/7533defe-b19d-4571-a403-c443ec03a31b.json b/data/hfopenllm_v2/Quazim0t0/Jigsaw-14B-Linear/7533defe-b19d-4571-a403-c443ec03a31b.json deleted file mode 100644 index a7d6ecb22..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Jigsaw-14B-Linear/7533defe-b19d-4571-a403-c443ec03a31b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/Quazim0t0_Jigsaw-14B-Linear/1762652579.824291", - "retrieved_timestamp": "1762652579.824291", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Jigsaw-14B-Linear", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Jigsaw-14B-Linear", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6480416406246536 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6864625931836906 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26510574018126887 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34060402684563756 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44826041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5233543882978723 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/Jigsaw-14B-Linear/b2780aa3-d299-4180-8441-dd54e94255cb.json b/data/hfopenllm_v2/Quazim0t0/Jigsaw-14B-Linear/b2780aa3-d299-4180-8441-dd54e94255cb.json new file mode 100644 index 000000000..ab44c9d65 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Jigsaw-14B-Linear/b2780aa3-d299-4180-8441-dd54e94255cb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Jigsaw-14B-Linear/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Jigsaw-14B-Linear", + "id": "Quazim0t0/Jigsaw-14B-Linear", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.648 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6865 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3406 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4483 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5234 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Katana-8b-sce/dea8c833-7deb-43f8-9b15-acbadf4fc749.json b/data/hfopenllm_v2/Quazim0t0/Katana-8b-sce/dea8c833-7deb-43f8-9b15-acbadf4fc749.json deleted file mode 100644 index f141ee9ff..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Katana-8b-sce/dea8c833-7deb-43f8-9b15-acbadf4fc749.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Katana-8b-sce/1762652579.8246028", - "retrieved_timestamp": "1762652579.8246038", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Katana-8b-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Katana-8b-sce", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5107304175144174 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5074684221457483 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1510574018126888 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4037604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3770777925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/Katana-8b-sce/f55d398d-0555-4e89-a37c-def04741a0dd.json b/data/hfopenllm_v2/Quazim0t0/Katana-8b-sce/f55d398d-0555-4e89-a37c-def04741a0dd.json new file mode 100644 index 000000000..3136f0083 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Katana-8b-sce/f55d398d-0555-4e89-a37c-def04741a0dd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Katana-8b-sce/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Katana-8b-sce", + "id": "Quazim0t0/Katana-8b-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5107 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5075 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1511 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3247 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4038 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3771 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Knot-CoT-14B-sce/63caf8f8-9e55-4ef6-ae76-ee7184a50675.json b/data/hfopenllm_v2/Quazim0t0/Knot-CoT-14B-sce/63caf8f8-9e55-4ef6-ae76-ee7184a50675.json new file mode 100644 index 000000000..12f493aa0 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Knot-CoT-14B-sce/63caf8f8-9e55-4ef6-ae76-ee7184a50675.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Knot-CoT-14B-sce/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Knot-CoT-14B-sce", + "id": "Quazim0t0/Knot-CoT-14B-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4832 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6616 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3995 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": 
"hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.414 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5154 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Knot-CoT-14B-sce/fe0b75bf-2035-4ffe-8cbf-d5f4c66907aa.json b/data/hfopenllm_v2/Quazim0t0/Knot-CoT-14B-sce/fe0b75bf-2035-4ffe-8cbf-d5f4c66907aa.json deleted file mode 100644 index 319b23591..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Knot-CoT-14B-sce/fe0b75bf-2035-4ffe-8cbf-d5f4c66907aa.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Knot-CoT-14B-sce/1762652579.8248682", - "retrieved_timestamp": "1762652579.8248692", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Knot-CoT-14B-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Knot-CoT-14B-sce", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4831779677921249 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6615610657544672 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3995468277945619 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41403125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.515375664893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/Lineage-14B/37f890b7-5487-46ea-b61e-d91b5349d078.json 
b/data/hfopenllm_v2/Quazim0t0/Lineage-14B/37f890b7-5487-46ea-b61e-d91b5349d078.json deleted file mode 100644 index 96d2f071b..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Lineage-14B/37f890b7-5487-46ea-b61e-d91b5349d078.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Lineage-14B/1762652579.82509", - "retrieved_timestamp": "1762652579.8250911", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Lineage-14B", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Lineage-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7070428684778609 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6933789516730196 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4244712990936556 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3598993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4597291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5410571808510638 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/Lineage-14B/f82ccde3-bd3b-499c-8b8c-182822392cea.json b/data/hfopenllm_v2/Quazim0t0/Lineage-14B/f82ccde3-bd3b-499c-8b8c-182822392cea.json new file mode 100644 index 000000000..2aa939b3a --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Lineage-14B/f82ccde3-bd3b-499c-8b8c-182822392cea.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Lineage-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lineage-14B", + "id": "Quazim0t0/Lineage-14B", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, 
+ "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.707 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6934 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4245 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3599 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4597 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5411 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Lo-Phi-14b/8a52fb4a-d6ae-4c8d-aed0-2137e0a83ea1.json b/data/hfopenllm_v2/Quazim0t0/Lo-Phi-14b/8a52fb4a-d6ae-4c8d-aed0-2137e0a83ea1.json new file mode 100644 index 000000000..26d2ee23e --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Lo-Phi-14b/8a52fb4a-d6ae-4c8d-aed0-2137e0a83ea1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Lo-Phi-14b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lo-Phi-14b", + "id": "Quazim0t0/Lo-Phi-14b", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4941 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6852 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5196 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.328 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4232 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5369 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Loke-14B-sce/b7cbc2fb-2c52-4c13-9266-52103421f2ee.json b/data/hfopenllm_v2/Quazim0t0/Loke-14B-sce/b7cbc2fb-2c52-4c13-9266-52103421f2ee.json new file mode 100644 index 000000000..65be87e3f --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Loke-14B-sce/b7cbc2fb-2c52-4c13-9266-52103421f2ee.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Loke-14B-sce/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Loke-14B-sce", + "id": "Quazim0t0/Loke-14B-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6848 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on 
BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6924 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3905 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3649 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4637 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5401 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Loke-14B-sce/cfac443e-5c66-45e3-bf7a-7c596d01d4ff.json b/data/hfopenllm_v2/Quazim0t0/Loke-14B-sce/cfac443e-5c66-45e3-bf7a-7c596d01d4ff.json deleted file mode 100644 index 6c873bd91..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Loke-14B-sce/cfac443e-5c66-45e3-bf7a-7c596d01d4ff.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Loke-14B-sce/1762652579.825529", - "retrieved_timestamp": "1762652579.82553", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Loke-14B-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Loke-14B-sce", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6847863668399845 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6923902176707362 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, 
- "score_details": { - "score": 0.3904833836858006 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3649328859060403 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46366666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5401429521276596 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/MFDOOM-14B/3efa12a5-4525-4ee9-80bd-99c4b8d2ccb2.json b/data/hfopenllm_v2/Quazim0t0/MFDOOM-14B/3efa12a5-4525-4ee9-80bd-99c4b8d2ccb2.json deleted file mode 100644 index f01d78f51..000000000 --- a/data/hfopenllm_v2/Quazim0t0/MFDOOM-14B/3efa12a5-4525-4ee9-80bd-99c4b8d2ccb2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_MFDOOM-14B/1762652579.825741", - "retrieved_timestamp": "1762652579.825742", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/MFDOOM-14B", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/MFDOOM-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6736204382150472 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6916400252742457 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5264350453172205 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43765625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5425531914893617 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/Quazim0t0/MFDOOM-14B/f4474361-e897-4dbb-a89e-5451a4724474.json b/data/hfopenllm_v2/Quazim0t0/MFDOOM-14B/f4474361-e897-4dbb-a89e-5451a4724474.json new file mode 100644 index 000000000..dc38a1ad9 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/MFDOOM-14B/f4474361-e897-4dbb-a89e-5451a4724474.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_MFDOOM-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MFDOOM-14B", + "id": "Quazim0t0/MFDOOM-14B", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6736 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6916 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5264 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.323 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4377 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5426 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/MFGRIMM-14B/773228d8-7e03-4ba8-87c1-f59ac5aad425.json b/data/hfopenllm_v2/Quazim0t0/MFGRIMM-14B/773228d8-7e03-4ba8-87c1-f59ac5aad425.json deleted file mode 100644 index 05104ae96..000000000 --- 
a/data/hfopenllm_v2/Quazim0t0/MFGRIMM-14B/773228d8-7e03-4ba8-87c1-f59ac5aad425.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_MFGRIMM-14B/1762652579.8259468", - "retrieved_timestamp": "1762652579.825948", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/MFGRIMM-14B", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/MFGRIMM-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6894074389287091 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.69087746819662 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5060422960725075 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3338926174496644 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43613541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5416389627659575 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/MFGRIMM-14B/de257b5e-4629-4f8a-b08d-d2ca372593e2.json b/data/hfopenllm_v2/Quazim0t0/MFGRIMM-14B/de257b5e-4629-4f8a-b08d-d2ca372593e2.json new file mode 100644 index 000000000..ed96fac2b --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/MFGRIMM-14B/de257b5e-4629-4f8a-b08d-d2ca372593e2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_MFGRIMM-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MFGRIMM-14B", + "id": "Quazim0t0/MFGRIMM-14B", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", 
+ "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6894 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6909 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.506 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3339 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4361 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5416 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Math_Phi4_Reason/a37aada3-104a-488a-898f-245ff257de46.json b/data/hfopenllm_v2/Quazim0t0/Math_Phi4_Reason/a37aada3-104a-488a-898f-245ff257de46.json new file mode 100644 index 000000000..79be95de5 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Math_Phi4_Reason/a37aada3-104a-488a-898f-245ff257de46.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Math_Phi4_Reason/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Math_Phi4_Reason", + "id": "Quazim0t0/Math_Phi4_Reason", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.322 + } + }, + { + 
"evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.624 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3278 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4034 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.503 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Mithril-14B-sce/8ab4e441-2efb-4510-87ea-43f3fbcc67ac.json b/data/hfopenllm_v2/Quazim0t0/Mithril-14B-sce/8ab4e441-2efb-4510-87ea-43f3fbcc67ac.json deleted file mode 100644 index 2c8ff8641..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Mithril-14B-sce/8ab4e441-2efb-4510-87ea-43f3fbcc67ac.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Mithril-14B-sce/1762652579.826359", - "retrieved_timestamp": "1762652579.82636", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Mithril-14B-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Mithril-14B-sce", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6957772044841022 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6925969240705362 - } - }, - { 
- "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3821752265861027 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4610625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5403091755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/Mithril-14B-sce/d9d655d1-d94c-483a-a3a2-ca196e1391d1.json b/data/hfopenllm_v2/Quazim0t0/Mithril-14B-sce/d9d655d1-d94c-483a-a3a2-ca196e1391d1.json new file mode 100644 index 000000000..0346b1864 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Mithril-14B-sce/d9d655d1-d94c-483a-a3a2-ca196e1391d1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Mithril-14B-sce/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mithril-14B-sce", + "id": "Quazim0t0/Mithril-14B-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6958 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6926 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3822 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3691 
+ } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4611 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5403 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Mononoke-14B-sce/6f2d122b-f7fe-448a-ac8b-864314e94692.json b/data/hfopenllm_v2/Quazim0t0/Mononoke-14B-sce/6f2d122b-f7fe-448a-ac8b-864314e94692.json deleted file mode 100644 index a03f51261..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Mononoke-14B-sce/6f2d122b-f7fe-448a-ac8b-864314e94692.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Mononoke-14B-sce/1762652579.8265631", - "retrieved_timestamp": "1762652579.826564", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Mononoke-14B-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Mononoke-14B-sce", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3502129904209719 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6744431226588331 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4697885196374622 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4154583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5297539893617021 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/Quazim0t0/Mononoke-14B-sce/77bf7126-0cb9-43ef-8d23-5f1395f91642.json b/data/hfopenllm_v2/Quazim0t0/Mononoke-14B-sce/77bf7126-0cb9-43ef-8d23-5f1395f91642.json new file mode 100644 index 000000000..24ae15f0e --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Mononoke-14B-sce/77bf7126-0cb9-43ef-8d23-5f1395f91642.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Mononoke-14B-sce/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mononoke-14B-sce", + "id": "Quazim0t0/Mononoke-14B-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3502 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6744 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4698 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.323 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4155 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5298 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Motion-8B-Linear/73f410be-3084-4994-8406-f8ac70880626.json b/data/hfopenllm_v2/Quazim0t0/Motion-8B-Linear/73f410be-3084-4994-8406-f8ac70880626.json new file mode 100644 index 000000000..6934a4cda --- /dev/null +++ 
b/data/hfopenllm_v2/Quazim0t0/Motion-8B-Linear/73f410be-3084-4994-8406-f8ac70880626.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Motion-8B-Linear/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Motion-8B-Linear", + "id": "Quazim0t0/Motion-8B-Linear", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7686 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5084 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1888 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3606 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3785 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Motion-8B-Linear/db82138b-f915-4451-aa85-8bc4c7fdd225.json b/data/hfopenllm_v2/Quazim0t0/Motion-8B-Linear/db82138b-f915-4451-aa85-8bc4c7fdd225.json deleted file mode 100644 index 0d2449469..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Motion-8B-Linear/db82138b-f915-4451-aa85-8bc4c7fdd225.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Motion-8B-Linear/1762652579.826771", - "retrieved_timestamp": "1762652579.826771", - 
"source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Motion-8B-Linear", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Motion-8B-Linear", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7685917809190725 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5084252652465131 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18882175226586104 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36060416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3784906914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/Mouse-9B/24caad7a-15fa-4820-91cc-0f544a34d173.json b/data/hfopenllm_v2/Quazim0t0/Mouse-9B/24caad7a-15fa-4820-91cc-0f544a34d173.json new file mode 100644 index 000000000..cd04f17a6 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Mouse-9B/24caad7a-15fa-4820-91cc-0f544a34d173.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Mouse-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mouse-9B", + "id": "Quazim0t0/Mouse-9B", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 9.207 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1325 + } + }, + { + 
"evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2979 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0053 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2542 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.347 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1139 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Mouse-9B/70e3145f-d67b-403d-af2a-1b06b2ba0f24.json b/data/hfopenllm_v2/Quazim0t0/Mouse-9B/70e3145f-d67b-403d-af2a-1b06b2ba0f24.json deleted file mode 100644 index 257ea40d4..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Mouse-9B/70e3145f-d67b-403d-af2a-1b06b2ba0f24.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Mouse-9B/1762652579.826978", - "retrieved_timestamp": "1762652579.826978", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Mouse-9B", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Mouse-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 9.207 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1324917884546337 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29789470527601253 - } - }, - { - "evaluation_name": "MATH Level 5", - 
"metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.005287009063444109 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3469583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11386303191489362 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/Nova-14b-sce/3336c8fa-fcef-4513-946d-9254f537e418.json b/data/hfopenllm_v2/Quazim0t0/Nova-14b-sce/3336c8fa-fcef-4513-946d-9254f537e418.json deleted file mode 100644 index e05603993..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Nova-14b-sce/3336c8fa-fcef-4513-946d-9254f537e418.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Nova-14b-sce/1762652579.827177", - "retrieved_timestamp": "1762652579.827178", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Nova-14b-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Nova-14b-sce", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7021968377239058 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6935261478148286 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4161631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36325503355704697 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4570625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy 
on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5413065159574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/Nova-14b-sce/e087b221-f813-4688-8d98-17980f98ac5b.json b/data/hfopenllm_v2/Quazim0t0/Nova-14b-sce/e087b221-f813-4688-8d98-17980f98ac5b.json new file mode 100644 index 000000000..5316de52d --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Nova-14b-sce/e087b221-f813-4688-8d98-17980f98ac5b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Nova-14b-sce/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nova-14b-sce", + "id": "Quazim0t0/Nova-14b-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7022 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6935 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4162 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3633 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4571 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5413 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/NovaScotia-14b-stock/8ab3ce59-d0cd-4764-98c7-c4df81bc3c23.json 
b/data/hfopenllm_v2/Quazim0t0/NovaScotia-14b-stock/8ab3ce59-d0cd-4764-98c7-c4df81bc3c23.json deleted file mode 100644 index df564a043..000000000 --- a/data/hfopenllm_v2/Quazim0t0/NovaScotia-14b-stock/8ab3ce59-d0cd-4764-98c7-c4df81bc3c23.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_NovaScotia-14b-stock/1762652579.827381", - "retrieved_timestamp": "1762652579.827381", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/NovaScotia-14b-stock", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/NovaScotia-14b-stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6787412953186434 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6935261478148286 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.348993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44934375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5408909574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/NovaScotia-14b-stock/f4d03bff-3b34-497f-a17f-0379bc562f11.json b/data/hfopenllm_v2/Quazim0t0/NovaScotia-14b-stock/f4d03bff-3b34-497f-a17f-0379bc562f11.json new file mode 100644 index 000000000..b80b45290 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/NovaScotia-14b-stock/f4d03bff-3b34-497f-a17f-0379bc562f11.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_NovaScotia-14b-stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NovaScotia-14b-stock", + "id": "Quazim0t0/NovaScotia-14b-stock", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + 
"precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6787 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6935 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.463 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.349 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4493 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5409 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/ODB-14B-sce/2ca21612-ea90-41f3-b618-3ea81c09c3ae.json b/data/hfopenllm_v2/Quazim0t0/ODB-14B-sce/2ca21612-ea90-41f3-b618-3ea81c09c3ae.json new file mode 100644 index 000000000..78c79f5cf --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/ODB-14B-sce/2ca21612-ea90-41f3-b618-3ea81c09c3ae.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_ODB-14B-sce/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ODB-14B-sce", + "id": "Quazim0t0/ODB-14B-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Unknown", + "params_billions": 0.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2922 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6559 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2545 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3929 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5207 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/ODB-14B-sce/66743ed1-93ab-41f7-9002-0080e7f74722.json b/data/hfopenllm_v2/Quazim0t0/ODB-14B-sce/66743ed1-93ab-41f7-9002-0080e7f74722.json deleted file mode 100644 index 3e5138e58..000000000 --- a/data/hfopenllm_v2/Quazim0t0/ODB-14B-sce/66743ed1-93ab-41f7-9002-0080e7f74722.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_ODB-14b-sce/1762652579.827807", - "retrieved_timestamp": "1762652579.827808", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/ODB-14b-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/ODB-14b-sce", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7015973173402128 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6941928144814953 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.411631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3624161073825503 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4570625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5411402925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/ODB-14B-sce/79d7d2a1-dcb6-40a7-b29c-7213ebd261df.json b/data/hfopenllm_v2/Quazim0t0/ODB-14B-sce/79d7d2a1-dcb6-40a7-b29c-7213ebd261df.json deleted file mode 100644 index 9aff2044f..000000000 --- a/data/hfopenllm_v2/Quazim0t0/ODB-14B-sce/79d7d2a1-dcb6-40a7-b29c-7213ebd261df.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_ODB-14B-sce/1762652579.827594", - "retrieved_timestamp": "1762652579.827595", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/ODB-14B-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/ODB-14B-sce", - "additional_details": { - "precision": "bfloat16", - "architecture": "Unknown", - "params_billions": 0.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.292235712354331 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6558922017209644 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2545317220543807 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39288541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5206948138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/ODB-14B-sce/d4dc2088-9911-4966-afe9-022df89dd522.json b/data/hfopenllm_v2/Quazim0t0/ODB-14B-sce/d4dc2088-9911-4966-afe9-022df89dd522.json new file mode 100644 index 000000000..a373a9579 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/ODB-14B-sce/d4dc2088-9911-4966-afe9-022df89dd522.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_ODB-14b-sce/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ODB-14b-sce", + "id": "Quazim0t0/ODB-14b-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7016 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6942 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4116 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3624 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4571 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.5411 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Oasis-14B-ties/a3ef4bc2-c560-4a62-8227-2bd30120b537.json b/data/hfopenllm_v2/Quazim0t0/Oasis-14B-ties/a3ef4bc2-c560-4a62-8227-2bd30120b537.json deleted file mode 100644 index f3f980215..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Oasis-14B-ties/a3ef4bc2-c560-4a62-8227-2bd30120b537.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Oasis-14B-ties/1762652579.827992", - "retrieved_timestamp": "1762652579.8279932", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Oasis-14B-ties", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Oasis-14B-ties", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6936539492989712 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6914976731342066 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37537764350453173 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3649328859060403 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4570625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5404753989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/Oasis-14B-ties/ad03a075-8f24-46f6-ae04-5a04eb7061c1.json b/data/hfopenllm_v2/Quazim0t0/Oasis-14B-ties/ad03a075-8f24-46f6-ae04-5a04eb7061c1.json new file mode 100644 index 000000000..5c2da9f94 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Oasis-14B-ties/ad03a075-8f24-46f6-ae04-5a04eb7061c1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Oasis-14B-ties/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"Oasis-14B-ties", + "id": "Quazim0t0/Oasis-14B-ties", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6937 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6915 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3754 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3649 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4571 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5405 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Origami-14B-sce/2d1da226-e65c-48a0-aabb-46b1cf670a82.json b/data/hfopenllm_v2/Quazim0t0/Origami-14B-sce/2d1da226-e65c-48a0-aabb-46b1cf670a82.json new file mode 100644 index 000000000..c9e2044e6 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Origami-14B-sce/2d1da226-e65c-48a0-aabb-46b1cf670a82.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Origami-14B-sce/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Origami-14B-sce", + "id": "Quazim0t0/Origami-14B-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + 
"evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3259 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.662 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2915 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4035 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5244 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Origami-14B-sce/82826944-e4a1-47bd-b240-c70e21acfc51.json b/data/hfopenllm_v2/Quazim0t0/Origami-14B-sce/82826944-e4a1-47bd-b240-c70e21acfc51.json deleted file mode 100644 index 2207bc07d..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Origami-14B-sce/82826944-e4a1-47bd-b240-c70e21acfc51.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Origami-14B-sce/1762652579.828193", - "retrieved_timestamp": "1762652579.8281941", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Origami-14B-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Origami-14B-sce", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3259329689667859 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6620277470720752 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29154078549848944 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40348958333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5244348404255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/Phi4.Turn.R1Distill.16bit/7fb3a035-2b83-4a58-818f-16fe6d9a8ab3.json b/data/hfopenllm_v2/Quazim0t0/Phi4.Turn.R1Distill.16bit/7fb3a035-2b83-4a58-818f-16fe6d9a8ab3.json new file mode 100644 index 000000000..75adf6c43 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Phi4.Turn.R1Distill.16bit/7fb3a035-2b83-4a58-818f-16fe6d9a8ab3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Phi4.Turn.R1Distill.16bit/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi4.Turn.R1Distill.16bit", + "id": "Quazim0t0/Phi4.Turn.R1Distill.16bit", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3126 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6563 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", 
+ "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2311 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3902 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5257 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Phi4.Turn.R1Distill_v1.5.1-Tensors/87018726-9f81-47b1-883e-609afea7fb37.json b/data/hfopenllm_v2/Quazim0t0/Phi4.Turn.R1Distill_v1.5.1-Tensors/87018726-9f81-47b1-883e-609afea7fb37.json new file mode 100644 index 000000000..5b87ab22e --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Phi4.Turn.R1Distill_v1.5.1-Tensors/87018726-9f81-47b1-883e-609afea7fb37.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Phi4.Turn.R1Distill_v1.5.1-Tensors/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi4.Turn.R1Distill_v1.5.1-Tensors", + "id": "Quazim0t0/Phi4.Turn.R1Distill_v1.5.1-Tensors", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6456 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.219 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": 
"GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3929 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5117 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Phi4Basis-14B-sce/292b9333-96c7-4fc7-bf35-78bbce9f10d3.json b/data/hfopenllm_v2/Quazim0t0/Phi4Basis-14B-sce/292b9333-96c7-4fc7-bf35-78bbce9f10d3.json new file mode 100644 index 000000000..0f7720935 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Phi4Basis-14B-sce/292b9333-96c7-4fc7-bf35-78bbce9f10d3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Phi4Basis-14B-sce/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi4Basis-14B-sce", + "id": "Quazim0t0/Phi4Basis-14B-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6502 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6909 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4789 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.3289 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4338 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.539 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Ponder-14B-linear/30942374-a112-4035-a4f2-e30bff57f9ce.json b/data/hfopenllm_v2/Quazim0t0/Ponder-14B-linear/30942374-a112-4035-a4f2-e30bff57f9ce.json deleted file mode 100644 index 190155827..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Ponder-14B-linear/30942374-a112-4035-a4f2-e30bff57f9ce.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Ponder-14B-linear/1762652579.8290088", - "retrieved_timestamp": "1762652579.8290088", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Ponder-14B-linear", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Ponder-14B-linear", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6906064796960952 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6942602302118323 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4282477341389728 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35822147651006714 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45576041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5408078457446809 - } - } - 
] -} diff --git a/data/hfopenllm_v2/Quazim0t0/Ponder-14B-linear/b44224c3-ed2c-4120-9e2a-e6286358a4da.json b/data/hfopenllm_v2/Quazim0t0/Ponder-14B-linear/b44224c3-ed2c-4120-9e2a-e6286358a4da.json new file mode 100644 index 000000000..e66c3086b --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Ponder-14B-linear/b44224c3-ed2c-4120-9e2a-e6286358a4da.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Ponder-14B-linear/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ponder-14B-linear", + "id": "Quazim0t0/Ponder-14B-linear", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6906 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6943 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4282 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3582 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4558 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5408 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/RZA-14B-sce/e8a8cf1f-5bcf-45ae-b590-fb04de06b77f.json b/data/hfopenllm_v2/Quazim0t0/RZA-14B-sce/e8a8cf1f-5bcf-45ae-b590-fb04de06b77f.json deleted file mode 100644 index 1438e97c6..000000000 --- 
a/data/hfopenllm_v2/Quazim0t0/RZA-14B-sce/e8a8cf1f-5bcf-45ae-b590-fb04de06b77f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_RZA-14B-sce/1762652579.829216", - "retrieved_timestamp": "1762652579.829216", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/RZA-14B-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/RZA-14B-sce", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4773578549360142 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6685829139021245 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5188821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41133333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.538314494680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/RZA-14B-sce/f7a2c9af-c55c-4307-bfef-1ca709525d82.json b/data/hfopenllm_v2/Quazim0t0/RZA-14B-sce/f7a2c9af-c55c-4307-bfef-1ca709525d82.json new file mode 100644 index 000000000..eedf689a1 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/RZA-14B-sce/f7a2c9af-c55c-4307-bfef-1ca709525d82.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_RZA-14B-sce/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RZA-14B-sce", + "id": "Quazim0t0/RZA-14B-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", 
+ "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4774 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6686 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5189 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4113 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5383 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Rosemary-14b/84018db9-2b85-4b6f-beff-b4930b230399.json b/data/hfopenllm_v2/Quazim0t0/Rosemary-14b/84018db9-2b85-4b6f-beff-b4930b230399.json deleted file mode 100644 index 46ca09008..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Rosemary-14b/84018db9-2b85-4b6f-beff-b4930b230399.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Rosemary-14b/1762652579.829469", - "retrieved_timestamp": "1762652579.82947", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Rosemary-14b", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Rosemary-14b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6915306941138402 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6955261478148286 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.438821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3565436241610738 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44921875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5396442819148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/Rosemary-14b/d9655f35-edfd-4c53-b359-559870e8019e.json b/data/hfopenllm_v2/Quazim0t0/Rosemary-14b/d9655f35-edfd-4c53-b359-559870e8019e.json new file mode 100644 index 000000000..38d6ae792 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Rosemary-14b/d9655f35-edfd-4c53-b359-559870e8019e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Rosemary-14b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Rosemary-14b", + "id": "Quazim0t0/Rosemary-14b", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6915 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6955 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4388 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + 
"hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3565 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4492 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5396 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Rune-14b/3ed52eaf-6b73-46ab-8ae7-3afe120fe437.json b/data/hfopenllm_v2/Quazim0t0/Rune-14b/3ed52eaf-6b73-46ab-8ae7-3afe120fe437.json deleted file mode 100644 index 1c947aac0..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Rune-14b/3ed52eaf-6b73-46ab-8ae7-3afe120fe437.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Rune-14b/1762652579.829681", - "retrieved_timestamp": "1762652579.8296819", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Rune-14b", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Rune-14b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7015973173402128 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6937489642141156 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45845921450151056 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35151006711409394 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45328125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5411402925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/Rune-14b/afdd962d-652a-4395-92f7-c16dc874a779.json b/data/hfopenllm_v2/Quazim0t0/Rune-14b/afdd962d-652a-4395-92f7-c16dc874a779.json new file mode 100644 index 000000000..b20f38d6e --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Rune-14b/afdd962d-652a-4395-92f7-c16dc874a779.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Rune-14b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Rune-14b", + "id": "Quazim0t0/Rune-14b", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7016 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6937 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4585 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3515 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4533 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5411 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/SZA-14B-sce/2594e917-3ebd-428b-8f36-cb0da668695d.json 
b/data/hfopenllm_v2/Quazim0t0/SZA-14B-sce/2594e917-3ebd-428b-8f36-cb0da668695d.json new file mode 100644 index 000000000..b8cc374a1 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/SZA-14B-sce/2594e917-3ebd-428b-8f36-cb0da668695d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_SZA-14B-sce/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SZA-14B-sce", + "id": "Quazim0t0/SZA-14B-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5659 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6889 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5242 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3305 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4339 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5353 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/SZA-14B-sce/6d983237-925e-4197-a592-17cca9219bda.json b/data/hfopenllm_v2/Quazim0t0/SZA-14B-sce/6d983237-925e-4197-a592-17cca9219bda.json deleted file mode 100644 index 0613163c8..000000000 --- a/data/hfopenllm_v2/Quazim0t0/SZA-14B-sce/6d983237-925e-4197-a592-17cca9219bda.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - 
"evaluation_id": "hfopenllm_v2/Quazim0t0_SZA-14B-sce/1762652579.829889", - "retrieved_timestamp": "1762652579.82989", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/SZA-14B-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/SZA-14B-sce", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5659095644002359 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6888749072998727 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5241691842900302 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.433875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5353224734042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/Sake-20b/25a672ed-3e0e-416f-abf4-a935e63171c6.json b/data/hfopenllm_v2/Quazim0t0/Sake-20b/25a672ed-3e0e-416f-abf4-a935e63171c6.json deleted file mode 100644 index 6111b6eb6..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Sake-20b/25a672ed-3e0e-416f-abf4-a935e63171c6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Sake-20b/1762652579.830092", - "retrieved_timestamp": "1762652579.8300931", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Sake-20b", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Sake-20b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 21.475 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6692741924759638 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6769823539837527 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4652567975830816 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44940625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5391456117021277 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/Sake-20b/91a86644-ad96-4c66-8691-1c0b531b572c.json b/data/hfopenllm_v2/Quazim0t0/Sake-20b/91a86644-ad96-4c66-8691-1c0b531b572c.json new file mode 100644 index 000000000..a8bf797b3 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Sake-20b/91a86644-ad96-4c66-8691-1c0b531b572c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Sake-20b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sake-20b", + "id": "Quazim0t0/Sake-20b", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 21.475 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6693 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.677 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4653 + } + }, + { + 
"evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3188 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4494 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5391 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Spok-14b-sce/331f56ce-5e45-46d8-9143-3f66be20b699.json b/data/hfopenllm_v2/Quazim0t0/Spok-14b-sce/331f56ce-5e45-46d8-9143-3f66be20b699.json new file mode 100644 index 000000000..ee6d0d2f3 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Spok-14b-sce/331f56ce-5e45-46d8-9143-3f66be20b699.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Spok-14b-sce/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Spok-14b-sce", + "id": "Quazim0t0/Spok-14b-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6682 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6899 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2719 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.3456 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4141 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5298 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Spok-14b-sce/9f15293c-5668-4895-b4d0-4062cac344e7.json b/data/hfopenllm_v2/Quazim0t0/Spok-14b-sce/9f15293c-5668-4895-b4d0-4062cac344e7.json deleted file mode 100644 index 078168fd6..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Spok-14b-sce/9f15293c-5668-4895-b4d0-4062cac344e7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Spok-14b-sce/1762652579.830291", - "retrieved_timestamp": "1762652579.830292", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Spok-14b-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Spok-14b-sce", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6681748870773991 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6899172301380289 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2719033232628399 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34563758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41409375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5297539893617021 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/Quazim0t0/Sumatra-20b/6138ebe0-8483-4cfb-8d95-b334bb09e831.json b/data/hfopenllm_v2/Quazim0t0/Sumatra-20b/6138ebe0-8483-4cfb-8d95-b334bb09e831.json new file mode 100644 index 000000000..e73a1374c --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Sumatra-20b/6138ebe0-8483-4cfb-8d95-b334bb09e831.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Sumatra-20b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sumatra-20b", + "id": "Quazim0t0/Sumatra-20b", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 21.475 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6738 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6855 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3671 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3263 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.456 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5415 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Sumatra-20b/ae69fb3f-19a1-4b00-9309-8685e107aeba.json b/data/hfopenllm_v2/Quazim0t0/Sumatra-20b/ae69fb3f-19a1-4b00-9309-8685e107aeba.json deleted file mode 100644 index 00bacbb3a..000000000 --- 
a/data/hfopenllm_v2/Quazim0t0/Sumatra-20b/ae69fb3f-19a1-4b00-9309-8685e107aeba.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Sumatra-20b/1762652579.830487", - "retrieved_timestamp": "1762652579.830488", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Sumatra-20b", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Sumatra-20b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 21.475 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.673795529195867 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6855416597047258 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36706948640483383 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3263422818791946 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4560104166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5414727393617021 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/SuperNova14b/4d16dd47-42d1-4ea6-8f1b-dc50648bceab.json b/data/hfopenllm_v2/Quazim0t0/SuperNova14b/4d16dd47-42d1-4ea6-8f1b-dc50648bceab.json new file mode 100644 index 000000000..ae0580d15 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/SuperNova14b/4d16dd47-42d1-4ea6-8f1b-dc50648bceab.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_SuperNova14b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SuperNova14b", + "id": "Quazim0t0/SuperNova14b", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": 
"hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7076 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6937 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4396 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3523 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4545 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5435 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/SuperNova14b/b0659361-fb53-40db-81a7-2a72771bbd1a.json b/data/hfopenllm_v2/Quazim0t0/SuperNova14b/b0659361-fb53-40db-81a7-2a72771bbd1a.json deleted file mode 100644 index 0bc6c03f6..000000000 --- a/data/hfopenllm_v2/Quazim0t0/SuperNova14b/b0659361-fb53-40db-81a7-2a72771bbd1a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_SuperNova14b/1762652579.830682", - "retrieved_timestamp": "1762652579.830683", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/SuperNova14b", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/SuperNova14b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.707642388861554 - } - }, - 
{ - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6937489642141156 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4395770392749245 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3523489932885906 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4545208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.543467420212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/TB0-8B-sce/8f0da98a-cf9f-4cbb-8d4a-8c12d737580c.json b/data/hfopenllm_v2/Quazim0t0/TB0-8B-sce/8f0da98a-cf9f-4cbb-8d4a-8c12d737580c.json deleted file mode 100644 index 8567e0701..000000000 --- a/data/hfopenllm_v2/Quazim0t0/TB0-8B-sce/8f0da98a-cf9f-4cbb-8d4a-8c12d737580c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_TB0-8B-sce/1762652579.8308768", - "retrieved_timestamp": "1762652579.8308768", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/TB0-8B-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/TB0-8B-sce", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5107304175144174 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5074684221457483 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1510574018126888 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4037604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3770777925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/TB0-8B-sce/a6b0f2bf-08da-472f-b858-8be967a44cdc.json b/data/hfopenllm_v2/Quazim0t0/TB0-8B-sce/a6b0f2bf-08da-472f-b858-8be967a44cdc.json new file mode 100644 index 000000000..3f9a2e43e --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/TB0-8B-sce/a6b0f2bf-08da-472f-b858-8be967a44cdc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_TB0-8B-sce/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TB0-8B-sce", + "id": "Quazim0t0/TB0-8B-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5107 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5075 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1511 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3247 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4038 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3771 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/TBL-8B-sce/4bff88c0-89fb-4d07-a83d-251c7aaeace4.json b/data/hfopenllm_v2/Quazim0t0/TBL-8B-sce/4bff88c0-89fb-4d07-a83d-251c7aaeace4.json deleted file mode 100644 index 5b8aeef23..000000000 --- a/data/hfopenllm_v2/Quazim0t0/TBL-8B-sce/4bff88c0-89fb-4d07-a83d-251c7aaeace4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_TBL-8B-sce/1762652579.831074", - "retrieved_timestamp": "1762652579.831075", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/TBL-8B-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/TBL-8B-sce", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45809895521660304 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5008187839060233 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15332326283987915 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3338926174496644 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42363541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3689328457446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/TBL-8B-sce/57c7553d-f3e5-4a31-8c16-66aae570d8ec.json b/data/hfopenllm_v2/Quazim0t0/TBL-8B-sce/57c7553d-f3e5-4a31-8c16-66aae570d8ec.json new file mode 100644 index 000000000..dbe71df0d --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/TBL-8B-sce/57c7553d-f3e5-4a31-8c16-66aae570d8ec.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_TBL-8B-sce/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + 
}, + "model_info": { + "name": "TBL-8B-sce", + "id": "Quazim0t0/TBL-8B-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4581 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5008 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1533 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3339 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4236 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3689 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/ThinkPhi1.1-Tensors/58c31bdd-f86f-4fbb-8549-191bb9f46f02.json b/data/hfopenllm_v2/Quazim0t0/ThinkPhi1.1-Tensors/58c31bdd-f86f-4fbb-8549-191bb9f46f02.json new file mode 100644 index 000000000..2cd01867f --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/ThinkPhi1.1-Tensors/58c31bdd-f86f-4fbb-8549-191bb9f46f02.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_ThinkPhi1.1-Tensors/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ThinkPhi1.1-Tensors", + "id": "Quazim0t0/ThinkPhi1.1-Tensors", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 
14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3908 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6449 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.182 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.418 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4908 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Venti-20b/2b97259b-d7a5-4934-b350-7b1322964899.json b/data/hfopenllm_v2/Quazim0t0/Venti-20b/2b97259b-d7a5-4934-b350-7b1322964899.json deleted file mode 100644 index 7dd861b16..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Venti-20b/2b97259b-d7a5-4934-b350-7b1322964899.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Venti-20b/1762652579.8314738", - "retrieved_timestamp": "1762652579.831475", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Venti-20b", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Venti-20b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 21.475 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6641034676879568 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6901240010129452 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3391238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33221476510067116 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44797916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5386469414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/Venti-20b/dd25c1dd-0edf-44ca-b18c-633dbd47368f.json b/data/hfopenllm_v2/Quazim0t0/Venti-20b/dd25c1dd-0edf-44ca-b18c-633dbd47368f.json new file mode 100644 index 000000000..34f0d31fe --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Venti-20b/dd25c1dd-0edf-44ca-b18c-633dbd47368f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Venti-20b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Venti-20b", + "id": "Quazim0t0/Venti-20b", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 21.475 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6641 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6901 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.3391 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3322 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.448 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5386 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Venti-Blend-sce/2a030613-b5f7-4393-ac39-d2d072c913dc.json b/data/hfopenllm_v2/Quazim0t0/Venti-Blend-sce/2a030613-b5f7-4393-ac39-d2d072c913dc.json new file mode 100644 index 000000000..2f424d5ab --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Venti-Blend-sce/2a030613-b5f7-4393-ac39-d2d072c913dc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Venti-Blend-sce/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Venti-Blend-sce", + "id": "Quazim0t0/Venti-Blend-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 21.475 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6879 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6843 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4056 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3163 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4389 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5414 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Venti-Blend-sce/e9fa96ff-d790-4948-9071-dd1376701fc1.json b/data/hfopenllm_v2/Quazim0t0/Venti-Blend-sce/e9fa96ff-d790-4948-9071-dd1376701fc1.json deleted file mode 100644 index 54d15a697..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Venti-Blend-sce/e9fa96ff-d790-4948-9071-dd1376701fc1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Venti-Blend-sce/1762652579.831816", - "retrieved_timestamp": "1762652579.8318179", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Venti-Blend-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Venti-Blend-sce", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 21.475 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6879335718116819 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6842921511560114 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40558912386706947 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43892708333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.5413896276595744 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/Vine-14b-sce/70d25d8c-96e9-45e4-b0d1-684a89278064.json b/data/hfopenllm_v2/Quazim0t0/Vine-14b-sce/70d25d8c-96e9-45e4-b0d1-684a89278064.json deleted file mode 100644 index 2f1de459c..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Vine-14b-sce/70d25d8c-96e9-45e4-b0d1-684a89278064.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Vine-14b-sce/1762652579.8321972", - "retrieved_timestamp": "1762652579.832198", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Vine-14b-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Vine-14b-sce", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.673345611865406 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6891400252742456 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5007552870090635 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3338926174496644 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4322916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5408078457446809 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/Vine-14b-sce/f8c73290-c400-4f1f-a00a-516592497b0d.json b/data/hfopenllm_v2/Quazim0t0/Vine-14b-sce/f8c73290-c400-4f1f-a00a-516592497b0d.json new file mode 100644 index 000000000..947f1d53a --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Vine-14b-sce/f8c73290-c400-4f1f-a00a-516592497b0d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Vine-14b-sce/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Vine-14b-sce", + "id": "Quazim0t0/Vine-14b-sce", + "developer": 
"Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6733 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6891 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5008 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3339 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4323 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5408 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Wendy-14B/13e6cad7-a063-4530-bec9-e70e4e98ccc0.json b/data/hfopenllm_v2/Quazim0t0/Wendy-14B/13e6cad7-a063-4530-bec9-e70e4e98ccc0.json deleted file mode 100644 index 758f99a01..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Wendy-14B/13e6cad7-a063-4530-bec9-e70e4e98ccc0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Wendy-14B/1762652579.832468", - "retrieved_timestamp": "1762652579.832469", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Wendy-14B", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Wendy-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6772175605172055 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6957587467354328 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48338368580060426 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33221476510067116 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4428020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.543467420212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/Wendy-14B/b31908fc-5e7e-45d6-835f-4e86a05b23fb.json b/data/hfopenllm_v2/Quazim0t0/Wendy-14B/b31908fc-5e7e-45d6-835f-4e86a05b23fb.json new file mode 100644 index 000000000..385c8931d --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Wendy-14B/b31908fc-5e7e-45d6-835f-4e86a05b23fb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Wendy-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Wendy-14B", + "id": "Quazim0t0/Wendy-14B", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6772 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6958 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact 
Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4834 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3322 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4428 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5435 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Wu-14b-sce/35443539-9756-466b-a36f-66adc5f68ddb.json b/data/hfopenllm_v2/Quazim0t0/Wu-14b-sce/35443539-9756-466b-a36f-66adc5f68ddb.json deleted file mode 100644 index e678c5d39..000000000 --- a/data/hfopenllm_v2/Quazim0t0/Wu-14b-sce/35443539-9756-466b-a36f-66adc5f68ddb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Wu-14b-sce/1762652579.832721", - "retrieved_timestamp": "1762652579.832722", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Wu-14b-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Wu-14b-sce", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6718218770639681 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6885164810743585 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26132930513595165 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3464765100671141 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": 
"Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41142708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5292553191489362 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/Wu-14b-sce/4320cb98-7f9f-4510-bb88-448ce231bae8.json b/data/hfopenllm_v2/Quazim0t0/Wu-14b-sce/4320cb98-7f9f-4510-bb88-448ce231bae8.json new file mode 100644 index 000000000..8fbcfb8f2 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Wu-14b-sce/4320cb98-7f9f-4510-bb88-448ce231bae8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Wu-14b-sce/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Wu-14b-sce", + "id": "Quazim0t0/Wu-14b-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6718 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6885 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2613 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3465 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4114 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5293 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/bloom-14b-stock/1a2b4a76-0feb-4404-a1ef-0408c75f2ca7.json b/data/hfopenllm_v2/Quazim0t0/bloom-14b-stock/1a2b4a76-0feb-4404-a1ef-0408c75f2ca7.json deleted file mode 100644 index da13f621a..000000000 --- a/data/hfopenllm_v2/Quazim0t0/bloom-14b-stock/1a2b4a76-0feb-4404-a1ef-0408c75f2ca7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_bloom-14b-stock/1762652579.8329449", - "retrieved_timestamp": "1762652579.8329458", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/bloom-14b-stock", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/bloom-14b-stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6575087434673332 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6877869223612597 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4811178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43095833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5373171542553191 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/bloom-14b-stock/28b986d1-2e67-4462-9165-6cb8f260b6c6.json b/data/hfopenllm_v2/Quazim0t0/bloom-14b-stock/28b986d1-2e67-4462-9165-6cb8f260b6c6.json new file mode 100644 index 000000000..f549634bc --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/bloom-14b-stock/28b986d1-2e67-4462-9165-6cb8f260b6c6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_bloom-14b-stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + 
"evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bloom-14b-stock", + "id": "Quazim0t0/bloom-14b-stock", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6575 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6878 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4811 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3314 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.431 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5373 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/caramel-14B/a9d4b6a9-33af-42a3-be29-d3214a171433.json b/data/hfopenllm_v2/Quazim0t0/caramel-14B/a9d4b6a9-33af-42a3-be29-d3214a171433.json deleted file mode 100644 index 649f0ef48..000000000 --- a/data/hfopenllm_v2/Quazim0t0/caramel-14B/a9d4b6a9-33af-42a3-be29-d3214a171433.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_caramel-14B/1762652579.833162", - "retrieved_timestamp": "1762652579.833163", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/caramel-14B", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": 
"Quazim0t0/caramel-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6744947849483814 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6918707471458787 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47129909365558914 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3447986577181208 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.445375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5435505319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/caramel-14B/fe1e21cb-7934-4022-a74a-777172310021.json b/data/hfopenllm_v2/Quazim0t0/caramel-14B/fe1e21cb-7934-4022-a74a-777172310021.json new file mode 100644 index 000000000..fc8e481dc --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/caramel-14B/fe1e21cb-7934-4022-a74a-777172310021.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_caramel-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "caramel-14B", + "id": "Quazim0t0/caramel-14B", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6745 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6919 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH 
Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4713 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3448 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4454 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5436 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/graphite-14b-sce/90871638-b828-484d-8822-95ffceb20909.json b/data/hfopenllm_v2/Quazim0t0/graphite-14b-sce/90871638-b828-484d-8822-95ffceb20909.json new file mode 100644 index 000000000..b45a1e117 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/graphite-14b-sce/90871638-b828-484d-8822-95ffceb20909.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_graphite-14b-sce/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "graphite-14b-sce", + "id": "Quazim0t0/graphite-14b-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3217 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6631 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 
1.0 + }, + "score_details": { + "score": 0.3006 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3981 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.528 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/mocha-14B/04a98dfb-8e96-444c-8df4-ed7cf72a26ea.json b/data/hfopenllm_v2/Quazim0t0/mocha-14B/04a98dfb-8e96-444c-8df4-ed7cf72a26ea.json new file mode 100644 index 000000000..c48422312 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/mocha-14B/04a98dfb-8e96-444c-8df4-ed7cf72a26ea.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_mocha-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mocha-14B", + "id": "Quazim0t0/mocha-14B", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5893 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6895 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5264 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3305 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4272 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5384 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/mocha-14B/5c04fa63-11be-42d8-8133-4e79e08e42ad.json b/data/hfopenllm_v2/Quazim0t0/mocha-14B/5c04fa63-11be-42d8-8133-4e79e08e42ad.json deleted file mode 100644 index 13dd858c4..000000000 --- a/data/hfopenllm_v2/Quazim0t0/mocha-14B/5c04fa63-11be-42d8-8133-4e79e08e42ad.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_mocha-14B/1762652579.833622", - "retrieved_timestamp": "1762652579.833623", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/mocha-14B", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/mocha-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5893152391210876 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6894730595527842 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5264350453172205 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4271770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.5383976063829787 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/mosaic-14b-sce/4fd82b3e-4b13-4e21-9253-6492f8b1feaa.json b/data/hfopenllm_v2/Quazim0t0/mosaic-14b-sce/4fd82b3e-4b13-4e21-9253-6492f8b1feaa.json deleted file mode 100644 index f775c0aec..000000000 --- a/data/hfopenllm_v2/Quazim0t0/mosaic-14b-sce/4fd82b3e-4b13-4e21-9253-6492f8b1feaa.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_mosaic-14b-sce/1762652579.8338351", - "retrieved_timestamp": "1762652579.833836", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/mosaic-14b-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/mosaic-14b-sce", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6875590100932193 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6907089244809823 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4025679758308157 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3624161073825503 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45579166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5396442819148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/mosaic-14b-sce/8c5c22af-f230-4d34-b80d-f42ef27e1675.json b/data/hfopenllm_v2/Quazim0t0/mosaic-14b-sce/8c5c22af-f230-4d34-b80d-f42ef27e1675.json new file mode 100644 index 000000000..e02cc70c3 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/mosaic-14b-sce/8c5c22af-f230-4d34-b80d-f42ef27e1675.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_mosaic-14b-sce/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mosaic-14b-sce", + "id": "Quazim0t0/mosaic-14b-sce", + "developer": 
"Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6876 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6907 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4026 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3624 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4558 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5396 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/tesseract-14b-stock/4311b63a-282b-4c16-8609-a1d4ab93ace9.json b/data/hfopenllm_v2/Quazim0t0/tesseract-14b-stock/4311b63a-282b-4c16-8609-a1d4ab93ace9.json deleted file mode 100644 index ef213ac06..000000000 --- a/data/hfopenllm_v2/Quazim0t0/tesseract-14b-stock/4311b63a-282b-4c16-8609-a1d4ab93ace9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_tesseract-14b-stock/1762652579.834054", - "retrieved_timestamp": "1762652579.834055", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/tesseract-14b-stock", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/tesseract-14b-stock", - "additional_details": { - "precision": "bfloat16", - "architecture": 
"LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5847939024011845 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6880007346047826 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5143504531722054 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271812080536913 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42323958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5388962765957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/tesseract-14b-stock/f3466a90-541b-4a08-a9c6-d5a79b2299b0.json b/data/hfopenllm_v2/Quazim0t0/tesseract-14b-stock/f3466a90-541b-4a08-a9c6-d5a79b2299b0.json new file mode 100644 index 000000000..f5ff78824 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/tesseract-14b-stock/f3466a90-541b-4a08-a9c6-d5a79b2299b0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_tesseract-14b-stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tesseract-14b-stock", + "id": "Quazim0t0/tesseract-14b-stock", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5848 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.688 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": 
"hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5144 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3272 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4232 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5389 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/time-14b-stock/2755da2c-8347-4bbd-80ee-c58e77a26f5e.json b/data/hfopenllm_v2/Quazim0t0/time-14b-stock/2755da2c-8347-4bbd-80ee-c58e77a26f5e.json deleted file mode 100644 index 99304a7ef..000000000 --- a/data/hfopenllm_v2/Quazim0t0/time-14b-stock/2755da2c-8347-4bbd-80ee-c58e77a26f5e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_time-14b-stock/1762652579.834393", - "retrieved_timestamp": "1762652579.8343942", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/time-14b-stock", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/time-14b-stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6699235805440675 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6897025970028126 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5083081570996979 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 
1 - }, - "score_details": { - "score": 0.3347315436241611 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43232291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5418882978723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/Quazim0t0/time-14b-stock/ef9ee5ae-d92b-4143-af1b-d62a7c3c7fd4.json b/data/hfopenllm_v2/Quazim0t0/time-14b-stock/ef9ee5ae-d92b-4143-af1b-d62a7c3c7fd4.json new file mode 100644 index 000000000..763d06ba9 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/time-14b-stock/ef9ee5ae-d92b-4143-af1b-d62a7c3c7fd4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Quazim0t0_time-14b-stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "time-14b-stock", + "id": "Quazim0t0/time-14b-stock", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6699 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6897 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5083 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3347 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4323 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + 
"source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5419 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/QwQ-32B-Preview/859af708-ac37-4749-bc06-73d92338d1f5.json b/data/hfopenllm_v2/Qwen/QwQ-32B-Preview/859af708-ac37-4749-bc06-73d92338d1f5.json new file mode 100644 index 000000000..9072fe047 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/QwQ-32B-Preview/859af708-ac37-4749-bc06-73d92338d1f5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_QwQ-32B-Preview/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwQ-32B-Preview", + "id": "Qwen/QwQ-32B-Preview", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4035 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6691 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4494 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2819 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.411 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5678 + } + } + ] +} \ No 
newline at end of file diff --git a/data/hfopenllm_v2/Qwen/QwQ-32B/e274380d-e0f7-47c3-afc3-e603e6cecf9e.json b/data/hfopenllm_v2/Qwen/QwQ-32B/e274380d-e0f7-47c3-afc3-e603e6cecf9e.json new file mode 100644 index 000000000..8a460b8de --- /dev/null +++ b/data/hfopenllm_v2/Qwen/QwQ-32B/e274380d-e0f7-47c3-afc3-e603e6cecf9e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_QwQ-32B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwQ-32B", + "id": "Qwen/QwQ-32B", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3977 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2983 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1609 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4206 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1196 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen1.5-0.5B-Chat/19810be8-ea81-4db5-9854-1830b05a5732.json b/data/hfopenllm_v2/Qwen/Qwen1.5-0.5B-Chat/19810be8-ea81-4db5-9854-1830b05a5732.json new file mode 100644 index 000000000..d12f434fb --- /dev/null +++ 
b/data/hfopenllm_v2/Qwen/Qwen1.5-0.5B-Chat/19810be8-ea81-4db5-9854-1830b05a5732.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-0.5B-Chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen1.5-0.5B-Chat", + "id": "Qwen/Qwen1.5-0.5B-Chat", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.62 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1807 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3167 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0068 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2693 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3837 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1213 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen1.5-0.5B-Chat/96baee1a-7ea7-454f-ac8b-fe1bead3cd93.json b/data/hfopenllm_v2/Qwen/Qwen1.5-0.5B-Chat/96baee1a-7ea7-454f-ac8b-fe1bead3cd93.json deleted file mode 100644 index fa14ed5ef..000000000 --- a/data/hfopenllm_v2/Qwen/Qwen1.5-0.5B-Chat/96baee1a-7ea7-454f-ac8b-fe1bead3cd93.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-0.5B-Chat/1762652579.835679", - "retrieved_timestamp": "1762652579.83568", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen1.5-0.5B-Chat", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen1.5-0.5B-Chat", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.62 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18072713732895385 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3166662152036714 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3837083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12125997340425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/Qwen/Qwen1.5-0.5B/1258c282-3672-4b42-9d4d-117568e17bf5.json b/data/hfopenllm_v2/Qwen/Qwen1.5-0.5B/1258c282-3672-4b42-9d4d-117568e17bf5.json new file mode 100644 index 000000000..b3a7d8e15 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen1.5-0.5B/1258c282-3672-4b42-9d4d-117568e17bf5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-0.5B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen1.5-0.5B", + "id": "Qwen/Qwen1.5-0.5B", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.62 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1706 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + 
"dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3154 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0174 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2542 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3616 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1307 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen1.5-1.8B-Chat/9b9f6e01-238e-4893-b398-4e1c83c44dfa.json b/data/hfopenllm_v2/Qwen/Qwen1.5-1.8B-Chat/9b9f6e01-238e-4893-b398-4e1c83c44dfa.json new file mode 100644 index 000000000..5d7e32fc8 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen1.5-1.8B-Chat/9b9f6e01-238e-4893-b398-4e1c83c44dfa.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-1.8B-Chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen1.5-1.8B-Chat", + "id": "Qwen/Qwen1.5-1.8B-Chat", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.837 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2019 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.3256 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0196 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.426 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1804 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen1.5-1.8B-Chat/d6107bde-875e-40f6-8471-3a3507758910.json b/data/hfopenllm_v2/Qwen/Qwen1.5-1.8B-Chat/d6107bde-875e-40f6-8471-3a3507758910.json deleted file mode 100644 index 7656eef3d..000000000 --- a/data/hfopenllm_v2/Qwen/Qwen1.5-1.8B-Chat/d6107bde-875e-40f6-8471-3a3507758910.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-1.8B-Chat/1762652579.836214", - "retrieved_timestamp": "1762652579.836215", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen1.5-1.8B-Chat", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen1.5-1.8B-Chat", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.837 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20190982149585324 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3255912875735599 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": 
"Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42596875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18035239361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/Qwen/Qwen1.5-1.8B/b267621b-dbba-4c4a-bb9f-fa85734d0f59.json b/data/hfopenllm_v2/Qwen/Qwen1.5-1.8B/b267621b-dbba-4c4a-bb9f-fa85734d0f59.json new file mode 100644 index 000000000..c4b4b7c05 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen1.5-1.8B/b267621b-dbba-4c4a-bb9f-fa85734d0f59.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-1.8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen1.5-1.8B", + "id": "Qwen/Qwen1.5-1.8B", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.837 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2154 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3476 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0317 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3605 + } + }, + { + "evaluation_name": 
"MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1882 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen1.5-110B-Chat/7cfcae3d-b623-4cf0-9ac8-529db46d05e6.json b/data/hfopenllm_v2/Qwen/Qwen1.5-110B-Chat/7cfcae3d-b623-4cf0-9ac8-529db46d05e6.json deleted file mode 100644 index 75c87568a..000000000 --- a/data/hfopenllm_v2/Qwen/Qwen1.5-110B-Chat/7cfcae3d-b623-4cf0-9ac8-529db46d05e6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-110B-Chat/1762652579.836649", - "retrieved_timestamp": "1762652579.836649", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen1.5-110B-Chat", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen1.5-110B-Chat", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 111.21 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5938864435254014 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6183800385588633 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3414429530201342 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45216666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48246343085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/Qwen/Qwen1.5-110B-Chat/a7e4e787-8e95-48a0-9d50-53ba9f05cd1c.json b/data/hfopenllm_v2/Qwen/Qwen1.5-110B-Chat/a7e4e787-8e95-48a0-9d50-53ba9f05cd1c.json new file mode 100644 index 000000000..1539af866 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen1.5-110B-Chat/a7e4e787-8e95-48a0-9d50-53ba9f05cd1c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-110B-Chat/1770682486.623709", + 
"retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen1.5-110B-Chat", + "id": "Qwen/Qwen1.5-110B-Chat", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 111.21 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5939 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6184 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2341 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3414 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4522 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4825 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen1.5-110B/3d39dcab-55df-4ad3-bdc8-03ae684e4390.json b/data/hfopenllm_v2/Qwen/Qwen1.5-110B/3d39dcab-55df-4ad3-bdc8-03ae684e4390.json new file mode 100644 index 000000000..3d0d4bcdf --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen1.5-110B/3d39dcab-55df-4ad3-bdc8-03ae684e4390.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-110B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen1.5-110B", + "id": "Qwen/Qwen1.5-110B", + 
"developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 111.21 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3422 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.61 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.247 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3523 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4408 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5361 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen1.5-14B-Chat/1b499881-9edb-4626-a919-977393d6bef1.json b/data/hfopenllm_v2/Qwen/Qwen1.5-14B-Chat/1b499881-9edb-4626-a919-977393d6bef1.json new file mode 100644 index 000000000..c47867900 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen1.5-14B-Chat/1b499881-9edb-4626-a919-977393d6bef1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-14B-Chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen1.5-14B-Chat", + "id": "Qwen/Qwen1.5-14B-Chat", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.167 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + 
"source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4768 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5229 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1526 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.44 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3618 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen1.5-14B-Chat/e2cdcc99-a1b6-43ee-9cda-2e7ccbd0ad8d.json b/data/hfopenllm_v2/Qwen/Qwen1.5-14B-Chat/e2cdcc99-a1b6-43ee-9cda-2e7ccbd0ad8d.json deleted file mode 100644 index be6389ed5..000000000 --- a/data/hfopenllm_v2/Qwen/Qwen1.5-14B-Chat/e2cdcc99-a1b6-43ee-9cda-2e7ccbd0ad8d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-14B-Chat/1762652579.837058", - "retrieved_timestamp": "1762652579.837059", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen1.5-14B-Chat", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen1.5-14B-Chat", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.167 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47680820223673187 - } - 
}, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5228587510703555 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15256797583081572 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43997916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36178523936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/Qwen/Qwen1.5-14B/84b8970c-6c29-4ee1-93b8-c97e4a7c4950.json b/data/hfopenllm_v2/Qwen/Qwen1.5-14B/84b8970c-6c29-4ee1-93b8-c97e4a7c4950.json new file mode 100644 index 000000000..d12634623 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen1.5-14B/84b8970c-6c29-4ee1-93b8-c97e4a7c4950.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen1.5-14B", + "id": "Qwen/Qwen1.5-14B", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.167 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2905 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.508 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2024 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": 
"Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4186 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3644 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen1.5-32B-Chat/2e070663-2622-4a8e-bd39-7f0ef9df399e.json b/data/hfopenllm_v2/Qwen/Qwen1.5-32B-Chat/2e070663-2622-4a8e-bd39-7f0ef9df399e.json new file mode 100644 index 000000000..695a36dcd --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen1.5-32B-Chat/2e070663-2622-4a8e-bd39-7f0ef9df399e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-32B-Chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen1.5-32B-Chat", + "id": "Qwen/Qwen1.5-32B-Chat", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.512 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5532 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6067 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1956 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + 
"dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.416 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4457 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen1.5-32B-Chat/c14a0d32-1d27-4596-90d4-10a793aef9a2.json b/data/hfopenllm_v2/Qwen/Qwen1.5-32B-Chat/c14a0d32-1d27-4596-90d4-10a793aef9a2.json deleted file mode 100644 index 7258b54ff..000000000 --- a/data/hfopenllm_v2/Qwen/Qwen1.5-32B-Chat/c14a0d32-1d27-4596-90d4-10a793aef9a2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-32B-Chat/1762652579.8374798", - "retrieved_timestamp": "1762652579.8374798", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen1.5-32B-Chat", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen1.5-32B-Chat", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.512 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5532199009738605 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6066899757930234 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19561933534743203 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4159791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4457280585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/Qwen/Qwen1.5-32B/047fa91e-2dc7-4881-8254-3dfbd4a2ff1b.json 
b/data/hfopenllm_v2/Qwen/Qwen1.5-32B/047fa91e-2dc7-4881-8254-3dfbd4a2ff1b.json new file mode 100644 index 000000000..ff8b2ab29 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen1.5-32B/047fa91e-2dc7-4881-8254-3dfbd4a2ff1b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-32B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen1.5-32B", + "id": "Qwen/Qwen1.5-32B", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.512 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3297 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5715 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4278 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.45 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen1.5-4B-Chat/6d73016e-078e-4ffe-b2ae-5b829d1456df.json b/data/hfopenllm_v2/Qwen/Qwen1.5-4B-Chat/6d73016e-078e-4ffe-b2ae-5b829d1456df.json new file mode 100644 index 000000000..3d23aaf52 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen1.5-4B-Chat/6d73016e-078e-4ffe-b2ae-5b829d1456df.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/Qwen_Qwen1.5-4B-Chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen1.5-4B-Chat", + "id": "Qwen/Qwen1.5-4B-Chat", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.95 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3157 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4006 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0279 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3978 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2396 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen1.5-4B-Chat/e3417d3e-7883-45a7-a631-9e5d105788c4.json b/data/hfopenllm_v2/Qwen/Qwen1.5-4B-Chat/e3417d3e-7883-45a7-a631-9e5d105788c4.json deleted file mode 100644 index 8d6a30570..000000000 --- a/data/hfopenllm_v2/Qwen/Qwen1.5-4B-Chat/e3417d3e-7883-45a7-a631-9e5d105788c4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-4B-Chat/1762652579.837912", - "retrieved_timestamp": "1762652579.837912", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen1.5-4B-Chat", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen1.5-4B-Chat", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.95 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31566576683200576 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40055485611486114 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39778125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23961103723404256 - } - } - ] -} diff --git a/data/hfopenllm_v2/Qwen/Qwen1.5-4B/0b68b5bd-d22c-4194-9ddf-f22e9181f84d.json b/data/hfopenllm_v2/Qwen/Qwen1.5-4B/0b68b5bd-d22c-4194-9ddf-f22e9181f84d.json new file mode 100644 index 000000000..b317e8f1c --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen1.5-4B/0b68b5bd-d22c-4194-9ddf-f22e9181f84d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-4B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen1.5-4B", + "id": "Qwen/Qwen1.5-4B", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.95 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2445 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4054 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0529 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2768 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3604 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.246 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen1.5-7B-Chat/03d51d90-fd15-42b7-ad5f-c7326cc642a7.json b/data/hfopenllm_v2/Qwen/Qwen1.5-7B-Chat/03d51d90-fd15-42b7-ad5f-c7326cc642a7.json new file mode 100644 index 000000000..7865c5891 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen1.5-7B-Chat/03d51d90-fd15-42b7-ad5f-c7326cc642a7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-7B-Chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen1.5-7B-Chat", + "id": "Qwen/Qwen1.5-7B-Chat", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.721 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4371 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.451 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + 
"metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0627 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3779 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2951 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen1.5-7B-Chat/42e3c9e4-bf1a-43ae-87e7-056f735abe03.json b/data/hfopenllm_v2/Qwen/Qwen1.5-7B-Chat/42e3c9e4-bf1a-43ae-87e7-056f735abe03.json deleted file mode 100644 index 6b4c2212e..000000000 --- a/data/hfopenllm_v2/Qwen/Qwen1.5-7B-Chat/42e3c9e4-bf1a-43ae-87e7-056f735abe03.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-7B-Chat/1762652579.838321", - "retrieved_timestamp": "1762652579.838322", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen1.5-7B-Chat", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen1.5-7B-Chat", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.721 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43711574178734647 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4510053116521351 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06268882175226587 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": 
"MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37790624999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2951296542553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/Qwen/Qwen1.5-7B/d3e5c939-c53a-49d6-80cd-34420dbb176a.json b/data/hfopenllm_v2/Qwen/Qwen1.5-7B/d3e5c939-c53a-49d6-80cd-34420dbb176a.json new file mode 100644 index 000000000..29ccd9046 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen1.5-7B/d3e5c939-c53a-49d6-80cd-34420dbb176a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen1.5-7B", + "id": "Qwen/Qwen1.5-7B", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.721 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2684 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.456 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0929 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4103 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2916 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen1.5-MoE-A2.7B-Chat/ab321358-26f9-4577-a5fb-1f5d4b8784b4.json b/data/hfopenllm_v2/Qwen/Qwen1.5-MoE-A2.7B-Chat/ab321358-26f9-4577-a5fb-1f5d4b8784b4.json new file mode 100644 index 000000000..c414d02d4 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen1.5-MoE-A2.7B-Chat/ab321358-26f9-4577-a5fb-1f5d4b8784b4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-MoE-A2.7B-Chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen1.5-MoE-A2.7B-Chat", + "id": "Qwen/Qwen1.5-MoE-A2.7B-Chat", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2MoeForCausalLM", + "params_billions": 14.316 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3795 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4272 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0634 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3899 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2923 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/Qwen/Qwen1.5-MoE-A2.7B-Chat/daec0873-964e-459e-a1a1-49da96cd17cf.json b/data/hfopenllm_v2/Qwen/Qwen1.5-MoE-A2.7B-Chat/daec0873-964e-459e-a1a1-49da96cd17cf.json deleted file mode 100644 index 12cc958b5..000000000 --- a/data/hfopenllm_v2/Qwen/Qwen1.5-MoE-A2.7B-Chat/daec0873-964e-459e-a1a1-49da96cd17cf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-MoE-A2.7B-Chat/1762652579.838758", - "retrieved_timestamp": "1762652579.838758", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen1.5-MoE-A2.7B-Chat", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen1.5-MoE-A2.7B-Chat", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2MoeForCausalLM", - "params_billions": 14.316 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37953851336675576 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4272088620635824 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38987499999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29230385638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/Qwen/Qwen1.5-MoE-A2.7B/a43aae68-f12c-4a6d-b846-c498cf35f6cd.json b/data/hfopenllm_v2/Qwen/Qwen1.5-MoE-A2.7B/a43aae68-f12c-4a6d-b846-c498cf35f6cd.json new file mode 100644 index 000000000..07658bbfa --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen1.5-MoE-A2.7B/a43aae68-f12c-4a6d-b846-c498cf35f6cd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-MoE-A2.7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen1.5-MoE-A2.7B", + "id": "Qwen/Qwen1.5-MoE-A2.7B", + "developer": "Qwen", + "inference_platform": 
"unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2MoeForCausalLM", + "params_billions": 14.316 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.266 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4114 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0929 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4013 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2778 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2-0.5B-Instruct/6986e9f0-d008-4418-b3cb-1e870cf57e02.json b/data/hfopenllm_v2/Qwen/Qwen2-0.5B-Instruct/6986e9f0-d008-4418-b3cb-1e870cf57e02.json deleted file mode 100644 index dbdbf724b..000000000 --- a/data/hfopenllm_v2/Qwen/Qwen2-0.5B-Instruct/6986e9f0-d008-4418-b3cb-1e870cf57e02.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-0.5B-Instruct/1762652579.839177", - "retrieved_timestamp": "1762652579.839178", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2-0.5B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2-0.5B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22466610814860127 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31725179384863494 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24664429530201343 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33527083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15309175531914893 - } - } - ] -} diff --git a/data/hfopenllm_v2/Qwen/Qwen2-0.5B-Instruct/b84615c0-43c4-49ec-83fe-5d3f8e6026af.json b/data/hfopenllm_v2/Qwen/Qwen2-0.5B-Instruct/b84615c0-43c4-49ec-83fe-5d3f8e6026af.json new file mode 100644 index 000000000..d6e75e508 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2-0.5B-Instruct/b84615c0-43c4-49ec-83fe-5d3f8e6026af.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-0.5B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2-0.5B-Instruct", + "id": "Qwen/Qwen2-0.5B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2247 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3173 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0287 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2466 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3353 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1531 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2-0.5B/7e687d24-9e12-4ecf-b283-e222efb9473a.json b/data/hfopenllm_v2/Qwen/Qwen2-0.5B/7e687d24-9e12-4ecf-b283-e222efb9473a.json new file mode 100644 index 000000000..164002250 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2-0.5B/7e687d24-9e12-4ecf-b283-e222efb9473a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-0.5B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2-0.5B", + "id": "Qwen/Qwen2-0.5B", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1873 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3239 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0264 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": 
"Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3752 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.172 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2-1.5B-Instruct/4aea143c-28fd-48bb-b911-37ac3fe58220.json b/data/hfopenllm_v2/Qwen/Qwen2-1.5B-Instruct/4aea143c-28fd-48bb-b911-37ac3fe58220.json new file mode 100644 index 000000000..29c2d61f9 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2-1.5B-Instruct/4aea143c-28fd-48bb-b911-37ac3fe58220.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-1.5B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2-1.5B-Instruct", + "id": "Qwen/Qwen2-1.5B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3371 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3852 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0718 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + 
"source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4293 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2501 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2-1.5B-Instruct/984029c7-f957-4555-8460-dfecd99f44a1.json b/data/hfopenllm_v2/Qwen/Qwen2-1.5B-Instruct/984029c7-f957-4555-8460-dfecd99f44a1.json deleted file mode 100644 index a2488f310..000000000 --- a/data/hfopenllm_v2/Qwen/Qwen2-1.5B-Instruct/984029c7-f957-4555-8460-dfecd99f44a1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-1.5B-Instruct/1762652579.839607", - "retrieved_timestamp": "1762652579.839607", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2-1.5B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2-1.5B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3371232773485463 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3852232408376059 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42928125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25008311170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/Qwen/Qwen2-1.5B/34a8daec-bfff-4cf4-9011-0542b30c1d10.json 
b/data/hfopenllm_v2/Qwen/Qwen2-1.5B/34a8daec-bfff-4cf4-9011-0542b30c1d10.json new file mode 100644 index 000000000..290263529 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2-1.5B/34a8daec-bfff-4cf4-9011-0542b30c1d10.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-1.5B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2-1.5B", + "id": "Qwen/Qwen2-1.5B", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2113 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3575 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0702 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3658 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2552 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2-57B-A14B-Instruct/3e919d7b-53db-41fb-ac93-224e2768b9c6.json b/data/hfopenllm_v2/Qwen/Qwen2-57B-A14B-Instruct/3e919d7b-53db-41fb-ac93-224e2768b9c6.json new file mode 100644 index 000000000..c5c5b3781 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2-57B-A14B-Instruct/3e919d7b-53db-41fb-ac93-224e2768b9c6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/Qwen_Qwen2-57B-A14B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2-57B-A14B-Instruct", + "id": "Qwen/Qwen2-57B-A14B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2MoeForCausalLM", + "params_billions": 57.409 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6338 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5888 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2817 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3314 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4361 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4575 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2-57B-A14B-Instruct/50496313-dc6c-4456-8a8c-15cd8ddbb480.json b/data/hfopenllm_v2/Qwen/Qwen2-57B-A14B-Instruct/50496313-dc6c-4456-8a8c-15cd8ddbb480.json deleted file mode 100644 index 6c57f1dcd..000000000 --- a/data/hfopenllm_v2/Qwen/Qwen2-57B-A14B-Instruct/50496313-dc6c-4456-8a8c-15cd8ddbb480.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-57B-A14B-Instruct/1762652579.84003", - "retrieved_timestamp": "1762652579.840031", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2-57B-A14B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2-57B-A14B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2MoeForCausalLM", - "params_billions": 57.409 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6337783747124297 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5887606963532052 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28172205438066467 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43613541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45752992021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/Qwen/Qwen2-57B-A14B/66becca1-d92b-409f-ab56-44d05cac66fd.json b/data/hfopenllm_v2/Qwen/Qwen2-57B-A14B/66becca1-d92b-409f-ab56-44d05cac66fd.json new file mode 100644 index 000000000..b8c0fe259 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2-57B-A14B/66becca1-d92b-409f-ab56-44d05cac66fd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-57B-A14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2-57B-A14B", + "id": "Qwen/Qwen2-57B-A14B", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2MoeForCausalLM", + "params_billions": 57.409 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3113 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5618 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1866 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4174 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4916 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2-72B-Instruct/6293b269-7c4c-44da-bd85-e51954c173a1.json b/data/hfopenllm_v2/Qwen/Qwen2-72B-Instruct/6293b269-7c4c-44da-bd85-e51954c173a1.json new file mode 100644 index 000000000..f6edd8027 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2-72B-Instruct/6293b269-7c4c-44da-bd85-e51954c173a1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-72B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2-72B-Instruct", + "id": "Qwen/Qwen2-72B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7989 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6977 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + 
"dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4177 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3725 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.456 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5403 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2-72B-Instruct/d9ae7c35-ac71-4703-9cfe-bf5fb5aa688e.json b/data/hfopenllm_v2/Qwen/Qwen2-72B-Instruct/d9ae7c35-ac71-4703-9cfe-bf5fb5aa688e.json deleted file mode 100644 index e2a352050..000000000 --- a/data/hfopenllm_v2/Qwen/Qwen2-72B-Instruct/d9ae7c35-ac71-4703-9cfe-bf5fb5aa688e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-72B-Instruct/1762652579.840446", - "retrieved_timestamp": "1762652579.840447", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2-72B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2-72B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7989168738945996 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.697730968386067 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4176737160120846 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3724832214765101 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4560104166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5403091755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/Qwen/Qwen2-72B/add3b058-e7bc-4b7b-bb98-0d7039979072.json b/data/hfopenllm_v2/Qwen/Qwen2-72B/add3b058-e7bc-4b7b-bb98-0d7039979072.json new file mode 100644 index 000000000..fbfd29d40 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2-72B/add3b058-e7bc-4b7b-bb98-0d7039979072.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-72B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2-72B", + "id": "Qwen/Qwen2-72B", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3824 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6617 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3112 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3943 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4704 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": 
"hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5731 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2-7B-Instruct/3e1ebb01-6fbb-498c-af58-022f50247ec9.json b/data/hfopenllm_v2/Qwen/Qwen2-7B-Instruct/3e1ebb01-6fbb-498c-af58-022f50247ec9.json deleted file mode 100644 index d6c2fec4a..000000000 --- a/data/hfopenllm_v2/Qwen/Qwen2-7B-Instruct/3e1ebb01-6fbb-498c-af58-022f50247ec9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-7B-Instruct/1762652579.84092", - "retrieved_timestamp": "1762652579.84092", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2-7B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2-7B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5679075962889577 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5544781563793189 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2764350453172205 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39279166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38472406914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/Qwen/Qwen2-7B-Instruct/db0b6b3f-e5a9-4367-ab87-e58d5c6ccd81.json b/data/hfopenllm_v2/Qwen/Qwen2-7B-Instruct/db0b6b3f-e5a9-4367-ab87-e58d5c6ccd81.json new file mode 100644 index 000000000..14d4c60a1 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2-7B-Instruct/db0b6b3f-e5a9-4367-ab87-e58d5c6ccd81.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-7B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": 
"HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2-7B-Instruct", + "id": "Qwen/Qwen2-7B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5679 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5545 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2764 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3928 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3847 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2-7B/54b055d0-80ae-4bba-b729-bd77b3ec7502.json b/data/hfopenllm_v2/Qwen/Qwen2-7B/54b055d0-80ae-4bba-b729-bd77b3ec7502.json new file mode 100644 index 000000000..e3631e1e9 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2-7B/54b055d0-80ae-4bba-b729-bd77b3ec7502.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2-7B", + "id": "Qwen/Qwen2-7B", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + 
"architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3149 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5315 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2039 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4439 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4183 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2-Math-72B-Instruct/1c7bb42e-aa1c-4522-a4b0-bcc460876125.json b/data/hfopenllm_v2/Qwen/Qwen2-Math-72B-Instruct/1c7bb42e-aa1c-4522-a4b0-bcc460876125.json deleted file mode 100644 index d0bea4177..000000000 --- a/data/hfopenllm_v2/Qwen/Qwen2-Math-72B-Instruct/1c7bb42e-aa1c-4522-a4b0-bcc460876125.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-Math-72B-Instruct/1762652579.841145", - "retrieved_timestamp": "1762652579.8411462", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2-Math-72B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2-Math-72B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", 
- "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.569381463405985 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.634337660025181 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5536253776435045 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36828859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45169791666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42727726063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/Qwen/Qwen2-Math-72B-Instruct/5c22d0b3-5082-4c6e-865c-71da03cf9378.json b/data/hfopenllm_v2/Qwen/Qwen2-Math-72B-Instruct/5c22d0b3-5082-4c6e-865c-71da03cf9378.json new file mode 100644 index 000000000..a24afa6b4 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2-Math-72B-Instruct/5c22d0b3-5082-4c6e-865c-71da03cf9378.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-Math-72B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2-Math-72B-Instruct", + "id": "Qwen/Qwen2-Math-72B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5694 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6343 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on 
MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5536 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3683 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4517 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4273 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2-Math-7B/f8e5ee9f-519d-4ed8-bd2a-88897075f401.json b/data/hfopenllm_v2/Qwen/Qwen2-Math-7B/f8e5ee9f-519d-4ed8-bd2a-88897075f401.json new file mode 100644 index 000000000..93ad6f156 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2-Math-7B/f8e5ee9f-519d-4ed8-bd2a-88897075f401.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-Math-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2-Math-7B", + "id": "Qwen/Qwen2-Math-7B", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2687 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.387 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2477 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3593 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1197 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2-VL-72B-Instruct/2f749e28-b845-45ab-a628-8f9b6a9029d9.json b/data/hfopenllm_v2/Qwen/Qwen2-VL-72B-Instruct/2f749e28-b845-45ab-a628-8f9b6a9029d9.json deleted file mode 100644 index 077447090..000000000 --- a/data/hfopenllm_v2/Qwen/Qwen2-VL-72B-Instruct/2f749e28-b845-45ab-a628-8f9b6a9029d9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-VL-72B-Instruct/1762652579.841569", - "retrieved_timestamp": "1762652579.8415701", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2-VL-72B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2-VL-72B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2VLForConditionalGeneration", - "params_billions": 73.406 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5982326892644849 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6946287292338682 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34441087613293053 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3875838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44921875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on 
MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5717253989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/Qwen/Qwen2-VL-72B-Instruct/b74c3215-7bd5-42d1-9193-f4c9c6a8bec2.json b/data/hfopenllm_v2/Qwen/Qwen2-VL-72B-Instruct/b74c3215-7bd5-42d1-9193-f4c9c6a8bec2.json new file mode 100644 index 000000000..30ca6ebbd --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2-VL-72B-Instruct/b74c3215-7bd5-42d1-9193-f4c9c6a8bec2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-VL-72B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2-VL-72B-Instruct", + "id": "Qwen/Qwen2-VL-72B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2VLForConditionalGeneration", + "params_billions": 73.406 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5982 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6946 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3444 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3876 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4492 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5717 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/Qwen/Qwen2-VL-7B-Instruct/27df1e06-463b-4519-87eb-a1666ad3f98c.json b/data/hfopenllm_v2/Qwen/Qwen2-VL-7B-Instruct/27df1e06-463b-4519-87eb-a1666ad3f98c.json new file mode 100644 index 000000000..276403439 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2-VL-7B-Instruct/27df1e06-463b-4519-87eb-a1666ad3f98c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-VL-7B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2-VL-7B-Instruct", + "id": "Qwen/Qwen2-VL-7B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2VLForConditionalGeneration", + "params_billions": 8.291 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4599 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5465 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1986 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3196 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4095 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2-VL-7B-Instruct/6dd0eebe-ef61-431d-bf7c-c170475bed5f.json b/data/hfopenllm_v2/Qwen/Qwen2-VL-7B-Instruct/6dd0eebe-ef61-431d-bf7c-c170475bed5f.json deleted file mode 100644 index c722e2f7c..000000000 --- 
a/data/hfopenllm_v2/Qwen/Qwen2-VL-7B-Instruct/6dd0eebe-ef61-431d-bf7c-c170475bed5f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-VL-7B-Instruct/1762652579.841773", - "retrieved_timestamp": "1762652579.841774", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2-VL-7B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2-VL-7B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2VLForConditionalGeneration", - "params_billions": 8.291 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4599218961245052 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5464507159069989 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1986404833836858 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40949135638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-0.5B-Instruct/14d1ea99-ae05-42cd-9f2f-de1a98d9846d.json b/data/hfopenllm_v2/Qwen/Qwen2.5-0.5B-Instruct/14d1ea99-ae05-42cd-9f2f-de1a98d9846d.json deleted file mode 100644 index 213ea1b9c..000000000 --- a/data/hfopenllm_v2/Qwen/Qwen2.5-0.5B-Instruct/14d1ea99-ae05-42cd-9f2f-de1a98d9846d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-0.5B-Instruct/1762652579.842413", - "retrieved_timestamp": "1762652579.8424141", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2.5-0.5B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-0.5B-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - 
"params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31529120511354314 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3321916429549138 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10347432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3341875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17195811170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-0.5B-Instruct/883755e2-69eb-459b-ae7f-5548914aa65e.json b/data/hfopenllm_v2/Qwen/Qwen2.5-0.5B-Instruct/883755e2-69eb-459b-ae7f-5548914aa65e.json deleted file mode 100644 index e5e358bc9..000000000 --- a/data/hfopenllm_v2/Qwen/Qwen2.5-0.5B-Instruct/883755e2-69eb-459b-ae7f-5548914aa65e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-0.5B-Instruct/1762652579.842189", - "retrieved_timestamp": "1762652579.84219", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2.5-0.5B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-0.5B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.5 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.307122878407071 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3340729214937266 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - 
} - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33288541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16971409574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-0.5B-Instruct/9d975b05-7bee-462d-a33a-afa0d5af94d4.json b/data/hfopenllm_v2/Qwen/Qwen2.5-0.5B-Instruct/9d975b05-7bee-462d-a33a-afa0d5af94d4.json new file mode 100644 index 000000000..d78bde574 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-0.5B-Instruct/9d975b05-7bee-462d-a33a-afa0d5af94d4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-0.5B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-Instruct", + "id": "Qwen/Qwen2.5-0.5B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3153 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3322 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1035 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3342 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.172 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-0.5B-Instruct/9ef9135a-473e-43a5-a460-fd3ec50226f9.json b/data/hfopenllm_v2/Qwen/Qwen2.5-0.5B-Instruct/9ef9135a-473e-43a5-a460-fd3ec50226f9.json new file mode 100644 index 000000000..38a9f628f --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-0.5B-Instruct/9ef9135a-473e-43a5-a460-fd3ec50226f9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-0.5B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-Instruct", + "id": "Qwen/Qwen2.5-0.5B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.5 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3071 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3341 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3329 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, 
+ "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1697 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-0.5B/c57cae01-328e-447b-8945-e3cd2c4b8a7b.json b/data/hfopenllm_v2/Qwen/Qwen2.5-0.5B/c57cae01-328e-447b-8945-e3cd2c4b8a7b.json new file mode 100644 index 000000000..7aa39af3d --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-0.5B/c57cae01-328e-447b-8945-e3cd2c4b8a7b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-0.5B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B", + "id": "Qwen/Qwen2.5-0.5B", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.5 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1627 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3275 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0393 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2466 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3433 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1906 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/Qwen/Qwen2.5-1.5B-Instruct/494c86cf-7f37-49d8-8160-b81859552c87.json b/data/hfopenllm_v2/Qwen/Qwen2.5-1.5B-Instruct/494c86cf-7f37-49d8-8160-b81859552c87.json new file mode 100644 index 000000000..6d0e76598 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-1.5B-Instruct/494c86cf-7f37-49d8-8160-b81859552c87.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-1.5B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-1.5B-Instruct", + "id": "Qwen/Qwen2.5-1.5B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.5 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4476 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4289 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2205 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2559 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2799 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-1.5B-Instruct/9744dd76-a8cd-4400-92a7-f10b375710ae.json b/data/hfopenllm_v2/Qwen/Qwen2.5-1.5B-Instruct/9744dd76-a8cd-4400-92a7-f10b375710ae.json deleted file mode 100644 index 7938b543a..000000000 --- 
a/data/hfopenllm_v2/Qwen/Qwen2.5-1.5B-Instruct/9744dd76-a8cd-4400-92a7-f10b375710ae.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-1.5B-Instruct/1762652579.842835", - "retrieved_timestamp": "1762652579.842836", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2.5-1.5B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-1.5B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.5 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4475569267321817 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4288982740422907 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3663125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27992021276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-1.5B/6de5e76e-4297-4bcd-b06e-f63fa28da0e0.json b/data/hfopenllm_v2/Qwen/Qwen2.5-1.5B/6de5e76e-4297-4bcd-b06e-f63fa28da0e0.json new file mode 100644 index 000000000..86834fae0 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-1.5B/6de5e76e-4297-4bcd-b06e-f63fa28da0e0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-1.5B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-1.5B", + "id": "Qwen/Qwen2.5-1.5B", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.5 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": 
"google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2674 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4078 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0914 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3576 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2855 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-14B-Instruct-1M/52ff136b-084f-4ca3-a48e-83fb0bbd8ebc.json b/data/hfopenllm_v2/Qwen/Qwen2.5-14B-Instruct-1M/52ff136b-084f-4ca3-a48e-83fb0bbd8ebc.json deleted file mode 100644 index 435b7ca7f..000000000 --- a/data/hfopenllm_v2/Qwen/Qwen2.5-14B-Instruct-1M/52ff136b-084f-4ca3-a48e-83fb0bbd8ebc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-14B-Instruct-1M/1762652579.843473", - "retrieved_timestamp": "1762652579.843473", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2.5-14B-Instruct-1M", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-14B-Instruct-1M", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8413564896696322 - } - 
}, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6198222551365405 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5302114803625377 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34312080536912754 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.418 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4849567819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-14B-Instruct-1M/9b10cd14-82f3-4b36-a4be-5092127d68c3.json b/data/hfopenllm_v2/Qwen/Qwen2.5-14B-Instruct-1M/9b10cd14-82f3-4b36-a4be-5092127d68c3.json new file mode 100644 index 000000000..7ccfcf4c1 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-14B-Instruct-1M/9b10cd14-82f3-4b36-a4be-5092127d68c3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-14B-Instruct-1M/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-Instruct-1M", + "id": "Qwen/Qwen2.5-14B-Instruct-1M", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8414 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6198 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5302 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": 
"GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3431 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.418 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.485 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-14B-Instruct/1f3e04ab-9f97-4eda-9d40-669eda073ac3.json b/data/hfopenllm_v2/Qwen/Qwen2.5-14B-Instruct/1f3e04ab-9f97-4eda-9d40-669eda073ac3.json deleted file mode 100644 index d975472d5..000000000 --- a/data/hfopenllm_v2/Qwen/Qwen2.5-14B-Instruct/1f3e04ab-9f97-4eda-9d40-669eda073ac3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-14B-Instruct/1762652579.843263", - "retrieved_timestamp": "1762652579.843264", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2.5-14B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-14B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8157776920792386 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6390453705906222 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.547583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221476510067114 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4100625 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4904421542553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-14B-Instruct/bbd94181-0523-4543-80a7-056b041e03b7.json b/data/hfopenllm_v2/Qwen/Qwen2.5-14B-Instruct/bbd94181-0523-4543-80a7-056b041e03b7.json new file mode 100644 index 000000000..ffc120545 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-14B-Instruct/bbd94181-0523-4543-80a7-056b041e03b7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-14B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-Instruct", + "id": "Qwen/Qwen2.5-14B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8158 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.639 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5476 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3221 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4101 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4904 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/Qwen/Qwen2.5-14B/e10d8573-e201-460e-a931-49a1b13ceeea.json b/data/hfopenllm_v2/Qwen/Qwen2.5-14B/e10d8573-e201-460e-a931-49a1b13ceeea.json new file mode 100644 index 000000000..20c1af18c --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-14B/e10d8573-e201-460e-a931-49a1b13ceeea.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B", + "id": "Qwen/Qwen2.5-14B", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3694 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6161 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.29 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3817 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4502 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5249 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-32B-Instruct/c921186d-6e97-46d6-b968-894159271620.json b/data/hfopenllm_v2/Qwen/Qwen2.5-32B-Instruct/c921186d-6e97-46d6-b968-894159271620.json deleted file mode 100644 index 482cdb6fc..000000000 --- a/data/hfopenllm_v2/Qwen/Qwen2.5-32B-Instruct/c921186d-6e97-46d6-b968-894159271620.json +++ 
/dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-32B-Instruct/1762652579.843922", - "retrieved_timestamp": "1762652579.843922", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2.5-32B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-32B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8346121623957765 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6912525080134339 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6253776435045317 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42612500000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.566655585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-32B-Instruct/e2ca9477-2414-4b8a-8d22-68f9ced54ae5.json b/data/hfopenllm_v2/Qwen/Qwen2.5-32B-Instruct/e2ca9477-2414-4b8a-8d22-68f9ced54ae5.json new file mode 100644 index 000000000..d6d218cf5 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-32B-Instruct/e2ca9477-2414-4b8a-8d22-68f9ced54ae5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-32B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-32B-Instruct", + "id": "Qwen/Qwen2.5-32B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { 
+ "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8346 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6913 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6254 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3381 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4261 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5667 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-32B/831246b8-5433-48e6-ba11-8a4239373106.json b/data/hfopenllm_v2/Qwen/Qwen2.5-32B/831246b8-5433-48e6-ba11-8a4239373106.json new file mode 100644 index 000000000..ba1f6cfae --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-32B/831246b8-5433-48e6-ba11-8a4239373106.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-32B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-32B", + "id": "Qwen/Qwen2.5-32B", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4077 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": 
"SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6771 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3565 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4119 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4978 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5805 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-3B-Instruct/8277994c-8bf5-4ece-9f34-4fe9a4310bbf.json b/data/hfopenllm_v2/Qwen/Qwen2.5-3B-Instruct/8277994c-8bf5-4ece-9f34-4fe9a4310bbf.json new file mode 100644 index 000000000..a44b6ae5b --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-3B-Instruct/8277994c-8bf5-4ece-9f34-4fe9a4310bbf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-3B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-3B-Instruct", + "id": "Qwen/Qwen2.5-3B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6475 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4693 + } + }, + { + "evaluation_name": "MATH Level 5", + 
"source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3678 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3968 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3255 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-3B-Instruct/9fb4e863-fd72-4b60-bc20-e32e64ce99e8.json b/data/hfopenllm_v2/Qwen/Qwen2.5-3B-Instruct/9fb4e863-fd72-4b60-bc20-e32e64ce99e8.json deleted file mode 100644 index 950b190b9..000000000 --- a/data/hfopenllm_v2/Qwen/Qwen2.5-3B-Instruct/9fb4e863-fd72-4b60-bc20-e32e64ce99e8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-3B-Instruct/1762652579.844352", - "retrieved_timestamp": "1762652579.844352", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2.5-3B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-3B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6474919879253713 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.469276665604885 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3678247734138973 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39679166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3254654255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-3B/5aabc7c5-eb3a-42e0-8b40-0a08004f6e1a.json b/data/hfopenllm_v2/Qwen/Qwen2.5-3B/5aabc7c5-eb3a-42e0-8b40-0a08004f6e1a.json new file mode 100644 index 000000000..229075fd2 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-3B/5aabc7c5-eb3a-42e0-8b40-0a08004f6e1a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-3B", + "id": "Qwen/Qwen2.5-3B", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.269 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4612 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.148 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4303 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + 
"source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3203 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-72B-Instruct/9ed2a831-aa5a-4e81-b8b5-397bc8b55835.json b/data/hfopenllm_v2/Qwen/Qwen2.5-72B-Instruct/9ed2a831-aa5a-4e81-b8b5-397bc8b55835.json deleted file mode 100644 index aeaa6e2da..000000000 --- a/data/hfopenllm_v2/Qwen/Qwen2.5-72B-Instruct/9ed2a831-aa5a-4e81-b8b5-397bc8b55835.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-72B-Instruct/1762652579.844789", - "retrieved_timestamp": "1762652579.844789", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2.5-72B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-72B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.863837949972739 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7272747321744824 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5981873111782477 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.375 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42060416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5625831117021277 - } - } - ] -} diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-72B-Instruct/cbb73c83-ad94-4973-9bf5-a5e7ca4d1653.json b/data/hfopenllm_v2/Qwen/Qwen2.5-72B-Instruct/cbb73c83-ad94-4973-9bf5-a5e7ca4d1653.json new file mode 100644 index 000000000..9fcc6ecb3 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-72B-Instruct/cbb73c83-ad94-4973-9bf5-a5e7ca4d1653.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-72B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + 
"source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-72B-Instruct", + "id": "Qwen/Qwen2.5-72B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8638 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7273 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5982 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.375 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4206 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5626 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-72B/3ed06a16-d5fe-43d3-a369-f4ed29fb3a5d.json b/data/hfopenllm_v2/Qwen/Qwen2.5-72B/3ed06a16-d5fe-43d3-a369-f4ed29fb3a5d.json new file mode 100644 index 000000000..af273092b --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-72B/3ed06a16-d5fe-43d3-a369-f4ed29fb3a5d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-72B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-72B", + "id": "Qwen/Qwen2.5-72B", + "developer": "Qwen", + "inference_platform": "unknown", 
+ "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4137 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6797 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3912 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4052 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4771 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5968 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-7B-Instruct-1M/f338f8b3-d2fa-46e6-b2a1-b83303521b3f.json b/data/hfopenllm_v2/Qwen/Qwen2.5-7B-Instruct-1M/f338f8b3-d2fa-46e6-b2a1-b83303521b3f.json deleted file mode 100644 index e95bf0163..000000000 --- a/data/hfopenllm_v2/Qwen/Qwen2.5-7B-Instruct-1M/f338f8b3-d2fa-46e6-b2a1-b83303521b3f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-7B-Instruct-1M/1762652579.845428", - "retrieved_timestamp": "1762652579.845428", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2.5-7B-Instruct-1M", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-7B-Instruct-1M", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7447616767953474 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5403941270576822 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4335347432024169 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40869791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35048204787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-7B-Instruct-1M/fc817789-2f44-4d2b-b40e-2422fe33d104.json b/data/hfopenllm_v2/Qwen/Qwen2.5-7B-Instruct-1M/fc817789-2f44-4d2b-b40e-2422fe33d104.json new file mode 100644 index 000000000..776013deb --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-7B-Instruct-1M/fc817789-2f44-4d2b-b40e-2422fe33d104.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-7B-Instruct-1M/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-Instruct-1M", + "id": "Qwen/Qwen2.5-7B-Instruct-1M", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7448 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5404 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + 
"metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4335 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4087 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3505 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-7B-Instruct/5e1c8723-7c43-4d8f-8c7c-386c2eb6b9cf.json b/data/hfopenllm_v2/Qwen/Qwen2.5-7B-Instruct/5e1c8723-7c43-4d8f-8c7c-386c2eb6b9cf.json new file mode 100644 index 000000000..66802b86b --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-7B-Instruct/5e1c8723-7c43-4d8f-8c7c-386c2eb6b9cf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-7B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-Instruct", + "id": "Qwen/Qwen2.5-7B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7585 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5394 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + 
"dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.402 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4287 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-7B-Instruct/7a336f2b-3b33-4fde-bce6-2d1e884a1b26.json b/data/hfopenllm_v2/Qwen/Qwen2.5-7B-Instruct/7a336f2b-3b33-4fde-bce6-2d1e884a1b26.json deleted file mode 100644 index abcfd30eb..000000000 --- a/data/hfopenllm_v2/Qwen/Qwen2.5-7B-Instruct/7a336f2b-3b33-4fde-bce6-2d1e884a1b26.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-7B-Instruct/1762652579.845207", - "retrieved_timestamp": "1762652579.8452082", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2.5-7B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-7B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7585251576926999 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5394231968299095 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40203125 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4286901595744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-7B/b6740747-19ac-4a9c-892f-6556013ddc8b.json b/data/hfopenllm_v2/Qwen/Qwen2.5-7B/b6740747-19ac-4a9c-892f-6556013ddc8b.json new file mode 100644 index 000000000..744db06b8 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-7B/b6740747-19ac-4a9c-892f-6556013ddc8b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B", + "id": "Qwen/Qwen2.5-7B", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3374 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5416 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2508 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3247 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4424 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4365 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-14B-Instruct/3263ab46-09ae-4c24-9332-b6874d0d0330.json b/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-14B-Instruct/3263ab46-09ae-4c24-9332-b6874d0d0330.json new file mode 100644 index 000000000..efdce1948 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-14B-Instruct/3263ab46-09ae-4c24-9332-b6874d0d0330.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Coder-14B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-Coder-14B-Instruct", + "id": "Qwen/Qwen2.5-Coder-14B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6908 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.614 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3248 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3915 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3939 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-14B-Instruct/f2295cf4-86e0-4c73-8f3d-21c6e5ccd9d9.json b/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-14B-Instruct/f2295cf4-86e0-4c73-8f3d-21c6e5ccd9d9.json deleted file mode 100644 index 6fc060fc7..000000000 
--- a/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-14B-Instruct/f2295cf4-86e0-4c73-8f3d-21c6e5ccd9d9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Coder-14B-Instruct/1762652579.846175", - "retrieved_timestamp": "1762652579.846175", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2.5-Coder-14B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-Coder-14B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6907560827493273 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6140296423661326 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.324773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3914583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3939494680851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-14B/a8706a7e-5693-4768-a955-a448549d2e77.json b/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-14B/a8706a7e-5693-4768-a955-a448549d2e77.json new file mode 100644 index 000000000..b59e9c9ed --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-14B/a8706a7e-5693-4768-a955-a448549d2e77.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Coder-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-Coder-14B", + "id": "Qwen/Qwen2.5-Coder-14B", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + 
"dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3473 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5865 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2251 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3874 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4521 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-32B-Instruct/3c932329-0440-4799-886f-10bc4a5aeb09.json b/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-32B-Instruct/3c932329-0440-4799-886f-10bc4a5aeb09.json new file mode 100644 index 000000000..61b3a04a0 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-32B-Instruct/3c932329-0440-4799-886f-10bc4a5aeb09.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Coder-32B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-Coder-32B-Instruct", + "id": "Qwen/Qwen2.5-Coder-32B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, 
+ "max_score": 1.0 + }, + "score_details": { + "score": 0.7265 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6625 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4955 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.349 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4386 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4413 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-32B-Instruct/c0ca7adb-6221-415f-8ed6-0de6439db168.json b/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-32B-Instruct/c0ca7adb-6221-415f-8ed6-0de6439db168.json deleted file mode 100644 index c6b08677a..000000000 --- a/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-32B-Instruct/c0ca7adb-6221-415f-8ed6-0de6439db168.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Coder-32B-Instruct/1762652579.846655", - "retrieved_timestamp": "1762652579.846655", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2.5-Coder-32B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-Coder-32B-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7265267268625026 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6625222222405129 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4954682779456193 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.348993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4385833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44132313829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-32B/b1e42d9d-827d-4109-8d1b-182694033b21.json b/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-32B/b1e42d9d-827d-4109-8d1b-182694033b21.json new file mode 100644 index 000000000..636fd2224 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-32B/b1e42d9d-827d-4109-8d1b-182694033b21.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Coder-32B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-Coder-32B", + "id": "Qwen/Qwen2.5-Coder-32B", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4363 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6404 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3089 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3465 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4528 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5303 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-7B-Instruct/0c6f0d92-3ee0-48d7-b3fc-70149911a51d.json b/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-7B-Instruct/0c6f0d92-3ee0-48d7-b3fc-70149911a51d.json new file mode 100644 index 000000000..00f370a8a --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-7B-Instruct/0c6f0d92-3ee0-48d7-b3fc-70149911a51d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Coder-7B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-Coder-7B-Instruct", + "id": "Qwen/Qwen2.5-Coder-7B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6147 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4999 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.031 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": 
"TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4099 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3354 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-7B-Instruct/73b07681-8e10-414e-8922-650908f9cf6a.json b/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-7B-Instruct/73b07681-8e10-414e-8922-650908f9cf6a.json new file mode 100644 index 000000000..f33929085 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-7B-Instruct/73b07681-8e10-414e-8922-650908f9cf6a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Coder-7B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-Coder-7B-Instruct", + "id": "Qwen/Qwen2.5-Coder-7B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6101 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5008 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3716 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4073 + } + }, + { + 
"evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3352 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-7B-Instruct/7629f304-5235-485b-a7f6-f5a7f91fd35c.json b/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-7B-Instruct/7629f304-5235-485b-a7f6-f5a7f91fd35c.json deleted file mode 100644 index d6b159ce3..000000000 --- a/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-7B-Instruct/7629f304-5235-485b-a7f6-f5a7f91fd35c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Coder-7B-Instruct/1762652579.847122", - "retrieved_timestamp": "1762652579.847123", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2.5-Coder-7B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-Coder-7B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6101477413263474 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5007976986224548 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3716012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4072708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3351894946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-7B-Instruct/81749833-4f2a-4883-a789-c465c11b33b6.json b/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-7B-Instruct/81749833-4f2a-4883-a789-c465c11b33b6.json deleted file mode 100644 index 1d2a97cbc..000000000 --- a/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-7B-Instruct/81749833-4f2a-4883-a789-c465c11b33b6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - 
"schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Coder-7B-Instruct/1762652579.8473449", - "retrieved_timestamp": "1762652579.8473458", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2.5-Coder-7B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-Coder-7B-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6147189457306613 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4999048550311305 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030966767371601207 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4099375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33543882978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-7B/8b1549f8-0602-4538-842c-abe9dca7baff.json b/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-7B/8b1549f8-0602-4538-842c-abe9dca7baff.json new file mode 100644 index 000000000..db8e13ac7 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-7B/8b1549f8-0602-4538-842c-abe9dca7baff.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Coder-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-Coder-7B", + "id": "Qwen/Qwen2.5-Coder-7B", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", 
+ "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3446 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4856 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1918 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3449 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3679 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-Math-1.5B-Instruct/393c9602-bd87-48d7-ad95-6baf85ed3341.json b/data/hfopenllm_v2/Qwen/Qwen2.5-Math-1.5B-Instruct/393c9602-bd87-48d7-ad95-6baf85ed3341.json deleted file mode 100644 index 2d48e189e..000000000 --- a/data/hfopenllm_v2/Qwen/Qwen2.5-Math-1.5B-Instruct/393c9602-bd87-48d7-ad95-6baf85ed3341.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Math-1.5B-Instruct/1762652579.84755", - "retrieved_timestamp": "1762652579.84755", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2.5-Math-1.5B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-Math-1.5B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1855731680829089 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37515353898426174 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2628398791540785 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3685416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1801030585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-Math-1.5B-Instruct/ad395ad4-0f9f-4b49-83c9-b89fa6b6dd89.json b/data/hfopenllm_v2/Qwen/Qwen2.5-Math-1.5B-Instruct/ad395ad4-0f9f-4b49-83c9-b89fa6b6dd89.json new file mode 100644 index 000000000..376ff5457 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-Math-1.5B-Instruct/ad395ad4-0f9f-4b49-83c9-b89fa6b6dd89.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Math-1.5B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-Math-1.5B-Instruct", + "id": "Qwen/Qwen2.5-Math-1.5B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1856 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3752 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2628 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": 
"hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3685 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1801 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-Math-72B-Instruct/14c01681-fbef-49c4-b737-a7baaa02d393.json b/data/hfopenllm_v2/Qwen/Qwen2.5-Math-72B-Instruct/14c01681-fbef-49c4-b737-a7baaa02d393.json new file mode 100644 index 000000000..0366bc367 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-Math-72B-Instruct/14c01681-fbef-49c4-b737-a7baaa02d393.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Math-72B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-Math-72B-Instruct", + "id": "Qwen/Qwen2.5-Math-72B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4003 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6452 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6239 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.3314 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4473 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4812 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-Math-72B-Instruct/64574dc3-4982-49c3-8526-09ebd5781175.json b/data/hfopenllm_v2/Qwen/Qwen2.5-Math-72B-Instruct/64574dc3-4982-49c3-8526-09ebd5781175.json deleted file mode 100644 index 376cf9919..000000000 --- a/data/hfopenllm_v2/Qwen/Qwen2.5-Math-72B-Instruct/64574dc3-4982-49c3-8526-09ebd5781175.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Math-72B-Instruct/1762652579.847774", - "retrieved_timestamp": "1762652579.847775", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2.5-Math-72B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-Math-72B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4003466358151926 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6452266637803764 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6238670694864048 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44727083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4812167553191489 - } - } - ] -} diff 
--git a/data/hfopenllm_v2/Qwen/Qwen2.5-Math-7B-Instruct/3ad495c0-da8e-4776-8d05-bc7dce1fe120.json b/data/hfopenllm_v2/Qwen/Qwen2.5-Math-7B-Instruct/3ad495c0-da8e-4776-8d05-bc7dce1fe120.json new file mode 100644 index 000000000..c41423d24 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-Math-7B-Instruct/3ad495c0-da8e-4776-8d05-bc7dce1fe120.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Math-7B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-Math-7B-Instruct", + "id": "Qwen/Qwen2.5-Math-7B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2636 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4388 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5808 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3647 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.282 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-Math-7B-Instruct/6ba8109e-8906-420f-a780-d0bef4015e1a.json b/data/hfopenllm_v2/Qwen/Qwen2.5-Math-7B-Instruct/6ba8109e-8906-420f-a780-d0bef4015e1a.json deleted file mode 100644 index f1ce12d77..000000000 --- 
a/data/hfopenllm_v2/Qwen/Qwen2.5-Math-7B-Instruct/6ba8109e-8906-420f-a780-d0bef4015e1a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Math-7B-Instruct/1762652579.848376", - "retrieved_timestamp": "1762652579.848377", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2.5-Math-7B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-Math-7B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26358395723347383 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.438762734452786 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5808157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3647291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2819980053191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-Math-7B/0762ca9e-f0d4-408e-9992-e91a10e0e65f.json b/data/hfopenllm_v2/Qwen/Qwen2.5-Math-7B/0762ca9e-f0d4-408e-9992-e91a10e0e65f.json new file mode 100644 index 000000000..e97af5279 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-Math-7B/0762ca9e-f0d4-408e-9992-e91a10e0e65f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Math-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-Math-7B", + "id": "Qwen/Qwen2.5-Math-7B", + "developer": "Qwen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + 
"source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.246 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4455 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3051 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3781 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2718 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/RDson/WomboCombo-R1-Coder-14B-Preview/ec6c1d05-cea7-445c-bed3-9eee1e1ff03d.json b/data/hfopenllm_v2/RDson/WomboCombo-R1-Coder-14B-Preview/ec6c1d05-cea7-445c-bed3-9eee1e1ff03d.json new file mode 100644 index 000000000..8ed8e2c46 --- /dev/null +++ b/data/hfopenllm_v2/RDson/WomboCombo-R1-Coder-14B-Preview/ec6c1d05-cea7-445c-bed3-9eee1e1ff03d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/RDson_WomboCombo-R1-Coder-14B-Preview/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "WomboCombo-R1-Coder-14B-Preview", + "id": "RDson/WomboCombo-R1-Coder-14B-Preview", + "developer": "RDson", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6286 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6392 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5989 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3213 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4844 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5168 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/RDson/WomboCombo-R1-Coder-14B-Preview/faa623a7-1bf8-4da6-b381-7701f0446b70.json b/data/hfopenllm_v2/RDson/WomboCombo-R1-Coder-14B-Preview/faa623a7-1bf8-4da6-b381-7701f0446b70.json deleted file mode 100644 index 465b04344..000000000 --- a/data/hfopenllm_v2/RDson/WomboCombo-R1-Coder-14B-Preview/faa623a7-1bf8-4da6-b381-7701f0446b70.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/RDson_WomboCombo-R1-Coder-14B-Preview/1762652579.848609", - "retrieved_timestamp": "1762652579.8486102", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "RDson/WomboCombo-R1-Coder-14B-Preview", - "developer": "RDson", - "inference_platform": "unknown", - "id": "RDson/WomboCombo-R1-Coder-14B-Preview", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.628557782240012 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on 
BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6392098699331132 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5989425981873112 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4843854166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5167885638297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/RESMPDEV/EVA-Qwen2.5-1.5B-FRFR/1fc39812-77fb-4d0c-b9fb-706e94c40afe.json b/data/hfopenllm_v2/RESMPDEV/EVA-Qwen2.5-1.5B-FRFR/1fc39812-77fb-4d0c-b9fb-706e94c40afe.json new file mode 100644 index 000000000..44c5b7786 --- /dev/null +++ b/data/hfopenllm_v2/RESMPDEV/EVA-Qwen2.5-1.5B-FRFR/1fc39812-77fb-4d0c-b9fb-706e94c40afe.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/RESMPDEV_EVA-Qwen2.5-1.5B-FRFR/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EVA-Qwen2.5-1.5B-FRFR", + "id": "RESMPDEV/EVA-Qwen2.5-1.5B-FRFR", + "developer": "RESMPDEV", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3082 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3932 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1027 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3539 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.277 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/RESMPDEV/Qwen2-Wukong-0.5B/fdc3c502-53ad-4bf7-85ce-51eaed72754b.json b/data/hfopenllm_v2/RESMPDEV/Qwen2-Wukong-0.5B/fdc3c502-53ad-4bf7-85ce-51eaed72754b.json new file mode 100644 index 000000000..7c90e36d8 --- /dev/null +++ b/data/hfopenllm_v2/RESMPDEV/Qwen2-Wukong-0.5B/fdc3c502-53ad-4bf7-85ce-51eaed72754b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/RESMPDEV_Qwen2-Wukong-0.5B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2-Wukong-0.5B", + "id": "RESMPDEV/Qwen2-Wukong-0.5B", + "developer": "RESMPDEV", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1854 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3085 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0015 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2366 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + 
"dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3525 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1327 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/RLHFlow/ArmoRM-Llama3-8B-v0.1/3f74c1c7-f349-4193-95cf-b0033112fea0.json b/data/hfopenllm_v2/RLHFlow/ArmoRM-Llama3-8B-v0.1/3f74c1c7-f349-4193-95cf-b0033112fea0.json new file mode 100644 index 000000000..0705593b8 --- /dev/null +++ b/data/hfopenllm_v2/RLHFlow/ArmoRM-Llama3-8B-v0.1/3f74c1c7-f349-4193-95cf-b0033112fea0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/RLHFlow_ArmoRM-Llama3-8B-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ArmoRM-Llama3-8B-v0.1", + "id": "RLHFlow/ArmoRM-Llama3-8B-v0.1", + "developer": "RLHFlow", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForRewardModelWithGating", + "params_billions": 7.511 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1897 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2876 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2492 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.3948 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1078 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/RLHFlow/LLaMA3-iterative-DPO-final/36a803da-83ab-4c49-8855-9344aaa7a68b.json b/data/hfopenllm_v2/RLHFlow/LLaMA3-iterative-DPO-final/36a803da-83ab-4c49-8855-9344aaa7a68b.json new file mode 100644 index 000000000..49dd63ad0 --- /dev/null +++ b/data/hfopenllm_v2/RLHFlow/LLaMA3-iterative-DPO-final/36a803da-83ab-4c49-8855-9344aaa7a68b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/RLHFlow_LLaMA3-iterative-DPO-final/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LLaMA3-iterative-DPO-final", + "id": "RLHFlow/LLaMA3-iterative-DPO-final", + "developer": "RLHFlow", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.534 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5058 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0884 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3673 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + 
}, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3257 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/RLHFlow/LLaMA3-iterative-DPO-final/8ccda2e0-9801-41b0-8491-eb36615860f2.json b/data/hfopenllm_v2/RLHFlow/LLaMA3-iterative-DPO-final/8ccda2e0-9801-41b0-8491-eb36615860f2.json deleted file mode 100644 index 08fe1bcc3..000000000 --- a/data/hfopenllm_v2/RLHFlow/LLaMA3-iterative-DPO-final/8ccda2e0-9801-41b0-8491-eb36615860f2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/RLHFlow_LLaMA3-iterative-DPO-final/1762652579.849687", - "retrieved_timestamp": "1762652579.849688", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "RLHFlow/LLaMA3-iterative-DPO-final", - "developer": "RLHFlow", - "inference_platform": "unknown", - "id": "RLHFlow/LLaMA3-iterative-DPO-final", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.53401086893886 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5058257182733729 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08836858006042296 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3672708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32571476063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/RWKV/rwkv-raven-14b/9a90826f-9062-48aa-b047-d24f4e0d85ef.json b/data/hfopenllm_v2/RWKV/rwkv-raven-14b/9a90826f-9062-48aa-b047-d24f4e0d85ef.json deleted file mode 100644 index cd052f3d0..000000000 --- a/data/hfopenllm_v2/RWKV/rwkv-raven-14b/9a90826f-9062-48aa-b047-d24f4e0d85ef.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/RWKV_rwkv-raven-14b/1762652579.849975", - "retrieved_timestamp": "1762652579.849976", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "RWKV/rwkv-raven-14b", - "developer": "RWKV", - "inference_platform": "unknown", - "id": "RWKV/rwkv-raven-14b", - "additional_details": { - "precision": "float16", - "architecture": "RwkvForCausalLM", - "params_billions": 14.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07683723631076655 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3307041176552897 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.004531722054380665 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22902684563758388 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3951458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11502659574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/RWKV/rwkv-raven-14b/df986996-249e-49f9-b074-91e8dcdf62e2.json b/data/hfopenllm_v2/RWKV/rwkv-raven-14b/df986996-249e-49f9-b074-91e8dcdf62e2.json new file mode 100644 index 000000000..608ee4b4e --- /dev/null +++ b/data/hfopenllm_v2/RWKV/rwkv-raven-14b/df986996-249e-49f9-b074-91e8dcdf62e2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/RWKV_rwkv-raven-14b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "rwkv-raven-14b", + "id": "RWKV/rwkv-raven-14b", + "developer": "RWKV", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "RwkvForCausalLM", + "params_billions": 14.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0768 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + 
"dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3307 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0045 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.229 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3951 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.115 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Rakuten/RakutenAI-2.0-mini-instruct/549f9869-4b59-469b-b9fd-ea26114405a1.json b/data/hfopenllm_v2/Rakuten/RakutenAI-2.0-mini-instruct/549f9869-4b59-469b-b9fd-ea26114405a1.json deleted file mode 100644 index 4babd1d28..000000000 --- a/data/hfopenllm_v2/Rakuten/RakutenAI-2.0-mini-instruct/549f9869-4b59-469b-b9fd-ea26114405a1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Rakuten_RakutenAI-2.0-mini-instruct/1762652579.850244", - "retrieved_timestamp": "1762652579.850244", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Rakuten/RakutenAI-2.0-mini-instruct", - "developer": "Rakuten", - "inference_platform": "unknown", - "id": "Rakuten/RakutenAI-2.0-mini-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 1.535 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6793906833867471 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.2867197270809481 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05211480362537765 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3249166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11178523936170212 - } - } - ] -} diff --git a/data/hfopenllm_v2/Rakuten/RakutenAI-2.0-mini-instruct/90f007e9-e323-4a82-b276-ac1b928030ca.json b/data/hfopenllm_v2/Rakuten/RakutenAI-2.0-mini-instruct/90f007e9-e323-4a82-b276-ac1b928030ca.json new file mode 100644 index 000000000..559b245ad --- /dev/null +++ b/data/hfopenllm_v2/Rakuten/RakutenAI-2.0-mini-instruct/90f007e9-e323-4a82-b276-ac1b928030ca.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Rakuten_RakutenAI-2.0-mini-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RakutenAI-2.0-mini-instruct", + "id": "Rakuten/RakutenAI-2.0-mini-instruct", + "developer": "Rakuten", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 1.535 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6794 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2867 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0521 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3249 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1118 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Rakuten/RakutenAI-7B-chat/2b627f93-5cc7-4a5e-b682-d129396362e5.json b/data/hfopenllm_v2/Rakuten/RakutenAI-7B-chat/2b627f93-5cc7-4a5e-b682-d129396362e5.json new file mode 100644 index 000000000..29a9ba71c --- /dev/null +++ b/data/hfopenllm_v2/Rakuten/RakutenAI-7B-chat/2b627f93-5cc7-4a5e-b682-d129396362e5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Rakuten_RakutenAI-7B-chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RakutenAI-7B-chat", + "id": "Rakuten/RakutenAI-7B-chat", + "developer": "Rakuten", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.373 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2686 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4316 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0295 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2567 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.379 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2798 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Rakuten/RakutenAI-7B-chat/91e22241-7b65-44b9-a437-34b56400af7a.json b/data/hfopenllm_v2/Rakuten/RakutenAI-7B-chat/91e22241-7b65-44b9-a437-34b56400af7a.json deleted file mode 100644 index fee0e2c1a..000000000 --- a/data/hfopenllm_v2/Rakuten/RakutenAI-7B-chat/91e22241-7b65-44b9-a437-34b56400af7a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Rakuten_RakutenAI-7B-chat/1762652579.850715", - "retrieved_timestamp": "1762652579.8507159", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Rakuten/RakutenAI-7B-chat", - "developer": "Rakuten", - "inference_platform": "unknown", - "id": "Rakuten/RakutenAI-7B-chat", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.373 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26855521128383797 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4316204035758174 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37895833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2798371010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/Rakuten/RakutenAI-7B/2fde07ac-d218-4cc6-947e-8ceb87eedbee.json b/data/hfopenllm_v2/Rakuten/RakutenAI-7B/2fde07ac-d218-4cc6-947e-8ceb87eedbee.json new file mode 100644 
index 000000000..7c3ef32cc --- /dev/null +++ b/data/hfopenllm_v2/Rakuten/RakutenAI-7B/2fde07ac-d218-4cc6-947e-8ceb87eedbee.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Rakuten_RakutenAI-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RakutenAI-7B", + "id": "Rakuten/RakutenAI-7B", + "developer": "Rakuten", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.373 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1556 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4315 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0196 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3738 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2877 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Rakuten/RakutenAI-7B/cab9a80e-94a6-4e7b-8980-1fa4482bac8a.json b/data/hfopenllm_v2/Rakuten/RakutenAI-7B/cab9a80e-94a6-4e7b-8980-1fa4482bac8a.json deleted file mode 100644 index a79f38553..000000000 --- a/data/hfopenllm_v2/Rakuten/RakutenAI-7B/cab9a80e-94a6-4e7b-8980-1fa4482bac8a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Rakuten_RakutenAI-7B/1762652579.8505", - "retrieved_timestamp": "1762652579.850501", - 
"source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Rakuten/RakutenAI-7B", - "developer": "Rakuten", - "inference_platform": "unknown", - "id": "Rakuten/RakutenAI-7B", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.373 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1555971488982566 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43149052613615435 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37381250000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28773271276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/Replete-AI/L3-Pneuma-8B/2a141bfe-4632-4058-a232-1f2c5540c41f.json b/data/hfopenllm_v2/Replete-AI/L3-Pneuma-8B/2a141bfe-4632-4058-a232-1f2c5540c41f.json new file mode 100644 index 000000000..4c2435507 --- /dev/null +++ b/data/hfopenllm_v2/Replete-AI/L3-Pneuma-8B/2a141bfe-4632-4058-a232-1f2c5540c41f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Replete-AI_L3-Pneuma-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3-Pneuma-8B", + "id": "Replete-AI/L3-Pneuma-8B", + "developer": "Replete-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2413 + } + }, + 
{ + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4909 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0544 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.318 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4105 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3176 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Replete-AI/L3-Pneuma-8B/5eddb8a8-7281-4ae2-a4bc-f174598727e3.json b/data/hfopenllm_v2/Replete-AI/L3-Pneuma-8B/5eddb8a8-7281-4ae2-a4bc-f174598727e3.json deleted file mode 100644 index 334df8656..000000000 --- a/data/hfopenllm_v2/Replete-AI/L3-Pneuma-8B/5eddb8a8-7281-4ae2-a4bc-f174598727e3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Replete-AI_L3-Pneuma-8B/1762652579.85093", - "retrieved_timestamp": "1762652579.850931", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Replete-AI/L3-Pneuma-8B", - "developer": "Replete-AI", - "inference_platform": "unknown", - "id": "Replete-AI/L3-Pneuma-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24132745559559746 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4908680380935449 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4105208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3175698138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/Replete-AI/L3.1-Pneuma-8B/d20e8883-4cde-45dc-9d60-10284a2a5cdb.json b/data/hfopenllm_v2/Replete-AI/L3.1-Pneuma-8B/d20e8883-4cde-45dc-9d60-10284a2a5cdb.json deleted file mode 100644 index 20dc34cb7..000000000 --- a/data/hfopenllm_v2/Replete-AI/L3.1-Pneuma-8B/d20e8883-4cde-45dc-9d60-10284a2a5cdb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Replete-AI_L3.1-Pneuma-8B/1762652579.851203", - "retrieved_timestamp": "1762652579.8512042", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Replete-AI/L3.1-Pneuma-8B", - "developer": "Replete-AI", - "inference_platform": "unknown", - "id": "Replete-AI/L3.1-Pneuma-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.707642388861554 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.504990389092237 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21978851963746224 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3871145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", 
- "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36909906914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/Replete-AI/L3.1-Pneuma-8B/fa2d74a5-e8f6-4a1c-9310-a9b16c2e59d1.json b/data/hfopenllm_v2/Replete-AI/L3.1-Pneuma-8B/fa2d74a5-e8f6-4a1c-9310-a9b16c2e59d1.json new file mode 100644 index 000000000..dc4c32906 --- /dev/null +++ b/data/hfopenllm_v2/Replete-AI/L3.1-Pneuma-8B/fa2d74a5-e8f6-4a1c-9310-a9b16c2e59d1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Replete-AI_L3.1-Pneuma-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3.1-Pneuma-8B", + "id": "Replete-AI/L3.1-Pneuma-8B", + "developer": "Replete-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7076 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.505 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2198 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3871 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3691 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/Replete-AI/Llama3-8B-Instruct-Replete-Adapted/861d8edd-2acf-4593-9768-8f77488ce8a4.json b/data/hfopenllm_v2/Replete-AI/Llama3-8B-Instruct-Replete-Adapted/861d8edd-2acf-4593-9768-8f77488ce8a4.json deleted file mode 100644 index 6e91f5b65..000000000 --- a/data/hfopenllm_v2/Replete-AI/Llama3-8B-Instruct-Replete-Adapted/861d8edd-2acf-4593-9768-8f77488ce8a4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Replete-AI_Llama3-8B-Instruct-Replete-Adapted/1762652579.8514109", - "retrieved_timestamp": "1762652579.851412", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Replete-AI/Llama3-8B-Instruct-Replete-Adapted", - "developer": "Replete-AI", - "inference_platform": "unknown", - "id": "Replete-AI/Llama3-8B-Instruct-Replete-Adapted", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6915306941138402 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48702618293318983 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07099697885196375 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36339583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3390957446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/Replete-AI/Llama3-8B-Instruct-Replete-Adapted/c7c0ceff-9273-4cc3-8f8e-bd93181590ba.json b/data/hfopenllm_v2/Replete-AI/Llama3-8B-Instruct-Replete-Adapted/c7c0ceff-9273-4cc3-8f8e-bd93181590ba.json new file mode 100644 index 000000000..1449f9a9c --- /dev/null +++ b/data/hfopenllm_v2/Replete-AI/Llama3-8B-Instruct-Replete-Adapted/c7c0ceff-9273-4cc3-8f8e-bd93181590ba.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Replete-AI_Llama3-8B-Instruct-Replete-Adapted/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": 
"Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3-8B-Instruct-Replete-Adapted", + "id": "Replete-AI/Llama3-8B-Instruct-Replete-Adapted", + "developer": "Replete-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6915 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.487 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.071 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3634 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3391 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Replete-AI/Replete-Coder-Instruct-8b-Merged/398e665d-af8e-420c-95ce-5f9f4a4988af.json b/data/hfopenllm_v2/Replete-AI/Replete-Coder-Instruct-8b-Merged/398e665d-af8e-420c-95ce-5f9f4a4988af.json deleted file mode 100644 index 80ab5d793..000000000 --- a/data/hfopenllm_v2/Replete-AI/Replete-Coder-Instruct-8b-Merged/398e665d-af8e-420c-95ce-5f9f4a4988af.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Replete-AI_Replete-Coder-Instruct-8b-Merged/1762652579.851615", - "retrieved_timestamp": "1762652579.851616", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": 
"documentation" - }, - "model_info": { - "name": "Replete-AI/Replete-Coder-Instruct-8b-Merged", - "developer": "Replete-AI", - "inference_platform": "unknown", - "id": "Replete-AI/Replete-Coder-Instruct-8b-Merged", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5387571643239937 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4461693860075828 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07779456193353475 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36603125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18051861702127658 - } - } - ] -} diff --git a/data/hfopenllm_v2/Replete-AI/Replete-Coder-Instruct-8b-Merged/c439478a-1734-4038-aa8b-bb2d12ec022d.json b/data/hfopenllm_v2/Replete-AI/Replete-Coder-Instruct-8b-Merged/c439478a-1734-4038-aa8b-bb2d12ec022d.json new file mode 100644 index 000000000..f5afd447c --- /dev/null +++ b/data/hfopenllm_v2/Replete-AI/Replete-Coder-Instruct-8b-Merged/c439478a-1734-4038-aa8b-bb2d12ec022d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Replete-AI_Replete-Coder-Instruct-8b-Merged/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Replete-Coder-Instruct-8b-Merged", + "id": "Replete-AI/Replete-Coder-Instruct-8b-Merged", + "developer": "Replete-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5388 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": 
"SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4462 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0778 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2693 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.366 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1805 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Replete-AI/Replete-Coder-Llama3-8B/4a36f73a-9495-4ea2-863c-220b8ca6bf99.json b/data/hfopenllm_v2/Replete-AI/Replete-Coder-Llama3-8B/4a36f73a-9495-4ea2-863c-220b8ca6bf99.json new file mode 100644 index 000000000..c28e41a33 --- /dev/null +++ b/data/hfopenllm_v2/Replete-AI/Replete-Coder-Llama3-8B/4a36f73a-9495-4ea2-863c-220b8ca6bf99.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Replete-AI_Replete-Coder-Llama3-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Replete-Coder-Llama3-8B", + "id": "Replete-AI/Replete-Coder-Llama3-8B", + "developer": "Replete-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4729 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.3271 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0476 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3953 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1331 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Replete-AI/Replete-Coder-Qwen2-1.5b/faa9d3b9-343a-4a9e-82c5-6bc81bc87b9c.json b/data/hfopenllm_v2/Replete-AI/Replete-Coder-Qwen2-1.5b/faa9d3b9-343a-4a9e-82c5-6bc81bc87b9c.json new file mode 100644 index 000000000..5845dc173 --- /dev/null +++ b/data/hfopenllm_v2/Replete-AI/Replete-Coder-Qwen2-1.5b/faa9d3b9-343a-4a9e-82c5-6bc81bc87b9c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Replete-AI_Replete-Coder-Qwen2-1.5b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Replete-Coder-Qwen2-1.5b", + "id": "Replete-AI/Replete-Coder-Qwen2-1.5b", + "developer": "Replete-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3014 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3475 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + 
"metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0385 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4073 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2147 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Replete-AI/Replete-LLM-Qwen2-7b/a55bf380-d567-4228-b30c-57e9df31e844.json b/data/hfopenllm_v2/Replete-AI/Replete-LLM-Qwen2-7b/a55bf380-d567-4228-b30c-57e9df31e844.json new file mode 100644 index 000000000..fcb0416cd --- /dev/null +++ b/data/hfopenllm_v2/Replete-AI/Replete-LLM-Qwen2-7b/a55bf380-d567-4228-b30c-57e9df31e844.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Replete-AI_Replete-LLM-Qwen2-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Replete-LLM-Qwen2-7b", + "id": "Replete-AI/Replete-LLM-Qwen2-7b", + "developer": "Replete-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0932 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2977 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + 
"evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2475 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3941 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1157 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Replete-AI/Replete-LLM-Qwen2-7b/dfd92311-4f3d-4355-8ccf-a59f29914b8f.json b/data/hfopenllm_v2/Replete-AI/Replete-LLM-Qwen2-7b/dfd92311-4f3d-4355-8ccf-a59f29914b8f.json new file mode 100644 index 000000000..2013766f3 --- /dev/null +++ b/data/hfopenllm_v2/Replete-AI/Replete-LLM-Qwen2-7b/dfd92311-4f3d-4355-8ccf-a59f29914b8f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Replete-AI_Replete-LLM-Qwen2-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Replete-LLM-Qwen2-7b", + "id": "Replete-AI/Replete-LLM-Qwen2-7b", + "developer": "Replete-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0905 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2985 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2534 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3848 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1158 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Replete-AI/Replete-LLM-Qwen2-7b_Beta-Preview/d98e190e-5b5f-46eb-b701-e32d2dbef3a0.json b/data/hfopenllm_v2/Replete-AI/Replete-LLM-Qwen2-7b_Beta-Preview/d98e190e-5b5f-46eb-b701-e32d2dbef3a0.json new file mode 100644 index 000000000..43cd838cf --- /dev/null +++ b/data/hfopenllm_v2/Replete-AI/Replete-LLM-Qwen2-7b_Beta-Preview/d98e190e-5b5f-46eb-b701-e32d2dbef3a0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Replete-AI_Replete-LLM-Qwen2-7b_Beta-Preview/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Replete-LLM-Qwen2-7b_Beta-Preview", + "id": "Replete-AI/Replete-LLM-Qwen2-7b_Beta-Preview", + "developer": "Replete-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0858 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2929 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2483 + } + }, + { + "evaluation_name": "MUSR", + 
"source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3981 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1285 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Replete-AI/Replete-LLM-V2-Llama-3.1-8b/32edb764-2a42-4efe-ac86-9eda81942b84.json b/data/hfopenllm_v2/Replete-AI/Replete-LLM-V2-Llama-3.1-8b/32edb764-2a42-4efe-ac86-9eda81942b84.json new file mode 100644 index 000000000..f78d9e7b1 --- /dev/null +++ b/data/hfopenllm_v2/Replete-AI/Replete-LLM-V2-Llama-3.1-8b/32edb764-2a42-4efe-ac86-9eda81942b84.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Replete-AI_Replete-LLM-V2-Llama-3.1-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Replete-LLM-V2-Llama-3.1-8b", + "id": "Replete-AI/Replete-LLM-V2-Llama-3.1-8b", + "developer": "Replete-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5515 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5339 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1405 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3138 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4001 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3753 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/RezVortex/JAJUKA-WEWILLNEVERFORGETYOU-3B/36855ebd-2030-4d5d-9c42-ca049244e694.json b/data/hfopenllm_v2/RezVortex/JAJUKA-WEWILLNEVERFORGETYOU-3B/36855ebd-2030-4d5d-9c42-ca049244e694.json new file mode 100644 index 000000000..bf5d8960d --- /dev/null +++ b/data/hfopenllm_v2/RezVortex/JAJUKA-WEWILLNEVERFORGETYOU-3B/36855ebd-2030-4d5d-9c42-ca049244e694.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/RezVortex_JAJUKA-WEWILLNEVERFORGETYOU-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JAJUKA-WEWILLNEVERFORGETYOU-3B", + "id": "RezVortex/JAJUKA-WEWILLNEVERFORGETYOU-3B", + "developer": "RezVortex", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6858 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4619 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1548 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.363 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + 
"dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3143 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/RezVortex/JAJUKA-WEWILLNEVERFORGETYOU-3B/76f26fef-fa87-4cf5-a317-ea4b743e7432.json b/data/hfopenllm_v2/RezVortex/JAJUKA-WEWILLNEVERFORGETYOU-3B/76f26fef-fa87-4cf5-a317-ea4b743e7432.json deleted file mode 100644 index 859a20861..000000000 --- a/data/hfopenllm_v2/RezVortex/JAJUKA-WEWILLNEVERFORGETYOU-3B/76f26fef-fa87-4cf5-a317-ea4b743e7432.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/RezVortex_JAJUKA-WEWILLNEVERFORGETYOU-3B/1762652579.853197", - "retrieved_timestamp": "1762652579.853197", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "RezVortex/JAJUKA-WEWILLNEVERFORGETYOU-3B", - "developer": "RezVortex", - "inference_platform": "unknown", - "id": "RezVortex/JAJUKA-WEWILLNEVERFORGETYOU-3B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6858103166265509 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46189139399865614 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15483383685800603 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36302083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3143284574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/RezVortex/Jajuka-3b/9651a0a1-4004-42f3-ad8f-2aebb38ec967.json b/data/hfopenllm_v2/RezVortex/Jajuka-3b/9651a0a1-4004-42f3-ad8f-2aebb38ec967.json new file mode 100644 index 000000000..797c96c6d --- /dev/null +++ b/data/hfopenllm_v2/RezVortex/Jajuka-3b/9651a0a1-4004-42f3-ad8f-2aebb38ec967.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + 
"evaluation_id": "hfopenllm_v2/RezVortex_Jajuka-3b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Jajuka-3b", + "id": "RezVortex/Jajuka-3b", + "developer": "RezVortex", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6925 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4594 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1594 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3671 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3137 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/RezVortex/Jajuka-3b/a41d111c-dd5d-4f77-b52d-9a2dc9f31e50.json b/data/hfopenllm_v2/RezVortex/Jajuka-3b/a41d111c-dd5d-4f77-b52d-9a2dc9f31e50.json deleted file mode 100644 index 7c6ed8615..000000000 --- a/data/hfopenllm_v2/RezVortex/Jajuka-3b/a41d111c-dd5d-4f77-b52d-9a2dc9f31e50.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/RezVortex_Jajuka-3b/1762652579.85344", - "retrieved_timestamp": "1762652579.853441", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "RezVortex/Jajuka-3b", - "developer": "RezVortex", - "inference_platform": "unknown", - "id": "RezVortex/Jajuka-3b", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6925047762159957 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4593872338446621 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1593655589123867 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3670833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3137466755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/Ro-xe/FMixIA-7B-DARE-0/93930443-dc12-422f-9920-470917ef8d7d.json b/data/hfopenllm_v2/Ro-xe/FMixIA-7B-DARE-0/93930443-dc12-422f-9920-470917ef8d7d.json deleted file mode 100644 index bfde64932..000000000 --- a/data/hfopenllm_v2/Ro-xe/FMixIA-7B-DARE-0/93930443-dc12-422f-9920-470917ef8d7d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Ro-xe_FMixIA-7B-DARE-0/1762652579.8536398", - "retrieved_timestamp": "1762652579.853641", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Ro-xe/FMixIA-7B-DARE-0", - "developer": "Ro-xe", - "inference_platform": "unknown", - "id": "Ro-xe/FMixIA-7B-DARE-0", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3341256754300811 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, 
- "max_score": 1 - }, - "score_details": { - "score": 0.5035332799973222 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.052870090634441085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45448958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3016123670212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/Ro-xe/FMixIA-7B-DARE-0/a59e55dc-e2b5-43be-8469-49eee0e98d55.json b/data/hfopenllm_v2/Ro-xe/FMixIA-7B-DARE-0/a59e55dc-e2b5-43be-8469-49eee0e98d55.json new file mode 100644 index 000000000..b2648eb78 --- /dev/null +++ b/data/hfopenllm_v2/Ro-xe/FMixIA-7B-DARE-0/a59e55dc-e2b5-43be-8469-49eee0e98d55.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Ro-xe_FMixIA-7B-DARE-0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FMixIA-7B-DARE-0", + "id": "Ro-xe/FMixIA-7B-DARE-0", + "developer": "Ro-xe", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3341 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5035 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0529 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", 
+ "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4545 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3016 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Ro-xe/FMixIA-7B-SLERP-27/7f08546a-3f05-4612-879c-3f293daeabd4.json b/data/hfopenllm_v2/Ro-xe/FMixIA-7B-SLERP-27/7f08546a-3f05-4612-879c-3f293daeabd4.json deleted file mode 100644 index 7e70bcbf7..000000000 --- a/data/hfopenllm_v2/Ro-xe/FMixIA-7B-SLERP-27/7f08546a-3f05-4612-879c-3f293daeabd4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Ro-xe_FMixIA-7B-SLERP-27/1762652579.853882", - "retrieved_timestamp": "1762652579.8538828", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Ro-xe/FMixIA-7B-SLERP-27", - "developer": "Ro-xe", - "inference_platform": "unknown", - "id": "Ro-xe/FMixIA-7B-SLERP-27", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3765409114482905 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5150591725181265 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44115624999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.30078125 - } - } - ] -} diff --git a/data/hfopenllm_v2/Ro-xe/FMixIA-7B-SLERP-27/a956e306-f184-4dbc-ac7a-3793ae735801.json b/data/hfopenllm_v2/Ro-xe/FMixIA-7B-SLERP-27/a956e306-f184-4dbc-ac7a-3793ae735801.json new file mode 100644 index 000000000..f8798e128 --- /dev/null +++ b/data/hfopenllm_v2/Ro-xe/FMixIA-7B-SLERP-27/a956e306-f184-4dbc-ac7a-3793ae735801.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Ro-xe_FMixIA-7B-SLERP-27/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FMixIA-7B-SLERP-27", + "id": "Ro-xe/FMixIA-7B-SLERP-27", + "developer": "Ro-xe", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3765 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5151 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0634 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4412 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3008 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Ro-xe/FMixIA-7B-TIES-1/b5d64806-0d01-4c99-9ba6-6aff88c894bd.json b/data/hfopenllm_v2/Ro-xe/FMixIA-7B-TIES-1/b5d64806-0d01-4c99-9ba6-6aff88c894bd.json deleted file mode 100644 index 3af129076..000000000 --- 
a/data/hfopenllm_v2/Ro-xe/FMixIA-7B-TIES-1/b5d64806-0d01-4c99-9ba6-6aff88c894bd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Ro-xe_FMixIA-7B-TIES-1/1762652579.8540852", - "retrieved_timestamp": "1762652579.8540852", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Ro-xe/FMixIA-7B-TIES-1", - "developer": "Ro-xe", - "inference_platform": "unknown", - "id": "Ro-xe/FMixIA-7B-TIES-1", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34529160405501846 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5091539642456672 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46890625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2992021276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/Ro-xe/FMixIA-7B-TIES-1/c05cc6ce-12fd-491d-b41b-57cc14b6d34a.json b/data/hfopenllm_v2/Ro-xe/FMixIA-7B-TIES-1/c05cc6ce-12fd-491d-b41b-57cc14b6d34a.json new file mode 100644 index 000000000..1a47f9d40 --- /dev/null +++ b/data/hfopenllm_v2/Ro-xe/FMixIA-7B-TIES-1/c05cc6ce-12fd-491d-b41b-57cc14b6d34a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Ro-xe_FMixIA-7B-TIES-1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FMixIA-7B-TIES-1", + "id": "Ro-xe/FMixIA-7B-TIES-1", + "developer": "Ro-xe", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": 
"hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3453 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5092 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0566 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2886 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4689 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2992 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Ro-xe/FMixIA-FrankenMerge-9.5B-PT-9/0d1c7e5e-4ddf-447b-9581-c62cedc2fedc.json b/data/hfopenllm_v2/Ro-xe/FMixIA-FrankenMerge-9.5B-PT-9/0d1c7e5e-4ddf-447b-9581-c62cedc2fedc.json deleted file mode 100644 index de5948a33..000000000 --- a/data/hfopenllm_v2/Ro-xe/FMixIA-FrankenMerge-9.5B-PT-9/0d1c7e5e-4ddf-447b-9581-c62cedc2fedc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Ro-xe_FMixIA-FrankenMerge-9.5B-PT-9/1762652579.8542862", - "retrieved_timestamp": "1762652579.8542871", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Ro-xe/FMixIA-FrankenMerge-9.5B-PT-9", - "developer": "Ro-xe", - "inference_platform": "unknown", - "id": "Ro-xe/FMixIA-FrankenMerge-9.5B-PT-9", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.141 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.19401632113902223 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5087851148631056 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0030211480362537764 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41703124999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36569148936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/Ro-xe/FMixIA-FrankenMerge-9.5B-PT-9/415875b7-fe10-47e7-aca0-029c2f51c067.json b/data/hfopenllm_v2/Ro-xe/FMixIA-FrankenMerge-9.5B-PT-9/415875b7-fe10-47e7-aca0-029c2f51c067.json new file mode 100644 index 000000000..2f038af4f --- /dev/null +++ b/data/hfopenllm_v2/Ro-xe/FMixIA-FrankenMerge-9.5B-PT-9/415875b7-fe10-47e7-aca0-029c2f51c067.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Ro-xe_FMixIA-FrankenMerge-9.5B-PT-9/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FMixIA-FrankenMerge-9.5B-PT-9", + "id": "Ro-xe/FMixIA-FrankenMerge-9.5B-PT-9", + "developer": "Ro-xe", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.141 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.194 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5088 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.003 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.417 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3657 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Rombo-Org/Rombo-LLM-V2.5-Qwen-7b/c505ee64-3d3b-48e2-9c8a-f59609a758e9.json b/data/hfopenllm_v2/Rombo-Org/Rombo-LLM-V2.5-Qwen-7b/c505ee64-3d3b-48e2-9c8a-f59609a758e9.json new file mode 100644 index 000000000..92335ffa4 --- /dev/null +++ b/data/hfopenllm_v2/Rombo-Org/Rombo-LLM-V2.5-Qwen-7b/c505ee64-3d3b-48e2-9c8a-f59609a758e9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Rombo-Org_Rombo-LLM-V2.5-Qwen-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Rombo-LLM-V2.5-Qwen-7b", + "id": "Rombo-Org/Rombo-LLM-V2.5-Qwen-7b", + "developer": "Rombo-Org", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7482 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.54 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5068 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.398 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4283 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/RubielLabarta/LogoS-7Bx2-MoE-13B-v0.2/00003185-c291-40c5-bba1-f87eae0afc08.json b/data/hfopenllm_v2/RubielLabarta/LogoS-7Bx2-MoE-13B-v0.2/00003185-c291-40c5-bba1-f87eae0afc08.json new file mode 100644 index 000000000..14bcc9da2 --- /dev/null +++ b/data/hfopenllm_v2/RubielLabarta/LogoS-7Bx2-MoE-13B-v0.2/00003185-c291-40c5-bba1-f87eae0afc08.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/RubielLabarta_LogoS-7Bx2-MoE-13B-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LogoS-7Bx2-MoE-13B-v0.2", + "id": "RubielLabarta/LogoS-7Bx2-MoE-13B-v0.2", + "developer": "RubielLabarta", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 12.879 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4379 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5207 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0574 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + 
"evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4226 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3088 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/RubielLabarta/LogoS-7Bx2-MoE-13B-v0.2/63522d1e-d4bf-4071-a086-5ef016243ec1.json b/data/hfopenllm_v2/RubielLabarta/LogoS-7Bx2-MoE-13B-v0.2/63522d1e-d4bf-4071-a086-5ef016243ec1.json deleted file mode 100644 index 4b072007a..000000000 --- a/data/hfopenllm_v2/RubielLabarta/LogoS-7Bx2-MoE-13B-v0.2/63522d1e-d4bf-4071-a086-5ef016243ec1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/RubielLabarta_LogoS-7Bx2-MoE-13B-v0.2/1762652579.85476", - "retrieved_timestamp": "1762652579.85476", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "RubielLabarta/LogoS-7Bx2-MoE-13B-v0.2", - "developer": "RubielLabarta", - "inference_platform": "unknown", - "id": "RubielLabarta/LogoS-7Bx2-MoE-13B-v0.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4378903531518593 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5206958722481815 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4226145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3087599734042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/SaisExperiments/Evil-Alpaca-3B-L3.2/328f61d7-677b-4a06-b464-0da42153f9ae.json b/data/hfopenllm_v2/SaisExperiments/Evil-Alpaca-3B-L3.2/328f61d7-677b-4a06-b464-0da42153f9ae.json new file mode 100644 index 000000000..2ee369a00 --- /dev/null +++ b/data/hfopenllm_v2/SaisExperiments/Evil-Alpaca-3B-L3.2/328f61d7-677b-4a06-b464-0da42153f9ae.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SaisExperiments_Evil-Alpaca-3B-L3.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Evil-Alpaca-3B-L3.2", + "id": "SaisExperiments/Evil-Alpaca-3B-L3.2", + "developer": "SaisExperiments", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3251 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4341 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0702 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4198 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2621 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SaisExperiments/Evil-Alpaca-3B-L3.2/f9c7c5b5-6274-4971-a81a-6f88ec07ca93.json 
b/data/hfopenllm_v2/SaisExperiments/Evil-Alpaca-3B-L3.2/f9c7c5b5-6274-4971-a81a-6f88ec07ca93.json deleted file mode 100644 index 3b947f14f..000000000 --- a/data/hfopenllm_v2/SaisExperiments/Evil-Alpaca-3B-L3.2/f9c7c5b5-6274-4971-a81a-6f88ec07ca93.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SaisExperiments_Evil-Alpaca-3B-L3.2/1762652579.8550148", - "retrieved_timestamp": "1762652579.8550148", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SaisExperiments/Evil-Alpaca-3B-L3.2", - "developer": "SaisExperiments", - "inference_platform": "unknown", - "id": "SaisExperiments/Evil-Alpaca-3B-L3.2", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32510848991786234 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4340757699220565 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0702416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4197604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2621343085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/SaisExperiments/Gemma-2-2B-Opus-Instruct/369f84c6-022e-46ed-8cfc-2e0b4a8e175a.json b/data/hfopenllm_v2/SaisExperiments/Gemma-2-2B-Opus-Instruct/369f84c6-022e-46ed-8cfc-2e0b4a8e175a.json deleted file mode 100644 index c24cd0eeb..000000000 --- a/data/hfopenllm_v2/SaisExperiments/Gemma-2-2B-Opus-Instruct/369f84c6-022e-46ed-8cfc-2e0b4a8e175a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SaisExperiments_Gemma-2-2B-Opus-Instruct/1762652579.855459", - "retrieved_timestamp": "1762652579.8554602", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": 
"documentation" - }, - "model_info": { - "name": "SaisExperiments/Gemma-2-2B-Opus-Instruct", - "developer": "SaisExperiments", - "inference_platform": "unknown", - "id": "SaisExperiments/Gemma-2-2B-Opus-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.474959773401242 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4292846281445681 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4056875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2650432180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/SaisExperiments/Gemma-2-2B-Opus-Instruct/9cb5b8fd-062c-4161-9301-640980d21b9f.json b/data/hfopenllm_v2/SaisExperiments/Gemma-2-2B-Opus-Instruct/9cb5b8fd-062c-4161-9301-640980d21b9f.json new file mode 100644 index 000000000..b9f60ffdb --- /dev/null +++ b/data/hfopenllm_v2/SaisExperiments/Gemma-2-2B-Opus-Instruct/9cb5b8fd-062c-4161-9301-640980d21b9f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SaisExperiments_Gemma-2-2B-Opus-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma-2-2B-Opus-Instruct", + "id": "SaisExperiments/Gemma-2-2B-Opus-Instruct", + "developer": "SaisExperiments", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.475 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + 
}, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4293 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0506 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4057 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.265 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SaisExperiments/Gemma-2-2B-Stheno-Filtered/09284b75-a2f9-40ea-8135-7aa61c626fa2.json b/data/hfopenllm_v2/SaisExperiments/Gemma-2-2B-Stheno-Filtered/09284b75-a2f9-40ea-8135-7aa61c626fa2.json new file mode 100644 index 000000000..cef011e36 --- /dev/null +++ b/data/hfopenllm_v2/SaisExperiments/Gemma-2-2B-Stheno-Filtered/09284b75-a2f9-40ea-8135-7aa61c626fa2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SaisExperiments_Gemma-2-2B-Stheno-Filtered/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma-2-2B-Stheno-Filtered", + "id": "SaisExperiments/Gemma-2-2B-Stheno-Filtered", + "developer": "SaisExperiments", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4197 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, 
+ "score_details": { + "score": 0.4149 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0461 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.263 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SaisExperiments/Not-So-Small-Alpaca-24B/98275290-dbd0-462e-9028-4daa65cd5ce3.json b/data/hfopenllm_v2/SaisExperiments/Not-So-Small-Alpaca-24B/98275290-dbd0-462e-9028-4daa65cd5ce3.json deleted file mode 100644 index 6f788446c..000000000 --- a/data/hfopenllm_v2/SaisExperiments/Not-So-Small-Alpaca-24B/98275290-dbd0-462e-9028-4daa65cd5ce3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SaisExperiments_Not-So-Small-Alpaca-24B/1762652579.855924", - "retrieved_timestamp": "1762652579.855925", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SaisExperiments/Not-So-Small-Alpaca-24B", - "developer": "SaisExperiments", - "inference_platform": "unknown", - "id": "SaisExperiments/Not-So-Small-Alpaca-24B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6243611395541607 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5338637679203099 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.18277945619335348 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35906040268456374 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42816666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36943151595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/SaisExperiments/Not-So-Small-Alpaca-24B/e2502331-6ac3-43bc-8218-259b44333283.json b/data/hfopenllm_v2/SaisExperiments/Not-So-Small-Alpaca-24B/e2502331-6ac3-43bc-8218-259b44333283.json new file mode 100644 index 000000000..f23479676 --- /dev/null +++ b/data/hfopenllm_v2/SaisExperiments/Not-So-Small-Alpaca-24B/e2502331-6ac3-43bc-8218-259b44333283.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SaisExperiments_Not-So-Small-Alpaca-24B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Not-So-Small-Alpaca-24B", + "id": "SaisExperiments/Not-So-Small-Alpaca-24B", + "developer": "SaisExperiments", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6244 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5339 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1828 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3591 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": 
"hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4282 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3694 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SaisExperiments/QwOwO-7B-V1/8dde454d-aa48-4ee1-b5c6-f3353087d492.json b/data/hfopenllm_v2/SaisExperiments/QwOwO-7B-V1/8dde454d-aa48-4ee1-b5c6-f3353087d492.json new file mode 100644 index 000000000..858e45e66 --- /dev/null +++ b/data/hfopenllm_v2/SaisExperiments/QwOwO-7B-V1/8dde454d-aa48-4ee1-b5c6-f3353087d492.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SaisExperiments_QwOwO-7B-V1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwOwO-7B-V1", + "id": "SaisExperiments/QwOwO-7B-V1", + "developer": "SaisExperiments", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4556 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5431 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.386 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3835 + } + 
}, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4224 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SaisExperiments/QwOwO-7B-V1/9064bdc6-b84b-4022-9d7a-63b1b76fc1bc.json b/data/hfopenllm_v2/SaisExperiments/QwOwO-7B-V1/9064bdc6-b84b-4022-9d7a-63b1b76fc1bc.json deleted file mode 100644 index 55ae125ed..000000000 --- a/data/hfopenllm_v2/SaisExperiments/QwOwO-7B-V1/9064bdc6-b84b-4022-9d7a-63b1b76fc1bc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SaisExperiments_QwOwO-7B-V1/1762652579.856126", - "retrieved_timestamp": "1762652579.856126", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SaisExperiments/QwOwO-7B-V1", - "developer": "SaisExperiments", - "inference_platform": "unknown", - "id": "SaisExperiments/QwOwO-7B-V1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45562551806983254 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5431230107025949 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3859516616314199 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38348958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42237367021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/SaisExperiments/RightSheep-Llama3.2-3B/662c8ed2-2407-4606-ac1e-ec7ade185d2d.json b/data/hfopenllm_v2/SaisExperiments/RightSheep-Llama3.2-3B/662c8ed2-2407-4606-ac1e-ec7ade185d2d.json new file mode 100644 index 000000000..8ded05543 --- /dev/null +++ b/data/hfopenllm_v2/SaisExperiments/RightSheep-Llama3.2-3B/662c8ed2-2407-4606-ac1e-ec7ade185d2d.json @@ -0,0 +1,132 
@@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SaisExperiments_RightSheep-Llama3.2-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RightSheep-Llama3.2-3B", + "id": "SaisExperiments/RightSheep-Llama3.2-3B", + "developer": "SaisExperiments", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4156 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4241 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0808 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3767 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.254 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Anemoi-3B/332aef8c-7c62-463e-ba3c-07ae0205d457.json b/data/hfopenllm_v2/Sakalti/Anemoi-3B/332aef8c-7c62-463e-ba3c-07ae0205d457.json new file mode 100644 index 000000000..311f81095 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Anemoi-3B/332aef8c-7c62-463e-ba3c-07ae0205d457.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_Anemoi-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Anemoi-3B", + "id": "Sakalti/Anemoi-3B", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3804 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4922 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1775 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4371 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3766 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Anemoi-3B/b50b5452-b824-4fd6-b0e4-cdaea09139a2.json b/data/hfopenllm_v2/Sakalti/Anemoi-3B/b50b5452-b824-4fd6-b0e4-cdaea09139a2.json deleted file mode 100644 index 9d76286e7..000000000 --- a/data/hfopenllm_v2/Sakalti/Anemoi-3B/b50b5452-b824-4fd6-b0e4-cdaea09139a2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_Anemoi-3B/1762652579.856576", - "retrieved_timestamp": "1762652579.856576", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/Anemoi-3B", - "developer": "Sakalti", - "inference_platform": "unknown", - 
"id": "Sakalti/Anemoi-3B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3803629924156793 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4921954661921298 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17749244712990936 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43706249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3765791223404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/Euphrates-14B/cfdfcf21-e445-430e-a295-946cb8c3fce9.json b/data/hfopenllm_v2/Sakalti/Euphrates-14B/cfdfcf21-e445-430e-a295-946cb8c3fce9.json new file mode 100644 index 000000000..228187fd8 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Euphrates-14B/cfdfcf21-e445-430e-a295-946cb8c3fce9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_Euphrates-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Euphrates-14B", + "id": "Sakalti/Euphrates-14B", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2647 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6138 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + 
"dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3051 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3935 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4516 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5255 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Euphrates-14B/db8c1ba2-4029-45c5-b8a6-5343356266eb.json b/data/hfopenllm_v2/Sakalti/Euphrates-14B/db8c1ba2-4029-45c5-b8a6-5343356266eb.json deleted file mode 100644 index af4717529..000000000 --- a/data/hfopenllm_v2/Sakalti/Euphrates-14B/db8c1ba2-4029-45c5-b8a6-5343356266eb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_Euphrates-14B/1762652579.856813", - "retrieved_timestamp": "1762652579.8568141", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/Euphrates-14B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Euphrates-14B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26468326263203856 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6137691668744961 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30513595166163143 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3934563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45157291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5255152925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/Llama3.2-3B-Uranus-1/a5606b92-aa2d-44e3-a92c-47d0b38fef9c.json b/data/hfopenllm_v2/Sakalti/Llama3.2-3B-Uranus-1/a5606b92-aa2d-44e3-a92c-47d0b38fef9c.json new file mode 100644 index 000000000..077b4b960 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Llama3.2-3B-Uranus-1/a5606b92-aa2d-44e3-a92c-47d0b38fef9c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_Llama3.2-3B-Uranus-1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.2-3B-Uranus-1", + "id": "Sakalti/Llama3.2-3B-Uranus-1", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5335 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4437 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1495 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3669 + } + }, + { + "evaluation_name": "MMLU-PRO", + 
"source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3094 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Magro-7B-v1.1/465d473c-ef28-4725-8cac-02f2a031b22c.json b/data/hfopenllm_v2/Sakalti/Magro-7B-v1.1/465d473c-ef28-4725-8cac-02f2a031b22c.json new file mode 100644 index 000000000..748ddb98f --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Magro-7B-v1.1/465d473c-ef28-4725-8cac-02f2a031b22c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_Magro-7B-v1.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Magro-7B-v1.1", + "id": "Sakalti/Magro-7B-v1.1", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1204 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4179 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0249 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4433 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.2764 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Magro-7B-v1.1/9e6c7958-689f-4437-b81a-c055d53ca33e.json b/data/hfopenllm_v2/Sakalti/Magro-7B-v1.1/9e6c7958-689f-4437-b81a-c055d53ca33e.json deleted file mode 100644 index a179e8fae..000000000 --- a/data/hfopenllm_v2/Sakalti/Magro-7B-v1.1/9e6c7958-689f-4437-b81a-c055d53ca33e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_Magro-7B-v1.1/1762652579.857256", - "retrieved_timestamp": "1762652579.857256", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/Magro-7B-v1.1", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Magro-7B-v1.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1204016454119514 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41790625208343796 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4433229166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27642952127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/Neptuno-3B/2c636544-8676-4eee-8bcd-d623be0275be.json b/data/hfopenllm_v2/Sakalti/Neptuno-3B/2c636544-8676-4eee-8bcd-d623be0275be.json new file mode 100644 index 000000000..6e3a0d4d0 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Neptuno-3B/2c636544-8676-4eee-8bcd-d623be0275be.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_Neptuno-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Neptuno-3B", + "id": "Sakalti/Neptuno-3B", + "developer": "Sakalti", 
+ "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4296 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4834 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2553 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4002 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3773 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Neptuno-3B/4c2150fc-f473-4bdc-8823-960778ccbc75.json b/data/hfopenllm_v2/Sakalti/Neptuno-3B/4c2150fc-f473-4bdc-8823-960778ccbc75.json deleted file mode 100644 index c9ab4eba2..000000000 --- a/data/hfopenllm_v2/Sakalti/Neptuno-3B/4c2150fc-f473-4bdc-8823-960778ccbc75.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_Neptuno-3B/1762652579.857454", - "retrieved_timestamp": "1762652579.857455", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/Neptuno-3B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Neptuno-3B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42962229107656574 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48335808848564965 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40019791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3773271276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/Neptuno-Alpha/511ac4a5-6fc8-4338-845d-859d73d57678.json b/data/hfopenllm_v2/Sakalti/Neptuno-Alpha/511ac4a5-6fc8-4338-845d-859d73d57678.json deleted file mode 100644 index e884eb784..000000000 --- a/data/hfopenllm_v2/Sakalti/Neptuno-Alpha/511ac4a5-6fc8-4338-845d-859d73d57678.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_Neptuno-Alpha/1762652579.857697", - "retrieved_timestamp": "1762652579.857698", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/Neptuno-Alpha", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Neptuno-Alpha", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3779649108809071 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49247749379461303 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18353474320241692 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43706249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3767453457446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/Neptuno-Alpha/8b332fac-1cfa-498b-853a-52ec5492ddc7.json b/data/hfopenllm_v2/Sakalti/Neptuno-Alpha/8b332fac-1cfa-498b-853a-52ec5492ddc7.json new file mode 100644 index 000000000..c98619fdf --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Neptuno-Alpha/8b332fac-1cfa-498b-853a-52ec5492ddc7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_Neptuno-Alpha/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Neptuno-Alpha", + "id": "Sakalti/Neptuno-Alpha", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.378 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4925 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1835 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.4371 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3767 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Oxyge1-33B/2bf1b38b-e90b-4fa8-b19e-47d93ff9ab4e.json b/data/hfopenllm_v2/Sakalti/Oxyge1-33B/2bf1b38b-e90b-4fa8-b19e-47d93ff9ab4e.json new file mode 100644 index 000000000..eb5bc1d97 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Oxyge1-33B/2bf1b38b-e90b-4fa8-b19e-47d93ff9ab4e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_Oxyge1-33B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Oxyge1-33B", + "id": "Sakalti/Oxyge1-33B", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4548 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7033 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4962 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3826 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5008 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5909 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Oxyge1-33B/ee17e3a4-2036-4e57-9ada-51fe6d23ffac.json b/data/hfopenllm_v2/Sakalti/Oxyge1-33B/ee17e3a4-2036-4e57-9ada-51fe6d23ffac.json deleted file mode 100644 index 8db19b640..000000000 --- a/data/hfopenllm_v2/Sakalti/Oxyge1-33B/ee17e3a4-2036-4e57-9ada-51fe6d23ffac.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_Oxyge1-33B/1762652579.8578959", - "retrieved_timestamp": "1762652579.857897", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/Oxyge1-33B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Oxyge1-33B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4548265269484966 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7033278292161169 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4962235649546828 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3825503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5007812500000001 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5909242021276596 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/Phi3.5-Comets-3.8B/69bb0243-75b2-4858-ba6b-5e70cfb516a7.json b/data/hfopenllm_v2/Sakalti/Phi3.5-Comets-3.8B/69bb0243-75b2-4858-ba6b-5e70cfb516a7.json new file mode 100644 index 000000000..0dfa27332 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Phi3.5-Comets-3.8B/69bb0243-75b2-4858-ba6b-5e70cfb516a7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_Phi3.5-Comets-3.8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"Phi3.5-Comets-3.8B", + "id": "Sakalti/Phi3.5-Comets-3.8B", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2094 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3335 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0008 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2492 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3764 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1153 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Qwen2.5-1B-Instruct/4bb7e325-8741-4c09-81f6-9efdb30ef5a5.json b/data/hfopenllm_v2/Sakalti/Qwen2.5-1B-Instruct/4bb7e325-8741-4c09-81f6-9efdb30ef5a5.json new file mode 100644 index 000000000..6c0cb747d --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Qwen2.5-1B-Instruct/4bb7e325-8741-4c09-81f6-9efdb30ef5a5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_Qwen2.5-1B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-1B-Instruct", + "id": "Sakalti/Qwen2.5-1B-Instruct", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.988 + } + }, + 
"evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1751 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3027 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.006 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2559 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3369 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1213 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Qwen2.5-1B-Instruct/da01b31f-dde8-45dd-b793-c8258a09ddee.json b/data/hfopenllm_v2/Sakalti/Qwen2.5-1B-Instruct/da01b31f-dde8-45dd-b793-c8258a09ddee.json deleted file mode 100644 index d0d3af8c3..000000000 --- a/data/hfopenllm_v2/Sakalti/Qwen2.5-1B-Instruct/da01b31f-dde8-45dd-b793-c8258a09ddee.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_Qwen2.5-1B-Instruct/1762652579.858331", - "retrieved_timestamp": "1762652579.858331", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/Qwen2.5-1B-Instruct", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Qwen2.5-1B-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.988 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17513198313807365 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30271528035563927 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33688541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12134308510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/QwenTest-7/87878b74-22ce-4554-914c-03e486d13de3.json b/data/hfopenllm_v2/Sakalti/QwenTest-7/87878b74-22ce-4554-914c-03e486d13de3.json new file mode 100644 index 000000000..5e4bcbff5 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/QwenTest-7/87878b74-22ce-4554-914c-03e486d13de3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_QwenTest-7/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwenTest-7", + "id": "Sakalti/QwenTest-7", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.988 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1672 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3063 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.0038 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3422 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1212 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJT-0.5B/5030f8d4-f216-4f78-84f1-dd03b0324bb0.json b/data/hfopenllm_v2/Sakalti/SJT-0.5B/5030f8d4-f216-4f78-84f1-dd03b0324bb0.json new file mode 100644 index 000000000..8cfc9ba99 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJT-0.5B/5030f8d4-f216-4f78-84f1-dd03b0324bb0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_SJT-0.5B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SJT-0.5B", + "id": "Sakalti/SJT-0.5B", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2425 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3306 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0521 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2718 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3196 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1891 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJT-0.5B/7763650a-8a37-41f2-aadd-b1db7b41d0b3.json b/data/hfopenllm_v2/Sakalti/SJT-0.5B/7763650a-8a37-41f2-aadd-b1db7b41d0b3.json deleted file mode 100644 index 0118f9422..000000000 --- a/data/hfopenllm_v2/Sakalti/SJT-0.5B/7763650a-8a37-41f2-aadd-b1db7b41d0b3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_SJT-0.5B/1762652579.858787", - "retrieved_timestamp": "1762652579.858787", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/SJT-0.5B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJT-0.5B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24247662867857286 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33055365550588683 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05211480362537765 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31958333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18907912234042554 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/Sakalti/SJT-1.5B-Alpha-1.1/c5e244fd-e85e-4fbb-9703-b8e733fb91bf.json b/data/hfopenllm_v2/Sakalti/SJT-1.5B-Alpha-1.1/c5e244fd-e85e-4fbb-9703-b8e733fb91bf.json new file mode 100644 index 000000000..b6b2e229c --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJT-1.5B-Alpha-1.1/c5e244fd-e85e-4fbb-9703-b8e733fb91bf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_SJT-1.5B-Alpha-1.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SJT-1.5B-Alpha-1.1", + "id": "Sakalti/SJT-1.5B-Alpha-1.1", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3439 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4243 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0959 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4239 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2966 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJT-1.5B-Alpha-1.1/e3f05df1-a653-41a0-983a-4a7d86b85c60.json b/data/hfopenllm_v2/Sakalti/SJT-1.5B-Alpha-1.1/e3f05df1-a653-41a0-983a-4a7d86b85c60.json deleted file mode 100644 index 1ff9993a6..000000000 --- 
a/data/hfopenllm_v2/Sakalti/SJT-1.5B-Alpha-1.1/e3f05df1-a653-41a0-983a-4a7d86b85c60.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_SJT-1.5B-Alpha-1.1/1762652579.859199", - "retrieved_timestamp": "1762652579.859199", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/SJT-1.5B-Alpha-1.1", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJT-1.5B-Alpha-1.1", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3439429602344003 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4243160272518483 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09592145015105741 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42391666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.296625664893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/SJT-1.5B-Alpha/21472871-fe74-447a-894c-80d77ae4ad0a.json b/data/hfopenllm_v2/Sakalti/SJT-1.5B-Alpha/21472871-fe74-447a-894c-80d77ae4ad0a.json deleted file mode 100644 index d293cf3bb..000000000 --- a/data/hfopenllm_v2/Sakalti/SJT-1.5B-Alpha/21472871-fe74-447a-894c-80d77ae4ad0a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_SJT-1.5B-Alpha/1762652579.858988", - "retrieved_timestamp": "1762652579.858989", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/SJT-1.5B-Alpha", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJT-1.5B-Alpha", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 
- } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3448671746521452 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4240819448548446 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09969788519637462 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4226145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961269946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/SJT-1.5B-Alpha/38261a01-62df-42b2-9b1d-f924598e70ef.json b/data/hfopenllm_v2/Sakalti/SJT-1.5B-Alpha/38261a01-62df-42b2-9b1d-f924598e70ef.json new file mode 100644 index 000000000..b438fe370 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJT-1.5B-Alpha/38261a01-62df-42b2-9b1d-f924598e70ef.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_SJT-1.5B-Alpha/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SJT-1.5B-Alpha", + "id": "Sakalti/SJT-1.5B-Alpha", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3449 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4241 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0997 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4226 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJT-1.7B/5736f0b5-3903-4774-a84a-c3db260d36e4.json b/data/hfopenllm_v2/Sakalti/SJT-1.7B/5736f0b5-3903-4774-a84a-c3db260d36e4.json new file mode 100644 index 000000000..b6d4f8db5 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJT-1.7B/5736f0b5-3903-4774-a84a-c3db260d36e4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_SJT-1.7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SJT-1.7B", + "id": "Sakalti/SJT-1.7B", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.684 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1776 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2934 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0015 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": 
"Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2416 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3964 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1133 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJT-1.7B/6e2f01c1-ba87-4687-9db1-a0c0004bdfe1.json b/data/hfopenllm_v2/Sakalti/SJT-1.7B/6e2f01c1-ba87-4687-9db1-a0c0004bdfe1.json deleted file mode 100644 index f051e3a4a..000000000 --- a/data/hfopenllm_v2/Sakalti/SJT-1.7B/6e2f01c1-ba87-4687-9db1-a0c0004bdfe1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_SJT-1.7B/1762652579.859416", - "retrieved_timestamp": "1762652579.8594172", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/SJT-1.7B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJT-1.7B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.684 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17762980004166723 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2934008926922806 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0015105740181268882 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24161073825503357 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39641666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, 
- "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11328125 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/SJT-14B/1169b5fd-9418-4986-940a-276d163431c0.json b/data/hfopenllm_v2/Sakalti/SJT-14B/1169b5fd-9418-4986-940a-276d163431c0.json deleted file mode 100644 index a2ae4e2ef..000000000 --- a/data/hfopenllm_v2/Sakalti/SJT-14B/1169b5fd-9418-4986-940a-276d163431c0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_SJT-14B/1762652579.8596292", - "retrieved_timestamp": "1762652579.85963", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/SJT-14B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJT-14B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5494233079340594 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6536135646865123 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38444108761329304 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38674496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.476625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5380651595744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/SJT-14B/70134d58-972e-49c9-8cde-4ba2691d3dc3.json b/data/hfopenllm_v2/Sakalti/SJT-14B/70134d58-972e-49c9-8cde-4ba2691d3dc3.json new file mode 100644 index 000000000..ff7d304d7 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJT-14B/70134d58-972e-49c9-8cde-4ba2691d3dc3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_SJT-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SJT-14B", + "id": "Sakalti/SJT-14B", + "developer": "Sakalti", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5494 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6536 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3844 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3867 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4766 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5381 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJT-2.4B/30b98827-5afb-4bfe-b765-9c81cb4580f4.json b/data/hfopenllm_v2/Sakalti/SJT-2.4B/30b98827-5afb-4bfe-b765-9c81cb4580f4.json deleted file mode 100644 index 6910cac59..000000000 --- a/data/hfopenllm_v2/Sakalti/SJT-2.4B/30b98827-5afb-4bfe-b765-9c81cb4580f4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_SJT-2.4B/1762652579.859841", - "retrieved_timestamp": "1762652579.859841", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/SJT-2.4B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJT-2.4B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 2.432 - } - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28042039566128985 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.349012395546882 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36990624999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1858377659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/SJT-2.4B/d4bb1440-2064-4752-bcb3-c9cec234fd1b.json b/data/hfopenllm_v2/Sakalti/SJT-2.4B/d4bb1440-2064-4752-bcb3-c9cec234fd1b.json new file mode 100644 index 000000000..b6553ae79 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJT-2.4B/d4bb1440-2064-4752-bcb3-c9cec234fd1b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_SJT-2.4B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SJT-2.4B", + "id": "Sakalti/SJT-2.4B", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 2.432 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2804 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.349 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0219 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2559 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3699 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1858 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJT-24B-Alpha/d9e6059e-d20b-4465-b7ba-2ee3a72562b6.json b/data/hfopenllm_v2/Sakalti/SJT-24B-Alpha/d9e6059e-d20b-4465-b7ba-2ee3a72562b6.json new file mode 100644 index 000000000..d24b4279e --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJT-24B-Alpha/d9e6059e-d20b-4465-b7ba-2ee3a72562b6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_SJT-24B-Alpha/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SJT-24B-Alpha", + "id": "Sakalti/SJT-24B-Alpha", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 24.125 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3206 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6081 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.253 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3809 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4595 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4857 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJT-24B-Alpha/f86649f8-8962-4496-8cd8-fed702a7e63b.json b/data/hfopenllm_v2/Sakalti/SJT-24B-Alpha/f86649f8-8962-4496-8cd8-fed702a7e63b.json deleted file mode 100644 index 412df07c4..000000000 --- a/data/hfopenllm_v2/Sakalti/SJT-24B-Alpha/f86649f8-8962-4496-8cd8-fed702a7e63b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_SJT-24B-Alpha/1762652579.860041", - "retrieved_timestamp": "1762652579.860041", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/SJT-24B-Alpha", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJT-24B-Alpha", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 24.125 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3206370208823699 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6080838080485248 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25302114803625375 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45947916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48570478723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/SJT-2B-V1.1/b4e467a7-3f2d-438a-8c42-1f7da1aafd20.json b/data/hfopenllm_v2/Sakalti/SJT-2B-V1.1/b4e467a7-3f2d-438a-8c42-1f7da1aafd20.json deleted file mode 100644 index 69eb7a78c..000000000 --- a/data/hfopenllm_v2/Sakalti/SJT-2B-V1.1/b4e467a7-3f2d-438a-8c42-1f7da1aafd20.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_SJT-2B-V1.1/1762652579.860439", - "retrieved_timestamp": "1762652579.860439", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/SJT-2B-V1.1", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJT-2B-V1.1", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3977235956151899 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39838417813569243 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04833836858006042 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42993750000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21243351063829788 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/SJT-2B-V1.1/f8b02d65-c8a0-43eb-b48e-d1e1f7f363d6.json b/data/hfopenllm_v2/Sakalti/SJT-2B-V1.1/f8b02d65-c8a0-43eb-b48e-d1e1f7f363d6.json new file mode 100644 index 000000000..1042b7d30 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJT-2B-V1.1/f8b02d65-c8a0-43eb-b48e-d1e1f7f363d6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_SJT-2B-V1.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SJT-2B-V1.1", + "id": 
"Sakalti/SJT-2B-V1.1", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3977 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3984 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0483 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4299 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2124 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJT-2B/7bf23db0-877c-4700-95c8-e35dee5e57b4.json b/data/hfopenllm_v2/Sakalti/SJT-2B/7bf23db0-877c-4700-95c8-e35dee5e57b4.json new file mode 100644 index 000000000..344d5d801 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJT-2B/7bf23db0-877c-4700-95c8-e35dee5e57b4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_SJT-2B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SJT-2B", + "id": "Sakalti/SJT-2B", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": 
"hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2151 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0008 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2416 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3564 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1187 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJT-2B/f720d81c-04e1-4f8a-b452-ae52cc7d9fb2.json b/data/hfopenllm_v2/Sakalti/SJT-2B/f720d81c-04e1-4f8a-b452-ae52cc7d9fb2.json deleted file mode 100644 index 9b5d951de..000000000 --- a/data/hfopenllm_v2/Sakalti/SJT-2B/f720d81c-04e1-4f8a-b452-ae52cc7d9fb2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_SJT-2B/1762652579.8602371", - "retrieved_timestamp": "1762652579.860238", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/SJT-2B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJT-2B", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21507378200951255 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29364597509285106 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24161073825503357 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35641666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11868351063829788 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/SJT-3.7B/07f8351e-c7c6-463f-9e91-ee1d3bb2b35c.json b/data/hfopenllm_v2/Sakalti/SJT-3.7B/07f8351e-c7c6-463f-9e91-ee1d3bb2b35c.json new file mode 100644 index 000000000..c851ca6fd --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJT-3.7B/07f8351e-c7c6-463f-9e91-ee1d3bb2b35c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_SJT-3.7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SJT-3.7B", + "id": "Sakalti/SJT-3.7B", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.783 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1078 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3393 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0121 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { 
+ "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2559 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3617 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1505 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJT-3.7B/e82f1a2e-f679-47b8-9fbb-a53116e2195b.json b/data/hfopenllm_v2/Sakalti/SJT-3.7B/e82f1a2e-f679-47b8-9fbb-a53116e2195b.json deleted file mode 100644 index 307914e80..000000000 --- a/data/hfopenllm_v2/Sakalti/SJT-3.7B/e82f1a2e-f679-47b8-9fbb-a53116e2195b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_SJT-3.7B/1762652579.860638", - "retrieved_timestamp": "1762652579.8606389", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/SJT-3.7B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJT-3.7B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.783 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10776184966998675 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3393045259885476 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36171875000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.1505152925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/SJT-4B/5115cea0-d3bf-486b-9609-36698e845653.json b/data/hfopenllm_v2/Sakalti/SJT-4B/5115cea0-d3bf-486b-9609-36698e845653.json deleted file mode 100644 index a5655acff..000000000 --- a/data/hfopenllm_v2/Sakalti/SJT-4B/5115cea0-d3bf-486b-9609-36698e845653.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_SJT-4B/1762652579.8608499", - "retrieved_timestamp": "1762652579.860851", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/SJT-4B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJT-4B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4077403511571519 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4885743296577029 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11555891238670694 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4779583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.328125 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/SJT-4B/8535ffae-f39d-46ed-89bb-a1656885db91.json b/data/hfopenllm_v2/Sakalti/SJT-4B/8535ffae-f39d-46ed-89bb-a1656885db91.json new file mode 100644 index 000000000..841001a9a --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJT-4B/8535ffae-f39d-46ed-89bb-a1656885db91.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_SJT-4B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SJT-4B", + "id": "Sakalti/SJT-4B", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + 
"precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4077 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4886 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1156 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.478 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3281 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJT-7.5B/57934f76-c8bd-4264-a3b4-14234dda0719.json b/data/hfopenllm_v2/Sakalti/SJT-7.5B/57934f76-c8bd-4264-a3b4-14234dda0719.json deleted file mode 100644 index b7550e733..000000000 --- a/data/hfopenllm_v2/Sakalti/SJT-7.5B/57934f76-c8bd-4264-a3b4-14234dda0719.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_SJT-7.5B/1762652579.861058", - "retrieved_timestamp": "1762652579.861058", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/SJT-7.5B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJT-7.5B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42232831110342783 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5367364587851736 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21676737160120846 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3263422818791946 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43988541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3951130319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/SJT-7.5B/5e832121-9a67-44d9-973d-fffdb1b37975.json b/data/hfopenllm_v2/Sakalti/SJT-7.5B/5e832121-9a67-44d9-973d-fffdb1b37975.json new file mode 100644 index 000000000..157236e81 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJT-7.5B/5e832121-9a67-44d9-973d-fffdb1b37975.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_SJT-7.5B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SJT-7.5B", + "id": "Sakalti/SJT-7.5B", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4223 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5367 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.2168 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3263 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4399 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3951 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJT-7B-V1.1-Multilingal/03cb237a-0519-449c-b9c7-d9fbb4d119cd.json b/data/hfopenllm_v2/Sakalti/SJT-7B-V1.1-Multilingal/03cb237a-0519-449c-b9c7-d9fbb4d119cd.json deleted file mode 100644 index e2d20e10b..000000000 --- a/data/hfopenllm_v2/Sakalti/SJT-7B-V1.1-Multilingal/03cb237a-0519-449c-b9c7-d9fbb4d119cd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_SJT-7B-V1.1-Multilingal/1762652579.861463", - "retrieved_timestamp": "1762652579.861464", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/SJT-7B-V1.1-Multilingal", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJT-7B-V1.1-Multilingal", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19494053555676716 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2919597646466201 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.004531722054380665 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.362125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11369680851063829 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/SJT-7B-V1.1-Multilingal/92d3f67d-a026-49e3-a440-68c10fb358ae.json b/data/hfopenllm_v2/Sakalti/SJT-7B-V1.1-Multilingal/92d3f67d-a026-49e3-a440-68c10fb358ae.json new file mode 100644 index 000000000..8dfb066a7 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJT-7B-V1.1-Multilingal/92d3f67d-a026-49e3-a440-68c10fb358ae.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_SJT-7B-V1.1-Multilingal/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SJT-7B-V1.1-Multilingal", + "id": "Sakalti/SJT-7B-V1.1-Multilingal", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1949 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.292 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0045 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3621 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1137 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJT-7B-V1.1/9d0baaef-bd31-4a96-bb2a-e92b62b748d2.json b/data/hfopenllm_v2/Sakalti/SJT-7B-V1.1/9d0baaef-bd31-4a96-bb2a-e92b62b748d2.json new file mode 100644 index 000000000..7f654b278 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJT-7B-V1.1/9d0baaef-bd31-4a96-bb2a-e92b62b748d2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_SJT-7B-V1.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SJT-7B-V1.1", + "id": "Sakalti/SJT-7B-V1.1", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4703 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5419 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2432 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3339 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4411 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4412 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJT-7B-V1.1/b1527426-9cc0-4eb5-af52-30e36e0e04fd.json 
b/data/hfopenllm_v2/Sakalti/SJT-7B-V1.1/b1527426-9cc0-4eb5-af52-30e36e0e04fd.json deleted file mode 100644 index 5329e1558..000000000 --- a/data/hfopenllm_v2/Sakalti/SJT-7B-V1.1/b1527426-9cc0-4eb5-af52-30e36e0e04fd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_SJT-7B-V1.1/1762652579.861262", - "retrieved_timestamp": "1762652579.861263", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/SJT-7B-V1.1", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJT-7B-V1.1", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4702888336281067 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5418885259534293 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.243202416918429 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3338926174496644 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44106249999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.441156914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/SJT-8B-V1.1/0cf37c9e-9218-4366-8065-befea0d2b749.json b/data/hfopenllm_v2/Sakalti/SJT-8B-V1.1/0cf37c9e-9218-4366-8065-befea0d2b749.json deleted file mode 100644 index c52bc7c20..000000000 --- a/data/hfopenllm_v2/Sakalti/SJT-8B-V1.1/0cf37c9e-9218-4366-8065-befea0d2b749.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_SJT-8B-V1.1/1762652579.8618612", - "retrieved_timestamp": "1762652579.861862", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/SJT-8B-V1.1", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJT-8B-V1.1", - "additional_details": { 
- "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 8.545 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4620706392372239 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5120768392487195 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20694864048338368 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33640939597315433 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4266145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4231216755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/SJT-8B-V1.1/489e8e84-5e30-46fa-a421-f52308f051e7.json b/data/hfopenllm_v2/Sakalti/SJT-8B-V1.1/489e8e84-5e30-46fa-a421-f52308f051e7.json new file mode 100644 index 000000000..011bbc7a2 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJT-8B-V1.1/489e8e84-5e30-46fa-a421-f52308f051e7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_SJT-8B-V1.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SJT-8B-V1.1", + "id": "Sakalti/SJT-8B-V1.1", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 8.545 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4621 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5121 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2069 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3364 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4266 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4231 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJT-8B/a208f807-c930-4e81-8ebd-dcbb4db76442.json b/data/hfopenllm_v2/Sakalti/SJT-8B/a208f807-c930-4e81-8ebd-dcbb4db76442.json new file mode 100644 index 000000000..effa93eb9 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJT-8B/a208f807-c930-4e81-8ebd-dcbb4db76442.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_SJT-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SJT-8B", + "id": "Sakalti/SJT-8B", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 8.548 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6535 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5282 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2538 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": 
"GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.408 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4266 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJT-8B/cb136400-7d0e-4194-9a45-1646ff8cac95.json b/data/hfopenllm_v2/Sakalti/SJT-8B/cb136400-7d0e-4194-9a45-1646ff8cac95.json deleted file mode 100644 index 5cf1f9741..000000000 --- a/data/hfopenllm_v2/Sakalti/SJT-8B/cb136400-7d0e-4194-9a45-1646ff8cac95.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_SJT-8B/1762652579.861662", - "retrieved_timestamp": "1762652579.8616629", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/SJT-8B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJT-8B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 8.548 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6534871917623019 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5281955607099067 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2537764350453172 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3296979865771812 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4079791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on 
MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4266123670212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/SJT-900M/4956539d-a255-4c56-877f-257e463fa3e4.json b/data/hfopenllm_v2/Sakalti/SJT-900M/4956539d-a255-4c56-877f-257e463fa3e4.json new file mode 100644 index 000000000..73e5c563b --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJT-900M/4956539d-a255-4c56-877f-257e463fa3e4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_SJT-900M/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SJT-900M", + "id": "Sakalti/SJT-900M", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.899 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.241 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3169 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0136 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2534 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3595 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1142 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJT-900M/ff057dd9-0102-485d-88d7-7e50145b5f7e.json 
b/data/hfopenllm_v2/Sakalti/SJT-900M/ff057dd9-0102-485d-88d7-7e50145b5f7e.json deleted file mode 100644 index d889325f7..000000000 --- a/data/hfopenllm_v2/Sakalti/SJT-900M/ff057dd9-0102-485d-88d7-7e50145b5f7e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_SJT-900M/1762652579.862072", - "retrieved_timestamp": "1762652579.8620732", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/SJT-900M", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJT-900M", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.899 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2410027615615456 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31692036321713823 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35945833333333327 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11419547872340426 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/SJT-Moe2x7.5B/3451eb65-020c-4e34-9128-7410e6b293cd.json b/data/hfopenllm_v2/Sakalti/SJT-Moe2x7.5B/3451eb65-020c-4e34-9128-7410e6b293cd.json new file mode 100644 index 000000000..af8b9e5e0 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJT-Moe2x7.5B/3451eb65-020c-4e34-9128-7410e6b293cd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_SJT-Moe2x7.5B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SJT-Moe2x7.5B", + "id": "Sakalti/SJT-Moe2x7.5B", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MixtralForCausalLM", + "params_billions": 13.401 + } + }, + 
"evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4117 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5371 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2145 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3263 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4399 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3954 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJT-Moe2x7.5B/e95c6f08-ab57-49a2-a83b-6a77b5ab69d9.json b/data/hfopenllm_v2/Sakalti/SJT-Moe2x7.5B/e95c6f08-ab57-49a2-a83b-6a77b5ab69d9.json deleted file mode 100644 index 45f4b987b..000000000 --- a/data/hfopenllm_v2/Sakalti/SJT-Moe2x7.5B/e95c6f08-ab57-49a2-a83b-6a77b5ab69d9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_SJT-Moe2x7.5B/1762652579.862277", - "retrieved_timestamp": "1762652579.862278", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/SJT-Moe2x7.5B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJT-Moe2x7.5B", - "additional_details": { - "precision": "float16", - "architecture": "MixtralForCausalLM", - "params_billions": 13.401 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41166216749336204 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5370697921185069 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21450151057401812 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3263422818791946 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43988541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3953623670212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/SJTPass-2/7f508bd9-7f95-453d-9e96-747ce91a64b3.json b/data/hfopenllm_v2/Sakalti/SJTPass-2/7f508bd9-7f95-453d-9e96-747ce91a64b3.json deleted file mode 100644 index 5715d9160..000000000 --- a/data/hfopenllm_v2/Sakalti/SJTPass-2/7f508bd9-7f95-453d-9e96-747ce91a64b3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_SJTPass-2/1762652579.8624809", - "retrieved_timestamp": "1762652579.8624818", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/SJTPass-2", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJTPass-2", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24002867945939 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33022032217255354 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.052870090634441085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32225 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1901595744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/SJTPass-2/b5cd0061-e4dd-4049-a51e-b16490e69120.json b/data/hfopenllm_v2/Sakalti/SJTPass-2/b5cd0061-e4dd-4049-a51e-b16490e69120.json new file mode 100644 index 000000000..b2c0d1e00 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJTPass-2/b5cd0061-e4dd-4049-a51e-b16490e69120.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_SJTPass-2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SJTPass-2", + "id": "Sakalti/SJTPass-2", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.24 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3302 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0529 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3222 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1902 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJTPass-4/c4686af6-0b7b-4df3-9152-14a3ef087b7f.json b/data/hfopenllm_v2/Sakalti/SJTPass-4/c4686af6-0b7b-4df3-9152-14a3ef087b7f.json new file mode 100644 index 000000000..fbfa47dbe --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJTPass-4/c4686af6-0b7b-4df3-9152-14a3ef087b7f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_SJTPass-4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SJTPass-4", + "id": "Sakalti/SJTPass-4", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.167 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1913 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2964 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0023 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3898 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1083 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/Sakalti/SJTPass-4/f814a3bd-b82e-4769-9ef7-a4670420bca0.json b/data/hfopenllm_v2/Sakalti/SJTPass-4/f814a3bd-b82e-4769-9ef7-a4670420bca0.json deleted file mode 100644 index 32e733c10..000000000 --- a/data/hfopenllm_v2/Sakalti/SJTPass-4/f814a3bd-b82e-4769-9ef7-a4670420bca0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_SJTPass-4/1762652579.8627222", - "retrieved_timestamp": "1762652579.8627222", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/SJTPass-4", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJTPass-4", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.167 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19129354557019818 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2963644180215358 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0022658610271903325 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38981249999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10829454787234043 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/SJTPass-5/155885ca-11e7-4cd2-b26c-53e001e2a6f9.json b/data/hfopenllm_v2/Sakalti/SJTPass-5/155885ca-11e7-4cd2-b26c-53e001e2a6f9.json new file mode 100644 index 000000000..8136444ac --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJTPass-5/155885ca-11e7-4cd2-b26c-53e001e2a6f9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_SJTPass-5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SJTPass-5", + "id": "Sakalti/SJTPass-5", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": 
"Qwen2ForCausalLM", + "params_billions": 0.809 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2425 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3103 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0159 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3794 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1327 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJTPass-5/5d5bda4e-8994-4cef-9772-d4bd435e9644.json b/data/hfopenllm_v2/Sakalti/SJTPass-5/5d5bda4e-8994-4cef-9772-d4bd435e9644.json deleted file mode 100644 index 4d3fb9106..000000000 --- a/data/hfopenllm_v2/Sakalti/SJTPass-5/5d5bda4e-8994-4cef-9772-d4bd435e9644.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_SJTPass-5/1762652579.862921", - "retrieved_timestamp": "1762652579.862922", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/SJTPass-5", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJTPass-5", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.809 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24247662867857286 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31029599812555747 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3794270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13272938829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/Saba-Passthrough-2/d9ca5411-def6-43b3-a522-595131d8e5e6.json b/data/hfopenllm_v2/Sakalti/Saba-Passthrough-2/d9ca5411-def6-43b3-a522-595131d8e5e6.json new file mode 100644 index 000000000..4e51d28f5 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Saba-Passthrough-2/d9ca5411-def6-43b3-a522-595131d8e5e6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_Saba-Passthrough-2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Saba-Passthrough-2", + "id": "Sakalti/Saba-Passthrough-2", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.087 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1691 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3672 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0008 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3844 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2077 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Saba-Passthrough-2/df1e7d22-c300-4466-92b7-770078a1dc09.json b/data/hfopenllm_v2/Sakalti/Saba-Passthrough-2/df1e7d22-c300-4466-92b7-770078a1dc09.json deleted file mode 100644 index 8b0b6a143..000000000 --- a/data/hfopenllm_v2/Sakalti/Saba-Passthrough-2/df1e7d22-c300-4466-92b7-770078a1dc09.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_Saba-Passthrough-2/1762652579.863117", - "retrieved_timestamp": "1762652579.8631182", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/Saba-Passthrough-2", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Saba-Passthrough-2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.087 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16913677930114318 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36724803467499195 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3844479166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20769614361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/Saba1-1.8B/d8cc8e9e-b672-4b26-a454-f97cd7a08648.json b/data/hfopenllm_v2/Sakalti/Saba1-1.8B/d8cc8e9e-b672-4b26-a454-f97cd7a08648.json deleted file mode 100644 index 9f3b3ba73..000000000 --- a/data/hfopenllm_v2/Sakalti/Saba1-1.8B/d8cc8e9e-b672-4b26-a454-f97cd7a08648.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_Saba1-1.8B/1762652579.863334", - "retrieved_timestamp": "1762652579.863334", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/Saba1-1.8B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Saba1-1.8B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3332768166243345 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4147375470428282 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1540785498489426 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4238854166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2925531914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/Saba1-1.8B/e54553ab-0897-4cb5-9213-5bb72758d2b5.json b/data/hfopenllm_v2/Sakalti/Saba1-1.8B/e54553ab-0897-4cb5-9213-5bb72758d2b5.json new file mode 100644 index 000000000..db3cc96a9 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Saba1-1.8B/e54553ab-0897-4cb5-9213-5bb72758d2b5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_Saba1-1.8B/1770682486.623709", + 
"retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Saba1-1.8B", + "id": "Sakalti/Saba1-1.8B", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3333 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4147 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1541 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4239 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2926 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Saba1-7B/1200ed26-8450-4788-a1bf-20f2c9b9b2c0.json b/data/hfopenllm_v2/Sakalti/Saba1-7B/1200ed26-8450-4788-a1bf-20f2c9b9b2c0.json deleted file mode 100644 index aa6c92e3c..000000000 --- a/data/hfopenllm_v2/Sakalti/Saba1-7B/1200ed26-8450-4788-a1bf-20f2c9b9b2c0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_Saba1-7B/1762652579.863542", - "retrieved_timestamp": "1762652579.863542", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": 
"documentation" - }, - "model_info": { - "name": "Sakalti/Saba1-7B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Saba1-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45847351693506566 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5489063327459239 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36631419939577037 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47932291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43758311170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/Saba1-7B/eed48cdc-18db-4c03-84bf-d2d50e3328b0.json b/data/hfopenllm_v2/Sakalti/Saba1-7B/eed48cdc-18db-4c03-84bf-d2d50e3328b0.json new file mode 100644 index 000000000..9e78fd60a --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Saba1-7B/eed48cdc-18db-4c03-84bf-d2d50e3328b0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_Saba1-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Saba1-7B", + "id": "Sakalti/Saba1-7B", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4585 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.5489 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3663 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3163 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4793 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4376 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Saba1.5-1.5B/a76090d4-a0fb-45c8-b28c-fa225ec3d11c.json b/data/hfopenllm_v2/Sakalti/Saba1.5-1.5B/a76090d4-a0fb-45c8-b28c-fa225ec3d11c.json deleted file mode 100644 index 6ffcd0b83..000000000 --- a/data/hfopenllm_v2/Sakalti/Saba1.5-1.5B/a76090d4-a0fb-45c8-b28c-fa225ec3d11c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_Saba1.5-1.5B/1762652579.8637571", - "retrieved_timestamp": "1762652579.863758", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/Saba1.5-1.5B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Saba1.5-1.5B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3332768166243345 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4147375470428282 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1540785498489426 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4238854166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2925531914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/Saba1.5-1.5B/d7952aef-37e2-4c15-a1a4-598690773bbb.json b/data/hfopenllm_v2/Sakalti/Saba1.5-1.5B/d7952aef-37e2-4c15-a1a4-598690773bbb.json new file mode 100644 index 000000000..d446d42e8 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Saba1.5-1.5B/d7952aef-37e2-4c15-a1a4-598690773bbb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_Saba1.5-1.5B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Saba1.5-1.5B", + "id": "Sakalti/Saba1.5-1.5B", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3333 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4147 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1541 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.4239 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2926 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Saba1.5-Pro-3B/14e1dd44-92f1-4d97-be67-fa98c9802ff1.json b/data/hfopenllm_v2/Sakalti/Saba1.5-Pro-3B/14e1dd44-92f1-4d97-be67-fa98c9802ff1.json deleted file mode 100644 index f891577d4..000000000 --- a/data/hfopenllm_v2/Sakalti/Saba1.5-Pro-3B/14e1dd44-92f1-4d97-be67-fa98c9802ff1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_Saba1.5-Pro-3B/1762652579.863965", - "retrieved_timestamp": "1762652579.863966", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/Saba1.5-Pro-3B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Saba1.5-Pro-3B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 2.9 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23860468002677343 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3622910501405146 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027190332326283987 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44054166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19581117021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/Saba1.5-Pro-3B/5e1e1376-bb22-4fc9-a1d6-3f2fe7d302b9.json b/data/hfopenllm_v2/Sakalti/Saba1.5-Pro-3B/5e1e1376-bb22-4fc9-a1d6-3f2fe7d302b9.json new file mode 100644 index 000000000..a268ff87b --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Saba1.5-Pro-3B/5e1e1376-bb22-4fc9-a1d6-3f2fe7d302b9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/Sakalti_Saba1.5-Pro-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Saba1.5-Pro-3B", + "id": "Sakalti/Saba1.5-Pro-3B", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 2.9 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2386 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3623 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0272 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4405 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1958 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Saba2-14B-Preview/cfdae559-f3f1-4a78-b4cc-fbfb8bb37b16.json b/data/hfopenllm_v2/Sakalti/Saba2-14B-Preview/cfdae559-f3f1-4a78-b4cc-fbfb8bb37b16.json new file mode 100644 index 000000000..2d5a6e567 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Saba2-14B-Preview/cfdae559-f3f1-4a78-b4cc-fbfb8bb37b16.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_Saba2-14B-Preview/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" 
+ }, + "model_info": { + "name": "Saba2-14B-Preview", + "id": "Sakalti/Saba2-14B-Preview", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4722 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6496 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3127 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3826 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4781 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5384 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Saba2-14B-Preview/e3e0180f-bbd8-491a-a41b-54801e9f71de.json b/data/hfopenllm_v2/Sakalti/Saba2-14B-Preview/e3e0180f-bbd8-491a-a41b-54801e9f71de.json deleted file mode 100644 index 7e3680c41..000000000 --- a/data/hfopenllm_v2/Sakalti/Saba2-14B-Preview/e3e0180f-bbd8-491a-a41b-54801e9f71de.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_Saba2-14B-Preview/1762652579.864167", - "retrieved_timestamp": "1762652579.864168", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/Saba2-14B-Preview", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Saba2-14B-Preview", 
- "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4721871301480073 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.649628096691823 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31268882175226587 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3825503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4781458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5383976063829787 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/Saba2-3B/a12208ce-e9e1-4476-8054-0d565efad92c.json b/data/hfopenllm_v2/Sakalti/Saba2-3B/a12208ce-e9e1-4476-8054-0d565efad92c.json new file mode 100644 index 000000000..1efd679c7 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Saba2-3B/a12208ce-e9e1-4476-8054-0d565efad92c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_Saba2-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Saba2-3B", + "id": "Sakalti/Saba2-3B", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2865 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2801 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + 
"hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.006 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3927 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.121 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Saba2-3B/b759686f-082e-44b6-9cf8-44a48f66c136.json b/data/hfopenllm_v2/Sakalti/Saba2-3B/b759686f-082e-44b6-9cf8-44a48f66c136.json deleted file mode 100644 index 555a8ae5b..000000000 --- a/data/hfopenllm_v2/Sakalti/Saba2-3B/b759686f-082e-44b6-9cf8-44a48f66c136.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_Saba2-3B/1762652579.864372", - "retrieved_timestamp": "1762652579.864373", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/Saba2-3B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Saba2-3B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28651533486704167 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28011877359000464 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2617449664429531 - } - 
}, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39269791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12101063829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/Sailor-japanese/8449b01f-c489-4008-97d4-aa3f0394cda4.json b/data/hfopenllm_v2/Sakalti/Sailor-japanese/8449b01f-c489-4008-97d4-aa3f0394cda4.json deleted file mode 100644 index 4f745fe6b..000000000 --- a/data/hfopenllm_v2/Sakalti/Sailor-japanese/8449b01f-c489-4008-97d4-aa3f0394cda4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_Sailor-japanese/1762652579.864587", - "retrieved_timestamp": "1762652579.864588", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/Sailor-japanese", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Sailor-japanese", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16046866757979938 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2912583602962783 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0030211480362537764 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3911770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11643949468085106 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/Sailor-japanese/f46e1eeb-8b8b-4d47-9510-445109b5518b.json b/data/hfopenllm_v2/Sakalti/Sailor-japanese/f46e1eeb-8b8b-4d47-9510-445109b5518b.json new file mode 100644 index 000000000..353e25d9c --- /dev/null +++ 
b/data/hfopenllm_v2/Sakalti/Sailor-japanese/f46e1eeb-8b8b-4d47-9510-445109b5518b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_Sailor-japanese/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sailor-japanese", + "id": "Sakalti/Sailor-japanese", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1605 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2913 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.003 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2534 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3912 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1164 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Saka-1.5B/7dc4970f-ce35-4ffa-9052-2ab40abb1e55.json b/data/hfopenllm_v2/Sakalti/Saka-1.5B/7dc4970f-ce35-4ffa-9052-2ab40abb1e55.json new file mode 100644 index 000000000..bae9cee17 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Saka-1.5B/7dc4970f-ce35-4ffa-9052-2ab40abb1e55.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_Saka-1.5B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM 
v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Saka-1.5B", + "id": "Sakalti/Saka-1.5B", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2726 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3988 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0801 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3739 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2415 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Saka-1.5B/854baf47-af97-46dd-acfe-a3710976fd57.json b/data/hfopenllm_v2/Sakalti/Saka-1.5B/854baf47-af97-46dd-acfe-a3710976fd57.json deleted file mode 100644 index 7134f2fc9..000000000 --- a/data/hfopenllm_v2/Sakalti/Saka-1.5B/854baf47-af97-46dd-acfe-a3710976fd57.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_Saka-1.5B/1762652579.8647912", - "retrieved_timestamp": "1762652579.8647912", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/Saka-1.5B", - "developer": 
"Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Saka-1.5B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726266306732802 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3987868899865206 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08006042296072508 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37390625000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24152260638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/Saka-14B/53556d59-3b32-44bc-9932-c52f05939b57.json b/data/hfopenllm_v2/Sakalti/Saka-14B/53556d59-3b32-44bc-9932-c52f05939b57.json deleted file mode 100644 index 965936c03..000000000 --- a/data/hfopenllm_v2/Sakalti/Saka-14B/53556d59-3b32-44bc-9932-c52f05939b57.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_Saka-14B/1762652579.8649821", - "retrieved_timestamp": "1762652579.864983", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/Saka-14B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Saka-14B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7174341857382855 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6496945295195891 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4093655589123867 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3959731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48859375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.539561170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/Saka-14B/823e886a-1431-4078-81a3-4b941983461d.json b/data/hfopenllm_v2/Sakalti/Saka-14B/823e886a-1431-4078-81a3-4b941983461d.json new file mode 100644 index 000000000..a2883bea2 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Saka-14B/823e886a-1431-4078-81a3-4b941983461d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_Saka-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Saka-14B", + "id": "Sakalti/Saka-14B", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7174 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6497 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4094 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.396 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { 
+ "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4886 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5396 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Saka-24B/583609f0-de5b-43cd-a667-bb2c36679fd2.json b/data/hfopenllm_v2/Sakalti/Saka-24B/583609f0-de5b-43cd-a667-bb2c36679fd2.json new file mode 100644 index 000000000..873a3e63b --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Saka-24B/583609f0-de5b-43cd-a667-bb2c36679fd2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_Saka-24B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Saka-24B", + "id": "Sakalti/Saka-24B", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3819 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6072 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1805 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3423 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4541 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": 
"TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4766 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Saka-24B/a5e13aa9-bf5f-4201-bc93-504521141f43.json b/data/hfopenllm_v2/Sakalti/Saka-24B/a5e13aa9-bf5f-4201-bc93-504521141f43.json deleted file mode 100644 index 63c7a5d24..000000000 --- a/data/hfopenllm_v2/Sakalti/Saka-24B/a5e13aa9-bf5f-4201-bc93-504521141f43.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_Saka-24B/1762652579.865175", - "retrieved_timestamp": "1762652579.865176", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/Saka-24B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Saka-24B", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38186123928952953 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6072116494463233 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18051359516616314 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3422818791946309 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45408333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4765625 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/Saka-7.2B/07f036d7-af59-49a8-8346-8a9a9dd21439.json b/data/hfopenllm_v2/Sakalti/Saka-7.2B/07f036d7-af59-49a8-8346-8a9a9dd21439.json deleted file mode 100644 index 1154031e6..000000000 --- a/data/hfopenllm_v2/Sakalti/Saka-7.2B/07f036d7-af59-49a8-8346-8a9a9dd21439.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_Saka-7.2B/1762652579.86556", - "retrieved_timestamp": "1762652579.865563", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/Saka-7.2B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Saka-7.2B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.292 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1544989516704566 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2945156585364917 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23909395973154363 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37105208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11602393617021277 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/Saka-7.2B/2d2cea8b-167e-4d63-b01c-537f372672f9.json b/data/hfopenllm_v2/Sakalti/Saka-7.2B/2d2cea8b-167e-4d63-b01c-537f372672f9.json new file mode 100644 index 000000000..fc53daeea --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Saka-7.2B/2d2cea8b-167e-4d63-b01c-537f372672f9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_Saka-7.2B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Saka-7.2B", + "id": "Sakalti/Saka-7.2B", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.292 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1545 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2391 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3711 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.116 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Saka-7.6B/10923a84-a611-4830-b84c-0e91c0628541.json b/data/hfopenllm_v2/Sakalti/Saka-7.6B/10923a84-a611-4830-b84c-0e91c0628541.json deleted file mode 100644 index 817d37ded..000000000 --- a/data/hfopenllm_v2/Sakalti/Saka-7.6B/10923a84-a611-4830-b84c-0e91c0628541.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_Saka-7.6B/1762652579.865891", - "retrieved_timestamp": "1762652579.8658922", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/Saka-7.6B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Saka-7.6B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45242844541372446 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5655284792075981 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 
- }, - "score_details": { - "score": 0.3255287009063444 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4489375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45403922872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/Saka-7.6B/f584f596-3a17-404a-81a2-3033ad38cad6.json b/data/hfopenllm_v2/Sakalti/Saka-7.6B/f584f596-3a17-404a-81a2-3033ad38cad6.json new file mode 100644 index 000000000..045f0e9ed --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Saka-7.6B/f584f596-3a17-404a-81a2-3033ad38cad6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_Saka-7.6B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Saka-7.6B", + "id": "Sakalti/Saka-7.6B", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4524 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5655 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3255 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3163 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4489 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.454 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SakaMoe-3x1.6B-Instruct/e806f2f4-0a10-49f6-a67e-dc1dd0a59ede.json b/data/hfopenllm_v2/Sakalti/SakaMoe-3x1.6B-Instruct/e806f2f4-0a10-49f6-a67e-dc1dd0a59ede.json deleted file mode 100644 index 885953ea3..000000000 --- a/data/hfopenllm_v2/Sakalti/SakaMoe-3x1.6B-Instruct/e806f2f4-0a10-49f6-a67e-dc1dd0a59ede.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_SakaMoe-3x1.6B-Instruct/1762652579.866188", - "retrieved_timestamp": "1762652579.8661902", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/SakaMoe-3x1.6B-Instruct", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SakaMoe-3x1.6B-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2MoeForCausalLM", - "params_billions": 1.572 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23708094522533543 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.328247997224552 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33421875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18824800531914893 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/SakaMoe-3x1.6B-Instruct/ebb0930f-92be-4e1b-a2a6-779f69d2151c.json b/data/hfopenllm_v2/Sakalti/SakaMoe-3x1.6B-Instruct/ebb0930f-92be-4e1b-a2a6-779f69d2151c.json new file mode 100644 index 000000000..e8b9b5045 --- /dev/null +++ 
b/data/hfopenllm_v2/Sakalti/SakaMoe-3x1.6B-Instruct/ebb0930f-92be-4e1b-a2a6-779f69d2151c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_SakaMoe-3x1.6B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SakaMoe-3x1.6B-Instruct", + "id": "Sakalti/SakaMoe-3x1.6B-Instruct", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2MoeForCausalLM", + "params_billions": 1.572 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2371 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3282 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0544 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3342 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1882 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SakalFusion-7B-Alpha/2329f6f2-228a-400b-9b2d-4ad6dd278b79.json b/data/hfopenllm_v2/Sakalti/SakalFusion-7B-Alpha/2329f6f2-228a-400b-9b2d-4ad6dd278b79.json deleted file mode 100644 index 4d0626d9a..000000000 --- a/data/hfopenllm_v2/Sakalti/SakalFusion-7B-Alpha/2329f6f2-228a-400b-9b2d-4ad6dd278b79.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_SakalFusion-7B-Alpha/1762652579.866478", - 
"retrieved_timestamp": "1762652579.8664792", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/SakalFusion-7B-Alpha", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SakalFusion-7B-Alpha", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5289653674472622 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.559133672829116 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38444108761329304 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32550335570469796 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4581458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4473902925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/SakalFusion-7B-Alpha/b8926567-e208-442e-8ba8-c6dd4ecc5c4a.json b/data/hfopenllm_v2/Sakalti/SakalFusion-7B-Alpha/b8926567-e208-442e-8ba8-c6dd4ecc5c4a.json new file mode 100644 index 000000000..d9f49d39c --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SakalFusion-7B-Alpha/b8926567-e208-442e-8ba8-c6dd4ecc5c4a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_SakalFusion-7B-Alpha/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SakalFusion-7B-Alpha", + "id": "Sakalti/SakalFusion-7B-Alpha", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.529 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5591 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3844 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3255 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4581 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4474 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SakalFusion-7B-Beta/4bf6efe1-81fc-48f6-96ba-8df9ffbef2f2.json b/data/hfopenllm_v2/Sakalti/SakalFusion-7B-Beta/4bf6efe1-81fc-48f6-96ba-8df9ffbef2f2.json new file mode 100644 index 000000000..55e8681c6 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SakalFusion-7B-Beta/4bf6efe1-81fc-48f6-96ba-8df9ffbef2f2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_SakalFusion-7B-Beta/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SakalFusion-7B-Beta", + "id": "Sakalti/SakalFusion-7B-Beta", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1809 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2881 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2433 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3872 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.109 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SakalFusion-7B-Beta/537a91f9-b1f3-49bf-bef7-a9ef8578c284.json b/data/hfopenllm_v2/Sakalti/SakalFusion-7B-Beta/537a91f9-b1f3-49bf-bef7-a9ef8578c284.json deleted file mode 100644 index 61f07679f..000000000 --- a/data/hfopenllm_v2/Sakalti/SakalFusion-7B-Beta/537a91f9-b1f3-49bf-bef7-a9ef8578c284.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_SakalFusion-7B-Beta/1762652579.866734", - "retrieved_timestamp": "1762652579.8667352", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/SakalFusion-7B-Beta", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SakalFusion-7B-Beta", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18090222830977362 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2881298650933641 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24328859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3872083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10895944148936171 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/Tara-3.8B-v1.1/05ffcb7a-2694-4276-bf45-73e1110bc494.json b/data/hfopenllm_v2/Sakalti/Tara-3.8B-v1.1/05ffcb7a-2694-4276-bf45-73e1110bc494.json new file mode 100644 index 000000000..1aeae7ac8 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Tara-3.8B-v1.1/05ffcb7a-2694-4276-bf45-73e1110bc494.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_Tara-3.8B-v1.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Tara-3.8B-v1.1", + "id": "Sakalti/Tara-3.8B-v1.1", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4062 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4886 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1156 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.478 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3281 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Tara-3.8B-v1.1/cd884e16-7e4d-4d17-8bad-5819604e0384.json b/data/hfopenllm_v2/Sakalti/Tara-3.8B-v1.1/cd884e16-7e4d-4d17-8bad-5819604e0384.json deleted file mode 100644 index 030dd19d2..000000000 --- a/data/hfopenllm_v2/Sakalti/Tara-3.8B-v1.1/cd884e16-7e4d-4d17-8bad-5819604e0384.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_Tara-3.8B-v1.1/1762652579.866961", - "retrieved_timestamp": "1762652579.866962", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/Tara-3.8B-v1.1", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Tara-3.8B-v1.1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40621661635571393 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4885743296577029 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11555891238670694 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4779583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.328125 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/light-1.1-3B/9da5b03b-0207-4e98-a5bf-5a658225e78f.json b/data/hfopenllm_v2/Sakalti/light-1.1-3B/9da5b03b-0207-4e98-a5bf-5a658225e78f.json deleted file mode 100644 index b81b9bff6..000000000 
--- a/data/hfopenllm_v2/Sakalti/light-1.1-3B/9da5b03b-0207-4e98-a5bf-5a658225e78f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_light-1.1-3B/1762652579.867201", - "retrieved_timestamp": "1762652579.867202", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/light-1.1-3B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/light-1.1-3B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27345110972220377 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28027723572953045 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.011329305135951661 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2617449664429531 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3900625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12092752659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/light-1.1-3B/dc3b944b-a57a-44ab-87ac-8e1882b7bcce.json b/data/hfopenllm_v2/Sakalti/light-1.1-3B/dc3b944b-a57a-44ab-87ac-8e1882b7bcce.json new file mode 100644 index 000000000..731f06557 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/light-1.1-3B/dc3b944b-a57a-44ab-87ac-8e1882b7bcce.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_light-1.1-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "light-1.1-3B", + "id": "Sakalti/light-1.1-3B", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": 
"google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2735 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2803 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0113 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3901 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1209 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/light-3B/154f70b4-d77c-4d1b-b85c-bc81fe8162bd.json b/data/hfopenllm_v2/Sakalti/light-3B/154f70b4-d77c-4d1b-b85c-bc81fe8162bd.json new file mode 100644 index 000000000..08a907de4 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/light-3B/154f70b4-d77c-4d1b-b85c-bc81fe8162bd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_light-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "light-3B", + "id": "Sakalti/light-3B", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5337 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + 
"source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4831 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2591 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4015 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3775 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/light-3B/a1593642-8d60-4680-90aa-8c3789d536d6.json b/data/hfopenllm_v2/Sakalti/light-3B/a1593642-8d60-4680-90aa-8c3789d536d6.json deleted file mode 100644 index 57924f8ba..000000000 --- a/data/hfopenllm_v2/Sakalti/light-3B/a1593642-8d60-4680-90aa-8c3789d536d6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_light-3B/1762652579.8674219", - "retrieved_timestamp": "1762652579.867423", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/light-3B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/light-3B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5337360425892188 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4831034368803701 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2590634441087613 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40149999999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3774933510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/light-3b-beta/2a4293ca-2434-4752-a08f-163257e0fde4.json b/data/hfopenllm_v2/Sakalti/light-3b-beta/2a4293ca-2434-4752-a08f-163257e0fde4.json deleted file mode 100644 index 267431f93..000000000 --- a/data/hfopenllm_v2/Sakalti/light-3b-beta/2a4293ca-2434-4752-a08f-163257e0fde4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_light-3b-beta/1762652579.867648", - "retrieved_timestamp": "1762652579.867649", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/light-3b-beta", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/light-3b-beta", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5485489612007252 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48152297262112204 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.277190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40146875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.3758311170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/light-3b-beta/998316d2-389a-4ce0-b0b0-0430c1361de7.json b/data/hfopenllm_v2/Sakalti/light-3b-beta/998316d2-389a-4ce0-b0b0-0430c1361de7.json new file mode 100644 index 000000000..e8992c2b0 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/light-3b-beta/998316d2-389a-4ce0-b0b0-0430c1361de7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_light-3b-beta/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "light-3b-beta", + "id": "Sakalti/light-3b-beta", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5485 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4815 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2772 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4015 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3758 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/light-7b-beta/a66efce1-f6d2-4fad-964b-cc4e80012145.json b/data/hfopenllm_v2/Sakalti/light-7b-beta/a66efce1-f6d2-4fad-964b-cc4e80012145.json deleted file mode 100644 index 
04e1e4876..000000000 --- a/data/hfopenllm_v2/Sakalti/light-7b-beta/a66efce1-f6d2-4fad-964b-cc4e80012145.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_light-7b-beta/1762652579.867865", - "retrieved_timestamp": "1762652579.867866", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/light-7b-beta", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/light-7b-beta", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6233870574520051 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5548193064288276 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3768882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42906249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.445561835106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/light-7b-beta/ce803cde-6e23-433c-a4d2-38c5cb5ba14b.json b/data/hfopenllm_v2/Sakalti/light-7b-beta/ce803cde-6e23-433c-a4d2-38c5cb5ba14b.json new file mode 100644 index 000000000..0e0fb6d0e --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/light-7b-beta/ce803cde-6e23-433c-a4d2-38c5cb5ba14b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_light-7b-beta/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "light-7b-beta", + "id": "Sakalti/light-7b-beta", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + 
"source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6234 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5548 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3769 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3213 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4291 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4456 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/llama-3-yanyuedao-8b-instruct/2519485b-47cd-497c-a349-9e69db0266f3.json b/data/hfopenllm_v2/Sakalti/llama-3-yanyuedao-8b-instruct/2519485b-47cd-497c-a349-9e69db0266f3.json new file mode 100644 index 000000000..6cdfb156a --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/llama-3-yanyuedao-8b-instruct/2519485b-47cd-497c-a349-9e69db0266f3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_llama-3-yanyuedao-8b-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-3-yanyuedao-8b-instruct", + "id": "Sakalti/llama-3-yanyuedao-8b-instruct", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2186 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.435 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0385 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4199 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/llama-3-yanyuedao-8b-instruct/cb550de6-4cd6-411e-9426-dc12421404ad.json b/data/hfopenllm_v2/Sakalti/llama-3-yanyuedao-8b-instruct/cb550de6-4cd6-411e-9426-dc12421404ad.json deleted file mode 100644 index 0ae0ffb7b..000000000 --- a/data/hfopenllm_v2/Sakalti/llama-3-yanyuedao-8b-instruct/cb550de6-4cd6-411e-9426-dc12421404ad.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_llama-3-yanyuedao-8b-instruct/1762652579.8681011", - "retrieved_timestamp": "1762652579.8681011", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/llama-3-yanyuedao-8b-instruct", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/llama-3-yanyuedao-8b-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21857116894284942 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy 
on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43497849055247495 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41985416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29105718085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/magro-7B/56d86e26-4ee6-4652-9b7b-a538238a24d4.json b/data/hfopenllm_v2/Sakalti/magro-7B/56d86e26-4ee6-4652-9b7b-a538238a24d4.json new file mode 100644 index 000000000..412d9d54e --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/magro-7B/56d86e26-4ee6-4652-9b7b-a538238a24d4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_magro-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "magro-7B", + "id": "Sakalti/magro-7B", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1344 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4186 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0204 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.446 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2765 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/magro-7B/c2c87be8-4137-4bcc-8cbe-4589d193e94d.json b/data/hfopenllm_v2/Sakalti/magro-7B/c2c87be8-4137-4bcc-8cbe-4589d193e94d.json deleted file mode 100644 index e21403072..000000000 --- a/data/hfopenllm_v2/Sakalti/magro-7B/c2c87be8-4137-4bcc-8cbe-4589d193e94d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_magro-7B/1762652579.868387", - "retrieved_timestamp": "1762652579.8683882", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/magro-7B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/magro-7B", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13439008497453425 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4185526485966236 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44598958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.2765126329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/mergekit-01/416b89e4-5e8a-4131-9403-e8967a4127b8.json b/data/hfopenllm_v2/Sakalti/mergekit-01/416b89e4-5e8a-4131-9403-e8967a4127b8.json new file mode 100644 index 000000000..6209d9fb6 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/mergekit-01/416b89e4-5e8a-4131-9403-e8967a4127b8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_mergekit-01/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mergekit-01", + "id": "Sakalti/mergekit-01", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6234 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5548 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3769 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3213 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4291 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4456 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/mergekit-01/dd01becb-c2c0-4593-ac1e-db2ff11aa17b.json b/data/hfopenllm_v2/Sakalti/mergekit-01/dd01becb-c2c0-4593-ac1e-db2ff11aa17b.json deleted file mode 100644 index 79514fc79..000000000 --- 
a/data/hfopenllm_v2/Sakalti/mergekit-01/dd01becb-c2c0-4593-ac1e-db2ff11aa17b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_mergekit-01/1762652579.868608", - "retrieved_timestamp": "1762652579.868609", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/mergekit-01", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/mergekit-01", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6233870574520051 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5548193064288276 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3768882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42906249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.445561835106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/mergekit-della_linear-vmeykci/347a90e8-d8b7-4266-8242-ceac865796a0.json b/data/hfopenllm_v2/Sakalti/mergekit-della_linear-vmeykci/347a90e8-d8b7-4266-8242-ceac865796a0.json new file mode 100644 index 000000000..17a180e43 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/mergekit-della_linear-vmeykci/347a90e8-d8b7-4266-8242-ceac865796a0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_mergekit-della_linear-vmeykci/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mergekit-della_linear-vmeykci", + "id": "Sakalti/mergekit-della_linear-vmeykci", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": 
"IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1126 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2816 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0106 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3897 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1089 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/mergekit-della_linear-vmeykci/a4bd1768-2382-47fe-a8bd-6e42bda06d2f.json b/data/hfopenllm_v2/Sakalti/mergekit-della_linear-vmeykci/a4bd1768-2382-47fe-a8bd-6e42bda06d2f.json deleted file mode 100644 index 753f96e5a..000000000 --- a/data/hfopenllm_v2/Sakalti/mergekit-della_linear-vmeykci/a4bd1768-2382-47fe-a8bd-6e42bda06d2f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_mergekit-della_linear-vmeykci/1762652579.868854", - "retrieved_timestamp": "1762652579.868856", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/mergekit-della_linear-vmeykci", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/mergekit-della_linear-vmeykci", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on 
IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1126078804239418 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28155028620092587 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38968749999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10887632978723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/model-3/389f7ab8-b30e-4d0c-b9a4-625e74a1f73f.json b/data/hfopenllm_v2/Sakalti/model-3/389f7ab8-b30e-4d0c-b9a4-625e74a1f73f.json new file mode 100644 index 000000000..768fa5f46 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/model-3/389f7ab8-b30e-4d0c-b9a4-625e74a1f73f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_model-3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "model-3", + "id": "Sakalti/model-3", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6264 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5542 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": 
{ + "score": 0.3708 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3213 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4264 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4455 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/model-3/efd2a4d7-afcd-4653-ad4f-7d4f7206be95.json b/data/hfopenllm_v2/Sakalti/model-3/efd2a4d7-afcd-4653-ad4f-7d4f7206be95.json deleted file mode 100644 index bf5ad7ac3..000000000 --- a/data/hfopenllm_v2/Sakalti/model-3/efd2a4d7-afcd-4653-ad4f-7d4f7206be95.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_model-3/1762652579.869146", - "retrieved_timestamp": "1762652579.869148", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/model-3", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/model-3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6263846593704703 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.554216994021922 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37084592145015105 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4263958333333333 - } - }, 
- { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4454787234042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/qwen2.5-2.3B/6ae33b7f-53a1-45c5-8b0b-d462188c3f9d.json b/data/hfopenllm_v2/Sakalti/qwen2.5-2.3B/6ae33b7f-53a1-45c5-8b0b-d462188c3f9d.json new file mode 100644 index 000000000..a8265114f --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/qwen2.5-2.3B/6ae33b7f-53a1-45c5-8b0b-d462188c3f9d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_qwen2.5-2.3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwen2.5-2.3B", + "id": "Sakalti/qwen2.5-2.3B", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2Model", + "params_billions": 2.339 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1288 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2849 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0053 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2517 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3857 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1173 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/Sakalti/tara-3.8B/695d7b01-14e6-40e4-b398-541e87a812c8.json b/data/hfopenllm_v2/Sakalti/tara-3.8B/695d7b01-14e6-40e4-b398-541e87a812c8.json deleted file mode 100644 index e9f983a14..000000000 --- a/data/hfopenllm_v2/Sakalti/tara-3.8B/695d7b01-14e6-40e4-b398-541e87a812c8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_tara-3.8B/1762652579.86961", - "retrieved_timestamp": "1762652579.869611", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/tara-3.8B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/tara-3.8B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4077403511571519 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4885743296577029 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11555891238670694 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4779583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.328125 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/tara-3.8B/d96fb0b2-7cba-4cc4-a5f4-b8a451754857.json b/data/hfopenllm_v2/Sakalti/tara-3.8B/d96fb0b2-7cba-4cc4-a5f4-b8a451754857.json new file mode 100644 index 000000000..87315ad45 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/tara-3.8B/d96fb0b2-7cba-4cc4-a5f4-b8a451754857.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_tara-3.8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tara-3.8B", + "id": "Sakalti/tara-3.8B", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + 
"params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4077 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4886 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1156 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.478 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3281 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/ultiima-14B-v0.2/f3f888bb-5e99-4521-83b2-4e182f492220.json b/data/hfopenllm_v2/Sakalti/ultiima-14B-v0.2/f3f888bb-5e99-4521-83b2-4e182f492220.json deleted file mode 100644 index a935e2236..000000000 --- a/data/hfopenllm_v2/Sakalti/ultiima-14B-v0.2/f3f888bb-5e99-4521-83b2-4e182f492220.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_ultiima-14B-v0.2/1762652579.870035", - "retrieved_timestamp": "1762652579.870036", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/ultiima-14B-v0.2", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/ultiima-14B-v0.2", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on 
IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7069930007934502 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6472012505703305 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3995468277945619 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3825503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4793541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5387300531914894 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/ultiima-14B-v0.2/f8d362f6-eafc-4d11-bc40-d169d69d3a95.json b/data/hfopenllm_v2/Sakalti/ultiima-14B-v0.2/f8d362f6-eafc-4d11-bc40-d169d69d3a95.json new file mode 100644 index 000000000..8ee1c7c15 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/ultiima-14B-v0.2/f8d362f6-eafc-4d11-bc40-d169d69d3a95.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_ultiima-14B-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ultiima-14B-v0.2", + "id": "Sakalti/ultiima-14B-v0.2", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.707 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6472 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 
0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3995 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3826 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4794 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5387 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/ultiima-14B-v0.3/4bacd3dd-44c2-42d8-98c0-3eeb920dc0f0.json b/data/hfopenllm_v2/Sakalti/ultiima-14B-v0.3/4bacd3dd-44c2-42d8-98c0-3eeb920dc0f0.json new file mode 100644 index 000000000..86e48a32c --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/ultiima-14B-v0.3/4bacd3dd-44c2-42d8-98c0-3eeb920dc0f0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_ultiima-14B-v0.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ultiima-14B-v0.3", + "id": "Sakalti/ultiima-14B-v0.3", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.704 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6398 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3965 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy 
on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3767 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4754 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5337 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/ultiima-14B-v0.3/5cd3794f-990f-4965-9fbc-7faf3216e808.json b/data/hfopenllm_v2/Sakalti/ultiima-14B-v0.3/5cd3794f-990f-4965-9fbc-7faf3216e808.json deleted file mode 100644 index ec8dcd010..000000000 --- a/data/hfopenllm_v2/Sakalti/ultiima-14B-v0.3/5cd3794f-990f-4965-9fbc-7faf3216e808.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_ultiima-14B-v0.3/1762652579.870242", - "retrieved_timestamp": "1762652579.870243", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/ultiima-14B-v0.3", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/ultiima-14B-v0.3", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7040452665593957 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.639820771660141 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39652567975830816 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3766778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47541666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5336602393617021 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/ultiima-14B-v0.4/688f9751-e261-41c6-a7a4-2dc33a702e09.json b/data/hfopenllm_v2/Sakalti/ultiima-14B-v0.4/688f9751-e261-41c6-a7a4-2dc33a702e09.json deleted file mode 100644 index 29e856523..000000000 --- a/data/hfopenllm_v2/Sakalti/ultiima-14B-v0.4/688f9751-e261-41c6-a7a4-2dc33a702e09.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_ultiima-14B-v0.4/1762652579.8704672", - "retrieved_timestamp": "1762652579.8704839", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/ultiima-14B-v0.4", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/ultiima-14B-v0.4", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3008284684636764 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6420007859105136 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35347432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3959731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4885625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.527842420212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/ultiima-14B-v0.4/de073f45-0d14-4f8a-9d3b-d4fd961186b8.json b/data/hfopenllm_v2/Sakalti/ultiima-14B-v0.4/de073f45-0d14-4f8a-9d3b-d4fd961186b8.json new file mode 100644 index 000000000..172ff4245 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/ultiima-14B-v0.4/de073f45-0d14-4f8a-9d3b-d4fd961186b8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_ultiima-14B-v0.4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"ultiima-14B-v0.4", + "id": "Sakalti/ultiima-14B-v0.4", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3008 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.642 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3535 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.396 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4886 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5278 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/ultiima-14B/abf448a9-decf-432d-8883-6e1492a7c040.json b/data/hfopenllm_v2/Sakalti/ultiima-14B/abf448a9-decf-432d-8883-6e1492a7c040.json deleted file mode 100644 index ab0d9d8b9..000000000 --- a/data/hfopenllm_v2/Sakalti/ultiima-14B/abf448a9-decf-432d-8883-6e1492a7c040.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_ultiima-14B/1762652579.869824", - "retrieved_timestamp": "1762652579.8698251", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/ultiima-14B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/ultiima-14B", - "additional_details": { - "precision": "float16", - "architecture": 
"Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5700563394016764 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6491153472177067 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4697885196374622 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37416107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4717604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5380651595744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/ultiima-14B/fd88d234-b3f9-4f48-896c-af58f1a69880.json b/data/hfopenllm_v2/Sakalti/ultiima-14B/fd88d234-b3f9-4f48-896c-af58f1a69880.json new file mode 100644 index 000000000..640b74564 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/ultiima-14B/fd88d234-b3f9-4f48-896c-af58f1a69880.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_ultiima-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ultiima-14B", + "id": "Sakalti/ultiima-14B", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5701 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6491 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + 
"metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4698 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3742 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4718 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5381 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/ultiima-32B/18f686ca-453d-4a0c-9f1a-e2f4ba53399c.json b/data/hfopenllm_v2/Sakalti/ultiima-32B/18f686ca-453d-4a0c-9f1a-e2f4ba53399c.json deleted file mode 100644 index 1012d5d75..000000000 --- a/data/hfopenllm_v2/Sakalti/ultiima-32B/18f686ca-453d-4a0c-9f1a-e2f4ba53399c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_ultiima-32B/1762652579.870782", - "retrieved_timestamp": "1762652579.870784", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/ultiima-32B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/ultiima-32B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6854357549080883 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7037285782797875 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4962235649546828 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3808724832214765 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4994791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5910073138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/ultiima-32B/273745b1-3761-463e-b9ab-7860968064eb.json b/data/hfopenllm_v2/Sakalti/ultiima-32B/273745b1-3761-463e-b9ab-7860968064eb.json new file mode 100644 index 000000000..d7a87927c --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/ultiima-32B/273745b1-3761-463e-b9ab-7860968064eb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_ultiima-32B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ultiima-32B", + "id": "Sakalti/ultiima-32B", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6854 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7037 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4962 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3809 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on 
MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.591 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/ultiima-72B-v1.5/101d84d3-e741-4eb2-bd8a-db6c12022fe2.json b/data/hfopenllm_v2/Sakalti/ultiima-72B-v1.5/101d84d3-e741-4eb2-bd8a-db6c12022fe2.json new file mode 100644 index 000000000..aec4261bf --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/ultiima-72B-v1.5/101d84d3-e741-4eb2-bd8a-db6c12022fe2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_ultiima-72B-v1.5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ultiima-72B-v1.5", + "id": "Sakalti/ultiima-72B-v1.5", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.655 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7392 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4396 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4136 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4691 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6054 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/Sakalti/ultiima-72B-v1.5/258aae52-b934-4ba1-bdb0-e15bd8277234.json b/data/hfopenllm_v2/Sakalti/ultiima-72B-v1.5/258aae52-b934-4ba1-bdb0-e15bd8277234.json deleted file mode 100644 index 0b87e4749..000000000 --- a/data/hfopenllm_v2/Sakalti/ultiima-72B-v1.5/258aae52-b934-4ba1-bdb0-e15bd8277234.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_ultiima-72B-v1.5/1762652579.8712351", - "retrieved_timestamp": "1762652579.8712351", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/ultiima-72B-v1.5", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/ultiima-72B-v1.5", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6549610588793291 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7391727188223717 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4395770392749245 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41359060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46909375000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6053856382978723 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sakalti/ultiima-72B/9c82deca-1998-4506-b038-c5dd592324d8.json b/data/hfopenllm_v2/Sakalti/ultiima-72B/9c82deca-1998-4506-b038-c5dd592324d8.json new file mode 100644 index 000000000..8be07e143 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/ultiima-72B/9c82deca-1998-4506-b038-c5dd592324d8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sakalti_ultiima-72B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ultiima-72B", + "id": "Sakalti/ultiima-72B", + "developer": "Sakalti", + "inference_platform": "unknown", + "additional_details": { + 
"precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.714 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7218 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5355 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4144 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4652 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5906 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/ultiima-72B/cce8480a-353b-4f9b-8f6f-b2f1e9ae601a.json b/data/hfopenllm_v2/Sakalti/ultiima-72B/cce8480a-353b-4f9b-8f6f-b2f1e9ae601a.json deleted file mode 100644 index aa9398ade..000000000 --- a/data/hfopenllm_v2/Sakalti/ultiima-72B/cce8480a-353b-4f9b-8f6f-b2f1e9ae601a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_ultiima-72B/1762652579.8710139", - "retrieved_timestamp": "1762652579.8710148", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/ultiima-72B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/ultiima-72B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7140121544169471 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7217809739144654 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5354984894259819 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41442953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46518750000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.590591755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/Salesforce/LLaMA-3-8B-SFR-Iterative-DPO-R/1bf65062-4526-407d-ba4f-866b045dbf3b.json b/data/hfopenllm_v2/Salesforce/LLaMA-3-8B-SFR-Iterative-DPO-R/1bf65062-4526-407d-ba4f-866b045dbf3b.json deleted file mode 100644 index ae14a06e5..000000000 --- a/data/hfopenllm_v2/Salesforce/LLaMA-3-8B-SFR-Iterative-DPO-R/1bf65062-4526-407d-ba4f-866b045dbf3b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Salesforce_LLaMA-3-8B-SFR-Iterative-DPO-R/1762652579.8714519", - "retrieved_timestamp": "1762652579.8714519", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Salesforce/LLaMA-3-8B-SFR-Iterative-DPO-R", - "developer": "Salesforce", - "inference_platform": "unknown", - "id": "Salesforce/LLaMA-3-8B-SFR-Iterative-DPO-R", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38156203318306536 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5011950469666927 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.09138972809667674 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36333333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3172373670212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/Salesforce/LLaMA-3-8B-SFR-Iterative-DPO-R/da620a94-4c0d-4c50-9619-10e12001fb5d.json b/data/hfopenllm_v2/Salesforce/LLaMA-3-8B-SFR-Iterative-DPO-R/da620a94-4c0d-4c50-9619-10e12001fb5d.json new file mode 100644 index 000000000..d2ff7565c --- /dev/null +++ b/data/hfopenllm_v2/Salesforce/LLaMA-3-8B-SFR-Iterative-DPO-R/da620a94-4c0d-4c50-9619-10e12001fb5d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Salesforce_LLaMA-3-8B-SFR-Iterative-DPO-R/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LLaMA-3-8B-SFR-Iterative-DPO-R", + "id": "Salesforce/LLaMA-3-8B-SFR-Iterative-DPO-R", + "developer": "Salesforce", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3816 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5012 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0914 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2878 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": 
"TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3633 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3172 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SanjiWatsuki/Kunoichi-DPO-v2-7B/51dade8f-34e7-4237-8691-22655249bf76.json b/data/hfopenllm_v2/SanjiWatsuki/Kunoichi-DPO-v2-7B/51dade8f-34e7-4237-8691-22655249bf76.json new file mode 100644 index 000000000..ea148741d --- /dev/null +++ b/data/hfopenllm_v2/SanjiWatsuki/Kunoichi-DPO-v2-7B/51dade8f-34e7-4237-8691-22655249bf76.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SanjiWatsuki_Kunoichi-DPO-v2-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kunoichi-DPO-v2-7B", + "id": "SanjiWatsuki/Kunoichi-DPO-v2-7B", + "developer": "SanjiWatsuki", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5431 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4416 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0763 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4188 + } + 
}, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3107 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SanjiWatsuki/Kunoichi-DPO-v2-7B/dc7243af-efa9-4169-8d31-36ef75dfe2e3.json b/data/hfopenllm_v2/SanjiWatsuki/Kunoichi-DPO-v2-7B/dc7243af-efa9-4169-8d31-36ef75dfe2e3.json deleted file mode 100644 index 577e116c4..000000000 --- a/data/hfopenllm_v2/SanjiWatsuki/Kunoichi-DPO-v2-7B/dc7243af-efa9-4169-8d31-36ef75dfe2e3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SanjiWatsuki_Kunoichi-DPO-v2-7B/1762652579.871708", - "retrieved_timestamp": "1762652579.871708", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SanjiWatsuki/Kunoichi-DPO-v2-7B", - "developer": "SanjiWatsuki", - "inference_platform": "unknown", - "id": "SanjiWatsuki/Kunoichi-DPO-v2-7B", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5431034100630772 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4415592450869275 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07628398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41883333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3106715425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/SanjiWatsuki/Silicon-Maid-7B/5d7ffac9-a734-44ef-aa1e-43ddbe68fd6a.json b/data/hfopenllm_v2/SanjiWatsuki/Silicon-Maid-7B/5d7ffac9-a734-44ef-aa1e-43ddbe68fd6a.json deleted file mode 100644 index 4b4125142..000000000 --- a/data/hfopenllm_v2/SanjiWatsuki/Silicon-Maid-7B/5d7ffac9-a734-44ef-aa1e-43ddbe68fd6a.json +++ /dev/null @@ -1,105 +0,0 @@ 
-{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SanjiWatsuki_Silicon-Maid-7B/1762652579.87197", - "retrieved_timestamp": "1762652579.8719711", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SanjiWatsuki/Silicon-Maid-7B", - "developer": "SanjiWatsuki", - "inference_platform": "unknown", - "id": "SanjiWatsuki/Silicon-Maid-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5367835121920947 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4127972831009074 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41883333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.308344414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/SanjiWatsuki/Silicon-Maid-7B/cdd59385-0a54-4ca1-b24d-9316a70f2875.json b/data/hfopenllm_v2/SanjiWatsuki/Silicon-Maid-7B/cdd59385-0a54-4ca1-b24d-9316a70f2875.json new file mode 100644 index 000000000..f3201e23e --- /dev/null +++ b/data/hfopenllm_v2/SanjiWatsuki/Silicon-Maid-7B/cdd59385-0a54-4ca1-b24d-9316a70f2875.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SanjiWatsuki_Silicon-Maid-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Silicon-Maid-7B", + "id": "SanjiWatsuki/Silicon-Maid-7B", + "developer": "SanjiWatsuki", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5368 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4128 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.065 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4188 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3083 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sao10K/70B-L3.3-Cirrus-x1/514a3103-e8a1-49e8-b9da-a85963f5b3dd.json b/data/hfopenllm_v2/Sao10K/70B-L3.3-Cirrus-x1/514a3103-e8a1-49e8-b9da-a85963f5b3dd.json new file mode 100644 index 000000000..c45307b07 --- /dev/null +++ b/data/hfopenllm_v2/Sao10K/70B-L3.3-Cirrus-x1/514a3103-e8a1-49e8-b9da-a85963f5b3dd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sao10K_70B-L3.3-Cirrus-x1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "70B-L3.3-Cirrus-x1", + "id": "Sao10K/70B-L3.3-Cirrus-x1", + "developer": "Sao10K", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6681 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + 
"dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7029 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3739 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4497 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4842 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5378 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sao10K/70B-L3.3-Cirrus-x1/660f8ede-1b7f-4438-8a97-51db77058725.json b/data/hfopenllm_v2/Sao10K/70B-L3.3-Cirrus-x1/660f8ede-1b7f-4438-8a97-51db77058725.json deleted file mode 100644 index cdae59ee2..000000000 --- a/data/hfopenllm_v2/Sao10K/70B-L3.3-Cirrus-x1/660f8ede-1b7f-4438-8a97-51db77058725.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sao10K_70B-L3.3-Cirrus-x1/1762652579.8721752", - "retrieved_timestamp": "1762652579.8721762", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sao10K/70B-L3.3-Cirrus-x1", - "developer": "Sao10K", - "inference_platform": "unknown", - "id": "Sao10K/70B-L3.3-Cirrus-x1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6680751517085777 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7028970787833794 - } - }, - { - "evaluation_name": "MATH Level 5", - 
"metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37386706948640486 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44966442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4841666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5378158244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sao10K/Fimbulvetr-11B-v2/135ade7c-f0d1-495a-a5b5-c95712cf0c0f.json b/data/hfopenllm_v2/Sao10K/Fimbulvetr-11B-v2/135ade7c-f0d1-495a-a5b5-c95712cf0c0f.json deleted file mode 100644 index ef9c0f6cb..000000000 --- a/data/hfopenllm_v2/Sao10K/Fimbulvetr-11B-v2/135ade7c-f0d1-495a-a5b5-c95712cf0c0f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sao10K_Fimbulvetr-11B-v2/1762652579.872427", - "retrieved_timestamp": "1762652579.872428", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sao10K/Fimbulvetr-11B-v2", - "developer": "Sao10K", - "inference_platform": "unknown", - "id": "Sao10K/Fimbulvetr-11B-v2", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5100056738343152 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4544495065184342 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43536458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33011968085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sao10K/Fimbulvetr-11B-v2/daafaafa-1e00-4433-95f3-91c169598ebd.json b/data/hfopenllm_v2/Sao10K/Fimbulvetr-11B-v2/daafaafa-1e00-4433-95f3-91c169598ebd.json new file mode 100644 index 000000000..3f9b9daef --- /dev/null +++ b/data/hfopenllm_v2/Sao10K/Fimbulvetr-11B-v2/daafaafa-1e00-4433-95f3-91c169598ebd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sao10K_Fimbulvetr-11B-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fimbulvetr-11B-v2", + "id": "Sao10K/Fimbulvetr-11B-v2", + "developer": "Sao10K", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.51 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4544 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.068 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4354 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3301 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/Sao10K/L3-70B-Euryale-v2.1/09aab7d9-93ac-4aff-840a-d4ccfb0b469d.json b/data/hfopenllm_v2/Sao10K/L3-70B-Euryale-v2.1/09aab7d9-93ac-4aff-840a-d4ccfb0b469d.json deleted file mode 100644 index 4093fe157..000000000 --- a/data/hfopenllm_v2/Sao10K/L3-70B-Euryale-v2.1/09aab7d9-93ac-4aff-840a-d4ccfb0b469d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sao10K_L3-70B-Euryale-v2.1/1762652579.872639", - "retrieved_timestamp": "1762652579.87264", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sao10K/L3-70B-Euryale-v2.1", - "developer": "Sao10K", - "inference_platform": "unknown", - "id": "Sao10K/L3-70B-Euryale-v2.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7384417789243651 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6471322811268715 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21374622356495468 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42091666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5103889627659575 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sao10K/L3-70B-Euryale-v2.1/50e53ad5-8693-44c1-b5c7-45b91d7e0ae4.json b/data/hfopenllm_v2/Sao10K/L3-70B-Euryale-v2.1/50e53ad5-8693-44c1-b5c7-45b91d7e0ae4.json new file mode 100644 index 000000000..6cc9b0ce2 --- /dev/null +++ b/data/hfopenllm_v2/Sao10K/L3-70B-Euryale-v2.1/50e53ad5-8693-44c1-b5c7-45b91d7e0ae4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sao10K_L3-70B-Euryale-v2.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3-70B-Euryale-v2.1", + "id": "Sao10K/L3-70B-Euryale-v2.1", + "developer": "Sao10K", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7384 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6471 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2137 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3314 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4209 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5104 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sao10K/L3-70B-Euryale-v2.1/bda5d02f-7973-41a3-8f8e-4e33a12b74e0.json b/data/hfopenllm_v2/Sao10K/L3-70B-Euryale-v2.1/bda5d02f-7973-41a3-8f8e-4e33a12b74e0.json new file mode 100644 index 000000000..f445ab801 --- /dev/null +++ b/data/hfopenllm_v2/Sao10K/L3-70B-Euryale-v2.1/bda5d02f-7973-41a3-8f8e-4e33a12b74e0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sao10K_L3-70B-Euryale-v2.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3-70B-Euryale-v2.1", + "id": "Sao10K/L3-70B-Euryale-v2.1", + "developer": "Sao10K", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", 
+ "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7281 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6503 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2243 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3314 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4196 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5096 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sao10K/L3-70B-Euryale-v2.1/d730a2be-1cd8-4851-9ecf-55139af1e8f7.json b/data/hfopenllm_v2/Sao10K/L3-70B-Euryale-v2.1/d730a2be-1cd8-4851-9ecf-55139af1e8f7.json deleted file mode 100644 index 5783b2708..000000000 --- a/data/hfopenllm_v2/Sao10K/L3-70B-Euryale-v2.1/d730a2be-1cd8-4851-9ecf-55139af1e8f7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sao10K_L3-70B-Euryale-v2.1/1762652579.872864", - "retrieved_timestamp": "1762652579.872865", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sao10K/L3-70B-Euryale-v2.1", - "developer": "Sao10K", - "inference_platform": "unknown", - "id": "Sao10K/L3-70B-Euryale-v2.1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.7281003293483512 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6502778992745041 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22432024169184292 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41958333333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5095578457446809 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sao10K/L3-8B-Lunaris-v1/99ff5ca5-4409-4d9c-9ec0-4cf392afeff2.json b/data/hfopenllm_v2/Sao10K/L3-8B-Lunaris-v1/99ff5ca5-4409-4d9c-9ec0-4cf392afeff2.json new file mode 100644 index 000000000..a7a1e4096 --- /dev/null +++ b/data/hfopenllm_v2/Sao10K/L3-8B-Lunaris-v1/99ff5ca5-4409-4d9c-9ec0-4cf392afeff2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sao10K_L3-8B-Lunaris-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3-8B-Lunaris-v1", + "id": "Sao10K/L3-8B-Lunaris-v1", + "developer": "Sao10K", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6895 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5235 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0906 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + 
"dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3727 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3787 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sao10K/L3-8B-Lunaris-v1/e15ed4e3-d33f-4dad-98da-e1dad098a6a1.json b/data/hfopenllm_v2/Sao10K/L3-8B-Lunaris-v1/e15ed4e3-d33f-4dad-98da-e1dad098a6a1.json deleted file mode 100644 index 3d93d4cfa..000000000 --- a/data/hfopenllm_v2/Sao10K/L3-8B-Lunaris-v1/e15ed4e3-d33f-4dad-98da-e1dad098a6a1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sao10K_L3-8B-Lunaris-v1/1762652579.8733618", - "retrieved_timestamp": "1762652579.873365", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sao10K/L3-8B-Lunaris-v1", - "developer": "Sao10K", - "inference_platform": "unknown", - "id": "Sao10K/L3-8B-Lunaris-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6894573066131198 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5235299282515419 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09063444108761329 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3726666666666667 - } - }, - { - "evaluation_name": 
"MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3787400265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sao10K/L3-8B-Niitama-v1/362f5875-4dbc-4e68-90ce-789f692bb533.json b/data/hfopenllm_v2/Sao10K/L3-8B-Niitama-v1/362f5875-4dbc-4e68-90ce-789f692bb533.json new file mode 100644 index 000000000..70117da5c --- /dev/null +++ b/data/hfopenllm_v2/Sao10K/L3-8B-Niitama-v1/362f5875-4dbc-4e68-90ce-789f692bb533.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sao10K_L3-8B-Niitama-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3-8B-Niitama-v1", + "id": "Sao10K/L3-8B-Niitama-v1", + "developer": "Sao10K", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6791 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5303 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0982 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3807 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3701 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/Sao10K/L3-8B-Niitama-v1/9c10e944-3955-4478-9d07-f79769d6b884.json b/data/hfopenllm_v2/Sao10K/L3-8B-Niitama-v1/9c10e944-3955-4478-9d07-f79769d6b884.json deleted file mode 100644 index d1ed96a35..000000000 --- a/data/hfopenllm_v2/Sao10K/L3-8B-Niitama-v1/9c10e944-3955-4478-9d07-f79769d6b884.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sao10K_L3-8B-Niitama-v1/1762652579.8737721", - "retrieved_timestamp": "1762652579.873773", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sao10K/L3-8B-Niitama-v1", - "developer": "Sao10K", - "inference_platform": "unknown", - "id": "Sao10K/L3-8B-Niitama-v1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6790659893526954 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5302980131787137 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09818731117824774 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3806666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3700964095744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sao10K/L3-8B-Stheno-v3.2/85a94072-ac79-4c14-abaa-9a6424a03ab5.json b/data/hfopenllm_v2/Sao10K/L3-8B-Stheno-v3.2/85a94072-ac79-4c14-abaa-9a6424a03ab5.json deleted file mode 100644 index f98ec9f3f..000000000 --- a/data/hfopenllm_v2/Sao10K/L3-8B-Stheno-v3.2/85a94072-ac79-4c14-abaa-9a6424a03ab5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sao10K_L3-8B-Stheno-v3.2/1762652579.8740559", - "retrieved_timestamp": "1762652579.874058", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"Sao10K/L3-8B-Stheno-v3.2", - "developer": "Sao10K", - "inference_platform": "unknown", - "id": "Sao10K/L3-8B-Stheno-v3.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6872841837435781 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.522778637171633 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09290030211480363 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3793645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3768284574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sao10K/L3-8B-Stheno-v3.2/fdb5faf6-2cdd-42bb-b154-d6e93b2348bf.json b/data/hfopenllm_v2/Sao10K/L3-8B-Stheno-v3.2/fdb5faf6-2cdd-42bb-b154-d6e93b2348bf.json new file mode 100644 index 000000000..cb2eabee5 --- /dev/null +++ b/data/hfopenllm_v2/Sao10K/L3-8B-Stheno-v3.2/fdb5faf6-2cdd-42bb-b154-d6e93b2348bf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sao10K_L3-8B-Stheno-v3.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3-8B-Stheno-v3.2", + "id": "Sao10K/L3-8B-Stheno-v3.2", + "developer": "Sao10K", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6873 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.5228 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0929 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3794 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3768 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sao10K/L3-8B-Stheno-v3.3-32K/279b82ae-62b2-4703-85f2-1e79e42366f0.json b/data/hfopenllm_v2/Sao10K/L3-8B-Stheno-v3.3-32K/279b82ae-62b2-4703-85f2-1e79e42366f0.json deleted file mode 100644 index cbe29bc6a..000000000 --- a/data/hfopenllm_v2/Sao10K/L3-8B-Stheno-v3.3-32K/279b82ae-62b2-4703-85f2-1e79e42366f0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sao10K_L3-8B-Stheno-v3.3-32K/1762652579.874314", - "retrieved_timestamp": "1762652579.874315", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sao10K/L3-8B-Stheno-v3.3-32K", - "developer": "Sao10K", - "inference_platform": "unknown", - "id": "Sao10K/L3-8B-Stheno-v3.3-32K", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46037181345496614 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3844012923008206 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3725416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1895777925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sao10K/L3-8B-Stheno-v3.3-32K/93f829b8-b8d9-4389-a210-2a38c3a30edb.json b/data/hfopenllm_v2/Sao10K/L3-8B-Stheno-v3.3-32K/93f829b8-b8d9-4389-a210-2a38c3a30edb.json new file mode 100644 index 000000000..317e3d07a --- /dev/null +++ b/data/hfopenllm_v2/Sao10K/L3-8B-Stheno-v3.3-32K/93f829b8-b8d9-4389-a210-2a38c3a30edb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sao10K_L3-8B-Stheno-v3.3-32K/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3-8B-Stheno-v3.3-32K", + "id": "Sao10K/L3-8B-Stheno-v3.3-32K", + "developer": "Sao10K", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4604 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3844 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0144 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2567 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3725 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1896 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sao10K/MN-12B-Lyra-v3/2c83813a-8254-4765-9367-efb9ad8c5e6c.json b/data/hfopenllm_v2/Sao10K/MN-12B-Lyra-v3/2c83813a-8254-4765-9367-efb9ad8c5e6c.json deleted file mode 100644 index 4db57ccd3..000000000 --- a/data/hfopenllm_v2/Sao10K/MN-12B-Lyra-v3/2c83813a-8254-4765-9367-efb9ad8c5e6c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sao10K_MN-12B-Lyra-v3/1762652579.874634", - "retrieved_timestamp": "1762652579.874634", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sao10K/MN-12B-Lyra-v3", - "developer": "Sao10K", - "inference_platform": "unknown", - "id": "Sao10K/MN-12B-Lyra-v3", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4486063644463357 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4803954360397243 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09365558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40190624999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32488364361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sao10K/MN-12B-Lyra-v3/6ec3554d-377b-4bf6-88ef-8a4c9e70f485.json b/data/hfopenllm_v2/Sao10K/MN-12B-Lyra-v3/6ec3554d-377b-4bf6-88ef-8a4c9e70f485.json new file mode 100644 index 000000000..6ca309c31 --- /dev/null +++ 
b/data/hfopenllm_v2/Sao10K/MN-12B-Lyra-v3/6ec3554d-377b-4bf6-88ef-8a4c9e70f485.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sao10K_MN-12B-Lyra-v3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MN-12B-Lyra-v3", + "id": "Sao10K/MN-12B-Lyra-v3", + "developer": "Sao10K", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4486 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4804 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0937 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4019 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3249 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V1-32B/482fbdd6-6f39-4971-ac65-1e5e181b667f.json b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V1-32B/482fbdd6-6f39-4971-ac65-1e5e181b667f.json deleted file mode 100644 index b6aadf0d6..000000000 --- a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V1-32B/482fbdd6-6f39-4971-ac65-1e5e181b667f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Avengers-V1-32B/1762652579.874861", 
- "retrieved_timestamp": "1762652579.8748622", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Saxo/Linkbricks-Horizon-AI-Avengers-V1-32B", - "developer": "Saxo", - "inference_platform": "unknown", - "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V1-32B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.76 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7971681804279312 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7000545067146033 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6027190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3624161073825503 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45378125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5792885638297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V1-32B/70d749cf-2e92-4847-86de-7964fc8eb990.json b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V1-32B/70d749cf-2e92-4847-86de-7964fc8eb990.json new file mode 100644 index 000000000..991d8a526 --- /dev/null +++ b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V1-32B/70d749cf-2e92-4847-86de-7964fc8eb990.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Avengers-V1-32B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Linkbricks-Horizon-AI-Avengers-V1-32B", + "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V1-32B", + "developer": "Saxo", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.76 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7972 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7001 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6027 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3624 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4538 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5793 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V2-32B/0b1758f7-4aee-40a2-b33e-f519107b6687.json b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V2-32B/0b1758f7-4aee-40a2-b33e-f519107b6687.json deleted file mode 100644 index 2c5b8c6ab..000000000 --- a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V2-32B/0b1758f7-4aee-40a2-b33e-f519107b6687.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Avengers-V2-32B/1762652579.875268", - "retrieved_timestamp": "1762652579.8752692", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Saxo/Linkbricks-Horizon-AI-Avengers-V2-32B", - "developer": "Saxo", - "inference_platform": "unknown", - "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V2-32B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.76 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.7956444456264933 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7023193256341814 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41663541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5719747340425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V2-32B/623f2b04-6cd7-4ea0-8844-badb0ff6c9c6.json b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V2-32B/623f2b04-6cd7-4ea0-8844-badb0ff6c9c6.json new file mode 100644 index 000000000..f34b82b65 --- /dev/null +++ b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V2-32B/623f2b04-6cd7-4ea0-8844-badb0ff6c9c6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Avengers-V2-32B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Linkbricks-Horizon-AI-Avengers-V2-32B", + "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V2-32B", + "developer": "Saxo", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.76 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7956 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7023 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5665 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4166 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.572 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V3-32B/b206b1c9-3469-4b77-b85a-dcd3c6394c67.json b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V3-32B/b206b1c9-3469-4b77-b85a-dcd3c6394c67.json deleted file mode 100644 index 1026f1d62..000000000 --- a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V3-32B/b206b1c9-3469-4b77-b85a-dcd3c6394c67.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Avengers-V3-32B/1762652579.875521", - "retrieved_timestamp": "1762652579.8755221", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Saxo/Linkbricks-Horizon-AI-Avengers-V3-32B", - "developer": "Saxo", - "inference_platform": "unknown", - "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V3-32B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8248702332034556 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6913199237437709 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6178247734138973 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42745833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.56640625 - } - } - ] -} diff --git a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V3-32B/e1aca741-2765-4e47-b6a1-49f3d9532432.json b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V3-32B/e1aca741-2765-4e47-b6a1-49f3d9532432.json new file mode 100644 index 000000000..32459a45a --- /dev/null +++ b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V3-32B/e1aca741-2765-4e47-b6a1-49f3d9532432.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Avengers-V3-32B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Linkbricks-Horizon-AI-Avengers-V3-32B", + "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V3-32B", + "developer": "Saxo", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8249 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6913 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6178 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3381 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4275 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + 
"source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5664 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V4-32B/4f42366e-e6aa-4974-9a40-5781e350616d.json b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V4-32B/4f42366e-e6aa-4974-9a40-5781e350616d.json new file mode 100644 index 000000000..cba352636 --- /dev/null +++ b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V4-32B/4f42366e-e6aa-4974-9a40-5781e350616d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Avengers-V4-32B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Linkbricks-Horizon-AI-Avengers-V4-32B", + "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V4-32B", + "developer": "Saxo", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7631 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.692 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5363 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3616 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4643 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5752 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V4-32B/52d4b2fe-cbd1-431f-b0e7-04ebfbe852ca.json b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V4-32B/52d4b2fe-cbd1-431f-b0e7-04ebfbe852ca.json deleted file mode 100644 index 93bb450f9..000000000 --- a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V4-32B/52d4b2fe-cbd1-431f-b0e7-04ebfbe852ca.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Avengers-V4-32B/1762652579.87576", - "retrieved_timestamp": "1762652579.8757608", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Saxo/Linkbricks-Horizon-AI-Avengers-V4-32B", - "developer": "Saxo", - "inference_platform": "unknown", - "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V4-32B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7630963620970137 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6920204096666581 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3615771812080537 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4642604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5752160904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V5-32B/4ec2231d-c012-4ad3-830c-8ff86c977202.json b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V5-32B/4ec2231d-c012-4ad3-830c-8ff86c977202.json new file mode 100644 index 000000000..9e429543e --- /dev/null +++ b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V5-32B/4ec2231d-c012-4ad3-830c-8ff86c977202.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Avengers-V5-32B/1770682486.623709", + "retrieved_timestamp": 
"1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Linkbricks-Horizon-AI-Avengers-V5-32B", + "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V5-32B", + "developer": "Saxo", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7516 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6929 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5461 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3557 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4709 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5762 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V5-32B/b1b0aac0-2921-44ab-ac1b-873b715e9b52.json b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V5-32B/b1b0aac0-2921-44ab-ac1b-873b715e9b52.json deleted file mode 100644 index cbafd5a62..000000000 --- a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V5-32B/b1b0aac0-2921-44ab-ac1b-873b715e9b52.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Avengers-V5-32B/1762652579.876068", - "retrieved_timestamp": "1762652579.876069", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Saxo/Linkbricks-Horizon-AI-Avengers-V5-32B", - "developer": "Saxo", - "inference_platform": "unknown", - "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V5-32B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7515558717536137 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6928650089977083 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5460725075528701 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35570469798657717 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47086458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5762134308510638 - } - } - ] -} diff --git a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V6-32B/1d2e5513-bd0c-4795-8487-f5266c6e368f.json b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V6-32B/1d2e5513-bd0c-4795-8487-f5266c6e368f.json new file mode 100644 index 000000000..1faf938ef --- /dev/null +++ b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V6-32B/1d2e5513-bd0c-4795-8487-f5266c6e368f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Avengers-V6-32B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Linkbricks-Horizon-AI-Avengers-V6-32B", + "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V6-32B", + "developer": "Saxo", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.76 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.8209 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.689 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6224 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3347 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4274 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5672 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V6-32B/977a0388-5c46-42ab-bb93-91f036963f8c.json b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V6-32B/977a0388-5c46-42ab-bb93-91f036963f8c.json deleted file mode 100644 index e3fbf9523..000000000 --- a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V6-32B/977a0388-5c46-42ab-bb93-91f036963f8c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Avengers-V6-32B/1762652579.87637", - "retrieved_timestamp": "1762652579.876371", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Saxo/Linkbricks-Horizon-AI-Avengers-V6-32B", - "developer": "Saxo", - "inference_platform": "unknown", - "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V6-32B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.76 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8208985491828349 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6889783858832969 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.622356495468278 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3347315436241611 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42742708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5672373670212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V2-27B/104172b7-86f5-410a-a454-63e1cfbeb87f.json b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V2-27B/104172b7-86f5-410a-a454-63e1cfbeb87f.json new file mode 100644 index 000000000..f8ce92b90 --- /dev/null +++ b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V2-27B/104172b7-86f5-410a-a454-63e1cfbeb87f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Korean-Avengers-V2-27B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Linkbricks-Horizon-AI-Korean-Avengers-V2-27B", + "id": "Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V2-27B", + "developer": "Saxo", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8146 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6463 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + 
"dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3473 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4139 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4599 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V2-27B/52438151-a1c8-440c-a9be-3670b18c1ef6.json b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V2-27B/52438151-a1c8-440c-a9be-3670b18c1ef6.json deleted file mode 100644 index 0585654c9..000000000 --- a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V2-27B/52438151-a1c8-440c-a9be-3670b18c1ef6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Korean-Avengers-V2-27B/1762652579.876656", - "retrieved_timestamp": "1762652579.876657", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V2-27B", - "developer": "Saxo", - "inference_platform": "unknown", - "id": "Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V2-27B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8145786513118525 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6463223196116569 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802114803625378 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34731543624161076 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4139375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45985704787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V3-27B/993cc036-0e33-4d0e-b1b3-f97a9645f4c5.json b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V3-27B/993cc036-0e33-4d0e-b1b3-f97a9645f4c5.json deleted file mode 100644 index e27011e90..000000000 --- a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V3-27B/993cc036-0e33-4d0e-b1b3-f97a9645f4c5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Korean-Avengers-V3-27B/1762652579.876898", - "retrieved_timestamp": "1762652579.876899", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V3-27B", - "developer": "Saxo", - "inference_platform": "unknown", - "id": "Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V3-27B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.81420408959339 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6403963618749583 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35906040268456374 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44667708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4523769946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V3-27B/d28e04ac-7d18-43fb-80b8-82c0662fec79.json b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V3-27B/d28e04ac-7d18-43fb-80b8-82c0662fec79.json new file mode 100644 index 000000000..2df6b6d76 --- 
/dev/null +++ b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V3-27B/d28e04ac-7d18-43fb-80b8-82c0662fec79.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Korean-Avengers-V3-27B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Linkbricks-Horizon-AI-Korean-Avengers-V3-27B", + "id": "Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V3-27B", + "developer": "Saxo", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8142 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6404 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2492 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3591 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4467 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4524 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Superb-22B/20bb3819-9d85-4d84-99ba-65e33965f0c5.json b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Superb-22B/20bb3819-9d85-4d84-99ba-65e33965f0c5.json new file mode 100644 index 000000000..f703921c4 --- /dev/null +++ b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Superb-22B/20bb3819-9d85-4d84-99ba-65e33965f0c5.json @@ -0,0 +1,132 @@ 
+{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Korean-Superb-22B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Linkbricks-Horizon-AI-Korean-Superb-22B", + "id": "Saxo/Linkbricks-Horizon-AI-Korean-Superb-22B", + "developer": "Saxo", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6767 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5626 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2372 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3263 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3908 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3871 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Superb-22B/53a6fd3e-37c5-4abc-b387-0ef9f4225760.json b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Superb-22B/53a6fd3e-37c5-4abc-b387-0ef9f4225760.json deleted file mode 100644 index ef4cbd0e1..000000000 --- a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Superb-22B/53a6fd3e-37c5-4abc-b387-0ef9f4225760.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Korean-Superb-22B/1762652579.877154", - "retrieved_timestamp": 
"1762652579.877155", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Saxo/Linkbricks-Horizon-AI-Korean-Superb-22B", - "developer": "Saxo", - "inference_platform": "unknown", - "id": "Saxo/Linkbricks-Horizon-AI-Korean-Superb-22B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 22.247 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6766679078179231 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5625539568927603 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23716012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3263422818791946 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3907708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3871343085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Superb-27B/3a4bdf58-0137-4d85-b567-59b3fed3dad5.json b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Superb-27B/3a4bdf58-0137-4d85-b567-59b3fed3dad5.json new file mode 100644 index 000000000..d43e428ac --- /dev/null +++ b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Superb-27B/3a4bdf58-0137-4d85-b567-59b3fed3dad5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Korean-Superb-27B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Linkbricks-Horizon-AI-Korean-Superb-27B", + "id": "Saxo/Linkbricks-Horizon-AI-Korean-Superb-27B", + "developer": "Saxo", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7768 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6518 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2719 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3599 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4791 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4647 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Superb-27B/420f358d-c7a0-4bb5-9d0a-6c44e1f2a354.json b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Superb-27B/420f358d-c7a0-4bb5-9d0a-6c44e1f2a354.json deleted file mode 100644 index 6167702e4..000000000 --- a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Superb-27B/420f358d-c7a0-4bb5-9d0a-6c44e1f2a354.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Korean-Superb-27B/1762652579.87745", - "retrieved_timestamp": "1762652579.877451", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Saxo/Linkbricks-Horizon-AI-Korean-Superb-27B", - "developer": "Saxo", - "inference_platform": "unknown", - "id": "Saxo/Linkbricks-Horizon-AI-Korean-Superb-27B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.7767601076255447 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6518345685119445 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2719033232628399 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3598993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47913541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4646775265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Superb-27B/04f843ba-947c-4732-979c-2aeae7d34e5a.json b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Superb-27B/04f843ba-947c-4732-979c-2aeae7d34e5a.json new file mode 100644 index 000000000..f7e4a247b --- /dev/null +++ b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Superb-27B/04f843ba-947c-4732-979c-2aeae7d34e5a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Superb-27B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Linkbricks-Horizon-AI-Superb-27B", + "id": "Saxo/Linkbricks-Horizon-AI-Superb-27B", + "developer": "Saxo", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7302 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6186 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + 
}, + "score_details": { + "score": 0.2221 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3574 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.465 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.406 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Superb-27B/e7007251-609e-4c81-86cf-d6fb79c896c2.json b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Superb-27B/e7007251-609e-4c81-86cf-d6fb79c896c2.json deleted file mode 100644 index f4eab19c3..000000000 --- a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Superb-27B/e7007251-609e-4c81-86cf-d6fb79c896c2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Superb-27B/1762652579.877677", - "retrieved_timestamp": "1762652579.8776782", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Saxo/Linkbricks-Horizon-AI-Superb-27B", - "developer": "Saxo", - "inference_platform": "unknown", - "id": "Saxo/Linkbricks-Horizon-AI-Superb-27B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7302235845334822 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6186245528925046 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22205438066465258 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3573825503355705 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.465 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.406000664893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/SeaLLMs/SeaLLM-7B-v2.5/173a31d3-7d12-4ab1-a963-005a81aee767.json b/data/hfopenllm_v2/SeaLLMs/SeaLLM-7B-v2.5/173a31d3-7d12-4ab1-a963-005a81aee767.json new file mode 100644 index 000000000..03259aebd --- /dev/null +++ b/data/hfopenllm_v2/SeaLLMs/SeaLLM-7B-v2.5/173a31d3-7d12-4ab1-a963-005a81aee767.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SeaLLMs_SeaLLM-7B-v2.5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SeaLLM-7B-v2.5", + "id": "SeaLLMs/SeaLLM-7B-v2.5", + "developer": "SeaLLMs", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 8.538 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4522 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.498 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1088 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4203 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3203 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SeaLLMs/SeaLLM-7B-v2.5/7117b360-ef16-4da9-9226-b66b6aac9703.json b/data/hfopenllm_v2/SeaLLMs/SeaLLM-7B-v2.5/7117b360-ef16-4da9-9226-b66b6aac9703.json deleted file mode 100644 index d0f77dc1a..000000000 --- a/data/hfopenllm_v2/SeaLLMs/SeaLLM-7B-v2.5/7117b360-ef16-4da9-9226-b66b6aac9703.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SeaLLMs_SeaLLM-7B-v2.5/1762652579.878138", - "retrieved_timestamp": "1762652579.8781388", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SeaLLMs/SeaLLM-7B-v2.5", - "developer": "SeaLLMs", - "inference_platform": "unknown", - "id": "SeaLLMs/SeaLLM-7B-v2.5", - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 8.538 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4521536190640833 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49802029594352754 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10876132930513595 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42032291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3203125 - } - } - ] -} diff --git a/data/hfopenllm_v2/SeaLLMs/SeaLLM-7B-v2/8f41a438-e9b7-43c6-b0b2-447a71ac360f.json b/data/hfopenllm_v2/SeaLLMs/SeaLLM-7B-v2/8f41a438-e9b7-43c6-b0b2-447a71ac360f.json deleted file mode 100644 index 0158bc4b4..000000000 --- a/data/hfopenllm_v2/SeaLLMs/SeaLLM-7B-v2/8f41a438-e9b7-43c6-b0b2-447a71ac360f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SeaLLMs_SeaLLM-7B-v2/1762652579.877889", - "retrieved_timestamp": "1762652579.877889", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - 
"source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SeaLLMs/SeaLLM-7B-v2", - "developer": "SeaLLMs", - "inference_platform": "unknown", - "id": "SeaLLMs/SeaLLM-7B-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.376 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36712367629002157 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4902100795458318 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08534743202416918 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4069583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30826130319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/SeaLLMs/SeaLLM-7B-v2/d0555736-b614-43ca-91d7-8264e3566872.json b/data/hfopenllm_v2/SeaLLMs/SeaLLM-7B-v2/d0555736-b614-43ca-91d7-8264e3566872.json new file mode 100644 index 000000000..98630be5e --- /dev/null +++ b/data/hfopenllm_v2/SeaLLMs/SeaLLM-7B-v2/d0555736-b614-43ca-91d7-8264e3566872.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SeaLLMs_SeaLLM-7B-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SeaLLM-7B-v2", + "id": "SeaLLMs/SeaLLM-7B-v2", + "developer": "SeaLLMs", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.376 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3671 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4902 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0853 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2785 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.407 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3083 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SeaLLMs/SeaLLMs-v3-7B-Chat/4b7b13b7-4aee-4462-87e6-aa6c15068236.json b/data/hfopenllm_v2/SeaLLMs/SeaLLMs-v3-7B-Chat/4b7b13b7-4aee-4462-87e6-aa6c15068236.json new file mode 100644 index 000000000..05981a658 --- /dev/null +++ b/data/hfopenllm_v2/SeaLLMs/SeaLLMs-v3-7B-Chat/4b7b13b7-4aee-4462-87e6-aa6c15068236.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SeaLLMs_SeaLLMs-v3-7B-Chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SeaLLMs-v3-7B-Chat", + "id": "SeaLLMs/SeaLLMs-v3-7B-Chat", + "developer": "SeaLLMs", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4377 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5266 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1858 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4174 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3895 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SeaLLMs/SeaLLMs-v3-7B-Chat/f119b2b5-2303-4772-9ae0-ce8f573f86c3.json b/data/hfopenllm_v2/SeaLLMs/SeaLLMs-v3-7B-Chat/f119b2b5-2303-4772-9ae0-ce8f573f86c3.json deleted file mode 100644 index 07684edb3..000000000 --- a/data/hfopenllm_v2/SeaLLMs/SeaLLMs-v3-7B-Chat/f119b2b5-2303-4772-9ae0-ce8f573f86c3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SeaLLMs_SeaLLMs-v3-7B-Chat/1762652579.8783438", - "retrieved_timestamp": "1762652579.878345", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SeaLLMs/SeaLLMs-v3-7B-Chat", - "developer": "SeaLLMs", - "inference_platform": "unknown", - "id": "SeaLLMs/SeaLLMs-v3-7B-Chat", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43766539448662883 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5266406284595359 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18580060422960726 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.417375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3894614361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/SenseLLM/ReflectionCoder-CL-34B/4b1f9ce5-bb12-42e3-b0e0-afaa784b0c4c.json b/data/hfopenllm_v2/SenseLLM/ReflectionCoder-CL-34B/4b1f9ce5-bb12-42e3-b0e0-afaa784b0c4c.json new file mode 100644 index 000000000..e86dab4c2 --- /dev/null +++ b/data/hfopenllm_v2/SenseLLM/ReflectionCoder-CL-34B/4b1f9ce5-bb12-42e3-b0e0-afaa784b0c4c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SenseLLM_ReflectionCoder-CL-34B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ReflectionCoder-CL-34B", + "id": "SenseLLM/ReflectionCoder-CL-34B", + "developer": "SenseLLM", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 33.744 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4008 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3953 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0332 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2508 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4155 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + 
"dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1424 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SenseLLM/ReflectionCoder-CL-34B/5d7a3d90-8017-4415-a1da-eb70f6145fe4.json b/data/hfopenllm_v2/SenseLLM/ReflectionCoder-CL-34B/5d7a3d90-8017-4415-a1da-eb70f6145fe4.json deleted file mode 100644 index 0270f5cc2..000000000 --- a/data/hfopenllm_v2/SenseLLM/ReflectionCoder-CL-34B/5d7a3d90-8017-4415-a1da-eb70f6145fe4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SenseLLM_ReflectionCoder-CL-34B/1762652579.8785448", - "retrieved_timestamp": "1762652579.878546", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SenseLLM/ReflectionCoder-CL-34B", - "developer": "SenseLLM", - "inference_platform": "unknown", - "id": "SenseLLM/ReflectionCoder-CL-34B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 33.744 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4007710652180658 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39529304297033296 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03323262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25083892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41548958333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14237034574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/SenseLLM/ReflectionCoder-DS-33B/2ee4584d-b18c-44dd-af63-22c28b92e107.json b/data/hfopenllm_v2/SenseLLM/ReflectionCoder-DS-33B/2ee4584d-b18c-44dd-af63-22c28b92e107.json deleted file mode 100644 index 36e062af9..000000000 --- a/data/hfopenllm_v2/SenseLLM/ReflectionCoder-DS-33B/2ee4584d-b18c-44dd-af63-22c28b92e107.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/SenseLLM_ReflectionCoder-DS-33B/1762652579.878793", - "retrieved_timestamp": "1762652579.878794", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SenseLLM/ReflectionCoder-DS-33B", - "developer": "SenseLLM", - "inference_platform": "unknown", - "id": "SenseLLM/ReflectionCoder-DS-33B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 33.34 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3786641666334215 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3449447540164568 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030211480362537766 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3343125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12017952127659574 - } - } - ] -} diff --git a/data/hfopenllm_v2/SenseLLM/ReflectionCoder-DS-33B/acbcd5a5-bcd8-4209-b35f-425feada7e8b.json b/data/hfopenllm_v2/SenseLLM/ReflectionCoder-DS-33B/acbcd5a5-bcd8-4209-b35f-425feada7e8b.json new file mode 100644 index 000000000..20a36d5af --- /dev/null +++ b/data/hfopenllm_v2/SenseLLM/ReflectionCoder-DS-33B/acbcd5a5-bcd8-4209-b35f-425feada7e8b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SenseLLM_ReflectionCoder-DS-33B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ReflectionCoder-DS-33B", + "id": "SenseLLM/ReflectionCoder-DS-33B", + "developer": "SenseLLM", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 33.34 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3787 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3449 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0302 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3343 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1202 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SentientAGI/Dobby-Mini-Leashed-Llama-3.1-8B/cb9a415f-1a02-46ad-a731-bf825ddd78ae.json b/data/hfopenllm_v2/SentientAGI/Dobby-Mini-Leashed-Llama-3.1-8B/cb9a415f-1a02-46ad-a731-bf825ddd78ae.json new file mode 100644 index 000000000..df38f37c7 --- /dev/null +++ b/data/hfopenllm_v2/SentientAGI/Dobby-Mini-Leashed-Llama-3.1-8B/cb9a415f-1a02-46ad-a731-bf825ddd78ae.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SentientAGI_Dobby-Mini-Leashed-Llama-3.1-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dobby-Mini-Leashed-Llama-3.1-8B", + "id": "SentientAGI/Dobby-Mini-Leashed-Llama-3.1-8B", + "developer": "SentientAGI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7847 + } + 
}, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5138 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1858 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4254 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3694 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SentientAGI/Dobby-Mini-Unhinged-Llama-3.1-8B/92cde6db-47f4-43c6-9ad5-643c35faa226.json b/data/hfopenllm_v2/SentientAGI/Dobby-Mini-Unhinged-Llama-3.1-8B/92cde6db-47f4-43c6-9ad5-643c35faa226.json new file mode 100644 index 000000000..771f354ce --- /dev/null +++ b/data/hfopenllm_v2/SentientAGI/Dobby-Mini-Unhinged-Llama-3.1-8B/92cde6db-47f4-43c6-9ad5-643c35faa226.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SentientAGI_Dobby-Mini-Unhinged-Llama-3.1-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dobby-Mini-Unhinged-Llama-3.1-8B", + "id": "SentientAGI/Dobby-Mini-Unhinged-Llama-3.1-8B", + "developer": "SentientAGI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7457 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5142 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1563 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4013 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3585 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SeppeV/SmolLM_pretrained_with_sft_trained_with_1pc_data_on_a_preference_dpo/5e88a037-f9bd-4b39-944f-f0781bb7884f.json b/data/hfopenllm_v2/SeppeV/SmolLM_pretrained_with_sft_trained_with_1pc_data_on_a_preference_dpo/5e88a037-f9bd-4b39-944f-f0781bb7884f.json new file mode 100644 index 000000000..d4f48f999 --- /dev/null +++ b/data/hfopenllm_v2/SeppeV/SmolLM_pretrained_with_sft_trained_with_1pc_data_on_a_preference_dpo/5e88a037-f9bd-4b39-944f-f0781bb7884f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SeppeV_SmolLM_pretrained_with_sft_trained_with_1pc_data_on_a_preference_dpo/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SmolLM_pretrained_with_sft_trained_with_1pc_data_on_a_preference_dpo", + "id": "SeppeV/SmolLM_pretrained_with_sft_trained_with_1pc_data_on_a_preference_dpo", + "developer": "SeppeV", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0955 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": 
"SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3073 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0121 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4032 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1161 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SeppeV/SmolLM_pretrained_with_sft_trained_with_1pc_data_on_a_preference_dpo/ff284b60-0c7c-4825-af77-5922831cb3b8.json b/data/hfopenllm_v2/SeppeV/SmolLM_pretrained_with_sft_trained_with_1pc_data_on_a_preference_dpo/ff284b60-0c7c-4825-af77-5922831cb3b8.json deleted file mode 100644 index b9233a74f..000000000 --- a/data/hfopenllm_v2/SeppeV/SmolLM_pretrained_with_sft_trained_with_1pc_data_on_a_preference_dpo/ff284b60-0c7c-4825-af77-5922831cb3b8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SeppeV_SmolLM_pretrained_with_sft_trained_with_1pc_data_on_a_preference_dpo/1762652579.879464", - "retrieved_timestamp": "1762652579.8794649", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SeppeV/SmolLM_pretrained_with_sft_trained_with_1pc_data_on_a_preference_dpo", - "developer": "SeppeV", - "inference_platform": "unknown", - "id": "SeppeV/SmolLM_pretrained_with_sft_trained_with_1pc_data_on_a_preference_dpo", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09554648333089535 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3072665948660797 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40320833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11610704787234043 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sharathhebbar24/SSH_355M/9ff82d83-2a89-48d8-8ad0-91637a77bc76.json b/data/hfopenllm_v2/Sharathhebbar24/SSH_355M/9ff82d83-2a89-48d8-8ad0-91637a77bc76.json deleted file mode 100644 index 9aadbbd6d..000000000 --- a/data/hfopenllm_v2/Sharathhebbar24/SSH_355M/9ff82d83-2a89-48d8-8ad0-91637a77bc76.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sharathhebbar24_SSH_355M/1762652579.8797262", - "retrieved_timestamp": "1762652579.8797271", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sharathhebbar24/SSH_355M", - "developer": "Sharathhebbar24", - "inference_platform": "unknown", - "id": "Sharathhebbar24/SSH_355M", - "additional_details": { - "precision": "float16", - "architecture": "GPT2LMHeadModel", - "params_billions": 0.355 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1423589409433636 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30985907344593705 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": 
"Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41775 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11760305851063829 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sharathhebbar24/SSH_355M/d4b08f5d-5add-49f4-b8db-c1a12e0a5313.json b/data/hfopenllm_v2/Sharathhebbar24/SSH_355M/d4b08f5d-5add-49f4-b8db-c1a12e0a5313.json new file mode 100644 index 000000000..ab1e96d43 --- /dev/null +++ b/data/hfopenllm_v2/Sharathhebbar24/SSH_355M/d4b08f5d-5add-49f4-b8db-c1a12e0a5313.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sharathhebbar24_SSH_355M/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SSH_355M", + "id": "Sharathhebbar24/SSH_355M", + "developer": "Sharathhebbar24", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GPT2LMHeadModel", + "params_billions": 0.355 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1424 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3099 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0091 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4178 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1176 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sharathhebbar24/chat_gpt2_dpo/ac5adf39-f0a4-439b-9873-9141e0a554b1.json b/data/hfopenllm_v2/Sharathhebbar24/chat_gpt2_dpo/ac5adf39-f0a4-439b-9873-9141e0a554b1.json new file mode 100644 index 000000000..3c6c53f61 --- /dev/null +++ b/data/hfopenllm_v2/Sharathhebbar24/chat_gpt2_dpo/ac5adf39-f0a4-439b-9873-9141e0a554b1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sharathhebbar24_chat_gpt2_dpo/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "chat_gpt2_dpo", + "id": "Sharathhebbar24/chat_gpt2_dpo", + "developer": "Sharathhebbar24", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GPT2LMHeadModel", + "params_billions": 0.124 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0986 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2902 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0053 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3818 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1142 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/Shreyash2010/Uma-4x4B-Instruct-v0.1/62965c92-cdf4-4a3b-b035-990abaab615c.json b/data/hfopenllm_v2/Shreyash2010/Uma-4x4B-Instruct-v0.1/62965c92-cdf4-4a3b-b035-990abaab615c.json new file mode 100644 index 000000000..5d50114d7 --- /dev/null +++ b/data/hfopenllm_v2/Shreyash2010/Uma-4x4B-Instruct-v0.1/62965c92-cdf4-4a3b-b035-990abaab615c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Shreyash2010_Uma-4x4B-Instruct-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Uma-4x4B-Instruct-v0.1", + "id": "Shreyash2010/Uma-4x4B-Instruct-v0.1", + "developer": "Shreyash2010", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5517 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5512 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1775 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3347 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4441 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.387 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Shreyash2010/Uma-4x4B-Instruct-v0.1/83fa529b-8c61-4017-92a8-ec0f46eb7bba.json b/data/hfopenllm_v2/Shreyash2010/Uma-4x4B-Instruct-v0.1/83fa529b-8c61-4017-92a8-ec0f46eb7bba.json deleted file mode 100644 index 
15e802b5b..000000000 --- a/data/hfopenllm_v2/Shreyash2010/Uma-4x4B-Instruct-v0.1/83fa529b-8c61-4017-92a8-ec0f46eb7bba.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Shreyash2010_Uma-4x4B-Instruct-v0.1/1762652579.880244", - "retrieved_timestamp": "1762652579.880245", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Shreyash2010/Uma-4x4B-Instruct-v0.1", - "developer": "Shreyash2010", - "inference_platform": "unknown", - "id": "Shreyash2010/Uma-4x4B-Instruct-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5516961661724225 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5511602059856503 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17749244712990936 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3347315436241611 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4441041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.386968085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sicarius-Prototyping/Brainy_LLAMA/3866ece8-d70a-4061-9e86-0798ecd98bd6.json b/data/hfopenllm_v2/Sicarius-Prototyping/Brainy_LLAMA/3866ece8-d70a-4061-9e86-0798ecd98bd6.json new file mode 100644 index 000000000..26ba36c75 --- /dev/null +++ b/data/hfopenllm_v2/Sicarius-Prototyping/Brainy_LLAMA/3866ece8-d70a-4061-9e86-0798ecd98bd6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sicarius-Prototyping_Brainy_LLAMA/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Brainy_LLAMA", + "id": "Sicarius-Prototyping/Brainy_LLAMA", + "developer": "Sicarius-Prototyping", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, 
+ "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5204 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5117 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1337 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3138 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4143 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3849 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sicarius-Prototyping/Micropenis_1B/1ce9038a-7f1f-4b79-9fbc-9e78660094b3.json b/data/hfopenllm_v2/Sicarius-Prototyping/Micropenis_1B/1ce9038a-7f1f-4b79-9fbc-9e78660094b3.json deleted file mode 100644 index 680dd37ed..000000000 --- a/data/hfopenllm_v2/Sicarius-Prototyping/Micropenis_1B/1ce9038a-7f1f-4b79-9fbc-9e78660094b3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sicarius-Prototyping_Micropenis_1B/1762652579.8808", - "retrieved_timestamp": "1762652579.880801", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sicarius-Prototyping/Micropenis_1B", - "developer": "Sicarius-Prototyping", - "inference_platform": "unknown", - "id": "Sicarius-Prototyping/Micropenis_1B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.618 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3460662154195313 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3372377910880025 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04607250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3325416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18600398936170212 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sicarius-Prototyping/Micropenis_1B/ff484d0e-bb14-4a80-ae29-2351b03cf278.json b/data/hfopenllm_v2/Sicarius-Prototyping/Micropenis_1B/ff484d0e-bb14-4a80-ae29-2351b03cf278.json new file mode 100644 index 000000000..61ea14597 --- /dev/null +++ b/data/hfopenllm_v2/Sicarius-Prototyping/Micropenis_1B/ff484d0e-bb14-4a80-ae29-2351b03cf278.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sicarius-Prototyping_Micropenis_1B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Micropenis_1B", + "id": "Sicarius-Prototyping/Micropenis_1B", + "developer": "Sicarius-Prototyping", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.618 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3461 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3372 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": 
"Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0461 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2626 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3325 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.186 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sicarius-Prototyping/bacon_and_food/06ac1718-fe71-4e05-a47f-1200e067336c.json b/data/hfopenllm_v2/Sicarius-Prototyping/bacon_and_food/06ac1718-fe71-4e05-a47f-1200e067336c.json new file mode 100644 index 000000000..a582b1eff --- /dev/null +++ b/data/hfopenllm_v2/Sicarius-Prototyping/bacon_and_food/06ac1718-fe71-4e05-a47f-1200e067336c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sicarius-Prototyping_bacon_and_food/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bacon_and_food", + "id": "Sicarius-Prototyping/bacon_and_food", + "developer": "Sicarius-Prototyping", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.586 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4725 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0982 + } + }, + { + "evaluation_name": "GPQA", + 
"source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3884 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3263 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sicarius-Prototyping/bacon_and_food/af3374c8-5a23-4a87-990b-123803107ed8.json b/data/hfopenllm_v2/Sicarius-Prototyping/bacon_and_food/af3374c8-5a23-4a87-990b-123803107ed8.json deleted file mode 100644 index 9e7854589..000000000 --- a/data/hfopenllm_v2/Sicarius-Prototyping/bacon_and_food/af3374c8-5a23-4a87-990b-123803107ed8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sicarius-Prototyping_bacon_and_food/1762652579.881054", - "retrieved_timestamp": "1762652579.881054", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sicarius-Prototyping/bacon_and_food", - "developer": "Sicarius-Prototyping", - "inference_platform": "unknown", - "id": "Sicarius-Prototyping/bacon_and_food", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5860428108529812 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47245798883729967 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09818731117824774 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.3883854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3262965425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/2B-ad/31fd60ef-db8f-4785-b486-7a06f1cdf981.json b/data/hfopenllm_v2/SicariusSicariiStuff/2B-ad/31fd60ef-db8f-4785-b486-7a06f1cdf981.json deleted file mode 100644 index 16555e950..000000000 --- a/data/hfopenllm_v2/SicariusSicariiStuff/2B-ad/31fd60ef-db8f-4785-b486-7a06f1cdf981.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_2B-ad/1762652579.88126", - "retrieved_timestamp": "1762652579.881261", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SicariusSicariiStuff/2B-ad", - "developer": "SicariusSicariiStuff", - "inference_platform": "unknown", - "id": "SicariusSicariiStuff/2B-ad", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 3.204 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4378903531518593 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40922431523996955 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40153124999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2662067819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/2B-ad/4ddb1616-7889-45ef-96de-823fee338e1d.json b/data/hfopenllm_v2/SicariusSicariiStuff/2B-ad/4ddb1616-7889-45ef-96de-823fee338e1d.json new file mode 100644 index 000000000..01e28f2ac --- /dev/null +++ b/data/hfopenllm_v2/SicariusSicariiStuff/2B-ad/4ddb1616-7889-45ef-96de-823fee338e1d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/SicariusSicariiStuff_2B-ad/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "2B-ad", + "id": "SicariusSicariiStuff/2B-ad", + "developer": "SicariusSicariiStuff", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 3.204 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4379 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4092 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0506 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4015 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2662 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/2B_or_not_2B/487dd91b-5bc4-4355-90d3-c82ecc789ab3.json b/data/hfopenllm_v2/SicariusSicariiStuff/2B_or_not_2B/487dd91b-5bc4-4355-90d3-c82ecc789ab3.json new file mode 100644 index 000000000..55d96ac62 --- /dev/null +++ b/data/hfopenllm_v2/SicariusSicariiStuff/2B_or_not_2B/487dd91b-5bc4-4355-90d3-c82ecc789ab3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_2B_or_not_2B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging 
Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "2B_or_not_2B", + "id": "SicariusSicariiStuff/2B_or_not_2B", + "developer": "SicariusSicariiStuff", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GemmaForCausalLM", + "params_billions": 2.506 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2062 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3416 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0196 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2475 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3791 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1399 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/2B_or_not_2B/983cf552-1ab1-49ba-aab0-1e644e9a7acb.json b/data/hfopenllm_v2/SicariusSicariiStuff/2B_or_not_2B/983cf552-1ab1-49ba-aab0-1e644e9a7acb.json deleted file mode 100644 index 6d65f25b3..000000000 --- a/data/hfopenllm_v2/SicariusSicariiStuff/2B_or_not_2B/983cf552-1ab1-49ba-aab0-1e644e9a7acb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_2B_or_not_2B/1762652579.881506", - "retrieved_timestamp": "1762652579.881506", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"SicariusSicariiStuff/2B_or_not_2B", - "developer": "SicariusSicariiStuff", - "inference_platform": "unknown", - "id": "SicariusSicariiStuff/2B_or_not_2B", - "additional_details": { - "precision": "float16", - "architecture": "GemmaForCausalLM", - "params_billions": 2.506 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2062316874781136 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3415917024092019 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24748322147651006 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3790833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13987699468085107 - } - } - ] -} diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/Dusk_Rainbow/a74e86d9-8b94-4f60-8f0c-73cc4b04d905.json b/data/hfopenllm_v2/SicariusSicariiStuff/Dusk_Rainbow/a74e86d9-8b94-4f60-8f0c-73cc4b04d905.json new file mode 100644 index 000000000..0d3c0a4eb --- /dev/null +++ b/data/hfopenllm_v2/SicariusSicariiStuff/Dusk_Rainbow/a74e86d9-8b94-4f60-8f0c-73cc4b04d905.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Dusk_Rainbow/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dusk_Rainbow", + "id": "SicariusSicariiStuff/Dusk_Rainbow", + "developer": "SicariusSicariiStuff", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3588 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4772 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0748 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4025 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3443 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/Dusk_Rainbow/e8f1d0e1-4086-4645-983b-b9470a22b522.json b/data/hfopenllm_v2/SicariusSicariiStuff/Dusk_Rainbow/e8f1d0e1-4086-4645-983b-b9470a22b522.json deleted file mode 100644 index 82813fe05..000000000 --- a/data/hfopenllm_v2/SicariusSicariiStuff/Dusk_Rainbow/e8f1d0e1-4086-4645-983b-b9470a22b522.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Dusk_Rainbow/1762652579.881711", - "retrieved_timestamp": "1762652579.8817122", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SicariusSicariiStuff/Dusk_Rainbow", - "developer": "SicariusSicariiStuff", - "inference_platform": "unknown", - "id": "SicariusSicariiStuff/Dusk_Rainbow", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3588057465303173 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47717504280736184 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07477341389728097 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40252083333333327 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3443317819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/Eximius_Persona_5B/98406fba-a2e4-4afd-a121-e33a723d2eb6.json b/data/hfopenllm_v2/SicariusSicariiStuff/Eximius_Persona_5B/98406fba-a2e4-4afd-a121-e33a723d2eb6.json deleted file mode 100644 index 2054348e1..000000000 --- a/data/hfopenllm_v2/SicariusSicariiStuff/Eximius_Persona_5B/98406fba-a2e4-4afd-a121-e33a723d2eb6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Eximius_Persona_5B/1762652579.881908", - "retrieved_timestamp": "1762652579.881909", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SicariusSicariiStuff/Eximius_Persona_5B", - "developer": "SicariusSicariiStuff", - "inference_platform": "unknown", - "id": "SicariusSicariiStuff/Eximius_Persona_5B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 5.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6559850086658954 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4511736018571028 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10196374622356495 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38181249999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy 
on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31399601063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/Eximius_Persona_5B/9a9239ab-9e0e-449b-bd1b-6ec280fad505.json b/data/hfopenllm_v2/SicariusSicariiStuff/Eximius_Persona_5B/9a9239ab-9e0e-449b-bd1b-6ec280fad505.json new file mode 100644 index 000000000..0bf2ac15d --- /dev/null +++ b/data/hfopenllm_v2/SicariusSicariiStuff/Eximius_Persona_5B/9a9239ab-9e0e-449b-bd1b-6ec280fad505.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Eximius_Persona_5B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Eximius_Persona_5B", + "id": "SicariusSicariiStuff/Eximius_Persona_5B", + "developer": "SicariusSicariiStuff", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 5.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.656 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4512 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.102 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3818 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.314 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/SicariusSicariiStuff/Impish_LLAMA_3B/2c710cd5-75a6-46b7-8356-212da7bf864d.json b/data/hfopenllm_v2/SicariusSicariiStuff/Impish_LLAMA_3B/2c710cd5-75a6-46b7-8356-212da7bf864d.json new file mode 100644 index 000000000..a3193370f --- /dev/null +++ b/data/hfopenllm_v2/SicariusSicariiStuff/Impish_LLAMA_3B/2c710cd5-75a6-46b7-8356-212da7bf864d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Impish_LLAMA_3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Impish_LLAMA_3B", + "id": "SicariusSicariiStuff/Impish_LLAMA_3B", + "developer": "SicariusSicariiStuff", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.463 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4091 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1125 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2878 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3673 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2941 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/Impish_Mind_8B/377d5240-73b5-48d0-bbdc-0960ad1d9069.json b/data/hfopenllm_v2/SicariusSicariiStuff/Impish_Mind_8B/377d5240-73b5-48d0-bbdc-0960ad1d9069.json new file mode 
100644 index 000000000..4fcce76c3 --- /dev/null +++ b/data/hfopenllm_v2/SicariusSicariiStuff/Impish_Mind_8B/377d5240-73b5-48d0-bbdc-0960ad1d9069.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Impish_Mind_8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Impish_Mind_8B", + "id": "SicariusSicariiStuff/Impish_Mind_8B", + "developer": "SicariusSicariiStuff", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3179 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4674 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.105 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.407 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3309 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/Impish_Mind_8B/3a0633f1-070a-416d-a7ab-f41dd44f577d.json b/data/hfopenllm_v2/SicariusSicariiStuff/Impish_Mind_8B/3a0633f1-070a-416d-a7ab-f41dd44f577d.json deleted file mode 100644 index 7d8849729..000000000 --- a/data/hfopenllm_v2/SicariusSicariiStuff/Impish_Mind_8B/3a0633f1-070a-416d-a7ab-f41dd44f577d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - 
"evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Impish_Mind_8B/1762652579.8823712", - "retrieved_timestamp": "1762652579.8823712", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SicariusSicariiStuff/Impish_Mind_8B", - "developer": "SicariusSicariiStuff", - "inference_platform": "unknown", - "id": "SicariusSicariiStuff/Impish_Mind_8B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31791424531354584 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46736571616627115 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10498489425981873 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4069583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3308676861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/Impish_QWEN_14B-1M/9f31a6da-c5bd-4143-b2f9-715c0e9f7b74.json b/data/hfopenllm_v2/SicariusSicariiStuff/Impish_QWEN_14B-1M/9f31a6da-c5bd-4143-b2f9-715c0e9f7b74.json new file mode 100644 index 000000000..644abf1f0 --- /dev/null +++ b/data/hfopenllm_v2/SicariusSicariiStuff/Impish_QWEN_14B-1M/9f31a6da-c5bd-4143-b2f9-715c0e9f7b74.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Impish_QWEN_14B-1M/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Impish_QWEN_14B-1M", + "id": "SicariusSicariiStuff/Impish_QWEN_14B-1M", + "developer": "SicariusSicariiStuff", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": 
"hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7868 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6283 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3965 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3507 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4615 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5044 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/Impish_QWEN_7B-1M/104a0157-c614-44cf-b6cc-9f15dab4b187.json b/data/hfopenllm_v2/SicariusSicariiStuff/Impish_QWEN_7B-1M/104a0157-c614-44cf-b6cc-9f15dab4b187.json new file mode 100644 index 000000000..30b1d2ab9 --- /dev/null +++ b/data/hfopenllm_v2/SicariusSicariiStuff/Impish_QWEN_7B-1M/104a0157-c614-44cf-b6cc-9f15dab4b187.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Impish_QWEN_7B-1M/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Impish_QWEN_7B-1M", + "id": "SicariusSicariiStuff/Impish_QWEN_7B-1M", + "developer": "SicariusSicariiStuff", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, 
+ "max_score": 1.0 + }, + "score_details": { + "score": 0.6382 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5372 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3089 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4074 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4265 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/LLAMA-3_8B_Unaligned_BETA/bb379093-c169-44bd-ac86-edb8ab8fc225.json b/data/hfopenllm_v2/SicariusSicariiStuff/LLAMA-3_8B_Unaligned_BETA/bb379093-c169-44bd-ac86-edb8ab8fc225.json new file mode 100644 index 000000000..be20ff82c --- /dev/null +++ b/data/hfopenllm_v2/SicariusSicariiStuff/LLAMA-3_8B_Unaligned_BETA/bb379093-c169-44bd-ac86-edb8ab8fc225.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_LLAMA-3_8B_Unaligned_BETA/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LLAMA-3_8B_Unaligned_BETA", + "id": "SicariusSicariiStuff/LLAMA-3_8B_Unaligned_BETA", + "developer": "SicariusSicariiStuff", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3713 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + 
"source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4717 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0838 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4119 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3465 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/Phi-Line_14B/e29001c0-17c0-4deb-8ca2-ce9ad06d8cb3.json b/data/hfopenllm_v2/SicariusSicariiStuff/Phi-Line_14B/e29001c0-17c0-4deb-8ca2-ce9ad06d8cb3.json new file mode 100644 index 000000000..d361cd930 --- /dev/null +++ b/data/hfopenllm_v2/SicariusSicariiStuff/Phi-Line_14B/e29001c0-17c0-4deb-8ca2-ce9ad06d8cb3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Phi-Line_14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-Line_14B", + "id": "SicariusSicariiStuff/Phi-Line_14B", + "developer": "SicariusSicariiStuff", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6496 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 
1.0 + }, + "score_details": { + "score": 0.6154 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.386 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3532 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4479 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5454 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/Phi-lthy4/43d87bf5-2620-4f8e-a8b6-f86fc157d987.json b/data/hfopenllm_v2/SicariusSicariiStuff/Phi-lthy4/43d87bf5-2620-4f8e-a8b6-f86fc157d987.json new file mode 100644 index 000000000..9e1a945d7 --- /dev/null +++ b/data/hfopenllm_v2/SicariusSicariiStuff/Phi-lthy4/43d87bf5-2620-4f8e-a8b6-f86fc157d987.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Phi-lthy4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-lthy4", + "id": "SicariusSicariiStuff/Phi-lthy4", + "developer": "SicariusSicariiStuff", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 11.933 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7679 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5879 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + 
"metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1367 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4083 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4333 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/Qwen2.5-14B_Uncencored/735d9d75-d9d1-4553-b7cf-f8e7c2e65218.json b/data/hfopenllm_v2/SicariusSicariiStuff/Qwen2.5-14B_Uncencored/735d9d75-d9d1-4553-b7cf-f8e7c2e65218.json new file mode 100644 index 000000000..becab3e7a --- /dev/null +++ b/data/hfopenllm_v2/SicariusSicariiStuff/Qwen2.5-14B_Uncencored/735d9d75-d9d1-4553-b7cf-f8e7c2e65218.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Qwen2.5-14B_Uncencored/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B_Uncencored", + "id": "SicariusSicariiStuff/Qwen2.5-14B_Uncencored", + "developer": "SicariusSicariiStuff", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3158 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6309 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.318 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3817 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4517 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5266 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/Qwen2.5-14B_Uncensored/0c6dcc87-343c-4973-a589-3e3393829184.json b/data/hfopenllm_v2/SicariusSicariiStuff/Qwen2.5-14B_Uncensored/0c6dcc87-343c-4973-a589-3e3393829184.json new file mode 100644 index 000000000..ffe8535f1 --- /dev/null +++ b/data/hfopenllm_v2/SicariusSicariiStuff/Qwen2.5-14B_Uncensored/0c6dcc87-343c-4973-a589-3e3393829184.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Qwen2.5-14B_Uncensored/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B_Uncensored", + "id": "SicariusSicariiStuff/Qwen2.5-14B_Uncensored", + "developer": "SicariusSicariiStuff", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3173 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6309 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.318 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": 
"hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3817 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4517 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5266 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/Qwen2.5-14B_Uncensored_Instruct/7c1d1657-e9ae-433f-be9d-523431bfc7ae.json b/data/hfopenllm_v2/SicariusSicariiStuff/Qwen2.5-14B_Uncensored_Instruct/7c1d1657-e9ae-433f-be9d-523431bfc7ae.json new file mode 100644 index 000000000..79cd730d2 --- /dev/null +++ b/data/hfopenllm_v2/SicariusSicariiStuff/Qwen2.5-14B_Uncensored_Instruct/7c1d1657-e9ae-433f-be9d-523431bfc7ae.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Qwen2.5-14B_Uncensored_Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B_Uncensored_Instruct", + "id": "SicariusSicariiStuff/Qwen2.5-14B_Uncensored_Instruct", + "developer": "SicariusSicariiStuff", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3789 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5937 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3285 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on 
GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3697 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5127 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/Redemption_Wind_24B/0b2d9a65-c028-4f4b-a280-dc0c35ac9516.json b/data/hfopenllm_v2/SicariusSicariiStuff/Redemption_Wind_24B/0b2d9a65-c028-4f4b-a280-dc0c35ac9516.json new file mode 100644 index 000000000..9778a9581 --- /dev/null +++ b/data/hfopenllm_v2/SicariusSicariiStuff/Redemption_Wind_24B/0b2d9a65-c028-4f4b-a280-dc0c35ac9516.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Redemption_Wind_24B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Redemption_Wind_24B", + "id": "SicariusSicariiStuff/Redemption_Wind_24B", + "developer": "SicariusSicariiStuff", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2501 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6428 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1858 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3834 + } + }, + { + "evaluation_name": 
"MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4262 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5432 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/Redemption_Wind_24B/21216e0b-dc97-4502-ba3d-d47ad1ac73b2.json b/data/hfopenllm_v2/SicariusSicariiStuff/Redemption_Wind_24B/21216e0b-dc97-4502-ba3d-d47ad1ac73b2.json deleted file mode 100644 index 2c9a1add8..000000000 --- a/data/hfopenllm_v2/SicariusSicariiStuff/Redemption_Wind_24B/21216e0b-dc97-4502-ba3d-d47ad1ac73b2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Redemption_Wind_24B/1762652579.8843782", - "retrieved_timestamp": "1762652579.884379", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SicariusSicariiStuff/Redemption_Wind_24B", - "developer": "SicariusSicariiStuff", - "inference_platform": "unknown", - "id": "SicariusSicariiStuff/Redemption_Wind_24B", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25014517037017336 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.642816406969129 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18580060422960726 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38338926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4262395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.543218085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/Winged_Imp_8B/dd1936aa-9b21-466d-b74a-807fafd9f24a.json b/data/hfopenllm_v2/SicariusSicariiStuff/Winged_Imp_8B/dd1936aa-9b21-466d-b74a-807fafd9f24a.json deleted file mode 100644 index b3d4754a1..000000000 --- a/data/hfopenllm_v2/SicariusSicariiStuff/Winged_Imp_8B/dd1936aa-9b21-466d-b74a-807fafd9f24a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Winged_Imp_8B/1762652579.8845959", - "retrieved_timestamp": "1762652579.884597", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SicariusSicariiStuff/Winged_Imp_8B", - "developer": "SicariusSicariiStuff", - "inference_platform": "unknown", - "id": "SicariusSicariiStuff/Winged_Imp_8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.743012983328679 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5120376322048542 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41483333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3638630319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/Winged_Imp_8B/e87e1d3f-1476-499d-a9f3-b6463b429262.json b/data/hfopenllm_v2/SicariusSicariiStuff/Winged_Imp_8B/e87e1d3f-1476-499d-a9f3-b6463b429262.json new file mode 100644 index 000000000..d8868496d --- /dev/null +++ b/data/hfopenllm_v2/SicariusSicariiStuff/Winged_Imp_8B/e87e1d3f-1476-499d-a9f3-b6463b429262.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Winged_Imp_8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" 
+ }, + "model_info": { + "name": "Winged_Imp_8B", + "id": "SicariusSicariiStuff/Winged_Imp_8B", + "developer": "SicariusSicariiStuff", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.743 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.512 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1201 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4148 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3639 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/Wingless_Imp_8B/2304646d-a399-40c0-8577-0bab9ad2ff3c.json b/data/hfopenllm_v2/SicariusSicariiStuff/Wingless_Imp_8B/2304646d-a399-40c0-8577-0bab9ad2ff3c.json deleted file mode 100644 index 0b75b5f23..000000000 --- a/data/hfopenllm_v2/SicariusSicariiStuff/Wingless_Imp_8B/2304646d-a399-40c0-8577-0bab9ad2ff3c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Wingless_Imp_8B/1762652579.8848069", - "retrieved_timestamp": "1762652579.8848078", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SicariusSicariiStuff/Wingless_Imp_8B", - "developer": 
"SicariusSicariiStuff", - "inference_platform": "unknown", - "id": "SicariusSicariiStuff/Wingless_Imp_8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.743012983328679 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5120376322048542 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41483333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3638630319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/Wingless_Imp_8B/246e8450-3c53-4bde-99bb-5663f751e88e.json b/data/hfopenllm_v2/SicariusSicariiStuff/Wingless_Imp_8B/246e8450-3c53-4bde-99bb-5663f751e88e.json new file mode 100644 index 000000000..822e85e02 --- /dev/null +++ b/data/hfopenllm_v2/SicariusSicariiStuff/Wingless_Imp_8B/246e8450-3c53-4bde-99bb-5663f751e88e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Wingless_Imp_8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Wingless_Imp_8B", + "id": "SicariusSicariiStuff/Wingless_Imp_8B", + "developer": "SicariusSicariiStuff", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.743 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.512 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1201 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4148 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3639 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/Zion_Alpha/496b9e45-2f64-456e-b35e-12a94c5643b1.json b/data/hfopenllm_v2/SicariusSicariiStuff/Zion_Alpha/496b9e45-2f64-456e-b35e-12a94c5643b1.json new file mode 100644 index 000000000..87bd4943e --- /dev/null +++ b/data/hfopenllm_v2/SicariusSicariiStuff/Zion_Alpha/496b9e45-2f64-456e-b35e-12a94c5643b1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Zion_Alpha/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Zion_Alpha", + "id": "SicariusSicariiStuff/Zion_Alpha", + "developer": "SicariusSicariiStuff", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3324 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4932 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + 
"hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0521 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4727 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3132 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/Zion_Alpha/9d6d36b1-f8ad-4cc8-b904-c7e3b0a923e4.json b/data/hfopenllm_v2/SicariusSicariiStuff/Zion_Alpha/9d6d36b1-f8ad-4cc8-b904-c7e3b0a923e4.json deleted file mode 100644 index f12b86058..000000000 --- a/data/hfopenllm_v2/SicariusSicariiStuff/Zion_Alpha/9d6d36b1-f8ad-4cc8-b904-c7e3b0a923e4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Zion_Alpha/1762652579.885025", - "retrieved_timestamp": "1762652579.885026", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SicariusSicariiStuff/Zion_Alpha", - "developer": "SicariusSicariiStuff", - "inference_platform": "unknown", - "id": "SicariusSicariiStuff/Zion_Alpha", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3324024698910003 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49321099934509743 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05211480362537765 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4726875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31316489361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/dn_ep02/05890047-a95a-433e-b6b6-fb037592cdd1.json b/data/hfopenllm_v2/SicariusSicariiStuff/dn_ep02/05890047-a95a-433e-b6b6-fb037592cdd1.json new file mode 100644 index 000000000..1653d6cec --- /dev/null +++ b/data/hfopenllm_v2/SicariusSicariiStuff/dn_ep02/05890047-a95a-433e-b6b6-fb037592cdd1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_dn_ep02/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dn_ep02", + "id": "SicariusSicariiStuff/dn_ep02", + "developer": "SicariusSicariiStuff", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5064 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5266 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.142 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3154 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4316 + } + }, + { + "evaluation_name": 
"MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3998 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/dn_ep02/f7f3caa2-0468-4dfb-a817-bb5cdc977911.json b/data/hfopenllm_v2/SicariusSicariiStuff/dn_ep02/f7f3caa2-0468-4dfb-a817-bb5cdc977911.json deleted file mode 100644 index 9e450981d..000000000 --- a/data/hfopenllm_v2/SicariusSicariiStuff/dn_ep02/f7f3caa2-0468-4dfb-a817-bb5cdc977911.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_dn_ep02/1762652579.885246", - "retrieved_timestamp": "1762652579.885247", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SicariusSicariiStuff/dn_ep02", - "developer": "SicariusSicariiStuff", - "inference_platform": "unknown", - "id": "SicariusSicariiStuff/dn_ep02", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5064340394597445 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5266008759836228 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1419939577039275 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43163541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39976728723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Llama3.1-8B-lora-epoch1/4a30580c-1d25-49d4-984d-2d28ef3a5656.json b/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Llama3.1-8B-lora-epoch1/4a30580c-1d25-49d4-984d-2d28ef3a5656.json new file mode 100644 index 000000000..79a1347e0 --- /dev/null +++ b/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Llama3.1-8B-lora-epoch1/4a30580c-1d25-49d4-984d-2d28ef3a5656.json @@ -0,0 +1,132 @@ +{ + 
"schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Llama3.1-8B-lora-epoch1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SKY-Ko-Llama3.1-8B-lora-epoch1", + "id": "SkyOrbis/SKY-Ko-Llama3.1-8B-lora-epoch1", + "developer": "SkyOrbis", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5058 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5088 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1548 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3213 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3998 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3777 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Llama3.1-8B-lora/696d7966-d140-4f43-91df-54f02247b34f.json b/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Llama3.1-8B-lora/696d7966-d140-4f43-91df-54f02247b34f.json new file mode 100644 index 000000000..3e699beeb --- /dev/null +++ b/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Llama3.1-8B-lora/696d7966-d140-4f43-91df-54f02247b34f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Llama3.1-8B-lora/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM 
v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SKY-Ko-Llama3.1-8B-lora", + "id": "SkyOrbis/SKY-Ko-Llama3.1-8B-lora", + "developer": "SkyOrbis", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5058 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5088 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1548 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3213 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3998 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3777 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch3/fdf10ab8-e3f9-49e6-8fd0-ed116868c217.json b/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch3/fdf10ab8-e3f9-49e6-8fd0-ed116868c217.json new file mode 100644 index 000000000..cc2e232f1 --- /dev/null +++ b/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch3/fdf10ab8-e3f9-49e6-8fd0-ed116868c217.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Llama3.2-1B-lora-epoch3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SKY-Ko-Llama3.2-1B-lora-epoch3", + "id": 
"SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch3", + "developer": "SkyOrbis", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3247 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3167 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0272 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2517 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3382 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1279 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch5/9ac16d1f-d894-414d-8a14-110e971d0ba6.json b/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch5/9ac16d1f-d894-414d-8a14-110e971d0ba6.json new file mode 100644 index 000000000..c6e0028d4 --- /dev/null +++ b/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch5/9ac16d1f-d894-414d-8a14-110e971d0ba6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Llama3.2-1B-lora-epoch5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SKY-Ko-Llama3.2-1B-lora-epoch5", + "id": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch5", + "developer": "SkyOrbis", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": 
"LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.436 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3406 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0521 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3471 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1946 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch3/2eb01e0e-8f7b-4956-9a2d-b32ecaa936f6.json b/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch3/2eb01e0e-8f7b-4956-9a2d-b32ecaa936f6.json new file mode 100644 index 000000000..13bfe0eca --- /dev/null +++ b/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch3/2eb01e0e-8f7b-4956-9a2d-b32ecaa936f6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Llama3.2-1B-lora-v2-epoch3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SKY-Ko-Llama3.2-1B-lora-v2-epoch3", + "id": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch3", + "developer": "SkyOrbis", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + 
"source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.436 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3406 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0521 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3471 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1946 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch5/3b221b0e-6158-471f-bcd2-b09514f28bd7.json b/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch5/3b221b0e-6158-471f-bcd2-b09514f28bd7.json new file mode 100644 index 000000000..e22db53cd --- /dev/null +++ b/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch5/3b221b0e-6158-471f-bcd2-b09514f28bd7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Llama3.2-1B-lora-v2-epoch5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SKY-Ko-Llama3.2-1B-lora-v2-epoch5", + "id": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch5", + "developer": "SkyOrbis", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4247 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3397 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0506 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2542 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3458 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1946 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch1/c8af8428-aab6-4d19-b185-2b437c0334fa.json b/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch1/c8af8428-aab6-4d19-b185-2b437c0334fa.json new file mode 100644 index 000000000..e29268df8 --- /dev/null +++ b/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch1/c8af8428-aab6-4d19-b185-2b437c0334fa.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Llama3.2-3B-lora-epoch1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SKY-Ko-Llama3.2-3B-lora-epoch1", + "id": "SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch1", + "developer": "SkyOrbis", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5331 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": 
"BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.44 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1458 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3522 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3004 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch2/c617d12b-c37f-47ef-9704-e19774c67aeb.json b/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch2/c617d12b-c37f-47ef-9704-e19774c67aeb.json new file mode 100644 index 000000000..69fae55da --- /dev/null +++ b/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch2/c617d12b-c37f-47ef-9704-e19774c67aeb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Llama3.2-3B-lora-epoch2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SKY-Ko-Llama3.2-3B-lora-epoch2", + "id": "SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch2", + "developer": "SkyOrbis", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5331 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.44 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1458 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3522 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3004 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch3/577f31e2-1808-45e2-a528-5933019cfa85.json b/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch3/577f31e2-1808-45e2-a528-5933019cfa85.json new file mode 100644 index 000000000..44054c6cc --- /dev/null +++ b/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch3/577f31e2-1808-45e2-a528-5933019cfa85.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Llama3.2-3B-lora-epoch3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SKY-Ko-Llama3.2-3B-lora-epoch3", + "id": "SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch3", + "developer": "SkyOrbis", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5331 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.44 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 
5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1458 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3522 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3004 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Qwen2.5-3B-Instruct/7bd7f5c8-be9e-473e-be18-03ad22a195ee.json b/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Qwen2.5-3B-Instruct/7bd7f5c8-be9e-473e-be18-03ad22a195ee.json new file mode 100644 index 000000000..db8427937 --- /dev/null +++ b/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Qwen2.5-3B-Instruct/7bd7f5c8-be9e-473e-be18-03ad22a195ee.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Qwen2.5-3B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SKY-Ko-Qwen2.5-3B-Instruct", + "id": "SkyOrbis/SKY-Ko-Qwen2.5-3B-Instruct", + "developer": "SkyOrbis", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3534 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4265 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0695 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4024 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2812 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Qwen2.5-3B-Instruct/bdcf5d38-55d2-4f55-8bd1-7f4cd94f758c.json b/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Qwen2.5-3B-Instruct/bdcf5d38-55d2-4f55-8bd1-7f4cd94f758c.json deleted file mode 100644 index 64d707df5..000000000 --- a/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Qwen2.5-3B-Instruct/bdcf5d38-55d2-4f55-8bd1-7f4cd94f758c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Qwen2.5-3B-Instruct/1762652579.887695", - "retrieved_timestamp": "1762652579.8876958", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SkyOrbis/SKY-Ko-Qwen2.5-3B-Instruct", - "developer": "SkyOrbis", - "inference_platform": "unknown", - "id": "SkyOrbis/SKY-Ko-Qwen2.5-3B-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3534100630770799 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4264821228336018 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06948640483383686 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40236458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28116688829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-15000/5036a549-5583-4775-935a-1a12b6de3e7d.json b/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-15000/5036a549-5583-4775-935a-1a12b6de3e7d.json new file mode 100644 index 000000000..ce701d5f4 --- /dev/null +++ b/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-15000/5036a549-5583-4775-935a-1a12b6de3e7d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-15000/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-15000", + "id": "SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-15000", + "developer": "SkyOrbis", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3819 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5078 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1866 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3272 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4436 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + 
"dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3914 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-15000/7875e792-80dd-4fa8-9743-b8ef42a4cdb7.json b/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-15000/7875e792-80dd-4fa8-9743-b8ef42a4cdb7.json deleted file mode 100644 index 95fefa175..000000000 --- a/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-15000/7875e792-80dd-4fa8-9743-b8ef42a4cdb7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-15000/1762652579.888021", - "retrieved_timestamp": "1762652579.888022", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-15000", - "developer": "SkyOrbis", - "inference_platform": "unknown", - "id": "SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-15000", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38188672721711725 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5077962006048589 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1865558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271812080536913 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44360416666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3913730053191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-5000/5c0ffff9-542c-424e-88e9-89584e686e12.json b/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-5000/5c0ffff9-542c-424e-88e9-89584e686e12.json new file mode 100644 index 000000000..08f3d6990 --- /dev/null +++ 
b/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-5000/5c0ffff9-542c-424e-88e9-89584e686e12.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-5000/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-5000", + "id": "SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-5000", + "developer": "SkyOrbis", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3812 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.539 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.21 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4238 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4238 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-5000/9354b915-68cd-47ca-a1e8-7481a8b33c49.json b/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-5000/9354b915-68cd-47ca-a1e8-7481a8b33c49.json deleted file mode 100644 index 0a6014d0d..000000000 --- a/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-5000/9354b915-68cd-47ca-a1e8-7481a8b33c49.json +++ /dev/null @@ -1,105 +0,0 @@ 
-{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-5000/1762652579.8882601", - "retrieved_timestamp": "1762652579.888261", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-5000", - "developer": "SkyOrbis", - "inference_platform": "unknown", - "id": "SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-5000", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3812373391490135 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5389864554242366 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20996978851963746 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4237916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42378656914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/Skywork/Skywork-Reward-Gemma-2-27B-v0.2/5c6a045d-2c90-4938-9185-9c1a0f82903a.json b/data/hfopenllm_v2/Skywork/Skywork-Reward-Gemma-2-27B-v0.2/5c6a045d-2c90-4938-9185-9c1a0f82903a.json new file mode 100644 index 000000000..ef679628f --- /dev/null +++ b/data/hfopenllm_v2/Skywork/Skywork-Reward-Gemma-2-27B-v0.2/5c6a045d-2c90-4938-9185-9c1a0f82903a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Skywork_Skywork-Reward-Gemma-2-27B-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Skywork-Reward-Gemma-2-27B-v0.2", + "id": "Skywork/Skywork-Reward-Gemma-2-27B-v0.2", + "developer": "Skywork", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForSequenceClassification", + "params_billions": 27.227 + } + }, + "evaluation_results": [ + { + 
"evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7807 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.636 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2273 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.344 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4231 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4103 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Skywork/Skywork-o1-Open-Llama-3.1-8B/02480176-2058-4e71-a970-9698be8d235e.json b/data/hfopenllm_v2/Skywork/Skywork-o1-Open-Llama-3.1-8B/02480176-2058-4e71-a970-9698be8d235e.json new file mode 100644 index 000000000..92fd0cf95 --- /dev/null +++ b/data/hfopenllm_v2/Skywork/Skywork-o1-Open-Llama-3.1-8B/02480176-2058-4e71-a970-9698be8d235e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Skywork_Skywork-o1-Open-Llama-3.1-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Skywork-o1-Open-Llama-3.1-8B", + "id": "Skywork/Skywork-o1-Open-Llama-3.1-8B", + "developer": "Skywork", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", 
+ "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3518 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4516 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5211 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3156 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.203 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Solshine/Brimful-merged-replete/4be1e5b4-254c-4287-907d-cc845042de37.json b/data/hfopenllm_v2/Solshine/Brimful-merged-replete/4be1e5b4-254c-4287-907d-cc845042de37.json new file mode 100644 index 000000000..8374a1a8d --- /dev/null +++ b/data/hfopenllm_v2/Solshine/Brimful-merged-replete/4be1e5b4-254c-4287-907d-cc845042de37.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Solshine_Brimful-merged-replete/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Brimful-merged-replete", + "id": "Solshine/Brimful-merged-replete", + "developer": "Solshine", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 12.277 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1761 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + 
"source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2883 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.003 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3421 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1085 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Solshine/Brimful-merged-replete/6523a08c-7a43-4784-9650-e1d5144fcfcf.json b/data/hfopenllm_v2/Solshine/Brimful-merged-replete/6523a08c-7a43-4784-9650-e1d5144fcfcf.json deleted file mode 100644 index bd3f29b38..000000000 --- a/data/hfopenllm_v2/Solshine/Brimful-merged-replete/6523a08c-7a43-4784-9650-e1d5144fcfcf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Solshine_Brimful-merged-replete/1762652579.8890932", - "retrieved_timestamp": "1762652579.8890939", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Solshine/Brimful-merged-replete", - "developer": "Solshine", - "inference_platform": "unknown", - "id": "Solshine/Brimful-merged-replete", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 12.277 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17605619755581856 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28834447696551024 - } - }, - { - "evaluation_name": "MATH 
Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0030211480362537764 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.342125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10846077127659574 - } - } - ] -} diff --git a/data/hfopenllm_v2/Solshine/Llama-3-1-big-thoughtful-passthrough-merge-2/21b51852-5cad-414e-92d5-31878f025d67.json b/data/hfopenllm_v2/Solshine/Llama-3-1-big-thoughtful-passthrough-merge-2/21b51852-5cad-414e-92d5-31878f025d67.json new file mode 100644 index 000000000..92595ba08 --- /dev/null +++ b/data/hfopenllm_v2/Solshine/Llama-3-1-big-thoughtful-passthrough-merge-2/21b51852-5cad-414e-92d5-31878f025d67.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Solshine_Llama-3-1-big-thoughtful-passthrough-merge-2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-1-big-thoughtful-passthrough-merge-2", + "id": "Solshine/Llama-3-1-big-thoughtful-passthrough-merge-2", + "developer": "Solshine", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 18.5 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2547 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3209 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0106 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on 
GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3889 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1185 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sorawiz/Gemma-9B-Base/9eb07d4a-1f01-4696-9137-d477ffca43be.json b/data/hfopenllm_v2/Sorawiz/Gemma-9B-Base/9eb07d4a-1f01-4696-9137-d477ffca43be.json new file mode 100644 index 000000000..57b9d2ddf --- /dev/null +++ b/data/hfopenllm_v2/Sorawiz/Gemma-9B-Base/9eb07d4a-1f01-4696-9137-d477ffca43be.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sorawiz_Gemma-9B-Base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma-9B-Base", + "id": "Sorawiz/Gemma-9B-Base", + "developer": "Sorawiz", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1667 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.593 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0982 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3398 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + 
}, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4045 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4235 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sorawiz/Gemma-Creative-9B-Base/4236485b-aa92-4bc4-a652-17ed3231ecf4.json b/data/hfopenllm_v2/Sorawiz/Gemma-Creative-9B-Base/4236485b-aa92-4bc4-a652-17ed3231ecf4.json new file mode 100644 index 000000000..5a311b551 --- /dev/null +++ b/data/hfopenllm_v2/Sorawiz/Gemma-Creative-9B-Base/4236485b-aa92-4bc4-a652-17ed3231ecf4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sorawiz_Gemma-Creative-9B-Base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma-Creative-9B-Base", + "id": "Sorawiz/Gemma-Creative-9B-Base", + "developer": "Sorawiz", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1515 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5459 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0778 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4019 + } + }, + { + 
"evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4008 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sourjayon/DeepSeek-R1-8b-Sify/55a6c2c7-d29e-43a2-abd6-435117967a5d.json b/data/hfopenllm_v2/Sourjayon/DeepSeek-R1-8b-Sify/55a6c2c7-d29e-43a2-abd6-435117967a5d.json deleted file mode 100644 index d46a8fea5..000000000 --- a/data/hfopenllm_v2/Sourjayon/DeepSeek-R1-8b-Sify/55a6c2c7-d29e-43a2-abd6-435117967a5d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sourjayon_DeepSeek-R1-8b-Sify/1762652579.89035", - "retrieved_timestamp": "1762652579.890351", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sourjayon/DeepSeek-R1-8b-Sify", - "developer": "Sourjayon", - "inference_platform": "unknown", - "id": "Sourjayon/DeepSeek-R1-8b-Sify", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3679481553389451 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33793580116642347 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24471299093655588 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3303125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19805518617021275 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sourjayon/DeepSeek-R1-8b-Sify/9c0d6b71-8c6a-4294-961c-972a002b847f.json b/data/hfopenllm_v2/Sourjayon/DeepSeek-R1-8b-Sify/9c0d6b71-8c6a-4294-961c-972a002b847f.json new file mode 100644 index 000000000..509be1166 --- /dev/null +++ b/data/hfopenllm_v2/Sourjayon/DeepSeek-R1-8b-Sify/9c0d6b71-8c6a-4294-961c-972a002b847f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + 
"evaluation_id": "hfopenllm_v2/Sourjayon_DeepSeek-R1-8b-Sify/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-R1-8b-Sify", + "id": "Sourjayon/DeepSeek-R1-8b-Sify", + "developer": "Sourjayon", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3679 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3379 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2447 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2525 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3303 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1981 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sourjayon/DeepSeek-R1-ForumNXT/101d8dec-2e39-47d1-b76d-d91d6562feff.json b/data/hfopenllm_v2/Sourjayon/DeepSeek-R1-ForumNXT/101d8dec-2e39-47d1-b76d-d91d6562feff.json deleted file mode 100644 index 8b6c80a17..000000000 --- a/data/hfopenllm_v2/Sourjayon/DeepSeek-R1-ForumNXT/101d8dec-2e39-47d1-b76d-d91d6562feff.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sourjayon_DeepSeek-R1-ForumNXT/1762652579.890614", - "retrieved_timestamp": "1762652579.890615", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sourjayon/DeepSeek-R1-ForumNXT", - "developer": "Sourjayon", - "inference_platform": "unknown", - "id": "Sourjayon/DeepSeek-R1-ForumNXT", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26028714920854445 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3310198487331462 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25755287009063443 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3392395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16481050531914893 - } - } - ] -} diff --git a/data/hfopenllm_v2/Sourjayon/DeepSeek-R1-ForumNXT/d1e906d5-8f0d-49c2-88c3-cf71774de600.json b/data/hfopenllm_v2/Sourjayon/DeepSeek-R1-ForumNXT/d1e906d5-8f0d-49c2-88c3-cf71774de600.json new file mode 100644 index 000000000..e4a1e60b8 --- /dev/null +++ b/data/hfopenllm_v2/Sourjayon/DeepSeek-R1-ForumNXT/d1e906d5-8f0d-49c2-88c3-cf71774de600.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Sourjayon_DeepSeek-R1-ForumNXT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-R1-ForumNXT", + "id": "Sourjayon/DeepSeek-R1-ForumNXT", + "developer": "Sourjayon", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2603 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + 
"dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.331 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3392 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1648 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SpaceYL/ECE_Poirot/32feb55a-fde5-4bbd-b93e-abffc1a7e573.json b/data/hfopenllm_v2/SpaceYL/ECE_Poirot/32feb55a-fde5-4bbd-b93e-abffc1a7e573.json deleted file mode 100644 index 70b5cd11a..000000000 --- a/data/hfopenllm_v2/SpaceYL/ECE_Poirot/32feb55a-fde5-4bbd-b93e-abffc1a7e573.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SpaceYL_ECE_Poirot/1762652579.890822", - "retrieved_timestamp": "1762652579.890822", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SpaceYL/ECE_Poirot", - "developer": "SpaceYL", - "inference_platform": "unknown", - "id": "SpaceYL/ECE_Poirot", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3106956209524063 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42622349736626014 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": 
"Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09138972809667674 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40264583333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2883144946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/SpaceYL/ECE_Poirot/798e4f83-6262-4d5b-a854-6ff114167209.json b/data/hfopenllm_v2/SpaceYL/ECE_Poirot/798e4f83-6262-4d5b-a854-6ff114167209.json new file mode 100644 index 000000000..fa2400717 --- /dev/null +++ b/data/hfopenllm_v2/SpaceYL/ECE_Poirot/798e4f83-6262-4d5b-a854-6ff114167209.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SpaceYL_ECE_Poirot/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE_Poirot", + "id": "SpaceYL/ECE_Poirot", + "developer": "SpaceYL", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3107 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4262 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0914 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": 
"hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4026 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2883 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Spestly/Athena-1-3B/29d6834e-38f7-472f-86be-79a8fce03989.json b/data/hfopenllm_v2/Spestly/Athena-1-3B/29d6834e-38f7-472f-86be-79a8fce03989.json deleted file mode 100644 index 1d42cb093..000000000 --- a/data/hfopenllm_v2/Spestly/Athena-1-3B/29d6834e-38f7-472f-86be-79a8fce03989.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Spestly_Athena-1-3B/1762652579.8910668", - "retrieved_timestamp": "1762652579.891068", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Spestly/Athena-1-3B", - "developer": "Spestly", - "inference_platform": "unknown", - "id": "Spestly/Athena-1-3B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5569167586448401 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47015477265388084 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23791540785498488 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43622916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35189494680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/Spestly/Athena-1-3B/dd2603d5-e99e-4778-95d0-159c788626cf.json b/data/hfopenllm_v2/Spestly/Athena-1-3B/dd2603d5-e99e-4778-95d0-159c788626cf.json new file 
mode 100644 index 000000000..da9add401 --- /dev/null +++ b/data/hfopenllm_v2/Spestly/Athena-1-3B/dd2603d5-e99e-4778-95d0-159c788626cf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Spestly_Athena-1-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Athena-1-3B", + "id": "Spestly/Athena-1-3B", + "developer": "Spestly", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5569 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4702 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2379 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4362 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3519 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Spestly/Atlas-Pro-1.5B-Preview/41c71990-e79d-447f-b082-63c96fd67a1f.json b/data/hfopenllm_v2/Spestly/Atlas-Pro-1.5B-Preview/41c71990-e79d-447f-b082-63c96fd67a1f.json new file mode 100644 index 000000000..81aa9d6c5 --- /dev/null +++ b/data/hfopenllm_v2/Spestly/Atlas-Pro-1.5B-Preview/41c71990-e79d-447f-b082-63c96fd67a1f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Spestly_Atlas-Pro-1.5B-Preview/1770682486.623709", + 
"retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Atlas-Pro-1.5B-Preview", + "id": "Spestly/Atlas-Pro-1.5B-Preview", + "developer": "Spestly", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.243 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3499 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3195 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3354 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1925 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Spestly/Atlas-Pro-1.5B-Preview/8282705f-6b69-40c2-825d-8e0c72756083.json b/data/hfopenllm_v2/Spestly/Atlas-Pro-1.5B-Preview/8282705f-6b69-40c2-825d-8e0c72756083.json deleted file mode 100644 index cb0becfda..000000000 --- a/data/hfopenllm_v2/Spestly/Atlas-Pro-1.5B-Preview/8282705f-6b69-40c2-825d-8e0c72756083.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Spestly_Atlas-Pro-1.5B-Preview/1762652579.891309", - "retrieved_timestamp": "1762652579.89131", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Spestly/Atlas-Pro-1.5B-Preview", - "developer": "Spestly", - "inference_platform": "unknown", - "id": "Spestly/Atlas-Pro-1.5B-Preview", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2429509257658568 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.349893585329524 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31948640483383683 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3354270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1924867021276596 - } - } - ] -} diff --git a/data/hfopenllm_v2/Spestly/Atlas-Pro-7B-Preview/57a36976-0868-462e-ab57-3addef7ea2f9.json b/data/hfopenllm_v2/Spestly/Atlas-Pro-7B-Preview/57a36976-0868-462e-ab57-3addef7ea2f9.json deleted file mode 100644 index 0b054a0ed..000000000 --- a/data/hfopenllm_v2/Spestly/Atlas-Pro-7B-Preview/57a36976-0868-462e-ab57-3addef7ea2f9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Spestly_Atlas-Pro-7B-Preview/1762652579.891519", - "retrieved_timestamp": "1762652579.89152", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Spestly/Atlas-Pro-7B-Preview", - "developer": "Spestly", - "inference_platform": "unknown", - "id": "Spestly/Atlas-Pro-7B-Preview", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31541642840995227 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46679203304308553 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5083081570996979 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.337248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3910833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2970412234042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/Spestly/Atlas-Pro-7B-Preview/b9e25948-2871-4b6c-933b-8a731e48e81b.json b/data/hfopenllm_v2/Spestly/Atlas-Pro-7B-Preview/b9e25948-2871-4b6c-933b-8a731e48e81b.json new file mode 100644 index 000000000..5b58d6fbc --- /dev/null +++ b/data/hfopenllm_v2/Spestly/Atlas-Pro-7B-Preview/b9e25948-2871-4b6c-933b-8a731e48e81b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Spestly_Atlas-Pro-7B-Preview/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Atlas-Pro-7B-Preview", + "id": "Spestly/Atlas-Pro-7B-Preview", + "developer": "Spestly", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3154 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4668 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5083 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3372 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3911 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Stark2008/GutenLaserPi/7c70df74-2bc2-40e0-b0f4-77be1a7e044c.json b/data/hfopenllm_v2/Stark2008/GutenLaserPi/7c70df74-2bc2-40e0-b0f4-77be1a7e044c.json new file mode 100644 index 000000000..1770ffa03 --- /dev/null +++ b/data/hfopenllm_v2/Stark2008/GutenLaserPi/7c70df74-2bc2-40e0-b0f4-77be1a7e044c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Stark2008_GutenLaserPi/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "GutenLaserPi", + "id": "Stark2008/GutenLaserPi", + "developer": "Stark2008", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4227 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5212 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0785 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": 
"hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.462 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3106 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Stark2008/GutenLaserPi/e418f7d1-8fd6-44ea-bc33-62fb525589f1.json b/data/hfopenllm_v2/Stark2008/GutenLaserPi/e418f7d1-8fd6-44ea-bc33-62fb525589f1.json deleted file mode 100644 index 3f5cf048f..000000000 --- a/data/hfopenllm_v2/Stark2008/GutenLaserPi/e418f7d1-8fd6-44ea-bc33-62fb525589f1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Stark2008_GutenLaserPi/1762652579.891723", - "retrieved_timestamp": "1762652579.891723", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Stark2008/GutenLaserPi", - "developer": "Stark2008", - "inference_platform": "unknown", - "id": "Stark2008/GutenLaserPi", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42265300513747966 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5212342482489518 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07854984894259819 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4620208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31058843085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/Stark2008/LayleleFlamPi/c12a519e-9d34-4671-8e98-c69178e08ec0.json 
b/data/hfopenllm_v2/Stark2008/LayleleFlamPi/c12a519e-9d34-4671-8e98-c69178e08ec0.json deleted file mode 100644 index 383b93021..000000000 --- a/data/hfopenllm_v2/Stark2008/LayleleFlamPi/c12a519e-9d34-4671-8e98-c69178e08ec0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Stark2008_LayleleFlamPi/1762652579.8919628", - "retrieved_timestamp": "1762652579.891964", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Stark2008/LayleleFlamPi", - "developer": "Stark2008", - "inference_platform": "unknown", - "id": "Stark2008/LayleleFlamPi", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42842325030917966 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5115654142581095 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46084375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3093417553191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/Stark2008/LayleleFlamPi/ea71bdd5-3aa1-4d26-9256-5aeb2f79fa8c.json b/data/hfopenllm_v2/Stark2008/LayleleFlamPi/ea71bdd5-3aa1-4d26-9256-5aeb2f79fa8c.json new file mode 100644 index 000000000..9aef1f6e6 --- /dev/null +++ b/data/hfopenllm_v2/Stark2008/LayleleFlamPi/ea71bdd5-3aa1-4d26-9256-5aeb2f79fa8c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Stark2008_LayleleFlamPi/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LayleleFlamPi", + "id": "Stark2008/LayleleFlamPi", + "developer": "Stark2008", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + 
"params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4284 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5116 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0665 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4608 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3093 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Stark2008/VisFlamCat/b0e9c0ca-cd56-42c8-96ed-477884bfd9f9.json b/data/hfopenllm_v2/Stark2008/VisFlamCat/b0e9c0ca-cd56-42c8-96ed-477884bfd9f9.json new file mode 100644 index 000000000..afdd9f905 --- /dev/null +++ b/data/hfopenllm_v2/Stark2008/VisFlamCat/b0e9c0ca-cd56-42c8-96ed-477884bfd9f9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Stark2008_VisFlamCat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "VisFlamCat", + "id": "Stark2008/VisFlamCat", + "developer": "Stark2008", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4366 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5217 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0763 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4463 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3144 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Stark2008/VisFlamCat/ed5f857e-6799-4729-a2e5-afbea4b89ecd.json b/data/hfopenllm_v2/Stark2008/VisFlamCat/ed5f857e-6799-4729-a2e5-afbea4b89ecd.json deleted file mode 100644 index e903cd19f..000000000 --- a/data/hfopenllm_v2/Stark2008/VisFlamCat/ed5f857e-6799-4729-a2e5-afbea4b89ecd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Stark2008_VisFlamCat/1762652579.892166", - "retrieved_timestamp": "1762652579.892166", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Stark2008/VisFlamCat", - "developer": "Stark2008", - "inference_platform": "unknown", - "id": "Stark2008/VisFlamCat", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43659157701565177 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5216957865099948 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07628398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44627083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31441156914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/Steelskull/L3.3-MS-Nevoria-70b/5db5f87b-9bb0-4d29-b578-72bb896f3359.json b/data/hfopenllm_v2/Steelskull/L3.3-MS-Nevoria-70b/5db5f87b-9bb0-4d29-b578-72bb896f3359.json deleted file mode 100644 index 3378838bd..000000000 --- a/data/hfopenllm_v2/Steelskull/L3.3-MS-Nevoria-70b/5db5f87b-9bb0-4d29-b578-72bb896f3359.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Steelskull_L3.3-MS-Nevoria-70b/1762652579.8924139", - "retrieved_timestamp": "1762652579.892415", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Steelskull/L3.3-MS-Nevoria-70b", - "developer": "Steelskull", - "inference_platform": "unknown", - "id": "Steelskull/L3.3-MS-Nevoria-70b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6963268571833845 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6997536580025828 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3957703927492447 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47063758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4682291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5535239361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/Steelskull/L3.3-MS-Nevoria-70b/7395fcde-49dd-47f4-a8ea-463eda40f5e3.json b/data/hfopenllm_v2/Steelskull/L3.3-MS-Nevoria-70b/7395fcde-49dd-47f4-a8ea-463eda40f5e3.json new file mode 100644 index 000000000..26de03a08 --- /dev/null +++ b/data/hfopenllm_v2/Steelskull/L3.3-MS-Nevoria-70b/7395fcde-49dd-47f4-a8ea-463eda40f5e3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Steelskull_L3.3-MS-Nevoria-70b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3.3-MS-Nevoria-70b", + "id": "Steelskull/L3.3-MS-Nevoria-70b", + "developer": "Steelskull", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6963 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6998 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3958 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4706 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4682 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5535 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Steelskull/L3.3-Nevoria-R1-70b/1465ebc9-f2c3-46df-b5e1-37e7a027fde8.json b/data/hfopenllm_v2/Steelskull/L3.3-Nevoria-R1-70b/1465ebc9-f2c3-46df-b5e1-37e7a027fde8.json deleted file mode 100644 index 9bf4d3c5a..000000000 --- a/data/hfopenllm_v2/Steelskull/L3.3-Nevoria-R1-70b/1465ebc9-f2c3-46df-b5e1-37e7a027fde8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Steelskull_L3.3-Nevoria-R1-70b/1762652579.892649", - "retrieved_timestamp": "1762652579.89265", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Steelskull/L3.3-Nevoria-R1-70b", - "developer": "Steelskull", - "inference_platform": "unknown", - "id": "Steelskull/L3.3-Nevoria-R1-70b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6023794642659255 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6971668662651651 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46895973154362414 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47753125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5462932180851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/Steelskull/L3.3-Nevoria-R1-70b/a130087f-566f-4405-b662-1102f1664c49.json b/data/hfopenllm_v2/Steelskull/L3.3-Nevoria-R1-70b/a130087f-566f-4405-b662-1102f1664c49.json new file mode 100644 index 000000000..ff75741dd --- /dev/null +++ b/data/hfopenllm_v2/Steelskull/L3.3-Nevoria-R1-70b/a130087f-566f-4405-b662-1102f1664c49.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Steelskull_L3.3-Nevoria-R1-70b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3.3-Nevoria-R1-70b", + "id": "Steelskull/L3.3-Nevoria-R1-70b", + "developer": "Steelskull", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6024 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6972 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.463 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.469 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4775 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5463 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/StelleX/Qwen2.5_Math_7B_Cot/3be58cf3-4761-4459-9f3c-eabf812a3c19.json b/data/hfopenllm_v2/StelleX/Qwen2.5_Math_7B_Cot/3be58cf3-4761-4459-9f3c-eabf812a3c19.json new file mode 100644 index 000000000..e315d4ecc --- /dev/null +++ b/data/hfopenllm_v2/StelleX/Qwen2.5_Math_7B_Cot/3be58cf3-4761-4459-9f3c-eabf812a3c19.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/StelleX_Qwen2.5_Math_7B_Cot/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5_Math_7B_Cot", + "id": "StelleX/Qwen2.5_Math_7B_Cot", + "developer": "StelleX", + "inference_platform": "unknown", + 
"additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2143 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4313 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3263 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3924 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/StelleX/Vorisatex-7B-preview/875156be-2ff9-4ec4-8085-27f22fb19259.json b/data/hfopenllm_v2/StelleX/Vorisatex-7B-preview/875156be-2ff9-4ec4-8085-27f22fb19259.json deleted file mode 100644 index d72b9d3ad..000000000 --- a/data/hfopenllm_v2/StelleX/Vorisatex-7B-preview/875156be-2ff9-4ec4-8085-27f22fb19259.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/StelleX_Vorisatex-7B-preview/1762652579.893095", - "retrieved_timestamp": "1762652579.893096", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "StelleX/Vorisatex-7B-preview", - "developer": "StelleX", - "inference_platform": "unknown", - "id": "StelleX/Vorisatex-7B-preview", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1515013497519914 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3111695757290421 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41923958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11660571808510638 - } - } - ] -} diff --git a/data/hfopenllm_v2/StelleX/Vorisatex-7B-preview/dbdd71ad-db5b-4b4b-8856-68b55adbe127.json b/data/hfopenllm_v2/StelleX/Vorisatex-7B-preview/dbdd71ad-db5b-4b4b-8856-68b55adbe127.json new file mode 100644 index 000000000..7a0e33163 --- /dev/null +++ b/data/hfopenllm_v2/StelleX/Vorisatex-7B-preview/dbdd71ad-db5b-4b4b-8856-68b55adbe127.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/StelleX_Vorisatex-7B-preview/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Vorisatex-7B-preview", + "id": "StelleX/Vorisatex-7B-preview", + "developer": "StelleX", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1515 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3112 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + 
"metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0287 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2517 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4192 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1166 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SultanR/SmolTulu-1.7b-Instruct/1b0bd686-fd26-441f-b280-97b10bb1449c.json b/data/hfopenllm_v2/SultanR/SmolTulu-1.7b-Instruct/1b0bd686-fd26-441f-b280-97b10bb1449c.json deleted file mode 100644 index 8ada648dc..000000000 --- a/data/hfopenllm_v2/SultanR/SmolTulu-1.7b-Instruct/1b0bd686-fd26-441f-b280-97b10bb1449c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SultanR_SmolTulu-1.7b-Instruct/1762652579.893334", - "retrieved_timestamp": "1762652579.893334", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SultanR/SmolTulu-1.7b-Instruct", - "developer": "SultanR", - "inference_platform": "unknown", - "id": "SultanR/SmolTulu-1.7b-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.711 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6540867121459949 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3713086260572204 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07930513595166164 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35403125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17104388297872342 - } - } - ] -} diff --git a/data/hfopenllm_v2/SultanR/SmolTulu-1.7b-Instruct/da159a16-48a0-45e3-ad4d-bdc9e8b5288c.json b/data/hfopenllm_v2/SultanR/SmolTulu-1.7b-Instruct/da159a16-48a0-45e3-ad4d-bdc9e8b5288c.json new file mode 100644 index 000000000..0304b53c6 --- /dev/null +++ b/data/hfopenllm_v2/SultanR/SmolTulu-1.7b-Instruct/da159a16-48a0-45e3-ad4d-bdc9e8b5288c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SultanR_SmolTulu-1.7b-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SmolTulu-1.7b-Instruct", + "id": "SultanR/SmolTulu-1.7b-Instruct", + "developer": "SultanR", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.711 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6541 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3713 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0793 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2693 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.354 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + 
"source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.171 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SultanR/SmolTulu-1.7b-Reinforced/224b4cbc-e36c-4f68-9918-edbdaf947191.json b/data/hfopenllm_v2/SultanR/SmolTulu-1.7b-Reinforced/224b4cbc-e36c-4f68-9918-edbdaf947191.json deleted file mode 100644 index 16ee5556e..000000000 --- a/data/hfopenllm_v2/SultanR/SmolTulu-1.7b-Reinforced/224b4cbc-e36c-4f68-9918-edbdaf947191.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SultanR_SmolTulu-1.7b-Reinforced/1762652579.893585", - "retrieved_timestamp": "1762652579.893586", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SultanR/SmolTulu-1.7b-Reinforced", - "developer": "SultanR", - "inference_platform": "unknown", - "id": "SultanR/SmolTulu-1.7b-Reinforced", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.711 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6790659893526954 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3551868188444029 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34060416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17627992021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/SultanR/SmolTulu-1.7b-Reinforced/77d5f51e-5ad2-42a6-a32c-060cd844b949.json b/data/hfopenllm_v2/SultanR/SmolTulu-1.7b-Reinforced/77d5f51e-5ad2-42a6-a32c-060cd844b949.json new file mode 100644 index 000000000..37b00c3a4 --- /dev/null +++ b/data/hfopenllm_v2/SultanR/SmolTulu-1.7b-Reinforced/77d5f51e-5ad2-42a6-a32c-060cd844b949.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/SultanR_SmolTulu-1.7b-Reinforced/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SmolTulu-1.7b-Reinforced", + "id": "SultanR/SmolTulu-1.7b-Reinforced", + "developer": "SultanR", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.711 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6791 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3552 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0718 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3406 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1763 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SultanR/SmolTulu-1.7b-it-v0/22ea218a-e3be-4e05-9a94-af716bb3a624.json b/data/hfopenllm_v2/SultanR/SmolTulu-1.7b-it-v0/22ea218a-e3be-4e05-9a94-af716bb3a624.json deleted file mode 100644 index 097e48f86..000000000 --- a/data/hfopenllm_v2/SultanR/SmolTulu-1.7b-it-v0/22ea218a-e3be-4e05-9a94-af716bb3a624.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SultanR_SmolTulu-1.7b-it-v0/1762652579.8938031", - "retrieved_timestamp": "1762652579.8938031", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { 
- "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SultanR/SmolTulu-1.7b-it-v0", - "developer": "SultanR", - "inference_platform": "unknown", - "id": "SultanR/SmolTulu-1.7b-it-v0", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.711 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6540867121459949 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3713086260572204 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07930513595166164 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35403125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17104388297872342 - } - } - ] -} diff --git a/data/hfopenllm_v2/SultanR/SmolTulu-1.7b-it-v0/724cc582-cc83-474b-9606-70dbc22f3581.json b/data/hfopenllm_v2/SultanR/SmolTulu-1.7b-it-v0/724cc582-cc83-474b-9606-70dbc22f3581.json new file mode 100644 index 000000000..4d26688c4 --- /dev/null +++ b/data/hfopenllm_v2/SultanR/SmolTulu-1.7b-it-v0/724cc582-cc83-474b-9606-70dbc22f3581.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/SultanR_SmolTulu-1.7b-it-v0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SmolTulu-1.7b-it-v0", + "id": "SultanR/SmolTulu-1.7b-it-v0", + "developer": "SultanR", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.711 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6541 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + 
"hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3713 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0793 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2693 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.354 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.171 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Supichi/BBA-123/8a1b2aae-d717-4b49-8ed2-a7ee2cee1940.json b/data/hfopenllm_v2/Supichi/BBA-123/8a1b2aae-d717-4b49-8ed2-a7ee2cee1940.json new file mode 100644 index 000000000..c72fad264 --- /dev/null +++ b/data/hfopenllm_v2/Supichi/BBA-123/8a1b2aae-d717-4b49-8ed2-a7ee2cee1940.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Supichi_BBA-123/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BBA-123", + "id": "Supichi/BBA-123", + "developer": "Supichi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 17.161 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.208 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.292 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH 
Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3499 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1167 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Supichi/BBA-123/a469604f-f755-46e0-8b1c-db4a365dec34.json b/data/hfopenllm_v2/Supichi/BBA-123/a469604f-f755-46e0-8b1c-db4a365dec34.json deleted file mode 100644 index 87184916b..000000000 --- a/data/hfopenllm_v2/Supichi/BBA-123/a469604f-f755-46e0-8b1c-db4a365dec34.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Supichi_BBA-123/1762652579.894015", - "retrieved_timestamp": "1762652579.894016", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Supichi/BBA-123", - "developer": "Supichi", - "inference_platform": "unknown", - "id": "Supichi/BBA-123", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 17.161 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2079548930171944 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2920111436321769 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34990625000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11668882978723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/Supichi/BBA99/0dfb062d-a6ec-42a6-a9f9-6f6424bbdf0c.json b/data/hfopenllm_v2/Supichi/BBA99/0dfb062d-a6ec-42a6-a9f9-6f6424bbdf0c.json new file mode 100644 index 000000000..7dab217a1 --- /dev/null +++ b/data/hfopenllm_v2/Supichi/BBA99/0dfb062d-a6ec-42a6-a9f9-6f6424bbdf0c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Supichi_BBA99/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BBA99", + "id": "Supichi/BBA99", + "developer": "Supichi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 17.161 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1407 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2769 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3218 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1112 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Supichi/BBA99/fa793cb5-5522-4777-8d6f-e4719a51f767.json b/data/hfopenllm_v2/Supichi/BBA99/fa793cb5-5522-4777-8d6f-e4719a51f767.json deleted file mode 100644 index 39e60384b..000000000 --- a/data/hfopenllm_v2/Supichi/BBA99/fa793cb5-5522-4777-8d6f-e4719a51f767.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Supichi_BBA99/1762652579.8942661", - "retrieved_timestamp": "1762652579.8942661", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Supichi/BBA99", - "developer": "Supichi", - "inference_platform": "unknown", - "id": "Supichi/BBA99", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 17.161 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14066011516110588 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2768958340020912 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32184375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11120345744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/Supichi/BBAIK29/ab2512fa-2335-4817-9a76-3259690bbc67.json b/data/hfopenllm_v2/Supichi/BBAIK29/ab2512fa-2335-4817-9a76-3259690bbc67.json new file mode 100644 index 000000000..90e50a5c5 --- /dev/null +++ b/data/hfopenllm_v2/Supichi/BBAIK29/ab2512fa-2335-4817-9a76-3259690bbc67.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Supichi_BBAIK29/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"BBAIK29", + "id": "Supichi/BBAIK29", + "developer": "Supichi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4588 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.559 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3678 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4501 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4469 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Supichi/BBAIK29/de5f2ab9-f1d2-49bc-9771-41b9da1bdfa3.json b/data/hfopenllm_v2/Supichi/BBAIK29/de5f2ab9-f1d2-49bc-9771-41b9da1bdfa3.json deleted file mode 100644 index ade0c084f..000000000 --- a/data/hfopenllm_v2/Supichi/BBAIK29/de5f2ab9-f1d2-49bc-9771-41b9da1bdfa3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Supichi_BBAIK29/1762652579.89447", - "retrieved_timestamp": "1762652579.894471", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Supichi/BBAIK29", - "developer": "Supichi", - "inference_platform": "unknown", - "id": "Supichi/BBAIK29", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 
7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45884807865352817 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5589641249478369 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3678247734138973 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45008333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4468916223404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/Supichi/BBAI_135_Gemma/fe7f1442-b7db-42d5-bc83-b8afd1d0c802.json b/data/hfopenllm_v2/Supichi/BBAI_135_Gemma/fe7f1442-b7db-42d5-bc83-b8afd1d0c802.json new file mode 100644 index 000000000..f1fa97e95 --- /dev/null +++ b/data/hfopenllm_v2/Supichi/BBAI_135_Gemma/fe7f1442-b7db-42d5-bc83-b8afd1d0c802.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Supichi_BBAI_135_Gemma/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BBAI_135_Gemma", + "id": "Supichi/BBAI_135_Gemma", + "developer": "Supichi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 19.3 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0656 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3568 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3805 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1672 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Supichi/BBAI_250_Xia0_gZ/068a06f4-3fdc-495f-b7e4-0effebe24e42.json b/data/hfopenllm_v2/Supichi/BBAI_250_Xia0_gZ/068a06f4-3fdc-495f-b7e4-0effebe24e42.json deleted file mode 100644 index d9ae64774..000000000 --- a/data/hfopenllm_v2/Supichi/BBAI_250_Xia0_gZ/068a06f4-3fdc-495f-b7e4-0effebe24e42.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Supichi_BBAI_250_Xia0_gZ/1762652579.894933", - "retrieved_timestamp": "1762652579.894933", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Supichi/BBAI_250_Xia0_gZ", - "developer": "Supichi", - "inference_platform": "unknown", - "id": "Supichi/BBAI_250_Xia0_gZ", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4685401401614383 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5567682997527722 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3640483383685801 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": 
"MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4579270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4464760638297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/Supichi/BBAI_250_Xia0_gZ/0e14484a-69d7-423e-bf6c-33d0992f408c.json b/data/hfopenllm_v2/Supichi/BBAI_250_Xia0_gZ/0e14484a-69d7-423e-bf6c-33d0992f408c.json new file mode 100644 index 000000000..dbf736418 --- /dev/null +++ b/data/hfopenllm_v2/Supichi/BBAI_250_Xia0_gZ/0e14484a-69d7-423e-bf6c-33d0992f408c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Supichi_BBAI_250_Xia0_gZ/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BBAI_250_Xia0_gZ", + "id": "Supichi/BBAI_250_Xia0_gZ", + "developer": "Supichi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4685 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5568 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.364 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3213 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4579 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4465 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Supichi/BBAI_275_Tsunami_gZ/173028b9-03e3-44d7-a7e9-2c0c5c6f4b4e.json b/data/hfopenllm_v2/Supichi/BBAI_275_Tsunami_gZ/173028b9-03e3-44d7-a7e9-2c0c5c6f4b4e.json deleted file mode 100644 index 1d2b35d05..000000000 --- a/data/hfopenllm_v2/Supichi/BBAI_275_Tsunami_gZ/173028b9-03e3-44d7-a7e9-2c0c5c6f4b4e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Supichi_BBAI_275_Tsunami_gZ/1762652579.895135", - "retrieved_timestamp": "1762652579.895135", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Supichi/BBAI_275_Tsunami_gZ", - "developer": "Supichi", - "inference_platform": "unknown", - "id": "Supichi/BBAI_275_Tsunami_gZ", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5369586031729146 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5531259476127334 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3285498489425982 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44478124999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44921875 - } - } - ] -} diff --git a/data/hfopenllm_v2/Supichi/BBAI_275_Tsunami_gZ/881eaa2c-af5f-4e84-8807-d0835c10ebd2.json b/data/hfopenllm_v2/Supichi/BBAI_275_Tsunami_gZ/881eaa2c-af5f-4e84-8807-d0835c10ebd2.json new file mode 100644 index 000000000..c2465ba88 --- /dev/null +++ b/data/hfopenllm_v2/Supichi/BBAI_275_Tsunami_gZ/881eaa2c-af5f-4e84-8807-d0835c10ebd2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Supichi_BBAI_275_Tsunami_gZ/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + 
"source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BBAI_275_Tsunami_gZ", + "id": "Supichi/BBAI_275_Tsunami_gZ", + "developer": "Supichi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.537 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5531 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3285 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3213 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4448 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4492 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Supichi/BBAI_525_Tsu_gZ_Xia0/6b6b273e-9cf0-405e-b1e4-5fdbd2ae16d9.json b/data/hfopenllm_v2/Supichi/BBAI_525_Tsu_gZ_Xia0/6b6b273e-9cf0-405e-b1e4-5fdbd2ae16d9.json deleted file mode 100644 index b73f71657..000000000 --- a/data/hfopenllm_v2/Supichi/BBAI_525_Tsu_gZ_Xia0/6b6b273e-9cf0-405e-b1e4-5fdbd2ae16d9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Supichi_BBAI_525_Tsu_gZ_Xia0/1762652579.8953412", - "retrieved_timestamp": "1762652579.8953412", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": 
{ - "name": "Supichi/BBAI_525_Tsu_gZ_Xia0", - "developer": "Supichi", - "inference_platform": "unknown", - "id": "Supichi/BBAI_525_Tsu_gZ_Xia0", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5338612658856279 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5561933633430705 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3429003021148036 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44744791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44772273936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/Supichi/BBAI_525_Tsu_gZ_Xia0/ef8a7079-9d13-42b7-ab2d-b72df5ae5d95.json b/data/hfopenllm_v2/Supichi/BBAI_525_Tsu_gZ_Xia0/ef8a7079-9d13-42b7-ab2d-b72df5ae5d95.json new file mode 100644 index 000000000..8e1fdf952 --- /dev/null +++ b/data/hfopenllm_v2/Supichi/BBAI_525_Tsu_gZ_Xia0/ef8a7079-9d13-42b7-ab2d-b72df5ae5d95.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Supichi_BBAI_525_Tsu_gZ_Xia0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BBAI_525_Tsu_gZ_Xia0", + "id": "Supichi/BBAI_525_Tsu_gZ_Xia0", + "developer": "Supichi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5339 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5562 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3429 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4474 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4477 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Supichi/BBAI_78B_Calme_3_1_Ties/a9c4a482-6b02-4cf6-a7d5-3e16334df634.json b/data/hfopenllm_v2/Supichi/BBAI_78B_Calme_3_1_Ties/a9c4a482-6b02-4cf6-a7d5-3e16334df634.json deleted file mode 100644 index 074cc4097..000000000 --- a/data/hfopenllm_v2/Supichi/BBAI_78B_Calme_3_1_Ties/a9c4a482-6b02-4cf6-a7d5-3e16334df634.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Supichi_BBAI_78B_Calme_3_1_Ties/1762652579.895541", - "retrieved_timestamp": "1762652579.895541", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Supichi/BBAI_78B_Calme_3_1_Ties", - "developer": "Supichi", - "inference_platform": "unknown", - "id": "Supichi/BBAI_78B_Calme_3_1_Ties", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 27.06 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18280052482967415 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28281264175951776 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22902684563758388 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30996874999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11436170212765957 - } - } - ] -} diff --git a/data/hfopenllm_v2/Supichi/BBAI_78B_Calme_3_1_Ties/db8d3fc4-58f4-4f07-8c27-c73a4a4719fb.json b/data/hfopenllm_v2/Supichi/BBAI_78B_Calme_3_1_Ties/db8d3fc4-58f4-4f07-8c27-c73a4a4719fb.json new file mode 100644 index 000000000..7bb33875a --- /dev/null +++ b/data/hfopenllm_v2/Supichi/BBAI_78B_Calme_3_1_Ties/db8d3fc4-58f4-4f07-8c27-c73a4a4719fb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Supichi_BBAI_78B_Calme_3_1_Ties/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BBAI_78B_Calme_3_1_Ties", + "id": "Supichi/BBAI_78B_Calme_3_1_Ties", + "developer": "Supichi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 27.06 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1828 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2828 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.229 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.31 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1144 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Supichi/BBAI_QWEEN_V000000_LUMEN_14B/0c44a429-e705-4794-b702-1a731e52df90.json b/data/hfopenllm_v2/Supichi/BBAI_QWEEN_V000000_LUMEN_14B/0c44a429-e705-4794-b702-1a731e52df90.json new file mode 100644 index 000000000..4b3c57482 --- /dev/null +++ b/data/hfopenllm_v2/Supichi/BBAI_QWEEN_V000000_LUMEN_14B/0c44a429-e705-4794-b702-1a731e52df90.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Supichi_BBAI_QWEEN_V000000_LUMEN_14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BBAI_QWEEN_V000000_LUMEN_14B", + "id": "Supichi/BBAI_QWEEN_V000000_LUMEN_14B", + "developer": "Supichi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 10.366 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1815 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2297 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2315 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3445 + } + }, + { + 
"evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.116 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Supichi/BBAI_QWEEN_V000000_LUMEN_14B/57fd3fdc-dfdd-44ee-8c30-dc5ce4a0df8d.json b/data/hfopenllm_v2/Supichi/BBAI_QWEEN_V000000_LUMEN_14B/57fd3fdc-dfdd-44ee-8c30-dc5ce4a0df8d.json deleted file mode 100644 index 59e553c08..000000000 --- a/data/hfopenllm_v2/Supichi/BBAI_QWEEN_V000000_LUMEN_14B/57fd3fdc-dfdd-44ee-8c30-dc5ce4a0df8d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Supichi_BBAI_QWEEN_V000000_LUMEN_14B/1762652579.895749", - "retrieved_timestamp": "1762652579.8957498", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Supichi/BBAI_QWEEN_V000000_LUMEN_14B", - "developer": "Supichi", - "inference_platform": "unknown", - "id": "Supichi/BBAI_QWEEN_V000000_LUMEN_14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 10.366 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18145188100905596 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22972580681005383 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23154362416107382 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3445416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11602393617021277 - } - } - ] -} diff --git a/data/hfopenllm_v2/Supichi/HF_TOKEN/92b3d2c1-61f4-432a-82a7-43b4367f7ef0.json b/data/hfopenllm_v2/Supichi/HF_TOKEN/92b3d2c1-61f4-432a-82a7-43b4367f7ef0.json new file mode 100644 index 000000000..3690ec90c --- /dev/null +++ b/data/hfopenllm_v2/Supichi/HF_TOKEN/92b3d2c1-61f4-432a-82a7-43b4367f7ef0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + 
"evaluation_id": "hfopenllm_v2/Supichi_HF_TOKEN/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HF_TOKEN", + "id": "Supichi/HF_TOKEN", + "developer": "Supichi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 17.161 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.138 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2764 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0008 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3272 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.111 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Supichi/HF_TOKEN/cd0ccaff-e1b3-4c11-a8a0-37137d0386e2.json b/data/hfopenllm_v2/Supichi/HF_TOKEN/cd0ccaff-e1b3-4c11-a8a0-37137d0386e2.json deleted file mode 100644 index a4adc21a6..000000000 --- a/data/hfopenllm_v2/Supichi/HF_TOKEN/cd0ccaff-e1b3-4c11-a8a0-37137d0386e2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Supichi_HF_TOKEN/1762652579.895958", - "retrieved_timestamp": "1762652579.895958", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": 
"third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Supichi/HF_TOKEN", - "developer": "Supichi", - "inference_platform": "unknown", - "id": "Supichi/HF_TOKEN", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 17.161 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1379872072766925 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2763924734767205 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32717708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11095412234042554 - } - } - ] -} diff --git a/data/hfopenllm_v2/Supichi/NJS26/5703e81d-055c-459b-8202-80ec382a8d5b.json b/data/hfopenllm_v2/Supichi/NJS26/5703e81d-055c-459b-8202-80ec382a8d5b.json new file mode 100644 index 000000000..fb56689db --- /dev/null +++ b/data/hfopenllm_v2/Supichi/NJS26/5703e81d-055c-459b-8202-80ec382a8d5b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Supichi_NJS26/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NJS26", + "id": "Supichi/NJS26", + "developer": "Supichi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0448 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.478 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0325 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.318 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3854 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Supichi/NJS26/f336c7ee-2275-4045-a227-1a7abbaebf63.json b/data/hfopenllm_v2/Supichi/NJS26/f336c7ee-2275-4045-a227-1a7abbaebf63.json deleted file mode 100644 index ceeac801d..000000000 --- a/data/hfopenllm_v2/Supichi/NJS26/f336c7ee-2275-4045-a227-1a7abbaebf63.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Supichi_NJS26/1762652579.8961651", - "retrieved_timestamp": "1762652579.8961651", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Supichi/NJS26", - "developer": "Supichi", - "inference_platform": "unknown", - "id": "Supichi/NJS26", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04481331755298164 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4780152929488641 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0324773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38540625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036901595744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/Svak/MN-12B-Inferor-v0.0/5bb52ed5-e59a-4e60-a6eb-9e9322d95ccc.json b/data/hfopenllm_v2/Svak/MN-12B-Inferor-v0.0/5bb52ed5-e59a-4e60-a6eb-9e9322d95ccc.json deleted file mode 100644 index ffcaf6986..000000000 --- a/data/hfopenllm_v2/Svak/MN-12B-Inferor-v0.0/5bb52ed5-e59a-4e60-a6eb-9e9322d95ccc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Svak_MN-12B-Inferor-v0.0/1762652579.896373", - "retrieved_timestamp": "1762652579.896374", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Svak/MN-12B-Inferor-v0.0", - "developer": "Svak", - "inference_platform": "unknown", - "id": "Svak/MN-12B-Inferor-v0.0", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5707555951541909 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5195010930589931 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10196374622356495 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46388541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3558843085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/Svak/MN-12B-Inferor-v0.0/f6260b6e-52a2-4142-93ba-5393807fa0d4.json 
b/data/hfopenllm_v2/Svak/MN-12B-Inferor-v0.0/f6260b6e-52a2-4142-93ba-5393807fa0d4.json new file mode 100644 index 000000000..cf81cb961 --- /dev/null +++ b/data/hfopenllm_v2/Svak/MN-12B-Inferor-v0.0/f6260b6e-52a2-4142-93ba-5393807fa0d4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Svak_MN-12B-Inferor-v0.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MN-12B-Inferor-v0.0", + "id": "Svak/MN-12B-Inferor-v0.0", + "developer": "Svak", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5708 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5195 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.102 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4639 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3559 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Svak/MN-12B-Inferor-v0.1/83b84506-4826-48de-a6fe-2af6ae5d425a.json b/data/hfopenllm_v2/Svak/MN-12B-Inferor-v0.1/83b84506-4826-48de-a6fe-2af6ae5d425a.json new file mode 100644 index 000000000..30b42165b --- /dev/null +++ b/data/hfopenllm_v2/Svak/MN-12B-Inferor-v0.1/83b84506-4826-48de-a6fe-2af6ae5d425a.json @@ -0,0 +1,132 @@ +{ + "schema_version": 
"0.2.0", + "evaluation_id": "hfopenllm_v2/Svak_MN-12B-Inferor-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MN-12B-Inferor-v0.1", + "id": "Svak/MN-12B-Inferor-v0.1", + "developer": "Svak", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6347 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5147 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1261 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3255 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4351 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3662 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Svak/MN-12B-Inferor-v0.1/9bfe838e-a568-4933-b03d-3e9ae6d2026d.json b/data/hfopenllm_v2/Svak/MN-12B-Inferor-v0.1/9bfe838e-a568-4933-b03d-3e9ae6d2026d.json deleted file mode 100644 index 0557513a2..000000000 --- a/data/hfopenllm_v2/Svak/MN-12B-Inferor-v0.1/9bfe838e-a568-4933-b03d-3e9ae6d2026d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Svak_MN-12B-Inferor-v0.1/1762652579.8966348", - "retrieved_timestamp": "1762652579.896636", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Svak/MN-12B-Inferor-v0.1", - "developer": "Svak", - "inference_platform": "unknown", - "id": "Svak/MN-12B-Inferor-v0.1", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6346527214457639 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5146762089838804 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12613293051359517 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32550335570469796 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4350833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3661901595744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/Syed-Hasan-8503/Phi-3-mini-4K-instruct-cpo-simpo/58bacacb-2936-4685-b0ba-dc8f47f3232a.json b/data/hfopenllm_v2/Syed-Hasan-8503/Phi-3-mini-4K-instruct-cpo-simpo/58bacacb-2936-4685-b0ba-dc8f47f3232a.json deleted file mode 100644 index 6a60f327b..000000000 --- a/data/hfopenllm_v2/Syed-Hasan-8503/Phi-3-mini-4K-instruct-cpo-simpo/58bacacb-2936-4685-b0ba-dc8f47f3232a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Syed-Hasan-8503_Phi-3-mini-4K-instruct-cpo-simpo/1762652579.896852", - "retrieved_timestamp": "1762652579.896853", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Syed-Hasan-8503/Phi-3-mini-4K-instruct-cpo-simpo", - "developer": "Syed-Hasan-8503", - "inference_platform": "unknown", - "id": "Syed-Hasan-8503/Phi-3-mini-4K-instruct-cpo-simpo", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.5714049832222946 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5681534123661078 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15709969788519637 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3963541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38605385638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/Syed-Hasan-8503/Phi-3-mini-4K-instruct-cpo-simpo/7483e260-9853-4d3f-aa10-187796d96de9.json b/data/hfopenllm_v2/Syed-Hasan-8503/Phi-3-mini-4K-instruct-cpo-simpo/7483e260-9853-4d3f-aa10-187796d96de9.json new file mode 100644 index 000000000..e37c2fec5 --- /dev/null +++ b/data/hfopenllm_v2/Syed-Hasan-8503/Phi-3-mini-4K-instruct-cpo-simpo/7483e260-9853-4d3f-aa10-187796d96de9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Syed-Hasan-8503_Phi-3-mini-4K-instruct-cpo-simpo/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-3-mini-4K-instruct-cpo-simpo", + "id": "Syed-Hasan-8503/Phi-3-mini-4K-instruct-cpo-simpo", + "developer": "Syed-Hasan-8503", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5714 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5682 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1571 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3305 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3964 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3861 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/KRONOS-8B-V1-P1/5bedfdac-2976-4a21-9ae2-a5b5b06e1e14.json b/data/hfopenllm_v2/T145/KRONOS-8B-V1-P1/5bedfdac-2976-4a21-9ae2-a5b5b06e1e14.json deleted file mode 100644 index 3812b4ec7..000000000 --- a/data/hfopenllm_v2/T145/KRONOS-8B-V1-P1/5bedfdac-2976-4a21-9ae2-a5b5b06e1e14.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V1-P1/1762652579.897121", - "retrieved_timestamp": "1762652579.8971221", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/KRONOS-8B-V1-P1", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/KRONOS-8B-V1-P1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7849783020164276 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.508544756293663 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19788519637462235 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3881041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3759973404255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/KRONOS-8B-V1-P1/f9925806-4252-44e8-b67e-917737572bd4.json b/data/hfopenllm_v2/T145/KRONOS-8B-V1-P1/f9925806-4252-44e8-b67e-917737572bd4.json new file mode 100644 index 000000000..a479386a3 --- /dev/null +++ b/data/hfopenllm_v2/T145/KRONOS-8B-V1-P1/f9925806-4252-44e8-b67e-917737572bd4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V1-P1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "KRONOS-8B-V1-P1", + "id": "T145/KRONOS-8B-V1-P1", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.785 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5085 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1979 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3881 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.376 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/KRONOS-8B-V1-P2/70470e6c-8d66-4249-b762-a5a2e3589a53.json b/data/hfopenllm_v2/T145/KRONOS-8B-V1-P2/70470e6c-8d66-4249-b762-a5a2e3589a53.json new file mode 100644 index 000000000..9dacbbf53 --- /dev/null +++ b/data/hfopenllm_v2/T145/KRONOS-8B-V1-P2/70470e6c-8d66-4249-b762-a5a2e3589a53.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V1-P2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "KRONOS-8B-V1-P2", + "id": "T145/KRONOS-8B-V1-P2", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6724 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4772 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1601 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3568 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3453 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/KRONOS-8B-V1-P2/a5d0fc39-cac5-409f-8375-636ef97fba8c.json b/data/hfopenllm_v2/T145/KRONOS-8B-V1-P2/a5d0fc39-cac5-409f-8375-636ef97fba8c.json deleted file mode 100644 
index 126dc2d28..000000000 --- a/data/hfopenllm_v2/T145/KRONOS-8B-V1-P2/a5d0fc39-cac5-409f-8375-636ef97fba8c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V1-P2/1762652579.897378", - "retrieved_timestamp": "1762652579.8973792", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/KRONOS-8B-V1-P2", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/KRONOS-8B-V1-P2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6724213974476612 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47717566218002166 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16012084592145015 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3567604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3453291223404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/KRONOS-8B-V1-P3/14eb1867-80a0-47f9-9b2a-f0a05f683fb4.json b/data/hfopenllm_v2/T145/KRONOS-8B-V1-P3/14eb1867-80a0-47f9-9b2a-f0a05f683fb4.json deleted file mode 100644 index 87befd625..000000000 --- a/data/hfopenllm_v2/T145/KRONOS-8B-V1-P3/14eb1867-80a0-47f9-9b2a-f0a05f683fb4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V1-P3/1762652579.897578", - "retrieved_timestamp": "1762652579.897579", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/KRONOS-8B-V1-P3", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/KRONOS-8B-V1-P3", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, 
- "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7137373280673058 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5127875870036823 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19259818731117825 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3615625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34050864361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/KRONOS-8B-V1-P3/d3abfe3c-ebfe-4dfd-b0db-93c14d32c585.json b/data/hfopenllm_v2/T145/KRONOS-8B-V1-P3/d3abfe3c-ebfe-4dfd-b0db-93c14d32c585.json new file mode 100644 index 000000000..4ae2c0b29 --- /dev/null +++ b/data/hfopenllm_v2/T145/KRONOS-8B-V1-P3/d3abfe3c-ebfe-4dfd-b0db-93c14d32c585.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V1-P3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "KRONOS-8B-V1-P3", + "id": "T145/KRONOS-8B-V1-P3", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7137 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5128 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match 
on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1926 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3616 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3405 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/KRONOS-8B-V2/a35b06bc-d759-421a-94cf-f408a98e9273.json b/data/hfopenllm_v2/T145/KRONOS-8B-V2/a35b06bc-d759-421a-94cf-f408a98e9273.json new file mode 100644 index 000000000..1869aec88 --- /dev/null +++ b/data/hfopenllm_v2/T145/KRONOS-8B-V2/a35b06bc-d759-421a-94cf-f408a98e9273.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "KRONOS-8B-V2", + "id": "T145/KRONOS-8B-V2", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.518 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5133 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2266 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3829 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3738 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/KRONOS-8B-V2/ff4c64ec-f44b-4bec-9534-bafa632a0e3f.json b/data/hfopenllm_v2/T145/KRONOS-8B-V2/ff4c64ec-f44b-4bec-9534-bafa632a0e3f.json deleted file mode 100644 index 9e5e59e0a..000000000 --- a/data/hfopenllm_v2/T145/KRONOS-8B-V2/ff4c64ec-f44b-4bec-9534-bafa632a0e3f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V2/1762652579.897814", - "retrieved_timestamp": "1762652579.897815", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/KRONOS-8B-V2", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/KRONOS-8B-V2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5180243974875552 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.513268555595521 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22658610271903323 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38286458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3737533244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/KRONOS-8B-V3/bbac659c-7cf8-41d4-98d4-ded4c471bd98.json b/data/hfopenllm_v2/T145/KRONOS-8B-V3/bbac659c-7cf8-41d4-98d4-ded4c471bd98.json new file mode 100644 index 000000000..86f598a6c --- /dev/null +++ b/data/hfopenllm_v2/T145/KRONOS-8B-V3/bbac659c-7cf8-41d4-98d4-ded4c471bd98.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "KRONOS-8B-V3", + "id": "T145/KRONOS-8B-V3", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5475 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5119 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2598 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2886 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3922 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3738 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/KRONOS-8B-V3/fc5613f1-09bc-4b82-89f4-4ee671cad5bf.json b/data/hfopenllm_v2/T145/KRONOS-8B-V3/fc5613f1-09bc-4b82-89f4-4ee671cad5bf.json deleted file mode 100644 index 
65280ca28..000000000 --- a/data/hfopenllm_v2/T145/KRONOS-8B-V3/fc5613f1-09bc-4b82-89f4-4ee671cad5bf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V3/1762652579.8980181", - "retrieved_timestamp": "1762652579.898019", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/KRONOS-8B-V3", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/KRONOS-8B-V3", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5474751437297483 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.511865544689898 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2598187311178248 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3922291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3738364361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/KRONOS-8B-V4/0c73f3a0-0a92-4b1c-abfa-6eb77138dacd.json b/data/hfopenllm_v2/T145/KRONOS-8B-V4/0c73f3a0-0a92-4b1c-abfa-6eb77138dacd.json new file mode 100644 index 000000000..b0d2848fc --- /dev/null +++ b/data/hfopenllm_v2/T145/KRONOS-8B-V4/0c73f3a0-0a92-4b1c-abfa-6eb77138dacd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "KRONOS-8B-V4", + "id": "T145/KRONOS-8B-V4", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": 
"google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7889 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5092 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1949 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.383 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3786 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/KRONOS-8B-V4/af8665b4-d9be-4243-9c8d-0b43e7abd540.json b/data/hfopenllm_v2/T145/KRONOS-8B-V4/af8665b4-d9be-4243-9c8d-0b43e7abd540.json deleted file mode 100644 index b3189d380..000000000 --- a/data/hfopenllm_v2/T145/KRONOS-8B-V4/af8665b4-d9be-4243-9c8d-0b43e7abd540.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V4/1762652579.898447", - "retrieved_timestamp": "1762652579.898448", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/KRONOS-8B-V4", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/KRONOS-8B-V4", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7889499860370484 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5092470034846742 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19486404833836857 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38295833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37857380319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/KRONOS-8B-V5/290206b5-0e46-4f92-a2bd-f2c53ef3d147.json b/data/hfopenllm_v2/T145/KRONOS-8B-V5/290206b5-0e46-4f92-a2bd-f2c53ef3d147.json deleted file mode 100644 index 82863ca38..000000000 --- a/data/hfopenllm_v2/T145/KRONOS-8B-V5/290206b5-0e46-4f92-a2bd-f2c53ef3d147.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V5/1762652579.8986921", - "retrieved_timestamp": "1762652579.898693", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/KRONOS-8B-V5", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/KRONOS-8B-V5", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5405058577906621 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5088651598969166 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2688821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40546875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37591422872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/KRONOS-8B-V5/a7ab6f16-717f-4567-8057-a4a18e1a1e77.json b/data/hfopenllm_v2/T145/KRONOS-8B-V5/a7ab6f16-717f-4567-8057-a4a18e1a1e77.json new file mode 100644 index 000000000..fb35a1d47 --- /dev/null +++ b/data/hfopenllm_v2/T145/KRONOS-8B-V5/a7ab6f16-717f-4567-8057-a4a18e1a1e77.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "KRONOS-8B-V5", + "id": "T145/KRONOS-8B-V5", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5405 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5089 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2689 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4055 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.3759 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/KRONOS-8B-V6/2abe2c9d-032d-469e-852b-114eca5e84f8.json b/data/hfopenllm_v2/T145/KRONOS-8B-V6/2abe2c9d-032d-469e-852b-114eca5e84f8.json new file mode 100644 index 000000000..55fd3a74f --- /dev/null +++ b/data/hfopenllm_v2/T145/KRONOS-8B-V6/2abe2c9d-032d-469e-852b-114eca5e84f8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V6/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "KRONOS-8B-V6", + "id": "T145/KRONOS-8B-V6", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7022 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5034 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2598 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4121 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3501 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/KRONOS-8B-V6/78813c35-3eaa-4ae6-9099-bf79efb8b0df.json b/data/hfopenllm_v2/T145/KRONOS-8B-V6/78813c35-3eaa-4ae6-9099-bf79efb8b0df.json deleted file mode 100644 index 43b109060..000000000 --- 
a/data/hfopenllm_v2/T145/KRONOS-8B-V6/78813c35-3eaa-4ae6-9099-bf79efb8b0df.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V6/1762652579.898935", - "retrieved_timestamp": "1762652579.898936", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/KRONOS-8B-V6", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/KRONOS-8B-V6", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7022467054083166 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5033606149499412 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2598187311178248 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41210416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3501496010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/KRONOS-8B-V7/1358fee5-3874-4997-b1f0-6e93c6c5e9c0.json b/data/hfopenllm_v2/T145/KRONOS-8B-V7/1358fee5-3874-4997-b1f0-6e93c6c5e9c0.json deleted file mode 100644 index 720ae39af..000000000 --- a/data/hfopenllm_v2/T145/KRONOS-8B-V7/1358fee5-3874-4997-b1f0-6e93c6c5e9c0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V7/1762652579.899169", - "retrieved_timestamp": "1762652579.8991702", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/KRONOS-8B-V7", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/KRONOS-8B-V7", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", 
- "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3529102780622083 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4526219443939161 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11102719033232629 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36711458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2696974734042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/KRONOS-8B-V7/2e8a83dc-c760-4f42-a361-e02cf3a65427.json b/data/hfopenllm_v2/T145/KRONOS-8B-V7/2e8a83dc-c760-4f42-a361-e02cf3a65427.json new file mode 100644 index 000000000..5846cc83c --- /dev/null +++ b/data/hfopenllm_v2/T145/KRONOS-8B-V7/2e8a83dc-c760-4f42-a361-e02cf3a65427.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V7/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "KRONOS-8B-V7", + "id": "T145/KRONOS-8B-V7", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3529 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4526 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.111 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3671 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2697 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/KRONOS-8B-V8/57a4ddc6-0447-4840-94bc-5bb136025aab.json b/data/hfopenllm_v2/T145/KRONOS-8B-V8/57a4ddc6-0447-4840-94bc-5bb136025aab.json deleted file mode 100644 index 770efc127..000000000 --- a/data/hfopenllm_v2/T145/KRONOS-8B-V8/57a4ddc6-0447-4840-94bc-5bb136025aab.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V8/1762652579.899387", - "retrieved_timestamp": "1762652579.8993878", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/KRONOS-8B-V8", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/KRONOS-8B-V8", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7770349339751859 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5094406613555632 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20468277945619334 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3868958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37824135638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/KRONOS-8B-V8/743dfe64-e7cd-493e-817d-8d5fcdc2ea24.json b/data/hfopenllm_v2/T145/KRONOS-8B-V8/743dfe64-e7cd-493e-817d-8d5fcdc2ea24.json new file mode 100644 index 000000000..d7f05654d --- /dev/null +++ b/data/hfopenllm_v2/T145/KRONOS-8B-V8/743dfe64-e7cd-493e-817d-8d5fcdc2ea24.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V8/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "KRONOS-8B-V8", + "id": "T145/KRONOS-8B-V8", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.777 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5094 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2047 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3869 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.3782 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/KRONOS-8B-V9/4e37c90b-65a8-4b71-bfc2-d63541fb8962.json b/data/hfopenllm_v2/T145/KRONOS-8B-V9/4e37c90b-65a8-4b71-bfc2-d63541fb8962.json new file mode 100644 index 000000000..405395324 --- /dev/null +++ b/data/hfopenllm_v2/T145/KRONOS-8B-V9/4e37c90b-65a8-4b71-bfc2-d63541fb8962.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V9/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "KRONOS-8B-V9", + "id": "T145/KRONOS-8B-V9", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7856 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5099 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1986 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3868 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3752 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/KRONOS-8B-V9/6fbb6156-196d-4523-900e-35316100d3b9.json b/data/hfopenllm_v2/T145/KRONOS-8B-V9/6fbb6156-196d-4523-900e-35316100d3b9.json deleted file mode 100644 index 05ef96ba4..000000000 --- 
a/data/hfopenllm_v2/T145/KRONOS-8B-V9/6fbb6156-196d-4523-900e-35316100d3b9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V9/1762652579.8996658", - "retrieved_timestamp": "1762652579.899667", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/KRONOS-8B-V9", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/KRONOS-8B-V9", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7855778224001206 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5099211908307056 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1986404833836858 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3868020833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3751662234042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/Llama-3.1-8B-Instruct-Zeus/2e34d74e-1b69-4daf-8bee-77e5357fd439.json b/data/hfopenllm_v2/T145/Llama-3.1-8B-Instruct-Zeus/2e34d74e-1b69-4daf-8bee-77e5357fd439.json new file mode 100644 index 000000000..41ff9323c --- /dev/null +++ b/data/hfopenllm_v2/T145/Llama-3.1-8B-Instruct-Zeus/2e34d74e-1b69-4daf-8bee-77e5357fd439.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_Llama-3.1-8B-Instruct-Zeus/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8B-Instruct-Zeus", + "id": "T145/Llama-3.1-8B-Instruct-Zeus", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": 
"IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7941 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5174 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1956 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3976 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3893 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/Llama-3.1-8B-Instruct-Zeus/38e620aa-c577-4b14-bebd-e98ebcbe48b2.json b/data/hfopenllm_v2/T145/Llama-3.1-8B-Instruct-Zeus/38e620aa-c577-4b14-bebd-e98ebcbe48b2.json deleted file mode 100644 index 47f0070e8..000000000 --- a/data/hfopenllm_v2/T145/Llama-3.1-8B-Instruct-Zeus/38e620aa-c577-4b14-bebd-e98ebcbe48b2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_Llama-3.1-8B-Instruct-Zeus/1762652579.899903", - "retrieved_timestamp": "1762652579.899904", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/Llama-3.1-8B-Instruct-Zeus", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/Llama-3.1-8B-Instruct-Zeus", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.7941207108250552 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5173982439996302 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19561933534743203 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39762499999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38929521276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/Llama-3.1-8B-Zeus/0646e2f7-d2e6-42d3-8f09-f8daee302709.json b/data/hfopenllm_v2/T145/Llama-3.1-8B-Zeus/0646e2f7-d2e6-42d3-8f09-f8daee302709.json new file mode 100644 index 000000000..0756465ea --- /dev/null +++ b/data/hfopenllm_v2/T145/Llama-3.1-8B-Zeus/0646e2f7-d2e6-42d3-8f09-f8daee302709.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_Llama-3.1-8B-Zeus/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8B-Zeus", + "id": "T145/Llama-3.1-8B-Zeus", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3518 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3671 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0144 + } + }, + { + 
"evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3316 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1332 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/Meta-Llama-3.1-8B-Instruct-TIES/15b92d44-3d68-4c6a-bddd-5676ebda2e10.json b/data/hfopenllm_v2/T145/Meta-Llama-3.1-8B-Instruct-TIES/15b92d44-3d68-4c6a-bddd-5676ebda2e10.json deleted file mode 100644 index bce57e5a8..000000000 --- a/data/hfopenllm_v2/T145/Meta-Llama-3.1-8B-Instruct-TIES/15b92d44-3d68-4c6a-bddd-5676ebda2e10.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_Meta-Llama-3.1-8B-Instruct-TIES/1762652579.900369", - "retrieved_timestamp": "1762652579.900369", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/Meta-Llama-3.1-8B-Instruct-TIES", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/Meta-Llama-3.1-8B-Instruct-TIES", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5423542866261519 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5070111385564763 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20996978851963746 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3842916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37799202127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/Meta-Llama-3.1-8B-Instruct-TIES/c66b1ff8-9c04-4f9c-b83e-088f31f79590.json b/data/hfopenllm_v2/T145/Meta-Llama-3.1-8B-Instruct-TIES/c66b1ff8-9c04-4f9c-b83e-088f31f79590.json new file mode 100644 index 000000000..d2e2124a8 --- /dev/null +++ b/data/hfopenllm_v2/T145/Meta-Llama-3.1-8B-Instruct-TIES/c66b1ff8-9c04-4f9c-b83e-088f31f79590.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_Meta-Llama-3.1-8B-Instruct-TIES/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Meta-Llama-3.1-8B-Instruct-TIES", + "id": "T145/Meta-Llama-3.1-8B-Instruct-TIES", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5424 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.507 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.21 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3843 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.378 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V10/1bd2affc-9970-4149-b52b-51549b1f0029.json b/data/hfopenllm_v2/T145/ZEUS-8B-V10/1bd2affc-9970-4149-b52b-51549b1f0029.json new file mode 100644 index 000000000..0c3eeb4e0 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V10/1bd2affc-9970-4149-b52b-51549b1f0029.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V10/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZEUS-8B-V10", + "id": "T145/ZEUS-8B-V10", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7707 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.527 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2115 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3247 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3898 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3904 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V10/464bae3d-bd06-4264-a939-59ab8e562ca6.json b/data/hfopenllm_v2/T145/ZEUS-8B-V10/464bae3d-bd06-4264-a939-59ab8e562ca6.json 
deleted file mode 100644 index f71146fd4..000000000 --- a/data/hfopenllm_v2/T145/ZEUS-8B-V10/464bae3d-bd06-4264-a939-59ab8e562ca6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V10/1762652579.900583", - "retrieved_timestamp": "1762652579.900584", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/ZEUS-8B-V10", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V10", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7706651684197928 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5269758270442659 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21148036253776434 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38978124999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.390375664893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V11/a6eedf29-9ec8-4b03-a8f5-c9c4e2bda688.json b/data/hfopenllm_v2/T145/ZEUS-8B-V11/a6eedf29-9ec8-4b03-a8f5-c9c4e2bda688.json deleted file mode 100644 index 854b6bede..000000000 --- a/data/hfopenllm_v2/T145/ZEUS-8B-V11/a6eedf29-9ec8-4b03-a8f5-c9c4e2bda688.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V11/1762652579.900793", - "retrieved_timestamp": "1762652579.900793", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/ZEUS-8B-V11", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V11", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8099575792231279 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5161982586505715 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19637462235649547 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38066666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38838098404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V11/f0479d74-4684-4b41-a63b-16d7fe0e3290.json b/data/hfopenllm_v2/T145/ZEUS-8B-V11/f0479d74-4684-4b41-a63b-16d7fe0e3290.json new file mode 100644 index 000000000..65e7ba095 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V11/f0479d74-4684-4b41-a63b-16d7fe0e3290.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V11/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZEUS-8B-V11", + "id": "T145/ZEUS-8B-V11", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.81 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5162 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", 
+ "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1964 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3146 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3807 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3884 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V12/1ab70352-9bda-47c8-8bdf-90860934cfc7.json b/data/hfopenllm_v2/T145/ZEUS-8B-V12/1ab70352-9bda-47c8-8bdf-90860934cfc7.json deleted file mode 100644 index f4730f67d..000000000 --- a/data/hfopenllm_v2/T145/ZEUS-8B-V12/1ab70352-9bda-47c8-8bdf-90860934cfc7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V12/1762652579.901004", - "retrieved_timestamp": "1762652579.901004", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/ZEUS-8B-V12", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V12", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.781556270695089 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5253912026310238 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21148036253776434 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38584375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3912067819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V12/95deb890-a15d-4c71-8151-ed45c3dfb87f.json b/data/hfopenllm_v2/T145/ZEUS-8B-V12/95deb890-a15d-4c71-8151-ed45c3dfb87f.json new file mode 100644 index 000000000..2173c77bb --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V12/95deb890-a15d-4c71-8151-ed45c3dfb87f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V12/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZEUS-8B-V12", + "id": "T145/ZEUS-8B-V12", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7816 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5254 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2115 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3205 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3858 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": 
{ + "score": 0.3912 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V13-abliterated/1c07fc4c-a773-4e03-bb14-7144e7815c01.json b/data/hfopenllm_v2/T145/ZEUS-8B-V13-abliterated/1c07fc4c-a773-4e03-bb14-7144e7815c01.json new file mode 100644 index 000000000..4f8852135 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V13-abliterated/1c07fc4c-a773-4e03-bb14-7144e7815c01.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V13-abliterated/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZEUS-8B-V13-abliterated", + "id": "T145/ZEUS-8B-V13-abliterated", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7878 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5198 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.179 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3112 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3871 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3872 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V13-abliterated/7c39d06a-dafe-40a7-b5a1-dca14dcadff2.json b/data/hfopenllm_v2/T145/ZEUS-8B-V13-abliterated/7c39d06a-dafe-40a7-b5a1-dca14dcadff2.json 
deleted file mode 100644 index e70a6cf0b..000000000 --- a/data/hfopenllm_v2/T145/ZEUS-8B-V13-abliterated/7c39d06a-dafe-40a7-b5a1-dca14dcadff2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V13-abliterated/1762652579.901429", - "retrieved_timestamp": "1762652579.9014301", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/ZEUS-8B-V13-abliterated", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V13-abliterated", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7877509452696623 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5197597316957202 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17900302114803626 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3871458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38721742021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V13/10823e50-9478-4a8a-83cf-5169a0bc1f1f.json b/data/hfopenllm_v2/T145/ZEUS-8B-V13/10823e50-9478-4a8a-83cf-5169a0bc1f1f.json deleted file mode 100644 index 49650d1a4..000000000 --- a/data/hfopenllm_v2/T145/ZEUS-8B-V13/10823e50-9478-4a8a-83cf-5169a0bc1f1f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V13/1762652579.90121", - "retrieved_timestamp": "1762652579.9012108", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/ZEUS-8B-V13", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V13", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - 
"params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7904238531540756 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5277128851736589 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21374622356495468 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38447916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39112367021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V13/e7e8388e-db3c-4881-b67c-5177c60562b9.json b/data/hfopenllm_v2/T145/ZEUS-8B-V13/e7e8388e-db3c-4881-b67c-5177c60562b9.json new file mode 100644 index 000000000..f1a974e92 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V13/e7e8388e-db3c-4881-b67c-5177c60562b9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V13/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZEUS-8B-V13", + "id": "T145/ZEUS-8B-V13", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7904 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5277 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2137 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3238 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3845 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3911 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V14/2b0eb3f5-d35e-41ea-ba69-18c0b8a3e1e1.json b/data/hfopenllm_v2/T145/ZEUS-8B-V14/2b0eb3f5-d35e-41ea-ba69-18c0b8a3e1e1.json deleted file mode 100644 index 401a09c60..000000000 --- a/data/hfopenllm_v2/T145/ZEUS-8B-V14/2b0eb3f5-d35e-41ea-ba69-18c0b8a3e1e1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V14/1762652579.901653", - "retrieved_timestamp": "1762652579.901653", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/ZEUS-8B-V14", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V14", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.770939994769434 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5274593322517976 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3844479166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3913730053191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V14/c4923208-2a47-45f2-a74a-4483e4b99bee.json b/data/hfopenllm_v2/T145/ZEUS-8B-V14/c4923208-2a47-45f2-a74a-4483e4b99bee.json new file mode 100644 index 000000000..0b3a25678 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V14/c4923208-2a47-45f2-a74a-4483e4b99bee.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V14/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZEUS-8B-V14", + "id": "T145/ZEUS-8B-V14", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7709 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5275 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.213 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3205 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3844 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3914 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V15/3e1be4f3-478f-4061-9856-f1beb0a749de.json b/data/hfopenllm_v2/T145/ZEUS-8B-V15/3e1be4f3-478f-4061-9856-f1beb0a749de.json deleted file mode 100644 index 86a6c0004..000000000 --- a/data/hfopenllm_v2/T145/ZEUS-8B-V15/3e1be4f3-478f-4061-9856-f1beb0a749de.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V15/1762652579.901858", - "retrieved_timestamp": "1762652579.901859", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/ZEUS-8B-V15", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V15", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.701272623306161 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5537552380544757 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23036253776435045 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40199999999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40591755319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V15/b5f06a78-5b57-45a5-93be-4f3c1b36f208.json b/data/hfopenllm_v2/T145/ZEUS-8B-V15/b5f06a78-5b57-45a5-93be-4f3c1b36f208.json new file mode 100644 index 000000000..b2f90345b --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V15/b5f06a78-5b57-45a5-93be-4f3c1b36f208.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V15/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZEUS-8B-V15", + "id": "T145/ZEUS-8B-V15", 
+ "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7013 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5538 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2304 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.402 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4059 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V16/7beef3ca-6423-4a81-836d-0e4cdc4af973.json b/data/hfopenllm_v2/T145/ZEUS-8B-V16/7beef3ca-6423-4a81-836d-0e4cdc4af973.json deleted file mode 100644 index 6a2c42ded..000000000 --- a/data/hfopenllm_v2/T145/ZEUS-8B-V16/7beef3ca-6423-4a81-836d-0e4cdc4af973.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V16/1762652579.9020631", - "retrieved_timestamp": "1762652579.902064", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/ZEUS-8B-V16", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V16", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": 
[ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7925471083392066 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5265817990313368 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3950833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39261968085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V16/835f19d3-515c-4bc4-ab96-5cb5bece45dc.json b/data/hfopenllm_v2/T145/ZEUS-8B-V16/835f19d3-515c-4bc4-ab96-5cb5bece45dc.json new file mode 100644 index 000000000..5181ab5f4 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V16/835f19d3-515c-4bc4-ab96-5cb5bece45dc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V16/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZEUS-8B-V16", + "id": "T145/ZEUS-8B-V16", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7925 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5266 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2205 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3951 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3926 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V17-abliterated-V2/3344d19c-c79b-48b3-be5b-f5f27d6920ce.json b/data/hfopenllm_v2/T145/ZEUS-8B-V17-abliterated-V2/3344d19c-c79b-48b3-be5b-f5f27d6920ce.json deleted file mode 100644 index 930c705fe..000000000 --- a/data/hfopenllm_v2/T145/ZEUS-8B-V17-abliterated-V2/3344d19c-c79b-48b3-be5b-f5f27d6920ce.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V17-abliterated-V2/1762652579.902674", - "retrieved_timestamp": "1762652579.902674", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/ZEUS-8B-V17-abliterated-V2", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V17-abliterated-V2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6532123654126606 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49280119619174295 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11178247734138973 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3407291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34017619680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V17-abliterated-V2/7dd96382-6fc1-4a39-924b-d9034b5b0839.json b/data/hfopenllm_v2/T145/ZEUS-8B-V17-abliterated-V2/7dd96382-6fc1-4a39-924b-d9034b5b0839.json new file mode 100644 index 000000000..978451253 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V17-abliterated-V2/7dd96382-6fc1-4a39-924b-d9034b5b0839.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V17-abliterated-V2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZEUS-8B-V17-abliterated-V2", + "id": "T145/ZEUS-8B-V17-abliterated-V2", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6532 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4928 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1118 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2735 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3407 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": 
{ + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3402 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V17-abliterated-V4/77a666a2-a9b2-43cc-8e64-67172f4ab6c8.json b/data/hfopenllm_v2/T145/ZEUS-8B-V17-abliterated-V4/77a666a2-a9b2-43cc-8e64-67172f4ab6c8.json new file mode 100644 index 000000000..cee81252f --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V17-abliterated-V4/77a666a2-a9b2-43cc-8e64-67172f4ab6c8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V17-abliterated-V4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZEUS-8B-V17-abliterated-V4", + "id": "T145/ZEUS-8B-V17-abliterated-V4", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7228 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5169 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0937 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4187 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3774 + } + } + ] +} \ No newline at end of file 
diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V17-abliterated-V4/bf9c0bfa-98e5-45b2-8819-0911af81d78f.json b/data/hfopenllm_v2/T145/ZEUS-8B-V17-abliterated-V4/bf9c0bfa-98e5-45b2-8819-0911af81d78f.json deleted file mode 100644 index 959765d83..000000000 --- a/data/hfopenllm_v2/T145/ZEUS-8B-V17-abliterated-V4/bf9c0bfa-98e5-45b2-8819-0911af81d78f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V17-abliterated-V4/1762652579.902891", - "retrieved_timestamp": "1762652579.902891", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/ZEUS-8B-V17-abliterated-V4", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V17-abliterated-V4", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7228298691915229 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5169216944225185 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09365558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4187083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37741023936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V17-abliterated/35f89ab6-c6c9-41cd-9296-af4921490c3f.json b/data/hfopenllm_v2/T145/ZEUS-8B-V17-abliterated/35f89ab6-c6c9-41cd-9296-af4921490c3f.json deleted file mode 100644 index 60f1096b7..000000000 --- a/data/hfopenllm_v2/T145/ZEUS-8B-V17-abliterated/35f89ab6-c6c9-41cd-9296-af4921490c3f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V17-abliterated/1762652579.902467", - "retrieved_timestamp": "1762652579.9024682", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - 
"source_type": "documentation" - }, - "model_info": { - "name": "T145/ZEUS-8B-V17-abliterated", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V17-abliterated", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.594 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7576009432749549 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.520041374505222 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42692708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36220079787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V17-abliterated/e3eae267-46ab-4433-a8f3-2a2f8448299b.json b/data/hfopenllm_v2/T145/ZEUS-8B-V17-abliterated/e3eae267-46ab-4433-a8f3-2a2f8448299b.json new file mode 100644 index 000000000..f3ac720d6 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V17-abliterated/e3eae267-46ab-4433-a8f3-2a2f8448299b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V17-abliterated/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZEUS-8B-V17-abliterated", + "id": "T145/ZEUS-8B-V17-abliterated", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.594 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7576 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.52 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0438 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4269 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3622 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V17/0368a3ba-e461-45d1-a037-3b9160a8efbb.json b/data/hfopenllm_v2/T145/ZEUS-8B-V17/0368a3ba-e461-45d1-a037-3b9160a8efbb.json deleted file mode 100644 index ac5bb11fe..000000000 --- a/data/hfopenllm_v2/T145/ZEUS-8B-V17/0368a3ba-e461-45d1-a037-3b9160a8efbb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V17/1762652579.902262", - "retrieved_timestamp": "1762652579.902263", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/ZEUS-8B-V17", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V17", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7940708431406447 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.525086643033107 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2243202416918429 
- } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221476510067114 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40162499999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39345079787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V17/e31308c4-8eb2-4a72-8127-18049d58b814.json b/data/hfopenllm_v2/T145/ZEUS-8B-V17/e31308c4-8eb2-4a72-8127-18049d58b814.json new file mode 100644 index 000000000..7b6a3680c --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V17/e31308c4-8eb2-4a72-8127-18049d58b814.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V17/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZEUS-8B-V17", + "id": "T145/ZEUS-8B-V17", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7941 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5251 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2243 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3221 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.4016 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3935 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V18/c7098a7a-e865-4ecd-b511-abeb2c0872bd.json b/data/hfopenllm_v2/T145/ZEUS-8B-V18/c7098a7a-e865-4ecd-b511-abeb2c0872bd.json new file mode 100644 index 000000000..4f93f40a0 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V18/c7098a7a-e865-4ecd-b511-abeb2c0872bd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V18/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZEUS-8B-V18", + "id": "T145/ZEUS-8B-V18", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7834 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.527 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2183 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3213 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4043 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3942 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V18/e5d250e7-8d0a-48b5-aaad-3d1da02eab00.json b/data/hfopenllm_v2/T145/ZEUS-8B-V18/e5d250e7-8d0a-48b5-aaad-3d1da02eab00.json deleted file mode 100644 index 25c5138e2..000000000 --- a/data/hfopenllm_v2/T145/ZEUS-8B-V18/e5d250e7-8d0a-48b5-aaad-3d1da02eab00.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V18/1762652579.903114", - "retrieved_timestamp": "1762652579.903115", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/ZEUS-8B-V18", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V18", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7834046995305788 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5269802862530547 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21827794561933533 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40429166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39419880319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V19/0392cccb-0a1c-486e-876a-1404f14a1080.json b/data/hfopenllm_v2/T145/ZEUS-8B-V19/0392cccb-0a1c-486e-876a-1404f14a1080.json deleted file mode 100644 index b2b47a3aa..000000000 --- a/data/hfopenllm_v2/T145/ZEUS-8B-V19/0392cccb-0a1c-486e-876a-1404f14a1080.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V19/1762652579.903361", - "retrieved_timestamp": "1762652579.903362", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open 
LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/ZEUS-8B-V19", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V19", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7882507302845339 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5276233222408697 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221476510067114 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40429166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3933676861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V19/b3a8c734-e63a-47f7-af2c-a3b6518802fa.json b/data/hfopenllm_v2/T145/ZEUS-8B-V19/b3a8c734-e63a-47f7-af2c-a3b6518802fa.json new file mode 100644 index 000000000..893297bba --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V19/b3a8c734-e63a-47f7-af2c-a3b6518802fa.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V19/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZEUS-8B-V19", + "id": "T145/ZEUS-8B-V19", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7883 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + 
}, + "score_details": { + "score": 0.5276 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2205 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3221 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4043 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3934 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V2-ORPO/35937965-2791-4f75-8954-5a2280381c91.json b/data/hfopenllm_v2/T145/ZEUS-8B-V2-ORPO/35937965-2791-4f75-8954-5a2280381c91.json new file mode 100644 index 000000000..970e9dcd6 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V2-ORPO/35937965-2791-4f75-8954-5a2280381c91.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V2-ORPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZEUS-8B-V2-ORPO", + "id": "T145/ZEUS-8B-V2-ORPO", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7187 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5075 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH 
Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1828 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3935 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3678 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V2-ORPO/588b0fce-37cd-41f1-8eaa-50383cdc0f00.json b/data/hfopenllm_v2/T145/ZEUS-8B-V2-ORPO/588b0fce-37cd-41f1-8eaa-50383cdc0f00.json deleted file mode 100644 index b252a7a53..000000000 --- a/data/hfopenllm_v2/T145/ZEUS-8B-V2-ORPO/588b0fce-37cd-41f1-8eaa-50383cdc0f00.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V2-ORPO/1762652579.903775", - "retrieved_timestamp": "1762652579.903776", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/ZEUS-8B-V2-ORPO", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V2-ORPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7186830941900824 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5075246906772 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18277945619335348 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39349999999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3677692819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V2-abliterated/4ab806fe-738d-4f5b-89e4-004134d2f7fe.json b/data/hfopenllm_v2/T145/ZEUS-8B-V2-abliterated/4ab806fe-738d-4f5b-89e4-004134d2f7fe.json new file mode 100644 index 000000000..1e938c379 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V2-abliterated/4ab806fe-738d-4f5b-89e4-004134d2f7fe.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V2-abliterated/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZEUS-8B-V2-abliterated", + "id": "T145/ZEUS-8B-V2-abliterated", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7895 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5129 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2115 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3129 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3911 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3825 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V2-abliterated/926fb6ed-0750-4d04-8e3c-da470e236db2.json b/data/hfopenllm_v2/T145/ZEUS-8B-V2-abliterated/926fb6ed-0750-4d04-8e3c-da470e236db2.json deleted file mode 100644 index f280ae158..000000000 --- a/data/hfopenllm_v2/T145/ZEUS-8B-V2-abliterated/926fb6ed-0750-4d04-8e3c-da470e236db2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V2-abliterated/1762652579.9039848", - "retrieved_timestamp": "1762652579.903986", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/ZEUS-8B-V2-abliterated", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V2-abliterated", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7895495064207414 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5128868622210663 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21148036253776434 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3910833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38248005319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V2/a937e27e-b757-4de7-b679-01ac29d8bb22.json b/data/hfopenllm_v2/T145/ZEUS-8B-V2/a937e27e-b757-4de7-b679-01ac29d8bb22.json new file mode 100644 index 000000000..d403098af --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V2/a937e27e-b757-4de7-b679-01ac29d8bb22.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + 
"evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZEUS-8B-V2", + "id": "T145/ZEUS-8B-V2", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8029 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5194 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.216 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.391 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3896 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V2/e64503c5-d9ce-4544-8caf-0fec97a2b592.json b/data/hfopenllm_v2/T145/ZEUS-8B-V2/e64503c5-d9ce-4544-8caf-0fec97a2b592.json deleted file mode 100644 index b8908b54b..000000000 --- a/data/hfopenllm_v2/T145/ZEUS-8B-V2/e64503c5-d9ce-4544-8caf-0fec97a2b592.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V2/1762652579.9035678", - "retrieved_timestamp": "1762652579.903569", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/ZEUS-8B-V2", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V2", - "additional_details": { - "precision": 
"bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8029384255996312 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5194405455747161 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21601208459214502 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3910208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3896276595744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V20/0ba8bca5-3a61-499a-8e2d-ca84f52ef654.json b/data/hfopenllm_v2/T145/ZEUS-8B-V20/0ba8bca5-3a61-499a-8e2d-ca84f52ef654.json deleted file mode 100644 index 5bcad29bc..000000000 --- a/data/hfopenllm_v2/T145/ZEUS-8B-V20/0ba8bca5-3a61-499a-8e2d-ca84f52ef654.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V20/1762652579.904202", - "retrieved_timestamp": "1762652579.904203", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/ZEUS-8B-V20", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V20", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7955945779420825 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5244005058415827 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.2190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40432291666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3929521276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V20/1d906aab-33a6-4ffe-8a63-694482d83d09.json b/data/hfopenllm_v2/T145/ZEUS-8B-V20/1d906aab-33a6-4ffe-8a63-694482d83d09.json new file mode 100644 index 000000000..269fa1ea5 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V20/1d906aab-33a6-4ffe-8a63-694482d83d09.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V20/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZEUS-8B-V20", + "id": "T145/ZEUS-8B-V20", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7956 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5244 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.219 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.323 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4043 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.393 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V21/380a44ec-387a-4f34-92c2-18fc7a8d5ce0.json b/data/hfopenllm_v2/T145/ZEUS-8B-V21/380a44ec-387a-4f34-92c2-18fc7a8d5ce0.json deleted file mode 100644 index cc6c997ba..000000000 --- a/data/hfopenllm_v2/T145/ZEUS-8B-V21/380a44ec-387a-4f34-92c2-18fc7a8d5ce0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V21/1762652579.904516", - "retrieved_timestamp": "1762652579.904516", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/ZEUS-8B-V21", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V21", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3785145635801894 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33975753940458464 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1593655589123867 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32615625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17137632978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V21/9e101298-6482-4ae8-83e4-b948ba8fa550.json b/data/hfopenllm_v2/T145/ZEUS-8B-V21/9e101298-6482-4ae8-83e4-b948ba8fa550.json new file mode 100644 index 000000000..55ec61818 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V21/9e101298-6482-4ae8-83e4-b948ba8fa550.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/T145_ZEUS-8B-V21/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZEUS-8B-V21", + "id": "T145/ZEUS-8B-V21", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3785 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3398 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1594 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3262 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1714 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V22/3818710d-80a9-4e7d-90e3-f06afffb71ac.json b/data/hfopenllm_v2/T145/ZEUS-8B-V22/3818710d-80a9-4e7d-90e3-f06afffb71ac.json new file mode 100644 index 000000000..35e2f1e24 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V22/3818710d-80a9-4e7d-90e3-f06afffb71ac.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V22/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZEUS-8B-V22", + 
"id": "T145/ZEUS-8B-V22", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7995 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5245 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2228 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.328 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.399 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3938 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V22/3f44a1c0-b70a-4712-a0c1-bdf3318b270c.json b/data/hfopenllm_v2/T145/ZEUS-8B-V22/3f44a1c0-b70a-4712-a0c1-bdf3318b270c.json deleted file mode 100644 index 30b92ad7e..000000000 --- a/data/hfopenllm_v2/T145/ZEUS-8B-V22/3f44a1c0-b70a-4712-a0c1-bdf3318b270c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V22/1762652579.9047282", - "retrieved_timestamp": "1762652579.9047291", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/ZEUS-8B-V22", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V22", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - 
}, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7995163942782927 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5244915522507715 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22280966767371602 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3989583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3937832446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V23/a18ec0c4-6f3f-4904-b69c-e40770df169e.json b/data/hfopenllm_v2/T145/ZEUS-8B-V23/a18ec0c4-6f3f-4904-b69c-e40770df169e.json new file mode 100644 index 000000000..9423e9265 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V23/a18ec0c4-6f3f-4904-b69c-e40770df169e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V23/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZEUS-8B-V23", + "id": "T145/ZEUS-8B-V23", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7621 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5195 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH 
Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.182 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3922 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3666 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V23/f83b7584-0e52-4658-ae15-f295064b9111.json b/data/hfopenllm_v2/T145/ZEUS-8B-V23/f83b7584-0e52-4658-ae15-f295064b9111.json deleted file mode 100644 index 0b22e53e3..000000000 --- a/data/hfopenllm_v2/T145/ZEUS-8B-V23/f83b7584-0e52-4658-ae15-f295064b9111.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V23/1762652579.904932", - "retrieved_timestamp": "1762652579.9049332", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/ZEUS-8B-V23", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V23", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7621222799948582 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.519500470668349 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18202416918429004 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3921979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3666057180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V24/51368b21-1b48-4c07-9b09-8cae0786200b.json b/data/hfopenllm_v2/T145/ZEUS-8B-V24/51368b21-1b48-4c07-9b09-8cae0786200b.json deleted file mode 100644 index 236344287..000000000 --- a/data/hfopenllm_v2/T145/ZEUS-8B-V24/51368b21-1b48-4c07-9b09-8cae0786200b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V24/1762652579.905136", - "retrieved_timestamp": "1762652579.9051368", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/ZEUS-8B-V24", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V24", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5999813827311533 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4777962576721959 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14577039274924472 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3729166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32845744680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V24/529c2bd4-6b8e-4e3c-8737-c0b794444d13.json b/data/hfopenllm_v2/T145/ZEUS-8B-V24/529c2bd4-6b8e-4e3c-8737-c0b794444d13.json new file mode 100644 index 000000000..cebba5e48 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V24/529c2bd4-6b8e-4e3c-8737-c0b794444d13.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V24/1770682486.623709", + "retrieved_timestamp": 
"1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZEUS-8B-V24", + "id": "T145/ZEUS-8B-V24", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4778 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1458 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3729 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3285 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V25/52b41117-c308-4e8c-9c61-ce8e4faf778f.json b/data/hfopenllm_v2/T145/ZEUS-8B-V25/52b41117-c308-4e8c-9c61-ce8e4faf778f.json deleted file mode 100644 index 9d3bbe405..000000000 --- a/data/hfopenllm_v2/T145/ZEUS-8B-V25/52b41117-c308-4e8c-9c61-ce8e4faf778f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V25/1762652579.905337", - "retrieved_timestamp": "1762652579.905338", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - 
"model_info": { - "name": "T145/ZEUS-8B-V25", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V25", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33202790817253774 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4546907005207668 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3488229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2884807180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V25/9e994362-a1d1-48f7-9db1-dd9d532b9f35.json b/data/hfopenllm_v2/T145/ZEUS-8B-V25/9e994362-a1d1-48f7-9db1-dd9d532b9f35.json new file mode 100644 index 000000000..7c03079a9 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V25/9e994362-a1d1-48f7-9db1-dd9d532b9f35.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V25/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZEUS-8B-V25", + "id": "T145/ZEUS-8B-V25", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.332 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4547 + } + }, 
+ { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2039 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3488 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2885 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V26/8ae81cea-b179-4025-916a-9bc73755de82.json b/data/hfopenllm_v2/T145/ZEUS-8B-V26/8ae81cea-b179-4025-916a-9bc73755de82.json deleted file mode 100644 index d758279e0..000000000 --- a/data/hfopenllm_v2/T145/ZEUS-8B-V26/8ae81cea-b179-4025-916a-9bc73755de82.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V26/1762652579.905539", - "retrieved_timestamp": "1762652579.905539", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/ZEUS-8B-V26", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V26", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6707979272774018 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5231548583920674 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12462235649546828 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40162499999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39070811170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V26/cf35b7db-f675-4362-8916-36b0582b64f4.json b/data/hfopenllm_v2/T145/ZEUS-8B-V26/cf35b7db-f675-4362-8916-36b0582b64f4.json new file mode 100644 index 000000000..1c35fccff --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V26/cf35b7db-f675-4362-8916-36b0582b64f4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V26/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZEUS-8B-V26", + "id": "T145/ZEUS-8B-V26", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6708 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5232 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1246 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4016 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": 
"MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3907 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V27/79ee7e34-36cd-4024-8978-86c1b059ae5f.json b/data/hfopenllm_v2/T145/ZEUS-8B-V27/79ee7e34-36cd-4024-8978-86c1b059ae5f.json new file mode 100644 index 000000000..6b17c7f6a --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V27/79ee7e34-36cd-4024-8978-86c1b059ae5f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V27/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZEUS-8B-V27", + "id": "T145/ZEUS-8B-V27", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6544 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.523 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1344 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3977 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3902 + } + } + ] +} \ No newline at end 
of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V27/bf31323b-bfb5-464a-b343-0605dafb5a60.json b/data/hfopenllm_v2/T145/ZEUS-8B-V27/bf31323b-bfb5-464a-b343-0605dafb5a60.json deleted file mode 100644 index 10214a5c5..000000000 --- a/data/hfopenllm_v2/T145/ZEUS-8B-V27/bf31323b-bfb5-464a-b343-0605dafb5a60.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V27/1762652579.9057322", - "retrieved_timestamp": "1762652579.905733", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/ZEUS-8B-V27", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V27", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.654361538495636 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.52303129292911 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13444108761329304 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39768749999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3902094414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V28/9ec4fb99-ed4d-416e-9342-0c036aadd35d.json b/data/hfopenllm_v2/T145/ZEUS-8B-V28/9ec4fb99-ed4d-416e-9342-0c036aadd35d.json new file mode 100644 index 000000000..1ebb0754d --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V28/9ec4fb99-ed4d-416e-9342-0c036aadd35d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V28/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZEUS-8B-V28", + "id": "T145/ZEUS-8B-V28", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": 
"LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6353 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5254 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1269 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3896 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3902 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V28/e31561ff-779a-4ebe-b6fe-686b2895c53b.json b/data/hfopenllm_v2/T145/ZEUS-8B-V28/e31561ff-779a-4ebe-b6fe-686b2895c53b.json deleted file mode 100644 index b67c86ead..000000000 --- a/data/hfopenllm_v2/T145/ZEUS-8B-V28/e31561ff-779a-4ebe-b6fe-686b2895c53b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V28/1762652579.905931", - "retrieved_timestamp": "1762652579.905931", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/ZEUS-8B-V28", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V28", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.635252241829457 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5254256199968339 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1268882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38962499999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3902094414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V29/8788e4fa-04c5-4f7c-bb4e-523287901f71.json b/data/hfopenllm_v2/T145/ZEUS-8B-V29/8788e4fa-04c5-4f7c-bb4e-523287901f71.json new file mode 100644 index 000000000..a0483b34c --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V29/8788e4fa-04c5-4f7c-bb4e-523287901f71.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V29/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZEUS-8B-V29", + "id": "T145/ZEUS-8B-V29", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7418 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5253 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1601 + } + }, + 
{ + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3263 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.392 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V29/c383684a-2f70-46e9-ab55-4d68903613b3.json b/data/hfopenllm_v2/T145/ZEUS-8B-V29/c383684a-2f70-46e9-ab55-4d68903613b3.json deleted file mode 100644 index 69f6ff82a..000000000 --- a/data/hfopenllm_v2/T145/ZEUS-8B-V29/c383684a-2f70-46e9-ab55-4d68903613b3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V29/1762652579.906123", - "retrieved_timestamp": "1762652579.906123", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/ZEUS-8B-V29", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V29", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7417640748768822 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5253330901112457 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16012084592145015 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3263422818791946 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4002604166666666 - } - }, - { - "evaluation_name": 
"MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3920378989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V2L1/015f91ef-9318-44d6-acb2-17628000c273.json b/data/hfopenllm_v2/T145/ZEUS-8B-V2L1/015f91ef-9318-44d6-acb2-17628000c273.json deleted file mode 100644 index b87bc7475..000000000 --- a/data/hfopenllm_v2/T145/ZEUS-8B-V2L1/015f91ef-9318-44d6-acb2-17628000c273.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V2L1/1762652579.906316", - "retrieved_timestamp": "1762652579.906317", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/ZEUS-8B-V2L1", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V2L1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3191886416929303 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5013485375260267 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12386706948640483 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38819791666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36377992021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V2L1/18097bf4-5149-40e9-9850-558c3f143ed8.json b/data/hfopenllm_v2/T145/ZEUS-8B-V2L1/18097bf4-5149-40e9-9850-558c3f143ed8.json new file mode 100644 index 000000000..e6c750d51 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V2L1/18097bf4-5149-40e9-9850-558c3f143ed8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V2L1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + 
"evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZEUS-8B-V2L1", + "id": "T145/ZEUS-8B-V2L1", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3192 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5013 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1239 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3129 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3882 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3638 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V2L2/8e7be46e-af57-4e88-9df5-3161110dfa66.json b/data/hfopenllm_v2/T145/ZEUS-8B-V2L2/8e7be46e-af57-4e88-9df5-3161110dfa66.json deleted file mode 100644 index 866cf80e8..000000000 --- a/data/hfopenllm_v2/T145/ZEUS-8B-V2L2/8e7be46e-af57-4e88-9df5-3161110dfa66.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V2L2/1762652579.9065118", - "retrieved_timestamp": "1762652579.906513", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/ZEUS-8B-V2L2", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V2L2", - "additional_details": { 
- "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8020640788662969 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5202843665402132 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20166163141993956 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39746875000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38838098404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V2L2/b5942721-5c30-4c49-a6e1-fb5419539652.json b/data/hfopenllm_v2/T145/ZEUS-8B-V2L2/b5942721-5c30-4c49-a6e1-fb5419539652.json new file mode 100644 index 000000000..d9184ab9f --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V2L2/b5942721-5c30-4c49-a6e1-fb5419539652.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V2L2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZEUS-8B-V2L2", + "id": "T145/ZEUS-8B-V2L2", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8021 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5203 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2017 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3975 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3884 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V3/6b8fca40-f44b-45a0-bd5b-04b2fa2067a2.json b/data/hfopenllm_v2/T145/ZEUS-8B-V3/6b8fca40-f44b-45a0-bd5b-04b2fa2067a2.json deleted file mode 100644 index 38af1697f..000000000 --- a/data/hfopenllm_v2/T145/ZEUS-8B-V3/6b8fca40-f44b-45a0-bd5b-04b2fa2067a2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V3/1762652579.906709", - "retrieved_timestamp": "1762652579.90671", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/ZEUS-8B-V3", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V3", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7886751596874072 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5265064133535374 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16767371601208458 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221476510067114 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4016875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38040226063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V3/76d27de3-0309-4e4b-8d0d-0e402bde0a31.json b/data/hfopenllm_v2/T145/ZEUS-8B-V3/76d27de3-0309-4e4b-8d0d-0e402bde0a31.json new file mode 100644 index 000000000..5781ec510 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V3/76d27de3-0309-4e4b-8d0d-0e402bde0a31.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZEUS-8B-V3", + "id": "T145/ZEUS-8B-V3", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7887 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5265 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1677 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3221 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4017 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", 
+ "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3804 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V30/5c0553ff-4910-45a9-aa8d-3a76af098403.json b/data/hfopenllm_v2/T145/ZEUS-8B-V30/5c0553ff-4910-45a9-aa8d-3a76af098403.json new file mode 100644 index 000000000..cf4cd7678 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V30/5c0553ff-4910-45a9-aa8d-3a76af098403.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V30/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZEUS-8B-V30", + "id": "T145/ZEUS-8B-V30", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7436 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5243 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1586 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3205 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4029 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3944 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V30/839ff423-8c5c-4fab-aecf-b535ee06af36.json 
b/data/hfopenllm_v2/T145/ZEUS-8B-V30/839ff423-8c5c-4fab-aecf-b535ee06af36.json deleted file mode 100644 index 65ebdaf89..000000000 --- a/data/hfopenllm_v2/T145/ZEUS-8B-V30/839ff423-8c5c-4fab-aecf-b535ee06af36.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V30/1762652579.907134", - "retrieved_timestamp": "1762652579.907138", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/ZEUS-8B-V30", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V30", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7435626360279614 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5243248855841048 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15861027190332327 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4029270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3943650265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V4/9330c290-ee47-4a7d-9b8f-62903402e0e3.json b/data/hfopenllm_v2/T145/ZEUS-8B-V4/9330c290-ee47-4a7d-9b8f-62903402e0e3.json deleted file mode 100644 index 9694845b9..000000000 --- a/data/hfopenllm_v2/T145/ZEUS-8B-V4/9330c290-ee47-4a7d-9b8f-62903402e0e3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V4/1762652579.9075332", - "retrieved_timestamp": "1762652579.907535", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/ZEUS-8B-V4", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V4", - "additional_details": { - "precision": "bfloat16", - 
"architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7807317916461656 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5245974297200655 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19259818731117825 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4028958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37882313829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V4/fd97d1d9-a1b5-429d-b73d-1ea92ae1d61c.json b/data/hfopenllm_v2/T145/ZEUS-8B-V4/fd97d1d9-a1b5-429d-b73d-1ea92ae1d61c.json new file mode 100644 index 000000000..b4583aa87 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V4/fd97d1d9-a1b5-429d-b73d-1ea92ae1d61c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZEUS-8B-V4", + "id": "T145/ZEUS-8B-V4", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7807 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5246 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + 
"metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1926 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4029 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3788 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V6/09670c05-9463-479f-89e3-5029fd5d7ee7.json b/data/hfopenllm_v2/T145/ZEUS-8B-V6/09670c05-9463-479f-89e3-5029fd5d7ee7.json deleted file mode 100644 index 0767225a2..000000000 --- a/data/hfopenllm_v2/T145/ZEUS-8B-V6/09670c05-9463-479f-89e3-5029fd5d7ee7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V6/1762652579.9077919", - "retrieved_timestamp": "1762652579.9077928", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/ZEUS-8B-V6", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V6", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7837792612490415 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5239561762634447 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20241691842900303 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4068020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37591422872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V6/f77aa103-5a09-409c-ad72-7992b6049f94.json b/data/hfopenllm_v2/T145/ZEUS-8B-V6/f77aa103-5a09-409c-ad72-7992b6049f94.json new file mode 100644 index 000000000..0d6b104c5 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V6/f77aa103-5a09-409c-ad72-7992b6049f94.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V6/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZEUS-8B-V6", + "id": "T145/ZEUS-8B-V6", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7838 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.524 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2024 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4068 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3759 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V7/0afdaa1d-c1e7-4283-a2b3-f459c09df4a9.json b/data/hfopenllm_v2/T145/ZEUS-8B-V7/0afdaa1d-c1e7-4283-a2b3-f459c09df4a9.json new file mode 100644 index 000000000..4c23b2855 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V7/0afdaa1d-c1e7-4283-a2b3-f459c09df4a9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V7/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZEUS-8B-V7", + "id": "T145/ZEUS-8B-V7", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7786 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.507 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.148 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4162 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3812 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V7/c6a9173a-bacc-40bd-9572-239f9901e065.json b/data/hfopenllm_v2/T145/ZEUS-8B-V7/c6a9173a-bacc-40bd-9572-239f9901e065.json deleted file mode 
100644 index 5ccdfac25..000000000 --- a/data/hfopenllm_v2/T145/ZEUS-8B-V7/c6a9173a-bacc-40bd-9572-239f9901e065.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V7/1762652579.908076", - "retrieved_timestamp": "1762652579.908077", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/ZEUS-8B-V7", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V7", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7786085364610345 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5070394117180643 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14803625377643503 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41616666666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3812333776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V8/044ed79b-0c54-4a7a-94ba-a3f999adeb0d.json b/data/hfopenllm_v2/T145/ZEUS-8B-V8/044ed79b-0c54-4a7a-94ba-a3f999adeb0d.json new file mode 100644 index 000000000..dac50a4bc --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V8/044ed79b-0c54-4a7a-94ba-a3f999adeb0d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V8/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZEUS-8B-V8", + "id": "T145/ZEUS-8B-V8", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": 
"google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7914 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5065 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1329 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2878 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4214 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3761 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V8/c0035841-a312-493e-9c44-a75133e894d1.json b/data/hfopenllm_v2/T145/ZEUS-8B-V8/c0035841-a312-493e-9c44-a75133e894d1.json deleted file mode 100644 index 377ff03f1..000000000 --- a/data/hfopenllm_v2/T145/ZEUS-8B-V8/c0035841-a312-493e-9c44-a75133e894d1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V8/1762652579.908298", - "retrieved_timestamp": "1762652579.908299", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/ZEUS-8B-V8", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V8", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7913979352562313 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5064510419864701 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13293051359516617 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.421375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37608045212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V9/ac6b884d-62ea-4ff5-8eee-cfce08869030.json b/data/hfopenllm_v2/T145/ZEUS-8B-V9/ac6b884d-62ea-4ff5-8eee-cfce08869030.json new file mode 100644 index 000000000..73c3bb0b4 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V9/ac6b884d-62ea-4ff5-8eee-cfce08869030.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V9/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZEUS-8B-V9", + "id": "T145/ZEUS-8B-V9", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5551 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5207 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2137 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on 
GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3949 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3901 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V9/f5876dc1-b769-431f-84fe-365d2457902e.json b/data/hfopenllm_v2/T145/ZEUS-8B-V9/f5876dc1-b769-431f-84fe-365d2457902e.json deleted file mode 100644 index 041a23980..000000000 --- a/data/hfopenllm_v2/T145/ZEUS-8B-V9/f5876dc1-b769-431f-84fe-365d2457902e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V9/1762652579.908509", - "retrieved_timestamp": "1762652579.90851", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/ZEUS-8B-V9", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V9", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5551436854213487 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5207256346477752 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21374622356495468 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3949270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.39012632978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/T145/qwen-2.5-3B-merge-test/8ffa696e-adef-4808-ba0e-bb04921a433d.json b/data/hfopenllm_v2/T145/qwen-2.5-3B-merge-test/8ffa696e-adef-4808-ba0e-bb04921a433d.json new file mode 100644 index 000000000..2f5ff3f18 --- /dev/null +++ b/data/hfopenllm_v2/T145/qwen-2.5-3B-merge-test/8ffa696e-adef-4808-ba0e-bb04921a433d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/T145_qwen-2.5-3B-merge-test/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwen-2.5-3B-merge-test", + "id": "T145/qwen-2.5-3B-merge-test", + "developer": "T145", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5751 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4842 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3202 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4007 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.329 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/THUDM/glm-4-9b-chat-1m-hf/077f7956-8c9b-47ef-8c4d-40455bbb0027.json b/data/hfopenllm_v2/THUDM/glm-4-9b-chat-1m-hf/077f7956-8c9b-47ef-8c4d-40455bbb0027.json deleted file mode 100644 index 
e3f1ac599..000000000 --- a/data/hfopenllm_v2/THUDM/glm-4-9b-chat-1m-hf/077f7956-8c9b-47ef-8c4d-40455bbb0027.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/THUDM_glm-4-9b-chat-1m-hf/1762652579.9096901", - "retrieved_timestamp": "1762652579.9096909", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "THUDM/glm-4-9b-chat-1m-hf", - "developer": "THUDM", - "inference_platform": "unknown", - "id": "THUDM/glm-4-9b-chat-1m-hf", - "additional_details": { - "precision": "bfloat16", - "architecture": "GlmForCausalLM", - "params_billions": 9.484 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5341106043076814 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3900953106836365 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04833836858006042 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36888541666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18143284574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/THUDM/glm-4-9b-chat-1m-hf/8a2cfa62-5f13-447e-8d0f-2503e4962ac5.json b/data/hfopenllm_v2/THUDM/glm-4-9b-chat-1m-hf/8a2cfa62-5f13-447e-8d0f-2503e4962ac5.json new file mode 100644 index 000000000..2a4ac4794 --- /dev/null +++ b/data/hfopenllm_v2/THUDM/glm-4-9b-chat-1m-hf/8a2cfa62-5f13-447e-8d0f-2503e4962ac5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/THUDM_glm-4-9b-chat-1m-hf/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "glm-4-9b-chat-1m-hf", + "id": "THUDM/glm-4-9b-chat-1m-hf", + "developer": "THUDM", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GlmForCausalLM", + "params_billions": 9.484 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + 
"source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5341 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3901 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0483 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3689 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1814 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/THUDM/glm-4-9b-chat-1m/4f24fc46-3686-41fa-bf25-a0e39b252cc9.json b/data/hfopenllm_v2/THUDM/glm-4-9b-chat-1m/4f24fc46-3686-41fa-bf25-a0e39b252cc9.json new file mode 100644 index 000000000..cbe32d26e --- /dev/null +++ b/data/hfopenllm_v2/THUDM/glm-4-9b-chat-1m/4f24fc46-3686-41fa-bf25-a0e39b252cc9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/THUDM_glm-4-9b-chat-1m/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "glm-4-9b-chat-1m", + "id": "THUDM/glm-4-9b-chat-1m", + "developer": "THUDM", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "ChatGLMModel", + "params_billions": 9.484 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.418 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3795 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3163 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/THUDM/glm-4-9b-chat-1m/f0c306f0-683e-4582-81b7-f0a2c372060f.json b/data/hfopenllm_v2/THUDM/glm-4-9b-chat-1m/f0c306f0-683e-4582-81b7-f0a2c372060f.json deleted file mode 100644 index eeaef7c4c..000000000 --- a/data/hfopenllm_v2/THUDM/glm-4-9b-chat-1m/f0c306f0-683e-4582-81b7-f0a2c372060f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/THUDM_glm-4-9b-chat-1m/1762652579.909478", - "retrieved_timestamp": "1762652579.909479", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "THUDM/glm-4-9b-chat-1m", - "developer": "THUDM", - "inference_platform": "unknown", - "id": "THUDM/glm-4-9b-chat-1m", - "additional_details": { - "precision": "bfloat16", - "architecture": "ChatGLMModel", - "params_billions": 9.484 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41800578218330303 - } 
- }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3794583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31632313829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/THUDM/glm-4-9b-chat-hf/0af9353e-10d5-42e3-8bc9-4c736720ff30.json b/data/hfopenllm_v2/THUDM/glm-4-9b-chat-hf/0af9353e-10d5-42e3-8bc9-4c736720ff30.json deleted file mode 100644 index c8f948390..000000000 --- a/data/hfopenllm_v2/THUDM/glm-4-9b-chat-hf/0af9353e-10d5-42e3-8bc9-4c736720ff30.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/THUDM_glm-4-9b-chat-hf/1762652579.909895", - "retrieved_timestamp": "1762652579.909896", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "THUDM/glm-4-9b-chat-hf", - "developer": "THUDM", - "inference_platform": "unknown", - "id": "THUDM/glm-4-9b-chat-hf", - "additional_details": { - "precision": "bfloat16", - "architecture": "GlmForCausalLM", - "params_billions": 9.4 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6513140688927601 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4432308604245425 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35930208333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27742686170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/THUDM/glm-4-9b-chat-hf/b1375cb4-b0d5-4cb4-ad43-394ebd1a481f.json b/data/hfopenllm_v2/THUDM/glm-4-9b-chat-hf/b1375cb4-b0d5-4cb4-ad43-394ebd1a481f.json new file mode 100644 index 000000000..f54072bac --- /dev/null +++ b/data/hfopenllm_v2/THUDM/glm-4-9b-chat-hf/b1375cb4-b0d5-4cb4-ad43-394ebd1a481f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/THUDM_glm-4-9b-chat-hf/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "glm-4-9b-chat-hf", + "id": "THUDM/glm-4-9b-chat-hf", + "developer": "THUDM", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GlmForCausalLM", + "params_billions": 9.4 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6513 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4432 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0846 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3593 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2774 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/THUDM/glm-4-9b-chat/4ce062da-acfc-4684-95c2-679cbe5a697b.json b/data/hfopenllm_v2/THUDM/glm-4-9b-chat/4ce062da-acfc-4684-95c2-679cbe5a697b.json new file mode 100644 index 000000000..01dceb960 --- /dev/null +++ b/data/hfopenllm_v2/THUDM/glm-4-9b-chat/4ce062da-acfc-4684-95c2-679cbe5a697b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/THUDM_glm-4-9b-chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "glm-4-9b-chat", + "id": "THUDM/glm-4-9b-chat", + "developer": "THUDM", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "ChatGLMModelM", + "params_billions": 9.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4736 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3138 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3167 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/THUDM/glm-4-9b-chat/e7c5d8ef-d480-4ab9-b698-409e5ea76cf8.json b/data/hfopenllm_v2/THUDM/glm-4-9b-chat/e7c5d8ef-d480-4ab9-b698-409e5ea76cf8.json deleted file mode 100644 index 4866525c0..000000000 --- a/data/hfopenllm_v2/THUDM/glm-4-9b-chat/e7c5d8ef-d480-4ab9-b698-409e5ea76cf8.json +++ /dev/null @@ 
-1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/THUDM_glm-4-9b-chat/1762652579.909267", - "retrieved_timestamp": "1762652579.909267", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "THUDM/glm-4-9b-chat", - "developer": "THUDM", - "inference_platform": "unknown", - "id": "THUDM/glm-4-9b-chat", - "additional_details": { - "precision": "bfloat16", - "architecture": "ChatGLMModelM", - "params_billions": 9.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47363884291035735 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3994270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.316655585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/THUDM/glm-4-9b/3d785765-befa-4e53-8672-769f7bb87dcd.json b/data/hfopenllm_v2/THUDM/glm-4-9b/3d785765-befa-4e53-8672-769f7bb87dcd.json new file mode 100644 index 000000000..f7c95a078 --- /dev/null +++ b/data/hfopenllm_v2/THUDM/glm-4-9b/3d785765-befa-4e53-8672-769f7bb87dcd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/THUDM_glm-4-9b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "glm-4-9b", + "id": "THUDM/glm-4-9b", + "developer": "THUDM", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "ChatGLMModelM", + "params_billions": 9.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.1426 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5528 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3163 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4386 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4145 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/THUDM/glm-4-9b/bd038a6c-1241-401d-962d-e033434ba735.json b/data/hfopenllm_v2/THUDM/glm-4-9b/bd038a6c-1241-401d-962d-e033434ba735.json deleted file mode 100644 index 10ff460f0..000000000 --- a/data/hfopenllm_v2/THUDM/glm-4-9b/bd038a6c-1241-401d-962d-e033434ba735.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/THUDM_glm-4-9b/1762652579.9090161", - "retrieved_timestamp": "1762652579.9090161", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "THUDM/glm-4-9b", - "developer": "THUDM", - "inference_platform": "unknown", - "id": "THUDM/glm-4-9b", - "additional_details": { - "precision": "bfloat16", - "architecture": "ChatGLMModelM", - "params_billions": 9.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1426082793654171 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5528368141665274 - } - }, 
- { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4385833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4144780585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/TIGER-Lab/AceCodeRM-7B/ab0d3a24-19db-4d00-892e-bcb7c0f2f30f.json b/data/hfopenllm_v2/TIGER-Lab/AceCodeRM-7B/ab0d3a24-19db-4d00-892e-bcb7c0f2f30f.json new file mode 100644 index 000000000..a74035d6b --- /dev/null +++ b/data/hfopenllm_v2/TIGER-Lab/AceCodeRM-7B/ab0d3a24-19db-4d00-892e-bcb7c0f2f30f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/TIGER-Lab_AceCodeRM-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AceCodeRM-7B", + "id": "TIGER-Lab/AceCodeRM-7B", + "developer": "TIGER-Lab", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalRM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5855 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4773 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3467 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + 
"evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4192 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3361 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TIGER-Lab/AceCodeRM-7B/eb1d6ce5-3b0c-477d-9ca6-2f3ff8bc4e30.json b/data/hfopenllm_v2/TIGER-Lab/AceCodeRM-7B/eb1d6ce5-3b0c-477d-9ca6-2f3ff8bc4e30.json deleted file mode 100644 index 6ff2b07a1..000000000 --- a/data/hfopenllm_v2/TIGER-Lab/AceCodeRM-7B/eb1d6ce5-3b0c-477d-9ca6-2f3ff8bc4e30.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/TIGER-Lab_AceCodeRM-7B/1762652579.9101062", - "retrieved_timestamp": "1762652579.910107", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "TIGER-Lab/AceCodeRM-7B", - "developer": "TIGER-Lab", - "inference_platform": "unknown", - "id": "TIGER-Lab/AceCodeRM-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalRM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5854931581536988 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4773230085351336 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3466767371601209 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41920833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3361037234042553 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/TIGER-Lab/AceCoder-Qwen2.5-7B-Ins-Rule/31f0b186-1805-42ff-86cf-d8455a66d538.json b/data/hfopenllm_v2/TIGER-Lab/AceCoder-Qwen2.5-7B-Ins-Rule/31f0b186-1805-42ff-86cf-d8455a66d538.json new file mode 100644 index 000000000..0ea92826f --- /dev/null +++ b/data/hfopenllm_v2/TIGER-Lab/AceCoder-Qwen2.5-7B-Ins-Rule/31f0b186-1805-42ff-86cf-d8455a66d538.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/TIGER-Lab_AceCoder-Qwen2.5-7B-Ins-Rule/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AceCoder-Qwen2.5-7B-Ins-Rule", + "id": "TIGER-Lab/AceCoder-Qwen2.5-7B-Ins-Rule", + "developer": "TIGER-Lab", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7424 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5404 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4992 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.398 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4322 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Base-Rule/ed6b3e7e-d294-420d-b9b9-460a52cd0239.json 
b/data/hfopenllm_v2/TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Base-Rule/ed6b3e7e-d294-420d-b9b9-460a52cd0239.json new file mode 100644 index 000000000..60cf1a5b7 --- /dev/null +++ b/data/hfopenllm_v2/TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Base-Rule/ed6b3e7e-d294-420d-b9b9-460a52cd0239.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/TIGER-Lab_AceCoder-Qwen2.5-Coder-7B-Base-Rule/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AceCoder-Qwen2.5-Coder-7B-Base-Rule", + "id": "TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Base-Rule", + "developer": "TIGER-Lab", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4408 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4902 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2017 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2718 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3449 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3745 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Ins-Rule/91dec0c0-9854-4790-a0a5-e17d19636f17.json b/data/hfopenllm_v2/TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Ins-Rule/91dec0c0-9854-4790-a0a5-e17d19636f17.json new file mode 100644 index 000000000..e2e2f2697 
--- /dev/null +++ b/data/hfopenllm_v2/TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Ins-Rule/91dec0c0-9854-4790-a0a5-e17d19636f17.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/TIGER-Lab_AceCoder-Qwen2.5-Coder-7B-Ins-Rule/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AceCoder-Qwen2.5-Coder-7B-Ins-Rule", + "id": "TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Ins-Rule", + "developer": "TIGER-Lab", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6222 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5089 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3603 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4046 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3428 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TIGER-Lab/MAmmoTH2-7B-Plus/599616fb-26c1-47e3-a98b-9ad922a95c08.json b/data/hfopenllm_v2/TIGER-Lab/MAmmoTH2-7B-Plus/599616fb-26c1-47e3-a98b-9ad922a95c08.json new file mode 100644 index 000000000..0a1e9887a --- /dev/null +++ b/data/hfopenllm_v2/TIGER-Lab/MAmmoTH2-7B-Plus/599616fb-26c1-47e3-a98b-9ad922a95c08.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/TIGER-Lab_MAmmoTH2-7B-Plus/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MAmmoTH2-7B-Plus", + "id": "TIGER-Lab/MAmmoTH2-7B-Plus", + "developer": "TIGER-Lab", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5575 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4235 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1858 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4124 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3017 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TIGER-Lab/MAmmoTH2-7B-Plus/93503cc0-80aa-44b5-9155-c81cd44a9ac9.json b/data/hfopenllm_v2/TIGER-Lab/MAmmoTH2-7B-Plus/93503cc0-80aa-44b5-9155-c81cd44a9ac9.json deleted file mode 100644 index b8fdebe05..000000000 --- a/data/hfopenllm_v2/TIGER-Lab/MAmmoTH2-7B-Plus/93503cc0-80aa-44b5-9155-c81cd44a9ac9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/TIGER-Lab_MAmmoTH2-7B-Plus/1762652579.9110248", - "retrieved_timestamp": "1762652579.911026", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "TIGER-Lab/MAmmoTH2-7B-Plus", - "developer": "TIGER-Lab", - "inference_platform": "unknown", - "id": "TIGER-Lab/MAmmoTH2-7B-Plus", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5574664113441224 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42346949888019064 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18580060422960726 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41235416666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30169547872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/TIGER-Lab/Qwen2.5-Math-7B-CFT/aeee4365-c34d-46b9-8c98-29976010bb62.json b/data/hfopenllm_v2/TIGER-Lab/Qwen2.5-Math-7B-CFT/aeee4365-c34d-46b9-8c98-29976010bb62.json new file mode 100644 index 000000000..92ab6326a --- /dev/null +++ b/data/hfopenllm_v2/TIGER-Lab/Qwen2.5-Math-7B-CFT/aeee4365-c34d-46b9-8c98-29976010bb62.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/TIGER-Lab_Qwen2.5-Math-7B-CFT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-Math-7B-CFT", + "id": "TIGER-Lab/Qwen2.5-Math-7B-CFT", + "developer": "TIGER-Lab", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + 
"source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4637 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5574 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2861 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3887 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TTTXXX01/Mistral-7B-Base-SimPO2-5e-7/1ec68708-94c9-4561-bb99-7f211d7a9950.json b/data/hfopenllm_v2/TTTXXX01/Mistral-7B-Base-SimPO2-5e-7/1ec68708-94c9-4561-bb99-7f211d7a9950.json new file mode 100644 index 000000000..f4ed2d887 --- /dev/null +++ b/data/hfopenllm_v2/TTTXXX01/Mistral-7B-Base-SimPO2-5e-7/1ec68708-94c9-4561-bb99-7f211d7a9950.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/TTTXXX01_Mistral-7B-Base-SimPO2-5e-7/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-7B-Base-SimPO2-5e-7", + "id": "TTTXXX01/Mistral-7B-Base-SimPO2-5e-7", + "developer": "TTTXXX01", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4392 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 
0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.432 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0264 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3604 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2766 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Tarek07/Progenitor-V1.1-LLaMa-70B/0b53e7b4-0e91-40a2-911b-cd0d415e9fad.json b/data/hfopenllm_v2/Tarek07/Progenitor-V1.1-LLaMa-70B/0b53e7b4-0e91-40a2-911b-cd0d415e9fad.json new file mode 100644 index 000000000..80dd34f48 --- /dev/null +++ b/data/hfopenllm_v2/Tarek07/Progenitor-V1.1-LLaMa-70B/0b53e7b4-0e91-40a2-911b-cd0d415e9fad.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Tarek07_Progenitor-V1.1-LLaMa-70B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Progenitor-V1.1-LLaMa-70B", + "id": "Tarek07/Progenitor-V1.1-LLaMa-70B", + "developer": "Tarek07", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6906 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6971 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3573 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4581 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4736 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5465 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Tarek07/Thalassic-Alpha-LLaMa-70B/91bcd646-fe3d-458b-a426-a6a8863d69a0.json b/data/hfopenllm_v2/Tarek07/Thalassic-Alpha-LLaMa-70B/91bcd646-fe3d-458b-a426-a6a8863d69a0.json new file mode 100644 index 000000000..f5ea7f4d8 --- /dev/null +++ b/data/hfopenllm_v2/Tarek07/Thalassic-Alpha-LLaMa-70B/91bcd646-fe3d-458b-a426-a6a8863d69a0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Tarek07_Thalassic-Alpha-LLaMa-70B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Thalassic-Alpha-LLaMa-70B", + "id": "Tarek07/Thalassic-Alpha-LLaMa-70B", + "developer": "Tarek07", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7003 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.694 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.315 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4438 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4802 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5435 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TeeZee/DoubleBagel-57B-v1.0/1315f2ad-2e39-4cab-b09a-c74d0779f895.json b/data/hfopenllm_v2/TeeZee/DoubleBagel-57B-v1.0/1315f2ad-2e39-4cab-b09a-c74d0779f895.json deleted file mode 100644 index 9dec4e787..000000000 --- a/data/hfopenllm_v2/TeeZee/DoubleBagel-57B-v1.0/1315f2ad-2e39-4cab-b09a-c74d0779f895.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/TeeZee_DoubleBagel-57B-v1.0/1762652579.9121659", - "retrieved_timestamp": "1762652579.9121659", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "TeeZee/DoubleBagel-57B-v1.0", - "developer": "TeeZee", - "inference_platform": "unknown", - "id": "TeeZee/DoubleBagel-57B-v1.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 56.703 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23363342597640924 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.325078559362514 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43148958333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14777260638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/TeeZee/DoubleBagel-57B-v1.0/2e0458cc-e092-4770-bd80-00dff169d754.json b/data/hfopenllm_v2/TeeZee/DoubleBagel-57B-v1.0/2e0458cc-e092-4770-bd80-00dff169d754.json new file mode 100644 index 000000000..b1e13e640 --- /dev/null +++ b/data/hfopenllm_v2/TeeZee/DoubleBagel-57B-v1.0/2e0458cc-e092-4770-bd80-00dff169d754.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/TeeZee_DoubleBagel-57B-v1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DoubleBagel-57B-v1.0", + "id": "TeeZee/DoubleBagel-57B-v1.0", + "developer": "TeeZee", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 56.703 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2336 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3251 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0098 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4315 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 
0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1478 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Telugu-LLM-Labs/Indic-gemma-2b-finetuned-sft-Navarasa-2.0/d56ef415-0edf-4fde-8277-ae44b4bb4ed2.json b/data/hfopenllm_v2/Telugu-LLM-Labs/Indic-gemma-2b-finetuned-sft-Navarasa-2.0/d56ef415-0edf-4fde-8277-ae44b4bb4ed2.json new file mode 100644 index 000000000..2039e5d97 --- /dev/null +++ b/data/hfopenllm_v2/Telugu-LLM-Labs/Indic-gemma-2b-finetuned-sft-Navarasa-2.0/d56ef415-0edf-4fde-8277-ae44b4bb4ed2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Telugu-LLM-Labs_Indic-gemma-2b-finetuned-sft-Navarasa-2.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Indic-gemma-2b-finetuned-sft-Navarasa-2.0", + "id": "Telugu-LLM-Labs/Indic-gemma-2b-finetuned-sft-Navarasa-2.0", + "developer": "Telugu-LLM-Labs", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 2.506 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2103 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3241 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0272 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2433 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3899 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1279 + } + } + ] +} 
\ No newline at end of file diff --git a/data/hfopenllm_v2/Telugu-LLM-Labs/Indic-gemma-2b-finetuned-sft-Navarasa-2.0/ec8a8e25-f985-40a8-80ff-0c7d7595029d.json b/data/hfopenllm_v2/Telugu-LLM-Labs/Indic-gemma-2b-finetuned-sft-Navarasa-2.0/ec8a8e25-f985-40a8-80ff-0c7d7595029d.json deleted file mode 100644 index 067ca004b..000000000 --- a/data/hfopenllm_v2/Telugu-LLM-Labs/Indic-gemma-2b-finetuned-sft-Navarasa-2.0/ec8a8e25-f985-40a8-80ff-0c7d7595029d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Telugu-LLM-Labs_Indic-gemma-2b-finetuned-sft-Navarasa-2.0/1762652579.912417", - "retrieved_timestamp": "1762652579.912417", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Telugu-LLM-Labs/Indic-gemma-2b-finetuned-sft-Navarasa-2.0", - "developer": "Telugu-LLM-Labs", - "inference_platform": "unknown", - "id": "Telugu-LLM-Labs/Indic-gemma-2b-finetuned-sft-Navarasa-2.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 2.506 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21030310686755588 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3240881373468133 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027190332326283987 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24328859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3899375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12790890957446807 - } - } - ] -} diff --git a/data/hfopenllm_v2/Telugu-LLM-Labs/Indic-gemma-7b-finetuned-sft-Navarasa-2.0/89d117f3-7a67-4e30-82b2-b42efaf44024.json b/data/hfopenllm_v2/Telugu-LLM-Labs/Indic-gemma-7b-finetuned-sft-Navarasa-2.0/89d117f3-7a67-4e30-82b2-b42efaf44024.json deleted file mode 100644 index 3b1ad6bf1..000000000 --- a/data/hfopenllm_v2/Telugu-LLM-Labs/Indic-gemma-7b-finetuned-sft-Navarasa-2.0/89d117f3-7a67-4e30-82b2-b42efaf44024.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Telugu-LLM-Labs_Indic-gemma-7b-finetuned-sft-Navarasa-2.0/1762652579.912673", - 
"retrieved_timestamp": "1762652579.912673", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Telugu-LLM-Labs/Indic-gemma-7b-finetuned-sft-Navarasa-2.0", - "developer": "Telugu-LLM-Labs", - "inference_platform": "unknown", - "id": "Telugu-LLM-Labs/Indic-gemma-7b-finetuned-sft-Navarasa-2.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 8.538 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32368449048524583 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40229948924733394 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0256797583081571 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40832291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23503989361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/Telugu-LLM-Labs/Indic-gemma-7b-finetuned-sft-Navarasa-2.0/a0a1beb8-ee9a-4e88-b939-6e0104ed76a7.json b/data/hfopenllm_v2/Telugu-LLM-Labs/Indic-gemma-7b-finetuned-sft-Navarasa-2.0/a0a1beb8-ee9a-4e88-b939-6e0104ed76a7.json new file mode 100644 index 000000000..a9a9aa606 --- /dev/null +++ b/data/hfopenllm_v2/Telugu-LLM-Labs/Indic-gemma-7b-finetuned-sft-Navarasa-2.0/a0a1beb8-ee9a-4e88-b939-6e0104ed76a7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Telugu-LLM-Labs_Indic-gemma-7b-finetuned-sft-Navarasa-2.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Indic-gemma-7b-finetuned-sft-Navarasa-2.0", + "id": "Telugu-LLM-Labs/Indic-gemma-7b-finetuned-sft-Navarasa-2.0", + "developer": "Telugu-LLM-Labs", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 8.538 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + 
"source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3237 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4023 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0257 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4083 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.235 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TencentARC/LLaMA-Pro-8B-Instruct/98ea850e-7019-4728-a558-8b1819ec47c2.json b/data/hfopenllm_v2/TencentARC/LLaMA-Pro-8B-Instruct/98ea850e-7019-4728-a558-8b1819ec47c2.json deleted file mode 100644 index 0cf475fc4..000000000 --- a/data/hfopenllm_v2/TencentARC/LLaMA-Pro-8B-Instruct/98ea850e-7019-4728-a558-8b1819ec47c2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/TencentARC_LLaMA-Pro-8B-Instruct/1762652579.9131231", - "retrieved_timestamp": "1762652579.913124", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "TencentARC/LLaMA-Pro-8B-Instruct", - "developer": "TencentARC", - "inference_platform": "unknown", - "id": "TencentARC/LLaMA-Pro-8B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.357 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4486063644463357 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4224205282459997 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41902083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19456449468085107 - } - } - ] -} diff --git a/data/hfopenllm_v2/TencentARC/LLaMA-Pro-8B-Instruct/f9b7c3ee-ea8b-42f0-a55a-6171d4e3d0ea.json b/data/hfopenllm_v2/TencentARC/LLaMA-Pro-8B-Instruct/f9b7c3ee-ea8b-42f0-a55a-6171d4e3d0ea.json new file mode 100644 index 000000000..2c3b0edf0 --- /dev/null +++ b/data/hfopenllm_v2/TencentARC/LLaMA-Pro-8B-Instruct/f9b7c3ee-ea8b-42f0-a55a-6171d4e3d0ea.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/TencentARC_LLaMA-Pro-8B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LLaMA-Pro-8B-Instruct", + "id": "TencentARC/LLaMA-Pro-8B-Instruct", + "developer": "TencentARC", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.357 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4486 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4224 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", 
+ "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0249 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.419 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1946 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TencentARC/LLaMA-Pro-8B/2c8c6c6a-ce95-4d11-a33a-d547859fee11.json b/data/hfopenllm_v2/TencentARC/LLaMA-Pro-8B/2c8c6c6a-ce95-4d11-a33a-d547859fee11.json new file mode 100644 index 000000000..5af85f4e1 --- /dev/null +++ b/data/hfopenllm_v2/TencentARC/LLaMA-Pro-8B/2c8c6c6a-ce95-4d11-a33a-d547859fee11.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/TencentARC_LLaMA-Pro-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LLaMA-Pro-8B", + "id": "TencentARC/LLaMA-Pro-8B", + "developer": "TencentARC", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.357 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2277 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3484 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0189 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4018 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1811 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TencentARC/MetaMath-Mistral-Pro/47858744-3378-4ed4-9101-8acbc3a53cda.json b/data/hfopenllm_v2/TencentARC/MetaMath-Mistral-Pro/47858744-3378-4ed4-9101-8acbc3a53cda.json new file mode 100644 index 000000000..666b6ee5b --- /dev/null +++ b/data/hfopenllm_v2/TencentARC/MetaMath-Mistral-Pro/47858744-3378-4ed4-9101-8acbc3a53cda.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/TencentARC_MetaMath-Mistral-Pro/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MetaMath-Mistral-Pro", + "id": "TencentARC/MetaMath-Mistral-Pro", + "developer": "TencentARC", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 8.987 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2119 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4413 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0763 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2693 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + 
"dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3524 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2472 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TencentARC/Mistral_Pro_8B_v0.1/2aaeaaa7-89ed-4666-b0a5-8c1320ec4ec5.json b/data/hfopenllm_v2/TencentARC/Mistral_Pro_8B_v0.1/2aaeaaa7-89ed-4666-b0a5-8c1320ec4ec5.json new file mode 100644 index 000000000..4307e8288 --- /dev/null +++ b/data/hfopenllm_v2/TencentARC/Mistral_Pro_8B_v0.1/2aaeaaa7-89ed-4666-b0a5-8c1320ec4ec5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/TencentARC_Mistral_Pro_8B_v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral_Pro_8B_v0.1", + "id": "TencentARC/Mistral_Pro_8B_v0.1", + "developer": "TencentARC", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 8.987 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2115 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4526 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0566 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.4242 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2765 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TheDrummer/Cydonia-22B-v1.2/23ae6a72-5a1f-4961-8662-feb4d8ad8a26.json b/data/hfopenllm_v2/TheDrummer/Cydonia-22B-v1.2/23ae6a72-5a1f-4961-8662-feb4d8ad8a26.json new file mode 100644 index 000000000..66b44eb6d --- /dev/null +++ b/data/hfopenllm_v2/TheDrummer/Cydonia-22B-v1.2/23ae6a72-5a1f-4961-8662-feb4d8ad8a26.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/TheDrummer_Cydonia-22B-v1.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Cydonia-22B-v1.2", + "id": "TheDrummer/Cydonia-22B-v1.2", + "developer": "TheDrummer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5635 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5809 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2032 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3305 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4022 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4141 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TheDrummer/Cydonia-22B-v1.2/4a3e8df4-8e21-4c7c-aec8-afe353831c3d.json b/data/hfopenllm_v2/TheDrummer/Cydonia-22B-v1.2/4a3e8df4-8e21-4c7c-aec8-afe353831c3d.json deleted file mode 100644 index c2cde1a09..000000000 --- a/data/hfopenllm_v2/TheDrummer/Cydonia-22B-v1.2/4a3e8df4-8e21-4c7c-aec8-afe353831c3d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/TheDrummer_Cydonia-22B-v1.2/1762652579.9138188", - "retrieved_timestamp": "1762652579.9138198", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "TheDrummer/Cydonia-22B-v1.2", - "developer": "TheDrummer", - "inference_platform": "unknown", - "id": "TheDrummer/Cydonia-22B-v1.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 22.247 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5635114828654637 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.580856074392761 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20317220543806647 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40217708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4140625 - } - } - ] -} diff --git a/data/hfopenllm_v2/TheDrummer/Gemmasutra-9B-v1/312ec315-6175-4f99-8741-97d97eb26b47.json b/data/hfopenllm_v2/TheDrummer/Gemmasutra-9B-v1/312ec315-6175-4f99-8741-97d97eb26b47.json new file mode 100644 index 000000000..d22fd945e --- /dev/null +++ b/data/hfopenllm_v2/TheDrummer/Gemmasutra-9B-v1/312ec315-6175-4f99-8741-97d97eb26b47.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/TheDrummer_Gemmasutra-9B-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + 
"source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemmasutra-9B-v1", + "id": "TheDrummer/Gemmasutra-9B-v1", + "developer": "TheDrummer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2416 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5887 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0831 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4846 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4045 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TheDrummer/Gemmasutra-Mini-2B-v1/7869bbe3-fd17-4e6d-9546-94d3df5e83ef.json b/data/hfopenllm_v2/TheDrummer/Gemmasutra-Mini-2B-v1/7869bbe3-fd17-4e6d-9546-94d3df5e83ef.json new file mode 100644 index 000000000..ebce47bf5 --- /dev/null +++ b/data/hfopenllm_v2/TheDrummer/Gemmasutra-Mini-2B-v1/7869bbe3-fd17-4e6d-9546-94d3df5e83ef.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/TheDrummer_Gemmasutra-Mini-2B-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemmasutra-Mini-2B-v1", + "id": "TheDrummer/Gemmasutra-Mini-2B-v1", + "developer": 
"TheDrummer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2549 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3575 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0378 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.349 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2055 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TheDrummer/Llama-3SOME-8B-v2/68c9fb85-f90e-442f-aa96-458dabe30b39.json b/data/hfopenllm_v2/TheDrummer/Llama-3SOME-8B-v2/68c9fb85-f90e-442f-aa96-458dabe30b39.json new file mode 100644 index 000000000..4ed973176 --- /dev/null +++ b/data/hfopenllm_v2/TheDrummer/Llama-3SOME-8B-v2/68c9fb85-f90e-442f-aa96-458dabe30b39.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/TheDrummer_Llama-3SOME-8B-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3SOME-8B-v2", + "id": "TheDrummer/Llama-3SOME-8B-v2", + "developer": "TheDrummer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + 
"dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4508 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5203 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0937 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3833 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3753 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TheDrummer/Ministrations-8B-v1/21d5973e-d827-4bd6-b050-346da350a0aa.json b/data/hfopenllm_v2/TheDrummer/Ministrations-8B-v1/21d5973e-d827-4bd6-b050-346da350a0aa.json deleted file mode 100644 index 732cbdcd5..000000000 --- a/data/hfopenllm_v2/TheDrummer/Ministrations-8B-v1/21d5973e-d827-4bd6-b050-346da350a0aa.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/TheDrummer_Ministrations-8B-v1/1762652579.9148722", - "retrieved_timestamp": "1762652579.9148731", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "TheDrummer/Ministrations-8B-v1", - "developer": "TheDrummer", - "inference_platform": "unknown", - "id": "TheDrummer/Ministrations-8B-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 8.02 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28219346888478125 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48766312602251366 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18429003021148035 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44490625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36436170212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/TheDrummer/Ministrations-8B-v1/6891d1dd-0e1a-42e8-9206-64a4c71854f9.json b/data/hfopenllm_v2/TheDrummer/Ministrations-8B-v1/6891d1dd-0e1a-42e8-9206-64a4c71854f9.json new file mode 100644 index 000000000..f103c0ed1 --- /dev/null +++ b/data/hfopenllm_v2/TheDrummer/Ministrations-8B-v1/6891d1dd-0e1a-42e8-9206-64a4c71854f9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/TheDrummer_Ministrations-8B-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ministrations-8B-v1", + "id": "TheDrummer/Ministrations-8B-v1", + "developer": "TheDrummer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 8.02 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2822 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4877 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.1843 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3247 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4449 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3644 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TheDrummer/Rocinante-12B-v1/c62eb6b3-2a3d-45bd-acdf-bad717e51766.json b/data/hfopenllm_v2/TheDrummer/Rocinante-12B-v1/c62eb6b3-2a3d-45bd-acdf-bad717e51766.json new file mode 100644 index 000000000..f5272f067 --- /dev/null +++ b/data/hfopenllm_v2/TheDrummer/Rocinante-12B-v1/c62eb6b3-2a3d-45bd-acdf-bad717e51766.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/TheDrummer_Rocinante-12B-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Rocinante-12B-v1", + "id": "TheDrummer/Rocinante-12B-v1", + "developer": "TheDrummer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6076 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5065 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1269 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", 
+ "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4017 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3477 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TheDrummer/Rocinante-12B-v1/f21e98c1-5535-4cb4-a9f0-541e49aff795.json b/data/hfopenllm_v2/TheDrummer/Rocinante-12B-v1/f21e98c1-5535-4cb4-a9f0-541e49aff795.json deleted file mode 100644 index 453172fb4..000000000 --- a/data/hfopenllm_v2/TheDrummer/Rocinante-12B-v1/f21e98c1-5535-4cb4-a9f0-541e49aff795.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/TheDrummer_Rocinante-12B-v1/1762652579.915099", - "retrieved_timestamp": "1762652579.9150999", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "TheDrummer/Rocinante-12B-v1", - "developer": "TheDrummer", - "inference_platform": "unknown", - "id": "TheDrummer/Rocinante-12B-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6076499244227538 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5065452085797449 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1268882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40171874999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34773936170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/TheDrummer/Tiger-Gemma-9B-v1/55d4a6ae-44e5-4a1b-9509-299fbc6c3a36.json b/data/hfopenllm_v2/TheDrummer/Tiger-Gemma-9B-v1/55d4a6ae-44e5-4a1b-9509-299fbc6c3a36.json new file mode 100644 index 000000000..d5d96478e --- /dev/null +++ b/data/hfopenllm_v2/TheDrummer/Tiger-Gemma-9B-v1/55d4a6ae-44e5-4a1b-9509-299fbc6c3a36.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/TheDrummer_Tiger-Gemma-9B-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Tiger-Gemma-9B-v1", + "id": "TheDrummer/Tiger-Gemma-9B-v1", + "developer": "TheDrummer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7282 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5704 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1835 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3389 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4162 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4118 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TheDrummer/Tiger-Gemma-9B-v2/227e3e19-29d6-414f-b538-9f6f89d47677.json 
b/data/hfopenllm_v2/TheDrummer/Tiger-Gemma-9B-v2/227e3e19-29d6-414f-b538-9f6f89d47677.json new file mode 100644 index 000000000..8fccf8d00 --- /dev/null +++ b/data/hfopenllm_v2/TheDrummer/Tiger-Gemma-9B-v2/227e3e19-29d6-414f-b538-9f6f89d47677.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/TheDrummer_Tiger-Gemma-9B-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Tiger-Gemma-9B-v2", + "id": "TheDrummer/Tiger-Gemma-9B-v2", + "developer": "TheDrummer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6986 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5617 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.182 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3398 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4084 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4112 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TheDrummer/Tiger-Gemma-9B-v3/e922ac2c-e8d0-48f2-99fc-da70c925136c.json b/data/hfopenllm_v2/TheDrummer/Tiger-Gemma-9B-v3/e922ac2c-e8d0-48f2-99fc-da70c925136c.json new file mode 100644 index 000000000..4a7777424 --- /dev/null +++ b/data/hfopenllm_v2/TheDrummer/Tiger-Gemma-9B-v3/e922ac2c-e8d0-48f2-99fc-da70c925136c.json @@ -0,0 
+1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/TheDrummer_Tiger-Gemma-9B-v3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Tiger-Gemma-9B-v3", + "id": "TheDrummer/Tiger-Gemma-9B-v3", + "developer": "TheDrummer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6821 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5812 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1624 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3389 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4059 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TheDrunkenSnail/Daughter-of-Rhodia-12B/0f1c48a7-2a20-40c8-88e8-bdfdc3cdad40.json b/data/hfopenllm_v2/TheDrunkenSnail/Daughter-of-Rhodia-12B/0f1c48a7-2a20-40c8-88e8-bdfdc3cdad40.json deleted file mode 100644 index 3839d3efc..000000000 --- a/data/hfopenllm_v2/TheDrunkenSnail/Daughter-of-Rhodia-12B/0f1c48a7-2a20-40c8-88e8-bdfdc3cdad40.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/TheDrunkenSnail_Daughter-of-Rhodia-12B/1762652579.91594", - "retrieved_timestamp": "1762652579.9159412", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "TheDrunkenSnail/Daughter-of-Rhodia-12B", - "developer": "TheDrunkenSnail", - "inference_platform": "unknown", - "id": "TheDrunkenSnail/Daughter-of-Rhodia-12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6903815210308648 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5179174184876773 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12235649546827794 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43477083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3641123670212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/TheDrunkenSnail/Daughter-of-Rhodia-12B/59f93c1c-3712-4ee2-a3d2-999e5acc2ee5.json b/data/hfopenllm_v2/TheDrunkenSnail/Daughter-of-Rhodia-12B/59f93c1c-3712-4ee2-a3d2-999e5acc2ee5.json new file mode 100644 index 000000000..c592a549e --- /dev/null +++ b/data/hfopenllm_v2/TheDrunkenSnail/Daughter-of-Rhodia-12B/59f93c1c-3712-4ee2-a3d2-999e5acc2ee5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/TheDrunkenSnail_Daughter-of-Rhodia-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daughter-of-Rhodia-12B", + "id": "TheDrunkenSnail/Daughter-of-Rhodia-12B", + "developer": "TheDrunkenSnail", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6904 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5179 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1224 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3171 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4348 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3641 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TheDrunkenSnail/Mother-of-Rhodia-12B/2178eb24-2558-44db-aff1-7903c2e0f657.json b/data/hfopenllm_v2/TheDrunkenSnail/Mother-of-Rhodia-12B/2178eb24-2558-44db-aff1-7903c2e0f657.json deleted file mode 100644 index 4023e628e..000000000 --- a/data/hfopenllm_v2/TheDrunkenSnail/Mother-of-Rhodia-12B/2178eb24-2558-44db-aff1-7903c2e0f657.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/TheDrunkenSnail_Mother-of-Rhodia-12B/1762652579.9161909", - "retrieved_timestamp": "1762652579.9161909", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "TheDrunkenSnail/Mother-of-Rhodia-12B", - "developer": "TheDrunkenSnail", - "inference_platform": "unknown", - "id": "TheDrunkenSnail/Mother-of-Rhodia-12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6504895898438365 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49479138664574934 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12235649546827794 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41241666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35513630319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/TheDrunkenSnail/Mother-of-Rhodia-12B/a98dcf1e-6abb-402b-9e0c-da7c23b74bde.json b/data/hfopenllm_v2/TheDrunkenSnail/Mother-of-Rhodia-12B/a98dcf1e-6abb-402b-9e0c-da7c23b74bde.json new file mode 100644 index 000000000..e5301b44e --- /dev/null +++ b/data/hfopenllm_v2/TheDrunkenSnail/Mother-of-Rhodia-12B/a98dcf1e-6abb-402b-9e0c-da7c23b74bde.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/TheDrunkenSnail_Mother-of-Rhodia-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mother-of-Rhodia-12B", + "id": "TheDrunkenSnail/Mother-of-Rhodia-12B", + "developer": "TheDrunkenSnail", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6505 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4948 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1224 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + 
"dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4124 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3551 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TheDrunkenSnail/Son-of-Rhodia/22c87268-7e49-42b4-9bbb-16a4b305c595.json b/data/hfopenllm_v2/TheDrunkenSnail/Son-of-Rhodia/22c87268-7e49-42b4-9bbb-16a4b305c595.json deleted file mode 100644 index 41fc22c15..000000000 --- a/data/hfopenllm_v2/TheDrunkenSnail/Son-of-Rhodia/22c87268-7e49-42b4-9bbb-16a4b305c595.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/TheDrunkenSnail_Son-of-Rhodia/1762652579.916397", - "retrieved_timestamp": "1762652579.916397", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "TheDrunkenSnail/Son-of-Rhodia", - "developer": "TheDrunkenSnail", - "inference_platform": "unknown", - "id": "TheDrunkenSnail/Son-of-Rhodia", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7046447869430887 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5097327647725524 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13141993957703926 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.4202916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3607878989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/TheDrunkenSnail/Son-of-Rhodia/a889f561-0d8a-4345-9131-0a897ec215ac.json b/data/hfopenllm_v2/TheDrunkenSnail/Son-of-Rhodia/a889f561-0d8a-4345-9131-0a897ec215ac.json new file mode 100644 index 000000000..b5fd619fc --- /dev/null +++ b/data/hfopenllm_v2/TheDrunkenSnail/Son-of-Rhodia/a889f561-0d8a-4345-9131-0a897ec215ac.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/TheDrunkenSnail_Son-of-Rhodia/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Son-of-Rhodia", + "id": "TheDrunkenSnail/Son-of-Rhodia", + "developer": "TheDrunkenSnail", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7046 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5097 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1314 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3129 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4203 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.3608 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TheHierophant/Underground-Cognitive-V0.3-test/6402facc-6258-43a4-a0fd-78e21765c504.json b/data/hfopenllm_v2/TheHierophant/Underground-Cognitive-V0.3-test/6402facc-6258-43a4-a0fd-78e21765c504.json new file mode 100644 index 000000000..53650d960 --- /dev/null +++ b/data/hfopenllm_v2/TheHierophant/Underground-Cognitive-V0.3-test/6402facc-6258-43a4-a0fd-78e21765c504.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/TheHierophant_Underground-Cognitive-V0.3-test/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Underground-Cognitive-V0.3-test", + "id": "TheHierophant/Underground-Cognitive-V0.3-test", + "developer": "TheHierophant", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4808 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.529 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0589 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4351 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3318 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/TheHierophant/Underground-Cognitive-V0.3-test/872cc338-765c-4291-8b50-77b4bce719fd.json b/data/hfopenllm_v2/TheHierophant/Underground-Cognitive-V0.3-test/872cc338-765c-4291-8b50-77b4bce719fd.json deleted file mode 100644 index 2f0f5f167..000000000 --- a/data/hfopenllm_v2/TheHierophant/Underground-Cognitive-V0.3-test/872cc338-765c-4291-8b50-77b4bce719fd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/TheHierophant_Underground-Cognitive-V0.3-test/1762652579.916598", - "retrieved_timestamp": "1762652579.916598", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "TheHierophant/Underground-Cognitive-V0.3-test", - "developer": "TheHierophant", - "inference_platform": "unknown", - "id": "TheHierophant/Underground-Cognitive-V0.3-test", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4808297539417634 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5290131900998047 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43511458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.331781914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/TheTsar1209/nemo-carpmuscle-v0.1/29fbd2e0-e08a-48f4-905e-d2aa54886915.json b/data/hfopenllm_v2/TheTsar1209/nemo-carpmuscle-v0.1/29fbd2e0-e08a-48f4-905e-d2aa54886915.json new file mode 100644 index 000000000..2f11b0d16 --- /dev/null +++ b/data/hfopenllm_v2/TheTsar1209/nemo-carpmuscle-v0.1/29fbd2e0-e08a-48f4-905e-d2aa54886915.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/TheTsar1209_nemo-carpmuscle-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": 
"third_party" + }, + "model_info": { + "name": "nemo-carpmuscle-v0.1", + "id": "TheTsar1209/nemo-carpmuscle-v0.1", + "developer": "TheTsar1209", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2276 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5084 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0476 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4135 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3406 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TheTsar1209/nemo-carpmuscle-v0.1/8e834483-df6f-4d58-8257-f0cd1d8e3aa1.json b/data/hfopenllm_v2/TheTsar1209/nemo-carpmuscle-v0.1/8e834483-df6f-4d58-8257-f0cd1d8e3aa1.json deleted file mode 100644 index df7d65e2d..000000000 --- a/data/hfopenllm_v2/TheTsar1209/nemo-carpmuscle-v0.1/8e834483-df6f-4d58-8257-f0cd1d8e3aa1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/TheTsar1209_nemo-carpmuscle-v0.1/1762652579.9168499", - "retrieved_timestamp": "1762652579.916851", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "TheTsar1209/nemo-carpmuscle-v0.1", - "developer": "TheTsar1209", - 
"inference_platform": "unknown", - "id": "TheTsar1209/nemo-carpmuscle-v0.1", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2275639746982451 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5083529697101391 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4135 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3405917553191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/TheTsar1209/qwen-carpmuscle-r-v0.3/313e0379-d3ea-4f5a-8e06-4b0a94317487.json b/data/hfopenllm_v2/TheTsar1209/qwen-carpmuscle-r-v0.3/313e0379-d3ea-4f5a-8e06-4b0a94317487.json new file mode 100644 index 000000000..cd556b29c --- /dev/null +++ b/data/hfopenllm_v2/TheTsar1209/qwen-carpmuscle-r-v0.3/313e0379-d3ea-4f5a-8e06-4b0a94317487.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/TheTsar1209_qwen-carpmuscle-r-v0.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwen-carpmuscle-r-v0.3", + "id": "TheTsar1209/qwen-carpmuscle-r-v0.3", + "developer": "TheTsar1209", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4455 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, 
+ "score_details": { + "score": 0.6227 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3006 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3507 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4278 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5103 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TheTsar1209/qwen-carpmuscle-v0.1/f326fbd0-5f92-4324-a587-1f08cf7da208.json b/data/hfopenllm_v2/TheTsar1209/qwen-carpmuscle-v0.1/f326fbd0-5f92-4324-a587-1f08cf7da208.json new file mode 100644 index 000000000..ee6712ad0 --- /dev/null +++ b/data/hfopenllm_v2/TheTsar1209/qwen-carpmuscle-v0.1/f326fbd0-5f92-4324-a587-1f08cf7da208.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/TheTsar1209_qwen-carpmuscle-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwen-carpmuscle-v0.1", + "id": "TheTsar1209/qwen-carpmuscle-v0.1", + "developer": "TheTsar1209", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5622 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6434 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + 
"metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2628 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.344 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4161 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.52 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TheTsar1209/qwen-carpmuscle-v0.2/d61310e9-5267-4a87-8e24-ae25172cd64e.json b/data/hfopenllm_v2/TheTsar1209/qwen-carpmuscle-v0.2/d61310e9-5267-4a87-8e24-ae25172cd64e.json new file mode 100644 index 000000000..13326c754 --- /dev/null +++ b/data/hfopenllm_v2/TheTsar1209/qwen-carpmuscle-v0.2/d61310e9-5267-4a87-8e24-ae25172cd64e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/TheTsar1209_qwen-carpmuscle-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwen-carpmuscle-v0.2", + "id": "TheTsar1209/qwen-carpmuscle-v0.2", + "developer": "TheTsar1209", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5257 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6387 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2832 + } + }, + { + 
"evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3557 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4346 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5147 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TheTsar1209/qwen-carpmuscle-v0.3/60953e5e-523d-43c0-ad00-f746308030b1.json b/data/hfopenllm_v2/TheTsar1209/qwen-carpmuscle-v0.3/60953e5e-523d-43c0-ad00-f746308030b1.json new file mode 100644 index 000000000..246e41951 --- /dev/null +++ b/data/hfopenllm_v2/TheTsar1209/qwen-carpmuscle-v0.3/60953e5e-523d-43c0-ad00-f746308030b1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/TheTsar1209_qwen-carpmuscle-v0.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwen-carpmuscle-v0.3", + "id": "TheTsar1209/qwen-carpmuscle-v0.3", + "developer": "TheTsar1209", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4476 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6152 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3134 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3565 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4132 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5062 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TheTsar1209/qwen-carpmuscle-v0.4.1/5afd8861-d7cb-45cd-af1b-6db966cb56e0.json b/data/hfopenllm_v2/TheTsar1209/qwen-carpmuscle-v0.4.1/5afd8861-d7cb-45cd-af1b-6db966cb56e0.json new file mode 100644 index 000000000..255cd662b --- /dev/null +++ b/data/hfopenllm_v2/TheTsar1209/qwen-carpmuscle-v0.4.1/5afd8861-d7cb-45cd-af1b-6db966cb56e0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/TheTsar1209_qwen-carpmuscle-v0.4.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwen-carpmuscle-v0.4.1", + "id": "TheTsar1209/qwen-carpmuscle-v0.4.1", + "developer": "TheTsar1209", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.736 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6507 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2779 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3456 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + 
"source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4489 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5191 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TheTsar1209/qwen-carpmuscle-v0.4/c3972df1-4414-4c71-b473-fb9459cf085b.json b/data/hfopenllm_v2/TheTsar1209/qwen-carpmuscle-v0.4/c3972df1-4414-4c71-b473-fb9459cf085b.json new file mode 100644 index 000000000..9edb7f769 --- /dev/null +++ b/data/hfopenllm_v2/TheTsar1209/qwen-carpmuscle-v0.4/c3972df1-4414-4c71-b473-fb9459cf085b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/TheTsar1209_qwen-carpmuscle-v0.4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwen-carpmuscle-v0.4", + "id": "TheTsar1209/qwen-carpmuscle-v0.4", + "developer": "TheTsar1209", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7202 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6454 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2772 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3523 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, 
+ "score_details": { + "score": 0.4516 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5144 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Tijmen2/cosmosage-v3/b89d54b7-2329-4608-b9f6-07017e63f1cd.json b/data/hfopenllm_v2/Tijmen2/cosmosage-v3/b89d54b7-2329-4608-b9f6-07017e63f1cd.json new file mode 100644 index 000000000..39d760183 --- /dev/null +++ b/data/hfopenllm_v2/Tijmen2/cosmosage-v3/b89d54b7-2329-4608-b9f6-07017e63f1cd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Tijmen2_cosmosage-v3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cosmosage-v3", + "id": "Tijmen2/cosmosage-v3", + "developer": "Tijmen2", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4482 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4551 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0506 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4199 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2486 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Tijmen2/cosmosage-v3/f1eed2d5-89ca-4757-a5f9-9a90e811f075.json b/data/hfopenllm_v2/Tijmen2/cosmosage-v3/f1eed2d5-89ca-4757-a5f9-9a90e811f075.json deleted file mode 100644 index 2951e232e..000000000 --- a/data/hfopenllm_v2/Tijmen2/cosmosage-v3/f1eed2d5-89ca-4757-a5f9-9a90e811f075.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Tijmen2_cosmosage-v3/1762652579.918411", - "retrieved_timestamp": "1762652579.918412", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Tijmen2/cosmosage-v3", - "developer": "Tijmen2", - "inference_platform": "unknown", - "id": "Tijmen2/cosmosage-v3", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44823180272787316 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4550637900339029 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4198854166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24858710106382978 - } - } - ] -} diff --git a/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v0.1/50389350-af23-41ba-af46-5ffe338ff9d2.json b/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v0.1/50389350-af23-41ba-af46-5ffe338ff9d2.json new file mode 100644 index 000000000..a2c39dd23 --- /dev/null +++ b/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v0.1/50389350-af23-41ba-af46-5ffe338ff9d2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/TinyLlama_TinyLlama-1.1B-Chat-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + 
"evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TinyLlama-1.1B-Chat-v0.1", + "id": "TinyLlama/TinyLlama-1.1B-Chat-v0.1", + "developer": "TinyLlama", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.1 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1479 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3084 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.006 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.229 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3592 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1098 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v0.1/818cb0a4-7458-4cee-aca8-7cc72db341f8.json b/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v0.1/818cb0a4-7458-4cee-aca8-7cc72db341f8.json deleted file mode 100644 index 0d7300f66..000000000 --- a/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v0.1/818cb0a4-7458-4cee-aca8-7cc72db341f8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/TinyLlama_TinyLlama-1.1B-Chat-v0.1/1762652579.918663", - "retrieved_timestamp": "1762652579.918664", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"TinyLlama/TinyLlama-1.1B-Chat-v0.1", - "developer": "TinyLlama", - "inference_platform": "unknown", - "id": "TinyLlama/TinyLlama-1.1B-Chat-v0.1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.1 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1478543597654224 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30835294748680114 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22902684563758388 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35923958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10979055851063829 - } - } - ] -} diff --git a/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v0.5/96454d40-4535-4439-87be-0ea7b55cd88a.json b/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v0.5/96454d40-4535-4439-87be-0ea7b55cd88a.json deleted file mode 100644 index 76f801993..000000000 --- a/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v0.5/96454d40-4535-4439-87be-0ea7b55cd88a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/TinyLlama_TinyLlama-1.1B-Chat-v0.5/1762652579.918914", - "retrieved_timestamp": "1762652579.918914", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "TinyLlama/TinyLlama-1.1B-Chat-v0.5", - "developer": "TinyLlama", - "inference_platform": "unknown", - "id": "TinyLlama/TinyLlama-1.1B-Chat-v0.5", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.1 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1633665341294432 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3105046915935697 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0037764350453172208 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2483221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36612500000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10962433510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v0.5/b8f8f045-2306-43ad-8fa0-6a8bdb494db6.json b/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v0.5/b8f8f045-2306-43ad-8fa0-6a8bdb494db6.json new file mode 100644 index 000000000..f0a1f9188 --- /dev/null +++ b/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v0.5/b8f8f045-2306-43ad-8fa0-6a8bdb494db6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/TinyLlama_TinyLlama-1.1B-Chat-v0.5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TinyLlama-1.1B-Chat-v0.5", + "id": "TinyLlama/TinyLlama-1.1B-Chat-v0.5", + "developer": "TinyLlama", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.1 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1634 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3105 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0038 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2483 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3661 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1096 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v0.6/7cd59011-75d7-4497-956c-322d5d609c5f.json b/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v0.6/7cd59011-75d7-4497-956c-322d5d609c5f.json new file mode 100644 index 000000000..07b269a48 --- /dev/null +++ b/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v0.6/7cd59011-75d7-4497-956c-322d5d609c5f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/TinyLlama_TinyLlama-1.1B-Chat-v0.6/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TinyLlama-1.1B-Chat-v0.6", + "id": "TinyLlama/TinyLlama-1.1B-Chat-v0.6", + "developer": "TinyLlama", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.1 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1574 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3067 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0159 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": 
"MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3422 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1149 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v0.6/be032e7e-39b5-4153-81b9-c29115b231b4.json b/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v0.6/be032e7e-39b5-4153-81b9-c29115b231b4.json deleted file mode 100644 index ea3b18a09..000000000 --- a/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v0.6/be032e7e-39b5-4153-81b9-c29115b231b4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/TinyLlama_TinyLlama-1.1B-Chat-v0.6/1762652579.919127", - "retrieved_timestamp": "1762652579.919127", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "TinyLlama/TinyLlama-1.1B-Chat-v0.6", - "developer": "TinyLlama", - "inference_platform": "unknown", - "id": "TinyLlama/TinyLlama-1.1B-Chat-v0.6", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.1 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15742119797692344 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3066976656166826 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34221875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11486037234042554 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v1.0/0a24d7b1-44eb-4f5b-ae2f-ddee372facd5.json b/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v1.0/0a24d7b1-44eb-4f5b-ae2f-ddee372facd5.json deleted file mode 100644 index e5b7f9e90..000000000 --- a/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v1.0/0a24d7b1-44eb-4f5b-ae2f-ddee372facd5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/TinyLlama_TinyLlama-1.1B-Chat-v1.0/1762652579.9193401", - "retrieved_timestamp": "1762652579.919341", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - "developer": "TinyLlama", - "inference_platform": "unknown", - "id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.1 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0595763684800773 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3103562867491015 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35152083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11012300531914894 - } - } - ] -} diff --git a/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v1.0/1313d865-9c5b-45d2-ad64-629c65f07f2c.json b/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v1.0/1313d865-9c5b-45d2-ad64-629c65f07f2c.json new file mode 100644 index 000000000..03ae524fb --- /dev/null +++ b/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v1.0/1313d865-9c5b-45d2-ad64-629c65f07f2c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/TinyLlama_TinyLlama-1.1B-Chat-v1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TinyLlama-1.1B-Chat-v1.0", + "id": 
"TinyLlama/TinyLlama-1.1B-Chat-v1.0", + "developer": "TinyLlama", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.1 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0596 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0151 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3515 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1101 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T/0efc2583-bf21-4b60-96cc-716928768eb1.json b/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T/0efc2583-bf21-4b60-96cc-716928768eb1.json new file mode 100644 index 000000000..36550ed25 --- /dev/null +++ b/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T/0efc2583-bf21-4b60-96cc-716928768eb1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/TinyLlama_TinyLlama-1.1B-intermediate-step-1431k-3T/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TinyLlama-1.1B-intermediate-step-1431k-3T", + "id": "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T", + "developer": "TinyLlama", + "inference_platform": "unknown", + "additional_details": 
{ + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.1 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2277 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3071 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0121 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2525 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.338 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.112 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TinyLlama/TinyLlama_v1.1/be0a2737-19a0-4401-998a-a03663467133.json b/data/hfopenllm_v2/TinyLlama/TinyLlama_v1.1/be0a2737-19a0-4401-998a-a03663467133.json new file mode 100644 index 000000000..0ccc783b3 --- /dev/null +++ b/data/hfopenllm_v2/TinyLlama/TinyLlama_v1.1/be0a2737-19a0-4401-998a-a03663467133.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/TinyLlama_TinyLlama_v1.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TinyLlama_v1.1", + "id": "TinyLlama/TinyLlama_v1.1", + "developer": "TinyLlama", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.1 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2001 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3024 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0121 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2458 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.37 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1049 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ToastyPigeon/Sto-vo-kor-12B/1c795b39-a382-4315-8b6b-626423b9ccfe.json b/data/hfopenllm_v2/ToastyPigeon/Sto-vo-kor-12B/1c795b39-a382-4315-8b6b-626423b9ccfe.json deleted file mode 100644 index 273614d76..000000000 --- a/data/hfopenllm_v2/ToastyPigeon/Sto-vo-kor-12B/1c795b39-a382-4315-8b6b-626423b9ccfe.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ToastyPigeon_Sto-vo-kor-12B/1762652579.920128", - "retrieved_timestamp": "1762652579.920129", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ToastyPigeon/Sto-vo-kor-12B", - "developer": "ToastyPigeon", - "inference_platform": "unknown", - "id": "ToastyPigeon/Sto-vo-kor-12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5501225636865739 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5064617128925814 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10876132930513595 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39384375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33976063829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/ToastyPigeon/Sto-vo-kor-12B/71720e07-2de0-4402-bdfd-102150c61765.json b/data/hfopenllm_v2/ToastyPigeon/Sto-vo-kor-12B/71720e07-2de0-4402-bdfd-102150c61765.json new file mode 100644 index 000000000..65bce7ce5 --- /dev/null +++ b/data/hfopenllm_v2/ToastyPigeon/Sto-vo-kor-12B/71720e07-2de0-4402-bdfd-102150c61765.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ToastyPigeon_Sto-vo-kor-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sto-vo-kor-12B", + "id": "ToastyPigeon/Sto-vo-kor-12B", + "developer": "ToastyPigeon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5501 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5065 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1088 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + 
"source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3938 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3398 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Trappu/Magnum-Picaro-0.7-v2-12b/38c84c69-5cdb-4f24-820d-4b39c5b118ff.json b/data/hfopenllm_v2/Trappu/Magnum-Picaro-0.7-v2-12b/38c84c69-5cdb-4f24-820d-4b39c5b118ff.json new file mode 100644 index 000000000..c9c3a7878 --- /dev/null +++ b/data/hfopenllm_v2/Trappu/Magnum-Picaro-0.7-v2-12b/38c84c69-5cdb-4f24-820d-4b39c5b118ff.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Trappu_Magnum-Picaro-0.7-v2-12b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Magnum-Picaro-0.7-v2-12b", + "id": "Trappu/Magnum-Picaro-0.7-v2-12b", + "developer": "Trappu", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5507 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0665 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, 
+ "score_details": { + "score": 0.323 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4727 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.358 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Trappu/Magnum-Picaro-0.7-v2-12b/77871404-f2e3-46f9-8c48-808fb89442cc.json b/data/hfopenllm_v2/Trappu/Magnum-Picaro-0.7-v2-12b/77871404-f2e3-46f9-8c48-808fb89442cc.json deleted file mode 100644 index 75682cd68..000000000 --- a/data/hfopenllm_v2/Trappu/Magnum-Picaro-0.7-v2-12b/77871404-f2e3-46f9-8c48-808fb89442cc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Trappu_Magnum-Picaro-0.7-v2-12b/1762652579.920383", - "retrieved_timestamp": "1762652579.920383", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Trappu/Magnum-Picaro-0.7-v2-12b", - "developer": "Trappu", - "inference_platform": "unknown", - "id": "Trappu/Magnum-Picaro-0.7-v2-12b", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.300278815764394 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5506661918828847 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47271875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.35804521276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/Trappu/Nemo-Picaro-12B/37534f85-e1ae-482b-89d0-480c4bbc50e7.json b/data/hfopenllm_v2/Trappu/Nemo-Picaro-12B/37534f85-e1ae-482b-89d0-480c4bbc50e7.json deleted file mode 100644 index 07f353511..000000000 --- a/data/hfopenllm_v2/Trappu/Nemo-Picaro-12B/37534f85-e1ae-482b-89d0-480c4bbc50e7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Trappu_Nemo-Picaro-12B/1762652579.92064", - "retrieved_timestamp": "1762652579.92064", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Trappu/Nemo-Picaro-12B", - "developer": "Trappu", - "inference_platform": "unknown", - "id": "Trappu/Nemo-Picaro-12B", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2577139766929525 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5489586125997546 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271812080536913 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47259375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36045545212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/Trappu/Nemo-Picaro-12B/de9d274d-f213-4037-9711-3e9d3dbbcc96.json b/data/hfopenllm_v2/Trappu/Nemo-Picaro-12B/de9d274d-f213-4037-9711-3e9d3dbbcc96.json new file mode 100644 index 000000000..0162cd45d --- /dev/null +++ b/data/hfopenllm_v2/Trappu/Nemo-Picaro-12B/de9d274d-f213-4037-9711-3e9d3dbbcc96.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Trappu_Nemo-Picaro-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nemo-Picaro-12B", + "id": "Trappu/Nemo-Picaro-12B", + "developer": "Trappu", + "inference_platform": 
"unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2577 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.549 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0846 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3272 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4726 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3605 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Tremontaine/L3-12B-Lunaris-v1/51e5f1f2-a43a-4ade-9207-1b15d172ba08.json b/data/hfopenllm_v2/Tremontaine/L3-12B-Lunaris-v1/51e5f1f2-a43a-4ade-9207-1b15d172ba08.json deleted file mode 100644 index 2e0b694f3..000000000 --- a/data/hfopenllm_v2/Tremontaine/L3-12B-Lunaris-v1/51e5f1f2-a43a-4ade-9207-1b15d172ba08.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Tremontaine_L3-12B-Lunaris-v1/1762652579.920848", - "retrieved_timestamp": "1762652579.920848", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Tremontaine/L3-12B-Lunaris-v1", - "developer": "Tremontaine", - "inference_platform": "unknown", - "id": "Tremontaine/L3-12B-Lunaris-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - 
"params_billions": 11.52 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6909311737301471 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5230217237244009 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08761329305135952 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3673645833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3774933510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/Tremontaine/L3-12B-Lunaris-v1/92381da4-b9d1-43c4-a5c9-59f375017e11.json b/data/hfopenllm_v2/Tremontaine/L3-12B-Lunaris-v1/92381da4-b9d1-43c4-a5c9-59f375017e11.json new file mode 100644 index 000000000..8a5408f5a --- /dev/null +++ b/data/hfopenllm_v2/Tremontaine/L3-12B-Lunaris-v1/92381da4-b9d1-43c4-a5c9-59f375017e11.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Tremontaine_L3-12B-Lunaris-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3-12B-Lunaris-v1", + "id": "Tremontaine/L3-12B-Lunaris-v1", + "developer": "Tremontaine", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 11.52 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6909 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.523 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0876 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3674 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3775 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Annunaki-12b/28f9e91f-b32f-4b8f-ae18-126c7bbe6e7d.json b/data/hfopenllm_v2/Triangle104/Annunaki-12b/28f9e91f-b32f-4b8f-ae18-126c7bbe6e7d.json deleted file mode 100644 index bb228b199..000000000 --- a/data/hfopenllm_v2/Triangle104/Annunaki-12b/28f9e91f-b32f-4b8f-ae18-126c7bbe6e7d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Annunaki-12b/1762652579.921084", - "retrieved_timestamp": "1762652579.921084", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Annunaki-12b", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Annunaki-12b", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3872070550583563 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5498969437971782 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1216012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44087499999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3720910904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/Annunaki-12b/44ab6a50-027d-47df-a518-5aa944eb2a61.json b/data/hfopenllm_v2/Triangle104/Annunaki-12b/44ab6a50-027d-47df-a518-5aa944eb2a61.json new file mode 100644 index 000000000..d0a2060be --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Annunaki-12b/44ab6a50-027d-47df-a518-5aa944eb2a61.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Annunaki-12b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Annunaki-12b", + "id": "Triangle104/Annunaki-12b", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3872 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5499 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1216 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3213 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4409 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + 
"source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3721 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/BigTalker-Lite-8B/2a1947d7-74e0-43d0-931d-b2862348e90a.json b/data/hfopenllm_v2/Triangle104/BigTalker-Lite-8B/2a1947d7-74e0-43d0-931d-b2862348e90a.json new file mode 100644 index 000000000..c521e8873 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/BigTalker-Lite-8B/2a1947d7-74e0-43d0-931d-b2862348e90a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_BigTalker-Lite-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BigTalker-Lite-8B", + "id": "Triangle104/BigTalker-Lite-8B", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3689 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5308 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.102 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4208 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.3431 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/BigTalker-Lite-8B/befea823-7dc5-4e69-81e3-e75c4ff117ac.json b/data/hfopenllm_v2/Triangle104/BigTalker-Lite-8B/befea823-7dc5-4e69-81e3-e75c4ff117ac.json deleted file mode 100644 index f44ef3cdb..000000000 --- a/data/hfopenllm_v2/Triangle104/BigTalker-Lite-8B/befea823-7dc5-4e69-81e3-e75c4ff117ac.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_BigTalker-Lite-8B/1762652579.92133", - "retrieved_timestamp": "1762652579.921331", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/BigTalker-Lite-8B", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/BigTalker-Lite-8B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3689222374411007 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5308138241234059 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10196374622356495 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42084375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34308510638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/Chatty-Harry_V2.0/3677b71c-387d-4182-b15d-c3525bc7bc36.json b/data/hfopenllm_v2/Triangle104/Chatty-Harry_V2.0/3677b71c-387d-4182-b15d-c3525bc7bc36.json new file mode 100644 index 000000000..2703e6bd2 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Chatty-Harry_V2.0/3677b71c-387d-4182-b15d-c3525bc7bc36.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Chatty-Harry_V2.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { 
+ "name": "Chatty-Harry_V2.0", + "id": "Triangle104/Chatty-Harry_V2.0", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3326 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5319 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.139 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.323 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4078 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3683 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Chatty-Harry_V2.0/f2dcc214-e25c-4c73-97f0-4e47304df09b.json b/data/hfopenllm_v2/Triangle104/Chatty-Harry_V2.0/f2dcc214-e25c-4c73-97f0-4e47304df09b.json deleted file mode 100644 index a51ecc36c..000000000 --- a/data/hfopenllm_v2/Triangle104/Chatty-Harry_V2.0/f2dcc214-e25c-4c73-97f0-4e47304df09b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Chatty-Harry_V2.0/1762652579.921529", - "retrieved_timestamp": "1762652579.92153", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Chatty-Harry_V2.0", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": 
"Triangle104/Chatty-Harry_V2.0", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3325520729442324 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5318928049062546 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13897280966767372 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40782291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36826795212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/Chatty-Harry_V3.0/6b125a8e-5b53-48ca-8875-926249879f39.json b/data/hfopenllm_v2/Triangle104/Chatty-Harry_V3.0/6b125a8e-5b53-48ca-8875-926249879f39.json new file mode 100644 index 000000000..58fe5380d --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Chatty-Harry_V3.0/6b125a8e-5b53-48ca-8875-926249879f39.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Chatty-Harry_V3.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Chatty-Harry_V3.0", + "id": "Triangle104/Chatty-Harry_V3.0", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3675 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5526 + } + }, + { + 
"evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1125 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.323 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4408 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3702 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Chatty-Harry_V3.0/b9b23a78-beea-4c4b-8bb8-d5a18a05ffce.json b/data/hfopenllm_v2/Triangle104/Chatty-Harry_V3.0/b9b23a78-beea-4c4b-8bb8-d5a18a05ffce.json deleted file mode 100644 index f342b9c49..000000000 --- a/data/hfopenllm_v2/Triangle104/Chatty-Harry_V3.0/b9b23a78-beea-4c4b-8bb8-d5a18a05ffce.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Chatty-Harry_V3.0/1762652579.9217439", - "retrieved_timestamp": "1762652579.9217439", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Chatty-Harry_V3.0", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Chatty-Harry_V3.0", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36749823800848413 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5526193453608234 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11253776435045318 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": 
{ - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44084375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37017952127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/Chronos-Prism_V1.0/13bb7db2-9d89-4dce-950a-14ccfb3492aa.json b/data/hfopenllm_v2/Triangle104/Chronos-Prism_V1.0/13bb7db2-9d89-4dce-950a-14ccfb3492aa.json deleted file mode 100644 index 13f6d54b7..000000000 --- a/data/hfopenllm_v2/Triangle104/Chronos-Prism_V1.0/13bb7db2-9d89-4dce-950a-14ccfb3492aa.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Chronos-Prism_V1.0/1762652579.921948", - "retrieved_timestamp": "1762652579.921948", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Chronos-Prism_V1.0", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Chronos-Prism_V1.0", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3259329689667859 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5554188807010064 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4262708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36727061170212766 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/Triangle104/Chronos-Prism_V1.0/af851d4b-69d4-49a9-a160-a180146c3963.json b/data/hfopenllm_v2/Triangle104/Chronos-Prism_V1.0/af851d4b-69d4-49a9-a160-a180146c3963.json new file mode 100644 index 000000000..a35b89646 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Chronos-Prism_V1.0/af851d4b-69d4-49a9-a160-a180146c3963.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Chronos-Prism_V1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Chronos-Prism_V1.0", + "id": "Triangle104/Chronos-Prism_V1.0", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3259 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5554 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1201 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4263 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3673 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/DS-Distilled-Hermes-Llama-3.1/7aa6ce37-c0e4-48ce-b9db-f158ac47d366.json b/data/hfopenllm_v2/Triangle104/DS-Distilled-Hermes-Llama-3.1/7aa6ce37-c0e4-48ce-b9db-f158ac47d366.json new file mode 100644 index 
000000000..bb6dc9f21 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/DS-Distilled-Hermes-Llama-3.1/7aa6ce37-c0e4-48ce-b9db-f158ac47d366.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_DS-Distilled-Hermes-Llama-3.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DS-Distilled-Hermes-Llama-3.1", + "id": "Triangle104/DS-Distilled-Hermes-Llama-3.1", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3229 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5117 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2931 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3188 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4039 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.311 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/DS-Distilled-Hermes-Llama-3.1_TIES/1bce093e-27c0-41ad-aad6-b656f6773ed5.json b/data/hfopenllm_v2/Triangle104/DS-Distilled-Hermes-Llama-3.1_TIES/1bce093e-27c0-41ad-aad6-b656f6773ed5.json new file mode 100644 index 000000000..5e892b1b2 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/DS-Distilled-Hermes-Llama-3.1_TIES/1bce093e-27c0-41ad-aad6-b656f6773ed5.json @@ -0,0 +1,132 @@ +{ + 
"schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_DS-Distilled-Hermes-Llama-3.1_TIES/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DS-Distilled-Hermes-Llama-3.1_TIES", + "id": "Triangle104/DS-Distilled-Hermes-Llama-3.1_TIES", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1364 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0091 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.245 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3621 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1104 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/DS-R1-Distill-Q2.5-10B-Harmony/5c6cffab-ef72-4e12-808c-c26ee8ec6999.json b/data/hfopenllm_v2/Triangle104/DS-R1-Distill-Q2.5-10B-Harmony/5c6cffab-ef72-4e12-808c-c26ee8ec6999.json new file mode 100644 index 000000000..8e46d73b5 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/DS-R1-Distill-Q2.5-10B-Harmony/5c6cffab-ef72-4e12-808c-c26ee8ec6999.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_DS-R1-Distill-Q2.5-10B-Harmony/1770682486.623709", + "retrieved_timestamp": 
"1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DS-R1-Distill-Q2.5-10B-Harmony", + "id": "Triangle104/DS-R1-Distill-Q2.5-10B-Harmony", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 10.366 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1751 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2106 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3128 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1173 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/DS-R1-Distill-Q2.5-10B-Harmony/ff136a9d-7e29-4a44-86be-c69bc115102e.json b/data/hfopenllm_v2/Triangle104/DS-R1-Distill-Q2.5-10B-Harmony/ff136a9d-7e29-4a44-86be-c69bc115102e.json deleted file mode 100644 index a8994a7cd..000000000 --- a/data/hfopenllm_v2/Triangle104/DS-R1-Distill-Q2.5-10B-Harmony/ff136a9d-7e29-4a44-86be-c69bc115102e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_DS-R1-Distill-Q2.5-10B-Harmony/1762652579.9225988", - "retrieved_timestamp": "1762652579.9226", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/DS-R1-Distill-Q2.5-10B-Harmony", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/DS-R1-Distill-Q2.5-10B-Harmony", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 10.366 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17508211545366295 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2643276743386568 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2105704697986577 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31276041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11727061170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/DS-R1-Distill-Q2.5-14B-Harmony_V0.1/63bc0215-741c-48ab-8ce3-d4c036c74a42.json b/data/hfopenllm_v2/Triangle104/DS-R1-Distill-Q2.5-14B-Harmony_V0.1/63bc0215-741c-48ab-8ce3-d4c036c74a42.json deleted file mode 100644 index 074c973e2..000000000 --- a/data/hfopenllm_v2/Triangle104/DS-R1-Distill-Q2.5-14B-Harmony_V0.1/63bc0215-741c-48ab-8ce3-d4c036c74a42.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_DS-R1-Distill-Q2.5-14B-Harmony_V0.1/1762652579.9228039", - "retrieved_timestamp": "1762652579.922805", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/DS-R1-Distill-Q2.5-14B-Harmony_V0.1", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/DS-R1-Distill-Q2.5-14B-Harmony_V0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.4515042309959796 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5783379428926061 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5551359516616314 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3934563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5566875000000001 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4601063829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/DS-R1-Distill-Q2.5-14B-Harmony_V0.1/e288a874-f750-4a90-be07-616094c220cf.json b/data/hfopenllm_v2/Triangle104/DS-R1-Distill-Q2.5-14B-Harmony_V0.1/e288a874-f750-4a90-be07-616094c220cf.json new file mode 100644 index 000000000..350f712ec --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/DS-R1-Distill-Q2.5-14B-Harmony_V0.1/e288a874-f750-4a90-be07-616094c220cf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_DS-R1-Distill-Q2.5-14B-Harmony_V0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DS-R1-Distill-Q2.5-14B-Harmony_V0.1", + "id": "Triangle104/DS-R1-Distill-Q2.5-14B-Harmony_V0.1", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4515 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5783 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5551 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3935 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5567 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4601 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/DS-R1-Distill-Q2.5-7B-RP/0607da8d-3f4e-468a-91a6-b975261a87c0.json b/data/hfopenllm_v2/Triangle104/DS-R1-Distill-Q2.5-7B-RP/0607da8d-3f4e-468a-91a6-b975261a87c0.json new file mode 100644 index 000000000..0817ae888 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/DS-R1-Distill-Q2.5-7B-RP/0607da8d-3f4e-468a-91a6-b975261a87c0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_DS-R1-Distill-Q2.5-7B-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DS-R1-Distill-Q2.5-7B-RP", + "id": "Triangle104/DS-R1-Distill-Q2.5-7B-RP", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3445 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4383 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4683 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + 
"source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3138 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.403 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2891 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/DS-R1-Distill-Q2.5-7B-RP/5515e597-5f9f-46eb-8d3f-0482bdd69715.json b/data/hfopenllm_v2/Triangle104/DS-R1-Distill-Q2.5-7B-RP/5515e597-5f9f-46eb-8d3f-0482bdd69715.json deleted file mode 100644 index e16dfa9e1..000000000 --- a/data/hfopenllm_v2/Triangle104/DS-R1-Distill-Q2.5-7B-RP/5515e597-5f9f-46eb-8d3f-0482bdd69715.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_DS-R1-Distill-Q2.5-7B-RP/1762652579.923009", - "retrieved_timestamp": "1762652579.923009", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/DS-R1-Distill-Q2.5-7B-RP", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/DS-R1-Distill-Q2.5-7B-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34454248061809334 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43834886662348205 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46827794561933533 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.40302083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2890625 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/DS-R1-Llama-8B-Harmony/be2cc2fd-c8e7-4421-b8c8-d3b937272d0d.json b/data/hfopenllm_v2/Triangle104/DS-R1-Llama-8B-Harmony/be2cc2fd-c8e7-4421-b8c8-d3b937272d0d.json new file mode 100644 index 000000000..c407ea12b --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/DS-R1-Llama-8B-Harmony/be2cc2fd-c8e7-4421-b8c8-d3b937272d0d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_DS-R1-Llama-8B-Harmony/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DS-R1-Llama-8B-Harmony", + "id": "Triangle104/DS-R1-Llama-8B-Harmony", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3566 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4154 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4282 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3762 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.2744 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/DSR1-Distill-Llama-Lit-8B/15ffe64e-72fd-4e65-8632-babf137a386d.json b/data/hfopenllm_v2/Triangle104/DSR1-Distill-Llama-Lit-8B/15ffe64e-72fd-4e65-8632-babf137a386d.json new file mode 100644 index 000000000..db0609b3f --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/DSR1-Distill-Llama-Lit-8B/15ffe64e-72fd-4e65-8632-babf137a386d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_DSR1-Distill-Llama-Lit-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DSR1-Distill-Llama-Lit-8B", + "id": "Triangle104/DSR1-Distill-Llama-Lit-8B", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1885 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4284 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.352 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3535 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2798 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/DSR1-Distill-Qwen-7B-RP/ce1c0d4f-f5a3-49e7-ab77-65ff51bbd0ca.json 
b/data/hfopenllm_v2/Triangle104/DSR1-Distill-Qwen-7B-RP/ce1c0d4f-f5a3-49e7-ab77-65ff51bbd0ca.json new file mode 100644 index 000000000..78fbf1df3 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/DSR1-Distill-Qwen-7B-RP/ce1c0d4f-f5a3-49e7-ab77-65ff51bbd0ca.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_DSR1-Distill-Qwen-7B-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DSR1-Distill-Qwen-7B-RP", + "id": "Triangle104/DSR1-Distill-Qwen-7B-RP", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3609 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4326 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4804 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3196 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4045 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3028 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Dark-Chivalry_V1.0/b5afab38-13ba-4abd-9d04-a433c41061c5.json b/data/hfopenllm_v2/Triangle104/Dark-Chivalry_V1.0/b5afab38-13ba-4abd-9d04-a433c41061c5.json new file mode 100644 index 000000000..48178e07b --- /dev/null +++ 
b/data/hfopenllm_v2/Triangle104/Dark-Chivalry_V1.0/b5afab38-13ba-4abd-9d04-a433c41061c5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Dark-Chivalry_V1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dark-Chivalry_V1.0", + "id": "Triangle104/Dark-Chivalry_V1.0", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4326 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4974 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1314 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4182 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3444 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Dark-Chivalry_V1.0/ed3b441b-272c-4bc4-8839-aa6055a6ccbc.json b/data/hfopenllm_v2/Triangle104/Dark-Chivalry_V1.0/ed3b441b-272c-4bc4-8839-aa6055a6ccbc.json deleted file mode 100644 index 8c4d724a2..000000000 --- a/data/hfopenllm_v2/Triangle104/Dark-Chivalry_V1.0/ed3b441b-272c-4bc4-8839-aa6055a6ccbc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Dark-Chivalry_V1.0/1762652579.923868", - 
"retrieved_timestamp": "1762652579.923869", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Dark-Chivalry_V1.0", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Dark-Chivalry_V1.0", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4325700253106203 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4974207759950637 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13141993957703926 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4181770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34441489361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/Distilled-DarkPlanet-Allades-8B/2d57a30c-8a0e-4f18-bb2d-6bf4536bbc86.json b/data/hfopenllm_v2/Triangle104/Distilled-DarkPlanet-Allades-8B/2d57a30c-8a0e-4f18-bb2d-6bf4536bbc86.json deleted file mode 100644 index db75da80b..000000000 --- a/data/hfopenllm_v2/Triangle104/Distilled-DarkPlanet-Allades-8B/2d57a30c-8a0e-4f18-bb2d-6bf4536bbc86.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Distilled-DarkPlanet-Allades-8B/1762652579.9240808", - "retrieved_timestamp": "1762652579.9240808", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Distilled-DarkPlanet-Allades-8B", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Distilled-DarkPlanet-Allades-8B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3460163477351206 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4633948672868899 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29014295212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/Distilled-DarkPlanet-Allades-8B/a862c2a5-f66b-4d09-ac57-6cbe565f9f35.json b/data/hfopenllm_v2/Triangle104/Distilled-DarkPlanet-Allades-8B/a862c2a5-f66b-4d09-ac57-6cbe565f9f35.json new file mode 100644 index 000000000..b8c909e09 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Distilled-DarkPlanet-Allades-8B/a862c2a5-f66b-4d09-ac57-6cbe565f9f35.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Distilled-DarkPlanet-Allades-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Distilled-DarkPlanet-Allades-8B", + "id": "Triangle104/Distilled-DarkPlanet-Allades-8B", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.346 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4634 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + 
"metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4003 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3538 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2901 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Distilled-DarkPlanet-Allades-8B_TIES/9bff68b3-82a4-49b5-90a7-3c0038ddc35a.json b/data/hfopenllm_v2/Triangle104/Distilled-DarkPlanet-Allades-8B_TIES/9bff68b3-82a4-49b5-90a7-3c0038ddc35a.json deleted file mode 100644 index cf70b5e7b..000000000 --- a/data/hfopenllm_v2/Triangle104/Distilled-DarkPlanet-Allades-8B_TIES/9bff68b3-82a4-49b5-90a7-3c0038ddc35a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Distilled-DarkPlanet-Allades-8B_TIES/1762652579.924282", - "retrieved_timestamp": "1762652579.924282", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Distilled-DarkPlanet-Allades-8B_TIES", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Distilled-DarkPlanet-Allades-8B_TIES", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3891807071902552 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5041556910813355 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09063444108761329 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3868020833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.340093085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/Distilled-DarkPlanet-Allades-8B_TIES/d8254f6c-8110-44d3-800e-101fc731d779.json b/data/hfopenllm_v2/Triangle104/Distilled-DarkPlanet-Allades-8B_TIES/d8254f6c-8110-44d3-800e-101fc731d779.json new file mode 100644 index 000000000..ab8e49bea --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Distilled-DarkPlanet-Allades-8B_TIES/d8254f6c-8110-44d3-800e-101fc731d779.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Distilled-DarkPlanet-Allades-8B_TIES/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Distilled-DarkPlanet-Allades-8B_TIES", + "id": "Triangle104/Distilled-DarkPlanet-Allades-8B_TIES", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3892 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5042 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0906 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3146 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3868 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3401 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Distilled-Whiskey-8b/ccbcd5a7-2b98-4d90-ace1-3ad5971a5f18.json b/data/hfopenllm_v2/Triangle104/Distilled-Whiskey-8b/ccbcd5a7-2b98-4d90-ace1-3ad5971a5f18.json new file mode 100644 index 000000000..56ce9bac6 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Distilled-Whiskey-8b/ccbcd5a7-2b98-4d90-ace1-3ad5971a5f18.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Distilled-Whiskey-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Distilled-Whiskey-8b", + "id": "Triangle104/Distilled-Whiskey-8b", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3448 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5028 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2545 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3314 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4172 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + 
"source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3367 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Distilled-Whiskey-8b/cf34d222-197f-4d3d-9786-fb5c019f2552.json b/data/hfopenllm_v2/Triangle104/Distilled-Whiskey-8b/cf34d222-197f-4d3d-9786-fb5c019f2552.json deleted file mode 100644 index 022fa0f23..000000000 --- a/data/hfopenllm_v2/Triangle104/Distilled-Whiskey-8b/cf34d222-197f-4d3d-9786-fb5c019f2552.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Distilled-Whiskey-8b/1762652579.924494", - "retrieved_timestamp": "1762652579.9244952", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Distilled-Whiskey-8b", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Distilled-Whiskey-8b", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34476743928332376 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5027820189600739 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2545317220543807 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41721874999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3366855053191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/Dolphin3-Llama3.2-Smart/c208b19b-4ecf-4fad-b931-54f65d4b711b.json b/data/hfopenllm_v2/Triangle104/Dolphin3-Llama3.2-Smart/c208b19b-4ecf-4fad-b931-54f65d4b711b.json new file mode 100644 index 000000000..b54a4d6b2 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Dolphin3-Llama3.2-Smart/c208b19b-4ecf-4fad-b931-54f65d4b711b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/Triangle104_Dolphin3-Llama3.2-Smart/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dolphin3-Llama3.2-Smart", + "id": "Triangle104/Dolphin3-Llama3.2-Smart", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4137 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3975 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0438 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2693 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3922 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2195 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Gemmadevi-Stock-10B/debaf4a0-c734-47ea-bea0-2ddc65dc397d.json b/data/hfopenllm_v2/Triangle104/Gemmadevi-Stock-10B/debaf4a0-c734-47ea-bea0-2ddc65dc397d.json new file mode 100644 index 000000000..edaf3ab86 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Gemmadevi-Stock-10B/debaf4a0-c734-47ea-bea0-2ddc65dc397d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Gemmadevi-Stock-10B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemmadevi-Stock-10B", + "id": "Triangle104/Gemmadevi-Stock-10B", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1582 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6066 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0967 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3532 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4621 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4262 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Hermes-Llama-3.2-CoT-Summary/0eeb5962-ccc0-407b-92e6-7cf17c00941f.json b/data/hfopenllm_v2/Triangle104/Hermes-Llama-3.2-CoT-Summary/0eeb5962-ccc0-407b-92e6-7cf17c00941f.json new file mode 100644 index 000000000..2948054fa --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Hermes-Llama-3.2-CoT-Summary/0eeb5962-ccc0-407b-92e6-7cf17c00941f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Hermes-Llama-3.2-CoT-Summary/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Hermes-Llama-3.2-CoT-Summary", + "id": 
"Triangle104/Hermes-Llama-3.2-CoT-Summary", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.483 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.42 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0831 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2559 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3575 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2901 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Hermes-Llama-3.2-CoT/4b60e863-482c-4f91-8cd1-6c993d3c5988.json b/data/hfopenllm_v2/Triangle104/Hermes-Llama-3.2-CoT/4b60e863-482c-4f91-8cd1-6c993d3c5988.json new file mode 100644 index 000000000..156711b1f --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Hermes-Llama-3.2-CoT/4b60e863-482c-4f91-8cd1-6c993d3c5988.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Hermes-Llama-3.2-CoT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Hermes-Llama-3.2-CoT", + "id": "Triangle104/Hermes-Llama-3.2-CoT", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + 
"evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4178 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4616 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0952 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3698 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2947 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Hermes3-L3.1-DirtyHarry-8B/a8086735-c7a7-48b5-9219-829e288040f5.json b/data/hfopenllm_v2/Triangle104/Hermes3-L3.1-DirtyHarry-8B/a8086735-c7a7-48b5-9219-829e288040f5.json deleted file mode 100644 index cf54c7c8f..000000000 --- a/data/hfopenllm_v2/Triangle104/Hermes3-L3.1-DirtyHarry-8B/a8086735-c7a7-48b5-9219-829e288040f5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Hermes3-L3.1-DirtyHarry-8B/1762652579.925645", - "retrieved_timestamp": "1762652579.925645", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Hermes3-L3.1-DirtyHarry-8B", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Hermes3-L3.1-DirtyHarry-8B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32423414318452815 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5066388671914118 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4068958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3338597074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/Hermes3-L3.1-DirtyHarry-8B/f5f0bc72-427d-4703-aab1-1bb1bea73895.json b/data/hfopenllm_v2/Triangle104/Hermes3-L3.1-DirtyHarry-8B/f5f0bc72-427d-4703-aab1-1bb1bea73895.json new file mode 100644 index 000000000..92f5b47d3 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Hermes3-L3.1-DirtyHarry-8B/f5f0bc72-427d-4703-aab1-1bb1bea73895.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Hermes3-L3.1-DirtyHarry-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Hermes3-L3.1-DirtyHarry-8B", + "id": "Triangle104/Hermes3-L3.1-DirtyHarry-8B", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3242 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5066 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + 
"metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0718 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4069 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3339 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Herodotos-14B/271dbfc3-d9cf-4cb7-b1c0-175f016ed32b.json b/data/hfopenllm_v2/Triangle104/Herodotos-14B/271dbfc3-d9cf-4cb7-b1c0-175f016ed32b.json deleted file mode 100644 index 345e75f00..000000000 --- a/data/hfopenllm_v2/Triangle104/Herodotos-14B/271dbfc3-d9cf-4cb7-b1c0-175f016ed32b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Herodotos-14B/1762652579.925863", - "retrieved_timestamp": "1762652579.925863", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Herodotos-14B", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Herodotos-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4667415790103592 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6435044367110887 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5045317220543807 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3733221476510067 - } - 
}, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4795416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5290059840425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/Herodotos-14B/aae7f543-7b5b-435f-a506-e3ab901a8c5a.json b/data/hfopenllm_v2/Triangle104/Herodotos-14B/aae7f543-7b5b-435f-a506-e3ab901a8c5a.json new file mode 100644 index 000000000..1a742c4be --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Herodotos-14B/aae7f543-7b5b-435f-a506-e3ab901a8c5a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Herodotos-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Herodotos-14B", + "id": "Triangle104/Herodotos-14B", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4667 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6435 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5045 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3733 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4795 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" 
+ }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.529 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Herodotos-14B_V0.1/3c6d1b1b-465a-4b97-83ed-d2ebd27a905e.json b/data/hfopenllm_v2/Triangle104/Herodotos-14B_V0.1/3c6d1b1b-465a-4b97-83ed-d2ebd27a905e.json deleted file mode 100644 index f24a974dc..000000000 --- a/data/hfopenllm_v2/Triangle104/Herodotos-14B_V0.1/3c6d1b1b-465a-4b97-83ed-d2ebd27a905e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Herodotos-14B_V0.1/1762652579.9261289", - "retrieved_timestamp": "1762652579.926136", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Herodotos-14B_V0.1", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Herodotos-14B_V0.1", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1878715142488597 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30172239497895226 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22399328859060402 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3683854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11643949468085106 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/Herodotos-14B_V0.1/6e6ff4c3-3cfd-4790-80c4-544d9cbe47e2.json b/data/hfopenllm_v2/Triangle104/Herodotos-14B_V0.1/6e6ff4c3-3cfd-4790-80c4-544d9cbe47e2.json new file mode 100644 index 000000000..cc2ae0230 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Herodotos-14B_V0.1/6e6ff4c3-3cfd-4790-80c4-544d9cbe47e2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Herodotos-14B_V0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + 
"source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Herodotos-14B_V0.1", + "id": "Triangle104/Herodotos-14B_V0.1", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1879 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3017 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.224 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3684 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1164 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/L3.1-8B-Dusky-Ink/3ee76278-89d4-44fb-a449-717534b00161.json b/data/hfopenllm_v2/Triangle104/L3.1-8B-Dusky-Ink/3ee76278-89d4-44fb-a449-717534b00161.json new file mode 100644 index 000000000..c76643578 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/L3.1-8B-Dusky-Ink/3ee76278-89d4-44fb-a449-717534b00161.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_L3.1-8B-Dusky-Ink/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3.1-8B-Dusky-Ink", + "id": 
"Triangle104/L3.1-8B-Dusky-Ink", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.453 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5098 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1231 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4224 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3683 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/L3.1-8B-Dusky-Ink/4eed8b1b-591d-403b-96f4-c6db11e8b234.json b/data/hfopenllm_v2/Triangle104/L3.1-8B-Dusky-Ink/4eed8b1b-591d-403b-96f4-c6db11e8b234.json deleted file mode 100644 index 28e9f075e..000000000 --- a/data/hfopenllm_v2/Triangle104/L3.1-8B-Dusky-Ink/4eed8b1b-591d-403b-96f4-c6db11e8b234.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_L3.1-8B-Dusky-Ink/1762652579.926589", - "retrieved_timestamp": "1762652579.92659", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/L3.1-8B-Dusky-Ink", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/L3.1-8B-Dusky-Ink", - "additional_details": { - 
"precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4529780981130068 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5097902234872148 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12311178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4223958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36826795212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/L3.1-8B-Dusky-Ink_v0.r1/a43e1d8d-8a9e-445b-9023-fc6d4a41fcfc.json b/data/hfopenllm_v2/Triangle104/L3.1-8B-Dusky-Ink_v0.r1/a43e1d8d-8a9e-445b-9023-fc6d4a41fcfc.json deleted file mode 100644 index 2fe190b97..000000000 --- a/data/hfopenllm_v2/Triangle104/L3.1-8B-Dusky-Ink_v0.r1/a43e1d8d-8a9e-445b-9023-fc6d4a41fcfc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_L3.1-8B-Dusky-Ink_v0.r1/1762652579.926839", - "retrieved_timestamp": "1762652579.92684", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/L3.1-8B-Dusky-Ink_v0.r1", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/L3.1-8B-Dusky-Ink_v0.r1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19848779017451473 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43372778578458115 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3988333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.320561835106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/L3.1-8B-Dusky-Ink_v0.r1/fa2854d3-9e2f-4f79-ac8c-e1cb5a638745.json b/data/hfopenllm_v2/Triangle104/L3.1-8B-Dusky-Ink_v0.r1/fa2854d3-9e2f-4f79-ac8c-e1cb5a638745.json new file mode 100644 index 000000000..021c2c4e6 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/L3.1-8B-Dusky-Ink_v0.r1/fa2854d3-9e2f-4f79-ac8c-e1cb5a638745.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_L3.1-8B-Dusky-Ink_v0.r1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3.1-8B-Dusky-Ink_v0.r1", + "id": "Triangle104/L3.1-8B-Dusky-Ink_v0.r1", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1985 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4337 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0431 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + 
"source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3988 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3206 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/LThreePointOne-8B-HermesBlackroot/9ddaa721-bf3a-416a-9be8-291188793cc9.json b/data/hfopenllm_v2/Triangle104/LThreePointOne-8B-HermesBlackroot/9ddaa721-bf3a-416a-9be8-291188793cc9.json new file mode 100644 index 000000000..e187b6fe9 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/LThreePointOne-8B-HermesBlackroot/9ddaa721-bf3a-416a-9be8-291188793cc9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_LThreePointOne-8B-HermesBlackroot/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LThreePointOne-8B-HermesBlackroot", + "id": "Triangle104/LThreePointOne-8B-HermesBlackroot", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1792 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4998 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0196 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3586 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3285 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/LThreePointOne-8B-HermesBlackroot/d1c3467e-6189-4d6f-bedb-8c51fa8bfde6.json b/data/hfopenllm_v2/Triangle104/LThreePointOne-8B-HermesBlackroot/d1c3467e-6189-4d6f-bedb-8c51fa8bfde6.json deleted file mode 100644 index e4f3e7af6..000000000 --- a/data/hfopenllm_v2/Triangle104/LThreePointOne-8B-HermesBlackroot/d1c3467e-6189-4d6f-bedb-8c51fa8bfde6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_LThreePointOne-8B-HermesBlackroot/1762652579.927087", - "retrieved_timestamp": "1762652579.927088", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/LThreePointOne-8B-HermesBlackroot", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/LThreePointOne-8B-HermesBlackroot", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17920340252751588 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4998333246909241 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3585520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32845744680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/LThreePointOne-8B-HermesInk/1bb3c61f-2f72-4486-87ef-1e6d5ce58478.json 
b/data/hfopenllm_v2/Triangle104/LThreePointOne-8B-HermesInk/1bb3c61f-2f72-4486-87ef-1e6d5ce58478.json deleted file mode 100644 index b0b8b7364..000000000 --- a/data/hfopenllm_v2/Triangle104/LThreePointOne-8B-HermesInk/1bb3c61f-2f72-4486-87ef-1e6d5ce58478.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_LThreePointOne-8B-HermesInk/1762652579.927316", - "retrieved_timestamp": "1762652579.927316", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/LThreePointOne-8B-HermesInk", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/LThreePointOne-8B-HermesInk", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4031192790684273 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5222765555856439 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17220543806646527 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4129375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34674202127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/LThreePointOne-8B-HermesInk/d659077d-7261-4c69-862c-d61be21662a2.json b/data/hfopenllm_v2/Triangle104/LThreePointOne-8B-HermesInk/d659077d-7261-4c69-862c-d61be21662a2.json new file mode 100644 index 000000000..d949801cc --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/LThreePointOne-8B-HermesInk/d659077d-7261-4c69-862c-d61be21662a2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_LThreePointOne-8B-HermesInk/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LThreePointOne-8B-HermesInk", + "id": 
"Triangle104/LThreePointOne-8B-HermesInk", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4031 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5223 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1722 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.323 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4129 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3467 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Llama3.1-Allades-Lit-8b/e87ba227-c55e-4666-949d-b45913f8336b.json b/data/hfopenllm_v2/Triangle104/Llama3.1-Allades-Lit-8b/e87ba227-c55e-4666-949d-b45913f8336b.json new file mode 100644 index 000000000..8839950e8 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Llama3.1-Allades-Lit-8b/e87ba227-c55e-4666-949d-b45913f8336b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Llama3.1-Allades-Lit-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.1-Allades-Lit-8b", + "id": "Triangle104/Llama3.1-Allades-Lit-8b", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + 
"params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2461 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4183 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0023 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2844 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3708 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2724 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Llama3.1-cc-Lit-8b/077f683a-af6f-4a71-b599-b9b269546b7c.json b/data/hfopenllm_v2/Triangle104/Llama3.1-cc-Lit-8b/077f683a-af6f-4a71-b599-b9b269546b7c.json new file mode 100644 index 000000000..1c20850c6 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Llama3.1-cc-Lit-8b/077f683a-af6f-4a71-b599-b9b269546b7c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Llama3.1-cc-Lit-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.1-cc-Lit-8b", + "id": "Triangle104/Llama3.1-cc-Lit-8b", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2993 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3848 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.003 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3854 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3004 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Minerva-1.5b/26810cc0-541f-4ca5-b76e-f1a63baa61f6.json b/data/hfopenllm_v2/Triangle104/Minerva-1.5b/26810cc0-541f-4ca5-b76e-f1a63baa61f6.json deleted file mode 100644 index 479c53ccc..000000000 --- a/data/hfopenllm_v2/Triangle104/Minerva-1.5b/26810cc0-541f-4ca5-b76e-f1a63baa61f6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Minerva-1.5b/1762652579.9280179", - "retrieved_timestamp": "1762652579.9280179", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Minerva-1.5b", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Minerva-1.5b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2694295580171722 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4025709779119226 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1027190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3655 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.269780585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/Minerva-1.5b/54808b08-d10d-4a06-ab60-8d99039311b8.json b/data/hfopenllm_v2/Triangle104/Minerva-1.5b/54808b08-d10d-4a06-ab60-8d99039311b8.json new file mode 100644 index 000000000..fc5d9a10e --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Minerva-1.5b/54808b08-d10d-4a06-ab60-8d99039311b8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Minerva-1.5b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Minerva-1.5b", + "id": "Triangle104/Minerva-1.5b", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2694 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4026 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1027 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3655 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2698 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Minerva-1.5b_V0.2/138e6fdb-7092-4ee6-be82-7bb86c1fc759.json b/data/hfopenllm_v2/Triangle104/Minerva-1.5b_V0.2/138e6fdb-7092-4ee6-be82-7bb86c1fc759.json new file mode 100644 index 000000000..5d8751a69 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Minerva-1.5b_V0.2/138e6fdb-7092-4ee6-be82-7bb86c1fc759.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Minerva-1.5b_V0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Minerva-1.5b_V0.2", + "id": "Triangle104/Minerva-1.5b_V0.2", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3083 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3989 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.114 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + }, + { + "evaluation_name": "MUSR", + 
"source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.396 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Minerva-1.5b_V0.2/fc5be34b-0fad-4fce-9df1-851e4fd3119d.json b/data/hfopenllm_v2/Triangle104/Minerva-1.5b_V0.2/fc5be34b-0fad-4fce-9df1-851e4fd3119d.json deleted file mode 100644 index 395a0240f..000000000 --- a/data/hfopenllm_v2/Triangle104/Minerva-1.5b_V0.2/fc5be34b-0fad-4fce-9df1-851e4fd3119d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Minerva-1.5b_V0.2/1762652579.928302", - "retrieved_timestamp": "1762652579.928303", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Minerva-1.5b_V0.2", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Minerva-1.5b_V0.2", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3083474071020448 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3989042137094949 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11404833836858005 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3960104166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29105718085106386 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/Triangle104/Minerva-10b/1b27423f-62cc-4189-a293-5af84ef1f2c8.json b/data/hfopenllm_v2/Triangle104/Minerva-10b/1b27423f-62cc-4189-a293-5af84ef1f2c8.json new file mode 100644 index 000000000..90b1e2629 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Minerva-10b/1b27423f-62cc-4189-a293-5af84ef1f2c8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Minerva-10b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Minerva-10b", + "id": "Triangle104/Minerva-10b", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 10.067 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1879 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4462 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3627 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2318 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Minerva-10b/848ac6f9-2bb5-48fe-821a-83f28da91f92.json b/data/hfopenllm_v2/Triangle104/Minerva-10b/848ac6f9-2bb5-48fe-821a-83f28da91f92.json deleted file mode 100644 index 162f26fbd..000000000 --- 
a/data/hfopenllm_v2/Triangle104/Minerva-10b/848ac6f9-2bb5-48fe-821a-83f28da91f92.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Minerva-10b/1762652579.928542", - "retrieved_timestamp": "1762652579.928543", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Minerva-10b", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Minerva-10b", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 10.067 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1878715142488597 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4462036157096501 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36270833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23179853723404256 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/Minerva-14b-V0.1/f5468512-d2c7-4486-9d31-bef61225af52.json b/data/hfopenllm_v2/Triangle104/Minerva-14b-V0.1/f5468512-d2c7-4486-9d31-bef61225af52.json new file mode 100644 index 000000000..4c579dddf --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Minerva-14b-V0.1/f5468512-d2c7-4486-9d31-bef61225af52.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Minerva-14b-V0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Minerva-14b-V0.1", + "id": "Triangle104/Minerva-14b-V0.1", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + 
"dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0861 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.609 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3051 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3658 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.47 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5118 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Minerva-14b-V0.1/fc4971f4-983d-40f9-810a-16ed998c1dad.json b/data/hfopenllm_v2/Triangle104/Minerva-14b-V0.1/fc4971f4-983d-40f9-810a-16ed998c1dad.json deleted file mode 100644 index 5f6b032a4..000000000 --- a/data/hfopenllm_v2/Triangle104/Minerva-14b-V0.1/fc4971f4-983d-40f9-810a-16ed998c1dad.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Minerva-14b-V0.1/1762652579.92906", - "retrieved_timestamp": "1762652579.9290612", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Minerva-14b-V0.1", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Minerva-14b-V0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.0861292481726264 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6089792638423274 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30513595166163143 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36577181208053694 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47002083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5118018617021277 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/Minerva-14b/0e0ec1a9-76aa-4d7e-9c0e-946d6b000a6a.json b/data/hfopenllm_v2/Triangle104/Minerva-14b/0e0ec1a9-76aa-4d7e-9c0e-946d6b000a6a.json new file mode 100644 index 000000000..814ca6f10 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Minerva-14b/0e0ec1a9-76aa-4d7e-9c0e-946d6b000a6a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Minerva-14b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Minerva-14b", + "id": "Triangle104/Minerva-14b", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3468 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6301 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3051 + } + }, + { + 
"evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3742 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4766 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5194 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Minerva-14b/54093f2d-15c3-465e-b876-5e4027deeb19.json b/data/hfopenllm_v2/Triangle104/Minerva-14b/54093f2d-15c3-465e-b876-5e4027deeb19.json deleted file mode 100644 index 587fa9c42..000000000 --- a/data/hfopenllm_v2/Triangle104/Minerva-14b/54093f2d-15c3-465e-b876-5e4027deeb19.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Minerva-14b/1762652579.928819", - "retrieved_timestamp": "1762652579.928819", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Minerva-14b", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Minerva-14b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3467898509288687 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6300829439447851 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30513595166163143 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37416107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.476625 
- } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5193650265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/Minerva-7b/07b87b98-0d61-4479-937f-7447565b4631.json b/data/hfopenllm_v2/Triangle104/Minerva-7b/07b87b98-0d61-4479-937f-7447565b4631.json new file mode 100644 index 000000000..6e62d362b --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Minerva-7b/07b87b98-0d61-4479-937f-7447565b4631.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Minerva-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Minerva-7b", + "id": "Triangle104/Minerva-7b", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3724 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5498 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.284 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.323 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4143 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4444 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/Triangle104/Minerva-7b/aad7ed5c-d51d-46d7-af15-9c0447a02036.json b/data/hfopenllm_v2/Triangle104/Minerva-7b/aad7ed5c-d51d-46d7-af15-9c0447a02036.json deleted file mode 100644 index 86beb4585..000000000 --- a/data/hfopenllm_v2/Triangle104/Minerva-7b/aad7ed5c-d51d-46d7-af15-9c0447a02036.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Minerva-7b/1762652579.929375", - "retrieved_timestamp": "1762652579.929377", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Minerva-7b", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Minerva-7b", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3724196243744376 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5498400501314606 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4143333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44439827127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/Minerva-8b/08cc58ae-b1dc-489c-ba25-338bb11db2ee.json b/data/hfopenllm_v2/Triangle104/Minerva-8b/08cc58ae-b1dc-489c-ba25-338bb11db2ee.json deleted file mode 100644 index 7f0632d25..000000000 --- a/data/hfopenllm_v2/Triangle104/Minerva-8b/08cc58ae-b1dc-489c-ba25-338bb11db2ee.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Minerva-8b/1762652579.9296892", - "retrieved_timestamp": "1762652579.9296901", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"Triangle104/Minerva-8b", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Minerva-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17208451353519771 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46686093526780637 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.004531722054380665 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4272916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30892619680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/Minerva-8b/85b11b91-d686-49e9-8db0-971dd7cafb75.json b/data/hfopenllm_v2/Triangle104/Minerva-8b/85b11b91-d686-49e9-8db0-971dd7cafb75.json new file mode 100644 index 000000000..fa931f160 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Minerva-8b/85b11b91-d686-49e9-8db0-971dd7cafb75.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Minerva-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Minerva-8b", + "id": "Triangle104/Minerva-8b", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1721 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.4669 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0045 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4273 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3089 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Mistral-Redemption-Arc/21bac032-a092-4afa-8d29-ebdefb3a0650.json b/data/hfopenllm_v2/Triangle104/Mistral-Redemption-Arc/21bac032-a092-4afa-8d29-ebdefb3a0650.json new file mode 100644 index 000000000..5d71ad3d3 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Mistral-Redemption-Arc/21bac032-a092-4afa-8d29-ebdefb3a0650.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Mistral-Redemption-Arc/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-Redemption-Arc", + "id": "Triangle104/Mistral-Redemption-Arc", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4029 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6255 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4101 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3473 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4595 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.451 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Mistral-Small-24b-Harmony/29e3a687-429f-4f33-ae5f-48db85127364.json b/data/hfopenllm_v2/Triangle104/Mistral-Small-24b-Harmony/29e3a687-429f-4f33-ae5f-48db85127364.json new file mode 100644 index 000000000..9760eb31d --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Mistral-Small-24b-Harmony/29e3a687-429f-4f33-ae5f-48db85127364.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Mistral-Small-24b-Harmony/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-Small-24b-Harmony", + "id": "Triangle104/Mistral-Small-24b-Harmony", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1687 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6434 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.1911 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3842 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4276 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5431 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Pans_Gutenbergum_V0.1/2a6af60c-eb46-46ae-8140-d050b48069ae.json b/data/hfopenllm_v2/Triangle104/Pans_Gutenbergum_V0.1/2a6af60c-eb46-46ae-8140-d050b48069ae.json deleted file mode 100644 index 0fb2b5fdf..000000000 --- a/data/hfopenllm_v2/Triangle104/Pans_Gutenbergum_V0.1/2a6af60c-eb46-46ae-8140-d050b48069ae.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Pans_Gutenbergum_V0.1/1762652579.9304042", - "retrieved_timestamp": "1762652579.9304051", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Pans_Gutenbergum_V0.1", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Pans_Gutenbergum_V0.1", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.309696050922663 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5541091780465247 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10574018126888217 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", 
- "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4528125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3696808510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/Pans_Gutenbergum_V0.1/d98493a6-f237-4565-8508-9e4cc3188d2d.json b/data/hfopenllm_v2/Triangle104/Pans_Gutenbergum_V0.1/d98493a6-f237-4565-8508-9e4cc3188d2d.json new file mode 100644 index 000000000..6cd483d58 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Pans_Gutenbergum_V0.1/d98493a6-f237-4565-8508-9e4cc3188d2d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Pans_Gutenbergum_V0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Pans_Gutenbergum_V0.1", + "id": "Triangle104/Pans_Gutenbergum_V0.1", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3097 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5541 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1057 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.323 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4528 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy 
on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3697 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Pans_Gutenbergum_V0.2/2def6fbd-7488-4e9f-a822-2405d4f7a315.json b/data/hfopenllm_v2/Triangle104/Pans_Gutenbergum_V0.2/2def6fbd-7488-4e9f-a822-2405d4f7a315.json new file mode 100644 index 000000000..9dd3b30ff --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Pans_Gutenbergum_V0.2/2def6fbd-7488-4e9f-a822-2405d4f7a315.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Pans_Gutenbergum_V0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Pans_Gutenbergum_V0.2", + "id": "Triangle104/Pans_Gutenbergum_V0.2", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3215 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5526 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0687 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4673 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3585 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/Triangle104/Pans_Gutenbergum_V0.2/f9eef8a7-1f23-46f1-b57a-062ffd1b81a1.json b/data/hfopenllm_v2/Triangle104/Pans_Gutenbergum_V0.2/f9eef8a7-1f23-46f1-b57a-062ffd1b81a1.json deleted file mode 100644 index 74aedd79d..000000000 --- a/data/hfopenllm_v2/Triangle104/Pans_Gutenbergum_V0.2/f9eef8a7-1f23-46f1-b57a-062ffd1b81a1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Pans_Gutenbergum_V0.2/1762652579.93062", - "retrieved_timestamp": "1762652579.930621", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Pans_Gutenbergum_V0.2", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Pans_Gutenbergum_V0.2", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3215113676157041 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.55257930562769 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06873111782477341 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46732291666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3585438829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/Pantheon_ChatWaifu_V0.2/819143d4-9538-48b9-b7af-128bc15c518a.json b/data/hfopenllm_v2/Triangle104/Pantheon_ChatWaifu_V0.2/819143d4-9538-48b9-b7af-128bc15c518a.json new file mode 100644 index 000000000..643b17008 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Pantheon_ChatWaifu_V0.2/819143d4-9538-48b9-b7af-128bc15c518a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Pantheon_ChatWaifu_V0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"Pantheon_ChatWaifu_V0.2", + "id": "Triangle104/Pantheon_ChatWaifu_V0.2", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2683 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5532 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0566 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.318 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4755 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3442 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Pantheon_ChatWaifu_V0.2/b57a86fa-8994-4004-a79d-d6da64e64b4d.json b/data/hfopenllm_v2/Triangle104/Pantheon_ChatWaifu_V0.2/b57a86fa-8994-4004-a79d-d6da64e64b4d.json deleted file mode 100644 index 6812998bc..000000000 --- a/data/hfopenllm_v2/Triangle104/Pantheon_ChatWaifu_V0.2/b57a86fa-8994-4004-a79d-d6da64e64b4d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Pantheon_ChatWaifu_V0.2/1762652579.930828", - "retrieved_timestamp": "1762652579.930829", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Pantheon_ChatWaifu_V0.2", - "developer": "Triangle104", - "inference_platform": 
"unknown", - "id": "Triangle104/Pantheon_ChatWaifu_V0.2", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2682803849341968 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5531574435698693 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47551041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34424867021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/Phi-4-AbliteratedRP/c29d47af-a9de-4edb-acac-6763c0d44ca3.json b/data/hfopenllm_v2/Triangle104/Phi-4-AbliteratedRP/c29d47af-a9de-4edb-acac-6763c0d44ca3.json new file mode 100644 index 000000000..b786d9979 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Phi-4-AbliteratedRP/c29d47af-a9de-4edb-acac-6763c0d44ca3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Phi-4-AbliteratedRP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-4-AbliteratedRP", + "id": "Triangle104/Phi-4-AbliteratedRP", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4923 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.6709 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3074 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3951 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5098 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5308 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Phi4-RP-o1-Ablit/22bf3fb7-9235-4a57-b8fd-c85b12047b0e.json b/data/hfopenllm_v2/Triangle104/Phi4-RP-o1-Ablit/22bf3fb7-9235-4a57-b8fd-c85b12047b0e.json new file mode 100644 index 000000000..82381bf4b --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Phi4-RP-o1-Ablit/22bf3fb7-9235-4a57-b8fd-c85b12047b0e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Phi4-RP-o1-Ablit/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi4-RP-o1-Ablit", + "id": "Triangle104/Phi4-RP-o1-Ablit", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0239 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.663 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": 
"Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3882 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3633 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4754 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5105 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Phi4-RP-o1/2bea7014-460d-470b-918f-468b58d70fd6.json b/data/hfopenllm_v2/Triangle104/Phi4-RP-o1/2bea7014-460d-470b-918f-468b58d70fd6.json new file mode 100644 index 000000000..49827c7aa --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Phi4-RP-o1/2bea7014-460d-470b-918f-468b58d70fd6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Phi4-RP-o1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi4-RP-o1", + "id": "Triangle104/Phi4-RP-o1", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.022 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6653 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3776 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + 
"hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3733 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4756 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5111 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Porpoise-R1-Llama3.2-3b/3927a5dd-002b-441a-b769-ba68547cd5f3.json b/data/hfopenllm_v2/Triangle104/Porpoise-R1-Llama3.2-3b/3927a5dd-002b-441a-b769-ba68547cd5f3.json new file mode 100644 index 000000000..341da9ea4 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Porpoise-R1-Llama3.2-3b/3927a5dd-002b-441a-b769-ba68547cd5f3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Porpoise-R1-Llama3.2-3b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Porpoise-R1-Llama3.2-3b", + "id": "Triangle104/Porpoise-R1-Llama3.2-3b", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4352 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3824 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0423 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3576 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2117 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Q2.5-14B-Instruct-1M-Harmony/1cf0506b-dbdd-4f7e-abf5-d812763a722e.json b/data/hfopenllm_v2/Triangle104/Q2.5-14B-Instruct-1M-Harmony/1cf0506b-dbdd-4f7e-abf5-d812763a722e.json deleted file mode 100644 index d8249e1a7..000000000 --- a/data/hfopenllm_v2/Triangle104/Q2.5-14B-Instruct-1M-Harmony/1cf0506b-dbdd-4f7e-abf5-d812763a722e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-14B-Instruct-1M-Harmony/1762652579.93199", - "retrieved_timestamp": "1762652579.931991", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Q2.5-14B-Instruct-1M-Harmony", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Q2.5-14B-Instruct-1M-Harmony", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5986327389105351 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6338808682301471 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3768882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.375 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4795416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, 
- "score_details": { - "score": 0.5074800531914894 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/Q2.5-14B-Instruct-1M-Harmony/476fc734-dedd-4192-aa59-eb2f9dabf16b.json b/data/hfopenllm_v2/Triangle104/Q2.5-14B-Instruct-1M-Harmony/476fc734-dedd-4192-aa59-eb2f9dabf16b.json new file mode 100644 index 000000000..b32ca4aa3 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Q2.5-14B-Instruct-1M-Harmony/476fc734-dedd-4192-aa59-eb2f9dabf16b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-14B-Instruct-1M-Harmony/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Q2.5-14B-Instruct-1M-Harmony", + "id": "Triangle104/Q2.5-14B-Instruct-1M-Harmony", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5986 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6339 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3769 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.375 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4795 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5075 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Q2.5-AthensCOT/54a29a68-c69a-4b49-a87a-cb93c459146a.json 
b/data/hfopenllm_v2/Triangle104/Q2.5-AthensCOT/54a29a68-c69a-4b49-a87a-cb93c459146a.json deleted file mode 100644 index 389cdfa34..000000000 --- a/data/hfopenllm_v2/Triangle104/Q2.5-AthensCOT/54a29a68-c69a-4b49-a87a-cb93c459146a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-AthensCOT/1762652579.9322", - "retrieved_timestamp": "1762652579.932201", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Q2.5-AthensCOT", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Q2.5-AthensCOT", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45727447616767947 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5541692533534606 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29154078549848944 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4578333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4379155585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/Q2.5-AthensCOT/817e2fbe-0866-489f-b987-391228a68c53.json b/data/hfopenllm_v2/Triangle104/Q2.5-AthensCOT/817e2fbe-0866-489f-b987-391228a68c53.json new file mode 100644 index 000000000..46f11cded --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Q2.5-AthensCOT/817e2fbe-0866-489f-b987-391228a68c53.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-AthensCOT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Q2.5-AthensCOT", + "id": "Triangle104/Q2.5-AthensCOT", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + 
"architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4573 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5542 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2915 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4578 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4379 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Q2.5-CodeR1-3B/74342d21-8eac-494c-95b9-4df1e828473b.json b/data/hfopenllm_v2/Triangle104/Q2.5-CodeR1-3B/74342d21-8eac-494c-95b9-4df1e828473b.json deleted file mode 100644 index f1df0ea9e..000000000 --- a/data/hfopenllm_v2/Triangle104/Q2.5-CodeR1-3B/74342d21-8eac-494c-95b9-4df1e828473b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-CodeR1-3B/1762652579.932402", - "retrieved_timestamp": "1762652579.9324028", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Q2.5-CodeR1-3B", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Q2.5-CodeR1-3B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.085 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35875587884590665 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4660844324968853 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16389728096676737 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43154166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978723404255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/Q2.5-CodeR1-3B/f25f5eb1-ff22-4be3-a639-a9d25207078f.json b/data/hfopenllm_v2/Triangle104/Q2.5-CodeR1-3B/f25f5eb1-ff22-4be3-a639-a9d25207078f.json new file mode 100644 index 000000000..61a0bc9d8 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Q2.5-CodeR1-3B/f25f5eb1-ff22-4be3-a639-a9d25207078f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-CodeR1-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Q2.5-CodeR1-3B", + "id": "Triangle104/Q2.5-CodeR1-3B", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.085 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3588 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4661 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 
5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1639 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4315 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2979 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Q2.5-EVACOT-7b/972dfbcf-a5d0-4f9f-a39c-089c30ac91ab.json b/data/hfopenllm_v2/Triangle104/Q2.5-EVACOT-7b/972dfbcf-a5d0-4f9f-a39c-089c30ac91ab.json deleted file mode 100644 index 039597fe6..000000000 --- a/data/hfopenllm_v2/Triangle104/Q2.5-EVACOT-7b/972dfbcf-a5d0-4f9f-a39c-089c30ac91ab.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-EVACOT-7b/1762652579.9326148", - "retrieved_timestamp": "1762652579.932616", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Q2.5-EVACOT-7b", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Q2.5-EVACOT-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5784241368457914 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5505524946794311 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2824773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4498645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43309507978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/Q2.5-EVACOT-7b/f71d1c31-184b-46be-a288-bdc92f0ebe09.json b/data/hfopenllm_v2/Triangle104/Q2.5-EVACOT-7b/f71d1c31-184b-46be-a288-bdc92f0ebe09.json new file mode 100644 index 000000000..2603a2922 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Q2.5-EVACOT-7b/f71d1c31-184b-46be-a288-bdc92f0ebe09.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-EVACOT-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Q2.5-EVACOT-7b", + "id": "Triangle104/Q2.5-EVACOT-7b", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5784 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5506 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2825 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.318 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4499 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4331 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Q2.5-EvaHumane-RP/0d9547b3-7bef-4815-9c44-7d714fe81bbb.json b/data/hfopenllm_v2/Triangle104/Q2.5-EvaHumane-RP/0d9547b3-7bef-4815-9c44-7d714fe81bbb.json new file mode 100644 index 000000000..356d6f7ee --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Q2.5-EvaHumane-RP/0d9547b3-7bef-4815-9c44-7d714fe81bbb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-EvaHumane-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Q2.5-EvaHumane-RP", + "id": "Triangle104/Q2.5-EvaHumane-RP", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3676 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5328 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2923 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3188 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4276 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4412 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/Triangle104/Q2.5-EvaHumane-RP/5146b3c9-9fdb-4a4e-a687-4bcf44b92309.json b/data/hfopenllm_v2/Triangle104/Q2.5-EvaHumane-RP/5146b3c9-9fdb-4a4e-a687-4bcf44b92309.json deleted file mode 100644 index 20f5ae820..000000000 --- a/data/hfopenllm_v2/Triangle104/Q2.5-EvaHumane-RP/5146b3c9-9fdb-4a4e-a687-4bcf44b92309.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-EvaHumane-RP/1762652579.932837", - "retrieved_timestamp": "1762652579.932837", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Q2.5-EvaHumane-RP", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Q2.5-EvaHumane-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3676234613048932 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5328196297646768 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29229607250755285 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42763541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4412400265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/Q2.5-Humane-RP/22dbc5a2-0ff6-4566-9bfd-e5ce314be597.json b/data/hfopenllm_v2/Triangle104/Q2.5-Humane-RP/22dbc5a2-0ff6-4566-9bfd-e5ce314be597.json new file mode 100644 index 000000000..99a18a092 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Q2.5-Humane-RP/22dbc5a2-0ff6-4566-9bfd-e5ce314be597.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-Humane-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Q2.5-Humane-RP", + "id": "Triangle104/Q2.5-Humane-RP", + "developer": 
"Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4412 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5649 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3391 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3188 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4528 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4492 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Q2.5-Humane-RP/697ad115-9040-42e4-b94b-529ab27011ee.json b/data/hfopenllm_v2/Triangle104/Q2.5-Humane-RP/697ad115-9040-42e4-b94b-529ab27011ee.json deleted file mode 100644 index dcefd80c8..000000000 --- a/data/hfopenllm_v2/Triangle104/Q2.5-Humane-RP/697ad115-9040-42e4-b94b-529ab27011ee.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-Humane-RP/1762652579.933056", - "retrieved_timestamp": "1762652579.933057", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Q2.5-Humane-RP", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Q2.5-Humane-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", 
- "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4411627814199657 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5649289292164736 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3391238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4528125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44921875 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/Q2.5-Instruct-1M_Harmony/afedb249-f1a5-42d6-b6c0-54b2cc303f64.json b/data/hfopenllm_v2/Triangle104/Q2.5-Instruct-1M_Harmony/afedb249-f1a5-42d6-b6c0-54b2cc303f64.json new file mode 100644 index 000000000..cd9816ee7 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Q2.5-Instruct-1M_Harmony/afedb249-f1a5-42d6-b6c0-54b2cc303f64.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-Instruct-1M_Harmony/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Q2.5-Instruct-1M_Harmony", + "id": "Triangle104/Q2.5-Instruct-1M_Harmony", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6038 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5373 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": 
"hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3323 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.323 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4688 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4366 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Q2.5-Instruct-1M_Harmony/f4cbe998-8c9f-47c1-a267-5831a40e4cf2.json b/data/hfopenllm_v2/Triangle104/Q2.5-Instruct-1M_Harmony/f4cbe998-8c9f-47c1-a267-5831a40e4cf2.json deleted file mode 100644 index b65ac22cd..000000000 --- a/data/hfopenllm_v2/Triangle104/Q2.5-Instruct-1M_Harmony/f4cbe998-8c9f-47c1-a267-5831a40e4cf2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-Instruct-1M_Harmony/1762652579.933266", - "retrieved_timestamp": "1762652579.9332669", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Q2.5-Instruct-1M_Harmony", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Q2.5-Instruct-1M_Harmony", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6038034636985421 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5373243549676157 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3323262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46878125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43658577127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/Q2.5-R1-3B/61b1bf5e-6aa4-4e90-af2c-dcf5fc9903f2.json b/data/hfopenllm_v2/Triangle104/Q2.5-R1-3B/61b1bf5e-6aa4-4e90-af2c-dcf5fc9903f2.json new file mode 100644 index 000000000..7e4fafbbe --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Q2.5-R1-3B/61b1bf5e-6aa4-4e90-af2c-dcf5fc9903f2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-R1-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Q2.5-R1-3B", + "id": "Triangle104/Q2.5-R1-3B", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.085 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4214 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4812 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2674 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.432 + } + }, + { + 
"evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3813 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Q2.5-R1-3B/a4e4a936-5203-4a9d-a698-417cc9da866f.json b/data/hfopenllm_v2/Triangle104/Q2.5-R1-3B/a4e4a936-5203-4a9d-a698-417cc9da866f.json deleted file mode 100644 index 547e08d43..000000000 --- a/data/hfopenllm_v2/Triangle104/Q2.5-R1-3B/a4e4a936-5203-4a9d-a698-417cc9da866f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-R1-3B/1762652579.933473", - "retrieved_timestamp": "1762652579.933474", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Q2.5-R1-3B", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Q2.5-R1-3B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.085 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4213542290012722 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48124304786769817 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2673716012084592 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43197916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38131648936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/Q2.5-R1-7B/302fa968-5d2d-4750-a1e6-c87534c1eafa.json b/data/hfopenllm_v2/Triangle104/Q2.5-R1-7B/302fa968-5d2d-4750-a1e6-c87534c1eafa.json deleted file mode 100644 index 2bc4e69de..000000000 --- a/data/hfopenllm_v2/Triangle104/Q2.5-R1-7B/302fa968-5d2d-4750-a1e6-c87534c1eafa.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/Triangle104_Q2.5-R1-7B/1762652579.933674", - "retrieved_timestamp": "1762652579.933675", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Q2.5-R1-7B", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Q2.5-R1-7B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1346150436397647 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30065625818799685 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3607291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1180186170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/Q2.5-R1-7B/c0adc04c-1e02-4891-a5a1-1fab0ddf18ca.json b/data/hfopenllm_v2/Triangle104/Q2.5-R1-7B/c0adc04c-1e02-4891-a5a1-1fab0ddf18ca.json new file mode 100644 index 000000000..7d190bdde --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Q2.5-R1-7B/c0adc04c-1e02-4891-a5a1-1fab0ddf18ca.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-R1-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Q2.5-R1-7B", + "id": "Triangle104/Q2.5-R1-7B", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1346 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3007 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0166 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2525 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3607 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.118 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Robo-Gutenberg_V1.0/cc57e6f0-ab55-4ab9-983c-63d74632d016.json b/data/hfopenllm_v2/Triangle104/Robo-Gutenberg_V1.0/cc57e6f0-ab55-4ab9-983c-63d74632d016.json new file mode 100644 index 000000000..96cf9ac6e --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Robo-Gutenberg_V1.0/cc57e6f0-ab55-4ab9-983c-63d74632d016.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Robo-Gutenberg_V1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Robo-Gutenberg_V1.0", + "id": "Triangle104/Robo-Gutenberg_V1.0", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6008 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": 
"SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6537 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4562 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3859 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4744 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5391 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Robo-Gutenberg_V1.0/d891d79a-1ec2-44e3-83cd-c28739aecd6e.json b/data/hfopenllm_v2/Triangle104/Robo-Gutenberg_V1.0/d891d79a-1ec2-44e3-83cd-c28739aecd6e.json deleted file mode 100644 index 11b18fd1c..000000000 --- a/data/hfopenllm_v2/Triangle104/Robo-Gutenberg_V1.0/d891d79a-1ec2-44e3-83cd-c28739aecd6e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Robo-Gutenberg_V1.0/1762652579.9338748", - "retrieved_timestamp": "1762652579.933876", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Robo-Gutenberg_V1.0", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Robo-Gutenberg_V1.0", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6007559940956662 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.653716560941194 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4561933534743202 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3859060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47436458333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5391456117021277 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/Rocinante-Prism_V2.0/0d3c5fdb-c4a5-4436-b9d4-f0f42cb4db96.json b/data/hfopenllm_v2/Triangle104/Rocinante-Prism_V2.0/0d3c5fdb-c4a5-4436-b9d4-f0f42cb4db96.json new file mode 100644 index 000000000..670f07b44 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Rocinante-Prism_V2.0/0d3c5fdb-c4a5-4436-b9d4-f0f42cb4db96.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Rocinante-Prism_V2.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Rocinante-Prism_V2.0", + "id": "Triangle104/Rocinante-Prism_V2.0", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2616 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5361 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.111 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3205 + } + 
}, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.445 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.364 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Rocinante-Prism_V2.0/9f32b229-a2d5-409b-98d2-65681616aff4.json b/data/hfopenllm_v2/Triangle104/Rocinante-Prism_V2.0/9f32b229-a2d5-409b-98d2-65681616aff4.json deleted file mode 100644 index 4c6885ebc..000000000 --- a/data/hfopenllm_v2/Triangle104/Rocinante-Prism_V2.0/9f32b229-a2d5-409b-98d2-65681616aff4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Rocinante-Prism_V2.0/1762652579.9340868", - "retrieved_timestamp": "1762652579.9340868", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Rocinante-Prism_V2.0", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Rocinante-Prism_V2.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2616103051015749 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5361246041982355 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11102719033232629 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.445 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3640292553191489 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/Triangle104/Rocinante-Prism_V2.1/7a93ddc1-8694-4b16-8183-1b7f46dfba92.json b/data/hfopenllm_v2/Triangle104/Rocinante-Prism_V2.1/7a93ddc1-8694-4b16-8183-1b7f46dfba92.json deleted file mode 100644 index a87d3c9d5..000000000 --- a/data/hfopenllm_v2/Triangle104/Rocinante-Prism_V2.1/7a93ddc1-8694-4b16-8183-1b7f46dfba92.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Rocinante-Prism_V2.1/1762652579.934289", - "retrieved_timestamp": "1762652579.93429", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Rocinante-Prism_V2.1", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Rocinante-Prism_V2.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25584005992987496 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5332676401860506 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11253776435045318 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44896874999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3651097074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/Rocinante-Prism_V2.1/a6ec2934-e9fd-481d-8f00-932603bc6e0a.json b/data/hfopenllm_v2/Triangle104/Rocinante-Prism_V2.1/a6ec2934-e9fd-481d-8f00-932603bc6e0a.json new file mode 100644 index 000000000..5d588967a --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Rocinante-Prism_V2.1/a6ec2934-e9fd-481d-8f00-932603bc6e0a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Rocinante-Prism_V2.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Rocinante-Prism_V2.1", + "id": 
"Triangle104/Rocinante-Prism_V2.1", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2558 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5333 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1125 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3196 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.449 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3651 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/RomboHermes3-R1-Llama3.2-3b/e2553c93-60df-4126-9e64-ecd4a5003389.json b/data/hfopenllm_v2/Triangle104/RomboHermes3-R1-Llama3.2-3b/e2553c93-60df-4126-9e64-ecd4a5003389.json new file mode 100644 index 000000000..1d3c45558 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/RomboHermes3-R1-Llama3.2-3b/e2553c93-60df-4126-9e64-ecd4a5003389.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_RomboHermes3-R1-Llama3.2-3b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RomboHermes3-R1-Llama3.2-3b", + "id": "Triangle104/RomboHermes3-R1-Llama3.2-3b", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": 
"LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3007 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4264 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0816 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3657 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2957 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Rombos-Novasky-7B_V1c/a06dc6ef-5d16-402a-a855-b7feec423aa5.json b/data/hfopenllm_v2/Triangle104/Rombos-Novasky-7B_V1c/a06dc6ef-5d16-402a-a855-b7feec423aa5.json deleted file mode 100644 index 46e55d331..000000000 --- a/data/hfopenllm_v2/Triangle104/Rombos-Novasky-7B_V1c/a06dc6ef-5d16-402a-a855-b7feec423aa5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Rombos-Novasky-7B_V1c/1762652579.934721", - "retrieved_timestamp": "1762652579.934722", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Rombos-Novasky-7B_V1c", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Rombos-Novasky-7B_V1c", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40801517750679306 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4349247829177707 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08534743202416918 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44645833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27376994680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/Triangle104/Rombos-Novasky-7B_V1c/e7c2fb42-e82a-4dac-9cc3-a9f41ab54e0f.json b/data/hfopenllm_v2/Triangle104/Rombos-Novasky-7B_V1c/e7c2fb42-e82a-4dac-9cc3-a9f41ab54e0f.json new file mode 100644 index 000000000..d45631435 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Rombos-Novasky-7B_V1c/e7c2fb42-e82a-4dac-9cc3-a9f41ab54e0f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Rombos-Novasky-7B_V1c/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Rombos-Novasky-7B_V1c", + "id": "Triangle104/Rombos-Novasky-7B_V1c", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.408 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4349 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + 
"metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0853 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4465 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2738 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Set-70b/a807ee8c-509e-4b6d-a414-df24444d8a0a.json b/data/hfopenllm_v2/Triangle104/Set-70b/a807ee8c-509e-4b6d-a414-df24444d8a0a.json new file mode 100644 index 000000000..c29b4d9d5 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Set-70b/a807ee8c-509e-4b6d-a414-df24444d8a0a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Triangle104_Set-70b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Set-70b", + "id": "Triangle104/Set-70b", + "developer": "Triangle104", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7643 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7014 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.364 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + 
"source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4463 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4696 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5442 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Set-70b/e25fa684-c237-4bce-8498-7bdfaac970a9.json b/data/hfopenllm_v2/Triangle104/Set-70b/e25fa684-c237-4bce-8498-7bdfaac970a9.json deleted file mode 100644 index 85e56e470..000000000 --- a/data/hfopenllm_v2/Triangle104/Set-70b/e25fa684-c237-4bce-8498-7bdfaac970a9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Set-70b/1762652579.934931", - "retrieved_timestamp": "1762652579.934931", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Set-70b", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Set-70b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7642954028643998 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.70142939330013 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3640483383685801 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4463087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46956250000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5442154255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/Tsunami-th/Tsunami-0.5-7B-Instruct/2199024b-7944-4950-8335-32a536efad02.json b/data/hfopenllm_v2/Tsunami-th/Tsunami-0.5-7B-Instruct/2199024b-7944-4950-8335-32a536efad02.json new file mode 100644 index 000000000..8d21eff8a --- /dev/null +++ b/data/hfopenllm_v2/Tsunami-th/Tsunami-0.5-7B-Instruct/2199024b-7944-4950-8335-32a536efad02.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Tsunami-th_Tsunami-0.5-7B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Tsunami-0.5-7B-Instruct", + "id": "Tsunami-th/Tsunami-0.5-7B-Instruct", + "developer": "Tsunami-th", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.74 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5524 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5045 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4257 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4413 + } + } + ] +} \ No newline at end of file diff 
--git a/data/hfopenllm_v2/Tsunami-th/Tsunami-0.5-7B-Instruct/df3de449-9abc-4f0a-ba6e-caa48720893a.json b/data/hfopenllm_v2/Tsunami-th/Tsunami-0.5-7B-Instruct/df3de449-9abc-4f0a-ba6e-caa48720893a.json deleted file mode 100644 index 0f39cdbd3..000000000 --- a/data/hfopenllm_v2/Tsunami-th/Tsunami-0.5-7B-Instruct/df3de449-9abc-4f0a-ba6e-caa48720893a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Tsunami-th_Tsunami-0.5-7B-Instruct/1762652579.935141", - "retrieved_timestamp": "1762652579.9351418", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Tsunami-th/Tsunami-0.5-7B-Instruct", - "developer": "Tsunami-th", - "inference_platform": "unknown", - "id": "Tsunami-th/Tsunami-0.5-7B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7400153814102137 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.552369427738073 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5045317220543807 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42571875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44132313829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/Tsunami-th/Tsunami-0.5x-7B-Instruct/97919c86-6161-4548-95b9-d44263a29f8a.json b/data/hfopenllm_v2/Tsunami-th/Tsunami-0.5x-7B-Instruct/97919c86-6161-4548-95b9-d44263a29f8a.json new file mode 100644 index 000000000..2f1a0d0d7 --- /dev/null +++ b/data/hfopenllm_v2/Tsunami-th/Tsunami-0.5x-7B-Instruct/97919c86-6161-4548-95b9-d44263a29f8a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Tsunami-th_Tsunami-0.5x-7B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"Tsunami-0.5x-7B-Instruct", + "id": "Tsunami-th/Tsunami-0.5x-7B-Instruct", + "developer": "Tsunami-th", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7099 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5593 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4207 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3146 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4458 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Tsunami-th/Tsunami-0.5x-7B-Instruct/fec678b9-c51b-4945-8d4f-f06af6528227.json b/data/hfopenllm_v2/Tsunami-th/Tsunami-0.5x-7B-Instruct/fec678b9-c51b-4945-8d4f-f06af6528227.json deleted file mode 100644 index 2b4539a22..000000000 --- a/data/hfopenllm_v2/Tsunami-th/Tsunami-0.5x-7B-Instruct/fec678b9-c51b-4945-8d4f-f06af6528227.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Tsunami-th_Tsunami-0.5x-7B-Instruct/1762652579.9353971", - "retrieved_timestamp": "1762652579.9353979", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Tsunami-th/Tsunami-0.5x-7B-Instruct", - "developer": "Tsunami-th", - "inference_platform": 
"unknown", - "id": "Tsunami-th/Tsunami-0.5x-7B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.709915247099917 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5592865858560252 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4206948640483384 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46667708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44581117021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/Tsunami-th/Tsunami-1.0-14B-Instruct/11262698-480b-425b-b013-f362fae2f254.json b/data/hfopenllm_v2/Tsunami-th/Tsunami-1.0-14B-Instruct/11262698-480b-425b-b013-f362fae2f254.json deleted file mode 100644 index 557f84f3e..000000000 --- a/data/hfopenllm_v2/Tsunami-th/Tsunami-1.0-14B-Instruct/11262698-480b-425b-b013-f362fae2f254.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Tsunami-th_Tsunami-1.0-14B-Instruct/1762652579.935597", - "retrieved_timestamp": "1762652579.935597", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Tsunami-th/Tsunami-1.0-14B-Instruct", - "developer": "Tsunami-th", - "inference_platform": "unknown", - "id": "Tsunami-th/Tsunami-1.0-14B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7829049145157072 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6438763263011559 - } - }, - { - "evaluation_name": "MATH Level 
5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45845921450151056 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3565436241610738 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44593750000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5248503989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/Tsunami-th/Tsunami-1.0-14B-Instruct/c40c1a46-2e30-4cf1-bcf3-a316a793fbcd.json b/data/hfopenllm_v2/Tsunami-th/Tsunami-1.0-14B-Instruct/c40c1a46-2e30-4cf1-bcf3-a316a793fbcd.json new file mode 100644 index 000000000..640901714 --- /dev/null +++ b/data/hfopenllm_v2/Tsunami-th/Tsunami-1.0-14B-Instruct/c40c1a46-2e30-4cf1-bcf3-a316a793fbcd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Tsunami-th_Tsunami-1.0-14B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Tsunami-1.0-14B-Instruct", + "id": "Tsunami-th/Tsunami-1.0-14B-Instruct", + "developer": "Tsunami-th", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7829 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6439 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4585 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, 
+ "score_details": { + "score": 0.3565 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4459 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5249 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Tsunami-th/Tsunami-1.0-7B-Instruct/c1294268-b5f5-4d64-b91a-147f58a21a47.json b/data/hfopenllm_v2/Tsunami-th/Tsunami-1.0-7B-Instruct/c1294268-b5f5-4d64-b91a-147f58a21a47.json new file mode 100644 index 000000000..25c1c6f74 --- /dev/null +++ b/data/hfopenllm_v2/Tsunami-th/Tsunami-1.0-7B-Instruct/c1294268-b5f5-4d64-b91a-147f58a21a47.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Tsunami-th_Tsunami-1.0-7B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Tsunami-1.0-7B-Instruct", + "id": "Tsunami-th/Tsunami-1.0-7B-Instruct", + "developer": "Tsunami-th", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7309 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5491 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4335 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3129 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4493 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4424 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Tsunami-th/Tsunami-1.0-7B-Instruct/ccffe03b-c166-48de-8516-8253b2c2f96e.json b/data/hfopenllm_v2/Tsunami-th/Tsunami-1.0-7B-Instruct/ccffe03b-c166-48de-8516-8253b2c2f96e.json deleted file mode 100644 index 580c109ce..000000000 --- a/data/hfopenllm_v2/Tsunami-th/Tsunami-1.0-7B-Instruct/ccffe03b-c166-48de-8516-8253b2c2f96e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Tsunami-th_Tsunami-1.0-7B-Instruct/1762652579.9358132", - "retrieved_timestamp": "1762652579.9358132", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Tsunami-th/Tsunami-1.0-7B-Instruct", - "developer": "Tsunami-th", - "inference_platform": "unknown", - "id": "Tsunami-th/Tsunami-1.0-7B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.730872972601586 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.549071195618326 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4335347432024169 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44928125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4424035904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter1/2b029e6d-a0b8-4b6c-b62d-144b8dc4f739.json 
b/data/hfopenllm_v2/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter1/2b029e6d-a0b8-4b6c-b62d-144b8dc4f739.json new file mode 100644 index 000000000..b9787022d --- /dev/null +++ b/data/hfopenllm_v2/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter1/2b029e6d-a0b8-4b6c-b62d-144b8dc4f739.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/UCLA-AGI_Gemma-2-9B-It-SPPO-Iter1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma-2-9B-It-SPPO-Iter1", + "id": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter1", + "developer": "UCLA-AGI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3082 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5969 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0899 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3364 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4099 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3907 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter2/b926ca6c-60c9-4353-9671-0453b46d0222.json b/data/hfopenllm_v2/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter2/b926ca6c-60c9-4353-9671-0453b46d0222.json new file mode 100644 index 000000000..51f77c00b --- /dev/null +++ 
b/data/hfopenllm_v2/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter2/b926ca6c-60c9-4353-9671-0453b46d0222.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/UCLA-AGI_Gemma-2-9B-It-SPPO-Iter2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma-2-9B-It-SPPO-Iter2", + "id": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter2", + "developer": "UCLA-AGI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.31 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.599 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0808 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3347 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4139 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.387 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3/44db30b4-2010-4f96-a39e-9ccc8568374f.json b/data/hfopenllm_v2/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3/44db30b4-2010-4f96-a39e-9ccc8568374f.json new file mode 100644 index 000000000..6ac45353a --- /dev/null +++ b/data/hfopenllm_v2/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3/44db30b4-2010-4f96-a39e-9ccc8568374f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/UCLA-AGI_Gemma-2-9B-It-SPPO-Iter3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma-2-9B-It-SPPO-Iter3", + "id": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3", + "developer": "UCLA-AGI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3167 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6007 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.071 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3389 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4166 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3826 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter1/07af3512-a045-435e-a965-8daa0836905d.json b/data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter1/07af3512-a045-435e-a965-8daa0836905d.json deleted file mode 100644 index 7d36a98e7..000000000 --- a/data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter1/07af3512-a045-435e-a965-8daa0836905d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter1/1762652579.9367309", - "retrieved_timestamp": "1762652579.9367318", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter1", - "developer": "UCLA-AGI", - "inference_platform": "unknown", - "id": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7298988904994304 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5057890691082708 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1148036253776435 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3567916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37109375 - } - } - ] -} diff --git a/data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter1/2210d673-d417-46be-aeca-de48cd846e01.json b/data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter1/2210d673-d417-46be-aeca-de48cd846e01.json new file mode 100644 index 000000000..336128396 --- /dev/null +++ b/data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter1/2210d673-d417-46be-aeca-de48cd846e01.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Instruct-8B-SPPO-Iter1", + "id": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter1", + "developer": "UCLA-AGI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7299 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5058 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1148 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3568 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3711 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter2/0c5c315f-63c4-427e-a307-1422a197895c.json b/data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter2/0c5c315f-63c4-427e-a307-1422a197895c.json deleted file mode 100644 index 22737028d..000000000 --- a/data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter2/0c5c315f-63c4-427e-a307-1422a197895c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter2/1762652579.93697", - "retrieved_timestamp": "1762652579.936971", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter2", - "developer": "UCLA-AGI", - "inference_platform": "unknown", - "id": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6988745417713889 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5088696278852957 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10347432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35942708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36918218085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter2/892d27cc-dfb3-40c7-ae0f-a7cd06784808.json b/data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter2/892d27cc-dfb3-40c7-ae0f-a7cd06784808.json new file mode 100644 index 000000000..22ac5bc50 --- /dev/null +++ b/data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter2/892d27cc-dfb3-40c7-ae0f-a7cd06784808.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Instruct-8B-SPPO-Iter2", + "id": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter2", + "developer": "UCLA-AGI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6989 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5089 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1035 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { 
+ "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3594 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3692 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3/49b3f293-721d-4d44-9748-88d1ce275050.json b/data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3/49b3f293-721d-4d44-9748-88d1ce275050.json new file mode 100644 index 000000000..973c80332 --- /dev/null +++ b/data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3/49b3f293-721d-4d44-9748-88d1ce275050.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Instruct-8B-SPPO-Iter3", + "id": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3", + "developer": "UCLA-AGI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6834 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.508 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0959 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3661 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3644 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3/49e095af-ed90-4e64-b476-4fc62d6e6997.json b/data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3/49e095af-ed90-4e64-b476-4fc62d6e6997.json deleted file mode 100644 index af6bfb43f..000000000 --- a/data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3/49e095af-ed90-4e64-b476-4fc62d6e6997.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter3/1762652579.937367", - "retrieved_timestamp": "1762652579.9373682", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3", - "developer": "UCLA-AGI", - "inference_platform": "unknown", - "id": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.67029814226253 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5076407742830437 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3647291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3657746010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3/70fb41fe-46af-49e3-8270-5882e12f710f.json b/data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3/70fb41fe-46af-49e3-8270-5882e12f710f.json new file mode 100644 index 000000000..9aab7af07 --- /dev/null +++ b/data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3/70fb41fe-46af-49e3-8270-5882e12f710f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Instruct-8B-SPPO-Iter3", + "id": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3", + "developer": "UCLA-AGI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6703 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5076 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0718 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3647 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3658 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3/d8d05a10-8889-40aa-b56f-365e0a12052c.json b/data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3/d8d05a10-8889-40aa-b56f-365e0a12052c.json deleted file mode 100644 index 28f8f31b6..000000000 --- a/data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3/d8d05a10-8889-40aa-b56f-365e0a12052c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter3/1762652579.937166", - "retrieved_timestamp": "1762652579.9371672", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3", - "developer": "UCLA-AGI", - "inference_platform": "unknown", - "id": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6834122350917787 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.50795799761689 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09592145015105741 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36606249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3644448138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/UCLA-AGI/Mistral7B-PairRM-SPPO-Iter1/13e2489f-9d96-4f68-8e22-c937604c2145.json b/data/hfopenllm_v2/UCLA-AGI/Mistral7B-PairRM-SPPO-Iter1/13e2489f-9d96-4f68-8e22-c937604c2145.json new file mode 100644 index 000000000..44afb2750 --- /dev/null +++ b/data/hfopenllm_v2/UCLA-AGI/Mistral7B-PairRM-SPPO-Iter1/13e2489f-9d96-4f68-8e22-c937604c2145.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/UCLA-AGI_Mistral7B-PairRM-SPPO-Iter1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + 
"model_info": { + "name": "Mistral7B-PairRM-SPPO-Iter1", + "id": "UCLA-AGI/Mistral7B-PairRM-SPPO-Iter1", + "developer": "UCLA-AGI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5047 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4468 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0249 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3992 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2695 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/UCLA-AGI/Mistral7B-PairRM-SPPO-Iter2/0c386ea0-4706-4a6f-994c-b6ee21dbce92.json b/data/hfopenllm_v2/UCLA-AGI/Mistral7B-PairRM-SPPO-Iter2/0c386ea0-4706-4a6f-994c-b6ee21dbce92.json new file mode 100644 index 000000000..b7fd6fe39 --- /dev/null +++ b/data/hfopenllm_v2/UCLA-AGI/Mistral7B-PairRM-SPPO-Iter2/0c386ea0-4706-4a6f-994c-b6ee21dbce92.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/UCLA-AGI_Mistral7B-PairRM-SPPO-Iter2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral7B-PairRM-SPPO-Iter2", + "id": "UCLA-AGI/Mistral7B-PairRM-SPPO-Iter2", + "developer": "UCLA-AGI", + "inference_platform": "unknown", + "additional_details": { + "precision": 
"bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4446 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4466 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0219 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2886 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4085 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2677 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/UCLA-AGI/Mistral7B-PairRM-SPPO-Iter3/a8d5a193-6c87-4b5b-8ea3-b3ab78e73104.json b/data/hfopenllm_v2/UCLA-AGI/Mistral7B-PairRM-SPPO-Iter3/a8d5a193-6c87-4b5b-8ea3-b3ab78e73104.json new file mode 100644 index 000000000..34544a27f --- /dev/null +++ b/data/hfopenllm_v2/UCLA-AGI/Mistral7B-PairRM-SPPO-Iter3/a8d5a193-6c87-4b5b-8ea3-b3ab78e73104.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/UCLA-AGI_Mistral7B-PairRM-SPPO-Iter3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral7B-PairRM-SPPO-Iter3", + "id": "UCLA-AGI/Mistral7B-PairRM-SPPO-Iter3", + "developer": "UCLA-AGI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + 
"source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4351 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4397 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0234 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2752 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4071 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2658 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/UCLA-AGI/Mistral7B-PairRM-SPPO/4018f4bd-492a-4814-9a7a-1f0c376f2d2e.json b/data/hfopenllm_v2/UCLA-AGI/Mistral7B-PairRM-SPPO/4018f4bd-492a-4814-9a7a-1f0c376f2d2e.json new file mode 100644 index 000000000..d29d93827 --- /dev/null +++ b/data/hfopenllm_v2/UCLA-AGI/Mistral7B-PairRM-SPPO/4018f4bd-492a-4814-9a7a-1f0c376f2d2e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/UCLA-AGI_Mistral7B-PairRM-SPPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral7B-PairRM-SPPO", + "id": "UCLA-AGI/Mistral7B-PairRM-SPPO", + "developer": "UCLA-AGI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.4355 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4439 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.031 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3965 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2621 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/UKzExecution/LlamaExecutor-8B-3.0.5/568072cb-118d-41af-bfe8-fa14cb4c7348.json b/data/hfopenllm_v2/UKzExecution/LlamaExecutor-8B-3.0.5/568072cb-118d-41af-bfe8-fa14cb4c7348.json new file mode 100644 index 000000000..d992c32d0 --- /dev/null +++ b/data/hfopenllm_v2/UKzExecution/LlamaExecutor-8B-3.0.5/568072cb-118d-41af-bfe8-fa14cb4c7348.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/UKzExecution_LlamaExecutor-8B-3.0.5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LlamaExecutor-8B-3.0.5", + "id": "UKzExecution/LlamaExecutor-8B-3.0.5", + "developer": "UKzExecution", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7403 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5006 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.102 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2559 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3754 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3625 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Unbabel/TowerInstruct-Mistral-7B-v0.2/a6d08766-8c36-41bf-8bbc-acdfdc3f8e23.json b/data/hfopenllm_v2/Unbabel/TowerInstruct-Mistral-7B-v0.2/a6d08766-8c36-41bf-8bbc-acdfdc3f8e23.json new file mode 100644 index 000000000..21f0ff869 --- /dev/null +++ b/data/hfopenllm_v2/Unbabel/TowerInstruct-Mistral-7B-v0.2/a6d08766-8c36-41bf-8bbc-acdfdc3f8e23.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Unbabel_TowerInstruct-Mistral-7B-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TowerInstruct-Mistral-7B-v0.2", + "id": "Unbabel/TowerInstruct-Mistral-7B-v0.2", + "developer": "Unbabel", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2843 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3882 + } + }, + { + 
"evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0204 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2475 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4522 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1968 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Undi95/MG-FinalMix-72B/2504fed5-c8a1-4ffc-8ce5-9559aa8c4325.json b/data/hfopenllm_v2/Undi95/MG-FinalMix-72B/2504fed5-c8a1-4ffc-8ce5-9559aa8c4325.json new file mode 100644 index 000000000..c0dc3e377 --- /dev/null +++ b/data/hfopenllm_v2/Undi95/MG-FinalMix-72B/2504fed5-c8a1-4ffc-8ce5-9559aa8c4325.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Undi95_MG-FinalMix-72B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MG-FinalMix-72B", + "id": "Undi95/MG-FinalMix-72B", + "developer": "Undi95", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8014 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6973 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3973 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3851 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4823 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5427 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Undi95/MG-FinalMix-72B/3d3598fa-4b23-4ec6-a010-fb20232a5121.json b/data/hfopenllm_v2/Undi95/MG-FinalMix-72B/3d3598fa-4b23-4ec6-a010-fb20232a5121.json deleted file mode 100644 index c0c0d53f4..000000000 --- a/data/hfopenllm_v2/Undi95/MG-FinalMix-72B/3d3598fa-4b23-4ec6-a010-fb20232a5121.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Undi95_MG-FinalMix-72B/1762652579.938925", - "retrieved_timestamp": "1762652579.938925", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Undi95/MG-FinalMix-72B", - "developer": "Undi95", - "inference_platform": "unknown", - "id": "Undi95/MG-FinalMix-72B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8013648231137825 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6973017446417747 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3972809667673716 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3850671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48227083333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.542719414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/Undi95/Phi4-abliterated/359dde31-d9dc-4c22-b829-77df652dcc73.json b/data/hfopenllm_v2/Undi95/Phi4-abliterated/359dde31-d9dc-4c22-b829-77df652dcc73.json new file mode 100644 index 000000000..8b09b9cab --- /dev/null +++ b/data/hfopenllm_v2/Undi95/Phi4-abliterated/359dde31-d9dc-4c22-b829-77df652dcc73.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Undi95_Phi4-abliterated/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi4-abliterated", + "id": "Undi95/Phi4-abliterated", + "developer": "Undi95", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6618 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6809 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3701 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3305 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4034 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5281 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/V3N0M/Jenna-Tiny-2.0/34a79823-b993-402a-89a7-538e126ee02a.json b/data/hfopenllm_v2/V3N0M/Jenna-Tiny-2.0/34a79823-b993-402a-89a7-538e126ee02a.json new file mode 100644 index 000000000..05681b21a --- /dev/null +++ b/data/hfopenllm_v2/V3N0M/Jenna-Tiny-2.0/34a79823-b993-402a-89a7-538e126ee02a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/V3N0M_Jenna-Tiny-2.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Jenna-Tiny-2.0", + "id": "V3N0M/Jenna-Tiny-2.0", + "developer": "V3N0M", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.631 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2309 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3148 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0121 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3367 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1147 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/V3N0M/Jenna-Tiny-2.0/d9785857-b164-4d38-8d03-0e03e2d0fbf5.json b/data/hfopenllm_v2/V3N0M/Jenna-Tiny-2.0/d9785857-b164-4d38-8d03-0e03e2d0fbf5.json deleted 
file mode 100644 index 7e0d7d5bd..000000000 --- a/data/hfopenllm_v2/V3N0M/Jenna-Tiny-2.0/d9785857-b164-4d38-8d03-0e03e2d0fbf5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/V3N0M_Jenna-Tiny-2.0/1762652579.9394162", - "retrieved_timestamp": "1762652579.9394171", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "V3N0M/Jenna-Tiny-2.0", - "developer": "V3N0M", - "inference_platform": "unknown", - "id": "V3N0M/Jenna-Tiny-2.0", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.631 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2309361383351729 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31479264061817097 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33666666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1146941489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/VAGOsolutions/Llama-3-SauerkrautLM-70b-Instruct/eb8adbdf-2cfb-4e9e-8f75-ce2734907725.json b/data/hfopenllm_v2/VAGOsolutions/Llama-3-SauerkrautLM-70b-Instruct/eb8adbdf-2cfb-4e9e-8f75-ce2734907725.json deleted file mode 100644 index 85d3d4b54..000000000 --- a/data/hfopenllm_v2/VAGOsolutions/Llama-3-SauerkrautLM-70b-Instruct/eb8adbdf-2cfb-4e9e-8f75-ce2734907725.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/VAGOsolutions_Llama-3-SauerkrautLM-70b-Instruct/1762652579.939689", - "retrieved_timestamp": "1762652579.939689", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "VAGOsolutions/Llama-3-SauerkrautLM-70b-Instruct", - "developer": "VAGOsolutions", - "inference_platform": "unknown", - "id": 
"VAGOsolutions/Llama-3-SauerkrautLM-70b-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8044621604010691 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6663247245334951 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2280966767371601 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43393750000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5392287234042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/VAGOsolutions/Llama-3-SauerkrautLM-70b-Instruct/f392c5c3-9bee-4111-9a22-6a1b706fd2ad.json b/data/hfopenllm_v2/VAGOsolutions/Llama-3-SauerkrautLM-70b-Instruct/f392c5c3-9bee-4111-9a22-6a1b706fd2ad.json new file mode 100644 index 000000000..a2a4784dd --- /dev/null +++ b/data/hfopenllm_v2/VAGOsolutions/Llama-3-SauerkrautLM-70b-Instruct/f392c5c3-9bee-4111-9a22-6a1b706fd2ad.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/VAGOsolutions_Llama-3-SauerkrautLM-70b-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-SauerkrautLM-70b-Instruct", + "id": "VAGOsolutions/Llama-3-SauerkrautLM-70b-Instruct", + "developer": "VAGOsolutions", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8045 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6663 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2281 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.328 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4339 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5392 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct/73bbdd22-4e5f-496b-b39f-290d8e0d2aa4.json b/data/hfopenllm_v2/VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct/73bbdd22-4e5f-496b-b39f-290d8e0d2aa4.json new file mode 100644 index 000000000..af84212e1 --- /dev/null +++ b/data/hfopenllm_v2/VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct/73bbdd22-4e5f-496b-b39f-290d8e0d2aa4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/VAGOsolutions_Llama-3-SauerkrautLM-8b-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-SauerkrautLM-8b-Instruct", + "id": "VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct", + "developer": "VAGOsolutions", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7445 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4943 + } + }, + { + "evaluation_name": "MATH 
Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0665 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4241 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3857 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct/ad99531d-4d52-4175-8ebd-cb172b4577de.json b/data/hfopenllm_v2/VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct/ad99531d-4d52-4175-8ebd-cb172b4577de.json deleted file mode 100644 index de18c9058..000000000 --- a/data/hfopenllm_v2/VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct/ad99531d-4d52-4175-8ebd-cb172b4577de.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/VAGOsolutions_Llama-3-SauerkrautLM-8b-Instruct/1762652579.93995", - "retrieved_timestamp": "1762652579.9399512", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct", - "developer": "VAGOsolutions", - "inference_platform": "unknown", - "id": "VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.744536718130117 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.494337579362695 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42410416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3857214095744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/VAGOsolutions/Llama-3.1-SauerkrautLM-70b-Instruct/2e3eca4b-4c15-4b3b-8c44-3a23312a0797.json b/data/hfopenllm_v2/VAGOsolutions/Llama-3.1-SauerkrautLM-70b-Instruct/2e3eca4b-4c15-4b3b-8c44-3a23312a0797.json deleted file mode 100644 index aef1a0cae..000000000 --- a/data/hfopenllm_v2/VAGOsolutions/Llama-3.1-SauerkrautLM-70b-Instruct/2e3eca4b-4c15-4b3b-8c44-3a23312a0797.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/VAGOsolutions_Llama-3.1-SauerkrautLM-70b-Instruct/1762652579.940237", - "retrieved_timestamp": "1762652579.940238", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "VAGOsolutions/Llama-3.1-SauerkrautLM-70b-Instruct", - "developer": "VAGOsolutions", - "inference_platform": "unknown", - "id": "VAGOsolutions/Llama-3.1-SauerkrautLM-70b-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8656365111238181 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7006249194404001 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3693353474320242 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3414429530201342 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4710833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5334940159574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/VAGOsolutions/Llama-3.1-SauerkrautLM-70b-Instruct/72a66eae-9c94-40e3-b3c9-211303e5cba8.json b/data/hfopenllm_v2/VAGOsolutions/Llama-3.1-SauerkrautLM-70b-Instruct/72a66eae-9c94-40e3-b3c9-211303e5cba8.json new file mode 100644 index 000000000..c22f37f6c --- /dev/null +++ b/data/hfopenllm_v2/VAGOsolutions/Llama-3.1-SauerkrautLM-70b-Instruct/72a66eae-9c94-40e3-b3c9-211303e5cba8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/VAGOsolutions_Llama-3.1-SauerkrautLM-70b-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-SauerkrautLM-70b-Instruct", + "id": "VAGOsolutions/Llama-3.1-SauerkrautLM-70b-Instruct", + "developer": "VAGOsolutions", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8656 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7006 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3693 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3414 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4711 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5335 + } + } + ] +} \ No newline at 
end of file diff --git a/data/hfopenllm_v2/VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct/aa425d3e-e363-46bf-a5fb-cbf524657e85.json b/data/hfopenllm_v2/VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct/aa425d3e-e363-46bf-a5fb-cbf524657e85.json deleted file mode 100644 index f4cc59918..000000000 --- a/data/hfopenllm_v2/VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct/aa425d3e-e363-46bf-a5fb-cbf524657e85.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/VAGOsolutions_Llama-3.1-SauerkrautLM-8b-Instruct/1762652579.9404852", - "retrieved_timestamp": "1762652579.940486", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct", - "developer": "VAGOsolutions", - "inference_platform": "unknown", - "id": "VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8017393848322452 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5114932190011187 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19410876132930513 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4148020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3890458776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct/ef7390b5-599b-4354-805b-9486e4ce34fa.json b/data/hfopenllm_v2/VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct/ef7390b5-599b-4354-805b-9486e4ce34fa.json new file mode 100644 index 000000000..c83915153 --- /dev/null +++ b/data/hfopenllm_v2/VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct/ef7390b5-599b-4354-805b-9486e4ce34fa.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/VAGOsolutions_Llama-3.1-SauerkrautLM-8b-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + 
"source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-SauerkrautLM-8b-Instruct", + "id": "VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct", + "developer": "VAGOsolutions", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8017 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5115 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1941 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4148 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.389 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-1.5b/22ae39ae-883c-43a7-abbe-3213b9035b58.json b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-1.5b/22ae39ae-883c-43a7-abbe-3213b9035b58.json deleted file mode 100644 index f2136daca..000000000 --- a/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-1.5b/22ae39ae-883c-43a7-abbe-3213b9035b58.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-1.5b/1762652579.940706", - "retrieved_timestamp": "1762652579.940707", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - 
"source_type": "documentation" - }, - "model_info": { - "name": "VAGOsolutions/SauerkrautLM-1.5b", - "developer": "VAGOsolutions", - "inference_platform": "unknown", - "id": "VAGOsolutions/SauerkrautLM-1.5b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24040324117785256 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3703912164863146 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03625377643504532 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37390625000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21509308510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-1.5b/57f964c3-0504-4b60-9539-ce0e369816ea.json b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-1.5b/57f964c3-0504-4b60-9539-ce0e369816ea.json new file mode 100644 index 000000000..f33769597 --- /dev/null +++ b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-1.5b/57f964c3-0504-4b60-9539-ce0e369816ea.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-1.5b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SauerkrautLM-1.5b", + "id": "VAGOsolutions/SauerkrautLM-1.5b", + "developer": "VAGOsolutions", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2404 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3704 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0363 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3739 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2151 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-7b-HerO/4e6c0336-5d94-4417-a194-92a4d6f38481.json b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-7b-HerO/4e6c0336-5d94-4417-a194-92a4d6f38481.json new file mode 100644 index 000000000..dde4ae4ac --- /dev/null +++ b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-7b-HerO/4e6c0336-5d94-4417-a194-92a4d6f38481.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-7b-HerO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SauerkrautLM-7b-HerO", + "id": "VAGOsolutions/SauerkrautLM-7b-HerO", + "developer": "VAGOsolutions", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5346 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4904 + } + }, + { + "evaluation_name": 
"MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0393 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3924 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3046 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-7b-HerO/be74b2d6-28b9-4227-b0ec-fbad4b7dada6.json b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-7b-HerO/be74b2d6-28b9-4227-b0ec-fbad4b7dada6.json deleted file mode 100644 index fa55d6a09..000000000 --- a/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-7b-HerO/be74b2d6-28b9-4227-b0ec-fbad4b7dada6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-7b-HerO/1762652579.940931", - "retrieved_timestamp": "1762652579.940931", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "VAGOsolutions/SauerkrautLM-7b-HerO", - "developer": "VAGOsolutions", - "inference_platform": "unknown", - "id": "VAGOsolutions/SauerkrautLM-7b-HerO", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.534610389322553 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49044349935812964 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - 
"metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39238541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30460438829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-7b-LaserChat/35512aeb-611a-46a8-849e-442fc3fcc23a.json b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-7b-LaserChat/35512aeb-611a-46a8-849e-442fc3fcc23a.json deleted file mode 100644 index 3845b6897..000000000 --- a/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-7b-LaserChat/35512aeb-611a-46a8-849e-442fc3fcc23a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-7b-LaserChat/1762652579.941142", - "retrieved_timestamp": "1762652579.941143", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "VAGOsolutions/SauerkrautLM-7b-LaserChat", - "developer": "VAGOsolutions", - "inference_platform": "unknown", - "id": "VAGOsolutions/SauerkrautLM-7b-LaserChat", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5987823419637672 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45432707993295685 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07779456193353475 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4148020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3304521276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-7b-LaserChat/fe38dea8-92f4-4fb2-afdf-c5932d7c9e27.json b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-7b-LaserChat/fe38dea8-92f4-4fb2-afdf-c5932d7c9e27.json new file mode 100644 index 000000000..e0f9d6aca --- /dev/null +++ b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-7b-LaserChat/fe38dea8-92f4-4fb2-afdf-c5932d7c9e27.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-7b-LaserChat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SauerkrautLM-7b-LaserChat", + "id": "VAGOsolutions/SauerkrautLM-7b-LaserChat", + "developer": "VAGOsolutions", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5988 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4543 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0778 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4148 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3305 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-Gemma-2b/5ced7497-5a05-40d2-80cb-cae63ca62022.json 
b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-Gemma-2b/5ced7497-5a05-40d2-80cb-cae63ca62022.json new file mode 100644 index 000000000..66467bc2c --- /dev/null +++ b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-Gemma-2b/5ced7497-5a05-40d2-80cb-cae63ca62022.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-Gemma-2b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SauerkrautLM-Gemma-2b", + "id": "VAGOsolutions/SauerkrautLM-Gemma-2b", + "developer": "VAGOsolutions", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 2.506 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2475 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3416 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0279 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2567 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3676 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1469 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-Gemma-7b/52a66aaa-193a-48ca-b693-4dcab811eaa3.json b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-Gemma-7b/52a66aaa-193a-48ca-b693-4dcab811eaa3.json new file mode 100644 index 000000000..f14465432 --- /dev/null +++ 
b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-Gemma-7b/52a66aaa-193a-48ca-b693-4dcab811eaa3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-Gemma-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SauerkrautLM-Gemma-7b", + "id": "VAGOsolutions/SauerkrautLM-Gemma-7b", + "developer": "VAGOsolutions", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 8.538 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3407 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4188 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0672 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2861 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3594 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct/e0e4bcef-cb73-436b-9353-b18ade293e8b.json b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct/e0e4bcef-cb73-436b-9353-b18ade293e8b.json new file mode 100644 index 000000000..56dde9357 --- /dev/null +++ b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct/e0e4bcef-cb73-436b-9353-b18ade293e8b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/VAGOsolutions_SauerkrautLM-Mixtral-8x7B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SauerkrautLM-Mixtral-8x7B-Instruct", + "id": "VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct", + "developer": "VAGOsolutions", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 46.703 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5602 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5277 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0982 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4204 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.365 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct/f105fe57-632a-4e3b-bbcb-f063f2e10874.json b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct/f105fe57-632a-4e3b-bbcb-f063f2e10874.json deleted file mode 100644 index 2504233e1..000000000 --- a/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct/f105fe57-632a-4e3b-bbcb-f063f2e10874.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-Mixtral-8x7B-Instruct/1762652579.9418082", - "retrieved_timestamp": "1762652579.941809", - 
"source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct", - "developer": "VAGOsolutions", - "inference_platform": "unknown", - "id": "VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 46.703 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5601891869129465 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5277342269858817 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09818731117824774 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42041666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3650265957446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-Nemo-12b-Instruct/1ae45791-7e47-4083-bd72-4530fa26893c.json b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-Nemo-12b-Instruct/1ae45791-7e47-4083-bd72-4530fa26893c.json new file mode 100644 index 000000000..fa2eb1b02 --- /dev/null +++ b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-Nemo-12b-Instruct/1ae45791-7e47-4083-bd72-4530fa26893c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-Nemo-12b-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SauerkrautLM-Nemo-12b-Instruct", + "id": "VAGOsolutions/SauerkrautLM-Nemo-12b-Instruct", + "developer": "VAGOsolutions", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6113 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5214 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1224 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4469 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3385 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-Nemo-12b-Instruct/b5db7846-f777-4fa8-86e9-f09fdee1dfee.json b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-Nemo-12b-Instruct/b5db7846-f777-4fa8-86e9-f09fdee1dfee.json deleted file mode 100644 index 072ade7cd..000000000 --- a/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-Nemo-12b-Instruct/b5db7846-f777-4fa8-86e9-f09fdee1dfee.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-Nemo-12b-Instruct/1762652579.942016", - "retrieved_timestamp": "1762652579.942017", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "VAGOsolutions/SauerkrautLM-Nemo-12b-Instruct", - "developer": "VAGOsolutions", - "inference_platform": "unknown", - "id": "VAGOsolutions/SauerkrautLM-Nemo-12b-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 
1 - }, - "score_details": { - "score": 0.6112969144093228 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5214128647611115 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12235649546827794 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4468958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33851396276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-Phi-3-medium/b2731f04-a9bd-4e36-a545-85be5b66f5a7.json b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-Phi-3-medium/b2731f04-a9bd-4e36-a545-85be5b66f5a7.json new file mode 100644 index 000000000..b7b0c2153 --- /dev/null +++ b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-Phi-3-medium/b2731f04-a9bd-4e36-a545-85be5b66f5a7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-Phi-3-medium/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SauerkrautLM-Phi-3-medium", + "id": "VAGOsolutions/SauerkrautLM-Phi-3-medium", + "developer": "VAGOsolutions", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 13.96 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4409 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6433 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, 
+ "max_score": 1.0 + }, + "score_details": { + "score": 0.1601 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3347 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4845 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4665 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-SOLAR-Instruct/24fbb409-3b1a-4ed2-8866-547a7f02c5dc.json b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-SOLAR-Instruct/24fbb409-3b1a-4ed2-8866-547a7f02c5dc.json deleted file mode 100644 index 1c1bf403e..000000000 --- a/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-SOLAR-Instruct/24fbb409-3b1a-4ed2-8866-547a7f02c5dc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-SOLAR-Instruct/1762652579.942544", - "retrieved_timestamp": "1762652579.942544", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "VAGOsolutions/SauerkrautLM-SOLAR-Instruct", - "developer": "VAGOsolutions", - "inference_platform": "unknown", - "id": "VAGOsolutions/SauerkrautLM-SOLAR-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49172085621705963 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5169447300097646 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": 
{ - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3965416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31831781914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-SOLAR-Instruct/ed6de552-d04b-4d51-8456-610e2cb41d85.json b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-SOLAR-Instruct/ed6de552-d04b-4d51-8456-610e2cb41d85.json new file mode 100644 index 000000000..feb5d2dda --- /dev/null +++ b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-SOLAR-Instruct/ed6de552-d04b-4d51-8456-610e2cb41d85.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-SOLAR-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SauerkrautLM-SOLAR-Instruct", + "id": "VAGOsolutions/SauerkrautLM-SOLAR-Instruct", + "developer": "VAGOsolutions", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4917 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5169 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0634 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3965 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": 
"hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3183 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-gemma-2-2b-it/3e08a589-d2b3-487b-900e-85725522a2e4.json b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-gemma-2-2b-it/3e08a589-d2b3-487b-900e-85725522a2e4.json new file mode 100644 index 000000000..49079f005 --- /dev/null +++ b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-gemma-2-2b-it/3e08a589-d2b3-487b-900e-85725522a2e4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-gemma-2-2b-it/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SauerkrautLM-gemma-2-2b-it", + "id": "VAGOsolutions/SauerkrautLM-gemma-2-2b-it", + "developer": "VAGOsolutions", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1321 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4241 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0219 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", 
+ "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2693 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-gemma-2-9b-it/b2717503-d081-40ee-b1ed-fcadaf239049.json b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-gemma-2-9b-it/b2717503-d081-40ee-b1ed-fcadaf239049.json new file mode 100644 index 000000000..e6e7502ff --- /dev/null +++ b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-gemma-2-9b-it/b2717503-d081-40ee-b1ed-fcadaf239049.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-gemma-2-9b-it/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SauerkrautLM-gemma-2-9b-it", + "id": "VAGOsolutions/SauerkrautLM-gemma-2-9b-it", + "developer": "VAGOsolutions", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3024 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6073 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0838 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3272 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4318 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4091 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-v2-14b-DPO/9915eb01-5c45-42b6-82a3-ad782411642f.json b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-v2-14b-DPO/9915eb01-5c45-42b6-82a3-ad782411642f.json new file mode 100644 index 000000000..7d69802d9 --- /dev/null +++ b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-v2-14b-DPO/9915eb01-5c45-42b6-82a3-ad782411642f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-v2-14b-DPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SauerkrautLM-v2-14b-DPO", + "id": "VAGOsolutions/SauerkrautLM-v2-14b-DPO", + "developer": "VAGOsolutions", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7412 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.656 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3165 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3196 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5117 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-v2-14b-DPO/e4b13fb1-11c0-4696-856f-de393fe2f8b2.json b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-v2-14b-DPO/e4b13fb1-11c0-4696-856f-de393fe2f8b2.json 
deleted file mode 100644 index 0cd06c2b4..000000000 --- a/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-v2-14b-DPO/e4b13fb1-11c0-4696-856f-de393fe2f8b2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-v2-14b-DPO/1762652579.943197", - "retrieved_timestamp": "1762652579.943197", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "VAGOsolutions/SauerkrautLM-v2-14b-DPO", - "developer": "VAGOsolutions", - "inference_platform": "unknown", - "id": "VAGOsolutions/SauerkrautLM-v2-14b-DPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7411645544931892 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6560374350756156 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3164652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43746875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.51171875 - } - } - ] -} diff --git a/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-v2-14b-SFT/190eb7ca-46db-4e1d-8b71-9bb20af74ede.json b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-v2-14b-SFT/190eb7ca-46db-4e1d-8b71-9bb20af74ede.json new file mode 100644 index 000000000..ac7db90ff --- /dev/null +++ b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-v2-14b-SFT/190eb7ca-46db-4e1d-8b71-9bb20af74ede.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-v2-14b-SFT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SauerkrautLM-v2-14b-SFT", + "id": "VAGOsolutions/SauerkrautLM-v2-14b-SFT", + "developer": "VAGOsolutions", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + 
"architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6949 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.621 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3285 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3356 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4179 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5205 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-v2-14b-SFT/d1b47391-f36e-4819-8093-5aff774dff94.json b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-v2-14b-SFT/d1b47391-f36e-4819-8093-5aff774dff94.json deleted file mode 100644 index 798f27b0e..000000000 --- a/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-v2-14b-SFT/d1b47391-f36e-4819-8093-5aff774dff94.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-v2-14b-SFT/1762652579.94341", - "retrieved_timestamp": "1762652579.9434109", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "VAGOsolutions/SauerkrautLM-v2-14b-SFT", - "developer": "VAGOsolutions", - "inference_platform": "unknown", - "id": "VAGOsolutions/SauerkrautLM-v2-14b-SFT", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6948529900663573 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6210355880693049 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3285498489425982 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33557046979865773 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.417875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5205285904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/VIRNECT/llama-3-Korean-8B-r-v-0.1/86b9077d-9ec3-411d-84c5-326ba97742c1.json b/data/hfopenllm_v2/VIRNECT/llama-3-Korean-8B-r-v-0.1/86b9077d-9ec3-411d-84c5-326ba97742c1.json new file mode 100644 index 000000000..40a4383a3 --- /dev/null +++ b/data/hfopenllm_v2/VIRNECT/llama-3-Korean-8B-r-v-0.1/86b9077d-9ec3-411d-84c5-326ba97742c1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/VIRNECT_llama-3-Korean-8B-r-v-0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-3-Korean-8B-r-v-0.1", + "id": "VIRNECT/llama-3-Korean-8B-r-v-0.1", + "developer": "VIRNECT", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 16.061 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4916 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4806 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + 
"metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0861 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2424 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3675 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.326 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/VIRNECT/llama-3-Korean-8B/18bfa50c-20be-4027-8ee7-f6cd1411c882.json b/data/hfopenllm_v2/VIRNECT/llama-3-Korean-8B/18bfa50c-20be-4027-8ee7-f6cd1411c882.json new file mode 100644 index 000000000..f155f09ea --- /dev/null +++ b/data/hfopenllm_v2/VIRNECT/llama-3-Korean-8B/18bfa50c-20be-4027-8ee7-f6cd1411c882.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/VIRNECT_llama-3-Korean-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-3-Korean-8B", + "id": "VIRNECT/llama-3-Korean-8B", + "developer": "VIRNECT", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5058 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4908 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0929 + } + }, + { + "evaluation_name": "GPQA", + "source_data": 
{ + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3662 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3539 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/VIRNECT/llama-3-Korean-8B/eb1a099a-48c7-412b-b62f-143537c41f06.json b/data/hfopenllm_v2/VIRNECT/llama-3-Korean-8B/eb1a099a-48c7-412b-b62f-143537c41f06.json new file mode 100644 index 000000000..92a869ede --- /dev/null +++ b/data/hfopenllm_v2/VIRNECT/llama-3-Korean-8B/eb1a099a-48c7-412b-b62f-143537c41f06.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/VIRNECT_llama-3-Korean-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-3-Korean-8B", + "id": "VIRNECT/llama-3-Korean-8B", + "developer": "VIRNECT", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5021 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4918 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.108 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3648 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3536 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ValiantLabs/Llama3-70B-Fireplace/e530a4b7-c2f6-4bad-bab5-2895e950ed63.json b/data/hfopenllm_v2/ValiantLabs/Llama3-70B-Fireplace/e530a4b7-c2f6-4bad-bab5-2895e950ed63.json new file mode 100644 index 000000000..60a42d8a2 --- /dev/null +++ b/data/hfopenllm_v2/ValiantLabs/Llama3-70B-Fireplace/e530a4b7-c2f6-4bad-bab5-2895e950ed63.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3-70B-Fireplace/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3-70B-Fireplace", + "id": "ValiantLabs/Llama3-70B-Fireplace", + "developer": "ValiantLabs", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7774 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6489 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2145 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3549 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4449 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4893 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ValiantLabs/Llama3-70B-ShiningValiant2/52ad7152-feea-46a6-b2d8-20e1a70514ce.json b/data/hfopenllm_v2/ValiantLabs/Llama3-70B-ShiningValiant2/52ad7152-feea-46a6-b2d8-20e1a70514ce.json new file mode 100644 index 000000000..7ec6b966f --- /dev/null +++ b/data/hfopenllm_v2/ValiantLabs/Llama3-70B-ShiningValiant2/52ad7152-feea-46a6-b2d8-20e1a70514ce.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3-70B-ShiningValiant2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3-70B-ShiningValiant2", + "id": "ValiantLabs/Llama3-70B-ShiningValiant2", + "developer": "ValiantLabs", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6122 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6338 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2077 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3305 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4326 + } + }, 
+ { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4898 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ValiantLabs/Llama3.1-70B-ShiningValiant2/a61162a6-ef3e-46f4-8aa2-241547fadea2.json b/data/hfopenllm_v2/ValiantLabs/Llama3.1-70B-ShiningValiant2/a61162a6-ef3e-46f4-8aa2-241547fadea2.json new file mode 100644 index 000000000..3438ef88c --- /dev/null +++ b/data/hfopenllm_v2/ValiantLabs/Llama3.1-70B-ShiningValiant2/a61162a6-ef3e-46f4-8aa2-241547fadea2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.1-70B-ShiningValiant2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.1-70B-ShiningValiant2", + "id": "ValiantLabs/Llama3.1-70B-ShiningValiant2", + "developer": "ValiantLabs", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5355 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6738 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2915 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3926 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4681 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5173 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ValiantLabs/Llama3.1-8B-Cobalt/9f208aef-8544-47c8-bb1f-a3841aff208b.json b/data/hfopenllm_v2/ValiantLabs/Llama3.1-8B-Cobalt/9f208aef-8544-47c8-bb1f-a3841aff208b.json new file mode 100644 index 000000000..e206b8681 --- /dev/null +++ b/data/hfopenllm_v2/ValiantLabs/Llama3.1-8B-Cobalt/9f208aef-8544-47c8-bb1f-a3841aff208b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.1-8B-Cobalt/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.1-8B-Cobalt", + "id": "ValiantLabs/Llama3.1-8B-Cobalt", + "developer": "ValiantLabs", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7168 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4911 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1533 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2861 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3512 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3663 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/ValiantLabs/Llama3.1-8B-Cobalt/da237ab6-df39-460f-9efc-e1649e1ac202.json b/data/hfopenllm_v2/ValiantLabs/Llama3.1-8B-Cobalt/da237ab6-df39-460f-9efc-e1649e1ac202.json new file mode 100644 index 000000000..aa9dee935 --- /dev/null +++ b/data/hfopenllm_v2/ValiantLabs/Llama3.1-8B-Cobalt/da237ab6-df39-460f-9efc-e1649e1ac202.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.1-8B-Cobalt/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.1-8B-Cobalt", + "id": "ValiantLabs/Llama3.1-8B-Cobalt", + "developer": "ValiantLabs", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3496 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4947 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1269 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3959 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3644 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ValiantLabs/Llama3.1-8B-Enigma/c81b3193-9d01-4590-8b72-da97aa3c9dc4.json b/data/hfopenllm_v2/ValiantLabs/Llama3.1-8B-Enigma/c81b3193-9d01-4590-8b72-da97aa3c9dc4.json new file mode 100644 index 000000000..c8bdca2cb --- /dev/null 
+++ b/data/hfopenllm_v2/ValiantLabs/Llama3.1-8B-Enigma/c81b3193-9d01-4590-8b72-da97aa3c9dc4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.1-8B-Enigma/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.1-8B-Enigma", + "id": "ValiantLabs/Llama3.1-8B-Enigma", + "developer": "ValiantLabs", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2681 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4478 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0891 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2878 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4196 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3409 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ValiantLabs/Llama3.1-8B-Esper2/1a9ffe50-69ae-48bc-b636-89431391eb37.json b/data/hfopenllm_v2/ValiantLabs/Llama3.1-8B-Esper2/1a9ffe50-69ae-48bc-b636-89431391eb37.json new file mode 100644 index 000000000..ba7ad45fc --- /dev/null +++ b/data/hfopenllm_v2/ValiantLabs/Llama3.1-8B-Esper2/1a9ffe50-69ae-48bc-b636-89431391eb37.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.1-8B-Esper2/1770682486.623709", + 
"retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.1-8B-Esper2", + "id": "ValiantLabs/Llama3.1-8B-Esper2", + "developer": "ValiantLabs", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2567 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.447 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0589 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3561 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2904 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ValiantLabs/Llama3.1-8B-Fireplace2/b0c67359-1da0-4f55-aa1c-f54f88038bd7.json b/data/hfopenllm_v2/ValiantLabs/Llama3.1-8B-Fireplace2/b0c67359-1da0-4f55-aa1c-f54f88038bd7.json new file mode 100644 index 000000000..2f2d8e167 --- /dev/null +++ b/data/hfopenllm_v2/ValiantLabs/Llama3.1-8B-Fireplace2/b0c67359-1da0-4f55-aa1c-f54f88038bd7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.1-8B-Fireplace2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + 
"model_info": { + "name": "Llama3.1-8B-Fireplace2", + "id": "ValiantLabs/Llama3.1-8B-Fireplace2", + "developer": "ValiantLabs", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5483 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.461 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0582 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2886 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3433 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2407 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ValiantLabs/Llama3.1-8B-Fireplace2/c700798b-583a-41be-94dd-382669bb495f.json b/data/hfopenllm_v2/ValiantLabs/Llama3.1-8B-Fireplace2/c700798b-583a-41be-94dd-382669bb495f.json new file mode 100644 index 000000000..fca031505 --- /dev/null +++ b/data/hfopenllm_v2/ValiantLabs/Llama3.1-8B-Fireplace2/c700798b-583a-41be-94dd-382669bb495f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.1-8B-Fireplace2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.1-8B-Fireplace2", + "id": "ValiantLabs/Llama3.1-8B-Fireplace2", + "developer": "ValiantLabs", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + 
"architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5328 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4613 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0876 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3367 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2424 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ValiantLabs/Llama3.1-8B-ShiningValiant2/3c0b9735-2ef1-4f27-b94a-f246eb57b73c.json b/data/hfopenllm_v2/ValiantLabs/Llama3.1-8B-ShiningValiant2/3c0b9735-2ef1-4f27-b94a-f246eb57b73c.json new file mode 100644 index 000000000..2c1716490 --- /dev/null +++ b/data/hfopenllm_v2/ValiantLabs/Llama3.1-8B-ShiningValiant2/3c0b9735-2ef1-4f27-b94a-f246eb57b73c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.1-8B-ShiningValiant2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.1-8B-ShiningValiant2", + "id": "ValiantLabs/Llama3.1-8B-ShiningValiant2", + "developer": "ValiantLabs", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + 
"source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6496 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4774 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0566 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3909 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3382 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ValiantLabs/Llama3.1-8B-ShiningValiant2/e8c9501b-c985-4b78-a902-a1a030c72e60.json b/data/hfopenllm_v2/ValiantLabs/Llama3.1-8B-ShiningValiant2/e8c9501b-c985-4b78-a902-a1a030c72e60.json new file mode 100644 index 000000000..b45bd8cf9 --- /dev/null +++ b/data/hfopenllm_v2/ValiantLabs/Llama3.1-8B-ShiningValiant2/e8c9501b-c985-4b78-a902-a1a030c72e60.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.1-8B-ShiningValiant2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.1-8B-ShiningValiant2", + "id": "ValiantLabs/Llama3.1-8B-ShiningValiant2", + "developer": "ValiantLabs", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2678 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4429 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0521 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3959 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2927 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ValiantLabs/Llama3.2-3B-Enigma/df978fce-3373-4073-8c44-d6a83df1d9d1.json b/data/hfopenllm_v2/ValiantLabs/Llama3.2-3B-Enigma/df978fce-3373-4073-8c44-d6a83df1d9d1.json new file mode 100644 index 000000000..be7b22dec --- /dev/null +++ b/data/hfopenllm_v2/ValiantLabs/Llama3.2-3B-Enigma/df978fce-3373-4073-8c44-d6a83df1d9d1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.2-3B-Enigma/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.2-3B-Enigma", + "id": "ValiantLabs/Llama3.2-3B-Enigma", + "developer": "ValiantLabs", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2786 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3723 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0438 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3921 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2428 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ValiantLabs/Llama3.2-3B-Esper2/e46ee8d9-81af-4259-8fef-3d3113fb6168.json b/data/hfopenllm_v2/ValiantLabs/Llama3.2-3B-Esper2/e46ee8d9-81af-4259-8fef-3d3113fb6168.json new file mode 100644 index 000000000..40bf9a025 --- /dev/null +++ b/data/hfopenllm_v2/ValiantLabs/Llama3.2-3B-Esper2/e46ee8d9-81af-4259-8fef-3d3113fb6168.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.2-3B-Esper2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.2-3B-Esper2", + "id": "ValiantLabs/Llama3.2-3B-Esper2", + "developer": "ValiantLabs", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.275 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3808 + } + }, + { + "evaluation_name": "MATH 
Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0363 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.355 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2257 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ValiantLabs/Llama3.2-3B-ShiningValiant2/aa6ab404-89ef-4336-b811-7c8064e26107.json b/data/hfopenllm_v2/ValiantLabs/Llama3.2-3B-ShiningValiant2/aa6ab404-89ef-4336-b811-7c8064e26107.json new file mode 100644 index 000000000..aad1ebcc3 --- /dev/null +++ b/data/hfopenllm_v2/ValiantLabs/Llama3.2-3B-ShiningValiant2/aa6ab404-89ef-4336-b811-7c8064e26107.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.2-3B-ShiningValiant2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.2-3B-ShiningValiant2", + "id": "ValiantLabs/Llama3.2-3B-ShiningValiant2", + "developer": "ValiantLabs", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2625 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4226 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0823 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3866 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2829 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Vikhrmodels/Vikhr-Llama3.1-8B-Instruct-R-21-09-24/a14e6c79-4a78-4c02-a7ca-35e783f32be1.json b/data/hfopenllm_v2/Vikhrmodels/Vikhr-Llama3.1-8B-Instruct-R-21-09-24/a14e6c79-4a78-4c02-a7ca-35e783f32be1.json new file mode 100644 index 000000000..3f2934658 --- /dev/null +++ b/data/hfopenllm_v2/Vikhrmodels/Vikhr-Llama3.1-8B-Instruct-R-21-09-24/a14e6c79-4a78-4c02-a7ca-35e783f32be1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Vikhrmodels_Vikhr-Llama3.1-8B-Instruct-R-21-09-24/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Vikhr-Llama3.1-8B-Instruct-R-21-09-24", + "id": "Vikhrmodels/Vikhr-Llama3.1-8B-Instruct-R-21-09-24", + "developer": "Vikhrmodels", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6431 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5272 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 
0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2175 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.245 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3754 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3547 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Vikhrmodels/Vikhr-Llama3.1-8B-Instruct-R-21-09-24/b0332107-4b84-4c0a-b488-187fb3d534ae.json b/data/hfopenllm_v2/Vikhrmodels/Vikhr-Llama3.1-8B-Instruct-R-21-09-24/b0332107-4b84-4c0a-b488-187fb3d534ae.json deleted file mode 100644 index e23b2840e..000000000 --- a/data/hfopenllm_v2/Vikhrmodels/Vikhr-Llama3.1-8B-Instruct-R-21-09-24/b0332107-4b84-4c0a-b488-187fb3d534ae.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Vikhrmodels_Vikhr-Llama3.1-8B-Instruct-R-21-09-24/1762652579.9476302", - "retrieved_timestamp": "1762652579.9476311", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Vikhrmodels/Vikhr-Llama3.1-8B-Instruct-R-21-09-24", - "developer": "Vikhrmodels", - "inference_platform": "unknown", - "id": "Vikhrmodels/Vikhr-Llama3.1-8B-Instruct-R-21-09-24", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.643145742186288 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.527224269970207 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24496644295302014 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3753958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3547207446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24/787cc582-61da-4afd-bfac-431377809fd9.json b/data/hfopenllm_v2/Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24/787cc582-61da-4afd-bfac-431377809fd9.json deleted file mode 100644 index 2cd044ba8..000000000 --- a/data/hfopenllm_v2/Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24/787cc582-61da-4afd-bfac-431377809fd9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Vikhrmodels_Vikhr-Nemo-12B-Instruct-R-21-09-24/1762652579.947979", - "retrieved_timestamp": "1762652579.94798", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24", - "developer": "Vikhrmodels", - "inference_platform": "unknown", - "id": "Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5999315150467426 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5212309052827618 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1714501510574018 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40730208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33976063829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24/ba1fb85b-bbc0-46ac-95d7-e61b91f65c2b.json 
b/data/hfopenllm_v2/Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24/ba1fb85b-bbc0-46ac-95d7-e61b91f65c2b.json new file mode 100644 index 000000000..5191b77e3 --- /dev/null +++ b/data/hfopenllm_v2/Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24/ba1fb85b-bbc0-46ac-95d7-e61b91f65c2b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Vikhrmodels_Vikhr-Nemo-12B-Instruct-R-21-09-24/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Vikhr-Nemo-12B-Instruct-R-21-09-24", + "id": "Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24", + "developer": "Vikhrmodels", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5999 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5212 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1715 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4073 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3398 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Weyaxi/Bagel-Hermes-2x34B/5b614673-6566-4b82-bf7c-13268ebb1577.json b/data/hfopenllm_v2/Weyaxi/Bagel-Hermes-2x34B/5b614673-6566-4b82-bf7c-13268ebb1577.json deleted file mode 100644 index 0182fe223..000000000 --- 
a/data/hfopenllm_v2/Weyaxi/Bagel-Hermes-2x34B/5b614673-6566-4b82-bf7c-13268ebb1577.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Weyaxi_Bagel-Hermes-2x34B/1762652579.948213", - "retrieved_timestamp": "1762652579.948214", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Weyaxi/Bagel-Hermes-2x34B", - "developer": "Weyaxi", - "inference_platform": "unknown", - "id": "Weyaxi/Bagel-Hermes-2x34B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 60.814 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5431532777474878 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49166555632285514 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45166666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4588597074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/Weyaxi/Bagel-Hermes-2x34B/f6312fc7-c7a8-45dc-a57c-91f56b4ca28a.json b/data/hfopenllm_v2/Weyaxi/Bagel-Hermes-2x34B/f6312fc7-c7a8-45dc-a57c-91f56b4ca28a.json new file mode 100644 index 000000000..a15c3dfdc --- /dev/null +++ b/data/hfopenllm_v2/Weyaxi/Bagel-Hermes-2x34B/f6312fc7-c7a8-45dc-a57c-91f56b4ca28a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Weyaxi_Bagel-Hermes-2x34B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Bagel-Hermes-2x34B", + "id": "Weyaxi/Bagel-Hermes-2x34B", + "developer": "Weyaxi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 60.814 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + 
"dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5432 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4917 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0604 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.328 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4517 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4589 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Weyaxi/Bagel-Hermes-34B-Slerp/28439ab5-0e5f-4dae-a98a-e0c1b743a8b0.json b/data/hfopenllm_v2/Weyaxi/Bagel-Hermes-34B-Slerp/28439ab5-0e5f-4dae-a98a-e0c1b743a8b0.json deleted file mode 100644 index bb00e8e6d..000000000 --- a/data/hfopenllm_v2/Weyaxi/Bagel-Hermes-34B-Slerp/28439ab5-0e5f-4dae-a98a-e0c1b743a8b0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Weyaxi_Bagel-Hermes-34B-Slerp/1762652579.948482", - "retrieved_timestamp": "1762652579.948482", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Weyaxi/Bagel-Hermes-34B-Slerp", - "developer": "Weyaxi", - "inference_platform": "unknown", - "id": "Weyaxi/Bagel-Hermes-34B-Slerp", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.4602720780861448 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5921903605860047 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3347315436241611 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46220833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4703291223404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/Weyaxi/Bagel-Hermes-34B-Slerp/335f5c32-f3f0-4a16-8c9d-8f07b2aae54a.json b/data/hfopenllm_v2/Weyaxi/Bagel-Hermes-34B-Slerp/335f5c32-f3f0-4a16-8c9d-8f07b2aae54a.json new file mode 100644 index 000000000..b2da0af11 --- /dev/null +++ b/data/hfopenllm_v2/Weyaxi/Bagel-Hermes-34B-Slerp/335f5c32-f3f0-4a16-8c9d-8f07b2aae54a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Weyaxi_Bagel-Hermes-34B-Slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Bagel-Hermes-34B-Slerp", + "id": "Weyaxi/Bagel-Hermes-34B-Slerp", + "developer": "Weyaxi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4603 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5922 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.0604 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3347 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4622 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4703 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Weyaxi/Einstein-v4-7B/035c5e35-0ebe-4e91-a598-8d01688462a3.json b/data/hfopenllm_v2/Weyaxi/Einstein-v4-7B/035c5e35-0ebe-4e91-a598-8d01688462a3.json deleted file mode 100644 index 2870e9ffd..000000000 --- a/data/hfopenllm_v2/Weyaxi/Einstein-v4-7B/035c5e35-0ebe-4e91-a598-8d01688462a3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Weyaxi_Einstein-v4-7B/1762652579.948704", - "retrieved_timestamp": "1762652579.948705", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Weyaxi/Einstein-v4-7B", - "developer": "Weyaxi", - "inference_platform": "unknown", - "id": "Weyaxi/Einstein-v4-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47081299839980145 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38494699692741774 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0188821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.4681666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22589760638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/Weyaxi/Einstein-v4-7B/b7c7a907-7ecc-4d5b-bc6f-8b8d82954b21.json b/data/hfopenllm_v2/Weyaxi/Einstein-v4-7B/b7c7a907-7ecc-4d5b-bc6f-8b8d82954b21.json new file mode 100644 index 000000000..42818be4e --- /dev/null +++ b/data/hfopenllm_v2/Weyaxi/Einstein-v4-7B/b7c7a907-7ecc-4d5b-bc6f-8b8d82954b21.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Weyaxi_Einstein-v4-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Einstein-v4-7B", + "id": "Weyaxi/Einstein-v4-7B", + "developer": "Weyaxi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4708 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3849 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0189 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2819 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4682 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2259 + } + } + ] +} \ No newline 
at end of file diff --git a/data/hfopenllm_v2/Weyaxi/Einstein-v6.1-Llama3-8B/112f01a2-f0fb-4257-86bf-61c9a184eb92.json b/data/hfopenllm_v2/Weyaxi/Einstein-v6.1-Llama3-8B/112f01a2-f0fb-4257-86bf-61c9a184eb92.json new file mode 100644 index 000000000..f43785b03 --- /dev/null +++ b/data/hfopenllm_v2/Weyaxi/Einstein-v6.1-Llama3-8B/112f01a2-f0fb-4257-86bf-61c9a184eb92.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Weyaxi_Einstein-v6.1-Llama3-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Einstein-v6.1-Llama3-8B", + "id": "Weyaxi/Einstein-v6.1-Llama3-8B", + "developer": "Weyaxi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4568 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5008 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.068 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2819 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4213 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3131 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Weyaxi/Einstein-v6.1-developed-by-Weyaxi-Llama3-8B/2d9410d6-7162-4811-bf7d-9de2c2b48fd2.json 
b/data/hfopenllm_v2/Weyaxi/Einstein-v6.1-developed-by-Weyaxi-Llama3-8B/2d9410d6-7162-4811-bf7d-9de2c2b48fd2.json new file mode 100644 index 000000000..4375030ce --- /dev/null +++ b/data/hfopenllm_v2/Weyaxi/Einstein-v6.1-developed-by-Weyaxi-Llama3-8B/2d9410d6-7162-4811-bf7d-9de2c2b48fd2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Weyaxi_Einstein-v6.1-developed-by-Weyaxi-Llama3-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Einstein-v6.1-developed-by-Weyaxi-Llama3-8B", + "id": "Weyaxi/Einstein-v6.1-developed-by-Weyaxi-Llama3-8B", + "developer": "Weyaxi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3927 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5044 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0718 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2735 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4332 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3093 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Weyaxi/Einstein-v7-Qwen2-7B/16ff8fa3-4676-473c-99ad-908ddb59d8ed.json b/data/hfopenllm_v2/Weyaxi/Einstein-v7-Qwen2-7B/16ff8fa3-4676-473c-99ad-908ddb59d8ed.json new file mode 100644 index 000000000..3fd32bab2 --- 
/dev/null +++ b/data/hfopenllm_v2/Weyaxi/Einstein-v7-Qwen2-7B/16ff8fa3-4676-473c-99ad-908ddb59d8ed.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Weyaxi_Einstein-v7-Qwen2-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Einstein-v7-Qwen2-7B", + "id": "Weyaxi/Einstein-v7-Qwen2-7B", + "developer": "Weyaxi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.41 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5161 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1994 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.44 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4096 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Weyaxi/Einstein-v8-Llama3.2-1B/9b153ac9-f95b-419b-b7f9-beccd769ddad.json b/data/hfopenllm_v2/Weyaxi/Einstein-v8-Llama3.2-1B/9b153ac9-f95b-419b-b7f9-beccd769ddad.json new file mode 100644 index 000000000..96e3825ec --- /dev/null +++ b/data/hfopenllm_v2/Weyaxi/Einstein-v8-Llama3.2-1B/9b153ac9-f95b-419b-b7f9-beccd769ddad.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Weyaxi_Einstein-v8-Llama3.2-1B/1770682486.623709", + 
"retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Einstein-v8-Llama3.2-1B", + "id": "Weyaxi/Einstein-v8-Llama3.2-1B", + "developer": "Weyaxi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1862 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3018 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0008 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3618 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1161 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Weyaxi/SauerkrautLM-UNA-SOLAR-Instruct/8a5df3c2-eb71-4e12-b013-fb43685f2916.json b/data/hfopenllm_v2/Weyaxi/SauerkrautLM-UNA-SOLAR-Instruct/8a5df3c2-eb71-4e12-b013-fb43685f2916.json new file mode 100644 index 000000000..e6c896811 --- /dev/null +++ b/data/hfopenllm_v2/Weyaxi/SauerkrautLM-UNA-SOLAR-Instruct/8a5df3c2-eb71-4e12-b013-fb43685f2916.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Weyaxi_SauerkrautLM-UNA-SOLAR-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": 
"third_party" + }, + "model_info": { + "name": "SauerkrautLM-UNA-SOLAR-Instruct", + "id": "Weyaxi/SauerkrautLM-UNA-SOLAR-Instruct", + "developer": "Weyaxi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4573 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5166 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0461 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3112 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3979 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3153 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Weyaxi/SauerkrautLM-UNA-SOLAR-Instruct/8ddec5bb-ab90-4c98-8482-a412e7735246.json b/data/hfopenllm_v2/Weyaxi/SauerkrautLM-UNA-SOLAR-Instruct/8ddec5bb-ab90-4c98-8482-a412e7735246.json deleted file mode 100644 index b349c721b..000000000 --- a/data/hfopenllm_v2/Weyaxi/SauerkrautLM-UNA-SOLAR-Instruct/8ddec5bb-ab90-4c98-8482-a412e7735246.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Weyaxi_SauerkrautLM-UNA-SOLAR-Instruct/1762652579.950165", - "retrieved_timestamp": "1762652579.950166", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"Weyaxi/SauerkrautLM-UNA-SOLAR-Instruct", - "developer": "Weyaxi", - "inference_platform": "unknown", - "id": "Weyaxi/SauerkrautLM-UNA-SOLAR-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4573243438520902 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5166357112030591 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04607250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.397875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31532579787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/WizardLMTeam/WizardLM-13B-V1.0/35fa3213-5c08-4b19-ae76-237fdd25444e.json b/data/hfopenllm_v2/WizardLMTeam/WizardLM-13B-V1.0/35fa3213-5c08-4b19-ae76-237fdd25444e.json new file mode 100644 index 000000000..ea8afc65e --- /dev/null +++ b/data/hfopenllm_v2/WizardLMTeam/WizardLM-13B-V1.0/35fa3213-5c08-4b19-ae76-237fdd25444e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/WizardLMTeam_WizardLM-13B-V1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "WizardLM-13B-V1.0", + "id": "WizardLMTeam/WizardLM-13B-V1.0", + "developer": "WizardLMTeam", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.185 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2913 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3497 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1166 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/WizardLMTeam/WizardLM-13B-V1.0/ab4f785b-779f-423b-9905-31a3b66dfeff.json b/data/hfopenllm_v2/WizardLMTeam/WizardLM-13B-V1.0/ab4f785b-779f-423b-9905-31a3b66dfeff.json deleted file mode 100644 index 9abe0fe42..000000000 --- a/data/hfopenllm_v2/WizardLMTeam/WizardLM-13B-V1.0/ab4f785b-779f-423b-9905-31a3b66dfeff.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/WizardLMTeam_WizardLM-13B-V1.0/1762652579.9503958", - "retrieved_timestamp": "1762652579.950397", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "WizardLMTeam/WizardLM-13B-V1.0", - "developer": "WizardLMTeam", - "inference_platform": "unknown", - "id": "WizardLMTeam/WizardLM-13B-V1.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18504900331121424 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29134447696551025 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34971875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11660571808510638 - } - } - ] -} diff --git a/data/hfopenllm_v2/WizardLMTeam/WizardLM-13B-V1.2/242ce55f-1471-435e-bcd7-d28b5fc87fc4.json b/data/hfopenllm_v2/WizardLMTeam/WizardLM-13B-V1.2/242ce55f-1471-435e-bcd7-d28b5fc87fc4.json new file mode 100644 index 000000000..9777bbd90 --- /dev/null +++ b/data/hfopenllm_v2/WizardLMTeam/WizardLM-13B-V1.2/242ce55f-1471-435e-bcd7-d28b5fc87fc4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/WizardLMTeam_WizardLM-13B-V1.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "WizardLM-13B-V1.2", + "id": "WizardLMTeam/WizardLM-13B-V1.2", + "developer": "WizardLMTeam", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3392 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4462 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0189 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4378 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2519 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/WizardLMTeam/WizardLM-13B-V1.2/f9d2286c-ed89-4c23-b6a2-c623373331cd.json b/data/hfopenllm_v2/WizardLMTeam/WizardLM-13B-V1.2/f9d2286c-ed89-4c23-b6a2-c623373331cd.json deleted file mode 100644 index d06e8aa1c..000000000 --- a/data/hfopenllm_v2/WizardLMTeam/WizardLM-13B-V1.2/f9d2286c-ed89-4c23-b6a2-c623373331cd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/WizardLMTeam_WizardLM-13B-V1.2/1762652579.950676", - "retrieved_timestamp": "1762652579.950676", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "WizardLMTeam/WizardLM-13B-V1.2", - "developer": "WizardLMTeam", - "inference_platform": "unknown", - "id": "WizardLMTeam/WizardLM-13B-V1.2", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3392465325336773 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44619994364600474 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0188821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43784375000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25191156914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/WizardLMTeam/WizardLM-70B-V1.0/8c4ff628-41b6-4769-a33e-b1dbffa913cf.json 
b/data/hfopenllm_v2/WizardLMTeam/WizardLM-70B-V1.0/8c4ff628-41b6-4769-a33e-b1dbffa913cf.json deleted file mode 100644 index 9b62f64b2..000000000 --- a/data/hfopenllm_v2/WizardLMTeam/WizardLM-70B-V1.0/8c4ff628-41b6-4769-a33e-b1dbffa913cf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/WizardLMTeam_WizardLM-70B-V1.0/1762652579.950908", - "retrieved_timestamp": "1762652579.950909", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "WizardLMTeam/WizardLM-70B-V1.0", - "developer": "WizardLMTeam", - "inference_platform": "unknown", - "id": "WizardLMTeam/WizardLM-70B-V1.0", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49514288753839814 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5590366047184262 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43911458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34466422872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/WizardLMTeam/WizardLM-70B-V1.0/95f509f2-5e67-404a-968d-f7488d684e32.json b/data/hfopenllm_v2/WizardLMTeam/WizardLM-70B-V1.0/95f509f2-5e67-404a-968d-f7488d684e32.json new file mode 100644 index 000000000..8f43a920a --- /dev/null +++ b/data/hfopenllm_v2/WizardLMTeam/WizardLM-70B-V1.0/95f509f2-5e67-404a-968d-f7488d684e32.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/WizardLMTeam_WizardLM-70B-V1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "WizardLM-70B-V1.0", + "id": "WizardLMTeam/WizardLM-70B-V1.0", + "developer": "WizardLMTeam", + "inference_platform": "unknown", + 
"additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4951 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.559 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0393 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4391 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3447 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Wladastic/Mini-Think-Base-1B/5f9a01b0-632a-4ee4-aedc-279002c7496c.json b/data/hfopenllm_v2/Wladastic/Mini-Think-Base-1B/5f9a01b0-632a-4ee4-aedc-279002c7496c.json deleted file mode 100644 index 95492bf5a..000000000 --- a/data/hfopenllm_v2/Wladastic/Mini-Think-Base-1B/5f9a01b0-632a-4ee4-aedc-279002c7496c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Wladastic_Mini-Think-Base-1B/1762652579.951128", - "retrieved_timestamp": "1762652579.9511292", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Wladastic/Mini-Think-Base-1B", - "developer": "Wladastic", - "inference_platform": "unknown", - "id": "Wladastic/Mini-Think-Base-1B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5588405430923283 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35741728048349203 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07326283987915408 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32748958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17719414893617022 - } - } - ] -} diff --git a/data/hfopenllm_v2/Wladastic/Mini-Think-Base-1B/bcbcdfe9-0663-417c-9a29-60906e63db8f.json b/data/hfopenllm_v2/Wladastic/Mini-Think-Base-1B/bcbcdfe9-0663-417c-9a29-60906e63db8f.json new file mode 100644 index 000000000..ab878d450 --- /dev/null +++ b/data/hfopenllm_v2/Wladastic/Mini-Think-Base-1B/bcbcdfe9-0663-417c-9a29-60906e63db8f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Wladastic_Mini-Think-Base-1B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mini-Think-Base-1B", + "id": "Wladastic/Mini-Think-Base-1B", + "developer": "Wladastic", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5588 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3574 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + 
"metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0733 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3275 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1772 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Xclbr7/Arcanum-12b/2d0a414f-1cf2-4ae3-951b-ed69d1ef883f.json b/data/hfopenllm_v2/Xclbr7/Arcanum-12b/2d0a414f-1cf2-4ae3-951b-ed69d1ef883f.json deleted file mode 100644 index 80170b5be..000000000 --- a/data/hfopenllm_v2/Xclbr7/Arcanum-12b/2d0a414f-1cf2-4ae3-951b-ed69d1ef883f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Xclbr7_Arcanum-12b/1762652579.9514", - "retrieved_timestamp": "1762652579.951401", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Xclbr7/Arcanum-12b", - "developer": "Xclbr7", - "inference_platform": "unknown", - "id": "Xclbr7/Arcanum-12b", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2906864896253053 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5265359354118465 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11933534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41703124999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3586269946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/Xclbr7/Arcanum-12b/d95a7493-2f99-4c10-8067-711c7388af7d.json b/data/hfopenllm_v2/Xclbr7/Arcanum-12b/d95a7493-2f99-4c10-8067-711c7388af7d.json new file mode 100644 index 000000000..7bafc055c --- /dev/null +++ b/data/hfopenllm_v2/Xclbr7/Arcanum-12b/d95a7493-2f99-4c10-8067-711c7388af7d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Xclbr7_Arcanum-12b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Arcanum-12b", + "id": "Xclbr7/Arcanum-12b", + "developer": "Xclbr7", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2907 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5265 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1193 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3205 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.417 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", 
+ "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3586 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Xclbr7/Hyena-12b/06eb233f-5182-4b9e-be3f-21c928eef397.json b/data/hfopenllm_v2/Xclbr7/Hyena-12b/06eb233f-5182-4b9e-be3f-21c928eef397.json deleted file mode 100644 index ef022db6a..000000000 --- a/data/hfopenllm_v2/Xclbr7/Hyena-12b/06eb233f-5182-4b9e-be3f-21c928eef397.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Xclbr7_Hyena-12b/1762652579.9516642", - "retrieved_timestamp": "1762652579.951665", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Xclbr7/Hyena-12b", - "developer": "Xclbr7", - "inference_platform": "unknown", - "id": "Xclbr7/Hyena-12b", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3404455733010634 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5457182415468321 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11329305135951662 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39842708333333327 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3439162234042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/Xclbr7/Hyena-12b/789848a0-6d8a-4583-93c3-a72df74d0071.json b/data/hfopenllm_v2/Xclbr7/Hyena-12b/789848a0-6d8a-4583-93c3-a72df74d0071.json new file mode 100644 index 000000000..eff456eeb --- /dev/null +++ b/data/hfopenllm_v2/Xclbr7/Hyena-12b/789848a0-6d8a-4583-93c3-a72df74d0071.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Xclbr7_Hyena-12b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"Hyena-12b", + "id": "Xclbr7/Hyena-12b", + "developer": "Xclbr7", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3404 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5457 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1133 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3984 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3439 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Xclbr7/caliburn-12b/14af87df-0fc5-46e1-9d0b-c25c8b6a7ce7.json b/data/hfopenllm_v2/Xclbr7/caliburn-12b/14af87df-0fc5-46e1-9d0b-c25c8b6a7ce7.json new file mode 100644 index 000000000..48b753f2c --- /dev/null +++ b/data/hfopenllm_v2/Xclbr7/caliburn-12b/14af87df-0fc5-46e1-9d0b-c25c8b6a7ce7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Xclbr7_caliburn-12b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "caliburn-12b", + "id": "Xclbr7/caliburn-12b", + "developer": "Xclbr7", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": 
{ + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3576 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5519 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1125 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3364 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4292 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3675 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Xclbr7/caliburn-12b/e897d1fc-2c71-4c61-971b-eeddfae1b75c.json b/data/hfopenllm_v2/Xclbr7/caliburn-12b/e897d1fc-2c71-4c61-971b-eeddfae1b75c.json deleted file mode 100644 index 1e46a7a2e..000000000 --- a/data/hfopenllm_v2/Xclbr7/caliburn-12b/e897d1fc-2c71-4c61-971b-eeddfae1b75c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Xclbr7_caliburn-12b/1762652579.951879", - "retrieved_timestamp": "1762652579.95188", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Xclbr7/caliburn-12b", - "developer": "Xclbr7", - "inference_platform": "unknown", - "id": "Xclbr7/caliburn-12b", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.35763108551975425 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5518630300231809 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11253776435045318 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33640939597315433 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4291875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36751994680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/Xclbr7/caliburn-v2-12b/18a12670-8785-44ef-a365-78ce797b8ba5.json b/data/hfopenllm_v2/Xclbr7/caliburn-v2-12b/18a12670-8785-44ef-a365-78ce797b8ba5.json deleted file mode 100644 index 09fd6079e..000000000 --- a/data/hfopenllm_v2/Xclbr7/caliburn-v2-12b/18a12670-8785-44ef-a365-78ce797b8ba5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Xclbr7_caliburn-v2-12b/1762652579.952102", - "retrieved_timestamp": "1762652579.952102", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Xclbr7/caliburn-v2-12b", - "developer": "Xclbr7", - "inference_platform": "unknown", - "id": "Xclbr7/caliburn-v2-12b", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2966816934622358 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5141426125097639 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10498489425981873 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3263422818791946 - } - }, - { - "evaluation_name": 
"MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43703125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37840757978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/Xclbr7/caliburn-v2-12b/379f559f-9bfa-444f-b477-562c25b4c299.json b/data/hfopenllm_v2/Xclbr7/caliburn-v2-12b/379f559f-9bfa-444f-b477-562c25b4c299.json new file mode 100644 index 000000000..475721de8 --- /dev/null +++ b/data/hfopenllm_v2/Xclbr7/caliburn-v2-12b/379f559f-9bfa-444f-b477-562c25b4c299.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Xclbr7_caliburn-v2-12b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "caliburn-v2-12b", + "id": "Xclbr7/caliburn-v2-12b", + "developer": "Xclbr7", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2967 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5141 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.105 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3263 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.437 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3784 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Xiaojian9992024/Llama3.2-1B-THREADRIPPER-v0.2/effb6a3d-c98f-4c3a-be77-902c61cda21b.json b/data/hfopenllm_v2/Xiaojian9992024/Llama3.2-1B-THREADRIPPER-v0.2/effb6a3d-c98f-4c3a-be77-902c61cda21b.json new file mode 100644 index 000000000..b05dd8d93 --- /dev/null +++ b/data/hfopenllm_v2/Xiaojian9992024/Llama3.2-1B-THREADRIPPER-v0.2/effb6a3d-c98f-4c3a-be77-902c61cda21b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Llama3.2-1B-THREADRIPPER-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.2-1B-THREADRIPPER-v0.2", + "id": "Xiaojian9992024/Llama3.2-1B-THREADRIPPER-v0.2", + "developer": "Xiaojian9992024", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5318 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3528 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0657 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3316 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.1745 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Xiaojian9992024/Llama3.2-1B-THREADRIPPER/6c1c1405-afa4-412d-ba1f-49dc1cac4509.json b/data/hfopenllm_v2/Xiaojian9992024/Llama3.2-1B-THREADRIPPER/6c1c1405-afa4-412d-ba1f-49dc1cac4509.json new file mode 100644 index 000000000..cd6966cd3 --- /dev/null +++ b/data/hfopenllm_v2/Xiaojian9992024/Llama3.2-1B-THREADRIPPER/6c1c1405-afa4-412d-ba1f-49dc1cac4509.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Llama3.2-1B-THREADRIPPER/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.2-1B-THREADRIPPER", + "id": "Xiaojian9992024/Llama3.2-1B-THREADRIPPER", + "developer": "Xiaojian9992024", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5576 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3544 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.074 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.313 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1763 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Xiaojian9992024/Phi-4-Megatron-Empathetic/6f4ed7c2-c775-4fd2-8600-4cea523f53e4.json 
b/data/hfopenllm_v2/Xiaojian9992024/Phi-4-Megatron-Empathetic/6f4ed7c2-c775-4fd2-8600-4cea523f53e4.json new file mode 100644 index 000000000..b61bf3793 --- /dev/null +++ b/data/hfopenllm_v2/Xiaojian9992024/Phi-4-Megatron-Empathetic/6f4ed7c2-c775-4fd2-8600-4cea523f53e4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Phi-4-Megatron-Empathetic/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-4-Megatron-Empathetic", + "id": "Xiaojian9992024/Phi-4-Megatron-Empathetic", + "developer": "Xiaojian9992024", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0173 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6673 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2696 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3859 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5071 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5082 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Xiaojian9992024/Phi-4-mini-UNOFFICAL/5fd5206b-186a-43b9-a4f4-07e75aa0293a.json b/data/hfopenllm_v2/Xiaojian9992024/Phi-4-mini-UNOFFICAL/5fd5206b-186a-43b9-a4f4-07e75aa0293a.json new file mode 100644 index 000000000..76e78b910 --- /dev/null +++ 
b/data/hfopenllm_v2/Xiaojian9992024/Phi-4-mini-UNOFFICAL/5fd5206b-186a-43b9-a4f4-07e75aa0293a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Phi-4-mini-UNOFFICAL/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-4-mini-UNOFFICAL", + "id": "Xiaojian9992024/Phi-4-mini-UNOFFICAL", + "developer": "Xiaojian9992024", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.754 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1273 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2944 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2408 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3368 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1144 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Xiaojian9992024/Qwen2.5-7B-MS-Destroyer/b707ecbf-0658-4226-803d-53456d16d54b.json b/data/hfopenllm_v2/Xiaojian9992024/Qwen2.5-7B-MS-Destroyer/b707ecbf-0658-4226-803d-53456d16d54b.json new file mode 100644 index 000000000..ddb71d89b --- /dev/null +++ b/data/hfopenllm_v2/Xiaojian9992024/Qwen2.5-7B-MS-Destroyer/b707ecbf-0658-4226-803d-53456d16d54b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/Xiaojian9992024_Qwen2.5-7B-MS-Destroyer/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-MS-Destroyer", + "id": "Xiaojian9992024/Qwen2.5-7B-MS-Destroyer", + "developer": "Xiaojian9992024", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7296 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.547 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4592 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.427 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4412 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview-v0.2/dca1ee57-5e86-4532-a2f3-ac6a619ca576.json b/data/hfopenllm_v2/Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview-v0.2/dca1ee57-5e86-4532-a2f3-ac6a619ca576.json new file mode 100644 index 000000000..cb19a6f74 --- /dev/null +++ b/data/hfopenllm_v2/Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview-v0.2/dca1ee57-5e86-4532-a2f3-ac6a619ca576.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Qwen2.5-Dyanka-7B-Preview-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF 
Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-Dyanka-7B-Preview-v0.2", + "id": "Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview-v0.2", + "developer": "Xiaojian9992024", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6702 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5374 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4721 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4467 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4371 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview/1233476a-7839-4a22-a7ca-1d0f237d8888.json b/data/hfopenllm_v2/Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview/1233476a-7839-4a22-a7ca-1d0f237d8888.json new file mode 100644 index 000000000..61a8f0cb6 --- /dev/null +++ b/data/hfopenllm_v2/Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview/1233476a-7839-4a22-a7ca-1d0f237d8888.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Qwen2.5-Dyanka-7B-Preview/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"Qwen2.5-Dyanka-7B-Preview", + "id": "Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview", + "developer": "Xiaojian9992024", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.764 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5543 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4879 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3171 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4481 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4376 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Xiaojian9992024/Qwen2.5-THREADRIPPER-Medium-Censored/5c4bdeca-5ef8-4002-8f82-67d49b5ff722.json b/data/hfopenllm_v2/Xiaojian9992024/Qwen2.5-THREADRIPPER-Medium-Censored/5c4bdeca-5ef8-4002-8f82-67d49b5ff722.json new file mode 100644 index 000000000..6183b3899 --- /dev/null +++ b/data/hfopenllm_v2/Xiaojian9992024/Qwen2.5-THREADRIPPER-Medium-Censored/5c4bdeca-5ef8-4002-8f82-67d49b5ff722.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Qwen2.5-THREADRIPPER-Medium-Censored/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-THREADRIPPER-Medium-Censored", + "id": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Medium-Censored", + "developer": 
"Xiaojian9992024", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8112 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6431 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.534 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3347 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.414 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4929 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Xiaojian9992024/Qwen2.5-THREADRIPPER-Small-AnniversaryEdition/18f5fd6c-2b79-4d48-b7e9-18845db16271.json b/data/hfopenllm_v2/Xiaojian9992024/Qwen2.5-THREADRIPPER-Small-AnniversaryEdition/18f5fd6c-2b79-4d48-b7e9-18845db16271.json new file mode 100644 index 000000000..889a250b8 --- /dev/null +++ b/data/hfopenllm_v2/Xiaojian9992024/Qwen2.5-THREADRIPPER-Small-AnniversaryEdition/18f5fd6c-2b79-4d48-b7e9-18845db16271.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Qwen2.5-THREADRIPPER-Small-AnniversaryEdition/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-THREADRIPPER-Small-AnniversaryEdition", + "id": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Small-AnniversaryEdition", + "developer": "Xiaojian9992024", + "inference_platform": "unknown", + 
"additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7404 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5465 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5076 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3807 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4393 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Xiaojian9992024/Qwen2.5-THREADRIPPER-Small/a9039374-fa5a-4b8b-800f-5f4651cf812d.json b/data/hfopenllm_v2/Xiaojian9992024/Qwen2.5-THREADRIPPER-Small/a9039374-fa5a-4b8b-800f-5f4651cf812d.json new file mode 100644 index 000000000..5724e01ef --- /dev/null +++ b/data/hfopenllm_v2/Xiaojian9992024/Qwen2.5-THREADRIPPER-Small/a9039374-fa5a-4b8b-800f-5f4651cf812d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Qwen2.5-THREADRIPPER-Small/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-THREADRIPPER-Small", + "id": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Small", + "developer": "Xiaojian9992024", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + 
"evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7689 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.549 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4736 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4349 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4357 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Xiaojian9992024/Qwen2.5-Ultra-1.5B-25.02-Exp/3f9704b4-bf25-40da-b6dc-b927c3569f40.json b/data/hfopenllm_v2/Xiaojian9992024/Qwen2.5-Ultra-1.5B-25.02-Exp/3f9704b4-bf25-40da-b6dc-b927c3569f40.json new file mode 100644 index 000000000..35e891112 --- /dev/null +++ b/data/hfopenllm_v2/Xiaojian9992024/Qwen2.5-Ultra-1.5B-25.02-Exp/3f9704b4-bf25-40da-b6dc-b927c3569f40.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Qwen2.5-Ultra-1.5B-25.02-Exp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-Ultra-1.5B-25.02-Exp", + "id": "Xiaojian9992024/Qwen2.5-Ultra-1.5B-25.02-Exp", + "developer": "Xiaojian9992024", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { 
+ "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4073 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4066 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0831 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3383 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2641 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Xiaojian9992024/Reflection-L3.2-JametMiniMix-3B/a8f858d8-a792-409f-b79d-948a19e2aa87.json b/data/hfopenllm_v2/Xiaojian9992024/Reflection-L3.2-JametMiniMix-3B/a8f858d8-a792-409f-b79d-948a19e2aa87.json new file mode 100644 index 000000000..163b699a4 --- /dev/null +++ b/data/hfopenllm_v2/Xiaojian9992024/Reflection-L3.2-JametMiniMix-3B/a8f858d8-a792-409f-b79d-948a19e2aa87.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Reflection-L3.2-JametMiniMix-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Reflection-L3.2-JametMiniMix-3B", + "id": "Xiaojian9992024/Reflection-L3.2-JametMiniMix-3B", + "developer": "Xiaojian9992024", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 
1.0 + }, + "score_details": { + "score": 0.4619 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.439 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1193 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2988 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Xiaojian9992024/Reflection-L3.2-JametMiniMix-3B/e582afbb-99f3-4b43-8ee7-b786680124a9.json b/data/hfopenllm_v2/Xiaojian9992024/Reflection-L3.2-JametMiniMix-3B/e582afbb-99f3-4b43-8ee7-b786680124a9.json deleted file mode 100644 index 643a0eb03..000000000 --- a/data/hfopenllm_v2/Xiaojian9992024/Reflection-L3.2-JametMiniMix-3B/e582afbb-99f3-4b43-8ee7-b786680124a9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Reflection-L3.2-JametMiniMix-3B/1762652579.9550028", - "retrieved_timestamp": "1762652579.9550028", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Xiaojian9992024/Reflection-L3.2-JametMiniMix-3B", - "developer": "Xiaojian9992024", - "inference_platform": "unknown", - "id": "Xiaojian9992024/Reflection-L3.2-JametMiniMix-3B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46194541594081484 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4389528940684813 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11933534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36673958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29878656914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/Xkev/Llama-3.2V-11B-cot/5c34a168-b8cf-436b-a3b7-a2d1feadffb9.json b/data/hfopenllm_v2/Xkev/Llama-3.2V-11B-cot/5c34a168-b8cf-436b-a3b7-a2d1feadffb9.json new file mode 100644 index 000000000..2b105fc64 --- /dev/null +++ b/data/hfopenllm_v2/Xkev/Llama-3.2V-11B-cot/5c34a168-b8cf-436b-a3b7-a2d1feadffb9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Xkev_Llama-3.2V-11B-cot/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2V-11B-cot", + "id": "Xkev/Llama-3.2V-11B-cot", + "developer": "Xkev", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MllamaForConditionalGeneration", + "params_billions": 10.67 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4158 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4959 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1556 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": 
"Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4159 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3587 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-1M-YOYO-V3/77092cfe-9820-45e8-94c5-31d27f1daa7c.json b/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-1M-YOYO-V3/77092cfe-9820-45e8-94c5-31d27f1daa7c.json new file mode 100644 index 000000000..750d1c03c --- /dev/null +++ b/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-1M-YOYO-V3/77092cfe-9820-45e8-94c5-31d27f1daa7c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-1M-YOYO-V3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-1M-YOYO-V3", + "id": "YOYO-AI/Qwen2.5-14B-1M-YOYO-V3", + "developer": "YOYO-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8398 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6448 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5355 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3289 + } + }, + { 
+ "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4141 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5207 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-0505/cab8fed8-de68-4fa5-b4fc-d9483fc56571.json b/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-0505/cab8fed8-de68-4fa5-b4fc-d9483fc56571.json new file mode 100644 index 000000000..7845c81e9 --- /dev/null +++ b/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-0505/cab8fed8-de68-4fa5-b4fc-d9483fc56571.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-0505/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-YOYO-0505", + "id": "YOYO-AI/Qwen2.5-14B-YOYO-0505", + "developer": "YOYO-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5883 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6539 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4434 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3733 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4757 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5371 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-0510-v2/a8103350-b208-4856-8e7b-8ea8918ba0d1.json b/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-0510-v2/a8103350-b208-4856-8e7b-8ea8918ba0d1.json new file mode 100644 index 000000000..7d6288961 --- /dev/null +++ b/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-0510-v2/a8103350-b208-4856-8e7b-8ea8918ba0d1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-0510-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-YOYO-0510-v2", + "id": "YOYO-AI/Qwen2.5-14B-YOYO-0510-v2", + "developer": "YOYO-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5947 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6553 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4441 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3817 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4744 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + 
"hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5381 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-0805/e849c03c-c569-4059-8fc5-6a98cf391342.json b/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-0805/e849c03c-c569-4059-8fc5-6a98cf391342.json new file mode 100644 index 000000000..60cbcc72d --- /dev/null +++ b/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-0805/e849c03c-c569-4059-8fc5-6a98cf391342.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-0805/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-YOYO-0805", + "id": "YOYO-AI/Qwen2.5-14B-YOYO-0805", + "developer": "YOYO-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5883 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6539 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4434 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3733 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4757 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5371 + 
} + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-1005-v2/f1d8bffa-61fc-47d5-85cf-48cebcb31af5.json b/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-1005-v2/f1d8bffa-61fc-47d5-85cf-48cebcb31af5.json new file mode 100644 index 000000000..62d6d9013 --- /dev/null +++ b/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-1005-v2/f1d8bffa-61fc-47d5-85cf-48cebcb31af5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-1005-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-YOYO-1005-v2", + "id": "YOYO-AI/Qwen2.5-14B-YOYO-1005-v2", + "developer": "YOYO-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5953 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6551 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4434 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3842 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4731 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5372 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-1005/97bdb352-2e9d-4cc5-8b70-55348ef3a217.json 
b/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-1005/97bdb352-2e9d-4cc5-8b70-55348ef3a217.json new file mode 100644 index 000000000..0d6a7c3b6 --- /dev/null +++ b/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-1005/97bdb352-2e9d-4cc5-8b70-55348ef3a217.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-1005/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-YOYO-1005", + "id": "YOYO-AI/Qwen2.5-14B-YOYO-1005", + "developer": "YOYO-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5972 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6542 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4524 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3809 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.473 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5382 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-1010-v2/78053a33-24c8-4e9f-8791-f127f21eec1c.json b/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-1010-v2/78053a33-24c8-4e9f-8791-f127f21eec1c.json new file mode 100644 index 000000000..ba66678f4 --- /dev/null +++ 
b/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-1010-v2/78053a33-24c8-4e9f-8791-f127f21eec1c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-1010-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-YOYO-1010-v2", + "id": "YOYO-AI/Qwen2.5-14B-YOYO-1010-v2", + "developer": "YOYO-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5947 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6553 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4441 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3817 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4744 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5381 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-1010/03082966-87ba-4560-a784-5d8677003500.json b/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-1010/03082966-87ba-4560-a784-5d8677003500.json new file mode 100644 index 000000000..0f3f65bb4 --- /dev/null +++ b/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-1010/03082966-87ba-4560-a784-5d8677003500.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-1010/1770682486.623709", + 
"retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-YOYO-1010", + "id": "YOYO-AI/Qwen2.5-14B-YOYO-1010", + "developer": "YOYO-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5899 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.654 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4509 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3834 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4744 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5376 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-1010/97f26b20-db66-4a30-ba2a-c18a31081271.json b/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-1010/97f26b20-db66-4a30-ba2a-c18a31081271.json new file mode 100644 index 000000000..2b3d5ce24 --- /dev/null +++ b/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-1010/97f26b20-db66-4a30-ba2a-c18a31081271.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-1010/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + 
"name": "Qwen2.5-14B-YOYO-1010", + "id": "YOYO-AI/Qwen2.5-14B-YOYO-1010", + "developer": "YOYO-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7905 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6406 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3163 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4181 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4944 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-SCE/85f9ccda-8c47-4fa1-9d47-e9da4730b077.json b/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-SCE/85f9ccda-8c47-4fa1-9d47-e9da4730b077.json new file mode 100644 index 000000000..b49788cb1 --- /dev/null +++ b/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-SCE/85f9ccda-8c47-4fa1-9d47-e9da4730b077.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-SCE/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-YOYO-SCE", + "id": "YOYO-AI/Qwen2.5-14B-YOYO-SCE", + "developer": "YOYO-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, 
+ "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5844 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6489 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4615 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3742 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4704 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5381 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-V4-p1/2a57d6f4-643b-4b30-8d67-03032d454887.json b/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-V4-p1/2a57d6f4-643b-4b30-8d67-03032d454887.json new file mode 100644 index 000000000..84039bdda --- /dev/null +++ b/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-V4-p1/2a57d6f4-643b-4b30-8d67-03032d454887.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-V4-p1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-YOYO-V4-p1", + "id": "YOYO-AI/Qwen2.5-14B-YOYO-V4-p1", + "developer": "YOYO-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8203 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6516 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5332 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3456 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4194 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.502 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-V4-p2/d333f360-c1c3-4916-8480-4a1fc490875a.json b/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-V4-p2/d333f360-c1c3-4916-8480-4a1fc490875a.json new file mode 100644 index 000000000..54ba3e37e --- /dev/null +++ b/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-V4-p2/d333f360-c1c3-4916-8480-4a1fc490875a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-V4-p2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-YOYO-V4-p2", + "id": "YOYO-AI/Qwen2.5-14B-YOYO-V4-p2", + "developer": "YOYO-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8048 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": 
"hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6339 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5166 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3272 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4435 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4968 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-V4/37a41261-a7b0-44b2-916f-770cdfa0ad39.json b/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-V4/37a41261-a7b0-44b2-916f-770cdfa0ad39.json new file mode 100644 index 000000000..40718d9ee --- /dev/null +++ b/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-V4/37a41261-a7b0-44b2-916f-770cdfa0ad39.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-V4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-YOYO-V4", + "id": "YOYO-AI/Qwen2.5-14B-YOYO-V4", + "developer": "YOYO-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8398 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.649 + } + 
}, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5347 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3221 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4115 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.517 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-latest-V2/c46cd6cc-b56d-44c5-a03c-b49381ba3462.json b/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-latest-V2/c46cd6cc-b56d-44c5-a03c-b49381ba3462.json new file mode 100644 index 000000000..285272c1d --- /dev/null +++ b/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-latest-V2/c46cd6cc-b56d-44c5-a03c-b49381ba3462.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-latest-V2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-YOYO-latest-V2", + "id": "YOYO-AI/Qwen2.5-14B-YOYO-latest-V2", + "developer": "YOYO-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7771 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6299 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5159 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.354 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4299 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5224 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-latest/612b6226-c25d-42e0-bcd7-be7faa844530.json b/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-latest/612b6226-c25d-42e0-bcd7-be7faa844530.json new file mode 100644 index 000000000..045738a4d --- /dev/null +++ b/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-YOYO-latest/612b6226-c25d-42e0-bcd7-be7faa844530.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-latest/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-YOYO-latest", + "id": "YOYO-AI/Qwen2.5-14B-YOYO-latest", + "developer": "YOYO-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5911 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6656 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4418 + } + }, + { + "evaluation_name": "GPQA", + 
"source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3826 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4691 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5371 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-it-restore/2fc7a4d6-88e0-4f11-9110-dc53942870a4.json b/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-it-restore/2fc7a4d6-88e0-4f11-9110-dc53942870a4.json new file mode 100644 index 000000000..69cc906a3 --- /dev/null +++ b/data/hfopenllm_v2/YOYO-AI/Qwen2.5-14B-it-restore/2fc7a4d6-88e0-4f11-9110-dc53942870a4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-it-restore/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-it-restore", + "id": "YOYO-AI/Qwen2.5-14B-it-restore", + "developer": "YOYO-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8209 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6388 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.537 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3372 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4087 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.49 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/YOYO-AI/Qwen2.5-7B-it-restore/34665752-58d8-48ee-81a6-f1a068c23026.json b/data/hfopenllm_v2/YOYO-AI/Qwen2.5-7B-it-restore/34665752-58d8-48ee-81a6-f1a068c23026.json new file mode 100644 index 000000000..8d8fd9130 --- /dev/null +++ b/data/hfopenllm_v2/YOYO-AI/Qwen2.5-7B-it-restore/34665752-58d8-48ee-81a6-f1a068c23026.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-7B-it-restore/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-it-restore", + "id": "YOYO-AI/Qwen2.5-7B-it-restore", + "developer": "YOYO-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7531 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5407 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4007 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4288 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/YOYO-AI/Qwen2.5-Coder-14B-YOYO-1010/cc0767b5-4aaa-4418-8f68-72a721323e9c.json b/data/hfopenllm_v2/YOYO-AI/Qwen2.5-Coder-14B-YOYO-1010/cc0767b5-4aaa-4418-8f68-72a721323e9c.json new file mode 100644 index 000000000..72afbd4ba --- /dev/null +++ b/data/hfopenllm_v2/YOYO-AI/Qwen2.5-Coder-14B-YOYO-1010/cc0767b5-4aaa-4418-8f68-72a721323e9c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-Coder-14B-YOYO-1010/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-Coder-14B-YOYO-1010", + "id": "YOYO-AI/Qwen2.5-Coder-14B-YOYO-1010", + "developer": "YOYO-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5336 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6187 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3218 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3523 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4422 + } + }, + { + 
"evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4075 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/YOYO-AI/ZYH-LLM-Qwen2.5-14B-V2/ea507a41-1654-4515-94cc-ce2e38800c61.json b/data/hfopenllm_v2/YOYO-AI/ZYH-LLM-Qwen2.5-14B-V2/ea507a41-1654-4515-94cc-ce2e38800c61.json new file mode 100644 index 000000000..6a8e45bb1 --- /dev/null +++ b/data/hfopenllm_v2/YOYO-AI/ZYH-LLM-Qwen2.5-14B-V2/ea507a41-1654-4515-94cc-ce2e38800c61.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/YOYO-AI_ZYH-LLM-Qwen2.5-14B-V2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZYH-LLM-Qwen2.5-14B-V2", + "id": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V2", + "developer": "YOYO-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5071 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6452 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3542 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3792 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4689 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5372 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/YOYO-AI/ZYH-LLM-Qwen2.5-14B-V3/c44e773f-4cca-4780-bdd4-f486e65c18e0.json b/data/hfopenllm_v2/YOYO-AI/ZYH-LLM-Qwen2.5-14B-V3/c44e773f-4cca-4780-bdd4-f486e65c18e0.json new file mode 100644 index 000000000..e0448d4a8 --- /dev/null +++ b/data/hfopenllm_v2/YOYO-AI/ZYH-LLM-Qwen2.5-14B-V3/c44e773f-4cca-4780-bdd4-f486e65c18e0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/YOYO-AI_ZYH-LLM-Qwen2.5-14B-V3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZYH-LLM-Qwen2.5-14B-V3", + "id": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V3", + "developer": "YOYO-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8578 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6359 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5272 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3322 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4022 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4881 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/YOYO-AI/ZYH-LLM-Qwen2.5-14B-V4/f8a46bda-d53b-484e-8832-7939f7d0762d.json b/data/hfopenllm_v2/YOYO-AI/ZYH-LLM-Qwen2.5-14B-V4/f8a46bda-d53b-484e-8832-7939f7d0762d.json new file mode 100644 index 000000000..4f4bd31c8 --- /dev/null +++ b/data/hfopenllm_v2/YOYO-AI/ZYH-LLM-Qwen2.5-14B-V4/f8a46bda-d53b-484e-8832-7939f7d0762d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/YOYO-AI_ZYH-LLM-Qwen2.5-14B-V4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZYH-LLM-Qwen2.5-14B-V4", + "id": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V4", + "developer": "YOYO-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8365 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6515 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5393 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3146 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4434 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5204 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/YOYO-AI/ZYH-LLM-Qwen2.5-14B/c3968a2d-4a9a-4f62-8bea-a3b4b6dcd378.json b/data/hfopenllm_v2/YOYO-AI/ZYH-LLM-Qwen2.5-14B/c3968a2d-4a9a-4f62-8bea-a3b4b6dcd378.json new file mode 100644 index 000000000..7cecb6db8 --- /dev/null +++ 
b/data/hfopenllm_v2/YOYO-AI/ZYH-LLM-Qwen2.5-14B/c3968a2d-4a9a-4f62-8bea-a3b4b6dcd378.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/YOYO-AI_ZYH-LLM-Qwen2.5-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZYH-LLM-Qwen2.5-14B", + "id": "YOYO-AI/ZYH-LLM-Qwen2.5-14B", + "developer": "YOYO-AI", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5941 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6644 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4116 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3859 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4757 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5351 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Yash21/TinyYi-7B-Test/d6a9abee-29ee-44e0-802c-c3e4354ebbac.json b/data/hfopenllm_v2/Yash21/TinyYi-7B-Test/d6a9abee-29ee-44e0-802c-c3e4354ebbac.json deleted file mode 100644 index 0084419b6..000000000 --- a/data/hfopenllm_v2/Yash21/TinyYi-7B-Test/d6a9abee-29ee-44e0-802c-c3e4354ebbac.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Yash21_TinyYi-7B-Test/1762652579.960211", - "retrieved_timestamp": "1762652579.960212", - "source_data": 
[ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Yash21/TinyYi-7B-Test", - "developer": "Yash21", - "inference_platform": "unknown", - "id": "Yash21/TinyYi-7B-Test", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.061 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18564852369490728 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29098007801214715 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3364479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10912566489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/Yash21/TinyYi-7B-Test/da18242c-d6bb-4a0a-a2f9-2e42099f4e8a.json b/data/hfopenllm_v2/Yash21/TinyYi-7B-Test/da18242c-d6bb-4a0a-a2f9-2e42099f4e8a.json new file mode 100644 index 000000000..f956d0abc --- /dev/null +++ b/data/hfopenllm_v2/Yash21/TinyYi-7B-Test/da18242c-d6bb-4a0a-a2f9-2e42099f4e8a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Yash21_TinyYi-7B-Test/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TinyYi-7B-Test", + "id": "Yash21/TinyYi-7B-Test", + "developer": "Yash21", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.061 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1856 + } + }, + { + "evaluation_name": "BBH", + 
"source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.291 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3364 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1091 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Youlln/1PARAMMYL-8B-ModelStock/87231cbd-d911-434d-991b-1eb373cdde4f.json b/data/hfopenllm_v2/Youlln/1PARAMMYL-8B-ModelStock/87231cbd-d911-434d-991b-1eb373cdde4f.json deleted file mode 100644 index 3d1298e79..000000000 --- a/data/hfopenllm_v2/Youlln/1PARAMMYL-8B-ModelStock/87231cbd-d911-434d-991b-1eb373cdde4f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Youlln_1PARAMMYL-8B-ModelStock/1762652579.9604638", - "retrieved_timestamp": "1762652579.960465", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Youlln/1PARAMMYL-8B-ModelStock", - "developer": "Youlln", - "inference_platform": "unknown", - "id": "Youlln/1PARAMMYL-8B-ModelStock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5371336941537344 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5215839663555125 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1487915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4409375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4000166223404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/Youlln/1PARAMMYL-8B-ModelStock/ac078124-85d9-4715-bf7c-1428b1063732.json b/data/hfopenllm_v2/Youlln/1PARAMMYL-8B-ModelStock/ac078124-85d9-4715-bf7c-1428b1063732.json new file mode 100644 index 000000000..0dfc3106e --- /dev/null +++ b/data/hfopenllm_v2/Youlln/1PARAMMYL-8B-ModelStock/ac078124-85d9-4715-bf7c-1428b1063732.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Youlln_1PARAMMYL-8B-ModelStock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "1PARAMMYL-8B-ModelStock", + "id": "Youlln/1PARAMMYL-8B-ModelStock", + "developer": "Youlln", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5371 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5216 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1488 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.3238 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4409 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Youlln/2PRYMMAL-Yi1.5-6B-SLERP/9c1dcd75-8491-4890-ac6f-000868099a3e.json b/data/hfopenllm_v2/Youlln/2PRYMMAL-Yi1.5-6B-SLERP/9c1dcd75-8491-4890-ac6f-000868099a3e.json new file mode 100644 index 000000000..9c066ab38 --- /dev/null +++ b/data/hfopenllm_v2/Youlln/2PRYMMAL-Yi1.5-6B-SLERP/9c1dcd75-8491-4890-ac6f-000868099a3e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Youlln_2PRYMMAL-Yi1.5-6B-SLERP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "2PRYMMAL-Yi1.5-6B-SLERP", + "id": "Youlln/2PRYMMAL-Yi1.5-6B-SLERP", + "developer": "Youlln", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.061 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2826 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4665 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1133 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy 
on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4756 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.317 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Youlln/2PRYMMAL-Yi1.5-6B-SLERP/e80773ef-5ca2-43de-ba99-a7a997aab7f0.json b/data/hfopenllm_v2/Youlln/2PRYMMAL-Yi1.5-6B-SLERP/e80773ef-5ca2-43de-ba99-a7a997aab7f0.json deleted file mode 100644 index 672aad52d..000000000 --- a/data/hfopenllm_v2/Youlln/2PRYMMAL-Yi1.5-6B-SLERP/e80773ef-5ca2-43de-ba99-a7a997aab7f0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Youlln_2PRYMMAL-Yi1.5-6B-SLERP/1762652579.9607239", - "retrieved_timestamp": "1762652579.960725", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Youlln/2PRYMMAL-Yi1.5-6B-SLERP", - "developer": "Youlln", - "inference_platform": "unknown", - "id": "Youlln/2PRYMMAL-Yi1.5-6B-SLERP", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.061 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28259351853083153 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46647504291710673 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11329305135951662 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47560416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3169880319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/Youlln/3PRYMMAL-PHI3-3B-SLERP/7850fc57-49c7-4124-b7c6-e1e7bb2bc726.json b/data/hfopenllm_v2/Youlln/3PRYMMAL-PHI3-3B-SLERP/7850fc57-49c7-4124-b7c6-e1e7bb2bc726.json new file mode 100644 index 
000000000..d99b33828 --- /dev/null +++ b/data/hfopenllm_v2/Youlln/3PRYMMAL-PHI3-3B-SLERP/7850fc57-49c7-4124-b7c6-e1e7bb2bc726.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Youlln_3PRYMMAL-PHI3-3B-SLERP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "3PRYMMAL-PHI3-3B-SLERP", + "id": "Youlln/3PRYMMAL-PHI3-3B-SLERP", + "developer": "Youlln", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3656 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5422 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1715 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3263 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4648 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4002 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Youlln/4PRYMMAL-GEMMA2-9B-SLERP/8f38374e-f373-4639-9278-24441ebd0325.json b/data/hfopenllm_v2/Youlln/4PRYMMAL-GEMMA2-9B-SLERP/8f38374e-f373-4639-9278-24441ebd0325.json new file mode 100644 index 000000000..676bdf378 --- /dev/null +++ b/data/hfopenllm_v2/Youlln/4PRYMMAL-GEMMA2-9B-SLERP/8f38374e-f373-4639-9278-24441ebd0325.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/Youlln_4PRYMMAL-GEMMA2-9B-SLERP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "4PRYMMAL-GEMMA2-9B-SLERP", + "id": "Youlln/4PRYMMAL-GEMMA2-9B-SLERP", + "developer": "Youlln", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2714 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5923 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0906 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3305 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4672 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.421 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Youlln/ECE-MIRAGE-1-12B/c007938e-3427-4896-8493-1500abdfbd2b.json b/data/hfopenllm_v2/Youlln/ECE-MIRAGE-1-12B/c007938e-3427-4896-8493-1500abdfbd2b.json new file mode 100644 index 000000000..7f4f92c00 --- /dev/null +++ b/data/hfopenllm_v2/Youlln/ECE-MIRAGE-1-12B/c007938e-3427-4896-8493-1500abdfbd2b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Youlln_ECE-MIRAGE-1-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + 
"evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-MIRAGE-1-12B", + "id": "Youlln/ECE-MIRAGE-1-12B", + "developer": "Youlln", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 15.21 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.207 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3011 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3219 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.111 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Youlln/ECE-MIRAGE-1-12B/f3f55015-88c7-41ae-b588-9a1eedd56fc2.json b/data/hfopenllm_v2/Youlln/ECE-MIRAGE-1-12B/f3f55015-88c7-41ae-b588-9a1eedd56fc2.json deleted file mode 100644 index 4e2c865d1..000000000 --- a/data/hfopenllm_v2/Youlln/ECE-MIRAGE-1-12B/f3f55015-88c7-41ae-b588-9a1eedd56fc2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Youlln_ECE-MIRAGE-1-12B/1762652579.96142", - "retrieved_timestamp": "1762652579.96142", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Youlln/ECE-MIRAGE-1-12B", - "developer": "Youlln", - "inference_platform": "unknown", - "id": 
"Youlln/ECE-MIRAGE-1-12B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 15.21 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20698081091503875 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30107140221306034 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3219375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11095412234042554 - } - } - ] -} diff --git a/data/hfopenllm_v2/Youlln/ECE-MIRAGE-1-15B/df81dc0d-6c72-49e9-862b-02e9b6642cb6.json b/data/hfopenllm_v2/Youlln/ECE-MIRAGE-1-15B/df81dc0d-6c72-49e9-862b-02e9b6642cb6.json new file mode 100644 index 000000000..331e6d3a6 --- /dev/null +++ b/data/hfopenllm_v2/Youlln/ECE-MIRAGE-1-15B/df81dc0d-6c72-49e9-862b-02e9b6642cb6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Youlln_ECE-MIRAGE-1-15B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-MIRAGE-1-15B", + "id": "Youlln/ECE-MIRAGE-1-15B", + "developer": "Youlln", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 15.21 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.207 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3011 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH 
Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3219 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.111 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Youlln/ECE-MIRAGE-1-15B/f904e587-76ac-4583-9235-fcdd20d9a626.json b/data/hfopenllm_v2/Youlln/ECE-MIRAGE-1-15B/f904e587-76ac-4583-9235-fcdd20d9a626.json deleted file mode 100644 index c5e7ab98a..000000000 --- a/data/hfopenllm_v2/Youlln/ECE-MIRAGE-1-15B/f904e587-76ac-4583-9235-fcdd20d9a626.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Youlln_ECE-MIRAGE-1-15B/1762652579.961622", - "retrieved_timestamp": "1762652579.961622", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Youlln/ECE-MIRAGE-1-15B", - "developer": "Youlln", - "inference_platform": "unknown", - "id": "Youlln/ECE-MIRAGE-1-15B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 15.21 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20698081091503875 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30107140221306034 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 
1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3219375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11095412234042554 - } - } - ] -} diff --git a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-FT-V3-MUSR/46c96d8e-568c-48f8-a74b-9dd4b4195037.json b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-FT-V3-MUSR/46c96d8e-568c-48f8-a74b-9dd4b4195037.json new file mode 100644 index 000000000..89fe56fd5 --- /dev/null +++ b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-FT-V3-MUSR/46c96d8e-568c-48f8-a74b-9dd4b4195037.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-0.5B-FT-V3-MUSR/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-PRYMMAL-0.5B-FT-V3-MUSR", + "id": "Youlln/ECE-PRYMMAL-0.5B-FT-V3-MUSR", + "developer": "Youlln", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1533 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3041 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0242 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2492 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.366 + } + }, + { + "evaluation_name": "MMLU-PRO", + 
"source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1645 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-FT-V3-MUSR/de30a84d-c8cc-4f3c-9eb4-3f58754dc46b.json b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-FT-V3-MUSR/de30a84d-c8cc-4f3c-9eb4-3f58754dc46b.json deleted file mode 100644 index 340a69389..000000000 --- a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-FT-V3-MUSR/de30a84d-c8cc-4f3c-9eb4-3f58754dc46b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-0.5B-FT-V3-MUSR/1762652579.962029", - "retrieved_timestamp": "1762652579.962029", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Youlln/ECE-PRYMMAL-0.5B-FT-V3-MUSR", - "developer": "Youlln", - "inference_platform": "unknown", - "id": "Youlln/ECE-PRYMMAL-0.5B-FT-V3-MUSR", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15334977858748122 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3041148294962408 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02416918429003021 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24916107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36603125000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1644780585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-FT-V3/1f4f7181-8a81-49f4-9e81-925d5d69a37c.json b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-FT-V3/1f4f7181-8a81-49f4-9e81-925d5d69a37c.json new file mode 100644 index 000000000..19716554b --- /dev/null +++ b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-FT-V3/1f4f7181-8a81-49f4-9e81-925d5d69a37c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", 
+ "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-0.5B-FT-V3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-PRYMMAL-0.5B-FT-V3", + "id": "Youlln/ECE-PRYMMAL-0.5B-FT-V3", + "developer": "Youlln", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1642 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3093 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.003 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3644 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1161 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-FT-V3/45c46c5d-cf81-42d4-bf9e-61aca49b2959.json b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-FT-V3/45c46c5d-cf81-42d4-bf9e-61aca49b2959.json deleted file mode 100644 index 61e31eac4..000000000 --- a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-FT-V3/45c46c5d-cf81-42d4-bf9e-61aca49b2959.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-0.5B-FT-V3/1762652579.9618208", - "retrieved_timestamp": "1762652579.9618208", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Youlln/ECE-PRYMMAL-0.5B-FT-V3", - "developer": "Youlln", - "inference_platform": "unknown", - "id": "Youlln/ECE-PRYMMAL-0.5B-FT-V3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16419101317836673 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30931341134548046 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0030211480362537764 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3644479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11610704787234043 - } - } - ] -} diff --git a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-FT-V4-MUSR/3ea343b6-93f6-4c61-a164-3db95d13cbdf.json b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-FT-V4-MUSR/3ea343b6-93f6-4c61-a164-3db95d13cbdf.json new file mode 100644 index 000000000..8d60c846e --- /dev/null +++ b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-FT-V4-MUSR/3ea343b6-93f6-4c61-a164-3db95d13cbdf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-0.5B-FT-V4-MUSR/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-PRYMMAL-0.5B-FT-V4-MUSR", + "id": "Youlln/ECE-PRYMMAL-0.5B-FT-V4-MUSR", + "developer": "Youlln", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1138 + } + }, + { + "evaluation_name": "BBH", 
+ "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3038 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0121 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3529 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1321 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-FT-V4-MUSR/68382b86-8a68-428e-8338-144a76b8c293.json b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-FT-V4-MUSR/68382b86-8a68-428e-8338-144a76b8c293.json deleted file mode 100644 index a0b3cfc63..000000000 --- a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-FT-V4-MUSR/68382b86-8a68-428e-8338-144a76b8c293.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-0.5B-FT-V4-MUSR/1762652579.9622452", - "retrieved_timestamp": "1762652579.962246", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Youlln/ECE-PRYMMAL-0.5B-FT-V4-MUSR", - "developer": "Youlln", - "inference_platform": "unknown", - "id": "Youlln/ECE-PRYMMAL-0.5B-FT-V4-MUSR", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1137570535069172 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3038362724383693 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3528854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13214760638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-SLERP-V2/a9ea8bb5-05fc-4da3-8e00-f53ab8ea6af5.json b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-SLERP-V2/a9ea8bb5-05fc-4da3-8e00-f53ab8ea6af5.json new file mode 100644 index 000000000..76582f768 --- /dev/null +++ b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-SLERP-V2/a9ea8bb5-05fc-4da3-8e00-f53ab8ea6af5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-0.5B-SLERP-V2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-PRYMMAL-0.5B-SLERP-V2", + "id": "Youlln/ECE-PRYMMAL-0.5B-SLERP-V2", + "developer": "Youlln", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1612 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2935 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0008 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3831 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1095 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-SLERP-V2/c0fe65df-7e51-48ad-bf40-fd163804cad1.json b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-SLERP-V2/c0fe65df-7e51-48ad-bf40-fd163804cad1.json deleted file mode 100644 index c0d56a777..000000000 --- a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-SLERP-V2/c0fe65df-7e51-48ad-bf40-fd163804cad1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-0.5B-SLERP-V2/1762652579.962454", - "retrieved_timestamp": "1762652579.962455", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Youlln/ECE-PRYMMAL-0.5B-SLERP-V2", - "developer": "Youlln", - "inference_platform": "unknown", - "id": "Youlln/ECE-PRYMMAL-0.5B-SLERP-V2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1611934112599015 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2934774313772131 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3831145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10945811170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-SLERP-V3/0ea74ce5-43c9-43eb-92bc-3d928062d9e0.json b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-SLERP-V3/0ea74ce5-43c9-43eb-92bc-3d928062d9e0.json new file mode 100644 index 000000000..cf85a25de --- /dev/null +++ b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-SLERP-V3/0ea74ce5-43c9-43eb-92bc-3d928062d9e0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-0.5B-SLERP-V3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-PRYMMAL-0.5B-SLERP-V3", + "id": "Youlln/ECE-PRYMMAL-0.5B-SLERP-V3", + "developer": "Youlln", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.167 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2938 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2517 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3541 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1087 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-SLERP-V3/d67c4d9a-d5cc-4b26-a439-44c87a299ee8.json 
b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-SLERP-V3/d67c4d9a-d5cc-4b26-a439-44c87a299ee8.json deleted file mode 100644 index 3359ccf0d..000000000 --- a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-SLERP-V3/d67c4d9a-d5cc-4b26-a439-44c87a299ee8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-0.5B-SLERP-V3/1762652579.9626722", - "retrieved_timestamp": "1762652579.9626722", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Youlln/ECE-PRYMMAL-0.5B-SLERP-V3", - "developer": "Youlln", - "inference_platform": "unknown", - "id": "Youlln/ECE-PRYMMAL-0.5B-SLERP-V3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16701352411601217 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29383772587210827 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.354125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10871010638297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-YL-1B-SLERP-V1/6896faa7-7204-4091-8f4e-9cc0b53d673a.json b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-YL-1B-SLERP-V1/6896faa7-7204-4091-8f4e-9cc0b53d673a.json new file mode 100644 index 000000000..af72a7152 --- /dev/null +++ b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-YL-1B-SLERP-V1/6896faa7-7204-4091-8f4e-9cc0b53d673a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-YL-1B-SLERP-V1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-PRYMMAL-YL-1B-SLERP-V1", + "id": "Youlln/ECE-PRYMMAL-YL-1B-SLERP-V1", + "developer": "Youlln", + "inference_platform": "unknown", + 
"additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3251 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4209 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1073 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4266 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-YL-1B-SLERP-V1/70577ab1-a0ef-41f3-8d6a-00b0b873ee39.json b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-YL-1B-SLERP-V1/70577ab1-a0ef-41f3-8d6a-00b0b873ee39.json deleted file mode 100644 index 9b4a8b47c..000000000 --- a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-YL-1B-SLERP-V1/70577ab1-a0ef-41f3-8d6a-00b0b873ee39.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-YL-1B-SLERP-V1/1762652579.962892", - "retrieved_timestamp": "1762652579.962893", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Youlln/ECE-PRYMMAL-YL-1B-SLERP-V1", - "developer": "Youlln", - "inference_platform": "unknown", - "id": "Youlln/ECE-PRYMMAL-YL-1B-SLERP-V1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - 
"params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32510848991786234 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4208506248736219 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10725075528700906 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4265833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2935505319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-YL-1B-SLERP-V2/6021f954-951a-47e1-980d-ce729f9f39b4.json b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-YL-1B-SLERP-V2/6021f954-951a-47e1-980d-ce729f9f39b4.json deleted file mode 100644 index 86f751909..000000000 --- a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-YL-1B-SLERP-V2/6021f954-951a-47e1-980d-ce729f9f39b4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-YL-1B-SLERP-V2/1762652579.963118", - "retrieved_timestamp": "1762652579.963118", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Youlln/ECE-PRYMMAL-YL-1B-SLERP-V2", - "developer": "Youlln", - "inference_platform": "unknown", - "id": "Youlln/ECE-PRYMMAL-YL-1B-SLERP-V2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32510848991786234 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4208506248736219 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.10725075528700906 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4265833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2935505319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-YL-1B-SLERP-V2/88064453-fd8c-4bd9-adf1-39f43972bec1.json b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-YL-1B-SLERP-V2/88064453-fd8c-4bd9-adf1-39f43972bec1.json new file mode 100644 index 000000000..b5b27cb03 --- /dev/null +++ b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-YL-1B-SLERP-V2/88064453-fd8c-4bd9-adf1-39f43972bec1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-YL-1B-SLERP-V2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-PRYMMAL-YL-1B-SLERP-V2", + "id": "Youlln/ECE-PRYMMAL-YL-1B-SLERP-V2", + "developer": "Youlln", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3251 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4209 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1073 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": 
"TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4266 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-YL-7B-SLERP-V4/a18ade45-acba-4059-b969-445e529a82e2.json b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-YL-7B-SLERP-V4/a18ade45-acba-4059-b969-445e529a82e2.json new file mode 100644 index 000000000..b630ec36d --- /dev/null +++ b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-YL-7B-SLERP-V4/a18ade45-acba-4059-b969-445e529a82e2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-YL-7B-SLERP-V4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-PRYMMAL-YL-7B-SLERP-V4", + "id": "Youlln/ECE-PRYMMAL-YL-7B-SLERP-V4", + "developer": "Youlln", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.251 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.377 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0536 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.3745 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2132 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-YL-7B-SLERP-V4/e027a39b-1213-42aa-b66f-b1853c644532.json b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-YL-7B-SLERP-V4/e027a39b-1213-42aa-b66f-b1853c644532.json deleted file mode 100644 index 7b8b26c0b..000000000 --- a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-YL-7B-SLERP-V4/e027a39b-1213-42aa-b66f-b1853c644532.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-YL-7B-SLERP-V4/1762652579.963329", - "retrieved_timestamp": "1762652579.963329", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Youlln/ECE-PRYMMAL-YL-7B-SLERP-V4", - "developer": "Youlln", - "inference_platform": "unknown", - "id": "Youlln/ECE-PRYMMAL-YL-7B-SLERP-V4", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2509696494190969 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37697272812325017 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3744895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2131815159574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL0.5-FT/4264c0fc-9f40-4c27-b877-63a751678a1c.json b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL0.5-FT/4264c0fc-9f40-4c27-b877-63a751678a1c.json deleted file mode 100644 index d0534277a..000000000 --- a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL0.5-FT/4264c0fc-9f40-4c27-b877-63a751678a1c.json +++ /dev/null @@ -1,105 
+0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL0.5-FT/1762652579.963541", - "retrieved_timestamp": "1762652579.963541", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Youlln/ECE-PRYMMAL0.5-FT", - "developer": "Youlln", - "inference_platform": "unknown", - "id": "Youlln/ECE-PRYMMAL0.5-FT", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18507338306803725 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31320911187036277 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.023413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.330125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14768949468085107 - } - } - ] -} diff --git a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL0.5-FT/6c0e4132-71e7-44af-95fc-83b0a6be2a82.json b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL0.5-FT/6c0e4132-71e7-44af-95fc-83b0a6be2a82.json new file mode 100644 index 000000000..67a332158 --- /dev/null +++ b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL0.5-FT/6c0e4132-71e7-44af-95fc-83b0a6be2a82.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL0.5-FT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-PRYMMAL0.5-FT", + "id": "Youlln/ECE-PRYMMAL0.5-FT", + "developer": "Youlln", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1851 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3132 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0234 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2559 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3301 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1477 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL0.5B-Youri/46564b0a-1489-4c98-9e7b-20daf58c2f87.json b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL0.5B-Youri/46564b0a-1489-4c98-9e7b-20daf58c2f87.json deleted file mode 100644 index dfbb89e3e..000000000 --- a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL0.5B-Youri/46564b0a-1489-4c98-9e7b-20daf58c2f87.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL0.5B-Youri/1762652579.963748", - "retrieved_timestamp": "1762652579.9637492", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Youlln/ECE-PRYMMAL0.5B-Youri", - "developer": "Youlln", - "inference_platform": "unknown", - "id": "Youlln/ECE-PRYMMAL0.5B-Youri", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1446317991817267 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28173574256265815 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24328859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36965625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10954122340425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL0.5B-Youri/5d9ab422-4f4f-460d-bd39-51266b43d7e5.json b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL0.5B-Youri/5d9ab422-4f4f-460d-bd39-51266b43d7e5.json new file mode 100644 index 000000000..21bba30b8 --- /dev/null +++ b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL0.5B-Youri/5d9ab422-4f4f-460d-bd39-51266b43d7e5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL0.5B-Youri/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-PRYMMAL0.5B-Youri", + "id": "Youlln/ECE-PRYMMAL0.5B-Youri", + "developer": "Youlln", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1446 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2817 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, 
+ "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2433 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3697 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1095 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL1B-FT-V1/c3a0b587-b379-4013-a5ce-26fdc9dcc44d.json b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL1B-FT-V1/c3a0b587-b379-4013-a5ce-26fdc9dcc44d.json deleted file mode 100644 index b47573d37..000000000 --- a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL1B-FT-V1/c3a0b587-b379-4013-a5ce-26fdc9dcc44d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL1B-FT-V1/1762652579.963949", - "retrieved_timestamp": "1762652579.9639502", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Youlln/ECE-PRYMMAL1B-FT-V1", - "developer": "Youlln", - "inference_platform": "unknown", - "id": "Youlln/ECE-PRYMMAL1B-FT-V1", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2143745262569981 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4032647427840684 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06419939577039276 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34165625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2742686170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL1B-FT-V1/cda03c45-0782-40cc-a17d-67d808657b83.json b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL1B-FT-V1/cda03c45-0782-40cc-a17d-67d808657b83.json new file mode 100644 index 000000000..1633dc538 --- /dev/null +++ b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL1B-FT-V1/cda03c45-0782-40cc-a17d-67d808657b83.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL1B-FT-V1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-PRYMMAL1B-FT-V1", + "id": "Youlln/ECE-PRYMMAL1B-FT-V1", + "developer": "Youlln", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2144 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4033 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0642 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2785 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3417 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Youlln/ECE-Qwen0.5B-FT-V2/50f5451b-41c4-4ba5-8bee-ee8a2deb7e79.json 
b/data/hfopenllm_v2/Youlln/ECE-Qwen0.5B-FT-V2/50f5451b-41c4-4ba5-8bee-ee8a2deb7e79.json new file mode 100644 index 000000000..01985d185 --- /dev/null +++ b/data/hfopenllm_v2/Youlln/ECE-Qwen0.5B-FT-V2/50f5451b-41c4-4ba5-8bee-ee8a2deb7e79.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Youlln_ECE-Qwen0.5B-FT-V2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-Qwen0.5B-FT-V2", + "id": "Youlln/ECE-Qwen0.5B-FT-V2", + "developer": "Youlln", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2526 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.329 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0204 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3063 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1666 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Youlln/ECE-Qwen0.5B-FT-V2/ee8952db-9f0a-4892-bff9-4d2ca1b66364.json b/data/hfopenllm_v2/Youlln/ECE-Qwen0.5B-FT-V2/ee8952db-9f0a-4892-bff9-4d2ca1b66364.json deleted file mode 100644 index 9b8d0d372..000000000 --- a/data/hfopenllm_v2/Youlln/ECE-Qwen0.5B-FT-V2/ee8952db-9f0a-4892-bff9-4d2ca1b66364.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - 
"schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Youlln_ECE-Qwen0.5B-FT-V2/1762652579.9641678", - "retrieved_timestamp": "1762652579.964169", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Youlln/ECE-Qwen0.5B-FT-V2", - "developer": "Youlln", - "inference_platform": "unknown", - "id": "Youlln/ECE-Qwen0.5B-FT-V2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25259311958935626 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.328970813623839 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30628125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16655585106382978 - } - } - ] -} diff --git a/data/hfopenllm_v2/Youlln/ECE.EIFFEIL.ia-0.5B-SLERP/7a5fdffa-146b-43fd-a979-728c37ae599f.json b/data/hfopenllm_v2/Youlln/ECE.EIFFEIL.ia-0.5B-SLERP/7a5fdffa-146b-43fd-a979-728c37ae599f.json deleted file mode 100644 index b3e1a1f36..000000000 --- a/data/hfopenllm_v2/Youlln/ECE.EIFFEIL.ia-0.5B-SLERP/7a5fdffa-146b-43fd-a979-728c37ae599f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Youlln_ECE.EIFFEIL.ia-0.5B-SLERP/1762652579.964375", - "retrieved_timestamp": "1762652579.964375", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Youlln/ECE.EIFFEIL.ia-0.5B-SLERP", - "developer": "Youlln", - "inference_platform": "unknown", - "id": "Youlln/ECE.EIFFEIL.ia-0.5B-SLERP", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2561403742071038 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33056720460862643 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05966767371601209 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31021875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1903257978723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/Youlln/ECE.EIFFEIL.ia-0.5B-SLERP/cf758994-6e94-434d-bf68-74cca188b5e8.json b/data/hfopenllm_v2/Youlln/ECE.EIFFEIL.ia-0.5B-SLERP/cf758994-6e94-434d-bf68-74cca188b5e8.json new file mode 100644 index 000000000..69c6a2377 --- /dev/null +++ b/data/hfopenllm_v2/Youlln/ECE.EIFFEIL.ia-0.5B-SLERP/cf758994-6e94-434d-bf68-74cca188b5e8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Youlln_ECE.EIFFEIL.ia-0.5B-SLERP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE.EIFFEIL.ia-0.5B-SLERP", + "id": "Youlln/ECE.EIFFEIL.ia-0.5B-SLERP", + "developer": "Youlln", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2561 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3306 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": 
"Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0597 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3102 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1903 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/YoungPanda/qwenqwen/611f9549-0788-44e9-8125-18df06cd80d6.json b/data/hfopenllm_v2/YoungPanda/qwenqwen/611f9549-0788-44e9-8125-18df06cd80d6.json new file mode 100644 index 000000000..92e685942 --- /dev/null +++ b/data/hfopenllm_v2/YoungPanda/qwenqwen/611f9549-0788-44e9-8125-18df06cd80d6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/YoungPanda_qwenqwen/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwenqwen", + "id": "YoungPanda/qwenqwen", + "developer": "YoungPanda", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2MoeForCausalLM", + "params_billions": 14.316 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1264 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3379 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0355 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": 
"Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3434 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1168 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Yuma42/KangalKhan-RawRuby-7B/4ad4a260-770a-4cce-9ba7-546cfa4cde58.json b/data/hfopenllm_v2/Yuma42/KangalKhan-RawRuby-7B/4ad4a260-770a-4cce-9ba7-546cfa4cde58.json deleted file mode 100644 index feb023541..000000000 --- a/data/hfopenllm_v2/Yuma42/KangalKhan-RawRuby-7B/4ad4a260-770a-4cce-9ba7-546cfa4cde58.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Yuma42_KangalKhan-RawRuby-7B/1762652579.9648829", - "retrieved_timestamp": "1762652579.964884", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Yuma42/KangalKhan-RawRuby-7B", - "developer": "Yuma42", - "inference_platform": "unknown", - "id": "Yuma42/KangalKhan-RawRuby-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.547674614467391 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47547278683676025 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39495833333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30227726063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/Yuma42/KangalKhan-RawRuby-7B/59cf23ba-027d-4bac-a0e1-526376396b4d.json b/data/hfopenllm_v2/Yuma42/KangalKhan-RawRuby-7B/59cf23ba-027d-4bac-a0e1-526376396b4d.json new file mode 100644 index 000000000..2c57a54ac --- /dev/null +++ b/data/hfopenllm_v2/Yuma42/KangalKhan-RawRuby-7B/59cf23ba-027d-4bac-a0e1-526376396b4d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Yuma42_KangalKhan-RawRuby-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "KangalKhan-RawRuby-7B", + "id": "Yuma42/KangalKhan-RawRuby-7B", + "developer": "Yuma42", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5477 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4755 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0665 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2878 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.395 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3023 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/Yuma42/Llama3.1-IgneousIguana-8B/1f02bbd3-ddaf-4db6-b7f8-31bad8ffac66.json b/data/hfopenllm_v2/Yuma42/Llama3.1-IgneousIguana-8B/1f02bbd3-ddaf-4db6-b7f8-31bad8ffac66.json new file mode 100644 index 000000000..168e27964 --- /dev/null +++ b/data/hfopenllm_v2/Yuma42/Llama3.1-IgneousIguana-8B/1f02bbd3-ddaf-4db6-b7f8-31bad8ffac66.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Yuma42_Llama3.1-IgneousIguana-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.1-IgneousIguana-8B", + "id": "Yuma42/Llama3.1-IgneousIguana-8B", + "developer": "Yuma42", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8133 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5191 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2198 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4203 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3974 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Yuma42/Llama3.1-SuperHawk-8B/1e737e28-d926-43e8-9e4c-e39fa91d7977.json b/data/hfopenllm_v2/Yuma42/Llama3.1-SuperHawk-8B/1e737e28-d926-43e8-9e4c-e39fa91d7977.json new file mode 100644 index 000000000..75ea95504 --- 
/dev/null +++ b/data/hfopenllm_v2/Yuma42/Llama3.1-SuperHawk-8B/1e737e28-d926-43e8-9e4c-e39fa91d7977.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Yuma42_Llama3.1-SuperHawk-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.1-SuperHawk-8B", + "id": "Yuma42/Llama3.1-SuperHawk-8B", + "developer": "Yuma42", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7986 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.52 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2349 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3129 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4084 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3945 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Z1-Coder/Z1-Coder-7B/43ef8eee-5d8a-47e7-ac71-1a898421370a.json b/data/hfopenllm_v2/Z1-Coder/Z1-Coder-7B/43ef8eee-5d8a-47e7-ac71-1a898421370a.json new file mode 100644 index 000000000..49916e11f --- /dev/null +++ b/data/hfopenllm_v2/Z1-Coder/Z1-Coder-7B/43ef8eee-5d8a-47e7-ac71-1a898421370a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/Z1-Coder_Z1-Coder-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + 
"source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Z1-Coder-7B", + "id": "Z1-Coder/Z1-Coder-7B", + "developer": "Z1-Coder", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3215 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4842 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3248 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3622 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3759 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Z1-Coder/Z1-Coder-7B/750b35ad-fdf6-4243-91e7-aee90f84fa5b.json b/data/hfopenllm_v2/Z1-Coder/Z1-Coder-7B/750b35ad-fdf6-4243-91e7-aee90f84fa5b.json deleted file mode 100644 index 3cbd50580..000000000 --- a/data/hfopenllm_v2/Z1-Coder/Z1-Coder-7B/750b35ad-fdf6-4243-91e7-aee90f84fa5b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Z1-Coder_Z1-Coder-7B/1762652579.9655669", - "retrieved_timestamp": "1762652579.965568", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - 
"model_info": { - "name": "Z1-Coder/Z1-Coder-7B", - "developer": "Z1-Coder", - "inference_platform": "unknown", - "id": "Z1-Coder/Z1-Coder-7B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3215113676157041 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48418251218099567 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.324773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36215625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37591422872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/ZHLiu627/zephyr-7b-gemma-dpo-avg/856a1f50-7ffb-4eb1-be4a-8aaa3cd6ee66.json b/data/hfopenllm_v2/ZHLiu627/zephyr-7b-gemma-dpo-avg/856a1f50-7ffb-4eb1-be4a-8aaa3cd6ee66.json deleted file mode 100644 index 9eb63a815..000000000 --- a/data/hfopenllm_v2/ZHLiu627/zephyr-7b-gemma-dpo-avg/856a1f50-7ffb-4eb1-be4a-8aaa3cd6ee66.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ZHLiu627_zephyr-7b-gemma-dpo-avg/1762652579.9658082", - "retrieved_timestamp": "1762652579.9658089", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ZHLiu627/zephyr-7b-gemma-dpo-avg", - "developer": "ZHLiu627", - "inference_platform": "unknown", - "id": "ZHLiu627/zephyr-7b-gemma-dpo-avg", - "additional_details": { - "precision": "float16", - "architecture": "GemmaForCausalLM", - "params_billions": 8.538 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30899679517014855 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.41488227982365095 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4107083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28507313829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/ZHLiu627/zephyr-7b-gemma-dpo-avg/d8d03c71-942f-4aff-8a5e-5c265c639b44.json b/data/hfopenllm_v2/ZHLiu627/zephyr-7b-gemma-dpo-avg/d8d03c71-942f-4aff-8a5e-5c265c639b44.json new file mode 100644 index 000000000..8775cade5 --- /dev/null +++ b/data/hfopenllm_v2/ZHLiu627/zephyr-7b-gemma-dpo-avg/d8d03c71-942f-4aff-8a5e-5c265c639b44.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ZHLiu627_zephyr-7b-gemma-dpo-avg/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zephyr-7b-gemma-dpo-avg", + "id": "ZHLiu627/zephyr-7b-gemma-dpo-avg", + "developer": "ZHLiu627", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GemmaForCausalLM", + "params_billions": 8.538 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.309 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4149 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0453 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2785 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4107 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2851 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ZHLiu627/zephyr-7b-gemma-rpo-avg/96262938-1146-4993-92a1-a2ddb2519f8a.json b/data/hfopenllm_v2/ZHLiu627/zephyr-7b-gemma-rpo-avg/96262938-1146-4993-92a1-a2ddb2519f8a.json new file mode 100644 index 000000000..399559c0f --- /dev/null +++ b/data/hfopenllm_v2/ZHLiu627/zephyr-7b-gemma-rpo-avg/96262938-1146-4993-92a1-a2ddb2519f8a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ZHLiu627_zephyr-7b-gemma-rpo-avg/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zephyr-7b-gemma-rpo-avg", + "id": "ZHLiu627/zephyr-7b-gemma-rpo-avg", + "developer": "ZHLiu627", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GemmaForCausalLM", + "params_billions": 8.538 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3006 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4183 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0498 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2768 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": 
"TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4081 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2831 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ZeroXClem/L3-Aspire-Heart-Matrix-8B/292d7cfb-3e3c-47d8-8cca-33507f9ff081.json b/data/hfopenllm_v2/ZeroXClem/L3-Aspire-Heart-Matrix-8B/292d7cfb-3e3c-47d8-8cca-33507f9ff081.json new file mode 100644 index 000000000..8a789283c --- /dev/null +++ b/data/hfopenllm_v2/ZeroXClem/L3-Aspire-Heart-Matrix-8B/292d7cfb-3e3c-47d8-8cca-33507f9ff081.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ZeroXClem_L3-Aspire-Heart-Matrix-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3-Aspire-Heart-Matrix-8B", + "id": "ZeroXClem/L3-Aspire-Heart-Matrix-8B", + "developer": "ZeroXClem", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4834 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5384 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1828 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3247 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.4187 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3785 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ZeroXClem/L3-Aspire-Heart-Matrix-8B/e6d8d952-5a3d-4a97-860c-8275b10c6516.json b/data/hfopenllm_v2/ZeroXClem/L3-Aspire-Heart-Matrix-8B/e6d8d952-5a3d-4a97-860c-8275b10c6516.json deleted file mode 100644 index ef9fa6b92..000000000 --- a/data/hfopenllm_v2/ZeroXClem/L3-Aspire-Heart-Matrix-8B/e6d8d952-5a3d-4a97-860c-8275b10c6516.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ZeroXClem_L3-Aspire-Heart-Matrix-8B/1762652579.96632", - "retrieved_timestamp": "1762652579.966321", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ZeroXClem/L3-Aspire-Heart-Matrix-8B", - "developer": "ZeroXClem", - "inference_platform": "unknown", - "id": "ZeroXClem/L3-Aspire-Heart-Matrix-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48335305877294465 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5384211938486898 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18277945619335348 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4187083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3784906914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/ZeroXClem/Llama-3.1-8B-AthenaSky-MegaMix/3f29c10f-57ef-435b-85df-2cae30ae72fa.json b/data/hfopenllm_v2/ZeroXClem/Llama-3.1-8B-AthenaSky-MegaMix/3f29c10f-57ef-435b-85df-2cae30ae72fa.json new file mode 100644 index 000000000..a87472533 --- /dev/null +++ 
b/data/hfopenllm_v2/ZeroXClem/Llama-3.1-8B-AthenaSky-MegaMix/3f29c10f-57ef-435b-85df-2cae30ae72fa.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ZeroXClem_Llama-3.1-8B-AthenaSky-MegaMix/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8B-AthenaSky-MegaMix", + "id": "ZeroXClem/Llama-3.1-8B-AthenaSky-MegaMix", + "developer": "ZeroXClem", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6301 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5163 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2795 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3538 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3504 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ZeroXClem/Llama-3.1-8B-RainbowLight-EtherealMix/d7f022fe-86cb-4e4e-a672-62c2dc8cffd3.json b/data/hfopenllm_v2/ZeroXClem/Llama-3.1-8B-RainbowLight-EtherealMix/d7f022fe-86cb-4e4e-a672-62c2dc8cffd3.json new file mode 100644 index 000000000..eb07cdad6 --- /dev/null +++ b/data/hfopenllm_v2/ZeroXClem/Llama-3.1-8B-RainbowLight-EtherealMix/d7f022fe-86cb-4e4e-a672-62c2dc8cffd3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + 
"evaluation_id": "hfopenllm_v2/ZeroXClem_Llama-3.1-8B-RainbowLight-EtherealMix/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8B-RainbowLight-EtherealMix", + "id": "ZeroXClem/Llama-3.1-8B-RainbowLight-EtherealMix", + "developer": "ZeroXClem", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4973 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5155 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1216 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3947 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.363 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ZeroXClem/Llama-3.1-8B-SpecialTitanFusion/baa35c90-c494-4dff-af28-cb549e40bed8.json b/data/hfopenllm_v2/ZeroXClem/Llama-3.1-8B-SpecialTitanFusion/baa35c90-c494-4dff-af28-cb549e40bed8.json new file mode 100644 index 000000000..4a6a4d956 --- /dev/null +++ b/data/hfopenllm_v2/ZeroXClem/Llama-3.1-8B-SpecialTitanFusion/baa35c90-c494-4dff-af28-cb549e40bed8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ZeroXClem_Llama-3.1-8B-SpecialTitanFusion/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + 
"source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8B-SpecialTitanFusion", + "id": "ZeroXClem/Llama-3.1-8B-SpecialTitanFusion", + "developer": "ZeroXClem", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7402 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5439 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2334 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3874 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3621 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ZeroXClem/Llama-3.1-8B-SuperNova-EtherealHermes/2fdc3186-6791-4550-ac4f-a1a5a5a1d514.json b/data/hfopenllm_v2/ZeroXClem/Llama-3.1-8B-SuperNova-EtherealHermes/2fdc3186-6791-4550-ac4f-a1a5a5a1d514.json new file mode 100644 index 000000000..554e92142 --- /dev/null +++ b/data/hfopenllm_v2/ZeroXClem/Llama-3.1-8B-SuperNova-EtherealHermes/2fdc3186-6791-4550-ac4f-a1a5a5a1d514.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ZeroXClem_Llama-3.1-8B-SuperNova-EtherealHermes/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + 
}, + "model_info": { + "name": "Llama-3.1-8B-SuperNova-EtherealHermes", + "id": "ZeroXClem/Llama-3.1-8B-SuperNova-EtherealHermes", + "developer": "ZeroXClem", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7339 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5244 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1745 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4066 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3745 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ZeroXClem/Llama-3.1-8B-SuperTulu-LexiNova/f687df8b-42b5-4d94-b741-1b516d9221b2.json b/data/hfopenllm_v2/ZeroXClem/Llama-3.1-8B-SuperTulu-LexiNova/f687df8b-42b5-4d94-b741-1b516d9221b2.json new file mode 100644 index 000000000..561402998 --- /dev/null +++ b/data/hfopenllm_v2/ZeroXClem/Llama-3.1-8B-SuperTulu-LexiNova/f687df8b-42b5-4d94-b741-1b516d9221b2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ZeroXClem_Llama-3.1-8B-SuperTulu-LexiNova/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8B-SuperTulu-LexiNova", + "id": "ZeroXClem/Llama-3.1-8B-SuperTulu-LexiNova", + "developer": "ZeroXClem", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4165 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5079 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.253 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2861 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3971 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3368 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ZeroXClem/Qwen-2.5-Aether-SlerpFusion-7B/c3a8a952-6869-4eee-a59f-4ae33ac72986.json b/data/hfopenllm_v2/ZeroXClem/Qwen-2.5-Aether-SlerpFusion-7B/c3a8a952-6869-4eee-a59f-4ae33ac72986.json new file mode 100644 index 000000000..352964db5 --- /dev/null +++ b/data/hfopenllm_v2/ZeroXClem/Qwen-2.5-Aether-SlerpFusion-7B/c3a8a952-6869-4eee-a59f-4ae33ac72986.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ZeroXClem_Qwen-2.5-Aether-SlerpFusion-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen-2.5-Aether-SlerpFusion-7B", + "id": "ZeroXClem/Qwen-2.5-Aether-SlerpFusion-7B", + "developer": "ZeroXClem", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + 
{ + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6262 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5462 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2734 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4178 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4327 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ZeroXClem/Qwen2.5-7B-CelestialHarmony-1M/a7a74117-71e4-49b2-bd65-add82c9165d8.json b/data/hfopenllm_v2/ZeroXClem/Qwen2.5-7B-CelestialHarmony-1M/a7a74117-71e4-49b2-bd65-add82c9165d8.json new file mode 100644 index 000000000..1324b28ff --- /dev/null +++ b/data/hfopenllm_v2/ZeroXClem/Qwen2.5-7B-CelestialHarmony-1M/a7a74117-71e4-49b2-bd65-add82c9165d8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ZeroXClem_Qwen2.5-7B-CelestialHarmony-1M/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-CelestialHarmony-1M", + "id": "ZeroXClem/Qwen2.5-7B-CelestialHarmony-1M", + "developer": "ZeroXClem", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5944 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5431 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3474 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3188 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4595 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4387 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ZeroXClem/Qwen2.5-7B-HomerAnvita-NerdMix/04ee694c-0c89-4f25-b10f-315a24743ba2.json b/data/hfopenllm_v2/ZeroXClem/Qwen2.5-7B-HomerAnvita-NerdMix/04ee694c-0c89-4f25-b10f-315a24743ba2.json new file mode 100644 index 000000000..444e35d1b --- /dev/null +++ b/data/hfopenllm_v2/ZeroXClem/Qwen2.5-7B-HomerAnvita-NerdMix/04ee694c-0c89-4f25-b10f-315a24743ba2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ZeroXClem_Qwen2.5-7B-HomerAnvita-NerdMix/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-HomerAnvita-NerdMix", + "id": "ZeroXClem/Qwen2.5-7B-HomerAnvita-NerdMix", + "developer": "ZeroXClem", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.7708 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5541 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3837 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3196 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4391 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4432 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ZeroXClem/Qwen2.5-7B-HomerCreative-Mix/47fd4acb-acc3-4f12-8af5-c425d3754c38.json b/data/hfopenllm_v2/ZeroXClem/Qwen2.5-7B-HomerCreative-Mix/47fd4acb-acc3-4f12-8af5-c425d3754c38.json new file mode 100644 index 000000000..3f9b0472a --- /dev/null +++ b/data/hfopenllm_v2/ZeroXClem/Qwen2.5-7B-HomerCreative-Mix/47fd4acb-acc3-4f12-8af5-c425d3754c38.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ZeroXClem_Qwen2.5-7B-HomerCreative-Mix/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-HomerCreative-Mix", + "id": "ZeroXClem/Qwen2.5-7B-HomerCreative-Mix", + "developer": "ZeroXClem", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7835 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5548 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3565 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.435 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4447 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ZeroXClem/Qwen2.5-7B-Qandora-CySec/e19577f5-d1ba-45ad-8500-d18ae2b14440.json b/data/hfopenllm_v2/ZeroXClem/Qwen2.5-7B-Qandora-CySec/e19577f5-d1ba-45ad-8500-d18ae2b14440.json new file mode 100644 index 000000000..b81b82415 --- /dev/null +++ b/data/hfopenllm_v2/ZeroXClem/Qwen2.5-7B-Qandora-CySec/e19577f5-d1ba-45ad-8500-d18ae2b14440.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ZeroXClem_Qwen2.5-7B-Qandora-CySec/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-Qandora-CySec", + "id": "ZeroXClem/Qwen2.5-7B-Qandora-CySec", + "developer": "ZeroXClem", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6773 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.549 + } + }, + { + "evaluation_name": 
"MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2931 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4286 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4485 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ZeusLabs/L3-Aethora-15B-V2/0e9ed58c-1a3e-49b4-8013-994642a95920.json b/data/hfopenllm_v2/ZeusLabs/L3-Aethora-15B-V2/0e9ed58c-1a3e-49b4-8013-994642a95920.json deleted file mode 100644 index c030d9840..000000000 --- a/data/hfopenllm_v2/ZeusLabs/L3-Aethora-15B-V2/0e9ed58c-1a3e-49b4-8013-994642a95920.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ZeusLabs_L3-Aethora-15B-V2/1762652579.968798", - "retrieved_timestamp": "1762652579.9687989", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ZeusLabs/L3-Aethora-15B-V2", - "developer": "ZeusLabs", - "inference_platform": "unknown", - "id": "ZeusLabs/L3-Aethora-15B-V2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 15.01 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7208063493752133 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5010910465463698 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08081570996978851 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on 
GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3870833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3499833776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/ZeusLabs/L3-Aethora-15B-V2/e86443cd-453b-4ca0-8e7e-054764fe4bb9.json b/data/hfopenllm_v2/ZeusLabs/L3-Aethora-15B-V2/e86443cd-453b-4ca0-8e7e-054764fe4bb9.json new file mode 100644 index 000000000..575ec1a14 --- /dev/null +++ b/data/hfopenllm_v2/ZeusLabs/L3-Aethora-15B-V2/e86443cd-453b-4ca0-8e7e-054764fe4bb9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ZeusLabs_L3-Aethora-15B-V2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3-Aethora-15B-V2", + "id": "ZeusLabs/L3-Aethora-15B-V2", + "developer": "ZeusLabs", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 15.01 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7208 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5011 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0808 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2878 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.3871 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.35 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ZhangShenao/SELM-Llama-3-8B-Instruct-iter-3/24cd9977-f3fb-4619-aea1-59e1a36b2a5e.json b/data/hfopenllm_v2/ZhangShenao/SELM-Llama-3-8B-Instruct-iter-3/24cd9977-f3fb-4619-aea1-59e1a36b2a5e.json new file mode 100644 index 000000000..63d3d967e --- /dev/null +++ b/data/hfopenllm_v2/ZhangShenao/SELM-Llama-3-8B-Instruct-iter-3/24cd9977-f3fb-4619-aea1-59e1a36b2a5e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ZhangShenao_SELM-Llama-3-8B-Instruct-iter-3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SELM-Llama-3-8B-Instruct-iter-3", + "id": "ZhangShenao/SELM-Llama-3-8B-Instruct-iter-3", + "developer": "ZhangShenao", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6903 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5046 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0861 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3845 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + 
}, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3783 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ZhangShenao/SELM-Llama-3-8B-Instruct-iter-3/6bf4063b-44aa-4809-a400-5406abe5eb2e.json b/data/hfopenllm_v2/ZhangShenao/SELM-Llama-3-8B-Instruct-iter-3/6bf4063b-44aa-4809-a400-5406abe5eb2e.json deleted file mode 100644 index e9f11dddf..000000000 --- a/data/hfopenllm_v2/ZhangShenao/SELM-Llama-3-8B-Instruct-iter-3/6bf4063b-44aa-4809-a400-5406abe5eb2e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ZhangShenao_SELM-Llama-3-8B-Instruct-iter-3/1762652579.9690418", - "retrieved_timestamp": "1762652579.969043", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ZhangShenao/SELM-Llama-3-8B-Instruct-iter-3", - "developer": "ZhangShenao", - "inference_platform": "unknown", - "id": "ZhangShenao/SELM-Llama-3-8B-Instruct-iter-3", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6902817856620433 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5046089390770511 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08610271903323263 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38451041666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3783244680851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/aaditya/Llama3-OpenBioLLM-70B/1401f0d9-6f4c-41d2-819f-eb9487c5c1e6.json b/data/hfopenllm_v2/aaditya/Llama3-OpenBioLLM-70B/1401f0d9-6f4c-41d2-819f-eb9487c5c1e6.json new file mode 100644 index 000000000..82061858d --- /dev/null +++ b/data/hfopenllm_v2/aaditya/Llama3-OpenBioLLM-70B/1401f0d9-6f4c-41d2-819f-eb9487c5c1e6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/aaditya_Llama3-OpenBioLLM-70B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3-OpenBioLLM-70B", + "id": "aaditya/Llama3-OpenBioLLM-70B", + "developer": "aaditya", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7597 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6399 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1971 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.323 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4417 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4867 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/abacusai/Dracarys-72B-Instruct/2f1e6f4e-86e6-47a4-96e6-3bc2b330cd3a.json b/data/hfopenllm_v2/abacusai/Dracarys-72B-Instruct/2f1e6f4e-86e6-47a4-96e6-3bc2b330cd3a.json deleted file mode 100644 index 7b20d91ec..000000000 --- a/data/hfopenllm_v2/abacusai/Dracarys-72B-Instruct/2f1e6f4e-86e6-47a4-96e6-3bc2b330cd3a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/abacusai_Dracarys-72B-Instruct/1762652579.969532", - "retrieved_timestamp": "1762652579.969532", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { 
- "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "abacusai/Dracarys-72B-Instruct", - "developer": "abacusai", - "inference_platform": "unknown", - "id": "abacusai/Dracarys-72B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7855778224001206 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6944066392084981 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39652567975830816 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39093959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4558229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5456283244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/abacusai/Dracarys-72B-Instruct/4b1f2aab-ef92-4231-9bdd-96918b26914c.json b/data/hfopenllm_v2/abacusai/Dracarys-72B-Instruct/4b1f2aab-ef92-4231-9bdd-96918b26914c.json new file mode 100644 index 000000000..46a1a26ce --- /dev/null +++ b/data/hfopenllm_v2/abacusai/Dracarys-72B-Instruct/4b1f2aab-ef92-4231-9bdd-96918b26914c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/abacusai_Dracarys-72B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dracarys-72B-Instruct", + "id": "abacusai/Dracarys-72B-Instruct", + "developer": "abacusai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7856 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", 
+ "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6944 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3965 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3909 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4558 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5456 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/abacusai/Liberated-Qwen1.5-14B/4956e127-14a1-405e-a0e0-76fe94ea727b.json b/data/hfopenllm_v2/abacusai/Liberated-Qwen1.5-14B/4956e127-14a1-405e-a0e0-76fe94ea727b.json new file mode 100644 index 000000000..afc41b73e --- /dev/null +++ b/data/hfopenllm_v2/abacusai/Liberated-Qwen1.5-14B/4956e127-14a1-405e-a0e0-76fe94ea727b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/abacusai_Liberated-Qwen1.5-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Liberated-Qwen1.5-14B", + "id": "abacusai/Liberated-Qwen1.5-14B", + "developer": "abacusai", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3631 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.4948 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1601 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4175 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3512 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/abacusai/Llama-3-Smaug-8B/90fb6e40-88f7-4ce2-ae99-308d87e69718.json b/data/hfopenllm_v2/abacusai/Llama-3-Smaug-8B/90fb6e40-88f7-4ce2-ae99-308d87e69718.json new file mode 100644 index 000000000..aee6436ea --- /dev/null +++ b/data/hfopenllm_v2/abacusai/Llama-3-Smaug-8B/90fb6e40-88f7-4ce2-ae99-308d87e69718.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/abacusai_Llama-3-Smaug-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Smaug-8B", + "id": "abacusai/Llama-3-Smaug-8B", + "developer": "abacusai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4867 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4931 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": 
"Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0853 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2483 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3622 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3185 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/abacusai/Smaug-34B-v0.1/cdad0f08-1c60-4493-bed0-9733894b367a.json b/data/hfopenllm_v2/abacusai/Smaug-34B-v0.1/cdad0f08-1c60-4493-bed0-9733894b367a.json new file mode 100644 index 000000000..a0a920244 --- /dev/null +++ b/data/hfopenllm_v2/abacusai/Smaug-34B-v0.1/cdad0f08-1c60-4493-bed0-9733894b367a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/abacusai_Smaug-34B-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Smaug-34B-v0.1", + "id": "abacusai/Smaug-34B-v0.1", + "developer": "abacusai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5016 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5358 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0718 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": 
"hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3979 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4543 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/abacusai/Smaug-34B-v0.1/e0b9044d-1b87-44f7-b59b-88d790f429e5.json b/data/hfopenllm_v2/abacusai/Smaug-34B-v0.1/e0b9044d-1b87-44f7-b59b-88d790f429e5.json deleted file mode 100644 index 2f4b5b08b..000000000 --- a/data/hfopenllm_v2/abacusai/Smaug-34B-v0.1/e0b9044d-1b87-44f7-b59b-88d790f429e5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/abacusai_Smaug-34B-v0.1/1762652579.970392", - "retrieved_timestamp": "1762652579.9703932", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "abacusai/Smaug-34B-v0.1", - "developer": "abacusai", - "inference_platform": "unknown", - "id": "abacusai/Smaug-34B-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5015625207782018 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5357785983493821 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3296979865771812 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.397875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4542885638297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/abacusai/Smaug-72B-v0.1/8e83b4f7-736f-4e03-8256-2a1fc421b04f.json b/data/hfopenllm_v2/abacusai/Smaug-72B-v0.1/8e83b4f7-736f-4e03-8256-2a1fc421b04f.json new file mode 100644 index 000000000..d0562870a --- /dev/null +++ b/data/hfopenllm_v2/abacusai/Smaug-72B-v0.1/8e83b4f7-736f-4e03-8256-2a1fc421b04f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/abacusai_Smaug-72B-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Smaug-72B-v0.1", + "id": "abacusai/Smaug-72B-v0.1", + "developer": "abacusai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 72.289 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5167 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5996 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1911 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3238 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4473 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4624 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/abacusai/Smaug-72B-v0.1/a3b08cd3-6ead-4db0-92ed-212c6b0e45ee.json b/data/hfopenllm_v2/abacusai/Smaug-72B-v0.1/a3b08cd3-6ead-4db0-92ed-212c6b0e45ee.json deleted file mode 100644 index 822b9eee5..000000000 --- a/data/hfopenllm_v2/abacusai/Smaug-72B-v0.1/a3b08cd3-6ead-4db0-92ed-212c6b0e45ee.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/abacusai_Smaug-72B-v0.1/1762652579.970887", - "retrieved_timestamp": "1762652579.9708889", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "abacusai/Smaug-72B-v0.1", - "developer": "abacusai", - "inference_platform": "unknown", - "id": "abacusai/Smaug-72B-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 72.289 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5167001334237601 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5995632330786429 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19108761329305135 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4473229166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4623503989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/abacusai/Smaug-Llama-3-70B-Instruct-32K/962b4977-63f0-4a87-a36e-f3e592b74761.json b/data/hfopenllm_v2/abacusai/Smaug-Llama-3-70B-Instruct-32K/962b4977-63f0-4a87-a36e-f3e592b74761.json deleted file mode 100644 index c57655f29..000000000 --- a/data/hfopenllm_v2/abacusai/Smaug-Llama-3-70B-Instruct-32K/962b4977-63f0-4a87-a36e-f3e592b74761.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/abacusai_Smaug-Llama-3-70B-Instruct-32K/1762652579.971162", - "retrieved_timestamp": "1762652579.9711628", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - 
"source_type": "documentation" - }, - "model_info": { - "name": "abacusai/Smaug-Llama-3-70B-Instruct-32K", - "developer": "abacusai", - "inference_platform": "unknown", - "id": "abacusai/Smaug-Llama-3-70B-Instruct-32K", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7761107195574409 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6493108088828602 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27492447129909364 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4207916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47647938829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/abacusai/Smaug-Llama-3-70B-Instruct-32K/f0d6639d-8485-4bcd-b069-046a747dfbfa.json b/data/hfopenllm_v2/abacusai/Smaug-Llama-3-70B-Instruct-32K/f0d6639d-8485-4bcd-b069-046a747dfbfa.json new file mode 100644 index 000000000..cb2b7cd68 --- /dev/null +++ b/data/hfopenllm_v2/abacusai/Smaug-Llama-3-70B-Instruct-32K/f0d6639d-8485-4bcd-b069-046a747dfbfa.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/abacusai_Smaug-Llama-3-70B-Instruct-32K/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Smaug-Llama-3-70B-Instruct-32K", + "id": "abacusai/Smaug-Llama-3-70B-Instruct-32K", + "developer": "abacusai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7761 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": 
"SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6493 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2749 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4208 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4765 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/abacusai/Smaug-Mixtral-v0.1/ba0fe822-7a57-4ccb-a97e-e852a59d9ae1.json b/data/hfopenllm_v2/abacusai/Smaug-Mixtral-v0.1/ba0fe822-7a57-4ccb-a97e-e852a59d9ae1.json deleted file mode 100644 index 9a2a5acba..000000000 --- a/data/hfopenllm_v2/abacusai/Smaug-Mixtral-v0.1/ba0fe822-7a57-4ccb-a97e-e852a59d9ae1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/abacusai_Smaug-Mixtral-v0.1/1762652579.971408", - "retrieved_timestamp": "1762652579.9714088", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "abacusai/Smaug-Mixtral-v0.1", - "developer": "abacusai", - "inference_platform": "unknown", - "id": "abacusai/Smaug-Mixtral-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 46.703 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5554428915278129 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5162245602454115 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact 
Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09516616314199396 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4298125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3351894946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/abacusai/Smaug-Mixtral-v0.1/d1fe36ba-04f8-4110-8c39-81d393c4cbfc.json b/data/hfopenllm_v2/abacusai/Smaug-Mixtral-v0.1/d1fe36ba-04f8-4110-8c39-81d393c4cbfc.json new file mode 100644 index 000000000..e773b34ab --- /dev/null +++ b/data/hfopenllm_v2/abacusai/Smaug-Mixtral-v0.1/d1fe36ba-04f8-4110-8c39-81d393c4cbfc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/abacusai_Smaug-Mixtral-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Smaug-Mixtral-v0.1", + "id": "abacusai/Smaug-Mixtral-v0.1", + "developer": "abacusai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 46.703 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5554 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5162 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0952 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + 
"dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4298 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3352 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/abacusai/Smaug-Qwen2-72B-Instruct/5a8ab5fb-ec1e-490c-b643-e3b9d49f5d34.json b/data/hfopenllm_v2/abacusai/Smaug-Qwen2-72B-Instruct/5a8ab5fb-ec1e-490c-b643-e3b9d49f5d34.json new file mode 100644 index 000000000..064650e3a --- /dev/null +++ b/data/hfopenllm_v2/abacusai/Smaug-Qwen2-72B-Instruct/5a8ab5fb-ec1e-490c-b643-e3b9d49f5d34.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/abacusai_Smaug-Qwen2-72B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Smaug-Qwen2-72B-Instruct", + "id": "abacusai/Smaug-Qwen2-72B-Instruct", + "developer": "abacusai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7825 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.691 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4131 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3616 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4401 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.519 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/abacusai/Smaug-Qwen2-72B-Instruct/84695a6b-dc11-448c-bbeb-b3cc05cde7ba.json b/data/hfopenllm_v2/abacusai/Smaug-Qwen2-72B-Instruct/84695a6b-dc11-448c-bbeb-b3cc05cde7ba.json deleted file mode 100644 index ec778a995..000000000 --- a/data/hfopenllm_v2/abacusai/Smaug-Qwen2-72B-Instruct/84695a6b-dc11-448c-bbeb-b3cc05cde7ba.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/abacusai_Smaug-Qwen2-72B-Instruct/1762652579.9716392", - "retrieved_timestamp": "1762652579.97164", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "abacusai/Smaug-Qwen2-72B-Instruct", - "developer": "abacusai", - "inference_platform": "unknown", - "id": "abacusai/Smaug-Qwen2-72B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7825303527972447 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6909789934583822 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4131419939577039 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3615771812080537 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44007291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.519032579787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/abacusai/bigstral-12b-32k/aed1ac03-5364-477e-ab8f-68b599170128.json b/data/hfopenllm_v2/abacusai/bigstral-12b-32k/aed1ac03-5364-477e-ab8f-68b599170128.json deleted file mode 100644 index 039a1db45..000000000 --- 
a/data/hfopenllm_v2/abacusai/bigstral-12b-32k/aed1ac03-5364-477e-ab8f-68b599170128.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/abacusai_bigstral-12b-32k/1762652579.971883", - "retrieved_timestamp": "1762652579.971884", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "abacusai/bigstral-12b-32k", - "developer": "abacusai", - "inference_platform": "unknown", - "id": "abacusai/bigstral-12b-32k", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.476 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41938057686937324 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4700122314782882 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45597916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26412898936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/abacusai/bigstral-12b-32k/de944f89-d2d4-4b01-b4b5-e7cbd1d8d1ae.json b/data/hfopenllm_v2/abacusai/bigstral-12b-32k/de944f89-d2d4-4b01-b4b5-e7cbd1d8d1ae.json new file mode 100644 index 000000000..0c72ea1cb --- /dev/null +++ b/data/hfopenllm_v2/abacusai/bigstral-12b-32k/de944f89-d2d4-4b01-b4b5-e7cbd1d8d1ae.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/abacusai_bigstral-12b-32k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bigstral-12b-32k", + "id": "abacusai/bigstral-12b-32k", + "developer": "abacusai", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.476 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + 
"dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4194 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.47 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0151 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.456 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2641 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/abacusai/bigyi-15b/19b4d65c-39c7-4b81-bb71-f166ab4f9490.json b/data/hfopenllm_v2/abacusai/bigyi-15b/19b4d65c-39c7-4b81-bb71-f166ab4f9490.json deleted file mode 100644 index 0f77ab967..000000000 --- a/data/hfopenllm_v2/abacusai/bigyi-15b/19b4d65c-39c7-4b81-bb71-f166ab4f9490.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/abacusai_bigyi-15b/1762652579.972117", - "retrieved_timestamp": "1762652579.972117", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "abacusai/bigyi-15b", - "developer": "abacusai", - "inference_platform": "unknown", - "id": "abacusai/bigyi-15b", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 15.058 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.20940327220663396 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4345298820215116 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35378125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30028257978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/abacusai/bigyi-15b/db96601a-2f7f-438f-915b-55fee0e0d1d1.json b/data/hfopenllm_v2/abacusai/bigyi-15b/db96601a-2f7f-438f-915b-55fee0e0d1d1.json new file mode 100644 index 000000000..78bf1bb7e --- /dev/null +++ b/data/hfopenllm_v2/abacusai/bigyi-15b/db96601a-2f7f-438f-915b-55fee0e0d1d1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/abacusai_bigyi-15b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bigyi-15b", + "id": "abacusai/bigyi-15b", + "developer": "abacusai", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 15.058 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2094 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4345 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0295 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": 
"hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3538 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/abhishek/autotrain-0tmgq-5tpbg/27912f7d-7033-4b7c-b93a-af1673ce4a9b.json b/data/hfopenllm_v2/abhishek/autotrain-0tmgq-5tpbg/27912f7d-7033-4b7c-b93a-af1673ce4a9b.json new file mode 100644 index 000000000..7ebb312b2 --- /dev/null +++ b/data/hfopenllm_v2/abhishek/autotrain-0tmgq-5tpbg/27912f7d-7033-4b7c-b93a-af1673ce4a9b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/abhishek_autotrain-0tmgq-5tpbg/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "autotrain-0tmgq-5tpbg", + "id": "abhishek/autotrain-0tmgq-5tpbg", + "developer": "abhishek", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1957 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3135 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.2517 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.365 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1151 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/abhishek/autotrain-0tmgq-5tpbg/b5707c22-a2a2-4787-a902-b72945ebccd9.json b/data/hfopenllm_v2/abhishek/autotrain-0tmgq-5tpbg/b5707c22-a2a2-4787-a902-b72945ebccd9.json deleted file mode 100644 index 55056498d..000000000 --- a/data/hfopenllm_v2/abhishek/autotrain-0tmgq-5tpbg/b5707c22-a2a2-4787-a902-b72945ebccd9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/abhishek_autotrain-0tmgq-5tpbg/1762652579.972783", - "retrieved_timestamp": "1762652579.972784", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "abhishek/autotrain-0tmgq-5tpbg", - "developer": "abhishek", - "inference_platform": "unknown", - "id": "abhishek/autotrain-0tmgq-5tpbg", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19516549422199764 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3127326480314375 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35837499999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11436170212765957 - } - } - ] 
-} diff --git a/data/hfopenllm_v2/abhishek/autotrain-0tmgq-5tpbg/da58a484-4a45-4a70-a651-031ada8023d5.json b/data/hfopenllm_v2/abhishek/autotrain-0tmgq-5tpbg/da58a484-4a45-4a70-a651-031ada8023d5.json new file mode 100644 index 000000000..f682d2d9d --- /dev/null +++ b/data/hfopenllm_v2/abhishek/autotrain-0tmgq-5tpbg/da58a484-4a45-4a70-a651-031ada8023d5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/abhishek_autotrain-0tmgq-5tpbg/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "autotrain-0tmgq-5tpbg", + "id": "abhishek/autotrain-0tmgq-5tpbg", + "developer": "abhishek", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1952 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3127 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0128 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3584 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1144 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/abhishek/autotrain-0tmgq-5tpbg/ddd32642-ed7a-41b8-974a-f85b7f04d0db.json b/data/hfopenllm_v2/abhishek/autotrain-0tmgq-5tpbg/ddd32642-ed7a-41b8-974a-f85b7f04d0db.json deleted file mode 100644 index 
8c024f5a1..000000000 --- a/data/hfopenllm_v2/abhishek/autotrain-0tmgq-5tpbg/ddd32642-ed7a-41b8-974a-f85b7f04d0db.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/abhishek_autotrain-0tmgq-5tpbg/1762652579.972393", - "retrieved_timestamp": "1762652579.972395", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "abhishek/autotrain-0tmgq-5tpbg", - "developer": "abhishek", - "inference_platform": "unknown", - "id": "abhishek/autotrain-0tmgq-5tpbg", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19571514692127998 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3134513987945074 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36504166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11510970744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/abhishek/autotrain-llama3-70b-orpo-v1/e8bd221d-8a89-4e3c-8815-0bff27574053.json b/data/hfopenllm_v2/abhishek/autotrain-llama3-70b-orpo-v1/e8bd221d-8a89-4e3c-8815-0bff27574053.json new file mode 100644 index 000000000..3051b9372 --- /dev/null +++ b/data/hfopenllm_v2/abhishek/autotrain-llama3-70b-orpo-v1/e8bd221d-8a89-4e3c-8815-0bff27574053.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/abhishek_autotrain-llama3-70b-orpo-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "autotrain-llama3-70b-orpo-v1", + "id": "abhishek/autotrain-llama3-70b-orpo-v1", + "developer": "abhishek", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + 
}, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4233 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5998 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0106 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2441 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3579 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1122 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/abhishek/autotrain-llama3-70b-orpo-v2/ffc21c2a-59fb-4ad8-88a4-930879b6eba0.json b/data/hfopenllm_v2/abhishek/autotrain-llama3-70b-orpo-v2/ffc21c2a-59fb-4ad8-88a4-930879b6eba0.json new file mode 100644 index 000000000..cfef2a069 --- /dev/null +++ b/data/hfopenllm_v2/abhishek/autotrain-llama3-70b-orpo-v2/ffc21c2a-59fb-4ad8-88a4-930879b6eba0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/abhishek_autotrain-llama3-70b-orpo-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "autotrain-llama3-70b-orpo-v2", + "id": "abhishek/autotrain-llama3-70b-orpo-v2", + "developer": "abhishek", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5406 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5899 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2107 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4113 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4818 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/abhishek/autotrain-llama3-orpo-v2/1e506afa-0d08-45d6-9242-b06104aa67e8.json b/data/hfopenllm_v2/abhishek/autotrain-llama3-orpo-v2/1e506afa-0d08-45d6-9242-b06104aa67e8.json new file mode 100644 index 000000000..6b46e2ceb --- /dev/null +++ b/data/hfopenllm_v2/abhishek/autotrain-llama3-orpo-v2/1e506afa-0d08-45d6-9242-b06104aa67e8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/abhishek_autotrain-llama3-orpo-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "autotrain-llama3-orpo-v2", + "id": "abhishek/autotrain-llama3-orpo-v2", + "developer": "abhishek", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4372 + } + }, + { + "evaluation_name": 
"BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3159 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0468 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3792 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2218 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/abhishek/autotrain-vr4a1-e5mms/7d66bb93-cb2f-4be6-b133-1f0325be58e1.json b/data/hfopenllm_v2/abhishek/autotrain-vr4a1-e5mms/7d66bb93-cb2f-4be6-b133-1f0325be58e1.json new file mode 100644 index 000000000..c2368e06c --- /dev/null +++ b/data/hfopenllm_v2/abhishek/autotrain-vr4a1-e5mms/7d66bb93-cb2f-4be6-b133-1f0325be58e1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/abhishek_autotrain-vr4a1-e5mms/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "autotrain-vr4a1-e5mms", + "id": "abhishek/autotrain-vr4a1-e5mms", + "developer": "abhishek", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 16.061 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2142 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 
0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5001 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1412 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3196 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3891 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3667 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/abhishek/autotrain-vr4a1-e5mms/e1462a5a-d120-4c0f-ba13-fbecb18619a0.json b/data/hfopenllm_v2/abhishek/autotrain-vr4a1-e5mms/e1462a5a-d120-4c0f-ba13-fbecb18619a0.json deleted file mode 100644 index 1cbb18a63..000000000 --- a/data/hfopenllm_v2/abhishek/autotrain-vr4a1-e5mms/e1462a5a-d120-4c0f-ba13-fbecb18619a0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/abhishek_autotrain-vr4a1-e5mms/1762652579.973708", - "retrieved_timestamp": "1762652579.973709", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "abhishek/autotrain-vr4a1-e5mms", - "developer": "abhishek", - "inference_platform": "unknown", - "id": "abhishek/autotrain-vr4a1-e5mms", - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 16.061 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21422492320376602 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5000624442873264 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.14123867069486404 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.389125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36668882978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/abideen/MedPhi-4-14B-v1/936f3c5f-7817-4118-96c8-e4061d4560fb.json b/data/hfopenllm_v2/abideen/MedPhi-4-14B-v1/936f3c5f-7817-4118-96c8-e4061d4560fb.json new file mode 100644 index 000000000..73811a080 --- /dev/null +++ b/data/hfopenllm_v2/abideen/MedPhi-4-14B-v1/936f3c5f-7817-4118-96c8-e4061d4560fb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/abideen_MedPhi-4-14B-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MedPhi-4-14B-v1", + "id": "abideen/MedPhi-4-14B-v1", + "developer": "abideen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6277 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6897 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2931 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.344 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4155 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5338 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/adamo1139/Yi-34B-200K-AEZAKMI-v2/7d36ceed-2a1b-4b20-88ae-0a609cc161e9.json b/data/hfopenllm_v2/adamo1139/Yi-34B-200K-AEZAKMI-v2/7d36ceed-2a1b-4b20-88ae-0a609cc161e9.json new file mode 100644 index 000000000..a4e2aa16d --- /dev/null +++ b/data/hfopenllm_v2/adamo1139/Yi-34B-200K-AEZAKMI-v2/7d36ceed-2a1b-4b20-88ae-0a609cc161e9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/adamo1139_Yi-34B-200K-AEZAKMI-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yi-34B-200K-AEZAKMI-v2", + "id": "adamo1139/Yi-34B-200K-AEZAKMI-v2", + "developer": "adamo1139", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4555 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5384 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0566 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3322 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3886 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": 
"hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4513 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/adamo1139/Yi-34B-200K-AEZAKMI-v2/a28de361-e90d-44f7-b609-e4d64ae1be6f.json b/data/hfopenllm_v2/adamo1139/Yi-34B-200K-AEZAKMI-v2/a28de361-e90d-44f7-b609-e4d64ae1be6f.json deleted file mode 100644 index ee6d31751..000000000 --- a/data/hfopenllm_v2/adamo1139/Yi-34B-200K-AEZAKMI-v2/a28de361-e90d-44f7-b609-e4d64ae1be6f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/adamo1139_Yi-34B-200K-AEZAKMI-v2/1762652579.974368", - "retrieved_timestamp": "1762652579.974369", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "adamo1139/Yi-34B-200K-AEZAKMI-v2", - "developer": "adamo1139", - "inference_platform": "unknown", - "id": "adamo1139/Yi-34B-200K-AEZAKMI-v2", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4555257827010111 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5383819237015192 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33221476510067116 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38860416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4512965425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/adriszmar/QAIMath-Qwen2.5-7B-TIES/77cace56-503f-4531-a4eb-0178a68cc283.json b/data/hfopenllm_v2/adriszmar/QAIMath-Qwen2.5-7B-TIES/77cace56-503f-4531-a4eb-0178a68cc283.json new file mode 100644 index 000000000..e2903971c --- /dev/null +++ b/data/hfopenllm_v2/adriszmar/QAIMath-Qwen2.5-7B-TIES/77cace56-503f-4531-a4eb-0178a68cc283.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/adriszmar_QAIMath-Qwen2.5-7B-TIES/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QAIMath-Qwen2.5-7B-TIES", + "id": "adriszmar/QAIMath-Qwen2.5-7B-TIES", + "developer": "adriszmar", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1685 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3124 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0015 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2492 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3963 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1066 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/adriszmar/QAIMath-Qwen2.5-7B-TIES/9e49b710-2413-42f3-8943-bc9dbf68cb3c.json b/data/hfopenllm_v2/adriszmar/QAIMath-Qwen2.5-7B-TIES/9e49b710-2413-42f3-8943-bc9dbf68cb3c.json new file mode 100644 index 000000000..dbd70e39f --- /dev/null +++ b/data/hfopenllm_v2/adriszmar/QAIMath-Qwen2.5-7B-TIES/9e49b710-2413-42f3-8943-bc9dbf68cb3c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/adriszmar_QAIMath-Qwen2.5-7B-TIES/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QAIMath-Qwen2.5-7B-TIES", + "id": "adriszmar/QAIMath-Qwen2.5-7B-TIES", + "developer": "adriszmar", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1746 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3126 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.245 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4096 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1087 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/aevalone/distill_qw_test/108ead60-3cee-43e7-925a-619bace5b65f.json b/data/hfopenllm_v2/aevalone/distill_qw_test/108ead60-3cee-43e7-925a-619bace5b65f.json deleted file mode 100644 index 198a1b435..000000000 --- a/data/hfopenllm_v2/aevalone/distill_qw_test/108ead60-3cee-43e7-925a-619bace5b65f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/aevalone_distill_qw_test/1762652579.975426", - "retrieved_timestamp": "1762652579.9754272", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "aevalone/distill_qw_test", - 
"developer": "aevalone", - "inference_platform": "unknown", - "id": "aevalone/distill_qw_test", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.740889728143548 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5245748734435777 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4780966767371601 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38596874999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4091589095744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/aevalone/distill_qw_test/9a5b3564-97df-4661-a171-37322386ac4d.json b/data/hfopenllm_v2/aevalone/distill_qw_test/9a5b3564-97df-4661-a171-37322386ac4d.json new file mode 100644 index 000000000..4e684cee1 --- /dev/null +++ b/data/hfopenllm_v2/aevalone/distill_qw_test/9a5b3564-97df-4661-a171-37322386ac4d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/aevalone_distill_qw_test/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "distill_qw_test", + "id": "aevalone/distill_qw_test", + "developer": "aevalone", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7409 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.5246 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4781 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.386 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4092 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/agentlans/Gemma2-9B-AdvancedFuse/0fc0450d-cdf1-44b5-a809-202d1dd6b5e3.json b/data/hfopenllm_v2/agentlans/Gemma2-9B-AdvancedFuse/0fc0450d-cdf1-44b5-a809-202d1dd6b5e3.json new file mode 100644 index 000000000..65f6c605e --- /dev/null +++ b/data/hfopenllm_v2/agentlans/Gemma2-9B-AdvancedFuse/0fc0450d-cdf1-44b5-a809-202d1dd6b5e3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/agentlans_Gemma2-9B-AdvancedFuse/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma2-9B-AdvancedFuse", + "id": "agentlans/Gemma2-9B-AdvancedFuse", + "developer": "agentlans", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1543 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5859 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1005 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3347 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4231 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/agentlans/Llama-3.2-1B-Instruct-CrashCourse12K/7f06c78c-f95e-4e50-aa57-da0579adcdae.json b/data/hfopenllm_v2/agentlans/Llama-3.2-1B-Instruct-CrashCourse12K/7f06c78c-f95e-4e50-aa57-da0579adcdae.json new file mode 100644 index 000000000..9800d3c2e --- /dev/null +++ b/data/hfopenllm_v2/agentlans/Llama-3.2-1B-Instruct-CrashCourse12K/7f06c78c-f95e-4e50-aa57-da0579adcdae.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/agentlans_Llama-3.2-1B-Instruct-CrashCourse12K/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-1B-Instruct-CrashCourse12K", + "id": "agentlans/Llama-3.2-1B-Instruct-CrashCourse12K", + "developer": "agentlans", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5395 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3548 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 
1.0 + }, + "score_details": { + "score": 0.071 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2408 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.321 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1809 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/agentlans/Llama-3.2-1B-Instruct-CrashCourse12K/fbedd898-b839-49c1-bd6d-3a8744d4138a.json b/data/hfopenllm_v2/agentlans/Llama-3.2-1B-Instruct-CrashCourse12K/fbedd898-b839-49c1-bd6d-3a8744d4138a.json deleted file mode 100644 index 57aa09d03..000000000 --- a/data/hfopenllm_v2/agentlans/Llama-3.2-1B-Instruct-CrashCourse12K/fbedd898-b839-49c1-bd6d-3a8744d4138a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/agentlans_Llama-3.2-1B-Instruct-CrashCourse12K/1762652579.976028", - "retrieved_timestamp": "1762652579.976029", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "agentlans/Llama-3.2-1B-Instruct-CrashCourse12K", - "developer": "agentlans", - "inference_platform": "unknown", - "id": "agentlans/Llama-3.2-1B-Instruct-CrashCourse12K", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5395062877609188 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35481032861183426 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07099697885196375 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2407718120805369 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32104166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1809341755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/agentlans/Llama3.1-8B-drill/06e55e47-9995-4fa2-877a-c728e9f9f1a1.json b/data/hfopenllm_v2/agentlans/Llama3.1-8B-drill/06e55e47-9995-4fa2-877a-c728e9f9f1a1.json new file mode 100644 index 000000000..3dd94e869 --- /dev/null +++ b/data/hfopenllm_v2/agentlans/Llama3.1-8B-drill/06e55e47-9995-4fa2-877a-c728e9f9f1a1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/agentlans_Llama3.1-8B-drill/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.1-8B-drill", + "id": "agentlans/Llama3.1-8B-drill", + "developer": "agentlans", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7652 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5016 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1715 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3672 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { 
+ "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3776 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/agentlans/Llama3.1-Daredevilish-Instruct/39af1e0a-d1e3-4372-bc18-d07f3dff09f0.json b/data/hfopenllm_v2/agentlans/Llama3.1-Daredevilish-Instruct/39af1e0a-d1e3-4372-bc18-d07f3dff09f0.json new file mode 100644 index 000000000..8cbe2b1db --- /dev/null +++ b/data/hfopenllm_v2/agentlans/Llama3.1-Daredevilish-Instruct/39af1e0a-d1e3-4372-bc18-d07f3dff09f0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/agentlans_Llama3.1-Daredevilish-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.1-Daredevilish-Instruct", + "id": "agentlans/Llama3.1-Daredevilish-Instruct", + "developer": "agentlans", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7926 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5235 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1722 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3911 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.3877 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/agentlans/Llama3.1-Daredevilish-Instruct/7a6d7a66-5772-4793-9597-ef0225b63f30.json b/data/hfopenllm_v2/agentlans/Llama3.1-Daredevilish-Instruct/7a6d7a66-5772-4793-9597-ef0225b63f30.json deleted file mode 100644 index 777534e26..000000000 --- a/data/hfopenllm_v2/agentlans/Llama3.1-Daredevilish-Instruct/7a6d7a66-5772-4793-9597-ef0225b63f30.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/agentlans_Llama3.1-Daredevilish-Instruct/1762652579.9768262", - "retrieved_timestamp": "1762652579.976827", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "agentlans/Llama3.1-Daredevilish-Instruct", - "developer": "agentlans", - "inference_platform": "unknown", - "id": "agentlans/Llama3.1-Daredevilish-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7925969760236173 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5235442557198345 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17220543806646527 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3910833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3877160904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/agentlans/Llama3.1-Daredevilish/f32d59d6-8ab9-4b7d-ad9d-f62ce6d559bd.json b/data/hfopenllm_v2/agentlans/Llama3.1-Daredevilish/f32d59d6-8ab9-4b7d-ad9d-f62ce6d559bd.json new file mode 100644 index 000000000..8fc5cbbdf --- /dev/null +++ b/data/hfopenllm_v2/agentlans/Llama3.1-Daredevilish/f32d59d6-8ab9-4b7d-ad9d-f62ce6d559bd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/agentlans_Llama3.1-Daredevilish/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + 
"evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.1-Daredevilish", + "id": "agentlans/Llama3.1-Daredevilish", + "developer": "agentlans", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6292 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5013 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1292 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4091 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3697 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/agentlans/Llama3.1-LexiHermes-SuperStorm/7ddc3aef-c6c5-4d04-8473-3b3bba219d7f.json b/data/hfopenllm_v2/agentlans/Llama3.1-LexiHermes-SuperStorm/7ddc3aef-c6c5-4d04-8473-3b3bba219d7f.json new file mode 100644 index 000000000..1c22faf44 --- /dev/null +++ b/data/hfopenllm_v2/agentlans/Llama3.1-LexiHermes-SuperStorm/7ddc3aef-c6c5-4d04-8473-3b3bba219d7f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/agentlans_Llama3.1-LexiHermes-SuperStorm/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.1-LexiHermes-SuperStorm", + "id": "agentlans/Llama3.1-LexiHermes-SuperStorm", + "developer": "agentlans", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7835 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5266 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1616 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.323 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3963 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3844 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/agentlans/Llama3.1-SuperDeepFuse-CrashCourse12K/ce80ac07-22d2-4883-ac6c-40b080e00b81.json b/data/hfopenllm_v2/agentlans/Llama3.1-SuperDeepFuse-CrashCourse12K/ce80ac07-22d2-4883-ac6c-40b080e00b81.json new file mode 100644 index 000000000..806df32bb --- /dev/null +++ b/data/hfopenllm_v2/agentlans/Llama3.1-SuperDeepFuse-CrashCourse12K/ce80ac07-22d2-4883-ac6c-40b080e00b81.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/agentlans_Llama3.1-SuperDeepFuse-CrashCourse12K/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.1-SuperDeepFuse-CrashCourse12K", + "id": "agentlans/Llama3.1-SuperDeepFuse-CrashCourse12K", + "developer": "agentlans", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 
8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7187 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5216 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1805 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3129 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4026 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3631 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/agentlans/Llama3.1-SuperDeepFuse/cbece170-f872-485f-a6c2-5db17ced73bc.json b/data/hfopenllm_v2/agentlans/Llama3.1-SuperDeepFuse/cbece170-f872-485f-a6c2-5db17ced73bc.json new file mode 100644 index 000000000..0f8c139a6 --- /dev/null +++ b/data/hfopenllm_v2/agentlans/Llama3.1-SuperDeepFuse/cbece170-f872-485f-a6c2-5db17ced73bc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/agentlans_Llama3.1-SuperDeepFuse/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.1-SuperDeepFuse", + "id": "agentlans/Llama3.1-SuperDeepFuse", + "developer": "agentlans", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7762 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5049 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1828 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3699 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3775 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/agentlans/Qwen2.5-0.5B-Instruct-CrashCourse-dropout/ad130d6f-6a5e-447a-a1ee-bfa2d93e5336.json b/data/hfopenllm_v2/agentlans/Qwen2.5-0.5B-Instruct-CrashCourse-dropout/ad130d6f-6a5e-447a-a1ee-bfa2d93e5336.json deleted file mode 100644 index 20cdb8703..000000000 --- a/data/hfopenllm_v2/agentlans/Qwen2.5-0.5B-Instruct-CrashCourse-dropout/ad130d6f-6a5e-447a-a1ee-bfa2d93e5336.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/agentlans_Qwen2.5-0.5B-Instruct-CrashCourse-dropout/1762652579.9778361", - "retrieved_timestamp": "1762652579.977837", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "agentlans/Qwen2.5-0.5B-Instruct-CrashCourse-dropout", - "developer": "agentlans", - "inference_platform": "unknown", - "id": "agentlans/Qwen2.5-0.5B-Instruct-CrashCourse-dropout", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2948831323111566 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3311726760218689 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04229607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3341875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16082114361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/agentlans/Qwen2.5-0.5B-Instruct-CrashCourse-dropout/c1fd751b-c6c3-4350-9618-f4b4840e1b69.json b/data/hfopenllm_v2/agentlans/Qwen2.5-0.5B-Instruct-CrashCourse-dropout/c1fd751b-c6c3-4350-9618-f4b4840e1b69.json new file mode 100644 index 000000000..8da33be6f --- /dev/null +++ b/data/hfopenllm_v2/agentlans/Qwen2.5-0.5B-Instruct-CrashCourse-dropout/c1fd751b-c6c3-4350-9618-f4b4840e1b69.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/agentlans_Qwen2.5-0.5B-Instruct-CrashCourse-dropout/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-Instruct-CrashCourse-dropout", + "id": "agentlans/Qwen2.5-0.5B-Instruct-CrashCourse-dropout", + "developer": "agentlans", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2949 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3312 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": 
"Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0423 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3342 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1608 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ahmeda335/13_outOf_32_pruned_layers_llama3.1-8b/bfd28b91-3a72-4417-b52b-804d2cbae12f.json b/data/hfopenllm_v2/ahmeda335/13_outOf_32_pruned_layers_llama3.1-8b/bfd28b91-3a72-4417-b52b-804d2cbae12f.json new file mode 100644 index 000000000..d883b1cb1 --- /dev/null +++ b/data/hfopenllm_v2/ahmeda335/13_outOf_32_pruned_layers_llama3.1-8b/bfd28b91-3a72-4417-b52b-804d2cbae12f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ahmeda335_13_outOf_32_pruned_layers_llama3.1-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "13_outOf_32_pruned_layers_llama3.1-8b", + "id": "ahmeda335/13_outOf_32_pruned_layers_llama3.1-8b", + "developer": "ahmeda335", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 5.195 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1748 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2883 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3803 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1129 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ai21labs/Jamba-v0.1/32c26cbc-3697-47a6-bd12-18187df9dda9.json b/data/hfopenllm_v2/ai21labs/Jamba-v0.1/32c26cbc-3697-47a6-bd12-18187df9dda9.json new file mode 100644 index 000000000..61193fc54 --- /dev/null +++ b/data/hfopenllm_v2/ai21labs/Jamba-v0.1/32c26cbc-3697-47a6-bd12-18187df9dda9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ai21labs_Jamba-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Jamba-v0.1", + "id": "ai21labs/Jamba-v0.1", + "developer": "ai21labs", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "JambaForCausalLM", + "params_billions": 51.57 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2026 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3602 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0159 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.359 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2492 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ai21labs/Jamba-v0.1/e9546f28-0f6b-449e-a2b3-c6ab262103cc.json b/data/hfopenllm_v2/ai21labs/Jamba-v0.1/e9546f28-0f6b-449e-a2b3-c6ab262103cc.json deleted file mode 100644 index 4d5bd55ad..000000000 --- a/data/hfopenllm_v2/ai21labs/Jamba-v0.1/e9546f28-0f6b-449e-a2b3-c6ab262103cc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ai21labs_Jamba-v0.1/1762652579.978585", - "retrieved_timestamp": "1762652579.978585", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ai21labs/Jamba-v0.1", - "developer": "ai21labs", - "inference_platform": "unknown", - "id": "ai21labs/Jamba-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "JambaForCausalLM", - "params_billions": 51.57 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20255920956395698 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36022602451645724 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35902083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.24916888297872342 - } - } - ] -} diff --git a/data/hfopenllm_v2/ai4bharat/Airavata/02280b9f-bc01-4e44-9d09-1e4ae8c0438b.json b/data/hfopenllm_v2/ai4bharat/Airavata/02280b9f-bc01-4e44-9d09-1e4ae8c0438b.json new file mode 100644 index 000000000..0badacdc8 --- /dev/null +++ b/data/hfopenllm_v2/ai4bharat/Airavata/02280b9f-bc01-4e44-9d09-1e4ae8c0438b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ai4bharat_Airavata/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Airavata", + "id": "ai4bharat/Airavata", + "developer": "ai4bharat", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.87 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0559 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3628 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0181 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3763 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1635 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ai4bharat/Airavata/350b0559-6331-4b8b-82e2-0463baea9d8a.json b/data/hfopenllm_v2/ai4bharat/Airavata/350b0559-6331-4b8b-82e2-0463baea9d8a.json deleted file mode 100644 index 4dd73544c..000000000 --- 
a/data/hfopenllm_v2/ai4bharat/Airavata/350b0559-6331-4b8b-82e2-0463baea9d8a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ai4bharat_Airavata/1762652579.978861", - "retrieved_timestamp": "1762652579.978862", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ai4bharat/Airavata", - "developer": "ai4bharat", - "inference_platform": "unknown", - "id": "ai4bharat/Airavata", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.87 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05585402288150995 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36276862514633795 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01812688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3762916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1634807180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/aixonlab/Aether-12b/831b6f81-1552-4a7b-acac-eb927001e440.json b/data/hfopenllm_v2/aixonlab/Aether-12b/831b6f81-1552-4a7b-acac-eb927001e440.json deleted file mode 100644 index 58751a63e..000000000 --- a/data/hfopenllm_v2/aixonlab/Aether-12b/831b6f81-1552-4a7b-acac-eb927001e440.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/aixonlab_Aether-12b/1762652579.979132", - "retrieved_timestamp": "1762652579.979133", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "aixonlab/Aether-12b", - "developer": "aixonlab", - "inference_platform": "unknown", - "id": "aixonlab/Aether-12b", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23468286369056326 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5179400750435481 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38286458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3410073138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/aixonlab/Aether-12b/a57d2d49-5ccf-48f5-8035-b1d480c80f40.json b/data/hfopenllm_v2/aixonlab/Aether-12b/a57d2d49-5ccf-48f5-8035-b1d480c80f40.json new file mode 100644 index 000000000..0edbd3648 --- /dev/null +++ b/data/hfopenllm_v2/aixonlab/Aether-12b/a57d2d49-5ccf-48f5-8035-b1d480c80f40.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/aixonlab_Aether-12b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Aether-12b", + "id": "aixonlab/Aether-12b", + "developer": "aixonlab", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2347 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5179 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1065 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3163 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3829 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.341 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/aixonlab/Grey-12b/2c4626c7-3016-4641-9862-0ba4f7f7936c.json b/data/hfopenllm_v2/aixonlab/Grey-12b/2c4626c7-3016-4641-9862-0ba4f7f7936c.json deleted file mode 100644 index 496e76b19..000000000 --- a/data/hfopenllm_v2/aixonlab/Grey-12b/2c4626c7-3016-4641-9862-0ba4f7f7936c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/aixonlab_Grey-12b/1762652579.979384", - "retrieved_timestamp": "1762652579.9793851", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "aixonlab/Grey-12b", - "developer": "aixonlab", - "inference_platform": "unknown", - "id": "aixonlab/Grey-12b", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39679938119744496 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5698957505959833 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09818731117824774 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4516354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3779089095744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/aixonlab/Grey-12b/6b5a3c69-f8dd-4952-96fc-b6e4dec1ed9d.json b/data/hfopenllm_v2/aixonlab/Grey-12b/6b5a3c69-f8dd-4952-96fc-b6e4dec1ed9d.json new file mode 100644 index 000000000..ae2b8e520 --- /dev/null +++ b/data/hfopenllm_v2/aixonlab/Grey-12b/6b5a3c69-f8dd-4952-96fc-b6e4dec1ed9d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/aixonlab_Grey-12b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Grey-12b", + "id": "aixonlab/Grey-12b", + "developer": "aixonlab", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3968 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5699 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0982 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4516 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 
1.0 + }, + "score_details": { + "score": 0.3779 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/aixonlab/Zara-14b-v1.2/a4c3ddcb-482c-47fb-9290-3c0678b38fb4.json b/data/hfopenllm_v2/aixonlab/Zara-14b-v1.2/a4c3ddcb-482c-47fb-9290-3c0678b38fb4.json deleted file mode 100644 index a191873ad..000000000 --- a/data/hfopenllm_v2/aixonlab/Zara-14b-v1.2/a4c3ddcb-482c-47fb-9290-3c0678b38fb4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/aixonlab_Zara-14b-v1.2/1762652579.979647", - "retrieved_timestamp": "1762652579.979647", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "aixonlab/Zara-14b-v1.2", - "developer": "aixonlab", - "inference_platform": "unknown", - "id": "aixonlab/Zara-14b-v1.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6197400674654362 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6405368457456163 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35347432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38171140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46747916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5263464095744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/aixonlab/Zara-14b-v1.2/fe0665dd-b976-4d90-b16b-6c2acfef15ff.json b/data/hfopenllm_v2/aixonlab/Zara-14b-v1.2/fe0665dd-b976-4d90-b16b-6c2acfef15ff.json new file mode 100644 index 000000000..fff6f1b78 --- /dev/null +++ b/data/hfopenllm_v2/aixonlab/Zara-14b-v1.2/fe0665dd-b976-4d90-b16b-6c2acfef15ff.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/aixonlab_Zara-14b-v1.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Zara-14b-v1.2", + "id": 
"aixonlab/Zara-14b-v1.2", + "developer": "aixonlab", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6197 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6405 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3535 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3817 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4675 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5263 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/akhadangi/Llama3.2.1B.0.01-First/8c6bdc44-fd29-45e7-b161-2c8e07ef2935.json b/data/hfopenllm_v2/akhadangi/Llama3.2.1B.0.01-First/8c6bdc44-fd29-45e7-b161-2c8e07ef2935.json new file mode 100644 index 000000000..d89f7563d --- /dev/null +++ b/data/hfopenllm_v2/akhadangi/Llama3.2.1B.0.01-First/8c6bdc44-fd29-45e7-b161-2c8e07ef2935.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/akhadangi_Llama3.2.1B.0.01-First/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.2.1B.0.01-First", + "id": "akhadangi/Llama3.2.1B.0.01-First", + "developer": "akhadangi", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + 
"evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0814 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3189 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0181 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2483 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3194 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1197 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/akhadangi/Llama3.2.1B.0.01-Last/e7c70ff9-59ad-4d09-8af0-ef9cf16d1dfa.json b/data/hfopenllm_v2/akhadangi/Llama3.2.1B.0.01-Last/e7c70ff9-59ad-4d09-8af0-ef9cf16d1dfa.json new file mode 100644 index 000000000..c7af9c6fd --- /dev/null +++ b/data/hfopenllm_v2/akhadangi/Llama3.2.1B.0.01-Last/e7c70ff9-59ad-4d09-8af0-ef9cf16d1dfa.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/akhadangi_Llama3.2.1B.0.01-Last/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.2.1B.0.01-Last", + "id": "akhadangi/Llama3.2.1B.0.01-Last", + "developer": "akhadangi", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on 
IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0917 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3159 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0136 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2433 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3206 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1227 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/akhadangi/Llama3.2.1B.0.1-First/26c4c993-ae49-42a0-be0a-f157be9f7d58.json b/data/hfopenllm_v2/akhadangi/Llama3.2.1B.0.1-First/26c4c993-ae49-42a0-be0a-f157be9f7d58.json new file mode 100644 index 000000000..79629d7c0 --- /dev/null +++ b/data/hfopenllm_v2/akhadangi/Llama3.2.1B.0.1-First/26c4c993-ae49-42a0-be0a-f157be9f7d58.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/akhadangi_Llama3.2.1B.0.1-First/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.2.1B.0.1-First", + "id": "akhadangi/Llama3.2.1B.0.1-First", + "developer": "akhadangi", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1001 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + 
"source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.312 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0211 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.245 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3301 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1169 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/akhadangi/Llama3.2.1B.0.1-Last/19adf124-c120-4e97-80cf-49c40a66eb81.json b/data/hfopenllm_v2/akhadangi/Llama3.2.1B.0.1-Last/19adf124-c120-4e97-80cf-49c40a66eb81.json new file mode 100644 index 000000000..75815fcac --- /dev/null +++ b/data/hfopenllm_v2/akhadangi/Llama3.2.1B.0.1-Last/19adf124-c120-4e97-80cf-49c40a66eb81.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/akhadangi_Llama3.2.1B.0.1-Last/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.2.1B.0.1-Last", + "id": "akhadangi/Llama3.2.1B.0.1-Last", + "developer": "akhadangi", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.095 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.3164 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0211 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2383 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3341 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1178 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/akhadangi/Llama3.2.1B.BaseFiT/66bc5d38-8d25-4934-bce8-41ce4ea0e385.json b/data/hfopenllm_v2/akhadangi/Llama3.2.1B.BaseFiT/66bc5d38-8d25-4934-bce8-41ce4ea0e385.json new file mode 100644 index 000000000..37dbf8a34 --- /dev/null +++ b/data/hfopenllm_v2/akhadangi/Llama3.2.1B.BaseFiT/66bc5d38-8d25-4934-bce8-41ce4ea0e385.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/akhadangi_Llama3.2.1B.BaseFiT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.2.1B.BaseFiT", + "id": "akhadangi/Llama3.2.1B.BaseFiT", + "developer": "akhadangi", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0883 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3175 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0242 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2534 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3221 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1172 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/akjindal53244/Llama-3.1-Storm-8B/541eafe5-807e-44b0-b652-a0752210fc71.json b/data/hfopenllm_v2/akjindal53244/Llama-3.1-Storm-8B/541eafe5-807e-44b0-b652-a0752210fc71.json new file mode 100644 index 000000000..a6b87b5af --- /dev/null +++ b/data/hfopenllm_v2/akjindal53244/Llama-3.1-Storm-8B/541eafe5-807e-44b0-b652-a0752210fc71.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/akjindal53244_Llama-3.1-Storm-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-Storm-8B", + "id": "akjindal53244/Llama-3.1-Storm-8B", + "developer": "akjindal53244", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8051 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5189 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1722 + } + }, + { + "evaluation_name": 
"GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3263 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4028 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3803 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/akjindal53244/Llama-3.1-Storm-8B/845a2484-9f17-4c0e-b06b-6250992298bc.json b/data/hfopenllm_v2/akjindal53244/Llama-3.1-Storm-8B/845a2484-9f17-4c0e-b06b-6250992298bc.json new file mode 100644 index 000000000..8d84dffa8 --- /dev/null +++ b/data/hfopenllm_v2/akjindal53244/Llama-3.1-Storm-8B/845a2484-9f17-4c0e-b06b-6250992298bc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/akjindal53244_Llama-3.1-Storm-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-Storm-8B", + "id": "akjindal53244/Llama-3.1-Storm-8B", + "developer": "akjindal53244", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8033 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5196 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1624 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4028 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3812 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alcholjung/llama3_medical_tuned/e62b6b26-5f3c-42c9-9541-bb8b23caee66.json b/data/hfopenllm_v2/alcholjung/llama3_medical_tuned/e62b6b26-5f3c-42c9-9541-bb8b23caee66.json new file mode 100644 index 000000000..e59f216fd --- /dev/null +++ b/data/hfopenllm_v2/alcholjung/llama3_medical_tuned/e62b6b26-5f3c-42c9-9541-bb8b23caee66.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/alcholjung_llama3_medical_tuned/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama3_medical_tuned", + "id": "alcholjung/llama3_medical_tuned", + "developer": "alcholjung", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 16.061 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0106 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4513 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0468 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2861 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.466 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2946 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/1-800-LLMs/Qwen-2.5-14B-Hindi/21ba6052-9614-454e-999d-ef4f0f693c6c.json b/data/hfopenllm_v2/alibaba/1-800-LLMs/Qwen-2.5-14B-Hindi/21ba6052-9614-454e-999d-ef4f0f693c6c.json deleted file mode 100644 index 23ffec9ab..000000000 --- a/data/hfopenllm_v2/alibaba/1-800-LLMs/Qwen-2.5-14B-Hindi/21ba6052-9614-454e-999d-ef4f0f693c6c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/1-800-LLMs_Qwen-2.5-14B-Hindi/1762652579.467683", - "retrieved_timestamp": "1762652579.4676852", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "1-800-LLMs/Qwen-2.5-14B-Hindi", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "1-800-LLMs/Qwen-2.5-14B-Hindi", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.582570911847232 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6523901531956199 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3330815709969788 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3624161073825503 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4489375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5262632978723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/1024m/QWEN-14B-B100/745bd077-3a0f-4c06-8d19-d7c160512446.json 
b/data/hfopenllm_v2/alibaba/1024m/QWEN-14B-B100/745bd077-3a0f-4c06-8d19-d7c160512446.json deleted file mode 100644 index 5df2638f0..000000000 --- a/data/hfopenllm_v2/alibaba/1024m/QWEN-14B-B100/745bd077-3a0f-4c06-8d19-d7c160512446.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/1024m_QWEN-14B-B100/1762652579.468843", - "retrieved_timestamp": "1762652579.4688451", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "1024m/QWEN-14B-B100", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "1024m/QWEN-14B-B100", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7762104549262623 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.653271132679638 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5438066465256798 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35067114093959734 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5178690159574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Aashraf995/Qwen-Evo-7B/705ae322-fed9-4a98-a79e-e0b289065ba9.json b/data/hfopenllm_v2/alibaba/Aashraf995/Qwen-Evo-7B/705ae322-fed9-4a98-a79e-e0b289065ba9.json deleted file mode 100644 index e7020dcb3..000000000 --- a/data/hfopenllm_v2/alibaba/Aashraf995/Qwen-Evo-7B/705ae322-fed9-4a98-a79e-e0b289065ba9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Aashraf995_Qwen-Evo-7B/1762652579.4765608", - "retrieved_timestamp": "1762652579.476562", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Aashraf995/Qwen-Evo-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": 
"Aashraf995/Qwen-Evo-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4757343847657549 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5709361538590277 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31419939577039274 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32550335570469796 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4541458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44622672872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Aashraf995/QwenStock-14B/7888b813-8ef1-4367-8168-edd1bd3c7888.json b/data/hfopenllm_v2/alibaba/Aashraf995/QwenStock-14B/7888b813-8ef1-4367-8168-edd1bd3c7888.json deleted file mode 100644 index dac7bdee2..000000000 --- a/data/hfopenllm_v2/alibaba/Aashraf995/QwenStock-14B/7888b813-8ef1-4367-8168-edd1bd3c7888.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Aashraf995_QwenStock-14B/1762652579.476816", - "retrieved_timestamp": "1762652579.476817", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Aashraf995/QwenStock-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Aashraf995/QwenStock-14B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5008632650256873 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6550130348108012 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH 
Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35725075528700906 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38926174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4792604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5382313829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Alsebay/Qwen2.5-7B-test-novelist/19ff3120-2171-48b3-8db6-1c76bb57cf47.json b/data/hfopenllm_v2/alibaba/Alsebay/Qwen2.5-7B-test-novelist/19ff3120-2171-48b3-8db6-1c76bb57cf47.json deleted file mode 100644 index 3db7b16d7..000000000 --- a/data/hfopenllm_v2/alibaba/Alsebay/Qwen2.5-7B-test-novelist/19ff3120-2171-48b3-8db6-1c76bb57cf47.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Alsebay_Qwen2.5-7B-test-novelist/1762652579.479883", - "retrieved_timestamp": "1762652579.4798841", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Alsebay/Qwen2.5-7B-test-novelist", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Alsebay/Qwen2.5-7B-test-novelist", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5351600420218354 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.515121518446605 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2348942598187311 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47488541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3865525265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Aryanne/QwentileSwap/ee2c5dd9-09db-45fa-8e67-961993d30672.json b/data/hfopenllm_v2/alibaba/Aryanne/QwentileSwap/ee2c5dd9-09db-45fa-8e67-961993d30672.json deleted file mode 100644 index 1368fcd77..000000000 --- a/data/hfopenllm_v2/alibaba/Aryanne/QwentileSwap/ee2c5dd9-09db-45fa-8e67-961993d30672.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Aryanne_QwentileSwap/1762652579.4827101", - "retrieved_timestamp": "1762652579.482711", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Aryanne/QwentileSwap", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Aryanne/QwentileSwap", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7378422585406721 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7008370136278447 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42220543806646527 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3674496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4640416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5945811170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/AtAndDev/Qwen2.5-1.5B-continuous-learnt/1a2d8396-4ff1-4386-a76b-d4863c7736c5.json b/data/hfopenllm_v2/alibaba/AtAndDev/Qwen2.5-1.5B-continuous-learnt/1a2d8396-4ff1-4386-a76b-d4863c7736c5.json deleted file mode 100644 index 4fae83f76..000000000 --- a/data/hfopenllm_v2/alibaba/AtAndDev/Qwen2.5-1.5B-continuous-learnt/1a2d8396-4ff1-4386-a76b-d4863c7736c5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/AtAndDev_Qwen2.5-1.5B-continuous-learnt/1762652579.483878", - "retrieved_timestamp": "1762652579.4838789", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "AtAndDev/Qwen2.5-1.5B-continuous-learnt", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "AtAndDev/Qwen2.5-1.5B-continuous-learnt", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45105431366551857 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42746984992662185 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1472809667673716 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36228124999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28058510638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/AtAndDev/Qwen2.5-1.5B-continuous-learnt/4f7f368f-0646-4c16-80de-69d9c5e28193.json b/data/hfopenllm_v2/alibaba/AtAndDev/Qwen2.5-1.5B-continuous-learnt/4f7f368f-0646-4c16-80de-69d9c5e28193.json deleted file mode 100644 index 89011b946..000000000 --- a/data/hfopenllm_v2/alibaba/AtAndDev/Qwen2.5-1.5B-continuous-learnt/4f7f368f-0646-4c16-80de-69d9c5e28193.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/AtAndDev_Qwen2.5-1.5B-continuous-learnt/1762652579.483521", - "retrieved_timestamp": "1762652579.483522", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "AtAndDev/Qwen2.5-1.5B-continuous-learnt", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "AtAndDev/Qwen2.5-1.5B-continuous-learnt", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4605214165081982 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42577470857933336 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07477341389728097 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3636458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28116688829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CombinHorizon/Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES/dcd14b21-f2fd-4c10-bf83-b6bb946f2789.json b/data/hfopenllm_v2/alibaba/CombinHorizon/Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES/dcd14b21-f2fd-4c10-bf83-b6bb946f2789.json deleted file mode 100644 index c6d3ef409..000000000 --- a/data/hfopenllm_v2/alibaba/CombinHorizon/Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES/dcd14b21-f2fd-4c10-bf83-b6bb946f2789.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CombinHorizon_Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES/1762652579.508495", - "retrieved_timestamp": "1762652579.5084958", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CombinHorizon/Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CombinHorizon/Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8239958864701216 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6370093752306357 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5317220543806647 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42603125000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4979222074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CombinHorizon/Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES/3171e54f-4c6f-40cf-ba6c-ef23b803ca33.json b/data/hfopenllm_v2/alibaba/CombinHorizon/Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES/3171e54f-4c6f-40cf-ba6c-ef23b803ca33.json deleted file mode 100644 index bc96be506..000000000 --- a/data/hfopenllm_v2/alibaba/CombinHorizon/Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES/3171e54f-4c6f-40cf-ba6c-ef23b803ca33.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CombinHorizon_Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES/1762652579.508758", - "retrieved_timestamp": "1762652579.508759", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CombinHorizon/Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CombinHorizon/Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7564019025075688 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5402085849577634 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.493202416918429 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.40330208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4341755319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CombinHorizon/huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES/62faed28-8f0f-4ff8-894f-b4b5b754b4cf.json b/data/hfopenllm_v2/alibaba/CombinHorizon/huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES/62faed28-8f0f-4ff8-894f-b4b5b754b4cf.json deleted file mode 100644 index 131b56cd5..000000000 --- a/data/hfopenllm_v2/alibaba/CombinHorizon/huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES/62faed28-8f0f-4ff8-894f-b4b5b754b4cf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CombinHorizon_huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES/1762652579.509247", - "retrieved_timestamp": "1762652579.509248", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CombinHorizon/huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CombinHorizon/huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8206237228331937 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.692924708291253 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5944108761329305 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3389261744966443 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42072916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5720578457446809 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CombinHorizon/huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES/62b4c918-b33b-40cf-888b-42b116a9e04d.json 
b/data/hfopenllm_v2/alibaba/CombinHorizon/huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES/62b4c918-b33b-40cf-888b-42b116a9e04d.json deleted file mode 100644 index d1d8b204a..000000000 --- a/data/hfopenllm_v2/alibaba/CombinHorizon/huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES/62b4c918-b33b-40cf-888b-42b116a9e04d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CombinHorizon_huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES/1762652579.509461", - "retrieved_timestamp": "1762652579.509462", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CombinHorizon/huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CombinHorizon/huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8175762532303177 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6335891556421077 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.547583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42603125000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4910239361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CombinHorizon/zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES/3bf71784-e6f1-405b-ad23-e74a91df7051.json b/data/hfopenllm_v2/alibaba/CombinHorizon/zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES/3bf71784-e6f1-405b-ad23-e74a91df7051.json deleted file mode 100644 index c42fd78fe..000000000 --- a/data/hfopenllm_v2/alibaba/CombinHorizon/zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES/3bf71784-e6f1-405b-ad23-e74a91df7051.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CombinHorizon_zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES/1762652579.509675", - 
"retrieved_timestamp": "1762652579.509676", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CombinHorizon/zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CombinHorizon/zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8328136012446974 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6955174427138592 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5853474320241692 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3674496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43139583333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5684840425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CoolSpring/Qwen2-0.5B-Abyme-merge2/2121d736-eec6-4a86-bae0-cd032f9eb603.json b/data/hfopenllm_v2/alibaba/CoolSpring/Qwen2-0.5B-Abyme-merge2/2121d736-eec6-4a86-bae0-cd032f9eb603.json deleted file mode 100644 index 4f2170640..000000000 --- a/data/hfopenllm_v2/alibaba/CoolSpring/Qwen2-0.5B-Abyme-merge2/2121d736-eec6-4a86-bae0-cd032f9eb603.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CoolSpring_Qwen2-0.5B-Abyme-merge2/1762652579.511093", - "retrieved_timestamp": "1762652579.511094", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CoolSpring/Qwen2-0.5B-Abyme-merge2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CoolSpring/Qwen2-0.5B-Abyme-merge2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2021846478454944 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29942723009138733 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03323262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3687291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14893617021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CoolSpring/Qwen2-0.5B-Abyme-merge3/2a633e8b-b35a-4a26-83bb-b471bab18ed2.json b/data/hfopenllm_v2/alibaba/CoolSpring/Qwen2-0.5B-Abyme-merge3/2a633e8b-b35a-4a26-83bb-b471bab18ed2.json deleted file mode 100644 index 68840d7ba..000000000 --- a/data/hfopenllm_v2/alibaba/CoolSpring/Qwen2-0.5B-Abyme-merge3/2a633e8b-b35a-4a26-83bb-b471bab18ed2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CoolSpring_Qwen2-0.5B-Abyme-merge3/1762652579.51142", - "retrieved_timestamp": "1762652579.511421", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CoolSpring/Qwen2-0.5B-Abyme-merge3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CoolSpring/Qwen2-0.5B-Abyme-merge3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23860468002677343 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30031404525933675 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.03172205438066465 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35009375000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15001662234042554 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CoolSpring/Qwen2-0.5B-Abyme/46d2afd2-b620-4474-ac6c-4f6bdef93d1c.json b/data/hfopenllm_v2/alibaba/CoolSpring/Qwen2-0.5B-Abyme/46d2afd2-b620-4474-ac6c-4f6bdef93d1c.json deleted file mode 100644 index e84e49427..000000000 --- a/data/hfopenllm_v2/alibaba/CoolSpring/Qwen2-0.5B-Abyme/46d2afd2-b620-4474-ac6c-4f6bdef93d1c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CoolSpring_Qwen2-0.5B-Abyme/1762652579.5106628", - "retrieved_timestamp": "1762652579.510665", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CoolSpring/Qwen2-0.5B-Abyme", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CoolSpring/Qwen2-0.5B-Abyme", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19151850423542865 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2861834296481826 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35421875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.13331117021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Broca/4429613e-2db7-4061-931f-eaa70d202b71.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Broca/4429613e-2db7-4061-931f-eaa70d202b71.json deleted file mode 100644 index e0fc51422..000000000 --- a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Broca/4429613e-2db7-4061-931f-eaa70d202b71.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Broca/1762652579.5150259", - "retrieved_timestamp": "1762652579.5150259", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-Broca", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-Broca", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.560414145578177 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6527145981540362 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3580060422960725 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38674496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47665625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5364029255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-BrocaV9/782219f0-25f7-465b-9f86-5e48c9d4703e.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-BrocaV9/782219f0-25f7-465b-9f86-5e48c9d4703e.json deleted file mode 100644 index 4f5f96433..000000000 --- a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-BrocaV9/782219f0-25f7-465b-9f86-5e48c9d4703e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-BrocaV9/1762652579.515307", - "retrieved_timestamp": "1762652579.5153081", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-BrocaV9", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-BrocaV9", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6762933460994606 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6391383585238984 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3814199395770393 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3640939597315436 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46903125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5330784574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Brocav3/7abe4912-4e21-4774-8011-482603f7bcc0.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Brocav3/7abe4912-4e21-4774-8011-482603f7bcc0.json deleted file mode 100644 index 71abf688a..000000000 --- a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Brocav3/7abe4912-4e21-4774-8011-482603f7bcc0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Brocav3/1762652579.5155342", - "retrieved_timestamp": "1762652579.515535", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-Brocav3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-Brocav3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6951776841004091 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6452353476182755 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38746223564954685 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35906040268456374 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4756354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.531748670212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Brocav6/63a1000f-1de8-42ef-a905-70b78bf46417.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Brocav6/63a1000f-1de8-42ef-a905-70b78bf46417.json deleted file mode 100644 index 43f8ed16b..000000000 --- a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Brocav6/63a1000f-1de8-42ef-a905-70b78bf46417.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Brocav6/1762652579.515748", - "retrieved_timestamp": "1762652579.5157492", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-Brocav6", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-Brocav6", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6995239298394925 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6388835266626555 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38746223564954685 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3674496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on 
MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47420833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5319148936170213 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Brocav7/6966d397-d336-455a-a156-c2e6430c813f.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Brocav7/6966d397-d336-455a-a156-c2e6430c813f.json deleted file mode 100644 index 1fa61be78..000000000 --- a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Brocav7/6966d397-d336-455a-a156-c2e6430c813f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Brocav7/1762652579.5159612", - "retrieved_timestamp": "1762652579.5159621", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-Brocav7", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-Brocav7", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6723715297632504 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6444026981327182 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38444108761329304 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3674496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47960416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5257646276595744 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Emerged/15af5216-fc3d-4102-bbed-eb5b7d0ecf48.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Emerged/15af5216-fc3d-4102-bbed-eb5b7d0ecf48.json deleted file mode 100644 index d3c2ecd2f..000000000 --- 
a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Emerged/15af5216-fc3d-4102-bbed-eb5b7d0ecf48.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Emerged/1762652579.516177", - "retrieved_timestamp": "1762652579.516178", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-Emerged", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-Emerged", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7000237148543642 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6260033680703311 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.324773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3573825503355705 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46909375000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5186170212765957 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Emergedv3/7b125482-fd80-4f71-b398-9421333ee736.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Emergedv3/7b125482-fd80-4f71-b398-9421333ee736.json deleted file mode 100644 index b44a07599..000000000 --- a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Emergedv3/7b125482-fd80-4f71-b398-9421333ee736.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Emergedv3/1762652579.516385", - "retrieved_timestamp": "1762652579.516386", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-Emergedv3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-Emergedv3", - "additional_details": { - 
"precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6388493641316153 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6190728411056029 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43580060422960726 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36073825503355705 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4728125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5173703457446809 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-FinalMerge/36ebe0b7-51ae-4ea5-ba42-c9fd0d717259.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-FinalMerge/36ebe0b7-51ae-4ea5-ba42-c9fd0d717259.json deleted file mode 100644 index df0d25108..000000000 --- a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-FinalMerge/36ebe0b7-51ae-4ea5-ba42-c9fd0d717259.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-FinalMerge/1762652579.516642", - "retrieved_timestamp": "1762652579.516643", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-FinalMerge", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-FinalMerge", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48909781601705693 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5714945310011449 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3814199395770393 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3548657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43790625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4574468085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Hyper/8412921a-ad8c-4106-a3a1-9259d2ddb074.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Hyper/8412921a-ad8c-4106-a3a1-9259d2ddb074.json deleted file mode 100644 index a2eab7bb9..000000000 --- a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Hyper/8412921a-ad8c-4106-a3a1-9259d2ddb074.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Hyper/1762652579.516851", - "retrieved_timestamp": "1762652579.516851", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-Hyper", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-Hyper", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5391317260424563 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6507453346766106 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34365558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39177852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48983333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, 
- "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5374002659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-HyperMarck-dl/5b6ef372-86e5-4fc1-85ba-5a76517bb10f.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-HyperMarck-dl/5b6ef372-86e5-4fc1-85ba-5a76517bb10f.json deleted file mode 100644 index a977c3d75..000000000 --- a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-HyperMarck-dl/5b6ef372-86e5-4fc1-85ba-5a76517bb10f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-HyperMarck-dl/1762652579.5170581", - "retrieved_timestamp": "1762652579.517059", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-HyperMarck-dl", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-HyperMarck-dl", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6650276821057017 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6096480033153927 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5287009063444109 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3674496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4415625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5090591755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Hyperionv3/d6700ad3-d858-4420-96b1-d690984ebcaa.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Hyperionv3/d6700ad3-d858-4420-96b1-d690984ebcaa.json deleted file mode 100644 index c7c21a1e1..000000000 --- a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Hyperionv3/d6700ad3-d858-4420-96b1-d690984ebcaa.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Hyperionv3/1762652579.517266", - "retrieved_timestamp": "1762652579.517267", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-Hyperionv3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-Hyperionv3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6836371937570092 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6522165609411941 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37009063444108764 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37080536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47296875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5339926861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Hyperionv4/7c4a43f8-be43-44d7-a514-f02b70ec367c.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Hyperionv4/7c4a43f8-be43-44d7-a514-f02b70ec367c.json deleted file mode 100644 index 7ff2f807c..000000000 --- a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Hyperionv4/7c4a43f8-be43-44d7-a514-f02b70ec367c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Hyperionv4/1762652579.517484", - "retrieved_timestamp": "1762652579.517484", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-Hyperionv4", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-Hyperionv4", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.5415796752616391 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6471791978856551 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3976510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48319791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5364029255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Hyperionv5/5b1e2a5e-cd92-4ad4-b12d-0540461f9f5e.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Hyperionv5/5b1e2a5e-cd92-4ad4-b12d-0540461f9f5e.json deleted file mode 100644 index 2e3b90063..000000000 --- a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Hyperionv5/5b1e2a5e-cd92-4ad4-b12d-0540461f9f5e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Hyperionv5/1762652579.517704", - "retrieved_timestamp": "1762652579.517704", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-Hyperionv5", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-Hyperionv5", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6729211824625327 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.644265785086055 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3821752265861027 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3716442953020134 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4795416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5301695478723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-MegaMerge-pt2/f269bb45-d627-49b9-953b-5c8591433aa7.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-MegaMerge-pt2/f269bb45-d627-49b9-953b-5c8591433aa7.json deleted file mode 100644 index 388670ec4..000000000 --- a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-MegaMerge-pt2/f269bb45-d627-49b9-953b-5c8591433aa7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-MegaMerge-pt2/1762652579.517905", - "retrieved_timestamp": "1762652579.517906", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-MegaMerge-pt2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-MegaMerge-pt2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.568307645935008 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6577703330510146 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3995468277945619 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37919463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.472875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5420545212765957 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-MergeStock/c1db0f86-a3d9-4aa4-9fe3-0442fc63ad25.json 
b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-MergeStock/c1db0f86-a3d9-4aa4-9fe3-0442fc63ad25.json deleted file mode 100644 index ca4a38b6e..000000000 --- a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-MergeStock/c1db0f86-a3d9-4aa4-9fe3-0442fc63ad25.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-MergeStock/1762652579.518343", - "retrieved_timestamp": "1762652579.518346", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-MergeStock", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-MergeStock", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5685326046002386 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6579336391923106 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41465256797583083 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3733221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4676354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.539561170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-ReasoningMerge/df6199fa-3797-4b88-b5fc-e429f513932b.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-ReasoningMerge/df6199fa-3797-4b88-b5fc-e429f513932b.json deleted file mode 100644 index bced68a94..000000000 --- a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-ReasoningMerge/df6199fa-3797-4b88-b5fc-e429f513932b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-ReasoningMerge/1762652579.518682", - "retrieved_timestamp": "1762652579.518684", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - 
"model_info": { - "name": "CultriX/Qwen2.5-14B-ReasoningMerge", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-ReasoningMerge", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46054690443578594 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6578226399295218 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.520392749244713 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4077181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5165937500000001 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5344913563829787 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Ultimav2/b76ac8f6-7355-4bbf-ad8f-d8fc967120a1.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Ultimav2/b76ac8f6-7355-4bbf-ad8f-d8fc967120a1.json deleted file mode 100644 index c80229807..000000000 --- a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Ultimav2/b76ac8f6-7355-4bbf-ad8f-d8fc967120a1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Ultimav2/1762652579.519061", - "retrieved_timestamp": "1762652579.5190778", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-Ultimav2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-Ultimav2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5500228283177524 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.6555027486976712 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38444108761329304 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3850671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4965625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5417220744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Unity/efd5d269-fc83-43f0-9054-dc3bdf40f180.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Unity/efd5d269-fc83-43f0-9054-dc3bdf40f180.json deleted file mode 100644 index 4d0e64b42..000000000 --- a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Unity/efd5d269-fc83-43f0-9054-dc3bdf40f180.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Unity/1762652579.519516", - "retrieved_timestamp": "1762652579.519517", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-Unity", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-Unity", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6738952645646883 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6019955540977778 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4312688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34731543624161076 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": 
{ - "score": 0.4679479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.507563164893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Wernicke-SLERP/8359ce66-d904-4092-92be-5e2dbb372677.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Wernicke-SLERP/8359ce66-d904-4092-92be-5e2dbb372677.json deleted file mode 100644 index ecc4a209f..000000000 --- a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Wernicke-SLERP/8359ce66-d904-4092-92be-5e2dbb372677.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Wernicke-SLERP/1762652579.5203562", - "retrieved_timestamp": "1762652579.5203571", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-Wernicke-SLERP", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-Wernicke-SLERP", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.491 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5588904107767391 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6440929009604598 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4486404833836858 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34395973154362414 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41403125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5093916223404256 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Wernicke/6c2287bb-69b0-4b23-ba15-ff4a600e4aa7.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Wernicke/6c2287bb-69b0-4b23-ba15-ff4a600e4aa7.json deleted file mode 100644 index 79361cc0d..000000000 --- a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Wernicke/6c2287bb-69b0-4b23-ba15-ff4a600e4aa7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - 
"evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Wernicke/1762652579.519787", - "retrieved_timestamp": "1762652579.519788", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-Wernicke", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-Wernicke", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5234699486252034 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6568359662501574 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3814199395770393 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3934563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46890625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5423869680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Wernickev3/a4f5037a-381b-4726-b90d-ba559058772c.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Wernickev3/a4f5037a-381b-4726-b90d-ba559058772c.json deleted file mode 100644 index 0c2a9cde6..000000000 --- a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Wernickev3/a4f5037a-381b-4726-b90d-ba559058772c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Wernickev3/1762652579.520611", - "retrieved_timestamp": "1762652579.520612", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-Wernickev3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-Wernickev3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7048198779239085 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6184146992839421 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3542296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3624161073825503 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4716666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.515126329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-partialmergept1/852ffa19-285b-4037-ac60-63f24cafcecb.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-partialmergept1/852ffa19-285b-4037-ac60-63f24cafcecb.json deleted file mode 100644 index b05d00b30..000000000 --- a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-partialmergept1/852ffa19-285b-4037-ac60-63f24cafcecb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-partialmergept1/1762652579.5208588", - "retrieved_timestamp": "1762652579.52086", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-partialmergept1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-partialmergept1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.633728507028019 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6151178406213536 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.45392749244712993 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3615771812080537 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47569791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5207779255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwenfinity-2.5-14B/4fba9290-886e-490d-aaeb-068f8c679006.json b/data/hfopenllm_v2/alibaba/CultriX/Qwenfinity-2.5-14B/4fba9290-886e-490d-aaeb-068f8c679006.json deleted file mode 100644 index e728ae563..000000000 --- a/data/hfopenllm_v2/alibaba/CultriX/Qwenfinity-2.5-14B/4fba9290-886e-490d-aaeb-068f8c679006.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CultriX_Qwenfinity-2.5-14B/1762652579.521086", - "retrieved_timestamp": "1762652579.521087", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CultriX/Qwenfinity-2.5-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwenfinity-2.5-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4813794066410457 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5655007271970033 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41012084592145015 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.348993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45058333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.4498005319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14B-EvolMerge/44823eb6-717b-4508-a745-7821545dd3c2.json b/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14B-EvolMerge/44823eb6-717b-4508-a745-7821545dd3c2.json deleted file mode 100644 index 8966c0e92..000000000 --- a/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14B-EvolMerge/44823eb6-717b-4508-a745-7821545dd3c2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CultriX_SeQwence-14B-EvolMerge/1762652579.5218382", - "retrieved_timestamp": "1762652579.5218382", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CultriX/SeQwence-14B-EvolMerge", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/SeQwence-14B-EvolMerge", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5381576439403006 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6572183434723883 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36706948640483383 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48208333333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5418882978723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14B-EvolMergev1/e2621a1f-af39-48fe-a56b-18e9b396a476.json b/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14B-EvolMergev1/e2621a1f-af39-48fe-a56b-18e9b396a476.json deleted file mode 100644 index 01ac2db9f..000000000 --- a/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14B-EvolMergev1/e2621a1f-af39-48fe-a56b-18e9b396a476.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CultriX_SeQwence-14B-EvolMergev1/1762652579.5221288", - "retrieved_timestamp": "1762652579.52213", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": 
"Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CultriX/SeQwence-14B-EvolMergev1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/SeQwence-14B-EvolMergev1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5554683794554005 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6545547382762975 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4214501510574018 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3766778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46227083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.539311835106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14B-v5/6a7ae44e-93f6-4371-b3a6-585a099aa7c7.json b/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14B-v5/6a7ae44e-93f6-4371-b3a6-585a099aa7c7.json deleted file mode 100644 index 59a6202a1..000000000 --- a/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14B-v5/6a7ae44e-93f6-4371-b3a6-585a099aa7c7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CultriX_SeQwence-14B-v5/1762652579.522369", - "retrieved_timestamp": "1762652579.522369", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CultriX/SeQwence-14B-v5", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/SeQwence-14B-v5", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5919881470055011 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6517093605796943 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33081570996978854 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3699664429530201 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47141666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5414727393617021 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14B/b9f3e9d1-e1f9-44cd-9067-c949adfbe553.json b/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14B/b9f3e9d1-e1f9-44cd-9067-c949adfbe553.json deleted file mode 100644 index c4e1c3a60..000000000 --- a/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14B/b9f3e9d1-e1f9-44cd-9067-c949adfbe553.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CultriX_SeQwence-14B/1762652579.521544", - "retrieved_timestamp": "1762652579.521545", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CultriX/SeQwence-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/SeQwence-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5351600420218354 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6505665291288972 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35347432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36073825503355705 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46661458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5418882978723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14Bv1/f4505219-fc0d-4f7b-ad71-3c9fef064c28.json b/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14Bv1/f4505219-fc0d-4f7b-ad71-3c9fef064c28.json deleted file mode 100644 index fa26a6559..000000000 --- a/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14Bv1/f4505219-fc0d-4f7b-ad71-3c9fef064c28.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CultriX_SeQwence-14Bv1/1762652579.522592", - "retrieved_timestamp": "1762652579.522593", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CultriX/SeQwence-14Bv1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/SeQwence-14Bv1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6678003253589365 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6344673727103446 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3610271903323263 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3615771812080537 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47042708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.531998005319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14Bv2/49eccc70-6321-451b-87e9-29907cfb53a0.json b/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14Bv2/49eccc70-6321-451b-87e9-29907cfb53a0.json deleted file mode 100644 index 55b7ceeb6..000000000 --- a/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14Bv2/49eccc70-6321-451b-87e9-29907cfb53a0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - 
"evaluation_id": "hfopenllm_v2/CultriX_SeQwence-14Bv2/1762652579.5228019", - "retrieved_timestamp": "1762652579.5228028", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CultriX/SeQwence-14Bv2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/SeQwence-14Bv2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5785992278266112 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6304512627108576 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47583081570996977 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36073825503355705 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4601041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5334109042553191 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14Bv3/4857c00b-e4fb-417a-8b63-a5b7e9298b40.json b/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14Bv3/4857c00b-e4fb-417a-8b63-a5b7e9298b40.json deleted file mode 100644 index 483fbd40d..000000000 --- a/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14Bv3/4857c00b-e4fb-417a-8b63-a5b7e9298b40.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CultriX_SeQwence-14Bv3/1762652579.523057", - "retrieved_timestamp": "1762652579.523058", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CultriX/SeQwence-14Bv3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/SeQwence-14Bv3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on 
IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5719047682371663 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6302253848409948 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47658610271903323 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3649328859060403 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4624270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5334940159574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Danielbrdz/Barcenas-R1-Qwen-1.5b/c5330fb2-e914-4170-81f8-77a317ba557c.json b/data/hfopenllm_v2/alibaba/Danielbrdz/Barcenas-R1-Qwen-1.5b/c5330fb2-e914-4170-81f8-77a317ba557c.json deleted file mode 100644 index b8a39be91..000000000 --- a/data/hfopenllm_v2/alibaba/Danielbrdz/Barcenas-R1-Qwen-1.5b/c5330fb2-e914-4170-81f8-77a317ba557c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Danielbrdz_Barcenas-R1-Qwen-1.5b/1762652579.5346482", - "retrieved_timestamp": "1762652579.5346491", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Danielbrdz/Barcenas-R1-Qwen-1.5b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Danielbrdz/Barcenas-R1-Qwen-1.5b", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24280132271262472 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35872011187392944 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3496978851963746 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.354125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19090757978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/DavidAU/DeepSeek-R1-Distill-Qwen-25.5B-Brainstorm/4b7dd9db-5e94-4885-96f8-189af8d97c09.json b/data/hfopenllm_v2/alibaba/DavidAU/DeepSeek-R1-Distill-Qwen-25.5B-Brainstorm/4b7dd9db-5e94-4885-96f8-189af8d97c09.json deleted file mode 100644 index d4e8fb0f5..000000000 --- a/data/hfopenllm_v2/alibaba/DavidAU/DeepSeek-R1-Distill-Qwen-25.5B-Brainstorm/4b7dd9db-5e94-4885-96f8-189af8d97c09.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DavidAU_DeepSeek-R1-Distill-Qwen-25.5B-Brainstorm/1762652579.53886", - "retrieved_timestamp": "1762652579.53886", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DavidAU/DeepSeek-R1-Distill-Qwen-25.5B-Brainstorm", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "DavidAU/DeepSeek-R1-Distill-Qwen-25.5B-Brainstorm", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 25.506 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34159474638403875 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.580689592371853 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5536253776435045 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3859060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5155104166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.4623503989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/DavidAU/Qwen2.5-MOE-2X1.5B-DeepSeek-Uncensored-Censored-4B/78e7f7ee-3677-499a-aa36-2e8bf0902bf0.json b/data/hfopenllm_v2/alibaba/DavidAU/Qwen2.5-MOE-2X1.5B-DeepSeek-Uncensored-Censored-4B/78e7f7ee-3677-499a-aa36-2e8bf0902bf0.json deleted file mode 100644 index f9576ead9..000000000 --- a/data/hfopenllm_v2/alibaba/DavidAU/Qwen2.5-MOE-2X1.5B-DeepSeek-Uncensored-Censored-4B/78e7f7ee-3677-499a-aa36-2e8bf0902bf0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DavidAU_Qwen2.5-MOE-2X1.5B-DeepSeek-Uncensored-Censored-4B/1762652579.543009", - "retrieved_timestamp": "1762652579.543009", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DavidAU/Qwen2.5-MOE-2X1.5B-DeepSeek-Uncensored-Censored-4B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "DavidAU/Qwen2.5-MOE-2X1.5B-DeepSeek-Uncensored-Censored-4B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2MoeForCausalLM", - "params_billions": 4.089 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17832905579418165 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30326053640004424 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3714583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11419547872340426 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/DavidAU/Qwen2.5-MOE-2X7B-DeepSeek-Abliterated-Censored-19B/d65793ba-f363-4665-9ff5-1ac08e819d55.json b/data/hfopenllm_v2/alibaba/DavidAU/Qwen2.5-MOE-2X7B-DeepSeek-Abliterated-Censored-19B/d65793ba-f363-4665-9ff5-1ac08e819d55.json deleted file mode 100644 index 60addf5ed..000000000 --- a/data/hfopenllm_v2/alibaba/DavidAU/Qwen2.5-MOE-2X7B-DeepSeek-Abliterated-Censored-19B/d65793ba-f363-4665-9ff5-1ac08e819d55.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/DavidAU_Qwen2.5-MOE-2X7B-DeepSeek-Abliterated-Censored-19B/1762652579.543224", - "retrieved_timestamp": "1762652579.543225", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DavidAU/Qwen2.5-MOE-2X7B-DeepSeek-Abliterated-Censored-19B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "DavidAU/Qwen2.5-MOE-2X7B-DeepSeek-Abliterated-Censored-19B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2MoeForCausalLM", - "params_billions": 19.022 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28351773294857646 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35922718767499157 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24169184290030213 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38469791666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1636469414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/DavidAU/Qwen2.5-MOE-6x1.5B-DeepSeek-Reasoning-e32/c142222c-836d-493f-a9f8-857426e0573c.json b/data/hfopenllm_v2/alibaba/DavidAU/Qwen2.5-MOE-6x1.5B-DeepSeek-Reasoning-e32/c142222c-836d-493f-a9f8-857426e0573c.json deleted file mode 100644 index ae4fc4f1a..000000000 --- a/data/hfopenllm_v2/alibaba/DavidAU/Qwen2.5-MOE-6x1.5B-DeepSeek-Reasoning-e32/c142222c-836d-493f-a9f8-857426e0573c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DavidAU_Qwen2.5-MOE-6x1.5B-DeepSeek-Reasoning-e32/1762652579.543571", - "retrieved_timestamp": "1762652579.543573", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DavidAU/Qwen2.5-MOE-6x1.5B-DeepSeek-Reasoning-e32", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "DavidAU/Qwen2.5-MOE-6x1.5B-DeepSeek-Reasoning-e32", 
- "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2MoeForCausalLM", - "params_billions": 8.714 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21067766858601844 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32861776640637924 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24748322147651006 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3404479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11220079787234043 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/DeepMount00/Qwen2-1.5B-Ita/6669c8b8-91d6-4f14-8cfb-a6422352850d.json b/data/hfopenllm_v2/alibaba/DeepMount00/Qwen2-1.5B-Ita/6669c8b8-91d6-4f14-8cfb-a6422352850d.json deleted file mode 100644 index 03ea7766f..000000000 --- a/data/hfopenllm_v2/alibaba/DeepMount00/Qwen2-1.5B-Ita/6669c8b8-91d6-4f14-8cfb-a6422352850d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DeepMount00_Qwen2-1.5B-Ita/1762652579.5521228", - "retrieved_timestamp": "1762652579.5521238", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DeepMount00/Qwen2-1.5B-Ita", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "DeepMount00/Qwen2-1.5B-Ita", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5173495214918638 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39805765159128703 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 
5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11404833836858005 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35037500000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2771775265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/DeepMount00/Qwen2-1.5B-Ita_v2/78ec8596-ee15-4e94-8bc8-77c6bdffc541.json b/data/hfopenllm_v2/alibaba/DeepMount00/Qwen2-1.5B-Ita_v2/78ec8596-ee15-4e94-8bc8-77c6bdffc541.json deleted file mode 100644 index 3ebdf930d..000000000 --- a/data/hfopenllm_v2/alibaba/DeepMount00/Qwen2-1.5B-Ita_v2/78ec8596-ee15-4e94-8bc8-77c6bdffc541.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DeepMount00_Qwen2-1.5B-Ita_v2/1762652579.552372", - "retrieved_timestamp": "1762652579.552373", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DeepMount00/Qwen2-1.5B-Ita_v2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "DeepMount00/Qwen2-1.5B-Ita_v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49998891829235315 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3953827803974795 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09667673716012085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37018749999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy 
on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30319148936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/DeepMount00/Qwen2-1.5B-Ita_v3/f9cac378-3bdb-4c66-8193-502773c5c5eb.json b/data/hfopenllm_v2/alibaba/DeepMount00/Qwen2-1.5B-Ita_v3/f9cac378-3bdb-4c66-8193-502773c5c5eb.json deleted file mode 100644 index fccc6afcc..000000000 --- a/data/hfopenllm_v2/alibaba/DeepMount00/Qwen2-1.5B-Ita_v3/f9cac378-3bdb-4c66-8193-502773c5c5eb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DeepMount00_Qwen2-1.5B-Ita_v3/1762652579.552576", - "retrieved_timestamp": "1762652579.552577", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DeepMount00/Qwen2-1.5B-Ita_v3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "DeepMount00/Qwen2-1.5B-Ita_v3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4890479483326463 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3948478837209111 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37415624999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3017785904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/DeepMount00/Qwen2-1.5B-Ita_v5/04f0529b-474c-42d2-99a8-e3bdd5c18eaf.json b/data/hfopenllm_v2/alibaba/DeepMount00/Qwen2-1.5B-Ita_v5/04f0529b-474c-42d2-99a8-e3bdd5c18eaf.json deleted file mode 100644 index 0bb26cecd..000000000 --- a/data/hfopenllm_v2/alibaba/DeepMount00/Qwen2-1.5B-Ita_v5/04f0529b-474c-42d2-99a8-e3bdd5c18eaf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DeepMount00_Qwen2-1.5B-Ita_v5/1762652579.552789", - "retrieved_timestamp": "1762652579.55279", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DeepMount00/Qwen2-1.5B-Ita_v5", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "DeepMount00/Qwen2-1.5B-Ita_v5", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4987400098405564 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40320443289745417 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11782477341389729 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34225 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29429853723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/DeepMount00/Qwen2-1.5B-Ita_v6/041f6e95-b7d1-44c6-a995-0c8257e188aa.json b/data/hfopenllm_v2/alibaba/DeepMount00/Qwen2-1.5B-Ita_v6/041f6e95-b7d1-44c6-a995-0c8257e188aa.json deleted file mode 100644 index fe88dda9b..000000000 --- a/data/hfopenllm_v2/alibaba/DeepMount00/Qwen2-1.5B-Ita_v6/041f6e95-b7d1-44c6-a995-0c8257e188aa.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DeepMount00_Qwen2-1.5B-Ita_v6/1762652579.553008", - "retrieved_timestamp": "1762652579.5530088", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DeepMount00/Qwen2-1.5B-Ita_v6", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "DeepMount00/Qwen2-1.5B-Ita_v6", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.497 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 
- }, - "score_details": { - "score": 0.29990425404593146 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42486081646897506 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3754583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28715093085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Dongwei/DeepSeek-R1-Distill-Qwen-7B-GRPO/b36b915f-3c4a-40e8-ab78-8442dbe116e1.json b/data/hfopenllm_v2/alibaba/Dongwei/DeepSeek-R1-Distill-Qwen-7B-GRPO/b36b915f-3c4a-40e8-ab78-8442dbe116e1.json deleted file mode 100644 index 1579a4c04..000000000 --- a/data/hfopenllm_v2/alibaba/Dongwei/DeepSeek-R1-Distill-Qwen-7B-GRPO/b36b915f-3c4a-40e8-ab78-8442dbe116e1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Dongwei_DeepSeek-R1-Distill-Qwen-7B-GRPO/1762652579.5556989", - "retrieved_timestamp": "1762652579.5557", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Dongwei/DeepSeek-R1-Distill-Qwen-7B-GRPO", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Dongwei/DeepSeek-R1-Distill-Qwen-7B-GRPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40376866713653103 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34425676981862185 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19561933534743203 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36628124999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23221409574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2/3ba36700-5019-4525-bf5e-6a87cce7ecc5.json b/data/hfopenllm_v2/alibaba/EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2/3ba36700-5019-4525-bf5e-6a87cce7ecc5.json deleted file mode 100644 index 7ad7408b4..000000000 --- a/data/hfopenllm_v2/alibaba/EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2/3ba36700-5019-4525-bf5e-6a87cce7ecc5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EVA-UNIT-01_EVA-Qwen2.5-14B-v0.2/1762652579.5920892", - "retrieved_timestamp": "1762652579.5920892", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4038429145777648 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6090237540046592 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3406344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39429530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4794479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5135472074468085 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/alibaba/EVA-UNIT-01/EVA-Qwen2.5-72B-v0.2/9e315ba7-3eea-4934-822e-461e64bf8551.json b/data/hfopenllm_v2/alibaba/EVA-UNIT-01/EVA-Qwen2.5-72B-v0.2/9e315ba7-3eea-4934-822e-461e64bf8551.json deleted file mode 100644 index 1d3e68188..000000000 --- a/data/hfopenllm_v2/alibaba/EVA-UNIT-01/EVA-Qwen2.5-72B-v0.2/9e315ba7-3eea-4934-822e-461e64bf8551.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EVA-UNIT-01_EVA-Qwen2.5-72B-v0.2/1762652579.59233", - "retrieved_timestamp": "1762652579.592331", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EVA-UNIT-01/EVA-Qwen2.5-72B-v0.2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "EVA-UNIT-01/EVA-Qwen2.5-72B-v0.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6878837041272712 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7088012228048761 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4312688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4085570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47197916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.581283244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Etherll/Qwen2.5-7B-della-test/777b5587-70b2-472f-a6e4-820d653669cd.json b/data/hfopenllm_v2/alibaba/Etherll/Qwen2.5-7B-della-test/777b5587-70b2-472f-a6e4-820d653669cd.json deleted file mode 100644 index 009786479..000000000 --- a/data/hfopenllm_v2/alibaba/Etherll/Qwen2.5-7B-della-test/777b5587-70b2-472f-a6e4-820d653669cd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Etherll_Qwen2.5-7B-della-test/1762652579.614594", - "retrieved_timestamp": "1762652579.6145952", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": 
"third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Etherll/Qwen2.5-7B-della-test", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Etherll/Qwen2.5-7B-della-test", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7624968417133207 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5447331985391859 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48942598187311176 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40469791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4360871010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/HPAI-BSC/Qwen2.5-Aloe-Beta-7B/a99dbb21-4f7d-4ac0-b403-2f8bf7aa92b1.json b/data/hfopenllm_v2/alibaba/HPAI-BSC/Qwen2.5-Aloe-Beta-7B/a99dbb21-4f7d-4ac0-b403-2f8bf7aa92b1.json deleted file mode 100644 index 346ab1b63..000000000 --- a/data/hfopenllm_v2/alibaba/HPAI-BSC/Qwen2.5-Aloe-Beta-7B/a99dbb21-4f7d-4ac0-b403-2f8bf7aa92b1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/HPAI-BSC_Qwen2.5-Aloe-Beta-7B/1762652579.6368651", - "retrieved_timestamp": "1762652579.636866", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "HPAI-BSC/Qwen2.5-Aloe-Beta-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "HPAI-BSC/Qwen2.5-Aloe-Beta-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4553506917201914 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5048995904321122 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3542296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42603125000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4354222074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/HeraiHench/DeepSeek-R1-Qwen-Coder-8B/a0730f18-1058-44b4-b6b6-0881ae2e6338.json b/data/hfopenllm_v2/alibaba/HeraiHench/DeepSeek-R1-Qwen-Coder-8B/a0730f18-1058-44b4-b6b6-0881ae2e6338.json deleted file mode 100644 index e717210b8..000000000 --- a/data/hfopenllm_v2/alibaba/HeraiHench/DeepSeek-R1-Qwen-Coder-8B/a0730f18-1058-44b4-b6b6-0881ae2e6338.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/HeraiHench_DeepSeek-R1-Qwen-Coder-8B/1762652579.6392472", - "retrieved_timestamp": "1762652579.639248", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "HeraiHench/DeepSeek-R1-Qwen-Coder-8B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "HeraiHench/DeepSeek-R1-Qwen-Coder-8B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 8.164 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1869472998311148 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29134447696551025 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37384375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11228390957446809 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/HeraiHench/Double-Down-Qwen-Math-7B/6e852e78-e666-413e-ac29-ad374bbc74f2.json b/data/hfopenllm_v2/alibaba/HeraiHench/Double-Down-Qwen-Math-7B/6e852e78-e666-413e-ac29-ad374bbc74f2.json deleted file mode 100644 index 534c6704e..000000000 --- a/data/hfopenllm_v2/alibaba/HeraiHench/Double-Down-Qwen-Math-7B/6e852e78-e666-413e-ac29-ad374bbc74f2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/HeraiHench_Double-Down-Qwen-Math-7B/1762652579.63955", - "retrieved_timestamp": "1762652579.639551", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "HeraiHench/Double-Down-Qwen-Math-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "HeraiHench/Double-Down-Qwen-Math-7B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1669636564316015 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2844613514203868 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37365625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11120345744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/HeraiHench/Marge-Qwen-Math-7B/07f4a9dc-16d7-4b75-922f-09f8e9ebed7d.json b/data/hfopenllm_v2/alibaba/HeraiHench/Marge-Qwen-Math-7B/07f4a9dc-16d7-4b75-922f-09f8e9ebed7d.json deleted file mode 100644 index ddc2f30ce..000000000 --- 
a/data/hfopenllm_v2/alibaba/HeraiHench/Marge-Qwen-Math-7B/07f4a9dc-16d7-4b75-922f-09f8e9ebed7d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/HeraiHench_Marge-Qwen-Math-7B/1762652579.6397812", - "retrieved_timestamp": "1762652579.639782", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "HeraiHench/Marge-Qwen-Math-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "HeraiHench/Marge-Qwen-Math-7B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12622175826806206 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3068846024368302 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.005287009063444109 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23909395973154363 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39390624999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10555186170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen-0.5B-IRPO-1epoch/0cbb4771-926d-4cf6-a78b-a5f4ac4d5902.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen-0.5B-IRPO-1epoch/0cbb4771-926d-4cf6-a78b-a5f4ac4d5902.json deleted file mode 100644 index 2f8524c9b..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen-0.5B-IRPO-1epoch/0cbb4771-926d-4cf6-a78b-a5f4ac4d5902.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen-0.5B-IRPO-1epoch/1762652579.652392", - "retrieved_timestamp": "1762652579.6523929", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen-0.5B-IRPO-1epoch", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen-0.5B-IRPO-1epoch", - 
"additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25891301746033857 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31638216610052033 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03172205438066465 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24664429530201343 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3286354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15001662234042554 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen-0.5B-IRPO-5epoch/301f71c8-fc1f-42e8-9029-f9d03574872b.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen-0.5B-IRPO-5epoch/301f71c8-fc1f-42e8-9029-f9d03574872b.json deleted file mode 100644 index c966cc8e3..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen-0.5B-IRPO-5epoch/301f71c8-fc1f-42e8-9029-f9d03574872b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen-0.5B-IRPO-5epoch/1762652579.652645", - "retrieved_timestamp": "1762652579.652645", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen-0.5B-IRPO-5epoch", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen-0.5B-IRPO-5epoch", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24867130325314607 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31891656220326015 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH 
Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0324773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23993288590604026 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32866666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1506815159574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen-0.5B-eDPO-1epoch/65e2f2b2-cb5b-40f3-b23a-8c0d185de219.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen-0.5B-eDPO-1epoch/65e2f2b2-cb5b-40f3-b23a-8c0d185de219.json deleted file mode 100644 index dea67fbe4..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen-0.5B-eDPO-1epoch/65e2f2b2-cb5b-40f3-b23a-8c0d185de219.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen-0.5B-eDPO-1epoch/1762652579.652854", - "retrieved_timestamp": "1762652579.6528552", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen-0.5B-eDPO-1epoch", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen-0.5B-eDPO-1epoch", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26233504878167707 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3180637583450692 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2424496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33269791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": 
"Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15525265957446807 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen-0.5B-eDPO-5epoch/062a1dcd-2553-4657-8f89-a481ff62a193.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen-0.5B-eDPO-5epoch/062a1dcd-2553-4657-8f89-a481ff62a193.json deleted file mode 100644 index 40da6cda0..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen-0.5B-eDPO-5epoch/062a1dcd-2553-4657-8f89-a481ff62a193.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen-0.5B-eDPO-5epoch/1762652579.653099", - "retrieved_timestamp": "1762652579.6531", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen-0.5B-eDPO-5epoch", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen-0.5B-eDPO-5epoch", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24774708883540117 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3096491823869347 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.023413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24916107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3326354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15226063829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IPO_5e-7-1ep_0alp_0lam/82b47608-08b5-4368-bead-aa117736c06d.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IPO_5e-7-1ep_0alp_0lam/82b47608-08b5-4368-bead-aa117736c06d.json deleted file mode 100644 index 62d64c45b..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IPO_5e-7-1ep_0alp_0lam/82b47608-08b5-4368-bead-aa117736c06d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IPO_5e-7-1ep_0alp_0lam/1762652579.680979", - "retrieved_timestamp": 
"1762652579.68098", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-IPO_5e-7-1ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-IPO_5e-7-1ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2573892826589006 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3279091360416723 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.055891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31685416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16505984042553193 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IPO_5e-7-3ep_0alp_0lam/747310d0-7c30-4261-b2e8-a783d8753e9a.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IPO_5e-7-3ep_0alp_0lam/747310d0-7c30-4261-b2e8-a783d8753e9a.json deleted file mode 100644 index b740c1b59..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IPO_5e-7-3ep_0alp_0lam/747310d0-7c30-4261-b2e8-a783d8753e9a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IPO_5e-7-3ep_0alp_0lam/1762652579.6812391", - "retrieved_timestamp": "1762652579.68124", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-IPO_5e-7-3ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-IPO_5e-7-3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3072481017034801 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32638442794247285 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0581570996978852 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31564583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1624002659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_1e-6-3ep_1alp_0lam/a7b6a07a-70fc-4d34-9a92-265b848d22d7.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_1e-6-3ep_1alp_0lam/a7b6a07a-70fc-4d34-9a92-265b848d22d7.json deleted file mode 100644 index c42092b23..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_1e-6-3ep_1alp_0lam/a7b6a07a-70fc-4d34-9a92-265b848d22d7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_1e-6-3ep_1alp_0lam/1762652579.68145", - "retrieved_timestamp": "1762652579.68145", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-IRPO_1e-6-3ep_1alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-IRPO_1e-6-3ep_1alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25509093649294984 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3242353334886223 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.04682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31825 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15741356382978725 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_1e-7-3ep_1alp_0lam/99139c71-a4f2-45d7-95b8-a8b7720681aa.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_1e-7-3ep_1alp_0lam/99139c71-a4f2-45d7-95b8-a8b7720681aa.json deleted file mode 100644 index ec96d0746..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_1e-7-3ep_1alp_0lam/99139c71-a4f2-45d7-95b8-a8b7720681aa.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_1e-7-3ep_1alp_0lam/1762652579.681671", - "retrieved_timestamp": "1762652579.681671", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-IRPO_1e-7-3ep_1alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-IRPO_1e-7-3ep_1alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26358395723347383 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3198054258965539 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32615625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15857712765957446 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_3e-6-1ep_1alp_0lam/6407040d-023d-476a-ac79-ef85e104eace.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_3e-6-1ep_1alp_0lam/6407040d-023d-476a-ac79-ef85e104eace.json deleted file mode 100644 index 451095eb1..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_3e-6-1ep_1alp_0lam/6407040d-023d-476a-ac79-ef85e104eace.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_3e-6-1ep_1alp_0lam/1762652579.681885", - "retrieved_timestamp": "1762652579.681886", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-IRPO_3e-6-1ep_1alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-IRPO_3e-6-1ep_1alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23228478215579107 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3254731912466387 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03700906344410876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25083892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31688541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16115359042553193 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_3e-6-2ep_1alp_0lam/64f71756-0a54-4a42-a96a-7056071c7dd0.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_3e-6-2ep_1alp_0lam/64f71756-0a54-4a42-a96a-7056071c7dd0.json deleted file mode 100644 index d673129e6..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_3e-6-2ep_1alp_0lam/64f71756-0a54-4a42-a96a-7056071c7dd0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_3e-6-2ep_1alp_0lam/1762652579.682102", - "retrieved_timestamp": "1762652579.682102", - 
"source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-IRPO_3e-6-2ep_1alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-IRPO_3e-6-2ep_1alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24137732328000816 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3314225693635648 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33415625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15317486702127658 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_3e-6-3ep_1alp_0lam/8c18d418-a0a4-435a-b31f-7d879c793b4c.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_3e-6-3ep_1alp_0lam/8c18d418-a0a4-435a-b31f-7d879c793b4c.json deleted file mode 100644 index ef61c84a2..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_3e-6-3ep_1alp_0lam/8c18d418-a0a4-435a-b31f-7d879c793b4c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_3e-6-3ep_1alp_0lam/1762652579.6823108", - "retrieved_timestamp": "1762652579.6823108", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-IRPO_3e-6-3ep_1alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-IRPO_3e-6-3ep_1alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": 
"Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2677805999193252 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3361518077587983 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33815625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15608377659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_3e-7-1ep_1alp_0lam/75e153a7-d699-4822-90b6-9d7da259e124.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_3e-7-1ep_1alp_0lam/75e153a7-d699-4822-90b6-9d7da259e124.json deleted file mode 100644 index 502ee00e0..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_3e-7-1ep_1alp_0lam/75e153a7-d699-4822-90b6-9d7da259e124.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_3e-7-1ep_1alp_0lam/1762652579.682508", - "retrieved_timestamp": "1762652579.682509", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-IRPO_3e-7-1ep_1alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-IRPO_3e-7-1ep_1alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25606501859510544 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3231121828613069 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05362537764350453 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31955208333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1589095744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_3e-7-3ep_1alp_0lam/836cc2ab-edbc-45fa-af8c-034d0239635b.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_3e-7-3ep_1alp_0lam/836cc2ab-edbc-45fa-af8c-034d0239635b.json deleted file mode 100644 index 2786c53ab..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_3e-7-3ep_1alp_0lam/836cc2ab-edbc-45fa-af8c-034d0239635b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_3e-7-3ep_1alp_0lam/1762652579.682722", - "retrieved_timestamp": "1762652579.682723", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-IRPO_3e-7-3ep_1alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-IRPO_3e-7-3ep_1alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2639086512675257 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3257435380157632 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32085416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.15866023936170212 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_5e-7-1ep_1alp_0lam/f270e1bd-7e75-4c6c-a701-9def96275025.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_5e-7-1ep_1alp_0lam/f270e1bd-7e75-4c6c-a701-9def96275025.json deleted file mode 100644 index 34d3090f0..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_5e-7-1ep_1alp_0lam/f270e1bd-7e75-4c6c-a701-9def96275025.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_5e-7-1ep_1alp_0lam/1762652579.682945", - "retrieved_timestamp": "1762652579.682946", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-IRPO_5e-7-1ep_1alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-IRPO_5e-7-1ep_1alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2517686405404327 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213578303108222 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31688541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1584940159574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_5e-7-2ep_1alp_0lam/02ec1b4f-f1e0-4c46-bff2-1475e95cff80.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_5e-7-2ep_1alp_0lam/02ec1b4f-f1e0-4c46-bff2-1475e95cff80.json deleted file mode 100644 index 12cfbe86a..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_5e-7-2ep_1alp_0lam/02ec1b4f-f1e0-4c46-bff2-1475e95cff80.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_5e-7-2ep_1alp_0lam/1762652579.683157", - "retrieved_timestamp": "1762652579.683158", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-IRPO_5e-7-2ep_1alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-IRPO_5e-7-2ep_1alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24382527249919106 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3266053460297184 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.061933534743202415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31955208333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15541888297872342 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_5e-7-3ep_1alp_0lam/9da4a976-09a2-4f1c-a15e-d498a2adfdd4.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_5e-7-3ep_1alp_0lam/9da4a976-09a2-4f1c-a15e-d498a2adfdd4.json deleted file mode 100644 index 5d2aea629..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_5e-7-3ep_1alp_0lam/9da4a976-09a2-4f1c-a15e-d498a2adfdd4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_5e-7-3ep_1alp_0lam/1762652579.6833699", - "retrieved_timestamp": "1762652579.683371", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-IRPO_5e-7-3ep_1alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-IRPO_5e-7-3ep_1alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on 
IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24654804806801509 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32458923603023143 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.052870090634441085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31821875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15633311170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.1_3e-6-3ep_0alp_0lam/c3a945da-be07-4132-b558-f20202530b4d.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.1_3e-6-3ep_0alp_0lam/c3a945da-be07-4132-b558-f20202530b4d.json deleted file mode 100644 index e8e1f51c0..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.1_3e-6-3ep_0alp_0lam/c3a945da-be07-4132-b558-f20202530b4d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.1_3e-6-3ep_0alp_0lam/1762652579.683736", - "retrieved_timestamp": "1762652579.683738", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.1_3e-6-3ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-MDPO_0.1_3e-6-3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2505695997730466 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32614538576285174 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.04984894259818731 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33818750000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15217752659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.1_5e-7-3ep_0alp_0lam/723afa16-d986-421c-a6ec-d1b00cb9d765.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.1_5e-7-3ep_0alp_0lam/723afa16-d986-421c-a6ec-d1b00cb9d765.json deleted file mode 100644 index 78e654eae..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.1_5e-7-3ep_0alp_0lam/723afa16-d986-421c-a6ec-d1b00cb9d765.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.1_5e-7-3ep_0alp_0lam/1762652579.684093", - "retrieved_timestamp": "1762652579.684094", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.1_5e-7-3ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-MDPO_0.1_5e-7-3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24567370133468086 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179765517720094 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3315208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15658244680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.3_3e-6-3ep_0alp_0lam/03e5cd5c-adc0-49d8-9e51-3e315d0bffd6.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.3_3e-6-3ep_0alp_0lam/03e5cd5c-adc0-49d8-9e51-3e315d0bffd6.json deleted file mode 100644 index ebac49c54..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.3_3e-6-3ep_0alp_0lam/03e5cd5c-adc0-49d8-9e51-3e315d0bffd6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.3_3e-6-3ep_0alp_0lam/1762652579.684393", - "retrieved_timestamp": "1762652579.684394", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.3_3e-6-3ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-MDPO_0.3_3e-6-3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24539887498503968 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32157618750132033 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33818750000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1544215425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.3_5e-7-3ep_0alp_0lam/6992c085-939e-48b0-8c8f-53d6ca9737de.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.3_5e-7-3ep_0alp_0lam/6992c085-939e-48b0-8c8f-53d6ca9737de.json deleted file mode 100644 index b5bfb9e93..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.3_5e-7-3ep_0alp_0lam/6992c085-939e-48b0-8c8f-53d6ca9737de.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.3_5e-7-3ep_0alp_0lam/1762652579.684617", - "retrieved_timestamp": "1762652579.684618", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.3_5e-7-3ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-MDPO_0.3_5e-7-3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2341830786756916 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3189252460411593 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33015625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15799534574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_1e-5-3ep_0alp_0lam/59e7ed2b-8385-4c83-b357-6dfa52e429cc.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_1e-5-3ep_0alp_0lam/59e7ed2b-8385-4c83-b357-6dfa52e429cc.json deleted file mode 100644 index 25f3e5020..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_1e-5-3ep_0alp_0lam/59e7ed2b-8385-4c83-b357-6dfa52e429cc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_1e-5-3ep_0alp_0lam/1762652579.684837", - "retrieved_timestamp": "1762652579.684837", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.5_1e-5-3ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-MDPO_0.5_1e-5-3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": 
"Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23196008812173918 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3233548545784329 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33688541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15425531914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-1ep_0alp_0lam/495ed31f-9cbc-4f6f-b4be-2b9ee8f5011c.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-1ep_0alp_0lam/495ed31f-9cbc-4f6f-b4be-2b9ee8f5011c.json deleted file mode 100644 index 042f3a5e6..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-1ep_0alp_0lam/495ed31f-9cbc-4f6f-b4be-2b9ee8f5011c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_3e-7-1ep_0alp_0lam/1762652579.6850612", - "retrieved_timestamp": "1762652579.685062", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-1ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-1ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24175188499847072 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3175499101875348 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04229607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15799534574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-2ep_0alp_0lam/6c5809dc-67b3-4567-8d1f-4a8104a11507.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-2ep_0alp_0lam/6c5809dc-67b3-4567-8d1f-4a8104a11507.json deleted file mode 100644 index 576729981..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-2ep_0alp_0lam/6c5809dc-67b3-4567-8d1f-4a8104a11507.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_3e-7-2ep_0alp_0lam/1762652579.6852841", - "retrieved_timestamp": "1762652579.685285", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-2ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-2ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24932069132124984 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196623899087389 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04229607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.33148958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15708111702127658 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-3ep_0alp_0lam/44c78761-2672-49c4-85f4-9b0d575dd914.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-3ep_0alp_0lam/44c78761-2672-49c4-85f4-9b0d575dd914.json deleted file mode 100644 index b52b60835..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-3ep_0alp_0lam/44c78761-2672-49c4-85f4-9b0d575dd914.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_3e-7-3ep_0alp_0lam/1762652579.685507", - "retrieved_timestamp": "1762652579.685508", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-3ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2520434668900739 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3197552188491219 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04229607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3261875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15508643617021275 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_4e-6-3ep_0alp_0lam/b33d4765-4633-4c2b-a118-1ed82b0c842b.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_4e-6-3ep_0alp_0lam/b33d4765-4633-4c2b-a118-1ed82b0c842b.json deleted file mode 100644 index 6d312b43f..000000000 --- 
a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_4e-6-3ep_0alp_0lam/b33d4765-4633-4c2b-a118-1ed82b0c842b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_4e-6-3ep_0alp_0lam/1762652579.685728", - "retrieved_timestamp": "1762652579.685728", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.5_4e-6-3ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-MDPO_0.5_4e-6-3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25803867072700437 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3248229336342538 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34215625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15392287234042554 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_6e-6-3ep_0alp_0lam/8d200434-ef84-403e-9fb6-86c15c4ccfed.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_6e-6-3ep_0alp_0lam/8d200434-ef84-403e-9fb6-86c15c4ccfed.json deleted file mode 100644 index 85b7c1d83..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_6e-6-3ep_0alp_0lam/8d200434-ef84-403e-9fb6-86c15c4ccfed.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_6e-6-3ep_0alp_0lam/1762652579.685941", - "retrieved_timestamp": "1762652579.685942", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"JayHyeon/Qwen_0.5-MDPO_0.5_6e-6-3ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-MDPO_0.5_6e-6-3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23196008812173918 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.326545450978746 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27097315436241615 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33948958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15367353723404256 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_7e-6-3ep_0alp_0lam/3a666f3f-f2ea-4fed-b2fe-750b759eae7a.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_7e-6-3ep_0alp_0lam/3a666f3f-f2ea-4fed-b2fe-750b759eae7a.json deleted file mode 100644 index 14cc0e915..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_7e-6-3ep_0alp_0lam/3a666f3f-f2ea-4fed-b2fe-750b759eae7a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_7e-6-3ep_0alp_0lam/1762652579.686151", - "retrieved_timestamp": "1762652579.686152", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.5_7e-6-3ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-MDPO_0.5_7e-6-3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2487710386219675 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3272739110084265 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04607250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33415625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15309175531914893 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_7e-7-3ep_0alp_0lam/7fbad2de-a9da-4962-ae18-47298811ba5b.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_7e-7-3ep_0alp_0lam/7fbad2de-a9da-4962-ae18-47298811ba5b.json deleted file mode 100644 index f29623d7d..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_7e-7-3ep_0alp_0lam/7fbad2de-a9da-4962-ae18-47298811ba5b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_7e-7-3ep_0alp_0lam/1762652579.686357", - "retrieved_timestamp": "1762652579.686357", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.5_7e-7-3ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-MDPO_0.5_7e-7-3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25236816092412573 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3129690310926447 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0445619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, 
- { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32885416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15641622340425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.7_3e-6-3ep_0alp_0lam/1fad00cf-e472-42dc-8b87-a0501cb051ab.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.7_3e-6-3ep_0alp_0lam/1fad00cf-e472-42dc-8b87-a0501cb051ab.json deleted file mode 100644 index 2fb905204..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.7_3e-6-3ep_0alp_0lam/1fad00cf-e472-42dc-8b87-a0501cb051ab.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.7_3e-6-3ep_0alp_0lam/1762652579.686578", - "retrieved_timestamp": "1762652579.686579", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.7_3e-6-3ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-MDPO_0.7_3e-6-3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2513940788219702 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.322095658026178 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33148958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15383976063829788 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.7_5e-7-3ep_0alp_0lam/c68fad94-ce6a-4053-b991-2c1e660fe7d9.json 
b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.7_5e-7-3ep_0alp_0lam/c68fad94-ce6a-4053-b991-2c1e660fe7d9.json deleted file mode 100644 index 0f4776134..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.7_5e-7-3ep_0alp_0lam/c68fad94-ce6a-4053-b991-2c1e660fe7d9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.7_5e-7-3ep_0alp_0lam/1762652579.686833", - "retrieved_timestamp": "1762652579.6868339", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.7_5e-7-3ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-MDPO_0.7_5e-7-3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24567370133468086 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3180087717709833 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3275208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15724734042553193 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.9_5e-7-3ep_0alp_0lam/a6a3ee79-a93b-4220-ac09-1c5d2f70cdf8.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.9_5e-7-3ep_0alp_0lam/a6a3ee79-a93b-4220-ac09-1c5d2f70cdf8.json deleted file mode 100644 index 66bbc80db..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.9_5e-7-3ep_0alp_0lam/a6a3ee79-a93b-4220-ac09-1c5d2f70cdf8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.9_5e-7-3ep_0alp_0lam/1762652579.6870458", - "retrieved_timestamp": "1762652579.687047", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.9_5e-7-3ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-MDPO_0.9_5e-7-3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26363382491788456 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31806866682195567 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3235208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15741356382978725 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_3e-6-1ep_3vpo_const/e3471a51-fad2-44cf-bd0c-ad1250d22f83.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_3e-6-1ep_3vpo_const/e3471a51-fad2-44cf-bd0c-ad1250d22f83.json deleted file mode 100644 index d25bd46b2..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_3e-6-1ep_3vpo_const/e3471a51-fad2-44cf-bd0c-ad1250d22f83.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_3e-6-1ep_3vpo_const/1762652579.6873431", - "retrieved_timestamp": "1762652579.687347", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-VDPO_3e-6-1ep_3vpo_const", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-VDPO_3e-6-1ep_3vpo_const", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.24829674153468353 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3174312444218736 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0377643504531722 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33279166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1558344414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_0alp_0lam/5a3a76e9-f93d-435c-898c-b76bc5dc0cda.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_0alp_0lam/5a3a76e9-f93d-435c-898c-b76bc5dc0cda.json deleted file mode 100644 index 492e85d76..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_0alp_0lam/5a3a76e9-f93d-435c-898c-b76bc5dc0cda.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_0alp_0lam/1762652579.687733", - "retrieved_timestamp": "1762652579.687735", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2517686405404327 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3218020653711833 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.052870090634441085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32348958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15949135638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_10vpo_const/fc83f198-e606-4c3d-aede-cb646b080b3b.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_10vpo_const/fc83f198-e606-4c3d-aede-cb646b080b3b.json deleted file mode 100644 index 7f37da2dc..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_10vpo_const/fc83f198-e606-4c3d-aede-cb646b080b3b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_10vpo_const/1762652579.6880698", - "retrieved_timestamp": "1762652579.688079", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_10vpo_const", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_10vpo_const", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25361706937592254 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3234331515135053 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04909365558912387 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32355208333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15965757978723405 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_1vpo_const/e0452e02-8cf3-4da6-83f6-844f1de6fac2.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_1vpo_const/e0452e02-8cf3-4da6-83f6-844f1de6fac2.json deleted file mode 100644 index ba04d6dc2..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_1vpo_const/e0452e02-8cf3-4da6-83f6-844f1de6fac2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_1vpo_const/1762652579.688372", - "retrieved_timestamp": "1762652579.688373", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_1vpo_const", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_1vpo_const", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24479935460134664 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32395300683134437 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32485416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15866023936170212 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_3vpo_const/0792bedd-3891-4622-983b-886c126ace68.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_3vpo_const/0792bedd-3891-4622-983b-886c126ace68.json deleted file mode 100644 index c87a057c8..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_3vpo_const/0792bedd-3891-4622-983b-886c126ace68.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_3vpo_const/1762652579.688585", - "retrieved_timestamp": "1762652579.688586", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_3vpo_const", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_3vpo_const", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25046986440422525 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.322699453909483 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3209166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1589095744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_0alp_0lam/31e52020-32b2-4271-89b5-31dfde730404.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_0alp_0lam/31e52020-32b2-4271-89b5-31dfde730404.json deleted file mode 100644 index 0afee0fdd..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_0alp_0lam/31e52020-32b2-4271-89b5-31dfde730404.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_5e-7-3ep_0alp_0lam/1762652579.6888041", - "retrieved_timestamp": "1762652579.688805", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy 
on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24719743613611883 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.325505796038594 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04984894259818731 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32079166666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15866023936170212 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_1vpo_const/06074d49-defe-4303-9899-18f074a06935.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_1vpo_const/06074d49-defe-4303-9899-18f074a06935.json deleted file mode 100644 index db89bcc08..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_1vpo_const/06074d49-defe-4303-9899-18f074a06935.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_5e-7-3ep_1vpo_const/1762652579.689013", - "retrieved_timestamp": "1762652579.689014", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_1vpo_const", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_1vpo_const", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24165214962964932 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3255889369754366 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0581570996978852 - } - 
}, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32745833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15625 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_3vpo_const/1ef0a501-863d-49dc-9bda-5151fb161b41.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_3vpo_const/1ef0a501-863d-49dc-9bda-5151fb161b41.json deleted file mode 100644 index 23f31df69..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_3vpo_const/1ef0a501-863d-49dc-9bda-5151fb161b41.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_5e-7-3ep_3vpo_const/1762652579.689225", - "retrieved_timestamp": "1762652579.689225", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_3vpo_const", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_3vpo_const", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2526928549581776 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32354099176995715 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32348958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.15799534574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_0alp_0lam/15177605-2eea-4d8a-8462-7b64f7d29071.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_0alp_0lam/15177605-2eea-4d8a-8462-7b64f7d29071.json deleted file mode 100644 index eda3831df..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_0alp_0lam/15177605-2eea-4d8a-8462-7b64f7d29071.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_0alp_0lam/1762652579.68944", - "retrieved_timestamp": "1762652579.689441", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26685638550158025 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3313735254746672 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07099697885196375 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3168229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16339760638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_10vpo_const/09996570-4086-46c5-900e-887c3d5d5826.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_10vpo_const/09996570-4086-46c5-900e-887c3d5d5826.json deleted file mode 100644 index 7b0ecc39d..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_10vpo_const/09996570-4086-46c5-900e-887c3d5d5826.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_10vpo_const/1762652579.689661", - "retrieved_timestamp": "1762652579.689662", - 
"source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_10vpo_const", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_10vpo_const", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.270228549138508 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3299802970903615 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07401812688821752 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32079166666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1634807180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_1vpo_const/8a24b990-24f1-46f6-a4f9-4ecaa39b4ec7.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_1vpo_const/8a24b990-24f1-46f6-a4f9-4ecaa39b4ec7.json deleted file mode 100644 index f89b91b3d..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_1vpo_const/8a24b990-24f1-46f6-a4f9-4ecaa39b4ec7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_1vpo_const/1762652579.689882", - "retrieved_timestamp": "1762652579.689883", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_1vpo_const", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_1vpo_const", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24802191518504235 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33086196042215565 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3208229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16489361702127658 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_30vpo_const/ac310031-4080-4124-a858-e1293532b222.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_30vpo_const/ac310031-4080-4124-a858-e1293532b222.json deleted file mode 100644 index 35a4d8065..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_30vpo_const/ac310031-4080-4124-a858-e1293532b222.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_30vpo_const/1762652579.690102", - "retrieved_timestamp": "1762652579.690103", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_30vpo_const", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_30vpo_const", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26223531341285566 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3281993681712964 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.07401812688821752 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.322125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16339760638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_3vpo_const/75a8a0dd-e64d-4462-b8be-8006f6710653.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_3vpo_const/75a8a0dd-e64d-4462-b8be-8006f6710653.json deleted file mode 100644 index 7668b8099..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_3vpo_const/75a8a0dd-e64d-4462-b8be-8006f6710653.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_3vpo_const/1762652579.690311", - "retrieved_timestamp": "1762652579.690312", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_3vpo_const", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_3vpo_const", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2608611816646498 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32980236442597805 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31679166666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1651429521276596 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_0alp_0lam/8469a871-39e1-4b21-bb7c-fa21026a01ba.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_0alp_0lam/8469a871-39e1-4b21-bb7c-fa21026a01ba.json deleted file mode 100644 index 293555691..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_0alp_0lam/8469a871-39e1-4b21-bb7c-fa21026a01ba.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_0alp_0lam/1762652579.69052", - "retrieved_timestamp": "1762652579.690521", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_0alp_0lam", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2930347034756668 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3219547893625387 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06268882175226587 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3115833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1590757978723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_10vpo_const/046380aa-08bf-4d95-a4cc-bbfaf30eb56b.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_10vpo_const/046380aa-08bf-4d95-a4cc-bbfaf30eb56b.json deleted file mode 100644 index a9bf04467..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_10vpo_const/046380aa-08bf-4d95-a4cc-bbfaf30eb56b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_10vpo_const/1762652579.690735", - 
"retrieved_timestamp": "1762652579.690736", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_10vpo_const", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_10vpo_const", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28813880503730105 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32553831509236264 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07250755287009064 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31024999999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15816156914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_1vpo_const/fa8ee240-a7ac-4edc-9ac7-beabf38af0fa.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_1vpo_const/fa8ee240-a7ac-4edc-9ac7-beabf38af0fa.json deleted file mode 100644 index a67ceabc5..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_1vpo_const/fa8ee240-a7ac-4edc-9ac7-beabf38af0fa.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_1vpo_const/1762652579.690953", - "retrieved_timestamp": "1762652579.690954", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_1vpo_const", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_1vpo_const", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2887383254209941 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3237016212336586 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07477341389728097 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31425 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16090425531914893 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_30vpo_const/6d30ee72-d0ea-496d-8375-892968c8602e.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_30vpo_const/6d30ee72-d0ea-496d-8375-892968c8602e.json deleted file mode 100644 index 1fa327b1d..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_30vpo_const/6d30ee72-d0ea-496d-8375-892968c8602e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_30vpo_const/1762652579.691165", - "retrieved_timestamp": "1762652579.691166", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_30vpo_const", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_30vpo_const", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2905368865720732 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3254390641560331 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.0770392749244713 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3129166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15741356382978725 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_3vpo_const/903b0e99-e50a-4afa-8085-1fd01872c048.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_3vpo_const/903b0e99-e50a-4afa-8085-1fd01872c048.json deleted file mode 100644 index e7e3e40b0..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_3vpo_const/903b0e99-e50a-4afa-8085-1fd01872c048.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_3vpo_const/1762652579.691372", - "retrieved_timestamp": "1762652579.691373", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_3vpo_const", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_3vpo_const", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2904870188876625 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32381698216947513 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0702416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30894791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on 
MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15915890957446807 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.1/225277d4-e1b9-4992-8e2d-678ac6157b06.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.1/225277d4-e1b9-4992-8e2d-678ac6157b06.json deleted file mode 100644 index 2b0ab710d..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.1/225277d4-e1b9-4992-8e2d-678ac6157b06.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.1/1762652579.691587", - "retrieved_timestamp": "1762652579.691587", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23925406809487715 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3244192088381941 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1573304521276596 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.3/4991436d-59fd-4f66-b588-9103beeeba5f.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.3/4991436d-59fd-4f66-b588-9103beeeba5f.json deleted file mode 100644 index 49cd89594..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.3/4991436d-59fd-4f66-b588-9103beeeba5f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/JayHyeon_Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.3/1762652579.691787", - "retrieved_timestamp": "1762652579.691788", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24747226248576 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32090616030928304 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04607250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3275208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1566655585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-rDPO_3e-6-1ep_0vpo_const_0.1/6118242a-de0a-4734-979d-86f2cc6fc65c.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-rDPO_3e-6-1ep_0vpo_const_0.1/6118242a-de0a-4734-979d-86f2cc6fc65c.json deleted file mode 100644 index 1d77d57b9..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-rDPO_3e-6-1ep_0vpo_const_0.1/6118242a-de0a-4734-979d-86f2cc6fc65c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-rDPO_3e-6-1ep_0vpo_const_0.1/1762652579.691988", - "retrieved_timestamp": "1762652579.691989", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-rDPO_3e-6-1ep_0vpo_const_0.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-rDPO_3e-6-1ep_0vpo_const_0.1", - "additional_details": { - "precision": "bfloat16", - 
"architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.232135179102559 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32779679775418075 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3021875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14960106382978725 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.1/a6b71abf-7ee1-438b-8218-98803bca8de8.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.1/a6b71abf-7ee1-438b-8218-98803bca8de8.json deleted file mode 100644 index b12f06607..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.1/a6b71abf-7ee1-438b-8218-98803bca8de8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.1/1762652579.6921952", - "retrieved_timestamp": "1762652579.6921952", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2541667220752049 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3253117533747236 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.052870090634441085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.318125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16090425531914893 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.3/f7fb8d6b-9773-42e7-a426-a35a401f689a.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.3/f7fb8d6b-9773-42e7-a426-a35a401f689a.json deleted file mode 100644 index 703ad92b5..000000000 --- a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.3/f7fb8d6b-9773-42e7-a426-a35a401f689a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.3/1762652579.6924422", - "retrieved_timestamp": "1762652579.692443", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.273875539125077 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3245102552473828 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04607250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25083892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3089166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15965757978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/JungZoona/T3Q-qwen2.5-14b-v1.0-e3/eb7694ce-6fe4-4bb0-bcab-266ccc71f78a.json b/data/hfopenllm_v2/alibaba/JungZoona/T3Q-qwen2.5-14b-v1.0-e3/eb7694ce-6fe4-4bb0-bcab-266ccc71f78a.json deleted file mode 100644 index 921b09306..000000000 --- a/data/hfopenllm_v2/alibaba/JungZoona/T3Q-qwen2.5-14b-v1.0-e3/eb7694ce-6fe4-4bb0-bcab-266ccc71f78a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JungZoona_T3Q-qwen2.5-14b-v1.0-e3/1762652579.697056", - "retrieved_timestamp": "1762652579.697057", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JungZoona/T3Q-qwen2.5-14b-v1.0-e3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JungZoona/T3Q-qwen2.5-14b-v1.0-e3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.732396707403024 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7585971930826706 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2862537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41694630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5911041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5884308510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Junhoee/Qwen-Megumin/0f231e27-deec-4b10-a995-d493ecf8400f.json b/data/hfopenllm_v2/alibaba/Junhoee/Qwen-Megumin/0f231e27-deec-4b10-a995-d493ecf8400f.json deleted file mode 100644 index 36e6bd8f3..000000000 --- a/data/hfopenllm_v2/alibaba/Junhoee/Qwen-Megumin/0f231e27-deec-4b10-a995-d493ecf8400f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/Junhoee_Qwen-Megumin/1762652579.69731", - "retrieved_timestamp": "1762652579.697311", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Junhoee/Qwen-Megumin", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Junhoee/Qwen-Megumin", - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 15.231 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7141118897857683 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.528526812457251 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4901812688821752 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39803125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41988031914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/KingNish/Qwen2.5-0.5b-Test-ft/5a28540f-3a94-478c-84c0-5be8db86328a.json b/data/hfopenllm_v2/alibaba/KingNish/Qwen2.5-0.5b-Test-ft/5a28540f-3a94-478c-84c0-5be8db86328a.json deleted file mode 100644 index 8e5c48a34..000000000 --- a/data/hfopenllm_v2/alibaba/KingNish/Qwen2.5-0.5b-Test-ft/5a28540f-3a94-478c-84c0-5be8db86328a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/KingNish_Qwen2.5-0.5b-Test-ft/1762652579.699473", - "retrieved_timestamp": "1762652579.699473", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "KingNish/Qwen2.5-0.5b-Test-ft", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "KingNish/Qwen2.5-0.5b-Test-ft", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26708134416681073 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3231533857529747 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.035498489425981876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.342125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16888297872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/KingNish/qwen-1b-continued-v2.1/f12c6b15-107a-41ed-98fa-40b0af5be42e.json b/data/hfopenllm_v2/alibaba/KingNish/qwen-1b-continued-v2.1/f12c6b15-107a-41ed-98fa-40b0af5be42e.json deleted file mode 100644 index 7bd912c3c..000000000 --- a/data/hfopenllm_v2/alibaba/KingNish/qwen-1b-continued-v2.1/f12c6b15-107a-41ed-98fa-40b0af5be42e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/KingNish_qwen-1b-continued-v2.1/1762652579.700618", - "retrieved_timestamp": "1762652579.700619", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "KingNish/qwen-1b-continued-v2.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "KingNish/qwen-1b-continued-v2.1", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.277 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11268323603594019 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30416583041069006 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": 
"Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41539583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1278257978723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/KingNish/qwen-1b-continued-v2.2/cf6aeb1a-4814-41ad-96f5-b59caafb902f.json b/data/hfopenllm_v2/alibaba/KingNish/qwen-1b-continued-v2.2/cf6aeb1a-4814-41ad-96f5-b59caafb902f.json deleted file mode 100644 index 889f5238f..000000000 --- a/data/hfopenllm_v2/alibaba/KingNish/qwen-1b-continued-v2.2/cf6aeb1a-4814-41ad-96f5-b59caafb902f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/KingNish_qwen-1b-continued-v2.2/1762652579.7008262", - "retrieved_timestamp": "1762652579.700827", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "KingNish/qwen-1b-continued-v2.2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "KingNish/qwen-1b-continued-v2.2", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.277 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14125963554479892 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30586579449667844 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35130208333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1262466755319149 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/alibaba/KingNish/qwen-1b-continued-v2/479d9f2a-82f6-42de-b8d6-92405f60638c.json b/data/hfopenllm_v2/alibaba/KingNish/qwen-1b-continued-v2/479d9f2a-82f6-42de-b8d6-92405f60638c.json deleted file mode 100644 index 1eea19633..000000000 --- a/data/hfopenllm_v2/alibaba/KingNish/qwen-1b-continued-v2/479d9f2a-82f6-42de-b8d6-92405f60638c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/KingNish_qwen-1b-continued-v2/1762652579.7004201", - "retrieved_timestamp": "1762652579.700421", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "KingNish/qwen-1b-continued-v2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "KingNish/qwen-1b-continued-v2", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.277 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1578711153073844 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31194932022650246 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33927083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11926529255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/KingNish/qwen-1b-continued/a4063b77-fc24-4c9d-bf08-cb28fc6e8259.json b/data/hfopenllm_v2/alibaba/KingNish/qwen-1b-continued/a4063b77-fc24-4c9d-bf08-cb28fc6e8259.json deleted file mode 100644 index 8dfeb8dd4..000000000 --- a/data/hfopenllm_v2/alibaba/KingNish/qwen-1b-continued/a4063b77-fc24-4c9d-bf08-cb28fc6e8259.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/KingNish_qwen-1b-continued/1762652579.700214", - "retrieved_timestamp": "1762652579.700215", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM 
v2", - "source_type": "documentation" - }, - "model_info": { - "name": "KingNish/qwen-1b-continued", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "KingNish/qwen-1b-continued", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.277 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12547263483113694 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29909543894796364 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38587499999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1260804521276596 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Kukedlc/Qwen-2.5-7b-Spanish-o1-CoT/c9a159fb-9e6b-49b3-8f2b-a2d2d3ca8f19.json b/data/hfopenllm_v2/alibaba/Kukedlc/Qwen-2.5-7b-Spanish-o1-CoT/c9a159fb-9e6b-49b3-8f2b-a2d2d3ca8f19.json deleted file mode 100644 index 1cb4d254c..000000000 --- a/data/hfopenllm_v2/alibaba/Kukedlc/Qwen-2.5-7b-Spanish-o1-CoT/c9a159fb-9e6b-49b3-8f2b-a2d2d3ca8f19.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Kukedlc_Qwen-2.5-7b-Spanish-o1-CoT/1762652579.703295", - "retrieved_timestamp": "1762652579.703295", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Kukedlc/Qwen-2.5-7b-Spanish-o1-CoT", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Kukedlc/Qwen-2.5-7b-Spanish-o1-CoT", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4210295349672203 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5601947823443537 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726586102719033 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4776770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4363364361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Lambent/qwen2.5-reinstruct-alternate-lumen-14B/974e902e-0959-42d0-98f8-288e1a6ce887.json b/data/hfopenllm_v2/alibaba/Lambent/qwen2.5-reinstruct-alternate-lumen-14B/974e902e-0959-42d0-98f8-288e1a6ce887.json deleted file mode 100644 index fc7f387e6..000000000 --- a/data/hfopenllm_v2/alibaba/Lambent/qwen2.5-reinstruct-alternate-lumen-14B/974e902e-0959-42d0-98f8-288e1a6ce887.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lambent_qwen2.5-reinstruct-alternate-lumen-14B/1762652579.707211", - "retrieved_timestamp": "1762652579.707212", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lambent/qwen2.5-reinstruct-alternate-lumen-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lambent/qwen2.5-reinstruct-alternate-lumen-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47938137475232384 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6458988582965893 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4622356495468278 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3766778523489933 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47700000000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.538813164893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/LenguajeNaturalAI/leniachat-qwen2-1.5B-v0/eb6e6d30-b349-447c-83d3-fe7760e83037.json b/data/hfopenllm_v2/alibaba/LenguajeNaturalAI/leniachat-qwen2-1.5B-v0/eb6e6d30-b349-447c-83d3-fe7760e83037.json deleted file mode 100644 index 6e2045ff0..000000000 --- a/data/hfopenllm_v2/alibaba/LenguajeNaturalAI/leniachat-qwen2-1.5B-v0/eb6e6d30-b349-447c-83d3-fe7760e83037.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LenguajeNaturalAI_leniachat-qwen2-1.5B-v0/1762652579.713998", - "retrieved_timestamp": "1762652579.713999", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LenguajeNaturalAI/leniachat-qwen2-1.5B-v0", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "LenguajeNaturalAI/leniachat-qwen2-1.5B-v0", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.543 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22211842356059697 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36835590195612017 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3749895833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18799867021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v3/eb958d5c-aa2e-4640-bef7-c8b10a892847.json 
b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v3/eb958d5c-aa2e-4640-bef7-c8b10a892847.json deleted file mode 100644 index 309314989..000000000 --- a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v3/eb958d5c-aa2e-4640-bef7-c8b10a892847.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v3/1762652579.736984", - "retrieved_timestamp": "1762652579.7369852", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7048697456083193 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6478481476573447 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4161631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38171140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48075 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5393949468085106 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v4/17c5c728-e03d-45e9-aaae-816c4e90b14f.json b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v4/17c5c728-e03d-45e9-aaae-816c4e90b14f.json deleted file mode 100644 index 6b26695a2..000000000 --- a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v4/17c5c728-e03d-45e9-aaae-816c4e90b14f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v4/1762652579.737248", - "retrieved_timestamp": "1762652579.7372491", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open 
LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v4", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v4", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6943033373670748 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6419880364363972 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3466767371601209 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3716442953020134 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.476875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5251828457446809 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v5/79d3d942-8d5f-4aca-8759-8d70b8cfc5f3.json b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v5/79d3d942-8d5f-4aca-8759-8d70b8cfc5f3.json deleted file mode 100644 index 698686a72..000000000 --- a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v5/79d3d942-8d5f-4aca-8759-8d70b8cfc5f3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v5/1762652579.737468", - "retrieved_timestamp": "1762652579.737469", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v5", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v5", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7485084021507378 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": 
"Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6466679318879384 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43580060422960726 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3624161073825503 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4473020833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5140458776595744 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6-cpt/92bff089-baed-4f1f-852b-f274a7920a1a.json b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6-cpt/92bff089-baed-4f1f-852b-f274a7920a1a.json deleted file mode 100644 index cd408f90f..000000000 --- a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6-cpt/92bff089-baed-4f1f-852b-f274a7920a1a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v6-cpt/1762652579.7379", - "retrieved_timestamp": "1762652579.7379", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6-cpt", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6-cpt", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46634152936430895 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6214839063250638 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33157099697885195 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.37583892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49373958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5204454787234043 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6/c4b27a1b-28dd-4a79-839c-ad8673034937.json b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6/c4b27a1b-28dd-4a79-839c-ad8673034937.json deleted file mode 100644 index 3a5e522e9..000000000 --- a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6/c4b27a1b-28dd-4a79-839c-ad8673034937.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v6/1762652579.737686", - "retrieved_timestamp": "1762652579.737687", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.704320092909037 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6457646219275207 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3957703927492447 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3775167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47678125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5392287234042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7-rebase/46a21741-1860-4498-8284-c94fccad1ed0.json 
b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7-rebase/46a21741-1860-4498-8284-c94fccad1ed0.json deleted file mode 100644 index 860d0c1c5..000000000 --- a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7-rebase/46a21741-1860-4498-8284-c94fccad1ed0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v7-rebase/1762652579.738374", - "retrieved_timestamp": "1762652579.7383769", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7-rebase", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7-rebase", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.693054428915278 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6422587980411637 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3406344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.375 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48881250000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5276761968085106 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7/d540acde-9601-4119-8ae2-f7cdf82f43f7.json b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7/d540acde-9601-4119-8ae2-f7cdf82f43f7.json deleted file mode 100644 index f0243b775..000000000 --- a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7/d540acde-9601-4119-8ae2-f7cdf82f43f7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v7/1762652579.738115", - "retrieved_timestamp": "1762652579.738116", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": 
"third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6793906833867471 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.653127892154805 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41012084592145015 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37919463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4833854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5375664893617021 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.5/c723fc6f-2656-4084-81d0-4cbaf0587049.json b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.5/c723fc6f-2656-4084-81d0-4cbaf0587049.json deleted file mode 100644 index d67d73793..000000000 --- a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.5/c723fc6f-2656-4084-81d0-4cbaf0587049.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.5/1762652579.738977", - "retrieved_timestamp": "1762652579.7389781", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.5", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.5", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5928624937388352 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6451310724242122 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36555891238670696 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3800335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47696875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5290059840425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.6/526f6468-b7a8-47a7-9ed4-c2aa7cc63ca1.json b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.6/526f6468-b7a8-47a7-9ed4-c2aa7cc63ca1.json deleted file mode 100644 index 0096e15bb..000000000 --- a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.6/526f6468-b7a8-47a7-9ed4-c2aa7cc63ca1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.6/1762652579.7392142", - "retrieved_timestamp": "1762652579.7392151", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.6", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.6", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5919382793210903 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6457173605698173 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4070996978851964 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.38422818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49532291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5399767287234043 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.7/56232cf6-7ee7-45ed-b139-ea20e148b5fa.json b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.7/56232cf6-7ee7-45ed-b139-ea20e148b5fa.json deleted file mode 100644 index 9459fd127..000000000 --- a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.7/56232cf6-7ee7-45ed-b139-ea20e148b5fa.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.7/1762652579.7395148", - "retrieved_timestamp": "1762652579.739517", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.7", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.7", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7874761189200211 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6482757721443902 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.540785498489426 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35151006711409394 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4380625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.524185505319149 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.8/51ff4f00-1d21-4f98-b5a3-7a72c4b2a5b1.json b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.8/51ff4f00-1d21-4f98-b5a3-7a72c4b2a5b1.json deleted file mode 100644 index 74ce22608..000000000 --- a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.8/51ff4f00-1d21-4f98-b5a3-7a72c4b2a5b1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.8/1762652579.739795", - "retrieved_timestamp": "1762652579.739796", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.8", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.8", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7027963581075989 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6565626437486437 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42371601208459214 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37583892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4911979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5323304521276596 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.9/eee0ebda-6ff8-45bd-ac4e-15aeb724d0d1.json b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.9/eee0ebda-6ff8-45bd-ac4e-15aeb724d0d1.json deleted file mode 100644 index da720a04e..000000000 --- a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.9/eee0ebda-6ff8-45bd-ac4e-15aeb724d0d1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.9/1762652579.74003", - "retrieved_timestamp": "1762652579.740031", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.9", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.9", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7993413032974729 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6483097746745584 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5370090634441088 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3296979865771812 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43282291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5199468085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8/b3e7af18-231e-4839-809c-bc5bfe7b4182.json b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8/b3e7af18-231e-4839-809c-bc5bfe7b4182.json deleted file mode 100644 index 665e47b1b..000000000 --- a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8/b3e7af18-231e-4839-809c-bc5bfe7b4182.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8/1762652579.738731", - "retrieved_timestamp": "1762652579.738732", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 
- }, - "score_details": { - "score": 0.7874761189200211 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6419472828128271 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5558912386706949 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33557046979865773 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43936458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5206117021276596 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9-stock/757269fe-8662-4eaa-8e76-5c2f88d8fbb0.json b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9-stock/757269fe-8662-4eaa-8e76-5c2f88d8fbb0.json deleted file mode 100644 index 1da4f0abe..000000000 --- a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9-stock/757269fe-8662-4eaa-8e76-5c2f88d8fbb0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9-stock/1762652579.740509", - "retrieved_timestamp": "1762652579.74051", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9-stock", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9-stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6513639365771708 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6570671029574323 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41842900302114805 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": 
"Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38422818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4819583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5412234042553191 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.1/dffd1a4a-a056-43c2-bda3-0cfa21406656.json b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.1/dffd1a4a-a056-43c2-bda3-0cfa21406656.json deleted file mode 100644 index 985d59088..000000000 --- a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.1/dffd1a4a-a056-43c2-bda3-0cfa21406656.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9.1/1762652579.74074", - "retrieved_timestamp": "1762652579.740741", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8002655177152178 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6554749578648256 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5468277945619335 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34312080536912754 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43539583333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5250997340425532 - } - } - ] 
-} diff --git a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.2/b5ecb480-16e6-4dfb-be77-ad8ef4e90aa3.json b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.2/b5ecb480-16e6-4dfb-be77-ad8ef4e90aa3.json deleted file mode 100644 index 03df1820a..000000000 --- a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.2/b5ecb480-16e6-4dfb-be77-ad8ef4e90aa3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9.2/1762652579.74097", - "retrieved_timestamp": "1762652579.74097", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7862272104682243 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6537693501484436 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5332326283987915 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35570469798657717 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43809375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5283410904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9/682a38c6-2fb8-4c42-b6ad-69fbe65be484.json b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9/682a38c6-2fb8-4c42-b6ad-69fbe65be484.json deleted file mode 100644 index 72889ec55..000000000 --- a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9/682a38c6-2fb8-4c42-b6ad-69fbe65be484.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9/1762652579.740272", - "retrieved_timestamp": "1762652579.740273", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.523519816309614 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6545588984302916 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43655589123867067 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3884228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4805625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.542220744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-OriginalFusion/cf14f098-cd46-4ca0-acec-02012eb78ea3.json b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-OriginalFusion/cf14f098-cd46-4ca0-acec-02012eb78ea3.json deleted file mode 100644 index db23d77fc..000000000 --- a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-OriginalFusion/cf14f098-cd46-4ca0-acec-02012eb78ea3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-OriginalFusion/1762652579.741195", - "retrieved_timestamp": "1762652579.741195", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-OriginalFusion", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lunzima/NQLSG-Qwen2.5-14B-OriginalFusion", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.6141947809589667 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6592166466793806 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42749244712990936 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.51215625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5238530585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Marsouuu/MiniQwenMathExpert-ECE-PRYMMAL-Martial/f1b6c510-02fe-4ffd-96da-4cfcfb04eb8c.json b/data/hfopenllm_v2/alibaba/Marsouuu/MiniQwenMathExpert-ECE-PRYMMAL-Martial/f1b6c510-02fe-4ffd-96da-4cfcfb04eb8c.json deleted file mode 100644 index 1b661e71b..000000000 --- a/data/hfopenllm_v2/alibaba/Marsouuu/MiniQwenMathExpert-ECE-PRYMMAL-Martial/f1b6c510-02fe-4ffd-96da-4cfcfb04eb8c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Marsouuu_MiniQwenMathExpert-ECE-PRYMMAL-Martial/1762652579.747411", - "retrieved_timestamp": "1762652579.747412", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Marsouuu/MiniQwenMathExpert-ECE-PRYMMAL-Martial", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Marsouuu/MiniQwenMathExpert-ECE-PRYMMAL-Martial", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2794961812435449 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42301343044108936 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11404833836858005 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": 
"Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38673958333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2922207446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/MaziyarPanahi/Qwen1.5-MoE-A2.7B-Wikihow/ee23e137-57d2-49aa-b267-27bd48457d46.json b/data/hfopenllm_v2/alibaba/MaziyarPanahi/Qwen1.5-MoE-A2.7B-Wikihow/ee23e137-57d2-49aa-b267-27bd48457d46.json deleted file mode 100644 index 42c9cc814..000000000 --- a/data/hfopenllm_v2/alibaba/MaziyarPanahi/Qwen1.5-MoE-A2.7B-Wikihow/ee23e137-57d2-49aa-b267-27bd48457d46.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Qwen1.5-MoE-A2.7B-Wikihow/1762652579.750923", - "retrieved_timestamp": "1762652579.750923", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/Qwen1.5-MoE-A2.7B-Wikihow", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "MaziyarPanahi/Qwen1.5-MoE-A2.7B-Wikihow", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2MoeForCausalLM", - "params_billions": 14.316 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29543278501043896 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3920071454890602 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0823262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35021875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23803191489361702 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.1-qwen2-72b/ae68a60d-a2df-45f1-b446-1400901cb6ff.json b/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.1-qwen2-72b/ae68a60d-a2df-45f1-b446-1400901cb6ff.json deleted file mode 100644 index dd8ae645b..000000000 --- a/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.1-qwen2-72b/ae68a60d-a2df-45f1-b446-1400901cb6ff.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.1-qwen2-72b/1762652579.75234", - "retrieved_timestamp": "1762652579.752341", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.1-qwen2-72b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.1-qwen2-72b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.699 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8162774770941104 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6965560971922596 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47321875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5414727393617021 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.1-qwen2-7b/6c31df3b-e408-4a6c-b475-78f174630cad.json b/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.1-qwen2-7b/6c31df3b-e408-4a6c-b475-78f174630cad.json deleted file mode 100644 index b59d9ed18..000000000 --- a/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.1-qwen2-7b/6c31df3b-e408-4a6c-b475-78f174630cad.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.1-qwen2-7b/1762652579.752553", - "retrieved_timestamp": "1762652579.752554", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.1-qwen2-7b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.1-qwen2-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3816119008674761 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5045925887362795 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2311178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44369791666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3692652925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.1-qwen2.5-72b/2b841a46-6210-4092-875f-ca3ae36f3d25.json b/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.1-qwen2.5-72b/2b841a46-6210-4092-875f-ca3ae36f3d25.json deleted file mode 100644 index 4b3d7188b..000000000 --- a/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.1-qwen2.5-72b/2b841a46-6210-4092-875f-ca3ae36f3d25.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.1-qwen2.5-72b/1762652579.752765", - "retrieved_timestamp": "1762652579.752765", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.1-qwen2.5-72b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.1-qwen2.5-72b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.7 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8662360315075112 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7261624327092416 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5913897280966768 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36325503355704697 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42984375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5619182180851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.2-qwen2-72b/250897a9-7d48-4323-813d-fa48befe2cbe.json b/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.2-qwen2-72b/250897a9-7d48-4323-813d-fa48befe2cbe.json deleted file mode 100644 index 6cc1ea07b..000000000 --- a/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.2-qwen2-72b/250897a9-7d48-4323-813d-fa48befe2cbe.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.2-qwen2-72b/1762652579.753872", - "retrieved_timestamp": "1762652579.753872", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.2-qwen2-72b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.2-qwen2-72b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8008151704145002 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6939595229335245 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45317220543806647 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37416107382550334 - } - }, - 
{ - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4508020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.543467420212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.2-qwen2-7b/154b7a41-e1bf-4827-a6a7-279ea170ab7e.json b/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.2-qwen2-7b/154b7a41-e1bf-4827-a6a7-279ea170ab7e.json deleted file mode 100644 index ba7776e50..000000000 --- a/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.2-qwen2-7b/154b7a41-e1bf-4827-a6a7-279ea170ab7e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.2-qwen2-7b/1762652579.7540858", - "retrieved_timestamp": "1762652579.754087", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.2-qwen2-7b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.2-qwen2-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35972996094806226 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5214913750127922 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21450151057401812 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43582291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3898769946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.2-qwen2.5-72b/1fa2ab02-9a1c-4e7e-95b8-27e78af0ba73.json b/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.2-qwen2.5-72b/1fa2ab02-9a1c-4e7e-95b8-27e78af0ba73.json deleted 
file mode 100644 index dd1257934..000000000 --- a/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.2-qwen2.5-72b/1fa2ab02-9a1c-4e7e-95b8-27e78af0ba73.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.2-qwen2.5-72b/1762652579.754294", - "retrieved_timestamp": "1762652579.754294", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.2-qwen2.5-72b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.2-qwen2.5-72b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.7 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8476763875406145 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7276399007138082 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35906040268456374 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4206666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.561751994680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.3-qwen2-72b/8b769df2-18f5-4712-a02b-962d3e2bb7c7.json b/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.3-qwen2-72b/8b769df2-18f5-4712-a02b-962d3e2bb7c7.json deleted file mode 100644 index 9e183d9c9..000000000 --- a/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.3-qwen2-72b/8b769df2-18f5-4712-a02b-962d3e2bb7c7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.3-qwen2-72b/1762652579.755723", - "retrieved_timestamp": "1762652579.755724", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.3-qwen2-72b", - "developer": "alibaba", - 
"inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.3-qwen2-72b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3849840645044039 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6576306700720502 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31722054380664655 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3716442953020134 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4112395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5418882978723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.3-qwen2-7b/3272e904-21d5-4116-abde-0e74fe48b9d5.json b/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.3-qwen2-7b/3272e904-21d5-4116-abde-0e74fe48b9d5.json deleted file mode 100644 index adabce859..000000000 --- a/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.3-qwen2-7b/3272e904-21d5-4116-abde-0e74fe48b9d5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.3-qwen2-7b/1762652579.755967", - "retrieved_timestamp": "1762652579.755968", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.3-qwen2-7b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.3-qwen2-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3824862476008103 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5064049035932394 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20694864048338368 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4422395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3611203457446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.4-qwen2-7b/5f54ee4a-42e8-4dd0-88bc-915d2f1971a0.json b/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.4-qwen2-7b/5f54ee4a-42e8-4dd0-88bc-915d2f1971a0.json deleted file mode 100644 index 5970fd4fe..000000000 --- a/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.4-qwen2-7b/5f54ee4a-42e8-4dd0-88bc-915d2f1971a0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.4-qwen2-7b/1762652579.756743", - "retrieved_timestamp": "1762652579.756744", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.4-qwen2-7b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.4-qwen2-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32995452067181746 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5101416326251771 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20317220543806647 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.44528125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3976894946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.5-qwen2-7b/762f6ff3-4823-4de8-8351-045e1d1d383b.json b/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.5-qwen2-7b/762f6ff3-4823-4de8-8351-045e1d1d383b.json deleted file mode 100644 index 81f282977..000000000 --- a/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.5-qwen2-7b/762f6ff3-4823-4de8-8351-045e1d1d383b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.5-qwen2-7b/1762652579.757269", - "retrieved_timestamp": "1762652579.75727", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.5-qwen2-7b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.5-qwen2-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31449221399220734 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4886561146965678 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2258308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45646875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3681848404255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.6-qwen2-7b/65f44cf9-f619-4f43-a03f-09e22386d319.json b/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.6-qwen2-7b/65f44cf9-f619-4f43-a03f-09e22386d319.json deleted file mode 100644 index bdedd7304..000000000 --- a/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.6-qwen2-7b/65f44cf9-f619-4f43-a03f-09e22386d319.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/MaziyarPanahi_calme-2.6-qwen2-7b/1762652579.7575328", - "retrieved_timestamp": "1762652579.757534", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.6-qwen2-7b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.6-qwen2-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3442676542684522 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4930243946403894 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1216012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2843959731543625 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4586145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3731715425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.7-qwen2-7b/f592bc27-c97c-4b14-abcf-30782d8c0056.json b/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.7-qwen2-7b/f592bc27-c97c-4b14-abcf-30782d8c0056.json deleted file mode 100644 index c11a7842e..000000000 --- a/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.7-qwen2-7b/f592bc27-c97c-4b14-abcf-30782d8c0056.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.7-qwen2-7b/1762652579.757804", - "retrieved_timestamp": "1762652579.757805", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.7-qwen2-7b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.7-qwen2-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": 
"IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3592301759331906 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4883170901309997 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13821752265861026 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48242708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3705119680851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Minami-su/Amara-o1-7B-Qwen/6910eff9-74bc-46b0-8f8c-20642bef4a12.json b/data/hfopenllm_v2/alibaba/Minami-su/Amara-o1-7B-Qwen/6910eff9-74bc-46b0-8f8c-20642bef4a12.json deleted file mode 100644 index d3f49f1e1..000000000 --- a/data/hfopenllm_v2/alibaba/Minami-su/Amara-o1-7B-Qwen/6910eff9-74bc-46b0-8f8c-20642bef4a12.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Minami-su_Amara-o1-7B-Qwen/1762652579.759999", - "retrieved_timestamp": "1762652579.76", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Minami-su/Amara-o1-7B-Qwen", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Minami-su/Amara-o1-7B-Qwen", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7389914316236474 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5199420077880453 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5181268882175226 - } - }, - { - "evaluation_name": 
"GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40066666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4083277925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Minami-su/Amara-o2-7B-Qwen/ebd5da9f-60d5-492e-916b-5e123442316c.json b/data/hfopenllm_v2/alibaba/Minami-su/Amara-o2-7B-Qwen/ebd5da9f-60d5-492e-916b-5e123442316c.json deleted file mode 100644 index 41efa1984..000000000 --- a/data/hfopenllm_v2/alibaba/Minami-su/Amara-o2-7B-Qwen/ebd5da9f-60d5-492e-916b-5e123442316c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Minami-su_Amara-o2-7B-Qwen/1762652579.760268", - "retrieved_timestamp": "1762652579.760268", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Minami-su/Amara-o2-7B-Qwen", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Minami-su/Amara-o2-7B-Qwen", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7146615424850509 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5173432604435285 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4086102719033233 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37809374999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41647273936170215 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/alibaba/Nexesenex/Qwen_2.5_3b_Smarteaz_0.01a/eaf601d2-f285-4b0c-b3ab-5d029b8fe20f.json b/data/hfopenllm_v2/alibaba/Nexesenex/Qwen_2.5_3b_Smarteaz_0.01a/eaf601d2-f285-4b0c-b3ab-5d029b8fe20f.json deleted file mode 100644 index 12596ef0c..000000000 --- a/data/hfopenllm_v2/alibaba/Nexesenex/Qwen_2.5_3b_Smarteaz_0.01a/eaf601d2-f285-4b0c-b3ab-5d029b8fe20f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nexesenex_Qwen_2.5_3b_Smarteaz_0.01a/1762652579.782197", - "retrieved_timestamp": "1762652579.782198", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nexesenex/Qwen_2.5_3b_Smarteaz_0.01a", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Nexesenex/Qwen_2.5_3b_Smarteaz_0.01a", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.085 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4011954946209391 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4636652015725344 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1805135951661631 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43204166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2859873670212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/NikolaSigmoid/DeepSeek-R1-Distill-Qwen-1.5B-500/c0182d01-454b-4194-be7a-81b9a9672d07.json b/data/hfopenllm_v2/alibaba/NikolaSigmoid/DeepSeek-R1-Distill-Qwen-1.5B-500/c0182d01-454b-4194-be7a-81b9a9672d07.json deleted file mode 100644 index 8fe8546ba..000000000 --- a/data/hfopenllm_v2/alibaba/NikolaSigmoid/DeepSeek-R1-Distill-Qwen-1.5B-500/c0182d01-454b-4194-be7a-81b9a9672d07.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/NikolaSigmoid_DeepSeek-R1-Distill-Qwen-1.5B-500/1762652579.783665", - "retrieved_timestamp": "1762652579.783666", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NikolaSigmoid/DeepSeek-R1-Distill-Qwen-1.5B-500", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "NikolaSigmoid/DeepSeek-R1-Distill-Qwen-1.5B-500", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.157 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17485715678843247 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2601595454586609 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24580536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33796875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1124501329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/PJMixers-Dev/Qwen2.5-RomboTiesTest-7B/a954be32-0c84-4ffe-9c4f-7f895c77e197.json b/data/hfopenllm_v2/alibaba/PJMixers-Dev/Qwen2.5-RomboTiesTest-7B/a954be32-0c84-4ffe-9c4f-7f895c77e197.json deleted file mode 100644 index 47dbcf698..000000000 --- a/data/hfopenllm_v2/alibaba/PJMixers-Dev/Qwen2.5-RomboTiesTest-7B/a954be32-0c84-4ffe-9c4f-7f895c77e197.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/PJMixers-Dev_Qwen2.5-RomboTiesTest-7B/1762652579.811478", - "retrieved_timestamp": "1762652579.81148", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "PJMixers-Dev/Qwen2.5-RomboTiesTest-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "PJMixers-Dev/Qwen2.5-RomboTiesTest-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.808 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { 
- "score": 0.7558023821238757 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5398673461520839 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4962235649546828 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4033645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4285239361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Pinkstack/PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B/7b8f75d1-ef18-4fb4-abbb-efd6147fe74c.json b/data/hfopenllm_v2/alibaba/Pinkstack/PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B/7b8f75d1-ef18-4fb4-abbb-efd6147fe74c.json deleted file mode 100644 index 8370443d0..000000000 --- a/data/hfopenllm_v2/alibaba/Pinkstack/PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B/7b8f75d1-ef18-4fb4-abbb-efd6147fe74c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Pinkstack_PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B/1762652579.812139", - "retrieved_timestamp": "1762652579.812139", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Pinkstack/PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Pinkstack/PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5084819390328772 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47105662040096935 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1691842900302115 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44785416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35106382978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Qwen/QwQ-32B-Preview/1326f0c0-9355-47ff-813b-0729370e1487.json b/data/hfopenllm_v2/alibaba/Qwen/QwQ-32B-Preview/1326f0c0-9355-47ff-813b-0729370e1487.json deleted file mode 100644 index f1ca85f4f..000000000 --- a/data/hfopenllm_v2/alibaba/Qwen/QwQ-32B-Preview/1326f0c0-9355-47ff-813b-0729370e1487.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_QwQ-32B-Preview/1762652579.834909", - "retrieved_timestamp": "1762652579.83491", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/QwQ-32B-Preview", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/QwQ-32B-Preview", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4035437084713006 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6691381482252744 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44939577039274925 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2818791946308725 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4109895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5678191489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Qwen/QwQ-32B/788241ad-d975-498e-80ef-b0d04bd8db85.json 
b/data/hfopenllm_v2/alibaba/Qwen/QwQ-32B/788241ad-d975-498e-80ef-b0d04bd8db85.json deleted file mode 100644 index bbaddf5e1..000000000 --- a/data/hfopenllm_v2/alibaba/Qwen/QwQ-32B/788241ad-d975-498e-80ef-b0d04bd8db85.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_QwQ-32B/1762652579.8346298", - "retrieved_timestamp": "1762652579.834631", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/QwQ-32B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/QwQ-32B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39767372793077926 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29829653176003074 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1608761329305136 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42063541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11959773936170212 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-0.5B/e0115d6b-3b2c-4047-b64c-1e7afb5edd55.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-0.5B/e0115d6b-3b2c-4047-b64c-1e7afb5edd55.json deleted file mode 100644 index 3fc1adc4a..000000000 --- a/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-0.5B/e0115d6b-3b2c-4047-b64c-1e7afb5edd55.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-0.5B/1762652579.835391", - "retrieved_timestamp": "1762652579.835392", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen1.5-0.5B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen1.5-0.5B", - "additional_details": { 
- "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.62 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17056077873375977 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3153538659142558 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.017371601208459216 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36162500000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1307347074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-1.8B/7c828833-fd36-4a84-8530-d3c1769ca822.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-1.8B/7c828833-fd36-4a84-8530-d3c1769ca822.json deleted file mode 100644 index 3b3df7170..000000000 --- a/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-1.8B/7c828833-fd36-4a84-8530-d3c1769ca822.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-1.8B/1762652579.835954", - "retrieved_timestamp": "1762652579.835955", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen1.5-1.8B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen1.5-1.8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.837 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2154239639711521 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3476121558366305 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.03172205438066465 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36051041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18816489361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-110B/29389e2b-7898-4f9f-ba8c-8fe4dad80295.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-110B/29389e2b-7898-4f9f-ba8c-8fe4dad80295.json deleted file mode 100644 index 534551c1d..000000000 --- a/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-110B/29389e2b-7898-4f9f-ba8c-8fe4dad80295.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-110B/1762652579.836433", - "retrieved_timestamp": "1762652579.836434", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen1.5-110B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen1.5-110B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 111.21 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3421942667677318 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6099964981780978 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24697885196374622 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3523489932885906 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44084375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5360704787234043 - 
} - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-14B/9afcb068-65e2-4d4c-b7ee-071eb4dbac73.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-14B/9afcb068-65e2-4d4c-b7ee-071eb4dbac73.json deleted file mode 100644 index 42e8c5afd..000000000 --- a/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-14B/9afcb068-65e2-4d4c-b7ee-071eb4dbac73.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-14B/1762652579.836853", - "retrieved_timestamp": "1762652579.836853", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen1.5-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen1.5-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.167 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2905368865720732 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5080327493808331 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20241691842900303 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41864583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36436170212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-32B/b8cd9221-dd4e-4f49-b03e-f11bdd5773e4.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-32B/b8cd9221-dd4e-4f49-b03e-f11bdd5773e4.json deleted file mode 100644 index b91fd2e9b..000000000 --- a/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-32B/b8cd9221-dd4e-4f49-b03e-f11bdd5773e4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-32B/1762652579.837265", - "retrieved_timestamp": "1762652579.837266", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"Qwen/Qwen1.5-32B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen1.5-32B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.512 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.329729562006587 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5715390555959325 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028700906344411 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3296979865771812 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4277916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4499667553191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-4B/1e3f60f2-814a-4979-87bd-f5f94d5b09cc.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-4B/1e3f60f2-814a-4979-87bd-f5f94d5b09cc.json deleted file mode 100644 index 75c8125e6..000000000 --- a/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-4B/1e3f60f2-814a-4979-87bd-f5f94d5b09cc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-4B/1762652579.837696", - "retrieved_timestamp": "1762652579.837697", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen1.5-4B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen1.5-4B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.95 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24447466056729478 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40538970296725463 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.052870090634441085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3604479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24601063829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-7B/102378fc-7b98-4088-a6f5-3039e7b638d5.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-7B/102378fc-7b98-4088-a6f5-3039e7b638d5.json deleted file mode 100644 index fb1db2ae7..000000000 --- a/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-7B/102378fc-7b98-4088-a6f5-3039e7b638d5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-7B/1762652579.838115", - "retrieved_timestamp": "1762652579.8381162", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen1.5-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen1.5-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.721 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684299879874289 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4559896407693445 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09290030211480363 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4103333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29163896276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-MoE-A2.7B/c6aa0ed8-3b79-4d73-8587-762e9469f4ce.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-MoE-A2.7B/c6aa0ed8-3b79-4d73-8587-762e9469f4ce.json deleted file mode 100644 index c89c22dfd..000000000 --- a/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-MoE-A2.7B/c6aa0ed8-3b79-4d73-8587-762e9469f4ce.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-MoE-A2.7B/1762652579.83854", - "retrieved_timestamp": "1762652579.83854", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen1.5-MoE-A2.7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen1.5-MoE-A2.7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2MoeForCausalLM", - "params_billions": 14.316 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.265982038768246 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4113515433010766 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09290030211480363 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40134375000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2777593085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen2-0.5B/cdf3b683-29d9-45b4-b6a6-1f67927ef953.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen2-0.5B/cdf3b683-29d9-45b4-b6a6-1f67927ef953.json deleted file mode 100644 index bfe89233e..000000000 --- a/data/hfopenllm_v2/alibaba/Qwen/Qwen2-0.5B/cdf3b683-29d9-45b4-b6a6-1f67927ef953.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-0.5B/1762652579.838974", - "retrieved_timestamp": "1762652579.838975", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2-0.5B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen2-0.5B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18732186154957736 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3239117424825444 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37520833333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17195811170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen2-1.5B/6eb76673-0633-440b-8849-8fcf8cf00954.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen2-1.5B/6eb76673-0633-440b-8849-8fcf8cf00954.json deleted file mode 100644 index ff1e12fd4..000000000 --- a/data/hfopenllm_v2/alibaba/Qwen/Qwen2-1.5B/6eb76673-0633-440b-8849-8fcf8cf00954.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-1.5B/1762652579.839384", - "retrieved_timestamp": "1762652579.839385", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2-1.5B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen2-1.5B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21132705665412216 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35747931720577464 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0702416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36581250000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2551529255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen2-57B-A14B/aafb84cd-5950-4b93-98d1-9e50fd294b65.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen2-57B-A14B/aafb84cd-5950-4b93-98d1-9e50fd294b65.json deleted file mode 100644 index 00d84e48f..000000000 --- a/data/hfopenllm_v2/alibaba/Qwen/Qwen2-57B-A14B/aafb84cd-5950-4b93-98d1-9e50fd294b65.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-57B-A14B/1762652579.8398201", - "retrieved_timestamp": "1762652579.839821", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2-57B-A14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen2-57B-A14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2MoeForCausalLM", - "params_billions": 57.409 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31126965340851165 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5618204938684165 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1865558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.417375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4916057180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen2-72B/fc683e1a-327f-4a69-bd51-9022c587159b.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen2-72B/fc683e1a-327f-4a69-bd51-9022c587159b.json deleted file mode 100644 index 0dd10ce28..000000000 --- a/data/hfopenllm_v2/alibaba/Qwen/Qwen2-72B/fc683e1a-327f-4a69-bd51-9022c587159b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-72B/1762652579.8402402", - "retrieved_timestamp": "1762652579.840241", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2-72B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen2-72B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3823610243044012 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.661734029856643 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39429530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47036458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5730551861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen2-7B/196e965c-4570-43aa-ba0d-13972796bda9.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen2-7B/196e965c-4570-43aa-ba0d-13972796bda9.json deleted file mode 100644 index b254194f9..000000000 --- a/data/hfopenllm_v2/alibaba/Qwen/Qwen2-7B/196e965c-4570-43aa-ba0d-13972796bda9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-7B/1762652579.840696", - "retrieved_timestamp": "1762652579.840696", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen2-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3148667757106699 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.531531595001889 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4439166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41830119680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen2-Math-7B/fe474496-4efa-4ef7-844d-32b17abda7c8.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen2-Math-7B/fe474496-4efa-4ef7-844d-32b17abda7c8.json deleted file mode 100644 index 076a20171..000000000 --- a/data/hfopenllm_v2/alibaba/Qwen/Qwen2-Math-7B/fe474496-4efa-4ef7-844d-32b17abda7c8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-Math-7B/1762652579.841364", - "retrieved_timestamp": "1762652579.841364", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2-Math-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen2-Math-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2687048143370701 - } - }, - { - "evaluation_name": 
"BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.386954741074792 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24773413897280966 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35933333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1196808510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-0.5B/c8110747-f2dd-46d0-b2b3-706d70e1d714.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-0.5B/c8110747-f2dd-46d0-b2b3-706d70e1d714.json deleted file mode 100644 index 3041dd71c..000000000 --- a/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-0.5B/c8110747-f2dd-46d0-b2b3-706d70e1d714.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-0.5B/1762652579.841982", - "retrieved_timestamp": "1762652579.841983", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2.5-0.5B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-0.5B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.5 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16271714606133947 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32748148151196615 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24664429530201343 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": 
"Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3433333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19057513297872342 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-1.5B/9982c576-75fd-47f6-8fe9-52b56fc58d3f.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-1.5B/9982c576-75fd-47f6-8fe9-52b56fc58d3f.json deleted file mode 100644 index f1e033440..000000000 --- a/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-1.5B/9982c576-75fd-47f6-8fe9-52b56fc58d3f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-1.5B/1762652579.8426108", - "retrieved_timestamp": "1762652579.842612", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2.5-1.5B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-1.5B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.5 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26743041795768563 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40779509451366147 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09138972809667674 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35759375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28548869680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-14B/b02dabaf-2aac-468d-b0cc-c7194c2094fd.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-14B/b02dabaf-2aac-468d-b0cc-c7194c2094fd.json deleted file mode 100644 index a19763b24..000000000 --- a/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-14B/b02dabaf-2aac-468d-b0cc-c7194c2094fd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - 
"evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-14B/1762652579.843051", - "retrieved_timestamp": "1762652579.8430521", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2.5-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3694464022127954 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.616051493531774 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29003021148036257 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38171140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4502395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5248503989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-32B/9dd61039-27d0-42f3-9b03-65b0a59465d4.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-32B/9dd61039-27d0-42f3-9b03-65b0a59465d4.json deleted file mode 100644 index d06797d7f..000000000 --- a/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-32B/9dd61039-27d0-42f3-9b03-65b0a59465d4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-32B/1762652579.843701", - "retrieved_timestamp": "1762652579.843702", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2.5-32B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-32B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40766499554515356 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6770522448726507 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3564954682779456 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41191275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49783333333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5805352393617021 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-3B/43062e28-5532-4e31-ac49-fbd794c7f664.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-3B/43062e28-5532-4e31-ac49-fbd794c7f664.json deleted file mode 100644 index 77943e095..000000000 --- a/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-3B/43062e28-5532-4e31-ac49-fbd794c7f664.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-3B/1762652579.8441322", - "retrieved_timestamp": "1762652579.8441331", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2.5-3B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-3B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2689541527591236 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4612475341011634 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14803625377643503 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4303333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3203125 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-72B/89ce1911-289d-40bb-be48-f9a4d8d73ac2.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-72B/89ce1911-289d-40bb-be48-f9a4d8d73ac2.json deleted file mode 100644 index 1dff570d2..000000000 --- a/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-72B/89ce1911-289d-40bb-be48-f9a4d8d73ac2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-72B/1762652579.844565", - "retrieved_timestamp": "1762652579.844566", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2.5-72B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-72B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4137100670664947 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6797320670694852 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39123867069486407 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4052013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.477125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5968251329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-7B/bed92e1c-8f11-4f70-826e-569aa55baa09.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-7B/bed92e1c-8f11-4f70-826e-569aa55baa09.json deleted file mode 100644 index 9f5c460bb..000000000 --- 
a/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-7B/bed92e1c-8f11-4f70-826e-569aa55baa09.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-7B/1762652579.8449879", - "retrieved_timestamp": "1762652579.8449888", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2.5-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3374479713825982 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5416303767788616 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25075528700906347 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4424270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4365026595744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-Coder-14B/d0ae041c-8b56-4ce1-841b-96622a724894.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-Coder-14B/d0ae041c-8b56-4ce1-841b-96622a724894.json deleted file mode 100644 index 2dde0a204..000000000 --- a/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-Coder-14B/d0ae041c-8b56-4ce1-841b-96622a724894.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Coder-14B/1762652579.8457868", - "retrieved_timestamp": "1762652579.845789", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2.5-Coder-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-Coder-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - 
}, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3472652561869174 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5864860091741232 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22507552870090636 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3873645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4521276595744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-Coder-32B/743c517a-ad0f-495d-b9d0-cdca01335933.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-Coder-32B/743c517a-ad0f-495d-b9d0-cdca01335933.json deleted file mode 100644 index 689ea4d2e..000000000 --- a/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-Coder-32B/743c517a-ad0f-495d-b9d0-cdca01335933.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Coder-32B/1762652579.846424", - "retrieved_timestamp": "1762652579.846425", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2.5-Coder-32B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-Coder-32B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4363411304228336 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.640395506550809 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30891238670694865 - } 
- }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3464765100671141 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4528125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5302526595744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-Coder-7B/5e82cb32-8291-497b-ac56-16b50947d1bf.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-Coder-7B/5e82cb32-8291-497b-ac56-16b50947d1bf.json deleted file mode 100644 index 1741c8019..000000000 --- a/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-Coder-7B/5e82cb32-8291-497b-ac56-16b50947d1bf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Coder-7B/1762652579.846894", - "retrieved_timestamp": "1762652579.8468952", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2.5-Coder-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-Coder-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.344592348302504 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48556405534214747 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19184290030211482 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3448541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3679355053191489 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-Math-7B/8fddcebe-58d2-4d40-8147-f02feabc0d9c.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-Math-7B/8fddcebe-58d2-4d40-8147-f02feabc0d9c.json deleted file mode 100644 index 426f486af..000000000 --- a/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-Math-7B/8fddcebe-58d2-4d40-8147-f02feabc0d9c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Math-7B/1762652579.8480499", - "retrieved_timestamp": "1762652579.848052", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Qwen/Qwen2.5-Math-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-Math-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24599839536873275 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4454639372840941 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30513595166163143 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37809374999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27177526595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/RESMPDEV/EVA-Qwen2.5-1.5B-FRFR/648e69e2-54de-43c4-93ac-f8422fa4b9c1.json b/data/hfopenllm_v2/alibaba/RESMPDEV/EVA-Qwen2.5-1.5B-FRFR/648e69e2-54de-43c4-93ac-f8422fa4b9c1.json deleted file mode 100644 index a83847bb6..000000000 --- a/data/hfopenllm_v2/alibaba/RESMPDEV/EVA-Qwen2.5-1.5B-FRFR/648e69e2-54de-43c4-93ac-f8422fa4b9c1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/RESMPDEV_EVA-Qwen2.5-1.5B-FRFR/1762652579.848896", - "retrieved_timestamp": "1762652579.848896", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": 
"documentation" - }, - "model_info": { - "name": "RESMPDEV/EVA-Qwen2.5-1.5B-FRFR", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "RESMPDEV/EVA-Qwen2.5-1.5B-FRFR", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.308172316121225 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3932411333682871 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1027190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3539375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27701130319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/RESMPDEV/Qwen2-Wukong-0.5B/72a11594-1d83-4e12-b82f-137b6749f5ab.json b/data/hfopenllm_v2/alibaba/RESMPDEV/Qwen2-Wukong-0.5B/72a11594-1d83-4e12-b82f-137b6749f5ab.json deleted file mode 100644 index c337fca2a..000000000 --- a/data/hfopenllm_v2/alibaba/RESMPDEV/Qwen2-Wukong-0.5B/72a11594-1d83-4e12-b82f-137b6749f5ab.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/RESMPDEV_Qwen2-Wukong-0.5B/1762652579.849144", - "retrieved_timestamp": "1762652579.849144", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "RESMPDEV/Qwen2-Wukong-0.5B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "RESMPDEV/Qwen2-Wukong-0.5B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1854235650296768 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.308451428837168 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0015105740181268882 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23657718120805368 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3524791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13272938829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Replete-AI/Replete-Coder-Qwen2-1.5b/1ff6b76b-7241-4f06-9db5-4594d3ff7a3f.json b/data/hfopenllm_v2/alibaba/Replete-AI/Replete-Coder-Qwen2-1.5b/1ff6b76b-7241-4f06-9db5-4594d3ff7a3f.json deleted file mode 100644 index 76388e275..000000000 --- a/data/hfopenllm_v2/alibaba/Replete-AI/Replete-Coder-Qwen2-1.5b/1ff6b76b-7241-4f06-9db5-4594d3ff7a3f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Replete-AI_Replete-Coder-Qwen2-1.5b/1762652579.852138", - "retrieved_timestamp": "1762652579.852139", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Replete-AI/Replete-Coder-Qwen2-1.5b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Replete-AI/Replete-Coder-Qwen2-1.5b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30142798884736943 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34747295666696026 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4072708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21467752659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Replete-AI/Replete-LLM-Qwen2-7b/20a6e090-2c78-4eb9-870e-9abbcbada6f9.json b/data/hfopenllm_v2/alibaba/Replete-AI/Replete-LLM-Qwen2-7b/20a6e090-2c78-4eb9-870e-9abbcbada6f9.json deleted file mode 100644 index aa3782fff..000000000 --- a/data/hfopenllm_v2/alibaba/Replete-AI/Replete-LLM-Qwen2-7b/20a6e090-2c78-4eb9-870e-9abbcbada6f9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Replete-AI_Replete-LLM-Qwen2-7b/1762652579.852611", - "retrieved_timestamp": "1762652579.852612", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Replete-AI/Replete-LLM-Qwen2-7b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Replete-AI/Replete-LLM-Qwen2-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09324813716494457 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2976924067792704 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24748322147651006 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39409374999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11569148936170212 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Replete-AI/Replete-LLM-Qwen2-7b/a846978d-de78-48e8-a738-54c732e50c28.json b/data/hfopenllm_v2/alibaba/Replete-AI/Replete-LLM-Qwen2-7b/a846978d-de78-48e8-a738-54c732e50c28.json deleted file mode 100644 index 26660a83c..000000000 --- a/data/hfopenllm_v2/alibaba/Replete-AI/Replete-LLM-Qwen2-7b/a846978d-de78-48e8-a738-54c732e50c28.json +++ /dev/null @@ 
-1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Replete-AI_Replete-LLM-Qwen2-7b/1762652579.8524", - "retrieved_timestamp": "1762652579.8524008", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Replete-AI/Replete-LLM-Qwen2-7b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Replete-AI/Replete-LLM-Qwen2-7b", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09047549391170981 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29852574011260374 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38476041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1157746010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Replete-AI/Replete-LLM-Qwen2-7b_Beta-Preview/4977e0d5-1446-41ba-b00b-e8236c896d2e.json b/data/hfopenllm_v2/alibaba/Replete-AI/Replete-LLM-Qwen2-7b_Beta-Preview/4977e0d5-1446-41ba-b00b-e8236c896d2e.json deleted file mode 100644 index a78854937..000000000 --- a/data/hfopenllm_v2/alibaba/Replete-AI/Replete-LLM-Qwen2-7b_Beta-Preview/4977e0d5-1446-41ba-b00b-e8236c896d2e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Replete-AI_Replete-LLM-Qwen2-7b_Beta-Preview/1762652579.852791", - "retrieved_timestamp": "1762652579.852791", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Replete-AI/Replete-LLM-Qwen2-7b_Beta-Preview", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Replete-AI/Replete-LLM-Qwen2-7b_Beta-Preview", - "additional_details": { - "precision": "bfloat16", - 
"architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08575468645416384 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2929321328066677 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2483221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3980625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1284906914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Rombo-Org/Rombo-LLM-V2.5-Qwen-7b/8713e6fb-8843-43f2-af3b-57a59d326670.json b/data/hfopenllm_v2/alibaba/Rombo-Org/Rombo-LLM-V2.5-Qwen-7b/8713e6fb-8843-43f2-af3b-57a59d326670.json deleted file mode 100644 index cc233eedc..000000000 --- a/data/hfopenllm_v2/alibaba/Rombo-Org/Rombo-LLM-V2.5-Qwen-7b/8713e6fb-8843-43f2-af3b-57a59d326670.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Rombo-Org_Rombo-LLM-V2.5-Qwen-7b/1762652579.854495", - "retrieved_timestamp": "1762652579.854495", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Rombo-Org/Rombo-LLM-V2.5-Qwen-7b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Rombo-Org/Rombo-LLM-V2.5-Qwen-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.748183708116686 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5399745025607596 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.506797583081571 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39803125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4282746010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Sakalti/QwenTest-7/2d99163e-9ebd-49d9-ad13-ee1f780d277c.json b/data/hfopenllm_v2/alibaba/Sakalti/QwenTest-7/2d99163e-9ebd-49d9-ad13-ee1f780d277c.json deleted file mode 100644 index c93c794ec..000000000 --- a/data/hfopenllm_v2/alibaba/Sakalti/QwenTest-7/2d99163e-9ebd-49d9-ad13-ee1f780d277c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_QwenTest-7/1762652579.8585348", - "retrieved_timestamp": "1762652579.858536", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/QwenTest-7", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Sakalti/QwenTest-7", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.988 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16718861509683197 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3063209532879154 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0037764350453172208 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34218750000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.12117686170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Sakalti/qwen2.5-2.3B/6dc5b101-c681-4010-941a-3983cb9eff53.json b/data/hfopenllm_v2/alibaba/Sakalti/qwen2.5-2.3B/6dc5b101-c681-4010-941a-3983cb9eff53.json deleted file mode 100644 index 4a987acc2..000000000 --- a/data/hfopenllm_v2/alibaba/Sakalti/qwen2.5-2.3B/6dc5b101-c681-4010-941a-3983cb9eff53.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_qwen2.5-2.3B/1762652579.869403", - "retrieved_timestamp": "1762652579.8694038", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/qwen2.5-2.3B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Sakalti/qwen2.5-2.3B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2Model", - "params_billions": 2.339 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12879493078365403 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2849449123234445 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.005287009063444109 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38565625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11727061170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/SicariusSicariiStuff/Impish_QWEN_14B-1M/a059e151-6f32-48ff-900b-4e232aef3cc0.json b/data/hfopenllm_v2/alibaba/SicariusSicariiStuff/Impish_QWEN_14B-1M/a059e151-6f32-48ff-900b-4e232aef3cc0.json deleted file mode 100644 index 13c7e37e3..000000000 --- a/data/hfopenllm_v2/alibaba/SicariusSicariiStuff/Impish_QWEN_14B-1M/a059e151-6f32-48ff-900b-4e232aef3cc0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Impish_QWEN_14B-1M/1762652579.8825831", - "retrieved_timestamp": "1762652579.882584", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SicariusSicariiStuff/Impish_QWEN_14B-1M", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "SicariusSicariiStuff/Impish_QWEN_14B-1M", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7867768631675067 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6282934814011238 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39652567975830816 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35067114093959734 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46146875000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.504404920212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/SicariusSicariiStuff/Impish_QWEN_7B-1M/64c02fd8-386d-4b4c-bc00-d243cfcae7f1.json b/data/hfopenllm_v2/alibaba/SicariusSicariiStuff/Impish_QWEN_7B-1M/64c02fd8-386d-4b4c-bc00-d243cfcae7f1.json deleted file mode 100644 index f81f6f773..000000000 --- a/data/hfopenllm_v2/alibaba/SicariusSicariiStuff/Impish_QWEN_7B-1M/64c02fd8-386d-4b4c-bc00-d243cfcae7f1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Impish_QWEN_7B-1M/1762652579.8828428", - "retrieved_timestamp": "1762652579.882844", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SicariusSicariiStuff/Impish_QWEN_7B-1M", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "SicariusSicariiStuff/Impish_QWEN_7B-1M", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6381744881359238 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.537172912933626 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30891238670694865 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40739583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4265292553191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/SicariusSicariiStuff/Qwen2.5-14B_Uncencored/7c6f4fa2-6847-4f57-8a8f-31673bd8b1e7.json b/data/hfopenllm_v2/alibaba/SicariusSicariiStuff/Qwen2.5-14B_Uncencored/7c6f4fa2-6847-4f57-8a8f-31673bd8b1e7.json deleted file mode 100644 index 992da5136..000000000 --- a/data/hfopenllm_v2/alibaba/SicariusSicariiStuff/Qwen2.5-14B_Uncencored/7c6f4fa2-6847-4f57-8a8f-31673bd8b1e7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Qwen2.5-14B_Uncencored/1762652579.883748", - "retrieved_timestamp": "1762652579.883749", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SicariusSicariiStuff/Qwen2.5-14B_Uncencored", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "SicariusSicariiStuff/Qwen2.5-14B_Uncencored", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31579099012841483 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6308941945507827 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31797583081570996 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38171140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45166666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.526595744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/SicariusSicariiStuff/Qwen2.5-14B_Uncensored/ea18a046-87bb-42d9-a1b2-d01fe875c970.json b/data/hfopenllm_v2/alibaba/SicariusSicariiStuff/Qwen2.5-14B_Uncensored/ea18a046-87bb-42d9-a1b2-d01fe875c970.json deleted file mode 100644 index 0b0deea59..000000000 --- a/data/hfopenllm_v2/alibaba/SicariusSicariiStuff/Qwen2.5-14B_Uncensored/ea18a046-87bb-42d9-a1b2-d01fe875c970.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Qwen2.5-14B_Uncensored/1762652579.883949", - "retrieved_timestamp": "1762652579.88395", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SicariusSicariiStuff/Qwen2.5-14B_Uncensored", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "SicariusSicariiStuff/Qwen2.5-14B_Uncensored", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3173147249298528 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6308941945507827 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31797583081570996 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38171140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45166666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.526595744680851 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/alibaba/SicariusSicariiStuff/Qwen2.5-14B_Uncensored_Instruct/8012de5a-8cb0-4039-895f-70c20e9237ee.json b/data/hfopenllm_v2/alibaba/SicariusSicariiStuff/Qwen2.5-14B_Uncensored_Instruct/8012de5a-8cb0-4039-895f-70c20e9237ee.json deleted file mode 100644 index 77564c1a6..000000000 --- a/data/hfopenllm_v2/alibaba/SicariusSicariiStuff/Qwen2.5-14B_Uncensored_Instruct/8012de5a-8cb0-4039-895f-70c20e9237ee.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Qwen2.5-14B_Uncensored_Instruct/1762652579.884166", - "retrieved_timestamp": "1762652579.884167", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SicariusSicariiStuff/Qwen2.5-14B_Uncensored_Instruct", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "SicariusSicariiStuff/Qwen2.5-14B_Uncensored_Instruct", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3789389929830627 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5936792404117958 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3285498489425982 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3296979865771812 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36965625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5127160904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/StelleX/Qwen2.5_Math_7B_Cot/a0802c61-1314-4a46-9b61-7a89246bac42.json b/data/hfopenllm_v2/alibaba/StelleX/Qwen2.5_Math_7B_Cot/a0802c61-1314-4a46-9b61-7a89246bac42.json deleted file mode 100644 index eb5d4493f..000000000 --- a/data/hfopenllm_v2/alibaba/StelleX/Qwen2.5_Math_7B_Cot/a0802c61-1314-4a46-9b61-7a89246bac42.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/StelleX_Qwen2.5_Math_7B_Cot/1762652579.8928509", - "retrieved_timestamp": "1762652579.892852", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "StelleX/Qwen2.5_Math_7B_Cot", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "StelleX/Qwen2.5_Math_7B_Cot", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2142747908881767 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4312922433417096 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32628398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39241666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.281000664893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/T145/qwen-2.5-3B-merge-test/071d7565-90e5-43e8-a158-ab333beacdcf.json b/data/hfopenllm_v2/alibaba/T145/qwen-2.5-3B-merge-test/071d7565-90e5-43e8-a158-ab333beacdcf.json deleted file mode 100644 index ea894cf55..000000000 --- a/data/hfopenllm_v2/alibaba/T145/qwen-2.5-3B-merge-test/071d7565-90e5-43e8-a158-ab333beacdcf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_qwen-2.5-3B-merge-test/1762652579.908712", - "retrieved_timestamp": "1762652579.9087129", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/qwen-2.5-3B-merge-test", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "T145/qwen-2.5-3B-merge-test", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5751018408932742 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4842488747720393 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3202416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40072916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3289561170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/TIGER-Lab/AceCoder-Qwen2.5-7B-Ins-Rule/7621e05b-1b5e-43e5-a65c-322334575e68.json b/data/hfopenllm_v2/alibaba/TIGER-Lab/AceCoder-Qwen2.5-7B-Ins-Rule/7621e05b-1b5e-43e5-a65c-322334575e68.json deleted file mode 100644 index 0bd972828..000000000 --- a/data/hfopenllm_v2/alibaba/TIGER-Lab/AceCoder-Qwen2.5-7B-Ins-Rule/7621e05b-1b5e-43e5-a65c-322334575e68.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/TIGER-Lab_AceCoder-Qwen2.5-7B-Ins-Rule/1762652579.910362", - "retrieved_timestamp": "1762652579.910363", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "TIGER-Lab/AceCoder-Qwen2.5-7B-Ins-Rule", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "TIGER-Lab/AceCoder-Qwen2.5-7B-Ins-Rule", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.742413462944986 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5404426673547671 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49924471299093653 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39803125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4321808510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Base-Rule/f6223009-028e-4063-90ce-e008a3b5b284.json b/data/hfopenllm_v2/alibaba/TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Base-Rule/f6223009-028e-4063-90ce-e008a3b5b284.json deleted file mode 100644 index a1fd818de..000000000 --- a/data/hfopenllm_v2/alibaba/TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Base-Rule/f6223009-028e-4063-90ce-e008a3b5b284.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/TIGER-Lab_AceCoder-Qwen2.5-Coder-7B-Base-Rule/1762652579.910613", - "retrieved_timestamp": "1762652579.910613", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Base-Rule", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Base-Rule", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44076273177391545 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49023782785253694 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20166163141993956 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34488541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37450132978723405 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/alibaba/TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Ins-Rule/f75e2bca-e300-4b3c-a5aa-f6aae03e7330.json b/data/hfopenllm_v2/alibaba/TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Ins-Rule/f75e2bca-e300-4b3c-a5aa-f6aae03e7330.json deleted file mode 100644 index 8a9c37318..000000000 --- a/data/hfopenllm_v2/alibaba/TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Ins-Rule/f75e2bca-e300-4b3c-a5aa-f6aae03e7330.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/TIGER-Lab_AceCoder-Qwen2.5-Coder-7B-Ins-Rule/1762652579.910825", - "retrieved_timestamp": "1762652579.910826", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Ins-Rule", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Ins-Rule", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6222378843690297 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5089236146835355 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36027190332326287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40463541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34283577127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/TIGER-Lab/Qwen2.5-Math-7B-CFT/07e72fc4-9c37-4a81-a788-8619035c66d3.json b/data/hfopenllm_v2/alibaba/TIGER-Lab/Qwen2.5-Math-7B-CFT/07e72fc4-9c37-4a81-a788-8619035c66d3.json deleted file mode 100644 index eb2846c87..000000000 --- a/data/hfopenllm_v2/alibaba/TIGER-Lab/Qwen2.5-Math-7B-CFT/07e72fc4-9c37-4a81-a788-8619035c66d3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/TIGER-Lab_Qwen2.5-Math-7B-CFT/1762652579.911227", - "retrieved_timestamp": "1762652579.911228", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "TIGER-Lab/Qwen2.5-Math-7B-CFT", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "TIGER-Lab/Qwen2.5-Math-7B-CFT", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2776976200924658 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46369414980230833 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5574018126888217 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38866666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446476063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-r-v0.3/43b106fe-ff02-4cfe-956f-cfc9e272de78.json b/data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-r-v0.3/43b106fe-ff02-4cfe-956f-cfc9e272de78.json deleted file mode 100644 index 47fa425a1..000000000 --- a/data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-r-v0.3/43b106fe-ff02-4cfe-956f-cfc9e272de78.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/TheTsar1209_qwen-carpmuscle-r-v0.3/1762652579.917092", - "retrieved_timestamp": "1762652579.917093", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "TheTsar1209/qwen-carpmuscle-r-v0.3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "TheTsar1209/qwen-carpmuscle-r-v0.3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44550902715904905 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6227124007872 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30060422960725075 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35067114093959734 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42776041666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5103058510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-v0.1/ce9658b7-b457-4fb3-8fce-4173b5d93f2d.json b/data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-v0.1/ce9658b7-b457-4fb3-8fce-4173b5d93f2d.json deleted file mode 100644 index 2f52b9e9b..000000000 --- a/data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-v0.1/ce9658b7-b457-4fb3-8fce-4173b5d93f2d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/TheTsar1209_qwen-carpmuscle-v0.1/1762652579.917331", - "retrieved_timestamp": "1762652579.917332", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "TheTsar1209/qwen-carpmuscle-v0.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "TheTsar1209/qwen-carpmuscle-v0.1", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5621628390448454 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.643430074129922 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2628398791540785 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.34395973154362414 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41610416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.520029920212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-v0.2/eed9909e-db3e-4d6a-8caa-3f208ace941d.json b/data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-v0.2/eed9909e-db3e-4d6a-8caa-3f208ace941d.json deleted file mode 100644 index b007094a2..000000000 --- a/data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-v0.2/eed9909e-db3e-4d6a-8caa-3f208ace941d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/TheTsar1209_qwen-carpmuscle-v0.2/1762652579.917543", - "retrieved_timestamp": "1762652579.917544", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "TheTsar1209/qwen-carpmuscle-v0.2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "TheTsar1209/qwen-carpmuscle-v0.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5256929391791557 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6386922464145662 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28323262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35570469798657717 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43455208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5147107712765957 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-v0.3/f8aa8470-6803-458e-8207-b217969dd6f3.json 
b/data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-v0.3/f8aa8470-6803-458e-8207-b217969dd6f3.json deleted file mode 100644 index 13dc60565..000000000 --- a/data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-v0.3/f8aa8470-6803-458e-8207-b217969dd6f3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/TheTsar1209_qwen-carpmuscle-v0.3/1762652579.917758", - "retrieved_timestamp": "1762652579.917759", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "TheTsar1209/qwen-carpmuscle-v0.3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "TheTsar1209/qwen-carpmuscle-v0.3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4476322823441801 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6151533941210218 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31344410876132933 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3565436241610738 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4131875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5061502659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-v0.4.1/c464e6b4-aa76-4b42-ab9b-71f193ec2a57.json b/data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-v0.4.1/c464e6b4-aa76-4b42-ab9b-71f193ec2a57.json deleted file mode 100644 index 1ed012fe3..000000000 --- a/data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-v0.4.1/c464e6b4-aa76-4b42-ab9b-71f193ec2a57.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/TheTsar1209_qwen-carpmuscle-v0.4.1/1762652579.918201", - "retrieved_timestamp": "1762652579.9182022", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - 
"model_info": { - "name": "TheTsar1209/qwen-carpmuscle-v0.4.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "TheTsar1209/qwen-carpmuscle-v0.4.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7359938297051822 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6506533698399672 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27794561933534745 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34563758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44890625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5191156914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-v0.4/90fe60dc-76dd-4e90-99b4-c16d026afcb5.json b/data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-v0.4/90fe60dc-76dd-4e90-99b4-c16d026afcb5.json deleted file mode 100644 index 6c932f2d8..000000000 --- a/data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-v0.4/90fe60dc-76dd-4e90-99b4-c16d026afcb5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/TheTsar1209_qwen-carpmuscle-v0.4/1762652579.917984", - "retrieved_timestamp": "1762652579.917985", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "TheTsar1209/qwen-carpmuscle-v0.4", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "TheTsar1209/qwen-carpmuscle-v0.4", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7202068289915202 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.6453667027727318 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.277190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3523489932885906 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45160416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5143783244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Triangle104/DSR1-Distill-Qwen-7B-RP/856c2575-700c-4b00-8883-bcde8841e262.json b/data/hfopenllm_v2/alibaba/Triangle104/DSR1-Distill-Qwen-7B-RP/856c2575-700c-4b00-8883-bcde8841e262.json deleted file mode 100644 index 5e710499b..000000000 --- a/data/hfopenllm_v2/alibaba/Triangle104/DSR1-Distill-Qwen-7B-RP/856c2575-700c-4b00-8883-bcde8841e262.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_DSR1-Distill-Qwen-7B-RP/1762652579.923616", - "retrieved_timestamp": "1762652579.923616", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/DSR1-Distill-Qwen-7B-RP", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Triangle104/DSR1-Distill-Qwen-7B-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36092900171544834 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4326490703099772 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48036253776435045 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40454166666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30277593085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Weyaxi/Einstein-v7-Qwen2-7B/b20c1304-d782-4d41-9c15-0091f9c914e4.json b/data/hfopenllm_v2/alibaba/Weyaxi/Einstein-v7-Qwen2-7B/b20c1304-d782-4d41-9c15-0091f9c914e4.json deleted file mode 100644 index f4720e82e..000000000 --- a/data/hfopenllm_v2/alibaba/Weyaxi/Einstein-v7-Qwen2-7B/b20c1304-d782-4d41-9c15-0091f9c914e4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Weyaxi_Einstein-v7-Qwen2-7B/1762652579.949607", - "retrieved_timestamp": "1762652579.949609", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Weyaxi/Einstein-v7-Qwen2-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Weyaxi/Einstein-v7-Qwen2-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4099633417111043 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5161472249498397 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19939577039274925 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43997916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4095744680851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-7B-MS-Destroyer/c5d4bbfe-68a9-4808-ab2e-e92dd88ba06a.json b/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-7B-MS-Destroyer/c5d4bbfe-68a9-4808-ab2e-e92dd88ba06a.json deleted file mode 100644 index 79d60db25..000000000 --- 
a/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-7B-MS-Destroyer/c5d4bbfe-68a9-4808-ab2e-e92dd88ba06a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Qwen2.5-7B-MS-Destroyer/1762652579.953399", - "retrieved_timestamp": "1762652579.953399", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Xiaojian9992024/Qwen2.5-7B-MS-Destroyer", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Xiaojian9992024/Qwen2.5-7B-MS-Destroyer", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7295741964653786 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5469696828400438 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42702083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4412400265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview-v0.2/5cf588ed-fde6-4ee1-833e-a6743cc1834c.json b/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview-v0.2/5cf588ed-fde6-4ee1-833e-a6743cc1834c.json deleted file mode 100644 index 43eb48d88..000000000 --- a/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview-v0.2/5cf588ed-fde6-4ee1-833e-a6743cc1834c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Qwen2.5-Dyanka-7B-Preview-v0.2/1762652579.953881", - "retrieved_timestamp": "1762652579.9538822", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview-v0.2", - 
"developer": "alibaba", - "inference_platform": "unknown", - "id": "Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview-v0.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6701984068937087 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.537439126573433 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47205438066465255 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4467083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4370844414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview/97a591f9-2052-43b3-851d-ac73c793a000.json b/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview/97a591f9-2052-43b3-851d-ac73c793a000.json deleted file mode 100644 index e5ca03aca..000000000 --- a/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview/97a591f9-2052-43b3-851d-ac73c793a000.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Qwen2.5-Dyanka-7B-Preview/1762652579.95366", - "retrieved_timestamp": "1762652579.953661", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7640205765147586 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.5543342320067098 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4879154078549849 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44807291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43758311170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-THREADRIPPER-Medium-Censored/89ca3fb4-eb53-422c-a4dd-029bd1fc7c37.json b/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-THREADRIPPER-Medium-Censored/89ca3fb4-eb53-422c-a4dd-029bd1fc7c37.json deleted file mode 100644 index 6a4960460..000000000 --- a/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-THREADRIPPER-Medium-Censored/89ca3fb4-eb53-422c-a4dd-029bd1fc7c37.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Qwen2.5-THREADRIPPER-Medium-Censored/1762652579.95415", - "retrieved_timestamp": "1762652579.954151", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Medium-Censored", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Medium-Censored", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8112064876749248 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6431453053747279 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.533987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3347315436241611 - } - }, - { - "evaluation_name": 
"MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.414 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49285239361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-THREADRIPPER-Small-AnniversaryEdition/4fcdfdff-87be-47b0-93bb-b4bc0bb2499d.json b/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-THREADRIPPER-Small-AnniversaryEdition/4fcdfdff-87be-47b0-93bb-b4bc0bb2499d.json deleted file mode 100644 index 294c494a7..000000000 --- a/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-THREADRIPPER-Small-AnniversaryEdition/4fcdfdff-87be-47b0-93bb-b4bc0bb2499d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Qwen2.5-THREADRIPPER-Small-AnniversaryEdition/1762652579.954578", - "retrieved_timestamp": "1762652579.954578", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Small-AnniversaryEdition", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Small-AnniversaryEdition", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7403899431286763 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5465437953400678 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5075528700906344 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38069791666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4393284574468085 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-THREADRIPPER-Small/a55039b6-922f-4732-9feb-fa757f627ebd.json b/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-THREADRIPPER-Small/a55039b6-922f-4732-9feb-fa757f627ebd.json deleted file mode 100644 index 894f870cd..000000000 --- a/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-THREADRIPPER-Small/a55039b6-922f-4732-9feb-fa757f627ebd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Qwen2.5-THREADRIPPER-Small/1762652579.9543638", - "retrieved_timestamp": "1762652579.954365", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Small", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Small", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7689164749531243 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5489785469339065 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4735649546827795 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43492708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4356715425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-Ultra-1.5B-25.02-Exp/ddfae432-5d3c-4c7e-bc7f-087cddea014f.json b/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-Ultra-1.5B-25.02-Exp/ddfae432-5d3c-4c7e-bc7f-087cddea014f.json deleted file mode 100644 index 386dbd20b..000000000 --- a/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-Ultra-1.5B-25.02-Exp/ddfae432-5d3c-4c7e-bc7f-087cddea014f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Qwen2.5-Ultra-1.5B-25.02-Exp/1762652579.954794", - "retrieved_timestamp": "1762652579.9547951", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Xiaojian9992024/Qwen2.5-Ultra-1.5B-25.02-Exp", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Xiaojian9992024/Qwen2.5-Ultra-1.5B-25.02-Exp", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4073403015111017 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40655813090204523 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3383125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26412898936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-1M-YOYO-V3/fdc183ed-50d6-40c3-8e7b-02a37fc42a00.json b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-1M-YOYO-V3/fdc183ed-50d6-40c3-8e7b-02a37fc42a00.json deleted file mode 100644 index b0c7a5a22..000000000 --- a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-1M-YOYO-V3/fdc183ed-50d6-40c3-8e7b-02a37fc42a00.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-1M-YOYO-V3/1762652579.955529", - "retrieved_timestamp": "1762652579.95553", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "YOYO-AI/Qwen2.5-14B-1M-YOYO-V3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/Qwen2.5-14B-1M-YOYO-V3", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8398327548681941 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6448491305599157 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5354984894259819 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.414125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5206948138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-0505/1835078d-7897-4517-9d7b-86a2285dfa27.json b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-0505/1835078d-7897-4517-9d7b-86a2285dfa27.json deleted file mode 100644 index 01c85071c..000000000 --- a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-0505/1835078d-7897-4517-9d7b-86a2285dfa27.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-0505/1762652579.9557781", - "retrieved_timestamp": "1762652579.9557781", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-0505", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/Qwen2.5-14B-YOYO-0505", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5882912893345214 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6539239511887702 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4433534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3733221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47569791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5370678191489362 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-0510-v2/ad6edd05-e83f-4da3-b200-c1d972548e8b.json b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-0510-v2/ad6edd05-e83f-4da3-b200-c1d972548e8b.json deleted file mode 100644 index a40fa4413..000000000 --- a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-0510-v2/ad6edd05-e83f-4da3-b200-c1d972548e8b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-0510-v2/1762652579.955989", - "retrieved_timestamp": "1762652579.955989", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-0510-v2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/Qwen2.5-14B-YOYO-0510-v2", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.594710922574325 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6552826977321495 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44410876132930516 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38171140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47439583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5380651595744681 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-0805/6d4ac88f-7a02-4f78-9990-6736972f43f7.json b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-0805/6d4ac88f-7a02-4f78-9990-6736972f43f7.json deleted file mode 100644 index 3e15e60d6..000000000 --- a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-0805/6d4ac88f-7a02-4f78-9990-6736972f43f7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-0805/1762652579.956195", - "retrieved_timestamp": "1762652579.956195", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-0805", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/Qwen2.5-14B-YOYO-0805", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5882912893345214 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6539239511887702 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4433534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3733221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47569791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5370678191489362 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-1005-v2/ed12a458-8c3b-4e08-a218-e94b4fdd89d8.json b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-1005-v2/ed12a458-8c3b-4e08-a218-e94b4fdd89d8.json deleted file mode 100644 index f34faf0c8..000000000 --- a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-1005-v2/ed12a458-8c3b-4e08-a218-e94b4fdd89d8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-1005-v2/1762652579.956619", - "retrieved_timestamp": "1762652579.956619", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", 
- "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-1005-v2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/Qwen2.5-14B-YOYO-1005-v2", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.595310442958018 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6551321410649699 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4433534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38422818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4730625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5371509308510638 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-1005/29058700-6465-476d-b1c9-2bb89d70c52b.json b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-1005/29058700-6465-476d-b1c9-2bb89d70c52b.json deleted file mode 100644 index 1fbcc6a48..000000000 --- a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-1005/29058700-6465-476d-b1c9-2bb89d70c52b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-1005/1762652579.9563992", - "retrieved_timestamp": "1762652579.9564002", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-1005", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/Qwen2.5-14B-YOYO-1005", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5971588717935079 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6542059787912534 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.452416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47303125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5382313829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-1010-v2/2047ae80-fdc6-4e94-90e6-b3cac52d8c45.json b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-1010-v2/2047ae80-fdc6-4e94-90e6-b3cac52d8c45.json deleted file mode 100644 index a035007ed..000000000 --- a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-1010-v2/2047ae80-fdc6-4e94-90e6-b3cac52d8c45.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-1010-v2/1762652579.957223", - "retrieved_timestamp": "1762652579.957223", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-1010-v2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/Qwen2.5-14B-YOYO-1010-v2", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.594710922574325 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6552826977321495 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44410876132930516 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38171140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47439583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5380651595744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-1010/1de35d6f-c62f-48fd-b921-41e85b55434a.json b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-1010/1de35d6f-c62f-48fd-b921-41e85b55434a.json deleted file mode 100644 index 0d00d88f9..000000000 --- a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-1010/1de35d6f-c62f-48fd-b921-41e85b55434a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-1010/1762652579.957045", - "retrieved_timestamp": "1762652579.957045", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-1010", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/Qwen2.5-14B-YOYO-1010", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7904737208384863 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6405986391086301 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4180625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49443151595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-1010/6a676239-eed6-44dc-b395-1b2453d5b0ba.json b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-1010/6a676239-eed6-44dc-b395-1b2453d5b0ba.json deleted file mode 100644 index c21362a5e..000000000 --- a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-1010/6a676239-eed6-44dc-b395-1b2453d5b0ba.json +++ 
/dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-1010/1762652579.956832", - "retrieved_timestamp": "1762652579.956832", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-1010", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/Qwen2.5-14B-YOYO-1010", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5898648918203699 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6539973096042956 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4509063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38338926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47439583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5375664893617021 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-SCE/e0545222-4bd1-490a-a315-5b9ce9742310.json b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-SCE/e0545222-4bd1-490a-a315-5b9ce9742310.json deleted file mode 100644 index 1f1ac93b2..000000000 --- a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-SCE/e0545222-4bd1-490a-a315-5b9ce9742310.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-SCE/1762652579.957431", - "retrieved_timestamp": "1762652579.957431", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-SCE", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/Qwen2.5-14B-YOYO-SCE", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5843694729983111 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6489486805510399 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46148036253776437 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37416107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47042708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5380651595744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-V4-p1/441375d9-0375-4a15-9d50-267395d3ab13.json b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-V4-p1/441375d9-0375-4a15-9d50-267395d3ab13.json deleted file mode 100644 index 6635821de..000000000 --- a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-V4-p1/441375d9-0375-4a15-9d50-267395d3ab13.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-V4-p1/1762652579.957833", - "retrieved_timestamp": "1762652579.957834", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-V4-p1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/Qwen2.5-14B-YOYO-V4-p1", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8203488964835526 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6515535751177631 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.5332326283987915 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34563758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41942708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5019946808510638 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-V4-p2/9ecdd8a3-247b-46b2-ae3b-5798685329ef.json b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-V4-p2/9ecdd8a3-247b-46b2-ae3b-5798685329ef.json deleted file mode 100644 index b70815b7d..000000000 --- a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-V4-p2/9ecdd8a3-247b-46b2-ae3b-5798685329ef.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-V4-p2/1762652579.958032", - "retrieved_timestamp": "1762652579.9580328", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-V4-p2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/Qwen2.5-14B-YOYO-V4-p2", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8047868544351211 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6338919627514907 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5166163141993958 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271812080536913 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44345833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.49675864361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-V4/c76d318b-eba5-4407-be86-a92051791f00.json b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-V4/c76d318b-eba5-4407-be86-a92051791f00.json deleted file mode 100644 index afbcdf358..000000000 --- a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-V4/c76d318b-eba5-4407-be86-a92051791f00.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-V4/1762652579.9576309", - "retrieved_timestamp": "1762652579.957632", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-V4", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/Qwen2.5-14B-YOYO-V4", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8397828871837835 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6490345839036636 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5347432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221476510067114 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41152083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5169547872340425 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-latest-V2/b97b327c-1730-4bfe-b5fe-00dbfcd0d372.json b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-latest-V2/b97b327c-1730-4bfe-b5fe-00dbfcd0d372.json deleted file mode 100644 index 8d13692fd..000000000 --- a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-latest-V2/b97b327c-1730-4bfe-b5fe-00dbfcd0d372.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-latest-V2/1762652579.958441", - "retrieved_timestamp": "1762652579.958441", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-latest-V2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/Qwen2.5-14B-YOYO-latest-V2", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7771346693440072 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6299023045601466 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5158610271903323 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3540268456375839 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42993750000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5223570478723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-latest/d5487f61-9be7-4ffc-af6d-be9f925dd4ba.json b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-latest/d5487f61-9be7-4ffc-af6d-be9f925dd4ba.json deleted file mode 100644 index 6e7eea1ca..000000000 --- a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-latest/d5487f61-9be7-4ffc-af6d-be9f925dd4ba.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-latest/1762652579.95823", - "retrieved_timestamp": "1762652579.958231", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-latest", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/Qwen2.5-14B-YOYO-latest", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.591063932587756 - } - }, - 
{ - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6656232526900528 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4418429003021148 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3825503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.469125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5370678191489362 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-it-restore/ab78a98d-0cad-4215-8f37-f3093066a98d.json b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-it-restore/ab78a98d-0cad-4215-8f37-f3093066a98d.json deleted file mode 100644 index 5f18d598b..000000000 --- a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-it-restore/ab78a98d-0cad-4215-8f37-f3093066a98d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-it-restore/1762652579.958646", - "retrieved_timestamp": "1762652579.958647", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "YOYO-AI/Qwen2.5-14B-it-restore", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/Qwen2.5-14B-it-restore", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8209484168672456 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6387730309916794 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5370090634441088 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.337248322147651 - } 
- }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40872916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4900265957446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-7B-it-restore/2f2577b8-28e3-4fa1-8e65-66e59499b9cd.json b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-7B-it-restore/2f2577b8-28e3-4fa1-8e65-66e59499b9cd.json deleted file mode 100644 index 029967deb..000000000 --- a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-7B-it-restore/2f2577b8-28e3-4fa1-8e65-66e59499b9cd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-7B-it-restore/1762652579.958842", - "retrieved_timestamp": "1762652579.958842", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "YOYO-AI/Qwen2.5-7B-it-restore", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/Qwen2.5-7B-it-restore", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7530796065550517 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5406524352251431 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40069791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42877327127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-Coder-14B-YOYO-1010/4f6bda51-89d3-4005-9133-db6d871ae87d.json b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-Coder-14B-YOYO-1010/4f6bda51-89d3-4005-9133-db6d871ae87d.json deleted file mode 100644 index 
be49f68da..000000000 --- a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-Coder-14B-YOYO-1010/4f6bda51-89d3-4005-9133-db6d871ae87d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-Coder-14B-YOYO-1010/1762652579.9590368", - "retrieved_timestamp": "1762652579.959038", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "YOYO-AI/Qwen2.5-Coder-14B-YOYO-1010", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/Qwen2.5-Coder-14B-YOYO-1010", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5335864395359867 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6186663964199025 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3217522658610272 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3523489932885906 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4422395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4074966755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/ZYH-LLM-Qwen2.5-14B-V2/0c7e0639-a082-47f1-bf32-0c45ce573f0a.json b/data/hfopenllm_v2/alibaba/YOYO-AI/ZYH-LLM-Qwen2.5-14B-V2/0c7e0639-a082-47f1-bf32-0c45ce573f0a.json deleted file mode 100644 index 3840b6c02..000000000 --- a/data/hfopenllm_v2/alibaba/YOYO-AI/ZYH-LLM-Qwen2.5-14B-V2/0c7e0639-a082-47f1-bf32-0c45ce573f0a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/YOYO-AI_ZYH-LLM-Qwen2.5-14B-V2/1762652579.959567", - "retrieved_timestamp": "1762652579.9595678", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": 
"YOYO-AI/ZYH-LLM-Qwen2.5-14B-V2", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5070834275278483 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6452083564140533 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3542296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37919463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46890625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5371509308510638 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/ZYH-LLM-Qwen2.5-14B-V3/4f85534a-0b12-42c4-a0d3-06d4d8337e0c.json b/data/hfopenllm_v2/alibaba/YOYO-AI/ZYH-LLM-Qwen2.5-14B-V3/4f85534a-0b12-42c4-a0d3-06d4d8337e0c.json deleted file mode 100644 index 9fbcf852f..000000000 --- a/data/hfopenllm_v2/alibaba/YOYO-AI/ZYH-LLM-Qwen2.5-14B-V3/4f85534a-0b12-42c4-a0d3-06d4d8337e0c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/YOYO-AI_ZYH-LLM-Qwen2.5-14B-V3/1762652579.959789", - "retrieved_timestamp": "1762652579.959789", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V3", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8577928784513978 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6359248665982408 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.527190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33221476510067116 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40215625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4881150265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/ZYH-LLM-Qwen2.5-14B-V4/f5b253b5-4c42-49f8-9f3f-d85a5b2502c0.json b/data/hfopenllm_v2/alibaba/YOYO-AI/ZYH-LLM-Qwen2.5-14B-V4/f5b253b5-4c42-49f8-9f3f-d85a5b2502c0.json deleted file mode 100644 index 03dfae71a..000000000 --- a/data/hfopenllm_v2/alibaba/YOYO-AI/ZYH-LLM-Qwen2.5-14B-V4/f5b253b5-4c42-49f8-9f3f-d85a5b2502c0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/YOYO-AI_ZYH-LLM-Qwen2.5-14B-V4/1762652579.959998", - "retrieved_timestamp": "1762652579.959999", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V4", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V4", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8364605912312664 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.651497220848125 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5392749244712991 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44342708333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5203623670212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/ZYH-LLM-Qwen2.5-14B/2dd14fef-53f5-491d-a5e1-7e19f6043049.json b/data/hfopenllm_v2/alibaba/YOYO-AI/ZYH-LLM-Qwen2.5-14B/2dd14fef-53f5-491d-a5e1-7e19f6043049.json deleted file mode 100644 index 7ccf05708..000000000 --- a/data/hfopenllm_v2/alibaba/YOYO-AI/ZYH-LLM-Qwen2.5-14B/2dd14fef-53f5-491d-a5e1-7e19f6043049.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/YOYO-AI_ZYH-LLM-Qwen2.5-14B/1762652579.959276", - "retrieved_timestamp": "1762652579.9592772", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "YOYO-AI/ZYH-LLM-Qwen2.5-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/ZYH-LLM-Qwen2.5-14B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.594111402190632 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6644460038734455 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.411631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3859060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47569791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5350731382978723 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/YoungPanda/qwenqwen/7e4c528f-bb42-40e7-b849-86732d2f2a18.json b/data/hfopenllm_v2/alibaba/YoungPanda/qwenqwen/7e4c528f-bb42-40e7-b849-86732d2f2a18.json deleted file mode 100644 index 53a59e2e0..000000000 --- a/data/hfopenllm_v2/alibaba/YoungPanda/qwenqwen/7e4c528f-bb42-40e7-b849-86732d2f2a18.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/YoungPanda_qwenqwen/1762652579.964632", - "retrieved_timestamp": "1762652579.964633", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "YoungPanda/qwenqwen", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YoungPanda/qwenqwen", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2MoeForCausalLM", - "params_billions": 14.316 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12639684924888184 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.337898518087465 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.035498489425981876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34336458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11677194148936171 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/ZeroXClem/Qwen-2.5-Aether-SlerpFusion-7B/8b61e7aa-3ba3-4e25-b1bf-9718970a111a.json b/data/hfopenllm_v2/alibaba/ZeroXClem/Qwen-2.5-Aether-SlerpFusion-7B/8b61e7aa-3ba3-4e25-b1bf-9718970a111a.json deleted file mode 100644 index 84c1560c9..000000000 --- a/data/hfopenllm_v2/alibaba/ZeroXClem/Qwen-2.5-Aether-SlerpFusion-7B/8b61e7aa-3ba3-4e25-b1bf-9718970a111a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ZeroXClem_Qwen-2.5-Aether-SlerpFusion-7B/1762652579.9677062", - "retrieved_timestamp": "1762652579.9677062", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ZeroXClem/Qwen-2.5-Aether-SlerpFusion-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "ZeroXClem/Qwen-2.5-Aether-SlerpFusion-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6261597007052399 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5462236205548866 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27341389728096677 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41778125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43267952127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/ZeroXClem/Qwen2.5-7B-CelestialHarmony-1M/d912a685-7187-4b56-a7a8-881ed678ae2f.json b/data/hfopenllm_v2/alibaba/ZeroXClem/Qwen2.5-7B-CelestialHarmony-1M/d912a685-7187-4b56-a7a8-881ed678ae2f.json deleted file mode 100644 index 769841aec..000000000 --- a/data/hfopenllm_v2/alibaba/ZeroXClem/Qwen2.5-7B-CelestialHarmony-1M/d912a685-7187-4b56-a7a8-881ed678ae2f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ZeroXClem_Qwen2.5-7B-CelestialHarmony-1M/1762652579.967964", - "retrieved_timestamp": "1762652579.967965", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ZeroXClem/Qwen2.5-7B-CelestialHarmony-1M", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "ZeroXClem/Qwen2.5-7B-CelestialHarmony-1M", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5943862285402732 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5431374181474681 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4595416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4386635638297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/ZeroXClem/Qwen2.5-7B-HomerAnvita-NerdMix/500a7a12-9c94-4ed8-b2b4-33473141c3c7.json b/data/hfopenllm_v2/alibaba/ZeroXClem/Qwen2.5-7B-HomerAnvita-NerdMix/500a7a12-9c94-4ed8-b2b4-33473141c3c7.json deleted file mode 100644 index 13ef5e1e8..000000000 --- a/data/hfopenllm_v2/alibaba/ZeroXClem/Qwen2.5-7B-HomerAnvita-NerdMix/500a7a12-9c94-4ed8-b2b4-33473141c3c7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ZeroXClem_Qwen2.5-7B-HomerAnvita-NerdMix/1762652579.96818", - "retrieved_timestamp": "1762652579.968181", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ZeroXClem/Qwen2.5-7B-HomerAnvita-NerdMix", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "ZeroXClem/Qwen2.5-7B-HomerAnvita-NerdMix", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7707649037886142 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5541319848156986 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38368580060422963 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43905208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.4431515957446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/ZeroXClem/Qwen2.5-7B-HomerCreative-Mix/336aaa71-3f35-48f3-bede-cb9ab3324cfc.json b/data/hfopenllm_v2/alibaba/ZeroXClem/Qwen2.5-7B-HomerCreative-Mix/336aaa71-3f35-48f3-bede-cb9ab3324cfc.json deleted file mode 100644 index 6852acf30..000000000 --- a/data/hfopenllm_v2/alibaba/ZeroXClem/Qwen2.5-7B-HomerCreative-Mix/336aaa71-3f35-48f3-bede-cb9ab3324cfc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ZeroXClem_Qwen2.5-7B-HomerCreative-Mix/1762652579.968384", - "retrieved_timestamp": "1762652579.968385", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ZeroXClem/Qwen2.5-7B-HomerCreative-Mix", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "ZeroXClem/Qwen2.5-7B-HomerCreative-Mix", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7835044348994002 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5548068560095062 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3564954682779456 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43495833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4447307180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/ZeroXClem/Qwen2.5-7B-Qandora-CySec/7a495a80-f712-477b-bd5c-0cf7a07e8ef2.json b/data/hfopenllm_v2/alibaba/ZeroXClem/Qwen2.5-7B-Qandora-CySec/7a495a80-f712-477b-bd5c-0cf7a07e8ef2.json deleted file mode 100644 index 6f16ae527..000000000 --- a/data/hfopenllm_v2/alibaba/ZeroXClem/Qwen2.5-7B-Qandora-CySec/7a495a80-f712-477b-bd5c-0cf7a07e8ef2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ZeroXClem_Qwen2.5-7B-Qandora-CySec/1762652579.968593", - "retrieved_timestamp": "1762652579.9685938", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ZeroXClem/Qwen2.5-7B-Qandora-CySec", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "ZeroXClem/Qwen2.5-7B-Qandora-CySec", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6773172958860268 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5490022663689288 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2930513595166163 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4286041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4484707446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/abacusai/Liberated-Qwen1.5-14B/614f3e27-e150-4edb-9438-06d0b0f38ca3.json b/data/hfopenllm_v2/alibaba/abacusai/Liberated-Qwen1.5-14B/614f3e27-e150-4edb-9438-06d0b0f38ca3.json deleted file mode 100644 index 4701b37eb..000000000 --- a/data/hfopenllm_v2/alibaba/abacusai/Liberated-Qwen1.5-14B/614f3e27-e150-4edb-9438-06d0b0f38ca3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/abacusai_Liberated-Qwen1.5-14B/1762652579.9698281", - "retrieved_timestamp": "1762652579.9698281", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "abacusai/Liberated-Qwen1.5-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "abacusai/Liberated-Qwen1.5-14B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36310212458499 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49480009174671863 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16012084592145015 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41746875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35123005319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/adriszmar/QAIMath-Qwen2.5-7B-TIES/457f0bc3-68e1-4ecb-a983-5f504b1246cd.json b/data/hfopenllm_v2/alibaba/adriszmar/QAIMath-Qwen2.5-7B-TIES/457f0bc3-68e1-4ecb-a983-5f504b1246cd.json deleted file mode 100644 index 69ac14bd9..000000000 --- a/data/hfopenllm_v2/alibaba/adriszmar/QAIMath-Qwen2.5-7B-TIES/457f0bc3-68e1-4ecb-a983-5f504b1246cd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/adriszmar_QAIMath-Qwen2.5-7B-TIES/1762652579.975151", - "retrieved_timestamp": "1762652579.975153", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "adriszmar/QAIMath-Qwen2.5-7B-TIES", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "adriszmar/QAIMath-Qwen2.5-7B-TIES", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16853725891745014 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31242688274884584 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0015105740181268882 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.24916107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39629166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10663231382978723 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/adriszmar/QAIMath-Qwen2.5-7B-TIES/78544e05-7eed-465d-9199-35b25e1bebfe.json b/data/hfopenllm_v2/alibaba/adriszmar/QAIMath-Qwen2.5-7B-TIES/78544e05-7eed-465d-9199-35b25e1bebfe.json deleted file mode 100644 index 4f5ee3d32..000000000 --- a/data/hfopenllm_v2/alibaba/adriszmar/QAIMath-Qwen2.5-7B-TIES/78544e05-7eed-465d-9199-35b25e1bebfe.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/adriszmar_QAIMath-Qwen2.5-7B-TIES/1762652579.9747589", - "retrieved_timestamp": "1762652579.9747598", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "adriszmar/QAIMath-Qwen2.5-7B-TIES", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "adriszmar/QAIMath-Qwen2.5-7B-TIES", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.174632198123202 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3126379538396578 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24496644295302014 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40959375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10871010638297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-42B-AGI/de6fe2ab-47de-4616-a0b9-b2cb6f44b16b.json 
b/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-42B-AGI/de6fe2ab-47de-4616-a0b9-b2cb6f44b16b.json deleted file mode 100644 index fadc02814..000000000 --- a/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-42B-AGI/de6fe2ab-47de-4616-a0b9-b2cb6f44b16b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Qwen2.5-42B-AGI/1762652579.9983659", - "retrieved_timestamp": "1762652579.998367", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Qwen2.5-42B-AGI", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "allknowingroger/Qwen2.5-42B-AGI", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 42.516 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19129354557019818 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2942104150907988 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36203125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11677194148936171 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-7B-task2/3518e992-9548-4025-a641-99a2cf3833e4.json b/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-7B-task2/3518e992-9548-4025-a641-99a2cf3833e4.json deleted file mode 100644 index 86d6028e9..000000000 --- a/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-7B-task2/3518e992-9548-4025-a641-99a2cf3833e4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Qwen2.5-7B-task2/1762652579.998622", - "retrieved_timestamp": "1762652579.998623", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"allknowingroger/Qwen2.5-7B-task2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "allknowingroger/Qwen2.5-7B-task2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45270327176336567 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5625940266685543 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3549848942598187 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43696874999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4517121010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-7B-task3/0c556e08-bb71-406c-88b8-d45fc4cc43f0.json b/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-7B-task3/0c556e08-bb71-406c-88b8-d45fc4cc43f0.json deleted file mode 100644 index e1a5cff11..000000000 --- a/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-7B-task3/0c556e08-bb71-406c-88b8-d45fc4cc43f0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Qwen2.5-7B-task3/1762652579.998833", - "retrieved_timestamp": "1762652579.998834", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Qwen2.5-7B-task3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "allknowingroger/Qwen2.5-7B-task3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.512903540383959 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.5397623813486384 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26057401812688824 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43557291666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45013297872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-7B-task4/a200d34f-8ed0-4f1d-93e2-cff38b1811f9.json b/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-7B-task4/a200d34f-8ed0-4f1d-93e2-cff38b1811f9.json deleted file mode 100644 index 43ba462a5..000000000 --- a/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-7B-task4/a200d34f-8ed0-4f1d-93e2-cff38b1811f9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Qwen2.5-7B-task4/1762652579.999042", - "retrieved_timestamp": "1762652579.999042", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Qwen2.5-7B-task4", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "allknowingroger/Qwen2.5-7B-task4", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5005385709916355 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5583446038580263 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43954166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45611702127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-7B-task7/b5b02465-0d3f-4ccc-a104-174fcf53dc9a.json b/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-7B-task7/b5b02465-0d3f-4ccc-a104-174fcf53dc9a.json deleted file mode 100644 index 2dc2d6cf7..000000000 --- a/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-7B-task7/b5b02465-0d3f-4ccc-a104-174fcf53dc9a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Qwen2.5-7B-task7/1762652579.999242", - "retrieved_timestamp": "1762652579.999243", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Qwen2.5-7B-task7", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "allknowingroger/Qwen2.5-7B-task7", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42842325030917966 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.555243179835915 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4325625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4133144946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-7B-task8/956640e9-97a3-4641-9ed0-a63831a8ee58.json b/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-7B-task8/956640e9-97a3-4641-9ed0-a63831a8ee58.json deleted file mode 100644 index 99783d09c..000000000 --- a/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-7B-task8/956640e9-97a3-4641-9ed0-a63831a8ee58.json +++ /dev/null @@ -1,105 
+0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Qwen2.5-7B-task8/1762652579.9994612", - "retrieved_timestamp": "1762652579.999462", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Qwen2.5-7B-task8", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "allknowingroger/Qwen2.5-7B-task8", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4645185884564068 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5524895381578828 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3527190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45144791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44331781914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-slerp-14B/ba80d36c-7688-40e8-8182-251c6b9e6b19.json b/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-slerp-14B/ba80d36c-7688-40e8-8182-251c6b9e6b19.json deleted file mode 100644 index 378ed6179..000000000 --- a/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-slerp-14B/ba80d36c-7688-40e8-8182-251c6b9e6b19.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Qwen2.5-slerp-14B/1762652579.999685", - "retrieved_timestamp": "1762652579.999686", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Qwen2.5-slerp-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "allknowingroger/Qwen2.5-slerp-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 
- } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49282016161562425 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.65124197415124 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4622356495468278 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3674496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47439583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5378989361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/QwenSlerp12-7B/18c67de4-1518-44b6-b92f-b490e9d55877.json b/data/hfopenllm_v2/alibaba/allknowingroger/QwenSlerp12-7B/18c67de4-1518-44b6-b92f-b490e9d55877.json deleted file mode 100644 index d8a1f9209..000000000 --- a/data/hfopenllm_v2/alibaba/allknowingroger/QwenSlerp12-7B/18c67de4-1518-44b6-b92f-b490e9d55877.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_QwenSlerp12-7B/1762652579.999902", - "retrieved_timestamp": "1762652579.999903", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/QwenSlerp12-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "allknowingroger/QwenSlerp12-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5075577246151324 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5556448443090559 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.2945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45947916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4460605053191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/QwenSlerp4-14B/1393cab1-31aa-470c-bca1-53f99d7ea1e8.json b/data/hfopenllm_v2/alibaba/allknowingroger/QwenSlerp4-14B/1393cab1-31aa-470c-bca1-53f99d7ea1e8.json deleted file mode 100644 index ec14e7356..000000000 --- a/data/hfopenllm_v2/alibaba/allknowingroger/QwenSlerp4-14B/1393cab1-31aa-470c-bca1-53f99d7ea1e8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_QwenSlerp4-14B/1762652580.000124", - "retrieved_timestamp": "1762652580.000125", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/QwenSlerp4-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "allknowingroger/QwenSlerp4-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6327544249258634 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6483250205703057 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3693353474320242 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3724832214765101 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46496875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.5435505319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/QwenSlerp5-14B/da7928ec-55b8-4d4b-9b9e-b40c5de7136b.json b/data/hfopenllm_v2/alibaba/allknowingroger/QwenSlerp5-14B/da7928ec-55b8-4d4b-9b9e-b40c5de7136b.json deleted file mode 100644 index b92fc6656..000000000 --- a/data/hfopenllm_v2/alibaba/allknowingroger/QwenSlerp5-14B/da7928ec-55b8-4d4b-9b9e-b40c5de7136b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_QwenSlerp5-14B/1762652580.000389", - "retrieved_timestamp": "1762652580.0003898", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/QwenSlerp5-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "allknowingroger/QwenSlerp5-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7119387669162267 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6356573710010681 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3564954682779456 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3649328859060403 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4675416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5390625 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/QwenSlerp6-14B/5135513f-f255-412b-ab16-f0d613e4525e.json b/data/hfopenllm_v2/alibaba/allknowingroger/QwenSlerp6-14B/5135513f-f255-412b-ab16-f0d613e4525e.json deleted file mode 100644 index 94bd11fdd..000000000 --- a/data/hfopenllm_v2/alibaba/allknowingroger/QwenSlerp6-14B/5135513f-f255-412b-ab16-f0d613e4525e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_QwenSlerp6-14B/1762652580.0006049", - "retrieved_timestamp": "1762652580.000606", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/QwenSlerp6-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "allknowingroger/QwenSlerp6-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6866846633598851 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6384454358065165 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3723564954682779 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3733221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46896875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5405585106382979 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/QwenStock1-14B/95c86ae6-dcb7-4ed7-a82d-ce0b374cca0e.json b/data/hfopenllm_v2/alibaba/allknowingroger/QwenStock1-14B/95c86ae6-dcb7-4ed7-a82d-ce0b374cca0e.json deleted file mode 100644 index 4d736833d..000000000 --- a/data/hfopenllm_v2/alibaba/allknowingroger/QwenStock1-14B/95c86ae6-dcb7-4ed7-a82d-ce0b374cca0e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_QwenStock1-14B/1762652580.0008268", - "retrieved_timestamp": "1762652580.0008278", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/QwenStock1-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "allknowingroger/QwenStock1-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5634117474966422 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6528491305599156 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3768882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3766778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47296875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5418051861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/QwenStock2-14B/4a4c258b-2b03-4fad-a5e0-b623a25fb735.json b/data/hfopenllm_v2/alibaba/allknowingroger/QwenStock2-14B/4a4c258b-2b03-4fad-a5e0-b623a25fb735.json deleted file mode 100644 index 76c3a8808..000000000 --- a/data/hfopenllm_v2/alibaba/allknowingroger/QwenStock2-14B/4a4c258b-2b03-4fad-a5e0-b623a25fb735.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_QwenStock2-14B/1762652580.001041", - "retrieved_timestamp": "1762652580.001042", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/QwenStock2-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "allknowingroger/QwenStock2-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5563427261887348 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.656885010139055 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38821752265861026 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37919463087248323 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47560416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5405585106382979 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/QwenStock3-14B/2b3928ad-ab69-4e63-aa3c-e64dea7b5e6c.json b/data/hfopenllm_v2/alibaba/allknowingroger/QwenStock3-14B/2b3928ad-ab69-4e63-aa3c-e64dea7b5e6c.json deleted file mode 100644 index f00811a90..000000000 --- a/data/hfopenllm_v2/alibaba/allknowingroger/QwenStock3-14B/2b3928ad-ab69-4e63-aa3c-e64dea7b5e6c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_QwenStock3-14B/1762652580.0012438", - "retrieved_timestamp": "1762652580.001245", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/QwenStock3-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "allknowingroger/QwenStock3-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5615134509767417 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6565322062808641 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3776435045317221 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3783557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4755729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5428025265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/Qwenslerp2-14B/636ed71e-3d86-4d5d-8b8d-3019f26261fc.json b/data/hfopenllm_v2/alibaba/allknowingroger/Qwenslerp2-14B/636ed71e-3d86-4d5d-8b8d-3019f26261fc.json deleted file mode 100644 index 
7414a937f..000000000 --- a/data/hfopenllm_v2/alibaba/allknowingroger/Qwenslerp2-14B/636ed71e-3d86-4d5d-8b8d-3019f26261fc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Qwenslerp2-14B/1762652580.001452", - "retrieved_timestamp": "1762652580.0014532", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Qwenslerp2-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "allknowingroger/Qwenslerp2-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5007136619724553 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6554876216007552 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44561933534743203 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36828859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4729375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5403091755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/Qwenslerp2-7B/a1e6f539-f5d7-4f57-b0da-4df7e5a86240.json b/data/hfopenllm_v2/alibaba/allknowingroger/Qwenslerp2-7B/a1e6f539-f5d7-4f57-b0da-4df7e5a86240.json deleted file mode 100644 index e39d285a5..000000000 --- a/data/hfopenllm_v2/alibaba/allknowingroger/Qwenslerp2-7B/a1e6f539-f5d7-4f57-b0da-4df7e5a86240.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Qwenslerp2-7B/1762652580.001649", - "retrieved_timestamp": "1762652580.0016499", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Qwenslerp2-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "allknowingroger/Qwenslerp2-7B", - 
"additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5294396645345462 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5609127334788001 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3421450151057402 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4356041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4515458776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/Qwenslerp3-14B/06a2a807-3dbc-42c4-adec-4d6caa01cf74.json b/data/hfopenllm_v2/alibaba/allknowingroger/Qwenslerp3-14B/06a2a807-3dbc-42c4-adec-4d6caa01cf74.json deleted file mode 100644 index 5871a2a14..000000000 --- a/data/hfopenllm_v2/alibaba/allknowingroger/Qwenslerp3-14B/06a2a807-3dbc-42c4-adec-4d6caa01cf74.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Qwenslerp3-14B/1762652580.001856", - "retrieved_timestamp": "1762652580.001856", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Qwenslerp3-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "allknowingroger/Qwenslerp3-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5052349986923584 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6520835120117142 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on 
MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44637462235649544 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.375 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46760416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5394780585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/Qwenslerp3-7B/88727af1-7672-4ab5-9cc4-f56d286f3967.json b/data/hfopenllm_v2/alibaba/allknowingroger/Qwenslerp3-7B/88727af1-7672-4ab5-9cc4-f56d286f3967.json deleted file mode 100644 index 32c433504..000000000 --- a/data/hfopenllm_v2/alibaba/allknowingroger/Qwenslerp3-7B/88727af1-7672-4ab5-9cc4-f56d286f3967.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Qwenslerp3-7B/1762652580.0020611", - "retrieved_timestamp": "1762652580.002062", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Qwenslerp3-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "allknowingroger/Qwenslerp3-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.501837347127843 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5580160200086862 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3217522658610272 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45151041666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on 
MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45420545212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/Rombos-LLM-V2.5-Qwen-42b/619fde94-d095-4f5c-b36d-19a38b6a8109.json b/data/hfopenllm_v2/alibaba/allknowingroger/Rombos-LLM-V2.5-Qwen-42b/619fde94-d095-4f5c-b36d-19a38b6a8109.json deleted file mode 100644 index 7b0db4d17..000000000 --- a/data/hfopenllm_v2/alibaba/allknowingroger/Rombos-LLM-V2.5-Qwen-42b/619fde94-d095-4f5c-b36d-19a38b6a8109.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Rombos-LLM-V2.5-Qwen-42b/1762652580.002683", - "retrieved_timestamp": "1762652580.002683", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Rombos-LLM-V2.5-Qwen-42b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "allknowingroger/Rombos-LLM-V2.5-Qwen-42b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 42.516 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1879213819332704 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2969164076001621 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36333333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11677194148936171 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-1.5B-Blunt/d75b9105-a60d-49d9-8606-7b23ff5d3d1a.json b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-1.5B-Blunt/d75b9105-a60d-49d9-8606-7b23ff5d3d1a.json deleted file mode 100644 index d990c22a4..000000000 --- a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-1.5B-Blunt/d75b9105-a60d-49d9-8606-7b23ff5d3d1a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-1.5B-Blunt/1762652580.03596", - "retrieved_timestamp": "1762652580.0359628", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-1.5B-Blunt", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "braindao/DeepSeek-R1-Distill-Qwen-1.5B-Blunt", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.261136008014291 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27743669901671336 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13821752265861026 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24748322147651006 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35952083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11835106382978723 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-1.5B-Reflective/40933520-61e0-4cbe-b6b2-b4d19063a1b9.json b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-1.5B-Reflective/40933520-61e0-4cbe-b6b2-b4d19063a1b9.json deleted file mode 100644 index afeba1e5f..000000000 --- a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-1.5B-Reflective/40933520-61e0-4cbe-b6b2-b4d19063a1b9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-1.5B-Reflective/1762652580.0363572", - "retrieved_timestamp": "1762652580.0363579", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-1.5B-Reflective", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "braindao/DeepSeek-R1-Distill-Qwen-1.5B-Reflective", - "additional_details": { - "precision": 
"bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30327641768285923 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2908444769655102 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16314199395770393 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33555208333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11303191489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-ABUB-ST/46a36382-df06-4dc1-93ae-6ae61343a969.json b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-ABUB-ST/46a36382-df06-4dc1-93ae-6ae61343a969.json deleted file mode 100644 index b75803e23..000000000 --- a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-ABUB-ST/46a36382-df06-4dc1-93ae-6ae61343a969.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B-ABUB-ST/1762652580.036823", - "retrieved_timestamp": "1762652580.036824", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-14B-ABUB-ST", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-ABUB-ST", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3751922676276723 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4926903187457697 - } - }, - { - "evaluation_name": "MATH Level 5", - 
"metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5015105740181269 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3447986577181208 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4220625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42428523936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt-Reflective/269f307e-3af1-47a2-92ec-00a59b4725ac.json b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt-Reflective/269f307e-3af1-47a2-92ec-00a59b4725ac.json deleted file mode 100644 index 2c412e9c9..000000000 --- a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt-Reflective/269f307e-3af1-47a2-92ec-00a59b4725ac.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt-Reflective/1762652580.03794", - "retrieved_timestamp": "1762652580.037941", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt-Reflective", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt-Reflective", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.554044380022784 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.337106084887115 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23716012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4247604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15043218085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt/244417b6-88a2-483f-adba-c1d944c9cc29.json b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt/244417b6-88a2-483f-adba-c1d944c9cc29.json deleted file mode 100644 index e9e8e4dcd..000000000 --- a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt/244417b6-88a2-483f-adba-c1d944c9cc29.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt/1762652580.037686", - "retrieved_timestamp": "1762652580.037687", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5221456845614081 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3198581755956472 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25075528700906347 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4526979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14835438829787234 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Reflective/1bf5eb2a-c0e2-4bfc-9ae1-ec5737974cbe.json b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Reflective/1bf5eb2a-c0e2-4bfc-9ae1-ec5737974cbe.json deleted file mode 100644 index c51c3b998..000000000 --- a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Reflective/1bf5eb2a-c0e2-4bfc-9ae1-ec5737974cbe.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Reflective/1762652580.038195", - "retrieved_timestamp": "1762652580.038196", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Reflective", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Reflective", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5139274901705253 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3013444769655102 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1472809667673716 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44333333333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12890625 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored/41186ba2-77da-496c-afd0-c0f11ea05c9b.json b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored/41186ba2-77da-496c-afd0-c0f11ea05c9b.json deleted file mode 100644 index eaeefee0b..000000000 --- a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored/41186ba2-77da-496c-afd0-c0f11ea05c9b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored/1762652580.037415", - "retrieved_timestamp": "1762652580.037416", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5421791956453321 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3170339746824052 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16314199395770393 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4486979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14311835106382978 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt/407adfd5-6a1f-420a-a5de-2e37740d7025.json b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt/407adfd5-6a1f-420a-a5de-2e37740d7025.json deleted file mode 100644 index faf1cf1be..000000000 --- a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt/407adfd5-6a1f-420a-a5de-2e37740d7025.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt/1762652580.0370848", - "retrieved_timestamp": "1762652580.037087", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt", - "additional_details": { - "precision": "bfloat16", 
- "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5611632690151022 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32828968244496226 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16389728096676737 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45542708333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14469747340425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Reflective/744cef52-b155-4bb0-9411-2eb47938b5d6.json b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Reflective/744cef52-b155-4bb0-9411-2eb47938b5d6.json deleted file mode 100644 index 7e51598ce..000000000 --- a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Reflective/744cef52-b155-4bb0-9411-2eb47938b5d6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B-Reflective/1762652580.038453", - "retrieved_timestamp": "1762652580.038454", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-14B-Reflective", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Reflective", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4290227706928727 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.301225755504323 - } - }, - { - "evaluation_name": "MATH Level 5", - 
"metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19184290030211482 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4553958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11294880319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B/f269f0cb-4f9b-4f29-84c2-a4f31ff08290.json b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B/f269f0cb-4f9b-4f29-84c2-a4f31ff08290.json deleted file mode 100644 index f79976d6e..000000000 --- a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B/f269f0cb-4f9b-4f29-84c2-a4f31ff08290.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B/1762652580.036597", - "retrieved_timestamp": "1762652580.036598", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "braindao/DeepSeek-R1-Distill-Qwen-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4171575863154209 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30329653176003074 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17598187311178248 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.4487916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11269946808510638 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-7B-Blunt/678a08d8-3089-4d97-879d-c5485344de05.json b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-7B-Blunt/678a08d8-3089-4d97-879d-c5485344de05.json deleted file mode 100644 index 2aef3e2d0..000000000 --- a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-7B-Blunt/678a08d8-3089-4d97-879d-c5485344de05.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-7B-Blunt/1762652580.03893", - "retrieved_timestamp": "1762652580.038931", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-7B-Blunt", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "braindao/DeepSeek-R1-Distill-Qwen-7B-Blunt", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4266246891581005 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29017781029884354 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21450151057401812 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38851041666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11693816489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored/9c8db160-fc92-473f-a766-fb00fc099f6e.json b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored/9c8db160-fc92-473f-a766-fb00fc099f6e.json deleted file mode 100644 index 91830006f..000000000 --- 
a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored/9c8db160-fc92-473f-a766-fb00fc099f6e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored/1762652580.03921", - "retrieved_timestamp": "1762652580.039211", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "braindao/DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3654503384353515 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2958444769655102 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17371601208459214 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38460416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11328125 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-7B-Reflective/fd05a73b-5b6a-460e-85d5-547710ab6bac.json b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-7B-Reflective/fd05a73b-5b6a-460e-85d5-547710ab6bac.json deleted file mode 100644 index 77b673acb..000000000 --- a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-7B-Reflective/fd05a73b-5b6a-460e-85d5-547710ab6bac.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-7B-Reflective/1762652580.039571", - "retrieved_timestamp": "1762652580.039572", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"braindao/DeepSeek-R1-Distill-Qwen-7B-Reflective", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "braindao/DeepSeek-R1-Distill-Qwen-7B-Reflective", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3921783091087204 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2906778102988436 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20241691842900303 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38999999999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1155252659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-7B/b4c9ec76-b126-4715-b3cf-c0d8a8a61d44.json b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-7B/b4c9ec76-b126-4715-b3cf-c0d8a8a61d44.json deleted file mode 100644 index ea9b15819..000000000 --- a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-7B/b4c9ec76-b126-4715-b3cf-c0d8a8a61d44.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-7B/1762652580.0386932", - "retrieved_timestamp": "1762652580.038694", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "braindao/DeepSeek-R1-Distill-Qwen-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39679938119744496 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2886778102988436 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19184290030211482 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37666666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1141123670212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/braindao/Qwen2.5-14B/7be8016c-2454-4228-b10d-badba12e845b.json b/data/hfopenllm_v2/alibaba/braindao/Qwen2.5-14B/7be8016c-2454-4228-b10d-badba12e845b.json deleted file mode 100644 index 8d921d66f..000000000 --- a/data/hfopenllm_v2/alibaba/braindao/Qwen2.5-14B/7be8016c-2454-4228-b10d-badba12e845b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/braindao_Qwen2.5-14B/1762652580.039853", - "retrieved_timestamp": "1762652580.039854", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "braindao/Qwen2.5-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "braindao/Qwen2.5-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.540854931581537 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5852660409288039 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29229607250755285 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3733221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.41235416666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48836436170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/bunnycore/CyberCore-Qwen-2.1-7B/131132b7-5b2a-421f-aa02-360ef9b7f206.json b/data/hfopenllm_v2/alibaba/bunnycore/CyberCore-Qwen-2.1-7B/131132b7-5b2a-421f-aa02-360ef9b7f206.json deleted file mode 100644 index d9e6b0e41..000000000 --- a/data/hfopenllm_v2/alibaba/bunnycore/CyberCore-Qwen-2.1-7B/131132b7-5b2a-421f-aa02-360ef9b7f206.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_CyberCore-Qwen-2.1-7B/1762652580.0426219", - "retrieved_timestamp": "1762652580.042623", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/CyberCore-Qwen-2.1-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/CyberCore-Qwen-2.1-7B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5765757080103016 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5572089082936126 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35876132930513593 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4144895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4444813829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/bunnycore/DeepQwen-3B-LCoT-SCE/49243e70-a24d-4e0c-b4c6-4275be1db944.json b/data/hfopenllm_v2/alibaba/bunnycore/DeepQwen-3B-LCoT-SCE/49243e70-a24d-4e0c-b4c6-4275be1db944.json deleted file mode 100644 index 99f99d0e3..000000000 --- a/data/hfopenllm_v2/alibaba/bunnycore/DeepQwen-3B-LCoT-SCE/49243e70-a24d-4e0c-b4c6-4275be1db944.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - 
"schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_DeepQwen-3B-LCoT-SCE/1762652580.042877", - "retrieved_timestamp": "1762652580.042878", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/DeepQwen-3B-LCoT-SCE", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/DeepQwen-3B-LCoT-SCE", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.396 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4489809261647983 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45123121380305237 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24697885196374622 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35139583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3289561170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/bunnycore/DeepSeek-R1-Distill-Qwen-7B-RRP-Ex/7e6a55fb-da39-4b16-a59b-70635e636c02.json b/data/hfopenllm_v2/alibaba/bunnycore/DeepSeek-R1-Distill-Qwen-7B-RRP-Ex/7e6a55fb-da39-4b16-a59b-70635e636c02.json deleted file mode 100644 index d5a01b9ac..000000000 --- a/data/hfopenllm_v2/alibaba/bunnycore/DeepSeek-R1-Distill-Qwen-7B-RRP-Ex/7e6a55fb-da39-4b16-a59b-70635e636c02.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_DeepSeek-R1-Distill-Qwen-7B-RRP-Ex/1762652580.043099", - "retrieved_timestamp": "1762652580.043099", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/DeepSeek-R1-Distill-Qwen-7B-RRP-Ex", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/DeepSeek-R1-Distill-Qwen-7B-RRP-Ex", - "additional_details": { - "precision": "bfloat16", - "architecture": 
"Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39010492160800014 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3494110718041537 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16540785498489427 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3663125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2508311170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/bunnycore/FwF-Qwen-7B-0.1/bfaeefb1-93c9-470b-9376-9c67a1d20862.json b/data/hfopenllm_v2/alibaba/bunnycore/FwF-Qwen-7B-0.1/bfaeefb1-93c9-470b-9376-9c67a1d20862.json deleted file mode 100644 index 1547e170d..000000000 --- a/data/hfopenllm_v2/alibaba/bunnycore/FwF-Qwen-7B-0.1/bfaeefb1-93c9-470b-9376-9c67a1d20862.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_FwF-Qwen-7B-0.1/1762652580.04422", - "retrieved_timestamp": "1762652580.044221", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/FwF-Qwen-7B-0.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/FwF-Qwen-7B-0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30045390674521383 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5019272523147252 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 
1 - }, - "score_details": { - "score": 0.2764350453172205 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39520833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4060837765957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/bunnycore/FwF-Qwen-7B-0.2/ee7b9254-5e4a-46a0-a8b3-2ecc1708e6ab.json b/data/hfopenllm_v2/alibaba/bunnycore/FwF-Qwen-7B-0.2/ee7b9254-5e4a-46a0-a8b3-2ecc1708e6ab.json deleted file mode 100644 index 7007e2419..000000000 --- a/data/hfopenllm_v2/alibaba/bunnycore/FwF-Qwen-7B-0.2/ee7b9254-5e4a-46a0-a8b3-2ecc1708e6ab.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_FwF-Qwen-7B-0.2/1762652580.044472", - "retrieved_timestamp": "1762652580.0444732", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/FwF-Qwen-7B-0.2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/FwF-Qwen-7B-0.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44790710869382133 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5596406929346521 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4259818731117825 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42178125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": 
{ - "score": 0.4382480053191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Deep-Sky-T1/33cc8f90-d019-49d9-8220-d66260659435.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Deep-Sky-T1/33cc8f90-d019-49d9-8220-d66260659435.json deleted file mode 100644 index 46401827a..000000000 --- a/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Deep-Sky-T1/33cc8f90-d019-49d9-8220-d66260659435.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7B-Deep-Sky-T1/1762652580.0542989", - "retrieved_timestamp": "1762652580.0542998", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Qwen-2.5-7B-Deep-Sky-T1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen-2.5-7B-Deep-Sky-T1", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42080457630198986 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4139878251775055 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40181249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2103557180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Deep-Stock-v1/a9fe98a7-e143-4100-99cd-adea90917c4c.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Deep-Stock-v1/a9fe98a7-e143-4100-99cd-adea90917c4c.json deleted file mode 100644 index 706fe0124..000000000 --- a/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Deep-Stock-v1/a9fe98a7-e143-4100-99cd-adea90917c4c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7B-Deep-Stock-v1/1762652580.054558", - "retrieved_timestamp": "1762652580.054559", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Qwen-2.5-7B-Deep-Stock-v1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen-2.5-7B-Deep-Stock-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5695066867023941 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5361336083539997 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26435045317220546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4108958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40658244680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Deep-Stock-v4/56ae78dc-3cae-43b0-afc9-e6fac3c6556a.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Deep-Stock-v4/56ae78dc-3cae-43b0-afc9-e6fac3c6556a.json deleted file mode 100644 index b3885b5e0..000000000 --- a/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Deep-Stock-v4/56ae78dc-3cae-43b0-afc9-e6fac3c6556a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7B-Deep-Stock-v4/1762652580.054795", - "retrieved_timestamp": "1762652580.054796", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Qwen-2.5-7B-Deep-Stock-v4", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen-2.5-7B-Deep-Stock-v4", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.7752862405085175 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5452765042799131 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48942598187311176 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41269791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4341755319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Deep-Stock-v5/39ce157b-e374-4963-8b40-6393835574f5.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Deep-Stock-v5/39ce157b-e374-4963-8b40-6393835574f5.json deleted file mode 100644 index edf2f9356..000000000 --- a/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Deep-Stock-v5/39ce157b-e374-4963-8b40-6393835574f5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7B-Deep-Stock-v5/1762652580.05501", - "retrieved_timestamp": "1762652580.055011", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Qwen-2.5-7B-Deep-Stock-v5", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen-2.5-7B-Deep-Stock-v5", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45090471061228654 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4672461238794705 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1472809667673716 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3648229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28316156914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Exp-Sce/c57286a9-ee0c-48e7-814e-8f2aa8e9688a.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Exp-Sce/c57286a9-ee0c-48e7-814e-8f2aa8e9688a.json deleted file mode 100644 index f6ae8087c..000000000 --- a/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Exp-Sce/c57286a9-ee0c-48e7-814e-8f2aa8e9688a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7B-Exp-Sce/1762652580.055233", - "retrieved_timestamp": "1762652580.055233", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Qwen-2.5-7B-Exp-Sce", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen-2.5-7B-Exp-Sce", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.765169749597734 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5505865059891896 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3255287009063444 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44302083333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42586436170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-R1-Stock/672e66ed-80e2-4b45-b52c-d9265f8efac8.json 
b/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-R1-Stock/672e66ed-80e2-4b45-b52c-d9265f8efac8.json deleted file mode 100644 index e085cc7bd..000000000 --- a/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-R1-Stock/672e66ed-80e2-4b45-b52c-d9265f8efac8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7B-R1-Stock/1762652580.055454", - "retrieved_timestamp": "1762652580.055455", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Qwen-2.5-7B-R1-Stock", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen-2.5-7B-R1-Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7573261169253137 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5393363105747148 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5007552870090635 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3993645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.429438164893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Stock-Deep-Bespoke/af89079b-b84e-48f1-876a-ebf2d933d91e.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Stock-Deep-Bespoke/af89079b-b84e-48f1-876a-ebf2d933d91e.json deleted file mode 100644 index 4ee229033..000000000 --- a/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Stock-Deep-Bespoke/af89079b-b84e-48f1-876a-ebf2d933d91e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7B-Stock-Deep-Bespoke/1762652580.0556722", - "retrieved_timestamp": "1762652580.0556731", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": 
"documentation" - }, - "model_info": { - "name": "bunnycore/Qwen-2.5-7B-Stock-Deep-Bespoke", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen-2.5-7B-Stock-Deep-Bespoke", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5206219497599702 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49203477801491813 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18882175226586104 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4068020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3579621010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7b-S1k/e7394d5d-4253-4a53-8a0a-73b0a41e62a4.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7b-S1k/e7394d5d-4253-4a53-8a0a-73b0a41e62a4.json deleted file mode 100644 index adfc840c7..000000000 --- a/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7b-S1k/e7394d5d-4253-4a53-8a0a-73b0a41e62a4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7b-S1k/1762652580.055886", - "retrieved_timestamp": "1762652580.0558872", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Qwen-2.5-7b-S1k", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen-2.5-7b-S1k", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7162351449708995 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.5562750208035135 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4780966767371601 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4071458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4382480053191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-1.5B-Model-Stock/865ffa1b-af08-416e-8de0-a16091d4ec79.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-1.5B-Model-Stock/865ffa1b-af08-416e-8de0-a16091d4ec79.json deleted file mode 100644 index 75b2b1a58..000000000 --- a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-1.5B-Model-Stock/865ffa1b-af08-416e-8de0-a16091d4ec79.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-1.5B-Model-Stock/1762652580.0561001", - "retrieved_timestamp": "1762652580.056101", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-1.5B-Model-Stock", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-1.5B-Model-Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.776 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18292574812608325 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2873695911207613 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3674270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11003989361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-Model-Stock-v2/e949a47b-85f9-4072-8302-8bfef92579d9.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-Model-Stock-v2/e949a47b-85f9-4072-8302-8bfef92579d9.json deleted file mode 100644 index efd52718b..000000000 --- a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-Model-Stock-v2/e949a47b-85f9-4072-8302-8bfef92579d9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-Model-Stock-v2/1762652580.0565188", - "retrieved_timestamp": "1762652580.05652", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-3B-Model-Stock-v2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-3B-Model-Stock-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.396 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6490157227268093 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46774789186946836 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3867069486404834 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3914583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3269614361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-Model-Stock-v3.1/744d1978-7aa3-44b6-91a0-664383a66f8b.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-Model-Stock-v3.1/744d1978-7aa3-44b6-91a0-664383a66f8b.json deleted file mode 100644 index 7ad15c8ad..000000000 --- 
a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-Model-Stock-v3.1/744d1978-7aa3-44b6-91a0-664383a66f8b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-Model-Stock-v3.1/1762652580.056732", - "retrieved_timestamp": "1762652580.056733", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-3B-Model-Stock-v3.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-3B-Model-Stock-v3.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.396 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6480915083090644 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.473722298403459 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38972809667673713 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39679166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3289561170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-Model-Stock-v3.2/139f2e38-0b98-4bfe-82b0-99a6e6b51e7f.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-Model-Stock-v3.2/139f2e38-0b98-4bfe-82b0-99a6e6b51e7f.json deleted file mode 100644 index f6639ff1a..000000000 --- a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-Model-Stock-v3.2/139f2e38-0b98-4bfe-82b0-99a6e6b51e7f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-Model-Stock-v3.2/1762652580.05695", - "retrieved_timestamp": "1762652580.05695", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-3B-Model-Stock-v3.2", - "developer": "alibaba", - "inference_platform": "unknown", 
- "id": "bunnycore/Qwen2.5-3B-Model-Stock-v3.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.396 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6353021095138676 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4727417689283166 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37537764350453173 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39279166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3293716755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-Model-Stock-v4.1/8348f83b-0739-411f-8b87-bd9d5e871ab3.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-Model-Stock-v4.1/8348f83b-0739-411f-8b87-bd9d5e871ab3.json deleted file mode 100644 index 33bcd9adb..000000000 --- a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-Model-Stock-v4.1/8348f83b-0739-411f-8b87-bd9d5e871ab3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-Model-Stock-v4.1/1762652580.0571678", - "retrieved_timestamp": "1762652580.057169", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-3B-Model-Stock-v4.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-3B-Model-Stock-v4.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.396 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6380747527671025 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48202557906199406 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3768882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39409374999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3386801861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-Model-Stock/4dcf1412-4182-40bd-bd1a-2246e29f18e9.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-Model-Stock/4dcf1412-4182-40bd-bd1a-2246e29f18e9.json deleted file mode 100644 index 301d48883..000000000 --- a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-Model-Stock/4dcf1412-4182-40bd-bd1a-2246e29f18e9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-Model-Stock/1762652580.056308", - "retrieved_timestamp": "1762652580.056309", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-3B-Model-Stock", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-3B-Model-Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.396 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6380747527671025 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4712481909242632 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37990936555891236 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.39415625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3249667553191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-RP-Mix/f43b9387-56a9-4c21-850c-5cfda84fc8b5.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-RP-Mix/f43b9387-56a9-4c21-850c-5cfda84fc8b5.json deleted file mode 100644 index 2984490a2..000000000 --- a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-RP-Mix/f43b9387-56a9-4c21-850c-5cfda84fc8b5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-RP-Mix/1762652580.057388", - "retrieved_timestamp": "1762652580.057389", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-3B-RP-Mix", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-3B-RP-Mix", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5720543712903984 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4894378989397821 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21525679758308158 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42844791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37275598404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-RP-Thinker-V2/497c8c15-1b77-4468-b33d-efa190c28e78.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-RP-Thinker-V2/497c8c15-1b77-4468-b33d-efa190c28e78.json deleted file mode 100644 index c13735c51..000000000 --- a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-RP-Thinker-V2/497c8c15-1b77-4468-b33d-efa190c28e78.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/bunnycore_Qwen2.5-3B-RP-Thinker-V2/1762652580.057826", - "retrieved_timestamp": "1762652580.057826", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-3B-RP-Thinker-V2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-3B-RP-Thinker-V2", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6419965691033125 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46784408133522204 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38293051359516617 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.398125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271276595744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-RP-Thinker/80cadd5b-ebbd-4f2f-912b-5d944650e2b1.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-RP-Thinker/80cadd5b-ebbd-4f2f-912b-5d944650e2b1.json deleted file mode 100644 index 4e3b25323..000000000 --- a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-RP-Thinker/80cadd5b-ebbd-4f2f-912b-5d944650e2b1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-RP-Thinker/1762652580.0576031", - "retrieved_timestamp": "1762652580.057604", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-3B-RP-Thinker", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-3B-RP-Thinker", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.589414974489909 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4164134011392067 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33534743202416917 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3287291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3149933510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-CyberRombos/1dc11c68-ce65-4a5b-9f75-4cdf1775bfc6.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-CyberRombos/1dc11c68-ce65-4a5b-9f75-4cdf1775bfc6.json deleted file mode 100644 index 8460e3f13..000000000 --- a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-CyberRombos/1dc11c68-ce65-4a5b-9f75-4cdf1775bfc6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-CyberRombos/1762652580.058041", - "retrieved_timestamp": "1762652580.058042", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-7B-CyberRombos", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-7B-CyberRombos", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.751830698103255 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5464960546716063 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4962235649546828 - } - }, - { 
- "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41254166666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4390791223404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-Fuse-Exp/f435a5b0-cc12-4603-b7b0-4625dc547ed2.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-Fuse-Exp/f435a5b0-cc12-4603-b7b0-4625dc547ed2.json deleted file mode 100644 index 5621353e6..000000000 --- a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-Fuse-Exp/f435a5b0-cc12-4603-b7b0-4625dc547ed2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-Fuse-Exp/1762652580.0583198", - "retrieved_timestamp": "1762652580.058321", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-7B-Fuse-Exp", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-7B-Fuse-Exp", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5468501354184675 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5108680600425207 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31419939577039274 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45728125000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3308676861702128 - } - 
} - ] -} diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-MixStock-Sce-V0.3/daf38e27-1149-44a8-84f2-93f842f4740a.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-MixStock-Sce-V0.3/daf38e27-1149-44a8-84f2-93f842f4740a.json deleted file mode 100644 index 53b193db4..000000000 --- a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-MixStock-Sce-V0.3/daf38e27-1149-44a8-84f2-93f842f4740a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-MixStock-Sce-V0.3/1762652580.058998", - "retrieved_timestamp": "1762652580.058999", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-7B-MixStock-Sce-V0.3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-7B-MixStock-Sce-V0.3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21197644472222593 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3479005166788895 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25755287009063443 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3713958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17794215425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-MixStock-V0.1/4a5bb50c-017d-421d-8ea1-21a8316db0f4.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-MixStock-V0.1/4a5bb50c-017d-421d-8ea1-21a8316db0f4.json deleted file mode 100644 index 57bb4884b..000000000 --- a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-MixStock-V0.1/4a5bb50c-017d-421d-8ea1-21a8316db0f4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-MixStock-V0.1/1762652580.059214", - "retrieved_timestamp": "1762652580.059214", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-7B-MixStock-V0.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-7B-MixStock-V0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7673428724672757 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5479100568012056 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31722054380664655 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.441625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4256150265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-R1-Bespoke-Stock/20de3a0f-fad0-4832-863e-2b2049037c4f.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-R1-Bespoke-Stock/20de3a0f-fad0-4832-863e-2b2049037c4f.json deleted file mode 100644 index 77a4c8d57..000000000 --- a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-R1-Bespoke-Stock/20de3a0f-fad0-4832-863e-2b2049037c4f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-R1-Bespoke-Stock/1762652580.059437", - "retrieved_timestamp": "1762652580.059438", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-7B-R1-Bespoke-Stock", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-7B-R1-Bespoke-Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3726445830396681 - } 
- }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48221362910675625 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20468277945619334 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3926354166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34715757978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-R1-Bespoke-Task/0f460b31-7249-4e2d-a614-d1230e95f3cf.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-R1-Bespoke-Task/0f460b31-7249-4e2d-a614-d1230e95f3cf.json deleted file mode 100644 index 4b890188d..000000000 --- a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-R1-Bespoke-Task/0f460b31-7249-4e2d-a614-d1230e95f3cf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-R1-Bespoke-Task/1762652580.059654", - "retrieved_timestamp": "1762652580.059655", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-7B-R1-Bespoke-Task", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-7B-R1-Bespoke-Task", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3786641666334215 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41495531490332715 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1782477341389728 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3568854166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2687832446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-RRP-1M-Thinker/1879a765-f4ab-4bad-9525-47f428b43220.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-RRP-1M-Thinker/1879a765-f4ab-4bad-9525-47f428b43220.json deleted file mode 100644 index b439867ff..000000000 --- a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-RRP-1M-Thinker/1879a765-f4ab-4bad-9525-47f428b43220.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-RRP-1M-Thinker/1762652580.060085", - "retrieved_timestamp": "1762652580.060086", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-7B-RRP-1M-Thinker", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-7B-RRP-1M-Thinker", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23081091503876383 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3481907488085136 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2719033232628399 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3767291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1768617021276596 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-RRP-1M/9ec2ac0c-21e8-4c9c-ba5f-69ad284400bb.json 
b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-RRP-1M/9ec2ac0c-21e8-4c9c-ba5f-69ad284400bb.json deleted file mode 100644 index afa81e203..000000000 --- a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-RRP-1M/9ec2ac0c-21e8-4c9c-ba5f-69ad284400bb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-RRP-1M/1762652580.059867", - "retrieved_timestamp": "1762652580.0598679", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-7B-RRP-1M", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-7B-RRP-1M", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7481338404322753 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.545239229980545 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.324773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44826041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4266123670212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-RRP-ID/85b10038-d136-4be7-8e04-7298ddb4f7d2.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-RRP-ID/85b10038-d136-4be7-8e04-7298ddb4f7d2.json deleted file mode 100644 index 740f1196a..000000000 --- a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-RRP-ID/85b10038-d136-4be7-8e04-7298ddb4f7d2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-RRP-ID/1762652580.0603101", - "retrieved_timestamp": "1762652580.0603101", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"bunnycore/Qwen2.5-7B-RRP-ID", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-7B-RRP-ID", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.747259493698941 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5479543512061099 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.486404833836858 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41796875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4387466755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-Sky-R1-Mini/c1f39d51-d7a2-4fee-ba35-ef4e0d429b29.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-Sky-R1-Mini/c1f39d51-d7a2-4fee-ba35-ef4e0d429b29.json deleted file mode 100644 index d4a828c46..000000000 --- a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-Sky-R1-Mini/c1f39d51-d7a2-4fee-ba35-ef4e0d429b29.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-Sky-R1-Mini/1762652580.061045", - "retrieved_timestamp": "1762652580.0610461", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-7B-Sky-R1-Mini", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-7B-Sky-R1-Mini", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23048622100471194 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.3502939195575525 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3448229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12533244680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/bunnycore/QwenMosaic-7B/4fcee29d-6351-4875-995d-81834fd878c3.json b/data/hfopenllm_v2/alibaba/bunnycore/QwenMosaic-7B/4fcee29d-6351-4875-995d-81834fd878c3.json deleted file mode 100644 index bc7bdf0a5..000000000 --- a/data/hfopenllm_v2/alibaba/bunnycore/QwenMosaic-7B/4fcee29d-6351-4875-995d-81834fd878c3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_QwenMosaic-7B/1762652580.061329", - "retrieved_timestamp": "1762652580.0613298", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/QwenMosaic-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/QwenMosaic-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5819215237791282 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5564132127895585 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44410876132930516 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.4163854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43101728723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/cognitivecomputations/Dolphin3.0-Qwen2.5-0.5B/4b0c69d9-1801-4a54-9554-d8dcff88f9a3.json b/data/hfopenllm_v2/alibaba/cognitivecomputations/Dolphin3.0-Qwen2.5-0.5B/4b0c69d9-1801-4a54-9554-d8dcff88f9a3.json deleted file mode 100644 index 086a94353..000000000 --- a/data/hfopenllm_v2/alibaba/cognitivecomputations/Dolphin3.0-Qwen2.5-0.5B/4b0c69d9-1801-4a54-9554-d8dcff88f9a3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cognitivecomputations_Dolphin3.0-Qwen2.5-0.5B/1762652580.112457", - "retrieved_timestamp": "1762652580.112458", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cognitivecomputations/Dolphin3.0-Qwen2.5-0.5B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "cognitivecomputations/Dolphin3.0-Qwen2.5-0.5B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4697136930012367 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31142229157184026 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2348993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35545833333333327 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14128989361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/cognitivecomputations/dolphin-2.9.2-qwen2-72b/5d3c9637-0558-4a2e-9950-8e7017d013f8.json b/data/hfopenllm_v2/alibaba/cognitivecomputations/dolphin-2.9.2-qwen2-72b/5d3c9637-0558-4a2e-9950-8e7017d013f8.json deleted file mode 100644 index b4319ddbe..000000000 --- 
a/data/hfopenllm_v2/alibaba/cognitivecomputations/dolphin-2.9.2-qwen2-72b/5d3c9637-0558-4a2e-9950-8e7017d013f8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.2-qwen2-72b/1762652580.114711", - "retrieved_timestamp": "1762652580.114712", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cognitivecomputations/dolphin-2.9.2-qwen2-72b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "cognitivecomputations/dolphin-2.9.2-qwen2-72b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6343778950961227 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6296364939584073 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802114803625378 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3699664429530201 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45207291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.547124335106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/cognitivecomputations/dolphin-2.9.2-qwen2-7b/c04e8c21-3ae1-457a-9609-682341323a88.json b/data/hfopenllm_v2/alibaba/cognitivecomputations/dolphin-2.9.2-qwen2-7b/c04e8c21-3ae1-457a-9609-682341323a88.json deleted file mode 100644 index 17c662f57..000000000 --- a/data/hfopenllm_v2/alibaba/cognitivecomputations/dolphin-2.9.2-qwen2-7b/c04e8c21-3ae1-457a-9609-682341323a88.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.2-qwen2-7b/1762652580.114933", - "retrieved_timestamp": "1762652580.114934", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"cognitivecomputations/dolphin-2.9.2-qwen2-7b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "cognitivecomputations/dolphin-2.9.2-qwen2-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3534599307614906 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48938263759195594 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13444108761329304 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41914583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4050864361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/d38f0e3a-e89e-4af6-95b2-8230b6a84ec3.json b/data/hfopenllm_v2/alibaba/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/d38f0e3a-e89e-4af6-95b2-8230b6a84ec3.json deleted file mode 100644 index 0a295793b..000000000 --- a/data/hfopenllm_v2/alibaba/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/d38f0e3a-e89e-4af6-95b2-8230b6a84ec3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/deepseek-ai_DeepSeek-R1-Distill-Qwen-1.5B/1762652580.121964", - "retrieved_timestamp": "1762652580.1219652", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34634104176917246 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32409879947333436 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1691842900302115 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36345833333333327 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11868351063829788 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/77e70ef3-fef2-4b75-9221-b165ec29f31e.json b/data/hfopenllm_v2/alibaba/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/77e70ef3-fef2-4b75-9221-b165ec29f31e.json deleted file mode 100644 index 522c1bdb2..000000000 --- a/data/hfopenllm_v2/alibaba/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/77e70ef3-fef2-4b75-9221-b165ec29f31e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/deepseek-ai_DeepSeek-R1-Distill-Qwen-14B/1762652580.122241", - "retrieved_timestamp": "1762652580.122248", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43816517950150047 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5905573130283358 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5702416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3875838926174497 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.536625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4666722074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/6731c6b8-0b23-4fc2-b284-01025ce30887.json b/data/hfopenllm_v2/alibaba/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/6731c6b8-0b23-4fc2-b284-01025ce30887.json deleted file mode 100644 index ae0e4f509..000000000 --- a/data/hfopenllm_v2/alibaba/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/6731c6b8-0b23-4fc2-b284-01025ce30887.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/deepseek-ai_DeepSeek-R1-Distill-Qwen-32B/1762652580.12255", - "retrieved_timestamp": "1762652580.1225522", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4186314534324481 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41969150892898055 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17069486404833836 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4526041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46866688829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/4cb8eae2-bc55-4adb-a4eb-1fc9eb29d891.json 
b/data/hfopenllm_v2/alibaba/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/4cb8eae2-bc55-4adb-a4eb-1fc9eb29d891.json deleted file mode 100644 index 498e6f5ff..000000000 --- a/data/hfopenllm_v2/alibaba/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/4cb8eae2-bc55-4adb-a4eb-1fc9eb29d891.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/deepseek-ai_DeepSeek-R1-Distill-Qwen-7B/1762652580.1228092", - "retrieved_timestamp": "1762652580.1228101", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40376866713653103 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34425676981862185 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19561933534743203 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36628124999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2321309840425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/dfurman/Qwen2-72B-Orpo-v0.1/b197728d-b390-45a8-8adc-ed8567b628da.json b/data/hfopenllm_v2/alibaba/dfurman/Qwen2-72B-Orpo-v0.1/b197728d-b390-45a8-8adc-ed8567b628da.json deleted file mode 100644 index 6b58fac3e..000000000 --- a/data/hfopenllm_v2/alibaba/dfurman/Qwen2-72B-Orpo-v0.1/b197728d-b390-45a8-8adc-ed8567b628da.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/dfurman_Qwen2-72B-Orpo-v0.1/1762652580.125584", - "retrieved_timestamp": "1762652580.1255848", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": 
"documentation" - }, - "model_info": { - "name": "dfurman/Qwen2-72B-Orpo-v0.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "dfurman/Qwen2-72B-Orpo-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.699 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7879759039348928 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6969024790545039 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40558912386706947 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38422818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47842708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5454621010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/ehristoforu/QwenQwen2.5-7B-IT-Dare/09deb823-536f-4afc-95bf-ebb0a8eb2e00.json b/data/hfopenllm_v2/alibaba/ehristoforu/QwenQwen2.5-7B-IT-Dare/09deb823-536f-4afc-95bf-ebb0a8eb2e00.json deleted file mode 100644 index 061fc0654..000000000 --- a/data/hfopenllm_v2/alibaba/ehristoforu/QwenQwen2.5-7B-IT-Dare/09deb823-536f-4afc-95bf-ebb0a8eb2e00.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ehristoforu_QwenQwen2.5-7B-IT-Dare/1762652580.1400871", - "retrieved_timestamp": "1762652580.140088", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ehristoforu/QwenQwen2.5-7B-IT-Dare", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "ehristoforu/QwenQwen2.5-7B-IT-Dare", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7509064836855099 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5397962708415814 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5090634441087614 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4033645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4289394946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/ehristoforu/QwenQwen2.5-7B-IT/30f8faa5-777f-47bc-b128-f31b950079a3.json b/data/hfopenllm_v2/alibaba/ehristoforu/QwenQwen2.5-7B-IT/30f8faa5-777f-47bc-b128-f31b950079a3.json deleted file mode 100644 index 4469194b5..000000000 --- a/data/hfopenllm_v2/alibaba/ehristoforu/QwenQwen2.5-7B-IT/30f8faa5-777f-47bc-b128-f31b950079a3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ehristoforu_QwenQwen2.5-7B-IT/1762652580.1398232", - "retrieved_timestamp": "1762652580.1398232", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ehristoforu/QwenQwen2.5-7B-IT", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "ehristoforu/QwenQwen2.5-7B-IT", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.751830698103255 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5397962708415814 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5090634441087614 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4033645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4289394946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/ehristoforu/RQwen-v0.1/93187c79-f1a4-45f9-9d95-a254a185f7a4.json b/data/hfopenllm_v2/alibaba/ehristoforu/RQwen-v0.1/93187c79-f1a4-45f9-9d95-a254a185f7a4.json deleted file mode 100644 index c56411932..000000000 --- a/data/hfopenllm_v2/alibaba/ehristoforu/RQwen-v0.1/93187c79-f1a4-45f9-9d95-a254a185f7a4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ehristoforu_RQwen-v0.1/1762652580.140311", - "retrieved_timestamp": "1762652580.140312", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ehristoforu/RQwen-v0.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "ehristoforu/RQwen-v0.1", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7624968417133207 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6446435015804635 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4645015105740181 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32550335570469796 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41390625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5201961436170213 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/ehristoforu/RQwen-v0.2/69318100-73ee-47f4-96b2-6e7b310fbcd1.json b/data/hfopenllm_v2/alibaba/ehristoforu/RQwen-v0.2/69318100-73ee-47f4-96b2-6e7b310fbcd1.json deleted file mode 100644 index a00a9f577..000000000 --- a/data/hfopenllm_v2/alibaba/ehristoforu/RQwen-v0.2/69318100-73ee-47f4-96b2-6e7b310fbcd1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/ehristoforu_RQwen-v0.2/1762652580.140525", - "retrieved_timestamp": "1762652580.140526", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ehristoforu/RQwen-v0.2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "ehristoforu/RQwen-v0.2", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7503568309862276 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6426888858891955 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3270392749244713 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.337248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4206666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.515874335106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/ehristoforu/coolqwen-3b-it/5aab957b-f25b-4208-9bf8-2d16887245bc.json b/data/hfopenllm_v2/alibaba/ehristoforu/coolqwen-3b-it/5aab957b-f25b-4208-9bf8-2d16887245bc.json deleted file mode 100644 index b60580a84..000000000 --- a/data/hfopenllm_v2/alibaba/ehristoforu/coolqwen-3b-it/5aab957b-f25b-4208-9bf8-2d16887245bc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ehristoforu_coolqwen-3b-it/1762652580.140961", - "retrieved_timestamp": "1762652580.1409621", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ehristoforu/coolqwen-3b-it", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "ehristoforu/coolqwen-3b-it", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.085 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on 
IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6472670292601409 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.485089343991756 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36706948640483383 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41251041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3601230053191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/ehristoforu/frqwen2.5-from7b-duable4layers-it/b2c0f0f2-3c1d-4b2a-a82d-24001cbfd3d7.json b/data/hfopenllm_v2/alibaba/ehristoforu/frqwen2.5-from7b-duable4layers-it/b2c0f0f2-3c1d-4b2a-a82d-24001cbfd3d7.json deleted file mode 100644 index 34da75dbe..000000000 --- a/data/hfopenllm_v2/alibaba/ehristoforu/frqwen2.5-from7b-duable4layers-it/b2c0f0f2-3c1d-4b2a-a82d-24001cbfd3d7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ehristoforu_frqwen2.5-from7b-duable4layers-it/1762652580.1428769", - "retrieved_timestamp": "1762652580.1428769", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ehristoforu/frqwen2.5-from7b-duable4layers-it", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "ehristoforu/frqwen2.5-from7b-duable4layers-it", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 8.545 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7728881589737453 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5263561044354216 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.4509063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4165729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4126496010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/ehristoforu/frqwen2.5-from7b-it/26034d5d-5d52-40d8-aa9b-e90dbd255903.json b/data/hfopenllm_v2/alibaba/ehristoforu/frqwen2.5-from7b-it/26034d5d-5d52-40d8-aa9b-e90dbd255903.json deleted file mode 100644 index 62597e788..000000000 --- a/data/hfopenllm_v2/alibaba/ehristoforu/frqwen2.5-from7b-it/26034d5d-5d52-40d8-aa9b-e90dbd255903.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ehristoforu_frqwen2.5-from7b-it/1762652580.143308", - "retrieved_timestamp": "1762652580.143309", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ehristoforu/frqwen2.5-from7b-it", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "ehristoforu/frqwen2.5-from7b-it", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 13.206 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6532123654126606 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5142906815349029 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29229607250755285 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4085729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3976894946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/ehristoforu/qwen2.5-test-32b-it/606d699f-c7ac-4e5b-b5a3-5bd43f0a3ff6.json b/data/hfopenllm_v2/alibaba/ehristoforu/qwen2.5-test-32b-it/606d699f-c7ac-4e5b-b5a3-5bd43f0a3ff6.json deleted file mode 100644 index 309f403d6..000000000 --- a/data/hfopenllm_v2/alibaba/ehristoforu/qwen2.5-test-32b-it/606d699f-c7ac-4e5b-b5a3-5bd43f0a3ff6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ehristoforu_qwen2.5-test-32b-it/1762652580.144918", - "retrieved_timestamp": "1762652580.1449192", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ehristoforu/qwen2.5-test-32b-it", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "ehristoforu/qwen2.5-test-32b-it", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7889499860370484 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.708059329453303 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5974320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3640939597315436 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4578125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5765458776595744 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/ehristoforu/qwen2.5-with-lora-think-3b-it/6c40f966-753b-4301-8c9b-f7b4905c0b68.json b/data/hfopenllm_v2/alibaba/ehristoforu/qwen2.5-with-lora-think-3b-it/6c40f966-753b-4301-8c9b-f7b4905c0b68.json deleted file mode 100644 index 058c5f97b..000000000 --- a/data/hfopenllm_v2/alibaba/ehristoforu/qwen2.5-with-lora-think-3b-it/6c40f966-753b-4301-8c9b-f7b4905c0b68.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ehristoforu_qwen2.5-with-lora-think-3b-it/1762652580.1451252", - "retrieved_timestamp": "1762652580.1451259", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" 
- ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ehristoforu/qwen2.5-with-lora-think-3b-it", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "ehristoforu/qwen2.5-with-lora-think-3b-it", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5319374814381397 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4686847308109022 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.236404833836858 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43095833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3402593085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/freewheelin/free-evo-qwen72b-v0.8-re/cfb071af-7283-4155-8ce1-40f751dd46ec.json b/data/hfopenllm_v2/alibaba/freewheelin/free-evo-qwen72b-v0.8-re/cfb071af-7283-4155-8ce1-40f751dd46ec.json deleted file mode 100644 index 540b7d147..000000000 --- a/data/hfopenllm_v2/alibaba/freewheelin/free-evo-qwen72b-v0.8-re/cfb071af-7283-4155-8ce1-40f751dd46ec.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/freewheelin_free-evo-qwen72b-v0.8-re/1762652580.161332", - "retrieved_timestamp": "1762652580.161333", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "freewheelin/free-evo-qwen72b-v0.8-re", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "freewheelin/free-evo-qwen72b-v0.8-re", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 72.288 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.533086654521115 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6127477065378042 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18051359516616314 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3565436241610738 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4871666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4870345744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/godlikehhd/ifd_2500_qwen/84ad6756-cb9d-4303-8e7a-395c1dc7c222.json b/data/hfopenllm_v2/alibaba/godlikehhd/ifd_2500_qwen/84ad6756-cb9d-4303-8e7a-395c1dc7c222.json deleted file mode 100644 index 567bcc6cf..000000000 --- a/data/hfopenllm_v2/alibaba/godlikehhd/ifd_2500_qwen/84ad6756-cb9d-4303-8e7a-395c1dc7c222.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/godlikehhd_ifd_2500_qwen/1762652580.170526", - "retrieved_timestamp": "1762652580.170526", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "godlikehhd/ifd_2500_qwen", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "godlikehhd/ifd_2500_qwen", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33647388928044253 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42983047351897224 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09818731117824774 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": 
{ - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36146875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2921376329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/godlikehhd/ifd_new_correct_all_sample_2500_qwen/b481d1bd-e678-4b78-aecb-d43a561dd969.json b/data/hfopenllm_v2/alibaba/godlikehhd/ifd_new_correct_all_sample_2500_qwen/b481d1bd-e678-4b78-aecb-d43a561dd969.json deleted file mode 100644 index 19ff62fe7..000000000 --- a/data/hfopenllm_v2/alibaba/godlikehhd/ifd_new_correct_all_sample_2500_qwen/b481d1bd-e678-4b78-aecb-d43a561dd969.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/godlikehhd_ifd_new_correct_all_sample_2500_qwen/1762652580.170775", - "retrieved_timestamp": "1762652580.1707761", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "godlikehhd/ifd_new_correct_all_sample_2500_qwen", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "godlikehhd/ifd_new_correct_all_sample_2500_qwen", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33757319467900726 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4019641175400575 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09592145015105741 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3561666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2888962765957447 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/alibaba/godlikehhd/ifd_new_correct_sample_2500_qwen/c42196be-c20b-413d-8870-f10759058098.json b/data/hfopenllm_v2/alibaba/godlikehhd/ifd_new_correct_sample_2500_qwen/c42196be-c20b-413d-8870-f10759058098.json deleted file mode 100644 index c578732d3..000000000 --- a/data/hfopenllm_v2/alibaba/godlikehhd/ifd_new_correct_sample_2500_qwen/c42196be-c20b-413d-8870-f10759058098.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/godlikehhd_ifd_new_correct_sample_2500_qwen/1762652580.170979", - "retrieved_timestamp": "1762652580.1709802", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "godlikehhd/ifd_new_correct_sample_2500_qwen", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "godlikehhd/ifd_new_correct_sample_2500_qwen", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33974631754854895 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41103125849665423 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3078859060402685 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3626770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.293218085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/godlikehhd/ifd_new_qwen_2500/8d8663a1-12f6-4e88-af3d-784ff86e8c59.json b/data/hfopenllm_v2/alibaba/godlikehhd/ifd_new_qwen_2500/8d8663a1-12f6-4e88-af3d-784ff86e8c59.json deleted file mode 100644 index 428e5e9d5..000000000 --- a/data/hfopenllm_v2/alibaba/godlikehhd/ifd_new_qwen_2500/8d8663a1-12f6-4e88-af3d-784ff86e8c59.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/godlikehhd_ifd_new_qwen_2500/1762652580.171179", - "retrieved_timestamp": "1762652580.17118", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "godlikehhd/ifd_new_qwen_2500", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "godlikehhd/ifd_new_qwen_2500", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.323959316834887 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41598162527775745 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11178247734138973 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3589583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29105718085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/godlikehhd/qwen-2.5-1.5b-cherry/a0621e6d-4178-49c9-aa2b-f56930884b82.json b/data/hfopenllm_v2/alibaba/godlikehhd/qwen-2.5-1.5b-cherry/a0621e6d-4178-49c9-aa2b-f56930884b82.json deleted file mode 100644 index 4af000851..000000000 --- a/data/hfopenllm_v2/alibaba/godlikehhd/qwen-2.5-1.5b-cherry/a0621e6d-4178-49c9-aa2b-f56930884b82.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/godlikehhd_qwen-2.5-1.5b-cherry/1762652580.1715672", - "retrieved_timestamp": "1762652580.1715689", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "godlikehhd/qwen-2.5-1.5b-cherry", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "godlikehhd/qwen-2.5-1.5b-cherry", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.772 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28933784580468713 - } - }, - { - "evaluation_name": "BBH", 
- "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40357573315752204 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10196374622356495 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.345625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29230385638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/godlikehhd/qwen_2.5-1.5b-cherry_new/dd0260dd-59f7-4b3d-8f9c-60b297c07a1b.json b/data/hfopenllm_v2/alibaba/godlikehhd/qwen_2.5-1.5b-cherry_new/dd0260dd-59f7-4b3d-8f9c-60b297c07a1b.json deleted file mode 100644 index 8a912a415..000000000 --- a/data/hfopenllm_v2/alibaba/godlikehhd/qwen_2.5-1.5b-cherry_new/dd0260dd-59f7-4b3d-8f9c-60b297c07a1b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/godlikehhd_qwen_2.5-1.5b-cherry_new/1762652580.171904", - "retrieved_timestamp": "1762652580.171905", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "godlikehhd/qwen_2.5-1.5b-cherry_new", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "godlikehhd/qwen_2.5-1.5b-cherry_new", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3120442647730245 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4149628386006759 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09667673716012085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 
- } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34959375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28939494680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/godlikehhd/qwen_full_data_alpaca/746630a6-de1d-4976-9168-d8ff06980904.json b/data/hfopenllm_v2/alibaba/godlikehhd/qwen_full_data_alpaca/746630a6-de1d-4976-9168-d8ff06980904.json deleted file mode 100644 index a3c8e0d35..000000000 --- a/data/hfopenllm_v2/alibaba/godlikehhd/qwen_full_data_alpaca/746630a6-de1d-4976-9168-d8ff06980904.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/godlikehhd_qwen_full_data_alpaca/1762652580.1721501", - "retrieved_timestamp": "1762652580.172151", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "godlikehhd/qwen_full_data_alpaca", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "godlikehhd/qwen_full_data_alpaca", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3136178672588731 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4229212208733662 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09214501510574018 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40515625000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28507313829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/godlikehhd/qwen_ins_ans_2500/7f577380-2691-4906-af13-8ca3011e6316.json b/data/hfopenllm_v2/alibaba/godlikehhd/qwen_ins_ans_2500/7f577380-2691-4906-af13-8ca3011e6316.json deleted file mode 
100644 index 099d5a3f3..000000000 --- a/data/hfopenllm_v2/alibaba/godlikehhd/qwen_ins_ans_2500/7f577380-2691-4906-af13-8ca3011e6316.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/godlikehhd_qwen_ins_ans_2500/1762652580.172384", - "retrieved_timestamp": "1762652580.172385", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "godlikehhd/qwen_ins_ans_2500", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "godlikehhd/qwen_ins_ans_2500", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2698041197356348 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4073950292977672 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11404833836858005 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3588645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28091755319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/gz987/qwen2.5-7b-cabs-v0.1/9b6c775b-ef08-4e57-8441-52d7887615b1.json b/data/hfopenllm_v2/alibaba/gz987/qwen2.5-7b-cabs-v0.1/9b6c775b-ef08-4e57-8441-52d7887615b1.json deleted file mode 100644 index 3ae5ab83e..000000000 --- a/data/hfopenllm_v2/alibaba/gz987/qwen2.5-7b-cabs-v0.1/9b6c775b-ef08-4e57-8441-52d7887615b1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/gz987_qwen2.5-7b-cabs-v0.1/1762652580.187419", - "retrieved_timestamp": "1762652580.18742", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "gz987/qwen2.5-7b-cabs-v0.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "gz987/qwen2.5-7b-cabs-v0.1", - 
"additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7505817896514582 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5481580818735207 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.479607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.437625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4405751329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/gz987/qwen2.5-7b-cabs-v0.2/7288fa97-efd7-45d5-8769-e0071e9b5488.json b/data/hfopenllm_v2/alibaba/gz987/qwen2.5-7b-cabs-v0.2/7288fa97-efd7-45d5-8769-e0071e9b5488.json deleted file mode 100644 index 032872686..000000000 --- a/data/hfopenllm_v2/alibaba/gz987/qwen2.5-7b-cabs-v0.2/7288fa97-efd7-45d5-8769-e0071e9b5488.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/gz987_qwen2.5-7b-cabs-v0.2/1762652580.18783", - "retrieved_timestamp": "1762652580.187832", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "gz987/qwen2.5-7b-cabs-v0.2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "gz987/qwen2.5-7b-cabs-v0.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7417640748768822 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5516262466675281 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4901812688821752 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44286458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43974401595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/gz987/qwen2.5-7b-cabs-v0.3/b664e033-1424-431e-af8d-09a11b449286.json b/data/hfopenllm_v2/alibaba/gz987/qwen2.5-7b-cabs-v0.3/b664e033-1424-431e-af8d-09a11b449286.json deleted file mode 100644 index 69c13d74f..000000000 --- a/data/hfopenllm_v2/alibaba/gz987/qwen2.5-7b-cabs-v0.3/b664e033-1424-431e-af8d-09a11b449286.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/gz987_qwen2.5-7b-cabs-v0.3/1762652580.188173", - "retrieved_timestamp": "1762652580.188174", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "gz987/qwen2.5-7b-cabs-v0.3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "gz987/qwen2.5-7b-cabs-v0.3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7569515552068511 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5494465314719504 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.493202416918429 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44295833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4401595744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/gz987/qwen2.5-7b-cabs-v0.4/8fb7a2aa-3f43-4aaf-b2c0-1770704fcf81.json b/data/hfopenllm_v2/alibaba/gz987/qwen2.5-7b-cabs-v0.4/8fb7a2aa-3f43-4aaf-b2c0-1770704fcf81.json deleted file mode 100644 index 95481b239..000000000 --- a/data/hfopenllm_v2/alibaba/gz987/qwen2.5-7b-cabs-v0.4/8fb7a2aa-3f43-4aaf-b2c0-1770704fcf81.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/gz987_qwen2.5-7b-cabs-v0.4/1762652580.188425", - "retrieved_timestamp": "1762652580.188426", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "gz987/qwen2.5-7b-cabs-v0.4", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "gz987/qwen2.5-7b-cabs-v0.4", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7582503313430586 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5524401094760039 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48489425981873113 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44295833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4395777925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/hotmailuser/Deepseek-qwen-modelstock-2B/15a4291f-4918-43a6-b242-90db88fe4a3d.json b/data/hfopenllm_v2/alibaba/hotmailuser/Deepseek-qwen-modelstock-2B/15a4291f-4918-43a6-b242-90db88fe4a3d.json deleted file mode 100644 index 04c4a62fb..000000000 --- a/data/hfopenllm_v2/alibaba/hotmailuser/Deepseek-qwen-modelstock-2B/15a4291f-4918-43a6-b242-90db88fe4a3d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hotmailuser_Deepseek-qwen-modelstock-2B/1762652580.1914759", - "retrieved_timestamp": "1762652580.191477", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "hotmailuser/Deepseek-qwen-modelstock-2B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "hotmailuser/Deepseek-qwen-modelstock-2B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21487431127186973 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3549242330959277 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33987915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34745833333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19107380319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/hotmailuser/Qwen2.5-HomerSlerp-7B/9c7dab43-b26d-4cb4-a73c-95bb1e01ffe8.json b/data/hfopenllm_v2/alibaba/hotmailuser/Qwen2.5-HomerSlerp-7B/9c7dab43-b26d-4cb4-a73c-95bb1e01ffe8.json deleted file mode 100644 index 5a5ec2a2a..000000000 --- a/data/hfopenllm_v2/alibaba/hotmailuser/Qwen2.5-HomerSlerp-7B/9c7dab43-b26d-4cb4-a73c-95bb1e01ffe8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hotmailuser_Qwen2.5-HomerSlerp-7B/1762652580.1961112", - "retrieved_timestamp": "1762652580.1961112", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "hotmailuser/Qwen2.5-HomerSlerp-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "hotmailuser/Qwen2.5-HomerSlerp-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44878145542715553 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5632506117591088 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33157099697885195 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4383333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4548703457446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/hotmailuser/QwenModelStock-1.8B/661b1590-f312-447b-a494-1d37ffd93cae.json b/data/hfopenllm_v2/alibaba/hotmailuser/QwenModelStock-1.8B/661b1590-f312-447b-a494-1d37ffd93cae.json deleted file mode 100644 index 802db60e4..000000000 --- a/data/hfopenllm_v2/alibaba/hotmailuser/QwenModelStock-1.8B/661b1590-f312-447b-a494-1d37ffd93cae.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hotmailuser_QwenModelStock-1.8B/1762652580.196316", - "retrieved_timestamp": "1762652580.196316", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "hotmailuser/QwenModelStock-1.8B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "hotmailuser/QwenModelStock-1.8B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3263075306852484 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41881762650909504 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09894259818731117 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4359166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2958776595744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp-14B/83387977-a8cd-4cdd-abc7-301006380458.json b/data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp-14B/83387977-a8cd-4cdd-abc7-301006380458.json deleted file mode 100644 index 02d042d59..000000000 --- a/data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp-14B/83387977-a8cd-4cdd-abc7-301006380458.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hotmailuser_QwenSlerp-14B/1762652580.1965241", - "retrieved_timestamp": "1762652580.196525", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "hotmailuser/QwenSlerp-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "hotmailuser/QwenSlerp-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7024716640735471 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6491286917834284 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38368580060422963 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3875838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4634479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5399767287234043 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp-3B/7f53fb66-2c19-434a-acec-7cdcf9fce04d.json 
b/data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp-3B/7f53fb66-2c19-434a-acec-7cdcf9fce04d.json deleted file mode 100644 index 2718b1923..000000000 --- a/data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp-3B/7f53fb66-2c19-434a-acec-7cdcf9fce04d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hotmailuser_QwenSlerp-3B/1762652580.1967301", - "retrieved_timestamp": "1762652580.1967309", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "hotmailuser/QwenSlerp-3B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "hotmailuser/QwenSlerp-3B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4333690164319561 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4892345530653528 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27492447129909364 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43166666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3693484042553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp-7B/4f8db3ee-409a-4bac-ab0a-ee3493d1e842.json b/data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp-7B/4f8db3ee-409a-4bac-ab0a-ee3493d1e842.json deleted file mode 100644 index 1e1db8931..000000000 --- a/data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp-7B/4f8db3ee-409a-4bac-ab0a-ee3493d1e842.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hotmailuser_QwenSlerp-7B/1762652580.197109", - "retrieved_timestamp": "1762652580.19711", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "hotmailuser/QwenSlerp-7B", - "developer": 
"alibaba", - "inference_platform": "unknown", - "id": "hotmailuser/QwenSlerp-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4672912317096415 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5636352508232924 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34441087613293053 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4409375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45088098404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp2-14B/6732a278-0613-40fd-bdbc-88a586631279.json b/data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp2-14B/6732a278-0613-40fd-bdbc-88a586631279.json deleted file mode 100644 index 47ee12950..000000000 --- a/data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp2-14B/6732a278-0613-40fd-bdbc-88a586631279.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hotmailuser_QwenSlerp2-14B/1762652580.197355", - "retrieved_timestamp": "1762652580.197356", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "hotmailuser/QwenSlerp2-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "hotmailuser/QwenSlerp2-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7036707048409332 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6492799322983842 - } - }, - { - "evaluation_name": "MATH Level 5", - 
"metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39652567975830816 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48065625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5378989361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp2-3B/cc53c4f9-3c1b-4b21-9aac-ea22dced76c3.json b/data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp2-3B/cc53c4f9-3c1b-4b21-9aac-ea22dced76c3.json deleted file mode 100644 index b4d048936..000000000 --- a/data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp2-3B/cc53c4f9-3c1b-4b21-9aac-ea22dced76c3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hotmailuser_QwenSlerp2-3B/1762652580.197566", - "retrieved_timestamp": "1762652580.197566", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "hotmailuser/QwenSlerp2-3B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "hotmailuser/QwenSlerp2-3B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4280486885907171 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4801760257099328 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26057401812688824 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4251875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3741688829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp3-14B/7d2c1ffb-d1e7-4c88-af08-74642ddd8741.json b/data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp3-14B/7d2c1ffb-d1e7-4c88-af08-74642ddd8741.json deleted file mode 100644 index 152be2f06..000000000 --- a/data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp3-14B/7d2c1ffb-d1e7-4c88-af08-74642ddd8741.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hotmailuser_QwenSlerp3-14B/1762652580.197938", - "retrieved_timestamp": "1762652580.1979399", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "hotmailuser/QwenSlerp3-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "hotmailuser/QwenSlerp3-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6632291209546226 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6266526215170748 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43051359516616317 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36661073825503354 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48078125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5262632978723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/hotmailuser/QwenSparse-7B/96bbc2c8-bb74-408d-8625-e6bf66b63cd0.json b/data/hfopenllm_v2/alibaba/hotmailuser/QwenSparse-7B/96bbc2c8-bb74-408d-8625-e6bf66b63cd0.json deleted file mode 100644 index e585f6140..000000000 --- a/data/hfopenllm_v2/alibaba/hotmailuser/QwenSparse-7B/96bbc2c8-bb74-408d-8625-e6bf66b63cd0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hotmailuser_QwenSparse-7B/1762652580.198252", - "retrieved_timestamp": "1762652580.198254", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "hotmailuser/QwenSparse-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "hotmailuser/QwenSparse-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10858632871891026 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28956619468137906 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35622916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11220079787234043 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/hotmailuser/QwenStock-0.5B/72853b4d-cc12-478f-b6f4-977b8fbabfa0.json b/data/hfopenllm_v2/alibaba/hotmailuser/QwenStock-0.5B/72853b4d-cc12-478f-b6f4-977b8fbabfa0.json deleted file mode 100644 index 7faf44574..000000000 --- a/data/hfopenllm_v2/alibaba/hotmailuser/QwenStock-0.5B/72853b4d-cc12-478f-b6f4-977b8fbabfa0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hotmailuser_QwenStock-0.5B/1762652580.198598", - "retrieved_timestamp": "1762652580.1985989", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "hotmailuser/QwenStock-0.5B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "hotmailuser/QwenStock-0.5B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.20490742341431845 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911778102988436 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35753125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11668882978723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/hotmailuser/QwenStock-1.7B/25674b98-92b5-4e2d-97ab-084eabb13db2.json b/data/hfopenllm_v2/alibaba/hotmailuser/QwenStock-1.7B/25674b98-92b5-4e2d-97ab-084eabb13db2.json deleted file mode 100644 index 551818457..000000000 --- a/data/hfopenllm_v2/alibaba/hotmailuser/QwenStock-1.7B/25674b98-92b5-4e2d-97ab-084eabb13db2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hotmailuser_QwenStock-1.7B/1762652580.1988428", - "retrieved_timestamp": "1762652580.198844", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "hotmailuser/QwenStock-1.7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "hotmailuser/QwenStock-1.7B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32141163224688274 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4187550547805281 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09969788519637462 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44121875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2954621010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/hotmailuser/QwenStock1-14B/67fd0572-cf55-412d-8ec6-0cb168d3ed08.json b/data/hfopenllm_v2/alibaba/hotmailuser/QwenStock1-14B/67fd0572-cf55-412d-8ec6-0cb168d3ed08.json deleted file mode 100644 index ac55dcb8a..000000000 --- a/data/hfopenllm_v2/alibaba/hotmailuser/QwenStock1-14B/67fd0572-cf55-412d-8ec6-0cb168d3ed08.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hotmailuser_QwenStock1-14B/1762652580.1990862", - "retrieved_timestamp": "1762652580.1990871", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "hotmailuser/QwenStock1-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "hotmailuser/QwenStock1-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6693240601603745 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6502248812491821 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37009063444108764 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3859060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47811458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5416389627659575 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/huihui-ai/DeepSeek-R1-Distill-Qwen-14B-abliterated-v2/69d04754-3779-4408-9aa9-68c9ba65de7a.json 
b/data/hfopenllm_v2/alibaba/huihui-ai/DeepSeek-R1-Distill-Qwen-14B-abliterated-v2/69d04754-3779-4408-9aa9-68c9ba65de7a.json deleted file mode 100644 index cb66a53b8..000000000 --- a/data/hfopenllm_v2/alibaba/huihui-ai/DeepSeek-R1-Distill-Qwen-14B-abliterated-v2/69d04754-3779-4408-9aa9-68c9ba65de7a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/huihui-ai_DeepSeek-R1-Distill-Qwen-14B-abliterated-v2/1762652580.200386", - "retrieved_timestamp": "1762652580.200386", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "huihui-ai/DeepSeek-R1-Distill-Qwen-14B-abliterated-v2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "huihui-ai/DeepSeek-R1-Distill-Qwen-14B-abliterated-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42112927033604175 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34869240677927044 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47006250000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19148936170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/jayasuryajsk/Qwen2.5-3B-reasoner/91c0e116-7dc0-4931-ac61-b98bac2af3e0.json b/data/hfopenllm_v2/alibaba/jayasuryajsk/Qwen2.5-3B-reasoner/91c0e116-7dc0-4931-ac61-b98bac2af3e0.json deleted file mode 100644 index aafd7883d..000000000 --- a/data/hfopenllm_v2/alibaba/jayasuryajsk/Qwen2.5-3B-reasoner/91c0e116-7dc0-4931-ac61-b98bac2af3e0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jayasuryajsk_Qwen2.5-3B-reasoner/1762652580.280263", - "retrieved_timestamp": "1762652580.280264", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jayasuryajsk/Qwen2.5-3B-reasoner", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jayasuryajsk/Qwen2.5-3B-reasoner", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4159585455480348 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46511772991620703 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41229166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3482380319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/jeanmichela/o-distil-qwen/172e7bfa-b430-4e14-a15a-a54ec5c9133e.json b/data/hfopenllm_v2/alibaba/jeanmichela/o-distil-qwen/172e7bfa-b430-4e14-a15a-a54ec5c9133e.json deleted file mode 100644 index 775883769..000000000 --- a/data/hfopenllm_v2/alibaba/jeanmichela/o-distil-qwen/172e7bfa-b430-4e14-a15a-a54ec5c9133e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jeanmichela_o-distil-qwen/1762652580.280534", - "retrieved_timestamp": "1762652580.280535", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jeanmichela/o-distil-qwen", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jeanmichela/o-distil-qwen", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44823180272787316 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5900367438200601 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3934563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5339895833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46575797872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/jebish7/qwen2.5-0.5B-IHA-Hin/5849d742-02eb-4370-8c97-efc5eec4f1ed.json b/data/hfopenllm_v2/alibaba/jebish7/qwen2.5-0.5B-IHA-Hin/5849d742-02eb-4370-8c97-efc5eec4f1ed.json deleted file mode 100644 index 4a562eadf..000000000 --- a/data/hfopenllm_v2/alibaba/jebish7/qwen2.5-0.5B-IHA-Hin/5849d742-02eb-4370-8c97-efc5eec4f1ed.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jebish7_qwen2.5-0.5B-IHA-Hin/1762652580.28294", - "retrieved_timestamp": "1762652580.28294", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jebish7/qwen2.5-0.5B-IHA-Hin", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jebish7/qwen2.5-0.5B-IHA-Hin", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14163419726326149 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29891753632624085 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34748958333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.109375 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen-7B-nerd-uncensored-v1.0/1812829e-2c91-410e-9e2e-cc758b652e9b.json b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen-7B-nerd-uncensored-v1.0/1812829e-2c91-410e-9e2e-cc758b652e9b.json deleted file mode 100644 index 1f0427b1a..000000000 --- a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen-7B-nerd-uncensored-v1.0/1812829e-2c91-410e-9e2e-cc758b652e9b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen-7B-nerd-uncensored-v1.0/1762652580.283215", - "retrieved_timestamp": "1762652580.2832158", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jeffmeloy/Qwen-7B-nerd-uncensored-v1.0", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jeffmeloy/Qwen-7B-nerd-uncensored-v1.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6135952605752737 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5421083753999172 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28700906344410876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47929166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4362533244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-minperplexity-2/593d3d30-f2e8-4ad3-b0ab-4bfed63a0ab5.json b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-minperplexity-2/593d3d30-f2e8-4ad3-b0ab-4bfed63a0ab5.json deleted file mode 100644 index 8768aa559..000000000 --- 
a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-minperplexity-2/593d3d30-f2e8-4ad3-b0ab-4bfed63a0ab5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-minperplexity-2/1762652580.28349", - "retrieved_timestamp": "1762652580.2834911", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jeffmeloy/Qwen2.5-7B-minperplexity-2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jeffmeloy/Qwen2.5-7B-minperplexity-2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.509730847484674 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.552390586276348 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3013595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46245833333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4345910904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v0.9/45a72c39-9cdb-4fb6-aaf0-d50cc89dfd70.json b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v0.9/45a72c39-9cdb-4fb6-aaf0-d50cc89dfd70.json deleted file mode 100644 index f964ba3a7..000000000 --- a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v0.9/45a72c39-9cdb-4fb6-aaf0-d50cc89dfd70.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v0.9/1762652580.2837172", - "retrieved_timestamp": "1762652580.2837179", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v0.9", - "developer": "alibaba", - 
"inference_platform": "unknown", - "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v0.9", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6048274134851084 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5469701834138724 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48198958333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4363364361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.0/ee2b789c-951d-426e-87e3-232c07d65ade.json b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.0/ee2b789c-951d-426e-87e3-232c07d65ade.json deleted file mode 100644 index 869e76bdf..000000000 --- a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.0/ee2b789c-951d-426e-87e3-232c07d65ade.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.0/1762652580.283937", - "retrieved_timestamp": "1762652580.283938", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.0", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7695159953368174 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.541762771903226 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47129909365558914 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4551145833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4253656914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.1/2316b408-c94b-471e-b64b-c1f8f345868e.json b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.1/2316b408-c94b-471e-b64b-c1f8f345868e.json deleted file mode 100644 index 57316d4b1..000000000 --- a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.1/2316b408-c94b-471e-b64b-c1f8f345868e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.1/1762652580.2841558", - "retrieved_timestamp": "1762652580.284157", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6626296005709296 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48640249867140106 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13293051359516617 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38429166666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3849734042553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.2/49d47f6d-0d11-4b07-b42e-b94310c97d3e.json b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.2/49d47f6d-0d11-4b07-b42e-b94310c97d3e.json deleted file mode 100644 index 0f1dc5fff..000000000 --- a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.2/49d47f6d-0d11-4b07-b42e-b94310c97d3e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.2/1762652580.284375", - "retrieved_timestamp": "1762652580.284375", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49646715160219335 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.494592979290867 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41724999999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3968583776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.3/0ec990b0-b908-44f5-9fb7-5ee603737bc7.json b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.3/0ec990b0-b908-44f5-9fb7-5ee603737bc7.json deleted file mode 100644 index 
f6fef58d7..000000000 --- a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.3/0ec990b0-b908-44f5-9fb7-5ee603737bc7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.3/1762652580.284589", - "retrieved_timestamp": "1762652580.284589", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49951462120506923 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5026055485090198 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12311178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41873958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4015957446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.4/34c33a97-ae07-42e9-8025-9076e2bce3bb.json b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.4/34c33a97-ae07-42e9-8025-9076e2bce3bb.json deleted file mode 100644 index 31a65db37..000000000 --- a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.4/34c33a97-ae07-42e9-8025-9076e2bce3bb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.4/1762652580.284807", - "retrieved_timestamp": "1762652580.284807", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.4", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.4", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6078748830879843 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5467076263362468 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2809667673716012 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47138541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44190492021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.5/bd4ff159-0bf9-4fe1-8cc8-9f3d7bb47bbc.json b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.5/bd4ff159-0bf9-4fe1-8cc8-9f3d7bb47bbc.json deleted file mode 100644 index 6be6ab956..000000000 --- a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.5/bd4ff159-0bf9-4fe1-8cc8-9f3d7bb47bbc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.5/1762652580.2850199", - "retrieved_timestamp": "1762652580.2850208", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.5", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.5", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5650352176669016 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5522599149696679 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2756797583081571 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271812080536913 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49820833333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44481382978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.7/4aa966fc-ee99-430c-8688-99565f5e6fcc.json b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.7/4aa966fc-ee99-430c-8688-99565f5e6fcc.json deleted file mode 100644 index b4957da4a..000000000 --- a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.7/4aa966fc-ee99-430c-8688-99565f5e6fcc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.7/1762652580.285239", - "retrieved_timestamp": "1762652580.285239", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.7", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.7", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4201551882338861 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5391718355132782 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29154078549848944 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48484375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42802526595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.8/e908901d-c122-4458-9d4e-9a7d1242211c.json b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.8/e908901d-c122-4458-9d4e-9a7d1242211c.json deleted file mode 100644 index 6bc99d9aa..000000000 --- a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.8/e908901d-c122-4458-9d4e-9a7d1242211c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.8/1762652580.2854452", - "retrieved_timestamp": "1762652580.285446", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.8", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.8", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6255601803215468 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5446899383425835 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.270392749244713 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47671875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4343417553191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.0/e9350de5-cae6-46bc-a83f-0e6e65eae4e3.json b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.0/e9350de5-cae6-46bc-a83f-0e6e65eae4e3.json deleted file mode 
100644 index 186f04343..000000000 --- a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.0/e9350de5-cae6-46bc-a83f-0e6e65eae4e3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-olm-v1.0/1762652580.285652", - "retrieved_timestamp": "1762652580.2856529", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jeffmeloy/Qwen2.5-7B-olm-v1.0", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jeffmeloy/Qwen2.5-7B-olm-v1.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5331365222055258 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5659918212629057 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2862537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42776041666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4566156914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.1/769eabf2-4c12-4a48-8ec2-7dacf50a28f0.json b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.1/769eabf2-4c12-4a48-8ec2-7dacf50a28f0.json deleted file mode 100644 index 99e2857fc..000000000 --- a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.1/769eabf2-4c12-4a48-8ec2-7dacf50a28f0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-olm-v1.1/1762652580.285865", - "retrieved_timestamp": "1762652580.285865", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jeffmeloy/Qwen2.5-7B-olm-v1.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": 
"jeffmeloy/Qwen2.5-7B-olm-v1.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4329445870290828 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5478077656573704 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38293051359516617 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48081250000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4354222074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.2/8c4531a4-4418-4090-9c82-f60bcf8d9935.json b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.2/8c4531a4-4418-4090-9c82-f60bcf8d9935.json deleted file mode 100644 index 90cc39e6a..000000000 --- a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.2/8c4531a4-4418-4090-9c82-f60bcf8d9935.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-olm-v1.2/1762652580.286082", - "retrieved_timestamp": "1762652580.286083", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jeffmeloy/Qwen2.5-7B-olm-v1.2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jeffmeloy/Qwen2.5-7B-olm-v1.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42025492360270744 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5533340429711561 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2847432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46878125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4387466755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.3/a5c9246f-a7b5-4183-9a64-93151b536945.json b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.3/a5c9246f-a7b5-4183-9a64-93151b536945.json deleted file mode 100644 index 7489e2cfc..000000000 --- a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.3/a5c9246f-a7b5-4183-9a64-93151b536945.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-olm-v1.3/1762652580.286303", - "retrieved_timestamp": "1762652580.286304", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jeffmeloy/Qwen2.5-7B-olm-v1.3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jeffmeloy/Qwen2.5-7B-olm-v1.3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4218540140161438 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5531852688351706 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104229607250755 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4700520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { 
- "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44697473404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.4/1faf58ba-28e7-45a1-bc2c-d0aa707a49aa.json b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.4/1faf58ba-28e7-45a1-bc2c-d0aa707a49aa.json deleted file mode 100644 index 52a27ca6b..000000000 --- a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.4/1faf58ba-28e7-45a1-bc2c-d0aa707a49aa.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-olm-v1.4/1762652580.286527", - "retrieved_timestamp": "1762652580.2865438", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jeffmeloy/Qwen2.5-7B-olm-v1.4", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jeffmeloy/Qwen2.5-7B-olm-v1.4", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4545018329144448 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5581962445576828 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29229607250755285 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46220833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4457280585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.5/b347eea5-e676-478e-b0ee-d53abf2c8697.json b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.5/b347eea5-e676-478e-b0ee-d53abf2c8697.json deleted file mode 100644 index 9211efe46..000000000 --- a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.5/b347eea5-e676-478e-b0ee-d53abf2c8697.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-olm-v1.5/1762652580.286995", - "retrieved_timestamp": "1762652580.286996", - 
"source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jeffmeloy/Qwen2.5-7B-olm-v1.5", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jeffmeloy/Qwen2.5-7B-olm-v1.5", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4546514359676769 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5543943528577703 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28172205438066467 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33976510067114096 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4539270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43991023936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/jeffmeloy/jeffmeloy_Qwen2.5-7B-minperplexity-1/ba005ac7-761f-4cd7-91ed-34b88028240f.json b/data/hfopenllm_v2/alibaba/jeffmeloy/jeffmeloy_Qwen2.5-7B-minperplexity-1/ba005ac7-761f-4cd7-91ed-34b88028240f.json deleted file mode 100644 index 8ec5699cf..000000000 --- a/data/hfopenllm_v2/alibaba/jeffmeloy/jeffmeloy_Qwen2.5-7B-minperplexity-1/ba005ac7-761f-4cd7-91ed-34b88028240f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jeffmeloy_jeffmeloy_Qwen2.5-7B-minperplexity-1/1762652580.2872581", - "retrieved_timestamp": "1762652580.2872589", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jeffmeloy/jeffmeloy_Qwen2.5-7B-minperplexity-1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jeffmeloy/jeffmeloy_Qwen2.5-7B-minperplexity-1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37571643239936703 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5582354546195324 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29154078549848944 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33221476510067116 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42903125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4367519946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/kayfour/T3Q-Qwen2.5-7B-it-KOR-Safe/35e56ec7-deae-4674-abfc-3c45f5dec040.json b/data/hfopenllm_v2/alibaba/kayfour/T3Q-Qwen2.5-7B-it-KOR-Safe/35e56ec7-deae-4674-abfc-3c45f5dec040.json deleted file mode 100644 index 082dacc34..000000000 --- a/data/hfopenllm_v2/alibaba/kayfour/T3Q-Qwen2.5-7B-it-KOR-Safe/35e56ec7-deae-4674-abfc-3c45f5dec040.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/kayfour_T3Q-Qwen2.5-7B-it-KOR-Safe/1762652580.3057542", - "retrieved_timestamp": "1762652580.305755", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "kayfour/T3Q-Qwen2.5-7B-it-KOR-Safe", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "kayfour/T3Q-Qwen2.5-7B-it-KOR-Safe", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6081497094376255 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5549941776226351 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37613293051359514 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42772916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44639295212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/kms7530/chemeng_qwen-math-7b_24_1_100_1/af7f201f-3af3-4ffb-9416-c83235851cb6.json b/data/hfopenllm_v2/alibaba/kms7530/chemeng_qwen-math-7b_24_1_100_1/af7f201f-3af3-4ffb-9416-c83235851cb6.json deleted file mode 100644 index ca7357302..000000000 --- a/data/hfopenllm_v2/alibaba/kms7530/chemeng_qwen-math-7b_24_1_100_1/af7f201f-3af3-4ffb-9416-c83235851cb6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/kms7530_chemeng_qwen-math-7b_24_1_100_1/1762652580.310198", - "retrieved_timestamp": "1762652580.310199", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "kms7530/chemeng_qwen-math-7b_24_1_100_1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "kms7530/chemeng_qwen-math-7b_24_1_100_1", - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 8.911 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.211052230304481 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3578007894497858 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2243202416918429 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24412751677852348 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3686979166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.21584109042553193 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/kms7530/chemeng_qwen-math-7b_24_1_100_1_nonmath/8ae7c857-be7e-463e-86c2-6b165920a45c.json b/data/hfopenllm_v2/alibaba/kms7530/chemeng_qwen-math-7b_24_1_100_1_nonmath/8ae7c857-be7e-463e-86c2-6b165920a45c.json deleted file mode 100644 index 88ac5d62b..000000000 --- a/data/hfopenllm_v2/alibaba/kms7530/chemeng_qwen-math-7b_24_1_100_1_nonmath/8ae7c857-be7e-463e-86c2-6b165920a45c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/kms7530_chemeng_qwen-math-7b_24_1_100_1_nonmath/1762652580.310462", - "retrieved_timestamp": "1762652580.310463", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "kms7530/chemeng_qwen-math-7b_24_1_100_1_nonmath", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "kms7530/chemeng_qwen-math-7b_24_1_100_1_nonmath", - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 15.231 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25836336476105626 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3892856967853256 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30966767371601206 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40869791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24517952127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/lkoenig/BBAI_212_QwenLawLo/c4f888d2-c08c-43c4-a1f9-79edf519c893.json b/data/hfopenllm_v2/alibaba/lkoenig/BBAI_212_QwenLawLo/c4f888d2-c08c-43c4-a1f9-79edf519c893.json deleted file mode 100644 index 6b6fc8e79..000000000 --- a/data/hfopenllm_v2/alibaba/lkoenig/BBAI_212_QwenLawLo/c4f888d2-c08c-43c4-a1f9-79edf519c893.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_212_QwenLawLo/1762652580.322983", - "retrieved_timestamp": "1762652580.322984", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lkoenig/BBAI_212_QwenLawLo", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "lkoenig/BBAI_212_QwenLawLo", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4566250880995758 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5574113357405873 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3602719033232628 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43696874999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44888630319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/lkoenig/BBAI_212_Qwencore/d42a520c-15dd-4497-a26a-b6f77b3257e6.json b/data/hfopenllm_v2/alibaba/lkoenig/BBAI_212_Qwencore/d42a520c-15dd-4497-a26a-b6f77b3257e6.json deleted file mode 100644 index 3dedfc52b..000000000 --- a/data/hfopenllm_v2/alibaba/lkoenig/BBAI_212_Qwencore/d42a520c-15dd-4497-a26a-b6f77b3257e6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_212_Qwencore/1762652580.3232372", - "retrieved_timestamp": "1762652580.323238", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lkoenig/BBAI_212_Qwencore", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "lkoenig/BBAI_212_Qwencore", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.4384400058511416 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.556868234536878 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34894259818731116 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4343333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.448969414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/lkoenig/BBAI_230_Xiaqwen/c9393ea7-3269-435f-9159-95638b9c691e.json b/data/hfopenllm_v2/alibaba/lkoenig/BBAI_230_Xiaqwen/c9393ea7-3269-435f-9159-95638b9c691e.json deleted file mode 100644 index 834b1a576..000000000 --- a/data/hfopenllm_v2/alibaba/lkoenig/BBAI_230_Xiaqwen/c9393ea7-3269-435f-9159-95638b9c691e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_230_Xiaqwen/1762652580.3234491", - "retrieved_timestamp": "1762652580.32345", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lkoenig/BBAI_230_Xiaqwen", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "lkoenig/BBAI_230_Xiaqwen", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4648931501748693 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.557779565750489 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36631419939577037 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { 
- "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4422083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4480551861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/lkoenig/BBAI_375_QwenDyancabs/08e49740-3cdd-47b2-9b95-b96d8a13dd79.json b/data/hfopenllm_v2/alibaba/lkoenig/BBAI_375_QwenDyancabs/08e49740-3cdd-47b2-9b95-b96d8a13dd79.json deleted file mode 100644 index 76005ee28..000000000 --- a/data/hfopenllm_v2/alibaba/lkoenig/BBAI_375_QwenDyancabs/08e49740-3cdd-47b2-9b95-b96d8a13dd79.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_375_QwenDyancabs/1762652580.323661", - "retrieved_timestamp": "1762652580.323662", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lkoenig/BBAI_375_QwenDyancabs", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "lkoenig/BBAI_375_QwenDyancabs", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4565752204151651 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5571383122938682 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.377643504531722 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44617708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4476396276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/lkoenig/BBAI_456_QwenKoen/249b0b65-5c71-4c5d-9802-28df0ead0cdf.json b/data/hfopenllm_v2/alibaba/lkoenig/BBAI_456_QwenKoen/249b0b65-5c71-4c5d-9802-28df0ead0cdf.json deleted 
file mode 100644 index 2f2b0ad3e..000000000 --- a/data/hfopenllm_v2/alibaba/lkoenig/BBAI_456_QwenKoen/249b0b65-5c71-4c5d-9802-28df0ead0cdf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_456_QwenKoen/1762652580.323869", - "retrieved_timestamp": "1762652580.323869", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lkoenig/BBAI_456_QwenKoen", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "lkoenig/BBAI_456_QwenKoen", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45292823042859615 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5552713612233481 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3685800604229607 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4395104166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4468916223404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/lkoenig/BBAI_7B_KoenQwenDyan/fe084d09-ee80-4c7f-93a7-3ee0f9081177.json b/data/hfopenllm_v2/alibaba/lkoenig/BBAI_7B_KoenQwenDyan/fe084d09-ee80-4c7f-93a7-3ee0f9081177.json deleted file mode 100644 index 3a274764c..000000000 --- a/data/hfopenllm_v2/alibaba/lkoenig/BBAI_7B_KoenQwenDyan/fe084d09-ee80-4c7f-93a7-3ee0f9081177.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_7B_KoenQwenDyan/1762652580.324076", - "retrieved_timestamp": "1762652580.3240771", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lkoenig/BBAI_7B_KoenQwenDyan", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "lkoenig/BBAI_7B_KoenQwenDyan", - 
"additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5807224830117421 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5536566841353078 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37386706948640486 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43687499999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44597739361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/lkoenig/BBAI_7B_Qwen2.5koen/078cedea-7b3a-4c77-b932-3d42f0c841fe.json b/data/hfopenllm_v2/alibaba/lkoenig/BBAI_7B_Qwen2.5koen/078cedea-7b3a-4c77-b932-3d42f0c841fe.json deleted file mode 100644 index 9f258e430..000000000 --- a/data/hfopenllm_v2/alibaba/lkoenig/BBAI_7B_Qwen2.5koen/078cedea-7b3a-4c77-b932-3d42f0c841fe.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_7B_Qwen2.5koen/1762652580.324276", - "retrieved_timestamp": "1762652580.324277", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lkoenig/BBAI_7B_Qwen2.5koen", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "lkoenig/BBAI_7B_Qwen2.5koen", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45999725173650363 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5544031312134464 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", 
- "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36555891238670696 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43690625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4484707446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/lkoenig/BBAI_7B_QwenDyanKoenLo/dedc34ed-fd8f-4b29-b898-3c9830993247.json b/data/hfopenllm_v2/alibaba/lkoenig/BBAI_7B_QwenDyanKoenLo/dedc34ed-fd8f-4b29-b898-3c9830993247.json deleted file mode 100644 index 51a9e46af..000000000 --- a/data/hfopenllm_v2/alibaba/lkoenig/BBAI_7B_QwenDyanKoenLo/dedc34ed-fd8f-4b29-b898-3c9830993247.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_7B_QwenDyanKoenLo/1762652580.324512", - "retrieved_timestamp": "1762652580.324513", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lkoenig/BBAI_7B_QwenDyanKoenLo", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "lkoenig/BBAI_7B_QwenDyanKoenLo", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46631714960748594 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5562461525503201 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3640483383685801 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4343020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on 
MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4464760638297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/lkoenig/BBAI_7B_QwenDyancabsLAW/05f391f3-68ac-422a-b7e8-01eba1729a0b.json b/data/hfopenllm_v2/alibaba/lkoenig/BBAI_7B_QwenDyancabsLAW/05f391f3-68ac-422a-b7e8-01eba1729a0b.json deleted file mode 100644 index 3d615245b..000000000 --- a/data/hfopenllm_v2/alibaba/lkoenig/BBAI_7B_QwenDyancabsLAW/05f391f3-68ac-422a-b7e8-01eba1729a0b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_7B_QwenDyancabsLAW/1762652580.3247318", - "retrieved_timestamp": "1762652580.3247318", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lkoenig/BBAI_7B_QwenDyancabsLAW", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "lkoenig/BBAI_7B_QwenDyancabsLAW", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5549685944405289 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5578836606885887 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3678247734138973 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4461145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4471409574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/macadeliccc/Samantha-Qwen-2-7B/c443492e-3b5f-4394-9fbb-761dba338638.json b/data/hfopenllm_v2/alibaba/macadeliccc/Samantha-Qwen-2-7B/c443492e-3b5f-4394-9fbb-761dba338638.json deleted file mode 100644 index de6f98ec0..000000000 --- a/data/hfopenllm_v2/alibaba/macadeliccc/Samantha-Qwen-2-7B/c443492e-3b5f-4394-9fbb-761dba338638.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/macadeliccc_Samantha-Qwen-2-7B/1762652580.3290062", - "retrieved_timestamp": "1762652580.3290062", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "macadeliccc/Samantha-Qwen-2-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "macadeliccc/Samantha-Qwen-2-7B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4377152621710395 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5082341412476951 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21148036253776434 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4799479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3779089095744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.1/19b72caf-a841-4928-98c3-c505694724c3.json b/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.1/19b72caf-a841-4928-98c3-c505694724c3.json deleted file mode 100644 index 5edd92f08..000000000 --- a/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.1/19b72caf-a841-4928-98c3-c505694724c3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-MST-v1.1/1762652580.333172", - "retrieved_timestamp": "1762652580.333172", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7444868504457063 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.555919540267728 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4652567975830816 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4073333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.429936835106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.3/36b2821f-5fa6-4384-9ddc-6cbc5b52321c.json b/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.3/36b2821f-5fa6-4384-9ddc-6cbc5b52321c.json deleted file mode 100644 index 7c9770dff..000000000 --- a/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.3/36b2821f-5fa6-4384-9ddc-6cbc5b52321c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-MST-v1.3/1762652580.333376", - "retrieved_timestamp": "1762652580.3333771", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.704320092909037 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5516165586639877 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47583081570996977 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43105208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44398271276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-MST/80d3a785-dde1-44fa-b6e1-93722849fdb1.json b/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-MST/80d3a785-dde1-44fa-b6e1-93722849fdb1.json deleted file mode 100644 index aadd9636c..000000000 --- a/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-MST/80d3a785-dde1-44fa-b6e1-93722849fdb1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-MST/1762652580.332918", - "retrieved_timestamp": "1762652580.3329191", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/Hush-Qwen2.5-7B-MST", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "marcuscedricridia/Hush-Qwen2.5-7B-MST", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7488330961847898 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5458495423775734 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4244712990936556 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3913645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.41630651595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-Preview/6bfc8cf9-e615-4447-bc6e-ff96752dc5fb.json b/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-Preview/6bfc8cf9-e615-4447-bc6e-ff96752dc5fb.json deleted file mode 100644 index c7234e470..000000000 --- a/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-Preview/6bfc8cf9-e615-4447-bc6e-ff96752dc5fb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-Preview/1762652580.333591", - "retrieved_timestamp": "1762652580.3335922", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/Hush-Qwen2.5-7B-Preview", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "marcuscedricridia/Hush-Qwen2.5-7B-Preview", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7962439660101863 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5431064770878757 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37537764350453173 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4298125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43641954787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-RP-v1.4-1M/feefc068-9257-4d0f-ac55-acd08ededeca.json b/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-RP-v1.4-1M/feefc068-9257-4d0f-ac55-acd08ededeca.json deleted file mode 100644 index 3a746167c..000000000 --- a/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-RP-v1.4-1M/feefc068-9257-4d0f-ac55-acd08ededeca.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-RP-v1.4-1M/1762652580.333802", - "retrieved_timestamp": "1762652580.333802", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/Hush-Qwen2.5-7B-RP-v1.4-1M", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "marcuscedricridia/Hush-Qwen2.5-7B-RP-v1.4-1M", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7727884236049238 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5295123017150106 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3368580060422961 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44327083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4134807180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-v1.1/25d6c4bd-6540-43cb-a682-77d4fa4eb64e.json b/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-v1.1/25d6c4bd-6540-43cb-a682-77d4fa4eb64e.json deleted file mode 100644 index 23994bc47..000000000 --- a/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-v1.1/25d6c4bd-6540-43cb-a682-77d4fa4eb64e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-v1.1/1762652580.334015", - "retrieved_timestamp": "1762652580.334016", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/Hush-Qwen2.5-7B-v1.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "marcuscedricridia/Hush-Qwen2.5-7B-v1.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7889499860370484 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5383575636307666 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4179375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4227061170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-v1.2/6e342711-8d2d-42ed-a019-11be429e10d8.json b/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-v1.2/6e342711-8d2d-42ed-a019-11be429e10d8.json deleted file mode 100644 index 00ee2578d..000000000 --- a/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-v1.2/6e342711-8d2d-42ed-a019-11be429e10d8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-v1.2/1762652580.334213", - "retrieved_timestamp": "1762652580.334214", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/Hush-Qwen2.5-7B-v1.2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "marcuscedricridia/Hush-Qwen2.5-7B-v1.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7865020368178655 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.540250407222091 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44033232628398794 - } - }, - { - "evaluation_name": "GPQA", - 
"metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.421875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4197140957446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-v1.3/1af605c0-ec58-4651-a57a-2fd7d0cd5a67.json b/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-v1.3/1af605c0-ec58-4651-a57a-2fd7d0cd5a67.json deleted file mode 100644 index cbe4c64b9..000000000 --- a/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-v1.3/1af605c0-ec58-4651-a57a-2fd7d0cd5a67.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-v1.3/1762652580.334473", - "retrieved_timestamp": "1762652580.334474", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/Hush-Qwen2.5-7B-v1.3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "marcuscedricridia/Hush-Qwen2.5-7B-v1.3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7856276900845313 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5326893189699237 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3323262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42463541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.43450797872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-v1.4/fd65e319-bc38-457b-9913-9a2214e69823.json b/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-v1.4/fd65e319-bc38-457b-9913-9a2214e69823.json deleted file mode 100644 index 8c39081b6..000000000 --- a/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-v1.4/fd65e319-bc38-457b-9913-9a2214e69823.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-v1.4/1762652580.334734", - "retrieved_timestamp": "1762652580.3347352", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/Hush-Qwen2.5-7B-v1.4", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "marcuscedricridia/Hush-Qwen2.5-7B-v1.4", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7834545672149895 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.54229983590397 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4259818731117825 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4231770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4195478723404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/marcuscedricridia/Qwen2.5-7B-Preview/56032f8a-b733-4b1f-acbc-78d0d1ddf2a5.json b/data/hfopenllm_v2/alibaba/marcuscedricridia/Qwen2.5-7B-Preview/56032f8a-b733-4b1f-acbc-78d0d1ddf2a5.json deleted file mode 100644 index 3ae24ed11..000000000 --- a/data/hfopenllm_v2/alibaba/marcuscedricridia/Qwen2.5-7B-Preview/56032f8a-b733-4b1f-acbc-78d0d1ddf2a5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_Qwen2.5-7B-Preview/1762652580.334959", - "retrieved_timestamp": "1762652580.334959", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - 
], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/Qwen2.5-7B-Preview", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "marcuscedricridia/Qwen2.5-7B-Preview", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7679423928509688 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5359781834039953 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34441087613293053 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41403125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42578125 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/marcuscedricridia/Yell-Qwen2.5-7B-Preview-v1.1/be0058b1-23b2-40b7-b336-ab40bf82c997.json b/data/hfopenllm_v2/alibaba/marcuscedricridia/Yell-Qwen2.5-7B-Preview-v1.1/be0058b1-23b2-40b7-b336-ab40bf82c997.json deleted file mode 100644 index 5d3d116f4..000000000 --- a/data/hfopenllm_v2/alibaba/marcuscedricridia/Yell-Qwen2.5-7B-Preview-v1.1/be0058b1-23b2-40b7-b336-ab40bf82c997.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_Yell-Qwen2.5-7B-Preview-v1.1/1762652580.335416", - "retrieved_timestamp": "1762652580.335417", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/Yell-Qwen2.5-7B-Preview-v1.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "marcuscedricridia/Yell-Qwen2.5-7B-Preview-v1.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.5757013612769672 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5347734083768815 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18957703927492447 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4059375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38314494680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/marcuscedricridia/Yell-Qwen2.5-7B-Preview/f47334f2-f0ab-48f5-814e-f3ede36802d9.json b/data/hfopenllm_v2/alibaba/marcuscedricridia/Yell-Qwen2.5-7B-Preview/f47334f2-f0ab-48f5-814e-f3ede36802d9.json deleted file mode 100644 index 511c8b430..000000000 --- a/data/hfopenllm_v2/alibaba/marcuscedricridia/Yell-Qwen2.5-7B-Preview/f47334f2-f0ab-48f5-814e-f3ede36802d9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_Yell-Qwen2.5-7B-Preview/1762652580.335188", - "retrieved_timestamp": "1762652580.335188", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/Yell-Qwen2.5-7B-Preview", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "marcuscedricridia/Yell-Qwen2.5-7B-Preview", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5838696879834395 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.537136379549371 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19259818731117825 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on 
GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40463541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37982047872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/maywell/Qwen2-7B-Multilingual-RP/fd91f8aa-a521-4e9b-824a-aa21adade569.json b/data/hfopenllm_v2/alibaba/maywell/Qwen2-7B-Multilingual-RP/fd91f8aa-a521-4e9b-824a-aa21adade569.json deleted file mode 100644 index 6aca7e9e6..000000000 --- a/data/hfopenllm_v2/alibaba/maywell/Qwen2-7B-Multilingual-RP/fd91f8aa-a521-4e9b-824a-aa21adade569.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/maywell_Qwen2-7B-Multilingual-RP/1762652580.342533", - "retrieved_timestamp": "1762652580.3425338", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "maywell/Qwen2-7B-Multilingual-RP", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "maywell/Qwen2-7B-Multilingual-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4347176602525743 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5062058680861069 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2243202416918429 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3695625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3858876329787234 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/alibaba/mergekit-community/SuperQwen-2.5-1.5B/95d33475-a71b-41d6-a08d-3da30e631897.json b/data/hfopenllm_v2/alibaba/mergekit-community/SuperQwen-2.5-1.5B/95d33475-a71b-41d6-a08d-3da30e631897.json deleted file mode 100644 index 9801092b7..000000000 --- a/data/hfopenllm_v2/alibaba/mergekit-community/SuperQwen-2.5-1.5B/95d33475-a71b-41d6-a08d-3da30e631897.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mergekit-community_SuperQwen-2.5-1.5B/1762652580.346312", - "retrieved_timestamp": "1762652580.346313", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mergekit-community/SuperQwen-2.5-1.5B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "mergekit-community/SuperQwen-2.5-1.5B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1336409615376091 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2906897601443365 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3355208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10746343085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/mhl1/Qwen2.5-0.5B-cinstruct-stage1/bf9d8219-66b9-4c77-8c6d-2983e60dc2cb.json b/data/hfopenllm_v2/alibaba/mhl1/Qwen2.5-0.5B-cinstruct-stage1/bf9d8219-66b9-4c77-8c6d-2983e60dc2cb.json deleted file mode 100644 index a6fbb35d1..000000000 --- a/data/hfopenllm_v2/alibaba/mhl1/Qwen2.5-0.5B-cinstruct-stage1/bf9d8219-66b9-4c77-8c6d-2983e60dc2cb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mhl1_Qwen2.5-0.5B-cinstruct-stage1/1762652580.3535528", - "retrieved_timestamp": "1762652580.353554", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mhl1/Qwen2.5-0.5B-cinstruct-stage1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "mhl1/Qwen2.5-0.5B-cinstruct-stage1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14817905379947427 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32557832478283544 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35003125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11394614361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/migtissera/Tess-v2.5.2-Qwen2-72B/34b9dd9e-dc03-4354-b016-3b1463a902f9.json b/data/hfopenllm_v2/alibaba/migtissera/Tess-v2.5.2-Qwen2-72B/34b9dd9e-dc03-4354-b016-3b1463a902f9.json deleted file mode 100644 index d4b528bf5..000000000 --- a/data/hfopenllm_v2/alibaba/migtissera/Tess-v2.5.2-Qwen2-72B/34b9dd9e-dc03-4354-b016-3b1463a902f9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/migtissera_Tess-v2.5.2-Qwen2-72B/1762652580.359263", - "retrieved_timestamp": "1762652580.359264", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "migtissera/Tess-v2.5.2-Qwen2-72B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "migtissera/Tess-v2.5.2-Qwen2-72B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44943084349525925 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6646791891060648 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2938066465256798 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35067114093959734 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41883333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5561003989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/minghaowu/Qwen1.5-1.8B-OpenHermes-2.5/cf3f376a-92ec-4678-a57a-cee2e40032a5.json b/data/hfopenllm_v2/alibaba/minghaowu/Qwen1.5-1.8B-OpenHermes-2.5/cf3f376a-92ec-4678-a57a-cee2e40032a5.json deleted file mode 100644 index 07b0aa08a..000000000 --- a/data/hfopenllm_v2/alibaba/minghaowu/Qwen1.5-1.8B-OpenHermes-2.5/cf3f376a-92ec-4678-a57a-cee2e40032a5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/minghaowu_Qwen1.5-1.8B-OpenHermes-2.5/1762652580.360414", - "retrieved_timestamp": "1762652580.360415", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "minghaowu/Qwen1.5-1.8B-OpenHermes-2.5", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "minghaowu/Qwen1.5-1.8B-OpenHermes-2.5", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.837 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27779735546128714 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33746396801266015 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02416918429003021 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3528854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17918882978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/mobiuslabsgmbh/DeepSeek-R1-ReDistill-Qwen-7B-v1.1/99d27765-a9c5-4f50-8bd1-c3ce67683621.json b/data/hfopenllm_v2/alibaba/mobiuslabsgmbh/DeepSeek-R1-ReDistill-Qwen-7B-v1.1/99d27765-a9c5-4f50-8bd1-c3ce67683621.json deleted file mode 100644 index 652503bcb..000000000 --- a/data/hfopenllm_v2/alibaba/mobiuslabsgmbh/DeepSeek-R1-ReDistill-Qwen-7B-v1.1/99d27765-a9c5-4f50-8bd1-c3ce67683621.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mobiuslabsgmbh_DeepSeek-R1-ReDistill-Qwen-7B-v1.1/1762652580.371459", - "retrieved_timestamp": "1762652580.3714602", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mobiuslabsgmbh/DeepSeek-R1-ReDistill-Qwen-7B-v1.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "mobiuslabsgmbh/DeepSeek-R1-ReDistill-Qwen-7B-v1.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34731512387132807 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36983762765044165 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3496978851963746 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40088541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23262965425531915 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/alibaba/nbeerbower/Dumpling-Qwen2.5-1.5B/f2eaeee8-a75b-4d0f-9dcd-2a11c3de926b.json b/data/hfopenllm_v2/alibaba/nbeerbower/Dumpling-Qwen2.5-1.5B/f2eaeee8-a75b-4d0f-9dcd-2a11c3de926b.json deleted file mode 100644 index 215f2051a..000000000 --- a/data/hfopenllm_v2/alibaba/nbeerbower/Dumpling-Qwen2.5-1.5B/f2eaeee8-a75b-4d0f-9dcd-2a11c3de926b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_Dumpling-Qwen2.5-1.5B/1762652580.377223", - "retrieved_timestamp": "1762652580.377223", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/Dumpling-Qwen2.5-1.5B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "nbeerbower/Dumpling-Qwen2.5-1.5B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3698963195432563 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4159743091354106 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11706948640483383 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37276041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2771775265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/nbeerbower/Dumpling-Qwen2.5-14B/0a70cdb4-5ccc-40e2-bf99-3af619b8b7f6.json b/data/hfopenllm_v2/alibaba/nbeerbower/Dumpling-Qwen2.5-14B/0a70cdb4-5ccc-40e2-bf99-3af619b8b7f6.json deleted file mode 100644 index 2de4c65f3..000000000 --- a/data/hfopenllm_v2/alibaba/nbeerbower/Dumpling-Qwen2.5-14B/0a70cdb4-5ccc-40e2-bf99-3af619b8b7f6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_Dumpling-Qwen2.5-14B/1762652580.3774788", - "retrieved_timestamp": "1762652580.37748", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/Dumpling-Qwen2.5-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "nbeerbower/Dumpling-Qwen2.5-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6064010159709571 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6450644262798378 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30966767371601206 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43539583333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5170378989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/nbeerbower/Dumpling-Qwen2.5-7B-1k-r16/76e3f2a5-7545-4270-800d-6413e39608ad.json b/data/hfopenllm_v2/alibaba/nbeerbower/Dumpling-Qwen2.5-7B-1k-r16/76e3f2a5-7545-4270-800d-6413e39608ad.json deleted file mode 100644 index e1e3c86ac..000000000 --- a/data/hfopenllm_v2/alibaba/nbeerbower/Dumpling-Qwen2.5-7B-1k-r16/76e3f2a5-7545-4270-800d-6413e39608ad.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_Dumpling-Qwen2.5-7B-1k-r16/1762652580.3776908", - "retrieved_timestamp": "1762652580.377692", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/Dumpling-Qwen2.5-7B-1k-r16", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "nbeerbower/Dumpling-Qwen2.5-7B-1k-r16", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4860004787297703 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5214228032573378 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.236404833836858 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4229895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39586103723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/nbeerbower/Dumpling-Qwen2.5-7B-1k-r64-2e-5/2e6c1c46-01af-493a-a2ce-266d13b53000.json b/data/hfopenllm_v2/alibaba/nbeerbower/Dumpling-Qwen2.5-7B-1k-r64-2e-5/2e6c1c46-01af-493a-a2ce-266d13b53000.json deleted file mode 100644 index 19b9474fc..000000000 --- a/data/hfopenllm_v2/alibaba/nbeerbower/Dumpling-Qwen2.5-7B-1k-r64-2e-5/2e6c1c46-01af-493a-a2ce-266d13b53000.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_Dumpling-Qwen2.5-7B-1k-r64-2e-5/1762652580.377894", - "retrieved_timestamp": "1762652580.377894", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/Dumpling-Qwen2.5-7B-1k-r64-2e-5", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "nbeerbower/Dumpling-Qwen2.5-7B-1k-r64-2e-5", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.417906709752346 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5300548108450988 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21148036253776434 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4486041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41215093085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/nbeerbower/EVA-abliterated-TIES-Qwen2.5-1.5B/dea423e8-cdbd-4895-80af-f53dbb5caa1c.json b/data/hfopenllm_v2/alibaba/nbeerbower/EVA-abliterated-TIES-Qwen2.5-1.5B/dea423e8-cdbd-4895-80af-f53dbb5caa1c.json deleted file mode 100644 index 809576539..000000000 --- a/data/hfopenllm_v2/alibaba/nbeerbower/EVA-abliterated-TIES-Qwen2.5-1.5B/dea423e8-cdbd-4895-80af-f53dbb5caa1c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_EVA-abliterated-TIES-Qwen2.5-1.5B/1762652580.378096", - "retrieved_timestamp": "1762652580.3780968", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/EVA-abliterated-TIES-Qwen2.5-1.5B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "nbeerbower/EVA-abliterated-TIES-Qwen2.5-1.5B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41148707651254224 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39965589836197535 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13746223564954682 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35018750000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27119348404255317 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/alibaba/nbeerbower/EVA-abliterated-TIES-Qwen2.5-14B/997fc8c5-fc91-4e9e-a2b7-bdda77e4f4a7.json b/data/hfopenllm_v2/alibaba/nbeerbower/EVA-abliterated-TIES-Qwen2.5-14B/997fc8c5-fc91-4e9e-a2b7-bdda77e4f4a7.json deleted file mode 100644 index a955a2a72..000000000 --- a/data/hfopenllm_v2/alibaba/nbeerbower/EVA-abliterated-TIES-Qwen2.5-14B/997fc8c5-fc91-4e9e-a2b7-bdda77e4f4a7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_EVA-abliterated-TIES-Qwen2.5-14B/1762652580.378304", - "retrieved_timestamp": "1762652580.378304", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/EVA-abliterated-TIES-Qwen2.5-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "nbeerbower/EVA-abliterated-TIES-Qwen2.5-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.783554302583811 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6372016353633118 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5045317220543807 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3548657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4406666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5211103723404256 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/nbeerbower/Qwen2.5-Gutenberg-Doppel-14B/649483fb-4b54-4824-82eb-e78e55e53912.json b/data/hfopenllm_v2/alibaba/nbeerbower/Qwen2.5-Gutenberg-Doppel-14B/649483fb-4b54-4824-82eb-e78e55e53912.json deleted file mode 100644 index ac3b99e14..000000000 --- a/data/hfopenllm_v2/alibaba/nbeerbower/Qwen2.5-Gutenberg-Doppel-14B/649483fb-4b54-4824-82eb-e78e55e53912.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_Qwen2.5-Gutenberg-Doppel-14B/1762652580.38376", - "retrieved_timestamp": "1762652580.38376", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/Qwen2.5-Gutenberg-Doppel-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "nbeerbower/Qwen2.5-Gutenberg-Doppel-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8090832324897937 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6381735755183319 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5415407854984894 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33305369127516776 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4100625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49210438829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/netcat420/DeepSeek-R1-Distill-Qwen-MFANN-Slerp-7b/0d99e863-596f-43b7-932e-a4a27435e63d.json b/data/hfopenllm_v2/alibaba/netcat420/DeepSeek-R1-Distill-Qwen-MFANN-Slerp-7b/0d99e863-596f-43b7-932e-a4a27435e63d.json deleted file mode 100644 index 47ec99a45..000000000 --- a/data/hfopenllm_v2/alibaba/netcat420/DeepSeek-R1-Distill-Qwen-MFANN-Slerp-7b/0d99e863-596f-43b7-932e-a4a27435e63d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_DeepSeek-R1-Distill-Qwen-MFANN-Slerp-7b/1762652580.391702", - "retrieved_timestamp": "1762652580.3917031", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/DeepSeek-R1-Distill-Qwen-MFANN-Slerp-7b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "netcat420/DeepSeek-R1-Distill-Qwen-MFANN-Slerp-7b", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11500596195871399 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28767781029884354 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0015105740181268882 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3723854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10895944148936171 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-7B-nerd-uncensored-v0.9-MFANN/399b43e8-3c07-4f3d-8b3e-50b8acd96e78.json b/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-7B-nerd-uncensored-v0.9-MFANN/399b43e8-3c07-4f3d-8b3e-50b8acd96e78.json deleted file mode 100644 index c815a3c6a..000000000 --- a/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-7B-nerd-uncensored-v0.9-MFANN/399b43e8-3c07-4f3d-8b3e-50b8acd96e78.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_Qwen2.5-7B-nerd-uncensored-v0.9-MFANN/1762652580.400365", - "retrieved_timestamp": "1762652580.400365", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/Qwen2.5-7B-nerd-uncensored-v0.9-MFANN", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "netcat420/Qwen2.5-7B-nerd-uncensored-v0.9-MFANN", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5878413720040603 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5236664966992856 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3376132930513595 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39257291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.390375664893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-7b-MFANN-slerp/d621c163-5ca6-4e54-8913-d931e4a2c6b9.json b/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-7b-MFANN-slerp/d621c163-5ca6-4e54-8913-d931e4a2c6b9.json deleted file mode 100644 index fc24f1e23..000000000 --- a/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-7b-MFANN-slerp/d621c163-5ca6-4e54-8913-d931e4a2c6b9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_Qwen2.5-7b-MFANN-slerp/1762652580.4005811", - "retrieved_timestamp": "1762652580.4005818", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/Qwen2.5-7b-MFANN-slerp", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "netcat420/Qwen2.5-7b-MFANN-slerp", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6532123654126606 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5088729928004616 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28700906344410876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40730208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3416722074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-7b-nerd-uncensored-MFANN-slerp/170aa8c2-6b80-44d3-9d22-c1a5f7fa2ad4.json b/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-7b-nerd-uncensored-MFANN-slerp/170aa8c2-6b80-44d3-9d22-c1a5f7fa2ad4.json deleted file mode 100644 index b8a135424..000000000 --- a/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-7b-nerd-uncensored-MFANN-slerp/170aa8c2-6b80-44d3-9d22-c1a5f7fa2ad4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_Qwen2.5-7b-nerd-uncensored-MFANN-slerp/1762652580.4007921", - "retrieved_timestamp": "1762652580.400793", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/Qwen2.5-7b-nerd-uncensored-MFANN-slerp", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "netcat420/Qwen2.5-7b-nerd-uncensored-MFANN-slerp", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15644711587476784 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2920111436321769 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3791770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11003989361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN-Slerp-Unretrained/2f89ceb3-8bc1-48f0-a4cb-3dc1b8acad87.json b/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN-Slerp-Unretrained/2f89ceb3-8bc1-48f0-a4cb-3dc1b8acad87.json deleted file mode 100644 index 00c0a120d..000000000 --- a/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN-Slerp-Unretrained/2f89ceb3-8bc1-48f0-a4cb-3dc1b8acad87.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/netcat420_Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN-Slerp-Unretrained/1762652580.4012349", - "retrieved_timestamp": "1762652580.401236", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN-Slerp-Unretrained", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN-Slerp-Unretrained", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6486411610083467 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5065573474607916 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2990936555891239 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41520833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3431682180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN/bbd39707-6062-461a-8e09-c8b8bc3451f7.json b/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN/bbd39707-6062-461a-8e09-c8b8bc3451f7.json deleted file mode 100644 index 901b5bcf9..000000000 --- a/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN/bbd39707-6062-461a-8e09-c8b8bc3451f7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN/1762652580.4010181", - "retrieved_timestamp": "1762652580.4010189", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN", - "developer": "alibaba", - "inference_platform": "unknown", - "id": 
"netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5742274941599401 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5071448530886461 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.256797583081571 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40584375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3156582446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-DeepSeek-R1-MFANN-Slerp-7b/9b2011ae-9d22-42be-a10b-6ce6e8ff1be4.json b/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-DeepSeek-R1-MFANN-Slerp-7b/9b2011ae-9d22-42be-a10b-6ce6e8ff1be4.json deleted file mode 100644 index cece7eaa8..000000000 --- a/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-DeepSeek-R1-MFANN-Slerp-7b/9b2011ae-9d22-42be-a10b-6ce6e8ff1be4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_Qwen2.5-DeepSeek-R1-MFANN-Slerp-7b/1762652580.401459", - "retrieved_timestamp": "1762652580.40146", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/Qwen2.5-DeepSeek-R1-MFANN-Slerp-7b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "netcat420/Qwen2.5-DeepSeek-R1-MFANN-Slerp-7b", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2675556412540947 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.37890218644722085 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01812688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23238255033557048 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35279166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16771941489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-MFANN-7b/b6578885-9721-4349-ad55-5a80fd054c85.json b/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-MFANN-7b/b6578885-9721-4349-ad55-5a80fd054c85.json deleted file mode 100644 index 720e56b3f..000000000 --- a/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-MFANN-7b/b6578885-9721-4349-ad55-5a80fd054c85.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_Qwen2.5-MFANN-7b/1762652580.401672", - "retrieved_timestamp": "1762652580.401673", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/Qwen2.5-MFANN-7b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "netcat420/Qwen2.5-MFANN-7b", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6097233119234742 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5054347004252888 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27870090634441086 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.4020625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32330452127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/netcat420/qwen2.5-MFANN-7b-SLERP-V1.2/dfacdde9-fd5d-496f-8038-aa0439c0c991.json b/data/hfopenllm_v2/alibaba/netcat420/qwen2.5-MFANN-7b-SLERP-V1.2/dfacdde9-fd5d-496f-8038-aa0439c0c991.json deleted file mode 100644 index 3b328746a..000000000 --- a/data/hfopenllm_v2/alibaba/netcat420/qwen2.5-MFANN-7b-SLERP-V1.2/dfacdde9-fd5d-496f-8038-aa0439c0c991.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_qwen2.5-MFANN-7b-SLERP-V1.2/1762652580.40188", - "retrieved_timestamp": "1762652580.40188", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/qwen2.5-MFANN-7b-SLERP-V1.2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "netcat420/qwen2.5-MFANN-7b-SLERP-V1.2", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6606060807546199 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5111030308243185 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28700906344410876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4259375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34383311170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/netcat420/qwen2.5-MFANN-7b-SLERPv1.1/0e66b7a6-bd6f-48f7-95e2-c117e0ea468f.json b/data/hfopenllm_v2/alibaba/netcat420/qwen2.5-MFANN-7b-SLERPv1.1/0e66b7a6-bd6f-48f7-95e2-c117e0ea468f.json deleted file mode 100644 index ad747da7c..000000000 --- a/data/hfopenllm_v2/alibaba/netcat420/qwen2.5-MFANN-7b-SLERPv1.1/0e66b7a6-bd6f-48f7-95e2-c117e0ea468f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", 
- "evaluation_id": "hfopenllm_v2/netcat420_qwen2.5-MFANN-7b-SLERPv1.1/1762652580.402082", - "retrieved_timestamp": "1762652580.4020832", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/qwen2.5-MFANN-7b-SLERPv1.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "netcat420/qwen2.5-MFANN-7b-SLERPv1.1", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6554852236510238 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5074761993537673 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41263541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34483045212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/netcat420/qwen2.5-MFANN-7b-v1.1/845f96b7-62dc-4ebc-aa62-fcc6263e437f.json b/data/hfopenllm_v2/alibaba/netcat420/qwen2.5-MFANN-7b-v1.1/845f96b7-62dc-4ebc-aa62-fcc6263e437f.json deleted file mode 100644 index 9127b17b0..000000000 --- a/data/hfopenllm_v2/alibaba/netcat420/qwen2.5-MFANN-7b-v1.1/845f96b7-62dc-4ebc-aa62-fcc6263e437f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_qwen2.5-MFANN-7b-v1.1/1762652580.402283", - "retrieved_timestamp": "1762652580.4022841", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/qwen2.5-MFANN-7b-v1.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "netcat420/qwen2.5-MFANN-7b-v1.1", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6088489651901399 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49666375554657477 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2824773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41139583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3248005319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/newsbang/Homer-v0.3-Qwen2.5-7B/0bc5145c-90d0-4a8b-89c6-0b03aa9d0ee1.json b/data/hfopenllm_v2/alibaba/newsbang/Homer-v0.3-Qwen2.5-7B/0bc5145c-90d0-4a8b-89c6-0b03aa9d0ee1.json deleted file mode 100644 index f34deebe9..000000000 --- a/data/hfopenllm_v2/alibaba/newsbang/Homer-v0.3-Qwen2.5-7B/0bc5145c-90d0-4a8b-89c6-0b03aa9d0ee1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/newsbang_Homer-v0.3-Qwen2.5-7B/1762652580.4035761", - "retrieved_timestamp": "1762652580.403577", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "newsbang/Homer-v0.3-Qwen2.5-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "newsbang/Homer-v0.3-Qwen2.5-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5154013572875525 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5480594290467807 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.30891238670694865 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3338926174496644 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47436458333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.445561835106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/newsbang/Homer-v0.4-Qwen2.5-7B/9a022bdc-d1b8-4f2e-a1af-6cd3bad6bded.json b/data/hfopenllm_v2/alibaba/newsbang/Homer-v0.4-Qwen2.5-7B/9a022bdc-d1b8-4f2e-a1af-6cd3bad6bded.json deleted file mode 100644 index 42a60ce4d..000000000 --- a/data/hfopenllm_v2/alibaba/newsbang/Homer-v0.4-Qwen2.5-7B/9a022bdc-d1b8-4f2e-a1af-6cd3bad6bded.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/newsbang_Homer-v0.4-Qwen2.5-7B/1762652580.403887", - "retrieved_timestamp": "1762652580.4038882", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "newsbang/Homer-v0.4-Qwen2.5-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "newsbang/Homer-v0.4-Qwen2.5-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.799940823681166 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5533099174800821 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27794561933534745 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4310833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { 
- "score": 0.4362533244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/newsbang/Homer-v0.5-Qwen2.5-7B/dc22ad83-0752-4f5e-97ac-733ef6c6cf53.json b/data/hfopenllm_v2/alibaba/newsbang/Homer-v0.5-Qwen2.5-7B/dc22ad83-0752-4f5e-97ac-733ef6c6cf53.json deleted file mode 100644 index d60b2f9ab..000000000 --- a/data/hfopenllm_v2/alibaba/newsbang/Homer-v0.5-Qwen2.5-7B/dc22ad83-0752-4f5e-97ac-733ef6c6cf53.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/newsbang_Homer-v0.5-Qwen2.5-7B/1762652580.404095", - "retrieved_timestamp": "1762652580.404096", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "newsbang/Homer-v0.5-Qwen2.5-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "newsbang/Homer-v0.5-Qwen2.5-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7880756393037142 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5540181073562815 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3723564954682779 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41930208333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4369182180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/newsbang/Homer-v1.0-Qwen2.5-72B/3ebdda73-1c41-4a98-b3cf-ac5d482c8b5c.json b/data/hfopenllm_v2/alibaba/newsbang/Homer-v1.0-Qwen2.5-72B/3ebdda73-1c41-4a98-b3cf-ac5d482c8b5c.json deleted file mode 100644 index f4970242c..000000000 --- a/data/hfopenllm_v2/alibaba/newsbang/Homer-v1.0-Qwen2.5-72B/3ebdda73-1c41-4a98-b3cf-ac5d482c8b5c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/newsbang_Homer-v1.0-Qwen2.5-72B/1762652580.404309", - "retrieved_timestamp": "1762652580.40431", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": 
"Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "newsbang/Homer-v1.0-Qwen2.5-72B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "newsbang/Homer-v1.0-Qwen2.5-72B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7627716680629618 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7309799550978827 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4901812688821752 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4161073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4677291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6145279255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/newsbang/Homer-v1.0-Qwen2.5-7B/1fe21571-0375-43c3-8071-1aaaf0223baa.json b/data/hfopenllm_v2/alibaba/newsbang/Homer-v1.0-Qwen2.5-7B/1fe21571-0375-43c3-8071-1aaaf0223baa.json deleted file mode 100644 index 71976b142..000000000 --- a/data/hfopenllm_v2/alibaba/newsbang/Homer-v1.0-Qwen2.5-7B/1fe21571-0375-43c3-8071-1aaaf0223baa.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/newsbang_Homer-v1.0-Qwen2.5-7B/1762652580.404567", - "retrieved_timestamp": "1762652580.404568", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "newsbang/Homer-v1.0-Qwen2.5-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "newsbang/Homer-v1.0-Qwen2.5-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6392737935344885 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5655254177370223 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3323262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221476510067114 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42782291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45345744680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/nguyentd/FinancialAdvice-Qwen2.5-7B/0ced7574-bfc4-4958-a6f5-0944f9ac411a.json b/data/hfopenllm_v2/alibaba/nguyentd/FinancialAdvice-Qwen2.5-7B/0ced7574-bfc4-4958-a6f5-0944f9ac411a.json deleted file mode 100644 index 9c76b3e67..000000000 --- a/data/hfopenllm_v2/alibaba/nguyentd/FinancialAdvice-Qwen2.5-7B/0ced7574-bfc4-4958-a6f5-0944f9ac411a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nguyentd_FinancialAdvice-Qwen2.5-7B/1762652580.404779", - "retrieved_timestamp": "1762652580.4047801", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nguyentd/FinancialAdvice-Qwen2.5-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "nguyentd/FinancialAdvice-Qwen2.5-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.449605934476079 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4730934153895792 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1148036253776435 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { 
- "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40248958333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.375249335106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/nisten/franqwenstein-35b/3e3344d2-6911-4d5f-85d6-6593cbed3b49.json b/data/hfopenllm_v2/alibaba/nisten/franqwenstein-35b/3e3344d2-6911-4d5f-85d6-6593cbed3b49.json deleted file mode 100644 index 2f591631e..000000000 --- a/data/hfopenllm_v2/alibaba/nisten/franqwenstein-35b/3e3344d2-6911-4d5f-85d6-6593cbed3b49.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nisten_franqwenstein-35b/1762652580.407119", - "retrieved_timestamp": "1762652580.40712", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nisten/franqwenstein-35b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "nisten/franqwenstein-35b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 34.714 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39135383005979685 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6591132598701116 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.304380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35906040268456374 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4681041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5610871010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/nisten/franqwenstein-35b/ff90ed4a-6dcf-4b9b-9d3a-19f933e2c0c8.json b/data/hfopenllm_v2/alibaba/nisten/franqwenstein-35b/ff90ed4a-6dcf-4b9b-9d3a-19f933e2c0c8.json deleted file mode 100644 index e08845c0e..000000000 --- 
a/data/hfopenllm_v2/alibaba/nisten/franqwenstein-35b/ff90ed4a-6dcf-4b9b-9d3a-19f933e2c0c8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nisten_franqwenstein-35b/1762652580.406877", - "retrieved_timestamp": "1762652580.406878", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nisten/franqwenstein-35b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "nisten/franqwenstein-35b", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 34.714 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37986320740080765 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6646579178049268 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3406344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4035234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49402083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5730551861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/nisten/tqwendo-36b/3a5b1794-12f1-4004-bdb2-309cc950c757.json b/data/hfopenllm_v2/alibaba/nisten/tqwendo-36b/3a5b1794-12f1-4004-bdb2-309cc950c757.json deleted file mode 100644 index 45668cb40..000000000 --- a/data/hfopenllm_v2/alibaba/nisten/tqwendo-36b/3a5b1794-12f1-4004-bdb2-309cc950c757.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nisten_tqwendo-36b/1762652580.40731", - "retrieved_timestamp": "1762652580.4073112", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nisten/tqwendo-36b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "nisten/tqwendo-36b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 
35.69 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6777672132164878 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6431830832659088 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41540785498489424 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44295833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4380817819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/prithivMLmods/Galactic-Qwen-14B-Exp1/26aea3e6-571c-4751-8b0f-40a86a144973.json b/data/hfopenllm_v2/alibaba/prithivMLmods/Galactic-Qwen-14B-Exp1/26aea3e6-571c-4751-8b0f-40a86a144973.json deleted file mode 100644 index e9cf25704..000000000 --- a/data/hfopenllm_v2/alibaba/prithivMLmods/Galactic-Qwen-14B-Exp1/26aea3e6-571c-4751-8b0f-40a86a144973.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Galactic-Qwen-14B-Exp1/1762652580.463281", - "retrieved_timestamp": "1762652580.463281", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Galactic-Qwen-14B-Exp1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "prithivMLmods/Galactic-Qwen-14B-Exp1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5832202999153357 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6582262489447345 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40181268882175225 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3934563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4780520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.539561170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/prithivMLmods/Galactic-Qwen-14B-Exp2/2fcdb8f8-5ec6-494a-b690-fa96febdb02a.json b/data/hfopenllm_v2/alibaba/prithivMLmods/Galactic-Qwen-14B-Exp2/2fcdb8f8-5ec6-494a-b690-fa96febdb02a.json deleted file mode 100644 index 2511ea111..000000000 --- a/data/hfopenllm_v2/alibaba/prithivMLmods/Galactic-Qwen-14B-Exp2/2fcdb8f8-5ec6-494a-b690-fa96febdb02a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Galactic-Qwen-14B-Exp2/1762652580.463546", - "retrieved_timestamp": "1762652580.463547", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Galactic-Qwen-14B-Exp2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "prithivMLmods/Galactic-Qwen-14B-Exp2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6620300801872365 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7203002699449659 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39932885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5353854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on 
MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5690658244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/prithivMLmods/Magellanic-Qwen-25B-R999/08bfcf7b-e051-4c64-b1ee-0044cfa166f0.json b/data/hfopenllm_v2/alibaba/prithivMLmods/Magellanic-Qwen-25B-R999/08bfcf7b-e051-4c64-b1ee-0044cfa166f0.json deleted file mode 100644 index 436755f0e..000000000 --- a/data/hfopenllm_v2/alibaba/prithivMLmods/Magellanic-Qwen-25B-R999/08bfcf7b-e051-4c64-b1ee-0044cfa166f0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Magellanic-Qwen-25B-R999/1762652580.466958", - "retrieved_timestamp": "1762652580.466959", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Magellanic-Qwen-25B-R999", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "prithivMLmods/Magellanic-Qwen-25B-R999", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 24.962 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18727199386516663 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26075689808294905 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.005287009063444109 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25083892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3831145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1299867021276596 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/prithivMLmods/Qwen-7B-Distill-Reasoner/7afe076b-7f6a-42c1-9c43-652ea3ca94a9.json b/data/hfopenllm_v2/alibaba/prithivMLmods/Qwen-7B-Distill-Reasoner/7afe076b-7f6a-42c1-9c43-652ea3ca94a9.json deleted file mode 100644 index e5b1098e4..000000000 --- a/data/hfopenllm_v2/alibaba/prithivMLmods/Qwen-7B-Distill-Reasoner/7afe076b-7f6a-42c1-9c43-652ea3ca94a9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Qwen-7B-Distill-Reasoner/1762652580.474049", 
- "retrieved_timestamp": "1762652580.47405", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Qwen-7B-Distill-Reasoner", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "prithivMLmods/Qwen-7B-Distill-Reasoner", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3395712265677292 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4409329229697952 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3950151057401813 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271812080536913 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36596874999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2818317819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/prithivMLmods/Qwen2.5-14B-DeepSeek-R1-1M/eacd8987-9631-4199-97ef-2cdc41879e8b.json b/data/hfopenllm_v2/alibaba/prithivMLmods/Qwen2.5-14B-DeepSeek-R1-1M/eacd8987-9631-4199-97ef-2cdc41879e8b.json deleted file mode 100644 index 649397dd7..000000000 --- a/data/hfopenllm_v2/alibaba/prithivMLmods/Qwen2.5-14B-DeepSeek-R1-1M/eacd8987-9631-4199-97ef-2cdc41879e8b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Qwen2.5-14B-DeepSeek-R1-1M/1762652580.474647", - "retrieved_timestamp": "1762652580.474647", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Qwen2.5-14B-DeepSeek-R1-1M", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "prithivMLmods/Qwen2.5-14B-DeepSeek-R1-1M", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4192808415005519 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5934849375153814 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5128398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33221476510067116 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4606041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48994348404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/prithivMLmods/Qwen2.5-7B-DeepSeek-R1-1M/4edb337d-b56c-4009-9199-22223d4ff9f8.json b/data/hfopenllm_v2/alibaba/prithivMLmods/Qwen2.5-7B-DeepSeek-R1-1M/4edb337d-b56c-4009-9199-22223d4ff9f8.json deleted file mode 100644 index 584476867..000000000 --- a/data/hfopenllm_v2/alibaba/prithivMLmods/Qwen2.5-7B-DeepSeek-R1-1M/4edb337d-b56c-4009-9199-22223d4ff9f8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Qwen2.5-7B-DeepSeek-R1-1M/1762652580.474907", - "retrieved_timestamp": "1762652580.4749079", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Qwen2.5-7B-DeepSeek-R1-1M", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "prithivMLmods/Qwen2.5-7B-DeepSeek-R1-1M", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18612282078219125 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3125554204779005 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3416875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009640957446809 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/qingy2024/Qwen2.5-4B/c332cc18-e556-4b23-a45d-df26c250faa2.json b/data/hfopenllm_v2/alibaba/qingy2024/Qwen2.5-4B/c332cc18-e556-4b23-a45d-df26c250faa2.json deleted file mode 100644 index 899f4d38f..000000000 --- a/data/hfopenllm_v2/alibaba/qingy2024/Qwen2.5-4B/c332cc18-e556-4b23-a45d-df26c250faa2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/qingy2024_Qwen2.5-4B/1762652580.486805", - "retrieved_timestamp": "1762652580.486807", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "qingy2024/Qwen2.5-4B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "qingy2024/Qwen2.5-4B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 4.168 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21584839337402537 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4269378314466817 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46103125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2524933510638298 - } - } - ] -} 
diff --git a/data/hfopenllm_v2/alibaba/rombodawg/Rombos-Coder-V2.5-Qwen-14b/4f7b356a-1484-458c-8bc1-2640e039ab70.json b/data/hfopenllm_v2/alibaba/rombodawg/Rombos-Coder-V2.5-Qwen-14b/4f7b356a-1484-458c-8bc1-2640e039ab70.json deleted file mode 100644 index 0acbe2cda..000000000 --- a/data/hfopenllm_v2/alibaba/rombodawg/Rombos-Coder-V2.5-Qwen-14b/4f7b356a-1484-458c-8bc1-2640e039ab70.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-Coder-V2.5-Qwen-14b/1762652580.496415", - "retrieved_timestamp": "1762652580.496416", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "rombodawg/Rombos-Coder-V2.5-Qwen-14b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "rombodawg/Rombos-Coder-V2.5-Qwen-14b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7047445223119102 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6165135323666455 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3300604229607251 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3914583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3939494680851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/rombodawg/Rombos-Coder-V2.5-Qwen-7b/ca077d1a-a122-4040-b7d9-924773ce67ca.json b/data/hfopenllm_v2/alibaba/rombodawg/Rombos-Coder-V2.5-Qwen-7b/ca077d1a-a122-4040-b7d9-924773ce67ca.json deleted file mode 100644 index d596efc7e..000000000 --- a/data/hfopenllm_v2/alibaba/rombodawg/Rombos-Coder-V2.5-Qwen-7b/ca077d1a-a122-4040-b7d9-924773ce67ca.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-Coder-V2.5-Qwen-7b/1762652580.4966788", - "retrieved_timestamp": "1762652580.49668", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "rombodawg/Rombos-Coder-V2.5-Qwen-7b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "rombodawg/Rombos-Coder-V2.5-Qwen-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6210388436016436 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5077090028113894 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3338368580060423 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3979375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33976063829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-0.5b/796ed438-2be4-45e6-9de9-c98ddd51f3d4.json b/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-0.5b/796ed438-2be4-45e6-9de9-c98ddd51f3d4.json deleted file mode 100644 index 154eb23a9..000000000 --- a/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-0.5b/796ed438-2be4-45e6-9de9-c98ddd51f3d4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5-Qwen-0.5b/1762652580.4969", - "retrieved_timestamp": "1762652580.4969", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "rombodawg/Rombos-LLM-V2.5-Qwen-0.5b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "rombodawg/Rombos-LLM-V2.5-Qwen-0.5b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28466690603155187 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32936751831436256 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32358333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18658577127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-1.5b/51f579c0-b5b4-4e01-9c19-b68fb6a21210.json b/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-1.5b/51f579c0-b5b4-4e01-9c19-b68fb6a21210.json deleted file mode 100644 index fc410964b..000000000 --- a/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-1.5b/51f579c0-b5b4-4e01-9c19-b68fb6a21210.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5-Qwen-1.5b/1762652580.497122", - "retrieved_timestamp": "1762652580.497123", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "rombodawg/Rombos-LLM-V2.5-Qwen-1.5b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "rombodawg/Rombos-LLM-V2.5-Qwen-1.5b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3402461025634206 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4256703145864387 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08534743202416918 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4185520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2922207446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-14b/91ec838e-699a-4c68-aa42-a9f0b3b6b0c2.json b/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-14b/91ec838e-699a-4c68-aa42-a9f0b3b6b0c2.json deleted file mode 100644 index 82dbaefa2..000000000 --- a/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-14b/91ec838e-699a-4c68-aa42-a9f0b3b6b0c2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5-Qwen-14b/1762652580.4975061", - "retrieved_timestamp": "1762652580.497507", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "rombodawg/Rombos-LLM-V2.5-Qwen-14b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "rombodawg/Rombos-LLM-V2.5-Qwen-14b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5840447789642593 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6481086261669653 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4554380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3716442953020134 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4717291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5375664893617021 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-32b/07e926c9-d8bb-41da-b41e-8fddc9fb99d8.json 
b/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-32b/07e926c9-d8bb-41da-b41e-8fddc9fb99d8.json deleted file mode 100644 index 5d0f683f6..000000000 --- a/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-32b/07e926c9-d8bb-41da-b41e-8fddc9fb99d8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5-Qwen-32b/1762652580.497819", - "retrieved_timestamp": "1762652580.49782", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "rombodawg/Rombos-LLM-V2.5-Qwen-32b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "rombodawg/Rombos-LLM-V2.5-Qwen-32b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6826631116548536 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7045537070859799 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4954682779456193 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39681208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5034166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5915890957446809 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-3b/976e132a-8352-43fd-abdf-0fc4a04e9429.json b/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-3b/976e132a-8352-43fd-abdf-0fc4a04e9429.json deleted file mode 100644 index c1091d904..000000000 --- a/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-3b/976e132a-8352-43fd-abdf-0fc4a04e9429.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5-Qwen-3b/1762652580.498058", - "retrieved_timestamp": "1762652580.498058", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - 
}, - "model_info": { - "name": "rombodawg/Rombos-LLM-V2.5-Qwen-3b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "rombodawg/Rombos-LLM-V2.5-Qwen-3b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5342358276040905 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4808896246368473 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2794561933534743 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4041666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37608045212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-72b/1ae05e9f-d432-4e7f-a662-4b4a118333d9.json b/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-72b/1ae05e9f-d432-4e7f-a662-4b4a118333d9.json deleted file mode 100644 index 6e7d96615..000000000 --- a/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-72b/1ae05e9f-d432-4e7f-a662-4b4a118333d9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5-Qwen-72b/1762652580.498325", - "retrieved_timestamp": "1762652580.498326", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "rombodawg/Rombos-LLM-V2.5-Qwen-72b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "rombodawg/Rombos-LLM-V2.5-Qwen-72b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.715535889218385 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7229589065788488 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5422960725075529 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39848993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4599166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.593500664893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-7b/23ec1efe-a9a1-41cb-9695-4be0ceb3c199.json b/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-7b/23ec1efe-a9a1-41cb-9695-4be0ceb3c199.json deleted file mode 100644 index 0701bdd2f..000000000 --- a/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-7b/23ec1efe-a9a1-41cb-9695-4be0ceb3c199.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5-Qwen-7b/1762652580.498573", - "retrieved_timestamp": "1762652580.498574", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "rombodawg/Rombos-LLM-V2.5-Qwen-7b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "rombodawg/Rombos-LLM-V2.5-Qwen-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6237117514860571 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5543885046903589 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3814199395770393 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42909375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4468916223404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5.1-Qwen-3b/3f1ffcf0-10bb-46b2-ae30-3eb958e943a1.json b/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5.1-Qwen-3b/3f1ffcf0-10bb-46b2-ae30-3eb958e943a1.json deleted file mode 100644 index bc4b4c985..000000000 --- a/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5.1-Qwen-3b/3f1ffcf0-10bb-46b2-ae30-3eb958e943a1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5.1-Qwen-3b/1762652580.498805", - "retrieved_timestamp": "1762652580.498805", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "rombodawg/Rombos-LLM-V2.5.1-Qwen-3b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "rombodawg/Rombos-LLM-V2.5.1-Qwen-3b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2595125378440316 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3884043024656656 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09138972809667675 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39911458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27194148936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5.1-Qwen-3b/91240596-5842-4441-b976-01ed7545bd1f.json b/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5.1-Qwen-3b/91240596-5842-4441-b976-01ed7545bd1f.json deleted file mode 100644 index fc217fb78..000000000 --- 
a/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5.1-Qwen-3b/91240596-5842-4441-b976-01ed7545bd1f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5.1-Qwen-3b/1762652580.499037", - "retrieved_timestamp": "1762652580.499037", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "rombodawg/Rombos-LLM-V2.5.1-Qwen-3b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "rombodawg/Rombos-LLM-V2.5.1-Qwen-3b", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2566401592219755 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39000839740376536 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39911458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27410239361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.6-Qwen-14b/5842364a-2721-4882-90f3-97eba7c3b93a.json b/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.6-Qwen-14b/5842364a-2721-4882-90f3-97eba7c3b93a.json deleted file mode 100644 index 98028f876..000000000 --- a/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.6-Qwen-14b/5842364a-2721-4882-90f3-97eba7c3b93a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.6-Qwen-14b/1762652580.499588", - "retrieved_timestamp": "1762652580.4995892", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "rombodawg/Rombos-LLM-V2.6-Qwen-14b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": 
"rombodawg/Rombos-LLM-V2.6-Qwen-14b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8431550508207113 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6442096596344892 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5211480362537765 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3338926174496644 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4220625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49609375 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/securin/Securin-LLM-V2.5-Qwen-1.5B/cbd0163f-fbea-4f40-a26b-a0508ec02061.json b/data/hfopenllm_v2/alibaba/securin/Securin-LLM-V2.5-Qwen-1.5B/cbd0163f-fbea-4f40-a26b-a0508ec02061.json deleted file mode 100644 index 053ed930b..000000000 --- a/data/hfopenllm_v2/alibaba/securin/Securin-LLM-V2.5-Qwen-1.5B/cbd0163f-fbea-4f40-a26b-a0508ec02061.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/securin_Securin-LLM-V2.5-Qwen-1.5B/1762652580.510926", - "retrieved_timestamp": "1762652580.5109272", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "securin/Securin-LLM-V2.5-Qwen-1.5B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "securin/Securin-LLM-V2.5-Qwen-1.5B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.543 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1492030035860406 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3158416288115425 - } - }, - { - "evaluation_name": "MATH Level 5", - 
"metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3606354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16148603723404256 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sethuiyer/Qwen2.5-7B-Anvita/f2571e64-be03-4482-b5b4-d120444b0586.json b/data/hfopenllm_v2/alibaba/sethuiyer/Qwen2.5-7B-Anvita/f2571e64-be03-4482-b5b4-d120444b0586.json deleted file mode 100644 index b17542c4f..000000000 --- a/data/hfopenllm_v2/alibaba/sethuiyer/Qwen2.5-7B-Anvita/f2571e64-be03-4482-b5b4-d120444b0586.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sethuiyer_Qwen2.5-7B-Anvita/1762652580.514066", - "retrieved_timestamp": "1762652580.514067", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sethuiyer/Qwen2.5-7B-Anvita", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sethuiyer/Qwen2.5-7B-Anvita", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6480416406246536 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5465860266784314 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20166163141993956 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271812080536913 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43365625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4165558510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/someon98/qwen-CoMa-0.5b/be4ee67a-59d7-4098-992e-5f75cd53cdbc.json b/data/hfopenllm_v2/alibaba/someon98/qwen-CoMa-0.5b/be4ee67a-59d7-4098-992e-5f75cd53cdbc.json deleted file mode 100644 index 3214fb779..000000000 --- a/data/hfopenllm_v2/alibaba/someon98/qwen-CoMa-0.5b/be4ee67a-59d7-4098-992e-5f75cd53cdbc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/someon98_qwen-CoMa-0.5b/1762652580.518077", - "retrieved_timestamp": "1762652580.5180779", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "someon98/qwen-CoMa-0.5b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "someon98/qwen-CoMa-0.5b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22766371006706648 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29533439538939815 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.004531722054380665 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23993288590604026 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40457291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10987367021276596 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Lamarck-14B-v0.4-Qwenvergence/41393c10-c1e5-4ccd-bcb1-df5392cb8ec6.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Lamarck-14B-v0.4-Qwenvergence/41393c10-c1e5-4ccd-bcb1-df5392cb8ec6.json deleted file mode 100644 index 6dc75df01..000000000 --- a/data/hfopenllm_v2/alibaba/sometimesanotion/Lamarck-14B-v0.4-Qwenvergence/41393c10-c1e5-4ccd-bcb1-df5392cb8ec6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Lamarck-14B-v0.4-Qwenvergence/1762652580.5196202", - 
"retrieved_timestamp": "1762652580.5196211", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Lamarck-14B-v0.4-Qwenvergence", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Lamarck-14B-v0.4-Qwenvergence", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4906470387460826 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6535142192324058 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33987915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3783557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4846875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5406416223404256 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen-14B-ProseStock-v4/e68bc90b-1274-4e28-b280-65e6ceba53f8.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen-14B-ProseStock-v4/e68bc90b-1274-4e28-b280-65e6ceba53f8.json deleted file mode 100644 index 4dbab09a5..000000000 --- a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen-14B-ProseStock-v4/e68bc90b-1274-4e28-b280-65e6ceba53f8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen-14B-ProseStock-v4/1762652580.522184", - "retrieved_timestamp": "1762652580.5221848", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwen-14B-ProseStock-v4", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwen-14B-ProseStock-v4", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4942186731206532 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6498268976192769 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3640483383685801 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3884228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49383333333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5386469414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen-2.5-14B-Virmarckeoso/dc7af75a-f45a-449a-b6ba-cc033d7de79f.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen-2.5-14B-Virmarckeoso/dc7af75a-f45a-449a-b6ba-cc033d7de79f.json deleted file mode 100644 index 2fe96134a..000000000 --- a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen-2.5-14B-Virmarckeoso/dc7af75a-f45a-449a-b6ba-cc033d7de79f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen-2.5-14B-Virmarckeoso/1762652580.5224378", - "retrieved_timestamp": "1762652580.522439", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwen-2.5-14B-Virmarckeoso", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwen-2.5-14B-Virmarckeoso", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4813295389566351 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6569729950776678 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, 
- "score_details": { - "score": 0.3564954682779456 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37919463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4793541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5377327127659575 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso-v2/5242491e-deb4-41ae-8d70-5b0d8ffb7bc7.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso-v2/5242491e-deb4-41ae-8d70-5b0d8ffb7bc7.json deleted file mode 100644 index ffb303cc8..000000000 --- a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso-v2/5242491e-deb4-41ae-8d70-5b0d8ffb7bc7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-14B-Vimarckoso-v2/1762652580.52286", - "retrieved_timestamp": "1762652580.522861", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4505301488938239 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6550336897572636 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3580060422960725 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3825503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48189583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5379820478723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-IF-Variant/9df5ab5a-16cf-478f-87f0-1b8717e1e330.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-IF-Variant/9df5ab5a-16cf-478f-87f0-1b8717e1e330.json deleted file mode 100644 index 620f7f622..000000000 --- a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-IF-Variant/9df5ab5a-16cf-478f-87f0-1b8717e1e330.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3-IF-Variant/1762652580.523307", - "retrieved_timestamp": "1762652580.523308", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-IF-Variant", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-IF-Variant", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6412973133507981 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5520788965536542 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2545317220543807 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34731543624161076 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5319166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4588597074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01/dd84656a-3b61-4241-a2eb-a5f52ff58ed2.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01/dd84656a-3b61-4241-a2eb-a5f52ff58ed2.json deleted file mode 100644 index c35592591..000000000 --- a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01/dd84656a-3b61-4241-a2eb-a5f52ff58ed2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - 
"schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3-Prose01/1762652580.523516", - "retrieved_timestamp": "1762652580.523516", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6872343160591674 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6358769213927613 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3995468277945619 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38674496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48071875000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5275099734042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock/ba7b8cb4-608a-4bf0-b107-51e721f88dee.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock/ba7b8cb4-608a-4bf0-b107-51e721f88dee.json deleted file mode 100644 index 7777073af..000000000 --- a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock/ba7b8cb4-608a-4bf0-b107-51e721f88dee.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3-model_stock/1762652580.5237172", - "retrieved_timestamp": "1762652580.5237179", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock", - "developer": "alibaba", - "inference_platform": "unknown", - "id": 
"sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7161852772864887 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6420915332649074 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4244712990936556 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3800335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47811458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5315824468085106 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3/9e453ef2-bae1-4a06-8778-d9c0dfae33e8.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3/9e453ef2-bae1-4a06-8778-d9c0dfae33e8.json deleted file mode 100644 index 8ac8c5618..000000000 --- a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3/9e453ef2-bae1-4a06-8778-d9c0dfae33e8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3/1762652580.52309", - "retrieved_timestamp": "1762652580.52309", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7256523801291683 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.641460062329604 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3800335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4806875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5343251329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso/b3b73406-3b25-4a23-9e13-53fafdd66552.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso/b3b73406-3b25-4a23-9e13-53fafdd66552.json deleted file mode 100644 index cd643dbea..000000000 --- a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso/b3b73406-3b25-4a23-9e13-53fafdd66552.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-14B-Vimarckoso/1762652580.522644", - "retrieved_timestamp": "1762652580.522645", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45742407922091166 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6446348390056346 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.338368580060423 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3926174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4858645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5329122340425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Prose/dceb35c6-30bb-483c-aa62-8273b409311b.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Prose/dceb35c6-30bb-483c-aa62-8273b409311b.json deleted file mode 100644 index 8a6e72084..000000000 --- a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Prose/dceb35c6-30bb-483c-aa62-8273b409311b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-7B-Gordion-v0.1-Prose/1762652580.524123", - "retrieved_timestamp": "1762652580.524123", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Prose", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Prose", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5347101246913745 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5599089581177875 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2892749244712991 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45017708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4525432180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Reason/100a253a-3409-4145-8a9d-0bf821e3ce91.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Reason/100a253a-3409-4145-8a9d-0bf821e3ce91.json deleted file mode 100644 index fc693d552..000000000 --- 
a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Reason/100a253a-3409-4145-8a9d-0bf821e3ce91.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-7B-Gordion-v0.1-Reason/1762652580.5243258", - "retrieved_timestamp": "1762652580.5243268", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Reason", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Reason", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49172085621705963 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5498169530870823 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2620845921450151 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34060402684563756 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4434166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4306848404255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-7B-Gordion-v0.1/174b2a17-c4fa-4021-868b-9c23a99603c9.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-7B-Gordion-v0.1/174b2a17-c4fa-4021-868b-9c23a99603c9.json deleted file mode 100644 index 6fdbb4db9..000000000 --- a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-7B-Gordion-v0.1/174b2a17-c4fa-4021-868b-9c23a99603c9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-7B-Gordion-v0.1/1762652580.5239239", - "retrieved_timestamp": "1762652580.523925", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwen2.5-7B-Gordion-v0.1", - 
"developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwen2.5-7B-Gordion-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.748183708116686 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5523808037550308 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29154078549848944 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40162499999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43001994680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentessential-14B-v1/3cce1e77-5dfc-44d2-b0c2-f7220d989e9d.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentessential-14B-v1/3cce1e77-5dfc-44d2-b0c2-f7220d989e9d.json deleted file mode 100644 index 68e900d25..000000000 --- a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentessential-14B-v1/3cce1e77-5dfc-44d2-b0c2-f7220d989e9d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentessential-14B-v1/1762652580.524672", - "retrieved_timestamp": "1762652580.524674", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwentessential-14B-v1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwentessential-14B-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6279083941719084 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.6545165968552056 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4070996978851964 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3875838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4872916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5381482712765957 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v013/8127e367-fbd2-475d-a4f0-b8895dec6741.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v013/8127e367-fbd2-475d-a4f0-b8895dec6741.json deleted file mode 100644 index 06f0b7182..000000000 --- a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v013/8127e367-fbd2-475d-a4f0-b8895dec6741.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v013/1762652580.5250719", - "retrieved_timestamp": "1762652580.525074", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwentinuum-14B-v013", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwentinuum-14B-v013", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6711226213114536 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6086634082040333 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37084592145015105 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3573825503355705 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5154166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49908577127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v1/c68a024d-fa21-4584-bde5-42121e919af7.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v1/c68a024d-fa21-4584-bde5-42121e919af7.json deleted file mode 100644 index 41529ca3b..000000000 --- a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v1/c68a024d-fa21-4584-bde5-42121e919af7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v1/1762652580.5253482", - "retrieved_timestamp": "1762652580.5253491", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwentinuum-14B-v1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwentinuum-14B-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5031616111916382 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6572572845221036 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36027190332326287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3825503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4780520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5409740691489362 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v2/ce1feb87-4f78-4ff1-a548-b3409591166f.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v2/ce1feb87-4f78-4ff1-a548-b3409591166f.json deleted file mode 100644 index 6d8b63216..000000000 --- 
a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v2/ce1feb87-4f78-4ff1-a548-b3409591166f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v2/1762652580.525585", - "retrieved_timestamp": "1762652580.525586", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwentinuum-14B-v2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwentinuum-14B-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5378329499062487 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6555355668062347 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37537764350453173 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3884228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47141666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5408909574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v3/96b75db5-4e23-4179-bbf7-801f35d31af7.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v3/96b75db5-4e23-4179-bbf7-801f35d31af7.json deleted file mode 100644 index 70f93df52..000000000 --- a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v3/96b75db5-4e23-4179-bbf7-801f35d31af7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v3/1762652580.525815", - "retrieved_timestamp": "1762652580.525816", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwentinuum-14B-v3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": 
"sometimesanotion/Qwentinuum-14B-v3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6157683834448153 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6538645567116264 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35347432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3875838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48598958333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5413065159574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v5/16e0de9b-9717-4451-babc-8df8748c4efe.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v5/16e0de9b-9717-4451-babc-8df8748c4efe.json deleted file mode 100644 index bc3f7eebe..000000000 --- a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v5/16e0de9b-9717-4451-babc-8df8748c4efe.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v5/1762652580.5261161", - "retrieved_timestamp": "1762652580.526117", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwentinuum-14B-v5", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwentinuum-14B-v5", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.628557782240012 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.654985060704008 - } - }, - { - "evaluation_name": "MATH Level 5", - 
"metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34441087613293053 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3875838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4873854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5418051861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v6-Prose/8eecc1a5-d42e-423c-9155-daf66a414361.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v6-Prose/8eecc1a5-d42e-423c-9155-daf66a414361.json deleted file mode 100644 index 583aabb03..000000000 --- a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v6-Prose/8eecc1a5-d42e-423c-9155-daf66a414361.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v6-Prose/1762652580.52656", - "retrieved_timestamp": "1762652580.526561", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwentinuum-14B-v6-Prose", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwentinuum-14B-v6-Prose", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5642860942299764 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6545112522796068 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37009063444108764 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3884228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.4912604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5392287234042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v6/93e0bcb6-be72-4e9c-adbc-c8fce3240b0d.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v6/93e0bcb6-be72-4e9c-adbc-c8fce3240b0d.json deleted file mode 100644 index 2ba474c5e..000000000 --- a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v6/93e0bcb6-be72-4e9c-adbc-c8fce3240b0d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v6/1762652580.526352", - "retrieved_timestamp": "1762652580.526353", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwentinuum-14B-v6", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwentinuum-14B-v6", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6304062110755019 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6544517420216159 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36027190332326287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38674496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48995833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5399767287234043 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v7/6aaa1633-f780-42d4-b43e-5a4d31cf7aae.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v7/6aaa1633-f780-42d4-b43e-5a4d31cf7aae.json deleted file mode 100644 index b07ba418e..000000000 --- a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v7/6aaa1633-f780-42d4-b43e-5a4d31cf7aae.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": 
"0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v7/1762652580.526774", - "retrieved_timestamp": "1762652580.526774", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwentinuum-14B-v7", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwentinuum-14B-v7", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6109223526908603 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6551430222697051 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35725075528700906 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39093959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48198958333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5409740691489362 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v8/6be09829-08e5-4d45-a091-5451f6c74d51.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v8/6be09829-08e5-4d45-a091-5451f6c74d51.json deleted file mode 100644 index 34d79c59e..000000000 --- a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v8/6be09829-08e5-4d45-a091-5451f6c74d51.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v8/1762652580.526987", - "retrieved_timestamp": "1762652580.526987", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwentinuum-14B-v8", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwentinuum-14B-v8", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5411552458587658 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6534258495008117 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39123867069486407 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38338926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48732291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5412234042553191 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v9/cea3e14d-a43d-4e32-b8fc-d8ae995190d8.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v9/cea3e14d-a43d-4e32-b8fc-d8ae995190d8.json deleted file mode 100644 index eb20ffe34..000000000 --- a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v9/cea3e14d-a43d-4e32-b8fc-d8ae995190d8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v9/1762652580.5271978", - "retrieved_timestamp": "1762652580.527199", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwentinuum-14B-v9", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwentinuum-14B-v9", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5107304175144174 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6580257842849174 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.34818731117824775 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3859060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47811458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5421376329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-qv256/f06fc349-e84e-4ec7-a9c9-8819896c2beb.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-qv256/f06fc349-e84e-4ec7-a9c9-8819896c2beb.json deleted file mode 100644 index 56305e394..000000000 --- a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-qv256/f06fc349-e84e-4ec7-a9c9-8819896c2beb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-qv256/1762652580.52741", - "retrieved_timestamp": "1762652580.527411", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwenvergence-14B-qv256", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwenvergence-14B-qv256", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7006232352380573 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6312084721949004 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38972809667673713 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3783557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49259375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5177859042553191 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v0.6-004-model_stock/86591e86-5bfb-4e8e-b910-bf6b5011562c.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v0.6-004-model_stock/86591e86-5bfb-4e8e-b910-bf6b5011562c.json deleted file mode 100644 index 8af547819..000000000 --- a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v0.6-004-model_stock/86591e86-5bfb-4e8e-b910-bf6b5011562c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v0.6-004-model_stock/1762652580.5276191", - "retrieved_timestamp": "1762652580.52762", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwenvergence-14B-v0.6-004-model_stock", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwenvergence-14B-v0.6-004-model_stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6859854076073706 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6249338707540049 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4093655589123867 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38338926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5033229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.519281914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v10/f2b35397-f539-4129-8e1f-f9dae9c9431b.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v10/f2b35397-f539-4129-8e1f-f9dae9c9431b.json deleted file mode 100644 index 0f8a3a07b..000000000 --- a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v10/f2b35397-f539-4129-8e1f-f9dae9c9431b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v10/1762652580.5278451", - "retrieved_timestamp": "1762652580.5278451", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwenvergence-14B-v10", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwenvergence-14B-v10", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6756938257157675 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6316425399409628 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4788519637462236 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37919463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49913541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.523936170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v11/50ae9dc0-efcc-43cb-8704-6dfb9270656a.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v11/50ae9dc0-efcc-43cb-8704-6dfb9270656a.json deleted file mode 100644 index a8c34830d..000000000 --- a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v11/50ae9dc0-efcc-43cb-8704-6dfb9270656a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v11/1762652580.528142", - "retrieved_timestamp": "1762652580.5281432", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwenvergence-14B-v11", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwenvergence-14B-v11", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7192327468893647 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6367548394062034 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4645015105740181 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3724832214765101 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4754479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5327460106382979 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v12-Prose-DS/a6c5b80d-e685-405a-8444-1be1ed763d2e.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v12-Prose-DS/a6c5b80d-e685-405a-8444-1be1ed763d2e.json deleted file mode 100644 index cfe882809..000000000 --- a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v12-Prose-DS/a6c5b80d-e685-405a-8444-1be1ed763d2e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v12-Prose-DS/1762652580.52859", - "retrieved_timestamp": "1762652580.5285912", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwenvergence-14B-v12-Prose-DS", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwenvergence-14B-v12-Prose-DS", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6173419859306639 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6506726813719318 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43051359516616317 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39429530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5150729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5369015957446809 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v12-Prose/052e63b2-028b-4a4a-ae2b-51514e982239.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v12-Prose/052e63b2-028b-4a4a-ae2b-51514e982239.json deleted file mode 100644 index b6478763b..000000000 --- a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v12-Prose/052e63b2-028b-4a4a-ae2b-51514e982239.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v12-Prose/1762652580.52837", - "retrieved_timestamp": "1762652580.5283709", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwenvergence-14B-v12-Prose", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwenvergence-14B-v12-Prose", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5412051135431766 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6504247508173936 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35347432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38674496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49913541666666666 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5381482712765957 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v13-Prose-DS/f205507c-48ef-4a40-a0e8-39f5f7bf2cdb.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v13-Prose-DS/f205507c-48ef-4a40-a0e8-39f5f7bf2cdb.json deleted file mode 100644 index c4bee270a..000000000 --- a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v13-Prose-DS/f205507c-48ef-4a40-a0e8-39f5f7bf2cdb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v13-Prose-DS/1762652580.528805", - "retrieved_timestamp": "1762652580.528806", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwenvergence-14B-v13-Prose-DS", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwenvergence-14B-v13-Prose-DS", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.717808747456748 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6405077084802886 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3859516616314199 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38338926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49265625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.534906914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v15-Prose-MS/a9434630-a7cd-4dc1-b542-e76402344166.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v15-Prose-MS/a9434630-a7cd-4dc1-b542-e76402344166.json deleted file mode 100644 index 53934b9b9..000000000 --- a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v15-Prose-MS/a9434630-a7cd-4dc1-b542-e76402344166.json +++ 
/dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v15-Prose-MS/1762652580.529013", - "retrieved_timestamp": "1762652580.529014", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwenvergence-14B-v15-Prose-MS", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwenvergence-14B-v15-Prose-MS", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5032114788760489 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6550130348108012 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3632930513595166 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3951342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4912916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.539311835106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v2-Prose/f639d7e3-ffb9-4dc5-ab20-993522afa5b4.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v2-Prose/f639d7e3-ffb9-4dc5-ab20-993522afa5b4.json deleted file mode 100644 index a28fddcd6..000000000 --- a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v2-Prose/f639d7e3-ffb9-4dc5-ab20-993522afa5b4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v2-Prose/1762652580.529223", - "retrieved_timestamp": "1762652580.529224", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwenvergence-14B-v2-Prose", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwenvergence-14B-v2-Prose", - 
"additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47048830436574957 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6518830473518972 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3557401812688822 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3934563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49259375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5371509308510638 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v3-Prose/37c4d6b3-9964-45d3-a6ed-8b84229ed304.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v3-Prose/37c4d6b3-9964-45d3-a6ed-8b84229ed304.json deleted file mode 100644 index 242f3b1e6..000000000 --- a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v3-Prose/37c4d6b3-9964-45d3-a6ed-8b84229ed304.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v3-Prose/1762652580.5297742", - "retrieved_timestamp": "1762652580.5297751", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwenvergence-14B-v3-Prose", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwenvergence-14B-v3-Prose", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49177072390147036 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6512913170949324 - } - }, - { - "evaluation_name": "MATH Level 
5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3648036253776435 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3951342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49389583333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5369847074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v3-Reason/50c37538-a425-4b30-a9e0-9a60f6b2492f.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v3-Reason/50c37538-a425-4b30-a9e0-9a60f6b2492f.json deleted file mode 100644 index fb6aee67b..000000000 --- a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v3-Reason/50c37538-a425-4b30-a9e0-9a60f6b2492f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v3-Reason/1762652580.530208", - "retrieved_timestamp": "1762652580.530208", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwenvergence-14B-v3-Reason", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwenvergence-14B-v3-Reason", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5278161943642867 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6557437566824342 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3119335347432024 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38422818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.47541666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5396442819148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v3-Reason/58ac7b57-e498-4de0-95aa-475c9c56aaf6.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v3-Reason/58ac7b57-e498-4de0-95aa-475c9c56aaf6.json deleted file mode 100644 index a858c6f77..000000000 --- a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v3-Reason/58ac7b57-e498-4de0-95aa-475c9c56aaf6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v3-Reason/1762652580.530001", - "retrieved_timestamp": "1762652580.530001", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwenvergence-14B-v3-Reason", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwenvergence-14B-v3-Reason", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5366837768232734 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6561283957466177 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3580060422960725 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38674496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47402083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5394780585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v3/6cefa467-dae0-4b8b-bd5c-3343f1bfe111.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v3/6cefa467-dae0-4b8b-bd5c-3343f1bfe111.json deleted file mode 100644 index f13d2be28..000000000 --- 
a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v3/6cefa467-dae0-4b8b-bd5c-3343f1bfe111.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v3/1762652580.529505", - "retrieved_timestamp": "1762652580.529512", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwenvergence-14B-v3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwenvergence-14B-v3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.504410519643435 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.654823836148701 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3693353474320242 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38422818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48859375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5386469414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v6-Prose-model_stock/7f57b41f-d8e8-46a0-ad1f-2638e287bce7.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v6-Prose-model_stock/7f57b41f-d8e8-46a0-ad1f-2638e287bce7.json deleted file mode 100644 index 7a161fe9b..000000000 --- a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v6-Prose-model_stock/7f57b41f-d8e8-46a0-ad1f-2638e287bce7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v6-Prose-model_stock/1762652580.530609", - "retrieved_timestamp": "1762652580.5306098", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"sometimesanotion/Qwenvergence-14B-v6-Prose-model_stock", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwenvergence-14B-v6-Prose-model_stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48110458029140457 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6530441861690175 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36027190332326287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3934563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48989583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5387300531914894 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v6-Prose/fa88bc37-eb6b-4d69-8983-7a489ab09665.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v6-Prose/fa88bc37-eb6b-4d69-8983-7a489ab09665.json deleted file mode 100644 index 5119e2e75..000000000 --- a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v6-Prose/fa88bc37-eb6b-4d69-8983-7a489ab09665.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v6-Prose/1762652580.530398", - "retrieved_timestamp": "1762652580.530399", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwenvergence-14B-v6-Prose", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwenvergence-14B-v6-Prose", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5990073006289978 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6543750230807198 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3564954682779456 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3884228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48865625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5370678191489362 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v8/9332e745-f594-40a9-af22-98709efc179d.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v8/9332e745-f594-40a9-af22-98709efc179d.json deleted file mode 100644 index a0b3ed862..000000000 --- a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v8/9332e745-f594-40a9-af22-98709efc179d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v8/1762652580.530813", - "retrieved_timestamp": "1762652580.530813", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwenvergence-14B-v8", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwenvergence-14B-v8", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5913387589373973 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6522455361956444 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40483383685800606 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { 
- "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47678125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.543467420212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v9/65c35557-ec37-49c3-b7f6-11ce837500f0.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v9/65c35557-ec37-49c3-b7f6-11ce837500f0.json deleted file mode 100644 index 16aec66b3..000000000 --- a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v9/65c35557-ec37-49c3-b7f6-11ce837500f0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v9/1762652580.531015", - "retrieved_timestamp": "1762652580.5310159", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Qwenvergence-14B-v9", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwenvergence-14B-v9", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6598070896332842 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6165623747365094 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41389728096676737 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36828859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5141145833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5110538563829787 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sumink/Qwenftmodel/aece90fe-f0eb-4c34-afd0-7a4fc36dc385.json b/data/hfopenllm_v2/alibaba/sumink/Qwenftmodel/aece90fe-f0eb-4c34-afd0-7a4fc36dc385.json deleted file mode 100644 index aa3fb0cc5..000000000 --- 
a/data/hfopenllm_v2/alibaba/sumink/Qwenftmodel/aece90fe-f0eb-4c34-afd0-7a4fc36dc385.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sumink_Qwenftmodel/1762652580.5454028", - "retrieved_timestamp": "1762652580.545404", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sumink/Qwenftmodel", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sumink/Qwenftmodel", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17290899258412123 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38226970256668574 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36171875000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23387632978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sumink/Qwenmplus/fc41cf78-6547-4fe6-83aa-ef5edd99a392.json b/data/hfopenllm_v2/alibaba/sumink/Qwenmplus/fc41cf78-6547-4fe6-83aa-ef5edd99a392.json deleted file mode 100644 index 1975f0658..000000000 --- a/data/hfopenllm_v2/alibaba/sumink/Qwenmplus/fc41cf78-6547-4fe6-83aa-ef5edd99a392.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sumink_Qwenmplus/1762652580.5456882", - "retrieved_timestamp": "1762652580.545689", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sumink/Qwenmplus", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sumink/Qwenmplus", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.543 - } - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20403307668098425 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3675511408391697 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38283333333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19921875 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sumink/Qwensci/57a9ff0c-795f-45c4-b0c7-ad0c7400c88d.json b/data/hfopenllm_v2/alibaba/sumink/Qwensci/57a9ff0c-795f-45c4-b0c7-ad0c7400c88d.json deleted file mode 100644 index c7e15bfb2..000000000 --- a/data/hfopenllm_v2/alibaba/sumink/Qwensci/57a9ff0c-795f-45c4-b0c7-ad0c7400c88d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sumink_Qwensci/1762652580.545888", - "retrieved_timestamp": "1762652580.5458891", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sumink/Qwensci", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sumink/Qwensci", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.543 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17398281005509825 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3281870591856875 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": 
{ - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3608854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12599734042553193 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sumink/bbhqwen/7c73720a-03d8-4d90-9557-cd579c7c3e86.json b/data/hfopenllm_v2/alibaba/sumink/bbhqwen/7c73720a-03d8-4d90-9557-cd579c7c3e86.json deleted file mode 100644 index f7ba2e9ea..000000000 --- a/data/hfopenllm_v2/alibaba/sumink/bbhqwen/7c73720a-03d8-4d90-9557-cd579c7c3e86.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sumink_bbhqwen/1762652580.546088", - "retrieved_timestamp": "1762652580.546089", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sumink/bbhqwen", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sumink/bbhqwen", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18085236062536292 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3388245916050106 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43523958333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16165226063829788 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sumink/bbhqwen2/b4dbcb3f-11dd-4bce-9d45-869ae7c8f9b1.json 
b/data/hfopenllm_v2/alibaba/sumink/bbhqwen2/b4dbcb3f-11dd-4bce-9d45-869ae7c8f9b1.json deleted file mode 100644 index 1156a6a4f..000000000 --- a/data/hfopenllm_v2/alibaba/sumink/bbhqwen2/b4dbcb3f-11dd-4bce-9d45-869ae7c8f9b1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sumink_bbhqwen2/1762652580.546288", - "retrieved_timestamp": "1762652580.546289", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sumink/bbhqwen2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sumink/bbhqwen2", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15329991090307052 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30663248168563745 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44305208333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1149434840425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sumink/bbhqwen3/b9dae1c0-8088-4ffb-9e91-0f6579b3147e.json b/data/hfopenllm_v2/alibaba/sumink/bbhqwen3/b9dae1c0-8088-4ffb-9e91-0f6579b3147e.json deleted file mode 100644 index 1a1e075d1..000000000 --- a/data/hfopenllm_v2/alibaba/sumink/bbhqwen3/b9dae1c0-8088-4ffb-9e91-0f6579b3147e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sumink_bbhqwen3/1762652580.546491", - "retrieved_timestamp": "1762652580.546491", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sumink/bbhqwen3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sumink/bbhqwen3", - "additional_details": { 
- "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1942911474886634 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2950842029929075 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3796145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11660571808510638 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sumink/bbhqwen4/336dbfac-133a-46c8-87c9-40f1ad12a714.json b/data/hfopenllm_v2/alibaba/sumink/bbhqwen4/336dbfac-133a-46c8-87c9-40f1ad12a714.json deleted file mode 100644 index 4d9cc1f63..000000000 --- a/data/hfopenllm_v2/alibaba/sumink/bbhqwen4/336dbfac-133a-46c8-87c9-40f1ad12a714.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sumink_bbhqwen4/1762652580.546697", - "retrieved_timestamp": "1762652580.546698", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sumink/bbhqwen4", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sumink/bbhqwen4", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14485675784695717 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3199395559502713 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.006042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24412751677852348 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4028958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15093085106382978 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/sumink/bbhqwen5/4b528bc8-e94a-4437-8c1c-bcd823bf5f45.json b/data/hfopenllm_v2/alibaba/sumink/bbhqwen5/4b528bc8-e94a-4437-8c1c-bcd823bf5f45.json deleted file mode 100644 index 2b38e3e31..000000000 --- a/data/hfopenllm_v2/alibaba/sumink/bbhqwen5/4b528bc8-e94a-4437-8c1c-bcd823bf5f45.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sumink_bbhqwen5/1762652580.546902", - "retrieved_timestamp": "1762652580.5469031", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sumink/bbhqwen5", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sumink/bbhqwen5", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1521507378200951 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29130964476405813 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0022658610271903325 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4019375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11311502659574468 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/alibaba/sumink/bbhqwen6/f585e5fe-c3b5-4134-97ed-67b57d74adb8.json b/data/hfopenllm_v2/alibaba/sumink/bbhqwen6/f585e5fe-c3b5-4134-97ed-67b57d74adb8.json deleted file mode 100644 index 0eeeb5ea2..000000000 --- a/data/hfopenllm_v2/alibaba/sumink/bbhqwen6/f585e5fe-c3b5-4134-97ed-67b57d74adb8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sumink_bbhqwen6/1762652580.547101", - "retrieved_timestamp": "1762652580.547102", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sumink/bbhqwen6", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sumink/bbhqwen6", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18929551368147626 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2782242419852629 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35796875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11527593085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/synergetic/FrankenQwen2.5-14B/5f69b85b-d66c-400b-8d40-58b96233ec3c.json b/data/hfopenllm_v2/alibaba/synergetic/FrankenQwen2.5-14B/5f69b85b-d66c-400b-8d40-58b96233ec3c.json deleted file mode 100644 index dbf2d553e..000000000 --- a/data/hfopenllm_v2/alibaba/synergetic/FrankenQwen2.5-14B/5f69b85b-d66c-400b-8d40-58b96233ec3c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/synergetic_FrankenQwen2.5-14B/1762652580.5505831", - "retrieved_timestamp": "1762652580.550584", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - 
"name": "synergetic/FrankenQwen2.5-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "synergetic/FrankenQwen2.5-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 16.972 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1869472998311148 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6047748435655343 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3842604166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43816489361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/tensopolis/qwen2.5-14b-tensopolis-v1/a3ff3d30-5dec-4ec3-87b9-004d570b005a.json b/data/hfopenllm_v2/alibaba/tensopolis/qwen2.5-14b-tensopolis-v1/a3ff3d30-5dec-4ec3-87b9-004d570b005a.json deleted file mode 100644 index bb60c06d2..000000000 --- a/data/hfopenllm_v2/alibaba/tensopolis/qwen2.5-14b-tensopolis-v1/a3ff3d30-5dec-4ec3-87b9-004d570b005a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tensopolis_qwen2.5-14b-tensopolis-v1/1762652580.556658", - "retrieved_timestamp": "1762652580.556659", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tensopolis/qwen2.5-14b-tensopolis-v1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "tensopolis/qwen2.5-14b-tensopolis-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7990166092634211 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.6363595324538928 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5294561933534743 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3347315436241611 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41933333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49110704787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/tensopolis/qwen2.5-3b-or1-tensopolis/b79e1f6d-698d-4bde-b35f-3f31e09c9d6a.json b/data/hfopenllm_v2/alibaba/tensopolis/qwen2.5-3b-or1-tensopolis/b79e1f6d-698d-4bde-b35f-3f31e09c9d6a.json deleted file mode 100644 index 58696be07..000000000 --- a/data/hfopenllm_v2/alibaba/tensopolis/qwen2.5-3b-or1-tensopolis/b79e1f6d-698d-4bde-b35f-3f31e09c9d6a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tensopolis_qwen2.5-3b-or1-tensopolis/1762652580.556941", - "retrieved_timestamp": "1762652580.556942", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tensopolis/qwen2.5-3b-or1-tensopolis", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "tensopolis/qwen2.5-3b-or1-tensopolis", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35400958346077294 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44214988544006467 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1729607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37492708333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3197307180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/tensopolis/qwen2.5-7b-tensopolis-v1/20854e9f-ba11-492c-8d81-08e13ca1ec35.json b/data/hfopenllm_v2/alibaba/tensopolis/qwen2.5-7b-tensopolis-v1/20854e9f-ba11-492c-8d81-08e13ca1ec35.json deleted file mode 100644 index 2fcaf7ff8..000000000 --- a/data/hfopenllm_v2/alibaba/tensopolis/qwen2.5-7b-tensopolis-v1/20854e9f-ba11-492c-8d81-08e13ca1ec35.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tensopolis_qwen2.5-7b-tensopolis-v1/1762652580.5571609", - "retrieved_timestamp": "1762652580.557162", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tensopolis/qwen2.5-7b-tensopolis-v1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "tensopolis/qwen2.5-7b-tensopolis-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7660939640154789 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5378740884658956 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4561933534743202 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.433875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42686170212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/tensopolis/qwen2.5-7b-tensopolis-v2/e7862d19-b3d4-47f6-b174-b53015229a42.json b/data/hfopenllm_v2/alibaba/tensopolis/qwen2.5-7b-tensopolis-v2/e7862d19-b3d4-47f6-b174-b53015229a42.json deleted file mode 100644 index 473cd4a92..000000000 --- 
a/data/hfopenllm_v2/alibaba/tensopolis/qwen2.5-7b-tensopolis-v2/e7862d19-b3d4-47f6-b174-b53015229a42.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tensopolis_qwen2.5-7b-tensopolis-v2/1762652580.5574138", - "retrieved_timestamp": "1762652580.5574138", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tensopolis/qwen2.5-7b-tensopolis-v2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "tensopolis/qwen2.5-7b-tensopolis-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.752105524452896 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5414622323974015 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42463541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42428523936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/theprint/ReWiz-Qwen-2.5-14B/9a4e6a55-e39e-4da6-b4bb-670cbd75d5c6.json b/data/hfopenllm_v2/alibaba/theprint/ReWiz-Qwen-2.5-14B/9a4e6a55-e39e-4da6-b4bb-670cbd75d5c6.json deleted file mode 100644 index 5de02a18a..000000000 --- a/data/hfopenllm_v2/alibaba/theprint/ReWiz-Qwen-2.5-14B/9a4e6a55-e39e-4da6-b4bb-670cbd75d5c6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/theprint_ReWiz-Qwen-2.5-14B/1762652580.563489", - "retrieved_timestamp": "1762652580.5634902", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "theprint/ReWiz-Qwen-2.5-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "theprint/ReWiz-Qwen-2.5-14B", - 
"additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 16.743 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27854647889821227 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6179492756426455 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29229607250755285 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3800335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45389583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5092253989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/thomas-yanxin/XinYuan-Qwen2-1_5B/626a924c-618b-4047-bed3-9ff67b6e47ae.json b/data/hfopenllm_v2/alibaba/thomas-yanxin/XinYuan-Qwen2-1_5B/626a924c-618b-4047-bed3-9ff67b6e47ae.json deleted file mode 100644 index bffd8f769..000000000 --- a/data/hfopenllm_v2/alibaba/thomas-yanxin/XinYuan-Qwen2-1_5B/626a924c-618b-4047-bed3-9ff67b6e47ae.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/thomas-yanxin_XinYuan-Qwen2-1_5B/1762652580.565519", - "retrieved_timestamp": "1762652580.565519", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "thomas-yanxin/XinYuan-Qwen2-1_5B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "thomas-yanxin/XinYuan-Qwen2-1_5B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2985556102253133 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3635491993150823 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on 
MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06722054380664652 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36339583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23570478723404256 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/thomas-yanxin/XinYuan-Qwen2-7B-0917/0fac57c3-7bea-48fc-bb38-b679ab835d91.json b/data/hfopenllm_v2/alibaba/thomas-yanxin/XinYuan-Qwen2-7B-0917/0fac57c3-7bea-48fc-bb38-b679ab835d91.json deleted file mode 100644 index 4503f46de..000000000 --- a/data/hfopenllm_v2/alibaba/thomas-yanxin/XinYuan-Qwen2-7B-0917/0fac57c3-7bea-48fc-bb38-b679ab835d91.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/thomas-yanxin_XinYuan-Qwen2-7B-0917/1762652580.56599", - "retrieved_timestamp": "1762652580.565991", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "thomas-yanxin/XinYuan-Qwen2-7B-0917", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "thomas-yanxin/XinYuan-Qwen2-7B-0917", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37191983935956596 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5169215573786009 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19788519637462235 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4401041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4245345744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/thomas-yanxin/XinYuan-Qwen2-7B/5e0690cd-21e6-4778-8af9-7d9f623f5f52.json b/data/hfopenllm_v2/alibaba/thomas-yanxin/XinYuan-Qwen2-7B/5e0690cd-21e6-4778-8af9-7d9f623f5f52.json deleted file mode 100644 index d65b92346..000000000 --- a/data/hfopenllm_v2/alibaba/thomas-yanxin/XinYuan-Qwen2-7B/5e0690cd-21e6-4778-8af9-7d9f623f5f52.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/thomas-yanxin_XinYuan-Qwen2-7B/1762652580.565779", - "retrieved_timestamp": "1762652580.56578", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "thomas-yanxin/XinYuan-Qwen2-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "thomas-yanxin/XinYuan-Qwen2-7B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44376033369238066 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4936629157238895 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14577039274924472 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40581249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3924534574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/thomas-yanxin/XinYuan-Qwen2.5-7B-0917/6dc1a4e7-6ce6-4337-a242-420fe4139538.json b/data/hfopenllm_v2/alibaba/thomas-yanxin/XinYuan-Qwen2.5-7B-0917/6dc1a4e7-6ce6-4337-a242-420fe4139538.json deleted file mode 100644 index 50446fd0f..000000000 --- a/data/hfopenllm_v2/alibaba/thomas-yanxin/XinYuan-Qwen2.5-7B-0917/6dc1a4e7-6ce6-4337-a242-420fe4139538.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/thomas-yanxin_XinYuan-Qwen2.5-7B-0917/1762652580.5662022", - "retrieved_timestamp": "1762652580.5662029", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "thomas-yanxin/XinYuan-Qwen2.5-7B-0917", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "thomas-yanxin/XinYuan-Qwen2.5-7B-0917", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35770644113175265 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5184106116987492 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1933534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3675520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38821476063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/v000000/Qwen2.5-14B-Gutenberg-1e-Delta/676745af-1929-4875-9a78-d57354883d75.json b/data/hfopenllm_v2/alibaba/v000000/Qwen2.5-14B-Gutenberg-1e-Delta/676745af-1929-4875-9a78-d57354883d75.json deleted file mode 100644 index 93a67e13d..000000000 --- a/data/hfopenllm_v2/alibaba/v000000/Qwen2.5-14B-Gutenberg-1e-Delta/676745af-1929-4875-9a78-d57354883d75.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/v000000_Qwen2.5-14B-Gutenberg-1e-Delta/1762652580.584905", - "retrieved_timestamp": "1762652580.584906", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "v000000/Qwen2.5-14B-Gutenberg-1e-Delta", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "v000000/Qwen2.5-14B-Gutenberg-1e-Delta", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8045120280854798 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.639849930188539 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5264350453172205 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40730208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4930186170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/v000000/Qwen2.5-Lumen-14B/7b134cb3-7794-4984-9240-b889e2a3b6b4.json b/data/hfopenllm_v2/alibaba/v000000/Qwen2.5-Lumen-14B/7b134cb3-7794-4984-9240-b889e2a3b6b4.json deleted file mode 100644 index 270b3aedd..000000000 --- a/data/hfopenllm_v2/alibaba/v000000/Qwen2.5-Lumen-14B/7b134cb3-7794-4984-9240-b889e2a3b6b4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/v000000_Qwen2.5-Lumen-14B/1762652580.585356", - "retrieved_timestamp": "1762652580.585357", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "v000000/Qwen2.5-Lumen-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "v000000/Qwen2.5-Lumen-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8063604569209697 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6390809511149668 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.5362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41139583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49027593085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/vonjack/Qwen2.5-Coder-0.5B-Merged/76b52fe1-c232-47d9-8052-077a945364cd.json b/data/hfopenllm_v2/alibaba/vonjack/Qwen2.5-Coder-0.5B-Merged/76b52fe1-c232-47d9-8052-077a945364cd.json deleted file mode 100644 index 835bc912a..000000000 --- a/data/hfopenllm_v2/alibaba/vonjack/Qwen2.5-Coder-0.5B-Merged/76b52fe1-c232-47d9-8052-077a945364cd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/vonjack_Qwen2.5-Coder-0.5B-Merged/1762652580.5902011", - "retrieved_timestamp": "1762652580.590202", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "vonjack/Qwen2.5-Coder-0.5B-Merged", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "vonjack/Qwen2.5-Coder-0.5B-Merged", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30997087727230416 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3076017752057237 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0377643504531722 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33034375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.12017952127659574 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/wave-on-discord/qwent-7b/1dc524b8-18d6-4bc0-9146-713ef8abd983.json b/data/hfopenllm_v2/alibaba/wave-on-discord/qwent-7b/1dc524b8-18d6-4bc0-9146-713ef8abd983.json deleted file mode 100644 index 0e3bb6800..000000000 --- a/data/hfopenllm_v2/alibaba/wave-on-discord/qwent-7b/1dc524b8-18d6-4bc0-9146-713ef8abd983.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/wave-on-discord_qwent-7b/1762652580.592784", - "retrieved_timestamp": "1762652580.592785", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "wave-on-discord/qwent-7b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "wave-on-discord/qwent-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20148539209297997 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4228103286118343 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0037764350453172208 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38165625000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16032247340425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/win10/EVA-Norns-Qwen2.5-v0.1/5b8044df-ce6a-4a5e-9aed-d657188fa114.json b/data/hfopenllm_v2/alibaba/win10/EVA-Norns-Qwen2.5-v0.1/5b8044df-ce6a-4a5e-9aed-d657188fa114.json deleted file mode 100644 index 3e39285e5..000000000 --- a/data/hfopenllm_v2/alibaba/win10/EVA-Norns-Qwen2.5-v0.1/5b8044df-ce6a-4a5e-9aed-d657188fa114.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/win10_EVA-Norns-Qwen2.5-v0.1/1762652580.594388", - "retrieved_timestamp": "1762652580.594388", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "win10/EVA-Norns-Qwen2.5-v0.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "win10/EVA-Norns-Qwen2.5-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6219630580193884 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.507240838017382 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26132930513595165 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40451041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3425033244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/win10/Norns-Qwen2.5-12B/4ff2e991-ee62-467e-9fec-cdf334ca7fca.json b/data/hfopenllm_v2/alibaba/win10/Norns-Qwen2.5-12B/4ff2e991-ee62-467e-9fec-cdf334ca7fca.json deleted file mode 100644 index 07a6b838a..000000000 --- a/data/hfopenllm_v2/alibaba/win10/Norns-Qwen2.5-12B/4ff2e991-ee62-467e-9fec-cdf334ca7fca.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/win10_Norns-Qwen2.5-12B/1762652580.594881", - "retrieved_timestamp": "1762652580.594882", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "win10/Norns-Qwen2.5-12B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "win10/Norns-Qwen2.5-12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 12.277 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48969733640074997 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46189201103923744 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08383685800604229 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3554895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2660405585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/win10/Norns-Qwen2.5-7B/2451252e-2cf6-4394-9009-544630696c75.json b/data/hfopenllm_v2/alibaba/win10/Norns-Qwen2.5-7B/2451252e-2cf6-4394-9009-544630696c75.json deleted file mode 100644 index 276548f8e..000000000 --- a/data/hfopenllm_v2/alibaba/win10/Norns-Qwen2.5-7B/2451252e-2cf6-4394-9009-544630696c75.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/win10_Norns-Qwen2.5-7B/1762652580.5950878", - "retrieved_timestamp": "1762652580.595089", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "win10/Norns-Qwen2.5-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "win10/Norns-Qwen2.5-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6122211288270678 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5072887832228614 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2628398791540785 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40847916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34133976063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/alibaba/x0000001/Deepseek-Lumen-R1-Qwen2.5-14B/9d6eb7bc-965e-4de8-bccf-0590ad55ce6d.json b/data/hfopenllm_v2/alibaba/x0000001/Deepseek-Lumen-R1-Qwen2.5-14B/9d6eb7bc-965e-4de8-bccf-0590ad55ce6d.json deleted file mode 100644 index fdc5e8e36..000000000 --- a/data/hfopenllm_v2/alibaba/x0000001/Deepseek-Lumen-R1-Qwen2.5-14B/9d6eb7bc-965e-4de8-bccf-0590ad55ce6d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/x0000001_Deepseek-Lumen-R1-Qwen2.5-14B/1762652580.596637", - "retrieved_timestamp": "1762652580.596638", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "x0000001/Deepseek-Lumen-R1-Qwen2.5-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "x0000001/Deepseek-Lumen-R1-Qwen2.5-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4436107306391486 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45690468424066283 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27794561933534745 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47396875000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4379155585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-70B-DPO/b790e9c5-2412-4aa0-a975-37b8662a82cf.json b/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-70B-DPO/b790e9c5-2412-4aa0-a975-37b8662a82cf.json deleted file mode 100644 index bc7489985..000000000 --- 
a/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-70B-DPO/b790e9c5-2412-4aa0-a975-37b8662a82cf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-70B-DPO/1762652579.9821", - "retrieved_timestamp": "1762652579.982101", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allenai/Llama-3.1-Tulu-3-70B-DPO", - "developer": "allenai", - "inference_platform": "unknown", - "id": "allenai/Llama-3.1-Tulu-3-70B-DPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8281925291559729 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6146203626958501 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44939577039274925 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37583892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4922604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4632646276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-70B-DPO/ec773b66-24fd-4b6f-ac9c-ebcd355e4be7.json b/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-70B-DPO/ec773b66-24fd-4b6f-ac9c-ebcd355e4be7.json new file mode 100644 index 000000000..a5182fb56 --- /dev/null +++ b/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-70B-DPO/ec773b66-24fd-4b6f-ac9c-ebcd355e4be7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-70B-DPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-Tulu-3-70B-DPO", + "id": "allenai/Llama-3.1-Tulu-3-70B-DPO", + "developer": "allenai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.0 + } + }, + "evaluation_results": [ + { + 
"evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8282 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6146 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4494 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3758 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4923 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4633 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-70B-SFT/6921281e-5756-4f0d-a37c-3b05ff6b2703.json b/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-70B-SFT/6921281e-5756-4f0d-a37c-3b05ff6b2703.json deleted file mode 100644 index da71cb74d..000000000 --- a/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-70B-SFT/6921281e-5756-4f0d-a37c-3b05ff6b2703.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-70B-SFT/1762652579.982346", - "retrieved_timestamp": "1762652579.982346", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allenai/Llama-3.1-Tulu-3-70B-SFT", - "developer": "allenai", - "inference_platform": "unknown", - "id": "allenai/Llama-3.1-Tulu-3-70B-SFT", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8050616807847621 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5951437800580934 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33157099697885195 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3447986577181208 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5026145833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46243351063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-70B-SFT/a70b8356-94ce-4f0d-b44a-2215076eed5e.json b/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-70B-SFT/a70b8356-94ce-4f0d-b44a-2215076eed5e.json new file mode 100644 index 000000000..1684989ec --- /dev/null +++ b/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-70B-SFT/a70b8356-94ce-4f0d-b44a-2215076eed5e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-70B-SFT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-Tulu-3-70B-SFT", + "id": "allenai/Llama-3.1-Tulu-3-70B-SFT", + "developer": "allenai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8051 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5951 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3316 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3448 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5026 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4624 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-70B/b182807d-587e-4702-bf30-dab11983b8db.json b/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-70B/b182807d-587e-4702-bf30-dab11983b8db.json new file mode 100644 index 000000000..80ba532bf --- /dev/null +++ b/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-70B/b182807d-587e-4702-bf30-dab11983b8db.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-70B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-Tulu-3-70B", + "id": "allenai/Llama-3.1-Tulu-3-70B", + "developer": "allenai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8291 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6164 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4502 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": 
"Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3733 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4948 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4645 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-70B/c1f0944a-c44c-42e9-90ba-a847509cbd66.json b/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-70B/c1f0944a-c44c-42e9-90ba-a847509cbd66.json new file mode 100644 index 000000000..2e028f4fd --- /dev/null +++ b/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-70B/c1f0944a-c44c-42e9-90ba-a847509cbd66.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-70B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-Tulu-3-70B", + "id": "allenai/Llama-3.1-Tulu-3-70B", + "developer": "allenai", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8379 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6157 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3829 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3733 + } + }, + { + 
"evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4988 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4656 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-8B-DPO/64bb8530-7071-402e-ba9b-1d15ecbe275c.json b/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-8B-DPO/64bb8530-7071-402e-ba9b-1d15ecbe275c.json new file mode 100644 index 000000000..012b3e9c5 --- /dev/null +++ b/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-8B-DPO/64bb8530-7071-402e-ba9b-1d15ecbe275c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-8B-DPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-Tulu-3-8B-DPO", + "id": "allenai/Llama-3.1-Tulu-3-8B-DPO", + "developer": "allenai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8029 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4079 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2364 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4161 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2898 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-8B-DPO/81bd1edf-be5b-4ae6-a2cc-723aaa040eb9.json b/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-8B-DPO/81bd1edf-be5b-4ae6-a2cc-723aaa040eb9.json deleted file mode 100644 index 14f0639ce..000000000 --- a/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-8B-DPO/81bd1edf-be5b-4ae6-a2cc-723aaa040eb9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-8B-DPO/1762652579.9829278", - "retrieved_timestamp": "1762652579.982929", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allenai/Llama-3.1-Tulu-3-8B-DPO", - "developer": "allenai", - "inference_platform": "unknown", - "id": "allenai/Llama-3.1-Tulu-3-8B-DPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8029384255996312 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4079428557044153 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.236404833836858 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41613541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2898105053191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-8B-RM/4f1fc265-f8b7-47e6-a9e6-cfa61b89ad4a.json b/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-8B-RM/4f1fc265-f8b7-47e6-a9e6-cfa61b89ad4a.json new file mode 100644 index 000000000..9397e2c60 --- /dev/null +++ 
b/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-8B-RM/4f1fc265-f8b7-47e6-a9e6-cfa61b89ad4a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-8B-RM/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-Tulu-3-8B-RM", + "id": "allenai/Llama-3.1-Tulu-3-8B-RM", + "developer": "allenai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForSequenceClassification", + "params_billions": 8.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.167 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.295 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2567 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3764 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1082 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-8B-SFT/1420df5c-690e-4b01-b99c-c21c793689ae.json b/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-8B-SFT/1420df5c-690e-4b01-b99c-c21c793689ae.json new file mode 100644 index 000000000..f10b3e75a --- /dev/null +++ b/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-8B-SFT/1420df5c-690e-4b01-b99c-c21c793689ae.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-8B-SFT/1770682486.623709", + 
"retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-Tulu-3-8B-SFT", + "id": "allenai/Llama-3.1-Tulu-3-8B-SFT", + "developer": "allenai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7403 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3872 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1178 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4268 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2812 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-8B-SFT/35674acb-a68c-4ac1-9aac-ac9cb44801e6.json b/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-8B-SFT/35674acb-a68c-4ac1-9aac-ac9cb44801e6.json deleted file mode 100644 index c6286a231..000000000 --- a/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-8B-SFT/35674acb-a68c-4ac1-9aac-ac9cb44801e6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-8B-SFT/1762652579.983397", - "retrieved_timestamp": "1762652579.983398", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allenai/Llama-3.1-Tulu-3-8B-SFT", - "developer": "allenai", - "inference_platform": "unknown", - "id": "allenai/Llama-3.1-Tulu-3-8B-SFT", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7403400754442657 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3871863270501647 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11782477341389729 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4267708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28116688829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-8B/aa9d0b0e-cb3f-452e-bc85-f7cf172d2b8b.json b/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-8B/aa9d0b0e-cb3f-452e-bc85-f7cf172d2b8b.json new file mode 100644 index 000000000..32b0d8b0b --- /dev/null +++ b/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-8B/aa9d0b0e-cb3f-452e-bc85-f7cf172d2b8b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-Tulu-3-8B", + "id": "allenai/Llama-3.1-Tulu-3-8B", + "developer": "allenai", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8255 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4061 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2115 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4175 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2821 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-8B/dfabd777-8620-40e3-b19c-a9227f57b638.json b/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-8B/dfabd777-8620-40e3-b19c-a9227f57b638.json new file mode 100644 index 000000000..fe1b7eb80 --- /dev/null +++ b/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-8B/dfabd777-8620-40e3-b19c-a9227f57b638.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-Tulu-3-8B", + "id": "allenai/Llama-3.1-Tulu-3-8B", + "developer": "allenai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8267 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.405 + } + }, + { + "evaluation_name": "MATH Level 5", + 
"source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1964 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4175 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allenai/OLMo-1.7-7B-hf/08fe3877-ab04-426a-9e27-72ec4ff8ffc3.json b/data/hfopenllm_v2/allenai/OLMo-1.7-7B-hf/08fe3877-ab04-426a-9e27-72ec4ff8ffc3.json new file mode 100644 index 000000000..6810a21ef --- /dev/null +++ b/data/hfopenllm_v2/allenai/OLMo-1.7-7B-hf/08fe3877-ab04-426a-9e27-72ec4ff8ffc3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allenai_OLMo-1.7-7B-hf/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OLMo-1.7-7B-hf", + "id": "allenai/OLMo-1.7-7B-hf", + "developer": "allenai", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Unknown", + "params_billions": 0.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1569 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3014 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.0023 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.255 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3475 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1124 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allenai/OLMo-1.7-7B-hf/5d7caae7-0242-4a5d-b3be-c677b958d130.json b/data/hfopenllm_v2/allenai/OLMo-1.7-7B-hf/5d7caae7-0242-4a5d-b3be-c677b958d130.json deleted file mode 100644 index a1e7abb19..000000000 --- a/data/hfopenllm_v2/allenai/OLMo-1.7-7B-hf/5d7caae7-0242-4a5d-b3be-c677b958d130.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allenai_OLMo-1.7-7B-hf/1762652579.9836009", - "retrieved_timestamp": "1762652579.9836018", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allenai/OLMo-1.7-7B-hf", - "developer": "allenai", - "inference_platform": "unknown", - "id": "allenai/OLMo-1.7-7B-hf", - "additional_details": { - "precision": "float16", - "architecture": "Unknown", - "params_billions": 0.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1568970332052288 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3013695911207614 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0022658610271903325 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2550335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.34748958333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11236702127659574 - } - } - ] -} diff --git a/data/hfopenllm_v2/allenai/OLMo-1B-hf/4b264bb0-bd7e-4b15-9591-50b5a521f100.json b/data/hfopenllm_v2/allenai/OLMo-1B-hf/4b264bb0-bd7e-4b15-9591-50b5a521f100.json new file mode 100644 index 000000000..667c017f1 --- /dev/null +++ b/data/hfopenllm_v2/allenai/OLMo-1B-hf/4b264bb0-bd7e-4b15-9591-50b5a521f100.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allenai_OLMo-1B-hf/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OLMo-1B-hf", + "id": "allenai/OLMo-1B-hf", + "developer": "allenai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "OlmoForCausalLM", + "params_billions": 1.177 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2182 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3052 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0174 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4098 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1174 + } + 
} + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allenai/OLMo-1B-hf/d13f5416-1d95-431b-8f01-b969066ec960.json b/data/hfopenllm_v2/allenai/OLMo-1B-hf/d13f5416-1d95-431b-8f01-b969066ec960.json deleted file mode 100644 index eb14b3edf..000000000 --- a/data/hfopenllm_v2/allenai/OLMo-1B-hf/d13f5416-1d95-431b-8f01-b969066ec960.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allenai_OLMo-1B-hf/1762652579.983823", - "retrieved_timestamp": "1762652579.983823", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allenai/OLMo-1B-hf", - "developer": "allenai", - "inference_platform": "unknown", - "id": "allenai/OLMo-1B-hf", - "additional_details": { - "precision": "bfloat16", - "architecture": "OlmoForCausalLM", - "params_billions": 1.177 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21819660722438686 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30519468988429327 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.017371601208459216 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40978125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11735372340425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/allenai/OLMo-2-1124-7B-Instruct/17df660f-6a91-476f-a7e8-7169eef1c24d.json b/data/hfopenllm_v2/allenai/OLMo-2-1124-7B-Instruct/17df660f-6a91-476f-a7e8-7169eef1c24d.json deleted file mode 100644 index 2c7f01014..000000000 --- a/data/hfopenllm_v2/allenai/OLMo-2-1124-7B-Instruct/17df660f-6a91-476f-a7e8-7169eef1c24d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allenai_OLMo-2-1124-7B-Instruct/1762652579.9840362", - "retrieved_timestamp": "1762652579.9840372", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - 
}, - "model_info": { - "name": "allenai/OLMo-2-1124-7B-Instruct", - "developer": "allenai", - "inference_platform": "unknown", - "id": "allenai/OLMo-2-1124-7B-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "Olmo2ForCausalLM", - "params_billions": 7.299 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7244034716773715 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40223602474417786 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1487915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35083333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2672041223404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/allenai/OLMo-2-1124-7B-Instruct/a8cfe336-0c3e-401c-a1e9-d951e64918ec.json b/data/hfopenllm_v2/allenai/OLMo-2-1124-7B-Instruct/a8cfe336-0c3e-401c-a1e9-d951e64918ec.json new file mode 100644 index 000000000..a7c81df8a --- /dev/null +++ b/data/hfopenllm_v2/allenai/OLMo-2-1124-7B-Instruct/a8cfe336-0c3e-401c-a1e9-d951e64918ec.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allenai_OLMo-2-1124-7B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OLMo-2-1124-7B-Instruct", + "id": "allenai/OLMo-2-1124-7B-Instruct", + "developer": "allenai", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Olmo2ForCausalLM", + "params_billions": 7.299 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7244 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4022 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1488 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2785 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3508 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2672 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allenai/OLMo-7B-Instruct-hf/5e66c653-41b1-46de-b677-ffd8426ba5ec.json b/data/hfopenllm_v2/allenai/OLMo-7B-Instruct-hf/5e66c653-41b1-46de-b677-ffd8426ba5ec.json new file mode 100644 index 000000000..d0864d416 --- /dev/null +++ b/data/hfopenllm_v2/allenai/OLMo-7B-Instruct-hf/5e66c653-41b1-46de-b677-ffd8426ba5ec.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allenai_OLMo-7B-Instruct-hf/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OLMo-7B-Instruct-hf", + "id": "allenai/OLMo-7B-Instruct-hf", + "developer": "allenai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "OlmoForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3473 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3706 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0136 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3765 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1785 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allenai/OLMo-7B-Instruct-hf/7ff78ffd-c934-4a17-b30d-2d8267f3e25a.json b/data/hfopenllm_v2/allenai/OLMo-7B-Instruct-hf/7ff78ffd-c934-4a17-b30d-2d8267f3e25a.json deleted file mode 100644 index 73a1d2bbd..000000000 --- a/data/hfopenllm_v2/allenai/OLMo-7B-Instruct-hf/7ff78ffd-c934-4a17-b30d-2d8267f3e25a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allenai_OLMo-7B-Instruct-hf/1762652579.98445", - "retrieved_timestamp": "1762652579.984452", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allenai/OLMo-7B-Instruct-hf", - "developer": "allenai", - "inference_platform": "unknown", - "id": "allenai/OLMo-7B-Instruct-hf", - "additional_details": { - "precision": "bfloat16", - "architecture": "OlmoForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3472652561869174 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3706469866662716 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37647916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17852393617021275 - } - } - ] -} diff --git a/data/hfopenllm_v2/allenai/OLMo-7B-hf/6308f97d-aecd-467a-91f0-5a1650ccc22a.json b/data/hfopenllm_v2/allenai/OLMo-7B-hf/6308f97d-aecd-467a-91f0-5a1650ccc22a.json deleted file mode 100644 index 3337862b9..000000000 --- a/data/hfopenllm_v2/allenai/OLMo-7B-hf/6308f97d-aecd-467a-91f0-5a1650ccc22a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allenai_OLMo-7B-hf/1762652579.984753", - "retrieved_timestamp": "1762652579.984753", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allenai/OLMo-7B-hf", - "developer": "allenai", - "inference_platform": "unknown", - "id": "allenai/OLMo-7B-hf", - "additional_details": { - "precision": "bfloat16", - "architecture": "OlmoForCausalLM", - "params_billions": 6.888 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2719273749207658 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32791316587362274 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3486666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11727061170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/allenai/OLMo-7B-hf/9f0f0914-1f7a-468e-8a2e-7ae122fd064d.json b/data/hfopenllm_v2/allenai/OLMo-7B-hf/9f0f0914-1f7a-468e-8a2e-7ae122fd064d.json new file mode 100644 index 000000000..f93b86dbe --- /dev/null +++ 
b/data/hfopenllm_v2/allenai/OLMo-7B-hf/9f0f0914-1f7a-468e-8a2e-7ae122fd064d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allenai_OLMo-7B-hf/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OLMo-7B-hf", + "id": "allenai/OLMo-7B-hf", + "developer": "allenai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "OlmoForCausalLM", + "params_billions": 6.888 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2719 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3279 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0121 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3487 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1173 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allenai/OLMoE-1B-7B-0125-Instruct/af176c4c-b06f-44ac-bcba-1331d9148958.json b/data/hfopenllm_v2/allenai/OLMoE-1B-7B-0125-Instruct/af176c4c-b06f-44ac-bcba-1331d9148958.json deleted file mode 100644 index 930095640..000000000 --- a/data/hfopenllm_v2/allenai/OLMoE-1B-7B-0125-Instruct/af176c4c-b06f-44ac-bcba-1331d9148958.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allenai_OLMoE-1B-7B-0125-Instruct/1762652579.984983", - "retrieved_timestamp": "1762652579.984983", - 
"source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allenai/OLMoE-1B-7B-0125-Instruct", - "developer": "allenai", - "inference_platform": "unknown", - "id": "allenai/OLMoE-1B-7B-0125-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "OlmoeForCausalLM", - "params_billions": 6.919 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6757436934001781 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38245348916008676 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08987915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3635833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19148936170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/allenai/OLMoE-1B-7B-0125-Instruct/cc64a143-4f1e-42ee-ade1-fafc4b316336.json b/data/hfopenllm_v2/allenai/OLMoE-1B-7B-0125-Instruct/cc64a143-4f1e-42ee-ade1-fafc4b316336.json new file mode 100644 index 000000000..d3d1048ff --- /dev/null +++ b/data/hfopenllm_v2/allenai/OLMoE-1B-7B-0125-Instruct/cc64a143-4f1e-42ee-ade1-fafc4b316336.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allenai_OLMoE-1B-7B-0125-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OLMoE-1B-7B-0125-Instruct", + "id": "allenai/OLMoE-1B-7B-0125-Instruct", + "developer": "allenai", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "OlmoeForCausalLM", + "params_billions": 6.919 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6757 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3825 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0899 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3636 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1915 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allenai/OLMoE-1B-7B-0924-Instruct/a580b690-0829-43b9-8d52-6dd226208901.json b/data/hfopenllm_v2/allenai/OLMoE-1B-7B-0924-Instruct/a580b690-0829-43b9-8d52-6dd226208901.json deleted file mode 100644 index d78fe56cc..000000000 --- a/data/hfopenllm_v2/allenai/OLMoE-1B-7B-0924-Instruct/a580b690-0829-43b9-8d52-6dd226208901.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allenai_OLMoE-1B-7B-0924-Instruct/1762652579.98542", - "retrieved_timestamp": "1762652579.98542", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allenai/OLMoE-1B-7B-0924-Instruct", - "developer": "allenai", - "inference_platform": "unknown", - "id": "allenai/OLMoE-1B-7B-0924-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "OlmoeForCausalLM", - "params_billions": 6.919 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4667415790103592 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3901610626816106 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3848229166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18758311170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/allenai/OLMoE-1B-7B-0924-Instruct/cf322e64-2682-4a9a-a48f-c4ec47b852f2.json b/data/hfopenllm_v2/allenai/OLMoE-1B-7B-0924-Instruct/cf322e64-2682-4a9a-a48f-c4ec47b852f2.json new file mode 100644 index 000000000..9edb56ae9 --- /dev/null +++ b/data/hfopenllm_v2/allenai/OLMoE-1B-7B-0924-Instruct/cf322e64-2682-4a9a-a48f-c4ec47b852f2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allenai_OLMoE-1B-7B-0924-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OLMoE-1B-7B-0924-Instruct", + "id": "allenai/OLMoE-1B-7B-0924-Instruct", + "developer": "allenai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "OlmoeForCausalLM", + "params_billions": 6.919 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4667 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3902 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0279 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": 
"Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3848 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1876 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allenai/OLMoE-1B-7B-0924/30b32261-b24a-49e3-ba57-172dc1d03ba0.json b/data/hfopenllm_v2/allenai/OLMoE-1B-7B-0924/30b32261-b24a-49e3-ba57-172dc1d03ba0.json new file mode 100644 index 000000000..9a8c2b352 --- /dev/null +++ b/data/hfopenllm_v2/allenai/OLMoE-1B-7B-0924/30b32261-b24a-49e3-ba57-172dc1d03ba0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allenai_OLMoE-1B-7B-0924/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OLMoE-1B-7B-0924", + "id": "allenai/OLMoE-1B-7B-0924", + "developer": "allenai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "OlmoeForCausalLM", + "params_billions": 6.919 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2185 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3393 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0166 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2475 + } + }, + { + "evaluation_name": "MUSR", + 
"source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3488 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.174 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allenai/OLMoE-1B-7B-0924/af1bb542-77cb-47e2-89f1-16cc91e89452.json b/data/hfopenllm_v2/allenai/OLMoE-1B-7B-0924/af1bb542-77cb-47e2-89f1-16cc91e89452.json deleted file mode 100644 index 1cdd2d164..000000000 --- a/data/hfopenllm_v2/allenai/OLMoE-1B-7B-0924/af1bb542-77cb-47e2-89f1-16cc91e89452.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allenai_OLMoE-1B-7B-0924/1762652579.985209", - "retrieved_timestamp": "1762652579.9852102", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allenai/OLMoE-1B-7B-0924", - "developer": "allenai", - "inference_platform": "unknown", - "id": "allenai/OLMoE-1B-7B-0924", - "additional_details": { - "precision": "bfloat16", - "architecture": "OlmoeForCausalLM", - "params_billions": 6.919 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21847143357402804 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3393437931177341 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24748322147651006 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34879166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1739527925531915 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/allknowingroger/Chocolatine-24B/0681c01d-23f3-4b8b-9516-a5cc41761fc4.json b/data/hfopenllm_v2/allknowingroger/Chocolatine-24B/0681c01d-23f3-4b8b-9516-a5cc41761fc4.json new file mode 100644 index 000000000..159a92fad --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Chocolatine-24B/0681c01d-23f3-4b8b-9516-a5cc41761fc4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Chocolatine-24B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Chocolatine-24B", + "id": "allknowingroger/Chocolatine-24B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 24.184 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1958 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6191 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0008 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3255 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4323 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4566 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Chocolatine-24B/9d3d89f9-e792-4b33-91d1-41f84ca1cc68.json b/data/hfopenllm_v2/allknowingroger/Chocolatine-24B/9d3d89f9-e792-4b33-91d1-41f84ca1cc68.json deleted file mode 100644 index b503f7835..000000000 
--- a/data/hfopenllm_v2/allknowingroger/Chocolatine-24B/9d3d89f9-e792-4b33-91d1-41f84ca1cc68.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Chocolatine-24B/1762652579.9856288", - "retrieved_timestamp": "1762652579.98563", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Chocolatine-24B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Chocolatine-24B", - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 24.184 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19581488229010136 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6191260063262436 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32550335570469796 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43232291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4566156914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/Gemma2Slerp1-2.6B/7693ed8a-f76d-482b-92c1-f11810e522ca.json b/data/hfopenllm_v2/allknowingroger/Gemma2Slerp1-2.6B/7693ed8a-f76d-482b-92c1-f11810e522ca.json new file mode 100644 index 000000000..7d68b1a32 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Gemma2Slerp1-2.6B/7693ed8a-f76d-482b-92c1-f11810e522ca.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Gemma2Slerp1-2.6B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma2Slerp1-2.6B", + "id": "allknowingroger/Gemma2Slerp1-2.6B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } + }, + 
"evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5354 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4343 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1065 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4562 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2689 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Gemma2Slerp1-27B/f8dc0128-c606-490a-b965-59d5377dd778.json b/data/hfopenllm_v2/allknowingroger/Gemma2Slerp1-27B/f8dc0128-c606-490a-b965-59d5377dd778.json new file mode 100644 index 000000000..8516f0711 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Gemma2Slerp1-27B/f8dc0128-c606-490a-b965-59d5377dd778.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Gemma2Slerp1-27B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma2Slerp1-27B", + "id": "allknowingroger/Gemma2Slerp1-27B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy 
on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7186 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6399 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2583 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3641 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4767 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4456 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Gemma2Slerp2-2.6B/844547f7-658f-41dd-ab4c-dc0569030e59.json b/data/hfopenllm_v2/allknowingroger/Gemma2Slerp2-2.6B/844547f7-658f-41dd-ab4c-dc0569030e59.json new file mode 100644 index 000000000..58ea93670 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Gemma2Slerp2-2.6B/844547f7-658f-41dd-ab4c-dc0569030e59.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Gemma2Slerp2-2.6B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma2Slerp2-2.6B", + "id": "allknowingroger/Gemma2Slerp2-2.6B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5747 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + 
"dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4308 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0906 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4468 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2696 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Gemma2Slerp2-27B/75c291b5-6d60-4bde-8621-f865196a6ecc.json b/data/hfopenllm_v2/allknowingroger/Gemma2Slerp2-27B/75c291b5-6d60-4bde-8621-f865196a6ecc.json new file mode 100644 index 000000000..921a675ca --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Gemma2Slerp2-27B/75c291b5-6d60-4bde-8621-f865196a6ecc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Gemma2Slerp2-27B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma2Slerp2-27B", + "id": "allknowingroger/Gemma2Slerp2-27B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7546 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 
0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6557 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2787 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.37 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4621 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4623 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Gemma2Slerp3-27B/36d54b12-594f-47fe-9637-a9b740416c5c.json b/data/hfopenllm_v2/allknowingroger/Gemma2Slerp3-27B/36d54b12-594f-47fe-9637-a9b740416c5c.json new file mode 100644 index 000000000..59baa2f21 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Gemma2Slerp3-27B/36d54b12-594f-47fe-9637-a9b740416c5c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Gemma2Slerp3-27B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma2Slerp3-27B", + "id": "allknowingroger/Gemma2Slerp3-27B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7426 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.65 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2742 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3549 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.474 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4641 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Gemma2Slerp4-27B/57733383-9573-463d-a467-068d2685014c.json b/data/hfopenllm_v2/allknowingroger/Gemma2Slerp4-27B/57733383-9573-463d-a467-068d2685014c.json new file mode 100644 index 000000000..5c534391b --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Gemma2Slerp4-27B/57733383-9573-463d-a467-068d2685014c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Gemma2Slerp4-27B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma2Slerp4-27B", + "id": "allknowingroger/Gemma2Slerp4-27B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7497 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.653 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.2719 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3666 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4502 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4649 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/GemmaSlerp-9B/eda1ac9a-98e1-496f-bdeb-1e256b52c14a.json b/data/hfopenllm_v2/allknowingroger/GemmaSlerp-9B/eda1ac9a-98e1-496f-bdeb-1e256b52c14a.json new file mode 100644 index 000000000..edc562705 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/GemmaSlerp-9B/eda1ac9a-98e1-496f-bdeb-1e256b52c14a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_GemmaSlerp-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "GemmaSlerp-9B", + "id": "allknowingroger/GemmaSlerp-9B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7043 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5921 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.216 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy 
on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.344 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4673 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4161 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/GemmaSlerp2-9B/00b8bfda-c6b1-4e1f-b68c-bff7335e2dff.json b/data/hfopenllm_v2/allknowingroger/GemmaSlerp2-9B/00b8bfda-c6b1-4e1f-b68c-bff7335e2dff.json new file mode 100644 index 000000000..126811a1c --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/GemmaSlerp2-9B/00b8bfda-c6b1-4e1f-b68c-bff7335e2dff.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_GemmaSlerp2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "GemmaSlerp2-9B", + "id": "allknowingroger/GemmaSlerp2-9B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7281 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5983 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2107 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3523 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + 
"source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4767 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4239 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/GemmaSlerp4-10B/0a3b9ad6-b853-471d-a292-413b30273034.json b/data/hfopenllm_v2/allknowingroger/GemmaSlerp4-10B/0a3b9ad6-b853-471d-a292-413b30273034.json new file mode 100644 index 000000000..4749c39dc --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/GemmaSlerp4-10B/0a3b9ad6-b853-471d-a292-413b30273034.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_GemmaSlerp4-10B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "GemmaSlerp4-10B", + "id": "allknowingroger/GemmaSlerp4-10B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7326 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6028 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2243 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3532 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.454 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.425 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/GemmaSlerp5-10B/d61c3ace-e353-4c0b-9472-c9a1928809cc.json b/data/hfopenllm_v2/allknowingroger/GemmaSlerp5-10B/d61c3ace-e353-4c0b-9472-c9a1928809cc.json new file mode 100644 index 000000000..16732735f --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/GemmaSlerp5-10B/d61c3ace-e353-4c0b-9472-c9a1928809cc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_GemmaSlerp5-10B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "GemmaSlerp5-10B", + "id": "allknowingroger/GemmaSlerp5-10B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7353 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6054 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2183 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3523 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4608 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4328 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/GemmaStock1-27B/2293a19a-b650-436d-9448-1b641e63d407.json b/data/hfopenllm_v2/allknowingroger/GemmaStock1-27B/2293a19a-b650-436d-9448-1b641e63d407.json new file mode 100644 index 000000000..c77b2f811 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/GemmaStock1-27B/2293a19a-b650-436d-9448-1b641e63d407.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_GemmaStock1-27B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "GemmaStock1-27B", + "id": "allknowingroger/GemmaStock1-27B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7509 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6566 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2636 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3641 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4527 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.473 + } + } + ] +} \ No newline at end of file diff 
--git a/data/hfopenllm_v2/allknowingroger/HomerSlerp1-7B/340dfc7b-9af0-4545-9d7b-6950ea69bd57.json b/data/hfopenllm_v2/allknowingroger/HomerSlerp1-7B/340dfc7b-9af0-4545-9d7b-6950ea69bd57.json deleted file mode 100644 index ce35c5d7f..000000000 --- a/data/hfopenllm_v2/allknowingroger/HomerSlerp1-7B/340dfc7b-9af0-4545-9d7b-6950ea69bd57.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_HomerSlerp1-7B/1762652579.988248", - "retrieved_timestamp": "1762652579.988249", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/HomerSlerp1-7B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/HomerSlerp1-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46212050692163464 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.551818027489446 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2719033232628399 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43585416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4503823138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/HomerSlerp1-7B/c15b977c-c781-4b17-ac9f-25c77602c875.json b/data/hfopenllm_v2/allknowingroger/HomerSlerp1-7B/c15b977c-c781-4b17-ac9f-25c77602c875.json new file mode 100644 index 000000000..a693fa309 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/HomerSlerp1-7B/c15b977c-c781-4b17-ac9f-25c77602c875.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_HomerSlerp1-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HomerSlerp1-7B", + "id": 
"allknowingroger/HomerSlerp1-7B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4621 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5518 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2719 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.318 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4359 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4504 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/HomerSlerp2-7B/42c191be-c0ae-4170-8b6f-565053ae7d9c.json b/data/hfopenllm_v2/allknowingroger/HomerSlerp2-7B/42c191be-c0ae-4170-8b6f-565053ae7d9c.json new file mode 100644 index 000000000..a4cdc4f0b --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/HomerSlerp2-7B/42c191be-c0ae-4170-8b6f-565053ae7d9c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_HomerSlerp2-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HomerSlerp2-7B", + "id": "allknowingroger/HomerSlerp2-7B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + 
"evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4487 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5649 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2968 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3196 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4356 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4515 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/HomerSlerp2-7B/ea9cc238-75d0-45e7-b10e-e214516ca36e.json b/data/hfopenllm_v2/allknowingroger/HomerSlerp2-7B/ea9cc238-75d0-45e7-b10e-e214516ca36e.json deleted file mode 100644 index d5e017f71..000000000 --- a/data/hfopenllm_v2/allknowingroger/HomerSlerp2-7B/ea9cc238-75d0-45e7-b10e-e214516ca36e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_HomerSlerp2-7B/1762652579.988459", - "retrieved_timestamp": "1762652579.98846", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/HomerSlerp2-7B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/HomerSlerp2-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": 
"Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44868172005833407 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5648943315947 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43557291666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45146276595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/HomerSlerp3-7B/a8a69b0c-02c9-437d-975d-69f1ddc6959a.json b/data/hfopenllm_v2/allknowingroger/HomerSlerp3-7B/a8a69b0c-02c9-437d-975d-69f1ddc6959a.json deleted file mode 100644 index cc5162020..000000000 --- a/data/hfopenllm_v2/allknowingroger/HomerSlerp3-7B/a8a69b0c-02c9-437d-975d-69f1ddc6959a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_HomerSlerp3-7B/1762652579.988729", - "retrieved_timestamp": "1762652579.9887302", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/HomerSlerp3-7B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/HomerSlerp3-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4362668829815999 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5598063466560873 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3021148036253776 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44617708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45345744680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/HomerSlerp3-7B/f5cb910d-6e5b-404a-a751-d5cb90668150.json b/data/hfopenllm_v2/allknowingroger/HomerSlerp3-7B/f5cb910d-6e5b-404a-a751-d5cb90668150.json new file mode 100644 index 000000000..776bb888f --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/HomerSlerp3-7B/f5cb910d-6e5b-404a-a751-d5cb90668150.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_HomerSlerp3-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HomerSlerp3-7B", + "id": "allknowingroger/HomerSlerp3-7B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4363 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5598 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3021 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3171 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4462 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4535 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/HomerSlerp4-7B/988da677-c00d-4e7c-847e-6ca553e0124b.json b/data/hfopenllm_v2/allknowingroger/HomerSlerp4-7B/988da677-c00d-4e7c-847e-6ca553e0124b.json deleted file mode 100644 index 3d15272a0..000000000 --- a/data/hfopenllm_v2/allknowingroger/HomerSlerp4-7B/988da677-c00d-4e7c-847e-6ca553e0124b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_HomerSlerp4-7B/1762652579.988936", - "retrieved_timestamp": "1762652579.988937", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/HomerSlerp4-7B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/HomerSlerp4-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43741605606457534 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5570767234678723 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3270392749244713 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44084375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44722406914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/HomerSlerp4-7B/de806e4c-dbf8-48cc-a0d8-033a61dfc777.json b/data/hfopenllm_v2/allknowingroger/HomerSlerp4-7B/de806e4c-dbf8-48cc-a0d8-033a61dfc777.json new file mode 100644 index 000000000..304635237 --- /dev/null +++ 
b/data/hfopenllm_v2/allknowingroger/HomerSlerp4-7B/de806e4c-dbf8-48cc-a0d8-033a61dfc777.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_HomerSlerp4-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HomerSlerp4-7B", + "id": "allknowingroger/HomerSlerp4-7B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4374 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5571 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.327 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3196 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4408 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4472 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/LimyQstar-7B-slerp/59150b73-b05a-451e-ba3f-696d04effe05.json b/data/hfopenllm_v2/allknowingroger/LimyQstar-7B-slerp/59150b73-b05a-451e-ba3f-696d04effe05.json new file mode 100644 index 000000000..992931ce3 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/LimyQstar-7B-slerp/59150b73-b05a-451e-ba3f-696d04effe05.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_LimyQstar-7B-slerp/1770682486.623709", + 
"retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LimyQstar-7B-slerp", + "id": "allknowingroger/LimyQstar-7B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3491 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5024 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0687 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4146 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3103 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/LimyQstar-7B-slerp/ac45b8ec-454f-4a91-9418-a3dc70535119.json b/data/hfopenllm_v2/allknowingroger/LimyQstar-7B-slerp/ac45b8ec-454f-4a91-9418-a3dc70535119.json deleted file mode 100644 index 6d5e3c149..000000000 --- a/data/hfopenllm_v2/allknowingroger/LimyQstar-7B-slerp/ac45b8ec-454f-4a91-9418-a3dc70535119.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_LimyQstar-7B-slerp/1762652579.98914", - "retrieved_timestamp": "1762652579.989141", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": 
"Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/LimyQstar-7B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/LimyQstar-7B-slerp", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34911368502240725 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5023559424245442 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06873111782477341 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4146458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3103390957446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/Llama3.1-60B/84926b81-360a-480c-b240-f154ec7fe0ba.json b/data/hfopenllm_v2/allknowingroger/Llama3.1-60B/84926b81-360a-480c-b240-f154ec7fe0ba.json new file mode 100644 index 000000000..37c1e779c --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Llama3.1-60B/84926b81-360a-480c-b240-f154ec7fe0ba.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Llama3.1-60B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.1-60B", + "id": "allknowingroger/Llama3.1-60B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 61.997 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1815 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": 
"hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3242 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3596 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.331 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Marco-01-slerp1-7B/1b8abf32-6b66-4e9b-9b82-e1978d07a483.json b/data/hfopenllm_v2/allknowingroger/Marco-01-slerp1-7B/1b8abf32-6b66-4e9b-9b82-e1978d07a483.json deleted file mode 100644 index e18c6bb59..000000000 --- a/data/hfopenllm_v2/allknowingroger/Marco-01-slerp1-7B/1b8abf32-6b66-4e9b-9b82-e1978d07a483.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Marco-01-slerp1-7B/1762652579.989768", - "retrieved_timestamp": "1762652579.98977", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Marco-01-slerp1-7B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Marco-01-slerp1-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46811571075856506 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5540943469864194 - } - }, - { - "evaluation_name": "MATH 
Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3157099697885196 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4451875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44830452127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/Marco-01-slerp1-7B/8e6edb04-302b-4dfc-b38f-94b437c921a8.json b/data/hfopenllm_v2/allknowingroger/Marco-01-slerp1-7B/8e6edb04-302b-4dfc-b38f-94b437c921a8.json new file mode 100644 index 000000000..d13ebcc1a --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Marco-01-slerp1-7B/8e6edb04-302b-4dfc-b38f-94b437c921a8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Marco-01-slerp1-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Marco-01-slerp1-7B", + "id": "allknowingroger/Marco-01-slerp1-7B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4681 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5541 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3157 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.3171 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4452 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4483 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Meme-7B-slerp/8eaa7d3f-0217-4ed3-9367-9e0f9c0926fe.json b/data/hfopenllm_v2/allknowingroger/Meme-7B-slerp/8eaa7d3f-0217-4ed3-9367-9e0f9c0926fe.json deleted file mode 100644 index 27e2ceb97..000000000 --- a/data/hfopenllm_v2/allknowingroger/Meme-7B-slerp/8eaa7d3f-0217-4ed3-9367-9e0f9c0926fe.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Meme-7B-slerp/1762652579.9900281", - "retrieved_timestamp": "1762652579.990029", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Meme-7B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Meme-7B-slerp", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5163754393897082 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4660944195552204 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4223020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.281000664893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/Meme-7B-slerp/db92c564-1cf9-43db-9e25-1f450c7b1e7f.json b/data/hfopenllm_v2/allknowingroger/Meme-7B-slerp/db92c564-1cf9-43db-9e25-1f450c7b1e7f.json new file mode 100644 index 000000000..2747ba0f5 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Meme-7B-slerp/db92c564-1cf9-43db-9e25-1f450c7b1e7f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Meme-7B-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Meme-7B-slerp", + "id": "allknowingroger/Meme-7B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5164 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4661 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0438 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2861 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4223 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Ministral-8B-slerp/e3796243-cbba-4ec2-ad7c-89547ad24342.json b/data/hfopenllm_v2/allknowingroger/Ministral-8B-slerp/e3796243-cbba-4ec2-ad7c-89547ad24342.json new file 
mode 100644 index 000000000..ae54b512b --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Ministral-8B-slerp/e3796243-cbba-4ec2-ad7c-89547ad24342.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Ministral-8B-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ministral-8B-slerp", + "id": "allknowingroger/Ministral-8B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1961 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4686 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0038 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4285 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3119 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Ministral-8B-slerp/effba194-3b2a-4847-9708-e3cb62a7c964.json b/data/hfopenllm_v2/allknowingroger/Ministral-8B-slerp/effba194-3b2a-4847-9708-e3cb62a7c964.json deleted file mode 100644 index 7becb70f9..000000000 --- a/data/hfopenllm_v2/allknowingroger/Ministral-8B-slerp/effba194-3b2a-4847-9708-e3cb62a7c964.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - 
"evaluation_id": "hfopenllm_v2/allknowingroger_Ministral-8B-slerp/1762652579.990243", - "retrieved_timestamp": "1762652579.9902442", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Ministral-8B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Ministral-8B-slerp", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19608970863974257 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4686018544963986 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0037764350453172208 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42853125000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3119182180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/MistralPhi3-11B/1479be90-df8f-4e1d-b9db-03e84000187a.json b/data/hfopenllm_v2/allknowingroger/MistralPhi3-11B/1479be90-df8f-4e1d-b9db-03e84000187a.json new file mode 100644 index 000000000..97d6fd6ec --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/MistralPhi3-11B/1479be90-df8f-4e1d-b9db-03e84000187a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_MistralPhi3-11B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MistralPhi3-11B", + "id": "allknowingroger/MistralPhi3-11B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 11.234 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, 
+ "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1943 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6234 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3322 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4267 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4688 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Mistralmash1-7B-s/d2e6c48c-1c18-45a6-ba1a-b335325c980c.json b/data/hfopenllm_v2/allknowingroger/Mistralmash1-7B-s/d2e6c48c-1c18-45a6-ba1a-b335325c980c.json new file mode 100644 index 000000000..c3b192913 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Mistralmash1-7B-s/d2e6c48c-1c18-45a6-ba1a-b335325c980c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Mistralmash1-7B-s/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistralmash1-7B-s", + "id": "allknowingroger/Mistralmash1-7B-s", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3961 + } + }, + { + 
"evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5277 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0921 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4267 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3293 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Mistralmash2-7B-s/f843e45a-f66b-4091-a964-75583c2d7fc5.json b/data/hfopenllm_v2/allknowingroger/Mistralmash2-7B-s/f843e45a-f66b-4091-a964-75583c2d7fc5.json new file mode 100644 index 000000000..5d5a5cc33 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Mistralmash2-7B-s/f843e45a-f66b-4091-a964-75583c2d7fc5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Mistralmash2-7B-s/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistralmash2-7B-s", + "id": "allknowingroger/Mistralmash2-7B-s", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4102 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5305 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0793 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4372 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3345 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/MixTAO-19B-pass/275fb96e-4779-479b-937b-f5db6aa530ea.json b/data/hfopenllm_v2/allknowingroger/MixTAO-19B-pass/275fb96e-4779-479b-937b-f5db6aa530ea.json deleted file mode 100644 index f94f6ce65..000000000 --- a/data/hfopenllm_v2/allknowingroger/MixTAO-19B-pass/275fb96e-4779-479b-937b-f5db6aa530ea.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_MixTAO-19B-pass/1762652579.991234", - "retrieved_timestamp": "1762652579.991235", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/MixTAO-19B-pass", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/MixTAO-19B-pass", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 19.188 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3814368098866563 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5128248798224987 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47827083333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31050531914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/MixTAO-19B-pass/cbc3cd41-e187-4c4f-b207-37bceab423a4.json b/data/hfopenllm_v2/allknowingroger/MixTAO-19B-pass/cbc3cd41-e187-4c4f-b207-37bceab423a4.json new file mode 100644 index 000000000..860bc85e9 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/MixTAO-19B-pass/cbc3cd41-e187-4c4f-b207-37bceab423a4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_MixTAO-19B-pass/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MixTAO-19B-pass", + "id": "allknowingroger/MixTAO-19B-pass", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 19.188 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3814 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5128 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0612 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2844 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + 
"hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4783 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3105 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/MixTaoTruthful-13B-slerp/003c05a1-abb7-41d3-a264-efc6923b64ef.json b/data/hfopenllm_v2/allknowingroger/MixTaoTruthful-13B-slerp/003c05a1-abb7-41d3-a264-efc6923b64ef.json deleted file mode 100644 index 699bfc1a7..000000000 --- a/data/hfopenllm_v2/allknowingroger/MixTaoTruthful-13B-slerp/003c05a1-abb7-41d3-a264-efc6923b64ef.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_MixTaoTruthful-13B-slerp/1762652579.991453", - "retrieved_timestamp": "1762652579.991454", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/MixTaoTruthful-13B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/MixTaoTruthful-13B-slerp", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41388515804731446 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5207335343585151 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42924999999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3100066489361702 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/allknowingroger/MixTaoTruthful-13B-slerp/0f124566-5e94-4233-9a3f-5ff9cfdf160c.json b/data/hfopenllm_v2/allknowingroger/MixTaoTruthful-13B-slerp/0f124566-5e94-4233-9a3f-5ff9cfdf160c.json new file mode 100644 index 000000000..68ee45cec --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/MixTaoTruthful-13B-slerp/0f124566-5e94-4233-9a3f-5ff9cfdf160c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_MixTaoTruthful-13B-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MixTaoTruthful-13B-slerp", + "id": "allknowingroger/MixTaoTruthful-13B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 12.879 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4139 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5207 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0665 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2844 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4292 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.31 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/MultiCalm-7B-slerp/36176ae9-e852-4604-9961-b7f02e4c3e55.json 
b/data/hfopenllm_v2/allknowingroger/MultiCalm-7B-slerp/36176ae9-e852-4604-9961-b7f02e4c3e55.json deleted file mode 100644 index 2a2d62ce4..000000000 --- a/data/hfopenllm_v2/allknowingroger/MultiCalm-7B-slerp/36176ae9-e852-4604-9961-b7f02e4c3e55.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_MultiCalm-7B-slerp/1762652579.991671", - "retrieved_timestamp": "1762652579.991672", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/MultiCalm-7B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/MultiCalm-7B-slerp", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3926526061960044 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5121891599770304 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.061933534743202415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43194791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3032746010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/MultiCalm-7B-slerp/98fabba8-7d70-4a1f-b03c-37e1a9ac94e8.json b/data/hfopenllm_v2/allknowingroger/MultiCalm-7B-slerp/98fabba8-7d70-4a1f-b03c-37e1a9ac94e8.json new file mode 100644 index 000000000..717e85ceb --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/MultiCalm-7B-slerp/98fabba8-7d70-4a1f-b03c-37e1a9ac94e8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_MultiCalm-7B-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MultiCalm-7B-slerp", + "id": "allknowingroger/MultiCalm-7B-slerp", + "developer": "allknowingroger", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3927 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5122 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0619 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4319 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3033 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/MultiMash-12B-slerp/91522dad-529b-477c-8372-793f631e14b7.json b/data/hfopenllm_v2/allknowingroger/MultiMash-12B-slerp/91522dad-529b-477c-8372-793f631e14b7.json new file mode 100644 index 000000000..7be2befa5 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/MultiMash-12B-slerp/91522dad-529b-477c-8372-793f631e14b7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash-12B-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MultiMash-12B-slerp", + "id": "allknowingroger/MultiMash-12B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 12.879 + } + }, + "evaluation_results": [ + { + "evaluation_name": 
"IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3974 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5142 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0808 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2768 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4438 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3068 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/MultiMash-12B-slerp/ed27cd90-e73f-4432-aed9-dd36f29cba1a.json b/data/hfopenllm_v2/allknowingroger/MultiMash-12B-slerp/ed27cd90-e73f-4432-aed9-dd36f29cba1a.json deleted file mode 100644 index fc50a0bd0..000000000 --- a/data/hfopenllm_v2/allknowingroger/MultiMash-12B-slerp/ed27cd90-e73f-4432-aed9-dd36f29cba1a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash-12B-slerp/1762652579.991891", - "retrieved_timestamp": "1762652579.9918919", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/MultiMash-12B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/MultiMash-12B-slerp", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on 
IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39744876926554873 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5141827379810838 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08081570996978851 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44379166666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3067652925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/MultiMash10-13B-slerp/7e4b1f44-73f9-4a6d-9d66-91c60e69e3d2.json b/data/hfopenllm_v2/allknowingroger/MultiMash10-13B-slerp/7e4b1f44-73f9-4a6d-9d66-91c60e69e3d2.json deleted file mode 100644 index a337ae164..000000000 --- a/data/hfopenllm_v2/allknowingroger/MultiMash10-13B-slerp/7e4b1f44-73f9-4a6d-9d66-91c60e69e3d2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash10-13B-slerp/1762652579.992115", - "retrieved_timestamp": "1762652579.992116", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/MultiMash10-13B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/MultiMash10-13B-slerp", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41628323958208663 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5186335995744094 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07175226586102719 - } - }, - { - "evaluation_name": "GPQA", - 
"metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43179166666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3116688829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/MultiMash10-13B-slerp/cec22734-493c-4d11-ba86-6c7ae2005124.json b/data/hfopenllm_v2/allknowingroger/MultiMash10-13B-slerp/cec22734-493c-4d11-ba86-6c7ae2005124.json new file mode 100644 index 000000000..400c2095d --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/MultiMash10-13B-slerp/cec22734-493c-4d11-ba86-6c7ae2005124.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash10-13B-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MultiMash10-13B-slerp", + "id": "allknowingroger/MultiMash10-13B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 12.879 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4163 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5186 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0718 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2861 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4318 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3117 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/MultiMash11-13B-slerp/1b3bfb2a-8290-4af0-bdac-24397a5b6f86.json b/data/hfopenllm_v2/allknowingroger/MultiMash11-13B-slerp/1b3bfb2a-8290-4af0-bdac-24397a5b6f86.json deleted file mode 100644 index 501367384..000000000 --- a/data/hfopenllm_v2/allknowingroger/MultiMash11-13B-slerp/1b3bfb2a-8290-4af0-bdac-24397a5b6f86.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash11-13B-slerp/1762652579.992343", - "retrieved_timestamp": "1762652579.9923441", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/MultiMash11-13B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/MultiMash11-13B-slerp", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4251009543566625 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5193864686484946 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0702416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43728125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30851063829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/MultiMash11-13B-slerp/704a6e19-0d86-42a5-b8f5-05a5856e9c29.json 
b/data/hfopenllm_v2/allknowingroger/MultiMash11-13B-slerp/704a6e19-0d86-42a5-b8f5-05a5856e9c29.json new file mode 100644 index 000000000..d47ffa3a1 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/MultiMash11-13B-slerp/704a6e19-0d86-42a5-b8f5-05a5856e9c29.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash11-13B-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MultiMash11-13B-slerp", + "id": "allknowingroger/MultiMash11-13B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 12.879 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4251 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5194 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0702 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4373 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3085 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/MultiMash2-12B-slerp/af52a422-e959-4662-98e8-c94fa83bee3e.json b/data/hfopenllm_v2/allknowingroger/MultiMash2-12B-slerp/af52a422-e959-4662-98e8-c94fa83bee3e.json deleted file mode 100644 index 3826780df..000000000 --- 
a/data/hfopenllm_v2/allknowingroger/MultiMash2-12B-slerp/af52a422-e959-4662-98e8-c94fa83bee3e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash2-12B-slerp/1762652579.992556", - "retrieved_timestamp": "1762652579.992556", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/MultiMash2-12B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/MultiMash2-12B-slerp", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42607503645881817 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5133973498532299 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06419939577039276 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4228020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3042719414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/MultiMash2-12B-slerp/bc54349d-59e0-4ae4-94f9-3f5ae98261f4.json b/data/hfopenllm_v2/allknowingroger/MultiMash2-12B-slerp/bc54349d-59e0-4ae4-94f9-3f5ae98261f4.json new file mode 100644 index 000000000..ee50b15c0 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/MultiMash2-12B-slerp/bc54349d-59e0-4ae4-94f9-3f5ae98261f4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash2-12B-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MultiMash2-12B-slerp", + "id": "allknowingroger/MultiMash2-12B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + 
"params_billions": 12.879 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4261 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5134 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0642 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4228 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3043 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/MultiMash5-12B-slerp/d20d533a-758b-477c-b4eb-073adaed640e.json b/data/hfopenllm_v2/allknowingroger/MultiMash5-12B-slerp/d20d533a-758b-477c-b4eb-073adaed640e.json new file mode 100644 index 000000000..195752d1b --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/MultiMash5-12B-slerp/d20d533a-758b-477c-b4eb-073adaed640e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash5-12B-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MultiMash5-12B-slerp", + "id": "allknowingroger/MultiMash5-12B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 12.879 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + 
}, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4142 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5145 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0634 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4203 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3028 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/MultiMash5-12B-slerp/df7621bc-5af2-45c5-b8e4-ebc158dad966.json b/data/hfopenllm_v2/allknowingroger/MultiMash5-12B-slerp/df7621bc-5af2-45c5-b8e4-ebc158dad966.json deleted file mode 100644 index 3efca05cd..000000000 --- a/data/hfopenllm_v2/allknowingroger/MultiMash5-12B-slerp/df7621bc-5af2-45c5-b8e4-ebc158dad966.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash5-12B-slerp/1762652579.992772", - "retrieved_timestamp": "1762652579.992772", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/MultiMash5-12B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/MultiMash5-12B-slerp", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.41415998439695567 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5144534995858502 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4202916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30277593085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/MultiMash6-12B-slerp/195b1c31-c766-479c-a445-39a6150404fc.json b/data/hfopenllm_v2/allknowingroger/MultiMash6-12B-slerp/195b1c31-c766-479c-a445-39a6150404fc.json deleted file mode 100644 index e638877e1..000000000 --- a/data/hfopenllm_v2/allknowingroger/MultiMash6-12B-slerp/195b1c31-c766-479c-a445-39a6150404fc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash6-12B-slerp/1762652579.992992", - "retrieved_timestamp": "1762652579.992993", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/MultiMash6-12B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/MultiMash6-12B-slerp", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43004672047943904 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5195916915718951 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07250755287009064 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4305833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30909242021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/MultiMash6-12B-slerp/f7c9ad0d-3fea-4bec-8ac3-46f01a3449fb.json b/data/hfopenllm_v2/allknowingroger/MultiMash6-12B-slerp/f7c9ad0d-3fea-4bec-8ac3-46f01a3449fb.json new file mode 100644 index 000000000..a7902f000 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/MultiMash6-12B-slerp/f7c9ad0d-3fea-4bec-8ac3-46f01a3449fb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash6-12B-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MultiMash6-12B-slerp", + "id": "allknowingroger/MultiMash6-12B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 12.879 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.43 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5196 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0725 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": 
{ + "score": 0.4306 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3091 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/MultiMash7-12B-slerp/141507b5-67df-4c38-9eeb-b9d3cf98b08f.json b/data/hfopenllm_v2/allknowingroger/MultiMash7-12B-slerp/141507b5-67df-4c38-9eeb-b9d3cf98b08f.json deleted file mode 100644 index 194f9e3f1..000000000 --- a/data/hfopenllm_v2/allknowingroger/MultiMash7-12B-slerp/141507b5-67df-4c38-9eeb-b9d3cf98b08f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash7-12B-slerp/1762652579.993205", - "retrieved_timestamp": "1762652579.993206", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/MultiMash7-12B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/MultiMash7-12B-slerp", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42127887338927383 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5111135397195524 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06948640483383686 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42794791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3029421542553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/MultiMash7-12B-slerp/9db1f823-e068-4a39-a5cc-b9c588099427.json b/data/hfopenllm_v2/allknowingroger/MultiMash7-12B-slerp/9db1f823-e068-4a39-a5cc-b9c588099427.json new file mode 100644 index 000000000..7142478be --- /dev/null +++ 
b/data/hfopenllm_v2/allknowingroger/MultiMash7-12B-slerp/9db1f823-e068-4a39-a5cc-b9c588099427.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash7-12B-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MultiMash7-12B-slerp", + "id": "allknowingroger/MultiMash7-12B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 12.879 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4213 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5111 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0695 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2785 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4279 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/MultiMash8-13B-slerp/23818b45-bf5f-48a2-982f-1e2a0d35aac8.json b/data/hfopenllm_v2/allknowingroger/MultiMash8-13B-slerp/23818b45-bf5f-48a2-982f-1e2a0d35aac8.json new file mode 100644 index 000000000..55e001fb8 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/MultiMash8-13B-slerp/23818b45-bf5f-48a2-982f-1e2a0d35aac8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/allknowingroger_MultiMash8-13B-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MultiMash8-13B-slerp", + "id": "allknowingroger/MultiMash8-13B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 12.879 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4321 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5178 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.077 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2886 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4424 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3126 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/MultiMash8-13B-slerp/54a836bc-8048-4c2b-a65a-937acc2fa414.json b/data/hfopenllm_v2/allknowingroger/MultiMash8-13B-slerp/54a836bc-8048-4c2b-a65a-937acc2fa414.json deleted file mode 100644 index 1d29c9eb8..000000000 --- a/data/hfopenllm_v2/allknowingroger/MultiMash8-13B-slerp/54a836bc-8048-4c2b-a65a-937acc2fa414.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash8-13B-slerp/1762652579.9938078", - "retrieved_timestamp": "1762652579.99381", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/MultiMash8-13B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/MultiMash8-13B-slerp", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4320702402957486 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5178483059643324 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0770392749244713 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4423958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31258311170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/MultiMash9-13B-slerp/6a0f5973-6377-4707-a0e3-414ca1f22b32.json b/data/hfopenllm_v2/allknowingroger/MultiMash9-13B-slerp/6a0f5973-6377-4707-a0e3-414ca1f22b32.json deleted file mode 100644 index 9143f17a6..000000000 --- a/data/hfopenllm_v2/allknowingroger/MultiMash9-13B-slerp/6a0f5973-6377-4707-a0e3-414ca1f22b32.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash9-13B-slerp/1762652579.994061", - "retrieved_timestamp": "1762652579.994061", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/MultiMash9-13B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/MultiMash9-13B-slerp", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4187810564856802 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5193579939678727 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07854984894259819 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4398229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3100066489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/MultiMash9-13B-slerp/de6eda66-b8f5-4b23-89e1-44bbac600953.json b/data/hfopenllm_v2/allknowingroger/MultiMash9-13B-slerp/de6eda66-b8f5-4b23-89e1-44bbac600953.json new file mode 100644 index 000000000..b3fb05f58 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/MultiMash9-13B-slerp/de6eda66-b8f5-4b23-89e1-44bbac600953.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash9-13B-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MultiMash9-13B-slerp", + "id": "allknowingroger/MultiMash9-13B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 12.879 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4188 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5194 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0785 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4398 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.31 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/MultiMerge-7B-slerp/632974c2-57e2-41f9-8c00-671e07e7594b.json b/data/hfopenllm_v2/allknowingroger/MultiMerge-7B-slerp/632974c2-57e2-41f9-8c00-671e07e7594b.json new file mode 100644 index 000000000..a78094980 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/MultiMerge-7B-slerp/632974c2-57e2-41f9-8c00-671e07e7594b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMerge-7B-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MultiMerge-7B-slerp", + "id": "allknowingroger/MultiMerge-7B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3948 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.514 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0665 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": 
"hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.428 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/MultiMerge-7B-slerp/f0aae363-f838-48c8-bf9e-b8e9f0e84a24.json b/data/hfopenllm_v2/allknowingroger/MultiMerge-7B-slerp/f0aae363-f838-48c8-bf9e-b8e9f0e84a24.json deleted file mode 100644 index e7a488b59..000000000 --- a/data/hfopenllm_v2/allknowingroger/MultiMerge-7B-slerp/f0aae363-f838-48c8-bf9e-b8e9f0e84a24.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMerge-7B-slerp/1762652579.994297", - "retrieved_timestamp": "1762652579.994299", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/MultiMerge-7B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/MultiMerge-7B-slerp", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3947758613811354 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5140224933103638 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42797916666666663 - 
} - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036901595744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/Multimash3-12B-slerp/80aa0629-7ea1-4f69-b302-c0502abcbbab.json b/data/hfopenllm_v2/allknowingroger/Multimash3-12B-slerp/80aa0629-7ea1-4f69-b302-c0502abcbbab.json deleted file mode 100644 index 65eef6d4c..000000000 --- a/data/hfopenllm_v2/allknowingroger/Multimash3-12B-slerp/80aa0629-7ea1-4f69-b302-c0502abcbbab.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Multimash3-12B-slerp/1762652579.994557", - "retrieved_timestamp": "1762652579.994557", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Multimash3-12B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Multimash3-12B-slerp", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44371046600796993 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5176624678276028 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06268882175226587 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4343958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3067652925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/Multimash3-12B-slerp/e86dcf4f-6282-4aa6-b645-00f93a2e9077.json b/data/hfopenllm_v2/allknowingroger/Multimash3-12B-slerp/e86dcf4f-6282-4aa6-b645-00f93a2e9077.json new file mode 100644 index 000000000..67777cbfb --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Multimash3-12B-slerp/e86dcf4f-6282-4aa6-b645-00f93a2e9077.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/allknowingroger_Multimash3-12B-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Multimash3-12B-slerp", + "id": "allknowingroger/Multimash3-12B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 12.879 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4437 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5177 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0627 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4344 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3068 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Multimerge-19B-pass/818e21b8-da78-4649-a71a-ba71c89d1fe7.json b/data/hfopenllm_v2/allknowingroger/Multimerge-19B-pass/818e21b8-da78-4649-a71a-ba71c89d1fe7.json deleted file mode 100644 index f429285ea..000000000 --- a/data/hfopenllm_v2/allknowingroger/Multimerge-19B-pass/818e21b8-da78-4649-a71a-ba71c89d1fe7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Multimerge-19B-pass/1762652579.9948218", - "retrieved_timestamp": "1762652579.994823", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Multimerge-19B-pass", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Multimerge-19B-pass", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 19.188 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17730510600761534 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2891778102988436 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3429583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11685505319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/Multimerge-19B-pass/b20be5c9-9720-4076-b587-728549dd19af.json b/data/hfopenllm_v2/allknowingroger/Multimerge-19B-pass/b20be5c9-9720-4076-b587-728549dd19af.json new file mode 100644 index 000000000..1204ac4a5 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Multimerge-19B-pass/b20be5c9-9720-4076-b587-728549dd19af.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Multimerge-19B-pass/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Multimerge-19B-pass", + "id": "allknowingroger/Multimerge-19B-pass", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 19.188 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 
0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1773 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2892 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.343 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1169 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/MultiverseEx26-7B-slerp/30b74d3f-7247-4c93-9c94-dc8beba14b70.json b/data/hfopenllm_v2/allknowingroger/MultiverseEx26-7B-slerp/30b74d3f-7247-4c93-9c94-dc8beba14b70.json deleted file mode 100644 index 4057ffb44..000000000 --- a/data/hfopenllm_v2/allknowingroger/MultiverseEx26-7B-slerp/30b74d3f-7247-4c93-9c94-dc8beba14b70.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_MultiverseEx26-7B-slerp/1762652579.995038", - "retrieved_timestamp": "1762652579.995039", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/MultiverseEx26-7B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/MultiverseEx26-7B-slerp", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3938516469633905 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": 
"Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5133591871690678 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0755287009063444 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4293125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3035239361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/MultiverseEx26-7B-slerp/5e193803-39d1-4f12-8726-ebbe5f71563c.json b/data/hfopenllm_v2/allknowingroger/MultiverseEx26-7B-slerp/5e193803-39d1-4f12-8726-ebbe5f71563c.json new file mode 100644 index 000000000..b5df1f90d --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/MultiverseEx26-7B-slerp/5e193803-39d1-4f12-8726-ebbe5f71563c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_MultiverseEx26-7B-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MultiverseEx26-7B-slerp", + "id": "allknowingroger/MultiverseEx26-7B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3939 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5134 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0755 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + 
"source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4293 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3035 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/NeuralWestSeverus-7B-slerp/61131a6c-f412-42bf-814b-7d711a840d44.json b/data/hfopenllm_v2/allknowingroger/NeuralWestSeverus-7B-slerp/61131a6c-f412-42bf-814b-7d711a840d44.json new file mode 100644 index 000000000..b88992a9a --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/NeuralWestSeverus-7B-slerp/61131a6c-f412-42bf-814b-7d711a840d44.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_NeuralWestSeverus-7B-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NeuralWestSeverus-7B-slerp", + "id": "allknowingroger/NeuralWestSeverus-7B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4136 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5244 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0733 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4529 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3137 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/NeuralWestSeverus-7B-slerp/fc6d4451-0a9c-4d53-8d22-179ff7059d61.json b/data/hfopenllm_v2/allknowingroger/NeuralWestSeverus-7B-slerp/fc6d4451-0a9c-4d53-8d22-179ff7059d61.json deleted file mode 100644 index c9cbf050b..000000000 --- a/data/hfopenllm_v2/allknowingroger/NeuralWestSeverus-7B-slerp/fc6d4451-0a9c-4d53-8d22-179ff7059d61.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_NeuralWestSeverus-7B-slerp/1762652579.995253", - "retrieved_timestamp": "1762652579.995254", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/NeuralWestSeverus-7B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/NeuralWestSeverus-7B-slerp", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41356046401326263 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5244283854305991 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07326283987915408 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45287499999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on 
MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3137466755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/Neuralcoven-7B-slerp/535e72b1-17e0-40e3-9d66-d31f8ec70413.json b/data/hfopenllm_v2/allknowingroger/Neuralcoven-7B-slerp/535e72b1-17e0-40e3-9d66-d31f8ec70413.json new file mode 100644 index 000000000..035e133f8 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Neuralcoven-7B-slerp/535e72b1-17e0-40e3-9d66-d31f8ec70413.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Neuralcoven-7B-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Neuralcoven-7B-slerp", + "id": "allknowingroger/Neuralcoven-7B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3859 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5303 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0785 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.429 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3294 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/allknowingroger/Neuralcoven-7B-slerp/ba46f82b-2129-43db-ae21-09e6576dc4e6.json b/data/hfopenllm_v2/allknowingroger/Neuralcoven-7B-slerp/ba46f82b-2129-43db-ae21-09e6576dc4e6.json deleted file mode 100644 index dd5c955ac..000000000 --- a/data/hfopenllm_v2/allknowingroger/Neuralcoven-7B-slerp/ba46f82b-2129-43db-ae21-09e6576dc4e6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Neuralcoven-7B-slerp/1762652579.995681", - "retrieved_timestamp": "1762652579.995682", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Neuralcoven-7B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Neuralcoven-7B-slerp", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3858584112377381 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.530287217712165 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07854984894259819 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.429 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3293716755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/Neuralmultiverse-7B-slerp/b98b76ea-b068-46ec-b929-4ca1037eaf99.json b/data/hfopenllm_v2/allknowingroger/Neuralmultiverse-7B-slerp/b98b76ea-b068-46ec-b929-4ca1037eaf99.json deleted file mode 100644 index 79752d18c..000000000 --- a/data/hfopenllm_v2/allknowingroger/Neuralmultiverse-7B-slerp/b98b76ea-b068-46ec-b929-4ca1037eaf99.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Neuralmultiverse-7B-slerp/1762652579.995954", - "retrieved_timestamp": "1762652579.995955", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Neuralmultiverse-7B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Neuralmultiverse-7B-slerp", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3769154731667531 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5165722210470375 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42804166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30418882978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/Neuralmultiverse-7B-slerp/ea15479e-24a8-4924-a754-a8567c511e61.json b/data/hfopenllm_v2/allknowingroger/Neuralmultiverse-7B-slerp/ea15479e-24a8-4924-a754-a8567c511e61.json new file mode 100644 index 000000000..534b2a6eb --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Neuralmultiverse-7B-slerp/ea15479e-24a8-4924-a754-a8567c511e61.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Neuralmultiverse-7B-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Neuralmultiverse-7B-slerp", + "id": "allknowingroger/Neuralmultiverse-7B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3769 + } + }, + { + "evaluation_name": 
"BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5166 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.065 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2844 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.428 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3042 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Ph3della5-14B/5799f285-c61f-43a8-a6a6-053808cf4e8f.json b/data/hfopenllm_v2/allknowingroger/Ph3della5-14B/5799f285-c61f-43a8-a6a6-053808cf4e8f.json new file mode 100644 index 000000000..0a78188b8 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Ph3della5-14B/5799f285-c61f-43a8-a6a6-053808cf4e8f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3della5-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ph3della5-14B", + "id": "allknowingroger/Ph3della5-14B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 13.96 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4799 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6332 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1767 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3423 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4386 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4787 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Ph3della5-14B/d5a47313-b2f5-4833-9539-b8f56e4a5fda.json b/data/hfopenllm_v2/allknowingroger/Ph3della5-14B/d5a47313-b2f5-4833-9539-b8f56e4a5fda.json deleted file mode 100644 index f667672ce..000000000 --- a/data/hfopenllm_v2/allknowingroger/Ph3della5-14B/d5a47313-b2f5-4833-9539-b8f56e4a5fda.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3della5-14B/1762652579.9961941", - "retrieved_timestamp": "1762652579.996195", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Ph3della5-14B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Ph3della5-14B", - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 13.96 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47985567183960776 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6331746353794991 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.17673716012084592 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3422818791946309 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4386145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4787234042553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/Ph3merge-14B/36feef44-3d3b-4102-8606-ee6420bddcff.json b/data/hfopenllm_v2/allknowingroger/Ph3merge-14B/36feef44-3d3b-4102-8606-ee6420bddcff.json new file mode 100644 index 000000000..9d397a68f --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Ph3merge-14B/36feef44-3d3b-4102-8606-ee6420bddcff.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3merge-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ph3merge-14B", + "id": "allknowingroger/Ph3merge-14B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 13.619 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6381 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0106 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3381 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4611 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Ph3merge-14B/95228f47-8fb1-443c-8ad4-0021504e34e0.json b/data/hfopenllm_v2/allknowingroger/Ph3merge-14B/95228f47-8fb1-443c-8ad4-0021504e34e0.json deleted file mode 100644 index 45a0815c9..000000000 --- a/data/hfopenllm_v2/allknowingroger/Ph3merge-14B/95228f47-8fb1-443c-8ad4-0021504e34e0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3merge-14B/1762652579.996419", - "retrieved_timestamp": "1762652579.9964201", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Ph3merge-14B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Ph3merge-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 13.619 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27012881376968667 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.638087568868341 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4334375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4611037234042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/Ph3merge2-14B/b5790fec-6c12-42a3-853c-488658bf949d.json b/data/hfopenllm_v2/allknowingroger/Ph3merge2-14B/b5790fec-6c12-42a3-853c-488658bf949d.json deleted 
file mode 100644 index bf8210d18..000000000 --- a/data/hfopenllm_v2/allknowingroger/Ph3merge2-14B/b5790fec-6c12-42a3-853c-488658bf949d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3merge2-14B/1762652579.996639", - "retrieved_timestamp": "1762652579.99664", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Ph3merge2-14B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Ph3merge2-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 13.619 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17061064641817045 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3606937444321621 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3910833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1722905585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/Ph3merge2-14B/fd55f19a-2c22-4f29-82e0-15b02f25b9a9.json b/data/hfopenllm_v2/allknowingroger/Ph3merge2-14B/fd55f19a-2c22-4f29-82e0-15b02f25b9a9.json new file mode 100644 index 000000000..3229ae0a1 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Ph3merge2-14B/fd55f19a-2c22-4f29-82e0-15b02f25b9a9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3merge2-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ph3merge2-14B", + "id": "allknowingroger/Ph3merge2-14B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 13.619 + } + }, + 
"evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1706 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3607 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3911 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1723 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Ph3merge3-14B/18e5decd-c95e-43d2-9ba2-007ba32e216f.json b/data/hfopenllm_v2/allknowingroger/Ph3merge3-14B/18e5decd-c95e-43d2-9ba2-007ba32e216f.json new file mode 100644 index 000000000..56d85e1a4 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Ph3merge3-14B/18e5decd-c95e-43d2-9ba2-007ba32e216f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3merge3-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ph3merge3-14B", + "id": "allknowingroger/Ph3merge3-14B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 13.619 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1645 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3597 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4082 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1647 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Ph3merge3-14B/e5d9bded-a8e4-4133-84b9-6eac517a4226.json b/data/hfopenllm_v2/allknowingroger/Ph3merge3-14B/e5d9bded-a8e4-4133-84b9-6eac517a4226.json deleted file mode 100644 index 8374d16f8..000000000 --- a/data/hfopenllm_v2/allknowingroger/Ph3merge3-14B/e5d9bded-a8e4-4133-84b9-6eac517a4226.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3merge3-14B/1762652579.99685", - "retrieved_timestamp": "1762652579.996851", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Ph3merge3-14B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Ph3merge3-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 13.619 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1645157072124186 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3597431731140411 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40819791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16472739361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/Ph3task1-14B/718ef6de-5926-4a4c-bade-9a162ce8e730.json b/data/hfopenllm_v2/allknowingroger/Ph3task1-14B/718ef6de-5926-4a4c-bade-9a162ce8e730.json deleted file mode 100644 index 7388751df..000000000 --- a/data/hfopenllm_v2/allknowingroger/Ph3task1-14B/718ef6de-5926-4a4c-bade-9a162ce8e730.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3task1-14B/1762652579.997059", - "retrieved_timestamp": "1762652579.99706", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Ph3task1-14B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Ph3task1-14B", - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 13.96 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46946435457918323 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.63178060736657 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16691842900302115 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35067114093959734 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45077083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4734042553191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/Ph3task1-14B/85a4996e-8c44-4e4f-9478-19a8c5513617.json b/data/hfopenllm_v2/allknowingroger/Ph3task1-14B/85a4996e-8c44-4e4f-9478-19a8c5513617.json new file mode 100644 index 000000000..7b39272a0 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Ph3task1-14B/85a4996e-8c44-4e4f-9478-19a8c5513617.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3task1-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ph3task1-14B", + "id": "allknowingroger/Ph3task1-14B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 13.96 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4695 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6318 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1669 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3507 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4508 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4734 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Ph3task2-14B/5d818d86-2caf-4b29-9c15-8fa27217de22.json b/data/hfopenllm_v2/allknowingroger/Ph3task2-14B/5d818d86-2caf-4b29-9c15-8fa27217de22.json deleted file mode 100644 index ee90bdacb..000000000 --- a/data/hfopenllm_v2/allknowingroger/Ph3task2-14B/5d818d86-2caf-4b29-9c15-8fa27217de22.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3task2-14B/1762652579.99728", - "retrieved_timestamp": "1762652579.997281", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Ph3task2-14B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Ph3task2-14B", - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 13.96 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4713127834146731 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6098412220695854 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14652567975830816 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4535 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44597739361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/Ph3task2-14B/db6d57c8-df0b-407e-b937-67c55b513a5f.json b/data/hfopenllm_v2/allknowingroger/Ph3task2-14B/db6d57c8-df0b-407e-b937-67c55b513a5f.json new file mode 100644 index 000000000..32356567b --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Ph3task2-14B/db6d57c8-df0b-407e-b937-67c55b513a5f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3task2-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", 
+ "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ph3task2-14B", + "id": "allknowingroger/Ph3task2-14B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 13.96 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4713 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6098 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1465 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3305 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4535 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.446 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Ph3task3-14B/89ac933d-0a7c-40e6-8fa7-35bb6205e44b.json b/data/hfopenllm_v2/allknowingroger/Ph3task3-14B/89ac933d-0a7c-40e6-8fa7-35bb6205e44b.json new file mode 100644 index 000000000..817fe3f3d --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Ph3task3-14B/89ac933d-0a7c-40e6-8fa7-35bb6205e44b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3task3-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ph3task3-14B", + "id": "allknowingroger/Ph3task3-14B", + "developer": "allknowingroger", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 13.96 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4962 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6298 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.176 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3414 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4426 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4771 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Ph3task3-14B/a935c0d1-6623-45c6-a100-96c8b5a3a2fb.json b/data/hfopenllm_v2/allknowingroger/Ph3task3-14B/a935c0d1-6623-45c6-a100-96c8b5a3a2fb.json deleted file mode 100644 index 420fd88b6..000000000 --- a/data/hfopenllm_v2/allknowingroger/Ph3task3-14B/a935c0d1-6623-45c6-a100-96c8b5a3a2fb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3task3-14B/1762652579.997498", - "retrieved_timestamp": "1762652579.997499", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Ph3task3-14B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Ph3task3-14B", - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - 
"params_billions": 13.96 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4962421929369628 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6297915743094921 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17598187311178248 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3414429530201342 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44255208333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47706117021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/Ph3unsloth-3B-slerp/0a9be33a-792e-413c-b60d-3e97a060fa78.json b/data/hfopenllm_v2/allknowingroger/Ph3unsloth-3B-slerp/0a9be33a-792e-413c-b60d-3e97a060fa78.json deleted file mode 100644 index 7e96754c1..000000000 --- a/data/hfopenllm_v2/allknowingroger/Ph3unsloth-3B-slerp/0a9be33a-792e-413c-b60d-3e97a060fa78.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3unsloth-3B-slerp/1762652579.99772", - "retrieved_timestamp": "1762652579.99772", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Ph3unsloth-3B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Ph3unsloth-3B-slerp", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18944511673470835 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5468077356147099 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", 
- "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10120845921450151 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45278124999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3700964095744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/Ph3unsloth-3B-slerp/c79e690f-3e09-4fac-9412-937a3b7ef352.json b/data/hfopenllm_v2/allknowingroger/Ph3unsloth-3B-slerp/c79e690f-3e09-4fac-9412-937a3b7ef352.json new file mode 100644 index 000000000..ba7ad3c78 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Ph3unsloth-3B-slerp/c79e690f-3e09-4fac-9412-937a3b7ef352.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3unsloth-3B-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ph3unsloth-3B-slerp", + "id": "allknowingroger/Ph3unsloth-3B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1894 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5468 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1012 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3247 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + 
"source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4528 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3701 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Phi3mash1-17B-pass/ce74b7e3-8505-4c79-a7de-12d1e6b47155.json b/data/hfopenllm_v2/allknowingroger/Phi3mash1-17B-pass/ce74b7e3-8505-4c79-a7de-12d1e6b47155.json new file mode 100644 index 000000000..fb008b871 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Phi3mash1-17B-pass/ce74b7e3-8505-4c79-a7de-12d1e6b47155.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Phi3mash1-17B-pass/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi3mash1-17B-pass", + "id": "allknowingroger/Phi3mash1-17B-pass", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 16.687 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1884 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6129 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3196 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.4451 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4589 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Quen2-65B/3c562d8a-2df9-4d3f-9699-bfaee4a1ce2b.json b/data/hfopenllm_v2/allknowingroger/Quen2-65B/3c562d8a-2df9-4d3f-9699-bfaee4a1ce2b.json new file mode 100644 index 000000000..bd16b28e4 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Quen2-65B/3c562d8a-2df9-4d3f-9699-bfaee4a1ce2b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Quen2-65B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quen2-65B", + "id": "allknowingroger/Quen2-65B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 63.923 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1758 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2757 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2357 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3209 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1114 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Quen2-65B/4bc3f55b-0638-4fc2-b1d9-04780707acef.json b/data/hfopenllm_v2/allknowingroger/Quen2-65B/4bc3f55b-0638-4fc2-b1d9-04780707acef.json deleted file mode 100644 index 179f9d5f9..000000000 --- a/data/hfopenllm_v2/allknowingroger/Quen2-65B/4bc3f55b-0638-4fc2-b1d9-04780707acef.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Quen2-65B/1762652579.9981499", - "retrieved_timestamp": "1762652579.9981499", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Quen2-65B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Quen2-65B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 63.923 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17578137120617737 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27565161872324456 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23573825503355705 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32085416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11136968085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/Qwen2.5-42B-AGI/152b0cbe-e27b-4438-8326-e67f4e70e600.json b/data/hfopenllm_v2/allknowingroger/Qwen2.5-42B-AGI/152b0cbe-e27b-4438-8326-e67f4e70e600.json new file mode 100644 index 000000000..a56e8fc53 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Qwen2.5-42B-AGI/152b0cbe-e27b-4438-8326-e67f4e70e600.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Qwen2.5-42B-AGI/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": 
"documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-42B-AGI", + "id": "allknowingroger/Qwen2.5-42B-AGI", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 42.516 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1913 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2942 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.362 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1168 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Qwen2.5-7B-task2/c733c91f-79a9-49e5-9398-3a424ee1940a.json b/data/hfopenllm_v2/allknowingroger/Qwen2.5-7B-task2/c733c91f-79a9-49e5-9398-3a424ee1940a.json new file mode 100644 index 000000000..c7409a22c --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Qwen2.5-7B-task2/c733c91f-79a9-49e5-9398-3a424ee1940a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Qwen2.5-7B-task2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-task2", + "id": "allknowingroger/Qwen2.5-7B-task2", + "developer": "allknowingroger", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4527 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5626 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.355 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3163 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.437 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4517 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Qwen2.5-7B-task3/32d7b6c6-de5c-4864-a446-97dccce378c5.json b/data/hfopenllm_v2/allknowingroger/Qwen2.5-7B-task3/32d7b6c6-de5c-4864-a446-97dccce378c5.json new file mode 100644 index 000000000..0200a240d --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Qwen2.5-7B-task3/32d7b6c6-de5c-4864-a446-97dccce378c5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Qwen2.5-7B-task3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-task3", + "id": "allknowingroger/Qwen2.5-7B-task3", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + 
"source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5129 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5398 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2606 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3171 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4356 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4501 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Qwen2.5-7B-task4/7b22d02b-5bfd-4243-9ad9-c858d0af55a6.json b/data/hfopenllm_v2/allknowingroger/Qwen2.5-7B-task4/7b22d02b-5bfd-4243-9ad9-c858d0af55a6.json new file mode 100644 index 000000000..fcd61a787 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Qwen2.5-7B-task4/7b22d02b-5bfd-4243-9ad9-c858d0af55a6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Qwen2.5-7B-task4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-task4", + "id": "allknowingroger/Qwen2.5-7B-task4", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5005 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5583 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3112 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3205 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4395 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4561 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Qwen2.5-7B-task7/99650529-55d9-42b0-b812-761a30277e5e.json b/data/hfopenllm_v2/allknowingroger/Qwen2.5-7B-task7/99650529-55d9-42b0-b812-761a30277e5e.json new file mode 100644 index 000000000..3af393c86 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Qwen2.5-7B-task7/99650529-55d9-42b0-b812-761a30277e5e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Qwen2.5-7B-task7/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-task7", + "id": "allknowingroger/Qwen2.5-7B-task7", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4284 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": 
"SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5552 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.065 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3205 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4326 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4133 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Qwen2.5-7B-task8/81abbc2a-791b-4a39-bb46-97edfa14b9c0.json b/data/hfopenllm_v2/allknowingroger/Qwen2.5-7B-task8/81abbc2a-791b-4a39-bb46-97edfa14b9c0.json new file mode 100644 index 000000000..fcd83a7a4 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Qwen2.5-7B-task8/81abbc2a-791b-4a39-bb46-97edfa14b9c0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Qwen2.5-7B-task8/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-task8", + "id": "allknowingroger/Qwen2.5-7B-task8", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4645 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5525 + 
} + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3527 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3205 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4514 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4433 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Qwen2.5-slerp-14B/c658e535-7098-40fc-bea0-f5734d8f4ca9.json b/data/hfopenllm_v2/allknowingroger/Qwen2.5-slerp-14B/c658e535-7098-40fc-bea0-f5734d8f4ca9.json new file mode 100644 index 000000000..9b625885c --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Qwen2.5-slerp-14B/c658e535-7098-40fc-bea0-f5734d8f4ca9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Qwen2.5-slerp-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-slerp-14B", + "id": "allknowingroger/Qwen2.5-slerp-14B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4928 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6512 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4622 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3674 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4744 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5379 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/QwenSlerp12-7B/9e0656e9-9b82-4f6d-b00a-c09cf9cbc105.json b/data/hfopenllm_v2/allknowingroger/QwenSlerp12-7B/9e0656e9-9b82-4f6d-b00a-c09cf9cbc105.json new file mode 100644 index 000000000..e6896f1a4 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/QwenSlerp12-7B/9e0656e9-9b82-4f6d-b00a-c09cf9cbc105.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_QwenSlerp12-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwenSlerp12-7B", + "id": "allknowingroger/QwenSlerp12-7B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5076 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5556 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2946 + } + }, + { + "evaluation_name": "GPQA", + 
"source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3154 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4595 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4461 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/QwenSlerp4-14B/07c36058-e0e8-48ea-85f3-0a2cb2fe3443.json b/data/hfopenllm_v2/allknowingroger/QwenSlerp4-14B/07c36058-e0e8-48ea-85f3-0a2cb2fe3443.json new file mode 100644 index 000000000..38cf85407 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/QwenSlerp4-14B/07c36058-e0e8-48ea-85f3-0a2cb2fe3443.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_QwenSlerp4-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwenSlerp4-14B", + "id": "allknowingroger/QwenSlerp4-14B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6328 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6483 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3693 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3725 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.465 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5436 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/QwenSlerp5-14B/c41d8925-b56b-458e-b1a9-27dbbcaee149.json b/data/hfopenllm_v2/allknowingroger/QwenSlerp5-14B/c41d8925-b56b-458e-b1a9-27dbbcaee149.json new file mode 100644 index 000000000..615bfa0a9 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/QwenSlerp5-14B/c41d8925-b56b-458e-b1a9-27dbbcaee149.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_QwenSlerp5-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwenSlerp5-14B", + "id": "allknowingroger/QwenSlerp5-14B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7119 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6357 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3565 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3649 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4675 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5391 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/QwenSlerp6-14B/9136feb4-5c3e-48b3-bc70-c7816b8b189b.json b/data/hfopenllm_v2/allknowingroger/QwenSlerp6-14B/9136feb4-5c3e-48b3-bc70-c7816b8b189b.json new file mode 100644 index 000000000..32bf693fb --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/QwenSlerp6-14B/9136feb4-5c3e-48b3-bc70-c7816b8b189b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_QwenSlerp6-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwenSlerp6-14B", + "id": "allknowingroger/QwenSlerp6-14B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6867 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6384 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3724 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3733 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.469 + } + }, + { + "evaluation_name": 
"MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5406 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/QwenStock1-14B/c395ef02-9a50-4696-aad2-bcb32ba05f67.json b/data/hfopenllm_v2/allknowingroger/QwenStock1-14B/c395ef02-9a50-4696-aad2-bcb32ba05f67.json new file mode 100644 index 000000000..dd2bb81bf --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/QwenStock1-14B/c395ef02-9a50-4696-aad2-bcb32ba05f67.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_QwenStock1-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwenStock1-14B", + "id": "allknowingroger/QwenStock1-14B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5634 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6528 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3769 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3767 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.473 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5418 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/QwenStock2-14B/93f47969-556a-4fd4-b7bb-4d1c861a8d71.json b/data/hfopenllm_v2/allknowingroger/QwenStock2-14B/93f47969-556a-4fd4-b7bb-4d1c861a8d71.json new file mode 100644 index 000000000..83694e0b6 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/QwenStock2-14B/93f47969-556a-4fd4-b7bb-4d1c861a8d71.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_QwenStock2-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwenStock2-14B", + "id": "allknowingroger/QwenStock2-14B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5563 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6569 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3882 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3792 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4756 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5406 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/allknowingroger/QwenStock3-14B/349ae559-6c1f-4b2f-954c-e83cba1e603a.json b/data/hfopenllm_v2/allknowingroger/QwenStock3-14B/349ae559-6c1f-4b2f-954c-e83cba1e603a.json new file mode 100644 index 000000000..5d15474d8 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/QwenStock3-14B/349ae559-6c1f-4b2f-954c-e83cba1e603a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_QwenStock3-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwenStock3-14B", + "id": "allknowingroger/QwenStock3-14B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5615 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6565 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3776 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3784 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4756 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5428 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Qwenslerp2-14B/3e43c3f6-645b-4ab3-b684-b23eb67bc5d9.json b/data/hfopenllm_v2/allknowingroger/Qwenslerp2-14B/3e43c3f6-645b-4ab3-b684-b23eb67bc5d9.json new file mode 100644 index 000000000..74527b77f --- 
/dev/null +++ b/data/hfopenllm_v2/allknowingroger/Qwenslerp2-14B/3e43c3f6-645b-4ab3-b684-b23eb67bc5d9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Qwenslerp2-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwenslerp2-14B", + "id": "allknowingroger/Qwenslerp2-14B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5007 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6555 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4456 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3683 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4729 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5403 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Qwenslerp2-7B/500c8cd4-fe4e-44f3-86b7-b0efd387ab92.json b/data/hfopenllm_v2/allknowingroger/Qwenslerp2-7B/500c8cd4-fe4e-44f3-86b7-b0efd387ab92.json new file mode 100644 index 000000000..0d48394c6 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Qwenslerp2-7B/500c8cd4-fe4e-44f3-86b7-b0efd387ab92.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Qwenslerp2-7B/1770682486.623709", + 
"retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwenslerp2-7B", + "id": "allknowingroger/Qwenslerp2-7B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5294 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5609 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3421 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3129 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4356 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4515 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Qwenslerp3-14B/340a3ebb-bc06-404f-84e7-aeccc016fd32.json b/data/hfopenllm_v2/allknowingroger/Qwenslerp3-14B/340a3ebb-bc06-404f-84e7-aeccc016fd32.json new file mode 100644 index 000000000..3dedb719e --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Qwenslerp3-14B/340a3ebb-bc06-404f-84e7-aeccc016fd32.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Qwenslerp3-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { 
+ "name": "Qwenslerp3-14B", + "id": "allknowingroger/Qwenslerp3-14B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5052 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6521 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4464 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.375 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4676 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5395 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Qwenslerp3-7B/a6426f88-d7cc-4e6a-a2b5-76e59a52a6de.json b/data/hfopenllm_v2/allknowingroger/Qwenslerp3-7B/a6426f88-d7cc-4e6a-a2b5-76e59a52a6de.json new file mode 100644 index 000000000..c20870016 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Qwenslerp3-7B/a6426f88-d7cc-4e6a-a2b5-76e59a52a6de.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Qwenslerp3-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwenslerp3-7B", + "id": "allknowingroger/Qwenslerp3-7B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 
7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5018 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.558 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3218 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3247 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4515 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4542 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/ROGERphi-7B-slerp/bdd05c8f-b895-4c91-9a9f-a608a4259cbd.json b/data/hfopenllm_v2/allknowingroger/ROGERphi-7B-slerp/bdd05c8f-b895-4c91-9a9f-a608a4259cbd.json new file mode 100644 index 000000000..0ef272faf --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/ROGERphi-7B-slerp/bdd05c8f-b895-4c91-9a9f-a608a4259cbd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_ROGERphi-7B-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ROGERphi-7B-slerp", + "id": "allknowingroger/ROGERphi-7B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3861 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5196 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0733 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2886 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4685 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3053 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/RogerMerge-7B-slerp/0e1e45d4-2747-480d-9b1f-2b200e250271.json b/data/hfopenllm_v2/allknowingroger/RogerMerge-7B-slerp/0e1e45d4-2747-480d-9b1f-2b200e250271.json new file mode 100644 index 000000000..e44fe42b8 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/RogerMerge-7B-slerp/0e1e45d4-2747-480d-9b1f-2b200e250271.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_RogerMerge-7B-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RogerMerge-7B-slerp", + "id": "allknowingroger/RogerMerge-7B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3933 + } + }, + { + 
"evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.516 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0687 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.432 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.303 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/RogerMerge-7B-slerp/50289a8b-4522-4dca-b6dc-aa42193deefa.json b/data/hfopenllm_v2/allknowingroger/RogerMerge-7B-slerp/50289a8b-4522-4dca-b6dc-aa42193deefa.json deleted file mode 100644 index 1c35cbca1..000000000 --- a/data/hfopenllm_v2/allknowingroger/RogerMerge-7B-slerp/50289a8b-4522-4dca-b6dc-aa42193deefa.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_RogerMerge-7B-slerp/1762652580.002474", - "retrieved_timestamp": "1762652580.002475", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/RogerMerge-7B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/RogerMerge-7B-slerp", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39330199426410817 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 
- }, - "score_details": { - "score": 0.5160176493085935 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06873111782477341 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43197916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30302526595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/Rombos-LLM-V2.5-Qwen-42b/00f3f9ca-ae7d-4e62-9e7e-6bd202dbed59.json b/data/hfopenllm_v2/allknowingroger/Rombos-LLM-V2.5-Qwen-42b/00f3f9ca-ae7d-4e62-9e7e-6bd202dbed59.json new file mode 100644 index 000000000..213c2250c --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Rombos-LLM-V2.5-Qwen-42b/00f3f9ca-ae7d-4e62-9e7e-6bd202dbed59.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Rombos-LLM-V2.5-Qwen-42b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Rombos-LLM-V2.5-Qwen-42b", + "id": "allknowingroger/Rombos-LLM-V2.5-Qwen-42b", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 42.516 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1879 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2969 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2626 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3633 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1168 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Strangecoven-7B-slerp/c9e57ab2-c2a4-4935-b976-4bf24647b777.json b/data/hfopenllm_v2/allknowingroger/Strangecoven-7B-slerp/c9e57ab2-c2a4-4935-b976-4bf24647b777.json new file mode 100644 index 000000000..a4348c83f --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Strangecoven-7B-slerp/c9e57ab2-c2a4-4935-b976-4bf24647b777.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Strangecoven-7B-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Strangecoven-7B-slerp", + "id": "allknowingroger/Strangecoven-7B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3746 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5368 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0763 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + }, + { + 
"evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4199 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3364 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Strangecoven-7B-slerp/f125c8d1-57f3-4b79-ace4-2104b008a507.json b/data/hfopenllm_v2/allknowingroger/Strangecoven-7B-slerp/f125c8d1-57f3-4b79-ace4-2104b008a507.json deleted file mode 100644 index 81fee9104..000000000 --- a/data/hfopenllm_v2/allknowingroger/Strangecoven-7B-slerp/f125c8d1-57f3-4b79-ace4-2104b008a507.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Strangecoven-7B-slerp/1762652580.002888", - "retrieved_timestamp": "1762652580.002889", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Strangecoven-7B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Strangecoven-7B-slerp", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37464261492839 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5368022290282338 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07628398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4198854166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.33643617021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/Weirdslerp2-25B/61e517f7-e2db-48bd-8f4e-f62b5859b62e.json b/data/hfopenllm_v2/allknowingroger/Weirdslerp2-25B/61e517f7-e2db-48bd-8f4e-f62b5859b62e.json deleted file mode 100644 index ce85ee09d..000000000 --- a/data/hfopenllm_v2/allknowingroger/Weirdslerp2-25B/61e517f7-e2db-48bd-8f4e-f62b5859b62e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Weirdslerp2-25B/1762652580.00309", - "retrieved_timestamp": "1762652580.0030909", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Weirdslerp2-25B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Weirdslerp2-25B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 25.204 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1754068094877148 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2873695911207614 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24916107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3523541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11278257978723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/Weirdslerp2-25B/c22436a2-ec60-4220-82b3-123618165eb2.json b/data/hfopenllm_v2/allknowingroger/Weirdslerp2-25B/c22436a2-ec60-4220-82b3-123618165eb2.json new file mode 100644 index 000000000..a8975282d --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Weirdslerp2-25B/c22436a2-ec60-4220-82b3-123618165eb2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Weirdslerp2-25B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"Weirdslerp2-25B", + "id": "allknowingroger/Weirdslerp2-25B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 25.204 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1754 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2874 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2492 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3524 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1128 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/WestlakeMaziyar-7B-slerp/1f990438-dd84-44d2-99f9-a10035ecd652.json b/data/hfopenllm_v2/allknowingroger/WestlakeMaziyar-7B-slerp/1f990438-dd84-44d2-99f9-a10035ecd652.json new file mode 100644 index 000000000..75ae48ca3 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/WestlakeMaziyar-7B-slerp/1f990438-dd84-44d2-99f9-a10035ecd652.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_WestlakeMaziyar-7B-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "WestlakeMaziyar-7B-slerp", + "id": "allknowingroger/WestlakeMaziyar-7B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + 
"architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4838 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5245 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0665 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4474 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3078 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/WestlakeMaziyar-7B-slerp/2db948db-a9e5-41cf-9567-2f9198d80900.json b/data/hfopenllm_v2/allknowingroger/WestlakeMaziyar-7B-slerp/2db948db-a9e5-41cf-9567-2f9198d80900.json deleted file mode 100644 index 8dd4614f8..000000000 --- a/data/hfopenllm_v2/allknowingroger/WestlakeMaziyar-7B-slerp/2db948db-a9e5-41cf-9567-2f9198d80900.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_WestlakeMaziyar-7B-slerp/1762652580.003291", - "retrieved_timestamp": "1762652580.0032918", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/WestlakeMaziyar-7B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/WestlakeMaziyar-7B-slerp", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - 
"params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48377748817581795 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5245479952765804 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44738541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3077626329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/YamMaths-7B-slerp/52ab1e94-4e6f-4876-932b-a45a033dec1b.json b/data/hfopenllm_v2/allknowingroger/YamMaths-7B-slerp/52ab1e94-4e6f-4876-932b-a45a033dec1b.json deleted file mode 100644 index 1a168c29f..000000000 --- a/data/hfopenllm_v2/allknowingroger/YamMaths-7B-slerp/52ab1e94-4e6f-4876-932b-a45a033dec1b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_YamMaths-7B-slerp/1762652580.003488", - "retrieved_timestamp": "1762652580.003489", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/YamMaths-7B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/YamMaths-7B-slerp", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4148093724650594 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5155845857281723 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08534743202416918 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43836458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3130817819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/YamMaths-7B-slerp/f4564f5e-3595-466e-8201-0e2a4c50ff0d.json b/data/hfopenllm_v2/allknowingroger/YamMaths-7B-slerp/f4564f5e-3595-466e-8201-0e2a4c50ff0d.json new file mode 100644 index 000000000..ab0338d1d --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/YamMaths-7B-slerp/f4564f5e-3595-466e-8201-0e2a4c50ff0d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_YamMaths-7B-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "YamMaths-7B-slerp", + "id": "allknowingroger/YamMaths-7B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4148 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5156 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0853 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": 
"hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4384 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3131 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Yi-1.5-34B/040def3a-702d-4868-b429-39697ca36207.json b/data/hfopenllm_v2/allknowingroger/Yi-1.5-34B/040def3a-702d-4868-b429-39697ca36207.json new file mode 100644 index 000000000..73e379495 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Yi-1.5-34B/040def3a-702d-4868-b429-39697ca36207.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Yi-1.5-34B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yi-1.5-34B", + "id": "allknowingroger/Yi-1.5-34B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1639 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3857 + } + }, + { 
+ "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1095 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Yi-1.5-34B/98455065-72e1-4dad-bce1-1c3ceddf5433.json b/data/hfopenllm_v2/allknowingroger/Yi-1.5-34B/98455065-72e1-4dad-bce1-1c3ceddf5433.json deleted file mode 100644 index acb89e6c7..000000000 --- a/data/hfopenllm_v2/allknowingroger/Yi-1.5-34B/98455065-72e1-4dad-bce1-1c3ceddf5433.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Yi-1.5-34B/1762652580.0036852", - "retrieved_timestamp": "1762652580.003686", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Yi-1.5-34B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Yi-1.5-34B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16391618682872555 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28272506287695653 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38565625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10954122340425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/Yi-blossom-40B/9e24fd65-56ec-4160-b299-b34d702a3231.json b/data/hfopenllm_v2/allknowingroger/Yi-blossom-40B/9e24fd65-56ec-4160-b299-b34d702a3231.json new file mode 100644 index 000000000..6517b6a6e --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Yi-blossom-40B/9e24fd65-56ec-4160-b299-b34d702a3231.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/allknowingroger_Yi-blossom-40B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yi-blossom-40B", + "id": "allknowingroger/Yi-blossom-40B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 18.769 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2009 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3215 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3843 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.108 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Yi-blossom-40B/b35eaca2-0f77-4171-bbcf-23a191b055f2.json b/data/hfopenllm_v2/allknowingroger/Yi-blossom-40B/b35eaca2-0f77-4171-bbcf-23a191b055f2.json deleted file mode 100644 index 336baf1f5..000000000 --- a/data/hfopenllm_v2/allknowingroger/Yi-blossom-40B/b35eaca2-0f77-4171-bbcf-23a191b055f2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Yi-blossom-40B/1762652580.004046", - "retrieved_timestamp": "1762652580.0040479", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Yi-blossom-40B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Yi-blossom-40B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 18.769 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20088587170928693 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32150442258143547 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3842604166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10804521276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/Yibuddy-35B/216bf9f8-9521-4311-a40b-8a847271265c.json b/data/hfopenllm_v2/allknowingroger/Yibuddy-35B/216bf9f8-9521-4311-a40b-8a847271265c.json new file mode 100644 index 000000000..a8d56323b --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Yibuddy-35B/216bf9f8-9521-4311-a40b-8a847271265c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Yibuddy-35B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yibuddy-35B", + "id": "allknowingroger/Yibuddy-35B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4235 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + 
"source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5916 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1571 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3557 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4505 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4489 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Yibuddy-35B/dc2688b9-9dff-4a2e-b3d8-3bdc82634d20.json b/data/hfopenllm_v2/allknowingroger/Yibuddy-35B/dc2688b9-9dff-4a2e-b3d8-3bdc82634d20.json deleted file mode 100644 index 7faa4ec0b..000000000 --- a/data/hfopenllm_v2/allknowingroger/Yibuddy-35B/dc2688b9-9dff-4a2e-b3d8-3bdc82634d20.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Yibuddy-35B/1762652580.004411", - "retrieved_timestamp": "1762652580.004412", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Yibuddy-35B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Yibuddy-35B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4234774841864032 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5916185369526096 - } - }, - { - "evaluation_name": "MATH Level 5", - 
"metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15709969788519637 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35570469798657717 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45045833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44888630319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/Yillama-40B/45f8c4fb-3591-44df-a4f0-57093b9bae23.json b/data/hfopenllm_v2/allknowingroger/Yillama-40B/45f8c4fb-3591-44df-a4f0-57093b9bae23.json new file mode 100644 index 000000000..925316eac --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Yillama-40B/45f8c4fb-3591-44df-a4f0-57093b9bae23.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Yillama-40B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yillama-40B", + "id": "allknowingroger/Yillama-40B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1697 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4063 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + 
"evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3501 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1981 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Yislerp-34B/723d2f60-f12a-4abb-9061-807fd38e7d51.json b/data/hfopenllm_v2/allknowingroger/Yislerp-34B/723d2f60-f12a-4abb-9061-807fd38e7d51.json deleted file mode 100644 index a8d92c2d1..000000000 --- a/data/hfopenllm_v2/allknowingroger/Yislerp-34B/723d2f60-f12a-4abb-9061-807fd38e7d51.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Yislerp-34B/1762652580.0049741", - "retrieved_timestamp": "1762652580.004975", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Yislerp-34B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Yislerp-34B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3691970637907419 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6158722731484186 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21601208459214502 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35822147651006714 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.456625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4751496010638298 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/allknowingroger/Yislerp-34B/d17275ef-8a32-4fcb-94f4-fb24299ba50e.json b/data/hfopenllm_v2/allknowingroger/Yislerp-34B/d17275ef-8a32-4fcb-94f4-fb24299ba50e.json new file mode 100644 index 000000000..8c5bc4734 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Yislerp-34B/d17275ef-8a32-4fcb-94f4-fb24299ba50e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Yislerp-34B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yislerp-34B", + "id": "allknowingroger/Yislerp-34B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3692 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6159 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.216 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3582 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4566 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4751 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Yislerp2-34B/61b79e7d-0f50-4cfe-825c-ed5b23d943f3.json b/data/hfopenllm_v2/allknowingroger/Yislerp2-34B/61b79e7d-0f50-4cfe-825c-ed5b23d943f3.json new file mode 100644 index 000000000..15c44b31d --- /dev/null +++ 
b/data/hfopenllm_v2/allknowingroger/Yislerp2-34B/61b79e7d-0f50-4cfe-825c-ed5b23d943f3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Yislerp2-34B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yislerp2-34B", + "id": "allknowingroger/Yislerp2-34B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3999 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6246 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2296 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3641 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.453 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4724 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Yislerp2-34B/ce55aca1-80bd-4711-ad05-d812d206bd14.json b/data/hfopenllm_v2/allknowingroger/Yislerp2-34B/ce55aca1-80bd-4711-ad05-d812d206bd14.json deleted file mode 100644 index 52fe82cdf..000000000 --- a/data/hfopenllm_v2/allknowingroger/Yislerp2-34B/ce55aca1-80bd-4711-ad05-d812d206bd14.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Yislerp2-34B/1762652580.005196", - "retrieved_timestamp": 
"1762652580.005197", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Yislerp2-34B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Yislerp2-34B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39994658616914236 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6245771970170245 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.229607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3640939597315436 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45296875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.472406914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/Yunconglong-13B-slerp/113c3507-b738-4b06-ada8-da93b19c6ae2.json b/data/hfopenllm_v2/allknowingroger/Yunconglong-13B-slerp/113c3507-b738-4b06-ada8-da93b19c6ae2.json new file mode 100644 index 000000000..ca824d478 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Yunconglong-13B-slerp/113c3507-b738-4b06-ada8-da93b19c6ae2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_Yunconglong-13B-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yunconglong-13B-slerp", + "id": "allknowingroger/Yunconglong-13B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 12.879 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4242 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5166 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0544 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4161 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3036 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Yunconglong-13B-slerp/8ae47af1-5ae6-4cb9-ac94-8d70fda5126d.json b/data/hfopenllm_v2/allknowingroger/Yunconglong-13B-slerp/8ae47af1-5ae6-4cb9-ac94-8d70fda5126d.json deleted file mode 100644 index 887c9ad0d..000000000 --- a/data/hfopenllm_v2/allknowingroger/Yunconglong-13B-slerp/8ae47af1-5ae6-4cb9-ac94-8d70fda5126d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Yunconglong-13B-slerp/1762652580.005601", - "retrieved_timestamp": "1762652580.005603", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Yunconglong-13B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Yunconglong-13B-slerp", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42417673993891764 - } - }, - { - "evaluation_name": "BBH", - "metric_config": 
{ - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5165807158493828 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4160729166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30360704787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/limyClown-7B-slerp/420f8334-c420-4b8f-8853-fea8f4f5ac6d.json b/data/hfopenllm_v2/allknowingroger/limyClown-7B-slerp/420f8334-c420-4b8f-8853-fea8f4f5ac6d.json deleted file mode 100644 index f97417a00..000000000 --- a/data/hfopenllm_v2/allknowingroger/limyClown-7B-slerp/420f8334-c420-4b8f-8853-fea8f4f5ac6d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_limyClown-7B-slerp/1762652580.005876", - "retrieved_timestamp": "1762652580.005877", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/limyClown-7B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/limyClown-7B-slerp", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4017451473202215 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5147517317055973 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06873111782477341 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4293125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30377327127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/allknowingroger/limyClown-7B-slerp/8835d5c1-8350-4d42-a753-82b94dffda3b.json b/data/hfopenllm_v2/allknowingroger/limyClown-7B-slerp/8835d5c1-8350-4d42-a753-82b94dffda3b.json new file mode 100644 index 000000000..b54682c35 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/limyClown-7B-slerp/8835d5c1-8350-4d42-a753-82b94dffda3b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_limyClown-7B-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "limyClown-7B-slerp", + "id": "allknowingroger/limyClown-7B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4017 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5148 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0687 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4293 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": 
"hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3038 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/llama3-Jallabi-40B-s/dc3bbda7-5007-44c7-b1ba-af0c82d100ee.json b/data/hfopenllm_v2/allknowingroger/llama3-Jallabi-40B-s/dc3bbda7-5007-44c7-b1ba-af0c82d100ee.json new file mode 100644 index 000000000..3d7d9aa70 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/llama3-Jallabi-40B-s/dc3bbda7-5007-44c7-b1ba-af0c82d100ee.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_llama3-Jallabi-40B-s/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama3-Jallabi-40B-s", + "id": "allknowingroger/llama3-Jallabi-40B-s", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 18.769 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1921 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3252 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2374 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.1088 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/llama3AnFeng-40B/0d24ee06-a6b4-4be7-b3ef-c4f53b4fc414.json b/data/hfopenllm_v2/allknowingroger/llama3AnFeng-40B/0d24ee06-a6b4-4be7-b3ef-c4f53b4fc414.json new file mode 100644 index 000000000..ec77ddbf3 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/llama3AnFeng-40B/0d24ee06-a6b4-4be7-b3ef-c4f53b4fc414.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allknowingroger_llama3AnFeng-40B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama3AnFeng-40B", + "id": "allknowingroger/llama3AnFeng-40B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 39.971 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1742 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3794 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.394 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.198 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allura-org/L3.1-8b-RP-Ink/cb8c45ae-1be6-4ab0-9317-cfbfc8850dc4.json 
b/data/hfopenllm_v2/allura-org/L3.1-8b-RP-Ink/cb8c45ae-1be6-4ab0-9317-cfbfc8850dc4.json deleted file mode 100644 index d4c8f5ab3..000000000 --- a/data/hfopenllm_v2/allura-org/L3.1-8b-RP-Ink/cb8c45ae-1be6-4ab0-9317-cfbfc8850dc4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allura-org_L3.1-8b-RP-Ink/1762652580.006678", - "retrieved_timestamp": "1762652580.006679", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allura-org/L3.1-8b-RP-Ink", - "developer": "allura-org", - "inference_platform": "unknown", - "id": "allura-org/L3.1-8b-RP-Ink", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7811063533646281 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48284724308518095 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14803625377643503 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3608229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3427526595744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/allura-org/L3.1-8b-RP-Ink/f2415b7a-2cd7-4a05-834b-7da992e1da1a.json b/data/hfopenllm_v2/allura-org/L3.1-8b-RP-Ink/f2415b7a-2cd7-4a05-834b-7da992e1da1a.json new file mode 100644 index 000000000..acab59baa --- /dev/null +++ b/data/hfopenllm_v2/allura-org/L3.1-8b-RP-Ink/f2415b7a-2cd7-4a05-834b-7da992e1da1a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allura-org_L3.1-8b-RP-Ink/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3.1-8b-RP-Ink", + "id": "allura-org/L3.1-8b-RP-Ink", + "developer": "allura-org", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": 
"LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7811 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4828 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.148 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3608 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3428 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allura-org/MN-12b-RP-Ink/01af237f-40d8-4841-a90d-13dce6db8634.json b/data/hfopenllm_v2/allura-org/MN-12b-RP-Ink/01af237f-40d8-4841-a90d-13dce6db8634.json new file mode 100644 index 000000000..36c98b87c --- /dev/null +++ b/data/hfopenllm_v2/allura-org/MN-12b-RP-Ink/01af237f-40d8-4841-a90d-13dce6db8634.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allura-org_MN-12b-RP-Ink/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MN-12b-RP-Ink", + "id": "allura-org/MN-12b-RP-Ink", + "developer": "allura-org", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7186 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4834 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1186 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3818 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3514 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allura-org/MN-12b-RP-Ink/3dc6cdf9-e75d-4f9f-9b91-9592e70566f8.json b/data/hfopenllm_v2/allura-org/MN-12b-RP-Ink/3dc6cdf9-e75d-4f9f-9b91-9592e70566f8.json deleted file mode 100644 index 093693258..000000000 --- a/data/hfopenllm_v2/allura-org/MN-12b-RP-Ink/3dc6cdf9-e75d-4f9f-9b91-9592e70566f8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allura-org_MN-12b-RP-Ink/1762652580.006974", - "retrieved_timestamp": "1762652580.006975", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allura-org/MN-12b-RP-Ink", - "developer": "allura-org", - "inference_platform": "unknown", - "id": "allura-org/MN-12b-RP-Ink", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7186332265056716 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": 
"Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4833826588550261 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11858006042296072 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38184375000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3513962765957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/allura-org/MS-Meadowlark-22B/7ea2cf22-114f-449c-a9cf-c4f379646cd3.json b/data/hfopenllm_v2/allura-org/MS-Meadowlark-22B/7ea2cf22-114f-449c-a9cf-c4f379646cd3.json deleted file mode 100644 index dc0b92289..000000000 --- a/data/hfopenllm_v2/allura-org/MS-Meadowlark-22B/7ea2cf22-114f-449c-a9cf-c4f379646cd3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allura-org_MS-Meadowlark-22B/1762652580.007196", - "retrieved_timestamp": "1762652580.007197", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allura-org/MS-Meadowlark-22B", - "developer": "allura-org", - "inference_platform": "unknown", - "id": "allura-org/MS-Meadowlark-22B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 22.247 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.669698621878837 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5162576933217772 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18353474320241692 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32550335570469796 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": 
"Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3842604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38231382978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/allura-org/MS-Meadowlark-22B/d69bb392-fd38-4f57-b567-24566896167b.json b/data/hfopenllm_v2/allura-org/MS-Meadowlark-22B/d69bb392-fd38-4f57-b567-24566896167b.json new file mode 100644 index 000000000..242f13b4f --- /dev/null +++ b/data/hfopenllm_v2/allura-org/MS-Meadowlark-22B/d69bb392-fd38-4f57-b567-24566896167b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allura-org_MS-Meadowlark-22B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MS-Meadowlark-22B", + "id": "allura-org/MS-Meadowlark-22B", + "developer": "allura-org", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6697 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5163 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1835 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3255 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3843 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy 
on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3823 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allura-org/Mistral-Small-24b-Sertraline-0304/63503943-1c1e-4dac-9c41-4933fbb44b70.json b/data/hfopenllm_v2/allura-org/Mistral-Small-24b-Sertraline-0304/63503943-1c1e-4dac-9c41-4933fbb44b70.json new file mode 100644 index 000000000..a1e249c7d --- /dev/null +++ b/data/hfopenllm_v2/allura-org/Mistral-Small-24b-Sertraline-0304/63503943-1c1e-4dac-9c41-4933fbb44b70.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allura-org_Mistral-Small-24b-Sertraline-0304/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-Small-24b-Sertraline-0304", + "id": "allura-org/Mistral-Small-24b-Sertraline-0304", + "developer": "allura-org", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.68 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6525 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2228 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3515 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4395 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5106 + } + 
} + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allura-org/Mistral-Small-Sisyphus-24b-2503/80c5d343-41e6-45d7-8921-62586a3cd270.json b/data/hfopenllm_v2/allura-org/Mistral-Small-Sisyphus-24b-2503/80c5d343-41e6-45d7-8921-62586a3cd270.json new file mode 100644 index 000000000..7a4d2fa7f --- /dev/null +++ b/data/hfopenllm_v2/allura-org/Mistral-Small-Sisyphus-24b-2503/80c5d343-41e6-45d7-8921-62586a3cd270.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allura-org_Mistral-Small-Sisyphus-24b-2503/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-Small-Sisyphus-24b-2503", + "id": "allura-org/Mistral-Small-Sisyphus-24b-2503", + "developer": "allura-org", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6848 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.627 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2626 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3977 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5127 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allura-org/MoE-Girl-1BA-7BT/2c27d7f6-60fd-49f3-8666-784f2a16031b.json 
b/data/hfopenllm_v2/allura-org/MoE-Girl-1BA-7BT/2c27d7f6-60fd-49f3-8666-784f2a16031b.json new file mode 100644 index 000000000..93ee2f96c --- /dev/null +++ b/data/hfopenllm_v2/allura-org/MoE-Girl-1BA-7BT/2c27d7f6-60fd-49f3-8666-784f2a16031b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allura-org_MoE-Girl-1BA-7BT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MoE-Girl-1BA-7BT", + "id": "allura-org/MoE-Girl-1BA-7BT", + "developer": "allura-org", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "OlmoeForCausalLM", + "params_billions": 6.919 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2705 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3139 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0151 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3436 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1218 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allura-org/MoE-Girl-1BA-7BT/5b3176a0-7ded-409a-bc54-70e0ecf9b325.json b/data/hfopenllm_v2/allura-org/MoE-Girl-1BA-7BT/5b3176a0-7ded-409a-bc54-70e0ecf9b325.json deleted file mode 100644 index 042ea8501..000000000 --- a/data/hfopenllm_v2/allura-org/MoE-Girl-1BA-7BT/5b3176a0-7ded-409a-bc54-70e0ecf9b325.json +++ /dev/null @@ -1,105 
+0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allura-org_MoE-Girl-1BA-7BT/1762652580.0080209", - "retrieved_timestamp": "1762652580.008022", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allura-org/MoE-Girl-1BA-7BT", - "developer": "allura-org", - "inference_platform": "unknown", - "id": "allura-org/MoE-Girl-1BA-7BT", - "additional_details": { - "precision": "bfloat16", - "architecture": "OlmoeForCausalLM", - "params_billions": 6.919 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27050337548814923 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3139175363262408 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34355208333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12175864361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/allura-org/TQ2.5-14B-Aletheia-v1/b46bef60-b37b-4510-a92a-fb4c0cabb357.json b/data/hfopenllm_v2/allura-org/TQ2.5-14B-Aletheia-v1/b46bef60-b37b-4510-a92a-fb4c0cabb357.json deleted file mode 100644 index 8c54b6efe..000000000 --- a/data/hfopenllm_v2/allura-org/TQ2.5-14B-Aletheia-v1/b46bef60-b37b-4510-a92a-fb4c0cabb357.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allura-org_TQ2.5-14B-Aletheia-v1/1762652580.008265", - "retrieved_timestamp": "1762652580.008276", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allura-org/TQ2.5-14B-Aletheia-v1", - "developer": "allura-org", - "inference_platform": "unknown", - "id": "allura-org/TQ2.5-14B-Aletheia-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - 
{ - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7530297388706411 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6585074769185942 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33987915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3624161073825503 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44515625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5241023936170213 - } - } - ] -} diff --git a/data/hfopenllm_v2/allura-org/TQ2.5-14B-Aletheia-v1/cbcc1e64-8455-4382-8999-654d1757bbd6.json b/data/hfopenllm_v2/allura-org/TQ2.5-14B-Aletheia-v1/cbcc1e64-8455-4382-8999-654d1757bbd6.json new file mode 100644 index 000000000..200aa0dee --- /dev/null +++ b/data/hfopenllm_v2/allura-org/TQ2.5-14B-Aletheia-v1/cbcc1e64-8455-4382-8999-654d1757bbd6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allura-org_TQ2.5-14B-Aletheia-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TQ2.5-14B-Aletheia-v1", + "id": "allura-org/TQ2.5-14B-Aletheia-v1", + "developer": "allura-org", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.753 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6585 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + 
"metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3399 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3624 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4452 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5241 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allura-org/TQ2.5-14B-Neon-v1/1bea4f6b-7a41-4907-baca-430c7ea179e9.json b/data/hfopenllm_v2/allura-org/TQ2.5-14B-Neon-v1/1bea4f6b-7a41-4907-baca-430c7ea179e9.json new file mode 100644 index 000000000..42bdffdcf --- /dev/null +++ b/data/hfopenllm_v2/allura-org/TQ2.5-14B-Neon-v1/1bea4f6b-7a41-4907-baca-430c7ea179e9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allura-org_TQ2.5-14B-Neon-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TQ2.5-14B-Neon-v1", + "id": "allura-org/TQ2.5-14B-Neon-v1", + "developer": "allura-org", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6754 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6553 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3603 + } + }, + { + "evaluation_name": 
"GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3716 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.461 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5253 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allura-org/TQ2.5-14B-Neon-v1/68bdab24-8324-4190-abd2-ad3ad5a7a853.json b/data/hfopenllm_v2/allura-org/TQ2.5-14B-Neon-v1/68bdab24-8324-4190-abd2-ad3ad5a7a853.json deleted file mode 100644 index 300f7fa33..000000000 --- a/data/hfopenllm_v2/allura-org/TQ2.5-14B-Neon-v1/68bdab24-8324-4190-abd2-ad3ad5a7a853.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allura-org_TQ2.5-14B-Neon-v1/1762652580.0085812", - "retrieved_timestamp": "1762652580.0085819", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allura-org/TQ2.5-14B-Neon-v1", - "developer": "allura-org", - "inference_platform": "unknown", - "id": "allura-org/TQ2.5-14B-Neon-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6754189993661264 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.655304131044165 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36027190332326287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3716442953020134 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.461 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5252659574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/allura-org/Teleut-7b/298ce89b-966c-4f4e-9da5-3803a395188f.json b/data/hfopenllm_v2/allura-org/Teleut-7b/298ce89b-966c-4f4e-9da5-3803a395188f.json new file mode 100644 index 000000000..9486047d3 --- /dev/null +++ b/data/hfopenllm_v2/allura-org/Teleut-7b/298ce89b-966c-4f4e-9da5-3803a395188f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/allura-org_Teleut-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Teleut-7b", + "id": "allura-org/Teleut-7b", + "developer": "allura-org", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6379 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5141 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2409 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3263 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.464 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4131 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/allura-org/Teleut-7b/85ceb275-787a-4dbc-981a-513fd16606ea.json b/data/hfopenllm_v2/allura-org/Teleut-7b/85ceb275-787a-4dbc-981a-513fd16606ea.json deleted file mode 100644 index f2ec4ee02..000000000 --- a/data/hfopenllm_v2/allura-org/Teleut-7b/85ceb275-787a-4dbc-981a-513fd16606ea.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allura-org_Teleut-7b/1762652580.008814", - "retrieved_timestamp": "1762652580.008814", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allura-org/Teleut-7b", - "developer": "allura-org", - "inference_platform": "unknown", - "id": "allura-org/Teleut-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6378752820294595 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5141277814496585 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24093655589123866 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3263422818791946 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4640416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4130651595744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/aloobun/Meta-Llama-3-7B-28Layers/ea27a4d6-8c32-4b36-873d-1046ae6240e5.json b/data/hfopenllm_v2/aloobun/Meta-Llama-3-7B-28Layers/ea27a4d6-8c32-4b36-873d-1046ae6240e5.json new file mode 100644 index 000000000..1550e1cda --- /dev/null +++ b/data/hfopenllm_v2/aloobun/Meta-Llama-3-7B-28Layers/ea27a4d6-8c32-4b36-873d-1046ae6240e5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/aloobun_Meta-Llama-3-7B-28Layers/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Meta-Llama-3-7B-28Layers", + "id": "aloobun/Meta-Llama-3-7B-28Layers", + "developer": "aloobun", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.158 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1964 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4437 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0279 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3589 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.316 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/aloobun/d-SmolLM2-360M/1ad7b4c4-8074-482e-9010-ce1552325e15.json b/data/hfopenllm_v2/aloobun/d-SmolLM2-360M/1ad7b4c4-8074-482e-9010-ce1552325e15.json deleted file mode 100644 index e4119c074..000000000 --- a/data/hfopenllm_v2/aloobun/d-SmolLM2-360M/1ad7b4c4-8074-482e-9010-ce1552325e15.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/aloobun_d-SmolLM2-360M/1762652580.0092921", - "retrieved_timestamp": "1762652580.009293", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "aloobun/d-SmolLM2-360M", - "developer": "aloobun", - "inference_platform": "unknown", - "id": "aloobun/d-SmolLM2-360M", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.362 - } - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20970358648386284 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3195784405636826 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3980625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11693816489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/aloobun/d-SmolLM2-360M/73d5905d-7825-43ba-8051-7e1f5639b857.json b/data/hfopenllm_v2/aloobun/d-SmolLM2-360M/73d5905d-7825-43ba-8051-7e1f5639b857.json new file mode 100644 index 000000000..9ae14979c --- /dev/null +++ b/data/hfopenllm_v2/aloobun/d-SmolLM2-360M/73d5905d-7825-43ba-8051-7e1f5639b857.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/aloobun_d-SmolLM2-360M/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "d-SmolLM2-360M", + "id": "aloobun/d-SmolLM2-360M", + "developer": "aloobun", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.362 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2097 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3196 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": 
"Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0128 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2534 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3981 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1169 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alpindale/WizardLM-2-8x22B/956b8589-a048-43be-9cfd-05658d3c57ca.json b/data/hfopenllm_v2/alpindale/WizardLM-2-8x22B/956b8589-a048-43be-9cfd-05658d3c57ca.json new file mode 100644 index 000000000..11db1c919 --- /dev/null +++ b/data/hfopenllm_v2/alpindale/WizardLM-2-8x22B/956b8589-a048-43be-9cfd-05658d3c57ca.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/alpindale_WizardLM-2-8x22B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "WizardLM-2-8x22B", + "id": "alpindale/WizardLM-2-8x22B", + "developer": "alpindale", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 140.621 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5272 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6377 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + 
"source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3817 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4387 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4596 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alpindale/WizardLM-2-8x22B/c2899c4e-5bc9-4b0b-8938-b9848b86fe37.json b/data/hfopenllm_v2/alpindale/WizardLM-2-8x22B/c2899c4e-5bc9-4b0b-8938-b9848b86fe37.json deleted file mode 100644 index 99043c9c7..000000000 --- a/data/hfopenllm_v2/alpindale/WizardLM-2-8x22B/c2899c4e-5bc9-4b0b-8938-b9848b86fe37.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/alpindale_WizardLM-2-8x22B/1762652580.009551", - "retrieved_timestamp": "1762652580.0095518", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "alpindale/WizardLM-2-8x22B", - "developer": "alpindale", - "inference_platform": "unknown", - "id": "alpindale/WizardLM-2-8x22B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 140.621 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5272166739805937 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6377307938917097 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38171140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4387083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45960771276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/alpindale/magnum-72b-v1/186687f8-ed25-44c9-b634-36db1c734844.json b/data/hfopenllm_v2/alpindale/magnum-72b-v1/186687f8-ed25-44c9-b634-36db1c734844.json deleted file mode 100644 index 862cd7fda..000000000 --- a/data/hfopenllm_v2/alpindale/magnum-72b-v1/186687f8-ed25-44c9-b634-36db1c734844.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/alpindale_magnum-72b-v1/1762652580.0098088", - "retrieved_timestamp": "1762652580.00981", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "alpindale/magnum-72b-v1", - "developer": "alpindale", - "inference_platform": "unknown", - "id": "alpindale/magnum-72b-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7606484128778308 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6982215794373214 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39803625377643503 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39093959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4489375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5467918882978723 - } - } - ] -} diff --git a/data/hfopenllm_v2/alpindale/magnum-72b-v1/36f597b4-8f53-4b40-9c0e-c9284743e456.json b/data/hfopenllm_v2/alpindale/magnum-72b-v1/36f597b4-8f53-4b40-9c0e-c9284743e456.json new file mode 100644 index 000000000..6b8bdd31e --- /dev/null +++ b/data/hfopenllm_v2/alpindale/magnum-72b-v1/36f597b4-8f53-4b40-9c0e-c9284743e456.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/alpindale_magnum-72b-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "magnum-72b-v1", + "id": "alpindale/magnum-72b-v1", + "developer": "alpindale", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7606 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6982 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.398 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3909 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4489 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5468 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/altomek/YiSM-34B-0rn/7b67e526-7588-4c62-9293-55e77851c4c7.json b/data/hfopenllm_v2/altomek/YiSM-34B-0rn/7b67e526-7588-4c62-9293-55e77851c4c7.json new file mode 100644 index 000000000..f2755bff5 --- /dev/null +++ b/data/hfopenllm_v2/altomek/YiSM-34B-0rn/7b67e526-7588-4c62-9293-55e77851c4c7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/altomek_YiSM-34B-0rn/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "YiSM-34B-0rn", + "id": "altomek/YiSM-34B-0rn", + "developer": "altomek", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": 
"LlamaForCausalLM", + "params_billions": 34.389 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4284 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.614 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2281 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3716 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.445 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4696 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/altomek/YiSM-34B-0rn/a9c75810-f51d-4fd3-8c96-6afdbc0f278c.json b/data/hfopenllm_v2/altomek/YiSM-34B-0rn/a9c75810-f51d-4fd3-8c96-6afdbc0f278c.json deleted file mode 100644 index a1f616e2d..000000000 --- a/data/hfopenllm_v2/altomek/YiSM-34B-0rn/a9c75810-f51d-4fd3-8c96-6afdbc0f278c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/altomek_YiSM-34B-0rn/1762652580.010027", - "retrieved_timestamp": "1762652580.0100281", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "altomek/YiSM-34B-0rn", - "developer": "altomek", - "inference_platform": "unknown", - "id": "altomek/YiSM-34B-0rn", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on 
IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.428373382624769 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6140009573868866 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2280966767371601 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3716442953020134 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.445 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4695811170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/amazon/MegaBeam-Mistral-7B-300k/8bc96d6d-0cd7-49c4-8112-7d8fb1c45199.json b/data/hfopenllm_v2/amazon/MegaBeam-Mistral-7B-300k/8bc96d6d-0cd7-49c4-8112-7d8fb1c45199.json new file mode 100644 index 000000000..6587a17ee --- /dev/null +++ b/data/hfopenllm_v2/amazon/MegaBeam-Mistral-7B-300k/8bc96d6d-0cd7-49c4-8112-7d8fb1c45199.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/amazon_MegaBeam-Mistral-7B-300k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MegaBeam-Mistral-7B-300k", + "id": "amazon/MegaBeam-Mistral-7B-300k", + "developer": "amazon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5203 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4228 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0211 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2735 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.398 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2549 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/amd/AMD-Llama-135m/6751a200-0bd9-498e-a991-ebe22375633d.json b/data/hfopenllm_v2/amd/AMD-Llama-135m/6751a200-0bd9-498e-a991-ebe22375633d.json new file mode 100644 index 000000000..c580f04da --- /dev/null +++ b/data/hfopenllm_v2/amd/AMD-Llama-135m/6751a200-0bd9-498e-a991-ebe22375633d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/amd_AMD-Llama-135m/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AMD-Llama-135m", + "id": "amd/AMD-Llama-135m", + "developer": "amd", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.134 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1918 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2969 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0076 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3846 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1169 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/amd/AMD-Llama-135m/f41442e3-5aa7-4ca4-9e61-a5e13965a3e4.json b/data/hfopenllm_v2/amd/AMD-Llama-135m/f41442e3-5aa7-4ca4-9e61-a5e13965a3e4.json new file mode 100644 index 000000000..ac33d4f61 --- /dev/null +++ b/data/hfopenllm_v2/amd/AMD-Llama-135m/f41442e3-5aa7-4ca4-9e61-a5e13965a3e4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/amd_AMD-Llama-135m/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AMD-Llama-135m", + "id": "amd/AMD-Llama-135m", + "developer": "amd", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1842 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2974 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0053 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2525 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.378 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1169 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/anakin87/gemma-2b-orpo/b105b62a-ce77-4387-b679-1adf2782b2f4.json b/data/hfopenllm_v2/anakin87/gemma-2b-orpo/b105b62a-ce77-4387-b679-1adf2782b2f4.json new file mode 100644 index 000000000..83e2b1b5b --- /dev/null +++ b/data/hfopenllm_v2/anakin87/gemma-2b-orpo/b105b62a-ce77-4387-b679-1adf2782b2f4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/anakin87_gemma-2b-orpo/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2b-orpo", + "id": "anakin87/gemma-2b-orpo", + "developer": "anakin87", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 2.506 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2478 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3426 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0189 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3728 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": 
"MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1306 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/anthracite-org/magnum-v1-72b/6d98f0fa-25c9-409b-b82e-b3c128bf47b6.json b/data/hfopenllm_v2/anthracite-org/magnum-v1-72b/6d98f0fa-25c9-409b-b82e-b3c128bf47b6.json deleted file mode 100644 index 2925e434c..000000000 --- a/data/hfopenllm_v2/anthracite-org/magnum-v1-72b/6d98f0fa-25c9-409b-b82e-b3c128bf47b6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v1-72b/1762652580.0112262", - "retrieved_timestamp": "1762652580.011227", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "anthracite-org/magnum-v1-72b", - "developer": "anthracite-org", - "inference_platform": "unknown", - "id": "anthracite-org/magnum-v1-72b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7606484128778308 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6982215794373214 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39803625377643503 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39093959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4489375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5486203457446809 - } - } - ] -} diff --git a/data/hfopenllm_v2/anthracite-org/magnum-v1-72b/72180fd7-bf34-4758-b02f-7d11859700c7.json b/data/hfopenllm_v2/anthracite-org/magnum-v1-72b/72180fd7-bf34-4758-b02f-7d11859700c7.json new file mode 100644 index 000000000..18b204986 --- /dev/null +++ b/data/hfopenllm_v2/anthracite-org/magnum-v1-72b/72180fd7-bf34-4758-b02f-7d11859700c7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/anthracite-org_magnum-v1-72b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "magnum-v1-72b", + "id": "anthracite-org/magnum-v1-72b", + "developer": "anthracite-org", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7606 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6982 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.398 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3909 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4489 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5486 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/anthracite-org/magnum-v2-12b/72821a7d-cc27-4557-82d4-7e30286ea126.json b/data/hfopenllm_v2/anthracite-org/magnum-v2-12b/72821a7d-cc27-4557-82d4-7e30286ea126.json deleted file mode 100644 index df841f420..000000000 --- a/data/hfopenllm_v2/anthracite-org/magnum-v2-12b/72821a7d-cc27-4557-82d4-7e30286ea126.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v2-12b/1762652580.011473", - "retrieved_timestamp": "1762652580.011474", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "anthracite-org/magnum-v2-12b", - "developer": "anthracite-org", - "inference_platform": "unknown", - "id": "anthracite-org/magnum-v2-12b", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.376166349729828 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5020864013200114 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41790625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31673869680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/anthracite-org/magnum-v2-12b/ac5aaa9c-79ab-4082-b8c5-084fba3e122a.json b/data/hfopenllm_v2/anthracite-org/magnum-v2-12b/ac5aaa9c-79ab-4082-b8c5-084fba3e122a.json new file mode 100644 index 000000000..26fb22a26 --- /dev/null +++ b/data/hfopenllm_v2/anthracite-org/magnum-v2-12b/ac5aaa9c-79ab-4082-b8c5-084fba3e122a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v2-12b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "magnum-v2-12b", + "id": "anthracite-org/magnum-v2-12b", + "developer": "anthracite-org", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3762 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + 
"source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5021 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0544 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4179 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3167 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/anthracite-org/magnum-v2-72b/2d266d7f-8edd-40fd-adfc-597a7742167b.json b/data/hfopenllm_v2/anthracite-org/magnum-v2-72b/2d266d7f-8edd-40fd-adfc-597a7742167b.json new file mode 100644 index 000000000..ea532bb60 --- /dev/null +++ b/data/hfopenllm_v2/anthracite-org/magnum-v2-72b/2d266d7f-8edd-40fd-adfc-597a7742167b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v2-72b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "magnum-v2-72b", + "id": "anthracite-org/magnum-v2-72b", + "developer": "anthracite-org", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.756 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.7005 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3542 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3859 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4372 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5456 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/anthracite-org/magnum-v2-72b/31d80ab1-348f-4b5a-963e-f027adf32101.json b/data/hfopenllm_v2/anthracite-org/magnum-v2-72b/31d80ab1-348f-4b5a-963e-f027adf32101.json deleted file mode 100644 index c3d22857c..000000000 --- a/data/hfopenllm_v2/anthracite-org/magnum-v2-72b/31d80ab1-348f-4b5a-963e-f027adf32101.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v2-72b/1762652580.01168", - "retrieved_timestamp": "1762652580.01168", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "anthracite-org/magnum-v2-72b", - "developer": "anthracite-org", - "inference_platform": "unknown", - "id": "anthracite-org/magnum-v2-72b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7560273407891063 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7005076514129516 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3542296072507553 - } - }, - { - "evaluation_name": 
"GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3859060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4371875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5456283244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/anthracite-org/magnum-v2.5-12b-kto/484ccbf2-87e2-423f-9de4-a4bd54291b54.json b/data/hfopenllm_v2/anthracite-org/magnum-v2.5-12b-kto/484ccbf2-87e2-423f-9de4-a4bd54291b54.json new file mode 100644 index 000000000..d05197a33 --- /dev/null +++ b/data/hfopenllm_v2/anthracite-org/magnum-v2.5-12b-kto/484ccbf2-87e2-423f-9de4-a4bd54291b54.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v2.5-12b-kto/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "magnum-v2.5-12b-kto", + "id": "anthracite-org/magnum-v2.5-12b-kto", + "developer": "anthracite-org", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3866 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5077 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0521 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4086 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3215 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/anthracite-org/magnum-v2.5-12b-kto/74e67572-01d9-4890-9c5a-27b5559cf752.json b/data/hfopenllm_v2/anthracite-org/magnum-v2.5-12b-kto/74e67572-01d9-4890-9c5a-27b5559cf752.json deleted file mode 100644 index 0f281d8ad..000000000 --- a/data/hfopenllm_v2/anthracite-org/magnum-v2.5-12b-kto/74e67572-01d9-4890-9c5a-27b5559cf752.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v2.5-12b-kto/1762652580.011887", - "retrieved_timestamp": "1762652580.011888", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "anthracite-org/magnum-v2.5-12b-kto", - "developer": "anthracite-org", - "inference_platform": "unknown", - "id": "anthracite-org/magnum-v2.5-12b-kto", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3865576669902525 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5076961186254344 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05211480362537765 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40863541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3214760638297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/anthracite-org/magnum-v3-27b-kto/4de79504-f9e8-4235-9aad-d38f0799e081.json b/data/hfopenllm_v2/anthracite-org/magnum-v3-27b-kto/4de79504-f9e8-4235-9aad-d38f0799e081.json new file mode 100644 
index 000000000..81b95e27b --- /dev/null +++ b/data/hfopenllm_v2/anthracite-org/magnum-v3-27b-kto/4de79504-f9e8-4235-9aad-d38f0799e081.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v3-27b-kto/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "magnum-v3-27b-kto", + "id": "anthracite-org/magnum-v3-27b-kto", + "developer": "anthracite-org", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5675 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.586 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1813 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3557 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3855 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4238 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/anthracite-org/magnum-v3-27b-kto/9a74a1f1-0322-4f96-8e52-76bbde948fa9.json b/data/hfopenllm_v2/anthracite-org/magnum-v3-27b-kto/9a74a1f1-0322-4f96-8e52-76bbde948fa9.json deleted file mode 100644 index fa5d9c2f0..000000000 --- a/data/hfopenllm_v2/anthracite-org/magnum-v3-27b-kto/9a74a1f1-0322-4f96-8e52-76bbde948fa9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/anthracite-org_magnum-v3-27b-kto/1762652580.012144", - "retrieved_timestamp": "1762652580.0121448", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "anthracite-org/magnum-v3-27b-kto", - "developer": "anthracite-org", - "inference_platform": "unknown", - "id": "anthracite-org/magnum-v3-27b-kto", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5674831668860845 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.586040577894583 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18126888217522658 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35570469798657717 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38546874999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42378656914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/anthracite-org/magnum-v3-34b/8ace78d5-5390-49ec-935d-2c7faf7569ca.json b/data/hfopenllm_v2/anthracite-org/magnum-v3-34b/8ace78d5-5390-49ec-935d-2c7faf7569ca.json deleted file mode 100644 index cd9158563..000000000 --- a/data/hfopenllm_v2/anthracite-org/magnum-v3-34b/8ace78d5-5390-49ec-935d-2c7faf7569ca.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v3-34b/1762652580.012352", - "retrieved_timestamp": "1762652580.012352", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "anthracite-org/magnum-v3-34b", - "developer": "anthracite-org", - "inference_platform": "unknown", - "id": "anthracite-org/magnum-v3-34b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5115294086357531 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6087828692085228 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19486404833836857 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36073825503355705 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3872395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47523271276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/anthracite-org/magnum-v3-34b/b4bde9d8-f50c-448c-ada4-5bc05f302c04.json b/data/hfopenllm_v2/anthracite-org/magnum-v3-34b/b4bde9d8-f50c-448c-ada4-5bc05f302c04.json new file mode 100644 index 000000000..84369bf89 --- /dev/null +++ b/data/hfopenllm_v2/anthracite-org/magnum-v3-34b/b4bde9d8-f50c-448c-ada4-5bc05f302c04.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v3-34b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "magnum-v3-34b", + "id": "anthracite-org/magnum-v3-34b", + "developer": "anthracite-org", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5115 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6088 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1949 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3607 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3872 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4752 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/anthracite-org/magnum-v3-9b-chatml/42df1809-0021-4968-a18b-86cefc0125d7.json b/data/hfopenllm_v2/anthracite-org/magnum-v3-9b-chatml/42df1809-0021-4968-a18b-86cefc0125d7.json deleted file mode 100644 index d619c5af5..000000000 --- a/data/hfopenllm_v2/anthracite-org/magnum-v3-9b-chatml/42df1809-0021-4968-a18b-86cefc0125d7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v3-9b-chatml/1762652580.0125592", - "retrieved_timestamp": "1762652580.0125592", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "anthracite-org/magnum-v3-9b-chatml", - "developer": "anthracite-org", - "inference_platform": "unknown", - "id": "anthracite-org/magnum-v3-9b-chatml", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12747066671985885 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5427688488887096 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06948640483383686 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34563758389261745 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4432291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4242021276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/anthracite-org/magnum-v3-9b-chatml/5da3240b-b5e3-4333-ba61-925343b56043.json b/data/hfopenllm_v2/anthracite-org/magnum-v3-9b-chatml/5da3240b-b5e3-4333-ba61-925343b56043.json new file mode 100644 index 000000000..48d20512c --- /dev/null +++ b/data/hfopenllm_v2/anthracite-org/magnum-v3-9b-chatml/5da3240b-b5e3-4333-ba61-925343b56043.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v3-9b-chatml/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "magnum-v3-9b-chatml", + "id": "anthracite-org/magnum-v3-9b-chatml", + "developer": "anthracite-org", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1275 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5428 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0695 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3456 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4432 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": 
"hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4242 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/anthracite-org/magnum-v3-9b-customgemma2/d6727b7d-cdf3-48d5-8e30-484e86ad60b6.json b/data/hfopenllm_v2/anthracite-org/magnum-v3-9b-customgemma2/d6727b7d-cdf3-48d5-8e30-484e86ad60b6.json new file mode 100644 index 000000000..08eeb4ea5 --- /dev/null +++ b/data/hfopenllm_v2/anthracite-org/magnum-v3-9b-customgemma2/d6727b7d-cdf3-48d5-8e30-484e86ad60b6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v3-9b-customgemma2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "magnum-v3-9b-customgemma2", + "id": "anthracite-org/magnum-v3-9b-customgemma2", + "developer": "anthracite-org", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1273 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.534 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0718 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3289 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4565 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4205 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/anthracite-org/magnum-v4-12b/15b86bbf-8d3b-474b-98f0-abb3972a7271.json b/data/hfopenllm_v2/anthracite-org/magnum-v4-12b/15b86bbf-8d3b-474b-98f0-abb3972a7271.json new file mode 100644 index 000000000..98e18e6b7 --- /dev/null +++ b/data/hfopenllm_v2/anthracite-org/magnum-v4-12b/15b86bbf-8d3b-474b-98f0-abb3972a7271.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v4-12b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "magnum-v4-12b", + "id": "anthracite-org/magnum-v4-12b", + "developer": "anthracite-org", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3393 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5177 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1178 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4093 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3604 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/anthracite-org/magnum-v4-12b/c7ba8947-fd38-4ba1-9169-6c9164123273.json 
b/data/hfopenllm_v2/anthracite-org/magnum-v4-12b/c7ba8947-fd38-4ba1-9169-6c9164123273.json deleted file mode 100644 index c17e4245e..000000000 --- a/data/hfopenllm_v2/anthracite-org/magnum-v4-12b/c7ba8947-fd38-4ba1-9169-6c9164123273.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v4-12b/1762652580.013016", - "retrieved_timestamp": "1762652580.013016", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "anthracite-org/magnum-v4-12b", - "developer": "anthracite-org", - "inference_platform": "unknown", - "id": "anthracite-org/magnum-v4-12b", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33929640021808805 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5176693046591915 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11782477341389729 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40928125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3603723404255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/anthracite-org/magnum-v4-22b/5e3f808c-964d-492d-a003-37594dd36f89.json b/data/hfopenllm_v2/anthracite-org/magnum-v4-22b/5e3f808c-964d-492d-a003-37594dd36f89.json deleted file mode 100644 index 209deb7a6..000000000 --- a/data/hfopenllm_v2/anthracite-org/magnum-v4-22b/5e3f808c-964d-492d-a003-37594dd36f89.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v4-22b/1762652580.013223", - "retrieved_timestamp": "1762652580.013224", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "anthracite-org/magnum-v4-22b", - "developer": 
"anthracite-org", - "inference_platform": "unknown", - "id": "anthracite-org/magnum-v4-22b", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 22.247 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5628620947973599 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.548612004937422 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2001510574018127 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44078124999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3829787234042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/anthracite-org/magnum-v4-22b/c0b339f6-4a46-46eb-b2d0-945176afe676.json b/data/hfopenllm_v2/anthracite-org/magnum-v4-22b/c0b339f6-4a46-46eb-b2d0-945176afe676.json new file mode 100644 index 000000000..8cce65095 --- /dev/null +++ b/data/hfopenllm_v2/anthracite-org/magnum-v4-22b/c0b339f6-4a46-46eb-b2d0-945176afe676.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v4-22b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "magnum-v4-22b", + "id": "anthracite-org/magnum-v4-22b", + "developer": "anthracite-org", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5629 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.5486 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2002 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.328 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4408 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.383 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/anthracite-org/magnum-v4-27b/113ce0c6-c292-4924-adca-afdbcdd4c381.json b/data/hfopenllm_v2/anthracite-org/magnum-v4-27b/113ce0c6-c292-4924-adca-afdbcdd4c381.json deleted file mode 100644 index fd1461939..000000000 --- a/data/hfopenllm_v2/anthracite-org/magnum-v4-27b/113ce0c6-c292-4924-adca-afdbcdd4c381.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v4-27b/1762652580.013432", - "retrieved_timestamp": "1762652580.013433", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "anthracite-org/magnum-v4-27b", - "developer": "anthracite-org", - "inference_platform": "unknown", - "id": "anthracite-org/magnum-v4-27b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34541682735142754 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5867298109891389 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1797583081570997 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3699664429530201 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4379895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43758311170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/anthracite-org/magnum-v4-27b/79367289-6245-4bf0-99e9-42bc3ff7649c.json b/data/hfopenllm_v2/anthracite-org/magnum-v4-27b/79367289-6245-4bf0-99e9-42bc3ff7649c.json new file mode 100644 index 000000000..fd3218df0 --- /dev/null +++ b/data/hfopenllm_v2/anthracite-org/magnum-v4-27b/79367289-6245-4bf0-99e9-42bc3ff7649c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v4-27b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "magnum-v4-27b", + "id": "anthracite-org/magnum-v4-27b", + "developer": "anthracite-org", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3454 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5867 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1798 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.37 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.438 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4376 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/anthracite-org/magnum-v4-9b/55401aa6-ad61-42d6-9163-5d105a9091bf.json b/data/hfopenllm_v2/anthracite-org/magnum-v4-9b/55401aa6-ad61-42d6-9163-5d105a9091bf.json deleted file mode 100644 index b4927c7d2..000000000 --- a/data/hfopenllm_v2/anthracite-org/magnum-v4-9b/55401aa6-ad61-42d6-9163-5d105a9091bf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v4-9b/1762652580.013639", - "retrieved_timestamp": "1762652580.013639", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "anthracite-org/magnum-v4-9b", - "developer": "anthracite-org", - "inference_platform": "unknown", - "id": "anthracite-org/magnum-v4-9b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3502628581053826 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5336423991931557 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13066465256797583 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34731543624161076 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45157291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3952792553191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/anthracite-org/magnum-v4-9b/c3ec5505-1086-446a-9739-523810e93d13.json b/data/hfopenllm_v2/anthracite-org/magnum-v4-9b/c3ec5505-1086-446a-9739-523810e93d13.json new file mode 100644 index 000000000..d1166e7fe --- /dev/null +++ 
b/data/hfopenllm_v2/anthracite-org/magnum-v4-9b/c3ec5505-1086-446a-9739-523810e93d13.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v4-9b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "magnum-v4-9b", + "id": "anthracite-org/magnum-v4-9b", + "developer": "anthracite-org", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3503 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5336 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1307 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3473 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4516 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3953 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/anthropic/xxx777xxxASD/L3.1-ClaudeMaid-4x8B/ae6d070b-71de-40c3-8f69-944ce2e33abb.json b/data/hfopenllm_v2/anthropic/xxx777xxxASD/L3.1-ClaudeMaid-4x8B/ae6d070b-71de-40c3-8f69-944ce2e33abb.json deleted file mode 100644 index d8aa36a7a..000000000 --- a/data/hfopenllm_v2/anthropic/xxx777xxxASD/L3.1-ClaudeMaid-4x8B/ae6d070b-71de-40c3-8f69-944ce2e33abb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/xxx777xxxASD_L3.1-ClaudeMaid-4x8B/1762652580.602767", - "retrieved_timestamp": "1762652580.602768", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "xxx777xxxASD/L3.1-ClaudeMaid-4x8B", - "developer": "anthropic", - "inference_platform": "unknown", - "id": "xxx777xxxASD/L3.1-ClaudeMaid-4x8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 24.942 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6696487541944263 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5070848048063867 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14123867069486404 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42893749999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35804521276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/apple/DCLM-7B/3891ad0a-0acf-4d3e-a9e8-533633d9557a.json b/data/hfopenllm_v2/apple/DCLM-7B/3891ad0a-0acf-4d3e-a9e8-533633d9557a.json deleted file mode 100644 index 28fd15f2b..000000000 --- a/data/hfopenllm_v2/apple/DCLM-7B/3891ad0a-0acf-4d3e-a9e8-533633d9557a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/apple_DCLM-7B/1762652580.0138528", - "retrieved_timestamp": "1762652580.013854", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "apple/DCLM-7B", - "developer": "apple", - "inference_platform": "unknown", - "id": "apple/DCLM-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "OpenLMModel", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21727239280664196 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42321423668184166 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03700906344410876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3920729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3110871010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/apple/DCLM-7B/c6c5e462-d373-4536-afc3-b740fb7e300f.json b/data/hfopenllm_v2/apple/DCLM-7B/c6c5e462-d373-4536-afc3-b740fb7e300f.json new file mode 100644 index 000000000..92760e54b --- /dev/null +++ b/data/hfopenllm_v2/apple/DCLM-7B/c6c5e462-d373-4536-afc3-b740fb7e300f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/apple_DCLM-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DCLM-7B", + "id": "apple/DCLM-7B", + "developer": "apple", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "OpenLMModel", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2173 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4232 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.037 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + 
"dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3154 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3921 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3111 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/appvoid/arco-2-instruct/95d1d5d9-b613-46b4-b0de-540641d8d81a.json b/data/hfopenllm_v2/appvoid/arco-2-instruct/95d1d5d9-b613-46b4-b0de-540641d8d81a.json deleted file mode 100644 index d8ae34162..000000000 --- a/data/hfopenllm_v2/appvoid/arco-2-instruct/95d1d5d9-b613-46b4-b0de-540641d8d81a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/appvoid_arco-2-instruct/1762652580.014716", - "retrieved_timestamp": "1762652580.0147169", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "appvoid/arco-2-instruct", - "developer": "appvoid", - "inference_platform": "unknown", - "id": "appvoid/arco-2-instruct", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.514 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2164479137577184 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31330470624451107 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23825503355704697 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34959375 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11128656914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/appvoid/arco-2-instruct/b7537abe-8177-4206-999f-5bb7e95c72c8.json b/data/hfopenllm_v2/appvoid/arco-2-instruct/b7537abe-8177-4206-999f-5bb7e95c72c8.json new file mode 100644 index 000000000..c53efb7dc --- /dev/null +++ b/data/hfopenllm_v2/appvoid/arco-2-instruct/b7537abe-8177-4206-999f-5bb7e95c72c8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/appvoid_arco-2-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "arco-2-instruct", + "id": "appvoid/arco-2-instruct", + "developer": "appvoid", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.514 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2164 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3133 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0128 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2383 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3496 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1113 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/appvoid/arco-2/a037593c-0f98-4b23-a139-12cfc435de3c.json b/data/hfopenllm_v2/appvoid/arco-2/a037593c-0f98-4b23-a139-12cfc435de3c.json deleted file mode 100644 index 9417c66e8..000000000 --- a/data/hfopenllm_v2/appvoid/arco-2/a037593c-0f98-4b23-a139-12cfc435de3c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/appvoid_arco-2/1762652580.014345", - "retrieved_timestamp": "1762652580.014347", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "appvoid/arco-2", - "developer": "appvoid", - "inference_platform": "unknown", - "id": "appvoid/arco-2", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.514 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19913717824261848 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31456676274830814 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23909395973154363 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35359375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1116190159574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/appvoid/arco-2/eb2f6159-e37e-46db-9419-6a66cb7e539e.json b/data/hfopenllm_v2/appvoid/arco-2/eb2f6159-e37e-46db-9419-6a66cb7e539e.json new file mode 100644 index 000000000..a19063325 --- /dev/null +++ b/data/hfopenllm_v2/appvoid/arco-2/eb2f6159-e37e-46db-9419-6a66cb7e539e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/appvoid_arco-2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "arco-2", + "id": "appvoid/arco-2", + "developer": "appvoid", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.514 + } + }, + 
"evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1991 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3146 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0136 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2391 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3536 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1116 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/arcee-ai/Arcee-Blitz/01e8e033-1aa9-42e2-85d8-b7974d0c9e23.json b/data/hfopenllm_v2/arcee-ai/Arcee-Blitz/01e8e033-1aa9-42e2-85d8-b7974d0c9e23.json deleted file mode 100644 index 7b8f16246..000000000 --- a/data/hfopenllm_v2/arcee-ai/Arcee-Blitz/01e8e033-1aa9-42e2-85d8-b7974d0c9e23.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/arcee-ai_Arcee-Blitz/1762652580.0149639", - "retrieved_timestamp": "1762652580.014965", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "arcee-ai/Arcee-Blitz", - "developer": "arcee-ai", - "inference_platform": "unknown", - "id": "arcee-ai/Arcee-Blitz", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5543435861292482 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6606628431550884 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34818731117824775 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3850671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.50471875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6153590425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/arcee-ai/Arcee-Blitz/0b2d0a06-2907-4258-be33-1591e18ac6a2.json b/data/hfopenllm_v2/arcee-ai/Arcee-Blitz/0b2d0a06-2907-4258-be33-1591e18ac6a2.json new file mode 100644 index 000000000..8851d8d7b --- /dev/null +++ b/data/hfopenllm_v2/arcee-ai/Arcee-Blitz/0b2d0a06-2907-4258-be33-1591e18ac6a2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/arcee-ai_Arcee-Blitz/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Arcee-Blitz", + "id": "arcee-ai/Arcee-Blitz", + "developer": "arcee-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5543 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6607 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3482 + } + }, + { 
+ "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3851 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5047 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6154 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/arcee-ai/Arcee-Maestro-7B-Preview/0284d867-45c4-4fe4-883c-8e3ea169d66c.json b/data/hfopenllm_v2/arcee-ai/Arcee-Maestro-7B-Preview/0284d867-45c4-4fe4-883c-8e3ea169d66c.json new file mode 100644 index 000000000..9080738a7 --- /dev/null +++ b/data/hfopenllm_v2/arcee-ai/Arcee-Maestro-7B-Preview/0284d867-45c4-4fe4-883c-8e3ea169d66c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/arcee-ai_Arcee-Maestro-7B-Preview/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Arcee-Maestro-7B-Preview", + "id": "arcee-ai/Arcee-Maestro-7B-Preview", + "developer": "arcee-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.275 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4648 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4992 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3322 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3885 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3039 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/arcee-ai/Arcee-Maestro-7B-Preview/126f5eda-1529-450f-8557-dcd6a33b7bd4.json b/data/hfopenllm_v2/arcee-ai/Arcee-Maestro-7B-Preview/126f5eda-1529-450f-8557-dcd6a33b7bd4.json deleted file mode 100644 index f2da56219..000000000 --- a/data/hfopenllm_v2/arcee-ai/Arcee-Maestro-7B-Preview/126f5eda-1529-450f-8557-dcd6a33b7bd4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/arcee-ai_Arcee-Maestro-7B-Preview/1762652580.015253", - "retrieved_timestamp": "1762652580.015254", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "arcee-ai/Arcee-Maestro-7B-Preview", - "developer": "arcee-ai", - "inference_platform": "unknown", - "id": "arcee-ai/Arcee-Maestro-7B-Preview", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2750247122080524 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4648373015709704 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49924471299093653 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33221476510067116 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3885416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3039394946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/arcee-ai/Arcee-Nova/1a2da513-104e-4074-b3b7-601ab11bf6d8.json b/data/hfopenllm_v2/arcee-ai/Arcee-Nova/1a2da513-104e-4074-b3b7-601ab11bf6d8.json new file mode 100644 index 000000000..ec647a01e --- /dev/null +++ b/data/hfopenllm_v2/arcee-ai/Arcee-Nova/1a2da513-104e-4074-b3b7-601ab11bf6d8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/arcee-ai_Arcee-Nova/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Arcee-Nova", + "id": "arcee-ai/Arcee-Nova", + "developer": "arcee-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7907 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6942 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4381 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3851 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4562 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5452 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/arcee-ai/Arcee-Nova/9063608f-8d32-4e98-ad05-621f6239d0ba.json 
b/data/hfopenllm_v2/arcee-ai/Arcee-Nova/9063608f-8d32-4e98-ad05-621f6239d0ba.json deleted file mode 100644 index c460373cf..000000000 --- a/data/hfopenllm_v2/arcee-ai/Arcee-Nova/9063608f-8d32-4e98-ad05-621f6239d0ba.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/arcee-ai_Arcee-Nova/1762652580.0154781", - "retrieved_timestamp": "1762652580.015479", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "arcee-ai/Arcee-Nova", - "developer": "arcee-ai", - "inference_platform": "unknown", - "id": "arcee-ai/Arcee-Nova", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7907485471881275 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.694196965855899 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3850671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45616666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5452127659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/arcee-ai/Arcee-Spark/189db16b-5e78-439f-9f79-6eec979c3a79.json b/data/hfopenllm_v2/arcee-ai/Arcee-Spark/189db16b-5e78-439f-9f79-6eec979c3a79.json new file mode 100644 index 000000000..38ed0acf4 --- /dev/null +++ b/data/hfopenllm_v2/arcee-ai/Arcee-Spark/189db16b-5e78-439f-9f79-6eec979c3a79.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/arcee-ai_Arcee-Spark/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Arcee-Spark", + "id": "arcee-ai/Arcee-Spark", + "developer": "arcee-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + 
"evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5621 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5489 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4021 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3822 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/arcee-ai/Arcee-Spark/1dde2278-39aa-43cf-8d94-5d4a0bb514ca.json b/data/hfopenllm_v2/arcee-ai/Arcee-Spark/1dde2278-39aa-43cf-8d94-5d4a0bb514ca.json deleted file mode 100644 index ab7628d38..000000000 --- a/data/hfopenllm_v2/arcee-ai/Arcee-Spark/1dde2278-39aa-43cf-8d94-5d4a0bb514ca.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/arcee-ai_Arcee-Spark/1762652580.0159192", - "retrieved_timestamp": "1762652580.0159202", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "arcee-ai/Arcee-Spark", - "developer": "arcee-ai", - "inference_platform": "unknown", - "id": "arcee-ai/Arcee-Spark", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.571829412625168 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5480864114714127 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11404833836858004 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4007604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38131648936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/arcee-ai/Arcee-Spark/84a51879-cd67-449b-ace0-f87cccd6ea8c.json b/data/hfopenllm_v2/arcee-ai/Arcee-Spark/84a51879-cd67-449b-ace0-f87cccd6ea8c.json deleted file mode 100644 index a9e328f6a..000000000 --- a/data/hfopenllm_v2/arcee-ai/Arcee-Spark/84a51879-cd67-449b-ace0-f87cccd6ea8c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/arcee-ai_Arcee-Spark/1762652580.015698", - "retrieved_timestamp": "1762652580.015699", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "arcee-ai/Arcee-Spark", - "developer": "arcee-ai", - "inference_platform": "unknown", - "id": "arcee-ai/Arcee-Spark", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5620874834328471 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5489474198567446 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29531722054380666 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40209374999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3822307180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/arcee-ai/Arcee-Spark/d751f1c5-5505-4c12-8d51-091538b49949.json b/data/hfopenllm_v2/arcee-ai/Arcee-Spark/d751f1c5-5505-4c12-8d51-091538b49949.json new file mode 100644 index 000000000..f4c610574 --- /dev/null +++ b/data/hfopenllm_v2/arcee-ai/Arcee-Spark/d751f1c5-5505-4c12-8d51-091538b49949.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/arcee-ai_Arcee-Spark/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Arcee-Spark", + "id": "arcee-ai/Arcee-Spark", + "developer": "arcee-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5718 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5481 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.114 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4008 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + 
"hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3813 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/arcee-ai/Llama-3.1-SuperNova-Lite/b6f9144f-57a0-4c18-9e52-ffccf2d8ca9c.json b/data/hfopenllm_v2/arcee-ai/Llama-3.1-SuperNova-Lite/b6f9144f-57a0-4c18-9e52-ffccf2d8ca9c.json new file mode 100644 index 000000000..894a616bf --- /dev/null +++ b/data/hfopenllm_v2/arcee-ai/Llama-3.1-SuperNova-Lite/b6f9144f-57a0-4c18-9e52-ffccf2d8ca9c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/arcee-ai_Llama-3.1-SuperNova-Lite/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-SuperNova-Lite", + "id": "arcee-ai/Llama-3.1-SuperNova-Lite", + "developer": "arcee-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8017 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5152 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1828 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4163 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.3877 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/arcee-ai/Llama-Spark/67dc7fb2-1455-4f60-9dcb-59a8197741d7.json b/data/hfopenllm_v2/arcee-ai/Llama-Spark/67dc7fb2-1455-4f60-9dcb-59a8197741d7.json new file mode 100644 index 000000000..ffa60c546 --- /dev/null +++ b/data/hfopenllm_v2/arcee-ai/Llama-Spark/67dc7fb2-1455-4f60-9dcb-59a8197741d7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/arcee-ai_Llama-Spark/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-Spark", + "id": "arcee-ai/Llama-Spark", + "developer": "arcee-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7911 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5054 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.139 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3593 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3721 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/arcee-ai/SuperNova-Medius/7e0e8ab9-a90b-4f0e-8e0a-eeceac12a4a1.json b/data/hfopenllm_v2/arcee-ai/SuperNova-Medius/7e0e8ab9-a90b-4f0e-8e0a-eeceac12a4a1.json deleted file mode 100644 index 
277bb81e0..000000000 --- a/data/hfopenllm_v2/arcee-ai/SuperNova-Medius/7e0e8ab9-a90b-4f0e-8e0a-eeceac12a4a1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/arcee-ai_SuperNova-Medius/1762652580.016611", - "retrieved_timestamp": "1762652580.016612", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "arcee-ai/SuperNova-Medius", - "developer": "arcee-ai", - "inference_platform": "unknown", - "id": "arcee-ai/SuperNova-Medius", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7183584001560305 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6377284463115707 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4690332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33305369127516776 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4232708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5034906914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/arcee-ai/SuperNova-Medius/7f4ab590-29fa-473a-b617-00135dd1d6ee.json b/data/hfopenllm_v2/arcee-ai/SuperNova-Medius/7f4ab590-29fa-473a-b617-00135dd1d6ee.json new file mode 100644 index 000000000..4067be61a --- /dev/null +++ b/data/hfopenllm_v2/arcee-ai/SuperNova-Medius/7f4ab590-29fa-473a-b617-00135dd1d6ee.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/arcee-ai_SuperNova-Medius/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SuperNova-Medius", + "id": "arcee-ai/SuperNova-Medius", + "developer": "arcee-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + 
"source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7184 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6377 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.469 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3331 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4233 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5035 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/arcee-ai/Virtuoso-Lite/62afba84-9929-4882-843e-3f7db7b030a3.json b/data/hfopenllm_v2/arcee-ai/Virtuoso-Lite/62afba84-9929-4882-843e-3f7db7b030a3.json deleted file mode 100644 index c1b9feb05..000000000 --- a/data/hfopenllm_v2/arcee-ai/Virtuoso-Lite/62afba84-9929-4882-843e-3f7db7b030a3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/arcee-ai_Virtuoso-Lite/1762652580.0168262", - "retrieved_timestamp": "1762652580.0168269", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "arcee-ai/Virtuoso-Lite", - "developer": "arcee-ai", - "inference_platform": "unknown", - "id": "arcee-ai/Virtuoso-Lite", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.8099575792231279 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6098520975127147 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25302114803625375 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34395973154362414 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4595416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4440658244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/arcee-ai/Virtuoso-Lite/d67db62e-e21d-43c8-8b4c-bfa353e47636.json b/data/hfopenllm_v2/arcee-ai/Virtuoso-Lite/d67db62e-e21d-43c8-8b4c-bfa353e47636.json new file mode 100644 index 000000000..e33d0d23e --- /dev/null +++ b/data/hfopenllm_v2/arcee-ai/Virtuoso-Lite/d67db62e-e21d-43c8-8b4c-bfa353e47636.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/arcee-ai_Virtuoso-Lite/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Virtuoso-Lite", + "id": "arcee-ai/Virtuoso-Lite", + "developer": "arcee-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.81 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6099 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.253 + } + }, + { + "evaluation_name": "GPQA", + 
"source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.344 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4595 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4441 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/arcee-ai/Virtuoso-Small-v2/325cf0a5-6a72-466a-8e1e-531f03db6083.json b/data/hfopenllm_v2/arcee-ai/Virtuoso-Small-v2/325cf0a5-6a72-466a-8e1e-531f03db6083.json deleted file mode 100644 index 7e34da7fa..000000000 --- a/data/hfopenllm_v2/arcee-ai/Virtuoso-Small-v2/325cf0a5-6a72-466a-8e1e-531f03db6083.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/arcee-ai_Virtuoso-Small-v2/1762652580.0172758", - "retrieved_timestamp": "1762652580.017277", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "arcee-ai/Virtuoso-Small-v2", - "developer": "arcee-ai", - "inference_platform": "unknown", - "id": "arcee-ai/Virtuoso-Small-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8273181824226385 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6554097094586643 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.466012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35318791946308725 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43133333333333335 - } 
- }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.518783244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/arcee-ai/Virtuoso-Small-v2/85abff46-8ae5-4a75-9522-721793224363.json b/data/hfopenllm_v2/arcee-ai/Virtuoso-Small-v2/85abff46-8ae5-4a75-9522-721793224363.json new file mode 100644 index 000000000..5a503f2da --- /dev/null +++ b/data/hfopenllm_v2/arcee-ai/Virtuoso-Small-v2/85abff46-8ae5-4a75-9522-721793224363.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/arcee-ai_Virtuoso-Small-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Virtuoso-Small-v2", + "id": "arcee-ai/Virtuoso-Small-v2", + "developer": "arcee-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8273 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6554 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.466 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3532 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4313 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5188 + } + } + ] +} \ No newline at end 
of file diff --git a/data/hfopenllm_v2/arcee-ai/Virtuoso-Small/1736bbd8-4457-4d55-8c0b-0ae6e001ee62.json b/data/hfopenllm_v2/arcee-ai/Virtuoso-Small/1736bbd8-4457-4d55-8c0b-0ae6e001ee62.json new file mode 100644 index 000000000..4e3877887 --- /dev/null +++ b/data/hfopenllm_v2/arcee-ai/Virtuoso-Small/1736bbd8-4457-4d55-8c0b-0ae6e001ee62.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/arcee-ai_Virtuoso-Small/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Virtuoso-Small", + "id": "arcee-ai/Virtuoso-Small", + "developer": "arcee-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7935 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6518 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4094 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3364 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4339 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5191 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/arcee-ai/Virtuoso-Small/cc51c0e0-4e5d-496c-bf02-8b5d8f474cd3.json b/data/hfopenllm_v2/arcee-ai/Virtuoso-Small/cc51c0e0-4e5d-496c-bf02-8b5d8f474cd3.json deleted file mode 100644 index 131f83780..000000000 --- 
a/data/hfopenllm_v2/arcee-ai/Virtuoso-Small/cc51c0e0-4e5d-496c-bf02-8b5d8f474cd3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/arcee-ai_Virtuoso-Small/1762652580.017056", - "retrieved_timestamp": "1762652580.017057", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "arcee-ai/Virtuoso-Small", - "developer": "arcee-ai", - "inference_platform": "unknown", - "id": "arcee-ai/Virtuoso-Small", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7935211904413622 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6517633129454784 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4093655589123867 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33640939597315433 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43390625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5191156914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/arcee-ai/raspberry-3B/4777e427-8d17-4e06-8cbf-0883c95bbfd8.json b/data/hfopenllm_v2/arcee-ai/raspberry-3B/4777e427-8d17-4e06-8cbf-0883c95bbfd8.json new file mode 100644 index 000000000..e1c6d4dab --- /dev/null +++ b/data/hfopenllm_v2/arcee-ai/raspberry-3B/4777e427-8d17-4e06-8cbf-0883c95bbfd8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/arcee-ai_raspberry-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "raspberry-3B", + "id": "arcee-ai/raspberry-3B", + "developer": "arcee-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", 
+ "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3154 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4269 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1035 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4123 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2854 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/arcee-ai/raspberry-3B/cef8c893-a903-4e30-b7e1-5f2fe8f2ac82.json b/data/hfopenllm_v2/arcee-ai/raspberry-3B/cef8c893-a903-4e30-b7e1-5f2fe8f2ac82.json deleted file mode 100644 index 932afe5a4..000000000 --- a/data/hfopenllm_v2/arcee-ai/raspberry-3B/cef8c893-a903-4e30-b7e1-5f2fe8f2ac82.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/arcee-ai_raspberry-3B/1762652580.017479", - "retrieved_timestamp": "1762652580.017479", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "arcee-ai/raspberry-3B", - "developer": "arcee-ai", - "inference_platform": "unknown", - "id": "arcee-ai/raspberry-3B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31541642840995227 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42689280188827033 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10347432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41232291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.285405585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/argilla-warehouse/Llama-3.1-8B-MagPie-Ultra/4df0b890-d4c5-408e-8994-88f7383e9235.json b/data/hfopenllm_v2/argilla-warehouse/Llama-3.1-8B-MagPie-Ultra/4df0b890-d4c5-408e-8994-88f7383e9235.json new file mode 100644 index 000000000..c9896a281 --- /dev/null +++ b/data/hfopenllm_v2/argilla-warehouse/Llama-3.1-8B-MagPie-Ultra/4df0b890-d4c5-408e-8994-88f7383e9235.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/argilla-warehouse_Llama-3.1-8B-MagPie-Ultra/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8B-MagPie-Ultra", + "id": "argilla-warehouse/Llama-3.1-8B-MagPie-Ultra", + "developer": "argilla-warehouse", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5757 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.462 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.077 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3543 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3144 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/argilla/notus-7b-v1/76a5a59d-f5fd-4fb0-849e-7db7772b555a.json b/data/hfopenllm_v2/argilla/notus-7b-v1/76a5a59d-f5fd-4fb0-849e-7db7772b555a.json new file mode 100644 index 000000000..d4480efe6 --- /dev/null +++ b/data/hfopenllm_v2/argilla/notus-7b-v1/76a5a59d-f5fd-4fb0-849e-7db7772b555a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/argilla_notus-7b-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "notus-7b-v1", + "id": "argilla/notus-7b-v1", + "developer": "argilla", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5082 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4512 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0317 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3364 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3004 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/argilla/notus-7b-v1/c06f66ea-d9e3-4902-b3fd-188110f9c1e4.json b/data/hfopenllm_v2/argilla/notus-7b-v1/c06f66ea-d9e3-4902-b3fd-188110f9c1e4.json deleted file mode 100644 index 8d4cf80f2..000000000 --- a/data/hfopenllm_v2/argilla/notus-7b-v1/c06f66ea-d9e3-4902-b3fd-188110f9c1e4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/argilla_notus-7b-v1/1762652580.017684", - "retrieved_timestamp": "1762652580.017685", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "argilla/notus-7b-v1", - "developer": "argilla", - "inference_platform": "unknown", - "id": "argilla/notus-7b-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.508207112683236 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4511857407381495 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03172205438066465 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33641666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3003656914893617 - } - } - ] -} 
diff --git a/data/hfopenllm_v2/argilla/notux-8x7b-v1/60185907-11c2-454c-bfbc-3c5741651ab7.json b/data/hfopenllm_v2/argilla/notux-8x7b-v1/60185907-11c2-454c-bfbc-3c5741651ab7.json deleted file mode 100644 index f3a28d9df..000000000 --- a/data/hfopenllm_v2/argilla/notux-8x7b-v1/60185907-11c2-454c-bfbc-3c5741651ab7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/argilla_notux-8x7b-v1/1762652580.017979", - "retrieved_timestamp": "1762652580.0179799", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "argilla/notux-8x7b-v1", - "developer": "argilla", - "inference_platform": "unknown", - "id": "argilla/notux-8x7b-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 46.703 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5422290633297429 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5363304164516353 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09969788519637462 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41759375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3660239361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/argilla/notux-8x7b-v1/6c8399d0-01ce-45cb-a20f-a49e4e760a1e.json b/data/hfopenllm_v2/argilla/notux-8x7b-v1/6c8399d0-01ce-45cb-a20f-a49e4e760a1e.json new file mode 100644 index 000000000..8c4516521 --- /dev/null +++ b/data/hfopenllm_v2/argilla/notux-8x7b-v1/6c8399d0-01ce-45cb-a20f-a49e4e760a1e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/argilla_notux-8x7b-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "notux-8x7b-v1", + "id": "argilla/notux-8x7b-v1", + "developer": "argilla", + "inference_platform": "unknown", + "additional_details": { + 
"precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 46.703 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5422 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5363 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0997 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4176 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.366 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/arisin/orca-platypus-13B-slerp/92c2c5ee-dfa2-4db3-8401-887d02cc21dd.json b/data/hfopenllm_v2/arisin/orca-platypus-13B-slerp/92c2c5ee-dfa2-4db3-8401-887d02cc21dd.json new file mode 100644 index 000000000..593200335 --- /dev/null +++ b/data/hfopenllm_v2/arisin/orca-platypus-13B-slerp/92c2c5ee-dfa2-4db3-8401-887d02cc21dd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/arisin_orca-platypus-13B-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "orca-platypus-13B-slerp", + "id": "arisin/orca-platypus-13B-slerp", + "developer": "arisin", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.016 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + 
"hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2672 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4631 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0159 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4253 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/arisin/orca-platypus-13B-slerp/ecd45b21-21f7-49e2-b314-c7b678bdc8c1.json b/data/hfopenllm_v2/arisin/orca-platypus-13B-slerp/ecd45b21-21f7-49e2-b314-c7b678bdc8c1.json deleted file mode 100644 index a7c0f33c3..000000000 --- a/data/hfopenllm_v2/arisin/orca-platypus-13B-slerp/ecd45b21-21f7-49e2-b314-c7b678bdc8c1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/arisin_orca-platypus-13B-slerp/1762652580.018446", - "retrieved_timestamp": "1762652580.018446", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "arisin/orca-platypus-13B-slerp", - "developer": "arisin", - "inference_platform": "unknown", - "id": "arisin/orca-platypus-13B-slerp", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.016 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.26718107953563214 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46306234976954946 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4253125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2592253989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/arshiaafshani/Arsh-V1/6f40503d-59ee-4cdc-a697-ef405d9644a7.json b/data/hfopenllm_v2/arshiaafshani/Arsh-V1/6f40503d-59ee-4cdc-a697-ef405d9644a7.json deleted file mode 100644 index 2870ee145..000000000 --- a/data/hfopenllm_v2/arshiaafshani/Arsh-V1/6f40503d-59ee-4cdc-a697-ef405d9644a7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/arshiaafshani_Arsh-V1/1762652580.0186949", - "retrieved_timestamp": "1762652580.0186958", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "arshiaafshani/Arsh-V1", - "developer": "arshiaafshani", - "inference_platform": "unknown", - "id": "arshiaafshani/Arsh-V1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.96 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6043276284702368 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6739657491720434 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2620845921450151 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3733221476510067 - } - }, - { - "evaluation_name": 
"MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48989583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5256815159574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/arshiaafshani/Arsh-V1/b40ef568-f277-4d5c-87cd-53feaa71598b.json b/data/hfopenllm_v2/arshiaafshani/Arsh-V1/b40ef568-f277-4d5c-87cd-53feaa71598b.json new file mode 100644 index 000000000..9bb389c7b --- /dev/null +++ b/data/hfopenllm_v2/arshiaafshani/Arsh-V1/b40ef568-f277-4d5c-87cd-53feaa71598b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/arshiaafshani_Arsh-V1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Arsh-V1", + "id": "arshiaafshani/Arsh-V1", + "developer": "arshiaafshani", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.96 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6043 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.674 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2621 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3733 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4899 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5257 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/asharsha30/LLAMA_Harsha_8_B_ORDP_10k/893d5149-c535-41c7-8a1a-26bb6b33e407.json b/data/hfopenllm_v2/asharsha30/LLAMA_Harsha_8_B_ORDP_10k/893d5149-c535-41c7-8a1a-26bb6b33e407.json new file mode 100644 index 000000000..1530afe24 --- /dev/null +++ b/data/hfopenllm_v2/asharsha30/LLAMA_Harsha_8_B_ORDP_10k/893d5149-c535-41c7-8a1a-26bb6b33e407.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/asharsha30_LLAMA_Harsha_8_B_ORDP_10k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LLAMA_Harsha_8_B_ORDP_10k", + "id": "asharsha30/LLAMA_Harsha_8_B_ORDP_10k", + "developer": "asharsha30", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3464 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4669 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0665 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2735 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3697 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + } + ] +} \ No newline at end of file diff 
--git a/data/hfopenllm_v2/ashercn97/a1-v0.0.1/0b649ed5-5af4-4910-b853-2408e3b58f1f.json b/data/hfopenllm_v2/ashercn97/a1-v0.0.1/0b649ed5-5af4-4910-b853-2408e3b58f1f.json new file mode 100644 index 000000000..d679dcf37 --- /dev/null +++ b/data/hfopenllm_v2/ashercn97/a1-v0.0.1/0b649ed5-5af4-4910-b853-2408e3b58f1f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ashercn97_a1-v0.0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "a1-v0.0.1", + "id": "ashercn97/a1-v0.0.1", + "developer": "ashercn97", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2198 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5188 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2145 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3112 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.412 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4165 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ashercn97/a1-v0.0.1/a9e3fe74-400c-444c-9b28-6f49c6671f96.json b/data/hfopenllm_v2/ashercn97/a1-v0.0.1/a9e3fe74-400c-444c-9b28-6f49c6671f96.json deleted file mode 100644 index 5ec97df56..000000000 --- a/data/hfopenllm_v2/ashercn97/a1-v0.0.1/a9e3fe74-400c-444c-9b28-6f49c6671f96.json +++ 
/dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ashercn97_a1-v0.0.1/1762652580.019211", - "retrieved_timestamp": "1762652580.019212", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ashercn97/a1-v0.0.1", - "developer": "ashercn97", - "inference_platform": "unknown", - "id": "ashercn97/a1-v0.0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21984445715146922 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5188122863232913 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21450151057401812 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4119791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41647273936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/ashercn97/a1-v002/509c2895-70ae-4381-94ef-f6cdf9ee07ef.json b/data/hfopenllm_v2/ashercn97/a1-v002/509c2895-70ae-4381-94ef-f6cdf9ee07ef.json deleted file mode 100644 index 570d79b52..000000000 --- a/data/hfopenllm_v2/ashercn97/a1-v002/509c2895-70ae-4381-94ef-f6cdf9ee07ef.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ashercn97_a1-v002/1762652580.019455", - "retrieved_timestamp": "1762652580.019456", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ashercn97/a1-v002", - "developer": "ashercn97", - "inference_platform": "unknown", - "id": "ashercn97/a1-v002", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on 
IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2584631001298776 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5261137844506322 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41591666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41747007978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/ashercn97/a1-v002/5c8edeba-5c65-4168-b67e-02143acbcafb.json b/data/hfopenllm_v2/ashercn97/a1-v002/5c8edeba-5c65-4168-b67e-02143acbcafb.json new file mode 100644 index 000000000..ae1d8850d --- /dev/null +++ b/data/hfopenllm_v2/ashercn97/a1-v002/5c8edeba-5c65-4168-b67e-02143acbcafb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ashercn97_a1-v002/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "a1-v002", + "id": "ashercn97/a1-v002", + "developer": "ashercn97", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2585 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5261 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.2341 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3188 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4159 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4175 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/assskelad/smollm2-360M-sft_SmallThoughts/67e657ef-d602-4f58-b898-874a22f4a009.json b/data/hfopenllm_v2/assskelad/smollm2-360M-sft_SmallThoughts/67e657ef-d602-4f58-b898-874a22f4a009.json new file mode 100644 index 000000000..72305cf3d --- /dev/null +++ b/data/hfopenllm_v2/assskelad/smollm2-360M-sft_SmallThoughts/67e657ef-d602-4f58-b898-874a22f4a009.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/assskelad_smollm2-360M-sft_SmallThoughts/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "smollm2-360M-sft_SmallThoughts", + "id": "assskelad/smollm2-360M-sft_SmallThoughts", + "developer": "assskelad", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.362 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2007 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.315 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0166 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" 
+ }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3395 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1182 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/assskelad/smollm2-360M-sft_SmallThoughts/ce2f5cc8-a187-454d-ba99-4446d29aab7c.json b/data/hfopenllm_v2/assskelad/smollm2-360M-sft_SmallThoughts/ce2f5cc8-a187-454d-ba99-4446d29aab7c.json deleted file mode 100644 index f193d151a..000000000 --- a/data/hfopenllm_v2/assskelad/smollm2-360M-sft_SmallThoughts/ce2f5cc8-a187-454d-ba99-4446d29aab7c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/assskelad_smollm2-360M-sft_SmallThoughts/1762652580.019667", - "retrieved_timestamp": "1762652580.0196679", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "assskelad/smollm2-360M-sft_SmallThoughts", - "developer": "assskelad", - "inference_platform": "unknown", - "id": "assskelad/smollm2-360M-sft_SmallThoughts", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.362 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20071078072846715 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3149572469619188 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3395208333333333 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11818484042553191 - } - } - ] -} diff --git a/data/hfopenllm_v2/athirdpath/Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit/53d2bf07-689a-4e69-a534-b288313c8481.json b/data/hfopenllm_v2/athirdpath/Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit/53d2bf07-689a-4e69-a534-b288313c8481.json new file mode 100644 index 000000000..f4805d33d --- /dev/null +++ b/data/hfopenllm_v2/athirdpath/Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit/53d2bf07-689a-4e69-a534-b288313c8481.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/athirdpath_Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit", + "id": "athirdpath/Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit", + "developer": "athirdpath", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4521 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4939 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.102 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3864 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3565 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/athirdpath/Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit/9255090f-6862-4ff1-ac91-fe0cd7613445.json b/data/hfopenllm_v2/athirdpath/Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit/9255090f-6862-4ff1-ac91-fe0cd7613445.json deleted file mode 100644 index c623fad87..000000000 --- a/data/hfopenllm_v2/athirdpath/Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit/9255090f-6862-4ff1-ac91-fe0cd7613445.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/athirdpath_Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit/1762652580.019914", - "retrieved_timestamp": "1762652580.019914", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "athirdpath/Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit", - "developer": "athirdpath", - "inference_platform": "unknown", - "id": "athirdpath/Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4521037513796726 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4939066588253951 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10196374622356495 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3863958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3564660904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/automerger/YamshadowExperiment28-7B/1fa5dee9-c360-40d9-8e67-9b415cd36616.json b/data/hfopenllm_v2/automerger/YamshadowExperiment28-7B/1fa5dee9-c360-40d9-8e67-9b415cd36616.json deleted file mode 100644 index 1d64ca3c6..000000000 --- a/data/hfopenllm_v2/automerger/YamshadowExperiment28-7B/1fa5dee9-c360-40d9-8e67-9b415cd36616.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - 
"schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/automerger_YamshadowExperiment28-7B/1762652580.020166", - "retrieved_timestamp": "1762652580.0201669", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "automerger/YamshadowExperiment28-7B", - "developer": "automerger", - "inference_platform": "unknown", - "id": "automerger/YamshadowExperiment28-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4070156074770498 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5150030227855061 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4306145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30601728723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/automerger/YamshadowExperiment28-7B/34d6a184-d4d5-4609-8305-c0e2ee1c585b.json b/data/hfopenllm_v2/automerger/YamshadowExperiment28-7B/34d6a184-d4d5-4609-8305-c0e2ee1c585b.json new file mode 100644 index 000000000..2b426dfef --- /dev/null +++ b/data/hfopenllm_v2/automerger/YamshadowExperiment28-7B/34d6a184-d4d5-4609-8305-c0e2ee1c585b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/automerger_YamshadowExperiment28-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "YamshadowExperiment28-7B", + "id": "automerger/YamshadowExperiment28-7B", + "developer": "automerger", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": 
"hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.407 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.515 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0612 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4306 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.306 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/avemio/GRAG-NEMO-12B-ORPO-HESSIAN-AI/39b627ab-3e64-42f7-a88d-abe5764fcf4d.json b/data/hfopenllm_v2/avemio/GRAG-NEMO-12B-ORPO-HESSIAN-AI/39b627ab-3e64-42f7-a88d-abe5764fcf4d.json new file mode 100644 index 000000000..a782f820f --- /dev/null +++ b/data/hfopenllm_v2/avemio/GRAG-NEMO-12B-ORPO-HESSIAN-AI/39b627ab-3e64-42f7-a88d-abe5764fcf4d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/avemio_GRAG-NEMO-12B-ORPO-HESSIAN-AI/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "GRAG-NEMO-12B-ORPO-HESSIAN-AI", + "id": "avemio/GRAG-NEMO-12B-ORPO-HESSIAN-AI", + "developer": "avemio", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2607 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3447 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1061 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/avemio/GRAG-NEMO-12B-ORPO-HESSIAN-AI/45cc7b31-3f75-42f7-9b07-3cf704fd2b55.json b/data/hfopenllm_v2/avemio/GRAG-NEMO-12B-ORPO-HESSIAN-AI/45cc7b31-3f75-42f7-9b07-3cf704fd2b55.json deleted file mode 100644 index 5691e54fb..000000000 --- a/data/hfopenllm_v2/avemio/GRAG-NEMO-12B-ORPO-HESSIAN-AI/45cc7b31-3f75-42f7-9b07-3cf704fd2b55.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/avemio_GRAG-NEMO-12B-ORPO-HESSIAN-AI/1762652580.020413", - "retrieved_timestamp": "1762652580.0204139", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "avemio/GRAG-NEMO-12B-ORPO-HESSIAN-AI", - "developer": "avemio", - "inference_platform": "unknown", - "id": "avemio/GRAG-NEMO-12B-ORPO-HESSIAN-AI", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26065954545866094 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3446666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10605053191489362 - } - } - ] -} diff --git a/data/hfopenllm_v2/awnr/Mistral-7B-v0.1-signtensors-1-over-2/d8467b15-8a03-4cde-9fc5-5c08bdabb6c6.json b/data/hfopenllm_v2/awnr/Mistral-7B-v0.1-signtensors-1-over-2/d8467b15-8a03-4cde-9fc5-5c08bdabb6c6.json new file mode 100644 index 000000000..edcb076d2 --- /dev/null +++ b/data/hfopenllm_v2/awnr/Mistral-7B-v0.1-signtensors-1-over-2/d8467b15-8a03-4cde-9fc5-5c08bdabb6c6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/awnr_Mistral-7B-v0.1-signtensors-1-over-2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-7B-v0.1-signtensors-1-over-2", + "id": "awnr/Mistral-7B-v0.1-signtensors-1-over-2", + "developer": "awnr", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2179 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4423 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.034 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": 
"Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4006 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/awnr/Mistral-7B-v0.1-signtensors-1-over-4/85bc5976-0d40-4416-bbf8-9b1dbf372343.json b/data/hfopenllm_v2/awnr/Mistral-7B-v0.1-signtensors-1-over-4/85bc5976-0d40-4416-bbf8-9b1dbf372343.json new file mode 100644 index 000000000..cddb44dc5 --- /dev/null +++ b/data/hfopenllm_v2/awnr/Mistral-7B-v0.1-signtensors-1-over-4/85bc5976-0d40-4416-bbf8-9b1dbf372343.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/awnr_Mistral-7B-v0.1-signtensors-1-over-4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-7B-v0.1-signtensors-1-over-4", + "id": "awnr/Mistral-7B-v0.1-signtensors-1-over-4", + "developer": "awnr", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2133 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3507 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0249 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.346 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2311 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/awnr/Mistral-7B-v0.1-signtensors-3-over-8/8c7e8e64-672e-4c7e-a808-a49f1792d3a8.json b/data/hfopenllm_v2/awnr/Mistral-7B-v0.1-signtensors-3-over-8/8c7e8e64-672e-4c7e-a808-a49f1792d3a8.json new file mode 100644 index 000000000..d4c3cdd30 --- /dev/null +++ b/data/hfopenllm_v2/awnr/Mistral-7B-v0.1-signtensors-3-over-8/8c7e8e64-672e-4c7e-a808-a49f1792d3a8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/awnr_Mistral-7B-v0.1-signtensors-3-over-8/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-7B-v0.1-signtensors-3-over-8", + "id": "awnr/Mistral-7B-v0.1-signtensors-3-over-8", + "developer": "awnr", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2394 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.43 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0332 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + 
"hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3818 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3001 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/awnr/Mistral-7B-v0.1-signtensors-5-over-16/de8651eb-16d1-46ee-a1df-b8c72caaf205.json b/data/hfopenllm_v2/awnr/Mistral-7B-v0.1-signtensors-5-over-16/de8651eb-16d1-46ee-a1df-b8c72caaf205.json new file mode 100644 index 000000000..cde7a7057 --- /dev/null +++ b/data/hfopenllm_v2/awnr/Mistral-7B-v0.1-signtensors-5-over-16/de8651eb-16d1-46ee-a1df-b8c72caaf205.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/awnr_Mistral-7B-v0.1-signtensors-5-over-16/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-7B-v0.1-signtensors-5-over-16", + "id": "awnr/Mistral-7B-v0.1-signtensors-5-over-16", + "developer": "awnr", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2118 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4124 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0295 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3686 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2958 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/awnr/Mistral-7B-v0.1-signtensors-7-over-16/6a744db8-814f-4e8e-b6e5-0d096267dfa5.json b/data/hfopenllm_v2/awnr/Mistral-7B-v0.1-signtensors-7-over-16/6a744db8-814f-4e8e-b6e5-0d096267dfa5.json new file mode 100644 index 000000000..7819c9d49 --- /dev/null +++ b/data/hfopenllm_v2/awnr/Mistral-7B-v0.1-signtensors-7-over-16/6a744db8-814f-4e8e-b6e5-0d096267dfa5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/awnr_Mistral-7B-v0.1-signtensors-7-over-16/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-7B-v0.1-signtensors-7-over-16", + "id": "awnr/Mistral-7B-v0.1-signtensors-7-over-16", + "developer": "awnr", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2294 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4316 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0385 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3952 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": 
"MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.303 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/aws-prototyping/MegaBeam-Mistral-7B-512k/028b7c37-770e-4356-a7c6-0cc74650d5fd.json b/data/hfopenllm_v2/aws-prototyping/MegaBeam-Mistral-7B-512k/028b7c37-770e-4356-a7c6-0cc74650d5fd.json new file mode 100644 index 000000000..8bff123e1 --- /dev/null +++ b/data/hfopenllm_v2/aws-prototyping/MegaBeam-Mistral-7B-512k/028b7c37-770e-4356-a7c6-0cc74650d5fd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/aws-prototyping_MegaBeam-Mistral-7B-512k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MegaBeam-Mistral-7B-512k", + "id": "aws-prototyping/MegaBeam-Mistral-7B-512k", + "developer": "aws-prototyping", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5973 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3662 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0287 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2589 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/axolotl-ai-co/romulus-mistral-nemo-12b-simpo/3b399c64-922a-48ba-9a25-862102749647.json b/data/hfopenllm_v2/axolotl-ai-co/romulus-mistral-nemo-12b-simpo/3b399c64-922a-48ba-9a25-862102749647.json new file mode 100644 index 000000000..fa7e408a7 --- /dev/null +++ b/data/hfopenllm_v2/axolotl-ai-co/romulus-mistral-nemo-12b-simpo/3b399c64-922a-48ba-9a25-862102749647.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/axolotl-ai-co_romulus-mistral-nemo-12b-simpo/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "romulus-mistral-nemo-12b-simpo", + "id": "axolotl-ai-co/romulus-mistral-nemo-12b-simpo", + "developer": "axolotl-ai-co", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6079 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5395 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.114 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2785 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4233 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3469 + } + } + ] +} \ No newline at end of file diff 
--git a/data/hfopenllm_v2/baconnier/Napoleon_24B_V0.0/88fb101e-35dd-40af-922f-9b66a2711249.json b/data/hfopenllm_v2/baconnier/Napoleon_24B_V0.0/88fb101e-35dd-40af-922f-9b66a2711249.json deleted file mode 100644 index be7c1ae28..000000000 --- a/data/hfopenllm_v2/baconnier/Napoleon_24B_V0.0/88fb101e-35dd-40af-922f-9b66a2711249.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/baconnier_Napoleon_24B_V0.0/1762652580.0222468", - "retrieved_timestamp": "1762652580.022248", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "baconnier/Napoleon_24B_V0.0", - "developer": "baconnier", - "inference_platform": "unknown", - "id": "baconnier/Napoleon_24B_V0.0", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1801021290176731 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6367110843973786 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22734138972809667 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37919463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4419895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5039893617021277 - } - } - ] -} diff --git a/data/hfopenllm_v2/baconnier/Napoleon_24B_V0.0/d5e46a11-3e81-457d-9d26-9fd17f96f076.json b/data/hfopenllm_v2/baconnier/Napoleon_24B_V0.0/d5e46a11-3e81-457d-9d26-9fd17f96f076.json new file mode 100644 index 000000000..e7ee5d6e9 --- /dev/null +++ b/data/hfopenllm_v2/baconnier/Napoleon_24B_V0.0/d5e46a11-3e81-457d-9d26-9fd17f96f076.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/baconnier_Napoleon_24B_V0.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Napoleon_24B_V0.0", + "id": "baconnier/Napoleon_24B_V0.0", + "developer": 
"baconnier", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1801 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6367 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2273 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3792 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.442 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.504 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/baconnier/Napoleon_24B_V0.2/4857d2d0-1a4b-4544-8b1e-fb4b01618a3b.json b/data/hfopenllm_v2/baconnier/Napoleon_24B_V0.2/4857d2d0-1a4b-4544-8b1e-fb4b01618a3b.json deleted file mode 100644 index 059a68d24..000000000 --- a/data/hfopenllm_v2/baconnier/Napoleon_24B_V0.2/4857d2d0-1a4b-4544-8b1e-fb4b01618a3b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/baconnier_Napoleon_24B_V0.2/1762652580.022489", - "retrieved_timestamp": "1762652580.022489", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "baconnier/Napoleon_24B_V0.2", - "developer": "baconnier", - "inference_platform": "unknown", - "id": "baconnier/Napoleon_24B_V0.2", - "additional_details": { - "precision": "float16", - "architecture": 
"MistralForCausalLM", - "params_billions": 23.572 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2527172347150006 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5910621269874454 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4459583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4356715425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/baconnier/Napoleon_24B_V0.2/b3abfbc1-911a-43b7-a338-efb25f746f9d.json b/data/hfopenllm_v2/baconnier/Napoleon_24B_V0.2/b3abfbc1-911a-43b7-a338-efb25f746f9d.json new file mode 100644 index 000000000..d0b53963a --- /dev/null +++ b/data/hfopenllm_v2/baconnier/Napoleon_24B_V0.2/b3abfbc1-911a-43b7-a338-efb25f746f9d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/baconnier_Napoleon_24B_V0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Napoleon_24B_V0.2", + "id": "baconnier/Napoleon_24B_V0.2", + "developer": "baconnier", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2527 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5911 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + 
"hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1435 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3381 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.446 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4357 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/baebee/7B-Cetacea/5985fed7-9c54-458d-8f64-533e248a38da.json b/data/hfopenllm_v2/baebee/7B-Cetacea/5985fed7-9c54-458d-8f64-533e248a38da.json deleted file mode 100644 index 4de8d2d80..000000000 --- a/data/hfopenllm_v2/baebee/7B-Cetacea/5985fed7-9c54-458d-8f64-533e248a38da.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/baebee_7B-Cetacea/1762652580.022699", - "retrieved_timestamp": "1762652580.022699", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "baebee/7B-Cetacea", - "developer": "baebee", - "inference_platform": "unknown", - "id": "baebee/7B-Cetacea", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5278660620486975 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4757171853895546 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - 
} - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41362499999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2954621010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/baebee/7B-Cetacea/6b471ee0-9444-45ff-92cf-da624aa59bf6.json b/data/hfopenllm_v2/baebee/7B-Cetacea/6b471ee0-9444-45ff-92cf-da624aa59bf6.json new file mode 100644 index 000000000..a0e865f81 --- /dev/null +++ b/data/hfopenllm_v2/baebee/7B-Cetacea/6b471ee0-9444-45ff-92cf-da624aa59bf6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/baebee_7B-Cetacea/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "7B-Cetacea", + "id": "baebee/7B-Cetacea", + "developer": "baebee", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5279 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4757 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0468 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2861 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4136 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2955 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/baebee/mergekit-model_stock-nzjnheg/b56bd924-0a63-4ca2-8f2f-97b581e47a36.json b/data/hfopenllm_v2/baebee/mergekit-model_stock-nzjnheg/b56bd924-0a63-4ca2-8f2f-97b581e47a36.json new file mode 100644 index 000000000..bee951289 --- /dev/null +++ b/data/hfopenllm_v2/baebee/mergekit-model_stock-nzjnheg/b56bd924-0a63-4ca2-8f2f-97b581e47a36.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/baebee_mergekit-model_stock-nzjnheg/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mergekit-model_stock-nzjnheg", + "id": "baebee/mergekit-model_stock-nzjnheg", + "developer": "baebee", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4844 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5287 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1677 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3847 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3699 + } + } + ] +} \ No 
newline at end of file diff --git a/data/hfopenllm_v2/baebee/mergekit-model_stock-nzjnheg/e847afb0-c8ac-4cce-b0f9-1667c9fbef3c.json b/data/hfopenllm_v2/baebee/mergekit-model_stock-nzjnheg/e847afb0-c8ac-4cce-b0f9-1667c9fbef3c.json deleted file mode 100644 index 2f1148826..000000000 --- a/data/hfopenllm_v2/baebee/mergekit-model_stock-nzjnheg/e847afb0-c8ac-4cce-b0f9-1667c9fbef3c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/baebee_mergekit-model_stock-nzjnheg/1762652580.022936", - "retrieved_timestamp": "1762652580.022937", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "baebee/mergekit-model_stock-nzjnheg", - "developer": "baebee", - "inference_platform": "unknown", - "id": "baebee/mergekit-model_stock-nzjnheg", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48442687624392167 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5287391310729729 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16767371601208458 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38466666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3699301861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/baebee/mergekit-ties-fnjenli/21b3d7d0-301d-431d-9cfc-a0ad1e326f03.json b/data/hfopenllm_v2/baebee/mergekit-ties-fnjenli/21b3d7d0-301d-431d-9cfc-a0ad1e326f03.json deleted file mode 100644 index a2d6ddb5b..000000000 --- a/data/hfopenllm_v2/baebee/mergekit-ties-fnjenli/21b3d7d0-301d-431d-9cfc-a0ad1e326f03.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/baebee_mergekit-ties-fnjenli/1762652580.0231512", - "retrieved_timestamp": "1762652580.023152", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": 
"third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "baebee/mergekit-ties-fnjenli", - "developer": "baebee", - "inference_platform": "unknown", - "id": "baebee/mergekit-ties-fnjenli", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19881248420856662 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30236959112076134 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.002265861027190332 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24496644295302014 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4019375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11286569148936171 - } - } - ] -} diff --git a/data/hfopenllm_v2/baebee/mergekit-ties-fnjenli/bfe9098d-7207-4f8c-9a3f-549a29303b5f.json b/data/hfopenllm_v2/baebee/mergekit-ties-fnjenli/bfe9098d-7207-4f8c-9a3f-549a29303b5f.json new file mode 100644 index 000000000..73adf9632 --- /dev/null +++ b/data/hfopenllm_v2/baebee/mergekit-ties-fnjenli/bfe9098d-7207-4f8c-9a3f-549a29303b5f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/baebee_mergekit-ties-fnjenli/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mergekit-ties-fnjenli", + "id": "baebee/mergekit-ties-fnjenli", + "developer": "baebee", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1988 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3024 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0023 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.245 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4019 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1129 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.1v/7856172d-ec3e-4e71-befe-54952478e330.json b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.1v/7856172d-ec3e-4e71-befe-54952478e330.json new file mode 100644 index 000000000..8956821d3 --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.1v/7856172d-ec3e-4e71-befe-54952478e330.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_0.1v/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MISCHIEVOUS-12B-Mix_0.1v", + "id": "bamec66557/MISCHIEVOUS-12B-Mix_0.1v", + "developer": "bamec66557", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3636 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5436 + } + }, + { + 
"evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1329 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.328 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4132 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3674 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.1v/ae256440-486f-43cf-b4a3-8d5c0ff196c9.json b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.1v/ae256440-486f-43cf-b4a3-8d5c0ff196c9.json deleted file mode 100644 index a2c6ce817..000000000 --- a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.1v/ae256440-486f-43cf-b4a3-8d5c0ff196c9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_0.1v/1762652580.023659", - "retrieved_timestamp": "1762652580.023659", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bamec66557/MISCHIEVOUS-12B-Mix_0.1v", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/MISCHIEVOUS-12B-Mix_0.1v", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36362628935668473 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5436022524587655 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13293051359516617 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41315624999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3673537234042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.2v/a68aada5-61bd-4a4c-a8e1-b9a2ace349df.json b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.2v/a68aada5-61bd-4a4c-a8e1-b9a2ace349df.json new file mode 100644 index 000000000..4b040ddba --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.2v/a68aada5-61bd-4a4c-a8e1-b9a2ace349df.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_0.2v/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MISCHIEVOUS-12B-Mix_0.2v", + "id": "bamec66557/MISCHIEVOUS-12B-Mix_0.2v", + "developer": "bamec66557", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3624 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5434 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1261 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3255 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4158 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3663 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.2v/d509b0d3-a043-4057-bf80-37ec5ceedeed.json b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.2v/d509b0d3-a043-4057-bf80-37ec5ceedeed.json deleted file mode 100644 index 7e5bf547c..000000000 --- a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.2v/d509b0d3-a043-4057-bf80-37ec5ceedeed.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_0.2v/1762652580.023869", - "retrieved_timestamp": "1762652580.02387", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bamec66557/MISCHIEVOUS-12B-Mix_0.2v", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/MISCHIEVOUS-12B-Mix_0.2v", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3623773809048879 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5434355857920987 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12613293051359517 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32550335570469796 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41582291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36627327127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.3v/8e2e1f2f-4715-4b8b-b641-d5e552500408.json 
b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.3v/8e2e1f2f-4715-4b8b-b641-d5e552500408.json deleted file mode 100644 index 4d5d92b71..000000000 --- a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.3v/8e2e1f2f-4715-4b8b-b641-d5e552500408.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_0.3v/1762652580.02432", - "retrieved_timestamp": "1762652580.024322", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bamec66557/MISCHIEVOUS-12B-Mix_0.3v", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/MISCHIEVOUS-12B-Mix_0.3v", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38698209639312575 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5431389316665282 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1336858006042296 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41312499999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3663563829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.3v/9d19c44f-4912-4c95-ab3f-2dddb055d932.json b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.3v/9d19c44f-4912-4c95-ab3f-2dddb055d932.json new file mode 100644 index 000000000..f38472040 --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.3v/9d19c44f-4912-4c95-ab3f-2dddb055d932.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_0.3v/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MISCHIEVOUS-12B-Mix_0.3v", + "id": "bamec66557/MISCHIEVOUS-12B-Mix_0.3v", + "developer": "bamec66557", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.387 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5431 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1337 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3196 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4131 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3664 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.4v/4072cc72-b6b4-4a5d-8f01-f9f8437ea569.json b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.4v/4072cc72-b6b4-4a5d-8f01-f9f8437ea569.json deleted file mode 100644 index fdc51cec9..000000000 --- a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.4v/4072cc72-b6b4-4a5d-8f01-f9f8437ea569.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_0.4v/1762652580.024673", - "retrieved_timestamp": "1762652580.024674", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bamec66557/MISCHIEVOUS-12B-Mix_0.4v", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/MISCHIEVOUS-12B-Mix_0.4v", - "additional_details": { - "precision": "bfloat16", 
- "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6508142838778884 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5094241395384186 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1351963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41762499999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36826795212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.4v/6cef3550-27d7-4073-b4bb-0f19a2c5f553.json b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.4v/6cef3550-27d7-4073-b4bb-0f19a2c5f553.json new file mode 100644 index 000000000..ac3a04ead --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.4v/6cef3550-27d7-4073-b4bb-0f19a2c5f553.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_0.4v/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MISCHIEVOUS-12B-Mix_0.4v", + "id": "bamec66557/MISCHIEVOUS-12B-Mix_0.4v", + "developer": "bamec66557", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6508 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5094 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { 
+ "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1352 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3171 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4176 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3683 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.5v/08ab8f6a-9aaf-4ab4-ada3-eb4a75f46995.json b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.5v/08ab8f6a-9aaf-4ab4-ada3-eb4a75f46995.json new file mode 100644 index 000000000..95a75b4b9 --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.5v/08ab8f6a-9aaf-4ab4-ada3-eb4a75f46995.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_0.5v/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MISCHIEVOUS-12B-Mix_0.5v", + "id": "bamec66557/MISCHIEVOUS-12B-Mix_0.5v", + "developer": "bamec66557", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3746 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5422 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1367 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3205 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4132 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3661 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.5v/fa2e9cff-4a7b-4efd-98ca-b8fd2cb33928.json b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.5v/fa2e9cff-4a7b-4efd-98ca-b8fd2cb33928.json deleted file mode 100644 index 7a34fb116..000000000 --- a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.5v/fa2e9cff-4a7b-4efd-98ca-b8fd2cb33928.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_0.5v/1762652580.0249128", - "retrieved_timestamp": "1762652580.024914", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bamec66557/MISCHIEVOUS-12B-Mix_0.5v", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/MISCHIEVOUS-12B-Mix_0.5v", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3745672593163916 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5421932988679541 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13670694864048338 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41315624999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36610704787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.6v/622f9379-6a30-43ba-a7a8-fbd08c484fa5.json b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.6v/622f9379-6a30-43ba-a7a8-fbd08c484fa5.json new file mode 100644 index 000000000..a048d396f --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.6v/622f9379-6a30-43ba-a7a8-fbd08c484fa5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_0.6v/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MISCHIEVOUS-12B-Mix_0.6v", + "id": "bamec66557/MISCHIEVOUS-12B-Mix_0.6v", + "developer": "bamec66557", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4366 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5449 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1254 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.328 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4185 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + 
"source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3662 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.6v/a58c4863-e5a9-425d-ad3e-5924d6146718.json b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.6v/a58c4863-e5a9-425d-ad3e-5924d6146718.json deleted file mode 100644 index 2930035dd..000000000 --- a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.6v/a58c4863-e5a9-425d-ad3e-5924d6146718.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_0.6v/1762652580.025138", - "retrieved_timestamp": "1762652580.0251389", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bamec66557/MISCHIEVOUS-12B-Mix_0.6v", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/MISCHIEVOUS-12B-Mix_0.6v", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43656608908806416 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5448909065942131 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12537764350453173 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4184895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3661901595744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_III_IV_V/24f728e6-de5e-44cc-8b6d-51e0065c1475.json b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_III_IV_V/24f728e6-de5e-44cc-8b6d-51e0065c1475.json new file mode 100644 index 000000000..5db8c43d5 --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_III_IV_V/24f728e6-de5e-44cc-8b6d-51e0065c1475.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + 
"evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_III_IV_V/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MISCHIEVOUS-12B-Mix_III_IV_V", + "id": "bamec66557/MISCHIEVOUS-12B-Mix_III_IV_V", + "developer": "bamec66557", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4031 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5465 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1292 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3205 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4198 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3664 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_III_IV_V/c2e334b3-e82d-40bb-a6ed-9a941bf2352a.json b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_III_IV_V/c2e334b3-e82d-40bb-a6ed-9a941bf2352a.json deleted file mode 100644 index e38a97a8a..000000000 --- a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_III_IV_V/c2e334b3-e82d-40bb-a6ed-9a941bf2352a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_III_IV_V/1762652580.0253649", - "retrieved_timestamp": "1762652580.025366", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bamec66557/MISCHIEVOUS-12B-Mix_III_IV_V", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/MISCHIEVOUS-12B-Mix_III_IV_V", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40309379114083965 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.54645347832278 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12915407854984895 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41982291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3664394946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_III_ex_V/6f31292a-b09f-4e2c-ae3c-b093c5ba06c6.json b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_III_ex_V/6f31292a-b09f-4e2c-ae3c-b093c5ba06c6.json deleted file mode 100644 index 42330d3f9..000000000 --- a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_III_ex_V/6f31292a-b09f-4e2c-ae3c-b093c5ba06c6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_III_ex_V/1762652580.025593", - "retrieved_timestamp": "1762652580.025593", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bamec66557/MISCHIEVOUS-12B-Mix_III_ex_V", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/MISCHIEVOUS-12B-Mix_III_ex_V", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43162032296528763 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5448926891254073 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13217522658610273 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4197916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3648603723404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_III_ex_V/c3b2bf18-d355-40fc-a862-376c1b988305.json b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_III_ex_V/c3b2bf18-d355-40fc-a862-376c1b988305.json new file mode 100644 index 000000000..33f7812de --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_III_ex_V/c3b2bf18-d355-40fc-a862-376c1b988305.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_III_ex_V/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MISCHIEVOUS-12B-Mix_III_ex_V", + "id": "bamec66557/MISCHIEVOUS-12B-Mix_III_ex_V", + "developer": "bamec66557", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4316 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5449 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on 
MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1322 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3205 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4198 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3649 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_Neo/089a5215-70a4-4255-ac01-1b70d4e8a494.json b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_Neo/089a5215-70a4-4255-ac01-1b70d4e8a494.json deleted file mode 100644 index bc9d41009..000000000 --- a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_Neo/089a5215-70a4-4255-ac01-1b70d4e8a494.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_Neo/1762652580.0258071", - "retrieved_timestamp": "1762652580.0258079", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bamec66557/MISCHIEVOUS-12B-Mix_Neo", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/MISCHIEVOUS-12B-Mix_Neo", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6249606599378538 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5077574728717519 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13293051359516617 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, 
- { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41502083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36851728723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_Neo/79474be5-2587-4087-a2cc-1337e3b696dd.json b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_Neo/79474be5-2587-4087-a2cc-1337e3b696dd.json new file mode 100644 index 000000000..ab7a30fec --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_Neo/79474be5-2587-4087-a2cc-1337e3b696dd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_Neo/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MISCHIEVOUS-12B-Mix_Neo", + "id": "bamec66557/MISCHIEVOUS-12B-Mix_Neo", + "developer": "bamec66557", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.625 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5078 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1329 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3163 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.415 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + 
"source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3685 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B/22ff2700-70c0-459e-96a2-0ce1710947bc.json b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B/22ff2700-70c0-459e-96a2-0ce1710947bc.json new file mode 100644 index 000000000..2f6482099 --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B/22ff2700-70c0-459e-96a2-0ce1710947bc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MISCHIEVOUS-12B", + "id": "bamec66557/MISCHIEVOUS-12B", + "developer": "bamec66557", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3852 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5405 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1276 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3205 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4145 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.3672 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B/49ec948c-c06d-4c01-be83-9f74ed15ea17.json b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B/49ec948c-c06d-4c01-be83-9f74ed15ea17.json deleted file mode 100644 index e567c456a..000000000 --- a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B/49ec948c-c06d-4c01-be83-9f74ed15ea17.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B/1762652580.02337", - "retrieved_timestamp": "1762652580.02337", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bamec66557/MISCHIEVOUS-12B", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/MISCHIEVOUS-12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3851835352420466 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5404981575206657 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12764350453172205 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4144895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3671875 - } - } - ] -} diff --git a/data/hfopenllm_v2/bamec66557/Mistral-Nemo-VICIOUS_MESH-12B-2407/7d3a47a3-83d3-4f51-ab72-6a2fa5b5ef80.json b/data/hfopenllm_v2/bamec66557/Mistral-Nemo-VICIOUS_MESH-12B-2407/7d3a47a3-83d3-4f51-ab72-6a2fa5b5ef80.json new file mode 100644 index 000000000..d39a7d3db --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/Mistral-Nemo-VICIOUS_MESH-12B-2407/7d3a47a3-83d3-4f51-ab72-6a2fa5b5ef80.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bamec66557_Mistral-Nemo-VICIOUS_MESH-12B-2407/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": 
"third_party" + }, + "model_info": { + "name": "Mistral-Nemo-VICIOUS_MESH-12B-2407", + "id": "bamec66557/Mistral-Nemo-VICIOUS_MESH-12B-2407", + "developer": "bamec66557", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6706 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5156 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1367 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3154 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.431 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3677 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/NameLess-12B-prob/69dc0f8e-16d7-4907-9741-484eafa62b8c.json b/data/hfopenllm_v2/bamec66557/NameLess-12B-prob/69dc0f8e-16d7-4907-9741-484eafa62b8c.json new file mode 100644 index 000000000..e2704ecca --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/NameLess-12B-prob/69dc0f8e-16d7-4907-9741-484eafa62b8c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bamec66557_NameLess-12B-prob/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NameLess-12B-prob", + "id": "bamec66557/NameLess-12B-prob", + "developer": "bamec66557", + "inference_platform": "unknown", + "additional_details": { + "precision": 
"bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6602 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5158 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1261 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3146 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3684 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/NameLess-12B-prob/81670e41-16d6-43a6-9af9-6924a52a8300.json b/data/hfopenllm_v2/bamec66557/NameLess-12B-prob/81670e41-16d6-43a6-9af9-6924a52a8300.json deleted file mode 100644 index 759931c01..000000000 --- a/data/hfopenllm_v2/bamec66557/NameLess-12B-prob/81670e41-16d6-43a6-9af9-6924a52a8300.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bamec66557_NameLess-12B-prob/1762652580.026292", - "retrieved_timestamp": "1762652580.026293", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bamec66557/NameLess-12B-prob", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/NameLess-12B-prob", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6602315190361574 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5158141019151304 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12613293051359517 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.433625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3684341755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-0.1v/2d468a71-7364-40eb-8a98-1dbac956b3cf.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-0.1v/2d468a71-7364-40eb-8a98-1dbac956b3cf.json deleted file mode 100644 index 328e35f98..000000000 --- a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-0.1v/2d468a71-7364-40eb-8a98-1dbac956b3cf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-0.1v/1762652580.026718", - "retrieved_timestamp": "1762652580.026719", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bamec66557/VICIOUS_MESH-12B-0.1v", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/VICIOUS_MESH-12B-0.1v", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 6.124 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36574954454181574 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5412276004529172 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13217522658610273 - } - 
}, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41582291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36826795212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-0.1v/e516abc1-9c3c-4921-a385-e2533d45fed3.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-0.1v/e516abc1-9c3c-4921-a385-e2533d45fed3.json new file mode 100644 index 000000000..6ba84e23e --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-0.1v/e516abc1-9c3c-4921-a385-e2533d45fed3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-0.1v/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "VICIOUS_MESH-12B-0.1v", + "id": "bamec66557/VICIOUS_MESH-12B-0.1v", + "developer": "bamec66557", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 6.124 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3657 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5412 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1322 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3247 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on 
MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4158 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3683 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-0.X.ver/8baa5832-cc07-4a31-a815-0e8151426ea6.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-0.X.ver/8baa5832-cc07-4a31-a815-0e8151426ea6.json new file mode 100644 index 000000000..cd846c308 --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-0.X.ver/8baa5832-cc07-4a31-a815-0e8151426ea6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-0.X.ver/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "VICIOUS_MESH-12B-0.X.ver", + "id": "bamec66557/VICIOUS_MESH-12B-0.X.ver", + "developer": "bamec66557", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 6.124 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3776 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5416 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1201 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3213 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4198 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { 
+ "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3671 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-0.X.ver/d0c92f20-72d0-431c-b8ba-881b3a6ae158.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-0.X.ver/d0c92f20-72d0-431c-b8ba-881b3a6ae158.json deleted file mode 100644 index f919d6ab3..000000000 --- a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-0.X.ver/d0c92f20-72d0-431c-b8ba-881b3a6ae158.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-0.X.ver/1762652580.0269299", - "retrieved_timestamp": "1762652580.0269299", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bamec66557/VICIOUS_MESH-12B-0.X.ver", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/VICIOUS_MESH-12B-0.X.ver", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 6.124 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37756486123485683 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.541624689936422 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41982291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36710438829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-ALPHA/0053cf6a-0e1e-49c5-8d0a-b3d7254e22f3.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-ALPHA/0053cf6a-0e1e-49c5-8d0a-b3d7254e22f3.json deleted file mode 100644 index 024c98055..000000000 --- a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-ALPHA/0053cf6a-0e1e-49c5-8d0a-b3d7254e22f3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - 
"schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-ALPHA/1762652580.0271401", - "retrieved_timestamp": "1762652580.027141", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bamec66557/VICIOUS_MESH-12B-ALPHA", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/VICIOUS_MESH-12B-ALPHA", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6365011502812536 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5093679898057982 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13670694864048338 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4202916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3696808510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-ALPHA/509fbca4-f405-4c27-85a9-1eea59025070.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-ALPHA/509fbca4-f405-4c27-85a9-1eea59025070.json new file mode 100644 index 000000000..98b8dfc2e --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-ALPHA/509fbca4-f405-4c27-85a9-1eea59025070.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-ALPHA/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "VICIOUS_MESH-12B-ALPHA", + "id": "bamec66557/VICIOUS_MESH-12B-ALPHA", + "developer": "bamec66557", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + 
"hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6365 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5094 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1367 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3138 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4203 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3697 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-BETA/2f023511-2446-48f8-83e5-47225f15e905.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-BETA/2f023511-2446-48f8-83e5-47225f15e905.json deleted file mode 100644 index ce5b0bb48..000000000 --- a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-BETA/2f023511-2446-48f8-83e5-47225f15e905.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-BETA/1762652580.0273511", - "retrieved_timestamp": "1762652580.0273511", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bamec66557/VICIOUS_MESH-12B-BETA", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/VICIOUS_MESH-12B-BETA", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.6720967034136092 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5155964285724085 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13293051359516617 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4309895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36785239361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-BETA/6f45ed56-6bec-4439-9adb-e79fcd74667c.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-BETA/6f45ed56-6bec-4439-9adb-e79fcd74667c.json new file mode 100644 index 000000000..5e63ad14b --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-BETA/6f45ed56-6bec-4439-9adb-e79fcd74667c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-BETA/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "VICIOUS_MESH-12B-BETA", + "id": "bamec66557/VICIOUS_MESH-12B-BETA", + "developer": "bamec66557", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6721 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5156 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.1329 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3163 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.431 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3679 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-DELTA/512ff924-c1d3-4d75-a468-2bcdcda25cf6.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-DELTA/512ff924-c1d3-4d75-a468-2bcdcda25cf6.json new file mode 100644 index 000000000..d55e617b1 --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-DELTA/512ff924-c1d3-4d75-a468-2bcdcda25cf6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-DELTA/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "VICIOUS_MESH-12B-DELTA", + "id": "bamec66557/VICIOUS_MESH-12B-DELTA", + "developer": "bamec66557", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 6.124 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6469 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5055 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1375 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4057 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3651 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-DELTA/fcaf0de1-f4f5-4bfb-8276-29b3b1f5b5be.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-DELTA/fcaf0de1-f4f5-4bfb-8276-29b3b1f5b5be.json deleted file mode 100644 index 85735df3c..000000000 --- a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-DELTA/fcaf0de1-f4f5-4bfb-8276-29b3b1f5b5be.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-DELTA/1762652580.027563", - "retrieved_timestamp": "1762652580.027563", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bamec66557/VICIOUS_MESH-12B-DELTA", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/VICIOUS_MESH-12B-DELTA", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 6.124 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6468924675416783 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5055418480543742 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13746223564954682 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40565625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3651097074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-DIGAMMA/67e74757-9950-499e-9258-7ccd20b29835.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-DIGAMMA/67e74757-9950-499e-9258-7ccd20b29835.json deleted file mode 100644 index 90ed5f93c..000000000 --- a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-DIGAMMA/67e74757-9950-499e-9258-7ccd20b29835.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-DIGAMMA/1762652580.027769", - "retrieved_timestamp": "1762652580.02777", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bamec66557/VICIOUS_MESH-12B-DIGAMMA", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/VICIOUS_MESH-12B-DIGAMMA", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 6.124 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6429207835210575 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.506116784464076 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1336858006042296 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40965625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36585771276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-DIGAMMA/86b561ae-c4d3-4293-a884-bcab26df026d.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-DIGAMMA/86b561ae-c4d3-4293-a884-bcab26df026d.json new file mode 100644 index 000000000..33dd088e9 --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-DIGAMMA/86b561ae-c4d3-4293-a884-bcab26df026d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-DIGAMMA/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", 
+ "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "VICIOUS_MESH-12B-DIGAMMA", + "id": "bamec66557/VICIOUS_MESH-12B-DIGAMMA", + "developer": "bamec66557", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 6.124 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6429 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5061 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1337 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3129 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4097 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3659 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-EPSILON/38864e75-9bb0-4eaa-ba87-c631838a9ad1.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-EPSILON/38864e75-9bb0-4eaa-ba87-c631838a9ad1.json deleted file mode 100644 index 2f8d48c45..000000000 --- a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-EPSILON/38864e75-9bb0-4eaa-ba87-c631838a9ad1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-EPSILON/1762652580.0279832", - "retrieved_timestamp": "1762652580.0279832", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - 
"source_type": "documentation" - }, - "model_info": { - "name": "bamec66557/VICIOUS_MESH-12B-EPSILON", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/VICIOUS_MESH-12B-EPSILON", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 6.124 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6304560787599126 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5037995611302296 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12613293051359517 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4069895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36477726063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-EPSILON/516d1972-9731-4234-a4b3-b96423ebba5c.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-EPSILON/516d1972-9731-4234-a4b3-b96423ebba5c.json new file mode 100644 index 000000000..e7811c1a1 --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-EPSILON/516d1972-9731-4234-a4b3-b96423ebba5c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-EPSILON/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "VICIOUS_MESH-12B-EPSILON", + "id": "bamec66557/VICIOUS_MESH-12B-EPSILON", + "developer": "bamec66557", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 6.124 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6305 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5038 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1261 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3146 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.407 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3648 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-GAMMA/274f6e02-c81f-4f2e-9747-e5de5cee1933.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-GAMMA/274f6e02-c81f-4f2e-9747-e5de5cee1933.json new file mode 100644 index 000000000..1e3744cbe --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-GAMMA/274f6e02-c81f-4f2e-9747-e5de5cee1933.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-GAMMA/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "VICIOUS_MESH-12B-GAMMA", + "id": "bamec66557/VICIOUS_MESH-12B-GAMMA", + "developer": "bamec66557", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6362 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5182 + } + }, + { + 
"evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1307 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3138 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4363 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3666 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-GAMMA/4507a6c1-bfff-4e8d-92c6-7e923f74c4dc.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-GAMMA/4507a6c1-bfff-4e8d-92c6-7e923f74c4dc.json deleted file mode 100644 index d5782039e..000000000 --- a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-GAMMA/4507a6c1-bfff-4e8d-92c6-7e923f74c4dc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-GAMMA/1762652580.028181", - "retrieved_timestamp": "1762652580.028182", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bamec66557/VICIOUS_MESH-12B-GAMMA", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/VICIOUS_MESH-12B-GAMMA", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6361764562472019 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5181908355069679 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13066465256797583 - } - }, - { - "evaluation_name": 
"GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43632291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3666057180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-NEMO/61638b55-296b-40fd-a39f-cc2276d9f94a.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-NEMO/61638b55-296b-40fd-a39f-cc2276d9f94a.json new file mode 100644 index 000000000..07670d514 --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-NEMO/61638b55-296b-40fd-a39f-cc2276d9f94a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-NEMO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "VICIOUS_MESH-12B-NEMO", + "id": "bamec66557/VICIOUS_MESH-12B-NEMO", + "developer": "bamec66557", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4022 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5442 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1269 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3238 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4251 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3716 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-NEMO/6a9c649c-fbcd-489a-bc01-083014932a45.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-NEMO/6a9c649c-fbcd-489a-bc01-083014932a45.json deleted file mode 100644 index 0600e8193..000000000 --- a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-NEMO/6a9c649c-fbcd-489a-bc01-083014932a45.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-NEMO/1762652580.028384", - "retrieved_timestamp": "1762652580.028385", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bamec66557/VICIOUS_MESH-12B-NEMO", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/VICIOUS_MESH-12B-NEMO", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40221944440750546 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5441680901949261 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1268882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42506249999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37159242021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-OMEGA/11c1b6fe-4815-415b-a4a8-d14073df6ee1.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-OMEGA/11c1b6fe-4815-415b-a4a8-d14073df6ee1.json new file mode 100644 index 
000000000..6ef877a1b --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-OMEGA/11c1b6fe-4815-415b-a4a8-d14073df6ee1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-OMEGA/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "VICIOUS_MESH-12B-OMEGA", + "id": "bamec66557/VICIOUS_MESH-12B-OMEGA", + "developer": "bamec66557", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.67 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5166 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1344 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3154 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4323 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3677 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-OMEGA/a630e843-ec9c-432b-986a-2b181c789507.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-OMEGA/a630e843-ec9c-432b-986a-2b181c789507.json deleted file mode 100644 index ce1decaec..000000000 --- a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-OMEGA/a630e843-ec9c-432b-986a-2b181c789507.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-OMEGA/1762652580.028594", - "retrieved_timestamp": "1762652580.028594", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bamec66557/VICIOUS_MESH-12B-OMEGA", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/VICIOUS_MESH-12B-OMEGA", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6699734482284783 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.516644373777888 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13444108761329304 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43232291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36768617021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-UNION/20d0e946-e7cf-48a6-a81e-f73d774e0e2b.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-UNION/20d0e946-e7cf-48a6-a81e-f73d774e0e2b.json deleted file mode 100644 index 204fcf307..000000000 --- a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-UNION/20d0e946-e7cf-48a6-a81e-f73d774e0e2b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-UNION/1762652580.028806", - "retrieved_timestamp": "1762652580.028807", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bamec66557/VICIOUS_MESH-12B-UNION", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/VICIOUS_MESH-12B-UNION", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 6.124 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", 
- "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6428709158366468 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5106643448765741 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13897280966767372 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4256875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3671875 - } - } - ] -} diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-UNION/88e2cb24-288e-4f37-8753-f0daa825051c.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-UNION/88e2cb24-288e-4f37-8753-f0daa825051c.json new file mode 100644 index 000000000..149e4a3b1 --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-UNION/88e2cb24-288e-4f37-8753-f0daa825051c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-UNION/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "VICIOUS_MESH-12B-UNION", + "id": "bamec66557/VICIOUS_MESH-12B-UNION", + "developer": "bamec66557", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 6.124 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6429 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5107 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": 
"Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.139 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4257 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3672 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B/8a1a6c44-17fd-402e-a22e-e795a1f612e3.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B/8a1a6c44-17fd-402e-a22e-e795a1f612e3.json new file mode 100644 index 000000000..d0b02080e --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B/8a1a6c44-17fd-402e-a22e-e795a1f612e3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "VICIOUS_MESH-12B", + "id": "bamec66557/VICIOUS_MESH-12B", + "developer": "bamec66557", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 6.124 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3716 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5436 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1344 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + 
"source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.328 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4105 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3679 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B/f2ef86c9-e968-42e0-a0d0-1cf79f9c249b.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B/f2ef86c9-e968-42e0-a0d0-1cf79f9c249b.json deleted file mode 100644 index 3bc9f9400..000000000 --- a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B/f2ef86c9-e968-42e0-a0d0-1cf79f9c249b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B/1762652580.026504", - "retrieved_timestamp": "1762652580.026504", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bamec66557/VICIOUS_MESH-12B", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/VICIOUS_MESH-12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 6.124 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37156965739792636 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5436022524587655 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13444108761329304 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4104895833333333 - } - }, - { - "evaluation_name": 
"MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36785239361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B_Razor/1121af0b-61fe-424a-bc66-3164bcb1d833.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B_Razor/1121af0b-61fe-424a-bc66-3164bcb1d833.json new file mode 100644 index 000000000..c043f1c67 --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B_Razor/1121af0b-61fe-424a-bc66-3164bcb1d833.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B_Razor/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "VICIOUS_MESH-12B_Razor", + "id": "bamec66557/VICIOUS_MESH-12B_Razor", + "developer": "bamec66557", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 6.124 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3736 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5447 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1299 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.323 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4092 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3669 + } + } + ] +} \ No 
newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B_Razor/950f6bff-e0ec-4556-85b7-81444008d1d4.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B_Razor/950f6bff-e0ec-4556-85b7-81444008d1d4.json deleted file mode 100644 index 50722d1b7..000000000 --- a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B_Razor/950f6bff-e0ec-4556-85b7-81444008d1d4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B_Razor/1762652580.029016", - "retrieved_timestamp": "1762652580.029016", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bamec66557/VICIOUS_MESH-12B_Razor", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/VICIOUS_MESH-12B_Razor", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 6.124 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37364304489864675 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5447127693928118 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40915624999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36685505319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/bamec66557/mergekit-model_stock-zdaysvi/35300d67-7ee1-4874-b351-87f46267cec9.json b/data/hfopenllm_v2/bamec66557/mergekit-model_stock-zdaysvi/35300d67-7ee1-4874-b351-87f46267cec9.json new file mode 100644 index 000000000..ddc69cf5a --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/mergekit-model_stock-zdaysvi/35300d67-7ee1-4874-b351-87f46267cec9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bamec66557_mergekit-model_stock-zdaysvi/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + 
}, + "model_info": { + "name": "mergekit-model_stock-zdaysvi", + "id": "bamec66557/mergekit-model_stock-zdaysvi", + "developer": "bamec66557", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 6.124 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6426 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5063 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1352 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3138 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4124 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3688 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/mergekit-model_stock-zdaysvi/8932da66-d29a-4453-9b61-bee48f1a28f1.json b/data/hfopenllm_v2/bamec66557/mergekit-model_stock-zdaysvi/8932da66-d29a-4453-9b61-bee48f1a28f1.json deleted file mode 100644 index fa58397d8..000000000 --- a/data/hfopenllm_v2/bamec66557/mergekit-model_stock-zdaysvi/8932da66-d29a-4453-9b61-bee48f1a28f1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bamec66557_mergekit-model_stock-zdaysvi/1762652580.029272", - "retrieved_timestamp": "1762652580.029272", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"bamec66557/mergekit-model_stock-zdaysvi", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/mergekit-model_stock-zdaysvi", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 6.124 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6425960894870055 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5062803896601668 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1351963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41238541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36884973404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/bamec66557/mergekit-ties-sinbkow/6180b7b3-4b21-42aa-a62d-084a91568b43.json b/data/hfopenllm_v2/bamec66557/mergekit-ties-sinbkow/6180b7b3-4b21-42aa-a62d-084a91568b43.json new file mode 100644 index 000000000..f59ec5045 --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/mergekit-ties-sinbkow/6180b7b3-4b21-42aa-a62d-084a91568b43.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bamec66557_mergekit-ties-sinbkow/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mergekit-ties-sinbkow", + "id": "bamec66557/mergekit-ties-sinbkow", + "developer": "bamec66557", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 6.124 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6432 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5092 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.145 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3196 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4045 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3603 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/mergekit-ties-sinbkow/b8c00b3b-c35a-4511-965b-6096e9b116de.json b/data/hfopenllm_v2/bamec66557/mergekit-ties-sinbkow/b8c00b3b-c35a-4511-965b-6096e9b116de.json deleted file mode 100644 index 43b45d83a..000000000 --- a/data/hfopenllm_v2/bamec66557/mergekit-ties-sinbkow/b8c00b3b-c35a-4511-965b-6096e9b116de.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bamec66557_mergekit-ties-sinbkow/1762652580.029482", - "retrieved_timestamp": "1762652580.029482", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bamec66557/mergekit-ties-sinbkow", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/mergekit-ties-sinbkow", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 6.124 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6431956098706986 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5092084289828543 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14501510574018128 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40447916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36028922872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/belztjti/dffghgjh/7414d344-0e67-424a-9e16-00de0487ce02.json b/data/hfopenllm_v2/belztjti/dffghgjh/7414d344-0e67-424a-9e16-00de0487ce02.json new file mode 100644 index 000000000..007ff11a3 --- /dev/null +++ b/data/hfopenllm_v2/belztjti/dffghgjh/7414d344-0e67-424a-9e16-00de0487ce02.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/belztjti_dffghgjh/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dffghgjh", + "id": "belztjti/dffghgjh", + "developer": "belztjti", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GlmForCausalLM", + "params_billions": 9.543 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5784 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3582 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0234 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on 
MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3475 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3422 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/belztjti/dffghgjh/82b3c9ac-16bb-4fd0-8bed-af1ac598a424.json b/data/hfopenllm_v2/belztjti/dffghgjh/82b3c9ac-16bb-4fd0-8bed-af1ac598a424.json deleted file mode 100644 index afd242bf2..000000000 --- a/data/hfopenllm_v2/belztjti/dffghgjh/82b3c9ac-16bb-4fd0-8bed-af1ac598a424.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/belztjti_dffghgjh/1762652580.0296938", - "retrieved_timestamp": "1762652580.029695", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "belztjti/dffghgjh", - "developer": "belztjti", - "inference_platform": "unknown", - "id": "belztjti/dffghgjh", - "additional_details": { - "precision": "bfloat16", - "architecture": "GlmForCausalLM", - "params_billions": 9.543 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5784241368457914 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35817085768640783 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.023413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34745833333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3421708776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/belztjti/dtfgv/655ea5ea-d94a-43eb-a4bf-182fd021d65a.json b/data/hfopenllm_v2/belztjti/dtfgv/655ea5ea-d94a-43eb-a4bf-182fd021d65a.json deleted file mode 100644 index 2ee88fd49..000000000 --- a/data/hfopenllm_v2/belztjti/dtfgv/655ea5ea-d94a-43eb-a4bf-182fd021d65a.json +++ 
/dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/belztjti_dtfgv/1762652580.029931", - "retrieved_timestamp": "1762652580.029932", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "belztjti/dtfgv", - "developer": "belztjti", - "inference_platform": "unknown", - "id": "belztjti/dtfgv", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 9.543 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.334450369464133 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32815316667476035 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01812688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3793958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15043218085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/belztjti/dtfgv/f5fcd407-080c-4cb7-a299-7a7f919c734d.json b/data/hfopenllm_v2/belztjti/dtfgv/f5fcd407-080c-4cb7-a299-7a7f919c734d.json new file mode 100644 index 000000000..05d2c4ee6 --- /dev/null +++ b/data/hfopenllm_v2/belztjti/dtfgv/f5fcd407-080c-4cb7-a299-7a7f919c734d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/belztjti_dtfgv/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dtfgv", + "id": "belztjti/dtfgv", + "developer": "belztjti", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 9.543 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3345 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3282 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0181 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2693 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3794 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1504 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/benhaotang/phi4-qwq-sky-t1/efe03731-6021-4dcf-b7fe-24cbf2d60fac.json b/data/hfopenllm_v2/benhaotang/phi4-qwq-sky-t1/efe03731-6021-4dcf-b7fe-24cbf2d60fac.json new file mode 100644 index 000000000..4a2fac32d --- /dev/null +++ b/data/hfopenllm_v2/benhaotang/phi4-qwq-sky-t1/efe03731-6021-4dcf-b7fe-24cbf2d60fac.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/benhaotang_phi4-qwq-sky-t1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "phi4-qwq-sky-t1", + "id": "benhaotang/phi4-qwq-sky-t1", + "developer": "benhaotang", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.046 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6711 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4101 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3951 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.49 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5244 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/beomi/gemma-mling-7b/6ffed624-cc22-4b62-a447-3c02b0e43ded.json b/data/hfopenllm_v2/beomi/gemma-mling-7b/6ffed624-cc22-4b62-a447-3c02b0e43ded.json new file mode 100644 index 000000000..dda19f01b --- /dev/null +++ b/data/hfopenllm_v2/beomi/gemma-mling-7b/6ffed624-cc22-4b62-a447-3c02b0e43ded.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/beomi_gemma-mling-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-mling-7b", + "id": "beomi/gemma-mling-7b", + "developer": "beomi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 8.538 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2029 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4068 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": 
"hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0544 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3759 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2633 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/beowolx/CodeNinja-1.0-OpenChat-7B/ed867fa8-be8a-49b0-8c94-38085808b58b.json b/data/hfopenllm_v2/beowolx/CodeNinja-1.0-OpenChat-7B/ed867fa8-be8a-49b0-8c94-38085808b58b.json new file mode 100644 index 000000000..4c755301d --- /dev/null +++ b/data/hfopenllm_v2/beowolx/CodeNinja-1.0-OpenChat-7B/ed867fa8-be8a-49b0-8c94-38085808b58b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/beowolx_CodeNinja-1.0-OpenChat-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CodeNinja-1.0-OpenChat-7B", + "id": "beowolx/CodeNinja-1.0-OpenChat-7B", + "developer": "beowolx", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5447 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4441 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, 
+ "max_score": 1.0 + }, + "score_details": { + "score": 0.0672 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4243 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3015 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/beowolx/CodeNinja-1.0-OpenChat-7B/fbe7d86c-8d1e-474a-bf85-35a139bdb08f.json b/data/hfopenllm_v2/beowolx/CodeNinja-1.0-OpenChat-7B/fbe7d86c-8d1e-474a-bf85-35a139bdb08f.json deleted file mode 100644 index 52408be9c..000000000 --- a/data/hfopenllm_v2/beowolx/CodeNinja-1.0-OpenChat-7B/fbe7d86c-8d1e-474a-bf85-35a139bdb08f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/beowolx_CodeNinja-1.0-OpenChat-7B/1762652580.030703", - "retrieved_timestamp": "1762652580.030704", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "beowolx/CodeNinja-1.0-OpenChat-7B", - "developer": "beowolx", - "inference_platform": "unknown", - "id": "beowolx/CodeNinja-1.0-OpenChat-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5446770125489258 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4441338669403703 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06722054380664652 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42432291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3015292553191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/berkeley-nest/Starling-LM-7B-alpha/c8b9a56b-0933-4085-8d5f-a1d8294699db.json b/data/hfopenllm_v2/berkeley-nest/Starling-LM-7B-alpha/c8b9a56b-0933-4085-8d5f-a1d8294699db.json new file mode 100644 index 000000000..59ff6b3cf --- /dev/null +++ b/data/hfopenllm_v2/berkeley-nest/Starling-LM-7B-alpha/c8b9a56b-0933-4085-8d5f-a1d8294699db.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/berkeley-nest_Starling-LM-7B-alpha/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Starling-LM-7B-alpha", + "id": "berkeley-nest/Starling-LM-7B-alpha", + "developer": "berkeley-nest", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.548 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.444 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0838 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.412 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3172 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/berkeley-nest/Starling-LM-7B-alpha/ddc116b6-5b9a-409f-a0ab-09e5630d1289.json b/data/hfopenllm_v2/berkeley-nest/Starling-LM-7B-alpha/ddc116b6-5b9a-409f-a0ab-09e5630d1289.json deleted file mode 100644 index bb3950e98..000000000 --- a/data/hfopenllm_v2/berkeley-nest/Starling-LM-7B-alpha/ddc116b6-5b9a-409f-a0ab-09e5630d1289.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/berkeley-nest_Starling-LM-7B-alpha/1762652580.030957", - "retrieved_timestamp": "1762652580.0309582", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "berkeley-nest/Starling-LM-7B-alpha", - "developer": "berkeley-nest", - "inference_platform": "unknown", - "id": "berkeley-nest/Starling-LM-7B-alpha", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5480491761858536 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4440065261164004 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08383685800604229 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41201041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3171542553191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/bfuzzy1/Gunny/9b178661-ed9a-427d-b93c-b905b8089ad8.json b/data/hfopenllm_v2/bfuzzy1/Gunny/9b178661-ed9a-427d-b93c-b905b8089ad8.json new file mode 100644 index 000000000..c2995af50 --- /dev/null +++ b/data/hfopenllm_v2/bfuzzy1/Gunny/9b178661-ed9a-427d-b93c-b905b8089ad8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bfuzzy1_Gunny/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", 
+ "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gunny", + "id": "bfuzzy1/Gunny", + "developer": "bfuzzy1", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7129 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4546 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.173 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2785 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3583 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3039 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bfuzzy1/Gunny/e7d0c3d5-d962-49b5-a4b7-3cb7ac12735c.json b/data/hfopenllm_v2/bfuzzy1/Gunny/e7d0c3d5-d962-49b5-a4b7-3cb7ac12735c.json deleted file mode 100644 index 2b7d1c92c..000000000 --- a/data/hfopenllm_v2/bfuzzy1/Gunny/e7d0c3d5-d962-49b5-a4b7-3cb7ac12735c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bfuzzy1_Gunny/1762652580.031208", - "retrieved_timestamp": "1762652580.031209", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bfuzzy1/Gunny", - "developer": "bfuzzy1", - "inference_platform": "unknown", - "id": "bfuzzy1/Gunny", - 
"additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7128629813339716 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45459857092962414 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1729607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35828124999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3038563829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/bfuzzy1/acheron-c/69588e07-7559-49c2-9423-19fd143e42f7.json b/data/hfopenllm_v2/bfuzzy1/acheron-c/69588e07-7559-49c2-9423-19fd143e42f7.json new file mode 100644 index 000000000..ca1f71360 --- /dev/null +++ b/data/hfopenllm_v2/bfuzzy1/acheron-c/69588e07-7559-49c2-9423-19fd143e42f7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bfuzzy1_acheron-c/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "acheron-c", + "id": "bfuzzy1/acheron-c", + "developer": "bfuzzy1", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.514 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1929 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3026 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + 
"hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.003 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2475 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3382 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1172 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bfuzzy1/acheron-c/71268c77-565a-401b-a51d-122060ed5945.json b/data/hfopenllm_v2/bfuzzy1/acheron-c/71268c77-565a-401b-a51d-122060ed5945.json deleted file mode 100644 index bbc823413..000000000 --- a/data/hfopenllm_v2/bfuzzy1/acheron-c/71268c77-565a-401b-a51d-122060ed5945.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bfuzzy1_acheron-c/1762652580.031654", - "retrieved_timestamp": "1762652580.0316548", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bfuzzy1/acheron-c", - "developer": "bfuzzy1", - "inference_platform": "unknown", - "id": "bfuzzy1/acheron-c", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.514 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19286714805604685 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30260703404313577 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0030211480362537764 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.24748322147651006 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33821875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1171875 - } - } - ] -} diff --git a/data/hfopenllm_v2/bfuzzy1/acheron-d/1c9ba45f-1f3b-42ad-a603-ea7039fee22e.json b/data/hfopenllm_v2/bfuzzy1/acheron-d/1c9ba45f-1f3b-42ad-a603-ea7039fee22e.json deleted file mode 100644 index 9e087ae36..000000000 --- a/data/hfopenllm_v2/bfuzzy1/acheron-d/1c9ba45f-1f3b-42ad-a603-ea7039fee22e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bfuzzy1_acheron-d/1762652580.031856", - "retrieved_timestamp": "1762652580.031857", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bfuzzy1/acheron-d", - "developer": "bfuzzy1", - "inference_platform": "unknown", - "id": "bfuzzy1/acheron-d", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.514 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.192542454021995 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3139959864926003 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23657718120805368 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34971875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11344747340425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/bfuzzy1/acheron-d/317589da-d673-4f90-93e9-59983f2ef54b.json b/data/hfopenllm_v2/bfuzzy1/acheron-d/317589da-d673-4f90-93e9-59983f2ef54b.json new file mode 100644 index 000000000..e74d10763 --- /dev/null +++ b/data/hfopenllm_v2/bfuzzy1/acheron-d/317589da-d673-4f90-93e9-59983f2ef54b.json @@ -0,0 +1,132 @@ +{ + 
"schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bfuzzy1_acheron-d/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "acheron-d", + "id": "bfuzzy1/acheron-d", + "developer": "bfuzzy1", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.514 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1925 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.314 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0151 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2366 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3497 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1134 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bfuzzy1/acheron-m/efab322e-ea15-4fe7-9bfc-15246003e59c.json b/data/hfopenllm_v2/bfuzzy1/acheron-m/efab322e-ea15-4fe7-9bfc-15246003e59c.json new file mode 100644 index 000000000..21c3ead2f --- /dev/null +++ b/data/hfopenllm_v2/bfuzzy1/acheron-m/efab322e-ea15-4fe7-9bfc-15246003e59c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bfuzzy1_acheron-m/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + 
}, + "model_info": { + "name": "acheron-m", + "id": "bfuzzy1/acheron-m", + "developer": "bfuzzy1", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.514 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1758 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0091 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3487 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1113 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bfuzzy1/acheron-m/fdd707f8-df0b-4384-bc77-35f3fa8ec0a0.json b/data/hfopenllm_v2/bfuzzy1/acheron-m/fdd707f8-df0b-4384-bc77-35f3fa8ec0a0.json deleted file mode 100644 index 6ab1a43f2..000000000 --- a/data/hfopenllm_v2/bfuzzy1/acheron-m/fdd707f8-df0b-4384-bc77-35f3fa8ec0a0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bfuzzy1_acheron-m/1762652580.032056", - "retrieved_timestamp": "1762652580.032057", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bfuzzy1/acheron-m", - "developer": "bfuzzy1", - "inference_platform": "unknown", - "id": "bfuzzy1/acheron-m", - "additional_details": { - "precision": "float16", - 
"architecture": "LlamaForCausalLM", - "params_billions": 0.514 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17583123889058808 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29284447696551025 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3486666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11128656914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/bfuzzy1/acheron-m1a-llama/b1eac68e-b292-414b-9594-c921f8e10818.json b/data/hfopenllm_v2/bfuzzy1/acheron-m1a-llama/b1eac68e-b292-414b-9594-c921f8e10818.json new file mode 100644 index 000000000..a0d0cda28 --- /dev/null +++ b/data/hfopenllm_v2/bfuzzy1/acheron-m1a-llama/b1eac68e-b292-414b-9594-c921f8e10818.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bfuzzy1_acheron-m1a-llama/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "acheron-m1a-llama", + "id": "bfuzzy1/acheron-m1a-llama", + "developer": "bfuzzy1", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.514 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1125 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2956 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + 
"hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0076 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3633 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1146 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bfuzzy1/acheron/2b74949a-c0a3-4061-8cf4-4330850af288.json b/data/hfopenllm_v2/bfuzzy1/acheron/2b74949a-c0a3-4061-8cf4-4330850af288.json deleted file mode 100644 index 5fa882098..000000000 --- a/data/hfopenllm_v2/bfuzzy1/acheron/2b74949a-c0a3-4061-8cf4-4330850af288.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bfuzzy1_acheron/1762652580.031447", - "retrieved_timestamp": "1762652580.031447", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bfuzzy1/acheron", - "developer": "bfuzzy1", - "inference_platform": "unknown", - "id": "bfuzzy1/acheron", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.514 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19831269919369493 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3107918622526179 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23909395973154363 - } - }, - { 
- "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3510520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10962433510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/bfuzzy1/acheron/b7d08c65-8219-4067-9504-99e438a86038.json b/data/hfopenllm_v2/bfuzzy1/acheron/b7d08c65-8219-4067-9504-99e438a86038.json new file mode 100644 index 000000000..599c0add4 --- /dev/null +++ b/data/hfopenllm_v2/bfuzzy1/acheron/b7d08c65-8219-4067-9504-99e438a86038.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bfuzzy1_acheron/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "acheron", + "id": "bfuzzy1/acheron", + "developer": "bfuzzy1", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.514 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1983 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3108 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0166 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2391 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3511 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on 
MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1096 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bfuzzy1/llambses-1/3f04797b-fe6d-4cd5-a49e-b898a8db26a6.json b/data/hfopenllm_v2/bfuzzy1/llambses-1/3f04797b-fe6d-4cd5-a49e-b898a8db26a6.json deleted file mode 100644 index b4bdfb79f..000000000 --- a/data/hfopenllm_v2/bfuzzy1/llambses-1/3f04797b-fe6d-4cd5-a49e-b898a8db26a6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bfuzzy1_llambses-1/1762652580.032492", - "retrieved_timestamp": "1762652580.032493", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bfuzzy1/llambses-1", - "developer": "bfuzzy1", - "inference_platform": "unknown", - "id": "bfuzzy1/llambses-1", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3553837152089788 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5046977405175623 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06873111782477341 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45290625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31399601063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/bfuzzy1/llambses-1/e9c5b479-0dce-4de3-84d6-90c7515337f1.json b/data/hfopenllm_v2/bfuzzy1/llambses-1/e9c5b479-0dce-4de3-84d6-90c7515337f1.json new file mode 100644 index 000000000..f8f06d954 --- /dev/null +++ b/data/hfopenllm_v2/bfuzzy1/llambses-1/e9c5b479-0dce-4de3-84d6-90c7515337f1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bfuzzy1_llambses-1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + 
"model_info": { + "name": "llambses-1", + "id": "bfuzzy1/llambses-1", + "developer": "bfuzzy1", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3554 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5047 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0687 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4529 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.314 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bhuvneshsaini/merged_model/3c766465-29db-4b3d-b42f-a3222b38a096.json b/data/hfopenllm_v2/bhuvneshsaini/merged_model/3c766465-29db-4b3d-b42f-a3222b38a096.json new file mode 100644 index 000000000..477afd11a --- /dev/null +++ b/data/hfopenllm_v2/bhuvneshsaini/merged_model/3c766465-29db-4b3d-b42f-a3222b38a096.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bhuvneshsaini_merged_model/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "merged_model", + "id": "bhuvneshsaini/merged_model", + "developer": "bhuvneshsaini", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.715 + } + }, + 
"evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1813 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.336 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3497 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1445 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bhuvneshsaini/merged_model/44e6cddd-4ecc-499f-a6b7-d8ee0640c2f9.json b/data/hfopenllm_v2/bhuvneshsaini/merged_model/44e6cddd-4ecc-499f-a6b7-d8ee0640c2f9.json deleted file mode 100644 index fa51f7bcc..000000000 --- a/data/hfopenllm_v2/bhuvneshsaini/merged_model/44e6cddd-4ecc-499f-a6b7-d8ee0640c2f9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bhuvneshsaini_merged_model/1762652580.032705", - "retrieved_timestamp": "1762652580.032706", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bhuvneshsaini/merged_model", - "developer": "bhuvneshsaini", - "inference_platform": "unknown", - "id": "bhuvneshsaini/merged_model", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.715 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1812767900282362 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3359777949071243 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34971875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14453125 - } - } - ] -} diff --git a/data/hfopenllm_v2/bigcode/starcoder2-15b/09aa04cf-9369-453f-952a-2f6c74e4707a.json b/data/hfopenllm_v2/bigcode/starcoder2-15b/09aa04cf-9369-453f-952a-2f6c74e4707a.json deleted file mode 100644 index 1a8d5448b..000000000 --- a/data/hfopenllm_v2/bigcode/starcoder2-15b/09aa04cf-9369-453f-952a-2f6c74e4707a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bigcode_starcoder2-15b/1762652580.032956", - "retrieved_timestamp": "1762652580.0329568", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bigcode/starcoder2-15b", - "developer": "bigcode", - "inference_platform": "unknown", - "id": "bigcode/starcoder2-15b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Starcoder2ForCausalLM", - "params_billions": 15.958 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2780223141265177 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4447957841230437 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05966767371601209 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 
- }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35009375000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23528922872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/bigcode/starcoder2-15b/e6c85677-61ed-475b-85a5-48b91ec76bcf.json b/data/hfopenllm_v2/bigcode/starcoder2-15b/e6c85677-61ed-475b-85a5-48b91ec76bcf.json new file mode 100644 index 000000000..9a0162bcf --- /dev/null +++ b/data/hfopenllm_v2/bigcode/starcoder2-15b/e6c85677-61ed-475b-85a5-48b91ec76bcf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bigcode_starcoder2-15b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "starcoder2-15b", + "id": "bigcode/starcoder2-15b", + "developer": "bigcode", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Starcoder2ForCausalLM", + "params_billions": 15.958 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.278 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4448 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0597 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2735 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3501 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + 
"source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2353 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bigcode/starcoder2-3b/7385c595-5b4f-4491-8e71-ece57ffffbd2.json b/data/hfopenllm_v2/bigcode/starcoder2-3b/7385c595-5b4f-4491-8e71-ece57ffffbd2.json deleted file mode 100644 index 690c3f5b4..000000000 --- a/data/hfopenllm_v2/bigcode/starcoder2-3b/7385c595-5b4f-4491-8e71-ece57ffffbd2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bigcode_starcoder2-3b/1762652580.0331972", - "retrieved_timestamp": "1762652580.0331972", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bigcode/starcoder2-3b", - "developer": "bigcode", - "inference_platform": "unknown", - "id": "bigcode/starcoder2-3b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Starcoder2ForCausalLM", - "params_billions": 3.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20370838264693236 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35087141384601755 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24412751677852348 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34345833333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1636469414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/bigcode/starcoder2-3b/7b68fa5e-dbbf-4542-8767-6874aabf8f40.json b/data/hfopenllm_v2/bigcode/starcoder2-3b/7b68fa5e-dbbf-4542-8767-6874aabf8f40.json new file mode 100644 index 000000000..4f9f33d8f --- /dev/null +++ b/data/hfopenllm_v2/bigcode/starcoder2-3b/7b68fa5e-dbbf-4542-8767-6874aabf8f40.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bigcode_starcoder2-3b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": 
{ + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "starcoder2-3b", + "id": "bigcode/starcoder2-3b", + "developer": "bigcode", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Starcoder2ForCausalLM", + "params_billions": 3.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2037 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3509 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0151 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2441 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3435 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1636 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bigcode/starcoder2-7b/53eac61a-064e-4786-bc94-962382d88f77.json b/data/hfopenllm_v2/bigcode/starcoder2-7b/53eac61a-064e-4786-bc94-962382d88f77.json deleted file mode 100644 index 45ace56c1..000000000 --- a/data/hfopenllm_v2/bigcode/starcoder2-7b/53eac61a-064e-4786-bc94-962382d88f77.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bigcode_starcoder2-7b/1762652580.0333922", - "retrieved_timestamp": "1762652580.0333922", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - 
"model_info": { - "name": "bigcode/starcoder2-7b", - "developer": "bigcode", - "inference_platform": "unknown", - "id": "bigcode/starcoder2-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Starcoder2ForCausalLM", - "params_billions": 7.174 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22091938279321088 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36609857669123036 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030966767371601207 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3793333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16422872340425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/bigcode/starcoder2-7b/c103b7f4-a432-42d6-86ef-cb369e0c16ff.json b/data/hfopenllm_v2/bigcode/starcoder2-7b/c103b7f4-a432-42d6-86ef-cb369e0c16ff.json new file mode 100644 index 000000000..ba7f43161 --- /dev/null +++ b/data/hfopenllm_v2/bigcode/starcoder2-7b/c103b7f4-a432-42d6-86ef-cb369e0c16ff.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bigcode_starcoder2-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "starcoder2-7b", + "id": "bigcode/starcoder2-7b", + "developer": "bigcode", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Starcoder2ForCausalLM", + "params_billions": 7.174 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2209 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.3661 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.031 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2517 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3793 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1642 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bigscience/bloom-1b1/284ba4fb-cae4-46ac-a5dd-a36fb145da55.json b/data/hfopenllm_v2/bigscience/bloom-1b1/284ba4fb-cae4-46ac-a5dd-a36fb145da55.json deleted file mode 100644 index 9b2d5ef10..000000000 --- a/data/hfopenllm_v2/bigscience/bloom-1b1/284ba4fb-cae4-46ac-a5dd-a36fb145da55.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bigscience_bloom-1b1/1762652580.033589", - "retrieved_timestamp": "1762652580.033589", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bigscience/bloom-1b1", - "developer": "bigscience", - "inference_platform": "unknown", - "id": "bigscience/bloom-1b1", - "additional_details": { - "precision": "bfloat16", - "architecture": "BloomForCausalLM", - "params_billions": 1.065 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13733781920858879 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31072762377370394 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.005287009063444109 - } - }, - { - "evaluation_name": "GPQA", 
- "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36999999999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1107878989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/bigscience/bloom-1b1/643dda41-37d0-4c1e-b856-58b774612886.json b/data/hfopenllm_v2/bigscience/bloom-1b1/643dda41-37d0-4c1e-b856-58b774612886.json new file mode 100644 index 000000000..02b4a4909 --- /dev/null +++ b/data/hfopenllm_v2/bigscience/bloom-1b1/643dda41-37d0-4c1e-b856-58b774612886.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bigscience_bloom-1b1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bloom-1b1", + "id": "bigscience/bloom-1b1", + "developer": "bigscience", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "BloomForCausalLM", + "params_billions": 1.065 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1373 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3107 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0053 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.37 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1108 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bigscience/bloom-1b7/8adb8bb9-d057-45df-827a-cd8f014b4ff6.json b/data/hfopenllm_v2/bigscience/bloom-1b7/8adb8bb9-d057-45df-827a-cd8f014b4ff6.json deleted file mode 100644 index 8f38960eb..000000000 --- a/data/hfopenllm_v2/bigscience/bloom-1b7/8adb8bb9-d057-45df-827a-cd8f014b4ff6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bigscience_bloom-1b7/1762652580.033839", - "retrieved_timestamp": "1762652580.033839", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bigscience/bloom-1b7", - "developer": "bigscience", - "inference_platform": "unknown", - "id": "bigscience/bloom-1b7", - "additional_details": { - "precision": "bfloat16", - "architecture": "BloomForCausalLM", - "params_billions": 1.722 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10438968603305895 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.314054919904072 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.005287009063444109 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38857291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10862699468085106 - } - } - ] -} diff --git a/data/hfopenllm_v2/bigscience/bloom-1b7/ba2f284b-d7c6-4748-a8dc-4f80caa30c6c.json b/data/hfopenllm_v2/bigscience/bloom-1b7/ba2f284b-d7c6-4748-a8dc-4f80caa30c6c.json new file mode 100644 index 000000000..e53021d83 --- /dev/null +++ b/data/hfopenllm_v2/bigscience/bloom-1b7/ba2f284b-d7c6-4748-a8dc-4f80caa30c6c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/bigscience_bloom-1b7/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bloom-1b7", + "id": "bigscience/bloom-1b7", + "developer": "bigscience", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "BloomForCausalLM", + "params_billions": 1.722 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1044 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3141 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0053 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3886 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1086 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bigscience/bloom-3b/16e30aa0-736a-4ef8-8ba6-78285b84546f.json b/data/hfopenllm_v2/bigscience/bloom-3b/16e30aa0-736a-4ef8-8ba6-78285b84546f.json new file mode 100644 index 000000000..0929bf77c --- /dev/null +++ b/data/hfopenllm_v2/bigscience/bloom-3b/16e30aa0-736a-4ef8-8ba6-78285b84546f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bigscience_bloom-3b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + 
"name": "bloom-3b", + "id": "bigscience/bloom-3b", + "developer": "bigscience", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "BloomForCausalLM", + "params_billions": 3.003 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1271 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3063 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0083 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2399 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3981 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1133 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bigscience/bloom-3b/88f90805-7410-4ec1-ad19-8e8a146f1ba3.json b/data/hfopenllm_v2/bigscience/bloom-3b/88f90805-7410-4ec1-ad19-8e8a146f1ba3.json deleted file mode 100644 index d61e94fbd..000000000 --- a/data/hfopenllm_v2/bigscience/bloom-3b/88f90805-7410-4ec1-ad19-8e8a146f1ba3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bigscience_bloom-3b/1762652580.034177", - "retrieved_timestamp": "1762652580.034179", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bigscience/bloom-3b", - "developer": "bigscience", - "inference_platform": "unknown", - "id": "bigscience/bloom-3b", - "additional_details": { - "precision": "bfloat16", - 
"architecture": "BloomForCausalLM", - "params_billions": 3.003 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1270961050013963 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062918592346337 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23993288590604026 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3980625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11328125 - } - } - ] -} diff --git a/data/hfopenllm_v2/bigscience/bloom-560m/73eb729d-adfd-4dee-9bde-04a31f5528f6.json b/data/hfopenllm_v2/bigscience/bloom-560m/73eb729d-adfd-4dee-9bde-04a31f5528f6.json new file mode 100644 index 000000000..2294ec54f --- /dev/null +++ b/data/hfopenllm_v2/bigscience/bloom-560m/73eb729d-adfd-4dee-9bde-04a31f5528f6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bigscience_bloom-560m/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bloom-560m", + "id": "bigscience/bloom-560m", + "developer": "bigscience", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "BloomForCausalLM", + "params_billions": 0.559 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.062 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3026 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0038 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4031 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1164 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bigscience/bloom-560m/82454b92-cca1-4ac8-a620-e1a8487a5b8e.json b/data/hfopenllm_v2/bigscience/bloom-560m/82454b92-cca1-4ac8-a620-e1a8487a5b8e.json deleted file mode 100644 index 614921465..000000000 --- a/data/hfopenllm_v2/bigscience/bloom-560m/82454b92-cca1-4ac8-a620-e1a8487a5b8e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bigscience_bloom-560m/1762652580.034546", - "retrieved_timestamp": "1762652580.034548", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bigscience/bloom-560m", - "developer": "bigscience", - "inference_platform": "unknown", - "id": "bigscience/bloom-560m", - "additional_details": { - "precision": "bfloat16", - "architecture": "BloomForCausalLM", - "params_billions": 0.559 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06202431769926019 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3025950541549823 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0037764350453172208 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4030833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11643949468085106 - } - } - ] -} diff --git a/data/hfopenllm_v2/bigscience/bloom-7b1/0daad2ae-92d0-4522-a067-20332f72c96f.json b/data/hfopenllm_v2/bigscience/bloom-7b1/0daad2ae-92d0-4522-a067-20332f72c96f.json new file mode 100644 index 000000000..2cfad20ec --- /dev/null +++ b/data/hfopenllm_v2/bigscience/bloom-7b1/0daad2ae-92d0-4522-a067-20332f72c96f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bigscience_bloom-7b1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bloom-7b1", + "id": "bigscience/bloom-7b1", + "developer": "bigscience", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "BloomForCausalLM", + "params_billions": 7.069 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1322 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3114 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0053 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3487 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, 
+ "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1105 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bigscience/bloom-7b1/d5fe1452-b6ee-4f1d-9eca-713b49a6a941.json b/data/hfopenllm_v2/bigscience/bloom-7b1/d5fe1452-b6ee-4f1d-9eca-713b49a6a941.json deleted file mode 100644 index 911215713..000000000 --- a/data/hfopenllm_v2/bigscience/bloom-7b1/d5fe1452-b6ee-4f1d-9eca-713b49a6a941.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bigscience_bloom-7b1/1762652580.0348449", - "retrieved_timestamp": "1762652580.034846", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bigscience/bloom-7b1", - "developer": "bigscience", - "inference_platform": "unknown", - "id": "bigscience/bloom-7b1", - "additional_details": { - "precision": "float16", - "architecture": "BloomForCausalLM", - "params_billions": 7.069 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13221696210499254 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3113718529627139 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.005287009063444109 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34869791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11045545212765957 - } - } - ] -} diff --git a/data/hfopenllm_v2/bluuwhale/L3-SthenoMaid-8B-V1/44dd13bc-56f0-4dd1-90d0-bb411239109a.json b/data/hfopenllm_v2/bluuwhale/L3-SthenoMaid-8B-V1/44dd13bc-56f0-4dd1-90d0-bb411239109a.json deleted file mode 100644 index 7c1cc4853..000000000 --- a/data/hfopenllm_v2/bluuwhale/L3-SthenoMaid-8B-V1/44dd13bc-56f0-4dd1-90d0-bb411239109a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bluuwhale_L3-SthenoMaid-8B-V1/1762652580.035146", - "retrieved_timestamp": "1762652580.035147", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bluuwhale/L3-SthenoMaid-8B-V1", - "developer": "bluuwhale", - "inference_platform": "unknown", - "id": "bluuwhale/L3-SthenoMaid-8B-V1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7344700949037443 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5218759253208048 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10800604229607251 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3686979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3656083776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/bluuwhale/L3-SthenoMaid-8B-V1/a3e3849f-a289-4132-b4a8-f67d67ad46a1.json b/data/hfopenllm_v2/bluuwhale/L3-SthenoMaid-8B-V1/a3e3849f-a289-4132-b4a8-f67d67ad46a1.json new file mode 100644 index 000000000..bc1d74cae --- /dev/null +++ b/data/hfopenllm_v2/bluuwhale/L3-SthenoMaid-8B-V1/a3e3849f-a289-4132-b4a8-f67d67ad46a1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bluuwhale_L3-SthenoMaid-8B-V1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3-SthenoMaid-8B-V1", + "id": "bluuwhale/L3-SthenoMaid-8B-V1", + "developer": "bluuwhale", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { 
+ "score": 0.7345 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5219 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.108 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3687 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3656 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bond005/meno-tiny-0.1/109acb38-3026-4573-b082-8277b9501f09.json b/data/hfopenllm_v2/bond005/meno-tiny-0.1/109acb38-3026-4573-b082-8277b9501f09.json deleted file mode 100644 index ee409f30c..000000000 --- a/data/hfopenllm_v2/bond005/meno-tiny-0.1/109acb38-3026-4573-b082-8277b9501f09.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bond005_meno-tiny-0.1/1762652580.035417", - "retrieved_timestamp": "1762652580.035417", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bond005/meno-tiny-0.1", - "developer": "bond005", - "inference_platform": "unknown", - "id": "bond005/meno-tiny-0.1", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45497613000172876 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4262909130965971 - } 
- }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13897280966767372 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4184583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785904255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/bond005/meno-tiny-0.1/59a9ed26-a67a-4e76-8858-520400c90766.json b/data/hfopenllm_v2/bond005/meno-tiny-0.1/59a9ed26-a67a-4e76-8858-520400c90766.json new file mode 100644 index 000000000..f9ab2bb8c --- /dev/null +++ b/data/hfopenllm_v2/bond005/meno-tiny-0.1/59a9ed26-a67a-4e76-8858-520400c90766.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bond005_meno-tiny-0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "meno-tiny-0.1", + "id": "bond005/meno-tiny-0.1", + "developer": "bond005", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.455 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4263 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.139 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2819 + } + 
}, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4185 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2786 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bosonai/Higgs-Llama-3-70B/6c5c61b4-8037-4b28-8616-1aefa7963eb8.json b/data/hfopenllm_v2/bosonai/Higgs-Llama-3-70B/6c5c61b4-8037-4b28-8616-1aefa7963eb8.json new file mode 100644 index 000000000..7f13e2ca9 --- /dev/null +++ b/data/hfopenllm_v2/bosonai/Higgs-Llama-3-70B/6c5c61b4-8037-4b28-8616-1aefa7963eb8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bosonai_Higgs-Llama-3-70B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Higgs-Llama-3-70B", + "id": "bosonai/Higgs-Llama-3-70B", + "developer": "bosonai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5561 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6258 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2523 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3666 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4471 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4902 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-1.5B-Blunt/e9f9b836-fbdf-4996-9b35-2c8145a7f01b.json b/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-1.5B-Blunt/e9f9b836-fbdf-4996-9b35-2c8145a7f01b.json new file mode 100644 index 000000000..4c23d4552 --- /dev/null +++ b/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-1.5B-Blunt/e9f9b836-fbdf-4996-9b35-2c8145a7f01b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-1.5B-Blunt/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-R1-Distill-Qwen-1.5B-Blunt", + "id": "braindao/DeepSeek-R1-Distill-Qwen-1.5B-Blunt", + "developer": "braindao", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2611 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2774 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1382 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2475 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3595 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + 
"dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1184 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-1.5B-Reflective/5b3dae43-5d5c-4d19-bd47-5c0f68ecbb81.json b/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-1.5B-Reflective/5b3dae43-5d5c-4d19-bd47-5c0f68ecbb81.json new file mode 100644 index 000000000..7fd796153 --- /dev/null +++ b/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-1.5B-Reflective/5b3dae43-5d5c-4d19-bd47-5c0f68ecbb81.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-1.5B-Reflective/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-R1-Distill-Qwen-1.5B-Reflective", + "id": "braindao/DeepSeek-R1-Distill-Qwen-1.5B-Reflective", + "developer": "braindao", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3033 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2908 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1631 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3356 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.113 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-14B-ABUB-ST/d5b31b1f-ace0-457f-bf8a-9041398b8344.json b/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-14B-ABUB-ST/d5b31b1f-ace0-457f-bf8a-9041398b8344.json new file mode 100644 index 000000000..71e6c0585 --- /dev/null +++ b/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-14B-ABUB-ST/d5b31b1f-ace0-457f-bf8a-9041398b8344.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B-ABUB-ST/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-R1-Distill-Qwen-14B-ABUB-ST", + "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-ABUB-ST", + "developer": "braindao", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3752 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4927 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5015 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3448 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4221 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.4243 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt-Reflective/b34702cf-ffb8-4e75-9c9b-f5c52623d4c8.json b/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt-Reflective/b34702cf-ffb8-4e75-9c9b-f5c52623d4c8.json new file mode 100644 index 000000000..ce916802a --- /dev/null +++ b/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt-Reflective/b34702cf-ffb8-4e75-9c9b-f5c52623d4c8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt-Reflective/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt-Reflective", + "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt-Reflective", + "developer": "braindao", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.554 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3371 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2372 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4248 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.1504 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt/c701f1fd-166d-416b-8f78-edf17f2fecd4.json b/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt/c701f1fd-166d-416b-8f78-edf17f2fecd4.json new file mode 100644 index 000000000..d0206075b --- /dev/null +++ b/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt/c701f1fd-166d-416b-8f78-edf17f2fecd4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt", + "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt", + "developer": "braindao", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5221 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3199 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2508 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2785 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4527 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1484 + } + } + ] +} \ No newline 
at end of file diff --git a/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Reflective/4217b403-e924-4f67-9b0e-ad1d4ed293a1.json b/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Reflective/4217b403-e924-4f67-9b0e-ad1d4ed293a1.json new file mode 100644 index 000000000..7a63e7232 --- /dev/null +++ b/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Reflective/4217b403-e924-4f67-9b0e-ad1d4ed293a1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Reflective/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Reflective", + "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Reflective", + "developer": "braindao", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5139 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3013 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1473 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2878 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4433 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1289 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored/03816e41-5fb8-4815-ab9c-4108ab19a3bc.json b/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored/03816e41-5fb8-4815-ab9c-4108ab19a3bc.json new file mode 100644 index 000000000..93ba8529b --- /dev/null +++ b/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored/03816e41-5fb8-4815-ab9c-4108ab19a3bc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored", + "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored", + "developer": "braindao", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5422 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.317 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1631 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4487 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1431 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt/a763b10e-350a-4342-ade3-b782437ca3e2.json b/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt/a763b10e-350a-4342-ade3-b782437ca3e2.json new file mode 100644 index 000000000..511ffb178 --- /dev/null +++ b/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt/a763b10e-350a-4342-ade3-b782437ca3e2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-R1-Distill-Qwen-14B-Blunt", + "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt", + "developer": "braindao", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5612 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3283 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1639 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4554 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1447 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-14B-Reflective/9e806fd2-edbf-40e2-a008-834cee537bb6.json 
b/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-14B-Reflective/9e806fd2-edbf-40e2-a008-834cee537bb6.json new file mode 100644 index 000000000..c56a5b418 --- /dev/null +++ b/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-14B-Reflective/9e806fd2-edbf-40e2-a008-834cee537bb6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B-Reflective/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-R1-Distill-Qwen-14B-Reflective", + "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Reflective", + "developer": "braindao", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.429 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1918 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4554 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1129 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-14B/fbcf861c-62db-4079-bba6-becd4e231216.json b/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-14B/fbcf861c-62db-4079-bba6-becd4e231216.json new file mode 100644 index 000000000..cd51e1d40 
--- /dev/null +++ b/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-14B/fbcf861c-62db-4079-bba6-becd4e231216.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-R1-Distill-Qwen-14B", + "id": "braindao/DeepSeek-R1-Distill-Qwen-14B", + "developer": "braindao", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4172 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3033 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.176 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4488 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1127 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-7B-Blunt/22b591c0-3386-4bd5-860c-20c0c6001986.json b/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-7B-Blunt/22b591c0-3386-4bd5-860c-20c0c6001986.json new file mode 100644 index 000000000..e9582887e --- /dev/null +++ b/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-7B-Blunt/22b591c0-3386-4bd5-860c-20c0c6001986.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-7B-Blunt/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-R1-Distill-Qwen-7B-Blunt", + "id": "braindao/DeepSeek-R1-Distill-Qwen-7B-Blunt", + "developer": "braindao", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4266 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2902 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2145 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3885 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1169 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored/dfb9a9c4-114e-4188-9940-4d6df7e4815f.json b/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored/dfb9a9c4-114e-4188-9940-4d6df7e4815f.json new file mode 100644 index 000000000..b9fd0837f --- /dev/null +++ b/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored/dfb9a9c4-114e-4188-9940-4d6df7e4815f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + 
"source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored", + "id": "braindao/DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored", + "developer": "braindao", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3655 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2958 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1737 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2534 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3846 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1133 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-7B-Reflective/38fd5f4d-0f3c-4dc2-b250-a9ee7090aac2.json b/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-7B-Reflective/38fd5f4d-0f3c-4dc2-b250-a9ee7090aac2.json new file mode 100644 index 000000000..dea2a0ad9 --- /dev/null +++ b/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-7B-Reflective/38fd5f4d-0f3c-4dc2-b250-a9ee7090aac2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-7B-Reflective/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging 
Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-R1-Distill-Qwen-7B-Reflective", + "id": "braindao/DeepSeek-R1-Distill-Qwen-7B-Reflective", + "developer": "braindao", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3922 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2907 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2024 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2542 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.39 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1155 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-7B/e53cbc94-fc9f-4d53-ae28-26bc8c2caef8.json b/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-7B/e53cbc94-fc9f-4d53-ae28-26bc8c2caef8.json new file mode 100644 index 000000000..256a50564 --- /dev/null +++ b/data/hfopenllm_v2/braindao/DeepSeek-R1-Distill-Qwen-7B/e53cbc94-fc9f-4d53-ae28-26bc8c2caef8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-R1-Distill-Qwen-7B", + "id": "braindao/DeepSeek-R1-Distill-Qwen-7B", + "developer": "braindao", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3968 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2887 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1918 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3767 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1141 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/braindao/Qwen2.5-14B-Instruct/2165e69a-c50c-419a-932e-909f53b73b71.json b/data/hfopenllm_v2/braindao/Qwen2.5-14B-Instruct/2165e69a-c50c-419a-932e-909f53b73b71.json new file mode 100644 index 000000000..5a58485ba --- /dev/null +++ b/data/hfopenllm_v2/braindao/Qwen2.5-14B-Instruct/2165e69a-c50c-419a-932e-909f53b73b71.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/braindao_Qwen2.5-14B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-Instruct", + "id": "braindao/Qwen2.5-14B-Instruct", + "developer": "braindao", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + 
"dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8143 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6404 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5529 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3289 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.414 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4889 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/braindao/Qwen2.5-14B-Instruct/cb442f90-a0e1-4588-900c-548b994a764d.json b/data/hfopenllm_v2/braindao/Qwen2.5-14B-Instruct/cb442f90-a0e1-4588-900c-548b994a764d.json deleted file mode 100644 index 557b2d8ae..000000000 --- a/data/hfopenllm_v2/braindao/Qwen2.5-14B-Instruct/cb442f90-a0e1-4588-900c-548b994a764d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/braindao_Qwen2.5-14B-Instruct/1762652580.040103", - "retrieved_timestamp": "1762652580.040104", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "braindao/Qwen2.5-14B-Instruct", - "developer": "braindao", - "inference_platform": "unknown", - "id": "braindao/Qwen2.5-14B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, 
- "max_score": 1 - }, - "score_details": { - "score": 0.8142539572778007 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6403640774008682 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.414 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48894614361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/braindao/Qwen2.5-14B/46430a07-15c8-4727-9102-2f471d4f1d3c.json b/data/hfopenllm_v2/braindao/Qwen2.5-14B/46430a07-15c8-4727-9102-2f471d4f1d3c.json new file mode 100644 index 000000000..fd68ca929 --- /dev/null +++ b/data/hfopenllm_v2/braindao/Qwen2.5-14B/46430a07-15c8-4727-9102-2f471d4f1d3c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/braindao_Qwen2.5-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B", + "id": "braindao/Qwen2.5-14B", + "developer": "braindao", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5409 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5853 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2923 + } + }, + { + "evaluation_name": "GPQA", + 
"source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3733 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4124 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4884 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/braindao/iq-code-evmind-0.5b/3c7f540a-c850-4e20-ad93-60e021d17133.json b/data/hfopenllm_v2/braindao/iq-code-evmind-0.5b/3c7f540a-c850-4e20-ad93-60e021d17133.json new file mode 100644 index 000000000..7ebae94fb --- /dev/null +++ b/data/hfopenllm_v2/braindao/iq-code-evmind-0.5b/3c7f540a-c850-4e20-ad93-60e021d17133.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/braindao_iq-code-evmind-0.5b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "iq-code-evmind-0.5b", + "id": "braindao/iq-code-evmind-0.5b", + "developer": "braindao", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3216 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3164 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0242 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, 
+ "max_score": 1.0 + }, + "score_details": { + "score": 0.2416 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3304 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1189 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/braindao/iq-code-evmind-0.5b/58f1b3d7-74a6-4ed0-b927-afaedfdda25f.json b/data/hfopenllm_v2/braindao/iq-code-evmind-0.5b/58f1b3d7-74a6-4ed0-b927-afaedfdda25f.json deleted file mode 100644 index 85f7d9c15..000000000 --- a/data/hfopenllm_v2/braindao/iq-code-evmind-0.5b/58f1b3d7-74a6-4ed0-b927-afaedfdda25f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/braindao_iq-code-evmind-0.5b/1762652580.0403671", - "retrieved_timestamp": "1762652580.040368", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "braindao/iq-code-evmind-0.5b", - "developer": "braindao", - "inference_platform": "unknown", - "id": "braindao/iq-code-evmind-0.5b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3215612353001148 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31637440507987097 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02416918429003021 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24161073825503357 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33037500000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.11893284574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/brgx53/3Bgeneral-ECE-PRYMMAL-Martial/06d2ac1d-d70c-4cda-997d-9d4d1ef50c5a.json b/data/hfopenllm_v2/brgx53/3Bgeneral-ECE-PRYMMAL-Martial/06d2ac1d-d70c-4cda-997d-9d4d1ef50c5a.json deleted file mode 100644 index 1d649409e..000000000 --- a/data/hfopenllm_v2/brgx53/3Bgeneral-ECE-PRYMMAL-Martial/06d2ac1d-d70c-4cda-997d-9d4d1ef50c5a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/brgx53_3Bgeneral-ECE-PRYMMAL-Martial/1762652580.040573", - "retrieved_timestamp": "1762652580.0405738", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "brgx53/3Bgeneral-ECE-PRYMMAL-Martial", - "developer": "brgx53", - "inference_platform": "unknown", - "id": "brgx53/3Bgeneral-ECE-PRYMMAL-Martial", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32893057088525113 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5458008312900208 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13141993957703926 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43728125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3933676861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/brgx53/3Bgeneral-ECE-PRYMMAL-Martial/c3ab4f38-6f7b-4589-ae4f-21ace05b8c44.json b/data/hfopenllm_v2/brgx53/3Bgeneral-ECE-PRYMMAL-Martial/c3ab4f38-6f7b-4589-ae4f-21ace05b8c44.json new file mode 100644 index 000000000..e96867233 --- /dev/null +++ b/data/hfopenllm_v2/brgx53/3Bgeneral-ECE-PRYMMAL-Martial/c3ab4f38-6f7b-4589-ae4f-21ace05b8c44.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/brgx53_3Bgeneral-ECE-PRYMMAL-Martial/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": 
"third_party" + }, + "model_info": { + "name": "3Bgeneral-ECE-PRYMMAL-Martial", + "id": "brgx53/3Bgeneral-ECE-PRYMMAL-Martial", + "developer": "brgx53", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3289 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5458 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1314 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3247 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4373 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3934 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/brgx53/3Bgeneralv2-ECE-PRYMMAL-Martial/2708c0d6-03e7-4a17-b6b9-e16f3ddcf5bb.json b/data/hfopenllm_v2/brgx53/3Bgeneralv2-ECE-PRYMMAL-Martial/2708c0d6-03e7-4a17-b6b9-e16f3ddcf5bb.json new file mode 100644 index 000000000..bebdea7ac --- /dev/null +++ b/data/hfopenllm_v2/brgx53/3Bgeneralv2-ECE-PRYMMAL-Martial/2708c0d6-03e7-4a17-b6b9-e16f3ddcf5bb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/brgx53_3Bgeneralv2-ECE-PRYMMAL-Martial/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "3Bgeneralv2-ECE-PRYMMAL-Martial", + "id": "brgx53/3Bgeneralv2-ECE-PRYMMAL-Martial", + "developer": "brgx53", + "inference_platform": "unknown", + 
"additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5677 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5607 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3497 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3112 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4356 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4505 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/brgx53/3Bgeneralv2-ECE-PRYMMAL-Martial/c7f6603c-dcca-49b9-94bd-0a1fbf707dd9.json b/data/hfopenllm_v2/brgx53/3Bgeneralv2-ECE-PRYMMAL-Martial/c7f6603c-dcca-49b9-94bd-0a1fbf707dd9.json deleted file mode 100644 index c25eb0787..000000000 --- a/data/hfopenllm_v2/brgx53/3Bgeneralv2-ECE-PRYMMAL-Martial/c7f6603c-dcca-49b9-94bd-0a1fbf707dd9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/brgx53_3Bgeneralv2-ECE-PRYMMAL-Martial/1762652580.040823", - "retrieved_timestamp": "1762652580.0408242", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "brgx53/3Bgeneralv2-ECE-PRYMMAL-Martial", - "developer": "brgx53", - "inference_platform": "unknown", - "id": "brgx53/3Bgeneralv2-ECE-PRYMMAL-Martial", - "additional_details": { - "precision": "bfloat16", - "architecture": 
"Qwen2ForCausalLM", - "params_billions": 3.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.567708125551315 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5607195549186694 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3496978851963746 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43563541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45054853723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/brgx53/3Blareneg-ECE-PRYMMAL-Martial/6427a5ef-8508-430d-970d-054fc485e754.json b/data/hfopenllm_v2/brgx53/3Blareneg-ECE-PRYMMAL-Martial/6427a5ef-8508-430d-970d-054fc485e754.json new file mode 100644 index 000000000..27aa2a2d2 --- /dev/null +++ b/data/hfopenllm_v2/brgx53/3Blareneg-ECE-PRYMMAL-Martial/6427a5ef-8508-430d-970d-054fc485e754.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/brgx53_3Blareneg-ECE-PRYMMAL-Martial/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "3Blareneg-ECE-PRYMMAL-Martial", + "id": "brgx53/3Blareneg-ECE-PRYMMAL-Martial", + "developer": "brgx53", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2876 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5358 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH 
Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1208 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3347 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4429 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4016 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/brgx53/3Blareneg-ECE-PRYMMAL-Martial/6fea29aa-174f-4e3f-be91-c79842126c2c.json b/data/hfopenllm_v2/brgx53/3Blareneg-ECE-PRYMMAL-Martial/6fea29aa-174f-4e3f-be91-c79842126c2c.json deleted file mode 100644 index 00b4210a5..000000000 --- a/data/hfopenllm_v2/brgx53/3Blareneg-ECE-PRYMMAL-Martial/6fea29aa-174f-4e3f-be91-c79842126c2c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/brgx53_3Blareneg-ECE-PRYMMAL-Martial/1762652580.041033", - "retrieved_timestamp": "1762652580.041034", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "brgx53/3Blareneg-ECE-PRYMMAL-Martial", - "developer": "brgx53", - "inference_platform": "unknown", - "id": "brgx53/3Blareneg-ECE-PRYMMAL-Martial", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28763902002242936 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.535846215598753 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on 
GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3347315436241611 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4428958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4015957446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/brgx53/3Blarenegv2-ECE-PRYMMAL-Martial/08984ad9-1e9b-4916-b214-af26dadfcc0b.json b/data/hfopenllm_v2/brgx53/3Blarenegv2-ECE-PRYMMAL-Martial/08984ad9-1e9b-4916-b214-af26dadfcc0b.json new file mode 100644 index 000000000..3114b7908 --- /dev/null +++ b/data/hfopenllm_v2/brgx53/3Blarenegv2-ECE-PRYMMAL-Martial/08984ad9-1e9b-4916-b214-af26dadfcc0b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/brgx53_3Blarenegv2-ECE-PRYMMAL-Martial/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "3Blarenegv2-ECE-PRYMMAL-Martial", + "id": "brgx53/3Blarenegv2-ECE-PRYMMAL-Martial", + "developer": "brgx53", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5662 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5607 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3497 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3112 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4356 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4505 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/brgx53/3Blarenegv2-ECE-PRYMMAL-Martial/64e92286-72ea-4318-aaea-4e0be87a0067.json b/data/hfopenllm_v2/brgx53/3Blarenegv2-ECE-PRYMMAL-Martial/64e92286-72ea-4318-aaea-4e0be87a0067.json deleted file mode 100644 index 4dabec33f..000000000 --- a/data/hfopenllm_v2/brgx53/3Blarenegv2-ECE-PRYMMAL-Martial/64e92286-72ea-4318-aaea-4e0be87a0067.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/brgx53_3Blarenegv2-ECE-PRYMMAL-Martial/1762652580.04124", - "retrieved_timestamp": "1762652580.04124", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "brgx53/3Blarenegv2-ECE-PRYMMAL-Martial", - "developer": "brgx53", - "inference_platform": "unknown", - "id": "brgx53/3Blarenegv2-ECE-PRYMMAL-Martial", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5661843907498769 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5607195549186694 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3496978851963746 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43563541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45054853723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/brgx53/Barracuda-PRYMMAL-ECE-TW3/1dbb5d03-fdfa-4059-9d50-d037ada6b1ac.json b/data/hfopenllm_v2/brgx53/Barracuda-PRYMMAL-ECE-TW3/1dbb5d03-fdfa-4059-9d50-d037ada6b1ac.json new file mode 100644 index 000000000..ee39d8a64 --- 
/dev/null +++ b/data/hfopenllm_v2/brgx53/Barracuda-PRYMMAL-ECE-TW3/1dbb5d03-fdfa-4059-9d50-d037ada6b1ac.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/brgx53_Barracuda-PRYMMAL-ECE-TW3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Barracuda-PRYMMAL-ECE-TW3", + "id": "brgx53/Barracuda-PRYMMAL-ECE-TW3", + "developer": "brgx53", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.164 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3002 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0023 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2534 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3609 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1093 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/brgx53/Barracuda-PRYMMAL-ECE-TW3/70a11b76-f8e4-4cfb-8ab6-791c7e9ba113.json b/data/hfopenllm_v2/brgx53/Barracuda-PRYMMAL-ECE-TW3/70a11b76-f8e4-4cfb-8ab6-791c7e9ba113.json deleted file mode 100644 index db2c17e79..000000000 --- a/data/hfopenllm_v2/brgx53/Barracuda-PRYMMAL-ECE-TW3/70a11b76-f8e4-4cfb-8ab6-791c7e9ba113.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/brgx53_Barracuda-PRYMMAL-ECE-TW3/1762652580.041505", - "retrieved_timestamp": "1762652580.041506", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "brgx53/Barracuda-PRYMMAL-ECE-TW3", - "developer": "brgx53", - "inference_platform": "unknown", - "id": "brgx53/Barracuda-PRYMMAL-ECE-TW3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16401592219754696 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30024599561514337 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0022658610271903325 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36085416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10929188829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/brgx53/LaConfiance-PRYMMAL-ECE-TW3/6bf42faa-c3e9-4069-bf93-ffd626062f0f.json b/data/hfopenllm_v2/brgx53/LaConfiance-PRYMMAL-ECE-TW3/6bf42faa-c3e9-4069-bf93-ffd626062f0f.json new file mode 100644 index 000000000..f4004b73f --- /dev/null +++ b/data/hfopenllm_v2/brgx53/LaConfiance-PRYMMAL-ECE-TW3/6bf42faa-c3e9-4069-bf93-ffd626062f0f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/brgx53_LaConfiance-PRYMMAL-ECE-TW3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LaConfiance-PRYMMAL-ECE-TW3", + "id": "brgx53/LaConfiance-PRYMMAL-ECE-TW3", + "developer": "brgx53", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": 
{ + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1579 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2962 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2517 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3846 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1146 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/brgx53/LaConfiance-PRYMMAL-ECE-TW3/f4766bd8-0130-4ed1-ae1c-8177a65d94a9.json b/data/hfopenllm_v2/brgx53/LaConfiance-PRYMMAL-ECE-TW3/f4766bd8-0130-4ed1-ae1c-8177a65d94a9.json deleted file mode 100644 index cc2649023..000000000 --- a/data/hfopenllm_v2/brgx53/LaConfiance-PRYMMAL-ECE-TW3/f4766bd8-0130-4ed1-ae1c-8177a65d94a9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/brgx53_LaConfiance-PRYMMAL-ECE-TW3/1762652580.041717", - "retrieved_timestamp": "1762652580.041717", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "brgx53/LaConfiance-PRYMMAL-ECE-TW3", - "developer": "brgx53", - "inference_platform": "unknown", - "id": "brgx53/LaConfiance-PRYMMAL-ECE-TW3", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1579209829917951 - } - }, - { 
- "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29624186550380993 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38457291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11461103723404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/bunnycore/Best-Mix-Llama-3.1-8B/9feccbdc-18eb-4077-b50b-986db0047fc8.json b/data/hfopenllm_v2/bunnycore/Best-Mix-Llama-3.1-8B/9feccbdc-18eb-4077-b50b-986db0047fc8.json new file mode 100644 index 000000000..b5b81fbcc --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Best-Mix-Llama-3.1-8B/9feccbdc-18eb-4077-b50b-986db0047fc8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Best-Mix-Llama-3.1-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Best-Mix-Llama-3.1-8B", + "id": "bunnycore/Best-Mix-Llama-3.1-8B", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2067 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3432 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2054 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + 
"dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2929 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1565 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Blabbertron-1.0/195957fa-9d4e-49ec-afd9-17125ebcf62d.json b/data/hfopenllm_v2/bunnycore/Blabbertron-1.0/195957fa-9d4e-49ec-afd9-17125ebcf62d.json deleted file mode 100644 index 96708413b..000000000 --- a/data/hfopenllm_v2/bunnycore/Blabbertron-1.0/195957fa-9d4e-49ec-afd9-17125ebcf62d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Blabbertron-1.0/1762652580.0421708", - "retrieved_timestamp": "1762652580.042172", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Blabbertron-1.0", - "developer": "bunnycore", - "inference_platform": "unknown", - "id": "bunnycore/Blabbertron-1.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7433376773627309 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5496552006589083 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49244712990936557 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4336875 - } - }, - { - "evaluation_name": 
"MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4354222074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/bunnycore/Blabbertron-1.0/a074c33f-782a-409c-987b-7dd62c65ccc7.json b/data/hfopenllm_v2/bunnycore/Blabbertron-1.0/a074c33f-782a-409c-987b-7dd62c65ccc7.json new file mode 100644 index 000000000..313ecbf7a --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Blabbertron-1.0/a074c33f-782a-409c-987b-7dd62c65ccc7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Blabbertron-1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Blabbertron-1.0", + "id": "bunnycore/Blabbertron-1.0", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7433 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5497 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4924 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4354 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/bunnycore/Blabbertron-1.1/2f2c0dea-dcd4-4e54-9f40-9fda4b91bd40.json b/data/hfopenllm_v2/bunnycore/Blabbertron-1.1/2f2c0dea-dcd4-4e54-9f40-9fda4b91bd40.json new file mode 100644 index 000000000..47f457e44 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Blabbertron-1.1/2f2c0dea-dcd4-4e54-9f40-9fda4b91bd40.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Blabbertron-1.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Blabbertron-1.1", + "id": "bunnycore/Blabbertron-1.1", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7265 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5534 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4804 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4416 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4431 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Blabbertron-1.1/9fbe416c-de18-4f83-812c-f48071a49917.json b/data/hfopenllm_v2/bunnycore/Blabbertron-1.1/9fbe416c-de18-4f83-812c-f48071a49917.json deleted file mode 100644 index 7eed76238..000000000 --- 
a/data/hfopenllm_v2/bunnycore/Blabbertron-1.1/9fbe416c-de18-4f83-812c-f48071a49917.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Blabbertron-1.1/1762652580.0424142", - "retrieved_timestamp": "1762652580.0424151", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Blabbertron-1.1", - "developer": "bunnycore", - "inference_platform": "unknown", - "id": "bunnycore/Blabbertron-1.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7265267268625026 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5534000697428705 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48036253776435045 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4415625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44306848404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/bunnycore/CyberCore-Qwen-2.1-7B/84481fee-3727-427b-912a-30e2744df28a.json b/data/hfopenllm_v2/bunnycore/CyberCore-Qwen-2.1-7B/84481fee-3727-427b-912a-30e2744df28a.json new file mode 100644 index 000000000..a37c47c92 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/CyberCore-Qwen-2.1-7B/84481fee-3727-427b-912a-30e2744df28a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_CyberCore-Qwen-2.1-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CyberCore-Qwen-2.1-7B", + "id": "bunnycore/CyberCore-Qwen-2.1-7B", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + 
"source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5766 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5572 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3588 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4145 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4445 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/DeepQwen-3B-LCoT-SCE/aaa801dc-1a47-4009-9ad4-7129a8d4e651.json b/data/hfopenllm_v2/bunnycore/DeepQwen-3B-LCoT-SCE/aaa801dc-1a47-4009-9ad4-7129a8d4e651.json new file mode 100644 index 000000000..ee38b50d8 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/DeepQwen-3B-LCoT-SCE/aaa801dc-1a47-4009-9ad4-7129a8d4e651.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_DeepQwen-3B-LCoT-SCE/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepQwen-3B-LCoT-SCE", + "id": "bunnycore/DeepQwen-3B-LCoT-SCE", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.396 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.449 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4512 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.247 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2626 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3514 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.329 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/DeepSeek-R1-Distill-Qwen-7B-RRP-Ex/3ac92cbf-c85b-4e00-9ef9-4322f961591a.json b/data/hfopenllm_v2/bunnycore/DeepSeek-R1-Distill-Qwen-7B-RRP-Ex/3ac92cbf-c85b-4e00-9ef9-4322f961591a.json new file mode 100644 index 000000000..b208debb8 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/DeepSeek-R1-Distill-Qwen-7B-RRP-Ex/3ac92cbf-c85b-4e00-9ef9-4322f961591a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_DeepSeek-R1-Distill-Qwen-7B-RRP-Ex/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-R1-Distill-Qwen-7B-RRP-Ex", + "id": "bunnycore/DeepSeek-R1-Distill-Qwen-7B-RRP-Ex", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3901 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": 
"BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3494 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1654 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2785 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2508 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/DeepThinker-7B-Sce-v1/162b511b-4684-4595-9261-a33f3a4117f9.json b/data/hfopenllm_v2/bunnycore/DeepThinker-7B-Sce-v1/162b511b-4684-4595-9261-a33f3a4117f9.json new file mode 100644 index 000000000..e66755432 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/DeepThinker-7B-Sce-v1/162b511b-4684-4595-9261-a33f3a4117f9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_DeepThinker-7B-Sce-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepThinker-7B-Sce-v1", + "id": "bunnycore/DeepThinker-7B-Sce-v1", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1218 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 
+ }, + "score_details": { + "score": 0.3018 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0098 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2517 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4194 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1123 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/DeepThinker-7B-Sce-v1/814129ce-9101-4d9b-9e53-9161a010743f.json b/data/hfopenllm_v2/bunnycore/DeepThinker-7B-Sce-v1/814129ce-9101-4d9b-9e53-9161a010743f.json deleted file mode 100644 index 507c8d6c3..000000000 --- a/data/hfopenllm_v2/bunnycore/DeepThinker-7B-Sce-v1/814129ce-9101-4d9b-9e53-9161a010743f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_DeepThinker-7B-Sce-v1/1762652580.043317", - "retrieved_timestamp": "1762652580.043317", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/DeepThinker-7B-Sce-v1", - "developer": "bunnycore", - "inference_platform": "unknown", - "id": "bunnycore/DeepThinker-7B-Sce-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12180015691698028 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30182806791122846 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41942708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11228390957446809 - } - } - ] -} diff --git a/data/hfopenllm_v2/bunnycore/DeepThinker-7B-Sce-v2/20d5d59a-028d-4e34-9414-d9edaf2e59b8.json b/data/hfopenllm_v2/bunnycore/DeepThinker-7B-Sce-v2/20d5d59a-028d-4e34-9414-d9edaf2e59b8.json new file mode 100644 index 000000000..36edeced3 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/DeepThinker-7B-Sce-v2/20d5d59a-028d-4e34-9414-d9edaf2e59b8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_DeepThinker-7B-Sce-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepThinker-7B-Sce-v2", + "id": "bunnycore/DeepThinker-7B-Sce-v2", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1631 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3057 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0113 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4101 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1146 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/DeepThinker-7B-Sce-v2/82cc30d2-9bb6-499f-b522-c66688e07c00.json b/data/hfopenllm_v2/bunnycore/DeepThinker-7B-Sce-v2/82cc30d2-9bb6-499f-b522-c66688e07c00.json deleted file mode 100644 index ce68b9f5b..000000000 --- a/data/hfopenllm_v2/bunnycore/DeepThinker-7B-Sce-v2/82cc30d2-9bb6-499f-b522-c66688e07c00.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_DeepThinker-7B-Sce-v2/1762652580.0435221", - "retrieved_timestamp": "1762652580.043523", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/DeepThinker-7B-Sce-v2", - "developer": "bunnycore", - "inference_platform": "unknown", - "id": "bunnycore/DeepThinker-7B-Sce-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16306621985221434 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3056842322947901 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.011329305135951661 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4100625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11461103723404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/bunnycore/FuseCyberMix-Qwen-2.5-7B-Instruct/a21b53fb-783b-440b-9f3d-d8ada3bd18ea.json 
b/data/hfopenllm_v2/bunnycore/FuseCyberMix-Qwen-2.5-7B-Instruct/a21b53fb-783b-440b-9f3d-d8ada3bd18ea.json new file mode 100644 index 000000000..ae646dc09 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/FuseCyberMix-Qwen-2.5-7B-Instruct/a21b53fb-783b-440b-9f3d-d8ada3bd18ea.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_FuseCyberMix-Qwen-2.5-7B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FuseCyberMix-Qwen-2.5-7B-Instruct", + "id": "bunnycore/FuseCyberMix-Qwen-2.5-7B-Instruct", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7019 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5518 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4841 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.402 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4337 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/FuseCyberMix-Qwen-2.5-7B-Instruct/d851bc0d-5f11-40f6-982c-39809dffe946.json b/data/hfopenllm_v2/bunnycore/FuseCyberMix-Qwen-2.5-7B-Instruct/d851bc0d-5f11-40f6-982c-39809dffe946.json deleted file mode 100644 index af43d6a7d..000000000 --- 
a/data/hfopenllm_v2/bunnycore/FuseCyberMix-Qwen-2.5-7B-Instruct/d851bc0d-5f11-40f6-982c-39809dffe946.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_FuseCyberMix-Qwen-2.5-7B-Instruct/1762652580.043724", - "retrieved_timestamp": "1762652580.043725", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/FuseCyberMix-Qwen-2.5-7B-Instruct", - "developer": "bunnycore", - "inference_platform": "unknown", - "id": "bunnycore/FuseCyberMix-Qwen-2.5-7B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7019220113742648 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5517973725429837 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40203125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43367686170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/bunnycore/FuseQwQen-7B/06b6f8e3-f3c7-43a6-bb69-e1eb3bd10b7a.json b/data/hfopenllm_v2/bunnycore/FuseQwQen-7B/06b6f8e3-f3c7-43a6-bb69-e1eb3bd10b7a.json deleted file mode 100644 index 785ddb7cb..000000000 --- a/data/hfopenllm_v2/bunnycore/FuseQwQen-7B/06b6f8e3-f3c7-43a6-bb69-e1eb3bd10b7a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_FuseQwQen-7B/1762652580.0439281", - "retrieved_timestamp": "1762652580.043929", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/FuseQwQen-7B", - "developer": "bunnycore", - "inference_platform": "unknown", - "id": "bunnycore/FuseQwQen-7B", - "additional_details": { - "precision": 
"bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7274509412802475 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5504256932515404 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43655589123867067 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4216875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4406582446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/bunnycore/FuseQwQen-7B/0d2ab1e8-a2d7-45cf-b123-67bcab2d9dff.json b/data/hfopenllm_v2/bunnycore/FuseQwQen-7B/0d2ab1e8-a2d7-45cf-b123-67bcab2d9dff.json new file mode 100644 index 000000000..066c75efc --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/FuseQwQen-7B/0d2ab1e8-a2d7-45cf-b123-67bcab2d9dff.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_FuseQwQen-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FuseQwQen-7B", + "id": "bunnycore/FuseQwQen-7B", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7275 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5504 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4366 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4217 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4407 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/FwF-Qwen-7B-0.1/6b4a37c8-c7e6-4156-9d6d-8cba51b74d82.json b/data/hfopenllm_v2/bunnycore/FwF-Qwen-7B-0.1/6b4a37c8-c7e6-4156-9d6d-8cba51b74d82.json new file mode 100644 index 000000000..13a373a0b --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/FwF-Qwen-7B-0.1/6b4a37c8-c7e6-4156-9d6d-8cba51b74d82.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_FwF-Qwen-7B-0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FwF-Qwen-7B-0.1", + "id": "bunnycore/FwF-Qwen-7B-0.1", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3005 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5019 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2764 + } + }, + { 
+ "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3952 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4061 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/FwF-Qwen-7B-0.2/78582fec-2f69-4b37-8497-12ceb097b44b.json b/data/hfopenllm_v2/bunnycore/FwF-Qwen-7B-0.2/78582fec-2f69-4b37-8497-12ceb097b44b.json new file mode 100644 index 000000000..c94f58add --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/FwF-Qwen-7B-0.2/78582fec-2f69-4b37-8497-12ceb097b44b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_FwF-Qwen-7B-0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FwF-Qwen-7B-0.2", + "id": "bunnycore/FwF-Qwen-7B-0.2", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4479 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5596 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.426 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4218 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4382 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Gemma-2-2B-Smart/949bf65e-c2ae-4701-82f0-39d0c62a0e87.json b/data/hfopenllm_v2/bunnycore/Gemma-2-2B-Smart/949bf65e-c2ae-4701-82f0-39d0c62a0e87.json new file mode 100644 index 000000000..7d739968f --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Gemma-2-2B-Smart/949bf65e-c2ae-4701-82f0-39d0c62a0e87.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Gemma-2-2B-Smart/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma-2-2B-Smart", + "id": "bunnycore/Gemma-2-2B-Smart", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1321 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3974 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0332 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4249 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2426 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Gemma2-9B-TitanFusion/8812151c-4301-4131-a414-d64d025e476e.json b/data/hfopenllm_v2/bunnycore/Gemma2-9B-TitanFusion/8812151c-4301-4131-a414-d64d025e476e.json new file mode 100644 index 000000000..7da88954d --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Gemma2-9B-TitanFusion/8812151c-4301-4131-a414-d64d025e476e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Gemma2-9B-TitanFusion/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma2-9B-TitanFusion", + "id": "bunnycore/Gemma2-9B-TitanFusion", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1618 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5712 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.077 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3322 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4136 + } + }, + { + "evaluation_name": "MMLU-PRO", + 
"source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.396 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/HyperLlama-3.1-8B/2db1542f-a8da-4fb8-91a5-6dd1a942b55e.json b/data/hfopenllm_v2/bunnycore/HyperLlama-3.1-8B/2db1542f-a8da-4fb8-91a5-6dd1a942b55e.json new file mode 100644 index 000000000..0347a413c --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/HyperLlama-3.1-8B/2db1542f-a8da-4fb8-91a5-6dd1a942b55e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_HyperLlama-3.1-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HyperLlama-3.1-8B", + "id": "bunnycore/HyperLlama-3.1-8B", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7883 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5103 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1828 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3829 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 
0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3783 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Llama-3.1-8B-TitanFusion-Mix/9feeffb2-3763-4e43-933e-89100b76f7fa.json b/data/hfopenllm_v2/bunnycore/Llama-3.1-8B-TitanFusion-Mix/9feeffb2-3763-4e43-933e-89100b76f7fa.json new file mode 100644 index 000000000..0258259b7 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Llama-3.1-8B-TitanFusion-Mix/9feeffb2-3763-4e43-933e-89100b76f7fa.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.1-8B-TitanFusion-Mix/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8B-TitanFusion-Mix", + "id": "bunnycore/Llama-3.1-8B-TitanFusion-Mix", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4925 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5756 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1284 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4317 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3695 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/bunnycore/Llama-3.1-8B-TitanFusion-v3/721102b5-ed5e-4631-8600-a6adfff0c784.json b/data/hfopenllm_v2/bunnycore/Llama-3.1-8B-TitanFusion-v3/721102b5-ed5e-4631-8600-a6adfff0c784.json new file mode 100644 index 000000000..9e1223273 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Llama-3.1-8B-TitanFusion-v3/721102b5-ed5e-4631-8600-a6adfff0c784.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.1-8B-TitanFusion-v3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8B-TitanFusion-v3", + "id": "bunnycore/Llama-3.1-8B-TitanFusion-v3", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.481 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5262 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.142 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4302 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3806 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-All-Mix/18c185f7-5ca4-46ff-81c2-6c538f096409.json b/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-All-Mix/18c185f7-5ca4-46ff-81c2-6c538f096409.json new file mode 100644 
index 000000000..e63ada024 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-All-Mix/18c185f7-5ca4-46ff-81c2-6c538f096409.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-All-Mix/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-3B-All-Mix", + "id": "bunnycore/Llama-3.2-3B-All-Mix", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.607 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7226 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4508 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1503 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2626 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3287 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.316 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-Bespoke-Thought/7ab5911c-e229-43e5-a798-095287d0a597.json b/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-Bespoke-Thought/7ab5911c-e229-43e5-a798-095287d0a597.json new file mode 100644 index 000000000..8d51fa17b --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-Bespoke-Thought/7ab5911c-e229-43e5-a798-095287d0a597.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/bunnycore_Llama-3.2-3B-Bespoke-Thought/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-3B-Bespoke-Thought", + "id": "bunnycore/Llama-3.2-3B-Bespoke-Thought", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4113 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4522 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1647 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3302 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.311 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-Booval/f800c4e5-e918-45bb-8a12-3ca2a64c6b23.json b/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-Booval/f800c4e5-e918-45bb-8a12-3ca2a64c6b23.json new file mode 100644 index 000000000..596f8d2f9 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-Booval/f800c4e5-e918-45bb-8a12-3ca2a64c6b23.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-Booval/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-3B-Booval", + "id": "bunnycore/Llama-3.2-3B-Booval", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6669 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4514 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1269 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3394 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3058 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-Deep-Test/5fcf41bc-30dc-46a7-9cf2-4ce2c7a5850c.json b/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-Deep-Test/5fcf41bc-30dc-46a7-9cf2-4ce2c7a5850c.json new file mode 100644 index 000000000..cce4b4015 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-Deep-Test/5fcf41bc-30dc-46a7-9cf2-4ce2c7a5850c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-Deep-Test/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-3B-Deep-Test", + "id": "bunnycore/Llama-3.2-3B-Deep-Test", + "developer": "bunnycore", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.607 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4652 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4531 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1284 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3394 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3152 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-Deep-Test/d4b20ef4-734e-40a7-818e-f77e170d7437.json b/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-Deep-Test/d4b20ef4-734e-40a7-818e-f77e170d7437.json new file mode 100644 index 000000000..89f5cb978 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-Deep-Test/d4b20ef4-734e-40a7-818e-f77e170d7437.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-Deep-Test/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-3B-Deep-Test", + "id": "bunnycore/Llama-3.2-3B-Deep-Test", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.803 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + 
"source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1775 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.295 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2517 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3647 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1049 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-Della/e0996c96-c9e5-4d39-8e6d-1455ef1f9544.json b/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-Della/e0996c96-c9e5-4d39-8e6d-1455ef1f9544.json new file mode 100644 index 000000000..10dca8528 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-Della/e0996c96-c9e5-4d39-8e6d-1455ef1f9544.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-Della/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-3B-Della", + "id": "bunnycore/Llama-3.2-3B-Della", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.607 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.3561 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3683 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0302 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3902 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2128 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-Long-Think/3ad2b31e-ce2a-4cb4-9b85-79cdebd5d364.json b/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-Long-Think/3ad2b31e-ce2a-4cb4-9b85-79cdebd5d364.json new file mode 100644 index 000000000..a4a40c69d --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-Long-Think/3ad2b31e-ce2a-4cb4-9b85-79cdebd5d364.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-Long-Think/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-3B-Long-Think", + "id": "bunnycore/Llama-3.2-3B-Long-Think", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5473 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.461 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1458 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3396 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3048 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-Mix-Skill/9aff874c-1953-4b97-9bff-9e6120b0bfa7.json b/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-Mix-Skill/9aff874c-1953-4b97-9bff-9e6120b0bfa7.json new file mode 100644 index 000000000..80946bb54 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-Mix-Skill/9aff874c-1953-4b97-9bff-9e6120b0bfa7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-Mix-Skill/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-3B-Mix-Skill", + "id": "bunnycore/Llama-3.2-3B-Mix-Skill", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.607 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6404 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4582 + } + }, + { + 
"evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1473 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3396 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-ProdigyPlus/45ae7f45-8c36-46c6-989d-bc672cdf8eff.json b/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-ProdigyPlus/45ae7f45-8c36-46c6-989d-bc672cdf8eff.json new file mode 100644 index 000000000..dfe4a54af --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-ProdigyPlus/45ae7f45-8c36-46c6-989d-bc672cdf8eff.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-ProdigyPlus/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-3B-ProdigyPlus", + "id": "bunnycore/Llama-3.2-3B-ProdigyPlus", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.607 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4015 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4392 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": 
"Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1156 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.358 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2817 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-ProdigyPlusPlus/7d36e44e-a329-4b96-a891-365ad900f718.json b/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-ProdigyPlusPlus/7d36e44e-a329-4b96-a891-365ad900f718.json new file mode 100644 index 000000000..29f6d9a02 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-ProdigyPlusPlus/7d36e44e-a329-4b96-a891-365ad900f718.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-ProdigyPlusPlus/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-3B-ProdigyPlusPlus", + "id": "bunnycore/Llama-3.2-3B-ProdigyPlusPlus", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.607 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1645 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.369 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0453 + } + }, + { + 
"evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2534 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3541 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.15 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-RP-DeepThink/a8c26325-1eec-43a6-a8ad-3bcb2e378924.json b/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-RP-DeepThink/a8c26325-1eec-43a6-a8ad-3bcb2e378924.json new file mode 100644 index 000000000..467586c96 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-RP-DeepThink/a8c26325-1eec-43a6-a8ad-3bcb2e378924.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-RP-DeepThink/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-3B-RP-DeepThink", + "id": "bunnycore/Llama-3.2-3B-RP-DeepThink", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.607 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7144 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4563 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1609 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3302 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3242 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-RRStock/bde1a879-6852-42ce-9217-f427af85a46a.json b/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-RRStock/bde1a879-6852-42ce-9217-f427af85a46a.json new file mode 100644 index 000000000..aa68c7658 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-RRStock/bde1a879-6852-42ce-9217-f427af85a46a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-RRStock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-3B-RRStock", + "id": "bunnycore/Llama-3.2-3B-RRStock", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.607 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6657 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4568 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1699 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": 
"hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3314 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3236 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-ToxicKod/dd7a0377-f4d6-4390-b9f2-bf50b05ec0f7.json b/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-ToxicKod/dd7a0377-f4d6-4390-b9f2-bf50b05ec0f7.json new file mode 100644 index 000000000..69e6b8e9a --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Llama-3.2-3B-ToxicKod/dd7a0377-f4d6-4390-b9f2-bf50b05ec0f7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-ToxicKod/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-3B-ToxicKod", + "id": "bunnycore/Llama-3.2-3B-ToxicKod", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6319 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4525 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1699 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { 
+ "score": 0.3475 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.288 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Llama-3.2-3b-RP-Toxic-Fuse/12cbf241-d6d4-4d25-ad3d-13a42d7adc74.json b/data/hfopenllm_v2/bunnycore/Llama-3.2-3b-RP-Toxic-Fuse/12cbf241-d6d4-4d25-ad3d-13a42d7adc74.json new file mode 100644 index 000000000..3cdd73e74 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Llama-3.2-3b-RP-Toxic-Fuse/12cbf241-d6d4-4d25-ad3d-13a42d7adc74.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3b-RP-Toxic-Fuse/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-3b-RP-Toxic-Fuse", + "id": "bunnycore/Llama-3.2-3b-RP-Toxic-Fuse", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6834 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.465 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2402 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3954 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3106 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Maestro-S1k-7B-Sce/1f66fd7c-40ee-4249-8963-5c7bb93a3eaf.json b/data/hfopenllm_v2/bunnycore/Maestro-S1k-7B-Sce/1f66fd7c-40ee-4249-8963-5c7bb93a3eaf.json new file mode 100644 index 000000000..56e2b2c00 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Maestro-S1k-7B-Sce/1f66fd7c-40ee-4249-8963-5c7bb93a3eaf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Maestro-S1k-7B-Sce/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Maestro-S1k-7B-Sce", + "id": "bunnycore/Maestro-S1k-7B-Sce", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2523 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0279 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3768 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.117 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/bunnycore/Maestro-S1k-7B-Sce/cc0c2de6-5a8d-4229-bd92-a1ad0b95a6b0.json b/data/hfopenllm_v2/bunnycore/Maestro-S1k-7B-Sce/cc0c2de6-5a8d-4229-bd92-a1ad0b95a6b0.json deleted file mode 100644 index 5906dd05a..000000000 --- a/data/hfopenllm_v2/bunnycore/Maestro-S1k-7B-Sce/cc0c2de6-5a8d-4229-bd92-a1ad0b95a6b0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Maestro-S1k-7B-Sce/1762652580.048955", - "retrieved_timestamp": "1762652580.048955", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Maestro-S1k-7B-Sce", - "developer": "bunnycore", - "inference_platform": "unknown", - "id": "bunnycore/Maestro-S1k-7B-Sce", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2522684255553044 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104380842714463 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3768229166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11702127659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/bunnycore/Phi-3.5-mini-TitanFusion-0.1/7076406b-7e0a-49c7-8150-2e6a243aa23b.json b/data/hfopenllm_v2/bunnycore/Phi-3.5-mini-TitanFusion-0.1/7076406b-7e0a-49c7-8150-2e6a243aa23b.json new file mode 100644 index 000000000..700136bdc --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Phi-3.5-mini-TitanFusion-0.1/7076406b-7e0a-49c7-8150-2e6a243aa23b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Phi-3.5-mini-TitanFusion-0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-3.5-mini-TitanFusion-0.1", + "id": 
"bunnycore/Phi-3.5-mini-TitanFusion-0.1", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5228 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5374 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1186 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3314 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4453 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3807 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Phi-4-Model-Stock-v2/96c3fd80-a601-4629-a1ab-bf7f366a909a.json b/data/hfopenllm_v2/bunnycore/Phi-4-Model-Stock-v2/96c3fd80-a601-4629-a1ab-bf7f366a909a.json new file mode 100644 index 000000000..82650a142 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Phi-4-Model-Stock-v2/96c3fd80-a601-4629-a1ab-bf7f366a909a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-Model-Stock-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-4-Model-Stock-v2", + "id": "bunnycore/Phi-4-Model-Stock-v2", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + 
"evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6375 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6825 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3754 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.349 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4662 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5331 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Phi-4-Model-Stock-v3/1302c9a5-d35c-400c-b9f3-d990243e5d59.json b/data/hfopenllm_v2/bunnycore/Phi-4-Model-Stock-v3/1302c9a5-d35c-400c-b9f3-d990243e5d59.json new file mode 100644 index 000000000..2532056a9 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Phi-4-Model-Stock-v3/1302c9a5-d35c-400c-b9f3-d990243e5d59.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-Model-Stock-v3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-4-Model-Stock-v3", + "id": "bunnycore/Phi-4-Model-Stock-v3", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5912 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6726 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4902 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4166 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5381 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Phi-4-Model-Stock-v4/c7f48bbf-6583-4ddd-ae4d-671c43218dae.json b/data/hfopenllm_v2/bunnycore/Phi-4-Model-Stock-v4/c7f48bbf-6583-4ddd-ae4d-671c43218dae.json new file mode 100644 index 000000000..6189289e4 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Phi-4-Model-Stock-v4/c7f48bbf-6583-4ddd-ae4d-671c43218dae.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-Model-Stock-v4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-4-Model-Stock-v4", + "id": "bunnycore/Phi-4-Model-Stock-v4", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.711 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": 
"hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6924 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3829 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3691 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4611 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5394 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Phi-4-Model-Stock/5f07e092-2eb0-44c2-b2ce-5f1b31a9ea99.json b/data/hfopenllm_v2/bunnycore/Phi-4-Model-Stock/5f07e092-2eb0-44c2-b2ce-5f1b31a9ea99.json new file mode 100644 index 000000000..fd4a9988a --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Phi-4-Model-Stock/5f07e092-2eb0-44c2-b2ce-5f1b31a9ea99.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-Model-Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-4-Model-Stock", + "id": "bunnycore/Phi-4-Model-Stock", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6879 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.689 + } + 
}, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4298 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3549 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4441 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5368 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Phi-4-RP-v0/15701682-97ce-46cf-8010-a6bdeaf8c7aa.json b/data/hfopenllm_v2/bunnycore/Phi-4-RP-v0/15701682-97ce-46cf-8010-a6bdeaf8c7aa.json new file mode 100644 index 000000000..695b852da --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Phi-4-RP-v0/15701682-97ce-46cf-8010-a6bdeaf8c7aa.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-RP-v0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-4-RP-v0", + "id": "bunnycore/Phi-4-RP-v0", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6827 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6856 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3316 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3523 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4141 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5364 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Phi-4-RR-Shoup/c6eecf0b-fa16-484a-8eeb-d196203b3c3e.json b/data/hfopenllm_v2/bunnycore/Phi-4-RR-Shoup/c6eecf0b-fa16-484a-8eeb-d196203b3c3e.json new file mode 100644 index 000000000..a4fe6d434 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Phi-4-RR-Shoup/c6eecf0b-fa16-484a-8eeb-d196203b3c3e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-RR-Shoup/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-4-RR-Shoup", + "id": "bunnycore/Phi-4-RR-Shoup", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6587 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6947 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4992 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3372 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.444 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5429 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Phi-4-RStock-v0.1/4337b1c1-cc00-4a15-8148-e8d0739561b9.json b/data/hfopenllm_v2/bunnycore/Phi-4-RStock-v0.1/4337b1c1-cc00-4a15-8148-e8d0739561b9.json new file mode 100644 index 000000000..476936c1d --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Phi-4-RStock-v0.1/4337b1c1-cc00-4a15-8148-e8d0739561b9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-RStock-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-4-RStock-v0.1", + "id": "bunnycore/Phi-4-RStock-v0.1", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7019 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6928 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.395 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3649 + } + }, + { + "evaluation_name": "MUSR", + 
"source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4584 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5401 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Phi-4-ReasoningRP/1151ee14-8fe9-4f97-808d-8103b353c2ec.json b/data/hfopenllm_v2/bunnycore/Phi-4-ReasoningRP/1151ee14-8fe9-4f97-808d-8103b353c2ec.json new file mode 100644 index 000000000..c71d4c31e --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Phi-4-ReasoningRP/1151ee14-8fe9-4f97-808d-8103b353c2ec.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-ReasoningRP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-4-ReasoningRP", + "id": "bunnycore/Phi-4-ReasoningRP", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6736 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6922 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4569 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.344 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.4491 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5421 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Phi-4-Sce-exp-v0.1/a2c18179-aca3-422c-b9f5-8345109cea13.json b/data/hfopenllm_v2/bunnycore/Phi-4-Sce-exp-v0.1/a2c18179-aca3-422c-b9f5-8345109cea13.json new file mode 100644 index 000000000..ca8ff9b68 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Phi-4-Sce-exp-v0.1/a2c18179-aca3-422c-b9f5-8345109cea13.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-Sce-exp-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-4-Sce-exp-v0.1", + "id": "bunnycore/Phi-4-Sce-exp-v0.1", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6595 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6943 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.503 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3356 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4441 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5423 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Phi-4-Stock-Ex/07495d34-1505-45a9-bb48-887af0da8a0c.json b/data/hfopenllm_v2/bunnycore/Phi-4-Stock-Ex/07495d34-1505-45a9-bb48-887af0da8a0c.json new file mode 100644 index 000000000..d7f7a2004 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Phi-4-Stock-Ex/07495d34-1505-45a9-bb48-887af0da8a0c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-Stock-Ex/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-4-Stock-Ex", + "id": "bunnycore/Phi-4-Stock-Ex", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6575 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6864 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4086 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3507 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4624 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5375 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/bunnycore/Phi-4-Stock-RP/567baf6d-99f9-46a5-8c40-c6899986f1ff.json b/data/hfopenllm_v2/bunnycore/Phi-4-Stock-RP/567baf6d-99f9-46a5-8c40-c6899986f1ff.json new file mode 100644 index 000000000..bfb812571 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Phi-4-Stock-RP/567baf6d-99f9-46a5-8c40-c6899986f1ff.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-Stock-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-4-Stock-RP", + "id": "bunnycore/Phi-4-Stock-RP", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6399 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.686 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3414 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3582 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4715 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5317 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Phi-4-Trim-Exp1/a337df3a-28ff-46c9-adae-4bc029937101.json b/data/hfopenllm_v2/bunnycore/Phi-4-Trim-Exp1/a337df3a-28ff-46c9-adae-4bc029937101.json new file mode 100644 index 000000000..803b9c23e --- /dev/null +++ 
b/data/hfopenllm_v2/bunnycore/Phi-4-Trim-Exp1/a337df3a-28ff-46c9-adae-4bc029937101.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-Trim-Exp1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-4-Trim-Exp1", + "id": "bunnycore/Phi-4-Trim-Exp1", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.503 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1219 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0053 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.255 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4177 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1147 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Phi-Seek-4-Sce-V1/b201a849-44e9-4598-918b-ffa27c894ee9.json b/data/hfopenllm_v2/bunnycore/Phi-Seek-4-Sce-V1/b201a849-44e9-4598-918b-ffa27c894ee9.json new file mode 100644 index 000000000..b2a61d834 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Phi-Seek-4-Sce-V1/b201a849-44e9-4598-918b-ffa27c894ee9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Phi-Seek-4-Sce-V1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + 
"source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-Seek-4-Sce-V1", + "id": "bunnycore/Phi-Seek-4-Sce-V1", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2935 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6459 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2145 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3982 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5123 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Qandora-2.5-7B-Creative/acd82774-f29a-4b19-b08c-693706bb4603.json b/data/hfopenllm_v2/bunnycore/Qandora-2.5-7B-Creative/acd82774-f29a-4b19-b08c-693706bb4603.json deleted file mode 100644 index 0c0ff5166..000000000 --- a/data/hfopenllm_v2/bunnycore/Qandora-2.5-7B-Creative/acd82774-f29a-4b19-b08c-693706bb4603.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Qandora-2.5-7B-Creative/1762652580.0529459", - "retrieved_timestamp": "1762652580.052947", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - 
"source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Qandora-2.5-7B-Creative", - "developer": "bunnycore", - "inference_platform": "unknown", - "id": "bunnycore/Qandora-2.5-7B-Creative", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6803148978044922 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5541763892398439 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30589123867069484 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4211875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4479720744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/bunnycore/Qandora-2.5-7B-Creative/dd87ebf3-3088-43b1-851c-a97d12a68ea8.json b/data/hfopenllm_v2/bunnycore/Qandora-2.5-7B-Creative/dd87ebf3-3088-43b1-851c-a97d12a68ea8.json new file mode 100644 index 000000000..3f60d9d68 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Qandora-2.5-7B-Creative/dd87ebf3-3088-43b1-851c-a97d12a68ea8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Qandora-2.5-7B-Creative/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qandora-2.5-7B-Creative", + "id": "bunnycore/Qandora-2.5-7B-Creative", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6803 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5542 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3059 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4212 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.448 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/QandoraExp-7B-Persona/1b3ef805-8b0c-44bf-b048-773a0dd94d0d.json b/data/hfopenllm_v2/bunnycore/QandoraExp-7B-Persona/1b3ef805-8b0c-44bf-b048-773a0dd94d0d.json new file mode 100644 index 000000000..3676e6214 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/QandoraExp-7B-Persona/1b3ef805-8b0c-44bf-b048-773a0dd94d0d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_QandoraExp-7B-Persona/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QandoraExp-7B-Persona", + "id": "bunnycore/QandoraExp-7B-Persona", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6247 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5558 + } + }, + { + "evaluation_name": 
"MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3146 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4372 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4407 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/QandoraExp-7B-Persona/4e9dc7ca-f4f2-4c1f-b532-628a8d9d515b.json b/data/hfopenllm_v2/bunnycore/QandoraExp-7B-Persona/4e9dc7ca-f4f2-4c1f-b532-628a8d9d515b.json deleted file mode 100644 index bcb3129c6..000000000 --- a/data/hfopenllm_v2/bunnycore/QandoraExp-7B-Persona/4e9dc7ca-f4f2-4c1f-b532-628a8d9d515b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_QandoraExp-7B-Persona/1762652580.0533981", - "retrieved_timestamp": "1762652580.053399", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/QandoraExp-7B-Persona", - "developer": "bunnycore", - "inference_platform": "unknown", - "id": "bunnycore/QandoraExp-7B-Persona", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6246858335882126 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5558337526959515 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104229607250755 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43715624999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44074135638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/bunnycore/QandoraExp-7B-v2/220cb478-58c0-4028-b51a-ec5fe1050746.json b/data/hfopenllm_v2/bunnycore/QandoraExp-7B-v2/220cb478-58c0-4028-b51a-ec5fe1050746.json new file mode 100644 index 000000000..69de747fd --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/QandoraExp-7B-v2/220cb478-58c0-4028-b51a-ec5fe1050746.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_QandoraExp-7B-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QandoraExp-7B-v2", + "id": "bunnycore/QandoraExp-7B-v2", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5607 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5445 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4713 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 
1.0 + }, + "score_details": { + "score": 0.4045 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3909 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/QandoraExp-7B-v2/85bc0517-382e-4a4c-ac31-ee6de74d2c8f.json b/data/hfopenllm_v2/bunnycore/QandoraExp-7B-v2/85bc0517-382e-4a4c-ac31-ee6de74d2c8f.json deleted file mode 100644 index 0e629df4f..000000000 --- a/data/hfopenllm_v2/bunnycore/QandoraExp-7B-v2/85bc0517-382e-4a4c-ac31-ee6de74d2c8f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_QandoraExp-7B-v2/1762652580.053621", - "retrieved_timestamp": "1762652580.053621", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/QandoraExp-7B-v2", - "developer": "bunnycore", - "inference_platform": "unknown", - "id": "bunnycore/QandoraExp-7B-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5606889719278182 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5444864824489132 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47129909365558914 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40454166666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.390874335106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/bunnycore/QandoraExp-7B/17cb8ab1-e7ba-4daf-95d4-2cdbd2777434.json b/data/hfopenllm_v2/bunnycore/QandoraExp-7B/17cb8ab1-e7ba-4daf-95d4-2cdbd2777434.json new file mode 100644 index 000000000..7fc71a8b5 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/QandoraExp-7B/17cb8ab1-e7ba-4daf-95d4-2cdbd2777434.json @@ -0,0 +1,132 @@ 
+{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_QandoraExp-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QandoraExp-7B", + "id": "bunnycore/QandoraExp-7B", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7509 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5478 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4743 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4312 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.441 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/QandoraExp-7B/744f9f56-fbb4-450f-9427-35e6e49ca014.json b/data/hfopenllm_v2/bunnycore/QandoraExp-7B/744f9f56-fbb4-450f-9427-35e6e49ca014.json deleted file mode 100644 index 8cc5e2af3..000000000 --- a/data/hfopenllm_v2/bunnycore/QandoraExp-7B/744f9f56-fbb4-450f-9427-35e6e49ca014.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_QandoraExp-7B/1762652580.0531762", - "retrieved_timestamp": "1762652580.0531762", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/QandoraExp-7B", - "developer": "bunnycore", - "inference_platform": "unknown", - "id": "bunnycore/QandoraExp-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7509064836855099 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5477959748047708 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4743202416918429 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43120833333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4409906914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/bunnycore/QwQen-3B-LCoT-R1/2b55023b-b8bc-42a2-aca8-dcaf39890232.json b/data/hfopenllm_v2/bunnycore/QwQen-3B-LCoT-R1/2b55023b-b8bc-42a2-aca8-dcaf39890232.json new file mode 100644 index 000000000..cfea94b78 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/QwQen-3B-LCoT-R1/2b55023b-b8bc-42a2-aca8-dcaf39890232.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_QwQen-3B-LCoT-R1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwQen-3B-LCoT-R1", + "id": "bunnycore/QwQen-3B-LCoT-R1", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.085 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5342 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": 
"hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4799 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3353 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4138 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3723 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/QwQen-3B-LCoT-R1/636c4294-b3d0-42fc-b437-e4a80f70b4d9.json b/data/hfopenllm_v2/bunnycore/QwQen-3B-LCoT-R1/636c4294-b3d0-42fc-b437-e4a80f70b4d9.json deleted file mode 100644 index f2251277f..000000000 --- a/data/hfopenllm_v2/bunnycore/QwQen-3B-LCoT-R1/636c4294-b3d0-42fc-b437-e4a80f70b4d9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_QwQen-3B-LCoT-R1/1762652580.05408", - "retrieved_timestamp": "1762652580.054081", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/QwQen-3B-LCoT-R1", - "developer": "bunnycore", - "inference_platform": "unknown", - "id": "bunnycore/QwQen-3B-LCoT-R1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.085 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.534160471992092 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4798600168403517 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33534743202416917 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41384375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3723404255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/bunnycore/QwQen-3B-LCoT/31736569-5992-4b1d-9d66-27a6c1620506.json b/data/hfopenllm_v2/bunnycore/QwQen-3B-LCoT/31736569-5992-4b1d-9d66-27a6c1620506.json new file mode 100644 index 000000000..c7d4b48fe --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/QwQen-3B-LCoT/31736569-5992-4b1d-9d66-27a6c1620506.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_QwQen-3B-LCoT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwQen-3B-LCoT", + "id": "bunnycore/QwQen-3B-LCoT", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6025 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4899 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3618 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + 
"dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4178 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3699 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/QwQen-3B-LCoT/bff23021-087b-4118-ba4d-219a97a1dedc.json b/data/hfopenllm_v2/bunnycore/QwQen-3B-LCoT/bff23021-087b-4118-ba4d-219a97a1dedc.json deleted file mode 100644 index 92875a1f1..000000000 --- a/data/hfopenllm_v2/bunnycore/QwQen-3B-LCoT/bff23021-087b-4118-ba4d-219a97a1dedc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_QwQen-3B-LCoT/1762652580.05384", - "retrieved_timestamp": "1762652580.0538409", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/QwQen-3B-LCoT", - "developer": "bunnycore", - "inference_platform": "unknown", - "id": "bunnycore/QwQen-3B-LCoT", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6025290673191577 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4899306773152123 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36178247734138974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41778125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3699301861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/bunnycore/Qwen-2.5-7B-Deep-Sky-T1/630b37b5-351c-403c-ac76-ccb68ffc5d53.json 
b/data/hfopenllm_v2/bunnycore/Qwen-2.5-7B-Deep-Sky-T1/630b37b5-351c-403c-ac76-ccb68ffc5d53.json new file mode 100644 index 000000000..e8e23a052 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Qwen-2.5-7B-Deep-Sky-T1/630b37b5-351c-403c-ac76-ccb68ffc5d53.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7B-Deep-Sky-T1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen-2.5-7B-Deep-Sky-T1", + "id": "bunnycore/Qwen-2.5-7B-Deep-Sky-T1", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4208 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.414 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0551 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4018 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2104 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Qwen-2.5-7B-Deep-Stock-v1/69cdef01-30dc-4f75-97fa-9daeebcec72f.json b/data/hfopenllm_v2/bunnycore/Qwen-2.5-7B-Deep-Stock-v1/69cdef01-30dc-4f75-97fa-9daeebcec72f.json new file mode 100644 index 000000000..949efbe5f --- /dev/null +++ 
b/data/hfopenllm_v2/bunnycore/Qwen-2.5-7B-Deep-Stock-v1/69cdef01-30dc-4f75-97fa-9daeebcec72f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7B-Deep-Stock-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen-2.5-7B-Deep-Stock-v1", + "id": "bunnycore/Qwen-2.5-7B-Deep-Stock-v1", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5695 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5361 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2644 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4109 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4066 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Qwen-2.5-7B-Deep-Stock-v4/9aa1acb0-c791-4dea-aa1e-c912cea69466.json b/data/hfopenllm_v2/bunnycore/Qwen-2.5-7B-Deep-Stock-v4/9aa1acb0-c791-4dea-aa1e-c912cea69466.json new file mode 100644 index 000000000..39d26d113 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Qwen-2.5-7B-Deep-Stock-v4/9aa1acb0-c791-4dea-aa1e-c912cea69466.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
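Because every 0.2.0 record declares its own score bounds, a checkout of this patch can be sanity-checked with a few lines of Python. A hedged sketch, assuming the data/hfopenllm_v2 layout shown in the paths above; validate is a hypothetical helper, not part of the repository's tooling.

import json
from pathlib import Path

def validate(root: str = "data/hfopenllm_v2") -> None:
    # Walk every record and check the invariants visible in this patch:
    # schema_version 0.2.0 and each score inside its declared bounds.
    for path in Path(root).rglob("*.json"):
        rec = json.loads(path.read_text())
        assert rec["schema_version"] == "0.2.0", path
        for res in rec["evaluation_results"]:
            mc = res["metric_config"]
            score = res["score_details"]["score"]
            assert mc["min_score"] <= score <= mc["max_score"], (
                path, res["evaluation_name"])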
"hfopenllm_v2/bunnycore_Qwen-2.5-7B-Deep-Stock-v4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen-2.5-7B-Deep-Stock-v4", + "id": "bunnycore/Qwen-2.5-7B-Deep-Stock-v4", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7753 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5453 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4894 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4127 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4342 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Qwen-2.5-7B-Deep-Stock-v5/0c1d66f3-8fd7-47f2-8538-a1aa8985aebf.json b/data/hfopenllm_v2/bunnycore/Qwen-2.5-7B-Deep-Stock-v5/0c1d66f3-8fd7-47f2-8538-a1aa8985aebf.json new file mode 100644 index 000000000..7ef80c9ec --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Qwen-2.5-7B-Deep-Stock-v5/0c1d66f3-8fd7-47f2-8538-a1aa8985aebf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7B-Deep-Stock-v5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen-2.5-7B-Deep-Stock-v5", + "id": "bunnycore/Qwen-2.5-7B-Deep-Stock-v5", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4509 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4672 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1473 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3648 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2832 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Qwen-2.5-7B-Exp-Sce/2872dcd9-421b-4346-812c-b27bb32c6e86.json b/data/hfopenllm_v2/bunnycore/Qwen-2.5-7B-Exp-Sce/2872dcd9-421b-4346-812c-b27bb32c6e86.json new file mode 100644 index 000000000..e05a367ea --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Qwen-2.5-7B-Exp-Sce/2872dcd9-421b-4346-812c-b27bb32c6e86.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7B-Exp-Sce/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen-2.5-7B-Exp-Sce", + "id": "bunnycore/Qwen-2.5-7B-Exp-Sce", + "developer": "bunnycore", + "inference_platform": 
"unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7652 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5506 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3255 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.443 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4259 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Qwen-2.5-7B-R1-Stock/2f3e2fc0-f1e0-43cb-8a8c-6aadcc538646.json b/data/hfopenllm_v2/bunnycore/Qwen-2.5-7B-R1-Stock/2f3e2fc0-f1e0-43cb-8a8c-6aadcc538646.json new file mode 100644 index 000000000..126635626 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Qwen-2.5-7B-R1-Stock/2f3e2fc0-f1e0-43cb-8a8c-6aadcc538646.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7B-R1-Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen-2.5-7B-R1-Stock", + "id": "bunnycore/Qwen-2.5-7B-R1-Stock", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", 
+ "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7573 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5393 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5008 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4294 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Qwen-2.5-7B-Stock-Deep-Bespoke/d0a76497-84b0-45b9-b748-04ffe9bc13a3.json b/data/hfopenllm_v2/bunnycore/Qwen-2.5-7B-Stock-Deep-Bespoke/d0a76497-84b0-45b9-b748-04ffe9bc13a3.json new file mode 100644 index 000000000..4940eaf52 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Qwen-2.5-7B-Stock-Deep-Bespoke/d0a76497-84b0-45b9-b748-04ffe9bc13a3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7B-Stock-Deep-Bespoke/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen-2.5-7B-Stock-Deep-Bespoke", + "id": "bunnycore/Qwen-2.5-7B-Stock-Deep-Bespoke", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5206 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.492 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1888 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4068 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.358 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Qwen-2.5-7b-S1k/185b6560-6790-417f-aeba-f7405fee808a.json b/data/hfopenllm_v2/bunnycore/Qwen-2.5-7b-S1k/185b6560-6790-417f-aeba-f7405fee808a.json new file mode 100644 index 000000000..4b2686474 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Qwen-2.5-7b-S1k/185b6560-6790-417f-aeba-f7405fee808a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7b-S1k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen-2.5-7b-S1k", + "id": "bunnycore/Qwen-2.5-7b-S1k", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7162 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5563 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4781 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2844 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4071 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4382 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Qwen2.5-1.5B-Model-Stock/30a8074e-df03-4866-9b8d-a5a7eece3c71.json b/data/hfopenllm_v2/bunnycore/Qwen2.5-1.5B-Model-Stock/30a8074e-df03-4866-9b8d-a5a7eece3c71.json new file mode 100644 index 000000000..3a805a02c --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Qwen2.5-1.5B-Model-Stock/30a8074e-df03-4866-9b8d-a5a7eece3c71.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-1.5B-Model-Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-1.5B-Model-Stock", + "id": "bunnycore/Qwen2.5-1.5B-Model-Stock", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.776 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1829 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2874 + } + }, + { + "evaluation_name": 
"MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3674 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.11 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Qwen2.5-3B-Model-Stock-v2/ac8874ae-d6d6-45d3-aabc-06a3852f68d0.json b/data/hfopenllm_v2/bunnycore/Qwen2.5-3B-Model-Stock-v2/ac8874ae-d6d6-45d3-aabc-06a3852f68d0.json new file mode 100644 index 000000000..edae05519 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Qwen2.5-3B-Model-Stock-v2/ac8874ae-d6d6-45d3-aabc-06a3852f68d0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-Model-Stock-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-3B-Model-Stock-v2", + "id": "bunnycore/Qwen2.5-3B-Model-Stock-v2", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.396 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.649 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4677 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on 
MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3867 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3915 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.327 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Qwen2.5-3B-Model-Stock-v3.1/bc98b048-18d4-438e-80c4-0cd851798da5.json b/data/hfopenllm_v2/bunnycore/Qwen2.5-3B-Model-Stock-v3.1/bc98b048-18d4-438e-80c4-0cd851798da5.json new file mode 100644 index 000000000..eae5a80e4 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Qwen2.5-3B-Model-Stock-v3.1/bc98b048-18d4-438e-80c4-0cd851798da5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-Model-Stock-v3.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-3B-Model-Stock-v3.1", + "id": "bunnycore/Qwen2.5-3B-Model-Stock-v3.1", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.396 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6481 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4737 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3897 + } + }, + { + "evaluation_name": "GPQA", + 
"source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2844 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3968 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.329 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Qwen2.5-3B-Model-Stock-v3.2/c88c011f-0a24-4e78-a104-035d25af2430.json b/data/hfopenllm_v2/bunnycore/Qwen2.5-3B-Model-Stock-v3.2/c88c011f-0a24-4e78-a104-035d25af2430.json new file mode 100644 index 000000000..6dedfe6ac --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Qwen2.5-3B-Model-Stock-v3.2/c88c011f-0a24-4e78-a104-035d25af2430.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-Model-Stock-v3.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-3B-Model-Stock-v3.2", + "id": "bunnycore/Qwen2.5-3B-Model-Stock-v3.2", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.396 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6353 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4727 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3754 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3928 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3294 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Qwen2.5-3B-Model-Stock-v4.1/f9e3c31c-02c0-4f5e-ad4f-3be0801a0f41.json b/data/hfopenllm_v2/bunnycore/Qwen2.5-3B-Model-Stock-v4.1/f9e3c31c-02c0-4f5e-ad4f-3be0801a0f41.json new file mode 100644 index 000000000..e50f85cd0 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Qwen2.5-3B-Model-Stock-v4.1/f9e3c31c-02c0-4f5e-ad4f-3be0801a0f41.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-Model-Stock-v4.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-3B-Model-Stock-v4.1", + "id": "bunnycore/Qwen2.5-3B-Model-Stock-v4.1", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.396 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6381 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.482 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3769 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": 
"MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3941 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3387 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Qwen2.5-3B-Model-Stock/5484405a-2ec8-4515-af75-76a5dd348d3d.json b/data/hfopenllm_v2/bunnycore/Qwen2.5-3B-Model-Stock/5484405a-2ec8-4515-af75-76a5dd348d3d.json new file mode 100644 index 000000000..1e65375e1 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Qwen2.5-3B-Model-Stock/5484405a-2ec8-4515-af75-76a5dd348d3d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-Model-Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-3B-Model-Stock", + "id": "bunnycore/Qwen2.5-3B-Model-Stock", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.396 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6381 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4712 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3799 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2886 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.3942 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.325 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Qwen2.5-3B-RP-Mix/7dc117b9-c2a2-44c1-8471-f3bc8a116e3e.json b/data/hfopenllm_v2/bunnycore/Qwen2.5-3B-RP-Mix/7dc117b9-c2a2-44c1-8471-f3bc8a116e3e.json new file mode 100644 index 000000000..3bc4e4b91 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Qwen2.5-3B-RP-Mix/7dc117b9-c2a2-44c1-8471-f3bc8a116e3e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-RP-Mix/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-3B-RP-Mix", + "id": "bunnycore/Qwen2.5-3B-RP-Mix", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5721 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4894 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2153 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2735 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4284 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3728 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Qwen2.5-3B-RP-Thinker-V2/e2d314dd-b5b3-49b5-8e64-1e3464f4b963.json b/data/hfopenllm_v2/bunnycore/Qwen2.5-3B-RP-Thinker-V2/e2d314dd-b5b3-49b5-8e64-1e3464f4b963.json new file mode 100644 index 000000000..8be20f53d --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Qwen2.5-3B-RP-Thinker-V2/e2d314dd-b5b3-49b5-8e64-1e3464f4b963.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-RP-Thinker-V2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-3B-RP-Thinker-V2", + "id": "bunnycore/Qwen2.5-3B-RP-Thinker-V2", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.642 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4678 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3829 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3981 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3271 + } + } + ] +} \ No newline at 
end of file diff --git a/data/hfopenllm_v2/bunnycore/Qwen2.5-3B-RP-Thinker/7ecb453b-1ba7-44ec-abfd-1f8be4c817fd.json b/data/hfopenllm_v2/bunnycore/Qwen2.5-3B-RP-Thinker/7ecb453b-1ba7-44ec-abfd-1f8be4c817fd.json new file mode 100644 index 000000000..aa4332eb8 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Qwen2.5-3B-RP-Thinker/7ecb453b-1ba7-44ec-abfd-1f8be4c817fd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-RP-Thinker/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-3B-RP-Thinker", + "id": "bunnycore/Qwen2.5-3B-RP-Thinker", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5894 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4164 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3353 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3287 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.315 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-CyberRombos/d0a70e95-fc72-41c6-ac42-09b8f379b566.json b/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-CyberRombos/d0a70e95-fc72-41c6-ac42-09b8f379b566.json new file mode 100644 index 
000000000..d0560f8b6 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-CyberRombos/d0a70e95-fc72-41c6-ac42-09b8f379b566.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-CyberRombos/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-CyberRombos", + "id": "bunnycore/Qwen2.5-7B-CyberRombos", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7518 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5465 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4962 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4391 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-Fuse-Exp/e2ef8ea6-b464-445e-81df-ef0779c1d0d4.json b/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-Fuse-Exp/e2ef8ea6-b464-445e-81df-ef0779c1d0d4.json new file mode 100644 index 000000000..3450ede55 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-Fuse-Exp/e2ef8ea6-b464-445e-81df-ef0779c1d0d4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/bunnycore_Qwen2.5-7B-Fuse-Exp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-Fuse-Exp", + "id": "bunnycore/Qwen2.5-7B-Fuse-Exp", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5469 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5109 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3142 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4573 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3309 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-Instruct-Fusion/6d88de9c-062d-4858-95ef-a05f6a29b6c3.json b/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-Instruct-Fusion/6d88de9c-062d-4858-95ef-a05f6a29b6c3.json deleted file mode 100644 index 97f3bf64e..000000000 --- a/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-Instruct-Fusion/6d88de9c-062d-4858-95ef-a05f6a29b6c3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-Instruct-Fusion/1762652580.0585442", - "retrieved_timestamp": "1762652580.0585449", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" 
- ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-7B-Instruct-Fusion", - "developer": "bunnycore", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-7B-Instruct-Fusion", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6962016338869754 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5491903018724945 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3406344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42971875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4467253989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-Instruct-Fusion/f3d7cca2-141c-4b84-abc4-396ad2d59e3c.json b/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-Instruct-Fusion/f3d7cca2-141c-4b84-abc4-396ad2d59e3c.json new file mode 100644 index 000000000..96f112f7d --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-Instruct-Fusion/f3d7cca2-141c-4b84-abc4-396ad2d59e3c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-Instruct-Fusion/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-Instruct-Fusion", + "id": "bunnycore/Qwen2.5-7B-Instruct-Fusion", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6962 + } + }, + { + 
"evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5492 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3406 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4297 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4467 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-Instruct-Merge-Stock-v0.1/e3f48d7a-c8a3-4e75-99d6-7f2946696b12.json b/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-Instruct-Merge-Stock-v0.1/e3f48d7a-c8a3-4e75-99d6-7f2946696b12.json new file mode 100644 index 000000000..192a551c4 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-Instruct-Merge-Stock-v0.1/e3f48d7a-c8a3-4e75-99d6-7f2946696b12.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-Instruct-Merge-Stock-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-Instruct-Merge-Stock-v0.1", + "id": "bunnycore/Qwen2.5-7B-Instruct-Merge-Stock-v0.1", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7509 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5529 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4894 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4231 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4383 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-Instruct-Merge-Stock-v0.1/fe31c10e-8231-49f4-afb3-e2588396c032.json b/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-Instruct-Merge-Stock-v0.1/fe31c10e-8231-49f4-afb3-e2588396c032.json deleted file mode 100644 index 985f591cb..000000000 --- a/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-Instruct-Merge-Stock-v0.1/fe31c10e-8231-49f4-afb3-e2588396c032.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-Instruct-Merge-Stock-v0.1/1762652580.0587678", - "retrieved_timestamp": "1762652580.058769", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-7B-Instruct-Merge-Stock-v0.1", - "developer": "bunnycore", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-7B-Instruct-Merge-Stock-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7509064836855099 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5529431709465797 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48942598187311176 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42311458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4383311170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-MixStock-Sce-V0.3/3feb9449-49a2-427f-a317-c21e6d1ca66c.json b/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-MixStock-Sce-V0.3/3feb9449-49a2-427f-a317-c21e6d1ca66c.json new file mode 100644 index 000000000..ce27f83f1 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-MixStock-Sce-V0.3/3feb9449-49a2-427f-a317-c21e6d1ca66c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-MixStock-Sce-V0.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-MixStock-Sce-V0.3", + "id": "bunnycore/Qwen2.5-7B-MixStock-Sce-V0.3", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.212 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3479 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3714 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1779 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-MixStock-V0.1/6359e37e-0405-436b-903c-8f0e740dd6c7.json b/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-MixStock-V0.1/6359e37e-0405-436b-903c-8f0e740dd6c7.json new file mode 100644 index 000000000..cda1c1002 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-MixStock-V0.1/6359e37e-0405-436b-903c-8f0e740dd6c7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-MixStock-V0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-MixStock-V0.1", + "id": "bunnycore/Qwen2.5-7B-MixStock-V0.1", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7673 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5479 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3172 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + 
"hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4416 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4256 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-R1-Bespoke-Stock/f5daed76-f6e5-4a7d-84d7-80537a046b83.json b/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-R1-Bespoke-Stock/f5daed76-f6e5-4a7d-84d7-80537a046b83.json new file mode 100644 index 000000000..7a2c17b90 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-R1-Bespoke-Stock/f5daed76-f6e5-4a7d-84d7-80537a046b83.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-R1-Bespoke-Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-R1-Bespoke-Stock", + "id": "bunnycore/Qwen2.5-7B-R1-Bespoke-Stock", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3726 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4822 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2047 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2785 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, 
+ "score_details": { + "score": 0.3926 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3472 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-R1-Bespoke-Task/03af2b1d-989f-4afc-ab13-8793093b9c50.json b/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-R1-Bespoke-Task/03af2b1d-989f-4afc-ab13-8793093b9c50.json new file mode 100644 index 000000000..c62431a02 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-R1-Bespoke-Task/03af2b1d-989f-4afc-ab13-8793093b9c50.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-R1-Bespoke-Task/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-R1-Bespoke-Task", + "id": "bunnycore/Qwen2.5-7B-R1-Bespoke-Task", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3787 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.415 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1782 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2534 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3569 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2688 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-RRP-1M-Thinker/5db7ec54-7feb-4c11-b2e0-042226ba1f94.json b/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-RRP-1M-Thinker/5db7ec54-7feb-4c11-b2e0-042226ba1f94.json new file mode 100644 index 000000000..8ddd86827 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-RRP-1M-Thinker/5db7ec54-7feb-4c11-b2e0-042226ba1f94.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-RRP-1M-Thinker/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-RRP-1M-Thinker", + "id": "bunnycore/Qwen2.5-7B-RRP-1M-Thinker", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2308 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3482 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2719 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3767 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1769 + } 
+ } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-RRP-1M/f1f5615d-8a78-43c9-b5c6-edc180252381.json b/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-RRP-1M/f1f5615d-8a78-43c9-b5c6-edc180252381.json new file mode 100644 index 000000000..80b3d9332 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-RRP-1M/f1f5615d-8a78-43c9-b5c6-edc180252381.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-RRP-1M/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-RRP-1M", + "id": "bunnycore/Qwen2.5-7B-RRP-1M", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7481 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5452 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3248 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4483 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4266 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-RRP-ID/9c89bf8f-4b8a-4c01-8685-fafc687c673e.json b/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-RRP-ID/9c89bf8f-4b8a-4c01-8685-fafc687c673e.json new file mode 100644 index 
000000000..ebca5e44d --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-RRP-ID/9c89bf8f-4b8a-4c01-8685-fafc687c673e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-RRP-ID/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-RRP-ID", + "id": "bunnycore/Qwen2.5-7B-RRP-ID", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7473 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.548 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4864 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.418 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4387 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-Sky-R1-Mini/58b69c0f-826d-414f-915e-dd0b78d9298c.json b/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-Sky-R1-Mini/58b69c0f-826d-414f-915e-dd0b78d9298c.json new file mode 100644 index 000000000..592f70f7e --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-Sky-R1-Mini/58b69c0f-826d-414f-915e-dd0b78d9298c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/bunnycore_Qwen2.5-7B-Sky-R1-Mini/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-Sky-R1-Mini", + "id": "bunnycore/Qwen2.5-7B-Sky-R1-Mini", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2305 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3503 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0295 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3448 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1253 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/QwenMosaic-7B/101ea548-2ffe-4f47-b3b5-5fbe9a3854b4.json b/data/hfopenllm_v2/bunnycore/QwenMosaic-7B/101ea548-2ffe-4f47-b3b5-5fbe9a3854b4.json new file mode 100644 index 000000000..c1f5056f6 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/QwenMosaic-7B/101ea548-2ffe-4f47-b3b5-5fbe9a3854b4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_QwenMosaic-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + 
"evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwenMosaic-7B", + "id": "bunnycore/QwenMosaic-7B", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5819 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5564 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4441 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4164 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.431 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Smol-Llama-3.2-3B/259c4798-ff03-4f58-8fb4-59150710212b.json b/data/hfopenllm_v2/bunnycore/Smol-Llama-3.2-3B/259c4798-ff03-4f58-8fb4-59150710212b.json new file mode 100644 index 000000000..3ffc953ab --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Smol-Llama-3.2-3B/259c4798-ff03-4f58-8fb4-59150710212b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Smol-Llama-3.2-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Smol-Llama-3.2-3B", + "id": "bunnycore/Smol-Llama-3.2-3B", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": 
"LlamaForCausalLM", + "params_billions": 3.607 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6679 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4539 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1382 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2768 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.346 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3228 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/SmolLM2-1.7-Persona/5249691a-3672-4ccd-98dd-d9b937bca750.json b/data/hfopenllm_v2/bunnycore/SmolLM2-1.7-Persona/5249691a-3672-4ccd-98dd-d9b937bca750.json deleted file mode 100644 index 012e0c9bc..000000000 --- a/data/hfopenllm_v2/bunnycore/SmolLM2-1.7-Persona/5249691a-3672-4ccd-98dd-d9b937bca750.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_SmolLM2-1.7-Persona/1762652580.062155", - "retrieved_timestamp": "1762652580.062156", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/SmolLM2-1.7-Persona", - "developer": "bunnycore", - "inference_platform": "unknown", - "id": "bunnycore/SmolLM2-1.7-Persona", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.711 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5465254413844156 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3623213930905173 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.334125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1973902925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/bunnycore/SmolLM2-1.7-Persona/f731caa1-f777-494a-8490-da0c815f0708.json b/data/hfopenllm_v2/bunnycore/SmolLM2-1.7-Persona/f731caa1-f777-494a-8490-da0c815f0708.json new file mode 100644 index 000000000..1cde2b7fa --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/SmolLM2-1.7-Persona/f731caa1-f777-494a-8490-da0c815f0708.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_SmolLM2-1.7-Persona/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SmolLM2-1.7-Persona", + "id": "bunnycore/SmolLM2-1.7-Persona", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.711 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5465 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3623 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH 
Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0566 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3341 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1974 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/SmolLM2-1.7B-roleplay-lora/ae109e51-8631-4e09-8839-8e9ed74da4c7.json b/data/hfopenllm_v2/bunnycore/SmolLM2-1.7B-roleplay-lora/ae109e51-8631-4e09-8839-8e9ed74da4c7.json deleted file mode 100644 index 8f76bc1a4..000000000 --- a/data/hfopenllm_v2/bunnycore/SmolLM2-1.7B-roleplay-lora/ae109e51-8631-4e09-8839-8e9ed74da4c7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_SmolLM2-1.7B-roleplay-lora/1762652580.062429", - "retrieved_timestamp": "1762652580.06243", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/SmolLM2-1.7B-roleplay-lora", - "developer": "bunnycore", - "inference_platform": "unknown", - "id": "bunnycore/SmolLM2-1.7B-roleplay-lora", - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 3.423 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5382075116247114 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3610343412303005 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.052870090634441085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33945833333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19664228723404256 - } - } - ] -} diff --git a/data/hfopenllm_v2/bunnycore/SmolLM2-1.7B-roleplay-lora/d4d25d38-b21a-490e-9ca9-556504ec00ea.json b/data/hfopenllm_v2/bunnycore/SmolLM2-1.7B-roleplay-lora/d4d25d38-b21a-490e-9ca9-556504ec00ea.json new file mode 100644 index 000000000..fc1b7c8b5 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/SmolLM2-1.7B-roleplay-lora/d4d25d38-b21a-490e-9ca9-556504ec00ea.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_SmolLM2-1.7B-roleplay-lora/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SmolLM2-1.7B-roleplay-lora", + "id": "bunnycore/SmolLM2-1.7B-roleplay-lora", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 3.423 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5382 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.361 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0529 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2752 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3395 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": 
"hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1966 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Tulu-3.1-8B-SuperNova/75bb85a3-40bb-4630-95a0-50e40b008412.json b/data/hfopenllm_v2/bunnycore/Tulu-3.1-8B-SuperNova/75bb85a3-40bb-4630-95a0-50e40b008412.json new file mode 100644 index 000000000..c128cbf2c --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Tulu-3.1-8B-SuperNova/75bb85a3-40bb-4630-95a0-50e40b008412.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/bunnycore_Tulu-3.1-8B-SuperNova/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Tulu-3.1-8B-SuperNova", + "id": "bunnycore/Tulu-3.1-8B-SuperNova", + "developer": "bunnycore", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8194 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5254 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2462 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3935 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.3814 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Tulu-3.1-8B-SuperNova/cd979586-e334-4964-b06c-f33c66f09c0e.json b/data/hfopenllm_v2/bunnycore/Tulu-3.1-8B-SuperNova/cd979586-e334-4964-b06c-f33c66f09c0e.json deleted file mode 100644 index 13cb438c9..000000000 --- a/data/hfopenllm_v2/bunnycore/Tulu-3.1-8B-SuperNova/cd979586-e334-4964-b06c-f33c66f09c0e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Tulu-3.1-8B-SuperNova/1762652580.062763", - "retrieved_timestamp": "1762652580.0627651", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Tulu-3.1-8B-SuperNova", - "developer": "bunnycore", - "inference_platform": "unknown", - "id": "bunnycore/Tulu-3.1-8B-SuperNova", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8193748143813969 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5254122754311122 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24622356495468278 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3935 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3813996010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/byroneverson/Mistral-Small-Instruct-2409-abliterated/bb44f3ef-eefa-48ef-a257-2eb345c89a00.json b/data/hfopenllm_v2/byroneverson/Mistral-Small-Instruct-2409-abliterated/bb44f3ef-eefa-48ef-a257-2eb345c89a00.json new file mode 100644 index 000000000..096e9bd79 --- /dev/null +++ b/data/hfopenllm_v2/byroneverson/Mistral-Small-Instruct-2409-abliterated/bb44f3ef-eefa-48ef-a257-2eb345c89a00.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/byroneverson_Mistral-Small-Instruct-2409-abliterated/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-Small-Instruct-2409-abliterated", + "id": "byroneverson/Mistral-Small-Instruct-2409-abliterated", + "developer": "byroneverson", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6971 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5238 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2477 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3331 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3697 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3923 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/byroneverson/Mistral-Small-Instruct-2409-abliterated/ff0c627b-72b9-45d4-a385-49c8b0ae6b6e.json b/data/hfopenllm_v2/byroneverson/Mistral-Small-Instruct-2409-abliterated/ff0c627b-72b9-45d4-a385-49c8b0ae6b6e.json deleted file mode 100644 index c1515e163..000000000 --- a/data/hfopenllm_v2/byroneverson/Mistral-Small-Instruct-2409-abliterated/ff0c627b-72b9-45d4-a385-49c8b0ae6b6e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/byroneverson_Mistral-Small-Instruct-2409-abliterated/1762652580.063036", - "retrieved_timestamp": "1762652580.063037", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "byroneverson/Mistral-Small-Instruct-2409-abliterated", - "developer": "byroneverson", - "inference_platform": "unknown", - "id": "byroneverson/Mistral-Small-Instruct-2409-abliterated", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 22.247 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6970759806203096 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5237864400325174 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24773413897280966 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33305369127516776 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36971875000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39228723404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/byroneverson/Yi-1.5-9B-Chat-16K-abliterated/2dcf1771-3dbe-43ad-974c-54e2e2860bcc.json b/data/hfopenllm_v2/byroneverson/Yi-1.5-9B-Chat-16K-abliterated/2dcf1771-3dbe-43ad-974c-54e2e2860bcc.json new file mode 100644 index 000000000..7084716cd --- /dev/null +++ b/data/hfopenllm_v2/byroneverson/Yi-1.5-9B-Chat-16K-abliterated/2dcf1771-3dbe-43ad-974c-54e2e2860bcc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/byroneverson_Yi-1.5-9B-Chat-16K-abliterated/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yi-1.5-9B-Chat-16K-abliterated", + "id": "byroneverson/Yi-1.5-9B-Chat-16K-abliterated", + "developer": "byroneverson", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.829 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5528 + } 
+ }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5282 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1412 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3129 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4734 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3823 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/byroneverson/Yi-1.5-9B-Chat-16K-abliterated/dc783bb0-c784-4cf4-888b-36a3bfa37a84.json b/data/hfopenllm_v2/byroneverson/Yi-1.5-9B-Chat-16K-abliterated/dc783bb0-c784-4cf4-888b-36a3bfa37a84.json deleted file mode 100644 index 6e12f3571..000000000 --- a/data/hfopenllm_v2/byroneverson/Yi-1.5-9B-Chat-16K-abliterated/dc783bb0-c784-4cf4-888b-36a3bfa37a84.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/byroneverson_Yi-1.5-9B-Chat-16K-abliterated/1762652580.068388", - "retrieved_timestamp": "1762652580.068392", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "byroneverson/Yi-1.5-9B-Chat-16K-abliterated", - "developer": "byroneverson", - "inference_platform": "unknown", - "id": "byroneverson/Yi-1.5-9B-Chat-16K-abliterated", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.829 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5528453392553979 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5282050829986801 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14123867069486404 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4734375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38231382978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/byroneverson/Yi-1.5-9B-Chat-abliterated/345560e2-c981-4aca-9388-4f3a5e95ace8.json b/data/hfopenllm_v2/byroneverson/Yi-1.5-9B-Chat-abliterated/345560e2-c981-4aca-9388-4f3a5e95ace8.json deleted file mode 100644 index 93b901e70..000000000 --- a/data/hfopenllm_v2/byroneverson/Yi-1.5-9B-Chat-abliterated/345560e2-c981-4aca-9388-4f3a5e95ace8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/byroneverson_Yi-1.5-9B-Chat-abliterated/1762652580.070213", - "retrieved_timestamp": "1762652580.070215", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "byroneverson/Yi-1.5-9B-Chat-abliterated", - "developer": "byroneverson", - "inference_platform": "unknown", - "id": "byroneverson/Yi-1.5-9B-Chat-abliterated", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.829 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5723291976400395 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5401219363002313 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": 
"Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43886458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3715093085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/byroneverson/Yi-1.5-9B-Chat-abliterated/caa0c8df-5488-4bf9-a5b8-0fff831e6732.json b/data/hfopenllm_v2/byroneverson/Yi-1.5-9B-Chat-abliterated/caa0c8df-5488-4bf9-a5b8-0fff831e6732.json new file mode 100644 index 000000000..0098a2f51 --- /dev/null +++ b/data/hfopenllm_v2/byroneverson/Yi-1.5-9B-Chat-abliterated/caa0c8df-5488-4bf9-a5b8-0fff831e6732.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/byroneverson_Yi-1.5-9B-Chat-abliterated/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yi-1.5-9B-Chat-abliterated", + "id": "byroneverson/Yi-1.5-9B-Chat-abliterated", + "developer": "byroneverson", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.829 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5723 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5401 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1662 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4389 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" 
+ }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3715 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/c10x/Q-Pluse/2093ba5f-d2f8-45d2-bcf7-ff48810c47af.json b/data/hfopenllm_v2/c10x/Q-Pluse/2093ba5f-d2f8-45d2-bcf7-ff48810c47af.json deleted file mode 100644 index 23a4cc7af..000000000 --- a/data/hfopenllm_v2/c10x/Q-Pluse/2093ba5f-d2f8-45d2-bcf7-ff48810c47af.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/c10x_Q-Pluse/1762652580.070795", - "retrieved_timestamp": "1762652580.070796", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "c10x/Q-Pluse", - "developer": "c10x", - "inference_platform": "unknown", - "id": "c10x/Q-Pluse", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11228318638988993 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2875111436321769 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24664429530201343 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39381249999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11353058510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/c10x/Q-Pluse/c6f8e581-e849-4e28-b3a6-1838ee522770.json b/data/hfopenllm_v2/c10x/Q-Pluse/c6f8e581-e849-4e28-b3a6-1838ee522770.json new file mode 100644 index 000000000..10eb44f61 --- /dev/null +++ b/data/hfopenllm_v2/c10x/Q-Pluse/c6f8e581-e849-4e28-b3a6-1838ee522770.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/c10x_Q-Pluse/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + 
"model_info": { + "name": "Q-Pluse", + "id": "c10x/Q-Pluse", + "developer": "c10x", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1123 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2875 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2466 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3938 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1135 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/c10x/longthinker/f0c361a1-a3ac-4415-ab5d-069bdf27e7a3.json b/data/hfopenllm_v2/c10x/longthinker/f0c361a1-a3ac-4415-ab5d-069bdf27e7a3.json new file mode 100644 index 000000000..c9bd5fd8a --- /dev/null +++ b/data/hfopenllm_v2/c10x/longthinker/f0c361a1-a3ac-4415-ab5d-069bdf27e7a3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/c10x_longthinker/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "longthinker", + "id": "c10x/longthinker", + "developer": "c10x", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + 
"dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3609 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4927 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2319 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.391 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3527 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/c10x/longthinker/fe7bd3bb-71a4-46dd-a86d-b5a24b685fa5.json b/data/hfopenllm_v2/c10x/longthinker/fe7bd3bb-71a4-46dd-a86d-b5a24b685fa5.json deleted file mode 100644 index 8d04b272c..000000000 --- a/data/hfopenllm_v2/c10x/longthinker/fe7bd3bb-71a4-46dd-a86d-b5a24b685fa5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/c10x_longthinker/1762652580.078971", - "retrieved_timestamp": "1762652580.078974", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "c10x/longthinker", - "developer": "c10x", - "inference_platform": "unknown", - "id": "c10x/longthinker", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36087913403103766 - } - }, 
- { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49274888053364546 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23187311178247735 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3909583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3527260638297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/carsenk/flippa-v6/44129be7-f73d-4580-8375-e8ef324e73a8.json b/data/hfopenllm_v2/carsenk/flippa-v6/44129be7-f73d-4580-8375-e8ef324e73a8.json new file mode 100644 index 000000000..5fe1e9b7d --- /dev/null +++ b/data/hfopenllm_v2/carsenk/flippa-v6/44129be7-f73d-4580-8375-e8ef324e73a8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/carsenk_flippa-v6/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "flippa-v6", + "id": "carsenk/flippa-v6", + "developer": "carsenk", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 16.061 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3439 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5047 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1405 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + 
}, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4089 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3668 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/carsenk/flippa-v6/a4bcc6f3-b745-48f7-a394-90cd42363aae.json b/data/hfopenllm_v2/carsenk/flippa-v6/a4bcc6f3-b745-48f7-a394-90cd42363aae.json deleted file mode 100644 index 166e6bed6..000000000 --- a/data/hfopenllm_v2/carsenk/flippa-v6/a4bcc6f3-b745-48f7-a394-90cd42363aae.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/carsenk_flippa-v6/1762652580.079394", - "retrieved_timestamp": "1762652580.079395", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "carsenk/flippa-v6", - "developer": "carsenk", - "inference_platform": "unknown", - "id": "carsenk/flippa-v6", - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 16.061 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3439429602344003 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5046972457053399 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1404833836858006 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40887500000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3667719414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/carsenk/phi3.5_mini_exp_825_uncensored/2925ecde-a9a5-4369-b391-d23a8605d35c.json b/data/hfopenllm_v2/carsenk/phi3.5_mini_exp_825_uncensored/2925ecde-a9a5-4369-b391-d23a8605d35c.json new file mode 100644 index 000000000..e36d77eec --- /dev/null +++ b/data/hfopenllm_v2/carsenk/phi3.5_mini_exp_825_uncensored/2925ecde-a9a5-4369-b391-d23a8605d35c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/carsenk_phi3.5_mini_exp_825_uncensored/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "phi3.5_mini_exp_825_uncensored", + "id": "carsenk/phi3.5_mini_exp_825_uncensored", + "developer": "carsenk", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1364 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2965 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0106 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2492 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3644 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1175 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/cat-searcher/gemma-2-9b-it-sppo-iter-1-evol-1/8409e464-fd16-4b41-b533-2f6cae4fe894.json b/data/hfopenllm_v2/cat-searcher/gemma-2-9b-it-sppo-iter-1-evol-1/8409e464-fd16-4b41-b533-2f6cae4fe894.json new file mode 100644 index 000000000..0abe1f2fd --- /dev/null +++ b/data/hfopenllm_v2/cat-searcher/gemma-2-9b-it-sppo-iter-1-evol-1/8409e464-fd16-4b41-b533-2f6cae4fe894.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cat-searcher_gemma-2-9b-it-sppo-iter-1-evol-1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-9b-it-sppo-iter-1-evol-1", + "id": "cat-searcher/gemma-2-9b-it-sppo-iter-1-evol-1", + "developer": "cat-searcher", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2942 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5939 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0853 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3406 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3926 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.38 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cat-searcher/gemma-2-9b-it-sppo-iter-1/86f6c6eb-8b08-4e6c-a1bc-0d941a00f10b.json 
b/data/hfopenllm_v2/cat-searcher/gemma-2-9b-it-sppo-iter-1/86f6c6eb-8b08-4e6c-a1bc-0d941a00f10b.json new file mode 100644 index 000000000..a688e85bd --- /dev/null +++ b/data/hfopenllm_v2/cat-searcher/gemma-2-9b-it-sppo-iter-1/86f6c6eb-8b08-4e6c-a1bc-0d941a00f10b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cat-searcher_gemma-2-9b-it-sppo-iter-1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-9b-it-sppo-iter-1", + "id": "cat-searcher/gemma-2-9b-it-sppo-iter-1", + "developer": "cat-searcher", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3015 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5972 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0831 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3448 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3927 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3854 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cckm/tinymistral_950m/aa2e6df7-a0b0-42f7-8057-e2763fc34834.json b/data/hfopenllm_v2/cckm/tinymistral_950m/aa2e6df7-a0b0-42f7-8057-e2763fc34834.json new file mode 100644 index 000000000..c5f1e1fe2 --- /dev/null +++ 
b/data/hfopenllm_v2/cckm/tinymistral_950m/aa2e6df7-a0b0-42f7-8057-e2763fc34834.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cckm_tinymistral_950m/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tinymistral_950m", + "id": "cckm/tinymistral_950m", + "developer": "cckm", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 0.955 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2395 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2969 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0053 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3554 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1096 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cgato/TheSalt-L3-8b-v0.3.2/2bf9a06e-f3bf-4b55-804b-e553a722e0de.json b/data/hfopenllm_v2/cgato/TheSalt-L3-8b-v0.3.2/2bf9a06e-f3bf-4b55-804b-e553a722e0de.json new file mode 100644 index 000000000..56d6c2c5f --- /dev/null +++ b/data/hfopenllm_v2/cgato/TheSalt-L3-8b-v0.3.2/2bf9a06e-f3bf-4b55-804b-e553a722e0de.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cgato_TheSalt-L3-8b-v0.3.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { 
+ "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TheSalt-L3-8b-v0.3.2", + "id": "cgato/TheSalt-L3-8b-v0.3.2", + "developer": "cgato", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2705 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2968 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0476 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3896 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1139 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cgato/TheSalt-L3-8b-v0.3.2/aa805bcc-3847-40b5-86eb-397982106d18.json b/data/hfopenllm_v2/cgato/TheSalt-L3-8b-v0.3.2/aa805bcc-3847-40b5-86eb-397982106d18.json deleted file mode 100644 index 1db536fc9..000000000 --- a/data/hfopenllm_v2/cgato/TheSalt-L3-8b-v0.3.2/aa805bcc-3847-40b5-86eb-397982106d18.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cgato_TheSalt-L3-8b-v0.3.2/1762652580.100134", - "retrieved_timestamp": "1762652580.100136", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": 
"documentation" - }, - "model_info": { - "name": "cgato/TheSalt-L3-8b-v0.3.2", - "developer": "cgato", - "inference_platform": "unknown", - "id": "cgato/TheSalt-L3-8b-v0.3.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27050337548814923 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29679653176003074 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38962499999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11394614361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/chargoddard/prometheus-2-llama-3-8b/b380a675-39ea-4950-ad0a-d9771f09ddde.json b/data/hfopenllm_v2/chargoddard/prometheus-2-llama-3-8b/b380a675-39ea-4950-ad0a-d9771f09ddde.json new file mode 100644 index 000000000..fbec5b2fc --- /dev/null +++ b/data/hfopenllm_v2/chargoddard/prometheus-2-llama-3-8b/b380a675-39ea-4950-ad0a-d9771f09ddde.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/chargoddard_prometheus-2-llama-3-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prometheus-2-llama-3-8b", + "id": "chargoddard/prometheus-2-llama-3-8b", + "developer": "chargoddard", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5289 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4931 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0823 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3396 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/chujiezheng/Llama-3-Instruct-8B-SimPO-ExPO/482358eb-7d3b-4de0-b5d9-451308f104e2.json b/data/hfopenllm_v2/chujiezheng/Llama-3-Instruct-8B-SimPO-ExPO/482358eb-7d3b-4de0-b5d9-451308f104e2.json new file mode 100644 index 000000000..122ad0855 --- /dev/null +++ b/data/hfopenllm_v2/chujiezheng/Llama-3-Instruct-8B-SimPO-ExPO/482358eb-7d3b-4de0-b5d9-451308f104e2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/chujiezheng_Llama-3-Instruct-8B-SimPO-ExPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Instruct-8B-SimPO-ExPO", + "id": "chujiezheng/Llama-3-Instruct-8B-SimPO-ExPO", + "developer": "chujiezheng", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6434 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4765 + } + }, + { + 
"evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0702 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.392 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3401 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/chujiezheng/Llama-3-Instruct-8B-SimPO-ExPO/bdf85c5c-6eaa-4df6-a393-66b71aa28952.json b/data/hfopenllm_v2/chujiezheng/Llama-3-Instruct-8B-SimPO-ExPO/bdf85c5c-6eaa-4df6-a393-66b71aa28952.json deleted file mode 100644 index 6470e538f..000000000 --- a/data/hfopenllm_v2/chujiezheng/Llama-3-Instruct-8B-SimPO-ExPO/bdf85c5c-6eaa-4df6-a393-66b71aa28952.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/chujiezheng_Llama-3-Instruct-8B-SimPO-ExPO/1762652580.1008909", - "retrieved_timestamp": "1762652580.100893", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "chujiezheng/Llama-3-Instruct-8B-SimPO-ExPO", - "developer": "chujiezheng", - "inference_platform": "unknown", - "id": "chujiezheng/Llama-3-Instruct-8B-SimPO-ExPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6433707008515184 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4764515968840137 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.0702416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3920104166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.340093085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/chujiezheng/Mistral7B-PairRM-SPPO-ExPO/ef04a83d-7b89-43ec-ba33-30e1006422dc.json b/data/hfopenllm_v2/chujiezheng/Mistral7B-PairRM-SPPO-ExPO/ef04a83d-7b89-43ec-ba33-30e1006422dc.json new file mode 100644 index 000000000..c2d924a8a --- /dev/null +++ b/data/hfopenllm_v2/chujiezheng/Mistral7B-PairRM-SPPO-ExPO/ef04a83d-7b89-43ec-ba33-30e1006422dc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/chujiezheng_Mistral7B-PairRM-SPPO-ExPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral7B-PairRM-SPPO-ExPO", + "id": "chujiezheng/Mistral7B-PairRM-SPPO-ExPO", + "developer": "chujiezheng", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3673 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3882 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0181 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2768 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4055 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2552 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cjvt/GaMS-1B/7b64cf2e-c7c6-4b48-8e51-ea2aa0914145.json b/data/hfopenllm_v2/cjvt/GaMS-1B/7b64cf2e-c7c6-4b48-8e51-ea2aa0914145.json new file mode 100644 index 000000000..9129e2209 --- /dev/null +++ b/data/hfopenllm_v2/cjvt/GaMS-1B/7b64cf2e-c7c6-4b48-8e51-ea2aa0914145.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cjvt_GaMS-1B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "GaMS-1B", + "id": "cjvt/GaMS-1B", + "developer": "cjvt", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "OPTForCausalLM", + "params_billions": 1.54 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1635 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3075 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0136 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3684 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": 
"TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1149 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cjvt/GaMS-1B/e9acbb25-2b96-4a2a-92ff-d2b68c0e49f8.json b/data/hfopenllm_v2/cjvt/GaMS-1B/e9acbb25-2b96-4a2a-92ff-d2b68c0e49f8.json deleted file mode 100644 index 87085d670..000000000 --- a/data/hfopenllm_v2/cjvt/GaMS-1B/e9acbb25-2b96-4a2a-92ff-d2b68c0e49f8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cjvt_GaMS-1B/1762652580.101496", - "retrieved_timestamp": "1762652580.1014972", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cjvt/GaMS-1B", - "developer": "cjvt", - "inference_platform": "unknown", - "id": "cjvt/GaMS-1B", - "additional_details": { - "precision": "float16", - "architecture": "OPTForCausalLM", - "params_billions": 1.54 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.163541625110263 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3074752552734472 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36841666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11486037234042554 - } - } - ] -} diff --git a/data/hfopenllm_v2/cloudyu/Llama-3-70Bx2-MOE/52c8e3f4-1063-4d9c-80d9-fdd0a72fc98e.json b/data/hfopenllm_v2/cloudyu/Llama-3-70Bx2-MOE/52c8e3f4-1063-4d9c-80d9-fdd0a72fc98e.json new file mode 100644 index 000000000..91fc09dc7 --- /dev/null +++ b/data/hfopenllm_v2/cloudyu/Llama-3-70Bx2-MOE/52c8e3f4-1063-4d9c-80d9-fdd0a72fc98e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cloudyu_Llama-3-70Bx2-MOE/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-70Bx2-MOE", + "id": "cloudyu/Llama-3-70Bx2-MOE", + "developer": "cloudyu", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 126.926 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5482 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6636 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2175 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3935 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4812 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5142 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cloudyu/Llama-3.2-3Bx4/1f4a827d-31cd-42e6-871d-7c0cad010f58.json b/data/hfopenllm_v2/cloudyu/Llama-3.2-3Bx4/1f4a827d-31cd-42e6-871d-7c0cad010f58.json new file mode 100644 index 000000000..2d3c69d77 --- /dev/null +++ b/data/hfopenllm_v2/cloudyu/Llama-3.2-3Bx4/1f4a827d-31cd-42e6-871d-7c0cad010f58.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cloudyu_Llama-3.2-3Bx4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-3Bx4", + "id": "cloudyu/Llama-3.2-3Bx4", + "developer": "cloudyu", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", 
+ "architecture": "MixtralForCausalLM", + "params_billions": 9.949 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5069 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4332 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1073 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3496 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2985 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cloudyu/Mixtral_11Bx2_MoE_19B/56d6d99c-fba1-42e7-aad4-631370b44da3.json b/data/hfopenllm_v2/cloudyu/Mixtral_11Bx2_MoE_19B/56d6d99c-fba1-42e7-aad4-631370b44da3.json new file mode 100644 index 000000000..1913318a3 --- /dev/null +++ b/data/hfopenllm_v2/cloudyu/Mixtral_11Bx2_MoE_19B/56d6d99c-fba1-42e7-aad4-631370b44da3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cloudyu_Mixtral_11Bx2_MoE_19B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mixtral_11Bx2_MoE_19B", + "id": "cloudyu/Mixtral_11Bx2_MoE_19B", + "developer": "cloudyu", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MixtralForCausalLM", + "params_billions": 19.188 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, 
+ "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3851 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5209 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0672 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4297 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3311 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cloudyu/Mixtral_11Bx2_MoE_19B/9be76c82-0f70-4b76-8476-7707d4da85bb.json b/data/hfopenllm_v2/cloudyu/Mixtral_11Bx2_MoE_19B/9be76c82-0f70-4b76-8476-7707d4da85bb.json deleted file mode 100644 index f7fee6290..000000000 --- a/data/hfopenllm_v2/cloudyu/Mixtral_11Bx2_MoE_19B/9be76c82-0f70-4b76-8476-7707d4da85bb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cloudyu_Mixtral_11Bx2_MoE_19B/1762652580.102268", - "retrieved_timestamp": "1762652580.102269", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cloudyu/Mixtral_11Bx2_MoE_19B", - "developer": "cloudyu", - "inference_platform": "unknown", - "id": "cloudyu/Mixtral_11Bx2_MoE_19B", - "additional_details": { - "precision": "float16", - "architecture": "MixtralForCausalLM", - "params_billions": 19.188 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3850837998732253 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5208516020145867 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06722054380664652 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4296875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33111702127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/cloudyu/Mixtral_34Bx2_MoE_60B/006a0ac7-d6c3-42c1-b0cc-6a0bfe74f884.json b/data/hfopenllm_v2/cloudyu/Mixtral_34Bx2_MoE_60B/006a0ac7-d6c3-42c1-b0cc-6a0bfe74f884.json new file mode 100644 index 000000000..10f736b8c --- /dev/null +++ b/data/hfopenllm_v2/cloudyu/Mixtral_34Bx2_MoE_60B/006a0ac7-d6c3-42c1-b0cc-6a0bfe74f884.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cloudyu_Mixtral_34Bx2_MoE_60B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mixtral_34Bx2_MoE_60B", + "id": "cloudyu/Mixtral_34Bx2_MoE_60B", + "developer": "cloudyu", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 60.814 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4538 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.587 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.077 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": 
"GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3381 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4766 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cloudyu/Mixtral_34Bx2_MoE_60B/fdbef33b-dffb-4146-bc83-f8b03c842b2e.json b/data/hfopenllm_v2/cloudyu/Mixtral_34Bx2_MoE_60B/fdbef33b-dffb-4146-bc83-f8b03c842b2e.json deleted file mode 100644 index 7de46c7e2..000000000 --- a/data/hfopenllm_v2/cloudyu/Mixtral_34Bx2_MoE_60B/fdbef33b-dffb-4146-bc83-f8b03c842b2e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cloudyu_Mixtral_34Bx2_MoE_60B/1762652580.102543", - "retrieved_timestamp": "1762652580.1025438", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cloudyu/Mixtral_34Bx2_MoE_60B", - "developer": "cloudyu", - "inference_platform": "unknown", - "id": "cloudyu/Mixtral_34Bx2_MoE_60B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 60.814 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4537770892343427 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5869701263465353 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0770392749244713 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4625208333333333 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47664561170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/cloudyu/Mixtral_7Bx2_MoE/33a82686-6202-4a4d-ba34-bd4537105e5f.json b/data/hfopenllm_v2/cloudyu/Mixtral_7Bx2_MoE/33a82686-6202-4a4d-ba34-bd4537105e5f.json new file mode 100644 index 000000000..3cdeaa925 --- /dev/null +++ b/data/hfopenllm_v2/cloudyu/Mixtral_7Bx2_MoE/33a82686-6202-4a4d-ba34-bd4537105e5f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cloudyu_Mixtral_7Bx2_MoE/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mixtral_7Bx2_MoE", + "id": "cloudyu/Mixtral_7Bx2_MoE", + "developer": "cloudyu", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 12.879 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.448 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.516 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0687 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4473 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3044 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/cloudyu/Mixtral_7Bx2_MoE/b6c048f5-b01e-4e51-8a6c-c068dfd199ef.json b/data/hfopenllm_v2/cloudyu/Mixtral_7Bx2_MoE/b6c048f5-b01e-4e51-8a6c-c068dfd199ef.json deleted file mode 100644 index ea990b1ed..000000000 --- a/data/hfopenllm_v2/cloudyu/Mixtral_7Bx2_MoE/b6c048f5-b01e-4e51-8a6c-c068dfd199ef.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cloudyu_Mixtral_7Bx2_MoE/1762652580.102766", - "retrieved_timestamp": "1762652580.102767", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cloudyu/Mixtral_7Bx2_MoE", - "developer": "cloudyu", - "inference_platform": "unknown", - "id": "cloudyu/Mixtral_7Bx2_MoE", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4480068440626427 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5159732691655027 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06873111782477341 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44729166666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30435505319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/cloudyu/S1-Llama-3.2-3Bx4-MoE/38d45554-44bd-4b40-b7c9-c0b7ba44b862.json b/data/hfopenllm_v2/cloudyu/S1-Llama-3.2-3Bx4-MoE/38d45554-44bd-4b40-b7c9-c0b7ba44b862.json new file mode 100644 index 000000000..7127e4c67 --- /dev/null +++ b/data/hfopenllm_v2/cloudyu/S1-Llama-3.2-3Bx4-MoE/38d45554-44bd-4b40-b7c9-c0b7ba44b862.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cloudyu_S1-Llama-3.2-3Bx4-MoE/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "S1-Llama-3.2-3Bx4-MoE", + "id": "cloudyu/S1-Llama-3.2-3Bx4-MoE", + "developer": "cloudyu", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MixtralForCausalLM", + "params_billions": 9.555 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5302 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4358 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1201 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3456 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3044 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cloudyu/Yi-34Bx2-MoE-60B-DPO/37d7e3ab-db9c-4ad7-81d1-933c030a6250.json b/data/hfopenllm_v2/cloudyu/Yi-34Bx2-MoE-60B-DPO/37d7e3ab-db9c-4ad7-81d1-933c030a6250.json new file mode 100644 index 000000000..44ea55fed --- /dev/null +++ b/data/hfopenllm_v2/cloudyu/Yi-34Bx2-MoE-60B-DPO/37d7e3ab-db9c-4ad7-81d1-933c030a6250.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cloudyu_Yi-34Bx2-MoE-60B-DPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yi-34Bx2-MoE-60B-DPO", + "id": "cloudyu/Yi-34Bx2-MoE-60B-DPO", + "developer": "cloudyu", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 60.814 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + 
"dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5319 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5168 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0702 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3221 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4677 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cloudyu/Yi-34Bx2-MoE-60B-DPO/542d450b-8108-4abe-a2ae-5b9a577558d6.json b/data/hfopenllm_v2/cloudyu/Yi-34Bx2-MoE-60B-DPO/542d450b-8108-4abe-a2ae-5b9a577558d6.json deleted file mode 100644 index 9f6995bc5..000000000 --- a/data/hfopenllm_v2/cloudyu/Yi-34Bx2-MoE-60B-DPO/542d450b-8108-4abe-a2ae-5b9a577558d6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cloudyu_Yi-34Bx2-MoE-60B-DPO/1762652580.108832", - "retrieved_timestamp": "1762652580.1088362", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cloudyu/Yi-34Bx2-MoE-60B-DPO", - "developer": "cloudyu", - "inference_platform": "unknown", - "id": "cloudyu/Yi-34Bx2-MoE-60B-DPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 60.814 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.531887613753729 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.516831447641953 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0702416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221476510067114 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43746875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46766954787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-ipo/9cc49b3c-4e51-4f67-92ea-4ac8a3cbed43.json b/data/hfopenllm_v2/cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-ipo/9cc49b3c-4e51-4f67-92ea-4ac8a3cbed43.json new file mode 100644 index 000000000..3ef6239d5 --- /dev/null +++ b/data/hfopenllm_v2/cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-ipo/9cc49b3c-4e51-4f67-92ea-4ac8a3cbed43.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cluebbers_Llama-3.1-8B-paraphrase-type-generation-apty-ipo/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8B-paraphrase-type-generation-apty-ipo", + "id": "cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-ipo", + "developer": "cluebbers", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1327 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.38 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0249 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4332 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2591 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-sigmoid/b6bd8515-4c95-40ce-b2d5-af8873d261ab.json b/data/hfopenllm_v2/cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-sigmoid/b6bd8515-4c95-40ce-b2d5-af8873d261ab.json new file mode 100644 index 000000000..d349e2332 --- /dev/null +++ b/data/hfopenllm_v2/cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-sigmoid/b6bd8515-4c95-40ce-b2d5-af8873d261ab.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cluebbers_Llama-3.1-8B-paraphrase-type-generation-apty-sigmoid/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8B-paraphrase-type-generation-apty-sigmoid", + "id": "cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-sigmoid", + "developer": "cluebbers", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1318 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3789 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 
5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0264 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4306 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2562 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cluebbers/Llama-3.1-8B-paraphrase-type-generation-etpc/d102e75d-3e20-482b-a243-bae3ec44e2bb.json b/data/hfopenllm_v2/cluebbers/Llama-3.1-8B-paraphrase-type-generation-etpc/d102e75d-3e20-482b-a243-bae3ec44e2bb.json new file mode 100644 index 000000000..8314442a4 --- /dev/null +++ b/data/hfopenllm_v2/cluebbers/Llama-3.1-8B-paraphrase-type-generation-etpc/d102e75d-3e20-482b-a243-bae3ec44e2bb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cluebbers_Llama-3.1-8B-paraphrase-type-generation-etpc/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8B-paraphrase-type-generation-etpc", + "id": "cluebbers/Llama-3.1-8B-paraphrase-type-generation-etpc", + "developer": "cluebbers", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1209 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3781 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + 
}, + "score_details": { + "score": 0.0196 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4319 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2556 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cognitivecomputations/Dolphin3.0-Llama3.1-8B/68920da1-af71-4ccd-88b9-554e3c72c4dc.json b/data/hfopenllm_v2/cognitivecomputations/Dolphin3.0-Llama3.1-8B/68920da1-af71-4ccd-88b9-554e3c72c4dc.json new file mode 100644 index 000000000..e9c8f9f33 --- /dev/null +++ b/data/hfopenllm_v2/cognitivecomputations/Dolphin3.0-Llama3.1-8B/68920da1-af71-4ccd-88b9-554e3c72c4dc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cognitivecomputations_Dolphin3.0-Llama3.1-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dolphin3.0-Llama3.1-8B", + "id": "cognitivecomputations/Dolphin3.0-Llama3.1-8B", + "developer": "cognitivecomputations", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7621 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4916 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1231 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + 
"hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3653 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2992 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cognitivecomputations/Dolphin3.0-Llama3.2-1B/c0eb144f-c726-4a80-bce9-384fb7a641a7.json b/data/hfopenllm_v2/cognitivecomputations/Dolphin3.0-Llama3.2-1B/c0eb144f-c726-4a80-bce9-384fb7a641a7.json new file mode 100644 index 000000000..feeca651a --- /dev/null +++ b/data/hfopenllm_v2/cognitivecomputations/Dolphin3.0-Llama3.2-1B/c0eb144f-c726-4a80-bce9-384fb7a641a7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cognitivecomputations_Dolphin3.0-Llama3.2-1B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dolphin3.0-Llama3.2-1B", + "id": "cognitivecomputations/Dolphin3.0-Llama3.2-1B", + "developer": "cognitivecomputations", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5428 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3122 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0279 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", 
+ "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2299 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3249 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1375 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cognitivecomputations/Dolphin3.0-Qwen2.5-0.5B/0b26f82d-36f6-4fd0-a0fd-05e4a1368a6e.json b/data/hfopenllm_v2/cognitivecomputations/Dolphin3.0-Qwen2.5-0.5B/0b26f82d-36f6-4fd0-a0fd-05e4a1368a6e.json new file mode 100644 index 000000000..cf6e079cd --- /dev/null +++ b/data/hfopenllm_v2/cognitivecomputations/Dolphin3.0-Qwen2.5-0.5B/0b26f82d-36f6-4fd0-a0fd-05e4a1368a6e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cognitivecomputations_Dolphin3.0-Qwen2.5-0.5B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dolphin3.0-Qwen2.5-0.5B", + "id": "cognitivecomputations/Dolphin3.0-Qwen2.5-0.5B", + "developer": "cognitivecomputations", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4697 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3114 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0514 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2349 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + 
"dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3555 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1413 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cognitivecomputations/Dolphin3.0-R1-Mistral-24B/8fe4360a-0924-4386-b4cd-89069f7ff55f.json b/data/hfopenllm_v2/cognitivecomputations/Dolphin3.0-R1-Mistral-24B/8fe4360a-0924-4386-b4cd-89069f7ff55f.json new file mode 100644 index 000000000..5e1bed350 --- /dev/null +++ b/data/hfopenllm_v2/cognitivecomputations/Dolphin3.0-R1-Mistral-24B/8fe4360a-0924-4386-b4cd-89069f7ff55f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cognitivecomputations_Dolphin3.0-R1-Mistral-24B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dolphin3.0-R1-Mistral-24B", + "id": "cognitivecomputations/Dolphin3.0-R1-Mistral-24B", + "developer": "cognitivecomputations", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4068 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.536 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3119 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3952 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3005 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9-llama3-8b/eeeb082b-7112-4a08-a87a-b2c9ae37efff.json b/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9-llama3-8b/eeeb082b-7112-4a08-a87a-b2c9ae37efff.json new file mode 100644 index 000000000..e996e7392 --- /dev/null +++ b/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9-llama3-8b/eeeb082b-7112-4a08-a87a-b2c9ae37efff.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9-llama3-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dolphin-2.9-llama3-8b", + "id": "cognitivecomputations/dolphin-2.9-llama3-8b", + "developer": "cognitivecomputations", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.385 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.495 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0574 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4375 + } + }, 
+ { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2771 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.1-llama-3-70b/b8f933e9-867f-4934-9648-371d1e632116.json b/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.1-llama-3-70b/b8f933e9-867f-4934-9648-371d1e632116.json new file mode 100644 index 000000000..96525d9b7 --- /dev/null +++ b/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.1-llama-3-70b/b8f933e9-867f-4934-9648-371d1e632116.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.1-llama-3-70b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dolphin-2.9.1-llama-3-70b", + "id": "cognitivecomputations/dolphin-2.9.1-llama-3-70b", + "developer": "cognitivecomputations", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.376 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5205 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.182 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4976 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": 
"TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.413 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.1-yi-1.5-34b/8d225023-4b7e-48cd-ae67-6d00b541f17d.json b/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.1-yi-1.5-34b/8d225023-4b7e-48cd-ae67-6d00b541f17d.json new file mode 100644 index 000000000..e70bb20f7 --- /dev/null +++ b/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.1-yi-1.5-34b/8d225023-4b7e-48cd-ae67-6d00b541f17d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.1-yi-1.5-34b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dolphin-2.9.1-yi-1.5-34b", + "id": "cognitivecomputations/dolphin-2.9.1-yi-1.5-34b", + "developer": "cognitivecomputations", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3853 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6076 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1866 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3431 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4598 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4519 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.1-yi-1.5-9b/ee3b45e7-a5d6-4fa8-8abd-f6a77d5a6d5b.json b/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.1-yi-1.5-9b/ee3b45e7-a5d6-4fa8-8abd-f6a77d5a6d5b.json new file mode 100644 index 000000000..d916c8a8c --- /dev/null +++ b/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.1-yi-1.5-9b/ee3b45e7-a5d6-4fa8-8abd-f6a77d5a6d5b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.1-yi-1.5-9b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dolphin-2.9.1-yi-1.5-9b", + "id": "cognitivecomputations/dolphin-2.9.1-yi-1.5-9b", + "developer": "cognitivecomputations", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.829 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4465 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5484 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1518 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3381 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4348 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3967 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated/177ef040-da5c-4a65-adac-efdc555bd110.json b/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated/177ef040-da5c-4a65-adac-efdc555bd110.json new file mode 100644 index 000000000..2947eef38 --- /dev/null +++ b/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated/177ef040-da5c-4a65-adac-efdc555bd110.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.2-Phi-3-Medium-abliterated/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dolphin-2.9.2-Phi-3-Medium-abliterated", + "id": "cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated", + "developer": "cognitivecomputations", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 13.96 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3613 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6123 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1239 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.328 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4112 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4494 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated/e9dc8337-eb35-4eb9-bca7-30ec1cd44092.json b/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated/e9dc8337-eb35-4eb9-bca7-30ec1cd44092.json new file mode 100644 index 000000000..118e42ec5 --- /dev/null +++ b/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated/e9dc8337-eb35-4eb9-bca7-30ec1cd44092.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.2-Phi-3-Medium-abliterated/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dolphin-2.9.2-Phi-3-Medium-abliterated", + "id": "cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated", + "developer": "cognitivecomputations", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 13.96 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4124 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6383 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.182 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3289 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4349 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4525 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.2-Phi-3-Medium/f4549a39-0b28-4e06-998a-774f5f02cfba.json b/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.2-Phi-3-Medium/f4549a39-0b28-4e06-998a-774f5f02cfba.json new file mode 100644 index 000000000..a8dbae20b --- /dev/null +++ b/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.2-Phi-3-Medium/f4549a39-0b28-4e06-998a-774f5f02cfba.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.2-Phi-3-Medium/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dolphin-2.9.2-Phi-3-Medium", + "id": "cognitivecomputations/dolphin-2.9.2-Phi-3-Medium", + "developer": "cognitivecomputations", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": -1.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4248 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6457 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1828 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3272 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4191 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4555 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.2-qwen2-72b/a79af78a-adab-406f-995a-adb3893e1510.json 
b/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.2-qwen2-72b/a79af78a-adab-406f-995a-adb3893e1510.json new file mode 100644 index 000000000..027fcc0d4 --- /dev/null +++ b/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.2-qwen2-72b/a79af78a-adab-406f-995a-adb3893e1510.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.2-qwen2-72b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dolphin-2.9.2-qwen2-72b", + "id": "cognitivecomputations/dolphin-2.9.2-qwen2-72b", + "developer": "cognitivecomputations", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6344 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6296 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.37 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4521 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5471 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.2-qwen2-7b/4e8e457a-85eb-4afb-a9fe-8f8ce6eaf4d7.json b/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.2-qwen2-7b/4e8e457a-85eb-4afb-a9fe-8f8ce6eaf4d7.json new file mode 100644 index 000000000..e8d10e1e5 --- 
/dev/null +++ b/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.2-qwen2-7b/4e8e457a-85eb-4afb-a9fe-8f8ce6eaf4d7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.2-qwen2-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dolphin-2.9.2-qwen2-7b", + "id": "cognitivecomputations/dolphin-2.9.2-qwen2-7b", + "developer": "cognitivecomputations", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3535 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4894 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1344 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4191 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4051 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.3-Yi-1.5-34B-32k/eeb3a10a-d584-414a-90de-e018c47615c2.json b/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.3-Yi-1.5-34B-32k/eeb3a10a-d584-414a-90de-e018c47615c2.json new file mode 100644 index 000000000..a64db73d8 --- /dev/null +++ b/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.3-Yi-1.5-34B-32k/eeb3a10a-d584-414a-90de-e018c47615c2.json @@ -0,0 +1,132 @@ +{ + 
"schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.3-Yi-1.5-34B-32k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dolphin-2.9.3-Yi-1.5-34B-32k", + "id": "cognitivecomputations/dolphin-2.9.3-Yi-1.5-34B-32k", + "developer": "cognitivecomputations", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3639 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6047 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1669 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3431 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4311 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.463 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.3-mistral-7B-32k/e83dadb0-5092-48b8-b408-e6bb1ac8a0ba.json b/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.3-mistral-7B-32k/e83dadb0-5092-48b8-b408-e6bb1ac8a0ba.json new file mode 100644 index 000000000..88711de67 --- /dev/null +++ b/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.3-mistral-7B-32k/e83dadb0-5092-48b8-b408-e6bb1ac8a0ba.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/cognitivecomputations_dolphin-2.9.3-mistral-7B-32k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dolphin-2.9.3-mistral-7B-32k", + "id": "cognitivecomputations/dolphin-2.9.3-mistral-7B-32k", + "developer": "cognitivecomputations", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4126 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4813 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0506 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4643 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2821 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.3-mistral-nemo-12b/cebc7767-fbc9-45a2-808b-51e1a4f0f35c.json b/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.3-mistral-nemo-12b/cebc7767-fbc9-45a2-808b-51e1a4f0f35c.json new file mode 100644 index 000000000..470ec6033 --- /dev/null +++ b/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.3-mistral-nemo-12b/cebc7767-fbc9-45a2-808b-51e1a4f0f35c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.3-mistral-nemo-12b/1770682486.623709", + "retrieved_timestamp": 
"1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dolphin-2.9.3-mistral-nemo-12b", + "id": "cognitivecomputations/dolphin-2.9.3-mistral-nemo-12b", + "developer": "cognitivecomputations", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5601 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.548 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.074 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3154 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.443 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3377 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.4-gemma2-2b/b64b6416-b18b-47cc-a516-c613cd670b37.json b/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.4-gemma2-2b/b64b6416-b18b-47cc-a516-c613cd670b37.json new file mode 100644 index 000000000..859c42976 --- /dev/null +++ b/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.4-gemma2-2b/b64b6416-b18b-47cc-a516-c613cd670b37.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.4-gemma2-2b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dolphin-2.9.4-gemma2-2b", + "id": "cognitivecomputations/dolphin-2.9.4-gemma2-2b", + "developer": "cognitivecomputations", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0896 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4081 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0491 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2844 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.418 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2105 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.4-llama3.1-8b/64e96d56-72a9-413f-8903-45821b98f71e.json b/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.4-llama3.1-8b/64e96d56-72a9-413f-8903-45821b98f71e.json new file mode 100644 index 000000000..f6d9ab604 --- /dev/null +++ b/data/hfopenllm_v2/cognitivecomputations/dolphin-2.9.4-llama3.1-8b/64e96d56-72a9-413f-8903-45821b98f71e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.4-llama3.1-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dolphin-2.9.4-llama3.1-8b", 
+ "id": "cognitivecomputations/dolphin-2.9.4-llama3.1-8b", + "developer": "cognitivecomputations", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2757 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3524 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0121 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3236 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1237 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/collaiborateorg/Collaiborator-MEDLLM-Llama-3-8B-v2/a3f44cfd-d1fc-4a3c-aa5b-a0f37fc4a192.json b/data/hfopenllm_v2/collaiborateorg/Collaiborator-MEDLLM-Llama-3-8B-v2/a3f44cfd-d1fc-4a3c-aa5b-a0f37fc4a192.json new file mode 100644 index 000000000..faa327cca --- /dev/null +++ b/data/hfopenllm_v2/collaiborateorg/Collaiborator-MEDLLM-Llama-3-8B-v2/a3f44cfd-d1fc-4a3c-aa5b-a0f37fc4a192.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/collaiborateorg_Collaiborator-MEDLLM-Llama-3-8B-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Collaiborator-MEDLLM-Llama-3-8B-v2", + "id": "collaiborateorg/Collaiborator-MEDLLM-Llama-3-8B-v2", + "developer": "collaiborateorg", + "inference_platform": 
"unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3809 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4648 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0566 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3331 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3434 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3481 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cpayne1303/cp2024-instruct/247e1c1e-ce27-4645-a2ae-4177f08ea4a5.json b/data/hfopenllm_v2/cpayne1303/cp2024-instruct/247e1c1e-ce27-4645-a2ae-4177f08ea4a5.json deleted file mode 100644 index ed768616e..000000000 --- a/data/hfopenllm_v2/cpayne1303/cp2024-instruct/247e1c1e-ce27-4645-a2ae-4177f08ea4a5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cpayne1303_cp2024-instruct/1762652580.116854", - "retrieved_timestamp": "1762652580.116854", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cpayne1303/cp2024-instruct", - "developer": "cpayne1303", - "inference_platform": "unknown", - "id": "cpayne1303/cp2024-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.031 - } - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17061064641817045 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2946778102988436 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3686354166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11668882978723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/cpayne1303/cp2024-instruct/79314f48-d92b-4992-b3c6-d31278c0867a.json b/data/hfopenllm_v2/cpayne1303/cp2024-instruct/79314f48-d92b-4992-b3c6-d31278c0867a.json new file mode 100644 index 000000000..0a74e80db --- /dev/null +++ b/data/hfopenllm_v2/cpayne1303/cp2024-instruct/79314f48-d92b-4992-b3c6-d31278c0867a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cpayne1303_cp2024-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cp2024-instruct", + "id": "cpayne1303/cp2024-instruct", + "developer": "cpayne1303", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.031 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1706 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2947 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3686 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1167 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cpayne1303/cp2024/2bfb7bea-a344-4249-8bdc-e6c483518df5.json b/data/hfopenllm_v2/cpayne1303/cp2024/2bfb7bea-a344-4249-8bdc-e6c483518df5.json deleted file mode 100644 index 897a77769..000000000 --- a/data/hfopenllm_v2/cpayne1303/cp2024/2bfb7bea-a344-4249-8bdc-e6c483518df5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cpayne1303_cp2024/1762652580.116582", - "retrieved_timestamp": "1762652580.1165829", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cpayne1303/cp2024", - "developer": "cpayne1303", - "inference_platform": "unknown", - "id": "cpayne1303/cp2024", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.031 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16581448334862608 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29853854089245085 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.005287009063444109 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3383125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11012300531914894 - } - } - ] -} diff --git a/data/hfopenllm_v2/cpayne1303/cp2024/5a007612-c8e7-4f6b-baa9-a21af7e908c6.json b/data/hfopenllm_v2/cpayne1303/cp2024/5a007612-c8e7-4f6b-baa9-a21af7e908c6.json new file mode 100644 index 000000000..5ffe334d2 --- /dev/null +++ b/data/hfopenllm_v2/cpayne1303/cp2024/5a007612-c8e7-4f6b-baa9-a21af7e908c6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cpayne1303_cp2024/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cp2024", + "id": "cpayne1303/cp2024", + "developer": "cpayne1303", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.031 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1658 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2985 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0053 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2559 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3383 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1101 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cpayne1303/llama-43m-beta/fdefdd3e-2d83-4430-bd95-e16a1935dff1.json b/data/hfopenllm_v2/cpayne1303/llama-43m-beta/fdefdd3e-2d83-4430-bd95-e16a1935dff1.json new file mode 100644 index 000000000..d8c1991cd --- /dev/null +++ b/data/hfopenllm_v2/cpayne1303/llama-43m-beta/fdefdd3e-2d83-4430-bd95-e16a1935dff1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cpayne1303_llama-43m-beta/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-43m-beta", + "id": "cpayne1303/llama-43m-beta", + "developer": "cpayne1303", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.043 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1949 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2965 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0045 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3885 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1111 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cpayne1303/llama-43m-beta/ffdd45bf-3409-4b92-909a-25a32ba27f82.json 
b/data/hfopenllm_v2/cpayne1303/llama-43m-beta/ffdd45bf-3409-4b92-909a-25a32ba27f82.json new file mode 100644 index 000000000..979e3c2d6 --- /dev/null +++ b/data/hfopenllm_v2/cpayne1303/llama-43m-beta/ffdd45bf-3409-4b92-909a-25a32ba27f82.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cpayne1303_llama-43m-beta/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-43m-beta", + "id": "cpayne1303/llama-43m-beta", + "developer": "cpayne1303", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.043 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1916 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2977 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3872 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1132 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cpayne1303/smallcp2024/a78ab8ac-2c2e-405a-95ee-0d1d27cf533b.json b/data/hfopenllm_v2/cpayne1303/smallcp2024/a78ab8ac-2c2e-405a-95ee-0d1d27cf533b.json new file mode 100644 index 000000000..fba1477ee --- /dev/null +++ b/data/hfopenllm_v2/cpayne1303/smallcp2024/a78ab8ac-2c2e-405a-95ee-0d1d27cf533b.json @@ -0,0 +1,132 @@ +{ + "schema_version": 
"0.2.0", + "evaluation_id": "hfopenllm_v2/cpayne1303_smallcp2024/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "smallcp2024", + "id": "cpayne1303/smallcp2024", + "developer": "cpayne1303", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.002 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1582 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3027 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0053 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3425 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1114 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cpayne1303/smallcp2024/fcbede38-3a5b-4cd7-b144-cbf26cc05df9.json b/data/hfopenllm_v2/cpayne1303/smallcp2024/fcbede38-3a5b-4cd7-b144-cbf26cc05df9.json deleted file mode 100644 index fda02a6fb..000000000 --- a/data/hfopenllm_v2/cpayne1303/smallcp2024/fcbede38-3a5b-4cd7-b144-cbf26cc05df9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cpayne1303_smallcp2024/1762652580.117528", - "retrieved_timestamp": "1762652580.117528", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cpayne1303/smallcp2024", - "developer": "cpayne1303", - "inference_platform": "unknown", - "id": "cpayne1303/smallcp2024", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.002 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1581958093414363 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3027047714604053 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.005287009063444109 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23070469798657717 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34246874999999993 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11136968085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/crestf411/MN-Slush/b32a7808-7a64-41a8-aad4-030efc512906.json b/data/hfopenllm_v2/crestf411/MN-Slush/b32a7808-7a64-41a8-aad4-030efc512906.json deleted file mode 100644 index 8263ff0e1..000000000 --- a/data/hfopenllm_v2/crestf411/MN-Slush/b32a7808-7a64-41a8-aad4-030efc512906.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/crestf411_MN-Slush/1762652580.117737", - "retrieved_timestamp": "1762652580.117738", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "crestf411/MN-Slush", - "developer": "crestf411", - "inference_platform": "unknown", - "id": "crestf411/MN-Slush", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4077148632295642 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5340014235282594 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1268882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39328125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3508144946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/crestf411/MN-Slush/d9d49bf7-f6f0-4c25-9182-d815454940e3.json b/data/hfopenllm_v2/crestf411/MN-Slush/d9d49bf7-f6f0-4c25-9182-d815454940e3.json new file mode 100644 index 000000000..5057d4ade --- /dev/null +++ b/data/hfopenllm_v2/crestf411/MN-Slush/d9d49bf7-f6f0-4c25-9182-d815454940e3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/crestf411_MN-Slush/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MN-Slush", + "id": "crestf411/MN-Slush", + "developer": "crestf411", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4077 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.534 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1269 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3238 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3933 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3508 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cstr/llama3.1-8b-spaetzle-v90/deb48e93-0378-482f-8a5d-7ec350497e0b.json b/data/hfopenllm_v2/cstr/llama3.1-8b-spaetzle-v90/deb48e93-0378-482f-8a5d-7ec350497e0b.json new file mode 100644 index 000000000..cf3237c07 --- /dev/null +++ b/data/hfopenllm_v2/cstr/llama3.1-8b-spaetzle-v90/deb48e93-0378-482f-8a5d-7ec350497e0b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cstr_llama3.1-8b-spaetzle-v90/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama3.1-8b-spaetzle-v90", + "id": "cstr/llama3.1-8b-spaetzle-v90", + "developer": "cstr", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7356 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5303 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1495 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4134 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3731 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cyberagent/calm3-22b-chat/302a9a47-8603-42d9-85fb-64c60e7c6f44.json b/data/hfopenllm_v2/cyberagent/calm3-22b-chat/302a9a47-8603-42d9-85fb-64c60e7c6f44.json new file mode 100644 index 000000000..f7b5122d0 --- /dev/null +++ b/data/hfopenllm_v2/cyberagent/calm3-22b-chat/302a9a47-8603-42d9-85fb-64c60e7c6f44.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/cyberagent_calm3-22b-chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "calm3-22b-chat", + "id": "cyberagent/calm3-22b-chat", + "developer": "cyberagent", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 22.543 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5091 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4992 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0695 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2768 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4553 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + 
"dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.295 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cyberagent/calm3-22b-chat/b7ce290d-d082-4586-ac4b-516e8130ddc2.json b/data/hfopenllm_v2/cyberagent/calm3-22b-chat/b7ce290d-d082-4586-ac4b-516e8130ddc2.json deleted file mode 100644 index cfb8e25b8..000000000 --- a/data/hfopenllm_v2/cyberagent/calm3-22b-chat/b7ce290d-d082-4586-ac4b-516e8130ddc2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cyberagent_calm3-22b-chat/1762652580.118237", - "retrieved_timestamp": "1762652580.118238", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cyberagent/calm3-22b-chat", - "developer": "cyberagent", - "inference_platform": "unknown", - "id": "cyberagent/calm3-22b-chat", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 22.543 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.509131327100981 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4991683247746046 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06948640483383686 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45532291666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29496343085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/darkc0de/BuddyGlassNeverSleeps/28d52801-3998-421f-a37a-2b7b677d0eaa.json b/data/hfopenllm_v2/darkc0de/BuddyGlassNeverSleeps/28d52801-3998-421f-a37a-2b7b677d0eaa.json new file mode 100644 index 000000000..8fd65a779 --- /dev/null +++ b/data/hfopenllm_v2/darkc0de/BuddyGlassNeverSleeps/28d52801-3998-421f-a37a-2b7b677d0eaa.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/darkc0de_BuddyGlassNeverSleeps/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BuddyGlassNeverSleeps", + "id": "darkc0de/BuddyGlassNeverSleeps", + "developer": "darkc0de", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4239 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4977 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0627 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3993 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3452 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/darkc0de/BuddyGlassNeverSleeps/675f6dfe-c623-4694-94cb-8705aab5521f.json b/data/hfopenllm_v2/darkc0de/BuddyGlassNeverSleeps/675f6dfe-c623-4694-94cb-8705aab5521f.json deleted file mode 100644 index 0b902c80b..000000000 --- a/data/hfopenllm_v2/darkc0de/BuddyGlassNeverSleeps/675f6dfe-c623-4694-94cb-8705aab5521f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/darkc0de_BuddyGlassNeverSleeps/1762652580.1184928", - "retrieved_timestamp": "1762652580.118494", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "darkc0de/BuddyGlassNeverSleeps", - "developer": "darkc0de", - "inference_platform": "unknown", - "id": "darkc0de/BuddyGlassNeverSleeps", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4239019135892764 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49772281653646816 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06268882175226587 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3992708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34524601063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/darkc0de/BuddyGlassUncensored2025.2/32b4e23b-9430-45a8-bfa2-eea2e89792c4.json b/data/hfopenllm_v2/darkc0de/BuddyGlassUncensored2025.2/32b4e23b-9430-45a8-bfa2-eea2e89792c4.json new file mode 100644 index 000000000..0e633e15d --- /dev/null +++ b/data/hfopenllm_v2/darkc0de/BuddyGlassUncensored2025.2/32b4e23b-9430-45a8-bfa2-eea2e89792c4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/darkc0de_BuddyGlassUncensored2025.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BuddyGlassUncensored2025.2", + "id": "darkc0de/BuddyGlassUncensored2025.2", + "developer": "darkc0de", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7731 + } + }, + { + "evaluation_name": 
"BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6095 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2402 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.328 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4071 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4336 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/darkc0de/BuddyGlassUncensored2025.2/ea8dfb5f-750d-4573-a2bb-dadafc3a73b7.json b/data/hfopenllm_v2/darkc0de/BuddyGlassUncensored2025.2/ea8dfb5f-750d-4573-a2bb-dadafc3a73b7.json deleted file mode 100644 index 44074acb1..000000000 --- a/data/hfopenllm_v2/darkc0de/BuddyGlassUncensored2025.2/ea8dfb5f-750d-4573-a2bb-dadafc3a73b7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/darkc0de_BuddyGlassUncensored2025.2/1762652580.118735", - "retrieved_timestamp": "1762652580.1187358", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "darkc0de/BuddyGlassUncensored2025.2", - "developer": "darkc0de", - "inference_platform": "unknown", - "id": "darkc0de/BuddyGlassUncensored2025.2", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7731131176389756 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { 
- "score": 0.6095411371819216 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24018126888217523 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4070833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43359375 - } - } - ] -} diff --git a/data/hfopenllm_v2/darkc0de/BuddyGlass_v0.3_Xortron7MethedUpSwitchedUp/0336e168-e313-44cb-a030-42e6d20e92df.json b/data/hfopenllm_v2/darkc0de/BuddyGlass_v0.3_Xortron7MethedUpSwitchedUp/0336e168-e313-44cb-a030-42e6d20e92df.json new file mode 100644 index 000000000..d8470fccf --- /dev/null +++ b/data/hfopenllm_v2/darkc0de/BuddyGlass_v0.3_Xortron7MethedUpSwitchedUp/0336e168-e313-44cb-a030-42e6d20e92df.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/darkc0de_BuddyGlass_v0.3_Xortron7MethedUpSwitchedUp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BuddyGlass_v0.3_Xortron7MethedUpSwitchedUp", + "id": "darkc0de/BuddyGlass_v0.3_Xortron7MethedUpSwitchedUp", + "developer": "darkc0de", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.007 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4358 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5243 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1284 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4143 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3673 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/darkc0de/BuddyGlass_v0.3_Xortron7MethedUpSwitchedUp/adf85459-eba0-48a8-ad54-1e17d1ea5b31.json b/data/hfopenllm_v2/darkc0de/BuddyGlass_v0.3_Xortron7MethedUpSwitchedUp/adf85459-eba0-48a8-ad54-1e17d1ea5b31.json deleted file mode 100644 index b8e0cc30f..000000000 --- a/data/hfopenllm_v2/darkc0de/BuddyGlass_v0.3_Xortron7MethedUpSwitchedUp/adf85459-eba0-48a8-ad54-1e17d1ea5b31.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/darkc0de_BuddyGlass_v0.3_Xortron7MethedUpSwitchedUp/1762652580.1189609", - "retrieved_timestamp": "1762652580.1189609", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "darkc0de/BuddyGlass_v0.3_Xortron7MethedUpSwitchedUp", - "developer": "darkc0de", - "inference_platform": "unknown", - "id": "darkc0de/BuddyGlass_v0.3_Xortron7MethedUpSwitchedUp", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.007 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43584245357872664 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5243087998656722 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.4143333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36727061170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/databricks/dbrx-base/11bd8b5b-2ea4-4ec5-8fe6-654aedb40fc9.json b/data/hfopenllm_v2/databricks/dbrx-base/11bd8b5b-2ea4-4ec5-8fe6-654aedb40fc9.json new file mode 100644 index 000000000..e2da8eba9 --- /dev/null +++ b/data/hfopenllm_v2/databricks/dbrx-base/11bd8b5b-2ea4-4ec5-8fe6-654aedb40fc9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/databricks_dbrx-base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dbrx-base", + "id": "databricks/dbrx-base", + "developer": "databricks", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Unknown", + "params_billions": 0.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0821 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5196 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3267 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4067 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.35 + } + } + ] +} \ No newline at 
end of file diff --git a/data/hfopenllm_v2/databricks/dbrx-base/17febb53-0735-4983-8049-85319818ab84.json b/data/hfopenllm_v2/databricks/dbrx-base/17febb53-0735-4983-8049-85319818ab84.json deleted file mode 100644 index 7622bd600..000000000 --- a/data/hfopenllm_v2/databricks/dbrx-base/17febb53-0735-4983-8049-85319818ab84.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/databricks_dbrx-base/1762652580.1191711", - "retrieved_timestamp": "1762652580.1191711", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "databricks/dbrx-base", - "developer": "databricks", - "inference_platform": "unknown", - "id": "databricks/dbrx-base", - "additional_details": { - "precision": "float16", - "architecture": "Unknown", - "params_billions": 0.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08214723926380368 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5195833333333334 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32666666666666666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4066666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35 - } - } - ] -} diff --git a/data/hfopenllm_v2/databricks/dbrx-instruct/639e4921-9fa8-446d-b539-f03a7589b142.json b/data/hfopenllm_v2/databricks/dbrx-instruct/639e4921-9fa8-446d-b539-f03a7589b142.json deleted file mode 100644 index c823e6b83..000000000 --- a/data/hfopenllm_v2/databricks/dbrx-instruct/639e4921-9fa8-446d-b539-f03a7589b142.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/databricks_dbrx-instruct/1762652580.119466", - "retrieved_timestamp": "1762652580.119467", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "databricks/dbrx-instruct", - "developer": 
"databricks", - "inference_platform": "unknown", - "id": "databricks/dbrx-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "DbrxForCausalLM", - "params_billions": 131.597 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5415796752616391 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5428960796934387 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06873111782477341 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3414429530201342 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42692708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36826795212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/databricks/dbrx-instruct/6d97749c-3bfa-4c32-b581-a5e2b73303f3.json b/data/hfopenllm_v2/databricks/dbrx-instruct/6d97749c-3bfa-4c32-b581-a5e2b73303f3.json new file mode 100644 index 000000000..3b824e9c7 --- /dev/null +++ b/data/hfopenllm_v2/databricks/dbrx-instruct/6d97749c-3bfa-4c32-b581-a5e2b73303f3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/databricks_dbrx-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dbrx-instruct", + "id": "databricks/dbrx-instruct", + "developer": "databricks", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "DbrxForCausalLM", + "params_billions": 131.597 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5416 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5429 + 
} + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0687 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3414 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4269 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3683 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/databricks/dolly-v1-6b/62299ec1-dd42-4751-a224-3bdda71d3cdf.json b/data/hfopenllm_v2/databricks/dolly-v1-6b/62299ec1-dd42-4751-a224-3bdda71d3cdf.json deleted file mode 100644 index 312d60628..000000000 --- a/data/hfopenllm_v2/databricks/dolly-v1-6b/62299ec1-dd42-4751-a224-3bdda71d3cdf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/databricks_dolly-v1-6b/1762652580.1196742", - "retrieved_timestamp": "1762652580.119675", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "databricks/dolly-v1-6b", - "developer": "databricks", - "inference_platform": "unknown", - "id": "databricks/dolly-v1-6b", - "additional_details": { - "precision": "bfloat16", - "architecture": "GPTJForCausalLM", - "params_billions": 6.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22244311759464885 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3172089528774696 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0188821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": 
"Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40041666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12657912234042554 - } - } - ] -} diff --git a/data/hfopenllm_v2/databricks/dolly-v1-6b/ec58907d-b67c-467e-a3dd-b9f9c10138f0.json b/data/hfopenllm_v2/databricks/dolly-v1-6b/ec58907d-b67c-467e-a3dd-b9f9c10138f0.json new file mode 100644 index 000000000..ad7981890 --- /dev/null +++ b/data/hfopenllm_v2/databricks/dolly-v1-6b/ec58907d-b67c-467e-a3dd-b9f9c10138f0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/databricks_dolly-v1-6b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dolly-v1-6b", + "id": "databricks/dolly-v1-6b", + "developer": "databricks", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GPTJForCausalLM", + "params_billions": 6.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2224 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3172 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0189 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4004 + } 
+ }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1266 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/databricks/dolly-v2-12b/a7f09a3d-025c-48fa-9358-863b9ae382b1.json b/data/hfopenllm_v2/databricks/dolly-v2-12b/a7f09a3d-025c-48fa-9358-863b9ae382b1.json new file mode 100644 index 000000000..68d7dcd5a --- /dev/null +++ b/data/hfopenllm_v2/databricks/dolly-v2-12b/a7f09a3d-025c-48fa-9358-863b9ae382b1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/databricks_dolly-v2-12b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dolly-v2-12b", + "id": "databricks/dolly-v2-12b", + "developer": "databricks", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 12.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2355 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.332 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0136 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2408 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3739 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1129 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/databricks/dolly-v2-12b/c83e2bf0-5d4e-45c4-aff2-27aea2bc0fb6.json b/data/hfopenllm_v2/databricks/dolly-v2-12b/c83e2bf0-5d4e-45c4-aff2-27aea2bc0fb6.json deleted file mode 100644 index b5c304908..000000000 --- a/data/hfopenllm_v2/databricks/dolly-v2-12b/c83e2bf0-5d4e-45c4-aff2-27aea2bc0fb6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/databricks_dolly-v2-12b/1762652580.1198819", - "retrieved_timestamp": "1762652580.119883", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "databricks/dolly-v2-12b", - "developer": "databricks", - "inference_platform": "unknown", - "id": "databricks/dolly-v2-12b", - "additional_details": { - "precision": "bfloat16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 12.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23550734273948679 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33199731673771277 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2407718120805369 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37390625000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11286569148936171 - } - } - ] -} diff --git a/data/hfopenllm_v2/databricks/dolly-v2-3b/a8838707-f188-440e-801f-e780e0dd362a.json b/data/hfopenllm_v2/databricks/dolly-v2-3b/a8838707-f188-440e-801f-e780e0dd362a.json deleted file mode 100644 index 89019da12..000000000 --- a/data/hfopenllm_v2/databricks/dolly-v2-3b/a8838707-f188-440e-801f-e780e0dd362a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/databricks_dolly-v2-3b/1762652580.1200871", - "retrieved_timestamp": "1762652580.1200871", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "databricks/dolly-v2-3b", - "developer": "databricks", - "inference_platform": "unknown", - "id": "databricks/dolly-v2-3b", - "additional_details": { - "precision": "bfloat16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 3.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22471597583301195 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30792785961544844 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33378125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11452792553191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/databricks/dolly-v2-3b/bf2be2d5-58de-4550-b733-a5910bded48d.json b/data/hfopenllm_v2/databricks/dolly-v2-3b/bf2be2d5-58de-4550-b733-a5910bded48d.json new file mode 100644 index 000000000..0f29fbdd0 --- /dev/null +++ b/data/hfopenllm_v2/databricks/dolly-v2-3b/bf2be2d5-58de-4550-b733-a5910bded48d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/databricks_dolly-v2-3b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dolly-v2-3b", + "id": "databricks/dolly-v2-3b", + "developer": "databricks", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 3.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2247 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0151 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3338 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1145 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/databricks/dolly-v2-7b/52b32c1f-6189-4850-b3f4-de442eb2ccb5.json b/data/hfopenllm_v2/databricks/dolly-v2-7b/52b32c1f-6189-4850-b3f4-de442eb2ccb5.json new file mode 100644 index 000000000..99647f517 --- /dev/null +++ b/data/hfopenllm_v2/databricks/dolly-v2-7b/52b32c1f-6189-4850-b3f4-de442eb2ccb5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/databricks_dolly-v2-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dolly-v2-7b", + "id": "databricks/dolly-v2-7b", + "developer": "databricks", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.201 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3173 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", 
+ "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0144 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3553 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1149 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/databricks/dolly-v2-7b/68f999d7-2dc2-4b3c-ab02-6140387893c0.json b/data/hfopenllm_v2/databricks/dolly-v2-7b/68f999d7-2dc2-4b3c-ab02-6140387893c0.json deleted file mode 100644 index 62444ae17..000000000 --- a/data/hfopenllm_v2/databricks/dolly-v2-7b/68f999d7-2dc2-4b3c-ab02-6140387893c0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/databricks_dolly-v2-7b/1762652580.120286", - "retrieved_timestamp": "1762652580.120287", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "databricks/dolly-v2-7b", - "developer": "databricks", - "inference_platform": "unknown", - "id": "databricks/dolly-v2-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2009856070781083 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31730628122070326 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35530208333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1149434840425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/davidkim205/Rhea-72b-v0.5/106de4e2-a8d3-40d3-bdbc-0b95930e9ba6.json b/data/hfopenllm_v2/davidkim205/Rhea-72b-v0.5/106de4e2-a8d3-40d3-bdbc-0b95930e9ba6.json deleted file mode 100644 index 03f75df43..000000000 --- a/data/hfopenllm_v2/davidkim205/Rhea-72b-v0.5/106de4e2-a8d3-40d3-bdbc-0b95930e9ba6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/davidkim205_Rhea-72b-v0.5/1762652580.1208682", - "retrieved_timestamp": "1762652580.1208699", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "davidkim205/Rhea-72b-v0.5", - "developer": "davidkim205", - "inference_platform": "unknown", - "id": "davidkim205/Rhea-72b-v0.5", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 72.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014538092261865185 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30783395929068597 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17371601208459214 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42413541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11660571808510638 - } - } - ] -} diff --git a/data/hfopenllm_v2/davidkim205/Rhea-72b-v0.5/87b44160-c3dd-452d-8c15-c4f758f8db7b.json b/data/hfopenllm_v2/davidkim205/Rhea-72b-v0.5/87b44160-c3dd-452d-8c15-c4f758f8db7b.json new file mode 100644 index 
000000000..6a35282cd --- /dev/null +++ b/data/hfopenllm_v2/davidkim205/Rhea-72b-v0.5/87b44160-c3dd-452d-8c15-c4f758f8db7b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/davidkim205_Rhea-72b-v0.5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Rhea-72b-v0.5", + "id": "davidkim205/Rhea-72b-v0.5", + "developer": "davidkim205", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 72.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0145 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3078 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1737 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2525 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4241 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1166 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/davidkim205/nox-solar-10.7b-v4/3e6814d3-54ea-493f-a9fc-85ae9eed1b05.json b/data/hfopenllm_v2/davidkim205/nox-solar-10.7b-v4/3e6814d3-54ea-493f-a9fc-85ae9eed1b05.json new file mode 100644 index 000000000..0227496f4 --- /dev/null +++ b/data/hfopenllm_v2/davidkim205/nox-solar-10.7b-v4/3e6814d3-54ea-493f-a9fc-85ae9eed1b05.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/davidkim205_nox-solar-10.7b-v4/1770682486.623709", 
+ "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nox-solar-10.7b-v4", + "id": "davidkim205/nox-solar-10.7b-v4", + "developer": "davidkim205", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3753 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4814 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0083 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4298 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3333 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/davidkim205/nox-solar-10.7b-v4/fcc755d0-6269-49e6-890b-4a14417601a1.json b/data/hfopenllm_v2/davidkim205/nox-solar-10.7b-v4/fcc755d0-6269-49e6-890b-4a14417601a1.json deleted file mode 100644 index e65953e5e..000000000 --- a/data/hfopenllm_v2/davidkim205/nox-solar-10.7b-v4/fcc755d0-6269-49e6-890b-4a14417601a1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/davidkim205_nox-solar-10.7b-v4/1762652580.1212", - "retrieved_timestamp": "1762652580.1212008", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "davidkim205/nox-solar-10.7b-v4", - "developer": "davidkim205", - "inference_platform": "unknown", - "id": "davidkim205/nox-solar-10.7b-v4", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3753418706809044 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4814038018918371 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42984375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3332779255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/35b7ff42-3825-4240-97bf-f8af7e8c23ff.json b/data/hfopenllm_v2/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/35b7ff42-3825-4240-97bf-f8af7e8c23ff.json new file mode 100644 index 000000000..d4fcf871a --- /dev/null +++ b/data/hfopenllm_v2/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/35b7ff42-3825-4240-97bf-f8af7e8c23ff.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/deepseek-ai_DeepSeek-R1-Distill-Llama-70B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-R1-Distill-Llama-70B", + "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "developer": "deepseek-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4336 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + 
"dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5635 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3074 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4342 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4748 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/c108173e-1582-4c99-9291-46986d7ba1cf.json b/data/hfopenllm_v2/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/c108173e-1582-4c99-9291-46986d7ba1cf.json new file mode 100644 index 000000000..50037a506 --- /dev/null +++ b/data/hfopenllm_v2/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/c108173e-1582-4c99-9291-46986d7ba1cf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/deepseek-ai_DeepSeek-R1-Distill-Llama-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-R1-Distill-Llama-8B", + "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", + "developer": "deepseek-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3782 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, 
+ "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3239 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2198 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.255 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.325 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2089 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/6feb08b0-1c67-4fe2-a001-0b3b84529687.json b/data/hfopenllm_v2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/6feb08b0-1c67-4fe2-a001-0b3b84529687.json new file mode 100644 index 000000000..642a4c84c --- /dev/null +++ b/data/hfopenllm_v2/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/6feb08b0-1c67-4fe2-a001-0b3b84529687.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/deepseek-ai_DeepSeek-R1-Distill-Qwen-1.5B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-R1-Distill-Qwen-1.5B", + "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "developer": "deepseek-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3463 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3241 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { 
+ "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1692 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2559 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3635 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1187 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/d4ab3df2-109a-4eec-9742-dc3bb79d5a58.json b/data/hfopenllm_v2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/d4ab3df2-109a-4eec-9742-dc3bb79d5a58.json new file mode 100644 index 000000000..6772800d9 --- /dev/null +++ b/data/hfopenllm_v2/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/d4ab3df2-109a-4eec-9742-dc3bb79d5a58.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/deepseek-ai_DeepSeek-R1-Distill-Qwen-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-R1-Distill-Qwen-14B", + "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", + "developer": "deepseek-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4382 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5906 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on 
MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5702 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3876 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5366 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4667 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/53ec995e-bcfd-4a72-bd9a-45d14da3f219.json b/data/hfopenllm_v2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/53ec995e-bcfd-4a72-bd9a-45d14da3f219.json new file mode 100644 index 000000000..69ee0e546 --- /dev/null +++ b/data/hfopenllm_v2/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/53ec995e-bcfd-4a72-bd9a-45d14da3f219.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/deepseek-ai_DeepSeek-R1-Distill-Qwen-32B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-R1-Distill-Qwen-32B", + "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "developer": "deepseek-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4186 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4197 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1707 + } + }, + { + 
"evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2844 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4526 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4687 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/299a0397-89c7-4329-9599-9fc29a52db87.json b/data/hfopenllm_v2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/299a0397-89c7-4329-9599-9fc29a52db87.json new file mode 100644 index 000000000..8f7f946ab --- /dev/null +++ b/data/hfopenllm_v2/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/299a0397-89c7-4329-9599-9fc29a52db87.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/deepseek-ai_DeepSeek-R1-Distill-Qwen-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-R1-Distill-Qwen-7B", + "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", + "developer": "deepseek-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4038 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3443 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1956 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2321 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/deepseek-ai/deepseek-llm-67b-chat/41adbc32-6cdf-49ba-980c-6eb6f722b40b.json b/data/hfopenllm_v2/deepseek-ai/deepseek-llm-67b-chat/41adbc32-6cdf-49ba-980c-6eb6f722b40b.json new file mode 100644 index 000000000..e02fd22ba --- /dev/null +++ b/data/hfopenllm_v2/deepseek-ai/deepseek-llm-67b-chat/41adbc32-6cdf-49ba-980c-6eb6f722b40b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/deepseek-ai_deepseek-llm-67b-chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "deepseek-llm-67b-chat", + "id": "deepseek-ai/deepseek-llm-67b-chat", + "developer": "deepseek-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 67.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5587 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5243 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0929 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3163 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + 
"dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5059 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3944 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/deepseek-ai/deepseek-llm-67b-chat/eeea1c5c-bf81-4533-aace-ccb85153320f.json b/data/hfopenllm_v2/deepseek-ai/deepseek-llm-67b-chat/eeea1c5c-bf81-4533-aace-ccb85153320f.json deleted file mode 100644 index cb52cbea5..000000000 --- a/data/hfopenllm_v2/deepseek-ai/deepseek-llm-67b-chat/eeea1c5c-bf81-4533-aace-ccb85153320f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/deepseek-ai_deepseek-llm-67b-chat/1762652580.1230679", - "retrieved_timestamp": "1762652580.1230688", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "deepseek-ai/deepseek-llm-67b-chat", - "developer": "deepseek-ai", - "inference_platform": "unknown", - "id": "deepseek-ai/deepseek-llm-67b-chat", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 67.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5587153197959193 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5243416179742358 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09290030211480363 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5058645833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3943650265957447 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/deepseek-ai/deepseek-llm-7b-base/4236ece5-f2b2-44e7-9503-9731bff20155.json b/data/hfopenllm_v2/deepseek-ai/deepseek-llm-7b-base/4236ece5-f2b2-44e7-9503-9731bff20155.json new file mode 100644 index 000000000..fa4327008 --- /dev/null +++ b/data/hfopenllm_v2/deepseek-ai/deepseek-llm-7b-base/4236ece5-f2b2-44e7-9503-9731bff20155.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/deepseek-ai_deepseek-llm-7b-base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "deepseek-llm-7b-base", + "id": "deepseek-ai/deepseek-llm-7b-base", + "developer": "deepseek-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2179 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3503 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0196 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2735 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3738 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1806 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/deepseek-ai/deepseek-llm-7b-base/e11d46c2-c121-4c74-94ae-e6ec9a5898af.json b/data/hfopenllm_v2/deepseek-ai/deepseek-llm-7b-base/e11d46c2-c121-4c74-94ae-e6ec9a5898af.json deleted file mode 100644 index 
c06e87638..000000000 --- a/data/hfopenllm_v2/deepseek-ai/deepseek-llm-7b-base/e11d46c2-c121-4c74-94ae-e6ec9a5898af.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/deepseek-ai_deepseek-llm-7b-base/1762652580.1234062", - "retrieved_timestamp": "1762652580.1234071", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "deepseek-ai/deepseek-llm-7b-base", - "developer": "deepseek-ai", - "inference_platform": "unknown", - "id": "deepseek-ai/deepseek-llm-7b-base", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.217871913190335 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35030315829299524 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37378124999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18060172872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/deepseek-ai/deepseek-llm-7b-chat/b33d672c-4a96-4093-bc13-25c42303b918.json b/data/hfopenllm_v2/deepseek-ai/deepseek-llm-7b-chat/b33d672c-4a96-4093-bc13-25c42303b918.json new file mode 100644 index 000000000..cd8ef9eab --- /dev/null +++ b/data/hfopenllm_v2/deepseek-ai/deepseek-llm-7b-chat/b33d672c-4a96-4093-bc13-25c42303b918.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/deepseek-ai_deepseek-llm-7b-chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "deepseek-llm-7b-chat", + "id": "deepseek-ai/deepseek-llm-7b-chat", + "developer": "deepseek-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } + }, 
+ "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4171 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3632 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0204 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4668 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2133 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/deepseek-ai/deepseek-llm-7b-chat/b9dd96f5-6ab0-4df4-9ee2-bd34c4c9fb05.json b/data/hfopenllm_v2/deepseek-ai/deepseek-llm-7b-chat/b9dd96f5-6ab0-4df4-9ee2-bd34c4c9fb05.json deleted file mode 100644 index e648625cd..000000000 --- a/data/hfopenllm_v2/deepseek-ai/deepseek-llm-7b-chat/b9dd96f5-6ab0-4df4-9ee2-bd34c4c9fb05.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/deepseek-ai_deepseek-llm-7b-chat/1762652580.123629", - "retrieved_timestamp": "1762652580.12363", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "deepseek-ai/deepseek-llm-7b-chat", - "developer": "deepseek-ai", - "inference_platform": "unknown", - "id": "deepseek-ai/deepseek-llm-7b-chat", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4170822307034225 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3632079760108669 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46677083333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21334773936170212 - } - } - ] -} diff --git a/data/hfopenllm_v2/deepseek-ai/deepseek-moe-16b-base/2b4f42fc-8b25-481c-98f7-911c52fdd242.json b/data/hfopenllm_v2/deepseek-ai/deepseek-moe-16b-base/2b4f42fc-8b25-481c-98f7-911c52fdd242.json new file mode 100644 index 000000000..a987a5874 --- /dev/null +++ b/data/hfopenllm_v2/deepseek-ai/deepseek-moe-16b-base/2b4f42fc-8b25-481c-98f7-911c52fdd242.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/deepseek-ai_deepseek-moe-16b-base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "deepseek-moe-16b-base", + "id": "deepseek-ai/deepseek-moe-16b-base", + "developer": "deepseek-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "DeepseekForCausalLM", + "params_billions": 16.376 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.245 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3409 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": 
"Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0242 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2542 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3658 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1505 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/deepseek-ai/deepseek-moe-16b-base/32767af1-f01b-42ca-a8e2-6fecc5af4bfc.json b/data/hfopenllm_v2/deepseek-ai/deepseek-moe-16b-base/32767af1-f01b-42ca-a8e2-6fecc5af4bfc.json deleted file mode 100644 index 6bc519ad0..000000000 --- a/data/hfopenllm_v2/deepseek-ai/deepseek-moe-16b-base/32767af1-f01b-42ca-a8e2-6fecc5af4bfc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/deepseek-ai_deepseek-moe-16b-base/1762652580.123848", - "retrieved_timestamp": "1762652580.123849", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "deepseek-ai/deepseek-moe-16b-base", - "developer": "deepseek-ai", - "inference_platform": "unknown", - "id": "deepseek-ai/deepseek-moe-16b-base", - "additional_details": { - "precision": "bfloat16", - "architecture": "DeepseekForCausalLM", - "params_billions": 16.376 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2449744455821664 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3409461055246395 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02416918429003021 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36578125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1505152925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/deepseek-ai/deepseek-moe-16b-chat/634b7a64-2bd3-48b8-b2f4-a93189801850.json b/data/hfopenllm_v2/deepseek-ai/deepseek-moe-16b-chat/634b7a64-2bd3-48b8-b2f4-a93189801850.json new file mode 100644 index 000000000..92b956141 --- /dev/null +++ b/data/hfopenllm_v2/deepseek-ai/deepseek-moe-16b-chat/634b7a64-2bd3-48b8-b2f4-a93189801850.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/deepseek-ai_deepseek-moe-16b-chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "deepseek-moe-16b-chat", + "id": "deepseek-ai/deepseek-moe-16b-chat", + "developer": "deepseek-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "DeepseekForCausalLM", + "params_billions": 16.376 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3663 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3275 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0257 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2248 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3808 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": 
"MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1964 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/deepseek-ai/deepseek-moe-16b-chat/81c514f2-5a06-4d50-8c00-dc8b97529f46.json b/data/hfopenllm_v2/deepseek-ai/deepseek-moe-16b-chat/81c514f2-5a06-4d50-8c00-dc8b97529f46.json deleted file mode 100644 index 0702fa36d..000000000 --- a/data/hfopenllm_v2/deepseek-ai/deepseek-moe-16b-chat/81c514f2-5a06-4d50-8c00-dc8b97529f46.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/deepseek-ai_deepseek-moe-16b-chat/1762652580.1240609", - "retrieved_timestamp": "1762652580.124062", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "deepseek-ai/deepseek-moe-16b-chat", - "developer": "deepseek-ai", - "inference_platform": "unknown", - "id": "deepseek-ai/deepseek-moe-16b-chat", - "additional_details": { - "precision": "bfloat16", - "architecture": "DeepseekForCausalLM", - "params_billions": 16.376 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36629919724109805 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3274953026448241 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0256797583081571 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22483221476510068 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38076041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1963929521276596 - } - } - ] -} diff --git a/data/hfopenllm_v2/dfurman/CalmeRys-78B-Orpo-v0.1/31d8cf18-7b35-438e-8dc6-cdba0f593348.json b/data/hfopenllm_v2/dfurman/CalmeRys-78B-Orpo-v0.1/31d8cf18-7b35-438e-8dc6-cdba0f593348.json deleted file mode 100644 index b4ce17cd6..000000000 --- a/data/hfopenllm_v2/dfurman/CalmeRys-78B-Orpo-v0.1/31d8cf18-7b35-438e-8dc6-cdba0f593348.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/dfurman_CalmeRys-78B-Orpo-v0.1/1762652580.124436", - "retrieved_timestamp": "1762652580.124437", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "dfurman/CalmeRys-78B-Orpo-v0.1", - "developer": "dfurman", - "inference_platform": "unknown", - "id": "dfurman/CalmeRys-78B-Orpo-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 77.965 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8163273447785211 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7262282792249927 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4001677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5901770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7012134308510638 - } - } - ] -} diff --git a/data/hfopenllm_v2/dfurman/CalmeRys-78B-Orpo-v0.1/72a4bcc3-9dfc-4268-be4e-cda5837a3da2.json b/data/hfopenllm_v2/dfurman/CalmeRys-78B-Orpo-v0.1/72a4bcc3-9dfc-4268-be4e-cda5837a3da2.json new file mode 100644 index 000000000..152bc2222 --- /dev/null +++ b/data/hfopenllm_v2/dfurman/CalmeRys-78B-Orpo-v0.1/72a4bcc3-9dfc-4268-be4e-cda5837a3da2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/dfurman_CalmeRys-78B-Orpo-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CalmeRys-78B-Orpo-v0.1", + "id": "dfurman/CalmeRys-78B-Orpo-v0.1", + "developer": "dfurman", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 77.965 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8163 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7262 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4063 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4002 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5902 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7012 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dfurman/Llama-3-70B-Orpo-v0.1/78fa85f6-baff-4d95-ad3a-a0663f51b0a0.json b/data/hfopenllm_v2/dfurman/Llama-3-70B-Orpo-v0.1/78fa85f6-baff-4d95-ad3a-a0663f51b0a0.json new file mode 100644 index 000000000..2fb382007 --- /dev/null +++ b/data/hfopenllm_v2/dfurman/Llama-3-70B-Orpo-v0.1/78fa85f6-baff-4d95-ad3a-a0663f51b0a0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/dfurman_Llama-3-70B-Orpo-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-70B-Orpo-v0.1", + "id": "dfurman/Llama-3-70B-Orpo-v0.1", + "developer": "dfurman", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2049 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + 
"source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4655 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1579 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4534 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3893 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dfurman/Llama-3-8B-Orpo-v0.1/359231a5-6eb9-4f73-a6f1-d7fd7f35c7ed.json b/data/hfopenllm_v2/dfurman/Llama-3-8B-Orpo-v0.1/359231a5-6eb9-4f73-a6f1-d7fd7f35c7ed.json new file mode 100644 index 000000000..cafc237a9 --- /dev/null +++ b/data/hfopenllm_v2/dfurman/Llama-3-8B-Orpo-v0.1/359231a5-6eb9-4f73-a6f1-d7fd7f35c7ed.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/dfurman_Llama-3-8B-Orpo-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Orpo-v0.1", + "id": "dfurman/Llama-3-8B-Orpo-v0.1", + "developer": "dfurman", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2835 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3842 
+ } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0521 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3566 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2298 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dfurman/Llama-3-8B-Orpo-v0.1/79b81e37-f75e-4b18-b145-73c42625ced5.json b/data/hfopenllm_v2/dfurman/Llama-3-8B-Orpo-v0.1/79b81e37-f75e-4b18-b145-73c42625ced5.json new file mode 100644 index 000000000..f0f0c7311 --- /dev/null +++ b/data/hfopenllm_v2/dfurman/Llama-3-8B-Orpo-v0.1/79b81e37-f75e-4b18-b145-73c42625ced5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/dfurman_Llama-3-8B-Orpo-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Orpo-v0.1", + "id": "dfurman/Llama-3-8B-Orpo-v0.1", + "developer": "dfurman", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3853 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH 
Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0415 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3579 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2281 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dfurman/Qwen2-72B-Orpo-v0.1/2d99af7a-f67c-4e74-9ba2-f1401dfdf9fb.json b/data/hfopenllm_v2/dfurman/Qwen2-72B-Orpo-v0.1/2d99af7a-f67c-4e74-9ba2-f1401dfdf9fb.json new file mode 100644 index 000000000..561c35bcc --- /dev/null +++ b/data/hfopenllm_v2/dfurman/Qwen2-72B-Orpo-v0.1/2d99af7a-f67c-4e74-9ba2-f1401dfdf9fb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/dfurman_Qwen2-72B-Orpo-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2-72B-Orpo-v0.1", + "id": "dfurman/Qwen2-72B-Orpo-v0.1", + "developer": "dfurman", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.699 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.788 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6969 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4056 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": 
"hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3842 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4784 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5455 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dicta-il/dictalm2.0-instruct/315fa815-fab0-47c9-8185-00bc597c0176.json b/data/hfopenllm_v2/dicta-il/dictalm2.0-instruct/315fa815-fab0-47c9-8185-00bc597c0176.json new file mode 100644 index 000000000..66466a5b8 --- /dev/null +++ b/data/hfopenllm_v2/dicta-il/dictalm2.0-instruct/315fa815-fab0-47c9-8185-00bc597c0176.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/dicta-il_dictalm2.0-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dictalm2.0-instruct", + "id": "dicta-il/dictalm2.0-instruct", + "developer": "dicta-il", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.251 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4412 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4256 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0227 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3946 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2605 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dicta-il/dictalm2.0-instruct/4fc01471-7a04-4f46-a973-42f5a3fd67be.json b/data/hfopenllm_v2/dicta-il/dictalm2.0-instruct/4fc01471-7a04-4f46-a973-42f5a3fd67be.json deleted file mode 100644 index 53cacf646..000000000 --- a/data/hfopenllm_v2/dicta-il/dictalm2.0-instruct/4fc01471-7a04-4f46-a973-42f5a3fd67be.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/dicta-il_dictalm2.0-instruct/1762652580.126274", - "retrieved_timestamp": "1762652580.126276", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "dicta-il/dictalm2.0-instruct", - "developer": "dicta-il", - "inference_platform": "unknown", - "id": "dicta-il/dictalm2.0-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.251 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44121264910437635 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42560784985912875 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.022658610271903322 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39458333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2604720744680851 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/dicta-il/dictalm2.0/0c1686db-b396-4ecf-86f1-e4e092491acd.json b/data/hfopenllm_v2/dicta-il/dictalm2.0/0c1686db-b396-4ecf-86f1-e4e092491acd.json new file mode 100644 index 000000000..d0fee0d39 --- /dev/null +++ b/data/hfopenllm_v2/dicta-il/dictalm2.0/0c1686db-b396-4ecf-86f1-e4e092491acd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/dicta-il_dictalm2.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dictalm2.0", + "id": "dicta-il/dictalm2.0", + "developer": "dicta-il", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.251 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2413 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4018 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0181 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.382 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2605 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dicta-il/dictalm2.0/613c1922-270a-4e8b-ae9d-20fa25573258.json b/data/hfopenllm_v2/dicta-il/dictalm2.0/613c1922-270a-4e8b-ae9d-20fa25573258.json deleted file mode 100644 index a3a1bba25..000000000 --- a/data/hfopenllm_v2/dicta-il/dictalm2.0/613c1922-270a-4e8b-ae9d-20fa25573258.json +++ 
/dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/dicta-il_dictalm2.0/1762652580.125907", - "retrieved_timestamp": "1762652580.125909", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "dicta-il/dictalm2.0", - "developer": "dicta-il", - "inference_platform": "unknown", - "id": "dicta-il/dictalm2.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.251 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24132745559559746 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4017869112495909 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01812688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38196874999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2604720744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/distilbert/distilgpt2/57455fbc-b5a9-4a3b-9a30-7da0593fd778.json b/data/hfopenllm_v2/distilbert/distilgpt2/57455fbc-b5a9-4a3b-9a30-7da0593fd778.json new file mode 100644 index 000000000..4f626e3c3 --- /dev/null +++ b/data/hfopenllm_v2/distilbert/distilgpt2/57455fbc-b5a9-4a3b-9a30-7da0593fd778.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/distilbert_distilgpt2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "distilgpt2", + "id": "distilbert/distilgpt2", + "developer": "distilbert", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GPT2LMHeadModel", + "params_billions": 0.088 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0611 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3038 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.006 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4207 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1187 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/divyanshukunwar/SASTRI_1_9B/a8f9d0e6-5a1a-4d09-ac78-47fd586384df.json b/data/hfopenllm_v2/divyanshukunwar/SASTRI_1_9B/a8f9d0e6-5a1a-4d09-ac78-47fd586384df.json new file mode 100644 index 000000000..419e211aa --- /dev/null +++ b/data/hfopenllm_v2/divyanshukunwar/SASTRI_1_9B/a8f9d0e6-5a1a-4d09-ac78-47fd586384df.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/divyanshukunwar_SASTRI_1_9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SASTRI_1_9B", + "id": "divyanshukunwar/SASTRI_1_9B", + "developer": "divyanshukunwar", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 5.211 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4207 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": 
"hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.468 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1156 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3213 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3831 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3187 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/divyanshukunwar/SASTRI_1_9B/f0ccf0c5-269f-46e1-a13e-b54f2903779b.json b/data/hfopenllm_v2/divyanshukunwar/SASTRI_1_9B/f0ccf0c5-269f-46e1-a13e-b54f2903779b.json deleted file mode 100644 index b0de15a80..000000000 --- a/data/hfopenllm_v2/divyanshukunwar/SASTRI_1_9B/f0ccf0c5-269f-46e1-a13e-b54f2903779b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/divyanshukunwar_SASTRI_1_9B/1762652580.1269271", - "retrieved_timestamp": "1762652580.1269279", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "divyanshukunwar/SASTRI_1_9B", - "developer": "divyanshukunwar", - "inference_platform": "unknown", - "id": "divyanshukunwar/SASTRI_1_9B", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 5.211 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4207292206899914 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4680499051118341 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11555891238670694 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3831145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187333776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/djuna-test-lab/TEST-L3.2-ReWish-3B-ties-w-base/9d0d4eee-0b87-485c-843f-e32d08aa601b.json b/data/hfopenllm_v2/djuna-test-lab/TEST-L3.2-ReWish-3B-ties-w-base/9d0d4eee-0b87-485c-843f-e32d08aa601b.json new file mode 100644 index 000000000..fd16ac893 --- /dev/null +++ b/data/hfopenllm_v2/djuna-test-lab/TEST-L3.2-ReWish-3B-ties-w-base/9d0d4eee-0b87-485c-843f-e32d08aa601b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/djuna-test-lab_TEST-L3.2-ReWish-3B-ties-w-base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TEST-L3.2-ReWish-3B-ties-w-base", + "id": "djuna-test-lab/TEST-L3.2-ReWish-3B-ties-w-base", + "developer": "djuna-test-lab", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6353 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4495 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1367 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3777 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3126 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/djuna-test-lab/TEST-L3.2-ReWish-3B-ties-w-base/f64d7325-38eb-4cd4-80b3-bd63d4acb72f.json b/data/hfopenllm_v2/djuna-test-lab/TEST-L3.2-ReWish-3B-ties-w-base/f64d7325-38eb-4cd4-80b3-bd63d4acb72f.json deleted file mode 100644 index da13f9dcc..000000000 --- a/data/hfopenllm_v2/djuna-test-lab/TEST-L3.2-ReWish-3B-ties-w-base/f64d7325-38eb-4cd4-80b3-bd63d4acb72f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/djuna-test-lab_TEST-L3.2-ReWish-3B-ties-w-base/1762652580.131253", - "retrieved_timestamp": "1762652580.131254", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "djuna-test-lab/TEST-L3.2-ReWish-3B-ties-w-base", - "developer": "djuna-test-lab", - "inference_platform": "unknown", - "id": "djuna-test-lab/TEST-L3.2-ReWish-3B-ties-w-base", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.635252241829457 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.449540552927623 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13670694864048338 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37775 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31258311170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/djuna-test-lab/TEST-L3.2-ReWish-3B/6d57a63e-0fa7-442b-9156-5a8985e04762.json b/data/hfopenllm_v2/djuna-test-lab/TEST-L3.2-ReWish-3B/6d57a63e-0fa7-442b-9156-5a8985e04762.json deleted file mode 100644 index 7cf6b2aea..000000000 --- a/data/hfopenllm_v2/djuna-test-lab/TEST-L3.2-ReWish-3B/6d57a63e-0fa7-442b-9156-5a8985e04762.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/djuna-test-lab_TEST-L3.2-ReWish-3B/1762652580.131", - "retrieved_timestamp": "1762652580.131001", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "djuna-test-lab/TEST-L3.2-ReWish-3B", - "developer": "djuna-test-lab", - "inference_platform": "unknown", - "id": "djuna-test-lab/TEST-L3.2-ReWish-3B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6367759766308949 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.449540552927623 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13670694864048338 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37775 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31258311170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/djuna-test-lab/TEST-L3.2-ReWish-3B/e47c83ff-9a16-488b-8ccf-4a2fad2b14fc.json b/data/hfopenllm_v2/djuna-test-lab/TEST-L3.2-ReWish-3B/e47c83ff-9a16-488b-8ccf-4a2fad2b14fc.json new file mode 100644 index 000000000..0a5268a87 --- /dev/null +++ b/data/hfopenllm_v2/djuna-test-lab/TEST-L3.2-ReWish-3B/e47c83ff-9a16-488b-8ccf-4a2fad2b14fc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/djuna-test-lab_TEST-L3.2-ReWish-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + 
"source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TEST-L3.2-ReWish-3B", + "id": "djuna-test-lab/TEST-L3.2-ReWish-3B", + "developer": "djuna-test-lab", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6368 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4495 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1367 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3777 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3126 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/djuna/G2-BigGSHT-27B-2/69cc67cc-52f9-464a-ab04-b00bb3d8c459.json b/data/hfopenllm_v2/djuna/G2-BigGSHT-27B-2/69cc67cc-52f9-464a-ab04-b00bb3d8c459.json deleted file mode 100644 index b2b8e59a8..000000000 --- a/data/hfopenllm_v2/djuna/G2-BigGSHT-27B-2/69cc67cc-52f9-464a-ab04-b00bb3d8c459.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/djuna_G2-BigGSHT-27B-2/1762652580.1272058", - "retrieved_timestamp": "1762652580.1272068", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - 
"name": "djuna/G2-BigGSHT-27B-2", - "developer": "djuna", - "inference_platform": "unknown", - "id": "djuna/G2-BigGSHT-27B-2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7974430067775724 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.641474454273013 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2348942598187311 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36325503355704697 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40720833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45279255319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/djuna/G2-BigGSHT-27B-2/8c7e25df-884d-4940-8185-4c1b82fac8c5.json b/data/hfopenllm_v2/djuna/G2-BigGSHT-27B-2/8c7e25df-884d-4940-8185-4c1b82fac8c5.json new file mode 100644 index 000000000..294e1f1ce --- /dev/null +++ b/data/hfopenllm_v2/djuna/G2-BigGSHT-27B-2/8c7e25df-884d-4940-8185-4c1b82fac8c5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/djuna_G2-BigGSHT-27B-2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "G2-BigGSHT-27B-2", + "id": "djuna/G2-BigGSHT-27B-2", + "developer": "djuna", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7974 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.6415 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2349 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3633 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4072 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4528 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/djuna/G2-GSHT/83611d50-01d0-4642-a104-daf77f1a0fe8.json b/data/hfopenllm_v2/djuna/G2-GSHT/83611d50-01d0-4642-a104-daf77f1a0fe8.json new file mode 100644 index 000000000..efbf1c971 --- /dev/null +++ b/data/hfopenllm_v2/djuna/G2-GSHT/83611d50-01d0-4642-a104-daf77f1a0fe8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/djuna_G2-GSHT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "G2-GSHT", + "id": "djuna/G2-GSHT", + "developer": "djuna", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.563 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.527 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1926 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3255 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4006 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/djuna/G2-GSHT/b012b4a9-52d9-4b75-b80d-819579572f05.json b/data/hfopenllm_v2/djuna/G2-GSHT/b012b4a9-52d9-4b75-b80d-819579572f05.json deleted file mode 100644 index fe7a1fcf8..000000000 --- a/data/hfopenllm_v2/djuna/G2-GSHT/b012b4a9-52d9-4b75-b80d-819579572f05.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/djuna_G2-GSHT/1762652580.127527", - "retrieved_timestamp": "1762652580.127528", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "djuna/G2-GSHT", - "developer": "djuna", - "inference_platform": "unknown", - "id": "djuna/G2-GSHT", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5630116978505919 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5269730491270207 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19259818731117825 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32550335570469796 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.40057291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070146276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/djuna/Gemma-2-gemmama-9b/5cbdafba-6071-4da1-8b19-3de612e9ff18.json b/data/hfopenllm_v2/djuna/Gemma-2-gemmama-9b/5cbdafba-6071-4da1-8b19-3de612e9ff18.json new file mode 100644 index 000000000..2777bf875 --- /dev/null +++ b/data/hfopenllm_v2/djuna/Gemma-2-gemmama-9b/5cbdafba-6071-4da1-8b19-3de612e9ff18.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/djuna_Gemma-2-gemmama-9b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma-2-gemmama-9b", + "id": "djuna/Gemma-2-gemmama-9b", + "developer": "djuna", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7703 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.542 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1926 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3356 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4031 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.3109 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/djuna/L3.1-ForStHS/1c934cba-c94a-4aad-9645-84658e0b5588.json b/data/hfopenllm_v2/djuna/L3.1-ForStHS/1c934cba-c94a-4aad-9645-84658e0b5588.json new file mode 100644 index 000000000..be9ca7b2a --- /dev/null +++ b/data/hfopenllm_v2/djuna/L3.1-ForStHS/1c934cba-c94a-4aad-9645-84658e0b5588.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/djuna_L3.1-ForStHS/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3.1-ForStHS", + "id": "djuna/L3.1-ForStHS", + "developer": "djuna", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7813 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5203 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1503 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4026 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3735 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/djuna/L3.1-ForStHS/2d9e083d-2c5e-4f42-ab27-6f0c150ee4db.json b/data/hfopenllm_v2/djuna/L3.1-ForStHS/2d9e083d-2c5e-4f42-ab27-6f0c150ee4db.json deleted file mode 100644 index 3dbc921b9..000000000 --- 
a/data/hfopenllm_v2/djuna/L3.1-ForStHS/2d9e083d-2c5e-4f42-ab27-6f0c150ee4db.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/djuna_L3.1-ForStHS/1762652580.128124", - "retrieved_timestamp": "1762652580.128125", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "djuna/L3.1-ForStHS", - "developer": "djuna", - "inference_platform": "unknown", - "id": "djuna/L3.1-ForStHS", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7813313120298586 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5202703381267152 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15030211480362538 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40264583333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37350398936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/djuna/L3.1-Promissum_Mane-8B-Della-1.5-calc/7aad3f6b-89d9-4c9e-9339-cf4111fc37c6.json b/data/hfopenllm_v2/djuna/L3.1-Promissum_Mane-8B-Della-1.5-calc/7aad3f6b-89d9-4c9e-9339-cf4111fc37c6.json new file mode 100644 index 000000000..5e6b32a51 --- /dev/null +++ b/data/hfopenllm_v2/djuna/L3.1-Promissum_Mane-8B-Della-1.5-calc/7aad3f6b-89d9-4c9e-9339-cf4111fc37c6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/djuna_L3.1-Promissum_Mane-8B-Della-1.5-calc/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3.1-Promissum_Mane-8B-Della-1.5-calc", + "id": "djuna/L3.1-Promissum_Mane-8B-Della-1.5-calc", + "developer": "djuna", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + 
"evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7235 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5433 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1639 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3146 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4253 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3904 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/djuna/L3.1-Promissum_Mane-8B-Della-1.5-calc/f738c507-0826-4d7a-a999-8a01274d8697.json b/data/hfopenllm_v2/djuna/L3.1-Promissum_Mane-8B-Della-1.5-calc/f738c507-0826-4d7a-a999-8a01274d8697.json deleted file mode 100644 index 75bdb9880..000000000 --- a/data/hfopenllm_v2/djuna/L3.1-Promissum_Mane-8B-Della-1.5-calc/f738c507-0826-4d7a-a999-8a01274d8697.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/djuna_L3.1-Promissum_Mane-8B-Della-1.5-calc/1762652580.1283488", - "retrieved_timestamp": "1762652580.12835", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "djuna/L3.1-Promissum_Mane-8B-Della-1.5-calc", - "developer": "djuna", - "inference_platform": "unknown", - "id": "djuna/L3.1-Promissum_Mane-8B-Della-1.5-calc", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7235291249440374 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5432920704935255 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16389728096676737 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42528125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.390375664893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/djuna/L3.1-Promissum_Mane-8B-Della-calc/38d4a8ca-4273-4e6a-8a39-3b5ff20ec461.json b/data/hfopenllm_v2/djuna/L3.1-Promissum_Mane-8B-Della-calc/38d4a8ca-4273-4e6a-8a39-3b5ff20ec461.json new file mode 100644 index 000000000..2ab5da35c --- /dev/null +++ b/data/hfopenllm_v2/djuna/L3.1-Promissum_Mane-8B-Della-calc/38d4a8ca-4273-4e6a-8a39-3b5ff20ec461.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/djuna_L3.1-Promissum_Mane-8B-Della-calc/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3.1-Promissum_Mane-8B-Della-calc", + "id": "djuna/L3.1-Promissum_Mane-8B-Della-calc", + "developer": "djuna", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5442 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5486 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1843 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.423 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3802 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/djuna/L3.1-Promissum_Mane-8B-Della-calc/54d2c316-3c41-4d13-879d-a23c071a6885.json b/data/hfopenllm_v2/djuna/L3.1-Promissum_Mane-8B-Della-calc/54d2c316-3c41-4d13-879d-a23c071a6885.json deleted file mode 100644 index 8bbc6b280..000000000 --- a/data/hfopenllm_v2/djuna/L3.1-Promissum_Mane-8B-Della-calc/54d2c316-3c41-4d13-879d-a23c071a6885.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/djuna_L3.1-Promissum_Mane-8B-Della-calc/1762652580.128573", - "retrieved_timestamp": "1762652580.128574", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "djuna/L3.1-Promissum_Mane-8B-Della-calc", - "developer": "djuna", - "inference_platform": "unknown", - "id": "djuna/L3.1-Promissum_Mane-8B-Della-calc", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.544152847777231 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.548587625935678 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18429003021148035 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4229895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3801529255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/djuna/L3.1-Purosani-2-8B/3d65fbc2-bf91-479c-a687-e9ef702794fb.json b/data/hfopenllm_v2/djuna/L3.1-Purosani-2-8B/3d65fbc2-bf91-479c-a687-e9ef702794fb.json new file mode 100644 index 000000000..14e62740c --- /dev/null +++ b/data/hfopenllm_v2/djuna/L3.1-Purosani-2-8B/3d65fbc2-bf91-479c-a687-e9ef702794fb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/djuna_L3.1-Purosani-2-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3.1-Purosani-2-8B", + "id": "djuna/L3.1-Purosani-2-8B", + "developer": "djuna", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4988 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5182 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1171 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3816 + } + }, + { + "evaluation_name": 
"MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3752 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/djuna/L3.1-Purosani-2-8B/f1cc7f8d-72da-40ef-8cb1-f069cd0c052e.json b/data/hfopenllm_v2/djuna/L3.1-Purosani-2-8B/f1cc7f8d-72da-40ef-8cb1-f069cd0c052e.json deleted file mode 100644 index 3896e881f..000000000 --- a/data/hfopenllm_v2/djuna/L3.1-Purosani-2-8B/f1cc7f8d-72da-40ef-8cb1-f069cd0c052e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/djuna_L3.1-Purosani-2-8B/1762652580.128782", - "retrieved_timestamp": "1762652580.128783", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "djuna/L3.1-Purosani-2-8B", - "developer": "djuna", - "inference_platform": "unknown", - "id": "djuna/L3.1-Purosani-2-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4988153654525548 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5182122256069372 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11706948640483383 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38162499999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3751662234042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/djuna/L3.1-Suze-Vume-calc/3a48a9ec-61a5-45fd-903a-de2ef90ef13e.json b/data/hfopenllm_v2/djuna/L3.1-Suze-Vume-calc/3a48a9ec-61a5-45fd-903a-de2ef90ef13e.json deleted file mode 100644 index 99000568c..000000000 --- a/data/hfopenllm_v2/djuna/L3.1-Suze-Vume-calc/3a48a9ec-61a5-45fd-903a-de2ef90ef13e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/djuna_L3.1-Suze-Vume-calc/1762652580.128992", - "retrieved_timestamp": "1762652580.128992", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "djuna/L3.1-Suze-Vume-calc", - "developer": "djuna", - "inference_platform": "unknown", - "id": "djuna/L3.1-Suze-Vume-calc", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7296739318341999 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.516421105092519 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11404833836858005 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38429166666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35147938829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/djuna/L3.1-Suze-Vume-calc/650cdbbb-e066-4581-8d61-77aa6a4c402c.json b/data/hfopenllm_v2/djuna/L3.1-Suze-Vume-calc/650cdbbb-e066-4581-8d61-77aa6a4c402c.json new file mode 100644 index 000000000..7bf4c3862 --- /dev/null +++ b/data/hfopenllm_v2/djuna/L3.1-Suze-Vume-calc/650cdbbb-e066-4581-8d61-77aa6a4c402c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/djuna_L3.1-Suze-Vume-calc/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3.1-Suze-Vume-calc", + "id": "djuna/L3.1-Suze-Vume-calc", + "developer": "djuna", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7297 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5164 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.114 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2819 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3843 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3515 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/djuna/MN-Chinofun-12B-2/05d566c5-1810-483c-8ce0-84635b9457dc.json b/data/hfopenllm_v2/djuna/MN-Chinofun-12B-2/05d566c5-1810-483c-8ce0-84635b9457dc.json new file mode 100644 index 000000000..5e17b60d7 --- /dev/null +++ b/data/hfopenllm_v2/djuna/MN-Chinofun-12B-2/05d566c5-1810-483c-8ce0-84635b9457dc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/djuna_MN-Chinofun-12B-2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MN-Chinofun-12B-2", + "id": "djuna/MN-Chinofun-12B-2", + "developer": "djuna", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6171 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5037 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1307 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4268 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3615 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/djuna/MN-Chinofun-12B-2/7b384a2a-50c5-4c04-a9dd-5a9acefbd81f.json b/data/hfopenllm_v2/djuna/MN-Chinofun-12B-2/7b384a2a-50c5-4c04-a9dd-5a9acefbd81f.json deleted file mode 100644 index 003f6e9a0..000000000 --- a/data/hfopenllm_v2/djuna/MN-Chinofun-12B-2/7b384a2a-50c5-4c04-a9dd-5a9acefbd81f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/djuna_MN-Chinofun-12B-2/1762652580.129499", - "retrieved_timestamp": "1762652580.1295", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "djuna/MN-Chinofun-12B-2", - "developer": "djuna", - "inference_platform": "unknown", - "id": "djuna/MN-Chinofun-12B-2", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6170671595810228 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5036959998266032 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13066465256797583 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42683333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3615359042553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/djuna/MN-Chinofun-12B-3/32a4d80a-9d28-47f4-b68f-36e95a400bf2.json b/data/hfopenllm_v2/djuna/MN-Chinofun-12B-3/32a4d80a-9d28-47f4-b68f-36e95a400bf2.json deleted file mode 100644 index 0df09d666..000000000 --- a/data/hfopenllm_v2/djuna/MN-Chinofun-12B-3/32a4d80a-9d28-47f4-b68f-36e95a400bf2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/djuna_MN-Chinofun-12B-3/1762652580.129836", - "retrieved_timestamp": "1762652580.129837", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "djuna/MN-Chinofun-12B-3", - "developer": "djuna", - "inference_platform": "unknown", - "id": "djuna/MN-Chinofun-12B-3", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3052744495715812 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.53478574603334 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10045317220543806 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4197916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, 
- "score_details": { - "score": 0.3026097074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/djuna/MN-Chinofun-12B-3/37e3456a-92ff-4122-a697-ffbdc1c79555.json b/data/hfopenllm_v2/djuna/MN-Chinofun-12B-3/37e3456a-92ff-4122-a697-ffbdc1c79555.json new file mode 100644 index 000000000..d1f440712 --- /dev/null +++ b/data/hfopenllm_v2/djuna/MN-Chinofun-12B-3/37e3456a-92ff-4122-a697-ffbdc1c79555.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/djuna_MN-Chinofun-12B-3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MN-Chinofun-12B-3", + "id": "djuna/MN-Chinofun-12B-3", + "developer": "djuna", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3053 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5348 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1005 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4198 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3026 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/djuna/MN-Chinofun-12B-4/4f09e60c-e68a-426c-ac7e-f5e6755e14be.json b/data/hfopenllm_v2/djuna/MN-Chinofun-12B-4/4f09e60c-e68a-426c-ac7e-f5e6755e14be.json deleted file mode 100644 index 
976c4b9a6..000000000 --- a/data/hfopenllm_v2/djuna/MN-Chinofun-12B-4/4f09e60c-e68a-426c-ac7e-f5e6755e14be.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/djuna_MN-Chinofun-12B-4/1762652580.13009", - "retrieved_timestamp": "1762652580.130091", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "djuna/MN-Chinofun-12B-4", - "developer": "djuna", - "inference_platform": "unknown", - "id": "djuna/MN-Chinofun-12B-4", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5404305021786637 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5347693369790583 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11178247734138973 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4306770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3497340425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/djuna/MN-Chinofun-12B-4/70c908d4-f1bf-4553-9bf7-95eb593b4853.json b/data/hfopenllm_v2/djuna/MN-Chinofun-12B-4/70c908d4-f1bf-4553-9bf7-95eb593b4853.json new file mode 100644 index 000000000..c46c8ff73 --- /dev/null +++ b/data/hfopenllm_v2/djuna/MN-Chinofun-12B-4/70c908d4-f1bf-4553-9bf7-95eb593b4853.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/djuna_MN-Chinofun-12B-4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MN-Chinofun-12B-4", + "id": "djuna/MN-Chinofun-12B-4", + "developer": "djuna", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + 
"dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5404 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5348 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1118 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4307 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3497 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/djuna/MN-Chinofun/023756a1-66cc-423a-803b-0d8b0f368bd2.json b/data/hfopenllm_v2/djuna/MN-Chinofun/023756a1-66cc-423a-803b-0d8b0f368bd2.json deleted file mode 100644 index 45612d459..000000000 --- a/data/hfopenllm_v2/djuna/MN-Chinofun/023756a1-66cc-423a-803b-0d8b0f368bd2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/djuna_MN-Chinofun/1762652580.1291971", - "retrieved_timestamp": "1762652580.1291971", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "djuna/MN-Chinofun", - "developer": "djuna", - "inference_platform": "unknown", - "id": "djuna/MN-Chinofun", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.6110220880596817 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49527033812671534 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13066465256797583 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40835416666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36028922872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/djuna/MN-Chinofun/2ccc9c20-5414-4286-abcd-ad2b20f8652d.json b/data/hfopenllm_v2/djuna/MN-Chinofun/2ccc9c20-5414-4286-abcd-ad2b20f8652d.json new file mode 100644 index 000000000..3bb80844d --- /dev/null +++ b/data/hfopenllm_v2/djuna/MN-Chinofun/2ccc9c20-5414-4286-abcd-ad2b20f8652d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/djuna_MN-Chinofun/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MN-Chinofun", + "id": "djuna/MN-Chinofun", + "developer": "djuna", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.611 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4953 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1307 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": 
"hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4084 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3603 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/djuna/Q2.5-Partron-7B/50f4560a-e172-42b9-b552-437aff158a38.json b/data/hfopenllm_v2/djuna/Q2.5-Partron-7B/50f4560a-e172-42b9-b552-437aff158a38.json new file mode 100644 index 000000000..0893e6c11 --- /dev/null +++ b/data/hfopenllm_v2/djuna/Q2.5-Partron-7B/50f4560a-e172-42b9-b552-437aff158a38.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/djuna_Q2.5-Partron-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Q2.5-Partron-7B", + "id": "djuna/Q2.5-Partron-7B", + "developer": "djuna", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7321 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5418 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4826 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": 
"MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4165 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4283 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/djuna/Q2.5-Partron-7B/b045b20a-cdbf-4495-89ae-b235ada2e9e0.json b/data/hfopenllm_v2/djuna/Q2.5-Partron-7B/b045b20a-cdbf-4495-89ae-b235ada2e9e0.json deleted file mode 100644 index b2919e3dd..000000000 --- a/data/hfopenllm_v2/djuna/Q2.5-Partron-7B/b045b20a-cdbf-4495-89ae-b235ada2e9e0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/djuna_Q2.5-Partron-7B/1762652580.130363", - "retrieved_timestamp": "1762652580.130364", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "djuna/Q2.5-Partron-7B", - "developer": "djuna", - "inference_platform": "unknown", - "id": "djuna/Q2.5-Partron-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7321218810533828 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5418474850726388 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4826283987915408 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41654166666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4282746010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/djuna/Q2.5-Veltha-14B-0.5/258520cb-360a-4629-be8e-e4ffca8a81b2.json 
b/data/hfopenllm_v2/djuna/Q2.5-Veltha-14B-0.5/258520cb-360a-4629-be8e-e4ffca8a81b2.json deleted file mode 100644 index 4e3ada639..000000000 --- a/data/hfopenllm_v2/djuna/Q2.5-Veltha-14B-0.5/258520cb-360a-4629-be8e-e4ffca8a81b2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/djuna_Q2.5-Veltha-14B-0.5/1762652580.13079", - "retrieved_timestamp": "1762652580.130791", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "djuna/Q2.5-Veltha-14B-0.5", - "developer": "djuna", - "inference_platform": "unknown", - "id": "djuna/Q2.5-Veltha-14B-0.5", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7795826185631901 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6523026688308357 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43731117824773413 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36828859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43390625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5295046542553191 - } - } - ] -} diff --git a/data/hfopenllm_v2/djuna/Q2.5-Veltha-14B-0.5/c6a3abac-8a34-4725-915b-c27c3d0bc484.json b/data/hfopenllm_v2/djuna/Q2.5-Veltha-14B-0.5/c6a3abac-8a34-4725-915b-c27c3d0bc484.json new file mode 100644 index 000000000..8d30205b4 --- /dev/null +++ b/data/hfopenllm_v2/djuna/Q2.5-Veltha-14B-0.5/c6a3abac-8a34-4725-915b-c27c3d0bc484.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/djuna_Q2.5-Veltha-14B-0.5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Q2.5-Veltha-14B-0.5", + "id": "djuna/Q2.5-Veltha-14B-0.5", + "developer": "djuna", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + 
"params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7796 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6523 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4373 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3683 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4339 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5295 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/djuna/Q2.5-Veltha-14B/0a9560cd-d3e2-4d41-b83c-f321bcfc9c3c.json b/data/hfopenllm_v2/djuna/Q2.5-Veltha-14B/0a9560cd-d3e2-4d41-b83c-f321bcfc9c3c.json deleted file mode 100644 index ec6b10907..000000000 --- a/data/hfopenllm_v2/djuna/Q2.5-Veltha-14B/0a9560cd-d3e2-4d41-b83c-f321bcfc9c3c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/djuna_Q2.5-Veltha-14B/1762652580.130576", - "retrieved_timestamp": "1762652580.1305768", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "djuna/Q2.5-Veltha-14B", - "developer": "djuna", - "inference_platform": "unknown", - "id": "djuna/Q2.5-Veltha-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8291666112581284 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.648421390292023 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4788519637462236 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35906040268456374 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41942708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5298371010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/djuna/Q2.5-Veltha-14B/a8ed68ea-6463-4ff9-9dcd-034080272dec.json b/data/hfopenllm_v2/djuna/Q2.5-Veltha-14B/a8ed68ea-6463-4ff9-9dcd-034080272dec.json new file mode 100644 index 000000000..b3a521555 --- /dev/null +++ b/data/hfopenllm_v2/djuna/Q2.5-Veltha-14B/a8ed68ea-6463-4ff9-9dcd-034080272dec.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/djuna_Q2.5-Veltha-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Q2.5-Veltha-14B", + "id": "djuna/Q2.5-Veltha-14B", + "developer": "djuna", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8292 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6484 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + 
}, + "score_details": { + "score": 0.4789 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3591 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4194 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5298 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dnhkng/RYS-Llama-3-8B-Instruct/5799ce8b-c00d-49f6-96dc-f7dd057a268c.json b/data/hfopenllm_v2/dnhkng/RYS-Llama-3-8B-Instruct/5799ce8b-c00d-49f6-96dc-f7dd057a268c.json new file mode 100644 index 000000000..9ecc384c0 --- /dev/null +++ b/data/hfopenllm_v2/dnhkng/RYS-Llama-3-8B-Instruct/5799ce8b-c00d-49f6-96dc-f7dd057a268c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/dnhkng_RYS-Llama-3-8B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RYS-Llama-3-8B-Instruct", + "id": "dnhkng/RYS-Llama-3-8B-Instruct", + "developer": "dnhkng", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6958 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4809 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0687 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3383 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3557 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dnhkng/RYS-Llama-3-8B-Instruct/85472ae2-d5f0-4896-811b-d4217241bcef.json b/data/hfopenllm_v2/dnhkng/RYS-Llama-3-8B-Instruct/85472ae2-d5f0-4896-811b-d4217241bcef.json deleted file mode 100644 index 55bdf9359..000000000 --- a/data/hfopenllm_v2/dnhkng/RYS-Llama-3-8B-Instruct/85472ae2-d5f0-4896-811b-d4217241bcef.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/dnhkng_RYS-Llama-3-8B-Instruct/1762652580.131744", - "retrieved_timestamp": "1762652580.131744", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "dnhkng/RYS-Llama-3-8B-Instruct", - "developer": "dnhkng", - "inference_platform": "unknown", - "id": "dnhkng/RYS-Llama-3-8B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6957772044841022 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4808708123069005 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06873111782477341 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33834375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.355718085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/dnhkng/RYS-Llama-3-Huge-Instruct/0d261023-3e35-4160-98ca-241bbaee927e.json b/data/hfopenllm_v2/dnhkng/RYS-Llama-3-Huge-Instruct/0d261023-3e35-4160-98ca-241bbaee927e.json new file mode 100644 index 000000000..f9db9541c --- /dev/null +++ b/data/hfopenllm_v2/dnhkng/RYS-Llama-3-Huge-Instruct/0d261023-3e35-4160-98ca-241bbaee927e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/dnhkng_RYS-Llama-3-Huge-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RYS-Llama-3-Huge-Instruct", + "id": "dnhkng/RYS-Llama-3-Huge-Instruct", + "developer": "dnhkng", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 99.646 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7686 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6481 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2289 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4208 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.511 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/dnhkng/RYS-Llama-3-Huge-Instruct/0e8dfce1-b0d3-4ba5-a3be-ba6f52421841.json b/data/hfopenllm_v2/dnhkng/RYS-Llama-3-Huge-Instruct/0e8dfce1-b0d3-4ba5-a3be-ba6f52421841.json deleted file mode 100644 index 62980471b..000000000 --- a/data/hfopenllm_v2/dnhkng/RYS-Llama-3-Huge-Instruct/0e8dfce1-b0d3-4ba5-a3be-ba6f52421841.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/dnhkng_RYS-Llama-3-Huge-Instruct/1762652580.1319628", - "retrieved_timestamp": "1762652580.131964", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "dnhkng/RYS-Llama-3-Huge-Instruct", - "developer": "dnhkng", - "inference_platform": "unknown", - "id": "dnhkng/RYS-Llama-3-Huge-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 99.646 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7685917809190725 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6480872171360044 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22885196374622357 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4207604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.510970744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/dnhkng/RYS-Llama-3-Large-Instruct/f0454d3b-18b4-488a-94dd-fb24729996c7.json b/data/hfopenllm_v2/dnhkng/RYS-Llama-3-Large-Instruct/f0454d3b-18b4-488a-94dd-fb24729996c7.json new file mode 100644 index 000000000..b5b981469 --- /dev/null +++ b/data/hfopenllm_v2/dnhkng/RYS-Llama-3-Large-Instruct/f0454d3b-18b4-488a-94dd-fb24729996c7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/dnhkng_RYS-Llama-3-Large-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RYS-Llama-3-Large-Instruct", + "id": 
"dnhkng/RYS-Llama-3-Large-Instruct", + "developer": "dnhkng", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 73.976 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8051 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6525 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2304 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.418 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5137 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dnhkng/RYS-Llama-3-Large-Instruct/f9485436-6935-422f-9eb1-ee7faeb231d1.json b/data/hfopenllm_v2/dnhkng/RYS-Llama-3-Large-Instruct/f9485436-6935-422f-9eb1-ee7faeb231d1.json deleted file mode 100644 index cf6a11c91..000000000 --- a/data/hfopenllm_v2/dnhkng/RYS-Llama-3-Large-Instruct/f9485436-6935-422f-9eb1-ee7faeb231d1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/dnhkng_RYS-Llama-3-Large-Instruct/1762652580.132239", - "retrieved_timestamp": "1762652580.132241", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "dnhkng/RYS-Llama-3-Large-Instruct", - "developer": "dnhkng", - "inference_platform": "unknown", - "id": "dnhkng/RYS-Llama-3-Large-Instruct", - 
"additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 73.976 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8050616807847621 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.65252690724939 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23036253776435045 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41803125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5137134308510638 - } - } - ] -} diff --git a/data/hfopenllm_v2/dnhkng/RYS-Llama-3.1-8B-Instruct/62dab9bd-df83-4a0b-be94-0ddd981da6e4.json b/data/hfopenllm_v2/dnhkng/RYS-Llama-3.1-8B-Instruct/62dab9bd-df83-4a0b-be94-0ddd981da6e4.json deleted file mode 100644 index a1f66c17e..000000000 --- a/data/hfopenllm_v2/dnhkng/RYS-Llama-3.1-8B-Instruct/62dab9bd-df83-4a0b-be94-0ddd981da6e4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/dnhkng_RYS-Llama-3.1-8B-Instruct/1762652580.132753", - "retrieved_timestamp": "1762652580.1327538", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "dnhkng/RYS-Llama-3.1-8B-Instruct", - "developer": "dnhkng", - "inference_platform": "unknown", - "id": "dnhkng/RYS-Llama-3.1-8B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 8.685 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7684920455502511 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5163645317446665 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13293051359516617 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3681041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36394614361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/dnhkng/RYS-Llama-3.1-8B-Instruct/6bafa7a7-3a2a-4141-9564-a762d1cdb1d0.json b/data/hfopenllm_v2/dnhkng/RYS-Llama-3.1-8B-Instruct/6bafa7a7-3a2a-4141-9564-a762d1cdb1d0.json new file mode 100644 index 000000000..8a33c3dbf --- /dev/null +++ b/data/hfopenllm_v2/dnhkng/RYS-Llama-3.1-8B-Instruct/6bafa7a7-3a2a-4141-9564-a762d1cdb1d0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/dnhkng_RYS-Llama-3.1-8B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RYS-Llama-3.1-8B-Instruct", + "id": "dnhkng/RYS-Llama-3.1-8B-Instruct", + "developer": "dnhkng", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 8.685 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7685 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5164 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1329 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": 
"MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3681 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3639 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dnhkng/RYS-Llama3.1-Large/37f20f86-40ba-4f63-b29d-efff6cb0e09b.json b/data/hfopenllm_v2/dnhkng/RYS-Llama3.1-Large/37f20f86-40ba-4f63-b29d-efff6cb0e09b.json new file mode 100644 index 000000000..bd2270331 --- /dev/null +++ b/data/hfopenllm_v2/dnhkng/RYS-Llama3.1-Large/37f20f86-40ba-4f63-b29d-efff6cb0e09b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/dnhkng_RYS-Llama3.1-Large/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RYS-Llama3.1-Large", + "id": "dnhkng/RYS-Llama3.1-Large", + "developer": "dnhkng", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 81.677 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8492 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6899 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3505 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3742 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.4554 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5249 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dnhkng/RYS-Medium/bf0e7ce4-09e9-4879-993a-eb50b2a421d7.json b/data/hfopenllm_v2/dnhkng/RYS-Medium/bf0e7ce4-09e9-4879-993a-eb50b2a421d7.json new file mode 100644 index 000000000..f208cf7e5 --- /dev/null +++ b/data/hfopenllm_v2/dnhkng/RYS-Medium/bf0e7ce4-09e9-4879-993a-eb50b2a421d7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/dnhkng_RYS-Medium/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RYS-Medium", + "id": "dnhkng/RYS-Medium", + "developer": "dnhkng", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 18.731 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4406 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6285 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.108 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.328 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4069 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4326 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dnhkng/RYS-Medium/ca1e127b-ded1-4015-85b9-be134c26644d.json b/data/hfopenllm_v2/dnhkng/RYS-Medium/ca1e127b-ded1-4015-85b9-be134c26644d.json deleted file mode 100644 index d001e1e19..000000000 --- a/data/hfopenllm_v2/dnhkng/RYS-Medium/ca1e127b-ded1-4015-85b9-be134c26644d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/dnhkng_RYS-Medium/1762652580.131469", - "retrieved_timestamp": "1762652580.13147", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "dnhkng/RYS-Medium", - "developer": "dnhkng", - "inference_platform": "unknown", - "id": "dnhkng/RYS-Medium", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 18.731 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4406131287206833 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6284726872432828 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10800604229607251 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40692708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4325964095744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/dnhkng/RYS-Phi-3-medium-4k-instruct/94f92919-36fb-4aed-8c0c-2bee0cd1d301.json b/data/hfopenllm_v2/dnhkng/RYS-Phi-3-medium-4k-instruct/94f92919-36fb-4aed-8c0c-2bee0cd1d301.json deleted file mode 100644 index e7b6885d5..000000000 --- a/data/hfopenllm_v2/dnhkng/RYS-Phi-3-medium-4k-instruct/94f92919-36fb-4aed-8c0c-2bee0cd1d301.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/dnhkng_RYS-Phi-3-medium-4k-instruct/1762652580.133586", - "retrieved_timestamp": "1762652580.133587", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "dnhkng/RYS-Phi-3-medium-4k-instruct", - "developer": "dnhkng", - "inference_platform": "unknown", - "id": "dnhkng/RYS-Phi-3-medium-4k-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 17.709 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4391392616036561 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6226313539198264 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1608761329305136 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3548657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42528125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.484624335106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/dnhkng/RYS-Phi-3-medium-4k-instruct/bcbc29f7-ea03-4dbe-a83e-d4940b2c6bea.json b/data/hfopenllm_v2/dnhkng/RYS-Phi-3-medium-4k-instruct/bcbc29f7-ea03-4dbe-a83e-d4940b2c6bea.json new file mode 100644 index 000000000..e893c8a74 --- /dev/null +++ b/data/hfopenllm_v2/dnhkng/RYS-Phi-3-medium-4k-instruct/bcbc29f7-ea03-4dbe-a83e-d4940b2c6bea.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/dnhkng_RYS-Phi-3-medium-4k-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RYS-Phi-3-medium-4k-instruct", + "id": "dnhkng/RYS-Phi-3-medium-4k-instruct", + "developer": "dnhkng", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 17.709 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4391 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": 
"hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6226 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1609 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3549 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4253 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4846 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dnhkng/RYS-XLarge-base/1b0bb4ca-9553-4ddd-bf35-cab66685668d.json b/data/hfopenllm_v2/dnhkng/RYS-XLarge-base/1b0bb4ca-9553-4ddd-bf35-cab66685668d.json deleted file mode 100644 index 2f3c02f41..000000000 --- a/data/hfopenllm_v2/dnhkng/RYS-XLarge-base/1b0bb4ca-9553-4ddd-bf35-cab66685668d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/dnhkng_RYS-XLarge-base/1762652580.134071", - "retrieved_timestamp": "1762652580.134072", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "dnhkng/RYS-XLarge-base", - "developer": "dnhkng", - "inference_platform": "unknown", - "id": "dnhkng/RYS-XLarge-base", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 77.972 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7910233735377686 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7047291858548728 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on 
MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37915407854984895 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37919463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4902708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5430518617021277 - } - } - ] -} diff --git a/data/hfopenllm_v2/dnhkng/RYS-XLarge-base/cbea8d66-0370-4998-8e3a-06fef0a60f0c.json b/data/hfopenllm_v2/dnhkng/RYS-XLarge-base/cbea8d66-0370-4998-8e3a-06fef0a60f0c.json new file mode 100644 index 000000000..7bb7b0a97 --- /dev/null +++ b/data/hfopenllm_v2/dnhkng/RYS-XLarge-base/cbea8d66-0370-4998-8e3a-06fef0a60f0c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/dnhkng_RYS-XLarge-base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RYS-XLarge-base", + "id": "dnhkng/RYS-XLarge-base", + "developer": "dnhkng", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 77.972 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.791 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7047 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3792 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3792 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": 
"hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4903 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5431 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dnhkng/RYS-XLarge/a2a90b7e-f6db-408a-b5df-284d0b4a6353.json b/data/hfopenllm_v2/dnhkng/RYS-XLarge/a2a90b7e-f6db-408a-b5df-284d0b4a6353.json deleted file mode 100644 index f4f44d473..000000000 --- a/data/hfopenllm_v2/dnhkng/RYS-XLarge/a2a90b7e-f6db-408a-b5df-284d0b4a6353.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/dnhkng_RYS-XLarge/1762652580.1338398", - "retrieved_timestamp": "1762652580.1338408", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "dnhkng/RYS-XLarge", - "developer": "dnhkng", - "inference_platform": "unknown", - "id": "dnhkng/RYS-XLarge", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 77.965 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7995662619627034 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7050033079850099 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.425226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38422818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49696875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5428025265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/dnhkng/RYS-XLarge/ca48b670-b82e-46cc-beb9-2fd0f11d3585.json b/data/hfopenllm_v2/dnhkng/RYS-XLarge/ca48b670-b82e-46cc-beb9-2fd0f11d3585.json new file mode 100644 index 
000000000..8542415fb --- /dev/null +++ b/data/hfopenllm_v2/dnhkng/RYS-XLarge/ca48b670-b82e-46cc-beb9-2fd0f11d3585.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/dnhkng_RYS-XLarge/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RYS-XLarge", + "id": "dnhkng/RYS-XLarge", + "developer": "dnhkng", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 77.965 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7996 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.705 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4252 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3842 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.497 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5428 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dnhkng/RYS-XLarge2/6f344c50-fdf3-477e-9a76-558ed61fd509.json b/data/hfopenllm_v2/dnhkng/RYS-XLarge2/6f344c50-fdf3-477e-9a76-558ed61fd509.json deleted file mode 100644 index 9d8d85d68..000000000 --- a/data/hfopenllm_v2/dnhkng/RYS-XLarge2/6f344c50-fdf3-477e-9a76-558ed61fd509.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/dnhkng_RYS-XLarge2/1762652580.1343", - "retrieved_timestamp": "1762652580.134301", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "dnhkng/RYS-XLarge2", - "developer": "dnhkng", - "inference_platform": "unknown", - "id": "dnhkng/RYS-XLarge2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 77.965 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49019712141562166 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6573947106260754 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27492447129909364 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37416107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4508020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5378158244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/dnhkng/RYS-XLarge2/d37f99f7-f9c3-48b6-84d3-7da5d77f5030.json b/data/hfopenllm_v2/dnhkng/RYS-XLarge2/d37f99f7-f9c3-48b6-84d3-7da5d77f5030.json new file mode 100644 index 000000000..c6ee9080f --- /dev/null +++ b/data/hfopenllm_v2/dnhkng/RYS-XLarge2/d37f99f7-f9c3-48b6-84d3-7da5d77f5030.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/dnhkng_RYS-XLarge2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RYS-XLarge2", + "id": "dnhkng/RYS-XLarge2", + "developer": "dnhkng", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 77.965 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4902 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + 
"dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6574 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2749 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3742 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4508 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5378 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dreamgen/WizardLM-2-7B/503c8a24-4ced-4dca-b9df-5733ce89c2ca.json b/data/hfopenllm_v2/dreamgen/WizardLM-2-7B/503c8a24-4ced-4dca-b9df-5733ce89c2ca.json new file mode 100644 index 000000000..ea7995a74 --- /dev/null +++ b/data/hfopenllm_v2/dreamgen/WizardLM-2-7B/503c8a24-4ced-4dca-b9df-5733ce89c2ca.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/dreamgen_WizardLM-2-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "WizardLM-2-7B", + "id": "dreamgen/WizardLM-2-7B", + "developer": "dreamgen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4583 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.3487 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0332 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3941 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.266 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dreamgen/WizardLM-2-7B/5ed2650d-d76f-49d6-915b-ac551129913e.json b/data/hfopenllm_v2/dreamgen/WizardLM-2-7B/5ed2650d-d76f-49d6-915b-ac551129913e.json deleted file mode 100644 index 0fef12d96..000000000 --- a/data/hfopenllm_v2/dreamgen/WizardLM-2-7B/5ed2650d-d76f-49d6-915b-ac551129913e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/dreamgen_WizardLM-2-7B/1762652580.1345458", - "retrieved_timestamp": "1762652580.134547", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "dreamgen/WizardLM-2-7B", - "developer": "dreamgen", - "inference_platform": "unknown", - "id": "dreamgen/WizardLM-2-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45829842595424586 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34867856163972016 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03323262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39409374999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2660405585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v1/5c5283a0-819f-4112-bb90-5277423d9c00.json b/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v1/5c5283a0-819f-4112-bb90-5277423d9c00.json new file mode 100644 index 000000000..c68eb92a9 --- /dev/null +++ b/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v1/5c5283a0-819f-4112-bb90-5277423d9c00.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/dustinwloring1988_Reflexis-8b-chat-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Reflexis-8b-chat-v1", + "id": "dustinwloring1988/Reflexis-8b-chat-v1", + "developer": "dustinwloring1988", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3658 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4664 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1156 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2542 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, 
+ "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3754 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3384 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v1/c402fb6f-6e91-4e33-b847-87371373a6eb.json b/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v1/c402fb6f-6e91-4e33-b847-87371373a6eb.json deleted file mode 100644 index 22c471656..000000000 --- a/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v1/c402fb6f-6e91-4e33-b847-87371373a6eb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/dustinwloring1988_Reflexis-8b-chat-v1/1762652580.134872", - "retrieved_timestamp": "1762652580.134874", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "dustinwloring1988/Reflexis-8b-chat-v1", - "developer": "dustinwloring1988", - "inference_platform": "unknown", - "id": "dustinwloring1988/Reflexis-8b-chat-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3657750324694034 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4663596290293861 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11555891238670694 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3753958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3384308510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v2/6475a1f1-0c12-4ab3-89fc-cc5aa1d8145e.json b/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v2/6475a1f1-0c12-4ab3-89fc-cc5aa1d8145e.json 
deleted file mode 100644 index 6f6f7293d..000000000 --- a/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v2/6475a1f1-0c12-4ab3-89fc-cc5aa1d8145e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/dustinwloring1988_Reflexis-8b-chat-v2/1762652580.135156", - "retrieved_timestamp": "1762652580.135157", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "dustinwloring1988/Reflexis-8b-chat-v2", - "developer": "dustinwloring1988", - "inference_platform": "unknown", - "id": "dustinwloring1988/Reflexis-8b-chat-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3912042270065648 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47238018945807153 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1163141993957704 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3526354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3377659574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v2/b636bc82-1625-49b1-beec-cadaf4e1b1a9.json b/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v2/b636bc82-1625-49b1-beec-cadaf4e1b1a9.json new file mode 100644 index 000000000..9bbf76e4e --- /dev/null +++ b/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v2/b636bc82-1625-49b1-beec-cadaf4e1b1a9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/dustinwloring1988_Reflexis-8b-chat-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Reflexis-8b-chat-v2", + "id": "dustinwloring1988/Reflexis-8b-chat-v2", + "developer": "dustinwloring1988", + "inference_platform": "unknown", + "additional_details": { + "precision": 
"bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3912 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4724 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1163 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3526 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3378 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v3/00f481c1-0ef0-40bd-bd95-81dc9443a62c.json b/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v3/00f481c1-0ef0-40bd-bd95-81dc9443a62c.json new file mode 100644 index 000000000..4ff73b25c --- /dev/null +++ b/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v3/00f481c1-0ef0-40bd-bd95-81dc9443a62c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/dustinwloring1988_Reflexis-8b-chat-v3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Reflexis-8b-chat-v3", + "id": "dustinwloring1988/Reflexis-8b-chat-v3", + "developer": "dustinwloring1988", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + 
"source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5367 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4658 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1224 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2424 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3512 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3548 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v3/5767ea0d-318c-4c65-9c96-890d27973302.json b/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v3/5767ea0d-318c-4c65-9c96-890d27973302.json deleted file mode 100644 index e793c88c9..000000000 --- a/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v3/5767ea0d-318c-4c65-9c96-890d27973302.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/dustinwloring1988_Reflexis-8b-chat-v3/1762652580.1353788", - "retrieved_timestamp": "1762652580.1353788", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "dustinwloring1988/Reflexis-8b-chat-v3", - "developer": "dustinwloring1988", - "inference_platform": "unknown", - "id": "dustinwloring1988/Reflexis-8b-chat-v3", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.536733644507684 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4658310598309874 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12235649546827794 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2424496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35117708333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35480385638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v4/7ea22fef-2d79-49ae-bf72-9153a4e239c5.json b/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v4/7ea22fef-2d79-49ae-bf72-9153a4e239c5.json new file mode 100644 index 000000000..6e5776566 --- /dev/null +++ b/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v4/7ea22fef-2d79-49ae-bf72-9153a4e239c5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/dustinwloring1988_Reflexis-8b-chat-v4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Reflexis-8b-chat-v4", + "id": "dustinwloring1988/Reflexis-8b-chat-v4", + "developer": "dustinwloring1988", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4698 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4686 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1027 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2341 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3393 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.339 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v4/ad9e0902-3542-4994-ae42-4f3ef9f88ab1.json b/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v4/ad9e0902-3542-4994-ae42-4f3ef9f88ab1.json deleted file mode 100644 index 27ce8d90a..000000000 --- a/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v4/ad9e0902-3542-4994-ae42-4f3ef9f88ab1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/dustinwloring1988_Reflexis-8b-chat-v4/1762652580.135605", - "retrieved_timestamp": "1762652580.135605", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "dustinwloring1988/Reflexis-8b-chat-v4", - "developer": "dustinwloring1988", - "inference_platform": "unknown", - "id": "dustinwloring1988/Reflexis-8b-chat-v4", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4697890486132351 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46860140660011185 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1027190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23406040268456377 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33930208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3390126329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v5/01c33f76-994a-4a1c-951d-88b34e471498.json b/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v5/01c33f76-994a-4a1c-951d-88b34e471498.json deleted file mode 100644 index 15514b7a9..000000000 --- a/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v5/01c33f76-994a-4a1c-951d-88b34e471498.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/dustinwloring1988_Reflexis-8b-chat-v5/1762652580.135817", - "retrieved_timestamp": "1762652580.135818", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "dustinwloring1988/Reflexis-8b-chat-v5", - "developer": "dustinwloring1988", - "inference_platform": "unknown", - "id": "dustinwloring1988/Reflexis-8b-chat-v5", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42375231053604434 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4781685533183147 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1216012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33536458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3217253989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v5/64f441df-1781-4d01-b73b-2156413ad403.json b/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v5/64f441df-1781-4d01-b73b-2156413ad403.json new file mode 100644 index 
000000000..6d8fa5edd --- /dev/null +++ b/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v5/64f441df-1781-4d01-b73b-2156413ad403.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/dustinwloring1988_Reflexis-8b-chat-v5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Reflexis-8b-chat-v5", + "id": "dustinwloring1988/Reflexis-8b-chat-v5", + "developer": "dustinwloring1988", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4238 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4782 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1216 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3354 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3217 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v6/4e3676eb-8607-416e-986a-7098bc192820.json b/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v6/4e3676eb-8607-416e-986a-7098bc192820.json new file mode 100644 index 000000000..be5afc0ca --- /dev/null +++ b/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v6/4e3676eb-8607-416e-986a-7098bc192820.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + 
"evaluation_id": "hfopenllm_v2/dustinwloring1988_Reflexis-8b-chat-v6/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Reflexis-8b-chat-v6", + "id": "dustinwloring1988/Reflexis-8b-chat-v6", + "developer": "dustinwloring1988", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4939 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.481 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1299 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2626 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3753 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3479 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v6/65ce9e6f-cab9-4ccc-af89-de9be928529e.json b/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v6/65ce9e6f-cab9-4ccc-af89-de9be928529e.json deleted file mode 100644 index 4ff64fbab..000000000 --- a/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v6/65ce9e6f-cab9-4ccc-af89-de9be928529e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/dustinwloring1988_Reflexis-8b-chat-v6/1762652580.136029", - "retrieved_timestamp": "1762652580.13603", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "dustinwloring1988/Reflexis-8b-chat-v6", - "developer": "dustinwloring1988", - "inference_platform": "unknown", - "id": "dustinwloring1988/Reflexis-8b-chat-v6", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4938939790866014 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4809537068664902 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3753333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.347905585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v7/2101369c-5042-48f3-a8f2-f9f56e7b6ae7.json b/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v7/2101369c-5042-48f3-a8f2-f9f56e7b6ae7.json new file mode 100644 index 000000000..601437128 --- /dev/null +++ b/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v7/2101369c-5042-48f3-a8f2-f9f56e7b6ae7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/dustinwloring1988_Reflexis-8b-chat-v7/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Reflexis-8b-chat-v7", + "id": "dustinwloring1988/Reflexis-8b-chat-v7", + "developer": "dustinwloring1988", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.398 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.481 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1631 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3222 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3643 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v7/abadd81a-bd45-4eba-ae77-25190c751085.json b/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v7/abadd81a-bd45-4eba-ae77-25190c751085.json deleted file mode 100644 index 4ff9cbb07..000000000 --- a/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v7/abadd81a-bd45-4eba-ae77-25190c751085.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/dustinwloring1988_Reflexis-8b-chat-v7/1762652580.1362429", - "retrieved_timestamp": "1762652580.136244", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "dustinwloring1988/Reflexis-8b-chat-v7", - "developer": "dustinwloring1988", - "inference_platform": "unknown", - "id": "dustinwloring1988/Reflexis-8b-chat-v7", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39804828964924177 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4809830787114964 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16314199395770393 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32215625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3642785904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/duyhv1411/Llama-3.2-1B-en-vi/c4b86264-3725-4742-91f0-3e01f8d965a4.json b/data/hfopenllm_v2/duyhv1411/Llama-3.2-1B-en-vi/c4b86264-3725-4742-91f0-3e01f8d965a4.json new file mode 100644 index 000000000..f91722bb5 --- /dev/null +++ b/data/hfopenllm_v2/duyhv1411/Llama-3.2-1B-en-vi/c4b86264-3725-4742-91f0-3e01f8d965a4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/duyhv1411_Llama-3.2-1B-en-vi/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-1B-en-vi", + "id": "duyhv1411/Llama-3.2-1B-en-vi", + "developer": "duyhv1411", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4788 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3291 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0287 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": 
"Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2768 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3197 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1341 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/duyhv1411/Llama-3.2-3B-en-vi/0308147c-dabb-46bb-8add-d332fcd5a800.json b/data/hfopenllm_v2/duyhv1411/Llama-3.2-3B-en-vi/0308147c-dabb-46bb-8add-d332fcd5a800.json new file mode 100644 index 000000000..7c38df979 --- /dev/null +++ b/data/hfopenllm_v2/duyhv1411/Llama-3.2-3B-en-vi/0308147c-dabb-46bb-8add-d332fcd5a800.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/duyhv1411_Llama-3.2-3B-en-vi/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-3B-en-vi", + "id": "duyhv1411/Llama-3.2-3B-en-vi", + "developer": "duyhv1411", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4852 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3272 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0227 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2752 + } + }, + { + 
"evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.321 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1359 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dwikitheduck/gemma-2-2b-id-inst/a9977a0d-e199-488a-a26e-6269806fdb2b.json b/data/hfopenllm_v2/dwikitheduck/gemma-2-2b-id-inst/a9977a0d-e199-488a-a26e-6269806fdb2b.json new file mode 100644 index 000000000..ebf7ac8d4 --- /dev/null +++ b/data/hfopenllm_v2/dwikitheduck/gemma-2-2b-id-inst/a9977a0d-e199-488a-a26e-6269806fdb2b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/dwikitheduck_gemma-2-2b-id-inst/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-2b-id-inst", + "id": "dwikitheduck/gemma-2-2b-id-inst", + "developer": "dwikitheduck", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3879 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3962 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0453 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4154 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2173 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dwikitheduck/gemma-2-2b-id-instruct/56b89ec8-90c5-4e1e-a458-1bb8b5b92be8.json b/data/hfopenllm_v2/dwikitheduck/gemma-2-2b-id-instruct/56b89ec8-90c5-4e1e-a458-1bb8b5b92be8.json new file mode 100644 index 000000000..83e3a462d --- /dev/null +++ b/data/hfopenllm_v2/dwikitheduck/gemma-2-2b-id-instruct/56b89ec8-90c5-4e1e-a458-1bb8b5b92be8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/dwikitheduck_gemma-2-2b-id-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-2b-id-instruct", + "id": "dwikitheduck/gemma-2-2b-id-instruct", + "developer": "dwikitheduck", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3879 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3962 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0453 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4154 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + 
"source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2173 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dwikitheduck/gemma-2-2b-id-instruct/73418e8c-ce10-4ea4-97f6-6f87c2be05a2.json b/data/hfopenllm_v2/dwikitheduck/gemma-2-2b-id-instruct/73418e8c-ce10-4ea4-97f6-6f87c2be05a2.json deleted file mode 100644 index 625fd53c9..000000000 --- a/data/hfopenllm_v2/dwikitheduck/gemma-2-2b-id-instruct/73418e8c-ce10-4ea4-97f6-6f87c2be05a2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/dwikitheduck_gemma-2-2b-id-instruct/1762652580.137409", - "retrieved_timestamp": "1762652580.1374102", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "dwikitheduck/gemma-2-2b-id-instruct", - "developer": "dwikitheduck", - "inference_platform": "unknown", - "id": "dwikitheduck/gemma-2-2b-id-instruct", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38785644312646006 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39621721241423097 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41542708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21733710106382978 - } - } - ] -} diff --git a/data/hfopenllm_v2/dwikitheduck/gemma-2-2b-id/4185c376-91c6-435d-ae3b-47cd85151049.json b/data/hfopenllm_v2/dwikitheduck/gemma-2-2b-id/4185c376-91c6-435d-ae3b-47cd85151049.json new file mode 100644 index 000000000..f7fe9743d --- /dev/null +++ b/data/hfopenllm_v2/dwikitheduck/gemma-2-2b-id/4185c376-91c6-435d-ae3b-47cd85151049.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/dwikitheduck_gemma-2-2b-id/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-2b-id", + "id": "dwikitheduck/gemma-2-2b-id", + "developer": "dwikitheduck", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3879 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3962 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0453 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4154 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2173 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dwikitheduck/gen-inst-1/26e45f5d-1e3d-425f-ba4d-b444dcda7f74.json b/data/hfopenllm_v2/dwikitheduck/gen-inst-1/26e45f5d-1e3d-425f-ba4d-b444dcda7f74.json new file mode 100644 index 000000000..0250810c2 --- /dev/null +++ b/data/hfopenllm_v2/dwikitheduck/gen-inst-1/26e45f5d-1e3d-425f-ba4d-b444dcda7f74.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/dwikitheduck_gen-inst-1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": 
"third_party" + }, + "model_info": { + "name": "gen-inst-1", + "id": "dwikitheduck/gen-inst-1", + "developer": "dwikitheduck", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.775 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.642 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4554 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3716 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4205 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5089 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dwikitheduck/gen-inst-1/5117b75d-3060-4434-a40d-01c471563685.json b/data/hfopenllm_v2/dwikitheduck/gen-inst-1/5117b75d-3060-4434-a40d-01c471563685.json deleted file mode 100644 index 890786109..000000000 --- a/data/hfopenllm_v2/dwikitheduck/gen-inst-1/5117b75d-3060-4434-a40d-01c471563685.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/dwikitheduck_gen-inst-1/1762652580.1376698", - "retrieved_timestamp": "1762652580.137671", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "dwikitheduck/gen-inst-1", - "developer": "dwikitheduck", - "inference_platform": "unknown", - "id": 
"dwikitheduck/gen-inst-1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7750114141588762 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6419926671215591 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4554380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3716442953020134 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42054166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5088929521276596 - } - } - ] -} diff --git a/data/hfopenllm_v2/dwikitheduck/gen-try1-notemp/09be48ce-61f8-4ba9-b082-b9c475fa714d.json b/data/hfopenllm_v2/dwikitheduck/gen-try1-notemp/09be48ce-61f8-4ba9-b082-b9c475fa714d.json new file mode 100644 index 000000000..ce2a46bd9 --- /dev/null +++ b/data/hfopenllm_v2/dwikitheduck/gen-try1-notemp/09be48ce-61f8-4ba9-b082-b9c475fa714d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/dwikitheduck_gen-try1-notemp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gen-try1-notemp", + "id": "dwikitheduck/gen-try1-notemp", + "developer": "dwikitheduck", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2627 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6263 + } + }, + { + "evaluation_name": "MATH 
Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.318 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.354 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4714 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.521 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dwikitheduck/gen-try1-notemp/5bd29754-7f93-42fb-ba9b-7b3a4315bd17.json b/data/hfopenllm_v2/dwikitheduck/gen-try1-notemp/5bd29754-7f93-42fb-ba9b-7b3a4315bd17.json deleted file mode 100644 index 2c52bd9b2..000000000 --- a/data/hfopenllm_v2/dwikitheduck/gen-try1-notemp/5bd29754-7f93-42fb-ba9b-7b3a4315bd17.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/dwikitheduck_gen-try1-notemp/1762652580.13809", - "retrieved_timestamp": "1762652580.138091", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "dwikitheduck/gen-try1-notemp", - "developer": "dwikitheduck", - "inference_platform": "unknown", - "id": "dwikitheduck/gen-try1-notemp", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26270961050013963 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.626267088306491 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31797583081570996 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": 
"Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3540268456375839 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47141666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5210272606382979 - } - } - ] -} diff --git a/data/hfopenllm_v2/dwikitheduck/gen-try1/27417bcb-fb2f-41d2-9dfa-9865a36f38d5.json b/data/hfopenllm_v2/dwikitheduck/gen-try1/27417bcb-fb2f-41d2-9dfa-9865a36f38d5.json new file mode 100644 index 000000000..e7929b7e4 --- /dev/null +++ b/data/hfopenllm_v2/dwikitheduck/gen-try1/27417bcb-fb2f-41d2-9dfa-9865a36f38d5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/dwikitheduck_gen-try1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gen-try1", + "id": "dwikitheduck/gen-try1", + "developer": "dwikitheduck", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7522 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6359 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4101 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3414 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4416 + } + }, 
+ { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5111 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dwikitheduck/gen-try1/8f00112d-767f-4ac5-ae1c-e37781cf7eec.json b/data/hfopenllm_v2/dwikitheduck/gen-try1/8f00112d-767f-4ac5-ae1c-e37781cf7eec.json deleted file mode 100644 index 8095c8b87..000000000 --- a/data/hfopenllm_v2/dwikitheduck/gen-try1/8f00112d-767f-4ac5-ae1c-e37781cf7eec.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/dwikitheduck_gen-try1/1762652580.137886", - "retrieved_timestamp": "1762652580.137887", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "dwikitheduck/gen-try1", - "developer": "dwikitheduck", - "inference_platform": "unknown", - "id": "dwikitheduck/gen-try1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7522052598217175 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6358510933470735 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41012084592145015 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3414429530201342 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4415625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5110538563829787 - } - } - ] -} diff --git a/data/hfopenllm_v2/dzakwan/dzakwan-MoE-4x7b-Beta/7b6fc3c2-a67d-450e-858c-fa87be122376.json b/data/hfopenllm_v2/dzakwan/dzakwan-MoE-4x7b-Beta/7b6fc3c2-a67d-450e-858c-fa87be122376.json new file mode 100644 index 000000000..1562aefaf --- /dev/null +++ b/data/hfopenllm_v2/dzakwan/dzakwan-MoE-4x7b-Beta/7b6fc3c2-a67d-450e-858c-fa87be122376.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/dzakwan_dzakwan-MoE-4x7b-Beta/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dzakwan-MoE-4x7b-Beta", + "id": "dzakwan/dzakwan-MoE-4x7b-Beta", + "developer": "dzakwan", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.154 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4443 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.514 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0778 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2861 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4267 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3108 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dzakwan/dzakwan-MoE-4x7b-Beta/f4ceacae-0b81-44ac-8b9d-31d81e145bab.json b/data/hfopenllm_v2/dzakwan/dzakwan-MoE-4x7b-Beta/f4ceacae-0b81-44ac-8b9d-31d81e145bab.json deleted file mode 100644 index c63d60171..000000000 --- a/data/hfopenllm_v2/dzakwan/dzakwan-MoE-4x7b-Beta/f4ceacae-0b81-44ac-8b9d-31d81e145bab.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/dzakwan_dzakwan-MoE-4x7b-Beta/1762652580.138297", - "retrieved_timestamp": "1762652580.138298", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { 
- "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "dzakwan/dzakwan-MoE-4x7b-Beta", - "developer": "dzakwan", - "inference_platform": "unknown", - "id": "dzakwan/dzakwan-MoE-4x7b-Beta", - "additional_details": { - "precision": "float16", - "architecture": "MixtralForCausalLM", - "params_billions": 24.154 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44426011870725235 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.514044131159397 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07779456193353475 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42673958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3107546542553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/ehristoforu/Falcon3-8B-Franken-Basestruct/1653400c-137e-4745-8676-eeaf39bbcc13.json b/data/hfopenllm_v2/ehristoforu/Falcon3-8B-Franken-Basestruct/1653400c-137e-4745-8676-eeaf39bbcc13.json deleted file mode 100644 index 606c3c740..000000000 --- a/data/hfopenllm_v2/ehristoforu/Falcon3-8B-Franken-Basestruct/1653400c-137e-4745-8676-eeaf39bbcc13.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ehristoforu_Falcon3-8B-Franken-Basestruct/1762652580.138562", - "retrieved_timestamp": "1762652580.1385632", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ehristoforu/Falcon3-8B-Franken-Basestruct", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/Falcon3-8B-Franken-Basestruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.406 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.17148499315150467 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5462828074770284 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34060402684563756 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3554895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3946974734042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/ehristoforu/Falcon3-8B-Franken-Basestruct/76b86418-5450-48c6-ae56-58a19016d055.json b/data/hfopenllm_v2/ehristoforu/Falcon3-8B-Franken-Basestruct/76b86418-5450-48c6-ae56-58a19016d055.json new file mode 100644 index 000000000..8426beecc --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/Falcon3-8B-Franken-Basestruct/76b86418-5450-48c6-ae56-58a19016d055.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ehristoforu_Falcon3-8B-Franken-Basestruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Falcon3-8B-Franken-Basestruct", + "id": "ehristoforu/Falcon3-8B-Franken-Basestruct", + "developer": "ehristoforu", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.406 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1715 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5463 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { 
+ "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3406 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3555 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3947 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/Falcon3-MoE-2x7B-Insruct/6b208d1e-96f1-4b72-8d31-6c6e43c42111.json b/data/hfopenllm_v2/ehristoforu/Falcon3-MoE-2x7B-Insruct/6b208d1e-96f1-4b72-8d31-6c6e43c42111.json deleted file mode 100644 index 75e3c1f0e..000000000 --- a/data/hfopenllm_v2/ehristoforu/Falcon3-MoE-2x7B-Insruct/6b208d1e-96f1-4b72-8d31-6c6e43c42111.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ehristoforu_Falcon3-MoE-2x7B-Insruct/1762652580.1388721", - "retrieved_timestamp": "1762652580.138873", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ehristoforu/Falcon3-MoE-2x7B-Insruct", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/Falcon3-MoE-2x7B-Insruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 13.401 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7642954028643998 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.564789641564995 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4123867069486405 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, 
- "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4840416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40949135638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/ehristoforu/Falcon3-MoE-2x7B-Insruct/e06594e4-899a-4285-b130-f7b605e5a6b9.json b/data/hfopenllm_v2/ehristoforu/Falcon3-MoE-2x7B-Insruct/e06594e4-899a-4285-b130-f7b605e5a6b9.json new file mode 100644 index 000000000..02842cc41 --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/Falcon3-MoE-2x7B-Insruct/e06594e4-899a-4285-b130-f7b605e5a6b9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ehristoforu_Falcon3-MoE-2x7B-Insruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Falcon3-MoE-2x7B-Insruct", + "id": "ehristoforu/Falcon3-MoE-2x7B-Insruct", + "developer": "ehristoforu", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 13.401 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7643 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5648 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4124 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.484 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy 
on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4095 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/Gemma2-9B-it-psy10k-mental_health/9efdc773-a5c7-4709-88c8-96a67d84a742.json b/data/hfopenllm_v2/ehristoforu/Gemma2-9B-it-psy10k-mental_health/9efdc773-a5c7-4709-88c8-96a67d84a742.json new file mode 100644 index 000000000..4328adf7f --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/Gemma2-9B-it-psy10k-mental_health/9efdc773-a5c7-4709-88c8-96a67d84a742.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ehristoforu_Gemma2-9B-it-psy10k-mental_health/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma2-9B-it-psy10k-mental_health", + "id": "ehristoforu/Gemma2-9B-it-psy10k-mental_health", + "developer": "ehristoforu", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5887 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5539 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1631 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3372 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4086 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3829 
+ } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/Gemma2-9b-it-train6/1fcc2f96-afc9-403f-b82e-8e1804506582.json b/data/hfopenllm_v2/ehristoforu/Gemma2-9b-it-train6/1fcc2f96-afc9-403f-b82e-8e1804506582.json new file mode 100644 index 000000000..b1b1e648f --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/Gemma2-9b-it-train6/1fcc2f96-afc9-403f-b82e-8e1804506582.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ehristoforu_Gemma2-9b-it-train6/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma2-9b-it-train6", + "id": "ehristoforu/Gemma2-9b-it-train6", + "developer": "ehristoforu", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7025 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5898 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1911 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3289 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4084 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3942 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/HappyLlama1/bee1e134-9a43-441a-b977-522c510dd1ce.json b/data/hfopenllm_v2/ehristoforu/HappyLlama1/bee1e134-9a43-441a-b977-522c510dd1ce.json new file mode 
100644 index 000000000..ee8741019 --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/HappyLlama1/bee1e134-9a43-441a-b977-522c510dd1ce.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ehristoforu_HappyLlama1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HappyLlama1", + "id": "ehristoforu/HappyLlama1", + "developer": "ehristoforu", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7363 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4996 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1427 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4287 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3546 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/QwenQwen2.5-7B-IT-Dare/b70e1089-d136-4b2f-a253-f361bcf8cdcc.json b/data/hfopenllm_v2/ehristoforu/QwenQwen2.5-7B-IT-Dare/b70e1089-d136-4b2f-a253-f361bcf8cdcc.json new file mode 100644 index 000000000..db80ae6ad --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/QwenQwen2.5-7B-IT-Dare/b70e1089-d136-4b2f-a253-f361bcf8cdcc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/ehristoforu_QwenQwen2.5-7B-IT-Dare/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwenQwen2.5-7B-IT-Dare", + "id": "ehristoforu/QwenQwen2.5-7B-IT-Dare", + "developer": "ehristoforu", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7509 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5398 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5091 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4034 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4289 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/QwenQwen2.5-7B-IT/8b7e9c34-a982-4f4d-b5dc-66a12578601f.json b/data/hfopenllm_v2/ehristoforu/QwenQwen2.5-7B-IT/8b7e9c34-a982-4f4d-b5dc-66a12578601f.json new file mode 100644 index 000000000..a51f16a50 --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/QwenQwen2.5-7B-IT/8b7e9c34-a982-4f4d-b5dc-66a12578601f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ehristoforu_QwenQwen2.5-7B-IT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": 
"Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwenQwen2.5-7B-IT", + "id": "ehristoforu/QwenQwen2.5-7B-IT", + "developer": "ehristoforu", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7518 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5398 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5091 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4034 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4289 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/RQwen-v0.1/0ccc36d0-f546-46d1-91d3-15a40c7bf6c1.json b/data/hfopenllm_v2/ehristoforu/RQwen-v0.1/0ccc36d0-f546-46d1-91d3-15a40c7bf6c1.json new file mode 100644 index 000000000..552e8c521 --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/RQwen-v0.1/0ccc36d0-f546-46d1-91d3-15a40c7bf6c1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ehristoforu_RQwen-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RQwen-v0.1", + "id": "ehristoforu/RQwen-v0.1", + "developer": "ehristoforu", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": 
"Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7625 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6446 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4645 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3255 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4139 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5202 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/RQwen-v0.2/066abe97-2c6c-4f3b-9e5e-e144f130258a.json b/data/hfopenllm_v2/ehristoforu/RQwen-v0.2/066abe97-2c6c-4f3b-9e5e-e144f130258a.json new file mode 100644 index 000000000..9ac5c419c --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/RQwen-v0.2/066abe97-2c6c-4f3b-9e5e-e144f130258a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ehristoforu_RQwen-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RQwen-v0.2", + "id": "ehristoforu/RQwen-v0.2", + "developer": "ehristoforu", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on 
IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7504 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6427 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.327 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3372 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4207 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5159 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/SoRu-0009/a3af8f77-d915-4482-a2b6-c99744aada4b.json b/data/hfopenllm_v2/ehristoforu/SoRu-0009/a3af8f77-d915-4482-a2b6-c99744aada4b.json new file mode 100644 index 000000000..1af53918b --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/SoRu-0009/a3af8f77-d915-4482-a2b6-c99744aada4b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ehristoforu_SoRu-0009/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SoRu-0009", + "id": "ehristoforu/SoRu-0009", + "developer": "ehristoforu", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2582 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, 
+ "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.315 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0211 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3369 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1239 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/SoRu-0009/d45e7b32-f09d-4185-ac78-d0eb7a4d3823.json b/data/hfopenllm_v2/ehristoforu/SoRu-0009/d45e7b32-f09d-4185-ac78-d0eb7a4d3823.json deleted file mode 100644 index 2312307c8..000000000 --- a/data/hfopenllm_v2/ehristoforu/SoRu-0009/d45e7b32-f09d-4185-ac78-d0eb7a4d3823.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ehristoforu_SoRu-0009/1762652580.1407459", - "retrieved_timestamp": "1762652580.140747", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ehristoforu/SoRu-0009", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/SoRu-0009", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25818827378023645 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3149981683579724 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.021148036253776436 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3369479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12391954787234043 - } - } - ] -} diff --git a/data/hfopenllm_v2/ehristoforu/coolqwen-3b-it/82cc8b37-e242-441e-ac74-1662bcc0a0e2.json b/data/hfopenllm_v2/ehristoforu/coolqwen-3b-it/82cc8b37-e242-441e-ac74-1662bcc0a0e2.json new file mode 100644 index 000000000..fcf5a776a --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/coolqwen-3b-it/82cc8b37-e242-441e-ac74-1662bcc0a0e2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ehristoforu_coolqwen-3b-it/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "coolqwen-3b-it", + "id": "ehristoforu/coolqwen-3b-it", + "developer": "ehristoforu", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.085 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6473 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4851 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3671 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + 
"hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3601 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/della-70b-test-v1/1527c8bc-c1ec-45f4-9663-4cffbb808f94.json b/data/hfopenllm_v2/ehristoforu/della-70b-test-v1/1527c8bc-c1ec-45f4-9663-4cffbb808f94.json new file mode 100644 index 000000000..367d47be6 --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/della-70b-test-v1/1527c8bc-c1ec-45f4-9663-4cffbb808f94.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ehristoforu_della-70b-test-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "della-70b-test-v1", + "id": "ehristoforu/della-70b-test-v1", + "developer": "ehristoforu", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4979 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0098 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2525 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4555 + } + }, 
+ { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1575 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/della-70b-test-v1/d9f6c1e9-84be-4666-b64f-5da37cf98202.json b/data/hfopenllm_v2/ehristoforu/della-70b-test-v1/d9f6c1e9-84be-4666-b64f-5da37cf98202.json deleted file mode 100644 index 2f7405385..000000000 --- a/data/hfopenllm_v2/ehristoforu/della-70b-test-v1/d9f6c1e9-84be-4666-b64f-5da37cf98202.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ehristoforu_della-70b-test-v1/1762652580.141174", - "retrieved_timestamp": "1762652580.141175", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ehristoforu/della-70b-test-v1", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/della-70b-test-v1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49786566310722213 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3029452113782393 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45545833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1574966755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/ehristoforu/falcon3-ultraset/337b8ce8-d697-47f6-94ac-7a420dd7d91b.json b/data/hfopenllm_v2/ehristoforu/falcon3-ultraset/337b8ce8-d697-47f6-94ac-7a420dd7d91b.json new file mode 100644 index 000000000..e2151cd4e --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/falcon3-ultraset/337b8ce8-d697-47f6-94ac-7a420dd7d91b.json @@ -0,0 +1,132 @@ +{ + 
"schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ehristoforu_falcon3-ultraset/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "falcon3-ultraset", + "id": "ehristoforu/falcon3-ultraset", + "developer": "ehristoforu", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7135 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5584 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2122 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3322 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4853 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3982 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/falcon3-ultraset/e2291d7c-7627-484e-a0c1-1857c642be2b.json b/data/hfopenllm_v2/ehristoforu/falcon3-ultraset/e2291d7c-7627-484e-a0c1-1857c642be2b.json deleted file mode 100644 index 39d207875..000000000 --- a/data/hfopenllm_v2/ehristoforu/falcon3-ultraset/e2291d7c-7627-484e-a0c1-1857c642be2b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ehristoforu_falcon3-ultraset/1762652580.1413918", - "retrieved_timestamp": "1762652580.141393", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ehristoforu/falcon3-ultraset", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/falcon3-ultraset", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7135123694020753 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5583684420918801 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2122356495468278 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33221476510067116 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48531250000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.398188164893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/ehristoforu/fd-lora-merged-16x32/3d6ed2bb-5be7-4838-abb7-49754f9c3bfe.json b/data/hfopenllm_v2/ehristoforu/fd-lora-merged-16x32/3d6ed2bb-5be7-4838-abb7-49754f9c3bfe.json new file mode 100644 index 000000000..af627d759 --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/fd-lora-merged-16x32/3d6ed2bb-5be7-4838-abb7-49754f9c3bfe.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ehristoforu_fd-lora-merged-16x32/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "fd-lora-merged-16x32", + "id": "ehristoforu/fd-lora-merged-16x32", + "developer": "ehristoforu", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.776 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.3481 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3308 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1707 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2534 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3514 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1205 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/fd-lora-merged-16x32/4d00474d-97e6-4384-82f7-956b2e7268e9.json b/data/hfopenllm_v2/ehristoforu/fd-lora-merged-16x32/4d00474d-97e6-4384-82f7-956b2e7268e9.json deleted file mode 100644 index afccc1fd1..000000000 --- a/data/hfopenllm_v2/ehristoforu/fd-lora-merged-16x32/4d00474d-97e6-4384-82f7-956b2e7268e9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ehristoforu_fd-lora-merged-16x32/1762652580.141611", - "retrieved_timestamp": "1762652580.141612", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ehristoforu/fd-lora-merged-16x32", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/fd-lora-merged-16x32", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.776 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3480897352358409 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3307564619842368 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17069486404833836 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35142708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12051196808510638 - } - } - ] -} diff --git a/data/hfopenllm_v2/ehristoforu/fd-lora-merged-64x128/0a6c7056-1bce-479e-84b0-f4eeea0bd3cc.json b/data/hfopenllm_v2/ehristoforu/fd-lora-merged-64x128/0a6c7056-1bce-479e-84b0-f4eeea0bd3cc.json new file mode 100644 index 000000000..185158297 --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/fd-lora-merged-64x128/0a6c7056-1bce-479e-84b0-f4eeea0bd3cc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ehristoforu_fd-lora-merged-64x128/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "fd-lora-merged-64x128", + "id": "ehristoforu/fd-lora-merged-64x128", + "developer": "ehristoforu", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3281 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3345 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1873 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.255 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3368 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1537 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/fd-lora-merged-64x128/6474672b-7728-4ab5-8fdf-749e996272a2.json b/data/hfopenllm_v2/ehristoforu/fd-lora-merged-64x128/6474672b-7728-4ab5-8fdf-749e996272a2.json deleted file mode 100644 index 829850cd5..000000000 --- a/data/hfopenllm_v2/ehristoforu/fd-lora-merged-64x128/6474672b-7728-4ab5-8fdf-749e996272a2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ehristoforu_fd-lora-merged-64x128/1762652580.14183", - "retrieved_timestamp": "1762652580.141831", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ehristoforu/fd-lora-merged-64x128", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/fd-lora-merged-64x128", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3281060918363276 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33447107385638297 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18731117824773413 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2550335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3368229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15367353723404256 - } - } - ] -} diff --git a/data/hfopenllm_v2/ehristoforu/fp4-14b-it-v1/31618256-7ca8-4a3c-bfbf-4397bf2cf339.json b/data/hfopenllm_v2/ehristoforu/fp4-14b-it-v1/31618256-7ca8-4a3c-bfbf-4397bf2cf339.json deleted file mode 100644 index 0b09b0bd8..000000000 --- a/data/hfopenllm_v2/ehristoforu/fp4-14b-it-v1/31618256-7ca8-4a3c-bfbf-4397bf2cf339.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ehristoforu_fp4-14b-it-v1/1762652580.1420429", - "retrieved_timestamp": "1762652580.1420438", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ehristoforu/fp4-14b-it-v1", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/fp4-14b-it-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25346746632269046 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5739715511094247 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35948958333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4204621010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/ehristoforu/fp4-14b-it-v1/3e236ad8-3828-407f-9076-743b465b8d15.json b/data/hfopenllm_v2/ehristoforu/fp4-14b-it-v1/3e236ad8-3828-407f-9076-743b465b8d15.json new file mode 100644 index 000000000..cd4c75104 --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/fp4-14b-it-v1/3e236ad8-3828-407f-9076-743b465b8d15.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ehristoforu_fp4-14b-it-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + 
"evaluator_relationship": "third_party" + }, + "model_info": { + "name": "fp4-14b-it-v1", + "id": "ehristoforu/fp4-14b-it-v1", + "developer": "ehristoforu", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2535 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.574 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0408 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3595 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4205 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/fp4-14b-v1-fix/37d01a2d-f8ca-46a3-a4b7-3fa725b4023b.json b/data/hfopenllm_v2/ehristoforu/fp4-14b-v1-fix/37d01a2d-f8ca-46a3-a4b7-3fa725b4023b.json deleted file mode 100644 index 6bf4a05c1..000000000 --- a/data/hfopenllm_v2/ehristoforu/fp4-14b-v1-fix/37d01a2d-f8ca-46a3-a4b7-3fa725b4023b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ehristoforu_fp4-14b-v1-fix/1762652580.142252", - "retrieved_timestamp": "1762652580.1422532", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ehristoforu/fp4-14b-v1-fix", - "developer": "ehristoforu", - 
"inference_platform": "unknown", - "id": "ehristoforu/fp4-14b-v1-fix", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6741700909143296 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6817274121032688 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4206948640483384 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3540268456375839 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4531875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5353224734042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/ehristoforu/fp4-14b-v1-fix/9e90dcdf-ce2a-4a7c-8b89-6af8b7c2bcfe.json b/data/hfopenllm_v2/ehristoforu/fp4-14b-v1-fix/9e90dcdf-ce2a-4a7c-8b89-6af8b7c2bcfe.json new file mode 100644 index 000000000..ba50dee2a --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/fp4-14b-v1-fix/9e90dcdf-ce2a-4a7c-8b89-6af8b7c2bcfe.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ehristoforu_fp4-14b-v1-fix/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "fp4-14b-v1-fix", + "id": "ehristoforu/fp4-14b-v1-fix", + "developer": "ehristoforu", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6742 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6817 + } + }, + { + 
"evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4207 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.354 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4532 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5353 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/fq2.5-7b-it-normalize_false/940d88e9-085b-4065-b8c8-92ebe685deb0.json b/data/hfopenllm_v2/ehristoforu/fq2.5-7b-it-normalize_false/940d88e9-085b-4065-b8c8-92ebe685deb0.json new file mode 100644 index 000000000..06d2d501b --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/fq2.5-7b-it-normalize_false/940d88e9-085b-4065-b8c8-92ebe685deb0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ehristoforu_fq2.5-7b-it-normalize_false/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "fq2.5-7b-it-normalize_false", + "id": "ehristoforu/fq2.5-7b-it-normalize_false", + "developer": "ehristoforu", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7399 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.552 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": 
{ + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4622 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4612 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4413 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/fq2.5-7b-it-normalize_false/a5004f95-0854-40d2-8a71-004875544499.json b/data/hfopenllm_v2/ehristoforu/fq2.5-7b-it-normalize_false/a5004f95-0854-40d2-8a71-004875544499.json deleted file mode 100644 index 8c3cbeab6..000000000 --- a/data/hfopenllm_v2/ehristoforu/fq2.5-7b-it-normalize_false/a5004f95-0854-40d2-8a71-004875544499.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ehristoforu_fq2.5-7b-it-normalize_false/1762652580.142459", - "retrieved_timestamp": "1762652580.1424599", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ehristoforu/fq2.5-7b-it-normalize_false", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/fq2.5-7b-it-normalize_false", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7399156460413925 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.551986272150289 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4622356495468278 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46115625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44132313829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/ehristoforu/fq2.5-7b-it-normalize_true/7fdcd616-2c72-4c44-9646-9c32344bfa0b.json b/data/hfopenllm_v2/ehristoforu/fq2.5-7b-it-normalize_true/7fdcd616-2c72-4c44-9646-9c32344bfa0b.json new file mode 100644 index 000000000..0dfb08d8b --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/fq2.5-7b-it-normalize_true/7fdcd616-2c72-4c44-9646-9c32344bfa0b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ehristoforu_fq2.5-7b-it-normalize_true/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "fq2.5-7b-it-normalize_true", + "id": "ehristoforu/fq2.5-7b-it-normalize_true", + "developer": "ehristoforu", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7399 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.552 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4622 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4612 + } + }, + { 
+ "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4413 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/fq2.5-7b-it-normalize_true/d0d8274c-7d05-4166-a510-487cb294135e.json b/data/hfopenllm_v2/ehristoforu/fq2.5-7b-it-normalize_true/d0d8274c-7d05-4166-a510-487cb294135e.json deleted file mode 100644 index 2818c3f0d..000000000 --- a/data/hfopenllm_v2/ehristoforu/fq2.5-7b-it-normalize_true/d0d8274c-7d05-4166-a510-487cb294135e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ehristoforu_fq2.5-7b-it-normalize_true/1762652580.1426702", - "retrieved_timestamp": "1762652580.142671", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ehristoforu/fq2.5-7b-it-normalize_true", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/fq2.5-7b-it-normalize_true", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7399156460413925 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.551986272150289 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4622356495468278 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46115625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44132313829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/ehristoforu/frqwen2.5-from7b-duable4layers-it/9d358f55-810c-4ac1-adc7-83f95bd74c11.json b/data/hfopenllm_v2/ehristoforu/frqwen2.5-from7b-duable4layers-it/9d358f55-810c-4ac1-adc7-83f95bd74c11.json new file mode 100644 index 000000000..073ffd258 --- /dev/null +++ 
b/data/hfopenllm_v2/ehristoforu/frqwen2.5-from7b-duable4layers-it/9d358f55-810c-4ac1-adc7-83f95bd74c11.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ehristoforu_frqwen2.5-from7b-duable4layers-it/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "frqwen2.5-from7b-duable4layers-it", + "id": "ehristoforu/frqwen2.5-from7b-duable4layers-it", + "developer": "ehristoforu", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 8.545 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7729 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5264 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4509 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4166 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4126 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/frqwen2.5-from7b-it/9ba3fe31-772a-4cf7-aa13-3680b6ad51ba.json b/data/hfopenllm_v2/ehristoforu/frqwen2.5-from7b-it/9ba3fe31-772a-4cf7-aa13-3680b6ad51ba.json new file mode 100644 index 000000000..61c9293ae --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/frqwen2.5-from7b-it/9ba3fe31-772a-4cf7-aa13-3680b6ad51ba.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/ehristoforu_frqwen2.5-from7b-it/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "frqwen2.5-from7b-it", + "id": "ehristoforu/frqwen2.5-from7b-it", + "developer": "ehristoforu", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 13.206 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6532 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5143 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2923 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4086 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3977 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/mllama-3.1-8b-instruct/40016b83-0730-4e67-b7e9-3b1d29d9d1be.json b/data/hfopenllm_v2/ehristoforu/mllama-3.1-8b-instruct/40016b83-0730-4e67-b7e9-3b1d29d9d1be.json deleted file mode 100644 index f1367e5ac..000000000 --- a/data/hfopenllm_v2/ehristoforu/mllama-3.1-8b-instruct/40016b83-0730-4e67-b7e9-3b1d29d9d1be.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ehristoforu_mllama-3.1-8b-instruct/1762652580.143588", - "retrieved_timestamp": "1762652580.143589", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - 
], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ehristoforu/mllama-3.1-8b-instruct", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/mllama-3.1-8b-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3457913890698901 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47176616480333583 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3776435045317221 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.338 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533244680851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/ehristoforu/mllama-3.1-8b-instruct/651a32b1-77fb-4acf-89bf-2d45b684944d.json b/data/hfopenllm_v2/ehristoforu/mllama-3.1-8b-instruct/651a32b1-77fb-4acf-89bf-2d45b684944d.json new file mode 100644 index 000000000..364558711 --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/mllama-3.1-8b-instruct/651a32b1-77fb-4acf-89bf-2d45b684944d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ehristoforu_mllama-3.1-8b-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mllama-3.1-8b-instruct", + "id": "ehristoforu/mllama-3.1-8b-instruct", + "developer": "ehristoforu", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3458 + } + }, + { + "evaluation_name": "BBH", 
+ "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4718 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3776 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.338 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2533 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/mllama-3.1-8b-it/192c4037-753a-4790-80d0-33c4d277102d.json b/data/hfopenllm_v2/ehristoforu/mllama-3.1-8b-it/192c4037-753a-4790-80d0-33c4d277102d.json new file mode 100644 index 000000000..601171444 --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/mllama-3.1-8b-it/192c4037-753a-4790-80d0-33c4d277102d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ehristoforu_mllama-3.1-8b-it/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mllama-3.1-8b-it", + "id": "ehristoforu/mllama-3.1-8b-it", + "developer": "ehristoforu", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3879 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.4868 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3799 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2768 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3349 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2622 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/moremerge-upscaled/5c465aeb-c6be-4a22-9cf0-3d9c2558ba39.json b/data/hfopenllm_v2/ehristoforu/moremerge-upscaled/5c465aeb-c6be-4a22-9cf0-3d9c2558ba39.json deleted file mode 100644 index 86b7ca4e1..000000000 --- a/data/hfopenllm_v2/ehristoforu/moremerge-upscaled/5c465aeb-c6be-4a22-9cf0-3d9c2558ba39.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ehristoforu_moremerge-upscaled/1762652580.144358", - "retrieved_timestamp": "1762652580.1443589", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ehristoforu/moremerge-upscaled", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/moremerge-upscaled", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 8.545 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1978882697908217 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26977370070980244 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24664429530201343 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35930208333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10413896276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/ehristoforu/moremerge-upscaled/679d66bf-244e-4080-9a42-0a0c6cfdc965.json b/data/hfopenllm_v2/ehristoforu/moremerge-upscaled/679d66bf-244e-4080-9a42-0a0c6cfdc965.json new file mode 100644 index 000000000..62db8ac19 --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/moremerge-upscaled/679d66bf-244e-4080-9a42-0a0c6cfdc965.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ehristoforu_moremerge-upscaled/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "moremerge-upscaled", + "id": "ehristoforu/moremerge-upscaled", + "developer": "ehristoforu", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 8.545 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1979 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2698 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2466 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3593 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1041 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/moremerge/38cf2a56-ed33-4f7e-94aa-bf4f15a5a53c.json b/data/hfopenllm_v2/ehristoforu/moremerge/38cf2a56-ed33-4f7e-94aa-bf4f15a5a53c.json deleted file mode 100644 index 72d2d5b31..000000000 --- a/data/hfopenllm_v2/ehristoforu/moremerge/38cf2a56-ed33-4f7e-94aa-bf4f15a5a53c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ehristoforu_moremerge/1762652580.1440692", - "retrieved_timestamp": "1762652580.14407", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ehristoforu/moremerge", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/moremerge", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20190982149585324 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28684447696551024 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35657291666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10654920212765957 - } - } - ] -} diff --git a/data/hfopenllm_v2/ehristoforu/moremerge/73b0ca8a-fb16-43eb-a9af-a01219cf6196.json b/data/hfopenllm_v2/ehristoforu/moremerge/73b0ca8a-fb16-43eb-a9af-a01219cf6196.json new file mode 100644 index 000000000..f1591edbb --- /dev/null +++ 
b/data/hfopenllm_v2/ehristoforu/moremerge/73b0ca8a-fb16-43eb-a9af-a01219cf6196.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ehristoforu_moremerge/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "moremerge", + "id": "ehristoforu/moremerge", + "developer": "ehristoforu", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2019 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2868 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3566 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1065 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/phi-4-25b/7f00ecbc-fcc8-43ae-867b-cb160e63a80c.json b/data/hfopenllm_v2/ehristoforu/phi-4-25b/7f00ecbc-fcc8-43ae-867b-cb160e63a80c.json new file mode 100644 index 000000000..524b66617 --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/phi-4-25b/7f00ecbc-fcc8-43ae-867b-cb160e63a80c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ehristoforu_phi-4-25b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF 
Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "phi-4-25b", + "id": "ehristoforu/phi-4-25b", + "developer": "ehristoforu", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 24.883 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6484 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6908 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4524 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3188 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4208 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5351 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/qwen2.5-test-32b-it/a8238bd4-3982-4e45-92e4-bab77e528e29.json b/data/hfopenllm_v2/ehristoforu/qwen2.5-test-32b-it/a8238bd4-3982-4e45-92e4-bab77e528e29.json new file mode 100644 index 000000000..e044dab4c --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/qwen2.5-test-32b-it/a8238bd4-3982-4e45-92e4-bab77e528e29.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ehristoforu_qwen2.5-test-32b-it/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwen2.5-test-32b-it", + "id": "ehristoforu/qwen2.5-test-32b-it", + "developer": 
"ehristoforu", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7889 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7081 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5974 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3641 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4578 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5765 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/qwen2.5-with-lora-think-3b-it/f87f9f08-e989-4e99-a254-a3650e7ab1b6.json b/data/hfopenllm_v2/ehristoforu/qwen2.5-with-lora-think-3b-it/f87f9f08-e989-4e99-a254-a3650e7ab1b6.json new file mode 100644 index 000000000..c5897513d --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/qwen2.5-with-lora-think-3b-it/f87f9f08-e989-4e99-a254-a3650e7ab1b6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ehristoforu_qwen2.5-with-lora-think-3b-it/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwen2.5-with-lora-think-3b-it", + "id": "ehristoforu/qwen2.5-with-lora-think-3b-it", + "developer": "ehristoforu", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + 
"evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5319 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4687 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2364 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.431 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3403 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/rmoe-v1/e58aecba-3254-426d-aac2-05a32c3cbdab.json b/data/hfopenllm_v2/ehristoforu/rmoe-v1/e58aecba-3254-426d-aac2-05a32c3cbdab.json deleted file mode 100644 index da5a04464..000000000 --- a/data/hfopenllm_v2/ehristoforu/rmoe-v1/e58aecba-3254-426d-aac2-05a32c3cbdab.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ehristoforu_rmoe-v1/1762652580.1453388", - "retrieved_timestamp": "1762652580.14534", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ehristoforu/rmoe-v1", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/rmoe-v1", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2MoeForCausalLM", - "params_billions": 11.026 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26500795666609045 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29292907133609175 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0015105740181268882 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36634374999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1124501329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/ehristoforu/rmoe-v1/f40496a9-fb14-4b2d-8070-84f55e6417f6.json b/data/hfopenllm_v2/ehristoforu/rmoe-v1/f40496a9-fb14-4b2d-8070-84f55e6417f6.json new file mode 100644 index 000000000..622169295 --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/rmoe-v1/f40496a9-fb14-4b2d-8070-84f55e6417f6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ehristoforu_rmoe-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "rmoe-v1", + "id": "ehristoforu/rmoe-v1", + "developer": "ehristoforu", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2MoeForCausalLM", + "params_billions": 11.026 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.265 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2929 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0015 + } + 
}, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1125 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/rufalcon3-3b-it/8f4336f8-1fdb-4a3d-8b9a-2e7c5e156f07.json b/data/hfopenllm_v2/ehristoforu/rufalcon3-3b-it/8f4336f8-1fdb-4a3d-8b9a-2e7c5e156f07.json deleted file mode 100644 index 3a584b6c7..000000000 --- a/data/hfopenllm_v2/ehristoforu/rufalcon3-3b-it/8f4336f8-1fdb-4a3d-8b9a-2e7c5e156f07.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ehristoforu_rufalcon3-3b-it/1762652580.14555", - "retrieved_timestamp": "1762652580.14555", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ehristoforu/rufalcon3-3b-it", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/rufalcon3-3b-it", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.228 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5942111375594533 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41554222543957625 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1782477341389728 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.38953124999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2347905585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/ehristoforu/rufalcon3-3b-it/cc52f59d-5669-44b0-b1af-e6fd0836e284.json b/data/hfopenllm_v2/ehristoforu/rufalcon3-3b-it/cc52f59d-5669-44b0-b1af-e6fd0836e284.json new file mode 100644 index 000000000..17d1c2ff2 --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/rufalcon3-3b-it/cc52f59d-5669-44b0-b1af-e6fd0836e284.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ehristoforu_rufalcon3-3b-it/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "rufalcon3-3b-it", + "id": "ehristoforu/rufalcon3-3b-it", + "developer": "ehristoforu", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.228 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5942 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4155 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1782 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3895 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.2348 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/ruphi-4b/67525a37-f658-40e8-89a1-de8bf6275a00.json b/data/hfopenllm_v2/ehristoforu/ruphi-4b/67525a37-f658-40e8-89a1-de8bf6275a00.json new file mode 100644 index 000000000..9d1d684fd --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/ruphi-4b/67525a37-f658-40e8-89a1-de8bf6275a00.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ehristoforu_ruphi-4b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ruphi-4b", + "id": "ehristoforu/ruphi-4b", + "developer": "ehristoforu", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1752 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2906 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2399 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3512 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1126 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/testq-32b/3cb34886-7a93-42b9-a8fa-fab5f4bd8624.json b/data/hfopenllm_v2/ehristoforu/testq-32b/3cb34886-7a93-42b9-a8fa-fab5f4bd8624.json new file mode 100644 index 000000000..50e07975c --- 
/dev/null +++ b/data/hfopenllm_v2/ehristoforu/testq-32b/3cb34886-7a93-42b9-a8fa-fab5f4bd8624.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ehristoforu_testq-32b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "testq-32b", + "id": "ehristoforu/testq-32b", + "developer": "ehristoforu", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 56.165 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1876 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2877 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.003 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2542 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3715 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1166 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/testq-32b/d5acc9ed-9fd1-411f-a85c-e790521e7fe4.json b/data/hfopenllm_v2/ehristoforu/testq-32b/d5acc9ed-9fd1-411f-a85c-e790521e7fe4.json deleted file mode 100644 index a3be1b99e..000000000 --- a/data/hfopenllm_v2/ehristoforu/testq-32b/d5acc9ed-9fd1-411f-a85c-e790521e7fe4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ehristoforu_testq-32b/1762652580.145958", - "retrieved_timestamp": "1762652580.145958", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ehristoforu/testq-32b", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/testq-32b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 56.165 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18759668789921852 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2876549792486152 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0030211480362537764 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3714583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11660571808510638 - } - } - ] -} diff --git a/data/hfopenllm_v2/ehristoforu/tmoe-v2/0a84406f-a970-4a03-8d2f-c82a8bbd3872.json b/data/hfopenllm_v2/ehristoforu/tmoe-v2/0a84406f-a970-4a03-8d2f-c82a8bbd3872.json deleted file mode 100644 index e30721f32..000000000 --- a/data/hfopenllm_v2/ehristoforu/tmoe-v2/0a84406f-a970-4a03-8d2f-c82a8bbd3872.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ehristoforu_tmoe-v2/1762652580.146366", - "retrieved_timestamp": "1762652580.146367", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ehristoforu/tmoe-v2", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/tmoe-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2MoeForCausalLM", - "params_billions": 11.026 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19026959578363187 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2896740649804915 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0022658610271903325 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4150833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11003989361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/ehristoforu/tmoe-v2/0dd1f9fc-cf54-47ff-8ccd-148b45f3c921.json b/data/hfopenllm_v2/ehristoforu/tmoe-v2/0dd1f9fc-cf54-47ff-8ccd-148b45f3c921.json new file mode 100644 index 000000000..ed11e623d --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/tmoe-v2/0dd1f9fc-cf54-47ff-8ccd-148b45f3c921.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ehristoforu_tmoe-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tmoe-v2", + "id": "ehristoforu/tmoe-v2", + "developer": "ehristoforu", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2MoeForCausalLM", + "params_billions": 11.026 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1903 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2897 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0023 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + 
"hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4151 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.11 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/tmoe/0a160c2d-06ed-43c0-8705-bd76e47c093a.json b/data/hfopenllm_v2/ehristoforu/tmoe/0a160c2d-06ed-43c0-8705-bd76e47c093a.json deleted file mode 100644 index aa09c37be..000000000 --- a/data/hfopenllm_v2/ehristoforu/tmoe/0a160c2d-06ed-43c0-8705-bd76e47c093a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ehristoforu_tmoe/1762652580.1461592", - "retrieved_timestamp": "1762652580.1461592", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ehristoforu/tmoe", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/tmoe", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2MoeForCausalLM", - "params_billions": 11.026 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11930234001338672 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30728601408520645 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0075528700906344415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2231543624161074 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36990624999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11909906914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/ehristoforu/tmoe/7a05616e-7335-419a-914d-00fb287fe663.json b/data/hfopenllm_v2/ehristoforu/tmoe/7a05616e-7335-419a-914d-00fb287fe663.json new file mode 100644 index 000000000..d62cd4fdb --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/tmoe/7a05616e-7335-419a-914d-00fb287fe663.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ehristoforu_tmoe/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tmoe", + "id": "ehristoforu/tmoe", + "developer": "ehristoforu", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2MoeForCausalLM", + "params_billions": 11.026 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1193 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3073 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0076 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2232 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3699 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1191 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/trd-7b-it/070a21b5-4cd3-41b7-9653-0d2d2e4f273d.json 
b/data/hfopenllm_v2/ehristoforu/trd-7b-it/070a21b5-4cd3-41b7-9653-0d2d2e4f273d.json new file mode 100644 index 000000000..bf0489323 --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/trd-7b-it/070a21b5-4cd3-41b7-9653-0d2d2e4f273d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ehristoforu_trd-7b-it/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "trd-7b-it", + "id": "ehristoforu/trd-7b-it", + "developer": "ehristoforu", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2185 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.299 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0317 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3794 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1179 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/trd-7b-it/3bd7f3c1-772a-45fa-9d71-a6e3dff3b54f.json b/data/hfopenllm_v2/ehristoforu/trd-7b-it/3bd7f3c1-772a-45fa-9d71-a6e3dff3b54f.json deleted file mode 100644 index 607b1a0d5..000000000 --- a/data/hfopenllm_v2/ehristoforu/trd-7b-it/3bd7f3c1-772a-45fa-9d71-a6e3dff3b54f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - 
"evaluation_id": "hfopenllm_v2/ehristoforu_trd-7b-it/1762652580.146566", - "retrieved_timestamp": "1762652580.1465669", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ehristoforu/trd-7b-it", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/trd-7b-it", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21847143357402804 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2990238931062931 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03172205438066465 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3794270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11785239361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/ehristoforu/ud-14b/5afc044a-3138-443f-89cf-74f1272cc632.json b/data/hfopenllm_v2/ehristoforu/ud-14b/5afc044a-3138-443f-89cf-74f1272cc632.json new file mode 100644 index 000000000..5bff4a74e --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/ud-14b/5afc044a-3138-443f-89cf-74f1272cc632.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ehristoforu_ud-14b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ud-14b", + "id": "ehristoforu/ud-14b", + "developer": "ehristoforu", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", 
+ "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4235 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3324 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1903 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2374 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4394 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2415 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/ud-14b/7e7ffbef-c8d4-47ff-9ae6-7f0701e9e192.json b/data/hfopenllm_v2/ehristoforu/ud-14b/7e7ffbef-c8d4-47ff-9ae6-7f0701e9e192.json deleted file mode 100644 index 311e63a18..000000000 --- a/data/hfopenllm_v2/ehristoforu/ud-14b/7e7ffbef-c8d4-47ff-9ae6-7f0701e9e192.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ehristoforu_ud-14b/1762652580.146786", - "retrieved_timestamp": "1762652580.146786", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ehristoforu/ud-14b", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/ud-14b", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4235273518708139 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3323819044961654 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1903323262839879 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23741610738255034 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43942708333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24152260638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/elinas/Chronos-Gold-12B-1.0/4705d82c-514c-48a1-8f87-4d2b8f9aff6b.json b/data/hfopenllm_v2/elinas/Chronos-Gold-12B-1.0/4705d82c-514c-48a1-8f87-4d2b8f9aff6b.json deleted file mode 100644 index 218a48cd1..000000000 --- a/data/hfopenllm_v2/elinas/Chronos-Gold-12B-1.0/4705d82c-514c-48a1-8f87-4d2b8f9aff6b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/elinas_Chronos-Gold-12B-1.0/1762652580.1470149", - "retrieved_timestamp": "1762652580.147016", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "elinas/Chronos-Gold-12B-1.0", - "developer": "elinas", - "inference_platform": "unknown", - "id": "elinas/Chronos-Gold-12B-1.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3165656014929277 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5514664110708439 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06948640483383686 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.47398958333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.351811835106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/elinas/Chronos-Gold-12B-1.0/a6c1d914-647c-46b7-b0e1-712b8d506780.json b/data/hfopenllm_v2/elinas/Chronos-Gold-12B-1.0/a6c1d914-647c-46b7-b0e1-712b8d506780.json new file mode 100644 index 000000000..1bd60921e --- /dev/null +++ b/data/hfopenllm_v2/elinas/Chronos-Gold-12B-1.0/a6c1d914-647c-46b7-b0e1-712b8d506780.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/elinas_Chronos-Gold-12B-1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Chronos-Gold-12B-1.0", + "id": "elinas/Chronos-Gold-12B-1.0", + "developer": "elinas", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3166 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5515 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0695 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.318 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.474 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.3518 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ell44ot/gemma-2b-def/43f35eac-0946-42f9-a128-eb8011c29588.json b/data/hfopenllm_v2/ell44ot/gemma-2b-def/43f35eac-0946-42f9-a128-eb8011c29588.json new file mode 100644 index 000000000..07020d3eb --- /dev/null +++ b/data/hfopenllm_v2/ell44ot/gemma-2b-def/43f35eac-0946-42f9-a128-eb8011c29588.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ell44ot_gemma-2b-def/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2b-def", + "id": "ell44ot/gemma-2b-def", + "developer": "ell44ot", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GemmaModel", + "params_billions": 1.546 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2693 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3159 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0242 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2735 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.367 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1572 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/euclaise/ReMask-3B/04c22be7-2cf4-4774-b479-863199c7c3a4.json b/data/hfopenllm_v2/euclaise/ReMask-3B/04c22be7-2cf4-4774-b479-863199c7c3a4.json new file mode 100644 index 000000000..f550f9a02 --- 
/dev/null +++ b/data/hfopenllm_v2/euclaise/ReMask-3B/04c22be7-2cf4-4774-b479-863199c7c3a4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/euclaise_ReMask-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ReMask-3B", + "id": "euclaise/ReMask-3B", + "developer": "euclaise", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "StableLmForCausalLM", + "params_billions": 2.795 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2419 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3517 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0196 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3341 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1357 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/euclaise/ReMask-3B/a905005d-85fa-44c9-848b-286f9100bab7.json b/data/hfopenllm_v2/euclaise/ReMask-3B/a905005d-85fa-44c9-848b-286f9100bab7.json deleted file mode 100644 index 390b62f9b..000000000 --- a/data/hfopenllm_v2/euclaise/ReMask-3B/a905005d-85fa-44c9-848b-286f9100bab7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/euclaise_ReMask-3B/1762652580.14753", - "retrieved_timestamp": "1762652580.147531", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "euclaise/ReMask-3B", - "developer": "euclaise", - "inference_platform": "unknown", - "id": "euclaise/ReMask-3B", - "additional_details": { - "precision": "bfloat16", - "architecture": "StableLmForCausalLM", - "params_billions": 2.795 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2419269759792905 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3516779692917367 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33409375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13572140957446807 - } - } - ] -} diff --git a/data/hfopenllm_v2/eworojoshua/vas-01/f02ca364-4bf8-4f00-aecc-492ac1f0817a.json b/data/hfopenllm_v2/eworojoshua/vas-01/f02ca364-4bf8-4f00-aecc-492ac1f0817a.json deleted file mode 100644 index 0e77fb418..000000000 --- a/data/hfopenllm_v2/eworojoshua/vas-01/f02ca364-4bf8-4f00-aecc-492ac1f0817a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/eworojoshua_vas-01/1762652580.1477718", - "retrieved_timestamp": "1762652580.147773", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "eworojoshua/vas-01", - "developer": "eworojoshua", - "inference_platform": "unknown", - "id": "eworojoshua/vas-01", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7612479332615238 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5417819433732887 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4735649546827795 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44323958333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4347573138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/eworojoshua/vas-01/fc3d436b-ec61-4458-a3c6-1df41057ea70.json b/data/hfopenllm_v2/eworojoshua/vas-01/fc3d436b-ec61-4458-a3c6-1df41057ea70.json new file mode 100644 index 000000000..d331211d6 --- /dev/null +++ b/data/hfopenllm_v2/eworojoshua/vas-01/fc3d436b-ec61-4458-a3c6-1df41057ea70.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/eworojoshua_vas-01/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "vas-01", + "id": "eworojoshua/vas-01", + "developer": "eworojoshua", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7612 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5418 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4736 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4432 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4348 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ewre324/Thinker-Llama-3.2-3B-Instruct-Reasoning/8bdc63c5-2ed3-4738-8a5c-6b90ba969f99.json b/data/hfopenllm_v2/ewre324/Thinker-Llama-3.2-3B-Instruct-Reasoning/8bdc63c5-2ed3-4738-8a5c-6b90ba969f99.json deleted file mode 100644 index 963f99318..000000000 --- a/data/hfopenllm_v2/ewre324/Thinker-Llama-3.2-3B-Instruct-Reasoning/8bdc63c5-2ed3-4738-8a5c-6b90ba969f99.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ewre324_Thinker-Llama-3.2-3B-Instruct-Reasoning/1762652580.148031", - "retrieved_timestamp": "1762652580.148032", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ewre324/Thinker-Llama-3.2-3B-Instruct-Reasoning", - "developer": "ewre324", - "inference_platform": "unknown", - "id": "ewre324/Thinker-Llama-3.2-3B-Instruct-Reasoning", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44388555698878973 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4273125047156003 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.36553125000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2886469414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/ewre324/Thinker-Llama-3.2-3B-Instruct-Reasoning/e3ed157f-f306-40fb-b3a1-d3434236759e.json b/data/hfopenllm_v2/ewre324/Thinker-Llama-3.2-3B-Instruct-Reasoning/e3ed157f-f306-40fb-b3a1-d3434236759e.json new file mode 100644 index 000000000..99fd3f7c5 --- /dev/null +++ b/data/hfopenllm_v2/ewre324/Thinker-Llama-3.2-3B-Instruct-Reasoning/e3ed157f-f306-40fb-b3a1-d3434236759e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ewre324_Thinker-Llama-3.2-3B-Instruct-Reasoning/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Thinker-Llama-3.2-3B-Instruct-Reasoning", + "id": "ewre324/Thinker-Llama-3.2-3B-Instruct-Reasoning", + "developer": "ewre324", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4439 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4273 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0846 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2768 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3655 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2886 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ewre324/Thinker-Qwen2.5-0.5B-Instruct-Reasoning/8793b3e3-f409-499a-81f8-c250c8092841.json b/data/hfopenllm_v2/ewre324/Thinker-Qwen2.5-0.5B-Instruct-Reasoning/8793b3e3-f409-499a-81f8-c250c8092841.json new file mode 100644 index 000000000..eba4ef4a3 --- /dev/null +++ b/data/hfopenllm_v2/ewre324/Thinker-Qwen2.5-0.5B-Instruct-Reasoning/8793b3e3-f409-499a-81f8-c250c8092841.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ewre324_Thinker-Qwen2.5-0.5B-Instruct-Reasoning/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Thinker-Qwen2.5-0.5B-Instruct-Reasoning", + "id": "ewre324/Thinker-Qwen2.5-0.5B-Instruct-Reasoning", + "developer": "ewre324", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2476 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3292 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0287 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3382 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1647 + } + } + ] +} \ No 
newline at end of file diff --git a/data/hfopenllm_v2/ewre324/Thinker-Qwen2.5-0.5B-Instruct-Reasoning/fe29c3e7-463b-45a1-8377-97e7c7f21874.json b/data/hfopenllm_v2/ewre324/Thinker-Qwen2.5-0.5B-Instruct-Reasoning/fe29c3e7-463b-45a1-8377-97e7c7f21874.json deleted file mode 100644 index 32d20856c..000000000 --- a/data/hfopenllm_v2/ewre324/Thinker-Qwen2.5-0.5B-Instruct-Reasoning/fe29c3e7-463b-45a1-8377-97e7c7f21874.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ewre324_Thinker-Qwen2.5-0.5B-Instruct-Reasoning/1762652580.148299", - "retrieved_timestamp": "1762652580.1483", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ewre324/Thinker-Qwen2.5-0.5B-Instruct-Reasoning", - "developer": "ewre324", - "inference_platform": "unknown", - "id": "ewre324/Thinker-Qwen2.5-0.5B-Instruct-Reasoning", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2476473534665798 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3292122979013761 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33821875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16472739361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/ewre324/Thinker-SmolLM2-135M-Instruct-Reasoning/33572f63-15ba-4fbc-b1cf-56b978384d02.json b/data/hfopenllm_v2/ewre324/Thinker-SmolLM2-135M-Instruct-Reasoning/33572f63-15ba-4fbc-b1cf-56b978384d02.json new file mode 100644 index 000000000..3408e328d --- /dev/null +++ b/data/hfopenllm_v2/ewre324/Thinker-SmolLM2-135M-Instruct-Reasoning/33572f63-15ba-4fbc-b1cf-56b978384d02.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ewre324_Thinker-SmolLM2-135M-Instruct-Reasoning/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": 
"documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Thinker-SmolLM2-135M-Instruct-Reasoning", + "id": "ewre324/Thinker-SmolLM2-135M-Instruct-Reasoning", + "developer": "ewre324", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3071 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0091 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2525 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3661 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1094 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ewre324/Thinker-SmolLM2-135M-Instruct-Reasoning/5a03703c-6934-437c-aaca-2acfdd4ca629.json b/data/hfopenllm_v2/ewre324/Thinker-SmolLM2-135M-Instruct-Reasoning/5a03703c-6934-437c-aaca-2acfdd4ca629.json deleted file mode 100644 index 7acb0a3ec..000000000 --- a/data/hfopenllm_v2/ewre324/Thinker-SmolLM2-135M-Instruct-Reasoning/5a03703c-6934-437c-aaca-2acfdd4ca629.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ewre324_Thinker-SmolLM2-135M-Instruct-Reasoning/1762652580.148509", - "retrieved_timestamp": "1762652580.14851", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": 
"third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ewre324/Thinker-SmolLM2-135M-Instruct-Reasoning", - "developer": "ewre324", - "inference_platform": "unknown", - "id": "ewre324/Thinker-SmolLM2-135M-Instruct-Reasoning", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25836336476105626 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3071349750892843 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.366125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.109375 - } - } - ] -} diff --git a/data/hfopenllm_v2/ewre324/ewre324-R1-SmolLM2-135M-Distill/44c636ba-8303-4d75-bcb5-46e3c07a991a.json b/data/hfopenllm_v2/ewre324/ewre324-R1-SmolLM2-135M-Distill/44c636ba-8303-4d75-bcb5-46e3c07a991a.json new file mode 100644 index 000000000..5cd36efa3 --- /dev/null +++ b/data/hfopenllm_v2/ewre324/ewre324-R1-SmolLM2-135M-Distill/44c636ba-8303-4d75-bcb5-46e3c07a991a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ewre324_ewre324-R1-SmolLM2-135M-Distill/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ewre324-R1-SmolLM2-135M-Distill", + "id": "ewre324/ewre324-R1-SmolLM2-135M-Distill", + "developer": "ewre324", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1649 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + 
"source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3042 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0128 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3409 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1134 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ewre324/ewre324-R1-SmolLM2-135M-Distill/6429c440-4d89-4d31-919c-63cde25ba99f.json b/data/hfopenllm_v2/ewre324/ewre324-R1-SmolLM2-135M-Distill/6429c440-4d89-4d31-919c-63cde25ba99f.json deleted file mode 100644 index 17af7d665..000000000 --- a/data/hfopenllm_v2/ewre324/ewre324-R1-SmolLM2-135M-Distill/6429c440-4d89-4d31-919c-63cde25ba99f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ewre324_ewre324-R1-SmolLM2-135M-Distill/1762652580.148724", - "retrieved_timestamp": "1762652580.148725", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ewre324/ewre324-R1-SmolLM2-135M-Distill", - "developer": "ewre324", - "inference_platform": "unknown", - "id": "ewre324/ewre324-R1-SmolLM2-135M-Distill", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16489026893088118 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3041695757290421 
- } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3409166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11336436170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/experiment-llm/exp-3-q-r/0a002444-3e5a-4fc8-acc6-72210a4181a9.json b/data/hfopenllm_v2/experiment-llm/exp-3-q-r/0a002444-3e5a-4fc8-acc6-72210a4181a9.json new file mode 100644 index 000000000..3553a3e14 --- /dev/null +++ b/data/hfopenllm_v2/experiment-llm/exp-3-q-r/0a002444-3e5a-4fc8-acc6-72210a4181a9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/experiment-llm_exp-3-q-r/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "exp-3-q-r", + "id": "experiment-llm/exp-3-q-r", + "developer": "experiment-llm", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6036 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5397 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2787 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { 
+ "score": 0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4315 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4316 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/experiment-llm/exp-3-q-r/7d72dcb1-bc5d-41bf-b333-c21e67b0acd2.json b/data/hfopenllm_v2/experiment-llm/exp-3-q-r/7d72dcb1-bc5d-41bf-b333-c21e67b0acd2.json deleted file mode 100644 index c128cd8e9..000000000 --- a/data/hfopenllm_v2/experiment-llm/exp-3-q-r/7d72dcb1-bc5d-41bf-b333-c21e67b0acd2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/experiment-llm_exp-3-q-r/1762652580.148931", - "retrieved_timestamp": "1762652580.148932", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "experiment-llm/exp-3-q-r", - "developer": "experiment-llm", - "inference_platform": "unknown", - "id": "experiment-llm/exp-3-q-r", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6035785050333116 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5397159253811645 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27870090634441086 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43154166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43159906914893614 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/facebook/opt-1.3b/8675526d-af0b-4bf2-b143-123249371076.json b/data/hfopenllm_v2/facebook/opt-1.3b/8675526d-af0b-4bf2-b143-123249371076.json deleted file mode 100644 index 97fe15dd4..000000000 --- a/data/hfopenllm_v2/facebook/opt-1.3b/8675526d-af0b-4bf2-b143-123249371076.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/facebook_opt-1.3b/1762652580.14919", - "retrieved_timestamp": "1762652580.14919", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "facebook/opt-1.3b", - "developer": "facebook", - "inference_platform": "unknown", - "id": "facebook/opt-1.3b", - "additional_details": { - "precision": "float16", - "architecture": "OPTForCausalLM", - "params_billions": 1.3 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23832985367713222 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3093947052760125 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2424496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.342 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11070478723404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/facebook/opt-1.3b/bbf936a5-3594-4d0a-b5af-7a01740d0c81.json b/data/hfopenllm_v2/facebook/opt-1.3b/bbf936a5-3594-4d0a-b5af-7a01740d0c81.json new file mode 100644 index 000000000..fc192890e --- /dev/null +++ b/data/hfopenllm_v2/facebook/opt-1.3b/bbf936a5-3594-4d0a-b5af-7a01740d0c81.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/facebook_opt-1.3b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "opt-1.3b", + "id": "facebook/opt-1.3b", + "developer": "facebook", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "OPTForCausalLM", + "params_billions": 
1.3 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2383 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3094 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0091 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2424 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.342 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1107 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/facebook/opt-30b/1164abea-4cc2-46a7-a44b-f024a2ce40b4.json b/data/hfopenllm_v2/facebook/opt-30b/1164abea-4cc2-46a7-a44b-f024a2ce40b4.json new file mode 100644 index 000000000..d05075cf6 --- /dev/null +++ b/data/hfopenllm_v2/facebook/opt-30b/1164abea-4cc2-46a7-a44b-f024a2ce40b4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/facebook_opt-30b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "opt-30b", + "id": "facebook/opt-30b", + "developer": "facebook", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "OPTForCausalLM", + "params_billions": 30.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, 
+ "max_score": 1.0 + }, + "score_details": { + "score": 0.2453 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0106 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2693 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3604 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1164 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/facebook/opt-30b/1883ddb6-e4cc-4935-81ba-af30af1537e9.json b/data/hfopenllm_v2/facebook/opt-30b/1883ddb6-e4cc-4935-81ba-af30af1537e9.json deleted file mode 100644 index 39f17b883..000000000 --- a/data/hfopenllm_v2/facebook/opt-30b/1883ddb6-e4cc-4935-81ba-af30af1537e9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/facebook_opt-30b/1762652580.14943", - "retrieved_timestamp": "1762652580.149431", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "facebook/opt-30b", - "developer": "facebook", - "inference_platform": "unknown", - "id": "facebook/opt-30b", - "additional_details": { - "precision": "float16", - "architecture": "OPTForCausalLM", - "params_billions": 30.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2452991396162183 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.30703447525623373 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36041666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1163563829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/failspy/Llama-3-8B-Instruct-MopeyMule/bfd88bec-fcc2-4580-a5c7-4792a0300a5b.json b/data/hfopenllm_v2/failspy/Llama-3-8B-Instruct-MopeyMule/bfd88bec-fcc2-4580-a5c7-4792a0300a5b.json new file mode 100644 index 000000000..c959c146e --- /dev/null +++ b/data/hfopenllm_v2/failspy/Llama-3-8B-Instruct-MopeyMule/bfd88bec-fcc2-4580-a5c7-4792a0300a5b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/failspy_Llama-3-8B-Instruct-MopeyMule/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct-MopeyMule", + "id": "failspy/Llama-3-8B-Instruct-MopeyMule", + "developer": "failspy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.675 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3839 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0196 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2391 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3513 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1764 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/failspy/Llama-3-8B-Instruct-MopeyMule/f5bfa461-15bf-4e32-8471-74f456c62fd9.json b/data/hfopenllm_v2/failspy/Llama-3-8B-Instruct-MopeyMule/f5bfa461-15bf-4e32-8471-74f456c62fd9.json deleted file mode 100644 index 79a240c30..000000000 --- a/data/hfopenllm_v2/failspy/Llama-3-8B-Instruct-MopeyMule/f5bfa461-15bf-4e32-8471-74f456c62fd9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/failspy_Llama-3-8B-Instruct-MopeyMule/1762652580.1496441", - "retrieved_timestamp": "1762652580.1496441", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "failspy/Llama-3-8B-Instruct-MopeyMule", - "developer": "failspy", - "inference_platform": "unknown", - "id": "failspy/Llama-3-8B-Instruct-MopeyMule", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6750444376476638 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.383874490132152 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23909395973154363 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35130208333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17644614361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/failspy/Llama-3-8B-Instruct-abliterated/7f49e582-a01f-481f-8345-1c384fc8b567.json b/data/hfopenllm_v2/failspy/Llama-3-8B-Instruct-abliterated/7f49e582-a01f-481f-8345-1c384fc8b567.json new file mode 100644 index 000000000..b05984172 --- /dev/null +++ b/data/hfopenllm_v2/failspy/Llama-3-8B-Instruct-abliterated/7f49e582-a01f-481f-8345-1c384fc8b567.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/failspy_Llama-3-8B-Instruct-abliterated/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct-abliterated", + "id": "failspy/Llama-3-8B-Instruct-abliterated", + "developer": "failspy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5909 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4354 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0385 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4116 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2742 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/failspy/Llama-3-8B-Instruct-abliterated/8aa6c90e-a6ee-4dfe-8bf4-b5d256be9cd6.json b/data/hfopenllm_v2/failspy/Llama-3-8B-Instruct-abliterated/8aa6c90e-a6ee-4dfe-8bf4-b5d256be9cd6.json deleted file mode 100644 index 47fd05b71..000000000 --- a/data/hfopenllm_v2/failspy/Llama-3-8B-Instruct-abliterated/8aa6c90e-a6ee-4dfe-8bf4-b5d256be9cd6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/failspy_Llama-3-8B-Instruct-abliterated/1762652580.1499012", - "retrieved_timestamp": "1762652580.149902", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "failspy/Llama-3-8B-Instruct-abliterated", - "developer": "failspy", - "inference_platform": "unknown", - "id": "failspy/Llama-3-8B-Instruct-abliterated", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5908888416069362 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4353752684977051 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41158333333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2741855053191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/failspy/Meta-Llama-3-70B-Instruct-abliterated-v3.5/10937ed1-56e2-4aad-b717-5125bc8ac72a.json b/data/hfopenllm_v2/failspy/Meta-Llama-3-70B-Instruct-abliterated-v3.5/10937ed1-56e2-4aad-b717-5125bc8ac72a.json new file mode 100644 index 000000000..794c2e176 --- /dev/null +++ b/data/hfopenllm_v2/failspy/Meta-Llama-3-70B-Instruct-abliterated-v3.5/10937ed1-56e2-4aad-b717-5125bc8ac72a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/failspy_Meta-Llama-3-70B-Instruct-abliterated-v3.5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + 
"evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Meta-Llama-3-70B-Instruct-abliterated-v3.5", + "id": "failspy/Meta-Llama-3-70B-Instruct-abliterated-v3.5", + "developer": "failspy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7747 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5747 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1284 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3982 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4452 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/failspy/Meta-Llama-3-70B-Instruct-abliterated-v3.5/e0329607-d832-4252-ad71-81e8a8c4bb31.json b/data/hfopenllm_v2/failspy/Meta-Llama-3-70B-Instruct-abliterated-v3.5/e0329607-d832-4252-ad71-81e8a8c4bb31.json deleted file mode 100644 index 1aa3bbcc8..000000000 --- a/data/hfopenllm_v2/failspy/Meta-Llama-3-70B-Instruct-abliterated-v3.5/e0329607-d832-4252-ad71-81e8a8c4bb31.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/failspy_Meta-Llama-3-70B-Instruct-abliterated-v3.5/1762652580.1501682", - "retrieved_timestamp": "1762652580.1501691", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", 
- "source_type": "documentation" - }, - "model_info": { - "name": "failspy/Meta-Llama-3-70B-Instruct-abliterated-v3.5", - "developer": "failspy", - "inference_platform": "unknown", - "id": "failspy/Meta-Llama-3-70B-Instruct-abliterated-v3.5", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7746867201248244 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.574710022890038 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39818749999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44522938829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/failspy/Meta-Llama-3-8B-Instruct-abliterated-v3/c598dbff-4ab5-4405-b75d-13571ae3d862.json b/data/hfopenllm_v2/failspy/Meta-Llama-3-8B-Instruct-abliterated-v3/c598dbff-4ab5-4405-b75d-13571ae3d862.json deleted file mode 100644 index 37ff4b0ad..000000000 --- a/data/hfopenllm_v2/failspy/Meta-Llama-3-8B-Instruct-abliterated-v3/c598dbff-4ab5-4405-b75d-13571ae3d862.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/failspy_Meta-Llama-3-8B-Instruct-abliterated-v3/1762652580.150389", - "retrieved_timestamp": "1762652580.15039", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "failspy/Meta-Llama-3-8B-Instruct-abliterated-v3", - "developer": "failspy", - "inference_platform": "unknown", - "id": "failspy/Meta-Llama-3-8B-Instruct-abliterated-v3", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7244533393617822 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4924562150856957 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09592145015105741 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36218749999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3653590425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/failspy/Meta-Llama-3-8B-Instruct-abliterated-v3/f4622539-c0ac-4e9f-86d4-00e3c826d03b.json b/data/hfopenllm_v2/failspy/Meta-Llama-3-8B-Instruct-abliterated-v3/f4622539-c0ac-4e9f-86d4-00e3c826d03b.json new file mode 100644 index 000000000..eb2bf7ad8 --- /dev/null +++ b/data/hfopenllm_v2/failspy/Meta-Llama-3-8B-Instruct-abliterated-v3/f4622539-c0ac-4e9f-86d4-00e3c826d03b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/failspy_Meta-Llama-3-8B-Instruct-abliterated-v3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Meta-Llama-3-8B-Instruct-abliterated-v3", + "id": "failspy/Meta-Llama-3-8B-Instruct-abliterated-v3", + "developer": "failspy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7245 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4925 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.0959 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3622 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3654 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/failspy/Phi-3-medium-4k-instruct-abliterated-v3/264bc4a6-f0ad-4eef-a519-6d97f8f6ab91.json b/data/hfopenllm_v2/failspy/Phi-3-medium-4k-instruct-abliterated-v3/264bc4a6-f0ad-4eef-a519-6d97f8f6ab91.json deleted file mode 100644 index 7cb1aa910..000000000 --- a/data/hfopenllm_v2/failspy/Phi-3-medium-4k-instruct-abliterated-v3/264bc4a6-f0ad-4eef-a519-6d97f8f6ab91.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/failspy_Phi-3-medium-4k-instruct-abliterated-v3/1762652580.1505978", - "retrieved_timestamp": "1762652580.150599", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "failspy/Phi-3-medium-4k-instruct-abliterated-v3", - "developer": "failspy", - "inference_platform": "unknown", - "id": "failspy/Phi-3-medium-4k-instruct-abliterated-v3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 13.96 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6319299458769398 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6304799176474429 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1593655589123867 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4604166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4399933510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/failspy/Phi-3-medium-4k-instruct-abliterated-v3/6b13b2b1-68cd-4aae-8f2b-2400f40760d7.json b/data/hfopenllm_v2/failspy/Phi-3-medium-4k-instruct-abliterated-v3/6b13b2b1-68cd-4aae-8f2b-2400f40760d7.json new file mode 100644 index 000000000..8fbcaddc2 --- /dev/null +++ b/data/hfopenllm_v2/failspy/Phi-3-medium-4k-instruct-abliterated-v3/6b13b2b1-68cd-4aae-8f2b-2400f40760d7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/failspy_Phi-3-medium-4k-instruct-abliterated-v3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-3-medium-4k-instruct-abliterated-v3", + "id": "failspy/Phi-3-medium-4k-instruct-abliterated-v3", + "developer": "failspy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 13.96 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6319 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6305 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1594 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3171 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4604 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": 
"MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.44 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/failspy/llama-3-70B-Instruct-abliterated/5b02726c-ba3f-482b-9f10-87b8d69ffeb4.json b/data/hfopenllm_v2/failspy/llama-3-70B-Instruct-abliterated/5b02726c-ba3f-482b-9f10-87b8d69ffeb4.json new file mode 100644 index 000000000..c9da5b3a7 --- /dev/null +++ b/data/hfopenllm_v2/failspy/llama-3-70B-Instruct-abliterated/5b02726c-ba3f-482b-9f10-87b8d69ffeb4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/failspy_llama-3-70B-Instruct-abliterated/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-3-70B-Instruct-abliterated", + "id": "failspy/llama-3-70B-Instruct-abliterated", + "developer": "failspy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8023 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6465 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2432 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4128 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5145 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/failspy/llama-3-70B-Instruct-abliterated/f31f7ad3-9018-4891-be05-12787728904c.json b/data/hfopenllm_v2/failspy/llama-3-70B-Instruct-abliterated/f31f7ad3-9018-4891-be05-12787728904c.json deleted file mode 100644 index be42389fb..000000000 --- a/data/hfopenllm_v2/failspy/llama-3-70B-Instruct-abliterated/f31f7ad3-9018-4891-be05-12787728904c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/failspy_llama-3-70B-Instruct-abliterated/1762652580.1508029", - "retrieved_timestamp": "1762652580.150804", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "failspy/llama-3-70B-Instruct-abliterated", - "developer": "failspy", - "inference_platform": "unknown", - "id": "failspy/llama-3-70B-Instruct-abliterated", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8023389052159382 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6464853840398571 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.243202416918429 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4127604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5145445478723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/fblgit/TheBeagle-v2beta-32B-MGS/21d6f2dd-7bd6-42a9-b14e-c25777497890.json b/data/hfopenllm_v2/fblgit/TheBeagle-v2beta-32B-MGS/21d6f2dd-7bd6-42a9-b14e-c25777497890.json new file mode 100644 index 000000000..bfc6fcad1 --- /dev/null +++ b/data/hfopenllm_v2/fblgit/TheBeagle-v2beta-32B-MGS/21d6f2dd-7bd6-42a9-b14e-c25777497890.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/fblgit_TheBeagle-v2beta-32B-MGS/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF 
Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TheBeagle-v2beta-32B-MGS", + "id": "fblgit/TheBeagle-v2beta-32B-MGS", + "developer": "fblgit", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5181 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7033 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4947 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3826 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5008 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5915 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/fblgit/TheBeagle-v2beta-32B-MGS/63bdc7e2-6518-4da4-81f4-74aab25f7a5e.json b/data/hfopenllm_v2/fblgit/TheBeagle-v2beta-32B-MGS/63bdc7e2-6518-4da4-81f4-74aab25f7a5e.json deleted file mode 100644 index fbecba788..000000000 --- a/data/hfopenllm_v2/fblgit/TheBeagle-v2beta-32B-MGS/63bdc7e2-6518-4da4-81f4-74aab25f7a5e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/fblgit_TheBeagle-v2beta-32B-MGS/1762652580.1510022", - "retrieved_timestamp": "1762652580.151003", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": 
"documentation" - }, - "model_info": { - "name": "fblgit/TheBeagle-v2beta-32B-MGS", - "developer": "fblgit", - "inference_platform": "unknown", - "id": "fblgit/TheBeagle-v2beta-32B-MGS", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.518074265171966 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7032634749563558 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4947129909365559 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3825503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.50075 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5915059840425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/fblgit/TheBeagle-v2beta-32B-MGS/8338dd8a-88c2-42f8-9d67-13b852e3c0ea.json b/data/hfopenllm_v2/fblgit/TheBeagle-v2beta-32B-MGS/8338dd8a-88c2-42f8-9d67-13b852e3c0ea.json deleted file mode 100644 index fe4a5d838..000000000 --- a/data/hfopenllm_v2/fblgit/TheBeagle-v2beta-32B-MGS/8338dd8a-88c2-42f8-9d67-13b852e3c0ea.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/fblgit_TheBeagle-v2beta-32B-MGS/1762652580.151249", - "retrieved_timestamp": "1762652580.151249", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "fblgit/TheBeagle-v2beta-32B-MGS", - "developer": "fblgit", - "inference_platform": "unknown", - "id": "fblgit/TheBeagle-v2beta-32B-MGS", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4503051902285935 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.703542441088263 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3942598187311178 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.401006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5021145833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5910904255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/fblgit/TheBeagle-v2beta-32B-MGS/d0bc11cb-56ff-4c77-9446-e76e550e0919.json b/data/hfopenllm_v2/fblgit/TheBeagle-v2beta-32B-MGS/d0bc11cb-56ff-4c77-9446-e76e550e0919.json new file mode 100644 index 000000000..6080ab1e6 --- /dev/null +++ b/data/hfopenllm_v2/fblgit/TheBeagle-v2beta-32B-MGS/d0bc11cb-56ff-4c77-9446-e76e550e0919.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/fblgit_TheBeagle-v2beta-32B-MGS/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TheBeagle-v2beta-32B-MGS", + "id": "fblgit/TheBeagle-v2beta-32B-MGS", + "developer": "fblgit", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4503 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7035 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3943 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.401 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5021 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5911 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/fblgit/UNA-SimpleSmaug-34b-v1beta/f98b051e-0984-423d-89c0-352368168d75.json b/data/hfopenllm_v2/fblgit/UNA-SimpleSmaug-34b-v1beta/f98b051e-0984-423d-89c0-352368168d75.json deleted file mode 100644 index 1998fffe3..000000000 --- a/data/hfopenllm_v2/fblgit/UNA-SimpleSmaug-34b-v1beta/f98b051e-0984-423d-89c0-352368168d75.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/fblgit_UNA-SimpleSmaug-34b-v1beta/1762652580.151433", - "retrieved_timestamp": "1762652580.151433", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "fblgit/UNA-SimpleSmaug-34b-v1beta", - "developer": "fblgit", - "inference_platform": "unknown", - "id": "fblgit/UNA-SimpleSmaug-34b-v1beta", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45562551806983254 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5286654104993475 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4255625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4539561170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/fblgit/UNA-SimpleSmaug-34b-v1beta/ff78dc97-e9cf-4215-a607-3e80892af82c.json b/data/hfopenllm_v2/fblgit/UNA-SimpleSmaug-34b-v1beta/ff78dc97-e9cf-4215-a607-3e80892af82c.json new file mode 100644 index 000000000..a5cda2338 --- /dev/null +++ b/data/hfopenllm_v2/fblgit/UNA-SimpleSmaug-34b-v1beta/ff78dc97-e9cf-4215-a607-3e80892af82c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/fblgit_UNA-SimpleSmaug-34b-v1beta/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "UNA-SimpleSmaug-34b-v1beta", + "id": "fblgit/UNA-SimpleSmaug-34b-v1beta", + "developer": "fblgit", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4556 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5287 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0718 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3171 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4256 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.454 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/fblgit/UNA-TheBeagle-7b-v1/0ff1c6ff-5404-4d61-b6c6-f6ef7ae9ca8b.json 
b/data/hfopenllm_v2/fblgit/UNA-TheBeagle-7b-v1/0ff1c6ff-5404-4d61-b6c6-f6ef7ae9ca8b.json new file mode 100644 index 000000000..4d873390f --- /dev/null +++ b/data/hfopenllm_v2/fblgit/UNA-TheBeagle-7b-v1/0ff1c6ff-5404-4d61-b6c6-f6ef7ae9ca8b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/fblgit_UNA-TheBeagle-7b-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "UNA-TheBeagle-7b-v1", + "id": "fblgit/UNA-TheBeagle-7b-v1", + "developer": "fblgit", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3689 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5029 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.077 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2844 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4564 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3019 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/fblgit/UNA-TheBeagle-7b-v1/454be483-8a45-4bea-a370-5f5a74a924ea.json b/data/hfopenllm_v2/fblgit/UNA-TheBeagle-7b-v1/454be483-8a45-4bea-a370-5f5a74a924ea.json deleted file mode 100644 index 29b21cf10..000000000 --- a/data/hfopenllm_v2/fblgit/UNA-TheBeagle-7b-v1/454be483-8a45-4bea-a370-5f5a74a924ea.json +++ /dev/null @@ -1,105 +0,0 @@ -{ 
- "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/fblgit_UNA-TheBeagle-7b-v1/1762652580.151644", - "retrieved_timestamp": "1762652580.151645", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "fblgit/UNA-TheBeagle-7b-v1", - "developer": "fblgit", - "inference_platform": "unknown", - "id": "fblgit/UNA-TheBeagle-7b-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36887236975669 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5028691097522866 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0770392749244713 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4564375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3019448138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/fblgit/UNA-ThePitbull-21.4B-v2/48837141-2556-4658-87e0-bb88cfcd562a.json b/data/hfopenllm_v2/fblgit/UNA-ThePitbull-21.4B-v2/48837141-2556-4658-87e0-bb88cfcd562a.json new file mode 100644 index 000000000..a60c55895 --- /dev/null +++ b/data/hfopenllm_v2/fblgit/UNA-ThePitbull-21.4B-v2/48837141-2556-4658-87e0-bb88cfcd562a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/fblgit_UNA-ThePitbull-21.4B-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "UNA-ThePitbull-21.4B-v2", + "id": "fblgit/UNA-ThePitbull-21.4B-v2", + "developer": "fblgit", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 21.421 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.379 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.635 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1216 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3922 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3516 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/fblgit/UNA-ThePitbull-21.4B-v2/afdf8e40-d87a-4a9c-93a7-a65fe2ae732a.json b/data/hfopenllm_v2/fblgit/UNA-ThePitbull-21.4B-v2/afdf8e40-d87a-4a9c-93a7-a65fe2ae732a.json deleted file mode 100644 index d6e5b27a3..000000000 --- a/data/hfopenllm_v2/fblgit/UNA-ThePitbull-21.4B-v2/afdf8e40-d87a-4a9c-93a7-a65fe2ae732a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/fblgit_UNA-ThePitbull-21.4B-v2/1762652580.151847", - "retrieved_timestamp": "1762652580.151847", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "fblgit/UNA-ThePitbull-21.4B-v2", - "developer": "fblgit", - "inference_platform": "unknown", - "id": "fblgit/UNA-ThePitbull-21.4B-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 21.421 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3790387283518841 - } - }, - { - "evaluation_name": 
"BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.635038821016254 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1216012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3921666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3515625 - } - } - ] -} diff --git a/data/hfopenllm_v2/fblgit/cybertron-v4-qw7B-MGS/60ac5509-346d-4717-a729-0413fce4b203.json b/data/hfopenllm_v2/fblgit/cybertron-v4-qw7B-MGS/60ac5509-346d-4717-a729-0413fce4b203.json deleted file mode 100644 index b01451412..000000000 --- a/data/hfopenllm_v2/fblgit/cybertron-v4-qw7B-MGS/60ac5509-346d-4717-a729-0413fce4b203.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/fblgit_cybertron-v4-qw7B-MGS/1762652580.15205", - "retrieved_timestamp": "1762652580.152051", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "fblgit/cybertron-v4-qw7B-MGS", - "developer": "fblgit", - "inference_platform": "unknown", - "id": "fblgit/cybertron-v4-qw7B-MGS", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6263846593704703 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5591772533435835 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34894259818731116 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { 
- "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43709375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44730718085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/fblgit/cybertron-v4-qw7B-MGS/f2d6da5d-3685-43de-8ceb-5b798f88e24c.json b/data/hfopenllm_v2/fblgit/cybertron-v4-qw7B-MGS/f2d6da5d-3685-43de-8ceb-5b798f88e24c.json new file mode 100644 index 000000000..8b3f04151 --- /dev/null +++ b/data/hfopenllm_v2/fblgit/cybertron-v4-qw7B-MGS/f2d6da5d-3685-43de-8ceb-5b798f88e24c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/fblgit_cybertron-v4-qw7B-MGS/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cybertron-v4-qw7B-MGS", + "id": "fblgit/cybertron-v4-qw7B-MGS", + "developer": "fblgit", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6264 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5592 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3489 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4371 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4473 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/fblgit/cybertron-v4-qw7B-UNAMGS/8c73c2a6-b2e9-419d-8c00-8a983790ba9b.json b/data/hfopenllm_v2/fblgit/cybertron-v4-qw7B-UNAMGS/8c73c2a6-b2e9-419d-8c00-8a983790ba9b.json deleted file mode 100644 index b1dd920e8..000000000 --- a/data/hfopenllm_v2/fblgit/cybertron-v4-qw7B-UNAMGS/8c73c2a6-b2e9-419d-8c00-8a983790ba9b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/fblgit_cybertron-v4-qw7B-UNAMGS/1762652580.1522481", - "retrieved_timestamp": "1762652580.152249", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "fblgit/cybertron-v4-qw7B-UNAMGS", - "developer": "fblgit", - "inference_platform": "unknown", - "id": "fblgit/cybertron-v4-qw7B-UNAMGS", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6090240561709597 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5642509108139038 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3731117824773414 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4343333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4500498670212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/fblgit/cybertron-v4-qw7B-UNAMGS/9ec02ccd-329a-4d62-9f04-87de6fda5011.json b/data/hfopenllm_v2/fblgit/cybertron-v4-qw7B-UNAMGS/9ec02ccd-329a-4d62-9f04-87de6fda5011.json new file mode 100644 index 000000000..4e95a568a --- /dev/null +++ b/data/hfopenllm_v2/fblgit/cybertron-v4-qw7B-UNAMGS/9ec02ccd-329a-4d62-9f04-87de6fda5011.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/fblgit_cybertron-v4-qw7B-UNAMGS/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { 
+ "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cybertron-v4-qw7B-UNAMGS", + "id": "fblgit/cybertron-v4-qw7B-UNAMGS", + "developer": "fblgit", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.609 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5643 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3731 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3314 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4343 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.45 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/fblgit/juanako-7b-UNA/781d0332-e332-4ff7-8585-9c2d8395a147.json b/data/hfopenllm_v2/fblgit/juanako-7b-UNA/781d0332-e332-4ff7-8585-9c2d8395a147.json new file mode 100644 index 000000000..0a2dc5709 --- /dev/null +++ b/data/hfopenllm_v2/fblgit/juanako-7b-UNA/781d0332-e332-4ff7-8585-9c2d8395a147.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/fblgit_juanako-7b-UNA/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "juanako-7b-UNA", + "id": "fblgit/juanako-7b-UNA", + "developer": "fblgit", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4837 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.507 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.034 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4645 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2771 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/fblgit/juanako-7b-UNA/f61e534a-06b4-4558-8ee6-227ad1e97699.json b/data/hfopenllm_v2/fblgit/juanako-7b-UNA/f61e534a-06b4-4558-8ee6-227ad1e97699.json deleted file mode 100644 index 562ee91b8..000000000 --- a/data/hfopenllm_v2/fblgit/juanako-7b-UNA/f61e534a-06b4-4558-8ee6-227ad1e97699.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/fblgit_juanako-7b-UNA/1762652580.1524491", - "retrieved_timestamp": "1762652580.15245", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "fblgit/juanako-7b-UNA", - "developer": "fblgit", - "inference_platform": "unknown", - "id": "fblgit/juanako-7b-UNA", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4837276204914073 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.507001145736535 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.033987915407854986 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46449999999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.277094414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/fblgit/miniclaus-qw1.5B-UNAMGS-GRPO/a1d14150-3b2e-489f-8d18-8894862e9ab0.json b/data/hfopenllm_v2/fblgit/miniclaus-qw1.5B-UNAMGS-GRPO/a1d14150-3b2e-489f-8d18-8894862e9ab0.json deleted file mode 100644 index 4400694d3..000000000 --- a/data/hfopenllm_v2/fblgit/miniclaus-qw1.5B-UNAMGS-GRPO/a1d14150-3b2e-489f-8d18-8894862e9ab0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/fblgit_miniclaus-qw1.5B-UNAMGS-GRPO/1762652580.153163", - "retrieved_timestamp": "1762652580.1531641", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "fblgit/miniclaus-qw1.5B-UNAMGS-GRPO", - "developer": "fblgit", - "inference_platform": "unknown", - "id": "fblgit/miniclaus-qw1.5B-UNAMGS-GRPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3518364605912313 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.423443453814005 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.11027190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42543749999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2945478723404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/fblgit/miniclaus-qw1.5B-UNAMGS-GRPO/d6dd460e-c352-4d31-8941-183c6eabd0a7.json b/data/hfopenllm_v2/fblgit/miniclaus-qw1.5B-UNAMGS-GRPO/d6dd460e-c352-4d31-8941-183c6eabd0a7.json new file mode 100644 index 000000000..5152ad99c --- /dev/null +++ b/data/hfopenllm_v2/fblgit/miniclaus-qw1.5B-UNAMGS-GRPO/d6dd460e-c352-4d31-8941-183c6eabd0a7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/fblgit_miniclaus-qw1.5B-UNAMGS-GRPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "miniclaus-qw1.5B-UNAMGS-GRPO", + "id": "fblgit/miniclaus-qw1.5B-UNAMGS-GRPO", + "developer": "fblgit", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3518 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4234 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1103 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": 
"TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4254 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/fblgit/miniclaus-qw1.5B-UNAMGS/4b337805-4bd3-4106-bcde-adb7a6fbec23.json b/data/hfopenllm_v2/fblgit/miniclaus-qw1.5B-UNAMGS/4b337805-4bd3-4106-bcde-adb7a6fbec23.json deleted file mode 100644 index 3150b30a1..000000000 --- a/data/hfopenllm_v2/fblgit/miniclaus-qw1.5B-UNAMGS/4b337805-4bd3-4106-bcde-adb7a6fbec23.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/fblgit_miniclaus-qw1.5B-UNAMGS/1762652580.152649", - "retrieved_timestamp": "1762652580.152649", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "fblgit/miniclaus-qw1.5B-UNAMGS", - "developer": "fblgit", - "inference_platform": "unknown", - "id": "fblgit/miniclaus-qw1.5B-UNAMGS", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3348005514257725 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4238588294007628 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10876132930513595 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42934374999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2937167553191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/fblgit/miniclaus-qw1.5B-UNAMGS/66bf6442-04ea-437b-88c4-e61afc6f7139.json 
b/data/hfopenllm_v2/fblgit/miniclaus-qw1.5B-UNAMGS/66bf6442-04ea-437b-88c4-e61afc6f7139.json new file mode 100644 index 000000000..5d5d165d7 --- /dev/null +++ b/data/hfopenllm_v2/fblgit/miniclaus-qw1.5B-UNAMGS/66bf6442-04ea-437b-88c4-e61afc6f7139.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/fblgit_miniclaus-qw1.5B-UNAMGS/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "miniclaus-qw1.5B-UNAMGS", + "id": "fblgit/miniclaus-qw1.5B-UNAMGS", + "developer": "fblgit", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3348 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4239 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1088 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4293 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2937 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/fblgit/pancho-v1-qw25-3B-UNAMGS/0d1911f5-a2e7-4511-a8d8-098cbf9207df.json b/data/hfopenllm_v2/fblgit/pancho-v1-qw25-3B-UNAMGS/0d1911f5-a2e7-4511-a8d8-098cbf9207df.json new file mode 100644 index 000000000..2dcb46652 --- /dev/null +++ 
b/data/hfopenllm_v2/fblgit/pancho-v1-qw25-3B-UNAMGS/0d1911f5-a2e7-4511-a8d8-098cbf9207df.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/fblgit_pancho-v1-qw25-3B-UNAMGS/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pancho-v1-qw25-3B-UNAMGS", + "id": "fblgit/pancho-v1-qw25-3B-UNAMGS", + "developer": "fblgit", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5361 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4926 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1571 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4027 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3766 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/fblgit/pancho-v1-qw25-3B-UNAMGS/701cb3af-8916-47ab-b118-1cd778a23e66.json b/data/hfopenllm_v2/fblgit/pancho-v1-qw25-3B-UNAMGS/701cb3af-8916-47ab-b118-1cd778a23e66.json deleted file mode 100644 index 971442ff0..000000000 --- a/data/hfopenllm_v2/fblgit/pancho-v1-qw25-3B-UNAMGS/701cb3af-8916-47ab-b118-1cd778a23e66.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/fblgit_pancho-v1-qw25-3B-UNAMGS/1762652580.153452", - 
"retrieved_timestamp": "1762652580.153453", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "fblgit/pancho-v1-qw25-3B-UNAMGS", - "developer": "fblgit", - "inference_platform": "unknown", - "id": "fblgit/pancho-v1-qw25-3B-UNAMGS", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.536134124123991 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49258278193390775 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15709969788519637 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4027395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3765791223404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/fblgit/una-cybertron-7b-v2-bf16/8fc3e145-958b-4f25-bfab-4364bcdfeeb1.json b/data/hfopenllm_v2/fblgit/una-cybertron-7b-v2-bf16/8fc3e145-958b-4f25-bfab-4364bcdfeeb1.json deleted file mode 100644 index d9c32c4ca..000000000 --- a/data/hfopenllm_v2/fblgit/una-cybertron-7b-v2-bf16/8fc3e145-958b-4f25-bfab-4364bcdfeeb1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/fblgit_una-cybertron-7b-v2-bf16/1762652580.153698", - "retrieved_timestamp": "1762652580.1536992", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "fblgit/una-cybertron-7b-v2-bf16", - "developer": "fblgit", - "inference_platform": "unknown", - "id": "fblgit/una-cybertron-7b-v2-bf16", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47371086494944525 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3973388920486269 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4473229166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2442652925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/fblgit/una-cybertron-7b-v2-bf16/abc18648-ef96-4695-94d5-fa14be277431.json b/data/hfopenllm_v2/fblgit/una-cybertron-7b-v2-bf16/abc18648-ef96-4695-94d5-fa14be277431.json new file mode 100644 index 000000000..56fbad07e --- /dev/null +++ b/data/hfopenllm_v2/fblgit/una-cybertron-7b-v2-bf16/abc18648-ef96-4695-94d5-fa14be277431.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/fblgit_una-cybertron-7b-v2-bf16/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "una-cybertron-7b-v2-bf16", + "id": "fblgit/una-cybertron-7b-v2-bf16", + "developer": "fblgit", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4737 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3973 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0408 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4473 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2443 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/fhai50032/RolePlayLake-7B/af85e87f-1308-4968-850a-27382f36a63a.json b/data/hfopenllm_v2/fhai50032/RolePlayLake-7B/af85e87f-1308-4968-850a-27382f36a63a.json deleted file mode 100644 index edb3c667d..000000000 --- a/data/hfopenllm_v2/fhai50032/RolePlayLake-7B/af85e87f-1308-4968-850a-27382f36a63a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/fhai50032_RolePlayLake-7B/1762652580.153994", - "retrieved_timestamp": "1762652580.153995", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "fhai50032/RolePlayLake-7B", - "developer": "fhai50032", - "inference_platform": "unknown", - "id": "fhai50032/RolePlayLake-7B", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5056594280952318 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5252170095233862 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07250755287009064 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4459270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3159906914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/fhai50032/RolePlayLake-7B/ff1e7aaa-3f29-4192-a0e0-80fcd11ba055.json b/data/hfopenllm_v2/fhai50032/RolePlayLake-7B/ff1e7aaa-3f29-4192-a0e0-80fcd11ba055.json new file mode 100644 index 000000000..5c8be692e --- /dev/null +++ b/data/hfopenllm_v2/fhai50032/RolePlayLake-7B/ff1e7aaa-3f29-4192-a0e0-80fcd11ba055.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/fhai50032_RolePlayLake-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RolePlayLake-7B", + "id": "fhai50032/RolePlayLake-7B", + "developer": "fhai50032", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5057 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5252 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0725 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4459 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, 
+ "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.316 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/fhai50032/Unaligned-Thinker-PHI-4/cc8ef5bd-957f-4308-9539-00a696182056.json b/data/hfopenllm_v2/fhai50032/Unaligned-Thinker-PHI-4/cc8ef5bd-957f-4308-9539-00a696182056.json new file mode 100644 index 000000000..97e284a57 --- /dev/null +++ b/data/hfopenllm_v2/fhai50032/Unaligned-Thinker-PHI-4/cc8ef5bd-957f-4308-9539-00a696182056.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/fhai50032_Unaligned-Thinker-PHI-4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Unaligned-Thinker-PHI-4", + "id": "fhai50032/Unaligned-Thinker-PHI-4", + "developer": "fhai50032", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0563 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6643 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3353 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3809 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4679 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5147 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/flammenai/Llama3.1-Flammades-70B/abc7652f-b88e-40ba-847c-c99dce9f2719.json b/data/hfopenllm_v2/flammenai/Llama3.1-Flammades-70B/abc7652f-b88e-40ba-847c-c99dce9f2719.json new file mode 100644 index 000000000..791c615fd --- /dev/null +++ b/data/hfopenllm_v2/flammenai/Llama3.1-Flammades-70B/abc7652f-b88e-40ba-847c-c99dce9f2719.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/flammenai_Llama3.1-Flammades-70B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.1-Flammades-70B", + "id": "flammenai/Llama3.1-Flammades-70B", + "developer": "flammenai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7058 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.666 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2092 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.354 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4871 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4752 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/flammenai/Mahou-1.2a-llama3-8B/56e36294-e616-45a1-8dc9-2c14cf3ee8d0.json b/data/hfopenllm_v2/flammenai/Mahou-1.2a-llama3-8B/56e36294-e616-45a1-8dc9-2c14cf3ee8d0.json new file mode 100644 index 000000000..b1967f57c 
--- /dev/null +++ b/data/hfopenllm_v2/flammenai/Mahou-1.2a-llama3-8B/56e36294-e616-45a1-8dc9-2c14cf3ee8d0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/flammenai_Mahou-1.2a-llama3-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mahou-1.2a-llama3-8B", + "id": "flammenai/Mahou-1.2a-llama3-8B", + "developer": "flammenai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5093 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5094 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0838 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2886 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3847 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3817 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/flammenai/Mahou-1.2a-mistral-7B/4b81caad-92ed-4bd5-98bd-58582854b5d8.json b/data/hfopenllm_v2/flammenai/Mahou-1.2a-mistral-7B/4b81caad-92ed-4bd5-98bd-58582854b5d8.json new file mode 100644 index 000000000..cca5cbc09 --- /dev/null +++ b/data/hfopenllm_v2/flammenai/Mahou-1.2a-mistral-7B/4b81caad-92ed-4bd5-98bd-58582854b5d8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/flammenai_Mahou-1.2a-mistral-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mahou-1.2a-mistral-7B", + "id": "flammenai/Mahou-1.2a-mistral-7B", + "developer": "flammenai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4552 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5118 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0687 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2718 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3896 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3163 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/flammenai/Mahou-1.5-llama3.1-70B/2cef0040-6d4c-4c38-be40-5477911f3063.json b/data/hfopenllm_v2/flammenai/Mahou-1.5-llama3.1-70B/2cef0040-6d4c-4c38-be40-5477911f3063.json new file mode 100644 index 000000000..5ef38b931 --- /dev/null +++ b/data/hfopenllm_v2/flammenai/Mahou-1.5-llama3.1-70B/2cef0040-6d4c-4c38-be40-5477911f3063.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/flammenai_Mahou-1.5-llama3.1-70B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": 
"Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mahou-1.5-llama3.1-70B", + "id": "flammenai/Mahou-1.5-llama3.1-70B", + "developer": "flammenai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7147 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6651 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.21 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.354 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.495 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4749 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/flammenai/Mahou-1.5-mistral-nemo-12B/4aeef94f-823e-4be5-b4f1-37463e052748.json b/data/hfopenllm_v2/flammenai/Mahou-1.5-mistral-nemo-12B/4aeef94f-823e-4be5-b4f1-37463e052748.json new file mode 100644 index 000000000..818a33929 --- /dev/null +++ b/data/hfopenllm_v2/flammenai/Mahou-1.5-mistral-nemo-12B/4aeef94f-823e-4be5-b4f1-37463e052748.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/flammenai_Mahou-1.5-mistral-nemo-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mahou-1.5-mistral-nemo-12B", + "id": "flammenai/Mahou-1.5-mistral-nemo-12B", + "developer": "flammenai", + "inference_platform": 
"unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6751 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5522 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0869 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.452 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3602 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/flammenai/flammen15-gutenberg-DPO-v1-7B/1244b8d9-e832-4f2b-8ae5-52449f6ac38c.json b/data/hfopenllm_v2/flammenai/flammen15-gutenberg-DPO-v1-7B/1244b8d9-e832-4f2b-8ae5-52449f6ac38c.json deleted file mode 100644 index b76160886..000000000 --- a/data/hfopenllm_v2/flammenai/flammen15-gutenberg-DPO-v1-7B/1244b8d9-e832-4f2b-8ae5-52449f6ac38c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/flammenai_flammen15-gutenberg-DPO-v1-7B/1762652580.155953", - "retrieved_timestamp": "1762652580.155954", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "flammenai/flammen15-gutenberg-DPO-v1-7B", - "developer": "flammenai", - "inference_platform": "unknown", - "id": "flammenai/flammen15-gutenberg-DPO-v1-7B", - "additional_details": { - "precision": "bfloat16", 
- "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47980580415519714 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5202983979716951 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07628398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4293125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3185671542553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/flammenai/flammen15-gutenberg-DPO-v1-7B/3d367147-373f-4543-be19-55a6429558a2.json b/data/hfopenllm_v2/flammenai/flammen15-gutenberg-DPO-v1-7B/3d367147-373f-4543-be19-55a6429558a2.json new file mode 100644 index 000000000..fd18593b8 --- /dev/null +++ b/data/hfopenllm_v2/flammenai/flammen15-gutenberg-DPO-v1-7B/3d367147-373f-4543-be19-55a6429558a2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/flammenai_flammen15-gutenberg-DPO-v1-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "flammen15-gutenberg-DPO-v1-7B", + "id": "flammenai/flammen15-gutenberg-DPO-v1-7B", + "developer": "flammenai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4798 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5203 + } + }, + { + "evaluation_name": "MATH Level 5", + 
"source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0763 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2844 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4293 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3186 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/fluently-lm/FluentlyLM-Prinum/950d2518-7245-4ed4-9b16-91f944aa8f15.json b/data/hfopenllm_v2/fluently-lm/FluentlyLM-Prinum/950d2518-7245-4ed4-9b16-91f944aa8f15.json deleted file mode 100644 index 0d1c625bb..000000000 --- a/data/hfopenllm_v2/fluently-lm/FluentlyLM-Prinum/950d2518-7245-4ed4-9b16-91f944aa8f15.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/fluently-lm_FluentlyLM-Prinum/1762652580.156252", - "retrieved_timestamp": "1762652580.1562529", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "fluently-lm/FluentlyLM-Prinum", - "developer": "fluently-lm", - "inference_platform": "unknown", - "id": "fluently-lm/FluentlyLM-Prinum", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.809033364805383 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7143813967889198 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5400302114803626 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy 
on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38674496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44714583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5807845744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/fluently-lm/FluentlyLM-Prinum/cb93091a-6c46-438a-b111-cbf7e2fac420.json b/data/hfopenllm_v2/fluently-lm/FluentlyLM-Prinum/cb93091a-6c46-438a-b111-cbf7e2fac420.json new file mode 100644 index 000000000..921cf029a --- /dev/null +++ b/data/hfopenllm_v2/fluently-lm/FluentlyLM-Prinum/cb93091a-6c46-438a-b111-cbf7e2fac420.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/fluently-lm_FluentlyLM-Prinum/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FluentlyLM-Prinum", + "id": "fluently-lm/FluentlyLM-Prinum", + "developer": "fluently-lm", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.809 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7144 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.54 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3867 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.4471 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5808 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/fluently-lm/Llama-TI-8B-Instruct/47960f3f-b39c-4641-8a94-fb70f9a6a53f.json b/data/hfopenllm_v2/fluently-lm/Llama-TI-8B-Instruct/47960f3f-b39c-4641-8a94-fb70f9a6a53f.json deleted file mode 100644 index 552b1a0db..000000000 --- a/data/hfopenllm_v2/fluently-lm/Llama-TI-8B-Instruct/47960f3f-b39c-4641-8a94-fb70f9a6a53f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/fluently-lm_Llama-TI-8B-Instruct/1762652580.156872", - "retrieved_timestamp": "1762652580.156876", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "fluently-lm/Llama-TI-8B-Instruct", - "developer": "fluently-lm", - "inference_platform": "unknown", - "id": "fluently-lm/Llama-TI-8B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7716392505219485 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5252143041749421 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23036253776435045 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38134375000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37258976063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/fluently-lm/Llama-TI-8B-Instruct/ea6048f1-8be4-4ec8-a5d5-35ff1523d74a.json b/data/hfopenllm_v2/fluently-lm/Llama-TI-8B-Instruct/ea6048f1-8be4-4ec8-a5d5-35ff1523d74a.json new file mode 100644 index 000000000..9c0f198a8 --- /dev/null +++ 
b/data/hfopenllm_v2/fluently-lm/Llama-TI-8B-Instruct/ea6048f1-8be4-4ec8-a5d5-35ff1523d74a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/fluently-lm_Llama-TI-8B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-TI-8B-Instruct", + "id": "fluently-lm/Llama-TI-8B-Instruct", + "developer": "fluently-lm", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7716 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5252 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2304 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3813 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3726 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/fluently-lm/Llama-TI-8B/f4dc1659-800f-49d2-a290-48e9d4b15581.json b/data/hfopenllm_v2/fluently-lm/Llama-TI-8B/f4dc1659-800f-49d2-a290-48e9d4b15581.json new file mode 100644 index 000000000..bacff0c4e --- /dev/null +++ b/data/hfopenllm_v2/fluently-lm/Llama-TI-8B/f4dc1659-800f-49d2-a290-48e9d4b15581.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/fluently-lm_Llama-TI-8B/1770682486.623709", + "retrieved_timestamp": 
"1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-TI-8B", + "id": "fluently-lm/Llama-TI-8B", + "developer": "fluently-lm", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.288 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5201 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1964 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4103 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.344 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/fluently-sets/FalconThink3-10B-IT/9329922e-7594-497d-bfab-9c8a18300dc9.json b/data/hfopenllm_v2/fluently-sets/FalconThink3-10B-IT/9329922e-7594-497d-bfab-9c8a18300dc9.json deleted file mode 100644 index 1efa1b09a..000000000 --- a/data/hfopenllm_v2/fluently-sets/FalconThink3-10B-IT/9329922e-7594-497d-bfab-9c8a18300dc9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/fluently-sets_FalconThink3-10B-IT/1762652580.1573172", - "retrieved_timestamp": "1762652580.1573179", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": 
"third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "fluently-sets/FalconThink3-10B-IT", - "developer": "fluently-sets", - "inference_platform": "unknown", - "id": "fluently-sets/FalconThink3-10B-IT", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7326216660682544 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.620016981648187 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24471299093655588 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3347315436241611 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44788541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4434840425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/fluently-sets/FalconThink3-10B-IT/d4d8a784-5bd5-4437-8e0d-75dcb967ae33.json b/data/hfopenllm_v2/fluently-sets/FalconThink3-10B-IT/d4d8a784-5bd5-4437-8e0d-75dcb967ae33.json new file mode 100644 index 000000000..9c1acfca1 --- /dev/null +++ b/data/hfopenllm_v2/fluently-sets/FalconThink3-10B-IT/d4d8a784-5bd5-4437-8e0d-75dcb967ae33.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/fluently-sets_FalconThink3-10B-IT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FalconThink3-10B-IT", + "id": "fluently-sets/FalconThink3-10B-IT", + "developer": "fluently-sets", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7326 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": 
"SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.62 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2447 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3347 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4479 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4435 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/fluently-sets/reasoning-1-1k-demo/91017e73-f33a-49f5-ac87-f6e6a178d885.json b/data/hfopenllm_v2/fluently-sets/reasoning-1-1k-demo/91017e73-f33a-49f5-ac87-f6e6a178d885.json new file mode 100644 index 000000000..3cfba9c12 --- /dev/null +++ b/data/hfopenllm_v2/fluently-sets/reasoning-1-1k-demo/91017e73-f33a-49f5-ac87-f6e6a178d885.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/fluently-sets_reasoning-1-1k-demo/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "reasoning-1-1k-demo", + "id": "fluently-sets/reasoning-1-1k-demo", + "developer": "fluently-sets", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7525 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.6397 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4282 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3356 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4061 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4774 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/fluently-sets/reasoning-1-1k-demo/c63fc7e4-87ae-4516-ad3d-df95693133d5.json b/data/hfopenllm_v2/fluently-sets/reasoning-1-1k-demo/c63fc7e4-87ae-4516-ad3d-df95693133d5.json deleted file mode 100644 index 06eba7f85..000000000 --- a/data/hfopenllm_v2/fluently-sets/reasoning-1-1k-demo/c63fc7e4-87ae-4516-ad3d-df95693133d5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/fluently-sets_reasoning-1-1k-demo/1762652580.157624", - "retrieved_timestamp": "1762652580.1576252", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "fluently-sets/reasoning-1-1k-demo", - "developer": "fluently-sets", - "inference_platform": "unknown", - "id": "fluently-sets/reasoning-1-1k-demo", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7524800861713586 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6396692351083745 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4282477341389728 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33557046979865773 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4060625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4773936170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/formulae/mita-elite-sce-gen1.1-v1-7b-2-26-2025-exp/936751f5-4483-4986-9a8c-cb002feb8858.json b/data/hfopenllm_v2/formulae/mita-elite-sce-gen1.1-v1-7b-2-26-2025-exp/936751f5-4483-4986-9a8c-cb002feb8858.json deleted file mode 100644 index 9e650aed0..000000000 --- a/data/hfopenllm_v2/formulae/mita-elite-sce-gen1.1-v1-7b-2-26-2025-exp/936751f5-4483-4986-9a8c-cb002feb8858.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/formulae_mita-elite-sce-gen1.1-v1-7b-2-26-2025-exp/1762652580.1578538", - "retrieved_timestamp": "1762652580.157855", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "formulae/mita-elite-sce-gen1.1-v1-7b-2-26-2025-exp", - "developer": "formulae", - "inference_platform": "unknown", - "id": "formulae/mita-elite-sce-gen1.1-v1-7b-2-26-2025-exp", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16139288199754429 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29763925404210967 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0015105740181268882 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4219375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11735372340425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/formulae/mita-elite-sce-gen1.1-v1-7b-2-26-2025-exp/b7a75bca-6afe-448a-8e5c-53ebd577c964.json b/data/hfopenllm_v2/formulae/mita-elite-sce-gen1.1-v1-7b-2-26-2025-exp/b7a75bca-6afe-448a-8e5c-53ebd577c964.json new file mode 100644 index 000000000..dc662457b --- /dev/null +++ b/data/hfopenllm_v2/formulae/mita-elite-sce-gen1.1-v1-7b-2-26-2025-exp/b7a75bca-6afe-448a-8e5c-53ebd577c964.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/formulae_mita-elite-sce-gen1.1-v1-7b-2-26-2025-exp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mita-elite-sce-gen1.1-v1-7b-2-26-2025-exp", + "id": "formulae/mita-elite-sce-gen1.1-v1-7b-2-26-2025-exp", + "developer": "formulae", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1614 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2976 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0015 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2534 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4219 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1174 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/formulae/mita-elite-v1.1-7b-2-25-2025/7352f47c-8b57-477f-8190-b08b5b23dfb5.json b/data/hfopenllm_v2/formulae/mita-elite-v1.1-7b-2-25-2025/7352f47c-8b57-477f-8190-b08b5b23dfb5.json deleted file mode 100644 index e726f2d9f..000000000 --- a/data/hfopenllm_v2/formulae/mita-elite-v1.1-7b-2-25-2025/7352f47c-8b57-477f-8190-b08b5b23dfb5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/formulae_mita-elite-v1.1-7b-2-25-2025/1762652580.158112", - "retrieved_timestamp": "1762652580.158113", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "formulae/mita-elite-v1.1-7b-2-25-2025", - "developer": "formulae", - "inference_platform": "unknown", - "id": "formulae/mita-elite-v1.1-7b-2-25-2025", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1249728498162653 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28673660666639783 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2483221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3487291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10979055851063829 - } - } - ] -} diff --git a/data/hfopenllm_v2/formulae/mita-elite-v1.1-7b-2-25-2025/8cdced5c-23bc-4426-a0c9-b9bf82913683.json b/data/hfopenllm_v2/formulae/mita-elite-v1.1-7b-2-25-2025/8cdced5c-23bc-4426-a0c9-b9bf82913683.json new file mode 100644 index 000000000..735daf408 --- /dev/null +++ b/data/hfopenllm_v2/formulae/mita-elite-v1.1-7b-2-25-2025/8cdced5c-23bc-4426-a0c9-b9bf82913683.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/formulae_mita-elite-v1.1-7b-2-25-2025/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"mita-elite-v1.1-7b-2-25-2025", + "id": "formulae/mita-elite-v1.1-7b-2-25-2025", + "developer": "formulae", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.125 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2867 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2483 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3487 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1098 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/formulae/mita-elite-v1.1-gen2-7b-2-25-2025/106c33d2-84fb-4ea3-b2d3-78981834fdb0.json b/data/hfopenllm_v2/formulae/mita-elite-v1.1-gen2-7b-2-25-2025/106c33d2-84fb-4ea3-b2d3-78981834fdb0.json deleted file mode 100644 index eced4eb1e..000000000 --- a/data/hfopenllm_v2/formulae/mita-elite-v1.1-gen2-7b-2-25-2025/106c33d2-84fb-4ea3-b2d3-78981834fdb0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/formulae_mita-elite-v1.1-gen2-7b-2-25-2025/1762652580.158336", - "retrieved_timestamp": "1762652580.158336", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "formulae/mita-elite-v1.1-gen2-7b-2-25-2025", - "developer": 
"formulae", - "inference_platform": "unknown", - "id": "formulae/mita-elite-v1.1-gen2-7b-2-25-2025", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14108454456397912 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.292375183445424 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35409375000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11012300531914894 - } - } - ] -} diff --git a/data/hfopenllm_v2/formulae/mita-elite-v1.1-gen2-7b-2-25-2025/368784c8-6fc2-4340-8277-a6a9a9800a99.json b/data/hfopenllm_v2/formulae/mita-elite-v1.1-gen2-7b-2-25-2025/368784c8-6fc2-4340-8277-a6a9a9800a99.json new file mode 100644 index 000000000..c6044805f --- /dev/null +++ b/data/hfopenllm_v2/formulae/mita-elite-v1.1-gen2-7b-2-25-2025/368784c8-6fc2-4340-8277-a6a9a9800a99.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/formulae_mita-elite-v1.1-gen2-7b-2-25-2025/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mita-elite-v1.1-gen2-7b-2-25-2025", + "id": "formulae/mita-elite-v1.1-gen2-7b-2-25-2025", + "developer": "formulae", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1411 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2924 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2525 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3541 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1101 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/formulae/mita-elite-v1.2-7b-2-26-2025/761560dc-3a0b-481f-8ec2-4d1ea97cfa6f.json b/data/hfopenllm_v2/formulae/mita-elite-v1.2-7b-2-26-2025/761560dc-3a0b-481f-8ec2-4d1ea97cfa6f.json deleted file mode 100644 index c0b55f915..000000000 --- a/data/hfopenllm_v2/formulae/mita-elite-v1.2-7b-2-26-2025/761560dc-3a0b-481f-8ec2-4d1ea97cfa6f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/formulae_mita-elite-v1.2-7b-2-26-2025/1762652580.158752", - "retrieved_timestamp": "1762652580.158756", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "formulae/mita-elite-v1.2-7b-2-26-2025", - "developer": "formulae", - "inference_platform": "unknown", - "id": "formulae/mita-elite-v1.2-7b-2-26-2025", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14800396281865452 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29300480737441686 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0022658610271903325 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4286666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1186003989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/formulae/mita-elite-v1.2-7b-2-26-2025/f7ddf26b-4b4c-404b-b9d3-6ceaf78d39aa.json b/data/hfopenllm_v2/formulae/mita-elite-v1.2-7b-2-26-2025/f7ddf26b-4b4c-404b-b9d3-6ceaf78d39aa.json new file mode 100644 index 000000000..218786c8e --- /dev/null +++ b/data/hfopenllm_v2/formulae/mita-elite-v1.2-7b-2-26-2025/f7ddf26b-4b4c-404b-b9d3-6ceaf78d39aa.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/formulae_mita-elite-v1.2-7b-2-26-2025/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mita-elite-v1.2-7b-2-26-2025", + "id": "formulae/mita-elite-v1.2-7b-2-26-2025", + "developer": "formulae", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.148 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.293 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0023 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + 
"dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4287 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1186 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/formulae/mita-gen3-7b-2-26-2025/0aa40e02-762d-4a80-932f-f967057c4f50.json b/data/hfopenllm_v2/formulae/mita-gen3-7b-2-26-2025/0aa40e02-762d-4a80-932f-f967057c4f50.json deleted file mode 100644 index d7079584f..000000000 --- a/data/hfopenllm_v2/formulae/mita-gen3-7b-2-26-2025/0aa40e02-762d-4a80-932f-f967057c4f50.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/formulae_mita-gen3-7b-2-26-2025/1762652580.159164", - "retrieved_timestamp": "1762652580.159165", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "formulae/mita-gen3-7b-2-26-2025", - "developer": "formulae", - "inference_platform": "unknown", - "id": "formulae/mita-gen3-7b-2-26-2025", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1964144026737944 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2915705776174771 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0022658610271903325 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3912083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11236702127659574 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/formulae/mita-gen3-7b-2-26-2025/f423b0d1-3536-4865-9615-f89b9d15b14c.json b/data/hfopenllm_v2/formulae/mita-gen3-7b-2-26-2025/f423b0d1-3536-4865-9615-f89b9d15b14c.json new file mode 100644 index 000000000..45585cf79 --- /dev/null +++ b/data/hfopenllm_v2/formulae/mita-gen3-7b-2-26-2025/f423b0d1-3536-4865-9615-f89b9d15b14c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/formulae_mita-gen3-7b-2-26-2025/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mita-gen3-7b-2-26-2025", + "id": "formulae/mita-gen3-7b-2-26-2025", + "developer": "formulae", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1964 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2916 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0023 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3912 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1124 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/formulae/mita-gen3-v1.2-7b-2-26-2025/a28f8779-d2df-4371-b946-472b335f3ca3.json b/data/hfopenllm_v2/formulae/mita-gen3-v1.2-7b-2-26-2025/a28f8779-d2df-4371-b946-472b335f3ca3.json deleted file mode 100644 index 
1a5064d15..000000000 --- a/data/hfopenllm_v2/formulae/mita-gen3-v1.2-7b-2-26-2025/a28f8779-d2df-4371-b946-472b335f3ca3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/formulae_mita-gen3-v1.2-7b-2-26-2025/1762652580.15945", - "retrieved_timestamp": "1762652580.1594508", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "formulae/mita-gen3-v1.2-7b-2-26-2025", - "developer": "formulae", - "inference_platform": "unknown", - "id": "formulae/mita-gen3-v1.2-7b-2-26-2025", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2043577707150361 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30577476935056 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0022658610271903325 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38999999999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11278257978723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/formulae/mita-gen3-v1.2-7b-2-26-2025/c7e8333d-1d79-4cfa-9833-fa42f9fcbb4b.json b/data/hfopenllm_v2/formulae/mita-gen3-v1.2-7b-2-26-2025/c7e8333d-1d79-4cfa-9833-fa42f9fcbb4b.json new file mode 100644 index 000000000..edcc1062c --- /dev/null +++ b/data/hfopenllm_v2/formulae/mita-gen3-v1.2-7b-2-26-2025/c7e8333d-1d79-4cfa-9833-fa42f9fcbb4b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/formulae_mita-gen3-v1.2-7b-2-26-2025/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mita-gen3-v1.2-7b-2-26-2025", + "id": "formulae/mita-gen3-v1.2-7b-2-26-2025", + "developer": "formulae", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": 
"Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2044 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3058 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0023 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.39 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1128 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/formulae/mita-math-v2.3-2-25-2025/b6149d15-3e0f-43d2-ae90-eca290a94edb.json b/data/hfopenllm_v2/formulae/mita-math-v2.3-2-25-2025/b6149d15-3e0f-43d2-ae90-eca290a94edb.json new file mode 100644 index 000000000..e68dfb118 --- /dev/null +++ b/data/hfopenllm_v2/formulae/mita-math-v2.3-2-25-2025/b6149d15-3e0f-43d2-ae90-eca290a94edb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/formulae_mita-math-v2.3-2-25-2025/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mita-math-v2.3-2-25-2025", + "id": "formulae/mita-math-v2.3-2-25-2025", + "developer": "formulae", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, 
+ "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1373 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2949 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2508 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3698 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1118 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/formulae/mita-math-v2.3-2-25-2025/fa005333-c7b5-4494-a8cb-4edb1f7d00b9.json b/data/hfopenllm_v2/formulae/mita-math-v2.3-2-25-2025/fa005333-c7b5-4494-a8cb-4edb1f7d00b9.json deleted file mode 100644 index 3173171e7..000000000 --- a/data/hfopenllm_v2/formulae/mita-math-v2.3-2-25-2025/fa005333-c7b5-4494-a8cb-4edb1f7d00b9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/formulae_mita-math-v2.3-2-25-2025/1762652580.159737", - "retrieved_timestamp": "1762652580.159738", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "formulae/mita-math-v2.3-2-25-2025", - "developer": "formulae", - "inference_platform": "unknown", - "id": "formulae/mita-math-v2.3-2-25-2025", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.13733781920858879 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2949403673764691 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25083892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36975 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11178523936170212 - } - } - ] -} diff --git a/data/hfopenllm_v2/formulae/mita-v1-7b/9c629542-6fd0-4cd1-90c7-7f1e95a7a25e.json b/data/hfopenllm_v2/formulae/mita-v1-7b/9c629542-6fd0-4cd1-90c7-7f1e95a7a25e.json deleted file mode 100644 index c333ba4ea..000000000 --- a/data/hfopenllm_v2/formulae/mita-v1-7b/9c629542-6fd0-4cd1-90c7-7f1e95a7a25e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/formulae_mita-v1-7b/1762652580.160087", - "retrieved_timestamp": "1762652580.160088", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "formulae/mita-v1-7b", - "developer": "formulae", - "inference_platform": "unknown", - "id": "formulae/mita-v1-7b", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19723888172271792 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3003216459152819 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.002265861027190332 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41520833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1146941489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/formulae/mita-v1-7b/e21f5d83-6b71-488d-ad55-d23268fbd611.json b/data/hfopenllm_v2/formulae/mita-v1-7b/e21f5d83-6b71-488d-ad55-d23268fbd611.json new file mode 100644 index 000000000..fd92b7206 --- /dev/null +++ b/data/hfopenllm_v2/formulae/mita-v1-7b/e21f5d83-6b71-488d-ad55-d23268fbd611.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/formulae_mita-v1-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mita-v1-7b", + "id": "formulae/mita-v1-7b", + "developer": "formulae", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1972 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0023 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4152 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1147 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/formulae/mita-v1.1-7b-2-24-2025/332cbdd8-96b7-40d5-87c6-3610dcbcdc54.json b/data/hfopenllm_v2/formulae/mita-v1.1-7b-2-24-2025/332cbdd8-96b7-40d5-87c6-3610dcbcdc54.json deleted file mode 100644 index 8dace8363..000000000 --- a/data/hfopenllm_v2/formulae/mita-v1.1-7b-2-24-2025/332cbdd8-96b7-40d5-87c6-3610dcbcdc54.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/formulae_mita-v1.1-7b-2-24-2025/1762652580.1604211", - "retrieved_timestamp": "1762652580.1604218", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "formulae/mita-v1.1-7b-2-24-2025", - "developer": "formulae", - "inference_platform": "unknown", - "id": "formulae/mita-v1.1-7b-2-24-2025", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34122018466557624 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5442430910797442 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45569791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4523769946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/formulae/mita-v1.1-7b-2-24-2025/68e1a42e-4318-4b5a-a45b-2607b7c2fe05.json b/data/hfopenllm_v2/formulae/mita-v1.1-7b-2-24-2025/68e1a42e-4318-4b5a-a45b-2607b7c2fe05.json new file mode 100644 index 000000000..354c32cae --- /dev/null +++ b/data/hfopenllm_v2/formulae/mita-v1.1-7b-2-24-2025/68e1a42e-4318-4b5a-a45b-2607b7c2fe05.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/formulae_mita-v1.1-7b-2-24-2025/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + 
"source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mita-v1.1-7b-2-24-2025", + "id": "formulae/mita-v1.1-7b-2-24-2025", + "developer": "formulae", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3412 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5442 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.435 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3146 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4557 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4524 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/formulae/mita-v1.2-7b-2-24-2025/12a03ffb-d66b-4d00-a43b-fd5be80e1b07.json b/data/hfopenllm_v2/formulae/mita-v1.2-7b-2-24-2025/12a03ffb-d66b-4d00-a43b-fd5be80e1b07.json new file mode 100644 index 000000000..514af2af5 --- /dev/null +++ b/data/hfopenllm_v2/formulae/mita-v1.2-7b-2-24-2025/12a03ffb-d66b-4d00-a43b-fd5be80e1b07.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/formulae_mita-v1.2-7b-2-24-2025/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mita-v1.2-7b-2-24-2025", + "id": "formulae/mita-v1.2-7b-2-24-2025", + "developer": 
"formulae", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2564 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4919 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4879 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4344 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3359 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/formulae/mita-v1.2-7b-2-24-2025/a07149d4-66e5-4a0d-b4ae-b696027e821c.json b/data/hfopenllm_v2/formulae/mita-v1.2-7b-2-24-2025/a07149d4-66e5-4a0d-b4ae-b696027e821c.json deleted file mode 100644 index f8c72597f..000000000 --- a/data/hfopenllm_v2/formulae/mita-v1.2-7b-2-24-2025/a07149d4-66e5-4a0d-b4ae-b696027e821c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/formulae_mita-v1.2-7b-2-24-2025/1762652580.160727", - "retrieved_timestamp": "1762652580.160728", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "formulae/mita-v1.2-7b-2-24-2025", - "developer": "formulae", - "inference_platform": "unknown", - "id": "formulae/mita-v1.2-7b-2-24-2025", - "additional_details": { - "precision": "bfloat16", - 
"architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.256415200556745 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4919464940215105 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4879154078549849 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4343958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33585438829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/frameai/Loxa-4B/adbad8dc-7d13-44cc-a5c6-e8da1de27c37.json b/data/hfopenllm_v2/frameai/Loxa-4B/adbad8dc-7d13-44cc-a5c6-e8da1de27c37.json new file mode 100644 index 000000000..3d74cbfc7 --- /dev/null +++ b/data/hfopenllm_v2/frameai/Loxa-4B/adbad8dc-7d13-44cc-a5c6-e8da1de27c37.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/frameai_Loxa-4B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Loxa-4B", + "id": "frameai/Loxa-4B", + "developer": "frameai", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.018 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4765 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4217 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + 
"metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1095 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3377 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/frameai/Loxa-4B/b8ac82ef-a231-43ee-aaf2-23b0830cfbc3.json b/data/hfopenllm_v2/frameai/Loxa-4B/b8ac82ef-a231-43ee-aaf2-23b0830cfbc3.json deleted file mode 100644 index 35d79e6ae..000000000 --- a/data/hfopenllm_v2/frameai/Loxa-4B/b8ac82ef-a231-43ee-aaf2-23b0830cfbc3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/frameai_Loxa-4B/1762652580.160984", - "retrieved_timestamp": "1762652580.160984", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "frameai/Loxa-4B", - "developer": "frameai", - "inference_platform": "unknown", - "id": "frameai/Loxa-4B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.018 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47648350820268 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42171373309002896 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1095166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33765625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28016954787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/freewheelin/free-evo-qwen72b-v0.8-re/7fb595e5-abbc-43ff-8135-c4bb4a2ea593.json b/data/hfopenllm_v2/freewheelin/free-evo-qwen72b-v0.8-re/7fb595e5-abbc-43ff-8135-c4bb4a2ea593.json new file mode 100644 index 000000000..86d886da9 --- /dev/null +++ b/data/hfopenllm_v2/freewheelin/free-evo-qwen72b-v0.8-re/7fb595e5-abbc-43ff-8135-c4bb4a2ea593.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/freewheelin_free-evo-qwen72b-v0.8-re/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "free-evo-qwen72b-v0.8-re", + "id": "freewheelin/free-evo-qwen72b-v0.8-re", + "developer": "freewheelin", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 72.288 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5331 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6127 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1805 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3565 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4872 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" 
+ }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.487 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/freewheelin/free-solar-evo-v0.1/1bb09da7-1675-4e57-b46a-9791c888ce6f.json b/data/hfopenllm_v2/freewheelin/free-solar-evo-v0.1/1bb09da7-1675-4e57-b46a-9791c888ce6f.json new file mode 100644 index 000000000..d246d43e9 --- /dev/null +++ b/data/hfopenllm_v2/freewheelin/free-solar-evo-v0.1/1bb09da7-1675-4e57-b46a-9791c888ce6f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/freewheelin_free-solar-evo-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "free-solar-evo-v0.1", + "id": "freewheelin/free-solar-evo-v0.1", + "developer": "freewheelin", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.205 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4502 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0083 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4946 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3414 + } + } + ] +} \ No 
newline at end of file diff --git a/data/hfopenllm_v2/freewheelin/free-solar-evo-v0.1/c2438204-5b2b-41ce-aa95-27afad6f61a9.json b/data/hfopenllm_v2/freewheelin/free-solar-evo-v0.1/c2438204-5b2b-41ce-aa95-27afad6f61a9.json deleted file mode 100644 index 5ddab5ca2..000000000 --- a/data/hfopenllm_v2/freewheelin/free-solar-evo-v0.1/c2438204-5b2b-41ce-aa95-27afad6f61a9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/freewheelin_free-solar-evo-v0.1/1762652580.16175", - "retrieved_timestamp": "1762652580.161752", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "freewheelin/free-solar-evo-v0.1", - "developer": "freewheelin", - "inference_platform": "unknown", - "id": "freewheelin/free-solar-evo-v0.1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20500715878313985 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4502211109638701 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4945833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3414228723404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/freewheelin/free-solar-evo-v0.11/3ed7dd5a-e431-480a-91a7-5ccd915057e4.json b/data/hfopenllm_v2/freewheelin/free-solar-evo-v0.11/3ed7dd5a-e431-480a-91a7-5ccd915057e4.json new file mode 100644 index 000000000..75ddbaee6 --- /dev/null +++ b/data/hfopenllm_v2/freewheelin/free-solar-evo-v0.11/3ed7dd5a-e431-480a-91a7-5ccd915057e4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/freewheelin_free-solar-evo-v0.11/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"free-solar-evo-v0.11", + "id": "freewheelin/free-solar-evo-v0.11", + "developer": "freewheelin", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2027 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4545 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0083 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5052 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3467 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/freewheelin/free-solar-evo-v0.11/d2180e09-02da-48d2-adf6-1710299b272e.json b/data/hfopenllm_v2/freewheelin/free-solar-evo-v0.11/d2180e09-02da-48d2-adf6-1710299b272e.json deleted file mode 100644 index 0a2cdd3a3..000000000 --- a/data/hfopenllm_v2/freewheelin/free-solar-evo-v0.11/d2180e09-02da-48d2-adf6-1710299b272e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/freewheelin_free-solar-evo-v0.11/1762652580.1621969", - "retrieved_timestamp": "1762652580.162198", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "freewheelin/free-solar-evo-v0.11", - "developer": "freewheelin", - "inference_platform": "unknown", - "id": 
"freewheelin/free-solar-evo-v0.11", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20265894493277836 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4545155032474882 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5052187499999999 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34674202127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/freewheelin/free-solar-evo-v0.13/6f6887bf-961c-4b6b-a285-a78459a46488.json b/data/hfopenllm_v2/freewheelin/free-solar-evo-v0.13/6f6887bf-961c-4b6b-a285-a78459a46488.json deleted file mode 100644 index d850bdfb9..000000000 --- a/data/hfopenllm_v2/freewheelin/free-solar-evo-v0.13/6f6887bf-961c-4b6b-a285-a78459a46488.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/freewheelin_free-solar-evo-v0.13/1762652580.1624699", - "retrieved_timestamp": "1762652580.1624708", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "freewheelin/free-solar-evo-v0.13", - "developer": "freewheelin", - "inference_platform": "unknown", - "id": "freewheelin/free-solar-evo-v0.13", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2320598234905606 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4554839670962904 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.50515625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34699135638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/freewheelin/free-solar-evo-v0.13/9cab35b6-d6a7-475e-b715-e4493d07cd92.json b/data/hfopenllm_v2/freewheelin/free-solar-evo-v0.13/9cab35b6-d6a7-475e-b715-e4493d07cd92.json new file mode 100644 index 000000000..e1d46c7d8 --- /dev/null +++ b/data/hfopenllm_v2/freewheelin/free-solar-evo-v0.13/9cab35b6-d6a7-475e-b715-e4493d07cd92.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/freewheelin_free-solar-evo-v0.13/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "free-solar-evo-v0.13", + "id": "freewheelin/free-solar-evo-v0.13", + "developer": "freewheelin", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2321 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4555 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0121 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2886 + } + }, + { 
+ "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5052 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.347 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/fulim/FineLlama-3.1-8B/ef7149ae-8d50-4890-89ae-fb561a86d130.json b/data/hfopenllm_v2/fulim/FineLlama-3.1-8B/ef7149ae-8d50-4890-89ae-fb561a86d130.json new file mode 100644 index 000000000..9613a08ac --- /dev/null +++ b/data/hfopenllm_v2/fulim/FineLlama-3.1-8B/ef7149ae-8d50-4890-89ae-fb561a86d130.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/fulim_FineLlama-3.1-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FineLlama-3.1-8B", + "id": "fulim/FineLlama-3.1-8B", + "developer": "fulim", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1439 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4569 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0476 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.3867 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3167 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/gabrielmbmb/SmolLM-1.7B-Instruct-IFEval/3fa14e1f-82a5-4c04-9c76-2a3f6d56aa81.json b/data/hfopenllm_v2/gabrielmbmb/SmolLM-1.7B-Instruct-IFEval/3fa14e1f-82a5-4c04-9c76-2a3f6d56aa81.json new file mode 100644 index 000000000..690658385 --- /dev/null +++ b/data/hfopenllm_v2/gabrielmbmb/SmolLM-1.7B-Instruct-IFEval/3fa14e1f-82a5-4c04-9c76-2a3f6d56aa81.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/gabrielmbmb_SmolLM-1.7B-Instruct-IFEval/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SmolLM-1.7B-Instruct-IFEval", + "id": "gabrielmbmb/SmolLM-1.7B-Instruct-IFEval", + "developer": "gabrielmbmb", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.711 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2306 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3138 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0106 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2534 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3328 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + 
"hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1156 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/gabrielmbmb/SmolLM-1.7B-Instruct-IFEval/6e3decae-f2a9-4f71-9511-76d28a675cc2.json b/data/hfopenllm_v2/gabrielmbmb/SmolLM-1.7B-Instruct-IFEval/6e3decae-f2a9-4f71-9511-76d28a675cc2.json deleted file mode 100644 index 10a27cadd..000000000 --- a/data/hfopenllm_v2/gabrielmbmb/SmolLM-1.7B-Instruct-IFEval/6e3decae-f2a9-4f71-9511-76d28a675cc2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/gabrielmbmb_SmolLM-1.7B-Instruct-IFEval/1762652580.162997", - "retrieved_timestamp": "1762652580.162998", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "gabrielmbmb/SmolLM-1.7B-Instruct-IFEval", - "developer": "gabrielmbmb", - "inference_platform": "unknown", - "id": "gabrielmbmb/SmolLM-1.7B-Instruct-IFEval", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.711 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23058595637353335 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313843378282092 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33276041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11560837765957446 - } - } - ] -} diff --git a/data/hfopenllm_v2/gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA/3666aa17-279d-4f0b-a6c2-2c8198729df9.json b/data/hfopenllm_v2/gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA/3666aa17-279d-4f0b-a6c2-2c8198729df9.json deleted file mode 100644 index 4147e1a0b..000000000 --- a/data/hfopenllm_v2/gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA/3666aa17-279d-4f0b-a6c2-2c8198729df9.json +++ /dev/null 
@@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/gaverfraxz_Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA/1762652580.163272", - "retrieved_timestamp": "1762652580.1632729", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA", - "developer": "gaverfraxz", - "inference_platform": "unknown", - "id": "gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40094615619888563 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3984844272016949 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36504166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16539228723404256 - } - } - ] -} diff --git a/data/hfopenllm_v2/gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA/4418c7d1-72da-4ed3-9d5c-9d8520f6641c.json b/data/hfopenllm_v2/gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA/4418c7d1-72da-4ed3-9d5c-9d8520f6641c.json new file mode 100644 index 000000000..f193f5a9b --- /dev/null +++ b/data/hfopenllm_v2/gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA/4418c7d1-72da-4ed3-9d5c-9d8520f6641c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/gaverfraxz_Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA", + "id": "gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA", + "developer": "gaverfraxz", + "inference_platform": "unknown", + 
"additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4009 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3985 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0196 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2844 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.365 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1654 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES/83a638be-6f3d-4d5b-b1de-6515634aebbd.json b/data/hfopenllm_v2/gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES/83a638be-6f3d-4d5b-b1de-6515634aebbd.json deleted file mode 100644 index c5db313db..000000000 --- a/data/hfopenllm_v2/gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES/83a638be-6f3d-4d5b-b1de-6515634aebbd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/gaverfraxz_Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES/1762652580.163549", - "retrieved_timestamp": "1762652580.16355", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES", - "developer": "gaverfraxz", - "inference_platform": "unknown", - "id": 
"gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45505148561372716 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5043660783243713 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36785239361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES/8fe13380-a045-4d63-96f8-ec977540478c.json b/data/hfopenllm_v2/gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES/8fe13380-a045-4d63-96f8-ec977540478c.json new file mode 100644 index 000000000..280b09aa0 --- /dev/null +++ b/data/hfopenllm_v2/gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES/8fe13380-a045-4d63-96f8-ec977540478c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/gaverfraxz_Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES", + "id": "gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES", + "developer": "gaverfraxz", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4551 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5044 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1299 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3738 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3679 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/gbueno86/Brinebreath-Llama-3.1-70B/6da42427-c7de-4830-b368-ca7757ee1d51.json b/data/hfopenllm_v2/gbueno86/Brinebreath-Llama-3.1-70B/6da42427-c7de-4830-b368-ca7757ee1d51.json new file mode 100644 index 000000000..700b6cf24 --- /dev/null +++ b/data/hfopenllm_v2/gbueno86/Brinebreath-Llama-3.1-70B/6da42427-c7de-4830-b368-ca7757ee1d51.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/gbueno86_Brinebreath-Llama-3.1-70B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Brinebreath-Llama-3.1-70B", + "id": "gbueno86/Brinebreath-Llama-3.1-70B", + "developer": "gbueno86", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5533 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6881 + } + }, + { + "evaluation_name": 
"MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2976 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3465 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4541 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5196 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/gbueno86/Meta-LLama-3-Cat-Smaug-LLama-70b/5faf24b3-38af-4f3f-8377-bba70d75f8df.json b/data/hfopenllm_v2/gbueno86/Meta-LLama-3-Cat-Smaug-LLama-70b/5faf24b3-38af-4f3f-8377-bba70d75f8df.json new file mode 100644 index 000000000..8becc522b --- /dev/null +++ b/data/hfopenllm_v2/gbueno86/Meta-LLama-3-Cat-Smaug-LLama-70b/5faf24b3-38af-4f3f-8377-bba70d75f8df.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/gbueno86_Meta-LLama-3-Cat-Smaug-LLama-70b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Meta-LLama-3-Cat-Smaug-LLama-70b", + "id": "gbueno86/Meta-LLama-3-Cat-Smaug-LLama-70b", + "developer": "gbueno86", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8072 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6674 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2938 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3272 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4368 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5075 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ghost-x/ghost-8b-beta-1608/9a26214c-2601-49be-b1b1-03796b704059.json b/data/hfopenllm_v2/ghost-x/ghost-8b-beta-1608/9a26214c-2601-49be-b1b1-03796b704059.json new file mode 100644 index 000000000..e2d293c3a --- /dev/null +++ b/data/hfopenllm_v2/ghost-x/ghost-8b-beta-1608/9a26214c-2601-49be-b1b1-03796b704059.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ghost-x_ghost-8b-beta-1608/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ghost-8b-beta-1608", + "id": "ghost-x/ghost-8b-beta-1608", + "developer": "ghost-x", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4273 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4517 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0695 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + 
"dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3516 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.284 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ghost-x/ghost-8b-beta-1608/b5fba89f-ec8f-4e71-ad19-32c7d85698fb.json b/data/hfopenllm_v2/ghost-x/ghost-8b-beta-1608/b5fba89f-ec8f-4e71-ad19-32c7d85698fb.json deleted file mode 100644 index 5320e2cab..000000000 --- a/data/hfopenllm_v2/ghost-x/ghost-8b-beta-1608/b5fba89f-ec8f-4e71-ad19-32c7d85698fb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ghost-x_ghost-8b-beta-1608/1762652580.16434", - "retrieved_timestamp": "1762652580.164341", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ghost-x/ghost-8b-beta-1608", - "developer": "ghost-x", - "inference_platform": "unknown", - "id": "ghost-x/ghost-8b-beta-1608", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42727407722620425 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45165496100352914 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06948640483383686 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35158333333333336 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2839926861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/glaiveai/Reflection-Llama-3.1-70B/fa71ed09-45d4-4a5b-bfb1-a61a359a8f0c.json b/data/hfopenllm_v2/glaiveai/Reflection-Llama-3.1-70B/fa71ed09-45d4-4a5b-bfb1-a61a359a8f0c.json new file mode 100644 index 000000000..3344ef2b5 --- /dev/null +++ b/data/hfopenllm_v2/glaiveai/Reflection-Llama-3.1-70B/fa71ed09-45d4-4a5b-bfb1-a61a359a8f0c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/glaiveai_Reflection-Llama-3.1-70B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Reflection-Llama-3.1-70B", + "id": "glaiveai/Reflection-Llama-3.1-70B", + "developer": "glaiveai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 69.5 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5991 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5681 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2757 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3146 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.438 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6341 + } 
+ } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/gmonsoon/SahabatAI-Llama-11B-Test/25c5b304-46d3-4df3-9ac3-75ffa972849a.json b/data/hfopenllm_v2/gmonsoon/SahabatAI-Llama-11B-Test/25c5b304-46d3-4df3-9ac3-75ffa972849a.json new file mode 100644 index 000000000..579c66afe --- /dev/null +++ b/data/hfopenllm_v2/gmonsoon/SahabatAI-Llama-11B-Test/25c5b304-46d3-4df3-9ac3-75ffa972849a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/gmonsoon_SahabatAI-Llama-11B-Test/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SahabatAI-Llama-11B-Test", + "id": "gmonsoon/SahabatAI-Llama-11B-Test", + "developer": "gmonsoon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 11.52 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3376 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4728 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.031 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2819 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4001 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3182 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/gmonsoon/SahabatAI-MediChatIndo-8B-v1/61543864-320f-41ef-889d-7c0e95a229bd.json 
b/data/hfopenllm_v2/gmonsoon/SahabatAI-MediChatIndo-8B-v1/61543864-320f-41ef-889d-7c0e95a229bd.json deleted file mode 100644 index 63e97605c..000000000 --- a/data/hfopenllm_v2/gmonsoon/SahabatAI-MediChatIndo-8B-v1/61543864-320f-41ef-889d-7c0e95a229bd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/gmonsoon_SahabatAI-MediChatIndo-8B-v1/1762652580.165248", - "retrieved_timestamp": "1762652580.165249", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "gmonsoon/SahabatAI-MediChatIndo-8B-v1", - "developer": "gmonsoon", - "inference_platform": "unknown", - "id": "gmonsoon/SahabatAI-MediChatIndo-8B-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41628323958208663 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4508834027881236 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.061933534743202415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3753958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3107546542553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/gmonsoon/SahabatAI-MediChatIndo-8B-v1/88ed0272-39f8-4676-970a-525aee058991.json b/data/hfopenllm_v2/gmonsoon/SahabatAI-MediChatIndo-8B-v1/88ed0272-39f8-4676-970a-525aee058991.json new file mode 100644 index 000000000..4c3ee6998 --- /dev/null +++ b/data/hfopenllm_v2/gmonsoon/SahabatAI-MediChatIndo-8B-v1/88ed0272-39f8-4676-970a-525aee058991.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/gmonsoon_SahabatAI-MediChatIndo-8B-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SahabatAI-MediChatIndo-8B-v1", + "id": "gmonsoon/SahabatAI-MediChatIndo-8B-v1", + 
"developer": "gmonsoon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4163 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4509 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0619 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3754 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3108 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/gmonsoon/SahabatAI-Rebase-8B-Test/a7daa424-7b22-4320-bddd-be350d54b08d.json b/data/hfopenllm_v2/gmonsoon/SahabatAI-Rebase-8B-Test/a7daa424-7b22-4320-bddd-be350d54b08d.json deleted file mode 100644 index 218ea1332..000000000 --- a/data/hfopenllm_v2/gmonsoon/SahabatAI-Rebase-8B-Test/a7daa424-7b22-4320-bddd-be350d54b08d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/gmonsoon_SahabatAI-Rebase-8B-Test/1762652580.165493", - "retrieved_timestamp": "1762652580.165493", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "gmonsoon/SahabatAI-Rebase-8B-Test", - "developer": "gmonsoon", - "inference_platform": "unknown", - "id": "gmonsoon/SahabatAI-Rebase-8B-Test", - "additional_details": { - "precision": 
"bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5156263159527831 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.522960549734047 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1148036253776435 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41328125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3663563829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/gmonsoon/SahabatAI-Rebase-8B-Test/d8eff5d0-061b-4b83-b96a-04f9ba47ea6c.json b/data/hfopenllm_v2/gmonsoon/SahabatAI-Rebase-8B-Test/d8eff5d0-061b-4b83-b96a-04f9ba47ea6c.json new file mode 100644 index 000000000..58ed22e54 --- /dev/null +++ b/data/hfopenllm_v2/gmonsoon/SahabatAI-Rebase-8B-Test/d8eff5d0-061b-4b83-b96a-04f9ba47ea6c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/gmonsoon_SahabatAI-Rebase-8B-Test/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SahabatAI-Rebase-8B-Test", + "id": "gmonsoon/SahabatAI-Rebase-8B-Test", + "developer": "gmonsoon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5156 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.523 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": 
"MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1148 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2878 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4133 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3664 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/gmonsoon/StockSeaLLMs-7B-v1/ac53d663-0e5c-4a7e-8d9d-efcd70d39b10.json b/data/hfopenllm_v2/gmonsoon/StockSeaLLMs-7B-v1/ac53d663-0e5c-4a7e-8d9d-efcd70d39b10.json deleted file mode 100644 index 0ab71dfaa..000000000 --- a/data/hfopenllm_v2/gmonsoon/StockSeaLLMs-7B-v1/ac53d663-0e5c-4a7e-8d9d-efcd70d39b10.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/gmonsoon_StockSeaLLMs-7B-v1/1762652580.165695", - "retrieved_timestamp": "1762652580.165696", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "gmonsoon/StockSeaLLMs-7B-v1", - "developer": "gmonsoon", - "inference_platform": "unknown", - "id": "gmonsoon/StockSeaLLMs-7B-v1", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4599218961245052 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5271087932535433 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19637462235649547 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.421375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39519614361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/gmonsoon/StockSeaLLMs-7B-v1/dcb90e75-8709-4729-8c00-e756e6a9a49d.json b/data/hfopenllm_v2/gmonsoon/StockSeaLLMs-7B-v1/dcb90e75-8709-4729-8c00-e756e6a9a49d.json new file mode 100644 index 000000000..0e6375ecc --- /dev/null +++ b/data/hfopenllm_v2/gmonsoon/StockSeaLLMs-7B-v1/dcb90e75-8709-4729-8c00-e756e6a9a49d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/gmonsoon_StockSeaLLMs-7B-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "StockSeaLLMs-7B-v1", + "id": "gmonsoon/StockSeaLLMs-7B-v1", + "developer": "gmonsoon", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4599 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5271 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1964 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4214 + } + }, + { + "evaluation_name": 
"MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3952 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/gmonsoon/gemma2-9b-sahabatai-v1-instruct-BaseTIES/6d500e75-5605-4268-88a1-dc4abc7c5a7f.json b/data/hfopenllm_v2/gmonsoon/gemma2-9b-sahabatai-v1-instruct-BaseTIES/6d500e75-5605-4268-88a1-dc4abc7c5a7f.json deleted file mode 100644 index daf52cc6a..000000000 --- a/data/hfopenllm_v2/gmonsoon/gemma2-9b-sahabatai-v1-instruct-BaseTIES/6d500e75-5605-4268-88a1-dc4abc7c5a7f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/gmonsoon_gemma2-9b-sahabatai-v1-instruct-BaseTIES/1762652580.165903", - "retrieved_timestamp": "1762652580.1659038", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "gmonsoon/gemma2-9b-sahabatai-v1-instruct-BaseTIES", - "developer": "gmonsoon", - "inference_platform": "unknown", - "id": "gmonsoon/gemma2-9b-sahabatai-v1-instruct-BaseTIES", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7377923908562614 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6077244532441547 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19939577039274925 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47780208333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43467420212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/gmonsoon/gemma2-9b-sahabatai-v1-instruct-BaseTIES/81dcf3ca-f5c2-40a1-8871-b0188d5e9ceb.json b/data/hfopenllm_v2/gmonsoon/gemma2-9b-sahabatai-v1-instruct-BaseTIES/81dcf3ca-f5c2-40a1-8871-b0188d5e9ceb.json new file mode 100644 index 000000000..61f640923 --- 
/dev/null +++ b/data/hfopenllm_v2/gmonsoon/gemma2-9b-sahabatai-v1-instruct-BaseTIES/81dcf3ca-f5c2-40a1-8871-b0188d5e9ceb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/gmonsoon_gemma2-9b-sahabatai-v1-instruct-BaseTIES/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma2-9b-sahabatai-v1-instruct-BaseTIES", + "id": "gmonsoon/gemma2-9b-sahabatai-v1-instruct-BaseTIES", + "developer": "gmonsoon", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7378 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6077 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1994 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3205 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4778 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4347 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_full_2/0a0a4d32-c7a9-49c9-bba4-dae6b464a5b6.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_full_2/0a0a4d32-c7a9-49c9-bba4-dae6b464a5b6.json new file mode 100644 index 000000000..cf37f2ef2 --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/alpaca_data_full_2/0a0a4d32-c7a9-49c9-bba4-dae6b464a5b6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + 
"evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_full_2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "alpaca_data_full_2", + "id": "godlikehhd/alpaca_data_full_2", + "developer": "godlikehhd", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3178 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4217 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0929 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4052 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2854 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_full_2/3c550631-c27c-4743-98f3-3ab65c5fa906.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_full_2/3c550631-c27c-4743-98f3-3ab65c5fa906.json deleted file mode 100644 index ba2f754c2..000000000 --- a/data/hfopenllm_v2/godlikehhd/alpaca_data_full_2/3c550631-c27c-4743-98f3-3ab65c5fa906.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_full_2/1762652580.166118", - "retrieved_timestamp": "1762652580.166118", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "godlikehhd/alpaca_data_full_2", - "developer": "godlikehhd", - "inference_platform": "unknown", - "id": "godlikehhd/alpaca_data_full_2", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31781450994472443 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4216953430035033 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09290030211480363 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40515625000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.285405585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_full_3B/82a3a8ef-7e5f-48d0-a48e-41ea2c5b6452.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_full_3B/82a3a8ef-7e5f-48d0-a48e-41ea2c5b6452.json new file mode 100644 index 000000000..4c1cc5329 --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/alpaca_data_full_3B/82a3a8ef-7e5f-48d0-a48e-41ea2c5b6452.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_full_3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "alpaca_data_full_3B", + "id": "godlikehhd/alpaca_data_full_3B", + "developer": "godlikehhd", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3696 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + 
"dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4684 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1337 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4955 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3357 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_full_3B/d7d6baf0-00d3-4960-970c-949bb9919ac9.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_full_3B/d7d6baf0-00d3-4960-970c-949bb9919ac9.json deleted file mode 100644 index 789c9bc8c..000000000 --- a/data/hfopenllm_v2/godlikehhd/alpaca_data_full_3B/d7d6baf0-00d3-4960-970c-949bb9919ac9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_full_3B/1762652580.166356", - "retrieved_timestamp": "1762652580.166357", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "godlikehhd/alpaca_data_full_3B", - "developer": "godlikehhd", - "inference_platform": "unknown", - "id": "godlikehhd/alpaca_data_full_3B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36957162550920447 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46841893776834337 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1336858006042296 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4954791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.335688164893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_max_2600/017ca821-f6ea-43bc-bac1-28dd30c2341d.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_max_2600/017ca821-f6ea-43bc-bac1-28dd30c2341d.json deleted file mode 100644 index 27e8cb189..000000000 --- a/data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_max_2600/017ca821-f6ea-43bc-bac1-28dd30c2341d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ifd_max_2600/1762652580.16661", - "retrieved_timestamp": "1762652580.166613", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "godlikehhd/alpaca_data_ifd_max_2600", - "developer": "godlikehhd", - "inference_platform": "unknown", - "id": "godlikehhd/alpaca_data_ifd_max_2600", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3042504997850149 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40285133876405865 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09894259818731117 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3508645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29163896276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_max_2600/e635e798-fa85-4430-bf1e-9d5ad7fe9f22.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_max_2600/e635e798-fa85-4430-bf1e-9d5ad7fe9f22.json new file mode 100644 index 000000000..dae6a5835 --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_max_2600/e635e798-fa85-4430-bf1e-9d5ad7fe9f22.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ifd_max_2600/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "alpaca_data_ifd_max_2600", + "id": "godlikehhd/alpaca_data_ifd_max_2600", + "developer": "godlikehhd", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3043 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4029 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0989 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3509 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 
+ }, + "score_details": { + "score": 0.2916 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_max_2600_3B/41d72b83-3c55-460f-9d21-88866eed6b9a.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_max_2600_3B/41d72b83-3c55-460f-9d21-88866eed6b9a.json deleted file mode 100644 index 45f78e257..000000000 --- a/data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_max_2600_3B/41d72b83-3c55-460f-9d21-88866eed6b9a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ifd_max_2600_3B/1762652580.1669528", - "retrieved_timestamp": "1762652580.166954", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "godlikehhd/alpaca_data_ifd_max_2600_3B", - "developer": "godlikehhd", - "inference_platform": "unknown", - "id": "godlikehhd/alpaca_data_ifd_max_2600_3B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.298155560579263 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4626377955326701 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1593655589123867 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43455208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32878989361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_max_2600_3B/7ccaa29a-4f73-4794-83a2-b925d755d91e.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_max_2600_3B/7ccaa29a-4f73-4794-83a2-b925d755d91e.json new file mode 100644 index 000000000..3e8447d76 --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_max_2600_3B/7ccaa29a-4f73-4794-83a2-b925d755d91e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ifd_max_2600_3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "alpaca_data_ifd_max_2600_3B", + "id": "godlikehhd/alpaca_data_ifd_max_2600_3B", + "developer": "godlikehhd", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2982 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4626 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1594 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4346 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3288 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_me_max_5200/ba8de8f6-c118-4bc3-ae8d-851e964684ed.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_me_max_5200/ba8de8f6-c118-4bc3-ae8d-851e964684ed.json new file mode 100644 index 000000000..6e4a08360 --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_me_max_5200/ba8de8f6-c118-4bc3-ae8d-851e964684ed.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ifd_me_max_5200/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "alpaca_data_ifd_me_max_5200", + "id": 
"godlikehhd/alpaca_data_ifd_me_max_5200", + "developer": "godlikehhd", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3683 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4153 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0974 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3483 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2982 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_me_max_5200/e2f13357-053c-42e5-8149-465b4f16d334.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_me_max_5200/e2f13357-053c-42e5-8149-465b4f16d334.json deleted file mode 100644 index 4187047bc..000000000 --- a/data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_me_max_5200/e2f13357-053c-42e5-8149-465b4f16d334.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ifd_me_max_5200/1762652580.167201", - "retrieved_timestamp": "1762652580.167202", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "godlikehhd/alpaca_data_ifd_me_max_5200", - "developer": "godlikehhd", - "inference_platform": "unknown", - "id": 
"godlikehhd/alpaca_data_ifd_me_max_5200", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36832271705740766 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4153453015610935 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09743202416918428 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3482604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29820478723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_min_2600/4011975a-e2a0-466a-9b34-923e1b4f8733.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_min_2600/4011975a-e2a0-466a-9b34-923e1b4f8733.json new file mode 100644 index 000000000..bc8f77337 --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_min_2600/4011975a-e2a0-466a-9b34-923e1b4f8733.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ifd_min_2600/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "alpaca_data_ifd_min_2600", + "id": "godlikehhd/alpaca_data_ifd_min_2600", + "developer": "godlikehhd", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.375 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.4219 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0967 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3656 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2893 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_min_2600/5561b7bd-bd90-445c-b969-8d400e99e629.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_min_2600/5561b7bd-bd90-445c-b969-8d400e99e629.json deleted file mode 100644 index affc4c2ae..000000000 --- a/data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_min_2600/5561b7bd-bd90-445c-b969-8d400e99e629.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ifd_min_2600/1762652580.167441", - "retrieved_timestamp": "1762652580.167443", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "godlikehhd/alpaca_data_ifd_min_2600", - "developer": "godlikehhd", - "inference_platform": "unknown", - "id": "godlikehhd/alpaca_data_ifd_min_2600", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3749673089624419 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4219047173013076 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09667673716012085 
- } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36562500000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.289311835106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_ins_ans_max_5200/8a172205-39c6-4dd1-86b2-11b234b37e3c.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_ins_ans_max_5200/8a172205-39c6-4dd1-86b2-11b234b37e3c.json new file mode 100644 index 000000000..8ecd5019c --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/alpaca_data_ins_ans_max_5200/8a172205-39c6-4dd1-86b2-11b234b37e3c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ins_ans_max_5200/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "alpaca_data_ins_ans_max_5200", + "id": "godlikehhd/alpaca_data_ins_ans_max_5200", + "developer": "godlikehhd", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3479 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4098 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1027 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { 
+ "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3602 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2901 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_ins_ans_max_5200/9c2cee8b-3f35-4a49-814e-ad316fcede7f.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_ins_ans_max_5200/9c2cee8b-3f35-4a49-814e-ad316fcede7f.json deleted file mode 100644 index 75c08bce1..000000000 --- a/data/hfopenllm_v2/godlikehhd/alpaca_data_ins_ans_max_5200/9c2cee8b-3f35-4a49-814e-ad316fcede7f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ins_ans_max_5200/1762652580.167691", - "retrieved_timestamp": "1762652580.1676931", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "godlikehhd/alpaca_data_ins_ans_max_5200", - "developer": "godlikehhd", - "inference_platform": "unknown", - "id": "godlikehhd/alpaca_data_ins_ans_max_5200", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34786477657061043 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40982060224148426 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1027190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3601666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2900598404255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_ins_max_5200/495b2e8e-e2d8-4158-bc6e-7568604d44e9.json 
b/data/hfopenllm_v2/godlikehhd/alpaca_data_ins_max_5200/495b2e8e-e2d8-4158-bc6e-7568604d44e9.json new file mode 100644 index 000000000..b1a0ba707 --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/alpaca_data_ins_max_5200/495b2e8e-e2d8-4158-bc6e-7568604d44e9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ins_max_5200/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "alpaca_data_ins_max_5200", + "id": "godlikehhd/alpaca_data_ins_max_5200", + "developer": "godlikehhd", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3275 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4155 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0997 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3614 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2916 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_ins_max_5200/cdd1de41-4e85-4872-be9f-e3af4e9221a9.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_ins_max_5200/cdd1de41-4e85-4872-be9f-e3af4e9221a9.json deleted file mode 100644 index c238dc158..000000000 --- 
a/data/hfopenllm_v2/godlikehhd/alpaca_data_ins_max_5200/cdd1de41-4e85-4872-be9f-e3af4e9221a9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ins_max_5200/1762652580.1679769", - "retrieved_timestamp": "1762652580.167978", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "godlikehhd/alpaca_data_ins_max_5200", - "developer": "godlikehhd", - "inference_platform": "unknown", - "id": "godlikehhd/alpaca_data_ins_max_5200", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32750657145263457 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41550742328078477 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09969788519637462 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.361375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2915558510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_ins_min_2600/121f28df-65d6-4a48-aa77-4ee794034032.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_ins_min_2600/121f28df-65d6-4a48-aa77-4ee794034032.json deleted file mode 100644 index 02fdfd6d6..000000000 --- a/data/hfopenllm_v2/godlikehhd/alpaca_data_ins_min_2600/121f28df-65d6-4a48-aa77-4ee794034032.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ins_min_2600/1762652580.1682088", - "retrieved_timestamp": "1762652580.16821", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "godlikehhd/alpaca_data_ins_min_2600", - "developer": "godlikehhd", - "inference_platform": "unknown", - "id": "godlikehhd/alpaca_data_ins_min_2600", - 
"additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33300199027469335 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41873469888886056 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11102719033232629 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38534375000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28798204787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_ins_min_2600/e6a97d0d-9dc3-43a5-a69f-8132e19f9c77.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_ins_min_2600/e6a97d0d-9dc3-43a5-a69f-8132e19f9c77.json new file mode 100644 index 000000000..31d6adcd2 --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/alpaca_data_ins_min_2600/e6a97d0d-9dc3-43a5-a69f-8132e19f9c77.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ins_min_2600/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "alpaca_data_ins_min_2600", + "id": "godlikehhd/alpaca_data_ins_min_2600", + "developer": "godlikehhd", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.333 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4187 + } + }, + { + 
"evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.111 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3853 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.288 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_ins_min_5200/4aecfd45-f47b-4f02-a0ed-288cbef46a6f.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_ins_min_5200/4aecfd45-f47b-4f02-a0ed-288cbef46a6f.json new file mode 100644 index 000000000..b84f600b2 --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/alpaca_data_ins_min_5200/4aecfd45-f47b-4f02-a0ed-288cbef46a6f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ins_min_5200/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "alpaca_data_ins_min_5200", + "id": "godlikehhd/alpaca_data_ins_min_5200", + "developer": "godlikehhd", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.336 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4289 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1035 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3906 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2949 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_ins_min_5200/d976888b-5e17-4e5c-b557-0b48bf36d4f7.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_ins_min_5200/d976888b-5e17-4e5c-b557-0b48bf36d4f7.json deleted file mode 100644 index b449903b2..000000000 --- a/data/hfopenllm_v2/godlikehhd/alpaca_data_ins_min_5200/d976888b-5e17-4e5c-b557-0b48bf36d4f7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ins_min_5200/1762652580.1684108", - "retrieved_timestamp": "1762652580.1684108", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "godlikehhd/alpaca_data_ins_min_5200", - "developer": "godlikehhd", - "inference_platform": "unknown", - "id": "godlikehhd/alpaca_data_ins_min_5200", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3359995921931586 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4289279419241076 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10347432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39055208333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29488031914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_sampled_ifd_5200/a6f7bc45-c2b5-47d8-a062-60f20c3d7ea4.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_sampled_ifd_5200/a6f7bc45-c2b5-47d8-a062-60f20c3d7ea4.json new file mode 100644 index 000000000..64a2916b2 --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/alpaca_data_sampled_ifd_5200/a6f7bc45-c2b5-47d8-a062-60f20c3d7ea4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_sampled_ifd_5200/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "alpaca_data_sampled_ifd_5200", + "id": "godlikehhd/alpaca_data_sampled_ifd_5200", + "developer": "godlikehhd", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2924 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4033 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1254 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3521 + } + }, + { + 
"evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2896 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_sampled_ifd_5200/e7ca66f4-852b-4b5b-8781-d6272a43c559.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_sampled_ifd_5200/e7ca66f4-852b-4b5b-8781-d6272a43c559.json deleted file mode 100644 index fac4fc396..000000000 --- a/data/hfopenllm_v2/godlikehhd/alpaca_data_sampled_ifd_5200/e7ca66f4-852b-4b5b-8781-d6272a43c559.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_sampled_ifd_5200/1762652580.1686149", - "retrieved_timestamp": "1762652580.1686149", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "godlikehhd/alpaca_data_sampled_ifd_5200", - "developer": "godlikehhd", - "inference_platform": "unknown", - "id": "godlikehhd/alpaca_data_sampled_ifd_5200", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2923853154075631 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4032969715626326 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12537764350453173 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3520729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2896442819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_sampled_ifd_new_5200/906db90c-7ea4-4878-aa01-06fd1ad0d18a.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_sampled_ifd_new_5200/906db90c-7ea4-4878-aa01-06fd1ad0d18a.json deleted file mode 100644 index f5b20da00..000000000 --- 
a/data/hfopenllm_v2/godlikehhd/alpaca_data_sampled_ifd_new_5200/906db90c-7ea4-4878-aa01-06fd1ad0d18a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_sampled_ifd_new_5200/1762652580.1688168", - "retrieved_timestamp": "1762652580.168818", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "godlikehhd/alpaca_data_sampled_ifd_new_5200", - "developer": "godlikehhd", - "inference_platform": "unknown", - "id": "godlikehhd/alpaca_data_sampled_ifd_new_5200", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36632468516868577 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4177831234050982 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09441087613293052 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29247007978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_sampled_ifd_new_5200/c85c79d6-28e0-4deb-ad84-901b725aeca8.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_sampled_ifd_new_5200/c85c79d6-28e0-4deb-ad84-901b725aeca8.json new file mode 100644 index 000000000..081b3f249 --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/alpaca_data_sampled_ifd_new_5200/c85c79d6-28e0-4deb-ad84-901b725aeca8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_sampled_ifd_new_5200/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "alpaca_data_sampled_ifd_new_5200", + "id": "godlikehhd/alpaca_data_sampled_ifd_new_5200", + "developer": "godlikehhd", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + 
"architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3663 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4178 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0944 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3613 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2925 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_0.1_2600/08195b61-5fe5-4cce-8da4-34b731289278.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_0.1_2600/08195b61-5fe5-4cce-8da4-34b731289278.json deleted file mode 100644 index 26ada3498..000000000 --- a/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_0.1_2600/08195b61-5fe5-4cce-8da4-34b731289278.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_score_max_0.1_2600/1762652580.1691651", - "retrieved_timestamp": "1762652580.169167", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "godlikehhd/alpaca_data_score_max_0.1_2600", - "developer": "godlikehhd", - "inference_platform": "unknown", - "id": "godlikehhd/alpaca_data_score_max_0.1_2600", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - 
"params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3287554799044313 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42522607952607777 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09894259818731117 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37064583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29230385638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_0.1_2600/73271472-d06f-405b-af9d-2da7c17e1eb0.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_0.1_2600/73271472-d06f-405b-af9d-2da7c17e1eb0.json new file mode 100644 index 000000000..3d3313dd2 --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_0.1_2600/73271472-d06f-405b-af9d-2da7c17e1eb0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_score_max_0.1_2600/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "alpaca_data_score_max_0.1_2600", + "id": "godlikehhd/alpaca_data_score_max_0.1_2600", + "developer": "godlikehhd", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3288 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4252 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + 
"dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0989 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3706 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2923 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_0.3_2600/40e4c93e-7a54-49c2-b513-33edd87f59b0.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_0.3_2600/40e4c93e-7a54-49c2-b513-33edd87f59b0.json deleted file mode 100644 index 87e63f4dd..000000000 --- a/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_0.3_2600/40e4c93e-7a54-49c2-b513-33edd87f59b0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_score_max_0.3_2600/1762652580.1694138", - "retrieved_timestamp": "1762652580.169415", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "godlikehhd/alpaca_data_score_max_0.3_2600", - "developer": "godlikehhd", - "inference_platform": "unknown", - "id": "godlikehhd/alpaca_data_score_max_0.3_2600", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33752332699459653 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4151448369012765 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10347432024169184 - } - }, - { - "evaluation_name": "GPQA", - 
"metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37594791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29130651595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_0.3_2600/4e40bb43-c33d-4324-aa02-5bb7f88a5d1f.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_0.3_2600/4e40bb43-c33d-4324-aa02-5bb7f88a5d1f.json new file mode 100644 index 000000000..90908a71a --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_0.3_2600/4e40bb43-c33d-4324-aa02-5bb7f88a5d1f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_score_max_0.3_2600/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "alpaca_data_score_max_0.3_2600", + "id": "godlikehhd/alpaca_data_score_max_0.3_2600", + "developer": "godlikehhd", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3375 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4151 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1035 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3759 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2913 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_0.7_2600/988c6ec3-e967-4cec-993b-e060a5a18e97.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_0.7_2600/988c6ec3-e967-4cec-993b-e060a5a18e97.json deleted file mode 100644 index b9f1aae73..000000000 --- a/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_0.7_2600/988c6ec3-e967-4cec-993b-e060a5a18e97.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_score_max_0.7_2600/1762652580.169624", - "retrieved_timestamp": "1762652580.169625", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "godlikehhd/alpaca_data_score_max_0.7_2600", - "developer": "godlikehhd", - "inference_platform": "unknown", - "id": "godlikehhd/alpaca_data_score_max_0.7_2600", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3639764713183243 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41845266250678703 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10725075528700906 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3468645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2982878989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_0.7_2600/9b36e4c0-0d13-4988-8145-b9254da2e76e.json 
b/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_0.7_2600/9b36e4c0-0d13-4988-8145-b9254da2e76e.json new file mode 100644 index 000000000..97a5ad3b9 --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_0.7_2600/9b36e4c0-0d13-4988-8145-b9254da2e76e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_score_max_0.7_2600/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "alpaca_data_score_max_0.7_2600", + "id": "godlikehhd/alpaca_data_score_max_0.7_2600", + "developer": "godlikehhd", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.364 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4185 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1073 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3469 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2983 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_2500/6a464798-0111-4c71-b156-72a5aba1da63.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_2500/6a464798-0111-4c71-b156-72a5aba1da63.json new file mode 100644 index 000000000..5e21ebf38 --- /dev/null +++ 
b/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_2500/6a464798-0111-4c71-b156-72a5aba1da63.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_score_max_2500/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "alpaca_data_score_max_2500", + "id": "godlikehhd/alpaca_data_score_max_2500", + "developer": "godlikehhd", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3564 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.418 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0952 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3627 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.294 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_2500/b6fd288d-36d5-4499-bf2d-da1fdd1120c5.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_2500/b6fd288d-36d5-4499-bf2d-da1fdd1120c5.json deleted file mode 100644 index 1b4f1526b..000000000 --- a/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_2500/b6fd288d-36d5-4499-bf2d-da1fdd1120c5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/godlikehhd_alpaca_data_score_max_2500/1762652580.1698968", - "retrieved_timestamp": "1762652580.169898", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "godlikehhd/alpaca_data_score_max_2500", - "developer": "godlikehhd", - "inference_platform": "unknown", - "id": "godlikehhd/alpaca_data_score_max_2500", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3563577973111345 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41801375075895447 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09516616314199396 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36270833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2939660904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_2600_3B/78252135-f15b-427d-86de-c32cd3dbcd0f.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_2600_3B/78252135-f15b-427d-86de-c32cd3dbcd0f.json new file mode 100644 index 000000000..6e58fff0e --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_2600_3B/78252135-f15b-427d-86de-c32cd3dbcd0f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_score_max_2600_3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "alpaca_data_score_max_2600_3B", + "id": "godlikehhd/alpaca_data_score_max_2600_3B", + "developer": "godlikehhd", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + 
"hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3358 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4716 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1548 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4474 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3342 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_2600_3B/92dc5ec0-5aea-45f5-9237-32b5a65e095b.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_2600_3B/92dc5ec0-5aea-45f5-9237-32b5a65e095b.json deleted file mode 100644 index abe4842c5..000000000 --- a/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_2600_3B/92dc5ec0-5aea-45f5-9237-32b5a65e095b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_score_max_2600_3B/1762652580.170121", - "retrieved_timestamp": "1762652580.170122", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "godlikehhd/alpaca_data_score_max_2600_3B", - "developer": "godlikehhd", - "inference_platform": "unknown", - "id": "godlikehhd/alpaca_data_score_max_2600_3B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33577463352792813 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4716306839273412 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15483383685800603 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44744791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3341921542553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_5200/c3b7bd57-9bc3-4d83-aad9-7d6315748c0a.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_5200/c3b7bd57-9bc3-4d83-aad9-7d6315748c0a.json new file mode 100644 index 000000000..25483eeb0 --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_5200/c3b7bd57-9bc3-4d83-aad9-7d6315748c0a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_score_max_5200/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "alpaca_data_score_max_5200", + "id": "godlikehhd/alpaca_data_score_max_5200", + "developer": "godlikehhd", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3445 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4242 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0974 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3878 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_5200/d877dbd4-b3da-44b5-974a-1267db396435.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_5200/d877dbd4-b3da-44b5-974a-1267db396435.json deleted file mode 100644 index 703d5ae12..000000000 --- a/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_5200/d877dbd4-b3da-44b5-974a-1267db396435.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_score_max_5200/1762652580.170327", - "retrieved_timestamp": "1762652580.170327", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "godlikehhd/alpaca_data_score_max_5200", - "developer": "godlikehhd", - "inference_platform": "unknown", - "id": "godlikehhd/alpaca_data_score_max_5200", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34454248061809334 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42417102847687554 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09743202416918428 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3877916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446476063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/godlikehhd/ifd_2500_qwen/bce17582-e807-4b91-b0e7-0a890bf5eb24.json b/data/hfopenllm_v2/godlikehhd/ifd_2500_qwen/bce17582-e807-4b91-b0e7-0a890bf5eb24.json new file mode 100644 index 000000000..3d6fa2b02 --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/ifd_2500_qwen/bce17582-e807-4b91-b0e7-0a890bf5eb24.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/godlikehhd_ifd_2500_qwen/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ifd_2500_qwen", + "id": "godlikehhd/ifd_2500_qwen", + "developer": "godlikehhd", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3365 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4298 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0982 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3615 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy 
on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2921 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/ifd_new_correct_all_sample_2500_qwen/f8371e81-f6d4-4441-bc6c-5d4a18da7d08.json b/data/hfopenllm_v2/godlikehhd/ifd_new_correct_all_sample_2500_qwen/f8371e81-f6d4-4441-bc6c-5d4a18da7d08.json new file mode 100644 index 000000000..9f6d40e63 --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/ifd_new_correct_all_sample_2500_qwen/f8371e81-f6d4-4441-bc6c-5d4a18da7d08.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/godlikehhd_ifd_new_correct_all_sample_2500_qwen/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ifd_new_correct_all_sample_2500_qwen", + "id": "godlikehhd/ifd_new_correct_all_sample_2500_qwen", + "developer": "godlikehhd", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3376 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.402 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0959 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3562 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.2889 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/ifd_new_correct_sample_2500_qwen/78407b2e-1f44-46f0-bc21-76bdc68f8d9c.json b/data/hfopenllm_v2/godlikehhd/ifd_new_correct_sample_2500_qwen/78407b2e-1f44-46f0-bc21-76bdc68f8d9c.json new file mode 100644 index 000000000..72f6ab1df --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/ifd_new_correct_sample_2500_qwen/78407b2e-1f44-46f0-bc21-76bdc68f8d9c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/godlikehhd_ifd_new_correct_sample_2500_qwen/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ifd_new_correct_sample_2500_qwen", + "id": "godlikehhd/ifd_new_correct_sample_2500_qwen", + "developer": "godlikehhd", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3397 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.411 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1042 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3627 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2932 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/ifd_new_qwen_2500/bdb9e2d2-8d09-4994-a320-2f968bcb4898.json 
b/data/hfopenllm_v2/godlikehhd/ifd_new_qwen_2500/bdb9e2d2-8d09-4994-a320-2f968bcb4898.json new file mode 100644 index 000000000..dd934c455 --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/ifd_new_qwen_2500/bdb9e2d2-8d09-4994-a320-2f968bcb4898.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/godlikehhd_ifd_new_qwen_2500/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ifd_new_qwen_2500", + "id": "godlikehhd/ifd_new_qwen_2500", + "developer": "godlikehhd", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.324 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.416 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1118 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.359 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/qwen-2.5-1.5b-cherry/c57d15c8-9581-4bb5-89e4-2fea1e3c584e.json b/data/hfopenllm_v2/godlikehhd/qwen-2.5-1.5b-cherry/c57d15c8-9581-4bb5-89e4-2fea1e3c584e.json new file mode 100644 index 000000000..0eae1c7ae --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/qwen-2.5-1.5b-cherry/c57d15c8-9581-4bb5-89e4-2fea1e3c584e.json @@ 
-0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/godlikehhd_qwen-2.5-1.5b-cherry/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwen-2.5-1.5b-cherry", + "id": "godlikehhd/qwen-2.5-1.5b-cherry", + "developer": "godlikehhd", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.772 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2893 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4036 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.102 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3456 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2923 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/qwen_2.5-1.5b-cherry_new/550d5665-7a8a-437e-b318-000690dd250f.json b/data/hfopenllm_v2/godlikehhd/qwen_2.5-1.5b-cherry_new/550d5665-7a8a-437e-b318-000690dd250f.json new file mode 100644 index 000000000..8676d2e1f --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/qwen_2.5-1.5b-cherry_new/550d5665-7a8a-437e-b318-000690dd250f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/godlikehhd_qwen_2.5-1.5b-cherry_new/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF 
Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwen_2.5-1.5b-cherry_new", + "id": "godlikehhd/qwen_2.5-1.5b-cherry_new", + "developer": "godlikehhd", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.312 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.415 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0967 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3496 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/qwen_full_data_alpaca/a1922f33-32f5-4f99-8df6-e2080808d292.json b/data/hfopenllm_v2/godlikehhd/qwen_full_data_alpaca/a1922f33-32f5-4f99-8df6-e2080808d292.json new file mode 100644 index 000000000..0a3408038 --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/qwen_full_data_alpaca/a1922f33-32f5-4f99-8df6-e2080808d292.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/godlikehhd_qwen_full_data_alpaca/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwen_full_data_alpaca", + "id": 
"godlikehhd/qwen_full_data_alpaca", + "developer": "godlikehhd", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3136 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4229 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0921 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4052 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2851 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/qwen_ins_ans_2500/6ccc376b-24a4-42cc-8ea0-823ef14336db.json b/data/hfopenllm_v2/godlikehhd/qwen_ins_ans_2500/6ccc376b-24a4-42cc-8ea0-823ef14336db.json new file mode 100644 index 000000000..79114530d --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/qwen_ins_ans_2500/6ccc376b-24a4-42cc-8ea0-823ef14336db.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/godlikehhd_qwen_ins_ans_2500/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwen_ins_ans_2500", + "id": "godlikehhd/qwen_ins_ans_2500", + "developer": "godlikehhd", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + 
"evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2698 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4074 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.114 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3589 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2809 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/AALF/gemma-2-27b-it-SimPO-37K-100steps/214ebe7f-357a-435c-9bf5-451bdea1ca9a.json b/data/hfopenllm_v2/google/AALF/gemma-2-27b-it-SimPO-37K-100steps/214ebe7f-357a-435c-9bf5-451bdea1ca9a.json deleted file mode 100644 index 09a6bb34e..000000000 --- a/data/hfopenllm_v2/google/AALF/gemma-2-27b-it-SimPO-37K-100steps/214ebe7f-357a-435c-9bf5-451bdea1ca9a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/AALF_gemma-2-27b-it-SimPO-37K-100steps/1762652579.472713", - "retrieved_timestamp": "1762652579.472714", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "AALF/gemma-2-27b-it-SimPO-37K-100steps", - "developer": "google", - "inference_platform": "unknown", - "id": "AALF/gemma-2-27b-it-SimPO-37K-100steps", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2567642743476199 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39308230769885016 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.021148036253776436 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3329166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21251662234042554 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/AALF/gemma-2-27b-it-SimPO-37K/878ec84b-a365-4887-b7fd-1dc738f6eda8.json b/data/hfopenllm_v2/google/AALF/gemma-2-27b-it-SimPO-37K/878ec84b-a365-4887-b7fd-1dc738f6eda8.json deleted file mode 100644 index 54d6ba781..000000000 --- a/data/hfopenllm_v2/google/AALF/gemma-2-27b-it-SimPO-37K/878ec84b-a365-4887-b7fd-1dc738f6eda8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/AALF_gemma-2-27b-it-SimPO-37K/1762652579.472391", - "retrieved_timestamp": "1762652579.4723918", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "AALF/gemma-2-27b-it-SimPO-37K", - "developer": "google", - "inference_platform": "unknown", - "id": "AALF/gemma-2-27b-it-SimPO-37K", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24065257959990605 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3911343917952534 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - 
"metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3487604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1971409574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/AELLM/gemma-2-aeria-infinity-9b/93d08946-76b5-4547-8bf0-966c5cccd8c1.json b/data/hfopenllm_v2/google/AELLM/gemma-2-aeria-infinity-9b/93d08946-76b5-4547-8bf0-966c5cccd8c1.json deleted file mode 100644 index c16cdeda6..000000000 --- a/data/hfopenllm_v2/google/AELLM/gemma-2-aeria-infinity-9b/93d08946-76b5-4547-8bf0-966c5cccd8c1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/AELLM_gemma-2-aeria-infinity-9b/1762652579.4729412", - "retrieved_timestamp": "1762652579.472942", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "AELLM/gemma-2-aeria-infinity-9b", - "developer": "google", - "inference_platform": "unknown", - "id": "AELLM/gemma-2-aeria-infinity-9b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.759399504426034 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5983336669577649 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21450151057401812 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3338926174496644 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40196875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38622007978723405 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/google/AELLM/gemma-2-lyco-infinity-9b/fa16a47e-4009-487b-8252-1fef155ce6b4.json b/data/hfopenllm_v2/google/AELLM/gemma-2-lyco-infinity-9b/fa16a47e-4009-487b-8252-1fef155ce6b4.json deleted file mode 100644 index 97ddd4d06..000000000 --- a/data/hfopenllm_v2/google/AELLM/gemma-2-lyco-infinity-9b/fa16a47e-4009-487b-8252-1fef155ce6b4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/AELLM_gemma-2-lyco-infinity-9b/1762652579.473207", - "retrieved_timestamp": "1762652579.473208", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "AELLM/gemma-2-lyco-infinity-9b", - "developer": "google", - "inference_platform": "unknown", - "id": "AELLM/gemma-2-lyco-infinity-9b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7316475839660989 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5839534871023703 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17069486404833836 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40063541666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.378656914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/Aashraf995/Gemma-Evo-10B/15b910c7-6c36-4af8-af78-d48278dbc4db.json b/data/hfopenllm_v2/google/Aashraf995/Gemma-Evo-10B/15b910c7-6c36-4af8-af78-d48278dbc4db.json deleted file mode 100644 index 2abf2b8a0..000000000 --- a/data/hfopenllm_v2/google/Aashraf995/Gemma-Evo-10B/15b910c7-6c36-4af8-af78-d48278dbc4db.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Aashraf995_Gemma-Evo-10B/1762652579.476305", - "retrieved_timestamp": "1762652579.476305", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open 
LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Aashraf995/Gemma-Evo-10B", - "developer": "google", - "inference_platform": "unknown", - "id": "Aashraf995/Gemma-Evo-10B", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7332211864519476 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6044352897552882 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22280966767371602 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3540268456375839 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45947916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4275265957446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/BAAI/Gemma2-9B-IT-Simpo-Infinity-Preference/0f948238-5ed2-41ee-a815-3ff20728de89.json b/data/hfopenllm_v2/google/BAAI/Gemma2-9B-IT-Simpo-Infinity-Preference/0f948238-5ed2-41ee-a815-3ff20728de89.json deleted file mode 100644 index d8888da78..000000000 --- a/data/hfopenllm_v2/google/BAAI/Gemma2-9B-IT-Simpo-Infinity-Preference/0f948238-5ed2-41ee-a815-3ff20728de89.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BAAI_Gemma2-9B-IT-Simpo-Infinity-Preference/1762652579.487571", - "retrieved_timestamp": "1762652579.487571", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BAAI/Gemma2-9B-IT-Simpo-Infinity-Preference", - "developer": "google", - "inference_platform": "unknown", - "id": "BAAI/Gemma2-9B-IT-Simpo-Infinity-Preference", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31763831079314 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": 
"Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5979459664230056 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09743202416918428 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33976510067114096 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39657291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3868849734042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/BlackBeenie/Neos-Gemma-2-9b/ea9ebbaa-fb04-491d-adc2-0389cb5d1ef6.json b/data/hfopenllm_v2/google/BlackBeenie/Neos-Gemma-2-9b/ea9ebbaa-fb04-491d-adc2-0389cb5d1ef6.json deleted file mode 100644 index b58d6b4b8..000000000 --- a/data/hfopenllm_v2/google/BlackBeenie/Neos-Gemma-2-9b/ea9ebbaa-fb04-491d-adc2-0389cb5d1ef6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BlackBeenie_Neos-Gemma-2-9b/1762652579.4958751", - "retrieved_timestamp": "1762652579.495876", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BlackBeenie/Neos-Gemma-2-9b", - "developer": "google", - "inference_platform": "unknown", - "id": "BlackBeenie/Neos-Gemma-2-9b", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5875665456544192 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5502975126048852 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09818731117824774 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36175 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39810505319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/Columbia-NLP/LION-Gemma-2b-odpo-v1.0/25418041-6fe1-4cd8-88cb-79456a65210c.json b/data/hfopenllm_v2/google/Columbia-NLP/LION-Gemma-2b-odpo-v1.0/25418041-6fe1-4cd8-88cb-79456a65210c.json deleted file mode 100644 index 327cb7890..000000000 --- a/data/hfopenllm_v2/google/Columbia-NLP/LION-Gemma-2b-odpo-v1.0/25418041-6fe1-4cd8-88cb-79456a65210c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Columbia-NLP_LION-Gemma-2b-odpo-v1.0/1762652579.507273", - "retrieved_timestamp": "1762652579.507273", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Columbia-NLP/LION-Gemma-2b-odpo-v1.0", - "developer": "google", - "inference_platform": "unknown", - "id": "Columbia-NLP/LION-Gemma-2b-odpo-v1.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 2.506 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30664858131978706 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3895836210706875 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06948640483383686 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2424496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42791666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1692154255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/DavidAU/Gemma-The-Writer-9B/a639bba5-4d0e-4d0b-826a-3eb4d0ccebab.json b/data/hfopenllm_v2/google/DavidAU/Gemma-The-Writer-9B/a639bba5-4d0e-4d0b-826a-3eb4d0ccebab.json deleted file mode 100644 index 19e6a75a5..000000000 --- 
a/data/hfopenllm_v2/google/DavidAU/Gemma-The-Writer-9B/a639bba5-4d0e-4d0b-826a-3eb4d0ccebab.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DavidAU_Gemma-The-Writer-9B/1762652579.539702", - "retrieved_timestamp": "1762652579.5397062", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DavidAU/Gemma-The-Writer-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "DavidAU/Gemma-The-Writer-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17403156956874427 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5905439384199537 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08761329305135952 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34563758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.409875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39793882978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/DavidAU/Gemma-The-Writer-DEADLINE-10B/66d2e2a4-a75c-4fb9-af6a-3181f17281af.json b/data/hfopenllm_v2/google/DavidAU/Gemma-The-Writer-DEADLINE-10B/66d2e2a4-a75c-4fb9-af6a-3181f17281af.json deleted file mode 100644 index 167fee840..000000000 --- a/data/hfopenllm_v2/google/DavidAU/Gemma-The-Writer-DEADLINE-10B/66d2e2a4-a75c-4fb9-af6a-3181f17281af.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DavidAU_Gemma-The-Writer-DEADLINE-10B/1762652579.5400288", - "retrieved_timestamp": "1762652579.54003", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DavidAU/Gemma-The-Writer-DEADLINE-10B", - "developer": "google", - "inference_platform": "unknown", - "id": "DavidAU/Gemma-The-Writer-DEADLINE-10B", 
- "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.952 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23315802071836061 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5896087932535433 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09894259818731117 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3422818791946309 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4188645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39461436170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/DavidAU/Gemma-The-Writer-J.GutenBerg-10B/3d1cef14-ea09-45ca-a92c-a1fe7a05ce8b.json b/data/hfopenllm_v2/google/DavidAU/Gemma-The-Writer-J.GutenBerg-10B/3d1cef14-ea09-45ca-a92c-a1fe7a05ce8b.json deleted file mode 100644 index 29aa16845..000000000 --- a/data/hfopenllm_v2/google/DavidAU/Gemma-The-Writer-J.GutenBerg-10B/3d1cef14-ea09-45ca-a92c-a1fe7a05ce8b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DavidAU_Gemma-The-Writer-J.GutenBerg-10B/1762652579.5402539", - "retrieved_timestamp": "1762652579.540255", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DavidAU/Gemma-The-Writer-J.GutenBerg-10B", - "developer": "google", - "inference_platform": "unknown", - "id": "DavidAU/Gemma-The-Writer-J.GutenBerg-10B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.034 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28578948301617485 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5909421265868766 - } - }, - { - "evaluation_name": "MATH Level 
5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09214501510574018 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41759375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3946974734042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/DavidAU/Gemma-The-Writer-Mighty-Sword-9B/a403d91c-4f30-4d05-9f00-24ce97cc91ac.json b/data/hfopenllm_v2/google/DavidAU/Gemma-The-Writer-Mighty-Sword-9B/a403d91c-4f30-4d05-9f00-24ce97cc91ac.json deleted file mode 100644 index 4df7e3189..000000000 --- a/data/hfopenllm_v2/google/DavidAU/Gemma-The-Writer-Mighty-Sword-9B/a403d91c-4f30-4d05-9f00-24ce97cc91ac.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DavidAU_Gemma-The-Writer-Mighty-Sword-9B/1762652579.540473", - "retrieved_timestamp": "1762652579.5404742", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DavidAU/Gemma-The-Writer-Mighty-Sword-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "DavidAU/Gemma-The-Writer-Mighty-Sword-9B", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7527549125209998 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5911959785635329 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19108761329305135 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34815436241610737 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.4111770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39677526595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/DavidAU/Gemma-The-Writer-N-Restless-Quill-10B-Uncensored/b708a2a6-d738-48a9-9c20-0838bdb19646.json b/data/hfopenllm_v2/google/DavidAU/Gemma-The-Writer-N-Restless-Quill-10B-Uncensored/b708a2a6-d738-48a9-9c20-0838bdb19646.json deleted file mode 100644 index 759dae739..000000000 --- a/data/hfopenllm_v2/google/DavidAU/Gemma-The-Writer-N-Restless-Quill-10B-Uncensored/b708a2a6-d738-48a9-9c20-0838bdb19646.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DavidAU_Gemma-The-Writer-N-Restless-Quill-10B-Uncensored/1762652579.540709", - "retrieved_timestamp": "1762652579.54071", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DavidAU/Gemma-The-Writer-N-Restless-Quill-10B-Uncensored", - "developer": "google", - "inference_platform": "unknown", - "id": "DavidAU/Gemma-The-Writer-N-Restless-Quill-10B-Uncensored", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.034 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7070927361622716 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5922294775018883 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.229607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3414429530201342 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41632291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3966090425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/EpistemeAI/Athena-gemma-2-2b-it-Philos/21096485-ff49-4481-a530-48746334fceb.json b/data/hfopenllm_v2/google/EpistemeAI/Athena-gemma-2-2b-it-Philos/21096485-ff49-4481-a530-48746334fceb.json deleted file mode 100644 index 305ef00ee..000000000 --- 
a/data/hfopenllm_v2/google/EpistemeAI/Athena-gemma-2-2b-it-Philos/21096485-ff49-4481-a530-48746334fceb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Athena-gemma-2-2b-it-Philos/1762652579.598697", - "retrieved_timestamp": "1762652579.598698", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/Athena-gemma-2-2b-it-Philos", - "developer": "google", - "inference_platform": "unknown", - "id": "EpistemeAI/Athena-gemma-2-2b-it-Philos", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4620950189940469 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37947768790586744 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03700906344410876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43136458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22481715425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/EpistemeAI/Athena-gemma-2-2b-it/a0ca047c-97c2-4ba1-84a7-ba0b00ba6d25.json b/data/hfopenllm_v2/google/EpistemeAI/Athena-gemma-2-2b-it/a0ca047c-97c2-4ba1-84a7-ba0b00ba6d25.json deleted file mode 100644 index 1a5e93422..000000000 --- a/data/hfopenllm_v2/google/EpistemeAI/Athena-gemma-2-2b-it/a0ca047c-97c2-4ba1-84a7-ba0b00ba6d25.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Athena-gemma-2-2b-it/1762652579.598221", - "retrieved_timestamp": "1762652579.598221", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/Athena-gemma-2-2b-it", - "developer": "google", - "inference_platform": "unknown", - "id": 
"EpistemeAI/Athena-gemma-2-2b-it", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3134172883504657 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42642293591146 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04909365558912387 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43505208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2421875 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/EpistemeAI/Athene-codegemma-2-7b-it-alpaca-v1.3/c05e106e-203a-49e7-b656-22809ac16037.json b/data/hfopenllm_v2/google/EpistemeAI/Athene-codegemma-2-7b-it-alpaca-v1.3/c05e106e-203a-49e7-b656-22809ac16037.json deleted file mode 100644 index 8a8869591..000000000 --- a/data/hfopenllm_v2/google/EpistemeAI/Athene-codegemma-2-7b-it-alpaca-v1.3/c05e106e-203a-49e7-b656-22809ac16037.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Athene-codegemma-2-7b-it-alpaca-v1.3/1762652579.598942", - "retrieved_timestamp": "1762652579.598943", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/Athene-codegemma-2-7b-it-alpaca-v1.3", - "developer": "google", - "inference_platform": "unknown", - "id": "EpistemeAI/Athene-codegemma-2-7b-it-alpaca-v1.3", - "additional_details": { - "precision": "float16", - "architecture": "GemmaForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40299405577201824 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.4331916189482215 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.061933534743202415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4503020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25872672872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/EpistemeAI2/Athene-codegemma-2-7b-it-alpaca-v1.2/ea4bffba-6e14-4380-a060-2b4deb6d94c0.json b/data/hfopenllm_v2/google/EpistemeAI2/Athene-codegemma-2-7b-it-alpaca-v1.2/ea4bffba-6e14-4380-a060-2b4deb6d94c0.json deleted file mode 100644 index d4e5f9e06..000000000 --- a/data/hfopenllm_v2/google/EpistemeAI2/Athene-codegemma-2-7b-it-alpaca-v1.2/ea4bffba-6e14-4380-a060-2b4deb6d94c0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI2_Athene-codegemma-2-7b-it-alpaca-v1.2/1762652579.609552", - "retrieved_timestamp": "1762652579.6095529", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI2/Athene-codegemma-2-7b-it-alpaca-v1.2", - "developer": "google", - "inference_platform": "unknown", - "id": "EpistemeAI2/Athene-codegemma-2-7b-it-alpaca-v1.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4351177098986245 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41754154460978427 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04229607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41696875000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22972074468085107 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/GenVRadmin/AryaBhatta-GemmaOrca-2-Merged/d4bb122a-87b4-482e-8050-7c1716a4ed5b.json b/data/hfopenllm_v2/google/GenVRadmin/AryaBhatta-GemmaOrca-2-Merged/d4bb122a-87b4-482e-8050-7c1716a4ed5b.json deleted file mode 100644 index 97249e8ad..000000000 --- a/data/hfopenllm_v2/google/GenVRadmin/AryaBhatta-GemmaOrca-2-Merged/d4bb122a-87b4-482e-8050-7c1716a4ed5b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/GenVRadmin_AryaBhatta-GemmaOrca-2-Merged/1762652579.627253", - "retrieved_timestamp": "1762652579.627253", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "GenVRadmin/AryaBhatta-GemmaOrca-2-Merged", - "developer": "google", - "inference_platform": "unknown", - "id": "GenVRadmin/AryaBhatta-GemmaOrca-2-Merged", - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 8.538 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30637375497014585 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3887493166323577 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04984894259818731 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4550208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23844747340425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/GenVRadmin/AryaBhatta-GemmaOrca-Merged/179d4baf-7da1-4a56-82e7-35ea45204e13.json b/data/hfopenllm_v2/google/GenVRadmin/AryaBhatta-GemmaOrca-Merged/179d4baf-7da1-4a56-82e7-35ea45204e13.json deleted file mode 100644 index f2fcc0a8e..000000000 --- 
a/data/hfopenllm_v2/google/GenVRadmin/AryaBhatta-GemmaOrca-Merged/179d4baf-7da1-4a56-82e7-35ea45204e13.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/GenVRadmin_AryaBhatta-GemmaOrca-Merged/1762652579.627504", - "retrieved_timestamp": "1762652579.6275048", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "GenVRadmin/AryaBhatta-GemmaOrca-Merged", - "developer": "google", - "inference_platform": "unknown", - "id": "GenVRadmin/AryaBhatta-GemmaOrca-Merged", - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 8.538 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30637375497014585 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4130633897394575 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3523854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22282247340425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/GenVRadmin/AryaBhatta-GemmaUltra-Merged/4aca90c3-b0c0-4ec6-ba6b-0d5b09ef63fe.json b/data/hfopenllm_v2/google/GenVRadmin/AryaBhatta-GemmaUltra-Merged/4aca90c3-b0c0-4ec6-ba6b-0d5b09ef63fe.json deleted file mode 100644 index 823cb0ce9..000000000 --- a/data/hfopenllm_v2/google/GenVRadmin/AryaBhatta-GemmaUltra-Merged/4aca90c3-b0c0-4ec6-ba6b-0d5b09ef63fe.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/GenVRadmin_AryaBhatta-GemmaUltra-Merged/1762652579.627715", - "retrieved_timestamp": "1762652579.627716", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "GenVRadmin/AryaBhatta-GemmaUltra-Merged", - "developer": "google", - "inference_platform": 
"unknown", - "id": "GenVRadmin/AryaBhatta-GemmaUltra-Merged", - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 8.538 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30207737691547315 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4141445378464817 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25335570469798663 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42785416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2265625 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/Gunulhona/Gemma-Ko-Merge-PEFT/7891a95c-8d95-4181-96e8-cdc2f6ab538b.json b/data/hfopenllm_v2/google/Gunulhona/Gemma-Ko-Merge-PEFT/7891a95c-8d95-4181-96e8-cdc2f6ab538b.json deleted file mode 100644 index fa0f8d10b..000000000 --- a/data/hfopenllm_v2/google/Gunulhona/Gemma-Ko-Merge-PEFT/7891a95c-8d95-4181-96e8-cdc2f6ab538b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Gunulhona_Gemma-Ko-Merge-PEFT/1762652579.635783", - "retrieved_timestamp": "1762652579.635786", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Gunulhona/Gemma-Ko-Merge-PEFT", - "developer": "google", - "inference_platform": "unknown", - "id": "Gunulhona/Gemma-Ko-Merge-PEFT", - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 20.318 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4441348954108433 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4862989687822461 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3985833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3097573138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/Gunulhona/Gemma-Ko-Merge-PEFT/f9fb4008-db4e-4a84-b12b-050bdf35084f.json b/data/hfopenllm_v2/google/Gunulhona/Gemma-Ko-Merge-PEFT/f9fb4008-db4e-4a84-b12b-050bdf35084f.json deleted file mode 100644 index bf66b914e..000000000 --- a/data/hfopenllm_v2/google/Gunulhona/Gemma-Ko-Merge-PEFT/f9fb4008-db4e-4a84-b12b-050bdf35084f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Gunulhona_Gemma-Ko-Merge-PEFT/1762652579.635457", - "retrieved_timestamp": "1762652579.635457", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Gunulhona/Gemma-Ko-Merge-PEFT", - "developer": "google", - "inference_platform": "unknown", - "id": "Gunulhona/Gemma-Ko-Merge-PEFT", - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 20.318 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28803906966847964 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5154093999781059 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40801041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy 
on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38173204787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/Gunulhona/Gemma-Ko-Merge/dccf426d-63bb-4298-958f-d1f4776f03b2.json b/data/hfopenllm_v2/google/Gunulhona/Gemma-Ko-Merge/dccf426d-63bb-4298-958f-d1f4776f03b2.json deleted file mode 100644 index 442840a3c..000000000 --- a/data/hfopenllm_v2/google/Gunulhona/Gemma-Ko-Merge/dccf426d-63bb-4298-958f-d1f4776f03b2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Gunulhona_Gemma-Ko-Merge/1762652579.635146", - "retrieved_timestamp": "1762652579.635147", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Gunulhona/Gemma-Ko-Merge", - "developer": "google", - "inference_platform": "unknown", - "id": "Gunulhona/Gemma-Ko-Merge", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6415721397004392 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5813027258981727 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18806646525679757 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33557046979865773 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40469791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3878823138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/HuggingFaceH4/zephyr-7b-gemma-v0.1/dcf4d2bb-ee8f-4083-baf6-8870731515fa.json b/data/hfopenllm_v2/google/HuggingFaceH4/zephyr-7b-gemma-v0.1/dcf4d2bb-ee8f-4083-baf6-8870731515fa.json deleted file mode 100644 index a27977aaa..000000000 --- a/data/hfopenllm_v2/google/HuggingFaceH4/zephyr-7b-gemma-v0.1/dcf4d2bb-ee8f-4083-baf6-8870731515fa.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/HuggingFaceH4_zephyr-7b-gemma-v0.1/1762652579.641236", - "retrieved_timestamp": "1762652579.641237", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "HuggingFaceH4/zephyr-7b-gemma-v0.1", - "developer": "google", - "inference_platform": "unknown", - "id": "HuggingFaceH4/zephyr-7b-gemma-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 8.538 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3363741539116212 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4623735014679749 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08157099697885196 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37396874999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2847406914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/INSAIT-Institute/BgGPT-Gemma-2-27B-IT-v1.0/51d4db96-4c38-464a-9e7f-0ade67699c8d.json b/data/hfopenllm_v2/google/INSAIT-Institute/BgGPT-Gemma-2-27B-IT-v1.0/51d4db96-4c38-464a-9e7f-0ade67699c8d.json deleted file mode 100644 index 26d153096..000000000 --- a/data/hfopenllm_v2/google/INSAIT-Institute/BgGPT-Gemma-2-27B-IT-v1.0/51d4db96-4c38-464a-9e7f-0ade67699c8d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/INSAIT-Institute_BgGPT-Gemma-2-27B-IT-v1.0/1762652579.645844", - "retrieved_timestamp": "1762652579.645845", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "INSAIT-Institute/BgGPT-Gemma-2-27B-IT-v1.0", - "developer": "google", - "inference_platform": "unknown", - "id": "INSAIT-Institute/BgGPT-Gemma-2-27B-IT-v1.0", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", 
- "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911778102988436 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35753125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11668882978723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/IlyaGusev/gemma-2-2b-it-abliterated/e3ee4f00-1037-4da7-96e2-934b5ccefd15.json b/data/hfopenllm_v2/google/IlyaGusev/gemma-2-2b-it-abliterated/e3ee4f00-1037-4da7-96e2-934b5ccefd15.json deleted file mode 100644 index 27b41052d..000000000 --- a/data/hfopenllm_v2/google/IlyaGusev/gemma-2-2b-it-abliterated/e3ee4f00-1037-4da7-96e2-934b5ccefd15.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/IlyaGusev_gemma-2-2b-it-abliterated/1762652579.646105", - "retrieved_timestamp": "1762652579.646106", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "IlyaGusev/gemma-2-2b-it-abliterated", - "developer": "google", - "inference_platform": "unknown", - "id": "IlyaGusev/gemma-2-2b-it-abliterated", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.533086654521115 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4118601326211988 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on 
GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37818749999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25382313829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/IlyaGusev/gemma-2-9b-it-abliterated/8a81c9e6-1c72-46f6-98c6-0d3b28ba5633.json b/data/hfopenllm_v2/google/IlyaGusev/gemma-2-9b-it-abliterated/8a81c9e6-1c72-46f6-98c6-0d3b28ba5633.json deleted file mode 100644 index 2fa34ac9b..000000000 --- a/data/hfopenllm_v2/google/IlyaGusev/gemma-2-9b-it-abliterated/8a81c9e6-1c72-46f6-98c6-0d3b28ba5633.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/IlyaGusev_gemma-2-9b-it-abliterated/1762652579.646349", - "retrieved_timestamp": "1762652579.6463501", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "IlyaGusev/gemma-2-9b-it-abliterated", - "developer": "google", - "inference_platform": "unknown", - "id": "IlyaGusev/gemma-2-9b-it-abliterated", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.747259493698941 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.59063299776093 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17749244712990936 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34563758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4033645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39153922872340424 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/google/LenguajeNaturalAI/leniachat-gemma-2b-v0/af954640-6806-4e4c-9c0b-b81215eadfc8.json b/data/hfopenllm_v2/google/LenguajeNaturalAI/leniachat-gemma-2b-v0/af954640-6806-4e4c-9c0b-b81215eadfc8.json deleted file mode 100644 index c45754e81..000000000 --- a/data/hfopenllm_v2/google/LenguajeNaturalAI/leniachat-gemma-2b-v0/af954640-6806-4e4c-9c0b-b81215eadfc8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LenguajeNaturalAI_leniachat-gemma-2b-v0/1762652579.7101068", - "retrieved_timestamp": "1762652579.7101078", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LenguajeNaturalAI/leniachat-gemma-2b-v0", - "developer": "google", - "inference_platform": "unknown", - "id": "LenguajeNaturalAI/leniachat-gemma-2b-v0", - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 2.506 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21497404664069114 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30740211895412034 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.011329305135951661 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36590625000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11702127659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/ModelSpace/GemmaX2-28-9B-v0.1/6cb560eb-08f5-4430-8797-1116f1d2f56c.json b/data/hfopenllm_v2/google/ModelSpace/GemmaX2-28-9B-v0.1/6cb560eb-08f5-4430-8797-1116f1d2f56c.json deleted file mode 100644 index 775c9e77a..000000000 --- a/data/hfopenllm_v2/google/ModelSpace/GemmaX2-28-9B-v0.1/6cb560eb-08f5-4430-8797-1116f1d2f56c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ModelSpace_GemmaX2-28-9B-v0.1/1762652579.76179", - "retrieved_timestamp": "1762652579.761791", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging 
Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ModelSpace/GemmaX2-28-9B-v0.1", - "developer": "google", - "inference_platform": "unknown", - "id": "ModelSpace/GemmaX2-28-9B-v0.1", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.003921816336210145 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3687226427280163 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027190332326283987 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35365625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2230718085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/NAPS-ai/naps-gemma-2-27b-v-0.1.0/8768f068-452f-4a54-bddb-9f6cffaf5a19.json b/data/hfopenllm_v2/google/NAPS-ai/naps-gemma-2-27b-v-0.1.0/8768f068-452f-4a54-bddb-9f6cffaf5a19.json deleted file mode 100644 index b37a8818c..000000000 --- a/data/hfopenllm_v2/google/NAPS-ai/naps-gemma-2-27b-v-0.1.0/8768f068-452f-4a54-bddb-9f6cffaf5a19.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/NAPS-ai_naps-gemma-2-27b-v-0.1.0/1762652579.7653928", - "retrieved_timestamp": "1762652579.765394", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NAPS-ai/naps-gemma-2-27b-v-0.1.0", - "developer": "google", - "inference_platform": "unknown", - "id": "NAPS-ai/naps-gemma-2-27b-v-0.1.0", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": 
"Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911778102988436 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35753125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11677194148936171 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/NAPS-ai/naps-gemma-2-27b-v0.1.0/b004d154-392d-4f31-afbb-547b058996bd.json b/data/hfopenllm_v2/google/NAPS-ai/naps-gemma-2-27b-v0.1.0/b004d154-392d-4f31-afbb-547b058996bd.json deleted file mode 100644 index 90bce0632..000000000 --- a/data/hfopenllm_v2/google/NAPS-ai/naps-gemma-2-27b-v0.1.0/b004d154-392d-4f31-afbb-547b058996bd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/NAPS-ai_naps-gemma-2-27b-v0.1.0/1762652579.765648", - "retrieved_timestamp": "1762652579.7656488", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NAPS-ai/naps-gemma-2-27b-v0.1.0", - "developer": "google", - "inference_platform": "unknown", - "id": "NAPS-ai/naps-gemma-2-27b-v0.1.0", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911778102988436 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35753125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11677194148936171 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/SaisExperiments/Gemma-2-2B-Stheno-Filtered/16070acb-e8bb-476a-b5aa-863a85cb0aee.json b/data/hfopenllm_v2/google/SaisExperiments/Gemma-2-2B-Stheno-Filtered/16070acb-e8bb-476a-b5aa-863a85cb0aee.json deleted file mode 100644 index 56ad7f1d6..000000000 --- a/data/hfopenllm_v2/google/SaisExperiments/Gemma-2-2B-Stheno-Filtered/16070acb-e8bb-476a-b5aa-863a85cb0aee.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SaisExperiments_Gemma-2-2B-Stheno-Filtered/1762652579.855671", - "retrieved_timestamp": "1762652579.8556721", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SaisExperiments/Gemma-2-2B-Stheno-Filtered", - "developer": "google", - "inference_platform": "unknown", - "id": "SaisExperiments/Gemma-2-2B-Stheno-Filtered", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4196554032190144 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4149234152222183 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04607250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40029166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2629654255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/Skywork/Skywork-Reward-Gemma-2-27B-v0.2/140b0661-2961-46f3-8c75-cb75147e0acc.json b/data/hfopenllm_v2/google/Skywork/Skywork-Reward-Gemma-2-27B-v0.2/140b0661-2961-46f3-8c75-cb75147e0acc.json deleted file mode 100644 index 8e875bc02..000000000 --- 
a/data/hfopenllm_v2/google/Skywork/Skywork-Reward-Gemma-2-27B-v0.2/140b0661-2961-46f3-8c75-cb75147e0acc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Skywork_Skywork-Reward-Gemma-2-27B-v0.2/1762652579.8884969", - "retrieved_timestamp": "1762652579.8884978", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Skywork/Skywork-Reward-Gemma-2-27B-v0.2", - "developer": "google", - "inference_platform": "unknown", - "id": "Skywork/Skywork-Reward-Gemma-2-27B-v0.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForSequenceClassification", - "params_billions": 27.227 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7807317916461656 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.635960062329604 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22734138972809667 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34395973154362414 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42314583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4103224734042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/Sorawiz/Gemma-9B-Base/246e4c1f-016c-411e-870e-9ade63713daa.json b/data/hfopenllm_v2/google/Sorawiz/Gemma-9B-Base/246e4c1f-016c-411e-870e-9ade63713daa.json deleted file mode 100644 index f56da30f9..000000000 --- a/data/hfopenllm_v2/google/Sorawiz/Gemma-9B-Base/246e4c1f-016c-411e-870e-9ade63713daa.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sorawiz_Gemma-9B-Base/1762652579.8897338", - "retrieved_timestamp": "1762652579.889735", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sorawiz/Gemma-9B-Base", - "developer": "google", - "inference_platform": "unknown", - "id": "Sorawiz/Gemma-9B-Base", - "additional_details": 
{ - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16673758959560633 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.593040577894583 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09818731117824774 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33976510067114096 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40451041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42353723404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/Sorawiz/Gemma-Creative-9B-Base/26229a4f-9f53-453f-9899-77808040f8cb.json b/data/hfopenllm_v2/google/Sorawiz/Gemma-Creative-9B-Base/26229a4f-9f53-453f-9899-77808040f8cb.json deleted file mode 100644 index 77d97ed0d..000000000 --- a/data/hfopenllm_v2/google/Sorawiz/Gemma-Creative-9B-Base/26229a4f-9f53-453f-9899-77808040f8cb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sorawiz_Gemma-Creative-9B-Base/1762652579.890075", - "retrieved_timestamp": "1762652579.890076", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sorawiz/Gemma-Creative-9B-Base", - "developer": "google", - "inference_platform": "unknown", - "id": "Sorawiz/Gemma-Creative-9B-Base", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1515002415812267 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5458614335095562 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07779456193353475 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3296979865771812 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.401875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4007646276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/Supichi/BBAI_135_Gemma/64cd00af-6782-431b-aac1-445e39d56717.json b/data/hfopenllm_v2/google/Supichi/BBAI_135_Gemma/64cd00af-6782-431b-aac1-445e39d56717.json deleted file mode 100644 index eba73fc24..000000000 --- a/data/hfopenllm_v2/google/Supichi/BBAI_135_Gemma/64cd00af-6782-431b-aac1-445e39d56717.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Supichi_BBAI_135_Gemma/1762652579.8946822", - "retrieved_timestamp": "1762652579.894683", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Supichi/BBAI_135_Gemma", - "developer": "google", - "inference_platform": "unknown", - "id": "Supichi/BBAI_135_Gemma", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 19.3 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06562144000141845 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35684129093449685 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38047916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16722074468085107 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/TheDrummer/Gemmasutra-9B-v1/3f7a68f4-e456-4ecf-8a5f-1f3698822a89.json b/data/hfopenllm_v2/google/TheDrummer/Gemmasutra-9B-v1/3f7a68f4-e456-4ecf-8a5f-1f3698822a89.json deleted file mode 100644 index d138614ac..000000000 --- a/data/hfopenllm_v2/google/TheDrummer/Gemmasutra-9B-v1/3f7a68f4-e456-4ecf-8a5f-1f3698822a89.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/TheDrummer_Gemmasutra-9B-v1/1762652579.9140742", - "retrieved_timestamp": "1762652579.914075", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "TheDrummer/Gemmasutra-9B-v1", - "developer": "google", - "inference_platform": "unknown", - "id": "TheDrummer/Gemmasutra-9B-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24155130609006326 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5886914248369671 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48459375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4045046542553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/TheDrummer/Gemmasutra-Mini-2B-v1/3c066bd3-ec6c-412d-86a1-759c228610b9.json b/data/hfopenllm_v2/google/TheDrummer/Gemmasutra-Mini-2B-v1/3c066bd3-ec6c-412d-86a1-759c228610b9.json deleted file mode 100644 index b4894fff0..000000000 --- a/data/hfopenllm_v2/google/TheDrummer/Gemmasutra-Mini-2B-v1/3c066bd3-ec6c-412d-86a1-759c228610b9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/TheDrummer_Gemmasutra-Mini-2B-v1/1762652579.914318", - "retrieved_timestamp": "1762652579.914319", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "TheDrummer/Gemmasutra-Mini-2B-v1", - "developer": "google", - "inference_platform": "unknown", - "id": "TheDrummer/Gemmasutra-Mini-2B-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25486597782771936 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35750190791471836 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0377643504531722 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3489791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20545212765957446 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/TheDrummer/Tiger-Gemma-9B-v1/7b093f59-7a4e-4e72-b9a6-7d10870917ea.json b/data/hfopenllm_v2/google/TheDrummer/Tiger-Gemma-9B-v1/7b093f59-7a4e-4e72-b9a6-7d10870917ea.json deleted file mode 100644 index 5ea02cf9d..000000000 --- a/data/hfopenllm_v2/google/TheDrummer/Tiger-Gemma-9B-v1/7b093f59-7a4e-4e72-b9a6-7d10870917ea.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/TheDrummer_Tiger-Gemma-9B-v1/1762652579.915312", - "retrieved_timestamp": "1762652579.915313", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "TheDrummer/Tiger-Gemma-9B-v1", - "developer": "google", - "inference_platform": "unknown", - "id": "TheDrummer/Tiger-Gemma-9B-v1", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.728150197032762 - } - }, - { - "evaluation_name": 
"BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5703687739329574 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18353474320241692 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3389261744966443 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41616666666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41181848404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/TheDrummer/Tiger-Gemma-9B-v2/962205b9-009a-4201-b382-5143c80e78ce.json b/data/hfopenllm_v2/google/TheDrummer/Tiger-Gemma-9B-v2/962205b9-009a-4201-b382-5143c80e78ce.json deleted file mode 100644 index 273890750..000000000 --- a/data/hfopenllm_v2/google/TheDrummer/Tiger-Gemma-9B-v2/962205b9-009a-4201-b382-5143c80e78ce.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/TheDrummer_Tiger-Gemma-9B-v2/1762652579.915529", - "retrieved_timestamp": "1762652579.91553", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "TheDrummer/Tiger-Gemma-9B-v2", - "developer": "google", - "inference_platform": "unknown", - "id": "TheDrummer/Tiger-Gemma-9B-v2", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6985997154217476 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5617191114121779 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18202416918429004 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33976510067114096 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40841666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41123670212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/TheDrummer/Tiger-Gemma-9B-v3/6fbfd3ba-e28a-4e9d-be12-e04b6d50b9ee.json b/data/hfopenllm_v2/google/TheDrummer/Tiger-Gemma-9B-v3/6fbfd3ba-e28a-4e9d-be12-e04b6d50b9ee.json deleted file mode 100644 index edbaa9603..000000000 --- a/data/hfopenllm_v2/google/TheDrummer/Tiger-Gemma-9B-v3/6fbfd3ba-e28a-4e9d-be12-e04b6d50b9ee.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/TheDrummer_Tiger-Gemma-9B-v3/1762652579.915734", - "retrieved_timestamp": "1762652579.915734", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "TheDrummer/Tiger-Gemma-9B-v3", - "developer": "google", - "inference_platform": "unknown", - "id": "TheDrummer/Tiger-Gemma-9B-v3", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6820635912711606 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5812231557853248 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1623867069486405 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3389261744966443 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4003541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40591755319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/Triangle104/Gemmadevi-Stock-10B/153fd43a-fe54-4a99-98dd-5420f2bf8b66.json b/data/hfopenllm_v2/google/Triangle104/Gemmadevi-Stock-10B/153fd43a-fe54-4a99-98dd-5420f2bf8b66.json deleted file mode 100644 index bd0367ef4..000000000 
--- a/data/hfopenllm_v2/google/Triangle104/Gemmadevi-Stock-10B/153fd43a-fe54-4a99-98dd-5420f2bf8b66.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Gemmadevi-Stock-10B/1762652579.9249291", - "retrieved_timestamp": "1762652579.9249291", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Gemmadevi-Stock-10B", - "developer": "google", - "inference_platform": "unknown", - "id": "Triangle104/Gemmadevi-Stock-10B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15819470117067158 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6065922684184144 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09667673716012085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35318791946308725 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46211458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4261968085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter1/687769ed-44e9-4f3d-aee6-2dc4e98dd7ee.json b/data/hfopenllm_v2/google/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter1/687769ed-44e9-4f3d-aee6-2dc4e98dd7ee.json deleted file mode 100644 index 7fe911c20..000000000 --- a/data/hfopenllm_v2/google/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter1/687769ed-44e9-4f3d-aee6-2dc4e98dd7ee.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/UCLA-AGI_Gemma-2-9B-It-SPPO-Iter1/1762652579.936019", - "retrieved_timestamp": "1762652579.93602", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter1", - "developer": "google", - "inference_platform": "unknown", - "id": 
"UCLA-AGI/Gemma-2-9B-It-SPPO-Iter1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.308221075634871 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5968934762705508 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08987915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33640939597315433 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4099375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39070811170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter2/fa584f01-69eb-4ecc-9f0d-049b6bfb05c8.json b/data/hfopenllm_v2/google/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter2/fa584f01-69eb-4ecc-9f0d-049b6bfb05c8.json deleted file mode 100644 index d9958a31d..000000000 --- a/data/hfopenllm_v2/google/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter2/fa584f01-69eb-4ecc-9f0d-049b6bfb05c8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/UCLA-AGI_Gemma-2-9B-It-SPPO-Iter2/1762652579.936279", - "retrieved_timestamp": "1762652579.93628", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter2", - "developer": "google", - "inference_platform": "unknown", - "id": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3100196367859502 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5989880877421281 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": 
{ - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08081570996978851 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3347315436241611 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4139375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.386968085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3/f318d457-d295-4447-9222-0b0d92708b5d.json b/data/hfopenllm_v2/google/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3/f318d457-d295-4447-9222-0b0d92708b5d.json deleted file mode 100644 index 7118be0b9..000000000 --- a/data/hfopenllm_v2/google/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3/f318d457-d295-4447-9222-0b0d92708b5d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/UCLA-AGI_Gemma-2-9B-It-SPPO-Iter3/1762652579.9364889", - "retrieved_timestamp": "1762652579.93649", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3", - "developer": "google", - "inference_platform": "unknown", - "id": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31671409637539505 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6007080229268026 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07099697885196375 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3389261744966443 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41660416666666666 - } - }, - { - "evaluation_name": 
"MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.382563164893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/VAGOsolutions/SauerkrautLM-Gemma-2b/b002a274-9b4f-40ad-b0c7-e4efabbe431f.json b/data/hfopenllm_v2/google/VAGOsolutions/SauerkrautLM-Gemma-2b/b002a274-9b4f-40ad-b0c7-e4efabbe431f.json deleted file mode 100644 index cfb7b42f6..000000000 --- a/data/hfopenllm_v2/google/VAGOsolutions/SauerkrautLM-Gemma-2b/b002a274-9b4f-40ad-b0c7-e4efabbe431f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-Gemma-2b/1762652579.941349", - "retrieved_timestamp": "1762652579.94135", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "VAGOsolutions/SauerkrautLM-Gemma-2b", - "developer": "google", - "inference_platform": "unknown", - "id": "VAGOsolutions/SauerkrautLM-Gemma-2b", - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 2.506 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24752213017017072 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3416315376053174 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3675833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14685837765957446 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/VAGOsolutions/SauerkrautLM-Gemma-7b/e66f4326-2585-4581-b45f-d9a81fb1576c.json b/data/hfopenllm_v2/google/VAGOsolutions/SauerkrautLM-Gemma-7b/e66f4326-2585-4581-b45f-d9a81fb1576c.json deleted file mode 100644 index 09155e563..000000000 --- a/data/hfopenllm_v2/google/VAGOsolutions/SauerkrautLM-Gemma-7b/e66f4326-2585-4581-b45f-d9a81fb1576c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/VAGOsolutions_SauerkrautLM-Gemma-7b/1762652579.9415941", - "retrieved_timestamp": "1762652579.9415948", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "VAGOsolutions/SauerkrautLM-Gemma-7b", - "developer": "google", - "inference_platform": "unknown", - "id": "VAGOsolutions/SauerkrautLM-Gemma-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 8.538 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3406705319662939 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41879127895858687 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06722054380664652 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35942708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961269946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/VAGOsolutions/SauerkrautLM-gemma-2-2b-it/b010858c-edb5-4e49-b5b6-72b06943ab2c.json b/data/hfopenllm_v2/google/VAGOsolutions/SauerkrautLM-gemma-2-2b-it/b010858c-edb5-4e49-b5b6-72b06943ab2c.json deleted file mode 100644 index 23a75486b..000000000 --- a/data/hfopenllm_v2/google/VAGOsolutions/SauerkrautLM-gemma-2-2b-it/b010858c-edb5-4e49-b5b6-72b06943ab2c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-gemma-2-2b-it/1762652579.9427688", - "retrieved_timestamp": "1762652579.94277", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "VAGOsolutions/SauerkrautLM-gemma-2-2b-it", - "developer": "google", - "inference_platform": "unknown", - "id": "VAGOsolutions/SauerkrautLM-gemma-2-2b-it", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13206625088099574 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42408371860644856 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3994583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.269281914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/VAGOsolutions/SauerkrautLM-gemma-2-9b-it/5395cbac-afe0-4936-b4eb-f554fcb5be75.json b/data/hfopenllm_v2/google/VAGOsolutions/SauerkrautLM-gemma-2-9b-it/5395cbac-afe0-4936-b4eb-f554fcb5be75.json deleted file mode 100644 index 2623d45f1..000000000 --- a/data/hfopenllm_v2/google/VAGOsolutions/SauerkrautLM-gemma-2-9b-it/5395cbac-afe0-4936-b4eb-f554fcb5be75.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-gemma-2-9b-it/1762652579.94298", - "retrieved_timestamp": "1762652579.942981", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "VAGOsolutions/SauerkrautLM-gemma-2-9b-it", - "developer": "google", - "inference_platform": "unknown", - "id": "VAGOsolutions/SauerkrautLM-gemma-2-9b-it", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3024009627787604 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6072645787154746 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08383685800604229 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271812080536913 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43182291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40907579787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/Youlln/4PRYMMAL-GEMMA2-9B-SLERP/06b75d54-4d17-4116-a4d5-0917eedb2dc4.json b/data/hfopenllm_v2/google/Youlln/4PRYMMAL-GEMMA2-9B-SLERP/06b75d54-4d17-4116-a4d5-0917eedb2dc4.json deleted file mode 100644 index 19842195b..000000000 --- a/data/hfopenllm_v2/google/Youlln/4PRYMMAL-GEMMA2-9B-SLERP/06b75d54-4d17-4116-a4d5-0917eedb2dc4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Youlln_4PRYMMAL-GEMMA2-9B-SLERP/1762652579.961175", - "retrieved_timestamp": "1762652579.9611762", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Youlln/4PRYMMAL-GEMMA2-9B-SLERP", - "developer": "google", - "inference_platform": "unknown", - "id": "Youlln/4PRYMMAL-GEMMA2-9B-SLERP", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2713766140507188 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5922529923998928 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09063444108761329 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46719791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42096077127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/ZHLiu627/zephyr-7b-gemma-rpo-avg/6333359d-1cf7-4905-9a48-f8a8f7b46ed2.json b/data/hfopenllm_v2/google/ZHLiu627/zephyr-7b-gemma-rpo-avg/6333359d-1cf7-4905-9a48-f8a8f7b46ed2.json deleted file mode 100644 index bedcf6cf1..000000000 --- a/data/hfopenllm_v2/google/ZHLiu627/zephyr-7b-gemma-rpo-avg/6333359d-1cf7-4905-9a48-f8a8f7b46ed2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ZHLiu627_zephyr-7b-gemma-rpo-avg/1762652579.9660559", - "retrieved_timestamp": "1762652579.966057", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ZHLiu627/zephyr-7b-gemma-rpo-avg", - "developer": "google", - "inference_platform": "unknown", - "id": "ZHLiu627/zephyr-7b-gemma-rpo-avg", - "additional_details": { - "precision": "float16", - "architecture": "GemmaForCausalLM", - "params_billions": 8.538 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30060350979844586 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41832761356743015 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04984894259818731 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40810416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2830784574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/agentlans/Gemma2-9B-AdvancedFuse/3bcdf1ca-ad29-45cf-ac97-6bc508981545.json b/data/hfopenllm_v2/google/agentlans/Gemma2-9B-AdvancedFuse/3bcdf1ca-ad29-45cf-ac97-6bc508981545.json deleted file mode 100644 index 3e2037bc2..000000000 --- a/data/hfopenllm_v2/google/agentlans/Gemma2-9B-AdvancedFuse/3bcdf1ca-ad29-45cf-ac97-6bc508981545.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/agentlans_Gemma2-9B-AdvancedFuse/1762652579.975734", - "retrieved_timestamp": "1762652579.975735", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "agentlans/Gemma2-9B-AdvancedFuse", - "developer": "google", - "inference_platform": "unknown", - "id": "agentlans/Gemma2-9B-AdvancedFuse", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15427288483446144 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.585936684475517 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10045317220543806 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3347315436241611 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4230833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4000166223404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp1-2.6B/e52ac657-26a3-499a-949f-bf2a0b620d8e.json b/data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp1-2.6B/e52ac657-26a3-499a-949f-bf2a0b620d8e.json deleted file mode 100644 index dbf9c6435..000000000 --- a/data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp1-2.6B/e52ac657-26a3-499a-949f-bf2a0b620d8e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Gemma2Slerp1-2.6B/1762652579.985875", - "retrieved_timestamp": "1762652579.985876", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Gemma2Slerp1-2.6B", - "developer": "google", - "inference_platform": "unknown", - "id": "allknowingroger/Gemma2Slerp1-2.6B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", 
- "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5354348683714766 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4343094462630086 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45616666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26886635638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp1-27B/42d79295-bdb0-411d-b1b0-5cff954e925c.json b/data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp1-27B/42d79295-bdb0-411d-b1b0-5cff954e925c.json deleted file mode 100644 index 9b90cef6d..000000000 --- a/data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp1-27B/42d79295-bdb0-411d-b1b0-5cff954e925c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Gemma2Slerp1-27B/1762652579.986121", - "retrieved_timestamp": "1762652579.986122", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Gemma2Slerp1-27B", - "developer": "google", - "inference_platform": "unknown", - "id": "allknowingroger/Gemma2Slerp1-27B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7186332265056716 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6398902146527521 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2583081570996979 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, 
- "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3640939597315436 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47671875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44564494680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp2-2.6B/eeb46285-0c8d-43b7-9b6d-e86c24064fde.json b/data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp2-2.6B/eeb46285-0c8d-43b7-9b6d-e86c24064fde.json deleted file mode 100644 index 1c468574e..000000000 --- a/data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp2-2.6B/eeb46285-0c8d-43b7-9b6d-e86c24064fde.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Gemma2Slerp2-2.6B/1762652579.98633", - "retrieved_timestamp": "1762652579.98633", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Gemma2Slerp2-2.6B", - "developer": "google", - "inference_platform": "unknown", - "id": "allknowingroger/Gemma2Slerp2-2.6B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5747272791748117 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4307646783089521 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09063444108761329 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44677083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26961436170212766 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp2-27B/1f2c33e8-2d7b-4bd5-81e8-1c9bcae0ae8f.json b/data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp2-27B/1f2c33e8-2d7b-4bd5-81e8-1c9bcae0ae8f.json deleted file mode 100644 index 1e4d94bc9..000000000 --- a/data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp2-27B/1f2c33e8-2d7b-4bd5-81e8-1c9bcae0ae8f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Gemma2Slerp2-27B/1762652579.986531", - "retrieved_timestamp": "1762652579.9865322", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Gemma2Slerp2-27B", - "developer": "google", - "inference_platform": "unknown", - "id": "allknowingroger/Gemma2Slerp2-27B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7545534736720789 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6557274121032689 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27870090634441086 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3699664429530201 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46208333333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46226728723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp3-27B/648810d4-4dd5-48c7-a4d7-b3d9d2f3f3f2.json b/data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp3-27B/648810d4-4dd5-48c7-a4d7-b3d9d2f3f3f2.json deleted file mode 100644 index 723c89cdb..000000000 --- a/data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp3-27B/648810d4-4dd5-48c7-a4d7-b3d9d2f3f3f2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Gemma2Slerp3-27B/1762652579.986752", - "retrieved_timestamp": "1762652579.986753", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Gemma2Slerp3-27B", - "developer": "google", - "inference_platform": "unknown", - "id": "allknowingroger/Gemma2Slerp3-27B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7426384216102164 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6499638721230724 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27416918429003023 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3548657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47402083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4640957446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp4-27B/f94f3bf1-cf85-4673-a5cf-368f250233e4.json b/data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp4-27B/f94f3bf1-cf85-4673-a5cf-368f250233e4.json deleted file mode 100644 index 397c2a094..000000000 --- a/data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp4-27B/f94f3bf1-cf85-4673-a5cf-368f250233e4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Gemma2Slerp4-27B/1762652579.986965", - "retrieved_timestamp": "1762652579.9869661", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Gemma2Slerp4-27B", - "developer": "google", - "inference_platform": "unknown", - "id": "allknowingroger/Gemma2Slerp4-27B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7496575752337131 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6529581339749019 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2719033232628399 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36661073825503354 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4502395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46492686170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/allknowingroger/GemmaSlerp-9B/3aed9fd2-45bd-4568-8885-7fc2370bb26d.json b/data/hfopenllm_v2/google/allknowingroger/GemmaSlerp-9B/3aed9fd2-45bd-4568-8885-7fc2370bb26d.json deleted file mode 100644 index 1e24494a1..000000000 --- a/data/hfopenllm_v2/google/allknowingroger/GemmaSlerp-9B/3aed9fd2-45bd-4568-8885-7fc2370bb26d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_GemmaSlerp-9B/1762652579.987181", - "retrieved_timestamp": "1762652579.9871821", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/GemmaSlerp-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "allknowingroger/GemmaSlerp-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.704320092909037 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.592057786577488 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21601208459214502 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34395973154362414 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46732291666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41605718085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/allknowingroger/GemmaSlerp2-9B/99333370-c7d5-4763-b3a4-14adde0fab9e.json b/data/hfopenllm_v2/google/allknowingroger/GemmaSlerp2-9B/99333370-c7d5-4763-b3a4-14adde0fab9e.json deleted file mode 100644 index 5a2c26ea6..000000000 --- a/data/hfopenllm_v2/google/allknowingroger/GemmaSlerp2-9B/99333370-c7d5-4763-b3a4-14adde0fab9e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_GemmaSlerp2-9B/1762652579.987394", - "retrieved_timestamp": "1762652579.987395", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/GemmaSlerp2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "allknowingroger/GemmaSlerp2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7281003293483512 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.598271299766216 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2107250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3523489932885906 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47671875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42386968085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/allknowingroger/GemmaSlerp4-10B/32e38c82-d412-4888-9d9d-f89aef0989fd.json b/data/hfopenllm_v2/google/allknowingroger/GemmaSlerp4-10B/32e38c82-d412-4888-9d9d-f89aef0989fd.json deleted file mode 100644 index ddacaa254..000000000 --- 
a/data/hfopenllm_v2/google/allknowingroger/GemmaSlerp4-10B/32e38c82-d412-4888-9d9d-f89aef0989fd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_GemmaSlerp4-10B/1762652579.9875991", - "retrieved_timestamp": "1762652579.9875998", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/GemmaSlerp4-10B", - "developer": "google", - "inference_platform": "unknown", - "id": "allknowingroger/GemmaSlerp4-10B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7326216660682544 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6027862253440982 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2243202416918429 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35318791946308725 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45398958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4250332446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/allknowingroger/GemmaSlerp5-10B/e325b56f-4306-4e37-adc5-c09b300a8c30.json b/data/hfopenllm_v2/google/allknowingroger/GemmaSlerp5-10B/e325b56f-4306-4e37-adc5-c09b300a8c30.json deleted file mode 100644 index f7ebc56d5..000000000 --- a/data/hfopenllm_v2/google/allknowingroger/GemmaSlerp5-10B/e325b56f-4306-4e37-adc5-c09b300a8c30.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_GemmaSlerp5-10B/1762652579.9878101", - "retrieved_timestamp": "1762652579.987811", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/GemmaSlerp5-10B", - "developer": "google", - "inference_platform": "unknown", - "id": "allknowingroger/GemmaSlerp5-10B", - 
"additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7353444416370785 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.605447654436423 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21827794561933533 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3523489932885906 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46078125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4328457446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/allknowingroger/GemmaStock1-27B/0b19d8bb-1952-4515-8d29-e55e1106e92b.json b/data/hfopenllm_v2/google/allknowingroger/GemmaStock1-27B/0b19d8bb-1952-4515-8d29-e55e1106e92b.json deleted file mode 100644 index fc6a1b13f..000000000 --- a/data/hfopenllm_v2/google/allknowingroger/GemmaStock1-27B/0b19d8bb-1952-4515-8d29-e55e1106e92b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_GemmaStock1-27B/1762652579.9880252", - "retrieved_timestamp": "1762652579.9880252", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/GemmaStock1-27B", - "developer": "google", - "inference_platform": "unknown", - "id": "allknowingroger/GemmaStock1-27B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7509064836855099 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6565607454366021 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on 
MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.263595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3640939597315436 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45268749999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47298869680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/anakin87/gemma-2b-orpo/80531a18-00d3-4264-bf84-cd1d4d90df08.json b/data/hfopenllm_v2/google/anakin87/gemma-2b-orpo/80531a18-00d3-4264-bf84-cd1d4d90df08.json deleted file mode 100644 index 5a9f4cbb4..000000000 --- a/data/hfopenllm_v2/google/anakin87/gemma-2b-orpo/80531a18-00d3-4264-bf84-cd1d4d90df08.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/anakin87_gemma-2b-orpo/1762652580.010973", - "retrieved_timestamp": "1762652580.010974", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "anakin87/gemma-2b-orpo", - "developer": "google", - "inference_platform": "unknown", - "id": "anakin87/gemma-2b-orpo", - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 2.506 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24779695651981187 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34261709435617754 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0188821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37276041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1305684840425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/anthracite-org/magnum-v3-9b-customgemma2/865b86aa-7b8d-4619-aa57-3c57cc4c7b51.json b/data/hfopenllm_v2/google/anthracite-org/magnum-v3-9b-customgemma2/865b86aa-7b8d-4619-aa57-3c57cc4c7b51.json deleted file mode 100644 index aaf7ad005..000000000 --- a/data/hfopenllm_v2/google/anthracite-org/magnum-v3-9b-customgemma2/865b86aa-7b8d-4619-aa57-3c57cc4c7b51.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v3-9b-customgemma2/1762652580.012768", - "retrieved_timestamp": "1762652580.012769", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "anthracite-org/magnum-v3-9b-customgemma2", - "developer": "google", - "inference_platform": "unknown", - "id": "anthracite-org/magnum-v3-9b-customgemma2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1272955757390391 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5340136936916174 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45646875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4204621010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/beomi/gemma-mling-7b/2568a2b7-e95c-4224-9850-5816466b50f2.json b/data/hfopenllm_v2/google/beomi/gemma-mling-7b/2568a2b7-e95c-4224-9850-5816466b50f2.json deleted file mode 100644 index c57192acc..000000000 --- a/data/hfopenllm_v2/google/beomi/gemma-mling-7b/2568a2b7-e95c-4224-9850-5816466b50f2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/beomi_gemma-mling-7b/1762652580.030431", - "retrieved_timestamp": "1762652580.030431", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "beomi/gemma-mling-7b", - "developer": "google", - "inference_platform": "unknown", - "id": "beomi/gemma-mling-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 8.538 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20290939152559653 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40675941947154004 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37585416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2632978723404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/bunnycore/Gemma-2-2B-Smart/ebada07f-e700-4f38-aec0-f801959969e6.json b/data/hfopenllm_v2/google/bunnycore/Gemma-2-2B-Smart/ebada07f-e700-4f38-aec0-f801959969e6.json deleted file mode 100644 index b5b203959..000000000 --- a/data/hfopenllm_v2/google/bunnycore/Gemma-2-2B-Smart/ebada07f-e700-4f38-aec0-f801959969e6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Gemma-2-2B-Smart/1762652580.044707", - "retrieved_timestamp": "1762652580.044708", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Gemma-2-2B-Smart", - "developer": "google", - "inference_platform": "unknown", - "id": "bunnycore/Gemma-2-2B-Smart", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.13206625088099574 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39742674570492836 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03323262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4248541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2426030585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/bunnycore/Gemma2-9B-TitanFusion/95a2d032-e2a4-46df-84d2-6b7529d5bb01.json b/data/hfopenllm_v2/google/bunnycore/Gemma2-9B-TitanFusion/95a2d032-e2a4-46df-84d2-6b7529d5bb01.json deleted file mode 100644 index c9937f1da..000000000 --- a/data/hfopenllm_v2/google/bunnycore/Gemma2-9B-TitanFusion/95a2d032-e2a4-46df-84d2-6b7529d5bb01.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Gemma2-9B-TitanFusion/1762652580.044988", - "retrieved_timestamp": "1762652580.0449889", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Gemma2-9B-TitanFusion", - "developer": "google", - "inference_platform": "unknown", - "id": "bunnycore/Gemma2-9B-TitanFusion", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16184169115724056 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5712026020785131 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0770392749244713 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.33221476510067116 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41362499999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39602726063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/cat-searcher/gemma-2-9b-it-sppo-iter-1-evol-1/af7a7129-1b6a-4ff5-952f-075ae4f7c137.json b/data/hfopenllm_v2/google/cat-searcher/gemma-2-9b-it-sppo-iter-1-evol-1/af7a7129-1b6a-4ff5-952f-075ae4f7c137.json deleted file mode 100644 index 4d729583e..000000000 --- a/data/hfopenllm_v2/google/cat-searcher/gemma-2-9b-it-sppo-iter-1-evol-1/af7a7129-1b6a-4ff5-952f-075ae4f7c137.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cat-searcher_gemma-2-9b-it-sppo-iter-1-evol-1/1762652580.099224", - "retrieved_timestamp": "1762652580.099225", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cat-searcher/gemma-2-9b-it-sppo-iter-1-evol-1", - "developer": "google", - "inference_platform": "unknown", - "id": "cat-searcher/gemma-2-9b-it-sppo-iter-1-evol-1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2941827683878775 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5939369622672414 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08534743202416918 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34060402684563756 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39257291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37998670212765956 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/google/cat-searcher/gemma-2-9b-it-sppo-iter-1/3c33f6b0-dc40-4a61-bbbe-063b9d8d30e3.json b/data/hfopenllm_v2/google/cat-searcher/gemma-2-9b-it-sppo-iter-1/3c33f6b0-dc40-4a61-bbbe-063b9d8d30e3.json deleted file mode 100644 index 74dccf2fa..000000000 --- a/data/hfopenllm_v2/google/cat-searcher/gemma-2-9b-it-sppo-iter-1/3c33f6b0-dc40-4a61-bbbe-063b9d8d30e3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cat-searcher_gemma-2-9b-it-sppo-iter-1/1762652580.091131", - "retrieved_timestamp": "1762652580.091137", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cat-searcher/gemma-2-9b-it-sppo-iter-1", - "developer": "google", - "inference_platform": "unknown", - "id": "cat-searcher/gemma-2-9b-it-sppo-iter-1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30147674836101546 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5971867698707507 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3447986577181208 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39266666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38538896276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/codegemma-1.1-2b/6547b6f3-63dd-4516-b294-62c4246c3dc7.json b/data/hfopenllm_v2/google/codegemma-1.1-2b/6547b6f3-63dd-4516-b294-62c4246c3dc7.json new file mode 100644 index 000000000..262dce559 --- /dev/null +++ b/data/hfopenllm_v2/google/codegemma-1.1-2b/6547b6f3-63dd-4516-b294-62c4246c3dc7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/google_codegemma-1.1-2b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "codegemma-1.1-2b", 
+ "id": "google/codegemma-1.1-2b", + "developer": "google", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 2.506 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2294 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3353 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0128 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3871 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1278 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/cognitivecomputations/dolphin-2.9.4-gemma2-2b/29a10f53-dd38-437b-a7f3-9756035df640.json b/data/hfopenllm_v2/google/cognitivecomputations/dolphin-2.9.4-gemma2-2b/29a10f53-dd38-437b-a7f3-9756035df640.json deleted file mode 100644 index 0662cbd01..000000000 --- a/data/hfopenllm_v2/google/cognitivecomputations/dolphin-2.9.4-gemma2-2b/29a10f53-dd38-437b-a7f3-9756035df640.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.4-gemma2-2b/1762652580.115823", - "retrieved_timestamp": "1762652580.115823", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cognitivecomputations/dolphin-2.9.4-gemma2-2b", - "developer": "google", - 
"inference_platform": "unknown", - "id": "cognitivecomputations/dolphin-2.9.4-gemma2-2b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08955127949396491 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40813187411055213 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04909365558912387 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41796875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2105219414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/djuna/Gemma-2-gemmama-9b/b2f24392-29aa-4a24-b489-87ea9b85daea.json b/data/hfopenllm_v2/google/djuna/Gemma-2-gemmama-9b/b2f24392-29aa-4a24-b489-87ea9b85daea.json deleted file mode 100644 index 89719ac15..000000000 --- a/data/hfopenllm_v2/google/djuna/Gemma-2-gemmama-9b/b2f24392-29aa-4a24-b489-87ea9b85daea.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/djuna_Gemma-2-gemmama-9b/1762652580.12782", - "retrieved_timestamp": "1762652580.127821", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "djuna/Gemma-2-gemmama-9b", - "developer": "google", - "inference_platform": "unknown", - "id": "djuna/Gemma-2-gemmama-9b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7703404743857409 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5420037856495951 - } - }, - { - "evaluation_name": "MATH Level 5", - 
"metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19259818731117825 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33557046979865773 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4031458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3109208776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/dwikitheduck/gemma-2-2b-id-inst/6d66b056-c83d-49b8-ac84-04396c0d97df.json b/data/hfopenllm_v2/google/dwikitheduck/gemma-2-2b-id-inst/6d66b056-c83d-49b8-ac84-04396c0d97df.json deleted file mode 100644 index 44798c8c1..000000000 --- a/data/hfopenllm_v2/google/dwikitheduck/gemma-2-2b-id-inst/6d66b056-c83d-49b8-ac84-04396c0d97df.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/dwikitheduck_gemma-2-2b-id-inst/1762652580.137194", - "retrieved_timestamp": "1762652580.137195", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "dwikitheduck/gemma-2-2b-id-inst", - "developer": "google", - "inference_platform": "unknown", - "id": "dwikitheduck/gemma-2-2b-id-inst", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38785644312646006 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39621721241423097 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41542708333333334 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21733710106382978 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/dwikitheduck/gemma-2-2b-id/000b7f0b-9e2f-499a-9bab-b08767efb8ca.json b/data/hfopenllm_v2/google/dwikitheduck/gemma-2-2b-id/000b7f0b-9e2f-499a-9bab-b08767efb8ca.json deleted file mode 100644 index 6d6aa1261..000000000 --- a/data/hfopenllm_v2/google/dwikitheduck/gemma-2-2b-id/000b7f0b-9e2f-499a-9bab-b08767efb8ca.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/dwikitheduck_gemma-2-2b-id/1762652580.136933", - "retrieved_timestamp": "1762652580.136933", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "dwikitheduck/gemma-2-2b-id", - "developer": "google", - "inference_platform": "unknown", - "id": "dwikitheduck/gemma-2-2b-id", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38785644312646006 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39621721241423097 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41542708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21733710106382978 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/ehristoforu/Gemma2-9B-it-psy10k-mental_health/25c93024-ce65-49d5-96da-00107bb37f77.json b/data/hfopenllm_v2/google/ehristoforu/Gemma2-9B-it-psy10k-mental_health/25c93024-ce65-49d5-96da-00107bb37f77.json deleted file mode 100644 index 7dd4b281a..000000000 --- a/data/hfopenllm_v2/google/ehristoforu/Gemma2-9B-it-psy10k-mental_health/25c93024-ce65-49d5-96da-00107bb37f77.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/ehristoforu_Gemma2-9B-it-psy10k-mental_health/1762652580.139083", - "retrieved_timestamp": "1762652580.139084", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ehristoforu/Gemma2-9B-it-psy10k-mental_health", - "developer": "google", - "inference_platform": "unknown", - "id": "ehristoforu/Gemma2-9B-it-psy10k-mental_health", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5886658510529839 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5539376944027642 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16314199395770393 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.337248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40860416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38289561170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/ehristoforu/Gemma2-9b-it-train6/e289e629-17dd-440e-8839-d5dcbe535fd6.json b/data/hfopenllm_v2/google/ehristoforu/Gemma2-9b-it-train6/e289e629-17dd-440e-8839-d5dcbe535fd6.json deleted file mode 100644 index 79f72dc7a..000000000 --- a/data/hfopenllm_v2/google/ehristoforu/Gemma2-9b-it-train6/e289e629-17dd-440e-8839-d5dcbe535fd6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ehristoforu_Gemma2-9b-it-train6/1762652580.1393359", - "retrieved_timestamp": "1762652580.139337", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ehristoforu/Gemma2-9b-it-train6", - "developer": "google", - "inference_platform": "unknown", - "id": "ehristoforu/Gemma2-9b-it-train6", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { 
- "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7025215317579578 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5898092579133603 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19108761329305135 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40841666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39419880319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/ell44ot/gemma-2b-def/9ba31c7b-13df-46f2-a164-1729563707e1.json b/data/hfopenllm_v2/google/ell44ot/gemma-2b-def/9ba31c7b-13df-46f2-a164-1729563707e1.json deleted file mode 100644 index b48fe8c4c..000000000 --- a/data/hfopenllm_v2/google/ell44ot/gemma-2b-def/9ba31c7b-13df-46f2-a164-1729563707e1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ell44ot_gemma-2b-def/1762652580.147274", - "retrieved_timestamp": "1762652580.147275", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ell44ot/gemma-2b-def", - "developer": "google", - "inference_platform": "unknown", - "id": "ell44ot/gemma-2b-def", - "additional_details": { - "precision": "float16", - "architecture": "GemmaModel", - "params_billions": 1.546 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26930433472076315 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31586532094752634 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02416918429003021 - } - }, - { - "evaluation_name": "GPQA", - 
"metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36702083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15724734042553193 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/flan-t5-base/69eb63bf-72dd-4995-a8ec-49fd304a8ee7.json b/data/hfopenllm_v2/google/flan-t5-base/69eb63bf-72dd-4995-a8ec-49fd304a8ee7.json deleted file mode 100644 index e5838932c..000000000 --- a/data/hfopenllm_v2/google/flan-t5-base/69eb63bf-72dd-4995-a8ec-49fd304a8ee7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/google_flan-t5-base/1762652580.172907", - "retrieved_timestamp": "1762652580.172908", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "google/flan-t5-base", - "developer": "google", - "inference_platform": "unknown", - "id": "google/flan-t5-base", - "additional_details": { - "precision": "float16", - "architecture": "T5ForConditionalGeneration", - "params_billions": 0.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18907055501624578 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3525980599300322 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23825503355704697 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36711458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13572140957446807 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/flan-t5-base/a58bf2d3-d209-41b8-a795-ba7a16e4a28f.json 
b/data/hfopenllm_v2/google/flan-t5-base/a58bf2d3-d209-41b8-a795-ba7a16e4a28f.json new file mode 100644 index 000000000..42df32f4d --- /dev/null +++ b/data/hfopenllm_v2/google/flan-t5-base/a58bf2d3-d209-41b8-a795-ba7a16e4a28f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/google_flan-t5-base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "flan-t5-base", + "id": "google/flan-t5-base", + "developer": "google", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "T5ForConditionalGeneration", + "params_billions": 0.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1891 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3526 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0106 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2383 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3671 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1357 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/flan-t5-large/b15ad3b5-7ef2-439e-9acd-a85eab520d31.json b/data/hfopenllm_v2/google/flan-t5-large/b15ad3b5-7ef2-439e-9acd-a85eab520d31.json new file mode 100644 index 000000000..12a83a4b2 --- /dev/null +++ b/data/hfopenllm_v2/google/flan-t5-large/b15ad3b5-7ef2-439e-9acd-a85eab520d31.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/google_flan-t5-large/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "flan-t5-large", + "id": "google/flan-t5-large", + "developer": "google", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "T5ForConditionalGeneration", + "params_billions": 0.783 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2201 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4153 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0144 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2508 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4083 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1709 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/flan-t5-large/eb2e1202-9292-4f5e-a366-abc84897c66d.json b/data/hfopenllm_v2/google/flan-t5-large/eb2e1202-9292-4f5e-a366-abc84897c66d.json deleted file mode 100644 index 79a0186fa..000000000 --- a/data/hfopenllm_v2/google/flan-t5-large/eb2e1202-9292-4f5e-a366-abc84897c66d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/google_flan-t5-large/1762652580.173132", - "retrieved_timestamp": "1762652580.1731331", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "google/flan-t5-large", - "developer": "google", - "inference_platform": "unknown", - "id": "google/flan-t5-large", - "additional_details": { - "precision": "float16", - "architecture": "T5ForConditionalGeneration", - "params_billions": 0.783 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22009490374428736 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41531150356794316 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25083892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40832291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17087765957446807 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/flan-t5-small/368a36c5-8211-4240-ac88-3fd5e5414310.json b/data/hfopenllm_v2/google/flan-t5-small/368a36c5-8211-4240-ac88-3fd5e5414310.json deleted file mode 100644 index 2d8eb105a..000000000 --- a/data/hfopenllm_v2/google/flan-t5-small/368a36c5-8211-4240-ac88-3fd5e5414310.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/google_flan-t5-small/1762652580.173366", - "retrieved_timestamp": "1762652580.173366", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "google/flan-t5-small", - "developer": "google", - "inference_platform": "unknown", - "id": "google/flan-t5-small", - "additional_details": { - "precision": "float16", - "architecture": "T5ForConditionalGeneration", - "params_billions": 0.077 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1524255641697363 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3282901097640842 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0075528700906344415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41229166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1233377659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/flan-t5-small/64da2654-9fdb-4a08-ad16-cf8793a30ed8.json b/data/hfopenllm_v2/google/flan-t5-small/64da2654-9fdb-4a08-ad16-cf8793a30ed8.json new file mode 100644 index 000000000..736e0e185 --- /dev/null +++ b/data/hfopenllm_v2/google/flan-t5-small/64da2654-9fdb-4a08-ad16-cf8793a30ed8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/google_flan-t5-small/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "flan-t5-small", + "id": "google/flan-t5-small", + "developer": "google", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "T5ForConditionalGeneration", + "params_billions": 0.077 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1524 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3283 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0076 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4123 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1233 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/flan-t5-xl/37080215-ee30-4e59-a407-b14695ac2a38.json b/data/hfopenllm_v2/google/flan-t5-xl/37080215-ee30-4e59-a407-b14695ac2a38.json new file mode 100644 index 000000000..5eaa5962e --- /dev/null +++ b/data/hfopenllm_v2/google/flan-t5-xl/37080215-ee30-4e59-a407-b14695ac2a38.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/google_flan-t5-xl/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "flan-t5-xl", + "id": "google/flan-t5-xl", + "developer": "google", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "T5ForConditionalGeneration", + "params_billions": 2.85 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2237 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4531 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0076 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2525 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on 
MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4181 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2147 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/flan-t5-xl/98a6a294-7b5d-4279-8aa6-6ed16248ce0b.json b/data/hfopenllm_v2/google/flan-t5-xl/98a6a294-7b5d-4279-8aa6-6ed16248ce0b.json deleted file mode 100644 index 58ea882b2..000000000 --- a/data/hfopenllm_v2/google/flan-t5-xl/98a6a294-7b5d-4279-8aa6-6ed16248ce0b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/google_flan-t5-xl/1762652580.1738272", - "retrieved_timestamp": "1762652580.1738281", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "google/flan-t5-xl", - "developer": "google", - "inference_platform": "unknown", - "id": "google/flan-t5-xl", - "additional_details": { - "precision": "bfloat16", - "architecture": "T5ForConditionalGeneration", - "params_billions": 2.85 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2206944241279804 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45372172155693963 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634442 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24580536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42203125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21417885638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/flan-t5-xl/ab0ac321-1c2b-4523-b48c-de47ff06e7a3.json b/data/hfopenllm_v2/google/flan-t5-xl/ab0ac321-1c2b-4523-b48c-de47ff06e7a3.json deleted file mode 100644 index f899cbfae..000000000 --- 
a/data/hfopenllm_v2/google/flan-t5-xl/ab0ac321-1c2b-4523-b48c-de47ff06e7a3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/google_flan-t5-xl/1762652580.173602", - "retrieved_timestamp": "1762652580.173603", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "google/flan-t5-xl", - "developer": "google", - "inference_platform": "unknown", - "id": "google/flan-t5-xl", - "additional_details": { - "precision": "float16", - "architecture": "T5ForConditionalGeneration", - "params_billions": 2.85 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22374189373085634 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45310636062112314 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0075528700906344415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41809375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21467752659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/flan-t5-xl/b83a0ce7-bf13-4a98-81f3-04e5a44105f7.json b/data/hfopenllm_v2/google/flan-t5-xl/b83a0ce7-bf13-4a98-81f3-04e5a44105f7.json new file mode 100644 index 000000000..e4219b2cd --- /dev/null +++ b/data/hfopenllm_v2/google/flan-t5-xl/b83a0ce7-bf13-4a98-81f3-04e5a44105f7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/google_flan-t5-xl/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "flan-t5-xl", + "id": "google/flan-t5-xl", + "developer": "google", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "T5ForConditionalGeneration", + "params_billions": 2.85 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" 
+ }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2207 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4537 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0008 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2458 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.422 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2142 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/flan-t5-xxl/bb7bea21-5bc6-460d-98ff-b3ed02d5b215.json b/data/hfopenllm_v2/google/flan-t5-xxl/bb7bea21-5bc6-460d-98ff-b3ed02d5b215.json new file mode 100644 index 000000000..e29cc1652 --- /dev/null +++ b/data/hfopenllm_v2/google/flan-t5-xxl/bb7bea21-5bc6-460d-98ff-b3ed02d5b215.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/google_flan-t5-xxl/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "flan-t5-xxl", + "id": "google/flan-t5-xxl", + "developer": "google", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "T5ForConditionalGeneration", + "params_billions": 11.267 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.22 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + 
"source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5066 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0106 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4218 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2343 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/flan-t5-xxl/e15f4783-510e-4b92-a999-072caa425d4c.json b/data/hfopenllm_v2/google/flan-t5-xxl/e15f4783-510e-4b92-a999-072caa425d4c.json deleted file mode 100644 index 37661faa3..000000000 --- a/data/hfopenllm_v2/google/flan-t5-xxl/e15f4783-510e-4b92-a999-072caa425d4c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/google_flan-t5-xxl/1762652580.174026", - "retrieved_timestamp": "1762652580.174026", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "google/flan-t5-xxl", - "developer": "google", - "inference_platform": "unknown", - "id": "google/flan-t5-xxl", - "additional_details": { - "precision": "float16", - "architecture": "T5ForConditionalGeneration", - "params_billions": 11.267 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2200450360598767 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5065888015776924 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on 
MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42175 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23429188829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/flan-ul2/99941572-3e23-467c-97df-dfe1a2aa9805.json b/data/hfopenllm_v2/google/flan-ul2/99941572-3e23-467c-97df-dfe1a2aa9805.json deleted file mode 100644 index f6a6f92d5..000000000 --- a/data/hfopenllm_v2/google/flan-ul2/99941572-3e23-467c-97df-dfe1a2aa9805.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/google_flan-ul2/1762652580.174251", - "retrieved_timestamp": "1762652580.174251", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "google/flan-ul2", - "developer": "google", - "inference_platform": "unknown", - "id": "google/flan-ul2", - "additional_details": { - "precision": "bfloat16", - "architecture": "T5ForConditionalGeneration", - "params_billions": 19.46 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23925406809487715 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5053738049125648 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3843541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.24933510638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/flan-ul2/da9ddecc-43cf-4055-a19e-795b1ee98826.json b/data/hfopenllm_v2/google/flan-ul2/da9ddecc-43cf-4055-a19e-795b1ee98826.json new file mode 100644 index 000000000..efa59a78c --- /dev/null +++ b/data/hfopenllm_v2/google/flan-ul2/da9ddecc-43cf-4055-a19e-795b1ee98826.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/google_flan-ul2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "flan-ul2", + "id": "google/flan-ul2", + "developer": "google", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "T5ForConditionalGeneration", + "params_billions": 19.46 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2393 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5054 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0091 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2878 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3844 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2493 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/gemma-1.1-2b-it/a93ccb3f-f2d9-415d-8397-0c7fb765fada.json b/data/hfopenllm_v2/google/gemma-1.1-2b-it/a93ccb3f-f2d9-415d-8397-0c7fb765fada.json new file mode 100644 index 000000000..f13cf1295 --- 
/dev/null +++ b/data/hfopenllm_v2/google/gemma-1.1-2b-it/a93ccb3f-f2d9-415d-8397-0c7fb765fada.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/google_gemma-1.1-2b-it/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-1.1-2b-it", + "id": "google/gemma-1.1-2b-it", + "developer": "google", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 2.506 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3067 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3185 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0181 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2693 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3394 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1484 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/gemma-1.1-7b-it/d0f86765-bdb4-4367-986b-28303bbe1844.json b/data/hfopenllm_v2/google/gemma-1.1-7b-it/d0f86765-bdb4-4367-986b-28303bbe1844.json new file mode 100644 index 000000000..53d0c2f3f --- /dev/null +++ b/data/hfopenllm_v2/google/gemma-1.1-7b-it/d0f86765-bdb4-4367-986b-28303bbe1844.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/google_gemma-1.1-7b-it/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { 
+ "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-1.1-7b-it", + "id": "google/gemma-1.1-7b-it", + "developer": "google", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 8.538 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5039 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3935 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0491 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.423 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/gemma-2-27b-it/693bb191-ae83-49dc-9df1-2f68b1b5fe4a.json b/data/hfopenllm_v2/google/gemma-2-27b-it/693bb191-ae83-49dc-9df1-2f68b1b5fe4a.json new file mode 100644 index 000000000..3ca551306 --- /dev/null +++ b/data/hfopenllm_v2/google/gemma-2-27b-it/693bb191-ae83-49dc-9df1-2f68b1b5fe4a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/google_gemma-2-27b-it/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-27b-it", + "id": "google/gemma-2-27b-it", + "developer": "google", + "inference_platform": 
"unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7978 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6451 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2387 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.375 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4033 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4451 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/gemma-2-27b/7b2c0b72-6421-4f33-8593-a4bbfd0c6d6b.json b/data/hfopenllm_v2/google/gemma-2-27b/7b2c0b72-6421-4f33-8593-a4bbfd0c6d6b.json new file mode 100644 index 000000000..b5cad47da --- /dev/null +++ b/data/hfopenllm_v2/google/gemma-2-27b/7b2c0b72-6421-4f33-8593-a4bbfd0c6d6b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/google_gemma-2-27b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-27b", + "id": "google/gemma-2-27b", + "developer": "google", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2475 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5643 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1662 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3507 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4396 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4371 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/gemma-2-2b-it/c4ee822f-fc8b-4523-95b6-7c3f12a334b3.json b/data/hfopenllm_v2/google/gemma-2-2b-it/c4ee822f-fc8b-4523-95b6-7c3f12a334b3.json new file mode 100644 index 000000000..9103c1efd --- /dev/null +++ b/data/hfopenllm_v2/google/gemma-2-2b-it/c4ee822f-fc8b-4523-95b6-7c3f12a334b3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/google_gemma-2-2b-it/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-2b-it", + "id": "google/gemma-2-2b-it", + "developer": "google", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "InternLM2ForCausalLM", + "params_billions": 2.614 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5668 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + 
"source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4199 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0008 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3929 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.255 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/gemma-2-2b-jpn-it/1810033a-185b-4c91-91d3-43b8f6c61443.json b/data/hfopenllm_v2/google/gemma-2-2b-jpn-it/1810033a-185b-4c91-91d3-43b8f6c61443.json new file mode 100644 index 000000000..4fde83e45 --- /dev/null +++ b/data/hfopenllm_v2/google/gemma-2-2b-jpn-it/1810033a-185b-4c91-91d3-43b8f6c61443.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/google_gemma-2-2b-jpn-it/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-2b-jpn-it", + "id": "google/gemma-2-2b-jpn-it", + "developer": "google", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5078 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4226 + } + }, 
+ { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0347 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3964 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2578 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/gemma-2-2b-jpn-it/beb721ae-a35c-4f6b-a80f-aac4835d5f8d.json b/data/hfopenllm_v2/google/gemma-2-2b-jpn-it/beb721ae-a35c-4f6b-a80f-aac4835d5f8d.json new file mode 100644 index 000000000..1442b3bca --- /dev/null +++ b/data/hfopenllm_v2/google/gemma-2-2b-jpn-it/beb721ae-a35c-4f6b-a80f-aac4835d5f8d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/google_gemma-2-2b-jpn-it/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-2b-jpn-it", + "id": "google/gemma-2-2b-jpn-it", + "developer": "google", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5288 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4178 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0476 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2752 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3728 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2467 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/gemma-2-2b/cf20e77a-340f-4d8d-b593-9645bdfc5877.json b/data/hfopenllm_v2/google/gemma-2-2b/cf20e77a-340f-4d8d-b593-9645bdfc5877.json new file mode 100644 index 000000000..c8343d88b --- /dev/null +++ b/data/hfopenllm_v2/google/gemma-2-2b/cf20e77a-340f-4d8d-b593-9645bdfc5877.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/google_gemma-2-2b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-2b", + "id": "google/gemma-2-2b", + "developer": "google", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "InternLM2ForCausalLM", + "params_billions": 2.614 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2018 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3709 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0302 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2626 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4219 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2217 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/gemma-2-2b/eec73e49-ac2b-42ed-a115-76e45007cd5d.json b/data/hfopenllm_v2/google/gemma-2-2b/eec73e49-ac2b-42ed-a115-76e45007cd5d.json new file mode 100644 index 000000000..96086b370 --- /dev/null +++ b/data/hfopenllm_v2/google/gemma-2-2b/eec73e49-ac2b-42ed-a115-76e45007cd5d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/google_gemma-2-2b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-2b", + "id": "google/gemma-2-2b", + "developer": "google", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "InternLM2ForCausalLM", + "params_billions": 2.614 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1993 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3656 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0287 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2626 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": 
"TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4232 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.218 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/gemma-2-9b-it/aa06d058-87f9-4fde-ad53-139b29a71448.json b/data/hfopenllm_v2/google/gemma-2-9b-it/aa06d058-87f9-4fde-ad53-139b29a71448.json new file mode 100644 index 000000000..8f9ad5f6e --- /dev/null +++ b/data/hfopenllm_v2/google/gemma-2-9b-it/aa06d058-87f9-4fde-ad53-139b29a71448.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/google_gemma-2-9b-it/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-9b-it", + "id": "google/gemma-2-9b-it", + "developer": "google", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7436 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.599 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1949 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3607 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4073 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + 
"dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3875 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/gemma-2-9b/3f1d571a-fc42-411b-88ab-4700d5861367.json b/data/hfopenllm_v2/google/gemma-2-9b/3f1d571a-fc42-411b-88ab-4700d5861367.json new file mode 100644 index 000000000..c496a90c9 --- /dev/null +++ b/data/hfopenllm_v2/google/gemma-2-9b/3f1d571a-fc42-411b-88ab-4700d5861367.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/google_gemma-2-9b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-9b", + "id": "google/gemma-2-9b", + "developer": "google", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.204 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5377 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1344 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3289 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4461 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4103 + } + } + ] 
+} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/gemma-2b-it/74a56080-aeb2-4cc6-a825-bbe4d9a5900a.json b/data/hfopenllm_v2/google/gemma-2b-it/74a56080-aeb2-4cc6-a825-bbe4d9a5900a.json new file mode 100644 index 000000000..e1752b69f --- /dev/null +++ b/data/hfopenllm_v2/google/gemma-2b-it/74a56080-aeb2-4cc6-a825-bbe4d9a5900a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/google_gemma-2b-it/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2b-it", + "id": "google/gemma-2b-it", + "developer": "google", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 2.506 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.269 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3151 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0204 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2785 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3341 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1353 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/gemma-2b/2eb433ba-5c93-4355-99dd-edcb65721603.json b/data/hfopenllm_v2/google/gemma-2b/2eb433ba-5c93-4355-99dd-edcb65721603.json new file mode 100644 index 000000000..3c6b79553 --- /dev/null +++ 
b/data/hfopenllm_v2/google/gemma-2b/2eb433ba-5c93-4355-99dd-edcb65721603.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/google_gemma-2b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2b", + "id": "google/gemma-2b", + "developer": "google", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 2.506 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2038 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3366 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0302 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.255 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3978 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1366 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/gemma-7b-it/826fc3ab-6ff8-44fa-a745-a0b80bcb2db4.json b/data/hfopenllm_v2/google/gemma-7b-it/826fc3ab-6ff8-44fa-a745-a0b80bcb2db4.json new file mode 100644 index 000000000..47f028978 --- /dev/null +++ b/data/hfopenllm_v2/google/gemma-7b-it/826fc3ab-6ff8-44fa-a745-a0b80bcb2db4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/google_gemma-7b-it/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": 
"documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-7b-it", + "id": "google/gemma-7b-it", + "developer": "google", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 8.538 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3868 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3646 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0295 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2844 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4274 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1695 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/gemma-7b/6da54964-e3b5-4567-8ce4-7e0f279af84f.json b/data/hfopenllm_v2/google/gemma-7b/6da54964-e3b5-4567-8ce4-7e0f279af84f.json new file mode 100644 index 000000000..0bb149bf8 --- /dev/null +++ b/data/hfopenllm_v2/google/gemma-7b/6da54964-e3b5-4567-8ce4-7e0f279af84f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/google_gemma-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-7b", + "id": "google/gemma-7b", + "developer": "google", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + 
"params_billions": 8.538 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4362 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.074 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4062 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2948 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/google/codegemma-1.1-2b/9d92e421-c458-4ad3-b9bf-45c0ca1b90cf.json b/data/hfopenllm_v2/google/google/codegemma-1.1-2b/9d92e421-c458-4ad3-b9bf-45c0ca1b90cf.json deleted file mode 100644 index bc697fa88..000000000 --- a/data/hfopenllm_v2/google/google/codegemma-1.1-2b/9d92e421-c458-4ad3-b9bf-45c0ca1b90cf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/google_codegemma-1.1-2b/1762652580.172607", - "retrieved_timestamp": "1762652580.172608", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "google/codegemma-1.1-2b", - "developer": "google", - "inference_platform": "unknown", - "id": "google/codegemma-1.1-2b", - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 2.506 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": 
"Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22936253584932426 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3353417790248454 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3871458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1278257978723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/google/gemma-1.1-2b-it/5ed676b6-4aff-4d71-a91a-6d5d9feeb28f.json b/data/hfopenllm_v2/google/google/gemma-1.1-2b-it/5ed676b6-4aff-4d71-a91a-6d5d9feeb28f.json deleted file mode 100644 index cc746f34f..000000000 --- a/data/hfopenllm_v2/google/google/gemma-1.1-2b-it/5ed676b6-4aff-4d71-a91a-6d5d9feeb28f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/google_gemma-1.1-2b-it/1762652580.1745641", - "retrieved_timestamp": "1762652580.174565", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "google/gemma-1.1-2b-it", - "developer": "google", - "inference_platform": "unknown", - "id": "google/gemma-1.1-2b-it", - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 2.506 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30674831668860847 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3184634974814922 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01812688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33939583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14835438829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/google/gemma-1.1-7b-it/6929c338-76a5-4386-9fa8-68e35a989a86.json b/data/hfopenllm_v2/google/google/gemma-1.1-7b-it/6929c338-76a5-4386-9fa8-68e35a989a86.json deleted file mode 100644 index bc1e7a49b..000000000 --- a/data/hfopenllm_v2/google/google/gemma-1.1-7b-it/6929c338-76a5-4386-9fa8-68e35a989a86.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/google_gemma-1.1-7b-it/1762652580.1748302", - "retrieved_timestamp": "1762652580.1748302", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "google/gemma-1.1-7b-it", - "developer": "google", - "inference_platform": "unknown", - "id": "google/gemma-1.1-7b-it", - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 8.538 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5039107346285633 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3935297962833251 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04909365558912387 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42302083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2583942819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/google/gemma-2-27b-it/5bcf96ce-efd1-4f90-91a1-edd548de71ad.json 
b/data/hfopenllm_v2/google/google/gemma-2-27b-it/5bcf96ce-efd1-4f90-91a1-edd548de71ad.json deleted file mode 100644 index c80663738..000000000 --- a/data/hfopenllm_v2/google/google/gemma-2-27b-it/5bcf96ce-efd1-4f90-91a1-edd548de71ad.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/google_gemma-2-27b-it/1762652580.17537", - "retrieved_timestamp": "1762652580.175371", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "google/gemma-2-27b-it", - "developer": "google", - "inference_platform": "unknown", - "id": "google/gemma-2-27b-it", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7977677008116243 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6451387433168799 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23867069486404835 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.375 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40330208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4451462765957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/google/gemma-2-27b/12f7d5a6-3f8b-49d8-9ca8-38774dbcca92.json b/data/hfopenllm_v2/google/google/gemma-2-27b/12f7d5a6-3f8b-49d8-9ca8-38774dbcca92.json deleted file mode 100644 index e01cd730a..000000000 --- a/data/hfopenllm_v2/google/google/gemma-2-27b/12f7d5a6-3f8b-49d8-9ca8-38774dbcca92.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/google_gemma-2-27b/1762652580.175144", - "retrieved_timestamp": "1762652580.175145", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "google/gemma-2-27b", - "developer": "google", - "inference_platform": "unknown", - "id": 
"google/gemma-2-27b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24752213017017072 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5642908317482057 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35067114093959734 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43963541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4370844414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/google/gemma-2-2b-it/64daa9ea-cf1e-4787-90cf-ed72c5e23afd.json b/data/hfopenllm_v2/google/google/gemma-2-2b-it/64daa9ea-cf1e-4787-90cf-ed72c5e23afd.json deleted file mode 100644 index 9cd5ae017..000000000 --- a/data/hfopenllm_v2/google/google/gemma-2-2b-it/64daa9ea-cf1e-4787-90cf-ed72c5e23afd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/google_gemma-2-2b-it/1762652580.176172", - "retrieved_timestamp": "1762652580.176194", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "google/gemma-2-2b-it", - "developer": "google", - "inference_platform": "unknown", - "id": "google/gemma-2-2b-it", - "additional_details": { - "precision": "bfloat16", - "architecture": "InternLM2ForCausalLM", - "params_billions": 2.614 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5668337788179807 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41992308914274706 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39288541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25498670212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/google/gemma-2-2b-jpn-it/251b93fa-6f12-41bc-85c8-ded52e1a0d2d.json b/data/hfopenllm_v2/google/google/gemma-2-2b-jpn-it/251b93fa-6f12-41bc-85c8-ded52e1a0d2d.json deleted file mode 100644 index 8d4aaab7d..000000000 --- a/data/hfopenllm_v2/google/google/gemma-2-2b-jpn-it/251b93fa-6f12-41bc-85c8-ded52e1a0d2d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/google_gemma-2-2b-jpn-it/1762652580.1767948", - "retrieved_timestamp": "1762652580.176796", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "google/gemma-2-2b-jpn-it", - "developer": "google", - "inference_platform": "unknown", - "id": "google/gemma-2-2b-jpn-it", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5288401441508531 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4178440226217119 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37276041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2466755319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/google/gemma-2-2b-jpn-it/a09fdbce-489c-4d14-a05f-7663121bece7.json b/data/hfopenllm_v2/google/google/gemma-2-2b-jpn-it/a09fdbce-489c-4d14-a05f-7663121bece7.json deleted file mode 100644 index e6a7a2434..000000000 --- a/data/hfopenllm_v2/google/google/gemma-2-2b-jpn-it/a09fdbce-489c-4d14-a05f-7663121bece7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/google_gemma-2-2b-jpn-it/1762652580.176506", - "retrieved_timestamp": "1762652580.176507", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "google/gemma-2-2b-jpn-it", - "developer": "google", - "inference_platform": "unknown", - "id": "google/gemma-2-2b-jpn-it", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5077826832803628 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42255698900658106 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39638541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2578125 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/google/gemma-2-2b/07e74f27-e0c3-448f-9a8c-a07ff8a73178.json b/data/hfopenllm_v2/google/google/gemma-2-2b/07e74f27-e0c3-448f-9a8c-a07ff8a73178.json deleted file mode 100644 index 8de032749..000000000 --- a/data/hfopenllm_v2/google/google/gemma-2-2b/07e74f27-e0c3-448f-9a8c-a07ff8a73178.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/google_gemma-2-2b/1762652580.175597", - "retrieved_timestamp": "1762652580.1755981", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": 
"Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "google/gemma-2-2b", - "developer": "google", - "inference_platform": "unknown", - "id": "google/gemma-2-2b", - "additional_details": { - "precision": "bfloat16", - "architecture": "InternLM2ForCausalLM", - "params_billions": 2.614 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19931226922343825 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3655966996422591 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4231770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21800199468085107 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/google/gemma-2-2b/53fb75b1-2d9f-4af3-a358-18bf5d4a9032.json b/data/hfopenllm_v2/google/google/gemma-2-2b/53fb75b1-2d9f-4af3-a358-18bf5d4a9032.json deleted file mode 100644 index e93f23829..000000000 --- a/data/hfopenllm_v2/google/google/gemma-2-2b/53fb75b1-2d9f-4af3-a358-18bf5d4a9032.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/google_gemma-2-2b/1762652580.1759539", - "retrieved_timestamp": "1762652580.175955", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "google/gemma-2-2b", - "developer": "google", - "inference_platform": "unknown", - "id": "google/gemma-2-2b", - "additional_details": { - "precision": "float16", - "architecture": "InternLM2ForCausalLM", - "params_billions": 2.614 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20176021844262113 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3708674612470255 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030211480362537766 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.421875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22165890957446807 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/google/gemma-2-9b-it/e8cef406-d6cc-48bd-872f-3d5b74bcf092.json b/data/hfopenllm_v2/google/google/gemma-2-9b-it/e8cef406-d6cc-48bd-872f-3d5b74bcf092.json deleted file mode 100644 index f1a643b09..000000000 --- a/data/hfopenllm_v2/google/google/gemma-2-9b-it/e8cef406-d6cc-48bd-872f-3d5b74bcf092.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/google_gemma-2-9b-it/1762652580.177257", - "retrieved_timestamp": "1762652580.177258", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "google/gemma-2-9b-it", - "developer": "google", - "inference_platform": "unknown", - "id": "google/gemma-2-9b-it", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7435626360279614 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5990342504164132 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19486404833836857 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36073825503355705 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.4072708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3875498670212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/google/gemma-2-9b/2ac50111-a850-4bd2-8136-c373990742a5.json b/data/hfopenllm_v2/google/google/gemma-2-9b/2ac50111-a850-4bd2-8136-c373990742a5.json deleted file mode 100644 index 3e92d5b08..000000000 --- a/data/hfopenllm_v2/google/google/gemma-2-9b/2ac50111-a850-4bd2-8136-c373990742a5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/google_gemma-2-9b/1762652580.177011", - "retrieved_timestamp": "1762652580.177012", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "google/gemma-2-9b", - "developer": "google", - "inference_platform": "unknown", - "id": "google/gemma-2-9b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20398320899657355 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5377373397621884 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13444108761329304 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4461145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4103224734042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/google/gemma-2b-it/50dffd1a-ddf5-40fd-a2c8-e5dd140af617.json b/data/hfopenllm_v2/google/google/gemma-2b-it/50dffd1a-ddf5-40fd-a2c8-e5dd140af617.json deleted file mode 100644 index 949c358fb..000000000 --- a/data/hfopenllm_v2/google/google/gemma-2b-it/50dffd1a-ddf5-40fd-a2c8-e5dd140af617.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/google_gemma-2b-it/1762652580.17777", - "retrieved_timestamp": "1762652580.17777", - "source_data": 
[ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "google/gemma-2b-it", - "developer": "google", - "inference_platform": "unknown", - "id": "google/gemma-2b-it", - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 2.506 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26902950837112194 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31508191988788464 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.334125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13530585106382978 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/google/gemma-2b/2dd86ebc-0253-4801-ac99-2bb3494ad29b.json b/data/hfopenllm_v2/google/google/gemma-2b/2dd86ebc-0253-4801-ac99-2bb3494ad29b.json deleted file mode 100644 index 0d9409e46..000000000 --- a/data/hfopenllm_v2/google/google/gemma-2b/2dd86ebc-0253-4801-ac99-2bb3494ad29b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/google_gemma-2b/1762652580.177512", - "retrieved_timestamp": "1762652580.177513", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "google/gemma-2b", - "developer": "google", - "inference_platform": "unknown", - "id": "google/gemma-2b", - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 2.506 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20375825033134307 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33656381705857935 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030211480362537766 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2550335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39778125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13655252659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/google/gemma-7b-it/30146048-ee0f-431d-b3e7-8c066c820740.json b/data/hfopenllm_v2/google/google/gemma-7b-it/30146048-ee0f-431d-b3e7-8c066c820740.json deleted file mode 100644 index b47a1e6dc..000000000 --- a/data/hfopenllm_v2/google/google/gemma-7b-it/30146048-ee0f-431d-b3e7-8c066c820740.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/google_gemma-7b-it/1762652580.178242", - "retrieved_timestamp": "1762652580.1782432", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "google/gemma-7b-it", - "developer": "google", - "inference_platform": "unknown", - "id": "google/gemma-7b-it", - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 8.538 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3868324933398937 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36459012743300967 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy 
on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42742708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16946476063829788 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/google/gemma-7b/630e3cc0-fccc-41b3-b439-85a875dae401.json b/data/hfopenllm_v2/google/google/gemma-7b/630e3cc0-fccc-41b3-b439-85a875dae401.json deleted file mode 100644 index e2eb0d05e..000000000 --- a/data/hfopenllm_v2/google/google/gemma-7b/630e3cc0-fccc-41b3-b439-85a875dae401.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/google_gemma-7b/1762652580.1780128", - "retrieved_timestamp": "1762652580.178014", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "google/gemma-7b", - "developer": "google", - "inference_platform": "unknown", - "id": "google/gemma-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 8.538 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2659321710838353 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43615285239286355 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07401812688821752 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4062395833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2947972074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/google/recurrentgemma-2b-it/a219b160-3dbd-4dcd-b39d-d12c6f9b1145.json b/data/hfopenllm_v2/google/google/recurrentgemma-2b-it/a219b160-3dbd-4dcd-b39d-d12c6f9b1145.json deleted file mode 100644 index 2df98c020..000000000 --- a/data/hfopenllm_v2/google/google/recurrentgemma-2b-it/a219b160-3dbd-4dcd-b39d-d12c6f9b1145.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - 
"evaluation_id": "hfopenllm_v2/google_recurrentgemma-2b-it/1762652580.17961", - "retrieved_timestamp": "1762652580.179611", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "google/recurrentgemma-2b-it", - "developer": "google", - "inference_platform": "unknown", - "id": "google/recurrentgemma-2b-it", - "additional_details": { - "precision": "bfloat16", - "architecture": "RecurrentGemmaForCausalLM", - "params_billions": 2.683 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2949329999955673 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33300047272606553 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3340625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1402094414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/google/recurrentgemma-2b/218a5d0f-5242-43c4-8166-81f5c09626bb.json b/data/hfopenllm_v2/google/google/recurrentgemma-2b/218a5d0f-5242-43c4-8166-81f5c09626bb.json deleted file mode 100644 index 80bf1decf..000000000 --- a/data/hfopenllm_v2/google/google/recurrentgemma-2b/218a5d0f-5242-43c4-8166-81f5c09626bb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/google_recurrentgemma-2b/1762652580.179393", - "retrieved_timestamp": "1762652580.179394", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "google/recurrentgemma-2b", - "developer": "google", - "inference_platform": "unknown", - "id": "google/recurrentgemma-2b", - "additional_details": { - "precision": "bfloat16", - "architecture": "RecurrentGemmaForCausalLM", - "params_billions": 2.683 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3017028151970106 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31973582830084474 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24580536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3445729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11760305851063829 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/google/recurrentgemma-9b-it/c7095b76-2d50-467b-a8d9-d7a277f1f14c.json b/data/hfopenllm_v2/google/google/recurrentgemma-9b-it/c7095b76-2d50-467b-a8d9-d7a277f1f14c.json deleted file mode 100644 index 46738231a..000000000 --- a/data/hfopenllm_v2/google/google/recurrentgemma-9b-it/c7095b76-2d50-467b-a8d9-d7a277f1f14c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/google_recurrentgemma-9b-it/1762652580.180049", - "retrieved_timestamp": "1762652580.18005", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "google/recurrentgemma-9b-it", - "developer": "google", - "inference_platform": "unknown", - "id": "google/recurrentgemma-9b-it", - "additional_details": { - "precision": "bfloat16", - "architecture": "RecurrentGemmaForCausalLM", - "params_billions": 9.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5010383560065071 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4367189649027647 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - 
"metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43790625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2843251329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/google/recurrentgemma-9b/1ff3ab95-3007-4cbf-a146-5e8e4ae65404.json b/data/hfopenllm_v2/google/google/recurrentgemma-9b/1ff3ab95-3007-4cbf-a146-5e8e4ae65404.json deleted file mode 100644 index 27edffbc5..000000000 --- a/data/hfopenllm_v2/google/google/recurrentgemma-9b/1ff3ab95-3007-4cbf-a146-5e8e4ae65404.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/google_recurrentgemma-9b/1762652580.17984", - "retrieved_timestamp": "1762652580.179841", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "google/recurrentgemma-9b", - "developer": "google", - "inference_platform": "unknown", - "id": "google/recurrentgemma-9b", - "additional_details": { - "precision": "bfloat16", - "architecture": "RecurrentGemmaForCausalLM", - "params_billions": 9.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31159434744256354 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39562568669428394 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3802604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2604720744680851 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/google/grimjim/Gigantes-v1-gemma2-9b-it/57072a5e-1f64-4ae2-9e2c-caecc1dc05f4.json b/data/hfopenllm_v2/google/grimjim/Gigantes-v1-gemma2-9b-it/57072a5e-1f64-4ae2-9e2c-caecc1dc05f4.json deleted file mode 100644 index 07724e435..000000000 --- a/data/hfopenllm_v2/google/grimjim/Gigantes-v1-gemma2-9b-it/57072a5e-1f64-4ae2-9e2c-caecc1dc05f4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/grimjim_Gigantes-v1-gemma2-9b-it/1762652580.1819131", - "retrieved_timestamp": "1762652580.1819131", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "grimjim/Gigantes-v1-gemma2-9b-it", - "developer": "google", - "inference_platform": "unknown", - "id": "grimjim/Gigantes-v1-gemma2-9b-it", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.692454908531585 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.597792552822268 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21450151057401812 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35318791946308725 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45547916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42253989361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/grimjim/Gigantes-v2-gemma2-9b-it/47486923-2194-4b8e-930c-ca14bd5f8a26.json b/data/hfopenllm_v2/google/grimjim/Gigantes-v2-gemma2-9b-it/47486923-2194-4b8e-930c-ca14bd5f8a26.json deleted file mode 100644 index 3df799b6c..000000000 --- a/data/hfopenllm_v2/google/grimjim/Gigantes-v2-gemma2-9b-it/47486923-2194-4b8e-930c-ca14bd5f8a26.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/grimjim_Gigantes-v2-gemma2-9b-it/1762652580.182155", - "retrieved_timestamp": "1762652580.182156", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "grimjim/Gigantes-v2-gemma2-9b-it", - "developer": "google", - "inference_platform": "unknown", - "id": "grimjim/Gigantes-v2-gemma2-9b-it", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7350696152874374 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5986559388303995 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20166163141993956 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35151006711409394 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45947916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4259474734042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/grimjim/Gigantes-v3-gemma2-9b-it/bb063d7a-65fa-416b-88e9-7bacdef1da3e.json b/data/hfopenllm_v2/google/grimjim/Gigantes-v3-gemma2-9b-it/bb063d7a-65fa-416b-88e9-7bacdef1da3e.json deleted file mode 100644 index 425cd3a3e..000000000 --- a/data/hfopenllm_v2/google/grimjim/Gigantes-v3-gemma2-9b-it/bb063d7a-65fa-416b-88e9-7bacdef1da3e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/grimjim_Gigantes-v3-gemma2-9b-it/1762652580.182362", - "retrieved_timestamp": "1762652580.1823628", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "grimjim/Gigantes-v3-gemma2-9b-it", - "developer": "google", - "inference_platform": "unknown", - "id": "grimjim/Gigantes-v3-gemma2-9b-it", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.697625633319592 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5983513792324827 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20996978851963746 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3565436241610738 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4608125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4226230053191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/grimjim/Magnolia-v1-Gemma2-8k-9B/2cf17692-b105-41df-9783-6c7728ab778f.json b/data/hfopenllm_v2/google/grimjim/Magnolia-v1-Gemma2-8k-9B/2cf17692-b105-41df-9783-6c7728ab778f.json deleted file mode 100644 index 4c3691c26..000000000 --- a/data/hfopenllm_v2/google/grimjim/Magnolia-v1-Gemma2-8k-9B/2cf17692-b105-41df-9783-6c7728ab778f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/grimjim_Magnolia-v1-Gemma2-8k-9B/1762652580.1841059", - "retrieved_timestamp": "1762652580.1841059", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "grimjim/Magnolia-v1-Gemma2-8k-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "grimjim/Magnolia-v1-Gemma2-8k-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35308536904302806 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5589031767575711 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16842900302114805 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33640939597315433 - } - }, - { - "evaluation_name": 
"MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46446875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4242021276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/grimjim/Magnolia-v2-Gemma2-8k-9B/4d0574f4-4d91-4395-afff-133216eee509.json b/data/hfopenllm_v2/google/grimjim/Magnolia-v2-Gemma2-8k-9B/4d0574f4-4d91-4395-afff-133216eee509.json deleted file mode 100644 index 3953c274d..000000000 --- a/data/hfopenllm_v2/google/grimjim/Magnolia-v2-Gemma2-8k-9B/4d0574f4-4d91-4395-afff-133216eee509.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/grimjim_Magnolia-v2-Gemma2-8k-9B/1762652580.184566", - "retrieved_timestamp": "1762652580.184567", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "grimjim/Magnolia-v2-Gemma2-8k-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "grimjim/Magnolia-v2-Gemma2-8k-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7384417789243651 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6015773428405322 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2280966767371601 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3573825503355705 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44884375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4331781914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/grimjim/Magnolia-v3-Gemma2-8k-9B/8fff2cec-a733-4505-bce9-8b605044181a.json b/data/hfopenllm_v2/google/grimjim/Magnolia-v3-Gemma2-8k-9B/8fff2cec-a733-4505-bce9-8b605044181a.json deleted file mode 100644 index 4961b205d..000000000 --- 
a/data/hfopenllm_v2/google/grimjim/Magnolia-v3-Gemma2-8k-9B/8fff2cec-a733-4505-bce9-8b605044181a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/grimjim_Magnolia-v3-Gemma2-8k-9B/1762652580.1850398", - "retrieved_timestamp": "1762652580.185041", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "grimjim/Magnolia-v3-Gemma2-8k-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "grimjim/Magnolia-v3-Gemma2-8k-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7378422585406721 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6015406636327695 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23187311178247735 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3565436241610738 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4488125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43367686170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/grimjim/Magot-v1-Gemma2-8k-9B/9e63ff64-f862-40ad-b594-31063ec0d31e.json b/data/hfopenllm_v2/google/grimjim/Magot-v1-Gemma2-8k-9B/9e63ff64-f862-40ad-b594-31063ec0d31e.json deleted file mode 100644 index cf26ff3a2..000000000 --- a/data/hfopenllm_v2/google/grimjim/Magot-v1-Gemma2-8k-9B/9e63ff64-f862-40ad-b594-31063ec0d31e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/grimjim_Magot-v1-Gemma2-8k-9B/1762652580.185666", - "retrieved_timestamp": "1762652580.185667", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "grimjim/Magot-v1-Gemma2-8k-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "grimjim/Magot-v1-Gemma2-8k-9B", - "additional_details": { - 
"precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29967818720993633 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6019447732218105 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09894259818731117 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3464765100671141 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44884375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43367686170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/grimjim/Magot-v2-Gemma2-8k-9B/2d250aa8-f3c5-4f9f-9e5c-dde8f720db53.json b/data/hfopenllm_v2/google/grimjim/Magot-v2-Gemma2-8k-9B/2d250aa8-f3c5-4f9f-9e5c-dde8f720db53.json deleted file mode 100644 index 26774abe2..000000000 --- a/data/hfopenllm_v2/google/grimjim/Magot-v2-Gemma2-8k-9B/2d250aa8-f3c5-4f9f-9e5c-dde8f720db53.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/grimjim_Magot-v2-Gemma2-8k-9B/1762652580.185882", - "retrieved_timestamp": "1762652580.1858828", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "grimjim/Magot-v2-Gemma2-8k-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "grimjim/Magot-v2-Gemma2-8k-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7347449212533854 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5896713649821103 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20166163141993956 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3540268456375839 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4343958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4222905585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/hotmailuser/Gemma2Crono-27B/501e2a2c-e32c-455e-8e5f-f8bde053fddc.json b/data/hfopenllm_v2/google/hotmailuser/Gemma2Crono-27B/501e2a2c-e32c-455e-8e5f-f8bde053fddc.json deleted file mode 100644 index 25f98ed1b..000000000 --- a/data/hfopenllm_v2/google/hotmailuser/Gemma2Crono-27B/501e2a2c-e32c-455e-8e5f-f8bde053fddc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hotmailuser_Gemma2Crono-27B/1762652580.193866", - "retrieved_timestamp": "1762652580.193866", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "hotmailuser/Gemma2Crono-27B", - "developer": "google", - "inference_platform": "unknown", - "id": "hotmailuser/Gemma2Crono-27B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7086164709637096 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6505341690680219 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24244712990936557 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37080536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45668749999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, 
- "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4632646276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/hotmailuser/Gemma2SimPO-27B/433a8abf-8ff7-40bb-a4d0-654efdb6bf86.json b/data/hfopenllm_v2/google/hotmailuser/Gemma2SimPO-27B/433a8abf-8ff7-40bb-a4d0-654efdb6bf86.json deleted file mode 100644 index 10902b631..000000000 --- a/data/hfopenllm_v2/google/hotmailuser/Gemma2SimPO-27B/433a8abf-8ff7-40bb-a4d0-654efdb6bf86.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hotmailuser_Gemma2SimPO-27B/1762652580.194106", - "retrieved_timestamp": "1762652580.1941068", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "hotmailuser/Gemma2SimPO-27B", - "developer": "google", - "inference_platform": "unknown", - "id": "hotmailuser/Gemma2SimPO-27B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7222303488078299 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6413158976157102 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28172205438066467 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35822147651006714 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44465625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46417885638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/hotmailuser/Gemma2atlas-27B/c9020f27-9175-4f12-a108-6cbff1c0cb22.json b/data/hfopenllm_v2/google/hotmailuser/Gemma2atlas-27B/c9020f27-9175-4f12-a108-6cbff1c0cb22.json deleted file mode 100644 index 2d4c9982a..000000000 --- a/data/hfopenllm_v2/google/hotmailuser/Gemma2atlas-27B/c9020f27-9175-4f12-a108-6cbff1c0cb22.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hotmailuser_Gemma2atlas-27B/1762652580.1943119", - "retrieved_timestamp": "1762652580.194313", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "hotmailuser/Gemma2atlas-27B", - "developer": "google", - "inference_platform": "unknown", - "id": "hotmailuser/Gemma2atlas-27B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7213560020744957 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6544960921220462 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21450151057401812 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35570469798657717 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44453125000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4749833776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/hotmailuser/Gemma2magnum-27b/0ad192a1-b33f-4362-a21d-ccc590986c5c.json b/data/hfopenllm_v2/google/hotmailuser/Gemma2magnum-27b/0ad192a1-b33f-4362-a21d-ccc590986c5c.json deleted file mode 100644 index 5faf800db..000000000 --- a/data/hfopenllm_v2/google/hotmailuser/Gemma2magnum-27b/0ad192a1-b33f-4362-a21d-ccc590986c5c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hotmailuser_Gemma2magnum-27b/1762652580.1945128", - "retrieved_timestamp": "1762652580.194514", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "hotmailuser/Gemma2magnum-27b", - "developer": "google", - "inference_platform": "unknown", - "id": "hotmailuser/Gemma2magnum-27b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5050599077115387 - } - }, - { - "evaluation_name": 
"BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6199590493843724 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3850671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47234375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45960771276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/ifable/gemma-2-Ifable-9B/42b3b64b-0e15-4a49-b542-da27ab7e2143.json b/data/hfopenllm_v2/google/ifable/gemma-2-Ifable-9B/42b3b64b-0e15-4a49-b542-da27ab7e2143.json deleted file mode 100644 index 9db7abddd..000000000 --- a/data/hfopenllm_v2/google/ifable/gemma-2-Ifable-9B/42b3b64b-0e15-4a49-b542-da27ab7e2143.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ifable_gemma-2-Ifable-9B/1762652580.225604", - "retrieved_timestamp": "1762652580.225605", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ifable/gemma-2-Ifable-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "ifable/gemma-2-Ifable-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2984292787581395 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5866115556693244 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13972809667673716 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3414429530201342 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": 
{ - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40525000000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4226230053191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/jebish7/gemma-2-2b-it/86206a02-3ab9-4a86-a00c-2900e8cd2e18.json b/data/hfopenllm_v2/google/jebish7/gemma-2-2b-it/86206a02-3ab9-4a86-a00c-2900e8cd2e18.json deleted file mode 100644 index 0921a29de..000000000 --- a/data/hfopenllm_v2/google/jebish7/gemma-2-2b-it/86206a02-3ab9-4a86-a00c-2900e8cd2e18.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jebish7_gemma-2-2b-it/1762652580.2824588", - "retrieved_timestamp": "1762652580.2824588", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jebish7/gemma-2-2b-it", - "developer": "google", - "inference_platform": "unknown", - "id": "jebish7/gemma-2-2b-it", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12717035244263 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43951564907099594 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.033987915407854986 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42444791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27152593085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/jebish7/gemma-2-9b-it/80a35d79-893b-439f-b100-a538a3c86974.json b/data/hfopenllm_v2/google/jebish7/gemma-2-9b-it/80a35d79-893b-439f-b100-a538a3c86974.json deleted file mode 100644 index 20a89a939..000000000 --- a/data/hfopenllm_v2/google/jebish7/gemma-2-9b-it/80a35d79-893b-439f-b100-a538a3c86974.json +++ 
/dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jebish7_gemma-2-9b-it/1762652580.282719", - "retrieved_timestamp": "1762652580.28272", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jebish7/gemma-2-9b-it", - "developer": "google", - "inference_platform": "unknown", - "id": "jebish7/gemma-2-9b-it", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1557467519514887 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5949210568047724 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34731543624161076 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4554479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.414311835106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-9B/9ba72d50-4321-4383-8be9-286a56607624.json b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-9B/9ba72d50-4321-4383-8be9-286a56607624.json deleted file mode 100644 index 753a24093..000000000 --- a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-9B/9ba72d50-4321-4383-8be9-286a56607624.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-9B/1762652580.31483", - "retrieved_timestamp": "1762652580.314831", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lemon07r/Gemma-2-Ataraxy-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "lemon07r/Gemma-2-Ataraxy-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3008772279773224 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5931298417725773 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08534743202416918 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3347315436241611 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4424270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4226230053191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-Advanced-9B/7806d1aa-b9e2-45bc-b89d-76e6c48dd3a0.json b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-Advanced-9B/7806d1aa-b9e2-45bc-b89d-76e6c48dd3a0.json deleted file mode 100644 index dfa81bb92..000000000 --- a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-Advanced-9B/7806d1aa-b9e2-45bc-b89d-76e6c48dd3a0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-Advanced-9B/1762652580.315091", - "retrieved_timestamp": "1762652580.315092", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lemon07r/Gemma-2-Ataraxy-Advanced-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "lemon07r/Gemma-2-Ataraxy-Advanced-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5515964308036011 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5889067263184956 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.19788519637462235 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33557046979865773 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3760729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4243683510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-Remix-9B/29dfbb00-8760-46d8-bef8-d036870fb0c0.json b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-Remix-9B/29dfbb00-8760-46d8-bef8-d036870fb0c0.json deleted file mode 100644 index 2987cb417..000000000 --- a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-Remix-9B/29dfbb00-8760-46d8-bef8-d036870fb0c0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-Remix-9B/1762652580.31531", - "retrieved_timestamp": "1762652580.3153112", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lemon07r/Gemma-2-Ataraxy-Remix-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "lemon07r/Gemma-2-Ataraxy-Remix-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7083416446140685 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5892021015046846 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20166163141993956 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3389261744966443 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4371875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.42386968085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v2-9B/ca1b9625-0112-4ebf-b1c3-d2dd217d50b2.json b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v2-9B/ca1b9625-0112-4ebf-b1c3-d2dd217d50b2.json deleted file mode 100644 index 43f6656b0..000000000 --- a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v2-9B/ca1b9625-0112-4ebf-b1c3-d2dd217d50b2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v2-9B/1762652580.315539", - "retrieved_timestamp": "1762652580.31554", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lemon07r/Gemma-2-Ataraxy-v2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "lemon07r/Gemma-2-Ataraxy-v2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21362429464930827 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5765835815625312 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3422818791946309 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34838541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.422124335106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v2a-9B/4fa1e172-f570-4a96-b53a-8ecf31854191.json b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v2a-9B/4fa1e172-f570-4a96-b53a-8ecf31854191.json deleted file mode 100644 index 1c1f91267..000000000 --- a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v2a-9B/4fa1e172-f570-4a96-b53a-8ecf31854191.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v2a-9B/1762652580.315754", - "retrieved_timestamp": "1762652580.315755", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lemon07r/Gemma-2-Ataraxy-v2a-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "lemon07r/Gemma-2-Ataraxy-v2a-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15946909755005606 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.518248966271832 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33976510067114096 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31647916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35147938829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v2f-9B/fd59fb1c-3681-44d2-9172-b10891ae9c55.json b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v2f-9B/fd59fb1c-3681-44d2-9172-b10891ae9c55.json deleted file mode 100644 index d3e11ff53..000000000 --- a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v2f-9B/fd59fb1c-3681-44d2-9172-b10891ae9c55.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v2f-9B/1762652580.315967", - "retrieved_timestamp": "1762652580.315968", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lemon07r/Gemma-2-Ataraxy-v2f-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "lemon07r/Gemma-2-Ataraxy-v2f-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37911408396388246 - } - }, - 
{ - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5192845467961766 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1163141993957704 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3389261744966443 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3231458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3503158244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v3-Advanced-9B/778a10b0-c537-4592-9dbb-2b0de07ced4c.json b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v3-Advanced-9B/778a10b0-c537-4592-9dbb-2b0de07ced4c.json deleted file mode 100644 index bf2c9cb49..000000000 --- a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v3-Advanced-9B/778a10b0-c537-4592-9dbb-2b0de07ced4c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v3-Advanced-9B/1762652580.316169", - "retrieved_timestamp": "1762652580.316169", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lemon07r/Gemma-2-Ataraxy-v3-Advanced-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "lemon07r/Gemma-2-Ataraxy-v3-Advanced-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6601816513517467 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5935146853737787 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18731117824773413 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.33640939597315433 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44496874999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41963098404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v3b-9B/d048e6ad-cc57-4ebe-8376-262564e86f0c.json b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v3b-9B/d048e6ad-cc57-4ebe-8376-262564e86f0c.json deleted file mode 100644 index 3ad3c7306..000000000 --- a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v3b-9B/d048e6ad-cc57-4ebe-8376-262564e86f0c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v3b-9B/1762652580.3163798", - "retrieved_timestamp": "1762652580.316381", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lemon07r/Gemma-2-Ataraxy-v3b-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "lemon07r/Gemma-2-Ataraxy-v3b-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6809144181881852 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5907698162898164 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21525679758308158 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33305369127516776 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44887499999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4204621010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v3i-9B/53602c70-73d9-461b-b27a-24c6a1a538e5.json 
b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v3i-9B/53602c70-73d9-461b-b27a-24c6a1a538e5.json deleted file mode 100644 index a14a3232c..000000000 --- a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v3i-9B/53602c70-73d9-461b-b27a-24c6a1a538e5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v3i-9B/1762652580.3165948", - "retrieved_timestamp": "1762652580.316596", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lemon07r/Gemma-2-Ataraxy-v3i-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "lemon07r/Gemma-2-Ataraxy-v3i-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4203047912871182 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5625750779805955 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15332326283987915 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31806249999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41663896276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v3j-9B/d435bd27-1c26-429d-8ac5-8fd8c591a9aa.json b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v3j-9B/d435bd27-1c26-429d-8ac5-8fd8c591a9aa.json deleted file mode 100644 index a3b034576..000000000 --- a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v3j-9B/d435bd27-1c26-429d-8ac5-8fd8c591a9aa.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v3j-9B/1762652580.3168168", - "retrieved_timestamp": "1762652580.316818", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - 
"name": "lemon07r/Gemma-2-Ataraxy-v3j-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "lemon07r/Gemma-2-Ataraxy-v3j-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4169326276501904 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5632286961183511 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1691842900302115 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31803125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41339760638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v4-Advanced-9B/c0e95e3f-37a4-4b2f-a37b-37854546c241.json b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v4-Advanced-9B/c0e95e3f-37a4-4b2f-a37b-37854546c241.json deleted file mode 100644 index e34b39613..000000000 --- a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v4-Advanced-9B/c0e95e3f-37a4-4b2f-a37b-37854546c241.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v4-Advanced-9B/1762652580.317157", - "retrieved_timestamp": "1762652580.3171608", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lemon07r/Gemma-2-Ataraxy-v4-Advanced-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "lemon07r/Gemma-2-Ataraxy-v4-Advanced-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7015474496558022 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6023627309683861 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21525679758308158 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3389261744966443 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4580520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4366688829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v4a-Advanced-9B/b84aedba-7b87-445d-87c2-b029cb0038c3.json b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v4a-Advanced-9B/b84aedba-7b87-445d-87c2-b029cb0038c3.json deleted file mode 100644 index 352189aec..000000000 --- a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v4a-Advanced-9B/b84aedba-7b87-445d-87c2-b029cb0038c3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v4a-Advanced-9B/1762652580.317515", - "retrieved_timestamp": "1762652580.317516", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lemon07r/Gemma-2-Ataraxy-v4a-Advanced-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "lemon07r/Gemma-2-Ataraxy-v4a-Advanced-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7135123694020753 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.598838715496553 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21148036253776434 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34395973154362414 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": 
"Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44890625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4309341755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v4b-9B/41f04f45-2f1d-42fd-87de-cc5e484cada2.json b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v4b-9B/41f04f45-2f1d-42fd-87de-cc5e484cada2.json deleted file mode 100644 index 861ec17c1..000000000 --- a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v4b-9B/41f04f45-2f1d-42fd-87de-cc5e484cada2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v4b-9B/1762652580.317803", - "retrieved_timestamp": "1762652580.317804", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lemon07r/Gemma-2-Ataraxy-v4b-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "lemon07r/Gemma-2-Ataraxy-v4b-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6878338364428604 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6039158192304305 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23338368580060423 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34060402684563756 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45547916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4356715425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v4c-9B/9499ec24-5be2-478c-b13e-3102d1555668.json b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v4c-9B/9499ec24-5be2-478c-b13e-3102d1555668.json deleted file mode 100644 index b112af8f5..000000000 --- 
a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v4c-9B/9499ec24-5be2-478c-b13e-3102d1555668.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v4c-9B/1762652580.318075", - "retrieved_timestamp": "1762652580.318076", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lemon07r/Gemma-2-Ataraxy-v4c-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "lemon07r/Gemma-2-Ataraxy-v4c-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6945282960323054 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6084319292299174 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22658610271903323 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3338926174496644 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45278124999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43949468085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v4d-9B/7e6685d8-af21-4810-a9cc-edb296f4b937.json b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v4d-9B/7e6685d8-af21-4810-a9cc-edb296f4b937.json deleted file mode 100644 index d24d7631d..000000000 --- a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v4d-9B/7e6685d8-af21-4810-a9cc-edb296f4b937.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v4d-9B/1762652580.318495", - "retrieved_timestamp": "1762652580.318496", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lemon07r/Gemma-2-Ataraxy-v4d-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "lemon07r/Gemma-2-Ataraxy-v4d-9B", - 
"additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7250029920610646 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6054158192304304 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23338368580060423 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34731543624161076 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4541458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4345910904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/lkoenig/BBAI_200_Gemma/b71c5ede-010d-4ce4-9f12-552388e2d9eb.json b/data/hfopenllm_v2/google/lkoenig/BBAI_200_Gemma/b71c5ede-010d-4ce4-9f12-552388e2d9eb.json deleted file mode 100644 index 1ecec60e6..000000000 --- a/data/hfopenllm_v2/google/lkoenig/BBAI_200_Gemma/b71c5ede-010d-4ce4-9f12-552388e2d9eb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_200_Gemma/1762652580.32272", - "retrieved_timestamp": "1762652580.32272", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lkoenig/BBAI_200_Gemma", - "developer": "google", - "inference_platform": "unknown", - "id": "lkoenig/BBAI_200_Gemma", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 19.3 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07051733843978422 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3449044607726533 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36311458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16788563829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/monsterapi/gemma-2-2b-LoRA-MonsterInstruct/f5395aa2-334b-410c-a2ee-4d7381f1c9bc.json b/data/hfopenllm_v2/google/monsterapi/gemma-2-2b-LoRA-MonsterInstruct/f5395aa2-334b-410c-a2ee-4d7381f1c9bc.json deleted file mode 100644 index 428d9464c..000000000 --- a/data/hfopenllm_v2/google/monsterapi/gemma-2-2b-LoRA-MonsterInstruct/f5395aa2-334b-410c-a2ee-4d7381f1c9bc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/monsterapi_gemma-2-2b-LoRA-MonsterInstruct/1762652580.372597", - "retrieved_timestamp": "1762652580.372598", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "monsterapi/gemma-2-2b-LoRA-MonsterInstruct", - "developer": "google", - "inference_platform": "unknown", - "id": "monsterapi/gemma-2-2b-LoRA-MonsterInstruct", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3902545246612322 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36496861927498697 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3643854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19872007978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/mt5-base/621fb00c-90a0-4295-9bd6-f5e102bc0bab.json b/data/hfopenllm_v2/google/mt5-base/621fb00c-90a0-4295-9bd6-f5e102bc0bab.json deleted file mode 100644 index c6add2894..000000000 --- a/data/hfopenllm_v2/google/mt5-base/621fb00c-90a0-4295-9bd6-f5e102bc0bab.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/google_mt5-base/1762652580.178463", - "retrieved_timestamp": "1762652580.178463", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "google/mt5-base", - "developer": "google", - "inference_platform": "unknown", - "id": "google/mt5-base", - "additional_details": { - "precision": "float16", - "architecture": "MT5ForConditionalGeneration", - "params_billions": 0.39 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1645157072124186 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28831600228488835 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23909395973154363 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36720833333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10696476063829788 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/mt5-base/a7dde688-a0ae-4731-909f-0bef0c6eeba9.json b/data/hfopenllm_v2/google/mt5-base/a7dde688-a0ae-4731-909f-0bef0c6eeba9.json new file mode 100644 index 000000000..3221afa06 --- /dev/null +++ b/data/hfopenllm_v2/google/mt5-base/a7dde688-a0ae-4731-909f-0bef0c6eeba9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/google_mt5-base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + 
"model_info": { + "name": "mt5-base", + "id": "google/mt5-base", + "developer": "google", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MT5ForConditionalGeneration", + "params_billions": 0.39 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1645 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2883 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0091 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2391 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3672 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.107 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/mt5-small/0d958c7c-5cd9-459f-a0e9-235b5d41ae53.json b/data/hfopenllm_v2/google/mt5-small/0d958c7c-5cd9-459f-a0e9-235b5d41ae53.json deleted file mode 100644 index 69e1be77b..000000000 --- a/data/hfopenllm_v2/google/mt5-small/0d958c7c-5cd9-459f-a0e9-235b5d41ae53.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/google_mt5-small/1762652580.1787279", - "retrieved_timestamp": "1762652580.178729", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "google/mt5-small", - "developer": "google", - "inference_platform": "unknown", - "id": "google/mt5-small", - "additional_details": { - "precision": "float16", - "architecture": 
"MT5ForConditionalGeneration", - "params_billions": 0.17 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17180968718555653 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2765842029929075 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2424496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38575 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11228390957446809 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/mt5-small/eb2a8a60-2240-4b08-9dc3-be0215aa7bfc.json b/data/hfopenllm_v2/google/mt5-small/eb2a8a60-2240-4b08-9dc3-be0215aa7bfc.json new file mode 100644 index 000000000..f462ca442 --- /dev/null +++ b/data/hfopenllm_v2/google/mt5-small/eb2a8a60-2240-4b08-9dc3-be0215aa7bfc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/google_mt5-small/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mt5-small", + "id": "google/mt5-small", + "developer": "google", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MT5ForConditionalGeneration", + "params_billions": 0.17 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1718 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2766 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2424 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3857 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1123 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/mt5-xl/5abb3ce9-6ad4-4dfa-8bca-81ec6cb84426.json b/data/hfopenllm_v2/google/mt5-xl/5abb3ce9-6ad4-4dfa-8bca-81ec6cb84426.json deleted file mode 100644 index 54ee50d77..000000000 --- a/data/hfopenllm_v2/google/mt5-xl/5abb3ce9-6ad4-4dfa-8bca-81ec6cb84426.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/google_mt5-xl/1762652580.17897", - "retrieved_timestamp": "1762652580.1789708", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "google/mt5-xl", - "developer": "google", - "inference_platform": "unknown", - "id": "google/mt5-xl", - "additional_details": { - "precision": "float16", - "architecture": "MT5ForConditionalGeneration", - "params_billions": 3.23 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19596448534333347 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.304735837080435 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", 
- "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3795208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11195146276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/mt5-xl/9b05919f-d7c1-4e04-9dd8-9ae70e0005e6.json b/data/hfopenllm_v2/google/mt5-xl/9b05919f-d7c1-4e04-9dd8-9ae70e0005e6.json new file mode 100644 index 000000000..53e4e6095 --- /dev/null +++ b/data/hfopenllm_v2/google/mt5-xl/9b05919f-d7c1-4e04-9dd8-9ae70e0005e6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/google_mt5-xl/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mt5-xl", + "id": "google/mt5-xl", + "developer": "google", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MT5ForConditionalGeneration", + "params_billions": 3.23 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.196 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3047 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3795 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 
1.0 + }, + "score_details": { + "score": 0.112 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/mt5-xxl/38520cce-b3b6-4f22-a6a8-313f6181f5ea.json b/data/hfopenllm_v2/google/mt5-xxl/38520cce-b3b6-4f22-a6a8-313f6181f5ea.json deleted file mode 100644 index 797916be3..000000000 --- a/data/hfopenllm_v2/google/mt5-xxl/38520cce-b3b6-4f22-a6a8-313f6181f5ea.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/google_mt5-xxl/1762652580.1791801", - "retrieved_timestamp": "1762652580.1791801", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "google/mt5-xxl", - "developer": "google", - "inference_platform": "unknown", - "id": "google/mt5-xxl", - "additional_details": { - "precision": "float16", - "architecture": "T5ForConditionalGeneration", - "params_billions": 11.9 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23575668116154028 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2959344159116905 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24161073825503357 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36894791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10887632978723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/mt5-xxl/6cd98538-74b6-4ac6-a3ac-9a311cfe47f6.json b/data/hfopenllm_v2/google/mt5-xxl/6cd98538-74b6-4ac6-a3ac-9a311cfe47f6.json new file mode 100644 index 000000000..151ffbc18 --- /dev/null +++ b/data/hfopenllm_v2/google/mt5-xxl/6cd98538-74b6-4ac6-a3ac-9a311cfe47f6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/google_mt5-xxl/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mt5-xxl", + "id": "google/mt5-xxl", + "developer": "google", + "inference_platform": "unknown", + "additional_details": { + 
"precision": "float16", + "architecture": "T5ForConditionalGeneration", + "params_billions": 11.9 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2358 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2959 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2416 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3689 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1089 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/nbeerbower/Gemma2-Gutenberg-Doppel-9B/b6514bef-f106-45e0-8571-da3507b0e95b.json b/data/hfopenllm_v2/google/nbeerbower/Gemma2-Gutenberg-Doppel-9B/b6514bef-f106-45e0-8571-da3507b0e95b.json deleted file mode 100644 index 22b5ca481..000000000 --- a/data/hfopenllm_v2/google/nbeerbower/Gemma2-Gutenberg-Doppel-9B/b6514bef-f106-45e0-8571-da3507b0e95b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_Gemma2-Gutenberg-Doppel-9B/1762652580.378716", - "retrieved_timestamp": "1762652580.378717", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/Gemma2-Gutenberg-Doppel-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "nbeerbower/Gemma2-Gutenberg-Doppel-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": 
"Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7171094917042337 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5870114193661848 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19788519637462235 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3296979865771812 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46078125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41273271276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/nbeerbower/gemma2-gutenberg-27B/b0a9fb09-2637-4b4c-9d78-7dc8d9c6aad2.json b/data/hfopenllm_v2/google/nbeerbower/gemma2-gutenberg-27B/b0a9fb09-2637-4b4c-9d78-7dc8d9c6aad2.json deleted file mode 100644 index 078cae3f0..000000000 --- a/data/hfopenllm_v2/google/nbeerbower/gemma2-gutenberg-27B/b0a9fb09-2637-4b4c-9d78-7dc8d9c6aad2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_gemma2-gutenberg-27B/1762652580.384448", - "retrieved_timestamp": "1762652580.3844512", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/gemma2-gutenberg-27B", - "developer": "google", - "inference_platform": "unknown", - "id": "nbeerbower/gemma2-gutenberg-27B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29470804133033685 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37965683503451614 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0188821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3727291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19822140957446807 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/nbeerbower/gemma2-gutenberg-9B/14dc56ff-7f3b-430e-a4b3-6e4c9961fea3.json b/data/hfopenllm_v2/google/nbeerbower/gemma2-gutenberg-9B/14dc56ff-7f3b-430e-a4b3-6e4c9961fea3.json deleted file mode 100644 index 539f7c022..000000000 --- a/data/hfopenllm_v2/google/nbeerbower/gemma2-gutenberg-9B/14dc56ff-7f3b-430e-a4b3-6e4c9961fea3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_gemma2-gutenberg-9B/1762652580.384712", - "retrieved_timestamp": "1762652580.384713", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/gemma2-gutenberg-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "nbeerbower/gemma2-gutenberg-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2795948084416016 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5950904001490335 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08081570996978851 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45951041666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4192154255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/nhyha/N3N_gemma-2-9b-it_20241029_1532/cb85dee2-acee-48f8-85aa-1d5664179fd5.json b/data/hfopenllm_v2/google/nhyha/N3N_gemma-2-9b-it_20241029_1532/cb85dee2-acee-48f8-85aa-1d5664179fd5.json deleted file mode 100644 index cc2783d8d..000000000 --- a/data/hfopenllm_v2/google/nhyha/N3N_gemma-2-9b-it_20241029_1532/cb85dee2-acee-48f8-85aa-1d5664179fd5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nhyha_N3N_gemma-2-9b-it_20241029_1532/1762652580.4059799", - "retrieved_timestamp": "1762652580.4059808", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nhyha/N3N_gemma-2-9b-it_20241029_1532", - "developer": "google", - "inference_platform": "unknown", - "id": "nhyha/N3N_gemma-2-9b-it_20241029_1532", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6751940407008958 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5863124381827675 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2122356495468278 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34060402684563756 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4593541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4122340425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/nhyha/N3N_gemma-2-9b-it_20241110_2026/4c450b48-8477-45cb-9cfa-814c21dd39d7.json b/data/hfopenllm_v2/google/nhyha/N3N_gemma-2-9b-it_20241110_2026/4c450b48-8477-45cb-9cfa-814c21dd39d7.json deleted file mode 100644 index c5b964c55..000000000 --- a/data/hfopenllm_v2/google/nhyha/N3N_gemma-2-9b-it_20241110_2026/4c450b48-8477-45cb-9cfa-814c21dd39d7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nhyha_N3N_gemma-2-9b-it_20241110_2026/1762652580.406234", - "retrieved_timestamp": "1762652580.406235", - 
"source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nhyha/N3N_gemma-2-9b-it_20241110_2026", - "developer": "google", - "inference_platform": "unknown", - "id": "nhyha/N3N_gemma-2-9b-it_20241110_2026", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6282829558903709 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5867149609980419 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1608761329305136 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33640939597315433 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40730208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40201130319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/nidum/Nidum-Limitless-Gemma-2B/49e352c1-2319-4bc5-aa3f-1697739a05b8.json b/data/hfopenllm_v2/google/nidum/Nidum-Limitless-Gemma-2B/49e352c1-2319-4bc5-aa3f-1697739a05b8.json deleted file mode 100644 index b9df3656e..000000000 --- a/data/hfopenllm_v2/google/nidum/Nidum-Limitless-Gemma-2B/49e352c1-2319-4bc5-aa3f-1697739a05b8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nidum_Nidum-Limitless-Gemma-2B/1762652580.406632", - "retrieved_timestamp": "1762652580.406633", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nidum/Nidum-Limitless-Gemma-2B", - "developer": "google", - "inference_platform": "unknown", - "id": "nidum/Nidum-Limitless-Gemma-2B", - "additional_details": { - "precision": "float16", - "architecture": "GemmaForCausalLM", - "params_billions": 2.506 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24235140538216376 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3078801520076317 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37403125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11735372340425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/noname0202/gemma-2-2b-it-ties/42bed40b-ac71-42c8-b56b-47d1f930c736.json b/data/hfopenllm_v2/google/noname0202/gemma-2-2b-it-ties/42bed40b-ac71-42c8-b56b-47d1f930c736.json deleted file mode 100644 index 7b04d5f8c..000000000 --- a/data/hfopenllm_v2/google/noname0202/gemma-2-2b-it-ties/42bed40b-ac71-42c8-b56b-47d1f930c736.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/noname0202_gemma-2-2b-it-ties/1762652580.4097438", - "retrieved_timestamp": "1762652580.409745", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "noname0202/gemma-2-2b-it-ties", - "developer": "google", - "inference_platform": "unknown", - "id": "noname0202/gemma-2-2b-it-ties", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12657083205893696 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42057403060290816 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02416918429003021 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39288541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2560671542553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/princeton-nlp/gemma-2-9b-it-SimPO/4285b38c-aba8-444b-9b0b-b265c7b1fef1.json b/data/hfopenllm_v2/google/princeton-nlp/gemma-2-9b-it-SimPO/4285b38c-aba8-444b-9b0b-b265c7b1fef1.json deleted file mode 100644 index ac8fd1db4..000000000 --- a/data/hfopenllm_v2/google/princeton-nlp/gemma-2-9b-it-SimPO/4285b38c-aba8-444b-9b0b-b265c7b1fef1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_gemma-2-9b-it-SimPO/1762652580.454763", - "retrieved_timestamp": "1762652580.4547682", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/gemma-2-9b-it-SimPO", - "developer": "google", - "inference_platform": "unknown", - "id": "princeton-nlp/gemma-2-9b-it-SimPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3206857803960159 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5839179923162123 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07099697885196375 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33557046979865773 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41232291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39752327127659576 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/google/qq8933/OpenLongCoT-Base-Gemma2-2B/c945b9b5-7b46-4300-adcc-2d6c94df0ac1.json b/data/hfopenllm_v2/google/qq8933/OpenLongCoT-Base-Gemma2-2B/c945b9b5-7b46-4300-adcc-2d6c94df0ac1.json deleted file mode 100644 index 7f6868e10..000000000 --- a/data/hfopenllm_v2/google/qq8933/OpenLongCoT-Base-Gemma2-2B/c945b9b5-7b46-4300-adcc-2d6c94df0ac1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/qq8933_OpenLongCoT-Base-Gemma2-2B/1762652580.488883", - "retrieved_timestamp": "1762652580.488883", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "qq8933/OpenLongCoT-Base-Gemma2-2B", - "developer": "google", - "inference_platform": "unknown", - "id": "qq8933/OpenLongCoT-Base-Gemma2-2B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 3.204 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1965141380426158 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3106362870893106 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.023413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32225 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1315658244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp/054a662a-e425-448c-9556-6998833e51ff.json b/data/hfopenllm_v2/google/recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp/054a662a-e425-448c-9556-6998833e51ff.json deleted file mode 100644 index 627ff2913..000000000 --- a/data/hfopenllm_v2/google/recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp/054a662a-e425-448c-9556-6998833e51ff.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/recoilme_Gemma-2-Ataraxy-Gemmasutra-9B-slerp/1762652580.491333", - "retrieved_timestamp": "1762652580.491333", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": 
"Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp", - "developer": "google", - "inference_platform": "unknown", - "id": "recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7648949232480928 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.597438766061506 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.017371601208459216 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4244791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4207114361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp/0a685d8f-38c7-4521-9613-7b36ad1cac73.json b/data/hfopenllm_v2/google/recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp/0a685d8f-38c7-4521-9613-7b36ad1cac73.json deleted file mode 100644 index 872c6fdeb..000000000 --- a/data/hfopenllm_v2/google/recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp/0a685d8f-38c7-4521-9613-7b36ad1cac73.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/recoilme_Gemma-2-Ataraxy-Gemmasutra-9B-slerp/1762652580.491603", - "retrieved_timestamp": "1762652580.491603", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp", - "developer": "google", - "inference_platform": "unknown", - "id": "recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.28536505361330156 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5983926033872208 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10045317220543806 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3296979865771812 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46065625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4162234042553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.1/d31a41b0-6500-4e1b-8435-b9d3e9725c02.json b/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.1/d31a41b0-6500-4e1b-8435-b9d3e9725c02.json deleted file mode 100644 index 9b0e583c6..000000000 --- a/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.1/d31a41b0-6500-4e1b-8435-b9d3e9725c02.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/recoilme_recoilme-gemma-2-9B-v0.1/1762652580.491797", - "retrieved_timestamp": "1762652580.491798", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "recoilme/recoilme-gemma-2-9B-v0.1", - "developer": "google", - "inference_platform": "unknown", - "id": "recoilme/recoilme-gemma-2-9B-v0.1", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.751506004069203 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5995309756292291 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.3389261744966443 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41914583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4158909574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.2/5826c93f-3642-44cf-b385-4a5ab5103086.json b/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.2/5826c93f-3642-44cf-b385-4a5ab5103086.json deleted file mode 100644 index 73045fd81..000000000 --- a/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.2/5826c93f-3642-44cf-b385-4a5ab5103086.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/recoilme_recoilme-gemma-2-9B-v0.2/1762652580.4922318", - "retrieved_timestamp": "1762652580.492233", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "recoilme/recoilme-gemma-2-9B-v0.2", - "developer": "google", - "inference_platform": "unknown", - "id": "recoilme/recoilme-gemma-2-9B-v0.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2746989100032359 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6030832642626502 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46859375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4122340425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.2/6a15378c-36cc-4f5e-b184-5a19a6fbb192.json 
b/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.2/6a15378c-36cc-4f5e-b184-5a19a6fbb192.json deleted file mode 100644 index f07fed1bd..000000000 --- a/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.2/6a15378c-36cc-4f5e-b184-5a19a6fbb192.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/recoilme_recoilme-gemma-2-9B-v0.2/1762652580.492019", - "retrieved_timestamp": "1762652580.49202", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "recoilme/recoilme-gemma-2-9B-v0.2", - "developer": "google", - "inference_platform": "unknown", - "id": "recoilme/recoilme-gemma-2-9B-v0.2", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7591745457608035 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6025964285724085 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.052870090634441085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.409875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41630651595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.3/47cfe707-ba31-4c9b-aa15-9ab8b566e206.json b/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.3/47cfe707-ba31-4c9b-aa15-9ab8b566e206.json deleted file mode 100644 index 735f3e0ea..000000000 --- a/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.3/47cfe707-ba31-4c9b-aa15-9ab8b566e206.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/recoilme_recoilme-gemma-2-9B-v0.3/1762652580.492416", - "retrieved_timestamp": "1762652580.492416", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": 
{ - "name": "recoilme/recoilme-gemma-2-9B-v0.3", - "developer": "google", - "inference_platform": "unknown", - "id": "recoilme/recoilme-gemma-2-9B-v0.3", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.743937197746424 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5992527878628748 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08761329305135952 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4203854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4072473404255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.3/8d3bd687-89f5-4d62-af46-93646aea4341.json b/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.3/8d3bd687-89f5-4d62-af46-93646aea4341.json deleted file mode 100644 index ccacf273a..000000000 --- a/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.3/8d3bd687-89f5-4d62-af46-93646aea4341.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/recoilme_recoilme-gemma-2-9B-v0.3/1762652580.492666", - "retrieved_timestamp": "1762652580.492667", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "recoilme/recoilme-gemma-2-9B-v0.3", - "developer": "google", - "inference_platform": "unknown", - "id": "recoilme/recoilme-gemma-2-9B-v0.3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.57607592299543 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 
- }, - "score_details": { - "score": 0.6019827101058847 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18882175226586104 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.337248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46322916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4039228723404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.4/28eef1b7-a83e-49c9-8f11-ef9e4ae7e1ce.json b/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.4/28eef1b7-a83e-49c9-8f11-ef9e4ae7e1ce.json deleted file mode 100644 index 43bc30f29..000000000 --- a/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.4/28eef1b7-a83e-49c9-8f11-ef9e4ae7e1ce.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/recoilme_recoilme-gemma-2-9B-v0.4/1762652580.4928808", - "retrieved_timestamp": "1762652580.492882", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "recoilme/recoilme-gemma-2-9B-v0.4", - "developer": "google", - "inference_platform": "unknown", - "id": "recoilme/recoilme-gemma-2-9B-v0.4", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2561891337207498 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5967285833554881 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34060402684563756 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4726875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4405751329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.5/8fe5a1e8-1491-4e64-8aed-32e73f2dae6e.json b/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.5/8fe5a1e8-1491-4e64-8aed-32e73f2dae6e.json deleted file mode 100644 index c939a9334..000000000 --- a/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.5/8fe5a1e8-1491-4e64-8aed-32e73f2dae6e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/recoilme_recoilme-gemma-2-9B-v0.5/1762652580.4931269", - "retrieved_timestamp": "1762652580.493134", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "recoilme/recoilme-gemma-2-9B-v0.5", - "developer": "google", - "inference_platform": "unknown", - "id": "recoilme/recoilme-gemma-2-9B-v0.5", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7664186580495308 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5981472549925003 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21148036253776434 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33640939597315433 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4231770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41996343085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/recurrentgemma-2b-it/b0ca2dec-387f-4b27-9adb-772af1899832.json b/data/hfopenllm_v2/google/recurrentgemma-2b-it/b0ca2dec-387f-4b27-9adb-772af1899832.json new file mode 100644 index 000000000..a93defb31 --- /dev/null +++ b/data/hfopenllm_v2/google/recurrentgemma-2b-it/b0ca2dec-387f-4b27-9adb-772af1899832.json @@ -0,0 +1,132 @@ +{ + 
"schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/google_recurrentgemma-2b-it/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "recurrentgemma-2b-it", + "id": "google/recurrentgemma-2b-it", + "developer": "google", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "RecurrentGemmaForCausalLM", + "params_billions": 2.683 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2949 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.333 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0196 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2534 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3341 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1402 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/recurrentgemma-2b/53c4b397-b78e-4699-a01e-3535aa072225.json b/data/hfopenllm_v2/google/recurrentgemma-2b/53c4b397-b78e-4699-a01e-3535aa072225.json new file mode 100644 index 000000000..ea66c049b --- /dev/null +++ b/data/hfopenllm_v2/google/recurrentgemma-2b/53c4b397-b78e-4699-a01e-3535aa072225.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/google_recurrentgemma-2b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "recurrentgemma-2b", + "id": "google/recurrentgemma-2b", + "developer": "google", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "RecurrentGemmaForCausalLM", + "params_billions": 2.683 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3017 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3197 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0204 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2458 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3446 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1176 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/recurrentgemma-9b-it/f5b251f0-741c-4ad5-ab04-19c5202854ea.json b/data/hfopenllm_v2/google/recurrentgemma-9b-it/f5b251f0-741c-4ad5-ab04-19c5202854ea.json new file mode 100644 index 000000000..001b78f3b --- /dev/null +++ b/data/hfopenllm_v2/google/recurrentgemma-9b-it/f5b251f0-741c-4ad5-ab04-19c5202854ea.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/google_recurrentgemma-9b-it/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "recurrentgemma-9b-it", + "id": "google/recurrentgemma-9b-it", + "developer": "google", + "inference_platform": "unknown", + 
"additional_details": { + "precision": "bfloat16", + "architecture": "RecurrentGemmaForCausalLM", + "params_billions": 9.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.501 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4367 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0665 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4379 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2843 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/recurrentgemma-9b/7b2ba13a-e01d-4442-9abe-d16df1a1668a.json b/data/hfopenllm_v2/google/recurrentgemma-9b/7b2ba13a-e01d-4442-9abe-d16df1a1668a.json new file mode 100644 index 000000000..549f07884 --- /dev/null +++ b/data/hfopenllm_v2/google/recurrentgemma-9b/7b2ba13a-e01d-4442-9abe-d16df1a1668a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/google_recurrentgemma-9b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "recurrentgemma-9b", + "id": "google/recurrentgemma-9b", + "developer": "google", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "RecurrentGemmaForCausalLM", + "params_billions": 9.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + 
"hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3116 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3956 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0665 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3803 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2605 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/sequelbox/gemma-2-9B-MOTH/4bdefb85-2413-43b7-8938-869ad0cff58f.json b/data/hfopenllm_v2/google/sequelbox/gemma-2-9B-MOTH/4bdefb85-2413-43b7-8938-869ad0cff58f.json deleted file mode 100644 index 04a64eacd..000000000 --- a/data/hfopenllm_v2/google/sequelbox/gemma-2-9B-MOTH/4bdefb85-2413-43b7-8938-869ad0cff58f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sequelbox_gemma-2-9B-MOTH/1762652580.5126731", - "retrieved_timestamp": "1762652580.512674", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sequelbox/gemma-2-9B-MOTH", - "developer": "google", - "inference_platform": "unknown", - "id": "sequelbox/gemma-2-9B-MOTH", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.20588150551647405 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30797000521562534 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3409479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11402925531914894 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/switch-base-8/43e22ce0-cdd7-424f-8a01-f9fea8b2a010.json b/data/hfopenllm_v2/google/switch-base-8/43e22ce0-cdd7-424f-8a01-f9fea8b2a010.json deleted file mode 100644 index e25989ae0..000000000 --- a/data/hfopenllm_v2/google/switch-base-8/43e22ce0-cdd7-424f-8a01-f9fea8b2a010.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/google_switch-base-8/1762652580.180255", - "retrieved_timestamp": "1762652580.180256", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "google/switch-base-8", - "developer": "google", - "inference_platform": "unknown", - "id": "google/switch-base-8", - "additional_details": { - "precision": "float16", - "architecture": "SwitchTransformersForConditionalGeneration", - "params_billions": 0.62 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15852050337548815 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28763132730669333 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35173958333333327 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10979055851063829 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/switch-base-8/bf79f87c-3f14-49e8-acba-725e709d5f11.json b/data/hfopenllm_v2/google/switch-base-8/bf79f87c-3f14-49e8-acba-725e709d5f11.json new file mode 100644 index 000000000..1bc6517df --- /dev/null +++ b/data/hfopenllm_v2/google/switch-base-8/bf79f87c-3f14-49e8-acba-725e709d5f11.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/google_switch-base-8/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "switch-base-8", + "id": "google/switch-base-8", + "developer": "google", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "SwitchTransformersForConditionalGeneration", + "params_billions": 0.62 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1585 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2876 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3517 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1098 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/umt5-base/3fbac7d4-cbbb-4b77-9db4-fd7e122cc90e.json b/data/hfopenllm_v2/google/umt5-base/3fbac7d4-cbbb-4b77-9db4-fd7e122cc90e.json new file mode 100644 index 000000000..ee9655c66 --- /dev/null +++ b/data/hfopenllm_v2/google/umt5-base/3fbac7d4-cbbb-4b77-9db4-fd7e122cc90e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/google_umt5-base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "umt5-base", + "id": "google/umt5-base", + "developer": "google", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "UMT5ForConditionalGeneration", + "params_billions": -1.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1746 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2788 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0045 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2542 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3382 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1078 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/google/umt5-base/659053b0-7694-41e7-916d-28406b3ed572.json b/data/hfopenllm_v2/google/umt5-base/659053b0-7694-41e7-916d-28406b3ed572.json deleted file mode 100644 index 4d08697c4..000000000 --- a/data/hfopenllm_v2/google/umt5-base/659053b0-7694-41e7-916d-28406b3ed572.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/google_umt5-base/1762652580.180466", - "retrieved_timestamp": "1762652580.180467", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "google/umt5-base", - "developer": "google", - "inference_platform": "unknown", - "id": "google/umt5-base", - "additional_details": { - "precision": "float16", - "architecture": "UMT5ForConditionalGeneration", - "params_billions": -1.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.174632198123202 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27877262328945457 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.004531722054380665 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33821875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10779587765957446 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/wzhouad/gemma-2-9b-it-WPO-HB/70fe199f-6c81-4d99-a595-208b7abc321f.json b/data/hfopenllm_v2/google/wzhouad/gemma-2-9b-it-WPO-HB/70fe199f-6c81-4d99-a595-208b7abc321f.json deleted file mode 100644 index 36c4c761d..000000000 --- a/data/hfopenllm_v2/google/wzhouad/gemma-2-9b-it-WPO-HB/70fe199f-6c81-4d99-a595-208b7abc321f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/wzhouad_gemma-2-9b-it-WPO-HB/1762652580.596365", - "retrieved_timestamp": "1762652580.5963662", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"wzhouad/gemma-2-9b-it-WPO-HB", - "developer": "google", - "inference_platform": "unknown", - "id": "wzhouad/gemma-2-9b-it-WPO-HB", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5437029304467702 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5628624376751974 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15332326283987915 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3498322147651007 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3674583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33602061170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18-merge/9c7a213f-e5f8-4cc2-9cbe-d61db2cf2bbe.json b/data/hfopenllm_v2/google/ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18-merge/9c7a213f-e5f8-4cc2-9cbe-d61db2cf2bbe.json deleted file mode 100644 index bffbf304b..000000000 --- a/data/hfopenllm_v2/google/ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18-merge/9c7a213f-e5f8-4cc2-9cbe-d61db2cf2bbe.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-ORPO-jpn-it-abliterated-18-merge/1762652580.609323", - "retrieved_timestamp": "1762652580.609324", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18-merge", - "developer": "google", - "inference_platform": "unknown", - "id": "ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18-merge", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5218209905273563 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.414688942270627 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35139583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24609375 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18/23800723-b5bd-4fc6-9d07-ca937c8680c6.json b/data/hfopenllm_v2/google/ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18/23800723-b5bd-4fc6-9d07-ca937c8680c6.json deleted file mode 100644 index cc035870e..000000000 --- a/data/hfopenllm_v2/google/ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18/23800723-b5bd-4fc6-9d07-ca937c8680c6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-ORPO-jpn-it-abliterated-18/1762652580.6090298", - "retrieved_timestamp": "1762652580.609031", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18", - "developer": "google", - "inference_platform": "unknown", - "id": "ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4630945890237902 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4052902505118913 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3754270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23445811170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-17-18-24/7321bd04-6f20-427a-8219-0ff2e299cb01.json b/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-17-18-24/7321bd04-6f20-427a-8219-0ff2e299cb01.json deleted file mode 100644 index 326fd12f9..000000000 --- a/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-17-18-24/7321bd04-6f20-427a-8219-0ff2e299cb01.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-jpn-it-abliterated-17-18-24/1762652580.609858", - "retrieved_timestamp": "1762652580.609859", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ymcki/gemma-2-2b-jpn-it-abliterated-17-18-24", - "developer": "google", - "inference_platform": "unknown", - "id": "ymcki/gemma-2-2b-jpn-it-abliterated-17-18-24", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.505484337114412 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38123590457353557 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0256797583081571 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35015625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2282247340425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO-alpaca/3cc8621a-b38c-4735-af09-027989774289.json 
b/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO-alpaca/3cc8621a-b38c-4735-af09-027989774289.json deleted file mode 100644 index 6e24e7599..000000000 --- a/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO-alpaca/3cc8621a-b38c-4735-af09-027989774289.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-jpn-it-abliterated-17-ORPO-alpaca/1762652580.6102881", - "retrieved_timestamp": "1762652580.6102889", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO-alpaca", - "developer": "google", - "inference_platform": "unknown", - "id": "ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO-alpaca", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30647349033896726 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40715971926711275 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0324773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39691666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2249002659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO/44b47789-f529-4bae-9e87-196abc325efc.json b/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO/44b47789-f529-4bae-9e87-196abc325efc.json deleted file mode 100644 index d8ac068a2..000000000 --- a/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO/44b47789-f529-4bae-9e87-196abc325efc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-jpn-it-abliterated-17-ORPO/1762652580.610075", - "retrieved_timestamp": "1762652580.610076", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging 
Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO", - "developer": "google", - "inference_platform": "unknown", - "id": "ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47478468242042227 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38979797271028965 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.061933534743202415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37676041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21908244680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-17/5958a61d-bf39-4de4-bfe1-6a6db2f37f55.json b/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-17/5958a61d-bf39-4de4-bfe1-6a6db2f37f55.json deleted file mode 100644 index 62d33fd73..000000000 --- a/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-17/5958a61d-bf39-4de4-bfe1-6a6db2f37f55.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-jpn-it-abliterated-17/1762652580.609628", - "retrieved_timestamp": "1762652580.609628", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ymcki/gemma-2-2b-jpn-it-abliterated-17", - "developer": "google", - "inference_platform": "unknown", - "id": "ymcki/gemma-2-2b-jpn-it-abliterated-17", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5081572449988254 - } - 
}, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40762664531580056 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37006249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2455119680851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-18-ORPO/c91ab7d1-b36e-45ca-8f1e-ad9ef0c38100.json b/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-18-ORPO/c91ab7d1-b36e-45ca-8f1e-ad9ef0c38100.json deleted file mode 100644 index 51cc6e8b2..000000000 --- a/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-18-ORPO/c91ab7d1-b36e-45ca-8f1e-ad9ef0c38100.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-jpn-it-abliterated-18-ORPO/1762652580.610698", - "retrieved_timestamp": "1762652580.610699", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ymcki/gemma-2-2b-jpn-it-abliterated-18-ORPO", - "developer": "google", - "inference_platform": "unknown", - "id": "ymcki/gemma-2-2b-jpn-it-abliterated-18-ORPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47423502972113984 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40389353402379324 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3953333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21850066489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-18/78f235b0-fa98-48e2-bb03-9f7e9f986004.json b/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-18/78f235b0-fa98-48e2-bb03-9f7e9f986004.json deleted file mode 100644 index 463bef961..000000000 --- a/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-18/78f235b0-fa98-48e2-bb03-9f7e9f986004.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-jpn-it-abliterated-18/1762652580.610494", - "retrieved_timestamp": "1762652580.610495", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ymcki/gemma-2-2b-jpn-it-abliterated-18", - "developer": "google", - "inference_platform": "unknown", - "id": "ymcki/gemma-2-2b-jpn-it-abliterated-18", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5175246124726836 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4132188791645781 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0445619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37415624999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25049867021276595 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-24/4f0262d9-2a01-4127-bb40-1bbf437bbc07.json b/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-24/4f0262d9-2a01-4127-bb40-1bbf437bbc07.json deleted file mode 100644 index 48ea972ec..000000000 --- a/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-24/4f0262d9-2a01-4127-bb40-1bbf437bbc07.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-jpn-it-abliterated-24/1762652580.610902", - "retrieved_timestamp": "1762652580.610903", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ymcki/gemma-2-2b-jpn-it-abliterated-24", - "developer": "google", - "inference_platform": "unknown", - "id": "ymcki/gemma-2-2b-jpn-it-abliterated-24", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49786566310722213 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41096027770392857 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39148958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2473404255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zake7749/gemma-2-9b-it-chinese-kyara/827af354-0efb-4a44-b62a-c8562fd0065b.json b/data/hfopenllm_v2/google/zake7749/gemma-2-9b-it-chinese-kyara/827af354-0efb-4a44-b62a-c8562fd0065b.json deleted file mode 100644 index ea788f4f8..000000000 --- a/data/hfopenllm_v2/google/zake7749/gemma-2-9b-it-chinese-kyara/827af354-0efb-4a44-b62a-c8562fd0065b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zake7749_gemma-2-9b-it-chinese-kyara/1762652580.612564", - "retrieved_timestamp": "1762652580.612565", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zake7749/gemma-2-9b-it-chinese-kyara", - "developer": "google", - "inference_platform": "unknown", - "id": "zake7749/gemma-2-9b-it-chinese-kyara", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17642965110351644 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5953692987878404 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10498489425981873 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4241979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41788563829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/Gemma-2-TM-9B/4d3c877e-3dea-44af-8133-d555355971f8.json b/data/hfopenllm_v2/google/zelk12/Gemma-2-TM-9B/4d3c877e-3dea-44af-8133-d555355971f8.json deleted file mode 100644 index bafa6fca6..000000000 --- a/data/hfopenllm_v2/google/zelk12/Gemma-2-TM-9B/4d3c877e-3dea-44af-8133-d555355971f8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_Gemma-2-TM-9B/1762652580.612811", - "retrieved_timestamp": "1762652580.612811", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/Gemma-2-TM-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/Gemma-2-TM-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8044621604010691 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": 
"Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5986592993557701 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20241691842900303 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3464765100671141 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41523958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40882646276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT-Gen1-gemma-2-9B/119f453d-714d-4324-aac5-8448bab91771.json b/data/hfopenllm_v2/google/zelk12/MT-Gen1-gemma-2-9B/119f453d-714d-4324-aac5-8448bab91771.json deleted file mode 100644 index f778b27c8..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT-Gen1-gemma-2-9B/119f453d-714d-4324-aac5-8448bab91771.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen1-gemma-2-9B/1762652580.613055", - "retrieved_timestamp": "1762652580.613056", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT-Gen1-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-Gen1-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7886252920029965 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6099997385328262 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22205438066465258 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3464765100671141 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy 
on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4216875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4380817819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT-Gen2-GI-gemma-2-9B/0cf7e394-67e2-4ca3-ab2e-00cd4165eaf8.json b/data/hfopenllm_v2/google/zelk12/MT-Gen2-GI-gemma-2-9B/0cf7e394-67e2-4ca3-ab2e-00cd4165eaf8.json deleted file mode 100644 index a9e136f62..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT-Gen2-GI-gemma-2-9B/0cf7e394-67e2-4ca3-ab2e-00cd4165eaf8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen2-GI-gemma-2-9B/1762652580.613308", - "retrieved_timestamp": "1762652580.613309", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT-Gen2-GI-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-Gen2-GI-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7913979352562313 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6095558882654465 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35067114093959734 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42832291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43558843085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT-Gen2-gemma-2-9B/6f5cbf98-67b4-4651-acee-160fe2e36f59.json b/data/hfopenllm_v2/google/zelk12/MT-Gen2-gemma-2-9B/6f5cbf98-67b4-4651-acee-160fe2e36f59.json deleted file mode 100644 index 64cdb676f..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT-Gen2-gemma-2-9B/6f5cbf98-67b4-4651-acee-160fe2e36f59.json +++ 
/dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen2-gemma-2-9B/1762652580.613527", - "retrieved_timestamp": "1762652580.613528", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT-Gen2-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-Gen2-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7907485471881275 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6100494662695 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3464765100671141 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4322916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4387466755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT-Gen3-gemma-2-9B/79319862-c5eb-40a1-9424-ecc3835c1c9e.json b/data/hfopenllm_v2/google/zelk12/MT-Gen3-gemma-2-9B/79319862-c5eb-40a1-9424-ecc3835c1c9e.json deleted file mode 100644 index ea25bb621..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT-Gen3-gemma-2-9B/79319862-c5eb-40a1-9424-ecc3835c1c9e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen3-gemma-2-9B/1762652580.613742", - "retrieved_timestamp": "1762652580.613743", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT-Gen3-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-Gen3-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8020142111818863 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6097112889343964 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.229607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.348993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4216875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43558843085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT-Gen4-gemma-2-9B/7442a4c1-e225-4cea-b107-2d975460e214.json b/data/hfopenllm_v2/google/zelk12/MT-Gen4-gemma-2-9B/7442a4c1-e225-4cea-b107-2d975460e214.json deleted file mode 100644 index 75667b592..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT-Gen4-gemma-2-9B/7442a4c1-e225-4cea-b107-2d975460e214.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen4-gemma-2-9B/1762652580.613958", - "retrieved_timestamp": "1762652580.6139588", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT-Gen4-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-Gen4-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7883005979689446 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6109884725351095 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22356495468277945 - } - }, - { - "evaluation_name": 
"GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3548657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4228020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4387466755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT-Gen5-gemma-2-9B/4431b126-a8b8-4776-8dd5-448ec4fb0caf.json b/data/hfopenllm_v2/google/zelk12/MT-Gen5-gemma-2-9B/4431b126-a8b8-4776-8dd5-448ec4fb0caf.json deleted file mode 100644 index df4945fa3..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT-Gen5-gemma-2-9B/4431b126-a8b8-4776-8dd5-448ec4fb0caf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen5-gemma-2-9B/1762652580.614163", - "retrieved_timestamp": "1762652580.614163", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT-Gen5-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-Gen5-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7923221496739761 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6132787046647334 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21525679758308158 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35151006711409394 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42016666666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4402426861702128 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/google/zelk12/MT-Gen6-gemma-2-9B/2dc22f82-e2fb-4690-b8e6-8c77b9bc9c45.json b/data/hfopenllm_v2/google/zelk12/MT-Gen6-gemma-2-9B/2dc22f82-e2fb-4690-b8e6-8c77b9bc9c45.json deleted file mode 100644 index 207b7c42f..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT-Gen6-gemma-2-9B/2dc22f82-e2fb-4690-b8e6-8c77b9bc9c45.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen6-gemma-2-9B/1762652580.614364", - "retrieved_timestamp": "1762652580.6143649", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT-Gen6-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-Gen6-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1615668648075994 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5844669261858688 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0823262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33305369127516776 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40692708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4165558510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT-Gen6fix-gemma-2-9B/0c2ec793-573d-4fb5-abc3-4aef4a8e2e72.json b/data/hfopenllm_v2/google/zelk12/MT-Gen6fix-gemma-2-9B/0c2ec793-573d-4fb5-abc3-4aef4a8e2e72.json deleted file mode 100644 index 723260a35..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT-Gen6fix-gemma-2-9B/0c2ec793-573d-4fb5-abc3-4aef4a8e2e72.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen6fix-gemma-2-9B/1762652580.614617", - "retrieved_timestamp": "1762652580.614618", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - 
"source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT-Gen6fix-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-Gen6fix-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15759518078697854 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5917309697578781 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08157099697885196 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.337248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40841666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4119847074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT-Gen7-gemma-2-9B/29e65163-3e59-4bfe-a950-60092cb3171f.json b/data/hfopenllm_v2/google/zelk12/MT-Gen7-gemma-2-9B/29e65163-3e59-4bfe-a950-60092cb3171f.json deleted file mode 100644 index 9b53cc64d..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT-Gen7-gemma-2-9B/29e65163-3e59-4bfe-a950-60092cb3171f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen7-gemma-2-9B/1762652580.614857", - "retrieved_timestamp": "1762652580.614858", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT-Gen7-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-Gen7-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16641289556155447 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 
- }, - "score_details": { - "score": 0.5935242633580781 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33557046979865773 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40978125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4122340425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT-Max-Merge_02012025163610-gemma-2-9B/bfeb5972-e865-4892-b01b-0c92fdab79e9.json b/data/hfopenllm_v2/google/zelk12/MT-Max-Merge_02012025163610-gemma-2-9B/bfeb5972-e865-4892-b01b-0c92fdab79e9.json deleted file mode 100644 index d96652666..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT-Max-Merge_02012025163610-gemma-2-9B/bfeb5972-e865-4892-b01b-0c92fdab79e9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT-Max-Merge_02012025163610-gemma-2-9B/1762652580.6150799", - "retrieved_timestamp": "1762652580.615081", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT-Max-Merge_02012025163610-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-Max-Merge_02012025163610-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7907485471881275 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6142243374633075 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2212990936555891 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35151006711409394 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy 
on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4228020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4395777925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT-Merge-gemma-2-9B/8025c7ed-3553-489f-8858-091d1ff81a15.json b/data/hfopenllm_v2/google/zelk12/MT-Merge-gemma-2-9B/8025c7ed-3553-489f-8858-091d1ff81a15.json deleted file mode 100644 index 76dc89c60..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT-Merge-gemma-2-9B/8025c7ed-3553-489f-8858-091d1ff81a15.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge-gemma-2-9B/1762652580.615297", - "retrieved_timestamp": "1762652580.615297", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT-Merge-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-Merge-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8035379459833243 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6118379158679297 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34815436241610737 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.425625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43617021276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT-Merge1-gemma-2-9B/0e6d9dcd-e9b7-4638-ac0a-d0600fbb27d8.json b/data/hfopenllm_v2/google/zelk12/MT-Merge1-gemma-2-9B/0e6d9dcd-e9b7-4638-ac0a-d0600fbb27d8.json deleted file mode 100644 index 523459c64..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT-Merge1-gemma-2-9B/0e6d9dcd-e9b7-4638-ac0a-d0600fbb27d8.json +++ 
/dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge1-gemma-2-9B/1762652580.615506", - "retrieved_timestamp": "1762652580.615506", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT-Merge1-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-Merge1-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7901490268044344 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6099997385328262 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22885196374622357 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35151006711409394 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4243854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43741688829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT-Merge2-MU-gemma-2-MTg2MT1g2-9B/b149c82e-0099-46f6-a302-0eac4127f418.json b/data/hfopenllm_v2/google/zelk12/MT-Merge2-MU-gemma-2-MTg2MT1g2-9B/b149c82e-0099-46f6-a302-0eac4127f418.json deleted file mode 100644 index f122bcd94..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT-Merge2-MU-gemma-2-MTg2MT1g2-9B/b149c82e-0099-46f6-a302-0eac4127f418.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge2-MU-gemma-2-MTg2MT1g2-9B/1762652580.615718", - "retrieved_timestamp": "1762652580.615718", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT-Merge2-MU-gemma-2-MTg2MT1g2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-Merge2-MU-gemma-2-MTg2MT1g2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": 
"Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7955945779420825 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.60838922159878 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21827794561933533 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35067114093959734 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43222916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.437250664893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT-Merge2-gemma-2-9B/75c81dae-2bb9-4d60-94e2-61141c31ccbd.json b/data/hfopenllm_v2/google/zelk12/MT-Merge2-gemma-2-9B/75c81dae-2bb9-4d60-94e2-61141c31ccbd.json deleted file mode 100644 index bcc78f67b..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT-Merge2-gemma-2-9B/75c81dae-2bb9-4d60-94e2-61141c31ccbd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge2-gemma-2-9B/1762652580.615932", - "retrieved_timestamp": "1762652580.615933", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT-Merge2-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-Merge2-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7877010775852515 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6106681877306871 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.2348942598187311 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35067114093959734 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4216875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43816489361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT-Merge3-gemma-2-9B/c2bad77e-c0d0-4a43-8853-9363cc618603.json b/data/hfopenllm_v2/google/zelk12/MT-Merge3-gemma-2-9B/c2bad77e-c0d0-4a43-8853-9363cc618603.json deleted file mode 100644 index 7e7a917de..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT-Merge3-gemma-2-9B/c2bad77e-c0d0-4a43-8853-9363cc618603.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge3-gemma-2-9B/1762652580.6161401", - "retrieved_timestamp": "1762652580.616141", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT-Merge3-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-Merge3-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7858526487497617 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6102112889343964 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.348993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42575 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.4373337765957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT-Merge4-gemma-2-9B/7b515db9-e76c-495f-b4f8-a65b913f40e9.json b/data/hfopenllm_v2/google/zelk12/MT-Merge4-gemma-2-9B/7b515db9-e76c-495f-b4f8-a65b913f40e9.json deleted file mode 100644 index 7c3721bc3..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT-Merge4-gemma-2-9B/7b515db9-e76c-495f-b4f8-a65b913f40e9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge4-gemma-2-9B/1762652580.616342", - "retrieved_timestamp": "1762652580.616342", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT-Merge4-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-Merge4-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7807317916461656 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6118218058684427 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21676737160120846 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3523489932885906 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42943749999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43899601063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT-Merge5-gemma-2-9B/f9e1d208-d1ab-4518-9b1b-1470af8bef12.json b/data/hfopenllm_v2/google/zelk12/MT-Merge5-gemma-2-9B/f9e1d208-d1ab-4518-9b1b-1470af8bef12.json deleted file mode 100644 index 7e66d2a8f..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT-Merge5-gemma-2-9B/f9e1d208-d1ab-4518-9b1b-1470af8bef12.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge5-gemma-2-9B/1762652580.616543", - "retrieved_timestamp": "1762652580.616544", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT-Merge5-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-Merge5-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7843787816327346 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6122674386670167 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21827794561933533 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35318791946308725 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42813541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4387466755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT-Merge6-gemma-2-9B/3c796c74-d79c-4c9f-a5ab-dee6c237bde1.json b/data/hfopenllm_v2/google/zelk12/MT-Merge6-gemma-2-9B/3c796c74-d79c-4c9f-a5ab-dee6c237bde1.json deleted file mode 100644 index d4e00a85a..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT-Merge6-gemma-2-9B/3c796c74-d79c-4c9f-a5ab-dee6c237bde1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge6-gemma-2-9B/1762652580.6167512", - "retrieved_timestamp": "1762652580.6167512", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT-Merge6-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-Merge6-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16946036516443036 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5949106849534558 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08006042296072508 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40978125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41148603723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT-gemma-2-9B/061fc038-b3fd-4d5b-8ab7-7f3713ad9e55.json b/data/hfopenllm_v2/google/zelk12/MT-gemma-2-9B/061fc038-b3fd-4d5b-8ab7-7f3713ad9e55.json deleted file mode 100644 index 395c1a04a..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT-gemma-2-9B/061fc038-b3fd-4d5b-8ab7-7f3713ad9e55.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT-gemma-2-9B/1762652580.616956", - "retrieved_timestamp": "1762652580.616957", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7968434863938794 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6063604478633632 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34563758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40711458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42237367021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT1-Gen1-gemma-2-9B/b869eab0-f736-48ef-8870-b98636cc4da1.json b/data/hfopenllm_v2/google/zelk12/MT1-Gen1-gemma-2-9B/b869eab0-f736-48ef-8870-b98636cc4da1.json deleted file mode 100644 index 675035b27..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT1-Gen1-gemma-2-9B/b869eab0-f736-48ef-8870-b98636cc4da1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen1-gemma-2-9B/1762652580.617173", - "retrieved_timestamp": "1762652580.617174", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT1-Gen1-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT1-Gen1-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7974430067775724 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6117787046647335 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2243202416918429 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34395973154362414 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43095833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43758311170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT1-Gen2-gemma-2-9B/2871c1f6-4010-48e4-8020-1c5024474934.json b/data/hfopenllm_v2/google/zelk12/MT1-Gen2-gemma-2-9B/2871c1f6-4010-48e4-8020-1c5024474934.json deleted file mode 100644 index c51d3e402..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT1-Gen2-gemma-2-9B/2871c1f6-4010-48e4-8020-1c5024474934.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": 
"0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen2-gemma-2-9B/1762652580.617375", - "retrieved_timestamp": "1762652580.617376", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT1-Gen2-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT1-Gen2-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7983672211953173 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6095989894691557 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22507552870090636 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3523489932885906 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42835416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43550531914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT1-Gen3-gemma-2-9B/69b008dd-f8ad-49ce-9bca-fff2e2ce6b72.json b/data/hfopenllm_v2/google/zelk12/MT1-Gen3-gemma-2-9B/69b008dd-f8ad-49ce-9bca-fff2e2ce6b72.json deleted file mode 100644 index 6df5ae028..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT1-Gen3-gemma-2-9B/69b008dd-f8ad-49ce-9bca-fff2e2ce6b72.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen3-gemma-2-9B/1762652580.617578", - "retrieved_timestamp": "1762652580.617579", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT1-Gen3-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT1-Gen3-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.795969139660545 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6101551392017761 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2243202416918429 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.348993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42432291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43492353723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT1-Gen4-gemma-2-9B/e10f8a93-7131-446d-b792-d179f522a262.json b/data/hfopenllm_v2/google/zelk12/MT1-Gen4-gemma-2-9B/e10f8a93-7131-446d-b792-d179f522a262.json deleted file mode 100644 index fd608d2fc..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT1-Gen4-gemma-2-9B/e10f8a93-7131-446d-b792-d179f522a262.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen4-gemma-2-9B/1762652580.617781", - "retrieved_timestamp": "1762652580.617782", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT1-Gen4-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT1-Gen4-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7941207108250552 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6057567677609054 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21601208459214502 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34731543624161076 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42311458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42860704787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT1-Gen5-IF-gemma-2-S2DMv1-9B/182a7558-c9f7-43a6-a928-d5d97e082a91.json b/data/hfopenllm_v2/google/zelk12/MT1-Gen5-IF-gemma-2-S2DMv1-9B/182a7558-c9f7-43a6-a928-d5d97e082a91.json deleted file mode 100644 index bb84e0c95..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT1-Gen5-IF-gemma-2-S2DMv1-9B/182a7558-c9f7-43a6-a928-d5d97e082a91.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen5-IF-gemma-2-S2DMv1-9B/1762652580.617982", - "retrieved_timestamp": "1762652580.6179829", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT1-Gen5-IF-gemma-2-S2DMv1-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT1-Gen5-IF-gemma-2-S2DMv1-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7929216700576691 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6000001533684681 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20317220543806647 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34395973154362414 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4244791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42179188829787234 - } - } - ] -} 
diff --git a/data/hfopenllm_v2/google/zelk12/MT1-Gen5-gemma-2-9B/46f2caf1-29e8-4173-b2b2-e54e905e71d9.json b/data/hfopenllm_v2/google/zelk12/MT1-Gen5-gemma-2-9B/46f2caf1-29e8-4173-b2b2-e54e905e71d9.json deleted file mode 100644 index 4c7b7a223..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT1-Gen5-gemma-2-9B/46f2caf1-29e8-4173-b2b2-e54e905e71d9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen5-gemma-2-9B/1762652580.618199", - "retrieved_timestamp": "1762652580.6182", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT1-Gen5-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT1-Gen5-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7794828831943688 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6017455017631886 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20770392749244712 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3464765100671141 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41914583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42220744680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT1-Gen6-gemma-2-9B/fcf4087e-9d89-4e8a-a817-6c9092445208.json b/data/hfopenllm_v2/google/zelk12/MT1-Gen6-gemma-2-9B/fcf4087e-9d89-4e8a-a817-6c9092445208.json deleted file mode 100644 index 55d51f2a9..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT1-Gen6-gemma-2-9B/fcf4087e-9d89-4e8a-a817-6c9092445208.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen6-gemma-2-9B/1762652580.618452", - "retrieved_timestamp": "1762652580.618453", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", 
- "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT1-Gen6-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT1-Gen6-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16336542595867853 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5943545352208355 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08081570996978851 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40444791666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4133144946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT1-Gen7-gemma-2-9B/5b8bdeea-19cf-41c0-890a-55ae1b740e75.json b/data/hfopenllm_v2/google/zelk12/MT1-Gen7-gemma-2-9B/5b8bdeea-19cf-41c0-890a-55ae1b740e75.json deleted file mode 100644 index 95410eb5a..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT1-Gen7-gemma-2-9B/5b8bdeea-19cf-41c0-890a-55ae1b740e75.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen7-gemma-2-9B/1762652580.6186602", - "retrieved_timestamp": "1762652580.6186612", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT1-Gen7-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT1-Gen7-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16336542595867853 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.5937953240176393 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41111458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4144780585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT1-Max-Merge_02012025163610-gemma-2-9B/01fcc284-cedc-48b7-bc21-b8ec6dd53d3c.json b/data/hfopenllm_v2/google/zelk12/MT1-Max-Merge_02012025163610-gemma-2-9B/01fcc284-cedc-48b7-bc21-b8ec6dd53d3c.json deleted file mode 100644 index 02b7cc373..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT1-Max-Merge_02012025163610-gemma-2-9B/01fcc284-cedc-48b7-bc21-b8ec6dd53d3c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT1-Max-Merge_02012025163610-gemma-2-9B/1762652580.618859", - "retrieved_timestamp": "1762652580.61886", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT1-Max-Merge_02012025163610-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT1-Max-Merge_02012025163610-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7928718023732585 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6122674386670167 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22280966767371602 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3548657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4255 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43816489361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT1-gemma-2-9B/17cda965-9f4b-411c-977f-1fe3238f527f.json b/data/hfopenllm_v2/google/zelk12/MT1-gemma-2-9B/17cda965-9f4b-411c-977f-1fe3238f527f.json deleted file mode 100644 index 29ff10a58..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT1-gemma-2-9B/17cda965-9f4b-411c-977f-1fe3238f527f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT1-gemma-2-9B/1762652580.619083", - "retrieved_timestamp": "1762652580.6190841", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT1-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT1-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7946703635243377 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6108745950756924 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22356495468277945 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34563758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43222916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4357546542553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT2-Gen1-gemma-2-9B/e6c0f96c-6189-4ed1-bf68-e762249170e7.json b/data/hfopenllm_v2/google/zelk12/MT2-Gen1-gemma-2-9B/e6c0f96c-6189-4ed1-bf68-e762249170e7.json deleted file mode 100644 index eb86159b9..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT2-Gen1-gemma-2-9B/e6c0f96c-6189-4ed1-bf68-e762249170e7.json +++ 
/dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT2-Gen1-gemma-2-9B/1762652580.619495", - "retrieved_timestamp": "1762652580.619499", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT2-Gen1-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT2-Gen1-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7855778224001206 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6100802027920743 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2212990936555891 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34312080536912754 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42432291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4376662234042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT2-Gen2-gemma-2-9B/556a83e2-9b7c-432e-99d5-804da880dfc6.json b/data/hfopenllm_v2/google/zelk12/MT2-Gen2-gemma-2-9B/556a83e2-9b7c-432e-99d5-804da880dfc6.json deleted file mode 100644 index aed53bb3a..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT2-Gen2-gemma-2-9B/556a83e2-9b7c-432e-99d5-804da880dfc6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT2-Gen2-gemma-2-9B/1762652580.6198761", - "retrieved_timestamp": "1762652580.619877", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT2-Gen2-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT2-Gen2-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7889001183526376 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6092917531936446 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21827794561933533 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3464765100671141 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42702083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43882978723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT2-Gen3-gemma-2-9B/1aa85069-5409-4c32-91d5-1f417be4e465.json b/data/hfopenllm_v2/google/zelk12/MT2-Gen3-gemma-2-9B/1aa85069-5409-4c32-91d5-1f417be4e465.json deleted file mode 100644 index 2afc82b82..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT2-Gen3-gemma-2-9B/1aa85069-5409-4c32-91d5-1f417be4e465.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT2-Gen3-gemma-2-9B/1762652580.620111", - "retrieved_timestamp": "1762652580.620112", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT2-Gen3-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT2-Gen3-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7810066179958066 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6104772065373926 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2107250755287009 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3464765100671141 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4230833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43741688829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT2-Gen4-gemma-2-9B/eb55e4d5-dde4-4349-b8aa-9297604cedf0.json b/data/hfopenllm_v2/google/zelk12/MT2-Gen4-gemma-2-9B/eb55e4d5-dde4-4349-b8aa-9297604cedf0.json deleted file mode 100644 index 903b15061..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT2-Gen4-gemma-2-9B/eb55e4d5-dde4-4349-b8aa-9297604cedf0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT2-Gen4-gemma-2-9B/1762652580.620331", - "retrieved_timestamp": "1762652580.620331", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT2-Gen4-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT2-Gen4-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7895993741051521 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.609655139201776 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22356495468277945 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34563758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41254166666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43209773936170215 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/google/zelk12/MT2-Gen5-gemma-2-9B/3f7eb2b4-8dfb-4bf5-a462-0c11ccbae935.json b/data/hfopenllm_v2/google/zelk12/MT2-Gen5-gemma-2-9B/3f7eb2b4-8dfb-4bf5-a462-0c11ccbae935.json deleted file mode 100644 index e09277745..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT2-Gen5-gemma-2-9B/3f7eb2b4-8dfb-4bf5-a462-0c11ccbae935.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT2-Gen5-gemma-2-9B/1762652580.6205592", - "retrieved_timestamp": "1762652580.6205592", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT2-Gen5-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT2-Gen5-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7749116787900548 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6063933817527739 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2107250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35151006711409394 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42441666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43018617021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT2-Gen6-gemma-2-9B/35e1f76a-96d6-42af-a51b-b1b453536723.json b/data/hfopenllm_v2/google/zelk12/MT2-Gen6-gemma-2-9B/35e1f76a-96d6-42af-a51b-b1b453536723.json deleted file mode 100644 index 76d7b77c8..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT2-Gen6-gemma-2-9B/35e1f76a-96d6-42af-a51b-b1b453536723.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT2-Gen6-gemma-2-9B/1762652580.620769", - "retrieved_timestamp": "1762652580.620769", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - 
"source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT2-Gen6-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT2-Gen6-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16641289556155447 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.595964957637105 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41371874999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42096077127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT2-Gen7-gemma-2-9B/4b9e66cf-0ddb-4878-8800-2bc05dec750a.json b/data/hfopenllm_v2/google/zelk12/MT2-Gen7-gemma-2-9B/4b9e66cf-0ddb-4878-8800-2bc05dec750a.json deleted file mode 100644 index d4123874a..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT2-Gen7-gemma-2-9B/4b9e66cf-0ddb-4878-8800-2bc05dec750a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT2-Gen7-gemma-2-9B/1762652580.621203", - "retrieved_timestamp": "1762652580.621205", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT2-Gen7-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT2-Gen7-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17615482475387528 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.6078922830693557 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10196374622356495 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3548657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42032291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4311003989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT2-Max-Merge_02012025163610-gemma-2-9B/2144960d-f674-45bd-9509-3cf711dc697b.json b/data/hfopenllm_v2/google/zelk12/MT2-Max-Merge_02012025163610-gemma-2-9B/2144960d-f674-45bd-9509-3cf711dc697b.json deleted file mode 100644 index bf6ca6dd0..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT2-Max-Merge_02012025163610-gemma-2-9B/2144960d-f674-45bd-9509-3cf711dc697b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT2-Max-Merge_02012025163610-gemma-2-9B/1762652580.6214652", - "retrieved_timestamp": "1762652580.6214678", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT2-Max-Merge_02012025163610-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT2-Max-Merge_02012025163610-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7901490268044344 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6108461203950706 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2243202416918429 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35151006711409394 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42283333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4390791223404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT2-gemma-2-9B/0644b140-506f-4c7a-ba59-50ab48fad799.json b/data/hfopenllm_v2/google/zelk12/MT2-gemma-2-9B/0644b140-506f-4c7a-ba59-50ab48fad799.json deleted file mode 100644 index ab20bb74b..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT2-gemma-2-9B/0644b140-506f-4c7a-ba59-50ab48fad799.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT2-gemma-2-9B/1762652580.6217349", - "retrieved_timestamp": "1762652580.621736", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT2-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT2-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7885754243185858 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.611511004530543 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2212990936555891 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34731543624161076 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42165625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43683510638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT3-Gen1-gemma-2-9B/1964f25a-d5b2-467a-a30d-9338082bdcfb.json b/data/hfopenllm_v2/google/zelk12/MT3-Gen1-gemma-2-9B/1964f25a-d5b2-467a-a30d-9338082bdcfb.json deleted file mode 100644 index daa9612e6..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT3-Gen1-gemma-2-9B/1964f25a-d5b2-467a-a30d-9338082bdcfb.json +++ 
/dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT3-Gen1-gemma-2-9B/1762652580.6219652", - "retrieved_timestamp": "1762652580.6219661", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT3-Gen1-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT3-Gen1-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7837792612490415 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6106760932030332 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21450151057401812 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3464765100671141 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41511458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43267952127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT3-Gen2-gemma-2-9B/55315256-9b4d-4dbd-bc53-7ec384e0fdca.json b/data/hfopenllm_v2/google/zelk12/MT3-Gen2-gemma-2-9B/55315256-9b4d-4dbd-bc53-7ec384e0fdca.json deleted file mode 100644 index c6ea66b4d..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT3-Gen2-gemma-2-9B/55315256-9b4d-4dbd-bc53-7ec384e0fdca.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT3-Gen2-gemma-2-9B/1762652580.622196", - "retrieved_timestamp": "1762652580.622197", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT3-Gen2-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT3-Gen2-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7843289139483238 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6091473194676166 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22356495468277945 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3573825503355705 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41111458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43326130319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT3-Gen3-gemma-2-9B/71710546-99cb-4180-9454-1e77696fccf3.json b/data/hfopenllm_v2/google/zelk12/MT3-Gen3-gemma-2-9B/71710546-99cb-4180-9454-1e77696fccf3.json deleted file mode 100644 index a2cc6a597..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT3-Gen3-gemma-2-9B/71710546-99cb-4180-9454-1e77696fccf3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT3-Gen3-gemma-2-9B/1762652580.622438", - "retrieved_timestamp": "1762652580.622439", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT3-Gen3-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT3-Gen3-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7856276900845313 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6088892215987798 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21525679758308158 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35151006711409394 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42575 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4302692819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT3-Gen4-gemma-2-9B/96b38b17-8c70-4ecf-beb5-8e6ed84942ac.json b/data/hfopenllm_v2/google/zelk12/MT3-Gen4-gemma-2-9B/96b38b17-8c70-4ecf-beb5-8e6ed84942ac.json deleted file mode 100644 index b7b4d4638..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT3-Gen4-gemma-2-9B/96b38b17-8c70-4ecf-beb5-8e6ed84942ac.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT3-Gen4-gemma-2-9B/1762652580.6226869", - "retrieved_timestamp": "1762652580.622689", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT3-Gen4-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT3-Gen4-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7737126380226687 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6100843629460684 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20619335347432025 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34731543624161076 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4476354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4387466755319149 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/google/zelk12/MT3-Gen5-gemma-2-9B/53dc50c8-fa89-4d31-92d6-f8b02543e272.json b/data/hfopenllm_v2/google/zelk12/MT3-Gen5-gemma-2-9B/53dc50c8-fa89-4d31-92d6-f8b02543e272.json deleted file mode 100644 index 8b59e7ede..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT3-Gen5-gemma-2-9B/53dc50c8-fa89-4d31-92d6-f8b02543e272.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT3-Gen5-gemma-2-9B/1762652580.622956", - "retrieved_timestamp": "1762652580.622956", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT3-Gen5-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT3-Gen5-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7990166092634211 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6098615465467813 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22658610271903323 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35318791946308725 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41911458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43168218085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT3-Gen5-gemma-2-9B_v1/95fe9cce-c93d-47e3-a053-defe922abefa.json b/data/hfopenllm_v2/google/zelk12/MT3-Gen5-gemma-2-9B_v1/95fe9cce-c93d-47e3-a053-defe922abefa.json deleted file mode 100644 index 66cbf675b..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT3-Gen5-gemma-2-9B_v1/95fe9cce-c93d-47e3-a053-defe922abefa.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT3-Gen5-gemma-2-9B_v1/1762652580.623179", - "retrieved_timestamp": "1762652580.623179", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM 
v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT3-Gen5-gemma-2-9B_v1", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT3-Gen5-gemma-2-9B_v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7996161296471141 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6113330718661595 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22280966767371602 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.348993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4203854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4359208776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT3-Gen6-gemma-2-9B/9f093c1a-eabc-4ee3-9e43-9ac0bc3afa08.json b/data/hfopenllm_v2/google/zelk12/MT3-Gen6-gemma-2-9B/9f093c1a-eabc-4ee3-9e43-9ac0bc3afa08.json deleted file mode 100644 index 41d861084..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT3-Gen6-gemma-2-9B/9f093c1a-eabc-4ee3-9e43-9ac0bc3afa08.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT3-Gen6-gemma-2-9B/1762652580.623395", - "retrieved_timestamp": "1762652580.623395", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT3-Gen6-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT3-Gen6-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17615482475387528 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.6020072592121909 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08836858006042296 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34312080536912754 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4125729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41023936170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT3-Max-Merge_02012025163610-gemma-2-9B/42e21a24-7c3c-4e65-ad6e-0b18f6c048eb.json b/data/hfopenllm_v2/google/zelk12/MT3-Max-Merge_02012025163610-gemma-2-9B/42e21a24-7c3c-4e65-ad6e-0b18f6c048eb.json deleted file mode 100644 index 20ca5fba4..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT3-Max-Merge_02012025163610-gemma-2-9B/42e21a24-7c3c-4e65-ad6e-0b18f6c048eb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT3-Max-Merge_02012025163610-gemma-2-9B/1762652580.623601", - "retrieved_timestamp": "1762652580.623602", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT3-Max-Merge_02012025163610-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT3-Max-Merge_02012025163610-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17615482475387528 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6123461203950705 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10120845921450151 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35067114093959734 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42546875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4389128989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT3-gemma-2-9B/0b8f178b-9980-4250-bc82-66facb367eb8.json b/data/hfopenllm_v2/google/zelk12/MT3-gemma-2-9B/0b8f178b-9980-4250-bc82-66facb367eb8.json deleted file mode 100644 index 6b84acf83..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT3-gemma-2-9B/0b8f178b-9980-4250-bc82-66facb367eb8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT3-gemma-2-9B/1762652580.623819", - "retrieved_timestamp": "1762652580.62382", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT3-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT3-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7786085364610345 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.61307842026088 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21676737160120846 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3447986577181208 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4242916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43267952127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT4-Gen1-gemma-2-9B/6e5b6be6-cc1d-4a03-8e5e-eeede4ee4298.json b/data/hfopenllm_v2/google/zelk12/MT4-Gen1-gemma-2-9B/6e5b6be6-cc1d-4a03-8e5e-eeede4ee4298.json deleted file mode 100644 index d0dd878ee..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT4-Gen1-gemma-2-9B/6e5b6be6-cc1d-4a03-8e5e-eeede4ee4298.json +++ 
/dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT4-Gen1-gemma-2-9B/1762652580.624031", - "retrieved_timestamp": "1762652580.624032", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT4-Gen1-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT4-Gen1-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7894996387363307 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6093827996028333 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21978851963746224 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34395973154362414 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43222916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4389128989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT4-Gen2-gemma-2-9B/e7f0b28a-32c6-4faf-9cb4-c2ee4a075135.json b/data/hfopenllm_v2/google/zelk12/MT4-Gen2-gemma-2-9B/e7f0b28a-32c6-4faf-9cb4-c2ee4a075135.json deleted file mode 100644 index 3210237e8..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT4-Gen2-gemma-2-9B/e7f0b28a-32c6-4faf-9cb4-c2ee4a075135.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT4-Gen2-gemma-2-9B/1762652580.6242292", - "retrieved_timestamp": "1762652580.62423", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT4-Gen2-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT4-Gen2-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8050616807847621 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6108348543973539 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2326283987915408 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34563758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42565625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4367519946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT4-Gen3-gemma-2-9B/b84ca7e1-4746-449a-841f-fcfd71774104.json b/data/hfopenllm_v2/google/zelk12/MT4-Gen3-gemma-2-9B/b84ca7e1-4746-449a-841f-fcfd71774104.json deleted file mode 100644 index ced7a16df..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT4-Gen3-gemma-2-9B/b84ca7e1-4746-449a-841f-fcfd71774104.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT4-Gen3-gemma-2-9B/1762652580.624489", - "retrieved_timestamp": "1762652580.62449", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT4-Gen3-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT4-Gen3-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7840540875986826 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6087112889343964 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2190332326283988 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34395973154362414 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42432291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4380817819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT4-Gen4-gemma-2-9B/b38dc953-12fb-41aa-a887-d9a30ff1799a.json b/data/hfopenllm_v2/google/zelk12/MT4-Gen4-gemma-2-9B/b38dc953-12fb-41aa-a887-d9a30ff1799a.json deleted file mode 100644 index 78ed13256..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT4-Gen4-gemma-2-9B/b38dc953-12fb-41aa-a887-d9a30ff1799a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT4-Gen4-gemma-2-9B/1762652580.6246998", - "retrieved_timestamp": "1762652580.624701", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT4-Gen4-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT4-Gen4-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7874262512356104 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6076031496231499 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21450151057401812 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3523489932885906 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42435416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4323470744680851 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/google/zelk12/MT4-Gen5-gemma-2-9B/4a35f213-f9b7-40c5-b164-722f6b4ee933.json b/data/hfopenllm_v2/google/zelk12/MT4-Gen5-gemma-2-9B/4a35f213-f9b7-40c5-b164-722f6b4ee933.json deleted file mode 100644 index 5640d7375..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT4-Gen5-gemma-2-9B/4a35f213-f9b7-40c5-b164-722f6b4ee933.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT4-Gen5-gemma-2-9B/1762652580.6249092", - "retrieved_timestamp": "1762652580.62491", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT4-Gen5-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT4-Gen5-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7788833628106757 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6106664051994928 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22658610271903323 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3565436241610738 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42683333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43841422872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT4-Max-Merge_02012025163610-gemma-2-9B/ae4224f6-36e8-48e2-a0bf-a79299c365ad.json b/data/hfopenllm_v2/google/zelk12/MT4-Max-Merge_02012025163610-gemma-2-9B/ae4224f6-36e8-48e2-a0bf-a79299c365ad.json deleted file mode 100644 index b5aba8355..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT4-Max-Merge_02012025163610-gemma-2-9B/ae4224f6-36e8-48e2-a0bf-a79299c365ad.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT4-Max-Merge_02012025163610-gemma-2-9B/1762652580.625107", - "retrieved_timestamp": "1762652580.625107", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT4-Max-Merge_02012025163610-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT4-Max-Merge_02012025163610-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1770790391716202 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6120127870617372 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09516616314199396 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35151006711409394 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4228020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4390791223404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT4-gemma-2-9B/a312ee46-fd2f-4a0d-a778-7e235910a147.json b/data/hfopenllm_v2/google/zelk12/MT4-gemma-2-9B/a312ee46-fd2f-4a0d-a778-7e235910a147.json deleted file mode 100644 index 5f6c239c0..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT4-gemma-2-9B/a312ee46-fd2f-4a0d-a778-7e235910a147.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT4-gemma-2-9B/1762652580.62533", - "retrieved_timestamp": "1762652580.625331", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT4-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT4-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7761605872418517 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.607313601341302 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43092708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43658577127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT5-Gen1-gemma-2-9B/b311d3f4-6eda-4053-91d2-416c4d796c6d.json b/data/hfopenllm_v2/google/zelk12/MT5-Gen1-gemma-2-9B/b311d3f4-6eda-4053-91d2-416c4d796c6d.json deleted file mode 100644 index 1f9f0ae73..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT5-Gen1-gemma-2-9B/b311d3f4-6eda-4053-91d2-416c4d796c6d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT5-Gen1-gemma-2-9B/1762652580.625538", - "retrieved_timestamp": "1762652580.625539", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT5-Gen1-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT5-Gen1-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7831298731809377 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6110476837383056 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2212990936555891 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34731543624161076 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4203854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43683510638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT5-Gen2-gemma-2-9B/d59d00da-e88f-4d1a-9c47-538020ae0114.json b/data/hfopenllm_v2/google/zelk12/MT5-Gen2-gemma-2-9B/d59d00da-e88f-4d1a-9c47-538020ae0114.json deleted file mode 100644 index 7f1b1e6a6..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT5-Gen2-gemma-2-9B/d59d00da-e88f-4d1a-9c47-538020ae0114.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT5-Gen2-gemma-2-9B/1762652580.625738", - "retrieved_timestamp": "1762652580.625739", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT5-Gen2-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT5-Gen2-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7962439660101863 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.610541261742359 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35151006711409394 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41629166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4379155585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT5-Gen3-gemma-2-9B/1ff959c7-3477-40e5-8460-971337adc788.json b/data/hfopenllm_v2/google/zelk12/MT5-Gen3-gemma-2-9B/1ff959c7-3477-40e5-8460-971337adc788.json deleted file mode 100644 index 2fcce744c..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT5-Gen3-gemma-2-9B/1ff959c7-3477-40e5-8460-971337adc788.json +++ /dev/null @@ 
-1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT5-Gen3-gemma-2-9B/1762652580.625941", - "retrieved_timestamp": "1762652580.625942", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT5-Gen3-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT5-Gen3-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7825303527972447 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6090494662695 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21676737160120846 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35151006711409394 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42305208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4375 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT5-Gen4-gemma-2-9B/6cbd7c31-df0a-4920-9c23-be53f107698e.json b/data/hfopenllm_v2/google/zelk12/MT5-Gen4-gemma-2-9B/6cbd7c31-df0a-4920-9c23-be53f107698e.json deleted file mode 100644 index 34c801cb6..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT5-Gen4-gemma-2-9B/6cbd7c31-df0a-4920-9c23-be53f107698e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT5-Gen4-gemma-2-9B/1762652580.62615", - "retrieved_timestamp": "1762652580.6261508", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT5-Gen4-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT5-Gen4-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", 
- "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7834545672149895 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6131056160021203 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2243202416918429 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35318791946308725 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42283333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4396609042553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT5-Gen5-gemma-2-9B/b4ca4df6-2631-4ba3-bb55-8eadec5dd348.json b/data/hfopenllm_v2/google/zelk12/MT5-Gen5-gemma-2-9B/b4ca4df6-2631-4ba3-bb55-8eadec5dd348.json deleted file mode 100644 index ee82549d7..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT5-Gen5-gemma-2-9B/b4ca4df6-2631-4ba3-bb55-8eadec5dd348.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT5-Gen5-gemma-2-9B/1762652580.6263602", - "retrieved_timestamp": "1762652580.6263611", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT5-Gen5-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT5-Gen5-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7947202312087482 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6111664051994928 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2258308157099698 - } - }, - { - "evaluation_name": "GPQA", - 
"metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34815436241610737 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41911458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43292885638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT5-Max-Merge_02012025163610-gemma-2-9B/6737b327-bd1c-4eee-a461-af685edcd7b5.json b/data/hfopenllm_v2/google/zelk12/MT5-Max-Merge_02012025163610-gemma-2-9B/6737b327-bd1c-4eee-a461-af685edcd7b5.json deleted file mode 100644 index 84ff8ab38..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT5-Max-Merge_02012025163610-gemma-2-9B/6737b327-bd1c-4eee-a461-af685edcd7b5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT5-Max-Merge_02012025163610-gemma-2-9B/1762652580.62657", - "retrieved_timestamp": "1762652580.62657", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT5-Max-Merge_02012025163610-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT5-Max-Merge_02012025163610-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17615482475387528 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6126794537284038 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09818731117824774 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35151006711409394 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4227708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.43899601063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MT5-gemma-2-9B/dd306da8-60aa-4022-8d04-1942fd19bc0b.json b/data/hfopenllm_v2/google/zelk12/MT5-gemma-2-9B/dd306da8-60aa-4022-8d04-1942fd19bc0b.json deleted file mode 100644 index eb92a008e..000000000 --- a/data/hfopenllm_v2/google/zelk12/MT5-gemma-2-9B/dd306da8-60aa-4022-8d04-1942fd19bc0b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MT5-gemma-2-9B/1762652580.6267788", - "retrieved_timestamp": "1762652580.6267798", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MT5-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT5-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8047868544351211 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6112225549321132 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2258308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34312080536912754 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4203854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4366688829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MTM-Merge-gemma-2-9B/e0354dac-3ad8-4342-92a9-be0182051cac.json b/data/hfopenllm_v2/google/zelk12/MTM-Merge-gemma-2-9B/e0354dac-3ad8-4342-92a9-be0182051cac.json deleted file mode 100644 index e8f3218ac..000000000 --- a/data/hfopenllm_v2/google/zelk12/MTM-Merge-gemma-2-9B/e0354dac-3ad8-4342-92a9-be0182051cac.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MTM-Merge-gemma-2-9B/1762652580.626984", - "retrieved_timestamp": "1762652580.626985", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MTM-Merge-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MTM-Merge-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7798075772284205 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6133348543973538 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3548657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4267708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43882978723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/MTMaMe-Merge_02012025163610-gemma-2-9B/b1a8ede3-2f27-4825-a413-e1772743b7c6.json b/data/hfopenllm_v2/google/zelk12/MTMaMe-Merge_02012025163610-gemma-2-9B/b1a8ede3-2f27-4825-a413-e1772743b7c6.json deleted file mode 100644 index 1d8c462ce..000000000 --- a/data/hfopenllm_v2/google/zelk12/MTMaMe-Merge_02012025163610-gemma-2-9B/b1a8ede3-2f27-4825-a413-e1772743b7c6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_MTMaMe-Merge_02012025163610-gemma-2-9B/1762652580.627192", - "retrieved_timestamp": "1762652580.627192", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/MTMaMe-Merge_02012025163610-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MTMaMe-Merge_02012025163610-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17860277397305815 - } - 
}, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6116794537284039 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09592145015105741 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3523489932885906 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42410416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43816489361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/Rv0.4DMv1t0.25-gemma-2-9B/522e1145-3f25-4b5d-9b6a-7ad0047b2da5.json b/data/hfopenllm_v2/google/zelk12/Rv0.4DMv1t0.25-gemma-2-9B/522e1145-3f25-4b5d-9b6a-7ad0047b2da5.json deleted file mode 100644 index 913dae30c..000000000 --- a/data/hfopenllm_v2/google/zelk12/Rv0.4DMv1t0.25-gemma-2-9B/522e1145-3f25-4b5d-9b6a-7ad0047b2da5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_Rv0.4DMv1t0.25-gemma-2-9B/1762652580.627404", - "retrieved_timestamp": "1762652580.627404", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/Rv0.4DMv1t0.25-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/Rv0.4DMv1t0.25-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7496575752337131 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6069712638522043 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2258308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.34563758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43092708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44007646276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/Rv0.4DMv1t0.25Tt0.25-gemma-2-9B/64790745-5edc-49d9-8111-822d54518b58.json b/data/hfopenllm_v2/google/zelk12/Rv0.4DMv1t0.25Tt0.25-gemma-2-9B/64790745-5edc-49d9-8111-822d54518b58.json deleted file mode 100644 index 9c710ddfc..000000000 --- a/data/hfopenllm_v2/google/zelk12/Rv0.4DMv1t0.25Tt0.25-gemma-2-9B/64790745-5edc-49d9-8111-822d54518b58.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_Rv0.4DMv1t0.25Tt0.25-gemma-2-9B/1762652580.627618", - "retrieved_timestamp": "1762652580.627619", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/Rv0.4DMv1t0.25Tt0.25-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/Rv0.4DMv1t0.25Tt0.25-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7646200968984517 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6097862253440982 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20694864048338368 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3422818791946309 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4282916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43467420212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/Rv0.4MT4g2-gemma-2-9B/7e232332-cf13-4127-be18-1311921931e6.json 
b/data/hfopenllm_v2/google/zelk12/Rv0.4MT4g2-gemma-2-9B/7e232332-cf13-4127-be18-1311921931e6.json deleted file mode 100644 index d861a6c40..000000000 --- a/data/hfopenllm_v2/google/zelk12/Rv0.4MT4g2-gemma-2-9B/7e232332-cf13-4127-be18-1311921931e6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_Rv0.4MT4g2-gemma-2-9B/1762652580.627839", - "retrieved_timestamp": "1762652580.62784", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/Rv0.4MT4g2-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/Rv0.4MT4g2-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7320221456845614 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.604119644415618 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19486404833836857 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35318791946308725 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4230833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44173869680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/T31122024203920-gemma-2-9B/f1312aef-339c-487a-b0fa-1bf4a77f0910.json b/data/hfopenllm_v2/google/zelk12/T31122024203920-gemma-2-9B/f1312aef-339c-487a-b0fa-1bf4a77f0910.json deleted file mode 100644 index fb37dcb9c..000000000 --- a/data/hfopenllm_v2/google/zelk12/T31122024203920-gemma-2-9B/f1312aef-339c-487a-b0fa-1bf4a77f0910.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_T31122024203920-gemma-2-9B/1762652580.628056", - "retrieved_timestamp": "1762652580.628057", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"zelk12/T31122024203920-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/T31122024203920-gemma-2-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7676176988169169 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6095634089448112 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35067114093959734 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4321979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.437250664893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/Test01012025155054t0.5_gemma-2/73f07833-1d35-484f-8fe3-57f4c27e1277.json b/data/hfopenllm_v2/google/zelk12/Test01012025155054t0.5_gemma-2/73f07833-1d35-484f-8fe3-57f4c27e1277.json deleted file mode 100644 index e223a7417..000000000 --- a/data/hfopenllm_v2/google/zelk12/Test01012025155054t0.5_gemma-2/73f07833-1d35-484f-8fe3-57f4c27e1277.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_Test01012025155054t0.5_gemma-2/1762652580.628514", - "retrieved_timestamp": "1762652580.628514", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/Test01012025155054t0.5_gemma-2", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/Test01012025155054t0.5_gemma-2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 3.817 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1555229014570229 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.28295044895258115 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24161073825503357 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36702083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10904255319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/gemma-2-S2MTM-9B/e0eb1bbf-923b-4bee-8390-288c21607e0e.json b/data/hfopenllm_v2/google/zelk12/gemma-2-S2MTM-9B/e0eb1bbf-923b-4bee-8390-288c21607e0e.json deleted file mode 100644 index 25fe01838..000000000 --- a/data/hfopenllm_v2/google/zelk12/gemma-2-S2MTM-9B/e0eb1bbf-923b-4bee-8390-288c21607e0e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_gemma-2-S2MTM-9B/1762652580.628712", - "retrieved_timestamp": "1762652580.628713", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/gemma-2-S2MTM-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/gemma-2-S2MTM-9B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7822555264476034 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6060836790982922 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20468277945619334 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34563758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.42184375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4296875 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.25/b9ce6ed3-132a-44ed-9efc-dbfcc83d6799.json b/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.25/b9ce6ed3-132a-44ed-9efc-dbfcc83d6799.json deleted file mode 100644 index 4d9db37e6..000000000 --- a/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.25/b9ce6ed3-132a-44ed-9efc-dbfcc83d6799.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.1-t0.25/1762652580.630025", - "retrieved_timestamp": "1762652580.630029", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.25", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.25", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7706651684197928 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6075432245295168 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21450151057401812 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34312080536912754 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43226041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4399933510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.75/a2b9a953-31e2-4a6f-8005-993e1133246e.json b/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.75/a2b9a953-31e2-4a6f-8005-993e1133246e.json deleted file mode 100644 index 8d92108fe..000000000 --- 
a/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.75/a2b9a953-31e2-4a6f-8005-993e1133246e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.1-t0.75/1762652580.630381", - "retrieved_timestamp": "1762652580.630382", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.75", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.75", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7208063493752133 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5995203934792884 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20166163141993956 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3498322147651007 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3951145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4140625 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1/6850eb56-9f2c-4d4f-a82a-29e24b81b8b3.json b/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1/6850eb56-9f2c-4d4f-a82a-29e24b81b8b3.json deleted file mode 100644 index a4456481f..000000000 --- a/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1/6850eb56-9f2c-4d4f-a82a-29e24b81b8b3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.1/1762652580.628911", - "retrieved_timestamp": "1762652580.6289122", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1", - "developer": "google", - 
"inference_platform": "unknown", - "id": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7648949232480928 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6074511952177571 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2280966767371601 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3498322147651007 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41362499999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43209773936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.2/7f429355-b60b-4298-8eb0-a072a80898d7.json b/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.2/7f429355-b60b-4298-8eb0-a072a80898d7.json deleted file mode 100644 index d65491692..000000000 --- a/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.2/7f429355-b60b-4298-8eb0-a072a80898d7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.2/1762652580.6306539", - "retrieved_timestamp": "1762652580.6306539", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.2", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.759999024809727 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.6066260664115647 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22280966767371602 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34815436241610737 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4109583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43226396276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Gutenberg-Doppel-9B-v0.1/774a3b0c-acae-4ad2-a2a6-42c30e1db7c0.json b/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Gutenberg-Doppel-9B-v0.1/774a3b0c-acae-4ad2-a2a6-42c30e1db7c0.json deleted file mode 100644 index 1a9ec9576..000000000 --- a/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Gutenberg-Doppel-9B-v0.1/774a3b0c-acae-4ad2-a2a6-42c30e1db7c0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_recoilme-gemma-2-Gutenberg-Doppel-9B-v0.1/1762652580.630864", - "retrieved_timestamp": "1762652580.6308649", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/recoilme-gemma-2-Gutenberg-Doppel-9B-v0.1", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/recoilme-gemma-2-Gutenberg-Doppel-9B-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7615227596111651 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6098779556010631 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20996978851963746 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3414429530201342 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy 
on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43102083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4315159574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Ifable-9B-v0.1/e8502d8d-87bd-444c-b41b-7f8d4eb15b29.json b/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Ifable-9B-v0.1/e8502d8d-87bd-444c-b41b-7f8d4eb15b29.json deleted file mode 100644 index 45762f3bc..000000000 --- a/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Ifable-9B-v0.1/e8502d8d-87bd-444c-b41b-7f8d4eb15b29.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_recoilme-gemma-2-Ifable-9B-v0.1/1762652580.6310751", - "retrieved_timestamp": "1762652580.631076", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/recoilme-gemma-2-Ifable-9B-v0.1", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/recoilme-gemma-2-Ifable-9B-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7943955371746965 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6064399292200404 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35151006711409394 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42022916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4323470744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-psy10k-mental_healt-9B-v0.1/735bed66-1e83-4647-b730-14f0d571d597.json b/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-psy10k-mental_healt-9B-v0.1/735bed66-1e83-4647-b730-14f0d571d597.json deleted file mode 100644 index 
413e927e1..000000000 --- a/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-psy10k-mental_healt-9B-v0.1/735bed66-1e83-4647-b730-14f0d571d597.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_recoilme-gemma-2-psy10k-mental_healt-9B-v0.1/1762652580.631496", - "retrieved_timestamp": "1762652580.631499", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/recoilme-gemma-2-psy10k-mental_healt-9B-v0.1", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/recoilme-gemma-2-psy10k-mental_healt-9B-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.744536718130117 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.597759349920723 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18882175226586104 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34395973154362414 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42946875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41805186170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/goulue5/merging_LLM/6efd0dbd-b8c1-4c66-bdf7-19055c16ca22.json b/data/hfopenllm_v2/goulue5/merging_LLM/6efd0dbd-b8c1-4c66-bdf7-19055c16ca22.json new file mode 100644 index 000000000..384e12c7d --- /dev/null +++ b/data/hfopenllm_v2/goulue5/merging_LLM/6efd0dbd-b8c1-4c66-bdf7-19055c16ca22.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/goulue5_merging_LLM/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "merging_LLM", + "id": "goulue5/merging_LLM", + "developer": "goulue5", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + 
"evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3233 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4216 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0967 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2958 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/goulue5/merging_LLM/a7fb7d77-93c3-41c8-a85a-692953dcd2c6.json b/data/hfopenllm_v2/goulue5/merging_LLM/a7fb7d77-93c3-41c8-a85a-692953dcd2c6.json deleted file mode 100644 index b5db5ffef..000000000 --- a/data/hfopenllm_v2/goulue5/merging_LLM/a7fb7d77-93c3-41c8-a85a-692953dcd2c6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/goulue5_merging_LLM/1762652580.1806688", - "retrieved_timestamp": "1762652580.18067", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "goulue5/merging_LLM", - "developer": "goulue5", - "inference_platform": "unknown", - "id": "goulue5/merging_LLM", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", 
- "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32326006108237254 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4216498611590102 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09667673716012085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43328125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29579454787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/gradientai/Llama-3-8B-Instruct-Gradient-1048k/1388b8d4-c711-480c-8a06-a8b7bd8aa79c.json b/data/hfopenllm_v2/gradientai/Llama-3-8B-Instruct-Gradient-1048k/1388b8d4-c711-480c-8a06-a8b7bd8aa79c.json new file mode 100644 index 000000000..62b5a87b9 --- /dev/null +++ b/data/hfopenllm_v2/gradientai/Llama-3-8B-Instruct-Gradient-1048k/1388b8d4-c711-480c-8a06-a8b7bd8aa79c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/gradientai_Llama-3-8B-Instruct-Gradient-1048k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct-Gradient-1048k", + "id": "gradientai/Llama-3-8B-Instruct-Gradient-1048k", + "developer": "gradientai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4456 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4346 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0536 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4298 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.294 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/gradientai/Llama-3-8B-Instruct-Gradient-1048k/79d366fc-e21c-4e5e-bb94-8d221d9df715.json b/data/hfopenllm_v2/gradientai/Llama-3-8B-Instruct-Gradient-1048k/79d366fc-e21c-4e5e-bb94-8d221d9df715.json deleted file mode 100644 index 4aee94ca5..000000000 --- a/data/hfopenllm_v2/gradientai/Llama-3-8B-Instruct-Gradient-1048k/79d366fc-e21c-4e5e-bb94-8d221d9df715.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/gradientai_Llama-3-8B-Instruct-Gradient-1048k/1762652580.181334", - "retrieved_timestamp": "1762652580.181335", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "gradientai/Llama-3-8B-Instruct-Gradient-1048k", - "developer": "gradientai", - "inference_platform": "unknown", - "id": "gradientai/Llama-3-8B-Instruct-Gradient-1048k", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4455588948434598 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4345903107069573 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": 
{ - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42975 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29404920212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/grimjim/DeepSauerHuatuoSkywork-R1-o1-Llama-3.1-8B/03393ffd-1923-4767-ba14-d0e3e6751842.json b/data/hfopenllm_v2/grimjim/DeepSauerHuatuoSkywork-R1-o1-Llama-3.1-8B/03393ffd-1923-4767-ba14-d0e3e6751842.json new file mode 100644 index 000000000..ff2605288 --- /dev/null +++ b/data/hfopenllm_v2/grimjim/DeepSauerHuatuoSkywork-R1-o1-Llama-3.1-8B/03393ffd-1923-4767-ba14-d0e3e6751842.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/grimjim_DeepSauerHuatuoSkywork-R1-o1-Llama-3.1-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSauerHuatuoSkywork-R1-o1-Llama-3.1-8B", + "id": "grimjim/DeepSauerHuatuoSkywork-R1-o1-Llama-3.1-8B", + "developer": "grimjim", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4797 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5269 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2221 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3381 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.4408 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3957 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/grimjim/Gigantes-v1-gemma2-9b-it/b7d049dc-127d-4075-8067-22adac9a58c3.json b/data/hfopenllm_v2/grimjim/Gigantes-v1-gemma2-9b-it/b7d049dc-127d-4075-8067-22adac9a58c3.json new file mode 100644 index 000000000..fac07281a --- /dev/null +++ b/data/hfopenllm_v2/grimjim/Gigantes-v1-gemma2-9b-it/b7d049dc-127d-4075-8067-22adac9a58c3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/grimjim_Gigantes-v1-gemma2-9b-it/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gigantes-v1-gemma2-9b-it", + "id": "grimjim/Gigantes-v1-gemma2-9b-it", + "developer": "grimjim", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6925 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5978 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2145 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3532 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4555 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4225 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/grimjim/Gigantes-v2-gemma2-9b-it/89d79024-f4b8-4165-bd88-47f2b0010800.json b/data/hfopenllm_v2/grimjim/Gigantes-v2-gemma2-9b-it/89d79024-f4b8-4165-bd88-47f2b0010800.json new file mode 100644 index 000000000..b451c7250 --- /dev/null +++ b/data/hfopenllm_v2/grimjim/Gigantes-v2-gemma2-9b-it/89d79024-f4b8-4165-bd88-47f2b0010800.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/grimjim_Gigantes-v2-gemma2-9b-it/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gigantes-v2-gemma2-9b-it", + "id": "grimjim/Gigantes-v2-gemma2-9b-it", + "developer": "grimjim", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7351 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5987 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2017 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3515 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4595 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4259 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/grimjim/Gigantes-v3-gemma2-9b-it/d2c0fb0d-6c0c-464a-b09f-6382a57b6afb.json b/data/hfopenllm_v2/grimjim/Gigantes-v3-gemma2-9b-it/d2c0fb0d-6c0c-464a-b09f-6382a57b6afb.json new file mode 100644 index 000000000..918da3c13 --- /dev/null +++ b/data/hfopenllm_v2/grimjim/Gigantes-v3-gemma2-9b-it/d2c0fb0d-6c0c-464a-b09f-6382a57b6afb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/grimjim_Gigantes-v3-gemma2-9b-it/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gigantes-v3-gemma2-9b-it", + "id": "grimjim/Gigantes-v3-gemma2-9b-it", + "developer": "grimjim", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6976 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5984 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.21 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3565 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4608 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4226 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/grimjim/HuatuoSkywork-o1-Llama-3.1-8B/a891b28a-2dcc-4b8e-ad20-1f23d663b44b.json b/data/hfopenllm_v2/grimjim/HuatuoSkywork-o1-Llama-3.1-8B/a891b28a-2dcc-4b8e-ad20-1f23d663b44b.json new file mode 100644 index 
000000000..6204ca20a --- /dev/null +++ b/data/hfopenllm_v2/grimjim/HuatuoSkywork-o1-Llama-3.1-8B/a891b28a-2dcc-4b8e-ad20-1f23d663b44b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/grimjim_HuatuoSkywork-o1-Llama-3.1-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HuatuoSkywork-o1-Llama-3.1-8B", + "id": "grimjim/HuatuoSkywork-o1-Llama-3.1-8B", + "developer": "grimjim", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3961 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4886 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3882 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3839 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3095 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/grimjim/Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge/55e274bb-1e2c-4402-b7ae-09ff7b1f9738.json b/data/hfopenllm_v2/grimjim/Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge/55e274bb-1e2c-4402-b7ae-09ff7b1f9738.json new file mode 100644 index 000000000..bd55b41d5 --- /dev/null +++ b/data/hfopenllm_v2/grimjim/Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge/55e274bb-1e2c-4402-b7ae-09ff7b1f9738.json @@ -0,0 +1,132 @@ +{ + 
"schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/grimjim_Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge", + "id": "grimjim/Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge", + "developer": "grimjim", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4271 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4962 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0997 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4043 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3625 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/grimjim/Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge/6b615d1d-7dab-4414-88a2-72fff1b5fce7.json b/data/hfopenllm_v2/grimjim/Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge/6b615d1d-7dab-4414-88a2-72fff1b5fce7.json deleted file mode 100644 index 25fc18d08..000000000 --- a/data/hfopenllm_v2/grimjim/Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge/6b615d1d-7dab-4414-88a2-72fff1b5fce7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/grimjim_Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge/1762652580.1827798", - "retrieved_timestamp": "1762652580.182781", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "grimjim/Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge", - "developer": "grimjim", - "inference_platform": "unknown", - "id": "grimjim/Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42712447417297217 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4961694535006833 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09969788519637462 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40432291666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3625332446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/grimjim/Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge/251c7560-4672-44a6-82df-2b8ce9a99a5e.json b/data/hfopenllm_v2/grimjim/Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge/251c7560-4672-44a6-82df-2b8ce9a99a5e.json deleted file mode 100644 index af0750ae5..000000000 --- a/data/hfopenllm_v2/grimjim/Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge/251c7560-4672-44a6-82df-2b8ce9a99a5e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/grimjim_Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge/1762652580.183053", - "retrieved_timestamp": "1762652580.183053", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "grimjim/Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge", - "developer": "grimjim", - "inference_platform": "unknown", - "id": "grimjim/Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge", - "additional_details": { - "precision": 
"bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6805897241541332 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5021734091176594 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38851041666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3684341755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/grimjim/Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge/fe7a6940-fc4c-4345-84be-609c8155be57.json b/data/hfopenllm_v2/grimjim/Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge/fe7a6940-fc4c-4345-84be-609c8155be57.json new file mode 100644 index 000000000..c0a6840ce --- /dev/null +++ b/data/hfopenllm_v2/grimjim/Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge/fe7a6940-fc4c-4345-84be-609c8155be57.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/grimjim_Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge", + "id": "grimjim/Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge", + "developer": "grimjim", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6806 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": 
{ + "score": 0.5022 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0891 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2626 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3885 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3684 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/grimjim/Llama-3.1-8B-Instruct-abliterated_via_adapter/377105ce-c655-47fe-a565-71a4de8c3683.json b/data/hfopenllm_v2/grimjim/Llama-3.1-8B-Instruct-abliterated_via_adapter/377105ce-c655-47fe-a565-71a4de8c3683.json deleted file mode 100644 index 65b4f4396..000000000 --- a/data/hfopenllm_v2/grimjim/Llama-3.1-8B-Instruct-abliterated_via_adapter/377105ce-c655-47fe-a565-71a4de8c3683.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/grimjim_Llama-3.1-8B-Instruct-abliterated_via_adapter/1762652580.183267", - "retrieved_timestamp": "1762652580.183268", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "grimjim/Llama-3.1-8B-Instruct-abliterated_via_adapter", - "developer": "grimjim", - "inference_platform": "unknown", - "id": "grimjim/Llama-3.1-8B-Instruct-abliterated_via_adapter", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48695018107510296 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.510526564708187 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13972809667673716 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40103125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3651097074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/grimjim/Llama-3.1-8B-Instruct-abliterated_via_adapter/77eb2b0f-e3e3-474c-bb02-dabde2998ef0.json b/data/hfopenllm_v2/grimjim/Llama-3.1-8B-Instruct-abliterated_via_adapter/77eb2b0f-e3e3-474c-bb02-dabde2998ef0.json new file mode 100644 index 000000000..b33246650 --- /dev/null +++ b/data/hfopenllm_v2/grimjim/Llama-3.1-8B-Instruct-abliterated_via_adapter/77eb2b0f-e3e3-474c-bb02-dabde2998ef0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/grimjim_Llama-3.1-8B-Instruct-abliterated_via_adapter/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8B-Instruct-abliterated_via_adapter", + "id": "grimjim/Llama-3.1-8B-Instruct-abliterated_via_adapter", + "developer": "grimjim", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.487 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5105 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1397 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": 
{ + "score": 0.3138 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.401 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3651 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/grimjim/Llama-3.1-Bonsaikraft-8B-Instruct/5f15d683-bae4-4888-8d1c-352aac802fbe.json b/data/hfopenllm_v2/grimjim/Llama-3.1-Bonsaikraft-8B-Instruct/5f15d683-bae4-4888-8d1c-352aac802fbe.json deleted file mode 100644 index 2fc15968c..000000000 --- a/data/hfopenllm_v2/grimjim/Llama-3.1-Bonsaikraft-8B-Instruct/5f15d683-bae4-4888-8d1c-352aac802fbe.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/grimjim_Llama-3.1-Bonsaikraft-8B-Instruct/1762652580.1834722", - "retrieved_timestamp": "1762652580.1834729", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "grimjim/Llama-3.1-Bonsaikraft-8B-Instruct", - "developer": "grimjim", - "inference_platform": "unknown", - "id": "grimjim/Llama-3.1-Bonsaikraft-8B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42500121898784116 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5286855891530357 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13141993957703926 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4235104166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.3764128989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/grimjim/Llama-3.1-Bonsaikraft-8B-Instruct/94d744be-5d28-490a-ba9a-8440cb97dce9.json b/data/hfopenllm_v2/grimjim/Llama-3.1-Bonsaikraft-8B-Instruct/94d744be-5d28-490a-ba9a-8440cb97dce9.json new file mode 100644 index 000000000..3c3c836ca --- /dev/null +++ b/data/hfopenllm_v2/grimjim/Llama-3.1-Bonsaikraft-8B-Instruct/94d744be-5d28-490a-ba9a-8440cb97dce9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/grimjim_Llama-3.1-Bonsaikraft-8B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-Bonsaikraft-8B-Instruct", + "id": "grimjim/Llama-3.1-Bonsaikraft-8B-Instruct", + "developer": "grimjim", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.425 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5287 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1314 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4235 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3764 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/grimjim/Llama-Nephilim-Metamorphosis-v2-8B/2765061e-7506-4eb6-b63f-312f6290665a.json b/data/hfopenllm_v2/grimjim/Llama-Nephilim-Metamorphosis-v2-8B/2765061e-7506-4eb6-b63f-312f6290665a.json new file mode 100644 index 000000000..ee870fb1c --- /dev/null +++ b/data/hfopenllm_v2/grimjim/Llama-Nephilim-Metamorphosis-v2-8B/2765061e-7506-4eb6-b63f-312f6290665a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/grimjim_Llama-Nephilim-Metamorphosis-v2-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-Nephilim-Metamorphosis-v2-8B", + "id": "grimjim/Llama-Nephilim-Metamorphosis-v2-8B", + "developer": "grimjim", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4545 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5013 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1397 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.323 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4091 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3809 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/grimjim/Llama3.1-SuperNovaLite-HuatuoSkywork-o1-8B/167c937c-66c7-45a8-bbd9-97d98531bf7d.json 
b/data/hfopenllm_v2/grimjim/Llama3.1-SuperNovaLite-HuatuoSkywork-o1-8B/167c937c-66c7-45a8-bbd9-97d98531bf7d.json new file mode 100644 index 000000000..7a0c41480 --- /dev/null +++ b/data/hfopenllm_v2/grimjim/Llama3.1-SuperNovaLite-HuatuoSkywork-o1-8B/167c937c-66c7-45a8-bbd9-97d98531bf7d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/grimjim_Llama3.1-SuperNovaLite-HuatuoSkywork-o1-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.1-SuperNovaLite-HuatuoSkywork-o1-8B", + "id": "grimjim/Llama3.1-SuperNovaLite-HuatuoSkywork-o1-8B", + "developer": "grimjim", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4366 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5287 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3006 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3112 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3999 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3684 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/grimjim/Magnolia-v1-Gemma2-8k-9B/9587c35c-1def-46e7-8642-7acb0340be5e.json b/data/hfopenllm_v2/grimjim/Magnolia-v1-Gemma2-8k-9B/9587c35c-1def-46e7-8642-7acb0340be5e.json new file mode 100644 index 000000000..51f562ec4 
--- /dev/null +++ b/data/hfopenllm_v2/grimjim/Magnolia-v1-Gemma2-8k-9B/9587c35c-1def-46e7-8642-7acb0340be5e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/grimjim_Magnolia-v1-Gemma2-8k-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Magnolia-v1-Gemma2-8k-9B", + "id": "grimjim/Magnolia-v1-Gemma2-8k-9B", + "developer": "grimjim", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3531 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5589 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1684 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3364 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4645 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4242 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/grimjim/Magnolia-v2-12B/1c9594fe-03d6-4ec1-9da5-99960da0dcd4.json b/data/hfopenllm_v2/grimjim/Magnolia-v2-12B/1c9594fe-03d6-4ec1-9da5-99960da0dcd4.json new file mode 100644 index 000000000..4765d94c9 --- /dev/null +++ b/data/hfopenllm_v2/grimjim/Magnolia-v2-12B/1c9594fe-03d6-4ec1-9da5-99960da0dcd4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/grimjim_Magnolia-v2-12B/1770682486.623709", + 
"retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Magnolia-v2-12B", + "id": "grimjim/Magnolia-v2-12B", + "developer": "grimjim", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3506 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.529 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1292 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3188 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4171 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3601 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/grimjim/Magnolia-v2-12B/2cf86f7c-a9a8-48d0-bc10-e8a1f654092c.json b/data/hfopenllm_v2/grimjim/Magnolia-v2-12B/2cf86f7c-a9a8-48d0-bc10-e8a1f654092c.json deleted file mode 100644 index 3532b594d..000000000 --- a/data/hfopenllm_v2/grimjim/Magnolia-v2-12B/2cf86f7c-a9a8-48d0-bc10-e8a1f654092c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/grimjim_Magnolia-v2-12B/1762652580.184318", - "retrieved_timestamp": "1762652580.184319", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - 
"source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "grimjim/Magnolia-v2-12B", - "developer": "grimjim", - "inference_platform": "unknown", - "id": "grimjim/Magnolia-v2-12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3506119318962575 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5290279354217235 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12915407854984895 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41712499999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3601230053191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/grimjim/Magnolia-v2-Gemma2-8k-9B/8ed2c4eb-bc72-4dde-a559-1afd1698d37d.json b/data/hfopenllm_v2/grimjim/Magnolia-v2-Gemma2-8k-9B/8ed2c4eb-bc72-4dde-a559-1afd1698d37d.json new file mode 100644 index 000000000..3274bce78 --- /dev/null +++ b/data/hfopenllm_v2/grimjim/Magnolia-v2-Gemma2-8k-9B/8ed2c4eb-bc72-4dde-a559-1afd1698d37d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/grimjim_Magnolia-v2-Gemma2-8k-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Magnolia-v2-Gemma2-8k-9B", + "id": "grimjim/Magnolia-v2-Gemma2-8k-9B", + "developer": "grimjim", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7384 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6016 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2281 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3574 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4488 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4332 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/grimjim/Magnolia-v3-12B/68faa5a3-82ae-462d-adad-505134024710.json b/data/hfopenllm_v2/grimjim/Magnolia-v3-12B/68faa5a3-82ae-462d-adad-505134024710.json deleted file mode 100644 index 31dbaec6e..000000000 --- a/data/hfopenllm_v2/grimjim/Magnolia-v3-12B/68faa5a3-82ae-462d-adad-505134024710.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/grimjim_Magnolia-v3-12B/1762652580.184813", - "retrieved_timestamp": "1762652580.184814", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "grimjim/Magnolia-v3-12B", - "developer": "grimjim", - "inference_platform": "unknown", - "id": "grimjim/Magnolia-v3-12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39649906692021614 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5326669270363916 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1351963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32550335570469796 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4183958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3615359042553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/grimjim/Magnolia-v3-12B/a2f9536a-9266-4aee-be90-d04f4dcbe53c.json b/data/hfopenllm_v2/grimjim/Magnolia-v3-12B/a2f9536a-9266-4aee-be90-d04f4dcbe53c.json new file mode 100644 index 000000000..0565ceb88 --- /dev/null +++ b/data/hfopenllm_v2/grimjim/Magnolia-v3-12B/a2f9536a-9266-4aee-be90-d04f4dcbe53c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/grimjim_Magnolia-v3-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Magnolia-v3-12B", + "id": "grimjim/Magnolia-v3-12B", + "developer": "grimjim", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3965 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5327 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1352 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3255 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4184 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3615 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/grimjim/Magnolia-v3-Gemma2-8k-9B/7f116aaa-3880-4e53-948a-4b06e0d26cff.json b/data/hfopenllm_v2/grimjim/Magnolia-v3-Gemma2-8k-9B/7f116aaa-3880-4e53-948a-4b06e0d26cff.json new file mode 100644 index 000000000..5518b4113 --- /dev/null +++ b/data/hfopenllm_v2/grimjim/Magnolia-v3-Gemma2-8k-9B/7f116aaa-3880-4e53-948a-4b06e0d26cff.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/grimjim_Magnolia-v3-Gemma2-8k-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Magnolia-v3-Gemma2-8k-9B", + "id": "grimjim/Magnolia-v3-Gemma2-8k-9B", + "developer": "grimjim", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7378 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6015 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2319 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3565 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4488 + } + }, + { + 
"evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4337 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/grimjim/Magnolia-v4-12B/7cbe4516-2be2-421b-95f4-c9500ad64ca5.json b/data/hfopenllm_v2/grimjim/Magnolia-v4-12B/7cbe4516-2be2-421b-95f4-c9500ad64ca5.json new file mode 100644 index 000000000..53c5ccbfc --- /dev/null +++ b/data/hfopenllm_v2/grimjim/Magnolia-v4-12B/7cbe4516-2be2-421b-95f4-c9500ad64ca5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/grimjim_Magnolia-v4-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Magnolia-v4-12B", + "id": "grimjim/Magnolia-v4-12B", + "developer": "grimjim", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3418 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5431 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1314 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.328 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4211 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3672 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/grimjim/Magnolia-v4-12B/a48116ed-d4bf-4f06-94aa-2ef8364bd8d2.json b/data/hfopenllm_v2/grimjim/Magnolia-v4-12B/a48116ed-d4bf-4f06-94aa-2ef8364bd8d2.json deleted file mode 100644 index baa4dc02c..000000000 --- a/data/hfopenllm_v2/grimjim/Magnolia-v4-12B/a48116ed-d4bf-4f06-94aa-2ef8364bd8d2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/grimjim_Magnolia-v4-12B/1762652580.18525", - "retrieved_timestamp": "1762652580.185251", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "grimjim/Magnolia-v4-12B", - "developer": "grimjim", - "inference_platform": "unknown", - "id": "grimjim/Magnolia-v4-12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34179421712168156 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5430894084668724 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13141993957703926 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42112499999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3671875 - } - } - ] -} diff --git a/data/hfopenllm_v2/grimjim/Magnolia-v5a-12B/07df565a-bc30-4a9d-b472-7a85f35938be.json b/data/hfopenllm_v2/grimjim/Magnolia-v5a-12B/07df565a-bc30-4a9d-b472-7a85f35938be.json new file mode 100644 index 000000000..edba3c4ff --- /dev/null +++ b/data/hfopenllm_v2/grimjim/Magnolia-v5a-12B/07df565a-bc30-4a9d-b472-7a85f35938be.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/grimjim_Magnolia-v5a-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"Magnolia-v5a-12B", + "id": "grimjim/Magnolia-v5a-12B", + "developer": "grimjim", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4114 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5312 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1375 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3221 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4145 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3601 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/grimjim/Magnolia-v5a-12B/ff64dcc7-9646-4c53-8b1e-68b62a025574.json b/data/hfopenllm_v2/grimjim/Magnolia-v5a-12B/ff64dcc7-9646-4c53-8b1e-68b62a025574.json deleted file mode 100644 index e3b149b70..000000000 --- a/data/hfopenllm_v2/grimjim/Magnolia-v5a-12B/ff64dcc7-9646-4c53-8b1e-68b62a025574.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/grimjim_Magnolia-v5a-12B/1762652580.185457", - "retrieved_timestamp": "1762652580.185458", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "grimjim/Magnolia-v5a-12B", - "developer": "grimjim", - "inference_platform": "unknown", - "id": "grimjim/Magnolia-v5a-12B", - "additional_details": { - 
"precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41136185321613317 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5311764105029141 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13746223564954682 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221476510067114 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4144895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3601230053191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/grimjim/Magot-v1-Gemma2-8k-9B/7545f7db-10bb-4d97-9b3f-4346f4f26bad.json b/data/hfopenllm_v2/grimjim/Magot-v1-Gemma2-8k-9B/7545f7db-10bb-4d97-9b3f-4346f4f26bad.json new file mode 100644 index 000000000..e97c90ea3 --- /dev/null +++ b/data/hfopenllm_v2/grimjim/Magot-v1-Gemma2-8k-9B/7545f7db-10bb-4d97-9b3f-4346f4f26bad.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/grimjim_Magot-v1-Gemma2-8k-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Magot-v1-Gemma2-8k-9B", + "id": "grimjim/Magot-v1-Gemma2-8k-9B", + "developer": "grimjim", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2997 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6019 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + 
"dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0989 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3465 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4488 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4337 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/grimjim/Magot-v2-Gemma2-8k-9B/47384f10-ac6a-4629-92db-86f01a441f7f.json b/data/hfopenllm_v2/grimjim/Magot-v2-Gemma2-8k-9B/47384f10-ac6a-4629-92db-86f01a441f7f.json new file mode 100644 index 000000000..f21dbc7f0 --- /dev/null +++ b/data/hfopenllm_v2/grimjim/Magot-v2-Gemma2-8k-9B/47384f10-ac6a-4629-92db-86f01a441f7f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/grimjim_Magot-v2-Gemma2-8k-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Magot-v2-Gemma2-8k-9B", + "id": "grimjim/Magot-v2-Gemma2-8k-9B", + "developer": "grimjim", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7347 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5897 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2017 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.354 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4344 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4223 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/grimjim/SauerHuatuoSkywork-o1-Llama-3.1-8B/3c9f022f-3e2b-48d6-acb9-07f066cfceb6.json b/data/hfopenllm_v2/grimjim/SauerHuatuoSkywork-o1-Llama-3.1-8B/3c9f022f-3e2b-48d6-acb9-07f066cfceb6.json new file mode 100644 index 000000000..8620f2a75 --- /dev/null +++ b/data/hfopenllm_v2/grimjim/SauerHuatuoSkywork-o1-Llama-3.1-8B/3c9f022f-3e2b-48d6-acb9-07f066cfceb6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/grimjim_SauerHuatuoSkywork-o1-Llama-3.1-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SauerHuatuoSkywork-o1-Llama-3.1-8B", + "id": "grimjim/SauerHuatuoSkywork-o1-Llama-3.1-8B", + "developer": "grimjim", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5219 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5222 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.173 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": 
"GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3213 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4527 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3991 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/grimjim/llama-3-Nephilim-v1-8B/1d851cfb-8624-4516-8204-85569c60dc67.json b/data/hfopenllm_v2/grimjim/llama-3-Nephilim-v1-8B/1d851cfb-8624-4516-8204-85569c60dc67.json new file mode 100644 index 000000000..a154af168 --- /dev/null +++ b/data/hfopenllm_v2/grimjim/llama-3-Nephilim-v1-8B/1d851cfb-8624-4516-8204-85569c60dc67.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/grimjim_llama-3-Nephilim-v1-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-3-Nephilim-v1-8B", + "id": "grimjim/llama-3-Nephilim-v1-8B", + "developer": "grimjim", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4277 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5132 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0906 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, 
+ "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4136 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3796 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/grimjim/llama-3-Nephilim-v2-8B/a7990990-7498-4b74-a0aa-9c266910698e.json b/data/hfopenllm_v2/grimjim/llama-3-Nephilim-v2-8B/a7990990-7498-4b74-a0aa-9c266910698e.json new file mode 100644 index 000000000..961e8d5ae --- /dev/null +++ b/data/hfopenllm_v2/grimjim/llama-3-Nephilim-v2-8B/a7990990-7498-4b74-a0aa-9c266910698e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/grimjim_llama-3-Nephilim-v2-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-3-Nephilim-v2-8B", + "id": "grimjim/llama-3-Nephilim-v2-8B", + "developer": "grimjim", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3922 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5048 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1065 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3895 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3641 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/grimjim/llama-3-Nephilim-v2.1-8B/0b41d37e-0728-4575-9662-c150e2e29bd0.json b/data/hfopenllm_v2/grimjim/llama-3-Nephilim-v2.1-8B/0b41d37e-0728-4575-9662-c150e2e29bd0.json new file mode 100644 index 000000000..2e431db78 --- /dev/null +++ b/data/hfopenllm_v2/grimjim/llama-3-Nephilim-v2.1-8B/0b41d37e-0728-4575-9662-c150e2e29bd0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/grimjim_llama-3-Nephilim-v2.1-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-3-Nephilim-v2.1-8B", + "id": "grimjim/llama-3-Nephilim-v2.1-8B", + "developer": "grimjim", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3895 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5095 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0997 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3935 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + 
"dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3644 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/grimjim/llama-3-Nephilim-v3-8B/c565a7e9-bd1b-41a5-bff3-3a349553f4e8.json b/data/hfopenllm_v2/grimjim/llama-3-Nephilim-v3-8B/c565a7e9-bd1b-41a5-bff3-3a349553f4e8.json new file mode 100644 index 000000000..123748d61 --- /dev/null +++ b/data/hfopenllm_v2/grimjim/llama-3-Nephilim-v3-8B/c565a7e9-bd1b-41a5-bff3-3a349553f4e8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/grimjim_llama-3-Nephilim-v3-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-3-Nephilim-v3-8B", + "id": "grimjim/llama-3-Nephilim-v3-8B", + "developer": "grimjim", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4174 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5013 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0952 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3989 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 
0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3612 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/gupta-tanish/llama-7b-dpo-baseline/1b962cb9-8754-40ab-b41a-b7cdf1fa3de1.json b/data/hfopenllm_v2/gupta-tanish/llama-7b-dpo-baseline/1b962cb9-8754-40ab-b41a-b7cdf1fa3de1.json deleted file mode 100644 index cd7bf69b3..000000000 --- a/data/hfopenllm_v2/gupta-tanish/llama-7b-dpo-baseline/1b962cb9-8754-40ab-b41a-b7cdf1fa3de1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/gupta-tanish_llama-7b-dpo-baseline/1762652580.1871748", - "retrieved_timestamp": "1762652580.1871748", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "gupta-tanish/llama-7b-dpo-baseline", - "developer": "gupta-tanish", - "inference_platform": "unknown", - "id": "gupta-tanish/llama-7b-dpo-baseline", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.738 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26930433472076315 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3896894398264714 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.445625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20279255319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/gupta-tanish/llama-7b-dpo-baseline/680a4507-755e-4014-877b-6032f0220270.json b/data/hfopenllm_v2/gupta-tanish/llama-7b-dpo-baseline/680a4507-755e-4014-877b-6032f0220270.json new file mode 100644 index 000000000..888dfc69c --- /dev/null +++ b/data/hfopenllm_v2/gupta-tanish/llama-7b-dpo-baseline/680a4507-755e-4014-877b-6032f0220270.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/gupta-tanish_llama-7b-dpo-baseline/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-7b-dpo-baseline", + "id": "gupta-tanish/llama-7b-dpo-baseline", + "developer": "gupta-tanish", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.738 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2693 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3897 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0196 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2626 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4456 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2028 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/gz987/qwen2.5-7b-cabs-v0.1/5ace8dc6-e348-4267-bb4a-f71a335d074e.json b/data/hfopenllm_v2/gz987/qwen2.5-7b-cabs-v0.1/5ace8dc6-e348-4267-bb4a-f71a335d074e.json new file mode 100644 index 000000000..711f9349b --- /dev/null +++ b/data/hfopenllm_v2/gz987/qwen2.5-7b-cabs-v0.1/5ace8dc6-e348-4267-bb4a-f71a335d074e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/gz987_qwen2.5-7b-cabs-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwen2.5-7b-cabs-v0.1", + "id": "gz987/qwen2.5-7b-cabs-v0.1", + "developer": "gz987", + "inference_platform": "unknown", + 
"additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7506 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5482 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4796 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3138 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4376 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4406 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/gz987/qwen2.5-7b-cabs-v0.2/07549821-db51-4b77-980a-056131b5dd29.json b/data/hfopenllm_v2/gz987/qwen2.5-7b-cabs-v0.2/07549821-db51-4b77-980a-056131b5dd29.json new file mode 100644 index 000000000..3ae09bf09 --- /dev/null +++ b/data/hfopenllm_v2/gz987/qwen2.5-7b-cabs-v0.2/07549821-db51-4b77-980a-056131b5dd29.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/gz987_qwen2.5-7b-cabs-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwen2.5-7b-cabs-v0.2", + "id": "gz987/qwen2.5-7b-cabs-v0.2", + "developer": "gz987", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + 
"hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7418 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5516 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4902 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4429 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4397 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/gz987/qwen2.5-7b-cabs-v0.3/ff12a0a1-a913-441b-955c-bcbd50056acf.json b/data/hfopenllm_v2/gz987/qwen2.5-7b-cabs-v0.3/ff12a0a1-a913-441b-955c-bcbd50056acf.json new file mode 100644 index 000000000..9b5948568 --- /dev/null +++ b/data/hfopenllm_v2/gz987/qwen2.5-7b-cabs-v0.3/ff12a0a1-a913-441b-955c-bcbd50056acf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/gz987_qwen2.5-7b-cabs-v0.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwen2.5-7b-cabs-v0.3", + "id": "gz987/qwen2.5-7b-cabs-v0.3", + "developer": "gz987", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.757 + } + }, + { + 
"evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5494 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4932 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.443 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4402 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/gz987/qwen2.5-7b-cabs-v0.4/947cfc2b-b73c-40eb-9e57-be5278776711.json b/data/hfopenllm_v2/gz987/qwen2.5-7b-cabs-v0.4/947cfc2b-b73c-40eb-9e57-be5278776711.json new file mode 100644 index 000000000..bc502245a --- /dev/null +++ b/data/hfopenllm_v2/gz987/qwen2.5-7b-cabs-v0.4/947cfc2b-b73c-40eb-9e57-be5278776711.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/gz987_qwen2.5-7b-cabs-v0.4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwen2.5-7b-cabs-v0.4", + "id": "gz987/qwen2.5-7b-cabs-v0.4", + "developer": "gz987", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7583 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5524 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4849 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.443 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4396 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/h2oai/h2o-danube-1.8b-chat/53639078-c50a-4147-bab0-16993f1790b6.json b/data/hfopenllm_v2/h2oai/h2o-danube-1.8b-chat/53639078-c50a-4147-bab0-16993f1790b6.json new file mode 100644 index 000000000..47446d221 --- /dev/null +++ b/data/hfopenllm_v2/h2oai/h2o-danube-1.8b-chat/53639078-c50a-4147-bab0-16993f1790b6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/h2oai_h2o-danube-1.8b-chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "h2o-danube-1.8b-chat", + "id": "h2oai/h2o-danube-1.8b-chat", + "developer": "h2oai", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 1.831 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2199 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.322 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, 
+ "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0136 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2542 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3989 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1314 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/h2oai/h2o-danube-1.8b-chat/ac8f78b5-a9e1-4e17-a1e7-8a7b8dc22a8d.json b/data/hfopenllm_v2/h2oai/h2o-danube-1.8b-chat/ac8f78b5-a9e1-4e17-a1e7-8a7b8dc22a8d.json deleted file mode 100644 index d6f513855..000000000 --- a/data/hfopenllm_v2/h2oai/h2o-danube-1.8b-chat/ac8f78b5-a9e1-4e17-a1e7-8a7b8dc22a8d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/h2oai_h2o-danube-1.8b-chat/1762652580.188648", - "retrieved_timestamp": "1762652580.188649", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "h2oai/h2o-danube-1.8b-chat", - "developer": "h2oai", - "inference_platform": "unknown", - "id": "h2oai/h2o-danube-1.8b-chat", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 1.831 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2198699450790569 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3219657593234448 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 
- } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3988645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13139960106382978 - } - } - ] -} diff --git a/data/hfopenllm_v2/h2oai/h2o-danube3-4b-base/3878bb0d-753f-465a-a8c1-8408f8f5bfcf.json b/data/hfopenllm_v2/h2oai/h2o-danube3-4b-base/3878bb0d-753f-465a-a8c1-8408f8f5bfcf.json deleted file mode 100644 index 1d6d5206b..000000000 --- a/data/hfopenllm_v2/h2oai/h2o-danube3-4b-base/3878bb0d-753f-465a-a8c1-8408f8f5bfcf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/h2oai_h2o-danube3-4b-base/1762652580.18891", - "retrieved_timestamp": "1762652580.1889112", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "h2oai/h2o-danube3-4b-base", - "developer": "h2oai", - "inference_platform": "unknown", - "id": "h2oai/h2o-danube3-4b-base", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.962 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23380851695722904 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3599083951265592 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.022658610271903322 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37781250000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2109375 - } - } - ] -} diff --git a/data/hfopenllm_v2/h2oai/h2o-danube3-4b-base/b2cf96e0-382e-4200-a4a4-d66e8a188878.json b/data/hfopenllm_v2/h2oai/h2o-danube3-4b-base/b2cf96e0-382e-4200-a4a4-d66e8a188878.json new file mode 100644 index 000000000..0acd0b4f9 --- /dev/null +++ 
b/data/hfopenllm_v2/h2oai/h2o-danube3-4b-base/b2cf96e0-382e-4200-a4a4-d66e8a188878.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/h2oai_h2o-danube3-4b-base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "h2o-danube3-4b-base", + "id": "h2oai/h2o-danube3-4b-base", + "developer": "h2oai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.962 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2338 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3599 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0227 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3778 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2109 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/h2oai/h2o-danube3-4b-chat/d3df3cb7-5e79-49e5-9ed1-1e2771318915.json b/data/hfopenllm_v2/h2oai/h2o-danube3-4b-chat/d3df3cb7-5e79-49e5-9ed1-1e2771318915.json deleted file mode 100644 index 1f7c2cc73..000000000 --- a/data/hfopenllm_v2/h2oai/h2o-danube3-4b-chat/d3df3cb7-5e79-49e5-9ed1-1e2771318915.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/h2oai_h2o-danube3-4b-chat/1762652580.1891232", - "retrieved_timestamp": "1762652580.189124", - 
"source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "h2oai/h2o-danube3-4b-chat", - "developer": "h2oai", - "inference_platform": "unknown", - "id": "h2oai/h2o-danube3-4b-chat", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.962 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3628771659197596 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3466170643135169 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.378125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22282247340425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/h2oai/h2o-danube3-4b-chat/d4ed3eb6-f569-4d4b-8da5-50eaaf824128.json b/data/hfopenllm_v2/h2oai/h2o-danube3-4b-chat/d4ed3eb6-f569-4d4b-8da5-50eaaf824128.json new file mode 100644 index 000000000..c17f6d915 --- /dev/null +++ b/data/hfopenllm_v2/h2oai/h2o-danube3-4b-chat/d4ed3eb6-f569-4d4b-8da5-50eaaf824128.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/h2oai_h2o-danube3-4b-chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "h2o-danube3-4b-chat", + "id": "h2oai/h2o-danube3-4b-chat", + "developer": "h2oai", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.962 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3629 + } + 
}, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3466 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0408 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3781 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2228 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/h2oai/h2o-danube3-500m-chat/210f7063-e0d9-424d-94f4-3645e4e1b401.json b/data/hfopenllm_v2/h2oai/h2o-danube3-500m-chat/210f7063-e0d9-424d-94f4-3645e4e1b401.json new file mode 100644 index 000000000..fd3e0551c --- /dev/null +++ b/data/hfopenllm_v2/h2oai/h2o-danube3-500m-chat/210f7063-e0d9-424d-94f4-3645e4e1b401.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/h2oai_h2o-danube3-500m-chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "h2o-danube3-500m-chat", + "id": "h2oai/h2o-danube3-500m-chat", + "developer": "h2oai", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.514 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2208 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3035 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0166 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3434 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1144 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/h2oai/h2o-danube3-500m-chat/c917765b-a4b4-4e5d-9c11-eed791349daf.json b/data/hfopenllm_v2/h2oai/h2o-danube3-500m-chat/c917765b-a4b4-4e5d-9c11-eed791349daf.json deleted file mode 100644 index 2934d5ad5..000000000 --- a/data/hfopenllm_v2/h2oai/h2o-danube3-500m-chat/c917765b-a4b4-4e5d-9c11-eed791349daf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/h2oai_h2o-danube3-500m-chat/1762652580.1893299", - "retrieved_timestamp": "1762652580.1893299", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "h2oai/h2o-danube3-500m-chat", - "developer": "h2oai", - "inference_platform": "unknown", - "id": "h2oai/h2o-danube3-500m-chat", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.514 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2207941594968018 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3034691168308313 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { 
- "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23070469798657717 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34339583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11436170212765957 - } - } - ] -} diff --git a/data/hfopenllm_v2/h2oai/h2o-danube3.1-4b-chat/4ecd26d8-8416-4dba-8d53-96f4013cfef0.json b/data/hfopenllm_v2/h2oai/h2o-danube3.1-4b-chat/4ecd26d8-8416-4dba-8d53-96f4013cfef0.json new file mode 100644 index 000000000..6f2847856 --- /dev/null +++ b/data/hfopenllm_v2/h2oai/h2o-danube3.1-4b-chat/4ecd26d8-8416-4dba-8d53-96f4013cfef0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/h2oai_h2o-danube3.1-4b-chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "h2o-danube3.1-4b-chat", + "id": "h2oai/h2o-danube3.1-4b-chat", + "developer": "h2oai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.962 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5021 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3608 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0332 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy 
on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4102 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2719 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/h2oai/h2o-danube3.1-4b-chat/5f5d83bd-91e9-416b-b40d-506f3861ed3f.json b/data/hfopenllm_v2/h2oai/h2o-danube3.1-4b-chat/5f5d83bd-91e9-416b-b40d-506f3861ed3f.json deleted file mode 100644 index c7b795bb8..000000000 --- a/data/hfopenllm_v2/h2oai/h2o-danube3.1-4b-chat/5f5d83bd-91e9-416b-b40d-506f3861ed3f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/h2oai_h2o-danube3.1-4b-chat/1762652580.189556", - "retrieved_timestamp": "1762652580.189557", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "h2oai/h2o-danube3.1-4b-chat", - "developer": "h2oai", - "inference_platform": "unknown", - "id": "h2oai/h2o-danube3.1-4b-chat", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.962 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5021121734774842 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3608421638178268 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03323262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41015625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2718583776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/haoranxu/ALMA-13B-R/15712b7d-e69f-4a4f-b13c-4e79ce859399.json b/data/hfopenllm_v2/haoranxu/ALMA-13B-R/15712b7d-e69f-4a4f-b13c-4e79ce859399.json new file mode 100644 index 000000000..b4507647e --- /dev/null +++ 
b/data/hfopenllm_v2/haoranxu/ALMA-13B-R/15712b7d-e69f-4a4f-b13c-4e79ce859399.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/haoranxu_ALMA-13B-R/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ALMA-13B-R", + "id": "haoranxu/ALMA-13B-R", + "developer": "haoranxu", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 13.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0039 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3457 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0174 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3528 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1817 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/haoranxu/ALMA-13B-R/9446f216-e3d6-4fca-ae00-937b4a76e5bf.json b/data/hfopenllm_v2/haoranxu/ALMA-13B-R/9446f216-e3d6-4fca-ae00-937b4a76e5bf.json deleted file mode 100644 index 6f047d61d..000000000 --- a/data/hfopenllm_v2/haoranxu/ALMA-13B-R/9446f216-e3d6-4fca-ae00-937b4a76e5bf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/haoranxu_ALMA-13B-R/1762652580.189782", - "retrieved_timestamp": "1762652580.189783", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "haoranxu/ALMA-13B-R", - "developer": "haoranxu", - "inference_platform": "unknown", - "id": "haoranxu/ALMA-13B-R", - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 13.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.003921816336210145 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.345656261205981 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.017371601208459216 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35279166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18168218085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/haoranxu/Llama-3-Instruct-8B-CPO-SimPO/9148c375-7c08-4c1c-82ed-5f935b2a4f04.json b/data/hfopenllm_v2/haoranxu/Llama-3-Instruct-8B-CPO-SimPO/9148c375-7c08-4c1c-82ed-5f935b2a4f04.json new file mode 100644 index 000000000..e8754a2a8 --- /dev/null +++ b/data/hfopenllm_v2/haoranxu/Llama-3-Instruct-8B-CPO-SimPO/9148c375-7c08-4c1c-82ed-5f935b2a4f04.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/haoranxu_Llama-3-Instruct-8B-CPO-SimPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Instruct-8B-CPO-SimPO", + "id": "haoranxu/Llama-3-Instruct-8B-CPO-SimPO", + "developer": "haoranxu", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + 
}, + "score_details": { + "score": 0.7046 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5048 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1027 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3567 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3686 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/haoranxu/Llama-3-Instruct-8B-CPO-SimPO/aa67ad0b-e469-4b49-a797-4542370a2e94.json b/data/hfopenllm_v2/haoranxu/Llama-3-Instruct-8B-CPO-SimPO/aa67ad0b-e469-4b49-a797-4542370a2e94.json deleted file mode 100644 index 1f5faddc7..000000000 --- a/data/hfopenllm_v2/haoranxu/Llama-3-Instruct-8B-CPO-SimPO/aa67ad0b-e469-4b49-a797-4542370a2e94.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/haoranxu_Llama-3-Instruct-8B-CPO-SimPO/1762652580.190052", - "retrieved_timestamp": "1762652580.190052", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "haoranxu/Llama-3-Instruct-8B-CPO-SimPO", - "developer": "haoranxu", - "inference_platform": "unknown", - "id": "haoranxu/Llama-3-Instruct-8B-CPO-SimPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7046447869430887 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5048301774821616 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1027190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3566666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3686003989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/haoranxu/Llama-3-Instruct-8B-SimPO/39aa4e41-376f-4ee6-8925-8bf746a871a0.json b/data/hfopenllm_v2/haoranxu/Llama-3-Instruct-8B-SimPO/39aa4e41-376f-4ee6-8925-8bf746a871a0.json deleted file mode 100644 index 4ad4247e0..000000000 --- a/data/hfopenllm_v2/haoranxu/Llama-3-Instruct-8B-SimPO/39aa4e41-376f-4ee6-8925-8bf746a871a0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/haoranxu_Llama-3-Instruct-8B-SimPO/1762652580.190277", - "retrieved_timestamp": "1762652580.1902778", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "haoranxu/Llama-3-Instruct-8B-SimPO", - "developer": "haoranxu", - "inference_platform": "unknown", - "id": "haoranxu/Llama-3-Instruct-8B-SimPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7347449212533854 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49792360151415016 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08761329305135952 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on 
MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35660416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37333776595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/haoranxu/Llama-3-Instruct-8B-SimPO/fb93274b-b7d8-483a-a95d-96340535febc.json b/data/hfopenllm_v2/haoranxu/Llama-3-Instruct-8B-SimPO/fb93274b-b7d8-483a-a95d-96340535febc.json new file mode 100644 index 000000000..e99c8aeb7 --- /dev/null +++ b/data/hfopenllm_v2/haoranxu/Llama-3-Instruct-8B-SimPO/fb93274b-b7d8-483a-a95d-96340535febc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/haoranxu_Llama-3-Instruct-8B-SimPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Instruct-8B-SimPO", + "id": "haoranxu/Llama-3-Instruct-8B-SimPO", + "developer": "haoranxu", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7347 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4979 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0876 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3566 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3733 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hatemmahmoud/qwen2.5-1.5b-sft-raft-grpo-hra-doc/0818b755-ec49-457c-8635-73f01816f30b.json b/data/hfopenllm_v2/hatemmahmoud/qwen2.5-1.5b-sft-raft-grpo-hra-doc/0818b755-ec49-457c-8635-73f01816f30b.json new file mode 100644 index 000000000..86fc861f4 --- /dev/null +++ b/data/hfopenllm_v2/hatemmahmoud/qwen2.5-1.5b-sft-raft-grpo-hra-doc/0818b755-ec49-457c-8635-73f01816f30b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hatemmahmoud_qwen2.5-1.5b-sft-raft-grpo-hra-doc/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwen2.5-1.5b-sft-raft-grpo-hra-doc", + "id": "hatemmahmoud/qwen2.5-1.5b-sft-raft-grpo-hra-doc", + "developer": "hatemmahmoud", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4196 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.427 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2175 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.361 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 
+ }, + "score_details": { + "score": 0.2776 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hatemmahmoud/qwen2.5-1.5b-sft-raft-grpo-hra-doc/7d3c185f-4b4f-4bdd-bac9-f4ba2410f40c.json b/data/hfopenllm_v2/hatemmahmoud/qwen2.5-1.5b-sft-raft-grpo-hra-doc/7d3c185f-4b4f-4bdd-bac9-f4ba2410f40c.json deleted file mode 100644 index 40de2d7db..000000000 --- a/data/hfopenllm_v2/hatemmahmoud/qwen2.5-1.5b-sft-raft-grpo-hra-doc/7d3c185f-4b4f-4bdd-bac9-f4ba2410f40c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hatemmahmoud_qwen2.5-1.5b-sft-raft-grpo-hra-doc/1762652580.190489", - "retrieved_timestamp": "1762652580.190489", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "hatemmahmoud/qwen2.5-1.5b-sft-raft-grpo-hra-doc", - "developer": "hatemmahmoud", - "inference_platform": "unknown", - "id": "hatemmahmoud/qwen2.5-1.5b-sft-raft-grpo-hra-doc", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41958004760701606 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4269926809768501 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36097916666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.277593085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/hon9kon9ize/CantoneseLLMChat-v0.5/6e87be06-ca0e-48a4-ae28-4a5794600117.json b/data/hfopenllm_v2/hon9kon9ize/CantoneseLLMChat-v0.5/6e87be06-ca0e-48a4-ae28-4a5794600117.json deleted file mode 100644 index acd185f5b..000000000 --- a/data/hfopenllm_v2/hon9kon9ize/CantoneseLLMChat-v0.5/6e87be06-ca0e-48a4-ae28-4a5794600117.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hon9kon9ize_CantoneseLLMChat-v0.5/1762652580.190754", - "retrieved_timestamp": "1762652580.1907551", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "hon9kon9ize/CantoneseLLMChat-v0.5", - "developer": "hon9kon9ize", - "inference_platform": "unknown", - "id": "hon9kon9ize/CantoneseLLMChat-v0.5", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.069 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3230849701015528 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43452388803059244 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04154078549848943 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4706458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2504155585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/hon9kon9ize/CantoneseLLMChat-v0.5/77962326-0160-49bd-9ef1-59b403b2bfce.json b/data/hfopenllm_v2/hon9kon9ize/CantoneseLLMChat-v0.5/77962326-0160-49bd-9ef1-59b403b2bfce.json new file mode 100644 index 000000000..4312be247 --- /dev/null +++ b/data/hfopenllm_v2/hon9kon9ize/CantoneseLLMChat-v0.5/77962326-0160-49bd-9ef1-59b403b2bfce.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hon9kon9ize_CantoneseLLMChat-v0.5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CantoneseLLMChat-v0.5", + "id": "hon9kon9ize/CantoneseLLMChat-v0.5", + "developer": "hon9kon9ize", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.069 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.3231 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4345 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0415 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4706 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2504 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hon9kon9ize/CantoneseLLMChat-v1.0-7B/272abbe5-8b61-442f-9860-d7411e7fec99.json b/data/hfopenllm_v2/hon9kon9ize/CantoneseLLMChat-v1.0-7B/272abbe5-8b61-442f-9860-d7411e7fec99.json new file mode 100644 index 000000000..265d4e3f0 --- /dev/null +++ b/data/hfopenllm_v2/hon9kon9ize/CantoneseLLMChat-v1.0-7B/272abbe5-8b61-442f-9860-d7411e7fec99.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hon9kon9ize_CantoneseLLMChat-v1.0-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CantoneseLLMChat-v1.0-7B", + "id": "hon9kon9ize/CantoneseLLMChat-v1.0-7B", + "developer": "hon9kon9ize", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4455 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + 
}, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4866 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2107 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3221 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3883 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3785 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hon9kon9ize/CantoneseLLMChat-v1.0-7B/cccf983e-e1b8-4f0f-b147-abccdea65548.json b/data/hfopenllm_v2/hon9kon9ize/CantoneseLLMChat-v1.0-7B/cccf983e-e1b8-4f0f-b147-abccdea65548.json deleted file mode 100644 index 76e02c1ee..000000000 --- a/data/hfopenllm_v2/hon9kon9ize/CantoneseLLMChat-v1.0-7B/cccf983e-e1b8-4f0f-b147-abccdea65548.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hon9kon9ize_CantoneseLLMChat-v1.0-7B/1762652580.191013", - "retrieved_timestamp": "1762652580.191013", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "hon9kon9ize/CantoneseLLMChat-v1.0-7B", - "developer": "hon9kon9ize", - "inference_platform": "unknown", - "id": "hon9kon9ize/CantoneseLLMChat-v1.0-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44548353923146145 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4865734655539633 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2107250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221476510067114 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3882916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3784906914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/hongbai12/li-0.4-pre/14d617a8-18c6-40a7-a4ba-19cf5fc5f4e3.json b/data/hfopenllm_v2/hongbai12/li-0.4-pre/14d617a8-18c6-40a7-a4ba-19cf5fc5f4e3.json new file mode 100644 index 000000000..c385d6d79 --- /dev/null +++ b/data/hfopenllm_v2/hongbai12/li-0.4-pre/14d617a8-18c6-40a7-a4ba-19cf5fc5f4e3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hongbai12_li-0.4-pre/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "li-0.4-pre", + "id": "hongbai12/li-0.4-pre", + "developer": "hongbai12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.52 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6298 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4924 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.323 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": 
"MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4513 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5015 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hongbai12/li-0.4-pre/ab7dcb4c-3884-428f-b342-38034dd51b56.json b/data/hfopenllm_v2/hongbai12/li-0.4-pre/ab7dcb4c-3884-428f-b342-38034dd51b56.json deleted file mode 100644 index 5045977f5..000000000 --- a/data/hfopenllm_v2/hongbai12/li-0.4-pre/ab7dcb4c-3884-428f-b342-38034dd51b56.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hongbai12_li-0.4-pre/1762652580.191224", - "retrieved_timestamp": "1762652580.191225", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "hongbai12/li-0.4-pre", - "developer": "hongbai12", - "inference_platform": "unknown", - "id": "hongbai12/li-0.4-pre", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5199725616918665 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6298274927108823 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49244712990936557 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4513020833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5014960106382979 - } - } - ] -} diff --git a/data/hfopenllm_v2/hotmailuser/Deepseek-qwen-modelstock-2B/ef7b5e6d-b5b7-4c7b-9781-6f90eb1ff5dd.json 
b/data/hfopenllm_v2/hotmailuser/Deepseek-qwen-modelstock-2B/ef7b5e6d-b5b7-4c7b-9781-6f90eb1ff5dd.json new file mode 100644 index 000000000..2ecbdfd24 --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/Deepseek-qwen-modelstock-2B/ef7b5e6d-b5b7-4c7b-9781-6f90eb1ff5dd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hotmailuser_Deepseek-qwen-modelstock-2B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Deepseek-qwen-modelstock-2B", + "id": "hotmailuser/Deepseek-qwen-modelstock-2B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2149 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3549 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3399 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3475 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1911 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/Falcon3Slerp1-10B/1970e257-7c93-4342-9ff4-a96af21acc67.json b/data/hfopenllm_v2/hotmailuser/Falcon3Slerp1-10B/1970e257-7c93-4342-9ff4-a96af21acc67.json new file mode 100644 index 000000000..5c04c6bd5 --- /dev/null +++ 
b/data/hfopenllm_v2/hotmailuser/Falcon3Slerp1-10B/1970e257-7c93-4342-9ff4-a96af21acc67.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hotmailuser_Falcon3Slerp1-10B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Falcon3Slerp1-10B", + "id": "hotmailuser/Falcon3Slerp1-10B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5694 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.617 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2598 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.344 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4318 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4402 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/Falcon3Slerp1-10B/376d342c-669b-4c76-9e7b-d49566ac441d.json b/data/hfopenllm_v2/hotmailuser/Falcon3Slerp1-10B/376d342c-669b-4c76-9e7b-d49566ac441d.json deleted file mode 100644 index 1d3e91bae..000000000 --- a/data/hfopenllm_v2/hotmailuser/Falcon3Slerp1-10B/376d342c-669b-4c76-9e7b-d49566ac441d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hotmailuser_Falcon3Slerp1-10B/1762652580.19171", - "retrieved_timestamp": 
"1762652580.191711", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "hotmailuser/Falcon3Slerp1-10B", - "developer": "hotmailuser", - "inference_platform": "unknown", - "id": "hotmailuser/Falcon3Slerp1-10B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5694069513335727 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.616984966186231 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2598187311178248 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34395973154362414 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43176041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4401595744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/hotmailuser/Falcon3Slerp2-10B/15d71696-4b21-41ff-a4c6-0aea92fb844a.json b/data/hfopenllm_v2/hotmailuser/Falcon3Slerp2-10B/15d71696-4b21-41ff-a4c6-0aea92fb844a.json new file mode 100644 index 000000000..7fc89171e --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/Falcon3Slerp2-10B/15d71696-4b21-41ff-a4c6-0aea92fb844a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hotmailuser_Falcon3Slerp2-10B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Falcon3Slerp2-10B", + "id": "hotmailuser/Falcon3Slerp2-10B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 
0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6118 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6164 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2319 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3381 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4096 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4369 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/Falcon3Slerp2-10B/bae0b772-8ae6-4fed-ae78-d6d83e560a95.json b/data/hfopenllm_v2/hotmailuser/Falcon3Slerp2-10B/bae0b772-8ae6-4fed-ae78-d6d83e560a95.json deleted file mode 100644 index 4552162d2..000000000 --- a/data/hfopenllm_v2/hotmailuser/Falcon3Slerp2-10B/bae0b772-8ae6-4fed-ae78-d6d83e560a95.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hotmailuser_Falcon3Slerp2-10B/1762652580.191951", - "retrieved_timestamp": "1762652580.191952", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "hotmailuser/Falcon3Slerp2-10B", - "developer": "hotmailuser", - "inference_platform": "unknown", - "id": "hotmailuser/Falcon3Slerp2-10B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6117966994241945 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6164263500746402 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23187311178247735 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4095625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4369182180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/hotmailuser/Falcon3Slerp4-10B/ccb85394-5252-48d4-8980-8b3a6c67ab1a.json b/data/hfopenllm_v2/hotmailuser/Falcon3Slerp4-10B/ccb85394-5252-48d4-8980-8b3a6c67ab1a.json new file mode 100644 index 000000000..ad19a7974 --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/Falcon3Slerp4-10B/ccb85394-5252-48d4-8980-8b3a6c67ab1a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hotmailuser_Falcon3Slerp4-10B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Falcon3Slerp4-10B", + "id": "hotmailuser/Falcon3Slerp4-10B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6072 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6114 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2289 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on 
GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3289 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4017 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4387 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/Falcon3Slerp4-10B/d5466af4-2bef-4ce8-a659-9e05a5e674b6.json b/data/hfopenllm_v2/hotmailuser/Falcon3Slerp4-10B/d5466af4-2bef-4ce8-a659-9e05a5e674b6.json deleted file mode 100644 index dcfaf689b..000000000 --- a/data/hfopenllm_v2/hotmailuser/Falcon3Slerp4-10B/d5466af4-2bef-4ce8-a659-9e05a5e674b6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hotmailuser_Falcon3Slerp4-10B/1762652580.19215", - "retrieved_timestamp": "1762652580.192151", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "hotmailuser/Falcon3Slerp4-10B", - "developer": "hotmailuser", - "inference_platform": "unknown", - "id": "hotmailuser/Falcon3Slerp4-10B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6072254950198805 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.611433776236228 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22885196374622357 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40175 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4387466755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/hotmailuser/FalconSlerp-3B/2db7aa3c-4969-40c0-b8c6-1ff5c953ba23.json b/data/hfopenllm_v2/hotmailuser/FalconSlerp-3B/2db7aa3c-4969-40c0-b8c6-1ff5c953ba23.json deleted file mode 100644 index 2561e1949..000000000 --- a/data/hfopenllm_v2/hotmailuser/FalconSlerp-3B/2db7aa3c-4969-40c0-b8c6-1ff5c953ba23.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hotmailuser_FalconSlerp-3B/1762652580.19236", - "retrieved_timestamp": "1762652580.1923609", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "hotmailuser/FalconSlerp-3B", - "developer": "hotmailuser", - "inference_platform": "unknown", - "id": "hotmailuser/FalconSlerp-3B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.228 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5694568190179834 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46239111387485293 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17598187311178248 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3989270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29679188829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/hotmailuser/FalconSlerp-3B/ea9837ff-f4c7-4bb0-b2af-7ae26371baf0.json b/data/hfopenllm_v2/hotmailuser/FalconSlerp-3B/ea9837ff-f4c7-4bb0-b2af-7ae26371baf0.json new file mode 100644 index 000000000..e5f0a213e --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/FalconSlerp-3B/ea9837ff-f4c7-4bb0-b2af-7ae26371baf0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hotmailuser_FalconSlerp-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + 
}, + "model_info": { + "name": "FalconSlerp-3B", + "id": "hotmailuser/FalconSlerp-3B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.228 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5695 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4624 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.176 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2878 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3989 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2968 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/FalconSlerp1-7B/5d01fa6d-4280-4926-b166-e98892ee60f4.json b/data/hfopenllm_v2/hotmailuser/FalconSlerp1-7B/5d01fa6d-4280-4926-b166-e98892ee60f4.json deleted file mode 100644 index c5ffcae72..000000000 --- a/data/hfopenllm_v2/hotmailuser/FalconSlerp1-7B/5d01fa6d-4280-4926-b166-e98892ee60f4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hotmailuser_FalconSlerp1-7B/1762652580.1925812", - "retrieved_timestamp": "1762652580.192582", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "hotmailuser/FalconSlerp1-7B", - "developer": "hotmailuser", - "inference_platform": "unknown", - "id": 
"hotmailuser/FalconSlerp1-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5394564200765082 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5354677787663963 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23791540785498488 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44525 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4128989361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/hotmailuser/FalconSlerp1-7B/fe9012a7-d07f-48d4-b460-eca256078d8b.json b/data/hfopenllm_v2/hotmailuser/FalconSlerp1-7B/fe9012a7-d07f-48d4-b460-eca256078d8b.json new file mode 100644 index 000000000..500ef8052 --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/FalconSlerp1-7B/fe9012a7-d07f-48d4-b460-eca256078d8b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hotmailuser_FalconSlerp1-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FalconSlerp1-7B", + "id": "hotmailuser/FalconSlerp1-7B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5395 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5355 + } + }, + { + "evaluation_name": "MATH Level 5", + 
"source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2379 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3196 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4452 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4129 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/FalconSlerp2-7B/8e8d2071-8e7d-4dad-8536-4698b2d00316.json b/data/hfopenllm_v2/hotmailuser/FalconSlerp2-7B/8e8d2071-8e7d-4dad-8536-4698b2d00316.json new file mode 100644 index 000000000..8e8a72c09 --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/FalconSlerp2-7B/8e8d2071-8e7d-4dad-8536-4698b2d00316.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hotmailuser_FalconSlerp2-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FalconSlerp2-7B", + "id": "hotmailuser/FalconSlerp2-7B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.616 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5538 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2983 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3196 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4479 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4141 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/FalconSlerp2-7B/fc8605ad-f7b9-4a73-afd3-85b996fc2549.json b/data/hfopenllm_v2/hotmailuser/FalconSlerp2-7B/fc8605ad-f7b9-4a73-afd3-85b996fc2549.json deleted file mode 100644 index dfbb71373..000000000 --- a/data/hfopenllm_v2/hotmailuser/FalconSlerp2-7B/fc8605ad-f7b9-4a73-afd3-85b996fc2549.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hotmailuser_FalconSlerp2-7B/1762652580.1928341", - "retrieved_timestamp": "1762652580.192835", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "hotmailuser/FalconSlerp2-7B", - "developer": "hotmailuser", - "inference_platform": "unknown", - "id": "hotmailuser/FalconSlerp2-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6160432097944565 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5537805428914538 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2983383685800604 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44788541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4140625 - } - } - ] -} diff --git a/data/hfopenllm_v2/hotmailuser/FalconSlerp3-10B/dbcb41be-9ed6-4244-ada8-77f363c3487e.json b/data/hfopenllm_v2/hotmailuser/FalconSlerp3-10B/dbcb41be-9ed6-4244-ada8-77f363c3487e.json new file mode 100644 index 000000000..3031ed1a6 --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/FalconSlerp3-10B/dbcb41be-9ed6-4244-ada8-77f363c3487e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hotmailuser_FalconSlerp3-10B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FalconSlerp3-10B", + "id": "hotmailuser/FalconSlerp3-10B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6002 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.606 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2273 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3356 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4031 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4323 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/FalconSlerp3-10B/f933fbc2-370e-4231-94a9-c833c2aa793d.json b/data/hfopenllm_v2/hotmailuser/FalconSlerp3-10B/f933fbc2-370e-4231-94a9-c833c2aa793d.json deleted file mode 100644 index feedb41a3..000000000 --- a/data/hfopenllm_v2/hotmailuser/FalconSlerp3-10B/f933fbc2-370e-4231-94a9-c833c2aa793d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hotmailuser_FalconSlerp3-10B/1762652580.1930392", - "retrieved_timestamp": "1762652580.19304", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "hotmailuser/FalconSlerp3-10B", - "developer": "hotmailuser", - "inference_platform": "unknown", - "id": "hotmailuser/FalconSlerp3-10B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6001564737119731 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6060288025434474 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22734138972809667 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33557046979865773 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4030833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4323470744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/hotmailuser/FalconSlerp3-7B/017a681e-1bbb-4890-bfcc-f276954678e1.json b/data/hfopenllm_v2/hotmailuser/FalconSlerp3-7B/017a681e-1bbb-4890-bfcc-f276954678e1.json deleted file mode 100644 index 83a5a8f1f..000000000 --- a/data/hfopenllm_v2/hotmailuser/FalconSlerp3-7B/017a681e-1bbb-4890-bfcc-f276954678e1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hotmailuser_FalconSlerp3-7B/1762652580.193249", - "retrieved_timestamp": "1762652580.19325", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "hotmailuser/FalconSlerp3-7B", - "developer": "hotmailuser", - "inference_platform": "unknown", - "id": "hotmailuser/FalconSlerp3-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6096235765546527 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5532966528909408 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3157099697885196 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45067708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41273271276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/hotmailuser/FalconSlerp3-7B/e48e2d7e-6c14-4bb1-bd12-74d93a145ca3.json b/data/hfopenllm_v2/hotmailuser/FalconSlerp3-7B/e48e2d7e-6c14-4bb1-bd12-74d93a145ca3.json new file mode 100644 index 000000000..5165cca5b --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/FalconSlerp3-7B/e48e2d7e-6c14-4bb1-bd12-74d93a145ca3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hotmailuser_FalconSlerp3-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FalconSlerp3-7B", + "id": "hotmailuser/FalconSlerp3-7B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6096 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + 
"source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5533 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3157 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3188 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4507 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4127 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/FalconSlerp4-7B/30c2d908-3eaf-408a-a2b5-301e0cd9e052.json b/data/hfopenllm_v2/hotmailuser/FalconSlerp4-7B/30c2d908-3eaf-408a-a2b5-301e0cd9e052.json new file mode 100644 index 000000000..7cb786f59 --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/FalconSlerp4-7B/30c2d908-3eaf-408a-a2b5-301e0cd9e052.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hotmailuser_FalconSlerp4-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FalconSlerp4-7B", + "id": "hotmailuser/FalconSlerp4-7B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6285 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.5524 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2213 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3322 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4585 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4032 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/FalconSlerp4-7B/d6ac7c9f-212e-4000-b89e-d977122d2e2b.json b/data/hfopenllm_v2/hotmailuser/FalconSlerp4-7B/d6ac7c9f-212e-4000-b89e-d977122d2e2b.json deleted file mode 100644 index 356475a32..000000000 --- a/data/hfopenllm_v2/hotmailuser/FalconSlerp4-7B/d6ac7c9f-212e-4000-b89e-d977122d2e2b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hotmailuser_FalconSlerp4-7B/1762652580.193457", - "retrieved_timestamp": "1762652580.1934578", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "hotmailuser/FalconSlerp4-7B", - "developer": "hotmailuser", - "inference_platform": "unknown", - "id": "hotmailuser/FalconSlerp4-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6284580468711907 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5523506352993854 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2212990936555891 - } - }, - { - "evaluation_name": "GPQA", - 
"metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33221476510067116 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4585208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4031748670212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/hotmailuser/FalconSlerp6-7B/88a4587f-d3d4-4b08-b800-13a2daf4a660.json b/data/hfopenllm_v2/hotmailuser/FalconSlerp6-7B/88a4587f-d3d4-4b08-b800-13a2daf4a660.json deleted file mode 100644 index 779560b9e..000000000 --- a/data/hfopenllm_v2/hotmailuser/FalconSlerp6-7B/88a4587f-d3d4-4b08-b800-13a2daf4a660.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hotmailuser_FalconSlerp6-7B/1762652580.193665", - "retrieved_timestamp": "1762652580.193666", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "hotmailuser/FalconSlerp6-7B", - "developer": "hotmailuser", - "inference_platform": "unknown", - "id": "hotmailuser/FalconSlerp6-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6026542906155667 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5383801786207648 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20468277945619334 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44921875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39951795212765956 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/hotmailuser/FalconSlerp6-7B/f7624d04-66d1-4c05-8c01-d015ecf8412c.json b/data/hfopenllm_v2/hotmailuser/FalconSlerp6-7B/f7624d04-66d1-4c05-8c01-d015ecf8412c.json new file mode 100644 index 000000000..df78b9118 --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/FalconSlerp6-7B/f7624d04-66d1-4c05-8c01-d015ecf8412c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hotmailuser_FalconSlerp6-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FalconSlerp6-7B", + "id": "hotmailuser/FalconSlerp6-7B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6027 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5384 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2047 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.318 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4492 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3995 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/Gemma2Crono-27B/511e4aad-1e5a-4515-9433-46989fc3945b.json b/data/hfopenllm_v2/hotmailuser/Gemma2Crono-27B/511e4aad-1e5a-4515-9433-46989fc3945b.json new file mode 100644 index 000000000..fbd307ade --- /dev/null +++ 
b/data/hfopenllm_v2/hotmailuser/Gemma2Crono-27B/511e4aad-1e5a-4515-9433-46989fc3945b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hotmailuser_Gemma2Crono-27B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma2Crono-27B", + "id": "hotmailuser/Gemma2Crono-27B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7086 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6505 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2424 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3708 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4567 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4633 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/Gemma2SimPO-27B/863e71ec-03a4-47ed-8bc9-b064d5571162.json b/data/hfopenllm_v2/hotmailuser/Gemma2SimPO-27B/863e71ec-03a4-47ed-8bc9-b064d5571162.json new file mode 100644 index 000000000..710203d42 --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/Gemma2SimPO-27B/863e71ec-03a4-47ed-8bc9-b064d5571162.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hotmailuser_Gemma2SimPO-27B/1770682486.623709", + "retrieved_timestamp": 
"1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma2SimPO-27B", + "id": "hotmailuser/Gemma2SimPO-27B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7222 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6413 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2817 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3582 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4447 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4642 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/Gemma2atlas-27B/6a6dfcb4-192b-44ff-a34f-76b31bbf5ad3.json b/data/hfopenllm_v2/hotmailuser/Gemma2atlas-27B/6a6dfcb4-192b-44ff-a34f-76b31bbf5ad3.json new file mode 100644 index 000000000..b057bd05c --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/Gemma2atlas-27B/6a6dfcb4-192b-44ff-a34f-76b31bbf5ad3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hotmailuser_Gemma2atlas-27B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma2atlas-27B", + "id": 
"hotmailuser/Gemma2atlas-27B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7214 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6545 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2145 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3557 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4445 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.475 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/Gemma2magnum-27b/e0dbec0b-a154-448a-be23-ef9b764469ea.json b/data/hfopenllm_v2/hotmailuser/Gemma2magnum-27b/e0dbec0b-a154-448a-be23-ef9b764469ea.json new file mode 100644 index 000000000..29f7bb0c0 --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/Gemma2magnum-27b/e0dbec0b-a154-448a-be23-ef9b764469ea.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hotmailuser_Gemma2magnum-27b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma2magnum-27b", + "id": "hotmailuser/Gemma2magnum-27b", + "developer": "hotmailuser", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } + }, + "evaluation_results": [ + { + 
"evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5051 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.62 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2205 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3851 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4723 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4596 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/Llama-Hermes-slerp-8B/ecd91300-b0cf-48ce-9e5c-253a7991f90e.json b/data/hfopenllm_v2/hotmailuser/Llama-Hermes-slerp-8B/ecd91300-b0cf-48ce-9e5c-253a7991f90e.json new file mode 100644 index 000000000..2af872ca1 --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/Llama-Hermes-slerp-8B/ecd91300-b0cf-48ce-9e5c-253a7991f90e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hotmailuser_Llama-Hermes-slerp-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-Hermes-slerp-8B", + "id": "hotmailuser/Llama-Hermes-slerp-8B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.339 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.531 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0801 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4078 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3331 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/Llama-Hermes-slerp2-8B/e3df71f1-63e1-40f1-918d-07cb3ec939cf.json b/data/hfopenllm_v2/hotmailuser/Llama-Hermes-slerp2-8B/e3df71f1-63e1-40f1-918d-07cb3ec939cf.json new file mode 100644 index 000000000..c653551a2 --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/Llama-Hermes-slerp2-8B/e3df71f1-63e1-40f1-918d-07cb3ec939cf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hotmailuser_Llama-Hermes-slerp2-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-Hermes-slerp2-8B", + "id": "hotmailuser/Llama-Hermes-slerp2-8B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3728 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", 
+ "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5265 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0974 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4248 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3379 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/LlamaStock-8B/52066a23-9847-490e-90e3-57eee3c63276.json b/data/hfopenllm_v2/hotmailuser/LlamaStock-8B/52066a23-9847-490e-90e3-57eee3c63276.json new file mode 100644 index 000000000..8711f86d4 --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/LlamaStock-8B/52066a23-9847-490e-90e3-57eee3c63276.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hotmailuser_LlamaStock-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LlamaStock-8B", + "id": "hotmailuser/LlamaStock-8B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.425 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5329 + 
} + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1699 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3272 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4129 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3807 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/Mistral-modelstock-24B/91f15ba3-a062-4b01-8a61-6e51fdf5f8d4.json b/data/hfopenllm_v2/hotmailuser/Mistral-modelstock-24B/91f15ba3-a062-4b01-8a61-6e51fdf5f8d4.json new file mode 100644 index 000000000..656587aa6 --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/Mistral-modelstock-24B/91f15ba3-a062-4b01-8a61-6e51fdf5f8d4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hotmailuser_Mistral-modelstock-24B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-modelstock-24B", + "id": "hotmailuser/Mistral-modelstock-24B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3424 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6452 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1307 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4102 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.459 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.507 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/Mistral-modelstock2-24B/323630ee-fbe0-49a7-aa11-816fde38ba2d.json b/data/hfopenllm_v2/hotmailuser/Mistral-modelstock2-24B/323630ee-fbe0-49a7-aa11-816fde38ba2d.json new file mode 100644 index 000000000..2a8edbdfe --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/Mistral-modelstock2-24B/323630ee-fbe0-49a7-aa11-816fde38ba2d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hotmailuser_Mistral-modelstock2-24B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-modelstock2-24B", + "id": "hotmailuser/Mistral-modelstock2-24B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4318 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6689 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2402 + } + }, + { + 
"evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3926 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4616 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5318 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/Phi4-Slerp4-14B/e5c8f97d-1873-4c9d-8bed-50dc592543db.json b/data/hfopenllm_v2/hotmailuser/Phi4-Slerp4-14B/e5c8f97d-1873-4c9d-8bed-50dc592543db.json new file mode 100644 index 000000000..4377cee38 --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/Phi4-Slerp4-14B/e5c8f97d-1873-4c9d-8bed-50dc592543db.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hotmailuser_Phi4-Slerp4-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi4-Slerp4-14B", + "id": "hotmailuser/Phi4-Slerp4-14B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0629 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6731 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3474 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3968 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5097 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5278 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/Qwen2.5-HomerSlerp-7B/7ee2803c-b8f8-4156-8472-bab4baab8863.json b/data/hfopenllm_v2/hotmailuser/Qwen2.5-HomerSlerp-7B/7ee2803c-b8f8-4156-8472-bab4baab8863.json new file mode 100644 index 000000000..1e3e4c4b1 --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/Qwen2.5-HomerSlerp-7B/7ee2803c-b8f8-4156-8472-bab4baab8863.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hotmailuser_Qwen2.5-HomerSlerp-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-HomerSlerp-7B", + "id": "hotmailuser/Qwen2.5-HomerSlerp-7B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4488 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5633 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3316 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3138 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": 
"TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4383 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4549 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/QwenModelStock-1.8B/78573f63-3073-4be4-93a7-0ea00b1383fd.json b/data/hfopenllm_v2/hotmailuser/QwenModelStock-1.8B/78573f63-3073-4be4-93a7-0ea00b1383fd.json new file mode 100644 index 000000000..818c900fe --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/QwenModelStock-1.8B/78573f63-3073-4be4-93a7-0ea00b1383fd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hotmailuser_QwenModelStock-1.8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwenModelStock-1.8B", + "id": "hotmailuser/QwenModelStock-1.8B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3263 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4188 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0989 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4359 + } + }, 
+ { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2959 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/QwenSlerp-14B/42da7295-d78d-49a4-9279-8406063240c4.json b/data/hfopenllm_v2/hotmailuser/QwenSlerp-14B/42da7295-d78d-49a4-9279-8406063240c4.json new file mode 100644 index 000000000..448439df9 --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/QwenSlerp-14B/42da7295-d78d-49a4-9279-8406063240c4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hotmailuser_QwenSlerp-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwenSlerp-14B", + "id": "hotmailuser/QwenSlerp-14B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7025 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6491 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3837 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3876 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4634 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.54 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/QwenSlerp-3B/b61c5735-53ca-4dda-a223-79921eee7f3e.json b/data/hfopenllm_v2/hotmailuser/QwenSlerp-3B/b61c5735-53ca-4dda-a223-79921eee7f3e.json new file mode 100644 index 000000000..ab9d3d420 --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/QwenSlerp-3B/b61c5735-53ca-4dda-a223-79921eee7f3e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hotmailuser_QwenSlerp-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwenSlerp-3B", + "id": "hotmailuser/QwenSlerp-3B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4334 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4892 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2749 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4317 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3693 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/QwenSlerp-7B/310124ef-e33f-49de-83eb-e665a5143aaa.json 
b/data/hfopenllm_v2/hotmailuser/QwenSlerp-7B/310124ef-e33f-49de-83eb-e665a5143aaa.json new file mode 100644 index 000000000..14cc9dac6 --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/QwenSlerp-7B/310124ef-e33f-49de-83eb-e665a5143aaa.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hotmailuser_QwenSlerp-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwenSlerp-7B", + "id": "hotmailuser/QwenSlerp-7B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4673 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5636 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3444 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.318 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4409 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4509 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/QwenSlerp2-14B/c9b056df-8bbe-4959-ab44-85813157c95c.json b/data/hfopenllm_v2/hotmailuser/QwenSlerp2-14B/c9b056df-8bbe-4959-ab44-85813157c95c.json new file mode 100644 index 000000000..746637a2e --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/QwenSlerp2-14B/c9b056df-8bbe-4959-ab44-85813157c95c.json @@ -0,0 +1,132 @@ +{ + 
"schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hotmailuser_QwenSlerp2-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwenSlerp2-14B", + "id": "hotmailuser/QwenSlerp2-14B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7037 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6493 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3965 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3809 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4807 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5379 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/QwenSlerp2-3B/7a60385f-48dd-4926-8b66-3d42a1631db3.json b/data/hfopenllm_v2/hotmailuser/QwenSlerp2-3B/7a60385f-48dd-4926-8b66-3d42a1631db3.json new file mode 100644 index 000000000..4a1951fcf --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/QwenSlerp2-3B/7a60385f-48dd-4926-8b66-3d42a1631db3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hotmailuser_QwenSlerp2-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwenSlerp2-3B", + "id": "hotmailuser/QwenSlerp2-3B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.428 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4802 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2606 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4252 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3742 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/QwenSlerp3-14B/da365c7b-74d0-4a9f-a8fd-cf4049ec4de6.json b/data/hfopenllm_v2/hotmailuser/QwenSlerp3-14B/da365c7b-74d0-4a9f-a8fd-cf4049ec4de6.json new file mode 100644 index 000000000..812599df3 --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/QwenSlerp3-14B/da365c7b-74d0-4a9f-a8fd-cf4049ec4de6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hotmailuser_QwenSlerp3-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwenSlerp3-14B", + "id": "hotmailuser/QwenSlerp3-14B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "additional_details": { + 
"precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6632 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6267 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4305 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3666 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4808 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5263 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/QwenSparse-7B/e2930715-b616-49a4-83bc-53e92fc3580f.json b/data/hfopenllm_v2/hotmailuser/QwenSparse-7B/e2930715-b616-49a4-83bc-53e92fc3580f.json new file mode 100644 index 000000000..8f33a3dbf --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/QwenSparse-7B/e2930715-b616-49a4-83bc-53e92fc3580f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hotmailuser_QwenSparse-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwenSparse-7B", + "id": "hotmailuser/QwenSparse-7B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1086 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2896 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0106 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3562 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1122 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/QwenStock-0.5B/543f45e0-a158-4fdb-bbb1-8deb38f4515b.json b/data/hfopenllm_v2/hotmailuser/QwenStock-0.5B/543f45e0-a158-4fdb-bbb1-8deb38f4515b.json new file mode 100644 index 000000000..be59d9fa4 --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/QwenStock-0.5B/543f45e0-a158-4fdb-bbb1-8deb38f4515b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hotmailuser_QwenStock-0.5B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwenStock-0.5B", + "id": "hotmailuser/QwenStock-0.5B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2049 + } + }, + { + "evaluation_name": "BBH", + "source_data": { 
+ "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2912 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3575 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1167 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/QwenStock-1.7B/b96a20e0-d044-4a66-8909-437aeaef569c.json b/data/hfopenllm_v2/hotmailuser/QwenStock-1.7B/b96a20e0-d044-4a66-8909-437aeaef569c.json new file mode 100644 index 000000000..efbdd247d --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/QwenStock-1.7B/b96a20e0-d044-4a66-8909-437aeaef569c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hotmailuser_QwenStock-1.7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwenStock-1.7B", + "id": "hotmailuser/QwenStock-1.7B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3214 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.4188 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0997 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2878 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4412 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2955 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/QwenStock1-14B/408742ff-4b21-46dc-b4d6-4c78d652d228.json b/data/hfopenllm_v2/hotmailuser/QwenStock1-14B/408742ff-4b21-46dc-b4d6-4c78d652d228.json new file mode 100644 index 000000000..2d5a4767d --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/QwenStock1-14B/408742ff-4b21-46dc-b4d6-4c78d652d228.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hotmailuser_QwenStock1-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwenStock1-14B", + "id": "hotmailuser/QwenStock1-14B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6693 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6502 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3701 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3859 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4781 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5416 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/RombosBeagle-v2beta-MGS-32B/496a9fbe-376c-4546-bd90-b42f583924ce.json b/data/hfopenllm_v2/hotmailuser/RombosBeagle-v2beta-MGS-32B/496a9fbe-376c-4546-bd90-b42f583924ce.json new file mode 100644 index 000000000..3e888318d --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/RombosBeagle-v2beta-MGS-32B/496a9fbe-376c-4546-bd90-b42f583924ce.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/hotmailuser_RombosBeagle-v2beta-MGS-32B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RombosBeagle-v2beta-MGS-32B", + "id": "hotmailuser/RombosBeagle-v2beta-MGS-32B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5157 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7037 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.4992 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.38 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5021 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5908 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/RombosBeagle-v2beta-MGS-32B/c507c0ac-759a-4013-8dd0-7ab5a959ca65.json b/data/hfopenllm_v2/hotmailuser/RombosBeagle-v2beta-MGS-32B/c507c0ac-759a-4013-8dd0-7ab5a959ca65.json deleted file mode 100644 index 9ae073b42..000000000 --- a/data/hfopenllm_v2/hotmailuser/RombosBeagle-v2beta-MGS-32B/c507c0ac-759a-4013-8dd0-7ab5a959ca65.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hotmailuser_RombosBeagle-v2beta-MGS-32B/1762652580.199307", - "retrieved_timestamp": "1762652580.199308", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "hotmailuser/RombosBeagle-v2beta-MGS-32B", - "developer": "hotmailuser", - "inference_platform": "unknown", - "id": "hotmailuser/RombosBeagle-v2beta-MGS-32B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5156761836371937 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7037350002757341 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49924471299093653 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3800335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5020833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5907579787234043 - } - } - ] -} diff --git a/data/hfopenllm_v2/huggyllama/llama-13b/f32c07b4-21a8-4cd2-91f8-f0f26d0b1b38.json b/data/hfopenllm_v2/huggyllama/llama-13b/f32c07b4-21a8-4cd2-91f8-f0f26d0b1b38.json new file mode 100644 index 000000000..a7410ce47 --- /dev/null +++ b/data/hfopenllm_v2/huggyllama/llama-13b/f32c07b4-21a8-4cd2-91f8-f0f26d0b1b38.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/huggyllama_llama-13b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-13b", + "id": "huggyllama/llama-13b", + "developer": "huggyllama", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.016 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2411 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3988 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0204 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.255 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3462 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.1952 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/huggyllama/llama-65b/cc36cc37-0f41-42aa-8051-54cc135820ef.json b/data/hfopenllm_v2/huggyllama/llama-65b/cc36cc37-0f41-42aa-8051-54cc135820ef.json new file mode 100644 index 000000000..89a0f62be --- /dev/null +++ b/data/hfopenllm_v2/huggyllama/llama-65b/cc36cc37-0f41-42aa-8051-54cc135820ef.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/huggyllama_llama-65b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-65b", + "id": "huggyllama/llama-65b", + "developer": "huggyllama", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 65.286 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2526 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4703 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.031 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3595 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3078 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/huggyllama/llama-7b/20d3dac4-9f8c-431c-b20f-364dd860e37f.json b/data/hfopenllm_v2/huggyllama/llama-7b/20d3dac4-9f8c-431c-b20f-364dd860e37f.json new file mode 100644 index 
000000000..18ff58949 --- /dev/null +++ b/data/hfopenllm_v2/huggyllama/llama-7b/20d3dac4-9f8c-431c-b20f-364dd860e37f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/huggyllama_llama-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-7b", + "id": "huggyllama/llama-7b", + "developer": "huggyllama", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.738 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2501 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3277 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0083 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2525 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3354 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1313 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/huihui-ai/DeepSeek-R1-Distill-Qwen-14B-abliterated-v2/89022ea8-2a5b-4eba-8d7a-320ba13d30a4.json b/data/hfopenllm_v2/huihui-ai/DeepSeek-R1-Distill-Qwen-14B-abliterated-v2/89022ea8-2a5b-4eba-8d7a-320ba13d30a4.json new file mode 100644 index 000000000..3494d0b6e --- /dev/null +++ b/data/hfopenllm_v2/huihui-ai/DeepSeek-R1-Distill-Qwen-14B-abliterated-v2/89022ea8-2a5b-4eba-8d7a-320ba13d30a4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/huihui-ai_DeepSeek-R1-Distill-Qwen-14B-abliterated-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-R1-Distill-Qwen-14B-abliterated-v2", + "id": "huihui-ai/DeepSeek-R1-Distill-Qwen-14B-abliterated-v2", + "developer": "huihui-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4211 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3487 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2205 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4701 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1915 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/huihui-ai/QwQ-32B-Coder-Fusion-7030/5fb3b31d-8c2c-4d76-8532-1bff0f793f4b.json b/data/hfopenllm_v2/huihui-ai/QwQ-32B-Coder-Fusion-7030/5fb3b31d-8c2c-4d76-8532-1bff0f793f4b.json deleted file mode 100644 index 07ec3473d..000000000 --- a/data/hfopenllm_v2/huihui-ai/QwQ-32B-Coder-Fusion-7030/5fb3b31d-8c2c-4d76-8532-1bff0f793f4b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/huihui-ai_QwQ-32B-Coder-Fusion-7030/1762652580.2006452", - "retrieved_timestamp": "1762652580.200646", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "huihui-ai/QwQ-32B-Coder-Fusion-7030", - "developer": "huihui-ai", - "inference_platform": "unknown", - "id": "huihui-ai/QwQ-32B-Coder-Fusion-7030", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38650779930584184 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6177864730931621 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2794561933534743 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39222916666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4367519946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/huihui-ai/QwQ-32B-Coder-Fusion-7030/97bfd152-79c6-4c96-8d3e-588275339e41.json b/data/hfopenllm_v2/huihui-ai/QwQ-32B-Coder-Fusion-7030/97bfd152-79c6-4c96-8d3e-588275339e41.json new file mode 100644 index 000000000..20f0742c2 --- /dev/null +++ b/data/hfopenllm_v2/huihui-ai/QwQ-32B-Coder-Fusion-7030/97bfd152-79c6-4c96-8d3e-588275339e41.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/huihui-ai_QwQ-32B-Coder-Fusion-7030/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwQ-32B-Coder-Fusion-7030", + "id": "huihui-ai/QwQ-32B-Coder-Fusion-7030", + "developer": "huihui-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3865 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6178 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2795 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2844 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3922 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4368 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/huihui-ai/QwQ-32B-Coder-Fusion-8020/461ee093-b573-4ce9-9168-c9852dc9745b.json b/data/hfopenllm_v2/huihui-ai/QwQ-32B-Coder-Fusion-8020/461ee093-b573-4ce9-9168-c9852dc9745b.json deleted file mode 100644 index 16ec30da0..000000000 --- a/data/hfopenllm_v2/huihui-ai/QwQ-32B-Coder-Fusion-8020/461ee093-b573-4ce9-9168-c9852dc9745b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/huihui-ai_QwQ-32B-Coder-Fusion-8020/1762652580.200916", - "retrieved_timestamp": "1762652580.200917", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "huihui-ai/QwQ-32B-Coder-Fusion-8020", - "developer": "huihui-ai", - "inference_platform": "unknown", - "id": "huihui-ai/QwQ-32B-Coder-Fusion-8020", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6020547702318737 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6664531829718748 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3548657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42934374999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5367353723404256 - } - } - ] -} diff --git a/data/hfopenllm_v2/huihui-ai/QwQ-32B-Coder-Fusion-8020/93061947-2bcf-482e-ab22-38ef8ee33bcf.json b/data/hfopenllm_v2/huihui-ai/QwQ-32B-Coder-Fusion-8020/93061947-2bcf-482e-ab22-38ef8ee33bcf.json new file mode 100644 index 000000000..10b206b42 --- /dev/null +++ b/data/hfopenllm_v2/huihui-ai/QwQ-32B-Coder-Fusion-8020/93061947-2bcf-482e-ab22-38ef8ee33bcf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/huihui-ai_QwQ-32B-Coder-Fusion-8020/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwQ-32B-Coder-Fusion-8020", + "id": "huihui-ai/QwQ-32B-Coder-Fusion-8020", + "developer": "huihui-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6021 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6665 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4592 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": 
"Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3549 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4293 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5367 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/huihui-ai/QwQ-32B-Coder-Fusion-9010/41d5fb44-855b-4ff1-8f5d-95b8a9f9a9af.json b/data/hfopenllm_v2/huihui-ai/QwQ-32B-Coder-Fusion-9010/41d5fb44-855b-4ff1-8f5d-95b8a9f9a9af.json deleted file mode 100644 index c26247c36..000000000 --- a/data/hfopenllm_v2/huihui-ai/QwQ-32B-Coder-Fusion-9010/41d5fb44-855b-4ff1-8f5d-95b8a9f9a9af.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/huihui-ai_QwQ-32B-Coder-Fusion-9010/1762652580.201131", - "retrieved_timestamp": "1762652580.201132", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "huihui-ai/QwQ-32B-Coder-Fusion-9010", - "developer": "huihui-ai", - "inference_platform": "unknown", - "id": "huihui-ai/QwQ-32B-Coder-Fusion-9010", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5778246164620984 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6727405551499568 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5317220543806647 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3615771812080537 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4681979166666667 - } - }, - { - "evaluation_name": 
"MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5600066489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/huihui-ai/QwQ-32B-Coder-Fusion-9010/8f65748b-1251-49f8-bfed-d1e4a937d5ba.json b/data/hfopenllm_v2/huihui-ai/QwQ-32B-Coder-Fusion-9010/8f65748b-1251-49f8-bfed-d1e4a937d5ba.json new file mode 100644 index 000000000..9d7bf036a --- /dev/null +++ b/data/hfopenllm_v2/huihui-ai/QwQ-32B-Coder-Fusion-9010/8f65748b-1251-49f8-bfed-d1e4a937d5ba.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/huihui-ai_QwQ-32B-Coder-Fusion-9010/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwQ-32B-Coder-Fusion-9010", + "id": "huihui-ai/QwQ-32B-Coder-Fusion-9010", + "developer": "huihui-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5778 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6727 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5317 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3616 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4682 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.56 + } + } + 
] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/huihui-ai/Qwen2.5-14B-Instruct-abliterated-v2/4f278881-69d3-42b5-b72c-ff8627a6ef44.json b/data/hfopenllm_v2/huihui-ai/Qwen2.5-14B-Instruct-abliterated-v2/4f278881-69d3-42b5-b72c-ff8627a6ef44.json new file mode 100644 index 000000000..ca8850291 --- /dev/null +++ b/data/hfopenllm_v2/huihui-ai/Qwen2.5-14B-Instruct-abliterated-v2/4f278881-69d3-42b5-b72c-ff8627a6ef44.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/huihui-ai_Qwen2.5-14B-Instruct-abliterated-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-Instruct-abliterated-v2", + "id": "huihui-ai/Qwen2.5-14B-Instruct-abliterated-v2", + "developer": "huihui-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8328 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6324 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5302 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3339 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.422 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4962 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/huihui-ai/Qwen2.5-14B-Instruct-abliterated-v2/92cad41b-64b5-48db-b865-77d0ea2ef834.json 
b/data/hfopenllm_v2/huihui-ai/Qwen2.5-14B-Instruct-abliterated-v2/92cad41b-64b5-48db-b865-77d0ea2ef834.json deleted file mode 100644 index 9d493d83f..000000000 --- a/data/hfopenllm_v2/huihui-ai/Qwen2.5-14B-Instruct-abliterated-v2/92cad41b-64b5-48db-b865-77d0ea2ef834.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/huihui-ai_Qwen2.5-14B-Instruct-abliterated-v2/1762652580.201351", - "retrieved_timestamp": "1762652580.201352", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "huihui-ai/Qwen2.5-14B-Instruct-abliterated-v2", - "developer": "huihui-ai", - "inference_platform": "unknown", - "id": "huihui-ai/Qwen2.5-14B-Instruct-abliterated-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8327637335602867 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6323822447052897 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5302114803625377 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3338926174496644 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42196875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49617686170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/huihui-ai/Qwen2.5-72B-Instruct-abliterated/b892c2f3-4aa6-4b19-80e5-1b0f5e0eda25.json b/data/hfopenllm_v2/huihui-ai/Qwen2.5-72B-Instruct-abliterated/b892c2f3-4aa6-4b19-80e5-1b0f5e0eda25.json deleted file mode 100644 index 96e9232ea..000000000 --- a/data/hfopenllm_v2/huihui-ai/Qwen2.5-72B-Instruct-abliterated/b892c2f3-4aa6-4b19-80e5-1b0f5e0eda25.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/huihui-ai_Qwen2.5-72B-Instruct-abliterated/1762652580.2015731", - "retrieved_timestamp": "1762652580.2015731", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF 
Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "huihui-ai/Qwen2.5-72B-Instruct-abliterated", - "developer": "huihui-ai", - "inference_platform": "unknown", - "id": "huihui-ai/Qwen2.5-72B-Instruct-abliterated", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8592667455684251 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7189881596250237 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6012084592145015 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3951342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4232708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5536901595744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/huihui-ai/Qwen2.5-72B-Instruct-abliterated/d88e85c5-73df-46cc-9234-f0556592ad5a.json b/data/hfopenllm_v2/huihui-ai/Qwen2.5-72B-Instruct-abliterated/d88e85c5-73df-46cc-9234-f0556592ad5a.json new file mode 100644 index 000000000..716aaba17 --- /dev/null +++ b/data/hfopenllm_v2/huihui-ai/Qwen2.5-72B-Instruct-abliterated/d88e85c5-73df-46cc-9234-f0556592ad5a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/huihui-ai_Qwen2.5-72B-Instruct-abliterated/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-72B-Instruct-abliterated", + "id": "huihui-ai/Qwen2.5-72B-Instruct-abliterated", + "developer": "huihui-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8593 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + 
"source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.719 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6012 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3951 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4233 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5537 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2/15c4b42b-ee8f-4f0d-8d54-7d827133fe7f.json b/data/hfopenllm_v2/huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2/15c4b42b-ee8f-4f0d-8d54-7d827133fe7f.json deleted file mode 100644 index a05beae58..000000000 --- a/data/hfopenllm_v2/huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2/15c4b42b-ee8f-4f0d-8d54-7d827133fe7f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/huihui-ai_Qwen2.5-7B-Instruct-abliterated-v2/1762652580.201998", - "retrieved_timestamp": "1762652580.201998", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2", - "developer": "huihui-ai", - "inference_platform": "unknown", - "id": "huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7606484128778308 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": 
{ - "score": 0.5376688442794247 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4637462235649547 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3980625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42079454787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2/44d2a20d-e867-4fa5-af3d-087f9c1b4067.json b/data/hfopenllm_v2/huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2/44d2a20d-e867-4fa5-af3d-087f9c1b4067.json new file mode 100644 index 000000000..da59c59b9 --- /dev/null +++ b/data/hfopenllm_v2/huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2/44d2a20d-e867-4fa5-af3d-087f9c1b4067.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/huihui-ai_Qwen2.5-7B-Instruct-abliterated-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-Instruct-abliterated-v2", + "id": "huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2", + "developer": "huihui-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7606 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5377 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4637 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3981 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4208 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/huihui-ai/Qwen2.5-7B-Instruct-abliterated/625501d4-5d1e-48e0-8690-e301c51f652d.json b/data/hfopenllm_v2/huihui-ai/Qwen2.5-7B-Instruct-abliterated/625501d4-5d1e-48e0-8690-e301c51f652d.json deleted file mode 100644 index 27d26d6e7..000000000 --- a/data/hfopenllm_v2/huihui-ai/Qwen2.5-7B-Instruct-abliterated/625501d4-5d1e-48e0-8690-e301c51f652d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/huihui-ai_Qwen2.5-7B-Instruct-abliterated/1762652580.201783", - "retrieved_timestamp": "1762652580.2017841", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "huihui-ai/Qwen2.5-7B-Instruct-abliterated", - "developer": "huihui-ai", - "inference_platform": "unknown", - "id": "huihui-ai/Qwen2.5-7B-Instruct-abliterated", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7546033413564897 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5261589972829911 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45770392749244715 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39666666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41796875 - } - } - ] -} diff --git a/data/hfopenllm_v2/huihui-ai/Qwen2.5-7B-Instruct-abliterated/e83b3e7e-dc34-4b06-bcfe-95b3ba28aab4.json b/data/hfopenllm_v2/huihui-ai/Qwen2.5-7B-Instruct-abliterated/e83b3e7e-dc34-4b06-bcfe-95b3ba28aab4.json new file mode 100644 index 000000000..092cf89ac --- /dev/null +++ b/data/hfopenllm_v2/huihui-ai/Qwen2.5-7B-Instruct-abliterated/e83b3e7e-dc34-4b06-bcfe-95b3ba28aab4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/huihui-ai_Qwen2.5-7B-Instruct-abliterated/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-Instruct-abliterated", + "id": "huihui-ai/Qwen2.5-7B-Instruct-abliterated", + "developer": "huihui-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7546 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5262 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4577 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3154 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3967 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.418 + } + } + ] +} \ 
No newline at end of file diff --git a/data/hfopenllm_v2/huu-ontocord/wide_3b_orpo_stage1.1-ss1-orpo3/44f2948c-4564-44cc-98d8-4f82a30e1f09.json b/data/hfopenllm_v2/huu-ontocord/wide_3b_orpo_stage1.1-ss1-orpo3/44f2948c-4564-44cc-98d8-4f82a30e1f09.json new file mode 100644 index 000000000..a707ad657 --- /dev/null +++ b/data/hfopenllm_v2/huu-ontocord/wide_3b_orpo_stage1.1-ss1-orpo3/44f2948c-4564-44cc-98d8-4f82a30e1f09.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/huu-ontocord_wide_3b_orpo_stage1.1-ss1-orpo3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "wide_3b_orpo_stage1.1-ss1-orpo3", + "id": "huu-ontocord/wide_3b_orpo_stage1.1-ss1-orpo3", + "developer": "huu-ontocord", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1505 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2937 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0098 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3618 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1164 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/huu-ontocord/wide_3b_orpo_stage1.1-ss1-orpo3/50854a36-b87e-421d-b8d5-7a46054ecc59.json 
b/data/hfopenllm_v2/huu-ontocord/wide_3b_orpo_stage1.1-ss1-orpo3/50854a36-b87e-421d-b8d5-7a46054ecc59.json deleted file mode 100644 index ea6af20b7..000000000 --- a/data/hfopenllm_v2/huu-ontocord/wide_3b_orpo_stage1.1-ss1-orpo3/50854a36-b87e-421d-b8d5-7a46054ecc59.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/huu-ontocord_wide_3b_orpo_stage1.1-ss1-orpo3/1762652580.202209", - "retrieved_timestamp": "1762652580.20221", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "huu-ontocord/wide_3b_orpo_stage1.1-ss1-orpo3", - "developer": "huu-ontocord", - "inference_platform": "unknown", - "id": "huu-ontocord/wide_3b_orpo_stage1.1-ss1-orpo3", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15052726764983576 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936618285636837 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36178125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11643949468085106 - } - } - ] -} diff --git a/data/hfopenllm_v2/iFaz/llama31_8B_en_emo_v4/846cf1ff-62c3-44e7-b6dd-0135ec77451a.json b/data/hfopenllm_v2/iFaz/llama31_8B_en_emo_v4/846cf1ff-62c3-44e7-b6dd-0135ec77451a.json new file mode 100644 index 000000000..099d88b45 --- /dev/null +++ b/data/hfopenllm_v2/iFaz/llama31_8B_en_emo_v4/846cf1ff-62c3-44e7-b6dd-0135ec77451a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/iFaz_llama31_8B_en_emo_v4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama31_8B_en_emo_v4", + "id": "iFaz/llama31_8B_en_emo_v4", + "developer": "iFaz", + "inference_platform": 
"unknown", + "additional_details": { + "precision": "float16", + "architecture": "", + "params_billions": 4.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3043 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4916 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0884 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3643 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3049 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/iFaz/llama32_1B_en_emo_v1/d2054469-b38b-4b1d-bd40-7324319f8eca.json b/data/hfopenllm_v2/iFaz/llama32_1B_en_emo_v1/d2054469-b38b-4b1d-bd40-7324319f8eca.json new file mode 100644 index 000000000..03f4a931f --- /dev/null +++ b/data/hfopenllm_v2/iFaz/llama32_1B_en_emo_v1/d2054469-b38b-4b1d-bd40-7324319f8eca.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/iFaz_llama32_1B_en_emo_v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama32_1B_en_emo_v1", + "id": "iFaz/llama32_1B_en_emo_v1", + "developer": "iFaz", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.765 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": 
"google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4408 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.338 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0378 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3489 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1761 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/iFaz/llama32_3B_en_emo_1000_stp/ce60608d-5b52-49d4-bbce-4b20e8272cef.json b/data/hfopenllm_v2/iFaz/llama32_3B_en_emo_1000_stp/ce60608d-5b52-49d4-bbce-4b20e8272cef.json new file mode 100644 index 000000000..d3b7b81e6 --- /dev/null +++ b/data/hfopenllm_v2/iFaz/llama32_3B_en_emo_1000_stp/ce60608d-5b52-49d4-bbce-4b20e8272cef.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/iFaz_llama32_3B_en_emo_1000_stp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama32_3B_en_emo_1000_stp", + "id": "iFaz/llama32_3B_en_emo_1000_stp", + "developer": "iFaz", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.848 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7295 + } + }, + 
{ + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4522 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1465 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3621 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3123 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/iFaz/llama32_3B_en_emo_2000_stp/f177bb70-fb7c-4b57-965d-acbcb4936bfa.json b/data/hfopenllm_v2/iFaz/llama32_3B_en_emo_2000_stp/f177bb70-fb7c-4b57-965d-acbcb4936bfa.json new file mode 100644 index 000000000..3a25847fc --- /dev/null +++ b/data/hfopenllm_v2/iFaz/llama32_3B_en_emo_2000_stp/f177bb70-fb7c-4b57-965d-acbcb4936bfa.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/iFaz_llama32_3B_en_emo_2000_stp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama32_3B_en_emo_2000_stp", + "id": "iFaz/llama32_3B_en_emo_2000_stp", + "developer": "iFaz", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.848 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7369 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4535 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1533 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3527 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3098 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/iFaz/llama32_3B_en_emo_300_stp/a5b2ab3d-1f12-4a5a-a110-2514185568b6.json b/data/hfopenllm_v2/iFaz/llama32_3B_en_emo_300_stp/a5b2ab3d-1f12-4a5a-a110-2514185568b6.json new file mode 100644 index 000000000..b84be7a8b --- /dev/null +++ b/data/hfopenllm_v2/iFaz/llama32_3B_en_emo_300_stp/a5b2ab3d-1f12-4a5a-a110-2514185568b6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/iFaz_llama32_3B_en_emo_300_stp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama32_3B_en_emo_300_stp", + "id": "iFaz/llama32_3B_en_emo_300_stp", + "developer": "iFaz", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.848 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7256 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4505 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + 
"hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1601 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3621 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3148 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/iFaz/llama32_3B_en_emo_5000_stp/63b887a1-a0b9-46db-a563-b9bd67a0805a.json b/data/hfopenllm_v2/iFaz/llama32_3B_en_emo_5000_stp/63b887a1-a0b9-46db-a563-b9bd67a0805a.json new file mode 100644 index 000000000..2c79dd41d --- /dev/null +++ b/data/hfopenllm_v2/iFaz/llama32_3B_en_emo_5000_stp/63b887a1-a0b9-46db-a563-b9bd67a0805a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/iFaz_llama32_3B_en_emo_5000_stp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama32_3B_en_emo_5000_stp", + "id": "iFaz/llama32_3B_en_emo_5000_stp", + "developer": "iFaz", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.848 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.71 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4568 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.1292 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3446 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3067 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/iFaz/llama32_3B_en_emo_v2/92d122f7-f29d-49e3-99da-bf20edf377a2.json b/data/hfopenllm_v2/iFaz/llama32_3B_en_emo_v2/92d122f7-f29d-49e3-99da-bf20edf377a2.json new file mode 100644 index 000000000..bc0aeebe3 --- /dev/null +++ b/data/hfopenllm_v2/iFaz/llama32_3B_en_emo_v2/92d122f7-f29d-49e3-99da-bf20edf377a2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/iFaz_llama32_3B_en_emo_v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama32_3B_en_emo_v2", + "id": "iFaz/llama32_3B_en_emo_v2", + "developer": "iFaz", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.848 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5454 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4284 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1088 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3482 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3004 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/iFaz/llama32_3B_en_emo_v3/a0b71344-f3a8-4ad0-87c5-6393148488b1.json b/data/hfopenllm_v2/iFaz/llama32_3B_en_emo_v3/a0b71344-f3a8-4ad0-87c5-6393148488b1.json new file mode 100644 index 000000000..6e61f1e41 --- /dev/null +++ b/data/hfopenllm_v2/iFaz/llama32_3B_en_emo_v3/a0b71344-f3a8-4ad0-87c5-6393148488b1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/iFaz_llama32_3B_en_emo_v3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama32_3B_en_emo_v3", + "id": "iFaz/llama32_3B_en_emo_v3", + "developer": "iFaz", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.848 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5759 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4301 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.068 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": 
"TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3553 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/iRyanBell/ARC1-II/19afc23f-5849-4147-b240-9bb7ddea4d58.json b/data/hfopenllm_v2/iRyanBell/ARC1-II/19afc23f-5849-4147-b240-9bb7ddea4d58.json deleted file mode 100644 index cc9c1b492..000000000 --- a/data/hfopenllm_v2/iRyanBell/ARC1-II/19afc23f-5849-4147-b240-9bb7ddea4d58.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/iRyanBell_ARC1-II/1762652580.204559", - "retrieved_timestamp": "1762652580.204561", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "iRyanBell/ARC1-II", - "developer": "iRyanBell", - "inference_platform": "unknown", - "id": "iRyanBell/ARC1-II", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17083560508340093 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33817781029884353 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4912916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1685505319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/iRyanBell/ARC1-II/821ff784-c48a-4623-9fb5-b77b7114b625.json b/data/hfopenllm_v2/iRyanBell/ARC1-II/821ff784-c48a-4623-9fb5-b77b7114b625.json new file mode 100644 index 000000000..2ccabcdfe --- 
/dev/null +++ b/data/hfopenllm_v2/iRyanBell/ARC1-II/821ff784-c48a-4623-9fb5-b77b7114b625.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/iRyanBell_ARC1-II/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ARC1-II", + "id": "iRyanBell/ARC1-II", + "developer": "iRyanBell", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1708 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3382 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0219 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2718 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4913 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1686 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/iRyanBell/ARC1/62f9b47d-2860-44b3-8abb-3d441f4bdeb4.json b/data/hfopenllm_v2/iRyanBell/ARC1/62f9b47d-2860-44b3-8abb-3d441f4bdeb4.json deleted file mode 100644 index dd5dcc528..000000000 --- a/data/hfopenllm_v2/iRyanBell/ARC1/62f9b47d-2860-44b3-8abb-3d441f4bdeb4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/iRyanBell_ARC1/1762652580.204204", - "retrieved_timestamp": "1762652580.204204", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "iRyanBell/ARC1", - "developer": "iRyanBell", - "inference_platform": "unknown", - "id": "iRyanBell/ARC1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.441112913735555 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4902999658144703 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06873111782477341 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3990520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3371010638297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/iRyanBell/ARC1/ed251513-4807-4e31-bc8e-3ab0217ae4f3.json b/data/hfopenllm_v2/iRyanBell/ARC1/ed251513-4807-4e31-bc8e-3ab0217ae4f3.json new file mode 100644 index 000000000..0ff922243 --- /dev/null +++ b/data/hfopenllm_v2/iRyanBell/ARC1/ed251513-4807-4e31-bc8e-3ab0217ae4f3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/iRyanBell_ARC1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ARC1", + "id": "iRyanBell/ARC1", + "developer": "iRyanBell", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4411 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": 
"hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4903 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0687 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3991 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3371 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibivibiv/colossus_120b/e7fa3baa-07b4-4f10-aa9c-8424d8fea303.json b/data/hfopenllm_v2/ibivibiv/colossus_120b/e7fa3baa-07b4-4f10-aa9c-8424d8fea303.json new file mode 100644 index 000000000..7022b74fb --- /dev/null +++ b/data/hfopenllm_v2/ibivibiv/colossus_120b/e7fa3baa-07b4-4f10-aa9c-8424d8fea303.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ibivibiv_colossus_120b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "colossus_120b", + "id": "ibivibiv/colossus_120b", + "developer": "ibivibiv", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 117.749 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4276 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6061 + } + }, + { + "evaluation_name": 
"MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0566 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4733 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3961 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibivibiv/colossus_120b/f0bcf710-b1a8-4736-9fd3-6b0ea241155e.json b/data/hfopenllm_v2/ibivibiv/colossus_120b/f0bcf710-b1a8-4736-9fd3-6b0ea241155e.json deleted file mode 100644 index 7a673cdd1..000000000 --- a/data/hfopenllm_v2/ibivibiv/colossus_120b/f0bcf710-b1a8-4736-9fd3-6b0ea241155e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ibivibiv_colossus_120b/1762652580.2048829", - "retrieved_timestamp": "1762652580.204884", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ibivibiv/colossus_120b", - "developer": "ibivibiv", - "inference_platform": "unknown", - "id": "ibivibiv/colossus_120b", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 117.749 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42759877126025614 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6061408586494191 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4733125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3961103723404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/ibivibiv/multimaster-7b-v6/11dfd131-00bf-4561-a913-f1c0cb15bf9c.json b/data/hfopenllm_v2/ibivibiv/multimaster-7b-v6/11dfd131-00bf-4561-a913-f1c0cb15bf9c.json new file mode 100644 index 000000000..6b86a8133 --- /dev/null +++ b/data/hfopenllm_v2/ibivibiv/multimaster-7b-v6/11dfd131-00bf-4561-a913-f1c0cb15bf9c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ibivibiv_multimaster-7b-v6/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "multimaster-7b-v6", + "id": "ibivibiv/multimaster-7b-v6", + "developer": "ibivibiv", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MixtralForCausalLM", + "params_billions": 35.428 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4473 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5194 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0559 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4396 + } + }, 
+ { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3095 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibivibiv/multimaster-7b-v6/7044a4d4-1c07-40ef-917c-d242b61d7877.json b/data/hfopenllm_v2/ibivibiv/multimaster-7b-v6/7044a4d4-1c07-40ef-917c-d242b61d7877.json deleted file mode 100644 index 0121a7c6b..000000000 --- a/data/hfopenllm_v2/ibivibiv/multimaster-7b-v6/7044a4d4-1c07-40ef-917c-d242b61d7877.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ibivibiv_multimaster-7b-v6/1762652580.205187", - "retrieved_timestamp": "1762652580.205188", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ibivibiv/multimaster-7b-v6", - "developer": "ibivibiv", - "inference_platform": "unknown", - "id": "ibivibiv/multimaster-7b-v6", - "additional_details": { - "precision": "float16", - "architecture": "MixtralForCausalLM", - "params_billions": 35.428 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4473075883101283 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.519351871026721 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.055891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43957291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30950797872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.0-1b-a400m-base/3ba34f38-2340-407f-a7b5-82749f8a0ee6.json b/data/hfopenllm_v2/ibm-granite/granite-3.0-1b-a400m-base/3ba34f38-2340-407f-a7b5-82749f8a0ee6.json new file mode 100644 index 000000000..f51a2ca74 --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-3.0-1b-a400m-base/3ba34f38-2340-407f-a7b5-82749f8a0ee6.json @@ -0,0 +1,132 @@ +{ + 
"schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-1b-a400m-base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "granite-3.0-1b-a400m-base", + "id": "ibm-granite/granite-3.0-1b-a400m-base", + "developer": "ibm-granite", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GraniteForCausalLM", + "params_billions": 1.335 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2404 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3221 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0264 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2475 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3367 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1152 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.0-1b-a400m-base/52e253ba-0291-4e78-b292-806cabe74697.json b/data/hfopenllm_v2/ibm-granite/granite-3.0-1b-a400m-base/52e253ba-0291-4e78-b292-806cabe74697.json deleted file mode 100644 index 1f9dc06b0..000000000 --- a/data/hfopenllm_v2/ibm-granite/granite-3.0-1b-a400m-base/52e253ba-0291-4e78-b292-806cabe74697.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-1b-a400m-base/1762652580.205958", - "retrieved_timestamp": "1762652580.20596", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ibm-granite/granite-3.0-1b-a400m-base", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-3.0-1b-a400m-base", - "additional_details": { - "precision": "bfloat16", - "architecture": "GraniteForCausalLM", - "params_billions": 1.335 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24040324117785256 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221205531032148 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24748322147651006 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3367291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11519281914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.0-1b-a400m-instruct/91b9649b-bdf6-4b15-a038-47edc2e79ef6.json b/data/hfopenllm_v2/ibm-granite/granite-3.0-1b-a400m-instruct/91b9649b-bdf6-4b15-a038-47edc2e79ef6.json new file mode 100644 index 000000000..d041957dc --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-3.0-1b-a400m-instruct/91b9649b-bdf6-4b15-a038-47edc2e79ef6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-1b-a400m-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "granite-3.0-1b-a400m-instruct", + "id": "ibm-granite/granite-3.0-1b-a400m-instruct", + "developer": "ibm-granite", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GraniteForCausalLM", + "params_billions": 1.335 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3332 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3224 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0279 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3623 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1244 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.0-1b-a400m-instruct/afc49838-c7fc-40ed-841f-74b0bc3dd36e.json b/data/hfopenllm_v2/ibm-granite/granite-3.0-1b-a400m-instruct/afc49838-c7fc-40ed-841f-74b0bc3dd36e.json deleted file mode 100644 index d0bb91ce9..000000000 --- a/data/hfopenllm_v2/ibm-granite/granite-3.0-1b-a400m-instruct/afc49838-c7fc-40ed-841f-74b0bc3dd36e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-1b-a400m-instruct/1762652580.206321", - "retrieved_timestamp": "1762652580.206322", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ibm-granite/granite-3.0-1b-a400m-instruct", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-3.0-1b-a400m-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "GraniteForCausalLM", - "params_billions": 1.335 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33315159332792543 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3223950988485842 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36228124999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12441821808510638 - } - } - ] -} diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.0-2b-base/184f8ef6-7cb7-45f2-b983-70dc4503a968.json b/data/hfopenllm_v2/ibm-granite/granite-3.0-2b-base/184f8ef6-7cb7-45f2-b983-70dc4503a968.json deleted file mode 100644 index 0b204f4c2..000000000 --- a/data/hfopenllm_v2/ibm-granite/granite-3.0-2b-base/184f8ef6-7cb7-45f2-b983-70dc4503a968.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-2b-base/1762652580.206552", - "retrieved_timestamp": "1762652580.206552", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ibm-granite/granite-3.0-2b-base", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-3.0-2b-base", - "additional_details": { - "precision": "bfloat16", - "architecture": "GraniteForCausalLM", - "params_billions": 2.634 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3873821460391761 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40474805593806223 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28020134228187926 - 
} - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3434270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23811502659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.0-2b-base/24670e63-32e1-4c5d-82fe-0d0c45a4e165.json b/data/hfopenllm_v2/ibm-granite/granite-3.0-2b-base/24670e63-32e1-4c5d-82fe-0d0c45a4e165.json new file mode 100644 index 000000000..875fbc380 --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-3.0-2b-base/24670e63-32e1-4c5d-82fe-0d0c45a4e165.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-2b-base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "granite-3.0-2b-base", + "id": "ibm-granite/granite-3.0-2b-base", + "developer": "ibm-granite", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GraniteForCausalLM", + "params_billions": 2.634 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3874 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4047 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0544 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3434 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": 
"hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2381 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.0-2b-instruct/198d1441-1d13-468a-a998-c8cf9f1e7a57.json b/data/hfopenllm_v2/ibm-granite/granite-3.0-2b-instruct/198d1441-1d13-468a-a998-c8cf9f1e7a57.json new file mode 100644 index 000000000..3d240c282 --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-3.0-2b-instruct/198d1441-1d13-468a-a998-c8cf9f1e7a57.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-2b-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "granite-3.0-2b-instruct", + "id": "ibm-granite/granite-3.0-2b-instruct", + "developer": "ibm-granite", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GraniteForCausalLM", + "params_billions": 2.634 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.514 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4412 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0921 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3515 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.2814 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.0-2b-instruct/ec853cc1-7c48-4334-9ff6-d9669750570b.json b/data/hfopenllm_v2/ibm-granite/granite-3.0-2b-instruct/ec853cc1-7c48-4334-9ff6-d9669750570b.json deleted file mode 100644 index 9486af183..000000000 --- a/data/hfopenllm_v2/ibm-granite/granite-3.0-2b-instruct/ec853cc1-7c48-4334-9ff6-d9669750570b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-2b-instruct/1762652580.206777", - "retrieved_timestamp": "1762652580.206777", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ibm-granite/granite-3.0-2b-instruct", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-3.0-2b-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "GraniteForCausalLM", - "params_billions": 2.634 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.513977357854936 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44119772062630297 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09214501510574018 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35148958333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2814162234042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.0-3b-a800m-base/e9eb1499-835c-4a70-b531-4be5a9718c34.json b/data/hfopenllm_v2/ibm-granite/granite-3.0-3b-a800m-base/e9eb1499-835c-4a70-b531-4be5a9718c34.json new file mode 100644 index 000000000..226473a3b --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-3.0-3b-a800m-base/e9eb1499-835c-4a70-b531-4be5a9718c34.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-3b-a800m-base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "granite-3.0-3b-a800m-base", + "id": "ibm-granite/granite-3.0-3b-a800m-base", + "developer": "ibm-granite", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GraniteForCausalLM", + "params_billions": 3.374 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2732 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3667 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0483 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2517 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.342 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1891 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.0-3b-a800m-base/f917bdff-4be5-440b-8e62-bb9f7b0dd0f5.json b/data/hfopenllm_v2/ibm-granite/granite-3.0-3b-a800m-base/f917bdff-4be5-440b-8e62-bb9f7b0dd0f5.json deleted file mode 100644 index e1fe1a20c..000000000 --- a/data/hfopenllm_v2/ibm-granite/granite-3.0-3b-a800m-base/f917bdff-4be5-440b-8e62-bb9f7b0dd0f5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-3b-a800m-base/1762652580.20698", - "retrieved_timestamp": "1762652580.20698", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - 
}, - "model_info": { - "name": "ibm-granite/granite-3.0-3b-a800m-base", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-3.0-3b-a800m-base", - "additional_details": { - "precision": "bfloat16", - "architecture": "GraniteForCausalLM", - "params_billions": 3.374 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2732261510569733 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36674974971308566 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04833836858006042 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34196875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18907912234042554 - } - } - ] -} diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.0-3b-a800m-instruct/7c92caf5-df83-4c8e-ab85-f99c7ac43f63.json b/data/hfopenllm_v2/ibm-granite/granite-3.0-3b-a800m-instruct/7c92caf5-df83-4c8e-ab85-f99c7ac43f63.json deleted file mode 100644 index 806fb5851..000000000 --- a/data/hfopenllm_v2/ibm-granite/granite-3.0-3b-a800m-instruct/7c92caf5-df83-4c8e-ab85-f99c7ac43f63.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-3b-a800m-instruct/1762652580.2071838", - "retrieved_timestamp": "1762652580.2071848", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ibm-granite/granite-3.0-3b-a800m-instruct", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-3.0-3b-a800m-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "GraniteForCausalLM", - "params_billions": 3.374 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4298217618142085 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37527805291733446 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0702416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3486666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21517619680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.0-3b-a800m-instruct/b1fd95ad-767d-4c13-a936-00b08c74ca3d.json b/data/hfopenllm_v2/ibm-granite/granite-3.0-3b-a800m-instruct/b1fd95ad-767d-4c13-a936-00b08c74ca3d.json new file mode 100644 index 000000000..b7c39baf2 --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-3.0-3b-a800m-instruct/b1fd95ad-767d-4c13-a936-00b08c74ca3d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-3b-a800m-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "granite-3.0-3b-a800m-instruct", + "id": "ibm-granite/granite-3.0-3b-a800m-instruct", + "developer": "ibm-granite", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GraniteForCausalLM", + "params_billions": 3.374 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4298 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3753 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0702 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + 
"hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3487 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2152 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.0-8b-base/b7b71327-323b-4b7c-92a1-426911bed479.json b/data/hfopenllm_v2/ibm-granite/granite-3.0-8b-base/b7b71327-323b-4b7c-92a1-426911bed479.json deleted file mode 100644 index 0e7c7389f..000000000 --- a/data/hfopenllm_v2/ibm-granite/granite-3.0-8b-base/b7b71327-323b-4b7c-92a1-426911bed479.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-8b-base/1762652580.207386", - "retrieved_timestamp": "1762652580.207386", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ibm-granite/granite-3.0-8b-base", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-3.0-8b-base", - "additional_details": { - "precision": "bfloat16", - "architecture": "GraniteForCausalLM", - "params_billions": 8.171 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4583482936386566 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4943760637365333 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10120845921450151 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32550335570469796 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40813541666666664 - } - }, - { - "evaluation_name": 
"MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3312832446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.0-8b-base/f87bd357-535e-4450-b01d-b41e1b7571e0.json b/data/hfopenllm_v2/ibm-granite/granite-3.0-8b-base/f87bd357-535e-4450-b01d-b41e1b7571e0.json new file mode 100644 index 000000000..fba09f2d6 --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-3.0-8b-base/f87bd357-535e-4450-b01d-b41e1b7571e0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-8b-base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "granite-3.0-8b-base", + "id": "ibm-granite/granite-3.0-8b-base", + "developer": "ibm-granite", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GraniteForCausalLM", + "params_billions": 8.171 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4583 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4944 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1012 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3255 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4081 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3313 + } + } + ] +} \ No newline at 
end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.0-8b-instruct/300fd27e-4dce-441f-91da-f38bd14ffe5e.json b/data/hfopenllm_v2/ibm-granite/granite-3.0-8b-instruct/300fd27e-4dce-441f-91da-f38bd14ffe5e.json new file mode 100644 index 000000000..b5c6943a0 --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-3.0-8b-instruct/300fd27e-4dce-441f-91da-f38bd14ffe5e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-8b-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "granite-3.0-8b-instruct", + "id": "ibm-granite/granite-3.0-8b-instruct", + "developer": "ibm-granite", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GraniteForCausalLM", + "params_billions": 8.171 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.531 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5192 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.142 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3322 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3901 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3457 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.0-8b-instruct/d4dc4d78-33a3-428c-9490-382dd0c19c08.json 
b/data/hfopenllm_v2/ibm-granite/granite-3.0-8b-instruct/d4dc4d78-33a3-428c-9490-382dd0c19c08.json deleted file mode 100644 index f3ef5212a..000000000 --- a/data/hfopenllm_v2/ibm-granite/granite-3.0-8b-instruct/d4dc4d78-33a3-428c-9490-382dd0c19c08.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-8b-instruct/1762652580.207594", - "retrieved_timestamp": "1762652580.207595", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ibm-granite/granite-3.0-8b-instruct", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-3.0-8b-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "GraniteForCausalLM", - "params_billions": 8.171 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5309633993359841 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5191874631840226 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1419939577039275 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33221476510067116 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3900625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34566156914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.1-1b-a400m-base/17192714-a653-428d-a7c7-06dd41db77fa.json b/data/hfopenllm_v2/ibm-granite/granite-3.1-1b-a400m-base/17192714-a653-428d-a7c7-06dd41db77fa.json deleted file mode 100644 index 83cc7b873..000000000 --- a/data/hfopenllm_v2/ibm-granite/granite-3.1-1b-a400m-base/17192714-a653-428d-a7c7-06dd41db77fa.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-1b-a400m-base/1762652580.207968", - "retrieved_timestamp": "1762652580.2079701", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": 
{ - "name": "ibm-granite/granite-3.1-1b-a400m-base", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-3.1-1b-a400m-base", - "additional_details": { - "precision": "bfloat16", - "architecture": "GraniteMoeForCausalLM", - "params_billions": 1.335 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2519437315212525 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3298699546506724 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027190332326283987 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3500625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11394614361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.1-1b-a400m-base/1fd9a2e5-856f-4303-8ac1-611311f3e7b5.json b/data/hfopenllm_v2/ibm-granite/granite-3.1-1b-a400m-base/1fd9a2e5-856f-4303-8ac1-611311f3e7b5.json new file mode 100644 index 000000000..81e9a2dd8 --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-3.1-1b-a400m-base/1fd9a2e5-856f-4303-8ac1-611311f3e7b5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-1b-a400m-base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "granite-3.1-1b-a400m-base", + "id": "ibm-granite/granite-3.1-1b-a400m-base", + "developer": "ibm-granite", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GraniteMoeForCausalLM", + "params_billions": 1.335 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2519 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3299 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0272 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2517 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3501 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1139 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.1-1b-a400m-instruct/4c34d5c6-af1b-4519-8d08-67bd837e9b97.json b/data/hfopenllm_v2/ibm-granite/granite-3.1-1b-a400m-instruct/4c34d5c6-af1b-4519-8d08-67bd837e9b97.json new file mode 100644 index 000000000..18f994f0f --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-3.1-1b-a400m-instruct/4c34d5c6-af1b-4519-8d08-67bd837e9b97.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-1b-a400m-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "granite-3.1-1b-a400m-instruct", + "id": "ibm-granite/granite-3.1-1b-a400m-instruct", + "developer": "ibm-granite", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GraniteMoeForCausalLM", + "params_billions": 1.335 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4686 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.328 + } + }, + { + 
"evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0453 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2399 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3302 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1217 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.1-1b-a400m-instruct/8167695b-db96-4687-91b8-0af55e67a606.json b/data/hfopenllm_v2/ibm-granite/granite-3.1-1b-a400m-instruct/8167695b-db96-4687-91b8-0af55e67a606.json deleted file mode 100644 index d4008fc1d..000000000 --- a/data/hfopenllm_v2/ibm-granite/granite-3.1-1b-a400m-instruct/8167695b-db96-4687-91b8-0af55e67a606.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-1b-a400m-instruct/1762652580.208256", - "retrieved_timestamp": "1762652580.208257", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ibm-granite/granite-3.1-1b-a400m-instruct", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-3.1-1b-a400m-instruct", - "additional_details": { - "precision": "float16", - "architecture": "GraniteMoeForCausalLM", - "params_billions": 1.335 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46863987553025976 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3279834385375178 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23993288590604026 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33025 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12167553191489362 - } - } - ] -} diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.1-2b-base/971e6eba-61ff-42e6-9740-1895080ff94f.json b/data/hfopenllm_v2/ibm-granite/granite-3.1-2b-base/971e6eba-61ff-42e6-9740-1895080ff94f.json deleted file mode 100644 index 00b447842..000000000 --- a/data/hfopenllm_v2/ibm-granite/granite-3.1-2b-base/971e6eba-61ff-42e6-9740-1895080ff94f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-2b-base/1762652580.208491", - "retrieved_timestamp": "1762652580.208492", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ibm-granite/granite-3.1-2b-base", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-3.1-2b-base", - "additional_details": { - "precision": "bfloat16", - "architecture": "GraniteForCausalLM", - "params_billions": 2.534 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35216115462528313 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4047188028918873 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3485729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.22506648936170212 - } - } - ] -} diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.1-2b-base/ddc27df7-1c4c-4563-92b2-5a39380423a8.json b/data/hfopenllm_v2/ibm-granite/granite-3.1-2b-base/ddc27df7-1c4c-4563-92b2-5a39380423a8.json new file mode 100644 index 000000000..d36351d16 --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-3.1-2b-base/ddc27df7-1c4c-4563-92b2-5a39380423a8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-2b-base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "granite-3.1-2b-base", + "id": "ibm-granite/granite-3.1-2b-base", + "developer": "ibm-granite", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GraniteForCausalLM", + "params_billions": 2.534 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3522 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4047 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0566 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3486 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2251 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.1-2b-instruct/3e606ef8-9caa-43d4-81d6-8eae9936ab4c.json 
b/data/hfopenllm_v2/ibm-granite/granite-3.1-2b-instruct/3e606ef8-9caa-43d4-81d6-8eae9936ab4c.json new file mode 100644 index 000000000..8f8fd088d --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-3.1-2b-instruct/3e606ef8-9caa-43d4-81d6-8eae9936ab4c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-2b-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "granite-3.1-2b-instruct", + "id": "ibm-granite/granite-3.1-2b-instruct", + "developer": "ibm-granite", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GraniteForCausalLM", + "params_billions": 2.534 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6286 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4409 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1526 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3605 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2819 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.1-2b-instruct/fcdf14a1-900f-4856-aac6-8ed47910f882.json b/data/hfopenllm_v2/ibm-granite/granite-3.1-2b-instruct/fcdf14a1-900f-4856-aac6-8ed47910f882.json deleted file mode 100644 index fc17e9519..000000000 --- 
a/data/hfopenllm_v2/ibm-granite/granite-3.1-2b-instruct/fcdf14a1-900f-4856-aac6-8ed47910f882.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-2b-instruct/1762652580.2087219", - "retrieved_timestamp": "1762652580.2087228", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ibm-granite/granite-3.1-2b-instruct", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-3.1-2b-instruct", - "additional_details": { - "precision": "float16", - "architecture": "GraniteForCausalLM", - "params_billions": 2.534 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.628557782240012 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44089858558056544 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15256797583081572 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3605416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28191489361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.1-3b-a800m-base/8930e3f9-e0b8-4fb7-91e2-ee34b17cf1eb.json b/data/hfopenllm_v2/ibm-granite/granite-3.1-3b-a800m-base/8930e3f9-e0b8-4fb7-91e2-ee34b17cf1eb.json deleted file mode 100644 index bc40f0874..000000000 --- a/data/hfopenllm_v2/ibm-granite/granite-3.1-3b-a800m-base/8930e3f9-e0b8-4fb7-91e2-ee34b17cf1eb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-3b-a800m-base/1762652580.20895", - "retrieved_timestamp": "1762652580.208951", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ibm-granite/granite-3.1-3b-a800m-base", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": 
"ibm-granite/granite-3.1-3b-a800m-base", - "additional_details": { - "precision": "bfloat16", - "architecture": "GraniteMoeForCausalLM", - "params_billions": 3.299 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2996294276962903 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.362822992347764 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3275208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1792719414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.1-3b-a800m-base/b9053559-3b90-4de0-981a-dbb49db38eb5.json b/data/hfopenllm_v2/ibm-granite/granite-3.1-3b-a800m-base/b9053559-3b90-4de0-981a-dbb49db38eb5.json new file mode 100644 index 000000000..d42475062 --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-3.1-3b-a800m-base/b9053559-3b90-4de0-981a-dbb49db38eb5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-3b-a800m-base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "granite-3.1-3b-a800m-base", + "id": "ibm-granite/granite-3.1-3b-a800m-base", + "developer": "ibm-granite", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GraniteMoeForCausalLM", + "params_billions": 3.299 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2996 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, 
+ "score_details": { + "score": 0.3628 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0453 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3275 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1793 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.1-3b-a800m-instruct/1e0c27fc-8111-4325-8e61-c24c2f8124f7.json b/data/hfopenllm_v2/ibm-granite/granite-3.1-3b-a800m-instruct/1e0c27fc-8111-4325-8e61-c24c2f8124f7.json deleted file mode 100644 index de0d59eab..000000000 --- a/data/hfopenllm_v2/ibm-granite/granite-3.1-3b-a800m-instruct/1e0c27fc-8111-4325-8e61-c24c2f8124f7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-3b-a800m-instruct/1762652580.2092001", - "retrieved_timestamp": "1762652580.2092009", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ibm-granite/granite-3.1-3b-a800m-instruct", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-3.1-3b-a800m-instruct", - "additional_details": { - "precision": "float16", - "architecture": "GraniteMoeForCausalLM", - "params_billions": 3.299 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5516462984880118 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4009494521947192 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.11404833836858005 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3486354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21476063829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.1-3b-a800m-instruct/cea89bc6-b1a1-4b67-a136-45e097563a5b.json b/data/hfopenllm_v2/ibm-granite/granite-3.1-3b-a800m-instruct/cea89bc6-b1a1-4b67-a136-45e097563a5b.json new file mode 100644 index 000000000..cd1998b51 --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-3.1-3b-a800m-instruct/cea89bc6-b1a1-4b67-a136-45e097563a5b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-3b-a800m-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "granite-3.1-3b-a800m-instruct", + "id": "ibm-granite/granite-3.1-3b-a800m-instruct", + "developer": "ibm-granite", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GraniteMoeForCausalLM", + "params_billions": 3.299 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5516 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4009 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.114 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2886 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": 
"MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3486 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2148 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.1-8b-base/10cbee10-0344-4da0-a26a-4298fd8f4d11.json b/data/hfopenllm_v2/ibm-granite/granite-3.1-8b-base/10cbee10-0344-4da0-a26a-4298fd8f4d11.json deleted file mode 100644 index 854452e59..000000000 --- a/data/hfopenllm_v2/ibm-granite/granite-3.1-8b-base/10cbee10-0344-4da0-a26a-4298fd8f4d11.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-8b-base/1762652580.209538", - "retrieved_timestamp": "1762652580.2095392", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ibm-granite/granite-3.1-8b-base", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-3.1-8b-base", - "additional_details": { - "precision": "float16", - "architecture": "GraniteForCausalLM", - "params_billions": 8.171 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4221033524381973 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4776956677111636 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09441087613293052 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3922291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3232214095744681 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/ibm-granite/granite-3.1-8b-base/5eb16113-7d0d-47a0-91d8-ec7dab35efdd.json b/data/hfopenllm_v2/ibm-granite/granite-3.1-8b-base/5eb16113-7d0d-47a0-91d8-ec7dab35efdd.json new file mode 100644 index 000000000..0a47fdba2 --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-3.1-8b-base/5eb16113-7d0d-47a0-91d8-ec7dab35efdd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-8b-base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "granite-3.1-8b-base", + "id": "ibm-granite/granite-3.1-8b-base", + "developer": "ibm-granite", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GraniteForCausalLM", + "params_billions": 8.171 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4221 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4777 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0944 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3213 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3922 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3232 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.1-8b-instruct/45aa6545-d20a-4dfb-a8a6-01f2fd34c9f5.json b/data/hfopenllm_v2/ibm-granite/granite-3.1-8b-instruct/45aa6545-d20a-4dfb-a8a6-01f2fd34c9f5.json new file mode 100644 index 
000000000..5014bf7fd --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-3.1-8b-instruct/45aa6545-d20a-4dfb-a8a6-01f2fd34c9f5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-8b-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "granite-3.1-8b-instruct", + "id": "ibm-granite/granite-3.1-8b-instruct", + "developer": "ibm-granite", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GraniteForCausalLM", + "params_billions": 8.171 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7208 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5364 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2198 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4707 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3537 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.1-8b-instruct/6d6b2e81-8b90-4703-aafb-40de92b3ede3.json b/data/hfopenllm_v2/ibm-granite/granite-3.1-8b-instruct/6d6b2e81-8b90-4703-aafb-40de92b3ede3.json deleted file mode 100644 index fea1f257a..000000000 --- a/data/hfopenllm_v2/ibm-granite/granite-3.1-8b-instruct/6d6b2e81-8b90-4703-aafb-40de92b3ede3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/ibm-granite_granite-3.1-8b-instruct/1762652580.2098079", - "retrieved_timestamp": "1762652580.2098088", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ibm-granite/granite-3.1-8b-instruct", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-3.1-8b-instruct", - "additional_details": { - "precision": "float16", - "architecture": "GraniteForCausalLM", - "params_billions": 8.171 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7207564816908026 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5364460433816018 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21978851963746224 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47070833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3537234042553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.2-2b-instruct/39fd9dc4-88e4-4b52-8527-c1ea692d8ca1.json b/data/hfopenllm_v2/ibm-granite/granite-3.2-2b-instruct/39fd9dc4-88e4-4b52-8527-c1ea692d8ca1.json deleted file mode 100644 index 5b221cd2a..000000000 --- a/data/hfopenllm_v2/ibm-granite/granite-3.2-2b-instruct/39fd9dc4-88e4-4b52-8527-c1ea692d8ca1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.2-2b-instruct/1762652580.2100549", - "retrieved_timestamp": "1762652580.2100558", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ibm-granite/granite-3.2-2b-instruct", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-3.2-2b-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "GraniteForCausalLM", - "params_billions": 2.534 - } - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6151688630611223 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43872707491212865 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14425981873111782 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3645729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2783410904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.2-2b-instruct/c94079d1-d8b1-4198-8129-8c5a11c310ca.json b/data/hfopenllm_v2/ibm-granite/granite-3.2-2b-instruct/c94079d1-d8b1-4198-8129-8c5a11c310ca.json new file mode 100644 index 000000000..b97bdbfbb --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-3.2-2b-instruct/c94079d1-d8b1-4198-8129-8c5a11c310ca.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.2-2b-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "granite-3.2-2b-instruct", + "id": "ibm-granite/granite-3.2-2b-instruct", + "developer": "ibm-granite", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GraniteForCausalLM", + "params_billions": 2.534 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6152 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4387 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1443 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3646 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2783 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.2-8b-instruct/982accb5-ea5c-45bc-8cdd-08edf5e543a1.json b/data/hfopenllm_v2/ibm-granite/granite-3.2-8b-instruct/982accb5-ea5c-45bc-8cdd-08edf5e543a1.json deleted file mode 100644 index db3c6057b..000000000 --- a/data/hfopenllm_v2/ibm-granite/granite-3.2-8b-instruct/982accb5-ea5c-45bc-8cdd-08edf5e543a1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.2-8b-instruct/1762652580.210291", - "retrieved_timestamp": "1762652580.2102919", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ibm-granite/granite-3.2-8b-instruct", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-3.2-8b-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "GraniteForCausalLM", - "params_billions": 8.171 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7274509412802475 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5401759656246116 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23791540785498488 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4561979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35123005319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.2-8b-instruct/cb45306a-096c-4ed5-a028-6d720b26afe9.json b/data/hfopenllm_v2/ibm-granite/granite-3.2-8b-instruct/cb45306a-096c-4ed5-a028-6d720b26afe9.json new file mode 100644 index 000000000..15fc669a2 --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-3.2-8b-instruct/cb45306a-096c-4ed5-a028-6d720b26afe9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.2-8b-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "granite-3.2-8b-instruct", + "id": "ibm-granite/granite-3.2-8b-instruct", + "developer": "ibm-granite", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GraniteForCausalLM", + "params_billions": 8.171 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7275 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5402 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2379 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3154 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.4562 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3512 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-7b-base/2d21a773-8f72-4b7d-ba94-80867127c54a.json b/data/hfopenllm_v2/ibm-granite/granite-7b-base/2d21a773-8f72-4b7d-ba94-80867127c54a.json deleted file mode 100644 index 2ab0c37cc..000000000 --- a/data/hfopenllm_v2/ibm-granite/granite-7b-base/2d21a773-8f72-4b7d-ba94-80867127c54a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-7b-base/1762652580.2106082", - "retrieved_timestamp": "1762652580.210609", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ibm-granite/granite-7b-base", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-7b-base", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.738 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24142719096441884 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34804372716106186 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24580536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35548958333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18342752659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/ibm-granite/granite-7b-base/f301908e-474b-4ba2-a873-610ca1b6c2bd.json b/data/hfopenllm_v2/ibm-granite/granite-7b-base/f301908e-474b-4ba2-a873-610ca1b6c2bd.json new file mode 100644 index 000000000..678678ab3 --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-7b-base/f301908e-474b-4ba2-a873-610ca1b6c2bd.json @@ -0,0 +1,132 @@ +{ + 
"schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-7b-base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "granite-7b-base", + "id": "ibm-granite/granite-7b-base", + "developer": "ibm-granite", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.738 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2414 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.348 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0159 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2458 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3555 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1834 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-7b-instruct/06f5865d-a62a-48da-b33f-486fe29e3685.json b/data/hfopenllm_v2/ibm-granite/granite-7b-instruct/06f5865d-a62a-48da-b33f-486fe29e3685.json new file mode 100644 index 000000000..d82e7bb12 --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-7b-instruct/06f5865d-a62a-48da-b33f-486fe29e3685.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-7b-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "granite-7b-instruct", + "id": "ibm-granite/granite-7b-instruct", + "developer": "ibm-granite", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.738 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2972 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3723 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0204 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.402 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2286 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-7b-instruct/509f5b3a-6110-4757-a313-80181ecd3228.json b/data/hfopenllm_v2/ibm-granite/granite-7b-instruct/509f5b3a-6110-4757-a313-80181ecd3228.json deleted file mode 100644 index c2ab15bb4..000000000 --- a/data/hfopenllm_v2/ibm-granite/granite-7b-instruct/509f5b3a-6110-4757-a313-80181ecd3228.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-7b-instruct/1762652580.2108219", - "retrieved_timestamp": "1762652580.2108219", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"ibm-granite/granite-7b-instruct", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-7b-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.738 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2972313461615181 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37229529603269523 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40199999999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2286402925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/ibm/PowerLM-3b/4f952c51-91dc-446e-bda1-43ed66e1ca3e.json b/data/hfopenllm_v2/ibm/PowerLM-3b/4f952c51-91dc-446e-bda1-43ed66e1ca3e.json new file mode 100644 index 000000000..fb5d9a315 --- /dev/null +++ b/data/hfopenllm_v2/ibm/PowerLM-3b/4f952c51-91dc-446e-bda1-43ed66e1ca3e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ibm_PowerLM-3b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PowerLM-3b", + "id": "ibm/PowerLM-3b", + "developer": "ibm", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GraniteForCausalLM", + "params_billions": 3.512 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3321 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3679 + 
} + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0363 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2752 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3563 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2016 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm/PowerLM-3b/f1eb3ba0-225e-49d5-9509-422702927c9f.json b/data/hfopenllm_v2/ibm/PowerLM-3b/f1eb3ba0-225e-49d5-9509-422702927c9f.json deleted file mode 100644 index ae64b452c..000000000 --- a/data/hfopenllm_v2/ibm/PowerLM-3b/f1eb3ba0-225e-49d5-9509-422702927c9f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ibm_PowerLM-3b/1762652580.205445", - "retrieved_timestamp": "1762652580.205446", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ibm/PowerLM-3b", - "developer": "ibm", - "inference_platform": "unknown", - "id": "ibm/PowerLM-3b", - "additional_details": { - "precision": "bfloat16", - "architecture": "GraniteForCausalLM", - "params_billions": 3.512 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33212764354135915 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3679456724439114 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03625377643504532 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3562916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20162898936170212 - } - } - ] -} diff --git a/data/hfopenllm_v2/ibm/merlinite-7b/7fdbc273-200d-4085-8a03-8f56cde4f2fc.json b/data/hfopenllm_v2/ibm/merlinite-7b/7fdbc273-200d-4085-8a03-8f56cde4f2fc.json deleted file mode 100644 index c864a9a7f..000000000 --- a/data/hfopenllm_v2/ibm/merlinite-7b/7fdbc273-200d-4085-8a03-8f56cde4f2fc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ibm_merlinite-7b/1762652580.2057128", - "retrieved_timestamp": "1762652580.205714", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ibm/merlinite-7b", - "developer": "ibm", - "inference_platform": "unknown", - "id": "ibm/merlinite-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2498703440205322 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.50071326118705 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02416918429003021 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44115624999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3068484042553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/ibm/merlinite-7b/dcba3a6f-8f4f-49f6-af74-541de16be435.json b/data/hfopenllm_v2/ibm/merlinite-7b/dcba3a6f-8f4f-49f6-af74-541de16be435.json new file mode 100644 index 000000000..240a78e34 --- 
/dev/null +++ b/data/hfopenllm_v2/ibm/merlinite-7b/dcba3a6f-8f4f-49f6-af74-541de16be435.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ibm_merlinite-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "merlinite-7b", + "id": "ibm/merlinite-7b", + "developer": "ibm", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2499 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5007 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0242 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4412 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3068 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.15-02.10-RP/20c0d1f9-24b8-4993-82f1-d9889c18c56a.json b/data/hfopenllm_v2/icefog72/Ice0.15-02.10-RP/20c0d1f9-24b8-4993-82f1-d9889c18c56a.json deleted file mode 100644 index 2aa0fa851..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.15-02.10-RP/20c0d1f9-24b8-4993-82f1-d9889c18c56a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.15-02.10-RP/1762652580.211034", - "retrieved_timestamp": "1762652580.211034", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.15-02.10-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.15-02.10-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5343355629729118 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4976384736188401 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43197916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30659906914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.15-02.10-RP/b5d39bcb-dab4-4880-9cb1-68dbd20a3ce5.json b/data/hfopenllm_v2/icefog72/Ice0.15-02.10-RP/b5d39bcb-dab4-4880-9cb1-68dbd20a3ce5.json new file mode 100644 index 000000000..920868959 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.15-02.10-RP/b5d39bcb-dab4-4880-9cb1-68dbd20a3ce5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.15-02.10-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.15-02.10-RP", + "id": "icefog72/Ice0.15-02.10-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5343 + } + 
}, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4976 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0574 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.432 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3066 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.16-02.10-RP/1e597e9b-4e75-4981-842b-dad6f1c15ed7.json b/data/hfopenllm_v2/icefog72/Ice0.16-02.10-RP/1e597e9b-4e75-4981-842b-dad6f1c15ed7.json new file mode 100644 index 000000000..6d89ee60d --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.16-02.10-RP/1e597e9b-4e75-4981-842b-dad6f1c15ed7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.16-02.10-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.16-02.10-RP", + "id": "icefog72/Ice0.16-02.10-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5069 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4946 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0589 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3068 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.16-02.10-RP/824cb85d-e7a0-421a-994b-c0b178ab8e56.json b/data/hfopenllm_v2/icefog72/Ice0.16-02.10-RP/824cb85d-e7a0-421a-994b-c0b178ab8e56.json deleted file mode 100644 index 2b60f1a81..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.16-02.10-RP/824cb85d-e7a0-421a-994b-c0b178ab8e56.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.16-02.10-RP/1762652580.211284", - "retrieved_timestamp": "1762652580.211284", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.16-02.10-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.16-02.10-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5069083365470286 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4945564313654156 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.05891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.433375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3067652925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.17-03.10-RP/18752dc4-76d1-40dc-9f43-62b8087b7a88.json b/data/hfopenllm_v2/icefog72/Ice0.17-03.10-RP/18752dc4-76d1-40dc-9f43-62b8087b7a88.json new file mode 100644 index 000000000..1e5cdb6bc --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.17-03.10-RP/18752dc4-76d1-40dc-9f43-62b8087b7a88.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.17-03.10-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.17-03.10-RP", + "id": "icefog72/Ice0.17-03.10-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5124 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5007 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0612 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2819 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3085 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.17-03.10-RP/2faf039c-9c8e-46db-8472-6b741c451bf1.json b/data/hfopenllm_v2/icefog72/Ice0.17-03.10-RP/2faf039c-9c8e-46db-8472-6b741c451bf1.json deleted file mode 100644 index e3a176c4a..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.17-03.10-RP/2faf039c-9c8e-46db-8472-6b741c451bf1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.17-03.10-RP/1762652580.211494", - "retrieved_timestamp": "1762652580.211495", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.17-03.10-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.17-03.10-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5123538876846767 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5006815748225494 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.433375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30851063829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.27-06.11-RP/314c9c7e-0c13-4f6b-be25-d2a2cbc25e9b.json b/data/hfopenllm_v2/icefog72/Ice0.27-06.11-RP/314c9c7e-0c13-4f6b-be25-d2a2cbc25e9b.json deleted file mode 100644 index 7976cc135..000000000 --- 
a/data/hfopenllm_v2/icefog72/Ice0.27-06.11-RP/314c9c7e-0c13-4f6b-be25-d2a2cbc25e9b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.27-06.11-RP/1762652580.211702", - "retrieved_timestamp": "1762652580.211702", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.27-06.11-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.27-06.11-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49182059158588104 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5111654648230625 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43278125000000006 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3154089095744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.27-06.11-RP/fa30c36e-20f1-41ee-a59d-0044f2b76dfb.json b/data/hfopenllm_v2/icefog72/Ice0.27-06.11-RP/fa30c36e-20f1-41ee-a59d-0044f2b76dfb.json new file mode 100644 index 000000000..ce00faedd --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.27-06.11-RP/fa30c36e-20f1-41ee-a59d-0044f2b76dfb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.27-06.11-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.27-06.11-RP", + "id": "icefog72/Ice0.27-06.11-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + 
"dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4918 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5112 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0566 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4328 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3154 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.29-06.11-RP/5391ae8f-41b0-41cb-9365-b5cb7649c8b7.json b/data/hfopenllm_v2/icefog72/Ice0.29-06.11-RP/5391ae8f-41b0-41cb-9365-b5cb7649c8b7.json new file mode 100644 index 000000000..8a1fccbff --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.29-06.11-RP/5391ae8f-41b0-41cb-9365-b5cb7649c8b7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.29-06.11-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.29-06.11-RP", + "id": "icefog72/Ice0.29-06.11-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.4861 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5088 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0566 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4459 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3093 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.29-06.11-RP/b07e3d05-409f-498a-a324-82c4a592d4dc.json b/data/hfopenllm_v2/icefog72/Ice0.29-06.11-RP/b07e3d05-409f-498a-a324-82c4a592d4dc.json deleted file mode 100644 index c038d90ce..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.29-06.11-RP/b07e3d05-409f-498a-a324-82c4a592d4dc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.29-06.11-RP/1762652580.2119", - "retrieved_timestamp": "1762652580.211901", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.29-06.11-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.29-06.11-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.486050346414181 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.5087880173407883 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4458958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30925864361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.31-08.11-RP/1fc072c6-ad31-4151-8420-7402b565510d.json b/data/hfopenllm_v2/icefog72/Ice0.31-08.11-RP/1fc072c6-ad31-4151-8420-7402b565510d.json deleted file mode 100644 index fecbf0e55..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.31-08.11-RP/1fc072c6-ad31-4151-8420-7402b565510d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.31-08.11-RP/1762652580.212094", - "retrieved_timestamp": "1762652580.212095", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.31-08.11-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.31-08.11-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5145768782386291 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5032134100285419 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.42766666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3130817819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.31-08.11-RP/a95ab4cf-456f-4b3d-9bab-2b755649758d.json b/data/hfopenllm_v2/icefog72/Ice0.31-08.11-RP/a95ab4cf-456f-4b3d-9bab-2b755649758d.json new file mode 100644 index 000000000..b2b7ce69b --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.31-08.11-RP/a95ab4cf-456f-4b3d-9bab-2b755649758d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.31-08.11-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.31-08.11-RP", + "id": "icefog72/Ice0.31-08.11-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5146 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5032 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0612 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4277 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3131 + } 
+ } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.32-10.11-RP/68e99fe4-634e-4462-b1db-d2d40814ff0b.json b/data/hfopenllm_v2/icefog72/Ice0.32-10.11-RP/68e99fe4-634e-4462-b1db-d2d40814ff0b.json deleted file mode 100644 index 75d9dbd57..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.32-10.11-RP/68e99fe4-634e-4462-b1db-d2d40814ff0b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.32-10.11-RP/1762652580.2122939", - "retrieved_timestamp": "1762652580.2122948", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.32-10.11-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.32-10.11-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49154576523623983 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5047695597611622 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4382083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3100066489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.32-10.11-RP/9840baa9-2ddf-4dd9-b3b0-3ec3075089bc.json b/data/hfopenllm_v2/icefog72/Ice0.32-10.11-RP/9840baa9-2ddf-4dd9-b3b0-3ec3075089bc.json new file mode 100644 index 000000000..c07a2e5fe --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.32-10.11-RP/9840baa9-2ddf-4dd9-b3b0-3ec3075089bc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.32-10.11-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.32-10.11-RP", + "id": "icefog72/Ice0.32-10.11-RP", + 
"developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4915 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5048 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0514 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4382 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.31 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.34b-14.11-RP/26ff113c-95ca-4716-83f7-4792b46be246.json b/data/hfopenllm_v2/icefog72/Ice0.34b-14.11-RP/26ff113c-95ca-4716-83f7-4792b46be246.json new file mode 100644 index 000000000..e450135ff --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.34b-14.11-RP/26ff113c-95ca-4716-83f7-4792b46be246.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.34b-14.11-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.34b-14.11-RP", + "id": "icefog72/Ice0.34b-14.11-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { 
+ "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4762 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5067 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.065 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.442 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3125 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.34b-14.11-RP/ed2a47c3-06c7-451b-94cd-8cd42be2ca9c.json b/data/hfopenllm_v2/icefog72/Ice0.34b-14.11-RP/ed2a47c3-06c7-451b-94cd-8cd42be2ca9c.json deleted file mode 100644 index 26bfba41c..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.34b-14.11-RP/ed2a47c3-06c7-451b-94cd-8cd42be2ca9c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.34b-14.11-RP/1762652580.2124958", - "retrieved_timestamp": "1762652580.212497", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.34b-14.11-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.34b-14.11-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 
1 - }, - "score_details": { - "score": 0.47620868185303883 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5067195329696937 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4419895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3125 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.34n-14.11-RP/285e1d08-15a0-4d8b-a844-e4cad923ea9b.json b/data/hfopenllm_v2/icefog72/Ice0.34n-14.11-RP/285e1d08-15a0-4d8b-a844-e4cad923ea9b.json new file mode 100644 index 000000000..8fc9467cf --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.34n-14.11-RP/285e1d08-15a0-4d8b-a844-e4cad923ea9b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.34n-14.11-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.34n-14.11-RP", + "id": "icefog72/Ice0.34n-14.11-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4787 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5091 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0725 + } + }, + { + 
"evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3138 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.438 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3124 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.34n-14.11-RP/8c6aae5b-6a9b-47fb-908b-6b51159cc9b2.json b/data/hfopenllm_v2/icefog72/Ice0.34n-14.11-RP/8c6aae5b-6a9b-47fb-908b-6b51159cc9b2.json deleted file mode 100644 index fd1e89765..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.34n-14.11-RP/8c6aae5b-6a9b-47fb-908b-6b51159cc9b2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.34n-14.11-RP/1762652580.2127092", - "retrieved_timestamp": "1762652580.21271", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.34n-14.11-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.34n-14.11-RP", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47865663107222167 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5091090160356474 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07250755287009064 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.4379583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31241688829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.37-18.11-RP/0462269d-94a3-4991-9af5-e55592f344e5.json b/data/hfopenllm_v2/icefog72/Ice0.37-18.11-RP/0462269d-94a3-4991-9af5-e55592f344e5.json new file mode 100644 index 000000000..11c77c634 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.37-18.11-RP/0462269d-94a3-4991-9af5-e55592f344e5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.37-18.11-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.37-18.11-RP", + "id": "icefog72/Ice0.37-18.11-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4972 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5084 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0642 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4339 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3143 + } 
+ } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.37-18.11-RP/774c0461-5e81-436a-9347-7a4cc15ca019.json b/data/hfopenllm_v2/icefog72/Ice0.37-18.11-RP/774c0461-5e81-436a-9347-7a4cc15ca019.json deleted file mode 100644 index 7ae8045fb..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.37-18.11-RP/774c0461-5e81-436a-9347-7a4cc15ca019.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.37-18.11-RP/1762652580.212915", - "retrieved_timestamp": "1762652580.212916", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.37-18.11-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.37-18.11-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4972162750391184 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5084310833712639 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06419939577039276 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43392708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3143284574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.38-19.11-RP/4d13aaf7-a18d-4bad-ab22-8e08c3f2e16a.json b/data/hfopenllm_v2/icefog72/Ice0.38-19.11-RP/4d13aaf7-a18d-4bad-ab22-8e08c3f2e16a.json deleted file mode 100644 index 4f94974a3..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.38-19.11-RP/4d13aaf7-a18d-4bad-ab22-8e08c3f2e16a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.38-19.11-RP/1762652580.213116", - "retrieved_timestamp": "1762652580.213117", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - 
"source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.38-19.11-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.38-19.11-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44033830237104216 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.510108216407024 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43671875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31399601063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.38-19.11-RP/c47c4cd6-90b6-42df-a3b9-4fc8f1b3c980.json b/data/hfopenllm_v2/icefog72/Ice0.38-19.11-RP/c47c4cd6-90b6-42df-a3b9-4fc8f1b3c980.json new file mode 100644 index 000000000..d940d4450 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.38-19.11-RP/c47c4cd6-90b6-42df-a3b9-4fc8f1b3c980.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.38-19.11-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.38-19.11-RP", + "id": "icefog72/Ice0.38-19.11-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4403 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5101 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0551 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4367 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.314 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.39-19.11-RP/0fecafe4-f8f0-4f97-ab2d-589a3856e1af.json b/data/hfopenllm_v2/icefog72/Ice0.39-19.11-RP/0fecafe4-f8f0-4f97-ab2d-589a3856e1af.json new file mode 100644 index 000000000..07a2c60d3 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.39-19.11-RP/0fecafe4-f8f0-4f97-ab2d-589a3856e1af.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.39-19.11-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.39-19.11-RP", + "id": "icefog72/Ice0.39-19.11-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4757 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5093 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0498 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4341 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3127 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.39-19.11-RP/780c711f-774b-499e-881e-25dba76273a1.json b/data/hfopenllm_v2/icefog72/Ice0.39-19.11-RP/780c711f-774b-499e-881e-25dba76273a1.json deleted file mode 100644 index 9d995223b..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.39-19.11-RP/780c711f-774b-499e-881e-25dba76273a1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.39-19.11-RP/1762652580.2133162", - "retrieved_timestamp": "1762652580.2133162", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.39-19.11-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.39-19.11-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47565902915375646 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5092985137525424 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04984894259818731 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4341458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3126662234042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.40-20.11-RP/4b5529b9-0800-4cd6-b720-a905ab5e6c9a.json b/data/hfopenllm_v2/icefog72/Ice0.40-20.11-RP/4b5529b9-0800-4cd6-b720-a905ab5e6c9a.json new file mode 100644 index 000000000..ef805d1b3 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.40-20.11-RP/4b5529b9-0800-4cd6-b720-a905ab5e6c9a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.40-20.11-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.40-20.11-RP", + "id": "icefog72/Ice0.40-20.11-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4763 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5093 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0642 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4446 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + 
"source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3099 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.40-20.11-RP/5220bee5-74d3-4730-9fee-4ca488e1a37e.json b/data/hfopenllm_v2/icefog72/Ice0.40-20.11-RP/5220bee5-74d3-4730-9fee-4ca488e1a37e.json deleted file mode 100644 index 49d0a5601..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.40-20.11-RP/5220bee5-74d3-4730-9fee-4ca488e1a37e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.40-20.11-RP/1762652580.2136111", - "retrieved_timestamp": "1762652580.213614", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.40-20.11-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.40-20.11-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4762585495374495 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.509308586549064 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06419939577039276 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44459374999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30992353723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.41-22.11-RP/43a30cf0-ccb5-46ce-b520-55ee110002c9.json b/data/hfopenllm_v2/icefog72/Ice0.41-22.11-RP/43a30cf0-ccb5-46ce-b520-55ee110002c9.json deleted file mode 100644 index 206293861..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.41-22.11-RP/43a30cf0-ccb5-46ce-b520-55ee110002c9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.41-22.11-RP/1762652580.213999", - "retrieved_timestamp": 
"1762652580.2140002", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.41-22.11-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.41-22.11-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4620451513096362 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4723318624775949 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030966767371601207 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45597916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26180186170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.41-22.11-RP/84783e4d-5eed-474d-9463-a01a0890850e.json b/data/hfopenllm_v2/icefog72/Ice0.41-22.11-RP/84783e4d-5eed-474d-9463-a01a0890850e.json new file mode 100644 index 000000000..baa053681 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.41-22.11-RP/84783e4d-5eed-474d-9463-a01a0890850e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.41-22.11-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.41-22.11-RP", + "id": "icefog72/Ice0.41-22.11-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, 
+ "score_details": { + "score": 0.462 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4723 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.031 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.456 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2618 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.50-16.01-RP/37602e25-bd23-462a-8566-38f3b0fee63d.json b/data/hfopenllm_v2/icefog72/Ice0.50-16.01-RP/37602e25-bd23-462a-8566-38f3b0fee63d.json deleted file mode 100644 index 1032ae0de..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.50-16.01-RP/37602e25-bd23-462a-8566-38f3b0fee63d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.50-16.01-RP/1762652580.214273", - "retrieved_timestamp": "1762652580.214274", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.50-16.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.50-16.01-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43848987353555235 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.49804682910006176 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4380520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30693151595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.50-16.01-RP/d9fe39c5-24a5-4240-bfc9-59860fcb3911.json b/data/hfopenllm_v2/icefog72/Ice0.50-16.01-RP/d9fe39c5-24a5-4240-bfc9-59860fcb3911.json new file mode 100644 index 000000000..eb81cb8cd --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.50-16.01-RP/d9fe39c5-24a5-4240-bfc9-59860fcb3911.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.50-16.01-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.50-16.01-RP", + "id": "icefog72/Ice0.50-16.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4385 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.498 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0468 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4381 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3069 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.50.1-16.01-RP/2ddf850e-36dc-41b2-92da-e2b45d1544c6.json b/data/hfopenllm_v2/icefog72/Ice0.50.1-16.01-RP/2ddf850e-36dc-41b2-92da-e2b45d1544c6.json new file mode 100644 index 000000000..bcbafb408 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.50.1-16.01-RP/2ddf850e-36dc-41b2-92da-e2b45d1544c6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.50.1-16.01-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.50.1-16.01-RP", + "id": "icefog72/Ice0.50.1-16.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4829 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5107 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0612 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4327 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3132 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.50.1-16.01-RP/fde6323e-0bfe-4ec9-aa86-4371bbd1645a.json b/data/hfopenllm_v2/icefog72/Ice0.50.1-16.01-RP/fde6323e-0bfe-4ec9-aa86-4371bbd1645a.json deleted file mode 100644 index 9342e7623..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.50.1-16.01-RP/fde6323e-0bfe-4ec9-aa86-4371bbd1645a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.50.1-16.01-RP/1762652580.214615", - "retrieved_timestamp": "1762652580.214617", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.50.1-16.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.50.1-16.01-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4829031414424837 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5107472937598788 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43274999999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3132480053191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.51-16.01-RP/7a137ac4-8445-4c1a-9203-abc5f4131213.json b/data/hfopenllm_v2/icefog72/Ice0.51-16.01-RP/7a137ac4-8445-4c1a-9203-abc5f4131213.json deleted file mode 
100644 index 7e96a8361..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.51-16.01-RP/7a137ac4-8445-4c1a-9203-abc5f4131213.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.51-16.01-RP/1762652580.214901", - "retrieved_timestamp": "1762652580.214902", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.51-16.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.51-16.01-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4430610779398662 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5044464794803141 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44366666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30601728723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.51-16.01-RP/b10a9284-fa5e-4a4e-8240-edc98cea6d9c.json b/data/hfopenllm_v2/icefog72/Ice0.51-16.01-RP/b10a9284-fa5e-4a4e-8240-edc98cea6d9c.json new file mode 100644 index 000000000..a158fdd3f --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.51-16.01-RP/b10a9284-fa5e-4a4e-8240-edc98cea6d9c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.51-16.01-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.51-16.01-RP", + "id": "icefog72/Ice0.51-16.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": 
"IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4431 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5044 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0514 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4437 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.306 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.51.1-16.01-RP/2c51bd1d-ebe8-4de9-9749-5f42f7ba3d5a.json b/data/hfopenllm_v2/icefog72/Ice0.51.1-16.01-RP/2c51bd1d-ebe8-4de9-9749-5f42f7ba3d5a.json new file mode 100644 index 000000000..1cd3c840d --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.51.1-16.01-RP/2c51bd1d-ebe8-4de9-9749-5f42f7ba3d5a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.51.1-16.01-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.51.1-16.01-RP", + "id": "icefog72/Ice0.51.1-16.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4573 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5121 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0642 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4394 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.51.1-16.01-RP/859a9706-f73b-4426-9c5a-052625d62f5b.json b/data/hfopenllm_v2/icefog72/Ice0.51.1-16.01-RP/859a9706-f73b-4426-9c5a-052625d62f5b.json deleted file mode 100644 index 8e896753c..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.51.1-16.01-RP/859a9706-f73b-4426-9c5a-052625d62f5b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.51.1-16.01-RP/1762652580.215148", - "retrieved_timestamp": "1762652580.2151492", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.51.1-16.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.51.1-16.01-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4573243438520902 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5121083021452105 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06419939577039276 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43938541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104222074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.52-16.01-RP/425e6f1e-50dd-444f-b0da-5a0c47d5bf06.json b/data/hfopenllm_v2/icefog72/Ice0.52-16.01-RP/425e6f1e-50dd-444f-b0da-5a0c47d5bf06.json new file mode 100644 index 000000000..10d868dab --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.52-16.01-RP/425e6f1e-50dd-444f-b0da-5a0c47d5bf06.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.52-16.01-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.52-16.01-RP", + "id": "icefog72/Ice0.52-16.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4503 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5047 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0506 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4396 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.308 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.52-16.01-RP/72412b78-cc3e-4652-9034-32c72aee5796.json b/data/hfopenllm_v2/icefog72/Ice0.52-16.01-RP/72412b78-cc3e-4652-9034-32c72aee5796.json deleted file mode 100644 index 6efbe282f..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.52-16.01-RP/72412b78-cc3e-4652-9034-32c72aee5796.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.52-16.01-RP/1762652580.21541", - "retrieved_timestamp": "1762652580.215412", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.52-16.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.52-16.01-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4503051902285935 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.504677500406742 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43960416666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.3080119680851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.52.1-16.01-RP/6bfbd9d6-b376-4169-8e6a-2c3210040e97.json b/data/hfopenllm_v2/icefog72/Ice0.52.1-16.01-RP/6bfbd9d6-b376-4169-8e6a-2c3210040e97.json deleted file mode 100644 index 7d9de670a..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.52.1-16.01-RP/6bfbd9d6-b376-4169-8e6a-2c3210040e97.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.52.1-16.01-RP/1762652580.21567", - "retrieved_timestamp": "1762652580.215671", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.52.1-16.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.52.1-16.01-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45492626231731803 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.510648341878344 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06268882175226587 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43938541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31050531914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.52.1-16.01-RP/7e1fcf4e-9f64-4112-934c-4808f07d32b2.json b/data/hfopenllm_v2/icefog72/Ice0.52.1-16.01-RP/7e1fcf4e-9f64-4112-934c-4808f07d32b2.json new file mode 100644 index 000000000..51094c005 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.52.1-16.01-RP/7e1fcf4e-9f64-4112-934c-4808f07d32b2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.52.1-16.01-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": 
{ + "name": "Ice0.52.1-16.01-RP", + "id": "icefog72/Ice0.52.1-16.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4549 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5106 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0627 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4394 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3105 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.53-16.01-RP/6415adfc-35a9-480c-a740-dac02725c8f0.json b/data/hfopenllm_v2/icefog72/Ice0.53-16.01-RP/6415adfc-35a9-480c-a740-dac02725c8f0.json deleted file mode 100644 index b5c35e294..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.53-16.01-RP/6415adfc-35a9-480c-a740-dac02725c8f0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.53-16.01-RP/1762652580.215963", - "retrieved_timestamp": "1762652580.2159638", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.53-16.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.53-16.01-RP", - 
"additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4741352943523185 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5101675133484068 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43274999999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31299867021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.53-16.01-RP/d3666566-09dc-4d53-9996-2301c6fb2721.json b/data/hfopenllm_v2/icefog72/Ice0.53-16.01-RP/d3666566-09dc-4d53-9996-2301c6fb2721.json new file mode 100644 index 000000000..2b83a20cc --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.53-16.01-RP/d3666566-09dc-4d53-9996-2301c6fb2721.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.53-16.01-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.53-16.01-RP", + "id": "icefog72/Ice0.53-16.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4741 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5102 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + 
"dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0634 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4327 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.313 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.54-17.01-RP/36e5efb9-e3f0-4903-a9f1-3d51453bfdc4.json b/data/hfopenllm_v2/icefog72/Ice0.54-17.01-RP/36e5efb9-e3f0-4903-a9f1-3d51453bfdc4.json new file mode 100644 index 000000000..aac00db76 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.54-17.01-RP/36e5efb9-e3f0-4903-a9f1-3d51453bfdc4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.54-17.01-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.54-17.01-RP", + "id": "icefog72/Ice0.54-17.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4379 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4853 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 
0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0408 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4874 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2326 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.54-17.01-RP/94d01e56-d7d5-4680-b577-ebcc0198ca0c.json b/data/hfopenllm_v2/icefog72/Ice0.54-17.01-RP/94d01e56-d7d5-4680-b577-ebcc0198ca0c.json deleted file mode 100644 index 52940b2df..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.54-17.01-RP/94d01e56-d7d5-4680-b577-ebcc0198ca0c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.54-17.01-RP/1762652580.2162719", - "retrieved_timestamp": "1762652580.2162728", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.54-17.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.54-17.01-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4378903531518593 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4853448809638454 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48741666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23262965425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.55-17.01-RP/a2de66f0-bbd1-40b9-95d3-74e0335b853b.json b/data/hfopenllm_v2/icefog72/Ice0.55-17.01-RP/a2de66f0-bbd1-40b9-95d3-74e0335b853b.json deleted file mode 100644 index 77936a744..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.55-17.01-RP/a2de66f0-bbd1-40b9-95d3-74e0335b853b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.55-17.01-RP/1762652580.2165911", - "retrieved_timestamp": "1762652580.2165918", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.55-17.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.55-17.01-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.496067101956143 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5076567509425027 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4725 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2657912234042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.55-17.01-RP/a6dba337-81d2-40c6-89c2-aee6de82282e.json b/data/hfopenllm_v2/icefog72/Ice0.55-17.01-RP/a6dba337-81d2-40c6-89c2-aee6de82282e.json new file mode 100644 index 000000000..d93982f8a --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.55-17.01-RP/a6dba337-81d2-40c6-89c2-aee6de82282e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/icefog72_Ice0.55-17.01-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.55-17.01-RP", + "id": "icefog72/Ice0.55-17.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4961 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5077 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0604 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4725 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2658 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.57-17.01-RP/8d99bf0e-7db0-46f5-96a0-7f977b8cf5f2.json b/data/hfopenllm_v2/icefog72/Ice0.57-17.01-RP/8d99bf0e-7db0-46f5-96a0-7f977b8cf5f2.json deleted file mode 100644 index 7eff12087..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.57-17.01-RP/8d99bf0e-7db0-46f5-96a0-7f977b8cf5f2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.57-17.01-RP/1762652580.216822", - "retrieved_timestamp": "1762652580.216822", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.57-17.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.57-17.01-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5151763986223221 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5064080420224116 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46859375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26512632978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.57-17.01-RP/e44b8d9a-f270-45c8-b126-6a8911c35436.json b/data/hfopenllm_v2/icefog72/Ice0.57-17.01-RP/e44b8d9a-f270-45c8-b126-6a8911c35436.json new file mode 100644 index 000000000..100730827 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.57-17.01-RP/e44b8d9a-f270-45c8-b126-6a8911c35436.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.57-17.01-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.57-17.01-RP", + "id": "icefog72/Ice0.57-17.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5152 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": 
"SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5064 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0514 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4686 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.60-18.01-RP/44d5e1ac-45d5-42aa-b9fa-f18112cf6676.json b/data/hfopenllm_v2/icefog72/Ice0.60-18.01-RP/44d5e1ac-45d5-42aa-b9fa-f18112cf6676.json new file mode 100644 index 000000000..79e0d8058 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.60-18.01-RP/44d5e1ac-45d5-42aa-b9fa-f18112cf6676.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.60-18.01-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.60-18.01-RP", + "id": "icefog72/Ice0.60-18.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5374 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5094 + } + }, + { + "evaluation_name": "MATH 
Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0536 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.467 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2837 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.60-18.01-RP/b5c42995-f1fe-4a7e-90c1-d8fb00cba116.json b/data/hfopenllm_v2/icefog72/Ice0.60-18.01-RP/b5c42995-f1fe-4a7e-90c1-d8fb00cba116.json deleted file mode 100644 index 7a9ed0303..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.60-18.01-RP/b5c42995-f1fe-4a7e-90c1-d8fb00cba116.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.60-18.01-RP/1762652580.217043", - "retrieved_timestamp": "1762652580.2170439", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.60-18.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.60-18.01-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5374329002601985 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5093724614980669 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46704166666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28366023936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.60.1-18.01-RP/4246401d-9049-4c83-83d4-e2d9efa4dded.json b/data/hfopenllm_v2/icefog72/Ice0.60.1-18.01-RP/4246401d-9049-4c83-83d4-e2d9efa4dded.json new file mode 100644 index 000000000..a75134cde --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.60.1-18.01-RP/4246401d-9049-4c83-83d4-e2d9efa4dded.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.60.1-18.01-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.60.1-18.01-RP", + "id": "icefog72/Ice0.60.1-18.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5188 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.512 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0461 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.4498 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2914 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.60.1-18.01-RP/8a14ed64-1408-469e-ab8d-05c897904d20.json b/data/hfopenllm_v2/icefog72/Ice0.60.1-18.01-RP/8a14ed64-1408-469e-ab8d-05c897904d20.json deleted file mode 100644 index da23cb0d8..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.60.1-18.01-RP/8a14ed64-1408-469e-ab8d-05c897904d20.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.60.1-18.01-RP/1762652580.217258", - "retrieved_timestamp": "1762652580.217259", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.60.1-18.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.60.1-18.01-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5187735209244804 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5119675522804026 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04607250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4497708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2913896276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.61-18.01-RP/1c166a10-c176-42c7-9421-750e170f5706.json b/data/hfopenllm_v2/icefog72/Ice0.61-18.01-RP/1c166a10-c176-42c7-9421-750e170f5706.json deleted file mode 100644 index 8a4506f0d..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.61-18.01-RP/1c166a10-c176-42c7-9421-750e170f5706.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": 
"0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.61-18.01-RP/1762652580.2174668", - "retrieved_timestamp": "1762652580.2174678", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.61-18.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.61-18.01-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5441273598496433 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5104839613346842 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4697395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27086103723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.61-18.01-RP/26c4785a-0caf-4b01-be5d-1e421bfeb698.json b/data/hfopenllm_v2/icefog72/Ice0.61-18.01-RP/26c4785a-0caf-4b01-be5d-1e421bfeb698.json new file mode 100644 index 000000000..de443599c --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.61-18.01-RP/26c4785a-0caf-4b01-be5d-1e421bfeb698.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.61-18.01-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.61-18.01-RP", + "id": "icefog72/Ice0.61-18.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on 
IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5441 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5105 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0468 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4697 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2709 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.62-18.01-RP/0c5bb530-f59b-4097-8a79-9e4f524385a2.json b/data/hfopenllm_v2/icefog72/Ice0.62-18.01-RP/0c5bb530-f59b-4097-8a79-9e4f524385a2.json deleted file mode 100644 index edd4b5335..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.62-18.01-RP/0c5bb530-f59b-4097-8a79-9e4f524385a2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.62-18.01-RP/1762652580.21767", - "retrieved_timestamp": "1762652580.217671", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.62-18.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.62-18.01-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.536733644507684 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on 
BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5103327208197285 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4537708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28773271276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.62-18.01-RP/cc9b9a25-18f9-4cc3-a756-3975a3a3be7d.json b/data/hfopenllm_v2/icefog72/Ice0.62-18.01-RP/cc9b9a25-18f9-4cc3-a756-3975a3a3be7d.json new file mode 100644 index 000000000..1c76bab97 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.62-18.01-RP/cc9b9a25-18f9-4cc3-a756-3975a3a3be7d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.62-18.01-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.62-18.01-RP", + "id": "icefog72/Ice0.62-18.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5367 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5103 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0574 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4538 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2877 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.62.1-24.01-RP/26ba869e-ae3b-44ef-a215-f94e4e4cb1fc.json b/data/hfopenllm_v2/icefog72/Ice0.62.1-24.01-RP/26ba869e-ae3b-44ef-a215-f94e4e4cb1fc.json deleted file mode 100644 index b83e89481..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.62.1-24.01-RP/26ba869e-ae3b-44ef-a215-f94e4e4cb1fc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.62.1-24.01-RP/1762652580.2178729", - "retrieved_timestamp": "1762652580.2178729", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.62.1-24.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.62.1-24.01-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5181740005407873 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5108967760246949 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.055891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45510416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28706781914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.62.1-24.01-RP/b4edb7f5-a675-4627-af96-7ed0909da1e5.json b/data/hfopenllm_v2/icefog72/Ice0.62.1-24.01-RP/b4edb7f5-a675-4627-af96-7ed0909da1e5.json new file mode 100644 index 000000000..da0bcab1f --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.62.1-24.01-RP/b4edb7f5-a675-4627-af96-7ed0909da1e5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.62.1-24.01-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.62.1-24.01-RP", + "id": "icefog72/Ice0.62.1-24.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5182 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5109 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0559 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4551 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2871 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.64-24.01-RP/461b6f40-6f19-48b1-857e-f0fb37f929f9.json 
b/data/hfopenllm_v2/icefog72/Ice0.64-24.01-RP/461b6f40-6f19-48b1-857e-f0fb37f929f9.json new file mode 100644 index 000000000..3cff42e22 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.64-24.01-RP/461b6f40-6f19-48b1-857e-f0fb37f929f9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.64-24.01-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.64-24.01-RP", + "id": "icefog72/Ice0.64-24.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5441 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.506 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0627 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.462 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2933 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.64-24.01-RP/d7313786-f553-454e-b2c8-62a0162c9339.json b/data/hfopenllm_v2/icefog72/Ice0.64-24.01-RP/d7313786-f553-454e-b2c8-62a0162c9339.json deleted file mode 100644 index 8b32e9405..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.64-24.01-RP/d7313786-f553-454e-b2c8-62a0162c9339.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - 
"schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.64-24.01-RP/1762652580.218076", - "retrieved_timestamp": "1762652580.218076", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.64-24.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.64-24.01-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5440774921652327 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5059610114856247 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06268882175226587 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4620208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29330119680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.64.1-24.01-RP/359daeb1-3546-473f-801b-c9942fd010aa.json b/data/hfopenllm_v2/icefog72/Ice0.64.1-24.01-RP/359daeb1-3546-473f-801b-c9942fd010aa.json deleted file mode 100644 index 245275abe..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.64.1-24.01-RP/359daeb1-3546-473f-801b-c9942fd010aa.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.64.1-24.01-RP/1762652580.218272", - "retrieved_timestamp": "1762652580.218272", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.64.1-24.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.64.1-24.01-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { 
- "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5446770125489258 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5059610114856247 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06268882175226587 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4620208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29330119680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.64.1-24.01-RP/e924270d-a655-4093-91b2-f73b7f12eefd.json b/data/hfopenllm_v2/icefog72/Ice0.64.1-24.01-RP/e924270d-a655-4093-91b2-f73b7f12eefd.json new file mode 100644 index 000000000..63500c106 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.64.1-24.01-RP/e924270d-a655-4093-91b2-f73b7f12eefd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.64.1-24.01-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.64.1-24.01-RP", + "id": "icefog72/Ice0.64.1-24.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5447 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.506 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0627 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.462 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2933 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.65-25.01-RP/af8905e0-e969-45bd-8e09-e7316fff0914.json b/data/hfopenllm_v2/icefog72/Ice0.65-25.01-RP/af8905e0-e969-45bd-8e09-e7316fff0914.json new file mode 100644 index 000000000..6575a19cf --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.65-25.01-RP/af8905e0-e969-45bd-8e09-e7316fff0914.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.65-25.01-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.65-25.01-RP", + "id": "icefog72/Ice0.65-25.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5029 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5096 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.065 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": 
"Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.434 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2997 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.65-25.01-RP/fa5d2148-c45b-4266-a6a0-11b471273f75.json b/data/hfopenllm_v2/icefog72/Ice0.65-25.01-RP/fa5d2148-c45b-4266-a6a0-11b471273f75.json deleted file mode 100644 index 12d8dd287..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.65-25.01-RP/fa5d2148-c45b-4266-a6a0-11b471273f75.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.65-25.01-RP/1762652580.2184708", - "retrieved_timestamp": "1762652580.218472", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.65-25.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.65-25.01-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5029366525264077 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5095976254774931 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4339583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": 
"Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29970079787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.66-25.01-RP/b619dad2-fcb2-45ab-b603-ae1da3916eb7.json b/data/hfopenllm_v2/icefog72/Ice0.66-25.01-RP/b619dad2-fcb2-45ab-b603-ae1da3916eb7.json deleted file mode 100644 index 61983c8d9..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.66-25.01-RP/b619dad2-fcb2-45ab-b603-ae1da3916eb7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.66-25.01-RP/1762652580.2186701", - "retrieved_timestamp": "1762652580.2186701", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.66-25.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.66-25.01-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.532487134137422 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5128983540188711 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44344791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3039394946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.66-25.01-RP/e92a6d31-2277-4093-8fae-b3dfaa2d47dd.json b/data/hfopenllm_v2/icefog72/Ice0.66-25.01-RP/e92a6d31-2277-4093-8fae-b3dfaa2d47dd.json new file mode 100644 index 000000000..f0e31f665 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.66-25.01-RP/e92a6d31-2277-4093-8fae-b3dfaa2d47dd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.66-25.01-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": 
"Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.66-25.01-RP", + "id": "icefog72/Ice0.66-25.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5325 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5129 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0604 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4434 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3039 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.67-25.01-RP/47472cd9-36d3-4074-83d4-af53b9c23758.json b/data/hfopenllm_v2/icefog72/Ice0.67-25.01-RP/47472cd9-36d3-4074-83d4-af53b9c23758.json new file mode 100644 index 000000000..46ca836b5 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.67-25.01-RP/47472cd9-36d3-4074-83d4-af53b9c23758.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.67-25.01-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.67-25.01-RP", + "id": "icefog72/Ice0.67-25.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + 
"architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5361 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5113 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0748 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4279 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3097 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.67-25.01-RP/cf0a4a2d-a104-43cf-ac01-66250e880ff0.json b/data/hfopenllm_v2/icefog72/Ice0.67-25.01-RP/cf0a4a2d-a104-43cf-ac01-66250e880ff0.json deleted file mode 100644 index 069c11ba5..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.67-25.01-RP/cf0a4a2d-a104-43cf-ac01-66250e880ff0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.67-25.01-RP/1762652580.21887", - "retrieved_timestamp": "1762652580.218871", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.67-25.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.67-25.01-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.536134124123991 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5112894150790012 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07477341389728097 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42788541666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30967420212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.68-25.01-RP/b922f4e1-1fd9-4a32-94ce-4784430cef51.json b/data/hfopenllm_v2/icefog72/Ice0.68-25.01-RP/b922f4e1-1fd9-4a32-94ce-4784430cef51.json new file mode 100644 index 000000000..02b0eca7c --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.68-25.01-RP/b922f4e1-1fd9-4a32-94ce-4784430cef51.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.68-25.01-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.68-25.01-RP", + "id": "icefog72/Ice0.68-25.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5514 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.513 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0725 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4446 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.68-25.01-RP/dd7cb16f-0752-4639-aa99-90b9be448295.json b/data/hfopenllm_v2/icefog72/Ice0.68-25.01-RP/dd7cb16f-0752-4639-aa99-90b9be448295.json deleted file mode 100644 index 9938ddd88..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.68-25.01-RP/dd7cb16f-0752-4639-aa99-90b9be448295.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.68-25.01-RP/1762652580.2190669", - "retrieved_timestamp": "1762652580.2190678", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.68-25.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.68-25.01-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5513714721383707 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5130058094823416 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07250755287009064 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44456249999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011968085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.69-25.01-RP/5bb2e77f-7709-4eb8-bd08-3c8da4a56310.json b/data/hfopenllm_v2/icefog72/Ice0.69-25.01-RP/5bb2e77f-7709-4eb8-bd08-3c8da4a56310.json new file mode 100644 index 000000000..87263c5f0 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.69-25.01-RP/5bb2e77f-7709-4eb8-bd08-3c8da4a56310.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.69-25.01-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.69-25.01-RP", + "id": "icefog72/Ice0.69-25.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5438 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5098 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0566 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3129 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4486 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2965 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.69-25.01-RP/643da0d0-176a-40dd-b096-5aac8de827e9.json b/data/hfopenllm_v2/icefog72/Ice0.69-25.01-RP/643da0d0-176a-40dd-b096-5aac8de827e9.json deleted file mode 100644 index 8735bb0bf..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.69-25.01-RP/643da0d0-176a-40dd-b096-5aac8de827e9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.69-25.01-RP/1762652580.219263", - "retrieved_timestamp": "1762652580.219264", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.69-25.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.69-25.01-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5437527981311808 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5097683665599672 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4485625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29654255319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.7-29.09-RP/35937213-bb16-4935-9d92-9fa8fd61aac3.json b/data/hfopenllm_v2/icefog72/Ice0.7-29.09-RP/35937213-bb16-4935-9d92-9fa8fd61aac3.json new file mode 100644 index 000000000..88704a4a5 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.7-29.09-RP/35937213-bb16-4935-9d92-9fa8fd61aac3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.7-29.09-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.7-29.09-RP", + "id": "icefog72/Ice0.7-29.09-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5176 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5048 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0665 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2878 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4238 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3127 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.7-29.09-RP/9c6cf7a1-1a17-4070-9ce3-633461334f42.json b/data/hfopenllm_v2/icefog72/Ice0.7-29.09-RP/9c6cf7a1-1a17-4070-9ce3-633461334f42.json deleted file mode 100644 index 04253b009..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.7-29.09-RP/9c6cf7a1-1a17-4070-9ce3-633461334f42.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.7-29.09-RP/1762652580.2194638", - "retrieved_timestamp": "1762652580.219465", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.7-29.09-RP", - "developer": 
"icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.7-29.09-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5175744801570943 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5047661992357916 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4237916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3126662234042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.70-25.01-RP/04122d1b-929d-439c-bb8d-f08508f7a00e.json b/data/hfopenllm_v2/icefog72/Ice0.70-25.01-RP/04122d1b-929d-439c-bb8d-f08508f7a00e.json new file mode 100644 index 000000000..57317f5c1 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.70-25.01-RP/04122d1b-929d-439c-bb8d-f08508f7a00e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.70-25.01-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.70-25.01-RP", + "id": "icefog72/Ice0.70-25.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5498 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.5136 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0597 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4512 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2996 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.70-25.01-RP/e109acd0-c7e3-4a9f-8e06-c428b95acc83.json b/data/hfopenllm_v2/icefog72/Ice0.70-25.01-RP/e109acd0-c7e3-4a9f-8e06-c428b95acc83.json deleted file mode 100644 index 1d447e3ab..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.70-25.01-RP/e109acd0-c7e3-4a9f-8e06-c428b95acc83.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.70-25.01-RP/1762652580.2196732", - "retrieved_timestamp": "1762652580.219674", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.70-25.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.70-25.01-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.549797869652522 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.513632436415875 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05966767371601209 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45119791666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2996176861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.70.1-01.02-RP/03beb242-2628-4ea0-a2f3-c3ec43d379de.json b/data/hfopenllm_v2/icefog72/Ice0.70.1-01.02-RP/03beb242-2628-4ea0-a2f3-c3ec43d379de.json new file mode 100644 index 000000000..72031f33b --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.70.1-01.02-RP/03beb242-2628-4ea0-a2f3-c3ec43d379de.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.70.1-01.02-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.70.1-01.02-RP", + "id": "icefog72/Ice0.70.1-01.02-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.507 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.506 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.034 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.4599 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2749 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.70.1-01.02-RP/ee088f70-5734-4951-8bc0-e0579a053fd2.json b/data/hfopenllm_v2/icefog72/Ice0.70.1-01.02-RP/ee088f70-5734-4951-8bc0-e0579a053fd2.json deleted file mode 100644 index 8881b0c9c..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.70.1-01.02-RP/ee088f70-5734-4951-8bc0-e0579a053fd2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.70.1-01.02-RP/1762652580.219877", - "retrieved_timestamp": "1762652580.219877", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.70.1-01.02-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.70.1-01.02-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5069582042314393 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5059798926804829 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.033987915407854986 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4599166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2748503989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.73-01.02-RP/46d55b7b-1972-4cb0-97ca-e04d306282a7.json b/data/hfopenllm_v2/icefog72/Ice0.73-01.02-RP/46d55b7b-1972-4cb0-97ca-e04d306282a7.json new file mode 100644 index 000000000..6f985e2ba --- /dev/null +++ 
b/data/hfopenllm_v2/icefog72/Ice0.73-01.02-RP/46d55b7b-1972-4cb0-97ca-e04d306282a7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.73-01.02-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.73-01.02-RP", + "id": "icefog72/Ice0.73-01.02-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5292 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5103 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0385 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2702 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.73-01.02-RP/ba7bf09f-b7a1-4fd4-b262-4929a81da34a.json b/data/hfopenllm_v2/icefog72/Ice0.73-01.02-RP/ba7bf09f-b7a1-4fd4-b262-4929a81da34a.json deleted file mode 100644 index d044a764d..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.73-01.02-RP/ba7bf09f-b7a1-4fd4-b262-4929a81da34a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.73-01.02-RP/1762652580.220075", - "retrieved_timestamp": "1762652580.220076", - 
"source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.73-01.02-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.73-01.02-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.529164838184905 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5103425890792322 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46639583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27019614361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.74-02.02-RP/32730d82-cfac-481f-9a22-9cbe40646218.json b/data/hfopenllm_v2/icefog72/Ice0.74-02.02-RP/32730d82-cfac-481f-9a22-9cbe40646218.json new file mode 100644 index 000000000..db1bba27d --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.74-02.02-RP/32730d82-cfac-481f-9a22-9cbe40646218.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.74-02.02-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.74-02.02-RP", + "id": "icefog72/Ice0.74-02.02-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.2935 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4646 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0015 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.428 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2143 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.74-02.02-RP/7470c7d4-80fe-4e88-a695-c628f9ed3682.json b/data/hfopenllm_v2/icefog72/Ice0.74-02.02-RP/7470c7d4-80fe-4e88-a695-c628f9ed3682.json deleted file mode 100644 index 2f7afa0bd..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.74-02.02-RP/7470c7d4-80fe-4e88-a695-c628f9ed3682.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.74-02.02-RP/1762652580.220269", - "retrieved_timestamp": "1762652580.2202702", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.74-02.02-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.74-02.02-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2935344884905384 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.4646134965075064 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0015105740181268882 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42804166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21434507978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.76-02.02-RP/701743bb-1ddf-4810-824a-38959d4a0e02.json b/data/hfopenllm_v2/icefog72/Ice0.76-02.02-RP/701743bb-1ddf-4810-824a-38959d4a0e02.json deleted file mode 100644 index 3106131ee..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.76-02.02-RP/701743bb-1ddf-4810-824a-38959d4a0e02.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.76-02.02-RP/1762652580.220735", - "retrieved_timestamp": "1762652580.220737", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.76-02.02-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.76-02.02-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45290274250100837 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5085610407875073 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.43616666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2652094414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.76-02.02-RP/a290a75f-753b-489d-87a2-ce0637c09f41.json b/data/hfopenllm_v2/icefog72/Ice0.76-02.02-RP/a290a75f-753b-489d-87a2-ce0637c09f41.json new file mode 100644 index 000000000..9c170c9cd --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.76-02.02-RP/a290a75f-753b-489d-87a2-ce0637c09f41.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.76-02.02-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.76-02.02-RP", + "id": "icefog72/Ice0.76-02.02-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4529 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5086 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0144 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4362 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2652 + } + } + ] 
+} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.77-02.02-RP/0eebefc6-138f-4af5-a8b6-a35c798a38cb.json b/data/hfopenllm_v2/icefog72/Ice0.77-02.02-RP/0eebefc6-138f-4af5-a8b6-a35c798a38cb.json deleted file mode 100644 index 612ed641a..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.77-02.02-RP/0eebefc6-138f-4af5-a8b6-a35c798a38cb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.77-02.02-RP/1762652580.221007", - "retrieved_timestamp": "1762652580.2210078", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.77-02.02-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.77-02.02-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5309633993359841 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5109257300160749 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4765 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29986702127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.77-02.02-RP/54032eb0-c4cd-4c76-be2e-f0c81bd26365.json b/data/hfopenllm_v2/icefog72/Ice0.77-02.02-RP/54032eb0-c4cd-4c76-be2e-f0c81bd26365.json new file mode 100644 index 000000000..d50d5a736 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.77-02.02-RP/54032eb0-c4cd-4c76-be2e-f0c81bd26365.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.77-02.02-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.77-02.02-RP", + "id": "icefog72/Ice0.77-02.02-RP", + "developer": 
"icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.531 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5109 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0393 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4765 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2999 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.78-02.02-RP/73b59506-cc1d-413c-a28b-d25e0e6bf413.json b/data/hfopenllm_v2/icefog72/Ice0.78-02.02-RP/73b59506-cc1d-413c-a28b-d25e0e6bf413.json new file mode 100644 index 000000000..cbf1db27b --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.78-02.02-RP/73b59506-cc1d-413c-a28b-d25e0e6bf413.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.78-02.02-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.78-02.02-RP", + "id": "icefog72/Ice0.78-02.02-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": 
"IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4053 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5002 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0438 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4686 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2955 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.78-02.02-RP/ec943fa1-b138-46e8-b1ae-c9a476c73ed1.json b/data/hfopenllm_v2/icefog72/Ice0.78-02.02-RP/ec943fa1-b138-46e8-b1ae-c9a476c73ed1.json deleted file mode 100644 index 7963da79b..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.78-02.02-RP/ec943fa1-b138-46e8-b1ae-c9a476c73ed1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.78-02.02-RP/1762652580.221266", - "retrieved_timestamp": "1762652580.221267", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.78-02.02-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.78-02.02-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.405292401937969 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5002126961381052 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.468625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2954621010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.80-03.02-RP/847b4e14-a07c-45ed-b2eb-ecea0f80147b.json b/data/hfopenllm_v2/icefog72/Ice0.80-03.02-RP/847b4e14-a07c-45ed-b2eb-ecea0f80147b.json deleted file mode 100644 index cf628629d..000000000 --- a/data/hfopenllm_v2/icefog72/Ice0.80-03.02-RP/847b4e14-a07c-45ed-b2eb-ecea0f80147b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.80-03.02-RP/1762652580.2214909", - "retrieved_timestamp": "1762652580.221492", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/Ice0.80-03.02-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.80-03.02-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5516462984880118 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5097962218679292 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.055891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4923125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2912234042553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/Ice0.80-03.02-RP/bea2dcd6-4772-4aac-bcbc-4802cfb33495.json b/data/hfopenllm_v2/icefog72/Ice0.80-03.02-RP/bea2dcd6-4772-4aac-bcbc-4802cfb33495.json new file mode 100644 index 000000000..1c2303ea6 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.80-03.02-RP/bea2dcd6-4772-4aac-bcbc-4802cfb33495.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.80-03.02-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ice0.80-03.02-RP", + "id": "icefog72/Ice0.80-03.02-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5516 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5098 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0559 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2785 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4923 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": 
"TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2912 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/IceCocoaRP-7b/5427828d-b53d-4e44-82ed-df6a9c0f9a47.json b/data/hfopenllm_v2/icefog72/IceCocoaRP-7b/5427828d-b53d-4e44-82ed-df6a9c0f9a47.json deleted file mode 100644 index 505829224..000000000 --- a/data/hfopenllm_v2/icefog72/IceCocoaRP-7b/5427828d-b53d-4e44-82ed-df6a9c0f9a47.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_IceCocoaRP-7b/1762652580.2217228", - "retrieved_timestamp": "1762652580.2217238", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/IceCocoaRP-7b", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/IceCocoaRP-7b", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4962421929369628 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4937902147076245 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4197916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3098404255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/IceCocoaRP-7b/66275215-28e6-42bc-bc22-5d152682ce53.json b/data/hfopenllm_v2/icefog72/IceCocoaRP-7b/66275215-28e6-42bc-bc22-5d152682ce53.json new file mode 100644 index 000000000..481d7723a --- /dev/null +++ b/data/hfopenllm_v2/icefog72/IceCocoaRP-7b/66275215-28e6-42bc-bc22-5d152682ce53.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_IceCocoaRP-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + 
"source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "IceCocoaRP-7b", + "id": "icefog72/IceCocoaRP-7b", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4962 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4938 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0574 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4198 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3098 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/IceCoffeeRP-7b/9015365c-400b-4fa3-85f2-a1033b030cf7.json b/data/hfopenllm_v2/icefog72/IceCoffeeRP-7b/9015365c-400b-4fa3-85f2-a1033b030cf7.json new file mode 100644 index 000000000..dd02fc052 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/IceCoffeeRP-7b/9015365c-400b-4fa3-85f2-a1033b030cf7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_IceCoffeeRP-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "IceCoffeeRP-7b", + "id": "icefog72/IceCoffeeRP-7b", + "developer": "icefog72", + "inference_platform": "unknown", + 
"additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4959 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4889 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0544 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.416 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2975 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/IceCoffeeRP-7b/bf5e2b11-79ce-49ed-947b-fb34110a3802.json b/data/hfopenllm_v2/icefog72/IceCoffeeRP-7b/bf5e2b11-79ce-49ed-947b-fb34110a3802.json deleted file mode 100644 index 187ee6f4c..000000000 --- a/data/hfopenllm_v2/icefog72/IceCoffeeRP-7b/bf5e2b11-79ce-49ed-947b-fb34110a3802.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_IceCoffeeRP-7b/1762652580.2220101", - "retrieved_timestamp": "1762652580.2220109", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/IceCoffeeRP-7b", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/IceCoffeeRP-7b", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4959174989029109 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48887216244327214 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4159791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2974567819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/IceDrinkByFrankensteinV3RP/39325b65-ad12-44ef-a1bf-ffe9e870ced8.json b/data/hfopenllm_v2/icefog72/IceDrinkByFrankensteinV3RP/39325b65-ad12-44ef-a1bf-ffe9e870ced8.json deleted file mode 100644 index b9c880773..000000000 --- a/data/hfopenllm_v2/icefog72/IceDrinkByFrankensteinV3RP/39325b65-ad12-44ef-a1bf-ffe9e870ced8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_IceDrinkByFrankensteinV3RP/1762652580.222236", - "retrieved_timestamp": "1762652580.222236", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/IceDrinkByFrankensteinV3RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/IceDrinkByFrankensteinV3RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4974911013887596 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4832523723413275 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4253125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.292719414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/IceDrinkByFrankensteinV3RP/55d52914-0904-4e6e-8b37-c22b06f5f2bf.json b/data/hfopenllm_v2/icefog72/IceDrinkByFrankensteinV3RP/55d52914-0904-4e6e-8b37-c22b06f5f2bf.json new file mode 100644 index 000000000..ccf1b184c --- /dev/null +++ b/data/hfopenllm_v2/icefog72/IceDrinkByFrankensteinV3RP/55d52914-0904-4e6e-8b37-c22b06f5f2bf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_IceDrinkByFrankensteinV3RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "IceDrinkByFrankensteinV3RP", + "id": "icefog72/IceDrinkByFrankensteinV3RP", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4975 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4833 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0506 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4253 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2927 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/IceDrinkNameGoesHereRP-7b-Model_Stock/3677260a-2fd5-41bf-9010-f1b31cedacbc.json b/data/hfopenllm_v2/icefog72/IceDrinkNameGoesHereRP-7b-Model_Stock/3677260a-2fd5-41bf-9010-f1b31cedacbc.json new file mode 100644 index 000000000..24025b247 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/IceDrinkNameGoesHereRP-7b-Model_Stock/3677260a-2fd5-41bf-9010-f1b31cedacbc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_IceDrinkNameGoesHereRP-7b-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "IceDrinkNameGoesHereRP-7b-Model_Stock", + "id": "icefog72/IceDrinkNameGoesHereRP-7b-Model_Stock", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4968 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4658 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0408 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, 
+ "score_details": { + "score": 0.4067 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2817 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/IceDrinkNameGoesHereRP-7b-Model_Stock/b0aaf6e9-ffe3-4de9-b3f5-c33d52b59ed2.json b/data/hfopenllm_v2/icefog72/IceDrinkNameGoesHereRP-7b-Model_Stock/b0aaf6e9-ffe3-4de9-b3f5-c33d52b59ed2.json deleted file mode 100644 index ed7d156d7..000000000 --- a/data/hfopenllm_v2/icefog72/IceDrinkNameGoesHereRP-7b-Model_Stock/b0aaf6e9-ffe3-4de9-b3f5-c33d52b59ed2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_IceDrinkNameGoesHereRP-7b-Model_Stock/1762652580.2224698", - "retrieved_timestamp": "1762652580.2224698", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/IceDrinkNameGoesHereRP-7b-Model_Stock", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/IceDrinkNameGoesHereRP-7b-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49684171332065585 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46578646938927254 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4067395833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2816655585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/IceDrinkNameNotFoundRP-7b-Model_Stock/f0e6fa5e-20c2-407d-8301-70d86cb1a51f.json b/data/hfopenllm_v2/icefog72/IceDrinkNameNotFoundRP-7b-Model_Stock/f0e6fa5e-20c2-407d-8301-70d86cb1a51f.json deleted 
file mode 100644 index 5f0226fc9..000000000 --- a/data/hfopenllm_v2/icefog72/IceDrinkNameNotFoundRP-7b-Model_Stock/f0e6fa5e-20c2-407d-8301-70d86cb1a51f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_IceDrinkNameNotFoundRP-7b-Model_Stock/1762652580.2227032", - "retrieved_timestamp": "1762652580.2227042", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/IceDrinkNameNotFoundRP-7b-Model_Stock", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/IceDrinkNameNotFoundRP-7b-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5130032757527804 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.502625425089929 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4371875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3064328457446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/IceDrinkNameNotFoundRP-7b-Model_Stock/fc54f87a-2e4a-4f3f-b407-e268c4487d16.json b/data/hfopenllm_v2/icefog72/IceDrinkNameNotFoundRP-7b-Model_Stock/fc54f87a-2e4a-4f3f-b407-e268c4487d16.json new file mode 100644 index 000000000..2746a283f --- /dev/null +++ b/data/hfopenllm_v2/icefog72/IceDrinkNameNotFoundRP-7b-Model_Stock/fc54f87a-2e4a-4f3f-b407-e268c4487d16.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_IceDrinkNameNotFoundRP-7b-Model_Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "IceDrinkNameNotFoundRP-7b-Model_Stock", + "id": "icefog72/IceDrinkNameNotFoundRP-7b-Model_Stock", + "developer": "icefog72", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.513 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5026 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0604 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4372 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3064 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/IceDrunkCherryRP-7b/8d893736-1707-4c0b-860d-16c62ec26d78.json b/data/hfopenllm_v2/icefog72/IceDrunkCherryRP-7b/8d893736-1707-4c0b-860d-16c62ec26d78.json new file mode 100644 index 000000000..fdf69bdb5 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/IceDrunkCherryRP-7b/8d893736-1707-4c0b-860d-16c62ec26d78.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_IceDrunkCherryRP-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "IceDrunkCherryRP-7b", + "id": "icefog72/IceDrunkCherryRP-7b", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + 
"dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4898 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4847 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0612 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2768 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4292 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3009 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/IceDrunkCherryRP-7b/c0e3f4ee-52dc-45c3-844a-8cc4e4520f24.json b/data/hfopenllm_v2/icefog72/IceDrunkCherryRP-7b/c0e3f4ee-52dc-45c3-844a-8cc4e4520f24.json deleted file mode 100644 index 30d18f2b2..000000000 --- a/data/hfopenllm_v2/icefog72/IceDrunkCherryRP-7b/c0e3f4ee-52dc-45c3-844a-8cc4e4520f24.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_IceDrunkCherryRP-7b/1762652580.222923", - "retrieved_timestamp": "1762652580.222924", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/IceDrunkCherryRP-7b", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/IceDrunkCherryRP-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.48982255969715904 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4846629039263151 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4291875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3009474734042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/IceDrunkenCherryRP-7b/9d1e6b55-aa7c-4fea-8a77-92795c0ee60a.json b/data/hfopenllm_v2/icefog72/IceDrunkenCherryRP-7b/9d1e6b55-aa7c-4fea-8a77-92795c0ee60a.json deleted file mode 100644 index cf1259e5a..000000000 --- a/data/hfopenllm_v2/icefog72/IceDrunkenCherryRP-7b/9d1e6b55-aa7c-4fea-8a77-92795c0ee60a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_IceDrunkenCherryRP-7b/1762652580.223197", - "retrieved_timestamp": "1762652580.223207", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/IceDrunkenCherryRP-7b", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/IceDrunkenCherryRP-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4762585495374495 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.509308586549064 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06419939577039276 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44459374999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30992353723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/IceDrunkenCherryRP-7b/d3d2728f-74bf-4196-a909-43797d8b628a.json b/data/hfopenllm_v2/icefog72/IceDrunkenCherryRP-7b/d3d2728f-74bf-4196-a909-43797d8b628a.json new file mode 100644 index 000000000..2dca8d588 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/IceDrunkenCherryRP-7b/d3d2728f-74bf-4196-a909-43797d8b628a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_IceDrunkenCherryRP-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "IceDrunkenCherryRP-7b", + "id": "icefog72/IceDrunkenCherryRP-7b", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4763 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5093 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0642 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4446 + } + }, + { + "evaluation_name": "MMLU-PRO", + 
"source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3099 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/IceEspressoRPv2-7b/ade14c35-442b-4a0a-8345-99b7b58dc194.json b/data/hfopenllm_v2/icefog72/IceEspressoRPv2-7b/ade14c35-442b-4a0a-8345-99b7b58dc194.json deleted file mode 100644 index 08a05dc9b..000000000 --- a/data/hfopenllm_v2/icefog72/IceEspressoRPv2-7b/ade14c35-442b-4a0a-8345-99b7b58dc194.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_IceEspressoRPv2-7b/1762652580.223459", - "retrieved_timestamp": "1762652580.2234602", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/IceEspressoRPv2-7b", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/IceEspressoRPv2-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4977160600539901 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5054890156350785 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.061933534743202415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43306249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3061003989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/IceEspressoRPv2-7b/ed241e67-8718-48be-a6e8-19e295a2b5cd.json b/data/hfopenllm_v2/icefog72/IceEspressoRPv2-7b/ed241e67-8718-48be-a6e8-19e295a2b5cd.json new file mode 100644 index 000000000..1ca2a66f9 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/IceEspressoRPv2-7b/ed241e67-8718-48be-a6e8-19e295a2b5cd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/icefog72_IceEspressoRPv2-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "IceEspressoRPv2-7b", + "id": "icefog72/IceEspressoRPv2-7b", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4977 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5055 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0619 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4331 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3061 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/IceLemonTeaRP-32k-7b/05aafad3-e07a-453b-a70b-f18fbd4eb218.json b/data/hfopenllm_v2/icefog72/IceLemonTeaRP-32k-7b/05aafad3-e07a-453b-a70b-f18fbd4eb218.json new file mode 100644 index 000000000..751ec205c --- /dev/null +++ b/data/hfopenllm_v2/icefog72/IceLemonTeaRP-32k-7b/05aafad3-e07a-453b-a70b-f18fbd4eb218.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_IceLemonTeaRP-32k-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + 
"evaluator_relationship": "third_party" + }, + "model_info": { + "name": "IceLemonTeaRP-32k-7b", + "id": "icefog72/IceLemonTeaRP-32k-7b", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5212 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4997 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0544 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.429 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3068 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/IceLemonTeaRP-32k-7b/fd90b65b-7b6f-4ca2-93e3-59486c0ee070.json b/data/hfopenllm_v2/icefog72/IceLemonTeaRP-32k-7b/fd90b65b-7b6f-4ca2-93e3-59486c0ee070.json deleted file mode 100644 index 27b3dcd30..000000000 --- a/data/hfopenllm_v2/icefog72/IceLemonTeaRP-32k-7b/fd90b65b-7b6f-4ca2-93e3-59486c0ee070.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_IceLemonTeaRP-32k-7b/1762652580.2236779", - "retrieved_timestamp": "1762652580.223679", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/IceLemonTeaRP-32k-7b", - "developer": "icefog72", 
- "inference_platform": "unknown", - "id": "icefog72/IceLemonTeaRP-32k-7b", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5212214701436633 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49973852418379305 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42903125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3067652925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/IceMartiniRP-7b/210bea5c-35de-4bd6-93db-871704add0d6.json b/data/hfopenllm_v2/icefog72/IceMartiniRP-7b/210bea5c-35de-4bd6-93db-871704add0d6.json deleted file mode 100644 index 787a38b2b..000000000 --- a/data/hfopenllm_v2/icefog72/IceMartiniRP-7b/210bea5c-35de-4bd6-93db-871704add0d6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_IceMartiniRP-7b/1762652580.223922", - "retrieved_timestamp": "1762652580.223923", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/IceMartiniRP-7b", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/IceMartiniRP-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5044603873278457 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4972421837639585 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": 
"Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4344895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3073470744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/IceMartiniRP-7b/f79ac32e-ab83-40c3-9c18-35623f5ae1d4.json b/data/hfopenllm_v2/icefog72/IceMartiniRP-7b/f79ac32e-ab83-40c3-9c18-35623f5ae1d4.json new file mode 100644 index 000000000..e94d36e4e --- /dev/null +++ b/data/hfopenllm_v2/icefog72/IceMartiniRP-7b/f79ac32e-ab83-40c3-9c18-35623f5ae1d4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_IceMartiniRP-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "IceMartiniRP-7b", + "id": "icefog72/IceMartiniRP-7b", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5045 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4972 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0665 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": 
"MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4345 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3073 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/IceNalyvkaRP-7b/95dd235d-6930-48fd-8594-5acb0110be29.json b/data/hfopenllm_v2/icefog72/IceNalyvkaRP-7b/95dd235d-6930-48fd-8594-5acb0110be29.json deleted file mode 100644 index 7f3356be0..000000000 --- a/data/hfopenllm_v2/icefog72/IceNalyvkaRP-7b/95dd235d-6930-48fd-8594-5acb0110be29.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_IceNalyvkaRP-7b/1762652580.224114", - "retrieved_timestamp": "1762652580.224115", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/IceNalyvkaRP-7b", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/IceNalyvkaRP-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.549797869652522 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.513632436415875 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05966767371601209 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45119791666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2996176861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/IceNalyvkaRP-7b/cec76b15-1069-4d37-b8bc-74dde28101f6.json 
b/data/hfopenllm_v2/icefog72/IceNalyvkaRP-7b/cec76b15-1069-4d37-b8bc-74dde28101f6.json new file mode 100644 index 000000000..f70b98ac1 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/IceNalyvkaRP-7b/cec76b15-1069-4d37-b8bc-74dde28101f6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_IceNalyvkaRP-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "IceNalyvkaRP-7b", + "id": "icefog72/IceNalyvkaRP-7b", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5498 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5136 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0597 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4512 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2996 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/IceSakeRP-7b/67e351c8-6cca-4982-86e9-e774786c6862.json b/data/hfopenllm_v2/icefog72/IceSakeRP-7b/67e351c8-6cca-4982-86e9-e774786c6862.json deleted file mode 100644 index 342c661e7..000000000 --- a/data/hfopenllm_v2/icefog72/IceSakeRP-7b/67e351c8-6cca-4982-86e9-e774786c6862.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": 
"0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_IceSakeRP-7b/1762652580.2243059", - "retrieved_timestamp": "1762652580.224307", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/IceSakeRP-7b", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/IceSakeRP-7b", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5227950726295119 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5119287057484642 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41300000000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3176529255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/IceSakeRP-7b/e4ac0d0c-65ea-4b43-bb4b-7371c6cd5d61.json b/data/hfopenllm_v2/icefog72/IceSakeRP-7b/e4ac0d0c-65ea-4b43-bb4b-7371c6cd5d61.json new file mode 100644 index 000000000..70a43f1aa --- /dev/null +++ b/data/hfopenllm_v2/icefog72/IceSakeRP-7b/e4ac0d0c-65ea-4b43-bb4b-7371c6cd5d61.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_IceSakeRP-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "IceSakeRP-7b", + "id": "icefog72/IceSakeRP-7b", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5228 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5119 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0634 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.413 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3177 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/IceSakeV4RP-7b/93b5850f-74d0-45cd-977e-5bf6e4dc5d8d.json b/data/hfopenllm_v2/icefog72/IceSakeV4RP-7b/93b5850f-74d0-45cd-977e-5bf6e4dc5d8d.json deleted file mode 100644 index 8f1ad1b71..000000000 --- a/data/hfopenllm_v2/icefog72/IceSakeV4RP-7b/93b5850f-74d0-45cd-977e-5bf6e4dc5d8d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_IceSakeV4RP-7b/1762652580.224551", - "retrieved_timestamp": "1762652580.224552", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/IceSakeV4RP-7b", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/IceSakeV4RP-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4634192830578421 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4929557826908731 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.055891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40819791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31025598404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/IceSakeV4RP-7b/f8d629bf-df0b-4c6a-8c18-17dda002b089.json b/data/hfopenllm_v2/icefog72/IceSakeV4RP-7b/f8d629bf-df0b-4c6a-8c18-17dda002b089.json new file mode 100644 index 000000000..620f3fe56 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/IceSakeV4RP-7b/f8d629bf-df0b-4c6a-8c18-17dda002b089.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_IceSakeV4RP-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "IceSakeV4RP-7b", + "id": "icefog72/IceSakeV4RP-7b", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4634 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.493 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0559 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4082 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3103 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/IceSakeV6RP-7b/6739d8e3-f4bd-4fd5-98f3-887f5ed3f9c0.json b/data/hfopenllm_v2/icefog72/IceSakeV6RP-7b/6739d8e3-f4bd-4fd5-98f3-887f5ed3f9c0.json new file mode 100644 index 000000000..477e0dbce --- /dev/null +++ b/data/hfopenllm_v2/icefog72/IceSakeV6RP-7b/6739d8e3-f4bd-4fd5-98f3-887f5ed3f9c0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_IceSakeV6RP-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "IceSakeV6RP-7b", + "id": "icefog72/IceSakeV6RP-7b", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5033 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4976 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0619 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" 
+ }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.42 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3093 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/IceSakeV6RP-7b/e9ebbcbf-81d5-494b-95a1-4e79feb42c40.json b/data/hfopenllm_v2/icefog72/IceSakeV6RP-7b/e9ebbcbf-81d5-494b-95a1-4e79feb42c40.json deleted file mode 100644 index 317a9a0c1..000000000 --- a/data/hfopenllm_v2/icefog72/IceSakeV6RP-7b/e9ebbcbf-81d5-494b-95a1-4e79feb42c40.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_IceSakeV6RP-7b/1762652580.224776", - "retrieved_timestamp": "1762652580.224777", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/IceSakeV6RP-7b", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/IceSakeV6RP-7b", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5032613465604596 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49760336362566354 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.061933534743202415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42001041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3093417553191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/IceSakeV8RP-7b/a51722f4-29f4-47a5-acba-4c8b5355551b.json b/data/hfopenllm_v2/icefog72/IceSakeV8RP-7b/a51722f4-29f4-47a5-acba-4c8b5355551b.json new file mode 100644 
index 000000000..7a5af5dc8 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/IceSakeV8RP-7b/a51722f4-29f4-47a5-acba-4c8b5355551b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_IceSakeV8RP-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "IceSakeV8RP-7b", + "id": "icefog72/IceSakeV8RP-7b", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6086 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4885 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0597 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3993 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.301 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/IceSakeV8RP-7b/dbeb9a8a-53c5-472b-a4b1-1aa0582f8486.json b/data/hfopenllm_v2/icefog72/IceSakeV8RP-7b/dbeb9a8a-53c5-472b-a4b1-1aa0582f8486.json deleted file mode 100644 index 5b6f3efb8..000000000 --- a/data/hfopenllm_v2/icefog72/IceSakeV8RP-7b/dbeb9a8a-53c5-472b-a4b1-1aa0582f8486.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_IceSakeV8RP-7b/1762652580.2249868", - "retrieved_timestamp": 
"1762652580.224988", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/IceSakeV8RP-7b", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/IceSakeV8RP-7b", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6085741388404988 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48847141337960176 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05966767371601209 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3992708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.301030585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/IceTea21EnergyDrinkRPV13-DPOv3.5/06d0a21f-f6e4-4ca9-a679-8c4502aaaad1.json b/data/hfopenllm_v2/icefog72/IceTea21EnergyDrinkRPV13-DPOv3.5/06d0a21f-f6e4-4ca9-a679-8c4502aaaad1.json new file mode 100644 index 000000000..bf984e1a4 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/IceTea21EnergyDrinkRPV13-DPOv3.5/06d0a21f-f6e4-4ca9-a679-8c4502aaaad1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_IceTea21EnergyDrinkRPV13-DPOv3.5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "IceTea21EnergyDrinkRPV13-DPOv3.5", + "id": "icefog72/IceTea21EnergyDrinkRPV13-DPOv3.5", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4871 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.44 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0363 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2844 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3964 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2498 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/IceTea21EnergyDrinkRPV13-DPOv3.5/f4d3a112-d529-48f8-a99e-85e9eb02e0c1.json b/data/hfopenllm_v2/icefog72/IceTea21EnergyDrinkRPV13-DPOv3.5/f4d3a112-d529-48f8-a99e-85e9eb02e0c1.json deleted file mode 100644 index 7f19401d4..000000000 --- a/data/hfopenllm_v2/icefog72/IceTea21EnergyDrinkRPV13-DPOv3.5/f4d3a112-d529-48f8-a99e-85e9eb02e0c1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_IceTea21EnergyDrinkRPV13-DPOv3.5/1762652580.2254012", - "retrieved_timestamp": "1762652580.225402", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/IceTea21EnergyDrinkRPV13-DPOv3.5", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/IceTea21EnergyDrinkRPV13-DPOv3.5", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48709978412833504 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4399660013109026 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03625377643504532 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39641666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24983377659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/icefog72/IceTea21EnergyDrinkRPV13-DPOv3/04a4dcc9-3784-4aea-9faf-9db49c2e4c43.json b/data/hfopenllm_v2/icefog72/IceTea21EnergyDrinkRPV13-DPOv3/04a4dcc9-3784-4aea-9faf-9db49c2e4c43.json new file mode 100644 index 000000000..87ff25f1f --- /dev/null +++ b/data/hfopenllm_v2/icefog72/IceTea21EnergyDrinkRPV13-DPOv3/04a4dcc9-3784-4aea-9faf-9db49c2e4c43.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/icefog72_IceTea21EnergyDrinkRPV13-DPOv3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "IceTea21EnergyDrinkRPV13-DPOv3", + "id": "icefog72/IceTea21EnergyDrinkRPV13-DPOv3", + "developer": "icefog72", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5263 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.502 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0582 + } + }, 
+ { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4372 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3056 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/IceTea21EnergyDrinkRPV13-DPOv3/4b4a9630-c942-445e-b396-4a988d489aa7.json b/data/hfopenllm_v2/icefog72/IceTea21EnergyDrinkRPV13-DPOv3/4b4a9630-c942-445e-b396-4a988d489aa7.json deleted file mode 100644 index ddb6c82b8..000000000 --- a/data/hfopenllm_v2/icefog72/IceTea21EnergyDrinkRPV13-DPOv3/4b4a9630-c942-445e-b396-4a988d489aa7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/icefog72_IceTea21EnergyDrinkRPV13-DPOv3/1762652580.225198", - "retrieved_timestamp": "1762652580.2251992", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "icefog72/IceTea21EnergyDrinkRPV13-DPOv3", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/IceTea21EnergyDrinkRPV13-DPOv3", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5263423272472595 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5019587584232624 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0581570996978852 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4371875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30560172872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/ifable/gemma-2-Ifable-9B/e4668365-d3dd-4996-9bb1-5b4e6f510264.json b/data/hfopenllm_v2/ifable/gemma-2-Ifable-9B/e4668365-d3dd-4996-9bb1-5b4e6f510264.json new file mode 100644 index 000000000..186f9833c --- /dev/null +++ b/data/hfopenllm_v2/ifable/gemma-2-Ifable-9B/e4668365-d3dd-4996-9bb1-5b4e6f510264.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ifable_gemma-2-Ifable-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-Ifable-9B", + "id": "ifable/gemma-2-Ifable-9B", + "developer": "ifable", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2984 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5866 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1397 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3414 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4053 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4226 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ilsp/Llama-Krikri-8B-Instruct/4d743678-e14d-4866-b1bf-0d660787847b.json b/data/hfopenllm_v2/ilsp/Llama-Krikri-8B-Instruct/4d743678-e14d-4866-b1bf-0d660787847b.json new file mode 100644 index 000000000..4ae435b53 --- /dev/null +++ b/data/hfopenllm_v2/ilsp/Llama-Krikri-8B-Instruct/4d743678-e14d-4866-b1bf-0d660787847b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ilsp_Llama-Krikri-8B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-Krikri-8B-Instruct", + "id": "ilsp/Llama-Krikri-8B-Instruct", + "developer": "ilsp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.202 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6079 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5047 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1178 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.408 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3313 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ilsp/Llama-Krikri-8B-Instruct/592bd629-d0bf-48b0-83c6-abfa3731fd14.json 
b/data/hfopenllm_v2/ilsp/Llama-Krikri-8B-Instruct/592bd629-d0bf-48b0-83c6-abfa3731fd14.json deleted file mode 100644 index 7e1f65583..000000000 --- a/data/hfopenllm_v2/ilsp/Llama-Krikri-8B-Instruct/592bd629-d0bf-48b0-83c6-abfa3731fd14.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ilsp_Llama-Krikri-8B-Instruct/1762652580.225861", - "retrieved_timestamp": "1762652580.225861", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ilsp/Llama-Krikri-8B-Instruct", - "developer": "ilsp", - "inference_platform": "unknown", - "id": "ilsp/Llama-Krikri-8B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.202 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6078748830879843 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.504664191645287 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11782477341389729 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4079791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3312832446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/inflatebot/MN-12B-Mag-Mell-R1/43f7613d-bd9f-480d-a2ed-dcabf3169944.json b/data/hfopenllm_v2/inflatebot/MN-12B-Mag-Mell-R1/43f7613d-bd9f-480d-a2ed-dcabf3169944.json deleted file mode 100644 index 66746c1f3..000000000 --- a/data/hfopenllm_v2/inflatebot/MN-12B-Mag-Mell-R1/43f7613d-bd9f-480d-a2ed-dcabf3169944.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/inflatebot_MN-12B-Mag-Mell-R1/1762652580.2261078", - "retrieved_timestamp": "1762652580.226109", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "inflatebot/MN-12B-Mag-Mell-R1", - "developer": 
"inflatebot", - "inference_platform": "unknown", - "id": "inflatebot/MN-12B-Mag-Mell-R1", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46129602787271107 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5303854975434981 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40022916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34383311170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/inflatebot/MN-12B-Mag-Mell-R1/720b1476-876c-47d1-bf46-d037389b4b2f.json b/data/hfopenllm_v2/inflatebot/MN-12B-Mag-Mell-R1/720b1476-876c-47d1-bf46-d037389b4b2f.json new file mode 100644 index 000000000..848ed7790 --- /dev/null +++ b/data/hfopenllm_v2/inflatebot/MN-12B-Mag-Mell-R1/720b1476-876c-47d1-bf46-d037389b4b2f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/inflatebot_MN-12B-Mag-Mell-R1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MN-12B-Mag-Mell-R1", + "id": "inflatebot/MN-12B-Mag-Mell-R1", + "developer": "inflatebot", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4613 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.5304 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1299 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3163 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4002 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3438 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/informatiker/Qwen2-7B-Instruct-abliterated/4e4f3b2d-5b17-486a-a2ab-c2e89194c765.json b/data/hfopenllm_v2/informatiker/Qwen2-7B-Instruct-abliterated/4e4f3b2d-5b17-486a-a2ab-c2e89194c765.json new file mode 100644 index 000000000..69b688c62 --- /dev/null +++ b/data/hfopenllm_v2/informatiker/Qwen2-7B-Instruct-abliterated/4e4f3b2d-5b17-486a-a2ab-c2e89194c765.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/informatiker_Qwen2-7B-Instruct-abliterated/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2-7B-Instruct-abliterated", + "id": "informatiker/Qwen2-7B-Instruct-abliterated", + "developer": "informatiker", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5822 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5534 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + 
"hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2636 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3888 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3873 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/informatiker/Qwen2-7B-Instruct-abliterated/be1ab009-3aa6-43da-8b8e-11e5287a0370.json b/data/hfopenllm_v2/informatiker/Qwen2-7B-Instruct-abliterated/be1ab009-3aa6-43da-8b8e-11e5287a0370.json deleted file mode 100644 index 3774a2e2e..000000000 --- a/data/hfopenllm_v2/informatiker/Qwen2-7B-Instruct-abliterated/be1ab009-3aa6-43da-8b8e-11e5287a0370.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/informatiker_Qwen2-7B-Instruct-abliterated/1762652580.2263439", - "retrieved_timestamp": "1762652580.226345", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "informatiker/Qwen2-7B-Instruct-abliterated", - "developer": "informatiker", - "inference_platform": "unknown", - "id": "informatiker/Qwen2-7B-Instruct-abliterated", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5821708622011817 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5534265515936739 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.263595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on 
GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38879166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3873005319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/insightfactory/Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model/3986b43c-2752-4a8f-b1e1-c3657734f84b.json b/data/hfopenllm_v2/insightfactory/Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model/3986b43c-2752-4a8f-b1e1-c3657734f84b.json deleted file mode 100644 index 07951d559..000000000 --- a/data/hfopenllm_v2/insightfactory/Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model/3986b43c-2752-4a8f-b1e1-c3657734f84b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/insightfactory_Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model/1762652580.226581", - "retrieved_timestamp": "1762652580.226582", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "insightfactory/Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model", - "developer": "insightfactory", - "inference_platform": "unknown", - "id": "insightfactory/Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model", - "additional_details": { - "precision": "float16", - "architecture": "", - "params_billions": 1.933 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45884807865352817 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4146016381618061 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10498489425981873 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.349875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.2960438829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/insightfactory/Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model/b738668e-3ac1-4a36-ad71-ad7d2a5256ae.json b/data/hfopenllm_v2/insightfactory/Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model/b738668e-3ac1-4a36-ad71-ad7d2a5256ae.json new file mode 100644 index 000000000..35ea90e61 --- /dev/null +++ b/data/hfopenllm_v2/insightfactory/Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model/b738668e-3ac1-4a36-ad71-ad7d2a5256ae.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/insightfactory_Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model", + "id": "insightfactory/Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model", + "developer": "insightfactory", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "", + "params_billions": 1.933 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4588 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4146 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.105 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2718 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3499 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.296 + } + } + ] +} \ No 
newline at end of file diff --git a/data/hfopenllm_v2/instruction-pretrain/InstructLM-500M/38ba0438-f5ed-434e-af2e-fed71988f7b9.json b/data/hfopenllm_v2/instruction-pretrain/InstructLM-500M/38ba0438-f5ed-434e-af2e-fed71988f7b9.json deleted file mode 100644 index 069ab9f06..000000000 --- a/data/hfopenllm_v2/instruction-pretrain/InstructLM-500M/38ba0438-f5ed-434e-af2e-fed71988f7b9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/instruction-pretrain_InstructLM-500M/1762652580.226826", - "retrieved_timestamp": "1762652580.226826", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "instruction-pretrain/InstructLM-500M", - "developer": "instruction-pretrain", - "inference_platform": "unknown", - "id": "instruction-pretrain/InstructLM-500M", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 0.5 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1027662158627996 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29408717872529677 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3528229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1141123670212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/instruction-pretrain/InstructLM-500M/623f1b73-1505-4527-b41c-dcb2b711226d.json b/data/hfopenllm_v2/instruction-pretrain/InstructLM-500M/623f1b73-1505-4527-b41c-dcb2b711226d.json new file mode 100644 index 000000000..250a5012f --- /dev/null +++ b/data/hfopenllm_v2/instruction-pretrain/InstructLM-500M/623f1b73-1505-4527-b41c-dcb2b711226d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/instruction-pretrain_InstructLM-500M/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, 
+ "model_info": { + "name": "InstructLM-500M", + "id": "instruction-pretrain/InstructLM-500M", + "developer": "instruction-pretrain", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 0.5 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1028 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2941 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2567 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3528 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1141 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/internlm/internlm2-1_8b/53f03454-9587-4208-bc01-21de62f59195.json b/data/hfopenllm_v2/internlm/internlm2-1_8b/53f03454-9587-4208-bc01-21de62f59195.json new file mode 100644 index 000000000..112c6e318 --- /dev/null +++ b/data/hfopenllm_v2/internlm/internlm2-1_8b/53f03454-9587-4208-bc01-21de62f59195.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/internlm_internlm2-1_8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "internlm2-1_8b", + "id": "internlm/internlm2-1_8b", + "developer": "internlm", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "InternLM2ForCausalLM", + "params_billions": 8.0 + 
} + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2198 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.388 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0211 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2483 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3813 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1588 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/internlm/internlm2-1_8b/fc23ef4f-2ef1-4a3e-b029-9d646145e135.json b/data/hfopenllm_v2/internlm/internlm2-1_8b/fc23ef4f-2ef1-4a3e-b029-9d646145e135.json deleted file mode 100644 index f1b14cd1a..000000000 --- a/data/hfopenllm_v2/internlm/internlm2-1_8b/fc23ef4f-2ef1-4a3e-b029-9d646145e135.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/internlm_internlm2-1_8b/1762652580.227062", - "retrieved_timestamp": "1762652580.227063", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "internlm/internlm2-1_8b", - "developer": "internlm", - "inference_platform": "unknown", - "id": "internlm/internlm2-1_8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "InternLM2ForCausalLM", - "params_billions": 8.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2197702097102355 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3879732800028095 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.021148036253776436 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2483221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38128125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15882646276595744 - } - } - ] -} diff --git a/data/hfopenllm_v2/internlm/internlm2-7b/d4bba57d-2a3c-4945-ae47-7830840d0259.json b/data/hfopenllm_v2/internlm/internlm2-7b/d4bba57d-2a3c-4945-ae47-7830840d0259.json deleted file mode 100644 index 710d70ed6..000000000 --- a/data/hfopenllm_v2/internlm/internlm2-7b/d4bba57d-2a3c-4945-ae47-7830840d0259.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/internlm_internlm2-7b/1762652580.2273018", - "retrieved_timestamp": "1762652580.227303", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "internlm/internlm2-7b", - "developer": "internlm", - "inference_platform": "unknown", - "id": "internlm/internlm2-7b", - "additional_details": { - "precision": "float16", - "architecture": "Unknown", - "params_billions": 0.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22803680981595092 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5825 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08571428571428572 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.33666666666666667 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43999999999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19 - } - } - ] -} diff --git a/data/hfopenllm_v2/internlm/internlm2-7b/fb38d8b4-6320-4b8d-bf3d-e3d22bb0ed83.json b/data/hfopenllm_v2/internlm/internlm2-7b/fb38d8b4-6320-4b8d-bf3d-e3d22bb0ed83.json new file mode 100644 index 000000000..0954fe225 --- /dev/null +++ b/data/hfopenllm_v2/internlm/internlm2-7b/fb38d8b4-6320-4b8d-bf3d-e3d22bb0ed83.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/internlm_internlm2-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "internlm2-7b", + "id": "internlm/internlm2-7b", + "developer": "internlm", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Unknown", + "params_billions": 0.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.228 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5825 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0857 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3367 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.44 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": 
"TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.19 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/internlm/internlm2-chat-1_8b/767b5c7e-6319-487f-906c-2abca794f884.json b/data/hfopenllm_v2/internlm/internlm2-chat-1_8b/767b5c7e-6319-487f-906c-2abca794f884.json deleted file mode 100644 index dcf38f345..000000000 --- a/data/hfopenllm_v2/internlm/internlm2-chat-1_8b/767b5c7e-6319-487f-906c-2abca794f884.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/internlm_internlm2-chat-1_8b/1762652580.227562", - "retrieved_timestamp": "1762652580.227563", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "internlm/internlm2-chat-1_8b", - "developer": "internlm", - "inference_platform": "unknown", - "id": "internlm/internlm2-chat-1_8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "InternLM2ForCausalLM", - "params_billions": 1.889 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2386545477111841 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4452271664119214 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0324773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36305208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18392619680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/internlm/internlm2-chat-1_8b/b127a923-3bf2-4cad-9225-d738efe800e3.json b/data/hfopenllm_v2/internlm/internlm2-chat-1_8b/b127a923-3bf2-4cad-9225-d738efe800e3.json new file mode 100644 index 000000000..0ea7fed94 --- /dev/null +++ b/data/hfopenllm_v2/internlm/internlm2-chat-1_8b/b127a923-3bf2-4cad-9225-d738efe800e3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/internlm_internlm2-chat-1_8b/1770682486.623709", + "retrieved_timestamp": 
"1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "internlm2-chat-1_8b", + "id": "internlm/internlm2-chat-1_8b", + "developer": "internlm", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "InternLM2ForCausalLM", + "params_billions": 1.889 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2387 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4452 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0325 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3631 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1839 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/internlm/internlm2_5-1_8b-chat/a94ae52a-7936-4750-83f5-4740f23adf15.json b/data/hfopenllm_v2/internlm/internlm2_5-1_8b-chat/a94ae52a-7936-4750-83f5-4740f23adf15.json new file mode 100644 index 000000000..89f784e4a --- /dev/null +++ b/data/hfopenllm_v2/internlm/internlm2_5-1_8b-chat/a94ae52a-7936-4750-83f5-4740f23adf15.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/internlm_internlm2_5-1_8b-chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"internlm2_5-1_8b-chat", + "id": "internlm/internlm2_5-1_8b-chat", + "developer": "internlm", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "InternLM2ForCausalLM", + "params_billions": 1.89 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3849 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4489 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1586 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3594 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1299 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/internlm/internlm2_5-1_8b-chat/d37e87e2-53c3-42fa-b78d-04d2819b14d3.json b/data/hfopenllm_v2/internlm/internlm2_5-1_8b-chat/d37e87e2-53c3-42fa-b78d-04d2819b14d3.json deleted file mode 100644 index 8bf8e3243..000000000 --- a/data/hfopenllm_v2/internlm/internlm2_5-1_8b-chat/d37e87e2-53c3-42fa-b78d-04d2819b14d3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/internlm_internlm2_5-1_8b-chat/1762652580.227762", - "retrieved_timestamp": "1762652580.227763", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "internlm/internlm2_5-1_8b-chat", - "developer": "internlm", - "inference_platform": "unknown", - "id": 
"internlm/internlm2_5-1_8b-chat", - "additional_details": { - "precision": "bfloat16", - "architecture": "InternLM2ForCausalLM", - "params_billions": 1.89 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38490870889240547 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4488926786996439 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15861027190332327 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35939583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12990359042553193 - } - } - ] -} diff --git a/data/hfopenllm_v2/internlm/internlm2_5-20b-chat/95e689c6-cd19-4114-b3b5-1672ab849214.json b/data/hfopenllm_v2/internlm/internlm2_5-20b-chat/95e689c6-cd19-4114-b3b5-1672ab849214.json new file mode 100644 index 000000000..c3237ef07 --- /dev/null +++ b/data/hfopenllm_v2/internlm/internlm2_5-20b-chat/95e689c6-cd19-4114-b3b5-1672ab849214.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/internlm_internlm2_5-20b-chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "internlm2_5-20b-chat", + "id": "internlm/internlm2_5-20b-chat", + "developer": "internlm", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "InternLM2ForCausalLM", + "params_billions": 19.86 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.701 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7474 + } + }, + { + 
"evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4079 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3213 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4558 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3998 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/internlm/internlm2_5-20b-chat/a651c814-41e2-4951-bb8f-df799cc6e470.json b/data/hfopenllm_v2/internlm/internlm2_5-20b-chat/a651c814-41e2-4951-bb8f-df799cc6e470.json deleted file mode 100644 index 6a1ba8252..000000000 --- a/data/hfopenllm_v2/internlm/internlm2_5-20b-chat/a651c814-41e2-4951-bb8f-df799cc6e470.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/internlm_internlm2_5-20b-chat/1762652580.2279649", - "retrieved_timestamp": "1762652580.227966", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "internlm/internlm2_5-20b-chat", - "developer": "internlm", - "inference_platform": "unknown", - "id": "internlm/internlm2_5-20b-chat", - "additional_details": { - "precision": "bfloat16", - "architecture": "InternLM2ForCausalLM", - "params_billions": 19.86 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7009977969565198 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7473580533672884 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { 
- "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4558229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39976728723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/internlm/internlm2_5-7b-chat/28245528-26e8-48a8-9cc8-68d7a6389bde.json b/data/hfopenllm_v2/internlm/internlm2_5-7b-chat/28245528-26e8-48a8-9cc8-68d7a6389bde.json deleted file mode 100644 index 652378a71..000000000 --- a/data/hfopenllm_v2/internlm/internlm2_5-7b-chat/28245528-26e8-48a8-9cc8-68d7a6389bde.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/internlm_internlm2_5-7b-chat/1762652580.2281651", - "retrieved_timestamp": "1762652580.2281659", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "internlm/internlm2_5-7b-chat", - "developer": "internlm", - "inference_platform": "unknown", - "id": "internlm/internlm2_5-7b-chat", - "additional_details": { - "precision": "float16", - "architecture": "InternLM2ForCausalLM", - "params_billions": 7.738 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5538692890419642 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7073179916851792 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25302114803625375 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34731543624161076 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45938541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3776595744680851 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/internlm/internlm2_5-7b-chat/890a8414-bccf-4a66-8013-6c270d017965.json b/data/hfopenllm_v2/internlm/internlm2_5-7b-chat/890a8414-bccf-4a66-8013-6c270d017965.json new file mode 100644 index 000000000..39a2746e0 --- /dev/null +++ b/data/hfopenllm_v2/internlm/internlm2_5-7b-chat/890a8414-bccf-4a66-8013-6c270d017965.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/internlm_internlm2_5-7b-chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "internlm2_5-7b-chat", + "id": "internlm/internlm2_5-7b-chat", + "developer": "internlm", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "InternLM2ForCausalLM", + "params_billions": 7.738 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5539 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7073 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.253 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3473 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4594 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3777 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/intervitens/mini-magnum-12b-v1.1/0f8ce410-cf3b-4f78-81b9-a0a1fe91b963.json b/data/hfopenllm_v2/intervitens/mini-magnum-12b-v1.1/0f8ce410-cf3b-4f78-81b9-a0a1fe91b963.json new file mode 100644 index 000000000..37988d1da --- /dev/null +++ 
b/data/hfopenllm_v2/intervitens/mini-magnum-12b-v1.1/0f8ce410-cf3b-4f78-81b9-a0a1fe91b963.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/intervitens_mini-magnum-12b-v1.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mini-magnum-12b-v1.1", + "id": "intervitens/mini-magnum-12b-v1.1", + "developer": "intervitens", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5156 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5062 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0619 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2886 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3291 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/intervitens/mini-magnum-12b-v1.1/8ad974e6-8d4c-45bf-86d0-f701cfc323d5.json b/data/hfopenllm_v2/intervitens/mini-magnum-12b-v1.1/8ad974e6-8d4c-45bf-86d0-f701cfc323d5.json deleted file mode 100644 index 04e62a92c..000000000 --- a/data/hfopenllm_v2/intervitens/mini-magnum-12b-v1.1/8ad974e6-8d4c-45bf-86d0-f701cfc323d5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/intervitens_mini-magnum-12b-v1.1/1762652580.228364", - "retrieved_timestamp": "1762652580.228365", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "intervitens/mini-magnum-12b-v1.1", - "developer": "intervitens", - "inference_platform": "unknown", - "id": "intervitens/mini-magnum-12b-v1.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5155509603407846 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.506180035650624 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.061933534743202415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4004479166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3291223404255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/inumulaisk/eval_model/121096cf-356b-4069-a0a3-8cf6aad52b81.json b/data/hfopenllm_v2/inumulaisk/eval_model/121096cf-356b-4069-a0a3-8cf6aad52b81.json new file mode 100644 index 000000000..fd8c4c747 --- /dev/null +++ b/data/hfopenllm_v2/inumulaisk/eval_model/121096cf-356b-4069-a0a3-8cf6aad52b81.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/inumulaisk_eval_model/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "eval_model", + "id": "inumulaisk/eval_model", + "developer": "inumulaisk", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1931 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3512 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2976 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.358 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1664 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/inumulaisk/eval_model/e3e4a9b3-ce68-4999-966e-2ef2baf99266.json b/data/hfopenllm_v2/inumulaisk/eval_model/e3e4a9b3-ce68-4999-966e-2ef2baf99266.json deleted file mode 100644 index b127c4e0a..000000000 --- a/data/hfopenllm_v2/inumulaisk/eval_model/e3e4a9b3-ce68-4999-966e-2ef2baf99266.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/inumulaisk_eval_model/1762652580.228598", - "retrieved_timestamp": "1762652580.228599", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "inumulaisk/eval_model", - "developer": "inumulaisk", - "inference_platform": "unknown", - "id": "inumulaisk/eval_model", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19314197440568803 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35118774303346373 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.297583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35796875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16638962765957446 - } - } - ] -} diff --git a/data/hfopenllm_v2/invalid-coder/Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp/cdb8a900-75f3-4e6b-9d35-5a6791e8acd1.json b/data/hfopenllm_v2/invalid-coder/Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp/cdb8a900-75f3-4e6b-9d35-5a6791e8acd1.json deleted file mode 100644 index fc0a9920f..000000000 --- a/data/hfopenllm_v2/invalid-coder/Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp/cdb8a900-75f3-4e6b-9d35-5a6791e8acd1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/invalid-coder_Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp/1762652580.229043", - "retrieved_timestamp": "1762652580.229047", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "invalid-coder/Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp", - "developer": "invalid-coder", - "inference_platform": "unknown", - "id": "invalid-coder/Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45547591501660034 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5158439010792586 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04909365558912387 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3992395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145777925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/invalid-coder/Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp/fb0bcadf-32a0-4320-909f-2c38ba7d9372.json b/data/hfopenllm_v2/invalid-coder/Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp/fb0bcadf-32a0-4320-909f-2c38ba7d9372.json new file mode 100644 index 000000000..8bfc59d66 --- /dev/null +++ b/data/hfopenllm_v2/invalid-coder/Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp/fb0bcadf-32a0-4320-909f-2c38ba7d9372.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/invalid-coder_Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp", + "id": "invalid-coder/Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp", + "developer": "invalid-coder", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4555 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5158 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0491 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3992 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3146 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/invisietch/EtherealRainbow-v0.2-8B/ab941c52-cf33-4b8e-87af-4a73930cf72a.json b/data/hfopenllm_v2/invisietch/EtherealRainbow-v0.2-8B/ab941c52-cf33-4b8e-87af-4a73930cf72a.json new file mode 100644 index 000000000..a6bf367ef --- /dev/null +++ b/data/hfopenllm_v2/invisietch/EtherealRainbow-v0.2-8B/ab941c52-cf33-4b8e-87af-4a73930cf72a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/invisietch_EtherealRainbow-v0.2-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EtherealRainbow-v0.2-8B", + "id": "invisietch/EtherealRainbow-v0.2-8B", + "developer": "invisietch", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3903 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5102 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0823 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3827 + } + }, + { + "evaluation_name": 
"MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3653 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/invisietch/EtherealRainbow-v0.2-8B/c60869f0-7009-48c9-be41-339335e5ee4e.json b/data/hfopenllm_v2/invisietch/EtherealRainbow-v0.2-8B/c60869f0-7009-48c9-be41-339335e5ee4e.json deleted file mode 100644 index 55b8d48f2..000000000 --- a/data/hfopenllm_v2/invisietch/EtherealRainbow-v0.2-8B/c60869f0-7009-48c9-be41-339335e5ee4e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/invisietch_EtherealRainbow-v0.2-8B/1762652580.229454", - "retrieved_timestamp": "1762652580.229455", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "invisietch/EtherealRainbow-v0.2-8B", - "developer": "invisietch", - "inference_platform": "unknown", - "id": "invisietch/EtherealRainbow-v0.2-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39032988027323057 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5102035205059678 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0823262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38267708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36527593085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/invisietch/EtherealRainbow-v0.3-8B/08c242fd-0258-4817-970a-668584ed9385.json b/data/hfopenllm_v2/invisietch/EtherealRainbow-v0.3-8B/08c242fd-0258-4817-970a-668584ed9385.json new file mode 100644 index 000000000..d5ccbfe08 --- /dev/null +++ b/data/hfopenllm_v2/invisietch/EtherealRainbow-v0.3-8B/08c242fd-0258-4817-970a-668584ed9385.json @@ -0,0 +1,132 @@ 
+{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/invisietch_EtherealRainbow-v0.3-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EtherealRainbow-v0.3-8B", + "id": "invisietch/EtherealRainbow-v0.3-8B", + "developer": "invisietch", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3682 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5097 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0763 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3904 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3626 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/invisietch/EtherealRainbow-v0.3-8B/cc85ba7f-bbc0-43e7-a678-949fd5be8498.json b/data/hfopenllm_v2/invisietch/EtherealRainbow-v0.3-8B/cc85ba7f-bbc0-43e7-a678-949fd5be8498.json deleted file mode 100644 index 4f169c11b..000000000 --- a/data/hfopenllm_v2/invisietch/EtherealRainbow-v0.3-8B/cc85ba7f-bbc0-43e7-a678-949fd5be8498.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/invisietch_EtherealRainbow-v0.3-8B/1762652580.229776", - "retrieved_timestamp": "1762652580.2297769", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "invisietch/EtherealRainbow-v0.3-8B", - "developer": "invisietch", - "inference_platform": "unknown", - "id": "invisietch/EtherealRainbow-v0.3-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36822298168858625 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5096758454539693 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07628398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39039583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36261635638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/invisietch/MiS-Firefly-v0.2-22B/2171af9a-be5e-4daf-8e67-a5239ccec7bd.json b/data/hfopenllm_v2/invisietch/MiS-Firefly-v0.2-22B/2171af9a-be5e-4daf-8e67-a5239ccec7bd.json new file mode 100644 index 000000000..501989bf4 --- /dev/null +++ b/data/hfopenllm_v2/invisietch/MiS-Firefly-v0.2-22B/2171af9a-be5e-4daf-8e67-a5239ccec7bd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/invisietch_MiS-Firefly-v0.2-22B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MiS-Firefly-v0.2-22B", + "id": "invisietch/MiS-Firefly-v0.2-22B", + "developer": "invisietch", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.5371 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5514 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1654 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4694 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.362 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/invisietch/MiS-Firefly-v0.2-22B/6df8e489-865f-4692-a673-6abbf2159d1d.json b/data/hfopenllm_v2/invisietch/MiS-Firefly-v0.2-22B/6df8e489-865f-4692-a673-6abbf2159d1d.json deleted file mode 100644 index cb275feb8..000000000 --- a/data/hfopenllm_v2/invisietch/MiS-Firefly-v0.2-22B/6df8e489-865f-4692-a673-6abbf2159d1d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/invisietch_MiS-Firefly-v0.2-22B/1762652580.2300959", - "retrieved_timestamp": "1762652580.2300968", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "invisietch/MiS-Firefly-v0.2-22B", - "developer": "invisietch", - "inference_platform": "unknown", - "id": "invisietch/MiS-Firefly-v0.2-22B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 22.247 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5371082062261466 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5513523591170696 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16540785498489427 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46937500000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3620345744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/invisietch/Nimbus-Miqu-v0.1-70B/706f75a1-2f6b-47dd-809e-a830e739b574.json b/data/hfopenllm_v2/invisietch/Nimbus-Miqu-v0.1-70B/706f75a1-2f6b-47dd-809e-a830e739b574.json new file mode 100644 index 000000000..4802b7933 --- /dev/null +++ b/data/hfopenllm_v2/invisietch/Nimbus-Miqu-v0.1-70B/706f75a1-2f6b-47dd-809e-a830e739b574.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/invisietch_Nimbus-Miqu-v0.1-70B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nimbus-Miqu-v0.1-70B", + "id": "invisietch/Nimbus-Miqu-v0.1-70B", + "developer": "invisietch", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 68.977 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4647 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.601 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0604 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3389 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4133 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3853 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/invisietch/Nimbus-Miqu-v0.1-70B/c36d07f4-b263-4849-86f9-d3fea355c068.json b/data/hfopenllm_v2/invisietch/Nimbus-Miqu-v0.1-70B/c36d07f4-b263-4849-86f9-d3fea355c068.json deleted file mode 100644 index 803554fdf..000000000 --- a/data/hfopenllm_v2/invisietch/Nimbus-Miqu-v0.1-70B/c36d07f4-b263-4849-86f9-d3fea355c068.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/invisietch_Nimbus-Miqu-v0.1-70B/1762652580.230321", - "retrieved_timestamp": "1762652580.230322", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "invisietch/Nimbus-Miqu-v0.1-70B", - "developer": "invisietch", - "inference_platform": "unknown", - "id": "invisietch/Nimbus-Miqu-v0.1-70B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 68.977 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46466819150963884 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.601030667794844 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3389261744966443 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41331249999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy 
on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3853058510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/irahulpandey/mistralai-7B-slerp-v0.1/a9cd0399-4670-4f5c-8c64-c82dac97cd8c.json b/data/hfopenllm_v2/irahulpandey/mistralai-7B-slerp-v0.1/a9cd0399-4670-4f5c-8c64-c82dac97cd8c.json new file mode 100644 index 000000000..42cef2a10 --- /dev/null +++ b/data/hfopenllm_v2/irahulpandey/mistralai-7B-slerp-v0.1/a9cd0399-4670-4f5c-8c64-c82dac97cd8c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/irahulpandey_mistralai-7B-slerp-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistralai-7B-slerp-v0.1", + "id": "irahulpandey/mistralai-7B-slerp-v0.1", + "developer": "irahulpandey", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4966 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5011 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0514 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.455 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2951 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/jaredjoss/pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model/67cfd12d-0551-406d-bd1d-8ced75c69478.json b/data/hfopenllm_v2/jaredjoss/pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model/67cfd12d-0551-406d-bd1d-8ced75c69478.json new file mode 100644 index 000000000..6cad09ae3 --- /dev/null +++ b/data/hfopenllm_v2/jaredjoss/pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model/67cfd12d-0551-406d-bd1d-8ced75c69478.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaredjoss_pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model", + "id": "jaredjoss/pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model", + "developer": "jaredjoss", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 0.407 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1572 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2863 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3607 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1169 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/jaredjoss/pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model/cf6b0824-45c4-4b47-bf23-e5df5673b74e.json b/data/hfopenllm_v2/jaredjoss/pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model/cf6b0824-45c4-4b47-bf23-e5df5673b74e.json deleted file mode 100644 index 69b623db6..000000000 --- a/data/hfopenllm_v2/jaredjoss/pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model/cf6b0824-45c4-4b47-bf23-e5df5673b74e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaredjoss_pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model/1762652580.230787", - "retrieved_timestamp": "1762652580.230787", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaredjoss/pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model", - "developer": "jaredjoss", - "inference_platform": "unknown", - "id": "jaredjoss/pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model", - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 0.407 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15722172723928066 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2863444769655102 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3606979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11685505319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2-8B/0064f2f6-672e-478c-9184-e7fd32ad06b8.json b/data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2-8B/0064f2f6-672e-478c-9184-e7fd32ad06b8.json deleted file mode 100644 index e350b4996..000000000 --- a/data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2-8B/0064f2f6-672e-478c-9184-e7fd32ad06b8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Auro-Kosmos-EVAA-v2-8B/1762652580.231028", - "retrieved_timestamp": "1762652580.231029", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Auro-Kosmos-EVAA-v2-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Auro-Kosmos-EVAA-v2-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4778077722664752 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5447163557182707 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14123867069486404 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.425 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38580452127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2-8B/0a31d2f0-196b-4508-861a-1ba7bd28ea23.json b/data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2-8B/0a31d2f0-196b-4508-861a-1ba7bd28ea23.json new file mode 100644 index 000000000..8ebe97770 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2-8B/0a31d2f0-196b-4508-861a-1ba7bd28ea23.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Auro-Kosmos-EVAA-v2-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Auro-Kosmos-EVAA-v2-8B", + "id": "jaspionjader/Auro-Kosmos-EVAA-v2-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.4778 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5447 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1412 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3154 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.425 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3858 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2.1-8B/4381d7ab-d19f-4fa0-a69a-978af28df8fa.json b/data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2.1-8B/4381d7ab-d19f-4fa0-a69a-978af28df8fa.json deleted file mode 100644 index 650b38940..000000000 --- a/data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2.1-8B/4381d7ab-d19f-4fa0-a69a-978af28df8fa.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Auro-Kosmos-EVAA-v2.1-8B/1762652580.231263", - "retrieved_timestamp": "1762652580.231264", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Auro-Kosmos-EVAA-v2.1-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Auro-Kosmos-EVAA-v2.1-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4665919759571271 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5444200006474947 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14577039274924472 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4316979166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.382563164893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2.1-8B/57576999-2749-441a-91d6-5a976e83a658.json b/data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2.1-8B/57576999-2749-441a-91d6-5a976e83a658.json new file mode 100644 index 000000000..315886d52 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2.1-8B/57576999-2749-441a-91d6-5a976e83a658.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Auro-Kosmos-EVAA-v2.1-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Auro-Kosmos-EVAA-v2.1-8B", + "id": "jaspionjader/Auro-Kosmos-EVAA-v2.1-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4666 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5444 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1458 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + 
"hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4317 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3826 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2.2-8B/4e616fc6-8baa-4c9a-9098-b8d108911ad2.json b/data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2.2-8B/4e616fc6-8baa-4c9a-9098-b8d108911ad2.json deleted file mode 100644 index c09e29fa8..000000000 --- a/data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2.2-8B/4e616fc6-8baa-4c9a-9098-b8d108911ad2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Auro-Kosmos-EVAA-v2.2-8B/1762652580.231466", - "retrieved_timestamp": "1762652580.231467", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Auro-Kosmos-EVAA-v2.2-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Auro-Kosmos-EVAA-v2.2-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4267997801389203 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5431077158331955 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14123867069486404 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42506249999999995 - } - }, - 
{ - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37982047872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2.2-8B/e44792e6-0329-4784-832b-3043478e70a4.json b/data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2.2-8B/e44792e6-0329-4784-832b-3043478e70a4.json new file mode 100644 index 000000000..3152352b6 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2.2-8B/e44792e6-0329-4784-832b-3043478e70a4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Auro-Kosmos-EVAA-v2.2-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Auro-Kosmos-EVAA-v2.2-8B", + "id": "jaspionjader/Auro-Kosmos-EVAA-v2.2-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4268 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5431 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1412 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4251 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.3798 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2.3-8B/8b3789d6-51be-472a-95d3-2ae7c34ad140.json b/data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2.3-8B/8b3789d6-51be-472a-95d3-2ae7c34ad140.json new file mode 100644 index 000000000..f7a617f03 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2.3-8B/8b3789d6-51be-472a-95d3-2ae7c34ad140.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Auro-Kosmos-EVAA-v2.3-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Auro-Kosmos-EVAA-v2.3-8B", + "id": "jaspionjader/Auro-Kosmos-EVAA-v2.3-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4271 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5441 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1344 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4278 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3784 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2.3-8B/9c7ee100-754e-4665-8527-452021a2243b.json 
b/data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2.3-8B/9c7ee100-754e-4665-8527-452021a2243b.json deleted file mode 100644 index 73105f0e0..000000000 --- a/data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2.3-8B/9c7ee100-754e-4665-8527-452021a2243b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Auro-Kosmos-EVAA-v2.3-8B/1762652580.231667", - "retrieved_timestamp": "1762652580.231667", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Auro-Kosmos-EVAA-v2.3-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Auro-Kosmos-EVAA-v2.3-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42712447417297217 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5440818233123913 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13444108761329304 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4277916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37840757978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-Aurora_faustus-8B/0563ee22-d981-45cb-83f8-7dbdb2734d10.json b/data/hfopenllm_v2/jaspionjader/Kosmos-Aurora_faustus-8B/0563ee22-d981-45cb-83f8-7dbdb2734d10.json deleted file mode 100644 index 6e85759b9..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-Aurora_faustus-8B/0563ee22-d981-45cb-83f8-7dbdb2734d10.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-Aurora_faustus-8B/1762652580.231864", - "retrieved_timestamp": "1762652580.2318652", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - 
}, - "model_info": { - "name": "jaspionjader/Kosmos-Aurora_faustus-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-Aurora_faustus-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.443236168920686 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5260325661068855 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11253776435045318 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4116979166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38131648936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-Aurora_faustus-8B/3f4765f2-551b-485f-9020-0cf17a36a887.json b/data/hfopenllm_v2/jaspionjader/Kosmos-Aurora_faustus-8B/3f4765f2-551b-485f-9020-0cf17a36a887.json new file mode 100644 index 000000000..0ae45d0a4 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-Aurora_faustus-8B/3f4765f2-551b-485f-9020-0cf17a36a887.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-Aurora_faustus-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-Aurora_faustus-8B", + "id": "jaspionjader/Kosmos-Aurora_faustus-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4432 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.526 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1125 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4117 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3813 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-8B/6375a845-5d86-4dcf-bfd2-e836daa4ca11.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-8B/6375a845-5d86-4dcf-bfd2-e836daa4ca11.json new file mode 100644 index 000000000..f9aae6a67 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-8B/6375a845-5d86-4dcf-bfd2-e836daa4ca11.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-8B", + "id": "jaspionjader/Kosmos-EVAA-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4405 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5312 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + 
"dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1178 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4237 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3818 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-8B/746ffa2c-cc95-4d69-9e46-0e8f4febd440.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-8B/746ffa2c-cc95-4d69-9e46-0e8f4febd440.json deleted file mode 100644 index 3ed2aa2bc..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-8B/746ffa2c-cc95-4d69-9e46-0e8f4febd440.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-8B/1762652580.232065", - "retrieved_timestamp": "1762652580.232065", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4404635256674513 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5311831227740652 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11782477341389729 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4236666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3818151595744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Franken-Immersive-v39-8B/65a74446-6964-4f5f-8ea6-aeb1b09595ae.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Franken-Immersive-v39-8B/65a74446-6964-4f5f-8ea6-aeb1b09595ae.json new file mode 100644 index 000000000..79154e34e --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Franken-Immersive-v39-8B/65a74446-6964-4f5f-8ea6-aeb1b09595ae.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-Franken-Immersive-v39-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-Franken-Immersive-v39-8B", + "id": "jaspionjader/Kosmos-EVAA-Franken-Immersive-v39-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4378 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.519 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1292 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3154 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4236 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.39 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Franken-Immersive-v39-8B/f9e1901a-854d-4437-8d49-a6c47799f687.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Franken-Immersive-v39-8B/f9e1901a-854d-4437-8d49-a6c47799f687.json deleted file mode 100644 index 605c9e2a3..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Franken-Immersive-v39-8B/f9e1901a-854d-4437-8d49-a6c47799f687.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-Franken-Immersive-v39-8B/1762652580.232267", - "retrieved_timestamp": "1762652580.232268", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-Franken-Immersive-v39-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-Franken-Immersive-v39-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43779061778303796 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5189720817259138 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12915407854984895 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4236354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3900432180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Franken-v38-8B/8919b3ad-529c-4391-bec3-65b81dad97c3.json 
b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Franken-v38-8B/8919b3ad-529c-4391-bec3-65b81dad97c3.json deleted file mode 100644 index fbb553285..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Franken-v38-8B/8919b3ad-529c-4391-bec3-65b81dad97c3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-Franken-v38-8B/1762652580.2324722", - "retrieved_timestamp": "1762652580.2324731", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-Franken-v38-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-Franken-v38-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4355676272290855 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5229513322616746 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12915407854984895 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42115624999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3890458776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Franken-v38-8B/dcba5998-3b84-4753-a4fa-2558ffe3e69b.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Franken-v38-8B/dcba5998-3b84-4753-a4fa-2558ffe3e69b.json new file mode 100644 index 000000000..394557379 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Franken-v38-8B/dcba5998-3b84-4753-a4fa-2558ffe3e69b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-Franken-v38-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-Franken-v38-8B", + "id": 
"jaspionjader/Kosmos-EVAA-Franken-v38-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4356 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.523 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1292 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4212 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.389 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Fusion-8B/0af6b3c0-6638-4bd8-bdd9-349e2b9ca71c.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Fusion-8B/0af6b3c0-6638-4bd8-bdd9-349e2b9ca71c.json new file mode 100644 index 000000000..d05e11595 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Fusion-8B/0af6b3c0-6638-4bd8-bdd9-349e2b9ca71c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-Fusion-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-Fusion-8B", + "id": "jaspionjader/Kosmos-EVAA-Fusion-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 
8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4345 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5419 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1292 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4277 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3854 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Fusion-8B/3030519e-f137-4091-9394-26a0779f0ad9.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Fusion-8B/3030519e-f137-4091-9394-26a0779f0ad9.json deleted file mode 100644 index b3753ec3b..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Fusion-8B/3030519e-f137-4091-9394-26a0779f0ad9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-Fusion-8B/1762652580.2328691", - "retrieved_timestamp": "1762652580.2328691", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-Fusion-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-Fusion-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43446832183052075 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5419028777027763 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12915407854984895 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42766666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38538896276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Fusion-8B/4e332594-d0b9-4913-9950-208abe4faab7.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Fusion-8B/4e332594-d0b9-4913-9950-208abe4faab7.json new file mode 100644 index 000000000..eb9755b5c --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Fusion-8B/4e332594-d0b9-4913-9950-208abe4faab7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-Fusion-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-Fusion-8B", + "id": "jaspionjader/Kosmos-EVAA-Fusion-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4418 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5406 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1352 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4277 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.386 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Fusion-8B/ac41e588-0664-44f5-9fa9-eafd6508078b.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Fusion-8B/ac41e588-0664-44f5-9fa9-eafd6508078b.json deleted file mode 100644 index 9a461f39c..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Fusion-8B/ac41e588-0664-44f5-9fa9-eafd6508078b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-Fusion-8B/1762652580.23267", - "retrieved_timestamp": "1762652580.232671", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-Fusion-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-Fusion-8B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4417623018036587 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5405890148943007 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1351963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42766666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3859707446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-8B/5ad2ad73-47ed-465d-b4c0-b358e6b6435f.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-8B/5ad2ad73-47ed-465d-b4c0-b358e6b6435f.json new file mode 100644 index 000000000..b95e94419 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-8B/5ad2ad73-47ed-465d-b4c0-b358e6b6435f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-PRP-8B", + "id": "jaspionjader/Kosmos-EVAA-PRP-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3405 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5196 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0884 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3129 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4301 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": 
"MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3647 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-8B/eb68e0e3-1e39-4779-bc99-4e1825d9c602.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-8B/eb68e0e3-1e39-4779-bc99-4e1825d9c602.json deleted file mode 100644 index 56f8097e7..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-8B/eb68e0e3-1e39-4779-bc99-4e1825d9c602.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-8B/1762652580.233048", - "retrieved_timestamp": "1762652580.2330492", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-PRP-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-PRP-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34052092891306174 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5195634214282913 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08836858006042296 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4301145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3646941489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-light-8B/0d2e1c3f-8ee6-44b0-912a-452e2a5a6da7.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-light-8B/0d2e1c3f-8ee6-44b0-912a-452e2a5a6da7.json deleted file mode 100644 index bf4eef76d..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-light-8B/0d2e1c3f-8ee6-44b0-912a-452e2a5a6da7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-light-8B/1762652580.233289", - "retrieved_timestamp": "1762652580.23329", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-PRP-light-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-PRP-light-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38238651223198894 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5271029575696119 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11027190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42490625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3781582446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-light-8B/c9f716ef-0aa6-445f-8fc9-b102f3a0ea2a.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-light-8B/c9f716ef-0aa6-445f-8fc9-b102f3a0ea2a.json new file mode 100644 index 000000000..8874fb70f --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-light-8B/c9f716ef-0aa6-445f-8fc9-b102f3a0ea2a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-light-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-PRP-light-8B", + "id": "jaspionjader/Kosmos-EVAA-PRP-light-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" 
+ }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3824 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5271 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1103 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4249 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3782 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v23-8B/5d5ae047-72d1-4083-8e28-dcce7337ed25.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v23-8B/5d5ae047-72d1-4083-8e28-dcce7337ed25.json deleted file mode 100644 index 9f393e179..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v23-8B/5d5ae047-72d1-4083-8e28-dcce7337ed25.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v23-8B/1762652580.233495", - "retrieved_timestamp": "1762652580.233495", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-PRP-v23-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-PRP-v23-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.4040933611705829 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5289840558524612 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11555891238670694 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43684375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37059507978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v23-8B/a2e32a77-867c-4921-ada4-c7b169efbebe.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v23-8B/a2e32a77-867c-4921-ada4-c7b169efbebe.json new file mode 100644 index 000000000..3407edf36 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v23-8B/a2e32a77-867c-4921-ada4-c7b169efbebe.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v23-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-PRP-v23-8B", + "id": "jaspionjader/Kosmos-EVAA-PRP-v23-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4041 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.529 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1156 + } 
+ }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4368 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3706 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v24-8B/e6b62da0-ad6d-431c-8a0e-185c6eddf3da.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v24-8B/e6b62da0-ad6d-431c-8a0e-185c6eddf3da.json deleted file mode 100644 index 914747dca..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v24-8B/e6b62da0-ad6d-431c-8a0e-185c6eddf3da.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v24-8B/1762652580.233697", - "retrieved_timestamp": "1762652580.2336981", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-PRP-v24-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-PRP-v24-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42587556572117535 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5276140433113651 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11027190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42903125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3779089095744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v24-8B/f76f759f-d05d-4eb6-a2b9-3b1dfbe840f0.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v24-8B/f76f759f-d05d-4eb6-a2b9-3b1dfbe840f0.json new file mode 100644 index 000000000..269ec5b3a --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v24-8B/f76f759f-d05d-4eb6-a2b9-3b1dfbe840f0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v24-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-PRP-v24-8B", + "id": "jaspionjader/Kosmos-EVAA-PRP-v24-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4259 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5276 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1103 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.429 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3779 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v25-8B/81c8704c-7124-42d1-b320-77e31e35898b.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v25-8B/81c8704c-7124-42d1-b320-77e31e35898b.json deleted file mode 100644 index 4ba657080..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v25-8B/81c8704c-7124-42d1-b320-77e31e35898b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v25-8B/1762652580.23391", - "retrieved_timestamp": "1762652580.23391", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-PRP-v25-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-PRP-v25-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4420869958377106 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5290702582598797 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11858006042296072 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4303333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37159242021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v25-8B/ece0bd6b-4eec-485c-942b-e23f3295c2f8.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v25-8B/ece0bd6b-4eec-485c-942b-e23f3295c2f8.json new file mode 100644 index 000000000..c4a81c92e --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v25-8B/ece0bd6b-4eec-485c-942b-e23f3295c2f8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v25-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open 
LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-PRP-v25-8B", + "id": "jaspionjader/Kosmos-EVAA-PRP-v25-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4421 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5291 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1186 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.318 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4303 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3716 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v26-8B/6705072a-5a46-49ae-925f-1cf7da1ea288.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v26-8B/6705072a-5a46-49ae-925f-1cf7da1ea288.json deleted file mode 100644 index 2a4812623..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v26-8B/6705072a-5a46-49ae-925f-1cf7da1ea288.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v26-8B/1762652580.234126", - "retrieved_timestamp": "1762652580.234127", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - 
"source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-PRP-v26-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-PRP-v26-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4413877400851962 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5271171047819411 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11329305135951662 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4263645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3793218085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v26-8B/ada110bb-0988-4c19-9798-74577dde5ce9.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v26-8B/ada110bb-0988-4c19-9798-74577dde5ce9.json new file mode 100644 index 000000000..fb137b1ab --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v26-8B/ada110bb-0988-4c19-9798-74577dde5ce9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v26-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-PRP-v26-8B", + "id": "jaspionjader/Kosmos-EVAA-PRP-v26-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4414 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": 
{ + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5271 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1133 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4264 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3793 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v27-8B/d3dcd3f0-2f43-4b82-ba29-77a69a9b3e8f.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v27-8B/d3dcd3f0-2f43-4b82-ba29-77a69a9b3e8f.json deleted file mode 100644 index 0c722e835..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v27-8B/d3dcd3f0-2f43-4b82-ba29-77a69a9b3e8f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v27-8B/1762652580.2343428", - "retrieved_timestamp": "1762652580.234344", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-PRP-v27-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-PRP-v27-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4378404854674486 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5290320010579407 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": 
"Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11933534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4343333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37549867021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v27-8B/ed4f994d-d196-40bd-8f8f-f6a7f07c3c90.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v27-8B/ed4f994d-d196-40bd-8f8f-f6a7f07c3c90.json new file mode 100644 index 000000000..3a868b97e --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v27-8B/ed4f994d-d196-40bd-8f8f-f6a7f07c3c90.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v27-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-PRP-v27-8B", + "id": "jaspionjader/Kosmos-EVAA-PRP-v27-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4378 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.529 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1193 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + 
"evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4343 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3755 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v28-8B/57395f9a-0534-453e-80fc-96e9dc5cd9c3.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v28-8B/57395f9a-0534-453e-80fc-96e9dc5cd9c3.json new file mode 100644 index 000000000..9e9f72bfb --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v28-8B/57395f9a-0534-453e-80fc-96e9dc5cd9c3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v28-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-PRP-v28-8B", + "id": "jaspionjader/Kosmos-EVAA-PRP-v28-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4366 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5295 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1171 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.433 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.375 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v28-8B/e2aa230d-452e-42f0-a780-af255c62120e.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v28-8B/e2aa230d-452e-42f0-a780-af255c62120e.json deleted file mode 100644 index 54b4e5f2a..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v28-8B/e2aa230d-452e-42f0-a780-af255c62120e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v28-8B/1762652580.234553", - "retrieved_timestamp": "1762652580.234553", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-PRP-v28-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-PRP-v28-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43659157701565177 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5294743678489208 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11706948640483383 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43296874999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.375 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v29-8B/86e94a19-e497-4539-802b-597ce0e0ced0.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v29-8B/86e94a19-e497-4539-802b-597ce0e0ced0.json deleted file 
mode 100644 index c45e4c32f..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v29-8B/86e94a19-e497-4539-802b-597ce0e0ced0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v29-8B/1762652580.234771", - "retrieved_timestamp": "1762652580.234771", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-PRP-v29-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-PRP-v29-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4487315877427448 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5275189525290296 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42366666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37649601063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v29-8B/f8f70702-9ab4-4e1a-a11d-090627d58f02.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v29-8B/f8f70702-9ab4-4e1a-a11d-090627d58f02.json new file mode 100644 index 000000000..444db8839 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v29-8B/f8f70702-9ab4-4e1a-a11d-090627d58f02.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v29-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-PRP-v29-8B", + "id": "jaspionjader/Kosmos-EVAA-PRP-v29-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": 
"LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4487 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5275 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1201 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4237 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3765 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v30-8B/320c581d-f667-4dab-a32c-bb9f2621e84d.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v30-8B/320c581d-f667-4dab-a32c-bb9f2621e84d.json deleted file mode 100644 index 3423dce6e..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v30-8B/320c581d-f667-4dab-a32c-bb9f2621e84d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v30-8B/1762652580.2349901", - "retrieved_timestamp": "1762652580.234991", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-PRP-v30-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-PRP-v30-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { 
- "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42947268802333366 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5327819889174134 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11782477341389729 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4263333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3937832446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v30-8B/3cab8bda-bdf6-4345-b89e-18d34a8f6361.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v30-8B/3cab8bda-bdf6-4345-b89e-18d34a8f6361.json new file mode 100644 index 000000000..325f0d8d0 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v30-8B/3cab8bda-bdf6-4345-b89e-18d34a8f6361.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v30-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-PRP-v30-8B", + "id": "jaspionjader/Kosmos-EVAA-PRP-v30-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4295 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5328 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1178 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4263 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3938 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v31-8B/0757cecd-bc5f-4095-90ee-25920ae6670c.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v31-8B/0757cecd-bc5f-4095-90ee-25920ae6670c.json deleted file mode 100644 index 8c7601bde..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v31-8B/0757cecd-bc5f-4095-90ee-25920ae6670c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v31-8B/1762652580.235214", - "retrieved_timestamp": "1762652580.235214", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-PRP-v31-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-PRP-v31-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43986400528375824 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5315048053167004 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11329305135951662 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42506249999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39345079787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v31-8B/0955fc17-8878-401a-9ec3-149528ee51e1.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v31-8B/0955fc17-8878-401a-9ec3-149528ee51e1.json new file mode 100644 index 000000000..4cfc98e57 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v31-8B/0955fc17-8878-401a-9ec3-149528ee51e1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v31-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-PRP-v31-8B", + "id": "jaspionjader/Kosmos-EVAA-PRP-v31-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4399 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5315 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1133 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3138 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.4251 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3935 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v32-8B/c63bf49a-e7d4-4853-8684-9cc03eaa7840.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v32-8B/c63bf49a-e7d4-4853-8684-9cc03eaa7840.json new file mode 100644 index 000000000..ddd83d684 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v32-8B/c63bf49a-e7d4-4853-8684-9cc03eaa7840.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v32-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-PRP-v32-8B", + "id": "jaspionjader/Kosmos-EVAA-PRP-v32-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4487 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5293 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1148 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3163 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4211 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3777 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v32-8B/f58f0ecc-a059-448d-a2f9-e36b601e2154.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v32-8B/f58f0ecc-a059-448d-a2f9-e36b601e2154.json deleted file mode 100644 index e1b5114b3..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v32-8B/f58f0ecc-a059-448d-a2f9-e36b601e2154.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v32-8B/1762652580.235436", - "retrieved_timestamp": "1762652580.2354372", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-PRP-v32-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-PRP-v32-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4487315877427448 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5292530349260334 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1148036253776435 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42106249999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3776595744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v33-8B/2436838e-2b6a-4c1e-b8c2-ec505d9a4c34.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v33-8B/2436838e-2b6a-4c1e-b8c2-ec505d9a4c34.json deleted file mode 100644 index 81591a0b4..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v33-8B/2436838e-2b6a-4c1e-b8c2-ec505d9a4c34.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v33-8B/1762652580.23565", - 
"retrieved_timestamp": "1762652580.235651", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-PRP-v33-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-PRP-v33-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4301719437758481 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5321153222507468 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11782477341389729 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41839583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.390874335106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v33-8B/65e6a3b6-4291-4591-bc0b-576930061c68.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v33-8B/65e6a3b6-4291-4591-bc0b-576930061c68.json new file mode 100644 index 000000000..48e58506f --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v33-8B/65e6a3b6-4291-4591-bc0b-576930061c68.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v33-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-PRP-v33-8B", + "id": "jaspionjader/Kosmos-EVAA-PRP-v33-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4302 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5321 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1178 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3129 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4184 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3909 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v34-8B/11486e0e-a9e3-43b0-b26e-299a86555d16.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v34-8B/11486e0e-a9e3-43b0-b26e-299a86555d16.json deleted file mode 100644 index b4184d617..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v34-8B/11486e0e-a9e3-43b0-b26e-299a86555d16.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v34-8B/1762652580.235871", - "retrieved_timestamp": "1762652580.235871", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-PRP-v34-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-PRP-v34-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45625052638111324 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.533301459442271 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11253776435045318 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42372916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3927027925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v34-8B/1ddf9e02-4066-440e-a777-fcd3f96bc4b3.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v34-8B/1ddf9e02-4066-440e-a777-fcd3f96bc4b3.json new file mode 100644 index 000000000..89f19eb09 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v34-8B/1ddf9e02-4066-440e-a777-fcd3f96bc4b3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v34-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-PRP-v34-8B", + "id": "jaspionjader/Kosmos-EVAA-PRP-v34-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4563 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5333 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1125 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + 
"dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3112 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4237 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3927 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-8B/75037d12-da94-4c55-8de5-a7cef098d4b0.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-8B/75037d12-da94-4c55-8de5-a7cef098d4b0.json deleted file mode 100644 index 6b3941007..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-8B/75037d12-da94-4c55-8de5-a7cef098d4b0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-TSN-8B/1762652580.236081", - "retrieved_timestamp": "1762652580.2360818", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-TSN-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-TSN-8B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47213726246359655 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5176546480934434 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13444108761329304 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.43290625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3816489361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-8B/f9f96bb2-edbc-4112-97aa-a7420dea32a1.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-8B/f9f96bb2-edbc-4112-97aa-a7420dea32a1.json new file mode 100644 index 000000000..f496d05c5 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-8B/f9f96bb2-edbc-4112-97aa-a7420dea32a1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-TSN-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-TSN-8B", + "id": "jaspionjader/Kosmos-EVAA-TSN-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4721 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5177 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1344 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4329 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.3816 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-light-8B/3a24b30f-7698-4ecb-ac26-3537a0b38616.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-light-8B/3a24b30f-7698-4ecb-ac26-3537a0b38616.json new file mode 100644 index 000000000..6959b532b --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-light-8B/3a24b30f-7698-4ecb-ac26-3537a0b38616.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-TSN-light-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-TSN-light-8B", + "id": "jaspionjader/Kosmos-EVAA-TSN-light-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4685 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5235 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1216 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4289 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3806 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-light-8B/9f0aa20f-8687-4c21-b222-39a322f90842.json 
b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-light-8B/9f0aa20f-8687-4c21-b222-39a322f90842.json deleted file mode 100644 index 2dac0863d..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-light-8B/9f0aa20f-8687-4c21-b222-39a322f90842.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-TSN-light-8B/1762652580.236298", - "retrieved_timestamp": "1762652580.236299", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-TSN-light-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-TSN-light-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46849027247702757 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5235021286391058 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1216012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42893749999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38056848404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v19-8B/91c2897a-3ae3-402b-aadf-26d0b8d746c5.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v19-8B/91c2897a-3ae3-402b-aadf-26d0b8d746c5.json deleted file mode 100644 index e92897c14..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v19-8B/91c2897a-3ae3-402b-aadf-26d0b8d746c5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-TSN-v19-8B/1762652580.236516", - "retrieved_timestamp": "1762652580.2365172", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - 
"model_info": { - "name": "jaspionjader/Kosmos-EVAA-TSN-v19-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-TSN-v19-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4563502617499346 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5316458785173577 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11555891238670694 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4276979166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37898936170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v19-8B/d4030df6-2be6-4f46-9c9b-ce3037b9a004.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v19-8B/d4030df6-2be6-4f46-9c9b-ce3037b9a004.json new file mode 100644 index 000000000..e3426247a --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v19-8B/d4030df6-2be6-4f46-9c9b-ce3037b9a004.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-TSN-v19-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-TSN-v19-8B", + "id": "jaspionjader/Kosmos-EVAA-TSN-v19-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4564 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5316 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1156 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4277 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.379 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v20-8B/4a60fea6-e0e8-497e-9b29-439e7641e77b.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v20-8B/4a60fea6-e0e8-497e-9b29-439e7641e77b.json deleted file mode 100644 index 3f68136a6..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v20-8B/4a60fea6-e0e8-497e-9b29-439e7641e77b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-TSN-v20-8B/1762652580.236737", - "retrieved_timestamp": "1762652580.236737", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-TSN-v20-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-TSN-v20-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4423119545029411 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5250468078369915 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12462235649546828 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42103124999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39361702127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v20-8B/ec234403-f43d-46a0-84a4-ab47673226b3.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v20-8B/ec234403-f43d-46a0-84a4-ab47673226b3.json new file mode 100644 index 000000000..3081940fd --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v20-8B/ec234403-f43d-46a0-84a4-ab47673226b3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-TSN-v20-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-TSN-v20-8B", + "id": "jaspionjader/Kosmos-EVAA-TSN-v20-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4423 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.525 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1246 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3138 + } + }, + { + "evaluation_name": "MUSR", + 
"source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.421 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3936 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v21-8B/805379f4-784f-4602-92e8-180df4da9fc3.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v21-8B/805379f4-784f-4602-92e8-180df4da9fc3.json new file mode 100644 index 000000000..3d18c5f8b --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v21-8B/805379f4-784f-4602-92e8-180df4da9fc3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-TSN-v21-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-TSN-v21-8B", + "id": "jaspionjader/Kosmos-EVAA-TSN-v21-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.467 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5248 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1193 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4343 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3816 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v21-8B/d9c819c2-a3f6-481e-bd71-47912aef9847.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v21-8B/d9c819c2-a3f6-481e-bd71-47912aef9847.json deleted file mode 100644 index e46fd2105..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v21-8B/d9c819c2-a3f6-481e-bd71-47912aef9847.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-TSN-v21-8B/1762652580.2369542", - "retrieved_timestamp": "1762652580.236955", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-TSN-v21-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-TSN-v21-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46701640536000033 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.524796520922724 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11933534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43427083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3816489361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v22-8B/6e20f902-8752-466c-b8d4-34787fb90fce.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v22-8B/6e20f902-8752-466c-b8d4-34787fb90fce.json deleted file mode 100644 
index e2b6425a1..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v22-8B/6e20f902-8752-466c-b8d4-34787fb90fce.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-TSN-v22-8B/1762652580.2371762", - "retrieved_timestamp": "1762652580.2371771", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-TSN-v22-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-TSN-v22-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4673410993940522 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5245863682593667 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11329305135951662 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4303333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38115026595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v22-8B/9f3920aa-9400-46f1-bcfa-969f69b3335c.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v22-8B/9f3920aa-9400-46f1-bcfa-969f69b3335c.json new file mode 100644 index 000000000..c54dd019f --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v22-8B/9f3920aa-9400-46f1-bcfa-969f69b3335c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-TSN-v22-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-TSN-v22-8B", + "id": "jaspionjader/Kosmos-EVAA-TSN-v22-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": 
"LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4673 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5246 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1133 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4303 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3812 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-8B/26cbf444-ab93-409a-b85d-e2bd267eae5e.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-8B/26cbf444-ab93-409a-b85d-e2bd267eae5e.json new file mode 100644 index 000000000..92fa310e3 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-8B/26cbf444-ab93-409a-b85d-e2bd267eae5e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-gamma-8B", + "id": "jaspionjader/Kosmos-EVAA-gamma-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4572 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5322 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.105 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3188 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4306 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3901 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-8B/d25510e4-6549-4f64-8ec4-37ac8671050c.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-8B/d25510e4-6549-4f64-8ec4-37ac8671050c.json deleted file mode 100644 index fa34e54cf..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-8B/d25510e4-6549-4f64-8ec4-37ac8671050c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-8B/1762652580.237391", - "retrieved_timestamp": "1762652580.237392", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-gamma-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-gamma-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.45722460848326885 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5321936191858193 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10498489425981873 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4305833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39012632978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-alt-8B/58e279d4-da0f-4e2c-a74d-c51caeaad884.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-alt-8B/58e279d4-da0f-4e2c-a74d-c51caeaad884.json deleted file mode 100644 index 6f115c4de..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-alt-8B/58e279d4-da0f-4e2c-a74d-c51caeaad884.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-alt-8B/1762652580.23761", - "retrieved_timestamp": "1762652580.23761", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-gamma-alt-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-gamma-alt-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4542270065648036 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5297928701221488 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1095166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42921875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3896276595744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-alt-8B/7c2b17a8-1de2-4441-a281-fe3fd043f831.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-alt-8B/7c2b17a8-1de2-4441-a281-fe3fd043f831.json new file mode 100644 index 000000000..90cb19b6b --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-alt-8B/7c2b17a8-1de2-4441-a281-fe3fd043f831.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-alt-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-gamma-alt-8B", + "id": "jaspionjader/Kosmos-EVAA-gamma-alt-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4542 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5298 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1095 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3247 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4292 + } + }, + { + 
"evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3896 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-light-8B/64c07a98-4f3f-49f7-99de-9963dcfedeba.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-light-8B/64c07a98-4f3f-49f7-99de-9963dcfedeba.json deleted file mode 100644 index 65df2f3fb..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-light-8B/64c07a98-4f3f-49f7-99de-9963dcfedeba.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-light-8B/1762652580.237838", - "retrieved_timestamp": "1762652580.2378392", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-gamma-light-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-gamma-light-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45809895521660304 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5376138387743472 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11027190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42909375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.394281914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-light-8B/94c5756c-cbde-46e2-90d2-207678373061.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-light-8B/94c5756c-cbde-46e2-90d2-207678373061.json new file mode 100644 index 000000000..980ba2f9f --- /dev/null +++ 
b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-light-8B/94c5756c-cbde-46e2-90d2-207678373061.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-light-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-gamma-light-8B", + "id": "jaspionjader/Kosmos-EVAA-gamma-light-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4581 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5376 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1103 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3163 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4291 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3943 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-light-alt-8B/abebffbf-48b5-4452-8c7a-bb1175a7e979.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-light-alt-8B/abebffbf-48b5-4452-8c7a-bb1175a7e979.json deleted file mode 100644 index cbff58353..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-light-alt-8B/abebffbf-48b5-4452-8c7a-bb1175a7e979.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-light-alt-8B/1762652580.238084", - "retrieved_timestamp": "1762652580.238085", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-gamma-light-alt-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-gamma-light-alt-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44535942410581697 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5327145731870764 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11329305135951662 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43045833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39228723404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-light-alt-8B/e0048124-89bf-4327-88a8-00aa51ee29af.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-light-alt-8B/e0048124-89bf-4327-88a8-00aa51ee29af.json new file mode 100644 index 000000000..e1956821f --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-light-alt-8B/e0048124-89bf-4327-88a8-00aa51ee29af.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-light-alt-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-gamma-light-alt-8B", + "id": "jaspionjader/Kosmos-EVAA-gamma-light-alt-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": 
"IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4454 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5327 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1133 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3138 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4305 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3923 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-ultra-light-8B/1810feae-7a27-4c17-8174-3cd8a143b21f.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-ultra-light-8B/1810feae-7a27-4c17-8174-3cd8a143b21f.json deleted file mode 100644 index 3e890e58c..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-ultra-light-8B/1810feae-7a27-4c17-8174-3cd8a143b21f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-ultra-light-8B/1762652580.238316", - "retrieved_timestamp": "1762652580.238317", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-gamma-ultra-light-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-gamma-ultra-light-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on 
IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4563003940655239 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5316344937208096 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11782477341389729 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4196979166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3914561170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-ultra-light-8B/9d776307-43af-43bb-ab64-52fb7f331cfe.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-ultra-light-8B/9d776307-43af-43bb-ab64-52fb7f331cfe.json new file mode 100644 index 000000000..2646a813c --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-ultra-light-8B/9d776307-43af-43bb-ab64-52fb7f331cfe.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-ultra-light-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-gamma-ultra-light-8B", + "id": "jaspionjader/Kosmos-EVAA-gamma-ultra-light-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4563 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5316 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1178 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3163 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4197 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3915 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v13-8B/1fc6ca13-157c-4502-8724-be153afb6347.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v13-8B/1fc6ca13-157c-4502-8724-be153afb6347.json deleted file mode 100644 index 06ffdd3fe..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v13-8B/1fc6ca13-157c-4502-8724-be153afb6347.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-v13-8B/1762652580.238605", - "retrieved_timestamp": "1762652580.238605", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-gamma-v13-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-gamma-v13-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44286160720222345 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5359422335881335 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11178247734138973 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42776041666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3929521276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v13-8B/d8d41981-a7c8-48e9-a63c-86520a0f23d5.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v13-8B/d8d41981-a7c8-48e9-a63c-86520a0f23d5.json new file mode 100644 index 000000000..d1b28b8b8 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v13-8B/d8d41981-a7c8-48e9-a63c-86520a0f23d5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-v13-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-gamma-v13-8B", + "id": "jaspionjader/Kosmos-EVAA-gamma-v13-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4429 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5359 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1118 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3138 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4278 + } + }, + { + "evaluation_name": 
"MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.393 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v14-8B/1355985c-fbcb-4eac-8435-417d6034f2f0.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v14-8B/1355985c-fbcb-4eac-8435-417d6034f2f0.json new file mode 100644 index 000000000..8087ad74c --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v14-8B/1355985c-fbcb-4eac-8435-417d6034f2f0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-v14-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-gamma-v14-8B", + "id": "jaspionjader/Kosmos-EVAA-gamma-v14-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.438 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5363 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1103 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3129 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4277 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3931 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v14-8B/c20f5702-24fc-443a-875e-495401776eeb.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v14-8B/c20f5702-24fc-443a-875e-495401776eeb.json deleted file mode 100644 index 606ab571f..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v14-8B/c20f5702-24fc-443a-875e-495401776eeb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-v14-8B/1762652580.23884", - "retrieved_timestamp": "1762652580.23884", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-gamma-v14-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-gamma-v14-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4380155764482684 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5363063034440413 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11027190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42772916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3931183510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v15-8B/24e11e0c-fb61-46c1-a05e-c533eb392195.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v15-8B/24e11e0c-fb61-46c1-a05e-c533eb392195.json deleted file mode 100644 index 6f681e140..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v15-8B/24e11e0c-fb61-46c1-a05e-c533eb392195.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-v15-8B/1762652580.239064", - "retrieved_timestamp": "1762652580.2390652", - 
"source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-gamma-v15-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-gamma-v15-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4654428028741517 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.534326872652317 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11102719033232629 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42772916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3941156914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v15-8B/44486b02-7bdd-4f59-8d4e-5c8deeb1fd60.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v15-8B/44486b02-7bdd-4f59-8d4e-5c8deeb1fd60.json new file mode 100644 index 000000000..a02407d4d --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v15-8B/44486b02-7bdd-4f59-8d4e-5c8deeb1fd60.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-v15-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-gamma-v15-8B", + "id": "jaspionjader/Kosmos-EVAA-gamma-v15-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4654 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5343 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.111 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3112 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4277 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3941 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v16-8B/15deaa33-87a2-442e-9618-13f5ab6c299e.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v16-8B/15deaa33-87a2-442e-9618-13f5ab6c299e.json deleted file mode 100644 index 74aaceb93..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v16-8B/15deaa33-87a2-442e-9618-13f5ab6c299e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-v16-8B/1762652580.2392871", - "retrieved_timestamp": "1762652580.239288", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-gamma-v16-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-gamma-v16-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4556510059974202 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5343925058514598 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11706948640483383 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4264270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39170545212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v16-8B/45ae3dc3-6dc0-4d10-99cb-a7f330110906.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v16-8B/45ae3dc3-6dc0-4d10-99cb-a7f330110906.json new file mode 100644 index 000000000..703c586b6 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v16-8B/45ae3dc3-6dc0-4d10-99cb-a7f330110906.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-v16-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-gamma-v16-8B", + "id": "jaspionjader/Kosmos-EVAA-gamma-v16-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4557 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5344 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1171 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + 
"dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3154 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4264 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3917 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v17-8B/6b54763a-6329-47fb-bf50-296604251b47.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v17-8B/6b54763a-6329-47fb-bf50-296604251b47.json new file mode 100644 index 000000000..d04ea2204 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v17-8B/6b54763a-6329-47fb-bf50-296604251b47.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-v17-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-gamma-v17-8B", + "id": "jaspionjader/Kosmos-EVAA-gamma-v17-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4462 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5347 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.111 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3112 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4291 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3923 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v17-8B/bd4cc259-d535-437a-afc5-d74a60154b07.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v17-8B/bd4cc259-d535-437a-afc5-d74a60154b07.json deleted file mode 100644 index da41d1e0b..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v17-8B/bd4cc259-d535-437a-afc5-d74a60154b07.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-v17-8B/1762652580.239734", - "retrieved_timestamp": "1762652580.239739", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-gamma-v17-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-gamma-v17-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4462337708391512 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5346666279815969 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11102719033232629 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42906249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39228723404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v18-8B/96a26bf3-b4b2-465f-8ce6-a2ef943c001a.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v18-8B/96a26bf3-b4b2-465f-8ce6-a2ef943c001a.json new file mode 100644 index 000000000..c9226fc57 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v18-8B/96a26bf3-b4b2-465f-8ce6-a2ef943c001a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-v18-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-gamma-v18-8B", + "id": "jaspionjader/Kosmos-EVAA-gamma-v18-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4341 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5339 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.111 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3112 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4317 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3905 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v18-8B/aadb6262-4f31-4681-983c-0d19e8bbc5cd.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v18-8B/aadb6262-4f31-4681-983c-0d19e8bbc5cd.json deleted file mode 100644 index 1acda0304..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v18-8B/aadb6262-4f31-4681-983c-0d19e8bbc5cd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-v18-8B/1762652580.240138", - "retrieved_timestamp": "1762652580.240139", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-gamma-v18-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-gamma-v18-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43409376011205825 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5339179190615058 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11102719033232629 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4316979166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3904587765957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-immersive-sof-v44-8B/41e3ecda-8988-456c-b413-19770e2f05c7.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-immersive-sof-v44-8B/41e3ecda-8988-456c-b413-19770e2f05c7.json deleted file mode 100644 index 16b3598e9..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-immersive-sof-v44-8B/41e3ecda-8988-456c-b413-19770e2f05c7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-immersive-sof-v44-8B/1762652580.2404292", - "retrieved_timestamp": "1762652580.24043", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": 
"Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-immersive-sof-v44-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-immersive-sof-v44-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44078821970150317 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5214884907801955 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11858006042296072 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4143958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3887965425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-immersive-sof-v44-8B/655b047f-c3a8-4c9c-b864-81d318b2f506.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-immersive-sof-v44-8B/655b047f-c3a8-4c9c-b864-81d318b2f506.json new file mode 100644 index 000000000..53ac1daaf --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-immersive-sof-v44-8B/655b047f-c3a8-4c9c-b864-81d318b2f506.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-immersive-sof-v44-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-immersive-sof-v44-8B", + "id": "jaspionjader/Kosmos-EVAA-immersive-sof-v44-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.4408 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5215 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1186 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4144 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3888 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v10-8B/c57d95da-1b6f-4ce7-8c42-f1129fc1e55e.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v10-8B/c57d95da-1b6f-4ce7-8c42-f1129fc1e55e.json deleted file mode 100644 index 7639f2738..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v10-8B/c57d95da-1b6f-4ce7-8c42-f1129fc1e55e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v10-8B/1762652580.2406652", - "retrieved_timestamp": "1762652580.2406662", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-v10-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-v10-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4261503920708165 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.5375875314179012 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12462235649546828 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4223645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38314494680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v10-8B/f62fed77-e166-422d-b5ce-c50b7bccbf4c.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v10-8B/f62fed77-e166-422d-b5ce-c50b7bccbf4c.json new file mode 100644 index 000000000..5c80d82d6 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v10-8B/f62fed77-e166-422d-b5ce-c50b7bccbf4c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v10-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-v10-8B", + "id": "jaspionjader/Kosmos-EVAA-v10-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4262 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5376 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1246 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4224 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3831 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v11-8B/7ffdabf3-0a8e-4316-b6bd-85b10a81db53.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v11-8B/7ffdabf3-0a8e-4316-b6bd-85b10a81db53.json new file mode 100644 index 000000000..ce89b68ef --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v11-8B/7ffdabf3-0a8e-4316-b6bd-85b10a81db53.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v11-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-v11-8B", + "id": "jaspionjader/Kosmos-EVAA-v11-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4426 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5359 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1322 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3154 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + 
"hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4184 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3836 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v11-8B/9a6b85d5-bb26-4832-915e-8b1ac90b0793.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v11-8B/9a6b85d5-bb26-4832-915e-8b1ac90b0793.json deleted file mode 100644 index b6d9a34f9..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v11-8B/9a6b85d5-bb26-4832-915e-8b1ac90b0793.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v11-8B/1762652580.240909", - "retrieved_timestamp": "1762652580.24091", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-v11-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-v11-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44263664853699297 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5359208647512345 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13217522658610273 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4184270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3835605053191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v12-8B/2c93c987-b32d-4a02-8df4-949cc45b8eb2.json 
b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v12-8B/2c93c987-b32d-4a02-8df4-949cc45b8eb2.json new file mode 100644 index 000000000..cc567f9c9 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v12-8B/2c93c987-b32d-4a02-8df4-949cc45b8eb2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v12-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-v12-8B", + "id": "jaspionjader/Kosmos-EVAA-v12-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4378 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5349 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1367 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4211 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3836 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v12-8B/4bcdbab0-7220-40bb-832f-01003f59da0f.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v12-8B/4bcdbab0-7220-40bb-832f-01003f59da0f.json deleted file mode 100644 index d7cb6739c..000000000 --- 
a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v12-8B/4bcdbab0-7220-40bb-832f-01003f59da0f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v12-8B/1762652580.2411451", - "retrieved_timestamp": "1762652580.241146", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-v12-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-v12-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43779061778303796 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5348808250181011 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13670694864048338 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42106249999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3835605053191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v2-8B/02e7c1d6-9db1-4de8-b13e-afd752b3669a.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v2-8B/02e7c1d6-9db1-4de8-b13e-afd752b3669a.json new file mode 100644 index 000000000..308d1927e --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v2-8B/02e7c1d6-9db1-4de8-b13e-afd752b3669a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v2-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-v2-8B", + "id": "jaspionjader/Kosmos-EVAA-v2-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + 
"evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4396 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5341 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1322 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4211 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3826 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v2-8B/8f16aed2-8b31-48cc-b874-8d437f26f3db.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v2-8B/8f16aed2-8b31-48cc-b874-8d437f26f3db.json deleted file mode 100644 index 50ef13386..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v2-8B/8f16aed2-8b31-48cc-b874-8d437f26f3db.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v2-8B/1762652580.241379", - "retrieved_timestamp": "1762652580.2413802", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-v2-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-v2-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4395891789341171 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5341160060985229 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13217522658610273 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42106249999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3826462765957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v3-8B/262a66ee-04e4-49d5-8bb2-efe0a93801ad.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v3-8B/262a66ee-04e4-49d5-8bb2-efe0a93801ad.json deleted file mode 100644 index 3c8bf88f5..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v3-8B/262a66ee-04e4-49d5-8bb2-efe0a93801ad.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v3-8B/1762652580.241601", - "retrieved_timestamp": "1762652580.241602", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-v3-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-v3-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4410630460511443 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5330987974156178 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13293051359516617 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4223958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38214760638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v3-8B/580a3045-338a-47b2-8ed7-54c993d5aa90.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v3-8B/580a3045-338a-47b2-8ed7-54c993d5aa90.json new file mode 100644 index 000000000..f03f0b4c8 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v3-8B/580a3045-338a-47b2-8ed7-54c993d5aa90.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v3-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-v3-8B", + "id": "jaspionjader/Kosmos-EVAA-v3-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4411 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5331 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1329 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.4224 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3821 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v4-8B/e71d3be5-ea9d-4426-aa58-5806b7541aa6.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v4-8B/e71d3be5-ea9d-4426-aa58-5806b7541aa6.json new file mode 100644 index 000000000..c7bfce252 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v4-8B/e71d3be5-ea9d-4426-aa58-5806b7541aa6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v4-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-v4-8B", + "id": "jaspionjader/Kosmos-EVAA-v4-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4289 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5337 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1254 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4197 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3817 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v4-8B/fd2a2a9c-639f-4348-9861-00271ed070b2.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v4-8B/fd2a2a9c-639f-4348-9861-00271ed070b2.json deleted file mode 100644 index c99b4246b..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v4-8B/fd2a2a9c-639f-4348-9861-00271ed070b2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v4-8B/1762652580.241815", - "retrieved_timestamp": "1762652580.241816", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-v4-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-v4-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4289230353240513 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5336560458316563 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12537764350453173 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41972916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38173204787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v5-8B/1174683a-9488-4c6b-be6b-e5a96328a96f.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v5-8B/1174683a-9488-4c6b-be6b-e5a96328a96f.json new file mode 100644 index 000000000..d9d42b842 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v5-8B/1174683a-9488-4c6b-be6b-e5a96328a96f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v5-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + 
"source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-v5-8B", + "id": "jaspionjader/Kosmos-EVAA-v5-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.446 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5345 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1261 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4224 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3821 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v5-8B/53c89eb1-49ab-4e5f-b1ad-d8e80045a292.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v5-8B/53c89eb1-49ab-4e5f-b1ad-d8e80045a292.json deleted file mode 100644 index 45a12156e..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v5-8B/53c89eb1-49ab-4e5f-b1ad-d8e80045a292.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v5-8B/1762652580.2420359", - "retrieved_timestamp": "1762652580.2420359", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - 
"source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-v5-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-v5-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44595894448951 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5344958011609363 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12613293051359517 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4223958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3820644946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v6-8B/3789b37f-daf0-4c21-82b8-309cbf00312e.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v6-8B/3789b37f-daf0-4c21-82b8-309cbf00312e.json new file mode 100644 index 000000000..e333bd78c --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v6-8B/3789b37f-daf0-4c21-82b8-309cbf00312e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v6-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-v6-8B", + "id": "jaspionjader/Kosmos-EVAA-v6-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4396 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on 
BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.538 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1292 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4184 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3821 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v6-8B/c0cc1ad5-9e53-45ac-becb-f8ce3e5ac631.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v6-8B/c0cc1ad5-9e53-45ac-becb-f8ce3e5ac631.json deleted file mode 100644 index 03de17067..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v6-8B/c0cc1ad5-9e53-45ac-becb-f8ce3e5ac631.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v6-8B/1762652580.242274", - "retrieved_timestamp": "1762652580.242275", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-v6-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-v6-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4395891789341171 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5379609044843678 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12915407854984895 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4184270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3820644946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v7-8B/798c2f08-e10b-4115-bdd5-0d6053d03b60.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v7-8B/798c2f08-e10b-4115-bdd5-0d6053d03b60.json deleted file mode 100644 index 845a51859..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v7-8B/798c2f08-e10b-4115-bdd5-0d6053d03b60.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v7-8B/1762652580.242492", - "retrieved_timestamp": "1762652580.242493", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-v7-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-v7-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4276741268722545 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5334882804815716 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1336858006042296 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41709375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3835605053191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v7-8B/8586cdc1-dd4e-4112-a59c-f6bc2766701b.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v7-8B/8586cdc1-dd4e-4112-a59c-f6bc2766701b.json new file mode 100644 index 000000000..7a7e506dc --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v7-8B/8586cdc1-dd4e-4112-a59c-f6bc2766701b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v7-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-v7-8B", + "id": "jaspionjader/Kosmos-EVAA-v7-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4277 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5335 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1337 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4171 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3836 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v8-8B/388ef85a-db27-4851-9e6e-2002a75bc6c7.json 
b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v8-8B/388ef85a-db27-4851-9e6e-2002a75bc6c7.json deleted file mode 100644 index 06443f816..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v8-8B/388ef85a-db27-4851-9e6e-2002a75bc6c7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v8-8B/1762652580.242712", - "retrieved_timestamp": "1762652580.242713", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-v8-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-v8-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43834027048232027 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5359208647512345 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13066465256797583 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42103124999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38272938829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v8-8B/946a7b16-dfa6-42ad-97c1-955bf8a40dae.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v8-8B/946a7b16-dfa6-42ad-97c1-955bf8a40dae.json new file mode 100644 index 000000000..5b504be25 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v8-8B/946a7b16-dfa6-42ad-97c1-955bf8a40dae.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v8-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-v8-8B", + "id": "jaspionjader/Kosmos-EVAA-v8-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + 
"additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4383 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5359 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1307 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.421 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3827 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v9-8B/cd0c4096-93ee-4a04-83b0-44063770e81b.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v9-8B/cd0c4096-93ee-4a04-83b0-44063770e81b.json deleted file mode 100644 index 337821f99..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v9-8B/cd0c4096-93ee-4a04-83b0-44063770e81b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v9-8B/1762652580.242934", - "retrieved_timestamp": "1762652580.242935", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-v9-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-v9-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - 
} - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43686640336529303 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5360680608930435 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12764350453172205 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4183958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3819813829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v9-8B/d9a6cc31-57c4-4480-a019-25a34b31fcc8.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v9-8B/d9a6cc31-57c4-4480-a019-25a34b31fcc8.json new file mode 100644 index 000000000..8db1d146b --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v9-8B/d9a6cc31-57c4-4480-a019-25a34b31fcc8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v9-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-v9-8B", + "id": "jaspionjader/Kosmos-EVAA-v9-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4369 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5361 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1276 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4184 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.382 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v9-TitanFusion-Mix-8B/279bd5fa-0ab1-411b-871b-bd9ff23853f6.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v9-TitanFusion-Mix-8B/279bd5fa-0ab1-411b-871b-bd9ff23853f6.json new file mode 100644 index 000000000..9e70660a3 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v9-TitanFusion-Mix-8B/279bd5fa-0ab1-411b-871b-bd9ff23853f6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v9-TitanFusion-Mix-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-EVAA-v9-TitanFusion-Mix-8B", + "id": "jaspionjader/Kosmos-EVAA-v9-TitanFusion-Mix-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4284 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.554 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1148 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2878 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4354 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3836 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v9-TitanFusion-Mix-8B/69f3e2b2-8918-41a8-abc6-c84c3d674f94.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v9-TitanFusion-Mix-8B/69f3e2b2-8918-41a8-abc6-c84c3d674f94.json deleted file mode 100644 index 0dbf12555..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v9-TitanFusion-Mix-8B/69f3e2b2-8918-41a8-abc6-c84c3d674f94.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v9-TitanFusion-Mix-8B/1762652580.243146", - "retrieved_timestamp": "1762652580.243147", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-v9-TitanFusion-Mix-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-v9-TitanFusion-Mix-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.428373382624769 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5539931244833417 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1148036253776435 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43544791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3836436170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-8b/60d775f1-47a9-45ae-9b2f-75b95c9d96cd.json b/data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-8b/60d775f1-47a9-45ae-9b2f-75b95c9d96cd.json deleted file mode 100644 index 8f0387227..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-8b/60d775f1-47a9-45ae-9b2f-75b95c9d96cd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-Elusive-8b/1762652580.243371", - "retrieved_timestamp": "1762652580.243371", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-Elusive-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-Elusive-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41688275996577967 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5338593917060857 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12613293051359517 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4077916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3759973404255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-8b/c26fae10-e65a-49ac-a2da-2dbf024fd10d.json b/data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-8b/c26fae10-e65a-49ac-a2da-2dbf024fd10d.json new file mode 100644 index 
000000000..ab43cc04d --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-8b/c26fae10-e65a-49ac-a2da-2dbf024fd10d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-Elusive-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-Elusive-8b", + "id": "jaspionjader/Kosmos-Elusive-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4169 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5339 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1261 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4078 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.376 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-VENN-8B/6d37b2b4-630e-4471-b7a8-50f8a58902fe.json b/data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-VENN-8B/6d37b2b4-630e-4471-b7a8-50f8a58902fe.json new file mode 100644 index 000000000..47b464069 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-VENN-8B/6d37b2b4-630e-4471-b7a8-50f8a58902fe.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/jaspionjader_Kosmos-Elusive-VENN-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-Elusive-VENN-8B", + "id": "jaspionjader/Kosmos-Elusive-VENN-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4233 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5356 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1246 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4157 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3797 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-VENN-8B/d3af54be-9d9a-4a4a-b03e-3468a801795e.json b/data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-VENN-8B/d3af54be-9d9a-4a4a-b03e-3468a801795e.json deleted file mode 100644 index be451f295..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-VENN-8B/d3af54be-9d9a-4a4a-b03e-3468a801795e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-Elusive-VENN-8B/1762652580.243592", - "retrieved_timestamp": "1762652580.243593", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-Elusive-VENN-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-Elusive-VENN-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4232525255211727 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5355598563659026 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12462235649546828 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4156979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3797373670212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-VENN-Asymmetric-8B/de687865-4297-4130-bcfe-0c5116c9b0d1.json b/data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-VENN-Asymmetric-8B/de687865-4297-4130-bcfe-0c5116c9b0d1.json new file mode 100644 index 000000000..c8e3d1871 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-VENN-Asymmetric-8B/de687865-4297-4130-bcfe-0c5116c9b0d1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-Elusive-VENN-Asymmetric-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-Elusive-VENN-Asymmetric-8B", + "id": "jaspionjader/Kosmos-Elusive-VENN-Asymmetric-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4542 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5313 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1344 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4251 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3842 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-VENN-Asymmetric-8B/e7cf15b2-0347-48a8-bf84-08e27b3688fd.json b/data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-VENN-Asymmetric-8B/e7cf15b2-0347-48a8-bf84-08e27b3688fd.json deleted file mode 100644 index 4ab5c5f73..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-VENN-Asymmetric-8B/e7cf15b2-0347-48a8-bf84-08e27b3688fd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-Elusive-VENN-Asymmetric-8B/1762652580.243807", - "retrieved_timestamp": "1762652580.243807", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-Elusive-VENN-Asymmetric-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-Elusive-VENN-Asymmetric-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.4541771388803929 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5312976840812583 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13444108761329304 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42506249999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3842253989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-VENN-Aurora_faustus-8B/8befbe9f-3ab2-4bc8-bd16-5badd2291d5d.json b/data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-VENN-Aurora_faustus-8B/8befbe9f-3ab2-4bc8-bd16-5badd2291d5d.json deleted file mode 100644 index 4827a8b74..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-VENN-Aurora_faustus-8B/8befbe9f-3ab2-4bc8-bd16-5badd2291d5d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-Elusive-VENN-Aurora_faustus-8B/1762652580.244045", - "retrieved_timestamp": "1762652580.244046", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-Elusive-VENN-Aurora_faustus-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-Elusive-VENN-Aurora_faustus-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4335441074127758 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5303980337010061 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11253776435045318 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.417 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3794880319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-VENN-Aurora_faustus-8B/ee1acad1-5dc4-4d8b-8aca-544af5dc2392.json b/data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-VENN-Aurora_faustus-8B/ee1acad1-5dc4-4d8b-8aca-544af5dc2392.json new file mode 100644 index 000000000..c80a27e40 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-VENN-Aurora_faustus-8B/ee1acad1-5dc4-4d8b-8aca-544af5dc2392.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-Elusive-VENN-Aurora_faustus-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-Elusive-VENN-Aurora_faustus-8B", + "id": "jaspionjader/Kosmos-Elusive-VENN-Aurora_faustus-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4335 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5304 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1125 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.417 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3795 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-VENN-8B/52e3f1b1-5a1c-4cca-a36f-9f60284e1883.json b/data/hfopenllm_v2/jaspionjader/Kosmos-VENN-8B/52e3f1b1-5a1c-4cca-a36f-9f60284e1883.json new file mode 100644 index 000000000..4060c65ec --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-VENN-8B/52e3f1b1-5a1c-4cca-a36f-9f60284e1883.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-VENN-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kosmos-VENN-8B", + "id": "jaspionjader/Kosmos-VENN-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4332 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5318 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1412 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4211 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + 
"hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3801 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-VENN-8B/e14cedfb-79a9-446a-ba16-64f378a47b4a.json b/data/hfopenllm_v2/jaspionjader/Kosmos-VENN-8B/e14cedfb-79a9-446a-ba16-64f378a47b4a.json deleted file mode 100644 index de925d6da..000000000 --- a/data/hfopenllm_v2/jaspionjader/Kosmos-VENN-8B/e14cedfb-79a9-446a-ba16-64f378a47b4a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-VENN-8B/1762652580.24428", - "retrieved_timestamp": "1762652580.244281", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/Kosmos-VENN-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-VENN-8B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.433219413378724 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5317923607687299 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14123867069486404 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42109375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3800698138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/PRP-Kosmos-EVAA-8B/2d54c67e-fad5-4a61-b3ae-0393f16dc1ba.json b/data/hfopenllm_v2/jaspionjader/PRP-Kosmos-EVAA-8B/2d54c67e-fad5-4a61-b3ae-0393f16dc1ba.json new file mode 100644 index 000000000..331d553d5 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/PRP-Kosmos-EVAA-8B/2d54c67e-fad5-4a61-b3ae-0393f16dc1ba.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_PRP-Kosmos-EVAA-8B/1770682486.623709", + "retrieved_timestamp": 
"1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PRP-Kosmos-EVAA-8B", + "id": "jaspionjader/PRP-Kosmos-EVAA-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3633 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5237 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0959 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.425 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3766 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/PRP-Kosmos-EVAA-8B/84a37d06-2668-4143-8e2f-5a08651f2dfb.json b/data/hfopenllm_v2/jaspionjader/PRP-Kosmos-EVAA-8B/84a37d06-2668-4143-8e2f-5a08651f2dfb.json deleted file mode 100644 index b8fd7e751..000000000 --- a/data/hfopenllm_v2/jaspionjader/PRP-Kosmos-EVAA-8B/84a37d06-2668-4143-8e2f-5a08651f2dfb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_PRP-Kosmos-EVAA-8B/1762652580.244709", - "retrieved_timestamp": "1762652580.24471", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": 
"third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/PRP-Kosmos-EVAA-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/PRP-Kosmos-EVAA-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36327721556580983 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5237421324582278 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09592145015105741 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.425 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3765791223404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/PRP-Kosmos-EVAA-light-8B/5120e433-f5c7-45fa-be56-566101556271.json b/data/hfopenllm_v2/jaspionjader/PRP-Kosmos-EVAA-light-8B/5120e433-f5c7-45fa-be56-566101556271.json new file mode 100644 index 000000000..a17b8d20f --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/PRP-Kosmos-EVAA-light-8B/5120e433-f5c7-45fa-be56-566101556271.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_PRP-Kosmos-EVAA-light-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PRP-Kosmos-EVAA-light-8B", + "id": "jaspionjader/PRP-Kosmos-EVAA-light-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4321 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + 
"hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5275 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1103 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3221 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4235 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3631 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/PRP-Kosmos-EVAA-light-8B/72c9dcd4-ab00-4f36-a1e6-43e241c8b967.json b/data/hfopenllm_v2/jaspionjader/PRP-Kosmos-EVAA-light-8B/72c9dcd4-ab00-4f36-a1e6-43e241c8b967.json deleted file mode 100644 index f01a52001..000000000 --- a/data/hfopenllm_v2/jaspionjader/PRP-Kosmos-EVAA-light-8B/72c9dcd4-ab00-4f36-a1e6-43e241c8b967.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_PRP-Kosmos-EVAA-light-8B/1762652580.2449658", - "retrieved_timestamp": "1762652580.244967", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/PRP-Kosmos-EVAA-light-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/PRP-Kosmos-EVAA-light-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4321201079801593 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5274582578494339 - } - }, - { - "evaluation_name": 
"MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11027190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221476510067114 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4235416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3631150265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/TSN-Kosmos-EVAA-8B/7f4b4668-c3a0-4575-957d-ba321d55f420.json b/data/hfopenllm_v2/jaspionjader/TSN-Kosmos-EVAA-8B/7f4b4668-c3a0-4575-957d-ba321d55f420.json new file mode 100644 index 000000000..c4d0d61b7 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/TSN-Kosmos-EVAA-8B/7f4b4668-c3a0-4575-957d-ba321d55f420.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_TSN-Kosmos-EVAA-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TSN-Kosmos-EVAA-8B", + "id": "jaspionjader/TSN-Kosmos-EVAA-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4903 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5347 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.145 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.3205 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4173 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3831 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/TSN-Kosmos-EVAA-8B/9819f2bd-8108-4fc5-9208-ce295d860435.json b/data/hfopenllm_v2/jaspionjader/TSN-Kosmos-EVAA-8B/9819f2bd-8108-4fc5-9208-ce295d860435.json deleted file mode 100644 index b8c15025e..000000000 --- a/data/hfopenllm_v2/jaspionjader/TSN-Kosmos-EVAA-8B/9819f2bd-8108-4fc5-9208-ce295d860435.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_TSN-Kosmos-EVAA-8B/1762652580.2451851", - "retrieved_timestamp": "1762652580.245186", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/TSN-Kosmos-EVAA-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/TSN-Kosmos-EVAA-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49032234471203073 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5347376087743225 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14501510574018128 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4173125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.383061835106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/TSN-Kosmos-EVAA-v2-8B/2ce2b8e4-0cd4-4001-8790-ad5e26e3e45c.json b/data/hfopenllm_v2/jaspionjader/TSN-Kosmos-EVAA-v2-8B/2ce2b8e4-0cd4-4001-8790-ad5e26e3e45c.json deleted file mode 100644 index 69fd1b845..000000000 --- a/data/hfopenllm_v2/jaspionjader/TSN-Kosmos-EVAA-v2-8B/2ce2b8e4-0cd4-4001-8790-ad5e26e3e45c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_TSN-Kosmos-EVAA-v2-8B/1762652580.2454138", - "retrieved_timestamp": "1762652580.245415", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/TSN-Kosmos-EVAA-v2-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/TSN-Kosmos-EVAA-v2-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46669171132594844 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.534342097284994 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10800604229607251 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221476510067114 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41864583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3762466755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/TSN-Kosmos-EVAA-v2-8B/9245b74d-4b9d-4158-a402-0c3742097eba.json b/data/hfopenllm_v2/jaspionjader/TSN-Kosmos-EVAA-v2-8B/9245b74d-4b9d-4158-a402-0c3742097eba.json new file mode 100644 index 000000000..7eb53a06b --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/TSN-Kosmos-EVAA-v2-8B/9245b74d-4b9d-4158-a402-0c3742097eba.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_TSN-Kosmos-EVAA-v2-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + 
"model_info": { + "name": "TSN-Kosmos-EVAA-v2-8B", + "id": "jaspionjader/TSN-Kosmos-EVAA-v2-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4667 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5343 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.108 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3221 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4186 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3762 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bbb-1/29a5fcd3-9c22-424c-ab17-70cfe187aea1.json b/data/hfopenllm_v2/jaspionjader/bbb-1/29a5fcd3-9c22-424c-ab17-70cfe187aea1.json new file mode 100644 index 000000000..3ef1955b2 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bbb-1/29a5fcd3-9c22-424c-ab17-70cfe187aea1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bbb-1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bbb-1", + "id": "jaspionjader/bbb-1", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { 
+ "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4864 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5376 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1367 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3138 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4171 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3897 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bbb-1/b6ca35e1-8680-49e8-a6dd-963214be7411.json b/data/hfopenllm_v2/jaspionjader/bbb-1/b6ca35e1-8680-49e8-a6dd-963214be7411.json deleted file mode 100644 index 53bc99d38..000000000 --- a/data/hfopenllm_v2/jaspionjader/bbb-1/b6ca35e1-8680-49e8-a6dd-963214be7411.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bbb-1/1762652580.2456498", - "retrieved_timestamp": "1762652580.245653", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bbb-1", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bbb-1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.4864005283758206 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5375556962119087 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13670694864048338 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41706250000000006 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38971077127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bbb-2/155b7412-cc16-45c3-9261-acc9322a0dcc.json b/data/hfopenllm_v2/jaspionjader/bbb-2/155b7412-cc16-45c3-9261-acc9322a0dcc.json deleted file mode 100644 index f13780cdb..000000000 --- a/data/hfopenllm_v2/jaspionjader/bbb-2/155b7412-cc16-45c3-9261-acc9322a0dcc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bbb-2/1762652580.2460952", - "retrieved_timestamp": "1762652580.2460968", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bbb-2", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bbb-2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4077403511571519 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5066789926627318 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11253776435045318 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4144583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.363530585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bbb-2/af71bfa0-1077-4c96-a4c1-0aa28dc789bf.json b/data/hfopenllm_v2/jaspionjader/bbb-2/af71bfa0-1077-4c96-a4c1-0aa28dc789bf.json new file mode 100644 index 000000000..6b4a860f0 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bbb-2/af71bfa0-1077-4c96-a4c1-0aa28dc789bf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bbb-2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bbb-2", + "id": "jaspionjader/bbb-2", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4077 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5067 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1125 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4145 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3635 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bbb-3/258ebe6d-191d-4804-b5e1-5cd6ce93ba88.json b/data/hfopenllm_v2/jaspionjader/bbb-3/258ebe6d-191d-4804-b5e1-5cd6ce93ba88.json new file mode 100644 index 000000000..2f10bb3d8 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bbb-3/258ebe6d-191d-4804-b5e1-5cd6ce93ba88.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bbb-3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bbb-3", + "id": "jaspionjader/bbb-3", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4168 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5158 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1405 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3112 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4265 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3856 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/jaspionjader/bbb-3/94668ddb-d2fb-44e2-8ed7-10179d145366.json b/data/hfopenllm_v2/jaspionjader/bbb-3/94668ddb-d2fb-44e2-8ed7-10179d145366.json deleted file mode 100644 index 5553b6d41..000000000 --- a/data/hfopenllm_v2/jaspionjader/bbb-3/94668ddb-d2fb-44e2-8ed7-10179d145366.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bbb-3/1762652580.24635", - "retrieved_timestamp": "1762652580.246351", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bbb-3", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bbb-3", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.416832892281369 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5157831821186084 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1404833836858006 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4264895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38563829787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bbb-4/4765f197-82ed-44b3-9a7c-7cbabc6ecd8e.json b/data/hfopenllm_v2/jaspionjader/bbb-4/4765f197-82ed-44b3-9a7c-7cbabc6ecd8e.json new file mode 100644 index 000000000..2e270c373 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bbb-4/4765f197-82ed-44b3-9a7c-7cbabc6ecd8e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bbb-4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bbb-4", + "id": "jaspionjader/bbb-4", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": 
"LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4768 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5212 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1276 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4092 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3773 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bbb-4/828a6bd0-a205-4327-bc77-2e8a84c0b69e.json b/data/hfopenllm_v2/jaspionjader/bbb-4/828a6bd0-a205-4327-bc77-2e8a84c0b69e.json deleted file mode 100644 index db9c748db..000000000 --- a/data/hfopenllm_v2/jaspionjader/bbb-4/828a6bd0-a205-4327-bc77-2e8a84c0b69e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bbb-4/1762652580.2465842", - "retrieved_timestamp": "1762652580.2465851", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bbb-4", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bbb-4", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", 
- "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47675833455232114 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.52115051798211 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12764350453172205 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40924999999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3773271276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bbb-5/8c0a66fb-c87d-489d-b071-b4a599562ead.json b/data/hfopenllm_v2/jaspionjader/bbb-5/8c0a66fb-c87d-489d-b071-b4a599562ead.json deleted file mode 100644 index 27bd5325d..000000000 --- a/data/hfopenllm_v2/jaspionjader/bbb-5/8c0a66fb-c87d-489d-b071-b4a599562ead.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bbb-5/1762652580.2468202", - "retrieved_timestamp": "1762652580.2468212", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bbb-5", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bbb-5", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4702888336281067 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5206902586604485 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13972809667673716 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3998229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3833942819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bbb-5/a5d66f97-1f4b-43da-a83a-4a262e297fd9.json b/data/hfopenllm_v2/jaspionjader/bbb-5/a5d66f97-1f4b-43da-a83a-4a262e297fd9.json new file mode 100644 index 000000000..e596a11bf --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bbb-5/a5d66f97-1f4b-43da-a83a-4a262e297fd9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bbb-5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bbb-5", + "id": "jaspionjader/bbb-5", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4703 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5207 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1397 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3998 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + 
"source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3834 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bbb-6/5d29cf73-65d6-4965-a504-4caf07108cc8.json b/data/hfopenllm_v2/jaspionjader/bbb-6/5d29cf73-65d6-4965-a504-4caf07108cc8.json new file mode 100644 index 000000000..33980d484 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bbb-6/5d29cf73-65d6-4965-a504-4caf07108cc8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bbb-6/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bbb-6", + "id": "jaspionjader/bbb-6", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.488 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5211 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.139 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4052 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3871 + } + } + ] +} \ No newline at end of 
file diff --git a/data/hfopenllm_v2/jaspionjader/bbb-6/ef8025de-fe9f-4a79-97f6-c26c18ab049a.json b/data/hfopenllm_v2/jaspionjader/bbb-6/ef8025de-fe9f-4a79-97f6-c26c18ab049a.json deleted file mode 100644 index d6e0584c5..000000000 --- a/data/hfopenllm_v2/jaspionjader/bbb-6/ef8025de-fe9f-4a79-97f6-c26c18ab049a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bbb-6/1762652580.247051", - "retrieved_timestamp": "1762652580.247051", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bbb-6", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bbb-6", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48797413086166924 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5211453749255449 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13897280966767372 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40515625000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3871343085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bbb-7/15ec04ae-30d3-4ffb-9b0c-54ba63410e3d.json b/data/hfopenllm_v2/jaspionjader/bbb-7/15ec04ae-30d3-4ffb-9b0c-54ba63410e3d.json new file mode 100644 index 000000000..36795088a --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bbb-7/15ec04ae-30d3-4ffb-9b0c-54ba63410e3d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bbb-7/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bbb-7", + "id": "jaspionjader/bbb-7", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + 
"architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4828 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5211 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1367 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4038 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.386 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bbb-7/a31fbd82-2e21-40e7-a73a-c6351c80bae7.json b/data/hfopenllm_v2/jaspionjader/bbb-7/a31fbd82-2e21-40e7-a73a-c6351c80bae7.json deleted file mode 100644 index 7cede2aa2..000000000 --- a/data/hfopenllm_v2/jaspionjader/bbb-7/a31fbd82-2e21-40e7-a73a-c6351c80bae7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bbb-7/1762652580.2473001", - "retrieved_timestamp": "1762652580.247304", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bbb-7", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bbb-7", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": 
"Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48280340607366234 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5211089947725771 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13670694864048338 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4038229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3859707446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-1/15ec7997-1333-43c6-869a-ce4589af56d1.json b/data/hfopenllm_v2/jaspionjader/bh-1/15ec7997-1333-43c6-869a-ce4589af56d1.json deleted file mode 100644 index f99f1a82a..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-1/15ec7997-1333-43c6-869a-ce4589af56d1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-1/1762652580.2475939", - "retrieved_timestamp": "1762652580.247595", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-1", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42842325030917966 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5890155164168736 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4441041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3449135638297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-1/2ed96c70-390b-44de-aa08-9883a2f33ff3.json b/data/hfopenllm_v2/jaspionjader/bh-1/2ed96c70-390b-44de-aa08-9883a2f33ff3.json new file mode 100644 index 000000000..a17534661 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-1/2ed96c70-390b-44de-aa08-9883a2f33ff3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-1", + "id": "jaspionjader/bh-1", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4284 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.589 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0536 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4441 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + 
"source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3449 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-10/67c95889-8a67-40fd-99e2-62e767c16416.json b/data/hfopenllm_v2/jaspionjader/bh-10/67c95889-8a67-40fd-99e2-62e767c16416.json new file mode 100644 index 000000000..bef06887f --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-10/67c95889-8a67-40fd-99e2-62e767c16416.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-10/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-10", + "id": "jaspionjader/bh-10", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4618 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5856 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1103 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4199 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3708 + } + } + ] +} \ No newline at end 
of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-10/86411dbb-e28b-4e9d-856e-fcc001252fbe.json b/data/hfopenllm_v2/jaspionjader/bh-10/86411dbb-e28b-4e9d-856e-fcc001252fbe.json deleted file mode 100644 index 14ce33ba1..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-10/86411dbb-e28b-4e9d-856e-fcc001252fbe.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-10/1762652580.247846", - "retrieved_timestamp": "1762652580.2478468", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-10", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-10", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46184568057199343 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5856025427339699 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11027190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41985416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37076130319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-11/804f4be8-a8a9-473f-a898-d71b742a62eb.json b/data/hfopenllm_v2/jaspionjader/bh-11/804f4be8-a8a9-473f-a898-d71b742a62eb.json deleted file mode 100644 index 40218e076..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-11/804f4be8-a8a9-473f-a898-d71b742a62eb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-11/1762652580.2481", - "retrieved_timestamp": "1762652580.2481012", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-11", - "developer": 
"jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-11", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45754930251732073 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5851155912628809 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11782477341389729 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4145520833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3738364361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-11/a518f39d-e073-493d-9a4f-9af53fc71abf.json b/data/hfopenllm_v2/jaspionjader/bh-11/a518f39d-e073-493d-9a4f-9af53fc71abf.json new file mode 100644 index 000000000..517e4b745 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-11/a518f39d-e073-493d-9a4f-9af53fc71abf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-11/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-11", + "id": "jaspionjader/bh-11", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4575 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5851 + } + }, + { + "evaluation_name": "MATH Level 
5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1178 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4146 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3738 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-12/24f0d9bc-d743-4f46-b5a6-e855e39a1daf.json b/data/hfopenllm_v2/jaspionjader/bh-12/24f0d9bc-d743-4f46-b5a6-e855e39a1daf.json new file mode 100644 index 000000000..1eb93e6a6 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-12/24f0d9bc-d743-4f46-b5a6-e855e39a1daf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-12/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-12", + "id": "jaspionjader/bh-12", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4734 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5802 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 
+ }, + "score_details": { + "score": 0.1186 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4145 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3737 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-12/736ee66e-bd19-4275-afaf-73c2112c2fbd.json b/data/hfopenllm_v2/jaspionjader/bh-12/736ee66e-bd19-4275-afaf-73c2112c2fbd.json deleted file mode 100644 index 9e1ecff30..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-12/736ee66e-bd19-4275-afaf-73c2112c2fbd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-12/1762652580.248367", - "retrieved_timestamp": "1762652580.248368", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-12", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-12", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47338617091539337 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5802489392471556 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11858006042296072 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.4144895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37367021276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-13/3d27f6d9-05a0-44bd-a225-6e6a0bf4a35b.json b/data/hfopenllm_v2/jaspionjader/bh-13/3d27f6d9-05a0-44bd-a225-6e6a0bf4a35b.json new file mode 100644 index 000000000..932f2640d --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-13/3d27f6d9-05a0-44bd-a225-6e6a0bf4a35b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-13/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-13", + "id": "jaspionjader/bh-13", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4698 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5778 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1125 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4159 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.373 + } + } + ] +} \ No newline at 
end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-13/da5a3c32-371f-44e5-89a7-c9ba6e98664e.json b/data/hfopenllm_v2/jaspionjader/bh-13/da5a3c32-371f-44e5-89a7-c9ba6e98664e.json deleted file mode 100644 index a193f2b5b..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-13/da5a3c32-371f-44e5-89a7-c9ba6e98664e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-13/1762652580.248588", - "retrieved_timestamp": "1762652580.248588", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-13", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-13", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4697890486132351 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5777886799254942 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11253776435045318 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41585416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37300531914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-15/ad28e7b8-69e6-4fb9-bec4-62c67fae6d58.json b/data/hfopenllm_v2/jaspionjader/bh-15/ad28e7b8-69e6-4fb9-bec4-62c67fae6d58.json new file mode 100644 index 000000000..87663e79d --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-15/ad28e7b8-69e6-4fb9-bec4-62c67fae6d58.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-15/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-15", + "id": "jaspionjader/bh-15", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": 
"bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4745 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5819 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1246 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4105 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3767 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-15/af3bd92d-45f5-4a48-89aa-b8c956209d5a.json b/data/hfopenllm_v2/jaspionjader/bh-15/af3bd92d-45f5-4a48-89aa-b8c956209d5a.json deleted file mode 100644 index a4aa4857c..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-15/af3bd92d-45f5-4a48-89aa-b8c956209d5a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-15/1762652580.248791", - "retrieved_timestamp": "1762652580.2487922", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-15", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-15", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47453534399836883 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5818643001829722 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12462235649546828 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4105208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37666223404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-16/0da639d4-181c-4ee1-808c-3de8003c2471.json b/data/hfopenllm_v2/jaspionjader/bh-16/0da639d4-181c-4ee1-808c-3de8003c2471.json new file mode 100644 index 000000000..5973a4f9d --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-16/0da639d4-181c-4ee1-808c-3de8003c2471.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-16/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-16", + "id": "jaspionjader/bh-16", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4731 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5783 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1193 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4159 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3776 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-16/c98928d3-0d7f-429c-927c-bf8fa432101a.json b/data/hfopenllm_v2/jaspionjader/bh-16/c98928d3-0d7f-429c-927c-bf8fa432101a.json deleted file mode 100644 index 1eee1aa4d..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-16/c98928d3-0d7f-429c-927c-bf8fa432101a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-16/1762652580.2489972", - "retrieved_timestamp": "1762652580.248998", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-16", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-16", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4730614768813415 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5783335636603978 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11933534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.4158541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37757646276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-17/480bd62c-bc67-4379-bce0-b28a5d6bdf4f.json b/data/hfopenllm_v2/jaspionjader/bh-17/480bd62c-bc67-4379-bce0-b28a5d6bdf4f.json new file mode 100644 index 000000000..cd717325d --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-17/480bd62c-bc67-4379-bce0-b28a5d6bdf4f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-17/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-17", + "id": "jaspionjader/bh-17", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4722 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5776 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1133 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4158 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3757 + 
} + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-17/787d8040-25c8-4893-b140-cf041260d767.json b/data/hfopenllm_v2/jaspionjader/bh-17/787d8040-25c8-4893-b140-cf041260d767.json deleted file mode 100644 index 9f9f97dba..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-17/787d8040-25c8-4893-b140-cf041260d767.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-17/1762652580.249204", - "retrieved_timestamp": "1762652580.2492049", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-17", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-17", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4721871301480073 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5776302177859685 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11329305135951662 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41582291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37566489361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-18/6aad7ade-7bd0-4515-b4ac-2299c58da098.json b/data/hfopenllm_v2/jaspionjader/bh-18/6aad7ade-7bd0-4515-b4ac-2299c58da098.json deleted file mode 100644 index 25420b0d8..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-18/6aad7ade-7bd0-4515-b4ac-2299c58da098.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-18/1762652580.249514", - "retrieved_timestamp": "1762652580.249515", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"jaspionjader/bh-18", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-18", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47246195649764844 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5823837707078298 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11858006042296072 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4184895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37566489361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-18/dd94c18e-b2c3-4135-aa2d-5eb0248315d0.json b/data/hfopenllm_v2/jaspionjader/bh-18/dd94c18e-b2c3-4135-aa2d-5eb0248315d0.json new file mode 100644 index 000000000..0e961f484 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-18/dd94c18e-b2c3-4135-aa2d-5eb0248315d0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-18/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-18", + "id": "jaspionjader/bh-18", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4725 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5824 + } + 
}, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1186 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4185 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3757 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-19/81914fd7-1410-4b80-9be9-6ebfbb664613.json b/data/hfopenllm_v2/jaspionjader/bh-19/81914fd7-1410-4b80-9be9-6ebfbb664613.json deleted file mode 100644 index f74187dff..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-19/81914fd7-1410-4b80-9be9-6ebfbb664613.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-19/1762652580.249828", - "retrieved_timestamp": "1762652580.249829", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-19", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-19", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45842364925065493 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5765774285787187 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11933534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.417125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3774933510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-19/a2ae2953-e341-49be-8469-32bd41d780d7.json b/data/hfopenllm_v2/jaspionjader/bh-19/a2ae2953-e341-49be-8469-32bd41d780d7.json new file mode 100644 index 000000000..4406baf52 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-19/a2ae2953-e341-49be-8469-32bd41d780d7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-19/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-19", + "id": "jaspionjader/bh-19", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4584 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5766 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1193 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4171 + } + }, + { + "evaluation_name": "MMLU-PRO", + 
"source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3775 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-2/23bdd694-f250-46dd-9b8b-526fda47bc9e.json b/data/hfopenllm_v2/jaspionjader/bh-2/23bdd694-f250-46dd-9b8b-526fda47bc9e.json new file mode 100644 index 000000000..504f5d126 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-2/23bdd694-f250-46dd-9b8b-526fda47bc9e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-2", + "id": "jaspionjader/bh-2", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4579 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5937 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1027 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4186 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.3695 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-2/3e4b8dcc-9270-4b14-952f-c6b96ee8ce57.json b/data/hfopenllm_v2/jaspionjader/bh-2/3e4b8dcc-9270-4b14-952f-c6b96ee8ce57.json deleted file mode 100644 index 9378e0e5a..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-2/3e4b8dcc-9270-4b14-952f-c6b96ee8ce57.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-2/1762652580.250077", - "retrieved_timestamp": "1762652580.250078", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-2", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45792386423578324 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5937358907182445 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1027190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41864583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3695146276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-20/cfe4ab09-c772-4617-88b6-77e49553605b.json b/data/hfopenllm_v2/jaspionjader/bh-20/cfe4ab09-c772-4617-88b6-77e49553605b.json deleted file mode 100644 index fb98017a6..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-20/cfe4ab09-c772-4617-88b6-77e49553605b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-20/1762652580.2503", - "retrieved_timestamp": "1762652580.2503", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"jaspionjader/bh-20", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-20", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4727367828472896 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.574973333640619 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4105208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3768284574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-20/d600a69d-1952-4e30-abe8-1769ab63ac29.json b/data/hfopenllm_v2/jaspionjader/bh-20/d600a69d-1952-4e30-abe8-1769ab63ac29.json new file mode 100644 index 000000000..47f8d0aaa --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-20/d600a69d-1952-4e30-abe8-1769ab63ac29.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-20/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-20", + "id": "jaspionjader/bh-20", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4727 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.575 + } + }, + { 
+ "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1201 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2878 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4105 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3768 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-21/a369ff4f-7fe9-4764-be74-83563dbaf635.json b/data/hfopenllm_v2/jaspionjader/bh-21/a369ff4f-7fe9-4764-be74-83563dbaf635.json deleted file mode 100644 index 397bccf37..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-21/a369ff4f-7fe9-4764-be74-83563dbaf635.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-21/1762652580.25052", - "retrieved_timestamp": "1762652580.2505212", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-21", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-21", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47001400727846554 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5738369241857685 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1216012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4157916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37757646276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-21/afc031d4-852e-4ead-9098-6ce30112b459.json b/data/hfopenllm_v2/jaspionjader/bh-21/afc031d4-852e-4ead-9098-6ce30112b459.json new file mode 100644 index 000000000..00f6374fa --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-21/afc031d4-852e-4ead-9098-6ce30112b459.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-21/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-21", + "id": "jaspionjader/bh-21", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.47 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5738 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1216 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4158 + } + }, + { + "evaluation_name": "MMLU-PRO", + 
"source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3776 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-22/cb33e29f-e5e1-4bf5-9e20-86d9c3486d2d.json b/data/hfopenllm_v2/jaspionjader/bh-22/cb33e29f-e5e1-4bf5-9e20-86d9c3486d2d.json new file mode 100644 index 000000000..1e2ca8954 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-22/cb33e29f-e5e1-4bf5-9e20-86d9c3486d2d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-22/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-22", + "id": "jaspionjader/bh-22", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.46 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5793 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1186 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4172 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.3764 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-22/f3815ff9-c1bd-4706-a770-4c0b0e8c5d13.json b/data/hfopenllm_v2/jaspionjader/bh-22/f3815ff9-c1bd-4706-a770-4c0b0e8c5d13.json deleted file mode 100644 index 226dbe7d7..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-22/f3815ff9-c1bd-4706-a770-4c0b0e8c5d13.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-22/1762652580.250869", - "retrieved_timestamp": "1762652580.25087", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-22", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-22", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45999725173650363 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.579296884452635 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11858006042296072 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41715625000000006 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3764128989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-23/a4b93124-1151-4f69-8a5e-6b916e8cf11f.json b/data/hfopenllm_v2/jaspionjader/bh-23/a4b93124-1151-4f69-8a5e-6b916e8cf11f.json new file mode 100644 index 000000000..532376bf1 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-23/a4b93124-1151-4f69-8a5e-6b916e8cf11f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-23/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-23", + "id": "jaspionjader/bh-23", + "developer": "jaspionjader", + "inference_platform": "unknown", + 
"additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4658 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.57 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1201 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4197 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3796 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-23/f4db95ae-8e3d-45ed-9c53-3b30fde0cb3e.json b/data/hfopenllm_v2/jaspionjader/bh-23/f4db95ae-8e3d-45ed-9c53-3b30fde0cb3e.json deleted file mode 100644 index 1636fb8fa..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-23/f4db95ae-8e3d-45ed-9c53-3b30fde0cb3e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-23/1762652580.2511601", - "retrieved_timestamp": "1762652580.251161", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-23", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-23", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46576749690820357 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.570027700842045 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4197291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37957114361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-24/0b27b829-6588-4f7b-80fe-6e6767287a38.json b/data/hfopenllm_v2/jaspionjader/bh-24/0b27b829-6588-4f7b-80fe-6e6767287a38.json deleted file mode 100644 index 550d7b6af..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-24/0b27b829-6588-4f7b-80fe-6e6767287a38.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-24/1762652580.251392", - "retrieved_timestamp": "1762652580.251392", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-24", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-24", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4715377420799035 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5716684749879075 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1268882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4157604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38090093085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-24/efe11d8f-65e6-4ba6-8148-fdd43c9346be.json b/data/hfopenllm_v2/jaspionjader/bh-24/efe11d8f-65e6-4ba6-8148-fdd43c9346be.json new file mode 100644 index 000000000..22202dd5f --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-24/efe11d8f-65e6-4ba6-8148-fdd43c9346be.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-24/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-24", + "id": "jaspionjader/bh-24", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4715 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5717 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1269 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4158 + } + }, + { + "evaluation_name": "MMLU-PRO", + 
"source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3809 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-25/923da7be-2ec8-46b2-8187-fe08eb86d5a0.json b/data/hfopenllm_v2/jaspionjader/bh-25/923da7be-2ec8-46b2-8187-fe08eb86d5a0.json new file mode 100644 index 000000000..ae523eebf --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-25/923da7be-2ec8-46b2-8187-fe08eb86d5a0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-25/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-25", + "id": "jaspionjader/bh-25", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4752 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5706 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1133 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4118 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.3782 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-25/a0c16d3d-e3f2-4c50-975a-70b69824b3d5.json b/data/hfopenllm_v2/jaspionjader/bh-25/a0c16d3d-e3f2-4c50-975a-70b69824b3d5.json deleted file mode 100644 index 91466e670..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-25/a0c16d3d-e3f2-4c50-975a-70b69824b3d5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-25/1762652580.251633", - "retrieved_timestamp": "1762652580.251633", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-25", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-25", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47518473206647255 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5705628020556314 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11329305135951662 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4117916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37824135638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-26/0218b7de-bbd7-4196-8fec-3f6fb790a3c1.json b/data/hfopenllm_v2/jaspionjader/bh-26/0218b7de-bbd7-4196-8fec-3f6fb790a3c1.json deleted file mode 100644 index bb10be634..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-26/0218b7de-bbd7-4196-8fec-3f6fb790a3c1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-26/1762652580.251851", - "retrieved_timestamp": "1762652580.251852", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - 
"name": "jaspionjader/bh-26", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-26", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4690897928607206 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5734958656360526 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1163141993957704 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4276979166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3771609042553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-26/1652b9fe-640a-48f9-b7a5-20ae28fb5985.json b/data/hfopenllm_v2/jaspionjader/bh-26/1652b9fe-640a-48f9-b7a5-20ae28fb5985.json new file mode 100644 index 000000000..4a003c489 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-26/1652b9fe-640a-48f9-b7a5-20ae28fb5985.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-26/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-26", + "id": "jaspionjader/bh-26", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4691 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5735 + 
} + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1163 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4277 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3772 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-27/572463ed-f6b9-460d-9c38-0e0ee5327511.json b/data/hfopenllm_v2/jaspionjader/bh-27/572463ed-f6b9-460d-9c38-0e0ee5327511.json new file mode 100644 index 000000000..041f63301 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-27/572463ed-f6b9-460d-9c38-0e0ee5327511.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-27/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-27", + "id": "jaspionjader/bh-27", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4819 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5714 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1276 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4091 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3799 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-27/68435a43-944b-4c66-979b-eb48f7a8e77a.json b/data/hfopenllm_v2/jaspionjader/bh-27/68435a43-944b-4c66-979b-eb48f7a8e77a.json deleted file mode 100644 index ea91466ed..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-27/68435a43-944b-4c66-979b-eb48f7a8e77a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-27/1762652580.2520802", - "retrieved_timestamp": "1762652580.252081", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-27", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-27", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4818791916559174 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.571405917910282 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12764350453172205 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", 
- "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.409125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3799035904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-28/0dc95982-e5b0-4011-9e5b-48af7e3048f0.json b/data/hfopenllm_v2/jaspionjader/bh-28/0dc95982-e5b0-4011-9e5b-48af7e3048f0.json deleted file mode 100644 index 6dbc1fbe0..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-28/0dc95982-e5b0-4011-9e5b-48af7e3048f0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-28/1762652580.252297", - "retrieved_timestamp": "1762652580.2522979", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-28", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-28", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4785070280189896 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5702617832390487 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12311178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.413125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3812333776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-28/5f6bbbfd-16a8-4ea8-b9d9-b436a882700a.json b/data/hfopenllm_v2/jaspionjader/bh-28/5f6bbbfd-16a8-4ea8-b9d9-b436a882700a.json new file mode 100644 index 000000000..9d000a51b --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-28/5f6bbbfd-16a8-4ea8-b9d9-b436a882700a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-28/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + 
"source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-28", + "id": "jaspionjader/bh-28", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4785 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5703 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1231 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4131 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3812 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-29/012eeeed-c556-460d-82f6-34bdc31da5cf.json b/data/hfopenllm_v2/jaspionjader/bh-29/012eeeed-c556-460d-82f6-34bdc31da5cf.json deleted file mode 100644 index 0a8ab3328..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-29/012eeeed-c556-460d-82f6-34bdc31da5cf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-29/1762652580.252519", - "retrieved_timestamp": "1762652580.2525198", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"jaspionjader/bh-29", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-29", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46881496651107946 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5670161357895335 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4236979166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38189827127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-29/32322361-f18d-480d-9475-cd11a45bc4bc.json b/data/hfopenllm_v2/jaspionjader/bh-29/32322361-f18d-480d-9475-cd11a45bc4bc.json new file mode 100644 index 000000000..015e4ce5c --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-29/32322361-f18d-480d-9475-cd11a45bc4bc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-29/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-29", + "id": "jaspionjader/bh-29", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4688 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.567 + } + }, 
+ { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1208 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4237 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3819 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-3/37e59290-b4ea-4a44-bfb0-cdbe781c4d7f.json b/data/hfopenllm_v2/jaspionjader/bh-3/37e59290-b4ea-4a44-bfb0-cdbe781c4d7f.json deleted file mode 100644 index 6f653b5ca..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-3/37e59290-b4ea-4a44-bfb0-cdbe781c4d7f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-3/1762652580.2527301", - "retrieved_timestamp": "1762652580.2527308", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-3", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-3", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4663670172918966 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5890722855221537 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1148036253776435 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41728125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37017952127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-3/f62d1aee-2d9e-466e-85e2-002fae5d2504.json b/data/hfopenllm_v2/jaspionjader/bh-3/f62d1aee-2d9e-466e-85e2-002fae5d2504.json new file mode 100644 index 000000000..7c0e9e2c4 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-3/f62d1aee-2d9e-466e-85e2-002fae5d2504.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-3", + "id": "jaspionjader/bh-3", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4664 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5891 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1148 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4173 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": 
{ + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3702 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-30/6d3a64df-5ebb-4cd8-bd6c-de799d185fe1.json b/data/hfopenllm_v2/jaspionjader/bh-30/6d3a64df-5ebb-4cd8-bd6c-de799d185fe1.json deleted file mode 100644 index bba3db4c9..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-30/6d3a64df-5ebb-4cd8-bd6c-de799d185fe1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-30/1762652580.252943", - "retrieved_timestamp": "1762652580.2529438", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-30", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-30", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46664184364153777 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5705838505746653 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12311178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4144270833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3781582446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-30/af389bf1-da63-49a9-9e49-32613d8d05b8.json b/data/hfopenllm_v2/jaspionjader/bh-30/af389bf1-da63-49a9-9e49-32613d8d05b8.json new file mode 100644 index 000000000..8e6b682db --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-30/af389bf1-da63-49a9-9e49-32613d8d05b8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-30/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": 
{ + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-30", + "id": "jaspionjader/bh-30", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4666 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5706 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1231 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4144 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3782 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-31/a637936e-646b-4c21-964a-61e253fd3705.json b/data/hfopenllm_v2/jaspionjader/bh-31/a637936e-646b-4c21-964a-61e253fd3705.json deleted file mode 100644 index 4d2f8f274..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-31/a637936e-646b-4c21-964a-61e253fd3705.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-31/1762652580.253162", - "retrieved_timestamp": "1762652580.253163", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"jaspionjader/bh-31", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-31", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4727367828472896 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5665082303171874 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4104270833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3819813829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-31/ea13ae62-d050-4cc4-9cbe-99eedfc206e2.json b/data/hfopenllm_v2/jaspionjader/bh-31/ea13ae62-d050-4cc4-9cbe-99eedfc206e2.json new file mode 100644 index 000000000..da3e5651c --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-31/ea13ae62-d050-4cc4-9cbe-99eedfc206e2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-31/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-31", + "id": "jaspionjader/bh-31", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4727 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5665 + } + }, + 
{ + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1284 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4104 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.382 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-32/1e697620-36a7-459c-b88c-405febb57c3a.json b/data/hfopenllm_v2/jaspionjader/bh-32/1e697620-36a7-459c-b88c-405febb57c3a.json new file mode 100644 index 000000000..ab338397a --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-32/1e697620-36a7-459c-b88c-405febb57c3a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-32/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-32", + "id": "jaspionjader/bh-32", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4636 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5662 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1246 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4157 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3812 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-32/a56c62cc-c318-4de4-b6c7-0fa10229a127.json b/data/hfopenllm_v2/jaspionjader/bh-32/a56c62cc-c318-4de4-b6c7-0fa10229a127.json deleted file mode 100644 index 20159a56e..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-32/a56c62cc-c318-4de4-b6c7-0fa10229a127.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-32/1762652580.253373", - "retrieved_timestamp": "1762652580.2533739", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-32", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-32", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4635943740386619 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5662056335064284 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12462235649546828 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.4157291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3812333776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-33/532723e8-a9b7-4f72-a015-c2bd9363b5d8.json b/data/hfopenllm_v2/jaspionjader/bh-33/532723e8-a9b7-4f72-a015-c2bd9363b5d8.json new file mode 100644 index 000000000..cb0a494fb --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-33/532723e8-a9b7-4f72-a015-c2bd9363b5d8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-33/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-33", + "id": "jaspionjader/bh-33", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4685 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5653 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1178 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4157 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3808 + 
} + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-33/bcab8546-ea69-4207-b69b-ab982b603e55.json b/data/hfopenllm_v2/jaspionjader/bh-33/bcab8546-ea69-4207-b69b-ab982b603e55.json deleted file mode 100644 index 645e7d069..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-33/bcab8546-ea69-4207-b69b-ab982b603e55.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-33/1762652580.25359", - "retrieved_timestamp": "1762652580.253591", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-33", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-33", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4685401401614383 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5652966799156172 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11782477341389729 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4156979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38081781914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-34/6097086b-8c8b-493e-af1a-71146a2ed566.json b/data/hfopenllm_v2/jaspionjader/bh-34/6097086b-8c8b-493e-af1a-71146a2ed566.json deleted file mode 100644 index 4185aa3b1..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-34/6097086b-8c8b-493e-af1a-71146a2ed566.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-34/1762652580.253809", - "retrieved_timestamp": "1762652580.25381", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"jaspionjader/bh-34", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-34", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4623953332712758 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5681235912530039 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4184583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38040226063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-34/be096a57-7d81-4999-919a-ed8a243012b2.json b/data/hfopenllm_v2/jaspionjader/bh-34/be096a57-7d81-4999-919a-ed8a243012b2.json new file mode 100644 index 000000000..e3c52f212 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-34/be096a57-7d81-4999-919a-ed8a243012b2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-34/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-34", + "id": "jaspionjader/bh-34", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4624 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5681 + } + }, 
+ { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1208 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4185 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3804 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-35/7166192e-42b0-4990-8218-88bb38fd1bdb.json b/data/hfopenllm_v2/jaspionjader/bh-35/7166192e-42b0-4990-8218-88bb38fd1bdb.json deleted file mode 100644 index 3c8ea1800..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-35/7166192e-42b0-4990-8218-88bb38fd1bdb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-35/1762652580.2540212", - "retrieved_timestamp": "1762652580.254022", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-35", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-35", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47213726246359655 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5639648300586834 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12462235649546828 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41830208333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3829787234042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-35/cadeb016-e158-4a49-921c-efe0e4eb0cb2.json b/data/hfopenllm_v2/jaspionjader/bh-35/cadeb016-e158-4a49-921c-efe0e4eb0cb2.json new file mode 100644 index 000000000..3cf6f3543 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-35/cadeb016-e158-4a49-921c-efe0e4eb0cb2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-35/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-35", + "id": "jaspionjader/bh-35", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4721 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.564 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1246 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4183 + } + }, + { + "evaluation_name": "MMLU-PRO", + 
"source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.383 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-36/3a4f8c97-9f30-44b8-8f79-7f19f90a08d1.json b/data/hfopenllm_v2/jaspionjader/bh-36/3a4f8c97-9f30-44b8-8f79-7f19f90a08d1.json deleted file mode 100644 index da0f0f8f3..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-36/3a4f8c97-9f30-44b8-8f79-7f19f90a08d1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-36/1762652580.2542279", - "retrieved_timestamp": "1762652580.254229", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-36", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-36", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4665919759571271 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5664445599052024 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12386706948640483 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4196354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.383061835106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-36/c606d7b9-3ea3-49d4-9ecc-9610ed4b4eac.json b/data/hfopenllm_v2/jaspionjader/bh-36/c606d7b9-3ea3-49d4-9ecc-9610ed4b4eac.json new file mode 100644 index 000000000..c44481774 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-36/c606d7b9-3ea3-49d4-9ecc-9610ed4b4eac.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-36/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + 
"source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-36", + "id": "jaspionjader/bh-36", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4666 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5664 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1239 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4196 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3831 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-37/04a5eed3-7eea-4d9f-acc6-5a96ec987e2b.json b/data/hfopenllm_v2/jaspionjader/bh-37/04a5eed3-7eea-4d9f-acc6-5a96ec987e2b.json new file mode 100644 index 000000000..00efc0c32 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-37/04a5eed3-7eea-4d9f-acc6-5a96ec987e2b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-37/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-37", + "id": "jaspionjader/bh-37", + "developer": "jaspionjader", + "inference_platform": "unknown", + 
"additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.488 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5625 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1216 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4156 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3828 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-37/19490f78-486d-4325-b31e-af8555c32ea9.json b/data/hfopenllm_v2/jaspionjader/bh-37/19490f78-486d-4325-b31e-af8555c32ea9.json deleted file mode 100644 index a53a80e6e..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-37/19490f78-486d-4325-b31e-af8555c32ea9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-37/1762652580.2544441", - "retrieved_timestamp": "1762652580.254445", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-37", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-37", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48797413086166924 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.562488460737535 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1216012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4156354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3828125 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-38/61e7c49e-abb9-4e38-ba3f-1018db104d83.json b/data/hfopenllm_v2/jaspionjader/bh-38/61e7c49e-abb9-4e38-ba3f-1018db104d83.json deleted file mode 100644 index f9cbdd06c..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-38/61e7c49e-abb9-4e38-ba3f-1018db104d83.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-38/1762652580.2548852", - "retrieved_timestamp": "1762652580.2548869", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-38", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-38", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46179581288758276 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5658176339168742 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12386706948640483 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4117291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3810671542553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-38/a1c60d74-dabe-423d-9e40-3dd8112d7d8e.json b/data/hfopenllm_v2/jaspionjader/bh-38/a1c60d74-dabe-423d-9e40-3dd8112d7d8e.json new file mode 100644 index 000000000..8a76bd6cd --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-38/a1c60d74-dabe-423d-9e40-3dd8112d7d8e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-38/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-38", + "id": "jaspionjader/bh-38", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4618 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5658 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1239 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4117 + } + }, + { + "evaluation_name": "MMLU-PRO", + 
"source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3811 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-39/243e6b7b-a34f-44cd-b027-176f877ff8e7.json b/data/hfopenllm_v2/jaspionjader/bh-39/243e6b7b-a34f-44cd-b027-176f877ff8e7.json deleted file mode 100644 index 84af5c76f..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-39/243e6b7b-a34f-44cd-b027-176f877ff8e7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-39/1762652580.2552152", - "retrieved_timestamp": "1762652580.2552161", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-39", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-39", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45759917020173135 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5633012248625926 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12537764350453173 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4262395833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38314494680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-39/29c7bc9b-6833-497b-a553-2941026efea5.json b/data/hfopenllm_v2/jaspionjader/bh-39/29c7bc9b-6833-497b-a553-2941026efea5.json new file mode 100644 index 000000000..8f546e7ae --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-39/29c7bc9b-6833-497b-a553-2941026efea5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-39/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + 
"source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-39", + "id": "jaspionjader/bh-39", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4576 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5633 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1254 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4262 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3831 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-4/09a60955-978e-4136-bdde-d5459e37ad2c.json b/data/hfopenllm_v2/jaspionjader/bh-4/09a60955-978e-4136-bdde-d5459e37ad2c.json new file mode 100644 index 000000000..76f667723 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-4/09a60955-978e-4136-bdde-d5459e37ad2c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-4", + "id": "jaspionjader/bh-4", + "developer": "jaspionjader", + "inference_platform": "unknown", + 
"additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4673 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5892 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1095 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4173 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3705 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-4/85ba493b-05f1-4853-a0ff-44570a7c2a82.json b/data/hfopenllm_v2/jaspionjader/bh-4/85ba493b-05f1-4853-a0ff-44570a7c2a82.json deleted file mode 100644 index 3c4c952af..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-4/85ba493b-05f1-4853-a0ff-44570a7c2a82.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-4/1762652580.2554429", - "retrieved_timestamp": "1762652580.255444", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-4", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-4", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4672912317096415 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5892000111391051 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1095166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41728125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3705119680851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-40/501744a2-070a-4378-9232-f7ccd9b2a67e.json b/data/hfopenllm_v2/jaspionjader/bh-40/501744a2-070a-4378-9232-f7ccd9b2a67e.json new file mode 100644 index 000000000..e827aaca1 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-40/501744a2-070a-4378-9232-f7ccd9b2a67e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-40/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-40", + "id": "jaspionjader/bh-40", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4536 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5634 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", 
+ "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1246 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4236 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3835 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-40/56837896-11a6-458b-a17e-9540ab5ae66a.json b/data/hfopenllm_v2/jaspionjader/bh-40/56837896-11a6-458b-a17e-9540ab5ae66a.json deleted file mode 100644 index dbd7332d7..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-40/56837896-11a6-458b-a17e-9540ab5ae66a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-40/1762652580.2556531", - "retrieved_timestamp": "1762652580.2556539", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-40", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-40", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45357761849669986 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5633956317971519 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12462235649546828 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4236041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38347739361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-41/369efdc6-6529-477c-b5f0-d229c8102491.json b/data/hfopenllm_v2/jaspionjader/bh-41/369efdc6-6529-477c-b5f0-d229c8102491.json new file mode 100644 index 000000000..559005960 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-41/369efdc6-6529-477c-b5f0-d229c8102491.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-41/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-41", + "id": "jaspionjader/bh-41", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.474 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5614 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1254 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4183 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.3825 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-41/db0c4182-7391-40e7-ad6e-5374c8eb28e1.json b/data/hfopenllm_v2/jaspionjader/bh-41/db0c4182-7391-40e7-ad6e-5374c8eb28e1.json deleted file mode 100644 index 88a67d2bc..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-41/db0c4182-7391-40e7-ad6e-5374c8eb28e1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-41/1762652580.2558541", - "retrieved_timestamp": "1762652580.2558541", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-41", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-41", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4739856912990864 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.56138466485423 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12537764350453173 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41827083333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38248005319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-42/265e3cbb-484f-4cf7-8994-050f414ecf37.json b/data/hfopenllm_v2/jaspionjader/bh-42/265e3cbb-484f-4cf7-8994-050f414ecf37.json deleted file mode 100644 index 9823d1fca..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-42/265e3cbb-484f-4cf7-8994-050f414ecf37.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-42/1762652580.25606", - "retrieved_timestamp": "1762652580.2560608", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { 
- "name": "jaspionjader/bh-42", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-42", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4660423232578447 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5645607204696422 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1268882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42100000000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3812333776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-42/906645f3-2041-4380-8118-ac26b92297ba.json b/data/hfopenllm_v2/jaspionjader/bh-42/906645f3-2041-4380-8118-ac26b92297ba.json new file mode 100644 index 000000000..8e371ce85 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-42/906645f3-2041-4380-8118-ac26b92297ba.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-42/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-42", + "id": "jaspionjader/bh-42", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.466 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5646 + 
} + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1269 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.421 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3812 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-43/472b725a-2bd5-440a-9768-ba8db6fe6b34.json b/data/hfopenllm_v2/jaspionjader/bh-43/472b725a-2bd5-440a-9768-ba8db6fe6b34.json deleted file mode 100644 index e4553e8ca..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-43/472b725a-2bd5-440a-9768-ba8db6fe6b34.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-43/1762652580.2562718", - "retrieved_timestamp": "1762652580.2562718", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-43", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-43", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45999725173650363 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5635240412618795 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12386706948640483 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4156041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3819813829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-43/57fe8deb-02dc-43a8-8a92-14bdaf61dd67.json b/data/hfopenllm_v2/jaspionjader/bh-43/57fe8deb-02dc-43a8-8a92-14bdaf61dd67.json new file mode 100644 index 000000000..2479e939a --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-43/57fe8deb-02dc-43a8-8a92-14bdaf61dd67.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-43/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-43", + "id": "jaspionjader/bh-43", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.46 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5635 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1239 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4156 + } + }, + { + "evaluation_name": "MMLU-PRO", + 
"source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.382 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-44/60c18178-ff40-4e9d-9683-077cc2fa254e.json b/data/hfopenllm_v2/jaspionjader/bh-44/60c18178-ff40-4e9d-9683-077cc2fa254e.json deleted file mode 100644 index 49ce2c817..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-44/60c18178-ff40-4e9d-9683-077cc2fa254e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-44/1762652580.2565289", - "retrieved_timestamp": "1762652580.2565298", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-44", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-44", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4706135276621586 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5642775941837409 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1216012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42487500000000006 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3833942819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-44/95f2fa22-3da9-4876-ace3-50763f2b2453.json b/data/hfopenllm_v2/jaspionjader/bh-44/95f2fa22-3da9-4876-ace3-50763f2b2453.json new file mode 100644 index 000000000..e04123e5c --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-44/95f2fa22-3da9-4876-ace3-50763f2b2453.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-44/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + 
"source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-44", + "id": "jaspionjader/bh-44", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4706 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5643 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1216 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4249 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3834 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-46/6b3c3872-cd4d-4827-8651-6baa9d2423e7.json b/data/hfopenllm_v2/jaspionjader/bh-46/6b3c3872-cd4d-4827-8651-6baa9d2423e7.json deleted file mode 100644 index 924082e77..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-46/6b3c3872-cd4d-4827-8651-6baa9d2423e7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-46/1762652580.2567308", - "retrieved_timestamp": "1762652580.256732", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { 
- "name": "jaspionjader/bh-46", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-46", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4727367828472896 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5631697539272891 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12764350453172205 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4262395833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3822307180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-46/b2f9e38f-c2a1-4e5f-a7ce-4e33a05b503b.json b/data/hfopenllm_v2/jaspionjader/bh-46/b2f9e38f-c2a1-4e5f-a7ce-4e33a05b503b.json new file mode 100644 index 000000000..b0fd30662 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-46/b2f9e38f-c2a1-4e5f-a7ce-4e33a05b503b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-46/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-46", + "id": "jaspionjader/bh-46", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4727 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5632 
+ } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1276 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4262 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3822 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-47/9f30c4d4-4a3c-459e-8444-e143ef75f84e.json b/data/hfopenllm_v2/jaspionjader/bh-47/9f30c4d4-4a3c-459e-8444-e143ef75f84e.json deleted file mode 100644 index 21ce79fa6..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-47/9f30c4d4-4a3c-459e-8444-e143ef75f84e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-47/1762652580.256935", - "retrieved_timestamp": "1762652580.2569358", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-47", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-47", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46516797652451053 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5545716016743777 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12764350453172205 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4156041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3854720744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-47/b3173a2a-8309-498d-961b-0167d5d5dea6.json b/data/hfopenllm_v2/jaspionjader/bh-47/b3173a2a-8309-498d-961b-0167d5d5dea6.json new file mode 100644 index 000000000..12bf1bc95 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-47/b3173a2a-8309-498d-961b-0167d5d5dea6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-47/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-47", + "id": "jaspionjader/bh-47", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4652 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5546 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1276 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4156 + } + }, + { + "evaluation_name": "MMLU-PRO", + 
"source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3855 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-48/0d59dd75-c999-4a7e-919a-fd084202fc9c.json b/data/hfopenllm_v2/jaspionjader/bh-48/0d59dd75-c999-4a7e-919a-fd084202fc9c.json new file mode 100644 index 000000000..0b617599c --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-48/0d59dd75-c999-4a7e-919a-fd084202fc9c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-48/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-48", + "id": "jaspionjader/bh-48", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4688 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5541 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1254 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4209 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.386 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-48/80bbd567-b13e-4ed4-ba85-9098639a3642.json b/data/hfopenllm_v2/jaspionjader/bh-48/80bbd567-b13e-4ed4-ba85-9098639a3642.json deleted file mode 100644 index 47f4212c1..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-48/80bbd567-b13e-4ed4-ba85-9098639a3642.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-48/1762652580.257132", - "retrieved_timestamp": "1762652580.257133", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-48", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-48", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46881496651107946 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5541308128775738 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12537764350453173 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4209375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3859707446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-49/639e91d9-ebbf-4ba2-bce3-6953e7c91e32.json b/data/hfopenllm_v2/jaspionjader/bh-49/639e91d9-ebbf-4ba2-bce3-6953e7c91e32.json new file mode 100644 index 000000000..be5e9a125 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-49/639e91d9-ebbf-4ba2-bce3-6953e7c91e32.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-49/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-49", + "id": "jaspionjader/bh-49", + "developer": "jaspionjader", + "inference_platform": "unknown", + 
"additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4725 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.554 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1201 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4129 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3808 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-49/e574e35a-56cb-471d-b4f1-df0858f5ce66.json b/data/hfopenllm_v2/jaspionjader/bh-49/e574e35a-56cb-471d-b4f1-df0858f5ce66.json deleted file mode 100644 index 2219021af..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-49/e574e35a-56cb-471d-b4f1-df0858f5ce66.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-49/1762652580.257362", - "retrieved_timestamp": "1762652580.257366", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-49", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-49", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47246195649764844 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5540285004706683 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41290625000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38081781914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-5/56a5fb9b-a4b7-4290-9ec9-6864b3efaa82.json b/data/hfopenllm_v2/jaspionjader/bh-5/56a5fb9b-a4b7-4290-9ec9-6864b3efaa82.json new file mode 100644 index 000000000..f712b9f05 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-5/56a5fb9b-a4b7-4290-9ec9-6864b3efaa82.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-5", + "id": "jaspionjader/bh-5", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4652 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5882 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1057 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4186 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3702 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-5/ec314c97-9bc0-4e14-9d57-d6204e699428.json b/data/hfopenllm_v2/jaspionjader/bh-5/ec314c97-9bc0-4e14-9d57-d6204e699428.json deleted file mode 100644 index 4a5c55eda..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-5/ec314c97-9bc0-4e14-9d57-d6204e699428.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-5/1762652580.2577002", - "retrieved_timestamp": "1762652580.2577012", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-5", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-5", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46516797652451053 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5881569099353959 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10574018126888217 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4186145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37017952127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-50/980887dd-2948-4e5f-b22c-3cc03057f493.json b/data/hfopenllm_v2/jaspionjader/bh-50/980887dd-2948-4e5f-b22c-3cc03057f493.json deleted file mode 100644 index 40e545815..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-50/980887dd-2948-4e5f-b22c-3cc03057f493.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-50/1762652580.257925", - "retrieved_timestamp": "1762652580.257926", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-50", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-50", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47246195649764844 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.555294802866646 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41687500000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3842253989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-50/d03fb481-be0b-4dfb-bb4d-54067e058e99.json b/data/hfopenllm_v2/jaspionjader/bh-50/d03fb481-be0b-4dfb-bb4d-54067e058e99.json new file mode 100644 index 000000000..fab8832f6 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-50/d03fb481-be0b-4dfb-bb4d-54067e058e99.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-50/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + 
"source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-50", + "id": "jaspionjader/bh-50", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4725 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5553 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1208 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4169 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3842 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-51/6d544c96-53c9-43d1-9cb1-6077d7235fff.json b/data/hfopenllm_v2/jaspionjader/bh-51/6d544c96-53c9-43d1-9cb1-6077d7235fff.json deleted file mode 100644 index 9934c1eaf..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-51/6d544c96-53c9-43d1-9cb1-6077d7235fff.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-51/1762652580.2581341", - "retrieved_timestamp": "1762652580.258135", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { 
- "name": "jaspionjader/bh-51", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-51", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4630447213393795 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5557101784534039 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12386706948640483 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41681250000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38314494680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-51/d8fc3475-83e9-4790-a472-72b442087562.json b/data/hfopenllm_v2/jaspionjader/bh-51/d8fc3475-83e9-4790-a472-72b442087562.json new file mode 100644 index 000000000..82a672fc1 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-51/d8fc3475-83e9-4790-a472-72b442087562.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-51/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-51", + "id": "jaspionjader/bh-51", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.463 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.5557 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1239 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4168 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3831 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-52/57efd335-4873-4e01-bfc3-0d704b3d482a.json b/data/hfopenllm_v2/jaspionjader/bh-52/57efd335-4873-4e01-bfc3-0d704b3d482a.json new file mode 100644 index 000000000..5d9a2c2b6 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-52/57efd335-4873-4e01-bfc3-0d704b3d482a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-52/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-52", + "id": "jaspionjader/bh-52", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4536 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5444 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1201 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4169 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3843 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-52/fd3c9666-09bf-4562-b49d-eea905469761.json b/data/hfopenllm_v2/jaspionjader/bh-52/fd3c9666-09bf-4562-b49d-eea905469761.json deleted file mode 100644 index 33dd91d26..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-52/fd3c9666-09bf-4562-b49d-eea905469761.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-52/1762652580.258348", - "retrieved_timestamp": "1762652580.258349", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-52", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-52", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45362748618111054 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.544409095161705 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41690625000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38430851063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-53/25fdcc8a-0e7d-4148-8508-2631ea6deb05.json b/data/hfopenllm_v2/jaspionjader/bh-53/25fdcc8a-0e7d-4148-8508-2631ea6deb05.json new file mode 100644 index 000000000..c656925be --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-53/25fdcc8a-0e7d-4148-8508-2631ea6deb05.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-53/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-53", + "id": "jaspionjader/bh-53", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.478 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5494 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1269 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4196 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + 
}, + "score_details": { + "score": 0.3858 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-53/978d4a27-17c7-4f87-b3e5-27b00ffa4d80.json b/data/hfopenllm_v2/jaspionjader/bh-53/978d4a27-17c7-4f87-b3e5-27b00ffa4d80.json deleted file mode 100644 index f15554275..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-53/978d4a27-17c7-4f87-b3e5-27b00ffa4d80.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-53/1762652580.25855", - "retrieved_timestamp": "1762652580.2585511", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-53", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-53", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4779573753197073 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5494367702137035 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1268882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29865771812080544 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4196041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38580452127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-54/9a2d7235-84cf-43f6-8855-68d0bf85e6e3.json b/data/hfopenllm_v2/jaspionjader/bh-54/9a2d7235-84cf-43f6-8855-68d0bf85e6e3.json deleted file mode 100644 index 34a7c12b3..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-54/9a2d7235-84cf-43f6-8855-68d0bf85e6e3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-54/1762652580.258788", - "retrieved_timestamp": "1762652580.258792", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" 
- }, - "model_info": { - "name": "jaspionjader/bh-54", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-54", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48405231452545916 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5547738488653888 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12915407854984895 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4155416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38248005319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-54/f5f63d06-7e51-4b91-8814-ecbda604fe6b.json b/data/hfopenllm_v2/jaspionjader/bh-54/f5f63d06-7e51-4b91-8814-ecbda604fe6b.json new file mode 100644 index 000000000..b9fb7dee3 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-54/f5f63d06-7e51-4b91-8814-ecbda604fe6b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-54/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-54", + "id": "jaspionjader/bh-54", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4841 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.5548 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1292 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4155 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3825 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-55/5326c33b-6b8a-472a-9058-a9e9fe83b599.json b/data/hfopenllm_v2/jaspionjader/bh-55/5326c33b-6b8a-472a-9058-a9e9fe83b599.json new file mode 100644 index 000000000..f9f52d25d --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-55/5326c33b-6b8a-472a-9058-a9e9fe83b599.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-55/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-55", + "id": "jaspionjader/bh-55", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4709 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.555 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1284 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4222 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3846 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-55/7c388cc5-fb2f-48ba-967c-a931fcb25a42.json b/data/hfopenllm_v2/jaspionjader/bh-55/7c388cc5-fb2f-48ba-967c-a931fcb25a42.json deleted file mode 100644 index 11aa42c62..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-55/7c388cc5-fb2f-48ba-967c-a931fcb25a42.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-55/1762652580.259115", - "retrieved_timestamp": "1762652580.259116", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-55", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-55", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47093822169621047 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5549641462109072 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42220833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3846409574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-56/28674053-e1b6-4f0a-a90e-5dd5082ec164.json b/data/hfopenllm_v2/jaspionjader/bh-56/28674053-e1b6-4f0a-a90e-5dd5082ec164.json new file mode 100644 index 000000000..4876a15a1 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-56/28674053-e1b6-4f0a-a90e-5dd5082ec164.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-56/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-56", + "id": "jaspionjader/bh-56", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.46 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5447 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1231 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4116 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 
0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3844 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-56/348c8f2b-807f-464b-832e-0049f8329b86.json b/data/hfopenllm_v2/jaspionjader/bh-56/348c8f2b-807f-464b-832e-0049f8329b86.json deleted file mode 100644 index 386a8242e..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-56/348c8f2b-807f-464b-832e-0049f8329b86.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-56/1762652580.2593641", - "retrieved_timestamp": "1762652580.259365", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-56", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-56", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45999725173650363 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5446903231355648 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12311178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4116041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3843916223404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-57/fab7388c-87ed-4108-ba4d-e1621925f264.json b/data/hfopenllm_v2/jaspionjader/bh-57/fab7388c-87ed-4108-ba4d-e1621925f264.json deleted file mode 100644 index 235faa278..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-57/fab7388c-87ed-4108-ba4d-e1621925f264.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-57/1762652580.259624", - "retrieved_timestamp": "1762652580.259625", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - 
"source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-57", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-57", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44051339335186196 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5424621834237494 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12613293051359517 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42103124999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3896276595744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-57/fd27bfa7-11b3-46d3-915c-373ddf5a9865.json b/data/hfopenllm_v2/jaspionjader/bh-57/fd27bfa7-11b3-46d3-915c-373ddf5a9865.json new file mode 100644 index 000000000..f2fb7ade4 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-57/fd27bfa7-11b3-46d3-915c-373ddf5a9865.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-57/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-57", + "id": "jaspionjader/bh-57", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4405 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.5425 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1261 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.421 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3896 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-58/91f190ba-39c8-47af-8351-73d1f382dd99.json b/data/hfopenllm_v2/jaspionjader/bh-58/91f190ba-39c8-47af-8351-73d1f382dd99.json new file mode 100644 index 000000000..3b78d4b75 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-58/91f190ba-39c8-47af-8351-73d1f382dd99.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-58/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-58", + "id": "jaspionjader/bh-58", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.463 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5446 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH 
Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1322 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4183 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3896 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-58/a9c1b649-8850-43d1-b5db-feefd0b8d0b4.json b/data/hfopenllm_v2/jaspionjader/bh-58/a9c1b649-8850-43d1-b5db-feefd0b8d0b4.json deleted file mode 100644 index 58141bfff..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-58/a9c1b649-8850-43d1-b5db-feefd0b8d0b4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-58/1762652580.259867", - "retrieved_timestamp": "1762652580.259868", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-58", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-58", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4630447213393795 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5446322106157867 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13217522658610273 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4183333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3896276595744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-59/974b1542-8716-4ea3-b097-f9893c9c9656.json b/data/hfopenllm_v2/jaspionjader/bh-59/974b1542-8716-4ea3-b097-f9893c9c9656.json deleted file mode 100644 index 551babc66..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-59/974b1542-8716-4ea3-b097-f9893c9c9656.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-59/1762652580.260088", - "retrieved_timestamp": "1762652580.2600892", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-59", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-59", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43414362779646887 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5511531646170439 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1540785498489426 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41700000000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3838098404255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-59/b637b55c-dd05-4060-bf33-e63e9de7fac9.json b/data/hfopenllm_v2/jaspionjader/bh-59/b637b55c-dd05-4060-bf33-e63e9de7fac9.json new file mode 100644 index 000000000..40582b054 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-59/b637b55c-dd05-4060-bf33-e63e9de7fac9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/jaspionjader_bh-59/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-59", + "id": "jaspionjader/bh-59", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4341 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5512 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1541 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3154 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.417 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3838 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-6/bcacef79-d7c0-46e7-9194-43541c2f01fc.json b/data/hfopenllm_v2/jaspionjader/bh-6/bcacef79-d7c0-46e7-9194-43541c2f01fc.json new file mode 100644 index 000000000..66016308e --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-6/bcacef79-d7c0-46e7-9194-43541c2f01fc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-6/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-6", + 
"id": "jaspionjader/bh-6", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4621 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5891 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1088 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4199 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3698 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-6/e8dfd77c-e2c8-42ef-b341-5476411d038d.json b/data/hfopenllm_v2/jaspionjader/bh-6/e8dfd77c-e2c8-42ef-b341-5476411d038d.json deleted file mode 100644 index d57c4e728..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-6/e8dfd77c-e2c8-42ef-b341-5476411d038d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-6/1762652580.260308", - "retrieved_timestamp": "1762652580.260309", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-6", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-6", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - 
"params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4620706392372239 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5890658635262072 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10876132930513595 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41991666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36976396276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-60/16d14b95-fe8b-4e1f-94e1-65d966ba24d6.json b/data/hfopenllm_v2/jaspionjader/bh-60/16d14b95-fe8b-4e1f-94e1-65d966ba24d6.json deleted file mode 100644 index d3d543ca6..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-60/16d14b95-fe8b-4e1f-94e1-65d966ba24d6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-60/1762652580.2605288", - "retrieved_timestamp": "1762652580.2605288", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-60", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-60", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42070484093316846 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5368509826419269 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1578549848942598 - } - }, - { 
- "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32550335570469796 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42890625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3689328457446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-60/77a358c7-59fa-4b22-a190-dfca86c5166b.json b/data/hfopenllm_v2/jaspionjader/bh-60/77a358c7-59fa-4b22-a190-dfca86c5166b.json new file mode 100644 index 000000000..60b6bf47a --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-60/77a358c7-59fa-4b22-a190-dfca86c5166b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-60/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-60", + "id": "jaspionjader/bh-60", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4207 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5369 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1579 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3255 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + 
}, + "score_details": { + "score": 0.4289 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3689 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-61/00b1b367-c4eb-4048-b80d-a8253e7c2048.json b/data/hfopenllm_v2/jaspionjader/bh-61/00b1b367-c4eb-4048-b80d-a8253e7c2048.json deleted file mode 100644 index d2765827c..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-61/00b1b367-c4eb-4048-b80d-a8253e7c2048.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-61/1762652580.260743", - "retrieved_timestamp": "1762652580.260743", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-61", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-61", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42467652495378927 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5271029876122725 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17069486404833836 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4355729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3679355053191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-61/ad4c8922-7079-4383-8f42-d3de6326a1e1.json b/data/hfopenllm_v2/jaspionjader/bh-61/ad4c8922-7079-4383-8f42-d3de6326a1e1.json new file mode 100644 index 000000000..1e6b5f4bd --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-61/ad4c8922-7079-4383-8f42-d3de6326a1e1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/jaspionjader_bh-61/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-61", + "id": "jaspionjader/bh-61", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4247 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5271 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1707 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3188 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4356 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3679 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-62/7f89eded-e5fc-4b3b-9afd-dcd71b7b44d5.json b/data/hfopenllm_v2/jaspionjader/bh-62/7f89eded-e5fc-4b3b-9afd-dcd71b7b44d5.json new file mode 100644 index 000000000..ba9efa108 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-62/7f89eded-e5fc-4b3b-9afd-dcd71b7b44d5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-62/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"bh-62", + "id": "jaspionjader/bh-62", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.415 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5379 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1624 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3205 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4289 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3719 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-62/85bd08bf-bdc3-42fb-b8f9-3d83e32921bc.json b/data/hfopenllm_v2/jaspionjader/bh-62/85bd08bf-bdc3-42fb-b8f9-3d83e32921bc.json deleted file mode 100644 index 6c440cbfe..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-62/85bd08bf-bdc3-42fb-b8f9-3d83e32921bc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-62/1762652580.260948", - "retrieved_timestamp": "1762652580.260949", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-62", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-62", - "additional_details": { - "precision": "bfloat16", - "architecture": 
"LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41498446344587914 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5379352222621877 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1623867069486405 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42890625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3719248670212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-63/07cb94ab-0aea-4ce2-89b0-4378cb892c7e.json b/data/hfopenllm_v2/jaspionjader/bh-63/07cb94ab-0aea-4ce2-89b0-4378cb892c7e.json new file mode 100644 index 000000000..61c277886 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-63/07cb94ab-0aea-4ce2-89b0-4378cb892c7e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-63/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-63", + "id": "jaspionjader/bh-63", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4308 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4917 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + 
"metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.111 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4313 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3248 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-63/c9df2e30-5e2d-42cc-8597-dc354602350a.json b/data/hfopenllm_v2/jaspionjader/bh-63/c9df2e30-5e2d-42cc-8597-dc354602350a.json deleted file mode 100644 index 78334e918..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-63/c9df2e30-5e2d-42cc-8597-dc354602350a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-63/1762652580.261157", - "retrieved_timestamp": "1762652580.261157", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-63", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-63", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43077146415954115 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49171126396743653 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11102719033232629 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4312604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3248005319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-64/5fb04756-c7bb-4772-b209-0d9a300bbf7d.json b/data/hfopenllm_v2/jaspionjader/bh-64/5fb04756-c7bb-4772-b209-0d9a300bbf7d.json new file mode 100644 index 000000000..4fa30fb6a --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-64/5fb04756-c7bb-4772-b209-0d9a300bbf7d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-64/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-64", + "id": "jaspionjader/bh-64", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.414 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.536 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1548 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3213 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4355 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3693 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-64/90830134-43d5-4d0c-9a93-4be2c1c7dba8.json b/data/hfopenllm_v2/jaspionjader/bh-64/90830134-43d5-4d0c-9a93-4be2c1c7dba8.json deleted file mode 100644 index 13309975d..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-64/90830134-43d5-4d0c-9a93-4be2c1c7dba8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-64/1762652580.261374", - "retrieved_timestamp": "1762652580.261375", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-64", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-64", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41401038134372353 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5359944334653838 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15483383685800603 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4355416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3692652925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-7/0c02d1b6-2d31-4c54-b881-588cbfb0c686.json b/data/hfopenllm_v2/jaspionjader/bh-7/0c02d1b6-2d31-4c54-b881-588cbfb0c686.json new file mode 100644 index 000000000..898316818 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-7/0c02d1b6-2d31-4c54-b881-588cbfb0c686.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-7/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { 
+ "name": "bh-7", + "id": "jaspionjader/bh-7", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4624 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5861 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.114 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4119 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3715 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-7/b63d1462-f84b-4d20-86d6-1a54cf4eb81f.json b/data/hfopenllm_v2/jaspionjader/bh-7/b63d1462-f84b-4d20-86d6-1a54cf4eb81f.json deleted file mode 100644 index 2155806af..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-7/b63d1462-f84b-4d20-86d6-1a54cf4eb81f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-7/1762652580.261788", - "retrieved_timestamp": "1762652580.261791", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-7", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-7", - "additional_details": { - "precision": "bfloat16", - "architecture": 
"LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4623953332712758 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5860594415302606 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11404833836858005 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41191666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3715093085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-8/a32e4d22-8096-4537-a68a-98ff9171ac8c.json b/data/hfopenllm_v2/jaspionjader/bh-8/a32e4d22-8096-4537-a68a-98ff9171ac8c.json new file mode 100644 index 000000000..2f29f8cdb --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-8/a32e4d22-8096-4537-a68a-98ff9171ac8c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-8/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-8", + "id": "jaspionjader/bh-8", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4597 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.59 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + 
"metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1178 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4265 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.372 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-8/f6dced28-f64c-4995-88b1-ac9a82903de2.json b/data/hfopenllm_v2/jaspionjader/bh-8/f6dced28-f64c-4995-88b1-ac9a82903de2.json deleted file mode 100644 index 7843ef4f6..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-8/f6dced28-f64c-4995-88b1-ac9a82903de2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-8/1762652580.262149", - "retrieved_timestamp": "1762652580.262152", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-8", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-8", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45967255770245175 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5899505025903907 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11782477341389729 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4265208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37200797872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/bh-9/4e45b666-fa7e-4a38-8b6b-65846876c8d9.json b/data/hfopenllm_v2/jaspionjader/bh-9/4e45b666-fa7e-4a38-8b6b-65846876c8d9.json new file mode 100644 index 000000000..038a2b90e --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-9/4e45b666-fa7e-4a38-8b6b-65846876c8d9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-9/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bh-9", + "id": "jaspionjader/bh-9", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4509 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.585 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1156 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4146 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3703 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-9/956d92e9-51fb-4770-8687-6003f9594345.json b/data/hfopenllm_v2/jaspionjader/bh-9/956d92e9-51fb-4770-8687-6003f9594345.json deleted file mode 100644 index 361e5c45c..000000000 --- a/data/hfopenllm_v2/jaspionjader/bh-9/956d92e9-51fb-4770-8687-6003f9594345.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-9/1762652580.262652", - "retrieved_timestamp": "1762652580.2626529", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/bh-9", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-9", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4508548429278758 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5850048697918168 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11555891238670694 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4145833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3702626329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/dp-6-8b/5c61d4f5-25a0-4ffe-a9d2-2a33d8bbd717.json b/data/hfopenllm_v2/jaspionjader/dp-6-8b/5c61d4f5-25a0-4ffe-a9d2-2a33d8bbd717.json deleted file mode 100644 index bdf9cbbe6..000000000 --- a/data/hfopenllm_v2/jaspionjader/dp-6-8b/5c61d4f5-25a0-4ffe-a9d2-2a33d8bbd717.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_dp-6-8b/1762652580.263117", - "retrieved_timestamp": "1762652580.2631192", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/dp-6-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/dp-6-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4805804155197099 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5299697041031141 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13293051359516617 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44338541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38971077127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/dp-6-8b/d9cb1d13-2af5-4385-aa78-5c053e00e6c6.json b/data/hfopenllm_v2/jaspionjader/dp-6-8b/d9cb1d13-2af5-4385-aa78-5c053e00e6c6.json new file mode 100644 index 000000000..d490d1352 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/dp-6-8b/d9cb1d13-2af5-4385-aa78-5c053e00e6c6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_dp-6-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dp-6-8b", + "id": "jaspionjader/dp-6-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4806 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy 
on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.53 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1329 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4434 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3897 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/dp-7-8b/44d85302-1af8-48ef-aebe-a9512c5bc387.json b/data/hfopenllm_v2/jaspionjader/dp-7-8b/44d85302-1af8-48ef-aebe-a9512c5bc387.json deleted file mode 100644 index 20df60aa7..000000000 --- a/data/hfopenllm_v2/jaspionjader/dp-7-8b/44d85302-1af8-48ef-aebe-a9512c5bc387.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_dp-7-8b/1762652580.2634509", - "retrieved_timestamp": "1762652580.2634518", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/dp-7-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/dp-7-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44983089314130953 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5290850650389306 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.12613293051359517 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44075 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3933676861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/dp-7-8b/6afaec07-ebb8-4f3f-af48-c679f38f4917.json b/data/hfopenllm_v2/jaspionjader/dp-7-8b/6afaec07-ebb8-4f3f-af48-c679f38f4917.json new file mode 100644 index 000000000..2aba7ace1 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/dp-7-8b/6afaec07-ebb8-4f3f-af48-c679f38f4917.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_dp-7-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dp-7-8b", + "id": "jaspionjader/dp-7-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4498 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5291 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1261 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, 
+ "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4407 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3934 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/ek-6/a05ce252-928c-4482-95f7-f4c0fc2c7c10.json b/data/hfopenllm_v2/jaspionjader/ek-6/a05ce252-928c-4482-95f7-f4c0fc2c7c10.json deleted file mode 100644 index ce046561a..000000000 --- a/data/hfopenllm_v2/jaspionjader/ek-6/a05ce252-928c-4482-95f7-f4c0fc2c7c10.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_ek-6/1762652580.2637498", - "retrieved_timestamp": "1762652580.263751", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/ek-6", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/ek-6", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4642437621067656 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5219292795769993 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13217522658610273 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4143645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3861369680851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/ek-6/bf8370c9-baed-4034-ac38-c6f796baca15.json b/data/hfopenllm_v2/jaspionjader/ek-6/bf8370c9-baed-4034-ac38-c6f796baca15.json new file mode 100644 index 000000000..3351de25f --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/ek-6/bf8370c9-baed-4034-ac38-c6f796baca15.json @@ -0,0 +1,132 @@ +{ + 
"schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_ek-6/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ek-6", + "id": "jaspionjader/ek-6", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4642 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5219 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1322 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4144 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3861 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/ek-7/23127691-ff90-433f-97d2-322e1191d821.json b/data/hfopenllm_v2/jaspionjader/ek-7/23127691-ff90-433f-97d2-322e1191d821.json deleted file mode 100644 index 05e15cc6f..000000000 --- a/data/hfopenllm_v2/jaspionjader/ek-7/23127691-ff90-433f-97d2-322e1191d821.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_ek-7/1762652580.264135", - "retrieved_timestamp": "1762652580.2641358", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging 
Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/ek-7", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/ek-7", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47670846686791046 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5194098090521417 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13293051359516617 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41706249999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38871343085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/ek-7/d397c078-6fe3-44a8-859c-a0f7c551dc3a.json b/data/hfopenllm_v2/jaspionjader/ek-7/d397c078-6fe3-44a8-859c-a0f7c551dc3a.json new file mode 100644 index 000000000..7e3d311e0 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/ek-7/d397c078-6fe3-44a8-859c-a0f7c551dc3a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_ek-7/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ek-7", + "id": "jaspionjader/ek-7", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4767 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5194 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1329 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3163 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4171 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3887 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/f-1-8b/91d65b2a-a96a-467b-9e5c-9efa28d7fd96.json b/data/hfopenllm_v2/jaspionjader/f-1-8b/91d65b2a-a96a-467b-9e5c-9efa28d7fd96.json deleted file mode 100644 index 76c623ada..000000000 --- a/data/hfopenllm_v2/jaspionjader/f-1-8b/91d65b2a-a96a-467b-9e5c-9efa28d7fd96.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_f-1-8b/1762652580.264415", - "retrieved_timestamp": "1762652580.264416", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/f-1-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/f-1-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49826571275327247 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5140825686172996 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { 
- "score": 0.1283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45268749999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39070811170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/f-1-8b/ed61cd6a-bbf0-45f2-9536-a7a262d5d6fb.json b/data/hfopenllm_v2/jaspionjader/f-1-8b/ed61cd6a-bbf0-45f2-9536-a7a262d5d6fb.json new file mode 100644 index 000000000..1d3bd64ed --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/f-1-8b/ed61cd6a-bbf0-45f2-9536-a7a262d5d6fb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_f-1-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "f-1-8b", + "id": "jaspionjader/f-1-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4983 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5141 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1284 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4527 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3907 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/f-2-8b/6be795f4-0784-44bf-8926-e3060ec37dcf.json b/data/hfopenllm_v2/jaspionjader/f-2-8b/6be795f4-0784-44bf-8926-e3060ec37dcf.json new file mode 100644 index 000000000..cdc2207e8 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/f-2-8b/6be795f4-0784-44bf-8926-e3060ec37dcf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_f-2-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "f-2-8b", + "id": "jaspionjader/f-2-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4824 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5294 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1171 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4501 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3962 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/f-2-8b/c63fc798-cf74-4767-ba95-6353b6761bcc.json b/data/hfopenllm_v2/jaspionjader/f-2-8b/c63fc798-cf74-4767-ba95-6353b6761bcc.json deleted file mode 100644 index fb319c709..000000000 --- a/data/hfopenllm_v2/jaspionjader/f-2-8b/c63fc798-cf74-4767-ba95-6353b6761bcc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_f-2-8b/1762652580.264705", - "retrieved_timestamp": "1762652580.2647061", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/f-2-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/f-2-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48237897667078905 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5294150378468933 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11706948640483383 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4500520833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39619348404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/f-3-8b/5ba1e4d3-29d4-4337-bd10-9e1a5df29af4.json b/data/hfopenllm_v2/jaspionjader/f-3-8b/5ba1e4d3-29d4-4337-bd10-9e1a5df29af4.json deleted file mode 100644 index 055856925..000000000 --- a/data/hfopenllm_v2/jaspionjader/f-3-8b/5ba1e4d3-29d4-4337-bd10-9e1a5df29af4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_f-3-8b/1762652580.264997", - "retrieved_timestamp": "1762652580.264998", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/f-3-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/f-3-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4803055891700687 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5274906581043712 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1216012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44208333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39544547872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/f-3-8b/d4d808f5-3b79-43b5-8076-d3f785083789.json b/data/hfopenllm_v2/jaspionjader/f-3-8b/d4d808f5-3b79-43b5-8076-d3f785083789.json new file mode 100644 index 000000000..9a8bac39d --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/f-3-8b/d4d808f5-3b79-43b5-8076-d3f785083789.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_f-3-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "f-3-8b", + "id": "jaspionjader/f-3-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4803 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, 
+ "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5275 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1216 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3138 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4421 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3954 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/f-4-8b/370f5923-91d7-40d2-bd06-bf2b657b8ef2.json b/data/hfopenllm_v2/jaspionjader/f-4-8b/370f5923-91d7-40d2-bd06-bf2b657b8ef2.json new file mode 100644 index 000000000..85c9a3a2a --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/f-4-8b/370f5923-91d7-40d2-bd06-bf2b657b8ef2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_f-4-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "f-4-8b", + "id": "jaspionjader/f-4-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4797 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5289 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", 
+ "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1148 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4514 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3956 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/f-4-8b/a98ec95c-4af0-4b55-adbc-06e5ceecd00f.json b/data/hfopenllm_v2/jaspionjader/f-4-8b/a98ec95c-4af0-4b55-adbc-06e5ceecd00f.json deleted file mode 100644 index d875a458b..000000000 --- a/data/hfopenllm_v2/jaspionjader/f-4-8b/a98ec95c-4af0-4b55-adbc-06e5ceecd00f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_f-4-8b/1762652580.265391", - "retrieved_timestamp": "1762652580.2653928", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/f-4-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/f-4-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4797060687863757 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5288622486396436 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1148036253776435 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45141666666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39561170212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/f-5-8b/4dd614dc-b68b-456c-ac55-f2221a479caa.json b/data/hfopenllm_v2/jaspionjader/f-5-8b/4dd614dc-b68b-456c-ac55-f2221a479caa.json deleted file mode 100644 index 07939b996..000000000 --- a/data/hfopenllm_v2/jaspionjader/f-5-8b/4dd614dc-b68b-456c-ac55-f2221a479caa.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_f-5-8b/1762652580.265783", - "retrieved_timestamp": "1762652580.265785", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/f-5-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/f-5-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5043606519590242 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5313273519630752 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12386706948640483 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4460520833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39486369680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/f-5-8b/5334e5e4-d243-4c20-912c-d0ded74d6ea5.json b/data/hfopenllm_v2/jaspionjader/f-5-8b/5334e5e4-d243-4c20-912c-d0ded74d6ea5.json new file mode 100644 index 000000000..dff79aa05 --- /dev/null +++ 
b/data/hfopenllm_v2/jaspionjader/f-5-8b/5334e5e4-d243-4c20-912c-d0ded74d6ea5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_f-5-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "f-5-8b", + "id": "jaspionjader/f-5-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5044 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5313 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1239 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4461 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3949 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/f-6-8b/2a71c7d7-8ae6-45e7-ab7f-54f7d31dd131.json b/data/hfopenllm_v2/jaspionjader/f-6-8b/2a71c7d7-8ae6-45e7-ab7f-54f7d31dd131.json deleted file mode 100644 index 85668769f..000000000 --- a/data/hfopenllm_v2/jaspionjader/f-6-8b/2a71c7d7-8ae6-45e7-ab7f-54f7d31dd131.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_f-6-8b/1762652580.2661529", - "retrieved_timestamp": "1762652580.266155", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/f-6-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/f-6-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48460196722474147 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.524094753042471 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11933534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44735416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3939494680851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/f-6-8b/7306f2cd-4fd2-4dd4-b06b-8c9aa558388b.json b/data/hfopenllm_v2/jaspionjader/f-6-8b/7306f2cd-4fd2-4dd4-b06b-8c9aa558388b.json new file mode 100644 index 000000000..64a06c461 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/f-6-8b/7306f2cd-4fd2-4dd4-b06b-8c9aa558388b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_f-6-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "f-6-8b", + "id": "jaspionjader/f-6-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4846 + } + }, + { + "evaluation_name": "BBH", + 
"source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5241 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1193 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4474 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3939 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/f-7-8b/68cc19eb-423b-4d6d-a3bf-eac6f666bc4b.json b/data/hfopenllm_v2/jaspionjader/f-7-8b/68cc19eb-423b-4d6d-a3bf-eac6f666bc4b.json new file mode 100644 index 000000000..e277f31b0 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/f-7-8b/68cc19eb-423b-4d6d-a3bf-eac6f666bc4b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_f-7-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "f-7-8b", + "id": "jaspionjader/f-7-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4462 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.5277 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1239 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3129 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4315 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3936 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/f-7-8b/e8c5d934-c9b6-460c-bd45-c4a3e2d26bed.json b/data/hfopenllm_v2/jaspionjader/f-7-8b/e8c5d934-c9b6-460c-bd45-c4a3e2d26bed.json deleted file mode 100644 index 09eb08f39..000000000 --- a/data/hfopenllm_v2/jaspionjader/f-7-8b/e8c5d934-c9b6-460c-bd45-c4a3e2d26bed.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_f-7-8b/1762652580.2664478", - "retrieved_timestamp": "1762652580.266449", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/f-7-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/f-7-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4462337708391512 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5277022085059414 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12386706948640483 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy 
on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4315104166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39361702127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/f-8-8b/59aa26a8-93b3-43fc-8c38-ef67cd8efd80.json b/data/hfopenllm_v2/jaspionjader/f-8-8b/59aa26a8-93b3-43fc-8c38-ef67cd8efd80.json new file mode 100644 index 000000000..5cbc566d2 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/f-8-8b/59aa26a8-93b3-43fc-8c38-ef67cd8efd80.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_f-8-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "f-8-8b", + "id": "jaspionjader/f-8-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4739 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5259 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1224 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4354 + } + }, + { + 
"evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.394 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/f-8-8b/dad898e1-ee18-4864-b432-462d17ac8006.json b/data/hfopenllm_v2/jaspionjader/f-8-8b/dad898e1-ee18-4864-b432-462d17ac8006.json deleted file mode 100644 index 0cc1cacf3..000000000 --- a/data/hfopenllm_v2/jaspionjader/f-8-8b/dad898e1-ee18-4864-b432-462d17ac8006.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_f-8-8b/1762652580.266931", - "retrieved_timestamp": "1762652580.266932", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/f-8-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/f-8-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4739358236146758 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5259311478463803 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12235649546827794 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43544791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39403257978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/f-9-8b/1373c279-13b7-46d3-94a4-7b47c9319f88.json b/data/hfopenllm_v2/jaspionjader/f-9-8b/1373c279-13b7-46d3-94a4-7b47c9319f88.json deleted file mode 100644 index 89b2b1902..000000000 --- a/data/hfopenllm_v2/jaspionjader/f-9-8b/1373c279-13b7-46d3-94a4-7b47c9319f88.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_f-9-8b/1762652580.267217", - 
"retrieved_timestamp": "1762652580.2672179", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/f-9-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/f-9-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4601723427173233 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5291558412946383 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44608333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3943650265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/f-9-8b/220cd306-0613-4c8f-9848-4af812a1d37f.json b/data/hfopenllm_v2/jaspionjader/f-9-8b/220cd306-0613-4c8f-9848-4af812a1d37f.json new file mode 100644 index 000000000..e31f7fa11 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/f-9-8b/220cd306-0613-4c8f-9848-4af812a1d37f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_f-9-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "f-9-8b", + "id": "jaspionjader/f-9-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.4602 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5292 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1299 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4461 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3944 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/fct-14-8b/22c3022f-d538-4a4d-8d4b-05e915506451.json b/data/hfopenllm_v2/jaspionjader/fct-14-8b/22c3022f-d538-4a4d-8d4b-05e915506451.json deleted file mode 100644 index 605b21673..000000000 --- a/data/hfopenllm_v2/jaspionjader/fct-14-8b/22c3022f-d538-4a4d-8d4b-05e915506451.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_fct-14-8b/1762652580.2674618", - "retrieved_timestamp": "1762652580.267463", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/fct-14-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/fct-14-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4128612082607481 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5206018889288543 - 
} - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4185520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3875498670212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/fct-14-8b/39a6a40c-3fa0-41ba-9d13-da9381263d4a.json b/data/hfopenllm_v2/jaspionjader/fct-14-8b/39a6a40c-3fa0-41ba-9d13-da9381263d4a.json new file mode 100644 index 000000000..4829d4d11 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/fct-14-8b/39a6a40c-3fa0-41ba-9d13-da9381263d4a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_fct-14-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "fct-14-8b", + "id": "jaspionjader/fct-14-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4129 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5206 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1201 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.3163 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4186 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3875 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/fct-9-8b/4d037b71-5d03-41a1-bf23-c0aea0cdcbbb.json b/data/hfopenllm_v2/jaspionjader/fct-9-8b/4d037b71-5d03-41a1-bf23-c0aea0cdcbbb.json new file mode 100644 index 000000000..6ed9866d8 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/fct-9-8b/4d037b71-5d03-41a1-bf23-c0aea0cdcbbb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_fct-9-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "fct-9-8b", + "id": "jaspionjader/fct-9-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4354 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5205 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1193 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 
0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4291 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3932 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/fct-9-8b/4d1ddf64-4626-4877-a0fa-84e06f6cf977.json b/data/hfopenllm_v2/jaspionjader/fct-9-8b/4d1ddf64-4626-4877-a0fa-84e06f6cf977.json deleted file mode 100644 index 769ed2531..000000000 --- a/data/hfopenllm_v2/jaspionjader/fct-9-8b/4d1ddf64-4626-4877-a0fa-84e06f6cf977.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_fct-9-8b/1762652580.267691", - "retrieved_timestamp": "1762652580.267692", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/fct-9-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/fct-9-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4353925362482657 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.520510244410076 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11933534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42906249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39320146276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/fr-1-8b/16baf620-7dcc-49f3-a787-b431e11ad4f6.json b/data/hfopenllm_v2/jaspionjader/fr-1-8b/16baf620-7dcc-49f3-a787-b431e11ad4f6.json new file mode 100644 index 000000000..e4d8bb492 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/fr-1-8b/16baf620-7dcc-49f3-a787-b431e11ad4f6.json @@ -0,0 +1,132 @@ +{ + 
"schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_fr-1-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "fr-1-8b", + "id": "jaspionjader/fr-1-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4211 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5142 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1118 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4277 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.361 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/fr-1-8b/2014c198-5e12-41ef-8f65-7321d0423573.json b/data/hfopenllm_v2/jaspionjader/fr-1-8b/2014c198-5e12-41ef-8f65-7321d0423573.json deleted file mode 100644 index e6415b486..000000000 --- a/data/hfopenllm_v2/jaspionjader/fr-1-8b/2014c198-5e12-41ef-8f65-7321d0423573.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_fr-1-8b/1762652580.267912", - "retrieved_timestamp": "1762652580.2679129", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/fr-1-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/fr-1-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.421079402651631 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5142290494968609 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11178247734138973 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4276979166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36103723404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/fr-10-8b/4745add2-7bcb-4c05-8b12-6bd30856890b.json b/data/hfopenllm_v2/jaspionjader/fr-10-8b/4745add2-7bcb-4c05-8b12-6bd30856890b.json new file mode 100644 index 000000000..9d8f40937 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/fr-10-8b/4745add2-7bcb-4c05-8b12-6bd30856890b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_fr-10-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "fr-10-8b", + "id": "jaspionjader/fr-10-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4402 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5207 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1224 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3171 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4119 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3863 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/fr-10-8b/725e5a72-548f-46d0-b268-12209e5cb085.json b/data/hfopenllm_v2/jaspionjader/fr-10-8b/725e5a72-548f-46d0-b268-12209e5cb085.json deleted file mode 100644 index 5d36e323e..000000000 --- a/data/hfopenllm_v2/jaspionjader/fr-10-8b/725e5a72-548f-46d0-b268-12209e5cb085.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_fr-10-8b/1762652580.268136", - "retrieved_timestamp": "1762652580.268136", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/fr-10-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/fr-10-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44018869931781013 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5206624978702634 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12235649546827794 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4118541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3863031914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/fr-3-8b/8bdd1aba-81e4-44d1-acfd-6efeaf391ac8.json b/data/hfopenllm_v2/jaspionjader/fr-3-8b/8bdd1aba-81e4-44d1-acfd-6efeaf391ac8.json deleted file mode 100644 index 738bb0c78..000000000 --- a/data/hfopenllm_v2/jaspionjader/fr-3-8b/8bdd1aba-81e4-44d1-acfd-6efeaf391ac8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_fr-3-8b/1762652580.268359", - "retrieved_timestamp": "1762652580.26836", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/fr-3-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/fr-3-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4325700253106203 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5255174690526301 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11329305135951662 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41982291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, 
- "score_details": { - "score": 0.3863031914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/fr-3-8b/f68b122d-4dec-4d5c-ac22-198da3d3e96b.json b/data/hfopenllm_v2/jaspionjader/fr-3-8b/f68b122d-4dec-4d5c-ac22-198da3d3e96b.json new file mode 100644 index 000000000..4ae5f5614 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/fr-3-8b/f68b122d-4dec-4d5c-ac22-198da3d3e96b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_fr-3-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "fr-3-8b", + "id": "jaspionjader/fr-3-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4326 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5255 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1133 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4198 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3863 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/gamma-Kosmos-EVAA-8B/2e20f780-ceab-4d1d-a1ab-35f4f0ac44aa.json b/data/hfopenllm_v2/jaspionjader/gamma-Kosmos-EVAA-8B/2e20f780-ceab-4d1d-a1ab-35f4f0ac44aa.json new file mode 100644 index 
000000000..84a9fa596 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/gamma-Kosmos-EVAA-8B/2e20f780-ceab-4d1d-a1ab-35f4f0ac44aa.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_gamma-Kosmos-EVAA-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gamma-Kosmos-EVAA-8B", + "id": "jaspionjader/gamma-Kosmos-EVAA-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.425 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5253 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0899 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3138 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4412 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3776 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/gamma-Kosmos-EVAA-8B/6e5584a8-5b8e-48ce-8b80-2d39a74a9b0d.json b/data/hfopenllm_v2/jaspionjader/gamma-Kosmos-EVAA-8B/6e5584a8-5b8e-48ce-8b80-2d39a74a9b0d.json deleted file mode 100644 index 5c80e1137..000000000 --- a/data/hfopenllm_v2/jaspionjader/gamma-Kosmos-EVAA-8B/6e5584a8-5b8e-48ce-8b80-2d39a74a9b0d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/jaspionjader_gamma-Kosmos-EVAA-8B/1762652580.268576", - "retrieved_timestamp": "1762652580.268577", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/gamma-Kosmos-EVAA-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/gamma-Kosmos-EVAA-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42500121898784116 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5252624326543692 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08987915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44115624999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37757646276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/gamma-Kosmos-EVAA-v2-8B/67f972e1-4ebd-4b78-b740-fdc03ac88aac.json b/data/hfopenllm_v2/jaspionjader/gamma-Kosmos-EVAA-v2-8B/67f972e1-4ebd-4b78-b740-fdc03ac88aac.json deleted file mode 100644 index a0a91ae30..000000000 --- a/data/hfopenllm_v2/jaspionjader/gamma-Kosmos-EVAA-v2-8B/67f972e1-4ebd-4b78-b740-fdc03ac88aac.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_gamma-Kosmos-EVAA-v2-8B/1762652580.268805", - "retrieved_timestamp": "1762652580.268806", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/gamma-Kosmos-EVAA-v2-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/gamma-Kosmos-EVAA-v2-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4232525255211727 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5262464083930688 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10574018126888217 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4343958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3755817819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/gamma-Kosmos-EVAA-v2-8B/f21bcd75-fc9f-4266-8976-3227b18b6b32.json b/data/hfopenllm_v2/jaspionjader/gamma-Kosmos-EVAA-v2-8B/f21bcd75-fc9f-4266-8976-3227b18b6b32.json new file mode 100644 index 000000000..fdf1ccc99 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/gamma-Kosmos-EVAA-v2-8B/f21bcd75-fc9f-4266-8976-3227b18b6b32.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_gamma-Kosmos-EVAA-v2-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gamma-Kosmos-EVAA-v2-8B", + "id": "jaspionjader/gamma-Kosmos-EVAA-v2-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4233 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5262 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1057 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3205 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4344 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3756 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/gamma-Kosmos-EVAA-v3-8B/7c1a81ec-1cb7-4858-8f1f-23b3ee49b73f.json b/data/hfopenllm_v2/jaspionjader/gamma-Kosmos-EVAA-v3-8B/7c1a81ec-1cb7-4858-8f1f-23b3ee49b73f.json new file mode 100644 index 000000000..9aa3757ce --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/gamma-Kosmos-EVAA-v3-8B/7c1a81ec-1cb7-4858-8f1f-23b3ee49b73f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_gamma-Kosmos-EVAA-v3-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gamma-Kosmos-EVAA-v3-8B", + "id": "jaspionjader/gamma-Kosmos-EVAA-v3-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4333 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5278 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.111 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3129 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4263 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3898 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/gamma-Kosmos-EVAA-v3-8B/d461545f-ebcb-49e2-94ce-a6591e31a94a.json b/data/hfopenllm_v2/jaspionjader/gamma-Kosmos-EVAA-v3-8B/d461545f-ebcb-49e2-94ce-a6591e31a94a.json deleted file mode 100644 index 968f92f0f..000000000 --- a/data/hfopenllm_v2/jaspionjader/gamma-Kosmos-EVAA-v3-8B/d461545f-ebcb-49e2-94ce-a6591e31a94a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_gamma-Kosmos-EVAA-v3-8B/1762652580.269119", - "retrieved_timestamp": "1762652580.26912", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/gamma-Kosmos-EVAA-v3-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/gamma-Kosmos-EVAA-v3-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43326928106313467 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.527793553969925 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11102719033232629 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": 
"Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4263020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3897938829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/knf-2-8b/1cbfd1ad-237d-4cd3-8b5d-3135c194fcc0.json b/data/hfopenllm_v2/jaspionjader/knf-2-8b/1cbfd1ad-237d-4cd3-8b5d-3135c194fcc0.json new file mode 100644 index 000000000..f0dcdddf2 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/knf-2-8b/1cbfd1ad-237d-4cd3-8b5d-3135c194fcc0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_knf-2-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "knf-2-8b", + "id": "jaspionjader/knf-2-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.425 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5207 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1201 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4185 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3875 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/knf-2-8b/267e641c-7fbd-40d3-a9b7-eb3621240b2a.json b/data/hfopenllm_v2/jaspionjader/knf-2-8b/267e641c-7fbd-40d3-a9b7-eb3621240b2a.json deleted file mode 100644 index 752ef4d2c..000000000 --- a/data/hfopenllm_v2/jaspionjader/knf-2-8b/267e641c-7fbd-40d3-a9b7-eb3621240b2a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_knf-2-8b/1762652580.269415", - "retrieved_timestamp": "1762652580.2694159", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/knf-2-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/knf-2-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42500121898784116 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5206718655559387 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4185208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3874667553191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/knfp-2-8b/0bd6a333-afc0-43a4-9d14-fa44c2364184.json b/data/hfopenllm_v2/jaspionjader/knfp-2-8b/0bd6a333-afc0-43a4-9d14-fa44c2364184.json deleted file mode 100644 index 467bbd85c..000000000 --- a/data/hfopenllm_v2/jaspionjader/knfp-2-8b/0bd6a333-afc0-43a4-9d14-fa44c2364184.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_knfp-2-8b/1762652580.2696629", - "retrieved_timestamp": "1762652580.269664", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/knfp-2-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/knfp-2-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5327120928026525 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5304878011708133 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14274924471299094 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4184583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37258976063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/knfp-2-8b/ef5c1813-a74d-4b3d-9911-c27a46c1c84e.json b/data/hfopenllm_v2/jaspionjader/knfp-2-8b/ef5c1813-a74d-4b3d-9911-c27a46c1c84e.json new file mode 100644 index 000000000..2ee41f38d --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/knfp-2-8b/ef5c1813-a74d-4b3d-9911-c27a46c1c84e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_knfp-2-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "knfp-2-8b", + "id": "jaspionjader/knfp-2-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5327 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5305 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1427 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4185 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3726 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/knfp-3-8b/38a5c599-a098-42f4-a7cb-acee487e382a.json b/data/hfopenllm_v2/jaspionjader/knfp-3-8b/38a5c599-a098-42f4-a7cb-acee487e382a.json deleted file mode 100644 index d345b2110..000000000 --- a/data/hfopenllm_v2/jaspionjader/knfp-3-8b/38a5c599-a098-42f4-a7cb-acee487e382a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_knfp-3-8b/1762652580.2700531", - "retrieved_timestamp": "1762652580.2700539", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/knfp-3-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/knfp-3-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49456885508229276 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5199790073136731 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12235649546827794 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41712499999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3881316489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/knfp-3-8b/df50857d-c90e-4ec8-a9b6-96a6d2f894b1.json b/data/hfopenllm_v2/jaspionjader/knfp-3-8b/df50857d-c90e-4ec8-a9b6-96a6d2f894b1.json new file mode 100644 index 000000000..3ed3a2d85 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/knfp-3-8b/df50857d-c90e-4ec8-a9b6-96a6d2f894b1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_knfp-3-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "knfp-3-8b", + "id": "jaspionjader/knfp-3-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4946 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.52 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1224 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": 
{ + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4171 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3881 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/kstc-1-8b/774d54fb-a445-4ed9-b79a-9c1346537e98.json b/data/hfopenllm_v2/jaspionjader/kstc-1-8b/774d54fb-a445-4ed9-b79a-9c1346537e98.json new file mode 100644 index 000000000..3c1fc3df6 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/kstc-1-8b/774d54fb-a445-4ed9-b79a-9c1346537e98.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_kstc-1-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "kstc-1-8b", + "id": "jaspionjader/kstc-1-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4643 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5209 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1171 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3171 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4158 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + 
"source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3892 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/kstc-1-8b/cd7e14cb-b1f1-47d8-81a9-960da8ac4e05.json b/data/hfopenllm_v2/jaspionjader/kstc-1-8b/cd7e14cb-b1f1-47d8-81a9-960da8ac4e05.json deleted file mode 100644 index d22dd27de..000000000 --- a/data/hfopenllm_v2/jaspionjader/kstc-1-8b/cd7e14cb-b1f1-47d8-81a9-960da8ac4e05.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_kstc-1-8b/1762652580.2702851", - "retrieved_timestamp": "1762652580.270286", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/kstc-1-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/kstc-1-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4642936297911763 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5209048705325947 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11706948640483383 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4157916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3892121010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/kstc-11-8b/41b46842-dffa-4791-8225-99d676f563c9.json b/data/hfopenllm_v2/jaspionjader/kstc-11-8b/41b46842-dffa-4791-8225-99d676f563c9.json deleted file mode 100644 index 733ddea75..000000000 --- a/data/hfopenllm_v2/jaspionjader/kstc-11-8b/41b46842-dffa-4791-8225-99d676f563c9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_kstc-11-8b/1762652580.270522", - "retrieved_timestamp": "1762652580.270522", - 
"source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/kstc-11-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/kstc-11-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4757343847657549 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5189389675805397 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4117604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3878823138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/kstc-11-8b/420b8be3-3560-48e8-8ab3-bb55338a9069.json b/data/hfopenllm_v2/jaspionjader/kstc-11-8b/420b8be3-3560-48e8-8ab3-bb55338a9069.json new file mode 100644 index 000000000..cbbf1f3a0 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/kstc-11-8b/420b8be3-3560-48e8-8ab3-bb55338a9069.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_kstc-11-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "kstc-11-8b", + "id": "jaspionjader/kstc-11-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4757 + } + 
}, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5189 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1201 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4118 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3879 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/kstc-4-8b/6b63598f-4891-4b71-99ca-bc56b780d829.json b/data/hfopenllm_v2/jaspionjader/kstc-4-8b/6b63598f-4891-4b71-99ca-bc56b780d829.json deleted file mode 100644 index a6d3d4635..000000000 --- a/data/hfopenllm_v2/jaspionjader/kstc-4-8b/6b63598f-4891-4b71-99ca-bc56b780d829.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_kstc-4-8b/1762652580.270735", - "retrieved_timestamp": "1762652580.270736", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/kstc-4-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/kstc-4-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4769832932175517 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5216059333020012 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12386706948640483 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4117916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3868849734042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/kstc-4-8b/c118b75c-597f-48a7-a4eb-675af72c9930.json b/data/hfopenllm_v2/jaspionjader/kstc-4-8b/c118b75c-597f-48a7-a4eb-675af72c9930.json new file mode 100644 index 000000000..3b5882807 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/kstc-4-8b/c118b75c-597f-48a7-a4eb-675af72c9930.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_kstc-4-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "kstc-4-8b", + "id": "jaspionjader/kstc-4-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.477 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5216 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1239 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + 
{ + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4118 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3869 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/kstc-5-8b/e75534d3-b994-4e88-9274-7b62f61916cf.json b/data/hfopenllm_v2/jaspionjader/kstc-5-8b/e75534d3-b994-4e88-9274-7b62f61916cf.json new file mode 100644 index 000000000..4dec5cab3 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/kstc-5-8b/e75534d3-b994-4e88-9274-7b62f61916cf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_kstc-5-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "kstc-5-8b", + "id": "jaspionjader/kstc-5-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4721 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5211 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1299 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3154 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.4224 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3892 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/kstc-5-8b/ea79ca75-c55b-457a-b952-528a22567dbb.json b/data/hfopenllm_v2/jaspionjader/kstc-5-8b/ea79ca75-c55b-457a-b952-528a22567dbb.json deleted file mode 100644 index 75c072de0..000000000 --- a/data/hfopenllm_v2/jaspionjader/kstc-5-8b/ea79ca75-c55b-457a-b952-528a22567dbb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_kstc-5-8b/1762652580.270952", - "retrieved_timestamp": "1762652580.270953", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/kstc-5-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/kstc-5-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47208739477918593 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5211438914491455 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4223958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3892121010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/kstc-6-8b/770a1ff1-057f-49a7-9402-c6dd881ac03d.json b/data/hfopenllm_v2/jaspionjader/kstc-6-8b/770a1ff1-057f-49a7-9402-c6dd881ac03d.json new file mode 100644 index 000000000..2000e4ffd --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/kstc-6-8b/770a1ff1-057f-49a7-9402-c6dd881ac03d.json @@ -0,0 +1,132 @@ +{ + 
"schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_kstc-6-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "kstc-6-8b", + "id": "jaspionjader/kstc-6-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4944 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5231 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1246 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4105 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3857 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/kstc-6-8b/f7d63a4b-070d-4581-acce-cd356a3dea47.json b/data/hfopenllm_v2/jaspionjader/kstc-6-8b/f7d63a4b-070d-4581-acce-cd356a3dea47.json deleted file mode 100644 index 3e2e31647..000000000 --- a/data/hfopenllm_v2/jaspionjader/kstc-6-8b/f7d63a4b-070d-4581-acce-cd356a3dea47.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_kstc-6-8b/1762652580.2711701", - "retrieved_timestamp": "1762652580.2711701", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/kstc-6-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/kstc-6-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49439376410147295 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5230977287748603 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12462235649546828 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4104895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3857214095744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/kstc-8-8b/6cc9790d-9b02-437e-8ac7-be4152f5b17d.json b/data/hfopenllm_v2/jaspionjader/kstc-8-8b/6cc9790d-9b02-437e-8ac7-be4152f5b17d.json new file mode 100644 index 000000000..3637c9325 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/kstc-8-8b/6cc9790d-9b02-437e-8ac7-be4152f5b17d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_kstc-8-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "kstc-8-8b", + "id": "jaspionjader/kstc-8-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.491 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + 
}, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5239 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1307 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4211 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3889 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/kstc-8-8b/85502cb7-db11-43ce-a3cf-f9329ecec2e1.json b/data/hfopenllm_v2/jaspionjader/kstc-8-8b/85502cb7-db11-43ce-a3cf-f9329ecec2e1.json deleted file mode 100644 index d7df2b02b..000000000 --- a/data/hfopenllm_v2/jaspionjader/kstc-8-8b/85502cb7-db11-43ce-a3cf-f9329ecec2e1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_kstc-8-8b/1762652580.271383", - "retrieved_timestamp": "1762652580.271384", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/kstc-8-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/kstc-8-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49097173278013445 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5238910223750602 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, 
- "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13066465256797583 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42112499999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3888796542553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/kstc-9-8b/264f5b42-a3ac-4af1-8145-c5763b8e7fa6.json b/data/hfopenllm_v2/jaspionjader/kstc-9-8b/264f5b42-a3ac-4af1-8145-c5763b8e7fa6.json new file mode 100644 index 000000000..23d708f9b --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/kstc-9-8b/264f5b42-a3ac-4af1-8145-c5763b8e7fa6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_kstc-9-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "kstc-9-8b", + "id": "jaspionjader/kstc-9-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4861 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5238 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.136 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4118 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3872 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/kstc-9-8b/5f36e182-fa70-41d9-9cc6-12367035fc76.json b/data/hfopenllm_v2/jaspionjader/kstc-9-8b/5f36e182-fa70-41d9-9cc6-12367035fc76.json deleted file mode 100644 index 17e354ab1..000000000 --- a/data/hfopenllm_v2/jaspionjader/kstc-9-8b/5f36e182-fa70-41d9-9cc6-12367035fc76.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_kstc-9-8b/1762652580.27159", - "retrieved_timestamp": "1762652580.27159", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/kstc-9-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/kstc-9-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4860758343417687 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5238366551736342 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13595166163141995 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4117916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38721742021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/slu-10/549db368-437a-4982-ba5b-5c4d7bf203ae.json b/data/hfopenllm_v2/jaspionjader/slu-10/549db368-437a-4982-ba5b-5c4d7bf203ae.json new file mode 100644 index 
000000000..bde42f6e8 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-10/549db368-437a-4982-ba5b-5c4d7bf203ae.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-10/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "slu-10", + "id": "jaspionjader/slu-10", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.436 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5096 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0974 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3138 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.392 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3664 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/slu-10/79c255e5-8a6b-4afd-a03e-e35cbcbcc712.json b/data/hfopenllm_v2/jaspionjader/slu-10/79c255e5-8a6b-4afd-a03e-e35cbcbcc712.json deleted file mode 100644 index 148b8c2ca..000000000 --- a/data/hfopenllm_v2/jaspionjader/slu-10/79c255e5-8a6b-4afd-a03e-e35cbcbcc712.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_slu-10/1762652580.271806", - "retrieved_timestamp": "1762652580.271807", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/slu-10", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-10", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4359920566319587 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5096469529197213 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09743202416918428 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3920104166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3663563829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/slu-11/0091eabc-3888-4e1a-a29d-8c4e98b599f2.json b/data/hfopenllm_v2/jaspionjader/slu-11/0091eabc-3888-4e1a-a29d-8c4e98b599f2.json deleted file mode 100644 index b5cf2fe9a..000000000 --- a/data/hfopenllm_v2/jaspionjader/slu-11/0091eabc-3888-4e1a-a29d-8c4e98b599f2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_slu-11/1762652580.272018", - "retrieved_timestamp": "1762652580.272018", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/slu-11", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-11", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.372519359743259 - } - }, - { - "evaluation_name": 
"BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4890236865115587 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.055891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3919479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33818151595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/slu-11/0d098a19-7e8f-4a52-8466-729be91388d8.json b/data/hfopenllm_v2/jaspionjader/slu-11/0d098a19-7e8f-4a52-8466-729be91388d8.json new file mode 100644 index 000000000..00f33a8c5 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-11/0d098a19-7e8f-4a52-8466-729be91388d8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-11/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "slu-11", + "id": "jaspionjader/slu-11", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3725 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.489 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0559 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + 
}, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3919 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3382 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/slu-13/1a1eaa84-9926-4c4b-b254-96cd667c25ac.json b/data/hfopenllm_v2/jaspionjader/slu-13/1a1eaa84-9926-4c4b-b254-96cd667c25ac.json deleted file mode 100644 index d28a89793..000000000 --- a/data/hfopenllm_v2/jaspionjader/slu-13/1a1eaa84-9926-4c4b-b254-96cd667c25ac.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_slu-13/1762652580.272234", - "retrieved_timestamp": "1762652580.272237", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/slu-13", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-13", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4378404854674486 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5097334543819346 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08081570996978851 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38140625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35804521276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/slu-13/83335f65-25a4-4bec-a901-587567ed0e99.json b/data/hfopenllm_v2/jaspionjader/slu-13/83335f65-25a4-4bec-a901-587567ed0e99.json new file mode 100644 index 000000000..7dba81194 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-13/83335f65-25a4-4bec-a901-587567ed0e99.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-13/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "slu-13", + "id": "jaspionjader/slu-13", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4378 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5097 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0808 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3814 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.358 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/slu-14/02fb24c3-927f-4c21-bd47-b883521162a3.json b/data/hfopenllm_v2/jaspionjader/slu-14/02fb24c3-927f-4c21-bd47-b883521162a3.json new 
file mode 100644 index 000000000..ff8e6a9e1 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-14/02fb24c3-927f-4c21-bd47-b883521162a3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-14/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "slu-14", + "id": "jaspionjader/slu-14", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4107 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5089 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0974 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.396 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3627 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/slu-14/59703023-61e1-4df0-8542-703d5a318756.json b/data/hfopenllm_v2/jaspionjader/slu-14/59703023-61e1-4df0-8542-703d5a318756.json deleted file mode 100644 index 4a15f7f4a..000000000 --- a/data/hfopenllm_v2/jaspionjader/slu-14/59703023-61e1-4df0-8542-703d5a318756.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_slu-14/1762652580.27245", - "retrieved_timestamp": 
"1762652580.2724512", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/slu-14", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-14", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4106880853912065 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5088505978489455 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09743202416918428 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3960416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3626994680851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/slu-17/2a6507c7-44c1-4416-9ff1-36abd6af3b73.json b/data/hfopenllm_v2/jaspionjader/slu-17/2a6507c7-44c1-4416-9ff1-36abd6af3b73.json new file mode 100644 index 000000000..5214d57f6 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-17/2a6507c7-44c1-4416-9ff1-36abd6af3b73.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-17/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "slu-17", + "id": "jaspionjader/slu-17", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4217 + } + }, + { + 
"evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5071 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0853 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3761 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3619 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/slu-17/fea528ae-4015-4adf-bce0-f9775554cc5f.json b/data/hfopenllm_v2/jaspionjader/slu-17/fea528ae-4015-4adf-bce0-f9775554cc5f.json deleted file mode 100644 index 154de249b..000000000 --- a/data/hfopenllm_v2/jaspionjader/slu-17/fea528ae-4015-4adf-bce0-f9775554cc5f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_slu-17/1762652580.272654", - "retrieved_timestamp": "1762652580.272655", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/slu-17", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-17", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42167892303532406 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5070562055653275 - } - }, - { - "evaluation_name": "MATH 
Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08534743202416918 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3761041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3618683510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/slu-2/1950fba0-3a1b-4cbe-8fa5-9947ed8e4bad.json b/data/hfopenllm_v2/jaspionjader/slu-2/1950fba0-3a1b-4cbe-8fa5-9947ed8e4bad.json deleted file mode 100644 index b36760b3a..000000000 --- a/data/hfopenllm_v2/jaspionjader/slu-2/1950fba0-3a1b-4cbe-8fa5-9947ed8e4bad.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_slu-2/1762652580.2728698", - "retrieved_timestamp": "1762652580.272871", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/slu-2", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40159554426698935 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5008068127974601 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3958854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on 
MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35064827127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/slu-2/327a146a-8cfd-4480-8342-46afde530677.json b/data/hfopenllm_v2/jaspionjader/slu-2/327a146a-8cfd-4480-8342-46afde530677.json new file mode 100644 index 000000000..746324375 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-2/327a146a-8cfd-4480-8342-46afde530677.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "slu-2", + "id": "jaspionjader/slu-2", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4016 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5008 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0634 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3959 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3506 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/slu-20/0700fb7a-e722-432f-a64d-c040bba4deee.json 
b/data/hfopenllm_v2/jaspionjader/slu-20/0700fb7a-e722-432f-a64d-c040bba4deee.json new file mode 100644 index 000000000..b57d6ab6a --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-20/0700fb7a-e722-432f-a64d-c040bba4deee.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-20/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "slu-20", + "id": "jaspionjader/slu-20", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4393 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5061 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0869 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3933 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3665 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/slu-20/1430e550-80ca-4f84-952f-b5b10fbca711.json b/data/hfopenllm_v2/jaspionjader/slu-20/1430e550-80ca-4f84-952f-b5b10fbca711.json deleted file mode 100644 index 7e04d1209..000000000 --- a/data/hfopenllm_v2/jaspionjader/slu-20/1430e550-80ca-4f84-952f-b5b10fbca711.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/jaspionjader_slu-20/1762652580.273083", - "retrieved_timestamp": "1762652580.273084", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/slu-20", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-20", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4393143525844759 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5061273966566772 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08685800604229606 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39334375000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36652260638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/slu-22/131d3a7e-43dd-4189-8466-6562703b3bdd.json b/data/hfopenllm_v2/jaspionjader/slu-22/131d3a7e-43dd-4189-8466-6562703b3bdd.json new file mode 100644 index 000000000..3b02267ec --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-22/131d3a7e-43dd-4189-8466-6562703b3bdd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-22/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "slu-22", + "id": "jaspionjader/slu-22", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 
0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4321 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5082 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0793 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3163 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3893 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.365 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/slu-22/c0898ca4-21a7-4d83-ad2e-1aa61bd370fa.json b/data/hfopenllm_v2/jaspionjader/slu-22/c0898ca4-21a7-4d83-ad2e-1aa61bd370fa.json deleted file mode 100644 index cbf03d260..000000000 --- a/data/hfopenllm_v2/jaspionjader/slu-22/c0898ca4-21a7-4d83-ad2e-1aa61bd370fa.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_slu-22/1762652580.2733881", - "retrieved_timestamp": "1762652580.273391", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/slu-22", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-22", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4321201079801593 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.5081790871805086 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07930513595166164 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38934375000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3650265957446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/slu-23/8f6d7008-b8de-4a76-94aa-bbecc93ef3f7.json b/data/hfopenllm_v2/jaspionjader/slu-23/8f6d7008-b8de-4a76-94aa-bbecc93ef3f7.json new file mode 100644 index 000000000..12d59c50d --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-23/8f6d7008-b8de-4a76-94aa-bbecc93ef3f7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-23/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "slu-23", + "id": "jaspionjader/slu-23", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4478 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5132 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0944 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4092 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3725 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/slu-23/f4b76351-e472-47a9-8011-6bf2e7e33a71.json b/data/hfopenllm_v2/jaspionjader/slu-23/f4b76351-e472-47a9-8011-6bf2e7e33a71.json deleted file mode 100644 index a6f397181..000000000 --- a/data/hfopenllm_v2/jaspionjader/slu-23/f4b76351-e472-47a9-8011-6bf2e7e33a71.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_slu-23/1762652580.27371", - "retrieved_timestamp": "1762652580.2737112", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/slu-23", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-23", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44780737332499987 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5131603005034272 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09441087613293052 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40924999999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3725066489361702 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/jaspionjader/slu-25/03c03447-1bf3-4721-8f9e-5ef041ab5d7d.json b/data/hfopenllm_v2/jaspionjader/slu-25/03c03447-1bf3-4721-8f9e-5ef041ab5d7d.json deleted file mode 100644 index 53324f918..000000000 --- a/data/hfopenllm_v2/jaspionjader/slu-25/03c03447-1bf3-4721-8f9e-5ef041ab5d7d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_slu-25/1762652580.27394", - "retrieved_timestamp": "1762652580.273941", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/slu-25", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-25", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4500303638789523 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5094887898349904 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08383685800604229 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3946145833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3684341755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/slu-25/aadb0ce5-a1aa-4b0d-bec4-8bb0e8e54a1d.json b/data/hfopenllm_v2/jaspionjader/slu-25/aadb0ce5-a1aa-4b0d-bec4-8bb0e8e54a1d.json new file mode 100644 index 000000000..f8a9592be --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-25/aadb0ce5-a1aa-4b0d-bec4-8bb0e8e54a1d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-25/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "slu-25", + "id": "jaspionjader/slu-25", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + 
"architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.45 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5095 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0838 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3946 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3684 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/slu-29/a73250f1-399a-4afa-bf83-4036dce78ef3.json b/data/hfopenllm_v2/jaspionjader/slu-29/a73250f1-399a-4afa-bf83-4036dce78ef3.json new file mode 100644 index 000000000..56321b0e4 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-29/a73250f1-399a-4afa-bf83-4036dce78ef3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-29/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "slu-29", + "id": "jaspionjader/slu-29", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", 
+ "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4431 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5096 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0869 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3933 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3669 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/slu-29/fe231e36-6cc2-412c-b86e-0ba6ba9cc430.json b/data/hfopenllm_v2/jaspionjader/slu-29/fe231e36-6cc2-412c-b86e-0ba6ba9cc430.json deleted file mode 100644 index 0931fe3c9..000000000 --- a/data/hfopenllm_v2/jaspionjader/slu-29/fe231e36-6cc2-412c-b86e-0ba6ba9cc430.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_slu-29/1762652580.274164", - "retrieved_timestamp": "1762652580.274165", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/slu-29", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-29", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4430610779398662 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5096472519745161 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08685800604229606 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3933125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.366938164893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/slu-32/1095577f-7b50-4854-9c7c-5beb59206e60.json b/data/hfopenllm_v2/jaspionjader/slu-32/1095577f-7b50-4854-9c7c-5beb59206e60.json deleted file mode 100644 index fa6cce8be..000000000 --- a/data/hfopenllm_v2/jaspionjader/slu-32/1095577f-7b50-4854-9c7c-5beb59206e60.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_slu-32/1762652580.274382", - "retrieved_timestamp": "1762652580.274383", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/slu-32", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-32", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45155409868039026 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5167277162337642 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10725075528700906 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.4039166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3765791223404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/slu-32/f68bf680-9626-4952-b95e-12a18fd60820.json b/data/hfopenllm_v2/jaspionjader/slu-32/f68bf680-9626-4952-b95e-12a18fd60820.json new file mode 100644 index 000000000..3e62ecf10 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-32/f68bf680-9626-4952-b95e-12a18fd60820.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-32/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "slu-32", + "id": "jaspionjader/slu-32", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4516 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5167 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1073 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4039 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3766 + } + } + ] +} \ No 
newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/slu-33/2597a3df-0f30-43d1-b1b3-7a0baac07675.json b/data/hfopenllm_v2/jaspionjader/slu-33/2597a3df-0f30-43d1-b1b3-7a0baac07675.json deleted file mode 100644 index 0c6ca635a..000000000 --- a/data/hfopenllm_v2/jaspionjader/slu-33/2597a3df-0f30-43d1-b1b3-7a0baac07675.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_slu-33/1762652580.274691", - "retrieved_timestamp": "1762652580.274692", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/slu-33", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-33", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4457339858242796 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5081308429202344 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09969788519637462 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38667708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3679355053191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/slu-33/d6a78a5c-4a2e-4370-88f2-d8627a94f1ea.json b/data/hfopenllm_v2/jaspionjader/slu-33/d6a78a5c-4a2e-4370-88f2-d8627a94f1ea.json new file mode 100644 index 000000000..c25031e6e --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-33/d6a78a5c-4a2e-4370-88f2-d8627a94f1ea.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-33/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "slu-33", + "id": "jaspionjader/slu-33", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + 
"precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4457 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5081 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0997 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3867 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3679 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/slu-34/050afa51-be7c-4cad-ae8b-bd63384df297.json b/data/hfopenllm_v2/jaspionjader/slu-34/050afa51-be7c-4cad-ae8b-bd63384df297.json deleted file mode 100644 index 8875b7acb..000000000 --- a/data/hfopenllm_v2/jaspionjader/slu-34/050afa51-be7c-4cad-ae8b-bd63384df297.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_slu-34/1762652580.2749598", - "retrieved_timestamp": "1762652580.274961", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/slu-34", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-34", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4350678422142138 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5077400809148992 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09969788519637462 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3880416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37200797872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/slu-34/7b5eab2e-fba3-47d5-9839-02249c2568c5.json b/data/hfopenllm_v2/jaspionjader/slu-34/7b5eab2e-fba3-47d5-9839-02249c2568c5.json new file mode 100644 index 000000000..53e4bfd25 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-34/7b5eab2e-fba3-47d5-9839-02249c2568c5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-34/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "slu-34", + "id": "jaspionjader/slu-34", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4351 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5077 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0997 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.388 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.372 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/slu-35/0d7698b6-de52-4781-831f-a3ca8b23dd72.json b/data/hfopenllm_v2/jaspionjader/slu-35/0d7698b6-de52-4781-831f-a3ca8b23dd72.json deleted file mode 100644 index e36dfec35..000000000 --- a/data/hfopenllm_v2/jaspionjader/slu-35/0d7698b6-de52-4781-831f-a3ca8b23dd72.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_slu-35/1762652580.275198", - "retrieved_timestamp": "1762652580.2752", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/slu-35", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-35", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42417673993891764 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5103079759559944 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10120845921450151 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39464583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3676030585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/slu-35/2acee2c3-4322-4152-8151-c1d571475b7c.json b/data/hfopenllm_v2/jaspionjader/slu-35/2acee2c3-4322-4152-8151-c1d571475b7c.json new file mode 100644 index 000000000..3720fab6c --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-35/2acee2c3-4322-4152-8151-c1d571475b7c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-35/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "slu-35", + "id": "jaspionjader/slu-35", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4242 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5103 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1012 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3946 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { 
+ "score": 0.3676 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/slu-36/67ffb2de-0410-44a2-aad7-4a32e2c49c7d.json b/data/hfopenllm_v2/jaspionjader/slu-36/67ffb2de-0410-44a2-aad7-4a32e2c49c7d.json new file mode 100644 index 000000000..54b88f06a --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-36/67ffb2de-0410-44a2-aad7-4a32e2c49c7d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-36/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "slu-36", + "id": "jaspionjader/slu-36", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4518 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5087 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0906 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3933 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3711 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/slu-36/cf85253f-0ecd-4943-a508-eab1e562a497.json b/data/hfopenllm_v2/jaspionjader/slu-36/cf85253f-0ecd-4943-a508-eab1e562a497.json deleted file mode 100644 index aa2652b4e..000000000 --- 
a/data/hfopenllm_v2/jaspionjader/slu-36/cf85253f-0ecd-4943-a508-eab1e562a497.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_slu-36/1762652580.275441", - "retrieved_timestamp": "1762652580.275442", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/slu-36", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-36", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4518289250300314 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5087352369131289 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09063444108761329 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3933125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37109375 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/slu-37/2923aeb3-982f-400d-9588-707583c75a1d.json b/data/hfopenllm_v2/jaspionjader/slu-37/2923aeb3-982f-400d-9588-707583c75a1d.json new file mode 100644 index 000000000..c1501c97b --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-37/2923aeb3-982f-400d-9588-707583c75a1d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-37/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "slu-37", + "id": "jaspionjader/slu-37", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, 
+ "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4534 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.51 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0974 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3946 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3695 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/slu-37/e64e5fe0-c726-4b9d-9d7b-952e7c7508ab.json b/data/hfopenllm_v2/jaspionjader/slu-37/e64e5fe0-c726-4b9d-9d7b-952e7c7508ab.json deleted file mode 100644 index 6af7b9e08..000000000 --- a/data/hfopenllm_v2/jaspionjader/slu-37/e64e5fe0-c726-4b9d-9d7b-952e7c7508ab.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_slu-37/1762652580.2757561", - "retrieved_timestamp": "1762652580.275757", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/slu-37", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-37", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4533526598314694 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5099854293096197 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09743202416918428 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39464583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3695146276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/slu-6/0e1cd676-f95b-4562-8c5d-e932f148dc23.json b/data/hfopenllm_v2/jaspionjader/slu-6/0e1cd676-f95b-4562-8c5d-e932f148dc23.json deleted file mode 100644 index 0ca141f1c..000000000 --- a/data/hfopenllm_v2/jaspionjader/slu-6/0e1cd676-f95b-4562-8c5d-e932f148dc23.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_slu-6/1762652580.276035", - "retrieved_timestamp": "1762652580.276036", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/slu-6", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-6", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41166216749336204 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5098719666858446 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09441087613293052 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4066458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3611203457446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/slu-6/b6a622da-5ce8-4ea5-a82a-f3a2a299ddf2.json b/data/hfopenllm_v2/jaspionjader/slu-6/b6a622da-5ce8-4ea5-a82a-f3a2a299ddf2.json new file mode 100644 index 000000000..bdbe422b6 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-6/b6a622da-5ce8-4ea5-a82a-f3a2a299ddf2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-6/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "slu-6", + "id": "jaspionjader/slu-6", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4117 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5099 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0944 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4066 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.3611 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/slu-mix-1/3a8a175f-5173-491b-9acf-87fe781f16df.json b/data/hfopenllm_v2/jaspionjader/slu-mix-1/3a8a175f-5173-491b-9acf-87fe781f16df.json deleted file mode 100644 index a75f885dc..000000000 --- a/data/hfopenllm_v2/jaspionjader/slu-mix-1/3a8a175f-5173-491b-9acf-87fe781f16df.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_slu-mix-1/1762652580.276264", - "retrieved_timestamp": "1762652580.276264", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/slu-mix-1", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-mix-1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45689991444921696 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5240269525191525 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11178247734138973 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42766666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39303523936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/slu-mix-1/7b06ac17-bfc6-43d5-99e6-d2b7a31290fb.json b/data/hfopenllm_v2/jaspionjader/slu-mix-1/7b06ac17-bfc6-43d5-99e6-d2b7a31290fb.json new file mode 100644 index 000000000..d2cd73f98 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-mix-1/7b06ac17-bfc6-43d5-99e6-d2b7a31290fb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-mix-1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "slu-mix-1", + "id": 
"jaspionjader/slu-mix-1", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4569 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.524 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1118 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4277 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.393 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/sof-1/b1f4196a-0050-4107-a97b-4e1bd6ece17b.json b/data/hfopenllm_v2/jaspionjader/sof-1/b1f4196a-0050-4107-a97b-4e1bd6ece17b.json deleted file mode 100644 index edade6aec..000000000 --- a/data/hfopenllm_v2/jaspionjader/sof-1/b1f4196a-0050-4107-a97b-4e1bd6ece17b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_sof-1/1762652580.276484", - "retrieved_timestamp": "1762652580.2764852", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/sof-1", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/sof-1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - 
"params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4313709845432342 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5009822733212669 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11404833836858005 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40819791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.367436835106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/sof-1/fd481b93-55b2-4831-9be9-1b1b2886fda3.json b/data/hfopenllm_v2/jaspionjader/sof-1/fd481b93-55b2-4831-9be9-1b1b2886fda3.json new file mode 100644 index 000000000..23e9adbfe --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/sof-1/fd481b93-55b2-4831-9be9-1b1b2886fda3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_sof-1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sof-1", + "id": "jaspionjader/sof-1", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4314 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.501 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.114 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4082 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3674 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/sof-10/03761253-711d-428d-a3bd-89974a50b490.json b/data/hfopenllm_v2/jaspionjader/sof-10/03761253-711d-428d-a3bd-89974a50b490.json deleted file mode 100644 index 30815cdd1..000000000 --- a/data/hfopenllm_v2/jaspionjader/sof-10/03761253-711d-428d-a3bd-89974a50b490.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_sof-10/1762652580.276895", - "retrieved_timestamp": "1762652580.276897", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/sof-10", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/sof-10", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46484328249045864 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5197177291754291 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12386706948640483 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": 
{ - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40906250000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38738364361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/sof-10/f159748f-234e-4962-b582-cd5805448f33.json b/data/hfopenllm_v2/jaspionjader/sof-10/f159748f-234e-4962-b582-cd5805448f33.json new file mode 100644 index 000000000..a665ef820 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/sof-10/f159748f-234e-4962-b582-cd5805448f33.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_sof-10/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sof-10", + "id": "jaspionjader/sof-10", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4648 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5197 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1239 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4091 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3874 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/sof-3/044d53dd-d134-4959-a70c-46f11cc0b300.json b/data/hfopenllm_v2/jaspionjader/sof-3/044d53dd-d134-4959-a70c-46f11cc0b300.json new file mode 100644 index 000000000..02de1aae3 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/sof-3/044d53dd-d134-4959-a70c-46f11cc0b300.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_sof-3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sof-3", + "id": "jaspionjader/sof-3", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4637 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5206 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1276 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4131 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3812 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/sof-3/e5cd6a8b-88ed-4a0d-8584-889a4fde72a7.json 
b/data/hfopenllm_v2/jaspionjader/sof-3/e5cd6a8b-88ed-4a0d-8584-889a4fde72a7.json deleted file mode 100644 index 957764216..000000000 --- a/data/hfopenllm_v2/jaspionjader/sof-3/e5cd6a8b-88ed-4a0d-8584-889a4fde72a7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_sof-3/1762652580.277219", - "retrieved_timestamp": "1762652580.27722", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/sof-3", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/sof-3", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46369410940748323 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5206072122413828 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12764350453172205 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41312499999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3812333776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/sof-6/0755b7f9-bdd7-4e2a-92da-6650934db265.json b/data/hfopenllm_v2/jaspionjader/sof-6/0755b7f9-bdd7-4e2a-92da-6650934db265.json deleted file mode 100644 index d1ea7021c..000000000 --- a/data/hfopenllm_v2/jaspionjader/sof-6/0755b7f9-bdd7-4e2a-92da-6650934db265.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_sof-6/1762652580.277473", - "retrieved_timestamp": "1762652580.2774742", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/sof-6", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/sof-6", - "additional_details": 
{ - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4353925362482657 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5209098090521417 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41706250000000006 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3843916223404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/sof-6/f05501fd-7c06-46d5-bc20-a9d0cc5c2e0f.json b/data/hfopenllm_v2/jaspionjader/sof-6/f05501fd-7c06-46d5-bc20-a9d0cc5c2e0f.json new file mode 100644 index 000000000..f3a8226cf --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/sof-6/f05501fd-7c06-46d5-bc20-a9d0cc5c2e0f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_sof-6/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sof-6", + "id": "jaspionjader/sof-6", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4354 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5209 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1299 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4171 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3844 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/test-10/2bcc7f9a-9c36-487e-8522-bfbe1910b857.json b/data/hfopenllm_v2/jaspionjader/test-10/2bcc7f9a-9c36-487e-8522-bfbe1910b857.json deleted file mode 100644 index 23da2c3fb..000000000 --- a/data/hfopenllm_v2/jaspionjader/test-10/2bcc7f9a-9c36-487e-8522-bfbe1910b857.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_test-10/1762652580.2777631", - "retrieved_timestamp": "1762652580.277764", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/test-10", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/test-10", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4578241288669619 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5316217442466934 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11404833836858005 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42509375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39361702127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/test-10/5c44a2f2-23e3-4c9f-9b7c-9012ca8b15e9.json b/data/hfopenllm_v2/jaspionjader/test-10/5c44a2f2-23e3-4c9f-9b7c-9012ca8b15e9.json new file mode 100644 index 000000000..6d72f6368 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/test-10/5c44a2f2-23e3-4c9f-9b7c-9012ca8b15e9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_test-10/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "test-10", + "id": "jaspionjader/test-10", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4578 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5316 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.114 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3196 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4251 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3936 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/test-11/80e5134b-0733-41cc-8b4f-ef32fbe57066.json b/data/hfopenllm_v2/jaspionjader/test-11/80e5134b-0733-41cc-8b4f-ef32fbe57066.json new file mode 100644 index 000000000..91ee0a934 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/test-11/80e5134b-0733-41cc-8b4f-ef32fbe57066.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_test-11/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "test-11", + "id": "jaspionjader/test-11", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4541 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.535 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1201 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3238 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.429 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3939 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/jaspionjader/test-11/98f97092-7c95-46dd-94c7-4030f153d197.json b/data/hfopenllm_v2/jaspionjader/test-11/98f97092-7c95-46dd-94c7-4030f153d197.json deleted file mode 100644 index 470e531b7..000000000 --- a/data/hfopenllm_v2/jaspionjader/test-11/98f97092-7c95-46dd-94c7-4030f153d197.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_test-11/1762652580.2779882", - "retrieved_timestamp": "1762652580.2779891", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/test-11", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/test-11", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45412727119598223 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5350048053167004 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.429 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3939494680851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/test-12/61123e41-7b2a-40da-9f7f-b830c27d7f12.json b/data/hfopenllm_v2/jaspionjader/test-12/61123e41-7b2a-40da-9f7f-b830c27d7f12.json new file mode 100644 index 000000000..f704e5a09 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/test-12/61123e41-7b2a-40da-9f7f-b830c27d7f12.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_test-12/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "test-12", + "id": "jaspionjader/test-12", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + 
"architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4368 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5347 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.108 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3188 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.425 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3935 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/test-12/e49c9cc8-96ff-4a3c-b7b4-ea5562f41449.json b/data/hfopenllm_v2/jaspionjader/test-12/e49c9cc8-96ff-4a3c-b7b4-ea5562f41449.json deleted file mode 100644 index f058fc473..000000000 --- a/data/hfopenllm_v2/jaspionjader/test-12/e49c9cc8-96ff-4a3c-b7b4-ea5562f41449.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_test-12/1762652580.278201", - "retrieved_timestamp": "1762652580.278202", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/test-12", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/test-12", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4368165356808823 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5347063686599355 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10800604229607251 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42503124999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3935339095744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/test-13/98772920-a700-4fda-88fd-53c16ac4b1a1.json b/data/hfopenllm_v2/jaspionjader/test-13/98772920-a700-4fda-88fd-53c16ac4b1a1.json deleted file mode 100644 index 66fec61ae..000000000 --- a/data/hfopenllm_v2/jaspionjader/test-13/98772920-a700-4fda-88fd-53c16ac4b1a1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_test-13/1762652580.278408", - "retrieved_timestamp": "1762652580.278409", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/test-13", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/test-13", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45809895521660304 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.531808681066841 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10574018126888217 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4263958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3935339095744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/test-13/b93c31d7-54c3-47b9-a267-3f8fdb796805.json b/data/hfopenllm_v2/jaspionjader/test-13/b93c31d7-54c3-47b9-a267-3f8fdb796805.json new file mode 100644 index 000000000..d5aea42d0 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/test-13/b93c31d7-54c3-47b9-a267-3f8fdb796805.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_test-13/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "test-13", + "id": "jaspionjader/test-13", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4581 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5318 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1057 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3163 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4264 + } + }, + { + "evaluation_name": 
"MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3935 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/test-14/b3eaa4c5-7abc-4e2d-9c11-c70ecb8a843b.json b/data/hfopenllm_v2/jaspionjader/test-14/b3eaa4c5-7abc-4e2d-9c11-c70ecb8a843b.json new file mode 100644 index 000000000..d1c730295 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/test-14/b3eaa4c5-7abc-4e2d-9c11-c70ecb8a843b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_test-14/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "test-14", + "id": "jaspionjader/test-14", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4444 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5323 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1103 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3146 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4317 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, 
+ "score_details": { + "score": 0.393 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/test-14/d647b482-3d3b-4ed4-b8b5-d57eedf87db9.json b/data/hfopenllm_v2/jaspionjader/test-14/d647b482-3d3b-4ed4-b8b5-d57eedf87db9.json deleted file mode 100644 index 66c797ab8..000000000 --- a/data/hfopenllm_v2/jaspionjader/test-14/d647b482-3d3b-4ed4-b8b5-d57eedf87db9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_test-14/1762652580.2787268", - "retrieved_timestamp": "1762652580.278728", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/test-14", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/test-14", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4443853420036614 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5322932549151301 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11027190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4316979166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3929521276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/test-15/3b06f75e-3d22-4428-8d4f-2e704b96961e.json b/data/hfopenllm_v2/jaspionjader/test-15/3b06f75e-3d22-4428-8d4f-2e704b96961e.json new file mode 100644 index 000000000..1f7a65dff --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/test-15/3b06f75e-3d22-4428-8d4f-2e704b96961e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_test-15/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "test-15", + "id": "jaspionjader/test-15", + "developer": 
"jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4365 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5328 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1118 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4264 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.393 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/test-15/f197c7ce-c30a-49ad-bd6c-9571d3b25637.json b/data/hfopenllm_v2/jaspionjader/test-15/f197c7ce-c30a-49ad-bd6c-9571d3b25637.json deleted file mode 100644 index b9b074c40..000000000 --- a/data/hfopenllm_v2/jaspionjader/test-15/f197c7ce-c30a-49ad-bd6c-9571d3b25637.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_test-15/1762652580.278964", - "retrieved_timestamp": "1762652580.278965", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/test-15", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/test-15", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } - 
}, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4364918416468304 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.53278841091336 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11178247734138973 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4264270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3929521276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/test-16/80c756a7-9d47-4b49-bf42-bbada0909163.json b/data/hfopenllm_v2/jaspionjader/test-16/80c756a7-9d47-4b49-bf42-bbada0909163.json deleted file mode 100644 index 3f7eadfe3..000000000 --- a/data/hfopenllm_v2/jaspionjader/test-16/80c756a7-9d47-4b49-bf42-bbada0909163.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_test-16/1762652580.279189", - "retrieved_timestamp": "1762652580.27919", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/test-16", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/test-16", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4599473840520929 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5330160713144172 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1095166163141994 - } - }, - { - "evaluation_name": 
"GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4224583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39303523936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/test-16/dfda4aab-f8d4-49ee-b141-78539b69007c.json b/data/hfopenllm_v2/jaspionjader/test-16/dfda4aab-f8d4-49ee-b141-78539b69007c.json new file mode 100644 index 000000000..e35aa96fb --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/test-16/dfda4aab-f8d4-49ee-b141-78539b69007c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_test-16/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "test-16", + "id": "jaspionjader/test-16", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4599 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.533 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1095 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3138 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + 
}, + "score_details": { + "score": 0.4225 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.393 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/test-17/690f3c19-c148-458d-b4c5-87761d72b851.json b/data/hfopenllm_v2/jaspionjader/test-17/690f3c19-c148-458d-b4c5-87761d72b851.json new file mode 100644 index 000000000..ecdd22a51 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/test-17/690f3c19-c148-458d-b4c5-87761d72b851.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_test-17/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "test-17", + "id": "jaspionjader/test-17", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4267 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5329 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1103 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3129 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.429 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, 
+ "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3929 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/test-17/c9933c3d-98ab-4486-bd42-7c90f5ed3bd2.json b/data/hfopenllm_v2/jaspionjader/test-17/c9933c3d-98ab-4486-bd42-7c90f5ed3bd2.json deleted file mode 100644 index 804ab2ff4..000000000 --- a/data/hfopenllm_v2/jaspionjader/test-17/c9933c3d-98ab-4486-bd42-7c90f5ed3bd2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_test-17/1762652580.279401", - "retrieved_timestamp": "1762652580.279402", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/test-17", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/test-17", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42674991245450955 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5329373895863633 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11027190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.429 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39286901595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/test-18/3f3eeca1-d401-436e-b7e6-5fa82c099270.json b/data/hfopenllm_v2/jaspionjader/test-18/3f3eeca1-d401-436e-b7e6-5fa82c099270.json deleted file mode 100644 index 1e5b475a3..000000000 --- a/data/hfopenllm_v2/jaspionjader/test-18/3f3eeca1-d401-436e-b7e6-5fa82c099270.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_test-18/1762652580.2796118", - "retrieved_timestamp": "1762652580.279613", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": 
"third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/test-18", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/test-18", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43916474953124374 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5317453097096507 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1148036253776435 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42506249999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39303523936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/test-18/b6a18246-776d-463f-80d5-140df74e9704.json b/data/hfopenllm_v2/jaspionjader/test-18/b6a18246-776d-463f-80d5-140df74e9704.json new file mode 100644 index 000000000..ae471a895 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/test-18/b6a18246-776d-463f-80d5-140df74e9704.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_test-18/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "test-18", + "id": "jaspionjader/test-18", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4392 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5317 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1148 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4251 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.393 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/test-19/9831abdc-ad08-48c0-8384-86240e7350b5.json b/data/hfopenllm_v2/jaspionjader/test-19/9831abdc-ad08-48c0-8384-86240e7350b5.json new file mode 100644 index 000000000..acc7c5b32 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/test-19/9831abdc-ad08-48c0-8384-86240e7350b5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_test-19/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "test-19", + "id": "jaspionjader/test-19", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4401 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5319 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1095 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4264 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3929 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/test-19/ab7e0f6c-bca9-4f83-a4a0-5014c46e0512.json b/data/hfopenllm_v2/jaspionjader/test-19/ab7e0f6c-bca9-4f83-a4a0-5014c46e0512.json deleted file mode 100644 index da634f6d0..000000000 --- a/data/hfopenllm_v2/jaspionjader/test-19/ab7e0f6c-bca9-4f83-a4a0-5014c46e0512.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_test-19/1762652580.279826", - "retrieved_timestamp": "1762652580.2798269", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/test-19", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/test-19", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44008896394898867 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5319373895863634 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1095166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4263958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39286901595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/test-20/6391f921-4de7-4e83-8bb2-8d0ef0b58d8f.json b/data/hfopenllm_v2/jaspionjader/test-20/6391f921-4de7-4e83-8bb2-8d0ef0b58d8f.json deleted file mode 100644 index 63005b07f..000000000 --- a/data/hfopenllm_v2/jaspionjader/test-20/6391f921-4de7-4e83-8bb2-8d0ef0b58d8f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jaspionjader_test-20/1762652580.2800388", - "retrieved_timestamp": "1762652580.28004", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jaspionjader/test-20", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/test-20", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45292823042859615 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5327388877137041 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11178247734138973 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42506249999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39195478723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/jaspionjader/test-20/96a572e5-4751-46ce-9202-deb223ef4dfe.json b/data/hfopenllm_v2/jaspionjader/test-20/96a572e5-4751-46ce-9202-deb223ef4dfe.json new file mode 100644 index 000000000..98974db50 --- /dev/null +++ 
b/data/hfopenllm_v2/jaspionjader/test-20/96a572e5-4751-46ce-9202-deb223ef4dfe.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jaspionjader_test-20/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "test-20", + "id": "jaspionjader/test-20", + "developer": "jaspionjader", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4529 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5327 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1118 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3138 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4251 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.392 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jayasuryajsk/Qwen2.5-3B-reasoner/f4320b1e-ea4f-4aea-8dab-cdb221ce53e5.json b/data/hfopenllm_v2/jayasuryajsk/Qwen2.5-3B-reasoner/f4320b1e-ea4f-4aea-8dab-cdb221ce53e5.json new file mode 100644 index 000000000..b9587c6d6 --- /dev/null +++ b/data/hfopenllm_v2/jayasuryajsk/Qwen2.5-3B-reasoner/f4320b1e-ea4f-4aea-8dab-cdb221ce53e5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jayasuryajsk_Qwen2.5-3B-reasoner/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + 
"source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-3B-reasoner", + "id": "jayasuryajsk/Qwen2.5-3B-reasoner", + "developer": "jayasuryajsk", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.416 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4651 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2085 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4123 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3482 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jeanmichela/o-distil-qwen/8376c0bf-f9c3-4529-b13c-c57106182d15.json b/data/hfopenllm_v2/jeanmichela/o-distil-qwen/8376c0bf-f9c3-4529-b13c-c57106182d15.json new file mode 100644 index 000000000..214d189b4 --- /dev/null +++ b/data/hfopenllm_v2/jeanmichela/o-distil-qwen/8376c0bf-f9c3-4529-b13c-c57106182d15.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jeanmichela_o-distil-qwen/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "o-distil-qwen", + "id": "jeanmichela/o-distil-qwen", 
+ "developer": "jeanmichela", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4482 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.59 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.565 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3935 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.534 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4658 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jebcarter/psyonic-cetacean-20B/97a80145-e621-4603-8ff8-2cc4bd74190a.json b/data/hfopenllm_v2/jebcarter/psyonic-cetacean-20B/97a80145-e621-4603-8ff8-2cc4bd74190a.json new file mode 100644 index 000000000..bb13bcad2 --- /dev/null +++ b/data/hfopenllm_v2/jebcarter/psyonic-cetacean-20B/97a80145-e621-4603-8ff8-2cc4bd74190a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jebcarter_psyonic-cetacean-20B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "psyonic-cetacean-20B", + "id": "jebcarter/psyonic-cetacean-20B", + "developer": "jebcarter", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 19.994 + } + }, + "evaluation_results": [ + { + "evaluation_name": 
"IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2544 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4907 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0181 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2735 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4661 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2886 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jebcarter/psyonic-cetacean-20B/f8461982-37ad-4975-8445-996bdc9e59ce.json b/data/hfopenllm_v2/jebcarter/psyonic-cetacean-20B/f8461982-37ad-4975-8445-996bdc9e59ce.json deleted file mode 100644 index 877f82758..000000000 --- a/data/hfopenllm_v2/jebcarter/psyonic-cetacean-20B/f8461982-37ad-4975-8445-996bdc9e59ce.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jebcarter_psyonic-cetacean-20B/1762652580.2807941", - "retrieved_timestamp": "1762652580.2807949", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jebcarter/psyonic-cetacean-20B", - "developer": "jebcarter", - "inference_platform": "unknown", - "id": "jebcarter/psyonic-cetacean-20B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 19.994 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25436619281284767 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4907386156835858 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01812688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46611458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28856382978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/jebish7/Llama-3-Nanda-10B-Chat/739c83a9-8ff7-48df-af0c-494891df487b.json b/data/hfopenllm_v2/jebish7/Llama-3-Nanda-10B-Chat/739c83a9-8ff7-48df-af0c-494891df487b.json deleted file mode 100644 index 6c70823e5..000000000 --- a/data/hfopenllm_v2/jebish7/Llama-3-Nanda-10B-Chat/739c83a9-8ff7-48df-af0c-494891df487b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jebish7_Llama-3-Nanda-10B-Chat/1762652580.28106", - "retrieved_timestamp": "1762652580.2810612", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jebish7/Llama-3-Nanda-10B-Chat", - "developer": "jebish7", - "inference_platform": "unknown", - "id": "jebish7/Llama-3-Nanda-10B-Chat", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 9.985 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2952831819572069 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4958605204321644 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.055891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4356041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3156582446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/jebish7/Llama-3-Nanda-10B-Chat/99a7881c-cca0-43d6-96f5-ce5292ed60a0.json b/data/hfopenllm_v2/jebish7/Llama-3-Nanda-10B-Chat/99a7881c-cca0-43d6-96f5-ce5292ed60a0.json new file mode 100644 index 000000000..f63ea2032 --- /dev/null +++ b/data/hfopenllm_v2/jebish7/Llama-3-Nanda-10B-Chat/99a7881c-cca0-43d6-96f5-ce5292ed60a0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jebish7_Llama-3-Nanda-10B-Chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Nanda-10B-Chat", + "id": "jebish7/Llama-3-Nanda-10B-Chat", + "developer": "jebish7", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 9.985 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4959 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0559 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4356 + } + 
}, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3157 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jebish7/Llama-3.1-8B-Instruct/60ca8f7e-1c20-4adb-bb84-892bad3c0d63.json b/data/hfopenllm_v2/jebish7/Llama-3.1-8B-Instruct/60ca8f7e-1c20-4adb-bb84-892bad3c0d63.json new file mode 100644 index 000000000..d88b01af6 --- /dev/null +++ b/data/hfopenllm_v2/jebish7/Llama-3.1-8B-Instruct/60ca8f7e-1c20-4adb-bb84-892bad3c0d63.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jebish7_Llama-3.1-8B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8B-Instruct", + "id": "jebish7/Llama-3.1-8B-Instruct", + "developer": "jebish7", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5058 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5088 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1548 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3213 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3998 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3777 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jebish7/Llama-3.1-8B-Instruct/cc65b968-d766-4825-85cd-c36872eb1986.json b/data/hfopenllm_v2/jebish7/Llama-3.1-8B-Instruct/cc65b968-d766-4825-85cd-c36872eb1986.json deleted file mode 100644 index e48974a50..000000000 --- a/data/hfopenllm_v2/jebish7/Llama-3.1-8B-Instruct/cc65b968-d766-4825-85cd-c36872eb1986.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jebish7_Llama-3.1-8B-Instruct/1762652580.281322", - "retrieved_timestamp": "1762652580.281322", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jebish7/Llama-3.1-8B-Instruct", - "developer": "jebish7", - "inference_platform": "unknown", - "id": "jebish7/Llama-3.1-8B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5058345190760515 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5088388495224864 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15483383685800603 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3997916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3777426861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/jebish7/Nemotron-4-Mini-Hindi-4B-Base/4a0f8dc7-9446-4dda-bf49-8cca4851746c.json b/data/hfopenllm_v2/jebish7/Nemotron-4-Mini-Hindi-4B-Base/4a0f8dc7-9446-4dda-bf49-8cca4851746c.json new file mode 100644 index 000000000..9ffd03cf9 --- /dev/null +++ b/data/hfopenllm_v2/jebish7/Nemotron-4-Mini-Hindi-4B-Base/4a0f8dc7-9446-4dda-bf49-8cca4851746c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jebish7_Nemotron-4-Mini-Hindi-4B-Base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": 
"documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nemotron-4-Mini-Hindi-4B-Base", + "id": "jebish7/Nemotron-4-Mini-Hindi-4B-Base", + "developer": "jebish7", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "NemotronForCausalLM", + "params_billions": 4.191 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2285 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3924 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0272 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4249 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2503 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jebish7/Nemotron-4-Mini-Hindi-4B-Base/70097d1f-8c48-49ab-b285-eebe2c85628e.json b/data/hfopenllm_v2/jebish7/Nemotron-4-Mini-Hindi-4B-Base/70097d1f-8c48-49ab-b285-eebe2c85628e.json deleted file mode 100644 index 165b9f5c6..000000000 --- a/data/hfopenllm_v2/jebish7/Nemotron-4-Mini-Hindi-4B-Base/70097d1f-8c48-49ab-b285-eebe2c85628e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jebish7_Nemotron-4-Mini-Hindi-4B-Base/1762652580.2815292", - "retrieved_timestamp": "1762652580.2815301", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - 
"source_type": "documentation" - }, - "model_info": { - "name": "jebish7/Nemotron-4-Mini-Hindi-4B-Base", - "developer": "jebish7", - "inference_platform": "unknown", - "id": "jebish7/Nemotron-4-Mini-Hindi-4B-Base", - "additional_details": { - "precision": "bfloat16", - "architecture": "NemotronForCausalLM", - "params_billions": 4.191 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22848818911599 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3923566745600671 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027190332326283987 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42490625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25033244680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/jebish7/Nemotron-4-Mini-Hindi-4B-Instruct/6eb3a040-8234-4d31-8274-6987b0e4e3b4.json b/data/hfopenllm_v2/jebish7/Nemotron-4-Mini-Hindi-4B-Instruct/6eb3a040-8234-4d31-8274-6987b0e4e3b4.json new file mode 100644 index 000000000..bd15b91c4 --- /dev/null +++ b/data/hfopenllm_v2/jebish7/Nemotron-4-Mini-Hindi-4B-Instruct/6eb3a040-8234-4d31-8274-6987b0e4e3b4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jebish7_Nemotron-4-Mini-Hindi-4B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nemotron-4-Mini-Hindi-4B-Instruct", + "id": "jebish7/Nemotron-4-Mini-Hindi-4B-Instruct", + "developer": "jebish7", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "NemotronForCausalLM", + "params_billions": 4.191 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3345 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": 
"SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4041 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0287 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4153 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2595 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jebish7/Nemotron-4-Mini-Hindi-4B-Instruct/e108df0b-a1ce-4c07-b683-6d3b33fd3988.json b/data/hfopenllm_v2/jebish7/Nemotron-4-Mini-Hindi-4B-Instruct/e108df0b-a1ce-4c07-b683-6d3b33fd3988.json deleted file mode 100644 index e3dba69dd..000000000 --- a/data/hfopenllm_v2/jebish7/Nemotron-4-Mini-Hindi-4B-Instruct/e108df0b-a1ce-4c07-b683-6d3b33fd3988.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jebish7_Nemotron-4-Mini-Hindi-4B-Instruct/1762652580.2817988", - "retrieved_timestamp": "1762652580.2818", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jebish7/Nemotron-4-Mini-Hindi-4B-Instruct", - "developer": "jebish7", - "inference_platform": "unknown", - "id": "jebish7/Nemotron-4-Mini-Hindi-4B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "NemotronForCausalLM", - "params_billions": 4.191 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3345257250761313 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4040596055988545 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41529166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25947473404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/jebish7/Nemotron-Mini-4B-Instruct/16053077-38fd-4136-81a5-fea0d4cd927a.json b/data/hfopenllm_v2/jebish7/Nemotron-Mini-4B-Instruct/16053077-38fd-4136-81a5-fea0d4cd927a.json new file mode 100644 index 000000000..b0a9fe425 --- /dev/null +++ b/data/hfopenllm_v2/jebish7/Nemotron-Mini-4B-Instruct/16053077-38fd-4136-81a5-fea0d4cd927a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jebish7_Nemotron-Mini-4B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nemotron-Mini-4B-Instruct", + "id": "jebish7/Nemotron-Mini-4B-Instruct", + "developer": "jebish7", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "NemotronForCausalLM", + "params_billions": 4.191 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3709 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4244 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0325 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, 
+ "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4727 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2783 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jebish7/Nemotron-Mini-4B-Instruct/77bd2442-4004-48cb-ba45-eeb1ffec2a39.json b/data/hfopenllm_v2/jebish7/Nemotron-Mini-4B-Instruct/77bd2442-4004-48cb-ba45-eeb1ffec2a39.json deleted file mode 100644 index 7dbe660ff..000000000 --- a/data/hfopenllm_v2/jebish7/Nemotron-Mini-4B-Instruct/77bd2442-4004-48cb-ba45-eeb1ffec2a39.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jebish7_Nemotron-Mini-4B-Instruct/1762652580.282024", - "retrieved_timestamp": "1762652580.282024", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jebish7/Nemotron-Mini-4B-Instruct", - "developer": "jebish7", - "inference_platform": "unknown", - "id": "jebish7/Nemotron-Mini-4B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "NemotronForCausalLM", - "params_billions": 4.191 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37092026932982264 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4244475437312765 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0324773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47271875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.27825797872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/jebish7/aya-expanse-8b/25abb99f-536e-4638-8611-a1db5dee931d.json b/data/hfopenllm_v2/jebish7/aya-expanse-8b/25abb99f-536e-4638-8611-a1db5dee931d.json new file mode 100644 index 000000000..d66b00bb7 --- /dev/null +++ b/data/hfopenllm_v2/jebish7/aya-expanse-8b/25abb99f-536e-4638-8611-a1db5dee931d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jebish7_aya-expanse-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "aya-expanse-8b", + "id": "jebish7/aya-expanse-8b", + "developer": "jebish7", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "CohereForCausalLM", + "params_billions": 8.028 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3791 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4969 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0816 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3869 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3103 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jebish7/aya-expanse-8b/70f2cb5c-feb3-44ac-9346-7ff60137e1c7.json b/data/hfopenllm_v2/jebish7/aya-expanse-8b/70f2cb5c-feb3-44ac-9346-7ff60137e1c7.json deleted file mode 100644 index 
e4cce7e5b..000000000 --- a/data/hfopenllm_v2/jebish7/aya-expanse-8b/70f2cb5c-feb3-44ac-9346-7ff60137e1c7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jebish7_aya-expanse-8b/1762652580.282242", - "retrieved_timestamp": "1762652580.282243", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jebish7/aya-expanse-8b", - "developer": "jebish7", - "inference_platform": "unknown", - "id": "jebish7/aya-expanse-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "CohereForCausalLM", - "params_billions": 8.028 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37911408396388246 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.496904421264497 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08157099697885196 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3868958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31025598404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/jebish7/gemma-2-2b-it/aaf0e5bd-b033-455e-bb23-b12b6f7c4520.json b/data/hfopenllm_v2/jebish7/gemma-2-2b-it/aaf0e5bd-b033-455e-bb23-b12b6f7c4520.json new file mode 100644 index 000000000..d43d0aaca --- /dev/null +++ b/data/hfopenllm_v2/jebish7/gemma-2-2b-it/aaf0e5bd-b033-455e-bb23-b12b6f7c4520.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jebish7_gemma-2-2b-it/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-2b-it", + "id": "jebish7/gemma-2-2b-it", + "developer": "jebish7", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", 
+ "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1272 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4395 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.034 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4244 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2715 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jebish7/gemma-2-9b-it/b3a46478-c5f4-4c74-9bf0-d1ba616ae24c.json b/data/hfopenllm_v2/jebish7/gemma-2-9b-it/b3a46478-c5f4-4c74-9bf0-d1ba616ae24c.json new file mode 100644 index 000000000..2343609a8 --- /dev/null +++ b/data/hfopenllm_v2/jebish7/gemma-2-9b-it/b3a46478-c5f4-4c74-9bf0-d1ba616ae24c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jebish7_gemma-2-9b-it/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-9b-it", + "id": "jebish7/gemma-2-9b-it", + "developer": "jebish7", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1557 + } + }, + { + 
"evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5949 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0846 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3473 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4554 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4143 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jebish7/qwen2.5-0.5B-IHA-Hin/169fb05f-5201-47b8-a06e-7d01e574c689.json b/data/hfopenllm_v2/jebish7/qwen2.5-0.5B-IHA-Hin/169fb05f-5201-47b8-a06e-7d01e574c689.json new file mode 100644 index 000000000..8d24c633c --- /dev/null +++ b/data/hfopenllm_v2/jebish7/qwen2.5-0.5B-IHA-Hin/169fb05f-5201-47b8-a06e-7d01e574c689.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jebish7_qwen2.5-0.5B-IHA-Hin/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwen2.5-0.5B-IHA-Hin", + "id": "jebish7/qwen2.5-0.5B-IHA-Hin", + "developer": "jebish7", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1416 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2989 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2525 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3475 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1094 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jeffmeloy/Qwen-7B-nerd-uncensored-v1.0/db076309-32e5-4d46-9786-ff14f8daf5d2.json b/data/hfopenllm_v2/jeffmeloy/Qwen-7B-nerd-uncensored-v1.0/db076309-32e5-4d46-9786-ff14f8daf5d2.json new file mode 100644 index 000000000..88151c7d8 --- /dev/null +++ b/data/hfopenllm_v2/jeffmeloy/Qwen-7B-nerd-uncensored-v1.0/db076309-32e5-4d46-9786-ff14f8daf5d2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen-7B-nerd-uncensored-v1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen-7B-nerd-uncensored-v1.0", + "id": "jeffmeloy/Qwen-7B-nerd-uncensored-v1.0", + "developer": "jeffmeloy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6136 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5421 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", 
+ "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.287 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.328 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4793 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4363 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-minperplexity-2/cde914dc-7d57-425f-9787-e4b8d36d61cf.json b/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-minperplexity-2/cde914dc-7d57-425f-9787-e4b8d36d61cf.json new file mode 100644 index 000000000..470df5e03 --- /dev/null +++ b/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-minperplexity-2/cde914dc-7d57-425f-9787-e4b8d36d61cf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-minperplexity-2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-minperplexity-2", + "id": "jeffmeloy/Qwen2.5-7B-minperplexity-2", + "developer": "jeffmeloy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5097 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5524 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3014 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3112 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4346 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v0.9/5d793ce3-a7fd-4ee3-b32c-c9da63ec0566.json b/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v0.9/5d793ce3-a7fd-4ee3-b32c-c9da63ec0566.json new file mode 100644 index 000000000..b70182eb1 --- /dev/null +++ b/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v0.9/5d793ce3-a7fd-4ee3-b32c-c9da63ec0566.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v0.9/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-nerd-uncensored-v0.9", + "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v0.9", + "developer": "jeffmeloy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6048 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.547 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2946 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": 
"GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.323 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.482 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4363 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.0/8c645c9f-02f6-44a5-b295-d6364ed49464.json b/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.0/8c645c9f-02f6-44a5-b295-d6364ed49464.json new file mode 100644 index 000000000..5f5c44741 --- /dev/null +++ b/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.0/8c645c9f-02f6-44a5-b295-d6364ed49464.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-nerd-uncensored-v1.0", + "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.0", + "developer": "jeffmeloy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7695 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5418 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4713 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4551 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4254 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.1/97bb5519-e2d3-44d5-abf4-b5263c2b3245.json b/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.1/97bb5519-e2d3-44d5-abf4-b5263c2b3245.json new file mode 100644 index 000000000..87afc83a8 --- /dev/null +++ b/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.1/97bb5519-e2d3-44d5-abf4-b5263c2b3245.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-nerd-uncensored-v1.1", + "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.1", + "developer": "jeffmeloy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6626 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4864 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1329 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + 
"dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3843 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.385 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.2/bd3d78d3-3ff1-4a92-a316-e4e30787a331.json b/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.2/bd3d78d3-3ff1-4a92-a316-e4e30787a331.json new file mode 100644 index 000000000..edab0f5d7 --- /dev/null +++ b/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.2/bd3d78d3-3ff1-4a92-a316-e4e30787a331.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-nerd-uncensored-v1.2", + "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.2", + "developer": "jeffmeloy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4965 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4946 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1208 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4172 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3969 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.3/d8951ed7-f4ef-49ce-891e-8d8509e9cf93.json b/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.3/d8951ed7-f4ef-49ce-891e-8d8509e9cf93.json new file mode 100644 index 000000000..ae6f30f79 --- /dev/null +++ b/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.3/d8951ed7-f4ef-49ce-891e-8d8509e9cf93.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-nerd-uncensored-v1.3", + "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.3", + "developer": "jeffmeloy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4995 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5026 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1231 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3129 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4187 + } + }, + { + "evaluation_name": "MMLU-PRO", + 
"source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4016 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.4/e1772d6c-fd26-43a7-82b3-7997d8a6809f.json b/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.4/e1772d6c-fd26-43a7-82b3-7997d8a6809f.json new file mode 100644 index 000000000..130b5f1a1 --- /dev/null +++ b/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.4/e1772d6c-fd26-43a7-82b3-7997d8a6809f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-nerd-uncensored-v1.4", + "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.4", + "developer": "jeffmeloy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6079 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5467 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3238 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4714 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on 
MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4419 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.5/febaf893-6aaf-4c87-89fc-cc865ebf2859.json b/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.5/febaf893-6aaf-4c87-89fc-cc865ebf2859.json new file mode 100644 index 000000000..41e373c25 --- /dev/null +++ b/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.5/febaf893-6aaf-4c87-89fc-cc865ebf2859.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-nerd-uncensored-v1.5", + "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.5", + "developer": "jeffmeloy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.565 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5523 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2757 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3272 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4982 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4448 + } + } + ] +} \ No newline 
at end of file diff --git a/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.7/0ad591f4-c846-4fd1-8536-a169e0a7e4ab.json b/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.7/0ad591f4-c846-4fd1-8536-a169e0a7e4ab.json new file mode 100644 index 000000000..adea07aff --- /dev/null +++ b/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.7/0ad591f4-c846-4fd1-8536-a169e0a7e4ab.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.7/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-nerd-uncensored-v1.7", + "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.7", + "developer": "jeffmeloy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4202 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5392 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2915 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3238 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4848 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.428 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.8/0a318ebd-7bbb-456b-a6e4-9b480a858b5e.json 
b/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.8/0a318ebd-7bbb-456b-a6e4-9b480a858b5e.json new file mode 100644 index 000000000..b0715c0fe --- /dev/null +++ b/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.8/0a318ebd-7bbb-456b-a6e4-9b480a858b5e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.8/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-nerd-uncensored-v1.8", + "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.8", + "developer": "jeffmeloy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6256 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5447 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2704 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3238 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4767 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4343 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-olm-v1.0/e1cfdc32-3c5e-4f4b-a205-f416c96cf5e6.json b/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-olm-v1.0/e1cfdc32-3c5e-4f4b-a205-f416c96cf5e6.json new file mode 100644 index 000000000..ff3474fac --- /dev/null +++ 
b/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-olm-v1.0/e1cfdc32-3c5e-4f4b-a205-f416c96cf5e6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-olm-v1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-olm-v1.0", + "id": "jeffmeloy/Qwen2.5-7B-olm-v1.0", + "developer": "jeffmeloy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5331 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.566 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2863 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3205 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4278 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4566 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-olm-v1.1/85426280-8138-46d0-a111-b59b0d7c86c8.json b/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-olm-v1.1/85426280-8138-46d0-a111-b59b0d7c86c8.json new file mode 100644 index 000000000..9cbcd9f74 --- /dev/null +++ b/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-olm-v1.1/85426280-8138-46d0-a111-b59b0d7c86c8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-olm-v1.1/1770682486.623709", + "retrieved_timestamp": 
"1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-olm-v1.1", + "id": "jeffmeloy/Qwen2.5-7B-olm-v1.1", + "developer": "jeffmeloy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4329 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5478 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3829 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4808 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4354 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-olm-v1.2/32bbd26e-05e7-4a0f-a491-8f54cea9f3d3.json b/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-olm-v1.2/32bbd26e-05e7-4a0f-a491-8f54cea9f3d3.json new file mode 100644 index 000000000..ed920c47e --- /dev/null +++ b/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-olm-v1.2/32bbd26e-05e7-4a0f-a491-8f54cea9f3d3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-olm-v1.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"Qwen2.5-7B-olm-v1.2", + "id": "jeffmeloy/Qwen2.5-7B-olm-v1.2", + "developer": "jeffmeloy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4203 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5533 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2847 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3171 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4688 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4387 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-olm-v1.3/86ed6833-ae85-4a8e-b840-b0c9540083ce.json b/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-olm-v1.3/86ed6833-ae85-4a8e-b840-b0c9540083ce.json new file mode 100644 index 000000000..c0cbb0d3b --- /dev/null +++ b/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-olm-v1.3/86ed6833-ae85-4a8e-b840-b0c9540083ce.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-olm-v1.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-olm-v1.3", + "id": "jeffmeloy/Qwen2.5-7B-olm-v1.3", + "developer": "jeffmeloy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + 
}, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4219 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5532 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3213 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4701 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.447 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-olm-v1.4/2f751ac3-5ca5-4d0d-9ad4-48155e51468a.json b/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-olm-v1.4/2f751ac3-5ca5-4d0d-9ad4-48155e51468a.json new file mode 100644 index 000000000..120c4b77c --- /dev/null +++ b/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-olm-v1.4/2f751ac3-5ca5-4d0d-9ad4-48155e51468a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-olm-v1.4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-olm-v1.4", + "id": "jeffmeloy/Qwen2.5-7B-olm-v1.4", + "developer": "jeffmeloy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4545 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5582 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2923 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4622 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4457 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-olm-v1.5/9677e68d-afda-4917-825c-83318219ff59.json b/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-olm-v1.5/9677e68d-afda-4917-825c-83318219ff59.json new file mode 100644 index 000000000..c40b034ac --- /dev/null +++ b/data/hfopenllm_v2/jeffmeloy/Qwen2.5-7B-olm-v1.5/9677e68d-afda-4917-825c-83318219ff59.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-olm-v1.5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-olm-v1.5", + "id": "jeffmeloy/Qwen2.5-7B-olm-v1.5", + "developer": "jeffmeloy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4547 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": 
"hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5544 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2817 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3398 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4539 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4399 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jeffmeloy/jeffmeloy_Qwen2.5-7B-minperplexity-1/23cd57c2-bf7f-440a-ab3e-edfdede5e8cd.json b/data/hfopenllm_v2/jeffmeloy/jeffmeloy_Qwen2.5-7B-minperplexity-1/23cd57c2-bf7f-440a-ab3e-edfdede5e8cd.json new file mode 100644 index 000000000..a05b1551c --- /dev/null +++ b/data/hfopenllm_v2/jeffmeloy/jeffmeloy_Qwen2.5-7B-minperplexity-1/23cd57c2-bf7f-440a-ab3e-edfdede5e8cd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jeffmeloy_jeffmeloy_Qwen2.5-7B-minperplexity-1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jeffmeloy_Qwen2.5-7B-minperplexity-1", + "id": "jeffmeloy/jeffmeloy_Qwen2.5-7B-minperplexity-1", + "developer": "jeffmeloy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3757 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5582 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2915 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3322 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.429 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4368 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jeonsworld/CarbonVillain-en-10.7B-v4/bd67084e-d9ca-43c4-ab6e-3fbe8a1fb782.json b/data/hfopenllm_v2/jeonsworld/CarbonVillain-en-10.7B-v4/bd67084e-d9ca-43c4-ab6e-3fbe8a1fb782.json deleted file mode 100644 index bf98bb8b5..000000000 --- a/data/hfopenllm_v2/jeonsworld/CarbonVillain-en-10.7B-v4/bd67084e-d9ca-43c4-ab6e-3fbe8a1fb782.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jeonsworld_CarbonVillain-en-10.7B-v4/1762652580.2876348", - "retrieved_timestamp": "1762652580.287636", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jeonsworld/CarbonVillain-en-10.7B-v4", - "developer": "jeonsworld", - "inference_platform": "unknown", - "id": "jeonsworld/CarbonVillain-en-10.7B-v4", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45792386423578324 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.516795955873779 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3965416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31416223404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/jeonsworld/CarbonVillain-en-10.7B-v4/bec23315-f98a-4211-81a0-c49f395e66c9.json b/data/hfopenllm_v2/jeonsworld/CarbonVillain-en-10.7B-v4/bec23315-f98a-4211-81a0-c49f395e66c9.json new file mode 100644 index 000000000..186c8ce3d --- /dev/null +++ b/data/hfopenllm_v2/jeonsworld/CarbonVillain-en-10.7B-v4/bec23315-f98a-4211-81a0-c49f395e66c9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jeonsworld_CarbonVillain-en-10.7B-v4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CarbonVillain-en-10.7B-v4", + "id": "jeonsworld/CarbonVillain-en-10.7B-v4", + "developer": "jeonsworld", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4579 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5168 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0468 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": 
"MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3965 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3142 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jiangxinyang-shanda/Homer-LLama3-8B/1ac5faef-7fa0-4b58-a6ba-0c444a2023a8.json b/data/hfopenllm_v2/jiangxinyang-shanda/Homer-LLama3-8B/1ac5faef-7fa0-4b58-a6ba-0c444a2023a8.json new file mode 100644 index 000000000..122a2bdfb --- /dev/null +++ b/data/hfopenllm_v2/jiangxinyang-shanda/Homer-LLama3-8B/1ac5faef-7fa0-4b58-a6ba-0c444a2023a8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jiangxinyang-shanda_Homer-LLama3-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Homer-LLama3-8B", + "id": "jiangxinyang-shanda/Homer-LLama3-8B", + "developer": "jiangxinyang-shanda", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3992 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5173 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0861 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 
0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4056 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3139 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jieliu/Storm-7B/39327803-11e7-4b28-8750-81feb027e8f3.json b/data/hfopenllm_v2/jieliu/Storm-7B/39327803-11e7-4b28-8750-81feb027e8f3.json new file mode 100644 index 000000000..eaad1d2e4 --- /dev/null +++ b/data/hfopenllm_v2/jieliu/Storm-7B/39327803-11e7-4b28-8750-81feb027e8f3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jieliu_Storm-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Storm-7B", + "id": "jieliu/Storm-7B", + "developer": "jieliu", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3424 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5187 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0612 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4429 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, 
+ "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3119 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jieliu/Storm-7B/f521cb33-487e-4636-9039-fe1af3e090f2.json b/data/hfopenllm_v2/jieliu/Storm-7B/f521cb33-487e-4636-9039-fe1af3e090f2.json deleted file mode 100644 index 1fa52234b..000000000 --- a/data/hfopenllm_v2/jieliu/Storm-7B/f521cb33-487e-4636-9039-fe1af3e090f2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jieliu_Storm-7B/1762652580.288308", - "retrieved_timestamp": "1762652580.288309", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jieliu/Storm-7B", - "developer": "jieliu", - "inference_platform": "unknown", - "id": "jieliu/Storm-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3424192254329623 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5187285371254579 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4428958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3119182180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/jiviai/medX_v2/386bc585-73ed-443e-b8ce-8723c533e41b.json b/data/hfopenllm_v2/jiviai/medX_v2/386bc585-73ed-443e-b8ce-8723c533e41b.json deleted file mode 100644 index d4c8ea785..000000000 --- a/data/hfopenllm_v2/jiviai/medX_v2/386bc585-73ed-443e-b8ce-8723c533e41b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jiviai_medX_v2/1762652580.288615", - "retrieved_timestamp": "1762652580.288616", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM 
v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jiviai/medX_v2", - "developer": "jiviai", - "inference_platform": "unknown", - "id": "jiviai/medX_v2", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37431792089433813 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4508721125093523 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34984375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34283577127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/jiviai/medX_v2/ce2b6874-0fc8-4364-a526-7b25b101e1e3.json b/data/hfopenllm_v2/jiviai/medX_v2/ce2b6874-0fc8-4364-a526-7b25b101e1e3.json new file mode 100644 index 000000000..57f4909ab --- /dev/null +++ b/data/hfopenllm_v2/jiviai/medX_v2/ce2b6874-0fc8-4364-a526-7b25b101e1e3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jiviai_medX_v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "medX_v2", + "id": "jiviai/medX_v2", + "developer": "jiviai", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3743 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.4509 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0544 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.323 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3498 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3428 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jlzhou/Qwen2.5-3B-Infinity-Instruct-0625/09585af5-dd80-4418-8f58-c6ae718a1eee.json b/data/hfopenllm_v2/jlzhou/Qwen2.5-3B-Infinity-Instruct-0625/09585af5-dd80-4418-8f58-c6ae718a1eee.json deleted file mode 100644 index 4c1f3dc21..000000000 --- a/data/hfopenllm_v2/jlzhou/Qwen2.5-3B-Infinity-Instruct-0625/09585af5-dd80-4418-8f58-c6ae718a1eee.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jlzhou_Qwen2.5-3B-Infinity-Instruct-0625/1762652580.288917", - "retrieved_timestamp": "1762652580.288918", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jlzhou/Qwen2.5-3B-Infinity-Instruct-0625", - "developer": "jlzhou", - "inference_platform": "unknown", - "id": "jlzhou/Qwen2.5-3B-Infinity-Instruct-0625", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35575827692744144 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4773774601029352 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.13670694864048338 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39809374999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3198969414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/jlzhou/Qwen2.5-3B-Infinity-Instruct-0625/9f9ebc90-31f9-45c1-b9c2-07b727b12f3d.json b/data/hfopenllm_v2/jlzhou/Qwen2.5-3B-Infinity-Instruct-0625/9f9ebc90-31f9-45c1-b9c2-07b727b12f3d.json new file mode 100644 index 000000000..bacdbbca7 --- /dev/null +++ b/data/hfopenllm_v2/jlzhou/Qwen2.5-3B-Infinity-Instruct-0625/9f9ebc90-31f9-45c1-b9c2-07b727b12f3d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jlzhou_Qwen2.5-3B-Infinity-Instruct-0625/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-3B-Infinity-Instruct-0625", + "id": "jlzhou/Qwen2.5-3B-Infinity-Instruct-0625", + "developer": "jlzhou", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3558 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4774 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1367 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2693 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": 
"TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3981 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3199 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.01/d189a2fc-71f5-4bc9-a0b1-7e744a19921f.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.01/d189a2fc-71f5-4bc9-a0b1-7e744a19921f.json new file mode 100644 index 000000000..c7139a751 --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.01/d189a2fc-71f5-4bc9-a0b1-7e744a19921f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.01/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.01", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.01", + "developer": "johnsutor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4271 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5036 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0453 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3221 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { 
+ "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4638 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3739 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.01/f7207c82-5fc7-447a-b532-42bdb77ecfb4.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.01/f7207c82-5fc7-447a-b532-42bdb77ecfb4.json deleted file mode 100644 index eb20af00c..000000000 --- a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.01/f7207c82-5fc7-447a-b532-42bdb77ecfb4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.01/1762652580.289233", - "retrieved_timestamp": "1762652580.289234", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.01", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.01", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42712447417297217 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5035519809362171 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221476510067114 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4637604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.37391954787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.1/1eb697fe-9dd4-4a41-aa47-33456df39e2d.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.1/1eb697fe-9dd4-4a41-aa47-33456df39e2d.json new file mode 100644 index 000000000..a10daf5d6 --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.1/1eb697fe-9dd4-4a41-aa47-33456df39e2d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.1", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.1", + "developer": "johnsutor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4253 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5019 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0967 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.415 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3724 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.1/592dcd83-1adb-4193-add2-fb0ae66ea7ee.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.1/592dcd83-1adb-4193-add2-fb0ae66ea7ee.json deleted file mode 100644 index 8c568bc6a..000000000 --- a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.1/592dcd83-1adb-4193-add2-fb0ae66ea7ee.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.1/1762652580.289527", - "retrieved_timestamp": "1762652580.2895281", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.1", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42532591302189304 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5018845446835877 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09667673716012085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41502083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37242353723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.01/2c82f973-c6cb-4aa2-9121-51bb0343aae4.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.01/2c82f973-c6cb-4aa2-9121-51bb0343aae4.json deleted file mode 100644 index 6252efa6c..000000000 --- a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.01/2c82f973-c6cb-4aa2-9121-51bb0343aae4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.01/1762652580.2897432", - "retrieved_timestamp": "1762652580.289744", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.01", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.01", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33774828565982706 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4917135045463188 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5017708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3533078457446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.01/5f10df7b-cd2c-44ca-b13a-2852483c71f8.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.01/5f10df7b-cd2c-44ca-b13a-2852483c71f8.json new file mode 100644 index 000000000..c3e92c793 --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.01/5f10df7b-cd2c-44ca-b13a-2852483c71f8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.01/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.01", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.01", + "developer": "johnsutor", + "inference_platform": "unknown", + "additional_details": { + "precision": 
"bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3377 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4917 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0106 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5018 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3533 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.1/3abbb4b6-8050-44fd-b066-0f061ce2f4d7.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.1/3abbb4b6-8050-44fd-b066-0f061ce2f4d7.json new file mode 100644 index 000000000..8e8a04411 --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.1/3abbb4b6-8050-44fd-b066-0f061ce2f4d7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.1", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.1", + "developer": "johnsutor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": 
"LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4274 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5126 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0808 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4226 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3739 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.1/42b63cfd-3b06-4363-bf78-40c40da10299.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.1/42b63cfd-3b06-4363-bf78-40c40da10299.json deleted file mode 100644 index 3426d2506..000000000 --- a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.1/42b63cfd-3b06-4363-bf78-40c40da10299.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.1/1762652580.289967", - "retrieved_timestamp": "1762652580.289967", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.1", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": 
"johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4273993005226133 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5125777877188348 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08081570996978851 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42264583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37391954787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.01/5f47e65d-293f-469e-a18f-5627ca1adf44.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.01/5f47e65d-293f-469e-a18f-5627ca1adf44.json new file mode 100644 index 000000000..f7941ba70 --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.01/5f47e65d-293f-469e-a18f-5627ca1adf44.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.01/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.01", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.01", + "developer": "johnsutor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3204 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": 
"SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4884 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0038 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5098 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3344 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.01/cd4acb74-9433-435c-b0e9-9750fa52e3c0.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.01/cd4acb74-9433-435c-b0e9-9750fa52e3c0.json deleted file mode 100644 index f3d58880e..000000000 --- a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.01/cd4acb74-9433-435c-b0e9-9750fa52e3c0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.01/1762652580.2902021", - "retrieved_timestamp": "1762652580.2902029", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.01", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.01", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32036219453272874 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48835763921755193 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0037764350453172208 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5097708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33444148936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.1/b753c1aa-8a0c-4600-99ec-8eb51ab50da7.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.1/b753c1aa-8a0c-4600-99ec-8eb51ab50da7.json new file mode 100644 index 000000000..9fd6df7ed --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.1/b753c1aa-8a0c-4600-99ec-8eb51ab50da7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.1", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.1", + "developer": "johnsutor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4396 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.514 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.0801 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4398 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3696 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.1/e9a9ec78-4ada-4ce4-ad92-c27332279f84.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.1/e9a9ec78-4ada-4ce4-ad92-c27332279f84.json deleted file mode 100644 index 2402d1dc6..000000000 --- a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.1/e9a9ec78-4ada-4ce4-ad92-c27332279f84.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.1/1762652580.290431", - "retrieved_timestamp": "1762652580.290432", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.1", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43963904661852776 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5140041302485145 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08006042296072508 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43979166666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36959773936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.01/15c21655-9af8-4bee-9884-b047683e9adf.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.01/15c21655-9af8-4bee-9884-b047683e9adf.json new file mode 100644 index 000000000..2d9d72344 --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.01/15c21655-9af8-4bee-9884-b047683e9adf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.01/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.01", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.01", + "developer": "johnsutor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2814 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4854 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0023 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5163 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3295 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.01/b4e42076-bbff-4179-897d-b45a0e959020.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.01/b4e42076-bbff-4179-897d-b45a0e959020.json deleted file mode 100644 index da08100a5..000000000 --- a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.01/b4e42076-bbff-4179-897d-b45a0e959020.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.01/1762652580.290661", - "retrieved_timestamp": "1762652580.2906618", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.01", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.01", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2814443454478561 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4854325756272537 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0022658610271903325 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5163125000000001 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3295378989361702 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.1/4017ff46-f389-4024-be9c-4360b0b6e64c.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.1/4017ff46-f389-4024-be9c-4360b0b6e64c.json deleted file mode 100644 index 1628f20d7..000000000 --- a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.1/4017ff46-f389-4024-be9c-4360b0b6e64c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.1/1762652580.2908769", - "retrieved_timestamp": "1762652580.290878", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.1", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4302218114602588 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5157097379648965 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06268882175226587 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43315624999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36627327127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.1/f642de95-218a-4db0-807f-1bb97618b4f6.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.1/f642de95-218a-4db0-807f-1bb97618b4f6.json new file mode 100644 index 000000000..0a6e5d576 --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.1/f642de95-218a-4db0-807f-1bb97618b4f6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.1", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.1", + "developer": "johnsutor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4302 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5157 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0627 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4332 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3663 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.01/01443b06-9ad3-41f5-ae0d-bc84086e0a0d.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.01/01443b06-9ad3-41f5-ae0d-bc84086e0a0d.json new file mode 100644 index 000000000..960cb299c --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.01/01443b06-9ad3-41f5-ae0d-bc84086e0a0d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.01/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.01", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.01", + "developer": "johnsutor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.279 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4861 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0015 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.515 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3305 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.01/6bef1092-ece2-4aeb-8dbe-0e1a02c95f2f.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.01/6bef1092-ece2-4aeb-8dbe-0e1a02c95f2f.json deleted file mode 100644 index ffd0ba1ee..000000000 --- a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.01/6bef1092-ece2-4aeb-8dbe-0e1a02c95f2f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.01/1762652580.2910998", - "retrieved_timestamp": "1762652580.291101", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.01", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.01", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2789963962286732 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48611535229340735 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0015105740181268882 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5150104166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3304521276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.1/1ee8c377-2236-4225-942f-ef8ce5770741.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.1/1ee8c377-2236-4225-942f-ef8ce5770741.json new file mode 100644 index 000000000..25f0a4caa --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.1/1ee8c377-2236-4225-942f-ef8ce5770741.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.1", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.1", + "developer": "johnsutor", + "inference_platform": "unknown", + "additional_details": { + "precision": 
"bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4223 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5154 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.074 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4384 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.365 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.1/872cddea-7a06-4b80-9243-423bf49c222c.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.1/872cddea-7a06-4b80-9243-423bf49c222c.json deleted file mode 100644 index 6c6dbfa71..000000000 --- a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.1/872cddea-7a06-4b80-9243-423bf49c222c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.1/1762652580.291321", - "retrieved_timestamp": "1762652580.291322", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.1", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": 
"johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4222784434190171 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5153764046315631 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07401812688821752 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4384270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3650265957446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.01/4ee9aa78-d9eb-4a1c-91c4-f29f093b95d3.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.01/4ee9aa78-d9eb-4a1c-91c4-f29f093b95d3.json new file mode 100644 index 000000000..9cbebea51 --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.01/4ee9aa78-d9eb-4a1c-91c4-f29f093b95d3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.01/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.01", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.01", + "developer": "johnsutor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4359 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", 
+ "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5041 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0483 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4532 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3762 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.01/9dfd4a1b-fa18-4d54-a7bd-a519f87b532b.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.01/9dfd4a1b-fa18-4d54-a7bd-a519f87b532b.json deleted file mode 100644 index 8ea1073e0..000000000 --- a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.01/9dfd4a1b-fa18-4d54-a7bd-a519f87b532b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.01/1762652580.291548", - "retrieved_timestamp": "1762652580.291548", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.01", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.01", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4358923212631374 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on 
BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5040935986635269 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04833836858006042 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45315625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3762466755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.1/184a8906-d998-4e03-bf6f-f66ca904a7b7.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.1/184a8906-d998-4e03-bf6f-f66ca904a7b7.json deleted file mode 100644 index 1dece87f1..000000000 --- a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.1/184a8906-d998-4e03-bf6f-f66ca904a7b7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.1/1762652580.291779", - "retrieved_timestamp": "1762652580.29178", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.1", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4201551882338861 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.501124270710985 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09818731117824774 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41502083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3699301861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.1/419c6631-805f-43ba-9db8-5296f8d221ec.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.1/419c6631-805f-43ba-9db8-5296f8d221ec.json new file mode 100644 index 000000000..bd49fe5af --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.1/419c6631-805f-43ba-9db8-5296f8d221ec.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.1", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.1", + "developer": "johnsutor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4202 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5011 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0982 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": 
"TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.415 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3699 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.01/11f14586-5f0c-4e0b-b41e-f3e0f298b781.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.01/11f14586-5f0c-4e0b-b41e-f3e0f298b781.json deleted file mode 100644 index 4eda5ded3..000000000 --- a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.01/11f14586-5f0c-4e0b-b41e-f3e0f298b781.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.01/1762652580.292005", - "retrieved_timestamp": "1762652580.2920058", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.01", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.01", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35178659290682057 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49985217584312186 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.023413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48710416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.3611203457446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.01/3fc1822f-4a43-4a3b-90d7-fc163491c90a.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.01/3fc1822f-4a43-4a3b-90d7-fc163491c90a.json new file mode 100644 index 000000000..e230b5938 --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.01/3fc1822f-4a43-4a3b-90d7-fc163491c90a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.01/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.01", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.01", + "developer": "johnsutor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3518 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4999 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0234 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4871 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.3611 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.1/3b9966ca-8157-4f32-b276-9d36dd1045e1.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.1/3b9966ca-8157-4f32-b276-9d36dd1045e1.json deleted file mode 100644 index 8edf0f661..000000000 --- a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.1/3b9966ca-8157-4f32-b276-9d36dd1045e1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.1/1762652580.2922251", - "retrieved_timestamp": "1762652580.292226", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.1", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42038014689911657 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5107301269172088 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08761329305135952 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42785416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37101063829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.1/76b4037b-c5d0-435f-966a-bd88b1665dad.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.1/76b4037b-c5d0-435f-966a-bd88b1665dad.json new file mode 100644 index 000000000..449e51cc0 --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.1/76b4037b-c5d0-435f-966a-bd88b1665dad.json @@ -0,0 +1,132 @@ 
+{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.1", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.1", + "developer": "johnsutor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4204 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5107 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0876 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4279 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.371 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.01/414c1eec-86bc-4d86-a014-2ea586eebfb1.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.01/414c1eec-86bc-4d86-a014-2ea586eebfb1.json deleted file mode 100644 index ceeec7cc2..000000000 --- a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.01/414c1eec-86bc-4d86-a014-2ea586eebfb1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - 
"schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.01/1762652580.292447", - "retrieved_timestamp": "1762652580.292447", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.01", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.01", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34541682735142754 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4983827321097329 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49113541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3531416223404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.01/757b85e7-84c8-429f-aeb4-870852fa8959.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.01/757b85e7-84c8-429f-aeb4-870852fa8959.json new file mode 100644 index 000000000..3c0a95ced --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.01/757b85e7-84c8-429f-aeb4-870852fa8959.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.01/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.01", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.01", + "developer": 
"johnsutor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3454 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4984 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0219 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4911 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3531 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.1/acab4982-1205-4362-803e-306b1e2371bf.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.1/acab4982-1205-4362-803e-306b1e2371bf.json new file mode 100644 index 000000000..07aaad644 --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.1/acab4982-1205-4362-803e-306b1e2371bf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.1", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.1", + "developer": "johnsutor", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4092 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5137 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0808 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4357 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3669 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.1/c9e8c1d4-c031-4f90-a14b-30633e75f2c3.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.1/c9e8c1d4-c031-4f90-a14b-30633e75f2c3.json deleted file mode 100644 index 1ccaf66a3..000000000 --- a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.1/c9e8c1d4-c031-4f90-a14b-30633e75f2c3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.1/1762652580.292675", - "retrieved_timestamp": "1762652580.2926762", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.1", 
- "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40916435058976847 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.513665952913411 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08081570996978851 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43569791666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.366938164893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.01/0e549b5d-c1d9-443d-9a80-8dd34dadd22e.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.01/0e549b5d-c1d9-443d-9a80-8dd34dadd22e.json new file mode 100644 index 000000000..a9858ea11 --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.01/0e549b5d-c1d9-443d-9a80-8dd34dadd22e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.01/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.01", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.01", + "developer": "johnsutor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2904 + } + }, + { + "evaluation_name": "BBH", 
+ "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4967 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0144 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4991 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.349 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.01/4532b233-abbc-4fbd-ba77-801eb1398361.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.01/4532b233-abbc-4fbd-ba77-801eb1398361.json deleted file mode 100644 index 26a2a2d63..000000000 --- a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.01/4532b233-abbc-4fbd-ba77-801eb1398361.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.01/1762652580.292904", - "retrieved_timestamp": "1762652580.2929049", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.01", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.01", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29038728351884113 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4967337534367295 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4990729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34898603723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.1/d3d4eccc-8792-40e5-91cf-22885f4cbaf5.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.1/d3d4eccc-8792-40e5-91cf-22885f4cbaf5.json new file mode 100644 index 000000000..0de836a5f --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.1/d3d4eccc-8792-40e5-91cf-22885f4cbaf5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.1", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.1", + "developer": "johnsutor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4199 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5147 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact 
Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0808 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4358 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3615 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.1/d5916658-91c3-418f-9cd6-c49dcc8927a3.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.1/d5916658-91c3-418f-9cd6-c49dcc8927a3.json deleted file mode 100644 index 6b29b5e89..000000000 --- a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.1/d5916658-91c3-418f-9cd6-c49dcc8927a3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.1/1762652580.2931998", - "retrieved_timestamp": "1762652580.293205", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.1", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41988036188424493 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5146905664948336 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08081570996978851 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": 
{ - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43576041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3615359042553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.01/708aded5-6252-44e3-bf0d-08bf3e7f32e0.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.01/708aded5-6252-44e3-bf0d-08bf3e7f32e0.json new file mode 100644 index 000000000..987fd3b28 --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.01/708aded5-6252-44e3-bf0d-08bf3e7f32e0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.01/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.01", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.01", + "developer": "johnsutor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2913 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4918 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0106 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + 
"source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4977 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3454 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.01/a29cab83-e937-4a2a-a9fd-986fd1c67e03.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.01/a29cab83-e937-4a2a-a9fd-986fd1c67e03.json deleted file mode 100644 index f85a722d4..000000000 --- a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.01/a29cab83-e937-4a2a-a9fd-986fd1c67e03.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.01/1762652580.293625", - "retrieved_timestamp": "1762652580.293626", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.01", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.01", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29131149793658606 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49182964384768835 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4976770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy 
on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34541223404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.1/2aae97a9-6d0a-438d-9f74-e7a30e85face.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.1/2aae97a9-6d0a-438d-9f74-e7a30e85face.json deleted file mode 100644 index f2bfa2092..000000000 --- a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.1/2aae97a9-6d0a-438d-9f74-e7a30e85face.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.1/1762652580.293948", - "retrieved_timestamp": "1762652580.293949", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.1", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41623337189767595 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5138610942606995 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07779456193353475 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43172916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3624501329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.1/ce6d31f2-f38e-4af3-85a3-d2f6c80f71f1.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.1/ce6d31f2-f38e-4af3-85a3-d2f6c80f71f1.json new file mode 100644 index 000000000..50c1000ff --- /dev/null +++ 
b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.1/ce6d31f2-f38e-4af3-85a3-d2f6c80f71f1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.1", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.1", + "developer": "johnsutor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4162 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5139 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0778 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4317 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3625 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_linear/060fe548-f690-4492-9c0f-ada0210b0386.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_linear/060fe548-f690-4492-9c0f-ada0210b0386.json deleted file mode 100644 index 16ffa4816..000000000 --- 
a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_linear/060fe548-f690-4492-9c0f-ada0210b0386.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_dare_linear/1762652580.294196", - "retrieved_timestamp": "1762652580.294197", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_dare_linear", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_dare_linear", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21454961723781787 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4282807940700452 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49792708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24143949468085107 - } - } - ] -} diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_linear/5efcc291-ca9a-4ca9-b2ed-dab37dce5f5a.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_linear/5efcc291-ca9a-4ca9-b2ed-dab37dce5f5a.json new file mode 100644 index 000000000..4cb808ae2 --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_linear/5efcc291-ca9a-4ca9-b2ed-dab37dce5f5a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_dare_linear/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct_dare_linear", + "id": "johnsutor/Llama-3-8B-Instruct_dare_linear", + "developer": "johnsutor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": 
"LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2145 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4283 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4979 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2414 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.1/47320824-8064-40d4-a08c-810faafbba77.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.1/47320824-8064-40d4-a08c-810faafbba77.json new file mode 100644 index 000000000..2aa16cb5b --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.1/47320824-8064-40d4-a08c-810faafbba77.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_dare_ties-density-0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct_dare_ties-density-0.1", + "id": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.1", + "developer": "johnsutor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + 
"source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1891 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4119 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0008 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2718 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4658 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2265 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.1/7d709f22-c4e8-4903-b924-a86728dcf26b.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.1/7d709f22-c4e8-4903-b924-a86728dcf26b.json deleted file mode 100644 index 533f3cf52..000000000 --- a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.1/7d709f22-c4e8-4903-b924-a86728dcf26b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_dare_ties-density-0.1/1762652580.2944481", - "retrieved_timestamp": "1762652580.2944489", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.1", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": 
"IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18907055501624578 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41187360174735804 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46580208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22647938829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.3/8baeef58-0ba6-4723-8f23-7a4c386f2cad.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.3/8baeef58-0ba6-4723-8f23-7a4c386f2cad.json new file mode 100644 index 000000000..0f28a208f --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.3/8baeef58-0ba6-4723-8f23-7a4c386f2cad.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_dare_ties-density-0.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct_dare_ties-density-0.3", + "id": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.3", + "developer": "johnsutor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2113 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4559 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 
5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0015 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5069 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.304 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.3/c45c03dd-efbe-4c86-a07d-e7831210e017.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.3/c45c03dd-efbe-4c86-a07d-e7831210e017.json deleted file mode 100644 index db7356fdf..000000000 --- a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.3/c45c03dd-efbe-4c86-a07d-e7831210e017.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_dare_ties-density-0.3/1762652580.294691", - "retrieved_timestamp": "1762652580.294692", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.3", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.3", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21132705665412216 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4558569854124363 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0015105740181268882 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5069479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30402260638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.7/0387ca63-1e31-4eaa-ac7c-35d417548c54.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.7/0387ca63-1e31-4eaa-ac7c-35d417548c54.json new file mode 100644 index 000000000..d8989fbe5 --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.7/0387ca63-1e31-4eaa-ac7c-35d417548c54.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_dare_ties-density-0.7/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct_dare_ties-density-0.7", + "id": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.7", + "developer": "johnsutor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2034 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4723 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.003 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": 
"hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.511 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3148 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.7/3b51b346-a23c-4add-9623-86c9591eddd0.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.7/3b51b346-a23c-4add-9623-86c9591eddd0.json deleted file mode 100644 index fe5b91997..000000000 --- a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.7/3b51b346-a23c-4add-9623-86c9591eddd0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_dare_ties-density-0.7/1762652580.2949278", - "retrieved_timestamp": "1762652580.2949288", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.7", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.7", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20338368861288048 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4722858888388635 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0030211480362537764 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5110104166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3148271276595745 - } - 
} - ] -} diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.9/35557106-88b1-4f6a-bf33-17ea6744f208.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.9/35557106-88b1-4f6a-bf33-17ea6744f208.json deleted file mode 100644 index ff762735e..000000000 --- a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.9/35557106-88b1-4f6a-bf33-17ea6744f208.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_dare_ties-density-0.9/1762652580.29516", - "retrieved_timestamp": "1762652580.295161", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.9", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.9", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21607335203925582 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46639610671811504 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0015105740181268882 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5230416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3143284574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.9/733983fe-4b9c-47e6-963d-c57829b6f1af.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.9/733983fe-4b9c-47e6-963d-c57829b6f1af.json new file mode 100644 index 000000000..ad8724889 --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.9/733983fe-4b9c-47e6-963d-c57829b6f1af.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_dare_ties-density-0.9/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": 
"HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct_dare_ties-density-0.9", + "id": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.9", + "developer": "johnsutor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2161 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4664 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0015 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.523 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3143 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_linear/80c4859d-8016-4650-939f-100ba2e6d808.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_linear/80c4859d-8016-4650-939f-100ba2e6d808.json new file mode 100644 index 000000000..011100dc4 --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_linear/80c4859d-8016-4650-939f-100ba2e6d808.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_linear/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"Llama-3-8B-Instruct_linear", + "id": "johnsutor/Llama-3-8B-Instruct_linear", + "developer": "johnsutor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4308 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5031 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1005 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4097 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3712 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_linear/89b55a5a-8f83-4a87-906a-32c1e84b8220.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_linear/89b55a5a-8f83-4a87-906a-32c1e84b8220.json deleted file mode 100644 index d6b9a2a3b..000000000 --- a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_linear/89b55a5a-8f83-4a87-906a-32c1e84b8220.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_linear/1762652580.295396", - "retrieved_timestamp": "1762652580.295396", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_linear", - "developer": "johnsutor", - "inference_platform": 
"unknown", - "id": "johnsutor/Llama-3-8B-Instruct_linear", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4308213318439518 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5031496839210309 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10045317220543806 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40971874999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37117686170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.1/21724d3a-cc6c-43eb-9d69-46d8d91c97f8.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.1/21724d3a-cc6c-43eb-9d69-46d8d91c97f8.json new file mode 100644 index 000000000..9778173ee --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.1/21724d3a-cc6c-43eb-9d69-46d8d91c97f8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_ties-density-0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct_ties-density-0.1", + "id": "johnsutor/Llama-3-8B-Instruct_ties-density-0.1", + "developer": "johnsutor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4116 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5021 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0793 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2886 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4174 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.36 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.1/ec8e412e-96e8-43ae-98e1-f605228f3f6d.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.1/ec8e412e-96e8-43ae-98e1-f605228f3f6d.json deleted file mode 100644 index e317aadf8..000000000 --- a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.1/ec8e412e-96e8-43ae-98e1-f605228f3f6d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_ties-density-0.1/1762652580.295634", - "retrieved_timestamp": "1762652580.295635", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_ties-density-0.1", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_ties-density-0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41161229980895137 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5021445196013956 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH 
Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07930513595166164 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.417375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36003989361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.3/29b19ca6-ec5f-4ef1-9721-cb2199661873.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.3/29b19ca6-ec5f-4ef1-9721-cb2199661873.json deleted file mode 100644 index 532c261b2..000000000 --- a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.3/29b19ca6-ec5f-4ef1-9721-cb2199661873.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_ties-density-0.3/1762652580.29586", - "retrieved_timestamp": "1762652580.295861", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_ties-density-0.3", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_ties-density-0.3", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3626278274977061 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49061122520005807 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06722054380664652 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40248958333333335 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33211436170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.3/d781945e-e9df-4136-90cd-632f0bed6246.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.3/d781945e-e9df-4136-90cd-632f0bed6246.json new file mode 100644 index 000000000..386af3f39 --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.3/d781945e-e9df-4136-90cd-632f0bed6246.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_ties-density-0.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct_ties-density-0.3", + "id": "johnsutor/Llama-3-8B-Instruct_ties-density-0.3", + "developer": "johnsutor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3626 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4906 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0672 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4025 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3321 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.5/12f38eb7-57be-45c6-a53a-9d4859413e94.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.5/12f38eb7-57be-45c6-a53a-9d4859413e94.json deleted file mode 100644 index d8b1aa6ef..000000000 --- a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.5/12f38eb7-57be-45c6-a53a-9d4859413e94.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_ties-density-0.5/1762652580.2960892", - "retrieved_timestamp": "1762652580.2960901", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_ties-density-0.5", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_ties-density-0.5", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37966373666316483 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47931248948849836 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3879791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31748670212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.5/8f146bb5-dd4d-49ce-ac60-76f66321feb8.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.5/8f146bb5-dd4d-49ce-ac60-76f66321feb8.json new file mode 100644 index 000000000..dfb672445 --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.5/8f146bb5-dd4d-49ce-ac60-76f66321feb8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_ties-density-0.5/1770682486.623709", + 
"retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct_ties-density-0.5", + "id": "johnsutor/Llama-3-8B-Instruct_ties-density-0.5", + "developer": "johnsutor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3797 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4793 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0612 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.388 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3175 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.7/22ae576f-6bec-450f-812f-4315779be0a1.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.7/22ae576f-6bec-450f-812f-4315779be0a1.json deleted file mode 100644 index 6517b84bb..000000000 --- a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.7/22ae576f-6bec-450f-812f-4315779be0a1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_ties-density-0.7/1762652580.296313", - "retrieved_timestamp": "1762652580.296314", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_ties-density-0.7", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_ties-density-0.7", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3681232463197649 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4738186124296502 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06722054380664652 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3880729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3152426861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.7/89bfba6d-c622-445e-b0b9-512aadcea7cf.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.7/89bfba6d-c622-445e-b0b9-512aadcea7cf.json new file mode 100644 index 000000000..5246939f3 --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.7/89bfba6d-c622-445e-b0b9-512aadcea7cf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_ties-density-0.7/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct_ties-density-0.7", + "id": "johnsutor/Llama-3-8B-Instruct_ties-density-0.7", + "developer": "johnsutor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3681 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4738 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0672 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3881 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3152 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.9/76c364c1-1e67-4536-8f23-85f84f0cd554.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.9/76c364c1-1e67-4536-8f23-85f84f0cd554.json deleted file mode 100644 index 47aed9064..000000000 --- a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.9/76c364c1-1e67-4536-8f23-85f84f0cd554.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_ties-density-0.9/1762652580.296535", - "retrieved_timestamp": "1762652580.296536", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_ties-density-0.9", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_ties-density-0.9", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3858085435533274 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47354321136013144 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.061933534743202415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3880416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3181515957446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.9/9c27f2e6-ebbe-4fac-bc51-74455d3a6512.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.9/9c27f2e6-ebbe-4fac-bc51-74455d3a6512.json new file mode 100644 index 000000000..248669cbb --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.9/9c27f2e6-ebbe-4fac-bc51-74455d3a6512.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_ties-density-0.9/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct_ties-density-0.9", + "id": "johnsutor/Llama-3-8B-Instruct_ties-density-0.9", + "developer": "johnsutor", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3858 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4735 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0619 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.388 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3182 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jpacifico/Chocolatine-14B-Instruct-4k-DPO/455ef1e0-bdf2-49bf-a53d-2c9e3d00d5f3.json b/data/hfopenllm_v2/jpacifico/Chocolatine-14B-Instruct-4k-DPO/455ef1e0-bdf2-49bf-a53d-2c9e3d00d5f3.json new file mode 100644 index 000000000..300e25443 --- /dev/null +++ b/data/hfopenllm_v2/jpacifico/Chocolatine-14B-Instruct-4k-DPO/455ef1e0-bdf2-49bf-a53d-2c9e3d00d5f3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-14B-Instruct-4k-DPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Chocolatine-14B-Instruct-4k-DPO", + "id": "jpacifico/Chocolatine-14B-Instruct-4k-DPO", + "developer": "jpacifico", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 13.96 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4689 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.63 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1782 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", 
+ "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3414 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4439 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4764 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jpacifico/Chocolatine-14B-Instruct-4k-DPO/fe0cfe19-b019-459e-a71d-46d55612a95e.json b/data/hfopenllm_v2/jpacifico/Chocolatine-14B-Instruct-4k-DPO/fe0cfe19-b019-459e-a71d-46d55612a95e.json deleted file mode 100644 index 3bc4db22a..000000000 --- a/data/hfopenllm_v2/jpacifico/Chocolatine-14B-Instruct-4k-DPO/fe0cfe19-b019-459e-a71d-46d55612a95e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-14B-Instruct-4k-DPO/1762652580.296761", - "retrieved_timestamp": "1762652580.2967622", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jpacifico/Chocolatine-14B-Instruct-4k-DPO", - "developer": "jpacifico", - "inference_platform": "unknown", - "id": "jpacifico/Chocolatine-14B-Instruct-4k-DPO", - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 13.96 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4688648341954902 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6299582409761587 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1782477341389728 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3414429530201342 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.44388541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4763962765957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/jpacifico/Chocolatine-14B-Instruct-DPO-v1.2/aae9e150-7992-4241-91af-0c55d03d709f.json b/data/hfopenllm_v2/jpacifico/Chocolatine-14B-Instruct-DPO-v1.2/aae9e150-7992-4241-91af-0c55d03d709f.json deleted file mode 100644 index 0d43d76b8..000000000 --- a/data/hfopenllm_v2/jpacifico/Chocolatine-14B-Instruct-DPO-v1.2/aae9e150-7992-4241-91af-0c55d03d709f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-14B-Instruct-DPO-v1.2/1762652580.297051", - "retrieved_timestamp": "1762652580.297052", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jpacifico/Chocolatine-14B-Instruct-DPO-v1.2", - "developer": "jpacifico", - "inference_platform": "unknown", - "id": "jpacifico/Chocolatine-14B-Instruct-DPO-v1.2", - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 13.96 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6852107962428579 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6438408959901142 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20921450151057402 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32550335570469796 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4267708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46966422872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/jpacifico/Chocolatine-14B-Instruct-DPO-v1.2/e04a76a6-ac22-43b2-bbf9-196a08de2949.json b/data/hfopenllm_v2/jpacifico/Chocolatine-14B-Instruct-DPO-v1.2/e04a76a6-ac22-43b2-bbf9-196a08de2949.json new file mode 100644 index 000000000..ec8836b29 --- /dev/null +++ 
b/data/hfopenllm_v2/jpacifico/Chocolatine-14B-Instruct-DPO-v1.2/e04a76a6-ac22-43b2-bbf9-196a08de2949.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-14B-Instruct-DPO-v1.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Chocolatine-14B-Instruct-DPO-v1.2", + "id": "jpacifico/Chocolatine-14B-Instruct-DPO-v1.2", + "developer": "jpacifico", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 13.96 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6852 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6438 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2092 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3255 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4268 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4697 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jpacifico/Chocolatine-14B-Instruct-DPO-v1.3/2fcb74f0-add1-4d46-8a0f-8578a616dbed.json b/data/hfopenllm_v2/jpacifico/Chocolatine-14B-Instruct-DPO-v1.3/2fcb74f0-add1-4d46-8a0f-8578a616dbed.json new file mode 100644 index 000000000..e3fa490e4 --- /dev/null +++ b/data/hfopenllm_v2/jpacifico/Chocolatine-14B-Instruct-DPO-v1.3/2fcb74f0-add1-4d46-8a0f-8578a616dbed.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + 
"evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-14B-Instruct-DPO-v1.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Chocolatine-14B-Instruct-DPO-v1.3", + "id": "jpacifico/Chocolatine-14B-Instruct-DPO-v1.3", + "developer": "jpacifico", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.704 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6846 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5619 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3414 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4234 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5374 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jpacifico/Chocolatine-14B-Instruct-DPO-v1.3/b56c681a-592f-491a-aa0a-030848356563.json b/data/hfopenllm_v2/jpacifico/Chocolatine-14B-Instruct-DPO-v1.3/b56c681a-592f-491a-aa0a-030848356563.json deleted file mode 100644 index 99c81539a..000000000 --- a/data/hfopenllm_v2/jpacifico/Chocolatine-14B-Instruct-DPO-v1.3/b56c681a-592f-491a-aa0a-030848356563.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-14B-Instruct-DPO-v1.3/1762652580.2973812", - "retrieved_timestamp": "1762652580.297384", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jpacifico/Chocolatine-14B-Instruct-DPO-v1.3", - "developer": "jpacifico", - "inference_platform": "unknown", - "id": "jpacifico/Chocolatine-14B-Instruct-DPO-v1.3", - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.703995398874985 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6846125547592651 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5619335347432024 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3414429530201342 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42339583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5374002659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-DPO-v2.0b1/51530638-ef76-43ce-9396-8a0d07988712.json b/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-DPO-v2.0b1/51530638-ef76-43ce-9396-8a0d07988712.json new file mode 100644 index 000000000..a7ff62b04 --- /dev/null +++ b/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-DPO-v2.0b1/51530638-ef76-43ce-9396-8a0d07988712.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-2-14B-Instruct-DPO-v2.0b1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Chocolatine-2-14B-Instruct-DPO-v2.0b1", + "id": "jpacifico/Chocolatine-2-14B-Instruct-DPO-v2.0b1", + "developer": "jpacifico", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy 
on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1033 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6696 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2757 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3758 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4467 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5124 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-DPO-v2.0b1/9ae740a8-6d7c-438c-942f-11ac0f6cbe79.json b/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-DPO-v2.0b1/9ae740a8-6d7c-438c-942f-11ac0f6cbe79.json deleted file mode 100644 index 404500275..000000000 --- a/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-DPO-v2.0b1/9ae740a8-6d7c-438c-942f-11ac0f6cbe79.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-2-14B-Instruct-DPO-v2.0b1/1762652580.2977622", - "retrieved_timestamp": "1762652580.297763", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jpacifico/Chocolatine-2-14B-Instruct-DPO-v2.0b1", - "developer": "jpacifico", - "inference_platform": "unknown", - "id": "jpacifico/Chocolatine-2-14B-Instruct-DPO-v2.0b1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": 
{ - "score": 0.10334024831890495 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.669567432054888 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2756797583081571 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37583892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44673958333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5123836436170213 - } - } - ] -} diff --git a/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0.1/74d99e4d-0e6f-4804-aa52-0dc76d37fac3.json b/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0.1/74d99e4d-0e6f-4804-aa52-0dc76d37fac3.json new file mode 100644 index 000000000..1b5b639c2 --- /dev/null +++ b/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0.1/74d99e4d-0e6f-4804-aa52-0dc76d37fac3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-2-14B-Instruct-v2.0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Chocolatine-2-14B-Instruct-v2.0.1", + "id": "jpacifico/Chocolatine-2-14B-Instruct-v2.0.1", + "developer": "jpacifico", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0742 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6736 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.4796 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3918 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5008 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5299 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0.1/c68ca8a7-07d8-4295-a535-a573fc3893b7.json b/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0.1/c68ca8a7-07d8-4295-a535-a573fc3893b7.json deleted file mode 100644 index 2733a88fe..000000000 --- a/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0.1/c68ca8a7-07d8-4295-a535-a573fc3893b7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-2-14B-Instruct-v2.0.1/1762652580.298285", - "retrieved_timestamp": "1762652580.2982872", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jpacifico/Chocolatine-2-14B-Instruct-v2.0.1", - "developer": "jpacifico", - "inference_platform": "unknown", - "id": "jpacifico/Chocolatine-2-14B-Instruct-v2.0.1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07421419611076388 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6736278064166185 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.479607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39177852348993286 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.50075 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5299202127659575 - } - } - ] -} diff --git a/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0.3/80e8b9f0-b507-4927-9d24-1c793e3783cc.json b/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0.3/80e8b9f0-b507-4927-9d24-1c793e3783cc.json new file mode 100644 index 000000000..a34d92883 --- /dev/null +++ b/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0.3/80e8b9f0-b507-4927-9d24-1c793e3783cc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-2-14B-Instruct-v2.0.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Chocolatine-2-14B-Instruct-v2.0.3", + "id": "jpacifico/Chocolatine-2-14B-Instruct-v2.0.3", + "developer": "jpacifico", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7037 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6548 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4207 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3792 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4768 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + 
"source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5374 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0.3/ccf2d437-d3e3-4a53-9249-e6df2fd04f49.json b/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0.3/ccf2d437-d3e3-4a53-9249-e6df2fd04f49.json deleted file mode 100644 index b4ec98edb..000000000 --- a/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0.3/ccf2d437-d3e3-4a53-9249-e6df2fd04f49.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-2-14B-Instruct-v2.0.3/1762652580.298579", - "retrieved_timestamp": "1762652580.29858", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jpacifico/Chocolatine-2-14B-Instruct-v2.0.3", - "developer": "jpacifico", - "inference_platform": "unknown", - "id": "jpacifico/Chocolatine-2-14B-Instruct-v2.0.3", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7037205725253439 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6548026688308357 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4206948640483384 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37919463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47681250000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5374002659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0/7b037520-a5e9-4b58-80f3-f0ecc5957c67.json b/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0/7b037520-a5e9-4b58-80f3-f0ecc5957c67.json new file mode 100644 index 000000000..157f2eb32 --- /dev/null +++ b/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0/7b037520-a5e9-4b58-80f3-f0ecc5957c67.json @@ -0,0 
+1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-2-14B-Instruct-v2.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Chocolatine-2-14B-Instruct-v2.0", + "id": "jpacifico/Chocolatine-2-14B-Instruct-v2.0", + "developer": "jpacifico", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0885 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.677 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4804 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3876 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5021 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5302 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0/85b8aede-7eb3-4997-9529-2f7d4603fb9e.json b/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0/85b8aede-7eb3-4997-9529-2f7d4603fb9e.json deleted file mode 100644 index 34973b279..000000000 --- a/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0/85b8aede-7eb3-4997-9529-2f7d4603fb9e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-2-14B-Instruct-v2.0/1762652580.2980192", - "retrieved_timestamp": 
"1762652580.2980192", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jpacifico/Chocolatine-2-14B-Instruct-v2.0", - "developer": "jpacifico", - "inference_platform": "unknown", - "id": "jpacifico/Chocolatine-2-14B-Instruct-v2.0", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0885273297073986 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6769929749559443 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48036253776435045 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3875838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5021145833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5301695478723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0b2/10b88d05-62d2-4603-9d04-b0854e39ed40.json b/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0b2/10b88d05-62d2-4603-9d04-b0854e39ed40.json new file mode 100644 index 000000000..eb93d587d --- /dev/null +++ b/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0b2/10b88d05-62d2-4603-9d04-b0854e39ed40.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-2-14B-Instruct-v2.0b2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Chocolatine-2-14B-Instruct-v2.0b2", + "id": "jpacifico/Chocolatine-2-14B-Instruct-v2.0b2", + "developer": "jpacifico", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7241 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6476 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.395 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3834 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4808 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5369 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0b2/6837502d-0f08-48d8-b85e-70f3e07a2531.json b/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0b2/6837502d-0f08-48d8-b85e-70f3e07a2531.json deleted file mode 100644 index 5595dfdd7..000000000 --- a/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0b2/6837502d-0f08-48d8-b85e-70f3e07a2531.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-2-14B-Instruct-v2.0b2/1762652580.298837", - "retrieved_timestamp": "1762652580.298838", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jpacifico/Chocolatine-2-14B-Instruct-v2.0b2", - "developer": "jpacifico", - "inference_platform": "unknown", - "id": "jpacifico/Chocolatine-2-14B-Instruct-v2.0b2", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.7240787776433197 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6475822300543483 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3950151057401813 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38338926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48075 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5369015957446809 - } - } - ] -} diff --git a/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0b3/4b693f41-d811-4b64-892c-d840eee5ace4.json b/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0b3/4b693f41-d811-4b64-892c-d840eee5ace4.json new file mode 100644 index 000000000..d9d2d02b9 --- /dev/null +++ b/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0b3/4b693f41-d811-4b64-892c-d840eee5ace4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-2-14B-Instruct-v2.0b3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Chocolatine-2-14B-Instruct-v2.0b3", + "id": "jpacifico/Chocolatine-2-14B-Instruct-v2.0b3", + "developer": "jpacifico", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7323 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6469 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, 
+ "max_score": 1.0 + }, + "score_details": { + "score": 0.4109 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3792 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4781 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5337 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0b3/f345f9cb-7233-4f4e-8e8b-a0b607502d1d.json b/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0b3/f345f9cb-7233-4f4e-8e8b-a0b607502d1d.json deleted file mode 100644 index c6f333e23..000000000 --- a/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0b3/f345f9cb-7233-4f4e-8e8b-a0b607502d1d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-2-14B-Instruct-v2.0b3/1762652580.2990808", - "retrieved_timestamp": "1762652580.299082", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jpacifico/Chocolatine-2-14B-Instruct-v2.0b3", - "developer": "jpacifico", - "inference_platform": "unknown", - "id": "jpacifico/Chocolatine-2-14B-Instruct-v2.0b3", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7322969720342026 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.646878884179919 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4108761329305136 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37919463087248323 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47811458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5337433510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/jpacifico/Chocolatine-3B-Instruct-DPO-Revised/08a646ba-9b4a-483e-8adf-f4e203a9be5d.json b/data/hfopenllm_v2/jpacifico/Chocolatine-3B-Instruct-DPO-Revised/08a646ba-9b4a-483e-8adf-f4e203a9be5d.json deleted file mode 100644 index a313c90ad..000000000 --- a/data/hfopenllm_v2/jpacifico/Chocolatine-3B-Instruct-DPO-Revised/08a646ba-9b4a-483e-8adf-f4e203a9be5d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-3B-Instruct-DPO-Revised/1762652580.299312", - "retrieved_timestamp": "1762652580.299314", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jpacifico/Chocolatine-3B-Instruct-DPO-Revised", - "developer": "jpacifico", - "inference_platform": "unknown", - "id": "jpacifico/Chocolatine-3B-Instruct-DPO-Revised", - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5622625744136669 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5539982344792619 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18051359516616314 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221476510067114 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44534375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3988530585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/jpacifico/Chocolatine-3B-Instruct-DPO-Revised/90d86c8c-3aa6-42ba-a94f-75c961e65c41.json 
b/data/hfopenllm_v2/jpacifico/Chocolatine-3B-Instruct-DPO-Revised/90d86c8c-3aa6-42ba-a94f-75c961e65c41.json new file mode 100644 index 000000000..5f96bd801 --- /dev/null +++ b/data/hfopenllm_v2/jpacifico/Chocolatine-3B-Instruct-DPO-Revised/90d86c8c-3aa6-42ba-a94f-75c961e65c41.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-3B-Instruct-DPO-Revised/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Chocolatine-3B-Instruct-DPO-Revised", + "id": "jpacifico/Chocolatine-3B-Instruct-DPO-Revised", + "developer": "jpacifico", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5623 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.554 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1805 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3221 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4453 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3989 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jpacifico/Chocolatine-3B-Instruct-DPO-v1.0/7f969b69-cb14-4291-a15f-60f2b56e23ad.json b/data/hfopenllm_v2/jpacifico/Chocolatine-3B-Instruct-DPO-v1.0/7f969b69-cb14-4291-a15f-60f2b56e23ad.json deleted file mode 100644 index ccf2f125d..000000000 --- 
a/data/hfopenllm_v2/jpacifico/Chocolatine-3B-Instruct-DPO-v1.0/7f969b69-cb14-4291-a15f-60f2b56e23ad.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-3B-Instruct-DPO-v1.0/1762652580.29967", - "retrieved_timestamp": "1762652580.299671", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jpacifico/Chocolatine-3B-Instruct-DPO-v1.0", - "developer": "jpacifico", - "inference_platform": "unknown", - "id": "jpacifico/Chocolatine-3B-Instruct-DPO-v1.0", - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3737184005106451 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5471398082537478 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1782477341389728 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4754791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3937001329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/jpacifico/Chocolatine-3B-Instruct-DPO-v1.0/8318ae52-6ae3-45ce-82db-73f8cb5ad7c7.json b/data/hfopenllm_v2/jpacifico/Chocolatine-3B-Instruct-DPO-v1.0/8318ae52-6ae3-45ce-82db-73f8cb5ad7c7.json new file mode 100644 index 000000000..8e9903c3a --- /dev/null +++ b/data/hfopenllm_v2/jpacifico/Chocolatine-3B-Instruct-DPO-v1.0/8318ae52-6ae3-45ce-82db-73f8cb5ad7c7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-3B-Instruct-DPO-v1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Chocolatine-3B-Instruct-DPO-v1.0", + "id": "jpacifico/Chocolatine-3B-Instruct-DPO-v1.0", + "developer": "jpacifico", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + 
"architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3737 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5471 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1782 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3154 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4755 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3937 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jpacifico/Chocolatine-3B-Instruct-DPO-v1.2/b20a1d13-2f14-42e4-bdde-49f053cef325.json b/data/hfopenllm_v2/jpacifico/Chocolatine-3B-Instruct-DPO-v1.2/b20a1d13-2f14-42e4-bdde-49f053cef325.json new file mode 100644 index 000000000..ae3e1be3f --- /dev/null +++ b/data/hfopenllm_v2/jpacifico/Chocolatine-3B-Instruct-DPO-v1.2/b20a1d13-2f14-42e4-bdde-49f053cef325.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-3B-Instruct-DPO-v1.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Chocolatine-3B-Instruct-DPO-v1.2", + "id": "jpacifico/Chocolatine-3B-Instruct-DPO-v1.2", + "developer": "jpacifico", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": 
"IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5455 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5487 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2047 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3389 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4154 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3877 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jpacifico/Chocolatine-3B-Instruct-DPO-v1.2/f34988e6-20f5-4d77-9233-70d5bc6193fb.json b/data/hfopenllm_v2/jpacifico/Chocolatine-3B-Instruct-DPO-v1.2/f34988e6-20f5-4d77-9233-70d5bc6193fb.json deleted file mode 100644 index 8aec695ed..000000000 --- a/data/hfopenllm_v2/jpacifico/Chocolatine-3B-Instruct-DPO-v1.2/f34988e6-20f5-4d77-9233-70d5bc6193fb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-3B-Instruct-DPO-v1.2/1762652580.300061", - "retrieved_timestamp": "1762652580.300063", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jpacifico/Chocolatine-3B-Instruct-DPO-v1.2", - "developer": "jpacifico", - "inference_platform": "unknown", - "id": "jpacifico/Chocolatine-3B-Instruct-DPO-v1.2", - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5455014915978493 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5487182027245813 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20468277945619334 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3389261744966443 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41542708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3877160904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/jpacifico/Distilucie-7B-Math-Instruct-DPO-v0.1/51521dfb-d4b5-45df-ac2a-54190aed0b9f.json b/data/hfopenllm_v2/jpacifico/Distilucie-7B-Math-Instruct-DPO-v0.1/51521dfb-d4b5-45df-ac2a-54190aed0b9f.json new file mode 100644 index 000000000..e3a74e372 --- /dev/null +++ b/data/hfopenllm_v2/jpacifico/Distilucie-7B-Math-Instruct-DPO-v0.1/51521dfb-d4b5-45df-ac2a-54190aed0b9f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jpacifico_Distilucie-7B-Math-Instruct-DPO-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Distilucie-7B-Math-Instruct-DPO-v0.1", + "id": "jpacifico/Distilucie-7B-Math-Instruct-DPO-v0.1", + "developer": "jpacifico", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.707 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3048 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3835 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0257 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3644 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1809 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jpacifico/Distilucie-7B-Math-Instruct-DPO-v0.1/8ea866ce-c4a8-4981-b221-ee7b2dc898cd.json b/data/hfopenllm_v2/jpacifico/Distilucie-7B-Math-Instruct-DPO-v0.1/8ea866ce-c4a8-4981-b221-ee7b2dc898cd.json deleted file mode 100644 index ba22ed627..000000000 --- a/data/hfopenllm_v2/jpacifico/Distilucie-7B-Math-Instruct-DPO-v0.1/8ea866ce-c4a8-4981-b221-ee7b2dc898cd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jpacifico_Distilucie-7B-Math-Instruct-DPO-v0.1/1762652580.300392", - "retrieved_timestamp": "1762652580.3003929", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jpacifico/Distilucie-7B-Math-Instruct-DPO-v0.1", - "developer": "jpacifico", - "inference_platform": "unknown", - "id": "jpacifico/Distilucie-7B-Math-Instruct-DPO-v0.1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.707 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30475028479988653 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38346961466103785 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0256797583081571 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3644479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1809341755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-DPO-v1.1.3/643a510c-b9f4-4222-a1b0-09d7d5434de8.json b/data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-DPO-v1.1.3/643a510c-b9f4-4222-a1b0-09d7d5434de8.json deleted file mode 100644 index ecaefae4f..000000000 --- a/data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-DPO-v1.1.3/643a510c-b9f4-4222-a1b0-09d7d5434de8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jpacifico_Lucie-7B-Instruct-DPO-v1.1.3/1762652580.3010209", - "retrieved_timestamp": "1762652580.301022", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jpacifico/Lucie-7B-Instruct-DPO-v1.1.3", - "developer": "jpacifico", - "inference_platform": "unknown", - "id": "jpacifico/Lucie-7B-Instruct-DPO-v1.1.3", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.707 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3044754584502453 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.381900181819828 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02416918429003021 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38178124999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1763630319148936 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-DPO-v1.1.3/997a1ceb-185a-4e6c-8383-eb5a6f976771.json b/data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-DPO-v1.1.3/997a1ceb-185a-4e6c-8383-eb5a6f976771.json new file mode 100644 index 000000000..524704add --- /dev/null +++ b/data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-DPO-v1.1.3/997a1ceb-185a-4e6c-8383-eb5a6f976771.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jpacifico_Lucie-7B-Instruct-DPO-v1.1.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lucie-7B-Instruct-DPO-v1.1.3", + "id": "jpacifico/Lucie-7B-Instruct-DPO-v1.1.3", + "developer": "jpacifico", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.707 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3819 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0242 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2861 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3818 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1764 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-DPO-v1.1/22101998-c3d3-414f-9ed1-99330cdbe3b2.json b/data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-DPO-v1.1/22101998-c3d3-414f-9ed1-99330cdbe3b2.json 
new file mode 100644 index 000000000..da8506358 --- /dev/null +++ b/data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-DPO-v1.1/22101998-c3d3-414f-9ed1-99330cdbe3b2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jpacifico_Lucie-7B-Instruct-DPO-v1.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lucie-7B-Instruct-DPO-v1.1", + "id": "jpacifico/Lucie-7B-Instruct-DPO-v1.1", + "developer": "jpacifico", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.707 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3781 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0234 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2878 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4016 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1838 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-DPO-v1.1/ad0aa0da-dac4-42a9-ae62-ebe03aa40643.json b/data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-DPO-v1.1/ad0aa0da-dac4-42a9-ae62-ebe03aa40643.json deleted file mode 100644 index c8c5dd15d..000000000 --- a/data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-DPO-v1.1/ad0aa0da-dac4-42a9-ae62-ebe03aa40643.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": 
"0.1.0", - "evaluation_id": "hfopenllm_v2/jpacifico_Lucie-7B-Instruct-DPO-v1.1/1762652580.300676", - "retrieved_timestamp": "1762652580.300677", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jpacifico/Lucie-7B-Instruct-DPO-v1.1", - "developer": "jpacifico", - "inference_platform": "unknown", - "id": "jpacifico/Lucie-7B-Instruct-DPO-v1.1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.707 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31209413245743517 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37810118011411814 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.023413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40159374999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18375997340425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.0/a2408953-a7eb-449c-b80c-3620915d44d0.json b/data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.0/a2408953-a7eb-449c-b80c-3620915d44d0.json new file mode 100644 index 000000000..95eaddbfc --- /dev/null +++ b/data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.0/a2408953-a7eb-449c-b80c-3620915d44d0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jpacifico_Lucie-7B-Instruct-Merged-Model_Stock-v1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lucie-7B-Instruct-Merged-Model_Stock-v1.0", + "id": "jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.0", + "developer": "jpacifico", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.707 + } + }, + "evaluation_results": [ + { + "evaluation_name": 
"IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3234 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3802 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0242 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2886 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3844 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1871 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.0/f28fc4d7-d3eb-4915-967a-db97667e85bc.json b/data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.0/f28fc4d7-d3eb-4915-967a-db97667e85bc.json deleted file mode 100644 index 659c89e26..000000000 --- a/data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.0/f28fc4d7-d3eb-4915-967a-db97667e85bc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jpacifico_Lucie-7B-Instruct-Merged-Model_Stock-v1.0/1762652580.3014882", - "retrieved_timestamp": "1762652580.3014889", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.0", - "developer": "jpacifico", - "inference_platform": "unknown", - "id": "jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.0", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.707 - } - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32335979645119395 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3802022135816421 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02416918429003021 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38438541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1870844414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.1/03e7b19a-c31a-4bd4-8560-3b8ac4c7c80c.json b/data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.1/03e7b19a-c31a-4bd4-8560-3b8ac4c7c80c.json deleted file mode 100644 index caa2d08c8..000000000 --- a/data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.1/03e7b19a-c31a-4bd4-8560-3b8ac4c7c80c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jpacifico_Lucie-7B-Instruct-Merged-Model_Stock-v1.1/1762652580.301858", - "retrieved_timestamp": "1762652580.3018591", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.1", - "developer": "jpacifico", - "inference_platform": "unknown", - "id": "jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.707 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30142798884736943 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38078615414710804 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37502083333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18617021276595744 - } - } - ] -} diff --git a/data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.1/d65e5b08-7d3c-4c0d-85fa-496db65a235c.json b/data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.1/d65e5b08-7d3c-4c0d-85fa-496db65a235c.json new file mode 100644 index 000000000..6a31392cf --- /dev/null +++ b/data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.1/d65e5b08-7d3c-4c0d-85fa-496db65a235c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jpacifico_Lucie-7B-Instruct-Merged-Model_Stock-v1.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lucie-7B-Instruct-Merged-Model_Stock-v1.1", + "id": "jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.1", + "developer": "jpacifico", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.707 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3014 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3808 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0279 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1862 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jpacifico/Lucie-Boosted-7B-Instruct/4c7575d2-d538-4767-8d7e-d905b11f84f9.json b/data/hfopenllm_v2/jpacifico/Lucie-Boosted-7B-Instruct/4c7575d2-d538-4767-8d7e-d905b11f84f9.json deleted file mode 100644 index e91f779e2..000000000 --- a/data/hfopenllm_v2/jpacifico/Lucie-Boosted-7B-Instruct/4c7575d2-d538-4767-8d7e-d905b11f84f9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jpacifico_Lucie-Boosted-7B-Instruct/1762652580.302166", - "retrieved_timestamp": "1762652580.3021681", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jpacifico/Lucie-Boosted-7B-Instruct", - "developer": "jpacifico", - "inference_platform": "unknown", - "id": "jpacifico/Lucie-Boosted-7B-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.707 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25661467129438775 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34654827210803724 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.369875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.1629820478723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/jpacifico/Lucie-Boosted-7B-Instruct/ce2c9614-46d2-481d-ac25-3cc71a93bd5e.json b/data/hfopenllm_v2/jpacifico/Lucie-Boosted-7B-Instruct/ce2c9614-46d2-481d-ac25-3cc71a93bd5e.json new file mode 100644 index 000000000..b3296bdfe --- /dev/null +++ b/data/hfopenllm_v2/jpacifico/Lucie-Boosted-7B-Instruct/ce2c9614-46d2-481d-ac25-3cc71a93bd5e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jpacifico_Lucie-Boosted-7B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lucie-Boosted-7B-Instruct", + "id": "jpacifico/Lucie-Boosted-7B-Instruct", + "developer": "jpacifico", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.707 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2566 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3465 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0128 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3699 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.163 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jsfs11/L3-8B-Stheno-slerp/4148a653-5fda-41c2-bf7e-1c03d385b7a1.json 
b/data/hfopenllm_v2/jsfs11/L3-8B-Stheno-slerp/4148a653-5fda-41c2-bf7e-1c03d385b7a1.json deleted file mode 100644 index 6c5b0adef..000000000 --- a/data/hfopenllm_v2/jsfs11/L3-8B-Stheno-slerp/4148a653-5fda-41c2-bf7e-1c03d385b7a1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jsfs11_L3-8B-Stheno-slerp/1762652580.302513", - "retrieved_timestamp": "1762652580.302515", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jsfs11/L3-8B-Stheno-slerp", - "developer": "jsfs11", - "inference_platform": "unknown", - "id": "jsfs11/L3-8B-Stheno-slerp", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6751940407008958 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5325675903618755 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09894259818731117 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3725416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36494348404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/jsfs11/L3-8B-Stheno-slerp/e9ba998d-8147-4046-afae-9ee7d544e98d.json b/data/hfopenllm_v2/jsfs11/L3-8B-Stheno-slerp/e9ba998d-8147-4046-afae-9ee7d544e98d.json new file mode 100644 index 000000000..09c46c3d4 --- /dev/null +++ b/data/hfopenllm_v2/jsfs11/L3-8B-Stheno-slerp/e9ba998d-8147-4046-afae-9ee7d544e98d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jsfs11_L3-8B-Stheno-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3-8B-Stheno-slerp", + "id": "jsfs11/L3-8B-Stheno-slerp", + "developer": "jsfs11", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": 
"LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6752 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5326 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0989 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3725 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3649 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jsfs11/MixtureofMerges-MoE-4x7b-v4/8143abf5-bd1d-4cdd-b555-5135f04945c3.json b/data/hfopenllm_v2/jsfs11/MixtureofMerges-MoE-4x7b-v4/8143abf5-bd1d-4cdd-b555-5135f04945c3.json deleted file mode 100644 index ec8c0d0a0..000000000 --- a/data/hfopenllm_v2/jsfs11/MixtureofMerges-MoE-4x7b-v4/8143abf5-bd1d-4cdd-b555-5135f04945c3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jsfs11_MixtureofMerges-MoE-4x7b-v4/1762652580.302909", - "retrieved_timestamp": "1762652580.3029099", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jsfs11/MixtureofMerges-MoE-4x7b-v4", - "developer": "jsfs11", - "inference_platform": "unknown", - "id": "jsfs11/MixtureofMerges-MoE-4x7b-v4", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 24.154 - } - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40299405577201824 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5169007103786006 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43855208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30319148936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/jsfs11/MixtureofMerges-MoE-4x7b-v4/c44f1012-1123-42c8-b110-5735dc756fd5.json b/data/hfopenllm_v2/jsfs11/MixtureofMerges-MoE-4x7b-v4/c44f1012-1123-42c8-b110-5735dc756fd5.json new file mode 100644 index 000000000..b881157bf --- /dev/null +++ b/data/hfopenllm_v2/jsfs11/MixtureofMerges-MoE-4x7b-v4/c44f1012-1123-42c8-b110-5735dc756fd5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jsfs11_MixtureofMerges-MoE-4x7b-v4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MixtureofMerges-MoE-4x7b-v4", + "id": "jsfs11/MixtureofMerges-MoE-4x7b-v4", + "developer": "jsfs11", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.154 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.403 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5169 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + 
}, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0634 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2861 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4386 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3032 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jsfs11/MixtureofMerges-MoE-4x7b-v5/5088f6a6-2acf-4d10-8b78-0d5bd4126ab5.json b/data/hfopenllm_v2/jsfs11/MixtureofMerges-MoE-4x7b-v5/5088f6a6-2acf-4d10-8b78-0d5bd4126ab5.json new file mode 100644 index 000000000..fe751bdd0 --- /dev/null +++ b/data/hfopenllm_v2/jsfs11/MixtureofMerges-MoE-4x7b-v5/5088f6a6-2acf-4d10-8b78-0d5bd4126ab5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/jsfs11_MixtureofMerges-MoE-4x7b-v5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MixtureofMerges-MoE-4x7b-v5", + "id": "jsfs11/MixtureofMerges-MoE-4x7b-v5", + "developer": "jsfs11", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.154 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4199 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5198 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.0755 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2844 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4305 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3098 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jsfs11/MixtureofMerges-MoE-4x7b-v5/a452af19-e167-45ca-99d2-5def2e4ad774.json b/data/hfopenllm_v2/jsfs11/MixtureofMerges-MoE-4x7b-v5/a452af19-e167-45ca-99d2-5def2e4ad774.json deleted file mode 100644 index 0834847ac..000000000 --- a/data/hfopenllm_v2/jsfs11/MixtureofMerges-MoE-4x7b-v5/a452af19-e167-45ca-99d2-5def2e4ad774.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jsfs11_MixtureofMerges-MoE-4x7b-v5/1762652580.30316", - "retrieved_timestamp": "1762652580.30316", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jsfs11/MixtureofMerges-MoE-4x7b-v5", - "developer": "jsfs11", - "inference_platform": "unknown", - "id": "jsfs11/MixtureofMerges-MoE-4x7b-v5", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 24.154 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41993022956865567 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5198481257083689 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0755287009063444 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4304895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3097573138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/kaist-ai/janus-7b/3ab8b78b-a9f9-428c-9469-afaa4158a0a6.json b/data/hfopenllm_v2/kaist-ai/janus-7b/3ab8b78b-a9f9-428c-9469-afaa4158a0a6.json deleted file mode 100644 index 4cb9644d6..000000000 --- a/data/hfopenllm_v2/kaist-ai/janus-7b/3ab8b78b-a9f9-428c-9469-afaa4158a0a6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/kaist-ai_janus-7b/1762652580.303385", - "retrieved_timestamp": "1762652580.3033862", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "kaist-ai/janus-7b", - "developer": "kaist-ai", - "inference_platform": "unknown", - "id": "kaist-ai/janus-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37751499355044615 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4693667591541633 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4401041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28740026595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/kaist-ai/janus-7b/b4d96088-5cc0-4ebc-8b8b-8c7e9f90420b.json b/data/hfopenllm_v2/kaist-ai/janus-7b/b4d96088-5cc0-4ebc-8b8b-8c7e9f90420b.json new file mode 100644 index 000000000..7009871f9 --- /dev/null +++ b/data/hfopenllm_v2/kaist-ai/janus-7b/b4d96088-5cc0-4ebc-8b8b-8c7e9f90420b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/kaist-ai_janus-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + 
"source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "janus-7b", + "id": "kaist-ai/janus-7b", + "developer": "kaist-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3775 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4694 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0408 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4401 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2874 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/kaist-ai/janus-dpo-7b/2a78f22b-d898-4f92-a2a5-c2930c16916c.json b/data/hfopenllm_v2/kaist-ai/janus-dpo-7b/2a78f22b-d898-4f92-a2a5-c2930c16916c.json deleted file mode 100644 index e1c972159..000000000 --- a/data/hfopenllm_v2/kaist-ai/janus-dpo-7b/2a78f22b-d898-4f92-a2a5-c2930c16916c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/kaist-ai_janus-dpo-7b/1762652580.303661", - "retrieved_timestamp": "1762652580.303662", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - 
"model_info": { - "name": "kaist-ai/janus-dpo-7b", - "developer": "kaist-ai", - "inference_platform": "unknown", - "id": "kaist-ai/janus-dpo-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4002712802031942 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4772581104894978 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04154078549848943 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43873958333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2976230053191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/kaist-ai/janus-dpo-7b/529dba11-53af-4045-ae46-04e1b9838d4a.json b/data/hfopenllm_v2/kaist-ai/janus-dpo-7b/529dba11-53af-4045-ae46-04e1b9838d4a.json new file mode 100644 index 000000000..c70f9f282 --- /dev/null +++ b/data/hfopenllm_v2/kaist-ai/janus-dpo-7b/529dba11-53af-4045-ae46-04e1b9838d4a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/kaist-ai_janus-dpo-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "janus-dpo-7b", + "id": "kaist-ai/janus-dpo-7b", + "developer": "kaist-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4003 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 
+ }, + "score_details": { + "score": 0.4773 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0415 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2819 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4387 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2976 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/kaist-ai/janus-rm-7b/391f6d6c-418f-44be-910a-fb90b5712649.json b/data/hfopenllm_v2/kaist-ai/janus-rm-7b/391f6d6c-418f-44be-910a-fb90b5712649.json new file mode 100644 index 000000000..979e94c2a --- /dev/null +++ b/data/hfopenllm_v2/kaist-ai/janus-rm-7b/391f6d6c-418f-44be-910a-fb90b5712649.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/kaist-ai_janus-rm-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "janus-rm-7b", + "id": "kaist-ai/janus-rm-7b", + "developer": "kaist-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LLMForSequenceRegression", + "params_billions": 7.111 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1778 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3056 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on 
MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2517 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3883 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1126 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/kaist-ai/janus-rm-7b/46f57920-759b-4d1a-b2f5-fe66aa740170.json b/data/hfopenllm_v2/kaist-ai/janus-rm-7b/46f57920-759b-4d1a-b2f5-fe66aa740170.json deleted file mode 100644 index be35a881d..000000000 --- a/data/hfopenllm_v2/kaist-ai/janus-rm-7b/46f57920-759b-4d1a-b2f5-fe66aa740170.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/kaist-ai_janus-rm-7b/1762652580.303882", - "retrieved_timestamp": "1762652580.303883", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "kaist-ai/janus-rm-7b", - "developer": "kaist-ai", - "inference_platform": "unknown", - "id": "kaist-ai/janus-rm-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LLMForSequenceRegression", - "params_billions": 7.111 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.177804891022487 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3056467446788138 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38829166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11261635638297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/kaist-ai/mistral-orpo-capybara-7k/2ccccb4b-7260-4a1a-9426-117e359c7c5c.json b/data/hfopenllm_v2/kaist-ai/mistral-orpo-capybara-7k/2ccccb4b-7260-4a1a-9426-117e359c7c5c.json new file mode 100644 index 000000000..41fa69358 --- /dev/null +++ b/data/hfopenllm_v2/kaist-ai/mistral-orpo-capybara-7k/2ccccb4b-7260-4a1a-9426-117e359c7c5c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/kaist-ai_mistral-orpo-capybara-7k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistral-orpo-capybara-7k", + "id": "kaist-ai/mistral-orpo-capybara-7k", + "developer": "kaist-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5367 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4489 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0393 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2861 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3964 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2971 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/kavonalds/BunderMaxx-0710/10be7d08-18a9-43a6-80ea-81d704600eab.json b/data/hfopenllm_v2/kavonalds/BunderMaxx-0710/10be7d08-18a9-43a6-80ea-81d704600eab.json deleted file mode 100644 index afbe19bd5..000000000 --- a/data/hfopenllm_v2/kavonalds/BunderMaxx-0710/10be7d08-18a9-43a6-80ea-81d704600eab.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/kavonalds_BunderMaxx-0710/1762652580.304877", - "retrieved_timestamp": "1762652580.3048792", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "kavonalds/BunderMaxx-0710", - "developer": "kavonalds", - "inference_platform": "unknown", - "id": "kavonalds/BunderMaxx-0710", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27007894608527594 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.556586279503196 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3681979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1449468085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/kavonalds/BunderMaxx-0710/63d646bf-14d2-4cc7-ab82-efd1645cc1ba.json b/data/hfopenllm_v2/kavonalds/BunderMaxx-0710/63d646bf-14d2-4cc7-ab82-efd1645cc1ba.json deleted file mode 100644 index 12b520b58..000000000 --- a/data/hfopenllm_v2/kavonalds/BunderMaxx-0710/63d646bf-14d2-4cc7-ab82-efd1645cc1ba.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/kavonalds_BunderMaxx-0710/1762652580.3044312", - "retrieved_timestamp": "1762652580.3044322", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "kavonalds/BunderMaxx-0710", - "developer": "kavonalds", - "inference_platform": "unknown", - "id": "kavonalds/BunderMaxx-0710", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32825569488955975 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6650758850169982 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3393333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13139960106382978 - } - } - ] -} diff --git a/data/hfopenllm_v2/kavonalds/BunderMaxx-0710/84afecec-453d-491c-9f5a-de31d8fba43e.json b/data/hfopenllm_v2/kavonalds/BunderMaxx-0710/84afecec-453d-491c-9f5a-de31d8fba43e.json new file mode 100644 index 000000000..7d5fbcff2 --- /dev/null +++ b/data/hfopenllm_v2/kavonalds/BunderMaxx-0710/84afecec-453d-491c-9f5a-de31d8fba43e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/kavonalds_BunderMaxx-0710/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BunderMaxx-0710", + "id": "kavonalds/BunderMaxx-0710", + "developer": "kavonalds", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3283 + } + }, + 
{ + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6651 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.068 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3393 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1314 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/kavonalds/BunderMaxx-0710/dba3a3a4-cd23-44c9-823f-0bd88cf6465b.json b/data/hfopenllm_v2/kavonalds/BunderMaxx-0710/dba3a3a4-cd23-44c9-823f-0bd88cf6465b.json new file mode 100644 index 000000000..a38c701e1 --- /dev/null +++ b/data/hfopenllm_v2/kavonalds/BunderMaxx-0710/dba3a3a4-cd23-44c9-823f-0bd88cf6465b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/kavonalds_BunderMaxx-0710/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BunderMaxx-0710", + "id": "kavonalds/BunderMaxx-0710", + "developer": "kavonalds", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5566 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.068 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3682 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1449 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/kavonalds/BunderMaxx-1010/1179bcce-558e-40ad-8537-c74c59557975.json b/data/hfopenllm_v2/kavonalds/BunderMaxx-1010/1179bcce-558e-40ad-8537-c74c59557975.json new file mode 100644 index 000000000..a63fbd6f4 --- /dev/null +++ b/data/hfopenllm_v2/kavonalds/BunderMaxx-1010/1179bcce-558e-40ad-8537-c74c59557975.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/kavonalds_BunderMaxx-1010/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BunderMaxx-1010", + "id": "kavonalds/BunderMaxx-1010", + "developer": "kavonalds", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2981 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.702 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + 
"metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.105 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3484 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1224 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/kavonalds/BunderMaxx-1010/6b0275ea-f2eb-4a37-922c-d1f734c1a6d3.json b/data/hfopenllm_v2/kavonalds/BunderMaxx-1010/6b0275ea-f2eb-4a37-922c-d1f734c1a6d3.json deleted file mode 100644 index ce20d567f..000000000 --- a/data/hfopenllm_v2/kavonalds/BunderMaxx-1010/6b0275ea-f2eb-4a37-922c-d1f734c1a6d3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/kavonalds_BunderMaxx-1010/1762652580.305197", - "retrieved_timestamp": "1762652580.3051982", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "kavonalds/BunderMaxx-1010", - "developer": "kavonalds", - "inference_platform": "unknown", - "id": "kavonalds/BunderMaxx-1010", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2980558252104416 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7019840419971701 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10498489425981873 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - 
}, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3484479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12242353723404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/kavonalds/Lancer-1-1b-Instruct/ae2afa83-4607-43ea-be11-86cc57f3b848.json b/data/hfopenllm_v2/kavonalds/Lancer-1-1b-Instruct/ae2afa83-4607-43ea-be11-86cc57f3b848.json deleted file mode 100644 index 3036d09c7..000000000 --- a/data/hfopenllm_v2/kavonalds/Lancer-1-1b-Instruct/ae2afa83-4607-43ea-be11-86cc57f3b848.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/kavonalds_Lancer-1-1b-Instruct/1762652580.305463", - "retrieved_timestamp": "1762652580.305465", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "kavonalds/Lancer-1-1b-Instruct", - "developer": "kavonalds", - "inference_platform": "unknown", - "id": "kavonalds/Lancer-1-1b-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5545940327220664 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32532742727549835 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3144375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1568317819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/kavonalds/Lancer-1-1b-Instruct/fe0a5c17-6c8d-4f06-a58e-47648ef9ecec.json b/data/hfopenllm_v2/kavonalds/Lancer-1-1b-Instruct/fe0a5c17-6c8d-4f06-a58e-47648ef9ecec.json new file mode 100644 index 000000000..13c6651be --- /dev/null +++ 
b/data/hfopenllm_v2/kavonalds/Lancer-1-1b-Instruct/fe0a5c17-6c8d-4f06-a58e-47648ef9ecec.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/kavonalds_Lancer-1-1b-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lancer-1-1b-Instruct", + "id": "kavonalds/Lancer-1-1b-Instruct", + "developer": "kavonalds", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5546 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3253 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0393 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3144 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1568 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/kayfour/T3Q-Qwen2.5-7B-it-KOR-Safe/81cf8cbd-33bc-44ab-930a-65242e1ae7b2.json b/data/hfopenllm_v2/kayfour/T3Q-Qwen2.5-7B-it-KOR-Safe/81cf8cbd-33bc-44ab-930a-65242e1ae7b2.json new file mode 100644 index 000000000..773475a94 --- /dev/null +++ b/data/hfopenllm_v2/kayfour/T3Q-Qwen2.5-7B-it-KOR-Safe/81cf8cbd-33bc-44ab-930a-65242e1ae7b2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/kayfour_T3Q-Qwen2.5-7B-it-KOR-Safe/1770682486.623709", + 
"retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T3Q-Qwen2.5-7B-it-KOR-Safe", + "id": "kayfour/T3Q-Qwen2.5-7B-it-KOR-Safe", + "developer": "kayfour", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6081 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.555 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3761 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3213 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4277 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4464 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/keeeeenw/MicroLlama/173bb053-e817-4551-b169-c3f71163650a.json b/data/hfopenllm_v2/keeeeenw/MicroLlama/173bb053-e817-4551-b169-c3f71163650a.json new file mode 100644 index 000000000..50d40f2d1 --- /dev/null +++ b/data/hfopenllm_v2/keeeeenw/MicroLlama/173bb053-e817-4551-b169-c3f71163650a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/keeeeenw_MicroLlama/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MicroLlama", + "id": 
"keeeeenw/MicroLlama", + "developer": "keeeeenw", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.305 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1985 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3007 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0113 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3698 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1138 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/kekmodel/StopCarbon-10.7B-v5/25b7d35b-8b5f-44ac-afae-e0f71ba8a0ff.json b/data/hfopenllm_v2/kekmodel/StopCarbon-10.7B-v5/25b7d35b-8b5f-44ac-afae-e0f71ba8a0ff.json deleted file mode 100644 index 48497fdf0..000000000 --- a/data/hfopenllm_v2/kekmodel/StopCarbon-10.7B-v5/25b7d35b-8b5f-44ac-afae-e0f71ba8a0ff.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/kekmodel_StopCarbon-10.7B-v5/1762652580.306321", - "retrieved_timestamp": "1762652580.3063219", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "kekmodel/StopCarbon-10.7B-v5", - "developer": "kekmodel", - "inference_platform": "unknown", - "id": "kekmodel/StopCarbon-10.7B-v5", - "additional_details": { - "precision": 
"float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47283651821611106 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5177716413471513 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.055891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4019375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3156582446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/kekmodel/StopCarbon-10.7B-v5/b7e6a86f-340c-48ed-a828-2e80a13aa515.json b/data/hfopenllm_v2/kekmodel/StopCarbon-10.7B-v5/b7e6a86f-340c-48ed-a828-2e80a13aa515.json new file mode 100644 index 000000000..087787728 --- /dev/null +++ b/data/hfopenllm_v2/kekmodel/StopCarbon-10.7B-v5/b7e6a86f-340c-48ed-a828-2e80a13aa515.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/kekmodel_StopCarbon-10.7B-v5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "StopCarbon-10.7B-v5", + "id": "kekmodel/StopCarbon-10.7B-v5", + "developer": "kekmodel", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4728 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5178 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + 
"source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0559 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4019 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3157 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/kevin009/llamaRAGdrama/bd221eee-7aa8-4d6f-a6be-89ee5568e729.json b/data/hfopenllm_v2/kevin009/llamaRAGdrama/bd221eee-7aa8-4d6f-a6be-89ee5568e729.json new file mode 100644 index 000000000..9b012321f --- /dev/null +++ b/data/hfopenllm_v2/kevin009/llamaRAGdrama/bd221eee-7aa8-4d6f-a6be-89ee5568e729.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/kevin009_llamaRAGdrama/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llamaRAGdrama", + "id": "kevin009/llamaRAGdrama", + "developer": "kevin009", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2598 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4007 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.0431 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4316 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2724 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/khoantap/cheap-moe-merge/8727a325-a515-4456-ba34-65c30f84644a.json b/data/hfopenllm_v2/khoantap/cheap-moe-merge/8727a325-a515-4456-ba34-65c30f84644a.json new file mode 100644 index 000000000..20cf2da74 --- /dev/null +++ b/data/hfopenllm_v2/khoantap/cheap-moe-merge/8727a325-a515-4456-ba34-65c30f84644a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/khoantap_cheap-moe-merge/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cheap-moe-merge", + "id": "khoantap/cheap-moe-merge", + "developer": "khoantap", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2MoeForCausalLM", + "params_billions": 19.305 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4557 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5131 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0921 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4103 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3339 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/khoantap/cheap-moe-merge/9ef977af-b10c-4434-bf4c-9783903e75a9.json b/data/hfopenllm_v2/khoantap/cheap-moe-merge/9ef977af-b10c-4434-bf4c-9783903e75a9.json deleted file mode 100644 index ba5f4c2b7..000000000 --- a/data/hfopenllm_v2/khoantap/cheap-moe-merge/9ef977af-b10c-4434-bf4c-9783903e75a9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/khoantap_cheap-moe-merge/1762652580.3070369", - "retrieved_timestamp": "1762652580.307038", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "khoantap/cheap-moe-merge", - "developer": "khoantap", - "inference_platform": "unknown", - "id": "khoantap/cheap-moe-merge", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2MoeForCausalLM", - "params_billions": 19.305 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4557008736818309 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.513116897226939 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09214501510574018 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4103020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3338597074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/khoantap/llama-3-8b-stock-merge/3e4011fa-d480-4c16-9371-2025bc834358.json b/data/hfopenllm_v2/khoantap/llama-3-8b-stock-merge/3e4011fa-d480-4c16-9371-2025bc834358.json new file mode 100644 index 000000000..df818dab9 --- /dev/null +++ b/data/hfopenllm_v2/khoantap/llama-3-8b-stock-merge/3e4011fa-d480-4c16-9371-2025bc834358.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/khoantap_llama-3-8b-stock-merge/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-3-8b-stock-merge", + "id": "khoantap/llama-3-8b-stock-merge", + "developer": "khoantap", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4812 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5162 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1616 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.318 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3946 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.38 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/khoantap/llama-breadcrumbs-ties-merge/867499a7-589b-4564-b04d-a004b7c0abb4.json 
b/data/hfopenllm_v2/khoantap/llama-breadcrumbs-ties-merge/867499a7-589b-4564-b04d-a004b7c0abb4.json new file mode 100644 index 000000000..ce7273c9a --- /dev/null +++ b/data/hfopenllm_v2/khoantap/llama-breadcrumbs-ties-merge/867499a7-589b-4564-b04d-a004b7c0abb4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/khoantap_llama-breadcrumbs-ties-merge/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-breadcrumbs-ties-merge", + "id": "khoantap/llama-breadcrumbs-ties-merge", + "developer": "khoantap", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2205 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5416 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1125 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4434 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3172 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/khoantap/llama-evolve-ties-best-merge/52f1fb51-fc7e-4cc2-918a-7c7226ae2ce5.json b/data/hfopenllm_v2/khoantap/llama-evolve-ties-best-merge/52f1fb51-fc7e-4cc2-918a-7c7226ae2ce5.json new file mode 100644 index 000000000..8af7559fa --- /dev/null +++ 
b/data/hfopenllm_v2/khoantap/llama-evolve-ties-best-merge/52f1fb51-fc7e-4cc2-918a-7c7226ae2ce5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/khoantap_llama-evolve-ties-best-merge/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-evolve-ties-best-merge", + "id": "khoantap/llama-evolve-ties-best-merge", + "developer": "khoantap", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6744 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5414 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1563 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3171 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3946 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.386 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/khoantap/llama-linear-0.5-0.5-1-merge/5f4a8fb6-b22d-4eb2-aaef-da05ca45fbeb.json b/data/hfopenllm_v2/khoantap/llama-linear-0.5-0.5-1-merge/5f4a8fb6-b22d-4eb2-aaef-da05ca45fbeb.json new file mode 100644 index 000000000..75bc7bcd6 --- /dev/null +++ b/data/hfopenllm_v2/khoantap/llama-linear-0.5-0.5-1-merge/5f4a8fb6-b22d-4eb2-aaef-da05ca45fbeb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/khoantap_llama-linear-0.5-0.5-1-merge/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-linear-0.5-0.5-1-merge", + "id": "khoantap/llama-linear-0.5-0.5-1-merge", + "developer": "khoantap", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4812 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5643 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2054 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4143 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3833 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/khoantap/llama-linear-0.5-1-0.5-merge/3278855d-7bd1-4e7e-b27b-b1393006e7e7.json b/data/hfopenllm_v2/khoantap/llama-linear-0.5-1-0.5-merge/3278855d-7bd1-4e7e-b27b-b1393006e7e7.json new file mode 100644 index 000000000..28ab86f9c --- /dev/null +++ b/data/hfopenllm_v2/khoantap/llama-linear-0.5-1-0.5-merge/3278855d-7bd1-4e7e-b27b-b1393006e7e7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/khoantap_llama-linear-0.5-1-0.5-merge/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": 
"documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-linear-0.5-1-0.5-merge", + "id": "khoantap/llama-linear-0.5-1-0.5-merge", + "developer": "khoantap", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5032 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5951 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.148 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4172 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.369 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/khoantap/llama-linear-1-0.5-0.5-merge/5193ab4d-1627-43b5-bfb7-89e08ea1f810.json b/data/hfopenllm_v2/khoantap/llama-linear-1-0.5-0.5-merge/5193ab4d-1627-43b5-bfb7-89e08ea1f810.json new file mode 100644 index 000000000..e1f8e572b --- /dev/null +++ b/data/hfopenllm_v2/khoantap/llama-linear-1-0.5-0.5-merge/5193ab4d-1627-43b5-bfb7-89e08ea1f810.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/khoantap_llama-linear-1-0.5-0.5-merge/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-linear-1-0.5-0.5-merge", + "id": 
"khoantap/llama-linear-1-0.5-0.5-merge", + "developer": "khoantap", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4515 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5526 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2477 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4118 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3635 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/khoantap/llama-slerp-merge/598faeda-48fb-43a8-aaa9-849d5dfcea79.json b/data/hfopenllm_v2/khoantap/llama-slerp-merge/598faeda-48fb-43a8-aaa9-849d5dfcea79.json new file mode 100644 index 000000000..b429d2b76 --- /dev/null +++ b/data/hfopenllm_v2/khoantap/llama-slerp-merge/598faeda-48fb-43a8-aaa9-849d5dfcea79.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/khoantap_llama-slerp-merge/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-slerp-merge", + "id": "khoantap/llama-slerp-merge", + "developer": "khoantap", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + 
"evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.498 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5783 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0831 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4053 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3678 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/khoantap/moe-out-merge/326fc05a-78e9-4e36-933c-aa0219661e0d.json b/data/hfopenllm_v2/khoantap/moe-out-merge/326fc05a-78e9-4e36-933c-aa0219661e0d.json deleted file mode 100644 index cc34abcd7..000000000 --- a/data/hfopenllm_v2/khoantap/moe-out-merge/326fc05a-78e9-4e36-933c-aa0219661e0d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/khoantap_moe-out-merge/1762652580.309191", - "retrieved_timestamp": "1762652580.309192", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "khoantap/moe-out-merge", - "developer": "khoantap", - "inference_platform": "unknown", - "id": "khoantap/moe-out-merge", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2MoeForCausalLM", - "params_billions": 19.305 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4504802812094133 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.515116897226939 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09290030211480363 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40630208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3347739361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/khoantap/moe-out-merge/d1afa2fb-1256-4dd3-b13b-802917bf481b.json b/data/hfopenllm_v2/khoantap/moe-out-merge/d1afa2fb-1256-4dd3-b13b-802917bf481b.json new file mode 100644 index 000000000..7471ab85d --- /dev/null +++ b/data/hfopenllm_v2/khoantap/moe-out-merge/d1afa2fb-1256-4dd3-b13b-802917bf481b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/khoantap_moe-out-merge/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "moe-out-merge", + "id": "khoantap/moe-out-merge", + "developer": "khoantap", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2MoeForCausalLM", + "params_billions": 19.305 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4505 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5151 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0929 + } + 
}, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2886 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4063 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3348 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/khulaifi95/Llama-3.1-8B-Reason-Blend-888k/397c9bc3-0af5-453c-9b68-5360783dfbf7.json b/data/hfopenllm_v2/khulaifi95/Llama-3.1-8B-Reason-Blend-888k/397c9bc3-0af5-453c-9b68-5360783dfbf7.json new file mode 100644 index 000000000..b28191893 --- /dev/null +++ b/data/hfopenllm_v2/khulaifi95/Llama-3.1-8B-Reason-Blend-888k/397c9bc3-0af5-453c-9b68-5360783dfbf7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/khulaifi95_Llama-3.1-8B-Reason-Blend-888k/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8B-Reason-Blend-888k", + "id": "khulaifi95/Llama-3.1-8B-Reason-Blend-888k", + "developer": "khulaifi95", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5832 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.479 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1156 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3379 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.31 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/kms7530/chemeng_llama-3-8b-Instruct-bnb-4bit_24_1_100_1/6cb03909-9850-4519-9e67-f2d875652e02.json b/data/hfopenllm_v2/kms7530/chemeng_llama-3-8b-Instruct-bnb-4bit_24_1_100_1/6cb03909-9850-4519-9e67-f2d875652e02.json deleted file mode 100644 index f31a49ee6..000000000 --- a/data/hfopenllm_v2/kms7530/chemeng_llama-3-8b-Instruct-bnb-4bit_24_1_100_1/6cb03909-9850-4519-9e67-f2d875652e02.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/kms7530_chemeng_llama-3-8b-Instruct-bnb-4bit_24_1_100_1/1762652580.309702", - "retrieved_timestamp": "1762652580.3097029", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "kms7530/chemeng_llama-3-8b-Instruct-bnb-4bit_24_1_100_1", - "developer": "kms7530", - "inference_platform": "unknown", - "id": "kms7530/chemeng_llama-3-8b-Instruct-bnb-4bit_24_1_100_1", - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 9.3 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5455014915978493 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42890394469736065 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.061933534743202415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.38206249999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2798371010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/kms7530/chemeng_llama-3-8b-Instruct-bnb-4bit_24_1_100_1/9bb39652-c79a-42bf-b6d8-c4ed6174a4c7.json b/data/hfopenllm_v2/kms7530/chemeng_llama-3-8b-Instruct-bnb-4bit_24_1_100_1/9bb39652-c79a-42bf-b6d8-c4ed6174a4c7.json new file mode 100644 index 000000000..06712c0c5 --- /dev/null +++ b/data/hfopenllm_v2/kms7530/chemeng_llama-3-8b-Instruct-bnb-4bit_24_1_100_1/9bb39652-c79a-42bf-b6d8-c4ed6174a4c7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/kms7530_chemeng_llama-3-8b-Instruct-bnb-4bit_24_1_100_1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "chemeng_llama-3-8b-Instruct-bnb-4bit_24_1_100_1", + "id": "kms7530/chemeng_llama-3-8b-Instruct-bnb-4bit_24_1_100_1", + "developer": "kms7530", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 9.3 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5455 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4289 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0619 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3821 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on 
MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2798 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/kms7530/chemeng_phi-3-mini-4k-instruct-bnb-4bit_16_4_100_1_nonmath/51a11592-e099-4059-9e97-f8924e1c2437.json b/data/hfopenllm_v2/kms7530/chemeng_phi-3-mini-4k-instruct-bnb-4bit_16_4_100_1_nonmath/51a11592-e099-4059-9e97-f8924e1c2437.json deleted file mode 100644 index 6094660c1..000000000 --- a/data/hfopenllm_v2/kms7530/chemeng_phi-3-mini-4k-instruct-bnb-4bit_16_4_100_1_nonmath/51a11592-e099-4059-9e97-f8924e1c2437.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/kms7530_chemeng_phi-3-mini-4k-instruct-bnb-4bit_16_4_100_1_nonmath/1762652580.309973", - "retrieved_timestamp": "1762652580.309974", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "kms7530/chemeng_phi-3-mini-4k-instruct-bnb-4bit_16_4_100_1_nonmath", - "developer": "kms7530", - "inference_platform": "unknown", - "id": "kms7530/chemeng_phi-3-mini-4k-instruct-bnb-4bit_16_4_100_1_nonmath", - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 4.132 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4863251727638222 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49871846432893613 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10800604229607251 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39828125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3480718085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/kms7530/chemeng_phi-3-mini-4k-instruct-bnb-4bit_16_4_100_1_nonmath/7e793244-b746-4aa4-a401-dcf5884f61a4.json b/data/hfopenllm_v2/kms7530/chemeng_phi-3-mini-4k-instruct-bnb-4bit_16_4_100_1_nonmath/7e793244-b746-4aa4-a401-dcf5884f61a4.json new file mode 100644 index 000000000..a2ee08c81 --- /dev/null +++ 
b/data/hfopenllm_v2/kms7530/chemeng_phi-3-mini-4k-instruct-bnb-4bit_16_4_100_1_nonmath/7e793244-b746-4aa4-a401-dcf5884f61a4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/kms7530_chemeng_phi-3-mini-4k-instruct-bnb-4bit_16_4_100_1_nonmath/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "chemeng_phi-3-mini-4k-instruct-bnb-4bit_16_4_100_1_nonmath", + "id": "kms7530/chemeng_phi-3-mini-4k-instruct-bnb-4bit_16_4_100_1_nonmath", + "developer": "kms7530", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 4.132 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4863 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4987 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.108 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3983 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3481 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/kms7530/chemeng_qwen-math-7b_24_1_100_1/26a8da03-debd-41e3-8ee1-2827d76b26ca.json b/data/hfopenllm_v2/kms7530/chemeng_qwen-math-7b_24_1_100_1/26a8da03-debd-41e3-8ee1-2827d76b26ca.json new file mode 100644 index 000000000..969ace9a0 --- /dev/null +++ 
b/data/hfopenllm_v2/kms7530/chemeng_qwen-math-7b_24_1_100_1/26a8da03-debd-41e3-8ee1-2827d76b26ca.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/kms7530_chemeng_qwen-math-7b_24_1_100_1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "chemeng_qwen-math-7b_24_1_100_1", + "id": "kms7530/chemeng_qwen-math-7b_24_1_100_1", + "developer": "kms7530", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 8.911 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2111 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3578 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2243 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2441 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3687 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2158 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/kms7530/chemeng_qwen-math-7b_24_1_100_1_nonmath/e214c326-dd84-4915-bba1-faaafbb026b2.json b/data/hfopenllm_v2/kms7530/chemeng_qwen-math-7b_24_1_100_1_nonmath/e214c326-dd84-4915-bba1-faaafbb026b2.json new file mode 100644 index 000000000..ce14e36aa --- /dev/null +++ b/data/hfopenllm_v2/kms7530/chemeng_qwen-math-7b_24_1_100_1_nonmath/e214c326-dd84-4915-bba1-faaafbb026b2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/kms7530_chemeng_qwen-math-7b_24_1_100_1_nonmath/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "chemeng_qwen-math-7b_24_1_100_1_nonmath", + "id": "kms7530/chemeng_qwen-math-7b_24_1_100_1_nonmath", + "developer": "kms7530", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 15.231 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3893 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3097 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4087 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2452 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/kno10/ende-chat-0.0.5/98a5ea0a-6e45-48f8-8219-32099b9fa9d0.json b/data/hfopenllm_v2/kno10/ende-chat-0.0.5/98a5ea0a-6e45-48f8-8219-32099b9fa9d0.json new file mode 100644 index 000000000..dfb3a0c4e --- /dev/null +++ b/data/hfopenllm_v2/kno10/ende-chat-0.0.5/98a5ea0a-6e45-48f8-8219-32099b9fa9d0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/kno10_ende-chat-0.0.5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging 
Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ende-chat-0.0.5", + "id": "kno10/ende-chat-0.0.5", + "developer": "kno10", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.891 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3404 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3604 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0204 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3938 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.179 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/kno10/ende-chat-0.0.5/af2f11cf-8efa-4c71-a0b2-74f953b8e61b.json b/data/hfopenllm_v2/kno10/ende-chat-0.0.5/af2f11cf-8efa-4c71-a0b2-74f953b8e61b.json deleted file mode 100644 index c649f93b0..000000000 --- a/data/hfopenllm_v2/kno10/ende-chat-0.0.5/af2f11cf-8efa-4c71-a0b2-74f953b8e61b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/kno10_ende-chat-0.0.5/1762652580.310679", - "retrieved_timestamp": "1762652580.3106802", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "kno10/ende-chat-0.0.5", - "developer": "kno10", - "inference_platform": "unknown", - "id": 
"kno10/ende-chat-0.0.5", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.891 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3404455733010634 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3604365707523862 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39384375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17902260638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/kno10/ende-chat-0.0.7/40d7d17d-2d41-4d23-83c1-ab5f3320e36e.json b/data/hfopenllm_v2/kno10/ende-chat-0.0.7/40d7d17d-2d41-4d23-83c1-ab5f3320e36e.json new file mode 100644 index 000000000..eb7bd3e9e --- /dev/null +++ b/data/hfopenllm_v2/kno10/ende-chat-0.0.7/40d7d17d-2d41-4d23-83c1-ab5f3320e36e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/kno10_ende-chat-0.0.7/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ende-chat-0.0.7", + "id": "kno10/ende-chat-0.0.7", + "developer": "kno10", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.891 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4401 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3792 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": 
"MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0174 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3861 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1966 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/kno10/ende-chat-0.0.7/6619dec7-71cf-4be6-90e2-815e8dd4e56f.json b/data/hfopenllm_v2/kno10/ende-chat-0.0.7/6619dec7-71cf-4be6-90e2-815e8dd4e56f.json deleted file mode 100644 index 2076d5e3a..000000000 --- a/data/hfopenllm_v2/kno10/ende-chat-0.0.7/6619dec7-71cf-4be6-90e2-815e8dd4e56f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/kno10_ende-chat-0.0.7/1762652580.310943", - "retrieved_timestamp": "1762652580.310944", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "kno10/ende-chat-0.0.7", - "developer": "kno10", - "inference_platform": "unknown", - "id": "kno10/ende-chat-0.0.7", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.891 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.440063476021401 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37918745577624335 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.017371601208459216 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.386125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19664228723404256 - } - } - ] -} diff --git a/data/hfopenllm_v2/kyutai/helium-1-preview-2b/ce4ddb86-646e-4c59-8a03-3687dbb77021.json b/data/hfopenllm_v2/kyutai/helium-1-preview-2b/ce4ddb86-646e-4c59-8a03-3687dbb77021.json deleted file mode 100644 index dfc1c550c..000000000 --- a/data/hfopenllm_v2/kyutai/helium-1-preview-2b/ce4ddb86-646e-4c59-8a03-3687dbb77021.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/kyutai_helium-1-preview-2b/1762652580.3111548", - "retrieved_timestamp": "1762652580.3111548", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "kyutai/helium-1-preview-2b", - "developer": "kyutai", - "inference_platform": "unknown", - "id": "kyutai/helium-1-preview-2b", - "additional_details": { - "precision": "bfloat16", - "architecture": "HeliumForCausalLM", - "params_billions": 2.173 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26136096667952147 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3638164815956466 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3549583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18725066489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/kyutai/helium-1-preview-2b/d881a83a-9ba8-4919-8b89-45f5a7220621.json b/data/hfopenllm_v2/kyutai/helium-1-preview-2b/d881a83a-9ba8-4919-8b89-45f5a7220621.json new file mode 100644 index 
000000000..9db17dbd8 --- /dev/null +++ b/data/hfopenllm_v2/kyutai/helium-1-preview-2b/d881a83a-9ba8-4919-8b89-45f5a7220621.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/kyutai_helium-1-preview-2b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "helium-1-preview-2b", + "id": "kyutai/helium-1-preview-2b", + "developer": "kyutai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "HeliumForCausalLM", + "params_billions": 2.173 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2614 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3638 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0136 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2785 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.355 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1873 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/kz919/QwQ-0.5B-Distilled-SFT/08efd69e-6ff6-48a1-b260-ddbb4a942d12.json b/data/hfopenllm_v2/kz919/QwQ-0.5B-Distilled-SFT/08efd69e-6ff6-48a1-b260-ddbb4a942d12.json deleted file mode 100644 index 6051f1433..000000000 --- a/data/hfopenllm_v2/kz919/QwQ-0.5B-Distilled-SFT/08efd69e-6ff6-48a1-b260-ddbb4a942d12.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/kz919_QwQ-0.5B-Distilled-SFT/1762652580.311408", 
- "retrieved_timestamp": "1762652580.311409", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "kz919/QwQ-0.5B-Distilled-SFT", - "developer": "kz919", - "inference_platform": "unknown", - "id": "kz919/QwQ-0.5B-Distilled-SFT", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3076725311063534 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3256291569645335 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07401812688821752 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3408541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15874335106382978 - } - } - ] -} diff --git a/data/hfopenllm_v2/kz919/QwQ-0.5B-Distilled-SFT/d6c966a1-7927-424a-9886-b98688d27e6f.json b/data/hfopenllm_v2/kz919/QwQ-0.5B-Distilled-SFT/d6c966a1-7927-424a-9886-b98688d27e6f.json new file mode 100644 index 000000000..34900ada9 --- /dev/null +++ b/data/hfopenllm_v2/kz919/QwQ-0.5B-Distilled-SFT/d6c966a1-7927-424a-9886-b98688d27e6f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/kz919_QwQ-0.5B-Distilled-SFT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwQ-0.5B-Distilled-SFT", + "id": "kz919/QwQ-0.5B-Distilled-SFT", + "developer": "kz919", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3077 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3256 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.074 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3409 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1587 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ladydaina/ECE-FDF/737cda34-7dea-4c68-b6a3-5b10066f9241.json b/data/hfopenllm_v2/ladydaina/ECE-FDF/737cda34-7dea-4c68-b6a3-5b10066f9241.json deleted file mode 100644 index 7fc96237c..000000000 --- a/data/hfopenllm_v2/ladydaina/ECE-FDF/737cda34-7dea-4c68-b6a3-5b10066f9241.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ladydaina_ECE-FDF/1762652580.311657", - "retrieved_timestamp": "1762652580.311657", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ladydaina/ECE-FDF", - "developer": "ladydaina", - "inference_platform": "unknown", - "id": "ladydaina/ECE-FDF", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3728440537773109 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.5150177593278346 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08157099697885196 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45039583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30069813829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/ladydaina/ECE-FDF/c09fe163-a7f7-4b6b-b407-ee8d698b2ee8.json b/data/hfopenllm_v2/ladydaina/ECE-FDF/c09fe163-a7f7-4b6b-b407-ee8d698b2ee8.json new file mode 100644 index 000000000..298900d6a --- /dev/null +++ b/data/hfopenllm_v2/ladydaina/ECE-FDF/c09fe163-a7f7-4b6b-b407-ee8d698b2ee8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ladydaina_ECE-FDF/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-FDF", + "id": "ladydaina/ECE-FDF", + "developer": "ladydaina", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3728 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.515 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0816 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4504 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3007 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/laislemke/LLaMA-2-vicuna-7b-slerp/b3979c7f-0596-4a24-b264-73a17ba19821.json b/data/hfopenllm_v2/laislemke/LLaMA-2-vicuna-7b-slerp/b3979c7f-0596-4a24-b264-73a17ba19821.json new file mode 100644 index 000000000..d0f53a22e --- /dev/null +++ b/data/hfopenllm_v2/laislemke/LLaMA-2-vicuna-7b-slerp/b3979c7f-0596-4a24-b264-73a17ba19821.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/laislemke_LLaMA-2-vicuna-7b-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LLaMA-2-vicuna-7b-slerp", + "id": "laislemke/LLaMA-2-vicuna-7b-slerp", + "developer": "laislemke", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.738 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2932 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2986 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0113 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2735 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3833 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1342 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-0.5B-FT-V5-MUSR/012fb237-8082-40d9-882e-0dd7bc9c74cb.json b/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-0.5B-FT-V5-MUSR/012fb237-8082-40d9-882e-0dd7bc9c74cb.json deleted file mode 100644 index 4dd67cd85..000000000 --- a/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-0.5B-FT-V5-MUSR/012fb237-8082-40d9-882e-0dd7bc9c74cb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lalainy_ECE-PRYMMAL-0.5B-FT-V5-MUSR/1762652580.312166", - "retrieved_timestamp": "1762652580.312166", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lalainy/ECE-PRYMMAL-0.5B-FT-V5-MUSR", - "developer": "lalainy", - "inference_platform": "unknown", - "id": "lalainy/ECE-PRYMMAL-0.5B-FT-V5-MUSR", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21377500587330506 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32694393820046386 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15334109042553193 - } - } - ] -} diff --git a/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-0.5B-FT-V5-MUSR/f6156893-92e7-4c4f-bff4-8b6d774ecbd8.json 
b/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-0.5B-FT-V5-MUSR/f6156893-92e7-4c4f-bff4-8b6d774ecbd8.json new file mode 100644 index 000000000..d16b8f1ef --- /dev/null +++ b/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-0.5B-FT-V5-MUSR/f6156893-92e7-4c4f-bff4-8b6d774ecbd8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lalainy_ECE-PRYMMAL-0.5B-FT-V5-MUSR/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-PRYMMAL-0.5B-FT-V5-MUSR", + "id": "lalainy/ECE-PRYMMAL-0.5B-FT-V5-MUSR", + "developer": "lalainy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2138 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3269 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0453 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3262 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1533 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-0.5B-SLERP-V4/869daca0-a700-464d-a551-290ed454421e.json b/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-0.5B-SLERP-V4/869daca0-a700-464d-a551-290ed454421e.json deleted file mode 100644 index 496a7f6f3..000000000 --- 
a/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-0.5B-SLERP-V4/869daca0-a700-464d-a551-290ed454421e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lalainy_ECE-PRYMMAL-0.5B-SLERP-V4/1762652580.312417", - "retrieved_timestamp": "1762652580.312417", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lalainy/ECE-PRYMMAL-0.5B-SLERP-V4", - "developer": "lalainy", - "inference_platform": "unknown", - "id": "lalainy/ECE-PRYMMAL-0.5B-SLERP-V4", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15639724819035714 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2894308596288922 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37892708333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11685505319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-0.5B-SLERP-V4/8b1c19e0-8b47-46ae-8bf3-f84c7d3a9c0e.json b/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-0.5B-SLERP-V4/8b1c19e0-8b47-46ae-8bf3-f84c7d3a9c0e.json new file mode 100644 index 000000000..bf518c502 --- /dev/null +++ b/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-0.5B-SLERP-V4/8b1c19e0-8b47-46ae-8bf3-f84c7d3a9c0e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lalainy_ECE-PRYMMAL-0.5B-SLERP-V4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-PRYMMAL-0.5B-SLERP-V4", + "id": "lalainy/ECE-PRYMMAL-0.5B-SLERP-V4", + "developer": "lalainy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + 
"evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1564 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2626 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3789 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1169 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-0.5B-SLERP-BIS-V1/6221102e-4e8c-46dd-8c03-fa9e92b7e4ea.json b/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-0.5B-SLERP-BIS-V1/6221102e-4e8c-46dd-8c03-fa9e92b7e4ea.json new file mode 100644 index 000000000..e44f2c10f --- /dev/null +++ b/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-0.5B-SLERP-BIS-V1/6221102e-4e8c-46dd-8c03-fa9e92b7e4ea.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lalainy_ECE-PRYMMAL-YL-0.5B-SLERP-BIS-V1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-PRYMMAL-YL-0.5B-SLERP-BIS-V1", + "id": "lalainy/ECE-PRYMMAL-YL-0.5B-SLERP-BIS-V1", + "developer": "lalainy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1437 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3032 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0008 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2349 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3646 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1121 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-0.5B-SLERP-BIS-V1/8822f27f-90ec-41a8-b71a-611f7c5ad590.json b/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-0.5B-SLERP-BIS-V1/8822f27f-90ec-41a8-b71a-611f7c5ad590.json deleted file mode 100644 index 4a4609bc8..000000000 --- a/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-0.5B-SLERP-BIS-V1/8822f27f-90ec-41a8-b71a-611f7c5ad590.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lalainy_ECE-PRYMMAL-YL-0.5B-SLERP-BIS-V1/1762652580.31263", - "retrieved_timestamp": "1762652580.31263", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lalainy/ECE-PRYMMAL-YL-0.5B-SLERP-BIS-V1", - "developer": "lalainy", - "inference_platform": "unknown", - "id": "lalainy/ECE-PRYMMAL-YL-0.5B-SLERP-BIS-V1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.1437075847639818 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3031946898842932 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2348993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3646041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11211768617021277 - } - } - ] -} diff --git a/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-1B-SLERP-V3/329e5e91-10ba-4795-ae86-dda95e698b4f.json b/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-1B-SLERP-V3/329e5e91-10ba-4795-ae86-dda95e698b4f.json new file mode 100644 index 000000000..97784607c --- /dev/null +++ b/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-1B-SLERP-V3/329e5e91-10ba-4795-ae86-dda95e698b4f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lalainy_ECE-PRYMMAL-YL-1B-SLERP-V3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-PRYMMAL-YL-1B-SLERP-V3", + "id": "lalainy/ECE-PRYMMAL-YL-1B-SLERP-V3", + "developer": "lalainy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.325 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4225 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.0974 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4213 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2931 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-1B-SLERP-V3/fa3c7a13-b37e-40b3-b814-b1ae421081ba.json b/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-1B-SLERP-V3/fa3c7a13-b37e-40b3-b814-b1ae421081ba.json deleted file mode 100644 index 89511009f..000000000 --- a/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-1B-SLERP-V3/fa3c7a13-b37e-40b3-b814-b1ae421081ba.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lalainy_ECE-PRYMMAL-YL-1B-SLERP-V3/1762652580.31284", - "retrieved_timestamp": "1762652580.312841", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lalainy/ECE-PRYMMAL-YL-1B-SLERP-V3", - "developer": "lalainy", - "inference_platform": "unknown", - "id": "lalainy/ECE-PRYMMAL-YL-1B-SLERP-V3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.325008754549041 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42245501886651654 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09743202416918428 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42128125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2931349734042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-1B-SLERP-V4/2ede8e21-33e9-45ac-9c60-9a4bd7e8e3cb.json b/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-1B-SLERP-V4/2ede8e21-33e9-45ac-9c60-9a4bd7e8e3cb.json deleted file mode 100644 index 5842324bc..000000000 --- a/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-1B-SLERP-V4/2ede8e21-33e9-45ac-9c60-9a4bd7e8e3cb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lalainy_ECE-PRYMMAL-YL-1B-SLERP-V4/1762652580.3130481", - "retrieved_timestamp": "1762652580.313049", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lalainy/ECE-PRYMMAL-YL-1B-SLERP-V4", - "developer": "lalainy", - "inference_platform": "unknown", - "id": "lalainy/ECE-PRYMMAL-YL-1B-SLERP-V4", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33235260220658963 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4170742409015322 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10045317220543806 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4306145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.289311835106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-1B-SLERP-V4/3fe89b13-135d-4790-871d-74e7a28ea2e9.json b/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-1B-SLERP-V4/3fe89b13-135d-4790-871d-74e7a28ea2e9.json new file mode 100644 index 000000000..e1cc670e9 --- /dev/null +++ b/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-1B-SLERP-V4/3fe89b13-135d-4790-871d-74e7a28ea2e9.json @@ -0,0 +1,132 @@ +{ + 
"schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lalainy_ECE-PRYMMAL-YL-1B-SLERP-V4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-PRYMMAL-YL-1B-SLERP-V4", + "id": "lalainy/ECE-PRYMMAL-YL-1B-SLERP-V4", + "developer": "lalainy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3324 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4171 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1005 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2861 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4306 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2893 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-6B-SLERP-V1/4b807741-f1b9-4964-9bc9-bb93f9b34217.json b/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-6B-SLERP-V1/4b807741-f1b9-4964-9bc9-bb93f9b34217.json new file mode 100644 index 000000000..fb7527528 --- /dev/null +++ b/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-6B-SLERP-V1/4b807741-f1b9-4964-9bc9-bb93f9b34217.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lalainy_ECE-PRYMMAL-YL-6B-SLERP-V1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + 
"source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-PRYMMAL-YL-6B-SLERP-V1", + "id": "lalainy/ECE-PRYMMAL-YL-6B-SLERP-V1", + "developer": "lalainy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.061 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3264 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4629 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1269 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2886 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4864 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3214 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-6B-SLERP-V1/85ac95fd-cb36-4158-818d-69c45f83dae9.json b/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-6B-SLERP-V1/85ac95fd-cb36-4158-818d-69c45f83dae9.json deleted file mode 100644 index 7d524086f..000000000 --- a/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-6B-SLERP-V1/85ac95fd-cb36-4158-818d-69c45f83dae9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lalainy_ECE-PRYMMAL-YL-6B-SLERP-V1/1762652580.31332", - "retrieved_timestamp": "1762652580.3133209", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": 
"documentation" - }, - "model_info": { - "name": "lalainy/ECE-PRYMMAL-YL-6B-SLERP-V1", - "developer": "lalainy", - "inference_platform": "unknown", - "id": "lalainy/ECE-PRYMMAL-YL-6B-SLERP-V1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.061 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3264072660540699 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46293726502592586 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1268882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48639583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32139295212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-6B-SLERP-V2/c52a8a4d-be91-4a0d-8cd5-8473a42f0978.json b/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-6B-SLERP-V2/c52a8a4d-be91-4a0d-8cd5-8473a42f0978.json new file mode 100644 index 000000000..c4e7de98b --- /dev/null +++ b/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-6B-SLERP-V2/c52a8a4d-be91-4a0d-8cd5-8473a42f0978.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lalainy_ECE-PRYMMAL-YL-6B-SLERP-V2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-PRYMMAL-YL-6B-SLERP-V2", + "id": "lalainy/ECE-PRYMMAL-YL-6B-SLERP-V2", + "developer": "lalainy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.061 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3249 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4629 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1269 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2886 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4864 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3214 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-6B-SLERP-V2/fd2e3c0b-8b35-463c-a001-444ed6e6dd9a.json b/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-6B-SLERP-V2/fd2e3c0b-8b35-463c-a001-444ed6e6dd9a.json deleted file mode 100644 index e5787e714..000000000 --- a/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-6B-SLERP-V2/fd2e3c0b-8b35-463c-a001-444ed6e6dd9a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lalainy_ECE-PRYMMAL-YL-6B-SLERP-V2/1762652580.3135412", - "retrieved_timestamp": "1762652580.3135412", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lalainy/ECE-PRYMMAL-YL-6B-SLERP-V2", - "developer": "lalainy", - "inference_platform": "unknown", - "id": "lalainy/ECE-PRYMMAL-YL-6B-SLERP-V2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.061 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3248835312526319 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46293726502592586 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on 
MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1268882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48639583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32139295212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/langgptai/Qwen-las-v0.1/f6e157c4-0ce9-41c9-b885-9222d894ff0c.json b/data/hfopenllm_v2/langgptai/Qwen-las-v0.1/f6e157c4-0ce9-41c9-b885-9222d894ff0c.json new file mode 100644 index 000000000..0046e64f5 --- /dev/null +++ b/data/hfopenllm_v2/langgptai/Qwen-las-v0.1/f6e157c4-0ce9-41c9-b885-9222d894ff0c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/langgptai_Qwen-las-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen-las-v0.1", + "id": "langgptai/Qwen-las-v0.1", + "developer": "langgptai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 7.901 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3301 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3893 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.037 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2466 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", 
+ "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3701 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2325 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/langgptai/qwen1.5-7b-chat-sa-v0.1/36137543-78a7-42a6-ad41-a4121797eec4.json b/data/hfopenllm_v2/langgptai/qwen1.5-7b-chat-sa-v0.1/36137543-78a7-42a6-ad41-a4121797eec4.json deleted file mode 100644 index 21d17a31f..000000000 --- a/data/hfopenllm_v2/langgptai/qwen1.5-7b-chat-sa-v0.1/36137543-78a7-42a6-ad41-a4121797eec4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/langgptai_qwen1.5-7b-chat-sa-v0.1/1762652580.314067", - "retrieved_timestamp": "1762652580.314068", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "langgptai/qwen1.5-7b-chat-sa-v0.1", - "developer": "langgptai", - "inference_platform": "unknown", - "id": "langgptai/qwen1.5-7b-chat-sa-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 15.443 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42677429221133256 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4325267992878656 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030211480362537766 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3551458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29928523936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/langgptai/qwen1.5-7b-chat-sa-v0.1/fe52a94a-5324-4b59-accc-dfd1f9d4aead.json 
b/data/hfopenllm_v2/langgptai/qwen1.5-7b-chat-sa-v0.1/fe52a94a-5324-4b59-accc-dfd1f9d4aead.json new file mode 100644 index 000000000..17240757f --- /dev/null +++ b/data/hfopenllm_v2/langgptai/qwen1.5-7b-chat-sa-v0.1/fe52a94a-5324-4b59-accc-dfd1f9d4aead.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/langgptai_qwen1.5-7b-chat-sa-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwen1.5-7b-chat-sa-v0.1", + "id": "langgptai/qwen1.5-7b-chat-sa-v0.1", + "developer": "langgptai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 15.443 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4268 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4325 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0302 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3551 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2993 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lars1234/Mistral-Small-24B-Instruct-2501-writer/1241f5e3-54eb-429e-b109-a5e163e39eda.json b/data/hfopenllm_v2/lars1234/Mistral-Small-24B-Instruct-2501-writer/1241f5e3-54eb-429e-b109-a5e163e39eda.json new file mode 100644 index 000000000..92337f309 --- /dev/null +++ 
b/data/hfopenllm_v2/lars1234/Mistral-Small-24B-Instruct-2501-writer/1241f5e3-54eb-429e-b109-a5e163e39eda.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lars1234_Mistral-Small-24B-Instruct-2501-writer/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-Small-24B-Instruct-2501-writer", + "id": "lars1234/Mistral-Small-24B-Instruct-2501-writer", + "developer": "lars1234", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6565 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6733 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3557 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3893 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4645 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5448 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lars1234/Mistral-Small-24B-Instruct-2501-writer/89742249-c51e-48e9-8bf1-7aad55e222c1.json b/data/hfopenllm_v2/lars1234/Mistral-Small-24B-Instruct-2501-writer/89742249-c51e-48e9-8bf1-7aad55e222c1.json deleted file mode 100644 index 97f333e9f..000000000 --- a/data/hfopenllm_v2/lars1234/Mistral-Small-24B-Instruct-2501-writer/89742249-c51e-48e9-8bf1-7aad55e222c1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - 
"schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lars1234_Mistral-Small-24B-Instruct-2501-writer/1762652580.314311", - "retrieved_timestamp": "1762652580.314312", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lars1234/Mistral-Small-24B-Instruct-2501-writer", - "developer": "lars1234", - "inference_platform": "unknown", - "id": "lars1234/Mistral-Small-24B-Instruct-2501-writer", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6565346613651777 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6733164099871131 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3557401812688822 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38926174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46453125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5447972074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/leafspark/Llama-3.1-8B-MultiReflection-Instruct/8ccc7c8c-1d14-45bb-9a6b-f8f69e506139.json b/data/hfopenllm_v2/leafspark/Llama-3.1-8B-MultiReflection-Instruct/8ccc7c8c-1d14-45bb-9a6b-f8f69e506139.json new file mode 100644 index 000000000..b9d1ba6c5 --- /dev/null +++ b/data/hfopenllm_v2/leafspark/Llama-3.1-8B-MultiReflection-Instruct/8ccc7c8c-1d14-45bb-9a6b-f8f69e506139.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/leafspark_Llama-3.1-8B-MultiReflection-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8B-MultiReflection-Instruct", + "id": "leafspark/Llama-3.1-8B-MultiReflection-Instruct", + "developer": "leafspark", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + 
"evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7125 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5009 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1707 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3682 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3724 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/leafspark/Llama-3.1-8B-MultiReflection-Instruct/c8a287fc-db9e-4088-aafe-0562aa305011.json b/data/hfopenllm_v2/leafspark/Llama-3.1-8B-MultiReflection-Instruct/c8a287fc-db9e-4088-aafe-0562aa305011.json deleted file mode 100644 index d43d7ba3a..000000000 --- a/data/hfopenllm_v2/leafspark/Llama-3.1-8B-MultiReflection-Instruct/c8a287fc-db9e-4088-aafe-0562aa305011.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/leafspark_Llama-3.1-8B-MultiReflection-Instruct/1762652580.3145778", - "retrieved_timestamp": "1762652580.3145778", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "leafspark/Llama-3.1-8B-MultiReflection-Instruct", - "developer": "leafspark", - "inference_platform": "unknown", - "id": "leafspark/Llama-3.1-8B-MultiReflection-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7125382872999197 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5009088261495708 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17069486404833836 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3681979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37242353723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-9B/5531b59e-24c0-41af-ab6b-d6a5e38b0a98.json b/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-9B/5531b59e-24c0-41af-ab6b-d6a5e38b0a98.json new file mode 100644 index 000000000..1a22b28f0 --- /dev/null +++ b/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-9B/5531b59e-24c0-41af-ab6b-d6a5e38b0a98.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma-2-Ataraxy-9B", + "id": "lemon07r/Gemma-2-Ataraxy-9B", + "developer": "lemon07r", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3009 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5931 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0853 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3347 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4424 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4226 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-Advanced-9B/63e82cb3-2f6f-4617-abb7-ae093bc27830.json b/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-Advanced-9B/63e82cb3-2f6f-4617-abb7-ae093bc27830.json new file mode 100644 index 000000000..d0d68741f --- /dev/null +++ b/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-Advanced-9B/63e82cb3-2f6f-4617-abb7-ae093bc27830.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-Advanced-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma-2-Ataraxy-Advanced-9B", + "id": "lemon07r/Gemma-2-Ataraxy-Advanced-9B", + "developer": "lemon07r", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5516 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5889 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1979 + } + }, 
+ { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3356 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3761 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4244 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-Remix-9B/0feb74e6-40d4-472d-9233-27faa2d3f802.json b/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-Remix-9B/0feb74e6-40d4-472d-9233-27faa2d3f802.json new file mode 100644 index 000000000..3e6258067 --- /dev/null +++ b/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-Remix-9B/0feb74e6-40d4-472d-9233-27faa2d3f802.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-Remix-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma-2-Ataraxy-Remix-9B", + "id": "lemon07r/Gemma-2-Ataraxy-Remix-9B", + "developer": "lemon07r", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7083 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5892 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2017 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3389 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4372 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4239 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v2-9B/e74dd005-c9b5-45c9-b7f5-455c3110e09b.json b/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v2-9B/e74dd005-c9b5-45c9-b7f5-455c3110e09b.json new file mode 100644 index 000000000..48d6b6abd --- /dev/null +++ b/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v2-9B/e74dd005-c9b5-45c9-b7f5-455c3110e09b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma-2-Ataraxy-v2-9B", + "id": "lemon07r/Gemma-2-Ataraxy-v2-9B", + "developer": "lemon07r", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2136 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5766 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0846 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3423 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": 
"hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3484 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4221 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v2a-9B/d094bf6f-9952-45c7-995e-d7eda07f4668.json b/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v2a-9B/d094bf6f-9952-45c7-995e-d7eda07f4668.json new file mode 100644 index 000000000..9dfdf8d74 --- /dev/null +++ b/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v2a-9B/d094bf6f-9952-45c7-995e-d7eda07f4668.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v2a-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma-2-Ataraxy-v2a-9B", + "id": "lemon07r/Gemma-2-Ataraxy-v2a-9B", + "developer": "lemon07r", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1595 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5182 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0612 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3398 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": 
{ + "score": 0.3165 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3515 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v2f-9B/0e5f3393-8a6a-4f2f-948a-a37ae4d8fdeb.json b/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v2f-9B/0e5f3393-8a6a-4f2f-948a-a37ae4d8fdeb.json new file mode 100644 index 000000000..a5ae93322 --- /dev/null +++ b/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v2f-9B/0e5f3393-8a6a-4f2f-948a-a37ae4d8fdeb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v2f-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma-2-Ataraxy-v2f-9B", + "id": "lemon07r/Gemma-2-Ataraxy-v2f-9B", + "developer": "lemon07r", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3791 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5193 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1163 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3389 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3231 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3503 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v3-Advanced-9B/f91982ac-0cab-415a-8503-e090d195bd05.json b/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v3-Advanced-9B/f91982ac-0cab-415a-8503-e090d195bd05.json new file mode 100644 index 000000000..c793e8539 --- /dev/null +++ b/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v3-Advanced-9B/f91982ac-0cab-415a-8503-e090d195bd05.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v3-Advanced-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma-2-Ataraxy-v3-Advanced-9B", + "id": "lemon07r/Gemma-2-Ataraxy-v3-Advanced-9B", + "developer": "lemon07r", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6602 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5935 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1873 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3364 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.445 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4196 + } + } + ] +} \ No 
newline at end of file diff --git a/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v3b-9B/fb1af66e-7828-495b-8277-5cff77c3070e.json b/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v3b-9B/fb1af66e-7828-495b-8277-5cff77c3070e.json new file mode 100644 index 000000000..5ef333acb --- /dev/null +++ b/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v3b-9B/fb1af66e-7828-495b-8277-5cff77c3070e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v3b-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma-2-Ataraxy-v3b-9B", + "id": "lemon07r/Gemma-2-Ataraxy-v3b-9B", + "developer": "lemon07r", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6809 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5908 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2153 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3331 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4489 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4205 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v3i-9B/ac84c157-4d11-43c1-8731-b1e5cfa91668.json b/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v3i-9B/ac84c157-4d11-43c1-8731-b1e5cfa91668.json new file mode 
100644 index 000000000..fb3af6e05 --- /dev/null +++ b/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v3i-9B/ac84c157-4d11-43c1-8731-b1e5cfa91668.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v3i-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma-2-Ataraxy-v3i-9B", + "id": "lemon07r/Gemma-2-Ataraxy-v3i-9B", + "developer": "lemon07r", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4203 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5626 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1533 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.328 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3181 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4166 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v3j-9B/bbc812dd-9a9c-4f99-b813-50361025eea3.json b/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v3j-9B/bbc812dd-9a9c-4f99-b813-50361025eea3.json new file mode 100644 index 000000000..627159167 --- /dev/null +++ b/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v3j-9B/bbc812dd-9a9c-4f99-b813-50361025eea3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v3j-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma-2-Ataraxy-v3j-9B", + "id": "lemon07r/Gemma-2-Ataraxy-v3j-9B", + "developer": "lemon07r", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4169 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5632 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1692 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.328 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.318 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4134 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v4-Advanced-9B/fc818799-49d5-4fca-b131-ebe8d5d831f1.json b/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v4-Advanced-9B/fc818799-49d5-4fca-b131-ebe8d5d831f1.json new file mode 100644 index 000000000..5762b4771 --- /dev/null +++ b/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v4-Advanced-9B/fc818799-49d5-4fca-b131-ebe8d5d831f1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v4-Advanced-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma-2-Ataraxy-v4-Advanced-9B", + "id": "lemon07r/Gemma-2-Ataraxy-v4-Advanced-9B", + "developer": "lemon07r", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7015 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6024 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2153 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3389 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4581 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4367 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v4a-Advanced-9B/33349989-8573-4d71-ae0f-99691fdaffc3.json b/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v4a-Advanced-9B/33349989-8573-4d71-ae0f-99691fdaffc3.json new file mode 100644 index 000000000..f1c4a9342 --- /dev/null +++ b/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v4a-Advanced-9B/33349989-8573-4d71-ae0f-99691fdaffc3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v4a-Advanced-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma-2-Ataraxy-v4a-Advanced-9B", + "id": 
"lemon07r/Gemma-2-Ataraxy-v4a-Advanced-9B", + "developer": "lemon07r", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7135 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5988 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2115 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.344 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4489 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4309 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v4b-9B/91551de5-d8ac-4c0d-b9b4-3627db947f0e.json b/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v4b-9B/91551de5-d8ac-4c0d-b9b4-3627db947f0e.json new file mode 100644 index 000000000..de6307f19 --- /dev/null +++ b/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v4b-9B/91551de5-d8ac-4c0d-b9b4-3627db947f0e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v4b-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma-2-Ataraxy-v4b-9B", + "id": "lemon07r/Gemma-2-Ataraxy-v4b-9B", + "developer": "lemon07r", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + 
"evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6878 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6039 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2334 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3406 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4555 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4357 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v4c-9B/c2d2c1f4-aaab-45f1-b3f6-5b4ea56b696e.json b/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v4c-9B/c2d2c1f4-aaab-45f1-b3f6-5b4ea56b696e.json new file mode 100644 index 000000000..ddb47c621 --- /dev/null +++ b/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v4c-9B/c2d2c1f4-aaab-45f1-b3f6-5b4ea56b696e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v4c-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma-2-Ataraxy-v4c-9B", + "id": "lemon07r/Gemma-2-Ataraxy-v4c-9B", + "developer": "lemon07r", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on 
IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6945 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6084 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2266 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3339 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4528 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4395 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v4d-9B/36821a8b-af18-4631-b4b0-7e4b37bb194b.json b/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v4d-9B/36821a8b-af18-4631-b4b0-7e4b37bb194b.json new file mode 100644 index 000000000..911a9ca74 --- /dev/null +++ b/data/hfopenllm_v2/lemon07r/Gemma-2-Ataraxy-v4d-9B/36821a8b-af18-4631-b4b0-7e4b37bb194b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v4d-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma-2-Ataraxy-v4d-9B", + "id": "lemon07r/Gemma-2-Ataraxy-v4d-9B", + "developer": "lemon07r", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.725 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + 
"source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6054 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2334 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3473 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4541 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4346 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lemon07r/Llama-3-RedMagic4-8B/e402d129-f4f1-4b95-b079-4f30936119aa.json b/data/hfopenllm_v2/lemon07r/Llama-3-RedMagic4-8B/e402d129-f4f1-4b95-b079-4f30936119aa.json new file mode 100644 index 000000000..4d4ff0851 --- /dev/null +++ b/data/hfopenllm_v2/lemon07r/Llama-3-RedMagic4-8B/e402d129-f4f1-4b95-b079-4f30936119aa.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lemon07r_Llama-3-RedMagic4-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-RedMagic4-8B", + "id": "lemon07r/Llama-3-RedMagic4-8B", + "developer": "lemon07r", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4864 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.4256 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0899 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3766 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3676 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lemon07r/llama-3-NeuralMahou-8b/814e1ea7-a639-4b05-9208-0bf537ea5479.json b/data/hfopenllm_v2/lemon07r/llama-3-NeuralMahou-8b/814e1ea7-a639-4b05-9208-0bf537ea5479.json new file mode 100644 index 000000000..27784d919 --- /dev/null +++ b/data/hfopenllm_v2/lemon07r/llama-3-NeuralMahou-8b/814e1ea7-a639-4b05-9208-0bf537ea5479.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lemon07r_llama-3-NeuralMahou-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-3-NeuralMahou-8b", + "id": "lemon07r/llama-3-NeuralMahou-8b", + "developer": "lemon07r", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4901 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4184 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + 
"metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.102 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2886 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3873 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.369 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lesubra/ECE-EIFFEL-3B/35a50d36-31d0-454b-a13c-80ca26945f94.json b/data/hfopenllm_v2/lesubra/ECE-EIFFEL-3B/35a50d36-31d0-454b-a13c-80ca26945f94.json new file mode 100644 index 000000000..83e427178 --- /dev/null +++ b/data/hfopenllm_v2/lesubra/ECE-EIFFEL-3B/35a50d36-31d0-454b-a13c-80ca26945f94.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lesubra_ECE-EIFFEL-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-EIFFEL-3B", + "id": "lesubra/ECE-EIFFEL-3B", + "developer": "lesubra", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3469 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5102 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1216 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": 
"GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3314 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4362 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3821 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lesubra/ECE-EIFFEL-3B/b32f3852-47ce-4ca5-98a0-5e2f166a11e9.json b/data/hfopenllm_v2/lesubra/ECE-EIFFEL-3B/b32f3852-47ce-4ca5-98a0-5e2f166a11e9.json deleted file mode 100644 index d13ebcfd0..000000000 --- a/data/hfopenllm_v2/lesubra/ECE-EIFFEL-3B/b32f3852-47ce-4ca5-98a0-5e2f166a11e9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lesubra_ECE-EIFFEL-3B/1762652580.319232", - "retrieved_timestamp": "1762652580.319233", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lesubra/ECE-EIFFEL-3B", - "developer": "lesubra", - "inference_platform": "unknown", - "id": "lesubra/ECE-EIFFEL-3B", - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3469405621528655 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5101583259186949 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1216012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43622916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3820644946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/lesubra/ECE-EIFFEL-3Bv2/7e511f3b-7d8e-44c4-ad3f-7f6e66231109.json b/data/hfopenllm_v2/lesubra/ECE-EIFFEL-3Bv2/7e511f3b-7d8e-44c4-ad3f-7f6e66231109.json deleted file mode 100644 index 695a131ff..000000000 --- a/data/hfopenllm_v2/lesubra/ECE-EIFFEL-3Bv2/7e511f3b-7d8e-44c4-ad3f-7f6e66231109.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lesubra_ECE-EIFFEL-3Bv2/1762652580.319594", - "retrieved_timestamp": "1762652580.319595", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lesubra/ECE-EIFFEL-3Bv2", - "developer": "lesubra", - "inference_platform": "unknown", - "id": "lesubra/ECE-EIFFEL-3Bv2", - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30130276555096036 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5424007873371969 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11858006042296072 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33557046979865773 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4442916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39993351063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/lesubra/ECE-EIFFEL-3Bv2/87347017-4ff1-4bd3-a1d7-8f3999061209.json b/data/hfopenllm_v2/lesubra/ECE-EIFFEL-3Bv2/87347017-4ff1-4bd3-a1d7-8f3999061209.json new file mode 100644 index 000000000..3d51d39fa --- /dev/null +++ b/data/hfopenllm_v2/lesubra/ECE-EIFFEL-3Bv2/87347017-4ff1-4bd3-a1d7-8f3999061209.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lesubra_ECE-EIFFEL-3Bv2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": 
"Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-EIFFEL-3Bv2", + "id": "lesubra/ECE-EIFFEL-3Bv2", + "developer": "lesubra", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3013 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5424 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1186 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3356 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4443 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3999 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lesubra/ECE-EIFFEL-3Bv3/317a27cd-9458-4157-a304-0c1e3739d0fb.json b/data/hfopenllm_v2/lesubra/ECE-EIFFEL-3Bv3/317a27cd-9458-4157-a304-0c1e3739d0fb.json deleted file mode 100644 index 17bd1e742..000000000 --- a/data/hfopenllm_v2/lesubra/ECE-EIFFEL-3Bv3/317a27cd-9458-4157-a304-0c1e3739d0fb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lesubra_ECE-EIFFEL-3Bv3/1762652580.319853", - "retrieved_timestamp": "1762652580.319854", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lesubra/ECE-EIFFEL-3Bv3", - "developer": "lesubra", - "inference_platform": 
"unknown", - "id": "lesubra/ECE-EIFFEL-3Bv3", - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3786142989490109 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5469446669064592 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16691842900302115 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3296979865771812 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46751041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39752327127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/lesubra/ECE-EIFFEL-3Bv3/976184ed-c4ed-4898-83c7-521a8a8309ac.json b/data/hfopenllm_v2/lesubra/ECE-EIFFEL-3Bv3/976184ed-c4ed-4898-83c7-521a8a8309ac.json new file mode 100644 index 000000000..33d7b6db8 --- /dev/null +++ b/data/hfopenllm_v2/lesubra/ECE-EIFFEL-3Bv3/976184ed-c4ed-4898-83c7-521a8a8309ac.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lesubra_ECE-EIFFEL-3Bv3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-EIFFEL-3Bv3", + "id": "lesubra/ECE-EIFFEL-3Bv3", + "developer": "lesubra", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3786 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5469 + } + }, + { + "evaluation_name": "MATH Level 5", + 
"source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1669 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4675 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3975 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP-V1/6fb1242d-bf20-43e6-acfe-77a88c020eee.json b/data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP-V1/6fb1242d-bf20-43e6-acfe-77a88c020eee.json deleted file mode 100644 index 1e86dfb18..000000000 --- a/data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP-V1/6fb1242d-bf20-43e6-acfe-77a88c020eee.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lesubra_ECE-PRYMMAL-3B-SLERP-V1/1762652580.320159", - "retrieved_timestamp": "1762652580.32016", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lesubra/ECE-PRYMMAL-3B-SLERP-V1", - "developer": "lesubra", - "inference_platform": "unknown", - "id": "lesubra/ECE-PRYMMAL-3B-SLERP-V1", - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2932840418977203 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5340594627933309 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": 
"Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45951041666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3900432180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP-V1/fa52f072-7725-4a4e-b728-042e5897a1bd.json b/data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP-V1/fa52f072-7725-4a4e-b728-042e5897a1bd.json new file mode 100644 index 000000000..692fe6956 --- /dev/null +++ b/data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP-V1/fa52f072-7725-4a4e-b728-042e5897a1bd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lesubra_ECE-PRYMMAL-3B-SLERP-V1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-PRYMMAL-3B-SLERP-V1", + "id": "lesubra/ECE-PRYMMAL-3B-SLERP-V1", + "developer": "lesubra", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2933 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5341 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1662 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3171 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.4595 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.39 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP-V2/6374dcee-301c-4f28-9316-82ed8e693089.json b/data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP-V2/6374dcee-301c-4f28-9316-82ed8e693089.json new file mode 100644 index 000000000..91ea20f99 --- /dev/null +++ b/data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP-V2/6374dcee-301c-4f28-9316-82ed8e693089.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lesubra_ECE-PRYMMAL-3B-SLERP-V2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-PRYMMAL-3B-SLERP-V2", + "id": "lesubra/ECE-PRYMMAL-3B-SLERP-V2", + "developer": "lesubra", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2933 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5341 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1662 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3171 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4595 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.39 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP-V2/cb14b942-7c2f-489f-bede-d25279ea39ac.json b/data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP-V2/cb14b942-7c2f-489f-bede-d25279ea39ac.json deleted file mode 100644 index 6d78ac84c..000000000 --- a/data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP-V2/cb14b942-7c2f-489f-bede-d25279ea39ac.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lesubra_ECE-PRYMMAL-3B-SLERP-V2/1762652580.320386", - "retrieved_timestamp": "1762652580.3203871", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lesubra/ECE-PRYMMAL-3B-SLERP-V2", - "developer": "lesubra", - "inference_platform": "unknown", - "id": "lesubra/ECE-PRYMMAL-3B-SLERP-V2", - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2932840418977203 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5340594627933309 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45951041666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3900432180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP_2-V1/b7c95cb4-f32f-466e-a28c-32afd9ec5578.json b/data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP_2-V1/b7c95cb4-f32f-466e-a28c-32afd9ec5578.json new file mode 100644 index 000000000..7c02e49b7 --- /dev/null +++ b/data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP_2-V1/b7c95cb4-f32f-466e-a28c-32afd9ec5578.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lesubra_ECE-PRYMMAL-3B-SLERP_2-V1/1770682486.623709", + "retrieved_timestamp": 
"1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-PRYMMAL-3B-SLERP_2-V1", + "id": "lesubra/ECE-PRYMMAL-3B-SLERP_2-V1", + "developer": "lesubra", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3649 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5411 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1677 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3213 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4661 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.399 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP_2-V1/c6b7d02d-4d2d-43fa-95a8-aa188f38120a.json b/data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP_2-V1/c6b7d02d-4d2d-43fa-95a8-aa188f38120a.json deleted file mode 100644 index 113831d3f..000000000 --- a/data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP_2-V1/c6b7d02d-4d2d-43fa-95a8-aa188f38120a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lesubra_ECE-PRYMMAL-3B-SLERP_2-V1/1762652580.320611", - "retrieved_timestamp": "1762652580.3206122", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lesubra/ECE-PRYMMAL-3B-SLERP_2-V1", - "developer": "lesubra", - "inference_platform": "unknown", - "id": "lesubra/ECE-PRYMMAL-3B-SLERP_2-V1", - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3649006857360692 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5411447467732948 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16767371601208458 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4661458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3990192819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP_2-V2/653cb458-4616-4325-b377-a79ee4a5d9c6.json b/data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP_2-V2/653cb458-4616-4325-b377-a79ee4a5d9c6.json deleted file mode 100644 index 09c4eac50..000000000 --- a/data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP_2-V2/653cb458-4616-4325-b377-a79ee4a5d9c6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lesubra_ECE-PRYMMAL-3B-SLERP_2-V2/1762652580.320825", - "retrieved_timestamp": "1762652580.320826", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lesubra/ECE-PRYMMAL-3B-SLERP_2-V2", - "developer": "lesubra", - "inference_platform": "unknown", - "id": "lesubra/ECE-PRYMMAL-3B-SLERP_2-V2", - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3664244205375071 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": 
"Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5411447467732948 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16767371601208458 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4661458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3990192819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP_2-V2/bddd742b-f7c9-44aa-ad2f-83f51a4625be.json b/data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP_2-V2/bddd742b-f7c9-44aa-ad2f-83f51a4625be.json new file mode 100644 index 000000000..7ac9d72fd --- /dev/null +++ b/data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP_2-V2/bddd742b-f7c9-44aa-ad2f-83f51a4625be.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lesubra_ECE-PRYMMAL-3B-SLERP_2-V2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-PRYMMAL-3B-SLERP_2-V2", + "id": "lesubra/ECE-PRYMMAL-3B-SLERP_2-V2", + "developer": "lesubra", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3664 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5411 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1677 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + 
"hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3213 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4661 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.399 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lesubra/merge-test/099af0ee-c06b-4435-8f97-27681f3eddff.json b/data/hfopenllm_v2/lesubra/merge-test/099af0ee-c06b-4435-8f97-27681f3eddff.json new file mode 100644 index 000000000..0105f3bb4 --- /dev/null +++ b/data/hfopenllm_v2/lesubra/merge-test/099af0ee-c06b-4435-8f97-27681f3eddff.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lesubra_merge-test/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "merge-test", + "id": "lesubra/merge-test", + "developer": "lesubra", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5383 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.524 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1208 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3221 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + 
"dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4419 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3874 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lesubra/merge-test/6f16b360-346a-4299-8f60-fafc0bb8ebcd.json b/data/hfopenllm_v2/lesubra/merge-test/6f16b360-346a-4299-8f60-fafc0bb8ebcd.json deleted file mode 100644 index 9cc74e152..000000000 --- a/data/hfopenllm_v2/lesubra/merge-test/6f16b360-346a-4299-8f60-fafc0bb8ebcd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lesubra_merge-test/1762652580.321054", - "retrieved_timestamp": "1762652580.321055", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lesubra/merge-test", - "developer": "lesubra", - "inference_platform": "unknown", - "id": "lesubra/merge-test", - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.538257379309122 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5240434385320306 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221476510067114 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44190625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38738364361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-full/fa826f3a-8688-4518-8d44-68189abb47ba.json 
b/data/hfopenllm_v2/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-full/fa826f3a-8688-4518-8d44-68189abb47ba.json new file mode 100644 index 000000000..122789813 --- /dev/null +++ b/data/hfopenllm_v2/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-full/fa826f3a-8688-4518-8d44-68189abb47ba.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lightblue_suzume-llama-3-8B-multilingual-orpo-borda-full/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "suzume-llama-3-8B-multilingual-orpo-borda-full", + "id": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-full", + "developer": "lightblue", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5817 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4714 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0763 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3222 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.331 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-half/10d29dc0-3486-40df-9933-1ce8f0fabaa2.json 
b/data/hfopenllm_v2/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-half/10d29dc0-3486-40df-9933-1ce8f0fabaa2.json new file mode 100644 index 000000000..b4ab0a93e --- /dev/null +++ b/data/hfopenllm_v2/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-half/10d29dc0-3486-40df-9933-1ce8f0fabaa2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lightblue_suzume-llama-3-8B-multilingual-orpo-borda-half/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "suzume-llama-3-8B-multilingual-orpo-borda-half", + "id": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-half", + "developer": "lightblue", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6249 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4707 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0906 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.245 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3516 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3614 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top25/741ff375-3392-461e-a9b0-e0dab4e6e9f8.json 
b/data/hfopenllm_v2/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top25/741ff375-3392-461e-a9b0-e0dab4e6e9f8.json new file mode 100644 index 000000000..9a8eccba8 --- /dev/null +++ b/data/hfopenllm_v2/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top25/741ff375-3392-461e-a9b0-e0dab4e6e9f8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lightblue_suzume-llama-3-8B-multilingual-orpo-borda-top25/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "suzume-llama-3-8B-multilingual-orpo-borda-top25", + "id": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top25", + "developer": "lightblue", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6637 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4865 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1042 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3566 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3684 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top75/c3d709de-118d-40c2-ab89-040efedd7fdb.json 
b/data/hfopenllm_v2/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top75/c3d709de-118d-40c2-ab89-040efedd7fdb.json new file mode 100644 index 000000000..669b89390 --- /dev/null +++ b/data/hfopenllm_v2/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top75/c3d709de-118d-40c2-ab89-040efedd7fdb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lightblue_suzume-llama-3-8B-multilingual-orpo-borda-top75/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "suzume-llama-3-8B-multilingual-orpo-borda-top75", + "id": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top75", + "developer": "lightblue", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6687 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4833 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0785 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3817 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3769 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lightblue/suzume-llama-3-8B-multilingual/9be3dd27-93fa-49e9-a628-5a77a8a3bb9a.json 
b/data/hfopenllm_v2/lightblue/suzume-llama-3-8B-multilingual/9be3dd27-93fa-49e9-a628-5a77a8a3bb9a.json new file mode 100644 index 000000000..919b41cbc --- /dev/null +++ b/data/hfopenllm_v2/lightblue/suzume-llama-3-8B-multilingual/9be3dd27-93fa-49e9-a628-5a77a8a3bb9a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lightblue_suzume-llama-3-8B-multilingual/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "suzume-llama-3-8B-multilingual", + "id": "lightblue/suzume-llama-3-8B-multilingual", + "developer": "lightblue", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6678 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.495 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0944 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3977 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3383 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lkoenig/BBAI_145_/0f29b1ac-1943-463a-8a79-a4c0ace371cb.json b/data/hfopenllm_v2/lkoenig/BBAI_145_/0f29b1ac-1943-463a-8a79-a4c0ace371cb.json deleted file mode 100644 index b17ff2072..000000000 --- 
a/data/hfopenllm_v2/lkoenig/BBAI_145_/0f29b1ac-1943-463a-8a79-a4c0ace371cb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_145_/1762652580.322459", - "retrieved_timestamp": "1762652580.32246", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lkoenig/BBAI_145_", - "developer": "lkoenig", - "inference_platform": "unknown", - "id": "lkoenig/BBAI_145_", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44503473007176514 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5567169940219221 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3610271903323263 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4382083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.448969414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/lkoenig/BBAI_145_/be850d1b-bf75-4c34-830f-8881792ac842.json b/data/hfopenllm_v2/lkoenig/BBAI_145_/be850d1b-bf75-4c34-830f-8881792ac842.json new file mode 100644 index 000000000..302154c6b --- /dev/null +++ b/data/hfopenllm_v2/lkoenig/BBAI_145_/be850d1b-bf75-4c34-830f-8881792ac842.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_145_/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BBAI_145_", + "id": "lkoenig/BBAI_145_", + "developer": "lkoenig", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.445 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5567 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.361 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3163 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4382 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.449 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lkoenig/BBAI_200_Gemma/6b644b97-4fc3-4826-9ea9-68be1dc8e947.json b/data/hfopenllm_v2/lkoenig/BBAI_200_Gemma/6b644b97-4fc3-4826-9ea9-68be1dc8e947.json new file mode 100644 index 000000000..7865c5bfc --- /dev/null +++ b/data/hfopenllm_v2/lkoenig/BBAI_200_Gemma/6b644b97-4fc3-4826-9ea9-68be1dc8e947.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_200_Gemma/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BBAI_200_Gemma", + "id": "lkoenig/BBAI_200_Gemma", + "developer": "lkoenig", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 19.3 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0705 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + 
"source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3449 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3631 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1679 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lkoenig/BBAI_212_QwenLawLo/861d41f1-6d33-4e07-96ea-2c39a36c4b63.json b/data/hfopenllm_v2/lkoenig/BBAI_212_QwenLawLo/861d41f1-6d33-4e07-96ea-2c39a36c4b63.json new file mode 100644 index 000000000..2d76a0d19 --- /dev/null +++ b/data/hfopenllm_v2/lkoenig/BBAI_212_QwenLawLo/861d41f1-6d33-4e07-96ea-2c39a36c4b63.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_212_QwenLawLo/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BBAI_212_QwenLawLo", + "id": "lkoenig/BBAI_212_QwenLawLo", + "developer": "lkoenig", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4566 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.5574 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3603 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3163 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.437 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4489 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lkoenig/BBAI_212_Qwencore/7501b038-4847-45bc-8b92-6800d7a58c1e.json b/data/hfopenllm_v2/lkoenig/BBAI_212_Qwencore/7501b038-4847-45bc-8b92-6800d7a58c1e.json new file mode 100644 index 000000000..3b77ae393 --- /dev/null +++ b/data/hfopenllm_v2/lkoenig/BBAI_212_Qwencore/7501b038-4847-45bc-8b92-6800d7a58c1e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_212_Qwencore/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BBAI_212_Qwencore", + "id": "lkoenig/BBAI_212_Qwencore", + "developer": "lkoenig", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4384 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5569 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", 
+ "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3489 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3163 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4343 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.449 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lkoenig/BBAI_230_Xiaqwen/db48206d-700b-45f3-b597-8752110113b5.json b/data/hfopenllm_v2/lkoenig/BBAI_230_Xiaqwen/db48206d-700b-45f3-b597-8752110113b5.json new file mode 100644 index 000000000..af9941d74 --- /dev/null +++ b/data/hfopenllm_v2/lkoenig/BBAI_230_Xiaqwen/db48206d-700b-45f3-b597-8752110113b5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_230_Xiaqwen/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BBAI_230_Xiaqwen", + "id": "lkoenig/BBAI_230_Xiaqwen", + "developer": "lkoenig", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4649 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5578 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3663 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": 
"Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3138 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4422 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4481 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lkoenig/BBAI_375_QwenDyancabs/b52b76e4-9dec-4336-88b1-d98b95b95d2a.json b/data/hfopenllm_v2/lkoenig/BBAI_375_QwenDyancabs/b52b76e4-9dec-4336-88b1-d98b95b95d2a.json new file mode 100644 index 000000000..687f6c068 --- /dev/null +++ b/data/hfopenllm_v2/lkoenig/BBAI_375_QwenDyancabs/b52b76e4-9dec-4336-88b1-d98b95b95d2a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_375_QwenDyancabs/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BBAI_375_QwenDyancabs", + "id": "lkoenig/BBAI_375_QwenDyancabs", + "developer": "lkoenig", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4566 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5571 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3776 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3129 + } + }, + { + 
"evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4462 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4476 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lkoenig/BBAI_456_QwenKoen/ba9ec2ea-2bce-4999-9e48-e1d0795b31d0.json b/data/hfopenllm_v2/lkoenig/BBAI_456_QwenKoen/ba9ec2ea-2bce-4999-9e48-e1d0795b31d0.json new file mode 100644 index 000000000..b1a7b3d3e --- /dev/null +++ b/data/hfopenllm_v2/lkoenig/BBAI_456_QwenKoen/ba9ec2ea-2bce-4999-9e48-e1d0795b31d0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_456_QwenKoen/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BBAI_456_QwenKoen", + "id": "lkoenig/BBAI_456_QwenKoen", + "developer": "lkoenig", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4529 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5553 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3686 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3129 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4395 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4469 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lkoenig/BBAI_7B_KoenQwenDyan/724221ce-d7b2-43cb-8e16-72ac529a7b60.json b/data/hfopenllm_v2/lkoenig/BBAI_7B_KoenQwenDyan/724221ce-d7b2-43cb-8e16-72ac529a7b60.json new file mode 100644 index 000000000..41f147ae3 --- /dev/null +++ b/data/hfopenllm_v2/lkoenig/BBAI_7B_KoenQwenDyan/724221ce-d7b2-43cb-8e16-72ac529a7b60.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_7B_KoenQwenDyan/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BBAI_7B_KoenQwenDyan", + "id": "lkoenig/BBAI_7B_KoenQwenDyan", + "developer": "lkoenig", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5807 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5537 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3739 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.318 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4369 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.446 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lkoenig/BBAI_7B_Qwen2.5koen/552f3814-d071-4d00-a895-b739dffdcb2d.json b/data/hfopenllm_v2/lkoenig/BBAI_7B_Qwen2.5koen/552f3814-d071-4d00-a895-b739dffdcb2d.json new file mode 100644 index 000000000..b762f6c72 --- /dev/null +++ b/data/hfopenllm_v2/lkoenig/BBAI_7B_Qwen2.5koen/552f3814-d071-4d00-a895-b739dffdcb2d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_7B_Qwen2.5koen/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BBAI_7B_Qwen2.5koen", + "id": "lkoenig/BBAI_7B_Qwen2.5koen", + "developer": "lkoenig", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.46 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5544 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3656 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3129 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4369 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4485 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/lkoenig/BBAI_7B_QwenDyanKoenLo/d3819133-bae8-493d-9a86-aee67da5d115.json b/data/hfopenllm_v2/lkoenig/BBAI_7B_QwenDyanKoenLo/d3819133-bae8-493d-9a86-aee67da5d115.json new file mode 100644 index 000000000..536de7261 --- /dev/null +++ b/data/hfopenllm_v2/lkoenig/BBAI_7B_QwenDyanKoenLo/d3819133-bae8-493d-9a86-aee67da5d115.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_7B_QwenDyanKoenLo/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BBAI_7B_QwenDyanKoenLo", + "id": "lkoenig/BBAI_7B_QwenDyanKoenLo", + "developer": "lkoenig", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4663 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5562 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.364 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3188 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4343 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4465 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lkoenig/BBAI_7B_QwenDyancabsLAW/5c3a022f-7221-4b4f-ab67-d5b69c558434.json b/data/hfopenllm_v2/lkoenig/BBAI_7B_QwenDyancabsLAW/5c3a022f-7221-4b4f-ab67-d5b69c558434.json new file mode 100644 index 000000000..9a62f605c --- 
/dev/null +++ b/data/hfopenllm_v2/lkoenig/BBAI_7B_QwenDyancabsLAW/5c3a022f-7221-4b4f-ab67-d5b69c558434.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_7B_QwenDyancabsLAW/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BBAI_7B_QwenDyancabsLAW", + "id": "lkoenig/BBAI_7B_QwenDyancabsLAW", + "developer": "lkoenig", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.555 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5579 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3678 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3188 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4461 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4471 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/llmat/Mistral-v0.3-7B-ORPO/c161b868-746f-4d88-9f41-eb8283a7b87a.json b/data/hfopenllm_v2/llmat/Mistral-v0.3-7B-ORPO/c161b868-746f-4d88-9f41-eb8283a7b87a.json new file mode 100644 index 000000000..240e3a159 --- /dev/null +++ b/data/hfopenllm_v2/llmat/Mistral-v0.3-7B-ORPO/c161b868-746f-4d88-9f41-eb8283a7b87a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/llmat_Mistral-v0.3-7B-ORPO/1770682486.623709", + 
"retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-v0.3-7B-ORPO", + "id": "llmat/Mistral-v0.3-7B-ORPO", + "developer": "llmat", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.377 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3978 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0242 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3555 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2278 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/llmat/Mistral-v0.3-7B-ORPO/f79a76fc-09ff-48c8-b0e7-5f18e0750e6d.json b/data/hfopenllm_v2/llmat/Mistral-v0.3-7B-ORPO/f79a76fc-09ff-48c8-b0e7-5f18e0750e6d.json new file mode 100644 index 000000000..ffd455c9c --- /dev/null +++ b/data/hfopenllm_v2/llmat/Mistral-v0.3-7B-ORPO/f79a76fc-09ff-48c8-b0e7-5f18e0750e6d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/llmat_Mistral-v0.3-7B-ORPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"Mistral-v0.3-7B-ORPO", + "id": "llmat/Mistral-v0.3-7B-ORPO", + "developer": "llmat", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.364 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4005 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0015 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2693 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3529 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2301 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-1B-SLERP-V5/334bc38a-becd-405b-8982-dfaf5de35c4b.json b/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-1B-SLERP-V5/334bc38a-becd-405b-8982-dfaf5de35c4b.json deleted file mode 100644 index 5ecfaad90..000000000 --- a/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-1B-SLERP-V5/334bc38a-becd-405b-8982-dfaf5de35c4b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/llnYou_ECE-PRYMMAL-YL-1B-SLERP-V5/1762652580.3253949", - "retrieved_timestamp": "1762652580.325396", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "llnYou/ECE-PRYMMAL-YL-1B-SLERP-V5", - "developer": "llnYou", - "inference_platform": "unknown", - "id": 
"llnYou/ECE-PRYMMAL-YL-1B-SLERP-V5", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33125329680802496 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42329545804357255 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11102719033232629 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3868020833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29305186170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-1B-SLERP-V5/39f4d1ab-fd42-4746-b949-9666ce32f9d1.json b/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-1B-SLERP-V5/39f4d1ab-fd42-4746-b949-9666ce32f9d1.json new file mode 100644 index 000000000..cc9cfb3e3 --- /dev/null +++ b/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-1B-SLERP-V5/39f4d1ab-fd42-4746-b949-9666ce32f9d1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/llnYou_ECE-PRYMMAL-YL-1B-SLERP-V5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-PRYMMAL-YL-1B-SLERP-V5", + "id": "llnYou/ECE-PRYMMAL-YL-1B-SLERP-V5", + "developer": "llnYou", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3313 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.4233 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.111 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2861 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3868 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2931 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-1B-SLERP-V6/8348f316-9109-4229-9fee-edc02431befa.json b/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-1B-SLERP-V6/8348f316-9109-4229-9fee-edc02431befa.json new file mode 100644 index 000000000..48d4a3877 --- /dev/null +++ b/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-1B-SLERP-V6/8348f316-9109-4229-9fee-edc02431befa.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/llnYou_ECE-PRYMMAL-YL-1B-SLERP-V6/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-PRYMMAL-YL-1B-SLERP-V6", + "id": "llnYou/ECE-PRYMMAL-YL-1B-SLERP-V6", + "developer": "llnYou", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.357 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1388 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3944 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0023 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3928 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.235 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-1B-SLERP-V6/eaa1adca-5379-4aab-bf39-8641df58a530.json b/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-1B-SLERP-V6/eaa1adca-5379-4aab-bf39-8641df58a530.json deleted file mode 100644 index 9565d0d92..000000000 --- a/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-1B-SLERP-V6/eaa1adca-5379-4aab-bf39-8641df58a530.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/llnYou_ECE-PRYMMAL-YL-1B-SLERP-V6/1762652580.325702", - "retrieved_timestamp": "1762652580.325703", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "llnYou/ECE-PRYMMAL-YL-1B-SLERP-V6", - "developer": "llnYou", - "inference_platform": "unknown", - "id": "llnYou/ECE-PRYMMAL-YL-1B-SLERP-V6", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.357 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13876181864120535 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3944027089700251 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0022658610271903325 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39279166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2349567819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-3B-SLERP-V1/6b2346c6-5fbf-4195-b3bb-66bbd446ca53.json b/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-3B-SLERP-V1/6b2346c6-5fbf-4195-b3bb-66bbd446ca53.json new file mode 100644 index 000000000..914529969 --- /dev/null +++ b/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-3B-SLERP-V1/6b2346c6-5fbf-4195-b3bb-66bbd446ca53.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/llnYou_ECE-PRYMMAL-YL-3B-SLERP-V1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-PRYMMAL-YL-3B-SLERP-V1", + "id": "llnYou/ECE-PRYMMAL-YL-3B-SLERP-V1", + "developer": "llnYou", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 2.81 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2346 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4018 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0091 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3364 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": 
"MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.285 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-3B-SLERP-V1/844c959f-6859-4220-bdd8-99e6af53808b.json b/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-3B-SLERP-V1/844c959f-6859-4220-bdd8-99e6af53808b.json deleted file mode 100644 index 1517d28cb..000000000 --- a/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-3B-SLERP-V1/844c959f-6859-4220-bdd8-99e6af53808b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/llnYou_ECE-PRYMMAL-YL-3B-SLERP-V1/1762652580.325917", - "retrieved_timestamp": "1762652580.325917", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "llnYou/ECE-PRYMMAL-YL-3B-SLERP-V1", - "developer": "llnYou", - "inference_platform": "unknown", - "id": "llnYou/ECE-PRYMMAL-YL-3B-SLERP-V1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 2.81 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23463299600615256 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4018418465179459 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3364479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2849900265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-3B-SLERP-V2/2bb16fd8-516f-42d6-91e1-2f3f4024f0d4.json b/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-3B-SLERP-V2/2bb16fd8-516f-42d6-91e1-2f3f4024f0d4.json deleted file mode 100644 index d250c850c..000000000 --- a/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-3B-SLERP-V2/2bb16fd8-516f-42d6-91e1-2f3f4024f0d4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/llnYou_ECE-PRYMMAL-YL-3B-SLERP-V2/1762652580.326129", - "retrieved_timestamp": "1762652580.326129", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "llnYou/ECE-PRYMMAL-YL-3B-SLERP-V2", - "developer": "llnYou", - "inference_platform": "unknown", - "id": "llnYou/ECE-PRYMMAL-YL-3B-SLERP-V2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 2.81 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2309361383351729 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39897709281426197 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3587708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28997672872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-3B-SLERP-V2/8645ffc1-6487-4205-b8b0-e980e094ac6c.json b/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-3B-SLERP-V2/8645ffc1-6487-4205-b8b0-e980e094ac6c.json new file mode 100644 index 000000000..dd572deaa --- /dev/null +++ b/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-3B-SLERP-V2/8645ffc1-6487-4205-b8b0-e980e094ac6c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/llnYou_ECE-PRYMMAL-YL-3B-SLERP-V2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-PRYMMAL-YL-3B-SLERP-V2", + "id": "llnYou/ECE-PRYMMAL-YL-3B-SLERP-V2", + "developer": "llnYou", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 2.81 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2309 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.399 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0128 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2768 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3588 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.29 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-3B-SLERP-V3/183cd87c-2415-4428-9ad1-9d41c0dcdc41.json b/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-3B-SLERP-V3/183cd87c-2415-4428-9ad1-9d41c0dcdc41.json deleted file mode 100644 index b684bb6e7..000000000 --- a/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-3B-SLERP-V3/183cd87c-2415-4428-9ad1-9d41c0dcdc41.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/llnYou_ECE-PRYMMAL-YL-3B-SLERP-V3/1762652580.326333", - "retrieved_timestamp": "1762652580.326334", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "llnYou/ECE-PRYMMAL-YL-3B-SLERP-V3", - "developer": "llnYou", - "inference_platform": "unknown", - "id": "llnYou/ECE-PRYMMAL-YL-3B-SLERP-V3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35808100285021516 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5473121918055145 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43613541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40433843085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-3B-SLERP-V3/2c6d1e57-7673-4a86-808e-6ff6a7146a11.json b/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-3B-SLERP-V3/2c6d1e57-7673-4a86-808e-6ff6a7146a11.json new file mode 100644 index 000000000..b5bc635f3 --- /dev/null +++ b/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-3B-SLERP-V3/2c6d1e57-7673-4a86-808e-6ff6a7146a11.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/llnYou_ECE-PRYMMAL-YL-3B-SLERP-V3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-PRYMMAL-YL-3B-SLERP-V3", + "id": "llnYou/ECE-PRYMMAL-YL-3B-SLERP-V3", + "developer": "llnYou", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3581 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5473 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1299 + } + }, + { + "evaluation_name": "GPQA", + 
"source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4361 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4043 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lmsys/vicuna-13b-v1.3/5b0377fc-5df1-4ed0-bad4-ab13bc42677c.json b/data/hfopenllm_v2/lmsys/vicuna-13b-v1.3/5b0377fc-5df1-4ed0-bad4-ab13bc42677c.json deleted file mode 100644 index 46c6f7c4a..000000000 --- a/data/hfopenllm_v2/lmsys/vicuna-13b-v1.3/5b0377fc-5df1-4ed0-bad4-ab13bc42677c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lmsys_vicuna-13b-v1.3/1762652580.3265438", - "retrieved_timestamp": "1762652580.326545", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lmsys/vicuna-13b-v1.3", - "developer": "lmsys", - "inference_platform": "unknown", - "id": "lmsys/vicuna-13b-v1.3", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3343506340953115 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3384399312777569 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3727291666666666 - } - }, - { - "evaluation_name": 
"MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2243184840425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/lmsys/vicuna-13b-v1.3/64ab8b1a-62be-4561-8f0c-e42f1fe37178.json b/data/hfopenllm_v2/lmsys/vicuna-13b-v1.3/64ab8b1a-62be-4561-8f0c-e42f1fe37178.json new file mode 100644 index 000000000..51fcf6393 --- /dev/null +++ b/data/hfopenllm_v2/lmsys/vicuna-13b-v1.3/64ab8b1a-62be-4561-8f0c-e42f1fe37178.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lmsys_vicuna-13b-v1.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "vicuna-13b-v1.3", + "id": "lmsys/vicuna-13b-v1.3", + "developer": "lmsys", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3344 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3384 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0144 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3727 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2243 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/lmsys/vicuna-7b-v1.3/3eb22885-eb7c-4c85-b79f-cd47ffacd551.json b/data/hfopenllm_v2/lmsys/vicuna-7b-v1.3/3eb22885-eb7c-4c85-b79f-cd47ffacd551.json new file mode 100644 index 000000000..adbfd12b9 --- /dev/null +++ b/data/hfopenllm_v2/lmsys/vicuna-7b-v1.3/3eb22885-eb7c-4c85-b79f-cd47ffacd551.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lmsys_vicuna-7b-v1.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "vicuna-7b-v1.3", + "id": "lmsys/vicuna-7b-v1.3", + "developer": "lmsys", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2909 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3298 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0128 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2424 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3793 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1838 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lmsys/vicuna-7b-v1.3/b8e50988-f2c5-4508-a5c5-2813d94f7ebd.json b/data/hfopenllm_v2/lmsys/vicuna-7b-v1.3/b8e50988-f2c5-4508-a5c5-2813d94f7ebd.json deleted file mode 100644 index 137720dac..000000000 --- a/data/hfopenllm_v2/lmsys/vicuna-7b-v1.3/b8e50988-f2c5-4508-a5c5-2813d94f7ebd.json +++ 
/dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lmsys_vicuna-7b-v1.3/1762652580.326798", - "retrieved_timestamp": "1762652580.3267992", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lmsys/vicuna-7b-v1.3", - "developer": "lmsys", - "inference_platform": "unknown", - "id": "lmsys/vicuna-7b-v1.3", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29086158060612505 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3298410006592924 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2424496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3793333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18375997340425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/lmsys/vicuna-7b-v1.5/26c5c07e-8482-44b4-8f11-a602e79fb730.json b/data/hfopenllm_v2/lmsys/vicuna-7b-v1.5/26c5c07e-8482-44b4-8f11-a602e79fb730.json deleted file mode 100644 index a6b5160bb..000000000 --- a/data/hfopenllm_v2/lmsys/vicuna-7b-v1.5/26c5c07e-8482-44b4-8f11-a602e79fb730.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lmsys_vicuna-7b-v1.5/1762652580.327009", - "retrieved_timestamp": "1762652580.3270102", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lmsys/vicuna-7b-v1.5", - "developer": "lmsys", - "inference_platform": "unknown", - "id": "lmsys/vicuna-7b-v1.5", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": 
"Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23515716077784724 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39470436842233775 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42311458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21467752659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/lmsys/vicuna-7b-v1.5/8956d608-c627-469b-943d-bfad6c7382af.json b/data/hfopenllm_v2/lmsys/vicuna-7b-v1.5/8956d608-c627-469b-943d-bfad6c7382af.json new file mode 100644 index 000000000..54b24d075 --- /dev/null +++ b/data/hfopenllm_v2/lmsys/vicuna-7b-v1.5/8956d608-c627-469b-943d-bfad6c7382af.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lmsys_vicuna-7b-v1.5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "vicuna-7b-v1.5", + "id": "lmsys/vicuna-7b-v1.5", + "developer": "lmsys", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2352 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3947 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.0136 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4231 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2147 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lodrick-the-lafted/llama-3.1-8b-instruct-ortho-v7/81d006e2-3be1-4941-bf85-74f1b313c7d7.json b/data/hfopenllm_v2/lodrick-the-lafted/llama-3.1-8b-instruct-ortho-v7/81d006e2-3be1-4941-bf85-74f1b313c7d7.json deleted file mode 100644 index fccdb803e..000000000 --- a/data/hfopenllm_v2/lodrick-the-lafted/llama-3.1-8b-instruct-ortho-v7/81d006e2-3be1-4941-bf85-74f1b313c7d7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lodrick-the-lafted_llama-3.1-8b-instruct-ortho-v7/1762652580.327225", - "retrieved_timestamp": "1762652580.3272262", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lodrick-the-lafted/llama-3.1-8b-instruct-ortho-v7", - "developer": "lodrick-the-lafted", - "inference_platform": "unknown", - "id": "lodrick-the-lafted/llama-3.1-8b-instruct-ortho-v7", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3514618988727687 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39069140261362917 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027190332326283987 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, 
- { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36159375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1973902925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/lodrick-the-lafted/llama-3.1-8b-instruct-ortho-v7/9ff060c8-d4fa-4880-a0cd-9581f5c2f574.json b/data/hfopenllm_v2/lodrick-the-lafted/llama-3.1-8b-instruct-ortho-v7/9ff060c8-d4fa-4880-a0cd-9581f5c2f574.json new file mode 100644 index 000000000..2ad864b95 --- /dev/null +++ b/data/hfopenllm_v2/lodrick-the-lafted/llama-3.1-8b-instruct-ortho-v7/9ff060c8-d4fa-4880-a0cd-9581f5c2f574.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lodrick-the-lafted_llama-3.1-8b-instruct-ortho-v7/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-3.1-8b-instruct-ortho-v7", + "id": "lodrick-the-lafted/llama-3.1-8b-instruct-ortho-v7", + "developer": "lodrick-the-lafted", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3515 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3907 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0272 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3616 + } + }, + { + 
"evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1974 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lordjia/Llama-3-Cantonese-8B-Instruct/e3d6b3d7-a231-40c1-bac9-0b7fcb478bca.json b/data/hfopenllm_v2/lordjia/Llama-3-Cantonese-8B-Instruct/e3d6b3d7-a231-40c1-bac9-0b7fcb478bca.json new file mode 100644 index 000000000..eef3e165a --- /dev/null +++ b/data/hfopenllm_v2/lordjia/Llama-3-Cantonese-8B-Instruct/e3d6b3d7-a231-40c1-bac9-0b7fcb478bca.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lordjia_Llama-3-Cantonese-8B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Cantonese-8B-Instruct", + "id": "lordjia/Llama-3-Cantonese-8B-Instruct", + "developer": "lordjia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6669 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4814 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0891 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4046 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3515 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lordjia/Llama-3-Cantonese-8B-Instruct/f453cb41-346c-48b4-a660-64f13ec69fe4.json b/data/hfopenllm_v2/lordjia/Llama-3-Cantonese-8B-Instruct/f453cb41-346c-48b4-a660-64f13ec69fe4.json deleted file mode 100644 index 80ebe35d7..000000000 --- a/data/hfopenllm_v2/lordjia/Llama-3-Cantonese-8B-Instruct/f453cb41-346c-48b4-a660-64f13ec69fe4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lordjia_Llama-3-Cantonese-8B-Instruct/1762652580.3274932", - "retrieved_timestamp": "1762652580.3274932", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lordjia/Llama-3-Cantonese-8B-Instruct", - "developer": "lordjia", - "inference_platform": "unknown", - "id": "lordjia/Llama-3-Cantonese-8B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6669259786256023 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4814148018954038 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40460416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35147938829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/lordjia/Qwen2-Cantonese-7B-Instruct/20acb302-3a74-4425-af4c-a1d719b90a88.json b/data/hfopenllm_v2/lordjia/Qwen2-Cantonese-7B-Instruct/20acb302-3a74-4425-af4c-a1d719b90a88.json new file mode 100644 index 000000000..95ef13bda --- /dev/null +++ b/data/hfopenllm_v2/lordjia/Qwen2-Cantonese-7B-Instruct/20acb302-3a74-4425-af4c-a1d719b90a88.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lordjia_Qwen2-Cantonese-7B-Instruct/1770682486.623709", + "retrieved_timestamp": 
"1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2-Cantonese-7B-Instruct", + "id": "lordjia/Qwen2-Cantonese-7B-Instruct", + "developer": "lordjia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5435 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5215 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.256 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3843 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lordjia/Qwen2-Cantonese-7B-Instruct/869339ec-939c-4222-b178-533c3ca5b0d1.json b/data/hfopenllm_v2/lordjia/Qwen2-Cantonese-7B-Instruct/869339ec-939c-4222-b178-533c3ca5b0d1.json deleted file mode 100644 index 662dceb8e..000000000 --- a/data/hfopenllm_v2/lordjia/Qwen2-Cantonese-7B-Instruct/869339ec-939c-4222-b178-533c3ca5b0d1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lordjia_Qwen2-Cantonese-7B-Instruct/1762652580.3277462", - "retrieved_timestamp": "1762652580.3277462", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lordjia/Qwen2-Cantonese-7B-Instruct", - "developer": "lordjia", - "inference_platform": "unknown", - "id": "lordjia/Qwen2-Cantonese-7B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5435278394659503 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5215311346221223 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25604229607250756 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40038541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38430851063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/lt-asset/nova-1.3b/4c3005e9-fffd-491b-8ce1-58204986b787.json b/data/hfopenllm_v2/lt-asset/nova-1.3b/4c3005e9-fffd-491b-8ce1-58204986b787.json deleted file mode 100644 index 1beeb6bd2..000000000 --- a/data/hfopenllm_v2/lt-asset/nova-1.3b/4c3005e9-fffd-491b-8ce1-58204986b787.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lt-asset_nova-1.3b/1762652580.3279538", - "retrieved_timestamp": "1762652580.327955", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lt-asset/nova-1.3b", - "developer": "lt-asset", - "inference_platform": "unknown", - "id": "lt-asset/nova-1.3b", - "additional_details": { - "precision": "bfloat16", - "architecture": "NovaForCausalLM", - "params_billions": 1.347 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1214255951985177 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31700122104895806 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24916107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36978125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11419547872340426 - } - } - ] -} diff --git a/data/hfopenllm_v2/lt-asset/nova-1.3b/a8613588-687d-4291-ae5a-57688501cffd.json b/data/hfopenllm_v2/lt-asset/nova-1.3b/a8613588-687d-4291-ae5a-57688501cffd.json new file mode 100644 index 000000000..80085f690 --- /dev/null +++ b/data/hfopenllm_v2/lt-asset/nova-1.3b/a8613588-687d-4291-ae5a-57688501cffd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lt-asset_nova-1.3b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nova-1.3b", + "id": "lt-asset/nova-1.3b", + "developer": "lt-asset", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "NovaForCausalLM", + "params_billions": 1.347 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1214 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.317 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0121 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 
0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2492 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3698 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1142 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lunahr/thea-3b-50r-u1/83dd67cb-5508-4aa5-9435-d5585b7f3d52.json b/data/hfopenllm_v2/lunahr/thea-3b-50r-u1/83dd67cb-5508-4aa5-9435-d5585b7f3d52.json new file mode 100644 index 000000000..00e74d58e --- /dev/null +++ b/data/hfopenllm_v2/lunahr/thea-3b-50r-u1/83dd67cb-5508-4aa5-9435-d5585b7f3d52.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lunahr_thea-3b-50r-u1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "thea-3b-50r-u1", + "id": "lunahr/thea-3b-50r-u1", + "developer": "lunahr", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.603 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4105 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1042 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3182 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2808 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lunahr/thea-3b-50r-u1/977449d7-d8f0-4e32-b56c-8950006a09a4.json b/data/hfopenllm_v2/lunahr/thea-3b-50r-u1/977449d7-d8f0-4e32-b56c-8950006a09a4.json deleted file mode 100644 index 1435f4218..000000000 --- a/data/hfopenllm_v2/lunahr/thea-3b-50r-u1/977449d7-d8f0-4e32-b56c-8950006a09a4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lunahr_thea-3b-50r-u1/1762652580.328209", - "retrieved_timestamp": "1762652580.328209", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lunahr/thea-3b-50r-u1", - "developer": "lunahr", - "inference_platform": "unknown", - "id": "lunahr/thea-3b-50r-u1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6030288523340293 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41046731029294475 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3181875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2808344414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/lunahr/thea-v2-3b-50r/03d675d8-ee8d-47de-8bf3-ef386bd8a88f.json b/data/hfopenllm_v2/lunahr/thea-v2-3b-50r/03d675d8-ee8d-47de-8bf3-ef386bd8a88f.json deleted file mode 100644 index dc4346f8b..000000000 --- 
a/data/hfopenllm_v2/lunahr/thea-v2-3b-50r/03d675d8-ee8d-47de-8bf3-ef386bd8a88f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lunahr_thea-v2-3b-50r/1762652580.328458", - "retrieved_timestamp": "1762652580.328459", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lunahr/thea-v2-3b-50r", - "developer": "lunahr", - "inference_platform": "unknown", - "id": "lunahr/thea-v2-3b-50r", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.370396104558128 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4194416192911743 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02416918429003021 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2409408244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/lunahr/thea-v2-3b-50r/26d981bb-f2e5-4195-8d6f-594bb0b26f4a.json b/data/hfopenllm_v2/lunahr/thea-v2-3b-50r/26d981bb-f2e5-4195-8d6f-594bb0b26f4a.json new file mode 100644 index 000000000..bc6aaf04b --- /dev/null +++ b/data/hfopenllm_v2/lunahr/thea-v2-3b-50r/26d981bb-f2e5-4195-8d6f-594bb0b26f4a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/lunahr_thea-v2-3b-50r/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "thea-v2-3b-50r", + "id": "lunahr/thea-v2-3b-50r", + "developer": "lunahr", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": 
"google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3704 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4194 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0242 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3222 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2409 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/m42-health/Llama3-Med42-70B/df06c977-b54c-4668-837f-eb583ef24d29.json b/data/hfopenllm_v2/m42-health/Llama3-Med42-70B/df06c977-b54c-4668-837f-eb583ef24d29.json new file mode 100644 index 000000000..8b02603c5 --- /dev/null +++ b/data/hfopenllm_v2/m42-health/Llama3-Med42-70B/df06c977-b54c-4668-837f-eb583ef24d29.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/m42-health_Llama3-Med42-70B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3-Med42-70B", + "id": "m42-health/Llama3-Med42-70B", + "developer": "m42-health", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6291 + } + }, + { + 
"evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6688 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2258 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3473 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4629 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4963 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/macadeliccc/Samantha-Qwen-2-7B/31a8ac03-f58b-46e3-9f17-53311b1fd506.json b/data/hfopenllm_v2/macadeliccc/Samantha-Qwen-2-7B/31a8ac03-f58b-46e3-9f17-53311b1fd506.json new file mode 100644 index 000000000..c21abe404 --- /dev/null +++ b/data/hfopenllm_v2/macadeliccc/Samantha-Qwen-2-7B/31a8ac03-f58b-46e3-9f17-53311b1fd506.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/macadeliccc_Samantha-Qwen-2-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Samantha-Qwen-2-7B", + "id": "macadeliccc/Samantha-Qwen-2-7B", + "developer": "macadeliccc", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4377 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5082 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2115 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4799 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3779 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/macadeliccc/magistrate-3.2-3b-base/3e4a7141-7a82-421a-a107-bbac3cbafc9b.json b/data/hfopenllm_v2/macadeliccc/magistrate-3.2-3b-base/3e4a7141-7a82-421a-a107-bbac3cbafc9b.json new file mode 100644 index 000000000..552af16da --- /dev/null +++ b/data/hfopenllm_v2/macadeliccc/magistrate-3.2-3b-base/3e4a7141-7a82-421a-a107-bbac3cbafc9b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/macadeliccc_magistrate-3.2-3b-base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "magistrate-3.2-3b-base", + "id": "macadeliccc/magistrate-3.2-3b-base", + "developer": "macadeliccc", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1159 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3343 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + 
"source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0113 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3976 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1689 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/macadeliccc/magistrate-3.2-3b-base/e0f596ba-89ee-4fa7-b5dc-698c2a5fda95.json b/data/hfopenllm_v2/macadeliccc/magistrate-3.2-3b-base/e0f596ba-89ee-4fa7-b5dc-698c2a5fda95.json deleted file mode 100644 index 0ee69042e..000000000 --- a/data/hfopenllm_v2/macadeliccc/magistrate-3.2-3b-base/e0f596ba-89ee-4fa7-b5dc-698c2a5fda95.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/macadeliccc_magistrate-3.2-3b-base/1762652580.32929", - "retrieved_timestamp": "1762652580.329291", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "macadeliccc/magistrate-3.2-3b-base", - "developer": "macadeliccc", - "inference_platform": "unknown", - "id": "macadeliccc/magistrate-3.2-3b-base", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1159301763764589 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3342701056047533 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.011329305135951661 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39759374999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16888297872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/macadeliccc/magistrate-3.2-3b-it/9a3069f2-81ed-484a-b6e6-a45a259e9a43.json b/data/hfopenllm_v2/macadeliccc/magistrate-3.2-3b-it/9a3069f2-81ed-484a-b6e6-a45a259e9a43.json new file mode 100644 index 000000000..55e0d15e4 --- /dev/null +++ b/data/hfopenllm_v2/macadeliccc/magistrate-3.2-3b-it/9a3069f2-81ed-484a-b6e6-a45a259e9a43.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/macadeliccc_magistrate-3.2-3b-it/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "magistrate-3.2-3b-it", + "id": "macadeliccc/magistrate-3.2-3b-it", + "developer": "macadeliccc", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2292 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3257 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0196 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2475 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.3763 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1592 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/macadeliccc/magistrate-3.2-3b-it/df26db97-8e5e-409e-937d-45951c81a8cd.json b/data/hfopenllm_v2/macadeliccc/magistrate-3.2-3b-it/df26db97-8e5e-409e-937d-45951c81a8cd.json deleted file mode 100644 index 4e6d4ed8a..000000000 --- a/data/hfopenllm_v2/macadeliccc/magistrate-3.2-3b-it/df26db97-8e5e-409e-937d-45951c81a8cd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/macadeliccc_magistrate-3.2-3b-it/1762652580.329552", - "retrieved_timestamp": "1762652580.329552", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "macadeliccc/magistrate-3.2-3b-it", - "developer": "macadeliccc", - "inference_platform": "unknown", - "id": "macadeliccc/magistrate-3.2-3b-it", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22918744486850445 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3256506790327196 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24748322147651006 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3763229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15924202127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/magnifi/Phi3_intent_v56_3_w_unknown_5_lr_0.002/c0a3d0c3-c541-4606-a925-4100b062284f.json b/data/hfopenllm_v2/magnifi/Phi3_intent_v56_3_w_unknown_5_lr_0.002/c0a3d0c3-c541-4606-a925-4100b062284f.json new file mode 100644 index 000000000..0f570e9e4 --- /dev/null +++ 
b/data/hfopenllm_v2/magnifi/Phi3_intent_v56_3_w_unknown_5_lr_0.002/c0a3d0c3-c541-4606-a925-4100b062284f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/magnifi_Phi3_intent_v56_3_w_unknown_5_lr_0.002/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi3_intent_v56_3_w_unknown_5_lr_0.002", + "id": "magnifi/Phi3_intent_v56_3_w_unknown_5_lr_0.002", + "developer": "magnifi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2018 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3282 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4123 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1472 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/maldv/Awqward2.5-32B-Instruct/20685a4b-686f-4cd4-b49d-3067a005256d.json b/data/hfopenllm_v2/maldv/Awqward2.5-32B-Instruct/20685a4b-686f-4cd4-b49d-3067a005256d.json new file mode 100644 index 000000000..3a60298fd --- /dev/null +++ b/data/hfopenllm_v2/maldv/Awqward2.5-32B-Instruct/20685a4b-686f-4cd4-b49d-3067a005256d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/maldv_Awqward2.5-32B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Awqward2.5-32B-Instruct", + "id": "maldv/Awqward2.5-32B-Instruct", + "developer": "maldv", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8255 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6974 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6231 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3406 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4275 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5723 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/maldv/Awqward2.5-32B-Instruct/8b330a87-7689-45ae-a005-0349e09f07ac.json b/data/hfopenllm_v2/maldv/Awqward2.5-32B-Instruct/8b330a87-7689-45ae-a005-0349e09f07ac.json deleted file mode 100644 index 651ee10a8..000000000 --- a/data/hfopenllm_v2/maldv/Awqward2.5-32B-Instruct/8b330a87-7689-45ae-a005-0349e09f07ac.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/maldv_Awqward2.5-32B-Instruct/1762652580.3302772", - "retrieved_timestamp": "1762652580.330278", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { 
- "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "maldv/Awqward2.5-32B-Instruct", - "developer": "maldv", - "inference_platform": "unknown", - "id": "maldv/Awqward2.5-32B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8254697535871487 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6974465506773041 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6231117824773413 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34060402684563756 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42748958333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5723071808510638 - } - } - ] -} diff --git a/data/hfopenllm_v2/maldv/Lytta2.5-32B-Instruct/27575e22-2e66-4177-aa8f-ab4ebd4743ea.json b/data/hfopenllm_v2/maldv/Lytta2.5-32B-Instruct/27575e22-2e66-4177-aa8f-ab4ebd4743ea.json deleted file mode 100644 index c6f2fa72c..000000000 --- a/data/hfopenllm_v2/maldv/Lytta2.5-32B-Instruct/27575e22-2e66-4177-aa8f-ab4ebd4743ea.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/maldv_Lytta2.5-32B-Instruct/1762652580.3306072", - "retrieved_timestamp": "1762652580.3306088", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "maldv/Lytta2.5-32B-Instruct", - "developer": "maldv", - "inference_platform": "unknown", - "id": "maldv/Lytta2.5-32B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25079455843827714 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.559971089357847 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34441087613293053 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37685416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5048204787234043 - } - } - ] -} diff --git a/data/hfopenllm_v2/maldv/Lytta2.5-32B-Instruct/85a91293-cd51-4f79-8b98-2f4bc67d78c1.json b/data/hfopenllm_v2/maldv/Lytta2.5-32B-Instruct/85a91293-cd51-4f79-8b98-2f4bc67d78c1.json new file mode 100644 index 000000000..4697190aa --- /dev/null +++ b/data/hfopenllm_v2/maldv/Lytta2.5-32B-Instruct/85a91293-cd51-4f79-8b98-2f4bc67d78c1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/maldv_Lytta2.5-32B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lytta2.5-32B-Instruct", + "id": "maldv/Lytta2.5-32B-Instruct", + "developer": "maldv", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2508 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.56 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3444 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": 
"Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3769 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5048 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/maldv/Qwentile2.5-32B-Instruct/d2e3a6c2-4e67-4150-b9a8-fec979fb1658.json b/data/hfopenllm_v2/maldv/Qwentile2.5-32B-Instruct/d2e3a6c2-4e67-4150-b9a8-fec979fb1658.json new file mode 100644 index 000000000..345ca800e --- /dev/null +++ b/data/hfopenllm_v2/maldv/Qwentile2.5-32B-Instruct/d2e3a6c2-4e67-4150-b9a8-fec979fb1658.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/maldv_Qwentile2.5-32B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwentile2.5-32B-Instruct", + "id": "maldv/Qwentile2.5-32B-Instruct", + "developer": "maldv", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7393 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6963 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5219 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3842 + } + }, + 
{ + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4682 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5879 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/maldv/Qwentile2.5-32B-Instruct/f4fde074-8a05-42ec-884c-447b4bfaba39.json b/data/hfopenllm_v2/maldv/Qwentile2.5-32B-Instruct/f4fde074-8a05-42ec-884c-447b4bfaba39.json deleted file mode 100644 index 2b9d88fa8..000000000 --- a/data/hfopenllm_v2/maldv/Qwentile2.5-32B-Instruct/f4fde074-8a05-42ec-884c-447b4bfaba39.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/maldv_Qwentile2.5-32B-Instruct/1762652580.3309162", - "retrieved_timestamp": "1762652580.3309171", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "maldv/Qwentile2.5-32B-Instruct", - "developer": "maldv", - "inference_platform": "unknown", - "id": "maldv/Qwentile2.5-32B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7393161256576994 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6962837451098368 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5219033232628398 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38422818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4682291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5879321808510638 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/maldv/badger-kappa-llama-3-8b/c4d686f2-2af1-4271-9556-09380f07ba5f.json b/data/hfopenllm_v2/maldv/badger-kappa-llama-3-8b/c4d686f2-2af1-4271-9556-09380f07ba5f.json new file mode 100644 index 000000000..65f56c77f --- /dev/null +++ b/data/hfopenllm_v2/maldv/badger-kappa-llama-3-8b/c4d686f2-2af1-4271-9556-09380f07ba5f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/maldv_badger-kappa-llama-3-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "badger-kappa-llama-3-8b", + "id": "maldv/badger-kappa-llama-3-8b", + "developer": "maldv", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4695 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5085 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0861 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3765 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3695 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/maldv/badger-lambda-llama-3-8b/93167303-b38e-43f0-a552-72c26ccb4339.json b/data/hfopenllm_v2/maldv/badger-lambda-llama-3-8b/93167303-b38e-43f0-a552-72c26ccb4339.json new file mode 100644 index 000000000..84491b944 --- /dev/null +++ 
b/data/hfopenllm_v2/maldv/badger-lambda-llama-3-8b/93167303-b38e-43f0-a552-72c26ccb4339.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/maldv_badger-lambda-llama-3-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "badger-lambda-llama-3-8b", + "id": "maldv/badger-lambda-llama-3-8b", + "developer": "maldv", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4861 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4963 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0944 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2819 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3754 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3767 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/maldv/badger-mu-llama-3-8b/b52a176f-f369-4791-a7e3-88a72709c868.json b/data/hfopenllm_v2/maldv/badger-mu-llama-3-8b/b52a176f-f369-4791-a7e3-88a72709c868.json new file mode 100644 index 000000000..006838490 --- /dev/null +++ b/data/hfopenllm_v2/maldv/badger-mu-llama-3-8b/b52a176f-f369-4791-a7e3-88a72709c868.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/maldv_badger-mu-llama-3-8b/1770682486.623709", + "retrieved_timestamp": 
"1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "badger-mu-llama-3-8b", + "id": "maldv/badger-mu-llama-3-8b", + "developer": "maldv", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4919 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5143 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0559 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3555 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3674 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/maldv/badger-writer-llama-3-8b/b6310012-17f1-4ee0-abd0-0079a9299350.json b/data/hfopenllm_v2/maldv/badger-writer-llama-3-8b/b6310012-17f1-4ee0-abd0-0079a9299350.json new file mode 100644 index 000000000..08b482ae6 --- /dev/null +++ b/data/hfopenllm_v2/maldv/badger-writer-llama-3-8b/b6310012-17f1-4ee0-abd0-0079a9299350.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/maldv_badger-writer-llama-3-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"badger-writer-llama-3-8b", + "id": "maldv/badger-writer-llama-3-8b", + "developer": "maldv", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5303 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4864 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0755 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3581 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.376 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/Cheng-1/7aa1c718-9ac6-426b-be50-5c7f37849b90.json b/data/hfopenllm_v2/marcuscedricridia/Cheng-1/7aa1c718-9ac6-426b-be50-5c7f37849b90.json deleted file mode 100644 index cf24bfab7..000000000 --- a/data/hfopenllm_v2/marcuscedricridia/Cheng-1/7aa1c718-9ac6-426b-be50-5c7f37849b90.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_Cheng-1/1762652580.332221", - "retrieved_timestamp": "1762652580.332222", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/Cheng-1", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/Cheng-1", - 
"additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7788833628106757 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5524677845280024 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48942598187311176 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4073333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43492353723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/marcuscedricridia/Cheng-1/f581e832-0f77-496e-bcd3-6cfec51ef594.json b/data/hfopenllm_v2/marcuscedricridia/Cheng-1/f581e832-0f77-496e-bcd3-6cfec51ef594.json new file mode 100644 index 000000000..db31c8ed3 --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/Cheng-1/f581e832-0f77-496e-bcd3-6cfec51ef594.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_Cheng-1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Cheng-1", + "id": "marcuscedricridia/Cheng-1", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7789 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5525 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + 
"dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4894 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4073 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4349 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/Cheng-2-v1.1/47b47c89-b13b-4099-98b2-854feae05f63.json b/data/hfopenllm_v2/marcuscedricridia/Cheng-2-v1.1/47b47c89-b13b-4099-98b2-854feae05f63.json new file mode 100644 index 000000000..a97a44973 --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/Cheng-2-v1.1/47b47c89-b13b-4099-98b2-854feae05f63.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_Cheng-2-v1.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Cheng-2-v1.1", + "id": "marcuscedricridia/Cheng-2-v1.1", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.827 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.651 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5393 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3431 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4167 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5076 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/Cheng-2-v1.1/a720e9bc-e8dd-4b7a-8d22-7b9f4b42ebe0.json b/data/hfopenllm_v2/marcuscedricridia/Cheng-2-v1.1/a720e9bc-e8dd-4b7a-8d22-7b9f4b42ebe0.json deleted file mode 100644 index fa087c3bc..000000000 --- a/data/hfopenllm_v2/marcuscedricridia/Cheng-2-v1.1/a720e9bc-e8dd-4b7a-8d22-7b9f4b42ebe0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_Cheng-2-v1.1/1762652580.332704", - "retrieved_timestamp": "1762652580.332705", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/Cheng-2-v1.1", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/Cheng-2-v1.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8269934883885868 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6510142192324059 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5392749244712991 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34312080536912754 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41672916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5076462765957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/marcuscedricridia/Cheng-2/8d51ae58-7b20-4fa4-b234-2abb9cdeaad4.json b/data/hfopenllm_v2/marcuscedricridia/Cheng-2/8d51ae58-7b20-4fa4-b234-2abb9cdeaad4.json new file mode 100644 index 000000000..d2b1be907 --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/Cheng-2/8d51ae58-7b20-4fa4-b234-2abb9cdeaad4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_Cheng-2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Cheng-2", + "id": "marcuscedricridia/Cheng-2", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8337 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6499 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5438 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3456 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4193 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5013 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/Cheng-2/dbadece3-665b-423b-b2d9-e74d7c676133.json b/data/hfopenllm_v2/marcuscedricridia/Cheng-2/dbadece3-665b-423b-b2d9-e74d7c676133.json deleted file mode 100644 index d9f1516e6..000000000 --- a/data/hfopenllm_v2/marcuscedricridia/Cheng-2/dbadece3-665b-423b-b2d9-e74d7c676133.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_Cheng-2/1762652580.332486", - "retrieved_timestamp": "1762652580.3324869", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/Cheng-2", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/Cheng-2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8337378156624423 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6498988582965893 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5438066465256798 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34563758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41933333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5013297872340425 - } - } - ] -} diff --git a/data/hfopenllm_v2/marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.1/4d4d5679-8ec6-49b8-a5d7-2a76497b44b7.json b/data/hfopenllm_v2/marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.1/4d4d5679-8ec6-49b8-a5d7-2a76497b44b7.json new file mode 100644 index 000000000..2a3ec93a3 --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.1/4d4d5679-8ec6-49b8-a5d7-2a76497b44b7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-MST-v1.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + 
"source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Hush-Qwen2.5-7B-MST-v1.1", + "id": "marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.1", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7445 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5559 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4653 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4073 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4299 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.3/0bdb6574-69e2-4858-b7aa-a90a5fadf741.json b/data/hfopenllm_v2/marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.3/0bdb6574-69e2-4858-b7aa-a90a5fadf741.json new file mode 100644 index 000000000..5208a426c --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.3/0bdb6574-69e2-4858-b7aa-a90a5fadf741.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-MST-v1.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + 
"model_info": { + "name": "Hush-Qwen2.5-7B-MST-v1.3", + "id": "marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.3", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7043 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5516 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4758 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3146 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4311 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.444 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/Hush-Qwen2.5-7B-MST/fa1a92bb-ad25-4be2-a35f-7fdebbeeeba8.json b/data/hfopenllm_v2/marcuscedricridia/Hush-Qwen2.5-7B-MST/fa1a92bb-ad25-4be2-a35f-7fdebbeeeba8.json new file mode 100644 index 000000000..637b7439c --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/Hush-Qwen2.5-7B-MST/fa1a92bb-ad25-4be2-a35f-7fdebbeeeba8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-MST/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Hush-Qwen2.5-7B-MST", + "id": "marcuscedricridia/Hush-Qwen2.5-7B-MST", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { 
+ "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7488 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5458 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4245 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3914 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4163 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/Hush-Qwen2.5-7B-Preview/d62ea0a1-cc9d-41b7-8d60-479b8e2262b5.json b/data/hfopenllm_v2/marcuscedricridia/Hush-Qwen2.5-7B-Preview/d62ea0a1-cc9d-41b7-8d60-479b8e2262b5.json new file mode 100644 index 000000000..af57fa32c --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/Hush-Qwen2.5-7B-Preview/d62ea0a1-cc9d-41b7-8d60-479b8e2262b5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-Preview/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Hush-Qwen2.5-7B-Preview", + "id": "marcuscedricridia/Hush-Qwen2.5-7B-Preview", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": 
{ + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7962 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5431 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3754 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3112 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4298 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4364 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/Hush-Qwen2.5-7B-RP-v1.4-1M/912446e3-efdf-4ed0-80bd-261c6c87a3d0.json b/data/hfopenllm_v2/marcuscedricridia/Hush-Qwen2.5-7B-RP-v1.4-1M/912446e3-efdf-4ed0-80bd-261c6c87a3d0.json new file mode 100644 index 000000000..50261cfbe --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/Hush-Qwen2.5-7B-RP-v1.4-1M/912446e3-efdf-4ed0-80bd-261c6c87a3d0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-RP-v1.4-1M/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Hush-Qwen2.5-7B-RP-v1.4-1M", + "id": "marcuscedricridia/Hush-Qwen2.5-7B-RP-v1.4-1M", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on 
IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7728 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5295 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3369 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4433 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4135 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/Hush-Qwen2.5-7B-v1.1/5e86dc31-ae3e-4ef7-858e-41e29b3a8031.json b/data/hfopenllm_v2/marcuscedricridia/Hush-Qwen2.5-7B-v1.1/5e86dc31-ae3e-4ef7-858e-41e29b3a8031.json new file mode 100644 index 000000000..91ec9fbad --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/Hush-Qwen2.5-7B-v1.1/5e86dc31-ae3e-4ef7-858e-41e29b3a8031.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-v1.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Hush-Qwen2.5-7B-v1.1", + "id": "marcuscedricridia/Hush-Qwen2.5-7B-v1.1", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7889 + } + }, + { + "evaluation_name": "BBH", + 
"source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5384 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4381 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3163 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4179 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4227 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/Hush-Qwen2.5-7B-v1.2/80680e5e-ab83-4a59-aeec-9d4166509c47.json b/data/hfopenllm_v2/marcuscedricridia/Hush-Qwen2.5-7B-v1.2/80680e5e-ab83-4a59-aeec-9d4166509c47.json new file mode 100644 index 000000000..db41151dd --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/Hush-Qwen2.5-7B-v1.2/80680e5e-ab83-4a59-aeec-9d4166509c47.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-v1.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Hush-Qwen2.5-7B-v1.2", + "id": "marcuscedricridia/Hush-Qwen2.5-7B-v1.2", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7865 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5403 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4403 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3146 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4219 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4197 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/Hush-Qwen2.5-7B-v1.3/c5bc9c92-8469-4174-aafd-67bb61aaccf2.json b/data/hfopenllm_v2/marcuscedricridia/Hush-Qwen2.5-7B-v1.3/c5bc9c92-8469-4174-aafd-67bb61aaccf2.json new file mode 100644 index 000000000..b018cf076 --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/Hush-Qwen2.5-7B-v1.3/c5bc9c92-8469-4174-aafd-67bb61aaccf2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-v1.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Hush-Qwen2.5-7B-v1.3", + "id": "marcuscedricridia/Hush-Qwen2.5-7B-v1.3", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7856 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5327 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + 
"dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3323 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4246 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4345 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/Hush-Qwen2.5-7B-v1.4/1d67b792-178b-4baa-a108-2362f658bd4e.json b/data/hfopenllm_v2/marcuscedricridia/Hush-Qwen2.5-7B-v1.4/1d67b792-178b-4baa-a108-2362f658bd4e.json new file mode 100644 index 000000000..142cdd945 --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/Hush-Qwen2.5-7B-v1.4/1d67b792-178b-4baa-a108-2362f658bd4e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-v1.4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Hush-Qwen2.5-7B-v1.4", + "id": "marcuscedricridia/Hush-Qwen2.5-7B-v1.4", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7835 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5423 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", 
+ "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.426 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3112 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4232 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4195 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/Qwen2.5-7B-Preview/eb0c87b0-4795-4029-82c1-57ce37ba8259.json b/data/hfopenllm_v2/marcuscedricridia/Qwen2.5-7B-Preview/eb0c87b0-4795-4029-82c1-57ce37ba8259.json new file mode 100644 index 000000000..9e2e24bb9 --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/Qwen2.5-7B-Preview/eb0c87b0-4795-4029-82c1-57ce37ba8259.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_Qwen2.5-7B-Preview/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-Preview", + "id": "marcuscedricridia/Qwen2.5-7B-Preview", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7679 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.536 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3444 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + 
"dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3238 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.414 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4258 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/Yell-Qwen2.5-7B-Preview-v1.1/dc9b2300-7ab0-4e92-9d23-15fe9ca52994.json b/data/hfopenllm_v2/marcuscedricridia/Yell-Qwen2.5-7B-Preview-v1.1/dc9b2300-7ab0-4e92-9d23-15fe9ca52994.json new file mode 100644 index 000000000..f754c4b96 --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/Yell-Qwen2.5-7B-Preview-v1.1/dc9b2300-7ab0-4e92-9d23-15fe9ca52994.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_Yell-Qwen2.5-7B-Preview-v1.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yell-Qwen2.5-7B-Preview-v1.1", + "id": "marcuscedricridia/Yell-Qwen2.5-7B-Preview-v1.1", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5757 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5348 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1896 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy 
on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2861 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4059 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3831 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/Yell-Qwen2.5-7B-Preview/e005624d-c822-4be1-9477-873642aae228.json b/data/hfopenllm_v2/marcuscedricridia/Yell-Qwen2.5-7B-Preview/e005624d-c822-4be1-9477-873642aae228.json new file mode 100644 index 000000000..0967ce44f --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/Yell-Qwen2.5-7B-Preview/e005624d-c822-4be1-9477-873642aae228.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_Yell-Qwen2.5-7B-Preview/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yell-Qwen2.5-7B-Preview", + "id": "marcuscedricridia/Yell-Qwen2.5-7B-Preview", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5839 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5371 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1926 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + }, + { + 
"evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4046 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3798 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/absolute-o1-7b/4e9eef3d-b851-41de-a3b2-88950f1d426f.json b/data/hfopenllm_v2/marcuscedricridia/absolute-o1-7b/4e9eef3d-b851-41de-a3b2-88950f1d426f.json deleted file mode 100644 index 1d67ab32a..000000000 --- a/data/hfopenllm_v2/marcuscedricridia/absolute-o1-7b/4e9eef3d-b851-41de-a3b2-88950f1d426f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_absolute-o1-7b/1762652580.335638", - "retrieved_timestamp": "1762652580.335639", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/absolute-o1-7b", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/absolute-o1-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7515558717536137 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5469413884153854 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5083081570996979 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4113645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44132313829787234 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/marcuscedricridia/absolute-o1-7b/e9756d91-b9e2-4dd0-bf08-c6154c7d1f2e.json b/data/hfopenllm_v2/marcuscedricridia/absolute-o1-7b/e9756d91-b9e2-4dd0-bf08-c6154c7d1f2e.json new file mode 100644 index 000000000..6b56c9588 --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/absolute-o1-7b/e9756d91-b9e2-4dd0-bf08-c6154c7d1f2e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_absolute-o1-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "absolute-o1-7b", + "id": "marcuscedricridia/absolute-o1-7b", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7516 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5469 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5083 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3196 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4114 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4413 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b-2-28-2025/2a0bcf8c-cf70-4d13-a713-67054bc98412.json b/data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b-2-28-2025/2a0bcf8c-cf70-4d13-a713-67054bc98412.json deleted file mode 100644 
index 5944869b7..000000000 --- a/data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b-2-28-2025/2a0bcf8c-cf70-4d13-a713-67054bc98412.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_cursa-o1-7b-2-28-2025/1762652580.3360791", - "retrieved_timestamp": "1762652580.3360798", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/cursa-o1-7b-2-28-2025", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/cursa-o1-7b-2-28-2025", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7467098409996586 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.538413713363387 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4811178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42733333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4365026595744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b-2-28-2025/704598c3-c5d6-4ce0-bab3-0fa98118e16a.json b/data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b-2-28-2025/704598c3-c5d6-4ce0-bab3-0fa98118e16a.json new file mode 100644 index 000000000..f7db34598 --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b-2-28-2025/704598c3-c5d6-4ce0-bab3-0fa98118e16a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_cursa-o1-7b-2-28-2025/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cursa-o1-7b-2-28-2025", + "id": "marcuscedricridia/cursa-o1-7b-2-28-2025", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": 
"bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7467 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5384 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4811 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4273 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4365 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b-v1.1/f24a1f02-da21-49f0-91b9-65df4fd770db.json b/data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b-v1.1/f24a1f02-da21-49f0-91b9-65df4fd770db.json deleted file mode 100644 index 966234ad1..000000000 --- a/data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b-v1.1/f24a1f02-da21-49f0-91b9-65df4fd770db.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_cursa-o1-7b-v1.1/1762652580.336299", - "retrieved_timestamp": "1762652580.3363001", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/cursa-o1-7b-v1.1", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/cursa-o1-7b-v1.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, 
- "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7527549125209998 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5492557305346194 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4984894259818731 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.425875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43916223404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b-v1.1/fafc9463-d725-4827-8bc1-5cd9e83814b6.json b/data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b-v1.1/fafc9463-d725-4827-8bc1-5cd9e83814b6.json new file mode 100644 index 000000000..542f287db --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b-v1.1/fafc9463-d725-4827-8bc1-5cd9e83814b6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_cursa-o1-7b-v1.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cursa-o1-7b-v1.1", + "id": "marcuscedricridia/cursa-o1-7b-v1.1", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7528 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5493 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4985 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4259 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4392 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b-v1.2-normalize-false/109820e0-ee00-449c-9ae5-58a7bf1da5f8.json b/data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b-v1.2-normalize-false/109820e0-ee00-449c-9ae5-58a7bf1da5f8.json new file mode 100644 index 000000000..37f93dd1f --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b-v1.2-normalize-false/109820e0-ee00-449c-9ae5-58a7bf1da5f8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_cursa-o1-7b-v1.2-normalize-false/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cursa-o1-7b-v1.2-normalize-false", + "id": "marcuscedricridia/cursa-o1-7b-v1.2-normalize-false", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7616 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5492 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4992 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4273 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4436 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b-v1.2-normalize-false/2632f42e-cbe3-4c55-b434-f4a239aeffa4.json b/data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b-v1.2-normalize-false/2632f42e-cbe3-4c55-b434-f4a239aeffa4.json deleted file mode 100644 index d9be8bc3d..000000000 --- a/data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b-v1.2-normalize-false/2632f42e-cbe3-4c55-b434-f4a239aeffa4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_cursa-o1-7b-v1.2-normalize-false/1762652580.3365178", - "retrieved_timestamp": "1762652580.3365178", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/cursa-o1-7b-v1.2-normalize-false", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/cursa-o1-7b-v1.2-normalize-false", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7615726272955757 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5492349810703803 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49924471299093653 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, 
- "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4272708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4435671542553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b/0f7f339a-5523-4551-ba77-4fe34779d017.json b/data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b/0f7f339a-5523-4551-ba77-4fe34779d017.json deleted file mode 100644 index 36e4c452c..000000000 --- a/data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b/0f7f339a-5523-4551-ba77-4fe34779d017.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_cursa-o1-7b/1762652580.335863", - "retrieved_timestamp": "1762652580.335863", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/cursa-o1-7b", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/cursa-o1-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7628215357473725 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5465860023973769 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4954682779456193 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4300625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4392453457446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b/37f29d5b-d803-4195-9ce0-75e45e32c160.json 
b/data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b/37f29d5b-d803-4195-9ce0-75e45e32c160.json new file mode 100644 index 000000000..ad5a42f1a --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b/37f29d5b-d803-4195-9ce0-75e45e32c160.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_cursa-o1-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cursa-o1-7b", + "id": "marcuscedricridia/cursa-o1-7b", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7628 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5466 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4955 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4301 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4392 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/cursor-o1-7b/43546f48-8c46-4481-b1e5-f4b1ad2535be.json b/data/hfopenllm_v2/marcuscedricridia/cursor-o1-7b/43546f48-8c46-4481-b1e5-f4b1ad2535be.json new file mode 100644 index 000000000..69bdd676a --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/cursor-o1-7b/43546f48-8c46-4481-b1e5-f4b1ad2535be.json 
@@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_cursor-o1-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cursor-o1-7b", + "id": "marcuscedricridia/cursor-o1-7b", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4107 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5007 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1412 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4101 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3251 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/cursor-o1-7b/764c4dcb-caea-418c-b206-ee401ea0d979.json b/data/hfopenllm_v2/marcuscedricridia/cursor-o1-7b/764c4dcb-caea-418c-b206-ee401ea0d979.json deleted file mode 100644 index 0e9e760b9..000000000 --- a/data/hfopenllm_v2/marcuscedricridia/cursor-o1-7b/764c4dcb-caea-418c-b206-ee401ea0d979.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_cursor-o1-7b/1762652580.3367229", - "retrieved_timestamp": "1762652580.336724", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/cursor-o1-7b", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/cursor-o1-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4106880853912065 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5007453242508472 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14123867069486404 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41009375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32513297872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/marcuscedricridia/cursorr-o1.2-7b/51cd189c-82a8-4475-8df5-9a855394274a.json b/data/hfopenllm_v2/marcuscedricridia/cursorr-o1.2-7b/51cd189c-82a8-4475-8df5-9a855394274a.json deleted file mode 100644 index 77a11099c..000000000 --- a/data/hfopenllm_v2/marcuscedricridia/cursorr-o1.2-7b/51cd189c-82a8-4475-8df5-9a855394274a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_cursorr-o1.2-7b/1762652580.336929", - "retrieved_timestamp": "1762652580.336929", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/cursorr-o1.2-7b", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/cursorr-o1.2-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1659895743294459 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3068134113454804 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35384375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10804521276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/marcuscedricridia/cursorr-o1.2-7b/ec81e0ff-9cb4-4d43-9f78-1d5f4edc9103.json b/data/hfopenllm_v2/marcuscedricridia/cursorr-o1.2-7b/ec81e0ff-9cb4-4d43-9f78-1d5f4edc9103.json new file mode 100644 index 000000000..ebc368fd4 --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/cursorr-o1.2-7b/ec81e0ff-9cb4-4d43-9f78-1d5f4edc9103.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_cursorr-o1.2-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cursorr-o1.2-7b", + "id": "marcuscedricridia/cursorr-o1.2-7b", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.166 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3068 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2542 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3538 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.108 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/etr1o-explicit-v1.1/02fe0385-223e-4578-b3fb-d6819f783861.json b/data/hfopenllm_v2/marcuscedricridia/etr1o-explicit-v1.1/02fe0385-223e-4578-b3fb-d6819f783861.json deleted file mode 100644 index 54d30ea84..000000000 --- a/data/hfopenllm_v2/marcuscedricridia/etr1o-explicit-v1.1/02fe0385-223e-4578-b3fb-d6819f783861.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_etr1o-explicit-v1.1/1762652580.337136", - "retrieved_timestamp": "1762652580.337137", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/etr1o-explicit-v1.1", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/etr1o-explicit-v1.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28803906966847964 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31316553135589525 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.004531722054380665 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on 
MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4110520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11951462765957446 - } - } - ] -} diff --git a/data/hfopenllm_v2/marcuscedricridia/etr1o-explicit-v1.1/9290c86f-40b0-4520-b8aa-3460de62c396.json b/data/hfopenllm_v2/marcuscedricridia/etr1o-explicit-v1.1/9290c86f-40b0-4520-b8aa-3460de62c396.json new file mode 100644 index 000000000..3d23d32e5 --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/etr1o-explicit-v1.1/9290c86f-40b0-4520-b8aa-3460de62c396.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_etr1o-explicit-v1.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "etr1o-explicit-v1.1", + "id": "marcuscedricridia/etr1o-explicit-v1.1", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.288 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3132 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0045 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4111 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { 
+ "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1195 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/etr1o-explicit-v1.2/3ec5106d-86be-48a8-bb3d-6574b6971641.json b/data/hfopenllm_v2/marcuscedricridia/etr1o-explicit-v1.2/3ec5106d-86be-48a8-bb3d-6574b6971641.json deleted file mode 100644 index d290dd40a..000000000 --- a/data/hfopenllm_v2/marcuscedricridia/etr1o-explicit-v1.2/3ec5106d-86be-48a8-bb3d-6574b6971641.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_etr1o-explicit-v1.2/1762652580.337388", - "retrieved_timestamp": "1762652580.337389", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/etr1o-explicit-v1.2", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/etr1o-explicit-v1.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1504020443534267 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29497368605886115 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40311458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11261635638297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/marcuscedricridia/etr1o-explicit-v1.2/a4bf576e-9556-4956-8dcb-4d8906d45db0.json b/data/hfopenllm_v2/marcuscedricridia/etr1o-explicit-v1.2/a4bf576e-9556-4956-8dcb-4d8906d45db0.json new file mode 100644 index 000000000..22ae724ac --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/etr1o-explicit-v1.2/a4bf576e-9556-4956-8dcb-4d8906d45db0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_etr1o-explicit-v1.2/1770682486.623709", + 
"retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "etr1o-explicit-v1.2", + "id": "marcuscedricridia/etr1o-explicit-v1.2", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1504 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.295 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4031 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1126 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/etr1o-v1.1/320a5c00-3307-4bc3-9f47-9befb88e461c.json b/data/hfopenllm_v2/marcuscedricridia/etr1o-v1.1/320a5c00-3307-4bc3-9f47-9befb88e461c.json new file mode 100644 index 000000000..a0ab10b5b --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/etr1o-v1.1/320a5c00-3307-4bc3-9f47-9befb88e461c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_etr1o-v1.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + 
"model_info": { + "name": "etr1o-v1.1", + "id": "marcuscedricridia/etr1o-v1.1", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1597 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.31 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2567 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4017 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1157 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/etr1o-v1.1/cd68d6d9-a5c7-4f32-b372-0e954af830ad.json b/data/hfopenllm_v2/marcuscedricridia/etr1o-v1.1/cd68d6d9-a5c7-4f32-b372-0e954af830ad.json deleted file mode 100644 index eae191525..000000000 --- a/data/hfopenllm_v2/marcuscedricridia/etr1o-v1.1/cd68d6d9-a5c7-4f32-b372-0e954af830ad.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_etr1o-v1.1/1762652580.3376079", - "retrieved_timestamp": "1762652580.337609", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/etr1o-v1.1", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": 
"marcuscedricridia/etr1o-v1.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15971954414287426 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31003625778742805 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40165625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11569148936170212 - } - } - ] -} diff --git a/data/hfopenllm_v2/marcuscedricridia/etr1o-v1.2/81b5a281-9dc6-4ae5-8079-d0e308a20c8e.json b/data/hfopenllm_v2/marcuscedricridia/etr1o-v1.2/81b5a281-9dc6-4ae5-8079-d0e308a20c8e.json deleted file mode 100644 index e3f05edd5..000000000 --- a/data/hfopenllm_v2/marcuscedricridia/etr1o-v1.2/81b5a281-9dc6-4ae5-8079-d0e308a20c8e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_etr1o-v1.2/1762652580.337824", - "retrieved_timestamp": "1762652580.337825", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/etr1o-v1.2", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/etr1o-v1.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7286998497320443 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6349035922791185 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 
5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35876132930513593 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37583892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4714479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5315824468085106 - } - } - ] -} diff --git a/data/hfopenllm_v2/marcuscedricridia/etr1o-v1.2/844d1556-6bc6-467e-a145-f92646770727.json b/data/hfopenllm_v2/marcuscedricridia/etr1o-v1.2/844d1556-6bc6-467e-a145-f92646770727.json new file mode 100644 index 000000000..c2db8d268 --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/etr1o-v1.2/844d1556-6bc6-467e-a145-f92646770727.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_etr1o-v1.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "etr1o-v1.2", + "id": "marcuscedricridia/etr1o-v1.2", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7287 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6349 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3588 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3758 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": 
"MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4714 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5316 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/fan-o1-7b/78923f4b-c2e7-4472-8398-10a0a8453ec5.json b/data/hfopenllm_v2/marcuscedricridia/fan-o1-7b/78923f4b-c2e7-4472-8398-10a0a8453ec5.json new file mode 100644 index 000000000..63b85c22d --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/fan-o1-7b/78923f4b-c2e7-4472-8398-10a0a8453ec5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_fan-o1-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "fan-o1-7b", + "id": "marcuscedricridia/fan-o1-7b", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4456 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4849 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1616 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2844 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": 
{ + "score": 0.3834 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3274 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/fan-o1-7b/9693b68f-ac5c-4111-804c-0505ec8bf06d.json b/data/hfopenllm_v2/marcuscedricridia/fan-o1-7b/9693b68f-ac5c-4111-804c-0505ec8bf06d.json deleted file mode 100644 index 9bda3591c..000000000 --- a/data/hfopenllm_v2/marcuscedricridia/fan-o1-7b/9693b68f-ac5c-4111-804c-0505ec8bf06d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_fan-o1-7b/1762652580.338023", - "retrieved_timestamp": "1762652580.338024", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/fan-o1-7b", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/fan-o1-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4455588948434598 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4849058892394324 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16163141993957703 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3833645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3273769946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/marcuscedricridia/olmner-7b/17abe1bf-2e97-409e-88e3-4f661861a195.json b/data/hfopenllm_v2/marcuscedricridia/olmner-7b/17abe1bf-2e97-409e-88e3-4f661861a195.json new file mode 100644 index 000000000..116baa15b --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/olmner-7b/17abe1bf-2e97-409e-88e3-4f661861a195.json @@ -0,0 +1,132 @@ +{ 
+ "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_olmner-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "olmner-7b", + "id": "marcuscedricridia/olmner-7b", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7254 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5472 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.463 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.438 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4309 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/olmner-7b/5064ebea-3ec3-4344-867f-e33f8937d096.json b/data/hfopenllm_v2/marcuscedricridia/olmner-7b/5064ebea-3ec3-4344-867f-e33f8937d096.json deleted file mode 100644 index b29f528bd..000000000 --- a/data/hfopenllm_v2/marcuscedricridia/olmner-7b/5064ebea-3ec3-4344-867f-e33f8937d096.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_olmner-7b/1762652580.338225", - "retrieved_timestamp": "1762652580.338225", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/olmner-7b", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/olmner-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7253775537795273 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5471591805569388 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43796875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4309341755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/marcuscedricridia/olmner-della-7b/062e407e-7820-459f-83da-b670f8adff9d.json b/data/hfopenllm_v2/marcuscedricridia/olmner-della-7b/062e407e-7820-459f-83da-b670f8adff9d.json deleted file mode 100644 index 3ccbd009a..000000000 --- a/data/hfopenllm_v2/marcuscedricridia/olmner-della-7b/062e407e-7820-459f-83da-b670f8adff9d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_olmner-della-7b/1762652580.338445", - "retrieved_timestamp": "1762652580.3384461", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/olmner-della-7b", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/olmner-della-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, 
- "max_score": 1 - }, - "score_details": { - "score": 0.7636958824807067 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5491231851969524 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4962235649546828 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4207604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43858045212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/marcuscedricridia/olmner-della-7b/756978e5-1dfe-433e-ba88-339004a50ea7.json b/data/hfopenllm_v2/marcuscedricridia/olmner-della-7b/756978e5-1dfe-433e-ba88-339004a50ea7.json new file mode 100644 index 000000000..41cca1ed9 --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/olmner-della-7b/756978e5-1dfe-433e-ba88-339004a50ea7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_olmner-della-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "olmner-della-7b", + "id": "marcuscedricridia/olmner-della-7b", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7637 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5491 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, 
+ "score_details": { + "score": 0.4962 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4208 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4386 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/olmner-o1-7b/a889ae3a-5d86-4454-bfb9-332c4b61b836.json b/data/hfopenllm_v2/marcuscedricridia/olmner-o1-7b/a889ae3a-5d86-4454-bfb9-332c4b61b836.json new file mode 100644 index 000000000..963ff040a --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/olmner-o1-7b/a889ae3a-5d86-4454-bfb9-332c4b61b836.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_olmner-o1-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "olmner-o1-7b", + "id": "marcuscedricridia/olmner-o1-7b", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7528 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5481 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4924 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4299 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4386 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/olmner-o1-7b/b1669ad9-450f-4a93-8094-26f427beb49f.json b/data/hfopenllm_v2/marcuscedricridia/olmner-o1-7b/b1669ad9-450f-4a93-8094-26f427beb49f.json deleted file mode 100644 index 7181dc4d9..000000000 --- a/data/hfopenllm_v2/marcuscedricridia/olmner-o1-7b/b1669ad9-450f-4a93-8094-26f427beb49f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_olmner-o1-7b/1762652580.338658", - "retrieved_timestamp": "1762652580.338659", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/olmner-o1-7b", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/olmner-o1-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7527549125209998 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5480873056178129 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49244712990936557 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42990625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43858045212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/marcuscedricridia/olmner-sbr-7b/2c5e1086-03b7-4cdd-801e-03fb26183076.json b/data/hfopenllm_v2/marcuscedricridia/olmner-sbr-7b/2c5e1086-03b7-4cdd-801e-03fb26183076.json new file mode 100644 index 000000000..634719136 --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/olmner-sbr-7b/2c5e1086-03b7-4cdd-801e-03fb26183076.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_olmner-sbr-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "olmner-sbr-7b", + "id": "marcuscedricridia/olmner-sbr-7b", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.76 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5462 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4947 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4154 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4412 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/marcuscedricridia/olmner-sbr-7b/afb014ed-a2e6-46b9-9ee9-a6a1f52e43cf.json b/data/hfopenllm_v2/marcuscedricridia/olmner-sbr-7b/afb014ed-a2e6-46b9-9ee9-a6a1f52e43cf.json deleted file mode 100644 index 1e05c66c0..000000000 --- a/data/hfopenllm_v2/marcuscedricridia/olmner-sbr-7b/afb014ed-a2e6-46b9-9ee9-a6a1f52e43cf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_olmner-sbr-7b/1762652580.338864", - "retrieved_timestamp": "1762652580.3388648", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/olmner-sbr-7b", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/olmner-sbr-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7600488924941378 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5461642048146724 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4947129909365559 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4153645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4412400265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/marcuscedricridia/post-cursa-o1/c9632855-db4e-40bb-b140-2ff524d31fd2.json b/data/hfopenllm_v2/marcuscedricridia/post-cursa-o1/c9632855-db4e-40bb-b140-2ff524d31fd2.json deleted file mode 100644 index 563ae6c8e..000000000 --- a/data/hfopenllm_v2/marcuscedricridia/post-cursa-o1/c9632855-db4e-40bb-b140-2ff524d31fd2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_post-cursa-o1/1762652580.3390641", - "retrieved_timestamp": "1762652580.339065", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open 
LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/post-cursa-o1", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/post-cursa-o1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7628215357473725 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5479692437233474 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4871601208459215 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43514583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4360871010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/marcuscedricridia/post-cursa-o1/d9578847-b732-4c75-b246-9cdf03674fe0.json b/data/hfopenllm_v2/marcuscedricridia/post-cursa-o1/d9578847-b732-4c75-b246-9cdf03674fe0.json new file mode 100644 index 000000000..2d244c450 --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/post-cursa-o1/d9578847-b732-4c75-b246-9cdf03674fe0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_post-cursa-o1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "post-cursa-o1", + "id": "marcuscedricridia/post-cursa-o1", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7628 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.548 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4872 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4351 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4361 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.2/4c6f83fe-7896-4cf3-9434-b5f8d499f5ba.json b/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.2/4c6f83fe-7896-4cf3-9434-b5f8d499f5ba.json new file mode 100644 index 000000000..57d91dea9 --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.2/4c6f83fe-7896-4cf3-9434-b5f8d499f5ba.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_pre-cursa-o1-v1.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pre-cursa-o1-v1.2", + "id": "marcuscedricridia/pre-cursa-o1-v1.2", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7549 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5487 + } + }, + { + 
"evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5068 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3129 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4272 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4402 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.2/9db3b6b0-7cc8-48b6-85f5-1662cad07fae.json b/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.2/9db3b6b0-7cc8-48b6-85f5-1662cad07fae.json deleted file mode 100644 index 089e170c2..000000000 --- a/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.2/9db3b6b0-7cc8-48b6-85f5-1662cad07fae.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_pre-cursa-o1-v1.2/1762652580.339467", - "retrieved_timestamp": "1762652580.339468", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/pre-cursa-o1-v1.2", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/pre-cursa-o1-v1.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7548781677061308 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5486788313377599 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.506797583081571 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42723958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4402426861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.3/619037af-d528-4579-b7e3-58628468d8fb.json b/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.3/619037af-d528-4579-b7e3-58628468d8fb.json new file mode 100644 index 000000000..ba339407b --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.3/619037af-d528-4579-b7e3-58628468d8fb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_pre-cursa-o1-v1.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pre-cursa-o1-v1.3", + "id": "marcuscedricridia/pre-cursa-o1-v1.3", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7507 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5455 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5076 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3129 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4271 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.442 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.3/f86cf126-4fb3-4419-82bf-e5c0168e25cb.json b/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.3/f86cf126-4fb3-4419-82bf-e5c0168e25cb.json deleted file mode 100644 index 9db82ce43..000000000 --- a/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.3/f86cf126-4fb3-4419-82bf-e5c0168e25cb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_pre-cursa-o1-v1.3/1762652580.339683", - "retrieved_timestamp": "1762652580.339684", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/pre-cursa-o1-v1.3", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/pre-cursa-o1-v1.3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7506815250202795 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5454519705653261 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5075528700906344 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42714583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4419880319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.4/4ed1f68a-6bc9-4621-beb1-3d274247cdb6.json 
b/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.4/4ed1f68a-6bc9-4621-beb1-3d274247cdb6.json deleted file mode 100644 index 80bc2daf7..000000000 --- a/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.4/4ed1f68a-6bc9-4621-beb1-3d274247cdb6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_pre-cursa-o1-v1.4/1762652580.3398788", - "retrieved_timestamp": "1762652580.33988", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/pre-cursa-o1-v1.4", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/pre-cursa-o1-v1.4", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.748783228500379 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5493014138981462 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48338368580060426 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42851041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4435671542553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.4/5113b737-8d9f-4321-9a67-91f1aabb40a1.json b/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.4/5113b737-8d9f-4321-9a67-91f1aabb40a1.json new file mode 100644 index 000000000..4cb1c2ce4 --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.4/5113b737-8d9f-4321-9a67-91f1aabb40a1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_pre-cursa-o1-v1.4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pre-cursa-o1-v1.4", + "id": "marcuscedricridia/pre-cursa-o1-v1.4", + "developer": 
"marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7488 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5493 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4834 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4285 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4436 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.6/50627b31-a8d4-401a-8449-5f33cfb17893.json b/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.6/50627b31-a8d4-401a-8449-5f33cfb17893.json deleted file mode 100644 index b2cb168cf..000000000 --- a/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.6/50627b31-a8d4-401a-8449-5f33cfb17893.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_pre-cursa-o1-v1.6/1762652580.340074", - "retrieved_timestamp": "1762652580.340075", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/pre-cursa-o1-v1.6", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/pre-cursa-o1-v1.6", - "additional_details": 
{ - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7527549125209998 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5473342320067097 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4233645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44132313829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.6/641ac372-2e5a-4b44-b22e-a17600a6a868.json b/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.6/641ac372-2e5a-4b44-b22e-a17600a6a868.json new file mode 100644 index 000000000..f3a0dee4d --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.6/641ac372-2e5a-4b44-b22e-a17600a6a868.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_pre-cursa-o1-v1.6/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pre-cursa-o1-v1.6", + "id": "marcuscedricridia/pre-cursa-o1-v1.6", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7528 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5473 + } + }, + { + "evaluation_name": "MATH Level 5", + 
"source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3205 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4234 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4413 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1/51fc3a16-67c2-448b-9854-07ab8adc4dea.json b/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1/51fc3a16-67c2-448b-9854-07ab8adc4dea.json deleted file mode 100644 index 3583c9ee4..000000000 --- a/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1/51fc3a16-67c2-448b-9854-07ab8adc4dea.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_pre-cursa-o1/1762652580.3392608", - "retrieved_timestamp": "1762652580.339262", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/pre-cursa-o1", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/pre-cursa-o1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.740889728143548 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5461688442794247 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5037764350453172 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": 
"Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42596875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4424035904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1/7cbb0b08-871d-48fc-bf3e-86267f5ef19d.json b/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1/7cbb0b08-871d-48fc-bf3e-86267f5ef19d.json new file mode 100644 index 000000000..2cf7d9f19 --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1/7cbb0b08-871d-48fc-bf3e-86267f5ef19d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_pre-cursa-o1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pre-cursa-o1", + "id": "marcuscedricridia/pre-cursa-o1", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7409 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5462 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5038 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.426 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4424 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/r1o-et/84de36db-b427-40c4-80f6-2114c8ad4e4f.json b/data/hfopenllm_v2/marcuscedricridia/r1o-et/84de36db-b427-40c4-80f6-2114c8ad4e4f.json deleted file mode 100644 index f76d66179..000000000 --- a/data/hfopenllm_v2/marcuscedricridia/r1o-et/84de36db-b427-40c4-80f6-2114c8ad4e4f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_r1o-et/1762652580.340277", - "retrieved_timestamp": "1762652580.340277", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/r1o-et", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/r1o-et", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3596800932636516 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42092007019831174 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07930513595166164 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3579375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2579787234042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/marcuscedricridia/r1o-et/c82e887c-c8ab-4221-aa0b-e8b7a86e7c46.json b/data/hfopenllm_v2/marcuscedricridia/r1o-et/c82e887c-c8ab-4221-aa0b-e8b7a86e7c46.json new file mode 100644 index 000000000..57d5458d0 --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/r1o-et/c82e887c-c8ab-4221-aa0b-e8b7a86e7c46.json @@ -0,0 +1,132 @@ +{ + "schema_version": 
"0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_r1o-et/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "r1o-et", + "id": "marcuscedricridia/r1o-et", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3597 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4209 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0793 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3579 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.258 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/sbr-o1-7b/05666c00-3b8c-48f3-9e36-bc9a116bb0c6.json b/data/hfopenllm_v2/marcuscedricridia/sbr-o1-7b/05666c00-3b8c-48f3-9e36-bc9a116bb0c6.json deleted file mode 100644 index 09403a9c8..000000000 --- a/data/hfopenllm_v2/marcuscedricridia/sbr-o1-7b/05666c00-3b8c-48f3-9e36-bc9a116bb0c6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_sbr-o1-7b/1762652580.340477", - "retrieved_timestamp": "1762652580.340478", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": 
{ - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/sbr-o1-7b", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/sbr-o1-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7454609325478618 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5478826565229475 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4984894259818731 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4404166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43550531914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/marcuscedricridia/sbr-o1-7b/50c65a83-9d08-4155-ad2c-5a2f8ffc8743.json b/data/hfopenllm_v2/marcuscedricridia/sbr-o1-7b/50c65a83-9d08-4155-ad2c-5a2f8ffc8743.json new file mode 100644 index 000000000..dab48a238 --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/sbr-o1-7b/50c65a83-9d08-4155-ad2c-5a2f8ffc8743.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_sbr-o1-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sbr-o1-7b", + "id": "marcuscedricridia/sbr-o1-7b", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7455 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + 
"source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5479 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4985 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4404 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4355 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/stray-r1o-et/99d97aef-bb6b-471b-8ed7-f6f92f75842c.json b/data/hfopenllm_v2/marcuscedricridia/stray-r1o-et/99d97aef-bb6b-471b-8ed7-f6f92f75842c.json new file mode 100644 index 000000000..b8ff455f6 --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/stray-r1o-et/99d97aef-bb6b-471b-8ed7-f6f92f75842c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_stray-r1o-et/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "stray-r1o-et", + "id": "marcuscedricridia/stray-r1o-et", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1562 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.2967 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0045 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4086 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1094 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/stray-r1o-et/cbf68d01-b993-4bcd-b174-23e3b6e28d3a.json b/data/hfopenllm_v2/marcuscedricridia/stray-r1o-et/cbf68d01-b993-4bcd-b174-23e3b6e28d3a.json deleted file mode 100644 index c81631690..000000000 --- a/data/hfopenllm_v2/marcuscedricridia/stray-r1o-et/cbf68d01-b993-4bcd-b174-23e3b6e28d3a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_stray-r1o-et/1762652580.340682", - "retrieved_timestamp": "1762652580.340683", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "marcuscedricridia/stray-r1o-et", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/stray-r1o-et", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15622215720953736 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2967459956151434 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.004531722054380665 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4085729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.109375 - } - } - ] -} diff --git a/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-EnhancedMUSREnsembleV3/4800a6d0-8458-405a-95ca-6d0690a8f769.json b/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-EnhancedMUSREnsembleV3/4800a6d0-8458-405a-95ca-6d0690a8f769.json deleted file mode 100644 index 970b8eaa0..000000000 --- a/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-EnhancedMUSREnsembleV3/4800a6d0-8458-405a-95ca-6d0690a8f769.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/matouLeLoup_ECE-PRYMMAL-0.5B-FT-EnhancedMUSREnsembleV3/1762652580.340896", - "retrieved_timestamp": "1762652580.340897", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-EnhancedMUSREnsembleV3", - "developer": "matouLeLoup", - "inference_platform": "unknown", - "id": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-EnhancedMUSREnsembleV3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18732186154957736 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3239117424825444 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37520833333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", 
- "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17195811170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-EnhancedMUSREnsembleV3/b98504a0-f1d6-4872-b748-2ca8199c5328.json b/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-EnhancedMUSREnsembleV3/b98504a0-f1d6-4872-b748-2ca8199c5328.json new file mode 100644 index 000000000..524853f69 --- /dev/null +++ b/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-EnhancedMUSREnsembleV3/b98504a0-f1d6-4872-b748-2ca8199c5328.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/matouLeLoup_ECE-PRYMMAL-0.5B-FT-EnhancedMUSREnsembleV3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-PRYMMAL-0.5B-FT-EnhancedMUSREnsembleV3", + "id": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-EnhancedMUSREnsembleV3", + "developer": "matouLeLoup", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1873 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3239 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0264 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3752 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": 
{ + "score": 0.172 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-MUSR-ENSEMBLE-V2Mathis/5a159667-7460-4a97-884e-6a96df59873b.json b/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-MUSR-ENSEMBLE-V2Mathis/5a159667-7460-4a97-884e-6a96df59873b.json new file mode 100644 index 000000000..9c8242ed9 --- /dev/null +++ b/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-MUSR-ENSEMBLE-V2Mathis/5a159667-7460-4a97-884e-6a96df59873b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/matouLeLoup_ECE-PRYMMAL-0.5B-FT-MUSR-ENSEMBLE-V2Mathis/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-PRYMMAL-0.5B-FT-MUSR-ENSEMBLE-V2Mathis", + "id": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-MUSR-ENSEMBLE-V2Mathis", + "developer": "matouLeLoup", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1873 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3239 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0264 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3752 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.172 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-MUSR-ENSEMBLE-V2Mathis/95c9ef47-8194-4c00-bbea-a65a7715f9f3.json b/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-MUSR-ENSEMBLE-V2Mathis/95c9ef47-8194-4c00-bbea-a65a7715f9f3.json deleted file mode 100644 index 7213db15f..000000000 --- a/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-MUSR-ENSEMBLE-V2Mathis/95c9ef47-8194-4c00-bbea-a65a7715f9f3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/matouLeLoup_ECE-PRYMMAL-0.5B-FT-MUSR-ENSEMBLE-V2Mathis/1762652580.3411388", - "retrieved_timestamp": "1762652580.34114", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-MUSR-ENSEMBLE-V2Mathis", - "developer": "matouLeLoup", - "inference_platform": "unknown", - "id": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-MUSR-ENSEMBLE-V2Mathis", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18732186154957736 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3239117424825444 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37520833333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17195811170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-ENSEMBLE-Mathis/16a2eceb-073d-4dc3-87a7-a15c641c5ebb.json b/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-ENSEMBLE-Mathis/16a2eceb-073d-4dc3-87a7-a15c641c5ebb.json new file mode 100644 index 000000000..4054a3100 --- /dev/null +++ b/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-ENSEMBLE-Mathis/16a2eceb-073d-4dc3-87a7-a15c641c5ebb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/matouLeLoup_ECE-PRYMMAL-0.5B-FT-V4-MUSR-ENSEMBLE-Mathis/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + 
"source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-PRYMMAL-0.5B-FT-V4-MUSR-ENSEMBLE-Mathis", + "id": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-ENSEMBLE-Mathis", + "developer": "matouLeLoup", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1873 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3239 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0264 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3752 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.172 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-ENSEMBLE-Mathis/b88d6df2-5642-4837-bf04-4d804a4ba3c4.json b/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-ENSEMBLE-Mathis/b88d6df2-5642-4837-bf04-4d804a4ba3c4.json deleted file mode 100644 index 2fc9fe856..000000000 --- a/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-ENSEMBLE-Mathis/b88d6df2-5642-4837-bf04-4d804a4ba3c4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/matouLeLoup_ECE-PRYMMAL-0.5B-FT-V4-MUSR-ENSEMBLE-Mathis/1762652580.341354", - "retrieved_timestamp": "1762652580.341354", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-ENSEMBLE-Mathis", - "developer": "matouLeLoup", - "inference_platform": "unknown", - "id": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-ENSEMBLE-Mathis", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18732186154957736 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3239117424825444 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37520833333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17195811170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-Mathis/679f1499-572e-4f60-9b2d-4c8199d71107.json b/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-Mathis/679f1499-572e-4f60-9b2d-4c8199d71107.json deleted file mode 100644 index c7cec62a1..000000000 --- a/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-Mathis/679f1499-572e-4f60-9b2d-4c8199d71107.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/matouLeLoup_ECE-PRYMMAL-0.5B-FT-V4-MUSR-Mathis/1762652580.341564", - "retrieved_timestamp": "1762652580.341565", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-Mathis", - "developer": "matouLeLoup", - "inference_platform": "unknown", - "id": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-Mathis", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18824607596732226 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32327887380902803 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027190332326283987 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3684791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17204122340425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-Mathis/e8e2d04b-21db-43dc-8b8f-7fa3bba87abc.json b/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-Mathis/e8e2d04b-21db-43dc-8b8f-7fa3bba87abc.json new file mode 100644 index 000000000..a118652bb --- /dev/null +++ b/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-Mathis/e8e2d04b-21db-43dc-8b8f-7fa3bba87abc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/matouLeLoup_ECE-PRYMMAL-0.5B-FT-V4-MUSR-Mathis/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-PRYMMAL-0.5B-FT-V4-MUSR-Mathis", + "id": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-Mathis", + "developer": "matouLeLoup", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1882 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3233 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact 
Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0272 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3685 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.172 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/8da1b04b-c3a8-4554-bcb5-0e08dcfd7483.json b/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/8da1b04b-c3a8-4554-bcb5-0e08dcfd7483.json deleted file mode 100644 index 324d6685f..000000000 --- a/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/8da1b04b-c3a8-4554-bcb5-0e08dcfd7483.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/matouLeLoup_ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/1762652580.3417778", - "retrieved_timestamp": "1762652580.341779", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis", - "developer": "matouLeLoup", - "inference_platform": "unknown", - "id": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis", - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 0.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16521496296493304 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30237295164613204 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0188821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42730208333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1116190159574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/acbb93b3-f8fc-479d-9610-392efd7d4ecc.json b/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/acbb93b3-f8fc-479d-9610-392efd7d4ecc.json new file mode 100644 index 000000000..e00a6160a --- /dev/null +++ b/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/acbb93b3-f8fc-479d-9610-392efd7d4ecc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/matouLeLoup_ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis", + "id": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis", + "developer": "matouLeLoup", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1652 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3024 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0189 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2567 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.4273 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1116 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mattshumer/Reflection-Llama-3.1-70B/6d0589bd-1f05-44ee-afa5-3657b960d7c9.json b/data/hfopenllm_v2/mattshumer/Reflection-Llama-3.1-70B/6d0589bd-1f05-44ee-afa5-3657b960d7c9.json new file mode 100644 index 000000000..e8d1ce9f3 --- /dev/null +++ b/data/hfopenllm_v2/mattshumer/Reflection-Llama-3.1-70B/6d0589bd-1f05-44ee-afa5-3657b960d7c9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mattshumer_Reflection-Llama-3.1-70B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Reflection-Llama-3.1-70B", + "id": "mattshumer/Reflection-Llama-3.1-70B", + "developer": "mattshumer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0045 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.645 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2145 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3633 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4577 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4955 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mattshumer/ref_70_e3/134663d8-05a8-4336-90e2-68e7cba5f1df.json b/data/hfopenllm_v2/mattshumer/ref_70_e3/134663d8-05a8-4336-90e2-68e7cba5f1df.json new file mode 100644 index 000000000..d4167a969 --- /dev/null +++ b/data/hfopenllm_v2/mattshumer/ref_70_e3/134663d8-05a8-4336-90e2-68e7cba5f1df.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mattshumer_ref_70_e3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ref_70_e3", + "id": "mattshumer/ref_70_e3", + "developer": "mattshumer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6294 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6501 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2795 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3356 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4328 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5303 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/mattshumer/ref_70_e3/8ab597da-85ec-45d5-b5e2-f51ca8a2f3c9.json b/data/hfopenllm_v2/mattshumer/ref_70_e3/8ab597da-85ec-45d5-b5e2-f51ca8a2f3c9.json deleted file mode 100644 index 6593c0097..000000000 --- a/data/hfopenllm_v2/mattshumer/ref_70_e3/8ab597da-85ec-45d5-b5e2-f51ca8a2f3c9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mattshumer_ref_70_e3/1762652580.342239", - "retrieved_timestamp": "1762652580.34224", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mattshumer/ref_70_e3", - "developer": "mattshumer", - "inference_platform": "unknown", - "id": "mattshumer/ref_70_e3", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6294321289733462 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6500839481104265 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2794561933534743 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33557046979865773 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4327604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5302526595744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/maywell/Qwen2-7B-Multilingual-RP/3bfced28-b06e-46ab-a6aa-171b0c424337.json b/data/hfopenllm_v2/maywell/Qwen2-7B-Multilingual-RP/3bfced28-b06e-46ab-a6aa-171b0c424337.json new file mode 100644 index 000000000..4a43aa1ea --- /dev/null +++ b/data/hfopenllm_v2/maywell/Qwen2-7B-Multilingual-RP/3bfced28-b06e-46ab-a6aa-171b0c424337.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/maywell_Qwen2-7B-Multilingual-RP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2-7B-Multilingual-RP", + "id": "maywell/Qwen2-7B-Multilingual-RP", + "developer": "maywell", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4347 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5062 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2243 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3696 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3859 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meditsolutions/Llama-3.1-MedIT-SUN-8B/b6a83b82-6b05-4437-a076-e2a3982f6169.json b/data/hfopenllm_v2/meditsolutions/Llama-3.1-MedIT-SUN-8B/b6a83b82-6b05-4437-a076-e2a3982f6169.json new file mode 100644 index 000000000..a6245e049 --- /dev/null +++ b/data/hfopenllm_v2/meditsolutions/Llama-3.1-MedIT-SUN-8B/b6a83b82-6b05-4437-a076-e2a3982f6169.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/meditsolutions_Llama-3.1-MedIT-SUN-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-MedIT-SUN-8B", + "id": "meditsolutions/Llama-3.1-MedIT-SUN-8B", + "developer": "meditsolutions", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + 
"evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7837 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5187 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2092 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4056 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3916 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-1B-Instruct/f4c341cb-6489-49a1-9532-6b78c2238b2a.json b/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-1B-Instruct/f4c341cb-6489-49a1-9532-6b78c2238b2a.json deleted file mode 100644 index 2658cd4a4..000000000 --- a/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-1B-Instruct/f4c341cb-6489-49a1-9532-6b78c2238b2a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/meditsolutions_Llama-3.2-SUN-1B-Instruct/1762652580.343025", - "retrieved_timestamp": "1762652580.343026", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "meditsolutions/Llama-3.2-SUN-1B-Instruct", - "developer": "meditsolutions", - "inference_platform": "unknown", - "id": "meditsolutions/Llama-3.2-SUN-1B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaMedITForCausalLM", - "params_billions": 1.498 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": 
{ - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6412973133507981 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34738999022447486 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07099697885196375 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2424496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35136458333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17810837765957446 - } - } - ] -} diff --git a/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-1B-Instruct/f621201b-f571-4487-9f1e-b767675c659d.json b/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-1B-Instruct/f621201b-f571-4487-9f1e-b767675c659d.json new file mode 100644 index 000000000..c6b47ea7c --- /dev/null +++ b/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-1B-Instruct/f621201b-f571-4487-9f1e-b767675c659d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/meditsolutions_Llama-3.2-SUN-1B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-SUN-1B-Instruct", + "id": "meditsolutions/Llama-3.2-SUN-1B-Instruct", + "developer": "meditsolutions", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaMedITForCausalLM", + "params_billions": 1.498 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6413 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3474 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + 
"metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.071 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2424 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3514 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1781 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-1B-chat/710fdb79-fba4-42da-8e26-45b4caf75207.json b/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-1B-chat/710fdb79-fba4-42da-8e26-45b4caf75207.json new file mode 100644 index 000000000..0f33814a2 --- /dev/null +++ b/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-1B-chat/710fdb79-fba4-42da-8e26-45b4caf75207.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/meditsolutions_Llama-3.2-SUN-1B-chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-SUN-1B-chat", + "id": "meditsolutions/Llama-3.2-SUN-1B-chat", + "developer": "meditsolutions", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5482 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3514 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.0642 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3249 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1838 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-1B-chat/7e72df4d-7a54-4e11-b4a2-44224db285ec.json b/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-1B-chat/7e72df4d-7a54-4e11-b4a2-44224db285ec.json deleted file mode 100644 index 86a472474..000000000 --- a/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-1B-chat/7e72df4d-7a54-4e11-b4a2-44224db285ec.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/meditsolutions_Llama-3.2-SUN-1B-chat/1762652580.343276", - "retrieved_timestamp": "1762652580.343277", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "meditsolutions/Llama-3.2-SUN-1B-chat", - "developer": "meditsolutions", - "inference_platform": "unknown", - "id": "meditsolutions/Llama-3.2-SUN-1B-chat", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5481743994822625 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35144575516411386 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06419939577039276 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3249166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18375997340425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-26000/35fa7a5e-8866-4ce3-9899-8737e908f34f.json b/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-26000/35fa7a5e-8866-4ce3-9899-8737e908f34f.json new file mode 100644 index 000000000..b58b7f561 --- /dev/null +++ b/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-26000/35fa7a5e-8866-4ce3-9899-8737e908f34f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/meditsolutions_Llama-3.2-SUN-2.4B-checkpoint-26000/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-SUN-2.4B-checkpoint-26000", + "id": "meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-26000", + "developer": "meditsolutions", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 2.209 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2814 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3018 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0181 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4103 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + 
"hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1345 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-34800/2b24b69b-15dc-4666-83f3-c77db545bdbd.json b/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-34800/2b24b69b-15dc-4666-83f3-c77db545bdbd.json new file mode 100644 index 000000000..3c289add0 --- /dev/null +++ b/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-34800/2b24b69b-15dc-4666-83f3-c77db545bdbd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/meditsolutions_Llama-3.2-SUN-2.4B-checkpoint-34800/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-SUN-2.4B-checkpoint-34800", + "id": "meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-34800", + "developer": "meditsolutions", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 2.209 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2501 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3161 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0106 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2861 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4022 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1357 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0/0d00d849-2147-4fc1-9e5f-d42a95be6ca5.json b/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0/0d00d849-2147-4fc1-9e5f-d42a95be6ca5.json new file mode 100644 index 000000000..92bc18771 --- /dev/null +++ b/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0/0d00d849-2147-4fc1-9e5f-d42a95be6ca5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/meditsolutions_Llama-3.2-SUN-2.4B-v1.0.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-SUN-2.4B-v1.0.0", + "id": "meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0", + "developer": "meditsolutions", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 2.472 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5637 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3391 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0627 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3209 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1543 + } + } + ] +} \ No newline at end of file 
diff --git a/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-2.5B-chat/7385392b-79e9-4764-9326-d7bc1586b918.json b/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-2.5B-chat/7385392b-79e9-4764-9326-d7bc1586b918.json deleted file mode 100644 index 01c800742..000000000 --- a/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-2.5B-chat/7385392b-79e9-4764-9326-d7bc1586b918.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/meditsolutions_Llama-3.2-SUN-2.5B-chat/1762652580.344106", - "retrieved_timestamp": "1762652580.344107", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "meditsolutions/Llama-3.2-SUN-2.5B-chat", - "developer": "meditsolutions", - "inference_platform": "unknown", - "id": "meditsolutions/Llama-3.2-SUN-2.5B-chat", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 2.472 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.560414145578177 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3574734302161124 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07099697885196375 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3155208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1813497340425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-2.5B-chat/f45135b0-3c26-44b5-9922-a6c0817a172d.json b/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-2.5B-chat/f45135b0-3c26-44b5-9922-a6c0817a172d.json new file mode 100644 index 000000000..03c4a4de9 --- /dev/null +++ b/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-2.5B-chat/f45135b0-3c26-44b5-9922-a6c0817a172d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/meditsolutions_Llama-3.2-SUN-2.5B-chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" 
+ }, + "model_info": { + "name": "Llama-3.2-SUN-2.5B-chat", + "id": "meditsolutions/Llama-3.2-SUN-2.5B-chat", + "developer": "meditsolutions", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 2.472 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5604 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3575 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.071 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3155 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1813 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-HDIC-1B-Instruct/67eb0d6c-9086-4c80-8506-c3e1489f2673.json b/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-HDIC-1B-Instruct/67eb0d6c-9086-4c80-8506-c3e1489f2673.json new file mode 100644 index 000000000..9ccd525a8 --- /dev/null +++ b/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-HDIC-1B-Instruct/67eb0d6c-9086-4c80-8506-c3e1489f2673.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/meditsolutions_Llama-3.2-SUN-HDIC-1B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-SUN-HDIC-1B-Instruct", + "id": "meditsolutions/Llama-3.2-SUN-HDIC-1B-Instruct", + "developer": "meditsolutions", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6827 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3508 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0619 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2366 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3594 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1687 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-HDIC-1B-Instruct/ac6f2c5a-32b7-4553-acaa-e329f1916c85.json b/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-HDIC-1B-Instruct/ac6f2c5a-32b7-4553-acaa-e329f1916c85.json deleted file mode 100644 index 606c88e40..000000000 --- a/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-HDIC-1B-Instruct/ac6f2c5a-32b7-4553-acaa-e329f1916c85.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/meditsolutions_Llama-3.2-SUN-HDIC-1B-Instruct/1762652580.344357", - "retrieved_timestamp": "1762652580.344363", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "meditsolutions/Llama-3.2-SUN-HDIC-1B-Instruct", - "developer": "meditsolutions", - "inference_platform": "unknown", - "id": 
"meditsolutions/Llama-3.2-SUN-HDIC-1B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6826631116548536 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3507731670753292 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.061933534743202415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23657718120805368 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3593645833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16871675531914893 - } - } - ] -} diff --git a/data/hfopenllm_v2/meditsolutions/MSH-Lite-7B-v1-Bielik-v2.3-Instruct-Llama-Prune/79d3dc85-08f6-475c-ac2c-1ff32f5a089f.json b/data/hfopenllm_v2/meditsolutions/MSH-Lite-7B-v1-Bielik-v2.3-Instruct-Llama-Prune/79d3dc85-08f6-475c-ac2c-1ff32f5a089f.json new file mode 100644 index 000000000..47159802e --- /dev/null +++ b/data/hfopenllm_v2/meditsolutions/MSH-Lite-7B-v1-Bielik-v2.3-Instruct-Llama-Prune/79d3dc85-08f6-475c-ac2c-1ff32f5a089f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/meditsolutions_MSH-Lite-7B-v1-Bielik-v2.3-Instruct-Llama-Prune/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MSH-Lite-7B-v1-Bielik-v2.3-Instruct-Llama-Prune", + "id": "meditsolutions/MSH-Lite-7B-v1-Bielik-v2.3-Instruct-Llama-Prune", + "developer": "meditsolutions", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.646 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3655 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4035 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0264 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4253 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.219 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meditsolutions/MSH-Lite-7B-v1-Bielik-v2.3-Instruct-Llama-Prune/ff57f4fa-eb78-4ef4-9d92-9f160a1b936a.json b/data/hfopenllm_v2/meditsolutions/MSH-Lite-7B-v1-Bielik-v2.3-Instruct-Llama-Prune/ff57f4fa-eb78-4ef4-9d92-9f160a1b936a.json deleted file mode 100644 index a46862178..000000000 --- a/data/hfopenllm_v2/meditsolutions/MSH-Lite-7B-v1-Bielik-v2.3-Instruct-Llama-Prune/ff57f4fa-eb78-4ef4-9d92-9f160a1b936a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/meditsolutions_MSH-Lite-7B-v1-Bielik-v2.3-Instruct-Llama-Prune/1762652580.344661", - "retrieved_timestamp": "1762652580.344662", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "meditsolutions/MSH-Lite-7B-v1-Bielik-v2.3-Instruct-Llama-Prune", - "developer": "meditsolutions", - "inference_platform": "unknown", - "id": "meditsolutions/MSH-Lite-7B-v1-Bielik-v2.3-Instruct-Llama-Prune", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.646 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36550020611976225 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4034845834509661 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42534374999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21899933510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/meditsolutions/MSH-v1-Bielik-v2.3-Instruct-MedIT-merge/4e9b3fa2-d3d2-4e4c-a1fa-c812f481f64a.json b/data/hfopenllm_v2/meditsolutions/MSH-v1-Bielik-v2.3-Instruct-MedIT-merge/4e9b3fa2-d3d2-4e4c-a1fa-c812f481f64a.json new file mode 100644 index 000000000..555723348 --- /dev/null +++ b/data/hfopenllm_v2/meditsolutions/MSH-v1-Bielik-v2.3-Instruct-MedIT-merge/4e9b3fa2-d3d2-4e4c-a1fa-c812f481f64a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/meditsolutions_MSH-v1-Bielik-v2.3-Instruct-MedIT-merge/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MSH-v1-Bielik-v2.3-Instruct-MedIT-merge", + "id": "meditsolutions/MSH-v1-Bielik-v2.3-Instruct-MedIT-merge", + "developer": "meditsolutions", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 11.169 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5814 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5672 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2077 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + 
"dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3456 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4385 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.35 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meditsolutions/MSH-v1-Bielik-v2.3-Instruct-MedIT-merge/a7e4718c-c4cf-4c0f-b67f-fd12fa54e4ad.json b/data/hfopenllm_v2/meditsolutions/MSH-v1-Bielik-v2.3-Instruct-MedIT-merge/a7e4718c-c4cf-4c0f-b67f-fd12fa54e4ad.json deleted file mode 100644 index 1e072e950..000000000 --- a/data/hfopenllm_v2/meditsolutions/MSH-v1-Bielik-v2.3-Instruct-MedIT-merge/a7e4718c-c4cf-4c0f-b67f-fd12fa54e4ad.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/meditsolutions_MSH-v1-Bielik-v2.3-Instruct-MedIT-merge/1762652580.344883", - "retrieved_timestamp": "1762652580.344884", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "meditsolutions/MSH-v1-Bielik-v2.3-Instruct-MedIT-merge", - "developer": "meditsolutions", - "inference_platform": "unknown", - "id": "meditsolutions/MSH-v1-Bielik-v2.3-Instruct-MedIT-merge", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 11.169 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5814217387642566 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5671722290858499 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20770392749244712 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34563758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on 
MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43845833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3499833776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/meditsolutions/MedIT-Mesh-3B-Instruct/6e62a8a0-0bdf-4b6c-93de-593423dadd3a.json b/data/hfopenllm_v2/meditsolutions/MedIT-Mesh-3B-Instruct/6e62a8a0-0bdf-4b6c-93de-593423dadd3a.json new file mode 100644 index 000000000..af35e21a0 --- /dev/null +++ b/data/hfopenllm_v2/meditsolutions/MedIT-Mesh-3B-Instruct/6e62a8a0-0bdf-4b6c-93de-593423dadd3a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/meditsolutions_MedIT-Mesh-3B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MedIT-Mesh-3B-Instruct", + "id": "meditsolutions/MedIT-Mesh-3B-Instruct", + "developer": "meditsolutions", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5814 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5576 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2032 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3238 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4048 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { 
+ "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4012 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meditsolutions/MedIT-Mesh-3B-Instruct/89568570-298f-4dc5-9b7b-c9ce84d4010e.json b/data/hfopenllm_v2/meditsolutions/MedIT-Mesh-3B-Instruct/89568570-298f-4dc5-9b7b-c9ce84d4010e.json deleted file mode 100644 index c1b2446f7..000000000 --- a/data/hfopenllm_v2/meditsolutions/MedIT-Mesh-3B-Instruct/89568570-298f-4dc5-9b7b-c9ce84d4010e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/meditsolutions_MedIT-Mesh-3B-Instruct/1762652580.345099", - "retrieved_timestamp": "1762652580.345099", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "meditsolutions/MedIT-Mesh-3B-Instruct", - "developer": "meditsolutions", - "inference_platform": "unknown", - "id": "meditsolutions/MedIT-Mesh-3B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5814217387642566 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5575523356865378 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20317220543806647 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4047604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4011801861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/meditsolutions/SmolLM2-MedIT-Upscale-2B/871131c1-295d-40a0-a396-09d24b880064.json b/data/hfopenllm_v2/meditsolutions/SmolLM2-MedIT-Upscale-2B/871131c1-295d-40a0-a396-09d24b880064.json new file mode 100644 index 000000000..870bf4c0c --- /dev/null +++ b/data/hfopenllm_v2/meditsolutions/SmolLM2-MedIT-Upscale-2B/871131c1-295d-40a0-a396-09d24b880064.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/meditsolutions_SmolLM2-MedIT-Upscale-2B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SmolLM2-MedIT-Upscale-2B", + "id": "meditsolutions/SmolLM2-MedIT-Upscale-2B", + "developer": "meditsolutions", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 2.114 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6429 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3551 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0559 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3314 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1971 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meditsolutions/SmolLM2-MedIT-Upscale-2B/d78a23ac-c3f1-4ad5-bbd2-ea37faea455f.json b/data/hfopenllm_v2/meditsolutions/SmolLM2-MedIT-Upscale-2B/d78a23ac-c3f1-4ad5-bbd2-ea37faea455f.json deleted file mode 100644 index 7673c4804..000000000 --- a/data/hfopenllm_v2/meditsolutions/SmolLM2-MedIT-Upscale-2B/d78a23ac-c3f1-4ad5-bbd2-ea37faea455f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/meditsolutions_SmolLM2-MedIT-Upscale-2B/1762652580.3453178", - "retrieved_timestamp": "1762652580.3453188", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "meditsolutions/SmolLM2-MedIT-Upscale-2B", - "developer": "meditsolutions", - "inference_platform": "unknown", - "id": "meditsolutions/SmolLM2-MedIT-Upscale-2B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 2.114 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6429207835210575 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3551122445928012 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.055891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33136458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19705784574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/meetkai/functionary-small-v3.1/44eefbb2-22d4-4dff-889d-a87fc40b2eea.json b/data/hfopenllm_v2/meetkai/functionary-small-v3.1/44eefbb2-22d4-4dff-889d-a87fc40b2eea.json new file mode 100644 index 000000000..f08479499 --- /dev/null +++ b/data/hfopenllm_v2/meetkai/functionary-small-v3.1/44eefbb2-22d4-4dff-889d-a87fc40b2eea.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/meetkai_functionary-small-v3.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "functionary-small-v3.1", + "id": "meetkai/functionary-small-v3.1", + "developer": "meetkai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.6275 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4982 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1571 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2886 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3834 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3349 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meetkai/functionary-small-v3.1/7312a4c6-85e2-4cb3-9c3e-1dfc039d1c3a.json b/data/hfopenllm_v2/meetkai/functionary-small-v3.1/7312a4c6-85e2-4cb3-9c3e-1dfc039d1c3a.json deleted file mode 100644 index a043f62c2..000000000 --- a/data/hfopenllm_v2/meetkai/functionary-small-v3.1/7312a4c6-85e2-4cb3-9c3e-1dfc039d1c3a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/meetkai_functionary-small-v3.1/1762652580.345532", - "retrieved_timestamp": "1762652580.345533", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "meetkai/functionary-small-v3.1", - "developer": "meetkai", - "inference_platform": "unknown", - "id": "meetkai/functionary-small-v3.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6274584768414474 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4981781042779377 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15709969788519637 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3833645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33485704787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/meraGPT/mera-mix-4x7B/cd1de470-a174-4c08-9efe-a06d493dc4b2.json b/data/hfopenllm_v2/meraGPT/mera-mix-4x7B/cd1de470-a174-4c08-9efe-a06d493dc4b2.json new file mode 100644 index 000000000..b69c8564f --- /dev/null +++ b/data/hfopenllm_v2/meraGPT/mera-mix-4x7B/cd1de470-a174-4c08-9efe-a06d493dc4b2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/meraGPT_mera-mix-4x7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mera-mix-4x7B", + "id": "meraGPT/mera-mix-4x7B", + "developer": "meraGPT", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.154 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4832 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4019 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0536 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4057 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2748 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mergekit-community/JAJUKA-WEWILLNEVERFORGETYOU-3B/c948d98a-af63-43d6-a7c9-9ee61654a239.json b/data/hfopenllm_v2/mergekit-community/JAJUKA-WEWILLNEVERFORGETYOU-3B/c948d98a-af63-43d6-a7c9-9ee61654a239.json deleted file mode 100644 index eda5652e9..000000000 --- a/data/hfopenllm_v2/mergekit-community/JAJUKA-WEWILLNEVERFORGETYOU-3B/c948d98a-af63-43d6-a7c9-9ee61654a239.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mergekit-community_JAJUKA-WEWILLNEVERFORGETYOU-3B/1762652580.346048", - "retrieved_timestamp": "1762652580.346048", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mergekit-community/JAJUKA-WEWILLNEVERFORGETYOU-3B", - "developer": "mergekit-community", - "inference_platform": "unknown", - "id": "mergekit-community/JAJUKA-WEWILLNEVERFORGETYOU-3B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49406907006742107 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.436971949757697 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12462235649546828 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36562500000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": 
{ - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3032746010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/mergekit-community/JAJUKA-WEWILLNEVERFORGETYOU-3B/fdb55a14-0697-4775-8358-fed202498b4f.json b/data/hfopenllm_v2/mergekit-community/JAJUKA-WEWILLNEVERFORGETYOU-3B/fdb55a14-0697-4775-8358-fed202498b4f.json new file mode 100644 index 000000000..a962d2f5e --- /dev/null +++ b/data/hfopenllm_v2/mergekit-community/JAJUKA-WEWILLNEVERFORGETYOU-3B/fdb55a14-0697-4775-8358-fed202498b4f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mergekit-community_JAJUKA-WEWILLNEVERFORGETYOU-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JAJUKA-WEWILLNEVERFORGETYOU-3B", + "id": "mergekit-community/JAJUKA-WEWILLNEVERFORGETYOU-3B", + "developer": "mergekit-community", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4941 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.437 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1246 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3656 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + 
}, + "score_details": { + "score": 0.3033 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mergekit-community/SuperQwen-2.5-1.5B/c069a224-638a-4cad-a9ad-e4f8579e8c15.json b/data/hfopenllm_v2/mergekit-community/SuperQwen-2.5-1.5B/c069a224-638a-4cad-a9ad-e4f8579e8c15.json new file mode 100644 index 000000000..1f4baa35d --- /dev/null +++ b/data/hfopenllm_v2/mergekit-community/SuperQwen-2.5-1.5B/c069a224-638a-4cad-a9ad-e4f8579e8c15.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mergekit-community_SuperQwen-2.5-1.5B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SuperQwen-2.5-1.5B", + "id": "mergekit-community/SuperQwen-2.5-1.5B", + "developer": "mergekit-community", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1336 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2907 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0196 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2542 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3355 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1075 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/mergekit-community/VirtuosoSmall-InstructModelStock/10e5c103-f25f-45bb-bfe6-a22876cffe87.json b/data/hfopenllm_v2/mergekit-community/VirtuosoSmall-InstructModelStock/10e5c103-f25f-45bb-bfe6-a22876cffe87.json new file mode 100644 index 000000000..7af838cdf --- /dev/null +++ b/data/hfopenllm_v2/mergekit-community/VirtuosoSmall-InstructModelStock/10e5c103-f25f-45bb-bfe6-a22876cffe87.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mergekit-community_VirtuosoSmall-InstructModelStock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "VirtuosoSmall-InstructModelStock", + "id": "mergekit-community/VirtuosoSmall-InstructModelStock", + "developer": "mergekit-community", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5238 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6518 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4094 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3826 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4756 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5421 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mergekit-community/VirtuosoSmall-InstructModelStock/8c7e09ef-ac37-4765-9f1e-a1b17ff4b084.json 
b/data/hfopenllm_v2/mergekit-community/VirtuosoSmall-InstructModelStock/8c7e09ef-ac37-4765-9f1e-a1b17ff4b084.json deleted file mode 100644 index c4b78c755..000000000 --- a/data/hfopenllm_v2/mergekit-community/VirtuosoSmall-InstructModelStock/8c7e09ef-ac37-4765-9f1e-a1b17ff4b084.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mergekit-community_VirtuosoSmall-InstructModelStock/1762652580.346572", - "retrieved_timestamp": "1762652580.346573", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mergekit-community/VirtuosoSmall-InstructModelStock", - "developer": "mergekit-community", - "inference_platform": "unknown", - "id": "mergekit-community/VirtuosoSmall-InstructModelStock", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5237946426592552 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6517899193567194 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4093655589123867 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3825503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4755729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5420545212765957 - } - } - ] -} diff --git a/data/hfopenllm_v2/mergekit-community/diabolic6045_ELN-AOC-CAIN/a9ecca9a-c5d4-45b2-a403-e74a98a46322.json b/data/hfopenllm_v2/mergekit-community/diabolic6045_ELN-AOC-CAIN/a9ecca9a-c5d4-45b2-a403-e74a98a46322.json new file mode 100644 index 000000000..2b83b90c9 --- /dev/null +++ b/data/hfopenllm_v2/mergekit-community/diabolic6045_ELN-AOC-CAIN/a9ecca9a-c5d4-45b2-a403-e74a98a46322.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mergekit-community_diabolic6045_ELN-AOC-CAIN/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + 
"model_info": { + "name": "diabolic6045_ELN-AOC-CAIN", + "id": "mergekit-community/diabolic6045_ELN-AOC-CAIN", + "developer": "mergekit-community", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0862 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3126 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0121 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3658 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1191 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mergekit-community/diabolic6045_ELN-AOC-CAIN/c87fbaff-133e-4312-87bf-d2fa397d66c4.json b/data/hfopenllm_v2/mergekit-community/diabolic6045_ELN-AOC-CAIN/c87fbaff-133e-4312-87bf-d2fa397d66c4.json deleted file mode 100644 index 05a94677f..000000000 --- a/data/hfopenllm_v2/mergekit-community/diabolic6045_ELN-AOC-CAIN/c87fbaff-133e-4312-87bf-d2fa397d66c4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mergekit-community_diabolic6045_ELN-AOC-CAIN/1762652580.346791", - "retrieved_timestamp": "1762652580.346791", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"mergekit-community/diabolic6045_ELN-AOC-CAIN", - "developer": "mergekit-community", - "inference_platform": "unknown", - "id": "mergekit-community/diabolic6045_ELN-AOC-CAIN", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0861547361002141 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31256779393862577 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36575 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11909906914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/mergekit-community/mergekit-dare_ties-ajgjgea/630d8a60-03b7-4550-82f4-e879b2e01c6c.json b/data/hfopenllm_v2/mergekit-community/mergekit-dare_ties-ajgjgea/630d8a60-03b7-4550-82f4-e879b2e01c6c.json new file mode 100644 index 000000000..afddca479 --- /dev/null +++ b/data/hfopenllm_v2/mergekit-community/mergekit-dare_ties-ajgjgea/630d8a60-03b7-4550-82f4-e879b2e01c6c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mergekit-community_mergekit-dare_ties-ajgjgea/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mergekit-dare_ties-ajgjgea", + "id": "mergekit-community/mergekit-dare_ties-ajgjgea", + "developer": "mergekit-community", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5263 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3495 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0642 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3289 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1744 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mergekit-community/mergekit-dare_ties-ajgjgea/69409961-b60d-4616-8a8e-8d0a9c6c966f.json b/data/hfopenllm_v2/mergekit-community/mergekit-dare_ties-ajgjgea/69409961-b60d-4616-8a8e-8d0a9c6c966f.json deleted file mode 100644 index 9dbd95c11..000000000 --- a/data/hfopenllm_v2/mergekit-community/mergekit-dare_ties-ajgjgea/69409961-b60d-4616-8a8e-8d0a9c6c966f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mergekit-community_mergekit-dare_ties-ajgjgea/1762652580.347229", - "retrieved_timestamp": "1762652580.34723", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mergekit-community/mergekit-dare_ties-ajgjgea", - "developer": "mergekit-community", - "inference_platform": "unknown", - "id": "mergekit-community/mergekit-dare_ties-ajgjgea", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5263423272472595 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3494703687455365 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06419939577039276 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3289166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17436835106382978 - } - } - ] -} diff --git a/data/hfopenllm_v2/mergekit-community/mergekit-della-zgowfmf/206b5a96-ae07-41fd-822f-436d49c57dcb.json b/data/hfopenllm_v2/mergekit-community/mergekit-della-zgowfmf/206b5a96-ae07-41fd-822f-436d49c57dcb.json new file mode 100644 index 000000000..160d88a78 --- /dev/null +++ b/data/hfopenllm_v2/mergekit-community/mergekit-della-zgowfmf/206b5a96-ae07-41fd-822f-436d49c57dcb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mergekit-community_mergekit-della-zgowfmf/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mergekit-della-zgowfmf", + "id": "mergekit-community/mergekit-della-zgowfmf", + "developer": "mergekit-community", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4828 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6591 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3618 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3901 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4834 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5415 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mergekit-community/mergekit-della-zgowfmf/2989b505-bfe2-4ca6-9445-af450ad9bee3.json b/data/hfopenllm_v2/mergekit-community/mergekit-della-zgowfmf/2989b505-bfe2-4ca6-9445-af450ad9bee3.json deleted file mode 100644 index e3159992f..000000000 --- a/data/hfopenllm_v2/mergekit-community/mergekit-della-zgowfmf/2989b505-bfe2-4ca6-9445-af450ad9bee3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mergekit-community_mergekit-della-zgowfmf/1762652580.347496", - "retrieved_timestamp": "1762652580.347497", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mergekit-community/mergekit-della-zgowfmf", - "developer": "mergekit-community", - "inference_platform": "unknown", - "id": "mergekit-community/mergekit-della-zgowfmf", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4827535383892516 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6590790528029254 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36178247734138974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3901006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4833854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on 
MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5414727393617021 - } - } - ] -} diff --git a/data/hfopenllm_v2/mergekit-community/mergekit-model_stock-azgztvm/5a607a63-42bc-4f2b-af2f-4126234516d0.json b/data/hfopenllm_v2/mergekit-community/mergekit-model_stock-azgztvm/5a607a63-42bc-4f2b-af2f-4126234516d0.json deleted file mode 100644 index 11f1a7a6e..000000000 --- a/data/hfopenllm_v2/mergekit-community/mergekit-model_stock-azgztvm/5a607a63-42bc-4f2b-af2f-4126234516d0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mergekit-community_mergekit-model_stock-azgztvm/1762652580.347734", - "retrieved_timestamp": "1762652580.347735", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mergekit-community/mergekit-model_stock-azgztvm", - "developer": "mergekit-community", - "inference_platform": "unknown", - "id": "mergekit-community/mergekit-model_stock-azgztvm", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5061592131101034 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6542775546755846 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43731117824773413 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38171140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47300000000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5405585106382979 - } - } - ] -} diff --git a/data/hfopenllm_v2/mergekit-community/mergekit-model_stock-azgztvm/702d2120-5301-4e03-bb0f-1f8ab19e522a.json b/data/hfopenllm_v2/mergekit-community/mergekit-model_stock-azgztvm/702d2120-5301-4e03-bb0f-1f8ab19e522a.json new file mode 100644 index 000000000..46f675fa0 --- /dev/null +++ b/data/hfopenllm_v2/mergekit-community/mergekit-model_stock-azgztvm/702d2120-5301-4e03-bb0f-1f8ab19e522a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/mergekit-community_mergekit-model_stock-azgztvm/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mergekit-model_stock-azgztvm", + "id": "mergekit-community/mergekit-model_stock-azgztvm", + "developer": "mergekit-community", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5062 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6543 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4373 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3817 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.473 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5406 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mergekit-community/mergekit-slerp-fmrazcr/5fd04483-684e-4991-adea-ca5496e05208.json b/data/hfopenllm_v2/mergekit-community/mergekit-slerp-fmrazcr/5fd04483-684e-4991-adea-ca5496e05208.json deleted file mode 100644 index 031cc1de2..000000000 --- a/data/hfopenllm_v2/mergekit-community/mergekit-slerp-fmrazcr/5fd04483-684e-4991-adea-ca5496e05208.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mergekit-community_mergekit-slerp-fmrazcr/1762652580.3479838", - "retrieved_timestamp": "1762652580.3479848", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mergekit-community/mergekit-slerp-fmrazcr", - "developer": "mergekit-community", - "inference_platform": "unknown", - "id": "mergekit-community/mergekit-slerp-fmrazcr", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41743241266506204 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5341624678276029 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11933534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41045833333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3776595744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/mergekit-community/mergekit-slerp-fmrazcr/61e39700-c237-49fc-baef-3fa573b3b0c6.json b/data/hfopenllm_v2/mergekit-community/mergekit-slerp-fmrazcr/61e39700-c237-49fc-baef-3fa573b3b0c6.json new file mode 100644 index 000000000..0b718a9e2 --- /dev/null +++ b/data/hfopenllm_v2/mergekit-community/mergekit-slerp-fmrazcr/61e39700-c237-49fc-baef-3fa573b3b0c6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mergekit-community_mergekit-slerp-fmrazcr/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mergekit-slerp-fmrazcr", + "id": "mergekit-community/mergekit-slerp-fmrazcr", + "developer": "mergekit-community", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4174 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5342 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1193 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3112 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4105 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3777 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mergekit-community/mergekit-ties-rraxdhv/8892ab84-750d-494f-9f87-ad28e73cf364.json b/data/hfopenllm_v2/mergekit-community/mergekit-ties-rraxdhv/8892ab84-750d-494f-9f87-ad28e73cf364.json new file mode 100644 index 000000000..a58721bfd --- /dev/null +++ b/data/hfopenllm_v2/mergekit-community/mergekit-ties-rraxdhv/8892ab84-750d-494f-9f87-ad28e73cf364.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mergekit-community_mergekit-ties-rraxdhv/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mergekit-ties-rraxdhv", + "id": "mergekit-community/mergekit-ties-rraxdhv", + "developer": "mergekit-community", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1123 + } + }, + { + "evaluation_name": "BBH", + 
"source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5184 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.04 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4202 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.391 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mergekit-community/mergekit-ties-rraxdhv/bb3ccfe9-1ae3-49ec-9305-9150edaf8527.json b/data/hfopenllm_v2/mergekit-community/mergekit-ties-rraxdhv/bb3ccfe9-1ae3-49ec-9305-9150edaf8527.json deleted file mode 100644 index 7dfdb2d52..000000000 --- a/data/hfopenllm_v2/mergekit-community/mergekit-ties-rraxdhv/bb3ccfe9-1ae3-49ec-9305-9150edaf8527.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mergekit-community_mergekit-ties-rraxdhv/1762652580.348219", - "retrieved_timestamp": "1762652580.3482199", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mergekit-community/mergekit-ties-rraxdhv", - "developer": "mergekit-community", - "inference_platform": "unknown", - "id": "mergekit-community/mergekit-ties-rraxdhv", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11230756614671294 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.5183590984128971 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42019791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39095744680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/mergekit-community/mergekit-ties-ykqemwr/538a2eb7-34e4-4e78-a382-60a13710096e.json b/data/hfopenllm_v2/mergekit-community/mergekit-ties-ykqemwr/538a2eb7-34e4-4e78-a382-60a13710096e.json new file mode 100644 index 000000000..929e2d82b --- /dev/null +++ b/data/hfopenllm_v2/mergekit-community/mergekit-ties-ykqemwr/538a2eb7-34e4-4e78-a382-60a13710096e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mergekit-community_mergekit-ties-ykqemwr/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mergekit-ties-ykqemwr", + "id": "mergekit-community/mergekit-ties-ykqemwr", + "developer": "mergekit-community", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.36 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5455 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1224 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3221 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4198 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3734 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mergekit-community/mergekit-ties-ykqemwr/83a86bdd-4605-44a5-8168-ce88242c4ee6.json b/data/hfopenllm_v2/mergekit-community/mergekit-ties-ykqemwr/83a86bdd-4605-44a5-8168-ce88242c4ee6.json deleted file mode 100644 index f4d132ac8..000000000 --- a/data/hfopenllm_v2/mergekit-community/mergekit-ties-ykqemwr/83a86bdd-4605-44a5-8168-ce88242c4ee6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mergekit-community_mergekit-ties-ykqemwr/1762652580.3485382", - "retrieved_timestamp": "1762652580.3485398", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mergekit-community/mergekit-ties-ykqemwr", - "developer": "mergekit-community", - "inference_platform": "unknown", - "id": "mergekit-community/mergekit-ties-ykqemwr", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35995491961329273 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5455496677885336 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12235649546827794 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221476510067114 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4197916666666666 - } - }, - { - "evaluation_name": 
"MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3734208776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/mergekit-community/sexeh_time_testing/79cd4642-8b10-416b-8a24-e3e3dc99b28f.json b/data/hfopenllm_v2/mergekit-community/sexeh_time_testing/79cd4642-8b10-416b-8a24-e3e3dc99b28f.json deleted file mode 100644 index b0080753c..000000000 --- a/data/hfopenllm_v2/mergekit-community/sexeh_time_testing/79cd4642-8b10-416b-8a24-e3e3dc99b28f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mergekit-community_sexeh_time_testing/1762652580.348824", - "retrieved_timestamp": "1762652580.348825", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mergekit-community/sexeh_time_testing", - "developer": "mergekit-community", - "inference_platform": "unknown", - "id": "mergekit-community/sexeh_time_testing", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7329463601023063 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5241321549202608 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08987915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36190625000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36668882978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/mergekit-community/sexeh_time_testing/a041629e-8ed8-4a6c-95ee-98e759501e19.json b/data/hfopenllm_v2/mergekit-community/sexeh_time_testing/a041629e-8ed8-4a6c-95ee-98e759501e19.json new file mode 100644 index 000000000..959317552 --- /dev/null +++ b/data/hfopenllm_v2/mergekit-community/sexeh_time_testing/a041629e-8ed8-4a6c-95ee-98e759501e19.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/mergekit-community_sexeh_time_testing/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sexeh_time_testing", + "id": "mergekit-community/sexeh_time_testing", + "developer": "mergekit-community", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7329 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5241 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0899 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3619 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3667 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta-llama/Llama-2-13b-chat-hf/09f05984-5815-4b3d-bc73-83ea1e5ecc27.json b/data/hfopenllm_v2/meta-llama/Llama-2-13b-chat-hf/09f05984-5815-4b3d-bc73-83ea1e5ecc27.json new file mode 100644 index 000000000..29fbff778 --- /dev/null +++ b/data/hfopenllm_v2/meta-llama/Llama-2-13b-chat-hf/09f05984-5815-4b3d-bc73-83ea1e5ecc27.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/meta-llama_Llama-2-13b-chat-hf/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-2-13b-chat-hf", + "id": "meta-llama/Llama-2-13b-chat-hf", + "developer": "meta-llama", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.016 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3985 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3343 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0136 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2315 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4007 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1923 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta-llama/Llama-2-13b-chat-hf/1d97c368-3e12-43d4-afb2-e3977bf7cf35.json b/data/hfopenllm_v2/meta-llama/Llama-2-13b-chat-hf/1d97c368-3e12-43d4-afb2-e3977bf7cf35.json deleted file mode 100644 index 139667c56..000000000 --- a/data/hfopenllm_v2/meta-llama/Llama-2-13b-chat-hf/1d97c368-3e12-43d4-afb2-e3977bf7cf35.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/meta-llama_Llama-2-13b-chat-hf/1762652580.34908", - "retrieved_timestamp": "1762652580.349081", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"meta-llama/Llama-2-13b-chat-hf", - "developer": "meta-llama", - "inference_platform": "unknown", - "id": "meta-llama/Llama-2-13b-chat-hf", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.016 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.398472719052115 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33427367066714186 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23154362416107382 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40072916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19232047872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta-llama/Llama-2-13b-hf/6535524e-f8cf-4f2f-9d89-9ba70aedac91.json b/data/hfopenllm_v2/meta-llama/Llama-2-13b-hf/6535524e-f8cf-4f2f-9d89-9ba70aedac91.json new file mode 100644 index 000000000..50a1407eb --- /dev/null +++ b/data/hfopenllm_v2/meta-llama/Llama-2-13b-hf/6535524e-f8cf-4f2f-9d89-9ba70aedac91.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/meta-llama_Llama-2-13b-hf/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-2-13b-hf", + "id": "meta-llama/Llama-2-13b-hf", + "developer": "meta-llama", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.016 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2482 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.4126 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0151 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3538 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2378 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta-llama/Llama-2-70b-chat-hf/08ea4f9d-0e3c-4a8b-85e6-075290d30ba4.json b/data/hfopenllm_v2/meta-llama/Llama-2-70b-chat-hf/08ea4f9d-0e3c-4a8b-85e6-075290d30ba4.json new file mode 100644 index 000000000..741023f42 --- /dev/null +++ b/data/hfopenllm_v2/meta-llama/Llama-2-70b-chat-hf/08ea4f9d-0e3c-4a8b-85e6-075290d30ba4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/meta-llama_Llama-2-70b-chat-hf/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-2-70b-chat-hf", + "id": "meta-llama/Llama-2-70b-chat-hf", + "developer": "meta-llama", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 68.977 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4958 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3042 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + 
}, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0295 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3687 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2433 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta-llama/Llama-2-70b-chat-hf/51411c24-49a4-48a7-9079-1f8c06e5318f.json b/data/hfopenllm_v2/meta-llama/Llama-2-70b-chat-hf/51411c24-49a4-48a7-9079-1f8c06e5318f.json deleted file mode 100644 index 38a0cace8..000000000 --- a/data/hfopenllm_v2/meta-llama/Llama-2-70b-chat-hf/51411c24-49a4-48a7-9079-1f8c06e5318f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/meta-llama_Llama-2-70b-chat-hf/1762652580.3497758", - "retrieved_timestamp": "1762652580.349777", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "meta-llama/Llama-2-70b-chat-hf", - "developer": "meta-llama", - "inference_platform": "unknown", - "id": "meta-llama/Llama-2-70b-chat-hf", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 68.977 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49579227560650185 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30424741461642657 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": 
{ - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3686666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2432679521276596 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta-llama/Llama-2-70b-hf/631f0a1f-a6f5-46f6-9aa0-31ac9764c086.json b/data/hfopenllm_v2/meta-llama/Llama-2-70b-hf/631f0a1f-a6f5-46f6-9aa0-31ac9764c086.json new file mode 100644 index 000000000..d8c229621 --- /dev/null +++ b/data/hfopenllm_v2/meta-llama/Llama-2-70b-hf/631f0a1f-a6f5-46f6-9aa0-31ac9764c086.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/meta-llama_Llama-2-70b-hf/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-2-70b-hf", + "id": "meta-llama/Llama-2-70b-hf", + "developer": "meta-llama", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 68.977 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2407 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5473 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0325 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4124 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": 
"hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3718 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta-llama/Llama-2-7b-chat-hf/3c870b5c-ab3f-4a21-836a-655d0e30efb9.json b/data/hfopenllm_v2/meta-llama/Llama-2-7b-chat-hf/3c870b5c-ab3f-4a21-836a-655d0e30efb9.json deleted file mode 100644 index 12dbc5840..000000000 --- a/data/hfopenllm_v2/meta-llama/Llama-2-7b-chat-hf/3c870b5c-ab3f-4a21-836a-655d0e30efb9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/meta-llama_Llama-2-7b-chat-hf/1762652580.350235", - "retrieved_timestamp": "1762652580.350236", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "meta-llama/Llama-2-7b-chat-hf", - "developer": "meta-llama", - "inference_platform": "unknown", - "id": "meta-llama/Llama-2-7b-chat-hf", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.738 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3986478100329348 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3113546355002185 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3675520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16879986702127658 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta-llama/Llama-2-7b-chat-hf/b771f6db-7516-4423-9010-3467db0e26e3.json b/data/hfopenllm_v2/meta-llama/Llama-2-7b-chat-hf/b771f6db-7516-4423-9010-3467db0e26e3.json new file mode 100644 index 000000000..4c366b7ee --- /dev/null +++ b/data/hfopenllm_v2/meta-llama/Llama-2-7b-chat-hf/b771f6db-7516-4423-9010-3467db0e26e3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/meta-llama_Llama-2-7b-chat-hf/1770682486.623709", + 
"retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-2-7b-chat-hf", + "id": "meta-llama/Llama-2-7b-chat-hf", + "developer": "meta-llama", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.738 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3986 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3114 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0196 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2534 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3676 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1688 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta-llama/Llama-2-7b-hf/cf580dfb-2924-4c4b-9352-394275b959bd.json b/data/hfopenllm_v2/meta-llama/Llama-2-7b-hf/cf580dfb-2924-4c4b-9352-394275b959bd.json new file mode 100644 index 000000000..009ce4f17 --- /dev/null +++ b/data/hfopenllm_v2/meta-llama/Llama-2-7b-hf/cf580dfb-2924-4c4b-9352-394275b959bd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/meta-llama_Llama-2-7b-hf/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"Llama-2-7b-hf", + "id": "meta-llama/Llama-2-7b-hf", + "developer": "meta-llama", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.738 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2519 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3496 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0174 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3701 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1861 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta-llama/Llama-3.1-70B-Instruct/5623efdd-2f43-49d3-9e89-21432db474f4.json b/data/hfopenllm_v2/meta-llama/Llama-3.1-70B-Instruct/5623efdd-2f43-49d3-9e89-21432db474f4.json deleted file mode 100644 index 5f120adb8..000000000 --- a/data/hfopenllm_v2/meta-llama/Llama-3.1-70B-Instruct/5623efdd-2f43-49d3-9e89-21432db474f4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/meta-llama_Llama-3.1-70B-Instruct/1762652580.35089", - "retrieved_timestamp": "1762652580.350891", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "meta-llama/Llama-3.1-70B-Instruct", - "developer": "meta-llama", - "inference_platform": "unknown", - "id": 
"meta-llama/Llama-3.1-70B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8668854195756149 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6917287453663654 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3806646525679758 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3565436241610738 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45806250000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5309175531914894 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta-llama/Llama-3.1-70B-Instruct/ba549fe6-7718-4abf-a610-7e0f48611483.json b/data/hfopenllm_v2/meta-llama/Llama-3.1-70B-Instruct/ba549fe6-7718-4abf-a610-7e0f48611483.json new file mode 100644 index 000000000..772844f47 --- /dev/null +++ b/data/hfopenllm_v2/meta-llama/Llama-3.1-70B-Instruct/ba549fe6-7718-4abf-a610-7e0f48611483.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/meta-llama_Llama-3.1-70B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-70B-Instruct", + "id": "meta-llama/Llama-3.1-70B-Instruct", + "developer": "meta-llama", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8669 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.6917 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3807 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3565 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4581 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5309 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta-llama/Llama-3.1-70B/b92440b1-78a9-4288-a432-f057f2b04a2f.json b/data/hfopenllm_v2/meta-llama/Llama-3.1-70B/b92440b1-78a9-4288-a432-f057f2b04a2f.json new file mode 100644 index 000000000..a94baa730 --- /dev/null +++ b/data/hfopenllm_v2/meta-llama/Llama-3.1-70B/b92440b1-78a9-4288-a432-f057f2b04a2f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/meta-llama_Llama-3.1-70B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-70B", + "id": "meta-llama/Llama-3.1-70B", + "developer": "meta-llama", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1684 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.626 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1843 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3876 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4572 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4654 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta-llama/Llama-3.1-8B-Instruct/838f3932-edf2-4f72-9238-981d1aadc771.json b/data/hfopenllm_v2/meta-llama/Llama-3.1-8B-Instruct/838f3932-edf2-4f72-9238-981d1aadc771.json new file mode 100644 index 000000000..f3e5eb24a --- /dev/null +++ b/data/hfopenllm_v2/meta-llama/Llama-3.1-8B-Instruct/838f3932-edf2-4f72-9238-981d1aadc771.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/meta-llama_Llama-3.1-8B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8B-Instruct", + "id": "meta-llama/Llama-3.1-8B-Instruct", + "developer": "meta-llama", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4922 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5087 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1556 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + 
"source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3154 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3972 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3798 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta-llama/Llama-3.1-8B-Instruct/b5009142-e716-45b2-877e-9259a3a705da.json b/data/hfopenllm_v2/meta-llama/Llama-3.1-8B-Instruct/b5009142-e716-45b2-877e-9259a3a705da.json deleted file mode 100644 index 9156c1c91..000000000 --- a/data/hfopenllm_v2/meta-llama/Llama-3.1-8B-Instruct/b5009142-e716-45b2-877e-9259a3a705da.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/meta-llama_Llama-3.1-8B-Instruct/1762652580.351296", - "retrieved_timestamp": "1762652580.3512971", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "meta-llama/Llama-3.1-8B-Instruct", - "developer": "meta-llama", - "inference_platform": "unknown", - "id": "meta-llama/Llama-3.1-8B-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4921707735475206 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5087032184331889 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1555891238670695 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39715625000000004 - } - }, - 
{ - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37982047872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta-llama/Llama-3.1-8B/61e933b2-5cd1-4f08-8a9e-5b06ef54b6d5.json b/data/hfopenllm_v2/meta-llama/Llama-3.1-8B/61e933b2-5cd1-4f08-8a9e-5b06ef54b6d5.json new file mode 100644 index 000000000..b2fd3607e --- /dev/null +++ b/data/hfopenllm_v2/meta-llama/Llama-3.1-8B/61e933b2-5cd1-4f08-8a9e-5b06ef54b6d5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/meta-llama_Llama-3.1-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8B", + "id": "meta-llama/Llama-3.1-8B", + "developer": "meta-llama", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1246 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.466 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0657 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3812 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3288 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/meta-llama/Llama-3.2-1B-Instruct/0b307c78-94c7-418f-bc47-5106b81c30de.json b/data/hfopenllm_v2/meta-llama/Llama-3.2-1B-Instruct/0b307c78-94c7-418f-bc47-5106b81c30de.json new file mode 100644 index 000000000..f3ef3bdee --- /dev/null +++ b/data/hfopenllm_v2/meta-llama/Llama-3.2-1B-Instruct/0b307c78-94c7-418f-bc47-5106b81c30de.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/meta-llama_Llama-3.2-1B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-1B-Instruct", + "id": "meta-llama/Llama-3.2-1B-Instruct", + "developer": "meta-llama", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.24 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5698 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3497 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0702 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2752 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3329 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1682 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta-llama/Llama-3.2-1B-Instruct/b21f94af-3dfd-42f6-a380-3c5faebc90d8.json b/data/hfopenllm_v2/meta-llama/Llama-3.2-1B-Instruct/b21f94af-3dfd-42f6-a380-3c5faebc90d8.json deleted file mode 100644 index 
88341fcf0..000000000 --- a/data/hfopenllm_v2/meta-llama/Llama-3.2-1B-Instruct/b21f94af-3dfd-42f6-a380-3c5faebc90d8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/meta-llama_Llama-3.2-1B-Instruct/1762652580.351711", - "retrieved_timestamp": "1762652580.351712", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "meta-llama/Llama-3.2-1B-Instruct", - "developer": "meta-llama", - "inference_platform": "unknown", - "id": "meta-llama/Llama-3.2-1B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.24 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5698313807364459 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34968498061768266 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0702416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3328541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16821808510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta-llama/Llama-3.2-1B/18783694-3e7b-4d06-9378-5a3fa4a7a0a2.json b/data/hfopenllm_v2/meta-llama/Llama-3.2-1B/18783694-3e7b-4d06-9378-5a3fa4a7a0a2.json new file mode 100644 index 000000000..c0d6f09c5 --- /dev/null +++ b/data/hfopenllm_v2/meta-llama/Llama-3.2-1B/18783694-3e7b-4d06-9378-5a3fa4a7a0a2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/meta-llama_Llama-3.2-1B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-1B", + "id": "meta-llama/Llama-3.2-1B", + "developer": "meta-llama", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.24 + } + }, + "evaluation_results": [ + { + "evaluation_name": 
"IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1478 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3115 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0121 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2282 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3447 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1203 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta-llama/Llama-3.2-3B-Instruct/dab922e5-1b46-4a90-b75c-1b26cd6cc6d3.json b/data/hfopenllm_v2/meta-llama/Llama-3.2-3B-Instruct/dab922e5-1b46-4a90-b75c-1b26cd6cc6d3.json new file mode 100644 index 000000000..cad18f268 --- /dev/null +++ b/data/hfopenllm_v2/meta-llama/Llama-3.2-3B-Instruct/dab922e5-1b46-4a90-b75c-1b26cd6cc6d3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/meta-llama_Llama-3.2-3B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-3B-Instruct", + "id": "meta-llama/Llama-3.2-3B-Instruct", + "developer": "meta-llama", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7393 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.461 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1767 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2785 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3529 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3195 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta-llama/Llama-3.2-3B-Instruct/ec976588-9788-45e0-ae89-4682e3c8799a.json b/data/hfopenllm_v2/meta-llama/Llama-3.2-3B-Instruct/ec976588-9788-45e0-ae89-4682e3c8799a.json deleted file mode 100644 index fc5516bca..000000000 --- a/data/hfopenllm_v2/meta-llama/Llama-3.2-3B-Instruct/ec976588-9788-45e0-ae89-4682e3c8799a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/meta-llama_Llama-3.2-3B-Instruct/1762652580.352124", - "retrieved_timestamp": "1762652580.352124", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "meta-llama/Llama-3.2-3B-Instruct", - "developer": "meta-llama", - "inference_platform": "unknown", - "id": "meta-llama/Llama-3.2-3B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7393161256576994 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy 
on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4610070239466069 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17673716012084592 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3528541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3194813829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta-llama/Llama-3.2-3B/8cfa1f00-3b26-4d75-9b0a-0dea65e2e352.json b/data/hfopenllm_v2/meta-llama/Llama-3.2-3B/8cfa1f00-3b26-4d75-9b0a-0dea65e2e352.json new file mode 100644 index 000000000..b00c2e441 --- /dev/null +++ b/data/hfopenllm_v2/meta-llama/Llama-3.2-3B/8cfa1f00-3b26-4d75-9b0a-0dea65e2e352.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/meta-llama_Llama-3.2-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-3B", + "id": "meta-llama/Llama-3.2-3B", + "developer": "meta-llama", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1337 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3905 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0189 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3577 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2488 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta-llama/Llama-3.3-70B-Instruct/b227d987-1bec-4124-955a-d81e2e2a52f6.json b/data/hfopenllm_v2/meta-llama/Llama-3.3-70B-Instruct/b227d987-1bec-4124-955a-d81e2e2a52f6.json deleted file mode 100644 index 6831a860c..000000000 --- a/data/hfopenllm_v2/meta-llama/Llama-3.3-70B-Instruct/b227d987-1bec-4124-955a-d81e2e2a52f6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/meta-llama_Llama-3.3-70B-Instruct/1762652580.352333", - "retrieved_timestamp": "1762652580.352334", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "meta-llama/Llama-3.3-70B-Instruct", - "developer": "meta-llama", - "inference_platform": "unknown", - "id": "meta-llama/Llama-3.3-70B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8997581971391464 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6919312828325811 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48338368580060426 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44612500000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5331615691489362 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta-llama/Llama-3.3-70B-Instruct/f74d26e6-9dfb-4e81-8522-8309b27760cf.json b/data/hfopenllm_v2/meta-llama/Llama-3.3-70B-Instruct/f74d26e6-9dfb-4e81-8522-8309b27760cf.json new file mode 100644 index 000000000..fd07a860b --- /dev/null +++ b/data/hfopenllm_v2/meta-llama/Llama-3.3-70B-Instruct/f74d26e6-9dfb-4e81-8522-8309b27760cf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/meta-llama_Llama-3.3-70B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.3-70B-Instruct", + "id": "meta-llama/Llama-3.3-70B-Instruct", + "developer": "meta-llama", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8998 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6919 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4834 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3289 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4461 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5332 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/meta-llama/Meta-Llama-3-70B-Instruct/2022bcf3-a057-4b0a-aa33-6cf074ffc714.json b/data/hfopenllm_v2/meta-llama/Meta-Llama-3-70B-Instruct/2022bcf3-a057-4b0a-aa33-6cf074ffc714.json new file mode 100644 index 000000000..dc54a58af --- /dev/null +++ b/data/hfopenllm_v2/meta-llama/Meta-Llama-3-70B-Instruct/2022bcf3-a057-4b0a-aa33-6cf074ffc714.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/meta-llama_Meta-Llama-3-70B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Meta-Llama-3-70B-Instruct", + "id": "meta-llama/Meta-Llama-3-70B-Instruct", + "developer": "meta-llama", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8099 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6547 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2447 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4154 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5207 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta-llama/Meta-Llama-3-70B-Instruct/5a0ae810-10a3-4497-a81c-a88d2106a5ba.json b/data/hfopenllm_v2/meta-llama/Meta-Llama-3-70B-Instruct/5a0ae810-10a3-4497-a81c-a88d2106a5ba.json deleted 
file mode 100644 index 5c6288c08..000000000 --- a/data/hfopenllm_v2/meta-llama/Meta-Llama-3-70B-Instruct/5a0ae810-10a3-4497-a81c-a88d2106a5ba.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/meta-llama_Meta-Llama-3-70B-Instruct/1762652580.352748", - "retrieved_timestamp": "1762652580.352749", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "meta-llama/Meta-Llama-3-70B-Instruct", - "developer": "meta-llama", - "inference_platform": "unknown", - "id": "meta-llama/Meta-Llama-3-70B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8099077115387172 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6546699432372051 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24471299093655588 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4153645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5206948138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta-llama/Meta-Llama-3-70B/a6e79d12-42f6-47ad-95fa-ba03fa4d3a06.json b/data/hfopenllm_v2/meta-llama/Meta-Llama-3-70B/a6e79d12-42f6-47ad-95fa-ba03fa4d3a06.json new file mode 100644 index 000000000..faa64a96e --- /dev/null +++ b/data/hfopenllm_v2/meta-llama/Meta-Llama-3-70B/a6e79d12-42f6-47ad-95fa-ba03fa4d3a06.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/meta-llama_Meta-Llama-3-70B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Meta-Llama-3-70B", + "id": "meta-llama/Meta-Llama-3-70B", + "developer": "meta-llama", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 
+ } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1603 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6461 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1858 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3977 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4518 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4709 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta-llama/Meta-Llama-3-8B-Instruct/108befbc-f9a6-4d5f-9bcf-30fe7cebe35b.json b/data/hfopenllm_v2/meta-llama/Meta-Llama-3-8B-Instruct/108befbc-f9a6-4d5f-9bcf-30fe7cebe35b.json deleted file mode 100644 index e46017225..000000000 --- a/data/hfopenllm_v2/meta-llama/Meta-Llama-3-8B-Instruct/108befbc-f9a6-4d5f-9bcf-30fe7cebe35b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/meta-llama_Meta-Llama-3-8B-Instruct/1762652580.353369", - "retrieved_timestamp": "1762652580.353369", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "meta-llama/Meta-Llama-3-8B-Instruct", - "developer": "meta-llama", - "inference_platform": "unknown", - "id": "meta-llama/Meta-Llama-3-8B-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": 
{ - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47823220166934843 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4910264175128683 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09138972809667674 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3805416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.359125664893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta-llama/Meta-Llama-3-8B-Instruct/24d850fe-1817-4041-8767-085f4bd2bac3.json b/data/hfopenllm_v2/meta-llama/Meta-Llama-3-8B-Instruct/24d850fe-1817-4041-8767-085f4bd2bac3.json new file mode 100644 index 000000000..ac1995522 --- /dev/null +++ b/data/hfopenllm_v2/meta-llama/Meta-Llama-3-8B-Instruct/24d850fe-1817-4041-8767-085f4bd2bac3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/meta-llama_Meta-Llama-3-8B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Meta-Llama-3-8B-Instruct", + "id": "meta-llama/Meta-Llama-3-8B-Instruct", + "developer": "meta-llama", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7408 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4989 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0869 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3568 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3664 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta-llama/Meta-Llama-3-8B-Instruct/610a3be1-1032-4079-ba37-d6c2c5f9fd55.json b/data/hfopenllm_v2/meta-llama/Meta-Llama-3-8B-Instruct/610a3be1-1032-4079-ba37-d6c2c5f9fd55.json new file mode 100644 index 000000000..7d274c62c --- /dev/null +++ b/data/hfopenllm_v2/meta-llama/Meta-Llama-3-8B-Instruct/610a3be1-1032-4079-ba37-d6c2c5f9fd55.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/meta-llama_Meta-Llama-3-8B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Meta-Llama-3-8B-Instruct", + "id": "meta-llama/Meta-Llama-3-8B-Instruct", + "developer": "meta-llama", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4782 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.491 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0914 + } + }, + { + 
"evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3805 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3591 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta-llama/Meta-Llama-3-8B-Instruct/df2fd3a3-33d0-4ee8-be73-e8d3e00e8184.json b/data/hfopenllm_v2/meta-llama/Meta-Llama-3-8B-Instruct/df2fd3a3-33d0-4ee8-be73-e8d3e00e8184.json deleted file mode 100644 index a318de98c..000000000 --- a/data/hfopenllm_v2/meta-llama/Meta-Llama-3-8B-Instruct/df2fd3a3-33d0-4ee8-be73-e8d3e00e8184.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/meta-llama_Meta-Llama-3-8B-Instruct/1762652580.353163", - "retrieved_timestamp": "1762652580.353164", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "meta-llama/Meta-Llama-3-8B-Instruct", - "developer": "meta-llama", - "inference_platform": "unknown", - "id": "meta-llama/Meta-Llama-3-8B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7408398604591373 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49887111136169526 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08685800604229607 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3568229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3664394946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta-llama/Meta-Llama-3-8B/857bb10e-1b43-4714-a758-0cef5816ba02.json b/data/hfopenllm_v2/meta-llama/Meta-Llama-3-8B/857bb10e-1b43-4714-a758-0cef5816ba02.json new file mode 100644 index 000000000..e2bd15b0b --- /dev/null +++ b/data/hfopenllm_v2/meta-llama/Meta-Llama-3-8B/857bb10e-1b43-4714-a758-0cef5816ba02.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/meta-llama_Meta-Llama-3-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Meta-Llama-3-8B", + "id": "meta-llama/Meta-Llama-3-8B", + "developer": "meta-llama", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1455 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4598 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0453 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3614 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.321 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/3rd-Degree-Burn/Llama-3.1-8B-Squareroot-v1/0851ad0a-7f87-48c8-943a-198ad2ef8ea3.json b/data/hfopenllm_v2/meta/3rd-Degree-Burn/Llama-3.1-8B-Squareroot-v1/0851ad0a-7f87-48c8-943a-198ad2ef8ea3.json deleted file mode 100644 index 02994afa9..000000000 --- a/data/hfopenllm_v2/meta/3rd-Degree-Burn/Llama-3.1-8B-Squareroot-v1/0851ad0a-7f87-48c8-943a-198ad2ef8ea3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/3rd-Degree-Burn_Llama-3.1-8B-Squareroot-v1/1762652579.470921", - "retrieved_timestamp": "1762652579.470922", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "3rd-Degree-Burn/Llama-3.1-8B-Squareroot-v1", - "developer": "meta", - "inference_platform": "unknown", - "id": "3rd-Degree-Burn/Llama-3.1-8B-Squareroot-v1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2892381104358657 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33427703119251256 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08836858006042296 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3340625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11269946808510638 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/3rd-Degree-Burn/Llama-3.1-8B-Squareroot/cbe8101a-f057-4151-9391-dbd883f4c09e.json b/data/hfopenllm_v2/meta/3rd-Degree-Burn/Llama-3.1-8B-Squareroot/cbe8101a-f057-4151-9391-dbd883f4c09e.json deleted file mode 100644 index 2467576f3..000000000 --- a/data/hfopenllm_v2/meta/3rd-Degree-Burn/Llama-3.1-8B-Squareroot/cbe8101a-f057-4151-9391-dbd883f4c09e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/3rd-Degree-Burn_Llama-3.1-8B-Squareroot/1762652579.47045", - "retrieved_timestamp": "1762652579.4704509", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "3rd-Degree-Burn/Llama-3.1-8B-Squareroot", - "developer": "meta", - "inference_platform": "unknown", - "id": "3rd-Degree-Burn/Llama-3.1-8B-Squareroot", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22134381219608418 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34609423326328875 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26586102719033233 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3089166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17495013297872342 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/3rd-Degree-Burn/Llama-Squared-8B/fae2328b-af2f-49ff-a817-9406cf40c3d0.json b/data/hfopenllm_v2/meta/3rd-Degree-Burn/Llama-Squared-8B/fae2328b-af2f-49ff-a817-9406cf40c3d0.json deleted file mode 100644 index 123f1e550..000000000 --- a/data/hfopenllm_v2/meta/3rd-Degree-Burn/Llama-Squared-8B/fae2328b-af2f-49ff-a817-9406cf40c3d0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/3rd-Degree-Burn_Llama-Squared-8B/1762652579.471144", - "retrieved_timestamp": "1762652579.471145", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "3rd-Degree-Burn/Llama-Squared-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "3rd-Degree-Burn/Llama-Squared-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27552449722292405 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4431025683868353 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30894791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2366190159574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/AGI-0/Artificium-llama3.1-8B-001/2e3e8be1-725f-4662-a8b1-da4437018e31.json b/data/hfopenllm_v2/meta/AGI-0/Artificium-llama3.1-8B-001/2e3e8be1-725f-4662-a8b1-da4437018e31.json deleted file mode 100644 index f1c4723aa..000000000 --- a/data/hfopenllm_v2/meta/AGI-0/Artificium-llama3.1-8B-001/2e3e8be1-725f-4662-a8b1-da4437018e31.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/AGI-0_Artificium-llama3.1-8B-001/1762652579.4738402", - "retrieved_timestamp": "1762652579.473841", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "AGI-0/Artificium-llama3.1-8B-001", - "developer": "meta", - "inference_platform": "unknown", - "id": "AGI-0/Artificium-llama3.1-8B-001", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5247687247614108 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42562150225923556 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13595166163141995 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3794583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3181515957446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/AGI-0/smartllama3.1-8B-001/c97c2d67-79d5-4813-8569-64eaefe66f89.json b/data/hfopenllm_v2/meta/AGI-0/smartllama3.1-8B-001/c97c2d67-79d5-4813-8569-64eaefe66f89.json deleted file mode 100644 index 68b44e745..000000000 --- a/data/hfopenllm_v2/meta/AGI-0/smartllama3.1-8B-001/c97c2d67-79d5-4813-8569-64eaefe66f89.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/AGI-0_smartllama3.1-8B-001/1762652579.4741051", - "retrieved_timestamp": "1762652579.474106", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "AGI-0/smartllama3.1-8B-001", - "developer": "meta", - "inference_platform": "unknown", - "id": "AGI-0/smartllama3.1-8B-001", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35178659290682057 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46701787510868176 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43864583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3486535904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/ArliAI/Llama-3.1-8B-ArliAI-RPMax-v1.1/1d33cf05-9690-41ba-9288-5f39e5b3c17d.json 
b/data/hfopenllm_v2/meta/ArliAI/Llama-3.1-8B-ArliAI-RPMax-v1.1/1d33cf05-9690-41ba-9288-5f39e5b3c17d.json deleted file mode 100644 index f961af38a..000000000 --- a/data/hfopenllm_v2/meta/ArliAI/Llama-3.1-8B-ArliAI-RPMax-v1.1/1d33cf05-9690-41ba-9288-5f39e5b3c17d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ArliAI_Llama-3.1-8B-ArliAI-RPMax-v1.1/1762652579.4817438", - "retrieved_timestamp": "1762652579.481745", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ArliAI/Llama-3.1-8B-ArliAI-RPMax-v1.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "ArliAI/Llama-3.1-8B-ArliAI-RPMax-v1.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6359016298975606 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5015613456039083 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13141993957703926 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3576875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35513630319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Azure99/blossom-v5-llama3-8b/19a6e24f-819e-480f-a15f-90273a0a06c5.json b/data/hfopenllm_v2/meta/Azure99/blossom-v5-llama3-8b/19a6e24f-819e-480f-a15f-90273a0a06c5.json deleted file mode 100644 index 357a04a87..000000000 --- a/data/hfopenllm_v2/meta/Azure99/blossom-v5-llama3-8b/19a6e24f-819e-480f-a15f-90273a0a06c5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Azure99_blossom-v5-llama3-8b/1762652579.486878", - "retrieved_timestamp": "1762652579.486878", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"Azure99/blossom-v5-llama3-8b", - "developer": "meta", - "inference_platform": "unknown", - "id": "Azure99/blossom-v5-llama3-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.434293230849701 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4184909197087261 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36702083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2205784574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/BEE-spoke-data/Meta-Llama-3-8Bee/ae5f1f84-091a-4f80-ae40-92ada7e04f94.json b/data/hfopenllm_v2/meta/BEE-spoke-data/Meta-Llama-3-8Bee/ae5f1f84-091a-4f80-ae40-92ada7e04f94.json deleted file mode 100644 index af5a6058c..000000000 --- a/data/hfopenllm_v2/meta/BEE-spoke-data/Meta-Llama-3-8Bee/ae5f1f84-091a-4f80-ae40-92ada7e04f94.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BEE-spoke-data_Meta-Llama-3-8Bee/1762652579.491223", - "retrieved_timestamp": "1762652579.491224", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BEE-spoke-data/Meta-Llama-3-8Bee", - "developer": "meta", - "inference_platform": "unknown", - "id": "BEE-spoke-data/Meta-Llama-3-8Bee", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19506575885317623 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.46263641905752745 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04833836858006042 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36540625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32197473404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/BEE-spoke-data/smol_llama-101M-GQA/3c1f129b-4f54-4187-876b-c93942179125.json b/data/hfopenllm_v2/meta/BEE-spoke-data/smol_llama-101M-GQA/3c1f129b-4f54-4187-876b-c93942179125.json deleted file mode 100644 index a151dcaf5..000000000 --- a/data/hfopenllm_v2/meta/BEE-spoke-data/smol_llama-101M-GQA/3c1f129b-4f54-4187-876b-c93942179125.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BEE-spoke-data_smol_llama-101M-GQA/1762652579.491745", - "retrieved_timestamp": "1762652579.491746", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BEE-spoke-data/smol_llama-101M-GQA", - "developer": "meta", - "inference_platform": "unknown", - "id": "BEE-spoke-data/smol_llama-101M-GQA", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.101 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13843712460715346 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3017560771912554 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3712708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11070478723404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/BEE-spoke-data/smol_llama-220M-GQA-fineweb_edu/03c78dad-b50d-4f80-91f8-bd8fbb87235d.json b/data/hfopenllm_v2/meta/BEE-spoke-data/smol_llama-220M-GQA-fineweb_edu/03c78dad-b50d-4f80-91f8-bd8fbb87235d.json deleted file mode 100644 index cd03e29b0..000000000 --- a/data/hfopenllm_v2/meta/BEE-spoke-data/smol_llama-220M-GQA-fineweb_edu/03c78dad-b50d-4f80-91f8-bd8fbb87235d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BEE-spoke-data_smol_llama-220M-GQA-fineweb_edu/1762652579.492168", - "retrieved_timestamp": "1762652579.492168", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BEE-spoke-data/smol_llama-220M-GQA-fineweb_edu", - "developer": "meta", - "inference_platform": "unknown", - "id": "BEE-spoke-data/smol_llama-220M-GQA-fineweb_edu", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.218 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19881248420856662 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29290517164510593 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4367604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11269946808510638 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/BEE-spoke-data/smol_llama-220M-GQA/26596bba-b99d-417f-87be-91de8fa528d3.json b/data/hfopenllm_v2/meta/BEE-spoke-data/smol_llama-220M-GQA/26596bba-b99d-417f-87be-91de8fa528d3.json deleted file mode 100644 index 1ded543ca..000000000 --- 
a/data/hfopenllm_v2/meta/BEE-spoke-data/smol_llama-220M-GQA/26596bba-b99d-417f-87be-91de8fa528d3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BEE-spoke-data_smol_llama-220M-GQA/1762652579.491959", - "retrieved_timestamp": "1762652579.49196", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BEE-spoke-data/smol_llama-220M-GQA", - "developer": "meta", - "inference_platform": "unknown", - "id": "BEE-spoke-data/smol_llama-220M-GQA", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.218 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23860468002677343 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30316731388708956 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.405875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1149434840425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/BEE-spoke-data/smol_llama-220M-openhermes/a0de28f1-8186-4eef-b5b4-ce6da71d8271.json b/data/hfopenllm_v2/meta/BEE-spoke-data/smol_llama-220M-openhermes/a0de28f1-8186-4eef-b5b4-ce6da71d8271.json deleted file mode 100644 index ebe7fc54f..000000000 --- a/data/hfopenllm_v2/meta/BEE-spoke-data/smol_llama-220M-openhermes/a0de28f1-8186-4eef-b5b4-ce6da71d8271.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BEE-spoke-data_smol_llama-220M-openhermes/1762652579.4923809", - "retrieved_timestamp": "1762652579.492382", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BEE-spoke-data/smol_llama-220M-openhermes", - "developer": "meta", - "inference_platform": "unknown", - "id": 
"BEE-spoke-data/smol_llama-220M-openhermes", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.218 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1555229014570229 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30275191401927726 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3847291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11203457446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Ba2han/Llama-Phi-3_DoRA/99c4e277-7a0f-4c0c-ac19-25fe6b706a4a.json b/data/hfopenllm_v2/meta/Ba2han/Llama-Phi-3_DoRA/99c4e277-7a0f-4c0c-ac19-25fe6b706a4a.json deleted file mode 100644 index ada94388d..000000000 --- a/data/hfopenllm_v2/meta/Ba2han/Llama-Phi-3_DoRA/99c4e277-7a0f-4c0c-ac19-25fe6b706a4a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Ba2han_Llama-Phi-3_DoRA/1762652579.4940102", - "retrieved_timestamp": "1762652579.494011", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Ba2han/Llama-Phi-3_DoRA", - "developer": "meta", - "inference_platform": "unknown", - "id": "Ba2han/Llama-Phi-3_DoRA", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5130531434371911 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5514558259029191 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on 
MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1216012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3263422818791946 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40692708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39153922872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/BlackBeenie/Llama-3.1-8B-pythonic-passthrough-merge/f852dab4-9c5a-4fb9-99c2-951e7d2300d0.json b/data/hfopenllm_v2/meta/BlackBeenie/Llama-3.1-8B-pythonic-passthrough-merge/f852dab4-9c5a-4fb9-99c2-951e7d2300d0.json deleted file mode 100644 index 96dcdf9cf..000000000 --- a/data/hfopenllm_v2/meta/BlackBeenie/Llama-3.1-8B-pythonic-passthrough-merge/f852dab4-9c5a-4fb9-99c2-951e7d2300d0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BlackBeenie_Llama-3.1-8B-pythonic-passthrough-merge/1762652579.495604", - "retrieved_timestamp": "1762652579.495605", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BlackBeenie/Llama-3.1-8B-pythonic-passthrough-merge", - "developer": "meta", - "inference_platform": "unknown", - "id": "BlackBeenie/Llama-3.1-8B-pythonic-passthrough-merge", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 20.245 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23158552640327662 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3453848032699584 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.011329305135951661 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.37781249999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1332280585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/BlackBeenie/Neos-Llama-3.1-8B/904e3917-3bfd-4c83-8088-6b5ac596e7ea.json b/data/hfopenllm_v2/meta/BlackBeenie/Neos-Llama-3.1-8B/904e3917-3bfd-4c83-8088-6b5ac596e7ea.json deleted file mode 100644 index b2cf0bfbc..000000000 --- a/data/hfopenllm_v2/meta/BlackBeenie/Neos-Llama-3.1-8B/904e3917-3bfd-4c83-8088-6b5ac596e7ea.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BlackBeenie_Neos-Llama-3.1-8B/1762652579.496156", - "retrieved_timestamp": "1762652579.496157", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BlackBeenie/Neos-Llama-3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "BlackBeenie/Neos-Llama-3.1-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49439376410147295 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4424998411442879 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13217522658610273 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3749895833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32621343085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/BlackBeenie/Neos-Llama-3.1-base/ec9c46a6-a0e9-4174-8ebe-ce33d5eeb27d.json b/data/hfopenllm_v2/meta/BlackBeenie/Neos-Llama-3.1-base/ec9c46a6-a0e9-4174-8ebe-ce33d5eeb27d.json deleted file mode 100644 index 611649dc6..000000000 --- a/data/hfopenllm_v2/meta/BlackBeenie/Neos-Llama-3.1-base/ec9c46a6-a0e9-4174-8ebe-ce33d5eeb27d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/BlackBeenie_Neos-Llama-3.1-base/1762652579.496382", - "retrieved_timestamp": "1762652579.496383", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BlackBeenie/Neos-Llama-3.1-base", - "developer": "meta", - "inference_platform": "unknown", - "id": "BlackBeenie/Neos-Llama-3.1-base", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.65 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17508211545366295 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29303397468240516 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23741610738255034 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34990625000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11120345744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/BlackBeenie/llama-3-luminous-merged/9ca4809e-2bf0-477e-b960-64718561583b.json b/data/hfopenllm_v2/meta/BlackBeenie/llama-3-luminous-merged/9ca4809e-2bf0-477e-b960-64718561583b.json deleted file mode 100644 index ea59426d0..000000000 --- a/data/hfopenllm_v2/meta/BlackBeenie/llama-3-luminous-merged/9ca4809e-2bf0-477e-b960-64718561583b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BlackBeenie_llama-3-luminous-merged/1762652579.496879", - "retrieved_timestamp": "1762652579.49688", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BlackBeenie/llama-3-luminous-merged", - "developer": "meta", - "inference_platform": "unknown", - "id": "BlackBeenie/llama-3-luminous-merged", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43234506664538974 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5153924501559338 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08685800604229607 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4148958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3773271276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/BlackBeenie/llama-3.1-8B-Galore-openassistant-guanaco/7f8d4c8c-4877-4b2f-a0fe-7817894daa79.json b/data/hfopenllm_v2/meta/BlackBeenie/llama-3.1-8B-Galore-openassistant-guanaco/7f8d4c8c-4877-4b2f-a0fe-7817894daa79.json deleted file mode 100644 index e37ca784a..000000000 --- a/data/hfopenllm_v2/meta/BlackBeenie/llama-3.1-8B-Galore-openassistant-guanaco/7f8d4c8c-4877-4b2f-a0fe-7817894daa79.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BlackBeenie_llama-3.1-8B-Galore-openassistant-guanaco/1762652579.4970949", - "retrieved_timestamp": "1762652579.4970958", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BlackBeenie/llama-3.1-8B-Galore-openassistant-guanaco", - "developer": "meta", - "inference_platform": "unknown", - "id": "BlackBeenie/llama-3.1-8B-Galore-openassistant-guanaco", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634842218646525 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5213365363748029 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44062500000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32064494680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Bllossom/llama-3.2-Korean-Bllossom-AICA-5B/e2668c3c-a862-4564-acee-3c3ce439f74f.json b/data/hfopenllm_v2/meta/Bllossom/llama-3.2-Korean-Bllossom-AICA-5B/e2668c3c-a862-4564-acee-3c3ce439f74f.json deleted file mode 100644 index 644c63503..000000000 --- a/data/hfopenllm_v2/meta/Bllossom/llama-3.2-Korean-Bllossom-AICA-5B/e2668c3c-a862-4564-acee-3c3ce439f74f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Bllossom_llama-3.2-Korean-Bllossom-AICA-5B/1762652579.497314", - "retrieved_timestamp": "1762652579.497314", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Bllossom/llama-3.2-Korean-Bllossom-AICA-5B", - "developer": "meta", - "inference_platform": "unknown", - "id": "Bllossom/llama-3.2-Korean-Bllossom-AICA-5B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MllamaForConditionalGeneration", - "params_billions": 5.199 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5172497861230424 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42930745041520607 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12386706948640483 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3833958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27102726063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/BrainWave-ML/llama3.2-3B-maths-orpo/979ef5b7-12cb-4e4d-81c7-9e6fcb1d6cef.json b/data/hfopenllm_v2/meta/BrainWave-ML/llama3.2-3B-maths-orpo/979ef5b7-12cb-4e4d-81c7-9e6fcb1d6cef.json deleted file mode 100644 index 40163efa8..000000000 --- a/data/hfopenllm_v2/meta/BrainWave-ML/llama3.2-3B-maths-orpo/979ef5b7-12cb-4e4d-81c7-9e6fcb1d6cef.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BrainWave-ML_llama3.2-3B-maths-orpo/1762652579.499409", - "retrieved_timestamp": "1762652579.49941", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BrainWave-ML/llama3.2-3B-maths-orpo", - "developer": "meta", - "inference_platform": "unknown", - "id": "BrainWave-ML/llama3.2-3B-maths-orpo", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20490742341431845 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911778102988436 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35753125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11677194148936171 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/CYFRAGOVPL/Llama-PLLuM-8B-base/01484796-f32b-43fe-b865-517b1a5c0b10.json b/data/hfopenllm_v2/meta/CYFRAGOVPL/Llama-PLLuM-8B-base/01484796-f32b-43fe-b865-517b1a5c0b10.json deleted file mode 100644 index 43dbe71a2..000000000 --- a/data/hfopenllm_v2/meta/CYFRAGOVPL/Llama-PLLuM-8B-base/01484796-f32b-43fe-b865-517b1a5c0b10.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CYFRAGOVPL_Llama-PLLuM-8B-base/1762652579.500559", - "retrieved_timestamp": "1762652579.5005598", - 
"source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CYFRAGOVPL/Llama-PLLuM-8B-base", - "developer": "meta", - "inference_platform": "unknown", - "id": "CYFRAGOVPL/Llama-PLLuM-8B-base", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28988749850396944 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43204480458140976 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03625377643504532 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39703125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27568151595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Columbia-NLP/LION-LLaMA-3-8b-odpo-v1.0/c256cede-47bb-487d-9de2-ae7352faa165.json b/data/hfopenllm_v2/meta/Columbia-NLP/LION-LLaMA-3-8b-odpo-v1.0/c256cede-47bb-487d-9de2-ae7352faa165.json deleted file mode 100644 index b009b89aa..000000000 --- a/data/hfopenllm_v2/meta/Columbia-NLP/LION-LLaMA-3-8b-odpo-v1.0/c256cede-47bb-487d-9de2-ae7352faa165.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Columbia-NLP_LION-LLaMA-3-8b-odpo-v1.0/1762652579.5080209", - "retrieved_timestamp": "1762652579.508022", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Columbia-NLP/LION-LLaMA-3-8b-odpo-v1.0", - "developer": "meta", - "inference_platform": "unknown", - "id": "Columbia-NLP/LION-LLaMA-3-8b-odpo-v1.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39679938119744496 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5023929881802022 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40575 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3152426861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/ContactDoctor/Bio-Medical-Llama-3-8B/42a3e3b7-b8e3-4470-b1a6-4a3daa146484.json b/data/hfopenllm_v2/meta/ContactDoctor/Bio-Medical-Llama-3-8B/42a3e3b7-b8e3-4470-b1a6-4a3daa146484.json deleted file mode 100644 index 68cd4f463..000000000 --- a/data/hfopenllm_v2/meta/ContactDoctor/Bio-Medical-Llama-3-8B/42a3e3b7-b8e3-4470-b1a6-4a3daa146484.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ContactDoctor_Bio-Medical-Llama-3-8B/1762652579.510189", - "retrieved_timestamp": "1762652579.510189", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ContactDoctor/Bio-Medical-Llama-3-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "ContactDoctor/Bio-Medical-Llama-3-8B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4422365988909427 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.486311802622738 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06722054380664652 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on 
GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3338926174496644 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35139583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36477726063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Corianas/llama-3-reactor/0670ba93-c3d6-4a74-94e4-4a77311d4984.json b/data/hfopenllm_v2/meta/Corianas/llama-3-reactor/0670ba93-c3d6-4a74-94e4-4a77311d4984.json deleted file mode 100644 index 3244a425b..000000000 --- a/data/hfopenllm_v2/meta/Corianas/llama-3-reactor/0670ba93-c3d6-4a74-94e4-4a77311d4984.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Corianas_llama-3-reactor/1762652579.5122728", - "retrieved_timestamp": "1762652579.512274", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Corianas/llama-3-reactor", - "developer": "meta", - "inference_platform": "unknown", - "id": "Corianas/llama-3-reactor", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": -1.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23001192391742797 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4457148560545015 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39771874999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2800864361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/CreitinGameplays/Llama-3.1-8B-R1-v0.1/a4b935d4-1664-44e4-ad82-639755c2b909.json 
b/data/hfopenllm_v2/meta/CreitinGameplays/Llama-3.1-8B-R1-v0.1/a4b935d4-1664-44e4-ad82-639755c2b909.json deleted file mode 100644 index 3df7b0b65..000000000 --- a/data/hfopenllm_v2/meta/CreitinGameplays/Llama-3.1-8B-R1-v0.1/a4b935d4-1664-44e4-ad82-639755c2b909.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/CreitinGameplays_Llama-3.1-8B-R1-v0.1/1762652579.514677", - "retrieved_timestamp": "1762652579.514678", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "CreitinGameplays/Llama-3.1-8B-R1-v0.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "CreitinGameplays/Llama-3.1-8B-R1-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.323485019747603 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3057485865545513 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18126888217522658 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36215624999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12516622340425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Daemontatox/Llama3.3-70B-CogniLink/20b46645-a1dd-4974-9ad1-444f8ca78481.json b/data/hfopenllm_v2/meta/Daemontatox/Llama3.3-70B-CogniLink/20b46645-a1dd-4974-9ad1-444f8ca78481.json deleted file mode 100644 index 3ed1a59df..000000000 --- a/data/hfopenllm_v2/meta/Daemontatox/Llama3.3-70B-CogniLink/20b46645-a1dd-4974-9ad1-444f8ca78481.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Daemontatox_Llama3.3-70B-CogniLink/1762652579.527427", - "retrieved_timestamp": "1762652579.5274282", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": 
"documentation" - }, - "model_info": { - "name": "Daemontatox/Llama3.3-70B-CogniLink", - "developer": "meta", - "inference_platform": "unknown", - "id": "Daemontatox/Llama3.3-70B-CogniLink", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6931042965996888 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.666832775829349 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41389728096676737 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44546979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4876979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5172872340425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Daemontatox/Llama_cot/01a0a741-5f78-4c31-a743-8e42ba73a22d.json b/data/hfopenllm_v2/meta/Daemontatox/Llama_cot/01a0a741-5f78-4c31-a743-8e42ba73a22d.json deleted file mode 100644 index 1452c844c..000000000 --- a/data/hfopenllm_v2/meta/Daemontatox/Llama_cot/01a0a741-5f78-4c31-a743-8e42ba73a22d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Daemontatox_Llama_cot/1762652579.527702", - "retrieved_timestamp": "1762652579.527703", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Daemontatox/Llama_cot", - "developer": "meta", - "inference_platform": "unknown", - "id": "Daemontatox/Llama_cot", - "additional_details": { - "precision": "float16", - "architecture": "MllamaForConditionalGeneration", - "params_billions": 10.67 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7548781677061308 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.4838374335391873 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20241691842900303 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3872395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.351811835106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Danielbrdz/Barcenas-Llama3-8b-ORPO/83f9e48d-919e-42ec-8ea4-cc933a1b98f5.json b/data/hfopenllm_v2/meta/Danielbrdz/Barcenas-Llama3-8b-ORPO/83f9e48d-919e-42ec-8ea4-cc933a1b98f5.json deleted file mode 100644 index 25e70568f..000000000 --- a/data/hfopenllm_v2/meta/Danielbrdz/Barcenas-Llama3-8b-ORPO/83f9e48d-919e-42ec-8ea4-cc933a1b98f5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Danielbrdz_Barcenas-Llama3-8b-ORPO/1762652579.534392", - "retrieved_timestamp": "1762652579.534392", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Danielbrdz/Barcenas-Llama3-8b-ORPO", - "developer": "meta", - "inference_platform": "unknown", - "id": "Danielbrdz/Barcenas-Llama3-8b-ORPO", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.737242738156979 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49865578559911244 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06570996978851963 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 
1 - }, - "score_details": { - "score": 0.4189583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3829787234042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/DavidAU/DeepHermes-3-Llama-3-8B-Preview-16.5B-Brainstorm/e2d5ee61-4d0a-4925-b3bf-016b8ff6b1b9.json b/data/hfopenllm_v2/meta/DavidAU/DeepHermes-3-Llama-3-8B-Preview-16.5B-Brainstorm/e2d5ee61-4d0a-4925-b3bf-016b8ff6b1b9.json deleted file mode 100644 index 5bee942c7..000000000 --- a/data/hfopenllm_v2/meta/DavidAU/DeepHermes-3-Llama-3-8B-Preview-16.5B-Brainstorm/e2d5ee61-4d0a-4925-b3bf-016b8ff6b1b9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DavidAU_DeepHermes-3-Llama-3-8B-Preview-16.5B-Brainstorm/1762652579.537201", - "retrieved_timestamp": "1762652579.537202", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DavidAU/DeepHermes-3-Llama-3-8B-Preview-16.5B-Brainstorm", - "developer": "meta", - "inference_platform": "unknown", - "id": "DavidAU/DeepHermes-3-Llama-3-8B-Preview-16.5B-Brainstorm", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 16.537 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31356799957446246 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4762231983114653 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10574018126888217 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39278125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3208942819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/DavidAU/DeepSeek-BlackRoot-R1-Distill-Llama-3.1-8B/5e116cf4-1be5-44aa-b266-494b1e4127d3.json b/data/hfopenllm_v2/meta/DavidAU/DeepSeek-BlackRoot-R1-Distill-Llama-3.1-8B/5e116cf4-1be5-44aa-b266-494b1e4127d3.json deleted file mode 100644 index b18f864f8..000000000 
--- a/data/hfopenllm_v2/meta/DavidAU/DeepSeek-BlackRoot-R1-Distill-Llama-3.1-8B/5e116cf4-1be5-44aa-b266-494b1e4127d3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DavidAU_DeepSeek-BlackRoot-R1-Distill-Llama-3.1-8B/1762652579.5376909", - "retrieved_timestamp": "1762652579.537696", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DavidAU/DeepSeek-BlackRoot-R1-Distill-Llama-3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "DavidAU/DeepSeek-BlackRoot-R1-Distill-Llama-3.1-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36849780803822746 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.488693862545088 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06570996978851963 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43197916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2976230053191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/DavidAU/DeepSeek-Grand-Horror-SMB-R1-Distill-Llama-3.1-16B/a3b69c21-b6bf-4bf9-9097-ebb26c586829.json b/data/hfopenllm_v2/meta/DavidAU/DeepSeek-Grand-Horror-SMB-R1-Distill-Llama-3.1-16B/a3b69c21-b6bf-4bf9-9097-ebb26c586829.json deleted file mode 100644 index 93b54adff..000000000 --- a/data/hfopenllm_v2/meta/DavidAU/DeepSeek-Grand-Horror-SMB-R1-Distill-Llama-3.1-16B/a3b69c21-b6bf-4bf9-9097-ebb26c586829.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DavidAU_DeepSeek-Grand-Horror-SMB-R1-Distill-Llama-3.1-16B/1762652579.538059", - "retrieved_timestamp": "1762652579.53806", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - 
"model_info": { - "name": "DavidAU/DeepSeek-Grand-Horror-SMB-R1-Distill-Llama-3.1-16B", - "developer": "meta", - "inference_platform": "unknown", - "id": "DavidAU/DeepSeek-Grand-Horror-SMB-R1-Distill-Llama-3.1-16B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 15.664 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2506948230694557 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44878062698346727 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41644791666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709441489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Deep-Thinker-Uncensored-24B/d827463a-19cd-4bf2-8823-399b22b57387.json b/data/hfopenllm_v2/meta/DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Deep-Thinker-Uncensored-24B/d827463a-19cd-4bf2-8823-399b22b57387.json deleted file mode 100644 index 4725df762..000000000 --- a/data/hfopenllm_v2/meta/DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Deep-Thinker-Uncensored-24B/d827463a-19cd-4bf2-8823-399b22b57387.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DavidAU_DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Deep-Thinker-Uncensored-24B/1762652579.5383239", - "retrieved_timestamp": "1762652579.538326", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Deep-Thinker-Uncensored-24B", - "developer": "meta", - "inference_platform": "unknown", - "id": "DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Deep-Thinker-Uncensored-24B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 24.942 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3882564927725103 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48860331670972784 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08157099697885196 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30244348404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Mad-Scientist-24B/efad116f-dfc7-4a63-95b1-c61655cd7f0c.json b/data/hfopenllm_v2/meta/DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Mad-Scientist-24B/efad116f-dfc7-4a63-95b1-c61655cd7f0c.json deleted file mode 100644 index 01c1140fb..000000000 --- a/data/hfopenllm_v2/meta/DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Mad-Scientist-24B/efad116f-dfc7-4a63-95b1-c61655cd7f0c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DavidAU_DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Mad-Scientist-24B/1762652579.538624", - "retrieved_timestamp": "1762652579.538625", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Mad-Scientist-24B", - "developer": "meta", - "inference_platform": "unknown", - "id": "DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Mad-Scientist-24B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 24.942 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3436182662003484 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47693843531787744 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, 
- "score_details": { - "score": 0.0755287009063444 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.337248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4230833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29695811170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/DavidAU/DeepSeek-V2-Grand-Horror-SMB-R1-Distill-Llama-3.1-Uncensored-16.5B/5af2dce8-b12c-474c-b9e2-b5a38687772d.json b/data/hfopenllm_v2/meta/DavidAU/DeepSeek-V2-Grand-Horror-SMB-R1-Distill-Llama-3.1-Uncensored-16.5B/5af2dce8-b12c-474c-b9e2-b5a38687772d.json deleted file mode 100644 index be3950574..000000000 --- a/data/hfopenllm_v2/meta/DavidAU/DeepSeek-V2-Grand-Horror-SMB-R1-Distill-Llama-3.1-Uncensored-16.5B/5af2dce8-b12c-474c-b9e2-b5a38687772d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DavidAU_DeepSeek-V2-Grand-Horror-SMB-R1-Distill-Llama-3.1-Uncensored-16.5B/1762652579.539129", - "retrieved_timestamp": "1762652579.539129", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DavidAU/DeepSeek-V2-Grand-Horror-SMB-R1-Distill-Llama-3.1-Uncensored-16.5B", - "developer": "meta", - "inference_platform": "unknown", - "id": "DavidAU/DeepSeek-V2-Grand-Horror-SMB-R1-Distill-Llama-3.1-Uncensored-16.5B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 16.537 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2853162940996556 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44623832540838126 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.017371601208459216 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.417875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2777593085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/DavidAU/DeepThought-MOE-8X3B-R1-Llama-3.2-Reasoning-18B/f2b1fc61-a1c4-431c-b507-7d222ac3aedc.json b/data/hfopenllm_v2/meta/DavidAU/DeepThought-MOE-8X3B-R1-Llama-3.2-Reasoning-18B/f2b1fc61-a1c4-431c-b507-7d222ac3aedc.json deleted file mode 100644 index 815c0f1c8..000000000 --- a/data/hfopenllm_v2/meta/DavidAU/DeepThought-MOE-8X3B-R1-Llama-3.2-Reasoning-18B/f2b1fc61-a1c4-431c-b507-7d222ac3aedc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DavidAU_DeepThought-MOE-8X3B-R1-Llama-3.2-Reasoning-18B/1762652579.5393531", - "retrieved_timestamp": "1762652579.539354", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DavidAU/DeepThought-MOE-8X3B-R1-Llama-3.2-Reasoning-18B", - "developer": "meta", - "inference_platform": "unknown", - "id": "DavidAU/DeepThought-MOE-8X3B-R1-Llama-3.2-Reasoning-18B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 18.405 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3793135547015253 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4232300476265338 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10800604229607251 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3559791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2720246010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter0/62d01464-4163-432c-a017-bedf41cba649.json b/data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter0/62d01464-4163-432c-a017-bedf41cba649.json deleted file mode 100644 index 91f686ca2..000000000 --- 
a/data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter0/62d01464-4163-432c-a017-bedf41cba649.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DavieLion_Llama-3.2-1B-SPIN-iter0/1762652579.5443351", - "retrieved_timestamp": "1762652579.5443368", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DavieLion/Llama-3.2-1B-SPIN-iter0", - "developer": "meta", - "inference_platform": "unknown", - "id": "DavieLion/Llama-3.2-1B-SPIN-iter0", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15067687070306784 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29300816789978756 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3565416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11253324468085106 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter0/a9771320-cc89-43fc-b398-7797505bc4e2.json b/data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter0/a9771320-cc89-43fc-b398-7797505bc4e2.json deleted file mode 100644 index 47170fd57..000000000 --- a/data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter0/a9771320-cc89-43fc-b398-7797505bc4e2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DavieLion_Llama-3.2-1B-SPIN-iter0/1762652579.544659", - "retrieved_timestamp": "1762652579.5446599", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DavieLion/Llama-3.2-1B-SPIN-iter0", - "developer": "meta", - "inference_platform": "unknown", - "id": "DavieLion/Llama-3.2-1B-SPIN-iter0", - 
"additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15492338107332987 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29372614029730437 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3564791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11278257978723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter1/c380c4b0-7804-4b59-a7e4-700f0a7122b3.json b/data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter1/c380c4b0-7804-4b59-a7e4-700f0a7122b3.json deleted file mode 100644 index 13d54b8cf..000000000 --- a/data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter1/c380c4b0-7804-4b59-a7e4-700f0a7122b3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DavieLion_Llama-3.2-1B-SPIN-iter1/1762652579.5448809", - "retrieved_timestamp": "1762652579.5448818", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DavieLion/Llama-3.2-1B-SPIN-iter1", - "developer": "meta", - "inference_platform": "unknown", - "id": "DavieLion/Llama-3.2-1B-SPIN-iter1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15754642127333254 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29402546232087917 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": 
"Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0022658610271903325 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25083892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3646041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11178523936170212 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter2/5723e611-e7e0-47c0-a5ac-162f22690d70.json b/data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter2/5723e611-e7e0-47c0-a5ac-162f22690d70.json deleted file mode 100644 index a658879e7..000000000 --- a/data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter2/5723e611-e7e0-47c0-a5ac-162f22690d70.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DavieLion_Llama-3.2-1B-SPIN-iter2/1762652579.545113", - "retrieved_timestamp": "1762652579.545114", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DavieLion/Llama-3.2-1B-SPIN-iter2", - "developer": "meta", - "inference_platform": "unknown", - "id": "DavieLion/Llama-3.2-1B-SPIN-iter2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13761264555822994 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2980340303779312 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.005287009063444108 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35530208333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11286569148936171 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter3/07d16051-fe48-46e6-a47c-806e9f95a92b.json b/data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter3/07d16051-fe48-46e6-a47c-806e9f95a92b.json deleted file mode 100644 index 035ec92dc..000000000 --- a/data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter3/07d16051-fe48-46e6-a47c-806e9f95a92b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DavieLion_Llama-3.2-1B-SPIN-iter3/1762652579.54562", - "retrieved_timestamp": "1762652579.545621", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DavieLion/Llama-3.2-1B-SPIN-iter3", - "developer": "meta", - "inference_platform": "unknown", - "id": "DavieLion/Llama-3.2-1B-SPIN-iter3", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1323920530858123 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29722352809482616 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3526666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11286569148936171 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter3/7a91746e-e622-4eef-aef8-5f0ba04f03c9.json b/data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter3/7a91746e-e622-4eef-aef8-5f0ba04f03c9.json deleted file mode 100644 index 977e34c0c..000000000 --- a/data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter3/7a91746e-e622-4eef-aef8-5f0ba04f03c9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DavieLion_Llama-3.2-1B-SPIN-iter3/1762652579.5453749", - "retrieved_timestamp": 
"1762652579.545376", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DavieLion/Llama-3.2-1B-SPIN-iter3", - "developer": "meta", - "inference_platform": "unknown", - "id": "DavieLion/Llama-3.2-1B-SPIN-iter3", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1335910938531984 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29752276438021447 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34996875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11278257978723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/DeepAutoAI/Explore_Llama-3.1-8B-Inst/0da22342-b4ef-4dd2-b4f5-327710986701.json b/data/hfopenllm_v2/meta/DeepAutoAI/Explore_Llama-3.1-8B-Inst/0da22342-b4ef-4dd2-b4f5-327710986701.json deleted file mode 100644 index 24b36418f..000000000 --- a/data/hfopenllm_v2/meta/DeepAutoAI/Explore_Llama-3.1-8B-Inst/0da22342-b4ef-4dd2-b4f5-327710986701.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DeepAutoAI_Explore_Llama-3.1-8B-Inst/1762652579.547036", - "retrieved_timestamp": "1762652579.5470378", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DeepAutoAI/Explore_Llama-3.1-8B-Inst", - "developer": "meta", - "inference_platform": "unknown", - "id": "DeepAutoAI/Explore_Llama-3.1-8B-Inst", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7794828831943688 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.511742159482904 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20090634441087613 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3909583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.379155585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/DeepAutoAI/Explore_Llama-3.2-1B-Inst/f8e00446-f253-4ff3-a9ff-ef182cf9e147.json b/data/hfopenllm_v2/meta/DeepAutoAI/Explore_Llama-3.2-1B-Inst/f8e00446-f253-4ff3-a9ff-ef182cf9e147.json deleted file mode 100644 index 0ebd9bcc0..000000000 --- a/data/hfopenllm_v2/meta/DeepAutoAI/Explore_Llama-3.2-1B-Inst/f8e00446-f253-4ff3-a9ff-ef182cf9e147.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DeepAutoAI_Explore_Llama-3.2-1B-Inst/1762652579.5474088", - "retrieved_timestamp": "1762652579.547411", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DeepAutoAI/Explore_Llama-3.2-1B-Inst", - "developer": "meta", - "inference_platform": "unknown", - "id": "DeepAutoAI/Explore_Llama-3.2-1B-Inst", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5648856146136695 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35048085637770016 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07477341389728097 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31834375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18085106382978725 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/DeepAutoAI/Explore_Llama-3.2-1B-Inst_v0/455764e4-7b66-4189-b2e8-907047a92d45.json b/data/hfopenllm_v2/meta/DeepAutoAI/Explore_Llama-3.2-1B-Inst_v0/455764e4-7b66-4189-b2e8-907047a92d45.json deleted file mode 100644 index 4fc9595a2..000000000 --- a/data/hfopenllm_v2/meta/DeepAutoAI/Explore_Llama-3.2-1B-Inst_v0/455764e4-7b66-4189-b2e8-907047a92d45.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DeepAutoAI_Explore_Llama-3.2-1B-Inst_v0/1762652579.547727", - "retrieved_timestamp": "1762652579.5477278", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DeepAutoAI/Explore_Llama-3.2-1B-Inst_v0", - "developer": "meta", - "inference_platform": "unknown", - "id": "DeepAutoAI/Explore_Llama-3.2-1B-Inst_v0", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5597148898256625 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33650903200352716 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05966767371601209 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3103125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18035239361702127 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/meta/DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1.1/40bc60f8-aa35-460b-a7af-b4cccd138c80.json b/data/hfopenllm_v2/meta/DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1.1/40bc60f8-aa35-460b-a7af-b4cccd138c80.json deleted file mode 100644 index 279266ece..000000000 --- a/data/hfopenllm_v2/meta/DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1.1/40bc60f8-aa35-460b-a7af-b4cccd138c80.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DeepAutoAI_Explore_Llama-3.2-1B-Inst_v1.1/1762652579.5483131", - "retrieved_timestamp": "1762652579.548314", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5844193406827218 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3512662445055541 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3117083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18184840425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1/74f0ecd4-e04a-4775-9551-fc0e9fa40314.json b/data/hfopenllm_v2/meta/DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1/74f0ecd4-e04a-4775-9551-fc0e9fa40314.json deleted file mode 100644 index 689fb4ac4..000000000 --- a/data/hfopenllm_v2/meta/DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1/74f0ecd4-e04a-4775-9551-fc0e9fa40314.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DeepAutoAI_Explore_Llama-3.2-1B-Inst_v1/1762652579.548037", - "retrieved_timestamp": "1762652579.548039", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1", - "developer": "meta", - "inference_platform": "unknown", - "id": "DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4998891829235318 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3141475230443668 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030966767371601207 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24496644295302014 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37809374999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12691156914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/DeepAutoAI/ldm_soup_Llama-3.1-8B-Inst/a4da2ab3-adb3-405f-9bb7-2164d740d424.json b/data/hfopenllm_v2/meta/DeepAutoAI/ldm_soup_Llama-3.1-8B-Inst/a4da2ab3-adb3-405f-9bb7-2164d740d424.json deleted file mode 100644 index fef869c9b..000000000 --- a/data/hfopenllm_v2/meta/DeepAutoAI/ldm_soup_Llama-3.1-8B-Inst/a4da2ab3-adb3-405f-9bb7-2164d740d424.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DeepAutoAI_ldm_soup_Llama-3.1-8B-Inst/1762652579.5498", - "retrieved_timestamp": "1762652579.5498009", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Inst", - "developer": "meta", - "inference_platform": "unknown", - "id": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Inst", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.803263119633683 
- } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.512116784464076 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18882175226586104 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41613541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38863031914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/DeepMount00/Llama-3-8b-Ita/bee65c80-73f2-46e5-9532-8f92b38c4fc5.json b/data/hfopenllm_v2/meta/DeepMount00/Llama-3-8b-Ita/bee65c80-73f2-46e5-9532-8f92b38c4fc5.json deleted file mode 100644 index 12ee04433..000000000 --- a/data/hfopenllm_v2/meta/DeepMount00/Llama-3-8b-Ita/bee65c80-73f2-46e5-9532-8f92b38c4fc5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DeepMount00_Llama-3-8b-Ita/1762652579.551231", - "retrieved_timestamp": "1762652579.551231", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DeepMount00/Llama-3-8b-Ita", - "developer": "meta", - "inference_platform": "unknown", - "id": "DeepMount00/Llama-3-8b-Ita", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7530297388706411 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.493576505761469 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4267708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38522273936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/DeepMount00/Llama-3.1-8b-ITA/1c5ce85b-84f3-4ac4-8a98-9d80659bff18.json b/data/hfopenllm_v2/meta/DeepMount00/Llama-3.1-8b-ITA/1c5ce85b-84f3-4ac4-8a98-9d80659bff18.json deleted file mode 100644 index b5ebbcc4c..000000000 --- a/data/hfopenllm_v2/meta/DeepMount00/Llama-3.1-8b-ITA/1c5ce85b-84f3-4ac4-8a98-9d80659bff18.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DeepMount00_Llama-3.1-8b-ITA/1762652579.5514839", - "retrieved_timestamp": "1762652579.5514848", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DeepMount00/Llama-3.1-8b-ITA", - "developer": "meta", - "inference_platform": "unknown", - "id": "DeepMount00/Llama-3.1-8b-ITA", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7916727616058724 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5109356715302854 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10876132930513595 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41359375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38763297872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/DeepMount00/Llama-3.1-8b-ITA/ca297bdd-d804-4c43-bb6e-0b7e230974e2.json b/data/hfopenllm_v2/meta/DeepMount00/Llama-3.1-8b-ITA/ca297bdd-d804-4c43-bb6e-0b7e230974e2.json deleted file mode 100644 index 85f607894..000000000 --- 
a/data/hfopenllm_v2/meta/DeepMount00/Llama-3.1-8b-ITA/ca297bdd-d804-4c43-bb6e-0b7e230974e2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DeepMount00_Llama-3.1-8b-Ita/1762652579.551703", - "retrieved_timestamp": "1762652579.5517042", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DeepMount00/Llama-3.1-8b-Ita", - "developer": "meta", - "inference_platform": "unknown", - "id": "DeepMount00/Llama-3.1-8b-Ita", - "additional_details": { - "precision": "bfloat16", - "architecture": "Unknown", - "params_billions": 0.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5364843060856306 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5169995464792883 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17069486404833836 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44871875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39602726063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/DeepMount00/Llama-3.1-Distilled/6424a285-b3dc-4221-b3ba-5e7922185269.json b/data/hfopenllm_v2/meta/DeepMount00/Llama-3.1-Distilled/6424a285-b3dc-4221-b3ba-5e7922185269.json deleted file mode 100644 index 9a20202dc..000000000 --- a/data/hfopenllm_v2/meta/DeepMount00/Llama-3.1-Distilled/6424a285-b3dc-4221-b3ba-5e7922185269.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DeepMount00_Llama-3.1-Distilled/1762652579.551904", - "retrieved_timestamp": "1762652579.551905", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DeepMount00/Llama-3.1-Distilled", - "developer": "meta", - "inference_platform": "unknown", - "id": "DeepMount00/Llama-3.1-Distilled", - "additional_details": { - "precision": "bfloat16", - 
"architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7843787816327346 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5100875314179011 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20317220543806647 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40581249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3781582446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/DevQuasar/DevQuasar-R1-Uncensored-Llama-8B/490df557-2f50-434a-a28d-a78a234da9fa.json b/data/hfopenllm_v2/meta/DevQuasar/DevQuasar-R1-Uncensored-Llama-8B/490df557-2f50-434a-a28d-a78a234da9fa.json deleted file mode 100644 index a17c588f9..000000000 --- a/data/hfopenllm_v2/meta/DevQuasar/DevQuasar-R1-Uncensored-Llama-8B/490df557-2f50-434a-a28d-a78a234da9fa.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DevQuasar_DevQuasar-R1-Uncensored-Llama-8B/1762652579.555449", - "retrieved_timestamp": "1762652579.5554502", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DevQuasar/DevQuasar-R1-Uncensored-Llama-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "DevQuasar/DevQuasar-R1-Uncensored-Llama-8B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38488432913558246 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5117943836412089 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact 
Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33081570996978854 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34731543624161076 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44357291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3614527925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Enno-Ai/EnnoAi-Pro-French-Llama-3-8B-v0.4/bbc78d6d-09e3-410a-9bf9-a6dcdbef346e.json b/data/hfopenllm_v2/meta/Enno-Ai/EnnoAi-Pro-French-Llama-3-8B-v0.4/bbc78d6d-09e3-410a-9bf9-a6dcdbef346e.json deleted file mode 100644 index bbad6c21d..000000000 --- a/data/hfopenllm_v2/meta/Enno-Ai/EnnoAi-Pro-French-Llama-3-8B-v0.4/bbc78d6d-09e3-410a-9bf9-a6dcdbef346e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Enno-Ai_EnnoAi-Pro-French-Llama-3-8B-v0.4/1762652579.5956101", - "retrieved_timestamp": "1762652579.5956109", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Enno-Ai/EnnoAi-Pro-French-Llama-3-8B-v0.4", - "developer": "meta", - "inference_platform": "unknown", - "id": "Enno-Ai/EnnoAi-Pro-French-Llama-3-8B-v0.4", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.031 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4188807918545016 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4074954889367559 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03625377643504532 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41700000000000004 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634640957446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Enno-Ai/EnnoAi-Pro-Llama-3-8B-v0.3/f1e005a2-b949-4518-b7e5-3fd7af3fcf0f.json b/data/hfopenllm_v2/meta/Enno-Ai/EnnoAi-Pro-Llama-3-8B-v0.3/f1e005a2-b949-4518-b7e5-3fd7af3fcf0f.json deleted file mode 100644 index cb70e7b3c..000000000 --- a/data/hfopenllm_v2/meta/Enno-Ai/EnnoAi-Pro-Llama-3-8B-v0.3/f1e005a2-b949-4518-b7e5-3fd7af3fcf0f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Enno-Ai_EnnoAi-Pro-Llama-3-8B-v0.3/1762652579.596117", - "retrieved_timestamp": "1762652579.596118", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Enno-Ai/EnnoAi-Pro-Llama-3-8B-v0.3", - "developer": "meta", - "inference_platform": "unknown", - "id": "Enno-Ai/EnnoAi-Pro-Llama-3-8B-v0.3", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5082569803676467 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4100577461090639 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04833836858006042 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42357291666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2990359042553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Enno-Ai/EnnoAi-Pro-Llama-3-8B/39a6c969-d938-4e4c-9adc-f71f1d30143d.json b/data/hfopenllm_v2/meta/Enno-Ai/EnnoAi-Pro-Llama-3-8B/39a6c969-d938-4e4c-9adc-f71f1d30143d.json deleted file mode 100644 index 971e17871..000000000 --- a/data/hfopenllm_v2/meta/Enno-Ai/EnnoAi-Pro-Llama-3-8B/39a6c969-d938-4e4c-9adc-f71f1d30143d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/Enno-Ai_EnnoAi-Pro-Llama-3-8B/1762652579.5958989", - "retrieved_timestamp": "1762652579.5958998", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Enno-Ai/EnnoAi-Pro-Llama-3-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "Enno-Ai/EnnoAi-Pro-Llama-3-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.031 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31953771548380516 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4151575806137866 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4070520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21509308510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Enno-Ai/EnnoAi-Pro-Llama-3.1-8B-v0.9/cf0ca830-4bb6-4317-97ae-380f54518d9f.json b/data/hfopenllm_v2/meta/Enno-Ai/EnnoAi-Pro-Llama-3.1-8B-v0.9/cf0ca830-4bb6-4317-97ae-380f54518d9f.json deleted file mode 100644 index 577ae898c..000000000 --- a/data/hfopenllm_v2/meta/Enno-Ai/EnnoAi-Pro-Llama-3.1-8B-v0.9/cf0ca830-4bb6-4317-97ae-380f54518d9f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Enno-Ai_EnnoAi-Pro-Llama-3.1-8B-v0.9/1762652579.5963311", - "retrieved_timestamp": "1762652579.596332", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Enno-Ai/EnnoAi-Pro-Llama-3.1-8B-v0.9", - "developer": "meta", - "inference_platform": "unknown", - "id": "Enno-Ai/EnnoAi-Pro-Llama-3.1-8B-v0.9", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": 
"IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4689147018799009 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41602720836190127 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0377643504531722 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3831770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2595578457446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/EnnoAi/EnnoAi-Pro-Llama-3.1-8B-v1.0/32c712e0-4f63-4188-b4c8-5f37b6101e3f.json b/data/hfopenllm_v2/meta/EnnoAi/EnnoAi-Pro-Llama-3.1-8B-v1.0/32c712e0-4f63-4188-b4c8-5f37b6101e3f.json deleted file mode 100644 index 907be7141..000000000 --- a/data/hfopenllm_v2/meta/EnnoAi/EnnoAi-Pro-Llama-3.1-8B-v1.0/32c712e0-4f63-4188-b4c8-5f37b6101e3f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EnnoAi_EnnoAi-Pro-Llama-3.1-8B-v1.0/1762652579.596818", - "retrieved_timestamp": "1762652579.596819", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EnnoAi/EnnoAi-Pro-Llama-3.1-8B-v1.0", - "developer": "meta", - "inference_platform": "unknown", - "id": "EnnoAi/EnnoAi-Pro-Llama-3.1-8B-v1.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4704384366813389 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41602720836190127 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.0377643504531722 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3831770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2595578457446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/EpistemeAI/Alpaca-Llama3.1-8B/cd4698d8-e9d0-4a00-855a-6e0b9cfc31d8.json b/data/hfopenllm_v2/meta/EpistemeAI/Alpaca-Llama3.1-8B/cd4698d8-e9d0-4a00-855a-6e0b9cfc31d8.json deleted file mode 100644 index 44739adc3..000000000 --- a/data/hfopenllm_v2/meta/EpistemeAI/Alpaca-Llama3.1-8B/cd4698d8-e9d0-4a00-855a-6e0b9cfc31d8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Alpaca-Llama3.1-8B/1762652579.5979578", - "retrieved_timestamp": "1762652579.5979588", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/Alpaca-Llama3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "EpistemeAI/Alpaca-Llama3.1-8B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15986914719610634 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47552608539742874 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3402604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3246343085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/EpistemeAI/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math-KTO-beta/88e9cdd1-ad46-4ad0-9e9b-d872cdb63257.json b/data/hfopenllm_v2/meta/EpistemeAI/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math-KTO-beta/88e9cdd1-ad46-4ad0-9e9b-d872cdb63257.json deleted file mode 100644 index c077b469d..000000000 --- a/data/hfopenllm_v2/meta/EpistemeAI/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math-KTO-beta/88e9cdd1-ad46-4ad0-9e9b-d872cdb63257.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Alpaca-Llama3.1.07-8B-Philos-Math-KTO-beta/1762652579.600618", - "retrieved_timestamp": "1762652579.600619", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math-KTO-beta", - "developer": "meta", - "inference_platform": "unknown", - "id": "EpistemeAI/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math-KTO-beta", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7274010735958367 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48648902139668476 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15256797583081572 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3619375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3543051861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/EpistemeAI/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R2/60d939fa-9ae2-4226-a955-d586c27fea68.json b/data/hfopenllm_v2/meta/EpistemeAI/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R2/60d939fa-9ae2-4226-a955-d586c27fea68.json deleted file mode 100644 index d98e15dff..000000000 --- a/data/hfopenllm_v2/meta/EpistemeAI/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R2/60d939fa-9ae2-4226-a955-d586c27fea68.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/EpistemeAI_Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R2/1762652579.600828", - "retrieved_timestamp": "1762652579.600829", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R2", - "developer": "meta", - "inference_platform": "unknown", - "id": "EpistemeAI/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R2", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46731561146646455 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4932027479020209 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12386706948640483 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46236458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3351894946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/EpistemeAI/Fireball-R1-Llama-3.1-8B-Medical-COT/1bfd3789-e95b-487c-9c8a-516c017f6558.json b/data/hfopenllm_v2/meta/EpistemeAI/Fireball-R1-Llama-3.1-8B-Medical-COT/1bfd3789-e95b-487c-9c8a-516c017f6558.json deleted file mode 100644 index 67b79abde..000000000 --- a/data/hfopenllm_v2/meta/EpistemeAI/Fireball-R1-Llama-3.1-8B-Medical-COT/1bfd3789-e95b-487c-9c8a-516c017f6558.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-R1-Llama-3.1-8B-Medical-COT/1762652579.603883", - "retrieved_timestamp": "1762652579.603883", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/Fireball-R1-Llama-3.1-8B-Medical-COT", - "developer": "meta", - "inference_platform": "unknown", - "id": "EpistemeAI/Fireball-R1-Llama-3.1-8B-Medical-COT", - "additional_details": { - "precision": 
"bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3216111029845255 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37162741490176326 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3270392749244713 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31136458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1402094414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/EpistemeAI/Fireball-R1-Llama-3.1-8B/85ff1b65-eade-4d70-a278-99605f324e5a.json b/data/hfopenllm_v2/meta/EpistemeAI/Fireball-R1-Llama-3.1-8B/85ff1b65-eade-4d70-a278-99605f324e5a.json deleted file mode 100644 index a7d150f3a..000000000 --- a/data/hfopenllm_v2/meta/EpistemeAI/Fireball-R1-Llama-3.1-8B/85ff1b65-eade-4d70-a278-99605f324e5a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-R1-Llama-3.1-8B/1762652579.603668", - "retrieved_timestamp": "1762652579.603669", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/Fireball-R1-Llama-3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "EpistemeAI/Fireball-R1-Llama-3.1-8B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4427363839058143 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36434977901496834 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2483221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32879166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11153590425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/EpistemeAI/Fireball-R1.1-Llama-3.1-8B/5938f7d8-dddb-4989-81c6-e57e177e52c9.json b/data/hfopenllm_v2/meta/EpistemeAI/Fireball-R1.1-Llama-3.1-8B/5938f7d8-dddb-4989-81c6-e57e177e52c9.json deleted file mode 100644 index d06176c2f..000000000 --- a/data/hfopenllm_v2/meta/EpistemeAI/Fireball-R1.1-Llama-3.1-8B/5938f7d8-dddb-4989-81c6-e57e177e52c9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-R1.1-Llama-3.1-8B/1762652579.604102", - "retrieved_timestamp": "1762652579.604102", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/Fireball-R1.1-Llama-3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "EpistemeAI/Fireball-R1.1-Llama-3.1-8B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3676234613048932 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33260007841271594 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13821752265861026 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3419375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11153590425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/EpistemeAI/Llama-3.2-3B-Agent007-Coder/ab812077-8d2b-40f8-bc49-65fffd7f6f26.json b/data/hfopenllm_v2/meta/EpistemeAI/Llama-3.2-3B-Agent007-Coder/ab812077-8d2b-40f8-bc49-65fffd7f6f26.json deleted file mode 100644 index da319e3b6..000000000 --- a/data/hfopenllm_v2/meta/EpistemeAI/Llama-3.2-3B-Agent007-Coder/ab812077-8d2b-40f8-bc49-65fffd7f6f26.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Llama-3.2-3B-Agent007-Coder/1762652579.6043148", - "retrieved_timestamp": "1762652579.6043148", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/Llama-3.2-3B-Agent007-Coder", - "developer": "meta", - "inference_platform": "unknown", - "id": "EpistemeAI/Llama-3.2-3B-Agent007-Coder", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5399562050913798 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4303758760727905 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11102719033232629 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36680208333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28515625 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/EpistemeAI/OpenReasoner-Llama-3.2-3B-rs1.0/610f3053-b2a9-45a8-ac09-af3edcb8c826.json b/data/hfopenllm_v2/meta/EpistemeAI/OpenReasoner-Llama-3.2-3B-rs1.0/610f3053-b2a9-45a8-ac09-af3edcb8c826.json deleted file mode 100644 index 67c19b5aa..000000000 --- a/data/hfopenllm_v2/meta/EpistemeAI/OpenReasoner-Llama-3.2-3B-rs1.0/610f3053-b2a9-45a8-ac09-af3edcb8c826.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/EpistemeAI_OpenReasoner-Llama-3.2-3B-rs1.0/1762652579.604741", - "retrieved_timestamp": "1762652579.6047418", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/OpenReasoner-Llama-3.2-3B-rs1.0", - "developer": "meta", - "inference_platform": "unknown", - "id": "EpistemeAI/OpenReasoner-Llama-3.2-3B-rs1.0", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7274010735958367 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45185934849403964 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13444108761329304 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3460625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31341422872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT-V2-ORPO/14560449-0481-4346-aab2-ff75fdab691b.json b/data/hfopenllm_v2/meta/EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT-V2-ORPO/14560449-0481-4346-aab2-ff75fdab691b.json deleted file mode 100644 index c6f74dba6..000000000 --- a/data/hfopenllm_v2/meta/EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT-V2-ORPO/14560449-0481-4346-aab2-ff75fdab691b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Reasoning-Llama-3.1-CoT-RE1-NMT-V2-ORPO/1762652579.606164", - "retrieved_timestamp": "1762652579.606165", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT-V2-ORPO", - "developer": "meta", - "inference_platform": "unknown", - "id": "EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT-V2-ORPO", - "additional_details": { - "precision": "float16", - "architecture": 
"LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4553263119633683 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4804219047211424 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12915407854984895 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.393125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3597905585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT/807ed760-775e-4082-90ea-7b524038bebf.json b/data/hfopenllm_v2/meta/EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT/807ed760-775e-4082-90ea-7b524038bebf.json deleted file mode 100644 index 9a6e8eb30..000000000 --- a/data/hfopenllm_v2/meta/EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT/807ed760-775e-4082-90ea-7b524038bebf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Reasoning-Llama-3.1-CoT-RE1-NMT/1762652579.6059399", - "retrieved_timestamp": "1762652579.605941", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT", - "developer": "meta", - "inference_platform": "unknown", - "id": "EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4828532737580731 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47357563863974517 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31821875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33427526595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1-8B-Philos/392ea212-afd9-44a3-a6bb-2bba8f124492.json b/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1-8B-Philos/392ea212-afd9-44a3-a6bb-2bba8f124492.json deleted file mode 100644 index b860c3881..000000000 --- a/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1-8B-Philos/392ea212-afd9-44a3-a6bb-2bba8f124492.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Alpaca-Llama3.1-8B-Philos/1762652579.6100821", - "retrieved_timestamp": "1762652579.610083", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI2/Fireball-Alpaca-Llama3.1-8B-Philos", - "developer": "meta", - "inference_platform": "unknown", - "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1-8B-Philos", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.498640274471735 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4977581192690881 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11858006042296072 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42766666666666664 - } - }, - { - "evaluation_name": 
"MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3405917553191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.01-8B-Philos/536229bc-b1fb-4078-826c-074b09c362b9.json b/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.01-8B-Philos/536229bc-b1fb-4078-826c-074b09c362b9.json deleted file mode 100644 index ec15a5557..000000000 --- a/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.01-8B-Philos/536229bc-b1fb-4078-826c-074b09c362b9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Alpaca-Llama3.1.01-8B-Philos/1762652579.610341", - "retrieved_timestamp": "1762652579.610341", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI2/Fireball-Alpaca-Llama3.1.01-8B-Philos", - "developer": "meta", - "inference_platform": "unknown", - "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.01-8B-Philos", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42117913802045237 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49561092312727917 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13595166163141995 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43706249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33834773936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.03-8B-Philos/b77a4371-97d7-43a0-892f-a0c01c2b8528.json b/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.03-8B-Philos/b77a4371-97d7-43a0-892f-a0c01c2b8528.json deleted file mode 100644 index c2e9d84cb..000000000 --- a/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.03-8B-Philos/b77a4371-97d7-43a0-892f-a0c01c2b8528.json +++ 
/dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Alpaca-Llama3.1.03-8B-Philos/1762652579.6105568", - "retrieved_timestamp": "1762652579.610558", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI2/Fireball-Alpaca-Llama3.1.03-8B-Philos", - "developer": "meta", - "inference_platform": "unknown", - "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.03-8B-Philos", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3880814017916905 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49508699339363266 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42801041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3355219414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.04-8B-Philos/de05ec0d-805d-4aa5-8ec3-1dc7446e6c1a.json b/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.04-8B-Philos/de05ec0d-805d-4aa5-8ec3-1dc7446e6c1a.json deleted file mode 100644 index c8908639b..000000000 --- a/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.04-8B-Philos/de05ec0d-805d-4aa5-8ec3-1dc7446e6c1a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Alpaca-Llama3.1.04-8B-Philos/1762652579.6107578", - "retrieved_timestamp": "1762652579.610759", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI2/Fireball-Alpaca-Llama3.1.04-8B-Philos", - "developer": "meta", - "inference_platform": "unknown", - "id": 
"EpistemeAI2/Fireball-Alpaca-Llama3.1.04-8B-Philos", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40843960690966635 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4930009712421776 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43721875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3402593085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math/2790feab-6850-4d51-a3a1-78ada0c56d03.json b/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math/2790feab-6850-4d51-a3a1-78ada0c56d03.json deleted file mode 100644 index 8625ddb49..000000000 --- a/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math/2790feab-6850-4d51-a3a1-78ada0c56d03.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Alpaca-Llama3.1.07-8B-Philos-Math/1762652579.611186", - "retrieved_timestamp": "1762652579.611187", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI2/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math", - "developer": "meta", - "inference_platform": "unknown", - "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5079079065767719 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.4847020640542447 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40630208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35305851063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-C-R1-KTO-Reflection/42a38b08-6eb7-449d-99c5-cb0b2b76dd06.json b/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-C-R1-KTO-Reflection/42a38b08-6eb7-449d-99c5-cb0b2b76dd06.json deleted file mode 100644 index e3010cf93..000000000 --- a/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-C-R1-KTO-Reflection/42a38b08-6eb7-449d-99c5-cb0b2b76dd06.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Alpaca-Llama3.1.08-8B-C-R1-KTO-Reflection/1762652579.611454", - "retrieved_timestamp": "1762652579.611454", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-C-R1-KTO-Reflection", - "developer": "meta", - "inference_platform": "unknown", - "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-C-R1-KTO-Reflection", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39522577871159636 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49553052334314723 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12462235649546828 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4048125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35929188829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R1/9ce9031b-76fd-4c33-b209-3011643d9266.json b/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R1/9ce9031b-76fd-4c33-b209-3011643d9266.json deleted file mode 100644 index e141bcfc5..000000000 --- a/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R1/9ce9031b-76fd-4c33-b209-3011643d9266.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R1/1762652579.611669", - "retrieved_timestamp": "1762652579.61167", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R1", - "developer": "meta", - "inference_platform": "unknown", - "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5316382753316755 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4827931104634334 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12386706948640483 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4103020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3523105053191489 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Llama-3.1-8B-Philos-Reflection/5ea20ab3-9d05-43f1-a276-7acbd2229fe8.json b/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Llama-3.1-8B-Philos-Reflection/5ea20ab3-9d05-43f1-a276-7acbd2229fe8.json deleted file mode 100644 index efb0e091e..000000000 --- a/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Llama-3.1-8B-Philos-Reflection/5ea20ab3-9d05-43f1-a276-7acbd2229fe8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Llama-3.1-8B-Philos-Reflection/1762652579.6118872", - "retrieved_timestamp": "1762652579.6118872", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI2/Fireball-Llama-3.1-8B-Philos-Reflection", - "developer": "meta", - "inference_platform": "unknown", - "id": "EpistemeAI2/Fireball-Llama-3.1-8B-Philos-Reflection", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3596047376516532 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4897693552241443 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3957291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3550531914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Etherll/Herplete-LLM-Llama-3.1-8b-Ties/febdde9e-8e67-458b-be79-6a9c91a7237a.json b/data/hfopenllm_v2/meta/Etherll/Herplete-LLM-Llama-3.1-8b-Ties/febdde9e-8e67-458b-be79-6a9c91a7237a.json deleted file mode 100644 index 08897020e..000000000 --- a/data/hfopenllm_v2/meta/Etherll/Herplete-LLM-Llama-3.1-8b-Ties/febdde9e-8e67-458b-be79-6a9c91a7237a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Etherll_Herplete-LLM-Llama-3.1-8b-Ties/1762652579.614388", - "retrieved_timestamp": "1762652579.614389", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Etherll/Herplete-LLM-Llama-3.1-8b-Ties", - "developer": "meta", - "inference_platform": "unknown", - "id": "Etherll/Herplete-LLM-Llama-3.1-8b-Ties", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6163679038285084 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5337975953250876 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16012084592145015 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40171874999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.375249335106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Etherll/Herplete-LLM-Llama-3.1-8b/3d70d2d7-1510-45de-93dc-1ba93cb0f24a.json b/data/hfopenllm_v2/meta/Etherll/Herplete-LLM-Llama-3.1-8b/3d70d2d7-1510-45de-93dc-1ba93cb0f24a.json deleted file mode 100644 index 0131221ed..000000000 --- a/data/hfopenllm_v2/meta/Etherll/Herplete-LLM-Llama-3.1-8b/3d70d2d7-1510-45de-93dc-1ba93cb0f24a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Etherll_Herplete-LLM-Llama-3.1-8b/1762652579.614203", - "retrieved_timestamp": "1762652579.614203", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Etherll/Herplete-LLM-Llama-3.1-8b", - "developer": "meta", - "inference_platform": "unknown", - "id": "Etherll/Herplete-LLM-Llama-3.1-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6105976586568084 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5347253355929804 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15483383685800603 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3990520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.375249335106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Etherll/Herplete-LLM-Llama-3.1-8b/52e6e50e-4621-491f-9e46-8d6d398c4344.json b/data/hfopenllm_v2/meta/Etherll/Herplete-LLM-Llama-3.1-8b/52e6e50e-4621-491f-9e46-8d6d398c4344.json deleted file mode 100644 index 06e7f2a56..000000000 --- a/data/hfopenllm_v2/meta/Etherll/Herplete-LLM-Llama-3.1-8b/52e6e50e-4621-491f-9e46-8d6d398c4344.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Etherll_Herplete-LLM-Llama-3.1-8b/1762652579.613958", - "retrieved_timestamp": "1762652579.6139588", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Etherll/Herplete-LLM-Llama-3.1-8b", - "developer": "meta", - "inference_platform": "unknown", - "id": "Etherll/Herplete-LLM-Llama-3.1-8b", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46719149634082013 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5013428726325629 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38599999999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34815492021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Etherll/Replete-LLM-V3-Llama-3.1-8b/66846c9d-e2bc-416d-95b4-fed31d1b781b.json b/data/hfopenllm_v2/meta/Etherll/Replete-LLM-V3-Llama-3.1-8b/66846c9d-e2bc-416d-95b4-fed31d1b781b.json deleted file mode 100644 index 0314efe89..000000000 --- a/data/hfopenllm_v2/meta/Etherll/Replete-LLM-V3-Llama-3.1-8b/66846c9d-e2bc-416d-95b4-fed31d1b781b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Etherll_Replete-LLM-V3-Llama-3.1-8b/1762652579.6150668", - "retrieved_timestamp": "1762652579.615068", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Etherll/Replete-LLM-V3-Llama-3.1-8b", - "developer": "meta", - "inference_platform": "unknown", - "id": "Etherll/Replete-LLM-V3-Llama-3.1-8b", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5262924595628488 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4543377420594779 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22734138972809667 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3516458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34699135638297873 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/meta/Eurdem/Defne-llama3.1-8B/52eb695b-3d17-4abe-a386-7927348e5dd5.json b/data/hfopenllm_v2/meta/Eurdem/Defne-llama3.1-8B/52eb695b-3d17-4abe-a386-7927348e5dd5.json deleted file mode 100644 index 05632eab4..000000000 --- a/data/hfopenllm_v2/meta/Eurdem/Defne-llama3.1-8B/52eb695b-3d17-4abe-a386-7927348e5dd5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Eurdem_Defne-llama3.1-8B/1762652579.615498", - "retrieved_timestamp": "1762652579.615499", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Eurdem/Defne-llama3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "Eurdem/Defne-llama3.1-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5036115285220991 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5320979090308238 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16012084592145015 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43309375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3865525265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/GenVRadmin/llama38bGenZ_Vikas-Merged/22a01298-038f-4069-b847-43409d2d4baa.json b/data/hfopenllm_v2/meta/GenVRadmin/llama38bGenZ_Vikas-Merged/22a01298-038f-4069-b847-43409d2d4baa.json deleted file mode 100644 index 8582011bc..000000000 --- a/data/hfopenllm_v2/meta/GenVRadmin/llama38bGenZ_Vikas-Merged/22a01298-038f-4069-b847-43409d2d4baa.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/GenVRadmin_llama38bGenZ_Vikas-Merged/1762652579.627924", - "retrieved_timestamp": "1762652579.627925", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - 
"source_type": "documentation" - }, - "model_info": { - "name": "GenVRadmin/llama38bGenZ_Vikas-Merged", - "developer": "meta", - "inference_platform": "unknown", - "id": "GenVRadmin/llama38bGenZ_Vikas-Merged", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30002947734234053 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4535981003984562 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44016666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26221742021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Groq/Llama-3-Groq-8B-Tool-Use/636b3b4a-dc1f-4008-83ba-0d83fdcd5acb.json b/data/hfopenllm_v2/meta/Groq/Llama-3-Groq-8B-Tool-Use/636b3b4a-dc1f-4008-83ba-0d83fdcd5acb.json deleted file mode 100644 index d80132ade..000000000 --- a/data/hfopenllm_v2/meta/Groq/Llama-3-Groq-8B-Tool-Use/636b3b4a-dc1f-4008-83ba-0d83fdcd5acb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Groq_Llama-3-Groq-8B-Tool-Use/1762652579.633301", - "retrieved_timestamp": "1762652579.633302", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Groq/Llama-3-Groq-8B-Tool-Use", - "developer": "meta", - "inference_platform": "unknown", - "id": "Groq/Llama-3-Groq-8B-Tool-Use", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6098230472922956 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4863384977901497 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36603125000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33992686170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Gryphe/Pantheon-RP-1.0-8b-Llama-3/a3abb802-acd8-49c7-bcff-3b79a4023d96.json b/data/hfopenllm_v2/meta/Gryphe/Pantheon-RP-1.0-8b-Llama-3/a3abb802-acd8-49c7-bcff-3b79a4023d96.json deleted file mode 100644 index 21e325f83..000000000 --- a/data/hfopenllm_v2/meta/Gryphe/Pantheon-RP-1.0-8b-Llama-3/a3abb802-acd8-49c7-bcff-3b79a4023d96.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Gryphe_Pantheon-RP-1.0-8b-Llama-3/1762652579.633556", - "retrieved_timestamp": "1762652579.633556", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Gryphe/Pantheon-RP-1.0-8b-Llama-3", - "developer": "meta", - "inference_platform": "unknown", - "id": "Gryphe/Pantheon-RP-1.0-8b-Llama-3", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39325212657969744 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4539075127777334 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3832395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30668218085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/HPAI-BSC/Llama3-Aloe-8B-Alpha/10d1f626-64f0-4f43-9597-1221cf94c948.json b/data/hfopenllm_v2/meta/HPAI-BSC/Llama3-Aloe-8B-Alpha/10d1f626-64f0-4f43-9597-1221cf94c948.json deleted file mode 100644 index e36c4917b..000000000 --- a/data/hfopenllm_v2/meta/HPAI-BSC/Llama3-Aloe-8B-Alpha/10d1f626-64f0-4f43-9597-1221cf94c948.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/HPAI-BSC_Llama3-Aloe-8B-Alpha/1762652579.6361432", - "retrieved_timestamp": "1762652579.6361442", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "HPAI-BSC/Llama3-Aloe-8B-Alpha", - "developer": "meta", - "inference_platform": "unknown", - "id": "HPAI-BSC/Llama3-Aloe-8B-Alpha", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5081073773144147 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48308532966126966 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3672708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3295378989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/HPAI-BSC/Llama3.1-Aloe-Beta-8B/d7410909-8a7c-4afb-9cab-2537f837a9a1.json b/data/hfopenllm_v2/meta/HPAI-BSC/Llama3.1-Aloe-Beta-8B/d7410909-8a7c-4afb-9cab-2537f837a9a1.json deleted file mode 100644 index 0792c0cc8..000000000 --- a/data/hfopenllm_v2/meta/HPAI-BSC/Llama3.1-Aloe-Beta-8B/d7410909-8a7c-4afb-9cab-2537f837a9a1.json +++ /dev/null @@ -1,105 +0,0 
@@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/HPAI-BSC_Llama3.1-Aloe-Beta-8B/1762652579.636478", - "retrieved_timestamp": "1762652579.636513", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "HPAI-BSC/Llama3.1-Aloe-Beta-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "HPAI-BSC/Llama3.1-Aloe-Beta-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7253276860951166 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5092760762748857 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18277945619335348 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3834583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35804521276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Hastagaras/Llama-3.1-Jamet-8B-MK.I/be7d90fa-86be-4f3b-a3ef-2e1475b7bd64.json b/data/hfopenllm_v2/meta/Hastagaras/Llama-3.1-Jamet-8B-MK.I/be7d90fa-86be-4f3b-a3ef-2e1475b7bd64.json deleted file mode 100644 index a7bef9352..000000000 --- a/data/hfopenllm_v2/meta/Hastagaras/Llama-3.1-Jamet-8B-MK.I/be7d90fa-86be-4f3b-a3ef-2e1475b7bd64.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Hastagaras_Llama-3.1-Jamet-8B-MK.I/1762652579.637886", - "retrieved_timestamp": "1762652579.637887", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Hastagaras/Llama-3.1-Jamet-8B-MK.I", - "developer": "meta", - "inference_platform": "unknown", - "id": "Hastagaras/Llama-3.1-Jamet-8B-MK.I", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7338207068356406 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5048666433733161 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1268882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3726041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3482380319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Hastagaras/Zabuza-8B-Llama-3.1/fb698ce2-d422-46eb-aa98-17fb7645461a.json b/data/hfopenllm_v2/meta/Hastagaras/Zabuza-8B-Llama-3.1/fb698ce2-d422-46eb-aa98-17fb7645461a.json deleted file mode 100644 index 792cbc0c4..000000000 --- a/data/hfopenllm_v2/meta/Hastagaras/Zabuza-8B-Llama-3.1/fb698ce2-d422-46eb-aa98-17fb7645461a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Hastagaras_Zabuza-8B-Llama-3.1/1762652579.638141", - "retrieved_timestamp": "1762652579.6381419", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Hastagaras/Zabuza-8B-Llama-3.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "Hastagaras/Zabuza-8B-Llama-3.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6265342624237025 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4538915742546196 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.05513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3567916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29230385638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/HiroseKoichi/Llama-Salad-4x8B-V3/69037dce-5276-4e26-aa05-0a7bd2c4739b.json b/data/hfopenllm_v2/meta/HiroseKoichi/Llama-Salad-4x8B-V3/69037dce-5276-4e26-aa05-0a7bd2c4739b.json deleted file mode 100644 index 488c09054..000000000 --- a/data/hfopenllm_v2/meta/HiroseKoichi/Llama-Salad-4x8B-V3/69037dce-5276-4e26-aa05-0a7bd2c4739b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/HiroseKoichi_Llama-Salad-4x8B-V3/1762652579.640251", - "retrieved_timestamp": "1762652579.6402519", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "HiroseKoichi/Llama-Salad-4x8B-V3", - "developer": "meta", - "inference_platform": "unknown", - "id": "HiroseKoichi/Llama-Salad-4x8B-V3", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 24.942 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6653523761397536 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5244649789001753 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09592145015105741 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37403125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.351811835106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/HoangHa/Pensez-Llama3.1-8B/d27e73c5-654c-48c6-ad60-652a60bda72c.json b/data/hfopenllm_v2/meta/HoangHa/Pensez-Llama3.1-8B/d27e73c5-654c-48c6-ad60-652a60bda72c.json deleted file mode 100644 index 192db37ae..000000000 --- a/data/hfopenllm_v2/meta/HoangHa/Pensez-Llama3.1-8B/d27e73c5-654c-48c6-ad60-652a60bda72c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/HoangHa_Pensez-Llama3.1-8B/1762652579.640512", - "retrieved_timestamp": "1762652579.640512", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "HoangHa/Pensez-Llama3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "HoangHa/Pensez-Llama3.1-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3886809221753835 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46691313514505667 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1148036253776435 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3596979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31258311170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/IDEA-CCNL/Ziya-LLaMA-13B-v1/98616cce-563a-4977-b5c0-bf8df3102303.json b/data/hfopenllm_v2/meta/IDEA-CCNL/Ziya-LLaMA-13B-v1/98616cce-563a-4977-b5c0-bf8df3102303.json deleted file mode 100644 index ec4a64141..000000000 --- a/data/hfopenllm_v2/meta/IDEA-CCNL/Ziya-LLaMA-13B-v1/98616cce-563a-4977-b5c0-bf8df3102303.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/IDEA-CCNL_Ziya-LLaMA-13B-v1/1762652579.645581", - "retrieved_timestamp": "1762652579.645581", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "IDEA-CCNL/Ziya-LLaMA-13B-v1", - "developer": "meta", - "inference_platform": "unknown", - "id": "IDEA-CCNL/Ziya-LLaMA-13B-v1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16968643200042555 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28770292445409473 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24916107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37505208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11012300531914894 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Infinirc/Infinirc-Llama3-8B-2G-Release-v1.0/8c8a47f2-c8cf-4ea8-b0ee-0180aeb1b9f0.json b/data/hfopenllm_v2/meta/Infinirc/Infinirc-Llama3-8B-2G-Release-v1.0/8c8a47f2-c8cf-4ea8-b0ee-0180aeb1b9f0.json deleted file mode 100644 index ee5947748..000000000 --- a/data/hfopenllm_v2/meta/Infinirc/Infinirc-Llama3-8B-2G-Release-v1.0/8c8a47f2-c8cf-4ea8-b0ee-0180aeb1b9f0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Infinirc_Infinirc-Llama3-8B-2G-Release-v1.0/1762652579.6465652", - "retrieved_timestamp": "1762652579.6465652", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Infinirc/Infinirc-Llama3-8B-2G-Release-v1.0", - "developer": "meta", - "inference_platform": "unknown", - "id": "Infinirc/Infinirc-Llama3-8B-2G-Release-v1.0", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20243398626754788 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43507435668237937 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4609375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21600731382978725 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/IntervitensInc/internlm2_5-20b-llamafied/5be7b084-b018-457a-a5d6-c9e3e9d3f70e.json b/data/hfopenllm_v2/meta/IntervitensInc/internlm2_5-20b-llamafied/5be7b084-b018-457a-a5d6-c9e3e9d3f70e.json deleted file mode 100644 index 941170ce9..000000000 --- a/data/hfopenllm_v2/meta/IntervitensInc/internlm2_5-20b-llamafied/5be7b084-b018-457a-a5d6-c9e3e9d3f70e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/IntervitensInc_internlm2_5-20b-llamafied/1762652579.6480021", - "retrieved_timestamp": "1762652579.648003", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "IntervitensInc/internlm2_5-20b-llamafied", - "developer": "meta", - "inference_platform": "unknown", - "id": "IntervitensInc/internlm2_5-20b-llamafied", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 19.861 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3409952260003457 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7478466526577329 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1714501510574018 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44754166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4050864361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/JackFram/llama-160m/11a0fc6d-5370-456e-8c01-5d7ed19e4b59.json b/data/hfopenllm_v2/meta/JackFram/llama-160m/11a0fc6d-5370-456e-8c01-5d7ed19e4b59.json deleted file mode 100644 index 328268047..000000000 --- a/data/hfopenllm_v2/meta/JackFram/llama-160m/11a0fc6d-5370-456e-8c01-5d7ed19e4b59.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JackFram_llama-160m/1762652579.649858", - "retrieved_timestamp": "1762652579.649858", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JackFram/llama-160m", - "developer": "meta", - "inference_platform": "unknown", - "id": "JackFram/llama-160m", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.162 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1791036671586945 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28880217539042424 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3792083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11278257978723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/JackFram/llama-68m/3b05e3fd-4bf0-42a3-8dc5-13292ece8c77.json b/data/hfopenllm_v2/meta/JackFram/llama-68m/3b05e3fd-4bf0-42a3-8dc5-13292ece8c77.json deleted file mode 100644 index 5c7a77628..000000000 --- 
a/data/hfopenllm_v2/meta/JackFram/llama-68m/3b05e3fd-4bf0-42a3-8dc5-13292ece8c77.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/JackFram_llama-68m/1762652579.650121", - "retrieved_timestamp": "1762652579.650121", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "JackFram/llama-68m", - "developer": "meta", - "inference_platform": "unknown", - "id": "JackFram/llama-68m", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.068 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17263416623448008 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29362986509336414 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3909895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11436170212765957 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Joseph717171/Hermes-3-Llama-3.1-8B_TIES_with_Base_Embeds_Initialized_to_Special_Instruct_Toks_dtypeF32/26dd2a1f-27ae-4311-9b80-f5a8f0fa456a.json b/data/hfopenllm_v2/meta/Joseph717171/Hermes-3-Llama-3.1-8B_TIES_with_Base_Embeds_Initialized_to_Special_Instruct_Toks_dtypeF32/26dd2a1f-27ae-4311-9b80-f5a8f0fa456a.json deleted file mode 100644 index 225e98fb9..000000000 --- a/data/hfopenllm_v2/meta/Joseph717171/Hermes-3-Llama-3.1-8B_TIES_with_Base_Embeds_Initialized_to_Special_Instruct_Toks_dtypeF32/26dd2a1f-27ae-4311-9b80-f5a8f0fa456a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Joseph717171_Hermes-3-Llama-3.1-8B_TIES_with_Base_Embeds_Initialized_to_Special_Instruct_Toks_dtypeF32/1762652579.694483", - "retrieved_timestamp": "1762652579.694484", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open 
LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Joseph717171/Hermes-3-Llama-3.1-8B_TIES_with_Base_Embeds_Initialized_to_Special_Instruct_Toks_dtypeF32", - "developer": "meta", - "inference_platform": "unknown", - "id": "Joseph717171/Hermes-3-Llama-3.1-8B_TIES_with_Base_Embeds_Initialized_to_Special_Instruct_Toks_dtypeF32", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6185410266980501 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5177452540141246 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4369375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31441156914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Joseph717171/Llama-3.1-SuperNova-8B-Lite_TIES_with_Base/e5843711-00cb-4167-a47d-4874af0c3ba2.json b/data/hfopenllm_v2/meta/Joseph717171/Llama-3.1-SuperNova-8B-Lite_TIES_with_Base/e5843711-00cb-4167-a47d-4874af0c3ba2.json deleted file mode 100644 index e78ebb6e6..000000000 --- a/data/hfopenllm_v2/meta/Joseph717171/Llama-3.1-SuperNova-8B-Lite_TIES_with_Base/e5843711-00cb-4167-a47d-4874af0c3ba2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Joseph717171_Llama-3.1-SuperNova-8B-Lite_TIES_with_Base/1762652579.6947358", - "retrieved_timestamp": "1762652579.694737", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Joseph717171/Llama-3.1-SuperNova-8B-Lite_TIES_with_Base", - "developer": "meta", - "inference_platform": "unknown", - "id": "Joseph717171/Llama-3.1-SuperNova-8B-Lite_TIES_with_Base", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8096328851890761 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5147423127141911 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18353474320241692 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4109895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38804853723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Josephgflowers/Differential-Attention-Liquid-Metal-Tinyllama/670580f3-ca8a-473d-a3df-8c01952bda00.json b/data/hfopenllm_v2/meta/Josephgflowers/Differential-Attention-Liquid-Metal-Tinyllama/670580f3-ca8a-473d-a3df-8c01952bda00.json deleted file mode 100644 index 81d20b7d4..000000000 --- a/data/hfopenllm_v2/meta/Josephgflowers/Differential-Attention-Liquid-Metal-Tinyllama/670580f3-ca8a-473d-a3df-8c01952bda00.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Josephgflowers_Differential-Attention-Liquid-Metal-Tinyllama/1762652579.695199", - "retrieved_timestamp": "1762652579.6952", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Josephgflowers/Differential-Attention-Liquid-Metal-Tinyllama", - "developer": "meta", - "inference_platform": "unknown", - "id": "Josephgflowers/Differential-Attention-Liquid-Metal-Tinyllama", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.1 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22269245601670234 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.292556113105267 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.0324773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25083892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33555208333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12142619680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Josephgflowers/TinyLlama-Cinder-Agent-v1/00332c0d-d698-4ecd-9c2d-5f56921709d5.json b/data/hfopenllm_v2/meta/Josephgflowers/TinyLlama-Cinder-Agent-v1/00332c0d-d698-4ecd-9c2d-5f56921709d5.json deleted file mode 100644 index 6db903420..000000000 --- a/data/hfopenllm_v2/meta/Josephgflowers/TinyLlama-Cinder-Agent-v1/00332c0d-d698-4ecd-9c2d-5f56921709d5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Josephgflowers_TinyLlama-Cinder-Agent-v1/1762652579.695456", - "retrieved_timestamp": "1762652579.695457", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Josephgflowers/TinyLlama-Cinder-Agent-v1", - "developer": "meta", - "inference_platform": "unknown", - "id": "Josephgflowers/TinyLlama-Cinder-Agent-v1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.1 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26695612087040166 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31160367351776513 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24412751677852348 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33945833333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11610704787234043 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Josephgflowers/TinyLlama-v1.1-Cinders-World/2b993039-8980-4578-a9e2-a22a39385664.json b/data/hfopenllm_v2/meta/Josephgflowers/TinyLlama-v1.1-Cinders-World/2b993039-8980-4578-a9e2-a22a39385664.json deleted file mode 100644 index e554b1681..000000000 --- a/data/hfopenllm_v2/meta/Josephgflowers/TinyLlama-v1.1-Cinders-World/2b993039-8980-4578-a9e2-a22a39385664.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Josephgflowers_TinyLlama-v1.1-Cinders-World/1762652579.6958752", - "retrieved_timestamp": "1762652579.6958761", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Josephgflowers/TinyLlama-v1.1-Cinders-World", - "developer": "meta", - "inference_platform": "unknown", - "id": "Josephgflowers/TinyLlama-v1.1-Cinders-World", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.1 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24692260978647768 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29979653176003074 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24412751677852348 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3356145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11984707446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Josephgflowers/TinyLlama_v1.1_math_code-world-test-1/72cf7999-e4cb-4987-a694-cdcfae37bb02.json b/data/hfopenllm_v2/meta/Josephgflowers/TinyLlama_v1.1_math_code-world-test-1/72cf7999-e4cb-4987-a694-cdcfae37bb02.json deleted file mode 100644 index 2dea4a4c4..000000000 --- a/data/hfopenllm_v2/meta/Josephgflowers/TinyLlama_v1.1_math_code-world-test-1/72cf7999-e4cb-4987-a694-cdcfae37bb02.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/Josephgflowers_TinyLlama_v1.1_math_code-world-test-1/1762652579.696125", - "retrieved_timestamp": "1762652579.696125", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Josephgflowers/TinyLlama_v1.1_math_code-world-test-1", - "developer": "meta", - "inference_platform": "unknown", - "id": "Josephgflowers/TinyLlama_v1.1_math_code-world-test-1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.1 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00784363267242029 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31463497508928434 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23406040268456377 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34990625000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11319813829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Josephgflowers/Tinyllama-STEM-Cinder-Agent-v1/0c22748e-74ad-4bac-a714-c64a19a88af7.json b/data/hfopenllm_v2/meta/Josephgflowers/Tinyllama-STEM-Cinder-Agent-v1/0c22748e-74ad-4bac-a714-c64a19a88af7.json deleted file mode 100644 index 3c8f24fab..000000000 --- a/data/hfopenllm_v2/meta/Josephgflowers/Tinyllama-STEM-Cinder-Agent-v1/0c22748e-74ad-4bac-a714-c64a19a88af7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Josephgflowers_Tinyllama-STEM-Cinder-Agent-v1/1762652579.696357", - "retrieved_timestamp": "1762652579.696357", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Josephgflowers/Tinyllama-STEM-Cinder-Agent-v1", - "developer": "meta", - "inference_platform": "unknown", - "id": "Josephgflowers/Tinyllama-STEM-Cinder-Agent-v1", - "additional_details": { - "precision": "float16", - 
"architecture": "LlamaForCausalLM", - "params_billions": 1.1 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21257596510591897 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30843808427144626 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06722054380664652 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2348993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.334125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10862699468085106 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Josephgflowers/Tinyllama-r1/4293bc9f-4968-4af9-acd2-0ada64be43d4.json b/data/hfopenllm_v2/meta/Josephgflowers/Tinyllama-r1/4293bc9f-4968-4af9-acd2-0ada64be43d4.json deleted file mode 100644 index 23ce926e4..000000000 --- a/data/hfopenllm_v2/meta/Josephgflowers/Tinyllama-r1/4293bc9f-4968-4af9-acd2-0ada64be43d4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Josephgflowers_Tinyllama-r1/1762652579.6965919", - "retrieved_timestamp": "1762652579.6965928", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Josephgflowers/Tinyllama-r1", - "developer": "meta", - "inference_platform": "unknown", - "id": "Josephgflowers/Tinyllama-r1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.1 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2119265770378152 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3014631984266974 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.0324773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33148958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11344747340425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/KingNish/Reasoning-Llama-3b-v0.1/5f6f312f-3131-417d-b12e-3e30bb998d27.json b/data/hfopenllm_v2/meta/KingNish/Reasoning-Llama-3b-v0.1/5f6f312f-3131-417d-b12e-3e30bb998d27.json deleted file mode 100644 index 984cb85b1..000000000 --- a/data/hfopenllm_v2/meta/KingNish/Reasoning-Llama-3b-v0.1/5f6f312f-3131-417d-b12e-3e30bb998d27.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/KingNish_Reasoning-Llama-3b-v0.1/1762652579.69997", - "retrieved_timestamp": "1762652579.699971", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "KingNish/Reasoning-Llama-3b-v0.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "KingNish/Reasoning-Llama-3b-v0.1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6224628430342602 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43433592509582786 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31676041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3029421542553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Kukedlc/NeuralLLaMa-3-8b-DT-v0.1/ec1bea6a-91e2-41c9-ab54-af84bf1a1d15.json b/data/hfopenllm_v2/meta/Kukedlc/NeuralLLaMa-3-8b-DT-v0.1/ec1bea6a-91e2-41c9-ab54-af84bf1a1d15.json deleted file mode 100644 index 89d13b2a3..000000000 --- a/data/hfopenllm_v2/meta/Kukedlc/NeuralLLaMa-3-8b-DT-v0.1/ec1bea6a-91e2-41c9-ab54-af84bf1a1d15.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Kukedlc_NeuralLLaMa-3-8b-DT-v0.1/1762652579.7021902", - "retrieved_timestamp": "1762652579.702191", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Kukedlc/NeuralLLaMa-3-8b-DT-v0.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "Kukedlc/NeuralLLaMa-3-8b-DT-v0.1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4371412297149342 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4986771544360115 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08081570996978851 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40711458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.379155585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Kukedlc/NeuralLLaMa-3-8b-ORPO-v0.3/02d060d9-d545-445b-8d22-4ae117b8f324.json b/data/hfopenllm_v2/meta/Kukedlc/NeuralLLaMa-3-8b-ORPO-v0.3/02d060d9-d545-445b-8d22-4ae117b8f324.json deleted file mode 100644 index 1bcae3e8c..000000000 --- a/data/hfopenllm_v2/meta/Kukedlc/NeuralLLaMa-3-8b-ORPO-v0.3/02d060d9-d545-445b-8d22-4ae117b8f324.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Kukedlc_NeuralLLaMa-3-8b-ORPO-v0.3/1762652579.7024388", - "retrieved_timestamp": "1762652579.70244", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Kukedlc/NeuralLLaMa-3-8b-ORPO-v0.3", - "developer": "meta", - "inference_platform": "unknown", - "id": "Kukedlc/NeuralLLaMa-3-8b-ORPO-v0.3", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5275912356990563 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4557141539616392 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04833836858006042 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23909395973154363 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37003125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3056848404255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/LEESM/llama-2-7b-hf-lora-oki100p/13881952-9fe3-4308-93d5-912e59465d6e.json b/data/hfopenllm_v2/meta/LEESM/llama-2-7b-hf-lora-oki100p/13881952-9fe3-4308-93d5-912e59465d6e.json deleted file mode 100644 index 78bb0cb2d..000000000 --- a/data/hfopenllm_v2/meta/LEESM/llama-2-7b-hf-lora-oki100p/13881952-9fe3-4308-93d5-912e59465d6e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LEESM_llama-2-7b-hf-lora-oki100p/1762652579.704138", - "retrieved_timestamp": "1762652579.704139", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LEESM/llama-2-7b-hf-lora-oki100p", - "developer": "meta", - "inference_platform": "unknown", - "id": "LEESM/llama-2-7b-hf-lora-oki100p", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.738 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.25129434345314877 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34916752720369776 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3687291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18558843085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/LEESM/llama-2-7b-hf-lora-oki10p/9fb11511-0c66-495a-b634-da6bb0934706.json b/data/hfopenllm_v2/meta/LEESM/llama-2-7b-hf-lora-oki10p/9fb11511-0c66-495a-b634-da6bb0934706.json deleted file mode 100644 index ccfdfb6d9..000000000 --- a/data/hfopenllm_v2/meta/LEESM/llama-2-7b-hf-lora-oki10p/9fb11511-0c66-495a-b634-da6bb0934706.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LEESM_llama-2-7b-hf-lora-oki10p/1762652579.704393", - "retrieved_timestamp": "1762652579.704394", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LEESM/llama-2-7b-hf-lora-oki10p", - "developer": "meta", - "inference_platform": "unknown", - "id": "LEESM/llama-2-7b-hf-lora-oki10p", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.738 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22701432199896276 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3530929513059229 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34752083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16788563829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/LEESM/llama-3-8b-bnb-4b-kowiki231101/5f540be5-6932-41f4-b588-b88f8cfb89c7.json b/data/hfopenllm_v2/meta/LEESM/llama-3-8b-bnb-4b-kowiki231101/5f540be5-6932-41f4-b588-b88f8cfb89c7.json deleted file mode 100644 index d7bcfb4fc..000000000 --- a/data/hfopenllm_v2/meta/LEESM/llama-3-8b-bnb-4b-kowiki231101/5f540be5-6932-41f4-b588-b88f8cfb89c7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LEESM_llama-3-8b-bnb-4b-kowiki231101/1762652579.704602", - "retrieved_timestamp": "1762652579.704603", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LEESM/llama-3-8b-bnb-4b-kowiki231101", - "developer": "meta", - "inference_platform": "unknown", - "id": "LEESM/llama-3-8b-bnb-4b-kowiki231101", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16848739123303944 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4130805653617178 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3551458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24251994680851063 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/meta/LEESM/llama-3-Korean-Bllossom-8B-trexlab-oki10p/629b8df0-6ce3-4230-baf7-45b3944bf0d5.json b/data/hfopenllm_v2/meta/LEESM/llama-3-Korean-Bllossom-8B-trexlab-oki10p/629b8df0-6ce3-4230-baf7-45b3944bf0d5.json deleted file mode 100644 index 34eb17eeb..000000000 --- a/data/hfopenllm_v2/meta/LEESM/llama-3-Korean-Bllossom-8B-trexlab-oki10p/629b8df0-6ce3-4230-baf7-45b3944bf0d5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LEESM_llama-3-Korean-Bllossom-8B-trexlab-oki10p/1762652579.7048151", - "retrieved_timestamp": "1762652579.704816", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LEESM/llama-3-Korean-Bllossom-8B-trexlab-oki10p", - "developer": "meta", - "inference_platform": "unknown", - "id": "LEESM/llama-3-Korean-Bllossom-8B-trexlab-oki10p", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21372513818889433 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43430121169320707 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38692708333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3176529255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/LimYeri/CodeMind-Llama3-8B-unsloth_v2-merged/0338e807-8f8e-41d9-b4ac-d80239340678.json b/data/hfopenllm_v2/meta/LimYeri/CodeMind-Llama3-8B-unsloth_v2-merged/0338e807-8f8e-41d9-b4ac-d80239340678.json deleted file mode 100644 index 854ec7c38..000000000 --- a/data/hfopenllm_v2/meta/LimYeri/CodeMind-Llama3-8B-unsloth_v2-merged/0338e807-8f8e-41d9-b4ac-d80239340678.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LimYeri_CodeMind-Llama3-8B-unsloth_v2-merged/1762652579.733024", - "retrieved_timestamp": "1762652579.733025", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LimYeri/CodeMind-Llama3-8B-unsloth_v2-merged", - "developer": "meta", - "inference_platform": "unknown", - "id": "LimYeri/CodeMind-Llama3-8B-unsloth_v2-merged", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6946280314011268 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48600920882996324 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3316145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3505651595744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/LimYeri/CodeMind-Llama3-8B-unsloth_v3-merged/c96743a9-b5ca-40ab-a86a-ed1c7ab8ddfd.json b/data/hfopenllm_v2/meta/LimYeri/CodeMind-Llama3-8B-unsloth_v3-merged/c96743a9-b5ca-40ab-a86a-ed1c7ab8ddfd.json deleted file mode 100644 index 7586ea330..000000000 --- a/data/hfopenllm_v2/meta/LimYeri/CodeMind-Llama3-8B-unsloth_v3-merged/c96743a9-b5ca-40ab-a86a-ed1c7ab8ddfd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LimYeri_CodeMind-Llama3-8B-unsloth_v3-merged/1762652579.733407", - "retrieved_timestamp": "1762652579.7334101", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LimYeri/CodeMind-Llama3-8B-unsloth_v3-merged", - "developer": "meta", - "inference_platform": "unknown", - "id": "LimYeri/CodeMind-Llama3-8B-unsloth_v3-merged", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": 
"Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6762933460994606 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4908161460506797 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3356145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34956781914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-merged/0f52efcb-1b9b-4df1-820b-a8c0698481a7.json b/data/hfopenllm_v2/meta/LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-merged/0f52efcb-1b9b-4df1-820b-a8c0698481a7.json deleted file mode 100644 index 17789ae87..000000000 --- a/data/hfopenllm_v2/meta/LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-merged/0f52efcb-1b9b-4df1-820b-a8c0698481a7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LimYeri_CodeMind-Llama3-8B-unsloth_v4-one-merged/1762652579.7341938", - "retrieved_timestamp": "1762652579.734195", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-merged", - "developer": "meta", - "inference_platform": "unknown", - "id": "LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-merged", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32108693821283085 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47387586084568856 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { 
- "score": 0.05513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40692708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33527260638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/LimYeri/CodeMind-Llama3.1-8B-unsloth-merged/82d77852-64e4-4dd0-a636-785958786fd2.json b/data/hfopenllm_v2/meta/LimYeri/CodeMind-Llama3.1-8B-unsloth-merged/82d77852-64e4-4dd0-a636-785958786fd2.json deleted file mode 100644 index 2f7a69538..000000000 --- a/data/hfopenllm_v2/meta/LimYeri/CodeMind-Llama3.1-8B-unsloth-merged/82d77852-64e4-4dd0-a636-785958786fd2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/LimYeri_CodeMind-Llama3.1-8B-unsloth-merged/1762652579.7344582", - "retrieved_timestamp": "1762652579.734459", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "LimYeri/CodeMind-Llama3.1-8B-unsloth-merged", - "developer": "meta", - "inference_platform": "unknown", - "id": "LimYeri/CodeMind-Llama3.1-8B-unsloth-merged", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6490157227268093 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4694777854416285 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10876132930513595 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37523958333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33402593085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Locutusque/Hercules-6.0-Llama-3.1-8B/2084dde6-b1e3-457b-9854-ace18cc5d943.json b/data/hfopenllm_v2/meta/Locutusque/Hercules-6.0-Llama-3.1-8B/2084dde6-b1e3-457b-9854-ace18cc5d943.json deleted file mode 100644 index 298b05471..000000000 --- a/data/hfopenllm_v2/meta/Locutusque/Hercules-6.0-Llama-3.1-8B/2084dde6-b1e3-457b-9854-ace18cc5d943.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Locutusque_Hercules-6.0-Llama-3.1-8B/1762652579.734967", - "retrieved_timestamp": "1762652579.734968", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Locutusque/Hercules-6.0-Llama-3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "Locutusque/Hercules-6.0-Llama-3.1-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6630041622893922 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48133037900119535 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16691842900302115 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.362125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3614527925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Locutusque/Hercules-6.1-Llama-3.1-8B/267ac6ef-168e-489b-a7cc-0ff448b0acbf.json b/data/hfopenllm_v2/meta/Locutusque/Hercules-6.1-Llama-3.1-8B/267ac6ef-168e-489b-a7cc-0ff448b0acbf.json deleted file mode 100644 index 7335565ad..000000000 --- a/data/hfopenllm_v2/meta/Locutusque/Hercules-6.1-Llama-3.1-8B/267ac6ef-168e-489b-a7cc-0ff448b0acbf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Locutusque_Hercules-6.1-Llama-3.1-8B/1762652579.735234", - "retrieved_timestamp": "1762652579.735234", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Locutusque/Hercules-6.1-Llama-3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "Locutusque/Hercules-6.1-Llama-3.1-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6006806384836678 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46562423765034017 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17598187311178248 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35533333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36685505319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Locutusque/Llama-3-NeuralHercules-5.0-8B/0c540f58-808b-42fc-b4b9-346367742f70.json b/data/hfopenllm_v2/meta/Locutusque/Llama-3-NeuralHercules-5.0-8B/0c540f58-808b-42fc-b4b9-346367742f70.json deleted file mode 100644 index b5da0e8b1..000000000 --- a/data/hfopenllm_v2/meta/Locutusque/Llama-3-NeuralHercules-5.0-8B/0c540f58-808b-42fc-b4b9-346367742f70.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Locutusque_Llama-3-NeuralHercules-5.0-8B/1762652579.735453", - "retrieved_timestamp": "1762652579.735453", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Locutusque/Llama-3-NeuralHercules-5.0-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "Locutusque/Llama-3-NeuralHercules-5.0-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4489310584803876 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3940474241916672 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3880729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29330119680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Locutusque/Llama-3-Yggdrasil-2.0-8B/478f0d4e-41e5-41c7-b9da-07db69c1d561.json b/data/hfopenllm_v2/meta/Locutusque/Llama-3-Yggdrasil-2.0-8B/478f0d4e-41e5-41c7-b9da-07db69c1d561.json deleted file mode 100644 index 9725541dd..000000000 --- a/data/hfopenllm_v2/meta/Locutusque/Llama-3-Yggdrasil-2.0-8B/478f0d4e-41e5-41c7-b9da-07db69c1d561.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Locutusque_Llama-3-Yggdrasil-2.0-8B/1762652579.7359009", - "retrieved_timestamp": "1762652579.735904", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Locutusque/Llama-3-Yggdrasil-2.0-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "Locutusque/Llama-3-Yggdrasil-2.0-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5370583385417359 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47724551424666856 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39765625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.316655585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Lyte/Llama-3.2-3B-Overthinker/d997330d-6679-4d63-839c-677694ea4abc.json b/data/hfopenllm_v2/meta/Lyte/Llama-3.2-3B-Overthinker/d997330d-6679-4d63-839c-677694ea4abc.json deleted file mode 100644 index 63b9b9610..000000000 --- a/data/hfopenllm_v2/meta/Lyte/Llama-3.2-3B-Overthinker/d997330d-6679-4d63-839c-677694ea4abc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Lyte_Llama-3.2-3B-Overthinker/1762652579.741945", - "retrieved_timestamp": "1762652579.7419462", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Lyte/Llama-3.2-3B-Overthinker", - "developer": "meta", - "inference_platform": "unknown", - "id": "Lyte/Llama-3.2-3B-Overthinker", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6407975283359264 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4320093097952517 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34190625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29853723404255317 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/meta/MLP-KTLim/llama-3-Korean-Bllossom-8B/31a37662-052e-440c-a475-66543b6c52b1.json b/data/hfopenllm_v2/meta/MLP-KTLim/llama-3-Korean-Bllossom-8B/31a37662-052e-440c-a475-66543b6c52b1.json deleted file mode 100644 index a6a4d12ee..000000000 --- a/data/hfopenllm_v2/meta/MLP-KTLim/llama-3-Korean-Bllossom-8B/31a37662-052e-440c-a475-66543b6c52b1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MLP-KTLim_llama-3-Korean-Bllossom-8B/1762652579.7427032", - "retrieved_timestamp": "1762652579.7427042", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MLP-KTLim/llama-3-Korean-Bllossom-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "MLP-KTLim/llama-3-Korean-Bllossom-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5112800702136997 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49004556470187666 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10196374622356495 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3674583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.359375 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Magpie-Align/Llama-3-8B-Magpie-Align-v0.1/c819ae59-5f32-4bba-a835-84fa9497de6b.json b/data/hfopenllm_v2/meta/Magpie-Align/Llama-3-8B-Magpie-Align-v0.1/c819ae59-5f32-4bba-a835-84fa9497de6b.json deleted file mode 100644 index 160957404..000000000 --- a/data/hfopenllm_v2/meta/Magpie-Align/Llama-3-8B-Magpie-Align-v0.1/c819ae59-5f32-4bba-a835-84fa9497de6b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Magpie-Align_Llama-3-8B-Magpie-Align-v0.1/1762652579.744125", - "retrieved_timestamp": "1762652579.7441258", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": 
"Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Magpie-Align/Llama-3-8B-Magpie-Align-v0.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "Magpie-Align/Llama-3-8B-Magpie-Align-v0.1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4027192294223771 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47894081019705514 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04607250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3086979166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30011635638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Magpie-Align/Llama-3-8B-Magpie-Align-v0.1/ced5680b-ff4a-42be-a609-6fc2541d6109.json b/data/hfopenllm_v2/meta/Magpie-Align/Llama-3-8B-Magpie-Align-v0.1/ced5680b-ff4a-42be-a609-6fc2541d6109.json deleted file mode 100644 index a3a7cf75a..000000000 --- a/data/hfopenllm_v2/meta/Magpie-Align/Llama-3-8B-Magpie-Align-v0.1/ced5680b-ff4a-42be-a609-6fc2541d6109.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Magpie-Align_Llama-3-8B-Magpie-Align-v0.1/1762652579.743867", - "retrieved_timestamp": "1762652579.7438679", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Magpie-Align/Llama-3-8B-Magpie-Align-v0.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "Magpie-Align/Llama-3-8B-Magpie-Align-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4118117705465941 
- } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4811441560714845 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.033987915407854986 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3046979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3006150265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Magpie-Align/Llama-3-8B-Magpie-Align-v0.3/f58be76c-043d-4ad9-81df-9a94d380808c.json b/data/hfopenllm_v2/meta/Magpie-Align/Llama-3-8B-Magpie-Align-v0.3/f58be76c-043d-4ad9-81df-9a94d380808c.json deleted file mode 100644 index 6f999e358..000000000 --- a/data/hfopenllm_v2/meta/Magpie-Align/Llama-3-8B-Magpie-Align-v0.3/f58be76c-043d-4ad9-81df-9a94d380808c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Magpie-Align_Llama-3-8B-Magpie-Align-v0.3/1762652579.7443142", - "retrieved_timestamp": "1762652579.744315", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Magpie-Align/Llama-3-8B-Magpie-Align-v0.3", - "developer": "meta", - "inference_platform": "unknown", - "id": "Magpie-Align/Llama-3-8B-Magpie-Align-v0.3", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44970566984490046 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.456960506522001 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34060416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31341422872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Magpie-Align/Llama-3.1-8B-Magpie-Align-v0.1/80e08062-397f-40d4-b6b2-a3e03d9cc320.json b/data/hfopenllm_v2/meta/Magpie-Align/Llama-3.1-8B-Magpie-Align-v0.1/80e08062-397f-40d4-b6b2-a3e03d9cc320.json deleted file mode 100644 index 927e906ec..000000000 --- a/data/hfopenllm_v2/meta/Magpie-Align/Llama-3.1-8B-Magpie-Align-v0.1/80e08062-397f-40d4-b6b2-a3e03d9cc320.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Magpie-Align_Llama-3.1-8B-Magpie-Align-v0.1/1762652579.744737", - "retrieved_timestamp": "1762652579.744738", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Magpie-Align/Llama-3.1-8B-Magpie-Align-v0.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "Magpie-Align/Llama-3.1-8B-Magpie-Align-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4457838535086903 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46223963164680143 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31406249999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32621343085106386 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/meta/MagusCorp/grpo_lora_enem_llama3_7b/22c931f2-cf99-46b1-b4f8-50db5a172a66.json b/data/hfopenllm_v2/meta/MagusCorp/grpo_lora_enem_llama3_7b/22c931f2-cf99-46b1-b4f8-50db5a172a66.json deleted file mode 100644 index 0823b9133..000000000 --- a/data/hfopenllm_v2/meta/MagusCorp/grpo_lora_enem_llama3_7b/22c931f2-cf99-46b1-b4f8-50db5a172a66.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MagusCorp_grpo_lora_enem_llama3_7b/1762652579.745377", - "retrieved_timestamp": "1762652579.745378", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MagusCorp/grpo_lora_enem_llama3_7b", - "developer": "meta", - "inference_platform": "unknown", - "id": "MagusCorp/grpo_lora_enem_llama3_7b", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4723622211288271 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48014581980384746 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1216012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.397125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35738031914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.1-llama3.1-70b/e216df49-368d-457f-9153-e33741b7b847.json b/data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.1-llama3.1-70b/e216df49-368d-457f-9153-e33741b7b847.json deleted file mode 100644 index 7c4c6a297..000000000 --- a/data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.1-llama3.1-70b/e216df49-368d-457f-9153-e33741b7b847.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.1-llama3.1-70b/1762652579.751613", - "retrieved_timestamp": "1762652579.7516139", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.1-llama3.1-70b", - "developer": "meta", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.1-llama3.1-70b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8434298771703524 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.644755327496552 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41012084592145015 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43803125000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5282579787234043 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.2-llama3-70b/8b86e8c3-eb04-41a8-91e3-3eef396aca4f.json b/data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.2-llama3-70b/8b86e8c3-eb04-41a8-91e3-3eef396aca4f.json deleted file mode 100644 index ce47c4c10..000000000 --- a/data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.2-llama3-70b/8b86e8c3-eb04-41a8-91e3-3eef396aca4f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.2-llama3-70b/1762652579.753183", - "retrieved_timestamp": "1762652579.753183", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.2-llama3-70b", - "developer": "meta", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.2-llama3-70b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8208486814984242 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6435431762417703 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2394259818731118 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3414429530201342 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4445729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5206948138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.2-llama3.1-70b/9112c2ec-cf0e-4d2c-9261-14ebb8706d69.json b/data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.2-llama3.1-70b/9112c2ec-cf0e-4d2c-9261-14ebb8706d69.json deleted file mode 100644 index 77953022d..000000000 --- a/data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.2-llama3.1-70b/9112c2ec-cf0e-4d2c-9261-14ebb8706d69.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.2-llama3.1-70b/1762652579.753403", - "retrieved_timestamp": "1762652579.753404", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.2-llama3.1-70b", - "developer": "meta", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.2-llama3.1-70b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8592667455684251 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6792920009427085 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43655589123867067 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45415625000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5414727393617021 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.3-llama3-70b/66d7e97b-0a79-4d39-8d6b-cf083239aa93.json b/data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.3-llama3-70b/66d7e97b-0a79-4d39-8d6b-cf083239aa93.json deleted file mode 100644 index c4002f02f..000000000 --- a/data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.3-llama3-70b/66d7e97b-0a79-4d39-8d6b-cf083239aa93.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.3-llama3-70b/1762652579.7547278", - "retrieved_timestamp": "1762652579.7547278", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.3-llama3-70b", - "developer": "meta", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.3-llama3-70b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8010401290797307 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6399173489368603 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2326283987915408 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42612500000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5204454787234043 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.3-llama3.1-70b/7e8b2abe-68e5-445b-ae22-5b827e53b72d.json b/data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.3-llama3.1-70b/7e8b2abe-68e5-445b-ae22-5b827e53b72d.json deleted file 
mode 100644 index 1e43a7c9d..000000000 --- a/data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.3-llama3.1-70b/7e8b2abe-68e5-445b-ae22-5b827e53b72d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.3-llama3.1-70b/1762652579.755093", - "retrieved_timestamp": "1762652579.7550972", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.3-llama3.1-70b", - "developer": "meta", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.3-llama3.1-70b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8604657863358112 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6871653740091753 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39274924471299094 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34395973154362414 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45682291666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5363198138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.4-llama3-70b/8cf1e62b-f646-4082-9d10-8cf376154d40.json b/data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.4-llama3-70b/8cf1e62b-f646-4082-9d10-8cf376154d40.json deleted file mode 100644 index f4566a849..000000000 --- a/data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.4-llama3-70b/8cf1e62b-f646-4082-9d10-8cf376154d40.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.4-llama3-70b/1762652579.7565", - "retrieved_timestamp": "1762652579.756501", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.4-llama3-70b", - "developer": "meta", - "inference_platform": 
"unknown", - "id": "MaziyarPanahi/calme-2.4-llama3-70b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5027371817887649 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6418191966839487 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24471299093655588 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33976510067114096 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4287916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5203623670212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/MaziyarPanahi/calme-3.1-llamaloi-3b/0acfe83d-3876-4c08-9b26-931450d24bfd.json b/data/hfopenllm_v2/meta/MaziyarPanahi/calme-3.1-llamaloi-3b/0acfe83d-3876-4c08-9b26-931450d24bfd.json deleted file mode 100644 index 1a71a47c4..000000000 --- a/data/hfopenllm_v2/meta/MaziyarPanahi/calme-3.1-llamaloi-3b/0acfe83d-3876-4c08-9b26-931450d24bfd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.1-llamaloi-3b/1762652579.758682", - "retrieved_timestamp": "1762652579.758683", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/calme-3.1-llamaloi-3b", - "developer": "meta", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-3.1-llamaloi-3b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7375175645066203 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4587340004998879 - } - }, - { - "evaluation_name": 
"MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1729607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35152083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3204787234042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/MoonRide/Llama-3.2-3B-Khelavaster/ed373700-5ff1-4a84-8746-12ec4c278e00.json b/data/hfopenllm_v2/meta/MoonRide/Llama-3.2-3B-Khelavaster/ed373700-5ff1-4a84-8746-12ec4c278e00.json deleted file mode 100644 index f054c86d8..000000000 --- a/data/hfopenllm_v2/meta/MoonRide/Llama-3.2-3B-Khelavaster/ed373700-5ff1-4a84-8746-12ec4c278e00.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MoonRide_Llama-3.2-3B-Khelavaster/1762652579.762122", - "retrieved_timestamp": "1762652579.762123", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MoonRide/Llama-3.2-3B-Khelavaster", - "developer": "meta", - "inference_platform": "unknown", - "id": "MoonRide/Llama-3.2-3B-Khelavaster", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.607 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4924954675815725 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45156712929620335 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16163141993957703 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36990625000000005 - } 
- }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31216755319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/NAPS-ai/naps-llama-3_1_instruct-v0.6.0/3378460d-d044-4c7e-ba9f-48cc94f0bc3f.json b/data/hfopenllm_v2/meta/NAPS-ai/naps-llama-3_1_instruct-v0.6.0/3378460d-d044-4c7e-ba9f-48cc94f0bc3f.json deleted file mode 100644 index e4793e71a..000000000 --- a/data/hfopenllm_v2/meta/NAPS-ai/naps-llama-3_1_instruct-v0.6.0/3378460d-d044-4c7e-ba9f-48cc94f0bc3f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/NAPS-ai_naps-llama-3_1_instruct-v0.6.0/1762652579.766795", - "retrieved_timestamp": "1762652579.766796", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NAPS-ai/naps-llama-3_1_instruct-v0.6.0", - "developer": "meta", - "inference_platform": "unknown", - "id": "NAPS-ai/naps-llama-3_1_instruct-v0.6.0", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3280063564675062 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45284530156109354 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06419939577039276 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37390624999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3240525265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/NAPS-ai/naps-llama3.1-70B-v0.2-fp16/16b6df0d-8e1b-4bec-b3f9-060273a4ad15.json b/data/hfopenllm_v2/meta/NAPS-ai/naps-llama3.1-70B-v0.2-fp16/16b6df0d-8e1b-4bec-b3f9-060273a4ad15.json deleted file mode 100644 index f69367064..000000000 --- a/data/hfopenllm_v2/meta/NAPS-ai/naps-llama3.1-70B-v0.2-fp16/16b6df0d-8e1b-4bec-b3f9-060273a4ad15.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/NAPS-ai_naps-llama3.1-70B-v0.2-fp16/1762652579.7671611", - "retrieved_timestamp": "1762652579.767162", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NAPS-ai/naps-llama3.1-70B-v0.2-fp16", - "developer": "meta", - "inference_platform": "unknown", - "id": "NAPS-ai/naps-llama3.1-70B-v0.2-fp16", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.761 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1844993506119319 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3040736853180832 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23909395973154363 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34860416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10987367021276596 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Naveenpoliasetty/llama3-8B-V2/53ae919d-c56b-415f-87c0-c6273730357b.json b/data/hfopenllm_v2/meta/Naveenpoliasetty/llama3-8B-V2/53ae919d-c56b-415f-87c0-c6273730357b.json deleted file mode 100644 index e02813a99..000000000 --- a/data/hfopenllm_v2/meta/Naveenpoliasetty/llama3-8B-V2/53ae919d-c56b-415f-87c0-c6273730357b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Naveenpoliasetty_llama3-8B-V2/1762652579.769772", - "retrieved_timestamp": "1762652579.769773", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Naveenpoliasetty/llama3-8B-V2", - "developer": "meta", - "inference_platform": "unknown", - "id": "Naveenpoliasetty/llama3-8B-V2", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4122616878770551 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5188657580065063 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07854984894259819 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40813541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3737533244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Nekochu/Llama-3.1-8B-German-ORPO/83da2d8f-542c-4d21-88f9-b83f4e960579.json b/data/hfopenllm_v2/meta/Nekochu/Llama-3.1-8B-German-ORPO/83da2d8f-542c-4d21-88f9-b83f4e960579.json deleted file mode 100644 index e2672ae1b..000000000 --- a/data/hfopenllm_v2/meta/Nekochu/Llama-3.1-8B-German-ORPO/83da2d8f-542c-4d21-88f9-b83f4e960579.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nekochu_Llama-3.1-8B-German-ORPO/1762652579.7705338", - "retrieved_timestamp": "1762652579.7705338", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nekochu/Llama-3.1-8B-German-ORPO", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nekochu/Llama-3.1-8B-German-ORPO", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4610710692074806 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4982577044334462 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11706948640483383 - } - }, - { - "evaluation_name": "GPQA", 
- "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46475 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33934507978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Nexesenex/Dolphin3.0-Llama3.1-1B-abliterated/ed950058-9f6b-4ed6-9d41-0d2674dc19d1.json b/data/hfopenllm_v2/meta/Nexesenex/Dolphin3.0-Llama3.1-1B-abliterated/ed950058-9f6b-4ed6-9d41-0d2674dc19d1.json deleted file mode 100644 index dd94c3699..000000000 --- a/data/hfopenllm_v2/meta/Nexesenex/Dolphin3.0-Llama3.1-1B-abliterated/ed950058-9f6b-4ed6-9d41-0d2674dc19d1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nexesenex_Dolphin3.0-Llama3.1-1B-abliterated/1762652579.772268", - "retrieved_timestamp": "1762652579.772269", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nexesenex/Dolphin3.0-Llama3.1-1B-abliterated", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Dolphin3.0-Llama3.1-1B-abliterated", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5311883580012146 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3240787338568713 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2407718120805369 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32367708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { 
- "score": 0.1373005319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DeepDive_3_Prev_v1.0/67010272-067a-4dd4-a31d-9da58d72118e.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DeepDive_3_Prev_v1.0/67010272-067a-4dd4-a31d-9da58d72118e.json deleted file mode 100644 index debe182c8..000000000 --- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DeepDive_3_Prev_v1.0/67010272-067a-4dd4-a31d-9da58d72118e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DeepDive_3_Prev_v1.0/1762652579.7727091", - "retrieved_timestamp": "1762652579.7727098", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_DeepDive_3_Prev_v1.0", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_DeepDive_3_Prev_v1.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6809144181881852 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5155095936229447 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1865558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3665833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34375 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DeepDive_3_R1_Prev_v1.0/9aa57eda-6d6a-449e-801d-96e16499ddd6.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DeepDive_3_R1_Prev_v1.0/9aa57eda-6d6a-449e-801d-96e16499ddd6.json deleted file mode 100644 index cbb148902..000000000 --- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DeepDive_3_R1_Prev_v1.0/9aa57eda-6d6a-449e-801d-96e16499ddd6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DeepDive_3_R1_Prev_v1.0/1762652579.772983", - "retrieved_timestamp": "1762652579.772984", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_DeepDive_3_R1_Prev_v1.0", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_DeepDive_3_R1_Prev_v1.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7100903380807368 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.51203649030939 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19259818731117825 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37576041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34408244680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DobHerWild_R1_v1.1R/bedae6ba-9f3b-435b-bb7f-cadb7a684804.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DobHerWild_R1_v1.1R/bedae6ba-9f3b-435b-bb7f-cadb7a684804.json deleted file mode 100644 index d07f0cedc..000000000 --- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DobHerWild_R1_v1.1R/bedae6ba-9f3b-435b-bb7f-cadb7a684804.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DobHerWild_R1_v1.1R/1762652579.773223", - "retrieved_timestamp": "1762652579.7732239", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_DobHerWild_R1_v1.1R", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_DobHerWild_R1_v1.1R", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy 
on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.759999024809727 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.525696414662245 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23187311178247735 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38521875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36884973404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DoberWild_v2.01/8a3df59d-9f38-4682-a760-5fa7903cab99.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DoberWild_v2.01/8a3df59d-9f38-4682-a760-5fa7903cab99.json deleted file mode 100644 index 8a7c2b0d6..000000000 --- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DoberWild_v2.01/8a3df59d-9f38-4682-a760-5fa7903cab99.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DoberWild_v2.01/1762652579.7734542", - "retrieved_timestamp": "1762652579.7734542", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_DoberWild_v2.01", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_DoberWild_v2.01", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.031 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7995662619627034 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5250767747736031 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2001510574018127 - } - }, - { - "evaluation_name": "GPQA", - 
"metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4011875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3790724734042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DoberWild_v2.02/62ef54cd-d97d-473e-9dd2-42fe185e4d04.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DoberWild_v2.02/62ef54cd-d97d-473e-9dd2-42fe185e4d04.json deleted file mode 100644 index 8c222f2c8..000000000 --- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DoberWild_v2.02/62ef54cd-d97d-473e-9dd2-42fe185e4d04.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DoberWild_v2.02/1762652579.7736902", - "retrieved_timestamp": "1762652579.773691", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_DoberWild_v2.02", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_DoberWild_v2.02", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7746368524404137 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.531273698652086 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19939577039274925 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39458333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3764128989361702 - } - 
} - ] -} diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DoberWild_v2.03/b81cbefe-7c08-4bc2-979f-10caf20fa9fa.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DoberWild_v2.03/b81cbefe-7c08-4bc2-979f-10caf20fa9fa.json deleted file mode 100644 index 2ec1a74f9..000000000 --- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DoberWild_v2.03/b81cbefe-7c08-4bc2-979f-10caf20fa9fa.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DoberWild_v2.03/1762652579.7739289", - "retrieved_timestamp": "1762652579.77393", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_DoberWild_v2.03", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_DoberWild_v2.03", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7764354135914928 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5294434267893284 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20770392749244712 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3045302013422819 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39058333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37217420212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DodoWild_v2.01/78ecc0f4-dcd5-4c25-a598-ef95114f5868.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DodoWild_v2.01/78ecc0f4-dcd5-4c25-a598-ef95114f5868.json deleted file mode 100644 index 139c2b71d..000000000 --- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DodoWild_v2.01/78ecc0f4-dcd5-4c25-a598-ef95114f5868.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DodoWild_v2.01/1762652579.7741492", - "retrieved_timestamp": "1762652579.7741492", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_DodoWild_v2.01", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_DodoWild_v2.01", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.031 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7977677008116243 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5252760762748857 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1986404833836858 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40896874999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3738364361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DodoWild_v2.02/f8448236-89b9-4a9c-949b-9bb45db5e400.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DodoWild_v2.02/f8448236-89b9-4a9c-949b-9bb45db5e400.json deleted file mode 100644 index 83f538115..000000000 --- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DodoWild_v2.02/f8448236-89b9-4a9c-949b-9bb45db5e400.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DodoWild_v2.02/1762652579.774375", - "retrieved_timestamp": "1762652579.774376", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_DodoWild_v2.02", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_DodoWild_v2.02", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8016895171478344 - } - 
}, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5261737638679802 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22734138972809667 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39706249999999993 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37608045212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DodoWild_v2.03/3b2b7ebc-be82-4d7d-8bc8-e718513d164c.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DodoWild_v2.03/3b2b7ebc-be82-4d7d-8bc8-e718513d164c.json deleted file mode 100644 index f1b04146b..000000000 --- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DodoWild_v2.03/3b2b7ebc-be82-4d7d-8bc8-e718513d164c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DodoWild_v2.03/1762652579.7746859", - "retrieved_timestamp": "1762652579.774687", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_DodoWild_v2.03", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_DodoWild_v2.03", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7941207108250552 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.530825004382936 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22205438066465258 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3958541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37857380319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DodoWild_v2.10/ca49f981-e4eb-4235-b472-de832ffedd72.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DodoWild_v2.10/ca49f981-e4eb-4235-b472-de832ffedd72.json deleted file mode 100644 index 8fea1d6ab..000000000 --- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DodoWild_v2.10/ca49f981-e4eb-4235-b472-de832ffedd72.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DodoWild_v2.10/1762652579.7749188", - "retrieved_timestamp": "1762652579.7749188", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_DodoWild_v2.10", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_DodoWild_v2.10", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8053863748188141 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5278362703806528 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1971299093655589 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41566666666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3854720744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.01/ca856917-9100-41ea-9900-91d12be1de44.json 
b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.01/ca856917-9100-41ea-9900-91d12be1de44.json deleted file mode 100644 index 1cc63cc99..000000000 --- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.01/ca856917-9100-41ea-9900-91d12be1de44.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Dolermed_R1_V1.01/1762652579.775126", - "retrieved_timestamp": "1762652579.775127", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.01", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.01", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7533544329046928 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5312389177563648 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20166163141993956 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37470833333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3732546542553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.03/b1f9e472-38c5-409f-b112-3006bca90b94.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.03/b1f9e472-38c5-409f-b112-3006bca90b94.json deleted file mode 100644 index 19351d146..000000000 --- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.03/b1f9e472-38c5-409f-b112-3006bca90b94.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Dolermed_R1_V1.03/1762652579.7753332", - "retrieved_timestamp": "1762652579.775334", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open 
LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.03", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.03", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7564019025075688 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5316448098766001 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20921450151057402 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3800416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37200797872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Dolermed_V1.01/4733fd17-2d7a-44cd-83bf-1201a3173495.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Dolermed_V1.01/4733fd17-2d7a-44cd-83bf-1201a3173495.json deleted file mode 100644 index 0ea021320..000000000 --- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Dolermed_V1.01/4733fd17-2d7a-44cd-83bf-1201a3173495.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Dolermed_V1.01/1762652579.775538", - "retrieved_timestamp": "1762652579.775538", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_Dolermed_V1.01", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_Dolermed_V1.01", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.031 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.508657030013697 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5193615033347353 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13444108761329304 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39448958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3570478723404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Dolerstormed_V1.04/9d44d069-44b1-414a-93c1-91b46ceabe66.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Dolerstormed_V1.04/9d44d069-44b1-414a-93c1-91b46ceabe66.json deleted file mode 100644 index 3cea00a5d..000000000 --- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Dolerstormed_V1.04/9d44d069-44b1-414a-93c1-91b46ceabe66.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Dolerstormed_V1.04/1762652579.775745", - "retrieved_timestamp": "1762652579.775746", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_Dolerstormed_V1.04", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_Dolerstormed_V1.04", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7889001183526376 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5195180641442355 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19259818731117825 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221476510067114 - } - }, - { - "evaluation_name": 
"MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4029583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3888796542553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Hermedash_R1_V1.04/615e5bca-6f64-4bf9-a131-eefd7ec32c08.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Hermedash_R1_V1.04/615e5bca-6f64-4bf9-a131-eefd7ec32c08.json deleted file mode 100644 index ab504cabe..000000000 --- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Hermedash_R1_V1.04/615e5bca-6f64-4bf9-a131-eefd7ec32c08.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Hermedash_R1_V1.04/1762652579.775957", - "retrieved_timestamp": "1762652579.775958", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_Hermedash_R1_V1.04", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_Hermedash_R1_V1.04", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7871514248859692 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5191641616026265 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1865558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4110520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38821476063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.01/82f2d97c-e8d2-47a4-a56b-af781b98ba0b.json 
b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.01/82f2d97c-e8d2-47a4-a56b-af781b98ba0b.json deleted file mode 100644 index cf2476c38..000000000 --- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.01/82f2d97c-e8d2-47a4-a56b-af781b98ba0b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Hermedive_R1_V1.01/1762652579.7761788", - "retrieved_timestamp": "1762652579.7761788", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.01", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.01", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5001141415887622 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5170855986734039 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17749244712990936 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40084374999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34266954787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.03/e73d5aee-ad0f-4bec-8230-2087669444bb.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.03/e73d5aee-ad0f-4bec-8230-2087669444bb.json deleted file mode 100644 index 57726cd78..000000000 --- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.03/e73d5aee-ad0f-4bec-8230-2087669444bb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Hermedive_R1_V1.03/1762652579.776387", - "retrieved_timestamp": "1762652579.7763882", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": 
"HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.03", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.03", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6647528557560606 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5140787918844759 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18580060422960726 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3613125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3488198138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Hermedive_V1.01/99589a08-8f1e-437e-b6f0-e33a9dab5806.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Hermedive_V1.01/99589a08-8f1e-437e-b6f0-e33a9dab5806.json deleted file mode 100644 index 9d2465e50..000000000 --- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Hermedive_V1.01/99589a08-8f1e-437e-b6f0-e33a9dab5806.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Hermedive_V1.01/1762652579.776601", - "retrieved_timestamp": "1762652579.776602", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_Hermedive_V1.01", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_Hermedive_V1.01", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.031 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5061592131101034 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on 
BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4918197968512548 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36965624999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3550531914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Mediver_V1.01/35eb03f0-f11e-40d8-a830-7ce2cfde2956.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Mediver_V1.01/35eb03f0-f11e-40d8-a830-7ce2cfde2956.json deleted file mode 100644 index e6c72c06b..000000000 --- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Mediver_V1.01/35eb03f0-f11e-40d8-a830-7ce2cfde2956.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Mediver_V1.01/1762652579.7768", - "retrieved_timestamp": "1762652579.776801", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_Mediver_V1.01", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_Mediver_V1.01", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.031 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18847103463255274 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44148325896745977 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0015105740181268882 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38978124999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2993683510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Medusa_v1.01/01b841ba-ecb1-4025-91b7-fb2c443ef85c.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Medusa_v1.01/01b841ba-ecb1-4025-91b7-fb2c443ef85c.json deleted file mode 100644 index bef98b238..000000000 --- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Medusa_v1.01/01b841ba-ecb1-4025-91b7-fb2c443ef85c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Medusa_v1.01/1762652579.777005", - "retrieved_timestamp": "1762652579.7770061", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_Medusa_v1.01", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_Medusa_v1.01", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.031 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7685419132346618 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5017727187674992 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14652567975830816 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40667708333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3531416223404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Smarteaz_0.2_R1/1cbff8d9-a857-4816-8427-0450871021d6.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Smarteaz_0.2_R1/1cbff8d9-a857-4816-8427-0450871021d6.json deleted file mode 100644 index 
92829eac6..000000000 --- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Smarteaz_0.2_R1/1cbff8d9-a857-4816-8427-0450871021d6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Smarteaz_0.2_R1/1762652579.777212", - "retrieved_timestamp": "1762652579.777212", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_Smarteaz_0.2_R1", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_Smarteaz_0.2_R1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6345529860769425 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5112504828088763 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26057401812688824 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4188020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3645279255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Smarteaz_V1.01/10cc1ce1-986e-44f5-b14e-a7b44d9de68d.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Smarteaz_V1.01/10cc1ce1-986e-44f5-b14e-a7b44d9de68d.json deleted file mode 100644 index 8833e54f7..000000000 --- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Smarteaz_V1.01/10cc1ce1-986e-44f5-b14e-a7b44d9de68d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Smarteaz_V1.01/1762652579.777418", - "retrieved_timestamp": "1762652579.777418", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_Smarteaz_V1.01", - "developer": "meta", - "inference_platform": 
"unknown", - "id": "Nexesenex/Llama_3.1_8b_Smarteaz_V1.01", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8151283040111349 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5241273021389002 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37892708333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3735871010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Stormeder_v1.04/e831c8bd-5bdd-4f00-9c91-ab4b29dfc66c.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Stormeder_v1.04/e831c8bd-5bdd-4f00-9c91-ab4b29dfc66c.json deleted file mode 100644 index a9517c725..000000000 --- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Stormeder_v1.04/e831c8bd-5bdd-4f00-9c91-ab4b29dfc66c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Stormeder_v1.04/1762652579.777617", - "retrieved_timestamp": "1762652579.777618", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_Stormeder_v1.04", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_Stormeder_v1.04", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7852531283660686 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5207086605445487 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18504531722054382 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3948958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38522273936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Typhoon_v1.03/6043c193-a533-4194-8cf5-9ed83d095f0d.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Typhoon_v1.03/6043c193-a533-4194-8cf5-9ed83d095f0d.json deleted file mode 100644 index 4bc512fcf..000000000 --- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Typhoon_v1.03/6043c193-a533-4194-8cf5-9ed83d095f0d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Typhoon_v1.03/1762652579.7778199", - "retrieved_timestamp": "1762652579.7778208", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_Typhoon_v1.03", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_Typhoon_v1.03", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8078343240379969 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5313965802672672 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22734138972809667 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.38146875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3842253989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_AquaSyn_0.1/4b512748-f6d0-4ed0-8ece-5b853a174329.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_AquaSyn_0.1/4b512748-f6d0-4ed0-8ece-5b853a174329.json deleted file mode 100644 index 780a017f5..000000000 --- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_AquaSyn_0.1/4b512748-f6d0-4ed0-8ece-5b853a174329.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_AquaSyn_0.1/1762652579.7780669", - "retrieved_timestamp": "1762652579.778068", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nexesenex/Llama_3.2_1b_AquaSyn_0.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.2_1b_AquaSyn_0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2741004977903075 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3284363786988483 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2483221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34603125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1377992021276596 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_AquaSyn_0.11/d3e57fb7-44cb-408a-9ed6-6387b1f0a543.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_AquaSyn_0.11/d3e57fb7-44cb-408a-9ed6-6387b1f0a543.json deleted file mode 100644 index 38fa7ed6c..000000000 --- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_AquaSyn_0.11/d3e57fb7-44cb-408a-9ed6-6387b1f0a543.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - 
"evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_AquaSyn_0.11/1762652579.778271", - "retrieved_timestamp": "1762652579.778271", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nexesenex/Llama_3.2_1b_AquaSyn_0.11", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.2_1b_AquaSyn_0.11", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24312601674667658 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3111956727868642 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.023413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3367604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1116190159574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Dolto_0.1/dae3d027-e262-462c-9930-cfee221cef58.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Dolto_0.1/dae3d027-e262-462c-9930-cfee221cef58.json deleted file mode 100644 index 0a076f8d5..000000000 --- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Dolto_0.1/dae3d027-e262-462c-9930-cfee221cef58.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_Dolto_0.1/1762652579.778476", - "retrieved_timestamp": "1762652579.778477", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nexesenex/Llama_3.2_1b_Dolto_0.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.2_1b_Dolto_0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5433782364127182 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3350056502150862 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03700906344410876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23741610738255034 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.342125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13638630319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Odyssea_V1.01/f3922129-7e69-495d-925b-c3c8a1b70c5a.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Odyssea_V1.01/f3922129-7e69-495d-925b-c3c8a1b70c5a.json deleted file mode 100644 index b42c76625..000000000 --- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Odyssea_V1.01/f3922129-7e69-495d-925b-c3c8a1b70c5a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_Odyssea_V1.01/1762652579.778893", - "retrieved_timestamp": "1762652579.7788942", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nexesenex/Llama_3.2_1b_Odyssea_V1.01", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.2_1b_Odyssea_V1.01", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24954564998648032 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3044651612138552 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.017371601208459216 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34203125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11519281914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Odyssea_V1/deb8be23-8976-4dfb-b038-70a4b77de9f6.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Odyssea_V1/deb8be23-8976-4dfb-b038-70a4b77de9f6.json deleted file mode 100644 index 8f34889c8..000000000 --- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Odyssea_V1/deb8be23-8976-4dfb-b038-70a4b77de9f6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_Odyssea_V1/1762652579.77868", - "retrieved_timestamp": "1762652579.77868", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nexesenex/Llama_3.2_1b_Odyssea_V1", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.2_1b_Odyssea_V1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2552660274737696 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3009715832098017 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33936458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.11527593085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_OpenTree_R1_0.1/11c52cd6-75e0-4800-9b98-fbc4aa81260d.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_OpenTree_R1_0.1/11c52cd6-75e0-4800-9b98-fbc4aa81260d.json deleted file mode 100644 index b2fa7805c..000000000 --- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_OpenTree_R1_0.1/11c52cd6-75e0-4800-9b98-fbc4aa81260d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_OpenTree_R1_0.1/1762652579.779097", - "retrieved_timestamp": "1762652579.779098", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Nexesenex/Llama_3.2_1b_OpenTree_R1_0.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.2_1b_OpenTree_R1_0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5366339091388627 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3279521771600605 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31307291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16747007978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_OrcaSun_V1/dd17eeb9-c1d1-4f98-986e-aad15a592891.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_OrcaSun_V1/dd17eeb9-c1d1-4f98-986e-aad15a592891.json deleted file mode 100644 index a07595749..000000000 --- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_OrcaSun_V1/dd17eeb9-c1d1-4f98-986e-aad15a592891.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_OrcaSun_V1/1762652579.779477", - "retrieved_timestamp": "1762652579.779478", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" 
-  ],
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party",
-    "source_name": "HF Open LLM v2",
-    "source_type": "documentation"
-  },
-  "model_info": {
-    "name": "Nexesenex/Llama_3.2_1b_OrcaSun_V1",
-    "developer": "meta",
-    "inference_platform": "unknown",
-    "id": "Nexesenex/Llama_3.2_1b_OrcaSun_V1",
-    "additional_details": {
-      "precision": "bfloat16",
-      "architecture": "LlamaForCausalLM",
-      "params_billions": 1.498
-    }
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.5948605256275571
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.355031362479927
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.05966767371601209
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.23657718120805368
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.33803125
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.19040890957446807
-      }
-    }
-  ]
-}
diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_RandomLego_RP_R1_0.1/8254ed33-9ce6-484d-9171-5402156a1933.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_RandomLego_RP_R1_0.1/8254ed33-9ce6-484d-9171-5402156a1933.json
deleted file mode 100644
index cbd4632c2..000000000
--- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_RandomLego_RP_R1_0.1/8254ed33-9ce6-484d-9171-5402156a1933.json
+++ /dev/null
@@ -1,105 +0,0 @@
-{
-  "schema_version": "0.1.0",
-  "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_RandomLego_RP_R1_0.1/1762652579.779787",
-  "retrieved_timestamp": "1762652579.779788",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party",
-    "source_name": "HF Open LLM v2",
-    "source_type": "documentation"
-  },
-  "model_info": {
-    "name": "Nexesenex/Llama_3.2_1b_RandomLego_RP_R1_0.1",
-    "developer": "meta",
-    "inference_platform": "unknown",
-    "id": "Nexesenex/Llama_3.2_1b_RandomLego_RP_R1_0.1",
-    "additional_details": {
-      "precision": "bfloat16",
-      "architecture": "LlamaForCausalLM",
-      "params_billions": 1.498
-    }
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.5542693386880144
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.34277067367168224
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.05664652567975831
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.25
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.3249166666666667
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.15633311170212766
-      }
-    }
-  ]
-}
diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_SunOrca_V1/848752ff-c92d-4ce2-94e8-5b8c8b765b77.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_SunOrca_V1/848752ff-c92d-4ce2-94e8-5b8c8b765b77.json
deleted file mode 100644
index 72739f803..000000000
--- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_SunOrca_V1/848752ff-c92d-4ce2-94e8-5b8c8b765b77.json
+++ /dev/null
@@ -1,105 +0,0 @@
-{
-  "schema_version": "0.1.0",
-  "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_SunOrca_V1/1762652579.7800052",
-  "retrieved_timestamp": "1762652579.780006",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party",
-    "source_name": "HF Open LLM v2",
-    "source_type": "documentation"
-  },
-  "model_info": {
-    "name": "Nexesenex/Llama_3.2_1b_SunOrca_V1",
-    "developer": "meta",
-    "inference_platform": "unknown",
-    "id": "Nexesenex/Llama_3.2_1b_SunOrca_V1",
-    "additional_details": {
-      "precision": "bfloat16",
-      "architecture": "LlamaForCausalLM",
-      "params_billions": 1.498
-    }
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.542953807009845
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.34306447662530104
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.06722054380664652
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.27432885906040266
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.32625
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.18841422872340424
-      }
-    }
-  ]
-}
diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Sydonia_0.1/980cf18c-0163-414c-8ed0-dff894a328ee.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Sydonia_0.1/980cf18c-0163-414c-8ed0-dff894a328ee.json
deleted file mode 100644
index 4bac83dff..000000000
--- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Sydonia_0.1/980cf18c-0163-414c-8ed0-dff894a328ee.json
+++ /dev/null
@@ -1,105 +0,0 @@
-{
-  "schema_version": "0.1.0",
-  "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_Sydonia_0.1/1762652579.780214",
-  "retrieved_timestamp": "1762652579.780215",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party",
-    "source_name": "HF Open LLM v2",
-    "source_type": "documentation"
-  },
-  "model_info": {
-    "name": "Nexesenex/Llama_3.2_1b_Sydonia_0.1",
-    "developer": "meta",
-    "inference_platform": "unknown",
-    "id": "Nexesenex/Llama_3.2_1b_Sydonia_0.1",
-    "additional_details": {
-      "precision": "bfloat16",
-      "architecture": "LlamaForCausalLM",
-      "params_billions": 1.498
-    }
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.21967047434141412
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.31210928710549807
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.02039274924471299
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.22818791946308725
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.33818750000000003
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.12242353723404255
-      }
-    }
-  ]
-}
diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Syneridol_0.2/99397e12-f601-478c-af40-c8f428b923a8.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Syneridol_0.2/99397e12-f601-478c-af40-c8f428b923a8.json
deleted file mode 100644
index 659715fe8..000000000
--- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Syneridol_0.2/99397e12-f601-478c-af40-c8f428b923a8.json
+++ /dev/null
@@ -1,105 +0,0 @@
-{
-  "schema_version": "0.1.0",
-  "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_Syneridol_0.2/1762652579.780447",
-  "retrieved_timestamp": "1762652579.780447",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party",
-    "source_name": "HF Open LLM v2",
-    "source_type": "documentation"
-  },
-  "model_info": {
-    "name": "Nexesenex/Llama_3.2_1b_Syneridol_0.2",
-    "developer": "meta",
-    "inference_platform": "unknown",
-    "id": "Nexesenex/Llama_3.2_1b_Syneridol_0.2",
-    "additional_details": {
-      "precision": "bfloat16",
-      "architecture": "LlamaForCausalLM",
-      "params_billions": 1.498
-    }
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.21574865800520399
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.3138849872298115
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.02190332326283988
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.2348993288590604
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.33428125000000003
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.12267287234042554
-      }
-    }
-  ]
-}
diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Synopsys_0.1/00ccf406-3e59-44cb-af59-6dcd391678ff.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Synopsys_0.1/00ccf406-3e59-44cb-af59-6dcd391678ff.json
deleted file mode 100644
index 189f5e7cd..000000000
--- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Synopsys_0.1/00ccf406-3e59-44cb-af59-6dcd391678ff.json
+++ /dev/null
@@ -1,105 +0,0 @@
-{
-  "schema_version": "0.1.0",
-  "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_Synopsys_0.1/1762652579.780673",
-  "retrieved_timestamp": "1762652579.780674",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party",
-    "source_name": "HF Open LLM v2",
-    "source_type": "documentation"
-  },
-  "model_info": {
-    "name": "Nexesenex/Llama_3.2_1b_Synopsys_0.1",
-    "developer": "meta",
-    "inference_platform": "unknown",
-    "id": "Nexesenex/Llama_3.2_1b_Synopsys_0.1",
-    "additional_details": {
-      "precision": "bfloat16",
-      "architecture": "LlamaForCausalLM",
-      "params_billions": 1.498
-    }
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.17638089158987041
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.31619439082949846
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.01661631419939577
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.23909395973154363
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.34609375000000003
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.12308843085106383
-      }
-    }
-  ]
-}
diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Synopsys_0.11/6e4a0c11-2349-4846-9d9b-ccf6ef9ea43a.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Synopsys_0.11/6e4a0c11-2349-4846-9d9b-ccf6ef9ea43a.json
deleted file mode 100644
index e7d6c471a..000000000
--- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Synopsys_0.11/6e4a0c11-2349-4846-9d9b-ccf6ef9ea43a.json
+++ /dev/null
@@ -1,105 +0,0 @@
-{
-  "schema_version": "0.1.0",
-  "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_Synopsys_0.11/1762652579.780885",
-  "retrieved_timestamp": "1762652579.780886",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party",
-    "source_name": "HF Open LLM v2",
-    "source_type": "documentation"
-  },
-  "model_info": {
-    "name": "Nexesenex/Llama_3.2_1b_Synopsys_0.11",
-    "developer": "meta",
-    "inference_platform": "unknown",
-    "id": "Nexesenex/Llama_3.2_1b_Synopsys_0.11",
-    "additional_details": {
-      "precision": "bfloat16",
-      "architecture": "LlamaForCausalLM",
-      "params_billions": 1.498
-    }
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.28421698870109086
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.31019707628668325
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.01283987915407855
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.2625838926174497
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.35133333333333333
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.11228390957446809
-      }
-    }
-  ]
-}
diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_3b_Kermes_v1/f81acd72-b38a-424a-878b-833d094518da.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_3b_Kermes_v1/f81acd72-b38a-424a-878b-833d094518da.json
deleted file mode 100644
index 1d0712d0f..000000000
--- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_3b_Kermes_v1/f81acd72-b38a-424a-878b-833d094518da.json
+++ /dev/null
@@ -1,105 +0,0 @@
-{
-  "schema_version": "0.1.0",
-  "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_3b_Kermes_v1/1762652579.781107",
-  "retrieved_timestamp": "1762652579.781108",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party",
-    "source_name": "HF Open LLM v2",
-    "source_type": "documentation"
-  },
-  "model_info": {
-    "name": "Nexesenex/Llama_3.2_3b_Kermes_v1",
-    "developer": "meta",
-    "inference_platform": "unknown",
-    "id": "Nexesenex/Llama_3.2_3b_Kermes_v1",
-    "additional_details": {
-      "precision": "bfloat16",
-      "architecture": "LlamaForCausalLM",
-      "params_billions": 3.213
-    }
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.4851759996808468
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.4409910297279671
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.030966767371601207
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.27348993288590606
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.40702083333333333
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.2547373670212766
-      }
-    }
-  ]
-}
diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_3b_Kermes_v2.1/f4686eff-f1d7-49e0-85be-2a6c7f125e29.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_3b_Kermes_v2.1/f4686eff-f1d7-49e0-85be-2a6c7f125e29.json
deleted file mode 100644
index 2c6f34852..000000000
--- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_3b_Kermes_v2.1/f4686eff-f1d7-49e0-85be-2a6c7f125e29.json
+++ /dev/null
@@ -1,105 +0,0 @@
-{
-  "schema_version": "0.1.0",
-  "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_3b_Kermes_v2.1/1762652579.781543",
-  "retrieved_timestamp": "1762652579.781544",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party",
-    "source_name": "HF Open LLM v2",
-    "source_type": "documentation"
-  },
-  "model_info": {
-    "name": "Nexesenex/Llama_3.2_3b_Kermes_v2.1",
-    "developer": "meta",
-    "inference_platform": "unknown",
-    "id": "Nexesenex/Llama_3.2_3b_Kermes_v2.1",
-    "additional_details": {
-      "precision": "bfloat16",
-      "architecture": "LlamaForCausalLM",
-      "params_billions": 3.213
-    }
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.5583906257618674
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.44638999626044323
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.05211480362537765
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.27936241610738255
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.3963541666666666
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.26919880319148937
-      }
-    }
-  ]
-}
diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_3b_Kermes_v2/a3d85774-ddac-436f-9c64-a751d2924bb5.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_3b_Kermes_v2/a3d85774-ddac-436f-9c64-a751d2924bb5.json
deleted file mode 100644
index 4f9e1eaf4..000000000
--- a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_3b_Kermes_v2/a3d85774-ddac-436f-9c64-a751d2924bb5.json
+++ /dev/null
@@ -1,105 +0,0 @@
-{
-  "schema_version": "0.1.0",
-  "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_3b_Kermes_v2/1762652579.781325",
-  "retrieved_timestamp": "1762652579.781326",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party",
-    "source_name": "HF Open LLM v2",
-    "source_type": "documentation"
-  },
-  "model_info": {
-    "name": "Nexesenex/Llama_3.2_3b_Kermes_v2",
-    "developer": "meta",
-    "inference_platform": "unknown",
-    "id": "Nexesenex/Llama_3.2_3b_Kermes_v2",
-    "additional_details": {
-      "precision": "bfloat16",
-      "architecture": "LlamaForCausalLM",
-      "params_billions": 3.213
-    }
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.5753766672429155
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.44554539692939316
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.054380664652567974
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.2651006711409396
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.37781249999999994
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.2734375
-      }
-    }
-  ]
-}
diff --git a/data/hfopenllm_v2/meta/NotASI/FineTome-Llama3.2-1B-0929/2346a7eb-2148-49f3-b960-363ba6b776d4.json b/data/hfopenllm_v2/meta/NotASI/FineTome-Llama3.2-1B-0929/2346a7eb-2148-49f3-b960-363ba6b776d4.json
deleted file mode 100644
index cfd4713db..000000000
--- a/data/hfopenllm_v2/meta/NotASI/FineTome-Llama3.2-1B-0929/2346a7eb-2148-49f3-b960-363ba6b776d4.json
+++ /dev/null
@@ -1,105 +0,0 @@
-{
-  "schema_version": "0.1.0",
-  "evaluation_id": "hfopenllm_v2/NotASI_FineTome-Llama3.2-1B-0929/1762652579.788707",
-  "retrieved_timestamp": "1762652579.7887082",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party",
-    "source_name": "HF Open LLM v2",
-    "source_type": "documentation"
-  },
-  "model_info": {
-    "name": "NotASI/FineTome-Llama3.2-1B-0929",
-    "developer": "meta",
-    "inference_platform": "unknown",
-    "id": "NotASI/FineTome-Llama3.2-1B-0929",
-    "additional_details": {
-      "precision": "float16",
-      "architecture": "LlamaForCausalLM",
-      "params_billions": 1.236
-    }
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.39907223943580805
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.3246274874705644
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.03625377643504532
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.2726510067114094
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.3487604166666667
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.1428690159574468
-      }
-    }
-  ]
-}
diff --git a/data/hfopenllm_v2/meta/NotASI/FineTome-Llama3.2-3B-1002/e701f5dc-d604-4bbb-8e92-37d69781ae5f.json b/data/hfopenllm_v2/meta/NotASI/FineTome-Llama3.2-3B-1002/e701f5dc-d604-4bbb-8e92-37d69781ae5f.json
deleted file mode 100644
index 9a78f6eb0..000000000
--- a/data/hfopenllm_v2/meta/NotASI/FineTome-Llama3.2-3B-1002/e701f5dc-d604-4bbb-8e92-37d69781ae5f.json
+++ /dev/null
@@ -1,105 +0,0 @@
-{
-  "schema_version": "0.1.0",
-  "evaluation_id": "hfopenllm_v2/NotASI_FineTome-Llama3.2-3B-1002/1762652579.788946",
-  "retrieved_timestamp": "1762652579.7889469",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party",
-    "source_name": "HF Open LLM v2",
-    "source_type": "documentation"
-  },
-  "model_info": {
-    "name": "NotASI/FineTome-Llama3.2-3B-1002",
-    "developer": "meta",
-    "inference_platform": "unknown",
-    "id": "NotASI/FineTome-Llama3.2-3B-1002",
-    "additional_details": {
-      "precision": "float16",
-      "architecture": "LlamaForCausalLM",
-      "params_billions": 3.0
-    }
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.5474496558021605
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.4319470614025341
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.06268882175226587
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.25083892617449666
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.3685104166666667
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.24368351063829788
-      }
-    }
-  ]
-}
diff --git a/data/hfopenllm_v2/meta/NotASI/FineTome-v1.5-Llama3.2-1B-1007/8c67c634-82f0-4bb8-bd70-e98902649d96.json b/data/hfopenllm_v2/meta/NotASI/FineTome-v1.5-Llama3.2-1B-1007/8c67c634-82f0-4bb8-bd70-e98902649d96.json
deleted file mode 100644
index 5d545a2db..000000000
--- a/data/hfopenllm_v2/meta/NotASI/FineTome-v1.5-Llama3.2-1B-1007/8c67c634-82f0-4bb8-bd70-e98902649d96.json
+++ /dev/null
@@ -1,105 +0,0 @@
-{
-  "schema_version": "0.1.0",
-  "evaluation_id": "hfopenllm_v2/NotASI_FineTome-v1.5-Llama3.2-1B-1007/1762652579.789186",
-  "retrieved_timestamp": "1762652579.789187",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party",
-    "source_name": "HF Open LLM v2",
-    "source_type": "documentation"
-  },
-  "model_info": {
-    "name": "NotASI/FineTome-v1.5-Llama3.2-1B-1007",
-    "developer": "meta",
-    "inference_platform": "unknown",
-    "id": "NotASI/FineTome-v1.5-Llama3.2-1B-1007",
-    "additional_details": {
-      "precision": "float16",
-      "architecture": "LlamaForCausalLM",
-      "params_billions": 1.236
-    }
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.39237777984636324
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.32405671121485663
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.03172205438066465
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.25
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.34745833333333337
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.1427027925531915
-      }
-    }
-  ]
-}
diff --git a/data/hfopenllm_v2/meta/NotASI/FineTome-v1.5-Llama3.2-3B-1007/d8a359e5-2899-4d3f-9fb4-3120f61951f4.json b/data/hfopenllm_v2/meta/NotASI/FineTome-v1.5-Llama3.2-3B-1007/d8a359e5-2899-4d3f-9fb4-3120f61951f4.json
deleted file mode 100644
index 1e4ae93c3..000000000
--- a/data/hfopenllm_v2/meta/NotASI/FineTome-v1.5-Llama3.2-3B-1007/d8a359e5-2899-4d3f-9fb4-3120f61951f4.json
+++ /dev/null
@@ -1,105 +0,0 @@
-{
-  "schema_version": "0.1.0",
-  "evaluation_id": "hfopenllm_v2/NotASI_FineTome-v1.5-Llama3.2-3B-1007/1762652579.789401",
-  "retrieved_timestamp": "1762652579.789401",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party",
-    "source_name": "HF Open LLM v2",
-    "source_type": "documentation"
-  },
-  "model_info": {
-    "name": "NotASI/FineTome-v1.5-Llama3.2-3B-1007",
-    "developer": "meta",
-    "inference_platform": "unknown",
-    "id": "NotASI/FineTome-v1.5-Llama3.2-3B-1007",
-    "additional_details": {
-      "precision": "float16",
-      "architecture": "LlamaForCausalLM",
-      "params_billions": 3.213
-    }
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.5507719517546776
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.4312372935321582
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.06419939577039276
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.26174496644295303
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.3645416666666667
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.2448470744680851
-      }
-    }
-  ]
-}
diff --git a/data/hfopenllm_v2/meta/NousResearch/Hermes-2-Pro-Llama-3-8B/af47ca72-b9b5-4cf3-84a7-e2f4602e6eaa.json b/data/hfopenllm_v2/meta/NousResearch/Hermes-2-Pro-Llama-3-8B/af47ca72-b9b5-4cf3-84a7-e2f4602e6eaa.json
deleted file mode 100644
index e6fc74f16..000000000
--- a/data/hfopenllm_v2/meta/NousResearch/Hermes-2-Pro-Llama-3-8B/af47ca72-b9b5-4cf3-84a7-e2f4602e6eaa.json
+++ /dev/null
@@ -1,105 +0,0 @@
-{
-  "schema_version": "0.1.0",
-  "evaluation_id": "hfopenllm_v2/NousResearch_Hermes-2-Pro-Llama-3-8B/1762652579.78989",
-  "retrieved_timestamp": "1762652579.789891",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party",
-    "source_name": "HF Open LLM v2",
-    "source_type": "documentation"
-  },
-  "model_info": {
-    "name": "NousResearch/Hermes-2-Pro-Llama-3-8B",
-    "developer": "meta",
-    "inference_platform": "unknown",
-    "id": "NousResearch/Hermes-2-Pro-Llama-3-8B",
-    "additional_details": {
-      "precision": "float16",
-      "architecture": "LlamaForCausalLM",
-      "params_billions": 8.031
-    }
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.5361839918084017
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.507112624310082
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.08383685800604229
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.29278523489932884
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.4262395833333333
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.30518617021276595
-      }
-    }
-  ]
-}
diff --git a/data/hfopenllm_v2/meta/NousResearch/Hermes-2-Theta-Llama-3-8B/99c4b14f-8ea6-4f6e-af65-1e2ee58eeca9.json b/data/hfopenllm_v2/meta/NousResearch/Hermes-2-Theta-Llama-3-8B/99c4b14f-8ea6-4f6e-af65-1e2ee58eeca9.json
deleted file mode 100644
index 0bef36793..000000000
--- a/data/hfopenllm_v2/meta/NousResearch/Hermes-2-Theta-Llama-3-8B/99c4b14f-8ea6-4f6e-af65-1e2ee58eeca9.json
+++ /dev/null
@@ -1,105 +0,0 @@
-{
-  "schema_version": "0.1.0",
-  "evaluation_id": "hfopenllm_v2/NousResearch_Hermes-2-Theta-Llama-3-8B/1762652579.79036",
-  "retrieved_timestamp": "1762652579.79036",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party",
-    "source_name": "HF Open LLM v2",
-    "source_type": "documentation"
-  },
-  "model_info": {
-    "name": "NousResearch/Hermes-2-Theta-Llama-3-8B",
-    "developer": "meta",
-    "inference_platform": "unknown",
-    "id": "NousResearch/Hermes-2-Theta-Llama-3-8B",
-    "additional_details": {
-      "precision": "bfloat16",
-      "architecture": "LlamaForCausalLM",
-      "params_billions": 8.03
-    }
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.6517883659800441
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.5206672260911865
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.09667673716012085
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.3036912751677852
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.3948958333333334
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.33685172872340424
-      }
-    }
-  ]
-}
diff --git a/data/hfopenllm_v2/meta/NousResearch/Hermes-3-Llama-3.1-70B/e48bd1d8-1082-4b79-8145-87d7f013fb82.json b/data/hfopenllm_v2/meta/NousResearch/Hermes-3-Llama-3.1-70B/e48bd1d8-1082-4b79-8145-87d7f013fb82.json
deleted file mode 100644
index 277207797..000000000
--- a/data/hfopenllm_v2/meta/NousResearch/Hermes-3-Llama-3.1-70B/e48bd1d8-1082-4b79-8145-87d7f013fb82.json
+++ /dev/null
@@ -1,105 +0,0 @@
-{
-  "schema_version": "0.1.0",
-  "evaluation_id": "hfopenllm_v2/NousResearch_Hermes-3-Llama-3.1-70B/1762652579.7905731",
-  "retrieved_timestamp": "1762652579.7905731",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party",
-    "source_name": "HF Open LLM v2",
-    "source_type": "documentation"
-  },
-  "model_info": {
-    "name": "NousResearch/Hermes-3-Llama-3.1-70B",
-    "developer": "meta",
-    "inference_platform": "unknown",
-    "id": "NousResearch/Hermes-3-Llama-3.1-70B",
-    "additional_details": {
-      "precision": "bfloat16",
-      "architecture": "LlamaForCausalLM",
-      "params_billions": 70.554
-    }
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.7661438316998896
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.6755780641387483
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.20996978851963746
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.3615771812080537
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.4948958333333333
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.47265625
-      }
-    }
-  ]
-}
diff --git a/data/hfopenllm_v2/meta/NousResearch/Hermes-3-Llama-3.1-8B/b9300d76-c854-48a2-a900-b661c1fae7bf.json b/data/hfopenllm_v2/meta/NousResearch/Hermes-3-Llama-3.1-8B/b9300d76-c854-48a2-a900-b661c1fae7bf.json
deleted file mode 100644
index 4b2ae4744..000000000
--- a/data/hfopenllm_v2/meta/NousResearch/Hermes-3-Llama-3.1-8B/b9300d76-c854-48a2-a900-b661c1fae7bf.json
+++ /dev/null
@@ -1,105 +0,0 @@
-{
-  "schema_version": "0.1.0",
-  "evaluation_id": "hfopenllm_v2/NousResearch_Hermes-3-Llama-3.1-8B/1762652579.790786",
-  "retrieved_timestamp": "1762652579.790787",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party",
-    "source_name": "HF Open LLM v2",
-    "source_type": "documentation"
-  },
-  "model_info": {
-    "name": "NousResearch/Hermes-3-Llama-3.1-8B",
-    "developer": "meta",
-    "inference_platform": "unknown",
-    "id": "NousResearch/Hermes-3-Llama-3.1-8B",
-    "additional_details": {
-      "precision": "bfloat16",
-      "architecture": "LlamaForCausalLM",
-      "params_billions": 8.03
-    }
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.6170172918966121
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.5177452540141246
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.04758308157099698
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.2978187919463087
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.4369375
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.3139128989361702
-      }
-    }
-  ]
-}
diff --git a/data/hfopenllm_v2/meta/NousResearch/Hermes-3-Llama-3.2-3B/7e5f7bc1-1f9a-497a-a903-7d612bb923ca.json b/data/hfopenllm_v2/meta/NousResearch/Hermes-3-Llama-3.2-3B/7e5f7bc1-1f9a-497a-a903-7d612bb923ca.json
deleted file mode 100644
index 92bf3dd89..000000000
--- a/data/hfopenllm_v2/meta/NousResearch/Hermes-3-Llama-3.2-3B/7e5f7bc1-1f9a-497a-a903-7d612bb923ca.json
+++ /dev/null
@@ -1,105 +0,0 @@
-{
-  "schema_version": "0.1.0",
-  "evaluation_id": "hfopenllm_v2/NousResearch_Hermes-3-Llama-3.2-3B/1762652579.790994",
-  "retrieved_timestamp": "1762652579.790995",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party",
-    "source_name": "HF Open LLM v2",
-    "source_type": "documentation"
-  },
-  "model_info": {
-    "name": "NousResearch/Hermes-3-Llama-3.2-3B",
-    "developer": "meta",
-    "inference_platform": "unknown",
-    "id": "NousResearch/Hermes-3-Llama-3.2-3B",
-    "additional_details": {
-      "precision": "bfloat16",
-      "architecture": "LlamaForCausalLM",
-      "params_billions": 3.213
-    }
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.3824862476008103
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.43519901506714875
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.03927492447129909
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.2751677852348993
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.40302083333333333
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.25440492021276595
-      }
-    }
-  ]
-}
diff --git a/data/hfopenllm_v2/meta/NousResearch/Nous-Hermes-llama-2-7b/6ab36d53-da10-4f80-bd1b-dc037a020362.json b/data/hfopenllm_v2/meta/NousResearch/Nous-Hermes-llama-2-7b/6ab36d53-da10-4f80-bd1b-dc037a020362.json
deleted file mode 100644
index 0dba6ee42..000000000
--- a/data/hfopenllm_v2/meta/NousResearch/Nous-Hermes-llama-2-7b/6ab36d53-da10-4f80-bd1b-dc037a020362.json
+++ /dev/null
@@ -1,105 +0,0 @@
-{
-  "schema_version": "0.1.0",
-  "evaluation_id": "hfopenllm_v2/NousResearch_Nous-Hermes-llama-2-7b/1762652579.792065",
-  "retrieved_timestamp": "1762652579.792066",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party",
-    "source_name": "HF Open LLM v2",
-    "source_type": "documentation"
-  },
-  "model_info": {
-    "name": "NousResearch/Nous-Hermes-llama-2-7b",
-    "developer": "meta",
-    "inference_platform": "unknown",
-    "id": "NousResearch/Nous-Hermes-llama-2-7b",
-    "additional_details": {
-      "precision": "bfloat16",
-      "architecture": "LlamaForCausalLM",
-      "params_billions": 6.738
-    }
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.17290788441335658
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.3823937686034717
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.00906344410876133
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.2634228187919463
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.42571875
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.19398271276595744
-      }
-    }
-  ]
-}
diff --git a/data/hfopenllm_v2/meta/NousResearch/Yarn-Llama-2-13b-128k/e067537a-a621-483f-b1cf-ee78f57a39da.json b/data/hfopenllm_v2/meta/NousResearch/Yarn-Llama-2-13b-128k/e067537a-a621-483f-b1cf-ee78f57a39da.json
deleted file mode 100644
index 56562effc..000000000
--- a/data/hfopenllm_v2/meta/NousResearch/Yarn-Llama-2-13b-128k/e067537a-a621-483f-b1cf-ee78f57a39da.json
+++ /dev/null
@@ -1,105 +0,0 @@
-{
-  "schema_version": "0.1.0",
-  "evaluation_id": "hfopenllm_v2/NousResearch_Yarn-Llama-2-13b-128k/1762652579.792277",
-  "retrieved_timestamp": "1762652579.792278",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party",
-    "source_name": "HF Open LLM v2",
-    "source_type": "documentation"
-  },
-  "model_info": {
-    "name": "NousResearch/Yarn-Llama-2-13b-128k",
-    "developer": "meta",
-    "inference_platform": "unknown",
-    "id": "NousResearch/Yarn-Llama-2-13b-128k",
-    "additional_details": {
-      "precision": "bfloat16",
-      "architecture": "LlamaForCausalLM",
-      "params_billions": 13.0
-    }
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.16546430138698653
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.3826816443733663
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.017371601208459216
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.25838926174496646
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.34575
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.23204787234042554
-      }
-    }
-  ]
-}
diff --git a/data/hfopenllm_v2/meta/NousResearch/Yarn-Llama-2-7b-128k/e3e717a5-a987-4e94-a528-9aafadb6774f.json b/data/hfopenllm_v2/meta/NousResearch/Yarn-Llama-2-7b-128k/e3e717a5-a987-4e94-a528-9aafadb6774f.json
deleted file mode 100644
index c5cd31d53..000000000
--- a/data/hfopenllm_v2/meta/NousResearch/Yarn-Llama-2-7b-128k/e3e717a5-a987-4e94-a528-9aafadb6774f.json
+++ /dev/null
@@ -1,105 +0,0 @@
-{
-  "schema_version": "0.1.0",
-  "evaluation_id": "hfopenllm_v2/NousResearch_Yarn-Llama-2-7b-128k/1762652579.792481",
-  "retrieved_timestamp": "1762652579.7924821",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party",
-    "source_name": "HF Open LLM v2",
-    "source_type": "documentation"
-  },
-  "model_info": {
-    "name": "NousResearch/Yarn-Llama-2-7b-128k",
-    "developer": "meta",
-    "inference_platform": "unknown",
-    "id": "NousResearch/Yarn-Llama-2-7b-128k",
-    "additional_details": {
-      "precision": "bfloat16",
-      "architecture": "LlamaForCausalLM",
-      "params_billions": 7.0
-    }
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.14847825990593846
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.32480295375597734
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.015105740181268883
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.2600671140939597
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.39669791666666665
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.1791057180851064
-      }
-    }
-  ]
-}
diff --git a/data/hfopenllm_v2/meta/NousResearch/Yarn-Llama-2-7b-64k/50db2b1d-e0b5-43b1-86e2-5fa55fb3a960.json b/data/hfopenllm_v2/meta/NousResearch/Yarn-Llama-2-7b-64k/50db2b1d-e0b5-43b1-86e2-5fa55fb3a960.json
deleted file mode 100644
index 592c8bf80..000000000
--- a/data/hfopenllm_v2/meta/NousResearch/Yarn-Llama-2-7b-64k/50db2b1d-e0b5-43b1-86e2-5fa55fb3a960.json
+++ /dev/null
@@ -1,105 +0,0 @@
-{
-  "schema_version": "0.1.0",
-  "evaluation_id": "hfopenllm_v2/NousResearch_Yarn-Llama-2-7b-64k/1762652579.7927492",
-  "retrieved_timestamp": "1762652579.792753",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party",
-    "source_name": "HF Open LLM v2",
-    "source_type": "documentation"
-  },
-  "model_info": {
-    "name": "NousResearch/Yarn-Llama-2-7b-64k",
-    "developer": "meta",
-    "inference_platform": "unknown",
-    "id": "NousResearch/Yarn-Llama-2-7b-64k",
-    "additional_details": {
-      "precision": "bfloat16",
-      "architecture": "LlamaForCausalLM",
-      "params_billions": 7.0
-    }
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.1699856381068897
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.3326277865253592
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.015861027190332326
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.26426174496644295
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.393875
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.17985372340425532
-      }
-    }
-  ]
-}
diff --git a/data/hfopenllm_v2/meta/OEvortex/Emotional-llama-8B/c2593003-ca2a-4699-8473-a07683e7cd85.json b/data/hfopenllm_v2/meta/OEvortex/Emotional-llama-8B/c2593003-ca2a-4699-8473-a07683e7cd85.json
deleted file mode 100644
index 4394b6016..000000000
--- a/data/hfopenllm_v2/meta/OEvortex/Emotional-llama-8B/c2593003-ca2a-4699-8473-a07683e7cd85.json
+++ /dev/null
@@ -1,105 +0,0 @@
-{
-  "schema_version": "0.1.0",
-  "evaluation_id": "hfopenllm_v2/OEvortex_Emotional-llama-8B/1762652579.797152",
-  "retrieved_timestamp": "1762652579.797153",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party",
-    "source_name": "HF Open LLM v2",
-    "source_type": "documentation"
-  },
-  "model_info": {
-    "name": "OEvortex/Emotional-llama-8B",
-    "developer": "meta",
-    "inference_platform": "unknown",
-    "id": "OEvortex/Emotional-llama-8B",
-    "additional_details": {
-      "precision": "bfloat16",
-      "architecture": "LlamaForCausalLM",
-      "params_billions": 8.03
-    }
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.3516369898535885
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.4838573702054177
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.08157099697885196
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.29446308724832215
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.365875
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.35347406914893614
-      }
-    }
-  ]
-}
diff --git a/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3-70b-v21.2-32k/3d49db5c-bcd1-4d2f-9616-c551a53bdebe.json b/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3-70b-v21.2-32k/3d49db5c-bcd1-4d2f-9616-c551a53bdebe.json
deleted file mode 100644
index e3786cd2a..000000000
--- a/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3-70b-v21.2-32k/3d49db5c-bcd1-4d2f-9616-c551a53bdebe.json
+++ /dev/null
@@ -1,105 +0,0 @@
-{
-  "schema_version": "0.1.0",
-  "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-llama3-70b-v21.2-32k/1762652579.8002949",
-  "retrieved_timestamp": "1762652579.8002958",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party",
-    "source_name": "HF Open LLM v2",
-    "source_type": "documentation"
-  },
-  "model_info": {
-    "name": "OpenBuddy/openbuddy-llama3-70b-v21.2-32k",
-    "developer": "meta",
-    "inference_platform": "unknown",
-    "id": "OpenBuddy/openbuddy-llama3-70b-v21.2-32k",
-    "additional_details": {
-      "precision": "bfloat16",
-      "architecture": "LlamaForCausalLM",
-      "params_billions": 70.554
-    }
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.7010476646409305
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.6507443429944494
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.20317220543806647
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.3422818791946309
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.45796875000000004
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.4832114361702128
-      }
-    }
-  ]
-}
diff --git a/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3-8b-v21.1-8k/2a86c8f6-2aed-4e0c-ad8a-e9ff5065a1e4.json b/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3-8b-v21.1-8k/2a86c8f6-2aed-4e0c-ad8a-e9ff5065a1e4.json
deleted file mode 100644
index 0a42bb9ec..000000000
--- a/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3-8b-v21.1-8k/2a86c8f6-2aed-4e0c-ad8a-e9ff5065a1e4.json
+++ /dev/null
@@ -1,105 +0,0 @@
-{
-  "schema_version": "0.1.0",
-  "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-llama3-8b-v21.1-8k/1762652579.800596",
-  "retrieved_timestamp": "1762652579.800596",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party",
-    "source_name": "HF Open LLM v2",
-    "source_type": "documentation"
-  },
-  "model_info": {
-    "name": "OpenBuddy/openbuddy-llama3-8b-v21.1-8k",
-    "developer": "meta",
-    "inference_platform": "unknown",
-    "id": "OpenBuddy/openbuddy-llama3-8b-v21.1-8k",
-    "additional_details": {
-      "precision": "bfloat16",
-      "architecture": "LlamaForCausalLM",
-      "params_billions": 8.03
-    }
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.5569666263292509
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.47875007373484046
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.04305135951661632
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.2709731543624161
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.3987708333333333
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.2954621010638298
-      }
-    }
-  ]
-}
diff --git a/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3-8b-v21.2-32k/960fabe4-5395-4d3f-9680-65fe0b8655ac.json b/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3-8b-v21.2-32k/960fabe4-5395-4d3f-9680-65fe0b8655ac.json
deleted file mode 100644
index af619462f..000000000
--- a/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3-8b-v21.2-32k/960fabe4-5395-4d3f-9680-65fe0b8655ac.json
+++ /dev/null
@@ -1,105 +0,0 @@
-{
-  "schema_version": "0.1.0",
-  "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-llama3-8b-v21.2-32k/1762652579.800807",
-  "retrieved_timestamp": "1762652579.800808",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party",
-    "source_name": "HF Open LLM v2",
-    "source_type": "documentation"
-  },
-  "model_info": {
-    "name": "OpenBuddy/openbuddy-llama3-8b-v21.2-32k",
-    "developer": "meta",
-    "inference_platform": "unknown",
-    "id": "OpenBuddy/openbuddy-llama3-8b-v21.2-32k",
-    "additional_details": {
-      "precision": "bfloat16",
-      "architecture": "LlamaForCausalLM",
-      "params_billions": 8.03
-    }
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.6191904147661538
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.4856219845879779
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.07854984894259819
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.27936241610738255
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
"score_details": { - "score": 0.377875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3298703457446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.1-70b-v22.1-131k/77d10b46-e3cf-42a0-b215-f9f8ff5ef60d.json b/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.1-70b-v22.1-131k/77d10b46-e3cf-42a0-b215-f9f8ff5ef60d.json deleted file mode 100644 index 2d381b224..000000000 --- a/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.1-70b-v22.1-131k/77d10b46-e3cf-42a0-b215-f9f8ff5ef60d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-llama3.1-70b-v22.1-131k/1762652579.801551", - "retrieved_timestamp": "1762652579.801553", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-llama3.1-70b-v22.1-131k", - "developer": "meta", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-llama3.1-70b-v22.1-131k", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7332710541363582 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6698491606025763 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3950151057401813 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.375 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46295833333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5304188829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.1-8b-v22.2-131k/b57cd648-1503-4bbf-81d7-4ca72ac9ff27.json b/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.1-8b-v22.2-131k/b57cd648-1503-4bbf-81d7-4ca72ac9ff27.json deleted file mode 100644 index 44315a288..000000000 --- a/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.1-8b-v22.2-131k/b57cd648-1503-4bbf-81d7-4ca72ac9ff27.json +++ 
/dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-llama3.1-8b-v22.2-131k/1762652579.801888", - "retrieved_timestamp": "1762652579.801889", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-llama3.1-8b-v22.2-131k", - "developer": "meta", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-llama3.1-8b-v22.2-131k", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6657269378582162 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5006515954024578 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1148036253776435 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40810416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3310339095744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.1-8b-v22.3-131k/7abaa7f8-8378-496c-b5f8-ac9046eeccc8.json b/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.1-8b-v22.3-131k/7abaa7f8-8378-496c-b5f8-ac9046eeccc8.json deleted file mode 100644 index 92ed1b2aa..000000000 --- a/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.1-8b-v22.3-131k/7abaa7f8-8378-496c-b5f8-ac9046eeccc8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-llama3.1-8b-v22.3-131k/1762652579.8021362", - "retrieved_timestamp": "1762652579.802138", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-llama3.1-8b-v22.3-131k", - "developer": "meta", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-llama3.1-8b-v22.3-131k", - "additional_details": { - 
"precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5997065563815123 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5065914870348772 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40146875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3277094414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.2-1b-v23.1-131k/85379044-198d-4fb5-82c8-50857f8d65d0.json b/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.2-1b-v23.1-131k/85379044-198d-4fb5-82c8-50857f8d65d0.json deleted file mode 100644 index fe3c543c0..000000000 --- a/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.2-1b-v23.1-131k/85379044-198d-4fb5-82c8-50857f8d65d0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-llama3.2-1b-v23.1-131k/1762652579.802413", - "retrieved_timestamp": "1762652579.8024142", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-llama3.2-1b-v23.1-131k", - "developer": "meta", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-llama3.2-1b-v23.1-131k", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3590052172679601 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3266563226631131 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33421875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1840093085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.2-3b-v23.2-131k/6d6e86f6-f1b7-42ef-9581-b0542e6e12ef.json b/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.2-3b-v23.2-131k/6d6e86f6-f1b7-42ef-9581-b0542e6e12ef.json deleted file mode 100644 index 735541036..000000000 --- a/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.2-3b-v23.2-131k/6d6e86f6-f1b7-42ef-9581-b0542e6e12ef.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-llama3.2-3b-v23.2-131k/1762652579.802651", - "retrieved_timestamp": "1762652579.802652", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-llama3.2-3b-v23.2-131k", - "developer": "meta", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-llama3.2-3b-v23.2-131k", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.607 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4319450169993395 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4072660342069299 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3263125 - } - 
}, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2479222074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.3-70b-v24.1-131k/49768a60-0b77-4945-a048-013a6fb719ca.json b/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.3-70b-v24.1-131k/49768a60-0b77-4945-a048-013a6fb719ca.json deleted file mode 100644 index 5153b9d5f..000000000 --- a/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.3-70b-v24.1-131k/49768a60-0b77-4945-a048-013a6fb719ca.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-llama3.3-70b-v24.1-131k/1762652579.802965", - "retrieved_timestamp": "1762652579.8029802", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-llama3.3-70b-v24.1-131k", - "developer": "meta", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-llama3.3-70b-v24.1-131k", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.812080834408259 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6858038620320306 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44108761329305135 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43456375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4869270833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5327460106382979 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.1-200k/489b8b24-4295-41b3-b286-14f79972fe93.json b/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.1-200k/489b8b24-4295-41b3-b286-14f79972fe93.json deleted file mode 100644 index f25f827d1..000000000 --- a/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.1-200k/489b8b24-4295-41b3-b286-14f79972fe93.json +++ 
/dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-qwen2.5llamaify-14b-v23.1-200k/1762652579.804163", - "retrieved_timestamp": "1762652579.8041642", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.1-200k", - "developer": "meta", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.1-200k", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.630880508162786 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.601319898776811 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2537764350453172 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33305369127516776 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42404166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4673371010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.3-200k/ce4e7736-51d8-431a-9bef-ac2bcb3ff0fe.json b/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.3-200k/ce4e7736-51d8-431a-9bef-ac2bcb3ff0fe.json deleted file mode 100644 index c45b9a414..000000000 --- a/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.3-200k/ce4e7736-51d8-431a-9bef-ac2bcb3ff0fe.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-qwen2.5llamaify-14b-v23.3-200k/1762652579.8044102", - "retrieved_timestamp": "1762652579.804411", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.3-200k", - "developer": "meta", - "inference_platform": "unknown", - "id": 
"OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.3-200k", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6131453432448126 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6080855261046028 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2311178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271812080536913 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4345833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4794714095744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-qwen2.5llamaify-7b-v23.1-200k/d5f3ca22-b682-47c6-a7ba-93b401cb8c8f.json b/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-qwen2.5llamaify-7b-v23.1-200k/d5f3ca22-b682-47c6-a7ba-93b401cb8c8f.json deleted file mode 100644 index 00fc5e07d..000000000 --- a/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-qwen2.5llamaify-7b-v23.1-200k/d5f3ca22-b682-47c6-a7ba-93b401cb8c8f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-qwen2.5llamaify-7b-v23.1-200k/1762652579.804652", - "retrieved_timestamp": "1762652579.8046532", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-qwen2.5llamaify-7b-v23.1-200k", - "developer": "meta", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-qwen2.5llamaify-7b-v23.1-200k", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.615 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5672582082208539 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.5509381466888461 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18882175226586104 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43632291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.394780585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/OpenLeecher/llama3-8b-lima/b482d6e6-8520-4a77-a729-ebe2e9635a6c.json b/data/hfopenllm_v2/meta/OpenLeecher/llama3-8b-lima/b482d6e6-8520-4a77-a729-ebe2e9635a6c.json deleted file mode 100644 index d001c40bf..000000000 --- a/data/hfopenllm_v2/meta/OpenLeecher/llama3-8b-lima/b482d6e6-8520-4a77-a729-ebe2e9635a6c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/OpenLeecher_llama3-8b-lima/1762652579.807648", - "retrieved_timestamp": "1762652579.8076491", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "OpenLeecher/llama3-8b-lima", - "developer": "meta", - "inference_platform": "unknown", - "id": "OpenLeecher/llama3-8b-lima", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43706587410293574 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4295828632822993 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23825503355704697 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.37127083333333327 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26263297872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/OpenScholar/Llama-3.1_OpenScholar-8B/1e6ea564-30ff-4db3-8bb6-070da34e3fb5.json b/data/hfopenllm_v2/meta/OpenScholar/Llama-3.1_OpenScholar-8B/1e6ea564-30ff-4db3-8bb6-070da34e3fb5.json deleted file mode 100644 index 03926b5aa..000000000 --- a/data/hfopenllm_v2/meta/OpenScholar/Llama-3.1_OpenScholar-8B/1e6ea564-30ff-4db3-8bb6-070da34e3fb5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/OpenScholar_Llama-3.1_OpenScholar-8B/1762652579.807913", - "retrieved_timestamp": "1762652579.807913", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "OpenScholar/Llama-3.1_OpenScholar-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "OpenScholar/Llama-3.1_OpenScholar-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6064010159709571 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5207740834450674 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16540785498489427 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4275104166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.370844414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2/3b02898e-b47f-4d53-9bd4-575d47df29af.json b/data/hfopenllm_v2/meta/Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2/3b02898e-b47f-4d53-9bd4-575d47df29af.json deleted file mode 100644 index 4df946123..000000000 --- a/data/hfopenllm_v2/meta/Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2/3b02898e-b47f-4d53-9bd4-575d47df29af.json +++ /dev/null @@ -1,105 
+0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Orenguteng_Llama-3.1-8B-Lexi-Uncensored-V2/1762652579.808416", - "retrieved_timestamp": "1762652579.808417", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2", - "developer": "meta", - "inference_platform": "unknown", - "id": "Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7791581891603169 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5084008018783934 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1971299093655589 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3842916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3780751329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Orenguteng/Llama-3.1-8B-Lexi-Uncensored/fe095b66-350c-4236-ab1b-e2e19af73486.json b/data/hfopenllm_v2/meta/Orenguteng/Llama-3.1-8B-Lexi-Uncensored/fe095b66-350c-4236-ab1b-e2e19af73486.json deleted file mode 100644 index 2ad57994d..000000000 --- a/data/hfopenllm_v2/meta/Orenguteng/Llama-3.1-8B-Lexi-Uncensored/fe095b66-350c-4236-ab1b-e2e19af73486.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Orenguteng_Llama-3.1-8B-Lexi-Uncensored/1762652579.8081658", - "retrieved_timestamp": "1762652579.808167", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Orenguteng/Llama-3.1-8B-Lexi-Uncensored", - "developer": "meta", - "inference_platform": "unknown", - "id": "Orenguteng/Llama-3.1-8B-Lexi-Uncensored", - "additional_details": { - "precision": "bfloat16", - "architecture": 
"LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7776843220432896 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5057261652642643 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15709969788519637 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3871145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37898936170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/PJMixers-Dev/LLaMa-3.1-RomboTiesTest-8B/0130c0ac-a790-492d-aac2-55e999b724ef.json b/data/hfopenllm_v2/meta/PJMixers-Dev/LLaMa-3.1-RomboTiesTest-8B/0130c0ac-a790-492d-aac2-55e999b724ef.json deleted file mode 100644 index abcf243b3..000000000 --- a/data/hfopenllm_v2/meta/PJMixers-Dev/LLaMa-3.1-RomboTiesTest-8B/0130c0ac-a790-492d-aac2-55e999b724ef.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/PJMixers-Dev_LLaMa-3.1-RomboTiesTest-8B/1762652579.8100638", - "retrieved_timestamp": "1762652579.8100648", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "PJMixers-Dev/LLaMa-3.1-RomboTiesTest-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "PJMixers-Dev/LLaMa-3.1-RomboTiesTest-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7825303527972447 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5073267838961463 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2001510574018127 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3869895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3767453457446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/PJMixers-Dev/LLaMa-3.1-RomboTiesTest2-8B/dbfe2c89-a7c8-4fe5-95a1-cf1a58b6f55c.json b/data/hfopenllm_v2/meta/PJMixers-Dev/LLaMa-3.1-RomboTiesTest2-8B/dbfe2c89-a7c8-4fe5-95a1-cf1a58b6f55c.json deleted file mode 100644 index 21087884c..000000000 --- a/data/hfopenllm_v2/meta/PJMixers-Dev/LLaMa-3.1-RomboTiesTest2-8B/dbfe2c89-a7c8-4fe5-95a1-cf1a58b6f55c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/PJMixers-Dev_LLaMa-3.1-RomboTiesTest2-8B/1762652579.810312", - "retrieved_timestamp": "1762652579.810313", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "PJMixers-Dev/LLaMa-3.1-RomboTiesTest2-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "PJMixers-Dev/LLaMa-3.1-RomboTiesTest2-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7825303527972447 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5073267838961463 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2001510574018127 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3869895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3767453457446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/PJMixers/LLaMa-3-CursedStock-v2.0-8B/4f7c69a5-70e5-4f7b-9520-9fa9e642df57.json b/data/hfopenllm_v2/meta/PJMixers/LLaMa-3-CursedStock-v2.0-8B/4f7c69a5-70e5-4f7b-9520-9fa9e642df57.json deleted file mode 100644 index d94ae571b..000000000 --- a/data/hfopenllm_v2/meta/PJMixers/LLaMa-3-CursedStock-v2.0-8B/4f7c69a5-70e5-4f7b-9520-9fa9e642df57.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/PJMixers_LLaMa-3-CursedStock-v2.0-8B/1762652579.809348", - "retrieved_timestamp": "1762652579.809348", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "PJMixers/LLaMa-3-CursedStock-v2.0-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "PJMixers/LLaMa-3-CursedStock-v2.0-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6330791189599152 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.527115950402997 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09441087613293052 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38562500000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3556349734042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/RLHFlow/ArmoRM-Llama3-8B-v0.1/b8ce63dd-5c8a-4bba-b381-147efcdcc161.json b/data/hfopenllm_v2/meta/RLHFlow/ArmoRM-Llama3-8B-v0.1/b8ce63dd-5c8a-4bba-b381-147efcdcc161.json deleted file mode 100644 index f56fcf11a..000000000 --- a/data/hfopenllm_v2/meta/RLHFlow/ArmoRM-Llama3-8B-v0.1/b8ce63dd-5c8a-4bba-b381-147efcdcc161.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/RLHFlow_ArmoRM-Llama3-8B-v0.1/1762652579.8493571", - 
"retrieved_timestamp": "1762652579.8493571", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "RLHFlow/ArmoRM-Llama3-8B-v0.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "RLHFlow/ArmoRM-Llama3-8B-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForRewardModelWithGating", - "params_billions": 7.511 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18967007539993883 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2876467446788138 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24916107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3948020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10779587765957446 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Replete-AI/Replete-Coder-Llama3-8B/c8b29113-7815-4cf3-be36-76e3e87d6068.json b/data/hfopenllm_v2/meta/Replete-AI/Replete-Coder-Llama3-8B/c8b29113-7815-4cf3-be36-76e3e87d6068.json deleted file mode 100644 index 5e2a0f45d..000000000 --- a/data/hfopenllm_v2/meta/Replete-AI/Replete-Coder-Llama3-8B/c8b29113-7815-4cf3-be36-76e3e87d6068.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Replete-AI_Replete-Coder-Llama3-8B/1762652579.851821", - "retrieved_timestamp": "1762652579.851821", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Replete-AI/Replete-Coder-Llama3-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "Replete-AI/Replete-Coder-Llama3-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4729362535849324 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271277102526684 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26090604026845643 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39530208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13306183510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Replete-AI/Replete-LLM-V2-Llama-3.1-8b/c3977d28-b18d-4e86-bc69-1aa08422585c.json b/data/hfopenllm_v2/meta/Replete-AI/Replete-LLM-V2-Llama-3.1-8b/c3977d28-b18d-4e86-bc69-1aa08422585c.json deleted file mode 100644 index 4ddc386ad..000000000 --- a/data/hfopenllm_v2/meta/Replete-AI/Replete-LLM-V2-Llama-3.1-8b/c3977d28-b18d-4e86-bc69-1aa08422585c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Replete-AI_Replete-LLM-V2-Llama-3.1-8b/1762652579.8529909", - "retrieved_timestamp": "1762652579.852992", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Replete-AI/Replete-LLM-V2-Llama-3.1-8b", - "developer": "meta", - "inference_platform": "unknown", - "id": "Replete-AI/Replete-LLM-V2-Llama-3.1-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5514966954347797 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5339203611594218 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1404833836858006 - } - }, - { - "evaluation_name": "GPQA", - 
"metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4000729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37533244680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/SaisExperiments/RightSheep-Llama3.2-3B/4ef7907b-270f-45dc-8f18-88c62c1c8bfe.json b/data/hfopenllm_v2/meta/SaisExperiments/RightSheep-Llama3.2-3B/4ef7907b-270f-45dc-8f18-88c62c1c8bfe.json deleted file mode 100644 index 775abb440..000000000 --- a/data/hfopenllm_v2/meta/SaisExperiments/RightSheep-Llama3.2-3B/4ef7907b-270f-45dc-8f18-88c62c1c8bfe.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SaisExperiments_RightSheep-Llama3.2-3B/1762652579.8563251", - "retrieved_timestamp": "1762652579.8563259", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SaisExperiments/RightSheep-Llama3.2-3B", - "developer": "meta", - "inference_platform": "unknown", - "id": "SaisExperiments/RightSheep-Llama3.2-3B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4156338515139829 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42407794300783824 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08081570996978851 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3767291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.25398936170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Sakalti/Llama3.2-3B-Uranus-1/aba2e376-936d-4960-a82b-da09d2266826.json b/data/hfopenllm_v2/meta/Sakalti/Llama3.2-3B-Uranus-1/aba2e376-936d-4960-a82b-da09d2266826.json deleted file mode 100644 index c1e8679e1..000000000 --- a/data/hfopenllm_v2/meta/Sakalti/Llama3.2-3B-Uranus-1/aba2e376-936d-4960-a82b-da09d2266826.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_Llama3.2-3B-Uranus-1/1762652579.8570151", - "retrieved_timestamp": "1762652579.857016", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/Llama3.2-3B-Uranus-1", - "developer": "meta", - "inference_platform": "unknown", - "id": "Sakalti/Llama3.2-3B-Uranus-1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5335365718515761 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44368258173181263 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14954682779456194 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3668645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3094248670212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/SentientAGI/Dobby-Mini-Leashed-Llama-3.1-8B/ed1798c0-348f-4294-b546-8a7892225d33.json b/data/hfopenllm_v2/meta/SentientAGI/Dobby-Mini-Leashed-Llama-3.1-8B/ed1798c0-348f-4294-b546-8a7892225d33.json deleted file mode 100644 index d4f9ae247..000000000 --- a/data/hfopenllm_v2/meta/SentientAGI/Dobby-Mini-Leashed-Llama-3.1-8B/ed1798c0-348f-4294-b546-8a7892225d33.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SentientAGI_Dobby-Mini-Leashed-Llama-3.1-8B/1762652579.878995", - "retrieved_timestamp": "1762652579.878996", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SentientAGI/Dobby-Mini-Leashed-Llama-3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "SentientAGI/Dobby-Mini-Leashed-Llama-3.1-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7847034756667863 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5138053850165866 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18580060422960726 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.425375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36943151595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/SentientAGI/Dobby-Mini-Unhinged-Llama-3.1-8B/6ac51916-9278-46b6-9b0f-059745f3d845.json b/data/hfopenllm_v2/meta/SentientAGI/Dobby-Mini-Unhinged-Llama-3.1-8B/6ac51916-9278-46b6-9b0f-059745f3d845.json deleted file mode 100644 index b7f1c4c06..000000000 --- a/data/hfopenllm_v2/meta/SentientAGI/Dobby-Mini-Unhinged-Llama-3.1-8B/6ac51916-9278-46b6-9b0f-059745f3d845.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SentientAGI_Dobby-Mini-Unhinged-Llama-3.1-8B/1762652579.879248", - "retrieved_timestamp": "1762652579.879248", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SentientAGI/Dobby-Mini-Unhinged-Llama-3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "SentientAGI/Dobby-Mini-Unhinged-Llama-3.1-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.7456858912130924 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5142440064892148 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40128125000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35846077127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Sicarius-Prototyping/Brainy_LLAMA/83fd7abf-00b0-4242-b8c3-87ef9c40dfcf.json b/data/hfopenllm_v2/meta/Sicarius-Prototyping/Brainy_LLAMA/83fd7abf-00b0-4242-b8c3-87ef9c40dfcf.json deleted file mode 100644 index 6c1ac3cc5..000000000 --- a/data/hfopenllm_v2/meta/Sicarius-Prototyping/Brainy_LLAMA/83fd7abf-00b0-4242-b8c3-87ef9c40dfcf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sicarius-Prototyping_Brainy_LLAMA/1762652579.880492", - "retrieved_timestamp": "1762652579.8804932", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sicarius-Prototyping/Brainy_LLAMA", - "developer": "meta", - "inference_platform": "unknown", - "id": "Sicarius-Prototyping/Brainy_LLAMA", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5204224790223274 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5117131754488634 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1336858006042296 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4143333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3848902925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/SicariusSicariiStuff/Impish_LLAMA_3B/9235cd92-5335-498e-881f-21938da4ed61.json b/data/hfopenllm_v2/meta/SicariusSicariiStuff/Impish_LLAMA_3B/9235cd92-5335-498e-881f-21938da4ed61.json deleted file mode 100644 index 4463c5961..000000000 --- a/data/hfopenllm_v2/meta/SicariusSicariiStuff/Impish_LLAMA_3B/9235cd92-5335-498e-881f-21938da4ed61.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Impish_LLAMA_3B/1762652579.882116", - "retrieved_timestamp": "1762652579.882117", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SicariusSicariiStuff/Impish_LLAMA_3B", - "developer": "meta", - "inference_platform": "unknown", - "id": "SicariusSicariiStuff/Impish_LLAMA_3B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46299485365496884 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40905101627873225 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11253776435045318 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3672708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2941323138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/SicariusSicariiStuff/LLAMA-3_8B_Unaligned_BETA/27e6623c-49b2-4763-ac6f-b35f1f9002a8.json 
b/data/hfopenllm_v2/meta/SicariusSicariiStuff/LLAMA-3_8B_Unaligned_BETA/27e6623c-49b2-4763-ac6f-b35f1f9002a8.json deleted file mode 100644 index acbeecaee..000000000 --- a/data/hfopenllm_v2/meta/SicariusSicariiStuff/LLAMA-3_8B_Unaligned_BETA/27e6623c-49b2-4763-ac6f-b35f1f9002a8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_LLAMA-3_8B_Unaligned_BETA/1762652579.883067", - "retrieved_timestamp": "1762652579.883067", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SicariusSicariiStuff/LLAMA-3_8B_Unaligned_BETA", - "developer": "meta", - "inference_platform": "unknown", - "id": "SicariusSicariiStuff/LLAMA-3_8B_Unaligned_BETA", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3713203189758729 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4717234028484832 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08383685800604229 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41194791666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3464926861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.1-8B-lora-epoch1/da7be2d8-96ff-4902-9628-c1781391c68e.json b/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.1-8B-lora-epoch1/da7be2d8-96ff-4902-9628-c1781391c68e.json deleted file mode 100644 index ee5ce9caf..000000000 --- a/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.1-8B-lora-epoch1/da7be2d8-96ff-4902-9628-c1781391c68e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Llama3.1-8B-lora-epoch1/1762652579.8857", - "retrieved_timestamp": "1762652579.8857012", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - 
"source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SkyOrbis/SKY-Ko-Llama3.1-8B-lora-epoch1", - "developer": "meta", - "inference_platform": "unknown", - "id": "SkyOrbis/SKY-Ko-Llama3.1-8B-lora-epoch1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5058345190760515 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5088388495224864 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15483383685800603 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3997916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3777426861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.1-8B-lora/fffe8411-9f9c-48ce-adb5-8d483022bffe.json b/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.1-8B-lora/fffe8411-9f9c-48ce-adb5-8d483022bffe.json deleted file mode 100644 index 02fcbeb1a..000000000 --- a/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.1-8B-lora/fffe8411-9f9c-48ce-adb5-8d483022bffe.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Llama3.1-8B-lora/1762652579.88546", - "retrieved_timestamp": "1762652579.885461", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SkyOrbis/SKY-Ko-Llama3.1-8B-lora", - "developer": "meta", - "inference_platform": "unknown", - "id": "SkyOrbis/SKY-Ko-Llama3.1-8B-lora", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5058345190760515 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5088388495224864 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15483383685800603 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3997916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3777426861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch3/d0e4c608-0c64-4cf4-aee6-714475d500db.json b/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch3/d0e4c608-0c64-4cf4-aee6-714475d500db.json deleted file mode 100644 index 83cf41661..000000000 --- a/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch3/d0e4c608-0c64-4cf4-aee6-714475d500db.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Llama3.2-1B-lora-epoch3/1762652579.8859022", - "retrieved_timestamp": "1762652579.8859022", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch3", - "developer": "meta", - "inference_platform": "unknown", - "id": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch3", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3247084402718121 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3166586087861201 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027190332326283987 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33815625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12790890957446807 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch5/19c08486-99c5-4f53-a6cc-69cb58e0808a.json b/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch5/19c08486-99c5-4f53-a6cc-69cb58e0808a.json deleted file mode 100644 index c3e0469f5..000000000 --- a/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch5/19c08486-99c5-4f53-a6cc-69cb58e0808a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Llama3.2-1B-lora-epoch5/1762652579.8861618", - "retrieved_timestamp": "1762652579.886163", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch5", - "developer": "meta", - "inference_platform": "unknown", - "id": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch5", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4359920566319587 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34060156188911545 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05211480362537765 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3471458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19456449468085107 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch3/f45610c5-ead3-4670-9639-aa30fb145829.json b/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch3/f45610c5-ead3-4670-9639-aa30fb145829.json deleted 
file mode 100644 index 66222ca61..000000000 --- a/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch3/f45610c5-ead3-4670-9639-aa30fb145829.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Llama3.2-1B-lora-v2-epoch3/1762652579.886383", - "retrieved_timestamp": "1762652579.886384", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch3", - "developer": "meta", - "inference_platform": "unknown", - "id": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch3", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4359920566319587 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34060156188911545 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05211480362537765 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3471458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19456449468085107 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch5/34a1eda3-2a02-4522-955a-7ed3f1ee97d6.json b/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch5/34a1eda3-2a02-4522-955a-7ed3f1ee97d6.json deleted file mode 100644 index e444f2f3c..000000000 --- a/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch5/34a1eda3-2a02-4522-955a-7ed3f1ee97d6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Llama3.2-1B-lora-v2-epoch5/1762652579.8865862", - "retrieved_timestamp": "1762652579.886587", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch5", - "developer": "meta", - "inference_platform": "unknown", - "id": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch5", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42467652495378927 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33968360414253995 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34584375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19456449468085107 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch1/08fdfb9e-7998-4483-bb1a-4ea7f0e2980e.json b/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch1/08fdfb9e-7998-4483-bb1a-4ea7f0e2980e.json deleted file mode 100644 index 85f31b38d..000000000 --- a/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch1/08fdfb9e-7998-4483-bb1a-4ea7f0e2980e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Llama3.2-3B-lora-epoch1/1762652579.886793", - "retrieved_timestamp": "1762652579.886794", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch1", - "developer": "meta", - "inference_platform": "unknown", - "id": "SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5331121424487028 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4399628268031015 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14577039274924472 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35222916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30044880319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch2/37a5a439-e2ac-46ec-af94-b60f127157de.json b/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch2/37a5a439-e2ac-46ec-af94-b60f127157de.json deleted file mode 100644 index 9a02602e3..000000000 --- a/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch2/37a5a439-e2ac-46ec-af94-b60f127157de.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Llama3.2-3B-lora-epoch2/1762652579.887009", - "retrieved_timestamp": "1762652579.88701", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch2", - "developer": "meta", - "inference_platform": "unknown", - "id": "SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5331121424487028 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4399628268031015 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14577039274924472 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on 
MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35222916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30044880319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch3/6d191a68-8817-468a-850b-01f5ba76e05f.json b/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch3/6d191a68-8817-468a-850b-01f5ba76e05f.json deleted file mode 100644 index 54e8f8412..000000000 --- a/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch3/6d191a68-8817-468a-850b-01f5ba76e05f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Llama3.2-3B-lora-epoch3/1762652579.887351", - "retrieved_timestamp": "1762652579.8873532", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch3", - "developer": "meta", - "inference_platform": "unknown", - "id": "SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch3", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5331121424487028 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4399628268031015 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14577039274924472 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35222916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30044880319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Skywork/Skywork-o1-Open-Llama-3.1-8B/e98879cc-d7fd-4e97-ab86-0ca28265abeb.json b/data/hfopenllm_v2/meta/Skywork/Skywork-o1-Open-Llama-3.1-8B/e98879cc-d7fd-4e97-ab86-0ca28265abeb.json deleted file mode 100644 index da6f2e34c..000000000 --- 
a/data/hfopenllm_v2/meta/Skywork/Skywork-o1-Open-Llama-3.1-8B/e98879cc-d7fd-4e97-ab86-0ca28265abeb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Skywork_Skywork-o1-Open-Llama-3.1-8B/1762652579.8887959", - "retrieved_timestamp": "1762652579.888797", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Skywork/Skywork-o1-Open-Llama-3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "Skywork/Skywork-o1-Open-Llama-3.1-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3518364605912313 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45159089701897237 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5211480362537765 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31564583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20304188829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Solshine/Llama-3-1-big-thoughtful-passthrough-merge-2/b36e0fba-9fa1-4e74-9d26-b4889343f113.json b/data/hfopenllm_v2/meta/Solshine/Llama-3-1-big-thoughtful-passthrough-merge-2/b36e0fba-9fa1-4e74-9d26-b4889343f113.json deleted file mode 100644 index 2aa8e4a40..000000000 --- a/data/hfopenllm_v2/meta/Solshine/Llama-3-1-big-thoughtful-passthrough-merge-2/b36e0fba-9fa1-4e74-9d26-b4889343f113.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Solshine_Llama-3-1-big-thoughtful-passthrough-merge-2/1762652579.889379", - "retrieved_timestamp": "1762652579.88938", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"Solshine/Llama-3-1-big-thoughtful-passthrough-merge-2", - "developer": "meta", - "inference_platform": "unknown", - "id": "Solshine/Llama-3-1-big-thoughtful-passthrough-merge-2", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 18.5 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25466650709007654 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32093808427144627 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38894791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11851728723404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/T145/Llama-3.1-8B-Zeus/e0889500-8f6e-496c-b275-ac110458c56d.json b/data/hfopenllm_v2/meta/T145/Llama-3.1-8B-Zeus/e0889500-8f6e-496c-b275-ac110458c56d.json deleted file mode 100644 index bdced2a64..000000000 --- a/data/hfopenllm_v2/meta/T145/Llama-3.1-8B-Zeus/e0889500-8f6e-496c-b275-ac110458c56d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/T145_Llama-3.1-8B-Zeus/1762652579.900112", - "retrieved_timestamp": "1762652579.9001129", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "T145/Llama-3.1-8B-Zeus", - "developer": "meta", - "inference_platform": "unknown", - "id": "T145/Llama-3.1-8B-Zeus", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35176110497923285 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3671175348446849 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33158333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1332280585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Tarek07/Progenitor-V1.1-LLaMa-70B/8638b115-f092-42f1-949d-162321fe5833.json b/data/hfopenllm_v2/meta/Tarek07/Progenitor-V1.1-LLaMa-70B/8638b115-f092-42f1-949d-162321fe5833.json deleted file mode 100644 index 580a4e3ce..000000000 --- a/data/hfopenllm_v2/meta/Tarek07/Progenitor-V1.1-LLaMa-70B/8638b115-f092-42f1-949d-162321fe5833.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Tarek07_Progenitor-V1.1-LLaMa-70B/1762652579.911703", - "retrieved_timestamp": "1762652579.911703", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Tarek07/Progenitor-V1.1-LLaMa-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "Tarek07/Progenitor-V1.1-LLaMa-70B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6906064796960952 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6971116049173388 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35725075528700906 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45805369127516776 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, 
- "score_details": { - "score": 0.47356250000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5465425531914894 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Tarek07/Thalassic-Alpha-LLaMa-70B/a20052ae-dfa0-4df7-a9a6-f182dbef513d.json b/data/hfopenllm_v2/meta/Tarek07/Thalassic-Alpha-LLaMa-70B/a20052ae-dfa0-4df7-a9a6-f182dbef513d.json deleted file mode 100644 index 3e0f89b7e..000000000 --- a/data/hfopenllm_v2/meta/Tarek07/Thalassic-Alpha-LLaMa-70B/a20052ae-dfa0-4df7-a9a6-f182dbef513d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Tarek07_Thalassic-Alpha-LLaMa-70B/1762652579.9119601", - "retrieved_timestamp": "1762652579.911961", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Tarek07/Thalassic-Alpha-LLaMa-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "Tarek07/Thalassic-Alpha-LLaMa-70B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7003484088884161 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6940408286616311 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3149546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4437919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4801979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.543467420212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/TencentARC/LLaMA-Pro-8B/8d2c510b-a092-4e5d-b468-6e58501cad8a.json b/data/hfopenllm_v2/meta/TencentARC/LLaMA-Pro-8B/8d2c510b-a092-4e5d-b468-6e58501cad8a.json deleted file mode 100644 index 388c8a6a1..000000000 --- a/data/hfopenllm_v2/meta/TencentARC/LLaMA-Pro-8B/8d2c510b-a092-4e5d-b468-6e58501cad8a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/TencentARC_LLaMA-Pro-8B/1762652579.912878", - "retrieved_timestamp": "1762652579.912879", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "TencentARC/LLaMA-Pro-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "TencentARC/LLaMA-Pro-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.357 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2277135777514772 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3484197711435169 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0188821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40181249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18110039893617022 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/TheDrummer/Llama-3SOME-8B-v2/8f4349ad-76e7-4ce5-9121-fef2e376b4bc.json b/data/hfopenllm_v2/meta/TheDrummer/Llama-3SOME-8B-v2/8f4349ad-76e7-4ce5-9121-fef2e376b4bc.json deleted file mode 100644 index 5bfc40306..000000000 --- a/data/hfopenllm_v2/meta/TheDrummer/Llama-3SOME-8B-v2/8f4349ad-76e7-4ce5-9121-fef2e376b4bc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/TheDrummer_Llama-3SOME-8B-v2/1762652579.914594", - "retrieved_timestamp": "1762652579.9145951", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "TheDrummer/Llama-3SOME-8B-v2", - "developer": "meta", - "inference_platform": "unknown", - "id": "TheDrummer/Llama-3SOME-8B-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on 
IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4508049752434651 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5203347869042534 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09365558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3832708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37533244680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T/99c5044d-1308-4f30-9413-bc2672545f76.json b/data/hfopenllm_v2/meta/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T/99c5044d-1308-4f30-9413-bc2672545f76.json deleted file mode 100644 index c87cd6ad8..000000000 --- a/data/hfopenllm_v2/meta/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T/99c5044d-1308-4f30-9413-bc2672545f76.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/TinyLlama_TinyLlama-1.1B-intermediate-step-1431k-3T/1762652579.9195771", - "retrieved_timestamp": "1762652579.919578", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T", - "developer": "meta", - "inference_platform": "unknown", - "id": "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.1 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22766371006706648 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3071188438267271 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": 
{ - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33803125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11203457446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/TinyLlama/TinyLlama_v1.1/e81db661-b05a-4d95-8be4-d663317d3d13.json b/data/hfopenllm_v2/meta/TinyLlama/TinyLlama_v1.1/e81db661-b05a-4d95-8be4-d663317d3d13.json deleted file mode 100644 index 2c927b699..000000000 --- a/data/hfopenllm_v2/meta/TinyLlama/TinyLlama_v1.1/e81db661-b05a-4d95-8be4-d663317d3d13.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/TinyLlama_TinyLlama_v1.1/1762652579.919856", - "retrieved_timestamp": "1762652579.9198568", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "TinyLlama/TinyLlama_v1.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "TinyLlama/TinyLlama_v1.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.1 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20006139266036338 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30237018045076064 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24580536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36996874999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10488696808510638 - } - 
} - ] -} diff --git a/data/hfopenllm_v2/meta/Triangle104/DS-Distilled-Hermes-Llama-3.1/d8a0873b-58e8-449a-aedd-7117e9931546.json b/data/hfopenllm_v2/meta/Triangle104/DS-Distilled-Hermes-Llama-3.1/d8a0873b-58e8-449a-aedd-7117e9931546.json deleted file mode 100644 index 78e69c993..000000000 --- a/data/hfopenllm_v2/meta/Triangle104/DS-Distilled-Hermes-Llama-3.1/d8a0873b-58e8-449a-aedd-7117e9931546.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_DS-Distilled-Hermes-Llama-3.1/1762652579.9221509", - "retrieved_timestamp": "1762652579.922152", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/DS-Distilled-Hermes-Llama-3.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "Triangle104/DS-Distilled-Hermes-Llama-3.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3229353670483207 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5117012556460311 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2930513595166163 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4038541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31100398936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Triangle104/DS-Distilled-Hermes-Llama-3.1_TIES/9383604e-dd29-4c51-87eb-68f19ff929ec.json b/data/hfopenllm_v2/meta/Triangle104/DS-Distilled-Hermes-Llama-3.1_TIES/9383604e-dd29-4c51-87eb-68f19ff929ec.json deleted file mode 100644 index c8797eef8..000000000 --- a/data/hfopenllm_v2/meta/Triangle104/DS-Distilled-Hermes-Llama-3.1_TIES/9383604e-dd29-4c51-87eb-68f19ff929ec.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_DS-Distilled-Hermes-Llama-3.1_TIES/1762652579.922394", - "retrieved_timestamp": "1762652579.922395", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/DS-Distilled-Hermes-Llama-3.1_TIES", - "developer": "meta", - "inference_platform": "unknown", - "id": "Triangle104/DS-Distilled-Hermes-Llama-3.1_TIES", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13641360479084386 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.292845246551473 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24496644295302014 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36209375000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11037234042553191 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Triangle104/DS-R1-Llama-8B-Harmony/ef25dd23-7cc0-46ad-898d-31bfb5205aad.json b/data/hfopenllm_v2/meta/Triangle104/DS-R1-Llama-8B-Harmony/ef25dd23-7cc0-46ad-898d-31bfb5205aad.json deleted file mode 100644 index 63013d34b..000000000 --- a/data/hfopenllm_v2/meta/Triangle104/DS-R1-Llama-8B-Harmony/ef25dd23-7cc0-46ad-898d-31bfb5205aad.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_DS-R1-Llama-8B-Harmony/1762652579.9232068", - "retrieved_timestamp": "1762652579.9232068", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/DS-R1-Llama-8B-Harmony", - "developer": "meta", - "inference_platform": "unknown", - "id": "Triangle104/DS-R1-Llama-8B-Harmony", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35663262366077564 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41536451555729687 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4282477341389728 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3761979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27435172872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Triangle104/DSR1-Distill-Llama-Lit-8B/b31d5098-4324-4307-aa50-2413ceba5481.json b/data/hfopenllm_v2/meta/Triangle104/DSR1-Distill-Llama-Lit-8B/b31d5098-4324-4307-aa50-2413ceba5481.json deleted file mode 100644 index 95d09ce6d..000000000 --- a/data/hfopenllm_v2/meta/Triangle104/DSR1-Distill-Llama-Lit-8B/b31d5098-4324-4307-aa50-2413ceba5481.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_DSR1-Distill-Llama-Lit-8B/1762652579.923411", - "retrieved_timestamp": "1762652579.923412", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/DSR1-Distill-Llama-Lit-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "Triangle104/DSR1-Distill-Llama-Lit-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18852090231696345 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4284056327107781 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35196374622356497 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35346875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27975398936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Triangle104/Dolphin3-Llama3.2-Smart/88532e60-eff6-404b-8e74-fd8836a99ff9.json b/data/hfopenllm_v2/meta/Triangle104/Dolphin3-Llama3.2-Smart/88532e60-eff6-404b-8e74-fd8836a99ff9.json deleted file mode 100644 index 03e98613d..000000000 --- a/data/hfopenllm_v2/meta/Triangle104/Dolphin3-Llama3.2-Smart/88532e60-eff6-404b-8e74-fd8836a99ff9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Dolphin3-Llama3.2-Smart/1762652579.924712", - "retrieved_timestamp": "1762652579.924713", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Dolphin3-Llama3.2-Smart", - "developer": "meta", - "inference_platform": "unknown", - "id": "Triangle104/Dolphin3-Llama3.2-Smart", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.413660199382084 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.397507554563096 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3921666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21949800531914893 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/meta/Triangle104/Hermes-Llama-3.2-CoT-Summary/9bd6ca33-d62a-4327-a11e-f36188f0256a.json b/data/hfopenllm_v2/meta/Triangle104/Hermes-Llama-3.2-CoT-Summary/9bd6ca33-d62a-4327-a11e-f36188f0256a.json deleted file mode 100644 index 9d3dbe643..000000000 --- a/data/hfopenllm_v2/meta/Triangle104/Hermes-Llama-3.2-CoT-Summary/9bd6ca33-d62a-4327-a11e-f36188f0256a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Hermes-Llama-3.2-CoT-Summary/1762652579.925437", - "retrieved_timestamp": "1762652579.925438", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Hermes-Llama-3.2-CoT-Summary", - "developer": "meta", - "inference_platform": "unknown", - "id": "Triangle104/Hermes-Llama-3.2-CoT-Summary", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48302836473889277 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42003008354054533 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3575 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29014295212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Triangle104/Hermes-Llama-3.2-CoT/ddacf85a-a333-4cf9-b0f2-b9a5d5831b8c.json b/data/hfopenllm_v2/meta/Triangle104/Hermes-Llama-3.2-CoT/ddacf85a-a333-4cf9-b0f2-b9a5d5831b8c.json deleted file mode 100644 index 67ecec5cf..000000000 --- a/data/hfopenllm_v2/meta/Triangle104/Hermes-Llama-3.2-CoT/ddacf85a-a333-4cf9-b0f2-b9a5d5831b8c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Hermes-Llama-3.2-CoT/1762652579.925184", - "retrieved_timestamp": "1762652579.925184", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Hermes-Llama-3.2-CoT", - "developer": "meta", - "inference_platform": "unknown", - "id": "Triangle104/Hermes-Llama-3.2-CoT", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4177571066991139 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4615751505493966 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09516616314199396 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36978125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2947140957446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Triangle104/Llama3.1-Allades-Lit-8b/d3d2f0cc-2775-4a01-b8ae-5206cafcb2bb.json b/data/hfopenllm_v2/meta/Triangle104/Llama3.1-Allades-Lit-8b/d3d2f0cc-2775-4a01-b8ae-5206cafcb2bb.json deleted file mode 100644 index 62cac7e1a..000000000 --- a/data/hfopenllm_v2/meta/Triangle104/Llama3.1-Allades-Lit-8b/d3d2f0cc-2775-4a01-b8ae-5206cafcb2bb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Llama3.1-Allades-Lit-8b/1762652579.927552", - "retrieved_timestamp": "1762652579.927553", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Llama3.1-Allades-Lit-8b", - "developer": "meta", - "inference_platform": "unknown", - "id": "Triangle104/Llama3.1-Allades-Lit-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24612361866514182 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41832977787362163 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0022658610271903325 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37083333333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2724401595744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Triangle104/Llama3.1-cc-Lit-8b/3ccecc91-6528-4592-8ca3-722a62bfa102.json b/data/hfopenllm_v2/meta/Triangle104/Llama3.1-cc-Lit-8b/3ccecc91-6528-4592-8ca3-722a62bfa102.json deleted file mode 100644 index f2f1b83f3..000000000 --- a/data/hfopenllm_v2/meta/Triangle104/Llama3.1-cc-Lit-8b/3ccecc91-6528-4592-8ca3-722a62bfa102.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Llama3.1-cc-Lit-8b/1762652579.927792", - "retrieved_timestamp": "1762652579.9277928", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Llama3.1-cc-Lit-8b", - "developer": "meta", - "inference_platform": "unknown", - "id": "Triangle104/Llama3.1-cc-Lit-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2993047336622384 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3847994561866892 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0030211480362537764 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38540625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30044880319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Triangle104/Porpoise-R1-Llama3.2-3b/29843ea0-0ab4-44e1-8206-10a1135cce8a.json b/data/hfopenllm_v2/meta/Triangle104/Porpoise-R1-Llama3.2-3b/29843ea0-0ab4-44e1-8206-10a1135cce8a.json deleted file mode 100644 index f7006eb3d..000000000 --- a/data/hfopenllm_v2/meta/Triangle104/Porpoise-R1-Llama3.2-3b/29843ea0-0ab4-44e1-8206-10a1135cce8a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Porpoise-R1-Llama3.2-3b/1762652579.931781", - "retrieved_timestamp": "1762652579.931781", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Porpoise-R1-Llama3.2-3b", - "developer": "meta", - "inference_platform": "unknown", - "id": "Triangle104/Porpoise-R1-Llama3.2-3b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4352174452674459 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38236758004585686 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04229607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.357625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21168550531914893 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Triangle104/RomboHermes3-R1-Llama3.2-3b/8ce06258-4909-4e46-a326-85052d28c5ff.json b/data/hfopenllm_v2/meta/Triangle104/RomboHermes3-R1-Llama3.2-3b/8ce06258-4909-4e46-a326-85052d28c5ff.json deleted file mode 100644 index 
bfe671e6b..000000000 --- a/data/hfopenllm_v2/meta/Triangle104/RomboHermes3-R1-Llama3.2-3b/8ce06258-4909-4e46-a326-85052d28c5ff.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_RomboHermes3-R1-Llama3.2-3b/1762652579.9345112", - "retrieved_timestamp": "1762652579.9345121", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/RomboHermes3-R1-Llama3.2-3b", - "developer": "meta", - "inference_platform": "unknown", - "id": "Triangle104/RomboHermes3-R1-Llama3.2-3b", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.300728733094855 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42639466274987187 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08157099697885196 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36565625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2957114361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/UKzExecution/LlamaExecutor-8B-3.0.5/0f2ddff5-6077-4166-8fe4-ade89d3a6003.json b/data/hfopenllm_v2/meta/UKzExecution/LlamaExecutor-8B-3.0.5/0f2ddff5-6077-4166-8fe4-ade89d3a6003.json deleted file mode 100644 index 85323b854..000000000 --- a/data/hfopenllm_v2/meta/UKzExecution/LlamaExecutor-8B-3.0.5/0f2ddff5-6077-4166-8fe4-ade89d3a6003.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/UKzExecution_LlamaExecutor-8B-3.0.5/1762652579.938387", - "retrieved_timestamp": "1762652579.938387", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "UKzExecution/LlamaExecutor-8B-3.0.5", - "developer": "meta", - "inference_platform": "unknown", - 
"id": "UKzExecution/LlamaExecutor-8B-3.0.5", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.740290207759855 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5006000507021341 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10196374622356495 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3753645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3625332446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/VIRNECT/llama-3-Korean-8B-r-v-0.1/c3448f16-33c4-42c8-bde3-b503786cba7f.json b/data/hfopenllm_v2/meta/VIRNECT/llama-3-Korean-8B-r-v-0.1/c3448f16-33c4-42c8-bde3-b503786cba7f.json deleted file mode 100644 index add63a7cb..000000000 --- a/data/hfopenllm_v2/meta/VIRNECT/llama-3-Korean-8B-r-v-0.1/c3448f16-33c4-42c8-bde3-b503786cba7f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/VIRNECT_llama-3-Korean-8B-r-v-0.1/1762652579.944067", - "retrieved_timestamp": "1762652579.9440682", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "VIRNECT/llama-3-Korean-8B-r-v-0.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "VIRNECT/llama-3-Korean-8B-r-v-0.1", - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 16.061 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49157125316382755 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48061568139086264 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08610271903323263 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2424496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36748958333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3259640957446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/VIRNECT/llama-3-Korean-8B/1193d16a-5ba8-4a6c-b13d-116bb7731a71.json b/data/hfopenllm_v2/meta/VIRNECT/llama-3-Korean-8B/1193d16a-5ba8-4a6c-b13d-116bb7731a71.json deleted file mode 100644 index 1b792043f..000000000 --- a/data/hfopenllm_v2/meta/VIRNECT/llama-3-Korean-8B/1193d16a-5ba8-4a6c-b13d-116bb7731a71.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/VIRNECT_llama-3-Korean-8B/1762652579.943881", - "retrieved_timestamp": "1762652579.943882", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "VIRNECT/llama-3-Korean-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "VIRNECT/llama-3-Korean-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5021376614050719 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.491837579362695 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10800604229607251 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3647916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": 
"Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3536402925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/VIRNECT/llama-3-Korean-8B/c5ef57d2-a521-4b09-9aa1-0c06c9888cda.json b/data/hfopenllm_v2/meta/VIRNECT/llama-3-Korean-8B/c5ef57d2-a521-4b09-9aa1-0c06c9888cda.json deleted file mode 100644 index 7640ec7ce..000000000 --- a/data/hfopenllm_v2/meta/VIRNECT/llama-3-Korean-8B/c5ef57d2-a521-4b09-9aa1-0c06c9888cda.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/VIRNECT_llama-3-Korean-8B/1762652579.943627", - "retrieved_timestamp": "1762652579.943627", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "VIRNECT/llama-3-Korean-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "VIRNECT/llama-3-Korean-8B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5058345190760515 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49082453083378397 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09290030211480363 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36615624999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3538896276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/ValiantLabs/Llama3-70B-Fireplace/60150622-5b73-4b2c-a8f2-7c2e84cd3d0e.json b/data/hfopenllm_v2/meta/ValiantLabs/Llama3-70B-Fireplace/60150622-5b73-4b2c-a8f2-7c2e84cd3d0e.json deleted file mode 100644 index 621606e0b..000000000 --- a/data/hfopenllm_v2/meta/ValiantLabs/Llama3-70B-Fireplace/60150622-5b73-4b2c-a8f2-7c2e84cd3d0e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3-70B-Fireplace/1762652579.944278", - "retrieved_timestamp": "1762652579.944279", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ValiantLabs/Llama3-70B-Fireplace", - "developer": "meta", - "inference_platform": "unknown", - "id": "ValiantLabs/Llama3-70B-Fireplace", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7773596280092377 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.648899361888402 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21450151057401812 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3548657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4448541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4892785904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/ValiantLabs/Llama3-70B-ShiningValiant2/1650ab9b-4e64-48f1-9551-fb58758cb2f6.json b/data/hfopenllm_v2/meta/ValiantLabs/Llama3-70B-ShiningValiant2/1650ab9b-4e64-48f1-9551-fb58758cb2f6.json deleted file mode 100644 index 32f113411..000000000 --- a/data/hfopenllm_v2/meta/ValiantLabs/Llama3-70B-ShiningValiant2/1650ab9b-4e64-48f1-9551-fb58758cb2f6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3-70B-ShiningValiant2/1762652579.9445372", - "retrieved_timestamp": "1762652579.944538", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ValiantLabs/Llama3-70B-ShiningValiant2", - "developer": "meta", - "inference_platform": "unknown", - "id": "ValiantLabs/Llama3-70B-ShiningValiant2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6121712611426571 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6338341405069171 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20770392749244712 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4325729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48977726063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-70B-ShiningValiant2/6f4c4594-6f73-44e3-b531-f7651b523e8f.json b/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-70B-ShiningValiant2/6f4c4594-6f73-44e3-b531-f7651b523e8f.json deleted file mode 100644 index 97b1bd73e..000000000 --- a/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-70B-ShiningValiant2/6f4c4594-6f73-44e3-b531-f7651b523e8f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.1-70B-ShiningValiant2/1762652579.94475", - "retrieved_timestamp": "1762652579.944751", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ValiantLabs/Llama3.1-70B-ShiningValiant2", - "developer": "meta", - "inference_platform": "unknown", - "id": "ValiantLabs/Llama3.1-70B-ShiningValiant2", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5355346037402979 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6738408402945882 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29154078549848944 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3926174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4681041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5172872340425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Cobalt/382ce872-f5a6-4753-9cca-ba06ddcbb4b6.json b/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Cobalt/382ce872-f5a6-4753-9cca-ba06ddcbb4b6.json deleted file mode 100644 index 4c7b9fa6d..000000000 --- a/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Cobalt/382ce872-f5a6-4753-9cca-ba06ddcbb4b6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.1-8B-Cobalt/1762652579.945206", - "retrieved_timestamp": "1762652579.945206", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ValiantLabs/Llama3.1-8B-Cobalt", - "developer": "meta", - "inference_platform": "unknown", - "id": "ValiantLabs/Llama3.1-8B-Cobalt", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7168346653545925 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4910700749859321 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15332326283987915 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3512395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36627327127659576 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Cobalt/8683a084-2521-469c-8575-9b2595c112dd.json b/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Cobalt/8683a084-2521-469c-8575-9b2595c112dd.json deleted file mode 100644 index d8e24f31a..000000000 --- a/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Cobalt/8683a084-2521-469c-8575-9b2595c112dd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.1-8B-Cobalt/1762652579.9449751", - "retrieved_timestamp": "1762652579.9449759", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ValiantLabs/Llama3.1-8B-Cobalt", - "developer": "meta", - "inference_platform": "unknown", - "id": "ValiantLabs/Llama3.1-8B-Cobalt", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3496134700372789 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4946769968149292 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1268882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3959479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3644448138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Enigma/e1c4e454-79c8-448d-ab33-629900a35779.json b/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Enigma/e1c4e454-79c8-448d-ab33-629900a35779.json deleted file mode 100644 index 767d3296f..000000000 --- a/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Enigma/e1c4e454-79c8-448d-ab33-629900a35779.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.1-8B-Enigma/1762652579.945396", - "retrieved_timestamp": "1762652579.945397", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF 
Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ValiantLabs/Llama3.1-8B-Enigma", - "developer": "meta", - "inference_platform": "unknown", - "id": "ValiantLabs/Llama3.1-8B-Enigma", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26805542626896633 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44776000880153927 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4196041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34092420212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Esper2/aa8f6d7a-bf7a-4e00-932f-b31c9cf0705e.json b/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Esper2/aa8f6d7a-bf7a-4e00-932f-b31c9cf0705e.json deleted file mode 100644 index 3b0c93f31..000000000 --- a/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Esper2/aa8f6d7a-bf7a-4e00-932f-b31c9cf0705e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.1-8B-Esper2/1762652579.945612", - "retrieved_timestamp": "1762652579.9456131", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ValiantLabs/Llama3.1-8B-Esper2", - "developer": "meta", - "inference_platform": "unknown", - "id": "ValiantLabs/Llama3.1-8B-Esper2", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2567398945907968 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4469866863000255 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3560729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29039228723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Fireplace2/08843042-f5ed-4dbb-befe-82c48e370664.json b/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Fireplace2/08843042-f5ed-4dbb-befe-82c48e370664.json deleted file mode 100644 index a07a62c29..000000000 --- a/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Fireplace2/08843042-f5ed-4dbb-befe-82c48e370664.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.1-8B-Fireplace2/1762652579.945827", - "retrieved_timestamp": "1762652579.945827", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ValiantLabs/Llama3.1-8B-Fireplace2", - "developer": "meta", - "inference_platform": "unknown", - "id": "ValiantLabs/Llama3.1-8B-Fireplace2", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5483240025354947 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4609817052543379 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0581570996978852 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, 
- "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34330208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24069148936170212 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Fireplace2/8c25e90b-944b-4c23-a7ed-43c9609c6bf7.json b/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Fireplace2/8c25e90b-944b-4c23-a7ed-43c9609c6bf7.json deleted file mode 100644 index 57d3ddd70..000000000 --- a/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Fireplace2/8c25e90b-944b-4c23-a7ed-43c9609c6bf7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.1-8B-Fireplace2/1762652579.946038", - "retrieved_timestamp": "1762652579.946039", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ValiantLabs/Llama3.1-8B-Fireplace2", - "developer": "meta", - "inference_platform": "unknown", - "id": "ValiantLabs/Llama3.1-8B-Fireplace2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5328118281714739 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4613311485871581 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08761329305135952 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33666666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24235372340425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-ShiningValiant2/4b3c0c63-4718-4fce-bd70-a31b3b60dfad.json b/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-ShiningValiant2/4b3c0c63-4718-4fce-bd70-a31b3b60dfad.json deleted file mode 100644 index fe841b8b1..000000000 --- 
a/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-ShiningValiant2/4b3c0c63-4718-4fce-bd70-a31b3b60dfad.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.1-8B-ShiningValiant2/1762652579.946223", - "retrieved_timestamp": "1762652579.9462242", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ValiantLabs/Llama3.1-8B-ShiningValiant2", - "developer": "meta", - "inference_platform": "unknown", - "id": "ValiantLabs/Llama3.1-8B-ShiningValiant2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6495653754260917 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.477390600131639 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39086458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33818151595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-ShiningValiant2/e1d82962-59c9-44e7-9243-ea62f6639d1e.json b/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-ShiningValiant2/e1d82962-59c9-44e7-9243-ea62f6639d1e.json deleted file mode 100644 index 42f84ff24..000000000 --- a/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-ShiningValiant2/e1d82962-59c9-44e7-9243-ea62f6639d1e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.1-8B-ShiningValiant2/1762652579.946434", - "retrieved_timestamp": "1762652579.946435", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ValiantLabs/Llama3.1-8B-ShiningValiant2", - "developer": "meta", - "inference_platform": 
"unknown", - "id": "ValiantLabs/Llama3.1-8B-ShiningValiant2", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26780608784691284 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4429290017852748 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05211480362537765 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39591666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.292719414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/ValiantLabs/Llama3.2-3B-Enigma/71e3ab93-9667-4e99-b0a1-e25b701b13fd.json b/data/hfopenllm_v2/meta/ValiantLabs/Llama3.2-3B-Enigma/71e3ab93-9667-4e99-b0a1-e25b701b13fd.json deleted file mode 100644 index abcc149d4..000000000 --- a/data/hfopenllm_v2/meta/ValiantLabs/Llama3.2-3B-Enigma/71e3ab93-9667-4e99-b0a1-e25b701b13fd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.2-3B-Enigma/1762652579.94662", - "retrieved_timestamp": "1762652579.946621", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ValiantLabs/Llama3.2-3B-Enigma", - "developer": "meta", - "inference_platform": "unknown", - "id": "ValiantLabs/Llama3.2-3B-Enigma", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2786218345102107 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3722590772046992 - } - }, - { - "evaluation_name": "MATH Level 5", - 
"metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3921354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2427692819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/ValiantLabs/Llama3.2-3B-Esper2/5567fc86-d3f8-4ef7-94d8-12fc28eeb9b4.json b/data/hfopenllm_v2/meta/ValiantLabs/Llama3.2-3B-Esper2/5567fc86-d3f8-4ef7-94d8-12fc28eeb9b4.json deleted file mode 100644 index 8978218f4..000000000 --- a/data/hfopenllm_v2/meta/ValiantLabs/Llama3.2-3B-Esper2/5567fc86-d3f8-4ef7-94d8-12fc28eeb9b4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.2-3B-Esper2/1762652579.947128", - "retrieved_timestamp": "1762652579.9471302", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ValiantLabs/Llama3.2-3B-Esper2", - "developer": "meta", - "inference_platform": "unknown", - "id": "ValiantLabs/Llama3.2-3B-Esper2", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27497484452364174 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38082611390366106 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03625377643504532 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3549583333333333 - } - }, - { - "evaluation_name": 
"MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22573138297872342 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/ValiantLabs/Llama3.2-3B-ShiningValiant2/6c3a0d11-d421-4420-9df7-359164a85893.json b/data/hfopenllm_v2/meta/ValiantLabs/Llama3.2-3B-ShiningValiant2/6c3a0d11-d421-4420-9df7-359164a85893.json deleted file mode 100644 index 836fdcc53..000000000 --- a/data/hfopenllm_v2/meta/ValiantLabs/Llama3.2-3B-ShiningValiant2/6c3a0d11-d421-4420-9df7-359164a85893.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.2-3B-ShiningValiant2/1762652579.947389", - "retrieved_timestamp": "1762652579.9473898", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ValiantLabs/Llama3.2-3B-ShiningValiant2", - "developer": "meta", - "inference_platform": "unknown", - "id": "ValiantLabs/Llama3.2-3B-ShiningValiant2", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625101397624968 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42259325337870185 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0823262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38664583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28291223404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Weyaxi/Einstein-v6.1-Llama3-8B/13c07664-1ff1-48a4-a43d-877fc05bd19d.json b/data/hfopenllm_v2/meta/Weyaxi/Einstein-v6.1-Llama3-8B/13c07664-1ff1-48a4-a43d-877fc05bd19d.json deleted file mode 100644 index ef3a5693e..000000000 --- a/data/hfopenllm_v2/meta/Weyaxi/Einstein-v6.1-Llama3-8B/13c07664-1ff1-48a4-a43d-877fc05bd19d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/Weyaxi_Einstein-v6.1-Llama3-8B/1762652579.9489238", - "retrieved_timestamp": "1762652579.948925", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Weyaxi/Einstein-v6.1-Llama3-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "Weyaxi/Einstein-v6.1-Llama3-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4568245588372186 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5008295581095018 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42128125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3130817819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Weyaxi/Einstein-v6.1-developed-by-Weyaxi-Llama3-8B/06985382-8aec-4aa3-85ff-774da25ed2d3.json b/data/hfopenllm_v2/meta/Weyaxi/Einstein-v6.1-developed-by-Weyaxi-Llama3-8B/06985382-8aec-4aa3-85ff-774da25ed2d3.json deleted file mode 100644 index 786b26f24..000000000 --- a/data/hfopenllm_v2/meta/Weyaxi/Einstein-v6.1-developed-by-Weyaxi-Llama3-8B/06985382-8aec-4aa3-85ff-774da25ed2d3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Weyaxi_Einstein-v6.1-developed-by-Weyaxi-Llama3-8B/1762652579.9492018", - "retrieved_timestamp": "1762652579.949203", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Weyaxi/Einstein-v6.1-developed-by-Weyaxi-Llama3-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "Weyaxi/Einstein-v6.1-developed-by-Weyaxi-Llama3-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - 
"params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39270247388041507 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5043837450549643 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43324999999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30925864361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Weyaxi/Einstein-v8-Llama3.2-1B/5edf6193-a8d6-41d3-b2fd-20f7ce537770.json b/data/hfopenllm_v2/meta/Weyaxi/Einstein-v8-Llama3.2-1B/5edf6193-a8d6-41d3-b2fd-20f7ce537770.json deleted file mode 100644 index 27ceae704..000000000 --- a/data/hfopenllm_v2/meta/Weyaxi/Einstein-v8-Llama3.2-1B/5edf6193-a8d6-41d3-b2fd-20f7ce537770.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Weyaxi_Einstein-v8-Llama3.2-1B/1762652579.9499211", - "retrieved_timestamp": "1762652579.949922", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Weyaxi/Einstein-v8-Llama3.2-1B", - "developer": "meta", - "inference_platform": "unknown", - "id": "Weyaxi/Einstein-v8-Llama3.2-1B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18622255615101263 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30184334823943154 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36178125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11610704787234043 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Xiaojian9992024/Llama3.2-1B-THREADRIPPER-v0.2/5ae4b63d-a84b-4468-aefe-8b5c7b88323e.json b/data/hfopenllm_v2/meta/Xiaojian9992024/Llama3.2-1B-THREADRIPPER-v0.2/5ae4b63d-a84b-4468-aefe-8b5c7b88323e.json deleted file mode 100644 index bbeba984f..000000000 --- a/data/hfopenllm_v2/meta/Xiaojian9992024/Llama3.2-1B-THREADRIPPER-v0.2/5ae4b63d-a84b-4468-aefe-8b5c7b88323e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Llama3.2-1B-THREADRIPPER-v0.2/1762652579.952687", - "retrieved_timestamp": "1762652579.9526882", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Xiaojian9992024/Llama3.2-1B-THREADRIPPER-v0.2", - "developer": "meta", - "inference_platform": "unknown", - "id": "Xiaojian9992024/Llama3.2-1B-THREADRIPPER-v0.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5317878783849076 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3527816493941946 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06570996978851963 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33164583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on 
MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1745345744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Xiaojian9992024/Llama3.2-1B-THREADRIPPER/b7c71bb9-0f3b-4d2f-8902-5fefac1629c5.json b/data/hfopenllm_v2/meta/Xiaojian9992024/Llama3.2-1B-THREADRIPPER/b7c71bb9-0f3b-4d2f-8902-5fefac1629c5.json deleted file mode 100644 index 366a300b0..000000000 --- a/data/hfopenllm_v2/meta/Xiaojian9992024/Llama3.2-1B-THREADRIPPER/b7c71bb9-0f3b-4d2f-8902-5fefac1629c5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Llama3.2-1B-THREADRIPPER/1762652579.952322", - "retrieved_timestamp": "1762652579.952322", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Xiaojian9992024/Llama3.2-1B-THREADRIPPER", - "developer": "meta", - "inference_platform": "unknown", - "id": "Xiaojian9992024/Llama3.2-1B-THREADRIPPER", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5575916346405316 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35437497890840614 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07401812688821752 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31297916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17627992021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Xkev/Llama-3.2V-11B-cot/55f777f4-460f-4b83-a309-7e9e9113fd55.json b/data/hfopenllm_v2/meta/Xkev/Llama-3.2V-11B-cot/55f777f4-460f-4b83-a309-7e9e9113fd55.json deleted file mode 100644 index 30f4eca5d..000000000 --- a/data/hfopenllm_v2/meta/Xkev/Llama-3.2V-11B-cot/55f777f4-460f-4b83-a309-7e9e9113fd55.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Xkev_Llama-3.2V-11B-cot/1762652579.9552681", - "retrieved_timestamp": "1762652579.955269", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Xkev/Llama-3.2V-11B-cot", - "developer": "meta", - "inference_platform": "unknown", - "id": "Xkev/Llama-3.2V-11B-cot", - "additional_details": { - "precision": "float16", - "architecture": "MllamaForConditionalGeneration", - "params_billions": 10.67 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41580894249480266 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.495871783411897 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1555891238670695 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4158541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35871010638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Yuma42/Llama3.1-IgneousIguana-8B/cd2f97bc-3f4d-43f2-b100-09eec8d122a6.json b/data/hfopenllm_v2/meta/Yuma42/Llama3.1-IgneousIguana-8B/cd2f97bc-3f4d-43f2-b100-09eec8d122a6.json deleted file mode 100644 index 354de378d..000000000 --- a/data/hfopenllm_v2/meta/Yuma42/Llama3.1-IgneousIguana-8B/cd2f97bc-3f4d-43f2-b100-09eec8d122a6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Yuma42_Llama3.1-IgneousIguana-8B/1762652579.965119", - "retrieved_timestamp": "1762652579.965119", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Yuma42/Llama3.1-IgneousIguana-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "Yuma42/Llama3.1-IgneousIguana-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.8133297428600558 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5190512670457804 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21978851963746224 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42026041666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39735704787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/Yuma42/Llama3.1-SuperHawk-8B/458dd163-075e-48ca-bb3b-650912f55696.json b/data/hfopenllm_v2/meta/Yuma42/Llama3.1-SuperHawk-8B/458dd163-075e-48ca-bb3b-650912f55696.json deleted file mode 100644 index 20aeca710..000000000 --- a/data/hfopenllm_v2/meta/Yuma42/Llama3.1-SuperHawk-8B/458dd163-075e-48ca-bb3b-650912f55696.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Yuma42_Llama3.1-SuperHawk-8B/1762652579.965369", - "retrieved_timestamp": "1762652579.9653702", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Yuma42/Llama3.1-SuperHawk-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "Yuma42/Llama3.1-SuperHawk-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7986420475449585 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5199931545260023 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2348942598187311 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40835416666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39453125 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/ZeroXClem/Llama-3.1-8B-AthenaSky-MegaMix/2c35754b-3763-4098-8686-39694028e0d9.json b/data/hfopenllm_v2/meta/ZeroXClem/Llama-3.1-8B-AthenaSky-MegaMix/2c35754b-3763-4098-8686-39694028e0d9.json deleted file mode 100644 index 17befe33a..000000000 --- a/data/hfopenllm_v2/meta/ZeroXClem/Llama-3.1-8B-AthenaSky-MegaMix/2c35754b-3763-4098-8686-39694028e0d9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ZeroXClem_Llama-3.1-8B-AthenaSky-MegaMix/1762652579.966579", - "retrieved_timestamp": "1762652579.96658", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ZeroXClem/Llama-3.1-8B-AthenaSky-MegaMix", - "developer": "meta", - "inference_platform": "unknown", - "id": "ZeroXClem/Llama-3.1-8B-AthenaSky-MegaMix", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.63008151704145 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5163423288466883 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2794561933534743 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35384375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3503989361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/ZeroXClem/Llama-3.1-8B-RainbowLight-EtherealMix/18072fb3-a27a-4ad7-93ef-a3770637a0dc.json 
b/data/hfopenllm_v2/meta/ZeroXClem/Llama-3.1-8B-RainbowLight-EtherealMix/18072fb3-a27a-4ad7-93ef-a3770637a0dc.json deleted file mode 100644 index c1f8175eb..000000000 --- a/data/hfopenllm_v2/meta/ZeroXClem/Llama-3.1-8B-RainbowLight-EtherealMix/18072fb3-a27a-4ad7-93ef-a3770637a0dc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ZeroXClem_Llama-3.1-8B-RainbowLight-EtherealMix/1762652579.96684", - "retrieved_timestamp": "1762652579.966841", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ZeroXClem/Llama-3.1-8B-RainbowLight-EtherealMix", - "developer": "meta", - "inference_platform": "unknown", - "id": "ZeroXClem/Llama-3.1-8B-RainbowLight-EtherealMix", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49734149833552754 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5154785280029148 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1216012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39470833333333327 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.363031914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/ZeroXClem/Llama-3.1-8B-SpecialTitanFusion/38be33eb-3dfb-4987-a2f0-14ceb9d834f7.json b/data/hfopenllm_v2/meta/ZeroXClem/Llama-3.1-8B-SpecialTitanFusion/38be33eb-3dfb-4987-a2f0-14ceb9d834f7.json deleted file mode 100644 index 99abf7903..000000000 --- a/data/hfopenllm_v2/meta/ZeroXClem/Llama-3.1-8B-SpecialTitanFusion/38be33eb-3dfb-4987-a2f0-14ceb9d834f7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ZeroXClem_Llama-3.1-8B-SpecialTitanFusion/1762652579.967058", - "retrieved_timestamp": "1762652579.967059", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": 
"third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ZeroXClem/Llama-3.1-8B-SpecialTitanFusion", - "developer": "meta", - "inference_platform": "unknown", - "id": "ZeroXClem/Llama-3.1-8B-SpecialTitanFusion", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7402403400754443 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5438928349489152 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23338368580060423 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38739583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3621176861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/ZeroXClem/Llama-3.1-8B-SuperNova-EtherealHermes/1007d3aa-f8ca-420c-b974-a0f552c649ac.json b/data/hfopenllm_v2/meta/ZeroXClem/Llama-3.1-8B-SuperNova-EtherealHermes/1007d3aa-f8ca-420c-b974-a0f552c649ac.json deleted file mode 100644 index 1a629d89a..000000000 --- a/data/hfopenllm_v2/meta/ZeroXClem/Llama-3.1-8B-SuperNova-EtherealHermes/1007d3aa-f8ca-420c-b974-a0f552c649ac.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ZeroXClem_Llama-3.1-8B-SuperNova-EtherealHermes/1762652579.967272", - "retrieved_timestamp": "1762652579.967272", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ZeroXClem/Llama-3.1-8B-SuperNova-EtherealHermes", - "developer": "meta", - "inference_platform": "unknown", - "id": "ZeroXClem/Llama-3.1-8B-SuperNova-EtherealHermes", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7338705745200512 - } - }, 
- { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5244464882599044 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17447129909365558 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4065833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37450132978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/ZeroXClem/Llama-3.1-8B-SuperTulu-LexiNova/ba3564f4-f48f-4548-ae15-b5f78c4b44f4.json b/data/hfopenllm_v2/meta/ZeroXClem/Llama-3.1-8B-SuperTulu-LexiNova/ba3564f4-f48f-4548-ae15-b5f78c4b44f4.json deleted file mode 100644 index 0143352eb..000000000 --- a/data/hfopenllm_v2/meta/ZeroXClem/Llama-3.1-8B-SuperTulu-LexiNova/ba3564f4-f48f-4548-ae15-b5f78c4b44f4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ZeroXClem_Llama-3.1-8B-SuperTulu-LexiNova/1762652579.96749", - "retrieved_timestamp": "1762652579.9674911", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ZeroXClem/Llama-3.1-8B-SuperTulu-LexiNova", - "developer": "meta", - "inference_platform": "unknown", - "id": "ZeroXClem/Llama-3.1-8B-SuperTulu-LexiNova", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4164583305629064 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5078595074869328 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25302114803625375 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39706249999999993 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3367686170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/aaditya/Llama3-OpenBioLLM-70B/e68ae3f7-3f46-43bb-8e14-0523af96998e.json b/data/hfopenllm_v2/meta/aaditya/Llama3-OpenBioLLM-70B/e68ae3f7-3f46-43bb-8e14-0523af96998e.json deleted file mode 100644 index 61b59b16f..000000000 --- a/data/hfopenllm_v2/meta/aaditya/Llama3-OpenBioLLM-70B/e68ae3f7-3f46-43bb-8e14-0523af96998e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/aaditya_Llama3-OpenBioLLM-70B/1762652579.969287", - "retrieved_timestamp": "1762652579.9692879", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "aaditya/Llama3-OpenBioLLM-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "aaditya/Llama3-OpenBioLLM-70B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7596743307756753 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6398872375485518 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1971299093655589 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44171875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4867021276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/abacusai/Llama-3-Smaug-8B/ea57e277-5694-4981-ac47-d2fa633847ca.json 
b/data/hfopenllm_v2/meta/abacusai/Llama-3-Smaug-8B/ea57e277-5694-4981-ac47-d2fa633847ca.json deleted file mode 100644 index 100428e8e..000000000 --- a/data/hfopenllm_v2/meta/abacusai/Llama-3-Smaug-8B/ea57e277-5694-4981-ac47-d2fa633847ca.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/abacusai_Llama-3-Smaug-8B/1762652579.9700851", - "retrieved_timestamp": "1762652579.9700859", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "abacusai/Llama-3-Smaug-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "abacusai/Llama-3-Smaug-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48667535472546175 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4930712769667174 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08534743202416918 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2483221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36224999999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3184840425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/abhishek/autotrain-llama3-70b-orpo-v1/eb2ee4fb-cc98-4937-a385-19a5e783d1a7.json b/data/hfopenllm_v2/meta/abhishek/autotrain-llama3-70b-orpo-v1/eb2ee4fb-cc98-4937-a385-19a5e783d1a7.json deleted file mode 100644 index b91f6b6b0..000000000 --- a/data/hfopenllm_v2/meta/abhishek/autotrain-llama3-70b-orpo-v1/eb2ee4fb-cc98-4937-a385-19a5e783d1a7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/abhishek_autotrain-llama3-70b-orpo-v1/1762652579.973002", - "retrieved_timestamp": "1762652579.973003", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"abhishek/autotrain-llama3-70b-orpo-v1", - "developer": "meta", - "inference_platform": "unknown", - "id": "abhishek/autotrain-llama3-70b-orpo-v1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4233023932055834 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5997985900252331 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24412751677852348 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35790625000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11220079787234043 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/abhishek/autotrain-llama3-70b-orpo-v2/15617903-e280-4c61-a326-5f615b46b3a8.json b/data/hfopenllm_v2/meta/abhishek/autotrain-llama3-70b-orpo-v2/15617903-e280-4c61-a326-5f615b46b3a8.json deleted file mode 100644 index 7590e31ba..000000000 --- a/data/hfopenllm_v2/meta/abhishek/autotrain-llama3-70b-orpo-v2/15617903-e280-4c61-a326-5f615b46b3a8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/abhishek_autotrain-llama3-70b-orpo-v2/1762652579.9732742", - "retrieved_timestamp": "1762652579.973275", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "abhishek/autotrain-llama3-70b-orpo-v2", - "developer": "meta", - "inference_platform": "unknown", - "id": "abhishek/autotrain-llama3-70b-orpo-v2", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5406055931594835 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.5899473641612185 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2107250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41133333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48179853723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/abhishek/autotrain-llama3-orpo-v2/f8515d35-c7e8-440b-a61f-16f5acfdc003.json b/data/hfopenllm_v2/meta/abhishek/autotrain-llama3-orpo-v2/f8515d35-c7e8-440b-a61f-16f5acfdc003.json deleted file mode 100644 index 513d68201..000000000 --- a/data/hfopenllm_v2/meta/abhishek/autotrain-llama3-orpo-v2/f8515d35-c7e8-440b-a61f-16f5acfdc003.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/abhishek_autotrain-llama3-orpo-v2/1762652579.9735", - "retrieved_timestamp": "1762652579.973501", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "abhishek/autotrain-llama3-orpo-v2", - "developer": "meta", - "inference_platform": "unknown", - "id": "abhishek/autotrain-llama3-orpo-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4371656094717572 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31593828880846425 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3792395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22182513297872342 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/agentlans/Llama3.1-8B-drill/869f9850-417b-43d7-bb40-61375a8bb09c.json b/data/hfopenllm_v2/meta/agentlans/Llama3.1-8B-drill/869f9850-417b-43d7-bb40-61375a8bb09c.json deleted file mode 100644 index 091523dc4..000000000 --- a/data/hfopenllm_v2/meta/agentlans/Llama3.1-8B-drill/869f9850-417b-43d7-bb40-61375a8bb09c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/agentlans_Llama3.1-8B-drill/1762652579.976306", - "retrieved_timestamp": "1762652579.976307", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "agentlans/Llama3.1-8B-drill", - "developer": "meta", - "inference_platform": "unknown", - "id": "agentlans/Llama3.1-8B-drill", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.765169749597734 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5015680021795333 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1714501510574018 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36723958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37757646276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/agentlans/Llama3.1-Daredevilish/417b2c35-090e-42c3-8a92-04f7258702a3.json b/data/hfopenllm_v2/meta/agentlans/Llama3.1-Daredevilish/417b2c35-090e-42c3-8a92-04f7258702a3.json deleted file mode 100644 index 8203d8c2c..000000000 --- a/data/hfopenllm_v2/meta/agentlans/Llama3.1-Daredevilish/417b2c35-090e-42c3-8a92-04f7258702a3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": 
"0.1.0", - "evaluation_id": "hfopenllm_v2/agentlans_Llama3.1-Daredevilish/1762652579.976594", - "retrieved_timestamp": "1762652579.976595", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "agentlans/Llama3.1-Daredevilish", - "developer": "meta", - "inference_platform": "unknown", - "id": "agentlans/Llama3.1-Daredevilish", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6291573026237051 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5012506630648397 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12915407854984895 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40909375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3696808510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/agentlans/Llama3.1-LexiHermes-SuperStorm/6f966179-a456-4914-807d-45ab507e0388.json b/data/hfopenllm_v2/meta/agentlans/Llama3.1-LexiHermes-SuperStorm/6f966179-a456-4914-807d-45ab507e0388.json deleted file mode 100644 index 8bdc023bc..000000000 --- a/data/hfopenllm_v2/meta/agentlans/Llama3.1-LexiHermes-SuperStorm/6f966179-a456-4914-807d-45ab507e0388.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/agentlans_Llama3.1-LexiHermes-SuperStorm/1762652579.97705", - "retrieved_timestamp": "1762652579.9770508", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "agentlans/Llama3.1-LexiHermes-SuperStorm", - "developer": "meta", - "inference_platform": "unknown", - "id": "agentlans/Llama3.1-LexiHermes-SuperStorm", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7834545672149895 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5266460888159817 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16163141993957703 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3962604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3843916223404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/agentlans/Llama3.1-SuperDeepFuse-CrashCourse12K/455bd496-7a32-45c9-a792-3982781fdc16.json b/data/hfopenllm_v2/meta/agentlans/Llama3.1-SuperDeepFuse-CrashCourse12K/455bd496-7a32-45c9-a792-3982781fdc16.json deleted file mode 100644 index bc0827d1b..000000000 --- a/data/hfopenllm_v2/meta/agentlans/Llama3.1-SuperDeepFuse-CrashCourse12K/455bd496-7a32-45c9-a792-3982781fdc16.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/agentlans_Llama3.1-SuperDeepFuse-CrashCourse12K/1762652579.977621", - "retrieved_timestamp": "1762652579.977621", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "agentlans/Llama3.1-SuperDeepFuse-CrashCourse12K", - "developer": "meta", - "inference_platform": "unknown", - "id": "agentlans/Llama3.1-SuperDeepFuse-CrashCourse12K", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.718732961874493 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5215513828266275 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18051359516616314 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40264583333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3631150265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/agentlans/Llama3.1-SuperDeepFuse/6301252b-2353-438a-9e60-c6a572adfc5f.json b/data/hfopenllm_v2/meta/agentlans/Llama3.1-SuperDeepFuse/6301252b-2353-438a-9e60-c6a572adfc5f.json deleted file mode 100644 index e8d54c40b..000000000 --- a/data/hfopenllm_v2/meta/agentlans/Llama3.1-SuperDeepFuse/6301252b-2353-438a-9e60-c6a572adfc5f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/agentlans_Llama3.1-SuperDeepFuse/1762652579.977348", - "retrieved_timestamp": "1762652579.97735", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "agentlans/Llama3.1-SuperDeepFuse", - "developer": "meta", - "inference_platform": "unknown", - "id": "agentlans/Llama3.1-SuperDeepFuse", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7761605872418517 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5048544889908054 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18277945619335348 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.369875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3774933510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/ahmeda335/13_outOf_32_pruned_layers_llama3.1-8b/54da4a97-6e12-4bb0-9138-dacd981b04bf.json b/data/hfopenllm_v2/meta/ahmeda335/13_outOf_32_pruned_layers_llama3.1-8b/54da4a97-6e12-4bb0-9138-dacd981b04bf.json deleted file mode 100644 index 43d78f629..000000000 --- a/data/hfopenllm_v2/meta/ahmeda335/13_outOf_32_pruned_layers_llama3.1-8b/54da4a97-6e12-4bb0-9138-dacd981b04bf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ahmeda335_13_outOf_32_pruned_layers_llama3.1-8b/1762652579.97824", - "retrieved_timestamp": "1762652579.978241", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ahmeda335/13_outOf_32_pruned_layers_llama3.1-8b", - "developer": "meta", - "inference_platform": "unknown", - "id": "ahmeda335/13_outOf_32_pruned_layers_llama3.1-8b", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 5.195 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17480728910402177 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2883257760266153 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3803229166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11286569148936171 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/akhadangi/Llama3.2.1B.0.01-First/d07eada4-e73c-4dd6-8538-e3a9cd471f34.json b/data/hfopenllm_v2/meta/akhadangi/Llama3.2.1B.0.01-First/d07eada4-e73c-4dd6-8538-e3a9cd471f34.json deleted file mode 100644 index b605eda86..000000000 --- a/data/hfopenllm_v2/meta/akhadangi/Llama3.2.1B.0.01-First/d07eada4-e73c-4dd6-8538-e3a9cd471f34.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/akhadangi_Llama3.2.1B.0.01-First/1762652579.979876", - "retrieved_timestamp": 
"1762652579.979876", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "akhadangi/Llama3.2.1B.0.01-First", - "developer": "meta", - "inference_platform": "unknown", - "id": "akhadangi/Llama3.2.1B.0.01-First", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08135857303066973 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31891926453372005 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01812688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2483221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3193958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1196808510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/akhadangi/Llama3.2.1B.0.01-Last/9f796e5e-6c31-46e0-b839-e21d33a403c4.json b/data/hfopenllm_v2/meta/akhadangi/Llama3.2.1B.0.01-Last/9f796e5e-6c31-46e0-b839-e21d33a403c4.json deleted file mode 100644 index 1d84b1d15..000000000 --- a/data/hfopenllm_v2/meta/akhadangi/Llama3.2.1B.0.01-Last/9f796e5e-6c31-46e0-b839-e21d33a403c4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/akhadangi_Llama3.2.1B.0.01-Last/1762652579.980133", - "retrieved_timestamp": "1762652579.9801338", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "akhadangi/Llama3.2.1B.0.01-Last", - "developer": "meta", - "inference_platform": "unknown", - "id": "akhadangi/Llama3.2.1B.0.01-Last", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09165015492227291 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3159283874883156 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24328859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3206354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12267287234042554 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/akhadangi/Llama3.2.1B.0.1-First/4ec306d4-3f34-4330-9898-fb5ccb9a3483.json b/data/hfopenllm_v2/meta/akhadangi/Llama3.2.1B.0.1-First/4ec306d4-3f34-4330-9898-fb5ccb9a3483.json deleted file mode 100644 index cbc2c0367..000000000 --- a/data/hfopenllm_v2/meta/akhadangi/Llama3.2.1B.0.1-First/4ec306d4-3f34-4330-9898-fb5ccb9a3483.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/akhadangi_Llama3.2.1B.0.1-First/1762652579.9803479", - "retrieved_timestamp": "1762652579.9803488", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "akhadangi/Llama3.2.1B.0.1-First", - "developer": "meta", - "inference_platform": "unknown", - "id": "akhadangi/Llama3.2.1B.0.1-First", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10009330797838623 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3119615016336897 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.021148036253776436 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24496644295302014 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.330125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11693816489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/akhadangi/Llama3.2.1B.0.1-Last/82c24fd7-de74-4dc8-bd22-5761243ed826.json b/data/hfopenllm_v2/meta/akhadangi/Llama3.2.1B.0.1-Last/82c24fd7-de74-4dc8-bd22-5761243ed826.json deleted file mode 100644 index 3bb947208..000000000 --- a/data/hfopenllm_v2/meta/akhadangi/Llama3.2.1B.0.1-Last/82c24fd7-de74-4dc8-bd22-5761243ed826.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/akhadangi_Llama3.2.1B.0.1-Last/1762652579.980555", - "retrieved_timestamp": "1762652579.980556", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "akhadangi/Llama3.2.1B.0.1-Last", - "developer": "meta", - "inference_platform": "unknown", - "id": "akhadangi/Llama3.2.1B.0.1-Last", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09497245087479 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3163776768490709 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.021148036253776436 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23825503355704697 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3340625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11776928191489362 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/akhadangi/Llama3.2.1B.BaseFiT/8577766f-d696-489d-8194-31b48c17941a.json 
b/data/hfopenllm_v2/meta/akhadangi/Llama3.2.1B.BaseFiT/8577766f-d696-489d-8194-31b48c17941a.json deleted file mode 100644 index ec2f6319c..000000000 --- a/data/hfopenllm_v2/meta/akhadangi/Llama3.2.1B.BaseFiT/8577766f-d696-489d-8194-31b48c17941a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/akhadangi_Llama3.2.1B.BaseFiT/1762652579.980761", - "retrieved_timestamp": "1762652579.980762", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "akhadangi/Llama3.2.1B.BaseFiT", - "developer": "meta", - "inference_platform": "unknown", - "id": "akhadangi/Llama3.2.1B.BaseFiT", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08827799128534511 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31745151457535453 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02416918429003021 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3220625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1171875 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/akjindal53244/Llama-3.1-Storm-8B/de2d2321-b6ed-4791-9114-757afc963876.json b/data/hfopenllm_v2/meta/akjindal53244/Llama-3.1-Storm-8B/de2d2321-b6ed-4791-9114-757afc963876.json deleted file mode 100644 index bb2526b04..000000000 --- a/data/hfopenllm_v2/meta/akjindal53244/Llama-3.1-Storm-8B/de2d2321-b6ed-4791-9114-757afc963876.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/akjindal53244_Llama-3.1-Storm-8B/1762652579.981211", - "retrieved_timestamp": "1762652579.981212", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"akjindal53244/Llama-3.1-Storm-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "akjindal53244/Llama-3.1-Storm-8B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8050616807847621 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5188671226840744 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17220543806646524 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3263422818791946 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4028020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3803191489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/akjindal53244/Llama-3.1-Storm-8B/f9aad6f2-ba24-47de-a613-b4011a2c52d1.json b/data/hfopenllm_v2/meta/akjindal53244/Llama-3.1-Storm-8B/f9aad6f2-ba24-47de-a613-b4011a2c52d1.json deleted file mode 100644 index a8c4db233..000000000 --- a/data/hfopenllm_v2/meta/akjindal53244/Llama-3.1-Storm-8B/f9aad6f2-ba24-47de-a613-b4011a2c52d1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/akjindal53244_Llama-3.1-Storm-8B/1762652579.980961", - "retrieved_timestamp": "1762652579.980962", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "akjindal53244/Llama-3.1-Storm-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "akjindal53244/Llama-3.1-Storm-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.803263119633683 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.5196330402870707 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1623867069486405 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4028333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3812333776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/alcholjung/llama3_medical_tuned/30324407-0848-48ae-bbd7-80676d9467db.json b/data/hfopenllm_v2/meta/alcholjung/llama3_medical_tuned/30324407-0848-48ae-bbd7-80676d9467db.json deleted file mode 100644 index 4e1af7370..000000000 --- a/data/hfopenllm_v2/meta/alcholjung/llama3_medical_tuned/30324407-0848-48ae-bbd7-80676d9467db.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/alcholjung_llama3_medical_tuned/1762652579.9813929", - "retrieved_timestamp": "1762652579.9813938", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "alcholjung/llama3_medical_tuned", - "developer": "meta", - "inference_platform": "unknown", - "id": "alcholjung/llama3_medical_tuned", - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 16.061 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010566408241244343 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4512943191660926 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": 
{ - "score": 0.46602083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29463098404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/allenai/Llama-3.1-Tulu-3-70B/006cafcb-452f-4df0-b42c-058719eb63e4.json b/data/hfopenllm_v2/meta/allenai/Llama-3.1-Tulu-3-70B/006cafcb-452f-4df0-b42c-058719eb63e4.json deleted file mode 100644 index 15a8b6cdd..000000000 --- a/data/hfopenllm_v2/meta/allenai/Llama-3.1-Tulu-3-70B/006cafcb-452f-4df0-b42c-058719eb63e4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-70B/1762652579.981659", - "retrieved_timestamp": "1762652579.981659", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allenai/Llama-3.1-Tulu-3-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "allenai/Llama-3.1-Tulu-3-70B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8291167435737177 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6163626496199947 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4501510574018127 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3733221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4948333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46451130319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/allenai/Llama-3.1-Tulu-3-70B/5683ed15-2699-4f0c-8e74-a65ff2d4dd49.json b/data/hfopenllm_v2/meta/allenai/Llama-3.1-Tulu-3-70B/5683ed15-2699-4f0c-8e74-a65ff2d4dd49.json deleted file mode 100644 index 56ba02b0b..000000000 --- a/data/hfopenllm_v2/meta/allenai/Llama-3.1-Tulu-3-70B/5683ed15-2699-4f0c-8e74-a65ff2d4dd49.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/allenai_Llama-3.1-Tulu-3-70B/1762652579.981919", - "retrieved_timestamp": "1762652579.981919", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allenai/Llama-3.1-Tulu-3-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "allenai/Llama-3.1-Tulu-3-70B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8379344583482937 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6156847169556112 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38293051359516617 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3733221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49880208333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4655917553191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/allenai/Llama-3.1-Tulu-3-8B-RM/1a363aad-a1e7-404e-8c4a-4132f4fbab2b.json b/data/hfopenllm_v2/meta/allenai/Llama-3.1-Tulu-3-8B-RM/1a363aad-a1e7-404e-8c4a-4132f4fbab2b.json deleted file mode 100644 index 1a61a4a09..000000000 --- a/data/hfopenllm_v2/meta/allenai/Llama-3.1-Tulu-3-8B-RM/1a363aad-a1e7-404e-8c4a-4132f4fbab2b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-8B-RM/1762652579.9831831", - "retrieved_timestamp": "1762652579.9831831", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allenai/Llama-3.1-Tulu-3-8B-RM", - "developer": "meta", - "inference_platform": "unknown", - "id": "allenai/Llama-3.1-Tulu-3-8B-RM", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForSequenceClassification", - "params_billions": 8.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16701352411601217 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2950041147470504 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3764166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10821143617021277 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/allenai/Llama-3.1-Tulu-3-8B/5ad18861-1b4d-456d-9e1c-e945c1f71530.json b/data/hfopenllm_v2/meta/allenai/Llama-3.1-Tulu-3-8B/5ad18861-1b4d-456d-9e1c-e945c1f71530.json deleted file mode 100644 index 6aa3e76a7..000000000 --- a/data/hfopenllm_v2/meta/allenai/Llama-3.1-Tulu-3-8B/5ad18861-1b4d-456d-9e1c-e945c1f71530.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-8B/1762652579.9825459", - "retrieved_timestamp": "1762652579.982547", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allenai/Llama-3.1-Tulu-3-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "allenai/Llama-3.1-Tulu-3-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8266687943545348 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4049833102731906 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19637462235649547 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41746875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2826628989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/allenai/Llama-3.1-Tulu-3-8B/8a7c4b5a-85c7-4fc6-af4c-e9cde5d32d8b.json b/data/hfopenllm_v2/meta/allenai/Llama-3.1-Tulu-3-8B/8a7c4b5a-85c7-4fc6-af4c-e9cde5d32d8b.json deleted file mode 100644 index 10b9cad8e..000000000 --- a/data/hfopenllm_v2/meta/allenai/Llama-3.1-Tulu-3-8B/8a7c4b5a-85c7-4fc6-af4c-e9cde5d32d8b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-8B/1762652579.982752", - "retrieved_timestamp": "1762652579.982752", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allenai/Llama-3.1-Tulu-3-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "allenai/Llama-3.1-Tulu-3-8B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8254697535871487 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40608256120952024 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21148036253776434 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41746875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2820811170212766 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/meta/allknowingroger/Llama3.1-60B/21684c0e-c9b7-4375-bf05-cf63e9bd19b4.json b/data/hfopenllm_v2/meta/allknowingroger/Llama3.1-60B/21684c0e-c9b7-4375-bf05-cf63e9bd19b4.json deleted file mode 100644 index 4e2ea00ba..000000000 --- a/data/hfopenllm_v2/meta/allknowingroger/Llama3.1-60B/21684c0e-c9b7-4375-bf05-cf63e9bd19b4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Llama3.1-60B/1762652579.989347", - "retrieved_timestamp": "1762652579.9893482", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Llama3.1-60B", - "developer": "meta", - "inference_platform": "unknown", - "id": "allknowingroger/Llama3.1-60B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 61.997 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18145188100905596 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32417552719382076 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3595833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3310339095744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/allknowingroger/Yillama-40B/ab5ef6c9-76de-470e-b524-497036db94d4.json b/data/hfopenllm_v2/meta/allknowingroger/Yillama-40B/ab5ef6c9-76de-470e-b524-497036db94d4.json deleted file mode 100644 index 322aa4cb3..000000000 --- a/data/hfopenllm_v2/meta/allknowingroger/Yillama-40B/ab5ef6c9-76de-470e-b524-497036db94d4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Yillama-40B/1762652580.004728", - "retrieved_timestamp": "1762652580.004729", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": 
"documentation" - }, - "model_info": { - "name": "allknowingroger/Yillama-40B", - "developer": "meta", - "inference_platform": "unknown", - "id": "allknowingroger/Yillama-40B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16968643200042555 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40628855371888356 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3500625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1981382978723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/allknowingroger/llama3-Jallabi-40B-s/d46307f8-774b-4871-a32a-6c5a9cc6b1b8.json b/data/hfopenllm_v2/meta/allknowingroger/llama3-Jallabi-40B-s/d46307f8-774b-4871-a32a-6c5a9cc6b1b8.json deleted file mode 100644 index b4ca5a2d7..000000000 --- a/data/hfopenllm_v2/meta/allknowingroger/llama3-Jallabi-40B-s/d46307f8-774b-4871-a32a-6c5a9cc6b1b8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_llama3-Jallabi-40B-s/1762652580.006197", - "retrieved_timestamp": "1762652580.006198", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/llama3-Jallabi-40B-s", - "developer": "meta", - "inference_platform": "unknown", - "id": "allknowingroger/llama3-Jallabi-40B-s", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 18.769 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19206815693471102 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.32522424198526295 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23741610738255034 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37495833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10879321808510638 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/allknowingroger/llama3AnFeng-40B/dc25bda9-966c-44f8-991b-ad891d59befe.json b/data/hfopenllm_v2/meta/allknowingroger/llama3AnFeng-40B/dc25bda9-966c-44f8-991b-ad891d59befe.json deleted file mode 100644 index e9e79529c..000000000 --- a/data/hfopenllm_v2/meta/allknowingroger/llama3AnFeng-40B/dc25bda9-966c-44f8-991b-ad891d59befe.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_llama3AnFeng-40B/1762652580.006448", - "retrieved_timestamp": "1762652580.006449", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/llama3AnFeng-40B", - "developer": "meta", - "inference_platform": "unknown", - "id": "allknowingroger/llama3AnFeng-40B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 39.971 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17420776872032873 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3794080447660335 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.39399999999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1979720744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/aloobun/Meta-Llama-3-7B-28Layers/f020ec4e-f026-4034-a219-1aacfcbb16b0.json b/data/hfopenllm_v2/meta/aloobun/Meta-Llama-3-7B-28Layers/f020ec4e-f026-4034-a219-1aacfcbb16b0.json deleted file mode 100644 index 7d275c917..000000000 --- a/data/hfopenllm_v2/meta/aloobun/Meta-Llama-3-7B-28Layers/f020ec4e-f026-4034-a219-1aacfcbb16b0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/aloobun_Meta-Llama-3-7B-28Layers/1762652580.0090299", - "retrieved_timestamp": "1762652580.0090308", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "aloobun/Meta-Llama-3-7B-28Layers", - "developer": "meta", - "inference_platform": "unknown", - "id": "aloobun/Meta-Llama-3-7B-28Layers", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.158 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19636453498938372 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4437497014253391 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35892708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3159906914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/amd/AMD-Llama-135m/086ca0cf-79a3-4b94-980d-9384f1848562.json b/data/hfopenllm_v2/meta/amd/AMD-Llama-135m/086ca0cf-79a3-4b94-980d-9384f1848562.json deleted file mode 100644 index af4d73c18..000000000 --- a/data/hfopenllm_v2/meta/amd/AMD-Llama-135m/086ca0cf-79a3-4b94-980d-9384f1848562.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/amd_AMD-Llama-135m/1762652580.010782", - "retrieved_timestamp": "1762652580.010783", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "amd/AMD-Llama-135m", - "developer": "meta", - "inference_platform": "unknown", - "id": "amd/AMD-Llama-135m", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.134 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19184319826948054 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29694449748780255 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0075528700906344415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38457291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11685505319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/amd/AMD-Llama-135m/4a623195-2073-4637-b748-696012109846.json b/data/hfopenllm_v2/meta/amd/AMD-Llama-135m/4a623195-2073-4637-b748-696012109846.json deleted file mode 100644 index cf94961f7..000000000 --- a/data/hfopenllm_v2/meta/amd/AMD-Llama-135m/4a623195-2073-4637-b748-696012109846.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/amd_AMD-Llama-135m/1762652580.010537", - "retrieved_timestamp": "1762652580.010538", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "amd/AMD-Llama-135m", - "developer": "meta", - "inference_platform": "unknown", - "id": "amd/AMD-Llama-135m", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18422452426229072 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2973931917569524 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.005287009063444109 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37796874999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11685505319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/arcee-ai/Llama-3.1-SuperNova-Lite/4bc80120-a5e2-4824-b278-c2de7140a2bf.json b/data/hfopenllm_v2/meta/arcee-ai/Llama-3.1-SuperNova-Lite/4bc80120-a5e2-4824-b278-c2de7140a2bf.json deleted file mode 100644 index 59fe479cf..000000000 --- a/data/hfopenllm_v2/meta/arcee-ai/Llama-3.1-SuperNova-Lite/4bc80120-a5e2-4824-b278-c2de7140a2bf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/arcee-ai_Llama-3.1-SuperNova-Lite/1762652580.016114", - "retrieved_timestamp": "1762652580.016115", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "arcee-ai/Llama-3.1-SuperNova-Lite", - "developer": "meta", - "inference_platform": "unknown", - "id": "arcee-ai/Llama-3.1-SuperNova-Lite", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8017393848322452 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5151992115104819 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18277945619335348 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41632291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3877160904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/arcee-ai/Llama-Spark/aaceb35d-4106-4d6c-b895-446b87394f3b.json b/data/hfopenllm_v2/meta/arcee-ai/Llama-Spark/aaceb35d-4106-4d6c-b895-446b87394f3b.json deleted file mode 100644 index 4aa85b1a4..000000000 --- a/data/hfopenllm_v2/meta/arcee-ai/Llama-Spark/aaceb35d-4106-4d6c-b895-446b87394f3b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/arcee-ai_Llama-Spark/1762652580.0163891", - "retrieved_timestamp": "1762652580.0163898", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "arcee-ai/Llama-Spark", - "developer": "meta", - "inference_platform": "unknown", - "id": "arcee-ai/Llama-Spark", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7910732412221794 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5053504145749979 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13897280966767372 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35933333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3720910904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/argilla-warehouse/Llama-3.1-8B-MagPie-Ultra/4e4260dc-81e0-4e2f-a7ce-dd6a0f7e0796.json 
b/data/hfopenllm_v2/meta/argilla-warehouse/Llama-3.1-8B-MagPie-Ultra/4e4260dc-81e0-4e2f-a7ce-dd6a0f7e0796.json deleted file mode 100644 index aab6642af..000000000 --- a/data/hfopenllm_v2/meta/argilla-warehouse/Llama-3.1-8B-MagPie-Ultra/4e4260dc-81e0-4e2f-a7ce-dd6a0f7e0796.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/argilla-warehouse_Llama-3.1-8B-MagPie-Ultra/1762652580.018188", - "retrieved_timestamp": "1762652580.018189", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "argilla-warehouse/Llama-3.1-8B-MagPie-Ultra", - "developer": "meta", - "inference_platform": "unknown", - "id": "argilla-warehouse/Llama-3.1-8B-MagPie-Ultra", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5756514935925566 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46196134634468616 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0770392749244713 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35425 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31441156914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/asharsha30/LLAMA_Harsha_8_B_ORDP_10k/61523c37-faee-4708-be49-4c7e31d760e6.json b/data/hfopenllm_v2/meta/asharsha30/LLAMA_Harsha_8_B_ORDP_10k/61523c37-faee-4708-be49-4c7e31d760e6.json deleted file mode 100644 index d10396f8c..000000000 --- a/data/hfopenllm_v2/meta/asharsha30/LLAMA_Harsha_8_B_ORDP_10k/61523c37-faee-4708-be49-4c7e31d760e6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/asharsha30_LLAMA_Harsha_8_B_ORDP_10k/1762652580.01895", - "retrieved_timestamp": "1762652580.018951", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - 
"source_type": "documentation" - }, - "model_info": { - "name": "asharsha30/LLAMA_Harsha_8_B_ORDP_10k", - "developer": "meta", - "inference_platform": "unknown", - "id": "asharsha30/LLAMA_Harsha_8_B_ORDP_10k", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34639090945358314 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4668707690948544 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36965625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.281000664893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/bfuzzy1/acheron-m1a-llama/da59bcfb-1f9a-41e5-9a8c-14f672dce595.json b/data/hfopenllm_v2/meta/bfuzzy1/acheron-m1a-llama/da59bcfb-1f9a-41e5-9a8c-14f672dce595.json deleted file mode 100644 index e7f28b28b..000000000 --- a/data/hfopenllm_v2/meta/bfuzzy1/acheron-m1a-llama/da59bcfb-1f9a-41e5-9a8c-14f672dce595.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bfuzzy1_acheron-m1a-llama/1762652580.0322502", - "retrieved_timestamp": "1762652580.032251", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bfuzzy1/acheron-m1a-llama", - "developer": "meta", - "inference_platform": "unknown", - "id": "bfuzzy1/acheron-m1a-llama", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.514 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11245827737070972 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.29560475093811295 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0075528700906344415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36330208333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11461103723404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/bosonai/Higgs-Llama-3-70B/ebac2d72-ef36-43a7-83de-e28ae3eb4b22.json b/data/hfopenllm_v2/meta/bosonai/Higgs-Llama-3-70B/ebac2d72-ef36-43a7-83de-e28ae3eb4b22.json deleted file mode 100644 index 98113095f..000000000 --- a/data/hfopenllm_v2/meta/bosonai/Higgs-Llama-3-70B/ebac2d72-ef36-43a7-83de-e28ae3eb4b22.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bosonai_Higgs-Llama-3-70B/1762652580.035682", - "retrieved_timestamp": "1762652580.035682", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bosonai/Higgs-Llama-3-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "bosonai/Higgs-Llama-3-70B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5560678998390935 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.625765879603832 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25226586102719034 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36661073825503354 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.44708333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49019281914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/bunnycore/Best-Mix-Llama-3.1-8B/ee1e13fe-2ec6-4ce8-8d32-1fe011b12ef8.json b/data/hfopenllm_v2/meta/bunnycore/Best-Mix-Llama-3.1-8B/ee1e13fe-2ec6-4ce8-8d32-1fe011b12ef8.json deleted file mode 100644 index 39a1dc0b3..000000000 --- a/data/hfopenllm_v2/meta/bunnycore/Best-Mix-Llama-3.1-8B/ee1e13fe-2ec6-4ce8-8d32-1fe011b12ef8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Best-Mix-Llama-3.1-8B/1762652580.0419252", - "retrieved_timestamp": "1762652580.041926", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Best-Mix-Llama-3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "bunnycore/Best-Mix-Llama-3.1-8B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20670598456539757 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.343178100574048 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2928541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15649933510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/bunnycore/HyperLlama-3.1-8B/7d031f11-6623-40c0-96bd-b3f0c135600b.json b/data/hfopenllm_v2/meta/bunnycore/HyperLlama-3.1-8B/7d031f11-6623-40c0-96bd-b3f0c135600b.json deleted file mode 100644 index 10a40277c..000000000 --- a/data/hfopenllm_v2/meta/bunnycore/HyperLlama-3.1-8B/7d031f11-6623-40c0-96bd-b3f0c135600b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/bunnycore_HyperLlama-3.1-8B/1762652580.045207", - "retrieved_timestamp": "1762652580.045208", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/HyperLlama-3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "bunnycore/HyperLlama-3.1-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7883005979689446 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5103385292046213 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18277945619335348 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38292708333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3783244680851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/bunnycore/Llama-3.1-8B-TitanFusion-Mix/5b0421b6-04ff-422c-a13e-9649306959d4.json b/data/hfopenllm_v2/meta/bunnycore/Llama-3.1-8B-TitanFusion-Mix/5b0421b6-04ff-422c-a13e-9649306959d4.json deleted file mode 100644 index 8c7ad7407..000000000 --- a/data/hfopenllm_v2/meta/bunnycore/Llama-3.1-8B-TitanFusion-Mix/5b0421b6-04ff-422c-a13e-9649306959d4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.1-8B-TitanFusion-Mix/1762652580.045413", - "retrieved_timestamp": "1762652580.045414", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Llama-3.1-8B-TitanFusion-Mix", - "developer": "meta", - "inference_platform": "unknown", - "id": "bunnycore/Llama-3.1-8B-TitanFusion-Mix", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": 
"IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4924954675815725 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5755964197928182 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4316979166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3695146276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/bunnycore/Llama-3.1-8B-TitanFusion-v3/6ee91c1c-b44e-44a9-b4b2-4e3cbeb594d3.json b/data/hfopenllm_v2/meta/bunnycore/Llama-3.1-8B-TitanFusion-v3/6ee91c1c-b44e-44a9-b4b2-4e3cbeb594d3.json deleted file mode 100644 index 875f3d501..000000000 --- a/data/hfopenllm_v2/meta/bunnycore/Llama-3.1-8B-TitanFusion-v3/6ee91c1c-b44e-44a9-b4b2-4e3cbeb594d3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.1-8B-TitanFusion-v3/1762652580.045624", - "retrieved_timestamp": "1762652580.045625", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Llama-3.1-8B-TitanFusion-v3", - "developer": "meta", - "inference_platform": "unknown", - "id": "bunnycore/Llama-3.1-8B-TitanFusion-v3", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4809549772381725 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5262113071794826 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.1419939577039275 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4302083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38056848404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-All-Mix/60766e3b-e153-4ee8-8615-1c1e68b7cd75.json b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-All-Mix/60766e3b-e153-4ee8-8615-1c1e68b7cd75.json deleted file mode 100644 index 1f74ae7a5..000000000 --- a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-All-Mix/60766e3b-e153-4ee8-8615-1c1e68b7cd75.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-All-Mix/1762652580.045842", - "retrieved_timestamp": "1762652580.045843", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Llama-3.2-3B-All-Mix", - "developer": "meta", - "inference_platform": "unknown", - "id": "bunnycore/Llama-3.2-3B-All-Mix", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.607 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7226049105262924 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45083384652782293 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15030211480362538 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32869791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3159906914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Bespoke-Thought/b43702d0-eef7-42d8-87b9-c1cbd0edb417.json b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Bespoke-Thought/b43702d0-eef7-42d8-87b9-c1cbd0edb417.json deleted file mode 100644 index 4c29f1e90..000000000 --- a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Bespoke-Thought/b43702d0-eef7-42d8-87b9-c1cbd0edb417.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-Bespoke-Thought/1762652580.046056", - "retrieved_timestamp": "1762652580.046057", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Llama-3.2-3B-Bespoke-Thought", - "developer": "meta", - "inference_platform": "unknown", - "id": "bunnycore/Llama-3.2-3B-Bespoke-Thought", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4112621178473118 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45217398665008424 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33025 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31100398936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Booval/9cb855b6-e141-492a-99fb-98858d76f66c.json b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Booval/9cb855b6-e141-492a-99fb-98858d76f66c.json deleted file mode 100644 index c145bd429..000000000 --- a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Booval/9cb855b6-e141-492a-99fb-98858d76f66c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-Booval/1762652580.046278", - "retrieved_timestamp": "1762652580.046279", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": 
"Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Llama-3.2-3B-Booval", - "developer": "meta", - "inference_platform": "unknown", - "id": "bunnycore/Llama-3.2-3B-Booval", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6669259786256023 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45143904014934083 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1268882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3394270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30576795212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Deep-Test/76edae8d-f4d3-41b2-8a24-cc676feed31c.json b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Deep-Test/76edae8d-f4d3-41b2-8a24-cc676feed31c.json deleted file mode 100644 index 16ac4a933..000000000 --- a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Deep-Test/76edae8d-f4d3-41b2-8a24-cc676feed31c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-Deep-Test/1762652580.046704", - "retrieved_timestamp": "1762652580.046706", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Llama-3.2-3B-Deep-Test", - "developer": "meta", - "inference_platform": "unknown", - "id": "bunnycore/Llama-3.2-3B-Deep-Test", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.607 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46516797652451053 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4530851376077318 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33939583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3152426861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Deep-Test/f150ea9d-0e4a-49c7-aa12-a703ca011755.json b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Deep-Test/f150ea9d-0e4a-49c7-aa12-a703ca011755.json deleted file mode 100644 index 0e9345182..000000000 --- a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Deep-Test/f150ea9d-0e4a-49c7-aa12-a703ca011755.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-Deep-Test/1762652580.046481", - "retrieved_timestamp": "1762652580.046481", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Llama-3.2-3B-Deep-Test", - "developer": "meta", - "inference_platform": "unknown", - "id": "bunnycore/Llama-3.2-3B-Deep-Test", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.803 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17753006467284582 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29502574011260374 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3646666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10488696808510638 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Della/8c23bcaf-2753-4f60-85ec-e92a48b8bba3.json b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Della/8c23bcaf-2753-4f60-85ec-e92a48b8bba3.json deleted file mode 100644 index a1d9a5385..000000000 --- a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Della/8c23bcaf-2753-4f60-85ec-e92a48b8bba3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-Della/1762652580.0469692", - "retrieved_timestamp": "1762652580.0469701", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Llama-3.2-3B-Della", - "developer": "meta", - "inference_platform": "unknown", - "id": "bunnycore/Llama-3.2-3B-Della", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.607 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35608297096149333 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36834936417932634 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030211480362537766 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39015625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21284906914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Long-Think/bf24dc90-551e-4e0d-8525-9b3b8c4ccfe1.json b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Long-Think/bf24dc90-551e-4e0d-8525-9b3b8c4ccfe1.json deleted file mode 100644 index 05f1765d7..000000000 --- 
a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Long-Think/bf24dc90-551e-4e0d-8525-9b3b8c4ccfe1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-Long-Think/1762652580.047193", - "retrieved_timestamp": "1762652580.047194", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Llama-3.2-3B-Long-Think", - "developer": "meta", - "inference_platform": "unknown", - "id": "bunnycore/Llama-3.2-3B-Long-Think", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5473499204333391 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4610394542442049 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14577039274924472 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33955208333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30477061170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Mix-Skill/7a6d897c-0efe-4c18-808c-25f6b9a78b5d.json b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Mix-Skill/7a6d897c-0efe-4c18-808c-25f6b9a78b5d.json deleted file mode 100644 index aa36da534..000000000 --- a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Mix-Skill/7a6d897c-0efe-4c18-808c-25f6b9a78b5d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-Mix-Skill/1762652580.047411", - "retrieved_timestamp": "1762652580.047412", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Llama-3.2-3B-Mix-Skill", - "developer": "meta", - "inference_platform": "unknown", - "id": "bunnycore/Llama-3.2-3B-Mix-Skill", - 
"additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.607 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6404229666174639 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45818358891543803 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1472809667673716 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33961458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3120844414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-ProdigyPlus/0ef3d0a9-a3e9-4b33-bece-bd7eec82514d.json b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-ProdigyPlus/0ef3d0a9-a3e9-4b33-bece-bd7eec82514d.json deleted file mode 100644 index ec77eb772..000000000 --- a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-ProdigyPlus/0ef3d0a9-a3e9-4b33-bece-bd7eec82514d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-ProdigyPlus/1762652580.047628", - "retrieved_timestamp": "1762652580.047629", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Llama-3.2-3B-ProdigyPlus", - "developer": "meta", - "inference_platform": "unknown", - "id": "bunnycore/Llama-3.2-3B-ProdigyPlus", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.607 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40152018865499095 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4392279045834126 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": 
"Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11555891238670694 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35800000000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28174867021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-ProdigyPlusPlus/485d4a25-810a-4022-828b-15c255fa2004.json b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-ProdigyPlusPlus/485d4a25-810a-4022-828b-15c255fa2004.json deleted file mode 100644 index 24ccef7d7..000000000 --- a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-ProdigyPlusPlus/485d4a25-810a-4022-828b-15c255fa2004.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-ProdigyPlusPlus/1762652580.047838", - "retrieved_timestamp": "1762652580.047839", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Llama-3.2-3B-ProdigyPlusPlus", - "developer": "meta", - "inference_platform": "unknown", - "id": "bunnycore/Llama-3.2-3B-ProdigyPlusPlus", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.607 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1645157072124186 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3689926047041594 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.354125 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15001662234042554 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-RP-DeepThink/d24cf761-7c11-4f9b-9e41-ca24ac1225b9.json b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-RP-DeepThink/d24cf761-7c11-4f9b-9e41-ca24ac1225b9.json deleted file mode 100644 index 25c722487..000000000 --- a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-RP-DeepThink/d24cf761-7c11-4f9b-9e41-ca24ac1225b9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-RP-DeepThink/1762652580.048058", - "retrieved_timestamp": "1762652580.048059", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Llama-3.2-3B-RP-DeepThink", - "developer": "meta", - "inference_platform": "unknown", - "id": "bunnycore/Llama-3.2-3B-RP-DeepThink", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.607 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7143867161354096 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45625632795830356 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1608761329305136 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33021875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32421875 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-RRStock/f1af1d33-fb95-462d-830c-5330d6481b6a.json b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-RRStock/f1af1d33-fb95-462d-830c-5330d6481b6a.json deleted file mode 100644 index 4f0c5d710..000000000 --- a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-RRStock/f1af1d33-fb95-462d-830c-5330d6481b6a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-RRStock/1762652580.048298", - "retrieved_timestamp": 
"1762652580.048298", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Llama-3.2-3B-RRStock", - "developer": "meta", - "inference_platform": "unknown", - "id": "bunnycore/Llama-3.2-3B-RRStock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.607 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6657269378582162 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45676937648721455 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16993957703927492 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3314270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32355385638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-ToxicKod/d59a73eb-0aee-49f8-abce-6500f1fae79d.json b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-ToxicKod/d59a73eb-0aee-49f8-abce-6500f1fae79d.json deleted file mode 100644 index 74bd06fb5..000000000 --- a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-ToxicKod/d59a73eb-0aee-49f8-abce-6500f1fae79d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-ToxicKod/1762652580.0485172", - "retrieved_timestamp": "1762652580.048518", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Llama-3.2-3B-ToxicKod", - "developer": "meta", - "inference_platform": "unknown", - "id": "bunnycore/Llama-3.2-3B-ToxicKod", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6319299458769398 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4525429005077621 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16993957703927492 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34745833333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28798204787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3b-RP-Toxic-Fuse/4c2bc39c-2d04-4afd-a94d-bc8f59e75755.json b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3b-RP-Toxic-Fuse/4c2bc39c-2d04-4afd-a94d-bc8f59e75755.json deleted file mode 100644 index 4c46c2d81..000000000 --- a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3b-RP-Toxic-Fuse/4c2bc39c-2d04-4afd-a94d-bc8f59e75755.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3b-RP-Toxic-Fuse/1762652580.048726", - "retrieved_timestamp": "1762652580.048727", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Llama-3.2-3b-RP-Toxic-Fuse", - "developer": "meta", - "inference_platform": "unknown", - "id": "bunnycore/Llama-3.2-3b-RP-Toxic-Fuse", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.683362367407368 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46497242330684924 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24018126888217523 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": 
"Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3953645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31058843085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/bunnycore/Smol-Llama-3.2-3B/eed01a32-3282-40c9-9a6c-9a0eae79fc8e.json b/data/hfopenllm_v2/meta/bunnycore/Smol-Llama-3.2-3B/eed01a32-3282-40c9-9a6c-9a0eae79fc8e.json deleted file mode 100644 index 4e00cdf72..000000000 --- a/data/hfopenllm_v2/meta/bunnycore/Smol-Llama-3.2-3B/eed01a32-3282-40c9-9a6c-9a0eae79fc8e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Smol-Llama-3.2-3B/1762652580.061756", - "retrieved_timestamp": "1762652580.0617611", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Smol-Llama-3.2-3B", - "developer": "meta", - "inference_platform": "unknown", - "id": "bunnycore/Smol-Llama-3.2-3B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.607 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6678501930433471 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.453881406940321 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13821752265861026 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34600000000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3228058510638298 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/meta/chargoddard/prometheus-2-llama-3-8b/ea26b157-81d0-4aa2-a6df-d1d391ab2a3b.json b/data/hfopenllm_v2/meta/chargoddard/prometheus-2-llama-3-8b/ea26b157-81d0-4aa2-a6df-d1d391ab2a3b.json deleted file mode 100644 index c89933354..000000000 --- a/data/hfopenllm_v2/meta/chargoddard/prometheus-2-llama-3-8b/ea26b157-81d0-4aa2-a6df-d1d391ab2a3b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/chargoddard_prometheus-2-llama-3-8b/1762652580.100514", - "retrieved_timestamp": "1762652580.100516", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "chargoddard/prometheus-2-llama-3-8b", - "developer": "meta", - "inference_platform": "unknown", - "id": "chargoddard/prometheus-2-llama-3-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5288900118352637 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4931144581470071 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0823262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33958333333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30867686170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/cloudyu/Llama-3-70Bx2-MOE/8d0fa497-cdaa-4206-ae80-babed3089d43.json b/data/hfopenllm_v2/meta/cloudyu/Llama-3-70Bx2-MOE/8d0fa497-cdaa-4206-ae80-babed3089d43.json deleted file mode 100644 index e6d48259d..000000000 --- a/data/hfopenllm_v2/meta/cloudyu/Llama-3-70Bx2-MOE/8d0fa497-cdaa-4206-ae80-babed3089d43.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cloudyu_Llama-3-70Bx2-MOE/1762652580.10177", - "retrieved_timestamp": "1762652580.101771", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - 
"source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cloudyu/Llama-3-70Bx2-MOE", - "developer": "meta", - "inference_platform": "unknown", - "id": "cloudyu/Llama-3-70Bx2-MOE", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 126.926 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5482486469234964 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6636234572270707 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3934563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48118750000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5142121010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/cloudyu/Llama-3.2-3Bx4/0f4eaf10-0a2d-48e7-9c22-e1c771da16a0.json b/data/hfopenllm_v2/meta/cloudyu/Llama-3.2-3Bx4/0f4eaf10-0a2d-48e7-9c22-e1c771da16a0.json deleted file mode 100644 index 0929c4f10..000000000 --- a/data/hfopenllm_v2/meta/cloudyu/Llama-3.2-3Bx4/0f4eaf10-0a2d-48e7-9c22-e1c771da16a0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cloudyu_Llama-3.2-3Bx4/1762652580.102047", - "retrieved_timestamp": "1762652580.102048", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cloudyu/Llama-3.2-3Bx4", - "developer": "meta", - "inference_platform": "unknown", - "id": "cloudyu/Llama-3.2-3Bx4", - "additional_details": { - "precision": "float16", - "architecture": "MixtralForCausalLM", - "params_billions": 9.949 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5068584688626179 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 
- }, - "score_details": { - "score": 0.43321946547659324 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10725075528700906 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3495625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29853723404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/cloudyu/S1-Llama-3.2-3Bx4-MoE/4cd18600-a389-4a22-88f8-0e35739665bb.json b/data/hfopenllm_v2/meta/cloudyu/S1-Llama-3.2-3Bx4-MoE/4cd18600-a389-4a22-88f8-0e35739665bb.json deleted file mode 100644 index ccb1493f4..000000000 --- a/data/hfopenllm_v2/meta/cloudyu/S1-Llama-3.2-3Bx4-MoE/4cd18600-a389-4a22-88f8-0e35739665bb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cloudyu_S1-Llama-3.2-3Bx4-MoE/1762652580.103262", - "retrieved_timestamp": "1762652580.103263", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cloudyu/S1-Llama-3.2-3Bx4-MoE", - "developer": "meta", - "inference_platform": "unknown", - "id": "cloudyu/S1-Llama-3.2-3Bx4-MoE", - "additional_details": { - "precision": "float16", - "architecture": "MixtralForCausalLM", - "params_billions": 9.555 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.530214275899059 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43578925882973 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, 
- "score_details": { - "score": 0.345625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30435505319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-ipo/e89bbd89-f8fa-4156-94d8-6f390a383557.json b/data/hfopenllm_v2/meta/cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-ipo/e89bbd89-f8fa-4156-94d8-6f390a383557.json deleted file mode 100644 index f7e583b2f..000000000 --- a/data/hfopenllm_v2/meta/cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-ipo/e89bbd89-f8fa-4156-94d8-6f390a383557.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cluebbers_Llama-3.1-8B-paraphrase-type-generation-apty-ipo/1762652580.109549", - "retrieved_timestamp": "1762652580.1095521", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-ipo", - "developer": "meta", - "inference_platform": "unknown", - "id": "cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-ipo", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1326668794354535 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3800219303191354 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43321875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2590591755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-sigmoid/f7aec62a-004e-4034-b4d9-152452bb519a.json b/data/hfopenllm_v2/meta/cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-sigmoid/f7aec62a-004e-4034-b4d9-152452bb519a.json deleted file mode 100644 index 
f98209630..000000000 --- a/data/hfopenllm_v2/meta/cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-sigmoid/f7aec62a-004e-4034-b4d9-152452bb519a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cluebbers_Llama-3.1-8B-paraphrase-type-generation-apty-sigmoid/1762652580.110752", - "retrieved_timestamp": "1762652580.110753", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-sigmoid", - "developer": "meta", - "inference_platform": "unknown", - "id": "cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-sigmoid", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13184240038652995 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37889016032903705 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43055208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2562333776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/cluebbers/Llama-3.1-8B-paraphrase-type-generation-etpc/dbec72eb-bef2-4985-9ac6-bf5c6dabc25c.json b/data/hfopenllm_v2/meta/cluebbers/Llama-3.1-8B-paraphrase-type-generation-etpc/dbec72eb-bef2-4985-9ac6-bf5c6dabc25c.json deleted file mode 100644 index c9c7aeac9..000000000 --- a/data/hfopenllm_v2/meta/cluebbers/Llama-3.1-8B-paraphrase-type-generation-etpc/dbec72eb-bef2-4985-9ac6-bf5c6dabc25c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cluebbers_Llama-3.1-8B-paraphrase-type-generation-etpc/1762652580.1111748", - "retrieved_timestamp": "1762652580.111176", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF 
Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cluebbers/Llama-3.1-8B-paraphrase-type-generation-etpc", - "developer": "meta", - "inference_platform": "unknown", - "id": "cluebbers/Llama-3.1-8B-paraphrase-type-generation-etpc", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12085156274241235 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3780811415223316 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43185416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25556848404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/cognitivecomputations/Dolphin3.0-Llama3.1-8B/fa439482-ca9c-49c3-9732-1147c3965c56.json b/data/hfopenllm_v2/meta/cognitivecomputations/Dolphin3.0-Llama3.1-8B/fa439482-ca9c-49c3-9732-1147c3965c56.json deleted file mode 100644 index fab6cadab..000000000 --- a/data/hfopenllm_v2/meta/cognitivecomputations/Dolphin3.0-Llama3.1-8B/fa439482-ca9c-49c3-9732-1147c3965c56.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cognitivecomputations_Dolphin3.0-Llama3.1-8B/1762652580.111501", - "retrieved_timestamp": "1762652580.1115022", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cognitivecomputations/Dolphin3.0-Llama3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "cognitivecomputations/Dolphin3.0-Llama3.1-8B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7621222799948582 - } - }, - { - "evaluation_name": 
"BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4916366353921198 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12311178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36534375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2992021276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/cognitivecomputations/Dolphin3.0-Llama3.2-1B/0aecb893-2b9b-4cfd-bf97-b9887b0aa539.json b/data/hfopenllm_v2/meta/cognitivecomputations/Dolphin3.0-Llama3.2-1B/0aecb893-2b9b-4cfd-bf97-b9887b0aa539.json deleted file mode 100644 index a80ee638a..000000000 --- a/data/hfopenllm_v2/meta/cognitivecomputations/Dolphin3.0-Llama3.2-1B/0aecb893-2b9b-4cfd-bf97-b9887b0aa539.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cognitivecomputations_Dolphin3.0-Llama3.2-1B/1762652580.112042", - "retrieved_timestamp": "1762652580.112046", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cognitivecomputations/Dolphin3.0-Llama3.2-1B", - "developer": "meta", - "inference_platform": "unknown", - "id": "cognitivecomputations/Dolphin3.0-Llama3.2-1B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5427787160290252 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31222474255909144 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, 
- "score_details": { - "score": 0.22986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32488541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13754986702127658 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/cognitivecomputations/dolphin-2.9-llama3-8b/d985b9ab-a760-4a50-973e-6985e778b97d.json b/data/hfopenllm_v2/meta/cognitivecomputations/dolphin-2.9-llama3-8b/d985b9ab-a760-4a50-973e-6985e778b97d.json deleted file mode 100644 index c57955694..000000000 --- a/data/hfopenllm_v2/meta/cognitivecomputations/dolphin-2.9-llama3-8b/d985b9ab-a760-4a50-973e-6985e778b97d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9-llama3-8b/1762652580.113044", - "retrieved_timestamp": "1762652580.113045", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cognitivecomputations/dolphin-2.9-llama3-8b", - "developer": "meta", - "inference_platform": "unknown", - "id": "cognitivecomputations/dolphin-2.9-llama3-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38503393218881454 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49499220166609187 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43753125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.277094414893617 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/meta/cognitivecomputations/dolphin-2.9.1-llama-3-70b/7c975279-f21e-418b-bc0b-739a933b91dc.json b/data/hfopenllm_v2/meta/cognitivecomputations/dolphin-2.9.1-llama-3-70b/7c975279-f21e-418b-bc0b-739a933b91dc.json deleted file mode 100644 index 23495fcf9..000000000 --- a/data/hfopenllm_v2/meta/cognitivecomputations/dolphin-2.9.1-llama-3-70b/7c975279-f21e-418b-bc0b-739a933b91dc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.1-llama-3-70b/1762652580.113282", - "retrieved_timestamp": "1762652580.1132832", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cognitivecomputations/dolphin-2.9.1-llama-3-70b", - "developer": "meta", - "inference_platform": "unknown", - "id": "cognitivecomputations/dolphin-2.9.1-llama-3-70b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3760167466765959 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5204919312821467 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18202416918429004 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49756249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41298204787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/cognitivecomputations/dolphin-2.9.4-llama3.1-8b/d7da3f99-b538-4b33-a3dc-b2e4a96d3f89.json b/data/hfopenllm_v2/meta/cognitivecomputations/dolphin-2.9.4-llama3.1-8b/d7da3f99-b538-4b33-a3dc-b2e4a96d3f89.json deleted file mode 100644 index 11b8a64d7..000000000 --- a/data/hfopenllm_v2/meta/cognitivecomputations/dolphin-2.9.4-llama3.1-8b/d7da3f99-b538-4b33-a3dc-b2e4a96d3f89.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.4-llama3.1-8b/1762652580.1160939", - "retrieved_timestamp": "1762652580.116095", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cognitivecomputations/dolphin-2.9.4-llama3.1-8b", - "developer": "meta", - "inference_platform": "unknown", - "id": "cognitivecomputations/dolphin-2.9.4-llama3.1-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27572396796056686 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35236263850832567 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3236145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12367021276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/collaiborateorg/Collaiborator-MEDLLM-Llama-3-8B-v2/55eeee3c-b812-4359-ab5f-4e3fa976648f.json b/data/hfopenllm_v2/meta/collaiborateorg/Collaiborator-MEDLLM-Llama-3-8B-v2/55eeee3c-b812-4359-ab5f-4e3fa976648f.json deleted file mode 100644 index 0a7710266..000000000 --- a/data/hfopenllm_v2/meta/collaiborateorg/Collaiborator-MEDLLM-Llama-3-8B-v2/55eeee3c-b812-4359-ab5f-4e3fa976648f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/collaiborateorg_Collaiborator-MEDLLM-Llama-3-8B-v2/1762652580.116315", - "retrieved_timestamp": "1762652580.116315", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "collaiborateorg/Collaiborator-MEDLLM-Llama-3-8B-v2", - "developer": "meta", - "inference_platform": "unknown", - "id": "collaiborateorg/Collaiborator-MEDLLM-Llama-3-8B-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.380887157187374 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46480279544898967 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33305369127516776 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3434270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3480718085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/cpayne1303/llama-43m-beta/d79e4774-159d-4b47-8cc0-64d7844e7bfc.json b/data/hfopenllm_v2/meta/cpayne1303/llama-43m-beta/d79e4774-159d-4b47-8cc0-64d7844e7bfc.json deleted file mode 100644 index d6e727927..000000000 --- a/data/hfopenllm_v2/meta/cpayne1303/llama-43m-beta/d79e4774-159d-4b47-8cc0-64d7844e7bfc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cpayne1303_llama-43m-beta/1762652580.117069", - "retrieved_timestamp": "1762652580.1170702", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cpayne1303/llama-43m-beta", - "developer": "meta", - "inference_platform": "unknown", - "id": "cpayne1303/llama-43m-beta", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.043 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19156837191983936 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29767781029884355 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3871770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11319813829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/cpayne1303/llama-43m-beta/d987e61a-c7cc-4072-9e2c-faa6304eab65.json b/data/hfopenllm_v2/meta/cpayne1303/llama-43m-beta/d987e61a-c7cc-4072-9e2c-faa6304eab65.json deleted file mode 100644 index b0903a2fd..000000000 --- a/data/hfopenllm_v2/meta/cpayne1303/llama-43m-beta/d987e61a-c7cc-4072-9e2c-faa6304eab65.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cpayne1303_llama-43m-beta/1762652580.117342", - "retrieved_timestamp": "1762652580.117342", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cpayne1303/llama-43m-beta", - "developer": "meta", - "inference_platform": "unknown", - "id": "cpayne1303/llama-43m-beta", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.043 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19489066787235645 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29646319842669744 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.004531722054380665 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3885416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11112034574468085 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/meta/cstr/llama3.1-8b-spaetzle-v90/73270182-a54d-4fc5-834a-89283677c1af.json b/data/hfopenllm_v2/meta/cstr/llama3.1-8b-spaetzle-v90/73270182-a54d-4fc5-834a-89283677c1af.json deleted file mode 100644 index 3a8b2af92..000000000 --- a/data/hfopenllm_v2/meta/cstr/llama3.1-8b-spaetzle-v90/73270182-a54d-4fc5-834a-89283677c1af.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cstr_llama3.1-8b-spaetzle-v90/1762652580.117986", - "retrieved_timestamp": "1762652580.1179872", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cstr/llama3.1-8b-spaetzle-v90", - "developer": "meta", - "inference_platform": "unknown", - "id": "cstr/llama3.1-8b-spaetzle-v90", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7356192679867197 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5302860633332208 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14954682779456194 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41343749999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37308843085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/8df04772-fc5c-4dfb-8366-f9844bf52a0e.json b/data/hfopenllm_v2/meta/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/8df04772-fc5c-4dfb-8366-f9844bf52a0e.json deleted file mode 100644 index 7b1f12f7c..000000000 --- a/data/hfopenllm_v2/meta/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/8df04772-fc5c-4dfb-8366-f9844bf52a0e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/deepseek-ai_DeepSeek-R1-Distill-Llama-70B/1762652580.121449", - "retrieved_timestamp": "1762652580.12145", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43359397509718656 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5634962649702303 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3074018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43421875000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4748171542553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/650f54ba-4d43-4e31-92cd-16c7c1913b34.json b/data/hfopenllm_v2/meta/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/650f54ba-4d43-4e31-92cd-16c7c1913b34.json deleted file mode 100644 index 49e2e68dc..000000000 --- a/data/hfopenllm_v2/meta/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/650f54ba-4d43-4e31-92cd-16c7c1913b34.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/deepseek-ai_DeepSeek-R1-Distill-Llama-8B/1762652580.121731", - "retrieved_timestamp": "1762652580.121734", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37823973723054827 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.323935108539057 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21978851963746224 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2550335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32497916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20894281914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/dfurman/Llama-3-70B-Orpo-v0.1/854d263a-00cc-488a-83eb-c69bb74da5b5.json b/data/hfopenllm_v2/meta/dfurman/Llama-3-70B-Orpo-v0.1/854d263a-00cc-488a-83eb-c69bb74da5b5.json deleted file mode 100644 index eff32b927..000000000 --- a/data/hfopenllm_v2/meta/dfurman/Llama-3-70B-Orpo-v0.1/854d263a-00cc-488a-83eb-c69bb74da5b5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/dfurman_Llama-3-70B-Orpo-v0.1/1762652580.124833", - "retrieved_timestamp": "1762652580.124834", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "dfurman/Llama-3-70B-Orpo-v0.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "dfurman/Llama-3-70B-Orpo-v0.1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20490742341431845 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46552376347015506 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1578549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - 
{ - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4534375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38929521276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/dfurman/Llama-3-8B-Orpo-v0.1/0a6a3c2b-c0f5-44c7-9ac2-e278a303197e.json b/data/hfopenllm_v2/meta/dfurman/Llama-3-8B-Orpo-v0.1/0a6a3c2b-c0f5-44c7-9ac2-e278a303197e.json deleted file mode 100644 index 717000bc9..000000000 --- a/data/hfopenllm_v2/meta/dfurman/Llama-3-8B-Orpo-v0.1/0a6a3c2b-c0f5-44c7-9ac2-e278a303197e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/dfurman_Llama-3-8B-Orpo-v0.1/1762652580.1253839", - "retrieved_timestamp": "1762652580.125385", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "dfurman/Llama-3-8B-Orpo-v0.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "dfurman/Llama-3-8B-Orpo-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3000039894147528 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3852967582460245 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.041540785498489434 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.357875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22805851063829788 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/dfurman/Llama-3-8B-Orpo-v0.1/10047fc1-254f-406c-807c-3274d9780550.json b/data/hfopenllm_v2/meta/dfurman/Llama-3-8B-Orpo-v0.1/10047fc1-254f-406c-807c-3274d9780550.json deleted file mode 100644 index aabe82f8d..000000000 --- 
a/data/hfopenllm_v2/meta/dfurman/Llama-3-8B-Orpo-v0.1/10047fc1-254f-406c-807c-3274d9780550.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/dfurman_Llama-3-8B-Orpo-v0.1/1762652580.125153", - "retrieved_timestamp": "1762652580.125154", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "dfurman/Llama-3-8B-Orpo-v0.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "dfurman/Llama-3-8B-Orpo-v0.1", - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28351773294857646 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3842420919898036 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05211480362537765 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3566354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22980385638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/dnhkng/RYS-Llama3.1-Large/ca04e634-81e6-49fb-bdc4-2ff0ef04b75f.json b/data/hfopenllm_v2/meta/dnhkng/RYS-Llama3.1-Large/ca04e634-81e6-49fb-bdc4-2ff0ef04b75f.json deleted file mode 100644 index 7d94f21e4..000000000 --- a/data/hfopenllm_v2/meta/dnhkng/RYS-Llama3.1-Large/ca04e634-81e6-49fb-bdc4-2ff0ef04b75f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/dnhkng_RYS-Llama3.1-Large/1762652580.133179", - "retrieved_timestamp": "1762652580.1331809", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "dnhkng/RYS-Llama3.1-Large", - "developer": "meta", - "inference_platform": "unknown", - "id": "dnhkng/RYS-Llama3.1-Large", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", 
- "params_billions": 81.677 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8492001223420524 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6899112229777242 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3504531722054381 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37416107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4553958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5248503989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/duyhv1411/Llama-3.2-1B-en-vi/000fcba9-c157-48de-b672-f583f4cd3881.json b/data/hfopenllm_v2/meta/duyhv1411/Llama-3.2-1B-en-vi/000fcba9-c157-48de-b672-f583f4cd3881.json deleted file mode 100644 index 0b024064a..000000000 --- a/data/hfopenllm_v2/meta/duyhv1411/Llama-3.2-1B-en-vi/000fcba9-c157-48de-b672-f583f4cd3881.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/duyhv1411_Llama-3.2-1B-en-vi/1762652580.1364539", - "retrieved_timestamp": "1762652580.1364548", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "duyhv1411/Llama-3.2-1B-en-vi", - "developer": "meta", - "inference_platform": "unknown", - "id": "duyhv1411/Llama-3.2-1B-en-vi", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4788317220530415 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.329090872737918 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3197083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13414228723404256 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/duyhv1411/Llama-3.2-3B-en-vi/31381b9d-77fe-491d-891c-de4fd37fa1cd.json b/data/hfopenllm_v2/meta/duyhv1411/Llama-3.2-3B-en-vi/31381b9d-77fe-491d-891c-de4fd37fa1cd.json deleted file mode 100644 index ca3eef12e..000000000 --- a/data/hfopenllm_v2/meta/duyhv1411/Llama-3.2-3B-en-vi/31381b9d-77fe-491d-891c-de4fd37fa1cd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/duyhv1411_Llama-3.2-3B-en-vi/1762652580.136725", - "retrieved_timestamp": "1762652580.136726", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "duyhv1411/Llama-3.2-3B-en-vi", - "developer": "meta", - "inference_platform": "unknown", - "id": "duyhv1411/Llama-3.2-3B-en-vi", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4852014876084345 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271639320986486 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.022658610271903322 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3210104166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.13588763297872342 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/ehristoforu/HappyLlama1/07a29c73-e3f4-4f01-b105-ac1ef2fdff43.json b/data/hfopenllm_v2/meta/ehristoforu/HappyLlama1/07a29c73-e3f4-4f01-b105-ac1ef2fdff43.json deleted file mode 100644 index d25a353c8..000000000 --- a/data/hfopenllm_v2/meta/ehristoforu/HappyLlama1/07a29c73-e3f4-4f01-b105-ac1ef2fdff43.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ehristoforu_HappyLlama1/1762652580.139553", - "retrieved_timestamp": "1762652580.139554", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ehristoforu/HappyLlama1", - "developer": "meta", - "inference_platform": "unknown", - "id": "ehristoforu/HappyLlama1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7362686560548235 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49957323097428485 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14274924471299094 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42868749999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35455452127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/ehristoforu/mllama-3.1-8b-it/c4fa1166-5255-4b95-8c7b-e1f93265f126.json b/data/hfopenllm_v2/meta/ehristoforu/mllama-3.1-8b-it/c4fa1166-5255-4b95-8c7b-e1f93265f126.json deleted file mode 100644 index 900ce513b..000000000 --- a/data/hfopenllm_v2/meta/ehristoforu/mllama-3.1-8b-it/c4fa1166-5255-4b95-8c7b-e1f93265f126.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ehristoforu_mllama-3.1-8b-it/1762652580.143829", - "retrieved_timestamp": "1762652580.14383", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": 
"third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ehristoforu/mllama-3.1-8b-it", - "developer": "meta", - "inference_platform": "unknown", - "id": "ehristoforu/mllama-3.1-8b-it", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38788193105404767 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4868027039491969 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37990936555891236 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3348645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26221742021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/flammenai/Llama3.1-Flammades-70B/92b8ecb7-80a2-4b77-bf20-8d87a36209c0.json b/data/hfopenllm_v2/meta/flammenai/Llama3.1-Flammades-70B/92b8ecb7-80a2-4b77-bf20-8d87a36209c0.json deleted file mode 100644 index c836f56ef..000000000 --- a/data/hfopenllm_v2/meta/flammenai/Llama3.1-Flammades-70B/92b8ecb7-80a2-4b77-bf20-8d87a36209c0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/flammenai_Llama3.1-Flammades-70B/1762652580.154665", - "retrieved_timestamp": "1762652580.154666", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "flammenai/Llama3.1-Flammades-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "flammenai/Llama3.1-Flammades-70B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7058438277104748 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6659721866694542 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20921450151057402 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3540268456375839 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48705208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47523271276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/flammenai/Mahou-1.2a-llama3-8B/eb10ecab-2be4-4b75-9b85-d2f2786fd095.json b/data/hfopenllm_v2/meta/flammenai/Mahou-1.2a-llama3-8B/eb10ecab-2be4-4b75-9b85-d2f2786fd095.json deleted file mode 100644 index 4300c8b00..000000000 --- a/data/hfopenllm_v2/meta/flammenai/Mahou-1.2a-llama3-8B/eb10ecab-2be4-4b75-9b85-d2f2786fd095.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/flammenai_Mahou-1.2a-llama3-8B/1762652580.154922", - "retrieved_timestamp": "1762652580.154923", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "flammenai/Mahou-1.2a-llama3-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "flammenai/Mahou-1.2a-llama3-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.50925655039739 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5093660540433169 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08383685800604229 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on 
MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38466666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38173204787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/flammenai/Mahou-1.5-llama3.1-70B/653ff1ac-158e-4d36-a813-22ebef4a76ce.json b/data/hfopenllm_v2/meta/flammenai/Mahou-1.5-llama3.1-70B/653ff1ac-158e-4d36-a813-22ebef4a76ce.json deleted file mode 100644 index b9c743a16..000000000 --- a/data/hfopenllm_v2/meta/flammenai/Mahou-1.5-llama3.1-70B/653ff1ac-158e-4d36-a813-22ebef4a76ce.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/flammenai_Mahou-1.5-llama3.1-70B/1762652580.155493", - "retrieved_timestamp": "1762652580.155494", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "flammenai/Mahou-1.5-llama3.1-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "flammenai/Mahou-1.5-llama3.1-70B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7146615424850509 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6650860641288713 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20996978851963746 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3540268456375839 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4950208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47490026595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/fluently-lm/Llama-TI-8B/63a32ad0-b871-437c-991a-342de8c13345.json b/data/hfopenllm_v2/meta/fluently-lm/Llama-TI-8B/63a32ad0-b871-437c-991a-342de8c13345.json deleted file mode 100644 index bca85d4a8..000000000 --- 
a/data/hfopenllm_v2/meta/fluently-lm/Llama-TI-8B/63a32ad0-b871-437c-991a-342de8c13345.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/fluently-lm_Llama-TI-8B/1762652580.156513", - "retrieved_timestamp": "1762652580.156514", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "fluently-lm/Llama-TI-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "fluently-lm/Llama-TI-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28803906966847964 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.520085504155627 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19637462235649547 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4102708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.343999335106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/fulim/FineLlama-3.1-8B/46fa0a20-2810-4f0b-befe-afc3fc774734.json b/data/hfopenllm_v2/meta/fulim/FineLlama-3.1-8B/46fa0a20-2810-4f0b-befe-afc3fc774734.json deleted file mode 100644 index fd13e6623..000000000 --- a/data/hfopenllm_v2/meta/fulim/FineLlama-3.1-8B/46fa0a20-2810-4f0b-befe-afc3fc774734.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/fulim_FineLlama-3.1-8B/1762652580.162704", - "retrieved_timestamp": "1762652580.162705", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "fulim/FineLlama-3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "fulim/FineLlama-3.1-8B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - 
} - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14388267574480157 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.456920741562608 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38673958333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31673869680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/gbueno86/Brinebreath-Llama-3.1-70B/12e0e194-ef37-4da5-9354-e82f983fadb2.json b/data/hfopenllm_v2/meta/gbueno86/Brinebreath-Llama-3.1-70B/12e0e194-ef37-4da5-9354-e82f983fadb2.json deleted file mode 100644 index 85c9a10af..000000000 --- a/data/hfopenllm_v2/meta/gbueno86/Brinebreath-Llama-3.1-70B/12e0e194-ef37-4da5-9354-e82f983fadb2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/gbueno86_Brinebreath-Llama-3.1-70B/1762652580.1638331", - "retrieved_timestamp": "1762652580.163834", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "gbueno86/Brinebreath-Llama-3.1-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "gbueno86/Brinebreath-Llama-3.1-70B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5532952565858589 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6880562247706813 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.297583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3464765100671141 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45406250000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5196143617021277 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/gbueno86/Meta-LLama-3-Cat-Smaug-LLama-70b/9b7181ec-81f6-438a-8af6-a219f356f430.json b/data/hfopenllm_v2/meta/gbueno86/Meta-LLama-3-Cat-Smaug-LLama-70b/9b7181ec-81f6-438a-8af6-a219f356f430.json deleted file mode 100644 index 0fba664ae..000000000 --- a/data/hfopenllm_v2/meta/gbueno86/Meta-LLama-3-Cat-Smaug-LLama-70b/9b7181ec-81f6-438a-8af6-a219f356f430.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/gbueno86_Meta-LLama-3-Cat-Smaug-LLama-70b/1762652580.1641119", - "retrieved_timestamp": "1762652580.1641128", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "gbueno86/Meta-LLama-3-Cat-Smaug-LLama-70b", - "developer": "meta", - "inference_platform": "unknown", - "id": "gbueno86/Meta-LLama-3-Cat-Smaug-LLama-70b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8071849359698933 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6674314931312052 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2938066465256798 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271812080536913 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43682291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5074800531914894 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/glaiveai/Reflection-Llama-3.1-70B/3e8ba765-d24b-4ffe-a816-21ea02b7ba14.json b/data/hfopenllm_v2/meta/glaiveai/Reflection-Llama-3.1-70B/3e8ba765-d24b-4ffe-a816-21ea02b7ba14.json deleted file mode 100644 index 8fade22df..000000000 --- a/data/hfopenllm_v2/meta/glaiveai/Reflection-Llama-3.1-70B/3e8ba765-d24b-4ffe-a816-21ea02b7ba14.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/glaiveai_Reflection-Llama-3.1-70B/1762652580.164674", - "retrieved_timestamp": "1762652580.164675", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "glaiveai/Reflection-Llama-3.1-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "glaiveai/Reflection-Llama-3.1-70B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 69.5 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5990571683134085 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5681010035620444 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2756797583081571 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43803125000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6341422872340425 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/gmonsoon/SahabatAI-Llama-11B-Test/48f5e083-9fa3-4753-a734-578ac3e15e1f.json b/data/hfopenllm_v2/meta/gmonsoon/SahabatAI-Llama-11B-Test/48f5e083-9fa3-4753-a734-578ac3e15e1f.json deleted file mode 100644 index c7fa42162..000000000 --- a/data/hfopenllm_v2/meta/gmonsoon/SahabatAI-Llama-11B-Test/48f5e083-9fa3-4753-a734-578ac3e15e1f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/gmonsoon_SahabatAI-Llama-11B-Test/1762652580.16498", - "retrieved_timestamp": "1762652580.164981", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "gmonsoon/SahabatAI-Llama-11B-Test", - "developer": "meta", - "inference_platform": "unknown", - "id": "gmonsoon/SahabatAI-Llama-11B-Test", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 11.52 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33757319467900726 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4727584153058988 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030966767371601207 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40013541666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3182347074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/grimjim/DeepSauerHuatuoSkywork-R1-o1-Llama-3.1-8B/f7439085-a0c9-4d5b-bd4f-bf1841d5ce02.json b/data/hfopenllm_v2/meta/grimjim/DeepSauerHuatuoSkywork-R1-o1-Llama-3.1-8B/f7439085-a0c9-4d5b-bd4f-bf1841d5ce02.json deleted file mode 100644 index b2fcacad3..000000000 --- a/data/hfopenllm_v2/meta/grimjim/DeepSauerHuatuoSkywork-R1-o1-Llama-3.1-8B/f7439085-a0c9-4d5b-bd4f-bf1841d5ce02.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/grimjim_DeepSauerHuatuoSkywork-R1-o1-Llama-3.1-8B/1762652580.181649", - "retrieved_timestamp": "1762652580.18165", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "grimjim/DeepSauerHuatuoSkywork-R1-o1-Llama-3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "grimjim/DeepSauerHuatuoSkywork-R1-o1-Llama-3.1-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4797060687863757 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5269400362212973 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22205438066465258 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44078124999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3956948138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/grimjim/HuatuoSkywork-o1-Llama-3.1-8B/6a173156-75b3-47f4-9f88-ecace0ee6942.json b/data/hfopenllm_v2/meta/grimjim/HuatuoSkywork-o1-Llama-3.1-8B/6a173156-75b3-47f4-9f88-ecace0ee6942.json deleted file mode 100644 index 78d90a504..000000000 --- a/data/hfopenllm_v2/meta/grimjim/HuatuoSkywork-o1-Llama-3.1-8B/6a173156-75b3-47f4-9f88-ecace0ee6942.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/grimjim_HuatuoSkywork-o1-Llama-3.1-8B/1762652580.182574", - "retrieved_timestamp": "1762652580.182574", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "grimjim/HuatuoSkywork-o1-Llama-3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "grimjim/HuatuoSkywork-o1-Llama-3.1-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3961499931293413 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48863582396592203 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38821752265861026 - } - }, - 
{ - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38385416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30950797872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/grimjim/Llama-Nephilim-Metamorphosis-v2-8B/ac20706b-0370-47de-bc6b-ae188f8a9259.json b/data/hfopenllm_v2/meta/grimjim/Llama-Nephilim-Metamorphosis-v2-8B/ac20706b-0370-47de-bc6b-ae188f8a9259.json deleted file mode 100644 index 02d5230aa..000000000 --- a/data/hfopenllm_v2/meta/grimjim/Llama-Nephilim-Metamorphosis-v2-8B/ac20706b-0370-47de-bc6b-ae188f8a9259.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/grimjim_Llama-Nephilim-Metamorphosis-v2-8B/1762652580.183682", - "retrieved_timestamp": "1762652580.1836832", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "grimjim/Llama-Nephilim-Metamorphosis-v2-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "grimjim/Llama-Nephilim-Metamorphosis-v2-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4544519652300341 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5013477378974034 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13972809667673716 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40909375000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.38090093085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/grimjim/Llama3.1-SuperNovaLite-HuatuoSkywork-o1-8B/f2fbc411-4a4b-4727-9fdc-eda481f4f10c.json b/data/hfopenllm_v2/meta/grimjim/Llama3.1-SuperNovaLite-HuatuoSkywork-o1-8B/f2fbc411-4a4b-4727-9fdc-eda481f4f10c.json deleted file mode 100644 index f2e76eb40..000000000 --- a/data/hfopenllm_v2/meta/grimjim/Llama3.1-SuperNovaLite-HuatuoSkywork-o1-8B/f2fbc411-4a4b-4727-9fdc-eda481f4f10c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/grimjim_Llama3.1-SuperNovaLite-HuatuoSkywork-o1-8B/1762652580.183897", - "retrieved_timestamp": "1762652580.183897", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "grimjim/Llama3.1-SuperNovaLite-HuatuoSkywork-o1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "grimjim/Llama3.1-SuperNovaLite-HuatuoSkywork-o1-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43659157701565177 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5287189378780882 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30060422960725075 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3998541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3683510638297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/grimjim/SauerHuatuoSkywork-o1-Llama-3.1-8B/30482674-45a3-4400-84e0-eef215540eb5.json b/data/hfopenllm_v2/meta/grimjim/SauerHuatuoSkywork-o1-Llama-3.1-8B/30482674-45a3-4400-84e0-eef215540eb5.json deleted file mode 100644 index dffcb7f71..000000000 --- a/data/hfopenllm_v2/meta/grimjim/SauerHuatuoSkywork-o1-Llama-3.1-8B/30482674-45a3-4400-84e0-eef215540eb5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/grimjim_SauerHuatuoSkywork-o1-Llama-3.1-8B/1762652580.186095", - "retrieved_timestamp": 
"1762652580.1860962", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "grimjim/SauerHuatuoSkywork-o1-Llama-3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "grimjim/SauerHuatuoSkywork-o1-Llama-3.1-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5219462138237654 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5222077363554879 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1729607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45268749999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39910239361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/grimjim/llama-3-Nephilim-v1-8B/498c4d5e-0500-42da-9c75-e8da578516f8.json b/data/hfopenllm_v2/meta/grimjim/llama-3-Nephilim-v1-8B/498c4d5e-0500-42da-9c75-e8da578516f8.json deleted file mode 100644 index ea7160540..000000000 --- a/data/hfopenllm_v2/meta/grimjim/llama-3-Nephilim-v1-8B/498c4d5e-0500-42da-9c75-e8da578516f8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/grimjim_llama-3-Nephilim-v1-8B/1762652580.186311", - "retrieved_timestamp": "1762652580.186312", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "grimjim/llama-3-Nephilim-v1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "grimjim/llama-3-Nephilim-v1-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4277239945566652 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5131817939007638 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09063444108761329 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41362499999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37957114361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/grimjim/llama-3-Nephilim-v2-8B/de82dcd9-adae-4b28-8248-156e324e036d.json b/data/hfopenllm_v2/meta/grimjim/llama-3-Nephilim-v2-8B/de82dcd9-adae-4b28-8248-156e324e036d.json deleted file mode 100644 index dcee63e4f..000000000 --- a/data/hfopenllm_v2/meta/grimjim/llama-3-Nephilim-v2-8B/de82dcd9-adae-4b28-8248-156e324e036d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/grimjim_llama-3-Nephilim-v2-8B/1762652580.186511", - "retrieved_timestamp": "1762652580.1865118", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "grimjim/llama-3-Nephilim-v2-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "grimjim/llama-3-Nephilim-v2-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39222817679313116 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5048214936442625 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3895 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3641123670212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/grimjim/llama-3-Nephilim-v2.1-8B/df6327cf-82e1-437f-9c9a-c31205452717.json b/data/hfopenllm_v2/meta/grimjim/llama-3-Nephilim-v2.1-8B/df6327cf-82e1-437f-9c9a-c31205452717.json deleted file mode 100644 index e72f16786..000000000 --- a/data/hfopenllm_v2/meta/grimjim/llama-3-Nephilim-v2.1-8B/df6327cf-82e1-437f-9c9a-c31205452717.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/grimjim_llama-3-Nephilim-v2.1-8B/1762652580.186715", - "retrieved_timestamp": "1762652580.186715", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "grimjim/llama-3-Nephilim-v2.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "grimjim/llama-3-Nephilim-v2.1-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38950540122430705 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5095042703104161 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09969788519637462 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3935 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3644448138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/grimjim/llama-3-Nephilim-v3-8B/ecee6e6a-15a1-4455-9724-34ca14477064.json 
b/data/hfopenllm_v2/meta/grimjim/llama-3-Nephilim-v3-8B/ecee6e6a-15a1-4455-9724-34ca14477064.json deleted file mode 100644 index bb360c3f9..000000000 --- a/data/hfopenllm_v2/meta/grimjim/llama-3-Nephilim-v3-8B/ecee6e6a-15a1-4455-9724-34ca14477064.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/grimjim_llama-3-Nephilim-v3-8B/1762652580.186964", - "retrieved_timestamp": "1762652580.186965", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "grimjim/llama-3-Nephilim-v3-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "grimjim/llama-3-Nephilim-v3-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4173825449806513 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5012671264428366 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09516616314199396 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3989270833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3612034574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/hotmailuser/Llama-Hermes-slerp-8B/cf2de222-77bf-456c-acb3-c3aa33367a9d.json b/data/hfopenllm_v2/meta/hotmailuser/Llama-Hermes-slerp-8B/cf2de222-77bf-456c-acb3-c3aa33367a9d.json deleted file mode 100644 index 6fbd05450..000000000 --- a/data/hfopenllm_v2/meta/hotmailuser/Llama-Hermes-slerp-8B/cf2de222-77bf-456c-acb3-c3aa33367a9d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hotmailuser_Llama-Hermes-slerp-8B/1762652580.1947231", - "retrieved_timestamp": "1762652580.194724", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"hotmailuser/Llama-Hermes-slerp-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "hotmailuser/Llama-Hermes-slerp-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3390470617960345 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5310290010444968 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08006042296072508 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4077916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33311170212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/hotmailuser/Llama-Hermes-slerp2-8B/be5505d7-06ae-4ab5-ba7f-6ff4732b3180.json b/data/hfopenllm_v2/meta/hotmailuser/Llama-Hermes-slerp2-8B/be5505d7-06ae-4ab5-ba7f-6ff4732b3180.json deleted file mode 100644 index 745f63a68..000000000 --- a/data/hfopenllm_v2/meta/hotmailuser/Llama-Hermes-slerp2-8B/be5505d7-06ae-4ab5-ba7f-6ff4732b3180.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hotmailuser_Llama-Hermes-slerp2-8B/1762652580.194975", - "retrieved_timestamp": "1762652580.194976", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "hotmailuser/Llama-Hermes-slerp2-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "hotmailuser/Llama-Hermes-slerp2-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3728440537773109 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.5265283171967207 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09743202416918428 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42481250000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33793218085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/hotmailuser/LlamaStock-8B/23b559eb-4493-462f-bb37-5e232b3336bc.json b/data/hfopenllm_v2/meta/hotmailuser/LlamaStock-8B/23b559eb-4493-462f-bb37-5e232b3336bc.json deleted file mode 100644 index 05a848623..000000000 --- a/data/hfopenllm_v2/meta/hotmailuser/LlamaStock-8B/23b559eb-4493-462f-bb37-5e232b3336bc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hotmailuser_LlamaStock-8B/1762652580.19518", - "retrieved_timestamp": "1762652580.19518", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "hotmailuser/LlamaStock-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "hotmailuser/LlamaStock-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4249513513034304 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5328942883826541 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16993957703927492 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271812080536913 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.41293749999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3806515957446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/huggyllama/llama-13b/20b49499-5df3-450c-a20d-dc421b937e91.json b/data/hfopenllm_v2/meta/huggyllama/llama-13b/20b49499-5df3-450c-a20d-dc421b937e91.json deleted file mode 100644 index 7a2bcb5cd..000000000 --- a/data/hfopenllm_v2/meta/huggyllama/llama-13b/20b49499-5df3-450c-a20d-dc421b937e91.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/huggyllama_llama-13b/1762652580.199647", - "retrieved_timestamp": "1762652580.199648", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "huggyllama/llama-13b", - "developer": "meta", - "inference_platform": "unknown", - "id": "huggyllama/llama-13b", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.016 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24105262924595627 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39878925581174585 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2550335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34621875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19522938829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/huggyllama/llama-65b/2bff16e4-f0ed-4957-8b20-4ae269642088.json b/data/hfopenllm_v2/meta/huggyllama/llama-65b/2bff16e4-f0ed-4957-8b20-4ae269642088.json deleted file mode 100644 index 431a87e2e..000000000 --- a/data/hfopenllm_v2/meta/huggyllama/llama-65b/2bff16e4-f0ed-4957-8b20-4ae269642088.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/huggyllama_llama-65b/1762652580.1999428", - "retrieved_timestamp": "1762652580.199944", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "huggyllama/llama-65b", - "developer": "meta", - "inference_platform": "unknown", - "id": "huggyllama/llama-65b", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 65.286 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25259311958935626 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4702556052882764 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030966767371601207 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35945833333333327 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3077626329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/huggyllama/llama-7b/61a5624d-ef42-4fdd-a0b1-08fdc2d07615.json b/data/hfopenllm_v2/meta/huggyllama/llama-7b/61a5624d-ef42-4fdd-a0b1-08fdc2d07615.json deleted file mode 100644 index 8bf40d29a..000000000 --- a/data/hfopenllm_v2/meta/huggyllama/llama-7b/61a5624d-ef42-4fdd-a0b1-08fdc2d07615.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/huggyllama_llama-7b/1762652580.200164", - "retrieved_timestamp": "1762652580.200165", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "huggyllama/llama-7b", - "developer": "meta", - "inference_platform": "unknown", - "id": "huggyllama/llama-7b", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.738 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25009530268576263 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32773134782898566 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33539583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13131648936170212 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/iFaz/llama31_8B_en_emo_v4/198e5d81-0dcd-4dc0-9919-139ce0aa2dd5.json b/data/hfopenllm_v2/meta/iFaz/llama31_8B_en_emo_v4/198e5d81-0dcd-4dc0-9919-139ce0aa2dd5.json deleted file mode 100644 index 9f9bedfdf..000000000 --- a/data/hfopenllm_v2/meta/iFaz/llama31_8B_en_emo_v4/198e5d81-0dcd-4dc0-9919-139ce0aa2dd5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/iFaz_llama31_8B_en_emo_v4/1762652580.202469", - "retrieved_timestamp": "1762652580.202469", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "iFaz/llama31_8B_en_emo_v4", - "developer": "meta", - "inference_platform": "unknown", - "id": "iFaz/llama31_8B_en_emo_v4", - "additional_details": { - "precision": "float16", - "architecture": "", - "params_billions": 4.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3042504997850149 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49155384618761383 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08836858006042296 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3642916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3048537234042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/iFaz/llama32_1B_en_emo_v1/f202b553-56e6-4a27-b2fa-0f98feabe11e.json b/data/hfopenllm_v2/meta/iFaz/llama32_1B_en_emo_v1/f202b553-56e6-4a27-b2fa-0f98feabe11e.json deleted file mode 100644 index 5ad6ede70..000000000 --- a/data/hfopenllm_v2/meta/iFaz/llama32_1B_en_emo_v1/f202b553-56e6-4a27-b2fa-0f98feabe11e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/iFaz_llama32_1B_en_emo_v1/1762652580.2027268", - "retrieved_timestamp": "1762652580.2027268", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "iFaz/llama32_1B_en_emo_v1", - "developer": "meta", - "inference_platform": "unknown", - "id": "iFaz/llama32_1B_en_emo_v1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.765 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44083808738591385 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33802631394113886 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0377643504531722 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34888541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17611369680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_1000_stp/a4111230-4313-4f75-bcd3-c598e436987b.json b/data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_1000_stp/a4111230-4313-4f75-bcd3-c598e436987b.json deleted file mode 100644 index 8077343c4..000000000 --- 
a/data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_1000_stp/a4111230-4313-4f75-bcd3-c598e436987b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/iFaz_llama32_3B_en_emo_1000_stp/1762652580.202935", - "retrieved_timestamp": "1762652580.2029362", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "iFaz/llama32_3B_en_emo_1000_stp", - "developer": "meta", - "inference_platform": "unknown", - "id": "iFaz/llama32_3B_en_emo_1000_stp", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.848 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7295243287809678 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45218477635502685 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14652567975830816 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3620625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3123337765957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_2000_stp/5468fbdc-63e7-4e9d-8370-2f3f0e83e559.json b/data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_2000_stp/5468fbdc-63e7-4e9d-8370-2f3f0e83e559.json deleted file mode 100644 index 76ed90686..000000000 --- a/data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_2000_stp/5468fbdc-63e7-4e9d-8370-2f3f0e83e559.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/iFaz_llama32_3B_en_emo_2000_stp/1762652580.203131", - "retrieved_timestamp": "1762652580.203132", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "iFaz/llama32_3B_en_emo_2000_stp", - "developer": "meta", - "inference_platform": "unknown", - "id": "iFaz/llama32_3B_en_emo_2000_stp", - "additional_details": { - 
"precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.848 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7368681764385165 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45345889848516396 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15332326283987915 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35269791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3097573138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_300_stp/0806c872-f913-493a-ada4-7db88a93b840.json b/data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_300_stp/0806c872-f913-493a-ada4-7db88a93b840.json deleted file mode 100644 index 693dad624..000000000 --- a/data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_300_stp/0806c872-f913-493a-ada4-7db88a93b840.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/iFaz_llama32_3B_en_emo_300_stp/1762652580.203331", - "retrieved_timestamp": "1762652580.203331", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "iFaz/llama32_3B_en_emo_300_stp", - "developer": "meta", - "inference_platform": "unknown", - "id": "iFaz/llama32_3B_en_emo_300_stp", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.848 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.725552644760347 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45045681689917494 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16012084592145015 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3620625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3148271276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_5000_stp/9ffc9dbb-065b-47ae-a985-541ee7f7126d.json b/data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_5000_stp/9ffc9dbb-065b-47ae-a985-541ee7f7126d.json deleted file mode 100644 index e5223ff26..000000000 --- a/data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_5000_stp/9ffc9dbb-065b-47ae-a985-541ee7f7126d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/iFaz_llama32_3B_en_emo_5000_stp/1762652580.203531", - "retrieved_timestamp": "1762652580.203532", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "iFaz/llama32_3B_en_emo_5000_stp", - "developer": "meta", - "inference_platform": "unknown", - "id": "iFaz/llama32_3B_en_emo_5000_stp", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.848 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7100404703963262 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4567949942342784 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12915407854984895 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34460416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30668218085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_v2/03587c1e-14e3-434f-9582-448914832c95.json b/data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_v2/03587c1e-14e3-434f-9582-448914832c95.json deleted file mode 100644 index c52948ac2..000000000 --- a/data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_v2/03587c1e-14e3-434f-9582-448914832c95.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/iFaz_llama32_3B_en_emo_v2/1762652580.203742", - "retrieved_timestamp": "1762652580.203743", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "iFaz/llama32_3B_en_emo_v2", - "developer": "meta", - "inference_platform": "unknown", - "id": "iFaz/llama32_3B_en_emo_v2", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.848 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5454017562290279 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4283518305582969 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10876132930513595 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34822916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3003656914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_v3/8bb5540b-b19d-4641-9dea-36ea43b07250.json b/data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_v3/8bb5540b-b19d-4641-9dea-36ea43b07250.json deleted file mode 100644 index 402922aad..000000000 --- a/data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_v3/8bb5540b-b19d-4641-9dea-36ea43b07250.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/iFaz_llama32_3B_en_emo_v3/1762652580.203954", - "retrieved_timestamp": "1762652580.203954", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "iFaz/llama32_3B_en_emo_v3", - "developer": "meta", - "inference_platform": "unknown", - "id": "iFaz/llama32_3B_en_emo_v3", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.848 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5759263199421978 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43013596402782367 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35527083333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27102726063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/jiangxinyang-shanda/Homer-LLama3-8B/73c50ab1-bdf8-4fbc-b7e6-d4a8e8bb8a4e.json b/data/hfopenllm_v2/meta/jiangxinyang-shanda/Homer-LLama3-8B/73c50ab1-bdf8-4fbc-b7e6-d4a8e8bb8a4e.json deleted file mode 100644 index ffdf5c623..000000000 --- a/data/hfopenllm_v2/meta/jiangxinyang-shanda/Homer-LLama3-8B/73c50ab1-bdf8-4fbc-b7e6-d4a8e8bb8a4e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/jiangxinyang-shanda_Homer-LLama3-8B/1762652580.2879412", - "retrieved_timestamp": "1762652580.287943", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "jiangxinyang-shanda/Homer-LLama3-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "jiangxinyang-shanda/Homer-LLama3-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3991719748046295 - } - }, - { - "evaluation_name": 
"BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5173242047543128 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08610271903323263 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40562499999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3139128989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/keeeeenw/MicroLlama/7407c2ed-23f5-4c92-b987-2c3a91147d98.json b/data/hfopenllm_v2/meta/keeeeenw/MicroLlama/7407c2ed-23f5-4c92-b987-2c3a91147d98.json deleted file mode 100644 index fa7241dce..000000000 --- a/data/hfopenllm_v2/meta/keeeeenw/MicroLlama/7407c2ed-23f5-4c92-b987-2c3a91147d98.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/keeeeenw_MicroLlama/1762652580.3060532", - "retrieved_timestamp": "1762652580.3060539", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "keeeeenw/MicroLlama", - "developer": "meta", - "inference_platform": "unknown", - "id": "keeeeenw/MicroLlama", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.305 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19853765785892544 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3007313991347165 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.011329305135951661 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36981249999999993 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11377992021276596 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/kevin009/llamaRAGdrama/41e4d24f-9790-40f5-a915-ee4155d5cbc6.json b/data/hfopenllm_v2/meta/kevin009/llamaRAGdrama/41e4d24f-9790-40f5-a915-ee4155d5cbc6.json deleted file mode 100644 index 9a0a7b688..000000000 --- a/data/hfopenllm_v2/meta/kevin009/llamaRAGdrama/41e4d24f-9790-40f5-a915-ee4155d5cbc6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/kevin009_llamaRAGdrama/1762652580.3065941", - "retrieved_timestamp": "1762652580.3065941", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "kevin009/llamaRAGdrama", - "developer": "meta", - "inference_platform": "unknown", - "id": "kevin009/llamaRAGdrama", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2598372318780835 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4007385667099335 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43157291666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27235704787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/khoantap/llama-3-8b-stock-merge/211ac2a5-5bd1-4347-8eb8-fa1bd4b1a5ad.json b/data/hfopenllm_v2/meta/khoantap/llama-3-8b-stock-merge/211ac2a5-5bd1-4347-8eb8-fa1bd4b1a5ad.json deleted file mode 100644 index aa4bcbb48..000000000 --- 
a/data/hfopenllm_v2/meta/khoantap/llama-3-8b-stock-merge/211ac2a5-5bd1-4347-8eb8-fa1bd4b1a5ad.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/khoantap_llama-3-8b-stock-merge/1762652580.307331", - "retrieved_timestamp": "1762652580.307332", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "khoantap/llama-3-8b-stock-merge", - "developer": "meta", - "inference_platform": "unknown", - "id": "khoantap/llama-3-8b-stock-merge", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48117993590340297 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5162255701726589 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16163141993957703 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39458333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37998670212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/khoantap/llama-breadcrumbs-ties-merge/9eae434a-fb2a-45b9-a592-f39a9c469f07.json b/data/hfopenllm_v2/meta/khoantap/llama-breadcrumbs-ties-merge/9eae434a-fb2a-45b9-a592-f39a9c469f07.json deleted file mode 100644 index 7d6dde3b0..000000000 --- a/data/hfopenllm_v2/meta/khoantap/llama-breadcrumbs-ties-merge/9eae434a-fb2a-45b9-a592-f39a9c469f07.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/khoantap_llama-breadcrumbs-ties-merge/1762652580.307606", - "retrieved_timestamp": "1762652580.307607", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "khoantap/llama-breadcrumbs-ties-merge", - "developer": "meta", - "inference_platform": "unknown", - "id": 
"khoantap/llama-breadcrumbs-ties-merge", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22051933314716063 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5415928172799896 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11253776435045318 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44344791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3171542553191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/khoantap/llama-evolve-ties-best-merge/0ab7f323-1be5-4fc7-a5d8-d4f77f802da3.json b/data/hfopenllm_v2/meta/khoantap/llama-evolve-ties-best-merge/0ab7f323-1be5-4fc7-a5d8-d4f77f802da3.json deleted file mode 100644 index faebf522d..000000000 --- a/data/hfopenllm_v2/meta/khoantap/llama-evolve-ties-best-merge/0ab7f323-1be5-4fc7-a5d8-d4f77f802da3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/khoantap_llama-evolve-ties-best-merge/1762652580.307874", - "retrieved_timestamp": "1762652580.3078752", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "khoantap/llama-evolve-ties-best-merge", - "developer": "meta", - "inference_platform": "unknown", - "id": "khoantap/llama-evolve-ties-best-merge", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6743950495795601 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5413565104914732 - } - }, - { - "evaluation_name": "MATH 
Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39455208333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3859707446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/khoantap/llama-linear-0.5-0.5-1-merge/0906fee9-0edd-494f-bf01-a34711f17596.json b/data/hfopenllm_v2/meta/khoantap/llama-linear-0.5-0.5-1-merge/0906fee9-0edd-494f-bf01-a34711f17596.json deleted file mode 100644 index 516a70507..000000000 --- a/data/hfopenllm_v2/meta/khoantap/llama-linear-0.5-0.5-1-merge/0906fee9-0edd-494f-bf01-a34711f17596.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/khoantap_llama-linear-0.5-0.5-1-merge/1762652580.3081899", - "retrieved_timestamp": "1762652580.308191", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "khoantap/llama-linear-0.5-0.5-1-merge", - "developer": "meta", - "inference_platform": "unknown", - "id": "khoantap/llama-linear-0.5-0.5-1-merge", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48122980358781364 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5643013649244941 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.41427083333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38331117021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/khoantap/llama-linear-0.5-1-0.5-merge/88d174f6-6d30-4859-bbf0-6f5446ce1b9d.json b/data/hfopenllm_v2/meta/khoantap/llama-linear-0.5-1-0.5-merge/88d174f6-6d30-4859-bbf0-6f5446ce1b9d.json deleted file mode 100644 index f223a16a2..000000000 --- a/data/hfopenllm_v2/meta/khoantap/llama-linear-0.5-1-0.5-merge/88d174f6-6d30-4859-bbf0-6f5446ce1b9d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/khoantap_llama-linear-0.5-1-0.5-merge/1762652580.308497", - "retrieved_timestamp": "1762652580.308498", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "khoantap/llama-linear-0.5-1-0.5-merge", - "developer": "meta", - "inference_platform": "unknown", - "id": "khoantap/llama-linear-0.5-1-0.5-merge", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5031616111916382 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5950766502131658 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14803625377643503 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4171875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3690159574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/khoantap/llama-linear-1-0.5-0.5-merge/49e5e4e4-6905-4b9e-9f53-b7ac598b5102.json b/data/hfopenllm_v2/meta/khoantap/llama-linear-1-0.5-0.5-merge/49e5e4e4-6905-4b9e-9f53-b7ac598b5102.json deleted file mode 100644 index 4847609e5..000000000 --- a/data/hfopenllm_v2/meta/khoantap/llama-linear-1-0.5-0.5-merge/49e5e4e4-6905-4b9e-9f53-b7ac598b5102.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - 
"evaluation_id": "hfopenllm_v2/khoantap_llama-linear-1-0.5-0.5-merge/1762652580.308746", - "retrieved_timestamp": "1762652580.308747", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "khoantap/llama-linear-1-0.5-0.5-merge", - "developer": "meta", - "inference_platform": "unknown", - "id": "khoantap/llama-linear-1-0.5-0.5-merge", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45145436331156885 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5526017944110775 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24773413897280966 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4117604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.363530585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/khoantap/llama-slerp-merge/e30c2825-6d36-454c-8787-e5cbdfcbcfdf.json b/data/hfopenllm_v2/meta/khoantap/llama-slerp-merge/e30c2825-6d36-454c-8787-e5cbdfcbcfdf.json deleted file mode 100644 index e4bc0e10c..000000000 --- a/data/hfopenllm_v2/meta/khoantap/llama-slerp-merge/e30c2825-6d36-454c-8787-e5cbdfcbcfdf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/khoantap_llama-slerp-merge/1762652580.308971", - "retrieved_timestamp": "1762652580.3089721", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "khoantap/llama-slerp-merge", - "developer": "meta", - "inference_platform": "unknown", - "id": "khoantap/llama-slerp-merge", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": 
{ - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49799088640363126 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5782782780315171 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40531249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3677692819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/khulaifi95/Llama-3.1-8B-Reason-Blend-888k/85a2710f-feaf-4dc2-aafa-04c33abf6425.json b/data/hfopenllm_v2/meta/khulaifi95/Llama-3.1-8B-Reason-Blend-888k/85a2710f-feaf-4dc2-aafa-04c33abf6425.json deleted file mode 100644 index 3e768f791..000000000 --- a/data/hfopenllm_v2/meta/khulaifi95/Llama-3.1-8B-Reason-Blend-888k/85a2710f-feaf-4dc2-aafa-04c33abf6425.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/khulaifi95_Llama-3.1-8B-Reason-Blend-888k/1762652580.309421", - "retrieved_timestamp": "1762652580.309421", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "khulaifi95/Llama-3.1-8B-Reason-Blend-888k", - "developer": "meta", - "inference_platform": "unknown", - "id": "khulaifi95/Llama-3.1-8B-Reason-Blend-888k", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.583170432230925 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4789526925494476 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.11555891238670694 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3379375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3100066489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/laislemke/LLaMA-2-vicuna-7b-slerp/66d98c7d-7fd1-41bc-9229-855f9d02412d.json b/data/hfopenllm_v2/meta/laislemke/LLaMA-2-vicuna-7b-slerp/66d98c7d-7fd1-41bc-9229-855f9d02412d.json deleted file mode 100644 index 16522c6d8..000000000 --- a/data/hfopenllm_v2/meta/laislemke/LLaMA-2-vicuna-7b-slerp/66d98c7d-7fd1-41bc-9229-855f9d02412d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/laislemke_LLaMA-2-vicuna-7b-slerp/1762652580.311907", - "retrieved_timestamp": "1762652580.311908", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "laislemke/LLaMA-2-vicuna-7b-slerp", - "developer": "meta", - "inference_platform": "unknown", - "id": "laislemke/LLaMA-2-vicuna-7b-slerp", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.738 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29320979445648654 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29862163052356266 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.011329305135951661 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3833020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": 
{ - "score": 0.13422539893617022 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/lemon07r/Llama-3-RedMagic4-8B/22ae03c6-dd4f-4263-a005-624dae701da3.json b/data/hfopenllm_v2/meta/lemon07r/Llama-3-RedMagic4-8B/22ae03c6-dd4f-4263-a005-624dae701da3.json deleted file mode 100644 index f41a9007f..000000000 --- a/data/hfopenllm_v2/meta/lemon07r/Llama-3-RedMagic4-8B/22ae03c6-dd4f-4263-a005-624dae701da3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lemon07r_Llama-3-RedMagic4-8B/1762652580.318728", - "retrieved_timestamp": "1762652580.318729", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lemon07r/Llama-3-RedMagic4-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "lemon07r/Llama-3-RedMagic4-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4864005283758206 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42560489470390417 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08987915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3766354166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3676030585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/lemon07r/llama-3-NeuralMahou-8b/13b8357d-225e-4ba0-bf34-45479a562532.json b/data/hfopenllm_v2/meta/lemon07r/llama-3-NeuralMahou-8b/13b8357d-225e-4ba0-bf34-45479a562532.json deleted file mode 100644 index c7c1d04be..000000000 --- a/data/hfopenllm_v2/meta/lemon07r/llama-3-NeuralMahou-8b/13b8357d-225e-4ba0-bf34-45479a562532.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lemon07r_llama-3-NeuralMahou-8b/1762652580.319005", - "retrieved_timestamp": "1762652580.319006", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lemon07r/llama-3-NeuralMahou-8b", - "developer": "meta", - "inference_platform": "unknown", - "id": "lemon07r/llama-3-NeuralMahou-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49009738604680025 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41841123683301523 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10196374622356495 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3872708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3690159574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-full/37aa2a50-974f-4cb0-81e3-f160f08c8a0e.json b/data/hfopenllm_v2/meta/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-full/37aa2a50-974f-4cb0-81e3-f160f08c8a0e.json deleted file mode 100644 index 09ca2b8c9..000000000 --- a/data/hfopenllm_v2/meta/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-full/37aa2a50-974f-4cb0-81e3-f160f08c8a0e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lightblue_suzume-llama-3-8B-multilingual-orpo-borda-full/1762652580.32158", - "retrieved_timestamp": "1762652580.32158", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-full", - "developer": "meta", - "inference_platform": "unknown", - "id": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-full", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, 
- "score_details": { - "score": 0.5817464327983085 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4714219934773132 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07628398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33095079787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-half/90ab1587-99b9-48e1-b3f3-8aaf07313eaa.json b/data/hfopenllm_v2/meta/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-half/90ab1587-99b9-48e1-b3f3-8aaf07313eaa.json deleted file mode 100644 index b479a5b8d..000000000 --- a/data/hfopenllm_v2/meta/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-half/90ab1587-99b9-48e1-b3f3-8aaf07313eaa.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lightblue_suzume-llama-3-8B-multilingual-orpo-borda-half/1762652580.3218", - "retrieved_timestamp": "1762652580.321801", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-half", - "developer": "meta", - "inference_platform": "unknown", - "id": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-half", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6249107922534431 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47074584910573014 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09063444108761329 - } - }, - { - "evaluation_name": "GPQA", - 
"metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24496644295302014 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35158333333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36136968085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top25/ebfb14c0-d725-4650-9d04-ed4f7ebaf676.json b/data/hfopenllm_v2/meta/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top25/ebfb14c0-d725-4650-9d04-ed4f7ebaf676.json deleted file mode 100644 index 88dfa2084..000000000 --- a/data/hfopenllm_v2/meta/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top25/ebfb14c0-d725-4650-9d04-ed4f7ebaf676.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lightblue_suzume-llama-3-8B-multilingual-orpo-borda-top25/1762652580.322012", - "retrieved_timestamp": "1762652580.322013", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top25", - "developer": "meta", - "inference_platform": "unknown", - "id": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top25", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6636535503574958 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4864641205580417 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35660416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3684341755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top75/fcb13fe4-e314-4cdd-ae6e-82531ad6a829.json b/data/hfopenllm_v2/meta/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top75/fcb13fe4-e314-4cdd-ae6e-82531ad6a829.json deleted file mode 100644 index 48e8f399a..000000000 --- a/data/hfopenllm_v2/meta/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top75/fcb13fe4-e314-4cdd-ae6e-82531ad6a829.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/lightblue_suzume-llama-3-8B-multilingual-orpo-borda-top75/1762652580.322237", - "retrieved_timestamp": "1762652580.322238", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top75", - "developer": "meta", - "inference_platform": "unknown", - "id": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top75", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6687245397766814 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48333166095856117 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07854984894259819 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3816875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37691156914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/lightblue/suzume-llama-3-8B-multilingual/8eaee9b3-78b0-4523-9151-695c27c5cfa7.json b/data/hfopenllm_v2/meta/lightblue/suzume-llama-3-8B-multilingual/8eaee9b3-78b0-4523-9151-695c27c5cfa7.json deleted file mode 100644 index e6dcb2000..000000000 --- a/data/hfopenllm_v2/meta/lightblue/suzume-llama-3-8B-multilingual/8eaee9b3-78b0-4523-9151-695c27c5cfa7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/lightblue_suzume-llama-3-8B-multilingual/1762652580.321283", - "retrieved_timestamp": "1762652580.321284", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "lightblue/suzume-llama-3-8B-multilingual", - "developer": "meta", - "inference_platform": "unknown", - "id": "lightblue/suzume-llama-3-8B-multilingual", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6678003253589365 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49499524187359745 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09441087613293052 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39768749999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33834773936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/m42-health/Llama3-Med42-70B/36ebe051-2bac-46cb-b990-33025df0ccac.json b/data/hfopenllm_v2/meta/m42-health/Llama3-Med42-70B/36ebe051-2bac-46cb-b990-33025df0ccac.json deleted file mode 100644 index a5b93b82c..000000000 --- a/data/hfopenllm_v2/meta/m42-health/Llama3-Med42-70B/36ebe051-2bac-46cb-b990-33025df0ccac.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/m42-health_Llama3-Med42-70B/1762652580.328667", - "retrieved_timestamp": "1762652580.328667", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "m42-health/Llama3-Med42-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "m42-health/Llama3-Med42-70B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6291074349392944 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6687891109485058 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2258308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34731543624161076 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46289583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4962599734042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/maldv/badger-kappa-llama-3-8b/32e1b138-c236-48e3-8152-d3715127d309.json b/data/hfopenllm_v2/meta/maldv/badger-kappa-llama-3-8b/32e1b138-c236-48e3-8152-d3715127d309.json deleted file mode 100644 index 724231ca9..000000000 --- a/data/hfopenllm_v2/meta/maldv/badger-kappa-llama-3-8b/32e1b138-c236-48e3-8152-d3715127d309.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/maldv_badger-kappa-llama-3-8b/1762652580.331178", - "retrieved_timestamp": "1762652580.331179", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "maldv/badger-kappa-llama-3-8b", - "developer": "meta", - "inference_platform": "unknown", - "id": "maldv/badger-kappa-llama-3-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46946435457918323 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5084927997756815 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08610271903323263 - } - }, - { - "evaluation_name": 
"GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3765104166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3695146276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/maldv/badger-lambda-llama-3-8b/18ae9d71-15e0-4d11-86c0-9cac4dbaa3f3.json b/data/hfopenllm_v2/meta/maldv/badger-lambda-llama-3-8b/18ae9d71-15e0-4d11-86c0-9cac4dbaa3f3.json deleted file mode 100644 index 6efb09e43..000000000 --- a/data/hfopenllm_v2/meta/maldv/badger-lambda-llama-3-8b/18ae9d71-15e0-4d11-86c0-9cac4dbaa3f3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/maldv_badger-lambda-llama-3-8b/1762652580.331519", - "retrieved_timestamp": "1762652580.33152", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "maldv/badger-lambda-llama-3-8b", - "developer": "meta", - "inference_platform": "unknown", - "id": "maldv/badger-lambda-llama-3-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4860758343417687 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49634866510444836 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09441087613293052 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3753645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37666223404255317 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/meta/maldv/badger-mu-llama-3-8b/d43699f9-e6e5-428b-ab52-9d7114443608.json b/data/hfopenllm_v2/meta/maldv/badger-mu-llama-3-8b/d43699f9-e6e5-428b-ab52-9d7114443608.json deleted file mode 100644 index 833eb1dc6..000000000 --- a/data/hfopenllm_v2/meta/maldv/badger-mu-llama-3-8b/d43699f9-e6e5-428b-ab52-9d7114443608.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/maldv_badger-mu-llama-3-8b/1762652580.3317509", - "retrieved_timestamp": "1762652580.3317518", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "maldv/badger-mu-llama-3-8b", - "developer": "meta", - "inference_platform": "unknown", - "id": "maldv/badger-mu-llama-3-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49194581488229006 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.514287576852281 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.055891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35545833333333327 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3673537234042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/maldv/badger-writer-llama-3-8b/7c88458f-e9a0-4e90-b5ed-dbdb6fd49b9d.json b/data/hfopenllm_v2/meta/maldv/badger-writer-llama-3-8b/7c88458f-e9a0-4e90-b5ed-dbdb6fd49b9d.json deleted file mode 100644 index f977c9d5b..000000000 --- a/data/hfopenllm_v2/meta/maldv/badger-writer-llama-3-8b/7c88458f-e9a0-4e90-b5ed-dbdb6fd49b9d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/maldv_badger-writer-llama-3-8b/1762652580.332005", - "retrieved_timestamp": "1762652580.332005", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - 
"source_type": "documentation" - }, - "model_info": { - "name": "maldv/badger-writer-llama-3-8b", - "developer": "meta", - "inference_platform": "unknown", - "id": "maldv/badger-writer-llama-3-8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5303140112678804 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4863893856673737 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0755287009063444 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35809375000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3759973404255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/mattshumer/Reflection-Llama-3.1-70B/155f55e9-34e3-4753-a783-31df44e791e0.json b/data/hfopenllm_v2/meta/mattshumer/Reflection-Llama-3.1-70B/155f55e9-34e3-4753-a783-31df44e791e0.json deleted file mode 100644 index 94a384e1d..000000000 --- a/data/hfopenllm_v2/meta/mattshumer/Reflection-Llama-3.1-70B/155f55e9-34e3-4753-a783-31df44e791e0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mattshumer_Reflection-Llama-3.1-70B/1762652580.341989", - "retrieved_timestamp": "1762652580.341989", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mattshumer/Reflection-Llama-3.1-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "mattshumer/Reflection-Llama-3.1-70B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00452133671990319 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.645001286484342 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21450151057401812 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36325503355704697 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45765625000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4955119680851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/meditsolutions/Llama-3.1-MedIT-SUN-8B/94d286c8-8356-4bdd-ac91-2ce517b6b974.json b/data/hfopenllm_v2/meta/meditsolutions/Llama-3.1-MedIT-SUN-8B/94d286c8-8356-4bdd-ac91-2ce517b6b974.json deleted file mode 100644 index 344c83307..000000000 --- a/data/hfopenllm_v2/meta/meditsolutions/Llama-3.1-MedIT-SUN-8B/94d286c8-8356-4bdd-ac91-2ce517b6b974.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/meditsolutions_Llama-3.1-MedIT-SUN-8B/1762652580.342782", - "retrieved_timestamp": "1762652580.342783", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "meditsolutions/Llama-3.1-MedIT-SUN-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "meditsolutions/Llama-3.1-MedIT-SUN-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7837293935646308 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5186924904597405 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20921450151057402 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on 
MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40562499999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3916223404255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-26000/85ccad14-a4eb-41c8-b1b7-f2d0215c358a.json b/data/hfopenllm_v2/meta/meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-26000/85ccad14-a4eb-41c8-b1b7-f2d0215c358a.json deleted file mode 100644 index a52c92f8d..000000000 --- a/data/hfopenllm_v2/meta/meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-26000/85ccad14-a4eb-41c8-b1b7-f2d0215c358a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/meditsolutions_Llama-3.2-SUN-2.4B-checkpoint-26000/1762652580.3434849", - "retrieved_timestamp": "1762652580.343486", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-26000", - "developer": "meta", - "inference_platform": "unknown", - "id": "meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-26000", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 2.209 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28139447776344545 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3017752699243885 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01812688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41033333333333327 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1344747340425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-34800/23dca426-d0d9-43d0-86ff-50e01cc292d0.json 
b/data/hfopenllm_v2/meta/meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-34800/23dca426-d0d9-43d0-86ff-50e01cc292d0.json deleted file mode 100644 index 042b99197..000000000 --- a/data/hfopenllm_v2/meta/meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-34800/23dca426-d0d9-43d0-86ff-50e01cc292d0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/meditsolutions_Llama-3.2-SUN-2.4B-checkpoint-34800/1762652580.343692", - "retrieved_timestamp": "1762652580.343693", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-34800", - "developer": "meta", - "inference_platform": "unknown", - "id": "meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-34800", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 2.209 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25009530268576263 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3161124673749052 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4022395833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13572140957446807 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0/bba22496-6f3a-4ddb-8a69-5995e72aa15f.json b/data/hfopenllm_v2/meta/meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0/bba22496-6f3a-4ddb-8a69-5995e72aa15f.json deleted file mode 100644 index d2a11993b..000000000 --- a/data/hfopenllm_v2/meta/meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0/bba22496-6f3a-4ddb-8a69-5995e72aa15f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/meditsolutions_Llama-3.2-SUN-2.4B-v1.0.0/1762652580.343897", - "retrieved_timestamp": "1762652580.343898", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0", - "developer": "meta", - "inference_platform": "unknown", - "id": "meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 2.472 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5636865738462834 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3390826682107771 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06268882175226587 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32094791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15425531914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/meta-llama/Llama-2-13b-hf/7a0c1d3a-26f5-44d0-8ca1-8ce6db39cb99.json b/data/hfopenllm_v2/meta/meta-llama/Llama-2-13b-hf/7a0c1d3a-26f5-44d0-8ca1-8ce6db39cb99.json deleted file mode 100644 index c20397a36..000000000 --- a/data/hfopenllm_v2/meta/meta-llama/Llama-2-13b-hf/7a0c1d3a-26f5-44d0-8ca1-8ce6db39cb99.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/meta-llama_Llama-2-13b-hf/1762652580.3493812", - "retrieved_timestamp": "1762652580.349382", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "meta-llama/Llama-2-13b-hf", - "developer": "meta", - "inference_platform": "unknown", - "id": "meta-llama/Llama-2-13b-hf", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.016 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24824687385027283 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41256242233835055 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23778257978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/meta-llama/Llama-2-70b-hf/70acb3cd-fea6-481a-8bf4-fa72e953c110.json b/data/hfopenllm_v2/meta/meta-llama/Llama-2-70b-hf/70acb3cd-fea6-481a-8bf4-fa72e953c110.json deleted file mode 100644 index 00177dfb2..000000000 --- a/data/hfopenllm_v2/meta/meta-llama/Llama-2-70b-hf/70acb3cd-fea6-481a-8bf4-fa72e953c110.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/meta-llama_Llama-2-70b-hf/1762652580.3500109", - "retrieved_timestamp": "1762652580.3500118", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "meta-llama/Llama-2-70b-hf", - "developer": "meta", - "inference_platform": "unknown", - "id": "meta-llama/Llama-2-70b-hf", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 68.977 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2406780675274937 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5472591190449342 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0324773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, 
- "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41235416666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37175864361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/meta-llama/Llama-2-7b-hf/36fbd2e7-97fa-4ba4-aad2-47bfc225771d.json b/data/hfopenllm_v2/meta/meta-llama/Llama-2-7b-hf/36fbd2e7-97fa-4ba4-aad2-47bfc225771d.json deleted file mode 100644 index 07c2c5438..000000000 --- a/data/hfopenllm_v2/meta/meta-llama/Llama-2-7b-hf/36fbd2e7-97fa-4ba4-aad2-47bfc225771d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/meta-llama_Llama-2-7b-hf/1762652580.350465", - "retrieved_timestamp": "1762652580.350466", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "meta-llama/Llama-2-7b-hf", - "developer": "meta", - "inference_platform": "unknown", - "id": "meta-llama/Llama-2-7b-hf", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.738 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2518938638368418 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34961958199821835 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.017371601208459216 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37006249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18608710106382978 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/meta-llama/Llama-3.1-70B/88d33049-cd88-4b4a-94ba-d0c35a635cfc.json b/data/hfopenllm_v2/meta/meta-llama/Llama-3.1-70B/88d33049-cd88-4b4a-94ba-d0c35a635cfc.json deleted file mode 100644 index 1af4778d2..000000000 --- a/data/hfopenllm_v2/meta/meta-llama/Llama-3.1-70B/88d33049-cd88-4b4a-94ba-d0c35a635cfc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - 
"evaluation_id": "hfopenllm_v2/meta-llama_Llama-3.1-70B/1762652580.350682", - "retrieved_timestamp": "1762652580.350682", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "meta-llama/Llama-3.1-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "meta-llama/Llama-3.1-70B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16843752354862876 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.626006918317161 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18429003021148038 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3875838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4571875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4654255319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/meta-llama/Llama-3.1-8B/58e87619-6244-45b9-8a1f-b2f8f0d0cd31.json b/data/hfopenllm_v2/meta/meta-llama/Llama-3.1-8B/58e87619-6244-45b9-8a1f-b2f8f0d0cd31.json deleted file mode 100644 index 35b5da405..000000000 --- a/data/hfopenllm_v2/meta/meta-llama/Llama-3.1-8B/58e87619-6244-45b9-8a1f-b2f8f0d0cd31.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/meta-llama_Llama-3.1-8B/1762652580.351093", - "retrieved_timestamp": "1762652580.351093", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "meta-llama/Llama-3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "meta-llama/Llama-3.1-8B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12459828809780273 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46595905446007296 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06570996978851963 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3811875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32878989361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/meta-llama/Llama-3.2-1B/b4b6a8d2-be7f-4b8f-b280-3e62015a61d3.json b/data/hfopenllm_v2/meta/meta-llama/Llama-3.2-1B/b4b6a8d2-be7f-4b8f-b280-3e62015a61d3.json deleted file mode 100644 index 1eb1a0361..000000000 --- a/data/hfopenllm_v2/meta/meta-llama/Llama-3.2-1B/b4b6a8d2-be7f-4b8f-b280-3e62015a61d3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/meta-llama_Llama-3.2-1B/1762652580.3515048", - "retrieved_timestamp": "1762652580.351506", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "meta-llama/Llama-3.2-1B", - "developer": "meta", - "inference_platform": "unknown", - "id": "meta-llama/Llama-3.2-1B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.24 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14777900415342402 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31149540964608097 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22818791946308725 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3447291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12034574468085106 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/meta-llama/Llama-3.2-3B/19aba348-6bdd-425a-bd7b-505aa2658f6c.json b/data/hfopenllm_v2/meta/meta-llama/Llama-3.2-3B/19aba348-6bdd-425a-bd7b-505aa2658f6c.json deleted file mode 100644 index 6854ce80b..000000000 --- a/data/hfopenllm_v2/meta/meta-llama/Llama-3.2-3B/19aba348-6bdd-425a-bd7b-505aa2658f6c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/meta-llama_Llama-3.2-3B/1762652580.351924", - "retrieved_timestamp": "1762652580.351925", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "meta-llama/Llama-3.2-3B", - "developer": "meta", - "inference_platform": "unknown", - "id": "meta-llama/Llama-3.2-3B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13374069690643048 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3905117116991059 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0188821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35771875000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2487533244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/meta-llama/Meta-Llama-3-70B/dddadaa0-6808-4b34-a6e2-29663460c3e0.json 
b/data/hfopenllm_v2/meta/meta-llama/Meta-Llama-3-70B/dddadaa0-6808-4b34-a6e2-29663460c3e0.json deleted file mode 100644 index 78ff1f71a..000000000 --- a/data/hfopenllm_v2/meta/meta-llama/Meta-Llama-3-70B/dddadaa0-6808-4b34-a6e2-29663460c3e0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/meta-llama_Meta-Llama-3-70B/1762652580.352541", - "retrieved_timestamp": "1762652580.352541", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "meta-llama/Meta-Llama-3-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "meta-llama/Meta-Llama-3-70B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1603190645265673 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6461074599904467 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18580060422960726 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3976510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4518229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4709109042553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/meta-llama/Meta-Llama-3-8B/75f6ae05-a987-455d-8167-fc345d55c370.json b/data/hfopenllm_v2/meta/meta-llama/Meta-Llama-3-8B/75f6ae05-a987-455d-8167-fc345d55c370.json deleted file mode 100644 index 1598d77f9..000000000 --- a/data/hfopenllm_v2/meta/meta-llama/Meta-Llama-3-8B/75f6ae05-a987-455d-8167-fc345d55c370.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/meta-llama_Meta-Llama-3-8B/1762652580.352957", - "retrieved_timestamp": "1762652580.352957", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "meta-llama/Meta-Llama-3-8B", - "developer": 
"meta", - "inference_platform": "unknown", - "id": "meta-llama/Meta-Llama-3-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14550614591506092 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4597905195240255 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36140625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32097739361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/migtissera/Llama-3-70B-Synthia-v3.5/7ba5e7cb-3050-4838-8762-4b31a5c9d912.json b/data/hfopenllm_v2/meta/migtissera/Llama-3-70B-Synthia-v3.5/7ba5e7cb-3050-4838-8762-4b31a5c9d912.json deleted file mode 100644 index 313c48208..000000000 --- a/data/hfopenllm_v2/meta/migtissera/Llama-3-70B-Synthia-v3.5/7ba5e7cb-3050-4838-8762-4b31a5c9d912.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/migtissera_Llama-3-70B-Synthia-v3.5/1762652580.358073", - "retrieved_timestamp": "1762652580.3580742", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "migtissera/Llama-3-70B-Synthia-v3.5", - "developer": "meta", - "inference_platform": "unknown", - "id": "migtissera/Llama-3-70B-Synthia-v3.5", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6076499244227538 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6488638026271278 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21148036253776434 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3875838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49219791666666673 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4658410904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/migtissera/Llama-3-8B-Synthia-v3.5/3c843cd0-ce71-4feb-9452-65fc7534518e.json b/data/hfopenllm_v2/meta/migtissera/Llama-3-8B-Synthia-v3.5/3c843cd0-ce71-4feb-9452-65fc7534518e.json deleted file mode 100644 index 2bb07db3d..000000000 --- a/data/hfopenllm_v2/meta/migtissera/Llama-3-8B-Synthia-v3.5/3c843cd0-ce71-4feb-9452-65fc7534518e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/migtissera_Llama-3-8B-Synthia-v3.5/1762652580.358322", - "retrieved_timestamp": "1762652580.358322", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "migtissera/Llama-3-8B-Synthia-v3.5", - "developer": "meta", - "inference_platform": "unknown", - "id": "migtissera/Llama-3-8B-Synthia-v3.5", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5069582042314393 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4887940933660044 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06570996978851963 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.40438541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30302526595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/mindw96/DeepSeek-llama3.3-Bllossom-8B-DACON-LLM3/ce85152e-fdde-406a-9818-0eb945ff1d6a.json b/data/hfopenllm_v2/meta/mindw96/DeepSeek-llama3.3-Bllossom-8B-DACON-LLM3/ce85152e-fdde-406a-9818-0eb945ff1d6a.json deleted file mode 100644 index 6309729aa..000000000 --- a/data/hfopenllm_v2/meta/mindw96/DeepSeek-llama3.3-Bllossom-8B-DACON-LLM3/ce85152e-fdde-406a-9818-0eb945ff1d6a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mindw96_DeepSeek-llama3.3-Bllossom-8B-DACON-LLM3/1762652580.360158", - "retrieved_timestamp": "1762652580.360159", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mindw96/DeepSeek-llama3.3-Bllossom-8B-DACON-LLM3", - "developer": "meta", - "inference_platform": "unknown", - "id": "mindw96/DeepSeek-llama3.3-Bllossom-8B-DACON-LLM3", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13881168632561602 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3067536965504715 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25083892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3792083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11062167553191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/mkurman/llama-3.2-MEDIT-3B-o1/43a51d6d-e038-4476-a63b-2f4260d736d4.json b/data/hfopenllm_v2/meta/mkurman/llama-3.2-MEDIT-3B-o1/43a51d6d-e038-4476-a63b-2f4260d736d4.json deleted file mode 100644 index e3d0ee4ce..000000000 --- a/data/hfopenllm_v2/meta/mkurman/llama-3.2-MEDIT-3B-o1/43a51d6d-e038-4476-a63b-2f4260d736d4.json +++ /dev/null @@ 
-1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mkurman_llama-3.2-MEDIT-3B-o1/1762652580.365804", - "retrieved_timestamp": "1762652580.3658051", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mkurman/llama-3.2-MEDIT-3B-o1", - "developer": "meta", - "inference_platform": "unknown", - "id": "mkurman/llama-3.2-MEDIT-3B-o1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.607 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43816517950150047 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43996584807961553 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13066465256797583 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3565416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27410239361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/mkxu/llama-3-8b-po1/e26ea6fd-723d-45de-b0f1-5bcbae1eb992.json b/data/hfopenllm_v2/meta/mkxu/llama-3-8b-po1/e26ea6fd-723d-45de-b0f1-5bcbae1eb992.json deleted file mode 100644 index cedb8bfd5..000000000 --- a/data/hfopenllm_v2/meta/mkxu/llama-3-8b-po1/e26ea6fd-723d-45de-b0f1-5bcbae1eb992.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mkxu_llama-3-8b-po1/1762652580.3669372", - "retrieved_timestamp": "1762652580.366938", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mkxu/llama-3-8b-po1", - "developer": "meta", - "inference_platform": "unknown", - "id": "mkxu/llama-3-8b-po1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4081149128756145 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49760854852246356 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0702416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3804166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3562167553191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/mlabonne/ChimeraLlama-3-8B-v2/fd31a5f1-986e-4040-b04b-3018161e6e66.json b/data/hfopenllm_v2/meta/mlabonne/ChimeraLlama-3-8B-v2/fd31a5f1-986e-4040-b04b-3018161e6e66.json deleted file mode 100644 index ba5dd2c32..000000000 --- a/data/hfopenllm_v2/meta/mlabonne/ChimeraLlama-3-8B-v2/fd31a5f1-986e-4040-b04b-3018161e6e66.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mlabonne_ChimeraLlama-3-8B-v2/1762652580.3680582", - "retrieved_timestamp": "1762652580.3680582", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mlabonne/ChimeraLlama-3-8B-v2", - "developer": "meta", - "inference_platform": "unknown", - "id": "mlabonne/ChimeraLlama-3-8B-v2", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44688315890725494 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5045597361952603 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09063444108761329 - } - }, - { - "evaluation_name": "GPQA", - 
"metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3790833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3568816489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/mlabonne/ChimeraLlama-3-8B-v3/eef221de-8dc3-410a-943d-900c810948ae.json b/data/hfopenllm_v2/meta/mlabonne/ChimeraLlama-3-8B-v3/eef221de-8dc3-410a-943d-900c810948ae.json deleted file mode 100644 index 0f2202c99..000000000 --- a/data/hfopenllm_v2/meta/mlabonne/ChimeraLlama-3-8B-v3/eef221de-8dc3-410a-943d-900c810948ae.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mlabonne_ChimeraLlama-3-8B-v3/1762652580.3683012", - "retrieved_timestamp": "1762652580.3683012", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mlabonne/ChimeraLlama-3-8B-v3", - "developer": "meta", - "inference_platform": "unknown", - "id": "mlabonne/ChimeraLlama-3-8B-v3", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44078821970150317 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49781902726529204 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08836858006042296 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4003541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36685505319148937 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/meta/mlabonne/Hermes-3-Llama-3.1-70B-lorablated/07190707-16fb-47fc-9813-4f2408a04bdb.json b/data/hfopenllm_v2/meta/mlabonne/Hermes-3-Llama-3.1-70B-lorablated/07190707-16fb-47fc-9813-4f2408a04bdb.json deleted file mode 100644 index 2bcd358b8..000000000 --- a/data/hfopenllm_v2/meta/mlabonne/Hermes-3-Llama-3.1-70B-lorablated/07190707-16fb-47fc-9813-4f2408a04bdb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mlabonne_Hermes-3-Llama-3.1-70B-lorablated/1762652580.368906", - "retrieved_timestamp": "1762652580.368906", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mlabonne/Hermes-3-Llama-3.1-70B-lorablated", - "developer": "meta", - "inference_platform": "unknown", - "id": "mlabonne/Hermes-3-Llama-3.1-70B-lorablated", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34244360518978534 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6693171063183693 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2243202416918429 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36577181208053694 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5029270833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4679188829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/mlabonne/OrpoLlama-3-8B/b8b5b30e-d259-49ae-8155-7f63ddae88c8.json b/data/hfopenllm_v2/meta/mlabonne/OrpoLlama-3-8B/b8b5b30e-d259-49ae-8155-7f63ddae88c8.json deleted file mode 100644 index 9a84f60cd..000000000 --- a/data/hfopenllm_v2/meta/mlabonne/OrpoLlama-3-8B/b8b5b30e-d259-49ae-8155-7f63ddae88c8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mlabonne_OrpoLlama-3-8B/1762652580.369958", - "retrieved_timestamp": "1762652580.3699589", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mlabonne/OrpoLlama-3-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "mlabonne/OrpoLlama-3-8B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36527524745453177 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4424079063503051 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.055891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3579375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2705285904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/mmnga/Llama-3-70B-japanese-suzume-vector-v0.1/56f52103-ea5e-4228-ac7b-3c6929fe5b76.json b/data/hfopenllm_v2/meta/mmnga/Llama-3-70B-japanese-suzume-vector-v0.1/56f52103-ea5e-4228-ac7b-3c6929fe5b76.json deleted file mode 100644 index f804cb0ff..000000000 --- a/data/hfopenllm_v2/meta/mmnga/Llama-3-70B-japanese-suzume-vector-v0.1/56f52103-ea5e-4228-ac7b-3c6929fe5b76.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mmnga_Llama-3-70B-japanese-suzume-vector-v0.1/1762652580.370961", - "retrieved_timestamp": "1762652580.370962", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mmnga/Llama-3-70B-japanese-suzume-vector-v0.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "mmnga/Llama-3-70B-japanese-suzume-vector-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4648931501748693 - } - }, - { - "evaluation_name": 
"BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6541763652331517 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2326283987915408 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4140625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5224401595744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/mobiuslabsgmbh/DeepSeek-R1-ReDistill-Llama3-8B-v1.1/09ec0c0c-d403-4f23-99a4-61196c70734d.json b/data/hfopenllm_v2/meta/mobiuslabsgmbh/DeepSeek-R1-ReDistill-Llama3-8B-v1.1/09ec0c0c-d403-4f23-99a4-61196c70734d.json deleted file mode 100644 index eb7a94979..000000000 --- a/data/hfopenllm_v2/meta/mobiuslabsgmbh/DeepSeek-R1-ReDistill-Llama3-8B-v1.1/09ec0c0c-d403-4f23-99a4-61196c70734d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mobiuslabsgmbh_DeepSeek-R1-ReDistill-Llama3-8B-v1.1/1762652580.371218", - "retrieved_timestamp": "1762652580.371218", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mobiuslabsgmbh/DeepSeek-R1-ReDistill-Llama3-8B-v1.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "mobiuslabsgmbh/DeepSeek-R1-ReDistill-Llama3-8B-v1.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.370396104558128 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34730320150504124 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3285498489425982 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33955208333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2198304521276596 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/mukaj/Llama-3.1-Hawkish-8B/b94f468b-7c0e-491e-8404-de1bad7ff0f0.json b/data/hfopenllm_v2/meta/mukaj/Llama-3.1-Hawkish-8B/b94f468b-7c0e-491e-8404-de1bad7ff0f0.json deleted file mode 100644 index 5b0b04770..000000000 --- a/data/hfopenllm_v2/meta/mukaj/Llama-3.1-Hawkish-8B/b94f468b-7c0e-491e-8404-de1bad7ff0f0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mukaj_Llama-3.1-Hawkish-8B/1762652580.3748438", - "retrieved_timestamp": "1762652580.374845", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mukaj/Llama-3.1-Hawkish-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "mukaj/Llama-3.1-Hawkish-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6720468357291984 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4883822828416351 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.243202416918429 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39672916666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33311170212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/nbeerbower/Llama-3.1-Nemotron-lorablated-70B/a9af8b88-8f00-4662-8ca4-d042030885ae.json 
b/data/hfopenllm_v2/meta/nbeerbower/Llama-3.1-Nemotron-lorablated-70B/a9af8b88-8f00-4662-8ca4-d042030885ae.json deleted file mode 100644 index 06815a4c0..000000000 --- a/data/hfopenllm_v2/meta/nbeerbower/Llama-3.1-Nemotron-lorablated-70B/a9af8b88-8f00-4662-8ca4-d042030885ae.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_Llama-3.1-Nemotron-lorablated-70B/1762652580.379643", - "retrieved_timestamp": "1762652580.379644", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/Llama-3.1-Nemotron-lorablated-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "nbeerbower/Llama-3.1-Nemotron-lorablated-70B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7228797368759337 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6825051293384551 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3338368580060423 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39093959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4681666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5343251329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/nbeerbower/Llama3.1-Gutenberg-Doppel-70B/fffd0da2-d4b0-4a11-9fd4-c0dfa0c70431.json b/data/hfopenllm_v2/meta/nbeerbower/Llama3.1-Gutenberg-Doppel-70B/fffd0da2-d4b0-4a11-9fd4-c0dfa0c70431.json deleted file mode 100644 index f91002769..000000000 --- a/data/hfopenllm_v2/meta/nbeerbower/Llama3.1-Gutenberg-Doppel-70B/fffd0da2-d4b0-4a11-9fd4-c0dfa0c70431.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_Llama3.1-Gutenberg-Doppel-70B/1762652580.379898", - "retrieved_timestamp": "1762652580.3798988", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - 
"source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/Llama3.1-Gutenberg-Doppel-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "nbeerbower/Llama3.1-Gutenberg-Doppel-70B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7092159913474027 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6660891255994471 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2122356495468278 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3447986577181208 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48971875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4736535904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/nbeerbower/llama-3-gutenberg-8B/144ff584-3230-42e5-acae-35518b10a1e9.json b/data/hfopenllm_v2/meta/nbeerbower/llama-3-gutenberg-8B/144ff584-3230-42e5-acae-35518b10a1e9.json deleted file mode 100644 index fea6e2ce8..000000000 --- a/data/hfopenllm_v2/meta/nbeerbower/llama-3-gutenberg-8B/144ff584-3230-42e5-acae-35518b10a1e9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_llama-3-gutenberg-8B/1762652580.3850691", - "retrieved_timestamp": "1762652580.385074", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/llama-3-gutenberg-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "nbeerbower/llama-3-gutenberg-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4371910973993448 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49936002561994197 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07854984894259819 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40730208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.383061835106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/nbeerbower/llama3.1-cc-8B/e011ff58-ea5c-4857-a76d-503c4188886f.json b/data/hfopenllm_v2/meta/nbeerbower/llama3.1-cc-8B/e011ff58-ea5c-4857-a76d-503c4188886f.json deleted file mode 100644 index 5b43e9251..000000000 --- a/data/hfopenllm_v2/meta/nbeerbower/llama3.1-cc-8B/e011ff58-ea5c-4857-a76d-503c4188886f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_llama3.1-cc-8B/1762652580.385431", - "retrieved_timestamp": "1762652580.385432", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/llama3.1-cc-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "nbeerbower/llama3.1-cc-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5068086011782071 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4871187428614386 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07099697885196375 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38851041666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3346908244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/nbeerbower/llama3.1-kartoffeldes-70B/c17cced5-be98-49c5-a919-c15b641ba2e7.json b/data/hfopenllm_v2/meta/nbeerbower/llama3.1-kartoffeldes-70B/c17cced5-be98-49c5-a919-c15b641ba2e7.json deleted file mode 100644 index a9faa3a31..000000000 --- a/data/hfopenllm_v2/meta/nbeerbower/llama3.1-kartoffeldes-70B/c17cced5-be98-49c5-a919-c15b641ba2e7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_llama3.1-kartoffeldes-70B/1762652580.385698", - "retrieved_timestamp": "1762652580.385699", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/llama3.1-kartoffeldes-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "nbeerbower/llama3.1-kartoffeldes-70B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8230218043679659 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6893878613110068 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3217522658610272 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35151006711409394 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46460416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4988364361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/necva/IE-cont-Llama3.1-8B/43f5a551-7257-4595-9b0c-60799ade231b.json b/data/hfopenllm_v2/meta/necva/IE-cont-Llama3.1-8B/43f5a551-7257-4595-9b0c-60799ade231b.json deleted file mode 100644 index 4b939255c..000000000 --- a/data/hfopenllm_v2/meta/necva/IE-cont-Llama3.1-8B/43f5a551-7257-4595-9b0c-60799ade231b.json +++ 
/dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/necva_IE-cont-Llama3.1-8B/1762652580.3888798", - "retrieved_timestamp": "1762652580.388881", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "necva/IE-cont-Llama3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "necva/IE-cont-Llama3.1-8B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20490742341431845 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911778102988436 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35753125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11668882978723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/netcat420/Llama3.1-MFANN-8b/aa3467df-1a74-47af-b635-0318df88dd58.json b/data/hfopenllm_v2/meta/netcat420/Llama3.1-MFANN-8b/aa3467df-1a74-47af-b635-0318df88dd58.json deleted file mode 100644 index 86363821e..000000000 --- a/data/hfopenllm_v2/meta/netcat420/Llama3.1-MFANN-8b/aa3467df-1a74-47af-b635-0318df88dd58.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_Llama3.1-MFANN-8b/1762652580.3921962", - "retrieved_timestamp": "1762652580.3921971", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/Llama3.1-MFANN-8b", - "developer": "meta", - "inference_platform": "unknown", - "id": "netcat420/Llama3.1-MFANN-8b", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29695651981187693 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4281154680742545 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33790625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27252327127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V2/a9c38a44-a973-4bfd-a1f1-aa094d5e37fd.json b/data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V2/a9c38a44-a973-4bfd-a1f1-aa094d5e37fd.json deleted file mode 100644 index 5f81efae8..000000000 --- a/data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V2/a9c38a44-a973-4bfd-a1f1-aa094d5e37fd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN-Llama3.1-Abliterated-SLERP-TIES-V2/1762652580.3924491", - "retrieved_timestamp": "1762652580.39245", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V2", - "developer": "meta", - "inference_platform": "unknown", - "id": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V2", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4209796672828096 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49237606236472237 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.07628398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37276041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35222739361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V3/e5a71267-56c7-418a-bfcc-b4b5ed10496e.json b/data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V3/e5a71267-56c7-418a-bfcc-b4b5ed10496e.json deleted file mode 100644 index f18ee6ebd..000000000 --- a/data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V3/e5a71267-56c7-418a-bfcc-b4b5ed10496e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN-Llama3.1-Abliterated-SLERP-TIES-V3/1762652580.3926558", - "retrieved_timestamp": "1762652580.3926558", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V3", - "developer": "meta", - "inference_platform": "unknown", - "id": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V3", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4238021782204551 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4914021594225444 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0755287009063444 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37406249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34898603723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-SLERP-V4/12a56879-c48c-4422-bc6f-fad813c94414.json b/data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-SLERP-V4/12a56879-c48c-4422-bc6f-fad813c94414.json deleted file mode 100644 index 7890f9a39..000000000 --- a/data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-SLERP-V4/12a56879-c48c-4422-bc6f-fad813c94414.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN-Llama3.1-Abliterated-SLERP-V4/1762652580.39286", - "retrieved_timestamp": "1762652580.392861", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-V4", - "developer": "meta", - "inference_platform": "unknown", - "id": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-V4", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41688275996577967 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4908971108837563 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38209374999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35164561170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-SLERP-V5/d52d6e93-b291-4f21-aca7-2c8d48313dec.json b/data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-SLERP-V5/d52d6e93-b291-4f21-aca7-2c8d48313dec.json deleted file mode 100644 index 197e7220b..000000000 --- a/data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-SLERP-V5/d52d6e93-b291-4f21-aca7-2c8d48313dec.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - 
"evaluation_id": "hfopenllm_v2/netcat420_MFANN-Llama3.1-Abliterated-SLERP-V5/1762652580.393064", - "retrieved_timestamp": "1762652580.393065", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-V5", - "developer": "meta", - "inference_platform": "unknown", - "id": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-V5", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4328947193446721 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4951892200623516 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08157099697885196 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.378125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3444980053191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-Slerp-TIES/c5a71d25-35f7-453e-9551-7881046fdeff.json b/data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-Slerp-TIES/c5a71d25-35f7-453e-9551-7881046fdeff.json deleted file mode 100644 index 762207dd6..000000000 --- a/data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-Slerp-TIES/c5a71d25-35f7-453e-9551-7881046fdeff.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN-Llama3.1-Abliterated-Slerp-TIES/1762652580.393313", - "retrieved_timestamp": "1762652580.393313", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/MFANN-Llama3.1-Abliterated-Slerp-TIES", - "developer": "meta", - "inference_platform": "unknown", - "id": "netcat420/MFANN-Llama3.1-Abliterated-Slerp-TIES", - "additional_details": { - "precision": "float16", - 
"architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42934746472692453 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49675121796238325 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3686979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3531416223404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-Slerp-V3.2/1ef7ee4e-ab54-4e5a-b27f-4d6aeffd3f54.json b/data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-Slerp-V3.2/1ef7ee4e-ab54-4e5a-b27f-4d6aeffd3f54.json deleted file mode 100644 index aabda84cd..000000000 --- a/data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-Slerp-V3.2/1ef7ee4e-ab54-4e5a-b27f-4d6aeffd3f54.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN-Llama3.1-Abliterated-Slerp-V3.2/1762652580.3935192", - "retrieved_timestamp": "1762652580.39352", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/MFANN-Llama3.1-Abliterated-Slerp-V3.2", - "developer": "meta", - "inference_platform": "unknown", - "id": "netcat420/MFANN-Llama3.1-Abliterated-Slerp-V3.2", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41281134057633745 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49782535474346185 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0702416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37542708333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3527260638297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/netcat420/MFANN-llama3.1-Abliterated-SLERP/3d3862a4-79df-488c-8d17-dc332fa3abce.json b/data/hfopenllm_v2/meta/netcat420/MFANN-llama3.1-Abliterated-SLERP/3d3862a4-79df-488c-8d17-dc332fa3abce.json deleted file mode 100644 index df4dda140..000000000 --- a/data/hfopenllm_v2/meta/netcat420/MFANN-llama3.1-Abliterated-SLERP/3d3862a4-79df-488c-8d17-dc332fa3abce.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN-llama3.1-Abliterated-SLERP/1762652580.394179", - "retrieved_timestamp": "1762652580.39418", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/MFANN-llama3.1-Abliterated-SLERP", - "developer": "meta", - "inference_platform": "unknown", - "id": "netcat420/MFANN-llama3.1-Abliterated-SLERP", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25906262051357065 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45744999460878283 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04833836858006042 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3809166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2928025265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/netcat420/MFANN-llama3.1-abliterated-SLERP-v3.1/71e87ce8-88f2-4858-b65f-9225f59cc3f9.json b/data/hfopenllm_v2/meta/netcat420/MFANN-llama3.1-abliterated-SLERP-v3.1/71e87ce8-88f2-4858-b65f-9225f59cc3f9.json deleted file mode 100644 index 1487ddf13..000000000 --- a/data/hfopenllm_v2/meta/netcat420/MFANN-llama3.1-abliterated-SLERP-v3.1/71e87ce8-88f2-4858-b65f-9225f59cc3f9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN-llama3.1-abliterated-SLERP-v3.1/1762652580.394599", - "retrieved_timestamp": "1762652580.3946", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/MFANN-llama3.1-abliterated-SLERP-v3.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "netcat420/MFANN-llama3.1-abliterated-SLERP-v3.1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4201551882338861 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.492068920606988 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06948640483383686 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3686354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3543051861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/netcat420/MFANN-llama3.1-abliterated-SLERP-v3/73f2659d-ff95-403f-99e0-09de7c807c3c.json b/data/hfopenllm_v2/meta/netcat420/MFANN-llama3.1-abliterated-SLERP-v3/73f2659d-ff95-403f-99e0-09de7c807c3c.json deleted file mode 100644 index 11a63d6d8..000000000 --- 
a/data/hfopenllm_v2/meta/netcat420/MFANN-llama3.1-abliterated-SLERP-v3/73f2659d-ff95-403f-99e0-09de7c807c3c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN-llama3.1-abliterated-SLERP-v3/1762652580.394387", - "retrieved_timestamp": "1762652580.394388", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/MFANN-llama3.1-abliterated-SLERP-v3", - "developer": "meta", - "inference_platform": "unknown", - "id": "netcat420/MFANN-llama3.1-abliterated-SLERP-v3", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37993856301280604 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49305765460927126 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06419939577039276 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36603125000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35305851063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/netcat420/MFANN-llama3.1-abliterated-v2/46728c83-957a-4eb7-8a04-0fee4efe50d1.json b/data/hfopenllm_v2/meta/netcat420/MFANN-llama3.1-abliterated-v2/46728c83-957a-4eb7-8a04-0fee4efe50d1.json deleted file mode 100644 index ce2f62cac..000000000 --- a/data/hfopenllm_v2/meta/netcat420/MFANN-llama3.1-abliterated-v2/46728c83-957a-4eb7-8a04-0fee4efe50d1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN-llama3.1-abliterated-v2/1762652580.3948102", - "retrieved_timestamp": "1762652580.394811", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/MFANN-llama3.1-abliterated-v2", - "developer": "meta", - 
"inference_platform": "unknown", - "id": "netcat420/MFANN-llama3.1-abliterated-v2", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4429114748866341 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4940829733015402 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07401812688821752 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3845416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3490691489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/ngxson/MiniThinky-1B-Llama-3.2/3a05547d-850b-42b5-978d-0aff574cb5ca.json b/data/hfopenllm_v2/meta/ngxson/MiniThinky-1B-Llama-3.2/3a05547d-850b-42b5-978d-0aff574cb5ca.json deleted file mode 100644 index 70b297961..000000000 --- a/data/hfopenllm_v2/meta/ngxson/MiniThinky-1B-Llama-3.2/3a05547d-850b-42b5-978d-0aff574cb5ca.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ngxson_MiniThinky-1B-Llama-3.2/1762652580.4050229", - "retrieved_timestamp": "1762652580.4050229", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ngxson/MiniThinky-1B-Llama-3.2", - "developer": "meta", - "inference_platform": "unknown", - "id": "ngxson/MiniThinky-1B-Llama-3.2", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2771479673931834 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31422650382721545 - } - }, - { - "evaluation_name": "MATH 
Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23909395973154363 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34336458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1146941489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/ngxson/MiniThinky-v2-1B-Llama-3.2/f37d1682-5df9-45dc-92ae-6bf587a03e9b.json b/data/hfopenllm_v2/meta/ngxson/MiniThinky-v2-1B-Llama-3.2/f37d1682-5df9-45dc-92ae-6bf587a03e9b.json deleted file mode 100644 index d05586a36..000000000 --- a/data/hfopenllm_v2/meta/ngxson/MiniThinky-v2-1B-Llama-3.2/f37d1682-5df9-45dc-92ae-6bf587a03e9b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ngxson_MiniThinky-v2-1B-Llama-3.2/1762652580.405281", - "retrieved_timestamp": "1762652580.405282", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ngxson/MiniThinky-v2-1B-Llama-3.2", - "developer": "meta", - "inference_platform": "unknown", - "id": "ngxson/MiniThinky-v2-1B-Llama-3.2", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2963071317437732 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32051111358951634 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23993288590604026 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3356145833333333 - } - }, 
- { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1116190159574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/noname0202/llama-math-1b-r16-0to512tokens-test/8fb0f696-49a8-4611-ad82-3b7e19d5d867.json b/data/hfopenllm_v2/meta/noname0202/llama-math-1b-r16-0to512tokens-test/8fb0f696-49a8-4611-ad82-3b7e19d5d867.json deleted file mode 100644 index 849b3e9f2..000000000 --- a/data/hfopenllm_v2/meta/noname0202/llama-math-1b-r16-0to512tokens-test/8fb0f696-49a8-4611-ad82-3b7e19d5d867.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/noname0202_llama-math-1b-r16-0to512tokens-test/1762652580.4104571", - "retrieved_timestamp": "1762652580.410458", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "noname0202/llama-math-1b-r16-0to512tokens-test", - "developer": "meta", - "inference_platform": "unknown", - "id": "noname0202/llama-math-1b-r16-0to512tokens-test", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5469753587148765 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34884166022601404 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08157099697885196 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3143125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17278922872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/noname0202/llama-math-1b-r32-0to512tokens-test/5623295c-0170-4832-b3e9-df00c660c59b.json b/data/hfopenllm_v2/meta/noname0202/llama-math-1b-r32-0to512tokens-test/5623295c-0170-4832-b3e9-df00c660c59b.json deleted file mode 100644 index fbced9eac..000000000 --- a/data/hfopenllm_v2/meta/noname0202/llama-math-1b-r32-0to512tokens-test/5623295c-0170-4832-b3e9-df00c660c59b.json +++ /dev/null @@ 
-1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/noname0202_llama-math-1b-r32-0to512tokens-test/1762652580.410711", - "retrieved_timestamp": "1762652580.410711", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "noname0202/llama-math-1b-r32-0to512tokens-test", - "developer": "meta", - "inference_platform": "unknown", - "id": "noname0202/llama-math-1b-r32-0to512tokens-test", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5682577782505973 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3495183139510159 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09063444108761329 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32094791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17603058510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/noname0202/llama-math-1b-r32-test/6c3ed9db-730c-48cb-95f9-662467957403.json b/data/hfopenllm_v2/meta/noname0202/llama-math-1b-r32-test/6c3ed9db-730c-48cb-95f9-662467957403.json deleted file mode 100644 index e7d9539fe..000000000 --- a/data/hfopenllm_v2/meta/noname0202/llama-math-1b-r32-test/6c3ed9db-730c-48cb-95f9-662467957403.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/noname0202_llama-math-1b-r32-test/1762652580.410917", - "retrieved_timestamp": "1762652580.410918", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "noname0202/llama-math-1b-r32-test", - "developer": "meta", - "inference_platform": "unknown", - "id": "noname0202/llama-math-1b-r32-test", - "additional_details": { - "precision": "bfloat16", - "architecture": 
"LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5819215237791282 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3485960127764988 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07250755287009064 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31564583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17810837765957446 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/noname0202/llama-math-1b-r8-512tokens-test/c9d6f048-95b8-44ea-9d17-9d9f2d4854b4.json b/data/hfopenllm_v2/meta/noname0202/llama-math-1b-r8-512tokens-test/c9d6f048-95b8-44ea-9d17-9d9f2d4854b4.json deleted file mode 100644 index 72f9055f8..000000000 --- a/data/hfopenllm_v2/meta/noname0202/llama-math-1b-r8-512tokens-test/c9d6f048-95b8-44ea-9d17-9d9f2d4854b4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/noname0202_llama-math-1b-r8-512tokens-test/1762652580.411124", - "retrieved_timestamp": "1762652580.411125", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "noname0202/llama-math-1b-r8-512tokens-test", - "developer": "meta", - "inference_platform": "unknown", - "id": "noname0202/llama-math-1b-r8-512tokens-test", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5791987482103043 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3495762462148306 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH 
Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08157099697885196 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31694791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17528257978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/nvidia/Llama-3.1-Minitron-4B-Depth-Base/98402d5d-95a6-4f48-9745-8653b298b48e.json b/data/hfopenllm_v2/meta/nvidia/Llama-3.1-Minitron-4B-Depth-Base/98402d5d-95a6-4f48-9745-8653b298b48e.json deleted file mode 100644 index f6386e682..000000000 --- a/data/hfopenllm_v2/meta/nvidia/Llama-3.1-Minitron-4B-Depth-Base/98402d5d-95a6-4f48-9745-8653b298b48e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nvidia_Llama-3.1-Minitron-4B-Depth-Base/1762652580.4147708", - "retrieved_timestamp": "1762652580.414772", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nvidia/Llama-3.1-Minitron-4B-Depth-Base", - "developer": "meta", - "inference_platform": "unknown", - "id": "nvidia/Llama-3.1-Minitron-4B-Depth-Base", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.02 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16069362624502986 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4170704193104893 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40106250000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2798371010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/nvidia/OpenMath2-Llama3.1-8B/31c103fc-22ab-44a0-aeaf-769a9ff803df.json b/data/hfopenllm_v2/meta/nvidia/OpenMath2-Llama3.1-8B/31c103fc-22ab-44a0-aeaf-769a9ff803df.json deleted file mode 100644 index 8fa37e047..000000000 --- a/data/hfopenllm_v2/meta/nvidia/OpenMath2-Llama3.1-8B/31c103fc-22ab-44a0-aeaf-769a9ff803df.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nvidia_OpenMath2-Llama3.1-8B/1762652580.416384", - "retrieved_timestamp": "1762652580.416384", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nvidia/OpenMath2-Llama3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "nvidia/OpenMath2-Llama3.1-8B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23305939352030391 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40955241401694514 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2673716012084592 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34355208333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15533577127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/ontocord/Llama_3.2_1b-autoredteam_helpfulness-train/8277cf4f-865b-4b3e-afcb-b906064dfc20.json b/data/hfopenllm_v2/meta/ontocord/Llama_3.2_1b-autoredteam_helpfulness-train/8277cf4f-865b-4b3e-afcb-b906064dfc20.json deleted file mode 100644 index 4e795be24..000000000 --- a/data/hfopenllm_v2/meta/ontocord/Llama_3.2_1b-autoredteam_helpfulness-train/8277cf4f-865b-4b3e-afcb-b906064dfc20.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/ontocord_Llama_3.2_1b-autoredteam_helpfulness-train/1762652580.417561", - "retrieved_timestamp": "1762652580.417561", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ontocord/Llama_3.2_1b-autoredteam_helpfulness-train", - "developer": "meta", - "inference_platform": "unknown", - "id": "ontocord/Llama_3.2_1b-autoredteam_helpfulness-train", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2765484470094904 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31150775306414563 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.345875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11319813829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/oopere/Llama-FinSent-S/8b9ec467-1555-415c-b1ee-23be18ded9e5.json b/data/hfopenllm_v2/meta/oopere/Llama-FinSent-S/8b9ec467-1555-415c-b1ee-23be18ded9e5.json deleted file mode 100644 index 641cf3d73..000000000 --- a/data/hfopenllm_v2/meta/oopere/Llama-FinSent-S/8b9ec467-1555-415c-b1ee-23be18ded9e5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/oopere_Llama-FinSent-S/1762652580.4263492", - "retrieved_timestamp": "1762652580.42635", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "oopere/Llama-FinSent-S", - "developer": "meta", - "inference_platform": "unknown", - "id": "oopere/Llama-FinSent-S", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.914 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2163980460733077 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3169254117559263 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3831770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11336436170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/oopere/Llama-FinSent-S/f99bad90-e7b2-4205-9f51-93f96e90188c.json b/data/hfopenllm_v2/meta/oopere/Llama-FinSent-S/f99bad90-e7b2-4205-9f51-93f96e90188c.json deleted file mode 100644 index 046a8d8fd..000000000 --- a/data/hfopenllm_v2/meta/oopere/Llama-FinSent-S/f99bad90-e7b2-4205-9f51-93f96e90188c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/oopere_Llama-FinSent-S/1762652580.426095", - "retrieved_timestamp": "1762652580.426095", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "oopere/Llama-FinSent-S", - "developer": "meta", - "inference_platform": "unknown", - "id": "oopere/Llama-FinSent-S", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.914 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21187670935340452 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31562055310321474 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01812688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy 
on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3832395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11303191489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/oopere/pruned10-llama-3.2-3B/2ff7d218-348b-4069-808f-6b32e7a77a5b.json b/data/hfopenllm_v2/meta/oopere/pruned10-llama-3.2-3B/2ff7d218-348b-4069-808f-6b32e7a77a5b.json deleted file mode 100644 index b758104cd..000000000 --- a/data/hfopenllm_v2/meta/oopere/pruned10-llama-3.2-3B/2ff7d218-348b-4069-808f-6b32e7a77a5b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/oopere_pruned10-llama-3.2-3B/1762652580.426529", - "retrieved_timestamp": "1762652580.4265301", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "oopere/pruned10-llama-3.2-3B", - "developer": "meta", - "inference_platform": "unknown", - "id": "oopere/pruned10-llama-3.2-3B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.001 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17762980004166723 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3340421117164456 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3721666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16397938829787234 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/meta/oopere/pruned20-llama-1b/c86ed5b4-8793-424a-a5a2-9a54689cb388.json b/data/hfopenllm_v2/meta/oopere/pruned20-llama-1b/c86ed5b4-8793-424a-a5a2-9a54689cb388.json deleted file mode 100644 index eed76aad5..000000000 --- a/data/hfopenllm_v2/meta/oopere/pruned20-llama-1b/c86ed5b4-8793-424a-a5a2-9a54689cb388.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/oopere_pruned20-llama-1b/1762652580.426731", - "retrieved_timestamp": "1762652580.426732", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "oopere/pruned20-llama-1b", - "developer": "meta", - "inference_platform": "unknown", - "id": "oopere/pruned20-llama-1b", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.075 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19936213690784896 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30313627830972034 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36314583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11228390957446809 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/oopere/pruned20-llama-3.2-3b/e0e6bdbd-91c2-4d45-be73-03890ed13709.json b/data/hfopenllm_v2/meta/oopere/pruned20-llama-3.2-3b/e0e6bdbd-91c2-4d45-be73-03890ed13709.json deleted file mode 100644 index 621252d2a..000000000 --- a/data/hfopenllm_v2/meta/oopere/pruned20-llama-3.2-3b/e0e6bdbd-91c2-4d45-be73-03890ed13709.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/oopere_pruned20-llama-3.2-3b/1762652580.4269419", - "retrieved_timestamp": "1762652580.426943", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - 
"model_info": { - "name": "oopere/pruned20-llama-3.2-3b", - "developer": "meta", - "inference_platform": "unknown", - "id": "oopere/pruned20-llama-3.2-3b", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 2.79 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17887870849346402 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32478483912909756 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34184375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12799202127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/oopere/pruned40-llama-1b/0032ea65-98dc-48a9-90e7-835e389acecd.json b/data/hfopenllm_v2/meta/oopere/pruned40-llama-1b/0032ea65-98dc-48a9-90e7-835e389acecd.json deleted file mode 100644 index d45c54bfd..000000000 --- a/data/hfopenllm_v2/meta/oopere/pruned40-llama-1b/0032ea65-98dc-48a9-90e7-835e389acecd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/oopere_pruned40-llama-1b/1762652580.427145", - "retrieved_timestamp": "1762652580.427145", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "oopere/pruned40-llama-1b", - "developer": "meta", - "inference_platform": "unknown", - "id": "oopere/pruned40-llama-1b", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.914 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22843832143157933 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29691563801419935 - } - }, 
- { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0075528700906344415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24328859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4286666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10821143617021277 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/oopere/pruned40-llama-3.2-1B/bae27b4d-4046-45f1-b798-8356fa962df4.json b/data/hfopenllm_v2/meta/oopere/pruned40-llama-3.2-1B/bae27b4d-4046-45f1-b798-8356fa962df4.json deleted file mode 100644 index 1ab99fb02..000000000 --- a/data/hfopenllm_v2/meta/oopere/pruned40-llama-3.2-1B/bae27b4d-4046-45f1-b798-8356fa962df4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/oopere_pruned40-llama-3.2-1B/1762652580.427387", - "retrieved_timestamp": "1762652580.427387", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "oopere/pruned40-llama-3.2-1B", - "developer": "meta", - "inference_platform": "unknown", - "id": "oopere/pruned40-llama-3.2-1B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.914 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22663976028050017 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2982489713475327 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43523958333333335 - } 
- }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11145279255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/oopere/pruned40-llama-3.2-3b/97c9b209-b2ed-439f-9b01-cad25e205fa9.json b/data/hfopenllm_v2/meta/oopere/pruned40-llama-3.2-3b/97c9b209-b2ed-439f-9b01-cad25e205fa9.json deleted file mode 100644 index 7bb259366..000000000 --- a/data/hfopenllm_v2/meta/oopere/pruned40-llama-3.2-3b/97c9b209-b2ed-439f-9b01-cad25e205fa9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/oopere_pruned40-llama-3.2-3b/1762652580.4275908", - "retrieved_timestamp": "1762652580.4275908", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "oopere/pruned40-llama-3.2-3b", - "developer": "meta", - "inference_platform": "unknown", - "id": "oopere/pruned40-llama-3.2-3b", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 2.367 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21829634259320824 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31671170280977073 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3539375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11768617021276596 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/oopere/pruned60-llama-1b/4c0ac526-821a-49eb-9eee-152d594ed25b.json b/data/hfopenllm_v2/meta/oopere/pruned60-llama-1b/4c0ac526-821a-49eb-9eee-152d594ed25b.json deleted file mode 100644 index 4c6d797a4..000000000 --- a/data/hfopenllm_v2/meta/oopere/pruned60-llama-1b/4c0ac526-821a-49eb-9eee-152d594ed25b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/oopere_pruned60-llama-1b/1762652580.4277859", - "retrieved_timestamp": 
"1762652580.4277859", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "oopere/pruned60-llama-1b", - "developer": "meta", - "inference_platform": "unknown", - "id": "oopere/pruned60-llama-1b", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.753 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18285039251408486 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3016193474185398 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0022658610271903325 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24916107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40879166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11727061170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/oopere/pruned60-llama-3.2-3b/219c6f49-3d48-4e1b-8105-fdf323b2fc3c.json b/data/hfopenllm_v2/meta/oopere/pruned60-llama-3.2-3b/219c6f49-3d48-4e1b-8105-fdf323b2fc3c.json deleted file mode 100644 index 0fb0ebd48..000000000 --- a/data/hfopenllm_v2/meta/oopere/pruned60-llama-3.2-3b/219c6f49-3d48-4e1b-8105-fdf323b2fc3c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/oopere_pruned60-llama-3.2-3b/1762652580.42798", - "retrieved_timestamp": "1762652580.4279811", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "oopere/pruned60-llama-3.2-3b", - "developer": "meta", - "inference_platform": "unknown", - "id": "oopere/pruned60-llama-3.2-3b", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.944 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1824758307956223 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31662597093352013 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0037764350453172208 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3633333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11311502659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/orai-nlp/Llama-eus-8B/0ed99007-3e31-4c48-abe5-0cd94b95dcf4.json b/data/hfopenllm_v2/meta/orai-nlp/Llama-eus-8B/0ed99007-3e31-4c48-abe5-0cd94b95dcf4.json deleted file mode 100644 index c08b6adc5..000000000 --- a/data/hfopenllm_v2/meta/orai-nlp/Llama-eus-8B/0ed99007-3e31-4c48-abe5-0cd94b95dcf4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/orai-nlp_Llama-eus-8B/1762652580.43225", - "retrieved_timestamp": "1762652580.432275", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "orai-nlp/Llama-eus-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "orai-nlp/Llama-eus-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21612321972366655 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4418245490788701 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3918854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30576795212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/princeton-nlp/Llama-3-8B-ProLong-512k-Base/6c3d4b07-14c5-4218-862f-2aca386f5144.json b/data/hfopenllm_v2/meta/princeton-nlp/Llama-3-8B-ProLong-512k-Base/6c3d4b07-14c5-4218-862f-2aca386f5144.json deleted file mode 100644 index 74d383881..000000000 --- a/data/hfopenllm_v2/meta/princeton-nlp/Llama-3-8B-ProLong-512k-Base/6c3d4b07-14c5-4218-862f-2aca386f5144.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-8B-ProLong-512k-Base/1762652580.442863", - "retrieved_timestamp": "1762652580.4428642", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-8B-ProLong-512k-Base", - "developer": "meta", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-8B-ProLong-512k-Base", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5322123077877808 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5033213133882991 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06873111782477341 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4222708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33294547872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/princeton-nlp/Llama-3-8B-ProLong-64k-Base/171a1779-0f17-4514-96ae-e4f9acea86b4.json 
b/data/hfopenllm_v2/meta/princeton-nlp/Llama-3-8B-ProLong-64k-Base/171a1779-0f17-4514-96ae-e4f9acea86b4.json deleted file mode 100644 index 3226a2071..000000000 --- a/data/hfopenllm_v2/meta/princeton-nlp/Llama-3-8B-ProLong-64k-Base/171a1779-0f17-4514-96ae-e4f9acea86b4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-8B-ProLong-64k-Base/1762652580.443676", - "retrieved_timestamp": "1762652580.443677", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-8B-ProLong-64k-Base", - "developer": "meta", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-8B-ProLong-64k-Base", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5200722970606879 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49271325981523906 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4340520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3347739361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/princeton-nlp/Sheared-LLaMA-1.3B/578905fb-a4a6-4dcd-9b09-ff5289568b91.json b/data/hfopenllm_v2/meta/princeton-nlp/Sheared-LLaMA-1.3B/578905fb-a4a6-4dcd-9b09-ff5289568b91.json deleted file mode 100644 index e731cd192..000000000 --- a/data/hfopenllm_v2/meta/princeton-nlp/Sheared-LLaMA-1.3B/578905fb-a4a6-4dcd-9b09-ff5289568b91.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Sheared-LLaMA-1.3B/1762652580.4538639", - "retrieved_timestamp": "1762652580.453865", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": 
"documentation" - }, - "model_info": { - "name": "princeton-nlp/Sheared-LLaMA-1.3B", - "developer": "meta", - "inference_platform": "unknown", - "id": "princeton-nlp/Sheared-LLaMA-1.3B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.3 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2197702097102355 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31970467392464424 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23993288590604026 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3713020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11710438829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/princeton-nlp/Sheared-LLaMA-2.7B/3a0252c3-ced9-4cb4-94ef-d3800ac15ff9.json b/data/hfopenllm_v2/meta/princeton-nlp/Sheared-LLaMA-2.7B/3a0252c3-ced9-4cb4-94ef-d3800ac15ff9.json deleted file mode 100644 index 9958d3a41..000000000 --- a/data/hfopenllm_v2/meta/princeton-nlp/Sheared-LLaMA-2.7B/3a0252c3-ced9-4cb4-94ef-d3800ac15ff9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Sheared-LLaMA-2.7B/1762652580.4540951", - "retrieved_timestamp": "1762652580.4540958", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Sheared-LLaMA-2.7B", - "developer": "meta", - "inference_platform": "unknown", - "id": "princeton-nlp/Sheared-LLaMA-2.7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 2.7 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24165214962964932 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.32586855691245953 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3567291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11868351063829788 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/prithivMLmods/Deepthink-Llama-3-8B-Preview/020f77a1-1051-4f85-8037-ed4f8b12474a.json b/data/hfopenllm_v2/meta/prithivMLmods/Deepthink-Llama-3-8B-Preview/020f77a1-1051-4f85-8037-ed4f8b12474a.json deleted file mode 100644 index 664aad348..000000000 --- a/data/hfopenllm_v2/meta/prithivMLmods/Deepthink-Llama-3-8B-Preview/020f77a1-1051-4f85-8037-ed4f8b12474a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Deepthink-Llama-3-8B-Preview/1762652580.459939", - "retrieved_timestamp": "1762652580.459939", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Deepthink-Llama-3-8B-Preview", - "developer": "meta", - "inference_platform": "unknown", - "id": "prithivMLmods/Deepthink-Llama-3-8B-Preview", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29553252037926037 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4664510845126107 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3549848942598187 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy 
on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37070833333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2738530585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/prithivMLmods/Llama-3.2-3B-Math-Oct/5ab1b41f-ee87-475c-b48b-e154c580d560.json b/data/hfopenllm_v2/meta/prithivMLmods/Llama-3.2-3B-Math-Oct/5ab1b41f-ee87-475c-b48b-e154c580d560.json deleted file mode 100644 index 5ac91c424..000000000 --- a/data/hfopenllm_v2/meta/prithivMLmods/Llama-3.2-3B-Math-Oct/5ab1b41f-ee87-475c-b48b-e154c580d560.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Llama-3.2-3B-Math-Oct/1762652580.464829", - "retrieved_timestamp": "1762652580.46483", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Llama-3.2-3B-Math-Oct", - "developer": "meta", - "inference_platform": "unknown", - "id": "prithivMLmods/Llama-3.2-3B-Math-Oct", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4585233846194763 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4371840952508727 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11555891238670694 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34698958333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911402925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/prithivMLmods/Llama-3.2-6B-AlgoCode/914b588e-6da8-4a08-9313-ac7004fd8b97.json b/data/hfopenllm_v2/meta/prithivMLmods/Llama-3.2-6B-AlgoCode/914b588e-6da8-4a08-9313-ac7004fd8b97.json deleted file mode 100644 index 151a0b9ba..000000000 --- 
a/data/hfopenllm_v2/meta/prithivMLmods/Llama-3.2-6B-AlgoCode/914b588e-6da8-4a08-9313-ac7004fd8b97.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Llama-3.2-6B-AlgoCode/1762652580.465046", - "retrieved_timestamp": "1762652580.465046", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Llama-3.2-6B-AlgoCode", - "developer": "meta", - "inference_platform": "unknown", - "id": "prithivMLmods/Llama-3.2-6B-AlgoCode", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.339 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21357553513566227 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37477424449567703 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2869127516778524 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40134374999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17977061170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/prithivMLmods/Llama-8B-Distill-CoT/6b1d1057-0091-4e44-822f-f7c1e5dc3ce9.json b/data/hfopenllm_v2/meta/prithivMLmods/Llama-8B-Distill-CoT/6b1d1057-0091-4e44-822f-f7c1e5dc3ce9.json deleted file mode 100644 index 68cb090cc..000000000 --- a/data/hfopenllm_v2/meta/prithivMLmods/Llama-8B-Distill-CoT/6b1d1057-0091-4e44-822f-f7c1e5dc3ce9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Llama-8B-Distill-CoT/1762652580.465258", - "retrieved_timestamp": "1762652580.465258", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Llama-8B-Distill-CoT", - "developer": "meta", - "inference_platform": "unknown", - "id": 
"prithivMLmods/Llama-8B-Distill-CoT", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3341511633576688 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4297620873695442 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3719791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.273188164893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/prithivMLmods/Llama-Deepsync-1B/5516c5d6-29c9-46dc-ae29-61876fb488c2.json b/data/hfopenllm_v2/meta/prithivMLmods/Llama-Deepsync-1B/5516c5d6-29c9-46dc-ae29-61876fb488c2.json deleted file mode 100644 index 345211620..000000000 --- a/data/hfopenllm_v2/meta/prithivMLmods/Llama-Deepsync-1B/5516c5d6-29c9-46dc-ae29-61876fb488c2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Llama-Deepsync-1B/1762652580.4655502", - "retrieved_timestamp": "1762652580.4655511", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Llama-Deepsync-1B", - "developer": "meta", - "inference_platform": "unknown", - "id": "prithivMLmods/Llama-Deepsync-1B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3570071853792382 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33856262083940014 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35651041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17378656914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/prithivMLmods/Llama-Deepsync-3B/fbdcf318-d1b5-4ed6-b13d-efb14dfaf09f.json b/data/hfopenllm_v2/meta/prithivMLmods/Llama-Deepsync-3B/fbdcf318-d1b5-4ed6-b13d-efb14dfaf09f.json deleted file mode 100644 index 412eadf14..000000000 --- a/data/hfopenllm_v2/meta/prithivMLmods/Llama-Deepsync-3B/fbdcf318-d1b5-4ed6-b13d-efb14dfaf09f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Llama-Deepsync-3B/1762652580.465787", - "retrieved_timestamp": "1762652580.465788", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Llama-Deepsync-3B", - "developer": "meta", - "inference_platform": "unknown", - "id": "prithivMLmods/Llama-Deepsync-3B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4302218114602588 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4291521655271033 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11782477341389729 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33238541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3031083776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/prithivMLmods/Llama-Express.1-Math/99fd40d7-8d26-4088-ba03-1c1d7ed11ca0.json b/data/hfopenllm_v2/meta/prithivMLmods/Llama-Express.1-Math/99fd40d7-8d26-4088-ba03-1c1d7ed11ca0.json deleted file mode 100644 index 649d51870..000000000 --- a/data/hfopenllm_v2/meta/prithivMLmods/Llama-Express.1-Math/99fd40d7-8d26-4088-ba03-1c1d7ed11ca0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Llama-Express.1-Math/1762652580.466016", - "retrieved_timestamp": "1762652580.466017", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Llama-Express.1-Math", - "developer": "meta", - "inference_platform": "unknown", - "id": "prithivMLmods/Llama-Express.1-Math", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5084320713484665 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33638140090435265 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.055891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31434375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16098736702127658 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/pszemraj/Llama-3-6.3b-v0.1/74260e1f-8b2d-40ac-ac96-f268d65fa838.json b/data/hfopenllm_v2/meta/pszemraj/Llama-3-6.3b-v0.1/74260e1f-8b2d-40ac-ac96-f268d65fa838.json deleted file mode 100644 index 6e279c242..000000000 --- a/data/hfopenllm_v2/meta/pszemraj/Llama-3-6.3b-v0.1/74260e1f-8b2d-40ac-ac96-f268d65fa838.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/pszemraj_Llama-3-6.3b-v0.1/1762652580.4812942", - "retrieved_timestamp": 
"1762652580.481295", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "pszemraj/Llama-3-6.3b-v0.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "pszemraj/Llama-3-6.3b-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.3 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10438968603305895 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41968070468284147 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.021148036253776436 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3908333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2839926861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/qingy2019/LLaMa_3.2_3B_Catalysts/2fb27531-96ee-48d2-9416-43ef790d7196.json b/data/hfopenllm_v2/meta/qingy2019/LLaMa_3.2_3B_Catalysts/2fb27531-96ee-48d2-9416-43ef790d7196.json deleted file mode 100644 index f5e09cb3b..000000000 --- a/data/hfopenllm_v2/meta/qingy2019/LLaMa_3.2_3B_Catalysts/2fb27531-96ee-48d2-9416-43ef790d7196.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/qingy2019_LLaMa_3.2_3B_Catalysts/1762652580.4818308", - "retrieved_timestamp": "1762652580.481832", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "qingy2019/LLaMa_3.2_3B_Catalysts", - "developer": "meta", - "inference_platform": "unknown", - "id": "qingy2019/LLaMa_3.2_3B_Catalysts", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.499239794855428 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44681268798954793 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12915407854984895 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37877083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30078125 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/qingy2019/OpenMath2-Llama3.1-8B/75da6225-cc30-480c-b33e-359648932d9d.json b/data/hfopenllm_v2/meta/qingy2019/OpenMath2-Llama3.1-8B/75da6225-cc30-480c-b33e-359648932d9d.json deleted file mode 100644 index 510e9a4fa..000000000 --- a/data/hfopenllm_v2/meta/qingy2019/OpenMath2-Llama3.1-8B/75da6225-cc30-480c-b33e-359648932d9d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/qingy2019_OpenMath2-Llama3.1-8B/1762652580.482083", - "retrieved_timestamp": "1762652580.482084", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "qingy2019/OpenMath2-Llama3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "qingy2019/OpenMath2-Llama3.1-8B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23305939352030391 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40955241401694514 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2673716012084592 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34355208333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15533577127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/refuelai/Llama-3-Refueled/2f104869-3a3b-4d25-987b-77dba089b817.json b/data/hfopenllm_v2/meta/refuelai/Llama-3-Refueled/2f104869-3a3b-4d25-987b-77dba089b817.json deleted file mode 100644 index 4ad0b3250..000000000 --- a/data/hfopenllm_v2/meta/refuelai/Llama-3-Refueled/2f104869-3a3b-4d25-987b-77dba089b817.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/refuelai_Llama-3-Refueled/1762652580.494146", - "retrieved_timestamp": "1762652580.494147", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "refuelai/Llama-3-Refueled", - "developer": "meta", - "inference_platform": "unknown", - "id": "refuelai/Llama-3-Refueled", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4619952836252255 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5870766201705051 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44540625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30950797872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/riaz/FineLlama-3.1-8B/55eb0438-f0bd-4f9d-8bff-577d0245a57c.json 
b/data/hfopenllm_v2/meta/riaz/FineLlama-3.1-8B/55eb0438-f0bd-4f9d-8bff-577d0245a57c.json deleted file mode 100644 index 4af7213a8..000000000 --- a/data/hfopenllm_v2/meta/riaz/FineLlama-3.1-8B/55eb0438-f0bd-4f9d-8bff-577d0245a57c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/riaz_FineLlama-3.1-8B/1762652580.495657", - "retrieved_timestamp": "1762652580.495657", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "riaz/FineLlama-3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "riaz/FineLlama-3.1-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43734070045257695 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45857296498013483 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3762916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29637632978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/riaz/FineLlama-3.1-8B/d5fb7571-bafd-424a-87f5-2d14ac7bd8d2.json b/data/hfopenllm_v2/meta/riaz/FineLlama-3.1-8B/d5fb7571-bafd-424a-87f5-2d14ac7bd8d2.json deleted file mode 100644 index d3dfde243..000000000 --- a/data/hfopenllm_v2/meta/riaz/FineLlama-3.1-8B/d5fb7571-bafd-424a-87f5-2d14ac7bd8d2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/riaz_FineLlama-3.1-8B/1762652580.4959512", - "retrieved_timestamp": "1762652580.495952", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "riaz/FineLlama-3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": 
"riaz/FineLlama-3.1-8B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.413660199382084 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.456451981676995 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37762500000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29778922872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/rombodawg/rombos_Replete-Coder-Llama3-8B/af3522f6-e26f-491f-8ccc-df064e5d3010.json b/data/hfopenllm_v2/meta/rombodawg/rombos_Replete-Coder-Llama3-8B/af3522f6-e26f-491f-8ccc-df064e5d3010.json deleted file mode 100644 index ea4248329..000000000 --- a/data/hfopenllm_v2/meta/rombodawg/rombos_Replete-Coder-Llama3-8B/af3522f6-e26f-491f-8ccc-df064e5d3010.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/rombodawg_rombos_Replete-Coder-Llama3-8B/1762652580.5000498", - "retrieved_timestamp": "1762652580.500051", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "rombodawg/rombos_Replete-Coder-Llama3-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "rombodawg/rombos_Replete-Coder-Llama3-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4714125187834945 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32762771025266835 - } - }, - { - "evaluation_name": "MATH 
Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39663541666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13347739361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/sabersaleh/Llama2-7B-CPO/2ecc5d1d-edb7-4713-9bde-f83ab4736690.json b/data/hfopenllm_v2/meta/sabersaleh/Llama2-7B-CPO/2ecc5d1d-edb7-4713-9bde-f83ab4736690.json deleted file mode 100644 index 777c5686e..000000000 --- a/data/hfopenllm_v2/meta/sabersaleh/Llama2-7B-CPO/2ecc5d1d-edb7-4713-9bde-f83ab4736690.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sabersaleh_Llama2-7B-CPO/1762652580.502833", - "retrieved_timestamp": "1762652580.502836", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sabersaleh/Llama2-7B-CPO", - "developer": "meta", - "inference_platform": "unknown", - "id": "sabersaleh/Llama2-7B-CPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1545488193548673 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3457919655499851 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40482291666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1605718085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/sabersaleh/Llama2-7B-IPO/14deb011-b6ce-47c7-b855-c7ebcc291121.json b/data/hfopenllm_v2/meta/sabersaleh/Llama2-7B-IPO/14deb011-b6ce-47c7-b855-c7ebcc291121.json deleted file mode 100644 index e220f3596..000000000 --- a/data/hfopenllm_v2/meta/sabersaleh/Llama2-7B-IPO/14deb011-b6ce-47c7-b855-c7ebcc291121.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sabersaleh_Llama2-7B-IPO/1762652580.503558", - "retrieved_timestamp": "1762652580.5035589", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sabersaleh/Llama2-7B-IPO", - "developer": "meta", - "inference_platform": "unknown", - "id": "sabersaleh/Llama2-7B-IPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17685518867715438 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3474552716912811 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4047604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16173537234042554 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/sabersaleh/Llama2-7B-KTO/0744b5c6-e109-4ccb-acc9-955106ef5562.json b/data/hfopenllm_v2/meta/sabersaleh/Llama2-7B-KTO/0744b5c6-e109-4ccb-acc9-955106ef5562.json deleted file mode 100644 index 8164d9eb5..000000000 --- a/data/hfopenllm_v2/meta/sabersaleh/Llama2-7B-KTO/0744b5c6-e109-4ccb-acc9-955106ef5562.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sabersaleh_Llama2-7B-KTO/1762652580.503802", - "retrieved_timestamp": "1762652580.5038028", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sabersaleh/Llama2-7B-KTO", - "developer": "meta", - "inference_platform": "unknown", - "id": "sabersaleh/Llama2-7B-KTO", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15284999357260956 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35007577568366255 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0188821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41669791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1636469414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/sabersaleh/Llama2-7B-SPO/cfbdbc52-d846-48e7-bad4-f6240f1d2551.json b/data/hfopenllm_v2/meta/sabersaleh/Llama2-7B-SPO/cfbdbc52-d846-48e7-bad4-f6240f1d2551.json deleted file mode 100644 index 871db4fa3..000000000 --- a/data/hfopenllm_v2/meta/sabersaleh/Llama2-7B-SPO/cfbdbc52-d846-48e7-bad4-f6240f1d2551.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sabersaleh_Llama2-7B-SPO/1762652580.504033", - "retrieved_timestamp": "1762652580.504034", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sabersaleh/Llama2-7B-SPO", - "developer": "meta", - "inference_platform": "unknown", - "id": "sabersaleh/Llama2-7B-SPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.15667207453999832 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33834029554844597 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3874270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17569813829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/sabersaleh/Llama2-7B-SimPO/a530f116-e413-4d73-8d1f-2f44fcc0c6a9.json b/data/hfopenllm_v2/meta/sabersaleh/Llama2-7B-SimPO/a530f116-e413-4d73-8d1f-2f44fcc0c6a9.json deleted file mode 100644 index ed3578723..000000000 --- a/data/hfopenllm_v2/meta/sabersaleh/Llama2-7B-SimPO/a530f116-e413-4d73-8d1f-2f44fcc0c6a9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sabersaleh_Llama2-7B-SimPO/1762652580.504319", - "retrieved_timestamp": "1762652580.50432", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sabersaleh/Llama2-7B-SimPO", - "developer": "meta", - "inference_platform": "unknown", - "id": "sabersaleh/Llama2-7B-SimPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1658643510330368 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34891553101294254 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40069791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16414561170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/sabersaleh/Llama3/286860d2-7f43-4488-9d43-9058fe59b248.json b/data/hfopenllm_v2/meta/sabersaleh/Llama3/286860d2-7f43-4488-9d43-9058fe59b248.json deleted file mode 100644 index 9e463585a..000000000 --- a/data/hfopenllm_v2/meta/sabersaleh/Llama3/286860d2-7f43-4488-9d43-9058fe59b248.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sabersaleh_Llama3/1762652580.504582", - "retrieved_timestamp": "1762652580.504583", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sabersaleh/Llama3", - "developer": "meta", - "inference_platform": "unknown", - "id": "sabersaleh/Llama3", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3320777758569484 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47821899796340944 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39334375000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.316156914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/sabersalehk/Llama3-001-300/f73009ad-891e-41e7-a6bc-a271894f5511.json b/data/hfopenllm_v2/meta/sabersalehk/Llama3-001-300/f73009ad-891e-41e7-a6bc-a271894f5511.json deleted file mode 100644 index 0dcbcd146..000000000 --- 
a/data/hfopenllm_v2/meta/sabersalehk/Llama3-001-300/f73009ad-891e-41e7-a6bc-a271894f5511.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sabersalehk_Llama3-001-300/1762652580.504826", - "retrieved_timestamp": "1762652580.504826", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sabersalehk/Llama3-001-300", - "developer": "meta", - "inference_platform": "unknown", - "id": "sabersalehk/Llama3-001-300", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3178643776291351 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47445771982516544 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.052870090634441085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40639583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3158244680851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/sabersalehk/Llama3-SimPO/b88f3d13-a8ed-4e23-86ec-1531c3151f0f.json b/data/hfopenllm_v2/meta/sabersalehk/Llama3-SimPO/b88f3d13-a8ed-4e23-86ec-1531c3151f0f.json deleted file mode 100644 index 697ccd832..000000000 --- a/data/hfopenllm_v2/meta/sabersalehk/Llama3-SimPO/b88f3d13-a8ed-4e23-86ec-1531c3151f0f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sabersalehk_Llama3-SimPO/1762652580.505101", - "retrieved_timestamp": "1762652580.5051022", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sabersalehk/Llama3-SimPO", - "developer": "meta", - "inference_platform": "unknown", - "id": "sabersalehk/Llama3-SimPO", - "additional_details": { - "precision": "bfloat16", - "architecture": 
"LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36420142998355476 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48735382942408356 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40459375000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3156582446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/sabersalehk/Llama3_001_200/f673b2f9-8b77-42a3-9066-29f21a1ca0f8.json b/data/hfopenllm_v2/meta/sabersalehk/Llama3_001_200/f673b2f9-8b77-42a3-9066-29f21a1ca0f8.json deleted file mode 100644 index c62a4de37..000000000 --- a/data/hfopenllm_v2/meta/sabersalehk/Llama3_001_200/f673b2f9-8b77-42a3-9066-29f21a1ca0f8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sabersalehk_Llama3_001_200/1762652580.505313", - "retrieved_timestamp": "1762652580.505314", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sabersalehk/Llama3_001_200", - "developer": "meta", - "inference_platform": "unknown", - "id": "sabersalehk/Llama3_001_200", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.321836061649756 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4727921518419169 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4037291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31831781914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/sabersalehk/Llama3_01_300/55ae7ee9-2c50-45d6-ac0e-7c07bbad9a00.json b/data/hfopenllm_v2/meta/sabersalehk/Llama3_01_300/55ae7ee9-2c50-45d6-ac0e-7c07bbad9a00.json deleted file mode 100644 index c3466e327..000000000 --- a/data/hfopenllm_v2/meta/sabersalehk/Llama3_01_300/55ae7ee9-2c50-45d6-ac0e-7c07bbad9a00.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sabersalehk_Llama3_01_300/1762652580.505522", - "retrieved_timestamp": "1762652580.505523", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sabersalehk/Llama3_01_300", - "developer": "meta", - "inference_platform": "unknown", - "id": "sabersalehk/Llama3_01_300", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2958827023408999 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4691387139601247 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04984894259818731 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40648958333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.31241688829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/sakhan10/quantized_open_llama_3b_v2/f96ce5a9-7cc2-4380-9285-09052b906411.json b/data/hfopenllm_v2/meta/sakhan10/quantized_open_llama_3b_v2/f96ce5a9-7cc2-4380-9285-09052b906411.json deleted file mode 100644 index cfd0d244a..000000000 --- a/data/hfopenllm_v2/meta/sakhan10/quantized_open_llama_3b_v2/f96ce5a9-7cc2-4380-9285-09052b906411.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sakhan10_quantized_open_llama_3b_v2/1762652580.507647", - "retrieved_timestamp": "1762652580.507648", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sakhan10/quantized_open_llama_3b_v2", - "developer": "meta", - "inference_platform": "unknown", - "id": "sakhan10/quantized_open_llama_3b_v2", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18722212618075595 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3019800780121471 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3681666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10954122340425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/sequelbox/Llama3.1-70B-PlumChat/ab796471-db79-40a2-8147-72ed7099b355.json b/data/hfopenllm_v2/meta/sequelbox/Llama3.1-70B-PlumChat/ab796471-db79-40a2-8147-72ed7099b355.json deleted file mode 100644 index b3284d7c6..000000000 --- a/data/hfopenllm_v2/meta/sequelbox/Llama3.1-70B-PlumChat/ab796471-db79-40a2-8147-72ed7099b355.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sequelbox_Llama3.1-70B-PlumChat/1762652580.5115242", - "retrieved_timestamp": "1762652580.5115242", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sequelbox/Llama3.1-70B-PlumChat", - "developer": "meta", - "inference_platform": "unknown", - "id": "sequelbox/Llama3.1-70B-PlumChat", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5616131863455631 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6752815345736151 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028700906344411 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39093959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47737500000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.516373005319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/sequelbox/Llama3.1-8B-MOTH/3a820ba4-bdd8-4caf-a90a-d7e9fee52997.json b/data/hfopenllm_v2/meta/sequelbox/Llama3.1-8B-MOTH/3a820ba4-bdd8-4caf-a90a-d7e9fee52997.json deleted file mode 100644 index 9bc3144a4..000000000 --- a/data/hfopenllm_v2/meta/sequelbox/Llama3.1-8B-MOTH/3a820ba4-bdd8-4caf-a90a-d7e9fee52997.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sequelbox_Llama3.1-8B-MOTH/1762652580.511786", - "retrieved_timestamp": "1762652580.511787", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sequelbox/Llama3.1-8B-MOTH", - "developer": "meta", - "inference_platform": "unknown", - "id": "sequelbox/Llama3.1-8B-MOTH", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5244938984117696 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.490246673015408 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1216012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3689166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3338597074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/sequelbox/Llama3.1-8B-PlumChat/32f38aeb-615c-4785-a674-bd8a50eb1057.json b/data/hfopenllm_v2/meta/sequelbox/Llama3.1-8B-PlumChat/32f38aeb-615c-4785-a674-bd8a50eb1057.json deleted file mode 100644 index 8bb4b2d24..000000000 --- a/data/hfopenllm_v2/meta/sequelbox/Llama3.1-8B-PlumChat/32f38aeb-615c-4785-a674-bd8a50eb1057.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sequelbox_Llama3.1-8B-PlumChat/1762652580.512009", - "retrieved_timestamp": "1762652580.51201", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sequelbox/Llama3.1-8B-PlumChat", - "developer": "meta", - "inference_platform": "unknown", - "id": "sequelbox/Llama3.1-8B-PlumChat", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42427647530773904 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3873291395699702 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03625377643504532 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": 
{ - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3754583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21268284574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/sequelbox/Llama3.1-8B-PlumCode/2695c341-eabe-4809-9b87-9e771e1ee9d6.json b/data/hfopenllm_v2/meta/sequelbox/Llama3.1-8B-PlumCode/2695c341-eabe-4809-9b87-9e771e1ee9d6.json deleted file mode 100644 index 3944564cb..000000000 --- a/data/hfopenllm_v2/meta/sequelbox/Llama3.1-8B-PlumCode/2695c341-eabe-4809-9b87-9e771e1ee9d6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sequelbox_Llama3.1-8B-PlumCode/1762652580.512235", - "retrieved_timestamp": "1762652580.512235", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sequelbox/Llama3.1-8B-PlumCode", - "developer": "meta", - "inference_platform": "unknown", - "id": "sequelbox/Llama3.1-8B-PlumCode", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20448299401144518 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3368086861425416 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027190332326283987 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37734375000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23354388297872342 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/sequelbox/Llama3.1-8B-PlumMath/4734bf79-d464-43b4-8df3-1937f7c37796.json b/data/hfopenllm_v2/meta/sequelbox/Llama3.1-8B-PlumMath/4734bf79-d464-43b4-8df3-1937f7c37796.json deleted file mode 100644 index ecedb616a..000000000 --- 
a/data/hfopenllm_v2/meta/sequelbox/Llama3.1-8B-PlumMath/4734bf79-d464-43b4-8df3-1937f7c37796.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sequelbox_Llama3.1-8B-PlumMath/1762652580.512456", - "retrieved_timestamp": "1762652580.512456", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sequelbox/Llama3.1-8B-PlumMath", - "developer": "meta", - "inference_platform": "unknown", - "id": "sequelbox/Llama3.1-8B-PlumMath", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.224241678745728 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40323023090048143 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39185416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29753989361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/sethuiyer/LlamaZero-3.1-8B-Experimental-1208/abebe996-35e4-4fa6-a16c-0b33481d7357.json b/data/hfopenllm_v2/meta/sethuiyer/LlamaZero-3.1-8B-Experimental-1208/abebe996-35e4-4fa6-a16c-0b33481d7357.json deleted file mode 100644 index 798d77234..000000000 --- a/data/hfopenllm_v2/meta/sethuiyer/LlamaZero-3.1-8B-Experimental-1208/abebe996-35e4-4fa6-a16c-0b33481d7357.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sethuiyer_LlamaZero-3.1-8B-Experimental-1208/1762652580.5134048", - "retrieved_timestamp": "1762652580.513406", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sethuiyer/LlamaZero-3.1-8B-Experimental-1208", - "developer": "meta", - "inference_platform": "unknown", - "id": 
"sethuiyer/LlamaZero-3.1-8B-Experimental-1208", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6051022398347496 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49813698712445653 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10800604229607251 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38199999999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2999501329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/skumar9/Llama-medx_v2/1bfc4a7a-2ac8-4454-bbee-0db62608ce5a.json b/data/hfopenllm_v2/meta/skumar9/Llama-medx_v2/1bfc4a7a-2ac8-4454-bbee-0db62608ce5a.json deleted file mode 100644 index ecffbcd36..000000000 --- a/data/hfopenllm_v2/meta/skumar9/Llama-medx_v2/1bfc4a7a-2ac8-4454-bbee-0db62608ce5a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/skumar9_Llama-medx_v2/1762652580.517576", - "retrieved_timestamp": "1762652580.517576", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "skumar9/Llama-medx_v2", - "developer": "meta", - "inference_platform": "unknown", - "id": "skumar9/Llama-medx_v2", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4462337708391512 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4908589512175783 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09138972809667674 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36612500000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34632646276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/suayptalha/DeepSeek-R1-Distill-Llama-3B/4146ffb5-ac76-43b7-acdc-8c181f2c60d2.json b/data/hfopenllm_v2/meta/suayptalha/DeepSeek-R1-Distill-Llama-3B/4146ffb5-ac76-43b7-acdc-8c181f2c60d2.json deleted file mode 100644 index 198f53dda..000000000 --- a/data/hfopenllm_v2/meta/suayptalha/DeepSeek-R1-Distill-Llama-3B/4146ffb5-ac76-43b7-acdc-8c181f2c60d2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/suayptalha_DeepSeek-R1-Distill-Llama-3B/1762652580.543217", - "retrieved_timestamp": "1762652580.543217", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "suayptalha/DeepSeek-R1-Distill-Llama-3B", - "developer": "meta", - "inference_platform": "unknown", - "id": "suayptalha/DeepSeek-R1-Distill-Llama-3B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7092658590318134 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44517853159705956 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20921450151057402 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33958333333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": 
{ - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29778922872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/suayptalha/Komodo-Llama-3.2-3B-v2-fp16/d86e291c-cc26-475c-9ccd-e3ee68e8bee2.json b/data/hfopenllm_v2/meta/suayptalha/Komodo-Llama-3.2-3B-v2-fp16/d86e291c-cc26-475c-9ccd-e3ee68e8bee2.json deleted file mode 100644 index ea1afb7bc..000000000 --- a/data/hfopenllm_v2/meta/suayptalha/Komodo-Llama-3.2-3B-v2-fp16/d86e291c-cc26-475c-9ccd-e3ee68e8bee2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/suayptalha_Komodo-Llama-3.2-3B-v2-fp16/1762652580.543882", - "retrieved_timestamp": "1762652580.543883", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "suayptalha/Komodo-Llama-3.2-3B-v2-fp16", - "developer": "meta", - "inference_platform": "unknown", - "id": "suayptalha/Komodo-Llama-3.2-3B-v2-fp16", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6340532010620709 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43549964909074995 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34057291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523936170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/sumink/flflmillama/19f198e5-37b8-4d62-8cbe-849f6875d39e.json b/data/hfopenllm_v2/meta/sumink/flflmillama/19f198e5-37b8-4d62-8cbe-849f6875d39e.json deleted file mode 100644 index 0137c9bb9..000000000 --- a/data/hfopenllm_v2/meta/sumink/flflmillama/19f198e5-37b8-4d62-8cbe-849f6875d39e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sumink_flflmillama/1762652580.5473018", - "retrieved_timestamp": "1762652580.5473018", - 
"source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sumink/flflmillama", - "developer": "meta", - "inference_platform": "unknown", - "id": "sumink/flflmillama", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16756317681529453 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38511286094747693 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35911458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20960771276595744 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/sumink/llamaft/a13b4873-22c0-461a-b4ba-41246ede0dfa.json b/data/hfopenllm_v2/meta/sumink/llamaft/a13b4873-22c0-461a-b4ba-41246ede0dfa.json deleted file mode 100644 index 102409d4b..000000000 --- a/data/hfopenllm_v2/meta/sumink/llamaft/a13b4873-22c0-461a-b4ba-41246ede0dfa.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sumink_llamaft/1762652580.547796", - "retrieved_timestamp": "1762652580.547797", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sumink/llamaft", - "developer": "meta", - "inference_platform": "unknown", - "id": "sumink/llamaft", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16086871722584964 - } - }, - { - "evaluation_name": 
"BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3762775648269859 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3498125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21143617021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/sumink/llamamerge/f7406d3e-dbfa-4f12-946e-f4e58c728fa8.json b/data/hfopenllm_v2/meta/sumink/llamamerge/f7406d3e-dbfa-4f12-946e-f4e58c728fa8.json deleted file mode 100644 index 25046a696..000000000 --- a/data/hfopenllm_v2/meta/sumink/llamamerge/f7406d3e-dbfa-4f12-946e-f4e58c728fa8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sumink_llamamerge/1762652580.547998", - "retrieved_timestamp": "1762652580.547999", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sumink/llamamerge", - "developer": "meta", - "inference_platform": "unknown", - "id": "sumink/llamamerge", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.016 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26718107953563214 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46316160070587903 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on 
MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42397916666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2589760638297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/tenyx/Llama3-TenyxChat-70B/6fc094c0-ca29-4594-b086-2dae90195e8d.json b/data/hfopenllm_v2/meta/tenyx/Llama3-TenyxChat-70B/6fc094c0-ca29-4594-b086-2dae90195e8d.json deleted file mode 100644 index 9546aeaa7..000000000 --- a/data/hfopenllm_v2/meta/tenyx/Llama3-TenyxChat-70B/6fc094c0-ca29-4594-b086-2dae90195e8d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tenyx_Llama3-TenyxChat-70B/1762652580.5593112", - "retrieved_timestamp": "1762652580.5593119", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tenyx/Llama3-TenyxChat-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "tenyx/Llama3-TenyxChat-70B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8087086707713311 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6511486901811531 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23564954682779457 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42603125000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5210272606382979 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/theprint/CleverBoi-Llama-3.1-8B-v2/42ea4b8d-98af-4c57-8b55-cef38c473fd5.json b/data/hfopenllm_v2/meta/theprint/CleverBoi-Llama-3.1-8B-v2/42ea4b8d-98af-4c57-8b55-cef38c473fd5.json deleted file mode 100644 index b2bcbd589..000000000 --- 
a/data/hfopenllm_v2/meta/theprint/CleverBoi-Llama-3.1-8B-v2/42ea4b8d-98af-4c57-8b55-cef38c473fd5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/theprint_CleverBoi-Llama-3.1-8B-v2/1762652580.560884", - "retrieved_timestamp": "1762652580.560884", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "theprint/CleverBoi-Llama-3.1-8B-v2", - "developer": "meta", - "inference_platform": "unknown", - "id": "theprint/CleverBoi-Llama-3.1-8B-v2", - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 9.3 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19613957632415324 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46678160110644784 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.052870090634441085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37346875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31881648936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/theprint/Code-Llama-Bagel-8B/3a63b21d-0aaa-45d5-ae12-6d6c9777edbe.json b/data/hfopenllm_v2/meta/theprint/Code-Llama-Bagel-8B/3a63b21d-0aaa-45d5-ae12-6d6c9777edbe.json deleted file mode 100644 index 2eafbf37b..000000000 --- a/data/hfopenllm_v2/meta/theprint/Code-Llama-Bagel-8B/3a63b21d-0aaa-45d5-ae12-6d6c9777edbe.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/theprint_Code-Llama-Bagel-8B/1762652580.561388", - "retrieved_timestamp": "1762652580.5613928", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "theprint/Code-Llama-Bagel-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "theprint/Code-Llama-Bagel-8B", - "additional_details": { - "precision": "bfloat16", - 
"architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2529676813078188 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46974200049001086 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3679791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28216422872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/theprint/Llama-3.2-3B-VanRossum/78e423de-2f66-4c53-8d07-8401802973ca.json b/data/hfopenllm_v2/meta/theprint/Llama-3.2-3B-VanRossum/78e423de-2f66-4c53-8d07-8401802973ca.json deleted file mode 100644 index 2d47c7e49..000000000 --- a/data/hfopenllm_v2/meta/theprint/Llama-3.2-3B-VanRossum/78e423de-2f66-4c53-8d07-8401802973ca.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/theprint_Llama-3.2-3B-VanRossum/1762652580.562204", - "retrieved_timestamp": "1762652580.562206", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "theprint/Llama-3.2-3B-VanRossum", - "developer": "meta", - "inference_platform": "unknown", - "id": "theprint/Llama-3.2-3B-VanRossum", - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 3.696 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4782820693537591 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42787418229776697 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", 
- "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09743202416918428 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3441666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27701130319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/theprint/ReWiz-Llama-3.1-8B-v2/e57e6483-7e4c-4a64-8c58-890aafb38f37.json b/data/hfopenllm_v2/meta/theprint/ReWiz-Llama-3.1-8B-v2/e57e6483-7e4c-4a64-8c58-890aafb38f37.json deleted file mode 100644 index 5e11b8ff6..000000000 --- a/data/hfopenllm_v2/meta/theprint/ReWiz-Llama-3.1-8B-v2/e57e6483-7e4c-4a64-8c58-890aafb38f37.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/theprint_ReWiz-Llama-3.1-8B-v2/1762652580.5627892", - "retrieved_timestamp": "1762652580.56279", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "theprint/ReWiz-Llama-3.1-8B-v2", - "developer": "meta", - "inference_platform": "unknown", - "id": "theprint/ReWiz-Llama-3.1-8B-v2", - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 9.3 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23790542427425895 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46324275457450953 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.381375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.3310339095744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/theprint/ReWiz-Llama-3.2-3B/17d4fced-6a93-4e5e-8349-25dae16596f8.json b/data/hfopenllm_v2/meta/theprint/ReWiz-Llama-3.2-3B/17d4fced-6a93-4e5e-8349-25dae16596f8.json deleted file mode 100644 index ae687e85c..000000000 --- a/data/hfopenllm_v2/meta/theprint/ReWiz-Llama-3.2-3B/17d4fced-6a93-4e5e-8349-25dae16596f8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/theprint_ReWiz-Llama-3.2-3B/1762652580.5630422", - "retrieved_timestamp": "1762652580.563043", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "theprint/ReWiz-Llama-3.2-3B", - "developer": "meta", - "inference_platform": "unknown", - "id": "theprint/ReWiz-Llama-3.2-3B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4648931501748693 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4343257577815292 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1095166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.361375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28873005319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/togethercomputer/LLaMA-2-7B-32K/29dae40d-4786-4fbc-92fa-3415b0c35488.json b/data/hfopenllm_v2/meta/togethercomputer/LLaMA-2-7B-32K/29dae40d-4786-4fbc-92fa-3415b0c35488.json deleted file mode 100644 index 5d24e5fb1..000000000 --- a/data/hfopenllm_v2/meta/togethercomputer/LLaMA-2-7B-32K/29dae40d-4786-4fbc-92fa-3415b0c35488.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/togethercomputer_LLaMA-2-7B-32K/1762652580.574694", - "retrieved_timestamp": "1762652580.5746949", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": 
"Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "togethercomputer/LLaMA-2-7B-32K", - "developer": "meta", - "inference_platform": "unknown", - "id": "togethercomputer/LLaMA-2-7B-32K", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18649738250065384 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33995175217301715 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3753645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17677859042553193 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/trthminh1112/autotrain-llama32-1b-finetune/cad93026-baf2-47ef-a554-4d0ba0d5a946.json b/data/hfopenllm_v2/meta/trthminh1112/autotrain-llama32-1b-finetune/cad93026-baf2-47ef-a554-4d0ba0d5a946.json deleted file mode 100644 index 7a6c831e9..000000000 --- a/data/hfopenllm_v2/meta/trthminh1112/autotrain-llama32-1b-finetune/cad93026-baf2-47ef-a554-4d0ba0d5a946.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/trthminh1112_autotrain-llama32-1b-finetune/1762652580.577601", - "retrieved_timestamp": "1762652580.5776021", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "trthminh1112/autotrain-llama32-1b-finetune", - "developer": "meta", - "inference_platform": "unknown", - "id": "trthminh1112/autotrain-llama32-1b-finetune", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.1 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17685518867715438 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29956269409410674 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35127083333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10987367021276596 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/uukuguy/speechless-codellama-34b-v2.0/ddcf1dc2-5281-4d14-b870-7ed2fa44c8d0.json b/data/hfopenllm_v2/meta/uukuguy/speechless-codellama-34b-v2.0/ddcf1dc2-5281-4d14-b870-7ed2fa44c8d0.json deleted file mode 100644 index 0caef79eb..000000000 --- a/data/hfopenllm_v2/meta/uukuguy/speechless-codellama-34b-v2.0/ddcf1dc2-5281-4d14-b870-7ed2fa44c8d0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/uukuguy_speechless-codellama-34b-v2.0/1762652580.5824919", - "retrieved_timestamp": "1762652580.5824928", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "uukuguy/speechless-codellama-34b-v2.0", - "developer": "meta", - "inference_platform": "unknown", - "id": "uukuguy/speechless-codellama-34b-v2.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46042168113937687 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4813126697444618 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.2692953020134229 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37870833333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25423869680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/uukuguy/speechless-llama2-hermes-orca-platypus-wizardlm-13b/e9556ee4-63e8-4e0b-88df-62cc6c62c65a.json b/data/hfopenllm_v2/meta/uukuguy/speechless-llama2-hermes-orca-platypus-wizardlm-13b/e9556ee4-63e8-4e0b-88df-62cc6c62c65a.json deleted file mode 100644 index 002324501..000000000 --- a/data/hfopenllm_v2/meta/uukuguy/speechless-llama2-hermes-orca-platypus-wizardlm-13b/e9556ee4-63e8-4e0b-88df-62cc6c62c65a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/uukuguy_speechless-llama2-hermes-orca-platypus-wizardlm-13b/1762652580.5833302", - "retrieved_timestamp": "1762652580.583331", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "uukuguy/speechless-llama2-hermes-orca-platypus-wizardlm-13b", - "developer": "meta", - "inference_platform": "unknown", - "id": "uukuguy/speechless-llama2-hermes-orca-platypus-wizardlm-13b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.016 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45617517076911485 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48455373040676664 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4655 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25590093085106386 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/meta/vhab10/llama-3-8b-merged-linear/deed0e49-b9fd-4623-bb90-3e885bec9bb0.json b/data/hfopenllm_v2/meta/vhab10/llama-3-8b-merged-linear/deed0e49-b9fd-4623-bb90-3e885bec9bb0.json deleted file mode 100644 index 9459da7bd..000000000 --- a/data/hfopenllm_v2/meta/vhab10/llama-3-8b-merged-linear/deed0e49-b9fd-4623-bb90-3e885bec9bb0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/vhab10_llama-3-8b-merged-linear/1762652580.5860548", - "retrieved_timestamp": "1762652580.5860548", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "vhab10/llama-3-8b-merged-linear", - "developer": "meta", - "inference_platform": "unknown", - "id": "vhab10/llama-3-8b-merged-linear", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.65 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5916634529714491 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49370937443498536 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08157099697885196 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4190520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37042885638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/vicgalle/Configurable-Hermes-2-Pro-Llama-3-8B/469379ff-5526-44f4-be9b-8bf6185b917e.json b/data/hfopenllm_v2/meta/vicgalle/Configurable-Hermes-2-Pro-Llama-3-8B/469379ff-5526-44f4-be9b-8bf6185b917e.json deleted file mode 100644 index ce5a8746a..000000000 --- a/data/hfopenllm_v2/meta/vicgalle/Configurable-Hermes-2-Pro-Llama-3-8B/469379ff-5526-44f4-be9b-8bf6185b917e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/vicgalle_Configurable-Hermes-2-Pro-Llama-3-8B/1762652580.5867279", - "retrieved_timestamp": "1762652580.586729", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging 
Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "vicgalle/Configurable-Hermes-2-Pro-Llama-3-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "vicgalle/Configurable-Hermes-2-Pro-Llama-3-8B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.031 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5762510139762497 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5054841203275775 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07628398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4183645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3097573138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/vicgalle/Humanish-RP-Llama-3.1-8B/3b0e49aa-931b-4625-8e59-fed02b31372e.json b/data/hfopenllm_v2/meta/vicgalle/Humanish-RP-Llama-3.1-8B/3b0e49aa-931b-4625-8e59-fed02b31372e.json deleted file mode 100644 index d990a0747..000000000 --- a/data/hfopenllm_v2/meta/vicgalle/Humanish-RP-Llama-3.1-8B/3b0e49aa-931b-4625-8e59-fed02b31372e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/vicgalle_Humanish-RP-Llama-3.1-8B/1762652580.587956", - "retrieved_timestamp": "1762652580.587957", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "vicgalle/Humanish-RP-Llama-3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "vicgalle/Humanish-RP-Llama-3.1-8B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6669259786256023 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5100385476143247 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15181268882175228 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39520833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34765625 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/vicgalle/Roleplay-Llama-3-8B/89bafcc1-b175-45ec-b365-45938c1e8f33.json b/data/hfopenllm_v2/meta/vicgalle/Roleplay-Llama-3-8B/89bafcc1-b175-45ec-b365-45938c1e8f33.json deleted file mode 100644 index 08aec6b15..000000000 --- a/data/hfopenllm_v2/meta/vicgalle/Roleplay-Llama-3-8B/89bafcc1-b175-45ec-b365-45938c1e8f33.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/vicgalle_Roleplay-Llama-3-8B/1762652580.5885959", - "retrieved_timestamp": "1762652580.588597", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "vicgalle/Roleplay-Llama-3-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "vicgalle/Roleplay-Llama-3-8B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7320221456845614 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5012318206922323 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09138972809667674 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3528854166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.370844414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/viettelsecurity-ai/security-llama3.2-3b/2176e0d8-e0a5-4118-b15f-b272dc643d89.json b/data/hfopenllm_v2/meta/viettelsecurity-ai/security-llama3.2-3b/2176e0d8-e0a5-4118-b15f-b272dc643d89.json deleted file mode 100644 index 0b5e20318..000000000 --- a/data/hfopenllm_v2/meta/viettelsecurity-ai/security-llama3.2-3b/2176e0d8-e0a5-4118-b15f-b272dc643d89.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/viettelsecurity-ai_security-llama3.2-3b/1762652580.588792", - "retrieved_timestamp": "1762652580.588792", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "viettelsecurity-ai/security-llama3.2-3b", - "developer": "meta", - "inference_platform": "unknown", - "id": "viettelsecurity-ai/security-llama3.2-3b", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5908888416069362 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44005776161052806 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12613293051359517 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33790625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2837433510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/winglian/Llama-3-8b-64k-PoSE/76bbd348-21b9-4253-8085-d8c4eb0932f6.json b/data/hfopenllm_v2/meta/winglian/Llama-3-8b-64k-PoSE/76bbd348-21b9-4253-8085-d8c4eb0932f6.json deleted file mode 100644 index 
7865c2283..000000000 --- a/data/hfopenllm_v2/meta/winglian/Llama-3-8b-64k-PoSE/76bbd348-21b9-4253-8085-d8c4eb0932f6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/winglian_Llama-3-8b-64k-PoSE/1762652580.595902", - "retrieved_timestamp": "1762652580.595903", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "winglian/Llama-3-8b-64k-PoSE", - "developer": "meta", - "inference_platform": "unknown", - "id": "winglian/Llama-3-8b-64k-PoSE", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28569085581811815 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37021796005121793 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04154078549848943 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33955208333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2466755319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/winglian/llama-3-8b-256k-PoSE/5077856e-f85c-4395-8be9-e3e9bf3655cb.json b/data/hfopenllm_v2/meta/winglian/llama-3-8b-256k-PoSE/5077856e-f85c-4395-8be9-e3e9bf3655cb.json deleted file mode 100644 index 0a2899f3e..000000000 --- a/data/hfopenllm_v2/meta/winglian/llama-3-8b-256k-PoSE/5077856e-f85c-4395-8be9-e3e9bf3655cb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/winglian_llama-3-8b-256k-PoSE/1762652580.5961442", - "retrieved_timestamp": "1762652580.596145", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "winglian/llama-3-8b-256k-PoSE", - "developer": "meta", - "inference_platform": "unknown", - "id": "winglian/llama-3-8b-256k-PoSE", - "additional_details": 
{ - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2909114482905358 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3156583397739859 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33155208333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1116190159574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/xinchen9/Llama3.1_8B_Instruct_CoT/eddb5bfc-d5ae-44bc-8ffd-b1d318b0e3d2.json b/data/hfopenllm_v2/meta/xinchen9/Llama3.1_8B_Instruct_CoT/eddb5bfc-d5ae-44bc-8ffd-b1d318b0e3d2.json deleted file mode 100644 index b43ba5be7..000000000 --- a/data/hfopenllm_v2/meta/xinchen9/Llama3.1_8B_Instruct_CoT/eddb5bfc-d5ae-44bc-8ffd-b1d318b0e3d2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/xinchen9_Llama3.1_8B_Instruct_CoT/1762652580.5972009", - "retrieved_timestamp": "1762652580.5972018", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "xinchen9/Llama3.1_8B_Instruct_CoT", - "developer": "meta", - "inference_platform": "unknown", - "id": "xinchen9/Llama3.1_8B_Instruct_CoT", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2973565694579272 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4398206147249642 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 
5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43706249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2878989361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/xinchen9/Llama3.1_CoT/4ccfc9fe-c222-490e-badd-bfeecc9ede91.json b/data/hfopenllm_v2/meta/xinchen9/Llama3.1_CoT/4ccfc9fe-c222-490e-badd-bfeecc9ede91.json deleted file mode 100644 index 695e37903..000000000 --- a/data/hfopenllm_v2/meta/xinchen9/Llama3.1_CoT/4ccfc9fe-c222-490e-badd-bfeecc9ede91.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/xinchen9_Llama3.1_CoT/1762652580.597471", - "retrieved_timestamp": "1762652580.597472", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "xinchen9/Llama3.1_CoT", - "developer": "meta", - "inference_platform": "unknown", - "id": "xinchen9/Llama3.1_CoT", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22461624046419057 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43410143664277245 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43045833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2738530585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/xinchen9/Llama3.1_CoT_V1/501bff5b-2809-4af7-9600-d6471167b701.json b/data/hfopenllm_v2/meta/xinchen9/Llama3.1_CoT_V1/501bff5b-2809-4af7-9600-d6471167b701.json deleted file mode 100644 index 86670a284..000000000 --- a/data/hfopenllm_v2/meta/xinchen9/Llama3.1_CoT_V1/501bff5b-2809-4af7-9600-d6471167b701.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/xinchen9_Llama3.1_CoT_V1/1762652580.597682", - "retrieved_timestamp": "1762652580.597683", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "xinchen9/Llama3.1_CoT_V1", - "developer": "meta", - "inference_platform": "unknown", - "id": "xinchen9/Llama3.1_CoT_V1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2452991396162183 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4376001847280673 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03323262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45721875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2805019946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/yuvraj17/Llama3-8B-SuperNova-Spectrum-dare_ties/2bde390d-b448-4ac2-addd-215d722aa66b.json b/data/hfopenllm_v2/meta/yuvraj17/Llama3-8B-SuperNova-Spectrum-dare_ties/2bde390d-b448-4ac2-addd-215d722aa66b.json deleted file mode 100644 index 03b965f79..000000000 --- a/data/hfopenllm_v2/meta/yuvraj17/Llama3-8B-SuperNova-Spectrum-dare_ties/2bde390d-b448-4ac2-addd-215d722aa66b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/yuvraj17_Llama3-8B-SuperNova-Spectrum-dare_ties/1762652580.6118348", - "retrieved_timestamp": "1762652580.6118348", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "yuvraj17/Llama3-8B-SuperNova-Spectrum-dare_ties", - "developer": "meta", - "inference_platform": "unknown", - "id": "yuvraj17/Llama3-8B-SuperNova-Spectrum-dare_ties", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4012708502329375 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4615794426716074 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42109375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35738031914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/meta/yuvraj17/Llama3-8B-abliterated-Spectrum-slerp/45cd6db1-064f-45d9-89f2-d931b4f82326.json b/data/hfopenllm_v2/meta/yuvraj17/Llama3-8B-abliterated-Spectrum-slerp/45cd6db1-064f-45d9-89f2-d931b4f82326.json deleted file mode 100644 index d98fcb9aa..000000000 --- a/data/hfopenllm_v2/meta/yuvraj17/Llama3-8B-abliterated-Spectrum-slerp/45cd6db1-064f-45d9-89f2-d931b4f82326.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/yuvraj17_Llama3-8B-abliterated-Spectrum-slerp/1762652580.6120949", - "retrieved_timestamp": "1762652580.612096", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "yuvraj17/Llama3-8B-abliterated-Spectrum-slerp", - "developer": "meta", - "inference_platform": "unknown", - "id": "yuvraj17/Llama3-8B-abliterated-Spectrum-slerp", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2884878788281759 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4977912063897858 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39982291666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32571476063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/mhl1/Qwen2.5-0.5B-cinstruct-stage1/cdabdd54-6101-471c-9bd8-446953be986b.json b/data/hfopenllm_v2/mhl1/Qwen2.5-0.5B-cinstruct-stage1/cdabdd54-6101-471c-9bd8-446953be986b.json new file mode 100644 index 000000000..a4956c1b7 --- /dev/null +++ b/data/hfopenllm_v2/mhl1/Qwen2.5-0.5B-cinstruct-stage1/cdabdd54-6101-471c-9bd8-446953be986b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mhl1_Qwen2.5-0.5B-cinstruct-stage1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-0.5B-cinstruct-stage1", + "id": "mhl1/Qwen2.5-0.5B-cinstruct-stage1", + "developer": "mhl1", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1482 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3256 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": 
"Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0128 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.35 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1139 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/1024m/PHI-4-Hindi/29f2c6ef-0685-43f9-800b-4f10ddc3ddf7.json b/data/hfopenllm_v2/microsoft/1024m/PHI-4-Hindi/29f2c6ef-0685-43f9-800b-4f10ddc3ddf7.json deleted file mode 100644 index 092daea7a..000000000 --- a/data/hfopenllm_v2/microsoft/1024m/PHI-4-Hindi/29f2c6ef-0685-43f9-800b-4f10ddc3ddf7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/1024m_PHI-4-Hindi/1762652579.468371", - "retrieved_timestamp": "1762652579.4683719", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "1024m/PHI-4-Hindi", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "1024m/PHI-4-Hindi", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00816832670647216 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6710015642760666 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23338368580060423 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3976510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4913541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.523936170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/BlackBeenie/Neos-Phi-3-14B-v0.1/6d6aa9c5-cb3f-4c30-bd1a-ba951c9ad0e8.json b/data/hfopenllm_v2/microsoft/BlackBeenie/Neos-Phi-3-14B-v0.1/6d6aa9c5-cb3f-4c30-bd1a-ba951c9ad0e8.json deleted file mode 100644 index 2e0eafd96..000000000 --- a/data/hfopenllm_v2/microsoft/BlackBeenie/Neos-Phi-3-14B-v0.1/6d6aa9c5-cb3f-4c30-bd1a-ba951c9ad0e8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/BlackBeenie_Neos-Phi-3-14B-v0.1/1762652579.4966102", - "retrieved_timestamp": "1762652579.496611", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "BlackBeenie/Neos-Phi-3-14B-v0.1", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "BlackBeenie/Neos-Phi-3-14B-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 13.96 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4022449323350931 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6211931530444463 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1782477341389728 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41254166666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45636635638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Daemontatox/Phi-4-COT/4ab23cde-aadb-424d-a88e-e7029a2f5c57.json b/data/hfopenllm_v2/microsoft/Daemontatox/Phi-4-COT/4ab23cde-aadb-424d-a88e-e7029a2f5c57.json deleted file mode 100644 index 8f5ee78f8..000000000 --- 
a/data/hfopenllm_v2/microsoft/Daemontatox/Phi-4-COT/4ab23cde-aadb-424d-a88e-e7029a2f5c57.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Daemontatox_Phi-4-COT/1762652579.5296152", - "retrieved_timestamp": "1762652579.5296159", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Daemontatox/Phi-4-COT", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Daemontatox/Phi-4-COT", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17930313789633728 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6172933868833469 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2243202416918429 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33557046979865773 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.453 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.500498670212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Daemontatox/SphinX/118ee97a-cc78-4b4d-99c4-58d37b4a48ba.json b/data/hfopenllm_v2/microsoft/Daemontatox/SphinX/118ee97a-cc78-4b4d-99c4-58d37b4a48ba.json deleted file mode 100644 index fd1e43a6a..000000000 --- a/data/hfopenllm_v2/microsoft/Daemontatox/SphinX/118ee97a-cc78-4b4d-99c4-58d37b4a48ba.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Daemontatox_SphinX/1762652579.531104", - "retrieved_timestamp": "1762652579.531104", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Daemontatox/SphinX", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Daemontatox/SphinX", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, 
- "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5725042886208593 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5440583486084486 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3081570996978852 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44049999999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43658577127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Daemontatox/Sphinx2.0/07d85f99-840b-403a-bace-99712f3469b7.json b/data/hfopenllm_v2/microsoft/Daemontatox/Sphinx2.0/07d85f99-840b-403a-bace-99712f3469b7.json deleted file mode 100644 index 11f05ec51..000000000 --- a/data/hfopenllm_v2/microsoft/Daemontatox/Sphinx2.0/07d85f99-840b-403a-bace-99712f3469b7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Daemontatox_Sphinx2.0/1762652579.531323", - "retrieved_timestamp": "1762652579.531324", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Daemontatox/Sphinx2.0", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Daemontatox/Sphinx2.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7123133286346892 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.647283976671531 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40181268882175225 - } - 
}, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42603125000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5183676861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Daemontatox/TinySphinx/6d501ffa-e205-4522-9af5-7036463a5b05.json b/data/hfopenllm_v2/microsoft/Daemontatox/TinySphinx/6d501ffa-e205-4522-9af5-7036463a5b05.json deleted file mode 100644 index 3708c1f29..000000000 --- a/data/hfopenllm_v2/microsoft/Daemontatox/TinySphinx/6d501ffa-e205-4522-9af5-7036463a5b05.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Daemontatox_TinySphinx/1762652579.5315351", - "retrieved_timestamp": "1762652579.5315359", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Daemontatox/TinySphinx", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Daemontatox/TinySphinx", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.247 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2566900269063862 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33098404240871354 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33276041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1697972074468085 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/microsoft/Daemontatox/TinySphinx2.0/da5d131c-5ae9-462e-87b1-92ead75eddb9.json b/data/hfopenllm_v2/microsoft/Daemontatox/TinySphinx2.0/da5d131c-5ae9-462e-87b1-92ead75eddb9.json deleted file mode 100644 index bbe06be02..000000000 --- a/data/hfopenllm_v2/microsoft/Daemontatox/TinySphinx2.0/da5d131c-5ae9-462e-87b1-92ead75eddb9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Daemontatox_TinySphinx2.0/1762652579.531743", - "retrieved_timestamp": "1762652579.531744", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Daemontatox/TinySphinx2.0", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Daemontatox/TinySphinx2.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.247 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25351733400710114 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3168407073661037 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0324773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33825 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1731216755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Danielbrdz/Barcenas-14b-Phi-3-medium-ORPO/f9ce1ec0-e727-474b-acb7-1ba49311e355.json b/data/hfopenllm_v2/microsoft/Danielbrdz/Barcenas-14b-Phi-3-medium-ORPO/f9ce1ec0-e727-474b-acb7-1ba49311e355.json deleted file mode 100644 index 54639bff6..000000000 --- a/data/hfopenllm_v2/microsoft/Danielbrdz/Barcenas-14b-Phi-3-medium-ORPO/f9ce1ec0-e727-474b-acb7-1ba49311e355.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Danielbrdz_Barcenas-14b-Phi-3-medium-ORPO/1762652579.53347", - "retrieved_timestamp": "1762652579.5334709", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Danielbrdz/Barcenas-14b-Phi-3-medium-ORPO", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Danielbrdz/Barcenas-14b-Phi-3-medium-ORPO", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 13.96 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4799055395240185 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6536184886648629 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20241691842900303 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3263422818791946 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48075 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47232380319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Danielbrdz/Barcenas-14b-phi-4-v2/4180c069-33e8-4109-9d35-dde82549ba26.json b/data/hfopenllm_v2/microsoft/Danielbrdz/Barcenas-14b-phi-4-v2/4180c069-33e8-4109-9d35-dde82549ba26.json deleted file mode 100644 index cb8e73c8b..000000000 --- a/data/hfopenllm_v2/microsoft/Danielbrdz/Barcenas-14b-phi-4-v2/4180c069-33e8-4109-9d35-dde82549ba26.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Danielbrdz_Barcenas-14b-phi-4-v2/1762652579.533969", - "retrieved_timestamp": "1762652579.533969", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Danielbrdz/Barcenas-14b-phi-4-v2", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Danielbrdz/Barcenas-14b-phi-4-v2", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27747266142723526 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6573002324945257 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3217522658610272 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3783557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43994791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5243517287234043 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Danielbrdz/Barcenas-14b-phi-4/720029f0-41d5-4161-878e-4218f230455c.json b/data/hfopenllm_v2/microsoft/Danielbrdz/Barcenas-14b-phi-4/720029f0-41d5-4161-878e-4218f230455c.json deleted file mode 100644 index ecf60ac5d..000000000 --- a/data/hfopenllm_v2/microsoft/Danielbrdz/Barcenas-14b-phi-4/720029f0-41d5-4161-878e-4218f230455c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Danielbrdz_Barcenas-14b-phi-4/1762652579.533744", - "retrieved_timestamp": "1762652579.533744", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Danielbrdz/Barcenas-14b-phi-4", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Danielbrdz/Barcenas-14b-phi-4", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0497590836757581 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6769303819643072 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2583081570996979 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38338926174496646 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5096770833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5174534574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/DialoGPT-medium/8029cb75-8d3b-411d-b0eb-74539b8ecb2f.json b/data/hfopenllm_v2/microsoft/DialoGPT-medium/8029cb75-8d3b-411d-b0eb-74539b8ecb2f.json new file mode 100644 index 000000000..93673afea --- /dev/null +++ b/data/hfopenllm_v2/microsoft/DialoGPT-medium/8029cb75-8d3b-411d-b0eb-74539b8ecb2f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/microsoft_DialoGPT-medium/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DialoGPT-medium", + "id": "microsoft/DialoGPT-medium", + "developer": "microsoft", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GPT2LMHeadModel", + "params_billions": 0.345 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1479 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3014 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2542 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4287 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1119 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/DreadPoor/Morphing-8B-Model_Stock/0fd25475-5202-4cd1-b399-bfb8e113d85b.json b/data/hfopenllm_v2/microsoft/DreadPoor/Morphing-8B-Model_Stock/0fd25475-5202-4cd1-b399-bfb8e113d85b.json deleted file mode 100644 index 2ab6b9b03..000000000 --- a/data/hfopenllm_v2/microsoft/DreadPoor/Morphing-8B-Model_Stock/0fd25475-5202-4cd1-b399-bfb8e113d85b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DreadPoor_Morphing-8B-Model_Stock/1762652579.577464", - "retrieved_timestamp": "1762652579.577465", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/Morphing-8B-Model_Stock", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "DreadPoor/Morphing-8B-Model_Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.744536718130117 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5396942172954088 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18882175226586104 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4068645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38522273936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/EpistemeAI/DeepThinkers-Phi4/3c97155d-c086-42aa-af12-14316fcf723c.json b/data/hfopenllm_v2/microsoft/EpistemeAI/DeepThinkers-Phi4/3c97155d-c086-42aa-af12-14316fcf723c.json deleted file mode 100644 index 08c6c83cf..000000000 --- a/data/hfopenllm_v2/microsoft/EpistemeAI/DeepThinkers-Phi4/3c97155d-c086-42aa-af12-14316fcf723c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/EpistemeAI_DeepThinkers-Phi4/1762652579.599432", - "retrieved_timestamp": "1762652579.599433", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/DeepThinkers-Phi4", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "EpistemeAI/DeepThinkers-Phi4", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6939786433330231 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6790415739665393 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45845921450151056 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34060402684563756 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3980625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5257646276595744 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/EpistemeAI/Fireball-12B-v1.13a-philosophers/38fae832-3d96-457d-851b-7fcded3f7796.json b/data/hfopenllm_v2/microsoft/EpistemeAI/Fireball-12B-v1.13a-philosophers/38fae832-3d96-457d-851b-7fcded3f7796.json deleted file mode 100644 index bcbae6d76..000000000 --- a/data/hfopenllm_v2/microsoft/EpistemeAI/Fireball-12B-v1.13a-philosophers/38fae832-3d96-457d-851b-7fcded3f7796.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-12B-v1.13a-philosophers/1762652579.60018", - "retrieved_timestamp": "1762652579.600181", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI/Fireball-12B-v1.13a-philosophers", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "EpistemeAI/Fireball-12B-v1.13a-philosophers", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.0 - } - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08755324760524298 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5102697700597862 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04607250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4080729166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3366855053191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/EpistemeAI2/Fireball-Phi-3-medium-4k-inst-Philos/a60477a1-b815-4c82-a9e9-f017cb7b5ec9.json b/data/hfopenllm_v2/microsoft/EpistemeAI2/Fireball-Phi-3-medium-4k-inst-Philos/a60477a1-b815-4c82-a9e9-f017cb7b5ec9.json deleted file mode 100644 index a7ac94bec..000000000 --- a/data/hfopenllm_v2/microsoft/EpistemeAI2/Fireball-Phi-3-medium-4k-inst-Philos/a60477a1-b815-4c82-a9e9-f017cb7b5ec9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Phi-3-medium-4k-inst-Philos/1762652579.612791", - "retrieved_timestamp": "1762652579.612792", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI2/Fireball-Phi-3-medium-4k-inst-Philos", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "EpistemeAI2/Fireball-Phi-3-medium-4k-inst-Philos", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 13.96 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5312880933700359 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6177842639287514 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH 
Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17069486404833836 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33221476510067116 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41390625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45985704787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/FINGU-AI/Phi-4-RRStock/9d85345f-d46b-4431-b5fb-5cca99d92f21.json b/data/hfopenllm_v2/microsoft/FINGU-AI/Phi-4-RRStock/9d85345f-d46b-4431-b5fb-5cca99d92f21.json deleted file mode 100644 index f571f967f..000000000 --- a/data/hfopenllm_v2/microsoft/FINGU-AI/Phi-4-RRStock/9d85345f-d46b-4431-b5fb-5cca99d92f21.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FINGU-AI_Phi-4-RRStock/1762652579.616194", - "retrieved_timestamp": "1762652579.616194", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FINGU-AI/Phi-4-RRStock", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "FINGU-AI/Phi-4-RRStock", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.652 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28554125276488607 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6443442865581455 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0581570996978852 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3800335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44794791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48828125 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/HeraiHench/Phi-4-slerp-ReasoningRP-14B/ca0a3f22-099f-4207-acfe-4b70aa00171e.json b/data/hfopenllm_v2/microsoft/HeraiHench/Phi-4-slerp-ReasoningRP-14B/ca0a3f22-099f-4207-acfe-4b70aa00171e.json deleted file mode 100644 index ddca9897b..000000000 --- a/data/hfopenllm_v2/microsoft/HeraiHench/Phi-4-slerp-ReasoningRP-14B/ca0a3f22-099f-4207-acfe-4b70aa00171e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/HeraiHench_Phi-4-slerp-ReasoningRP-14B/1762652579.639999", - "retrieved_timestamp": "1762652579.64", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "HeraiHench/Phi-4-slerp-ReasoningRP-14B", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "HeraiHench/Phi-4-slerp-ReasoningRP-14B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 9.207 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15754642127333254 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41957191458446336 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3116145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18999335106382978 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Josephgflowers/Cinder-Phi-2-V1-F16-gguf/4d0a565c-14b2-4ce9-97c0-4d114548fe48.json b/data/hfopenllm_v2/microsoft/Josephgflowers/Cinder-Phi-2-V1-F16-gguf/4d0a565c-14b2-4ce9-97c0-4d114548fe48.json deleted file mode 100644 index dbf77a74a..000000000 --- a/data/hfopenllm_v2/microsoft/Josephgflowers/Cinder-Phi-2-V1-F16-gguf/4d0a565c-14b2-4ce9-97c0-4d114548fe48.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Josephgflowers_Cinder-Phi-2-V1-F16-gguf/1762652579.694953", - "retrieved_timestamp": "1762652579.694954", 
- "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Josephgflowers/Cinder-Phi-2-V1-F16-gguf", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Josephgflowers/Cinder-Phi-2-V1-F16-gguf", - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.78 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23565694579271884 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4396616219689493 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02416918429003021 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34345833333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2160904255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/MaziyarPanahi/calme-2.1-phi3-4b/79b4a850-85b6-45aa-8cc1-5210230a38aa.json b/data/hfopenllm_v2/microsoft/MaziyarPanahi/calme-2.1-phi3-4b/79b4a850-85b6-45aa-8cc1-5210230a38aa.json deleted file mode 100644 index 3e791bae4..000000000 --- a/data/hfopenllm_v2/microsoft/MaziyarPanahi/calme-2.1-phi3-4b/79b4a850-85b6-45aa-8cc1-5210230a38aa.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.1-phi3-4b/1762652579.751861", - "retrieved_timestamp": "1762652579.751862", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.1-phi3-4b", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.1-phi3-4b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.552520645221346 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5595320442699866 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13141993957703926 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3296979865771812 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40153124999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3745844414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/MaziyarPanahi/calme-2.1-phi3.5-4b/69433e39-158a-46df-a987-ac2a6b3af2af.json b/data/hfopenllm_v2/microsoft/MaziyarPanahi/calme-2.1-phi3.5-4b/69433e39-158a-46df-a987-ac2a6b3af2af.json deleted file mode 100644 index 052dd78e9..000000000 --- a/data/hfopenllm_v2/microsoft/MaziyarPanahi/calme-2.1-phi3.5-4b/69433e39-158a-46df-a987-ac2a6b3af2af.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.1-phi3.5-4b/1762652579.752121", - "retrieved_timestamp": "1762652579.7521222", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.1-phi3.5-4b", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.1-phi3.5-4b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5659095644002359 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5483695590203843 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34395973154362414 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3994583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3935339095744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/MaziyarPanahi/calme-2.2-phi3-4b/56593987-babd-4a30-9a20-f83e7d233809.json b/data/hfopenllm_v2/microsoft/MaziyarPanahi/calme-2.2-phi3-4b/56593987-babd-4a30-9a20-f83e7d233809.json deleted file mode 100644 index 0da756263..000000000 --- a/data/hfopenllm_v2/microsoft/MaziyarPanahi/calme-2.2-phi3-4b/56593987-babd-4a30-9a20-f83e7d233809.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.2-phi3-4b/1762652579.7536151", - "retrieved_timestamp": "1762652579.7536159", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.2-phi3-4b", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.2-phi3-4b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5069083365470286 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5529604896487258 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14501510574018128 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3975625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3813996010638298 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/microsoft/MaziyarPanahi/calme-2.3-phi3-4b/99b96f53-5ac6-4001-abc6-2a4e43f09028.json b/data/hfopenllm_v2/microsoft/MaziyarPanahi/calme-2.3-phi3-4b/99b96f53-5ac6-4001-abc6-2a4e43f09028.json deleted file mode 100644 index e2ae00a80..000000000 --- a/data/hfopenllm_v2/microsoft/MaziyarPanahi/calme-2.3-phi3-4b/99b96f53-5ac6-4001-abc6-2a4e43f09028.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.3-phi3-4b/1762652579.755463", - "retrieved_timestamp": "1762652579.755465", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.3-phi3-4b", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.3-phi3-4b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49264507063480456 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5537867816134527 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1472809667673716 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3988333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3828125 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/NikolaSigmoid/phi-4-14b/cae2d4a1-4632-420f-be40-594f4c001d4d.json b/data/hfopenllm_v2/microsoft/NikolaSigmoid/phi-4-14b/cae2d4a1-4632-420f-be40-594f4c001d4d.json deleted file mode 100644 index e35d463c0..000000000 --- a/data/hfopenllm_v2/microsoft/NikolaSigmoid/phi-4-14b/cae2d4a1-4632-420f-be40-594f4c001d4d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/NikolaSigmoid_phi-4-14b/1762652579.784184", - "retrieved_timestamp": "1762652579.7841852", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": 
"HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NikolaSigmoid/phi-4-14b", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "NikolaSigmoid/phi-4-14b", - "additional_details": { - "precision": "bfloat16", - "architecture": "", - "params_billions": 14.704 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05607898154674043 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.669500080799667 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2938066465256798 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4035234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5046875000000001 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.527842420212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/NikolaSigmoid/phi-4-1steps/a4763c48-f2ab-4f3e-bc1f-a7f4a9f33cf8.json b/data/hfopenllm_v2/microsoft/NikolaSigmoid/phi-4-1steps/a4763c48-f2ab-4f3e-bc1f-a7f4a9f33cf8.json deleted file mode 100644 index 615f5cc9b..000000000 --- a/data/hfopenllm_v2/microsoft/NikolaSigmoid/phi-4-1steps/a4763c48-f2ab-4f3e-bc1f-a7f4a9f33cf8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/NikolaSigmoid_phi-4-1steps/1762652579.784436", - "retrieved_timestamp": "1762652579.784437", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NikolaSigmoid/phi-4-1steps", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "NikolaSigmoid/phi-4-1steps", - "additional_details": { - "precision": "bfloat16", - "architecture": "", - "params_billions": 14.704 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05275668559422333 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.6707359457278651 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2983383685800604 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40184563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5020520833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.52734375 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/NikolaSigmoid/phi-4-300steps/e54de9df-52e5-43d2-92c3-9d5207c0e335.json b/data/hfopenllm_v2/microsoft/NikolaSigmoid/phi-4-300steps/e54de9df-52e5-43d2-92c3-9d5207c0e335.json deleted file mode 100644 index c767c4640..000000000 --- a/data/hfopenllm_v2/microsoft/NikolaSigmoid/phi-4-300steps/e54de9df-52e5-43d2-92c3-9d5207c0e335.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/NikolaSigmoid_phi-4-300steps/1762652579.784649", - "retrieved_timestamp": "1762652579.78465", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NikolaSigmoid/phi-4-300steps", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "NikolaSigmoid/phi-4-300steps", - "additional_details": { - "precision": "bfloat16", - "architecture": "", - "params_billions": 14.704 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05607898154674043 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6701123802649077 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4052013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.5033541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5287566489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Novaciano/Fusetrix-Dolphin-3.2-1B-GRPO_Creative_RP/582f87ef-50c5-4a5b-9d76-bc71f97bd2fb.json b/data/hfopenllm_v2/microsoft/Novaciano/Fusetrix-Dolphin-3.2-1B-GRPO_Creative_RP/582f87ef-50c5-4a5b-9d76-bc71f97bd2fb.json deleted file mode 100644 index b78f459f6..000000000 --- a/data/hfopenllm_v2/microsoft/Novaciano/Fusetrix-Dolphin-3.2-1B-GRPO_Creative_RP/582f87ef-50c5-4a5b-9d76-bc71f97bd2fb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Novaciano_Fusetrix-Dolphin-3.2-1B-GRPO_Creative_RP/1762652579.7955709", - "retrieved_timestamp": "1762652579.795572", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Novaciano/Fusetrix-Dolphin-3.2-1B-GRPO_Creative_RP", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Novaciano/Fusetrix-Dolphin-3.2-1B-GRPO_Creative_RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5342856952885011 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35023897852759145 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10498489425981873 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3183125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1823470744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/NyxKrage/Microsoft_Phi-4/46494bad-fb41-4fa3-b568-be4e6a22ae5b.json b/data/hfopenllm_v2/microsoft/NyxKrage/Microsoft_Phi-4/46494bad-fb41-4fa3-b568-be4e6a22ae5b.json deleted file mode 100644 index 44e9b9486..000000000 --- 
a/data/hfopenllm_v2/microsoft/NyxKrage/Microsoft_Phi-4/46494bad-fb41-4fa3-b568-be4e6a22ae5b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/NyxKrage_Microsoft_Phi-4/1762652579.7969122", - "retrieved_timestamp": "1762652579.796913", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NyxKrage/Microsoft_Phi-4", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "NyxKrage/Microsoft_Phi-4", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0585269307659233 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6690562305322874 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2990936555891239 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40604026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5033541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5286735372340425 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Orca-2-13b/4f9c7197-1eb6-45eb-851e-46707017fe7f.json b/data/hfopenllm_v2/microsoft/Orca-2-13b/4f9c7197-1eb6-45eb-851e-46707017fe7f.json deleted file mode 100644 index fcb4f32a4..000000000 --- a/data/hfopenllm_v2/microsoft/Orca-2-13b/4f9c7197-1eb6-45eb-851e-46707017fe7f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/microsoft_Orca-2-13b/1762652580.3541", - "retrieved_timestamp": "1762652580.3541", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "microsoft/Orca-2-13b", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "microsoft/Orca-2-13b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.0 - } - }, 
- "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3127933882099496 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48844897288396094 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03172205438066465 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5129687500000001 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27493351063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Orca-2-13b/65d10996-2c5b-4e11-9a07-319c2446a237.json b/data/hfopenllm_v2/microsoft/Orca-2-13b/65d10996-2c5b-4e11-9a07-319c2446a237.json new file mode 100644 index 000000000..346756a08 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Orca-2-13b/65d10996-2c5b-4e11-9a07-319c2446a237.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/microsoft_Orca-2-13b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Orca-2-13b", + "id": "microsoft/Orca-2-13b", + "developer": "microsoft", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3128 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4884 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": 
"Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0317 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.513 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2749 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Orca-2-7b/c13a5d55-44f7-43fc-a633-9af7677a26fb.json b/data/hfopenllm_v2/microsoft/Orca-2-7b/c13a5d55-44f7-43fc-a633-9af7677a26fb.json deleted file mode 100644 index 065cc9ed1..000000000 --- a/data/hfopenllm_v2/microsoft/Orca-2-7b/c13a5d55-44f7-43fc-a633-9af7677a26fb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/microsoft_Orca-2-7b/1762652580.354311", - "retrieved_timestamp": "1762652580.354312", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "microsoft/Orca-2-7b", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "microsoft/Orca-2-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2183462102776189 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4452132267545943 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": 
"Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5026145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23188164893617022 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Orca-2-7b/ef21d739-b122-4ab8-a8ff-a7cfecad5c8e.json b/data/hfopenllm_v2/microsoft/Orca-2-7b/ef21d739-b122-4ab8-a8ff-a7cfecad5c8e.json new file mode 100644 index 000000000..b1da27202 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Orca-2-7b/ef21d739-b122-4ab8-a8ff-a7cfecad5c8e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/microsoft_Orca-2-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Orca-2-7b", + "id": "microsoft/Orca-2-7b", + "developer": "microsoft", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2183 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4452 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0196 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5026 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2319 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Orion-zhen/phi-4-abliterated/3970f988-26f6-4810-839a-e5f4fcd6618a.json b/data/hfopenllm_v2/microsoft/Orion-zhen/phi-4-abliterated/3970f988-26f6-4810-839a-e5f4fcd6618a.json deleted file mode 100644 index 97056e47c..000000000 --- a/data/hfopenllm_v2/microsoft/Orion-zhen/phi-4-abliterated/3970f988-26f6-4810-839a-e5f4fcd6618a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Orion-zhen_phi-4-abliterated/1762652579.808864", - "retrieved_timestamp": "1762652579.808865", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Orion-zhen/phi-4-abliterated", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Orion-zhen/phi-4-abliterated", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05760271634817839 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6698239306664778 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3021148036253776 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40436241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.500625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5291722074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Phi-3-medium-128k-instruct/0c2670d3-1fb5-4825-860f-dc84dbd7bb99.json b/data/hfopenllm_v2/microsoft/Phi-3-medium-128k-instruct/0c2670d3-1fb5-4825-860f-dc84dbd7bb99.json deleted file mode 100644 index 93cb2a8e4..000000000 --- a/data/hfopenllm_v2/microsoft/Phi-3-medium-128k-instruct/0c2670d3-1fb5-4825-860f-dc84dbd7bb99.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/microsoft_Phi-3-medium-128k-instruct/1762652580.354526", - "retrieved_timestamp": "1762652580.354527", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "microsoft/Phi-3-medium-128k-instruct", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "microsoft/Phi-3-medium-128k-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 13.96 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6040029344361849 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6382322530870549 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19184290030211482 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33640939597315433 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4129479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47116023936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Phi-3-medium-128k-instruct/45f3b963-497b-4d89-ac66-9ff0ba8dadf8.json b/data/hfopenllm_v2/microsoft/Phi-3-medium-128k-instruct/45f3b963-497b-4d89-ac66-9ff0ba8dadf8.json new file mode 100644 index 000000000..a60530782 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Phi-3-medium-128k-instruct/45f3b963-497b-4d89-ac66-9ff0ba8dadf8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/microsoft_Phi-3-medium-128k-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-3-medium-128k-instruct", + "id": "microsoft/Phi-3-medium-128k-instruct", + "developer": "microsoft", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 13.96 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.604 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6382 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1918 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3364 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4129 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4712 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Phi-3-medium-4k-instruct/1b921ad2-9ed3-46d5-ab65-f125ce97b35f.json b/data/hfopenllm_v2/microsoft/Phi-3-medium-4k-instruct/1b921ad2-9ed3-46d5-ab65-f125ce97b35f.json deleted file mode 100644 index 0798ff4a4..000000000 --- a/data/hfopenllm_v2/microsoft/Phi-3-medium-4k-instruct/1b921ad2-9ed3-46d5-ab65-f125ce97b35f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/microsoft_Phi-3-medium-4k-instruct/1762652580.354986", - "retrieved_timestamp": "1762652580.35499", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "microsoft/Phi-3-medium-4k-instruct", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "microsoft/Phi-3-medium-4k-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 13.96 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6422713954529538 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6412464890555547 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19561933534743203 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33640939597315433 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42575 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4675864361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Phi-3-medium-4k-instruct/4173435b-d907-4ac5-a8bd-dfa2759f3fb6.json b/data/hfopenllm_v2/microsoft/Phi-3-medium-4k-instruct/4173435b-d907-4ac5-a8bd-dfa2759f3fb6.json new file mode 100644 index 000000000..5255ec1d7 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Phi-3-medium-4k-instruct/4173435b-d907-4ac5-a8bd-dfa2759f3fb6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/microsoft_Phi-3-medium-4k-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-3-medium-4k-instruct", + "id": "microsoft/Phi-3-medium-4k-instruct", + "developer": "microsoft", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 13.96 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6423 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6412 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1956 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + 
}, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3364 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4258 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4676 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Phi-3-mini-128k-instruct/0bcfeb34-8944-4f16-83d8-6fe851c39af6.json b/data/hfopenllm_v2/microsoft/Phi-3-mini-128k-instruct/0bcfeb34-8944-4f16-83d8-6fe851c39af6.json deleted file mode 100644 index a78d10c45..000000000 --- a/data/hfopenllm_v2/microsoft/Phi-3-mini-128k-instruct/0bcfeb34-8944-4f16-83d8-6fe851c39af6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/microsoft_Phi-3-mini-128k-instruct/1762652580.355347", - "retrieved_timestamp": "1762652580.3553479", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "microsoft/Phi-3-mini-128k-instruct", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "microsoft/Phi-3-mini-128k-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5976331688807919 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5574531792679852 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1404833836858006 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3936875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3734208776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Phi-3-mini-128k-instruct/b4a79f30-3a04-4f78-861e-1571316a0642.json b/data/hfopenllm_v2/microsoft/Phi-3-mini-128k-instruct/b4a79f30-3a04-4f78-861e-1571316a0642.json new file mode 100644 index 000000000..9e63038b5 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Phi-3-mini-128k-instruct/b4a79f30-3a04-4f78-861e-1571316a0642.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/microsoft_Phi-3-mini-128k-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-3-mini-128k-instruct", + "id": "microsoft/Phi-3-mini-128k-instruct", + "developer": "microsoft", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5976 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5575 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1405 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.318 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3937 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3734 + } + } + ] +} \ No newline at end of file diff 
--git a/data/hfopenllm_v2/microsoft/Phi-3-mini-4k-instruct/0c861cdd-1ddb-43a1-991b-300887e1da1b.json b/data/hfopenllm_v2/microsoft/Phi-3-mini-4k-instruct/0c861cdd-1ddb-43a1-991b-300887e1da1b.json deleted file mode 100644 index 4dba1fc09..000000000 --- a/data/hfopenllm_v2/microsoft/Phi-3-mini-4k-instruct/0c861cdd-1ddb-43a1-991b-300887e1da1b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/microsoft_Phi-3-mini-4k-instruct/1762652580.355623", - "retrieved_timestamp": "1762652580.355624", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "microsoft/Phi-3-mini-4k-instruct", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "microsoft/Phi-3-mini-4k-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5612884923115112 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5675972626334875 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1163141993957704 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3950208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38663563829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Phi-3-mini-4k-instruct/53426038-df38-45ba-b621-34231c9cad7f.json b/data/hfopenllm_v2/microsoft/Phi-3-mini-4k-instruct/53426038-df38-45ba-b621-34231c9cad7f.json new file mode 100644 index 000000000..bbd15d9d0 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Phi-3-mini-4k-instruct/53426038-df38-45ba-b621-34231c9cad7f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/microsoft_Phi-3-mini-4k-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-3-mini-4k-instruct", + "id": 
"microsoft/Phi-3-mini-4k-instruct", + "developer": "microsoft", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5477 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5491 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1639 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3322 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4284 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4022 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Phi-3-mini-4k-instruct/97e50198-ba06-4c17-81d3-59270b71a89d.json b/data/hfopenllm_v2/microsoft/Phi-3-mini-4k-instruct/97e50198-ba06-4c17-81d3-59270b71a89d.json deleted file mode 100644 index 5f018008a..000000000 --- a/data/hfopenllm_v2/microsoft/Phi-3-mini-4k-instruct/97e50198-ba06-4c17-81d3-59270b71a89d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/microsoft_Phi-3-mini-4k-instruct/1762652580.355825", - "retrieved_timestamp": "1762652580.355826", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "microsoft/Phi-3-mini-4k-instruct", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "microsoft/Phi-3-mini-4k-instruct", - 
"additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.547674614467391 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5490718919495822 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16389728096676737 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33221476510067116 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42841666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4021775265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Phi-3-mini-4k-instruct/fa758fe5-21ec-45cc-941f-5cb5ca0612b1.json b/data/hfopenllm_v2/microsoft/Phi-3-mini-4k-instruct/fa758fe5-21ec-45cc-941f-5cb5ca0612b1.json new file mode 100644 index 000000000..14dce77a3 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Phi-3-mini-4k-instruct/fa758fe5-21ec-45cc-941f-5cb5ca0612b1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/microsoft_Phi-3-mini-4k-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-3-mini-4k-instruct", + "id": "microsoft/Phi-3-mini-4k-instruct", + "developer": "microsoft", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5613 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5676 + } + }, + { + "evaluation_name": "MATH Level 
5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1163 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3196 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.395 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3866 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Phi-3-small-128k-instruct/d2a92a62-3bd0-4cb2-897b-742ea0d5203f.json b/data/hfopenllm_v2/microsoft/Phi-3-small-128k-instruct/d2a92a62-3bd0-4cb2-897b-742ea0d5203f.json new file mode 100644 index 000000000..4fa6eea19 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Phi-3-small-128k-instruct/d2a92a62-3bd0-4cb2-897b-742ea0d5203f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/microsoft_Phi-3-small-128k-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-3-small-128k-instruct", + "id": "microsoft/Phi-3-small-128k-instruct", + "developer": "microsoft", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3SmallForCausalLM", + "params_billions": 7.392 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6368 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6202 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH 
Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2026 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3171 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4378 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4491 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Phi-3-small-128k-instruct/f7c1a443-006b-4ade-9b0f-895392e52b7c.json b/data/hfopenllm_v2/microsoft/Phi-3-small-128k-instruct/f7c1a443-006b-4ade-9b0f-895392e52b7c.json deleted file mode 100644 index 5efb4d517..000000000 --- a/data/hfopenllm_v2/microsoft/Phi-3-small-128k-instruct/f7c1a443-006b-4ade-9b0f-895392e52b7c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/microsoft_Phi-3-small-128k-instruct/1762652580.356006", - "retrieved_timestamp": "1762652580.356006", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "microsoft/Phi-3-small-128k-instruct", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "microsoft/Phi-3-small-128k-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3SmallForCausalLM", - "params_billions": 7.392 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6368258443153056 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6202176778696983 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2026086956521739 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - 
{ - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43784375000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4490525265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Phi-3-small-8k-instruct/8b752519-63d4-4638-b56e-1c45c7f4694e.json b/data/hfopenllm_v2/microsoft/Phi-3-small-8k-instruct/8b752519-63d4-4638-b56e-1c45c7f4694e.json new file mode 100644 index 000000000..ac0af8542 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Phi-3-small-8k-instruct/8b752519-63d4-4638-b56e-1c45c7f4694e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/microsoft_Phi-3-small-8k-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-3-small-8k-instruct", + "id": "microsoft/Phi-3-small-8k-instruct", + "developer": "microsoft", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3SmallForCausalLM", + "params_billions": 7.392 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6497 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6208 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1887 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4558 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": 
"hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4506 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Phi-3-small-8k-instruct/f4c62b5d-fc1d-4421-9be8-e7e4af642284.json b/data/hfopenllm_v2/microsoft/Phi-3-small-8k-instruct/f4c62b5d-fc1d-4421-9be8-e7e4af642284.json deleted file mode 100644 index da09bc8e2..000000000 --- a/data/hfopenllm_v2/microsoft/Phi-3-small-8k-instruct/f4c62b5d-fc1d-4421-9be8-e7e4af642284.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/microsoft_Phi-3-small-8k-instruct/1762652580.356211", - "retrieved_timestamp": "1762652580.356212", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "microsoft/Phi-3-small-8k-instruct", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "microsoft/Phi-3-small-8k-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3SmallForCausalLM", - "params_billions": 7.392 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6496651107949131 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6208364880870563 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18869565217391304 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45579166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4506316489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Phi-3.5-MoE-instruct/8da71b7c-7b73-453f-998b-84e70b54e471.json b/data/hfopenllm_v2/microsoft/Phi-3.5-MoE-instruct/8da71b7c-7b73-453f-998b-84e70b54e471.json new file mode 100644 index 000000000..abd80b617 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Phi-3.5-MoE-instruct/8da71b7c-7b73-453f-998b-84e70b54e471.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/microsoft_Phi-3.5-MoE-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-3.5-MoE-instruct", + "id": "microsoft/Phi-3.5-MoE-instruct", + "developer": "microsoft", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 42.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6925 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6408 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3119 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3557 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4565 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4658 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Phi-3.5-MoE-instruct/ae57c3e7-4042-43eb-baa2-b033d1b4867c.json b/data/hfopenllm_v2/microsoft/Phi-3.5-MoE-instruct/ae57c3e7-4042-43eb-baa2-b033d1b4867c.json deleted file mode 100644 index e3460f7b6..000000000 --- a/data/hfopenllm_v2/microsoft/Phi-3.5-MoE-instruct/ae57c3e7-4042-43eb-baa2-b033d1b4867c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/microsoft_Phi-3.5-MoE-instruct/1762652580.356415", - "retrieved_timestamp": "1762652580.356415", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "microsoft/Phi-3.5-MoE-instruct", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "microsoft/Phi-3.5-MoE-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 42.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.692454908531585 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.640762564622586 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3119335347432024 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35570469798657717 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4564791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46575797872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Phi-3.5-mini-instruct/2b7b1216-3ea7-48f1-89f6-e5d84fef2b32.json b/data/hfopenllm_v2/microsoft/Phi-3.5-mini-instruct/2b7b1216-3ea7-48f1-89f6-e5d84fef2b32.json new file mode 100644 index 000000000..5a22577ff --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Phi-3.5-mini-instruct/2b7b1216-3ea7-48f1-89f6-e5d84fef2b32.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/microsoft_Phi-3.5-mini-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-3.5-mini-instruct", + "id": "microsoft/Phi-3.5-mini-instruct", + "developer": "microsoft", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5775 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + 
"dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5518 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1964 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3398 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4021 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3962 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Phi-3.5-mini-instruct/42448d73-f9e0-4eb2-bd6a-74614d08d55c.json b/data/hfopenllm_v2/microsoft/Phi-3.5-mini-instruct/42448d73-f9e0-4eb2-bd6a-74614d08d55c.json deleted file mode 100644 index 7253c43ee..000000000 --- a/data/hfopenllm_v2/microsoft/Phi-3.5-mini-instruct/42448d73-f9e0-4eb2-bd6a-74614d08d55c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/microsoft_Phi-3.5-mini-instruct/1762652580.356627", - "retrieved_timestamp": "1762652580.356628", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "microsoft/Phi-3.5-mini-instruct", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "microsoft/Phi-3.5-mini-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5774500547436359 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5517785126111956 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19637462235649547 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33976510067114096 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.402125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39619348404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Phi-4-mini-instruct/1d02fe1c-f31d-4d38-a8c3-dc427e25cb80.json b/data/hfopenllm_v2/microsoft/Phi-4-mini-instruct/1d02fe1c-f31d-4d38-a8c3-dc427e25cb80.json deleted file mode 100644 index ec9d45544..000000000 --- a/data/hfopenllm_v2/microsoft/Phi-4-mini-instruct/1d02fe1c-f31d-4d38-a8c3-dc427e25cb80.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/microsoft_Phi-4-mini-instruct/1762652580.356846", - "retrieved_timestamp": "1762652580.356847", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "microsoft/Phi-4-mini-instruct", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "microsoft/Phi-4-mini-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.836 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7377923908562614 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.568862935505404 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16993957703927492 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3873020833333333 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39320146276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Phi-4-mini-instruct/37e19712-3197-42da-a8f2-ae1f36c2b06c.json b/data/hfopenllm_v2/microsoft/Phi-4-mini-instruct/37e19712-3197-42da-a8f2-ae1f36c2b06c.json new file mode 100644 index 000000000..7a263b228 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Phi-4-mini-instruct/37e19712-3197-42da-a8f2-ae1f36c2b06c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/microsoft_Phi-4-mini-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-4-mini-instruct", + "id": "microsoft/Phi-4-mini-instruct", + "developer": "microsoft", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.836 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7378 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5689 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1699 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3873 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3932 + } + } + ] +} \ No 
newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Quazim0t0/CoT_Phi/ed579ba1-fcd3-4279-ac93-d0340a771e43.json b/data/hfopenllm_v2/microsoft/Quazim0t0/CoT_Phi/ed579ba1-fcd3-4279-ac93-d0340a771e43.json deleted file mode 100644 index 1938ea3d0..000000000 --- a/data/hfopenllm_v2/microsoft/Quazim0t0/CoT_Phi/ed579ba1-fcd3-4279-ac93-d0340a771e43.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_CoT_Phi/1762652579.820767", - "retrieved_timestamp": "1762652579.820768", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/CoT_Phi", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Quazim0t0/CoT_Phi", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6158681188136367 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6750841958594904 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33081570996978854 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35822147651006714 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42435416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4901097074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Quazim0t0/Lo-Phi-14b/b37d3d27-5ba0-44d6-bd19-1196a98b75b4.json b/data/hfopenllm_v2/microsoft/Quazim0t0/Lo-Phi-14b/b37d3d27-5ba0-44d6-bd19-1196a98b75b4.json deleted file mode 100644 index 998c7e9ec..000000000 --- a/data/hfopenllm_v2/microsoft/Quazim0t0/Lo-Phi-14b/b37d3d27-5ba0-44d6-bd19-1196a98b75b4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Lo-Phi-14b/1762652579.825307", - "retrieved_timestamp": "1762652579.8253078", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" 
- }, - "model_info": { - "name": "Quazim0t0/Lo-Phi-14b", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Quazim0t0/Lo-Phi-14b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4941189377518318 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6851928144814953 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5196374622356495 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42323958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5369015957446809 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Quazim0t0/Math_Phi4_Reason/1c2a87ca-9f1a-4d32-b1da-743927b722b0.json b/data/hfopenllm_v2/microsoft/Quazim0t0/Math_Phi4_Reason/1c2a87ca-9f1a-4d32-b1da-743927b722b0.json deleted file mode 100644 index ccf3b03a7..000000000 --- a/data/hfopenllm_v2/microsoft/Quazim0t0/Math_Phi4_Reason/1c2a87ca-9f1a-4d32-b1da-743927b722b0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Math_Phi4_Reason/1762652579.826147", - "retrieved_timestamp": "1762652579.826147", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Math_Phi4_Reason", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Quazim0t0/Math_Phi4_Reason", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3220111526305758 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.6240212275403677 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32779456193353473 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4034270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5029920212765957 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Quazim0t0/Phi4.Turn.R1Distill.16bit/44749932-f3e3-45ad-bb4b-135a6d656e3b.json b/data/hfopenllm_v2/microsoft/Quazim0t0/Phi4.Turn.R1Distill.16bit/44749932-f3e3-45ad-bb4b-135a6d656e3b.json deleted file mode 100644 index 00bc638e2..000000000 --- a/data/hfopenllm_v2/microsoft/Quazim0t0/Phi4.Turn.R1Distill.16bit/44749932-f3e3-45ad-bb4b-135a6d656e3b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Phi4.Turn.R1Distill.16bit/1762652579.8283992", - "retrieved_timestamp": "1762652579.8283992", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Phi4.Turn.R1Distill.16bit", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Quazim0t0/Phi4.Turn.R1Distill.16bit", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31264378515671754 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6563340892011863 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2311178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", 
- "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39021875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5256815159574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Quazim0t0/Phi4.Turn.R1Distill_v1.5.1-Tensors/5f1b91c8-28d0-4274-8979-32416003fafb.json b/data/hfopenllm_v2/microsoft/Quazim0t0/Phi4.Turn.R1Distill_v1.5.1-Tensors/5f1b91c8-28d0-4274-8979-32416003fafb.json deleted file mode 100644 index aeeafa9a9..000000000 --- a/data/hfopenllm_v2/microsoft/Quazim0t0/Phi4.Turn.R1Distill_v1.5.1-Tensors/5f1b91c8-28d0-4274-8979-32416003fafb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Phi4.Turn.R1Distill_v1.5.1-Tensors/1762652579.8286002", - "retrieved_timestamp": "1762652579.8286011", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Phi4.Turn.R1Distill_v1.5.1-Tensors", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Quazim0t0/Phi4.Turn.R1Distill_v1.5.1-Tensors", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2995296923274689 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.645570250166195 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39285416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.51171875 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Quazim0t0/Phi4Basis-14B-sce/d101111a-31bd-4eec-9a53-52543f6d5fd5.json b/data/hfopenllm_v2/microsoft/Quazim0t0/Phi4Basis-14B-sce/d101111a-31bd-4eec-9a53-52543f6d5fd5.json deleted file mode 100644 index a2da009f2..000000000 --- 
a/data/hfopenllm_v2/microsoft/Quazim0t0/Phi4Basis-14B-sce/d101111a-31bd-4eec-9a53-52543f6d5fd5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Phi4Basis-14B-sce/1762652579.828811", - "retrieved_timestamp": "1762652579.8288121", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/Phi4Basis-14B-sce", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Quazim0t0/Phi4Basis-14B-sce", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6501648958097848 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6909074263536413 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4788519637462236 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43378125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5389793882978723 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Quazim0t0/ThinkPhi1.1-Tensors/056e62d9-ab3e-4bf3-8693-47a5aea7f84f.json b/data/hfopenllm_v2/microsoft/Quazim0t0/ThinkPhi1.1-Tensors/056e62d9-ab3e-4bf3-8693-47a5aea7f84f.json deleted file mode 100644 index 97ae262a8..000000000 --- a/data/hfopenllm_v2/microsoft/Quazim0t0/ThinkPhi1.1-Tensors/056e62d9-ab3e-4bf3-8693-47a5aea7f84f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_ThinkPhi1.1-Tensors/1762652579.831269", - "retrieved_timestamp": "1762652579.831269", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/ThinkPhi1.1-Tensors", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Quazim0t0/ThinkPhi1.1-Tensors", - "additional_details": { - 
"precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3907543096761038 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6449416604455037 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18202416918429004 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.418 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4907746010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Quazim0t0/graphite-14b-sce/bd98b886-a899-4022-aee4-09ea0e491fe3.json b/data/hfopenllm_v2/microsoft/Quazim0t0/graphite-14b-sce/bd98b886-a899-4022-aee4-09ea0e491fe3.json deleted file mode 100644 index 9ecbdb593..000000000 --- a/data/hfopenllm_v2/microsoft/Quazim0t0/graphite-14b-sce/bd98b886-a899-4022-aee4-09ea0e491fe3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Quazim0t0_graphite-14b-sce/1762652579.833386", - "retrieved_timestamp": "1762652579.833387", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Quazim0t0/graphite-14b-sce", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Quazim0t0/graphite-14b-sce", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3216864585965239 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6631420093244736 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30060422960725075 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.398125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5280086436170213 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Sakalti/Phi3.5-Comets-3.8B/7d9a3955-232c-4a93-b879-bd065bab4768.json b/data/hfopenllm_v2/microsoft/Sakalti/Phi3.5-Comets-3.8B/7d9a3955-232c-4a93-b879-bd065bab4768.json deleted file mode 100644 index 8010ee002..000000000 --- a/data/hfopenllm_v2/microsoft/Sakalti/Phi3.5-Comets-3.8B/7d9a3955-232c-4a93-b879-bd065bab4768.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Sakalti_Phi3.5-Comets-3.8B/1762652579.858093", - "retrieved_timestamp": "1762652579.858093", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sakalti/Phi3.5-Comets-3.8B", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Sakalti/Phi3.5-Comets-3.8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20942876013422163 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3335116874180515 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24916107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3763541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11527593085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/SicariusSicariiStuff/Phi-Line_14B/12b2a13d-2b38-47e6-a6d2-3d5a30bff5ae.json b/data/hfopenllm_v2/microsoft/SicariusSicariiStuff/Phi-Line_14B/12b2a13d-2b38-47e6-a6d2-3d5a30bff5ae.json deleted file mode 100644 index df6cf71e0..000000000 --- a/data/hfopenllm_v2/microsoft/SicariusSicariiStuff/Phi-Line_14B/12b2a13d-2b38-47e6-a6d2-3d5a30bff5ae.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Phi-Line_14B/1762652579.8832798", - "retrieved_timestamp": "1762652579.8832798", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SicariusSicariiStuff/Phi-Line_14B", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "SicariusSicariiStuff/Phi-Line_14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6495653754260917 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6154430096216078 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3859516616314199 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35318791946308725 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44785416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5453789893617021 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/SicariusSicariiStuff/Phi-lthy4/56fa06dd-fd07-4613-9ac5-81c739cb6a64.json b/data/hfopenllm_v2/microsoft/SicariusSicariiStuff/Phi-lthy4/56fa06dd-fd07-4613-9ac5-81c739cb6a64.json deleted file mode 100644 index d66a0f2c4..000000000 --- a/data/hfopenllm_v2/microsoft/SicariusSicariiStuff/Phi-lthy4/56fa06dd-fd07-4613-9ac5-81c739cb6a64.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Phi-lthy4/1762652579.883529", - "retrieved_timestamp": "1762652579.88353", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "SicariusSicariiStuff/Phi-lthy4", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "SicariusSicariiStuff/Phi-lthy4", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 11.933 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7679423928509688 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.587935701572946 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13670694864048338 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40829166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.433344414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Triangle104/Phi-4-AbliteratedRP/ef628438-c2ff-4939-8bf1-09f1df25fd15.json b/data/hfopenllm_v2/microsoft/Triangle104/Phi-4-AbliteratedRP/ef628438-c2ff-4939-8bf1-09f1df25fd15.json deleted file mode 100644 index 4d7a69c67..000000000 --- a/data/hfopenllm_v2/microsoft/Triangle104/Phi-4-AbliteratedRP/ef628438-c2ff-4939-8bf1-09f1df25fd15.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Phi-4-AbliteratedRP/1762652579.931047", - "retrieved_timestamp": "1762652579.931048", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Phi-4-AbliteratedRP", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Triangle104/Phi-4-AbliteratedRP", - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49227050891634194 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6708776140201277 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3074018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3951342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5098333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.530751329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Triangle104/Phi4-RP-o1-Ablit/c3578998-b9dc-4b42-a8cb-0bdf05cffc9f.json b/data/hfopenllm_v2/microsoft/Triangle104/Phi4-RP-o1-Ablit/c3578998-b9dc-4b42-a8cb-0bdf05cffc9f.json deleted file mode 100644 index 64b04cd5d..000000000 --- a/data/hfopenllm_v2/microsoft/Triangle104/Phi4-RP-o1-Ablit/c3578998-b9dc-4b42-a8cb-0bdf05cffc9f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Phi4-RP-o1-Ablit/1762652579.93156", - "retrieved_timestamp": "1762652579.93156", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Phi4-RP-o1-Ablit", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Triangle104/Phi4-RP-o1-Ablit", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02385559205131274 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6629825730619672 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38821752265861026 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36325503355704697 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47541666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5104720744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Triangle104/Phi4-RP-o1/9ed49666-aee1-43d0-8c7c-98c178860f0c.json b/data/hfopenllm_v2/microsoft/Triangle104/Phi4-RP-o1/9ed49666-aee1-43d0-8c7c-98c178860f0c.json deleted file mode 100644 index 20dadd6f9..000000000 --- a/data/hfopenllm_v2/microsoft/Triangle104/Phi4-RP-o1/9ed49666-aee1-43d0-8c7c-98c178860f0c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Phi4-RP-o1/1762652579.9312892", - "retrieved_timestamp": "1762652579.9312901", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Phi4-RP-o1", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Triangle104/Phi4-RP-o1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.022007163215822904 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6652563961373095 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3776435045317221 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3733221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4755729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5110538563829787 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Undi95/Phi4-abliterated/29c3f781-f49c-4afc-bbc4-a47aebc91f71.json 
b/data/hfopenllm_v2/microsoft/Undi95/Phi4-abliterated/29c3f781-f49c-4afc-bbc4-a47aebc91f71.json deleted file mode 100644 index f3b95a0a6..000000000 --- a/data/hfopenllm_v2/microsoft/Undi95/Phi4-abliterated/29c3f781-f49c-4afc-bbc4-a47aebc91f71.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Undi95_Phi4-abliterated/1762652579.9391701", - "retrieved_timestamp": "1762652579.939171", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Undi95/Phi4-abliterated", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Undi95/Phi4-abliterated", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6617552538375954 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.680902103041113 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37009063444108764 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4034270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.528091755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/VAGOsolutions/SauerkrautLM-Phi-3-medium/ae8b39a7-7fca-441f-bae3-8db76879cefe.json b/data/hfopenllm_v2/microsoft/VAGOsolutions/SauerkrautLM-Phi-3-medium/ae8b39a7-7fca-441f-bae3-8db76879cefe.json deleted file mode 100644 index fe3900adf..000000000 --- a/data/hfopenllm_v2/microsoft/VAGOsolutions/SauerkrautLM-Phi-3-medium/ae8b39a7-7fca-441f-bae3-8db76879cefe.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-Phi-3-medium/1762652579.942282", - "retrieved_timestamp": "1762652579.942282", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": 
{ - "name": "VAGOsolutions/SauerkrautLM-Phi-3-medium", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "VAGOsolutions/SauerkrautLM-Phi-3-medium", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 13.96 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4408879550703245 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6432931765847228 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16012084592145015 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3347315436241611 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4845 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46650598404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Xiaojian9992024/Phi-4-Megatron-Empathetic/aec0af15-927b-48bd-a889-d4715aff4c42.json b/data/hfopenllm_v2/microsoft/Xiaojian9992024/Phi-4-Megatron-Empathetic/aec0af15-927b-48bd-a889-d4715aff4c42.json deleted file mode 100644 index 8b6bfb402..000000000 --- a/data/hfopenllm_v2/microsoft/Xiaojian9992024/Phi-4-Megatron-Empathetic/aec0af15-927b-48bd-a889-d4715aff4c42.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Phi-4-Megatron-Empathetic/1762652579.952935", - "retrieved_timestamp": "1762652579.952936", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Xiaojian9992024/Phi-4-Megatron-Empathetic", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Xiaojian9992024/Phi-4-Megatron-Empathetic", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01726086783068924 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6673396558729835 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26963746223564955 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3859060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5071354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5082280585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Xiaojian9992024/Phi-4-mini-UNOFFICAL/058de011-1e80-4a6d-803f-8ba7f927cd7f.json b/data/hfopenllm_v2/microsoft/Xiaojian9992024/Phi-4-mini-UNOFFICAL/058de011-1e80-4a6d-803f-8ba7f927cd7f.json deleted file mode 100644 index adbe9585a..000000000 --- a/data/hfopenllm_v2/microsoft/Xiaojian9992024/Phi-4-mini-UNOFFICAL/058de011-1e80-4a6d-803f-8ba7f927cd7f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Phi-4-mini-UNOFFICAL/1762652579.9531882", - "retrieved_timestamp": "1762652579.9531891", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Xiaojian9992024/Phi-4-mini-UNOFFICAL", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Xiaojian9992024/Phi-4-mini-UNOFFICAL", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.754 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12732106366662677 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29444372790183987 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2407718120805369 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3368229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11444481382978723 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/Youlln/3PRYMMAL-PHI3-3B-SLERP/2c53181b-8681-46ad-b739-396b1ecb163c.json b/data/hfopenllm_v2/microsoft/Youlln/3PRYMMAL-PHI3-3B-SLERP/2c53181b-8681-46ad-b739-396b1ecb163c.json deleted file mode 100644 index d708a4705..000000000 --- a/data/hfopenllm_v2/microsoft/Youlln/3PRYMMAL-PHI3-3B-SLERP/2c53181b-8681-46ad-b739-396b1ecb163c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Youlln_3PRYMMAL-PHI3-3B-SLERP/1762652579.9609358", - "retrieved_timestamp": "1762652579.960937", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Youlln/3PRYMMAL-PHI3-3B-SLERP", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Youlln/3PRYMMAL-PHI3-3B-SLERP", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3655500738041729 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5421833887682153 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1714501510574018 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3263422818791946 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46484375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4001828457446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/abideen/MedPhi-4-14B-v1/0367a9de-960b-4c1d-8e63-8dea06197bfa.json b/data/hfopenllm_v2/microsoft/abideen/MedPhi-4-14B-v1/0367a9de-960b-4c1d-8e63-8dea06197bfa.json deleted file mode 100644 index f05b45476..000000000 --- 
a/data/hfopenllm_v2/microsoft/abideen/MedPhi-4-14B-v1/0367a9de-960b-4c1d-8e63-8dea06197bfa.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/abideen_MedPhi-4-14B-v1/1762652579.973941", - "retrieved_timestamp": "1762652579.973942", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "abideen/MedPhi-4-14B-v1", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "abideen/MedPhi-4-14B-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6276834355066778 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6896781879584077 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2930513595166163 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34395973154362414 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4154583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5338264627659575 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/allknowingroger/MistralPhi3-11B/f7f557cf-4c63-444a-8c8f-515796b9b127.json b/data/hfopenllm_v2/microsoft/allknowingroger/MistralPhi3-11B/f7f557cf-4c63-444a-8c8f-515796b9b127.json deleted file mode 100644 index 2b98a3b61..000000000 --- a/data/hfopenllm_v2/microsoft/allknowingroger/MistralPhi3-11B/f7f557cf-4c63-444a-8c8f-515796b9b127.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_MistralPhi3-11B/1762652579.990464", - "retrieved_timestamp": "1762652579.990464", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/MistralPhi3-11B", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "allknowingroger/MistralPhi3-11B", - "additional_details": { - 
"precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 11.234 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1942911474886634 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6234314600705605 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33221476510067116 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4266770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46875 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/allknowingroger/Phi3mash1-17B-pass/83ec9172-5769-4737-a766-0ca2006dd3e4.json b/data/hfopenllm_v2/microsoft/allknowingroger/Phi3mash1-17B-pass/83ec9172-5769-4737-a766-0ca2006dd3e4.json deleted file mode 100644 index b8a7225b3..000000000 --- a/data/hfopenllm_v2/microsoft/allknowingroger/Phi3mash1-17B-pass/83ec9172-5769-4737-a766-0ca2006dd3e4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Phi3mash1-17B-pass/1762652579.997936", - "retrieved_timestamp": "1762652579.997937", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Phi3mash1-17B-pass", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "allknowingroger/Phi3mash1-17B-pass", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 16.687 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18842116694814204 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6128878795560929 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 
5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.445125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45894281914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/allknowingroger/ROGERphi-7B-slerp/9e7ef237-2e59-429d-9784-45de952f60af.json b/data/hfopenllm_v2/microsoft/allknowingroger/ROGERphi-7B-slerp/9e7ef237-2e59-429d-9784-45de952f60af.json deleted file mode 100644 index a2a0f9c2b..000000000 --- a/data/hfopenllm_v2/microsoft/allknowingroger/ROGERphi-7B-slerp/9e7ef237-2e59-429d-9784-45de952f60af.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_ROGERphi-7B-slerp/1762652580.0022678", - "retrieved_timestamp": "1762652580.002269", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/ROGERphi-7B-slerp", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "allknowingroger/ROGERphi-7B-slerp", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3861332375873793 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5195583428468424 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07326283987915408 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46853125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on 
MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3052692819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/benhaotang/phi4-qwq-sky-t1/08f1ef63-efc7-449c-92cf-6f180b9d2712.json b/data/hfopenllm_v2/microsoft/benhaotang/phi4-qwq-sky-t1/08f1ef63-efc7-449c-92cf-6f180b9d2712.json deleted file mode 100644 index 2a25a9698..000000000 --- a/data/hfopenllm_v2/microsoft/benhaotang/phi4-qwq-sky-t1/08f1ef63-efc7-449c-92cf-6f180b9d2712.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/benhaotang_phi4-qwq-sky-t1/1762652580.030136", - "retrieved_timestamp": "1762652580.030137", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "benhaotang/phi4-qwq-sky-t1", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "benhaotang/phi4-qwq-sky-t1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04596249063595704 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6710520703782934 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41012084592145015 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3951342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48995833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5244348404255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/bunnycore/Phi-3.5-mini-TitanFusion-0.1/60823e05-59e3-4c4c-a23e-8ef495aa39be.json b/data/hfopenllm_v2/microsoft/bunnycore/Phi-3.5-mini-TitanFusion-0.1/60823e05-59e3-4c4c-a23e-8ef495aa39be.json deleted file mode 100644 index ed9308f5a..000000000 --- a/data/hfopenllm_v2/microsoft/bunnycore/Phi-3.5-mini-TitanFusion-0.1/60823e05-59e3-4c4c-a23e-8ef495aa39be.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Phi-3.5-mini-TitanFusion-0.1/1762652580.04916", - "retrieved_timestamp": "1762652580.049161", - "source_data": 
[ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Phi-3.5-mini-TitanFusion-0.1", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "bunnycore/Phi-3.5-mini-TitanFusion-0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5227950726295119 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5373733988565133 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11858006042296072 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4453125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3806515957446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Model-Stock-v2/5bc6e404-5798-4d19-88d1-5a8153947227.json b/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Model-Stock-v2/5bc6e404-5798-4d19-88d1-5a8153947227.json deleted file mode 100644 index f9013769e..000000000 --- a/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Model-Stock-v2/5bc6e404-5798-4d19-88d1-5a8153947227.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-Model-Stock-v2/1762652580.050115", - "retrieved_timestamp": "1762652580.050116", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Phi-4-Model-Stock-v2", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "bunnycore/Phi-4-Model-Stock-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", 
- "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.63752510006782 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6824667320746144 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37537764350453173 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.348993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46617708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5330784574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Model-Stock-v3/5832ef9b-bd14-46ba-b04d-049280bc5267.json b/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Model-Stock-v3/5832ef9b-bd14-46ba-b04d-049280bc5267.json deleted file mode 100644 index dc52e310e..000000000 --- a/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Model-Stock-v3/5832ef9b-bd14-46ba-b04d-049280bc5267.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-Model-Stock-v3/1762652580.050334", - "retrieved_timestamp": "1762652580.050335", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Phi-4-Model-Stock-v3", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "bunnycore/Phi-4-Model-Stock-v3", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5911636679565775 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6726298549419627 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4901812688821752 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41663541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5381482712765957 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Model-Stock-v4/92363115-37f2-4d2f-8178-61fc98c8f337.json b/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Model-Stock-v4/92363115-37f2-4d2f-8178-61fc98c8f337.json deleted file mode 100644 index d37a50d5d..000000000 --- a/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Model-Stock-v4/92363115-37f2-4d2f-8178-61fc98c8f337.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-Model-Stock-v4/1762652580.0505521", - "retrieved_timestamp": "1762652580.050553", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Phi-4-Model-Stock-v4", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "bunnycore/Phi-4-Model-Stock-v4", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7110145524984818 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6924302574038697 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38293051359516617 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4610625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5393949468085106 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Model-Stock/cee9b876-96b3-4429-af70-6a5b45747a3b.json 
b/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Model-Stock/cee9b876-96b3-4429-af70-6a5b45747a3b.json deleted file mode 100644 index fa3ccdcf3..000000000 --- a/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Model-Stock/cee9b876-96b3-4429-af70-6a5b45747a3b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-Model-Stock/1762652580.0497222", - "retrieved_timestamp": "1762652580.049727", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Phi-4-Model-Stock", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "bunnycore/Phi-4-Model-Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6878837041272712 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6889699980822082 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4297583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3548657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44413541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5368184840425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-RP-v0/29135c1b-e6a0-428a-ba4f-459e9b652d25.json b/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-RP-v0/29135c1b-e6a0-428a-ba4f-459e9b652d25.json deleted file mode 100644 index cc43c35bd..000000000 --- a/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-RP-v0/29135c1b-e6a0-428a-ba4f-459e9b652d25.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-RP-v0/1762652580.050766", - "retrieved_timestamp": "1762652580.0507672", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Phi-4-RP-v0", - 
"developer": "microsoft", - "inference_platform": "unknown", - "id": "bunnycore/Phi-4-RP-v0", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6827129793392643 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.685633603278299 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33157099697885195 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3523489932885906 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41409375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5364029255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-RR-Shoup/377bc688-a18e-4abb-91f7-d78a934e1649.json b/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-RR-Shoup/377bc688-a18e-4abb-91f7-d78a934e1649.json deleted file mode 100644 index ae1f4f0c4..000000000 --- a/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-RR-Shoup/377bc688-a18e-4abb-91f7-d78a934e1649.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-RR-Shoup/1762652580.050983", - "retrieved_timestamp": "1762652580.050983", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Phi-4-RR-Shoup", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "bunnycore/Phi-4-RR-Shoup", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6586579165503088 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6947025970028124 - } - }, - { - "evaluation_name": "MATH Level 5", - 
"metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49924471299093653 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.337248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44404166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5428856382978723 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-RStock-v0.1/cf300641-1ec3-4ee7-b38d-b274ebc23ff2.json b/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-RStock-v0.1/cf300641-1ec3-4ee7-b38d-b274ebc23ff2.json deleted file mode 100644 index 04f108b03..000000000 --- a/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-RStock-v0.1/cf300641-1ec3-4ee7-b38d-b274ebc23ff2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-RStock-v0.1/1762652580.051188", - "retrieved_timestamp": "1762652580.051189", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Phi-4-RStock-v0.1", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "bunnycore/Phi-4-RStock-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7018721436898541 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6928310064675399 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3950151057401813 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3649328859060403 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45836458333333335 - } - }, - { - "evaluation_name": 
"MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5400598404255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-ReasoningRP/5db77608-f892-4ac4-93c4-03f177696484.json b/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-ReasoningRP/5db77608-f892-4ac4-93c4-03f177696484.json deleted file mode 100644 index 04ff9a019..000000000 --- a/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-ReasoningRP/5db77608-f892-4ac4-93c4-03f177696484.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-ReasoningRP/1762652580.05142", - "retrieved_timestamp": "1762652580.051421", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Phi-4-ReasoningRP", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "bunnycore/Phi-4-ReasoningRP", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6736204382150472 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6922187070022994 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4569486404833837 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34395973154362414 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44909375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5420545212765957 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Sce-exp-v0.1/c8de0acd-7cce-45c0-9032-2b717f3917b8.json b/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Sce-exp-v0.1/c8de0acd-7cce-45c0-9032-2b717f3917b8.json deleted file mode 100644 index 79cd25b27..000000000 --- a/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Sce-exp-v0.1/c8de0acd-7cce-45c0-9032-2b717f3917b8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-Sce-exp-v0.1/1762652580.0516632", - "retrieved_timestamp": 
"1762652580.0516639", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Phi-4-Sce-exp-v0.1", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "bunnycore/Phi-4-Sce-exp-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6595322632836429 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.694317957938629 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5030211480362538 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33557046979865773 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44407291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5423038563829787 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Stock-Ex/bc007572-56ff-449a-9e3d-5ab770c3ae44.json b/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Stock-Ex/bc007572-56ff-449a-9e3d-5ab770c3ae44.json deleted file mode 100644 index f738bcf77..000000000 --- a/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Stock-Ex/bc007572-56ff-449a-9e3d-5ab770c3ae44.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-Stock-Ex/1762652580.051897", - "retrieved_timestamp": "1762652580.051897", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Phi-4-Stock-Ex", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "bunnycore/Phi-4-Stock-Ex", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6574588757829227 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6864461628663387 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4086102719033233 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35067114093959734 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46236458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5374833776595744 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Stock-RP/69724e46-4038-4d3a-a8ff-e84a56bba9e8.json b/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Stock-RP/69724e46-4038-4d3a-a8ff-e84a56bba9e8.json deleted file mode 100644 index 7cc76a2ee..000000000 --- a/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Stock-RP/69724e46-4038-4d3a-a8ff-e84a56bba9e8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-Stock-RP/1762652580.0521228", - "retrieved_timestamp": "1762652580.0521228", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Phi-4-Stock-RP", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "bunnycore/Phi-4-Stock-RP", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6399231816025922 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6859633715492438 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35822147651006714 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47147916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5316655585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Trim-Exp1/c13c2fd7-e271-4935-a3a6-4161cb8e4ea2.json b/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Trim-Exp1/c13c2fd7-e271-4935-a3a6-4161cb8e4ea2.json deleted file mode 100644 index 058ad34e9..000000000 --- a/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Trim-Exp1/c13c2fd7-e271-4935-a3a6-4161cb8e4ea2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-Trim-Exp1/1762652580.052348", - "retrieved_timestamp": "1762652580.052348", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Phi-4-Trim-Exp1", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "bunnycore/Phi-4-Trim-Exp1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.503 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12192538021338936 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28516626650940224 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.005287009063444109 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2550335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4176875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1146941489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/bunnycore/Phi-Seek-4-Sce-V1/75810fb9-99b5-4707-80a8-8974bbb0844d.json 
b/data/hfopenllm_v2/microsoft/bunnycore/Phi-Seek-4-Sce-V1/75810fb9-99b5-4707-80a8-8974bbb0844d.json deleted file mode 100644 index 86fe10f56..000000000 --- a/data/hfopenllm_v2/microsoft/bunnycore/Phi-Seek-4-Sce-V1/75810fb9-99b5-4707-80a8-8974bbb0844d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bunnycore_Phi-Seek-4-Sce-V1/1762652580.052572", - "retrieved_timestamp": "1762652580.052573", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bunnycore/Phi-Seek-4-Sce-V1", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "bunnycore/Phi-Seek-4-Sce-V1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29348462080612775 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6459114889718743 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21450151057401812 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39815625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5123005319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/carsenk/phi3.5_mini_exp_825_uncensored/68315e0a-603c-4784-a567-e342a6185c07.json b/data/hfopenllm_v2/microsoft/carsenk/phi3.5_mini_exp_825_uncensored/68315e0a-603c-4784-a567-e342a6185c07.json deleted file mode 100644 index 069310ed4..000000000 --- a/data/hfopenllm_v2/microsoft/carsenk/phi3.5_mini_exp_825_uncensored/68315e0a-603c-4784-a567-e342a6185c07.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/carsenk_phi3.5_mini_exp_825_uncensored/1762652580.083884", - "retrieved_timestamp": "1762652580.083887", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - 
"model_info": { - "name": "carsenk/phi3.5_mini_exp_825_uncensored", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "carsenk/phi3.5_mini_exp_825_uncensored", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13641360479084386 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29647345147918264 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24916107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36441666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11751994680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/cognitivecomputations/Dolphin3.0-R1-Mistral-24B/8a641aee-1604-4910-8164-9e6d5c0652b1.json b/data/hfopenllm_v2/microsoft/cognitivecomputations/Dolphin3.0-R1-Mistral-24B/8a641aee-1604-4910-8164-9e6d5c0652b1.json deleted file mode 100644 index 8b7e82f3c..000000000 --- a/data/hfopenllm_v2/microsoft/cognitivecomputations/Dolphin3.0-R1-Mistral-24B/8a641aee-1604-4910-8164-9e6d5c0652b1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cognitivecomputations_Dolphin3.0-R1-Mistral-24B/1762652580.112771", - "retrieved_timestamp": "1762652580.112771", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cognitivecomputations/Dolphin3.0-R1-Mistral-24B", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "cognitivecomputations/Dolphin3.0-R1-Mistral-24B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.406816136739407 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5359697041031141 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3119335347432024 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3951770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.300531914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.1-yi-1.5-34b/4e6cb7a6-f01d-4e25-be2f-bda77af2eaf6.json b/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.1-yi-1.5-34b/4e6cb7a6-f01d-4e25-be2f-bda77af2eaf6.json deleted file mode 100644 index 6a671a021..000000000 --- a/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.1-yi-1.5-34b/4e6cb7a6-f01d-4e25-be2f-bda77af2eaf6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.1-yi-1.5-34b/1762652580.113518", - "retrieved_timestamp": "1762652580.1135192", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cognitivecomputations/dolphin-2.9.1-yi-1.5-34b", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "cognitivecomputations/dolphin-2.9.1-yi-1.5-34b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3852588908540451 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6076225600626862 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1865558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.34312080536912754 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45979166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4518783244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.1-yi-1.5-9b/e1003371-d503-469d-ae41-e813d097ea43.json b/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.1-yi-1.5-9b/e1003371-d503-469d-ae41-e813d097ea43.json deleted file mode 100644 index 29026f752..000000000 --- a/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.1-yi-1.5-9b/e1003371-d503-469d-ae41-e813d097ea43.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.1-yi-1.5-9b/1762652580.113816", - "retrieved_timestamp": "1762652580.113816", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cognitivecomputations/dolphin-2.9.1-yi-1.5-9b", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "cognitivecomputations/dolphin-2.9.1-yi-1.5-9b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.829 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44653297694561545 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5484314644603556 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15181268882175228 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4348020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3966921542553192 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated/6f89f55f-a259-419a-b6ad-9b01b2dae9d8.json b/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated/6f89f55f-a259-419a-b6ad-9b01b2dae9d8.json deleted file mode 100644 index 1cd99f2fb..000000000 --- a/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated/6f89f55f-a259-419a-b6ad-9b01b2dae9d8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.2-Phi-3-Medium-abliterated/1762652580.1142762", - "retrieved_timestamp": "1762652580.1142762", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 13.96 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36125369574950017 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.612322545411745 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12386706948640483 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4111770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4493849734042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated/958ad3b8-9b65-4165-9d3c-a49e25802fd3.json b/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated/958ad3b8-9b65-4165-9d3c-a49e25802fd3.json deleted file mode 100644 index 0e9641323..000000000 --- a/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated/958ad3b8-9b65-4165-9d3c-a49e25802fd3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/cognitivecomputations_dolphin-2.9.2-Phi-3-Medium-abliterated/1762652580.114508", - "retrieved_timestamp": "1762652580.114509", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 13.96 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4123614232458765 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.638289226729353 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18202416918429004 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43492708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45246010638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.2-Phi-3-Medium/36476eb7-a89a-45e1-b423-7755edfd5be1.json b/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.2-Phi-3-Medium/36476eb7-a89a-45e1-b423-7755edfd5be1.json deleted file mode 100644 index 692e8f443..000000000 --- a/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.2-Phi-3-Medium/36476eb7-a89a-45e1-b423-7755edfd5be1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.2-Phi-3-Medium/1762652580.114048", - "retrieved_timestamp": "1762652580.114049", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cognitivecomputations/dolphin-2.9.2-Phi-3-Medium", - "developer": "microsoft", - "inference_platform": "unknown", - "id": 
"cognitivecomputations/dolphin-2.9.2-Phi-3-Medium", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": -1.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4247762603226107 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6456739302686527 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18277945619335348 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271812080536913 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4190520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45553523936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.3-Yi-1.5-34B-32k/0e625490-b7b1-4b64-aa1e-222c4e21d7a5.json b/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.3-Yi-1.5-34B-32k/0e625490-b7b1-4b64-aa1e-222c4e21d7a5.json deleted file mode 100644 index efedcdc9a..000000000 --- a/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.3-Yi-1.5-34B-32k/0e625490-b7b1-4b64-aa1e-222c4e21d7a5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.3-Yi-1.5-34B-32k/1762652580.115152", - "retrieved_timestamp": "1762652580.115152", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cognitivecomputations/dolphin-2.9.3-Yi-1.5-34B-32k", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "cognitivecomputations/dolphin-2.9.3-Yi-1.5-34B-32k", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3639266036339136 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.6046995537773227 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16691842900302115 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34312080536912754 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43105208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4630152925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.3-mistral-7B-32k/4a0bc836-88b7-4d6e-9f0d-321ff75b1733.json b/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.3-mistral-7B-32k/4a0bc836-88b7-4d6e-9f0d-321ff75b1733.json deleted file mode 100644 index 73fac11c1..000000000 --- a/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.3-mistral-7B-32k/4a0bc836-88b7-4d6e-9f0d-321ff75b1733.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.3-mistral-7B-32k/1762652580.1153762", - "retrieved_timestamp": "1762652580.115377", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cognitivecomputations/dolphin-2.9.3-mistral-7B-32k", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "cognitivecomputations/dolphin-2.9.3-mistral-7B-32k", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4126362495955177 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48125401481062013 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4642604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2820811170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.3-mistral-nemo-12b/05488c6f-dfd4-4481-a3d4-15a918b115d3.json b/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.3-mistral-nemo-12b/05488c6f-dfd4-4481-a3d4-15a918b115d3.json deleted file mode 100644 index ce851df74..000000000 --- a/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.3-mistral-nemo-12b/05488c6f-dfd4-4481-a3d4-15a918b115d3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.3-mistral-nemo-12b/1762652580.115594", - "retrieved_timestamp": "1762652580.115595", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cognitivecomputations/dolphin-2.9.3-mistral-nemo-12b", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "cognitivecomputations/dolphin-2.9.3-mistral-nemo-12b", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5600894515441251 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5480369183144175 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07401812688821752 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4429895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3376828457446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/ehristoforu/phi-4-25b/d11d7e47-f9e0-4502-9e71-0654819c3cd4.json 
b/data/hfopenllm_v2/microsoft/ehristoforu/phi-4-25b/d11d7e47-f9e0-4502-9e71-0654819c3cd4.json deleted file mode 100644 index 62b3c0c58..000000000 --- a/data/hfopenllm_v2/microsoft/ehristoforu/phi-4-25b/d11d7e47-f9e0-4502-9e71-0654819c3cd4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ehristoforu_phi-4-25b/1762652580.144644", - "retrieved_timestamp": "1762652580.1446452", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ehristoforu/phi-4-25b", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "ehristoforu/phi-4-25b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 24.883 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6483663346587056 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6907778236877188 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.452416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4207916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5350731382978723 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/ehristoforu/ruphi-4b/70337ca5-7810-4e52-8382-0c2568a6ab70.json b/data/hfopenllm_v2/microsoft/ehristoforu/ruphi-4b/70337ca5-7810-4e52-8382-0c2568a6ab70.json deleted file mode 100644 index ea7ac03cb..000000000 --- a/data/hfopenllm_v2/microsoft/ehristoforu/ruphi-4b/70337ca5-7810-4e52-8382-0c2568a6ab70.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ehristoforu_ruphi-4b/1762652580.1457548", - "retrieved_timestamp": "1762652580.145756", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ehristoforu/ruphi-4b", - "developer": "microsoft", - 
"inference_platform": "unknown", - "id": "ehristoforu/ruphi-4b", - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17518185082248433 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29060336568338 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23993288590604026 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35117708333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11261635638297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/fhai50032/Unaligned-Thinker-PHI-4/bda90ce2-cb80-4942-8492-28329d7f5aeb.json b/data/hfopenllm_v2/microsoft/fhai50032/Unaligned-Thinker-PHI-4/bda90ce2-cb80-4942-8492-28329d7f5aeb.json deleted file mode 100644 index e345ae44d..000000000 --- a/data/hfopenllm_v2/microsoft/fhai50032/Unaligned-Thinker-PHI-4/bda90ce2-cb80-4942-8492-28329d7f5aeb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/fhai50032_Unaligned-Thinker-PHI-4/1762652580.154337", - "retrieved_timestamp": "1762652580.1543381", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "fhai50032/Unaligned-Thinker-PHI-4", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "fhai50032/Unaligned-Thinker-PHI-4", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.056254072527560206 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6642576780946753 - } - }, - { - "evaluation_name": 
"MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33534743202416917 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4678541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5147107712765957 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/hotmailuser/Phi4-Slerp4-14B/da866c81-296f-463c-962b-6b871d6fb633.json b/data/hfopenllm_v2/microsoft/hotmailuser/Phi4-Slerp4-14B/da866c81-296f-463c-962b-6b871d6fb633.json deleted file mode 100644 index 57b0b465b..000000000 --- a/data/hfopenllm_v2/microsoft/hotmailuser/Phi4-Slerp4-14B/da866c81-296f-463c-962b-6b871d6fb633.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hotmailuser_Phi4-Slerp4-14B/1762652580.1958668", - "retrieved_timestamp": "1762652580.195868", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "hotmailuser/Phi4-Slerp4-14B", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "hotmailuser/Phi4-Slerp4-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0629485321170051 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6731037909447855 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39681208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5097395833333334 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5277593085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/magnifi/Phi3_intent_v56_3_w_unknown_5_lr_0.002/c78d1aaf-9975-45d6-9a8d-eed76f7e0a0f.json b/data/hfopenllm_v2/microsoft/magnifi/Phi3_intent_v56_3_w_unknown_5_lr_0.002/c78d1aaf-9975-45d6-9a8d-eed76f7e0a0f.json deleted file mode 100644 index 0a58ea4e1..000000000 --- a/data/hfopenllm_v2/microsoft/magnifi/Phi3_intent_v56_3_w_unknown_5_lr_0.002/c78d1aaf-9975-45d6-9a8d-eed76f7e0a0f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/magnifi_Phi3_intent_v56_3_w_unknown_5_lr_0.002/1762652580.32982", - "retrieved_timestamp": "1762652580.329825", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "magnifi/Phi3_intent_v56_3_w_unknown_5_lr_0.002", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "magnifi/Phi3_intent_v56_3_w_unknown_5_lr_0.002", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20181008612703183 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3281563256810973 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41229166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1471908244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/microsoft/phi-1/b88d579f-6bc7-4aee-a117-28786cba3300.json b/data/hfopenllm_v2/microsoft/microsoft/phi-1/b88d579f-6bc7-4aee-a117-28786cba3300.json deleted file mode 100644 index c9174ac05..000000000 --- a/data/hfopenllm_v2/microsoft/microsoft/phi-1/b88d579f-6bc7-4aee-a117-28786cba3300.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/microsoft_phi-1/1762652580.357049", - "retrieved_timestamp": "1762652580.3570502", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "microsoft/phi-1", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "microsoft/phi-1", - "additional_details": { - "precision": "bfloat16", - "architecture": "PhiForCausalLM", - "params_billions": 1.418 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20680571993421898 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31394755895837845 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35251041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11619015957446809 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/microsoft/phi-1_5/0bc55439-f6a1-4588-858a-082907876d6e.json b/data/hfopenllm_v2/microsoft/microsoft/phi-1_5/0bc55439-f6a1-4588-858a-082907876d6e.json deleted file mode 100644 index 259e82a1c..000000000 --- a/data/hfopenllm_v2/microsoft/microsoft/phi-1_5/0bc55439-f6a1-4588-858a-082907876d6e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/microsoft_phi-1_5/1762652580.357298", - "retrieved_timestamp": "1762652580.357298", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "microsoft/phi-1_5", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "microsoft/phi-1_5", - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 1.418 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", 
- "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2032839532440591 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33597583211996657 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01812688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34041666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16913231382978725 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/microsoft/phi-2/e38ef3e4-585f-46de-beb4-c794d767b579.json b/data/hfopenllm_v2/microsoft/microsoft/phi-2/e38ef3e4-585f-46de-beb4-c794d767b579.json deleted file mode 100644 index b3236fc99..000000000 --- a/data/hfopenllm_v2/microsoft/microsoft/phi-2/e38ef3e4-585f-46de-beb4-c794d767b579.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/microsoft_phi-2/1762652580.357496", - "retrieved_timestamp": "1762652580.357497", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "microsoft/phi-2", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "microsoft/phi-2", - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.78 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.273875539125077 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4881208771249696 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4098958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26279920212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/microsoft/phi-4/5481936f-d52a-486b-871e-d2e48c1b0278.json b/data/hfopenllm_v2/microsoft/microsoft/phi-4/5481936f-d52a-486b-871e-d2e48c1b0278.json deleted file mode 100644 index 879edf5aa..000000000 --- a/data/hfopenllm_v2/microsoft/microsoft/phi-4/5481936f-d52a-486b-871e-d2e48c1b0278.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/microsoft_phi-4/1762652580.357901", - "retrieved_timestamp": "1762652580.357902", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "microsoft/phi-4", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "microsoft/phi-4", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0585269307659233 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6690562305322874 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3164652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40604026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5033541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5286735372340425 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/microsoft/phi-4/f3ee4f04-22f1-4ddb-afb2-27b8f641042b.json b/data/hfopenllm_v2/microsoft/microsoft/phi-4/f3ee4f04-22f1-4ddb-afb2-27b8f641042b.json deleted file mode 100644 index 9321312fb..000000000 --- 
a/data/hfopenllm_v2/microsoft/microsoft/phi-4/f3ee4f04-22f1-4ddb-afb2-27b8f641042b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/microsoft_phi-4/1762652580.3577", - "retrieved_timestamp": "1762652580.357701", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "microsoft/phi-4", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "microsoft/phi-4", - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.048785001573602486 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6703464626619114 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27870090634441086 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.401006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5033541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5295046542553191 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/migtissera/Tess-v2.5-Phi-3-medium-128k-14B/260f2500-c920-4e3f-901b-10efc03f0390.json b/data/hfopenllm_v2/microsoft/migtissera/Tess-v2.5-Phi-3-medium-128k-14B/260f2500-c920-4e3f-901b-10efc03f0390.json deleted file mode 100644 index aa34f97ef..000000000 --- a/data/hfopenllm_v2/microsoft/migtissera/Tess-v2.5-Phi-3-medium-128k-14B/260f2500-c920-4e3f-901b-10efc03f0390.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/migtissera_Tess-v2.5-Phi-3-medium-128k-14B/1762652580.35902", - "retrieved_timestamp": "1762652580.359021", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "migtissera/Tess-v2.5-Phi-3-medium-128k-14B", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "migtissera/Tess-v2.5-Phi-3-medium-128k-14B", 
- "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 13.96 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45387682460316403 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6206613823135703 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41130208333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3731715425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/mkurman/phi-4-MedIT-11B-exp-1/d64a8825-610a-4128-8c68-55150a76ed88.json b/data/hfopenllm_v2/microsoft/mkurman/phi-4-MedIT-11B-exp-1/d64a8825-610a-4128-8c68-55150a76ed88.json deleted file mode 100644 index f8905ed19..000000000 --- a/data/hfopenllm_v2/microsoft/mkurman/phi-4-MedIT-11B-exp-1/d64a8825-610a-4128-8c68-55150a76ed88.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mkurman_phi-4-MedIT-11B-exp-1/1762652580.3661451", - "retrieved_timestamp": "1762652580.366146", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mkurman/phi-4-MedIT-11B-exp-1", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "mkurman/phi-4-MedIT-11B-exp-1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 11.514 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5947607902587357 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5413943771388249 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact 
Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08987915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38479166666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38248005319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/mkurman/phi4-MedIT-10B-o1/c5a2a30d-99b0-4658-97f5-4c9be5576073.json b/data/hfopenllm_v2/microsoft/mkurman/phi4-MedIT-10B-o1/c5a2a30d-99b0-4658-97f5-4c9be5576073.json deleted file mode 100644 index 8095909cb..000000000 --- a/data/hfopenllm_v2/microsoft/mkurman/phi4-MedIT-10B-o1/c5a2a30d-99b0-4658-97f5-4c9be5576073.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mkurman_phi4-MedIT-10B-o1/1762652580.366463", - "retrieved_timestamp": "1762652580.366464", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mkurman/phi4-MedIT-10B-o1", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "mkurman/phi4-MedIT-10B-o1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaMedITForCausalLM", - "params_billions": 10.255 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34629117408476173 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.519820312240642 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1148036253776435 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24580536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39679166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": 
"Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3507313829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/mlabonne/phixtral-2x2_8/ec051c9b-9399-4c8d-8710-6a182a234890.json b/data/hfopenllm_v2/microsoft/mlabonne/phixtral-2x2_8/ec051c9b-9399-4c8d-8710-6a182a234890.json deleted file mode 100644 index 03896dd62..000000000 --- a/data/hfopenllm_v2/microsoft/mlabonne/phixtral-2x2_8/ec051c9b-9399-4c8d-8710-6a182a234890.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mlabonne_phixtral-2x2_8/1762652580.370162", - "retrieved_timestamp": "1762652580.370163", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mlabonne/phixtral-2x2_8", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "mlabonne/phixtral-2x2_8", - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 4.458 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3431184811854767 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48885941873076205 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.035498489425981876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3643541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2550698138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/mrm8488/phi-4-14B-grpo-gsm8k-3e/1bd4d2fe-cd83-4a79-b102-40be8ebb6245.json b/data/hfopenllm_v2/microsoft/mrm8488/phi-4-14B-grpo-gsm8k-3e/1bd4d2fe-cd83-4a79-b102-40be8ebb6245.json deleted file mode 100644 index c947cc2bb..000000000 --- a/data/hfopenllm_v2/microsoft/mrm8488/phi-4-14B-grpo-gsm8k-3e/1bd4d2fe-cd83-4a79-b102-40be8ebb6245.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mrm8488_phi-4-14B-grpo-gsm8k-3e/1762652580.374398", - "retrieved_timestamp": "1762652580.374399", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mrm8488/phi-4-14B-grpo-gsm8k-3e", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "mrm8488/phi-4-14B-grpo-gsm8k-3e", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.688533092195375 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6805415739665394 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.452416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33557046979865773 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39939583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.526845079787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/mrm8488/phi-4-14B-grpo-limo/e671d26c-1d8a-4d22-b360-dc3e449886b8.json b/data/hfopenllm_v2/microsoft/mrm8488/phi-4-14B-grpo-limo/e671d26c-1d8a-4d22-b360-dc3e449886b8.json deleted file mode 100644 index 26277b730..000000000 --- a/data/hfopenllm_v2/microsoft/mrm8488/phi-4-14B-grpo-limo/e671d26c-1d8a-4d22-b360-dc3e449886b8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mrm8488_phi-4-14B-grpo-limo/1762652580.374649", - "retrieved_timestamp": "1762652580.37465", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mrm8488/phi-4-14B-grpo-limo", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "mrm8488/phi-4-14B-grpo-limo", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.681239112222237 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.678485424233919 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4569486404833837 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33640939597315433 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3980625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5260970744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/netcat420/MFANN-abliterated-phi2-merge-unretrained/a3c07d22-20d1-4878-80d5-04b949580829.json b/data/hfopenllm_v2/microsoft/netcat420/MFANN-abliterated-phi2-merge-unretrained/a3c07d22-20d1-4878-80d5-04b949580829.json deleted file mode 100644 index 3c9092a8d..000000000 --- a/data/hfopenllm_v2/microsoft/netcat420/MFANN-abliterated-phi2-merge-unretrained/a3c07d22-20d1-4878-80d5-04b949580829.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN-abliterated-phi2-merge-unretrained/1762652580.3939252", - "retrieved_timestamp": "1762652580.393926", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/MFANN-abliterated-phi2-merge-unretrained", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "netcat420/MFANN-abliterated-phi2-merge-unretrained", - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.775 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3005037744296245 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4104131503721586 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - 
"metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31834375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14777260638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/netcat420/MFANN-phigments-slerp-V2/8b4f2ab4-dcd7-4c5d-9bd0-6d7e1580c123.json b/data/hfopenllm_v2/microsoft/netcat420/MFANN-phigments-slerp-V2/8b4f2ab4-dcd7-4c5d-9bd0-6d7e1580c123.json deleted file mode 100644 index 5e0d7c2fe..000000000 --- a/data/hfopenllm_v2/microsoft/netcat420/MFANN-phigments-slerp-V2/8b4f2ab4-dcd7-4c5d-9bd0-6d7e1580c123.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN-phigments-slerp-V2/1762652580.3950222", - "retrieved_timestamp": "1762652580.395023", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/MFANN-phigments-slerp-V2", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "netcat420/MFANN-phigments-slerp-V2", - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.78 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32316032571355113 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48272762171598743 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03172205438066465 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40372916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2716921542553192 - } - } - 
] -} diff --git a/data/hfopenllm_v2/microsoft/netcat420/MFANN-phigments-slerp-V3.2/8c4e85ce-7b8f-479c-a1dc-114c7e5ba4f1.json b/data/hfopenllm_v2/microsoft/netcat420/MFANN-phigments-slerp-V3.2/8c4e85ce-7b8f-479c-a1dc-114c7e5ba4f1.json deleted file mode 100644 index 2c461aba6..000000000 --- a/data/hfopenllm_v2/microsoft/netcat420/MFANN-phigments-slerp-V3.2/8c4e85ce-7b8f-479c-a1dc-114c7e5ba4f1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN-phigments-slerp-V3.2/1762652580.395236", - "retrieved_timestamp": "1762652580.395236", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/MFANN-phigments-slerp-V3.2", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "netcat420/MFANN-phigments-slerp-V3.2", - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.78 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35243598097492435 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4808549324972969 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03323262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3707708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2705285904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/netcat420/MFANN-phigments-slerp-V3.3/b3466ac6-df1f-4440-9d7b-7991cac7d733.json b/data/hfopenllm_v2/microsoft/netcat420/MFANN-phigments-slerp-V3.3/b3466ac6-df1f-4440-9d7b-7991cac7d733.json deleted file mode 100644 index 7602d1173..000000000 --- a/data/hfopenllm_v2/microsoft/netcat420/MFANN-phigments-slerp-V3.3/b3466ac6-df1f-4440-9d7b-7991cac7d733.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN-phigments-slerp-V3.3/1762652580.395446", - "retrieved_timestamp": "1762652580.395447", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/MFANN-phigments-slerp-V3.3", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "netcat420/MFANN-phigments-slerp-V3.3", - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.78 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36909732842192056 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48952950463630956 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03323262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38921874999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802526595744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/pankajmathur/orca_mini_phi-4/f5971ede-de93-4729-8a03-b9ec3abea21e.json b/data/hfopenllm_v2/microsoft/pankajmathur/orca_mini_phi-4/f5971ede-de93-4729-8a03-b9ec3abea21e.json deleted file mode 100644 index ea88830f5..000000000 --- a/data/hfopenllm_v2/microsoft/pankajmathur/orca_mini_phi-4/f5971ede-de93-4729-8a03-b9ec3abea21e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_phi-4/1762652580.435327", - "retrieved_timestamp": "1762652580.435328", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "pankajmathur/orca_mini_phi-4", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_phi-4", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7780588837617521 - } - }, - { - "evaluation_name": 
"BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6856329737542378 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29531722054380666 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37416107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47030208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5255152925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/phi-1/c6ae6691-64ec-443d-8d76-af614c8cc7f9.json b/data/hfopenllm_v2/microsoft/phi-1/c6ae6691-64ec-443d-8d76-af614c8cc7f9.json new file mode 100644 index 000000000..84c69a4be --- /dev/null +++ b/data/hfopenllm_v2/microsoft/phi-1/c6ae6691-64ec-443d-8d76-af614c8cc7f9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/microsoft_phi-1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "phi-1", + "id": "microsoft/phi-1", + "developer": "microsoft", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "PhiForCausalLM", + "params_billions": 1.418 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2068 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3139 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0098 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3525 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1162 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/phi-1_5/80567722-8c6b-41b9-8103-3bdaedfdb8ee.json b/data/hfopenllm_v2/microsoft/phi-1_5/80567722-8c6b-41b9-8103-3bdaedfdb8ee.json new file mode 100644 index 000000000..2fc19d941 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/phi-1_5/80567722-8c6b-41b9-8103-3bdaedfdb8ee.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/microsoft_phi-1_5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "phi-1_5", + "id": "microsoft/phi-1_5", + "developer": "microsoft", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 1.418 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2033 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.336 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0181 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": 
"TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3404 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1691 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/phi-2/20192dc4-ea3a-4413-8457-18a592fa0c64.json b/data/hfopenllm_v2/microsoft/phi-2/20192dc4-ea3a-4413-8457-18a592fa0c64.json new file mode 100644 index 000000000..428cbe527 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/phi-2/20192dc4-ea3a-4413-8457-18a592fa0c64.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/microsoft_phi-2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "phi-2", + "id": "microsoft/phi-2", + "developer": "microsoft", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2739 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4881 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0295 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2718 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4099 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + 
"source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2628 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/phi-4/8c878c05-86f7-4d61-81d7-9bb286516581.json b/data/hfopenllm_v2/microsoft/phi-4/8c878c05-86f7-4d61-81d7-9bb286516581.json new file mode 100644 index 000000000..5d88bde05 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/phi-4/8c878c05-86f7-4d61-81d7-9bb286516581.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/microsoft_phi-4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "phi-4", + "id": "microsoft/phi-4", + "developer": "microsoft", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0585 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6691 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3165 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.406 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5034 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5287 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/microsoft/phi-4/fa753be0-4a98-4ec3-9cc9-3bf7b380ad17.json b/data/hfopenllm_v2/microsoft/phi-4/fa753be0-4a98-4ec3-9cc9-3bf7b380ad17.json new file mode 100644 index 000000000..8eaf9faa9 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/phi-4/fa753be0-4a98-4ec3-9cc9-3bf7b380ad17.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/microsoft_phi-4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "phi-4", + "id": "microsoft/phi-4", + "developer": "microsoft", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0488 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6703 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2787 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.401 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5034 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5295 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-Empathetic/a7a2af83-7047-4601-bfdd-ac25abf3890d.json b/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-Empathetic/a7a2af83-7047-4601-bfdd-ac25abf3890d.json deleted file mode 100644 index e03f510c1..000000000 --- 
a/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-Empathetic/a7a2af83-7047-4601-bfdd-ac25abf3890d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Phi-4-Empathetic/1762652580.469516", - "retrieved_timestamp": "1762652580.469517", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Phi-4-Empathetic", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "prithivMLmods/Phi-4-Empathetic", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.049659348306936704 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6726820578371974 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2620845921450151 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3800335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49913541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5065658244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-Math-IO/88c03059-5add-46ea-b423-4cf8496c5763.json b/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-Math-IO/88c03059-5add-46ea-b423-4cf8496c5763.json deleted file mode 100644 index 3b8012923..000000000 --- a/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-Math-IO/88c03059-5add-46ea-b423-4cf8496c5763.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Phi-4-Math-IO/1762652580.469801", - "retrieved_timestamp": "1762652580.469801", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Phi-4-Math-IO", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "prithivMLmods/Phi-4-Math-IO", - "additional_details": { 
- "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05897684809638426 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6668255086606543 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45770392749244715 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39848993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4872916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5205285904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-QwQ/8e84f2de-117a-4526-9d58-86a63011a07f.json b/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-QwQ/8e84f2de-117a-4526-9d58-86a63011a07f.json deleted file mode 100644 index 1e27b2a49..000000000 --- a/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-QwQ/8e84f2de-117a-4526-9d58-86a63011a07f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Phi-4-QwQ/1762652580.470021", - "retrieved_timestamp": "1762652580.470022", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Phi-4-QwQ", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "prithivMLmods/Phi-4-QwQ", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05592937849350833 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6695574237334824 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45770392749244715 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39093959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4650625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5275099734042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-Super-1/91c5f088-38fd-4ea7-bf95-3d6a69653cca.json b/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-Super-1/91c5f088-38fd-4ea7-bf95-3d6a69653cca.json deleted file mode 100644 index 21642d402..000000000 --- a/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-Super-1/91c5f088-38fd-4ea7-bf95-3d6a69653cca.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Phi-4-Super-1/1762652580.470496", - "retrieved_timestamp": "1762652580.470498", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Phi-4-Super-1", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "prithivMLmods/Phi-4-Super-1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04176584795010572 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.672933647971901 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35196374622356497 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3934563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5017395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5235206117021277 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-Super-o1/b90749f4-0542-42b6-a708-4e14bc586ad1.json b/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-Super-o1/b90749f4-0542-42b6-a708-4e14bc586ad1.json deleted file mode 100644 index 3452fd946..000000000 --- a/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-Super-o1/b90749f4-0542-42b6-a708-4e14bc586ad1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Phi-4-Super-o1/1762652580.470741", - "retrieved_timestamp": "1762652580.470741", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Phi-4-Super-o1", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "prithivMLmods/Phi-4-Super-o1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04176584795010572 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.672933647971901 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35196374622356497 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3934563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5017395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5235206117021277 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-Super/ec19309c-9bbe-4d42-894d-3638dbe5dfac.json b/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-Super/ec19309c-9bbe-4d42-894d-3638dbe5dfac.json deleted file mode 100644 index 1651dcd0d..000000000 --- a/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-Super/ec19309c-9bbe-4d42-894d-3638dbe5dfac.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Phi-4-Super/1762652580.470242", - "retrieved_timestamp": "1762652580.470242", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Phi-4-Super", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "prithivMLmods/Phi-4-Super", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04813561350549875 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6720116458521787 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34894259818731116 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39429530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.504375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.526595744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-o1/d58bf1bb-e269-4741-a9f1-be242443ad4a.json b/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-o1/d58bf1bb-e269-4741-a9f1-be242443ad4a.json deleted file mode 100644 index 38dcc0326..000000000 --- a/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-o1/d58bf1bb-e269-4741-a9f1-be242443ad4a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Phi-4-o1/1762652580.470958", - "retrieved_timestamp": "1762652580.4709592", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Phi-4-o1", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "prithivMLmods/Phi-4-o1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.028976449154908976 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6688727399756971 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3995468277945619 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3825503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49777083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5173703457446809 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/prithivMLmods/Phi4-Super/07ee76dd-a928-469b-912e-cfd2e0a26ef9.json b/data/hfopenllm_v2/microsoft/prithivMLmods/Phi4-Super/07ee76dd-a928-469b-912e-cfd2e0a26ef9.json deleted file mode 100644 index 0eb1df0dd..000000000 --- a/data/hfopenllm_v2/microsoft/prithivMLmods/Phi4-Super/07ee76dd-a928-469b-912e-cfd2e0a26ef9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Phi4-Super/1762652580.471183", - "retrieved_timestamp": "1762652580.4711838", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Phi4-Super", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "prithivMLmods/Phi4-Super", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04813561350549875 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6720116458521787 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34894259818731116 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.39429530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.504375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.526595744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/rhysjones/phi-2-orange-v2/bf679659-f55f-43c8-86b5-ed7805e8c3ee.json b/data/hfopenllm_v2/microsoft/rhysjones/phi-2-orange-v2/bf679659-f55f-43c8-86b5-ed7805e8c3ee.json deleted file mode 100644 index 0d33d8a3e..000000000 --- a/data/hfopenllm_v2/microsoft/rhysjones/phi-2-orange-v2/bf679659-f55f-43c8-86b5-ed7805e8c3ee.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/rhysjones_phi-2-orange-v2/1762652580.495306", - "retrieved_timestamp": "1762652580.495307", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "rhysjones/phi-2-orange-v2", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "rhysjones/phi-2-orange-v2", - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.78 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3669740732367895 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4770220109816213 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3629583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25324135638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/suayptalha/Luminis-phi-4/ace18207-a255-447d-9aba-8afdee092164.json b/data/hfopenllm_v2/microsoft/suayptalha/Luminis-phi-4/ace18207-a255-447d-9aba-8afdee092164.json deleted file mode 100644 index 
80bc84d3d..000000000 --- a/data/hfopenllm_v2/microsoft/suayptalha/Luminis-phi-4/ace18207-a255-447d-9aba-8afdee092164.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/suayptalha_Luminis-phi-4/1762652580.544511", - "retrieved_timestamp": "1762652580.544511", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "suayptalha/Luminis-phi-4", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "suayptalha/Luminis-phi-4", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6900069593124022 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6920213038130584 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4637462235649547 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35151006711409394 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45715625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5423869680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/tensopolis/phi-4-tensopolis-v1/bcbdde44-0736-4162-9faf-cd9d8e89d360.json b/data/hfopenllm_v2/microsoft/tensopolis/phi-4-tensopolis-v1/bcbdde44-0736-4162-9faf-cd9d8e89d360.json deleted file mode 100644 index 0e104c263..000000000 --- a/data/hfopenllm_v2/microsoft/tensopolis/phi-4-tensopolis-v1/bcbdde44-0736-4162-9faf-cd9d8e89d360.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tensopolis_phi-4-tensopolis-v1/1762652580.5562031", - "retrieved_timestamp": "1762652580.5562031", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tensopolis/phi-4-tensopolis-v1", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "tensopolis/phi-4-tensopolis-v1", - 
"additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6766679078179231 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6871833310149728 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49395770392749244 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3347315436241611 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4140625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5383976063829787 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/theprint/phi-3-mini-4k-python/f017d759-59fe-42a3-947d-a4b787f084d7.json b/data/hfopenllm_v2/microsoft/theprint/phi-3-mini-4k-python/f017d759-59fe-42a3-947d-a4b787f084d7.json deleted file mode 100644 index 9cf11c57b..000000000 --- a/data/hfopenllm_v2/microsoft/theprint/phi-3-mini-4k-python/f017d759-59fe-42a3-947d-a4b787f084d7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/theprint_phi-3-mini-4k-python/1762652580.5645702", - "retrieved_timestamp": "1762652580.564571", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "theprint/phi-3-mini-4k-python", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "theprint/phi-3-mini-4k-python", - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 4.132 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24087753826513653 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.493759004635898 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10498489425981873 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3921666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35771276595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/unsloth/phi-4-bnb-4bit/c8cfc527-9a58-45e7-a8e0-39caacd8bd58.json b/data/hfopenllm_v2/microsoft/unsloth/phi-4-bnb-4bit/c8cfc527-9a58-45e7-a8e0-39caacd8bd58.json deleted file mode 100644 index 61df9d31a..000000000 --- a/data/hfopenllm_v2/microsoft/unsloth/phi-4-bnb-4bit/c8cfc527-9a58-45e7-a8e0-39caacd8bd58.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/unsloth_phi-4-bnb-4bit/1762652580.579705", - "retrieved_timestamp": "1762652580.579705", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "unsloth/phi-4-bnb-4bit", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "unsloth/phi-4-bnb-4bit", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.058 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6729710501469435 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6769854242339189 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4607250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40072916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5255984042553191 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/unsloth/phi-4-unsloth-bnb-4bit/3bdd8e19-fd61-4d1e-96b1-cdadd4c2d67f.json b/data/hfopenllm_v2/microsoft/unsloth/phi-4-unsloth-bnb-4bit/3bdd8e19-fd61-4d1e-96b1-cdadd4c2d67f.json deleted file mode 100644 index e8c67d6c6..000000000 --- a/data/hfopenllm_v2/microsoft/unsloth/phi-4-unsloth-bnb-4bit/3bdd8e19-fd61-4d1e-96b1-cdadd4c2d67f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/unsloth_phi-4-unsloth-bnb-4bit/1762652580.579966", - "retrieved_timestamp": "1762652580.579967", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "unsloth/phi-4-unsloth-bnb-4bit", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "unsloth/phi-4-unsloth-bnb-4bit", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.483 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6793906833867471 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6791089896968764 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4561933534743202 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33640939597315433 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40339583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5285904255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/unsloth/phi-4/c6080b92-d05a-4bda-ad07-e1b59a427844.json b/data/hfopenllm_v2/microsoft/unsloth/phi-4/c6080b92-d05a-4bda-ad07-e1b59a427844.json deleted file mode 100644 index 58fb39c40..000000000 --- a/data/hfopenllm_v2/microsoft/unsloth/phi-4/c6080b92-d05a-4bda-ad07-e1b59a427844.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/unsloth_phi-4/1762652580.579377", - "retrieved_timestamp": "1762652580.579378", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": 
{ - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "unsloth/phi-4", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "unsloth/phi-4", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6882083981613231 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6885874406040138 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33640939597315433 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41142708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5378158244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/microsoft/uukuguy/speechless-mistral-dolphin-orca-platypus-samantha-7b/49cd8aff-0c7a-4245-831a-f4fc64383b48.json b/data/hfopenllm_v2/microsoft/uukuguy/speechless-mistral-dolphin-orca-platypus-samantha-7b/49cd8aff-0c7a-4245-831a-f4fc64383b48.json deleted file mode 100644 index 6983855b7..000000000 --- a/data/hfopenllm_v2/microsoft/uukuguy/speechless-mistral-dolphin-orca-platypus-samantha-7b/49cd8aff-0c7a-4245-831a-f4fc64383b48.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/uukuguy_speechless-mistral-dolphin-orca-platypus-samantha-7b/1762652580.583631", - "retrieved_timestamp": "1762652580.5836318", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "uukuguy/speechless-mistral-dolphin-orca-platypus-samantha-7b", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "uukuguy/speechless-mistral-dolphin-orca-platypus-samantha-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37002154283966543 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4982774952761688 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43613541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2990359042553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/migtissera/Llama-3-70B-Synthia-v3.5/0516b46b-a957-413f-aadc-58f4339dc60a.json b/data/hfopenllm_v2/migtissera/Llama-3-70B-Synthia-v3.5/0516b46b-a957-413f-aadc-58f4339dc60a.json new file mode 100644 index 000000000..2429a8f70 --- /dev/null +++ b/data/hfopenllm_v2/migtissera/Llama-3-70B-Synthia-v3.5/0516b46b-a957-413f-aadc-58f4339dc60a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/migtissera_Llama-3-70B-Synthia-v3.5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-70B-Synthia-v3.5", + "id": "migtissera/Llama-3-70B-Synthia-v3.5", + "developer": "migtissera", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6076 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6489 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2115 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3876 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4922 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4658 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/migtissera/Llama-3-8B-Synthia-v3.5/97200dd7-7ed0-4a7b-ace9-31c173f017f1.json b/data/hfopenllm_v2/migtissera/Llama-3-8B-Synthia-v3.5/97200dd7-7ed0-4a7b-ace9-31c173f017f1.json new file mode 100644 index 000000000..30146a9c1 --- /dev/null +++ b/data/hfopenllm_v2/migtissera/Llama-3-8B-Synthia-v3.5/97200dd7-7ed0-4a7b-ace9-31c173f017f1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/migtissera_Llama-3-8B-Synthia-v3.5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Synthia-v3.5", + "id": "migtissera/Llama-3-8B-Synthia-v3.5", + "developer": "migtissera", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.507 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4888 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0657 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + 
"hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2718 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4044 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.303 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/migtissera/Tess-3-7B-SFT/758f8332-ffa8-4059-ac6f-400f9367bb23.json b/data/hfopenllm_v2/migtissera/Tess-3-7B-SFT/758f8332-ffa8-4059-ac6f-400f9367bb23.json new file mode 100644 index 000000000..45ec18a45 --- /dev/null +++ b/data/hfopenllm_v2/migtissera/Tess-3-7B-SFT/758f8332-ffa8-4059-ac6f-400f9367bb23.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/migtissera_Tess-3-7B-SFT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Tess-3-7B-SFT", + "id": "migtissera/Tess-3-7B-SFT", + "developer": "migtissera", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3946 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4607 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.04 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": 
"MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4113 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3034 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/migtissera/Tess-3-7B-SFT/cc99f18f-e75c-4cd1-a466-ac8c54877bd2.json b/data/hfopenllm_v2/migtissera/Tess-3-7B-SFT/cc99f18f-e75c-4cd1-a466-ac8c54877bd2.json deleted file mode 100644 index 4eeda3c6f..000000000 --- a/data/hfopenllm_v2/migtissera/Tess-3-7B-SFT/cc99f18f-e75c-4cd1-a466-ac8c54877bd2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/migtissera_Tess-3-7B-SFT/1762652580.358523", - "retrieved_timestamp": "1762652580.3585238", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "migtissera/Tess-3-7B-SFT", - "developer": "migtissera", - "inference_platform": "unknown", - "id": "migtissera/Tess-3-7B-SFT", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3946262583279033 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46073483895076217 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4112708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30335771276595747 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/migtissera/Tess-3-Mistral-Nemo-12B/b1103662-055c-471e-ace8-dd75f607491d.json b/data/hfopenllm_v2/migtissera/Tess-3-Mistral-Nemo-12B/b1103662-055c-471e-ace8-dd75f607491d.json new file mode 100644 index 000000000..ada1e45ae --- /dev/null +++ b/data/hfopenllm_v2/migtissera/Tess-3-Mistral-Nemo-12B/b1103662-055c-471e-ace8-dd75f607491d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/migtissera_Tess-3-Mistral-Nemo-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Tess-3-Mistral-Nemo-12B", + "id": "migtissera/Tess-3-Mistral-Nemo-12B", + "developer": "migtissera", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3355 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4899 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0574 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2508 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4458 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2565 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/migtissera/Tess-v2.5-Phi-3-medium-128k-14B/27b0d675-498f-4351-b92f-7c0d1a3c83bd.json b/data/hfopenllm_v2/migtissera/Tess-v2.5-Phi-3-medium-128k-14B/27b0d675-498f-4351-b92f-7c0d1a3c83bd.json new file 
mode 100644 index 000000000..81b7d3cb0 --- /dev/null +++ b/data/hfopenllm_v2/migtissera/Tess-v2.5-Phi-3-medium-128k-14B/27b0d675-498f-4351-b92f-7c0d1a3c83bd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/migtissera_Tess-v2.5-Phi-3-medium-128k-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Tess-v2.5-Phi-3-medium-128k-14B", + "id": "migtissera/Tess-v2.5-Phi-3-medium-128k-14B", + "developer": "migtissera", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 13.96 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4539 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6207 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0506 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4113 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3732 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/migtissera/Tess-v2.5.2-Qwen2-72B/3f1f88d4-2908-4f28-b8d3-4f9ded18ba0e.json b/data/hfopenllm_v2/migtissera/Tess-v2.5.2-Qwen2-72B/3f1f88d4-2908-4f28-b8d3-4f9ded18ba0e.json new file mode 100644 index 000000000..82ec5a874 --- /dev/null +++ b/data/hfopenllm_v2/migtissera/Tess-v2.5.2-Qwen2-72B/3f1f88d4-2908-4f28-b8d3-4f9ded18ba0e.json @@ -0,0 +1,132 @@ +{ + "schema_version": 
"0.2.0", + "evaluation_id": "hfopenllm_v2/migtissera_Tess-v2.5.2-Qwen2-72B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Tess-v2.5.2-Qwen2-72B", + "id": "migtissera/Tess-v2.5.2-Qwen2-72B", + "developer": "migtissera", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4494 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6647 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2938 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3507 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4188 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5561 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/migtissera/Trinity-2-Codestral-22B-v0.2/3883b0d3-e442-42d3-adc6-ed959c902dd3.json b/data/hfopenllm_v2/migtissera/Trinity-2-Codestral-22B-v0.2/3883b0d3-e442-42d3-adc6-ed959c902dd3.json new file mode 100644 index 000000000..3c3025654 --- /dev/null +++ b/data/hfopenllm_v2/migtissera/Trinity-2-Codestral-22B-v0.2/3883b0d3-e442-42d3-adc6-ed959c902dd3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/migtissera_Trinity-2-Codestral-22B-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + 
"source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Trinity-2-Codestral-22B-v0.2", + "id": "migtissera/Trinity-2-Codestral-22B-v0.2", + "developer": "migtissera", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4345 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5686 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0838 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4045 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.334 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/migtissera/Trinity-2-Codestral-22B-v0.2/7320b12a-7511-441d-9d56-f7e713af4470.json b/data/hfopenllm_v2/migtissera/Trinity-2-Codestral-22B-v0.2/7320b12a-7511-441d-9d56-f7e713af4470.json deleted file mode 100644 index 615df3750..000000000 --- a/data/hfopenllm_v2/migtissera/Trinity-2-Codestral-22B-v0.2/7320b12a-7511-441d-9d56-f7e713af4470.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/migtissera_Trinity-2-Codestral-22B-v0.2/1762652580.3597598", - "retrieved_timestamp": "1762652580.359761", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF 
Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "migtissera/Trinity-2-Codestral-22B-v0.2", - "developer": "migtissera", - "inference_platform": "unknown", - "id": "migtissera/Trinity-2-Codestral-22B-v0.2", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 22.247 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43446832183052075 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5686364683055418 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08383685800604229 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40447916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33402593085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/migtissera/Trinity-2-Codestral-22B-v0.2/a18b3d46-7e65-4cb3-b7e5-12b86f34a572.json b/data/hfopenllm_v2/migtissera/Trinity-2-Codestral-22B-v0.2/a18b3d46-7e65-4cb3-b7e5-12b86f34a572.json deleted file mode 100644 index 8f0138eb0..000000000 --- a/data/hfopenllm_v2/migtissera/Trinity-2-Codestral-22B-v0.2/a18b3d46-7e65-4cb3-b7e5-12b86f34a572.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/migtissera_Trinity-2-Codestral-22B-v0.2/1762652580.359978", - "retrieved_timestamp": "1762652580.359979", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "migtissera/Trinity-2-Codestral-22B-v0.2", - "developer": "migtissera", - "inference_platform": "unknown", - "id": "migtissera/Trinity-2-Codestral-22B-v0.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 22.247 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44301121025545553 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": 
"Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5706466356198404 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08685800604229607 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4031458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3353557180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/migtissera/Trinity-2-Codestral-22B-v0.2/da172cdb-1388-42f5-97b1-ae8e15291631.json b/data/hfopenllm_v2/migtissera/Trinity-2-Codestral-22B-v0.2/da172cdb-1388-42f5-97b1-ae8e15291631.json new file mode 100644 index 000000000..5c049da80 --- /dev/null +++ b/data/hfopenllm_v2/migtissera/Trinity-2-Codestral-22B-v0.2/da172cdb-1388-42f5-97b1-ae8e15291631.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/migtissera_Trinity-2-Codestral-22B-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Trinity-2-Codestral-22B-v0.2", + "id": "migtissera/Trinity-2-Codestral-22B-v0.2", + "developer": "migtissera", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.443 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5706 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0869 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": 
"GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4031 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3354 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/migtissera/Trinity-2-Codestral-22B/7c94dbfa-4b3a-43fd-9f2c-b3d63d8ef700.json b/data/hfopenllm_v2/migtissera/Trinity-2-Codestral-22B/7c94dbfa-4b3a-43fd-9f2c-b3d63d8ef700.json new file mode 100644 index 000000000..186299b68 --- /dev/null +++ b/data/hfopenllm_v2/migtissera/Trinity-2-Codestral-22B/7c94dbfa-4b3a-43fd-9f2c-b3d63d8ef700.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/migtissera_Trinity-2-Codestral-22B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Trinity-2-Codestral-22B", + "id": "migtissera/Trinity-2-Codestral-22B", + "developer": "migtissera", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4202 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5593 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0967 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 
0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3146 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4111 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3308 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/migtissera/Trinity-2-Codestral-22B/e075cb71-eaae-46e0-917b-bf84482f76c9.json b/data/hfopenllm_v2/migtissera/Trinity-2-Codestral-22B/e075cb71-eaae-46e0-917b-bf84482f76c9.json deleted file mode 100644 index dfecc3f2a..000000000 --- a/data/hfopenllm_v2/migtissera/Trinity-2-Codestral-22B/e075cb71-eaae-46e0-917b-bf84482f76c9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/migtissera_Trinity-2-Codestral-22B/1762652580.35951", - "retrieved_timestamp": "1762652580.3595111", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "migtissera/Trinity-2-Codestral-22B", - "developer": "migtissera", - "inference_platform": "unknown", - "id": "migtissera/Trinity-2-Codestral-22B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 22.247 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4202050559182968 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5593244825460373 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09667673716012085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4110520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.3307845744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/mindw96/DeepSeek-llama3.3-Bllossom-8B-DACON-LLM3/7cdd1de0-767d-4527-a024-c67166bb8b20.json b/data/hfopenllm_v2/mindw96/DeepSeek-llama3.3-Bllossom-8B-DACON-LLM3/7cdd1de0-767d-4527-a024-c67166bb8b20.json new file mode 100644 index 000000000..036d983ff --- /dev/null +++ b/data/hfopenllm_v2/mindw96/DeepSeek-llama3.3-Bllossom-8B-DACON-LLM3/7cdd1de0-767d-4527-a024-c67166bb8b20.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mindw96_DeepSeek-llama3.3-Bllossom-8B-DACON-LLM3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-llama3.3-Bllossom-8B-DACON-LLM3", + "id": "mindw96/DeepSeek-llama3.3-Bllossom-8B-DACON-LLM3", + "developer": "mindw96", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1388 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3068 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0083 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2508 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3792 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1106 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/minghaowu/Qwen1.5-1.8B-OpenHermes-2.5/d4702278-54c4-42e8-a901-dfe5c7f2004a.json b/data/hfopenllm_v2/minghaowu/Qwen1.5-1.8B-OpenHermes-2.5/d4702278-54c4-42e8-a901-dfe5c7f2004a.json new file mode 100644 index 000000000..fb2414062 --- /dev/null +++ b/data/hfopenllm_v2/minghaowu/Qwen1.5-1.8B-OpenHermes-2.5/d4702278-54c4-42e8-a901-dfe5c7f2004a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/minghaowu_Qwen1.5-1.8B-OpenHermes-2.5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen1.5-1.8B-OpenHermes-2.5", + "id": "minghaowu/Qwen1.5-1.8B-OpenHermes-2.5", + "developer": "minghaowu", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.837 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2778 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3375 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0242 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3529 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1792 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ministral/Ministral-3b-instruct/149f8ee5-4376-4fcc-8f87-7412a3083570.json b/data/hfopenllm_v2/ministral/Ministral-3b-instruct/149f8ee5-4376-4fcc-8f87-7412a3083570.json new file mode 
100644 index 000000000..d2bace4f0 --- /dev/null +++ b/data/hfopenllm_v2/ministral/Ministral-3b-instruct/149f8ee5-4376-4fcc-8f87-7412a3083570.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ministral_Ministral-3b-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ministral-3b-instruct", + "id": "ministral/Ministral-3b-instruct", + "developer": "ministral", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 3.316 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1358 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3192 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0083 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2517 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3382 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1093 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ministral/Ministral-3b-instruct/83b6f014-f8a0-4e69-ae60-cc3a7aeaaf1c.json b/data/hfopenllm_v2/ministral/Ministral-3b-instruct/83b6f014-f8a0-4e69-ae60-cc3a7aeaaf1c.json deleted file mode 100644 index a9d55af08..000000000 --- a/data/hfopenllm_v2/ministral/Ministral-3b-instruct/83b6f014-f8a0-4e69-ae60-cc3a7aeaaf1c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/ministral_Ministral-3b-instruct/1762652580.360654", - "retrieved_timestamp": "1762652580.360655", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ministral/Ministral-3b-instruct", - "developer": "ministral", - "inference_platform": "unknown", - "id": "ministral/Ministral-3b-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 3.316 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1357642167227401 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31918598478332383 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33825 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10929188829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral-community/Mistral-7B-v0.2/de82b746-c5d7-450a-bc2b-1b2859d91d6b.json b/data/hfopenllm_v2/mistral-community/Mistral-7B-v0.2/de82b746-c5d7-450a-bc2b-1b2859d91d6b.json new file mode 100644 index 000000000..0f1e81e70 --- /dev/null +++ b/data/hfopenllm_v2/mistral-community/Mistral-7B-v0.2/de82b746-c5d7-450a-bc2b-1b2859d91d6b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mistral-community_Mistral-7B-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-7B-v0.2", + "id": "mistral-community/Mistral-7B-v0.2", + "developer": "mistral-community", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2266 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.451 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0302 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4032 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral-community/Mixtral-8x22B-v0.1/d2a916a6-288a-4761-a3fd-ca674edb67c1.json b/data/hfopenllm_v2/mistral-community/Mixtral-8x22B-v0.1/d2a916a6-288a-4761-a3fd-ca674edb67c1.json new file mode 100644 index 000000000..944c1169a --- /dev/null +++ b/data/hfopenllm_v2/mistral-community/Mixtral-8x22B-v0.1/d2a916a6-288a-4761-a3fd-ca674edb67c1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mistral-community_Mixtral-8x22B-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mixtral-8x22B-v0.1", + "id": "mistral-community/Mixtral-8x22B-v0.1", + "developer": "mistral-community", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Unknown", + "params_billions": 0.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3167 + } + }, + { + "evaluation_name": 
"BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.38 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1543 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.33 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3533 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.36 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral-community/mixtral-8x22B-v0.3/cda497f9-c7f9-48d6-944b-0167476e5e5c.json b/data/hfopenllm_v2/mistral-community/mixtral-8x22B-v0.3/cda497f9-c7f9-48d6-944b-0167476e5e5c.json new file mode 100644 index 000000000..036d6208b --- /dev/null +++ b/data/hfopenllm_v2/mistral-community/mixtral-8x22B-v0.3/cda497f9-c7f9-48d6-944b-0167476e5e5c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mistral-community_mixtral-8x22B-v0.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mixtral-8x22B-v0.3", + "id": "mistral-community/mixtral-8x22B-v0.3", + "developer": "mistral-community", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 140.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2583 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.625 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1835 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3775 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4037 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4639 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/Corianas/Neural-Mistral-7B/4fb7a806-1176-474e-a039-b388f050cd45.json b/data/hfopenllm_v2/mistral/Corianas/Neural-Mistral-7B/4fb7a806-1176-474e-a039-b388f050cd45.json deleted file mode 100644 index 2dbe64db1..000000000 --- a/data/hfopenllm_v2/mistral/Corianas/Neural-Mistral-7B/4fb7a806-1176-474e-a039-b388f050cd45.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Corianas_Neural-Mistral-7B/1762652579.511706", - "retrieved_timestamp": "1762652579.5117068", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Corianas/Neural-Mistral-7B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "Corianas/Neural-Mistral-7B", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5489235229191878 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4428023404192858 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, 
- "max_score": 1 - }, - "score_details": { - "score": 0.0188821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3872708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27376994680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/Dans-DiscountModels/Mistral-7b-v0.3-Test-E0.7/393f8623-7f38-4aaa-a460-cbdcb74c2d04.json b/data/hfopenllm_v2/mistral/Dans-DiscountModels/Mistral-7b-v0.3-Test-E0.7/393f8623-7f38-4aaa-a460-cbdcb74c2d04.json deleted file mode 100644 index 55f388252..000000000 --- a/data/hfopenllm_v2/mistral/Dans-DiscountModels/Mistral-7b-v0.3-Test-E0.7/393f8623-7f38-4aaa-a460-cbdcb74c2d04.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_Mistral-7b-v0.3-Test-E0.7/1762652579.536513", - "retrieved_timestamp": "1762652579.536514", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Dans-DiscountModels/Mistral-7b-v0.3-Test-E0.7", - "developer": "mistral", - "inference_platform": "unknown", - "id": "Dans-DiscountModels/Mistral-7b-v0.3-Test-E0.7", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5123538876846767 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4750220653053363 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.033987915407854986 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40051041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2744348404255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/Dans-DiscountModels/mistral-7b-test-merged/5ba7e296-cdd3-40e8-b56f-cc44ef0c3dcb.json b/data/hfopenllm_v2/mistral/Dans-DiscountModels/mistral-7b-test-merged/5ba7e296-cdd3-40e8-b56f-cc44ef0c3dcb.json deleted file mode 100644 index 7bac3870e..000000000 --- a/data/hfopenllm_v2/mistral/Dans-DiscountModels/mistral-7b-test-merged/5ba7e296-cdd3-40e8-b56f-cc44ef0c3dcb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_mistral-7b-test-merged/1762652579.536763", - "retrieved_timestamp": "1762652579.536763", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Dans-DiscountModels/mistral-7b-test-merged", - "developer": "mistral", - "inference_platform": "unknown", - "id": "Dans-DiscountModels/mistral-7b-test-merged", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6678003253589365 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48981661658184755 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0445619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3753958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29778922872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/DreadPoor/felix_dies-mistral-7B-model_stock/0444a153-1852-4a0d-959e-750c933777bd.json b/data/hfopenllm_v2/mistral/DreadPoor/felix_dies-mistral-7B-model_stock/0444a153-1852-4a0d-959e-750c933777bd.json deleted file mode 100644 index 71fe42bc1..000000000 --- a/data/hfopenllm_v2/mistral/DreadPoor/felix_dies-mistral-7B-model_stock/0444a153-1852-4a0d-959e-750c933777bd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - 
"evaluation_id": "hfopenllm_v2/DreadPoor_felix_dies-mistral-7B-model_stock/1762652579.5887182", - "retrieved_timestamp": "1762652579.5887191", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DreadPoor/felix_dies-mistral-7B-model_stock", - "developer": "mistral", - "inference_platform": "unknown", - "id": "DreadPoor/felix_dies-mistral-7B-model_stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30077860077926566 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49009180735274227 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4518229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3109208776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/EpistemeAI2/Fireball-MathMistral-Nemo-Base-2407-v2dpo/b798f31f-5fab-4f21-8689-fe832afb873b.json b/data/hfopenllm_v2/mistral/EpistemeAI2/Fireball-MathMistral-Nemo-Base-2407-v2dpo/b798f31f-5fab-4f21-8689-fe832afb873b.json deleted file mode 100644 index 51e75eba1..000000000 --- a/data/hfopenllm_v2/mistral/EpistemeAI2/Fireball-MathMistral-Nemo-Base-2407-v2dpo/b798f31f-5fab-4f21-8689-fe832afb873b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-MathMistral-Nemo-Base-2407-v2dpo/1762652579.612103", - "retrieved_timestamp": "1762652579.612104", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EpistemeAI2/Fireball-MathMistral-Nemo-Base-2407-v2dpo", - "developer": "mistral", - "inference_platform": "unknown", - "id": "EpistemeAI2/Fireball-MathMistral-Nemo-Base-2407-v2dpo", - 
"additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 11.58 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30972043067948596 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43276373285682107 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03700906344410876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4029583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11477726063829788 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/FuJhen/mistral_7b_v0.1_structedData_e2e/3ba2b06b-b44a-4ad6-bf38-f1602995c2f9.json b/data/hfopenllm_v2/mistral/FuJhen/mistral_7b_v0.1_structedData_e2e/3ba2b06b-b44a-4ad6-bf38-f1602995c2f9.json deleted file mode 100644 index 6ff3b094d..000000000 --- a/data/hfopenllm_v2/mistral/FuJhen/mistral_7b_v0.1_structedData_e2e/3ba2b06b-b44a-4ad6-bf38-f1602995c2f9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FuJhen_mistral_7b_v0.1_structedData_e2e/1762652579.625389", - "retrieved_timestamp": "1762652579.62539", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FuJhen/mistral_7b_v0.1_structedData_e2e", - "developer": "mistral", - "inference_platform": "unknown", - "id": "FuJhen/mistral_7b_v0.1_structedData_e2e", - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17268403391889076 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4113914854984489 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.004531722054380665 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3722916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2810837765957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/FuJhen/mistral_7b_v0.1_structedData_viggo/3008b476-f005-4672-a953-c86b29ba3ef2.json b/data/hfopenllm_v2/mistral/FuJhen/mistral_7b_v0.1_structedData_viggo/3008b476-f005-4672-a953-c86b29ba3ef2.json deleted file mode 100644 index db8b2c1b3..000000000 --- a/data/hfopenllm_v2/mistral/FuJhen/mistral_7b_v0.1_structedData_viggo/3008b476-f005-4672-a953-c86b29ba3ef2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/FuJhen_mistral_7b_v0.1_structedData_viggo/1762652579.625654", - "retrieved_timestamp": "1762652579.625655", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "FuJhen/mistral_7b_v0.1_structedData_viggo", - "developer": "mistral", - "inference_platform": "unknown", - "id": "FuJhen/mistral_7b_v0.1_structedData_viggo", - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 14.483 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17832905579418165 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45238634545986817 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.37381250000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2942154255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/Locutusque/TinyMistral-248M-v2.5/9a3f7863-0041-4473-b3f0-ad25f0d9310f.json b/data/hfopenllm_v2/mistral/Locutusque/TinyMistral-248M-v2.5/9a3f7863-0041-4473-b3f0-ad25f0d9310f.json deleted file mode 100644 index 687f41dc2..000000000 --- a/data/hfopenllm_v2/mistral/Locutusque/TinyMistral-248M-v2.5/9a3f7863-0041-4473-b3f0-ad25f0d9310f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Locutusque_TinyMistral-248M-v2.5/1762652579.73623", - "retrieved_timestamp": "1762652579.7362418", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Locutusque/TinyMistral-248M-v2.5", - "developer": "mistral", - "inference_platform": "unknown", - "id": "Locutusque/TinyMistral-248M-v2.5", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 0.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1336409615376091 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30385761123260785 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25083892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37815624999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11353058510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/M4-ai/TinyMistral-248M-v3/830423e1-ec14-4477-8c82-8516bb8e954f.json b/data/hfopenllm_v2/mistral/M4-ai/TinyMistral-248M-v3/830423e1-ec14-4477-8c82-8516bb8e954f.json deleted file mode 100644 index ba2af7d04..000000000 --- a/data/hfopenllm_v2/mistral/M4-ai/TinyMistral-248M-v3/830423e1-ec14-4477-8c82-8516bb8e954f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/M4-ai_TinyMistral-248M-v3/1762652579.742201", - "retrieved_timestamp": "1762652579.742202", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "M4-ai/TinyMistral-248M-v3", - "developer": "mistral", - "inference_platform": "unknown", - "id": "M4-ai/TinyMistral-248M-v3", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 0.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16386631914431488 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2884549938995566 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.004531722054380665 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2407718120805369 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3793333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11319813829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/Marsouuu/MistralBase-4x7B-MoE-ECE-PRYMMAL-Martial/5cd26359-d15a-4d0b-92f1-c31101e7b993.json b/data/hfopenllm_v2/mistral/Marsouuu/MistralBase-4x7B-MoE-ECE-PRYMMAL-Martial/5cd26359-d15a-4d0b-92f1-c31101e7b993.json deleted file mode 100644 index d699af57e..000000000 --- a/data/hfopenllm_v2/mistral/Marsouuu/MistralBase-4x7B-MoE-ECE-PRYMMAL-Martial/5cd26359-d15a-4d0b-92f1-c31101e7b993.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Marsouuu_MistralBase-4x7B-MoE-ECE-PRYMMAL-Martial/1762652579.7477188", - "retrieved_timestamp": "1762652579.74772", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Marsouuu/MistralBase-4x7B-MoE-ECE-PRYMMAL-Martial", - "developer": "mistral", - "inference_platform": "unknown", - "id": "Marsouuu/MistralBase-4x7B-MoE-ECE-PRYMMAL-Martial", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - 
"params_billions": 24.16 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16973629968483622 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3464368053320647 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3990833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13788231382978725 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/NousResearch/DeepHermes-3-Mistral-24B-Preview/b1f439ee-711a-41b8-b63d-dd28cb63266e.json b/data/hfopenllm_v2/mistral/NousResearch/DeepHermes-3-Mistral-24B-Preview/b1f439ee-711a-41b8-b63d-dd28cb63266e.json deleted file mode 100644 index 46318925e..000000000 --- a/data/hfopenllm_v2/mistral/NousResearch/DeepHermes-3-Mistral-24B-Preview/b1f439ee-711a-41b8-b63d-dd28cb63266e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/NousResearch_DeepHermes-3-Mistral-24B-Preview/1762652579.78962", - "retrieved_timestamp": "1762652579.7896209", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NousResearch/DeepHermes-3-Mistral-24B-Preview", - "developer": "mistral", - "inference_platform": "unknown", - "id": "NousResearch/DeepHermes-3-Mistral-24B-Preview", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45357761849669986 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6488196385442672 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact 
Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25755287009063443 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3699664429530201 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4503333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45902593085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/NousResearch/Hermes-2-Pro-Mistral-7B/b8d954d0-a820-4927-a7f8-b0083cf9db9c.json b/data/hfopenllm_v2/mistral/NousResearch/Hermes-2-Pro-Mistral-7B/b8d954d0-a820-4927-a7f8-b0083cf9db9c.json deleted file mode 100644 index 3d4986c51..000000000 --- a/data/hfopenllm_v2/mistral/NousResearch/Hermes-2-Pro-Mistral-7B/b8d954d0-a820-4927-a7f8-b0083cf9db9c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/NousResearch_Hermes-2-Pro-Mistral-7B/1762652579.790145", - "retrieved_timestamp": "1762652579.790146", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NousResearch/Hermes-2-Pro-Mistral-7B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "NousResearch/Hermes-2-Pro-Mistral-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5668337788179807 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4995435330498075 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43759375 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29463098404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/NousResearch/Yarn-Mistral-7b-128k/c6411eb6-8304-49e6-ac7b-5300deb27c55.json b/data/hfopenllm_v2/mistral/NousResearch/Yarn-Mistral-7b-128k/c6411eb6-8304-49e6-ac7b-5300deb27c55.json deleted file mode 100644 index f3740d5f3..000000000 --- a/data/hfopenllm_v2/mistral/NousResearch/Yarn-Mistral-7b-128k/c6411eb6-8304-49e6-ac7b-5300deb27c55.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/NousResearch_Yarn-Mistral-7b-128k/1762652579.793008", - "retrieved_timestamp": "1762652579.7930088", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NousResearch/Yarn-Mistral-7b-128k", - "developer": "mistral", - "inference_platform": "unknown", - "id": "NousResearch/Yarn-Mistral-7b-128k", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19336693307091848 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4314467711273296 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03172205438066465 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4070520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.289311835106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/NousResearch/Yarn-Mistral-7b-64k/c7fcd944-78ab-422d-b0ef-8dc394266473.json b/data/hfopenllm_v2/mistral/NousResearch/Yarn-Mistral-7b-64k/c7fcd944-78ab-422d-b0ef-8dc394266473.json deleted file mode 100644 index f0adcd4ac..000000000 --- a/data/hfopenllm_v2/mistral/NousResearch/Yarn-Mistral-7b-64k/c7fcd944-78ab-422d-b0ef-8dc394266473.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/NousResearch_Yarn-Mistral-7b-64k/1762652579.7932239", - "retrieved_timestamp": "1762652579.793225", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NousResearch/Yarn-Mistral-7b-64k", - "developer": "mistral", - "inference_platform": "unknown", - "id": "NousResearch/Yarn-Mistral-7b-64k", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2079548930171944 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42931904551037814 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03700906344410876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41238541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2913896276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/Open-Orca/Mistral-7B-OpenOrca/c6e0aa8c-8765-4e2f-a6b2-cdeb885d29a4.json b/data/hfopenllm_v2/mistral/Open-Orca/Mistral-7B-OpenOrca/c6e0aa8c-8765-4e2f-a6b2-cdeb885d29a4.json deleted file mode 100644 index 486785803..000000000 --- a/data/hfopenllm_v2/mistral/Open-Orca/Mistral-7B-OpenOrca/c6e0aa8c-8765-4e2f-a6b2-cdeb885d29a4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Open-Orca_Mistral-7B-OpenOrca/1762652579.799384", - "retrieved_timestamp": "1762652579.799385", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Open-Orca/Mistral-7B-OpenOrca", - "developer": "mistral", - "inference_platform": "unknown", - "id": "Open-Orca/Mistral-7B-OpenOrca", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4977659277384008 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4768173517353546 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.035498489425981876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38578124999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26529255319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/PranavHarshan/LaMistral-V4/21944667-04e0-46dc-9896-eef32c26fa6b.json b/data/hfopenllm_v2/mistral/PranavHarshan/LaMistral-V4/21944667-04e0-46dc-9896-eef32c26fa6b.json deleted file mode 100644 index 03cfeb98d..000000000 --- a/data/hfopenllm_v2/mistral/PranavHarshan/LaMistral-V4/21944667-04e0-46dc-9896-eef32c26fa6b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/PranavHarshan_LaMistral-V4/1762652579.8148758", - "retrieved_timestamp": "1762652579.814877", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "PranavHarshan/LaMistral-V4", - "developer": "mistral", - "inference_platform": "unknown", - "id": "PranavHarshan/LaMistral-V4", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.623861354539289 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5184255342586473 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06873111782477341 - } - }, - { - "evaluation_name": "GPQA", - 
"metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3642916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35987367021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/Pretergeek/openchat-3.5-0106_Rebased_Mistral-7B-v0.2/56d07a1f-1f1f-4559-b57d-bee3bf884860.json b/data/hfopenllm_v2/mistral/Pretergeek/openchat-3.5-0106_Rebased_Mistral-7B-v0.2/56d07a1f-1f1f-4559-b57d-bee3bf884860.json deleted file mode 100644 index c717acd63..000000000 --- a/data/hfopenllm_v2/mistral/Pretergeek/openchat-3.5-0106_Rebased_Mistral-7B-v0.2/56d07a1f-1f1f-4559-b57d-bee3bf884860.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Pretergeek_openchat-3.5-0106_Rebased_Mistral-7B-v0.2/1762652579.817152", - "retrieved_timestamp": "1762652579.817153", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Pretergeek/openchat-3.5-0106_Rebased_Mistral-7B-v0.2", - "developer": "mistral", - "inference_platform": "unknown", - "id": "Pretergeek/openchat-3.5-0106_Rebased_Mistral-7B-v0.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37062106322335847 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36271140677296004 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4840104166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2829953457446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/TTTXXX01/Mistral-7B-Base-SimPO2-5e-7/062d38c7-07e6-4f71-a7a3-e40a187b6f77.json b/data/hfopenllm_v2/mistral/TTTXXX01/Mistral-7B-Base-SimPO2-5e-7/062d38c7-07e6-4f71-a7a3-e40a187b6f77.json deleted file mode 100644 index 7994ea1ef..000000000 --- a/data/hfopenllm_v2/mistral/TTTXXX01/Mistral-7B-Base-SimPO2-5e-7/062d38c7-07e6-4f71-a7a3-e40a187b6f77.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/TTTXXX01_Mistral-7B-Base-SimPO2-5e-7/1762652579.911438", - "retrieved_timestamp": "1762652579.9114392", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "TTTXXX01/Mistral-7B-Base-SimPO2-5e-7", - "developer": "mistral", - "inference_platform": "unknown", - "id": "TTTXXX01/Mistral-7B-Base-SimPO2-5e-7", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43918912928806675 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43195515014882774 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36041666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2765957446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/TencentARC/MetaMath-Mistral-Pro/c2274449-ebc7-4e53-94bf-82e1f6810f6b.json b/data/hfopenllm_v2/mistral/TencentARC/MetaMath-Mistral-Pro/c2274449-ebc7-4e53-94bf-82e1f6810f6b.json deleted file mode 100644 index 78bb78b70..000000000 --- a/data/hfopenllm_v2/mistral/TencentARC/MetaMath-Mistral-Pro/c2274449-ebc7-4e53-94bf-82e1f6810f6b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/TencentARC_MetaMath-Mistral-Pro/1762652579.913366", - "retrieved_timestamp": "1762652579.913366", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "TencentARC/MetaMath-Mistral-Pro", - "developer": "mistral", - "inference_platform": "unknown", - "id": "TencentARC/MetaMath-Mistral-Pro", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 8.987 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21187670935340452 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44131618555883606 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07628398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35241666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2471742021276596 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/TencentARC/Mistral_Pro_8B_v0.1/07ac72af-fa7e-4fe2-8a67-e893edbbd206.json b/data/hfopenllm_v2/mistral/TencentARC/Mistral_Pro_8B_v0.1/07ac72af-fa7e-4fe2-8a67-e893edbbd206.json deleted file mode 100644 index dcb2cd2bb..000000000 --- a/data/hfopenllm_v2/mistral/TencentARC/Mistral_Pro_8B_v0.1/07ac72af-fa7e-4fe2-8a67-e893edbbd206.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/TencentARC_Mistral_Pro_8B_v0.1/1762652579.913616", - "retrieved_timestamp": "1762652579.913617", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "TencentARC/Mistral_Pro_8B_v0.1", - "developer": "mistral", - "inference_platform": "unknown", - "id": "TencentARC/Mistral_Pro_8B_v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 8.987 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21145227995053123 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4525975968066435 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42422916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2765126329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/Triangle104/Mistral-Redemption-Arc/189f08b4-7e58-4820-9ff7-bcea4530e3dd.json b/data/hfopenllm_v2/mistral/Triangle104/Mistral-Redemption-Arc/189f08b4-7e58-4820-9ff7-bcea4530e3dd.json deleted file mode 100644 index bf1dbc730..000000000 --- a/data/hfopenllm_v2/mistral/Triangle104/Mistral-Redemption-Arc/189f08b4-7e58-4820-9ff7-bcea4530e3dd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Mistral-Redemption-Arc/1762652579.929934", - "retrieved_timestamp": "1762652579.9299352", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Mistral-Redemption-Arc", - "developer": "mistral", - "inference_platform": "unknown", - "id": "Triangle104/Mistral-Redemption-Arc", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40289432040319684 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6254876729064861 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41012084592145015 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34731543624161076 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45951041666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4509640957446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/Triangle104/Mistral-Small-24b-Harmony/e8d645e6-8ec4-4c0c-8cf2-8aa7e126e1f1.json b/data/hfopenllm_v2/mistral/Triangle104/Mistral-Small-24b-Harmony/e8d645e6-8ec4-4c0c-8cf2-8aa7e126e1f1.json deleted file mode 100644 index 43bc47611..000000000 --- a/data/hfopenllm_v2/mistral/Triangle104/Mistral-Small-24b-Harmony/e8d645e6-8ec4-4c0c-8cf2-8aa7e126e1f1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Triangle104_Mistral-Small-24b-Harmony/1762652579.930191", - "retrieved_timestamp": "1762652579.9301918", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Triangle104/Mistral-Small-24b-Harmony", - "developer": "mistral", - "inference_platform": "unknown", - "id": "Triangle104/Mistral-Small-24b-Harmony", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16871234989826994 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6433732705921861 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19108761329305135 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38422818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4276041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5430518617021277 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/mistral/UCLA-AGI/Mistral7B-PairRM-SPPO-Iter1/01c4d932-bdcf-4840-83cb-e441585d70e2.json b/data/hfopenllm_v2/mistral/UCLA-AGI/Mistral7B-PairRM-SPPO-Iter1/01c4d932-bdcf-4840-83cb-e441585d70e2.json deleted file mode 100644 index 93a4dac45..000000000 --- a/data/hfopenllm_v2/mistral/UCLA-AGI/Mistral7B-PairRM-SPPO-Iter1/01c4d932-bdcf-4840-83cb-e441585d70e2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/UCLA-AGI_Mistral7B-PairRM-SPPO-Iter1/1762652579.9377868", - "retrieved_timestamp": "1762652579.937788", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "UCLA-AGI/Mistral7B-PairRM-SPPO-Iter1", - "developer": "mistral", - "inference_platform": "unknown", - "id": "UCLA-AGI/Mistral7B-PairRM-SPPO-Iter1", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5047352136774869 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4468056921650662 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3991770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26953125 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/UCLA-AGI/Mistral7B-PairRM-SPPO-Iter2/b0e6d5e1-3f41-4dfc-8845-b6d028820816.json b/data/hfopenllm_v2/mistral/UCLA-AGI/Mistral7B-PairRM-SPPO-Iter2/b0e6d5e1-3f41-4dfc-8845-b6d028820816.json deleted file mode 100644 index c2436fc71..000000000 --- a/data/hfopenllm_v2/mistral/UCLA-AGI/Mistral7B-PairRM-SPPO-Iter2/b0e6d5e1-3f41-4dfc-8845-b6d028820816.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/UCLA-AGI_Mistral7B-PairRM-SPPO-Iter2/1762652579.937983", - "retrieved_timestamp": "1762652579.937984", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": 
"Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "UCLA-AGI/Mistral7B-PairRM-SPPO-Iter2", - "developer": "mistral", - "inference_platform": "unknown", - "id": "UCLA-AGI/Mistral7B-PairRM-SPPO-Iter2", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4445848127413041 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4465719945610438 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40854166666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2677027925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/UCLA-AGI/Mistral7B-PairRM-SPPO-Iter3/66cc8076-71be-43fc-9efb-edd8ad19a6b6.json b/data/hfopenllm_v2/mistral/UCLA-AGI/Mistral7B-PairRM-SPPO-Iter3/66cc8076-71be-43fc-9efb-edd8ad19a6b6.json deleted file mode 100644 index c869d4666..000000000 --- a/data/hfopenllm_v2/mistral/UCLA-AGI/Mistral7B-PairRM-SPPO-Iter3/66cc8076-71be-43fc-9efb-edd8ad19a6b6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/UCLA-AGI_Mistral7B-PairRM-SPPO-Iter3/1762652579.938179", - "retrieved_timestamp": "1762652579.9381802", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "UCLA-AGI/Mistral7B-PairRM-SPPO-Iter3", - "developer": "mistral", - "inference_platform": "unknown", - "id": "UCLA-AGI/Mistral7B-PairRM-SPPO-Iter3", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4350678422142138 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4396587862984616 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.023413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40711458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2657912234042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/UCLA-AGI/Mistral7B-PairRM-SPPO/01613adc-1206-4695-ae19-31f2b7ee0d9d.json b/data/hfopenllm_v2/mistral/UCLA-AGI/Mistral7B-PairRM-SPPO/01613adc-1206-4695-ae19-31f2b7ee0d9d.json deleted file mode 100644 index f54ddfd48..000000000 --- a/data/hfopenllm_v2/mistral/UCLA-AGI/Mistral7B-PairRM-SPPO/01613adc-1206-4695-ae19-31f2b7ee0d9d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/UCLA-AGI_Mistral7B-PairRM-SPPO/1762652579.93755", - "retrieved_timestamp": "1762652579.93755", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "UCLA-AGI/Mistral7B-PairRM-SPPO", - "developer": "mistral", - "inference_platform": "unknown", - "id": "UCLA-AGI/Mistral7B-PairRM-SPPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43549227161708715 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4438979817093698 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030966767371601207 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39647916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26205119680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/Unbabel/TowerInstruct-Mistral-7B-v0.2/cc6d8d11-2273-41fa-95eb-5d1f7d4a2311.json b/data/hfopenllm_v2/mistral/Unbabel/TowerInstruct-Mistral-7B-v0.2/cc6d8d11-2273-41fa-95eb-5d1f7d4a2311.json deleted file mode 100644 index ef9a3f570..000000000 --- a/data/hfopenllm_v2/mistral/Unbabel/TowerInstruct-Mistral-7B-v0.2/cc6d8d11-2273-41fa-95eb-5d1f7d4a2311.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Unbabel_TowerInstruct-Mistral-7B-v0.2/1762652579.938655", - "retrieved_timestamp": "1762652579.938656", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Unbabel/TowerInstruct-Mistral-7B-v0.2", - "developer": "mistral", - "inference_platform": "unknown", - "id": "Unbabel/TowerInstruct-Mistral-7B-v0.2", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2843422119975 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.388195180992626 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24748322147651006 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4522291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19680851063829788 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/allknowingroger/Mistralmash1-7B-s/c5e7d08d-4430-43f6-a293-5381b2f13ca6.json 
b/data/hfopenllm_v2/mistral/allknowingroger/Mistralmash1-7B-s/c5e7d08d-4430-43f6-a293-5381b2f13ca6.json deleted file mode 100644 index 1b298e9d5..000000000 --- a/data/hfopenllm_v2/mistral/allknowingroger/Mistralmash1-7B-s/c5e7d08d-4430-43f6-a293-5381b2f13ca6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Mistralmash1-7B-s/1762652579.990727", - "retrieved_timestamp": "1762652579.990727", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allknowingroger/Mistralmash1-7B-s", - "developer": "mistral", - "inference_platform": "unknown", - "id": "allknowingroger/Mistralmash1-7B-s", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39610012544493056 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5277485757172445 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09214501510574018 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4267083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3292885638297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/allknowingroger/Mistralmash2-7B-s/7a9d4b20-e704-4f50-a09b-ccb67d417824.json b/data/hfopenllm_v2/mistral/allknowingroger/Mistralmash2-7B-s/7a9d4b20-e704-4f50-a09b-ccb67d417824.json deleted file mode 100644 index c69580634..000000000 --- a/data/hfopenllm_v2/mistral/allknowingroger/Mistralmash2-7B-s/7a9d4b20-e704-4f50-a09b-ccb67d417824.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allknowingroger_Mistralmash2-7B-s/1762652579.991016", - "retrieved_timestamp": "1762652579.9910169", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - 
}, - "model_info": { - "name": "allknowingroger/Mistralmash2-7B-s", - "developer": "mistral", - "inference_platform": "unknown", - "id": "allknowingroger/Mistralmash2-7B-s", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4101883003763348 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.530485814102601 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07930513595166164 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43724999999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3345246010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/allura-org/Mistral-Small-24b-Sertraline-0304/34f35618-3ecf-4704-ab7a-ec9e8a5d08c1.json b/data/hfopenllm_v2/mistral/allura-org/Mistral-Small-24b-Sertraline-0304/34f35618-3ecf-4704-ab7a-ec9e8a5d08c1.json deleted file mode 100644 index 05ba26fe1..000000000 --- a/data/hfopenllm_v2/mistral/allura-org/Mistral-Small-24b-Sertraline-0304/34f35618-3ecf-4704-ab7a-ec9e8a5d08c1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allura-org_Mistral-Small-24b-Sertraline-0304/1762652580.007422", - "retrieved_timestamp": "1762652580.007423", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allura-org/Mistral-Small-24b-Sertraline-0304", - "developer": "mistral", - "inference_platform": "unknown", - "id": "allura-org/Mistral-Small-24b-Sertraline-0304", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6799902037704402 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on 
BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6524908933699552 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22280966767371602 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35151006711409394 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4395104166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5105551861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/allura-org/Mistral-Small-Sisyphus-24b-2503/ce2ee38f-cb48-403f-894d-f2824d00a388.json b/data/hfopenllm_v2/mistral/allura-org/Mistral-Small-Sisyphus-24b-2503/ce2ee38f-cb48-403f-894d-f2824d00a388.json deleted file mode 100644 index 4ffe53343..000000000 --- a/data/hfopenllm_v2/mistral/allura-org/Mistral-Small-Sisyphus-24b-2503/ce2ee38f-cb48-403f-894d-f2824d00a388.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/allura-org_Mistral-Small-Sisyphus-24b-2503/1762652580.007755", - "retrieved_timestamp": "1762652580.007756", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "allura-org/Mistral-Small-Sisyphus-24b-2503", - "developer": "mistral", - "inference_platform": "unknown", - "id": "allura-org/Mistral-Small-Sisyphus-24b-2503", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6848362345243952 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6269790835863639 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39768749999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5127160904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/amazon/MegaBeam-Mistral-7B-300k/4729a245-9e2d-4f65-bf14-67db4bb2590f.json b/data/hfopenllm_v2/mistral/amazon/MegaBeam-Mistral-7B-300k/4729a245-9e2d-4f65-bf14-67db4bb2590f.json deleted file mode 100644 index cb20f261b..000000000 --- a/data/hfopenllm_v2/mistral/amazon/MegaBeam-Mistral-7B-300k/4729a245-9e2d-4f65-bf14-67db4bb2590f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/amazon_MegaBeam-Mistral-7B-300k/1762652580.010282", - "retrieved_timestamp": "1762652580.010283", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "amazon/MegaBeam-Mistral-7B-300k", - "developer": "mistral", - "inference_platform": "unknown", - "id": "amazon/MegaBeam-Mistral-7B-300k", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.520347123410329 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4227731731112974 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.021148036253776436 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39799999999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2549035904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/awnr/Mistral-7B-v0.1-signtensors-1-over-2/3bccbf0f-e578-426d-93bc-84364f7d8017.json b/data/hfopenllm_v2/mistral/awnr/Mistral-7B-v0.1-signtensors-1-over-2/3bccbf0f-e578-426d-93bc-84364f7d8017.json 
deleted file mode 100644 index e2046fd4c..000000000 --- a/data/hfopenllm_v2/mistral/awnr/Mistral-7B-v0.1-signtensors-1-over-2/3bccbf0f-e578-426d-93bc-84364f7d8017.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/awnr_Mistral-7B-v0.1-signtensors-1-over-2/1762652580.020659", - "retrieved_timestamp": "1762652580.020659", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "awnr/Mistral-7B-v0.1-signtensors-1-over-2", - "developer": "mistral", - "inference_platform": "unknown", - "id": "awnr/Mistral-7B-v0.1-signtensors-1-over-2", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21792178087474567 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4422884892437673 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.033987915407854986 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40060416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2999501329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/awnr/Mistral-7B-v0.1-signtensors-1-over-4/ac1010e3-b3d8-4b61-ba79-0dcedb68619d.json b/data/hfopenllm_v2/mistral/awnr/Mistral-7B-v0.1-signtensors-1-over-4/ac1010e3-b3d8-4b61-ba79-0dcedb68619d.json deleted file mode 100644 index 078496090..000000000 --- a/data/hfopenllm_v2/mistral/awnr/Mistral-7B-v0.1-signtensors-1-over-4/ac1010e3-b3d8-4b61-ba79-0dcedb68619d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/awnr_Mistral-7B-v0.1-signtensors-1-over-4/1762652580.0209029", - "retrieved_timestamp": "1762652580.0209038", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"awnr/Mistral-7B-v0.1-signtensors-1-over-4", - "developer": "mistral", - "inference_platform": "unknown", - "id": "awnr/Mistral-7B-v0.1-signtensors-1-over-4", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2133007087860211 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35070947402846286 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34603125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2310505319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/awnr/Mistral-7B-v0.1-signtensors-3-over-8/12f4db59-10fe-47d0-86df-343ea8978249.json b/data/hfopenllm_v2/mistral/awnr/Mistral-7B-v0.1-signtensors-3-over-8/12f4db59-10fe-47d0-86df-343ea8978249.json deleted file mode 100644 index 77aadb582..000000000 --- a/data/hfopenllm_v2/mistral/awnr/Mistral-7B-v0.1-signtensors-3-over-8/12f4db59-10fe-47d0-86df-343ea8978249.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/awnr_Mistral-7B-v0.1-signtensors-3-over-8/1762652580.02111", - "retrieved_timestamp": "1762652580.021111", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "awnr/Mistral-7B-v0.1-signtensors-3-over-8", - "developer": "mistral", - "inference_platform": "unknown", - "id": "awnr/Mistral-7B-v0.1-signtensors-3-over-8", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23942915907569692 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4299940969601492 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03323262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38175000000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30011635638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/awnr/Mistral-7B-v0.1-signtensors-5-over-16/b0ae93c7-b251-42df-a67f-ca8b8a865937.json b/data/hfopenllm_v2/mistral/awnr/Mistral-7B-v0.1-signtensors-5-over-16/b0ae93c7-b251-42df-a67f-ca8b8a865937.json deleted file mode 100644 index 3741c162a..000000000 --- a/data/hfopenllm_v2/mistral/awnr/Mistral-7B-v0.1-signtensors-5-over-16/b0ae93c7-b251-42df-a67f-ca8b8a865937.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/awnr_Mistral-7B-v0.1-signtensors-5-over-16/1762652580.021311", - "retrieved_timestamp": "1762652580.021312", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "awnr/Mistral-7B-v0.1-signtensors-5-over-16", - "developer": "mistral", - "inference_platform": "unknown", - "id": "awnr/Mistral-7B-v0.1-signtensors-5-over-16", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21182684166899385 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4124151161773006 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", 
- "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3686041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29579454787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/awnr/Mistral-7B-v0.1-signtensors-7-over-16/893da954-ca56-42ab-914d-44fbc4a6f1ff.json b/data/hfopenllm_v2/mistral/awnr/Mistral-7B-v0.1-signtensors-7-over-16/893da954-ca56-42ab-914d-44fbc4a6f1ff.json deleted file mode 100644 index 16aeae24a..000000000 --- a/data/hfopenllm_v2/mistral/awnr/Mistral-7B-v0.1-signtensors-7-over-16/893da954-ca56-42ab-914d-44fbc4a6f1ff.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/awnr_Mistral-7B-v0.1-signtensors-7-over-16/1762652580.0215192", - "retrieved_timestamp": "1762652580.02152", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "awnr/Mistral-7B-v0.1-signtensors-7-over-16", - "developer": "mistral", - "inference_platform": "unknown", - "id": "awnr/Mistral-7B-v0.1-signtensors-7-over-16", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22936253584932426 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43158208189876196 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39520833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30302526595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/aws-prototyping/MegaBeam-Mistral-7B-512k/f05d6512-16ca-4f44-a31f-392f8f71da74.json 
b/data/hfopenllm_v2/mistral/aws-prototyping/MegaBeam-Mistral-7B-512k/f05d6512-16ca-4f44-a31f-392f8f71da74.json deleted file mode 100644 index 556c10b47..000000000 --- a/data/hfopenllm_v2/mistral/aws-prototyping/MegaBeam-Mistral-7B-512k/f05d6512-16ca-4f44-a31f-392f8f71da74.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/aws-prototyping_MegaBeam-Mistral-7B-512k/1762652580.0217311", - "retrieved_timestamp": "1762652580.0217311", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "aws-prototyping/MegaBeam-Mistral-7B-512k", - "developer": "mistral", - "inference_platform": "unknown", - "id": "aws-prototyping/MegaBeam-Mistral-7B-512k", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5972586071623293 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3662336639946533 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3993645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25889295212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/axolotl-ai-co/romulus-mistral-nemo-12b-simpo/3f48c9eb-dbfa-4035-96a6-d4f516fa1e80.json b/data/hfopenllm_v2/mistral/axolotl-ai-co/romulus-mistral-nemo-12b-simpo/3f48c9eb-dbfa-4035-96a6-d4f516fa1e80.json deleted file mode 100644 index 37b9943d3..000000000 --- a/data/hfopenllm_v2/mistral/axolotl-ai-co/romulus-mistral-nemo-12b-simpo/3f48c9eb-dbfa-4035-96a6-d4f516fa1e80.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/axolotl-ai-co_romulus-mistral-nemo-12b-simpo/1762652580.021987", - "retrieved_timestamp": "1762652580.0219882", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": 
"third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "axolotl-ai-co/romulus-mistral-nemo-12b-simpo", - "developer": "mistral", - "inference_platform": "unknown", - "id": "axolotl-ai-co/romulus-mistral-nemo-12b-simpo", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.607924750772395 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5395057669562011 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11404833836858005 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42330208333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3469082446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/bamec66557/Mistral-Nemo-VICIOUS_MESH-12B-2407/9cd84a08-1f21-42ad-b8c0-eeb2df93ee29.json b/data/hfopenllm_v2/mistral/bamec66557/Mistral-Nemo-VICIOUS_MESH-12B-2407/9cd84a08-1f21-42ad-b8c0-eeb2df93ee29.json deleted file mode 100644 index 8159c6cba..000000000 --- a/data/hfopenllm_v2/mistral/bamec66557/Mistral-Nemo-VICIOUS_MESH-12B-2407/9cd84a08-1f21-42ad-b8c0-eeb2df93ee29.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/bamec66557_Mistral-Nemo-VICIOUS_MESH-12B-2407/1762652580.026026", - "retrieved_timestamp": "1762652580.026027", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "bamec66557/Mistral-Nemo-VICIOUS_MESH-12B-2407", - "developer": "mistral", - "inference_platform": "unknown", - "id": "bamec66557/Mistral-Nemo-VICIOUS_MESH-12B-2407", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.6705729686121713 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5155964285724085 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13670694864048338 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4309895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36768617021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/cckm/tinymistral_950m/d0dbcd95-252f-46e0-9699-81b293cb7db5.json b/data/hfopenllm_v2/mistral/cckm/tinymistral_950m/d0dbcd95-252f-46e0-9699-81b293cb7db5.json deleted file mode 100644 index e5fa1327d..000000000 --- a/data/hfopenllm_v2/mistral/cckm/tinymistral_950m/d0dbcd95-252f-46e0-9699-81b293cb7db5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/cckm_tinymistral_950m/1762652580.099487", - "retrieved_timestamp": "1762652580.099488", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "cckm/tinymistral_950m", - "developer": "mistral", - "inference_platform": "unknown", - "id": "cckm/tinymistral_950m", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 0.955 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23952889444451833 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29694562621388126 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.005287009063444109 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, 
- { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3553645833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10962433510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/chujiezheng/Mistral7B-PairRM-SPPO-ExPO/d7e88fea-5c3d-4b9c-85a9-a0cf35a97ea0.json b/data/hfopenllm_v2/mistral/chujiezheng/Mistral7B-PairRM-SPPO-ExPO/d7e88fea-5c3d-4b9c-85a9-a0cf35a97ea0.json deleted file mode 100644 index 7ab777902..000000000 --- a/data/hfopenllm_v2/mistral/chujiezheng/Mistral7B-PairRM-SPPO-ExPO/d7e88fea-5c3d-4b9c-85a9-a0cf35a97ea0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/chujiezheng_Mistral7B-PairRM-SPPO-ExPO/1762652580.101214", - "retrieved_timestamp": "1762652580.101215", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "chujiezheng/Mistral7B-PairRM-SPPO-ExPO", - "developer": "mistral", - "inference_platform": "unknown", - "id": "chujiezheng/Mistral7B-PairRM-SPPO-ExPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36734863495525205 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3882191262277366 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01812688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40553124999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2551529255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/flammenai/Mahou-1.2a-mistral-7B/d9804b0c-37db-492f-a1ba-851137e697f0.json 
b/data/hfopenllm_v2/mistral/flammenai/Mahou-1.2a-mistral-7B/d9804b0c-37db-492f-a1ba-851137e697f0.json deleted file mode 100644 index 961fd89cf..000000000 --- a/data/hfopenllm_v2/mistral/flammenai/Mahou-1.2a-mistral-7B/d9804b0c-37db-492f-a1ba-851137e697f0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/flammenai_Mahou-1.2a-mistral-7B/1762652580.155141", - "retrieved_timestamp": "1762652580.155141", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "flammenai/Mahou-1.2a-mistral-7B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "flammenai/Mahou-1.2a-mistral-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4552010886669592 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5118111474458115 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06873111782477341 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38962500000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31632313829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/flammenai/Mahou-1.5-mistral-nemo-12B/1c4e9e6a-7bb8-410f-9a3b-f88ea0ed474c.json b/data/hfopenllm_v2/mistral/flammenai/Mahou-1.5-mistral-nemo-12B/1c4e9e6a-7bb8-410f-9a3b-f88ea0ed474c.json deleted file mode 100644 index e76abb501..000000000 --- a/data/hfopenllm_v2/mistral/flammenai/Mahou-1.5-mistral-nemo-12B/1c4e9e6a-7bb8-410f-9a3b-f88ea0ed474c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/flammenai_Mahou-1.5-mistral-nemo-12B/1762652580.155725", - "retrieved_timestamp": "1762652580.1557262", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": 
"documentation" - }, - "model_info": { - "name": "flammenai/Mahou-1.5-mistral-nemo-12B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "flammenai/Mahou-1.5-mistral-nemo-12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6751441730164851 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5522361927910235 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08685800604229607 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4520416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3602061170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/hotmailuser/Mistral-modelstock-24B/58269430-efba-4d04-a69e-8ef666f2afee.json b/data/hfopenllm_v2/mistral/hotmailuser/Mistral-modelstock-24B/58269430-efba-4d04-a69e-8ef666f2afee.json deleted file mode 100644 index 82fcf98b7..000000000 --- a/data/hfopenllm_v2/mistral/hotmailuser/Mistral-modelstock-24B/58269430-efba-4d04-a69e-8ef666f2afee.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hotmailuser_Mistral-modelstock-24B/1762652580.195392", - "retrieved_timestamp": "1762652580.195392", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "hotmailuser/Mistral-modelstock-24B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "hotmailuser/Mistral-modelstock-24B", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3424192254329623 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.645229041403176 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13066465256797583 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41023489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4590416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5069813829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/hotmailuser/Mistral-modelstock2-24B/7c9aa35b-3d8e-4b3f-8ae7-35698a1f1c70.json b/data/hfopenllm_v2/mistral/hotmailuser/Mistral-modelstock2-24B/7c9aa35b-3d8e-4b3f-8ae7-35698a1f1c70.json deleted file mode 100644 index accc2a719..000000000 --- a/data/hfopenllm_v2/mistral/hotmailuser/Mistral-modelstock2-24B/7c9aa35b-3d8e-4b3f-8ae7-35698a1f1c70.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/hotmailuser_Mistral-modelstock2-24B/1762652580.195659", - "retrieved_timestamp": "1762652580.19566", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "hotmailuser/Mistral-modelstock2-24B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "hotmailuser/Mistral-modelstock2-24B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43184528163051816 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6689381929188762 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24018126888217523 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3926174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46161458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5318317819148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/irahulpandey/mistralai-7B-slerp-v0.1/034c23f5-6c03-4cee-b6b2-7263426cf975.json b/data/hfopenllm_v2/mistral/irahulpandey/mistralai-7B-slerp-v0.1/034c23f5-6c03-4cee-b6b2-7263426cf975.json deleted file mode 100644 index e0caad6f1..000000000 --- a/data/hfopenllm_v2/mistral/irahulpandey/mistralai-7B-slerp-v0.1/034c23f5-6c03-4cee-b6b2-7263426cf975.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/irahulpandey_mistralai-7B-slerp-v0.1/1762652580.23053", - "retrieved_timestamp": "1762652580.230531", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "irahulpandey/mistralai-7B-slerp-v0.1", - "developer": "mistral", - "inference_platform": "unknown", - "id": "irahulpandey/mistralai-7B-slerp-v0.1", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4966167546554254 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5010682924547378 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45497916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2951296542553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/kaist-ai/mistral-orpo-capybara-7k/811cf797-62a1-4fda-960c-ee51f3e24a03.json b/data/hfopenllm_v2/mistral/kaist-ai/mistral-orpo-capybara-7k/811cf797-62a1-4fda-960c-ee51f3e24a03.json deleted file mode 100644 index 
0449e1968..000000000 --- a/data/hfopenllm_v2/mistral/kaist-ai/mistral-orpo-capybara-7k/811cf797-62a1-4fda-960c-ee51f3e24a03.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/kaist-ai_mistral-orpo-capybara-7k/1762652580.30416", - "retrieved_timestamp": "1762652580.304161", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "kaist-ai/mistral-orpo-capybara-7k", - "developer": "mistral", - "inference_platform": "unknown", - "id": "kaist-ai/mistral-orpo-capybara-7k", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.536733644507684 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4488995185492166 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3963541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.297124335106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/llmat/Mistral-v0.3-7B-ORPO/04a1b79b-a5af-420d-829b-0750341490cf.json b/data/hfopenllm_v2/mistral/llmat/Mistral-v0.3-7B-ORPO/04a1b79b-a5af-420d-829b-0750341490cf.json deleted file mode 100644 index 830c578b4..000000000 --- a/data/hfopenllm_v2/mistral/llmat/Mistral-v0.3-7B-ORPO/04a1b79b-a5af-420d-829b-0750341490cf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/llmat_Mistral-v0.3-7B-ORPO/1762652580.325205", - "retrieved_timestamp": "1762652580.325206", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "llmat/Mistral-v0.3-7B-ORPO", - "developer": "mistral", - "inference_platform": "unknown", - "id": "llmat/Mistral-v0.3-7B-ORPO", - 
"additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3639764713183243 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.400465557804411 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0015105740181268882 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3528541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23013630319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/llmat/Mistral-v0.3-7B-ORPO/ff710b55-0a89-4582-8caa-867efb88cf98.json b/data/hfopenllm_v2/mistral/llmat/Mistral-v0.3-7B-ORPO/ff710b55-0a89-4582-8caa-867efb88cf98.json deleted file mode 100644 index 078e32806..000000000 --- a/data/hfopenllm_v2/mistral/llmat/Mistral-v0.3-7B-ORPO/ff710b55-0a89-4582-8caa-867efb88cf98.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/llmat_Mistral-v0.3-7B-ORPO/1762652580.324949", - "retrieved_timestamp": "1762652580.324949", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "llmat/Mistral-v0.3-7B-ORPO", - "developer": "mistral", - "inference_platform": "unknown", - "id": "llmat/Mistral-v0.3-7B-ORPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3770406964631622 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39776607302918093 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02416918429003021 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35552083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2278091755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/migtissera/Tess-3-Mistral-Nemo-12B/7ef5c287-cf98-429f-80c3-d71743612a73.json b/data/hfopenllm_v2/mistral/migtissera/Tess-3-Mistral-Nemo-12B/7ef5c287-cf98-429f-80c3-d71743612a73.json deleted file mode 100644 index 3769f2cc6..000000000 --- a/data/hfopenllm_v2/mistral/migtissera/Tess-3-Mistral-Nemo-12B/7ef5c287-cf98-429f-80c3-d71743612a73.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/migtissera_Tess-3-Mistral-Nemo-12B/1762652580.358769", - "retrieved_timestamp": "1762652580.35877", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "migtissera/Tess-3-Mistral-Nemo-12B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "migtissera/Tess-3-Mistral-Nemo-12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.335499807178287 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.489942302453045 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25083892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44578125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25648271276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/mistral-community/Mistral-7B-v0.2/a65136c6-b3d7-4107-8d3a-0ce84b77965b.json b/data/hfopenllm_v2/mistral/mistral-community/Mistral-7B-v0.2/a65136c6-b3d7-4107-8d3a-0ce84b77965b.json deleted file mode 100644 index 3ee5b0f73..000000000 --- a/data/hfopenllm_v2/mistral/mistral-community/Mistral-7B-v0.2/a65136c6-b3d7-4107-8d3a-0ce84b77965b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mistral-community_Mistral-7B-v0.2/1762652580.360901", - "retrieved_timestamp": "1762652580.3609018", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mistral-community/Mistral-7B-v0.2", - "developer": "mistral", - "inference_platform": "unknown", - "id": "mistral-community/Mistral-7B-v0.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22663976028050017 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4510187962797583 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030211480362537766 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4031770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2952958776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/mistral-community/Mixtral-8x22B-v0.1/810fc203-f10a-49ad-8a6f-58cbd70f2205.json b/data/hfopenllm_v2/mistral/mistral-community/Mixtral-8x22B-v0.1/810fc203-f10a-49ad-8a6f-58cbd70f2205.json deleted file mode 100644 index 11a1422c3..000000000 --- a/data/hfopenllm_v2/mistral/mistral-community/Mixtral-8x22B-v0.1/810fc203-f10a-49ad-8a6f-58cbd70f2205.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/mistral-community_Mixtral-8x22B-v0.1/1762652580.361141", - "retrieved_timestamp": "1762652580.361141", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mistral-community/Mixtral-8x22B-v0.1", - "developer": "mistral", - "inference_platform": "unknown", - "id": "mistral-community/Mixtral-8x22B-v0.1", - "additional_details": { - "precision": "float16", - "architecture": "Unknown", - "params_billions": 0.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3166564417177914 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38000000000000006 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15428571428571428 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35333333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/mistral-community/mixtral-8x22B-v0.3/abeddace-67d6-484a-b410-95d92819dfe5.json b/data/hfopenllm_v2/mistral/mistral-community/mixtral-8x22B-v0.3/abeddace-67d6-484a-b410-95d92819dfe5.json deleted file mode 100644 index 7d9e9bede..000000000 --- a/data/hfopenllm_v2/mistral/mistral-community/mixtral-8x22B-v0.3/abeddace-67d6-484a-b410-95d92819dfe5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mistral-community_mixtral-8x22B-v0.3/1762652580.361342", - "retrieved_timestamp": "1762652580.361343", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mistral-community/mixtral-8x22B-v0.3", - "developer": "mistral", - "inference_platform": "unknown", - "id": "mistral-community/mixtral-8x22B-v0.3", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 140.63 - } - }, - "evaluation_results": [ - { - "evaluation_name": 
"IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25826362939223485 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6250002178435845 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18353474320241692 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3775167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4036979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46392952127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/mistralai/Codestral-22B-v0.1/b6fa1ae6-3df8-437d-a844-3fa022c12370.json b/data/hfopenllm_v2/mistral/mistralai/Codestral-22B-v0.1/b6fa1ae6-3df8-437d-a844-3fa022c12370.json deleted file mode 100644 index 99dff1310..000000000 --- a/data/hfopenllm_v2/mistral/mistralai/Codestral-22B-v0.1/b6fa1ae6-3df8-437d-a844-3fa022c12370.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mistralai_Codestral-22B-v0.1/1762652580.361543", - "retrieved_timestamp": "1762652580.361544", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mistralai/Codestral-22B-v0.1", - "developer": "mistral", - "inference_platform": "unknown", - "id": "mistralai/Codestral-22B-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 22.247 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5771752283939946 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5139136921003167 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10045317220543806 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4187083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3155751329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/mistralai/Mistral-7B-v0.1/44381c62-a310-4f01-bd66-9d1434638cf4.json b/data/hfopenllm_v2/mistral/mistralai/Mistral-7B-v0.1/44381c62-a310-4f01-bd66-9d1434638cf4.json deleted file mode 100644 index 3967feb13..000000000 --- a/data/hfopenllm_v2/mistral/mistralai/Mistral-7B-v0.1/44381c62-a310-4f01-bd66-9d1434638cf4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mistralai_Mistral-7B-v0.1/1762652580.362653", - "retrieved_timestamp": "1762652580.362654", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mistralai/Mistral-7B-v0.1", - "developer": "mistral", - "inference_platform": "unknown", - "id": "mistralai/Mistral-7B-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2385548123423627 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4419401145517045 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4139375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30127992021276595 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/mistral/mistralai/Mistral-7B-v0.3/1a3acc9e-b2cd-4f80-8fcc-b227eee29f26.json b/data/hfopenllm_v2/mistral/mistralai/Mistral-7B-v0.3/1a3acc9e-b2cd-4f80-8fcc-b227eee29f26.json deleted file mode 100644 index 228659bf6..000000000 --- a/data/hfopenllm_v2/mistral/mistralai/Mistral-7B-v0.3/1a3acc9e-b2cd-4f80-8fcc-b227eee29f26.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mistralai_Mistral-7B-v0.3/1762652580.362854", - "retrieved_timestamp": "1762652580.362854", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mistralai/Mistral-7B-v0.3", - "developer": "mistral", - "inference_platform": "unknown", - "id": "mistralai/Mistral-7B-v0.3", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22663976028050017 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45168546294642503 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030211480362537766 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4031770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2952958776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/mistralai/Mistral-Nemo-Base-2407/51b35f7f-f6f7-44ca-9816-b3d812112131.json b/data/hfopenllm_v2/mistral/mistralai/Mistral-Nemo-Base-2407/51b35f7f-f6f7-44ca-9816-b3d812112131.json deleted file mode 100644 index 24af7a6e1..000000000 --- a/data/hfopenllm_v2/mistral/mistralai/Mistral-Nemo-Base-2407/51b35f7f-f6f7-44ca-9816-b3d812112131.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mistralai_Mistral-Nemo-Base-2407/1762652580.363275", - "retrieved_timestamp": "1762652580.363276", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": 
"HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mistralai/Mistral-Nemo-Base-2407", - "developer": "mistral", - "inference_platform": "unknown", - "id": "mistralai/Mistral-Nemo-Base-2407", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 11.58 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16299197241098062 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5035062000369291 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05966767371601209 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3921354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34715757978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/mistralai/Mistral-Small-24B-Base-2501/6b30f50f-9a89-4a11-bcf9-4f38c46c1f18.json b/data/hfopenllm_v2/mistral/mistralai/Mistral-Small-24B-Base-2501/6b30f50f-9a89-4a11-bcf9-4f38c46c1f18.json deleted file mode 100644 index 63d0de330..000000000 --- a/data/hfopenllm_v2/mistral/mistralai/Mistral-Small-24B-Base-2501/6b30f50f-9a89-4a11-bcf9-4f38c46c1f18.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mistralai_Mistral-Small-24B-Base-2501/1762652580.363713", - "retrieved_timestamp": "1762652580.363714", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mistralai/Mistral-Small-24B-Base-2501", - "developer": "mistral", - "inference_platform": "unknown", - "id": "mistralai/Mistral-Small-24B-Base-2501", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16723848278124265 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": 
"Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6441860347172437 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1971299093655589 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3875838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42366666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5406416223404256 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/mistralai/Mixtral-8x22B-v0.1/b08cfbfa-906a-4dd0-b258-a7a56a6dcda4.json b/data/hfopenllm_v2/mistral/mistralai/Mixtral-8x22B-v0.1/b08cfbfa-906a-4dd0-b258-a7a56a6dcda4.json deleted file mode 100644 index 7b4049f2c..000000000 --- a/data/hfopenllm_v2/mistral/mistralai/Mixtral-8x22B-v0.1/b08cfbfa-906a-4dd0-b258-a7a56a6dcda4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mistralai_Mixtral-8x22B-v0.1/1762652580.364491", - "retrieved_timestamp": "1762652580.364492", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mistralai/Mixtral-8x22B-v0.1", - "developer": "mistral", - "inference_platform": "unknown", - "id": "mistralai/Mixtral-8x22B-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 140.621 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25826362939223485 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6239807473187268 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18353474320241692 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37583892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4036979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46392952127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/mistralai/Mixtral-8x7B-v0.1/4384c278-c869-4591-84fd-a8b2843fe42d.json b/data/hfopenllm_v2/mistral/mistralai/Mixtral-8x7B-v0.1/4384c278-c869-4591-84fd-a8b2843fe42d.json deleted file mode 100644 index d7de77d6a..000000000 --- a/data/hfopenllm_v2/mistral/mistralai/Mixtral-8x7B-v0.1/4384c278-c869-4591-84fd-a8b2843fe42d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mistralai_Mixtral-8x7B-v0.1/1762652580.3651662", - "retrieved_timestamp": "1762652580.3651662", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mistralai/Mixtral-8x7B-v0.1", - "developer": "mistral", - "inference_platform": "unknown", - "id": "mistralai/Mixtral-8x7B-v0.1", - "additional_details": { - "precision": "float16", - "architecture": "MixtralForCausalLM", - "params_billions": 46.703 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23260947618984296 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5097711377553386 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09365558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4413125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3871343085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/mistralai/Mixtral-8x7B-v0.1/f1822f64-0594-4f16-98f4-29932c604187.json b/data/hfopenllm_v2/mistral/mistralai/Mixtral-8x7B-v0.1/f1822f64-0594-4f16-98f4-29932c604187.json deleted file mode 100644 index 0a2054ae0..000000000 --- 
a/data/hfopenllm_v2/mistral/mistralai/Mixtral-8x7B-v0.1/f1822f64-0594-4f16-98f4-29932c604187.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mistralai_Mixtral-8x7B-v0.1/1762652580.364961", - "retrieved_timestamp": "1762652580.364962", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mistralai/Mixtral-8x7B-v0.1", - "developer": "mistral", - "inference_platform": "unknown", - "id": "mistralai/Mixtral-8x7B-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 46.703 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24152692633324024 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.508666743762444 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10196374622356495 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43213541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3849734042553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/nazimali/Mistral-Nemo-Kurdish-Instruct/27e58a27-f4e9-4c7a-93f2-c3b15cab8f9f.json b/data/hfopenllm_v2/mistral/nazimali/Mistral-Nemo-Kurdish-Instruct/27e58a27-f4e9-4c7a-93f2-c3b15cab8f9f.json deleted file mode 100644 index 03289b0c4..000000000 --- a/data/hfopenllm_v2/mistral/nazimali/Mistral-Nemo-Kurdish-Instruct/27e58a27-f4e9-4c7a-93f2-c3b15cab8f9f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nazimali_Mistral-Nemo-Kurdish-Instruct/1762652580.376322", - "retrieved_timestamp": "1762652580.376323", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nazimali/Mistral-Nemo-Kurdish-Instruct", - "developer": "mistral", - "inference_platform": "unknown", - "id": 
"nazimali/Mistral-Nemo-Kurdish-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4860004787297703 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47214400722999256 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40057291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30867686170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/nazimali/Mistral-Nemo-Kurdish-Instruct/3381e897-35f3-45f4-ac05-3ca47441b772.json b/data/hfopenllm_v2/mistral/nazimali/Mistral-Nemo-Kurdish-Instruct/3381e897-35f3-45f4-ac05-3ca47441b772.json deleted file mode 100644 index 4cb2a138d..000000000 --- a/data/hfopenllm_v2/mistral/nazimali/Mistral-Nemo-Kurdish-Instruct/3381e897-35f3-45f4-ac05-3ca47441b772.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nazimali_Mistral-Nemo-Kurdish-Instruct/1762652580.376105", - "retrieved_timestamp": "1762652580.376106", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nazimali/Mistral-Nemo-Kurdish-Instruct", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nazimali/Mistral-Nemo-Kurdish-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4963917959901949 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4699417600389813 - } - }, - { 
- "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.004531722054380665 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.397875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062666223404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/nazimali/Mistral-Nemo-Kurdish/0da50308-a631-4466-b2e4-2793412b31db.json b/data/hfopenllm_v2/mistral/nazimali/Mistral-Nemo-Kurdish/0da50308-a631-4466-b2e4-2793412b31db.json deleted file mode 100644 index e58c5ae3d..000000000 --- a/data/hfopenllm_v2/mistral/nazimali/Mistral-Nemo-Kurdish/0da50308-a631-4466-b2e4-2793412b31db.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nazimali_Mistral-Nemo-Kurdish/1762652580.375733", - "retrieved_timestamp": "1762652580.3757372", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nazimali/Mistral-Nemo-Kurdish", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nazimali/Mistral-Nemo-Kurdish", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3401208792670115 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5133321102266589 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09592145015105741 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4115729166666667 - 
} - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3234707446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/nbeerbower/BigKartoffel-mistral-nemo-20B/95ba0175-5578-47fe-aec9-93ccf4f9f9af.json b/data/hfopenllm_v2/mistral/nbeerbower/BigKartoffel-mistral-nemo-20B/95ba0175-5578-47fe-aec9-93ccf4f9f9af.json deleted file mode 100644 index 9e7d65f50..000000000 --- a/data/hfopenllm_v2/mistral/nbeerbower/BigKartoffel-mistral-nemo-20B/95ba0175-5578-47fe-aec9-93ccf4f9f9af.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_BigKartoffel-mistral-nemo-20B/1762652580.376553", - "retrieved_timestamp": "1762652580.376553", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/BigKartoffel-mistral-nemo-20B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/BigKartoffel-mistral-nemo-20B", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 20.427 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5857181168189294 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.55148305168682 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42804166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3529753989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/nbeerbower/DoppelKartoffel-Mistral-Nemo-23B/5db2ec95-d423-4987-aaa7-b5919d1a2cc8.json b/data/hfopenllm_v2/mistral/nbeerbower/DoppelKartoffel-Mistral-Nemo-23B/5db2ec95-d423-4987-aaa7-b5919d1a2cc8.json deleted file mode 100644 index 0d741047e..000000000 --- a/data/hfopenllm_v2/mistral/nbeerbower/DoppelKartoffel-Mistral-Nemo-23B/5db2ec95-d423-4987-aaa7-b5919d1a2cc8.json +++ /dev/null @@ -1,105 
+0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_DoppelKartoffel-Mistral-Nemo-23B/1762652580.376802", - "retrieved_timestamp": "1762652580.3768032", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/DoppelKartoffel-Mistral-Nemo-23B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/DoppelKartoffel-Mistral-Nemo-23B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.153 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5191480826429429 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5217926041279988 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030966767371601207 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3794895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3080119680851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/nbeerbower/DoublePotato-Mistral-Nemo-13B/03b30ba7-efc3-467e-bdde-c6a18437929b.json b/data/hfopenllm_v2/mistral/nbeerbower/DoublePotato-Mistral-Nemo-13B/03b30ba7-efc3-467e-bdde-c6a18437929b.json deleted file mode 100644 index 8770bbade..000000000 --- a/data/hfopenllm_v2/mistral/nbeerbower/DoublePotato-Mistral-Nemo-13B/03b30ba7-efc3-467e-bdde-c6a18437929b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_DoublePotato-Mistral-Nemo-13B/1762652580.377009", - "retrieved_timestamp": "1762652580.3770099", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/DoublePotato-Mistral-Nemo-13B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/DoublePotato-Mistral-Nemo-13B", - "additional_details": { - "precision": 
"bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 13.338 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6796156420519777 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5437915398770364 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45997916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.359624335106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/nbeerbower/Flammades-Mistral-Nemo-12B/a6e65aeb-f0d3-48ca-8f6e-933d0ea2113b.json b/data/hfopenllm_v2/mistral/nbeerbower/Flammades-Mistral-Nemo-12B/a6e65aeb-f0d3-48ca-8f6e-933d0ea2113b.json deleted file mode 100644 index 3c159f0ca..000000000 --- a/data/hfopenllm_v2/mistral/nbeerbower/Flammades-Mistral-Nemo-12B/a6e65aeb-f0d3-48ca-8f6e-933d0ea2113b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_Flammades-Mistral-Nemo-12B/1762652580.3785129", - "retrieved_timestamp": "1762652580.3785138", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/Flammades-Mistral-Nemo-12B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/Flammades-Mistral-Nemo-12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38415958545548035 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5299609345270283 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": 
"Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0755287009063444 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.480625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36610704787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/nbeerbower/Gutensuppe-mistral-nemo-12B/80a9277b-5768-4da0-96c6-3289a7b8a9bc.json b/data/hfopenllm_v2/mistral/nbeerbower/Gutensuppe-mistral-nemo-12B/80a9277b-5768-4da0-96c6-3289a7b8a9bc.json deleted file mode 100644 index b6364218d..000000000 --- a/data/hfopenllm_v2/mistral/nbeerbower/Gutensuppe-mistral-nemo-12B/80a9277b-5768-4da0-96c6-3289a7b8a9bc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_Gutensuppe-mistral-nemo-12B/1762652580.378963", - "retrieved_timestamp": "1762652580.378964", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/Gutensuppe-mistral-nemo-12B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/Gutensuppe-mistral-nemo-12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29161070404305023 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5486832203098263 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13293051359516617 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.337248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42903125 - } - }, - { - "evaluation_name": 
"MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3680186170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/nbeerbower/Hermes2-Gutenberg2-Mistral-7B/b9b08e55-0c5d-427d-914b-e4cfb4de96b8.json b/data/hfopenllm_v2/mistral/nbeerbower/Hermes2-Gutenberg2-Mistral-7B/b9b08e55-0c5d-427d-914b-e4cfb4de96b8.json deleted file mode 100644 index 2302e4df5..000000000 --- a/data/hfopenllm_v2/mistral/nbeerbower/Hermes2-Gutenberg2-Mistral-7B/b9b08e55-0c5d-427d-914b-e4cfb4de96b8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_Hermes2-Gutenberg2-Mistral-7B/1762652580.379175", - "retrieved_timestamp": "1762652580.379176", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/Hermes2-Gutenberg2-Mistral-7B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/Hermes2-Gutenberg2-Mistral-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37214479802479644 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4981450458280896 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46230208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29928523936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/nbeerbower/Lyra-Gutenberg-mistral-nemo-12B/5b3de7db-009e-46c9-bf34-fe5912c39b81.json b/data/hfopenllm_v2/mistral/nbeerbower/Lyra-Gutenberg-mistral-nemo-12B/5b3de7db-009e-46c9-bf34-fe5912c39b81.json deleted file mode 100644 index 8655ec50b..000000000 --- a/data/hfopenllm_v2/mistral/nbeerbower/Lyra-Gutenberg-mistral-nemo-12B/5b3de7db-009e-46c9-bf34-fe5912c39b81.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": 
"0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_Lyra-Gutenberg-mistral-nemo-12B/1762652580.3801112", - "retrieved_timestamp": "1762652580.380112", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/Lyra-Gutenberg-mistral-nemo-12B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/Lyra-Gutenberg-mistral-nemo-12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34948824674086976 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5586245741555749 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10120845921450151 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3338926174496644 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43566666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36278257978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/nbeerbower/Mahou-1.5-mistral-nemo-12B-lorablated/0cee26b2-c3b3-40be-bc15-3fdaf7b4b38c.json b/data/hfopenllm_v2/mistral/nbeerbower/Mahou-1.5-mistral-nemo-12B-lorablated/0cee26b2-c3b3-40be-bc15-3fdaf7b4b38c.json deleted file mode 100644 index 441078e40..000000000 --- a/data/hfopenllm_v2/mistral/nbeerbower/Mahou-1.5-mistral-nemo-12B-lorablated/0cee26b2-c3b3-40be-bc15-3fdaf7b4b38c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_Mahou-1.5-mistral-nemo-12B-lorablated/1762652580.380727", - "retrieved_timestamp": "1762652580.380728", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/Mahou-1.5-mistral-nemo-12B-lorablated", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/Mahou-1.5-mistral-nemo-12B-lorablated", - "additional_details": { 
- "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6824880206740338 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5496040380079439 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45216666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35738031914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Gutenberg-Doppel-7B-FFT/c3eae55f-ce07-4ea2-b9d4-92e0909a8b06.json b/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Gutenberg-Doppel-7B-FFT/c3eae55f-ce07-4ea2-b9d4-92e0909a8b06.json deleted file mode 100644 index 00a37f970..000000000 --- a/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Gutenberg-Doppel-7B-FFT/c3eae55f-ce07-4ea2-b9d4-92e0909a8b06.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_Mistral-Gutenberg-Doppel-7B-FFT/1762652580.380932", - "retrieved_timestamp": "1762652580.380933", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/Mistral-Gutenberg-Doppel-7B-FFT", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/Mistral-Gutenberg-Doppel-7B-FFT", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5716798095719358 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40762540890255944 - } - }, - { - "evaluation_name": "MATH Level 5", - 
"metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4059375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2728557180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B-v2/178418ad-2d0a-40cd-a057-105bbe69f937.json b/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B-v2/178418ad-2d0a-40cd-a057-105bbe69f937.json deleted file mode 100644 index 588a2cbc0..000000000 --- a/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B-v2/178418ad-2d0a-40cd-a057-105bbe69f937.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_Mistral-Nemo-Gutenberg-Doppel-12B-v2/1762652580.3813472", - "retrieved_timestamp": "1762652580.3813481", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B-v2", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6535869271311232 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5374496172235809 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11555891238670694 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.42330208333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3546376329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B/012b188f-db69-4529-bfe3-db34c77e7dc0.json b/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B/012b188f-db69-4529-bfe3-db34c77e7dc0.json deleted file mode 100644 index e93a4dc02..000000000 --- a/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B/012b188f-db69-4529-bfe3-db34c77e7dc0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_Mistral-Nemo-Gutenberg-Doppel-12B/1762652580.381143", - "retrieved_timestamp": "1762652580.381144", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3567068711020093 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5274606999473499 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1216012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41321874999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35787898936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Nemo-Prism-12B-v2/e5582319-d8e6-4223-97bb-a64a2cc03853.json b/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Nemo-Prism-12B-v2/e5582319-d8e6-4223-97bb-a64a2cc03853.json deleted file mode 100644 index d8f6fdbbb..000000000 --- 
a/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Nemo-Prism-12B-v2/e5582319-d8e6-4223-97bb-a64a2cc03853.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_Mistral-Nemo-Prism-12B-v2/1762652580.3824818", - "retrieved_timestamp": "1762652580.382483", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/Mistral-Nemo-Prism-12B-v2", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/Mistral-Nemo-Prism-12B-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6974006746543615 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5491875637377679 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45997916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3567154255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Nemo-Prism-12B-v7/d66604f0-15b3-4ac3-b0e9-083ab6906da0.json b/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Nemo-Prism-12B-v7/d66604f0-15b3-4ac3-b0e9-083ab6906da0.json deleted file mode 100644 index 1ee0d1dda..000000000 --- a/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Nemo-Prism-12B-v7/d66604f0-15b3-4ac3-b0e9-083ab6906da0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_Mistral-Nemo-Prism-12B-v7/1762652580.382694", - "retrieved_timestamp": "1762652580.382695", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/Mistral-Nemo-Prism-12B-v7", - "developer": "mistral", - "inference_platform": "unknown", - 
"id": "nbeerbower/Mistral-Nemo-Prism-12B-v7", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6961517662025647 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5521104600038905 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08685800604229607 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46388541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35904255319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Nemo-Prism-12B/5ea20d83-ceee-4c52-911a-e25e9cfecf0e.json b/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Nemo-Prism-12B/5ea20d83-ceee-4c52-911a-e25e9cfecf0e.json deleted file mode 100644 index 617e26beb..000000000 --- a/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Nemo-Prism-12B/5ea20d83-ceee-4c52-911a-e25e9cfecf0e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_Mistral-Nemo-Prism-12B/1762652580.382256", - "retrieved_timestamp": "1762652580.382257", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/Mistral-Nemo-Prism-12B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/Mistral-Nemo-Prism-12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6858103166265509 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5475186352291487 - } - }, - { - "evaluation_name": "MATH 
Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08685800604229607 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46261458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3581283244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Small-Drummer-22B/2e86d526-de04-4339-8495-e88c5a9f3f18.json b/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Small-Drummer-22B/2e86d526-de04-4339-8495-e88c5a9f3f18.json deleted file mode 100644 index e19360b7f..000000000 --- a/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Small-Drummer-22B/2e86d526-de04-4339-8495-e88c5a9f3f18.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_Mistral-Small-Drummer-22B/1762652580.3829079", - "retrieved_timestamp": "1762652580.3829088", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/Mistral-Small-Drummer-22B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/Mistral-Small-Drummer-22B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 22.247 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6331289866443259 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5793201948136216 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18882175226586104 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34312080536912754 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.40636458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40949135638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Small-Gutenberg-Doppel-22B/99cfc94d-3cde-4e42-924a-5c4a4c7f217a.json b/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Small-Gutenberg-Doppel-22B/99cfc94d-3cde-4e42-924a-5c4a4c7f217a.json deleted file mode 100644 index 32b3a2068..000000000 --- a/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Small-Gutenberg-Doppel-22B/99cfc94d-3cde-4e42-924a-5c4a4c7f217a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_Mistral-Small-Gutenberg-Doppel-22B/1762652580.383116", - "retrieved_timestamp": "1762652580.383116", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/Mistral-Small-Gutenberg-Doppel-22B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/Mistral-Small-Gutenberg-Doppel-22B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 22.247 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48932277468228746 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5858932329112819 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21827794561933533 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3464765100671141 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39706250000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41240026595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/nbeerbower/Stella-mistral-nemo-12B-v2/ed825fd6-f749-449f-a1d6-c3ad7a82e354.json b/data/hfopenllm_v2/mistral/nbeerbower/Stella-mistral-nemo-12B-v2/ed825fd6-f749-449f-a1d6-c3ad7a82e354.json deleted file mode 100644 index 42873bdd3..000000000 --- 
a/data/hfopenllm_v2/mistral/nbeerbower/Stella-mistral-nemo-12B-v2/ed825fd6-f749-449f-a1d6-c3ad7a82e354.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_Stella-mistral-nemo-12B-v2/1762652580.384186", - "retrieved_timestamp": "1762652580.384186", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/Stella-mistral-nemo-12B-v2", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/Stella-mistral-nemo-12B-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32743121584063617 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5483750956495209 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1163141993957704 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33221476510067116 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4303958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3684341755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-bophades-12B/1cb58f83-841d-474a-9c7b-adece8cab805.json b/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-bophades-12B/1cb58f83-841d-474a-9c7b-adece8cab805.json deleted file mode 100644 index 22510eefb..000000000 --- a/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-bophades-12B/1cb58f83-841d-474a-9c7b-adece8cab805.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-bophades-12B/1762652580.385997", - "retrieved_timestamp": "1762652580.385998", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/mistral-nemo-bophades-12B", - "developer": "mistral", - "inference_platform": "unknown", 
- "id": "nbeerbower/mistral-nemo-bophades-12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6794405510711579 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4988471515853883 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12311178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41778125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35006648936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-bophades3-12B/2043110d-2b63-4133-9c53-b39b5b7869b6.json b/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-bophades3-12B/2043110d-2b63-4133-9c53-b39b5b7869b6.json deleted file mode 100644 index 3c00f8031..000000000 --- a/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-bophades3-12B/2043110d-2b63-4133-9c53-b39b5b7869b6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-bophades3-12B/1762652580.386282", - "retrieved_timestamp": "1762652580.386283", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/mistral-nemo-bophades3-12B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/mistral-nemo-bophades3-12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6577835698169745 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.544933208169299 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4604479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3371010638297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-cc-12B/45e38c7d-5f31-404b-8fcc-9f3cad239cd1.json b/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-cc-12B/45e38c7d-5f31-404b-8fcc-9f3cad239cd1.json deleted file mode 100644 index 3d2acc8bb..000000000 --- a/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-cc-12B/45e38c7d-5f31-404b-8fcc-9f3cad239cd1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-cc-12B/1762652580.386496", - "retrieved_timestamp": "1762652580.386497", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/mistral-nemo-cc-12B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/mistral-nemo-cc-12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14353249378316202 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5399409546487519 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0256797583081571 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.44236458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3597905585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutades-12B/b83d5033-b513-4472-84c1-1b757c533137.json b/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutades-12B/b83d5033-b513-4472-84c1-1b757c533137.json deleted file mode 100644 index ddd0b7b8e..000000000 --- a/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutades-12B/b83d5033-b513-4472-84c1-1b757c533137.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-gutades-12B/1762652580.3867059", - "retrieved_timestamp": "1762652580.3867059", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/mistral-nemo-gutades-12B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/mistral-nemo-gutades-12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3425189608017837 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5407194259684368 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11782477341389729 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4040416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3560505319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutenberg-12B-v2/db2dee58-3a9c-4789-800d-ed7207c6699c.json b/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutenberg-12B-v2/db2dee58-3a9c-4789-800d-ed7207c6699c.json deleted file mode 100644 index c575369df..000000000 --- a/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutenberg-12B-v2/db2dee58-3a9c-4789-800d-ed7207c6699c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - 
"schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-gutenberg-12B-v2/1762652580.38711", - "retrieved_timestamp": "1762652580.387111", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/mistral-nemo-gutenberg-12B-v2", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/mistral-nemo-gutenberg-12B-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6203395878491292 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5397203788283472 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10876132930513595 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4286979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34990026595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutenberg-12B-v3/b4ed9f85-c1bb-4a52-8ba6-69f4e0f8e442.json b/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutenberg-12B-v3/b4ed9f85-c1bb-4a52-8ba6-69f4e0f8e442.json deleted file mode 100644 index 32b5b5d91..000000000 --- a/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutenberg-12B-v3/b4ed9f85-c1bb-4a52-8ba6-69f4e0f8e442.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-gutenberg-12B-v3/1762652580.387317", - "retrieved_timestamp": "1762652580.3873181", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/mistral-nemo-gutenberg-12B-v3", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/mistral-nemo-gutenberg-12B-v3", - "additional_details": { - "precision": "bfloat16", - 
"architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21827085466562057 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.544065799051091 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05966767371601209 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44503125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3644448138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutenberg-12B-v4/9f84023e-a23c-4d2c-afb3-f93629f97a6f.json b/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutenberg-12B-v4/9f84023e-a23c-4d2c-afb3-f93629f97a6f.json deleted file mode 100644 index 2910f147d..000000000 --- a/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutenberg-12B-v4/9f84023e-a23c-4d2c-afb3-f93629f97a6f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-gutenberg-12B-v4/1762652580.3875241", - "retrieved_timestamp": "1762652580.387525", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/mistral-nemo-gutenberg-12B-v4", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/mistral-nemo-gutenberg-12B-v4", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.237929804031082 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5269028864823667 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact 
Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12613293051359517 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4104270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3575465425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutenberg-12B/9f8c4246-9770-4790-8db0-095e722d89e9.json b/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutenberg-12B/9f8c4246-9770-4790-8db0-095e722d89e9.json deleted file mode 100644 index b52718620..000000000 --- a/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutenberg-12B/9f8c4246-9770-4790-8db0-095e722d89e9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-gutenberg-12B/1762652580.3869052", - "retrieved_timestamp": "1762652580.3869061", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/mistral-nemo-gutenberg-12B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/mistral-nemo-gutenberg-12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.350386973231027 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5281363707697807 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1163141993957704 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41706250000000006 - } - }, - { - "evaluation_name": 
"MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3562167553191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutenberg2-12B-test/10a4d2dc-4779-4b0f-92fa-010a6a51fe9f.json b/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutenberg2-12B-test/10a4d2dc-4779-4b0f-92fa-010a6a51fe9f.json deleted file mode 100644 index 12e646ab8..000000000 --- a/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutenberg2-12B-test/10a4d2dc-4779-4b0f-92fa-010a6a51fe9f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-gutenberg2-12B-test/1762652580.387729", - "retrieved_timestamp": "1762652580.38773", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/mistral-nemo-gutenberg2-12B-test", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/mistral-nemo-gutenberg2-12B-test", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33847192116916447 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.525477908630255 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1163141993957704 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4157291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35546875 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-kartoffel-12B/b111507d-92e8-4af1-882a-9434d6825f51.json b/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-kartoffel-12B/b111507d-92e8-4af1-882a-9434d6825f51.json deleted file mode 100644 index aa64cc417..000000000 --- a/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-kartoffel-12B/b111507d-92e8-4af1-882a-9434d6825f51.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - 
"evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-kartoffel-12B/1762652580.3880079", - "retrieved_timestamp": "1762652580.3880079", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/mistral-nemo-kartoffel-12B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/mistral-nemo-kartoffel-12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7031709198260616 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5483796436144805 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08534743202416918 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46528125000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35846077127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-narwhal-12B/e1bd9218-4bfb-4df1-a2bf-4a10937240dc.json b/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-narwhal-12B/e1bd9218-4bfb-4df1-a2bf-4a10937240dc.json deleted file mode 100644 index 210d505fd..000000000 --- a/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-narwhal-12B/e1bd9218-4bfb-4df1-a2bf-4a10937240dc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-narwhal-12B/1762652580.388214", - "retrieved_timestamp": "1762652580.388215", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/mistral-nemo-narwhal-12B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/mistral-nemo-narwhal-12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - 
}, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5549187267561182 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5057374929934754 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0581570996978852 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38469791666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34832114361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/nlpguy/Mistral-NeMo-Minitron-Upscale-v1/97b61e29-2157-4167-b5bd-94919ecdcacc.json b/data/hfopenllm_v2/mistral/nlpguy/Mistral-NeMo-Minitron-Upscale-v1/97b61e29-2157-4167-b5bd-94919ecdcacc.json deleted file mode 100644 index 403b6b3f6..000000000 --- a/data/hfopenllm_v2/mistral/nlpguy/Mistral-NeMo-Minitron-Upscale-v1/97b61e29-2157-4167-b5bd-94919ecdcacc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nlpguy_Mistral-NeMo-Minitron-Upscale-v1/1762652580.4083898", - "retrieved_timestamp": "1762652580.408391", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nlpguy/Mistral-NeMo-Minitron-Upscale-v1", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nlpguy/Mistral-NeMo-Minitron-Upscale-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.451 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16484040124647048 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44679984097967057 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3803541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2537400265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/nlpguy/Mistral-NeMo-Minitron-Upscale-v2/9cee29c1-b8dc-4a2c-b117-d5912b890824.json b/data/hfopenllm_v2/mistral/nlpguy/Mistral-NeMo-Minitron-Upscale-v2/9cee29c1-b8dc-4a2c-b117-d5912b890824.json deleted file mode 100644 index 135230041..000000000 --- a/data/hfopenllm_v2/mistral/nlpguy/Mistral-NeMo-Minitron-Upscale-v2/9cee29c1-b8dc-4a2c-b117-d5912b890824.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nlpguy_Mistral-NeMo-Minitron-Upscale-v2/1762652580.4086552", - "retrieved_timestamp": "1762652580.408656", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nlpguy/Mistral-NeMo-Minitron-Upscale-v2", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nlpguy/Mistral-NeMo-Minitron-Upscale-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.451 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15727159492369136 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3949668154807224 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3790833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1926529255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/nlpguy/Mistral-NeMo-Minitron-Upscale-v3/7d2d135a-ab81-49fa-8c17-07f9bd54399d.json b/data/hfopenllm_v2/mistral/nlpguy/Mistral-NeMo-Minitron-Upscale-v3/7d2d135a-ab81-49fa-8c17-07f9bd54399d.json deleted file mode 100644 index 6eed173f9..000000000 --- a/data/hfopenllm_v2/mistral/nlpguy/Mistral-NeMo-Minitron-Upscale-v3/7d2d135a-ab81-49fa-8c17-07f9bd54399d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nlpguy_Mistral-NeMo-Minitron-Upscale-v3/1762652580.408863", - "retrieved_timestamp": "1762652580.408864", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nlpguy/Mistral-NeMo-Minitron-Upscale-v3", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nlpguy/Mistral-NeMo-Minitron-Upscale-v3", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.451 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14120976786038822 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30524522602918064 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.011329305135951661 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40984375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11710438829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/nvidia/Mistral-NeMo-Minitron-8B-Base/7bbc4787-9899-4d90-90c6-dec88bc7dd52.json b/data/hfopenllm_v2/mistral/nvidia/Mistral-NeMo-Minitron-8B-Base/7bbc4787-9899-4d90-90c6-dec88bc7dd52.json deleted file mode 100644 index 1cafff0bd..000000000 --- a/data/hfopenllm_v2/mistral/nvidia/Mistral-NeMo-Minitron-8B-Base/7bbc4787-9899-4d90-90c6-dec88bc7dd52.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/nvidia_Mistral-NeMo-Minitron-8B-Base/1762652580.415714", - "retrieved_timestamp": "1762652580.415715", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nvidia/Mistral-NeMo-Minitron-8B-Base", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nvidia/Mistral-NeMo-Minitron-8B-Base", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.88 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19456597383830457 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5219098090521418 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04833836858006042 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32550335570469796 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40915625000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37957114361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/pszemraj/Mistral-v0.3-6B/729b4f81-32da-41d2-8fa4-d18553b37b83.json b/data/hfopenllm_v2/mistral/pszemraj/Mistral-v0.3-6B/729b4f81-32da-41d2-8fa4-d18553b37b83.json deleted file mode 100644 index c819e843c..000000000 --- a/data/hfopenllm_v2/mistral/pszemraj/Mistral-v0.3-6B/729b4f81-32da-41d2-8fa4-d18553b37b83.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/pszemraj_Mistral-v0.3-6B/1762652580.481565", - "retrieved_timestamp": "1762652580.481566", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "pszemraj/Mistral-v0.3-6B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "pszemraj/Mistral-v0.3-6B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 5.939 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2453744952282167 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3774050646438491 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39077083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2142619680851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/shivam9980/mistral-7b-news-cnn-merged/ce626634-c5a4-422d-8b03-1a28108809ce.json b/data/hfopenllm_v2/mistral/shivam9980/mistral-7b-news-cnn-merged/ce626634-c5a4-422d-8b03-1a28108809ce.json deleted file mode 100644 index 1b75969da..000000000 --- a/data/hfopenllm_v2/mistral/shivam9980/mistral-7b-news-cnn-merged/ce626634-c5a4-422d-8b03-1a28108809ce.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/shivam9980_mistral-7b-news-cnn-merged/1762652580.515563", - "retrieved_timestamp": "1762652580.515563", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "shivam9980/mistral-7b-news-cnn-merged", - "developer": "mistral", - "inference_platform": "unknown", - "id": "shivam9980/mistral-7b-news-cnn-merged", - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 7.723 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4634192830578421 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3635484854246454 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0188821752265861 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45226041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28274601063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/shivank21/mistral_dpo_self/7b07e583-36df-47df-8439-224eca2e5761.json b/data/hfopenllm_v2/mistral/shivank21/mistral_dpo_self/7b07e583-36df-47df-8439-224eca2e5761.json deleted file mode 100644 index 29b90c9cc..000000000 --- a/data/hfopenllm_v2/mistral/shivank21/mistral_dpo_self/7b07e583-36df-47df-8439-224eca2e5761.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/shivank21_mistral_dpo_self/1762652580.5158348", - "retrieved_timestamp": "1762652580.515836", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "shivank21/mistral_dpo_self", - "developer": "mistral", - "inference_platform": "unknown", - "id": "shivank21/mistral_dpo_self", - "additional_details": { - "precision": "bfloat16", - "architecture": "", - "params_billions": 7.913 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.340345837932242 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3216256961597798 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2407718120805369 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2214095744680851 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/mistral/siqi00/Mistral-7B-DFT/e2f4255d-12ff-4c88-996d-bac6b51aaa33.json b/data/hfopenllm_v2/mistral/siqi00/Mistral-7B-DFT/e2f4255d-12ff-4c88-996d-bac6b51aaa33.json deleted file mode 100644 index 9830cf0c7..000000000 --- a/data/hfopenllm_v2/mistral/siqi00/Mistral-7B-DFT/e2f4255d-12ff-4c88-996d-bac6b51aaa33.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/siqi00_Mistral-7B-DFT/1762652580.5171149", - "retrieved_timestamp": "1762652580.5171149", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "siqi00/Mistral-7B-DFT", - "developer": "mistral", - "inference_platform": "unknown", - "id": "siqi00/Mistral-7B-DFT", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5568668909604294 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46648773367771273 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0377643504531722 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41911458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2962932180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/siqi00/Mistral-7B-DFT2/dae2a1a6-a608-4b64-a77a-e4aed87e7d7f.json b/data/hfopenllm_v2/mistral/siqi00/Mistral-7B-DFT2/dae2a1a6-a608-4b64-a77a-e4aed87e7d7f.json deleted file mode 100644 index 94e6324dc..000000000 --- a/data/hfopenllm_v2/mistral/siqi00/Mistral-7B-DFT2/dae2a1a6-a608-4b64-a77a-e4aed87e7d7f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/siqi00_Mistral-7B-DFT2/1762652580.5173602", - "retrieved_timestamp": "1762652580.517361", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - 
"model_info": { - "name": "siqi00/Mistral-7B-DFT2", - "developer": "mistral", - "inference_platform": "unknown", - "id": "siqi00/Mistral-7B-DFT2", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5803723010501026 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39683798240076246 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44007291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523936170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/spmurrayzzz/Mistral-Syndicate-7B/80934f3c-8d0b-49be-9f42-e187b4729cff.json b/data/hfopenllm_v2/mistral/spmurrayzzz/Mistral-Syndicate-7B/80934f3c-8d0b-49be-9f42-e187b4729cff.json deleted file mode 100644 index 9a434788c..000000000 --- a/data/hfopenllm_v2/mistral/spmurrayzzz/Mistral-Syndicate-7B/80934f3c-8d0b-49be-9f42-e187b4729cff.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/spmurrayzzz_Mistral-Syndicate-7B/1762652580.534304", - "retrieved_timestamp": "1762652580.534305", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "spmurrayzzz/Mistral-Syndicate-7B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "spmurrayzzz/Mistral-Syndicate-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.249595517670891 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.42450570755678535 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.033987915407854986 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43855208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2631316489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/teknium/CollectiveCognition-v1.1-Mistral-7B/626bfec9-65d1-4250-8d07-d9c8a008b554.json b/data/hfopenllm_v2/mistral/teknium/CollectiveCognition-v1.1-Mistral-7B/626bfec9-65d1-4250-8d07-d9c8a008b554.json deleted file mode 100644 index c239838a8..000000000 --- a/data/hfopenllm_v2/mistral/teknium/CollectiveCognition-v1.1-Mistral-7B/626bfec9-65d1-4250-8d07-d9c8a008b554.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/teknium_CollectiveCognition-v1.1-Mistral-7B/1762652580.55394", - "retrieved_timestamp": "1762652580.553941", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "teknium/CollectiveCognition-v1.1-Mistral-7B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "teknium/CollectiveCognition-v1.1-Mistral-7B", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27904626391308396 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4493426704276236 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030966767371601207 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on 
MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3869270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28366023936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/teknium/OpenHermes-2-Mistral-7B/f24b2adb-f12d-4dd8-984b-8ab43e15720f.json b/data/hfopenllm_v2/mistral/teknium/OpenHermes-2-Mistral-7B/f24b2adb-f12d-4dd8-984b-8ab43e15720f.json deleted file mode 100644 index 5e6de5025..000000000 --- a/data/hfopenllm_v2/mistral/teknium/OpenHermes-2-Mistral-7B/f24b2adb-f12d-4dd8-984b-8ab43e15720f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/teknium_OpenHermes-2-Mistral-7B/1762652580.5544581", - "retrieved_timestamp": "1762652580.5544589", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "teknium/OpenHermes-2-Mistral-7B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "teknium/OpenHermes-2-Mistral-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5286151854856226 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4947516371878204 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45197916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2931349734042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/teknium/OpenHermes-2.5-Mistral-7B/66d1a6cf-41da-4226-a06c-fc99641e754a.json b/data/hfopenllm_v2/mistral/teknium/OpenHermes-2.5-Mistral-7B/66d1a6cf-41da-4226-a06c-fc99641e754a.json deleted file mode 100644 index 5a9648e70..000000000 --- 
a/data/hfopenllm_v2/mistral/teknium/OpenHermes-2.5-Mistral-7B/66d1a6cf-41da-4226-a06c-fc99641e754a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/teknium_OpenHermes-2.5-Mistral-7B/1762652580.554678", - "retrieved_timestamp": "1762652580.5546792", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "teknium/OpenHermes-2.5-Mistral-7B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "teknium/OpenHermes-2.5-Mistral-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5571417173100706 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4870013259924984 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4241979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3054355053191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/tensopolis/mistral-small-2501-tensopolis-v1/53ec68aa-e4fc-430f-8ccf-f5886f1b9d4b.json b/data/hfopenllm_v2/mistral/tensopolis/mistral-small-2501-tensopolis-v1/53ec68aa-e4fc-430f-8ccf-f5886f1b9d4b.json deleted file mode 100644 index a454e2ff6..000000000 --- a/data/hfopenllm_v2/mistral/tensopolis/mistral-small-2501-tensopolis-v1/53ec68aa-e4fc-430f-8ccf-f5886f1b9d4b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tensopolis_mistral-small-2501-tensopolis-v1/1762652580.555758", - "retrieved_timestamp": "1762652580.555758", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tensopolis/mistral-small-2501-tensopolis-v1", - "developer": "mistral", - 
"inference_platform": "unknown", - "id": "tensopolis/mistral-small-2501-tensopolis-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7762104549262623 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6474735931872574 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44410876132930516 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3573825503355705 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42797916666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4464760638297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/tensopolis/mistral-small-r1-tensopolis/b2ee17e1-3d66-4622-8ea9-3bf8747371a5.json b/data/hfopenllm_v2/mistral/tensopolis/mistral-small-r1-tensopolis/b2ee17e1-3d66-4622-8ea9-3bf8747371a5.json deleted file mode 100644 index 8ca474d2a..000000000 --- a/data/hfopenllm_v2/mistral/tensopolis/mistral-small-r1-tensopolis/b2ee17e1-3d66-4622-8ea9-3bf8747371a5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tensopolis_mistral-small-r1-tensopolis/1762652580.556001", - "retrieved_timestamp": "1762652580.5560021", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tensopolis/mistral-small-r1-tensopolis", - "developer": "mistral", - "inference_platform": "unknown", - "id": "tensopolis/mistral-small-r1-tensopolis", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.462220242290456 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { 
- "score": 0.5435969591888976 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.290785498489426 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4035073138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/theprint/Conversely-Mistral-7B/5adde1ed-2d8f-4aa6-96f9-042df5358747.json b/data/hfopenllm_v2/mistral/theprint/Conversely-Mistral-7B/5adde1ed-2d8f-4aa6-96f9-042df5358747.json deleted file mode 100644 index ed892002e..000000000 --- a/data/hfopenllm_v2/mistral/theprint/Conversely-Mistral-7B/5adde1ed-2d8f-4aa6-96f9-042df5358747.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/theprint_Conversely-Mistral-7B/1762652580.56185", - "retrieved_timestamp": "1762652580.5618508", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "theprint/Conversely-Mistral-7B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "theprint/Conversely-Mistral-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 14.496 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2608113139802391 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4672348146697077 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.4188958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28257978723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/tianyil1/MistralForCausalLM_Cal_DPO/9902ef50-5208-4053-bb90-e08c98211b3f.json b/data/hfopenllm_v2/mistral/tianyil1/MistralForCausalLM_Cal_DPO/9902ef50-5208-4053-bb90-e08c98211b3f.json deleted file mode 100644 index 08d3dafee..000000000 --- a/data/hfopenllm_v2/mistral/tianyil1/MistralForCausalLM_Cal_DPO/9902ef50-5208-4053-bb90-e08c98211b3f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tianyil1_MistralForCausalLM_Cal_DPO/1762652580.566411", - "retrieved_timestamp": "1762652580.566412", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tianyil1/MistralForCausalLM_Cal_DPO", - "developer": "mistral", - "inference_platform": "unknown", - "id": "tianyil1/MistralForCausalLM_Cal_DPO", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5327619604870633 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43814239617517153 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39765625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2763464095744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/uukuguy/speechless-code-mistral-7b-v1.0/cebdb6d6-a12c-47f6-b912-4b8e98763c48.json b/data/hfopenllm_v2/mistral/uukuguy/speechless-code-mistral-7b-v1.0/cebdb6d6-a12c-47f6-b912-4b8e98763c48.json deleted file mode 100644 index 575be4ff9..000000000 --- a/data/hfopenllm_v2/mistral/uukuguy/speechless-code-mistral-7b-v1.0/cebdb6d6-a12c-47f6-b912-4b8e98763c48.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - 
"schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/uukuguy_speechless-code-mistral-7b-v1.0/1762652580.581523", - "retrieved_timestamp": "1762652580.581524", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "uukuguy/speechless-code-mistral-7b-v1.0", - "developer": "mistral", - "inference_platform": "unknown", - "id": "uukuguy/speechless-code-mistral-7b-v1.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36652415590632853 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4571712887094195 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05211480362537765 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45017708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145777925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/vicgalle/Merge-Mistral-Prometheus-7B/ecfdb6a4-36d7-4252-9677-10655b3855e5.json b/data/hfopenllm_v2/mistral/vicgalle/Merge-Mistral-Prometheus-7B/ecfdb6a4-36d7-4252-9677-10655b3855e5.json deleted file mode 100644 index 982a915f2..000000000 --- a/data/hfopenllm_v2/mistral/vicgalle/Merge-Mistral-Prometheus-7B/ecfdb6a4-36d7-4252-9677-10655b3855e5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/vicgalle_Merge-Mistral-Prometheus-7B/1762652580.5881548", - "retrieved_timestamp": "1762652580.5881548", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "vicgalle/Merge-Mistral-Prometheus-7B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "vicgalle/Merge-Mistral-Prometheus-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": 
"MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48480143796238423 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.420139773821292 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01812688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2716921542553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/xinchen9/Mistral-7B-CoT/6c54d5e2-7fca-4fa3-9d04-0f44d0651018.json b/data/hfopenllm_v2/mistral/xinchen9/Mistral-7B-CoT/6c54d5e2-7fca-4fa3-9d04-0f44d0651018.json deleted file mode 100644 index c07c6aafe..000000000 --- a/data/hfopenllm_v2/mistral/xinchen9/Mistral-7B-CoT/6c54d5e2-7fca-4fa3-9d04-0f44d0651018.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/xinchen9_Mistral-7B-CoT/1762652580.5978932", - "retrieved_timestamp": "1762652580.597894", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "xinchen9/Mistral-7B-CoT", - "developer": "mistral", - "inference_platform": "unknown", - "id": "xinchen9/Mistral-7B-CoT", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2783470081605695 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38726762098069667 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24916107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3994270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2283909574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/yam-peleg/Hebrew-Mistral-7B-200K/4d45347d-4491-4d7b-9abe-02c42974f520.json b/data/hfopenllm_v2/mistral/yam-peleg/Hebrew-Mistral-7B-200K/4d45347d-4491-4d7b-9abe-02c42974f520.json deleted file mode 100644 index aad97c46d..000000000 --- a/data/hfopenllm_v2/mistral/yam-peleg/Hebrew-Mistral-7B-200K/4d45347d-4491-4d7b-9abe-02c42974f520.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/yam-peleg_Hebrew-Mistral-7B-200K/1762652580.6038961", - "retrieved_timestamp": "1762652580.603897", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "yam-peleg/Hebrew-Mistral-7B-200K", - "developer": "mistral", - "inference_platform": "unknown", - "id": "yam-peleg/Hebrew-Mistral-7B-200K", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.504 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17698041197356346 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3410500846818921 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030966767371601207 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37399999999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2529089095744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/yam-peleg/Hebrew-Mistral-7B-200K/83a71a32-796a-4fec-9513-2f4b5e032749.json b/data/hfopenllm_v2/mistral/yam-peleg/Hebrew-Mistral-7B-200K/83a71a32-796a-4fec-9513-2f4b5e032749.json deleted file mode 100644 index fc5514ef7..000000000 --- a/data/hfopenllm_v2/mistral/yam-peleg/Hebrew-Mistral-7B-200K/83a71a32-796a-4fec-9513-2f4b5e032749.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/yam-peleg_Hebrew-Mistral-7B-200K/1762652580.6036632", - "retrieved_timestamp": "1762652580.603664", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "yam-peleg/Hebrew-Mistral-7B-200K", - "developer": "mistral", - "inference_platform": "unknown", - "id": "yam-peleg/Hebrew-Mistral-7B-200K", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.504 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1855731680829089 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4149272793394017 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.023413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3764791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25731382978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistral/yam-peleg/Hebrew-Mistral-7B/99c28dc3-f614-430a-99d7-31c2218c4d7f.json b/data/hfopenllm_v2/mistral/yam-peleg/Hebrew-Mistral-7B/99c28dc3-f614-430a-99d7-31c2218c4d7f.json deleted file mode 100644 index e432507ca..000000000 --- a/data/hfopenllm_v2/mistral/yam-peleg/Hebrew-Mistral-7B/99c28dc3-f614-430a-99d7-31c2218c4d7f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/yam-peleg_Hebrew-Mistral-7B/1762652580.603384", - "retrieved_timestamp": "1762652580.603385", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" 
- ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "yam-peleg/Hebrew-Mistral-7B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "yam-peleg/Hebrew-Mistral-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.504 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23283443485507344 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43340366992362034 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04984894259818731 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39765625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27800864361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistralai/Codestral-22B-v0.1/b56c6c01-a226-4090-9332-330535d79e24.json b/data/hfopenllm_v2/mistralai/Codestral-22B-v0.1/b56c6c01-a226-4090-9332-330535d79e24.json new file mode 100644 index 000000000..d2f9f4474 --- /dev/null +++ b/data/hfopenllm_v2/mistralai/Codestral-22B-v0.1/b56c6c01-a226-4090-9332-330535d79e24.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mistralai_Codestral-22B-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Codestral-22B-v0.1", + "id": "mistralai/Codestral-22B-v0.1", + "developer": "mistralai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5772 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": 
"BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5139 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1005 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4187 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3156 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistralai/Ministral-8B-Instruct-2410/0ddc8e10-9cc5-48eb-b5b0-a2c2f071862b.json b/data/hfopenllm_v2/mistralai/Ministral-8B-Instruct-2410/0ddc8e10-9cc5-48eb-b5b0-a2c2f071862b.json new file mode 100644 index 000000000..3393c5511 --- /dev/null +++ b/data/hfopenllm_v2/mistralai/Ministral-8B-Instruct-2410/0ddc8e10-9cc5-48eb-b5b0-a2c2f071862b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mistralai_Ministral-8B-Instruct-2410/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ministral-8B-Instruct-2410", + "id": "mistralai/Ministral-8B-Instruct-2410", + "developer": "mistralai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 8.02 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5896 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4762 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1956 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2844 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4138 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3291 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistralai/Ministral-8B-Instruct-2410/d0cfd22e-6bad-4784-a172-76892d44f70b.json b/data/hfopenllm_v2/mistralai/Ministral-8B-Instruct-2410/d0cfd22e-6bad-4784-a172-76892d44f70b.json deleted file mode 100644 index 311c8bbfe..000000000 --- a/data/hfopenllm_v2/mistralai/Ministral-8B-Instruct-2410/d0cfd22e-6bad-4784-a172-76892d44f70b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mistralai_Ministral-8B-Instruct-2410/1762652580.361781", - "retrieved_timestamp": "1762652580.361782", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mistralai/Ministral-8B-Instruct-2410", - "developer": "mistralai", - "inference_platform": "unknown", - "id": "mistralai/Ministral-8B-Instruct-2410", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 8.02 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5896399331551394 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47616402016891385 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, 
- "max_score": 1 - }, - "score_details": { - "score": 0.19561933534743203 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3291223404255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistralai/Mistral-7B-Instruct-v0.1/2917c469-7e22-497e-8d62-9b9972266658.json b/data/hfopenllm_v2/mistralai/Mistral-7B-Instruct-v0.1/2917c469-7e22-497e-8d62-9b9972266658.json new file mode 100644 index 000000000..46b328f13 --- /dev/null +++ b/data/hfopenllm_v2/mistralai/Mistral-7B-Instruct-v0.1/2917c469-7e22-497e-8d62-9b9972266658.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mistralai_Mistral-7B-Instruct-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-7B-Instruct-v0.1", + "id": "mistralai/Mistral-7B-Instruct-v0.1", + "developer": "mistralai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4487 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3355 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0227 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": 
"TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3848 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2414 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistralai/Mistral-7B-Instruct-v0.1/ef779e6f-1c12-4237-aa45-e6315ed01d92.json b/data/hfopenllm_v2/mistralai/Mistral-7B-Instruct-v0.1/ef779e6f-1c12-4237-aa45-e6315ed01d92.json deleted file mode 100644 index 2748d9852..000000000 --- a/data/hfopenllm_v2/mistralai/Mistral-7B-Instruct-v0.1/ef779e6f-1c12-4237-aa45-e6315ed01d92.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mistralai_Mistral-7B-Instruct-v0.1/1762652580.3620229", - "retrieved_timestamp": "1762652580.3620229", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mistralai/Mistral-7B-Instruct-v0.1", - "developer": "mistralai", - "inference_platform": "unknown", - "id": "mistralai/Mistral-7B-Instruct-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4487060998151571 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33548084759810987 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.022658610271903322 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38476041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24143949468085107 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistralai/Mistral-7B-Instruct-v0.2/2424d85c-e092-4e7c-bf4f-ae014d08a159.json 
b/data/hfopenllm_v2/mistralai/Mistral-7B-Instruct-v0.2/2424d85c-e092-4e7c-bf4f-ae014d08a159.json new file mode 100644 index 000000000..3c886d188 --- /dev/null +++ b/data/hfopenllm_v2/mistralai/Mistral-7B-Instruct-v0.2/2424d85c-e092-4e7c-bf4f-ae014d08a159.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mistralai_Mistral-7B-Instruct-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-7B-Instruct-v0.2", + "id": "mistralai/Mistral-7B-Instruct-v0.2", + "developer": "mistralai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5496 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.446 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0302 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3966 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2717 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistralai/Mistral-7B-Instruct-v0.2/fb55e940-f03d-4d79-9363-ec17eebf9596.json b/data/hfopenllm_v2/mistralai/Mistral-7B-Instruct-v0.2/fb55e940-f03d-4d79-9363-ec17eebf9596.json deleted file mode 100644 index 7f842c5a6..000000000 --- 
a/data/hfopenllm_v2/mistralai/Mistral-7B-Instruct-v0.2/fb55e940-f03d-4d79-9363-ec17eebf9596.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mistralai_Mistral-7B-Instruct-v0.2/1762652580.362234", - "retrieved_timestamp": "1762652580.3622348", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mistralai/Mistral-7B-Instruct-v0.2", - "developer": "mistralai", - "inference_platform": "unknown", - "id": "mistralai/Mistral-7B-Instruct-v0.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5496227786717023 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44597355203292793 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030211480362537766 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39660416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2716921542553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistralai/Mistral-7B-Instruct-v0.3/90278363-1d8f-47ca-a7dc-c51c6b511dc9.json b/data/hfopenllm_v2/mistralai/Mistral-7B-Instruct-v0.3/90278363-1d8f-47ca-a7dc-c51c6b511dc9.json new file mode 100644 index 000000000..e6172895f --- /dev/null +++ b/data/hfopenllm_v2/mistralai/Mistral-7B-Instruct-v0.3/90278363-1d8f-47ca-a7dc-c51c6b511dc9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mistralai_Mistral-7B-Instruct-v0.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-7B-Instruct-v0.3", + "id": "mistralai/Mistral-7B-Instruct-v0.3", + "developer": "mistralai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.248 + } + }, + 
"evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5465 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4722 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0385 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3739 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3075 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistralai/Mistral-7B-Instruct-v0.3/ddc775e5-a4cc-49bd-ace3-113f325134c0.json b/data/hfopenllm_v2/mistralai/Mistral-7B-Instruct-v0.3/ddc775e5-a4cc-49bd-ace3-113f325134c0.json deleted file mode 100644 index 153fbe50d..000000000 --- a/data/hfopenllm_v2/mistralai/Mistral-7B-Instruct-v0.3/ddc775e5-a4cc-49bd-ace3-113f325134c0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mistralai_Mistral-7B-Instruct-v0.3/1762652580.362444", - "retrieved_timestamp": "1762652580.362445", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mistralai/Mistral-7B-Instruct-v0.3", - "developer": "mistralai", - "inference_platform": "unknown", - "id": "mistralai/Mistral-7B-Instruct-v0.3", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5465254413844156 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47219631712648397 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37390625000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30751329787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistralai/Mistral-7B-v0.1/3c3197ee-675d-4bb7-874d-28104d2a3cae.json b/data/hfopenllm_v2/mistralai/Mistral-7B-v0.1/3c3197ee-675d-4bb7-874d-28104d2a3cae.json new file mode 100644 index 000000000..59e48b5f6 --- /dev/null +++ b/data/hfopenllm_v2/mistralai/Mistral-7B-v0.1/3c3197ee-675d-4bb7-874d-28104d2a3cae.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mistralai_Mistral-7B-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-7B-v0.1", + "id": "mistralai/Mistral-7B-v0.1", + "developer": "mistralai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2386 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4419 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0295 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4139 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3013 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistralai/Mistral-7B-v0.3/eb5a8679-bfdd-40f2-9a32-55c04a65ae7e.json b/data/hfopenllm_v2/mistralai/Mistral-7B-v0.3/eb5a8679-bfdd-40f2-9a32-55c04a65ae7e.json new file mode 100644 index 000000000..ab5aca3af --- /dev/null +++ b/data/hfopenllm_v2/mistralai/Mistral-7B-v0.3/eb5a8679-bfdd-40f2-9a32-55c04a65ae7e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mistralai_Mistral-7B-v0.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-7B-v0.3", + "id": "mistralai/Mistral-7B-v0.3", + "developer": "mistralai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2266 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4517 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0302 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, 
+ "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4032 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistralai/Mistral-Large-Instruct-2411/1f2c9c0c-7e71-4886-9980-300a7ae5c55e.json b/data/hfopenllm_v2/mistralai/Mistral-Large-Instruct-2411/1f2c9c0c-7e71-4886-9980-300a7ae5c55e.json deleted file mode 100644 index 7962a0e28..000000000 --- a/data/hfopenllm_v2/mistralai/Mistral-Large-Instruct-2411/1f2c9c0c-7e71-4886-9980-300a7ae5c55e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mistralai_Mistral-Large-Instruct-2411/1762652580.3630579", - "retrieved_timestamp": "1762652580.363059", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mistralai/Mistral-Large-Instruct-2411", - "developer": "mistralai", - "inference_platform": "unknown", - "id": "mistralai/Mistral-Large-Instruct-2411", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 122.61 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8400577135334246 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6746647735675069 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4954682779456193 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43708053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.454 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5561835106382979 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistralai/Mistral-Large-Instruct-2411/d770f88d-b110-4f27-85e9-e52217c11798.json b/data/hfopenllm_v2/mistralai/Mistral-Large-Instruct-2411/d770f88d-b110-4f27-85e9-e52217c11798.json new file mode 100644 index 000000000..439c483ad --- /dev/null +++ b/data/hfopenllm_v2/mistralai/Mistral-Large-Instruct-2411/d770f88d-b110-4f27-85e9-e52217c11798.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mistralai_Mistral-Large-Instruct-2411/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-Large-Instruct-2411", + "id": "mistralai/Mistral-Large-Instruct-2411", + "developer": "mistralai", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 122.61 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8401 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6747 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4955 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4371 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.454 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5562 + } + } + 
] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistralai/Mistral-Nemo-Base-2407/364328ce-5de7-401f-ad84-0c76e3c1dc91.json b/data/hfopenllm_v2/mistralai/Mistral-Nemo-Base-2407/364328ce-5de7-401f-ad84-0c76e3c1dc91.json new file mode 100644 index 000000000..9a51045d8 --- /dev/null +++ b/data/hfopenllm_v2/mistralai/Mistral-Nemo-Base-2407/364328ce-5de7-401f-ad84-0c76e3c1dc91.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mistralai_Mistral-Nemo-Base-2407/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-Nemo-Base-2407", + "id": "mistralai/Mistral-Nemo-Base-2407", + "developer": "mistralai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 11.58 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.163 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5035 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0597 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3921 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3472 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistralai/Mistral-Nemo-Instruct-2407/3758a033-b197-403b-ab9e-7457856f3ebc.json 
b/data/hfopenllm_v2/mistralai/Mistral-Nemo-Instruct-2407/3758a033-b197-403b-ab9e-7457856f3ebc.json deleted file mode 100644 index 1cda9e8d6..000000000 --- a/data/hfopenllm_v2/mistralai/Mistral-Nemo-Instruct-2407/3758a033-b197-403b-ab9e-7457856f3ebc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mistralai_Mistral-Nemo-Instruct-2407/1762652580.363499", - "retrieved_timestamp": "1762652580.363499", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mistralai/Mistral-Nemo-Instruct-2407", - "developer": "mistralai", - "inference_platform": "unknown", - "id": "mistralai/Mistral-Nemo-Instruct-2407", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6380248850826917 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5036523950310812 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1268882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38999999999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3517287234042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistralai/Mistral-Nemo-Instruct-2407/f7dcfdbb-ff12-4692-9702-712de3d0b7ba.json b/data/hfopenllm_v2/mistralai/Mistral-Nemo-Instruct-2407/f7dcfdbb-ff12-4692-9702-712de3d0b7ba.json new file mode 100644 index 000000000..3522fc93d --- /dev/null +++ b/data/hfopenllm_v2/mistralai/Mistral-Nemo-Instruct-2407/f7dcfdbb-ff12-4692-9702-712de3d0b7ba.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mistralai_Mistral-Nemo-Instruct-2407/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-Nemo-Instruct-2407", + "id": "mistralai/Mistral-Nemo-Instruct-2407", + "developer": 
"mistralai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.638 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5037 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1269 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.39 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3517 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistralai/Mistral-Small-24B-Base-2501/d641aa88-9981-4a25-90d5-fcc4564ede52.json b/data/hfopenllm_v2/mistralai/Mistral-Small-24B-Base-2501/d641aa88-9981-4a25-90d5-fcc4564ede52.json new file mode 100644 index 000000000..f8fd7098d --- /dev/null +++ b/data/hfopenllm_v2/mistralai/Mistral-Small-24B-Base-2501/d641aa88-9981-4a25-90d5-fcc4564ede52.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mistralai_Mistral-Small-24B-Base-2501/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-Small-24B-Base-2501", + "id": "mistralai/Mistral-Small-24B-Base-2501", + "developer": "mistralai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } + }, + "evaluation_results": [ 
+ { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1672 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6442 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1971 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3876 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4237 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5406 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistralai/Mistral-Small-Instruct-2409/15f66094-73f1-4302-adad-69522872682d.json b/data/hfopenllm_v2/mistralai/Mistral-Small-Instruct-2409/15f66094-73f1-4302-adad-69522872682d.json deleted file mode 100644 index 9f7425f06..000000000 --- a/data/hfopenllm_v2/mistralai/Mistral-Small-Instruct-2409/15f66094-73f1-4302-adad-69522872682d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mistralai_Mistral-Small-Instruct-2409/1762652580.363916", - "retrieved_timestamp": "1762652580.363917", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mistralai/Mistral-Small-Instruct-2409", - "developer": "mistralai", - "inference_platform": "unknown", - "id": "mistralai/Mistral-Small-Instruct-2409", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 22.05 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.666975846310013 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5213075098146217 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36320833333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39602726063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistralai/Mistral-Small-Instruct-2409/8915e742-df2e-41bc-b83f-3e111edfd257.json b/data/hfopenllm_v2/mistralai/Mistral-Small-Instruct-2409/8915e742-df2e-41bc-b83f-3e111edfd257.json new file mode 100644 index 000000000..b9bddd2f0 --- /dev/null +++ b/data/hfopenllm_v2/mistralai/Mistral-Small-Instruct-2409/8915e742-df2e-41bc-b83f-3e111edfd257.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mistralai_Mistral-Small-Instruct-2409/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-Small-Instruct-2409", + "id": "mistralai/Mistral-Small-Instruct-2409", + "developer": "mistralai", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6283 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.583 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2039 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3331 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4063 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4099 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistralai/Mistral-Small-Instruct-2409/a85d1dbd-465b-42c8-baf5-0e7a7ca00725.json b/data/hfopenllm_v2/mistralai/Mistral-Small-Instruct-2409/a85d1dbd-465b-42c8-baf5-0e7a7ca00725.json deleted file mode 100644 index 70312fea2..000000000 --- a/data/hfopenllm_v2/mistralai/Mistral-Small-Instruct-2409/a85d1dbd-465b-42c8-baf5-0e7a7ca00725.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mistralai_Mistral-Small-Instruct-2409/1762652580.364117", - "retrieved_timestamp": "1762652580.364118", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mistralai/Mistral-Small-Instruct-2409", - "developer": "mistralai", - "inference_platform": "unknown", - "id": "mistralai/Mistral-Small-Instruct-2409", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 22.247 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6282829558903709 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5830283846898211 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.33305369127516776 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4063333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.409906914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistralai/Mistral-Small-Instruct-2409/e29a5e35-8677-4e53-83fd-85e919b4366a.json b/data/hfopenllm_v2/mistralai/Mistral-Small-Instruct-2409/e29a5e35-8677-4e53-83fd-85e919b4366a.json new file mode 100644 index 000000000..883513689 --- /dev/null +++ b/data/hfopenllm_v2/mistralai/Mistral-Small-Instruct-2409/e29a5e35-8677-4e53-83fd-85e919b4366a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mistralai_Mistral-Small-Instruct-2409/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-Small-Instruct-2409", + "id": "mistralai/Mistral-Small-Instruct-2409", + "developer": "mistralai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 22.05 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.667 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5213 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1435 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3238 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3632 + } + }, + { + 
"evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.396 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistralai/Mixtral-8x22B-Instruct-v0.1/e5c55d38-dc04-42b4-9aca-ae7be436ebe0.json b/data/hfopenllm_v2/mistralai/Mixtral-8x22B-Instruct-v0.1/e5c55d38-dc04-42b4-9aca-ae7be436ebe0.json new file mode 100644 index 000000000..95271fa45 --- /dev/null +++ b/data/hfopenllm_v2/mistralai/Mixtral-8x22B-Instruct-v0.1/e5c55d38-dc04-42b4-9aca-ae7be436ebe0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mistralai_Mixtral-8x22B-Instruct-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mixtral-8x22B-Instruct-v0.1", + "id": "mistralai/Mixtral-8x22B-Instruct-v0.1", + "developer": "mistralai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 140.621 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7184 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6125 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1873 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3733 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4311 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4483 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistralai/Mixtral-8x22B-Instruct-v0.1/ee88881e-cdeb-4a55-b784-6b41b983d7aa.json b/data/hfopenllm_v2/mistralai/Mixtral-8x22B-Instruct-v0.1/ee88881e-cdeb-4a55-b784-6b41b983d7aa.json deleted file mode 100644 index 8b34d1f83..000000000 --- a/data/hfopenllm_v2/mistralai/Mixtral-8x22B-Instruct-v0.1/ee88881e-cdeb-4a55-b784-6b41b983d7aa.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mistralai_Mixtral-8x22B-Instruct-v0.1/1762652580.3642921", - "retrieved_timestamp": "1762652580.3642921", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mistralai/Mixtral-8x22B-Instruct-v0.1", - "developer": "mistralai", - "inference_platform": "unknown", - "id": "mistralai/Mixtral-8x22B-Instruct-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 140.621 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7183584001560305 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6124924926272018 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18731117824773413 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3733221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43111458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44830452127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistralai/Mixtral-8x22B-v0.1/504baceb-6684-430d-a532-b7b5b0b061fe.json b/data/hfopenllm_v2/mistralai/Mixtral-8x22B-v0.1/504baceb-6684-430d-a532-b7b5b0b061fe.json new file mode 100644 index 000000000..e967e9002 --- /dev/null +++ b/data/hfopenllm_v2/mistralai/Mixtral-8x22B-v0.1/504baceb-6684-430d-a532-b7b5b0b061fe.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mistralai_Mixtral-8x22B-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + 
"source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mixtral-8x22B-v0.1", + "id": "mistralai/Mixtral-8x22B-v0.1", + "developer": "mistralai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 140.621 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2583 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.624 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1835 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3758 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4037 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4639 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistralai/Mixtral-8x7B-Instruct-v0.1/2e1de889-2df9-4c81-b5ce-c00c602704b7.json b/data/hfopenllm_v2/mistralai/Mixtral-8x7B-Instruct-v0.1/2e1de889-2df9-4c81-b5ce-c00c602704b7.json deleted file mode 100644 index 06afa7c16..000000000 --- a/data/hfopenllm_v2/mistralai/Mixtral-8x7B-Instruct-v0.1/2e1de889-2df9-4c81-b5ce-c00c602704b7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mistralai_Mixtral-8x7B-Instruct-v0.1/1762652580.364703", - "retrieved_timestamp": "1762652580.364704", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": 
"third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mistralai/Mixtral-8x7B-Instruct-v0.1", - "developer": "mistralai", - "inference_platform": "unknown", - "id": "mistralai/Mixtral-8x7B-Instruct-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 46.703 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5599143605633053 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49623654013356494 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09138972809667674 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42032291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36918218085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/mistralai/Mixtral-8x7B-Instruct-v0.1/31fcd34a-af1e-4eab-bd9a-5ec17eb572d2.json b/data/hfopenllm_v2/mistralai/Mixtral-8x7B-Instruct-v0.1/31fcd34a-af1e-4eab-bd9a-5ec17eb572d2.json new file mode 100644 index 000000000..659f51d7a --- /dev/null +++ b/data/hfopenllm_v2/mistralai/Mixtral-8x7B-Instruct-v0.1/31fcd34a-af1e-4eab-bd9a-5ec17eb572d2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mistralai_Mixtral-8x7B-Instruct-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mixtral-8x7B-Instruct-v0.1", + "id": "mistralai/Mixtral-8x7B-Instruct-v0.1", + "developer": "mistralai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 46.703 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5599 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": 
"hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4962 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0914 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4203 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3692 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistralai/Mixtral-8x7B-v0.1/01ab0a3e-393a-497a-9b32-8af790b7581a.json b/data/hfopenllm_v2/mistralai/Mixtral-8x7B-v0.1/01ab0a3e-393a-497a-9b32-8af790b7581a.json new file mode 100644 index 000000000..b2775e111 --- /dev/null +++ b/data/hfopenllm_v2/mistralai/Mixtral-8x7B-v0.1/01ab0a3e-393a-497a-9b32-8af790b7581a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mistralai_Mixtral-8x7B-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mixtral-8x7B-v0.1", + "id": "mistralai/Mixtral-8x7B-v0.1", + "developer": "mistralai", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MixtralForCausalLM", + "params_billions": 46.703 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2326 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5098 + 
} + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0937 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3205 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4413 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3871 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistralai/Mixtral-8x7B-v0.1/541967a6-b856-4dc9-958a-9335197fba99.json b/data/hfopenllm_v2/mistralai/Mixtral-8x7B-v0.1/541967a6-b856-4dc9-958a-9335197fba99.json new file mode 100644 index 000000000..2f9aa7f43 --- /dev/null +++ b/data/hfopenllm_v2/mistralai/Mixtral-8x7B-v0.1/541967a6-b856-4dc9-958a-9335197fba99.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mistralai_Mixtral-8x7B-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mixtral-8x7B-v0.1", + "id": "mistralai/Mixtral-8x7B-v0.1", + "developer": "mistralai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 46.703 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2415 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5087 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH 
Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.102 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3138 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4321 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.385 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mixtao/MixTAO-7Bx2-MoE-v8.1/a6032673-fee4-4c8c-97fa-167729f495d6.json b/data/hfopenllm_v2/mixtao/MixTAO-7Bx2-MoE-v8.1/a6032673-fee4-4c8c-97fa-167729f495d6.json deleted file mode 100644 index db498381b..000000000 --- a/data/hfopenllm_v2/mixtao/MixTAO-7Bx2-MoE-v8.1/a6032673-fee4-4c8c-97fa-167729f495d6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mixtao_MixTAO-7Bx2-MoE-v8.1/1762652580.3653471", - "retrieved_timestamp": "1762652580.365348", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mixtao/MixTAO-7Bx2-MoE-v8.1", - "developer": "mixtao", - "inference_platform": "unknown", - "id": "mixtao/MixTAO-7Bx2-MoE-v8.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41623337189767595 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5189059391733521 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09063444108761329 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": 
{ - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4463333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3123337765957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/mixtao/MixTAO-7Bx2-MoE-v8.1/ee31c801-67cb-46a3-9e39-02e842c0473f.json b/data/hfopenllm_v2/mixtao/MixTAO-7Bx2-MoE-v8.1/ee31c801-67cb-46a3-9e39-02e842c0473f.json new file mode 100644 index 000000000..f557e9ecc --- /dev/null +++ b/data/hfopenllm_v2/mixtao/MixTAO-7Bx2-MoE-v8.1/ee31c801-67cb-46a3-9e39-02e842c0473f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mixtao_MixTAO-7Bx2-MoE-v8.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MixTAO-7Bx2-MoE-v8.1", + "id": "mixtao/MixTAO-7Bx2-MoE-v8.1", + "developer": "mixtao", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 12.879 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4162 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5189 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0906 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2844 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4463 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3123 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mkurman/llama-3.2-MEDIT-3B-o1/65fabe8b-05af-461e-b804-fcff3492da34.json b/data/hfopenllm_v2/mkurman/llama-3.2-MEDIT-3B-o1/65fabe8b-05af-461e-b804-fcff3492da34.json new file mode 100644 index 000000000..3b0161b0b --- /dev/null +++ b/data/hfopenllm_v2/mkurman/llama-3.2-MEDIT-3B-o1/65fabe8b-05af-461e-b804-fcff3492da34.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mkurman_llama-3.2-MEDIT-3B-o1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-3.2-MEDIT-3B-o1", + "id": "mkurman/llama-3.2-MEDIT-3B-o1", + "developer": "mkurman", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.607 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4382 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.44 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1307 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3565 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2741 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/mkurman/phi-4-MedIT-11B-exp-1/7e1a7121-2c9f-4196-bbdd-48aea257f384.json b/data/hfopenllm_v2/mkurman/phi-4-MedIT-11B-exp-1/7e1a7121-2c9f-4196-bbdd-48aea257f384.json new file mode 100644 index 000000000..f017555d6 --- /dev/null +++ b/data/hfopenllm_v2/mkurman/phi-4-MedIT-11B-exp-1/7e1a7121-2c9f-4196-bbdd-48aea257f384.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mkurman_phi-4-MedIT-11B-exp-1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "phi-4-MedIT-11B-exp-1", + "id": "mkurman/phi-4-MedIT-11B-exp-1", + "developer": "mkurman", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 11.514 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5948 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5414 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0899 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3848 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3825 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mkurman/phi4-MedIT-10B-o1/dd32609c-316e-4511-8791-fcae33a1a506.json b/data/hfopenllm_v2/mkurman/phi4-MedIT-10B-o1/dd32609c-316e-4511-8791-fcae33a1a506.json new file mode 100644 index 000000000..5207ed657 --- /dev/null +++ 
b/data/hfopenllm_v2/mkurman/phi4-MedIT-10B-o1/dd32609c-316e-4511-8791-fcae33a1a506.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mkurman_phi4-MedIT-10B-o1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "phi4-MedIT-10B-o1", + "id": "mkurman/phi4-MedIT-10B-o1", + "developer": "mkurman", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaMedITForCausalLM", + "params_billions": 10.255 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3463 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5198 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1148 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2458 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3968 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3507 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mkxu/llama-3-8b-instruct-fpo/0ba6add2-4495-4261-baab-224c0b6c683f.json b/data/hfopenllm_v2/mkxu/llama-3-8b-instruct-fpo/0ba6add2-4495-4261-baab-224c0b6c683f.json deleted file mode 100644 index 296c6f738..000000000 --- a/data/hfopenllm_v2/mkxu/llama-3-8b-instruct-fpo/0ba6add2-4495-4261-baab-224c0b6c683f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mkxu_llama-3-8b-instruct-fpo/1762652580.366677", - "retrieved_timestamp": 
"1762652580.366678", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mkxu/llama-3-8b-instruct-fpo", - "developer": "mkxu", - "inference_platform": "unknown", - "id": "mkxu/llama-3-8b-instruct-fpo", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6790161216682846 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4959114413700331 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07326283987915408 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36578125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36045545212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/mkxu/llama-3-8b-instruct-fpo/d95d7058-49eb-47d7-b790-3a253291d22b.json b/data/hfopenllm_v2/mkxu/llama-3-8b-instruct-fpo/d95d7058-49eb-47d7-b790-3a253291d22b.json new file mode 100644 index 000000000..9ef45d0ae --- /dev/null +++ b/data/hfopenllm_v2/mkxu/llama-3-8b-instruct-fpo/d95d7058-49eb-47d7-b790-3a253291d22b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mkxu_llama-3-8b-instruct-fpo/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-3-8b-instruct-fpo", + "id": "mkxu/llama-3-8b-instruct-fpo", + "developer": "mkxu", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + 
}, + "score_details": { + "score": 0.679 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4959 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0733 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3658 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3605 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mkxu/llama-3-8b-po1/37cbc3d6-1198-4e23-b86c-1fd979eacd9a.json b/data/hfopenllm_v2/mkxu/llama-3-8b-po1/37cbc3d6-1198-4e23-b86c-1fd979eacd9a.json new file mode 100644 index 000000000..db56d51b6 --- /dev/null +++ b/data/hfopenllm_v2/mkxu/llama-3-8b-po1/37cbc3d6-1198-4e23-b86c-1fd979eacd9a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mkxu_llama-3-8b-po1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-3-8b-po1", + "id": "mkxu/llama-3-8b-po1", + "developer": "mkxu", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4081 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4976 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0702 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3804 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3562 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mlabonne/AlphaMonarch-7B/76d0d338-e502-4638-adad-c4c4df00c26f.json b/data/hfopenllm_v2/mlabonne/AlphaMonarch-7B/76d0d338-e502-4638-adad-c4c4df00c26f.json new file mode 100644 index 000000000..e4a056c61 --- /dev/null +++ b/data/hfopenllm_v2/mlabonne/AlphaMonarch-7B/76d0d338-e502-4638-adad-c4c4df00c26f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mlabonne_AlphaMonarch-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AlphaMonarch-7B", + "id": "mlabonne/AlphaMonarch-7B", + "developer": "mlabonne", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4939 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4626 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" 
+ }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0408 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4121 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2473 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mlabonne/AlphaMonarch-7B/d7eb4408-6857-4df1-b92b-9dd4712a4f23.json b/data/hfopenllm_v2/mlabonne/AlphaMonarch-7B/d7eb4408-6857-4df1-b92b-9dd4712a4f23.json deleted file mode 100644 index 410fc4461..000000000 --- a/data/hfopenllm_v2/mlabonne/AlphaMonarch-7B/d7eb4408-6857-4df1-b92b-9dd4712a4f23.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mlabonne_AlphaMonarch-7B/1762652580.367184", - "retrieved_timestamp": "1762652580.3671849", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mlabonne/AlphaMonarch-7B", - "developer": "mlabonne", - "inference_platform": "unknown", - "id": "mlabonne/AlphaMonarch-7B", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49394384677101205 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4625522037183211 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } 
- }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41213541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24725731382978725 - } - } - ] -} diff --git a/data/hfopenllm_v2/mlabonne/Beyonder-4x7B-v3/b0867447-6dd9-453c-af09-da0db5651e65.json b/data/hfopenllm_v2/mlabonne/Beyonder-4x7B-v3/b0867447-6dd9-453c-af09-da0db5651e65.json deleted file mode 100644 index ac45b5f52..000000000 --- a/data/hfopenllm_v2/mlabonne/Beyonder-4x7B-v3/b0867447-6dd9-453c-af09-da0db5651e65.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mlabonne_Beyonder-4x7B-v3/1762652580.36743", - "retrieved_timestamp": "1762652580.367431", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mlabonne/Beyonder-4x7B-v3", - "developer": "mlabonne", - "inference_platform": "unknown", - "id": "mlabonne/Beyonder-4x7B-v3", - "additional_details": { - "precision": "float16", - "architecture": "MixtralForCausalLM", - "params_billions": 24.154 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5608385749810503 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4670522037183211 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40454166666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2512466755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/mlabonne/Beyonder-4x7B-v3/f47375bd-547a-4d0b-8c96-bbe2bc1ac445.json b/data/hfopenllm_v2/mlabonne/Beyonder-4x7B-v3/f47375bd-547a-4d0b-8c96-bbe2bc1ac445.json new file mode 100644 index 000000000..ae8e73867 --- /dev/null +++ 
b/data/hfopenllm_v2/mlabonne/Beyonder-4x7B-v3/f47375bd-547a-4d0b-8c96-bbe2bc1ac445.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mlabonne_Beyonder-4x7B-v3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Beyonder-4x7B-v3", + "id": "mlabonne/Beyonder-4x7B-v3", + "developer": "mlabonne", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.154 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5608 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4671 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0536 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4045 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2512 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mlabonne/BigQwen2.5-52B-Instruct/6b1ed68c-3099-4bd7-892b-cdc36c90ccfe.json b/data/hfopenllm_v2/mlabonne/BigQwen2.5-52B-Instruct/6b1ed68c-3099-4bd7-892b-cdc36c90ccfe.json new file mode 100644 index 000000000..7401e378b --- /dev/null +++ b/data/hfopenllm_v2/mlabonne/BigQwen2.5-52B-Instruct/6b1ed68c-3099-4bd7-892b-cdc36c90ccfe.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mlabonne_BigQwen2.5-52B-Instruct/1770682486.623709", + "retrieved_timestamp": 
"1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BigQwen2.5-52B-Instruct", + "id": "mlabonne/BigQwen2.5-52B-Instruct", + "developer": "mlabonne", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 52.268 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7913 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7121 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5476 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4113 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5519 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mlabonne/BigQwen2.5-52B-Instruct/b18517f1-db51-43a8-812f-75aeccae508f.json b/data/hfopenllm_v2/mlabonne/BigQwen2.5-52B-Instruct/b18517f1-db51-43a8-812f-75aeccae508f.json deleted file mode 100644 index 0269bf4a5..000000000 --- a/data/hfopenllm_v2/mlabonne/BigQwen2.5-52B-Instruct/b18517f1-db51-43a8-812f-75aeccae508f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mlabonne_BigQwen2.5-52B-Instruct/1762652580.3676438", - "retrieved_timestamp": "1762652580.367645", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mlabonne/BigQwen2.5-52B-Instruct", - "developer": "mlabonne", - "inference_platform": "unknown", - "id": "mlabonne/BigQwen2.5-52B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 52.268 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7913480675718205 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7121004678698547 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.547583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41130208333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5519448138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/mlabonne/BigQwen2.5-Echo-47B-Instruct/0e59c8ca-cde0-4482-ab03-3309bcb8737c.json b/data/hfopenllm_v2/mlabonne/BigQwen2.5-Echo-47B-Instruct/0e59c8ca-cde0-4482-ab03-3309bcb8737c.json new file mode 100644 index 000000000..015017389 --- /dev/null +++ b/data/hfopenllm_v2/mlabonne/BigQwen2.5-Echo-47B-Instruct/0e59c8ca-cde0-4482-ab03-3309bcb8737c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mlabonne_BigQwen2.5-Echo-47B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BigQwen2.5-Echo-47B-Instruct", + "id": "mlabonne/BigQwen2.5-Echo-47B-Instruct", + "developer": "mlabonne", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 47.392 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7357 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + 
"source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6125 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4381 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3146 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4734 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mlabonne/BigQwen2.5-Echo-47B-Instruct/12efcd4e-13cc-46e5-964a-35d4be69a01e.json b/data/hfopenllm_v2/mlabonne/BigQwen2.5-Echo-47B-Instruct/12efcd4e-13cc-46e5-964a-35d4be69a01e.json deleted file mode 100644 index a3f0c1286..000000000 --- a/data/hfopenllm_v2/mlabonne/BigQwen2.5-Echo-47B-Instruct/12efcd4e-13cc-46e5-964a-35d4be69a01e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mlabonne_BigQwen2.5-Echo-47B-Instruct/1762652580.36785", - "retrieved_timestamp": "1762652580.36785", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mlabonne/BigQwen2.5-Echo-47B-Instruct", - "developer": "mlabonne", - "inference_platform": "unknown", - "id": "mlabonne/BigQwen2.5-Echo-47B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 47.392 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7356691356711305 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6125111878044905 - } - }, - 
{ - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4124791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4734042553191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/mlabonne/ChimeraLlama-3-8B-v2/d7e900e2-0574-44cd-a68a-0dd2715cf48c.json b/data/hfopenllm_v2/mlabonne/ChimeraLlama-3-8B-v2/d7e900e2-0574-44cd-a68a-0dd2715cf48c.json new file mode 100644 index 000000000..e8c365a9a --- /dev/null +++ b/data/hfopenllm_v2/mlabonne/ChimeraLlama-3-8B-v2/d7e900e2-0574-44cd-a68a-0dd2715cf48c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mlabonne_ChimeraLlama-3-8B-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ChimeraLlama-3-8B-v2", + "id": "mlabonne/ChimeraLlama-3-8B-v2", + "developer": "mlabonne", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4469 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5046 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0906 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.2852 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3791 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3569 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mlabonne/ChimeraLlama-3-8B-v3/fd626c3f-566d-4193-9a85-e7c9a89e671c.json b/data/hfopenllm_v2/mlabonne/ChimeraLlama-3-8B-v3/fd626c3f-566d-4193-9a85-e7c9a89e671c.json new file mode 100644 index 000000000..e51cf0145 --- /dev/null +++ b/data/hfopenllm_v2/mlabonne/ChimeraLlama-3-8B-v3/fd626c3f-566d-4193-9a85-e7c9a89e671c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mlabonne_ChimeraLlama-3-8B-v3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ChimeraLlama-3-8B-v3", + "id": "mlabonne/ChimeraLlama-3-8B-v3", + "developer": "mlabonne", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4408 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4978 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0884 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on 
MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3669 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mlabonne/Daredevil-8B-abliterated/196b04ae-fd53-400f-9f08-19edd4959f6e.json b/data/hfopenllm_v2/mlabonne/Daredevil-8B-abliterated/196b04ae-fd53-400f-9f08-19edd4959f6e.json new file mode 100644 index 000000000..8e430f3b6 --- /dev/null +++ b/data/hfopenllm_v2/mlabonne/Daredevil-8B-abliterated/196b04ae-fd53-400f-9f08-19edd4959f6e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mlabonne_Daredevil-8B-abliterated/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daredevil-8B-abliterated", + "id": "mlabonne/Daredevil-8B-abliterated", + "developer": "mlabonne", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4426 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4254 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0944 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.407 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + 
"dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3701 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mlabonne/Daredevil-8B-abliterated/3ad89b65-5719-4e54-aadf-c10d3f27857a.json b/data/hfopenllm_v2/mlabonne/Daredevil-8B-abliterated/3ad89b65-5719-4e54-aadf-c10d3f27857a.json deleted file mode 100644 index 0c2bddc3d..000000000 --- a/data/hfopenllm_v2/mlabonne/Daredevil-8B-abliterated/3ad89b65-5719-4e54-aadf-c10d3f27857a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mlabonne_Daredevil-8B-abliterated/1762652580.3686998", - "retrieved_timestamp": "1762652580.3686998", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mlabonne/Daredevil-8B-abliterated", - "developer": "mlabonne", - "inference_platform": "unknown", - "id": "mlabonne/Daredevil-8B-abliterated", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44263664853699297 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4254272523147253 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09441087613293052 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40702083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3700964095744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/mlabonne/Daredevil-8B/4653087e-b528-47c1-86eb-0166538229bc.json b/data/hfopenllm_v2/mlabonne/Daredevil-8B/4653087e-b528-47c1-86eb-0166538229bc.json deleted file mode 100644 index 209bada1d..000000000 --- a/data/hfopenllm_v2/mlabonne/Daredevil-8B/4653087e-b528-47c1-86eb-0166538229bc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/mlabonne_Daredevil-8B/1762652580.368499", - "retrieved_timestamp": "1762652580.3685", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mlabonne/Daredevil-8B", - "developer": "mlabonne", - "inference_platform": "unknown", - "id": "mlabonne/Daredevil-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45477665926408595 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5194408746721715 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.393875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.383061835106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/mlabonne/Daredevil-8B/57177299-076a-4506-89a7-ce54af08df4f.json b/data/hfopenllm_v2/mlabonne/Daredevil-8B/57177299-076a-4506-89a7-ce54af08df4f.json new file mode 100644 index 000000000..f05fd6db8 --- /dev/null +++ b/data/hfopenllm_v2/mlabonne/Daredevil-8B/57177299-076a-4506-89a7-ce54af08df4f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mlabonne_Daredevil-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daredevil-8B", + "id": "mlabonne/Daredevil-8B", + "developer": "mlabonne", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 
0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4548 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5194 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1065 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3939 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3831 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mlabonne/Hermes-3-Llama-3.1-70B-lorablated/d3bdf36f-7f89-4b5a-b6cb-847b49200b5b.json b/data/hfopenllm_v2/mlabonne/Hermes-3-Llama-3.1-70B-lorablated/d3bdf36f-7f89-4b5a-b6cb-847b49200b5b.json new file mode 100644 index 000000000..dced4ed52 --- /dev/null +++ b/data/hfopenllm_v2/mlabonne/Hermes-3-Llama-3.1-70B-lorablated/d3bdf36f-7f89-4b5a-b6cb-847b49200b5b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mlabonne_Hermes-3-Llama-3.1-70B-lorablated/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Hermes-3-Llama-3.1-70B-lorablated", + "id": "mlabonne/Hermes-3-Llama-3.1-70B-lorablated", + "developer": "mlabonne", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3424 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": 
"hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6693 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2243 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3658 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5029 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4679 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated/605f3f59-204e-4332-8b4e-9da04871ca1b.json b/data/hfopenllm_v2/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated/605f3f59-204e-4332-8b4e-9da04871ca1b.json deleted file mode 100644 index 1d7aebc90..000000000 --- a/data/hfopenllm_v2/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated/605f3f59-204e-4332-8b4e-9da04871ca1b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mlabonne_Meta-Llama-3.1-8B-Instruct-abliterated/1762652580.369122", - "retrieved_timestamp": "1762652580.369123", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated", - "developer": "mlabonne", - "inference_platform": "unknown", - "id": "mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7329463601023063 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.48740648734902187 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06873111782477341 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36488541666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3503158244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated/92619b9e-dacf-4d0a-9f8b-6e131af74fa4.json b/data/hfopenllm_v2/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated/92619b9e-dacf-4d0a-9f8b-6e131af74fa4.json new file mode 100644 index 000000000..2110b6907 --- /dev/null +++ b/data/hfopenllm_v2/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated/92619b9e-dacf-4d0a-9f8b-6e131af74fa4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mlabonne_Meta-Llama-3.1-8B-Instruct-abliterated/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Meta-Llama-3.1-8B-Instruct-abliterated", + "id": "mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated", + "developer": "mlabonne", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7329 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4874 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0687 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2567 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3649 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3503 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mlabonne/NeuralBeagle14-7B/0bfec228-5bfb-4662-8be5-ad910b5bc3bd.json b/data/hfopenllm_v2/mlabonne/NeuralBeagle14-7B/0bfec228-5bfb-4662-8be5-ad910b5bc3bd.json deleted file mode 100644 index d5fe70fc4..000000000 --- a/data/hfopenllm_v2/mlabonne/NeuralBeagle14-7B/0bfec228-5bfb-4662-8be5-ad910b5bc3bd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mlabonne_NeuralBeagle14-7B/1762652580.369343", - "retrieved_timestamp": "1762652580.369343", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mlabonne/NeuralBeagle14-7B", - "developer": "mlabonne", - "inference_platform": "unknown", - "id": "mlabonne/NeuralBeagle14-7B", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49351941736813876 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46278709452353844 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05211480362537765 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43194791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on 
MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2601396276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/mlabonne/NeuralBeagle14-7B/cbb408ea-ced6-4f47-9066-d4ff6d604b1e.json b/data/hfopenllm_v2/mlabonne/NeuralBeagle14-7B/cbb408ea-ced6-4f47-9066-d4ff6d604b1e.json new file mode 100644 index 000000000..6fc2a6175 --- /dev/null +++ b/data/hfopenllm_v2/mlabonne/NeuralBeagle14-7B/cbb408ea-ced6-4f47-9066-d4ff6d604b1e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mlabonne_NeuralBeagle14-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NeuralBeagle14-7B", + "id": "mlabonne/NeuralBeagle14-7B", + "developer": "mlabonne", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4935 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4628 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0521 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2819 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4319 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/mlabonne/NeuralDaredevil-8B-abliterated/05fe5948-c228-46f5-ac96-3c234bc5b3ce.json b/data/hfopenllm_v2/mlabonne/NeuralDaredevil-8B-abliterated/05fe5948-c228-46f5-ac96-3c234bc5b3ce.json deleted file mode 100644 index 6a91066d4..000000000 --- a/data/hfopenllm_v2/mlabonne/NeuralDaredevil-8B-abliterated/05fe5948-c228-46f5-ac96-3c234bc5b3ce.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mlabonne_NeuralDaredevil-8B-abliterated/1762652580.369559", - "retrieved_timestamp": "1762652580.36956", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mlabonne/NeuralDaredevil-8B-abliterated", - "developer": "mlabonne", - "inference_platform": "unknown", - "id": "mlabonne/NeuralDaredevil-8B-abliterated", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.756077208473517 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5110566504436299 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09063444108761329 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4019375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38414228723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/mlabonne/NeuralDaredevil-8B-abliterated/6999bb02-29fd-4c59-886f-184362afa06e.json b/data/hfopenllm_v2/mlabonne/NeuralDaredevil-8B-abliterated/6999bb02-29fd-4c59-886f-184362afa06e.json new file mode 100644 index 000000000..6a7692af4 --- /dev/null +++ b/data/hfopenllm_v2/mlabonne/NeuralDaredevil-8B-abliterated/6999bb02-29fd-4c59-886f-184362afa06e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mlabonne_NeuralDaredevil-8B-abliterated/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + 
"model_info": { + "name": "NeuralDaredevil-8B-abliterated", + "id": "mlabonne/NeuralDaredevil-8B-abliterated", + "developer": "mlabonne", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7561 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5111 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0906 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4019 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3841 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mlabonne/NeuralDaredevil-8B-abliterated/913d1d8e-0b02-4ce5-9b7c-403143a8c880.json b/data/hfopenllm_v2/mlabonne/NeuralDaredevil-8B-abliterated/913d1d8e-0b02-4ce5-9b7c-403143a8c880.json new file mode 100644 index 000000000..24cd13aa3 --- /dev/null +++ b/data/hfopenllm_v2/mlabonne/NeuralDaredevil-8B-abliterated/913d1d8e-0b02-4ce5-9b7c-403143a8c880.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mlabonne_NeuralDaredevil-8B-abliterated/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NeuralDaredevil-8B-abliterated", + "id": "mlabonne/NeuralDaredevil-8B-abliterated", + "developer": "mlabonne", + "inference_platform": "unknown", + 
"additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4162 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5124 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0853 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.415 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3802 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mlabonne/NeuralDaredevil-8B-abliterated/d4b40160-579a-4e66-96a2-8441e5c02694.json b/data/hfopenllm_v2/mlabonne/NeuralDaredevil-8B-abliterated/d4b40160-579a-4e66-96a2-8441e5c02694.json deleted file mode 100644 index 9a7ba956a..000000000 --- a/data/hfopenllm_v2/mlabonne/NeuralDaredevil-8B-abliterated/d4b40160-579a-4e66-96a2-8441e5c02694.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mlabonne_NeuralDaredevil-8B-abliterated/1762652580.369774", - "retrieved_timestamp": "1762652580.369775", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mlabonne/NeuralDaredevil-8B-abliterated", - "developer": "mlabonne", - "inference_platform": "unknown", - "id": "mlabonne/NeuralDaredevil-8B-abliterated", - "additional_details": { - "precision": "bfloat16", - 
"architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41623337189767595 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5123964057729099 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08534743202416918 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4149583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3801529255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/mlabonne/OrpoLlama-3-8B/82c87bc0-29cf-4150-92f5-c80fb0028ea6.json b/data/hfopenllm_v2/mlabonne/OrpoLlama-3-8B/82c87bc0-29cf-4150-92f5-c80fb0028ea6.json new file mode 100644 index 000000000..e19f78336 --- /dev/null +++ b/data/hfopenllm_v2/mlabonne/OrpoLlama-3-8B/82c87bc0-29cf-4150-92f5-c80fb0028ea6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mlabonne_OrpoLlama-3-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OrpoLlama-3-8B", + "id": "mlabonne/OrpoLlama-3-8B", + "developer": "mlabonne", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3653 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4424 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0559 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3579 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2705 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mlabonne/phixtral-2x2_8/a18834ad-6143-4ce2-9842-471817a60a39.json b/data/hfopenllm_v2/mlabonne/phixtral-2x2_8/a18834ad-6143-4ce2-9842-471817a60a39.json new file mode 100644 index 000000000..f6430bd4f --- /dev/null +++ b/data/hfopenllm_v2/mlabonne/phixtral-2x2_8/a18834ad-6143-4ce2-9842-471817a60a39.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mlabonne_phixtral-2x2_8/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "phixtral-2x2_8", + "id": "mlabonne/phixtral-2x2_8", + "developer": "mlabonne", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 4.458 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3431 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4889 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0355 + } + }, + { + 
"evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3644 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2551 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mlx-community/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1-float32/9bf2a7e3-e744-4ac0-853a-f5cec8ef9c57.json b/data/hfopenllm_v2/mlx-community/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1-float32/9bf2a7e3-e744-4ac0-853a-f5cec8ef9c57.json deleted file mode 100644 index 501ede02b..000000000 --- a/data/hfopenllm_v2/mlx-community/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1-float32/9bf2a7e3-e744-4ac0-853a-f5cec8ef9c57.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mlx-community_Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1-float32/1762652580.3704169", - "retrieved_timestamp": "1762652580.3704178", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mlx-community/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1-float32", - "developer": "mlx-community", - "inference_platform": "unknown", - "id": "mlx-community/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1-float32", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3368983186833158 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32921013057720044 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3249166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16381316489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/mlx-community/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1-float32/be900bcf-8ec9-484f-81db-0e83975c1ecd.json b/data/hfopenllm_v2/mlx-community/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1-float32/be900bcf-8ec9-484f-81db-0e83975c1ecd.json new file mode 100644 index 000000000..da8271107 --- /dev/null +++ b/data/hfopenllm_v2/mlx-community/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1-float32/be900bcf-8ec9-484f-81db-0e83975c1ecd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mlx-community_Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1-float32/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1-float32", + "id": "mlx-community/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1-float32", + "developer": "mlx-community", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3369 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3292 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0846 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3249 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1638 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mlx-community/Mistral-Small-24B-Instruct-2501-bf16/d226ccf6-674b-44c6-8b11-d782b59a961a.json b/data/hfopenllm_v2/mlx-community/Mistral-Small-24B-Instruct-2501-bf16/d226ccf6-674b-44c6-8b11-d782b59a961a.json new file mode 100644 index 000000000..b1e204eb5 --- /dev/null +++ b/data/hfopenllm_v2/mlx-community/Mistral-Small-24B-Instruct-2501-bf16/d226ccf6-674b-44c6-8b11-d782b59a961a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mlx-community_Mistral-Small-24B-Instruct-2501-bf16/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-Small-24B-Instruct-2501-bf16", + "id": "mlx-community/Mistral-Small-24B-Instruct-2501-bf16", + "developer": "mlx-community", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6283 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6713 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3225 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3951 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.4618 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5395 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mlx-community/Mistral-Small-24B-Instruct-2501-bf16/d769592a-faa3-4269-abac-373679f42c62.json b/data/hfopenllm_v2/mlx-community/Mistral-Small-24B-Instruct-2501-bf16/d769592a-faa3-4269-abac-373679f42c62.json deleted file mode 100644 index 9a724e1ad..000000000 --- a/data/hfopenllm_v2/mlx-community/Mistral-Small-24B-Instruct-2501-bf16/d769592a-faa3-4269-abac-373679f42c62.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mlx-community_Mistral-Small-24B-Instruct-2501-bf16/1762652580.3707452", - "retrieved_timestamp": "1762652580.3707461", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mlx-community/Mistral-Small-24B-Instruct-2501-bf16", - "developer": "mlx-community", - "inference_platform": "unknown", - "id": "mlx-community/Mistral-Small-24B-Instruct-2501-bf16", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6282829558903709 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6713272911918485 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32250755287009064 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3951342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4618333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5394780585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/mmnga/Llama-3-70B-japanese-suzume-vector-v0.1/d8839a1a-8d07-4e0b-bd44-2668c84f750c.json b/data/hfopenllm_v2/mmnga/Llama-3-70B-japanese-suzume-vector-v0.1/d8839a1a-8d07-4e0b-bd44-2668c84f750c.json new 
file mode 100644 index 000000000..c98ce35a8 --- /dev/null +++ b/data/hfopenllm_v2/mmnga/Llama-3-70B-japanese-suzume-vector-v0.1/d8839a1a-8d07-4e0b-bd44-2668c84f750c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mmnga_Llama-3-70B-japanese-suzume-vector-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-70B-japanese-suzume-vector-v0.1", + "id": "mmnga/Llama-3-70B-japanese-suzume-vector-v0.1", + "developer": "mmnga", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4649 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6542 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2326 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2861 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4141 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5224 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mobiuslabsgmbh/DeepSeek-R1-ReDistill-Llama3-8B-v1.1/e90b04db-2eb3-483a-ab0e-ea8aef821d84.json b/data/hfopenllm_v2/mobiuslabsgmbh/DeepSeek-R1-ReDistill-Llama3-8B-v1.1/e90b04db-2eb3-483a-ab0e-ea8aef821d84.json new file mode 100644 index 000000000..f696f25a8 --- /dev/null +++ 
b/data/hfopenllm_v2/mobiuslabsgmbh/DeepSeek-R1-ReDistill-Llama3-8B-v1.1/e90b04db-2eb3-483a-ab0e-ea8aef821d84.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mobiuslabsgmbh_DeepSeek-R1-ReDistill-Llama3-8B-v1.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-R1-ReDistill-Llama3-8B-v1.1", + "id": "mobiuslabsgmbh/DeepSeek-R1-ReDistill-Llama3-8B-v1.1", + "developer": "mobiuslabsgmbh", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3704 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3473 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3285 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3396 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2198 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mobiuslabsgmbh/DeepSeek-R1-ReDistill-Qwen-7B-v1.1/900921ae-fbb2-4488-ab19-18987c1d008d.json b/data/hfopenllm_v2/mobiuslabsgmbh/DeepSeek-R1-ReDistill-Qwen-7B-v1.1/900921ae-fbb2-4488-ab19-18987c1d008d.json new file mode 100644 index 000000000..195277705 --- /dev/null +++ b/data/hfopenllm_v2/mobiuslabsgmbh/DeepSeek-R1-ReDistill-Qwen-7B-v1.1/900921ae-fbb2-4488-ab19-18987c1d008d.json @@ -0,0 
+1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mobiuslabsgmbh_DeepSeek-R1-ReDistill-Qwen-7B-v1.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-R1-ReDistill-Qwen-7B-v1.1", + "id": "mobiuslabsgmbh/DeepSeek-R1-ReDistill-Qwen-7B-v1.1", + "developer": "mobiuslabsgmbh", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3473 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3698 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3497 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4009 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2326 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/moeru-ai/L3.1-Moe-2x8B-v0.2/0da0a7cd-c075-4bc0-8e88-8acc7212e5c3.json b/data/hfopenllm_v2/moeru-ai/L3.1-Moe-2x8B-v0.2/0da0a7cd-c075-4bc0-8e88-8acc7212e5c3.json new file mode 100644 index 000000000..3c3e590ae --- /dev/null +++ b/data/hfopenllm_v2/moeru-ai/L3.1-Moe-2x8B-v0.2/0da0a7cd-c075-4bc0-8e88-8acc7212e5c3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/moeru-ai_L3.1-Moe-2x8B-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + 
"source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3.1-Moe-2x8B-v0.2", + "id": "moeru-ai/L3.1-Moe-2x8B-v0.2", + "developer": "moeru-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 13.668 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7348 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5256 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1699 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4199 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3858 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/moeru-ai/L3.1-Moe-2x8B-v0.2/cf47622f-c921-4610-adef-bed2a4670249.json b/data/hfopenllm_v2/moeru-ai/L3.1-Moe-2x8B-v0.2/cf47622f-c921-4610-adef-bed2a4670249.json deleted file mode 100644 index 44a689bc7..000000000 --- a/data/hfopenllm_v2/moeru-ai/L3.1-Moe-2x8B-v0.2/cf47622f-c921-4610-adef-bed2a4670249.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/moeru-ai_L3.1-Moe-2x8B-v0.2/1762652580.371698", - "retrieved_timestamp": "1762652580.3716989", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": 
"documentation" - }, - "model_info": { - "name": "moeru-ai/L3.1-Moe-2x8B-v0.2", - "developer": "moeru-ai", - "inference_platform": "unknown", - "id": "moeru-ai/L3.1-Moe-2x8B-v0.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 13.668 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7347947889377962 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5255688392585965 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16993957703927492 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41985416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38580452127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/moeru-ai/L3.1-Moe-4x8B-v0.1/b50a49cd-2909-4dbe-9c9f-c150abb99845.json b/data/hfopenllm_v2/moeru-ai/L3.1-Moe-4x8B-v0.1/b50a49cd-2909-4dbe-9c9f-c150abb99845.json new file mode 100644 index 000000000..e9612c059 --- /dev/null +++ b/data/hfopenllm_v2/moeru-ai/L3.1-Moe-4x8B-v0.1/b50a49cd-2909-4dbe-9c9f-c150abb99845.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/moeru-ai_L3.1-Moe-4x8B-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3.1-Moe-4x8B-v0.1", + "id": "moeru-ai/L3.1-Moe-4x8B-v0.1", + "developer": "moeru-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.942 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4332 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4939 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1299 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3609 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3454 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/moeru-ai/L3.1-Moe-4x8B-v0.1/bbcae028-046e-4e87-b991-5d7b92c42cc2.json b/data/hfopenllm_v2/moeru-ai/L3.1-Moe-4x8B-v0.1/bbcae028-046e-4e87-b991-5d7b92c42cc2.json deleted file mode 100644 index 883211ab4..000000000 --- a/data/hfopenllm_v2/moeru-ai/L3.1-Moe-4x8B-v0.1/bbcae028-046e-4e87-b991-5d7b92c42cc2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/moeru-ai_L3.1-Moe-4x8B-v0.1/1762652580.371937", - "retrieved_timestamp": "1762652580.371938", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "moeru-ai/L3.1-Moe-4x8B-v0.1", - "developer": "moeru-ai", - "inference_platform": "unknown", - "id": "moeru-ai/L3.1-Moe-4x8B-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 24.942 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.433219413378724 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49392781736367014 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 
1 - }, - "score_details": { - "score": 0.1299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3609166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34541223404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/moeru-ai/L3.1-Moe-4x8B-v0.2/13831d81-a9dd-43c7-bce1-240aad42fbc6.json b/data/hfopenllm_v2/moeru-ai/L3.1-Moe-4x8B-v0.2/13831d81-a9dd-43c7-bce1-240aad42fbc6.json new file mode 100644 index 000000000..5c203bb82 --- /dev/null +++ b/data/hfopenllm_v2/moeru-ai/L3.1-Moe-4x8B-v0.2/13831d81-a9dd-43c7-bce1-240aad42fbc6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/moeru-ai_L3.1-Moe-4x8B-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3.1-Moe-4x8B-v0.2", + "id": "moeru-ai/L3.1-Moe-4x8B-v0.2", + "developer": "moeru-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.942 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5407 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4466 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1035 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3234 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2763 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/moeru-ai/L3.1-Moe-4x8B-v0.2/e6fe5591-f6aa-40c6-897f-f90084682109.json b/data/hfopenllm_v2/moeru-ai/L3.1-Moe-4x8B-v0.2/e6fe5591-f6aa-40c6-897f-f90084682109.json deleted file mode 100644 index b980bd4b6..000000000 --- a/data/hfopenllm_v2/moeru-ai/L3.1-Moe-4x8B-v0.2/e6fe5591-f6aa-40c6-897f-f90084682109.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/moeru-ai_L3.1-Moe-4x8B-v0.2/1762652580.372139", - "retrieved_timestamp": "1762652580.37214", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "moeru-ai/L3.1-Moe-4x8B-v0.2", - "developer": "moeru-ai", - "inference_platform": "unknown", - "id": "moeru-ai/L3.1-Moe-4x8B-v0.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 24.942 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5406554608438943 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.446625675582615 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10347432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3233958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27626329787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/monsterapi/Llama-3_1-8B-Instruct-orca-ORPO/56ea7cb3-3a1e-477a-bac8-26a0fde6297a.json 
b/data/hfopenllm_v2/monsterapi/Llama-3_1-8B-Instruct-orca-ORPO/56ea7cb3-3a1e-477a-bac8-26a0fde6297a.json new file mode 100644 index 000000000..4f353589f --- /dev/null +++ b/data/hfopenllm_v2/monsterapi/Llama-3_1-8B-Instruct-orca-ORPO/56ea7cb3-3a1e-477a-bac8-26a0fde6297a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/monsterapi_Llama-3_1-8B-Instruct-orca-ORPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3_1-8B-Instruct-orca-ORPO", + "id": "monsterapi/Llama-3_1-8B-Instruct-orca-ORPO", + "developer": "monsterapi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 16.061 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2273 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2865 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2492 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3445 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1168 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/monsterapi/Llama-3_1-8B-Instruct-orca-ORPO/b70a3980-7b0b-4bb1-878f-c2d49f9df09e.json b/data/hfopenllm_v2/monsterapi/Llama-3_1-8B-Instruct-orca-ORPO/b70a3980-7b0b-4bb1-878f-c2d49f9df09e.json deleted file mode 100644 index c6e0f7606..000000000 --- 
a/data/hfopenllm_v2/monsterapi/Llama-3_1-8B-Instruct-orca-ORPO/b70a3980-7b0b-4bb1-878f-c2d49f9df09e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/monsterapi_Llama-3_1-8B-Instruct-orca-ORPO/1762652580.3723478", - "retrieved_timestamp": "1762652580.3723478", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "monsterapi/Llama-3_1-8B-Instruct-orca-ORPO", - "developer": "monsterapi", - "inference_platform": "unknown", - "id": "monsterapi/Llama-3_1-8B-Instruct-orca-ORPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 16.061 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22728914834860392 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28653625778742803 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24916107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34447916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11677194148936171 - } - } - ] -} diff --git a/data/hfopenllm_v2/monsterapi/gemma-2-2b-LoRA-MonsterInstruct/8ce19b33-4f2b-4b8d-80bd-1ed399a5e9dd.json b/data/hfopenllm_v2/monsterapi/gemma-2-2b-LoRA-MonsterInstruct/8ce19b33-4f2b-4b8d-80bd-1ed399a5e9dd.json new file mode 100644 index 000000000..1294499b7 --- /dev/null +++ b/data/hfopenllm_v2/monsterapi/gemma-2-2b-LoRA-MonsterInstruct/8ce19b33-4f2b-4b8d-80bd-1ed399a5e9dd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/monsterapi_gemma-2-2b-LoRA-MonsterInstruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-2b-LoRA-MonsterInstruct", + "id": "monsterapi/gemma-2-2b-LoRA-MonsterInstruct", + "developer": "monsterapi", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": 
"Gemma2ForCausalLM", + "params_billions": 2.614 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3903 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.365 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0506 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3644 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1987 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mosaicml/mpt-7b/18ab167d-b72e-4fa9-94a8-09edc641c73f.json b/data/hfopenllm_v2/mosaicml/mpt-7b/18ab167d-b72e-4fa9-94a8-09edc641c73f.json new file mode 100644 index 000000000..80c3fe30d --- /dev/null +++ b/data/hfopenllm_v2/mosaicml/mpt-7b/18ab167d-b72e-4fa9-94a8-09edc641c73f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mosaicml_mpt-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mpt-7b", + "id": "mosaicml/mpt-7b", + "developer": "mosaicml", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MPTForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2152 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.33 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0159 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3672 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1206 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mosaicml/mpt-7b/5e55c7ee-90f6-40a4-83ca-4a3acdad40f2.json b/data/hfopenllm_v2/mosaicml/mpt-7b/5e55c7ee-90f6-40a4-83ca-4a3acdad40f2.json deleted file mode 100644 index a948d9691..000000000 --- a/data/hfopenllm_v2/mosaicml/mpt-7b/5e55c7ee-90f6-40a4-83ca-4a3acdad40f2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mosaicml_mpt-7b/1762652580.3728561", - "retrieved_timestamp": "1762652580.372857", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mosaicml/mpt-7b", - "developer": "mosaicml", - "inference_platform": "unknown", - "id": "mosaicml/mpt-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "MPTForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21519900530592162 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, 
- "score_details": { - "score": 0.32997415960801324 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36723958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12059507978723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/mosama/Qwen2.5-1.5B-Instruct-CoT-Reflection/7df237ea-29c0-4d0a-9092-c41df4c13aca.json b/data/hfopenllm_v2/mosama/Qwen2.5-1.5B-Instruct-CoT-Reflection/7df237ea-29c0-4d0a-9092-c41df4c13aca.json new file mode 100644 index 000000000..553a8fd2a --- /dev/null +++ b/data/hfopenllm_v2/mosama/Qwen2.5-1.5B-Instruct-CoT-Reflection/7df237ea-29c0-4d0a-9092-c41df4c13aca.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mosama_Qwen2.5-1.5B-Instruct-CoT-Reflection/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-1.5B-Instruct-CoT-Reflection", + "id": "mosama/Qwen2.5-1.5B-Instruct-CoT-Reflection", + "developer": "mosama", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.287 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4109 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0272 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3212 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mosama/Qwen2.5-1.5B-Instruct-CoT-Reflection/e0d9dbcc-8df2-4207-b849-2c4984340605.json b/data/hfopenllm_v2/mosama/Qwen2.5-1.5B-Instruct-CoT-Reflection/e0d9dbcc-8df2-4207-b849-2c4984340605.json deleted file mode 100644 index a42dde576..000000000 --- a/data/hfopenllm_v2/mosama/Qwen2.5-1.5B-Instruct-CoT-Reflection/e0d9dbcc-8df2-4207-b849-2c4984340605.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mosama_Qwen2.5-1.5B-Instruct-CoT-Reflection/1762652580.373101", - "retrieved_timestamp": "1762652580.3731022", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mosama/Qwen2.5-1.5B-Instruct-CoT-Reflection", - "developer": "mosama", - "inference_platform": "unknown", - "id": "mosama/Qwen2.5-1.5B-Instruct-CoT-Reflection", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2870394996387363 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41093712633583523 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027190332326283987 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3211979166666667 - } - }, - { - "evaluation_name": 
"MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26512632978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/mrdayl/OpenCogito/aacaba19-8c17-4d20-b27b-672810272ed4.json b/data/hfopenllm_v2/mrdayl/OpenCogito/aacaba19-8c17-4d20-b27b-672810272ed4.json deleted file mode 100644 index df7eccab4..000000000 --- a/data/hfopenllm_v2/mrdayl/OpenCogito/aacaba19-8c17-4d20-b27b-672810272ed4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mrdayl_OpenCogito/1762652580.373355", - "retrieved_timestamp": "1762652580.373356", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mrdayl/OpenCogito", - "developer": "mrdayl", - "inference_platform": "unknown", - "id": "mrdayl/OpenCogito", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3933773498761065 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47196973414577464 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21827794561933533 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42401041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3451628989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/mrdayl/OpenCogito/e5dc8caa-2d86-4ff0-af8d-22d85c8faeb0.json b/data/hfopenllm_v2/mrdayl/OpenCogito/e5dc8caa-2d86-4ff0-af8d-22d85c8faeb0.json new file mode 100644 index 000000000..f7201e897 --- /dev/null +++ b/data/hfopenllm_v2/mrdayl/OpenCogito/e5dc8caa-2d86-4ff0-af8d-22d85c8faeb0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mrdayl_OpenCogito/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + 
"evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenCogito", + "id": "mrdayl/OpenCogito", + "developer": "mrdayl", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3934 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.472 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2183 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.424 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3452 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mrdayl/OpenCognito-r1/01591bb6-9daf-40fb-b802-0a007f4cc388.json b/data/hfopenllm_v2/mrdayl/OpenCognito-r1/01591bb6-9daf-40fb-b802-0a007f4cc388.json new file mode 100644 index 000000000..745fb3a4c --- /dev/null +++ b/data/hfopenllm_v2/mrdayl/OpenCognito-r1/01591bb6-9daf-40fb-b802-0a007f4cc388.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mrdayl_OpenCognito-r1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenCognito-r1", + "id": "mrdayl/OpenCognito-r1", + "developer": "mrdayl", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + 
}, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4241 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4673 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1903 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4241 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3475 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mrdayl/OpenCognito-r1/91e89f4c-d05b-476a-a8d9-0186ef8d1418.json b/data/hfopenllm_v2/mrdayl/OpenCognito-r1/91e89f4c-d05b-476a-a8d9-0186ef8d1418.json deleted file mode 100644 index ea494979e..000000000 --- a/data/hfopenllm_v2/mrdayl/OpenCognito-r1/91e89f4c-d05b-476a-a8d9-0186ef8d1418.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mrdayl_OpenCognito-r1/1762652580.3737972", - "retrieved_timestamp": "1762652580.373798", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mrdayl/OpenCognito-r1", - "developer": "mrdayl", - "inference_platform": "unknown", - "id": "mrdayl/OpenCognito-r1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42412687225450696 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4673346036303057 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1903323262839879 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42407291666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3474900265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/mrdayl/OpenCognito-r2/672c6991-3c7b-48c3-9e95-389175e7cd6b.json b/data/hfopenllm_v2/mrdayl/OpenCognito-r2/672c6991-3c7b-48c3-9e95-389175e7cd6b.json deleted file mode 100644 index e933cd67f..000000000 --- a/data/hfopenllm_v2/mrdayl/OpenCognito-r2/672c6991-3c7b-48c3-9e95-389175e7cd6b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mrdayl_OpenCognito-r2/1762652580.373997", - "retrieved_timestamp": "1762652580.3739982", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mrdayl/OpenCognito-r2", - "developer": "mrdayl", - "inference_platform": "unknown", - "id": "mrdayl/OpenCognito-r2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3958751667797001 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46882818163435913 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20241691842900303 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42016666666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34616023936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/mrdayl/OpenCognito-r2/f6c32abf-bbae-4827-9ce2-29ce20c9463e.json b/data/hfopenllm_v2/mrdayl/OpenCognito-r2/f6c32abf-bbae-4827-9ce2-29ce20c9463e.json new file mode 100644 index 000000000..45eaf201c --- /dev/null +++ b/data/hfopenllm_v2/mrdayl/OpenCognito-r2/f6c32abf-bbae-4827-9ce2-29ce20c9463e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mrdayl_OpenCognito-r2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenCognito-r2", + "id": "mrdayl/OpenCognito-r2", + "developer": "mrdayl", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3959 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4688 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2024 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4202 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + 
"source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3462 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mrdayl/OpenCognito/049eb195-7ca8-42a7-bf2a-e072b7929958.json b/data/hfopenllm_v2/mrdayl/OpenCognito/049eb195-7ca8-42a7-bf2a-e072b7929958.json deleted file mode 100644 index 278eba393..000000000 --- a/data/hfopenllm_v2/mrdayl/OpenCognito/049eb195-7ca8-42a7-bf2a-e072b7929958.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mrdayl_OpenCognito/1762652580.373594", - "retrieved_timestamp": "1762652580.373594", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mrdayl/OpenCognito", - "developer": "mrdayl", - "inference_platform": "unknown", - "id": "mrdayl/OpenCognito", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40621661635571393 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4705607805549634 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21148036253776434 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42934374999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3443317819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/mrdayl/OpenCognito/74a6605d-3557-4458-bef5-cc9420434e68.json b/data/hfopenllm_v2/mrdayl/OpenCognito/74a6605d-3557-4458-bef5-cc9420434e68.json new file mode 100644 index 000000000..6d8c6acce --- /dev/null +++ b/data/hfopenllm_v2/mrdayl/OpenCognito/74a6605d-3557-4458-bef5-cc9420434e68.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mrdayl_OpenCognito/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + 
"source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenCognito", + "id": "mrdayl/OpenCognito", + "developer": "mrdayl", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4062 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4706 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2115 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4293 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3443 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mrdayl/OpenThink/ae71ec28-7e22-42c4-8549-4334dff8a811.json b/data/hfopenllm_v2/mrdayl/OpenThink/ae71ec28-7e22-42c4-8549-4334dff8a811.json deleted file mode 100644 index c75087634..000000000 --- a/data/hfopenllm_v2/mrdayl/OpenThink/ae71ec28-7e22-42c4-8549-4334dff8a811.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/mrdayl_OpenThink/1762652580.374203", - "retrieved_timestamp": "1762652580.374204", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "mrdayl/OpenThink", - "developer": "mrdayl", - 
"inference_platform": "unknown", - "id": "mrdayl/OpenThink", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20540720842919008 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34597850879756104 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28851963746223563 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32888541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18500664893617022 - } - } - ] -} diff --git a/data/hfopenllm_v2/mrdayl/OpenThink/dbe6e126-d35c-4634-a544-adf374ed5d00.json b/data/hfopenllm_v2/mrdayl/OpenThink/dbe6e126-d35c-4634-a544-adf374ed5d00.json new file mode 100644 index 000000000..383e8e8c7 --- /dev/null +++ b/data/hfopenllm_v2/mrdayl/OpenThink/dbe6e126-d35c-4634-a544-adf374ed5d00.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mrdayl_OpenThink/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenThink", + "id": "mrdayl/OpenThink", + "developer": "mrdayl", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2054 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.346 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + 
"dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2885 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3289 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.185 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mrm8488/phi-4-14B-grpo-gsm8k-3e/d68681c1-01e4-4af0-9a81-e0aaed0ae865.json b/data/hfopenllm_v2/mrm8488/phi-4-14B-grpo-gsm8k-3e/d68681c1-01e4-4af0-9a81-e0aaed0ae865.json new file mode 100644 index 000000000..0b5f7c44b --- /dev/null +++ b/data/hfopenllm_v2/mrm8488/phi-4-14B-grpo-gsm8k-3e/d68681c1-01e4-4af0-9a81-e0aaed0ae865.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mrm8488_phi-4-14B-grpo-gsm8k-3e/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "phi-4-14B-grpo-gsm8k-3e", + "id": "mrm8488/phi-4-14B-grpo-gsm8k-3e", + "developer": "mrm8488", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6885 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6805 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4524 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3356 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5268 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mrm8488/phi-4-14B-grpo-limo/de9620b8-7112-436f-8941-fae2c5e7f9e0.json b/data/hfopenllm_v2/mrm8488/phi-4-14B-grpo-limo/de9620b8-7112-436f-8941-fae2c5e7f9e0.json new file mode 100644 index 000000000..2b68d13b9 --- /dev/null +++ b/data/hfopenllm_v2/mrm8488/phi-4-14B-grpo-limo/de9620b8-7112-436f-8941-fae2c5e7f9e0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mrm8488_phi-4-14B-grpo-limo/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "phi-4-14B-grpo-limo", + "id": "mrm8488/phi-4-14B-grpo-limo", + "developer": "mrm8488", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6812 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6785 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4569 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3364 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3981 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5261 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mukaj/Llama-3.1-Hawkish-8B/cafee7ac-deb6-4c4b-af8f-81548648cb14.json b/data/hfopenllm_v2/mukaj/Llama-3.1-Hawkish-8B/cafee7ac-deb6-4c4b-af8f-81548648cb14.json new file mode 100644 index 000000000..0f6acc63a --- /dev/null +++ b/data/hfopenllm_v2/mukaj/Llama-3.1-Hawkish-8B/cafee7ac-deb6-4c4b-af8f-81548648cb14.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/mukaj_Llama-3.1-Hawkish-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-Hawkish-8B", + "id": "mukaj/Llama-3.1-Hawkish-8B", + "developer": "mukaj", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.672 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4884 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2432 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + 
"dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3967 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3331 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/natong19/Mistral-Nemo-Instruct-2407-abliterated/3e3cb617-6f19-4731-b31a-b1f4d88237d5.json b/data/hfopenllm_v2/natong19/Mistral-Nemo-Instruct-2407-abliterated/3e3cb617-6f19-4731-b31a-b1f4d88237d5.json new file mode 100644 index 000000000..8280be34b --- /dev/null +++ b/data/hfopenllm_v2/natong19/Mistral-Nemo-Instruct-2407-abliterated/3e3cb617-6f19-4731-b31a-b1f4d88237d5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/natong19_Mistral-Nemo-Instruct-2407-abliterated/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-Nemo-Instruct-2407-abliterated", + "id": "natong19/Mistral-Nemo-Instruct-2407-abliterated", + "developer": "natong19", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6392 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5048 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1322 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4033 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3518 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/natong19/Mistral-Nemo-Instruct-2407-abliterated/5256f7b6-f830-4733-a092-01470607558d.json b/data/hfopenllm_v2/natong19/Mistral-Nemo-Instruct-2407-abliterated/5256f7b6-f830-4733-a092-01470607558d.json deleted file mode 100644 index b8e8265b3..000000000 --- a/data/hfopenllm_v2/natong19/Mistral-Nemo-Instruct-2407-abliterated/5256f7b6-f830-4733-a092-01470607558d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/natong19_Mistral-Nemo-Instruct-2407-abliterated/1762652580.375077", - "retrieved_timestamp": "1762652580.375078", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "natong19/Mistral-Nemo-Instruct-2407-abliterated", - "developer": "natong19", - "inference_platform": "unknown", - "id": "natong19/Mistral-Nemo-Instruct-2407-abliterated", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6392239258500778 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5048447739625885 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13217522658610273 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4033333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.351811835106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/natong19/Qwen2-7B-Instruct-abliterated/3c2c2c14-d065-4d6c-8c98-44ba8f2ca461.json 
b/data/hfopenllm_v2/natong19/Qwen2-7B-Instruct-abliterated/3c2c2c14-d065-4d6c-8c98-44ba8f2ca461.json new file mode 100644 index 000000000..f21a3cf8b --- /dev/null +++ b/data/hfopenllm_v2/natong19/Qwen2-7B-Instruct-abliterated/3c2c2c14-d065-4d6c-8c98-44ba8f2ca461.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/natong19_Qwen2-7B-Instruct-abliterated/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2-7B-Instruct-abliterated", + "id": "natong19/Qwen2-7B-Instruct-abliterated", + "developer": "natong19", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5837 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5553 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2764 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4034 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3842 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/natong19/Qwen2-7B-Instruct-abliterated/7c8605a5-2f0d-4cc7-b840-d77cb5fdf849.json b/data/hfopenllm_v2/natong19/Qwen2-7B-Instruct-abliterated/7c8605a5-2f0d-4cc7-b840-d77cb5fdf849.json deleted file mode 100644 index 2797577cb..000000000 --- 
a/data/hfopenllm_v2/natong19/Qwen2-7B-Instruct-abliterated/7c8605a5-2f0d-4cc7-b840-d77cb5fdf849.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/natong19_Qwen2-7B-Instruct-abliterated/1762652580.375325", - "retrieved_timestamp": "1762652580.375325", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "natong19/Qwen2-7B-Instruct-abliterated", - "developer": "natong19", - "inference_platform": "unknown", - "id": "natong19/Qwen2-7B-Instruct-abliterated", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5836945970026197 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5553035842403061 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2764350453172205 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4034270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3842253989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/nazimali/Mistral-Nemo-Kurdish-Instruct/8909f916-401b-4457-ab8f-2691696049c6.json b/data/hfopenllm_v2/nazimali/Mistral-Nemo-Kurdish-Instruct/8909f916-401b-4457-ab8f-2691696049c6.json new file mode 100644 index 000000000..642a4f5d2 --- /dev/null +++ b/data/hfopenllm_v2/nazimali/Mistral-Nemo-Kurdish-Instruct/8909f916-401b-4457-ab8f-2691696049c6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nazimali_Mistral-Nemo-Kurdish-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-Nemo-Kurdish-Instruct", + "id": "nazimali/Mistral-Nemo-Kurdish-Instruct", + "developer": "nazimali", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + 
"params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4964 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4699 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0045 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3979 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3063 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nazimali/Mistral-Nemo-Kurdish-Instruct/ae191508-7dad-4cac-ad4a-af95d7a15b5d.json b/data/hfopenllm_v2/nazimali/Mistral-Nemo-Kurdish-Instruct/ae191508-7dad-4cac-ad4a-af95d7a15b5d.json new file mode 100644 index 000000000..030f58d03 --- /dev/null +++ b/data/hfopenllm_v2/nazimali/Mistral-Nemo-Kurdish-Instruct/ae191508-7dad-4cac-ad4a-af95d7a15b5d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nazimali_Mistral-Nemo-Kurdish-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-Nemo-Kurdish-Instruct", + "id": "nazimali/Mistral-Nemo-Kurdish-Instruct", + "developer": "nazimali", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": 
"google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.486 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4721 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0846 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2844 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4006 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nazimali/Mistral-Nemo-Kurdish/507f5047-fac3-415f-b9fa-aae4311fa837.json b/data/hfopenllm_v2/nazimali/Mistral-Nemo-Kurdish/507f5047-fac3-415f-b9fa-aae4311fa837.json new file mode 100644 index 000000000..ef98c1d8b --- /dev/null +++ b/data/hfopenllm_v2/nazimali/Mistral-Nemo-Kurdish/507f5047-fac3-415f-b9fa-aae4311fa837.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nazimali_Mistral-Nemo-Kurdish/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-Nemo-Kurdish", + "id": "nazimali/Mistral-Nemo-Kurdish", + "developer": "nazimali", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3401 + } + }, + { + 
"evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5133 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0959 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4116 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3235 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/BigKartoffel-mistral-nemo-20B/0ee8716c-74f0-41b4-94a2-efc715150293.json b/data/hfopenllm_v2/nbeerbower/BigKartoffel-mistral-nemo-20B/0ee8716c-74f0-41b4-94a2-efc715150293.json new file mode 100644 index 000000000..b7f28eb06 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/BigKartoffel-mistral-nemo-20B/0ee8716c-74f0-41b4-94a2-efc715150293.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_BigKartoffel-mistral-nemo-20B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BigKartoffel-mistral-nemo-20B", + "id": "nbeerbower/BigKartoffel-mistral-nemo-20B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 20.427 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5857 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5515 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0264 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.428 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.353 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/DoppelKartoffel-Mistral-Nemo-23B/fcf491f4-cf57-4c95-9de1-4702ab5d54c7.json b/data/hfopenllm_v2/nbeerbower/DoppelKartoffel-Mistral-Nemo-23B/fcf491f4-cf57-4c95-9de1-4702ab5d54c7.json new file mode 100644 index 000000000..59fb04144 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/DoppelKartoffel-Mistral-Nemo-23B/fcf491f4-cf57-4c95-9de1-4702ab5d54c7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_DoppelKartoffel-Mistral-Nemo-23B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DoppelKartoffel-Mistral-Nemo-23B", + "id": "nbeerbower/DoppelKartoffel-Mistral-Nemo-23B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.153 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5191 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": 
{ + "score": 0.5218 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.031 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2752 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3795 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.308 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/DoublePotato-Mistral-Nemo-13B/4fd20259-c7c7-4da5-9013-ae2feb2175b1.json b/data/hfopenllm_v2/nbeerbower/DoublePotato-Mistral-Nemo-13B/4fd20259-c7c7-4da5-9013-ae2feb2175b1.json new file mode 100644 index 000000000..27b871961 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/DoublePotato-Mistral-Nemo-13B/4fd20259-c7c7-4da5-9013-ae2feb2175b1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_DoublePotato-Mistral-Nemo-13B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DoublePotato-Mistral-Nemo-13B", + "id": "nbeerbower/DoublePotato-Mistral-Nemo-13B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 13.338 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6796 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5438 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.04 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.46 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3596 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/Dumpling-Qwen2.5-1.5B/a7c8c345-cade-48fd-93c0-0f344044d2b5.json b/data/hfopenllm_v2/nbeerbower/Dumpling-Qwen2.5-1.5B/a7c8c345-cade-48fd-93c0-0f344044d2b5.json new file mode 100644 index 000000000..cc0a30ad9 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/Dumpling-Qwen2.5-1.5B/a7c8c345-cade-48fd-93c0-0f344044d2b5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_Dumpling-Qwen2.5-1.5B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dumpling-Qwen2.5-1.5B", + "id": "nbeerbower/Dumpling-Qwen2.5-1.5B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3699 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.416 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.1171 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3728 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2772 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/Dumpling-Qwen2.5-14B/7a8e3986-7688-4a26-a74c-a9bb47cd3e8d.json b/data/hfopenllm_v2/nbeerbower/Dumpling-Qwen2.5-14B/7a8e3986-7688-4a26-a74c-a9bb47cd3e8d.json new file mode 100644 index 000000000..043eb77ba --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/Dumpling-Qwen2.5-14B/7a8e3986-7688-4a26-a74c-a9bb47cd3e8d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_Dumpling-Qwen2.5-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dumpling-Qwen2.5-14B", + "id": "nbeerbower/Dumpling-Qwen2.5-14B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6064 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6451 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3097 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4354 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.517 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/Dumpling-Qwen2.5-7B-1k-r16/7a2ffb4d-1135-42a1-b28b-3b4e4d014979.json b/data/hfopenllm_v2/nbeerbower/Dumpling-Qwen2.5-7B-1k-r16/7a2ffb4d-1135-42a1-b28b-3b4e4d014979.json new file mode 100644 index 000000000..c9e94af62 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/Dumpling-Qwen2.5-7B-1k-r16/7a2ffb4d-1135-42a1-b28b-3b4e4d014979.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_Dumpling-Qwen2.5-7B-1k-r16/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dumpling-Qwen2.5-7B-1k-r16", + "id": "nbeerbower/Dumpling-Qwen2.5-7B-1k-r16", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.486 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5214 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2364 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + 
"source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.423 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3959 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/Dumpling-Qwen2.5-7B-1k-r64-2e-5/25468720-93d7-4f10-a534-30c4976657e8.json b/data/hfopenllm_v2/nbeerbower/Dumpling-Qwen2.5-7B-1k-r64-2e-5/25468720-93d7-4f10-a534-30c4976657e8.json new file mode 100644 index 000000000..f310a40a3 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/Dumpling-Qwen2.5-7B-1k-r64-2e-5/25468720-93d7-4f10-a534-30c4976657e8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_Dumpling-Qwen2.5-7B-1k-r64-2e-5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dumpling-Qwen2.5-7B-1k-r64-2e-5", + "id": "nbeerbower/Dumpling-Qwen2.5-7B-1k-r64-2e-5", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4179 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5301 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2115 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on 
MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4486 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4122 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/EVA-abliterated-TIES-Qwen2.5-1.5B/5ba1d617-9d9a-4c3b-b9cc-3224ace129b3.json b/data/hfopenllm_v2/nbeerbower/EVA-abliterated-TIES-Qwen2.5-1.5B/5ba1d617-9d9a-4c3b-b9cc-3224ace129b3.json new file mode 100644 index 000000000..d59eb6c26 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/EVA-abliterated-TIES-Qwen2.5-1.5B/5ba1d617-9d9a-4c3b-b9cc-3224ace129b3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_EVA-abliterated-TIES-Qwen2.5-1.5B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EVA-abliterated-TIES-Qwen2.5-1.5B", + "id": "nbeerbower/EVA-abliterated-TIES-Qwen2.5-1.5B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4115 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3997 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1375 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3502 + } + }, + { 
+ "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2712 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/EVA-abliterated-TIES-Qwen2.5-14B/27b2b46f-1323-4ddd-9f65-d8fcd9cd6508.json b/data/hfopenllm_v2/nbeerbower/EVA-abliterated-TIES-Qwen2.5-14B/27b2b46f-1323-4ddd-9f65-d8fcd9cd6508.json new file mode 100644 index 000000000..af7d95ee5 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/EVA-abliterated-TIES-Qwen2.5-14B/27b2b46f-1323-4ddd-9f65-d8fcd9cd6508.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_EVA-abliterated-TIES-Qwen2.5-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EVA-abliterated-TIES-Qwen2.5-14B", + "id": "nbeerbower/EVA-abliterated-TIES-Qwen2.5-14B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7836 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6372 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5045 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3549 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4407 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5211 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/Flammades-Mistral-Nemo-12B/65917125-bb7c-4d64-ba5f-b5e4f67ec332.json b/data/hfopenllm_v2/nbeerbower/Flammades-Mistral-Nemo-12B/65917125-bb7c-4d64-ba5f-b5e4f67ec332.json new file mode 100644 index 000000000..1fe39793f --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/Flammades-Mistral-Nemo-12B/65917125-bb7c-4d64-ba5f-b5e4f67ec332.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_Flammades-Mistral-Nemo-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Flammades-Mistral-Nemo-12B", + "id": "nbeerbower/Flammades-Mistral-Nemo-12B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3842 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.53 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0755 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4806 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.3661 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/Gemma2-Gutenberg-Doppel-9B/30bf22d8-b93a-4775-8073-30e14e15e35d.json b/data/hfopenllm_v2/nbeerbower/Gemma2-Gutenberg-Doppel-9B/30bf22d8-b93a-4775-8073-30e14e15e35d.json new file mode 100644 index 000000000..4dfbf3259 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/Gemma2-Gutenberg-Doppel-9B/30bf22d8-b93a-4775-8073-30e14e15e35d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_Gemma2-Gutenberg-Doppel-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma2-Gutenberg-Doppel-9B", + "id": "nbeerbower/Gemma2-Gutenberg-Doppel-9B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7171 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.587 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1979 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4608 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4127 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/Gutensuppe-mistral-nemo-12B/ff510365-a13d-4e44-9709-59a56e864991.json 
b/data/hfopenllm_v2/nbeerbower/Gutensuppe-mistral-nemo-12B/ff510365-a13d-4e44-9709-59a56e864991.json new file mode 100644 index 000000000..0a1e89534 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/Gutensuppe-mistral-nemo-12B/ff510365-a13d-4e44-9709-59a56e864991.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_Gutensuppe-mistral-nemo-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gutensuppe-mistral-nemo-12B", + "id": "nbeerbower/Gutensuppe-mistral-nemo-12B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2916 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5487 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1329 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3372 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.429 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.368 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/Hermes2-Gutenberg2-Mistral-7B/6d1eebc4-228b-43f3-b31c-3d5b1591ae2d.json b/data/hfopenllm_v2/nbeerbower/Hermes2-Gutenberg2-Mistral-7B/6d1eebc4-228b-43f3-b31c-3d5b1591ae2d.json new file mode 100644 index 000000000..edbb8c718 --- /dev/null +++ 
b/data/hfopenllm_v2/nbeerbower/Hermes2-Gutenberg2-Mistral-7B/6d1eebc4-228b-43f3-b31c-3d5b1591ae2d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_Hermes2-Gutenberg2-Mistral-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Hermes2-Gutenberg2-Mistral-7B", + "id": "nbeerbower/Hermes2-Gutenberg2-Mistral-7B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3721 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4981 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0574 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4623 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2993 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/Kartoffel-Deepfry-12B/09ba1be1-4b42-4eba-810f-a0aed64aafc0.json b/data/hfopenllm_v2/nbeerbower/Kartoffel-Deepfry-12B/09ba1be1-4b42-4eba-810f-a0aed64aafc0.json deleted file mode 100644 index 96bd39d46..000000000 --- a/data/hfopenllm_v2/nbeerbower/Kartoffel-Deepfry-12B/09ba1be1-4b42-4eba-810f-a0aed64aafc0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/nbeerbower_Kartoffel-Deepfry-12B/1762652580.379381", - "retrieved_timestamp": "1762652580.3793821", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/Kartoffel-Deepfry-12B", - "developer": "nbeerbower", - "inference_platform": "unknown", - "id": "nbeerbower/Kartoffel-Deepfry-12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5021620411618949 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5365374219062301 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4791666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3582114361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/nbeerbower/Kartoffel-Deepfry-12B/f1e8cdbb-14b7-4959-a053-fb1b37629aff.json b/data/hfopenllm_v2/nbeerbower/Kartoffel-Deepfry-12B/f1e8cdbb-14b7-4959-a053-fb1b37629aff.json new file mode 100644 index 000000000..318400868 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/Kartoffel-Deepfry-12B/f1e8cdbb-14b7-4959-a053-fb1b37629aff.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_Kartoffel-Deepfry-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kartoffel-Deepfry-12B", + "id": "nbeerbower/Kartoffel-Deepfry-12B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5022 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5365 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0604 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4792 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3582 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/Llama-3.1-Nemotron-lorablated-70B/4145d1a0-8d6a-4d64-8a45-a89cf343ac46.json b/data/hfopenllm_v2/nbeerbower/Llama-3.1-Nemotron-lorablated-70B/4145d1a0-8d6a-4d64-8a45-a89cf343ac46.json new file mode 100644 index 000000000..254d39ab3 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/Llama-3.1-Nemotron-lorablated-70B/4145d1a0-8d6a-4d64-8a45-a89cf343ac46.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_Llama-3.1-Nemotron-lorablated-70B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-Nemotron-lorablated-70B", + "id": "nbeerbower/Llama-3.1-Nemotron-lorablated-70B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.7229 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6825 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3338 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3909 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4682 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5343 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/Llama3.1-Gutenberg-Doppel-70B/d6966190-e254-4902-8472-cac59bfbdbe0.json b/data/hfopenllm_v2/nbeerbower/Llama3.1-Gutenberg-Doppel-70B/d6966190-e254-4902-8472-cac59bfbdbe0.json new file mode 100644 index 000000000..348fd047b --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/Llama3.1-Gutenberg-Doppel-70B/d6966190-e254-4902-8472-cac59bfbdbe0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_Llama3.1-Gutenberg-Doppel-70B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.1-Gutenberg-Doppel-70B", + "id": "nbeerbower/Llama3.1-Gutenberg-Doppel-70B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7092 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" 
+ }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6661 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2122 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3448 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4897 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4737 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/Lyra-Gutenberg-mistral-nemo-12B/5fdb5437-f413-451d-9800-42036cda7686.json b/data/hfopenllm_v2/nbeerbower/Lyra-Gutenberg-mistral-nemo-12B/5fdb5437-f413-451d-9800-42036cda7686.json new file mode 100644 index 000000000..16901bff0 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/Lyra-Gutenberg-mistral-nemo-12B/5fdb5437-f413-451d-9800-42036cda7686.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_Lyra-Gutenberg-mistral-nemo-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lyra-Gutenberg-mistral-nemo-12B", + "id": "nbeerbower/Lyra-Gutenberg-mistral-nemo-12B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3495 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 
+ }, + "score_details": { + "score": 0.5586 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1012 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3339 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4357 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3628 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/Lyra4-Gutenberg-12B/02606fe0-ca08-4102-9670-8a18a9cc6f81.json b/data/hfopenllm_v2/nbeerbower/Lyra4-Gutenberg-12B/02606fe0-ca08-4102-9670-8a18a9cc6f81.json deleted file mode 100644 index 2e558a4df..000000000 --- a/data/hfopenllm_v2/nbeerbower/Lyra4-Gutenberg-12B/02606fe0-ca08-4102-9670-8a18a9cc6f81.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_Lyra4-Gutenberg-12B/1762652580.380318", - "retrieved_timestamp": "1762652580.380318", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/Lyra4-Gutenberg-12B", - "developer": "nbeerbower", - "inference_platform": "unknown", - "id": "nbeerbower/Lyra4-Gutenberg-12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2212185888996751 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.538669487933139 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1299093655589124 
- } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4037916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35713098404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/nbeerbower/Lyra4-Gutenberg-12B/347577a4-2768-4472-ba48-9b174ad89724.json b/data/hfopenllm_v2/nbeerbower/Lyra4-Gutenberg-12B/347577a4-2768-4472-ba48-9b174ad89724.json new file mode 100644 index 000000000..fb5f4a0df --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/Lyra4-Gutenberg-12B/347577a4-2768-4472-ba48-9b174ad89724.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_Lyra4-Gutenberg-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lyra4-Gutenberg-12B", + "id": "nbeerbower/Lyra4-Gutenberg-12B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2212 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5387 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1299 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3188 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4038 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3571 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/Lyra4-Gutenberg2-12B/33af440e-837d-4454-9340-af0d3ee74f77.json b/data/hfopenllm_v2/nbeerbower/Lyra4-Gutenberg2-12B/33af440e-837d-4454-9340-af0d3ee74f77.json new file mode 100644 index 000000000..6dfbf9b42 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/Lyra4-Gutenberg2-12B/33af440e-837d-4454-9340-af0d3ee74f77.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_Lyra4-Gutenberg2-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lyra4-Gutenberg2-12B", + "id": "nbeerbower/Lyra4-Gutenberg2-12B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2585 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5345 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1171 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3129 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3972 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + 
"source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3565 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/Lyra4-Gutenberg2-12B/f9da5237-3903-4bbf-a0bc-0bcf3152f45a.json b/data/hfopenllm_v2/nbeerbower/Lyra4-Gutenberg2-12B/f9da5237-3903-4bbf-a0bc-0bcf3152f45a.json deleted file mode 100644 index 41a55725c..000000000 --- a/data/hfopenllm_v2/nbeerbower/Lyra4-Gutenberg2-12B/f9da5237-3903-4bbf-a0bc-0bcf3152f45a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_Lyra4-Gutenberg2-12B/1762652580.380519", - "retrieved_timestamp": "1762652580.3805199", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/Lyra4-Gutenberg2-12B", - "developer": "nbeerbower", - "inference_platform": "unknown", - "id": "nbeerbower/Lyra4-Gutenberg2-12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25851296781428834 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5344527944750038 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11706948640483383 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39721874999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35654920212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/nbeerbower/Mahou-1.5-mistral-nemo-12B-lorablated/1a1f4709-8d05-4905-8105-0c3606d5ef5b.json b/data/hfopenllm_v2/nbeerbower/Mahou-1.5-mistral-nemo-12B-lorablated/1a1f4709-8d05-4905-8105-0c3606d5ef5b.json new file mode 100644 index 000000000..5eb89a5fa --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/Mahou-1.5-mistral-nemo-12B-lorablated/1a1f4709-8d05-4905-8105-0c3606d5ef5b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", 
+ "evaluation_id": "hfopenllm_v2/nbeerbower_Mahou-1.5-mistral-nemo-12B-lorablated/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mahou-1.5-mistral-nemo-12B-lorablated", + "id": "nbeerbower/Mahou-1.5-mistral-nemo-12B-lorablated", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6825 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5496 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0891 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4522 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3574 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/Mistral-Gutenberg-Doppel-7B-FFT/28421948-089b-4487-bb71-a06e5ce74402.json b/data/hfopenllm_v2/nbeerbower/Mistral-Gutenberg-Doppel-7B-FFT/28421948-089b-4487-bb71-a06e5ce74402.json new file mode 100644 index 000000000..dd9733de3 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/Mistral-Gutenberg-Doppel-7B-FFT/28421948-089b-4487-bb71-a06e5ce74402.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_Mistral-Gutenberg-Doppel-7B-FFT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + 
"source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-Gutenberg-Doppel-7B-FFT", + "id": "nbeerbower/Mistral-Gutenberg-Doppel-7B-FFT", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5717 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4076 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0249 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4059 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2729 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B-v2/3fa0c783-9226-4fc8-b3a0-6e960684f43d.json b/data/hfopenllm_v2/nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B-v2/3fa0c783-9226-4fc8-b3a0-6e960684f43d.json new file mode 100644 index 000000000..40710b344 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B-v2/3fa0c783-9226-4fc8-b3a0-6e960684f43d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_Mistral-Nemo-Gutenberg-Doppel-12B-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + 
"evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-Nemo-Gutenberg-Doppel-12B-v2", + "id": "nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B-v2", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6536 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5374 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1156 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4233 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3546 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B/743b7fe2-f998-408c-98b1-af02d9c1ee2a.json b/data/hfopenllm_v2/nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B/743b7fe2-f998-408c-98b1-af02d9c1ee2a.json new file mode 100644 index 000000000..0895b9113 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B/743b7fe2-f998-408c-98b1-af02d9c1ee2a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_Mistral-Nemo-Gutenberg-Doppel-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-Nemo-Gutenberg-Doppel-12B", + "id": 
"nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3567 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5275 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1216 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3163 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4132 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3579 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/Mistral-Nemo-Moderne-12B-FFT-experimental/0039c88b-a881-4ce0-9a0a-a10f1a8cbc70.json b/data/hfopenllm_v2/nbeerbower/Mistral-Nemo-Moderne-12B-FFT-experimental/0039c88b-a881-4ce0-9a0a-a10f1a8cbc70.json new file mode 100644 index 000000000..361dfbd4d --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/Mistral-Nemo-Moderne-12B-FFT-experimental/0039c88b-a881-4ce0-9a0a-a10f1a8cbc70.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_Mistral-Nemo-Moderne-12B-FFT-experimental/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-Nemo-Moderne-12B-FFT-experimental", + "id": "nbeerbower/Mistral-Nemo-Moderne-12B-FFT-experimental", + "developer": "nbeerbower", + "inference_platform": 
"unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3352 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5234 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.077 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3715 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3455 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/Mistral-Nemo-Moderne-12B-FFT-experimental/e7337143-6ec7-4467-b6f5-907492705cc9.json b/data/hfopenllm_v2/nbeerbower/Mistral-Nemo-Moderne-12B-FFT-experimental/e7337143-6ec7-4467-b6f5-907492705cc9.json deleted file mode 100644 index df74851dc..000000000 --- a/data/hfopenllm_v2/nbeerbower/Mistral-Nemo-Moderne-12B-FFT-experimental/e7337143-6ec7-4467-b6f5-907492705cc9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_Mistral-Nemo-Moderne-12B-FFT-experimental/1762652580.3819818", - "retrieved_timestamp": "1762652580.381983", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/Mistral-Nemo-Moderne-12B-FFT-experimental", - "developer": "nbeerbower", - "inference_platform": "unknown", - "id": 
"nbeerbower/Mistral-Nemo-Moderne-12B-FFT-experimental", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33522498082864577 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5234089179237257 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0770392749244713 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3714895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3454953457446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/nbeerbower/Mistral-Nemo-Prism-12B-v2/87c7fbd9-7648-4d0d-ac9e-8ba85860e335.json b/data/hfopenllm_v2/nbeerbower/Mistral-Nemo-Prism-12B-v2/87c7fbd9-7648-4d0d-ac9e-8ba85860e335.json new file mode 100644 index 000000000..85ba56e3d --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/Mistral-Nemo-Prism-12B-v2/87c7fbd9-7648-4d0d-ac9e-8ba85860e335.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_Mistral-Nemo-Prism-12B-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-Nemo-Prism-12B-v2", + "id": "nbeerbower/Mistral-Nemo-Prism-12B-v2", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6974 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 
+ }, + "score_details": { + "score": 0.5492 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0891 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.46 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3567 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/Mistral-Nemo-Prism-12B-v7/6ca3ab87-c05e-46b5-879d-4fc8bf75417b.json b/data/hfopenllm_v2/nbeerbower/Mistral-Nemo-Prism-12B-v7/6ca3ab87-c05e-46b5-879d-4fc8bf75417b.json new file mode 100644 index 000000000..4e58c2e14 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/Mistral-Nemo-Prism-12B-v7/6ca3ab87-c05e-46b5-879d-4fc8bf75417b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_Mistral-Nemo-Prism-12B-v7/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-Nemo-Prism-12B-v7", + "id": "nbeerbower/Mistral-Nemo-Prism-12B-v7", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6962 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5521 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0869 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4639 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.359 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/Mistral-Nemo-Prism-12B/525f1b9f-88a2-459d-bb4a-7c01a0107968.json b/data/hfopenllm_v2/nbeerbower/Mistral-Nemo-Prism-12B/525f1b9f-88a2-459d-bb4a-7c01a0107968.json new file mode 100644 index 000000000..8eb277f61 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/Mistral-Nemo-Prism-12B/525f1b9f-88a2-459d-bb4a-7c01a0107968.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_Mistral-Nemo-Prism-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-Nemo-Prism-12B", + "id": "nbeerbower/Mistral-Nemo-Prism-12B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6858 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5475 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.0869 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4626 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3581 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/Mistral-Small-Drummer-22B/503f79be-7f05-4464-ac9f-0f284f1e7965.json b/data/hfopenllm_v2/nbeerbower/Mistral-Small-Drummer-22B/503f79be-7f05-4464-ac9f-0f284f1e7965.json new file mode 100644 index 000000000..f6ad8dc3b --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/Mistral-Small-Drummer-22B/503f79be-7f05-4464-ac9f-0f284f1e7965.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_Mistral-Small-Drummer-22B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-Small-Drummer-22B", + "id": "nbeerbower/Mistral-Small-Drummer-22B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6331 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5793 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1888 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3431 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4064 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4095 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/Mistral-Small-Gutenberg-Doppel-22B/86ec7d95-6f6d-4ca6-97d5-7a910f42a06d.json b/data/hfopenllm_v2/nbeerbower/Mistral-Small-Gutenberg-Doppel-22B/86ec7d95-6f6d-4ca6-97d5-7a910f42a06d.json new file mode 100644 index 000000000..ade724a65 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/Mistral-Small-Gutenberg-Doppel-22B/86ec7d95-6f6d-4ca6-97d5-7a910f42a06d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_Mistral-Small-Gutenberg-Doppel-22B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-Small-Gutenberg-Doppel-22B", + "id": "nbeerbower/Mistral-Small-Gutenberg-Doppel-22B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4893 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5859 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2183 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.3465 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3971 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4124 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/Nemo-Loony-12B-experimental/894b90c6-c701-47d8-b930-4e271e28962f.json b/data/hfopenllm_v2/nbeerbower/Nemo-Loony-12B-experimental/894b90c6-c701-47d8-b930-4e271e28962f.json deleted file mode 100644 index e4ab38c02..000000000 --- a/data/hfopenllm_v2/nbeerbower/Nemo-Loony-12B-experimental/894b90c6-c701-47d8-b930-4e271e28962f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_Nemo-Loony-12B-experimental/1762652580.383332", - "retrieved_timestamp": "1762652580.383332", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/Nemo-Loony-12B-experimental", - "developer": "nbeerbower", - "inference_platform": "unknown", - "id": "nbeerbower/Nemo-Loony-12B-experimental", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37344357416100393 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38222228797769536 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3340625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.1589095744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/nbeerbower/Nemo-Loony-12B-experimental/d472ba79-6592-4f8a-a99c-ec3f71468d3e.json b/data/hfopenllm_v2/nbeerbower/Nemo-Loony-12B-experimental/d472ba79-6592-4f8a-a99c-ec3f71468d3e.json new file mode 100644 index 000000000..cdcb586d6 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/Nemo-Loony-12B-experimental/d472ba79-6592-4f8a-a99c-ec3f71468d3e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_Nemo-Loony-12B-experimental/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nemo-Loony-12B-experimental", + "id": "nbeerbower/Nemo-Loony-12B-experimental", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3734 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3822 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0151 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3341 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1589 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/Nemoties-ChatML-12B/3644fc16-b0fa-42d7-b17a-eb8f8332193f.json 
b/data/hfopenllm_v2/nbeerbower/Nemoties-ChatML-12B/3644fc16-b0fa-42d7-b17a-eb8f8332193f.json deleted file mode 100644 index 5fc725d06..000000000 --- a/data/hfopenllm_v2/nbeerbower/Nemoties-ChatML-12B/3644fc16-b0fa-42d7-b17a-eb8f8332193f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_Nemoties-ChatML-12B/1762652580.383542", - "retrieved_timestamp": "1762652580.383543", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/Nemoties-ChatML-12B", - "developer": "nbeerbower", - "inference_platform": "unknown", - "id": "nbeerbower/Nemoties-ChatML-12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6381999760635115 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5470252374810588 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07854984894259819 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45086458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3550531914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/nbeerbower/Nemoties-ChatML-12B/6ddc052c-6bda-4d8e-ad97-20d881c8cfb7.json b/data/hfopenllm_v2/nbeerbower/Nemoties-ChatML-12B/6ddc052c-6bda-4d8e-ad97-20d881c8cfb7.json new file mode 100644 index 000000000..2c18c82d6 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/Nemoties-ChatML-12B/6ddc052c-6bda-4d8e-ad97-20d881c8cfb7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_Nemoties-ChatML-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nemoties-ChatML-12B", + "id": "nbeerbower/Nemoties-ChatML-12B", + "developer": "nbeerbower", + "inference_platform": "unknown", + 
"additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6382 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.547 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0785 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4509 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3551 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/Qwen2.5-Gutenberg-Doppel-14B/76d1aed8-80fe-4b4f-bd81-ea0d6bf085c4.json b/data/hfopenllm_v2/nbeerbower/Qwen2.5-Gutenberg-Doppel-14B/76d1aed8-80fe-4b4f-bd81-ea0d6bf085c4.json new file mode 100644 index 000000000..c994ceaa2 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/Qwen2.5-Gutenberg-Doppel-14B/76d1aed8-80fe-4b4f-bd81-ea0d6bf085c4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_Qwen2.5-Gutenberg-Doppel-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-Gutenberg-Doppel-14B", + "id": "nbeerbower/Qwen2.5-Gutenberg-Doppel-14B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + 
"source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8091 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6382 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5415 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3331 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4101 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4921 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/SmolNemo-12B-FFT-experimental/435e3ce7-479f-4624-978e-25d755dee811.json b/data/hfopenllm_v2/nbeerbower/SmolNemo-12B-FFT-experimental/435e3ce7-479f-4624-978e-25d755dee811.json deleted file mode 100644 index c7b3df639..000000000 --- a/data/hfopenllm_v2/nbeerbower/SmolNemo-12B-FFT-experimental/435e3ce7-479f-4624-978e-25d755dee811.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_SmolNemo-12B-FFT-experimental/1762652580.383975", - "retrieved_timestamp": "1762652580.383976", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/SmolNemo-12B-FFT-experimental", - "developer": "nbeerbower", - "inference_platform": "unknown", - "id": "nbeerbower/SmolNemo-12B-FFT-experimental", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": 
"Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3348005514257725 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3336088810494464 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38469791666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12167553191489362 - } - } - ] -} diff --git a/data/hfopenllm_v2/nbeerbower/SmolNemo-12B-FFT-experimental/d2845d6e-65dd-4448-901d-d554b3e741f3.json b/data/hfopenllm_v2/nbeerbower/SmolNemo-12B-FFT-experimental/d2845d6e-65dd-4448-901d-d554b3e741f3.json new file mode 100644 index 000000000..baa82798c --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/SmolNemo-12B-FFT-experimental/d2845d6e-65dd-4448-901d-d554b3e741f3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_SmolNemo-12B-FFT-experimental/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SmolNemo-12B-FFT-experimental", + "id": "nbeerbower/SmolNemo-12B-FFT-experimental", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3348 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3336 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0128 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3847 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1217 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/Stella-mistral-nemo-12B-v2/f7dd203f-24d8-4875-878a-12ed99e20cd3.json b/data/hfopenllm_v2/nbeerbower/Stella-mistral-nemo-12B-v2/f7dd203f-24d8-4875-878a-12ed99e20cd3.json new file mode 100644 index 000000000..2d27ecb57 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/Stella-mistral-nemo-12B-v2/f7dd203f-24d8-4875-878a-12ed99e20cd3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_Stella-mistral-nemo-12B-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Stella-mistral-nemo-12B-v2", + "id": "nbeerbower/Stella-mistral-nemo-12B-v2", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3274 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5484 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1163 + 
} + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3322 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4304 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3684 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/gemma2-gutenberg-27B/287ae246-bee5-4fae-b78f-203491aa8df2.json b/data/hfopenllm_v2/nbeerbower/gemma2-gutenberg-27B/287ae246-bee5-4fae-b78f-203491aa8df2.json new file mode 100644 index 000000000..883e02d15 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/gemma2-gutenberg-27B/287ae246-bee5-4fae-b78f-203491aa8df2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_gemma2-gutenberg-27B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma2-gutenberg-27B", + "id": "nbeerbower/gemma2-gutenberg-27B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2947 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3797 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0189 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3727 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1982 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/gemma2-gutenberg-9B/9ee493f7-e031-4593-beae-65be17678e00.json b/data/hfopenllm_v2/nbeerbower/gemma2-gutenberg-9B/9ee493f7-e031-4593-beae-65be17678e00.json new file mode 100644 index 000000000..49d199adb --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/gemma2-gutenberg-9B/9ee493f7-e031-4593-beae-65be17678e00.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_gemma2-gutenberg-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma2-gutenberg-9B", + "id": "nbeerbower/gemma2-gutenberg-9B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2796 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5951 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0808 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3381 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": 
"hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4595 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4192 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/llama-3-gutenberg-8B/86b10c6f-41c6-4d0a-ae59-f90e204e466c.json b/data/hfopenllm_v2/nbeerbower/llama-3-gutenberg-8B/86b10c6f-41c6-4d0a-ae59-f90e204e466c.json new file mode 100644 index 000000000..787cd3d0f --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/llama-3-gutenberg-8B/86b10c6f-41c6-4d0a-ae59-f90e204e466c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_llama-3-gutenberg-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-3-gutenberg-8B", + "id": "nbeerbower/llama-3-gutenberg-8B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4372 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4994 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0785 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { 
+ "score": 0.4073 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3831 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/llama3.1-cc-8B/043e3533-7d5c-4d45-bcd8-0dbcc8ca4819.json b/data/hfopenllm_v2/nbeerbower/llama3.1-cc-8B/043e3533-7d5c-4d45-bcd8-0dbcc8ca4819.json new file mode 100644 index 000000000..17baa63e8 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/llama3.1-cc-8B/043e3533-7d5c-4d45-bcd8-0dbcc8ca4819.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_llama3.1-cc-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama3.1-cc-8B", + "id": "nbeerbower/llama3.1-cc-8B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5068 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4871 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.071 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3885 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3347 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/llama3.1-kartoffeldes-70B/1b3269fb-4b16-42b6-80c0-3d54bc2b4fed.json b/data/hfopenllm_v2/nbeerbower/llama3.1-kartoffeldes-70B/1b3269fb-4b16-42b6-80c0-3d54bc2b4fed.json new file mode 100644 index 000000000..ed92bd808 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/llama3.1-kartoffeldes-70B/1b3269fb-4b16-42b6-80c0-3d54bc2b4fed.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_llama3.1-kartoffeldes-70B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama3.1-kartoffeldes-70B", + "id": "nbeerbower/llama3.1-kartoffeldes-70B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.823 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6894 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3218 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3515 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4646 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4988 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/nbeerbower/mistral-nemo-bophades-12B/ee625c29-62c4-49da-9790-e7e67233157d.json b/data/hfopenllm_v2/nbeerbower/mistral-nemo-bophades-12B/ee625c29-62c4-49da-9790-e7e67233157d.json new file mode 100644 index 000000000..75703bbb6 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/mistral-nemo-bophades-12B/ee625c29-62c4-49da-9790-e7e67233157d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-bophades-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistral-nemo-bophades-12B", + "id": "nbeerbower/mistral-nemo-bophades-12B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6794 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4988 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1231 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4178 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3501 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/mistral-nemo-bophades3-12B/02b16bf2-62bb-401e-9726-2135d8d610be.json b/data/hfopenllm_v2/nbeerbower/mistral-nemo-bophades3-12B/02b16bf2-62bb-401e-9726-2135d8d610be.json new 
file mode 100644 index 000000000..c34d6ed08 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/mistral-nemo-bophades3-12B/02b16bf2-62bb-401e-9726-2135d8d610be.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-bophades3-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistral-nemo-bophades3-12B", + "id": "nbeerbower/mistral-nemo-bophades3-12B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6578 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5449 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0846 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4604 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3371 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/mistral-nemo-cc-12B/db10c6f9-2962-46cc-aa4e-4c99c4b494d1.json b/data/hfopenllm_v2/nbeerbower/mistral-nemo-cc-12B/db10c6f9-2962-46cc-aa4e-4c99c4b494d1.json new file mode 100644 index 000000000..25981a535 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/mistral-nemo-cc-12B/db10c6f9-2962-46cc-aa4e-4c99c4b494d1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + 
"evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-cc-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistral-nemo-cc-12B", + "id": "nbeerbower/mistral-nemo-cc-12B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1435 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5399 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0257 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3154 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4424 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3598 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/mistral-nemo-gutades-12B/aa37bda0-2e0a-4361-a5b4-468154d8ac72.json b/data/hfopenllm_v2/nbeerbower/mistral-nemo-gutades-12B/aa37bda0-2e0a-4361-a5b4-468154d8ac72.json new file mode 100644 index 000000000..4cebf3ef7 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/mistral-nemo-gutades-12B/aa37bda0-2e0a-4361-a5b4-468154d8ac72.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-gutades-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistral-nemo-gutades-12B", + "id": "nbeerbower/mistral-nemo-gutades-12B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3425 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5407 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1178 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3154 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.404 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3561 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/mistral-nemo-gutenberg-12B-v2/d9a6565c-5a0b-4893-b6e0-1fc52ec55bf5.json b/data/hfopenllm_v2/nbeerbower/mistral-nemo-gutenberg-12B-v2/d9a6565c-5a0b-4893-b6e0-1fc52ec55bf5.json new file mode 100644 index 000000000..f920c7857 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/mistral-nemo-gutenberg-12B-v2/d9a6565c-5a0b-4893-b6e0-1fc52ec55bf5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-gutenberg-12B-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistral-nemo-gutenberg-12B-v2", + "id": 
"nbeerbower/mistral-nemo-gutenberg-12B-v2", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6203 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5397 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1088 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4287 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3499 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/mistral-nemo-gutenberg-12B-v3/becf9805-83a9-4137-a938-81a61a10e4f0.json b/data/hfopenllm_v2/nbeerbower/mistral-nemo-gutenberg-12B-v3/becf9805-83a9-4137-a938-81a61a10e4f0.json new file mode 100644 index 000000000..b760c9b8c --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/mistral-nemo-gutenberg-12B-v3/becf9805-83a9-4137-a938-81a61a10e4f0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-gutenberg-12B-v3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistral-nemo-gutenberg-12B-v3", + "id": "nbeerbower/mistral-nemo-gutenberg-12B-v3", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": 
"MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2183 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5441 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0597 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3146 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.445 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3644 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/mistral-nemo-gutenberg-12B-v4/6e848120-bc31-4628-af05-30707a6dcc41.json b/data/hfopenllm_v2/nbeerbower/mistral-nemo-gutenberg-12B-v4/6e848120-bc31-4628-af05-30707a6dcc41.json new file mode 100644 index 000000000..ab9d229a4 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/mistral-nemo-gutenberg-12B-v4/6e848120-bc31-4628-af05-30707a6dcc41.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-gutenberg-12B-v4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistral-nemo-gutenberg-12B-v4", + "id": "nbeerbower/mistral-nemo-gutenberg-12B-v4", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": 
"hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2379 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5269 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1261 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3163 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4104 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3575 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/mistral-nemo-gutenberg-12B/864af855-71b0-4b11-ae3f-56294a7d0db9.json b/data/hfopenllm_v2/nbeerbower/mistral-nemo-gutenberg-12B/864af855-71b0-4b11-ae3f-56294a7d0db9.json new file mode 100644 index 000000000..c29fbdb15 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/mistral-nemo-gutenberg-12B/864af855-71b0-4b11-ae3f-56294a7d0db9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-gutenberg-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistral-nemo-gutenberg-12B", + "id": "nbeerbower/mistral-nemo-gutenberg-12B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.3504 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5281 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1163 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4171 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3562 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/mistral-nemo-gutenberg2-12B-test/285bd390-1dd9-4db2-af45-68dea557da3c.json b/data/hfopenllm_v2/nbeerbower/mistral-nemo-gutenberg2-12B-test/285bd390-1dd9-4db2-af45-68dea557da3c.json new file mode 100644 index 000000000..dc74a419a --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/mistral-nemo-gutenberg2-12B-test/285bd390-1dd9-4db2-af45-68dea557da3c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-gutenberg2-12B-test/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistral-nemo-gutenberg2-12B-test", + "id": "nbeerbower/mistral-nemo-gutenberg2-12B-test", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3385 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": 
"hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5255 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1163 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3171 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4157 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3555 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/mistral-nemo-kartoffel-12B/459e2375-1a15-4129-bee0-dc8852d531e2.json b/data/hfopenllm_v2/nbeerbower/mistral-nemo-kartoffel-12B/459e2375-1a15-4129-bee0-dc8852d531e2.json new file mode 100644 index 000000000..0d4219fba --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/mistral-nemo-kartoffel-12B/459e2375-1a15-4129-bee0-dc8852d531e2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-kartoffel-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistral-nemo-kartoffel-12B", + "id": "nbeerbower/mistral-nemo-kartoffel-12B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7032 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.5484 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0853 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4653 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3585 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/mistral-nemo-narwhal-12B/7b4c7d92-f581-4057-bec9-e3a8c6a5386e.json b/data/hfopenllm_v2/nbeerbower/mistral-nemo-narwhal-12B/7b4c7d92-f581-4057-bec9-e3a8c6a5386e.json new file mode 100644 index 000000000..d17ad6414 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/mistral-nemo-narwhal-12B/7b4c7d92-f581-4057-bec9-e3a8c6a5386e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-narwhal-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistral-nemo-narwhal-12B", + "id": "nbeerbower/mistral-nemo-narwhal-12B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5549 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5057 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0582 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3847 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3483 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/mistral-nemo-wissenschaft-12B/5f68a07f-4442-4453-92c3-b615323da96b.json b/data/hfopenllm_v2/nbeerbower/mistral-nemo-wissenschaft-12B/5f68a07f-4442-4453-92c3-b615323da96b.json deleted file mode 100644 index 32c6c0ce1..000000000 --- a/data/hfopenllm_v2/nbeerbower/mistral-nemo-wissenschaft-12B/5f68a07f-4442-4453-92c3-b615323da96b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-wissenschaft-12B/1762652580.388424", - "retrieved_timestamp": "1762652580.388424", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbeerbower/mistral-nemo-wissenschaft-12B", - "developer": "nbeerbower", - "inference_platform": "unknown", - "id": "nbeerbower/mistral-nemo-wissenschaft-12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6520133246452745 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5040306120993181 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1216012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41778125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35322473404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/nbeerbower/mistral-nemo-wissenschaft-12B/7ceab841-f9a3-455b-9314-243d8fc3cd11.json b/data/hfopenllm_v2/nbeerbower/mistral-nemo-wissenschaft-12B/7ceab841-f9a3-455b-9314-243d8fc3cd11.json new file mode 100644 index 000000000..5c64e8342 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/mistral-nemo-wissenschaft-12B/7ceab841-f9a3-455b-9314-243d8fc3cd11.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-wissenschaft-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistral-nemo-wissenschaft-12B", + "id": "nbeerbower/mistral-nemo-wissenschaft-12B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.652 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.504 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1216 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4178 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3532 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbrahme/IndusQ/b372e098-0e1c-410a-8f5a-1bd9a910aa6b.json b/data/hfopenllm_v2/nbrahme/IndusQ/b372e098-0e1c-410a-8f5a-1bd9a910aa6b.json deleted file mode 100644 index 4ade79d88..000000000 --- a/data/hfopenllm_v2/nbrahme/IndusQ/b372e098-0e1c-410a-8f5a-1bd9a910aa6b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nbrahme_IndusQ/1762652580.38863", - "retrieved_timestamp": "1762652580.388631", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nbrahme/IndusQ", - "developer": "nbrahme", - "inference_platform": "unknown", - "id": "nbrahme/IndusQ", - "additional_details": { - "precision": "bfloat16", - "architecture": "GPT2LMHeadModel", - "params_billions": 1.176 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24397487555242311 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30624035198474986 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26510067114093966 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3366354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11203457446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/nbrahme/IndusQ/c1e2fb45-22d8-4eb4-8971-ce89c3048b9e.json b/data/hfopenllm_v2/nbrahme/IndusQ/c1e2fb45-22d8-4eb4-8971-ce89c3048b9e.json new file mode 100644 index 000000000..21a04901a --- /dev/null +++ b/data/hfopenllm_v2/nbrahme/IndusQ/c1e2fb45-22d8-4eb4-8971-ce89c3048b9e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/nbrahme_IndusQ/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "IndusQ", + "id": "nbrahme/IndusQ", + "developer": "nbrahme", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GPT2LMHeadModel", + "params_billions": 1.176 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.244 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0008 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3366 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.112 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/necva/IE-cont-Llama3.1-8B/68cb2ca1-1648-41a2-92b7-969bccdca4ee.json b/data/hfopenllm_v2/necva/IE-cont-Llama3.1-8B/68cb2ca1-1648-41a2-92b7-969bccdca4ee.json new file mode 100644 index 000000000..baefc2323 --- /dev/null +++ b/data/hfopenllm_v2/necva/IE-cont-Llama3.1-8B/68cb2ca1-1648-41a2-92b7-969bccdca4ee.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/necva_IE-cont-Llama3.1-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + 
"name": "IE-cont-Llama3.1-8B", + "id": "necva/IE-cont-Llama3.1-8B", + "developer": "necva", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2049 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2912 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3575 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1167 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/necva/replica-IEPile/5f285d61-5e4b-4c5c-8960-c10313d76ae3.json b/data/hfopenllm_v2/necva/replica-IEPile/5f285d61-5e4b-4c5c-8960-c10313d76ae3.json new file mode 100644 index 000000000..065eb3646 --- /dev/null +++ b/data/hfopenllm_v2/necva/replica-IEPile/5f285d61-5e4b-4c5c-8960-c10313d76ae3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/necva_replica-IEPile/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "replica-IEPile", + "id": "necva/replica-IEPile", + "developer": "necva", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.65 + } + }, + "evaluation_results": [ + { + "evaluation_name": 
"IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4678 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4779 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1239 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3998 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3561 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/necva/replica-IEPile/86a45185-8753-4cd0-818f-63a62f03423f.json b/data/hfopenllm_v2/necva/replica-IEPile/86a45185-8753-4cd0-818f-63a62f03423f.json deleted file mode 100644 index 56c34f99b..000000000 --- a/data/hfopenllm_v2/necva/replica-IEPile/86a45185-8753-4cd0-818f-63a62f03423f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/necva_replica-IEPile/1762652580.389119", - "retrieved_timestamp": "1762652580.38912", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "necva/replica-IEPile", - "developer": "necva", - "inference_platform": "unknown", - "id": "necva/replica-IEPile", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.65 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.4677910167245132 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4778579652970231 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12386706948640483 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3997604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3560505319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/neopolita/jessi-v0.1-bf16-falcon3-7b-instruct/3af19898-8590-4aec-b324-46c7fbf596d3.json b/data/hfopenllm_v2/neopolita/jessi-v0.1-bf16-falcon3-7b-instruct/3af19898-8590-4aec-b324-46c7fbf596d3.json new file mode 100644 index 000000000..a9a8b3aca --- /dev/null +++ b/data/hfopenllm_v2/neopolita/jessi-v0.1-bf16-falcon3-7b-instruct/3af19898-8590-4aec-b324-46c7fbf596d3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.1-bf16-falcon3-7b-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jessi-v0.1-bf16-falcon3-7b-instruct", + "id": "neopolita/jessi-v0.1-bf16-falcon3-7b-instruct", + "developer": "neopolita", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7527 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5516 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3807 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4825 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3924 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/neopolita/jessi-v0.1-bf16-falcon3-7b-instruct/5063eae6-e8f3-41c6-ab11-cfcc4a0a0cf3.json b/data/hfopenllm_v2/neopolita/jessi-v0.1-bf16-falcon3-7b-instruct/5063eae6-e8f3-41c6-ab11-cfcc4a0a0cf3.json deleted file mode 100644 index bcc83f1a9..000000000 --- a/data/hfopenllm_v2/neopolita/jessi-v0.1-bf16-falcon3-7b-instruct/5063eae6-e8f3-41c6-ab11-cfcc4a0a0cf3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.1-bf16-falcon3-7b-instruct/1762652580.389358", - "retrieved_timestamp": "1762652580.389359", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "neopolita/jessi-v0.1-bf16-falcon3-7b-instruct", - "developer": "neopolita", - "inference_platform": "unknown", - "id": "neopolita/jessi-v0.1-bf16-falcon3-7b-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7527050448365891 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5516128933222162 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3806646525679758 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, 
- { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48248958333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3923703457446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/neopolita/jessi-v0.1-falcon3-10b-instruct/c2ee0925-6e4a-4d3b-80be-b8b98156e3db.json b/data/hfopenllm_v2/neopolita/jessi-v0.1-falcon3-10b-instruct/c2ee0925-6e4a-4d3b-80be-b8b98156e3db.json deleted file mode 100644 index beaec7e6a..000000000 --- a/data/hfopenllm_v2/neopolita/jessi-v0.1-falcon3-10b-instruct/c2ee0925-6e4a-4d3b-80be-b8b98156e3db.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.1-falcon3-10b-instruct/1762652580.389616", - "retrieved_timestamp": "1762652580.389617", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "neopolita/jessi-v0.1-falcon3-10b-instruct", - "developer": "neopolita", - "inference_platform": "unknown", - "id": "neopolita/jessi-v0.1-falcon3-10b-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.755152994055772 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5952883626256132 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2001510574018127 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42785416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4187998670212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/neopolita/jessi-v0.1-falcon3-10b-instruct/e8472266-6d03-439f-bd6b-e3ac5ef2cf09.json 
b/data/hfopenllm_v2/neopolita/jessi-v0.1-falcon3-10b-instruct/e8472266-6d03-439f-bd6b-e3ac5ef2cf09.json new file mode 100644 index 000000000..e86ad0154 --- /dev/null +++ b/data/hfopenllm_v2/neopolita/jessi-v0.1-falcon3-10b-instruct/e8472266-6d03-439f-bd6b-e3ac5ef2cf09.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.1-falcon3-10b-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jessi-v0.1-falcon3-10b-instruct", + "id": "neopolita/jessi-v0.1-falcon3-10b-instruct", + "developer": "neopolita", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7552 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5953 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2002 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3188 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4279 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4188 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/neopolita/jessi-v0.1-qwen2.5-7b-instruct/3f578b45-48f9-4022-991c-32a71706aba3.json b/data/hfopenllm_v2/neopolita/jessi-v0.1-qwen2.5-7b-instruct/3f578b45-48f9-4022-991c-32a71706aba3.json new file mode 100644 index 000000000..428e15bc6 --- /dev/null +++ 
b/data/hfopenllm_v2/neopolita/jessi-v0.1-qwen2.5-7b-instruct/3f578b45-48f9-4022-991c-32a71706aba3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.1-qwen2.5-7b-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jessi-v0.1-qwen2.5-7b-instruct", + "id": "neopolita/jessi-v0.1-qwen2.5-7b-instruct", + "developer": "neopolita", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7327 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5292 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4086 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3914 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4228 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/neopolita/jessi-v0.1-qwen2.5-7b-instruct/9b1f077d-5893-417c-ac87-1d0beb39b750.json b/data/hfopenllm_v2/neopolita/jessi-v0.1-qwen2.5-7b-instruct/9b1f077d-5893-417c-ac87-1d0beb39b750.json deleted file mode 100644 index b6ee1c77d..000000000 --- a/data/hfopenllm_v2/neopolita/jessi-v0.1-qwen2.5-7b-instruct/9b1f077d-5893-417c-ac87-1d0beb39b750.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/neopolita_jessi-v0.1-qwen2.5-7b-instruct/1762652580.3898308", - "retrieved_timestamp": "1762652580.3898308", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "neopolita/jessi-v0.1-qwen2.5-7b-instruct", - "developer": "neopolita", - "inference_platform": "unknown", - "id": "neopolita/jessi-v0.1-qwen2.5-7b-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7326715337526651 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5292315105257686 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4086102719033233 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3913645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42278922872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/neopolita/jessi-v0.1-virtuoso-small/b4630d14-950d-4dbf-8897-74d46dd51130.json b/data/hfopenllm_v2/neopolita/jessi-v0.1-virtuoso-small/b4630d14-950d-4dbf-8897-74d46dd51130.json deleted file mode 100644 index 334d2fbf1..000000000 --- a/data/hfopenllm_v2/neopolita/jessi-v0.1-virtuoso-small/b4630d14-950d-4dbf-8897-74d46dd51130.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.1-virtuoso-small/1762652580.3900428", - "retrieved_timestamp": "1762652580.3900428", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "neopolita/jessi-v0.1-virtuoso-small", - "developer": "neopolita", - "inference_platform": "unknown", - "id": "neopolita/jessi-v0.1-virtuoso-small", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7959192719761344 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6442861439957068 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33987915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43616666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5129654255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/neopolita/jessi-v0.1-virtuoso-small/ef8c22a7-3898-422e-88e2-1a8c14ab5bf2.json b/data/hfopenllm_v2/neopolita/jessi-v0.1-virtuoso-small/ef8c22a7-3898-422e-88e2-1a8c14ab5bf2.json new file mode 100644 index 000000000..d6a3717c7 --- /dev/null +++ b/data/hfopenllm_v2/neopolita/jessi-v0.1-virtuoso-small/ef8c22a7-3898-422e-88e2-1a8c14ab5bf2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.1-virtuoso-small/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jessi-v0.1-virtuoso-small", + "id": "neopolita/jessi-v0.1-virtuoso-small", + "developer": "neopolita", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7959 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6443 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3399 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3305 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4362 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.513 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/neopolita/jessi-v0.2-falcon3-10b-instruct/4a73436e-e2b7-4c03-b4b2-80d0ed8e389a.json b/data/hfopenllm_v2/neopolita/jessi-v0.2-falcon3-10b-instruct/4a73436e-e2b7-4c03-b4b2-80d0ed8e389a.json deleted file mode 100644 index 08ac0e1fb..000000000 --- a/data/hfopenllm_v2/neopolita/jessi-v0.2-falcon3-10b-instruct/4a73436e-e2b7-4c03-b4b2-80d0ed8e389a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.2-falcon3-10b-instruct/1762652580.390252", - "retrieved_timestamp": "1762652580.390252", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "neopolita/jessi-v0.2-falcon3-10b-instruct", - "developer": "neopolita", - "inference_platform": "unknown", - "id": "neopolita/jessi-v0.2-falcon3-10b-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7768099753099553 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6204846671314362 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2122356495468278 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42813541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4354222074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/neopolita/jessi-v0.2-falcon3-10b-instruct/81630ea2-d496-4872-92b7-e476badaf50d.json b/data/hfopenllm_v2/neopolita/jessi-v0.2-falcon3-10b-instruct/81630ea2-d496-4872-92b7-e476badaf50d.json new file mode 100644 index 000000000..26642a60f --- /dev/null +++ b/data/hfopenllm_v2/neopolita/jessi-v0.2-falcon3-10b-instruct/81630ea2-d496-4872-92b7-e476badaf50d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.2-falcon3-10b-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jessi-v0.2-falcon3-10b-instruct", + "id": "neopolita/jessi-v0.2-falcon3-10b-instruct", + "developer": "neopolita", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7768 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6205 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2122 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3289 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4281 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4354 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/neopolita/jessi-v0.2-falcon3-7b-instruct/9436d04a-9c81-47ad-a7b8-496e14058627.json b/data/hfopenllm_v2/neopolita/jessi-v0.2-falcon3-7b-instruct/9436d04a-9c81-47ad-a7b8-496e14058627.json new file mode 100644 index 000000000..b8f742e94 --- /dev/null +++ b/data/hfopenllm_v2/neopolita/jessi-v0.2-falcon3-7b-instruct/9436d04a-9c81-47ad-a7b8-496e14058627.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.2-falcon3-7b-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jessi-v0.2-falcon3-7b-instruct", + "id": "neopolita/jessi-v0.2-falcon3-7b-instruct", + "developer": "neopolita", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5771 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5363 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2538 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3171 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4479 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": 
"MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3905 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/neopolita/jessi-v0.2-falcon3-7b-instruct/bd8025f1-66d4-4644-af1b-ca5366a32964.json b/data/hfopenllm_v2/neopolita/jessi-v0.2-falcon3-7b-instruct/bd8025f1-66d4-4644-af1b-ca5366a32964.json deleted file mode 100644 index 85e72cbf9..000000000 --- a/data/hfopenllm_v2/neopolita/jessi-v0.2-falcon3-7b-instruct/bd8025f1-66d4-4644-af1b-ca5366a32964.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.2-falcon3-7b-instruct/1762652580.39046", - "retrieved_timestamp": "1762652580.39046", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "neopolita/jessi-v0.2-falcon3-7b-instruct", - "developer": "neopolita", - "inference_platform": "unknown", - "id": "neopolita/jessi-v0.2-falcon3-7b-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5770754930251731 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5363079188886575 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2537764350453172 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44788541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3904587765957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/neopolita/jessi-v0.3-falcon3-7b-instruct/95281cbf-6f27-4e17-b21f-9a0604d5629b.json b/data/hfopenllm_v2/neopolita/jessi-v0.3-falcon3-7b-instruct/95281cbf-6f27-4e17-b21f-9a0604d5629b.json deleted file mode 100644 index 1ef4f5ea5..000000000 --- a/data/hfopenllm_v2/neopolita/jessi-v0.3-falcon3-7b-instruct/95281cbf-6f27-4e17-b21f-9a0604d5629b.json +++ /dev/null @@ -1,105 
+0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.3-falcon3-7b-instruct/1762652580.390663", - "retrieved_timestamp": "1762652580.3906639", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "neopolita/jessi-v0.3-falcon3-7b-instruct", - "developer": "neopolita", - "inference_platform": "unknown", - "id": "neopolita/jessi-v0.3-falcon3-7b-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7509064836855099 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.538793502664194 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18882175226586104 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46915625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3970246010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/neopolita/jessi-v0.3-falcon3-7b-instruct/f1e6e54e-cb97-4980-8957-2190ee5c4c34.json b/data/hfopenllm_v2/neopolita/jessi-v0.3-falcon3-7b-instruct/f1e6e54e-cb97-4980-8957-2190ee5c4c34.json new file mode 100644 index 000000000..ed27b7620 --- /dev/null +++ b/data/hfopenllm_v2/neopolita/jessi-v0.3-falcon3-7b-instruct/f1e6e54e-cb97-4980-8957-2190ee5c4c34.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.3-falcon3-7b-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jessi-v0.3-falcon3-7b-instruct", + "id": "neopolita/jessi-v0.3-falcon3-7b-instruct", + "developer": "neopolita", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + 
"dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7509 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5388 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1888 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3196 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4692 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.397 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/neopolita/jessi-v0.4-falcon3-7b-instruct/30914dd3-c857-4aaf-b6b9-d1c7e4917e89.json b/data/hfopenllm_v2/neopolita/jessi-v0.4-falcon3-7b-instruct/30914dd3-c857-4aaf-b6b9-d1c7e4917e89.json new file mode 100644 index 000000000..b113a0091 --- /dev/null +++ b/data/hfopenllm_v2/neopolita/jessi-v0.4-falcon3-7b-instruct/30914dd3-c857-4aaf-b6b9-d1c7e4917e89.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.4-falcon3-7b-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jessi-v0.4-falcon3-7b-instruct", + "id": "neopolita/jessi-v0.4-falcon3-7b-instruct", + "developer": "neopolita", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7604 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5522 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3769 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4971 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4004 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/neopolita/jessi-v0.4-falcon3-7b-instruct/514b1b8c-d80a-4851-afec-e04968b2e733.json b/data/hfopenllm_v2/neopolita/jessi-v0.4-falcon3-7b-instruct/514b1b8c-d80a-4851-afec-e04968b2e733.json deleted file mode 100644 index f3b93fd94..000000000 --- a/data/hfopenllm_v2/neopolita/jessi-v0.4-falcon3-7b-instruct/514b1b8c-d80a-4851-afec-e04968b2e733.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.4-falcon3-7b-instruct/1762652580.39086", - "retrieved_timestamp": "1762652580.390861", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "neopolita/jessi-v0.4-falcon3-7b-instruct", - "developer": "neopolita", - "inference_platform": "unknown", - "id": "neopolita/jessi-v0.4-falcon3-7b-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7603735865281896 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5521668757306609 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3768882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49712500000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40043218085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/neopolita/jessi-v0.5-falcon3-7b-instruct/1c389a32-68b3-47c0-a6b8-2c2291293002.json b/data/hfopenllm_v2/neopolita/jessi-v0.5-falcon3-7b-instruct/1c389a32-68b3-47c0-a6b8-2c2291293002.json new file mode 100644 index 000000000..d7d351e7b --- /dev/null +++ b/data/hfopenllm_v2/neopolita/jessi-v0.5-falcon3-7b-instruct/1c389a32-68b3-47c0-a6b8-2c2291293002.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.5-falcon3-7b-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jessi-v0.5-falcon3-7b-instruct", + "id": "neopolita/jessi-v0.5-falcon3-7b-instruct", + "developer": "neopolita", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7412 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.559 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3739 + } + }, + { + "evaluation_name": 
"GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3112 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4865 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3966 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/neopolita/jessi-v0.5-falcon3-7b-instruct/6736897b-390a-4c19-8a04-9b606c1705b1.json b/data/hfopenllm_v2/neopolita/jessi-v0.5-falcon3-7b-instruct/6736897b-390a-4c19-8a04-9b606c1705b1.json deleted file mode 100644 index 5b6368cf2..000000000 --- a/data/hfopenllm_v2/neopolita/jessi-v0.5-falcon3-7b-instruct/6736897b-390a-4c19-8a04-9b606c1705b1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.5-falcon3-7b-instruct/1762652580.391073", - "retrieved_timestamp": "1762652580.391074", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "neopolita/jessi-v0.5-falcon3-7b-instruct", - "developer": "neopolita", - "inference_platform": "unknown", - "id": "neopolita/jessi-v0.5-falcon3-7b-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7411645544931892 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5589627302276082 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37386706948640486 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48652083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3966090425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/neopolita/jessi-v0.6-falcon3-7b-instruct/5b934386-a0e9-437d-bf9e-a51074415a1e.json b/data/hfopenllm_v2/neopolita/jessi-v0.6-falcon3-7b-instruct/5b934386-a0e9-437d-bf9e-a51074415a1e.json deleted file mode 100644 index 16f5ba0e6..000000000 --- a/data/hfopenllm_v2/neopolita/jessi-v0.6-falcon3-7b-instruct/5b934386-a0e9-437d-bf9e-a51074415a1e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.6-falcon3-7b-instruct/1762652580.391277", - "retrieved_timestamp": "1762652580.391277", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "neopolita/jessi-v0.6-falcon3-7b-instruct", - "developer": "neopolita", - "inference_platform": "unknown", - "id": "neopolita/jessi-v0.6-falcon3-7b-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7401904723910335 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5508818723957883 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3564954682779456 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49042708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3956948138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/neopolita/jessi-v0.6-falcon3-7b-instruct/e759a217-6571-446d-9bf9-d1512793f307.json b/data/hfopenllm_v2/neopolita/jessi-v0.6-falcon3-7b-instruct/e759a217-6571-446d-9bf9-d1512793f307.json new file mode 100644 index 000000000..02cb96cd3 --- /dev/null +++ 
b/data/hfopenllm_v2/neopolita/jessi-v0.6-falcon3-7b-instruct/e759a217-6571-446d-9bf9-d1512793f307.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.6-falcon3-7b-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jessi-v0.6-falcon3-7b-instruct", + "id": "neopolita/jessi-v0.6-falcon3-7b-instruct", + "developer": "neopolita", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7402 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5509 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3565 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4904 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3957 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/neopolita/loki-v0.1-virtuoso/753f3b21-7365-4117-b2a0-a91f03ec3d39.json b/data/hfopenllm_v2/neopolita/loki-v0.1-virtuoso/753f3b21-7365-4117-b2a0-a91f03ec3d39.json new file mode 100644 index 000000000..21bbf2ac9 --- /dev/null +++ b/data/hfopenllm_v2/neopolita/loki-v0.1-virtuoso/753f3b21-7365-4117-b2a0-a91f03ec3d39.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/neopolita_loki-v0.1-virtuoso/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "loki-v0.1-virtuoso", + "id": "neopolita/loki-v0.1-virtuoso", + "developer": "neopolita", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7819 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6467 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3391 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3507 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5129 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/neopolita/loki-v0.1-virtuoso/907047d7-1767-4009-8e04-02f5dc366355.json b/data/hfopenllm_v2/neopolita/loki-v0.1-virtuoso/907047d7-1767-4009-8e04-02f5dc366355.json deleted file mode 100644 index 07de7bb12..000000000 --- a/data/hfopenllm_v2/neopolita/loki-v0.1-virtuoso/907047d7-1767-4009-8e04-02f5dc366355.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/neopolita_loki-v0.1-virtuoso/1762652580.3914938", - "retrieved_timestamp": "1762652580.391495", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "neopolita/loki-v0.1-virtuoso", - "developer": "neopolita", - "inference_platform": "unknown", - "id": "neopolita/loki-v0.1-virtuoso", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7819308324135517 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6467251502613163 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3391238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35067114093959734 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43753125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5128823138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/netcat420/DeepSeek-R1-Distill-Qwen-MFANN-Slerp-7b/297ef102-67c1-4e9c-b418-fed026bb1f8a.json b/data/hfopenllm_v2/netcat420/DeepSeek-R1-Distill-Qwen-MFANN-Slerp-7b/297ef102-67c1-4e9c-b418-fed026bb1f8a.json new file mode 100644 index 000000000..504b55a12 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/DeepSeek-R1-Distill-Qwen-MFANN-Slerp-7b/297ef102-67c1-4e9c-b418-fed026bb1f8a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_DeepSeek-R1-Distill-Qwen-MFANN-Slerp-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-R1-Distill-Qwen-MFANN-Slerp-7b", + "id": "netcat420/DeepSeek-R1-Distill-Qwen-MFANN-Slerp-7b", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.115 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2877 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0015 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3724 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.109 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/DeepSeek-R1-MFANN-TIES-unretrained-7b/43da500e-cdc7-4b70-a0eb-6ae3371670d9.json b/data/hfopenllm_v2/netcat420/DeepSeek-R1-MFANN-TIES-unretrained-7b/43da500e-cdc7-4b70-a0eb-6ae3371670d9.json deleted file mode 100644 index f95dccb8c..000000000 --- a/data/hfopenllm_v2/netcat420/DeepSeek-R1-MFANN-TIES-unretrained-7b/43da500e-cdc7-4b70-a0eb-6ae3371670d9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_DeepSeek-R1-MFANN-TIES-unretrained-7b/1762652580.3919501", - "retrieved_timestamp": "1762652580.391951", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/DeepSeek-R1-MFANN-TIES-unretrained-7b", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/DeepSeek-R1-MFANN-TIES-unretrained-7b", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2586880587951081 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on 
BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30859903405301287 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2550335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3527291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11452792553191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/netcat420/DeepSeek-R1-MFANN-TIES-unretrained-7b/9fbf73d7-7d67-4d6c-a5b9-efc627cd1b2b.json b/data/hfopenllm_v2/netcat420/DeepSeek-R1-MFANN-TIES-unretrained-7b/9fbf73d7-7d67-4d6c-a5b9-efc627cd1b2b.json new file mode 100644 index 000000000..3a61410cd --- /dev/null +++ b/data/hfopenllm_v2/netcat420/DeepSeek-R1-MFANN-TIES-unretrained-7b/9fbf73d7-7d67-4d6c-a5b9-efc627cd1b2b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_DeepSeek-R1-MFANN-TIES-unretrained-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-R1-MFANN-TIES-unretrained-7b", + "id": "netcat420/DeepSeek-R1-MFANN-TIES-unretrained-7b", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2587 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3086 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0121 + } + }, + { + "evaluation_name": "GPQA", + 
"source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.255 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3527 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1145 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/Llama3.1-MFANN-8b/b1446577-f13f-434a-a0b4-916091395d4a.json b/data/hfopenllm_v2/netcat420/Llama3.1-MFANN-8b/b1446577-f13f-434a-a0b4-916091395d4a.json new file mode 100644 index 000000000..4ba8de1e7 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/Llama3.1-MFANN-8b/b1446577-f13f-434a-a0b4-916091395d4a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_Llama3.1-MFANN-8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.1-MFANN-8b", + "id": "netcat420/Llama3.1-MFANN-8b", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4281 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0295 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.2878 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3379 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2725 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V2/fc8946aa-8b04-482c-8c05-d026d2af07be.json b/data/hfopenllm_v2/netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V2/fc8946aa-8b04-482c-8c05-d026d2af07be.json new file mode 100644 index 000000000..db28a161a --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V2/fc8946aa-8b04-482c-8c05-d026d2af07be.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN-Llama3.1-Abliterated-SLERP-TIES-V2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MFANN-Llama3.1-Abliterated-SLERP-TIES-V2", + "id": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V2", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.421 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4924 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0763 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + 
"dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3728 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3522 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V3/fabe3784-948c-4618-9cf0-c76a3ddd3820.json b/data/hfopenllm_v2/netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V3/fabe3784-948c-4618-9cf0-c76a3ddd3820.json new file mode 100644 index 000000000..fe6d32e73 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V3/fabe3784-948c-4618-9cf0-c76a3ddd3820.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN-Llama3.1-Abliterated-SLERP-TIES-V3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MFANN-Llama3.1-Abliterated-SLERP-TIES-V3", + "id": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V3", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4238 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4914 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0755 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3741 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.349 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN-Llama3.1-Abliterated-SLERP-V4/736dcf09-6a19-4e88-a790-7a7ee74d8717.json b/data/hfopenllm_v2/netcat420/MFANN-Llama3.1-Abliterated-SLERP-V4/736dcf09-6a19-4e88-a790-7a7ee74d8717.json new file mode 100644 index 000000000..ca9c619d8 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN-Llama3.1-Abliterated-SLERP-V4/736dcf09-6a19-4e88-a790-7a7ee74d8717.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN-Llama3.1-Abliterated-SLERP-V4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MFANN-Llama3.1-Abliterated-SLERP-V4", + "id": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-V4", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4169 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4909 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.068 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.3821 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3516 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN-Llama3.1-Abliterated-SLERP-V5/75b4c750-1570-4825-a04a-965c06861fd4.json b/data/hfopenllm_v2/netcat420/MFANN-Llama3.1-Abliterated-SLERP-V5/75b4c750-1570-4825-a04a-965c06861fd4.json new file mode 100644 index 000000000..2cffceba6 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN-Llama3.1-Abliterated-SLERP-V5/75b4c750-1570-4825-a04a-965c06861fd4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN-Llama3.1-Abliterated-SLERP-V5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MFANN-Llama3.1-Abliterated-SLERP-V5", + "id": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-V5", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4329 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4952 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0816 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3781 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": 
"hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3445 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN-Llama3.1-Abliterated-Slerp-TIES/b7f8b678-2aea-4d41-ba21-2083fc472574.json b/data/hfopenllm_v2/netcat420/MFANN-Llama3.1-Abliterated-Slerp-TIES/b7f8b678-2aea-4d41-ba21-2083fc472574.json new file mode 100644 index 000000000..4f314f567 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN-Llama3.1-Abliterated-Slerp-TIES/b7f8b678-2aea-4d41-ba21-2083fc472574.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN-Llama3.1-Abliterated-Slerp-TIES/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MFANN-Llama3.1-Abliterated-Slerp-TIES", + "id": "netcat420/MFANN-Llama3.1-Abliterated-Slerp-TIES", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4293 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4968 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0665 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3687 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3531 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN-Llama3.1-Abliterated-Slerp-V3.2/a8010630-58de-448c-af08-70b8ffec431b.json b/data/hfopenllm_v2/netcat420/MFANN-Llama3.1-Abliterated-Slerp-V3.2/a8010630-58de-448c-af08-70b8ffec431b.json new file mode 100644 index 000000000..3eb29349d --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN-Llama3.1-Abliterated-Slerp-V3.2/a8010630-58de-448c-af08-70b8ffec431b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN-Llama3.1-Abliterated-Slerp-V3.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MFANN-Llama3.1-Abliterated-Slerp-V3.2", + "id": "netcat420/MFANN-Llama3.1-Abliterated-Slerp-V3.2", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4128 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4978 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0702 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2878 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3754 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3527 + } + 
} + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN-SFT/4a0c2ce5-a4b4-4d35-b65d-bbc6e36a649b.json b/data/hfopenllm_v2/netcat420/MFANN-SFT/4a0c2ce5-a4b4-4d35-b65d-bbc6e36a649b.json new file mode 100644 index 000000000..dde4c2499 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN-SFT/4a0c2ce5-a4b4-4d35-b65d-bbc6e36a649b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN-SFT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MFANN-SFT", + "id": "netcat420/MFANN-SFT", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3682 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4852 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0597 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3163 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3725 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3336 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN-SFT/748c7e5a-697b-4763-a43e-e3b6a6f2951b.json b/data/hfopenllm_v2/netcat420/MFANN-SFT/748c7e5a-697b-4763-a43e-e3b6a6f2951b.json deleted file mode 100644 index cbdd9d108..000000000 --- 
a/data/hfopenllm_v2/netcat420/MFANN-SFT/748c7e5a-697b-4763-a43e-e3b6a6f2951b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN-SFT/1762652580.393719", - "retrieved_timestamp": "1762652580.3937201", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/MFANN-SFT", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANN-SFT", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36822298168858625 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.485188719488523 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05966767371601209 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3725416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3336103723404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/netcat420/MFANN-abliterated-phi2-merge-unretrained/1132251a-59c7-402e-9957-f9288864508f.json b/data/hfopenllm_v2/netcat420/MFANN-abliterated-phi2-merge-unretrained/1132251a-59c7-402e-9957-f9288864508f.json new file mode 100644 index 000000000..d5dbf98ee --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN-abliterated-phi2-merge-unretrained/1132251a-59c7-402e-9957-f9288864508f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN-abliterated-phi2-merge-unretrained/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MFANN-abliterated-phi2-merge-unretrained", + "id": "netcat420/MFANN-abliterated-phi2-merge-unretrained", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + 
"params_billions": 2.775 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3005 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4104 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0287 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3183 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1478 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN-llama3.1-Abliterated-SLERP/e2fac049-8f9f-4b71-bcd3-5746b7d90150.json b/data/hfopenllm_v2/netcat420/MFANN-llama3.1-Abliterated-SLERP/e2fac049-8f9f-4b71-bcd3-5746b7d90150.json new file mode 100644 index 000000000..3a55b4765 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN-llama3.1-Abliterated-SLERP/e2fac049-8f9f-4b71-bcd3-5746b7d90150.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN-llama3.1-Abliterated-SLERP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MFANN-llama3.1-Abliterated-SLERP", + "id": "netcat420/MFANN-llama3.1-Abliterated-SLERP", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + 
"hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2591 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4574 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0483 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2735 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3809 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN-llama3.1-abliterated-SLERP-v3.1/d891a1e1-ad65-498f-9ee8-59523c1bfd19.json b/data/hfopenllm_v2/netcat420/MFANN-llama3.1-abliterated-SLERP-v3.1/d891a1e1-ad65-498f-9ee8-59523c1bfd19.json new file mode 100644 index 000000000..ea3ef3c69 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN-llama3.1-abliterated-SLERP-v3.1/d891a1e1-ad65-498f-9ee8-59523c1bfd19.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN-llama3.1-abliterated-SLERP-v3.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MFANN-llama3.1-abliterated-SLERP-v3.1", + "id": "netcat420/MFANN-llama3.1-abliterated-SLERP-v3.1", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4202 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4921 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0695 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3686 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3543 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN-llama3.1-abliterated-SLERP-v3/9dd3103f-6c4f-4077-ac27-3a9b0f4a5882.json b/data/hfopenllm_v2/netcat420/MFANN-llama3.1-abliterated-SLERP-v3/9dd3103f-6c4f-4077-ac27-3a9b0f4a5882.json new file mode 100644 index 000000000..cf3326495 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN-llama3.1-abliterated-SLERP-v3/9dd3103f-6c4f-4077-ac27-3a9b0f4a5882.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN-llama3.1-abliterated-SLERP-v3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MFANN-llama3.1-abliterated-SLERP-v3", + "id": "netcat420/MFANN-llama3.1-abliterated-SLERP-v3", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3799 + } + }, + { + "evaluation_name": "BBH", + 
"source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4931 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0642 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.366 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3531 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN-llama3.1-abliterated-v2/ca031f70-5785-46d1-8a58-b279d8340776.json b/data/hfopenllm_v2/netcat420/MFANN-llama3.1-abliterated-v2/ca031f70-5785-46d1-8a58-b279d8340776.json new file mode 100644 index 000000000..27620df52 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN-llama3.1-abliterated-v2/ca031f70-5785-46d1-8a58-b279d8340776.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN-llama3.1-abliterated-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MFANN-llama3.1-abliterated-v2", + "id": "netcat420/MFANN-llama3.1-abliterated-v2", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4429 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4941 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.074 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3845 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3491 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN-phigments-slerp-V2/18457711-92b8-4c27-a89a-928fecdf5724.json b/data/hfopenllm_v2/netcat420/MFANN-phigments-slerp-V2/18457711-92b8-4c27-a89a-928fecdf5724.json new file mode 100644 index 000000000..f7adf105b --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN-phigments-slerp-V2/18457711-92b8-4c27-a89a-928fecdf5724.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN-phigments-slerp-V2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MFANN-phigments-slerp-V2", + "id": "netcat420/MFANN-phigments-slerp-V2", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3232 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4827 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": 
"MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0317 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4037 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2717 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN-phigments-slerp-V3.2/3398aeb8-08a8-4be9-a24c-efeabcaa2139.json b/data/hfopenllm_v2/netcat420/MFANN-phigments-slerp-V3.2/3398aeb8-08a8-4be9-a24c-efeabcaa2139.json new file mode 100644 index 000000000..fd602beef --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN-phigments-slerp-V3.2/3398aeb8-08a8-4be9-a24c-efeabcaa2139.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN-phigments-slerp-V3.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MFANN-phigments-slerp-V3.2", + "id": "netcat420/MFANN-phigments-slerp-V3.2", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3524 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4809 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0332 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3708 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2705 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN-phigments-slerp-V3.3/707bc006-4318-41bc-b91b-aa43ca7cba6f.json b/data/hfopenllm_v2/netcat420/MFANN-phigments-slerp-V3.3/707bc006-4318-41bc-b91b-aa43ca7cba6f.json new file mode 100644 index 000000000..bc28c5ad6 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN-phigments-slerp-V3.3/707bc006-4318-41bc-b91b-aa43ca7cba6f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN-phigments-slerp-V3.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MFANN-phigments-slerp-V3.3", + "id": "netcat420/MFANN-phigments-slerp-V3.3", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3691 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4895 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0332 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + 
"source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2752 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3892 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2803 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN3b/7bfda919-13be-4b68-8655-99fe6a4605a2.json b/data/hfopenllm_v2/netcat420/MFANN3b/7bfda919-13be-4b68-8655-99fe6a4605a2.json new file mode 100644 index 000000000..1d0c531da --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN3b/7bfda919-13be-4b68-8655-99fe6a4605a2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN3b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MFANN3b", + "id": "netcat420/MFANN3b", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2524 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4433 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0219 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + 
"source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3606 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2306 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN3b/c5913e2b-c8c7-4e8f-a1c3-f2f764c8478d.json b/data/hfopenllm_v2/netcat420/MFANN3b/c5913e2b-c8c7-4e8f-a1c3-f2f764c8478d.json deleted file mode 100644 index 18e2a8c15..000000000 --- a/data/hfopenllm_v2/netcat420/MFANN3b/c5913e2b-c8c7-4e8f-a1c3-f2f764c8478d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN3b/1762652580.395648", - "retrieved_timestamp": "1762652580.395648", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/MFANN3b", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANN3b", - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.78 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2524435165361241 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4433128382028508 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36060416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23055186170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/netcat420/MFANN3bv0.15/ebdb6805-f14e-4fb9-b1c8-acd258b93385.json 
b/data/hfopenllm_v2/netcat420/MFANN3bv0.15/ebdb6805-f14e-4fb9-b1c8-acd258b93385.json deleted file mode 100644 index e70ff3e37..000000000 --- a/data/hfopenllm_v2/netcat420/MFANN3bv0.15/ebdb6805-f14e-4fb9-b1c8-acd258b93385.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv0.15/1762652580.3958452", - "retrieved_timestamp": "1762652580.395846", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/MFANN3bv0.15", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANN3bv0.15", - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.78 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2012105657433388 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.453931293669888 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3957916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24684175531914893 - } - } - ] -} diff --git a/data/hfopenllm_v2/netcat420/MFANN3bv0.15/f844e739-5f0d-4db4-ba66-bd33b1290571.json b/data/hfopenllm_v2/netcat420/MFANN3bv0.15/f844e739-5f0d-4db4-ba66-bd33b1290571.json new file mode 100644 index 000000000..da81b800a --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN3bv0.15/f844e739-5f0d-4db4-ba66-bd33b1290571.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv0.15/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MFANN3bv0.15", + "id": "netcat420/MFANN3bv0.15", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + 
} + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2012 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4539 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0264 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2517 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3958 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2468 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN3bv0.18/0cde6639-6a89-4682-bb3e-a2a24a1bc8ab.json b/data/hfopenllm_v2/netcat420/MFANN3bv0.18/0cde6639-6a89-4682-bb3e-a2a24a1bc8ab.json new file mode 100644 index 000000000..691ede716 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN3bv0.18/0cde6639-6a89-4682-bb3e-a2a24a1bc8ab.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv0.18/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MFANN3bv0.18", + "id": "netcat420/MFANN3bv0.18", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2206 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4514 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0249 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4024 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.25 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN3bv0.18/5b522625-39ed-4faa-a3f6-1cec01baf906.json b/data/hfopenllm_v2/netcat420/MFANN3bv0.18/5b522625-39ed-4faa-a3f6-1cec01baf906.json deleted file mode 100644 index 6ecf5cdcc..000000000 --- a/data/hfopenllm_v2/netcat420/MFANN3bv0.18/5b522625-39ed-4faa-a3f6-1cec01baf906.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv0.18/1762652580.396076", - "retrieved_timestamp": "1762652580.396081", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/MFANN3bv0.18", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANN3bv0.18", - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.78 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22064455644356973 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4514366169824164 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40236458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - } - ] -} diff --git a/data/hfopenllm_v2/netcat420/MFANN3bv0.19/4207b373-ef5c-48f8-a463-814b81a44410.json b/data/hfopenllm_v2/netcat420/MFANN3bv0.19/4207b373-ef5c-48f8-a463-814b81a44410.json deleted file mode 100644 index 14a9411c9..000000000 --- a/data/hfopenllm_v2/netcat420/MFANN3bv0.19/4207b373-ef5c-48f8-a463-814b81a44410.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv0.19/1762652580.396478", - "retrieved_timestamp": "1762652580.396479", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/MFANN3bv0.19", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANN3bv0.19", - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.78 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22581528123157665 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4515800678058734 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.022658610271903322 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.40239583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25199468085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/netcat420/MFANN3bv0.19/87652005-4404-4c45-bd4f-5f63c44adf63.json b/data/hfopenllm_v2/netcat420/MFANN3bv0.19/87652005-4404-4c45-bd4f-5f63c44adf63.json new file mode 100644 index 000000000..b5b586fea --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN3bv0.19/87652005-4404-4c45-bd4f-5f63c44adf63.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv0.19/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MFANN3bv0.19", + "id": "netcat420/MFANN3bv0.19", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2258 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4516 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0227 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4024 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.252 + } + } + ] +} \ No newline 
at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN3bv0.20/2d36210e-e2ca-41a8-9434-c29168849a28.json b/data/hfopenllm_v2/netcat420/MFANN3bv0.20/2d36210e-e2ca-41a8-9434-c29168849a28.json deleted file mode 100644 index 2c143dba8..000000000 --- a/data/hfopenllm_v2/netcat420/MFANN3bv0.20/2d36210e-e2ca-41a8-9434-c29168849a28.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv0.20/1762652580.3967948", - "retrieved_timestamp": "1762652580.396796", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/MFANN3bv0.20", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANN3bv0.20", - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.78 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21934578030736224 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4493365019423472 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4077291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - } - ] -} diff --git a/data/hfopenllm_v2/netcat420/MFANN3bv0.20/a7e0bc2d-784d-4719-ac08-d8fa0c29d178.json b/data/hfopenllm_v2/netcat420/MFANN3bv0.20/a7e0bc2d-784d-4719-ac08-d8fa0c29d178.json new file mode 100644 index 000000000..560868144 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN3bv0.20/a7e0bc2d-784d-4719-ac08-d8fa0c29d178.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv0.20/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MFANN3bv0.20", + "id": "netcat420/MFANN3bv0.20", + "developer": "netcat420", + "inference_platform": "unknown", + 
"additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2193 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4493 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0264 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4077 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.25 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN3bv0.21/053f6333-9722-4c3e-a5bb-246b273225de.json b/data/hfopenllm_v2/netcat420/MFANN3bv0.21/053f6333-9722-4c3e-a5bb-246b273225de.json deleted file mode 100644 index 60b680993..000000000 --- a/data/hfopenllm_v2/netcat420/MFANN3bv0.21/053f6333-9722-4c3e-a5bb-246b273225de.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv0.21/1762652580.397045", - "retrieved_timestamp": "1762652580.397046", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/MFANN3bv0.21", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANN3bv0.21", - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.78 - } - }, - "evaluation_results": [ - { - "evaluation_name": 
"IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1909189838517356 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44700236898039053 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03172205438066465 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37594791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23927859042553193 - } - } - ] -} diff --git a/data/hfopenllm_v2/netcat420/MFANN3bv0.21/e8ba93e6-6f90-4169-8403-381b7f9e26ab.json b/data/hfopenllm_v2/netcat420/MFANN3bv0.21/e8ba93e6-6f90-4169-8403-381b7f9e26ab.json new file mode 100644 index 000000000..12ac93cd4 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN3bv0.21/e8ba93e6-6f90-4169-8403-381b7f9e26ab.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv0.21/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MFANN3bv0.21", + "id": "netcat420/MFANN3bv0.21", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1909 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.447 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0317 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3759 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2393 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN3bv0.22/e551e936-41fa-4fda-84e9-dec9f5694c5d.json b/data/hfopenllm_v2/netcat420/MFANN3bv0.22/e551e936-41fa-4fda-84e9-dec9f5694c5d.json deleted file mode 100644 index b807a7f5b..000000000 --- a/data/hfopenllm_v2/netcat420/MFANN3bv0.22/e551e936-41fa-4fda-84e9-dec9f5694c5d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv0.22/1762652580.39726", - "retrieved_timestamp": "1762652580.3972611", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/MFANN3bv0.22", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANN3bv0.22", - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.78 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1979381374752324 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44851095830051274 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on 
MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35213541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2517453457446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/netcat420/MFANN3bv0.22/ea86b542-3d06-4e71-b49d-17cdd362b465.json b/data/hfopenllm_v2/netcat420/MFANN3bv0.22/ea86b542-3d06-4e71-b49d-17cdd362b465.json new file mode 100644 index 000000000..48ebc0f8a --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN3bv0.22/ea86b542-3d06-4e71-b49d-17cdd362b465.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv0.22/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MFANN3bv0.22", + "id": "netcat420/MFANN3bv0.22", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1979 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4485 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0264 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3521 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2517 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN3bv0.23/15615d2c-46a1-47c7-a273-697e97bdf9f2.json b/data/hfopenllm_v2/netcat420/MFANN3bv0.23/15615d2c-46a1-47c7-a273-697e97bdf9f2.json new file mode 100644 index 000000000..0a80010af --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN3bv0.23/15615d2c-46a1-47c7-a273-697e97bdf9f2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv0.23/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MFANN3bv0.23", + "id": "netcat420/MFANN3bv0.23", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2048 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4495 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0249 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2517 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3427 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2418 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN3bv0.23/28396f73-b949-4db0-b685-77fc5901770b.json 
b/data/hfopenllm_v2/netcat420/MFANN3bv0.23/28396f73-b949-4db0-b685-77fc5901770b.json deleted file mode 100644 index 1d4392d17..000000000 --- a/data/hfopenllm_v2/netcat420/MFANN3bv0.23/28396f73-b949-4db0-b685-77fc5901770b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv0.23/1762652580.39747", - "retrieved_timestamp": "1762652580.397471", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/MFANN3bv0.23", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANN3bv0.23", - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.78 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20480768804549704 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44954178056127364 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3427395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2417719414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/netcat420/MFANN3bv0.24/0081cd67-9178-4443-aebf-721b75c0fc77.json b/data/hfopenllm_v2/netcat420/MFANN3bv0.24/0081cd67-9178-4443-aebf-721b75c0fc77.json deleted file mode 100644 index 4d2034354..000000000 --- a/data/hfopenllm_v2/netcat420/MFANN3bv0.24/0081cd67-9178-4443-aebf-721b75c0fc77.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv0.24/1762652580.397681", - "retrieved_timestamp": "1762652580.397682", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/MFANN3bv0.24", - "developer": "netcat420", - "inference_platform": "unknown", - "id": 
"netcat420/MFANN3bv0.24", - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.78 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2200450360598767 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4407346600666096 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3520729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23520611702127658 - } - } - ] -} diff --git a/data/hfopenllm_v2/netcat420/MFANN3bv0.24/a2b8da3f-c99e-4dba-b4a2-23739281eaf2.json b/data/hfopenllm_v2/netcat420/MFANN3bv0.24/a2b8da3f-c99e-4dba-b4a2-23739281eaf2.json new file mode 100644 index 000000000..95741769c --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN3bv0.24/a2b8da3f-c99e-4dba-b4a2-23739281eaf2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv0.24/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MFANN3bv0.24", + "id": "netcat420/MFANN3bv0.24", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.22 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4407 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + 
"dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0279 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3521 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2352 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN3bv1.1/76f3fa3a-1629-4cdd-b457-3a108784b427.json b/data/hfopenllm_v2/netcat420/MFANN3bv1.1/76f3fa3a-1629-4cdd-b457-3a108784b427.json new file mode 100644 index 000000000..2c3b57cd6 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN3bv1.1/76f3fa3a-1629-4cdd-b457-3a108784b427.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv1.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MFANN3bv1.1", + "id": "netcat420/MFANN3bv1.1", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.775 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2507 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3397 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.0204 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3223 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1159 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN3bv1.1/fb148468-c189-4fe5-b803-7532af8dec1d.json b/data/hfopenllm_v2/netcat420/MFANN3bv1.1/fb148468-c189-4fe5-b803-7532af8dec1d.json deleted file mode 100644 index 7861d1408..000000000 --- a/data/hfopenllm_v2/netcat420/MFANN3bv1.1/fb148468-c189-4fe5-b803-7532af8dec1d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv1.1/1762652580.3978848", - "retrieved_timestamp": "1762652580.397886", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/MFANN3bv1.1", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANN3bv1.1", - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.775 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2506948230694557 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3397086626022651 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3223125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11585771276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/netcat420/MFANN3bv1.2/16b4d316-db1d-4282-a5c0-b8ffe4af817c.json b/data/hfopenllm_v2/netcat420/MFANN3bv1.2/16b4d316-db1d-4282-a5c0-b8ffe4af817c.json deleted file mode 100644 index b6d2b893e..000000000 --- a/data/hfopenllm_v2/netcat420/MFANN3bv1.2/16b4d316-db1d-4282-a5c0-b8ffe4af817c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv1.2/1762652580.3980958", - "retrieved_timestamp": "1762652580.3980958", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/MFANN3bv1.2", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANN3bv1.2", - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.775 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2686050789682487 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3659932511014956 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31555208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14502992021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/netcat420/MFANN3bv1.2/c9e979e1-4433-4a38-8fd4-c14895e74f44.json b/data/hfopenllm_v2/netcat420/MFANN3bv1.2/c9e979e1-4433-4a38-8fd4-c14895e74f44.json new file mode 100644 index 000000000..f3b97324c --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN3bv1.2/c9e979e1-4433-4a38-8fd4-c14895e74f44.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv1.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + 
"source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MFANN3bv1.2", + "id": "netcat420/MFANN3bv1.2", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.775 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2686 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.366 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0264 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3156 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.145 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN3bv1.3/3f2effba-1ab8-476d-b228-ed9491e83adf.json b/data/hfopenllm_v2/netcat420/MFANN3bv1.3/3f2effba-1ab8-476d-b228-ed9491e83adf.json new file mode 100644 index 000000000..5ea688265 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN3bv1.3/3f2effba-1ab8-476d-b228-ed9491e83adf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv1.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MFANN3bv1.3", + "id": "netcat420/MFANN3bv1.3", + "developer": "netcat420", + "inference_platform": "unknown", + 
"additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2547 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4456 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0211 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3299 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2276 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN3bv1.3/5981cb70-62a7-4e42-bf12-081c67c1b792.json b/data/hfopenllm_v2/netcat420/MFANN3bv1.3/5981cb70-62a7-4e42-bf12-081c67c1b792.json deleted file mode 100644 index 1e34d7c9c..000000000 --- a/data/hfopenllm_v2/netcat420/MFANN3bv1.3/5981cb70-62a7-4e42-bf12-081c67c1b792.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv1.3/1762652580.3983822", - "retrieved_timestamp": "1762652580.3983831", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/MFANN3bv1.3", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANN3bv1.3", - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.78 - } - }, - "evaluation_results": [ - { - "evaluation_name": 
"IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25466650709007654 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4456312489762861 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.021148036253776436 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.329875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22755984042553193 - } - } - ] -} diff --git a/data/hfopenllm_v2/netcat420/MFANN3bv1.4/426bdea2-83f2-4915-9e82-ba4c8c8f4224.json b/data/hfopenllm_v2/netcat420/MFANN3bv1.4/426bdea2-83f2-4915-9e82-ba4c8c8f4224.json deleted file mode 100644 index 16f17a5ff..000000000 --- a/data/hfopenllm_v2/netcat420/MFANN3bv1.4/426bdea2-83f2-4915-9e82-ba4c8c8f4224.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv1.4/1762652580.398614", - "retrieved_timestamp": "1762652580.3986151", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/MFANN3bv1.4", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANN3bv1.4", - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.78 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35243598097492435 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4808549324972969 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03700906344410876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": 
"Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3707708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2705285904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/netcat420/MFANN3bv1.4/a5f0fb1b-27a7-495f-a010-3307afdb8949.json b/data/hfopenllm_v2/netcat420/MFANN3bv1.4/a5f0fb1b-27a7-495f-a010-3307afdb8949.json new file mode 100644 index 000000000..6b3a33345 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN3bv1.4/a5f0fb1b-27a7-495f-a010-3307afdb8949.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv1.4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MFANN3bv1.4", + "id": "netcat420/MFANN3bv1.4", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3524 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4809 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.037 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3708 + } + }, + { + 
"evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2705 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANNv0.19/22f2aa1d-fff1-430a-9c20-3b32859d9665.json b/data/hfopenllm_v2/netcat420/MFANNv0.19/22f2aa1d-fff1-430a-9c20-3b32859d9665.json new file mode 100644 index 000000000..4621646f1 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANNv0.19/22f2aa1d-fff1-430a-9c20-3b32859d9665.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_MFANNv0.19/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MFANNv0.19", + "id": "netcat420/MFANNv0.19", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3057 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4731 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0415 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3527 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.2473 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANNv0.19/d2b0785d-a169-4773-a3fc-95b536fe3cc2.json b/data/hfopenllm_v2/netcat420/MFANNv0.19/d2b0785d-a169-4773-a3fc-95b536fe3cc2.json deleted file mode 100644 index 704df58a1..000000000 --- a/data/hfopenllm_v2/netcat420/MFANNv0.19/d2b0785d-a169-4773-a3fc-95b536fe3cc2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_MFANNv0.19/1762652580.39887", - "retrieved_timestamp": "1762652580.39887", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/MFANNv0.19", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANNv0.19", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30567449921763146 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47313832038755316 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04154078549848943 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35269791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24725731382978725 - } - } - ] -} diff --git a/data/hfopenllm_v2/netcat420/MFANNv0.20/4c84cbc4-1a4d-45d9-909b-92d2b4e961b6.json b/data/hfopenllm_v2/netcat420/MFANNv0.20/4c84cbc4-1a4d-45d9-909b-92d2b4e961b6.json deleted file mode 100644 index 76b146d99..000000000 --- a/data/hfopenllm_v2/netcat420/MFANNv0.20/4c84cbc4-1a4d-45d9-909b-92d2b4e961b6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_MFANNv0.20/1762652580.399081", - "retrieved_timestamp": "1762652580.399082", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM 
v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/MFANNv0.20", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANNv0.20", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34786477657061043 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4574431878198548 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04984894259818731 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38739583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32022938829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/netcat420/MFANNv0.20/daff0e6f-d29f-4861-855f-902a0cd9a469.json b/data/hfopenllm_v2/netcat420/MFANNv0.20/daff0e6f-d29f-4861-855f-902a0cd9a469.json new file mode 100644 index 000000000..b815945e1 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANNv0.20/daff0e6f-d29f-4861-855f-902a0cd9a469.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_MFANNv0.20/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MFANNv0.20", + "id": "netcat420/MFANNv0.20", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3479 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4574 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0498 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3874 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3202 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANNv0.21/0f5cb926-b691-4d57-87f5-290235fd250a.json b/data/hfopenllm_v2/netcat420/MFANNv0.21/0f5cb926-b691-4d57-87f5-290235fd250a.json new file mode 100644 index 000000000..0d6243929 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANNv0.21/0f5cb926-b691-4d57-87f5-290235fd250a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_MFANNv0.21/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MFANNv0.21", + "id": "netcat420/MFANNv0.21", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3233 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4576 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0574 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2785 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3993 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3031 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANNv0.21/5d37ba65-09f6-4762-836e-4634c06ac9f7.json b/data/hfopenllm_v2/netcat420/MFANNv0.21/5d37ba65-09f6-4762-836e-4634c06ac9f7.json deleted file mode 100644 index f4d547b08..000000000 --- a/data/hfopenllm_v2/netcat420/MFANNv0.21/5d37ba65-09f6-4762-836e-4634c06ac9f7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_MFANNv0.21/1762652580.399296", - "retrieved_timestamp": "1762652580.399297", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/MFANNv0.21", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANNv0.21", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3233099287667832 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45763723048372523 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3993333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3031083776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/netcat420/MFANNv0.22.1/5009ba04-1a8d-4e91-bd32-659fe67c4d26.json b/data/hfopenllm_v2/netcat420/MFANNv0.22.1/5009ba04-1a8d-4e91-bd32-659fe67c4d26.json deleted file mode 100644 index 9c738c30a..000000000 --- a/data/hfopenllm_v2/netcat420/MFANNv0.22.1/5009ba04-1a8d-4e91-bd32-659fe67c4d26.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_MFANNv0.22.1/1762652580.3995059", - "retrieved_timestamp": "1762652580.399507", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/MFANNv0.22.1", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANNv0.22.1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3089469274857378 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46608928527824584 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3753020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33427526595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/netcat420/MFANNv0.22.1/d9e813da-2966-4901-99f9-c7627c64fc52.json b/data/hfopenllm_v2/netcat420/MFANNv0.22.1/d9e813da-2966-4901-99f9-c7627c64fc52.json new file mode 100644 index 000000000..7b869bbb5 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANNv0.22.1/d9e813da-2966-4901-99f9-c7627c64fc52.json @@ -0,0 +1,132 @@ 
+{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_MFANNv0.22.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MFANNv0.22.1", + "id": "netcat420/MFANNv0.22.1", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3089 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4661 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0536 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3753 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3343 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANNv0.23/4cb98a5b-3eb7-4fa8-adfd-17add38d3332.json b/data/hfopenllm_v2/netcat420/MFANNv0.23/4cb98a5b-3eb7-4fa8-adfd-17add38d3332.json new file mode 100644 index 000000000..80ebfd3ef --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANNv0.23/4cb98a5b-3eb7-4fa8-adfd-17add38d3332.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_MFANNv0.23/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + 
"evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MFANNv0.23", + "id": "netcat420/MFANNv0.23", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3127 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4898 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0498 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2844 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3768 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3388 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANNv0.23/f7b617fa-7095-4eef-88bb-4fd73c23d5dc.json b/data/hfopenllm_v2/netcat420/MFANNv0.23/f7b617fa-7095-4eef-88bb-4fd73c23d5dc.json deleted file mode 100644 index 0f304a014..000000000 --- a/data/hfopenllm_v2/netcat420/MFANNv0.23/f7b617fa-7095-4eef-88bb-4fd73c23d5dc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_MFANNv0.23/1762652580.3997262", - "retrieved_timestamp": "1762652580.399727", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/MFANNv0.23", - "developer": "netcat420", - "inference_platform": "unknown", - "id": 
"netcat420/MFANNv0.23", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3127435205255389 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4898102063834755 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04984894259818731 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3767916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33876329787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/netcat420/MFANNv0.24/59e5fcd0-e46f-4346-b695-bee4dab9cfc4.json b/data/hfopenllm_v2/netcat420/MFANNv0.24/59e5fcd0-e46f-4346-b695-bee4dab9cfc4.json deleted file mode 100644 index 3de06ef85..000000000 --- a/data/hfopenllm_v2/netcat420/MFANNv0.24/59e5fcd0-e46f-4346-b695-bee4dab9cfc4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_MFANNv0.24/1762652580.3999438", - "retrieved_timestamp": "1762652580.3999438", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/MFANNv0.24", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANNv0.24", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162409074588758 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.479027491915232 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3753958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3347739361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/netcat420/MFANNv0.24/f7494fd4-d248-46a6-a46d-f9d8db560aae.json b/data/hfopenllm_v2/netcat420/MFANNv0.24/f7494fd4-d248-46a6-a46d-f9d8db560aae.json new file mode 100644 index 000000000..89be208aa --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANNv0.24/f7494fd4-d248-46a6-a46d-f9d8db560aae.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_MFANNv0.24/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MFANNv0.24", + "id": "netcat420/MFANNv0.24", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3162 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.479 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0612 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2844 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3754 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3348 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANNv0.25/4b8533d1-7770-435f-ba76-a5c658aabd8f.json b/data/hfopenllm_v2/netcat420/MFANNv0.25/4b8533d1-7770-435f-ba76-a5c658aabd8f.json new file mode 100644 index 000000000..c62b5f82d --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANNv0.25/4b8533d1-7770-435f-ba76-a5c658aabd8f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_MFANNv0.25/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MFANNv0.25", + "id": "netcat420/MFANNv0.25", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3467 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4794 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0582 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3688 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + 
"source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3343 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANNv0.25/e94f28ff-ae6c-4109-96a2-9dbe07621e03.json b/data/hfopenllm_v2/netcat420/MFANNv0.25/e94f28ff-ae6c-4109-96a2-9dbe07621e03.json deleted file mode 100644 index 3e9b593c4..000000000 --- a/data/hfopenllm_v2/netcat420/MFANNv0.25/e94f28ff-ae6c-4109-96a2-9dbe07621e03.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netcat420_MFANNv0.25/1762652580.400151", - "retrieved_timestamp": "1762652580.400151", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netcat420/MFANNv0.25", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANNv0.25", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34666573580322435 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47940650861209216 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0581570996978852 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36879166666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33427526595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/netcat420/Qwen2.5-7B-nerd-uncensored-v0.9-MFANN/309c7906-0010-4f17-848f-185062d96a26.json b/data/hfopenllm_v2/netcat420/Qwen2.5-7B-nerd-uncensored-v0.9-MFANN/309c7906-0010-4f17-848f-185062d96a26.json new file mode 100644 index 000000000..f9f6fbeee --- /dev/null +++ b/data/hfopenllm_v2/netcat420/Qwen2.5-7B-nerd-uncensored-v0.9-MFANN/309c7906-0010-4f17-848f-185062d96a26.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/netcat420_Qwen2.5-7B-nerd-uncensored-v0.9-MFANN/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-nerd-uncensored-v0.9-MFANN", + "id": "netcat420/Qwen2.5-7B-nerd-uncensored-v0.9-MFANN", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5878 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5237 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3376 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3926 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3904 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/Qwen2.5-7b-MFANN-slerp/f18ab2ab-098b-4e46-8f8d-433b52cdb81b.json b/data/hfopenllm_v2/netcat420/Qwen2.5-7b-MFANN-slerp/f18ab2ab-098b-4e46-8f8d-433b52cdb81b.json new file mode 100644 index 000000000..25a7240c6 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/Qwen2.5-7b-MFANN-slerp/f18ab2ab-098b-4e46-8f8d-433b52cdb81b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_Qwen2.5-7b-MFANN-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": 
"documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7b-MFANN-slerp", + "id": "netcat420/Qwen2.5-7b-MFANN-slerp", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6532 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5089 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.287 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4073 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3417 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/Qwen2.5-7b-nerd-uncensored-MFANN-slerp/b4a70c71-dfac-4888-937e-d5220b491b0e.json b/data/hfopenllm_v2/netcat420/Qwen2.5-7b-nerd-uncensored-MFANN-slerp/b4a70c71-dfac-4888-937e-d5220b491b0e.json new file mode 100644 index 000000000..07ded6ed7 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/Qwen2.5-7b-nerd-uncensored-MFANN-slerp/b4a70c71-dfac-4888-937e-d5220b491b0e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_Qwen2.5-7b-nerd-uncensored-MFANN-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"Qwen2.5-7b-nerd-uncensored-MFANN-slerp", + "id": "netcat420/Qwen2.5-7b-nerd-uncensored-MFANN-slerp", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1564 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.292 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3792 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.11 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN-Slerp-Unretrained/b879a534-6b24-4873-a0e4-e18453540121.json b/data/hfopenllm_v2/netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN-Slerp-Unretrained/b879a534-6b24-4873-a0e4-e18453540121.json new file mode 100644 index 000000000..6830a0165 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN-Slerp-Unretrained/b879a534-6b24-4873-a0e4-e18453540121.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN-Slerp-Unretrained/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN-Slerp-Unretrained", + "id": "netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN-Slerp-Unretrained", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6486 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5066 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2991 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4152 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3432 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN/c67ae8f2-596b-4dab-8c4f-768b2f0608b4.json b/data/hfopenllm_v2/netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN/c67ae8f2-596b-4dab-8c4f-768b2f0608b4.json new file mode 100644 index 000000000..2e55a4ead --- /dev/null +++ b/data/hfopenllm_v2/netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN/c67ae8f2-596b-4dab-8c4f-768b2f0608b4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN", + "id": 
"netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5742 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5071 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2568 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4058 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3157 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/Qwen2.5-DeepSeek-R1-MFANN-Slerp-7b/7766c638-b4dc-4b2d-8c14-becdb1b709ef.json b/data/hfopenllm_v2/netcat420/Qwen2.5-DeepSeek-R1-MFANN-Slerp-7b/7766c638-b4dc-4b2d-8c14-becdb1b709ef.json new file mode 100644 index 000000000..a0253b996 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/Qwen2.5-DeepSeek-R1-MFANN-Slerp-7b/7766c638-b4dc-4b2d-8c14-becdb1b709ef.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_Qwen2.5-DeepSeek-R1-MFANN-Slerp-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-DeepSeek-R1-MFANN-Slerp-7b", + "id": "netcat420/Qwen2.5-DeepSeek-R1-MFANN-Slerp-7b", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": 
"float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3789 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0181 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2324 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3528 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1677 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/Qwen2.5-MFANN-7b/dd211bef-3940-4d78-8f7b-a67da81d605b.json b/data/hfopenllm_v2/netcat420/Qwen2.5-MFANN-7b/dd211bef-3940-4d78-8f7b-a67da81d605b.json new file mode 100644 index 000000000..1928a3f0f --- /dev/null +++ b/data/hfopenllm_v2/netcat420/Qwen2.5-MFANN-7b/dd211bef-3940-4d78-8f7b-a67da81d605b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_Qwen2.5-MFANN-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-MFANN-7b", + "id": "netcat420/Qwen2.5-MFANN-7b", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6097 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5054 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2787 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2861 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4021 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3233 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/qwen2.5-MFANN-7b-SLERP-V1.2/87e20b7a-85c8-4845-94b0-ace1e18814cb.json b/data/hfopenllm_v2/netcat420/qwen2.5-MFANN-7b-SLERP-V1.2/87e20b7a-85c8-4845-94b0-ace1e18814cb.json new file mode 100644 index 000000000..51f2c598a --- /dev/null +++ b/data/hfopenllm_v2/netcat420/qwen2.5-MFANN-7b-SLERP-V1.2/87e20b7a-85c8-4845-94b0-ace1e18814cb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_qwen2.5-MFANN-7b-SLERP-V1.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwen2.5-MFANN-7b-SLERP-V1.2", + "id": "netcat420/qwen2.5-MFANN-7b-SLERP-V1.2", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.6606 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5111 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.287 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4259 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3438 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/qwen2.5-MFANN-7b-SLERPv1.1/9ab01db6-3154-4c5b-b6a2-35479538d332.json b/data/hfopenllm_v2/netcat420/qwen2.5-MFANN-7b-SLERPv1.1/9ab01db6-3154-4c5b-b6a2-35479538d332.json new file mode 100644 index 000000000..6d923911a --- /dev/null +++ b/data/hfopenllm_v2/netcat420/qwen2.5-MFANN-7b-SLERPv1.1/9ab01db6-3154-4c5b-b6a2-35479538d332.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_qwen2.5-MFANN-7b-SLERPv1.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwen2.5-MFANN-7b-SLERPv1.1", + "id": "netcat420/qwen2.5-MFANN-7b-SLERPv1.1", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6555 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5075 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2968 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4126 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3448 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/qwen2.5-MFANN-7b-v1.1/9d35316a-011d-4e45-ae57-317b53de621f.json b/data/hfopenllm_v2/netcat420/qwen2.5-MFANN-7b-v1.1/9d35316a-011d-4e45-ae57-317b53de621f.json new file mode 100644 index 000000000..6d09d9e25 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/qwen2.5-MFANN-7b-v1.1/9d35316a-011d-4e45-ae57-317b53de621f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netcat420_qwen2.5-MFANN-7b-v1.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwen2.5-MFANN-7b-v1.1", + "id": "netcat420/qwen2.5-MFANN-7b-v1.1", + "developer": "netcat420", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6088 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4967 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + 
"dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2825 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4114 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3248 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netease-youdao/Confucius-o1-14B/c9e7fec0-b244-4ca1-a117-a52fdd4671a5.json b/data/hfopenllm_v2/netease-youdao/Confucius-o1-14B/c9e7fec0-b244-4ca1-a117-a52fdd4671a5.json new file mode 100644 index 000000000..d488ce1e9 --- /dev/null +++ b/data/hfopenllm_v2/netease-youdao/Confucius-o1-14B/c9e7fec0-b244-4ca1-a117-a52fdd4671a5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/netease-youdao_Confucius-o1-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Confucius-o1-14B", + "id": "netease-youdao/Confucius-o1-14B", + "developer": "netease-youdao", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6378 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.63 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4313 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3649 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4338 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5265 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netease-youdao/Confucius-o1-14B/ddd234e4-0665-4b36-943f-e99f0a293f50.json b/data/hfopenllm_v2/netease-youdao/Confucius-o1-14B/ddd234e4-0665-4b36-943f-e99f0a293f50.json deleted file mode 100644 index 002e5782b..000000000 --- a/data/hfopenllm_v2/netease-youdao/Confucius-o1-14B/ddd234e4-0665-4b36-943f-e99f0a293f50.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/netease-youdao_Confucius-o1-14B/1762652580.4025002", - "retrieved_timestamp": "1762652580.402501", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "netease-youdao/Confucius-o1-14B", - "developer": "netease-youdao", - "inference_platform": "unknown", - "id": "netease-youdao/Confucius-o1-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6378497941018719 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6299772409698484 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4312688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3649328859060403 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4338125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5265126329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/newsbang/Homer-7B-v0.1/0659cb01-0d52-42cb-9e3a-2d8cac01692e.json b/data/hfopenllm_v2/newsbang/Homer-7B-v0.1/0659cb01-0d52-42cb-9e3a-2d8cac01692e.json new file mode 100644 index 000000000..889543f52 --- /dev/null +++ b/data/hfopenllm_v2/newsbang/Homer-7B-v0.1/0659cb01-0d52-42cb-9e3a-2d8cac01692e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/newsbang_Homer-7B-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Homer-7B-v0.1", + "id": "newsbang/Homer-7B-v0.1", + "developer": "newsbang", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6109 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5601 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.386 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3247 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4357 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4475 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/newsbang/Homer-7B-v0.1/af9ae4eb-2fdf-414a-8585-4f0f894a6a49.json b/data/hfopenllm_v2/newsbang/Homer-7B-v0.1/af9ae4eb-2fdf-414a-8585-4f0f894a6a49.json deleted file mode 100644 index 44ef3a150..000000000 --- a/data/hfopenllm_v2/newsbang/Homer-7B-v0.1/af9ae4eb-2fdf-414a-8585-4f0f894a6a49.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/newsbang_Homer-7B-v0.1/1762652580.402741", - "retrieved_timestamp": "1762652580.402742", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "newsbang/Homer-7B-v0.1", - "developer": "newsbang", - "inference_platform": "unknown", - "id": "newsbang/Homer-7B-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6108724850064495 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5601389961416444 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3859516616314199 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43569791666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4474734042553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/newsbang/Homer-7B-v0.2/98490bb1-70f0-4e7a-8fd6-698ec9fcbd5a.json b/data/hfopenllm_v2/newsbang/Homer-7B-v0.2/98490bb1-70f0-4e7a-8fd6-698ec9fcbd5a.json new file mode 100644 index 000000000..0e6b4cdcc --- /dev/null +++ b/data/hfopenllm_v2/newsbang/Homer-7B-v0.2/98490bb1-70f0-4e7a-8fd6-698ec9fcbd5a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/newsbang_Homer-7B-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + 
"evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Homer-7B-v0.2", + "id": "newsbang/Homer-7B-v0.2", + "developer": "newsbang", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7494 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5517 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2477 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3322 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4298 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.441 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/newsbang/Homer-7B-v0.2/d7964788-36a6-4b86-add6-cd8a1a42eb7c.json b/data/hfopenllm_v2/newsbang/Homer-7B-v0.2/d7964788-36a6-4b86-add6-cd8a1a42eb7c.json deleted file mode 100644 index bd9b017bb..000000000 --- a/data/hfopenllm_v2/newsbang/Homer-7B-v0.2/d7964788-36a6-4b86-add6-cd8a1a42eb7c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/newsbang_Homer-7B-v0.2/1762652580.403213", - "retrieved_timestamp": "1762652580.4032168", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "newsbang/Homer-7B-v0.2", - "developer": "newsbang", - "inference_platform": "unknown", - "id": 
"newsbang/Homer-7B-v0.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7493827488840721 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5517330182832224 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24773413897280966 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33221476510067116 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42975 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4409906914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/newsbang/Homer-v0.3-Qwen2.5-7B/6e0f7e7e-8927-436e-95a7-5a7c626ca241.json b/data/hfopenllm_v2/newsbang/Homer-v0.3-Qwen2.5-7B/6e0f7e7e-8927-436e-95a7-5a7c626ca241.json new file mode 100644 index 000000000..b533392a6 --- /dev/null +++ b/data/hfopenllm_v2/newsbang/Homer-v0.3-Qwen2.5-7B/6e0f7e7e-8927-436e-95a7-5a7c626ca241.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/newsbang_Homer-v0.3-Qwen2.5-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Homer-v0.3-Qwen2.5-7B", + "id": "newsbang/Homer-v0.3-Qwen2.5-7B", + "developer": "newsbang", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5154 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5481 + } + }, + { + "evaluation_name": 
"MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3089 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3339 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4744 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4456 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/newsbang/Homer-v0.4-Qwen2.5-7B/9c5b3f4d-6e0b-482b-b142-dd7b387cae22.json b/data/hfopenllm_v2/newsbang/Homer-v0.4-Qwen2.5-7B/9c5b3f4d-6e0b-482b-b142-dd7b387cae22.json new file mode 100644 index 000000000..c5e3960e9 --- /dev/null +++ b/data/hfopenllm_v2/newsbang/Homer-v0.4-Qwen2.5-7B/9c5b3f4d-6e0b-482b-b142-dd7b387cae22.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/newsbang_Homer-v0.4-Qwen2.5-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Homer-v0.4-Qwen2.5-7B", + "id": "newsbang/Homer-v0.4-Qwen2.5-7B", + "developer": "newsbang", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7999 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5533 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2779 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3154 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4311 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4363 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/newsbang/Homer-v0.5-Qwen2.5-7B/04840708-a4cc-407c-8b2a-876b382920a1.json b/data/hfopenllm_v2/newsbang/Homer-v0.5-Qwen2.5-7B/04840708-a4cc-407c-8b2a-876b382920a1.json new file mode 100644 index 000000000..63f957333 --- /dev/null +++ b/data/hfopenllm_v2/newsbang/Homer-v0.5-Qwen2.5-7B/04840708-a4cc-407c-8b2a-876b382920a1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/newsbang_Homer-v0.5-Qwen2.5-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Homer-v0.5-Qwen2.5-7B", + "id": "newsbang/Homer-v0.5-Qwen2.5-7B", + "developer": "newsbang", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7881 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.554 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3724 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": 
"hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4193 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4369 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/newsbang/Homer-v1.0-Qwen2.5-72B/83b0844c-70fe-4b63-8ed2-4147390518ee.json b/data/hfopenllm_v2/newsbang/Homer-v1.0-Qwen2.5-72B/83b0844c-70fe-4b63-8ed2-4147390518ee.json new file mode 100644 index 000000000..a97243437 --- /dev/null +++ b/data/hfopenllm_v2/newsbang/Homer-v1.0-Qwen2.5-72B/83b0844c-70fe-4b63-8ed2-4147390518ee.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/newsbang_Homer-v1.0-Qwen2.5-72B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Homer-v1.0-Qwen2.5-72B", + "id": "newsbang/Homer-v1.0-Qwen2.5-72B", + "developer": "newsbang", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7628 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.731 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4902 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": 
{ + "score": 0.4161 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4677 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6145 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/newsbang/Homer-v1.0-Qwen2.5-7B/9cf10c60-bee1-4f4f-9e03-c3c10287bded.json b/data/hfopenllm_v2/newsbang/Homer-v1.0-Qwen2.5-7B/9cf10c60-bee1-4f4f-9e03-c3c10287bded.json new file mode 100644 index 000000000..c299cf546 --- /dev/null +++ b/data/hfopenllm_v2/newsbang/Homer-v1.0-Qwen2.5-7B/9cf10c60-bee1-4f4f-9e03-c3c10287bded.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/newsbang_Homer-v1.0-Qwen2.5-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Homer-v1.0-Qwen2.5-7B", + "id": "newsbang/Homer-v1.0-Qwen2.5-7B", + "developer": "newsbang", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6393 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5655 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3323 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3221 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4278 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4535 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nguyentd/FinancialAdvice-Qwen2.5-7B/8e92dd9e-a68c-46ef-9b03-955c06a21437.json b/data/hfopenllm_v2/nguyentd/FinancialAdvice-Qwen2.5-7B/8e92dd9e-a68c-46ef-9b03-955c06a21437.json new file mode 100644 index 000000000..332c8a2d4 --- /dev/null +++ b/data/hfopenllm_v2/nguyentd/FinancialAdvice-Qwen2.5-7B/8e92dd9e-a68c-46ef-9b03-955c06a21437.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nguyentd_FinancialAdvice-Qwen2.5-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FinancialAdvice-Qwen2.5-7B", + "id": "nguyentd/FinancialAdvice-Qwen2.5-7B", + "developer": "nguyentd", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4496 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4731 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1148 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4025 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + 
"dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3752 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ngxson/MiniThinky-1B-Llama-3.2/dd1139d8-2b44-4516-b24a-1219826f5482.json b/data/hfopenllm_v2/ngxson/MiniThinky-1B-Llama-3.2/dd1139d8-2b44-4516-b24a-1219826f5482.json new file mode 100644 index 000000000..2e0f2504c --- /dev/null +++ b/data/hfopenllm_v2/ngxson/MiniThinky-1B-Llama-3.2/dd1139d8-2b44-4516-b24a-1219826f5482.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ngxson_MiniThinky-1B-Llama-3.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MiniThinky-1B-Llama-3.2", + "id": "ngxson/MiniThinky-1B-Llama-3.2", + "developer": "ngxson", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2771 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3142 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0574 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2391 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3434 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 
0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1147 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ngxson/MiniThinky-v2-1B-Llama-3.2/e37e86f7-b67b-4f0a-b1bd-92f30842b303.json b/data/hfopenllm_v2/ngxson/MiniThinky-v2-1B-Llama-3.2/e37e86f7-b67b-4f0a-b1bd-92f30842b303.json new file mode 100644 index 000000000..34a2fa568 --- /dev/null +++ b/data/hfopenllm_v2/ngxson/MiniThinky-v2-1B-Llama-3.2/e37e86f7-b67b-4f0a-b1bd-92f30842b303.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ngxson_MiniThinky-v2-1B-Llama-3.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MiniThinky-v2-1B-Llama-3.2", + "id": "ngxson/MiniThinky-v2-1B-Llama-3.2", + "developer": "ngxson", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2963 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3205 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0287 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2399 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3356 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1116 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nhyha/N3N_Delirium-v1_1030_0227/5128233e-41be-4e26-9ec2-2b7926c66b7c.json 
b/data/hfopenllm_v2/nhyha/N3N_Delirium-v1_1030_0227/5128233e-41be-4e26-9ec2-2b7926c66b7c.json deleted file mode 100644 index 6ad23d4e3..000000000 --- a/data/hfopenllm_v2/nhyha/N3N_Delirium-v1_1030_0227/5128233e-41be-4e26-9ec2-2b7926c66b7c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nhyha_N3N_Delirium-v1_1030_0227/1762652580.4055", - "retrieved_timestamp": "1762652580.4055", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nhyha/N3N_Delirium-v1_1030_0227", - "developer": "nhyha", - "inference_platform": "unknown", - "id": "nhyha/N3N_Delirium-v1_1030_0227", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8022890375315275 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5890686677822234 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2107250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.337248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40981249999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41497672872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/nhyha/N3N_Delirium-v1_1030_0227/bc3b55d5-35ca-48b5-832e-8544e145b1b1.json b/data/hfopenllm_v2/nhyha/N3N_Delirium-v1_1030_0227/bc3b55d5-35ca-48b5-832e-8544e145b1b1.json new file mode 100644 index 000000000..c0a263780 --- /dev/null +++ b/data/hfopenllm_v2/nhyha/N3N_Delirium-v1_1030_0227/bc3b55d5-35ca-48b5-832e-8544e145b1b1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nhyha_N3N_Delirium-v1_1030_0227/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "N3N_Delirium-v1_1030_0227", + "id": "nhyha/N3N_Delirium-v1_1030_0227", + "developer": "nhyha", + "inference_platform": "unknown", + 
"additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8023 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5891 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2107 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3372 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4098 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.415 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nhyha/N3N_Llama-3.1-8B-Instruct_1028_0216/5757cd3d-c64e-4743-8200-5e610e24bf95.json b/data/hfopenllm_v2/nhyha/N3N_Llama-3.1-8B-Instruct_1028_0216/5757cd3d-c64e-4743-8200-5e610e24bf95.json new file mode 100644 index 000000000..0ece90da0 --- /dev/null +++ b/data/hfopenllm_v2/nhyha/N3N_Llama-3.1-8B-Instruct_1028_0216/5757cd3d-c64e-4743-8200-5e610e24bf95.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nhyha_N3N_Llama-3.1-8B-Instruct_1028_0216/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "N3N_Llama-3.1-8B-Instruct_1028_0216", + "id": "nhyha/N3N_Llama-3.1-8B-Instruct_1028_0216", + "developer": "nhyha", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": 
"IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4796 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5054 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1707 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.405 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3638 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nhyha/N3N_Llama-3.1-8B-Instruct_1028_0216/928f9cd0-ce0f-43f7-aa5f-be9cbf4d91cd.json b/data/hfopenllm_v2/nhyha/N3N_Llama-3.1-8B-Instruct_1028_0216/928f9cd0-ce0f-43f7-aa5f-be9cbf4d91cd.json deleted file mode 100644 index 1d339f07c..000000000 --- a/data/hfopenllm_v2/nhyha/N3N_Llama-3.1-8B-Instruct_1028_0216/928f9cd0-ce0f-43f7-aa5f-be9cbf4d91cd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nhyha_N3N_Llama-3.1-8B-Instruct_1028_0216/1762652580.405756", - "retrieved_timestamp": "1762652580.405757", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nhyha/N3N_Llama-3.1-8B-Instruct_1028_0216", - "developer": "nhyha", - "inference_platform": "unknown", - "id": "nhyha/N3N_Llama-3.1-8B-Instruct_1028_0216", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": 
"Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4796063334175543 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5053741309920361 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17069486404833836 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40503125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36377992021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/nhyha/N3N_gemma-2-9b-it_20241029_1532/ae8cd3ad-ce7b-41f4-8e4a-f11002af2e58.json b/data/hfopenllm_v2/nhyha/N3N_gemma-2-9b-it_20241029_1532/ae8cd3ad-ce7b-41f4-8e4a-f11002af2e58.json new file mode 100644 index 000000000..9ba31eded --- /dev/null +++ b/data/hfopenllm_v2/nhyha/N3N_gemma-2-9b-it_20241029_1532/ae8cd3ad-ce7b-41f4-8e4a-f11002af2e58.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nhyha_N3N_gemma-2-9b-it_20241029_1532/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "N3N_gemma-2-9b-it_20241029_1532", + "id": "nhyha/N3N_gemma-2-9b-it_20241029_1532", + "developer": "nhyha", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6752 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5863 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on 
MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2122 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3406 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4594 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4122 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nhyha/N3N_gemma-2-9b-it_20241110_2026/bee54048-ebb2-4051-a18f-aa85b0f2ce27.json b/data/hfopenllm_v2/nhyha/N3N_gemma-2-9b-it_20241110_2026/bee54048-ebb2-4051-a18f-aa85b0f2ce27.json new file mode 100644 index 000000000..3720f36fb --- /dev/null +++ b/data/hfopenllm_v2/nhyha/N3N_gemma-2-9b-it_20241110_2026/bee54048-ebb2-4051-a18f-aa85b0f2ce27.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nhyha_N3N_gemma-2-9b-it_20241110_2026/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "N3N_gemma-2-9b-it_20241110_2026", + "id": "nhyha/N3N_gemma-2-9b-it_20241110_2026", + "developer": "nhyha", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6283 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5867 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1609 + } + }, + { + "evaluation_name": "GPQA", + 
"source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3364 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4073 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.402 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nhyha/merge_Qwen2.5-7B-Instruct_20241023_0314/2f98c85b-5a2e-467e-9626-b1bdefe7bdd7.json b/data/hfopenllm_v2/nhyha/merge_Qwen2.5-7B-Instruct_20241023_0314/2f98c85b-5a2e-467e-9626-b1bdefe7bdd7.json new file mode 100644 index 000000000..f76d4c073 --- /dev/null +++ b/data/hfopenllm_v2/nhyha/merge_Qwen2.5-7B-Instruct_20241023_0314/2f98c85b-5a2e-467e-9626-b1bdefe7bdd7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nhyha_merge_Qwen2.5-7B-Instruct_20241023_0314/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "merge_Qwen2.5-7B-Instruct_20241023_0314", + "id": "nhyha/merge_Qwen2.5-7B-Instruct_20241023_0314", + "developer": "nhyha", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5695 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5559 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3542 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3213 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4251 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4542 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nhyha/merge_Qwen2.5-7B-Instruct_20241023_0314/eb608d79-545a-4cc2-8d28-e539a3af7f17.json b/data/hfopenllm_v2/nhyha/merge_Qwen2.5-7B-Instruct_20241023_0314/eb608d79-545a-4cc2-8d28-e539a3af7f17.json deleted file mode 100644 index 5c2afd856..000000000 --- a/data/hfopenllm_v2/nhyha/merge_Qwen2.5-7B-Instruct_20241023_0314/eb608d79-545a-4cc2-8d28-e539a3af7f17.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nhyha_merge_Qwen2.5-7B-Instruct_20241023_0314/1762652580.406431", - "retrieved_timestamp": "1762652580.406431", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nhyha/merge_Qwen2.5-7B-Instruct_20241023_0314", - "developer": "nhyha", - "inference_platform": "unknown", - "id": "nhyha/merge_Qwen2.5-7B-Instruct_20241023_0314", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5694568190179834 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5558529241660143 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3542296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42506249999999995 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45420545212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/nidum/Nidum-Limitless-Gemma-2B/2c530a3b-888e-4a61-b97b-ea875b30ec9c.json b/data/hfopenllm_v2/nidum/Nidum-Limitless-Gemma-2B/2c530a3b-888e-4a61-b97b-ea875b30ec9c.json new file mode 100644 index 000000000..e6ba40bf6 --- /dev/null +++ b/data/hfopenllm_v2/nidum/Nidum-Limitless-Gemma-2B/2c530a3b-888e-4a61-b97b-ea875b30ec9c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nidum_Nidum-Limitless-Gemma-2B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nidum-Limitless-Gemma-2B", + "id": "nidum/Nidum-Limitless-Gemma-2B", + "developer": "nidum", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GemmaForCausalLM", + "params_billions": 2.506 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2424 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0136 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.374 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1174 + } + } + ] +} \ No 
newline at end of file diff --git a/data/hfopenllm_v2/nisten/franqwenstein-35b/4c9fb322-735e-4644-8121-088d00f78c5f.json b/data/hfopenllm_v2/nisten/franqwenstein-35b/4c9fb322-735e-4644-8121-088d00f78c5f.json new file mode 100644 index 000000000..d683ac73a --- /dev/null +++ b/data/hfopenllm_v2/nisten/franqwenstein-35b/4c9fb322-735e-4644-8121-088d00f78c5f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nisten_franqwenstein-35b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "franqwenstein-35b", + "id": "nisten/franqwenstein-35b", + "developer": "nisten", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 34.714 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3799 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6647 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3406 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4035 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.494 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5731 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nisten/franqwenstein-35b/e7e7733f-682b-4e68-8f07-85f3ba7a7ae1.json b/data/hfopenllm_v2/nisten/franqwenstein-35b/e7e7733f-682b-4e68-8f07-85f3ba7a7ae1.json new file mode 100644 index 000000000..12f77c7a5 --- /dev/null +++ 
b/data/hfopenllm_v2/nisten/franqwenstein-35b/e7e7733f-682b-4e68-8f07-85f3ba7a7ae1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nisten_franqwenstein-35b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "franqwenstein-35b", + "id": "nisten/franqwenstein-35b", + "developer": "nisten", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 34.714 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3914 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6591 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3044 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3591 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4681 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5611 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nisten/tqwendo-36b/e9a4e1e2-bd55-4c3d-99eb-8fafd8f6ec44.json b/data/hfopenllm_v2/nisten/tqwendo-36b/e9a4e1e2-bd55-4c3d-99eb-8fafd8f6ec44.json new file mode 100644 index 000000000..5712a649d --- /dev/null +++ b/data/hfopenllm_v2/nisten/tqwendo-36b/e9a4e1e2-bd55-4c3d-99eb-8fafd8f6ec44.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nisten_tqwendo-36b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF 
Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tqwendo-36b", + "id": "nisten/tqwendo-36b", + "developer": "nisten", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 35.69 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6778 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6432 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4154 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3314 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.443 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4381 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nlpguy/Lion-Lamarck-v.1.0.8/42ed92b3-63bc-4fa1-bc16-c19bfb73368f.json b/data/hfopenllm_v2/nlpguy/Lion-Lamarck-v.1.0.8/42ed92b3-63bc-4fa1-bc16-c19bfb73368f.json new file mode 100644 index 000000000..b018debaf --- /dev/null +++ b/data/hfopenllm_v2/nlpguy/Lion-Lamarck-v.1.0.8/42ed92b3-63bc-4fa1-bc16-c19bfb73368f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nlpguy_Lion-Lamarck-v.1.0.8/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lion-Lamarck-v.1.0.8", + "id": "nlpguy/Lion-Lamarck-v.1.0.8", + "developer": "nlpguy", + "inference_platform": 
"unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4509 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5869 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5544 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3582 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4673 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4643 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nlpguy/Lion-Lamarck-v.1.0.8/67582e10-cebf-4938-bfca-2eb6883e2c39.json b/data/hfopenllm_v2/nlpguy/Lion-Lamarck-v.1.0.8/67582e10-cebf-4938-bfca-2eb6883e2c39.json deleted file mode 100644 index 4a721b3ca..000000000 --- a/data/hfopenllm_v2/nlpguy/Lion-Lamarck-v.1.0.8/67582e10-cebf-4938-bfca-2eb6883e2c39.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nlpguy_Lion-Lamarck-v.1.0.8/1762652580.40752", - "retrieved_timestamp": "1762652580.407521", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nlpguy/Lion-Lamarck-v.1.0.8", - "developer": "nlpguy", - "inference_platform": "unknown", - "id": "nlpguy/Lion-Lamarck-v.1.0.8", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, 
- "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45090471061228654 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5868930914775694 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.554380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35822147651006714 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4672708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46434507978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/nlpguy/Lion-Lamarck-v.1.0.9/915ae579-786a-4eb2-a1bb-107a12c9c40d.json b/data/hfopenllm_v2/nlpguy/Lion-Lamarck-v.1.0.9/915ae579-786a-4eb2-a1bb-107a12c9c40d.json new file mode 100644 index 000000000..e62e07a5e --- /dev/null +++ b/data/hfopenllm_v2/nlpguy/Lion-Lamarck-v.1.0.9/915ae579-786a-4eb2-a1bb-107a12c9c40d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nlpguy_Lion-Lamarck-v.1.0.9/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lion-Lamarck-v.1.0.9", + "id": "nlpguy/Lion-Lamarck-v.1.0.9", + "developer": "nlpguy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3409 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5918 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + 
"metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5642 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3901 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.53 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4704 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nlpguy/Lion-Lamarck-v.1.0.9/f5fa6816-051d-4d86-bef5-ba9731b8bd9a.json b/data/hfopenllm_v2/nlpguy/Lion-Lamarck-v.1.0.9/f5fa6816-051d-4d86-bef5-ba9731b8bd9a.json deleted file mode 100644 index ba0c0e63c..000000000 --- a/data/hfopenllm_v2/nlpguy/Lion-Lamarck-v.1.0.9/f5fa6816-051d-4d86-bef5-ba9731b8bd9a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nlpguy_Lion-Lamarck-v.1.0.9/1762652580.407768", - "retrieved_timestamp": "1762652580.4077692", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nlpguy/Lion-Lamarck-v.1.0.9", - "developer": "nlpguy", - "inference_platform": "unknown", - "id": "nlpguy/Lion-Lamarck-v.1.0.9", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34089549063152436 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5918237099420903 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5641993957703928 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3901006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5299583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47041223404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/nlpguy/Lion-Lamarck-v.1.1.0/027ad81a-1271-4c25-9966-02370f6ee49d.json b/data/hfopenllm_v2/nlpguy/Lion-Lamarck-v.1.1.0/027ad81a-1271-4c25-9966-02370f6ee49d.json deleted file mode 100644 index efc7551e7..000000000 --- a/data/hfopenllm_v2/nlpguy/Lion-Lamarck-v.1.1.0/027ad81a-1271-4c25-9966-02370f6ee49d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nlpguy_Lion-Lamarck-v.1.1.0/1762652580.4079711", - "retrieved_timestamp": "1762652580.4079711", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nlpguy/Lion-Lamarck-v.1.1.0", - "developer": "nlpguy", - "inference_platform": "unknown", - "id": "nlpguy/Lion-Lamarck-v.1.1.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3657750324694034 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5962460968547941 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5755287009063444 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3926174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.53253125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4630984042553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/nlpguy/Lion-Lamarck-v.1.1.0/3489ffea-a607-4f3d-a0c2-bd17147f244f.json b/data/hfopenllm_v2/nlpguy/Lion-Lamarck-v.1.1.0/3489ffea-a607-4f3d-a0c2-bd17147f244f.json new file mode 100644 index 000000000..4a3058b4b --- /dev/null +++ 
b/data/hfopenllm_v2/nlpguy/Lion-Lamarck-v.1.1.0/3489ffea-a607-4f3d-a0c2-bd17147f244f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nlpguy_Lion-Lamarck-v.1.1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lion-Lamarck-v.1.1.0", + "id": "nlpguy/Lion-Lamarck-v.1.1.0", + "developer": "nlpguy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3658 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5962 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5755 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3926 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5325 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4631 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nlpguy/Miisce-one/7b5ba8a8-16c3-4169-b97d-13dd5d4f8395.json b/data/hfopenllm_v2/nlpguy/Miisce-one/7b5ba8a8-16c3-4169-b97d-13dd5d4f8395.json new file mode 100644 index 000000000..39e5855db --- /dev/null +++ b/data/hfopenllm_v2/nlpguy/Miisce-one/7b5ba8a8-16c3-4169-b97d-13dd5d4f8395.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nlpguy_Miisce-one/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + 
"source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Miisce-one", + "id": "nlpguy/Miisce-one", + "developer": "nlpguy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6066 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6505 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4169 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3859 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.482 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5412 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nlpguy/Miisce-one/e557a750-53b2-4181-a19c-dfdeee11ee61.json b/data/hfopenllm_v2/nlpguy/Miisce-one/e557a750-53b2-4181-a19c-dfdeee11ee61.json deleted file mode 100644 index 0335e04de..000000000 --- a/data/hfopenllm_v2/nlpguy/Miisce-one/e557a750-53b2-4181-a19c-dfdeee11ee61.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nlpguy_Miisce-one/1762652580.4081762", - "retrieved_timestamp": "1762652580.408177", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"nlpguy/Miisce-one", - "developer": "nlpguy", - "inference_platform": "unknown", - "id": "nlpguy/Miisce-one", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6065761069517768 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6504562869685913 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4169184290030212 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3859060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48198958333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5412234042553191 - } - } - ] -} diff --git a/data/hfopenllm_v2/nlpguy/Mistral-NeMo-Minitron-Upscale-v1/6411c44a-b2b3-4fe3-8ba4-9422a0a0b31e.json b/data/hfopenllm_v2/nlpguy/Mistral-NeMo-Minitron-Upscale-v1/6411c44a-b2b3-4fe3-8ba4-9422a0a0b31e.json new file mode 100644 index 000000000..823d8c02e --- /dev/null +++ b/data/hfopenllm_v2/nlpguy/Mistral-NeMo-Minitron-Upscale-v1/6411c44a-b2b3-4fe3-8ba4-9422a0a0b31e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nlpguy_Mistral-NeMo-Minitron-Upscale-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-NeMo-Minitron-Upscale-v1", + "id": "nlpguy/Mistral-NeMo-Minitron-Upscale-v1", + "developer": "nlpguy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.451 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1648 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, 
+ "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4468 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0144 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3804 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2537 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nlpguy/Mistral-NeMo-Minitron-Upscale-v2/fe344f84-7428-45af-940f-736275bc4d50.json b/data/hfopenllm_v2/nlpguy/Mistral-NeMo-Minitron-Upscale-v2/fe344f84-7428-45af-940f-736275bc4d50.json new file mode 100644 index 000000000..5887f8230 --- /dev/null +++ b/data/hfopenllm_v2/nlpguy/Mistral-NeMo-Minitron-Upscale-v2/fe344f84-7428-45af-940f-736275bc4d50.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nlpguy_Mistral-NeMo-Minitron-Upscale-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-NeMo-Minitron-Upscale-v2", + "id": "nlpguy/Mistral-NeMo-Minitron-Upscale-v2", + "developer": "nlpguy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.451 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1573 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.395 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + 
"dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0128 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2735 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3791 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1927 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nlpguy/Mistral-NeMo-Minitron-Upscale-v3/60956ea2-8b0b-4e4b-801a-d0689f9d46f4.json b/data/hfopenllm_v2/nlpguy/Mistral-NeMo-Minitron-Upscale-v3/60956ea2-8b0b-4e4b-801a-d0689f9d46f4.json new file mode 100644 index 000000000..f652b2940 --- /dev/null +++ b/data/hfopenllm_v2/nlpguy/Mistral-NeMo-Minitron-Upscale-v3/60956ea2-8b0b-4e4b-801a-d0689f9d46f4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nlpguy_Mistral-NeMo-Minitron-Upscale-v3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-NeMo-Minitron-Upscale-v3", + "id": "nlpguy/Mistral-NeMo-Minitron-Upscale-v3", + "developer": "nlpguy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.451 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1412 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3052 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH 
Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0113 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4098 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1171 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nlpguy/StableProse/1ad54bdc-419a-4dd9-9fbb-d7b7ee7038d1.json b/data/hfopenllm_v2/nlpguy/StableProse/1ad54bdc-419a-4dd9-9fbb-d7b7ee7038d1.json new file mode 100644 index 000000000..7c16d0d96 --- /dev/null +++ b/data/hfopenllm_v2/nlpguy/StableProse/1ad54bdc-419a-4dd9-9fbb-d7b7ee7038d1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nlpguy_StableProse/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "StableProse", + "id": "nlpguy/StableProse", + "developer": "nlpguy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1972 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5117 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.065 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4067 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3468 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nlpguy/StableProse/bedab076-13e7-468a-b8e8-dddb57d78583.json b/data/hfopenllm_v2/nlpguy/StableProse/bedab076-13e7-468a-b8e8-dddb57d78583.json deleted file mode 100644 index 85b6fc8b0..000000000 --- a/data/hfopenllm_v2/nlpguy/StableProse/bedab076-13e7-468a-b8e8-dddb57d78583.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nlpguy_StableProse/1762652580.40907", - "retrieved_timestamp": "1762652580.40907", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nlpguy/StableProse", - "developer": "nlpguy", - "inference_platform": "unknown", - "id": "nlpguy/StableProse", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19723888172271792 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5116558625577087 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4067083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3468251329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/nlpguy/StarFusion-alpha1/1d5c35ef-ec57-42a3-8459-6db62627c6d2.json b/data/hfopenllm_v2/nlpguy/StarFusion-alpha1/1d5c35ef-ec57-42a3-8459-6db62627c6d2.json deleted file mode 100644 index d17af3398..000000000 --- a/data/hfopenllm_v2/nlpguy/StarFusion-alpha1/1d5c35ef-ec57-42a3-8459-6db62627c6d2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nlpguy_StarFusion-alpha1/1762652580.409272", - "retrieved_timestamp": "1762652580.409272", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nlpguy/StarFusion-alpha1", - "developer": "nlpguy", - "inference_platform": "unknown", - "id": "nlpguy/StarFusion-alpha1", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5660092997690572 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4428694115507034 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40810416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3190658244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/nlpguy/StarFusion-alpha1/2ab375f0-2477-48a5-a5d9-0b5d0d7d0a84.json b/data/hfopenllm_v2/nlpguy/StarFusion-alpha1/2ab375f0-2477-48a5-a5d9-0b5d0d7d0a84.json new file mode 100644 index 000000000..3dae9ae39 --- /dev/null +++ b/data/hfopenllm_v2/nlpguy/StarFusion-alpha1/2ab375f0-2477-48a5-a5d9-0b5d0d7d0a84.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nlpguy_StarFusion-alpha1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + 
"name": "StarFusion-alpha1", + "id": "nlpguy/StarFusion-alpha1", + "developer": "nlpguy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.566 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4429 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0718 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4081 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3191 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/noname0202/Llama-3.2-4x3B-Instruct/e0525a52-d38c-4b2f-b59b-048b4bf71cb2.json b/data/hfopenllm_v2/noname0202/Llama-3.2-4x3B-Instruct/e0525a52-d38c-4b2f-b59b-048b4bf71cb2.json new file mode 100644 index 000000000..d9ea08253 --- /dev/null +++ b/data/hfopenllm_v2/noname0202/Llama-3.2-4x3B-Instruct/e0525a52-d38c-4b2f-b59b-048b4bf71cb2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/noname0202_Llama-3.2-4x3B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-4x3B-Instruct", + "id": "noname0202/Llama-3.2-4x3B-Instruct", + "developer": "noname0202", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + 
"params_billions": 9.949 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7067 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4647 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1586 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3674 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3285 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/noname0202/Llama-3.2-4x3B-Instruct/e9511b0a-1083-4a0d-a9e0-97efcfc0891e.json b/data/hfopenllm_v2/noname0202/Llama-3.2-4x3B-Instruct/e9511b0a-1083-4a0d-a9e0-97efcfc0891e.json deleted file mode 100644 index 2135d9813..000000000 --- a/data/hfopenllm_v2/noname0202/Llama-3.2-4x3B-Instruct/e9511b0a-1083-4a0d-a9e0-97efcfc0891e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/noname0202_Llama-3.2-4x3B-Instruct/1762652580.409481", - "retrieved_timestamp": "1762652580.409481", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "noname0202/Llama-3.2-4x3B-Instruct", - "developer": "noname0202", - "inference_platform": "unknown", - "id": "noname0202/Llama-3.2-4x3B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 9.949 - } - }, - "evaluation_results": [ - { - "evaluation_name": 
"IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7067181744438091 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4647311192852755 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15861027190332327 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36739583333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3285405585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/noname0202/gemma-2-2b-it-ties/01bc964f-552b-4cda-9ed0-cf720f0c8de4.json b/data/hfopenllm_v2/noname0202/gemma-2-2b-it-ties/01bc964f-552b-4cda-9ed0-cf720f0c8de4.json new file mode 100644 index 000000000..51a2199d0 --- /dev/null +++ b/data/hfopenllm_v2/noname0202/gemma-2-2b-it-ties/01bc964f-552b-4cda-9ed0-cf720f0c8de4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/noname0202_gemma-2-2b-it-ties/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-2b-it-ties", + "id": "noname0202/gemma-2-2b-it-ties", + "developer": "noname0202", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1266 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4206 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0242 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3929 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2561 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/noname0202/gemma-2-9b-sft-jp-en-zh-v1/b32d34eb-14b5-410a-8772-041d40ca73b8.json b/data/hfopenllm_v2/noname0202/gemma-2-9b-sft-jp-en-zh-v1/b32d34eb-14b5-410a-8772-041d40ca73b8.json deleted file mode 100644 index 35a3b17ed..000000000 --- a/data/hfopenllm_v2/noname0202/gemma-2-9b-sft-jp-en-zh-v1/b32d34eb-14b5-410a-8772-041d40ca73b8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/noname0202_gemma-2-9b-sft-jp-en-zh-v1/1762652580.410035", - "retrieved_timestamp": "1762652580.410036", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "noname0202/gemma-2-9b-sft-jp-en-zh-v1", - "developer": "noname0202", - "inference_platform": "unknown", - "id": "noname0202/gemma-2-9b-sft-jp-en-zh-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29880494864736673 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4519290530910057 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40801041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3125 - } - } - ] -} diff --git a/data/hfopenllm_v2/noname0202/gemma-2-9b-sft-jp-en-zh-v1/c9e95c55-978e-485b-8a77-ab2e668e3254.json b/data/hfopenllm_v2/noname0202/gemma-2-9b-sft-jp-en-zh-v1/c9e95c55-978e-485b-8a77-ab2e668e3254.json new file mode 100644 index 000000000..088f2166d --- /dev/null +++ b/data/hfopenllm_v2/noname0202/gemma-2-9b-sft-jp-en-zh-v1/c9e95c55-978e-485b-8a77-ab2e668e3254.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/noname0202_gemma-2-9b-sft-jp-en-zh-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-9b-sft-jp-en-zh-v1", + "id": "noname0202/gemma-2-9b-sft-jp-en-zh-v1", + "developer": "noname0202", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2988 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4519 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0891 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.408 + } + }, + { + "evaluation_name": "MMLU-PRO", 
+ "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3125 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/noname0202/gemma-2-9b-sft-jp-en-zh-v2/c71c606b-ccb7-48e9-a6c8-b72205ec6c06.json b/data/hfopenllm_v2/noname0202/gemma-2-9b-sft-jp-en-zh-v2/c71c606b-ccb7-48e9-a6c8-b72205ec6c06.json new file mode 100644 index 000000000..26c8c9012 --- /dev/null +++ b/data/hfopenllm_v2/noname0202/gemma-2-9b-sft-jp-en-zh-v2/c71c606b-ccb7-48e9-a6c8-b72205ec6c06.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/noname0202_gemma-2-9b-sft-jp-en-zh-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-9b-sft-jp-en-zh-v2", + "id": "noname0202/gemma-2-9b-sft-jp-en-zh-v2", + "developer": "noname0202", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3993 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4515 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1042 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2878 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3612 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3675 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/noname0202/gemma-2-9b-sft-jp-en-zh-v2/ee687c56-a9b4-4205-866b-b3067c066992.json b/data/hfopenllm_v2/noname0202/gemma-2-9b-sft-jp-en-zh-v2/ee687c56-a9b4-4205-866b-b3067c066992.json deleted file mode 100644 index ca0953615..000000000 --- a/data/hfopenllm_v2/noname0202/gemma-2-9b-sft-jp-en-zh-v2/ee687c56-a9b4-4205-866b-b3067c066992.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/noname0202_gemma-2-9b-sft-jp-en-zh-v2/1762652580.4102452", - "retrieved_timestamp": "1762652580.4102452", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "noname0202/gemma-2-9b-sft-jp-en-zh-v2", - "developer": "noname0202", - "inference_platform": "unknown", - "id": "noname0202/gemma-2-9b-sft-jp-en-zh-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3993470657854493 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4515041184509401 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36115625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36751994680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/noname0202/llama-math-1b-r16-0to512tokens-test/ae1801cb-d112-4d1a-895d-c6743779846a.json b/data/hfopenllm_v2/noname0202/llama-math-1b-r16-0to512tokens-test/ae1801cb-d112-4d1a-895d-c6743779846a.json new file mode 100644 index 000000000..ff9746f84 --- /dev/null +++ b/data/hfopenllm_v2/noname0202/llama-math-1b-r16-0to512tokens-test/ae1801cb-d112-4d1a-895d-c6743779846a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/noname0202_llama-math-1b-r16-0to512tokens-test/1770682486.623709", + "retrieved_timestamp": 
"1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-math-1b-r16-0to512tokens-test", + "id": "noname0202/llama-math-1b-r16-0to512tokens-test", + "developer": "noname0202", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.547 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3488 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0816 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3143 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1728 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/noname0202/llama-math-1b-r32-0to512tokens-test/008e3601-dfc4-4bc1-bf8b-f5cef43ae098.json b/data/hfopenllm_v2/noname0202/llama-math-1b-r32-0to512tokens-test/008e3601-dfc4-4bc1-bf8b-f5cef43ae098.json new file mode 100644 index 000000000..d00eac878 --- /dev/null +++ b/data/hfopenllm_v2/noname0202/llama-math-1b-r32-0to512tokens-test/008e3601-dfc4-4bc1-bf8b-f5cef43ae098.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/noname0202_llama-math-1b-r32-0to512tokens-test/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": 
"Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-math-1b-r32-0to512tokens-test", + "id": "noname0202/llama-math-1b-r32-0to512tokens-test", + "developer": "noname0202", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5683 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3495 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0906 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3209 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.176 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/noname0202/llama-math-1b-r32-test/379b315d-96fb-4edb-b2d6-3dc113a10c17.json b/data/hfopenllm_v2/noname0202/llama-math-1b-r32-test/379b315d-96fb-4edb-b2d6-3dc113a10c17.json new file mode 100644 index 000000000..442e75821 --- /dev/null +++ b/data/hfopenllm_v2/noname0202/llama-math-1b-r32-test/379b315d-96fb-4edb-b2d6-3dc113a10c17.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/noname0202_llama-math-1b-r32-test/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-math-1b-r32-test", + "id": "noname0202/llama-math-1b-r32-test", + "developer": "noname0202", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5819 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3486 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0725 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3156 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1781 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/noname0202/llama-math-1b-r8-512tokens-test/8cd36aa1-6f87-4d4d-a1bf-adc87e0a26c6.json b/data/hfopenllm_v2/noname0202/llama-math-1b-r8-512tokens-test/8cd36aa1-6f87-4d4d-a1bf-adc87e0a26c6.json new file mode 100644 index 000000000..982f70f06 --- /dev/null +++ b/data/hfopenllm_v2/noname0202/llama-math-1b-r8-512tokens-test/8cd36aa1-6f87-4d4d-a1bf-adc87e0a26c6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/noname0202_llama-math-1b-r8-512tokens-test/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-math-1b-r8-512tokens-test", + "id": "noname0202/llama-math-1b-r8-512tokens-test", + "developer": "noname0202", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + 
"evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5792 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3496 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0816 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3169 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1753 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/notbdq/Qwen2.5-14B-Instruct-1M-GRPO-Reasoning/7e0f008e-4327-4ee0-a810-b5564b651233.json b/data/hfopenllm_v2/notbdq/Qwen2.5-14B-Instruct-1M-GRPO-Reasoning/7e0f008e-4327-4ee0-a810-b5564b651233.json deleted file mode 100644 index 7c9745909..000000000 --- a/data/hfopenllm_v2/notbdq/Qwen2.5-14B-Instruct-1M-GRPO-Reasoning/7e0f008e-4327-4ee0-a810-b5564b651233.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/notbdq_Qwen2.5-14B-Instruct-1M-GRPO-Reasoning/1762652580.4113228", - "retrieved_timestamp": "1762652580.4113238", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "notbdq/Qwen2.5-14B-Instruct-1M-GRPO-Reasoning", - "developer": "notbdq", - "inference_platform": "unknown", - "id": "notbdq/Qwen2.5-14B-Instruct-1M-GRPO-Reasoning", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8413564896696322 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6198222551365405 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5302114803625377 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34312080536912754 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.418 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4849567819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/notbdq/Qwen2.5-14B-Instruct-1M-GRPO-Reasoning/f76ce244-29f7-44f0-9850-7291f8e4cbf1.json b/data/hfopenllm_v2/notbdq/Qwen2.5-14B-Instruct-1M-GRPO-Reasoning/f76ce244-29f7-44f0-9850-7291f8e4cbf1.json new file mode 100644 index 000000000..92f477fa6 --- /dev/null +++ b/data/hfopenllm_v2/notbdq/Qwen2.5-14B-Instruct-1M-GRPO-Reasoning/f76ce244-29f7-44f0-9850-7291f8e4cbf1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/notbdq_Qwen2.5-14B-Instruct-1M-GRPO-Reasoning/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-Instruct-1M-GRPO-Reasoning", + "id": "notbdq/Qwen2.5-14B-Instruct-1M-GRPO-Reasoning", + "developer": "notbdq", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8414 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6198 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + 
"hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5302 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3431 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.418 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.485 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nothingiisreal/L3.1-8B-Celeste-V1.5/506871f1-0c87-4e8c-a270-eed7b5da2599.json b/data/hfopenllm_v2/nothingiisreal/L3.1-8B-Celeste-V1.5/506871f1-0c87-4e8c-a270-eed7b5da2599.json new file mode 100644 index 000000000..6a557a511 --- /dev/null +++ b/data/hfopenllm_v2/nothingiisreal/L3.1-8B-Celeste-V1.5/506871f1-0c87-4e8c-a270-eed7b5da2599.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nothingiisreal_L3.1-8B-Celeste-V1.5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3.1-8B-Celeste-V1.5", + "id": "nothingiisreal/L3.1-8B-Celeste-V1.5", + "developer": "nothingiisreal", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7327 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5012 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.1465 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2844 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3749 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3704 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nothingiisreal/L3.1-8B-Celeste-V1.5/5b7a80ce-0fb2-4fb8-9381-184d7a434706.json b/data/hfopenllm_v2/nothingiisreal/L3.1-8B-Celeste-V1.5/5b7a80ce-0fb2-4fb8-9381-184d7a434706.json deleted file mode 100644 index 812d3c5c9..000000000 --- a/data/hfopenllm_v2/nothingiisreal/L3.1-8B-Celeste-V1.5/5b7a80ce-0fb2-4fb8-9381-184d7a434706.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nothingiisreal_L3.1-8B-Celeste-V1.5/1762652580.4115741", - "retrieved_timestamp": "1762652580.411575", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nothingiisreal/L3.1-8B-Celeste-V1.5", - "developer": "nothingiisreal", - "inference_platform": "unknown", - "id": "nothingiisreal/L3.1-8B-Celeste-V1.5", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7326715337526651 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5011796822721141 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14652567975830816 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": 
"Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37486458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37042885638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/nothingiisreal/MN-12B-Starcannon-v2/1ff70031-dbe8-467a-9dbd-9fd789b9841b.json b/data/hfopenllm_v2/nothingiisreal/MN-12B-Starcannon-v2/1ff70031-dbe8-467a-9dbd-9fd789b9841b.json deleted file mode 100644 index a71b908f9..000000000 --- a/data/hfopenllm_v2/nothingiisreal/MN-12B-Starcannon-v2/1ff70031-dbe8-467a-9dbd-9fd789b9841b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nothingiisreal_MN-12B-Starcannon-v2/1762652580.411832", - "retrieved_timestamp": "1762652580.411832", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nothingiisreal/MN-12B-Starcannon-v2", - "developer": "nothingiisreal", - "inference_platform": "unknown", - "id": "nothingiisreal/MN-12B-Starcannon-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3925273828995953 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5004499888471767 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05966767371601209 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39781249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31283244680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/nothingiisreal/MN-12B-Starcannon-v2/c20264fd-b1f9-4e0f-9f6e-1d58f1c18cda.json b/data/hfopenllm_v2/nothingiisreal/MN-12B-Starcannon-v2/c20264fd-b1f9-4e0f-9f6e-1d58f1c18cda.json new file mode 100644 index 000000000..b14841c8a --- /dev/null +++ 
b/data/hfopenllm_v2/nothingiisreal/MN-12B-Starcannon-v2/c20264fd-b1f9-4e0f-9f6e-1d58f1c18cda.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nothingiisreal_MN-12B-Starcannon-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MN-12B-Starcannon-v2", + "id": "nothingiisreal/MN-12B-Starcannon-v2", + "developer": "nothingiisreal", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3925 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5004 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0597 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2785 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3978 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3128 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nothingiisreal/MN-12B-Starcannon-v3/59f14dca-923a-41f1-b443-cc3551063f45.json b/data/hfopenllm_v2/nothingiisreal/MN-12B-Starcannon-v3/59f14dca-923a-41f1-b443-cc3551063f45.json new file mode 100644 index 000000000..9f9ddf54f --- /dev/null +++ b/data/hfopenllm_v2/nothingiisreal/MN-12B-Starcannon-v3/59f14dca-923a-41f1-b443-cc3551063f45.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/nothingiisreal_MN-12B-Starcannon-v3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MN-12B-Starcannon-v3", + "id": "nothingiisreal/MN-12B-Starcannon-v3", + "developer": "nothingiisreal", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3807 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5171 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0778 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2735 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4046 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3265 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nothingiisreal/MN-12B-Starcannon-v3/633a786a-fe99-4a6e-b402-888e36e8b6c9.json b/data/hfopenllm_v2/nothingiisreal/MN-12B-Starcannon-v3/633a786a-fe99-4a6e-b402-888e36e8b6c9.json deleted file mode 100644 index c6ec8add6..000000000 --- a/data/hfopenllm_v2/nothingiisreal/MN-12B-Starcannon-v3/633a786a-fe99-4a6e-b402-888e36e8b6c9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nothingiisreal_MN-12B-Starcannon-v3/1762652580.412042", - "retrieved_timestamp": "1762652580.412042", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nothingiisreal/MN-12B-Starcannon-v3", - "developer": "nothingiisreal", - "inference_platform": "unknown", - "id": "nothingiisreal/MN-12B-Starcannon-v3", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38073755413414184 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5170553444795719 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07779456193353475 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40463541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32646276595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/nvidia/AceInstruct-1.5B/a1ba054f-b0a1-4827-b7ea-3988aa4cf1f1.json b/data/hfopenllm_v2/nvidia/AceInstruct-1.5B/a1ba054f-b0a1-4827-b7ea-3988aa4cf1f1.json new file mode 100644 index 000000000..e9741ba03 --- /dev/null +++ b/data/hfopenllm_v2/nvidia/AceInstruct-1.5B/a1ba054f-b0a1-4827-b7ea-3988aa4cf1f1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nvidia_AceInstruct-1.5B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AceInstruct-1.5B", + "id": "nvidia/AceInstruct-1.5B", + "developer": "nvidia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.3948 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3932 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3127 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2718 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.346 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2574 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nvidia/AceInstruct-1.5B/a26b4b3f-aad1-4d2f-a97a-bf24850a3092.json b/data/hfopenllm_v2/nvidia/AceInstruct-1.5B/a26b4b3f-aad1-4d2f-a97a-bf24850a3092.json deleted file mode 100644 index 20f4a7fbe..000000000 --- a/data/hfopenllm_v2/nvidia/AceInstruct-1.5B/a26b4b3f-aad1-4d2f-a97a-bf24850a3092.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nvidia_AceInstruct-1.5B/1762652580.412246", - "retrieved_timestamp": "1762652580.412247", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nvidia/AceInstruct-1.5B", - "developer": "nvidia", - "inference_platform": "unknown", - "id": "nvidia/AceInstruct-1.5B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3947758613811354 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3931958135346713 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31268882175226587 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34600000000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2573969414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/nvidia/AceInstruct-72B/08e924b1-121c-4ff7-bf1d-06b9cb90c7c0.json b/data/hfopenllm_v2/nvidia/AceInstruct-72B/08e924b1-121c-4ff7-bf1d-06b9cb90c7c0.json deleted file mode 100644 index efc6313bd..000000000 --- a/data/hfopenllm_v2/nvidia/AceInstruct-72B/08e924b1-121c-4ff7-bf1d-06b9cb90c7c0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nvidia_AceInstruct-72B/1762652580.4124959", - "retrieved_timestamp": "1762652580.4124968", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nvidia/AceInstruct-72B", - "developer": "nvidia", - "inference_platform": "unknown", - "id": "nvidia/AceInstruct-72B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.711888899231816 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6139041785911337 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6261329305135952 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42060416666666667 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48736702127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/nvidia/AceInstruct-72B/51d8f53f-ad7e-4dae-9e2a-0895729ff790.json b/data/hfopenllm_v2/nvidia/AceInstruct-72B/51d8f53f-ad7e-4dae-9e2a-0895729ff790.json new file mode 100644 index 000000000..589e694b8 --- /dev/null +++ b/data/hfopenllm_v2/nvidia/AceInstruct-72B/51d8f53f-ad7e-4dae-9e2a-0895729ff790.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nvidia_AceInstruct-72B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AceInstruct-72B", + "id": "nvidia/AceInstruct-72B", + "developer": "nvidia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7119 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6139 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6261 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3213 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4206 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4874 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/nvidia/AceInstruct-7B/421119ea-0da8-4b26-a335-f2e720618c44.json b/data/hfopenllm_v2/nvidia/AceInstruct-7B/421119ea-0da8-4b26-a335-f2e720618c44.json new file mode 100644 index 000000000..50b4c8bff --- /dev/null +++ b/data/hfopenllm_v2/nvidia/AceInstruct-7B/421119ea-0da8-4b26-a335-f2e720618c44.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nvidia_AceInstruct-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AceInstruct-7B", + "id": "nvidia/AceInstruct-7B", + "developer": "nvidia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5422 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5501 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5295 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4255 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4177 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nvidia/AceInstruct-7B/d0680660-92e5-471b-a4c9-2658e7c59dd0.json b/data/hfopenllm_v2/nvidia/AceInstruct-7B/d0680660-92e5-471b-a4c9-2658e7c59dd0.json deleted file mode 100644 index a540d9948..000000000 --- 
a/data/hfopenllm_v2/nvidia/AceInstruct-7B/d0680660-92e5-471b-a4c9-2658e7c59dd0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nvidia_AceInstruct-7B/1762652580.412692", - "retrieved_timestamp": "1762652580.412693", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nvidia/AceInstruct-7B", - "developer": "nvidia", - "inference_platform": "unknown", - "id": "nvidia/AceInstruct-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5422290633297429 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.550118130896558 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5294561933534743 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4255 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.417719414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/nvidia/AceMath-1.5B-Instruct/8584e2c5-dd32-4cd0-9089-1b4e17a1ffac.json b/data/hfopenllm_v2/nvidia/AceMath-1.5B-Instruct/8584e2c5-dd32-4cd0-9089-1b4e17a1ffac.json deleted file mode 100644 index a11edde87..000000000 --- a/data/hfopenllm_v2/nvidia/AceMath-1.5B-Instruct/8584e2c5-dd32-4cd0-9089-1b4e17a1ffac.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nvidia_AceMath-1.5B-Instruct/1762652580.412895", - "retrieved_timestamp": "1762652580.412896", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nvidia/AceMath-1.5B-Instruct", - "developer": "nvidia", - "inference_platform": "unknown", - "id": "nvidia/AceMath-1.5B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 
- } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32123654126606294 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4024301274933693 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5287009063444109 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3606979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20636635638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/nvidia/AceMath-1.5B-Instruct/b0e6bfb2-a8d4-4b1d-859a-aa821f646e57.json b/data/hfopenllm_v2/nvidia/AceMath-1.5B-Instruct/b0e6bfb2-a8d4-4b1d-859a-aa821f646e57.json new file mode 100644 index 000000000..cd6ae73f6 --- /dev/null +++ b/data/hfopenllm_v2/nvidia/AceMath-1.5B-Instruct/b0e6bfb2-a8d4-4b1d-859a-aa821f646e57.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nvidia_AceMath-1.5B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AceMath-1.5B-Instruct", + "id": "nvidia/AceMath-1.5B-Instruct", + "developer": "nvidia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3212 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4024 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5287 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3607 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2064 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nvidia/AceMath-72B-Instruct/4ba1027b-f0c1-4ed9-aa30-35c4e01e564d.json b/data/hfopenllm_v2/nvidia/AceMath-72B-Instruct/4ba1027b-f0c1-4ed9-aa30-35c4e01e564d.json deleted file mode 100644 index 6a4f8fb07..000000000 --- a/data/hfopenllm_v2/nvidia/AceMath-72B-Instruct/4ba1027b-f0c1-4ed9-aa30-35c4e01e564d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nvidia_AceMath-72B-Instruct/1762652580.413093", - "retrieved_timestamp": "1762652580.4130938", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nvidia/AceMath-72B-Instruct", - "developer": "nvidia", - "inference_platform": "unknown", - "id": "nvidia/AceMath-72B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.494993284485166 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.640215611099268 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7145015105740181 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40615625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44107380319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/nvidia/AceMath-72B-Instruct/7c4c2ccf-7d7b-4d24-802e-20c182290d07.json b/data/hfopenllm_v2/nvidia/AceMath-72B-Instruct/7c4c2ccf-7d7b-4d24-802e-20c182290d07.json new file mode 100644 index 000000000..8c57ec762 --- /dev/null +++ b/data/hfopenllm_v2/nvidia/AceMath-72B-Instruct/7c4c2ccf-7d7b-4d24-802e-20c182290d07.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nvidia_AceMath-72B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AceMath-72B-Instruct", + "id": "nvidia/AceMath-72B-Instruct", + "developer": "nvidia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.495 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6402 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7145 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4062 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + 
"source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4411 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nvidia/AceMath-72B-RM/5fdd0c8f-3393-4b59-8cc1-511c524c493a.json b/data/hfopenllm_v2/nvidia/AceMath-72B-RM/5fdd0c8f-3393-4b59-8cc1-511c524c493a.json deleted file mode 100644 index 171eb6815..000000000 --- a/data/hfopenllm_v2/nvidia/AceMath-72B-RM/5fdd0c8f-3393-4b59-8cc1-511c524c493a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nvidia_AceMath-72B-RM/1762652580.413297", - "retrieved_timestamp": "1762652580.413298", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nvidia/AceMath-72B-RM", - "developer": "nvidia", - "inference_platform": "unknown", - "id": "nvidia/AceMath-72B-RM", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForSequenceClassification", - "params_billions": 71.461 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14125963554479892 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2717426350897727 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23406040268456377 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3351458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11785239361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/nvidia/AceMath-72B-RM/95212a55-f382-4869-9e11-cfa201ba865b.json b/data/hfopenllm_v2/nvidia/AceMath-72B-RM/95212a55-f382-4869-9e11-cfa201ba865b.json new file mode 100644 index 000000000..a94d6d7b0 --- /dev/null +++ b/data/hfopenllm_v2/nvidia/AceMath-72B-RM/95212a55-f382-4869-9e11-cfa201ba865b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nvidia_AceMath-72B-RM/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + 
"source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AceMath-72B-RM", + "id": "nvidia/AceMath-72B-RM", + "developer": "nvidia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForSequenceClassification", + "params_billions": 71.461 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1413 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2717 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2341 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3351 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1179 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nvidia/AceMath-7B-Instruct/a7da2118-063c-489f-bb31-40f1b7beeefe.json b/data/hfopenllm_v2/nvidia/AceMath-7B-Instruct/a7da2118-063c-489f-bb31-40f1b7beeefe.json new file mode 100644 index 000000000..66e2a33e8 --- /dev/null +++ b/data/hfopenllm_v2/nvidia/AceMath-7B-Instruct/a7da2118-063c-489f-bb31-40f1b7beeefe.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nvidia_AceMath-7B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AceMath-7B-Instruct", + "id": "nvidia/AceMath-7B-Instruct", + "developer": 
"nvidia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4532 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4994 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6337 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4193 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3383 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nvidia/AceMath-7B-Instruct/e1c94d59-dfa4-49cf-9052-9ce6e713a0be.json b/data/hfopenllm_v2/nvidia/AceMath-7B-Instruct/e1c94d59-dfa4-49cf-9052-9ce6e713a0be.json deleted file mode 100644 index 50b14fbf0..000000000 --- a/data/hfopenllm_v2/nvidia/AceMath-7B-Instruct/e1c94d59-dfa4-49cf-9052-9ce6e713a0be.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nvidia_AceMath-7B-Instruct/1762652580.413503", - "retrieved_timestamp": "1762652580.413504", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nvidia/AceMath-7B-Instruct", - "developer": "nvidia", - "inference_platform": "unknown", - "id": "nvidia/AceMath-7B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - 
"params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45317756885064964 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49938547326244365 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6336858006042296 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4192708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33834773936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/nvidia/AceMath-7B-RM/9a75ae18-8f9a-40a5-8a7b-0c38df34e9dd.json b/data/hfopenllm_v2/nvidia/AceMath-7B-RM/9a75ae18-8f9a-40a5-8a7b-0c38df34e9dd.json new file mode 100644 index 000000000..9eb39d05c --- /dev/null +++ b/data/hfopenllm_v2/nvidia/AceMath-7B-RM/9a75ae18-8f9a-40a5-8a7b-0c38df34e9dd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nvidia_AceMath-7B-RM/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AceMath-7B-RM", + "id": "nvidia/AceMath-7B-RM", + "developer": "nvidia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForSequenceClassification", + "params_billions": 7.071 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1494 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2423 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + 
"metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2458 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.358 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1139 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nvidia/AceMath-7B-RM/ab9c685d-7b97-4bf4-bc0e-ffd5666e35d9.json b/data/hfopenllm_v2/nvidia/AceMath-7B-RM/ab9c685d-7b97-4bf4-bc0e-ffd5666e35d9.json deleted file mode 100644 index 427f94913..000000000 --- a/data/hfopenllm_v2/nvidia/AceMath-7B-RM/ab9c685d-7b97-4bf4-bc0e-ffd5666e35d9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nvidia_AceMath-7B-RM/1762652580.4138508", - "retrieved_timestamp": "1762652580.413853", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nvidia/AceMath-7B-RM", - "developer": "nvidia", - "inference_platform": "unknown", - "id": "nvidia/AceMath-7B-RM", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForSequenceClassification", - "params_billions": 7.071 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14937809456686035 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2422689292768334 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24580536912751677 - } - }, - { - "evaluation_name": "MUSR", 
- "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35800000000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11386303191489362 - } - } - ] -} diff --git a/data/hfopenllm_v2/nvidia/Hymba-1.5B-Base/89f9149f-1f6d-4389-819a-d958b0ecc6b8.json b/data/hfopenllm_v2/nvidia/Hymba-1.5B-Base/89f9149f-1f6d-4389-819a-d958b0ecc6b8.json deleted file mode 100644 index 1a859a04e..000000000 --- a/data/hfopenllm_v2/nvidia/Hymba-1.5B-Base/89f9149f-1f6d-4389-819a-d958b0ecc6b8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nvidia_Hymba-1.5B-Base/1762652580.4142", - "retrieved_timestamp": "1762652580.4142022", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nvidia/Hymba-1.5B-Base", - "developer": "nvidia", - "inference_platform": "unknown", - "id": "nvidia/Hymba-1.5B-Base", - "additional_details": { - "precision": "bfloat16", - "architecture": "HymbaForCausalLM", - "params_billions": 1.523 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2295121389025563 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32564785214182224 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3566354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19223736702127658 - } - } - ] -} diff --git a/data/hfopenllm_v2/nvidia/Hymba-1.5B-Base/a85d4a1f-fbd9-4d21-9700-9e55e30c1391.json b/data/hfopenllm_v2/nvidia/Hymba-1.5B-Base/a85d4a1f-fbd9-4d21-9700-9e55e30c1391.json new file mode 100644 index 000000000..f9cc991e3 --- /dev/null +++ b/data/hfopenllm_v2/nvidia/Hymba-1.5B-Base/a85d4a1f-fbd9-4d21-9700-9e55e30c1391.json @@ -0,0 +1,132 
@@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nvidia_Hymba-1.5B-Base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Hymba-1.5B-Base", + "id": "nvidia/Hymba-1.5B-Base", + "developer": "nvidia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "HymbaForCausalLM", + "params_billions": 1.523 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2295 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3256 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0136 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2559 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3566 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1922 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nvidia/Hymba-1.5B-Instruct/2fd1c45e-209c-43da-ae85-d60887513a96.json b/data/hfopenllm_v2/nvidia/Hymba-1.5B-Instruct/2fd1c45e-209c-43da-ae85-d60887513a96.json new file mode 100644 index 000000000..9057e3697 --- /dev/null +++ b/data/hfopenllm_v2/nvidia/Hymba-1.5B-Instruct/2fd1c45e-209c-43da-ae85-d60887513a96.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nvidia_Hymba-1.5B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Hymba-1.5B-Instruct", + "id": "nvidia/Hymba-1.5B-Instruct", + "developer": "nvidia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "HymbaForCausalLM", + "params_billions": 1.523 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6009 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3067 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0272 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2886 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3316 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.204 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nvidia/Hymba-1.5B-Instruct/ae6e9c29-eb12-4dd5-bdbc-e84b499cf40f.json b/data/hfopenllm_v2/nvidia/Hymba-1.5B-Instruct/ae6e9c29-eb12-4dd5-bdbc-e84b499cf40f.json deleted file mode 100644 index 127002d92..000000000 --- a/data/hfopenllm_v2/nvidia/Hymba-1.5B-Instruct/ae6e9c29-eb12-4dd5-bdbc-e84b499cf40f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nvidia_Hymba-1.5B-Instruct/1762652580.414529", - "retrieved_timestamp": "1762652580.41453", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nvidia/Hymba-1.5B-Instruct", - 
"developer": "nvidia", - "inference_platform": "unknown", - "id": "nvidia/Hymba-1.5B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "HymbaForCausalLM", - "params_billions": 1.523 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6009055971488984 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3067133908231881 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027190332326283987 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33158333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20403922872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/nvidia/Llama-3.1-Minitron-4B-Depth-Base/91e0e6aa-b933-4a02-a28d-8d69e698c60a.json b/data/hfopenllm_v2/nvidia/Llama-3.1-Minitron-4B-Depth-Base/91e0e6aa-b933-4a02-a28d-8d69e698c60a.json new file mode 100644 index 000000000..cafbeee97 --- /dev/null +++ b/data/hfopenllm_v2/nvidia/Llama-3.1-Minitron-4B-Depth-Base/91e0e6aa-b933-4a02-a28d-8d69e698c60a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nvidia_Llama-3.1-Minitron-4B-Depth-Base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-Minitron-4B-Depth-Base", + "id": "nvidia/Llama-3.1-Minitron-4B-Depth-Base", + "developer": "nvidia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.02 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1607 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4171 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0196 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4011 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2798 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF/2366b5e1-0a56-4d6e-83e6-12f12eca3ec4.json b/data/hfopenllm_v2/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF/2366b5e1-0a56-4d6e-83e6-12f12eca3ec4.json deleted file mode 100644 index ac330a38d..000000000 --- a/data/hfopenllm_v2/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF/2366b5e1-0a56-4d6e-83e6-12f12eca3ec4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nvidia_Llama-3.1-Nemotron-70B-Instruct-HF/1762652580.415039", - "retrieved_timestamp": "1762652580.41504", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF", - "developer": "nvidia", - "inference_platform": "unknown", - "id": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7380672172059026 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6316000668895038 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42673716012084595 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4327604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49185505319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF/6f3f3d06-2937-4c55-9b95-a62ae5253571.json b/data/hfopenllm_v2/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF/6f3f3d06-2937-4c55-9b95-a62ae5253571.json new file mode 100644 index 000000000..666f668f2 --- /dev/null +++ b/data/hfopenllm_v2/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF/6f3f3d06-2937-4c55-9b95-a62ae5253571.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nvidia_Llama-3.1-Nemotron-70B-Instruct-HF/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-Nemotron-70B-Instruct-HF", + "id": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF", + "developer": "nvidia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7381 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6316 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4267 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + 
"evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4328 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4919 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nvidia/Minitron-4B-Base/9b3ffdd3-ac18-4084-9e83-1bfc61db0ec2.json b/data/hfopenllm_v2/nvidia/Minitron-4B-Base/9b3ffdd3-ac18-4084-9e83-1bfc61db0ec2.json new file mode 100644 index 000000000..2e2aa65ef --- /dev/null +++ b/data/hfopenllm_v2/nvidia/Minitron-4B-Base/9b3ffdd3-ac18-4084-9e83-1bfc61db0ec2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nvidia_Minitron-4B-Base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Minitron-4B-Base", + "id": "nvidia/Minitron-4B-Base", + "developer": "nvidia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "NemotronForCausalLM", + "params_billions": 4.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2218 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4084 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0196 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2693 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.4134 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.262 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nvidia/Minitron-4B-Base/f5e52953-2dfc-4661-81cd-ed96d7a52482.json b/data/hfopenllm_v2/nvidia/Minitron-4B-Base/f5e52953-2dfc-4661-81cd-ed96d7a52482.json deleted file mode 100644 index ceffb3fbf..000000000 --- a/data/hfopenllm_v2/nvidia/Minitron-4B-Base/f5e52953-2dfc-4661-81cd-ed96d7a52482.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nvidia_Minitron-4B-Base/1762652580.415251", - "retrieved_timestamp": "1762652580.415252", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nvidia/Minitron-4B-Base", - "developer": "nvidia", - "inference_platform": "unknown", - "id": "nvidia/Minitron-4B-Base", - "additional_details": { - "precision": "bfloat16", - "architecture": "NemotronForCausalLM", - "params_billions": 4.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2217937295265451 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4083876243992497 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.413375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.261968085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/nvidia/Minitron-8B-Base/3f6ec864-adf4-422f-85c1-19ef2417489a.json b/data/hfopenllm_v2/nvidia/Minitron-8B-Base/3f6ec864-adf4-422f-85c1-19ef2417489a.json deleted file mode 100644 index f659cf81f..000000000 --- a/data/hfopenllm_v2/nvidia/Minitron-8B-Base/3f6ec864-adf4-422f-85c1-19ef2417489a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - 
"schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nvidia_Minitron-8B-Base/1762652580.415456", - "retrieved_timestamp": "1762652580.415456", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nvidia/Minitron-8B-Base", - "developer": "nvidia", - "inference_platform": "unknown", - "id": "nvidia/Minitron-8B-Base", - "additional_details": { - "precision": "bfloat16", - "architecture": "NemotronForCausalLM", - "params_billions": 7.22 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24242676099416216 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43950631883576047 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0256797583081571 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40255208333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31806848404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/nvidia/Minitron-8B-Base/60077cbd-87af-4a00-a359-9235acb011ed.json b/data/hfopenllm_v2/nvidia/Minitron-8B-Base/60077cbd-87af-4a00-a359-9235acb011ed.json new file mode 100644 index 000000000..e6699dedf --- /dev/null +++ b/data/hfopenllm_v2/nvidia/Minitron-8B-Base/60077cbd-87af-4a00-a359-9235acb011ed.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nvidia_Minitron-8B-Base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Minitron-8B-Base", + "id": "nvidia/Minitron-8B-Base", + "developer": "nvidia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "NemotronForCausalLM", + "params_billions": 7.22 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on 
IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2424 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4395 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0257 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2735 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4026 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3181 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nvidia/Mistral-NeMo-Minitron-8B-Base/577936a8-b450-4233-b633-064565b3d1a4.json b/data/hfopenllm_v2/nvidia/Mistral-NeMo-Minitron-8B-Base/577936a8-b450-4233-b633-064565b3d1a4.json new file mode 100644 index 000000000..b690a4bd6 --- /dev/null +++ b/data/hfopenllm_v2/nvidia/Mistral-NeMo-Minitron-8B-Base/577936a8-b450-4233-b633-064565b3d1a4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nvidia_Mistral-NeMo-Minitron-8B-Base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-NeMo-Minitron-8B-Base", + "id": "nvidia/Mistral-NeMo-Minitron-8B-Base", + "developer": "nvidia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.88 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1946 + } + }, + { + "evaluation_name": "BBH", + "source_data": 
{ + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5219 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0483 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3255 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4092 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3796 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nvidia/Mistral-NeMo-Minitron-8B-Instruct/470b9413-2cc8-4bf4-9e7c-0b8e99929568.json b/data/hfopenllm_v2/nvidia/Mistral-NeMo-Minitron-8B-Instruct/470b9413-2cc8-4bf4-9e7c-0b8e99929568.json new file mode 100644 index 000000000..964990218 --- /dev/null +++ b/data/hfopenllm_v2/nvidia/Mistral-NeMo-Minitron-8B-Instruct/470b9413-2cc8-4bf4-9e7c-0b8e99929568.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nvidia_Mistral-NeMo-Minitron-8B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-NeMo-Minitron-8B-Instruct", + "id": "nvidia/Mistral-NeMo-Minitron-8B-Instruct", + "developer": "nvidia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 8.414 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5004 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5321 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1163 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2878 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3886 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3991 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nvidia/Mistral-NeMo-Minitron-8B-Instruct/f4c299f0-d957-4784-8512-23f72a26a095.json b/data/hfopenllm_v2/nvidia/Mistral-NeMo-Minitron-8B-Instruct/f4c299f0-d957-4784-8512-23f72a26a095.json deleted file mode 100644 index d338c4fbd..000000000 --- a/data/hfopenllm_v2/nvidia/Mistral-NeMo-Minitron-8B-Instruct/f4c299f0-d957-4784-8512-23f72a26a095.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nvidia_Mistral-NeMo-Minitron-8B-Instruct/1762652580.415967", - "retrieved_timestamp": "1762652580.415968", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nvidia/Mistral-NeMo-Minitron-8B-Instruct", - "developer": "nvidia", - "inference_platform": "unknown", - "id": "nvidia/Mistral-NeMo-Minitron-8B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 8.414 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5003889679384035 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5320919605840294 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1163141993957704 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38857291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39910239361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/nvidia/Nemotron-Mini-4B-Instruct/3cbf9c73-0dc8-402e-bc94-c6d52b9f1af7.json b/data/hfopenllm_v2/nvidia/Nemotron-Mini-4B-Instruct/3cbf9c73-0dc8-402e-bc94-c6d52b9f1af7.json new file mode 100644 index 000000000..95835d5c7 --- /dev/null +++ b/data/hfopenllm_v2/nvidia/Nemotron-Mini-4B-Instruct/3cbf9c73-0dc8-402e-bc94-c6d52b9f1af7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nvidia_Nemotron-Mini-4B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nemotron-Mini-4B-Instruct", + "id": "nvidia/Nemotron-Mini-4B-Instruct", + "developer": "nvidia", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "NemotronForCausalLM", + "params_billions": 4.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6669 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3865 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0257 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + 
"dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3767 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2626 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nvidia/Nemotron-Mini-4B-Instruct/ab7ee3ac-4d47-4ec6-a2af-8a6f7eb96684.json b/data/hfopenllm_v2/nvidia/Nemotron-Mini-4B-Instruct/ab7ee3ac-4d47-4ec6-a2af-8a6f7eb96684.json deleted file mode 100644 index 71e140cef..000000000 --- a/data/hfopenllm_v2/nvidia/Nemotron-Mini-4B-Instruct/ab7ee3ac-4d47-4ec6-a2af-8a6f7eb96684.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nvidia_Nemotron-Mini-4B-Instruct/1762652580.41618", - "retrieved_timestamp": "1762652580.416181", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nvidia/Nemotron-Mini-4B-Instruct", - "developer": "nvidia", - "inference_platform": "unknown", - "id": "nvidia/Nemotron-Mini-4B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "NemotronForCausalLM", - "params_billions": 4.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6668761109411916 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3864840798591535 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0256797583081571 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3767291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26263297872340424 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/nvidia/OpenMath2-Llama3.1-8B/3fccb1d0-5ae1-427a-adae-37004ecbacaa.json b/data/hfopenllm_v2/nvidia/OpenMath2-Llama3.1-8B/3fccb1d0-5ae1-427a-adae-37004ecbacaa.json new file mode 100644 index 000000000..2fec4ab85 --- /dev/null +++ b/data/hfopenllm_v2/nvidia/OpenMath2-Llama3.1-8B/3fccb1d0-5ae1-427a-adae-37004ecbacaa.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nvidia_OpenMath2-Llama3.1-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenMath2-Llama3.1-8B", + "id": "nvidia/OpenMath2-Llama3.1-8B", + "developer": "nvidia", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2331 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4096 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2674 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3436 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1553 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nxmwxm/Beast-Soul-new/4ae25fa0-54af-4f47-853f-c97cd7b312d3.json b/data/hfopenllm_v2/nxmwxm/Beast-Soul-new/4ae25fa0-54af-4f47-853f-c97cd7b312d3.json deleted file mode 100644 index b14c96dfd..000000000 --- 
a/data/hfopenllm_v2/nxmwxm/Beast-Soul-new/4ae25fa0-54af-4f47-853f-c97cd7b312d3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/nxmwxm_Beast-Soul-new/1762652580.416598", - "retrieved_timestamp": "1762652580.416599", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "nxmwxm/Beast-Soul-new", - "developer": "nxmwxm", - "inference_platform": "unknown", - "id": "nxmwxm/Beast-Soul-new", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48687482546310457 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5227143628884523 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07401812688821752 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4459270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3101728723404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/nxmwxm/Beast-Soul-new/6463183f-4043-4b96-b4d1-0bd41b4d6876.json b/data/hfopenllm_v2/nxmwxm/Beast-Soul-new/6463183f-4043-4b96-b4d1-0bd41b4d6876.json new file mode 100644 index 000000000..63096f4f3 --- /dev/null +++ b/data/hfopenllm_v2/nxmwxm/Beast-Soul-new/6463183f-4043-4b96-b4d1-0bd41b4d6876.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/nxmwxm_Beast-Soul-new/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Beast-Soul-new", + "id": "nxmwxm/Beast-Soul-new", + "developer": "nxmwxm", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": 
"hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4869 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5227 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.074 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2819 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4459 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3102 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/occiglot/occiglot-7b-es-en-instruct/0b102423-1a06-4e5b-a287-710695658b63.json b/data/hfopenllm_v2/occiglot/occiglot-7b-es-en-instruct/0b102423-1a06-4e5b-a287-710695658b63.json new file mode 100644 index 000000000..14b07b1d6 --- /dev/null +++ b/data/hfopenllm_v2/occiglot/occiglot-7b-es-en-instruct/0b102423-1a06-4e5b-a287-710695658b63.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/occiglot_occiglot-7b-es-en-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "occiglot-7b-es-en-instruct", + "id": "occiglot/occiglot-7b-es-en-instruct", + "developer": "occiglot", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 
+ }, + "score_details": { + "score": 0.3485 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4111 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0242 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3738 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2311 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/occiglot/occiglot-7b-es-en-instruct/4207b47d-711c-4af8-9c70-becb270973eb.json b/data/hfopenllm_v2/occiglot/occiglot-7b-es-en-instruct/4207b47d-711c-4af8-9c70-becb270973eb.json deleted file mode 100644 index 784795149..000000000 --- a/data/hfopenllm_v2/occiglot/occiglot-7b-es-en-instruct/4207b47d-711c-4af8-9c70-becb270973eb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/occiglot_occiglot-7b-es-en-instruct/1762652580.416852", - "retrieved_timestamp": "1762652580.416853", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "occiglot/occiglot-7b-es-en-instruct", - "developer": "occiglot", - "inference_platform": "unknown", - "id": "occiglot/occiglot-7b-es-en-instruct", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3485141646387142 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4110970229781084 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02416918429003021 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2310505319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/odyssey-labs/Astral-1-10B/4fefa5ae-d421-4883-b734-d6cc8bd8f4d6.json b/data/hfopenllm_v2/odyssey-labs/Astral-1-10B/4fefa5ae-d421-4883-b734-d6cc8bd8f4d6.json deleted file mode 100644 index 659ed5bd7..000000000 --- a/data/hfopenllm_v2/odyssey-labs/Astral-1-10B/4fefa5ae-d421-4883-b734-d6cc8bd8f4d6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/odyssey-labs_Astral-1-10B/1762652580.417092", - "retrieved_timestamp": "1762652580.417093", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "odyssey-labs/Astral-1-10B", - "developer": "odyssey-labs", - "inference_platform": "unknown", - "id": "odyssey-labs/Astral-1-10B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38780657544204933 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4872563924334199 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42797916666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29853723404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/odyssey-labs/Astral-1-10B/b7e4ffd8-2a5a-4364-844a-a308dd7c899c.json b/data/hfopenllm_v2/odyssey-labs/Astral-1-10B/b7e4ffd8-2a5a-4364-844a-a308dd7c899c.json new file mode 100644 index 000000000..81a00bf26 --- /dev/null +++ b/data/hfopenllm_v2/odyssey-labs/Astral-1-10B/b7e4ffd8-2a5a-4364-844a-a308dd7c899c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/odyssey-labs_Astral-1-10B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Astral-1-10B", + "id": "odyssey-labs/Astral-1-10B", + "developer": "odyssey-labs", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3878 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4873 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0347 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.428 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.2985 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/olabs-ai/reflection_model/3fa2e3ef-a375-4ca5-9f85-7cb986313d53.json b/data/hfopenllm_v2/olabs-ai/reflection_model/3fa2e3ef-a375-4ca5-9f85-7cb986313d53.json new file mode 100644 index 000000000..619a40e89 --- /dev/null +++ b/data/hfopenllm_v2/olabs-ai/reflection_model/3fa2e3ef-a375-4ca5-9f85-7cb986313d53.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/olabs-ai_reflection_model/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "reflection_model", + "id": "olabs-ai/reflection_model", + "developer": "olabs-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 9.3 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1599 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4713 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0514 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3508 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3311 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/olabs-ai/reflection_model/84b63639-3343-4568-9fa7-d353ccb5b465.json b/data/hfopenllm_v2/olabs-ai/reflection_model/84b63639-3343-4568-9fa7-d353ccb5b465.json 
deleted file mode 100644 index b8728bfd8..000000000 --- a/data/hfopenllm_v2/olabs-ai/reflection_model/84b63639-3343-4568-9fa7-d353ccb5b465.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/olabs-ai_reflection_model/1762652580.417324", - "retrieved_timestamp": "1762652580.417325", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "olabs-ai/reflection_model", - "developer": "olabs-ai", - "inference_platform": "unknown", - "id": "olabs-ai/reflection_model", - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 9.3 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15986914719610634 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4712508645838735 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35083333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33111702127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/ontocord/Llama_3.2_1b-autoredteam_helpfulness-train/abd48d9d-0443-40be-a23a-68922771e14f.json b/data/hfopenllm_v2/ontocord/Llama_3.2_1b-autoredteam_helpfulness-train/abd48d9d-0443-40be-a23a-68922771e14f.json new file mode 100644 index 000000000..03e54e74a --- /dev/null +++ b/data/hfopenllm_v2/ontocord/Llama_3.2_1b-autoredteam_helpfulness-train/abd48d9d-0443-40be-a23a-68922771e14f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ontocord_Llama_3.2_1b-autoredteam_helpfulness-train/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama_3.2_1b-autoredteam_helpfulness-train", + "id": "ontocord/Llama_3.2_1b-autoredteam_helpfulness-train", + "developer": "ontocord", + "inference_platform": "unknown", + "additional_details": { + 
"precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2765 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3115 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0166 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3459 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1132 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/RedPajama-3B-v1-AutoRedteam-Harmless-only/436ff0a4-9907-4e56-a5f2-c97f1b13f81a.json b/data/hfopenllm_v2/ontocord/RedPajama-3B-v1-AutoRedteam-Harmless-only/436ff0a4-9907-4e56-a5f2-c97f1b13f81a.json new file mode 100644 index 000000000..070fd99d6 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/RedPajama-3B-v1-AutoRedteam-Harmless-only/436ff0a4-9907-4e56-a5f2-c97f1b13f81a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ontocord_RedPajama-3B-v1-AutoRedteam-Harmless-only/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RedPajama-3B-v1-AutoRedteam-Harmless-only", + "id": "ontocord/RedPajama-3B-v1-AutoRedteam-Harmless-only", + "developer": "ontocord", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 2.776 + } + }, + "evaluation_results": [ 
+ { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1525 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3124 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.006 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2315 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3661 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.11 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/RedPajama-3B-v1-AutoRedteam-Harmless-only/8b50fd5a-9f95-4213-98e2-ee66e1602cdf.json b/data/hfopenllm_v2/ontocord/RedPajama-3B-v1-AutoRedteam-Harmless-only/8b50fd5a-9f95-4213-98e2-ee66e1602cdf.json deleted file mode 100644 index d394a69ea..000000000 --- a/data/hfopenllm_v2/ontocord/RedPajama-3B-v1-AutoRedteam-Harmless-only/8b50fd5a-9f95-4213-98e2-ee66e1602cdf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ontocord_RedPajama-3B-v1-AutoRedteam-Harmless-only/1762652580.418057", - "retrieved_timestamp": "1762652580.418057", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ontocord/RedPajama-3B-v1-AutoRedteam-Harmless-only", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/RedPajama-3B-v1-AutoRedteam-Harmless-only", - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 2.776 - } - }, - "evaluation_results": [ - 
{ - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.152475431854147 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3123669789182832 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23154362416107382 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.366125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10995678191489362 - } - } - ] -} diff --git a/data/hfopenllm_v2/ontocord/RedPajama-3B-v1-AutoRedteam/7a654100-b206-4011-828e-fb386df27d0c.json b/data/hfopenllm_v2/ontocord/RedPajama-3B-v1-AutoRedteam/7a654100-b206-4011-828e-fb386df27d0c.json new file mode 100644 index 000000000..20317204f --- /dev/null +++ b/data/hfopenllm_v2/ontocord/RedPajama-3B-v1-AutoRedteam/7a654100-b206-4011-828e-fb386df27d0c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ontocord_RedPajama-3B-v1-AutoRedteam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RedPajama-3B-v1-AutoRedteam", + "id": "ontocord/RedPajama-3B-v1-AutoRedteam", + "developer": "ontocord", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 2.776 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1343 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3026 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0091 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2424 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3661 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1108 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/RedPajama-3B-v1-AutoRedteam/9f85efe5-9fe1-4ad3-9438-da4dbf886f9d.json b/data/hfopenllm_v2/ontocord/RedPajama-3B-v1-AutoRedteam/9f85efe5-9fe1-4ad3-9438-da4dbf886f9d.json deleted file mode 100644 index 3235ff7d5..000000000 --- a/data/hfopenllm_v2/ontocord/RedPajama-3B-v1-AutoRedteam/9f85efe5-9fe1-4ad3-9438-da4dbf886f9d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ontocord_RedPajama-3B-v1-AutoRedteam/1762652580.4178078", - "retrieved_timestamp": "1762652580.4178078", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ontocord/RedPajama-3B-v1-AutoRedteam", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/RedPajama-3B-v1-AutoRedteam", - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 2.776 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13434021729012352 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30256825198631376 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2424496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36606249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1107878989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/ontocord/RedPajama3b_v1-autoredteam_helpfulness-train/2f0e262c-a099-41f4-89f1-8b251708a960.json b/data/hfopenllm_v2/ontocord/RedPajama3b_v1-autoredteam_helpfulness-train/2f0e262c-a099-41f4-89f1-8b251708a960.json new file mode 100644 index 000000000..b83d49b55 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/RedPajama3b_v1-autoredteam_helpfulness-train/2f0e262c-a099-41f4-89f1-8b251708a960.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ontocord_RedPajama3b_v1-autoredteam_helpfulness-train/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RedPajama3b_v1-autoredteam_helpfulness-train", + "id": "ontocord/RedPajama3b_v1-autoredteam_helpfulness-train", + "developer": "ontocord", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 2.776 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2848 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3093 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0068 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2458 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.358 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1107 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/RedPajama3b_v1-autoredteam_helpfulness-train/d070a397-6bd5-4407-b030-aecdc31eb47c.json b/data/hfopenllm_v2/ontocord/RedPajama3b_v1-autoredteam_helpfulness-train/d070a397-6bd5-4407-b030-aecdc31eb47c.json deleted file mode 100644 index 9f1d9605d..000000000 --- a/data/hfopenllm_v2/ontocord/RedPajama3b_v1-autoredteam_helpfulness-train/d070a397-6bd5-4407-b030-aecdc31eb47c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ontocord_RedPajama3b_v1-autoredteam_helpfulness-train/1762652580.4182642", - "retrieved_timestamp": "1762652580.418265", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ontocord/RedPajama3b_v1-autoredteam_helpfulness-train", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/RedPajama3b_v1-autoredteam_helpfulness-train", - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 2.776 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2847666414003732 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30927408550278385 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24580536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35796875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11070478723404255 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/ontocord/merged_0.2_expert_0.8-stack_2x/7bf3e9ca-7d6f-4d43-b8fe-aceb8d60c7c6.json b/data/hfopenllm_v2/ontocord/merged_0.2_expert_0.8-stack_2x/7bf3e9ca-7d6f-4d43-b8fe-aceb8d60c7c6.json new file mode 100644 index 000000000..14485961f --- /dev/null +++ b/data/hfopenllm_v2/ontocord/merged_0.2_expert_0.8-stack_2x/7bf3e9ca-7d6f-4d43-b8fe-aceb8d60c7c6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ontocord_merged_0.2_expert_0.8-stack_2x/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "merged_0.2_expert_0.8-stack_2x", + "id": "ontocord/merged_0.2_expert_0.8-stack_2x", + "developer": "ontocord", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 6.512 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1796 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3006 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0249 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2626 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3541 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1103 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/merged_0.2_expert_0.8-stack_2x/a0cdb8e9-7920-41eb-864d-9995c3168277.json 
b/data/hfopenllm_v2/ontocord/merged_0.2_expert_0.8-stack_2x/a0cdb8e9-7920-41eb-864d-9995c3168277.json deleted file mode 100644 index 243202b7e..000000000 --- a/data/hfopenllm_v2/ontocord/merged_0.2_expert_0.8-stack_2x/a0cdb8e9-7920-41eb-864d-9995c3168277.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ontocord_merged_0.2_expert_0.8-stack_2x/1762652580.418678", - "retrieved_timestamp": "1762652580.418679", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ontocord/merged_0.2_expert_0.8-stack_2x", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/merged_0.2_expert_0.8-stack_2x", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 6.512 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17960345217356613 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30061312694162695 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3540625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11028922872340426 - } - } - ] -} diff --git a/data/hfopenllm_v2/ontocord/merged_0.2_expert_0.8/8703dbdd-12ef-457b-8cda-f570c8f5c890.json b/data/hfopenllm_v2/ontocord/merged_0.2_expert_0.8/8703dbdd-12ef-457b-8cda-f570c8f5c890.json new file mode 100644 index 000000000..4f9707321 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/merged_0.2_expert_0.8/8703dbdd-12ef-457b-8cda-f570c8f5c890.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ontocord_merged_0.2_expert_0.8/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "merged_0.2_expert_0.8", + "id": "ontocord/merged_0.2_expert_0.8", + "developer": "ontocord", + "inference_platform": 
"unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1743 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3046 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0264 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3621 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1111 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/merged_0.2_expert_0.8/c373de55-1c2e-4cd5-a0e9-ec462f80010f.json b/data/hfopenllm_v2/ontocord/merged_0.2_expert_0.8/c373de55-1c2e-4cd5-a0e9-ec462f80010f.json deleted file mode 100644 index a9d92aae0..000000000 --- a/data/hfopenllm_v2/ontocord/merged_0.2_expert_0.8/c373de55-1c2e-4cd5-a0e9-ec462f80010f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ontocord_merged_0.2_expert_0.8/1762652580.418474", - "retrieved_timestamp": "1762652580.418475", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ontocord/merged_0.2_expert_0.8", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/merged_0.2_expert_0.8", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - 
"params_billions": 3.759 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17425763640473943 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3046000784127159 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36206249999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11112034574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/ontocord/merged_0.5_expert_0.5/d3dccfbc-ccc3-4d7c-abe3-4669c8efca3b.json b/data/hfopenllm_v2/ontocord/merged_0.5_expert_0.5/d3dccfbc-ccc3-4d7c-abe3-4669c8efca3b.json deleted file mode 100644 index deae9eb59..000000000 --- a/data/hfopenllm_v2/ontocord/merged_0.5_expert_0.5/d3dccfbc-ccc3-4d7c-abe3-4669c8efca3b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ontocord_merged_0.5_expert_0.5/1762652580.418875", - "retrieved_timestamp": "1762652580.418876", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ontocord/merged_0.5_expert_0.5", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/merged_0.5_expert_0.5", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1787291054402319 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3017011118802398 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35424999999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1107878989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/ontocord/merged_0.5_expert_0.5/d77f3e8f-1eea-478e-babd-ba873d2d427c.json b/data/hfopenllm_v2/ontocord/merged_0.5_expert_0.5/d77f3e8f-1eea-478e-babd-ba873d2d427c.json new file mode 100644 index 000000000..3c97d6dc5 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/merged_0.5_expert_0.5/d77f3e8f-1eea-478e-babd-ba873d2d427c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ontocord_merged_0.5_expert_0.5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "merged_0.5_expert_0.5", + "id": "ontocord/merged_0.5_expert_0.5", + "developer": "ontocord", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1787 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3017 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0196 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3542 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1108 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained-autoredteam_helpful-0.25_helpful/783a4385-c802-4bb3-9a21-90629d16efc7.json b/data/hfopenllm_v2/ontocord/ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained-autoredteam_helpful-0.25_helpful/783a4385-c802-4bb3-9a21-90629d16efc7.json new file mode 100644 index 000000000..b166ddfd7 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained-autoredteam_helpful-0.25_helpful/783a4385-c802-4bb3-9a21-90629d16efc7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ontocord_ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained-autoredteam_helpful-0.25_helpful/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained-autoredteam_helpful-0.25_helpful", + "id": "ontocord/ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained-autoredteam_helpful-0.25_helpful", + "developer": "ontocord", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1318 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3004 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0106 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + 
{ + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3631 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1142 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained-autoredteam_helpful-0.25_helpful/93164a9c-187c-45eb-94e0-12910b6ebd9d.json b/data/hfopenllm_v2/ontocord/ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained-autoredteam_helpful-0.25_helpful/93164a9c-187c-45eb-94e0-12910b6ebd9d.json deleted file mode 100644 index 74812a254..000000000 --- a/data/hfopenllm_v2/ontocord/ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained-autoredteam_helpful-0.25_helpful/93164a9c-187c-45eb-94e0-12910b6ebd9d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ontocord_ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained-autoredteam_helpful-0.25_helpful/1762652580.419096", - "retrieved_timestamp": "1762652580.419096", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ontocord/ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained-autoredteam_helpful-0.25_helpful", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained-autoredteam_helpful-0.25_helpful", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13184240038652995 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3004467893724157 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36311458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11419547872340426 - } - } - ] -} diff --git a/data/hfopenllm_v2/ontocord/ontocord_wide_7b-stacked-stage1-instruct/92e8e4af-bdfd-4fb3-8b25-b7b88470c56c.json b/data/hfopenllm_v2/ontocord/ontocord_wide_7b-stacked-stage1-instruct/92e8e4af-bdfd-4fb3-8b25-b7b88470c56c.json deleted file mode 100644 index 7f8db2550..000000000 --- a/data/hfopenllm_v2/ontocord/ontocord_wide_7b-stacked-stage1-instruct/92e8e4af-bdfd-4fb3-8b25-b7b88470c56c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ontocord_ontocord_wide_7b-stacked-stage1-instruct/1762652580.4195461", - "retrieved_timestamp": "1762652580.4195468", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ontocord/ontocord_wide_7b-stacked-stage1-instruct", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/ontocord_wide_7b-stacked-stage1-instruct", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.888 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15302508455342934 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2853913447506418 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24664429530201343 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35378125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11170212765957446 - } - } - ] -} diff --git a/data/hfopenllm_v2/ontocord/ontocord_wide_7b-stacked-stage1-instruct/bb4ff51e-ce3a-42f5-871e-3e5e8977bc42.json b/data/hfopenllm_v2/ontocord/ontocord_wide_7b-stacked-stage1-instruct/bb4ff51e-ce3a-42f5-871e-3e5e8977bc42.json new file mode 100644 index 000000000..3d829740d --- /dev/null +++ 
b/data/hfopenllm_v2/ontocord/ontocord_wide_7b-stacked-stage1-instruct/bb4ff51e-ce3a-42f5-871e-3e5e8977bc42.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ontocord_ontocord_wide_7b-stacked-stage1-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ontocord_wide_7b-stacked-stage1-instruct", + "id": "ontocord/ontocord_wide_7b-stacked-stage1-instruct", + "developer": "ontocord", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.888 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.153 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2854 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0068 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2466 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3538 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1117 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/ontocord_wide_7b-stacked-stage1/8098c6f4-c2a4-44d9-92b5-72dfccd83395.json b/data/hfopenllm_v2/ontocord/ontocord_wide_7b-stacked-stage1/8098c6f4-c2a4-44d9-92b5-72dfccd83395.json deleted file mode 100644 index 1f4473e9e..000000000 --- a/data/hfopenllm_v2/ontocord/ontocord_wide_7b-stacked-stage1/8098c6f4-c2a4-44d9-92b5-72dfccd83395.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": 
"0.1.0", - "evaluation_id": "hfopenllm_v2/ontocord_ontocord_wide_7b-stacked-stage1/1762652580.41932", - "retrieved_timestamp": "1762652580.419321", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ontocord/ontocord_wide_7b-stacked-stage1", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/ontocord_wide_7b-stacked-stage1", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.888 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14845388014911545 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28965200351622594 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3603541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11053856382978723 - } - } - ] -} diff --git a/data/hfopenllm_v2/ontocord/ontocord_wide_7b-stacked-stage1/e80d25b5-3f4b-45a7-9472-09f98db03bf0.json b/data/hfopenllm_v2/ontocord/ontocord_wide_7b-stacked-stage1/e80d25b5-3f4b-45a7-9472-09f98db03bf0.json new file mode 100644 index 000000000..0a16462de --- /dev/null +++ b/data/hfopenllm_v2/ontocord/ontocord_wide_7b-stacked-stage1/e80d25b5-3f4b-45a7-9472-09f98db03bf0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ontocord_ontocord_wide_7b-stacked-stage1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ontocord_wide_7b-stacked-stage1", + "id": "ontocord/ontocord_wide_7b-stacked-stage1", + "developer": "ontocord", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.888 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", 
+ "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1485 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2897 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0091 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2534 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3604 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1105 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/starcoder2-29b-ls/68285cd4-9573-4fa7-af6f-321c7b4c8171.json b/data/hfopenllm_v2/ontocord/starcoder2-29b-ls/68285cd4-9573-4fa7-af6f-321c7b4c8171.json deleted file mode 100644 index 1c96a4531..000000000 --- a/data/hfopenllm_v2/ontocord/starcoder2-29b-ls/68285cd4-9573-4fa7-af6f-321c7b4c8171.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ontocord_starcoder2-29b-ls/1762652580.419764", - "retrieved_timestamp": "1762652580.419765", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ontocord/starcoder2-29b-ls", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/starcoder2-29b-ls", - "additional_details": { - "precision": "float16", - "architecture": "Starcoder2ForCausalLM", - "params_billions": 29.009 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.21492417895628046 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37349755200329665 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0188821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36999999999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1869182180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/ontocord/starcoder2-29b-ls/7fed0b1d-0d79-4784-8fd6-42f8611b1751.json b/data/hfopenllm_v2/ontocord/starcoder2-29b-ls/7fed0b1d-0d79-4784-8fd6-42f8611b1751.json new file mode 100644 index 000000000..3cb91cf0f --- /dev/null +++ b/data/hfopenllm_v2/ontocord/starcoder2-29b-ls/7fed0b1d-0d79-4784-8fd6-42f8611b1751.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ontocord_starcoder2-29b-ls/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "starcoder2-29b-ls", + "id": "ontocord/starcoder2-29b-ls", + "developer": "ontocord", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Starcoder2ForCausalLM", + "params_billions": 29.009 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2149 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3735 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0189 + } + }, + { + 
"evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2735 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.37 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1869 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/starcoder2_3b-AutoRedteam/9ae53763-119d-40af-bdf2-97dd34eaf9e3.json b/data/hfopenllm_v2/ontocord/starcoder2_3b-AutoRedteam/9ae53763-119d-40af-bdf2-97dd34eaf9e3.json deleted file mode 100644 index b556455de..000000000 --- a/data/hfopenllm_v2/ontocord/starcoder2_3b-AutoRedteam/9ae53763-119d-40af-bdf2-97dd34eaf9e3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ontocord_starcoder2_3b-AutoRedteam/1762652580.419971", - "retrieved_timestamp": "1762652580.4199722", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ontocord/starcoder2_3b-AutoRedteam", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/starcoder2_3b-AutoRedteam", - "additional_details": { - "precision": "float16", - "architecture": "Starcoder2ForCausalLM", - "params_billions": 3.181 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15737133029251277 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3497644619743598 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3645729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13364361702127658 - } - } - ] -} diff --git a/data/hfopenllm_v2/ontocord/starcoder2_3b-AutoRedteam/be534cd3-8245-4370-ba6c-9687b431ee8d.json b/data/hfopenllm_v2/ontocord/starcoder2_3b-AutoRedteam/be534cd3-8245-4370-ba6c-9687b431ee8d.json new file mode 100644 index 000000000..ad59c53df --- /dev/null +++ b/data/hfopenllm_v2/ontocord/starcoder2_3b-AutoRedteam/be534cd3-8245-4370-ba6c-9687b431ee8d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ontocord_starcoder2_3b-AutoRedteam/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "starcoder2_3b-AutoRedteam", + "id": "ontocord/starcoder2_3b-AutoRedteam", + "developer": "ontocord", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Starcoder2ForCausalLM", + "params_billions": 3.181 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1574 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3498 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0106 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2517 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3646 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, 
+ "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1336 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b-merge_test/db2c4148-d7be-4f13-a449-095b78bda7c2.json b/data/hfopenllm_v2/ontocord/wide_3b-merge_test/db2c4148-d7be-4f13-a449-095b78bda7c2.json deleted file mode 100644 index 86d91e97c..000000000 --- a/data/hfopenllm_v2/ontocord/wide_3b-merge_test/db2c4148-d7be-4f13-a449-095b78bda7c2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b-merge_test/1762652580.420181", - "retrieved_timestamp": "1762652580.420182", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ontocord/wide_3b-merge_test", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_3b-merge_test", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17628115622104903 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011467446788138 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23993288590604026 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.342 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10663231382978723 - } - } - ] -} diff --git a/data/hfopenllm_v2/ontocord/wide_3b-merge_test/e98967b7-3aff-4baa-92eb-eff86bf09797.json b/data/hfopenllm_v2/ontocord/wide_3b-merge_test/e98967b7-3aff-4baa-92eb-eff86bf09797.json new file mode 100644 index 000000000..e0c7c037c --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b-merge_test/e98967b7-3aff-4baa-92eb-eff86bf09797.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b-merge_test/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": 
"third_party" + }, + "model_info": { + "name": "wide_3b-merge_test", + "id": "ontocord/wide_3b-merge_test", + "developer": "ontocord", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1763 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3011 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2399 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.342 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1066 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b-stage1_shuf_sample1_jsonl-pretrained/8736a22a-f980-4a01-953d-217f27050129.json b/data/hfopenllm_v2/ontocord/wide_3b-stage1_shuf_sample1_jsonl-pretrained/8736a22a-f980-4a01-953d-217f27050129.json new file mode 100644 index 000000000..abe0c08b1 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b-stage1_shuf_sample1_jsonl-pretrained/8736a22a-f980-4a01-953d-217f27050129.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "wide_3b-stage1_shuf_sample1_jsonl-pretrained", + "id": "ontocord/wide_3b-stage1_shuf_sample1_jsonl-pretrained", + 
"developer": "ontocord", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1395 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3004 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0166 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3632 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.114 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b-stage1_shuf_sample1_jsonl-pretrained/91ac4c22-3f2a-48fd-aad8-5c26a5f07ea6.json b/data/hfopenllm_v2/ontocord/wide_3b-stage1_shuf_sample1_jsonl-pretrained/91ac4c22-3f2a-48fd-aad8-5c26a5f07ea6.json deleted file mode 100644 index 7eadd46b3..000000000 --- a/data/hfopenllm_v2/ontocord/wide_3b-stage1_shuf_sample1_jsonl-pretrained/91ac4c22-3f2a-48fd-aad8-5c26a5f07ea6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained/1762652580.420386", - "retrieved_timestamp": "1762652580.420386", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ontocord/wide_3b-stage1_shuf_sample1_jsonl-pretrained", - "developer": "ontocord", - "inference_platform": 
"unknown", - "id": "ontocord/wide_3b-stage1_shuf_sample1_jsonl-pretrained", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13946107439371977 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30036095049490824 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36320833333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11402925531914894 - } - } - ] -} diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge/75a2b5c9-7c73-4bb4-8e99-af4a3a27589d.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge/75a2b5c9-7c73-4bb4-8e99-af4a3a27589d.json new file mode 100644 index 000000000..129aad1c1 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge/75a2b5c9-7c73-4bb4-8e99-af4a3a27589d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "wide_3b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge", + "id": "ontocord/wide_3b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge", + "developer": "ontocord", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.1664 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3031 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0113 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3845 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1111 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge/c5a9d4e0-a43b-4249-abbb-f544bdb2d806.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge/c5a9d4e0-a43b-4249-abbb-f544bdb2d806.json deleted file mode 100644 index 0ecbfb78d..000000000 --- a/data/hfopenllm_v2/ontocord/wide_3b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge/c5a9d4e0-a43b-4249-abbb-f544bdb2d806.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge/1762652580.420605", - "retrieved_timestamp": "1762652580.420605", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ontocord/wide_3b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_3b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16636413604790845 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30309127879396963 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.011329305135951661 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3845416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11112034574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stag1.2-lyrical_news_software_howto_formattedtext-merge/0e0ebdc7-a5bd-4314-9bd7-fc8a11541a4e.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stag1.2-lyrical_news_software_howto_formattedtext-merge/0e0ebdc7-a5bd-4314-9bd7-fc8a11541a4e.json new file mode 100644 index 000000000..2b49af2c2 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b_sft_stag1.2-lyrical_news_software_howto_formattedtext-merge/0e0ebdc7-a5bd-4314-9bd7-fc8a11541a4e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stag1.2-lyrical_news_software_howto_formattedtext-merge/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "wide_3b_sft_stag1.2-lyrical_news_software_howto_formattedtext-merge", + "id": "ontocord/wide_3b_sft_stag1.2-lyrical_news_software_howto_formattedtext-merge", + "developer": "ontocord", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1697 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.2975 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0136 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3778 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1125 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stag1.2-lyrical_news_software_howto_formattedtext-merge/5b9a91bc-bdca-468e-b8eb-b0e97fd97148.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stag1.2-lyrical_news_software_howto_formattedtext-merge/5b9a91bc-bdca-468e-b8eb-b0e97fd97148.json deleted file mode 100644 index bd2d68905..000000000 --- a/data/hfopenllm_v2/ontocord/wide_3b_sft_stag1.2-lyrical_news_software_howto_formattedtext-merge/5b9a91bc-bdca-468e-b8eb-b0e97fd97148.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stag1.2-lyrical_news_software_howto_formattedtext-merge/1762652580.420933", - "retrieved_timestamp": "1762652580.420937", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ontocord/wide_3b_sft_stag1.2-lyrical_news_software_howto_formattedtext-merge", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_3b_sft_stag1.2-lyrical_news_software_howto_formattedtext-merge", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16973629968483622 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2975125970659158 - } - }, - { - "evaluation_name": 
"MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37781249999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1124501329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-no_redteam_skg_poem.no_issue/aeda694a-795c-4a42-8b40-d406b7223627.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-no_redteam_skg_poem.no_issue/aeda694a-795c-4a42-8b40-d406b7223627.json deleted file mode 100644 index 1c596fb7d..000000000 --- a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-no_redteam_skg_poem.no_issue/aeda694a-795c-4a42-8b40-d406b7223627.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-no_redteam_skg_poem.no_issue/1762652580.4213939", - "retrieved_timestamp": "1762652580.4213948", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ontocord/wide_3b_sft_stage1.1-ss1-no_redteam_skg_poem.no_issue", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_3b_sft_stage1.1-ss1-no_redteam_skg_poem.no_issue", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14800396281865452 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30953444521357315 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": 
"Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3579375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1107878989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-no_redteam_skg_poem.no_issue/f8579305-003b-4727-b904-bad4f363a616.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-no_redteam_skg_poem.no_issue/f8579305-003b-4727-b904-bad4f363a616.json new file mode 100644 index 000000000..b29b83734 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-no_redteam_skg_poem.no_issue/f8579305-003b-4727-b904-bad4f363a616.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-no_redteam_skg_poem.no_issue/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "wide_3b_sft_stage1.1-ss1-no_redteam_skg_poem.no_issue", + "id": "ontocord/wide_3b_sft_stage1.1-ss1-no_redteam_skg_poem.no_issue", + "developer": "ontocord", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.148 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3095 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0204 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3579 + } + }, + { + "evaluation_name": 
"MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1108 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr.no_issue/3103f36a-4a88-4a39-8261-0b597f8d6db4.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr.no_issue/3103f36a-4a88-4a39-8261-0b597f8d6db4.json new file mode 100644 index 000000000..e2c5d63f0 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr.no_issue/3103f36a-4a88-4a39-8261-0b597f8d6db4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr.no_issue/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "wide_3b_sft_stage1.1-ss1-with_generics_intr.no_issue", + "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr.no_issue", + "developer": "ontocord", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1237 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.306 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0106 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3673 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", 
+ "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1111 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr.no_issue/3e26804b-13fa-4115-a000-d6be3339e7b1.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr.no_issue/3e26804b-13fa-4115-a000-d6be3339e7b1.json deleted file mode 100644 index 5d9cd41c9..000000000 --- a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr.no_issue/3e26804b-13fa-4115-a000-d6be3339e7b1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr.no_issue/1762652580.4216871", - "retrieved_timestamp": "1762652580.421689", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr.no_issue", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr.no_issue", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12367407368005781 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3060091508023586 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3672708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11112034574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math.no_issue/d1f24979-eced-4dca-a5a1-4e4bfee28779.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math.no_issue/d1f24979-eced-4dca-a5a1-4e4bfee28779.json deleted file 
mode 100644 index 9649f586b..000000000 --- a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math.no_issue/d1f24979-eced-4dca-a5a1-4e4bfee28779.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr_math.no_issue/1762652580.42205", - "retrieved_timestamp": "1762652580.422051", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math.no_issue", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math.no_issue", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1191527369601546 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2955590587949957 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35530208333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11826795212765957 - } - } - ] -} diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math.no_issue/eda9de3b-ae53-4102-b203-eddadbc50464.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math.no_issue/eda9de3b-ae53-4102-b203-eddadbc50464.json new file mode 100644 index 000000000..d30f5ad9b --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math.no_issue/eda9de3b-ae53-4102-b203-eddadbc50464.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr_math.no_issue/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, 
+ "model_info": { + "name": "wide_3b_sft_stage1.1-ss1-with_generics_intr_math.no_issue", + "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math.no_issue", + "developer": "ontocord", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1192 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2956 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0068 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3553 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1183 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue/171ae287-000a-491e-9ecb-ac7d29217e9e.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue/171ae287-000a-491e-9ecb-ac7d29217e9e.json deleted file mode 100644 index 635f88256..000000000 --- a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue/171ae287-000a-491e-9ecb-ac7d29217e9e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue/1762652580.42265", - "retrieved_timestamp": "1762652580.4226508", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging 
Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1161551350416894 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3184343946486203 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0075528700906344415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34469791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11236702127659574 - } - } - ] -} diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue/b7a0e530-08f8-4c6a-9258-733b59096812.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue/b7a0e530-08f8-4c6a-9258-733b59096812.json deleted file mode 100644 index 5958d3c60..000000000 --- a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue/b7a0e530-08f8-4c6a-9258-733b59096812.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue/1762652580.422383", - "retrieved_timestamp": "1762652580.422384", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } - }, - "evaluation_results": [ - { 
- "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1128328390891723 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3171441625189962 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.011329305135951661 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26845637583892623 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34603125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11294880319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue/b7de4fa8-d97d-400f-bc3f-ecb1963a03ed.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue/b7de4fa8-d97d-400f-bc3f-ecb1963a03ed.json new file mode 100644 index 000000000..9fef62a26 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue/b7de4fa8-d97d-400f-bc3f-ecb1963a03ed.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue", + "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue", + "developer": "ontocord", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1128 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 
1.0 + }, + "score_details": { + "score": 0.3171 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0113 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.346 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1129 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue/fa6ecaf9-457e-4135-ad25-4790ebc27737.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue/fa6ecaf9-457e-4135-ad25-4790ebc27737.json new file mode 100644 index 000000000..053d679ba --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue/fa6ecaf9-457e-4135-ad25-4790ebc27737.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue", + "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue", + "developer": "ontocord", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1162 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 
0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3184 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0076 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3447 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1124 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories_no_orig_instr.no_issue/ebaa99c4-ff66-421d-8ba7-dae2c5fa274c.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories_no_orig_instr.no_issue/ebaa99c4-ff66-421d-8ba7-dae2c5fa274c.json new file mode 100644 index 000000000..4832c4eb6 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories_no_orig_instr.no_issue/ebaa99c4-ff66-421d-8ba7-dae2c5fa274c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories_no_orig_instr.no_issue/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories_no_orig_instr.no_issue", + "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories_no_orig_instr.no_issue", + "developer": "ontocord", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1317 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3064 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0091 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3446 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1144 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories_no_orig_instr.no_issue/f14d0513-676d-45e3-97c4-bf386f61b856.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories_no_orig_instr.no_issue/f14d0513-676d-45e3-97c4-bf386f61b856.json deleted file mode 100644 index 09d81183e..000000000 --- a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories_no_orig_instr.no_issue/f14d0513-676d-45e3-97c4-bf386f61b856.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories_no_orig_instr.no_issue/1762652580.422879", - "retrieved_timestamp": "1762652580.42288", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories_no_orig_instr.no_issue", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories_no_orig_instr.no_issue", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13169279733329786 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30640062669813056 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34460416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11444481382978723 - } - } - ] -} diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_stories.no_issue/4d673b5a-3237-433f-9e08-f614fe10edc4.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_stories.no_issue/4d673b5a-3237-433f-9e08-f614fe10edc4.json deleted file mode 100644 index 8e92b7176..000000000 --- a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_stories.no_issue/4d673b5a-3237-433f-9e08-f614fe10edc4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr_stories.no_issue/1762652580.4231439", - "retrieved_timestamp": "1762652580.423145", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_stories.no_issue", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_stories.no_issue", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.118178654857999 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3037498354512724 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35669791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11619015957446809 - } - } - ] -} diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_stories.no_issue/e388c707-8b35-49a4-94eb-f32e983fe33e.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_stories.no_issue/e388c707-8b35-49a4-94eb-f32e983fe33e.json new file mode 100644 index 000000000..b73dce637 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_stories.no_issue/e388c707-8b35-49a4-94eb-f32e983fe33e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr_stories.no_issue/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "wide_3b_sft_stage1.1-ss1-with_generics_intr_stories.no_issue", + "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_stories.no_issue", + "developer": "ontocord", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1182 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0083 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + 
"source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3567 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1162 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_math.no_issue/e19c2b24-4deb-45b4-a0a9-2d055bc90446.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_math.no_issue/e19c2b24-4deb-45b4-a0a9-2d055bc90446.json deleted file mode 100644 index 12ea7e1f2..000000000 --- a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_math.no_issue/e19c2b24-4deb-45b4-a0a9-2d055bc90446.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_math.no_issue/1762652580.423407", - "retrieved_timestamp": "1762652580.423407", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_math.no_issue", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_math.no_issue", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12399876771410967 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30324371251012056 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0075528700906344415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34869791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11278257978723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_math.no_issue/f6273192-31cf-4ee1-af45-c2f62de05330.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_math.no_issue/f6273192-31cf-4ee1-af45-c2f62de05330.json new file mode 100644 index 000000000..f93ee1cb2 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_math.no_issue/f6273192-31cf-4ee1-af45-c2f62de05330.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_math.no_issue/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "wide_3b_sft_stage1.1-ss1-with_generics_math.no_issue", + "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_math.no_issue", + "developer": "ontocord", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.124 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3032 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0076 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3487 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.1128 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_math.no_issue/105650e6-d9cf-4106-9d55-6f3c08f2f1cf.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_math.no_issue/105650e6-d9cf-4106-9d55-6f3c08f2f1cf.json new file mode 100644 index 000000000..635e7e0ec --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_math.no_issue/105650e6-d9cf-4106-9d55-6f3c08f2f1cf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-with_math.no_issue/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "wide_3b_sft_stage1.1-ss1-with_math.no_issue", + "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_math.no_issue", + "developer": "ontocord", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1298 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3052 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0159 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3928 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1147 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_math.no_issue/449f6b1a-5264-4c7b-82d6-60e61841b7d6.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_math.no_issue/449f6b1a-5264-4c7b-82d6-60e61841b7d6.json deleted file mode 100644 index 1d91d4711..000000000 --- a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_math.no_issue/449f6b1a-5264-4c7b-82d6-60e61841b7d6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-with_math.no_issue/1762652580.423659", - "retrieved_timestamp": "1762652580.42366", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ontocord/wide_3b_sft_stage1.1-ss1-with_math.no_issue", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_math.no_issue", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12981888057022034 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30518984588252307 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39276041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1146941489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_r1_generics_intr_math_stories.no_issue/2e22170f-839d-482d-bc8a-ed345aa900af.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_r1_generics_intr_math_stories.no_issue/2e22170f-839d-482d-bc8a-ed345aa900af.json deleted file mode 100644 index 2b0bc53bd..000000000 --- a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_r1_generics_intr_math_stories.no_issue/2e22170f-839d-482d-bc8a-ed345aa900af.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-with_r1_generics_intr_math_stories.no_issue/1762652580.4239051", - "retrieved_timestamp": "1762652580.4239051", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ontocord/wide_3b_sft_stage1.1-ss1-with_r1_generics_intr_math_stories.no_issue", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_r1_generics_intr_math_stories.no_issue", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20490742341431845 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911778102988436 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35753125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11668882978723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_r1_generics_intr_math_stories.no_issue/a1d23749-40c0-4ccb-a104-bf0de63bc2bd.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_r1_generics_intr_math_stories.no_issue/a1d23749-40c0-4ccb-a104-bf0de63bc2bd.json new file mode 100644 index 000000000..22c12a778 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_r1_generics_intr_math_stories.no_issue/a1d23749-40c0-4ccb-a104-bf0de63bc2bd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-with_r1_generics_intr_math_stories.no_issue/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "wide_3b_sft_stage1.1-ss1-with_r1_generics_intr_math_stories.no_issue", + "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_r1_generics_intr_math_stories.no_issue", + 
"developer": "ontocord", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2049 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2912 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3575 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1167 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_fictional_lyrical/4e4b4cf9-48d5-4ff6-92c0-1e9d7b874b6b.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_fictional_lyrical/4e4b4cf9-48d5-4ff6-92c0-1e9d7b874b6b.json new file mode 100644 index 000000000..b7dea3b9d --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_fictional_lyrical/4e4b4cf9-48d5-4ff6-92c0-1e9d7b874b6b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.2-ss1-expert_fictional_lyrical/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "wide_3b_sft_stage1.2-ss1-expert_fictional_lyrical", + "id": "ontocord/wide_3b_sft_stage1.2-ss1-expert_fictional_lyrical", + "developer": "ontocord", + "inference_platform": "unknown", + 
"additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1461 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2998 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0136 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3926 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1141 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_fictional_lyrical/75f9224b-df09-4693-8b04-c00e17785250.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_fictional_lyrical/75f9224b-df09-4693-8b04-c00e17785250.json deleted file mode 100644 index 970bf2117..000000000 --- a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_fictional_lyrical/75f9224b-df09-4693-8b04-c00e17785250.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.2-ss1-expert_fictional_lyrical/1762652580.42415", - "retrieved_timestamp": "1762652580.424151", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ontocord/wide_3b_sft_stage1.2-ss1-expert_fictional_lyrical", - "developer": "ontocord", - "inference_platform": "unknown", - "id": 
"ontocord/wide_3b_sft_stage1.2-ss1-expert_fictional_lyrical", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.146105666298754 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29981162881428614 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39257291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1141123670212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_formatted_text/3c4713a3-3973-4a04-9c4a-a6782251734e.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_formatted_text/3c4713a3-3973-4a04-9c4a-a6782251734e.json new file mode 100644 index 000000000..7106f073b --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_formatted_text/3c4713a3-3973-4a04-9c4a-a6782251734e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.2-ss1-expert_formatted_text/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "wide_3b_sft_stage1.2-ss1-expert_formatted_text", + "id": "ontocord/wide_3b_sft_stage1.2-ss1-expert_formatted_text", + "developer": "ontocord", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1487 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3069 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0121 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3474 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1146 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_formatted_text/4bd52ced-e009-4805-8d0a-ce37b25f103c.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_formatted_text/4bd52ced-e009-4805-8d0a-ce37b25f103c.json deleted file mode 100644 index f1dbba082..000000000 --- a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_formatted_text/4bd52ced-e009-4805-8d0a-ce37b25f103c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.2-ss1-expert_formatted_text/1762652580.424435", - "retrieved_timestamp": "1762652580.424437", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ontocord/wide_3b_sft_stage1.2-ss1-expert_formatted_text", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_3b_sft_stage1.2-ss1-expert_formatted_text", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14872870649875664 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3068950688059236 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34739583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11461103723404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_how-to/14e2e5a7-d43c-4a02-9af6-6c378778d7fc.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_how-to/14e2e5a7-d43c-4a02-9af6-6c378778d7fc.json deleted file mode 100644 index 1cc6b3b99..000000000 --- a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_how-to/14e2e5a7-d43c-4a02-9af6-6c378778d7fc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.2-ss1-expert_how-to/1762652580.424736", - "retrieved_timestamp": "1762652580.424736", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ontocord/wide_3b_sft_stage1.2-ss1-expert_how-to", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_3b_sft_stage1.2-ss1-expert_how-to", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12454842041339201 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3047398483929371 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", 
- "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36581250000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11527593085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_how-to/de70c700-a007-4e87-a3db-941ee285eb1f.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_how-to/de70c700-a007-4e87-a3db-941ee285eb1f.json new file mode 100644 index 000000000..0860e6dcd --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_how-to/de70c700-a007-4e87-a3db-941ee285eb1f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.2-ss1-expert_how-to/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "wide_3b_sft_stage1.2-ss1-expert_how-to", + "id": "ontocord/wide_3b_sft_stage1.2-ss1-expert_how-to", + "developer": "ontocord", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1245 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3047 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0144 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3658 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1153 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_math/a1324a7f-1911-4fa9-8d83-be891f752a61.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_math/a1324a7f-1911-4fa9-8d83-be891f752a61.json new file mode 100644 index 000000000..ffb80060b --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_math/a1324a7f-1911-4fa9-8d83-be891f752a61.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.2-ss1-expert_math/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "wide_3b_sft_stage1.2-ss1-expert_math", + "id": "ontocord/wide_3b_sft_stage1.2-ss1-expert_math", + "developer": "ontocord", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1915 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.306 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0279 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.37 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.1092 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_math/d2d7e55e-87a3-4390-a1e4-47a2d0c62bd2.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_math/d2d7e55e-87a3-4390-a1e4-47a2d0c62bd2.json deleted file mode 100644 index 0eca7b54e..000000000 --- a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_math/d2d7e55e-87a3-4390-a1e4-47a2d0c62bd2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.2-ss1-expert_math/1762652580.42496", - "retrieved_timestamp": "1762652580.424961", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ontocord/wide_3b_sft_stage1.2-ss1-expert_math", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_3b_sft_stage1.2-ss1-expert_math", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19151850423542865 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3059577262726771 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37003125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10920877659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_news/9c4af0df-f538-4755-8cd0-eec6b2b26524.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_news/9c4af0df-f538-4755-8cd0-eec6b2b26524.json new file mode 100644 index 000000000..1ef6d19ad --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_news/9c4af0df-f538-4755-8cd0-eec6b2b26524.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.2-ss1-expert_news/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": 
"HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "wide_3b_sft_stage1.2-ss1-expert_news", + "id": "ontocord/wide_3b_sft_stage1.2-ss1-expert_news", + "developer": "ontocord", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1658 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2926 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0166 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3621 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1111 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_news/a13cf03f-cf1a-49a8-ba6c-d6e3b27036fa.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_news/a13cf03f-cf1a-49a8-ba6c-d6e3b27036fa.json deleted file mode 100644 index 3a1310370..000000000 --- a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_news/a13cf03f-cf1a-49a8-ba6c-d6e3b27036fa.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.2-ss1-expert_news/1762652580.425178", - "retrieved_timestamp": "1762652580.4251788", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ontocord/wide_3b_sft_stage1.2-ss1-expert_news", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_3b_sft_stage1.2-ss1-expert_news", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16581448334862608 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2925879483112595 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36209375000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11112034574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_software/dab94fc0-5bea-4875-a802-8ef793bc7fc7.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_software/dab94fc0-5bea-4875-a802-8ef793bc7fc7.json deleted file mode 100644 index 3507969e3..000000000 --- a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_software/dab94fc0-5bea-4875-a802-8ef793bc7fc7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.2-ss1-expert_software/1762652580.425399", - "retrieved_timestamp": "1762652580.4254", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ontocord/wide_3b_sft_stage1.2-ss1-expert_software", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_3b_sft_stage1.2-ss1-expert_software", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { 
- "score": 0.1733832896714052 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2979956844198214 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35685416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11402925531914894 - } - } - ] -} diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_software/fde650a6-a5d1-4edc-bd64-8be806663263.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_software/fde650a6-a5d1-4edc-bd64-8be806663263.json new file mode 100644 index 000000000..511c00854 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_software/fde650a6-a5d1-4edc-bd64-8be806663263.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.2-ss1-expert_software/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "wide_3b_sft_stage1.2-ss1-expert_software", + "id": "ontocord/wide_3b_sft_stage1.2-ss1-expert_software", + "developer": "ontocord", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1734 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.298 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0159 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3569 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.114 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_6.6b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge-stacked/96dd1a08-b166-4d8e-ac31-5e948adf931b.json b/data/hfopenllm_v2/ontocord/wide_6.6b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge-stacked/96dd1a08-b166-4d8e-ac31-5e948adf931b.json new file mode 100644 index 000000000..a1a9a9d89 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_6.6b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge-stacked/96dd1a08-b166-4d8e-ac31-5e948adf931b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ontocord_wide_6.6b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge-stacked/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "wide_6.6b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge-stacked", + "id": "ontocord/wide_6.6b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge-stacked", + "developer": "ontocord", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.888 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1244 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3026 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + 
"hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0144 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3686 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1115 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_6.6b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge-stacked/e16d5502-1721-424f-a149-9a6233a2183a.json b/data/hfopenllm_v2/ontocord/wide_6.6b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge-stacked/e16d5502-1721-424f-a149-9a6233a2183a.json deleted file mode 100644 index 55bedeb26..000000000 --- a/data/hfopenllm_v2/ontocord/wide_6.6b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge-stacked/e16d5502-1721-424f-a149-9a6233a2183a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ontocord_wide_6.6b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge-stacked/1762652580.425614", - "retrieved_timestamp": "1762652580.425615", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ontocord/wide_6.6b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge-stacked", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_6.6b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge-stacked", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.888 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12439881736015992 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30264484636677236 - } - }, - { - "evaluation_name": "MATH 
Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3686354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11145279255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/oobabooga/CodeBooga-34B-v0.1/3086045f-e22d-4aca-9459-fc64454a2fb2.json b/data/hfopenllm_v2/oobabooga/CodeBooga-34B-v0.1/3086045f-e22d-4aca-9459-fc64454a2fb2.json deleted file mode 100644 index cfb3e508b..000000000 --- a/data/hfopenllm_v2/oobabooga/CodeBooga-34B-v0.1/3086045f-e22d-4aca-9459-fc64454a2fb2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/oobabooga_CodeBooga-34B-v0.1/1762652580.425838", - "retrieved_timestamp": "1762652580.425838", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "oobabooga/CodeBooga-34B-v0.1", - "developer": "oobabooga", - "inference_platform": "unknown", - "id": "oobabooga/CodeBooga-34B-v0.1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 33.744 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5250180631834643 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3427441185661722 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43102083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23595412234042554 - } - } - ] -} diff --git a/data/hfopenllm_v2/oobabooga/CodeBooga-34B-v0.1/3b90b9db-a68e-4ee9-bd4d-a18cec357753.json b/data/hfopenllm_v2/oobabooga/CodeBooga-34B-v0.1/3b90b9db-a68e-4ee9-bd4d-a18cec357753.json new file mode 100644 index 000000000..e386ca80a --- /dev/null +++ b/data/hfopenllm_v2/oobabooga/CodeBooga-34B-v0.1/3b90b9db-a68e-4ee9-bd4d-a18cec357753.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/oobabooga_CodeBooga-34B-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CodeBooga-34B-v0.1", + "id": "oobabooga/CodeBooga-34B-v0.1", + "developer": "oobabooga", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 33.744 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.525 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3427 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0393 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2567 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.431 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.236 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/oopere/Llama-FinSent-S/444a6ace-77d4-4d93-b80b-ff5c7e2f6888.json b/data/hfopenllm_v2/oopere/Llama-FinSent-S/444a6ace-77d4-4d93-b80b-ff5c7e2f6888.json new file mode 100644 index 000000000..466bcc8a4 --- /dev/null +++ b/data/hfopenllm_v2/oopere/Llama-FinSent-S/444a6ace-77d4-4d93-b80b-ff5c7e2f6888.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/oopere_Llama-FinSent-S/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-FinSent-S", + "id": "oopere/Llama-FinSent-S", + "developer": "oopere", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.914 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2119 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3156 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0181 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2567 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3832 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.113 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/oopere/Llama-FinSent-S/7e11a778-fccf-4a91-81cf-c06f1a5c77c4.json b/data/hfopenllm_v2/oopere/Llama-FinSent-S/7e11a778-fccf-4a91-81cf-c06f1a5c77c4.json new file mode 100644 index 000000000..d92a91932 --- /dev/null +++ 
b/data/hfopenllm_v2/oopere/Llama-FinSent-S/7e11a778-fccf-4a91-81cf-c06f1a5c77c4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/oopere_Llama-FinSent-S/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-FinSent-S", + "id": "oopere/Llama-FinSent-S", + "developer": "oopere", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.914 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2164 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3169 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0128 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3832 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1134 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/oopere/pruned10-llama-3.2-3B/e5d126d7-e0bf-43dc-95c0-184ea1d586ea.json b/data/hfopenllm_v2/oopere/pruned10-llama-3.2-3B/e5d126d7-e0bf-43dc-95c0-184ea1d586ea.json new file mode 100644 index 000000000..afabeb373 --- /dev/null +++ b/data/hfopenllm_v2/oopere/pruned10-llama-3.2-3B/e5d126d7-e0bf-43dc-95c0-184ea1d586ea.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/oopere_pruned10-llama-3.2-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + 
"source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pruned10-llama-3.2-3B", + "id": "oopere/pruned10-llama-3.2-3B", + "developer": "oopere", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.001 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1776 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.334 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0196 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3722 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.164 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/oopere/pruned20-llama-1b/d05b129c-6b9e-4e6b-80fc-af65db620c5d.json b/data/hfopenllm_v2/oopere/pruned20-llama-1b/d05b129c-6b9e-4e6b-80fc-af65db620c5d.json new file mode 100644 index 000000000..d15232953 --- /dev/null +++ b/data/hfopenllm_v2/oopere/pruned20-llama-1b/d05b129c-6b9e-4e6b-80fc-af65db620c5d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/oopere_pruned20-llama-1b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pruned20-llama-1b", + "id": "oopere/pruned20-llama-1b", + 
"developer": "oopere", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.075 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1994 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3031 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0106 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3631 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1123 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/oopere/pruned20-llama-3.2-3b/d9792fac-29c1-45b2-b649-cdebb6830e2f.json b/data/hfopenllm_v2/oopere/pruned20-llama-3.2-3b/d9792fac-29c1-45b2-b649-cdebb6830e2f.json new file mode 100644 index 000000000..bc86d9513 --- /dev/null +++ b/data/hfopenllm_v2/oopere/pruned20-llama-3.2-3b/d9792fac-29c1-45b2-b649-cdebb6830e2f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/oopere_pruned20-llama-3.2-3b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pruned20-llama-3.2-3b", + "id": "oopere/pruned20-llama-3.2-3b", + "developer": "oopere", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 2.79 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": 
{ + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1789 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3248 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0159 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3418 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.128 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/oopere/pruned40-llama-1b/fcc2f06a-e6c8-4c28-bf22-4ee582392912.json b/data/hfopenllm_v2/oopere/pruned40-llama-1b/fcc2f06a-e6c8-4c28-bf22-4ee582392912.json new file mode 100644 index 000000000..2afb1508c --- /dev/null +++ b/data/hfopenllm_v2/oopere/pruned40-llama-1b/fcc2f06a-e6c8-4c28-bf22-4ee582392912.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/oopere_pruned40-llama-1b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pruned40-llama-1b", + "id": "oopere/pruned40-llama-1b", + "developer": "oopere", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.914 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.2284 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2969 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0076 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2433 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4287 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1082 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/oopere/pruned40-llama-3.2-1B/c6e13327-90b3-440d-9367-dbcec54dd6cc.json b/data/hfopenllm_v2/oopere/pruned40-llama-3.2-1B/c6e13327-90b3-440d-9367-dbcec54dd6cc.json new file mode 100644 index 000000000..ee1ba1f96 --- /dev/null +++ b/data/hfopenllm_v2/oopere/pruned40-llama-3.2-1B/c6e13327-90b3-440d-9367-dbcec54dd6cc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/oopere_pruned40-llama-3.2-1B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pruned40-llama-3.2-1B", + "id": "oopere/pruned40-llama-3.2-1B", + "developer": "oopere", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.914 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2266 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", 
+ "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2982 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0083 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2542 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4352 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1115 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/oopere/pruned40-llama-3.2-3b/30b02429-350c-4d86-aded-ba8597bec4d5.json b/data/hfopenllm_v2/oopere/pruned40-llama-3.2-3b/30b02429-350c-4d86-aded-ba8597bec4d5.json new file mode 100644 index 000000000..343aa660b --- /dev/null +++ b/data/hfopenllm_v2/oopere/pruned40-llama-3.2-3b/30b02429-350c-4d86-aded-ba8597bec4d5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/oopere_pruned40-llama-3.2-3b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pruned40-llama-3.2-3b", + "id": "oopere/pruned40-llama-3.2-3b", + "developer": "oopere", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 2.367 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2183 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3167 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + 
"source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0128 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2299 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3539 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1177 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/oopere/pruned60-llama-1b/7d1ee802-106e-4313-ba1d-72d5a0676c88.json b/data/hfopenllm_v2/oopere/pruned60-llama-1b/7d1ee802-106e-4313-ba1d-72d5a0676c88.json new file mode 100644 index 000000000..116617b5f --- /dev/null +++ b/data/hfopenllm_v2/oopere/pruned60-llama-1b/7d1ee802-106e-4313-ba1d-72d5a0676c88.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/oopere_pruned60-llama-1b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pruned60-llama-1b", + "id": "oopere/pruned60-llama-1b", + "developer": "oopere", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.753 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1829 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3016 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.0023 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2492 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4088 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1173 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/oopere/pruned60-llama-3.2-3b/1b3af020-f65e-44b8-a9a2-ad60fa686427.json b/data/hfopenllm_v2/oopere/pruned60-llama-3.2-3b/1b3af020-f65e-44b8-a9a2-ad60fa686427.json new file mode 100644 index 000000000..321e3d3d4 --- /dev/null +++ b/data/hfopenllm_v2/oopere/pruned60-llama-3.2-3b/1b3af020-f65e-44b8-a9a2-ad60fa686427.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/oopere_pruned60-llama-3.2-3b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pruned60-llama-3.2-3b", + "id": "oopere/pruned60-llama-3.2-3b", + "developer": "oopere", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.944 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1825 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3166 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0038 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on 
GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3633 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1131 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/open-atlas/Atlas-Flash-1.5B-Preview/6e40871d-bc23-4f1c-a005-f5b8eb096f84.json b/data/hfopenllm_v2/open-atlas/Atlas-Flash-1.5B-Preview/6e40871d-bc23-4f1c-a005-f5b8eb096f84.json new file mode 100644 index 000000000..2ba09d639 --- /dev/null +++ b/data/hfopenllm_v2/open-atlas/Atlas-Flash-1.5B-Preview/6e40871d-bc23-4f1c-a005-f5b8eb096f84.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/open-atlas_Atlas-Flash-1.5B-Preview/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Atlas-Flash-1.5B-Preview", + "id": "open-atlas/Atlas-Flash-1.5B-Preview", + "developer": "open-atlas", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.327 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3215 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2213 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2525 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + 
"dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3488 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1374 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/open-atlas/Atlas-Flash-1.5B-Preview/96ae17c1-69ef-46c6-bb15-c1b576ba8131.json b/data/hfopenllm_v2/open-atlas/Atlas-Flash-1.5B-Preview/96ae17c1-69ef-46c6-bb15-c1b576ba8131.json deleted file mode 100644 index fe77c0f03..000000000 --- a/data/hfopenllm_v2/open-atlas/Atlas-Flash-1.5B-Preview/96ae17c1-69ef-46c6-bb15-c1b576ba8131.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/open-atlas_Atlas-Flash-1.5B-Preview/1762652580.4281778", - "retrieved_timestamp": "1762652580.4281778", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "open-atlas/Atlas-Flash-1.5B-Preview", - "developer": "open-atlas", - "inference_platform": "unknown", - "id": "open-atlas/Atlas-Flash-1.5B-Preview", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3269569187533522 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3215460102660847 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2212990936555891 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34879166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13738364361702127 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/open-atlas/Atlas-Flash-7B-Preview/1ab33ed2-ea3b-4c6f-a2ac-2465ddd844f4.json b/data/hfopenllm_v2/open-atlas/Atlas-Flash-7B-Preview/1ab33ed2-ea3b-4c6f-a2ac-2465ddd844f4.json new file mode 100644 index 000000000..5c4b96098 --- /dev/null +++ b/data/hfopenllm_v2/open-atlas/Atlas-Flash-7B-Preview/1ab33ed2-ea3b-4c6f-a2ac-2465ddd844f4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/open-atlas_Atlas-Flash-7B-Preview/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Atlas-Flash-7B-Preview", + "id": "open-atlas/Atlas-Flash-7B-Preview", + "developer": "open-atlas", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3908 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3542 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2886 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3836 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2784 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/open-atlas/Atlas-Flash-7B-Preview/6fd7bb75-6648-4bfe-a232-f9efe4b7c45e.json b/data/hfopenllm_v2/open-atlas/Atlas-Flash-7B-Preview/6fd7bb75-6648-4bfe-a232-f9efe4b7c45e.json deleted file mode 100644 index 
2af9ff559..000000000 --- a/data/hfopenllm_v2/open-atlas/Atlas-Flash-7B-Preview/6fd7bb75-6648-4bfe-a232-f9efe4b7c45e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/open-atlas_Atlas-Flash-7B-Preview/1762652580.428412", - "retrieved_timestamp": "1762652580.428413", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "open-atlas/Atlas-Flash-7B-Preview", - "developer": "open-atlas", - "inference_platform": "unknown", - "id": "open-atlas/Atlas-Flash-7B-Preview", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3907543096761038 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3541994356643969 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25755287009063443 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38358333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27842420212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/open-neo/Kyro-n1-3B/0a8b6c55-da69-4f4d-98cc-9d3f5b82d9e2.json b/data/hfopenllm_v2/open-neo/Kyro-n1-3B/0a8b6c55-da69-4f4d-98cc-9d3f5b82d9e2.json deleted file mode 100644 index c1bb12288..000000000 --- a/data/hfopenllm_v2/open-neo/Kyro-n1-3B/0a8b6c55-da69-4f4d-98cc-9d3f5b82d9e2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/open-neo_Kyro-n1-3B/1762652580.428618", - "retrieved_timestamp": "1762652580.428618", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "open-neo/Kyro-n1-3B", - "developer": "open-neo", - "inference_platform": "unknown", - "id": "open-neo/Kyro-n1-3B", - "additional_details": { - "precision": "float16", - "architecture": 
"Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45949746672163194 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46853756471175373 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2854984894259819 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40879166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34225398936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/open-neo/Kyro-n1-3B/ec601f5d-bf19-4407-ac41-6b9272d94735.json b/data/hfopenllm_v2/open-neo/Kyro-n1-3B/ec601f5d-bf19-4407-ac41-6b9272d94735.json new file mode 100644 index 000000000..eab4e7f5e --- /dev/null +++ b/data/hfopenllm_v2/open-neo/Kyro-n1-3B/ec601f5d-bf19-4407-ac41-6b9272d94735.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/open-neo_Kyro-n1-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kyro-n1-3B", + "id": "open-neo/Kyro-n1-3B", + "developer": "open-neo", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4595 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4685 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, 
+ "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2855 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2819 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4088 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3423 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/open-neo/Kyro-n1-7B/87e53761-e8b7-4032-ae7a-c3a91704d115.json b/data/hfopenllm_v2/open-neo/Kyro-n1-7B/87e53761-e8b7-4032-ae7a-c3a91704d115.json new file mode 100644 index 000000000..7c4002489 --- /dev/null +++ b/data/hfopenllm_v2/open-neo/Kyro-n1-7B/87e53761-e8b7-4032-ae7a-c3a91704d115.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/open-neo_Kyro-n1-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kyro-n1-7B", + "id": "open-neo/Kyro-n1-7B", + "developer": "open-neo", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5573 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5387 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3897 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + 
"source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3884 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4333 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/open-neo/Kyro-n1-7B/f69621cf-6e46-4805-b8f2-d7a7cba3a0e4.json b/data/hfopenllm_v2/open-neo/Kyro-n1-7B/f69621cf-6e46-4805-b8f2-d7a7cba3a0e4.json deleted file mode 100644 index 6015254cb..000000000 --- a/data/hfopenllm_v2/open-neo/Kyro-n1-7B/f69621cf-6e46-4805-b8f2-d7a7cba3a0e4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/open-neo_Kyro-n1-7B/1762652580.42885", - "retrieved_timestamp": "1762652580.42885", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "open-neo/Kyro-n1-7B", - "developer": "open-neo", - "inference_platform": "unknown", - "id": "open-neo/Kyro-n1-7B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5572669406064796 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5386561160683788 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38972809667673713 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38841666666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.433344414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/open-thoughts/OpenThinker-7B/59492d86-4b85-4865-84e9-84ab4ace630c.json b/data/hfopenllm_v2/open-thoughts/OpenThinker-7B/59492d86-4b85-4865-84e9-84ab4ace630c.json new file mode 100644 index 000000000..b02b2a84c --- /dev/null +++ b/data/hfopenllm_v2/open-thoughts/OpenThinker-7B/59492d86-4b85-4865-84e9-84ab4ace630c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/open-thoughts_OpenThinker-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenThinker-7B", + "id": "open-thoughts/OpenThinker-7B", + "developer": "open-thoughts", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4089 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5343 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.426 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2567 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.382 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4165 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/open-thoughts/OpenThinker-7B/feb0d715-d1bc-4b0e-8585-a0646c07244b.json b/data/hfopenllm_v2/open-thoughts/OpenThinker-7B/feb0d715-d1bc-4b0e-8585-a0646c07244b.json deleted file mode 100644 index 6ed1b1110..000000000 --- a/data/hfopenllm_v2/open-thoughts/OpenThinker-7B/feb0d715-d1bc-4b0e-8585-a0646c07244b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/open-thoughts_OpenThinker-7B/1762652580.4290519", - "retrieved_timestamp": "1762652580.4290528", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "open-thoughts/OpenThinker-7B", - "developer": "open-thoughts", - "inference_platform": "unknown", - "id": "open-thoughts/OpenThinker-7B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4088895242401273 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5342727589615611 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4259818731117825 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38199999999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41647273936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/openai-community/gpt2-large/cc082df2-259c-44c1-abe4-ef349056a2a9.json b/data/hfopenllm_v2/openai-community/gpt2-large/cc082df2-259c-44c1-abe4-ef349056a2a9.json new file mode 100644 index 000000000..f23bbd9c9 --- /dev/null +++ b/data/hfopenllm_v2/openai-community/gpt2-large/cc082df2-259c-44c1-abe4-ef349056a2a9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/openai-community_gpt2-large/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gpt2-large", + "id": "openai-community/gpt2-large", + "developer": 
"openai-community", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GPT2LMHeadModel", + "params_billions": 0.812 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2048 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3069 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0121 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3789 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1142 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openai-community/gpt2-medium/3f069053-b24e-4242-9302-d46b82e511aa.json b/data/hfopenllm_v2/openai-community/gpt2-medium/3f069053-b24e-4242-9302-d46b82e511aa.json new file mode 100644 index 000000000..3800ef51a --- /dev/null +++ b/data/hfopenllm_v2/openai-community/gpt2-medium/3f069053-b24e-4242-9302-d46b82e511aa.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/openai-community_gpt2-medium/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gpt2-medium", + "id": "openai-community/gpt2-medium", + "developer": "openai-community", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GPT2LMHeadModel", + "params_billions": 0.38 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": 
{ + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2208 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.305 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0076 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2626 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3884 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1182 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openai-community/gpt2-xl/62cd9bcb-a74c-40b9-be84-a0077235ae3c.json b/data/hfopenllm_v2/openai-community/gpt2-xl/62cd9bcb-a74c-40b9-be84-a0077235ae3c.json new file mode 100644 index 000000000..4094508d3 --- /dev/null +++ b/data/hfopenllm_v2/openai-community/gpt2-xl/62cd9bcb-a74c-40b9-be84-a0077235ae3c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/openai-community_gpt2-xl/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gpt2-xl", + "id": "openai-community/gpt2-xl", + "developer": "openai-community", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GPT2LMHeadModel", + "params_billions": 1.608 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.2039 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3009 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0098 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.371 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1131 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openai-community/gpt2/b4cd25f1-87d5-4173-a4d3-928444f6cb37.json b/data/hfopenllm_v2/openai-community/gpt2/b4cd25f1-87d5-4173-a4d3-928444f6cb37.json new file mode 100644 index 000000000..172402db8 --- /dev/null +++ b/data/hfopenllm_v2/openai-community/gpt2/b4cd25f1-87d5-4173-a4d3-928444f6cb37.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/openai-community_gpt2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gpt2", + "id": "openai-community/gpt2", + "developer": "openai-community", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GPT2LMHeadModel", + "params_billions": 0.137 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1793 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3036 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0023 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4471 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1159 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openai-community/gpt2/ddd4716e-d8ae-46a1-8fb4-c27e2da40e6e.json b/data/hfopenllm_v2/openai-community/gpt2/ddd4716e-d8ae-46a1-8fb4-c27e2da40e6e.json new file mode 100644 index 000000000..040f869b3 --- /dev/null +++ b/data/hfopenllm_v2/openai-community/gpt2/ddd4716e-d8ae-46a1-8fb4-c27e2da40e6e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/openai-community_gpt2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gpt2", + "id": "openai-community/gpt2", + "developer": "openai-community", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GPT2LMHeadModel", + "params_billions": 0.137 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.178 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3017 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + 
"metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0053 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.439 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1165 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openai/AI-Sweden-Models/gpt-sw3-40b/e791a3d6-928e-43c9-96ee-156901e8b18b.json b/data/hfopenllm_v2/openai/AI-Sweden-Models/gpt-sw3-40b/e791a3d6-928e-43c9-96ee-156901e8b18b.json deleted file mode 100644 index 5a9db74a0..000000000 --- a/data/hfopenllm_v2/openai/AI-Sweden-Models/gpt-sw3-40b/e791a3d6-928e-43c9-96ee-156901e8b18b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/AI-Sweden-Models_gpt-sw3-40b/1762652579.475041", - "retrieved_timestamp": "1762652579.475042", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "AI-Sweden-Models/gpt-sw3-40b", - "developer": "openai", - "inference_platform": "unknown", - "id": "AI-Sweden-Models/gpt-sw3-40b", - "additional_details": { - "precision": "float16", - "architecture": "GPT2LMHeadModel", - "params_billions": 39.927 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1470298807164989 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3267744702957652 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.017371601208459216 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.2348993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36323958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12757646276595744 - } - } - ] -} diff --git a/data/hfopenllm_v2/openai/DeepAutoAI/causal_gpt2/bf683545-a6df-4deb-9a91-ea6b8eae8be7.json b/data/hfopenllm_v2/openai/DeepAutoAI/causal_gpt2/bf683545-a6df-4deb-9a91-ea6b8eae8be7.json deleted file mode 100644 index 0174bba92..000000000 --- a/data/hfopenllm_v2/openai/DeepAutoAI/causal_gpt2/bf683545-a6df-4deb-9a91-ea6b8eae8be7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DeepAutoAI_causal_gpt2/1762652579.548641", - "retrieved_timestamp": "1762652579.5486422", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DeepAutoAI/causal_gpt2", - "developer": "openai", - "inference_platform": "unknown", - "id": "DeepAutoAI/causal_gpt2", - "additional_details": { - "precision": "bfloat16", - "architecture": "GPT2LMHeadModel", - "params_billions": 0.124 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1812767900282362 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30257073962835446 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.005287009063444109 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42695833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11311502659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/openai/DeepAutoAI/d2nwg_causal_gpt2/6b5b21c7-9284-4117-a63c-65628604e1a5.json b/data/hfopenllm_v2/openai/DeepAutoAI/d2nwg_causal_gpt2/6b5b21c7-9284-4117-a63c-65628604e1a5.json deleted file mode 100644 index 
e28e5a50e..000000000 --- a/data/hfopenllm_v2/openai/DeepAutoAI/d2nwg_causal_gpt2/6b5b21c7-9284-4117-a63c-65628604e1a5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DeepAutoAI_d2nwg_causal_gpt2/1762652579.549271", - "retrieved_timestamp": "1762652579.549272", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DeepAutoAI/d2nwg_causal_gpt2", - "developer": "openai", - "inference_platform": "unknown", - "id": "DeepAutoAI/d2nwg_causal_gpt2", - "additional_details": { - "precision": "bfloat16", - "architecture": "GPT2LMHeadModel", - "params_billions": 0.124 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19161823960425006 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30268984588252307 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.004531722054380665 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42971875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11510970744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/openai/DeepAutoAI/d2nwg_causal_gpt2_v1/f822093a-2bdc-4284-8af2-8048d09afeb2.json b/data/hfopenllm_v2/openai/DeepAutoAI/d2nwg_causal_gpt2_v1/f822093a-2bdc-4284-8af2-8048d09afeb2.json deleted file mode 100644 index df649573a..000000000 --- a/data/hfopenllm_v2/openai/DeepAutoAI/d2nwg_causal_gpt2_v1/f822093a-2bdc-4284-8af2-8048d09afeb2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/DeepAutoAI_d2nwg_causal_gpt2_v1/1762652579.549553", - "retrieved_timestamp": "1762652579.5495539", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "DeepAutoAI/d2nwg_causal_gpt2_v1", - "developer": "openai", - "inference_platform": "unknown", - "id": "DeepAutoAI/d2nwg_causal_gpt2_v1", - 
"additional_details": { - "precision": "bfloat16", - "architecture": "GPT2LMHeadModel", - "params_billions": 0.124 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1988623518929773 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29918984588252306 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0037764350453172208 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4336875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11353058510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/openai/EleutherAI/gpt-j-6b/1f140f2a-c9cb-49fb-8bcd-e59f699fd12a.json b/data/hfopenllm_v2/openai/EleutherAI/gpt-j-6b/1f140f2a-c9cb-49fb-8bcd-e59f699fd12a.json deleted file mode 100644 index d0cfc8a00..000000000 --- a/data/hfopenllm_v2/openai/EleutherAI/gpt-j-6b/1f140f2a-c9cb-49fb-8bcd-e59f699fd12a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EleutherAI_gpt-j-6b/1762652579.5928068", - "retrieved_timestamp": "1762652579.592808", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EleutherAI/gpt-j-6b", - "developer": "openai", - "inference_platform": "unknown", - "id": "EleutherAI/gpt-j-6b", - "additional_details": { - "precision": "bfloat16", - "architecture": "GPTJForCausalLM", - "params_billions": 6.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2522185578708937 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3191044431037278 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24580536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36575 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12408577127659574 - } - } - ] -} diff --git a/data/hfopenllm_v2/openai/EleutherAI/gpt-neo-1.3B/dc615b98-9255-4a6e-afe2-c79d59362520.json b/data/hfopenllm_v2/openai/EleutherAI/gpt-neo-1.3B/dc615b98-9255-4a6e-afe2-c79d59362520.json deleted file mode 100644 index 4738a8ca8..000000000 --- a/data/hfopenllm_v2/openai/EleutherAI/gpt-neo-1.3B/dc615b98-9255-4a6e-afe2-c79d59362520.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EleutherAI_gpt-neo-1.3B/1762652579.59305", - "retrieved_timestamp": "1762652579.59305", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EleutherAI/gpt-neo-1.3B", - "developer": "openai", - "inference_platform": "unknown", - "id": "EleutherAI/gpt-neo-1.3B", - "additional_details": { - "precision": "bfloat16", - "architecture": "GPTNeoForCausalLM", - "params_billions": 1.366 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20790502533278366 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30392315869356407 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38165625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.1163563829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/openai/EleutherAI/gpt-neo-125m/cff09938-5918-4825-b974-194019b48165.json b/data/hfopenllm_v2/openai/EleutherAI/gpt-neo-125m/cff09938-5918-4825-b974-194019b48165.json deleted file mode 100644 index 0f55a993b..000000000 --- a/data/hfopenllm_v2/openai/EleutherAI/gpt-neo-125m/cff09938-5918-4825-b974-194019b48165.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EleutherAI_gpt-neo-125m/1762652579.593268", - "retrieved_timestamp": "1762652579.593268", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EleutherAI/gpt-neo-125m", - "developer": "openai", - "inference_platform": "unknown", - "id": "EleutherAI/gpt-neo-125m", - "additional_details": { - "precision": "bfloat16", - "architecture": "GPTNeoForCausalLM", - "params_billions": 0.15 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19054442213327305 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3115156885791523 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3593333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10255984042553191 - } - } - ] -} diff --git a/data/hfopenllm_v2/openai/EleutherAI/gpt-neo-2.7B/6ebf0016-f747-4ccd-82fa-db427733b2f9.json b/data/hfopenllm_v2/openai/EleutherAI/gpt-neo-2.7B/6ebf0016-f747-4ccd-82fa-db427733b2f9.json deleted file mode 100644 index 732492590..000000000 --- a/data/hfopenllm_v2/openai/EleutherAI/gpt-neo-2.7B/6ebf0016-f747-4ccd-82fa-db427733b2f9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EleutherAI_gpt-neo-2.7B/1762652579.5934908", - "retrieved_timestamp": "1762652579.5934908", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": 
"third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EleutherAI/gpt-neo-2.7B", - "developer": "openai", - "inference_platform": "unknown", - "id": "EleutherAI/gpt-neo-2.7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "GPTNeoForCausalLM", - "params_billions": 2.718 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2589628851447493 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3139516033315253 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3553645833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11627327127659574 - } - } - ] -} diff --git a/data/hfopenllm_v2/openai/EleutherAI/gpt-neox-20b/0da6366b-b997-411e-ac76-c25b061e13f8.json b/data/hfopenllm_v2/openai/EleutherAI/gpt-neox-20b/0da6366b-b997-411e-ac76-c25b061e13f8.json deleted file mode 100644 index 2dee7fd94..000000000 --- a/data/hfopenllm_v2/openai/EleutherAI/gpt-neox-20b/0da6366b-b997-411e-ac76-c25b061e13f8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/EleutherAI_gpt-neox-20b/1762652579.5937028", - "retrieved_timestamp": "1762652579.593704", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "EleutherAI/gpt-neox-20b", - "developer": "openai", - "inference_platform": "unknown", - "id": "EleutherAI/gpt-neox-20b", - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 20.739 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2586880587951081 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31650380320877564 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24328859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36466666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1155252659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/openai/Kimargin/GPT-NEO-1.3B-wiki/9084d476-dee7-4447-9955-e0f066bd35ba.json b/data/hfopenllm_v2/openai/Kimargin/GPT-NEO-1.3B-wiki/9084d476-dee7-4447-9955-e0f066bd35ba.json deleted file mode 100644 index a6bcc4828..000000000 --- a/data/hfopenllm_v2/openai/Kimargin/GPT-NEO-1.3B-wiki/9084d476-dee7-4447-9955-e0f066bd35ba.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/Kimargin_GPT-NEO-1.3B-wiki/1762652579.6992168", - "retrieved_timestamp": "1762652579.699218", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Kimargin/GPT-NEO-1.3B-wiki", - "developer": "openai", - "inference_platform": "unknown", - "id": "Kimargin/GPT-NEO-1.3B-wiki", - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoForCausalLM", - "params_billions": 1.316 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19206815693471102 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3026339952046975 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24496644295302014 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", 
- "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3882604166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10987367021276596 - } - } - ] -} diff --git a/data/hfopenllm_v2/openai/NYTK/PULI-GPTrio/685fc779-4f8b-4110-82da-5a49697153a0.json b/data/hfopenllm_v2/openai/NYTK/PULI-GPTrio/685fc779-4f8b-4110-82da-5a49697153a0.json deleted file mode 100644 index 734fd2374..000000000 --- a/data/hfopenllm_v2/openai/NYTK/PULI-GPTrio/685fc779-4f8b-4110-82da-5a49697153a0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/NYTK_PULI-GPTrio/1762652579.769266", - "retrieved_timestamp": "1762652579.769266", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "NYTK/PULI-GPTrio", - "developer": "openai", - "inference_platform": "unknown", - "id": "NYTK/PULI-GPTrio", - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 7.673 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21797164855915638 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30600290906237543 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38187499999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11369680851063829 - } - } - ] -} diff --git a/data/hfopenllm_v2/openai/Sharathhebbar24/chat_gpt2_dpo/ce90bca7-f999-44ef-9b72-1fdb4ac68eb0.json b/data/hfopenllm_v2/openai/Sharathhebbar24/chat_gpt2_dpo/ce90bca7-f999-44ef-9b72-1fdb4ac68eb0.json deleted file mode 100644 index d90f99ab0..000000000 --- a/data/hfopenllm_v2/openai/Sharathhebbar24/chat_gpt2_dpo/ce90bca7-f999-44ef-9b72-1fdb4ac68eb0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/Sharathhebbar24_chat_gpt2_dpo/1762652579.8799832", - "retrieved_timestamp": "1762652579.8799841", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "Sharathhebbar24/chat_gpt2_dpo", - "developer": "openai", - "inference_platform": "unknown", - "id": "Sharathhebbar24/chat_gpt2_dpo", - "additional_details": { - "precision": "float16", - "architecture": "GPT2LMHeadModel", - "params_billions": 0.124 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09861944086135896 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29022988561565644 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.005287009063444109 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38184375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11419547872340426 - } - } - ] -} diff --git a/data/hfopenllm_v2/openai/distilbert/distilgpt2/a21cd9f0-6006-4587-bcd1-f1d42dfce7ba.json b/data/hfopenllm_v2/openai/distilbert/distilgpt2/a21cd9f0-6006-4587-bcd1-f1d42dfce7ba.json deleted file mode 100644 index 4193c1b12..000000000 --- a/data/hfopenllm_v2/openai/distilbert/distilgpt2/a21cd9f0-6006-4587-bcd1-f1d42dfce7ba.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/distilbert_distilgpt2/1762652580.1266282", - "retrieved_timestamp": "1762652580.126629", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "distilbert/distilgpt2", - "developer": "openai", - "inference_platform": "unknown", - "id": "distilbert/distilgpt2", - "additional_details": { - "precision": "bfloat16", - "architecture": "GPT2LMHeadModel", - "params_billions": 0.088 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06110010328151527 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3037988148650536 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42072916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11868351063829788 - } - } - ] -} diff --git a/data/hfopenllm_v2/openai/gpt2/43c1b559-e9e8-477e-95d9-1c28ac5d265c.json b/data/hfopenllm_v2/openai/gpt2/43c1b559-e9e8-477e-95d9-1c28ac5d265c.json deleted file mode 100644 index adba12ac0..000000000 --- a/data/hfopenllm_v2/openai/gpt2/43c1b559-e9e8-477e-95d9-1c28ac5d265c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/gpt2/1762652580.1809301", - "retrieved_timestamp": "1762652580.180931", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "gpt2", - "developer": "openai", - "inference_platform": "unknown", - "id": "openai/gpt2", - "additional_details": { - "precision": "bfloat16", - "architecture": "GPT2LMHeadModel", - "params_billions": 0.137 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1934168007553292 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036385401516729 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0030211480362537764 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43241666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1149434840425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/openai/gpt2/e28a8f11-68f6-464f-b1b8-21938cb41aa3.json b/data/hfopenllm_v2/openai/gpt2/e28a8f11-68f6-464f-b1b8-21938cb41aa3.json deleted file mode 100644 index 34cb2f14d..000000000 --- a/data/hfopenllm_v2/openai/gpt2/e28a8f11-68f6-464f-b1b8-21938cb41aa3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/gpt2/1762652580.181142", - "retrieved_timestamp": "1762652580.181143", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "gpt2", - "developer": "openai", - "inference_platform": "unknown", - "id": "openai/gpt2", - "additional_details": { - "precision": "float16", - "architecture": "GPT2LMHeadModel", - "params_billions": 0.137 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08333333333333333 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30833333333333335 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23333333333333334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4333333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1 - } - } - ] -} diff --git a/data/hfopenllm_v2/openai/langgptai/Qwen-las-v0.1/cfaa9b4e-8588-45a5-9b9d-4268a71b128b.json b/data/hfopenllm_v2/openai/langgptai/Qwen-las-v0.1/cfaa9b4e-8588-45a5-9b9d-4268a71b128b.json deleted file mode 100644 index 1a48b4dfe..000000000 --- a/data/hfopenllm_v2/openai/langgptai/Qwen-las-v0.1/cfaa9b4e-8588-45a5-9b9d-4268a71b128b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - 
"schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/langgptai_Qwen-las-v0.1/1762652580.313808", - "retrieved_timestamp": "1762652580.313809", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "langgptai/Qwen-las-v0.1", - "developer": "openai", - "inference_platform": "unknown", - "id": "langgptai/Qwen-las-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 7.901 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33010412372504955 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38925525629956187 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03700906344410876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24664429530201343 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37009374999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2325465425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/openai/meraGPT/mera-mix-4x7B/152e8d2f-8470-45b2-8318-9b6c44438978.json b/data/hfopenllm_v2/openai/meraGPT/mera-mix-4x7B/152e8d2f-8470-45b2-8318-9b6c44438978.json deleted file mode 100644 index 82c822975..000000000 --- a/data/hfopenllm_v2/openai/meraGPT/mera-mix-4x7B/152e8d2f-8470-45b2-8318-9b6c44438978.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/meraGPT_mera-mix-4x7B/1762652580.345789", - "retrieved_timestamp": "1762652580.34579", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "meraGPT/mera-mix-4x7B", - "developer": "openai", - "inference_platform": "unknown", - "id": "meraGPT/mera-mix-4x7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 24.154 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy 
on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4831779677921249 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40189899163661713 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40565625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27476728723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/openai/microsoft/DialoGPT-medium/3c70b5d5-784d-41fb-8ca7-eabd6a96a195.json b/data/hfopenllm_v2/openai/microsoft/DialoGPT-medium/3c70b5d5-784d-41fb-8ca7-eabd6a96a195.json deleted file mode 100644 index 42deab28e..000000000 --- a/data/hfopenllm_v2/openai/microsoft/DialoGPT-medium/3c70b5d5-784d-41fb-8ca7-eabd6a96a195.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/microsoft_DialoGPT-medium/1762652580.353813", - "retrieved_timestamp": "1762652580.3538141", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "microsoft/DialoGPT-medium", - "developer": "openai", - "inference_platform": "unknown", - "id": "microsoft/DialoGPT-medium", - "additional_details": { - "precision": "bfloat16", - "architecture": "GPT2LMHeadModel", - "params_billions": 0.345 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14790422744983311 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3014156380141994 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4286666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1118683510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/openai/openai-community/gpt2-large/15499118-2a47-4a6f-8c86-158a87a9350f.json b/data/hfopenllm_v2/openai/openai-community/gpt2-large/15499118-2a47-4a6f-8c86-158a87a9350f.json deleted file mode 100644 index c0a83b051..000000000 --- a/data/hfopenllm_v2/openai/openai-community/gpt2-large/15499118-2a47-4a6f-8c86-158a87a9350f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/openai-community_gpt2-large/1762652580.4297202", - "retrieved_timestamp": "1762652580.429721", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "openai-community/gpt2-large", - "developer": "openai", - "inference_platform": "unknown", - "id": "openai-community/gpt2-large", - "additional_details": { - "precision": "bfloat16", - "architecture": "GPT2LMHeadModel", - "params_billions": 0.812 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20478220011790937 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30688418760118824 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015104 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3788645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11419547872340426 - } - } - ] -} diff --git a/data/hfopenllm_v2/openai/openai-community/gpt2-medium/f68c55dc-0d74-4c75-ac57-62f23cce01b5.json 
b/data/hfopenllm_v2/openai/openai-community/gpt2-medium/f68c55dc-0d74-4c75-ac57-62f23cce01b5.json deleted file mode 100644 index 9d4cc279e..000000000 --- a/data/hfopenllm_v2/openai/openai-community/gpt2-medium/f68c55dc-0d74-4c75-ac57-62f23cce01b5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/openai-community_gpt2-medium/1762652580.4299362", - "retrieved_timestamp": "1762652580.429937", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "openai-community/gpt2-medium", - "developer": "openai", - "inference_platform": "unknown", - "id": "openai-community/gpt2-medium", - "additional_details": { - "precision": "bfloat16", - "architecture": "GPT2LMHeadModel", - "params_billions": 0.38 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22084402718121252 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3050280232176266 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0075528700906344415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3884479166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11818484042553191 - } - } - ] -} diff --git a/data/hfopenllm_v2/openai/openai-community/gpt2-xl/39a68088-0a01-482d-81b3-c6a84d98d0ca.json b/data/hfopenllm_v2/openai/openai-community/gpt2-xl/39a68088-0a01-482d-81b3-c6a84d98d0ca.json deleted file mode 100644 index 1c0e900ff..000000000 --- a/data/hfopenllm_v2/openai/openai-community/gpt2-xl/39a68088-0a01-482d-81b3-c6a84d98d0ca.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/openai-community_gpt2-xl/1762652580.430138", - "retrieved_timestamp": "1762652580.430138", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "openai-community/gpt2-xl", - 
"developer": "openai", - "inference_platform": "unknown", - "id": "openai-community/gpt2-xl", - "additional_details": { - "precision": "bfloat16", - "architecture": "GPT2LMHeadModel", - "params_billions": 1.608 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20385798570016445 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30085761123260785 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37095833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11311502659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/openai/openai-community/gpt2/435a8268-cf26-4c78-8789-758dd32759b1.json b/data/hfopenllm_v2/openai/openai-community/gpt2/435a8268-cf26-4c78-8789-758dd32759b1.json deleted file mode 100644 index 0c9dfe1d9..000000000 --- a/data/hfopenllm_v2/openai/openai-community/gpt2/435a8268-cf26-4c78-8789-758dd32759b1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/openai-community_gpt2/1762652580.429537", - "retrieved_timestamp": "1762652580.429537", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "openai-community/gpt2", - "developer": "openai", - "inference_platform": "unknown", - "id": "openai-community/gpt2", - "additional_details": { - "precision": "float16", - "architecture": "GPT2LMHeadModel", - "params_billions": 0.137 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17795449407571912 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30165801067653053 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { 
- "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.005287009063444109 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43902083333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11652260638297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/openai/openai-community/gpt2/a18409fa-1372-401e-8ae5-f25eaa6386d2.json b/data/hfopenllm_v2/openai/openai-community/gpt2/a18409fa-1372-401e-8ae5-f25eaa6386d2.json deleted file mode 100644 index c60c9816f..000000000 --- a/data/hfopenllm_v2/openai/openai-community/gpt2/a18409fa-1372-401e-8ae5-f25eaa6386d2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/openai-community_gpt2/1762652580.42929", - "retrieved_timestamp": "1762652580.429291", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "openai-community/gpt2", - "developer": "openai", - "inference_platform": "unknown", - "id": "openai-community/gpt2", - "additional_details": { - "precision": "bfloat16", - "architecture": "GPT2LMHeadModel", - "params_billions": 0.137 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17925327021192655 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3035711244213359 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0022658610271903325 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44705208333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": 
"Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11594082446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/openai/postbot/gpt2-medium-emailgen/a661e335-7ed5-43b9-aa3b-1e027cebdb75.json b/data/hfopenllm_v2/openai/postbot/gpt2-medium-emailgen/a661e335-7ed5-43b9-aa3b-1e027cebdb75.json deleted file mode 100644 index 8cac0cf95..000000000 --- a/data/hfopenllm_v2/openai/postbot/gpt2-medium-emailgen/a661e335-7ed5-43b9-aa3b-1e027cebdb75.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/postbot_gpt2-medium-emailgen/1762652580.4421701", - "retrieved_timestamp": "1762652580.4421709", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "postbot/gpt2-medium-emailgen", - "developer": "openai", - "inference_platform": "unknown", - "id": "postbot/gpt2-medium-emailgen", - "additional_details": { - "precision": "bfloat16", - "architecture": "GPT2LMHeadModel", - "params_billions": 0.38 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1492030035860406 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31304286003933807 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3911145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1146941489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/openai/sumink/ftgpt/ba4e0ed2-201a-4007-afbe-65e8276d853c.json b/data/hfopenllm_v2/openai/sumink/ftgpt/ba4e0ed2-201a-4007-afbe-65e8276d853c.json deleted file mode 100644 index d2aed364c..000000000 --- a/data/hfopenllm_v2/openai/sumink/ftgpt/ba4e0ed2-201a-4007-afbe-65e8276d853c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sumink_ftgpt/1762652580.5475452", - "retrieved_timestamp": "1762652580.5475461", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sumink/ftgpt", - "developer": "openai", - "inference_platform": "unknown", - "id": "sumink/ftgpt", - "additional_details": { - "precision": "bfloat16", - "architecture": "GPT2LMHeadModel", - "params_billions": 0.124 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0787100449030794 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29190853217047663 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41384375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1171875 - } - } - ] -} diff --git a/data/hfopenllm_v2/openai/togethercomputer/GPT-JT-6B-v1/03196258-8cc8-4c57-badf-9085ede8d658.json b/data/hfopenllm_v2/openai/togethercomputer/GPT-JT-6B-v1/03196258-8cc8-4c57-badf-9085ede8d658.json deleted file mode 100644 index 7495f47a7..000000000 --- a/data/hfopenllm_v2/openai/togethercomputer/GPT-JT-6B-v1/03196258-8cc8-4c57-badf-9085ede8d658.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/togethercomputer_GPT-JT-6B-v1/1762652580.574097", - "retrieved_timestamp": "1762652580.5740979", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "togethercomputer/GPT-JT-6B-v1", - "developer": "openai", - "inference_platform": "unknown", - "id": "togethercomputer/GPT-JT-6B-v1", - "additional_details": { - "precision": "float16", - "architecture": "GPTJForCausalLM", - "params_billions": 6.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20610646418170453 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on 
BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33026609127426704 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37365625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16256648936170212 - } - } - ] -} diff --git a/data/hfopenllm_v2/openai/universalml/NepaliGPT-2.0/07a71559-e618-4ba7-8721-bc6834f1c727.json b/data/hfopenllm_v2/openai/universalml/NepaliGPT-2.0/07a71559-e618-4ba7-8721-bc6834f1c727.json deleted file mode 100644 index a6aa7b475..000000000 --- a/data/hfopenllm_v2/openai/universalml/NepaliGPT-2.0/07a71559-e618-4ba7-8721-bc6834f1c727.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/universalml_NepaliGPT-2.0/1762652580.578092", - "retrieved_timestamp": "1762652580.578093", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "universalml/NepaliGPT-2.0", - "developer": "openai", - "inference_platform": "unknown", - "id": "universalml/NepaliGPT-2.0", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03649538779327739 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46604761322722105 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.004531722054380665 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4656770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3299534574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/openai/yuchenxie/ArlowGPT-3B-Multilingual/fd270937-c889-4a2b-aada-341a44c80d46.json b/data/hfopenllm_v2/openai/yuchenxie/ArlowGPT-3B-Multilingual/fd270937-c889-4a2b-aada-341a44c80d46.json deleted file mode 100644 index 8ddfb1839..000000000 --- a/data/hfopenllm_v2/openai/yuchenxie/ArlowGPT-3B-Multilingual/fd270937-c889-4a2b-aada-341a44c80d46.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/yuchenxie_ArlowGPT-3B-Multilingual/1762652580.611115", - "retrieved_timestamp": "1762652580.611116", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "yuchenxie/ArlowGPT-3B-Multilingual", - "developer": "openai", - "inference_platform": "unknown", - "id": "yuchenxie/ArlowGPT-3B-Multilingual", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6395486198841297 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4301403132173714 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11253776435045318 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37266666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2816655585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/openai/yuchenxie/ArlowGPT-8B/af890cb6-9d90-41b0-a7a1-c87f3584b93c.json b/data/hfopenllm_v2/openai/yuchenxie/ArlowGPT-8B/af890cb6-9d90-41b0-a7a1-c87f3584b93c.json deleted file mode 100644 index e734cac16..000000000 --- 
a/data/hfopenllm_v2/openai/yuchenxie/ArlowGPT-8B/af890cb6-9d90-41b0-a7a1-c87f3584b93c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/yuchenxie_ArlowGPT-8B/1762652580.611377", - "retrieved_timestamp": "1762652580.611378", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "yuchenxie/ArlowGPT-8B", - "developer": "openai", - "inference_platform": "unknown", - "id": "yuchenxie/ArlowGPT-8B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7846536079823756 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5080162816130412 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3882291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.378656914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/openbmb/MiniCPM-S-1B-sft-llama-format/1e5b62a3-018b-429a-b2b4-325545ee99dc.json b/data/hfopenllm_v2/openbmb/MiniCPM-S-1B-sft-llama-format/1e5b62a3-018b-429a-b2b4-325545ee99dc.json new file mode 100644 index 000000000..404c67964 --- /dev/null +++ b/data/hfopenllm_v2/openbmb/MiniCPM-S-1B-sft-llama-format/1e5b62a3-018b-429a-b2b4-325545ee99dc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/openbmb_MiniCPM-S-1B-sft-llama-format/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MiniCPM-S-1B-sft-llama-format", + "id": "openbmb/MiniCPM-S-1B-sft-llama-format", + "developer": "openbmb", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.0 + } + }, + "evaluation_results": [ + { + 
"evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3329 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3049 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.031 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3317 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1858 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openbmb/MiniCPM-S-1B-sft-llama-format/53b78e02-9491-4f3b-a03b-7c015dde640a.json b/data/hfopenllm_v2/openbmb/MiniCPM-S-1B-sft-llama-format/53b78e02-9491-4f3b-a03b-7c015dde640a.json deleted file mode 100644 index 00f290146..000000000 --- a/data/hfopenllm_v2/openbmb/MiniCPM-S-1B-sft-llama-format/53b78e02-9491-4f3b-a03b-7c015dde640a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/openbmb_MiniCPM-S-1B-sft-llama-format/1762652580.430347", - "retrieved_timestamp": "1762652580.430348", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "openbmb/MiniCPM-S-1B-sft-llama-format", - "developer": "openbmb", - "inference_platform": "unknown", - "id": "openbmb/MiniCPM-S-1B-sft-llama-format", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": 
"Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3328767669782843 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30493136322070497 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030966767371601207 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33167708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1858377659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/openchat/openchat-3.5-0106/51cd5c94-7c87-4758-aadc-46acf20ab4b0.json b/data/hfopenllm_v2/openchat/openchat-3.5-0106/51cd5c94-7c87-4758-aadc-46acf20ab4b0.json deleted file mode 100644 index 8a103cd20..000000000 --- a/data/hfopenllm_v2/openchat/openchat-3.5-0106/51cd5c94-7c87-4758-aadc-46acf20ab4b0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/openchat_openchat-3.5-0106/1762652580.430586", - "retrieved_timestamp": "1762652580.4305868", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "openchat/openchat-3.5-0106", - "developer": "openchat", - "inference_platform": "unknown", - "id": "openchat/openchat-3.5-0106", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5966590867786362 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46169787083960595 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07628398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on 
GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42543749999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3291223404255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/openchat/openchat-3.5-0106/958d410e-ce43-44c0-8a56-685c0a618408.json b/data/hfopenllm_v2/openchat/openchat-3.5-0106/958d410e-ce43-44c0-8a56-685c0a618408.json new file mode 100644 index 000000000..c3931b7fa --- /dev/null +++ b/data/hfopenllm_v2/openchat/openchat-3.5-0106/958d410e-ce43-44c0-8a56-685c0a618408.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/openchat_openchat-3.5-0106/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openchat-3.5-0106", + "id": "openchat/openchat-3.5-0106", + "developer": "openchat", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5967 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4617 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0763 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.4254 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3291 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openchat/openchat-3.5-1210/57c53f20-aa32-49fd-926a-f26c9d0759d4.json b/data/hfopenllm_v2/openchat/openchat-3.5-1210/57c53f20-aa32-49fd-926a-f26c9d0759d4.json new file mode 100644 index 000000000..2d6ea71b7 --- /dev/null +++ b/data/hfopenllm_v2/openchat/openchat-3.5-1210/57c53f20-aa32-49fd-926a-f26c9d0759d4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/openchat_openchat-3.5-1210/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openchat-3.5-1210", + "id": "openchat/openchat-3.5-1210", + "developer": "openchat", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6037 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4535 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0785 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4414 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3142 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openchat/openchat-3.5-1210/6b3c8f0b-25ed-4ae3-be89-a91815091de0.json b/data/hfopenllm_v2/openchat/openchat-3.5-1210/6b3c8f0b-25ed-4ae3-be89-a91815091de0.json deleted file mode 100644 index 331f1ab5c..000000000 --- a/data/hfopenllm_v2/openchat/openchat-3.5-1210/6b3c8f0b-25ed-4ae3-be89-a91815091de0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/openchat_openchat-3.5-1210/1762652580.430838", - "retrieved_timestamp": "1762652580.430839", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "openchat/openchat-3.5-1210", - "developer": "openchat", - "inference_platform": "unknown", - "id": "openchat/openchat-3.5-1210", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.603678240402133 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4535356846447984 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07854984894259819 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4414375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3142453457446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/openchat/openchat-3.6-8b-20240522/2305b9e7-1c2b-42d7-b306-802e32d53e0f.json b/data/hfopenllm_v2/openchat/openchat-3.6-8b-20240522/2305b9e7-1c2b-42d7-b306-802e32d53e0f.json deleted file mode 100644 index 972427071..000000000 --- a/data/hfopenllm_v2/openchat/openchat-3.6-8b-20240522/2305b9e7-1c2b-42d7-b306-802e32d53e0f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/openchat_openchat-3.6-8b-20240522/1762652580.4310489", - "retrieved_timestamp": "1762652580.43105", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "openchat/openchat-3.6-8b-20240522", - "developer": "openchat", - "inference_platform": "unknown", - "id": "openchat/openchat-3.6-8b-20240522", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5343355629729118 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5338412089001999 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09969788519637462 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3998541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32288896276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/openchat/openchat-3.6-8b-20240522/76def522-6fe1-458f-bfbf-99b50ece3367.json b/data/hfopenllm_v2/openchat/openchat-3.6-8b-20240522/76def522-6fe1-458f-bfbf-99b50ece3367.json new file mode 100644 index 000000000..162c9b8e8 --- /dev/null +++ b/data/hfopenllm_v2/openchat/openchat-3.6-8b-20240522/76def522-6fe1-458f-bfbf-99b50ece3367.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/openchat_openchat-3.6-8b-20240522/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openchat-3.6-8b-20240522", + "id": "openchat/openchat-3.6-8b-20240522", + "developer": "openchat", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.5343 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5338 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0997 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.318 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3999 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3229 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openchat/openchat_3.5/c2d66fd5-6c95-4b8e-b87f-c8f0ae00271a.json b/data/hfopenllm_v2/openchat/openchat_3.5/c2d66fd5-6c95-4b8e-b87f-c8f0ae00271a.json deleted file mode 100644 index 1c7bd62f8..000000000 --- a/data/hfopenllm_v2/openchat/openchat_3.5/c2d66fd5-6c95-4b8e-b87f-c8f0ae00271a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/openchat_openchat_3.5/1762652580.431262", - "retrieved_timestamp": "1762652580.431263", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "openchat/openchat_3.5", - "developer": "openchat", - "inference_platform": "unknown", - "id": "openchat/openchat_3.5", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5931118321608887 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.44263196862832893 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07250755287009064 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4228645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31532579787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/openchat/openchat_3.5/c467bc88-6769-48ac-abd4-867ee38bbe57.json b/data/hfopenllm_v2/openchat/openchat_3.5/c467bc88-6769-48ac-abd4-867ee38bbe57.json new file mode 100644 index 000000000..652c57fc3 --- /dev/null +++ b/data/hfopenllm_v2/openchat/openchat_3.5/c467bc88-6769-48ac-abd4-867ee38bbe57.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/openchat_openchat_3.5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openchat_3.5", + "id": "openchat/openchat_3.5", + "developer": "openchat", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5931 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4426 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0725 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4229 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3153 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openchat/openchat_v3.2/2ee1a517-ef52-469e-ac5d-f14e3d72c87c.json b/data/hfopenllm_v2/openchat/openchat_v3.2/2ee1a517-ef52-469e-ac5d-f14e3d72c87c.json deleted file mode 100644 index 6f49605c4..000000000 --- a/data/hfopenllm_v2/openchat/openchat_v3.2/2ee1a517-ef52-469e-ac5d-f14e3d72c87c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/openchat_openchat_v3.2/1762652580.431712", - "retrieved_timestamp": "1762652580.431714", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "openchat/openchat_v3.2", - "developer": "openchat", - "inference_platform": "unknown", - "id": "openchat/openchat_v3.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2980558252104416 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4330564283474314 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.433625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2421875 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/openchat/openchat_v3.2/801681eb-66f4-46e0-bb2b-7ba4b46679af.json b/data/hfopenllm_v2/openchat/openchat_v3.2/801681eb-66f4-46e0-bb2b-7ba4b46679af.json new file mode 100644 index 000000000..781672e47 --- /dev/null +++ b/data/hfopenllm_v2/openchat/openchat_v3.2/801681eb-66f4-46e0-bb2b-7ba4b46679af.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/openchat_openchat_v3.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openchat_v3.2", + "id": "openchat/openchat_v3.2", + "developer": "openchat", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2981 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4331 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0128 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2422 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openchat/openchat_v3.2_super/b7b3fcb7-bbc7-4f39-9daa-7a54362d5d68.json b/data/hfopenllm_v2/openchat/openchat_v3.2_super/b7b3fcb7-bbc7-4f39-9daa-7a54362d5d68.json deleted file mode 100644 index f1785cffb..000000000 --- 
a/data/hfopenllm_v2/openchat/openchat_v3.2_super/b7b3fcb7-bbc7-4f39-9daa-7a54362d5d68.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/openchat_openchat_v3.2_super/1762652580.431961", - "retrieved_timestamp": "1762652580.431962", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "openchat/openchat_v3.2_super", - "developer": "openchat", - "inference_platform": "unknown", - "id": "openchat/openchat_v3.2_super", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2861906408329898 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42212089838803973 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.021148036253776436 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41613541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24251994680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/openchat/openchat_v3.2_super/cdd0ea1c-b17a-4816-953c-1d7164c64114.json b/data/hfopenllm_v2/openchat/openchat_v3.2_super/cdd0ea1c-b17a-4816-953c-1d7164c64114.json new file mode 100644 index 000000000..a8f2b40b0 --- /dev/null +++ b/data/hfopenllm_v2/openchat/openchat_v3.2_super/cdd0ea1c-b17a-4816-953c-1d7164c64114.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/openchat_openchat_v3.2_super/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openchat_v3.2_super", + "id": "openchat/openchat_v3.2_super", + "developer": "openchat", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + 
"source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2862 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4221 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0211 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4161 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2425 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/orai-nlp/Llama-eus-8B/b2060893-1f7d-4e7a-a458-3623147ac118.json b/data/hfopenllm_v2/orai-nlp/Llama-eus-8B/b2060893-1f7d-4e7a-a458-3623147ac118.json new file mode 100644 index 000000000..5964aa7f2 --- /dev/null +++ b/data/hfopenllm_v2/orai-nlp/Llama-eus-8B/b2060893-1f7d-4e7a-a458-3623147ac118.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/orai-nlp_Llama-eus-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-eus-8B", + "id": "orai-nlp/Llama-eus-8B", + "developer": "orai-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.2161 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4418 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0468 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3919 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3058 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/oxyapi/oxy-1-small/62126b06-5bd2-451f-a76c-7c227690f149.json b/data/hfopenllm_v2/oxyapi/oxy-1-small/62126b06-5bd2-451f-a76c-7c227690f149.json deleted file mode 100644 index 080fa16ed..000000000 --- a/data/hfopenllm_v2/oxyapi/oxy-1-small/62126b06-5bd2-451f-a76c-7c227690f149.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/oxyapi_oxy-1-small/1762652580.432582", - "retrieved_timestamp": "1762652580.432582", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "oxyapi/oxy-1-small", - "developer": "oxyapi", - "inference_platform": "unknown", - "id": "oxyapi/oxy-1-small", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6244608749229821 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5884593784818278 - 
} - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36027190332326287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3716442953020134 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4486666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5000831117021277 - } - } - ] -} diff --git a/data/hfopenllm_v2/oxyapi/oxy-1-small/cf8aac35-679a-4ebb-bca8-6e0f2d42e71b.json b/data/hfopenllm_v2/oxyapi/oxy-1-small/cf8aac35-679a-4ebb-bca8-6e0f2d42e71b.json new file mode 100644 index 000000000..eae996b8a --- /dev/null +++ b/data/hfopenllm_v2/oxyapi/oxy-1-small/cf8aac35-679a-4ebb-bca8-6e0f2d42e71b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/oxyapi_oxy-1-small/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "oxy-1-small", + "id": "oxyapi/oxy-1-small", + "developer": "oxyapi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6245 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5885 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3603 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3716 + } + }, + { + 
"evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4487 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5001 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ozone-ai/0x-lite/34bfe887-5a3a-4626-997e-c35d3a0ec341.json b/data/hfopenllm_v2/ozone-ai/0x-lite/34bfe887-5a3a-4626-997e-c35d3a0ec341.json new file mode 100644 index 000000000..b6787efe4 --- /dev/null +++ b/data/hfopenllm_v2/ozone-ai/0x-lite/34bfe887-5a3a-4626-997e-c35d3a0ec341.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ozone-ai_0x-lite/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "0x-lite", + "id": "ozone-ai/0x-lite", + "developer": "ozone-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.774 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6341 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5045 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3196 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.4221 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5184 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ozone-ai/0x-lite/9b5b23bc-44bb-4d47-91a2-18e23571743d.json b/data/hfopenllm_v2/ozone-ai/0x-lite/9b5b23bc-44bb-4d47-91a2-18e23571743d.json deleted file mode 100644 index 846db725a..000000000 --- a/data/hfopenllm_v2/ozone-ai/0x-lite/9b5b23bc-44bb-4d47-91a2-18e23571743d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ozone-ai_0x-lite/1762652580.432846", - "retrieved_timestamp": "1762652580.432847", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ozone-ai/0x-lite", - "developer": "ozone-ai", - "inference_platform": "unknown", - "id": "ozone-ai/0x-lite", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7739874643723099 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6340580988016683 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5045317220543807 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31963087248322153 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4220625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5183676861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/ozone-research/Chirp-01/69a65ae3-71fe-4e33-be2d-20bc0c25969a.json b/data/hfopenllm_v2/ozone-research/Chirp-01/69a65ae3-71fe-4e33-be2d-20bc0c25969a.json deleted file mode 100644 index 12cb40df3..000000000 --- a/data/hfopenllm_v2/ozone-research/Chirp-01/69a65ae3-71fe-4e33-be2d-20bc0c25969a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/ozone-research_Chirp-01/1762652580.433142", - "retrieved_timestamp": "1762652580.4331431", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ozone-research/Chirp-01", - "developer": "ozone-research", - "inference_platform": "unknown", - "id": "ozone-research/Chirp-01", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6347524568145853 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4649560260501419 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3466767371601209 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2718120805369128 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4487291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3508144946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/ozone-research/Chirp-01/b81acc47-6fd5-4f89-8c70-f8f14b677e04.json b/data/hfopenllm_v2/ozone-research/Chirp-01/b81acc47-6fd5-4f89-8c70-f8f14b677e04.json new file mode 100644 index 000000000..92d7d8fbf --- /dev/null +++ b/data/hfopenllm_v2/ozone-research/Chirp-01/b81acc47-6fd5-4f89-8c70-f8f14b677e04.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ozone-research_Chirp-01/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Chirp-01", + "id": "ozone-research/Chirp-01", + "developer": "ozone-research", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6348 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.465 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3467 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2718 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4487 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3508 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/paloalma/ECE-TW3-JRGL-V1/30b977a8-7882-49be-8621-9ee3fce270ec.json b/data/hfopenllm_v2/paloalma/ECE-TW3-JRGL-V1/30b977a8-7882-49be-8621-9ee3fce270ec.json new file mode 100644 index 000000000..fa0ba291d --- /dev/null +++ b/data/hfopenllm_v2/paloalma/ECE-TW3-JRGL-V1/30b977a8-7882-49be-8621-9ee3fce270ec.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/paloalma_ECE-TW3-JRGL-V1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-TW3-JRGL-V1", + "id": "paloalma/ECE-TW3-JRGL-V1", + "developer": "paloalma", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 68.977 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5535 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": 
{ + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6284 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1314 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3473 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4621 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4221 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/paloalma/ECE-TW3-JRGL-V1/d86238d3-3a4e-467a-8ce1-e6a4a903aa3b.json b/data/hfopenllm_v2/paloalma/ECE-TW3-JRGL-V1/d86238d3-3a4e-467a-8ce1-e6a4a903aa3b.json deleted file mode 100644 index 432e5603d..000000000 --- a/data/hfopenllm_v2/paloalma/ECE-TW3-JRGL-V1/d86238d3-3a4e-467a-8ce1-e6a4a903aa3b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/paloalma_ECE-TW3-JRGL-V1/1762652580.433397", - "retrieved_timestamp": "1762652580.433398", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "paloalma/ECE-TW3-JRGL-V1", - "developer": "paloalma", - "inference_platform": "unknown", - "id": "paloalma/ECE-TW3-JRGL-V1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 68.977 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5534947273235016 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6283667540784627 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13141993957703926 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34731543624161076 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46208333333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.422124335106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/paloalma/ECE-TW3-JRGL-V2/3367fd79-713c-4691-80cd-4abb6b2818ef.json b/data/hfopenllm_v2/paloalma/ECE-TW3-JRGL-V2/3367fd79-713c-4691-80cd-4abb6b2818ef.json new file mode 100644 index 000000000..503c32c60 --- /dev/null +++ b/data/hfopenllm_v2/paloalma/ECE-TW3-JRGL-V2/3367fd79-713c-4691-80cd-4abb6b2818ef.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/paloalma_ECE-TW3-JRGL-V2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-TW3-JRGL-V2", + "id": "paloalma/ECE-TW3-JRGL-V2", + "developer": "paloalma", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.288 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2255 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6031 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.185 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3314 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": 
"TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4793 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4588 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/paloalma/ECE-TW3-JRGL-V2/d8d1a5b1-cc9a-4af9-b95f-db78f7edf70e.json b/data/hfopenllm_v2/paloalma/ECE-TW3-JRGL-V2/d8d1a5b1-cc9a-4af9-b95f-db78f7edf70e.json deleted file mode 100644 index ee6c395fe..000000000 --- a/data/hfopenllm_v2/paloalma/ECE-TW3-JRGL-V2/d8d1a5b1-cc9a-4af9-b95f-db78f7edf70e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/paloalma_ECE-TW3-JRGL-V2/1762652580.433646", - "retrieved_timestamp": "1762652580.4336472", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "paloalma/ECE-TW3-JRGL-V2", - "developer": "paloalma", - "inference_platform": "unknown", - "id": "paloalma/ECE-TW3-JRGL-V2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.288 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2254894790267601 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6030988136029874 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18504531722054382 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47932291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4587765957446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/paloalma/ECE-TW3-JRGL-V5/9468fda5-a233-4d19-9a99-602e694f4a02.json 
b/data/hfopenllm_v2/paloalma/ECE-TW3-JRGL-V5/9468fda5-a233-4d19-9a99-602e694f4a02.json deleted file mode 100644 index a427a8558..000000000 --- a/data/hfopenllm_v2/paloalma/ECE-TW3-JRGL-V5/9468fda5-a233-4d19-9a99-602e694f4a02.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/paloalma_ECE-TW3-JRGL-V5/1762652580.433843", - "retrieved_timestamp": "1762652580.4338439", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "paloalma/ECE-TW3-JRGL-V5", - "developer": "paloalma", - "inference_platform": "unknown", - "id": "paloalma/ECE-TW3-JRGL-V5", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 72.289 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4552509563513699 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6024712037668832 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18353474320241692 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3414429530201342 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4620520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46476063829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/paloalma/ECE-TW3-JRGL-V5/add899b8-f3e6-4d87-8846-8254f4dfbd5f.json b/data/hfopenllm_v2/paloalma/ECE-TW3-JRGL-V5/add899b8-f3e6-4d87-8846-8254f4dfbd5f.json new file mode 100644 index 000000000..9ec62858b --- /dev/null +++ b/data/hfopenllm_v2/paloalma/ECE-TW3-JRGL-V5/add899b8-f3e6-4d87-8846-8254f4dfbd5f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/paloalma_ECE-TW3-JRGL-V5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-TW3-JRGL-V5", + "id": "paloalma/ECE-TW3-JRGL-V5", + "developer": "paloalma", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", 
+ "params_billions": 72.289 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4553 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6025 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1835 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3414 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4621 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4648 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/paloalma/Le_Triomphant-ECE-TW3/49f92222-f6cd-47e5-968d-10dc4345dd90.json b/data/hfopenllm_v2/paloalma/Le_Triomphant-ECE-TW3/49f92222-f6cd-47e5-968d-10dc4345dd90.json deleted file mode 100644 index e1b307fc0..000000000 --- a/data/hfopenllm_v2/paloalma/Le_Triomphant-ECE-TW3/49f92222-f6cd-47e5-968d-10dc4345dd90.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/paloalma_Le_Triomphant-ECE-TW3/1762652580.434039", - "retrieved_timestamp": "1762652580.434039", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "paloalma/Le_Triomphant-ECE-TW3", - "developer": "paloalma", - "inference_platform": "unknown", - "id": "paloalma/Le_Triomphant-ECE-TW3", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 72.289 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { 
- "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5402055435134332 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6112057897556996 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19486404833836857 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.348993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4725 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.476313164893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/paloalma/Le_Triomphant-ECE-TW3/53829ec0-f233-4b61-a672-6a467823caaa.json b/data/hfopenllm_v2/paloalma/Le_Triomphant-ECE-TW3/53829ec0-f233-4b61-a672-6a467823caaa.json new file mode 100644 index 000000000..bd04d24ff --- /dev/null +++ b/data/hfopenllm_v2/paloalma/Le_Triomphant-ECE-TW3/53829ec0-f233-4b61-a672-6a467823caaa.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/paloalma_Le_Triomphant-ECE-TW3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Le_Triomphant-ECE-TW3", + "id": "paloalma/Le_Triomphant-ECE-TW3", + "developer": "paloalma", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 72.289 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5402 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6112 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1949 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.349 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4725 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4763 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/paloalma/TW3-JRGL-v2/525f2e27-bd77-49e9-85db-61efddbdd186.json b/data/hfopenllm_v2/paloalma/TW3-JRGL-v2/525f2e27-bd77-49e9-85db-61efddbdd186.json deleted file mode 100644 index f386de8f5..000000000 --- a/data/hfopenllm_v2/paloalma/TW3-JRGL-v2/525f2e27-bd77-49e9-85db-61efddbdd186.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/paloalma_TW3-JRGL-v2/1762652580.43424", - "retrieved_timestamp": "1762652580.434241", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "paloalma/TW3-JRGL-v2", - "developer": "paloalma", - "inference_platform": "unknown", - "id": "paloalma/TW3-JRGL-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 72.289 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5316127874040878 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6137525505395743 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17900302114803626 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35906040268456374 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48583333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4857878989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/paloalma/TW3-JRGL-v2/e2b41200-bff2-4835-a0ea-27ff56937570.json b/data/hfopenllm_v2/paloalma/TW3-JRGL-v2/e2b41200-bff2-4835-a0ea-27ff56937570.json new file mode 100644 index 000000000..f94c8540b --- /dev/null +++ b/data/hfopenllm_v2/paloalma/TW3-JRGL-v2/e2b41200-bff2-4835-a0ea-27ff56937570.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/paloalma_TW3-JRGL-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TW3-JRGL-v2", + "id": "paloalma/TW3-JRGL-v2", + "developer": "paloalma", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 72.289 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5316 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6138 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.179 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3591 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4858 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", 
+ "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4858 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/Al_Dente_v1_8b/3d33f26d-72be-451e-bcf0-501e0bc2f1db.json b/data/hfopenllm_v2/pankajmathur/Al_Dente_v1_8b/3d33f26d-72be-451e-bcf0-501e0bc2f1db.json new file mode 100644 index 000000000..4ce9999c5 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/Al_Dente_v1_8b/3d33f26d-72be-451e-bcf0-501e0bc2f1db.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/pankajmathur_Al_Dente_v1_8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Al_Dente_v1_8b", + "id": "pankajmathur/Al_Dente_v1_8b", + "developer": "pankajmathur", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3694 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4835 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0408 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3987 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.286 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/Al_Dente_v1_8b/9924f2bd-abe5-431c-aa06-be24952ca363.json 
b/data/hfopenllm_v2/pankajmathur/Al_Dente_v1_8b/9924f2bd-abe5-431c-aa06-be24952ca363.json deleted file mode 100644 index 7b1e2b31a..000000000 --- a/data/hfopenllm_v2/pankajmathur/Al_Dente_v1_8b/9924f2bd-abe5-431c-aa06-be24952ca363.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/pankajmathur_Al_Dente_v1_8b/1762652580.434438", - "retrieved_timestamp": "1762652580.434439", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "pankajmathur/Al_Dente_v1_8b", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/Al_Dente_v1_8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3693721547715617 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48347371404380524 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3987083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2859873670212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/pankajmathur/model_007_13b_v2/3b4c05fc-2ccf-46db-8d64-045508f6614b.json b/data/hfopenllm_v2/pankajmathur/model_007_13b_v2/3b4c05fc-2ccf-46db-8d64-045508f6614b.json new file mode 100644 index 000000000..8fcac09dd --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/model_007_13b_v2/3b4c05fc-2ccf-46db-8d64-045508f6614b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/pankajmathur_model_007_13b_v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "model_007_13b_v2", + "id": "pankajmathur/model_007_13b_v2", + "developer": "pankajmathur", + "inference_platform": "unknown", + "additional_details": { + "precision": 
"bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3056 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4702 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0211 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4611 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2461 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/model_007_13b_v2/a108864f-40d6-492b-8440-1cbb5d87a5fe.json b/data/hfopenllm_v2/pankajmathur/model_007_13b_v2/a108864f-40d6-492b-8440-1cbb5d87a5fe.json deleted file mode 100644 index 17ebc3fe6..000000000 --- a/data/hfopenllm_v2/pankajmathur/model_007_13b_v2/a108864f-40d6-492b-8440-1cbb5d87a5fe.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/pankajmathur_model_007_13b_v2/1762652580.434693", - "retrieved_timestamp": "1762652580.4346938", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "pankajmathur/model_007_13b_v2", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/model_007_13b_v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.0 - } - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30564901129004374 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4702292766687601 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.021148036253776436 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46109375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24609375 - } - } - ] -} diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_3b/af83a91c-3b07-48c6-9726-5bd77347f810.json b/data/hfopenllm_v2/pankajmathur/orca_mini_3b/af83a91c-3b07-48c6-9726-5bd77347f810.json new file mode 100644 index 000000000..ba7ea5916 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_3b/af83a91c-3b07-48c6-9726-5bd77347f810.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_3b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "orca_mini_3b", + "id": "pankajmathur/orca_mini_3b", + "developer": "pankajmathur", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.426 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0742 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3196 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on 
MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0083 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2458 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3349 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1145 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_3b/bebbfd98-fdba-413d-9e7d-06af8bd4d5a7.json b/data/hfopenllm_v2/pankajmathur/orca_mini_3b/bebbfd98-fdba-413d-9e7d-06af8bd4d5a7.json deleted file mode 100644 index de939bf55..000000000 --- a/data/hfopenllm_v2/pankajmathur/orca_mini_3b/bebbfd98-fdba-413d-9e7d-06af8bd4d5a7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_3b/1762652580.434913", - "retrieved_timestamp": "1762652580.434913", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "pankajmathur/orca_mini_3b", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_3b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.426 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07421419611076388 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196070040004752 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24580536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { 
- "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3349270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11452792553191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_7b/48759b07-9aea-42bd-8d73-9c4208d2789f.json b/data/hfopenllm_v2/pankajmathur/orca_mini_7b/48759b07-9aea-42bd-8d73-9c4208d2789f.json new file mode 100644 index 000000000..9f6551021 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_7b/48759b07-9aea-42bd-8d73-9c4208d2789f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "orca_mini_7b", + "id": "pankajmathur/orca_mini_7b", + "developer": "pankajmathur", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0412 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3332 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0128 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2542 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3698 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1246 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_7b/773c97e1-0e43-46ae-a134-8a08ca9b5094.json b/data/hfopenllm_v2/pankajmathur/orca_mini_7b/773c97e1-0e43-46ae-a134-8a08ca9b5094.json deleted file mode 100644 index 845f633bf..000000000 --- a/data/hfopenllm_v2/pankajmathur/orca_mini_7b/773c97e1-0e43-46ae-a134-8a08ca9b5094.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_7b/1762652580.435124", - "retrieved_timestamp": "1762652580.4351249", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "pankajmathur/orca_mini_7b", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04121619525082337 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3332228472650342 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36975 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12458444148936171 - } - } - ] -} diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_phi-4/68820679-55f4-494d-91a0-0db1bccb8983.json b/data/hfopenllm_v2/pankajmathur/orca_mini_phi-4/68820679-55f4-494d-91a0-0db1bccb8983.json new file mode 100644 index 000000000..cf14929d8 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_phi-4/68820679-55f4-494d-91a0-0db1bccb8983.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_phi-4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "orca_mini_phi-4", + "id": "pankajmathur/orca_mini_phi-4", + "developer": "pankajmathur", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7781 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6856 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3742 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4703 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5255 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v2_7b/029774ac-a63d-4acc-a37c-4194e4afdecc.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v2_7b/029774ac-a63d-4acc-a37c-4194e4afdecc.json new file mode 100644 index 000000000..37a85929b --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v2_7b/029774ac-a63d-4acc-a37c-4194e4afdecc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v2_7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "orca_mini_v2_7b", + "id": "pankajmathur/orca_mini_v2_7b", + "developer": "pankajmathur", + "inference_platform": "unknown", + 
"additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1358 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3536 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0113 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2492 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3593 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1542 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v2_7b/036c4f96-2d08-40a1-968d-293e0b3a1ed0.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v2_7b/036c4f96-2d08-40a1-968d-293e0b3a1ed0.json deleted file mode 100644 index 59677d418..000000000 --- a/data/hfopenllm_v2/pankajmathur/orca_mini_v2_7b/036c4f96-2d08-40a1-968d-293e0b3a1ed0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v2_7b/1762652580.435575", - "retrieved_timestamp": "1762652580.435576", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v2_7b", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v2_7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.0 - } - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13578859647956312 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35363417847864514 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.011329305135951661 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24916107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35933333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1541722074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v3_13b/146df856-e2c8-41eb-b860-ceb78c126e55.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v3_13b/146df856-e2c8-41eb-b860-ceb78c126e55.json new file mode 100644 index 000000000..d255ddd83 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v3_13b/146df856-e2c8-41eb-b860-ceb78c126e55.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v3_13b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "orca_mini_v3_13b", + "id": "pankajmathur/orca_mini_v3_13b", + "developer": "pankajmathur", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2897 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4711 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + 
}, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0211 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4598 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2305 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v3_13b/d3ba7ff3-e0d7-48e3-b63d-9648a193679f.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v3_13b/d3ba7ff3-e0d7-48e3-b63d-9648a193679f.json deleted file mode 100644 index 782a46286..000000000 --- a/data/hfopenllm_v2/pankajmathur/orca_mini_v3_13b/d3ba7ff3-e0d7-48e3-b63d-9648a193679f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v3_13b/1762652580.435779", - "retrieved_timestamp": "1762652580.43578", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v3_13b", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v3_13b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28966253983873896 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4710970361474938 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.021148036253776436 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45979166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23046875 - } - } - ] -} diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v3_70b/74c6bea7-ad16-4f08-a2b7-9c894b9ce207.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v3_70b/74c6bea7-ad16-4f08-a2b7-9c894b9ce207.json new file mode 100644 index 000000000..39384845f --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v3_70b/74c6bea7-ad16-4f08-a2b7-9c894b9ce207.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v3_70b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "orca_mini_v3_70b", + "id": "pankajmathur/orca_mini_v3_70b", + "developer": "pankajmathur", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4015 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5949 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0385 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.318 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5079 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": 
"hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3757 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v3_70b/beae9826-35b2-4758-a20a-10c8402daa42.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v3_70b/beae9826-35b2-4758-a20a-10c8402daa42.json deleted file mode 100644 index b410acb5f..000000000 --- a/data/hfopenllm_v2/pankajmathur/orca_mini_v3_70b/beae9826-35b2-4758-a20a-10c8402daa42.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v3_70b/1762652580.43598", - "retrieved_timestamp": "1762652580.435981", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v3_70b", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v3_70b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4014703209705803 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5949312065598904 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5078541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3757480053191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v3_7b/69cb8c68-5847-48f0-b2bd-0756ec761837.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v3_7b/69cb8c68-5847-48f0-b2bd-0756ec761837.json deleted file mode 100644 index 0d5a0b0d9..000000000 --- a/data/hfopenllm_v2/pankajmathur/orca_mini_v3_7b/69cb8c68-5847-48f0-b2bd-0756ec761837.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v3_7b/1762652580.436181", - 
"retrieved_timestamp": "1762652580.436182", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v3_7b", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v3_7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2820937335159599 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4095332668279368 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24664429530201343 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49823958333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20836103723404256 - } - } - ] -} diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v3_7b/b5e97b2d-d8a2-485a-8b0a-71590e4a376e.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v3_7b/b5e97b2d-d8a2-485a-8b0a-71590e4a376e.json new file mode 100644 index 000000000..33c79ebe0 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v3_7b/b5e97b2d-d8a2-485a-8b0a-71590e4a376e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v3_7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "orca_mini_v3_7b", + "id": "pankajmathur/orca_mini_v3_7b", + "developer": "pankajmathur", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2821 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4095 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0106 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2466 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4982 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2084 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v5_8b/12a231e0-deed-4d2b-9904-79a8b543d200.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v5_8b/12a231e0-deed-4d2b-9904-79a8b543d200.json deleted file mode 100644 index c32177943..000000000 --- a/data/hfopenllm_v2/pankajmathur/orca_mini_v5_8b/12a231e0-deed-4d2b-9904-79a8b543d200.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v5_8b/1762652580.436376", - "retrieved_timestamp": "1762652580.436377", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v5_8b", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v5_8b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48060479527653294 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5064242853619262 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09894259818731117 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4000104166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3075964095744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v5_8b/e79d0a8c-caec-4dec-b119-3229ffa69a73.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v5_8b/e79d0a8c-caec-4dec-b119-3229ffa69a73.json new file mode 100644 index 000000000..73bf37a29 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v5_8b/e79d0a8c-caec-4dec-b119-3229ffa69a73.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v5_8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "orca_mini_v5_8b", + "id": "pankajmathur/orca_mini_v5_8b", + "developer": "pankajmathur", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4806 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5064 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0989 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3076 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v5_8b_dpo/1dad9bda-fbc8-499b-aab0-29be59b6921d.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v5_8b_dpo/1dad9bda-fbc8-499b-aab0-29be59b6921d.json deleted file mode 100644 index 3c6d2bd7a..000000000 --- a/data/hfopenllm_v2/pankajmathur/orca_mini_v5_8b_dpo/1dad9bda-fbc8-499b-aab0-29be59b6921d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v5_8b_dpo/1762652580.436573", - "retrieved_timestamp": "1762652580.436574", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v5_8b_dpo", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v5_8b_dpo", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48964746871633935 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5074598658862709 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09743202416918428 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.389375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31158577127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v5_8b_dpo/2c760893-b52a-40a9-9420-fb193a62a5c3.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v5_8b_dpo/2c760893-b52a-40a9-9420-fb193a62a5c3.json new file mode 100644 index 000000000..1fb80ed95 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v5_8b_dpo/2c760893-b52a-40a9-9420-fb193a62a5c3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v5_8b_dpo/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "orca_mini_v5_8b_dpo", + "id": "pankajmathur/orca_mini_v5_8b_dpo", + "developer": "pankajmathur", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4896 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5075 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0974 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3894 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3116 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/pankajmathur/orca_mini_v5_8b_orpo/cf3f79fc-1fe2-4b55-a808-5664cc1f1809.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v5_8b_orpo/cf3f79fc-1fe2-4b55-a808-5664cc1f1809.json deleted file mode 100644 index 11c1acb75..000000000 --- a/data/hfopenllm_v2/pankajmathur/orca_mini_v5_8b_orpo/cf3f79fc-1fe2-4b55-a808-5664cc1f1809.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v5_8b_orpo/1762652580.436766", - "retrieved_timestamp": "1762652580.4367669", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v5_8b_orpo", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v5_8b_orpo", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08243239050164675 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.496374377369289 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41312499999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2947140957446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v5_8b_orpo/ef9b84e0-68b0-4caa-9980-96ea5e7f440b.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v5_8b_orpo/ef9b84e0-68b0-4caa-9980-96ea5e7f440b.json new file mode 100644 index 000000000..b10b829f3 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v5_8b_orpo/ef9b84e0-68b0-4caa-9980-96ea5e7f440b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v5_8b_orpo/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "orca_mini_v5_8b_orpo", + 
"id": "pankajmathur/orca_mini_v5_8b_orpo", + "developer": "pankajmathur", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0824 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4964 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0665 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2844 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4131 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2947 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v6_8b/e45a0914-baee-4fd4-a231-3495b18db9a9.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v6_8b/e45a0914-baee-4fd4-a231-3495b18db9a9.json deleted file mode 100644 index a13b3369a..000000000 --- a/data/hfopenllm_v2/pankajmathur/orca_mini_v6_8b/e45a0914-baee-4fd4-a231-3495b18db9a9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v6_8b/1762652580.436963", - "retrieved_timestamp": "1762652580.436963", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v6_8b", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v6_8b", - 
"additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.011116060940526692 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30286959112076134 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0037764350453172208 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23825503355704697 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3554583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1124501329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v6_8b/fb48aff8-3f6b-4934-9fb8-d72bf8614d6f.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v6_8b/fb48aff8-3f6b-4934-9fb8-d72bf8614d6f.json new file mode 100644 index 000000000..0c0674af7 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v6_8b/fb48aff8-3f6b-4934-9fb8-d72bf8614d6f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v6_8b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "orca_mini_v6_8b", + "id": "pankajmathur/orca_mini_v6_8b", + "developer": "pankajmathur", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0111 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MATH Level 5", + 
"source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0038 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2383 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3555 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1125 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v6_8b_dpo/3e875ab6-6065-4400-8038-0fe6437f44d5.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v6_8b_dpo/3e875ab6-6065-4400-8038-0fe6437f44d5.json deleted file mode 100644 index d078494cc..000000000 --- a/data/hfopenllm_v2/pankajmathur/orca_mini_v6_8b_dpo/3e875ab6-6065-4400-8038-0fe6437f44d5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v6_8b_dpo/1762652580.43716", - "retrieved_timestamp": "1762652580.437161", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v6_8b_dpo", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v6_8b_dpo", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3882564927725103 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.520280774453148 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40903125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.359624335106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v6_8b_dpo/9450acd9-16b6-49a2-9b73-cf1161b96df3.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v6_8b_dpo/9450acd9-16b6-49a2-9b73-cf1161b96df3.json new file mode 100644 index 000000000..92b4ebda8 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v6_8b_dpo/9450acd9-16b6-49a2-9b73-cf1161b96df3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v6_8b_dpo/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "orca_mini_v6_8b_dpo", + "id": "pankajmathur/orca_mini_v6_8b_dpo", + "developer": "pankajmathur", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3883 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5203 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0612 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.409 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3596 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v7_72b/0d50ec2d-5dd4-487e-80cb-9533246a9876.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v7_72b/0d50ec2d-5dd4-487e-80cb-9533246a9876.json new file mode 100644 index 000000000..475ec2f73 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v7_72b/0d50ec2d-5dd4-487e-80cb-9533246a9876.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v7_72b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "orca_mini_v7_72b", + "id": "pankajmathur/orca_mini_v7_72b", + "developer": "pankajmathur", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.593 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6842 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0937 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3851 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.507 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5622 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v7_72b/702f1485-2941-4e27-9c96-11cee2449df8.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v7_72b/702f1485-2941-4e27-9c96-11cee2449df8.json deleted file mode 100644 index 47a554eee..000000000 --- a/data/hfopenllm_v2/pankajmathur/orca_mini_v7_72b/702f1485-2941-4e27-9c96-11cee2449df8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v7_72b/1762652580.437353", - "retrieved_timestamp": "1762652580.437354", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v7_72b", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v7_72b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5929622291076566 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6842301988001044 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09365558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3850671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5070416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5621675531914894 - } - } - ] -} diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v7_7b/f6e6827d-fbf8-49cd-bdad-e8c7ea87550a.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v7_7b/f6e6827d-fbf8-49cd-bdad-e8c7ea87550a.json new file mode 100644 index 000000000..7c8b93f98 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v7_7b/f6e6827d-fbf8-49cd-bdad-e8c7ea87550a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v7_7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + 
"source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "orca_mini_v7_7b", + "id": "pankajmathur/orca_mini_v7_7b", + "developer": "pankajmathur", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4388 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5275 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1208 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.436 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4167 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v7_7b/f801b633-5767-4b74-a0db-e474c9349820.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v7_7b/f801b633-5767-4b74-a0db-e474c9349820.json deleted file mode 100644 index 53a0e149c..000000000 --- a/data/hfopenllm_v2/pankajmathur/orca_mini_v7_7b/f801b633-5767-4b74-a0db-e474c9349820.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v7_7b/1762652580.437545", - "retrieved_timestamp": "1762652580.437546", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM 
v2", - "source_type": "documentation" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v7_7b", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v7_7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4387646998851935 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5274909601771501 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43597916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4167220744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v8_1_70b/02201ae1-ec65-496c-bfdb-0dec8aa5308d.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v8_1_70b/02201ae1-ec65-496c-bfdb-0dec8aa5308d.json deleted file mode 100644 index f99db7923..000000000 --- a/data/hfopenllm_v2/pankajmathur/orca_mini_v8_1_70b/02201ae1-ec65-496c-bfdb-0dec8aa5308d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v8_1_70b/1762652580.4377441", - "retrieved_timestamp": "1762652580.4377449", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v8_1_70b", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v8_1_70b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8571434903832941 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6781305630707934 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3527190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43288590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44370833333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49833776595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v8_1_70b/c5e48fd8-0eea-46a9-8790-1745923561d3.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v8_1_70b/c5e48fd8-0eea-46a9-8790-1745923561d3.json new file mode 100644 index 000000000..2d067df48 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v8_1_70b/c5e48fd8-0eea-46a9-8790-1745923561d3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v8_1_70b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "orca_mini_v8_1_70b", + "id": "pankajmathur/orca_mini_v8_1_70b", + "developer": "pankajmathur", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8571 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6781 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3527 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy 
on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4329 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4437 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4983 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_0_3B-Instruct/870c7739-8886-47df-8e20-09bfae03b9c5.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_0_3B-Instruct/870c7739-8886-47df-8e20-09bfae03b9c5.json new file mode 100644 index 000000000..1af69e1fb --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_0_3B-Instruct/870c7739-8886-47df-8e20-09bfae03b9c5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_0_3B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "orca_mini_v9_0_3B-Instruct", + "id": "pankajmathur/orca_mini_v9_0_3B-Instruct", + "developer": "pankajmathur", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5754 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4413 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1465 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": 
"MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3659 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2603 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_0_3B-Instruct/bc38a266-c3bd-4ecf-8149-6b26bb32803b.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_0_3B-Instruct/bc38a266-c3bd-4ecf-8149-6b26bb32803b.json deleted file mode 100644 index 8a2391905..000000000 --- a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_0_3B-Instruct/bc38a266-c3bd-4ecf-8149-6b26bb32803b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_0_3B-Instruct/1762652580.437941", - "retrieved_timestamp": "1762652580.437942", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v9_0_3B-Instruct", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v9_0_3B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5753766672429155 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4412946064233128 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14652567975830816 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36590625000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2603058510638298 - } - } 
- ] -} diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_1_1B-Instruct/65d0aca2-06ae-4a09-9fb2-2bb54939a554.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_1_1B-Instruct/65d0aca2-06ae-4a09-9fb2-2bb54939a554.json deleted file mode 100644 index 1c2b08550..000000000 --- a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_1_1B-Instruct/65d0aca2-06ae-4a09-9fb2-2bb54939a554.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_1_1B-Instruct/1762652580.438177", - "retrieved_timestamp": "1762652580.438178", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v9_1_1B-Instruct", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v9_1_1B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3629270336041702 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3205118362595434 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04607250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3380625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13738364361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_1_1B-Instruct/d8eb5fd1-f1d4-481d-85af-88a11d7b6f6f.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_1_1B-Instruct/d8eb5fd1-f1d4-481d-85af-88a11d7b6f6f.json new file mode 100644 index 000000000..fd6a47751 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_1_1B-Instruct/d8eb5fd1-f1d4-481d-85af-88a11d7b6f6f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_1_1B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": 
"third_party" + }, + "model_info": { + "name": "orca_mini_v9_1_1B-Instruct", + "id": "pankajmathur/orca_mini_v9_1_1B-Instruct", + "developer": "pankajmathur", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3629 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3205 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0461 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2567 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3381 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1374 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_2_14B/6625b2e0-1f65-4dc5-9913-ceb0e82e6439.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_2_14B/6625b2e0-1f65-4dc5-9913-ceb0e82e6439.json new file mode 100644 index 000000000..a42075775 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_2_14B/6625b2e0-1f65-4dc5-9913-ceb0e82e6439.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_2_14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "orca_mini_v9_2_14B", + "id": "pankajmathur/orca_mini_v9_2_14B", + "developer": "pankajmathur", + "inference_platform": "unknown", + "additional_details": { + "precision": 
"bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7781 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6856 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3742 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4703 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5255 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_2_14B/e10e45b8-0d37-4905-9ebf-acc7922b7ea3.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_2_14B/e10e45b8-0d37-4905-9ebf-acc7922b7ea3.json deleted file mode 100644 index ecbf43cd9..000000000 --- a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_2_14B/e10e45b8-0d37-4905-9ebf-acc7922b7ea3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_2_14B/1762652580.438377", - "retrieved_timestamp": "1762652580.438378", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v9_2_14B", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v9_2_14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": 
[ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7780588837617521 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6856329737542378 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29531722054380666 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37416107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47030208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5255152925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_2_70b/24e7df20-e046-48f7-909e-502d0c70216a.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_2_70b/24e7df20-e046-48f7-909e-502d0c70216a.json new file mode 100644 index 000000000..7a7d9e13a --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_2_70b/24e7df20-e046-48f7-909e-502d0c70216a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_2_70b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "orca_mini_v9_2_70b", + "id": "pankajmathur/orca_mini_v9_2_70b", + "developer": "pankajmathur", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8383 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6745 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + 
"metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2938 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3733 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.471 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4821 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_2_70b/69093327-3726-469d-9750-b9fa39423310.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_2_70b/69093327-3726-469d-9750-b9fa39423310.json deleted file mode 100644 index 22553e8d4..000000000 --- a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_2_70b/69093327-3726-469d-9750-b9fa39423310.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_2_70b/1762652580.438577", - "retrieved_timestamp": "1762652580.438578", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v9_2_70b", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v9_2_70b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8382591523823455 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6744868732778627 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2938066465256798 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": 
{ - "score": 0.3733221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47098958333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48213098404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_4_70B/7920f562-9e7f-4a64-85f4-584b13af44de.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_4_70B/7920f562-9e7f-4a64-85f4-584b13af44de.json new file mode 100644 index 000000000..2d1a1c562 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_4_70B/7920f562-9e7f-4a64-85f4-584b13af44de.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_4_70B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "orca_mini_v9_4_70B", + "id": "pankajmathur/orca_mini_v9_4_70B", + "developer": "pankajmathur", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8015 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6419 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3263 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3658 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4647 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": 
"MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4536 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_4_70B/e3746ac6-3ee4-4d95-b800-509bed07aec3.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_4_70B/e3746ac6-3ee4-4d95-b800-509bed07aec3.json deleted file mode 100644 index 5d036382f..000000000 --- a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_4_70B/e3746ac6-3ee4-4d95-b800-509bed07aec3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_4_70B/1762652580.438774", - "retrieved_timestamp": "1762652580.438774", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v9_4_70B", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v9_4_70B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8014645584826039 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6418899297276105 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32628398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36577181208053694 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4647291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45362367021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_5_1B-Instruct/2f2f821b-037b-4f3f-87f6-16703c0dc61a.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_5_1B-Instruct/2f2f821b-037b-4f3f-87f6-16703c0dc61a.json deleted file mode 100644 index c1bed4a49..000000000 --- a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_5_1B-Instruct/2f2f821b-037b-4f3f-87f6-16703c0dc61a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - 
"evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_5_1B-Instruct/1762652580.438983", - "retrieved_timestamp": "1762652580.438984", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v9_5_1B-Instruct", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v9_5_1B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46379384477630464 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3337001077145985 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030211480362537766 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31815625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13696808510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_5_1B-Instruct/c6620817-69fe-40e2-bb0a-1e9c739ab65d.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_5_1B-Instruct/c6620817-69fe-40e2-bb0a-1e9c739ab65d.json new file mode 100644 index 000000000..e8976fd6e --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_5_1B-Instruct/c6620817-69fe-40e2-bb0a-1e9c739ab65d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_5_1B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "orca_mini_v9_5_1B-Instruct", + "id": "pankajmathur/orca_mini_v9_5_1B-Instruct", + "developer": "pankajmathur", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": 
"hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4638 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3337 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0302 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3182 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.137 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_5_1B-Instruct_preview/520e2d66-4143-493b-8533-64f86c6d676e.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_5_1B-Instruct_preview/520e2d66-4143-493b-8533-64f86c6d676e.json new file mode 100644 index 000000000..ede7abcd5 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_5_1B-Instruct_preview/520e2d66-4143-493b-8533-64f86c6d676e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_5_1B-Instruct_preview/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "orca_mini_v9_5_1B-Instruct_preview", + "id": "pankajmathur/orca_mini_v9_5_1B-Instruct_preview", + "developer": "pankajmathur", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3936 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3277 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0385 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3395 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1327 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_5_1B-Instruct_preview/7836190d-33df-45c2-b020-8ccec01de1f3.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_5_1B-Instruct_preview/7836190d-33df-45c2-b020-8ccec01de1f3.json deleted file mode 100644 index d5c464f76..000000000 --- a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_5_1B-Instruct_preview/7836190d-33df-45c2-b020-8ccec01de1f3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_5_1B-Instruct_preview/1762652580.439178", - "retrieved_timestamp": "1762652580.439179", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v9_5_1B-Instruct_preview", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v9_5_1B-Instruct_preview", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3935768206137493 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32769514793198123 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33945833333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13272938829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_5_3B-Instruct/2ff28335-81a0-4d61-b221-a7edb877da4a.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_5_3B-Instruct/2ff28335-81a0-4d61-b221-a7edb877da4a.json deleted file mode 100644 index 099be93b4..000000000 --- a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_5_3B-Instruct/2ff28335-81a0-4d61-b221-a7edb877da4a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_5_3B-Instruct/1762652580.439394", - "retrieved_timestamp": "1762652580.4393952", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v9_5_3B-Instruct", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v9_5_3B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7207066140063919 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44963802133275826 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1321752265861027 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2869127516778524 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4269895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2882313829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_5_3B-Instruct/993bdfd2-3a88-4de3-9ed9-9b7b63c0f4f5.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_5_3B-Instruct/993bdfd2-3a88-4de3-9ed9-9b7b63c0f4f5.json new file mode 100644 index 000000000..067bf5772 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_5_3B-Instruct/993bdfd2-3a88-4de3-9ed9-9b7b63c0f4f5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_5_3B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "orca_mini_v9_5_3B-Instruct", + "id": "pankajmathur/orca_mini_v9_5_3B-Instruct", + "developer": "pankajmathur", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7207 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4496 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1322 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { 
+ "score": 0.427 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2882 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_6_1B-Instruct/332f06db-35f1-4759-b3f8-973b1fe6fb9e.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_6_1B-Instruct/332f06db-35f1-4759-b3f8-973b1fe6fb9e.json deleted file mode 100644 index 5f8b2a450..000000000 --- a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_6_1B-Instruct/332f06db-35f1-4759-b3f8-973b1fe6fb9e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_6_1B-Instruct/1762652580.439626", - "retrieved_timestamp": "1762652580.439627", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v9_6_1B-Instruct", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v9_6_1B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6085741388404988 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3561349568441982 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0770392749244713 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33955208333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18085106382978725 - } - } - ] -} diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_6_1B-Instruct/4e1be694-cc4d-4943-a8e4-74913cfb2ebe.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_6_1B-Instruct/4e1be694-cc4d-4943-a8e4-74913cfb2ebe.json new file mode 100644 index 000000000..6331cdee7 --- /dev/null +++ 
b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_6_1B-Instruct/4e1be694-cc4d-4943-a8e4-74913cfb2ebe.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_6_1B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "orca_mini_v9_6_1B-Instruct", + "id": "pankajmathur/orca_mini_v9_6_1B-Instruct", + "developer": "pankajmathur", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6086 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3561 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.077 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3396 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1809 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_6_3B-Instruct/1cc45753-aeed-4804-a6da-413437dbb940.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_6_3B-Instruct/1cc45753-aeed-4804-a6da-413437dbb940.json deleted file mode 100644 index 737e5533a..000000000 --- a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_6_3B-Instruct/1cc45753-aeed-4804-a6da-413437dbb940.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/pankajmathur_orca_mini_v9_6_3B-Instruct/1762652580.439853", - "retrieved_timestamp": "1762652580.439853", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v9_6_3B-Instruct", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v9_6_3B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7316475839660989 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45683272658133456 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13293051359516617 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4067708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28507313829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_6_3B-Instruct/42c174d1-6211-4438-bb9a-24f3cf386a6d.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_6_3B-Instruct/42c174d1-6211-4438-bb9a-24f3cf386a6d.json new file mode 100644 index 000000000..4727738e9 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_6_3B-Instruct/42c174d1-6211-4438-bb9a-24f3cf386a6d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_6_3B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "orca_mini_v9_6_3B-Instruct", + "id": "pankajmathur/orca_mini_v9_6_3B-Instruct", + "developer": "pankajmathur", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", 
+ "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7316 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4568 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1329 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4068 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2851 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_7_1B-Instruct/625bf39b-a118-4ec6-82d0-5405cf70ba53.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_7_1B-Instruct/625bf39b-a118-4ec6-82d0-5405cf70ba53.json new file mode 100644 index 000000000..ad5c217d2 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_7_1B-Instruct/625bf39b-a118-4ec6-82d0-5405cf70ba53.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_7_1B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "orca_mini_v9_7_1B-Instruct", + "id": "pankajmathur/orca_mini_v9_7_1B-Instruct", + "developer": "pankajmathur", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.561 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3182 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0446 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3527 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1345 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_7_1B-Instruct/fad200e0-05bb-42d7-b7f3-caba938ca09d.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_7_1B-Instruct/fad200e0-05bb-42d7-b7f3-caba938ca09d.json deleted file mode 100644 index f288188c1..000000000 --- a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_7_1B-Instruct/fad200e0-05bb-42d7-b7f3-caba938ca09d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_7_1B-Instruct/1762652580.4400692", - "retrieved_timestamp": "1762652580.44007", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v9_7_1B-Instruct", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v9_7_1B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5610136659618701 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on 
BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3181526961435924 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0445619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35269791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1344747340425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_7_3B-Instruct/42a8b694-ef8f-47d2-8da3-e4db453641b3.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_7_3B-Instruct/42a8b694-ef8f-47d2-8da3-e4db453641b3.json deleted file mode 100644 index 5c859fbf0..000000000 --- a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_7_3B-Instruct/42a8b694-ef8f-47d2-8da3-e4db453641b3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_7_3B-Instruct/1762652580.44028", - "retrieved_timestamp": "1762652580.4402812", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v9_7_3B-Instruct", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v9_7_3B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5618381450107935 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3297133908231881 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.061933534743202415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.361875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13746675531914893 - } - } - ] -} diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_7_3B-Instruct/e09cb198-d259-42ea-a356-6efe61b1e12b.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_7_3B-Instruct/e09cb198-d259-42ea-a356-6efe61b1e12b.json new file mode 100644 index 000000000..57b5c8eb9 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_7_3B-Instruct/e09cb198-d259-42ea-a356-6efe61b1e12b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_7_3B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "orca_mini_v9_7_3B-Instruct", + "id": "pankajmathur/orca_mini_v9_7_3B-Instruct", + "developer": "pankajmathur", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5618 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3297 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0619 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3619 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": 
"hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1375 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/paulml/ECE-ILAB-Q1/5838b130-c2e6-400c-80b7-6822efb5db2c.json b/data/hfopenllm_v2/paulml/ECE-ILAB-Q1/5838b130-c2e6-400c-80b7-6822efb5db2c.json new file mode 100644 index 000000000..4666ebf13 --- /dev/null +++ b/data/hfopenllm_v2/paulml/ECE-ILAB-Q1/5838b130-c2e6-400c-80b7-6822efb5db2c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/paulml_ECE-ILAB-Q1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-ILAB-Q1", + "id": "paulml/ECE-ILAB-Q1", + "developer": "paulml", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7865 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6718 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3557 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3867 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4614 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5505 + } + } + ] +} \ No newline at end of file diff 
--git a/data/hfopenllm_v2/paulml/ECE-ILAB-Q1/83024ec4-e4a4-4dd3-adf4-654c90c3a271.json b/data/hfopenllm_v2/paulml/ECE-ILAB-Q1/83024ec4-e4a4-4dd3-adf4-654c90c3a271.json deleted file mode 100644 index 440f2b303..000000000 --- a/data/hfopenllm_v2/paulml/ECE-ILAB-Q1/83024ec4-e4a4-4dd3-adf4-654c90c3a271.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/paulml_ECE-ILAB-Q1/1762652580.440484", - "retrieved_timestamp": "1762652580.440484", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "paulml/ECE-ILAB-Q1", - "developer": "paulml", - "inference_platform": "unknown", - "id": "paulml/ECE-ILAB-Q1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7864521691334547 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6717755530661759 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3557401812688822 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38674496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46137500000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.550531914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/pints-ai/1.5-Pints-16K-v0.1/52b51638-64cd-4b19-8fc7-c223d50bc549.json b/data/hfopenllm_v2/pints-ai/1.5-Pints-16K-v0.1/52b51638-64cd-4b19-8fc7-c223d50bc549.json new file mode 100644 index 000000000..e89688fb3 --- /dev/null +++ b/data/hfopenllm_v2/pints-ai/1.5-Pints-16K-v0.1/52b51638-64cd-4b19-8fc7-c223d50bc549.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/pints-ai_1.5-Pints-16K-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "1.5-Pints-16K-v0.1", + "id": "pints-ai/1.5-Pints-16K-v0.1", + "developer": "pints-ai", + "inference_platform": "unknown", + 
"additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.566 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1636 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3133 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0144 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2357 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3579 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1119 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pints-ai/1.5-Pints-16K-v0.1/8dff3ec1-066f-4f5f-ac57-879d693ee3fb.json b/data/hfopenllm_v2/pints-ai/1.5-Pints-16K-v0.1/8dff3ec1-066f-4f5f-ac57-879d693ee3fb.json deleted file mode 100644 index c46aa9a03..000000000 --- a/data/hfopenllm_v2/pints-ai/1.5-Pints-16K-v0.1/8dff3ec1-066f-4f5f-ac57-879d693ee3fb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/pints-ai_1.5-Pints-16K-v0.1/1762652580.4407208", - "retrieved_timestamp": "1762652580.440722", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "pints-ai/1.5-Pints-16K-v0.1", - "developer": "pints-ai", - "inference_platform": "unknown", - "id": "pints-ai/1.5-Pints-16K-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.566 - } - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1635914927946737 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3133077677150869 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23573825503355705 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.357875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1118683510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/pints-ai/1.5-Pints-2K-v0.1/28b3178b-c963-4267-9649-3f7fc10fba3c.json b/data/hfopenllm_v2/pints-ai/1.5-Pints-2K-v0.1/28b3178b-c963-4267-9649-3f7fc10fba3c.json new file mode 100644 index 000000000..9f6e80b6f --- /dev/null +++ b/data/hfopenllm_v2/pints-ai/1.5-Pints-2K-v0.1/28b3178b-c963-4267-9649-3f7fc10fba3c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/pints-ai_1.5-Pints-2K-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "1.5-Pints-2K-v0.1", + "id": "pints-ai/1.5-Pints-2K-v0.1", + "developer": "pints-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.566 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1762 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.298 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0128 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2483 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3502 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1104 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pints-ai/1.5-Pints-2K-v0.1/2ed76213-e562-4b36-bf46-93f09df88ee9.json b/data/hfopenllm_v2/pints-ai/1.5-Pints-2K-v0.1/2ed76213-e562-4b36-bf46-93f09df88ee9.json deleted file mode 100644 index 6f88cecf2..000000000 --- a/data/hfopenllm_v2/pints-ai/1.5-Pints-2K-v0.1/2ed76213-e562-4b36-bf46-93f09df88ee9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/pints-ai_1.5-Pints-2K-v0.1/1762652580.4409652", - "retrieved_timestamp": "1762652580.440966", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "pints-ai/1.5-Pints-2K-v0.1", - "developer": "pints-ai", - "inference_platform": "unknown", - "id": "pints-ai/1.5-Pints-2K-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.566 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17615593292463996 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29801943389750435 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2483221476510067 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35018749999999993 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11037234042553191 - } - } - ] -} diff --git a/data/hfopenllm_v2/piotr25691/thea-3b-25r/748298a2-5042-4636-ac7e-051c28916f3a.json b/data/hfopenllm_v2/piotr25691/thea-3b-25r/748298a2-5042-4636-ac7e-051c28916f3a.json new file mode 100644 index 000000000..b4477c2ac --- /dev/null +++ b/data/hfopenllm_v2/piotr25691/thea-3b-25r/748298a2-5042-4636-ac7e-051c28916f3a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/piotr25691_thea-3b-25r/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "thea-3b-25r", + "id": "piotr25691/thea-3b-25r", + "developer": "piotr25691", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7344 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4484 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1782 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3315 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { 
+ "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3182 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/piotr25691/thea-3b-25r/d8fefd3b-78e6-472e-854c-15f40ace7878.json b/data/hfopenllm_v2/piotr25691/thea-3b-25r/d8fefd3b-78e6-472e-854c-15f40ace7878.json deleted file mode 100644 index d9c4956bd..000000000 --- a/data/hfopenllm_v2/piotr25691/thea-3b-25r/d8fefd3b-78e6-472e-854c-15f40ace7878.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/piotr25691_thea-3b-25r/1762652580.44117", - "retrieved_timestamp": "1762652580.441171", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "piotr25691/thea-3b-25r", - "developer": "piotr25691", - "inference_platform": "unknown", - "id": "piotr25691/thea-3b-25r", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7344202272193336 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44844100293649863 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1782477341389728 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33145833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3182347074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/piotr25691/thea-c-3b-25r/03bcd4e6-1620-424a-9200-c0cf4b73bbd2.json b/data/hfopenllm_v2/piotr25691/thea-c-3b-25r/03bcd4e6-1620-424a-9200-c0cf4b73bbd2.json new file mode 100644 index 000000000..add3fcd04 --- /dev/null +++ b/data/hfopenllm_v2/piotr25691/thea-c-3b-25r/03bcd4e6-1620-424a-9200-c0cf4b73bbd2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/piotr25691_thea-c-3b-25r/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "thea-c-3b-25r", + "id": "piotr25691/thea-c-3b-25r", + "developer": "piotr25691", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7402 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4532 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1526 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3315 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3178 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/piotr25691/thea-c-3b-25r/828bcb36-3902-4157-9323-a5dcf592a795.json b/data/hfopenllm_v2/piotr25691/thea-c-3b-25r/828bcb36-3902-4157-9323-a5dcf592a795.json deleted file mode 100644 index 44e4e8732..000000000 --- a/data/hfopenllm_v2/piotr25691/thea-c-3b-25r/828bcb36-3902-4157-9323-a5dcf592a795.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/piotr25691_thea-c-3b-25r/1762652580.441559", - "retrieved_timestamp": "1762652580.441561", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "piotr25691/thea-c-3b-25r", - "developer": 
"piotr25691", - "inference_platform": "unknown", - "id": "piotr25691/thea-c-3b-25r", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7401904723910335 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4532410175874399 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15256797583081572 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33148958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3178191489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/piotr25691/thea-rp-3b-25r/c7fba530-63cc-4ece-a171-4a2919aa8057.json b/data/hfopenllm_v2/piotr25691/thea-rp-3b-25r/c7fba530-63cc-4ece-a171-4a2919aa8057.json new file mode 100644 index 000000000..2c66084d4 --- /dev/null +++ b/data/hfopenllm_v2/piotr25691/thea-rp-3b-25r/c7fba530-63cc-4ece-a171-4a2919aa8057.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/piotr25691_thea-rp-3b-25r/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "thea-rp-3b-25r", + "id": "piotr25691/thea-rp-3b-25r", + "developer": "piotr25691", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6578 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.439 
+ } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1322 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3819 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.306 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/piotr25691/thea-rp-3b-25r/cd34091b-2639-476c-8419-e6c327cfabc7.json b/data/hfopenllm_v2/piotr25691/thea-rp-3b-25r/cd34091b-2639-476c-8419-e6c327cfabc7.json deleted file mode 100644 index f30d08b19..000000000 --- a/data/hfopenllm_v2/piotr25691/thea-rp-3b-25r/cd34091b-2639-476c-8419-e6c327cfabc7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/piotr25691_thea-rp-3b-25r/1762652580.441917", - "retrieved_timestamp": "1762652580.441918", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "piotr25691/thea-rp-3b-25r", - "developer": "piotr25691", - "inference_platform": "unknown", - "id": "piotr25691/thea-rp-3b-25r", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6577835698169745 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4390291036559586 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13217522658610273 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.381875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30601728723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/postbot/gpt2-medium-emailgen/c25c1046-a8d5-4f4b-9a72-c4591cfb4023.json b/data/hfopenllm_v2/postbot/gpt2-medium-emailgen/c25c1046-a8d5-4f4b-9a72-c4591cfb4023.json new file mode 100644 index 000000000..e0f2e52de --- /dev/null +++ b/data/hfopenllm_v2/postbot/gpt2-medium-emailgen/c25c1046-a8d5-4f4b-9a72-c4591cfb4023.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/postbot_gpt2-medium-emailgen/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gpt2-medium-emailgen", + "id": "postbot/gpt2-medium-emailgen", + "developer": "postbot", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GPT2LMHeadModel", + "params_billions": 0.38 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1492 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.313 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 
+ }, + "score_details": { + "score": 0.3911 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1147 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prince-canuma/Ministral-8B-Instruct-2410-HF/c3800a5c-310b-41cb-9b07-cfc1f1b13256.json b/data/hfopenllm_v2/prince-canuma/Ministral-8B-Instruct-2410-HF/c3800a5c-310b-41cb-9b07-cfc1f1b13256.json new file mode 100644 index 000000000..6b36039e0 --- /dev/null +++ b/data/hfopenllm_v2/prince-canuma/Ministral-8B-Instruct-2410-HF/c3800a5c-310b-41cb-9b07-cfc1f1b13256.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prince-canuma_Ministral-8B-Instruct-2410-HF/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ministral-8B-Instruct-2410-HF", + "id": "prince-canuma/Ministral-8B-Instruct-2410-HF", + "developer": "prince-canuma", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 8.02 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5912 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4586 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1918 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4138 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": 
"hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3298 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prince-canuma/Ministral-8B-Instruct-2410-HF/f98bc033-55c9-45c1-a101-3881507bb733.json b/data/hfopenllm_v2/prince-canuma/Ministral-8B-Instruct-2410-HF/f98bc033-55c9-45c1-a101-3881507bb733.json deleted file mode 100644 index 04e34f63a..000000000 --- a/data/hfopenllm_v2/prince-canuma/Ministral-8B-Instruct-2410-HF/f98bc033-55c9-45c1-a101-3881507bb733.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prince-canuma_Ministral-8B-Instruct-2410-HF/1762652580.442474", - "retrieved_timestamp": "1762652580.442475", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prince-canuma/Ministral-8B-Instruct-2410-HF", - "developer": "prince-canuma", - "inference_platform": "unknown", - "id": "prince-canuma/Ministral-8B-Instruct-2410-HF", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 8.02 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5911636679565775 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4585611339334732 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19184290030211482 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32978723404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-512k-Base/e8e2b99f-cf83-4776-9117-aa2b5d9c8068.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-512k-Base/e8e2b99f-cf83-4776-9117-aa2b5d9c8068.json new file mode 100644 index 000000000..f4645a2cd --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-512k-Base/e8e2b99f-cf83-4776-9117-aa2b5d9c8068.json @@ -0,0 +1,132 @@ +{ + 
"schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-8B-ProLong-512k-Base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-ProLong-512k-Base", + "id": "princeton-nlp/Llama-3-8B-ProLong-512k-Base", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5322 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5033 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0687 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4223 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3329 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-512k-Instruct/2da19e45-117f-446b-b956-b35a20bb7411.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-512k-Instruct/2da19e45-117f-446b-b956-b35a20bb7411.json new file mode 100644 index 000000000..f3f5626a4 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-512k-Instruct/2da19e45-117f-446b-b956-b35a20bb7411.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-8B-ProLong-512k-Instruct/1770682486.623709", + "retrieved_timestamp": 
"1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-ProLong-512k-Instruct", + "id": "princeton-nlp/Llama-3-8B-ProLong-512k-Instruct", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5508 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5028 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0529 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2861 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4266 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3231 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-512k-Instruct/72eccc9b-df63-4b2f-8975-a1c89940802c.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-512k-Instruct/72eccc9b-df63-4b2f-8975-a1c89940802c.json deleted file mode 100644 index 12d1fc2a1..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-512k-Instruct/72eccc9b-df63-4b2f-8975-a1c89940802c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-8B-ProLong-512k-Instruct/1762652580.4434712", - "retrieved_timestamp": "1762652580.443472", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-8B-ProLong-512k-Instruct", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-8B-ProLong-512k-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3977734632996006 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49830327201612584 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0581570996978852 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.425 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3246343085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-512k-Instruct/9e982a33-19cb-4381-8560-884bc8946a2b.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-512k-Instruct/9e982a33-19cb-4381-8560-884bc8946a2b.json new file mode 100644 index 000000000..62d796011 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-512k-Instruct/9e982a33-19cb-4381-8560-884bc8946a2b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-8B-ProLong-512k-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-ProLong-512k-Instruct", + "id": "princeton-nlp/Llama-3-8B-ProLong-512k-Instruct", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.3978 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4983 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0582 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.425 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3246 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-512k-Instruct/e30fead2-6516-480f-abd8-6ad0713cb053.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-512k-Instruct/e30fead2-6516-480f-abd8-6ad0713cb053.json deleted file mode 100644 index 4db4b89d0..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-512k-Instruct/e30fead2-6516-480f-abd8-6ad0713cb053.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-8B-ProLong-512k-Instruct/1762652580.4431858", - "retrieved_timestamp": "1762652580.443187", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-8B-ProLong-512k-Instruct", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-8B-ProLong-512k-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5508218194390884 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5028310716285619 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.052870090634441085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42664583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32313829787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-64k-Base/9130a862-cfd7-47ce-a92a-f60438739491.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-64k-Base/9130a862-cfd7-47ce-a92a-f60438739491.json new file mode 100644 index 000000000..41ea67f01 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-64k-Base/9130a862-cfd7-47ce-a92a-f60438739491.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-8B-ProLong-64k-Base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-ProLong-64k-Base", + "id": "princeton-nlp/Llama-3-8B-ProLong-64k-Base", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5201 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4927 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.065 + } + }, + { + "evaluation_name": "GPQA", + 
"source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4341 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3348 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-64k-Instruct/858d3717-fcb2-45d9-8eaa-1b00ae0ca918.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-64k-Instruct/858d3717-fcb2-45d9-8eaa-1b00ae0ca918.json new file mode 100644 index 000000000..b83c173d0 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-64k-Instruct/858d3717-fcb2-45d9-8eaa-1b00ae0ca918.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-8B-ProLong-64k-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-ProLong-64k-Instruct", + "id": "princeton-nlp/Llama-3-8B-ProLong-64k-Instruct", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5563 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5083 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.065 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4397 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3275 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-64k-Instruct/9c801b4e-228b-42a8-a7f7-ea2bf125d716.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-64k-Instruct/9c801b4e-228b-42a8-a7f7-ea2bf125d716.json deleted file mode 100644 index 44ded15e0..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-64k-Instruct/9c801b4e-228b-42a8-a7f7-ea2bf125d716.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-8B-ProLong-64k-Instruct/1762652580.443907", - "retrieved_timestamp": "1762652580.4439082", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-8B-ProLong-64k-Instruct", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-8B-ProLong-64k-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5563172382611471 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5083040804243396 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43969791666666663 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32746010638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-CPO/342c7c0f-92f0-4296-8e0a-519724133bb5.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-CPO/342c7c0f-92f0-4296-8e0a-519724133bb5.json deleted file mode 100644 index 4434f55d2..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-CPO/342c7c0f-92f0-4296-8e0a-519724133bb5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT-CPO/1762652580.444415", - "retrieved_timestamp": "1762652580.444416", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Base-8B-SFT-CPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Base-8B-SFT-CPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37034623687371726 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4594875922440002 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3608541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2976230053191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-CPO/5f1f137b-cb2f-4ee6-8bc9-5e0b94939f35.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-CPO/5f1f137b-cb2f-4ee6-8bc9-5e0b94939f35.json new file mode 100644 index 000000000..25691f45b --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-CPO/5f1f137b-cb2f-4ee6-8bc9-5e0b94939f35.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT-CPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Base-8B-SFT-CPO", + "id": "princeton-nlp/Llama-3-Base-8B-SFT-CPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3703 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4595 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0544 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3609 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2976 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-DPO/6feca911-7a6e-43a2-b59d-7cb48070fe8e.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-DPO/6feca911-7a6e-43a2-b59d-7cb48070fe8e.json new file mode 100644 index 000000000..84c86e76e --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-DPO/6feca911-7a6e-43a2-b59d-7cb48070fe8e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT-DPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": 
"documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Base-8B-SFT-DPO", + "id": "princeton-nlp/Llama-3-Base-8B-SFT-DPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4111 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4666 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0415 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3867 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3078 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-DPO/8afa4f43-96fb-46b1-84e8-bf98928aa484.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-DPO/8afa4f43-96fb-46b1-84e8-bf98928aa484.json deleted file mode 100644 index bf5d10be3..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-DPO/8afa4f43-96fb-46b1-84e8-bf98928aa484.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT-DPO/1762652580.444683", - "retrieved_timestamp": "1762652580.444684", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - 
"source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Base-8B-SFT-DPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Base-8B-SFT-DPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41111251479407973 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46658506064913546 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04154078549848943 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38673958333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3078457446808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-IPO/71d5525f-c257-4b88-b84d-d75b3a8328fc.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-IPO/71d5525f-c257-4b88-b84d-d75b3a8328fc.json deleted file mode 100644 index 0271cb722..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-IPO/71d5525f-c257-4b88-b84d-d75b3a8328fc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT-IPO/1762652580.444937", - "retrieved_timestamp": "1762652580.444937", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Base-8B-SFT-IPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Base-8B-SFT-IPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4486562321307464 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4690068582318399 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3919479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3115026595744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-IPO/d3ad9813-273e-47de-be16-312cc67ac64f.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-IPO/d3ad9813-273e-47de-be16-312cc67ac64f.json new file mode 100644 index 000000000..95f0c784c --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-IPO/d3ad9813-273e-47de-be16-312cc67ac64f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT-IPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Base-8B-SFT-IPO", + "id": "princeton-nlp/Llama-3-Base-8B-SFT-IPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4487 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.469 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0393 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + 
"hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3919 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3115 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-KTO/317205ee-2cc6-4523-9662-be6508314b08.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-KTO/317205ee-2cc6-4523-9662-be6508314b08.json new file mode 100644 index 000000000..1afb80262 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-KTO/317205ee-2cc6-4523-9662-be6508314b08.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT-KTO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Base-8B-SFT-KTO", + "id": "princeton-nlp/Llama-3-Base-8B-SFT-KTO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4523 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4693 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0529 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, 
+ "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3842 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-KTO/6c0d909f-ee4f-4e1a-8db9-abf1920359ed.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-KTO/6c0d909f-ee4f-4e1a-8db9-abf1920359ed.json deleted file mode 100644 index c4f72f912..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-KTO/6c0d909f-ee4f-4e1a-8db9-abf1920359ed.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT-KTO/1762652580.4452229", - "retrieved_timestamp": "1762652580.445225", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Base-8B-SFT-KTO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Base-8B-SFT-KTO", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4522533544329047 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4692852292721417 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.052870090634441085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3841979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.3054355053191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-ORPO/3b5fe65a-50a1-4036-b81a-86117356cab9.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-ORPO/3b5fe65a-50a1-4036-b81a-86117356cab9.json new file mode 100644 index 000000000..4c1233c1f --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-ORPO/3b5fe65a-50a1-4036-b81a-86117356cab9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT-ORPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Base-8B-SFT-ORPO", + "id": "princeton-nlp/Llama-3-Base-8B-SFT-ORPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4517 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4734 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0468 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3138 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3707 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3083 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-ORPO/ba821a1c-3b8e-4952-9f7b-b1f18923c4e7.json 
b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-ORPO/ba821a1c-3b8e-4952-9f7b-b1f18923c4e7.json deleted file mode 100644 index a70c948a8..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-ORPO/ba821a1c-3b8e-4952-9f7b-b1f18923c4e7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT-ORPO/1762652580.445469", - "retrieved_timestamp": "1762652580.4454699", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Base-8B-SFT-ORPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Base-8B-SFT-ORPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45165383404921167 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47340573024653915 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3706770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30826130319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-RDPO/812ac262-97f4-485e-93de-f8d420b8658e.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-RDPO/812ac262-97f4-485e-93de-f8d420b8658e.json new file mode 100644 index 000000000..21691d320 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-RDPO/812ac262-97f4-485e-93de-f8d420b8658e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT-RDPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Base-8B-SFT-RDPO", + "id": "princeton-nlp/Llama-3-Base-8B-SFT-RDPO", 
+ "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.448 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4662 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0574 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4027 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3014 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-RDPO/985ac874-e7eb-4431-81c2-a79f3865c696.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-RDPO/985ac874-e7eb-4431-81c2-a79f3865c696.json deleted file mode 100644 index 5f1550171..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-RDPO/985ac874-e7eb-4431-81c2-a79f3865c696.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT-RDPO/1762652580.445683", - "retrieved_timestamp": "1762652580.445684", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Base-8B-SFT-RDPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Base-8B-SFT-RDPO", - 
"additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4480068440626427 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46620140448752295 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4027395833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30144614361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-RRHF/39cd7eb0-781e-47b6-8eaa-c72e702f778f.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-RRHF/39cd7eb0-781e-47b6-8eaa-c72e702f778f.json new file mode 100644 index 000000000..564369fd0 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-RRHF/39cd7eb0-781e-47b6-8eaa-c72e702f778f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT-RRHF/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Base-8B-SFT-RRHF", + "id": "princeton-nlp/Llama-3-Base-8B-SFT-RRHF", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3357 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.452 + } + }, 
+ { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0453 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3722 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2889 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-RRHF/cc9fb769-3d0b-4e53-9942-d4f99203a629.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-RRHF/cc9fb769-3d0b-4e53-9942-d4f99203a629.json deleted file mode 100644 index 3028e534e..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-RRHF/cc9fb769-3d0b-4e53-9942-d4f99203a629.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT-RRHF/1762652580.445896", - "retrieved_timestamp": "1762652580.445896", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Base-8B-SFT-RRHF", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Base-8B-SFT-RRHF", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3357247658435174 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4520360167602379 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - 
} - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37222916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2888962765957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-SLiC-HF/596f4d11-f091-42c3-9f1e-b95e0ba6dbd9.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-SLiC-HF/596f4d11-f091-42c3-9f1e-b95e0ba6dbd9.json deleted file mode 100644 index 1be27c9da..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-SLiC-HF/596f4d11-f091-42c3-9f1e-b95e0ba6dbd9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT-SLiC-HF/1762652580.4460979", - "retrieved_timestamp": "1762652580.446099", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Base-8B-SFT-SLiC-HF", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Base-8B-SFT-SLiC-HF", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4890479483326463 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4704075127777334 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40909375000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 
1 - }, - "score_details": { - "score": 0.30634973404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-SLiC-HF/9411a8a4-306e-43da-96d7-c93eb3aac398.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-SLiC-HF/9411a8a4-306e-43da-96d7-c93eb3aac398.json new file mode 100644 index 000000000..3220421bd --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-SLiC-HF/9411a8a4-306e-43da-96d7-c93eb3aac398.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT-SLiC-HF/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Base-8B-SFT-SLiC-HF", + "id": "princeton-nlp/Llama-3-Base-8B-SFT-SLiC-HF", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.489 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4704 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0506 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4091 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3063 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-SimPO/314cfcd7-674a-49d2-adf5-6d45c30e2382.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-SimPO/314cfcd7-674a-49d2-adf5-6d45c30e2382.json deleted file mode 100644 index b90bf9c96..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-SimPO/314cfcd7-674a-49d2-adf5-6d45c30e2382.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT-SimPO/1762652580.446312", - "retrieved_timestamp": "1762652580.446312", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Base-8B-SFT-SimPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Base-8B-SFT-SimPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4685401401614383 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47412507033960827 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41268750000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31050531914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-SimPO/c93feb32-0526-44ac-b3ed-95f08c37cc9f.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-SimPO/c93feb32-0526-44ac-b3ed-95f08c37cc9f.json new file mode 100644 index 000000000..5ed4540df --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-SimPO/c93feb32-0526-44ac-b3ed-95f08c37cc9f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT-SimPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": 
"third_party" + }, + "model_info": { + "name": "Llama-3-Base-8B-SFT-SimPO", + "id": "princeton-nlp/Llama-3-Base-8B-SFT-SimPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4685 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4741 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0551 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2886 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4127 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3105 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT/1a3b0f7a-afb6-4002-9321-23a86f000c5c.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT/1a3b0f7a-afb6-4002-9321-23a86f000c5c.json new file mode 100644 index 000000000..d4c8acc9b --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT/1a3b0f7a-afb6-4002-9321-23a86f000c5c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Base-8B-SFT", + "id": "princeton-nlp/Llama-3-Base-8B-SFT", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + 
"precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2796 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4643 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.04 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4118 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3093 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT/494df3f9-7ce9-4f81-99c4-e6100d6e4187.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT/494df3f9-7ce9-4f81-99c4-e6100d6e4187.json deleted file mode 100644 index acd418503..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT/494df3f9-7ce9-4f81-99c4-e6100d6e4187.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT/1762652580.444184", - "retrieved_timestamp": "1762652580.444185", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Base-8B-SFT", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Base-8B-SFT", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27959591661236627 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.464303802632615 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4117916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3093417553191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-CPO-v0.2/2de21869-2851-43f8-b5c3-a4b9e0e6e3ac.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-CPO-v0.2/2de21869-2851-43f8-b5c3-a4b9e0e6e3ac.json deleted file mode 100644 index 79e48c880..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-CPO-v0.2/2de21869-2851-43f8-b5c3-a4b9e0e6e3ac.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-CPO-v0.2/1762652580.44678", - "retrieved_timestamp": "1762652580.446781", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Instruct-8B-CPO-v0.2", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Instruct-8B-CPO-v0.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7505817896514582 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5026669871217129 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10800604229607251 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36190625000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37059507978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-CPO-v0.2/8d29363d-3096-4c54-a40e-acf4a7318a04.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-CPO-v0.2/8d29363d-3096-4c54-a40e-acf4a7318a04.json new file mode 100644 index 000000000..6799069cb --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-CPO-v0.2/8d29363d-3096-4c54-a40e-acf4a7318a04.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-CPO-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Instruct-8B-CPO-v0.2", + "id": "princeton-nlp/Llama-3-Instruct-8B-CPO-v0.2", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7506 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5027 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.108 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + 
"dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3619 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3706 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-CPO/8cea452d-63b8-4e82-9511-64c94f8e140d.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-CPO/8cea452d-63b8-4e82-9511-64c94f8e140d.json new file mode 100644 index 000000000..acd408cf8 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-CPO/8cea452d-63b8-4e82-9511-64c94f8e140d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-CPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Instruct-8B-CPO", + "id": "princeton-nlp/Llama-3-Instruct-8B-CPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7293 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4999 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0989 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3514 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3652 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-CPO/95eb37c8-2a58-45e3-bd86-2c305e3cb5dd.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-CPO/95eb37c8-2a58-45e3-bd86-2c305e3cb5dd.json deleted file mode 100644 index 6f438cc31..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-CPO/95eb37c8-2a58-45e3-bd86-2c305e3cb5dd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-CPO/1762652580.4465249", - "retrieved_timestamp": "1762652580.446526", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Instruct-8B-CPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Instruct-8B-CPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7292993701157373 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4998793158888361 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09894259818731117 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35139583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36519281914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-DPO-v0.2/5e5b5424-1d48-4a5e-8775-52c75609c338.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-DPO-v0.2/5e5b5424-1d48-4a5e-8775-52c75609c338.json new file 
mode 100644 index 000000000..68024676a --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-DPO-v0.2/5e5b5424-1d48-4a5e-8775-52c75609c338.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-DPO-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Instruct-8B-DPO-v0.2", + "id": "princeton-nlp/Llama-3-Instruct-8B-DPO-v0.2", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7208 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5056 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0899 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3844 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3769 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-DPO-v0.2/6ae028c9-19d9-447b-93c1-c4548aef84f9.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-DPO-v0.2/6ae028c9-19d9-447b-93c1-c4548aef84f9.json deleted file mode 100644 index 8b9c2797f..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-DPO-v0.2/6ae028c9-19d9-447b-93c1-c4548aef84f9.json +++ /dev/null @@ 
-1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-DPO-v0.2/1762652580.447217", - "retrieved_timestamp": "1762652580.447217", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Instruct-8B-DPO-v0.2", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Instruct-8B-DPO-v0.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7208063493752133 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.505620320855615 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08987915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3844479166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37691156914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-DPO/73787033-ed1d-4d2e-b7b2-e886ef6f1036.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-DPO/73787033-ed1d-4d2e-b7b2-e886ef6f1036.json new file mode 100644 index 000000000..9471420c1 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-DPO/73787033-ed1d-4d2e-b7b2-e886ef6f1036.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-DPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Instruct-8B-DPO", + "id": "princeton-nlp/Llama-3-Instruct-8B-DPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": 
{ + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6757 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4991 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0846 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2718 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3738 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3665 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-DPO/81c7a3df-7e92-4efa-a323-51ea3e0a4fa6.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-DPO/81c7a3df-7e92-4efa-a323-51ea3e0a4fa6.json deleted file mode 100644 index db88b8387..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-DPO/81c7a3df-7e92-4efa-a323-51ea3e0a4fa6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-DPO/1762652580.447003", - "retrieved_timestamp": "1762652580.447003", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Instruct-8B-DPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Instruct-8B-DPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6757436934001781 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4991303079139502 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37381250000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36652260638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-KTO-v0.2/54c9403f-2525-45c0-a585-9ff598f95f6b.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-KTO-v0.2/54c9403f-2525-45c0-a585-9ff598f95f6b.json new file mode 100644 index 000000000..8762059d7 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-KTO-v0.2/54c9403f-2525-45c0-a585-9ff598f95f6b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-KTO-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Instruct-8B-KTO-v0.2", + "id": "princeton-nlp/Llama-3-Instruct-8B-KTO-v0.2", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.729 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.508 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": 
"Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0997 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3777 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3668 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-KTO-v0.2/5f35c42b-2d34-42bc-b94e-127a678cad2c.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-KTO-v0.2/5f35c42b-2d34-42bc-b94e-127a678cad2c.json deleted file mode 100644 index 24d0f6d37..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-KTO-v0.2/5f35c42b-2d34-42bc-b94e-127a678cad2c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-KTO-v0.2/1762652580.447652", - "retrieved_timestamp": "1762652580.447653", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Instruct-8B-KTO-v0.2", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Instruct-8B-KTO-v0.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7290245437660962 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5079766897761946 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09969788519637462 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, 
- "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37775 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3667719414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-KTO/77d0d88d-7ca8-4f3e-8b79-295f53140635.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-KTO/77d0d88d-7ca8-4f3e-8b79-295f53140635.json new file mode 100644 index 000000000..0d6769c0d --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-KTO/77d0d88d-7ca8-4f3e-8b79-295f53140635.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-KTO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Instruct-8B-KTO", + "id": "princeton-nlp/Llama-3-Instruct-8B-KTO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6864 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4982 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0725 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3698 + } + }, + { + "evaluation_name": 
"MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3599 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-KTO/e8602fbb-422c-464e-87f4-79c6e1a4afcf.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-KTO/e8602fbb-422c-464e-87f4-79c6e1a4afcf.json deleted file mode 100644 index d8b59693a..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-KTO/e8602fbb-422c-464e-87f4-79c6e1a4afcf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-KTO/1762652580.4474308", - "retrieved_timestamp": "1762652580.447432", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Instruct-8B-KTO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Instruct-8B-KTO", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6864098370102439 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4981903187457697 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07250755287009064 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36984374999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35987367021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-ORPO-v0.2/28bf3b2a-6c0c-4994-aaf5-80b67d82a955.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-ORPO-v0.2/28bf3b2a-6c0c-4994-aaf5-80b67d82a955.json deleted file mode 100644 index 2793ce30b..000000000 --- 
a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-ORPO-v0.2/28bf3b2a-6c0c-4994-aaf5-80b67d82a955.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-ORPO-v0.2/1762652580.448072", - "retrieved_timestamp": "1762652580.448073", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Instruct-8B-ORPO-v0.2", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Instruct-8B-ORPO-v0.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7633213207622442 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.507835231782556 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10196374622356495 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37796874999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37308843085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-ORPO-v0.2/727f27e3-2a3f-4572-8db5-87e498c4b6ca.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-ORPO-v0.2/727f27e3-2a3f-4572-8db5-87e498c4b6ca.json new file mode 100644 index 000000000..26a0731e0 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-ORPO-v0.2/727f27e3-2a3f-4572-8db5-87e498c4b6ca.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-ORPO-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Instruct-8B-ORPO-v0.2", + "id": "princeton-nlp/Llama-3-Instruct-8B-ORPO-v0.2", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": 
"bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7633 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5078 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.102 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.378 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3731 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-ORPO/8789e9aa-5cfb-4eca-9795-540c5a9b4bb4.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-ORPO/8789e9aa-5cfb-4eca-9795-540c5a9b4bb4.json deleted file mode 100644 index 01fcf9141..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-ORPO/8789e9aa-5cfb-4eca-9795-540c5a9b4bb4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-ORPO/1762652580.447865", - "retrieved_timestamp": "1762652580.4478662", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Instruct-8B-ORPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Instruct-8B-ORPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - 
"params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.712813113649561 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5001206199104097 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07854984894259819 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35018750000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36461103723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-ORPO/b6e0cc97-27cf-4082-a908-95d5c39014b8.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-ORPO/b6e0cc97-27cf-4082-a908-95d5c39014b8.json new file mode 100644 index 000000000..64f178681 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-ORPO/b6e0cc97-27cf-4082-a908-95d5c39014b8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-ORPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Instruct-8B-ORPO", + "id": "princeton-nlp/Llama-3-Instruct-8B-ORPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7128 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5001 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 
5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0785 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3502 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3646 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RDPO-v0.2/1c3ea099-8b3b-4184-9f30-e7cdeea8f24e.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RDPO-v0.2/1c3ea099-8b3b-4184-9f30-e7cdeea8f24e.json deleted file mode 100644 index 76c41d85c..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RDPO-v0.2/1c3ea099-8b3b-4184-9f30-e7cdeea8f24e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-RDPO-v0.2/1762652580.448503", - "retrieved_timestamp": "1762652580.448504", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Instruct-8B-RDPO-v0.2", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Instruct-8B-RDPO-v0.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7076922565459647 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5049218189829557 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08685800604229607 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { 
- "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3804479166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37741023936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RDPO-v0.2/3b77ec51-fd47-4bc7-9e96-ed46202fef7c.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RDPO-v0.2/3b77ec51-fd47-4bc7-9e96-ed46202fef7c.json new file mode 100644 index 000000000..bac88e0ae --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RDPO-v0.2/3b77ec51-fd47-4bc7-9e96-ed46202fef7c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-RDPO-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Instruct-8B-RDPO-v0.2", + "id": "princeton-nlp/Llama-3-Instruct-8B-RDPO-v0.2", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7077 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5049 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0869 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy 
on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3804 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3774 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RDPO/041d45dd-c371-4e9c-9cda-a63e3d7a1b2d.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RDPO/041d45dd-c371-4e9c-9cda-a63e3d7a1b2d.json deleted file mode 100644 index ad6f694d4..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RDPO/041d45dd-c371-4e9c-9cda-a63e3d7a1b2d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-RDPO/1762652580.448289", - "retrieved_timestamp": "1762652580.44829", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Instruct-8B-RDPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Instruct-8B-RDPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6660017642078574 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5033626077797596 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3752083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36070478723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RDPO/b24cdd3f-3e44-4ebe-b2b4-209ee0bbfbd3.json 
b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RDPO/b24cdd3f-3e44-4ebe-b2b4-209ee0bbfbd3.json new file mode 100644 index 000000000..03be08868 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RDPO/b24cdd3f-3e44-4ebe-b2b4-209ee0bbfbd3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-RDPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Instruct-8B-RDPO", + "id": "princeton-nlp/Llama-3-Instruct-8B-RDPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.666 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5034 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0846 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3752 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3607 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RRHF-v0.2/bc221748-c03b-4fee-9147-8f63b0017f0c.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RRHF-v0.2/bc221748-c03b-4fee-9147-8f63b0017f0c.json deleted file mode 100644 index 51c24e15d..000000000 --- 
a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RRHF-v0.2/bc221748-c03b-4fee-9147-8f63b0017f0c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-RRHF-v0.2/1762652580.4489532", - "retrieved_timestamp": "1762652580.448954", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Instruct-8B-RRHF-v0.2", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Instruct-8B-RRHF-v0.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.712488419615509 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49838952572927536 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08761329305135952 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37378125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3482380319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RRHF-v0.2/e47a3cab-dfef-47f6-9377-9ee32489bab6.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RRHF-v0.2/e47a3cab-dfef-47f6-9377-9ee32489bab6.json new file mode 100644 index 000000000..d4d35dc63 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RRHF-v0.2/e47a3cab-dfef-47f6-9377-9ee32489bab6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-RRHF-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Instruct-8B-RRHF-v0.2", + "id": "princeton-nlp/Llama-3-Instruct-8B-RRHF-v0.2", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": 
"bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7125 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4984 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0876 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3738 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3482 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RRHF/1e4481fe-458b-4c23-8a6c-55439fb8b4fd.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RRHF/1e4481fe-458b-4c23-8a6c-55439fb8b4fd.json new file mode 100644 index 000000000..e44593e69 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RRHF/1e4481fe-458b-4c23-8a6c-55439fb8b4fd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-RRHF/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Instruct-8B-RRHF", + "id": "princeton-nlp/Llama-3-Instruct-8B-RRHF", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + 
"source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7275 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4911 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0967 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3476 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3644 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RRHF/e93eff52-c6e1-474e-8089-f672000fe1e5.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RRHF/e93eff52-c6e1-474e-8089-f672000fe1e5.json deleted file mode 100644 index c2c95dd30..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RRHF/e93eff52-c6e1-474e-8089-f672000fe1e5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-RRHF/1762652580.4487302", - "retrieved_timestamp": "1762652580.448731", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Instruct-8B-RRHF", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Instruct-8B-RRHF", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7274509412802475 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49105468765647214 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09667673716012085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3475520833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36436170212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SLiC-HF-v0.2/5a5746dd-0270-4151-b774-8eaa6860d5e0.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SLiC-HF-v0.2/5a5746dd-0270-4151-b774-8eaa6860d5e0.json deleted file mode 100644 index 6418b7ac8..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SLiC-HF-v0.2/5a5746dd-0270-4151-b774-8eaa6860d5e0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-SLiC-HF-v0.2/1762652580.4493709", - "retrieved_timestamp": "1762652580.4493718", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Instruct-8B-SLiC-HF-v0.2", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Instruct-8B-SLiC-HF-v0.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7109646848140712 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49838952572927536 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08761329305135952 - } - }, - { - "evaluation_name": "GPQA", - 
"metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37378125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3482380319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SLiC-HF-v0.2/6421e9dc-e7ca-4e1c-9f4f-1d1ac409c4d1.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SLiC-HF-v0.2/6421e9dc-e7ca-4e1c-9f4f-1d1ac409c4d1.json new file mode 100644 index 000000000..7c04e216a --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SLiC-HF-v0.2/6421e9dc-e7ca-4e1c-9f4f-1d1ac409c4d1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-SLiC-HF-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Instruct-8B-SLiC-HF-v0.2", + "id": "princeton-nlp/Llama-3-Instruct-8B-SLiC-HF-v0.2", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.711 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4984 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0876 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3738 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3482 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SLiC-HF/55f43b53-6ed9-4c16-bf75-c968999a6f36.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SLiC-HF/55f43b53-6ed9-4c16-bf75-c968999a6f36.json new file mode 100644 index 000000000..abb7ea88f --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SLiC-HF/55f43b53-6ed9-4c16-bf75-c968999a6f36.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-SLiC-HF/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Instruct-8B-SLiC-HF", + "id": "princeton-nlp/Llama-3-Instruct-8B-SLiC-HF", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.74 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5029 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0974 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2861 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.3723 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3585 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SLiC-HF/aaa9cd01-cca9-489c-91e0-79ff026eb258.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SLiC-HF/aaa9cd01-cca9-489c-91e0-79ff026eb258.json deleted file mode 100644 index 716a4f240..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SLiC-HF/aaa9cd01-cca9-489c-91e0-79ff026eb258.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-SLiC-HF/1762652580.449163", - "retrieved_timestamp": "1762652580.449164", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Instruct-8B-SLiC-HF", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Instruct-8B-SLiC-HF", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7399655137258031 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5029422936734547 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09743202416918428 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3722916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35846077127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SimPO-v0.2/5e499da1-f8c1-4830-828c-7d4013ea0243.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SimPO-v0.2/5e499da1-f8c1-4830-828c-7d4013ea0243.json deleted file mode 100644 index ec7b25170..000000000 --- 
a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SimPO-v0.2/5e499da1-f8c1-4830-828c-7d4013ea0243.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-SimPO-v0.2/1762652580.44994", - "retrieved_timestamp": "1762652580.449941", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Instruct-8B-SimPO-v0.2", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Instruct-8B-SimPO-v0.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6808645505037745 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.503833834044343 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07401812688821752 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3988020833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36220079787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SimPO-v0.2/6ce93e70-04b1-46b8-b3e3-7eb0df35e1c1.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SimPO-v0.2/6ce93e70-04b1-46b8-b3e3-7eb0df35e1c1.json new file mode 100644 index 000000000..ffe9fbf5e --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SimPO-v0.2/6ce93e70-04b1-46b8-b3e3-7eb0df35e1c1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-SimPO-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Instruct-8B-SimPO-v0.2", + "id": "princeton-nlp/Llama-3-Instruct-8B-SimPO-v0.2", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + 
"precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6809 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5038 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.074 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3988 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3622 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SimPO/95096a89-2baf-4b14-bc6e-1f30e920c086.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SimPO/95096a89-2baf-4b14-bc6e-1f30e920c086.json new file mode 100644 index 000000000..c46b79969 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SimPO/95096a89-2baf-4b14-bc6e-1f30e920c086.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-SimPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Instruct-8B-SimPO", + "id": "princeton-nlp/Llama-3-Instruct-8B-SimPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + 
"dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6504 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4845 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0861 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3948 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3489 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SimPO/fcd2c5e3-ebfd-4c1c-ac8a-d28ec08f1bf2.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SimPO/fcd2c5e3-ebfd-4c1c-ac8a-d28ec08f1bf2.json deleted file mode 100644 index 58ed138d1..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SimPO/fcd2c5e3-ebfd-4c1c-ac8a-d28ec08f1bf2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-SimPO/1762652580.449708", - "retrieved_timestamp": "1762652580.449709", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Instruct-8B-SimPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Instruct-8B-SimPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6503898544750152 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48446848524905367 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08610271903323263 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39483333333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3489029255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-CPO/4c2ab1ed-8177-4518-ae3d-754f9711369d.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-CPO/4c2ab1ed-8177-4518-ae3d-754f9711369d.json deleted file mode 100644 index 6a4310b7e..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-CPO/4c2ab1ed-8177-4518-ae3d-754f9711369d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Base-SFT-CPO/1762652580.45017", - "retrieved_timestamp": "1762652580.450171", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Mistral-7B-Base-SFT-CPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Mistral-7B-Base-SFT-CPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46549267055856236 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43821512506663574 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { 
- "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4070833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26512632978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-CPO/f1651632-2787-47cf-b471-89d1b89a6b01.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-CPO/f1651632-2787-47cf-b471-89d1b89a6b01.json new file mode 100644 index 000000000..625ad8634 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-CPO/f1651632-2787-47cf-b471-89d1b89a6b01.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Base-SFT-CPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-7B-Base-SFT-CPO", + "id": "princeton-nlp/Mistral-7B-Base-SFT-CPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4655 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4382 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0279 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4071 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-DPO/133d7669-db7f-47b6-b838-51b9577a9e68.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-DPO/133d7669-db7f-47b6-b838-51b9577a9e68.json deleted file mode 100644 index c715a451c..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-DPO/133d7669-db7f-47b6-b838-51b9577a9e68.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Base-SFT-DPO/1762652580.450392", - "retrieved_timestamp": "1762652580.4503932", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Mistral-7B-Base-SFT-DPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Mistral-7B-Base-SFT-DPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44033830237104216 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43501123979612694 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.021148036253776436 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41222916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26454454787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-DPO/e1fb2ac9-8f60-4dc1-9e0d-99fcb91a53a9.json 
b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-DPO/e1fb2ac9-8f60-4dc1-9e0d-99fcb91a53a9.json new file mode 100644 index 000000000..a960ca923 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-DPO/e1fb2ac9-8f60-4dc1-9e0d-99fcb91a53a9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Base-SFT-DPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-7B-Base-SFT-DPO", + "id": "princeton-nlp/Mistral-7B-Base-SFT-DPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4403 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.435 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0211 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4122 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2645 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-IPO/b402d383-b80e-4cd9-b2ec-a1e435f67ac5.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-IPO/b402d383-b80e-4cd9-b2ec-a1e435f67ac5.json deleted file mode 100644 index d122a81ef..000000000 --- 
a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-IPO/b402d383-b80e-4cd9-b2ec-a1e435f67ac5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Base-SFT-IPO/1762652580.4506009", - "retrieved_timestamp": "1762652580.450602", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Mistral-7B-Base-SFT-IPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Mistral-7B-Base-SFT-IPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48295300912689443 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4458024605899282 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37762500000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2791722074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-IPO/d3accbc1-d698-4357-ab08-0b98fb49b4ed.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-IPO/d3accbc1-d698-4357-ab08-0b98fb49b4ed.json new file mode 100644 index 000000000..908f14ac8 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-IPO/d3accbc1-d698-4357-ab08-0b98fb49b4ed.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Base-SFT-IPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-7B-Base-SFT-IPO", + "id": "princeton-nlp/Mistral-7B-Base-SFT-IPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + 
"params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.483 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4458 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0287 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3776 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2792 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-KTO/5388a25a-5780-4ae1-999f-172b558a7b52.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-KTO/5388a25a-5780-4ae1-999f-172b558a7b52.json new file mode 100644 index 000000000..31e5cf5b4 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-KTO/5388a25a-5780-4ae1-999f-172b558a7b52.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Base-SFT-KTO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-7B-Base-SFT-KTO", + "id": "princeton-nlp/Mistral-7B-Base-SFT-KTO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" 
+ }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4785 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4476 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0393 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4368 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2872 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-KTO/a0048817-4f45-4bca-ac1a-b7e0c25bd7ab.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-KTO/a0048817-4f45-4bca-ac1a-b7e0c25bd7ab.json deleted file mode 100644 index c6f1fb012..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-KTO/a0048817-4f45-4bca-ac1a-b7e0c25bd7ab.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Base-SFT-KTO/1762652580.450817", - "retrieved_timestamp": "1762652580.450818", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Mistral-7B-Base-SFT-KTO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Mistral-7B-Base-SFT-KTO", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.478481540091402 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44764334464528677 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43678124999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28715093085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-RDPO/034fa9fa-4103-428d-a50e-b117ef5e0726.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-RDPO/034fa9fa-4103-428d-a50e-b117ef5e0726.json deleted file mode 100644 index 3257bfec3..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-RDPO/034fa9fa-4103-428d-a50e-b117ef5e0726.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Base-SFT-RDPO/1762652580.451031", - "retrieved_timestamp": "1762652580.4510322", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Mistral-7B-Base-SFT-RDPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Mistral-7B-Base-SFT-RDPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46064663980460735 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44395328626924213 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3579375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27767619680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-RDPO/9e4143ff-d461-4fdb-8bc7-86f959f69e68.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-RDPO/9e4143ff-d461-4fdb-8bc7-86f959f69e68.json new file mode 100644 index 000000000..56f6acce0 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-RDPO/9e4143ff-d461-4fdb-8bc7-86f959f69e68.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Base-SFT-RDPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-7B-Base-SFT-RDPO", + "id": "princeton-nlp/Mistral-7B-Base-SFT-RDPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4606 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.444 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0219 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.3579 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-RRHF/5d843bd7-b34b-41d4-92ff-c25a709b4930.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-RRHF/5d843bd7-b34b-41d4-92ff-c25a709b4930.json new file mode 100644 index 000000000..8699b54e7 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-RRHF/5d843bd7-b34b-41d4-92ff-c25a709b4930.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Base-SFT-RRHF/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-7B-Base-SFT-RRHF", + "id": "princeton-nlp/Mistral-7B-Base-SFT-RRHF", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4407 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4281 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0249 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4187 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": 
"TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2398 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-RRHF/fbbd671a-3005-448a-bc15-718ba23bcf72.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-RRHF/fbbd671a-3005-448a-bc15-718ba23bcf72.json deleted file mode 100644 index 41967ec14..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-RRHF/fbbd671a-3005-448a-bc15-718ba23bcf72.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Base-SFT-RRHF/1762652580.451245", - "retrieved_timestamp": "1762652580.451246", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Mistral-7B-Base-SFT-RRHF", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Mistral-7B-Base-SFT-RRHF", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44066299640509404 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42805937403716016 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4186770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23977726063829788 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-SLiC-HF/2c28dcd3-af20-41ab-9234-a8296ecc98c0.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-SLiC-HF/2c28dcd3-af20-41ab-9234-a8296ecc98c0.json deleted file mode 100644 index fc6baebea..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-SLiC-HF/2c28dcd3-af20-41ab-9234-a8296ecc98c0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - 
"evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Base-SFT-SLiC-HF/1762652580.451465", - "retrieved_timestamp": "1762652580.451466", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Mistral-7B-Base-SFT-SLiC-HF", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Mistral-7B-Base-SFT-SLiC-HF", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5127284494031392 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44223991890402176 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.035498489425981876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42608333333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2780917553191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-SLiC-HF/87975b2f-298b-4297-8f4d-e5bb1bf5d113.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-SLiC-HF/87975b2f-298b-4297-8f4d-e5bb1bf5d113.json new file mode 100644 index 000000000..09f698c9e --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-SLiC-HF/87975b2f-298b-4297-8f4d-e5bb1bf5d113.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Base-SFT-SLiC-HF/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-7B-Base-SFT-SLiC-HF", + "id": "princeton-nlp/Mistral-7B-Base-SFT-SLiC-HF", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + 
"dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5127 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4422 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0355 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4261 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2781 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-SimPO/41bb8174-f3d6-4862-b892-dbc9f6e2e696.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-SimPO/41bb8174-f3d6-4862-b892-dbc9f6e2e696.json new file mode 100644 index 000000000..5f30fecdb --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-SimPO/41bb8174-f3d6-4862-b892-dbc9f6e2e696.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Base-SFT-SimPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-7B-Base-SFT-SimPO", + "id": "princeton-nlp/Mistral-7B-Base-SFT-SimPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, 
+ "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4701 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4398 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0144 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3971 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2702 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-SimPO/9bed5ccb-35c0-40e1-89b8-617656787052.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-SimPO/9bed5ccb-35c0-40e1-89b8-617656787052.json deleted file mode 100644 index e2b0b5601..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-SimPO/9bed5ccb-35c0-40e1-89b8-617656787052.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Base-SFT-SimPO/1762652580.4516768", - "retrieved_timestamp": "1762652580.451678", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Mistral-7B-Base-SFT-SimPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Mistral-7B-Base-SFT-SimPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47006387496287627 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4398050727924064 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39706250000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27019614361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-CPO/259a0166-2ee3-409a-85ce-963d90d05ae7.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-CPO/259a0166-2ee3-409a-85ce-963d90d05ae7.json deleted file mode 100644 index 51c1ec8ca..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-CPO/259a0166-2ee3-409a-85ce-963d90d05ae7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Instruct-CPO/1762652580.4518862", - "retrieved_timestamp": "1762652580.4518871", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Mistral-7B-Instruct-CPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Mistral-7B-Instruct-CPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4203047912871182 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.406922267565148 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41784375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701130319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-CPO/683ad2cd-5e39-4088-b98b-94d89dda7b88.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-CPO/683ad2cd-5e39-4088-b98b-94d89dda7b88.json new file mode 100644 index 000000000..256520f89 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-CPO/683ad2cd-5e39-4088-b98b-94d89dda7b88.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Instruct-CPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-7B-Instruct-CPO", + "id": "princeton-nlp/Mistral-7B-Instruct-CPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4203 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4069 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0204 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4178 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + 
"dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-DPO/08ffd7ab-ccca-4258-be6d-cbc151cc43aa.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-DPO/08ffd7ab-ccca-4258-be6d-cbc151cc43aa.json new file mode 100644 index 000000000..c489fb059 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-DPO/08ffd7ab-ccca-4258-be6d-cbc151cc43aa.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Instruct-DPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-7B-Instruct-DPO", + "id": "princeton-nlp/Mistral-7B-Instruct-DPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5176 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.406 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.031 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3833 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2749 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-DPO/0df26c01-7fae-4254-8e97-e03c6078d861.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-DPO/0df26c01-7fae-4254-8e97-e03c6078d861.json deleted file mode 100644 index 25be3b94e..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-DPO/0df26c01-7fae-4254-8e97-e03c6078d861.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Instruct-DPO/1762652580.4521", - "retrieved_timestamp": "1762652580.4521", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Mistral-7B-Instruct-DPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Mistral-7B-Instruct-DPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.517624347841505 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4060358459697702 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030966767371601207 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3833333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2748503989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-IPO/4b6efad4-c697-4f0a-8d24-75dc49d8ec06.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-IPO/4b6efad4-c697-4f0a-8d24-75dc49d8ec06.json new file mode 100644 index 000000000..2ed1a6808 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-IPO/4b6efad4-c697-4f0a-8d24-75dc49d8ec06.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Instruct-IPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + 
"source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-7B-Instruct-IPO", + "id": "princeton-nlp/Mistral-7B-Instruct-IPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4929 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4322 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0204 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2735 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4324 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2708 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-IPO/fed6b773-040e-409b-884e-a97a1abfedc0.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-IPO/fed6b773-040e-409b-884e-a97a1abfedc0.json deleted file mode 100644 index d6e87ad4a..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-IPO/fed6b773-040e-409b-884e-a97a1abfedc0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Instruct-IPO/1762652580.45231", - "retrieved_timestamp": "1762652580.45231", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", 
- "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Mistral-7B-Instruct-IPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Mistral-7B-Instruct-IPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4929198969844457 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4322183023180588 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43241666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2707779255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-KTO/4986c30a-85b0-4263-9be4-d69c9b067e0c.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-KTO/4986c30a-85b0-4263-9be4-d69c9b067e0c.json new file mode 100644 index 000000000..9d84a30d3 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-KTO/4986c30a-85b0-4263-9be4-d69c9b067e0c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Instruct-KTO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-7B-Instruct-KTO", + "id": "princeton-nlp/Mistral-7B-Instruct-KTO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4908 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": 
"hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.414 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0264 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2735 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3953 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2812 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-KTO/ff079687-4519-4f0b-bb1e-2b447cb2b4c9.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-KTO/ff079687-4519-4f0b-bb1e-2b447cb2b4c9.json deleted file mode 100644 index 6273514f6..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-KTO/ff079687-4519-4f0b-bb1e-2b447cb2b4c9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Instruct-KTO/1762652580.452526", - "retrieved_timestamp": "1762652580.452527", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Mistral-7B-Instruct-KTO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Mistral-7B-Instruct-KTO", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4907966417993147 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4139586477181159 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3952708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28125 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-ORPO/36735132-1510-42cf-a68a-c46507f52edb.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-ORPO/36735132-1510-42cf-a68a-c46507f52edb.json deleted file mode 100644 index 4b64a76c5..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-ORPO/36735132-1510-42cf-a68a-c46507f52edb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Instruct-ORPO/1762652580.452744", - "retrieved_timestamp": "1762652580.452745", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Mistral-7B-Instruct-ORPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Mistral-7B-Instruct-ORPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4719621714827768 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41040615756566107 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": 
{ - "score": 0.3912395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2662067819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-ORPO/47b5a878-1a4a-425f-ae6f-ac286f681cca.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-ORPO/47b5a878-1a4a-425f-ae6f-ac286f681cca.json new file mode 100644 index 000000000..ef91d0d7f --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-ORPO/47b5a878-1a4a-425f-ae6f-ac286f681cca.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Instruct-ORPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-7B-Instruct-ORPO", + "id": "princeton-nlp/Mistral-7B-Instruct-ORPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.472 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4104 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0295 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3912 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2662 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-RDPO/992a6862-46b9-415e-858f-2eff8709ca81.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-RDPO/992a6862-46b9-415e-858f-2eff8709ca81.json new file mode 100644 index 000000000..99e611d5b --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-RDPO/992a6862-46b9-415e-858f-2eff8709ca81.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Instruct-RDPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-7B-Instruct-RDPO", + "id": "princeton-nlp/Mistral-7B-Instruct-RDPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4887 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.405 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0249 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3873 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-RDPO/9989efbb-bd01-4c7c-bf30-67fa81698906.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-RDPO/9989efbb-bd01-4c7c-bf30-67fa81698906.json deleted file mode 100644 index 127930a11..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-RDPO/9989efbb-bd01-4c7c-bf30-67fa81698906.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Instruct-RDPO/1762652580.452956", - "retrieved_timestamp": "1762652580.452957", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Mistral-7B-Instruct-RDPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Mistral-7B-Instruct-RDPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4887232542985944 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40501479745073615 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3873333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27767619680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-RRHF/0a5ce684-675e-4fbe-b141-df12903228f9.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-RRHF/0a5ce684-675e-4fbe-b141-df12903228f9.json deleted file mode 100644 index 176473968..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-RRHF/0a5ce684-675e-4fbe-b141-df12903228f9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Instruct-RRHF/1762652580.4531672", - "retrieved_timestamp": "1762652580.4531682", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging 
Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Mistral-7B-Instruct-RRHF", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Mistral-7B-Instruct-RRHF", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49601723427173233 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41897663476657404 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.397875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26512632978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-RRHF/c6391381-c973-4068-b72c-af08762d9e5c.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-RRHF/c6391381-c973-4068-b72c-af08762d9e5c.json new file mode 100644 index 000000000..d0f2bc97e --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-RRHF/c6391381-c973-4068-b72c-af08762d9e5c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Instruct-RRHF/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-7B-Instruct-RRHF", + "id": "princeton-nlp/Mistral-7B-Instruct-RRHF", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.496 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + 
"dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.419 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0279 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3979 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-SLiC-HF/0f6e18e6-1b0f-43f4-a9af-6632f6ce63cc.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-SLiC-HF/0f6e18e6-1b0f-43f4-a9af-6632f6ce63cc.json new file mode 100644 index 000000000..43e0e4f1d --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-SLiC-HF/0f6e18e6-1b0f-43f4-a9af-6632f6ce63cc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Instruct-SLiC-HF/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-7B-Instruct-SLiC-HF", + "id": "princeton-nlp/Mistral-7B-Instruct-SLiC-HF", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5115 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.404 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0174 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3913 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2715 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-SLiC-HF/8b5493df-86fd-495a-8dce-9c5398795fc9.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-SLiC-HF/8b5493df-86fd-495a-8dce-9c5398795fc9.json deleted file mode 100644 index f72b82a7d..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-SLiC-HF/8b5493df-86fd-495a-8dce-9c5398795fc9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Instruct-SLiC-HF/1762652580.453388", - "retrieved_timestamp": "1762652580.4533892", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Mistral-7B-Instruct-SLiC-HF", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Mistral-7B-Instruct-SLiC-HF", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5115294086357531 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4040013641288438 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.017371601208459216 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39130208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27152593085106386 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-SimPO/56d9ee92-6774-4c9b-9861-c5f0a9945e7c.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-SimPO/56d9ee92-6774-4c9b-9861-c5f0a9945e7c.json new file mode 100644 index 000000000..b5dab8271 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-SimPO/56d9ee92-6774-4c9b-9861-c5f0a9945e7c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Instruct-SimPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-7B-Instruct-SimPO", + "id": "princeton-nlp/Mistral-7B-Instruct-SimPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4687 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4507 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0287 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2785 + } + }, + { + 
"evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4098 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2797 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-SimPO/a3d0b6ec-e2be-4ca5-b083-df3c7ea0b385.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-SimPO/a3d0b6ec-e2be-4ca5-b083-df3c7ea0b385.json deleted file mode 100644 index c8bb5293a..000000000 --- a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-SimPO/a3d0b6ec-e2be-4ca5-b083-df3c7ea0b385.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Instruct-SimPO/1762652580.45361", - "retrieved_timestamp": "1762652580.45361", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/Mistral-7B-Instruct-SimPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Mistral-7B-Instruct-SimPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4686897432146704 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4507226157033399 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40978125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.2796708776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/Sheared-LLaMA-1.3B/d3e753cc-37fc-4d77-8b2d-da90a7843d60.json b/data/hfopenllm_v2/princeton-nlp/Sheared-LLaMA-1.3B/d3e753cc-37fc-4d77-8b2d-da90a7843d60.json new file mode 100644 index 000000000..e3ac0d5ad --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Sheared-LLaMA-1.3B/d3e753cc-37fc-4d77-8b2d-da90a7843d60.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Sheared-LLaMA-1.3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sheared-LLaMA-1.3B", + "id": "princeton-nlp/Sheared-LLaMA-1.3B", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.3 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2198 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3197 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0128 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2399 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3713 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1171 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Sheared-LLaMA-2.7B/eb08ef6f-6631-47c4-8f52-bf9454ad34b6.json b/data/hfopenllm_v2/princeton-nlp/Sheared-LLaMA-2.7B/eb08ef6f-6631-47c4-8f52-bf9454ad34b6.json 
new file mode 100644 index 000000000..8203b5fb5 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Sheared-LLaMA-2.7B/eb08ef6f-6631-47c4-8f52-bf9454ad34b6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Sheared-LLaMA-2.7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sheared-LLaMA-2.7B", + "id": "princeton-nlp/Sheared-LLaMA-2.7B", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 2.7 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2417 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3259 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0128 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2752 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3567 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1187 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/gemma-2-9b-it-DPO/2207b154-c5d4-4e5a-ade0-271e62d6345f.json b/data/hfopenllm_v2/princeton-nlp/gemma-2-9b-it-DPO/2207b154-c5d4-4e5a-ade0-271e62d6345f.json new file mode 100644 index 000000000..fd91acee8 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/gemma-2-9b-it-DPO/2207b154-c5d4-4e5a-ade0-271e62d6345f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/princeton-nlp_gemma-2-9b-it-DPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-9b-it-DPO", + "id": "princeton-nlp/gemma-2-9b-it-DPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2769 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5941 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0831 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3356 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.382 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3723 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/gemma-2-9b-it-DPO/5ed0019b-dc1e-4dd8-82e5-2d4cdb28beb9.json b/data/hfopenllm_v2/princeton-nlp/gemma-2-9b-it-DPO/5ed0019b-dc1e-4dd8-82e5-2d4cdb28beb9.json deleted file mode 100644 index 4497cfc39..000000000 --- a/data/hfopenllm_v2/princeton-nlp/gemma-2-9b-it-DPO/5ed0019b-dc1e-4dd8-82e5-2d4cdb28beb9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/princeton-nlp_gemma-2-9b-it-DPO/1762652580.454305", - "retrieved_timestamp": "1762652580.4543061", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "princeton-nlp/gemma-2-9b-it-DPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/gemma-2-9b-it-DPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27687203287277756 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5941444682956648 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33557046979865773 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38203125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3723404255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/princeton-nlp/gemma-2-9b-it-SimPO/f4161154-7777-4261-9275-a3002a1305d8.json b/data/hfopenllm_v2/princeton-nlp/gemma-2-9b-it-SimPO/f4161154-7777-4261-9275-a3002a1305d8.json new file mode 100644 index 000000000..0d667a77a --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/gemma-2-9b-it-SimPO/f4161154-7777-4261-9275-a3002a1305d8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/princeton-nlp_gemma-2-9b-it-SimPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-9b-it-SimPO", + "id": "princeton-nlp/gemma-2-9b-it-SimPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3207 + } + }, + { + "evaluation_name": "BBH", + 
"source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5839 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.071 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3356 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4123 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3975 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Bellatrix-1.5B-xElite/7f1c6c88-823f-4597-9794-bf05c076d4d3.json b/data/hfopenllm_v2/prithivMLmods/Bellatrix-1.5B-xElite/7f1c6c88-823f-4597-9794-bf05c076d4d3.json deleted file mode 100644 index 24dedc2e6..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Bellatrix-1.5B-xElite/7f1c6c88-823f-4597-9794-bf05c076d4d3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Bellatrix-1.5B-xElite/1762652580.4551811", - "retrieved_timestamp": "1762652580.455182", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Bellatrix-1.5B-xElite", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Bellatrix-1.5B-xElite", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1964144026737944 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.35011984799236834 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28700906344410876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36190625000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1657247340425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Bellatrix-1.5B-xElite/8523812d-1db6-4a9d-b06b-ac904191789d.json b/data/hfopenllm_v2/prithivMLmods/Bellatrix-1.5B-xElite/8523812d-1db6-4a9d-b06b-ac904191789d.json new file mode 100644 index 000000000..a73f99ed2 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Bellatrix-1.5B-xElite/8523812d-1db6-4a9d-b06b-ac904191789d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Bellatrix-1.5B-xElite/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Bellatrix-1.5B-xElite", + "id": "prithivMLmods/Bellatrix-1.5B-xElite", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1964 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3501 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.287 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, 
+ "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2785 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3619 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1657 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Bellatrix-Tiny-1.5B-R1/4e78f82e-aa31-414c-9c59-9c8e318fff17.json b/data/hfopenllm_v2/prithivMLmods/Bellatrix-Tiny-1.5B-R1/4e78f82e-aa31-414c-9c59-9c8e318fff17.json deleted file mode 100644 index 7d40211df..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Bellatrix-Tiny-1.5B-R1/4e78f82e-aa31-414c-9c59-9c8e318fff17.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Bellatrix-Tiny-1.5B-R1/1762652580.455581", - "retrieved_timestamp": "1762652580.455582", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Bellatrix-Tiny-1.5B-R1", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Bellatrix-Tiny-1.5B-R1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33522498082864577 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40221745714531076 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3682916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27509973404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Bellatrix-Tiny-1.5B-R1/6cd9ea81-618d-444e-a892-d4f9819daa67.json b/data/hfopenllm_v2/prithivMLmods/Bellatrix-Tiny-1.5B-R1/6cd9ea81-618d-444e-a892-d4f9819daa67.json new file mode 100644 index 000000000..ed7c9d8c6 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Bellatrix-Tiny-1.5B-R1/6cd9ea81-618d-444e-a892-d4f9819daa67.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Bellatrix-Tiny-1.5B-R1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Bellatrix-Tiny-1.5B-R1", + "id": "prithivMLmods/Bellatrix-Tiny-1.5B-R1", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3352 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4022 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0604 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3683 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2751 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/prithivMLmods/Bellatrix-Tiny-1B-v2/2217326d-377a-4503-8180-206c12c87436.json b/data/hfopenllm_v2/prithivMLmods/Bellatrix-Tiny-1B-v2/2217326d-377a-4503-8180-206c12c87436.json new file mode 100644 index 000000000..e1aaf8729 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Bellatrix-Tiny-1B-v2/2217326d-377a-4503-8180-206c12c87436.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Bellatrix-Tiny-1B-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Bellatrix-Tiny-1B-v2", + "id": "prithivMLmods/Bellatrix-Tiny-1B-v2", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.151 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3268 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0287 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.343 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1493 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Bellatrix-Tiny-1B-v2/715be726-e0e3-4589-91cf-85e41dbcbf8a.json b/data/hfopenllm_v2/prithivMLmods/Bellatrix-Tiny-1B-v2/715be726-e0e3-4589-91cf-85e41dbcbf8a.json deleted file mode 100644 
index 20b0df048..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Bellatrix-Tiny-1B-v2/715be726-e0e3-4589-91cf-85e41dbcbf8a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Bellatrix-Tiny-1B-v2/1762652580.4558249", - "retrieved_timestamp": "1762652580.4558249", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Bellatrix-Tiny-1B-v2", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Bellatrix-Tiny-1B-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15095169705270903 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3267684418723903 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34302083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14926861702127658 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Blaze-14B-xElite/3bbb10fc-e3b9-4c6a-ac35-ee5de9ecd330.json b/data/hfopenllm_v2/prithivMLmods/Blaze-14B-xElite/3bbb10fc-e3b9-4c6a-ac35-ee5de9ecd330.json new file mode 100644 index 000000000..b456222e3 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Blaze-14B-xElite/3bbb10fc-e3b9-4c6a-ac35-ee5de9ecd330.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Blaze-14B-xElite/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Blaze-14B-xElite", + "id": "prithivMLmods/Blaze-14B-xElite", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 
+ } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0363 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6628 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3693 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3943 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5111 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Blaze-14B-xElite/c4041b70-acce-4088-a3b9-299d4424e240.json b/data/hfopenllm_v2/prithivMLmods/Blaze-14B-xElite/c4041b70-acce-4088-a3b9-299d4424e240.json deleted file mode 100644 index 24a3fe08f..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Blaze-14B-xElite/c4041b70-acce-4088-a3b9-299d4424e240.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Blaze-14B-xElite/1762652580.456049", - "retrieved_timestamp": "1762652580.45605", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Blaze-14B-xElite", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Blaze-14B-xElite", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03632029681245762 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6627817236091689 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3693353474320242 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39429530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46248958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5111369680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/COCO-7B-Instruct-1M/01124f11-b739-422b-97f7-062074b8d0fb.json b/data/hfopenllm_v2/prithivMLmods/COCO-7B-Instruct-1M/01124f11-b739-422b-97f7-062074b8d0fb.json new file mode 100644 index 000000000..7b3bedda6 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/COCO-7B-Instruct-1M/01124f11-b739-422b-97f7-062074b8d0fb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_COCO-7B-Instruct-1M/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "COCO-7B-Instruct-1M", + "id": "prithivMLmods/COCO-7B-Instruct-1M", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4743 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.541 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact 
Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3497 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4382 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4186 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/COCO-7B-Instruct-1M/a7b425bc-9160-44ed-abf1-18c3b84cede2.json b/data/hfopenllm_v2/prithivMLmods/COCO-7B-Instruct-1M/a7b425bc-9160-44ed-abf1-18c3b84cede2.json deleted file mode 100644 index f68e49749..000000000 --- a/data/hfopenllm_v2/prithivMLmods/COCO-7B-Instruct-1M/a7b425bc-9160-44ed-abf1-18c3b84cede2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_COCO-7B-Instruct-1M/1762652580.456335", - "retrieved_timestamp": "1762652580.456337", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/COCO-7B-Instruct-1M", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/COCO-7B-Instruct-1M", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4743103853331383 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5409956853800891 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3496978851963746 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, 
- { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4382395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41863364361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite-1M/0c883e9c-4cec-4c65-aa10-96e0d0de2e1f.json b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite-1M/0c883e9c-4cec-4c65-aa10-96e0d0de2e1f.json deleted file mode 100644 index 64f65540f..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite-1M/0c883e9c-4cec-4c65-aa10-96e0d0de2e1f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-14B-Elite-1M/1762652580.457102", - "retrieved_timestamp": "1762652580.457103", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Calcium-Opus-14B-Elite-1M", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Calcium-Opus-14B-Elite-1M", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5612884923115112 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6329399079569701 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44561933534743203 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3523489932885906 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46760416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5152094414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite-1M/7cc4c93b-7c43-4bed-84a3-fa1cd9130abb.json 
b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite-1M/7cc4c93b-7c43-4bed-84a3-fa1cd9130abb.json new file mode 100644 index 000000000..f174ff65b --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite-1M/7cc4c93b-7c43-4bed-84a3-fa1cd9130abb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-14B-Elite-1M/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Calcium-Opus-14B-Elite-1M", + "id": "prithivMLmods/Calcium-Opus-14B-Elite-1M", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5613 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6329 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4456 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3523 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4676 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5152 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite-Stock/74d10ea5-3d08-4bb2-9246-5e053eb20fea.json b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite-Stock/74d10ea5-3d08-4bb2-9246-5e053eb20fea.json deleted file mode 100644 index b9c397afe..000000000 --- 
a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite-Stock/74d10ea5-3d08-4bb2-9246-5e053eb20fea.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-14B-Elite-Stock/1762652580.457346", - "retrieved_timestamp": "1762652580.4573472", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Calcium-Opus-14B-Elite-Stock", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Calcium-Opus-14B-Elite-Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.614294516327788 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6328767168557433 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46676737160120846 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36828859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48075 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5284242021276596 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite-Stock/bf3aa551-f9c6-4203-b2d4-55cf9e6e2872.json b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite-Stock/bf3aa551-f9c6-4203-b2d4-55cf9e6e2872.json new file mode 100644 index 000000000..5f39e061c --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite-Stock/bf3aa551-f9c6-4203-b2d4-55cf9e6e2872.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-14B-Elite-Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Calcium-Opus-14B-Elite-Stock", + "id": "prithivMLmods/Calcium-Opus-14B-Elite-Stock", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + 
"architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6143 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6329 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4668 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3683 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4808 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5284 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite/2eae8905-5338-4a78-86e7-d354d06efa23.json b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite/2eae8905-5338-4a78-86e7-d354d06efa23.json new file mode 100644 index 000000000..ead3156a2 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite/2eae8905-5338-4a78-86e7-d354d06efa23.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-14B-Elite/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Calcium-Opus-14B-Elite", + "id": "prithivMLmods/Calcium-Opus-14B-Elite", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": 
"hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6064 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6296 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3708 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3733 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4873 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5307 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite/487e1883-01c6-4714-9447-67837c78655b.json b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite/487e1883-01c6-4714-9447-67837c78655b.json deleted file mode 100644 index bf32a49fa..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite/487e1883-01c6-4714-9447-67837c78655b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-14B-Elite/1762652580.456628", - "retrieved_timestamp": "1762652580.456629", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Calcium-Opus-14B-Elite", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Calcium-Opus-14B-Elite", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6051521075191603 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6317361472468987 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4788519637462236 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37416107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4859583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5301695478723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite/79bccc27-27a0-4194-9c46-5e89b0f21b9e.json b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite/79bccc27-27a0-4194-9c46-5e89b0f21b9e.json deleted file mode 100644 index 01da8e370..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite/79bccc27-27a0-4194-9c46-5e89b0f21b9e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-14B-Elite/1762652580.456884", - "retrieved_timestamp": "1762652580.456885", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Calcium-Opus-14B-Elite", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Calcium-Opus-14B-Elite", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6063511482865463 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6295900497885079 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37084592145015105 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3733221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48732291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5306682180851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite/9dcc4121-e046-49c7-969e-7255b0c32d3d.json b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite/9dcc4121-e046-49c7-969e-7255b0c32d3d.json new file mode 100644 index 000000000..4683966d3 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite/9dcc4121-e046-49c7-969e-7255b0c32d3d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-14B-Elite/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Calcium-Opus-14B-Elite", + "id": "prithivMLmods/Calcium-Opus-14B-Elite", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6052 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6317 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4789 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3742 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + 
}, + "score_details": { + "score": 0.486 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5302 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite2-R1/6eeb591b-aed2-4cdd-85bb-75011c9c5760.json b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite2-R1/6eeb591b-aed2-4cdd-85bb-75011c9c5760.json deleted file mode 100644 index 34810e3c5..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite2-R1/6eeb591b-aed2-4cdd-85bb-75011c9c5760.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-14B-Elite2-R1/1762652580.457828", - "retrieved_timestamp": "1762652580.4578292", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Calcium-Opus-14B-Elite2-R1", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Calcium-Opus-14B-Elite2-R1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6325793339450436 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6362357624539174 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3338368580060423 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39093959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48998958333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5247672872340425 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite2-R1/dd7d4acd-549a-467b-b461-0eba5b019122.json b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite2-R1/dd7d4acd-549a-467b-b461-0eba5b019122.json new file mode 100644 index 000000000..4c691d1f5 --- 
/dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite2-R1/dd7d4acd-549a-467b-b461-0eba5b019122.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-14B-Elite2-R1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Calcium-Opus-14B-Elite2-R1", + "id": "prithivMLmods/Calcium-Opus-14B-Elite2-R1", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6326 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6362 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3338 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3909 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.49 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5248 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite2/159969cc-32c5-4f6f-b586-8e6d44180b44.json b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite2/159969cc-32c5-4f6f-b586-8e6d44180b44.json new file mode 100644 index 000000000..48a8d72bd --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite2/159969cc-32c5-4f6f-b586-8e6d44180b44.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/prithivMLmods_Calcium-Opus-14B-Elite2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Calcium-Opus-14B-Elite2", + "id": "prithivMLmods/Calcium-Opus-14B-Elite2", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6176 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6318 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.469 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.37 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.494 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5301 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite2/689d38cd-898e-43ec-92e8-238cefac6776.json b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite2/689d38cd-898e-43ec-92e8-238cefac6776.json deleted file mode 100644 index 01e851ee1..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite2/689d38cd-898e-43ec-92e8-238cefac6776.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-14B-Elite2/1762652580.457599", - "retrieved_timestamp": "1762652580.4576", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Calcium-Opus-14B-Elite2", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Calcium-Opus-14B-Elite2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6176168122803052 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6318256156619112 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4690332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3699664429530201 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49395833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5300864361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite3/2edb276e-86c5-4bde-a696-4f68fb659b4e.json b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite3/2edb276e-86c5-4bde-a696-4f68fb659b4e.json deleted file mode 100644 index d3ff3ae39..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite3/2edb276e-86c5-4bde-a696-4f68fb659b4e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-14B-Elite3/1762652580.458055", - "retrieved_timestamp": "1762652580.458056", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Calcium-Opus-14B-Elite3", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Calcium-Opus-14B-Elite3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5428285837134359 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6350402275340573 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4705438066465257 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37080536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4794791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5334940159574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite3/b80e559d-e519-4678-8abc-ee5591b81fac.json b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite3/b80e559d-e519-4678-8abc-ee5591b81fac.json new file mode 100644 index 000000000..25816e9c4 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite3/b80e559d-e519-4678-8abc-ee5591b81fac.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-14B-Elite3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Calcium-Opus-14B-Elite3", + "id": "prithivMLmods/Calcium-Opus-14B-Elite3", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5428 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.635 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4705 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3708 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4795 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5335 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite4/380cd349-5309-40b8-b549-ac6d6d42331a.json b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite4/380cd349-5309-40b8-b549-ac6d6d42331a.json deleted file mode 100644 index 1972c0308..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite4/380cd349-5309-40b8-b549-ac6d6d42331a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-14B-Elite4/1762652580.4582741", - "retrieved_timestamp": "1762652580.458275", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Calcium-Opus-14B-Elite4", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Calcium-Opus-14B-Elite4", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6111971790405014 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6195264951573699 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36253776435045315 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35570469798657717 - } - }, - { - "evaluation_name": 
"MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46871875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.514876994680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite4/90c137c9-939d-4e77-9fcc-9e33551a6121.json b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite4/90c137c9-939d-4e77-9fcc-9e33551a6121.json new file mode 100644 index 000000000..2a82505c9 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite4/90c137c9-939d-4e77-9fcc-9e33551a6121.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-14B-Elite4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Calcium-Opus-14B-Elite4", + "id": "prithivMLmods/Calcium-Opus-14B-Elite4", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6112 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6195 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3625 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3557 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4687 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": 
"hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5149 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Merge/6d4dfc45-b7ff-47a2-bcf0-f12641365cbf.json b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Merge/6d4dfc45-b7ff-47a2-bcf0-f12641365cbf.json deleted file mode 100644 index 06e05cad5..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Merge/6d4dfc45-b7ff-47a2-bcf0-f12641365cbf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-14B-Merge/1762652580.4585001", - "retrieved_timestamp": "1762652580.458503", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Calcium-Opus-14B-Merge", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Calcium-Opus-14B-Merge", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4949434168007554 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6319290054891645 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4637462235649547 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37080536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48608333333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5355718085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Merge/f25d6fef-d337-4cf7-ba05-ca6ff5eccd52.json b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Merge/f25d6fef-d337-4cf7-ba05-ca6ff5eccd52.json new file mode 100644 index 000000000..7ea9119d9 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Merge/f25d6fef-d337-4cf7-ba05-ca6ff5eccd52.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/prithivMLmods_Calcium-Opus-14B-Merge/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Calcium-Opus-14B-Merge", + "id": "prithivMLmods/Calcium-Opus-14B-Merge", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4949 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6319 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4637 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3708 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4861 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5356 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-20B-v1/9c414577-7f2d-487a-9f2b-7675e0532ac1.json b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-20B-v1/9c414577-7f2d-487a-9f2b-7675e0532ac1.json deleted file mode 100644 index 79baa0630..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-20B-v1/9c414577-7f2d-487a-9f2b-7675e0532ac1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-20B-v1/1762652580.458724", - "retrieved_timestamp": "1762652580.4587252", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Calcium-Opus-20B-v1", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Calcium-Opus-20B-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 19.173 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3092716215197897 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.599033246250772 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36178247734138974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35318791946308725 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49433333333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4734042553191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-20B-v1/c6f92306-dcdc-4549-bfc2-feb62a3a6ef6.json b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-20B-v1/c6f92306-dcdc-4549-bfc2-feb62a3a6ef6.json new file mode 100644 index 000000000..84bf99e76 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-20B-v1/c6f92306-dcdc-4549-bfc2-feb62a3a6ef6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-20B-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Calcium-Opus-20B-v1", + "id": "prithivMLmods/Calcium-Opus-20B-v1", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 19.173 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.3093 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.599 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3618 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3532 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4943 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4734 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Codepy-Deepthink-3B/96c64d23-d23d-486c-83a4-4c0ab4f09d60.json b/data/hfopenllm_v2/prithivMLmods/Codepy-Deepthink-3B/96c64d23-d23d-486c-83a4-4c0ab4f09d60.json new file mode 100644 index 000000000..8b565a25a --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Codepy-Deepthink-3B/96c64d23-d23d-486c-83a4-4c0ab4f09d60.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Codepy-Deepthink-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Codepy-Deepthink-3B", + "id": "prithivMLmods/Codepy-Deepthink-3B", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4327 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4259 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1156 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.331 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.309 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Codepy-Deepthink-3B/adb6f7d5-db2f-49b1-aab4-1fd3dfcb7e34.json b/data/hfopenllm_v2/prithivMLmods/Codepy-Deepthink-3B/adb6f7d5-db2f-49b1-aab4-1fd3dfcb7e34.json deleted file mode 100644 index 680c8de2f..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Codepy-Deepthink-3B/adb6f7d5-db2f-49b1-aab4-1fd3dfcb7e34.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Codepy-Deepthink-3B/1762652580.458943", - "retrieved_timestamp": "1762652580.458944", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Codepy-Deepthink-3B", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Codepy-Deepthink-3B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43271962836385236 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4259451388094382 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": 
"Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11555891238670694 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3310208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3090093085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Coma-II-14B/243abf0b-0f88-4b4f-ab51-6c8aebaf19be.json b/data/hfopenllm_v2/prithivMLmods/Coma-II-14B/243abf0b-0f88-4b4f-ab51-6c8aebaf19be.json new file mode 100644 index 000000000..8f5675e29 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Coma-II-14B/243abf0b-0f88-4b4f-ab51-6c8aebaf19be.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Coma-II-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Coma-II-14B", + "id": "prithivMLmods/Coma-II-14B", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4168 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6321 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5514 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4002 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + 
"dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5351 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.504 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Coma-II-14B/785e4cde-ec97-4e36-8ee3-3fb4c2543901.json b/data/hfopenllm_v2/prithivMLmods/Coma-II-14B/785e4cde-ec97-4e36-8ee3-3fb4c2543901.json deleted file mode 100644 index 52f312510..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Coma-II-14B/785e4cde-ec97-4e36-8ee3-3fb4c2543901.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Coma-II-14B/1762652580.4591591", - "retrieved_timestamp": "1762652580.45916", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Coma-II-14B", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Coma-II-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.416832892281369 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6320713788922736 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4001677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5351041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5039893617021277 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Condor-Opus-14B-Exp/438fb728-d6ad-4c28-a43c-ff82d522cd50.json 
b/data/hfopenllm_v2/prithivMLmods/Condor-Opus-14B-Exp/438fb728-d6ad-4c28-a43c-ff82d522cd50.json new file mode 100644 index 000000000..8ad6643ba --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Condor-Opus-14B-Exp/438fb728-d6ad-4c28-a43c-ff82d522cd50.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Condor-Opus-14B-Exp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Condor-Opus-14B-Exp", + "id": "prithivMLmods/Condor-Opus-14B-Exp", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4043 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6154 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5227 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3918 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5194 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5014 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Condor-Opus-14B-Exp/7b9f72e6-0280-46ba-8645-ab8dcb9ddf4d.json b/data/hfopenllm_v2/prithivMLmods/Condor-Opus-14B-Exp/7b9f72e6-0280-46ba-8645-ab8dcb9ddf4d.json deleted file mode 100644 index 9cae80c6a..000000000 --- 
a/data/hfopenllm_v2/prithivMLmods/Condor-Opus-14B-Exp/7b9f72e6-0280-46ba-8645-ab8dcb9ddf4d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Condor-Opus-14B-Exp/1762652580.4595032", - "retrieved_timestamp": "1762652580.4595041", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Condor-Opus-14B-Exp", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Condor-Opus-14B-Exp", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40431831983581346 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6154220154262888 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5226586102719033 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39177852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5193854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5014128989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Cygnus-II-14B/120d9ddf-0e6e-4fb9-9250-019d1fbfdc28.json b/data/hfopenllm_v2/prithivMLmods/Cygnus-II-14B/120d9ddf-0e6e-4fb9-9250-019d1fbfdc28.json deleted file mode 100644 index 48f4555e8..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Cygnus-II-14B/120d9ddf-0e6e-4fb9-9250-019d1fbfdc28.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Cygnus-II-14B/1762652580.4597278", - "retrieved_timestamp": "1762652580.459729", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Cygnus-II-14B", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Cygnus-II-14B", - "additional_details": { - "precision": 
"bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6184412913292286 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6660565208074918 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4395770392749245 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3875838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46884375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5390625 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Cygnus-II-14B/94b45b8d-b754-4fb4-843d-b7ffeafc4f1b.json b/data/hfopenllm_v2/prithivMLmods/Cygnus-II-14B/94b45b8d-b754-4fb4-843d-b7ffeafc4f1b.json new file mode 100644 index 000000000..e0650e76a --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Cygnus-II-14B/94b45b8d-b754-4fb4-843d-b7ffeafc4f1b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Cygnus-II-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Cygnus-II-14B", + "id": "prithivMLmods/Cygnus-II-14B", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6184 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6661 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": 
"hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4396 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3876 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4688 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5391 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Deepthink-Llama-3-8B-Preview/5618fc82-d455-4261-8e34-1190d70fd3f3.json b/data/hfopenllm_v2/prithivMLmods/Deepthink-Llama-3-8B-Preview/5618fc82-d455-4261-8e34-1190d70fd3f3.json new file mode 100644 index 000000000..78544d542 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Deepthink-Llama-3-8B-Preview/5618fc82-d455-4261-8e34-1190d70fd3f3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Deepthink-Llama-3-8B-Preview/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Deepthink-Llama-3-8B-Preview", + "id": "prithivMLmods/Deepthink-Llama-3-8B-Preview", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2955 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4665 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.355 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3163 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3707 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2739 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Deepthink-Reasoning-14B/343e0d36-5470-4865-aeeb-a9963b38f90a.json b/data/hfopenllm_v2/prithivMLmods/Deepthink-Reasoning-14B/343e0d36-5470-4865-aeeb-a9963b38f90a.json deleted file mode 100644 index f9cd0b58a..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Deepthink-Reasoning-14B/343e0d36-5470-4865-aeeb-a9963b38f90a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Deepthink-Reasoning-14B/1762652580.460205", - "retrieved_timestamp": "1762652580.460206", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Deepthink-Reasoning-14B", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Deepthink-Reasoning-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5423542866261519 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6334054936091441 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4229607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36661073825503354 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47315625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5295877659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Deepthink-Reasoning-14B/395f6339-3fca-4f4d-befc-2d231008efdd.json b/data/hfopenllm_v2/prithivMLmods/Deepthink-Reasoning-14B/395f6339-3fca-4f4d-befc-2d231008efdd.json new file mode 100644 index 000000000..3bf404b51 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Deepthink-Reasoning-14B/395f6339-3fca-4f4d-befc-2d231008efdd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Deepthink-Reasoning-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Deepthink-Reasoning-14B", + "id": "prithivMLmods/Deepthink-Reasoning-14B", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5424 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6334 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.423 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3666 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4732 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + 
"source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5296 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Deepthink-Reasoning-7B/10d2454a-ae69-43b6-962a-77102645ed56.json b/data/hfopenllm_v2/prithivMLmods/Deepthink-Reasoning-7B/10d2454a-ae69-43b6-962a-77102645ed56.json deleted file mode 100644 index 3498f890b..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Deepthink-Reasoning-7B/10d2454a-ae69-43b6-962a-77102645ed56.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Deepthink-Reasoning-7B/1762652580.460416", - "retrieved_timestamp": "1762652580.460416", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Deepthink-Reasoning-7B", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Deepthink-Reasoning-7B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48400244684104843 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5505070216145282 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33459214501510576 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4432291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43492353723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Deepthink-Reasoning-7B/b22696ac-7074-44f2-b72f-c59ca0a41ce6.json b/data/hfopenllm_v2/prithivMLmods/Deepthink-Reasoning-7B/b22696ac-7074-44f2-b72f-c59ca0a41ce6.json new file mode 100644 index 000000000..4a2741947 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Deepthink-Reasoning-7B/b22696ac-7074-44f2-b72f-c59ca0a41ce6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + 
"evaluation_id": "hfopenllm_v2/prithivMLmods_Deepthink-Reasoning-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Deepthink-Reasoning-7B", + "id": "prithivMLmods/Deepthink-Reasoning-7B", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.484 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5505 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3346 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4432 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4349 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Dinobot-Opus-14B-Exp/6856f8b6-a719-4f69-be71-4df582015f28.json b/data/hfopenllm_v2/prithivMLmods/Dinobot-Opus-14B-Exp/6856f8b6-a719-4f69-be71-4df582015f28.json new file mode 100644 index 000000000..e70ce3f68 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Dinobot-Opus-14B-Exp/6856f8b6-a719-4f69-be71-4df582015f28.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Dinobot-Opus-14B-Exp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": 
"documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dinobot-Opus-14B-Exp", + "id": "prithivMLmods/Dinobot-Opus-14B-Exp", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.824 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.637 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5317 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3247 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.426 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4979 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Dinobot-Opus-14B-Exp/6ed13eae-92ee-4fa7-9ed8-d9f21d6de48c.json b/data/hfopenllm_v2/prithivMLmods/Dinobot-Opus-14B-Exp/6ed13eae-92ee-4fa7-9ed8-d9f21d6de48c.json deleted file mode 100644 index 7738a4b86..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Dinobot-Opus-14B-Exp/6ed13eae-92ee-4fa7-9ed8-d9f21d6de48c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Dinobot-Opus-14B-Exp/1762652580.460635", - "retrieved_timestamp": "1762652580.460635", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - 
}, - "model_info": { - "name": "prithivMLmods/Dinobot-Opus-14B-Exp", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Dinobot-Opus-14B-Exp", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8239958864701216 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6370093752306357 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5317220543806647 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42603125000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4979222074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Elita-0.1-Distilled-R1-abliterated/9b63b3ad-568f-4f15-9cc6-36049ac89727.json b/data/hfopenllm_v2/prithivMLmods/Elita-0.1-Distilled-R1-abliterated/9b63b3ad-568f-4f15-9cc6-36049ac89727.json deleted file mode 100644 index c307942fb..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Elita-0.1-Distilled-R1-abliterated/9b63b3ad-568f-4f15-9cc6-36049ac89727.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Elita-0.1-Distilled-R1-abliterated/1762652580.460851", - "retrieved_timestamp": "1762652580.460852", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Elita-0.1-Distilled-R1-abliterated", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Elita-0.1-Distilled-R1-abliterated", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35423454212600347 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": 
"Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38277850218543213 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3066465256797583 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36596875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2757646276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Elita-0.1-Distilled-R1-abliterated/f2c0ea2b-76ae-4469-832e-84c0b79fa283.json b/data/hfopenllm_v2/prithivMLmods/Elita-0.1-Distilled-R1-abliterated/f2c0ea2b-76ae-4469-832e-84c0b79fa283.json new file mode 100644 index 000000000..588c887dc --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Elita-0.1-Distilled-R1-abliterated/f2c0ea2b-76ae-4469-832e-84c0b79fa283.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Elita-0.1-Distilled-R1-abliterated/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Elita-0.1-Distilled-R1-abliterated", + "id": "prithivMLmods/Elita-0.1-Distilled-R1-abliterated", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3542 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3828 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3066 + } + }, + { + "evaluation_name": "GPQA", 
+ "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.366 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2758 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Elita-1/5619e3cb-eb3e-4420-a156-6f7b2a5d372d.json b/data/hfopenllm_v2/prithivMLmods/Elita-1/5619e3cb-eb3e-4420-a156-6f7b2a5d372d.json new file mode 100644 index 000000000..54138e141 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Elita-1/5619e3cb-eb3e-4420-a156-6f7b2a5d372d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Elita-1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Elita-1", + "id": "prithivMLmods/Elita-1", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4906 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.652 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3429 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.3758 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4834 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5381 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Elita-1/d721cfe0-eb01-42fe-955a-bfd219c38917.json b/data/hfopenllm_v2/prithivMLmods/Elita-1/d721cfe0-eb01-42fe-955a-bfd219c38917.json deleted file mode 100644 index dcb1ec5c5..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Elita-1/d721cfe0-eb01-42fe-955a-bfd219c38917.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Elita-1/1762652580.4610822", - "retrieved_timestamp": "1762652580.4610822", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Elita-1", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Elita-1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4906470387460826 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6520409113818334 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3429003021148036 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37583892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48341666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5381482712765957 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/prithivMLmods/Epimetheus-14B-Axo/9d5e329f-491a-4608-bcac-1ee63046b34a.json b/data/hfopenllm_v2/prithivMLmods/Epimetheus-14B-Axo/9d5e329f-491a-4608-bcac-1ee63046b34a.json new file mode 100644 index 000000000..188b84ba8 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Epimetheus-14B-Axo/9d5e329f-491a-4608-bcac-1ee63046b34a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Epimetheus-14B-Axo/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Epimetheus-14B-Axo", + "id": "prithivMLmods/Epimetheus-14B-Axo", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5546 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6613 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4101 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3926 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.482 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5304 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Epimetheus-14B-Axo/dc3aed7d-01e0-46cc-85f6-2a06cf6b6edc.json b/data/hfopenllm_v2/prithivMLmods/Epimetheus-14B-Axo/dc3aed7d-01e0-46cc-85f6-2a06cf6b6edc.json deleted file mode 100644 index 
5ff0754bd..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Epimetheus-14B-Axo/dc3aed7d-01e0-46cc-85f6-2a06cf6b6edc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Epimetheus-14B-Axo/1762652580.461361", - "retrieved_timestamp": "1762652580.461361", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Epimetheus-14B-Axo", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Epimetheus-14B-Axo", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.554643900406477 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6613340892011862 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41012084592145015 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3926174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4819583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5304188829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Equuleus-Opus-14B-Exp/80953f08-6530-4bab-a375-cc542081aabb.json b/data/hfopenllm_v2/prithivMLmods/Equuleus-Opus-14B-Exp/80953f08-6530-4bab-a375-cc542081aabb.json new file mode 100644 index 000000000..31a9c3f6f --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Equuleus-Opus-14B-Exp/80953f08-6530-4bab-a375-cc542081aabb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Equuleus-Opus-14B-Exp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Equuleus-Opus-14B-Exp", + "id": "prithivMLmods/Equuleus-Opus-14B-Exp", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + 
"params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7001 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6434 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4585 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3867 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4952 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5374 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Equuleus-Opus-14B-Exp/ccce28fd-d3ae-427c-b848-f08b2cf85692.json b/data/hfopenllm_v2/prithivMLmods/Equuleus-Opus-14B-Exp/ccce28fd-d3ae-427c-b848-f08b2cf85692.json deleted file mode 100644 index 556441943..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Equuleus-Opus-14B-Exp/ccce28fd-d3ae-427c-b848-f08b2cf85692.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Equuleus-Opus-14B-Exp/1762652580.46158", - "retrieved_timestamp": "1762652580.46158", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Equuleus-Opus-14B-Exp", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Equuleus-Opus-14B-Exp", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": 
"IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7000735825387749 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6433769213927613 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45845921450151056 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38674496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4951666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5374002659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Eridanus-Opus-14B-r999/0b8691a8-f394-4da3-a67b-faa1af9b42c9.json b/data/hfopenllm_v2/prithivMLmods/Eridanus-Opus-14B-r999/0b8691a8-f394-4da3-a67b-faa1af9b42c9.json new file mode 100644 index 000000000..4e0e78a3e --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Eridanus-Opus-14B-r999/0b8691a8-f394-4da3-a67b-faa1af9b42c9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Eridanus-Opus-14B-r999/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Eridanus-Opus-14B-r999", + "id": "prithivMLmods/Eridanus-Opus-14B-r999", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6386 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6584 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + 
"metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.386 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3943 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4769 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5362 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Eridanus-Opus-14B-r999/9dd4aa3f-98aa-4e51-bd21-c999b3990a64.json b/data/hfopenllm_v2/prithivMLmods/Eridanus-Opus-14B-r999/9dd4aa3f-98aa-4e51-bd21-c999b3990a64.json deleted file mode 100644 index 156b7a3df..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Eridanus-Opus-14B-r999/9dd4aa3f-98aa-4e51-bd21-c999b3990a64.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Eridanus-Opus-14B-r999/1762652580.461785", - "retrieved_timestamp": "1762652580.461786", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Eridanus-Opus-14B-r999", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Eridanus-Opus-14B-r999", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.638574537781974 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6583918169279829 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3859516616314199 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.39429530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.476875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5361535904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Evac-Opus-14B-Exp/26c88cb2-7c81-4b0c-8493-baa9d8f7b1a0.json b/data/hfopenllm_v2/prithivMLmods/Evac-Opus-14B-Exp/26c88cb2-7c81-4b0c-8493-baa9d8f7b1a0.json deleted file mode 100644 index 94bac0459..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Evac-Opus-14B-Exp/26c88cb2-7c81-4b0c-8493-baa9d8f7b1a0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Evac-Opus-14B-Exp/1762652580.461996", - "retrieved_timestamp": "1762652580.461997", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Evac-Opus-14B-Exp", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Evac-Opus-14B-Exp", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5916135852870383 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6475440673701862 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4214501510574018 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3884228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47278125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5316655585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Evac-Opus-14B-Exp/fb541a2b-d9bd-4aa2-8b83-da62a3b77731.json 
b/data/hfopenllm_v2/prithivMLmods/Evac-Opus-14B-Exp/fb541a2b-d9bd-4aa2-8b83-da62a3b77731.json new file mode 100644 index 000000000..29b96ac56 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Evac-Opus-14B-Exp/fb541a2b-d9bd-4aa2-8b83-da62a3b77731.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Evac-Opus-14B-Exp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Evac-Opus-14B-Exp", + "id": "prithivMLmods/Evac-Opus-14B-Exp", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5916 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6475 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4215 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3884 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4728 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5317 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/FastThink-0.5B-Tiny/b731eb88-e0ed-4edb-bed3-2d82bbce43bb.json b/data/hfopenllm_v2/prithivMLmods/FastThink-0.5B-Tiny/b731eb88-e0ed-4edb-bed3-2d82bbce43bb.json deleted file mode 100644 index f996a899a..000000000 --- 
a/data/hfopenllm_v2/prithivMLmods/FastThink-0.5B-Tiny/b731eb88-e0ed-4edb-bed3-2d82bbce43bb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_FastThink-0.5B-Tiny/1762652580.462207", - "retrieved_timestamp": "1762652580.462208", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/FastThink-0.5B-Tiny", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/FastThink-0.5B-Tiny", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25798880304259364 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3205583807088257 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3566354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16489361702127658 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/FastThink-0.5B-Tiny/c20d1c62-d3e0-4e30-b0d3-4c62a6585d23.json b/data/hfopenllm_v2/prithivMLmods/FastThink-0.5B-Tiny/c20d1c62-d3e0-4e30-b0d3-4c62a6585d23.json new file mode 100644 index 000000000..6a08d3a8b --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/FastThink-0.5B-Tiny/c20d1c62-d3e0-4e30-b0d3-4c62a6585d23.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_FastThink-0.5B-Tiny/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FastThink-0.5B-Tiny", + "id": "prithivMLmods/FastThink-0.5B-Tiny", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } + }, + 
"evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.258 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3206 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0204 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3566 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1649 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/GWQ-9B-Preview/7735d88c-bdaa-4a12-9a99-a2dc5ec2ec66.json b/data/hfopenllm_v2/prithivMLmods/GWQ-9B-Preview/7735d88c-bdaa-4a12-9a99-a2dc5ec2ec66.json deleted file mode 100644 index 7320f7a0b..000000000 --- a/data/hfopenllm_v2/prithivMLmods/GWQ-9B-Preview/7735d88c-bdaa-4a12-9a99-a2dc5ec2ec66.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_GWQ-9B-Preview/1762652580.4624221", - "retrieved_timestamp": "1762652580.462423", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/GWQ-9B-Preview", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/GWQ-9B-Preview", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on 
IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5065836425129767 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5805745804247511 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22658610271903323 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33976510067114096 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4951041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39835438829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/GWQ-9B-Preview/8a10eeb6-7178-4c78-8940-68fad78e389b.json b/data/hfopenllm_v2/prithivMLmods/GWQ-9B-Preview/8a10eeb6-7178-4c78-8940-68fad78e389b.json new file mode 100644 index 000000000..5fd930e8e --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/GWQ-9B-Preview/8a10eeb6-7178-4c78-8940-68fad78e389b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_GWQ-9B-Preview/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "GWQ-9B-Preview", + "id": "prithivMLmods/GWQ-9B-Preview", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5066 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5806 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2266 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3398 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4951 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3984 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/GWQ-9B-Preview2/5c534761-19b5-4111-b1f5-c2fc3e121b24.json b/data/hfopenllm_v2/prithivMLmods/GWQ-9B-Preview2/5c534761-19b5-4111-b1f5-c2fc3e121b24.json deleted file mode 100644 index ae2e9ebf7..000000000 --- a/data/hfopenllm_v2/prithivMLmods/GWQ-9B-Preview2/5c534761-19b5-4111-b1f5-c2fc3e121b24.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_GWQ-9B-Preview2/1762652580.462637", - "retrieved_timestamp": "1762652580.4626381", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/GWQ-9B-Preview2", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/GWQ-9B-Preview2", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5208967761096114 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5797218710843371 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23716012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3263422818791946 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": 
"Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48598958333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3996841755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/GWQ-9B-Preview2/f0bb774c-a842-4261-b817-b169ce65a493.json b/data/hfopenllm_v2/prithivMLmods/GWQ-9B-Preview2/f0bb774c-a842-4261-b817-b169ce65a493.json new file mode 100644 index 000000000..99ceb5f7b --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/GWQ-9B-Preview2/f0bb774c-a842-4261-b817-b169ce65a493.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_GWQ-9B-Preview2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "GWQ-9B-Preview2", + "id": "prithivMLmods/GWQ-9B-Preview2", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5209 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5797 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2372 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3263 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.486 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3997 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/GWQ2b/59afe234-3a7f-49bb-873c-df6cf793e5e5.json b/data/hfopenllm_v2/prithivMLmods/GWQ2b/59afe234-3a7f-49bb-873c-df6cf793e5e5.json new file mode 100644 index 000000000..814634ab3 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/GWQ2b/59afe234-3a7f-49bb-873c-df6cf793e5e5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_GWQ2b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "GWQ2b", + "id": "prithivMLmods/GWQ2b", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4115 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4143 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0627 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4311 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2473 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/GWQ2b/8a89468f-fe2f-4bc9-be99-c9619c605efc.json 
b/data/hfopenllm_v2/prithivMLmods/GWQ2b/8a89468f-fe2f-4bc9-be99-c9619c605efc.json deleted file mode 100644 index a34fbadad..000000000 --- a/data/hfopenllm_v2/prithivMLmods/GWQ2b/8a89468f-fe2f-4bc9-be99-c9619c605efc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_GWQ2b/1762652580.462852", - "retrieved_timestamp": "1762652580.4628532", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/GWQ2b", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/GWQ2b", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41148707651254224 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41433702954085216 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06268882175226587 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43111458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24725731382978725 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Gaea-Opus-14B-Exp/4074081a-66a6-42e4-994f-72541f90888b.json b/data/hfopenllm_v2/prithivMLmods/Gaea-Opus-14B-Exp/4074081a-66a6-42e4-994f-72541f90888b.json new file mode 100644 index 000000000..72bbcb358 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Gaea-Opus-14B-Exp/4074081a-66a6-42e4-994f-72541f90888b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Gaea-Opus-14B-Exp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gaea-Opus-14B-Exp", + "id": "prithivMLmods/Gaea-Opus-14B-Exp", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + 
"architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5956 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.656 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4275 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3909 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4859 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5401 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Gaea-Opus-14B-Exp/f75e27a8-00e8-4473-b7ed-3fffa131ee0a.json b/data/hfopenllm_v2/prithivMLmods/Gaea-Opus-14B-Exp/f75e27a8-00e8-4473-b7ed-3fffa131ee0a.json deleted file mode 100644 index 5491c1915..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Gaea-Opus-14B-Exp/f75e27a8-00e8-4473-b7ed-3fffa131ee0a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Gaea-Opus-14B-Exp/1762652580.463063", - "retrieved_timestamp": "1762652580.463063", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Gaea-Opus-14B-Exp", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Gaea-Opus-14B-Exp", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5956351369920699 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6560465337491567 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42749244712990936 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39093959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48589583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5400598404255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Galactic-Qwen-14B-Exp1/6a618ec8-c029-49ec-9ea5-da52b5231280.json b/data/hfopenllm_v2/prithivMLmods/Galactic-Qwen-14B-Exp1/6a618ec8-c029-49ec-9ea5-da52b5231280.json new file mode 100644 index 000000000..9d9a90116 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Galactic-Qwen-14B-Exp1/6a618ec8-c029-49ec-9ea5-da52b5231280.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Galactic-Qwen-14B-Exp1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Galactic-Qwen-14B-Exp1", + "id": "prithivMLmods/Galactic-Qwen-14B-Exp1", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5832 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6582 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4018 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3935 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4781 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5396 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Galactic-Qwen-14B-Exp2/edc8f510-c961-4c1f-9757-e80c4247f275.json b/data/hfopenllm_v2/prithivMLmods/Galactic-Qwen-14B-Exp2/edc8f510-c961-4c1f-9757-e80c4247f275.json new file mode 100644 index 000000000..26d244447 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Galactic-Qwen-14B-Exp2/edc8f510-c961-4c1f-9757-e80c4247f275.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Galactic-Qwen-14B-Exp2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Galactic-Qwen-14B-Exp2", + "id": "prithivMLmods/Galactic-Qwen-14B-Exp2", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.662 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7203 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.3474 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3993 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5354 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5691 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Gauss-Opus-14B-R999/aaa5d1e6-5aca-4471-87ea-7195610a6c1d.json b/data/hfopenllm_v2/prithivMLmods/Gauss-Opus-14B-R999/aaa5d1e6-5aca-4471-87ea-7195610a6c1d.json new file mode 100644 index 000000000..7057ef00e --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Gauss-Opus-14B-R999/aaa5d1e6-5aca-4471-87ea-7195610a6c1d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Gauss-Opus-14B-R999/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gauss-Opus-14B-R999", + "id": "prithivMLmods/Gauss-Opus-14B-R999", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3907 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6228 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5755 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3918 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5338 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5007 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Gauss-Opus-14B-R999/e8596a17-9e5d-4ac5-9968-44d302628c31.json b/data/hfopenllm_v2/prithivMLmods/Gauss-Opus-14B-R999/e8596a17-9e5d-4ac5-9968-44d302628c31.json deleted file mode 100644 index 1c189a821..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Gauss-Opus-14B-R999/e8596a17-9e5d-4ac5-9968-44d302628c31.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Gauss-Opus-14B-R999/1762652580.463757", - "retrieved_timestamp": "1762652580.463758", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Gauss-Opus-14B-R999", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Gauss-Opus-14B-R999", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39065457430728245 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6227831608555382 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5755287009063444 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39177852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5338333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.500748005319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Jolt-v0.1/89b45e8b-9979-4c7f-8aa6-c6ab7009cab0.json b/data/hfopenllm_v2/prithivMLmods/Jolt-v0.1/89b45e8b-9979-4c7f-8aa6-c6ab7009cab0.json new file mode 100644 index 000000000..96783fe07 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Jolt-v0.1/89b45e8b-9979-4c7f-8aa6-c6ab7009cab0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Jolt-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Jolt-v0.1", + "id": "prithivMLmods/Jolt-v0.1", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5092 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6521 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3565 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.38 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4847 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5386 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/prithivMLmods/Jolt-v0.1/d96ef95b-ca39-4e33-9f6b-a4faa71e5009.json b/data/hfopenllm_v2/prithivMLmods/Jolt-v0.1/d96ef95b-ca39-4e33-9f6b-a4faa71e5009.json deleted file mode 100644 index 7d491b90a..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Jolt-v0.1/d96ef95b-ca39-4e33-9f6b-a4faa71e5009.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Jolt-v0.1/1762652580.463978", - "retrieved_timestamp": "1762652580.463979", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Jolt-v0.1", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Jolt-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5092066827129793 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6521408461659391 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3564954682779456 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3800335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48471875000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5386469414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Lacerta-Opus-14B-Elite8/21b53896-3b7b-470a-a49f-4b2cb4e6adef.json b/data/hfopenllm_v2/prithivMLmods/Lacerta-Opus-14B-Elite8/21b53896-3b7b-470a-a49f-4b2cb4e6adef.json deleted file mode 100644 index 35d167e89..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Lacerta-Opus-14B-Elite8/21b53896-3b7b-470a-a49f-4b2cb4e6adef.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Lacerta-Opus-14B-Elite8/1762652580.464193", - "retrieved_timestamp": "1762652580.464193", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": 
"documentation" - }, - "model_info": { - "name": "prithivMLmods/Lacerta-Opus-14B-Elite8", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Lacerta-Opus-14B-Elite8", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.614144913274556 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6401384743047456 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3648036253776435 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3783557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4635416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5321642287234043 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Lacerta-Opus-14B-Elite8/41000c74-8b29-4369-996f-cf3a2fd09f63.json b/data/hfopenllm_v2/prithivMLmods/Lacerta-Opus-14B-Elite8/41000c74-8b29-4369-996f-cf3a2fd09f63.json new file mode 100644 index 000000000..4a4f258fd --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Lacerta-Opus-14B-Elite8/41000c74-8b29-4369-996f-cf3a2fd09f63.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Lacerta-Opus-14B-Elite8/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lacerta-Opus-14B-Elite8", + "id": "prithivMLmods/Lacerta-Opus-14B-Elite8", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6141 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6401 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3648 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3784 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4635 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5322 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Llama-3.1-5B-Instruct/a1765846-74e1-440a-8851-12a571444059.json b/data/hfopenllm_v2/prithivMLmods/Llama-3.1-5B-Instruct/a1765846-74e1-440a-8851-12a571444059.json new file mode 100644 index 000000000..ae7d80588 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Llama-3.1-5B-Instruct/a1765846-74e1-440a-8851-12a571444059.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Llama-3.1-5B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-5B-Instruct", + "id": "prithivMLmods/Llama-3.1-5B-Instruct", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 5.413 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1407 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3051 + } + }, 
+ { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0151 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.354 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1184 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Llama-3.1-5B-Instruct/cdc5671a-e164-43b9-864c-808a9464e618.json b/data/hfopenllm_v2/prithivMLmods/Llama-3.1-5B-Instruct/cdc5671a-e164-43b9-864c-808a9464e618.json deleted file mode 100644 index b44d0a7af..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Llama-3.1-5B-Instruct/cdc5671a-e164-43b9-864c-808a9464e618.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Llama-3.1-5B-Instruct/1762652580.464407", - "retrieved_timestamp": "1762652580.4644082", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Llama-3.1-5B-Instruct", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Llama-3.1-5B-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 5.413 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14066011516110588 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3051074188361172 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015105740181268883 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35400000000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11835106382978723 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Llama-3.1-8B-Open-SFT/37276848-95fe-4403-896d-bf9fafbff04e.json b/data/hfopenllm_v2/prithivMLmods/Llama-3.1-8B-Open-SFT/37276848-95fe-4403-896d-bf9fafbff04e.json deleted file mode 100644 index 253545fad..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Llama-3.1-8B-Open-SFT/37276848-95fe-4403-896d-bf9fafbff04e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Llama-3.1-8B-Open-SFT/1762652580.464622", - "retrieved_timestamp": "1762652580.4646232", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Llama-3.1-8B-Open-SFT", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Llama-3.1-8B-Open-SFT", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4122616878770551 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4967982234773378 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1216012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39036458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.35222739361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Llama-3.1-8B-Open-SFT/9c6b594f-387a-42a3-9e40-3b26363e6071.json b/data/hfopenllm_v2/prithivMLmods/Llama-3.1-8B-Open-SFT/9c6b594f-387a-42a3-9e40-3b26363e6071.json new file mode 100644 index 000000000..41ba66370 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Llama-3.1-8B-Open-SFT/9c6b594f-387a-42a3-9e40-3b26363e6071.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Llama-3.1-8B-Open-SFT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8B-Open-SFT", + "id": "prithivMLmods/Llama-3.1-8B-Open-SFT", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4123 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4968 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1216 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3904 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3522 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Llama-3.2-3B-Math-Oct/2b910401-457a-45dd-920a-559f4595897b.json 
b/data/hfopenllm_v2/prithivMLmods/Llama-3.2-3B-Math-Oct/2b910401-457a-45dd-920a-559f4595897b.json new file mode 100644 index 000000000..7b7a839ae --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Llama-3.2-3B-Math-Oct/2b910401-457a-45dd-920a-559f4595897b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Llama-3.2-3B-Math-Oct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-3B-Math-Oct", + "id": "prithivMLmods/Llama-3.2-3B-Math-Oct", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4585 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4372 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1156 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.347 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Llama-3.2-6B-AlgoCode/90b7be49-53a0-4d7f-8995-cbc52fe3a70f.json b/data/hfopenllm_v2/prithivMLmods/Llama-3.2-6B-AlgoCode/90b7be49-53a0-4d7f-8995-cbc52fe3a70f.json new file mode 100644 index 000000000..8570bf857 --- /dev/null +++ 
b/data/hfopenllm_v2/prithivMLmods/Llama-3.2-6B-AlgoCode/90b7be49-53a0-4d7f-8995-cbc52fe3a70f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Llama-3.2-6B-AlgoCode/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-6B-AlgoCode", + "id": "prithivMLmods/Llama-3.2-6B-AlgoCode", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.339 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2136 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3748 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0136 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4013 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1798 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Llama-8B-Distill-CoT/5e8854ba-7147-4fdd-a568-1ea58e79e7d8.json b/data/hfopenllm_v2/prithivMLmods/Llama-8B-Distill-CoT/5e8854ba-7147-4fdd-a568-1ea58e79e7d8.json new file mode 100644 index 000000000..8bc00a642 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Llama-8B-Distill-CoT/5e8854ba-7147-4fdd-a568-1ea58e79e7d8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/prithivMLmods_Llama-8B-Distill-CoT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-8B-Distill-CoT", + "id": "prithivMLmods/Llama-8B-Distill-CoT", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3342 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4298 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4003 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.372 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2732 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Llama-Deepsync-1B/df6e0cfb-d720-428a-a5ad-b1529faa07c0.json b/data/hfopenllm_v2/prithivMLmods/Llama-Deepsync-1B/df6e0cfb-d720-428a-a5ad-b1529faa07c0.json new file mode 100644 index 000000000..193336b1d --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Llama-Deepsync-1B/df6e0cfb-d720-428a-a5ad-b1529faa07c0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Llama-Deepsync-1B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-Deepsync-1B", + "id": "prithivMLmods/Llama-Deepsync-1B", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.357 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3386 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0438 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3565 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1738 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Llama-Deepsync-3B/a88a6e6f-2253-4b67-9527-55ab6153e40f.json b/data/hfopenllm_v2/prithivMLmods/Llama-Deepsync-3B/a88a6e6f-2253-4b67-9527-55ab6153e40f.json new file mode 100644 index 000000000..702c1642b --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Llama-Deepsync-3B/a88a6e6f-2253-4b67-9527-55ab6153e40f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Llama-Deepsync-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-Deepsync-3B", + "id": "prithivMLmods/Llama-Deepsync-3B", + "developer": "prithivMLmods", + "inference_platform": 
"unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4302 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4292 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1178 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2718 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3324 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3031 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Llama-Express.1-Math/00c66a37-b46b-47e8-a098-ce12433c1135.json b/data/hfopenllm_v2/prithivMLmods/Llama-Express.1-Math/00c66a37-b46b-47e8-a098-ce12433c1135.json new file mode 100644 index 000000000..3d5e5f05f --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Llama-Express.1-Math/00c66a37-b46b-47e8-a098-ce12433c1135.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Llama-Express.1-Math/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-Express.1-Math", + "id": "prithivMLmods/Llama-Express.1-Math", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + 
"dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5084 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3364 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0559 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3143 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.161 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/LwQ-10B-Instruct/6ad5483c-13dc-4e79-a719-66af383d195a.json b/data/hfopenllm_v2/prithivMLmods/LwQ-10B-Instruct/6ad5483c-13dc-4e79-a719-66af383d195a.json new file mode 100644 index 000000000..d8310afc7 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/LwQ-10B-Instruct/6ad5483c-13dc-4e79-a719-66af383d195a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_LwQ-10B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LwQ-10B-Instruct", + "id": "prithivMLmods/LwQ-10B-Instruct", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.3935 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5122 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.04 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4544 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3318 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/LwQ-10B-Instruct/df470b21-0d55-4d28-af25-75908799a0cc.json b/data/hfopenllm_v2/prithivMLmods/LwQ-10B-Instruct/df470b21-0d55-4d28-af25-75908799a0cc.json deleted file mode 100644 index f74edb72f..000000000 --- a/data/hfopenllm_v2/prithivMLmods/LwQ-10B-Instruct/df470b21-0d55-4d28-af25-75908799a0cc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_LwQ-10B-Instruct/1762652580.4662411", - "retrieved_timestamp": "1762652580.466242", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/LwQ-10B-Instruct", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/LwQ-10B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3934770852449279 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5121712029712329 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45439583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.331781914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/LwQ-Reasoner-10B/9fa6813a-7acb-4c08-9912-6dc0d356a7e2.json b/data/hfopenllm_v2/prithivMLmods/LwQ-Reasoner-10B/9fa6813a-7acb-4c08-9912-6dc0d356a7e2.json new file mode 100644 index 000000000..e4c1d9c13 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/LwQ-Reasoner-10B/9fa6813a-7acb-4c08-9912-6dc0d356a7e2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_LwQ-Reasoner-10B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LwQ-Reasoner-10B", + "id": "prithivMLmods/LwQ-Reasoner-10B", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2941 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5866 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.358 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3465 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4079 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4147 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/LwQ-Reasoner-10B/d22507ab-2601-4bf0-a8d8-b456102c85af.json b/data/hfopenllm_v2/prithivMLmods/LwQ-Reasoner-10B/d22507ab-2601-4bf0-a8d8-b456102c85af.json deleted file mode 100644 index ec1358f68..000000000 --- a/data/hfopenllm_v2/prithivMLmods/LwQ-Reasoner-10B/d22507ab-2601-4bf0-a8d8-b456102c85af.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_LwQ-Reasoner-10B/1762652580.466471", - "retrieved_timestamp": "1762652580.466471", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/LwQ-Reasoner-10B", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/LwQ-Reasoner-10B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29413400887423147 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5866254169962443 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3580060422960725 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3464765100671141 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40785416666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41472739361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Magellanic-Opus-14B-Exp/07236482-8709-4aa8-8e63-762b2f591b2a.json b/data/hfopenllm_v2/prithivMLmods/Magellanic-Opus-14B-Exp/07236482-8709-4aa8-8e63-762b2f591b2a.json deleted file mode 100644 index 491a896e0..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Magellanic-Opus-14B-Exp/07236482-8709-4aa8-8e63-762b2f591b2a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Magellanic-Opus-14B-Exp/1762652580.466739", - "retrieved_timestamp": "1762652580.466739", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Magellanic-Opus-14B-Exp", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Magellanic-Opus-14B-Exp", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6866347956754744 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6382505935140227 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37990936555891236 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37416107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49262500000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5272606382978723 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Magellanic-Opus-14B-Exp/3880e3bf-6ff0-4eef-a519-2649014254e1.json b/data/hfopenllm_v2/prithivMLmods/Magellanic-Opus-14B-Exp/3880e3bf-6ff0-4eef-a519-2649014254e1.json new file mode 100644 index 000000000..227d602c1 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Magellanic-Opus-14B-Exp/3880e3bf-6ff0-4eef-a519-2649014254e1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Magellanic-Opus-14B-Exp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + 
"source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Magellanic-Opus-14B-Exp", + "id": "prithivMLmods/Magellanic-Opus-14B-Exp", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6866 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6383 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3799 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3742 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4926 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5273 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Magellanic-Qwen-25B-R999/e77efb9d-b1fc-4833-8e7f-8da683019018.json b/data/hfopenllm_v2/prithivMLmods/Magellanic-Qwen-25B-R999/e77efb9d-b1fc-4833-8e7f-8da683019018.json new file mode 100644 index 000000000..e8a7e420c --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Magellanic-Qwen-25B-R999/e77efb9d-b1fc-4833-8e7f-8da683019018.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Magellanic-Qwen-25B-R999/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"Magellanic-Qwen-25B-R999", + "id": "prithivMLmods/Magellanic-Qwen-25B-R999", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 24.962 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1873 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2608 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0053 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2508 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3831 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.13 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Megatron-Corpus-14B-Exp.v2/2bcc02df-8d27-412a-8b58-c331df98e4d4.json b/data/hfopenllm_v2/prithivMLmods/Megatron-Corpus-14B-Exp.v2/2bcc02df-8d27-412a-8b58-c331df98e4d4.json new file mode 100644 index 000000000..1ccbd4ee2 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Megatron-Corpus-14B-Exp.v2/2bcc02df-8d27-412a-8b58-c331df98e4d4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Megatron-Corpus-14B-Exp.v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Megatron-Corpus-14B-Exp.v2", + "id": "prithivMLmods/Megatron-Corpus-14B-Exp.v2", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": 
"bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.487 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6321 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2591 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3423 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.449 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.481 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Megatron-Corpus-14B-Exp.v2/f50a6538-057e-4e57-af79-ba3a5b7121cb.json b/data/hfopenllm_v2/prithivMLmods/Megatron-Corpus-14B-Exp.v2/f50a6538-057e-4e57-af79-ba3a5b7121cb.json deleted file mode 100644 index 1f998285c..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Megatron-Corpus-14B-Exp.v2/f50a6538-057e-4e57-af79-ba3a5b7121cb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Megatron-Corpus-14B-Exp.v2/1762652580.467396", - "retrieved_timestamp": "1762652580.4673972", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Megatron-Corpus-14B-Exp.v2", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Megatron-Corpus-14B-Exp.v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - 
"params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48704991644392437 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.632146083740281 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2590634441087613 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3422818791946309 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.449 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48096742021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Megatron-Corpus-14B-Exp/622531d5-03f8-42cf-974e-94291aa1e515.json b/data/hfopenllm_v2/prithivMLmods/Megatron-Corpus-14B-Exp/622531d5-03f8-42cf-974e-94291aa1e515.json new file mode 100644 index 000000000..7f5ceece4 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Megatron-Corpus-14B-Exp/622531d5-03f8-42cf-974e-94291aa1e515.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Megatron-Corpus-14B-Exp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Megatron-Corpus-14B-Exp", + "id": "prithivMLmods/Megatron-Corpus-14B-Exp", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4983 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6355 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + 
"source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3429 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3633 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4767 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.526 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Megatron-Corpus-14B-Exp/f71c4189-288e-4c6d-978c-d793ca57fedf.json b/data/hfopenllm_v2/prithivMLmods/Megatron-Corpus-14B-Exp/f71c4189-288e-4c6d-978c-d793ca57fedf.json deleted file mode 100644 index 6f5c7701d..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Megatron-Corpus-14B-Exp/f71c4189-288e-4c6d-978c-d793ca57fedf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Megatron-Corpus-14B-Exp/1762652580.46718", - "retrieved_timestamp": "1762652580.46718", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Megatron-Corpus-14B-Exp", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Megatron-Corpus-14B-Exp", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49826571275327247 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6355171004470184 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3429003021148036 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on 
GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36325503355704697 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4766875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5260139627659575 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-2.0/b772f20f-afbd-496c-9f94-e5fd30d54466.json b/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-2.0/b772f20f-afbd-496c-9f94-e5fd30d54466.json new file mode 100644 index 000000000..05306c382 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-2.0/b772f20f-afbd-496c-9f94-e5fd30d54466.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Megatron-Opus-14B-2.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Megatron-Opus-14B-2.0", + "id": "prithivMLmods/Megatron-Opus-14B-2.0", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6694 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6871 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2779 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3591 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.414 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.517 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-2.0/c6dd1b78-b487-4197-8a66-c364487ff6fb.json b/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-2.0/c6dd1b78-b487-4197-8a66-c364487ff6fb.json deleted file mode 100644 index 7fd5aaa4a..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-2.0/c6dd1b78-b487-4197-8a66-c364487ff6fb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Megatron-Opus-14B-2.0/1762652580.467613", - "retrieved_timestamp": "1762652580.467613", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Megatron-Opus-14B-2.0", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Megatron-Opus-14B-2.0", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6693739278447852 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6870557211788685 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27794561933534745 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35906040268456374 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41403125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5170378989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-2.1/002ba3ef-6ac7-4bdf-bd7d-42ef16aa7cc9.json b/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-2.1/002ba3ef-6ac7-4bdf-bd7d-42ef16aa7cc9.json deleted file mode 100644 index 8e264e0c7..000000000 --- 
a/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-2.1/002ba3ef-6ac7-4bdf-bd7d-42ef16aa7cc9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Megatron-Opus-14B-2.1/1762652580.4678242", - "retrieved_timestamp": "1762652580.467825", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Megatron-Opus-14B-2.1", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Megatron-Opus-14B-2.1", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02455484780382718 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6726960005125086 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2998489425981873 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38338926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49275 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5173703457446809 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-2.1/169d5ad3-ae4a-42de-b951-f264d85bf623.json b/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-2.1/169d5ad3-ae4a-42de-b951-f264d85bf623.json new file mode 100644 index 000000000..0def4383d --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-2.1/169d5ad3-ae4a-42de-b951-f264d85bf623.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Megatron-Opus-14B-2.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Megatron-Opus-14B-2.1", + "id": "prithivMLmods/Megatron-Opus-14B-2.1", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + 
"evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0246 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6727 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2998 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3834 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4928 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5174 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-Exp/ac65fabb-07d5-457d-844e-19aecf2b18e0.json b/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-Exp/ac65fabb-07d5-457d-844e-19aecf2b18e0.json deleted file mode 100644 index 02caa8249..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-Exp/ac65fabb-07d5-457d-844e-19aecf2b18e0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Megatron-Opus-14B-Exp/1762652580.46803", - "retrieved_timestamp": "1762652580.468031", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Megatron-Opus-14B-Exp", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Megatron-Opus-14B-Exp", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4979410187192206 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6516090109599467 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35347432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.375 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48865625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5400598404255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-Exp/e84c3b50-4ea9-4f41-be11-50c6aa3d4656.json b/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-Exp/e84c3b50-4ea9-4f41-be11-50c6aa3d4656.json new file mode 100644 index 000000000..6ccd36216 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-Exp/e84c3b50-4ea9-4f41-be11-50c6aa3d4656.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Megatron-Opus-14B-Exp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Megatron-Opus-14B-Exp", + "id": "prithivMLmods/Megatron-Opus-14B-Exp", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4979 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6516 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on 
MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3535 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.375 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4887 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5401 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-Stock/594780dc-d969-4a6b-b90b-1cc32f40c452.json b/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-Stock/594780dc-d969-4a6b-b90b-1cc32f40c452.json new file mode 100644 index 000000000..5f0f9b228 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-Stock/594780dc-d969-4a6b-b90b-1cc32f40c452.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Megatron-Opus-14B-Stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Megatron-Opus-14B-Stock", + "id": "prithivMLmods/Megatron-Opus-14B-Stock", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5174 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6412 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3346 + } + }, + { + "evaluation_name": "GPQA", + 
"source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.375 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.482 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5293 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-Stock/8a0828ef-56a0-4c2b-bc61-f955c56b7700.json b/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-Stock/8a0828ef-56a0-4c2b-bc61-f955c56b7700.json deleted file mode 100644 index dcf541677..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-Stock/8a0828ef-56a0-4c2b-bc61-f955c56b7700.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Megatron-Opus-14B-Stock/1762652580.468238", - "retrieved_timestamp": "1762652580.468238", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Megatron-Opus-14B-Stock", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Megatron-Opus-14B-Stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5173750094194515 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6411753580495262 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33459214501510576 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.375 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.48202083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5293384308510638 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-7B-Exp/4ff7c238-d69c-4b92-83d0-69cacdfa0fe6.json b/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-7B-Exp/4ff7c238-d69c-4b92-83d0-69cacdfa0fe6.json new file mode 100644 index 000000000..781a02264 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-7B-Exp/4ff7c238-d69c-4b92-83d0-69cacdfa0fe6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Megatron-Opus-7B-Exp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Megatron-Opus-7B-Exp", + "id": "prithivMLmods/Megatron-Opus-7B-Exp", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6017 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5367 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1971 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3112 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4186 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.39 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-7B-Exp/94536d01-2de8-4305-83aa-2673a226ab64.json b/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-7B-Exp/94536d01-2de8-4305-83aa-2673a226ab64.json deleted file mode 100644 index 91c9d0cf2..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-7B-Exp/94536d01-2de8-4305-83aa-2673a226ab64.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Megatron-Opus-7B-Exp/1762652580.468447", - "retrieved_timestamp": "1762652580.468448", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Megatron-Opus-7B-Exp", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Megatron-Opus-7B-Exp", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6017300761978217 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5367154102661396 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1971299093655589 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4185833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3900432180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Messier-Opus-14B-Elite7/bb576dc9-eede-48d6-b438-732da91a4d29.json b/data/hfopenllm_v2/prithivMLmods/Messier-Opus-14B-Elite7/bb576dc9-eede-48d6-b438-732da91a4d29.json new file mode 100644 index 000000000..a719fbf21 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Messier-Opus-14B-Elite7/bb576dc9-eede-48d6-b438-732da91a4d29.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Messier-Opus-14B-Elite7/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": 
"documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Messier-Opus-14B-Elite7", + "id": "prithivMLmods/Messier-Opus-14B-Elite7", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7113 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6499 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4071 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3909 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4886 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5404 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Messier-Opus-14B-Elite7/e2ac8e52-8326-496a-b904-ca0e48190b3b.json b/data/hfopenllm_v2/prithivMLmods/Messier-Opus-14B-Elite7/e2ac8e52-8326-496a-b904-ca0e48190b3b.json deleted file mode 100644 index 98623f65a..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Messier-Opus-14B-Elite7/e2ac8e52-8326-496a-b904-ca0e48190b3b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Messier-Opus-14B-Elite7/1762652580.4686568", - "retrieved_timestamp": "1762652580.468658", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - 
"source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Messier-Opus-14B-Elite7", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Messier-Opus-14B-Elite7", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7113392465325337 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6498611961862557 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4070996978851964 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39093959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4885625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5403922872340425 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Omni-Reasoner-Merged/0fb2fe17-b55d-4802-ad48-bd4d711e1e0f.json b/data/hfopenllm_v2/prithivMLmods/Omni-Reasoner-Merged/0fb2fe17-b55d-4802-ad48-bd4d711e1e0f.json new file mode 100644 index 000000000..86bda99c0 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Omni-Reasoner-Merged/0fb2fe17-b55d-4802-ad48-bd4d711e1e0f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Omni-Reasoner-Merged/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Omni-Reasoner-Merged", + "id": "prithivMLmods/Omni-Reasoner-Merged", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4599 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5508 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3331 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4616 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4364 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Omni-Reasoner-Merged/8043bcfd-1a4c-45c5-aca4-f23f02bd5562.json b/data/hfopenllm_v2/prithivMLmods/Omni-Reasoner-Merged/8043bcfd-1a4c-45c5-aca4-f23f02bd5562.json deleted file mode 100644 index cd6b7e1ea..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Omni-Reasoner-Merged/8043bcfd-1a4c-45c5-aca4-f23f02bd5562.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Omni-Reasoner-Merged/1762652580.468864", - "retrieved_timestamp": "1762652580.468864", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Omni-Reasoner-Merged", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Omni-Reasoner-Merged", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4599473840520929 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5507848245879011 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match 
on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3330815709969788 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4616458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43641954787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Omni-Reasoner3-Merged/03d59002-dc98-467f-b2a9-605ef8d9b763.json b/data/hfopenllm_v2/prithivMLmods/Omni-Reasoner3-Merged/03d59002-dc98-467f-b2a9-605ef8d9b763.json new file mode 100644 index 000000000..12f798f0d --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Omni-Reasoner3-Merged/03d59002-dc98-467f-b2a9-605ef8d9b763.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Omni-Reasoner3-Merged/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Omni-Reasoner3-Merged", + "id": "prithivMLmods/Omni-Reasoner3-Merged", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4935 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4388 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1088 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + 
"evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3522 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.295 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Omni-Reasoner3-Merged/972cdfdc-1c7f-4900-8acf-d5eed0ccc968.json b/data/hfopenllm_v2/prithivMLmods/Omni-Reasoner3-Merged/972cdfdc-1c7f-4900-8acf-d5eed0ccc968.json deleted file mode 100644 index 896bd24dd..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Omni-Reasoner3-Merged/972cdfdc-1c7f-4900-8acf-d5eed0ccc968.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Omni-Reasoner3-Merged/1762652580.46908", - "retrieved_timestamp": "1762652580.4690812", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Omni-Reasoner3-Merged", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Omni-Reasoner3-Merged", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.493469549683728 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4387847138827546 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10876132930513595 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35222916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29496343085106386 - } - } - ] 
-} diff --git a/data/hfopenllm_v2/prithivMLmods/Pegasus-Opus-14B-Exp/5cc40900-fe74-469a-99c0-74e998b0e316.json b/data/hfopenllm_v2/prithivMLmods/Pegasus-Opus-14B-Exp/5cc40900-fe74-469a-99c0-74e998b0e316.json deleted file mode 100644 index 10aac2c61..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Pegasus-Opus-14B-Exp/5cc40900-fe74-469a-99c0-74e998b0e316.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Pegasus-Opus-14B-Exp/1762652580.469298", - "retrieved_timestamp": "1762652580.4692988", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Pegasus-Opus-14B-Exp", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Pegasus-Opus-14B-Exp", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6981752860188744 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6547548394062034 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4086102719033233 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3951342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4859583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5412234042553191 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Pegasus-Opus-14B-Exp/8a7034fd-7027-4a87-9cac-c95b745935d0.json b/data/hfopenllm_v2/prithivMLmods/Pegasus-Opus-14B-Exp/8a7034fd-7027-4a87-9cac-c95b745935d0.json new file mode 100644 index 000000000..8f2a0236b --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Pegasus-Opus-14B-Exp/8a7034fd-7027-4a87-9cac-c95b745935d0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Pegasus-Opus-14B-Exp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"Pegasus-Opus-14B-Exp", + "id": "prithivMLmods/Pegasus-Opus-14B-Exp", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6982 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6548 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4086 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3951 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.486 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5412 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Phi-4-Empathetic/717f745f-1eae-4277-8a31-dbed140ef3e8.json b/data/hfopenllm_v2/prithivMLmods/Phi-4-Empathetic/717f745f-1eae-4277-8a31-dbed140ef3e8.json new file mode 100644 index 000000000..6b7f41a83 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Phi-4-Empathetic/717f745f-1eae-4277-8a31-dbed140ef3e8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Phi-4-Empathetic/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-4-Empathetic", + "id": "prithivMLmods/Phi-4-Empathetic", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + 
"params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0497 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6727 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2621 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.38 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4991 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5066 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Phi-4-Math-IO/2dc78735-c0c3-4dd7-8e97-52c92785e623.json b/data/hfopenllm_v2/prithivMLmods/Phi-4-Math-IO/2dc78735-c0c3-4dd7-8e97-52c92785e623.json new file mode 100644 index 000000000..6dc544cef --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Phi-4-Math-IO/2dc78735-c0c3-4dd7-8e97-52c92785e623.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Phi-4-Math-IO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-4-Math-IO", + "id": "prithivMLmods/Phi-4-Math-IO", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy 
on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.059 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6668 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4577 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3985 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4873 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5205 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Phi-4-QwQ/e9ab98ff-5cf0-4437-9cf3-c77ecb546c84.json b/data/hfopenllm_v2/prithivMLmods/Phi-4-QwQ/e9ab98ff-5cf0-4437-9cf3-c77ecb546c84.json new file mode 100644 index 000000000..6915cdbb1 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Phi-4-QwQ/e9ab98ff-5cf0-4437-9cf3-c77ecb546c84.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Phi-4-QwQ/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-4-QwQ", + "id": "prithivMLmods/Phi-4-QwQ", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0559 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": 
"SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6696 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4577 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3909 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4651 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5275 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Phi-4-Super-1/6303d73e-4129-472a-a6fd-c64cb3de7204.json b/data/hfopenllm_v2/prithivMLmods/Phi-4-Super-1/6303d73e-4129-472a-a6fd-c64cb3de7204.json new file mode 100644 index 000000000..6500288f7 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Phi-4-Super-1/6303d73e-4129-472a-a6fd-c64cb3de7204.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Phi-4-Super-1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-4-Super-1", + "id": "prithivMLmods/Phi-4-Super-1", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0418 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6729 + } + }, + { + 
"evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.352 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3935 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5017 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5235 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Phi-4-Super-o1/8a689e8f-19cc-45b7-80be-ce861a549af7.json b/data/hfopenllm_v2/prithivMLmods/Phi-4-Super-o1/8a689e8f-19cc-45b7-80be-ce861a549af7.json new file mode 100644 index 000000000..6a3f46611 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Phi-4-Super-o1/8a689e8f-19cc-45b7-80be-ce861a549af7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Phi-4-Super-o1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-4-Super-o1", + "id": "prithivMLmods/Phi-4-Super-o1", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0418 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6729 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.352 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3935 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5017 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5235 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Phi-4-Super/84881315-55a4-4f05-a115-cf82f850090d.json b/data/hfopenllm_v2/prithivMLmods/Phi-4-Super/84881315-55a4-4f05-a115-cf82f850090d.json new file mode 100644 index 000000000..659daae2b --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Phi-4-Super/84881315-55a4-4f05-a115-cf82f850090d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Phi-4-Super/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-4-Super", + "id": "prithivMLmods/Phi-4-Super", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0481 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.672 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3489 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": 
"Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3943 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5044 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5266 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Phi-4-o1/970dc71c-42be-4d50-86ac-f7301ec969ca.json b/data/hfopenllm_v2/prithivMLmods/Phi-4-o1/970dc71c-42be-4d50-86ac-f7301ec969ca.json new file mode 100644 index 000000000..f575a083a --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Phi-4-o1/970dc71c-42be-4d50-86ac-f7301ec969ca.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Phi-4-o1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-4-o1", + "id": "prithivMLmods/Phi-4-o1", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.029 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6689 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3995 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3826 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + 
"dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4978 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5174 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Phi4-Super/c02e1fcf-a837-4b8a-a42d-63837c56128d.json b/data/hfopenllm_v2/prithivMLmods/Phi4-Super/c02e1fcf-a837-4b8a-a42d-63837c56128d.json new file mode 100644 index 000000000..af8b3653b --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Phi4-Super/c02e1fcf-a837-4b8a-a42d-63837c56128d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Phi4-Super/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi4-Super", + "id": "prithivMLmods/Phi4-Super", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0481 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.672 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3489 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3943 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.5044 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5266 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Porpoise-Opus-14B-Exp/37280340-5b9a-47d9-aa37-9299d9025518.json b/data/hfopenllm_v2/prithivMLmods/Porpoise-Opus-14B-Exp/37280340-5b9a-47d9-aa37-9299d9025518.json new file mode 100644 index 000000000..71bc4e12e --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Porpoise-Opus-14B-Exp/37280340-5b9a-47d9-aa37-9299d9025518.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Porpoise-Opus-14B-Exp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Porpoise-Opus-14B-Exp", + "id": "prithivMLmods/Porpoise-Opus-14B-Exp", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7098 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6519 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4041 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3935 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4926 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5396 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Porpoise-Opus-14B-Exp/79832ae5-0a80-4e46-8175-4baa240dc4d9.json b/data/hfopenllm_v2/prithivMLmods/Porpoise-Opus-14B-Exp/79832ae5-0a80-4e46-8175-4baa240dc4d9.json deleted file mode 100644 index c8b0531d9..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Porpoise-Opus-14B-Exp/79832ae5-0a80-4e46-8175-4baa240dc4d9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Porpoise-Opus-14B-Exp/1762652580.47141", - "retrieved_timestamp": "1762652580.471411", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Porpoise-Opus-14B-Exp", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Porpoise-Opus-14B-Exp", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7098155117310957 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6518903547146537 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4040785498489426 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3934563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4925625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5396442819148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Primal-Opus-14B-Optimus-v1/46e7ad9b-b774-46b9-933c-913d1b307f7a.json b/data/hfopenllm_v2/prithivMLmods/Primal-Opus-14B-Optimus-v1/46e7ad9b-b774-46b9-933c-913d1b307f7a.json new file mode 100644 index 000000000..bb915353c --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Primal-Opus-14B-Optimus-v1/46e7ad9b-b774-46b9-933c-913d1b307f7a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Primal-Opus-14B-Optimus-v1/1770682486.623709", + 
"retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Primal-Opus-14B-Optimus-v1", + "id": "prithivMLmods/Primal-Opus-14B-Optimus-v1", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5013 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6419 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3384 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3725 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4847 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5259 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Primal-Opus-14B-Optimus-v1/94c21b1f-ce8d-4488-a1d1-2769d34f29ec.json b/data/hfopenllm_v2/prithivMLmods/Primal-Opus-14B-Optimus-v1/94c21b1f-ce8d-4488-a1d1-2769d34f29ec.json deleted file mode 100644 index cea103f3f..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Primal-Opus-14B-Optimus-v1/94c21b1f-ce8d-4488-a1d1-2769d34f29ec.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Primal-Opus-14B-Optimus-v1/1762652580.4716318", - "retrieved_timestamp": "1762652580.471633", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { 
- "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Primal-Opus-14B-Optimus-v1", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Primal-Opus-14B-Optimus-v1", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5013131823561483 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6419423743359406 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.338368580060423 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3724832214765101 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48471875000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5259308510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Primal-Opus-14B-Optimus-v2/80407172-765a-4aa9-b189-a322150b1a7b.json b/data/hfopenllm_v2/prithivMLmods/Primal-Opus-14B-Optimus-v2/80407172-765a-4aa9-b189-a322150b1a7b.json deleted file mode 100644 index 45650cc2f..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Primal-Opus-14B-Optimus-v2/80407172-765a-4aa9-b189-a322150b1a7b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Primal-Opus-14B-Optimus-v2/1762652580.471854", - "retrieved_timestamp": "1762652580.471854", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Primal-Opus-14B-Optimus-v2", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Primal-Opus-14B-Optimus-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.6403730989330532 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6543780845512958 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4206948640483384 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39177852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48998958333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.542220744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Primal-Opus-14B-Optimus-v2/c154d3f5-39dc-43c0-85ea-2e43b08494b4.json b/data/hfopenllm_v2/prithivMLmods/Primal-Opus-14B-Optimus-v2/c154d3f5-39dc-43c0-85ea-2e43b08494b4.json new file mode 100644 index 000000000..ff98c5def --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Primal-Opus-14B-Optimus-v2/c154d3f5-39dc-43c0-85ea-2e43b08494b4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Primal-Opus-14B-Optimus-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Primal-Opus-14B-Optimus-v2", + "id": "prithivMLmods/Primal-Opus-14B-Optimus-v2", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6404 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6544 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.4207 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3918 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.49 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5422 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT-14B-Conversational/71114773-e285-4666-ae7f-5fd7c9084104.json b/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT-14B-Conversational/71114773-e285-4666-ae7f-5fd7c9084104.json deleted file mode 100644 index 2e8975026..000000000 --- a/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT-14B-Conversational/71114773-e285-4666-ae7f-5fd7c9084104.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_QwQ-LCoT-14B-Conversational/1762652580.472128", - "retrieved_timestamp": "1762652580.472129", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/QwQ-LCoT-14B-Conversational", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/QwQ-LCoT-14B-Conversational", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4047427492386867 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6239828933798323 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4652567975830816 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3498322147651007 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": 
"Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48471875000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.527842420212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT-14B-Conversational/abd830e4-2b7f-4895-8262-75926edbafd9.json b/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT-14B-Conversational/abd830e4-2b7f-4895-8262-75926edbafd9.json new file mode 100644 index 000000000..370d5eafa --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT-14B-Conversational/abd830e4-2b7f-4895-8262-75926edbafd9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_QwQ-LCoT-14B-Conversational/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwQ-LCoT-14B-Conversational", + "id": "prithivMLmods/QwQ-LCoT-14B-Conversational", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4047 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.624 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4653 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3498 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4847 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": 
"TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5278 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT-3B-Instruct/2c945021-72e3-4e7a-9c6f-81efb27b2206.json b/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT-3B-Instruct/2c945021-72e3-4e7a-9c6f-81efb27b2206.json new file mode 100644 index 000000000..183db0968 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT-3B-Instruct/2c945021-72e3-4e7a-9c6f-81efb27b2206.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_QwQ-LCoT-3B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwQ-LCoT-3B-Instruct", + "id": "prithivMLmods/QwQ-LCoT-3B-Instruct", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4354 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4763 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2825 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2819 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4358 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { 
+ "score": 0.3582 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT-3B-Instruct/87fc8696-17f1-4a86-8d0d-f5b124144384.json b/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT-3B-Instruct/87fc8696-17f1-4a86-8d0d-f5b124144384.json deleted file mode 100644 index c6d557f2f..000000000 --- a/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT-3B-Instruct/87fc8696-17f1-4a86-8d0d-f5b124144384.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_QwQ-LCoT-3B-Instruct/1762652580.47235", - "retrieved_timestamp": "1762652580.472351", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/QwQ-LCoT-3B-Instruct", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/QwQ-LCoT-3B-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4354424039326764 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47629783868435643 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2824773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43579166666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3582114361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT-7B-Instruct/23f056f6-67dd-41fd-b1af-a1cf9abf784c.json b/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT-7B-Instruct/23f056f6-67dd-41fd-b1af-a1cf9abf784c.json deleted file mode 100644 index d479bf3ad..000000000 --- a/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT-7B-Instruct/23f056f6-67dd-41fd-b1af-a1cf9abf784c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_QwQ-LCoT-7B-Instruct/1762652580.4725702", - "retrieved_timestamp": "1762652580.472571", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/QwQ-LCoT-7B-Instruct", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/QwQ-LCoT-7B-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4986901421561457 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5466466326018563 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3716012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4801875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4334275265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT-7B-Instruct/5f0ea694-7f73-45fa-b54f-49fc06d1a6d9.json b/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT-7B-Instruct/5f0ea694-7f73-45fa-b54f-49fc06d1a6d9.json new file mode 100644 index 000000000..97c5d8499 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT-7B-Instruct/5f0ea694-7f73-45fa-b54f-49fc06d1a6d9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_QwQ-LCoT-7B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwQ-LCoT-7B-Instruct", + "id": "prithivMLmods/QwQ-LCoT-7B-Instruct", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4987 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + 
"dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5466 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3716 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4802 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4334 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT1-Merged/34aec318-6db4-4df6-9d6a-ad15e353f36a.json b/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT1-Merged/34aec318-6db4-4df6-9d6a-ad15e353f36a.json deleted file mode 100644 index 47c331ac0..000000000 --- a/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT1-Merged/34aec318-6db4-4df6-9d6a-ad15e353f36a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_QwQ-LCoT1-Merged/1762652580.47278", - "retrieved_timestamp": "1762652580.472781", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/QwQ-LCoT1-Merged", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/QwQ-LCoT1-Merged", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47513486438206187 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.548095531408024 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3731117824773414 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46961458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4357546542553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT1-Merged/6c73f6ae-8ffd-4948-8071-33eab07437a6.json b/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT1-Merged/6c73f6ae-8ffd-4948-8071-33eab07437a6.json new file mode 100644 index 000000000..5f5612428 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT1-Merged/6c73f6ae-8ffd-4948-8071-33eab07437a6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_QwQ-LCoT1-Merged/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwQ-LCoT1-Merged", + "id": "prithivMLmods/QwQ-LCoT1-Merged", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4751 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5481 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3731 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, 
+ "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4696 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4358 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT2-7B-Instruct/8c05d496-c21f-4a70-b312-1c1ba37d877a.json b/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT2-7B-Instruct/8c05d496-c21f-4a70-b312-1c1ba37d877a.json deleted file mode 100644 index 9ec579623..000000000 --- a/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT2-7B-Instruct/8c05d496-c21f-4a70-b312-1c1ba37d877a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_QwQ-LCoT2-7B-Instruct/1762652580.473001", - "retrieved_timestamp": "1762652580.473002", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/QwQ-LCoT2-7B-Instruct", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/QwQ-LCoT2-7B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5561177675235043 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5424862934133593 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3270392749244713 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4564375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { 
- "score": 0.4341755319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT2-7B-Instruct/fbf71df3-b9c3-4f9c-b538-e4ccf097e81c.json b/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT2-7B-Instruct/fbf71df3-b9c3-4f9c-b538-e4ccf097e81c.json new file mode 100644 index 000000000..32ba1bb53 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT2-7B-Instruct/fbf71df3-b9c3-4f9c-b538-e4ccf097e81c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_QwQ-LCoT2-7B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwQ-LCoT2-7B-Instruct", + "id": "prithivMLmods/QwQ-LCoT2-7B-Instruct", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5561 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5425 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.327 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4564 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4342 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/QwQ-MathOct-7B/e3dcfd94-ca04-4cd3-ada5-e701a8b776da.json 
b/data/hfopenllm_v2/prithivMLmods/QwQ-MathOct-7B/e3dcfd94-ca04-4cd3-ada5-e701a8b776da.json new file mode 100644 index 000000000..380be752b --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/QwQ-MathOct-7B/e3dcfd94-ca04-4cd3-ada5-e701a8b776da.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_QwQ-MathOct-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwQ-MathOct-7B", + "id": "prithivMLmods/QwQ-MathOct-7B", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4684 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5486 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4601 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.433 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/QwQ-MathOct-7B/e703fed7-cf06-4caa-b78f-3e398b437671.json b/data/hfopenllm_v2/prithivMLmods/QwQ-MathOct-7B/e703fed7-cf06-4caa-b78f-3e398b437671.json deleted file mode 100644 index 22f89f449..000000000 --- a/data/hfopenllm_v2/prithivMLmods/QwQ-MathOct-7B/e703fed7-cf06-4caa-b78f-3e398b437671.json +++ /dev/null @@ 
-1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_QwQ-MathOct-7B/1762652580.473228", - "retrieved_timestamp": "1762652580.4732292", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/QwQ-MathOct-7B", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/QwQ-MathOct-7B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4684404047926169 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5485512215016556 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29531722054380666 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4600625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4330119680851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/QwQ-R1-Distill-1.5B-CoT/8dd67de7-0d3b-4359-b390-d90c609dea5a.json b/data/hfopenllm_v2/prithivMLmods/QwQ-R1-Distill-1.5B-CoT/8dd67de7-0d3b-4359-b390-d90c609dea5a.json deleted file mode 100644 index 02c0248dc..000000000 --- a/data/hfopenllm_v2/prithivMLmods/QwQ-R1-Distill-1.5B-CoT/8dd67de7-0d3b-4359-b390-d90c609dea5a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_QwQ-R1-Distill-1.5B-CoT/1762652580.4734771", - "retrieved_timestamp": "1762652580.473483", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/QwQ-R1-Distill-1.5B-CoT", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/QwQ-R1-Distill-1.5B-CoT", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } 
- }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21939564799177294 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36662076641982305 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33459214501510576 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34339583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19132313829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/QwQ-R1-Distill-1.5B-CoT/9278bcf2-bfab-437f-bd64-7496b24fb8cf.json b/data/hfopenllm_v2/prithivMLmods/QwQ-R1-Distill-1.5B-CoT/9278bcf2-bfab-437f-bd64-7496b24fb8cf.json new file mode 100644 index 000000000..cd87f5d54 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/QwQ-R1-Distill-1.5B-CoT/9278bcf2-bfab-437f-bd64-7496b24fb8cf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_QwQ-R1-Distill-1.5B-CoT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwQ-R1-Distill-1.5B-CoT", + "id": "prithivMLmods/QwQ-R1-Distill-1.5B-CoT", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2194 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3666 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": 
"hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3346 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2861 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3434 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1913 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/QwQ-R1-Distill-7B-CoT/633aa068-5613-41d8-a194-aebc9ce1586f.json b/data/hfopenllm_v2/prithivMLmods/QwQ-R1-Distill-7B-CoT/633aa068-5613-41d8-a194-aebc9ce1586f.json new file mode 100644 index 000000000..3e840aa4b --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/QwQ-R1-Distill-7B-CoT/633aa068-5613-41d8-a194-aebc9ce1586f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_QwQ-R1-Distill-7B-CoT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwQ-R1-Distill-7B-CoT", + "id": "prithivMLmods/QwQ-R1-Distill-7B-CoT", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.35 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4388 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4683 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3779 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2804 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/QwQ-R1-Distill-7B-CoT/a723f173-af0e-4172-a43c-278ccbacac18.json b/data/hfopenllm_v2/prithivMLmods/QwQ-R1-Distill-7B-CoT/a723f173-af0e-4172-a43c-278ccbacac18.json deleted file mode 100644 index 6772d638d..000000000 --- a/data/hfopenllm_v2/prithivMLmods/QwQ-R1-Distill-7B-CoT/a723f173-af0e-4172-a43c-278ccbacac18.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_QwQ-R1-Distill-7B-CoT/1762652580.473804", - "retrieved_timestamp": "1762652580.473805", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/QwQ-R1-Distill-7B-CoT", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/QwQ-R1-Distill-7B-CoT", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3500378994401522 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.438788672517715 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46827794561933533 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37790624999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2804188829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Qwen-7B-Distill-Reasoner/d3c1a922-a453-4c7b-b33b-52934e7bf72b.json b/data/hfopenllm_v2/prithivMLmods/Qwen-7B-Distill-Reasoner/d3c1a922-a453-4c7b-b33b-52934e7bf72b.json new file mode 100644 index 000000000..d23b96a42 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Qwen-7B-Distill-Reasoner/d3c1a922-a453-4c7b-b33b-52934e7bf72b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Qwen-7B-Distill-Reasoner/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen-7B-Distill-Reasoner", + "id": "prithivMLmods/Qwen-7B-Distill-Reasoner", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3396 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4409 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.395 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3272 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.366 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": 
"TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2818 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Qwen2.5-1.5B-DeepSeek-R1-Instruct/3a27b2a6-5eea-450b-91c7-1dc006229985.json b/data/hfopenllm_v2/prithivMLmods/Qwen2.5-1.5B-DeepSeek-R1-Instruct/3a27b2a6-5eea-450b-91c7-1dc006229985.json new file mode 100644 index 000000000..5d9c9b69d --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Qwen2.5-1.5B-DeepSeek-R1-Instruct/3a27b2a6-5eea-450b-91c7-1dc006229985.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Qwen2.5-1.5B-DeepSeek-R1-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-1.5B-DeepSeek-R1-Instruct", + "id": "prithivMLmods/Qwen2.5-1.5B-DeepSeek-R1-Instruct", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1397 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2824 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3724 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1123 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Qwen2.5-1.5B-DeepSeek-R1-Instruct/b1430f51-cd48-4feb-8d94-c2a9a60f00bc.json b/data/hfopenllm_v2/prithivMLmods/Qwen2.5-1.5B-DeepSeek-R1-Instruct/b1430f51-cd48-4feb-8d94-c2a9a60f00bc.json deleted file mode 100644 index 2343333f3..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Qwen2.5-1.5B-DeepSeek-R1-Instruct/b1430f51-cd48-4feb-8d94-c2a9a60f00bc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Qwen2.5-1.5B-DeepSeek-R1-Instruct/1762652580.474298", - "retrieved_timestamp": "1762652580.474299", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Qwen2.5-1.5B-DeepSeek-R1-Instruct", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Qwen2.5-1.5B-DeepSeek-R1-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13968603305895025 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28243669901671337 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3723541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11228390957446809 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Qwen2.5-14B-DeepSeek-R1-1M/395e37ae-005d-47c0-9cf5-919460e34350.json b/data/hfopenllm_v2/prithivMLmods/Qwen2.5-14B-DeepSeek-R1-1M/395e37ae-005d-47c0-9cf5-919460e34350.json new file mode 100644 index 000000000..d92c0c284 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Qwen2.5-14B-DeepSeek-R1-1M/395e37ae-005d-47c0-9cf5-919460e34350.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Qwen2.5-14B-DeepSeek-R1-1M/1770682486.623709", + "retrieved_timestamp": 
"1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-DeepSeek-R1-1M", + "id": "prithivMLmods/Qwen2.5-14B-DeepSeek-R1-1M", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4193 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5935 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5128 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3322 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4606 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4899 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Qwen2.5-7B-DeepSeek-R1-1M/b03b7c7a-f263-4712-bcf4-2e32ca4bd237.json b/data/hfopenllm_v2/prithivMLmods/Qwen2.5-7B-DeepSeek-R1-1M/b03b7c7a-f263-4712-bcf4-2e32ca4bd237.json new file mode 100644 index 000000000..5a7f8d146 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Qwen2.5-7B-DeepSeek-R1-1M/b03b7c7a-f263-4712-bcf4-2e32ca4bd237.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Qwen2.5-7B-DeepSeek-R1-1M/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": 
"third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-DeepSeek-R1-1M", + "id": "prithivMLmods/Qwen2.5-7B-DeepSeek-R1-1M", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1861 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3126 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0151 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3417 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1201 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/SmolLM2-CoT-360M/452ab810-6921-4922-9446-f2a5c081dc61.json b/data/hfopenllm_v2/prithivMLmods/SmolLM2-CoT-360M/452ab810-6921-4922-9446-f2a5c081dc61.json new file mode 100644 index 000000000..b9e15197c --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/SmolLM2-CoT-360M/452ab810-6921-4922-9446-f2a5c081dc61.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_SmolLM2-CoT-360M/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SmolLM2-CoT-360M", + "id": "prithivMLmods/SmolLM2-CoT-360M", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", 
+ "architecture": "LlamaForCausalLM", + "params_billions": 0.362 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2216 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3135 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0204 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2366 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3794 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1085 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/SmolLM2-CoT-360M/8ce4dea8-d674-4b95-b025-0c6ab60f6544.json b/data/hfopenllm_v2/prithivMLmods/SmolLM2-CoT-360M/8ce4dea8-d674-4b95-b025-0c6ab60f6544.json deleted file mode 100644 index a0234f405..000000000 --- a/data/hfopenllm_v2/prithivMLmods/SmolLM2-CoT-360M/8ce4dea8-d674-4b95-b025-0c6ab60f6544.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_SmolLM2-CoT-360M/1762652580.475137", - "retrieved_timestamp": "1762652580.475137", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/SmolLM2-CoT-360M", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/SmolLM2-CoT-360M", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.362 - } - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22156877086131466 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31352960121180296 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23657718120805368 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3793958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1085438829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Elite5/1abba5a0-f1a3-4f39-a81c-f4cd641d33ac.json b/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Elite5/1abba5a0-f1a3-4f39-a81c-f4cd641d33ac.json new file mode 100644 index 000000000..29c5fd248 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Elite5/1abba5a0-f1a3-4f39-a81c-f4cd641d33ac.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Sombrero-Opus-14B-Elite5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sombrero-Opus-14B-Elite5", + "id": "prithivMLmods/Sombrero-Opus-14B-Elite5", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7881 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6502 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5355 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3364 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4287 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.52 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Elite5/3b12518e-ef16-4a72-89bb-071802ca636c.json b/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Elite5/3b12518e-ef16-4a72-89bb-071802ca636c.json deleted file mode 100644 index 5f56d83cf..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Elite5/3b12518e-ef16-4a72-89bb-071802ca636c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Sombrero-Opus-14B-Elite5/1762652580.4753642", - "retrieved_timestamp": "1762652580.4753652", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Sombrero-Opus-14B-Elite5", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Sombrero-Opus-14B-Elite5", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7880756393037142 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6501539892126272 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5354984894259819 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33640939597315433 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4286666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.520029920212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Elite6/0d354980-9f24-4b79-afb7-a7e6f52e8131.json b/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Elite6/0d354980-9f24-4b79-afb7-a7e6f52e8131.json deleted file mode 100644 index 90dcbac66..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Elite6/0d354980-9f24-4b79-afb7-a7e6f52e8131.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Sombrero-Opus-14B-Elite6/1762652580.47572", - "retrieved_timestamp": "1762652580.475722", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Sombrero-Opus-14B-Elite6", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Sombrero-Opus-14B-Elite6", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7226049105262924 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6487937804559186 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3934563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48859375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5389793882978723 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Elite6/b2eefd3a-795c-4dc0-a10e-924bece05ea5.json b/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Elite6/b2eefd3a-795c-4dc0-a10e-924bece05ea5.json new file mode 100644 index 000000000..625a33f9d --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Elite6/b2eefd3a-795c-4dc0-a10e-924bece05ea5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Sombrero-Opus-14B-Elite6/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sombrero-Opus-14B-Elite6", + "id": "prithivMLmods/Sombrero-Opus-14B-Elite6", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7226 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6488 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4079 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3935 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4886 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.539 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm1/008cc919-f156-4a2e-af4b-eed015ca91f6.json b/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm1/008cc919-f156-4a2e-af4b-eed015ca91f6.json 
new file mode 100644 index 000000000..f425d01b1 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm1/008cc919-f156-4a2e-af4b-eed015ca91f6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Sombrero-Opus-14B-Sm1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sombrero-Opus-14B-Sm1", + "id": "prithivMLmods/Sombrero-Opus-14B-Sm1", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3813 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6355 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5665 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4035 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5299 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5125 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm1/5ce1b22c-7daa-4714-a774-d7d509fa869f.json b/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm1/5ce1b22c-7daa-4714-a774-d7d509fa869f.json deleted file mode 100644 index 0d48f3121..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm1/5ce1b22c-7daa-4714-a774-d7d509fa869f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": 
"0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Sombrero-Opus-14B-Sm1/1762652580.476064", - "retrieved_timestamp": "1762652580.476065", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Sombrero-Opus-14B-Sm1", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Sombrero-Opus-14B-Sm1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3812872068334242 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.635462046379832 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4035234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5298958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.512466755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm2/6a1519e9-062b-454f-97cb-e57454f74e9a.json b/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm2/6a1519e9-062b-454f-97cb-e57454f74e9a.json deleted file mode 100644 index 8756134ac..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm2/6a1519e9-062b-454f-97cb-e57454f74e9a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Sombrero-Opus-14B-Sm2/1762652580.476301", - "retrieved_timestamp": "1762652580.4763021", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Sombrero-Opus-14B-Sm2", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Sombrero-Opus-14B-Sm2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4272242095417935 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6609367219259568 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.486404833836858 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3884228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5088125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5344913563829787 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm2/9d56082f-5e46-4d7a-8f06-cb44fc983b3f.json b/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm2/9d56082f-5e46-4d7a-8f06-cb44fc983b3f.json new file mode 100644 index 000000000..333fc6de0 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm2/9d56082f-5e46-4d7a-8f06-cb44fc983b3f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Sombrero-Opus-14B-Sm2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sombrero-Opus-14B-Sm2", + "id": "prithivMLmods/Sombrero-Opus-14B-Sm2", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4272 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6609 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4864 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3884 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5088 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5345 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm4/79a8057c-0791-42d6-adef-924a9cff0917.json b/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm4/79a8057c-0791-42d6-adef-924a9cff0917.json deleted file mode 100644 index 1e2f11833..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm4/79a8057c-0791-42d6-adef-924a9cff0917.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Sombrero-Opus-14B-Sm4/1762652580.476516", - "retrieved_timestamp": "1762652580.4765172", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Sombrero-Opus-14B-Sm4", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Sombrero-Opus-14B-Sm4", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4346932804957513 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6612776404137711 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4879154078549849 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3951342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5191666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5300033244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm4/7ea26e73-a501-40bf-8f01-81ab8e850a91.json b/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm4/7ea26e73-a501-40bf-8f01-81ab8e850a91.json new file mode 100644 index 000000000..9a2d75354 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm4/7ea26e73-a501-40bf-8f01-81ab8e850a91.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Sombrero-Opus-14B-Sm4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sombrero-Opus-14B-Sm4", + "id": "prithivMLmods/Sombrero-Opus-14B-Sm4", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4347 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6613 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4879 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3951 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.5192 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.53 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm5/41acaa59-3232-4c6c-be64-0acb38019405.json b/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm5/41acaa59-3232-4c6c-be64-0acb38019405.json deleted file mode 100644 index 530365faa..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm5/41acaa59-3232-4c6c-be64-0acb38019405.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Sombrero-Opus-14B-Sm5/1762652580.476726", - "retrieved_timestamp": "1762652580.476726", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Sombrero-Opus-14B-Sm5", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Sombrero-Opus-14B-Sm5", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6851609285584471 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6563944936055776 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4093655589123867 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38674496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.480625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5399767287234043 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm5/e3343130-cf4f-4e5c-b2d3-5dda13d575b9.json b/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm5/e3343130-cf4f-4e5c-b2d3-5dda13d575b9.json new file mode 100644 index 000000000..2d767adc0 --- /dev/null +++ 
b/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm5/e3343130-cf4f-4e5c-b2d3-5dda13d575b9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Sombrero-Opus-14B-Sm5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sombrero-Opus-14B-Sm5", + "id": "prithivMLmods/Sombrero-Opus-14B-Sm5", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6852 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6564 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4094 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3867 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4806 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.54 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Sqweeks-7B-Instruct/ba1965f8-b59f-4d71-920c-e3b401ca0534.json b/data/hfopenllm_v2/prithivMLmods/Sqweeks-7B-Instruct/ba1965f8-b59f-4d71-920c-e3b401ca0534.json new file mode 100644 index 000000000..55de0790e --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Sqweeks-7B-Instruct/ba1965f8-b59f-4d71-920c-e3b401ca0534.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/prithivMLmods_Sqweeks-7B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sqweeks-7B-Instruct", + "id": "prithivMLmods/Sqweeks-7B-Instruct", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2158 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4667 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5144 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4476 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3133 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Sqweeks-7B-Instruct/e0eaf433-d842-47c2-b47f-9e0ddd95df72.json b/data/hfopenllm_v2/prithivMLmods/Sqweeks-7B-Instruct/e0eaf433-d842-47c2-b47f-9e0ddd95df72.json deleted file mode 100644 index 2deecdfcd..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Sqweeks-7B-Instruct/e0eaf433-d842-47c2-b47f-9e0ddd95df72.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Sqweeks-7B-Instruct/1762652580.476933", - "retrieved_timestamp": "1762652580.476934", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - 
], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Sqweeks-7B-Instruct", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Sqweeks-7B-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21579852568961466 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4666692459456812 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5143504531722054 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44760416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3133311170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Tadpole-Opus-14B-Exp/0faf87d0-2b35-4256-acd9-4fe57f574d06.json b/data/hfopenllm_v2/prithivMLmods/Tadpole-Opus-14B-Exp/0faf87d0-2b35-4256-acd9-4fe57f574d06.json deleted file mode 100644 index fa87168f2..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Tadpole-Opus-14B-Exp/0faf87d0-2b35-4256-acd9-4fe57f574d06.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Tadpole-Opus-14B-Exp/1762652580.477141", - "retrieved_timestamp": "1762652580.477142", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Tadpole-Opus-14B-Exp", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Tadpole-Opus-14B-Exp", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.5749522378400422 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.636858708544215 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31344410876132933 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3859060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47284375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5322473404255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Tadpole-Opus-14B-Exp/6dc87410-a39e-41b1-8759-68c1556c8419.json b/data/hfopenllm_v2/prithivMLmods/Tadpole-Opus-14B-Exp/6dc87410-a39e-41b1-8759-68c1556c8419.json new file mode 100644 index 000000000..dc0362947 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Tadpole-Opus-14B-Exp/6dc87410-a39e-41b1-8759-68c1556c8419.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Tadpole-Opus-14B-Exp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Tadpole-Opus-14B-Exp", + "id": "prithivMLmods/Tadpole-Opus-14B-Exp", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.575 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6369 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3134 + } + }, + { + 
"evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3859 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4728 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5322 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Taurus-Opus-7B/01448351-5f76-4329-9bfd-4124e29de920.json b/data/hfopenllm_v2/prithivMLmods/Taurus-Opus-7B/01448351-5f76-4329-9bfd-4124e29de920.json deleted file mode 100644 index e17756f86..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Taurus-Opus-7B/01448351-5f76-4329-9bfd-4124e29de920.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Taurus-Opus-7B/1762652580.477352", - "retrieved_timestamp": "1762652580.4773529", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Taurus-Opus-7B", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Taurus-Opus-7B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42232831110342783 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5367364587851736 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21676737160120846 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3263422818791946 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.43988541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3951130319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Taurus-Opus-7B/c4ebe788-fb60-453b-914b-56bf87dd6374.json b/data/hfopenllm_v2/prithivMLmods/Taurus-Opus-7B/c4ebe788-fb60-453b-914b-56bf87dd6374.json new file mode 100644 index 000000000..02432070f --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Taurus-Opus-7B/c4ebe788-fb60-453b-914b-56bf87dd6374.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Taurus-Opus-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Taurus-Opus-7B", + "id": "prithivMLmods/Taurus-Opus-7B", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4223 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5367 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2168 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3263 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4399 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.3951 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Triangulum-10B/45a44cc8-a550-4d2f-b0f4-37b4aac6a2b5.json b/data/hfopenllm_v2/prithivMLmods/Triangulum-10B/45a44cc8-a550-4d2f-b0f4-37b4aac6a2b5.json new file mode 100644 index 000000000..880fb2e12 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Triangulum-10B/45a44cc8-a550-4d2f-b0f4-37b4aac6a2b5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Triangulum-10B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangulum-10B", + "id": "prithivMLmods/Triangulum-10B", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3229 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5968 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.355 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.354 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4172 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4178 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Triangulum-10B/ee5ad026-8df4-41c0-9158-3759d4a3ef02.json 
b/data/hfopenllm_v2/prithivMLmods/Triangulum-10B/ee5ad026-8df4-41c0-9158-3759d4a3ef02.json deleted file mode 100644 index efc85ec7e..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Triangulum-10B/ee5ad026-8df4-41c0-9158-3759d4a3ef02.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Triangulum-10B/1762652580.477568", - "retrieved_timestamp": "1762652580.477569", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Triangulum-10B", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Triangulum-10B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3229353670483207 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5968023910391113 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3549848942598187 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3540268456375839 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41724999999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4178025265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Triangulum-5B/10593c13-3b30-4605-8063-c6a6526fc9d9.json b/data/hfopenllm_v2/prithivMLmods/Triangulum-5B/10593c13-3b30-4605-8063-c6a6526fc9d9.json new file mode 100644 index 000000000..6aff0ab5e --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Triangulum-5B/10593c13-3b30-4605-8063-c6a6526fc9d9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Triangulum-5B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangulum-5B", + "id": "prithivMLmods/Triangulum-5B", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": 
"float16", + "architecture": "LlamaForCausalLM", + "params_billions": 5.413 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1283 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3124 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0106 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.255 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3445 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1223 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Triangulum-5B/7d8850c3-61b2-41c3-a01b-8e23511558f6.json b/data/hfopenllm_v2/prithivMLmods/Triangulum-5B/7d8850c3-61b2-41c3-a01b-8e23511558f6.json deleted file mode 100644 index 662070786..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Triangulum-5B/7d8850c3-61b2-41c3-a01b-8e23511558f6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Triangulum-5B/1762652580.477782", - "retrieved_timestamp": "1762652580.477782", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Triangulum-5B", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Triangulum-5B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 5.413 - } - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1283206336963701 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3124115848614622 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2550335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3445416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12234042553191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Triangulum-v2-10B/00f8547d-4bb9-4510-a29c-c37376c274c8.json b/data/hfopenllm_v2/prithivMLmods/Triangulum-v2-10B/00f8547d-4bb9-4510-a29c-c37376c274c8.json deleted file mode 100644 index 8e48d35ea..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Triangulum-v2-10B/00f8547d-4bb9-4510-a29c-c37376c274c8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Triangulum-v2-10B/1762652580.478046", - "retrieved_timestamp": "1762652580.478047", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Triangulum-v2-10B", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Triangulum-v2-10B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6705231009277606 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6064531367418446 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24471299093655588 
- } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.337248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42807291666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44664228723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Triangulum-v2-10B/12b8f4d7-2ae8-492c-8756-f7cb21a58c76.json b/data/hfopenllm_v2/prithivMLmods/Triangulum-v2-10B/12b8f4d7-2ae8-492c-8756-f7cb21a58c76.json new file mode 100644 index 000000000..dc5fbfdfd --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Triangulum-v2-10B/12b8f4d7-2ae8-492c-8756-f7cb21a58c76.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Triangulum-v2-10B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangulum-v2-10B", + "id": "prithivMLmods/Triangulum-v2-10B", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6705 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6065 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2447 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3372 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on 
MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4281 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4466 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Tucana-Opus-14B-r999/96d9b675-c299-4138-a381-fb4de36287e5.json b/data/hfopenllm_v2/prithivMLmods/Tucana-Opus-14B-r999/96d9b675-c299-4138-a381-fb4de36287e5.json new file mode 100644 index 000000000..044957de8 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Tucana-Opus-14B-r999/96d9b675-c299-4138-a381-fb4de36287e5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Tucana-Opus-14B-r999/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Tucana-Opus-14B-r999", + "id": "prithivMLmods/Tucana-Opus-14B-r999", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6067 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6557 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4063 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3918 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.473 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + 
"dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5384 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Tucana-Opus-14B-r999/f24694aa-cfe7-4a58-9f9e-f02c3e51d198.json b/data/hfopenllm_v2/prithivMLmods/Tucana-Opus-14B-r999/f24694aa-cfe7-4a58-9f9e-f02c3e51d198.json deleted file mode 100644 index 89198d7e4..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Tucana-Opus-14B-r999/f24694aa-cfe7-4a58-9f9e-f02c3e51d198.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Tucana-Opus-14B-r999/1762652580.47826", - "retrieved_timestamp": "1762652580.478261", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Tucana-Opus-14B-r999", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Tucana-Opus-14B-r999", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.606725710005009 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6556888858891955 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39177852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47303125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5383976063829787 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Tulu-MathLingo-8B/17fffa9b-8ed4-44c7-87ea-7ee2c1f28e6a.json b/data/hfopenllm_v2/prithivMLmods/Tulu-MathLingo-8B/17fffa9b-8ed4-44c7-87ea-7ee2c1f28e6a.json new file mode 100644 index 000000000..075ca9a4b --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Tulu-MathLingo-8B/17fffa9b-8ed4-44c7-87ea-7ee2c1f28e6a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + 
"evaluation_id": "hfopenllm_v2/prithivMLmods_Tulu-MathLingo-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Tulu-MathLingo-8B", + "id": "prithivMLmods/Tulu-MathLingo-8B", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5589 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4659 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.145 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3864 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3044 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Tulu-MathLingo-8B/fa0776bd-e95e-4d54-9004-82dff09307b8.json b/data/hfopenllm_v2/prithivMLmods/Tulu-MathLingo-8B/fa0776bd-e95e-4d54-9004-82dff09307b8.json deleted file mode 100644 index ac85a4c4a..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Tulu-MathLingo-8B/fa0776bd-e95e-4d54-9004-82dff09307b8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Tulu-MathLingo-8B/1762652580.478472", - "retrieved_timestamp": "1762652580.478473", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - 
], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Tulu-MathLingo-8B", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Tulu-MathLingo-8B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5589402784611497 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4658807905856453 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14501510574018128 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38642708333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.304438164893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Viper-Coder-7B-Elite14/06bc6426-310b-40ac-bbeb-0460215b8981.json b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-7B-Elite14/06bc6426-310b-40ac-bbeb-0460215b8981.json deleted file mode 100644 index 1e728f156..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Viper-Coder-7B-Elite14/06bc6426-310b-40ac-bbeb-0460215b8981.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Viper-Coder-7B-Elite14/1762652580.4786801", - "retrieved_timestamp": "1762652580.478681", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Viper-Coder-7B-Elite14", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Viper-Coder-7B-Elite14", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.14882844186757802 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28285388717732607 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2550335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34215625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10887632978723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Viper-Coder-7B-Elite14/8999a5f3-f421-4663-835e-7626cebd2282.json b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-7B-Elite14/8999a5f3-f421-4663-835e-7626cebd2282.json new file mode 100644 index 000000000..df24efe05 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-7B-Elite14/8999a5f3-f421-4663-835e-7626cebd2282.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Viper-Coder-7B-Elite14/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Viper-Coder-7B-Elite14", + "id": "prithivMLmods/Viper-Coder-7B-Elite14", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1488 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2829 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0106 + 
} + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.255 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3422 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1089 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Viper-Coder-Hybrid-v1.2/1f235238-05e0-4c76-b136-0bf0cf470ba2.json b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-Hybrid-v1.2/1f235238-05e0-4c76-b136-0bf0cf470ba2.json deleted file mode 100644 index 741d07a0d..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Viper-Coder-Hybrid-v1.2/1f235238-05e0-4c76-b136-0bf0cf470ba2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Viper-Coder-Hybrid-v1.2/1762652580.4788852", - "retrieved_timestamp": "1762652580.478886", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Viper-Coder-Hybrid-v1.2", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Viper-Coder-Hybrid-v1.2", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6735705705306365 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6390749226915919 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3330815709969788 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37416107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48217708333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5242686170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Viper-Coder-Hybrid-v1.2/951e1a4f-ed6c-49ca-b648-6086989e333f.json b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-Hybrid-v1.2/951e1a4f-ed6c-49ca-b648-6086989e333f.json new file mode 100644 index 000000000..82f43e5e6 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-Hybrid-v1.2/951e1a4f-ed6c-49ca-b648-6086989e333f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Viper-Coder-Hybrid-v1.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Viper-Coder-Hybrid-v1.2", + "id": "prithivMLmods/Viper-Coder-Hybrid-v1.2", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6736 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6391 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3331 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3742 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4822 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5243 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Viper-Coder-Hybrid-v1.3/17167e2a-1f42-4ea9-a947-8749259738a8.json b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-Hybrid-v1.3/17167e2a-1f42-4ea9-a947-8749259738a8.json deleted file mode 100644 index e59283d49..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Viper-Coder-Hybrid-v1.3/17167e2a-1f42-4ea9-a947-8749259738a8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Viper-Coder-Hybrid-v1.3/1762652580.4790971", - "retrieved_timestamp": "1762652580.479098", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Viper-Coder-Hybrid-v1.3", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Viper-Coder-Hybrid-v1.3", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7554776880898239 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6470999423290662 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4516616314199396 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4403229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5097240691489362 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Viper-Coder-Hybrid-v1.3/2acc0666-e0ff-4760-a74a-227a02775344.json b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-Hybrid-v1.3/2acc0666-e0ff-4760-a74a-227a02775344.json new file mode 100644 index 000000000..0c7fa3197 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-Hybrid-v1.3/2acc0666-e0ff-4760-a74a-227a02775344.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Viper-Coder-Hybrid-v1.3/1770682486.623709", + "retrieved_timestamp": 
"1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Viper-Coder-Hybrid-v1.3", + "id": "prithivMLmods/Viper-Coder-Hybrid-v1.3", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7555 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6471 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4517 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3381 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4403 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5097 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Viper-Coder-HybridMini-v1.3/1ca04810-a377-4390-944a-1a4ec91a7962.json b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-HybridMini-v1.3/1ca04810-a377-4390-944a-1a4ec91a7962.json deleted file mode 100644 index da7db9acb..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Viper-Coder-HybridMini-v1.3/1ca04810-a377-4390-944a-1a4ec91a7962.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Viper-Coder-HybridMini-v1.3/1762652580.4793081", - "retrieved_timestamp": "1762652580.479309", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Viper-Coder-HybridMini-v1.3", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Viper-Coder-HybridMini-v1.3", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.610372699991578 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5365472959273401 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45048958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4351728723404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Viper-Coder-HybridMini-v1.3/3196c71d-0e0a-4d29-8bca-c31ba3d99dfd.json b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-HybridMini-v1.3/3196c71d-0e0a-4d29-8bca-c31ba3d99dfd.json new file mode 100644 index 000000000..37eb769a1 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-HybridMini-v1.3/3196c71d-0e0a-4d29-8bca-c31ba3d99dfd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Viper-Coder-HybridMini-v1.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Viper-Coder-HybridMini-v1.3", + "id": "prithivMLmods/Viper-Coder-HybridMini-v1.3", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.6104 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5365 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.463 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3171 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4505 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4352 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Viper-Coder-v0.1/4d801ab4-0c2d-445a-beb6-4de824618e75.json b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-v0.1/4d801ab4-0c2d-445a-beb6-4de824618e75.json deleted file mode 100644 index 40ab918ab..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Viper-Coder-v0.1/4d801ab4-0c2d-445a-beb6-4de824618e75.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Viper-Coder-v0.1/1762652580.479637", - "retrieved_timestamp": "1762652580.479639", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Viper-Coder-v0.1", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Viper-Coder-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5521460835028835 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.6143056870893655 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3270392749244713 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3540268456375839 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43944791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3927859042553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Viper-Coder-v0.1/e858aa6c-c424-447e-b512-7dcf794f9f0f.json b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-v0.1/e858aa6c-c424-447e-b512-7dcf794f9f0f.json new file mode 100644 index 000000000..f8a0e8b52 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-v0.1/e858aa6c-c424-447e-b512-7dcf794f9f0f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Viper-Coder-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Viper-Coder-v0.1", + "id": "prithivMLmods/Viper-Coder-v0.1", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5521 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6143 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.327 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.354 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4394 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3928 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Viper-Coder-v1.1/8773eac5-205e-4264-981b-58f1a25f872a.json b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-v1.1/8773eac5-205e-4264-981b-58f1a25f872a.json new file mode 100644 index 000000000..781f52345 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-v1.1/8773eac5-205e-4264-981b-58f1a25f872a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Viper-Coder-v1.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Viper-Coder-v1.1", + "id": "prithivMLmods/Viper-Coder-v1.1", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4432 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6492 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5461 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.401 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": 
"TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5219 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5232 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Viper-Coder-v1.1/cc8e5b55-5b48-40c3-9e30-3c1740bc7da2.json b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-v1.1/cc8e5b55-5b48-40c3-9e30-3c1740bc7da2.json deleted file mode 100644 index 27563dc4b..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Viper-Coder-v1.1/cc8e5b55-5b48-40c3-9e30-3c1740bc7da2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Viper-Coder-v1.1/1762652580.479969", - "retrieved_timestamp": "1762652580.47997", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Viper-Coder-v1.1", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Viper-Coder-v1.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.443236168920686 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6492289468853992 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5460725075528701 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.401006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5219270833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.523188164893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Viper-Coder-v1.6-r999/c26ae286-a9b8-499f-b886-4b75be0cf2da.json 
b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-v1.6-r999/c26ae286-a9b8-499f-b886-4b75be0cf2da.json new file mode 100644 index 000000000..852cfb91f --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-v1.6-r999/c26ae286-a9b8-499f-b886-4b75be0cf2da.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Viper-Coder-v1.6-r999/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Viper-Coder-v1.6-r999", + "id": "prithivMLmods/Viper-Coder-v1.6-r999", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4433 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6492 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5657 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.401 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5219 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5232 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Viper-Coder-v1.6-r999/ff5bb366-3692-441c-8e8f-8c23c5143aae.json b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-v1.6-r999/ff5bb366-3692-441c-8e8f-8c23c5143aae.json deleted file mode 100644 index 511149cc1..000000000 --- 
a/data/hfopenllm_v2/prithivMLmods/Viper-Coder-v1.6-r999/ff5bb366-3692-441c-8e8f-8c23c5143aae.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Viper-Coder-v1.6-r999/1762652580.480214", - "retrieved_timestamp": "1762652580.480215", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Viper-Coder-v1.6-r999", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Viper-Coder-v1.6-r999", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4432860366050967 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6492289468853992 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5657099697885196 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.401006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5219270833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.523188164893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Viper-Coder-v1.7-Vsm6/14b789c6-8b7f-4292-8ced-279e7ee856a5.json b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-v1.7-Vsm6/14b789c6-8b7f-4292-8ced-279e7ee856a5.json deleted file mode 100644 index b9c9e05ea..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Viper-Coder-v1.7-Vsm6/14b789c6-8b7f-4292-8ced-279e7ee856a5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Viper-Coder-v1.7-Vsm6/1762652580.480439", - "retrieved_timestamp": "1762652580.4804401", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Viper-Coder-v1.7-Vsm6", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": 
"prithivMLmods/Viper-Coder-v1.7-Vsm6", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5003889679384035 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6502342489348574 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4645015105740181 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39681208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47675 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5287566489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Viper-Coder-v1.7-Vsm6/d3a61998-2d41-4349-bd15-ce29143cc910.json b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-v1.7-Vsm6/d3a61998-2d41-4349-bd15-ce29143cc910.json new file mode 100644 index 000000000..0353bf927 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-v1.7-Vsm6/d3a61998-2d41-4349-bd15-ce29143cc910.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Viper-Coder-v1.7-Vsm6/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Viper-Coder-v1.7-Vsm6", + "id": "prithivMLmods/Viper-Coder-v1.7-Vsm6", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5004 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.6502 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4645 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3968 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4768 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5288 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Viper-OneCoder-UIGEN/56b66428-2751-4c62-b98c-6c60e58c45ca.json b/data/hfopenllm_v2/prithivMLmods/Viper-OneCoder-UIGEN/56b66428-2751-4c62-b98c-6c60e58c45ca.json new file mode 100644 index 000000000..022eaef25 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Viper-OneCoder-UIGEN/56b66428-2751-4c62-b98c-6c60e58c45ca.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Viper-OneCoder-UIGEN/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Viper-OneCoder-UIGEN", + "id": "prithivMLmods/Viper-OneCoder-UIGEN", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4692 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6047 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3867 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3423 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4514 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3904 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Viper-OneCoder-UIGEN/5d22f1b7-c062-4c46-8da1-4c895fcf8b9c.json b/data/hfopenllm_v2/prithivMLmods/Viper-OneCoder-UIGEN/5d22f1b7-c062-4c46-8da1-4c895fcf8b9c.json deleted file mode 100644 index f4115e916..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Viper-OneCoder-UIGEN/5d22f1b7-c062-4c46-8da1-4c895fcf8b9c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Viper-OneCoder-UIGEN/1762652580.480654", - "retrieved_timestamp": "1762652580.480654", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Viper-OneCoder-UIGEN", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Viper-OneCoder-UIGEN", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4691895282295421 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6046507657311738 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3867069486404834 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { 
- "score": 0.3422818791946309 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45141666666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.390375664893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Volans-Opus-14B-Exp/735058a7-c22e-42a7-94f5-d7e2459848b3.json b/data/hfopenllm_v2/prithivMLmods/Volans-Opus-14B-Exp/735058a7-c22e-42a7-94f5-d7e2459848b3.json deleted file mode 100644 index 7f4cab570..000000000 --- a/data/hfopenllm_v2/prithivMLmods/Volans-Opus-14B-Exp/735058a7-c22e-42a7-94f5-d7e2459848b3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Volans-Opus-14B-Exp/1762652580.480862", - "retrieved_timestamp": "1762652580.480863", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/Volans-Opus-14B-Exp", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Volans-Opus-14B-Exp", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5867675545330834 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6521211711040636 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.425226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3850671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4871979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5384807180851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/Volans-Opus-14B-Exp/9b2ec4af-4a7c-4cf7-8b7d-79b6cc219880.json b/data/hfopenllm_v2/prithivMLmods/Volans-Opus-14B-Exp/9b2ec4af-4a7c-4cf7-8b7d-79b6cc219880.json new 
file mode 100644 index 000000000..ee55e09c5 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Volans-Opus-14B-Exp/9b2ec4af-4a7c-4cf7-8b7d-79b6cc219880.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Volans-Opus-14B-Exp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Volans-Opus-14B-Exp", + "id": "prithivMLmods/Volans-Opus-14B-Exp", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5868 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6521 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4252 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3851 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4872 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5385 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/WebMind-7B-v0.1/00637ba6-99e5-4940-94ab-a620ff248ca1.json b/data/hfopenllm_v2/prithivMLmods/WebMind-7B-v0.1/00637ba6-99e5-4940-94ab-a620ff248ca1.json deleted file mode 100644 index e54727a43..000000000 --- a/data/hfopenllm_v2/prithivMLmods/WebMind-7B-v0.1/00637ba6-99e5-4940-94ab-a620ff248ca1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/prithivMLmods_WebMind-7B-v0.1/1762652580.481075", - "retrieved_timestamp": "1762652580.481076", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "prithivMLmods/WebMind-7B-v0.1", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/WebMind-7B-v0.1", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5278161943642867 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5433559211614739 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3648036253776435 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4537395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4279421542553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/prithivMLmods/WebMind-7B-v0.1/5855a920-428f-4699-becc-73d4422f706f.json b/data/hfopenllm_v2/prithivMLmods/WebMind-7B-v0.1/5855a920-428f-4699-becc-73d4422f706f.json new file mode 100644 index 000000000..5e3113aff --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/WebMind-7B-v0.1/5855a920-428f-4699-becc-73d4422f706f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/prithivMLmods_WebMind-7B-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "WebMind-7B-v0.1", + "id": "prithivMLmods/WebMind-7B-v0.1", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy 
on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5278 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5434 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3648 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3171 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4537 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4279 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pszemraj/Llama-3-6.3b-v0.1/f1004f08-7f46-4eb1-8f60-66893fca7180.json b/data/hfopenllm_v2/pszemraj/Llama-3-6.3b-v0.1/f1004f08-7f46-4eb1-8f60-66893fca7180.json new file mode 100644 index 000000000..00c890719 --- /dev/null +++ b/data/hfopenllm_v2/pszemraj/Llama-3-6.3b-v0.1/f1004f08-7f46-4eb1-8f60-66893fca7180.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/pszemraj_Llama-3-6.3b-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-6.3b-v0.1", + "id": "pszemraj/Llama-3-6.3b-v0.1", + "developer": "pszemraj", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.3 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1044 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + 
"hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4197 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0211 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3908 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.284 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pszemraj/Mistral-v0.3-6B/97db158a-3035-45d3-8d92-a08c9e605493.json b/data/hfopenllm_v2/pszemraj/Mistral-v0.3-6B/97db158a-3035-45d3-8d92-a08c9e605493.json new file mode 100644 index 000000000..73d3cc391 --- /dev/null +++ b/data/hfopenllm_v2/pszemraj/Mistral-v0.3-6B/97db158a-3035-45d3-8d92-a08c9e605493.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/pszemraj_Mistral-v0.3-6B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-v0.3-6B", + "id": "pszemraj/Mistral-v0.3-6B", + "developer": "pszemraj", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 5.939 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2454 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3774 + } + }, + { + "evaluation_name": 
"MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0136 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3908 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2143 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2019/LLaMa_3.2_3B_Catalysts/0d81b928-2a24-4eb4-93d5-224e3c505532.json b/data/hfopenllm_v2/qingy2019/LLaMa_3.2_3B_Catalysts/0d81b928-2a24-4eb4-93d5-224e3c505532.json new file mode 100644 index 000000000..077f5175f --- /dev/null +++ b/data/hfopenllm_v2/qingy2019/LLaMa_3.2_3B_Catalysts/0d81b928-2a24-4eb4-93d5-224e3c505532.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/qingy2019_LLaMa_3.2_3B_Catalysts/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LLaMa_3.2_3B_Catalysts", + "id": "qingy2019/LLaMa_3.2_3B_Catalysts", + "developer": "qingy2019", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4992 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4468 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1292 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2886 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3788 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3008 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2019/OpenMath2-Llama3.1-8B/bf4cc7ee-cad4-42af-8638-6b371577ec68.json b/data/hfopenllm_v2/qingy2019/OpenMath2-Llama3.1-8B/bf4cc7ee-cad4-42af-8638-6b371577ec68.json new file mode 100644 index 000000000..8d52de0fa --- /dev/null +++ b/data/hfopenllm_v2/qingy2019/OpenMath2-Llama3.1-8B/bf4cc7ee-cad4-42af-8638-6b371577ec68.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/qingy2019_OpenMath2-Llama3.1-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenMath2-Llama3.1-8B", + "id": "qingy2019/OpenMath2-Llama3.1-8B", + "developer": "qingy2019", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2331 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4096 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2674 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": 
"hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3436 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1553 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2019/Oracle-14B/5b574dda-0d85-47aa-9ebc-7f8581d402ca.json b/data/hfopenllm_v2/qingy2019/Oracle-14B/5b574dda-0d85-47aa-9ebc-7f8581d402ca.json new file mode 100644 index 000000000..8c2258503 --- /dev/null +++ b/data/hfopenllm_v2/qingy2019/Oracle-14B/5b574dda-0d85-47aa-9ebc-7f8581d402ca.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/qingy2019_Oracle-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Oracle-14B", + "id": "qingy2019/Oracle-14B", + "developer": "qingy2019", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 13.668 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2401 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4622 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0725 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", 
+ "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3703 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2379 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2019/Oracle-14B/6043830f-8a9d-4a03-9de5-4805724a9ae8.json b/data/hfopenllm_v2/qingy2019/Oracle-14B/6043830f-8a9d-4a03-9de5-4805724a9ae8.json new file mode 100644 index 000000000..3296a5d2c --- /dev/null +++ b/data/hfopenllm_v2/qingy2019/Oracle-14B/6043830f-8a9d-4a03-9de5-4805724a9ae8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/qingy2019_Oracle-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Oracle-14B", + "id": "qingy2019/Oracle-14B", + "developer": "qingy2019", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MixtralForCausalLM", + "params_billions": 13.668 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2358 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4612 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0642 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.3717 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2382 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2019/Oracle-14B/90a36ffd-8eeb-44e8-9b7b-dbd56238d0a6.json b/data/hfopenllm_v2/qingy2019/Oracle-14B/90a36ffd-8eeb-44e8-9b7b-dbd56238d0a6.json deleted file mode 100644 index 602e55ff1..000000000 --- a/data/hfopenllm_v2/qingy2019/Oracle-14B/90a36ffd-8eeb-44e8-9b7b-dbd56238d0a6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/qingy2019_Oracle-14B/1762652580.4822989", - "retrieved_timestamp": "1762652580.4822989", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "qingy2019/Oracle-14B", - "developer": "qingy2019", - "inference_platform": "unknown", - "id": "qingy2019/Oracle-14B", - "additional_details": { - "precision": "float16", - "architecture": "MixtralForCausalLM", - "params_billions": 13.668 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23583203677353867 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4611577021562399 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06419939577039276 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37166666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23819813829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/qingy2019/Oracle-14B/fc5c5eff-8314-4cb2-8ba4-b562096cfe1f.json b/data/hfopenllm_v2/qingy2019/Oracle-14B/fc5c5eff-8314-4cb2-8ba4-b562096cfe1f.json deleted file mode 100644 index 3f63e6995..000000000 --- a/data/hfopenllm_v2/qingy2019/Oracle-14B/fc5c5eff-8314-4cb2-8ba4-b562096cfe1f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/qingy2019_Oracle-14B/1762652580.482562", - "retrieved_timestamp": "1762652580.482562", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "qingy2019/Oracle-14B", - "developer": "qingy2019", - "inference_platform": "unknown", - "id": "qingy2019/Oracle-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 13.668 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24007854714380067 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4622299618883472 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07250755287009064 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37033333333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2378656914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct-Alpha/7bc9676d-6186-4b2d-8b4b-4a3786f3ed40.json b/data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct-Alpha/7bc9676d-6186-4b2d-8b4b-4a3786f3ed40.json deleted file mode 100644 index f6939ce9a..000000000 --- a/data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct-Alpha/7bc9676d-6186-4b2d-8b4b-4a3786f3ed40.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/qingy2019_Qwen2.5-Math-14B-Instruct-Alpha/1762652580.4831731", - "retrieved_timestamp": "1762652580.4831731", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "qingy2019/Qwen2.5-Math-14B-Instruct-Alpha", - "developer": "qingy2019", - "inference_platform": "unknown", - "id": "qingy2019/Qwen2.5-Math-14B-Instruct-Alpha", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": 
"IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5980830862112528 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6375080075350833 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31419939577039274 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3699664429530201 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4649375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5330784574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct-Alpha/9d5fdb25-0d6a-4d5c-bcfb-0903504e620a.json b/data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct-Alpha/9d5fdb25-0d6a-4d5c-bcfb-0903504e620a.json new file mode 100644 index 000000000..7da78d24a --- /dev/null +++ b/data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct-Alpha/9d5fdb25-0d6a-4d5c-bcfb-0903504e620a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/qingy2019_Qwen2.5-Math-14B-Instruct-Alpha/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-Math-14B-Instruct-Alpha", + "id": "qingy2019/Qwen2.5-Math-14B-Instruct-Alpha", + "developer": "qingy2019", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5981 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6375 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3142 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.37 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4649 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5331 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct-Pro/217819b0-2c4b-4c26-823b-1ea14f893e01.json b/data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct-Pro/217819b0-2c4b-4c26-823b-1ea14f893e01.json new file mode 100644 index 000000000..302221eda --- /dev/null +++ b/data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct-Pro/217819b0-2c4b-4c26-823b-1ea14f893e01.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/qingy2019_Qwen2.5-Math-14B-Instruct-Pro/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-Math-14B-Instruct-Pro", + "id": "qingy2019/Qwen2.5-Math-14B-Instruct-Pro", + "developer": "qingy2019", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1922 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5319 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.284 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3112 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.374 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3558 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct-Pro/c1a0b34a-d3b5-42b9-b779-b31b9678faed.json b/data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct-Pro/c1a0b34a-d3b5-42b9-b779-b31b9678faed.json deleted file mode 100644 index 96ec60837..000000000 --- a/data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct-Pro/c1a0b34a-d3b5-42b9-b779-b31b9678faed.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/qingy2019_Qwen2.5-Math-14B-Instruct-Pro/1762652580.483387", - "retrieved_timestamp": "1762652580.483388", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "qingy2019/Qwen2.5-Math-14B-Instruct-Pro", - "developer": "qingy2019", - "inference_platform": "unknown", - "id": "qingy2019/Qwen2.5-Math-14B-Instruct-Pro", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1921678923035324 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5318689754519911 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37403125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35580119680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct/0f844855-fb46-4b53-82c2-f36e5721c385.json b/data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct/0f844855-fb46-4b53-82c2-f36e5721c385.json new file mode 100644 index 000000000..792719c86 --- /dev/null +++ b/data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct/0f844855-fb46-4b53-82c2-f36e5721c385.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/qingy2019_Qwen2.5-Math-14B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-Math-14B-Instruct", + "id": "qingy2019/Qwen2.5-Math-14B-Instruct", + "developer": "qingy2019", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6005 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6356 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2764 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3691 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4757 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5339 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct/46d47e9a-6378-4eb5-a43d-f8e6a7c51674.json b/data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct/46d47e9a-6378-4eb5-a43d-f8e6a7c51674.json deleted file mode 100644 index 218973378..000000000 --- a/data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct/46d47e9a-6378-4eb5-a43d-f8e6a7c51674.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/qingy2019_Qwen2.5-Math-14B-Instruct/1762652580.482764", - "retrieved_timestamp": "1762652580.482764", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "qingy2019/Qwen2.5-Math-14B-Instruct", - "developer": "qingy2019", - "inference_platform": "unknown", - "id": "qingy2019/Qwen2.5-Math-14B-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6066259746361875 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6350068875885949 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3716012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3724832214765101 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4757291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5330784574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct/59aaa7ed-27d4-4765-b115-90570ad86c77.json b/data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct/59aaa7ed-27d4-4765-b115-90570ad86c77.json new file mode 100644 index 000000000..5ff1d5b79 --- /dev/null +++ b/data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct/59aaa7ed-27d4-4765-b115-90570ad86c77.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/qingy2019_Qwen2.5-Math-14B-Instruct/1770682486.623709", + 
"retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-Math-14B-Instruct", + "id": "qingy2019/Qwen2.5-Math-14B-Instruct", + "developer": "qingy2019", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6066 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.635 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3716 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3725 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4757 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5331 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct/5a2e7119-5fe6-4d3c-8706-01e22ef5b121.json b/data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct/5a2e7119-5fe6-4d3c-8706-01e22ef5b121.json deleted file mode 100644 index 09289c784..000000000 --- a/data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct/5a2e7119-5fe6-4d3c-8706-01e22ef5b121.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/qingy2019_Qwen2.5-Math-14B-Instruct/1762652580.48299", - "retrieved_timestamp": "1762652580.4829912", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": 
"Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "qingy2019/Qwen2.5-Math-14B-Instruct", - "developer": "qingy2019", - "inference_platform": "unknown", - "id": "qingy2019/Qwen2.5-Math-14B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6005310354304356 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6356492397286339 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2764350453172205 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4756666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5339095744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/qingy2019/Qwen2.5-Ultimate-14B-Instruct/4478c5ff-3b51-4be2-abce-3fb6a951b6e7.json b/data/hfopenllm_v2/qingy2019/Qwen2.5-Ultimate-14B-Instruct/4478c5ff-3b51-4be2-abce-3fb6a951b6e7.json new file mode 100644 index 000000000..12db4718f --- /dev/null +++ b/data/hfopenllm_v2/qingy2019/Qwen2.5-Ultimate-14B-Instruct/4478c5ff-3b51-4be2-abce-3fb6a951b6e7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/qingy2019_Qwen2.5-Ultimate-14B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-Ultimate-14B-Instruct", + "id": "qingy2019/Qwen2.5-Ultimate-14B-Instruct", + "developer": "qingy2019", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3938 + } + }, + { + "evaluation_name": "BBH", + 
"source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5842 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2893 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3565 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4135 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4929 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2019/Qwen2.5-Ultimate-14B-Instruct/655920b7-5687-4555-8890-ab1d08f3f00d.json b/data/hfopenllm_v2/qingy2019/Qwen2.5-Ultimate-14B-Instruct/655920b7-5687-4555-8890-ab1d08f3f00d.json deleted file mode 100644 index a6073902b..000000000 --- a/data/hfopenllm_v2/qingy2019/Qwen2.5-Ultimate-14B-Instruct/655920b7-5687-4555-8890-ab1d08f3f00d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/qingy2019_Qwen2.5-Ultimate-14B-Instruct/1762652580.483648", - "retrieved_timestamp": "1762652580.483649", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "qingy2019/Qwen2.5-Ultimate-14B-Instruct", - "developer": "qingy2019", - "inference_platform": "unknown", - "id": "qingy2019/Qwen2.5-Ultimate-14B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39380177927897975 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.5841561592804249 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2892749244712991 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3565436241610738 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4135 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4929355053191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/qingy2024/Benchmaxx-Llama-3.2-1B-Instruct/52ed2d5b-d9be-4f3f-b193-8d4cca4ded62.json b/data/hfopenllm_v2/qingy2024/Benchmaxx-Llama-3.2-1B-Instruct/52ed2d5b-d9be-4f3f-b193-8d4cca4ded62.json deleted file mode 100644 index 140b7aa43..000000000 --- a/data/hfopenllm_v2/qingy2024/Benchmaxx-Llama-3.2-1B-Instruct/52ed2d5b-d9be-4f3f-b193-8d4cca4ded62.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/qingy2024_Benchmaxx-Llama-3.2-1B-Instruct/1762652580.483871", - "retrieved_timestamp": "1762652580.483871", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "qingy2024/Benchmaxx-Llama-3.2-1B-Instruct", - "developer": "qingy2024", - "inference_platform": "unknown", - "id": "qingy2024/Benchmaxx-Llama-3.2-1B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20136016879657087 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8269136508088061 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48036253776435045 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3446354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11128656914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/qingy2024/Benchmaxx-Llama-3.2-1B-Instruct/9202146d-5889-49fd-9025-e03153ba9093.json b/data/hfopenllm_v2/qingy2024/Benchmaxx-Llama-3.2-1B-Instruct/9202146d-5889-49fd-9025-e03153ba9093.json new file mode 100644 index 000000000..e323ed5cd --- /dev/null +++ b/data/hfopenllm_v2/qingy2024/Benchmaxx-Llama-3.2-1B-Instruct/9202146d-5889-49fd-9025-e03153ba9093.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/qingy2024_Benchmaxx-Llama-3.2-1B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Benchmaxx-Llama-3.2-1B-Instruct", + "id": "qingy2024/Benchmaxx-Llama-3.2-1B-Instruct", + "developer": "qingy2024", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2014 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8269 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4804 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3446 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1113 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2024/Eyas-17B-Instruct/94257d3e-2b1e-47a1-bbd1-7fc696a574b3.json b/data/hfopenllm_v2/qingy2024/Eyas-17B-Instruct/94257d3e-2b1e-47a1-bbd1-7fc696a574b3.json new file mode 100644 index 000000000..a3811ca19 --- /dev/null +++ b/data/hfopenllm_v2/qingy2024/Eyas-17B-Instruct/94257d3e-2b1e-47a1-bbd1-7fc696a574b3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/qingy2024_Eyas-17B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Eyas-17B-Instruct", + "id": "qingy2024/Eyas-17B-Instruct", + "developer": "qingy2024", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 17.431 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6575 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6085 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.247 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3146 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4522 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4343 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/qingy2024/Eyas-17B-Instruct/c45cc504-88b0-4110-9650-47f4d328f769.json b/data/hfopenllm_v2/qingy2024/Eyas-17B-Instruct/c45cc504-88b0-4110-9650-47f4d328f769.json deleted file mode 100644 index 7d2e044c1..000000000 --- a/data/hfopenllm_v2/qingy2024/Eyas-17B-Instruct/c45cc504-88b0-4110-9650-47f4d328f769.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/qingy2024_Eyas-17B-Instruct/1762652580.484141", - "retrieved_timestamp": "1762652580.484141", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "qingy2024/Eyas-17B-Instruct", - "developer": "qingy2024", - "inference_platform": "unknown", - "id": "qingy2024/Eyas-17B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 17.431 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6574588757829227 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6084550080292097 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24697885196374622 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45216666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43425864361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/qingy2024/Falcon3-2x10B-MoE-Instruct/2245cf71-fb8d-44ca-b58d-06608312ee8c.json b/data/hfopenllm_v2/qingy2024/Falcon3-2x10B-MoE-Instruct/2245cf71-fb8d-44ca-b58d-06608312ee8c.json new file mode 100644 index 000000000..97d1ad31b --- /dev/null +++ b/data/hfopenllm_v2/qingy2024/Falcon3-2x10B-MoE-Instruct/2245cf71-fb8d-44ca-b58d-06608312ee8c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/qingy2024_Falcon3-2x10B-MoE-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Falcon3-2x10B-MoE-Instruct", + "id": 
"qingy2024/Falcon3-2x10B-MoE-Instruct", + "developer": "qingy2024", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 18.799 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.785 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6185 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2795 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3305 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4284 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4423 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2024/Falcon3-2x10B-MoE-Instruct/302e9f42-b9fa-4e2b-acda-70c391f9b6bc.json b/data/hfopenllm_v2/qingy2024/Falcon3-2x10B-MoE-Instruct/302e9f42-b9fa-4e2b-acda-70c391f9b6bc.json deleted file mode 100644 index 689c3f541..000000000 --- a/data/hfopenllm_v2/qingy2024/Falcon3-2x10B-MoE-Instruct/302e9f42-b9fa-4e2b-acda-70c391f9b6bc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/qingy2024_Falcon3-2x10B-MoE-Instruct/1762652580.484361", - "retrieved_timestamp": "1762652580.484362", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "qingy2024/Falcon3-2x10B-MoE-Instruct", - "developer": "qingy2024", - "inference_platform": "unknown", - "id": 
"qingy2024/Falcon3-2x10B-MoE-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 18.799 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7849783020164276 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6184925726037823 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2794561933534743 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42835416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44232047872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/qingy2024/Fusion-14B-Instruct/123331fd-a4fb-4dc6-a30e-17f230618df9.json b/data/hfopenllm_v2/qingy2024/Fusion-14B-Instruct/123331fd-a4fb-4dc6-a30e-17f230618df9.json deleted file mode 100644 index 179c78216..000000000 --- a/data/hfopenllm_v2/qingy2024/Fusion-14B-Instruct/123331fd-a4fb-4dc6-a30e-17f230618df9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/qingy2024_Fusion-14B-Instruct/1762652580.4845738", - "retrieved_timestamp": "1762652580.484575", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "qingy2024/Fusion-14B-Instruct", - "developer": "qingy2024", - "inference_platform": "unknown", - "id": "qingy2024/Fusion-14B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7259770741632203 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6395930812164231 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3368580060422961 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3548657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44004166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.504404920212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/qingy2024/Fusion-14B-Instruct/9a823fde-7802-4876-b72c-d8f73cd17236.json b/data/hfopenllm_v2/qingy2024/Fusion-14B-Instruct/9a823fde-7802-4876-b72c-d8f73cd17236.json new file mode 100644 index 000000000..7fb915e5f --- /dev/null +++ b/data/hfopenllm_v2/qingy2024/Fusion-14B-Instruct/9a823fde-7802-4876-b72c-d8f73cd17236.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/qingy2024_Fusion-14B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fusion-14B-Instruct", + "id": "qingy2024/Fusion-14B-Instruct", + "developer": "qingy2024", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.726 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6396 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3369 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3549 + } + }, + { + 
"evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.44 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5044 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2024/Fusion2-14B-Instruct/cc17acb9-0f4e-46a9-a250-eb79a0fedc3f.json b/data/hfopenllm_v2/qingy2024/Fusion2-14B-Instruct/cc17acb9-0f4e-46a9-a250-eb79a0fedc3f.json deleted file mode 100644 index 5e8096a1c..000000000 --- a/data/hfopenllm_v2/qingy2024/Fusion2-14B-Instruct/cc17acb9-0f4e-46a9-a250-eb79a0fedc3f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/qingy2024_Fusion2-14B-Instruct/1762652580.4848042", - "retrieved_timestamp": "1762652580.4848042", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "qingy2024/Fusion2-14B-Instruct", - "developer": "qingy2024", - "inference_platform": "unknown", - "id": "qingy2024/Fusion2-14B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6064010159709571 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.611852372286455 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31268882175226587 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3447986577181208 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46338541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5050698138297872 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/qingy2024/Fusion2-14B-Instruct/ede99239-ef8f-49eb-a48b-0ec2553c99e5.json b/data/hfopenllm_v2/qingy2024/Fusion2-14B-Instruct/ede99239-ef8f-49eb-a48b-0ec2553c99e5.json new file mode 100644 index 000000000..130eb0036 --- /dev/null +++ b/data/hfopenllm_v2/qingy2024/Fusion2-14B-Instruct/ede99239-ef8f-49eb-a48b-0ec2553c99e5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/qingy2024_Fusion2-14B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fusion2-14B-Instruct", + "id": "qingy2024/Fusion2-14B-Instruct", + "developer": "qingy2024", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6064 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6119 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3127 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3448 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4634 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5051 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2024/Fusion4-14B-Instruct/4a307570-994f-491c-87a7-ad90b7965b8b.json b/data/hfopenllm_v2/qingy2024/Fusion4-14B-Instruct/4a307570-994f-491c-87a7-ad90b7965b8b.json new file mode 100644 index 000000000..144b32ba3 --- 
/dev/null +++ b/data/hfopenllm_v2/qingy2024/Fusion4-14B-Instruct/4a307570-994f-491c-87a7-ad90b7965b8b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/qingy2024_Fusion4-14B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fusion4-14B-Instruct", + "id": "qingy2024/Fusion4-14B-Instruct", + "developer": "qingy2024", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7649 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6543 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3882 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3305 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4326 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5194 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2024/Fusion4-14B-Instruct/bb7b828c-07a0-4530-8c2e-8e4b6370cbb4.json b/data/hfopenllm_v2/qingy2024/Fusion4-14B-Instruct/bb7b828c-07a0-4530-8c2e-8e4b6370cbb4.json deleted file mode 100644 index fade1fef8..000000000 --- a/data/hfopenllm_v2/qingy2024/Fusion4-14B-Instruct/bb7b828c-07a0-4530-8c2e-8e4b6370cbb4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/qingy2024_Fusion4-14B-Instruct/1762652580.4850292", 
- "retrieved_timestamp": "1762652580.48503", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "qingy2024/Fusion4-14B-Instruct", - "developer": "qingy2024", - "inference_platform": "unknown", - "id": "qingy2024/Fusion4-14B-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7648949232480928 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6542520469477617 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38821752265861026 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4325729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5193650265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/qingy2024/OwO-14B-Instruct/eb448d78-6417-4533-8458-99c1869a74ae.json b/data/hfopenllm_v2/qingy2024/OwO-14B-Instruct/eb448d78-6417-4533-8458-99c1869a74ae.json new file mode 100644 index 000000000..2f6ab406d --- /dev/null +++ b/data/hfopenllm_v2/qingy2024/OwO-14B-Instruct/eb448d78-6417-4533-8458-99c1869a74ae.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/qingy2024_OwO-14B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OwO-14B-Instruct", + "id": "qingy2024/OwO-14B-Instruct", + "developer": "qingy2024", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1383 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6165 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4162 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3641 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4407 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5181 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2024/OwO-14B-Instruct/f524ebb6-64cb-43e3-8cff-6305ef122890.json b/data/hfopenllm_v2/qingy2024/OwO-14B-Instruct/f524ebb6-64cb-43e3-8cff-6305ef122890.json deleted file mode 100644 index d2865f268..000000000 --- a/data/hfopenllm_v2/qingy2024/OwO-14B-Instruct/f524ebb6-64cb-43e3-8cff-6305ef122890.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/qingy2024_OwO-14B-Instruct/1762652580.485259", - "retrieved_timestamp": "1762652580.485259", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "qingy2024/OwO-14B-Instruct", - "developer": "qingy2024", - "inference_platform": "unknown", - "id": "qingy2024/OwO-14B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1383119013107444 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6164807172760662 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4161631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3640939597315436 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44068749999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5181183510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/qingy2024/QwEnlarge-16B-Instruct/dd44686d-13da-4c88-81d3-6d01676baa4e.json b/data/hfopenllm_v2/qingy2024/QwEnlarge-16B-Instruct/dd44686d-13da-4c88-81d3-6d01676baa4e.json deleted file mode 100644 index 348c15985..000000000 --- a/data/hfopenllm_v2/qingy2024/QwEnlarge-16B-Instruct/dd44686d-13da-4c88-81d3-6d01676baa4e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/qingy2024_QwEnlarge-16B-Instruct/1762652580.485478", - "retrieved_timestamp": "1762652580.4854789", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "qingy2024/QwEnlarge-16B-Instruct", - "developer": "qingy2024", - "inference_platform": "unknown", - "id": "qingy2024/QwEnlarge-16B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 15.871 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7801821389468832 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5949341698087998 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45996978851963743 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33305369127516776 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.410125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44755651595744683 - } - } - ] -} diff --git a/data/hfopenllm_v2/qingy2024/QwEnlarge-16B-Instruct/e1b8e4ad-4327-46b9-b957-fbd02e57c87e.json b/data/hfopenllm_v2/qingy2024/QwEnlarge-16B-Instruct/e1b8e4ad-4327-46b9-b957-fbd02e57c87e.json new file mode 100644 index 000000000..64c399092 --- /dev/null +++ b/data/hfopenllm_v2/qingy2024/QwEnlarge-16B-Instruct/e1b8e4ad-4327-46b9-b957-fbd02e57c87e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/qingy2024_QwEnlarge-16B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwEnlarge-16B-Instruct", + "id": "qingy2024/QwEnlarge-16B-Instruct", + "developer": "qingy2024", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 15.871 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7802 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5949 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.46 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3331 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4101 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4476 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2024/QwQ-14B-Math-v0.2/4092651d-1d14-408d-922d-6189858aab36.json b/data/hfopenllm_v2/qingy2024/QwQ-14B-Math-v0.2/4092651d-1d14-408d-922d-6189858aab36.json deleted file mode 100644 index bcf56db1f..000000000 --- a/data/hfopenllm_v2/qingy2024/QwQ-14B-Math-v0.2/4092651d-1d14-408d-922d-6189858aab36.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/qingy2024_QwQ-14B-Math-v0.2/1762652580.48586", - "retrieved_timestamp": "1762652580.4858618", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "qingy2024/QwQ-14B-Math-v0.2", - "developer": "qingy2024", - "inference_platform": "unknown", - "id": "qingy2024/QwQ-14B-Math-v0.2", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33909692948044523 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.573097955260854 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4811178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40209374999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47997007978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/qingy2024/QwQ-14B-Math-v0.2/aab6b224-b948-4fb1-84b7-0dbe5c46d527.json b/data/hfopenllm_v2/qingy2024/QwQ-14B-Math-v0.2/aab6b224-b948-4fb1-84b7-0dbe5c46d527.json new file mode 100644 index 000000000..1d8168654 --- /dev/null +++ b/data/hfopenllm_v2/qingy2024/QwQ-14B-Math-v0.2/aab6b224-b948-4fb1-84b7-0dbe5c46d527.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/qingy2024_QwQ-14B-Math-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging 
Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "QwQ-14B-Math-v0.2", + "id": "qingy2024/QwQ-14B-Math-v0.2", + "developer": "qingy2024", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3391 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5731 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4811 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2626 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4021 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.48 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2024/Qwarkstar-4B-Instruct-Preview/2e5cd1de-6109-4f76-b722-abbd4b207f4d.json b/data/hfopenllm_v2/qingy2024/Qwarkstar-4B-Instruct-Preview/2e5cd1de-6109-4f76-b722-abbd4b207f4d.json new file mode 100644 index 000000000..dd72818b2 --- /dev/null +++ b/data/hfopenllm_v2/qingy2024/Qwarkstar-4B-Instruct-Preview/2e5cd1de-6109-4f76-b722-abbd4b207f4d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/qingy2024_Qwarkstar-4B-Instruct-Preview/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwarkstar-4B-Instruct-Preview", + "id": "qingy2024/Qwarkstar-4B-Instruct-Preview", + "developer": "qingy2024", + "inference_platform": 
"unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 4.473 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5324 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4358 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1284 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3896 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2502 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2024/Qwarkstar-4B-Instruct-Preview/701a4aa4-b057-42d8-8b89-dd59950d1981.json b/data/hfopenllm_v2/qingy2024/Qwarkstar-4B-Instruct-Preview/701a4aa4-b057-42d8-8b89-dd59950d1981.json deleted file mode 100644 index 5cf0f02ba..000000000 --- a/data/hfopenllm_v2/qingy2024/Qwarkstar-4B-Instruct-Preview/701a4aa4-b057-42d8-8b89-dd59950d1981.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/qingy2024_Qwarkstar-4B-Instruct-Preview/1762652580.4865122", - "retrieved_timestamp": "1762652580.486513", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "qingy2024/Qwarkstar-4B-Instruct-Preview", - "developer": "qingy2024", - "inference_platform": "unknown", - "id": "qingy2024/Qwarkstar-4B-Instruct-Preview", - "additional_details": { - "precision": "bfloat16", 
- "architecture": "Qwen2ForCausalLM", - "params_billions": 4.473 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5324372664530114 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43584381808469397 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38959374999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.250249335106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/qingy2024/Qwarkstar-4B/767d1296-4971-478f-8d78-1d63d162ae5b.json b/data/hfopenllm_v2/qingy2024/Qwarkstar-4B/767d1296-4971-478f-8d78-1d63d162ae5b.json new file mode 100644 index 000000000..4fa365663 --- /dev/null +++ b/data/hfopenllm_v2/qingy2024/Qwarkstar-4B/767d1296-4971-478f-8d78-1d63d162ae5b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/qingy2024_Qwarkstar-4B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwarkstar-4B", + "id": "qingy2024/Qwarkstar-4B", + "developer": "qingy2024", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 4.473 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1994 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4015 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0861 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3247 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4428 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2425 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2024/Qwarkstar-4B/9f586b02-3514-46f7-b1df-4e78f286893e.json b/data/hfopenllm_v2/qingy2024/Qwarkstar-4B/9f586b02-3514-46f7-b1df-4e78f286893e.json deleted file mode 100644 index 5cb9cf504..000000000 --- a/data/hfopenllm_v2/qingy2024/Qwarkstar-4B/9f586b02-3514-46f7-b1df-4e78f286893e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/qingy2024_Qwarkstar-4B/1762652580.486229", - "retrieved_timestamp": "1762652580.4862301", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "qingy2024/Qwarkstar-4B", - "developer": "qingy2024", - "inference_platform": "unknown", - "id": "qingy2024/Qwarkstar-4B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 4.473 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19941200459225966 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40149118131308104 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08610271903323263 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44283333333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24251994680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/qingy2024/Qwen2.5-4B/eab74e3b-de61-4fa9-87c2-56e69b70349a.json b/data/hfopenllm_v2/qingy2024/Qwen2.5-4B/eab74e3b-de61-4fa9-87c2-56e69b70349a.json new file mode 100644 index 000000000..9a1241837 --- /dev/null +++ b/data/hfopenllm_v2/qingy2024/Qwen2.5-4B/eab74e3b-de61-4fa9-87c2-56e69b70349a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/qingy2024_Qwen2.5-4B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-4B", + "id": "qingy2024/Qwen2.5-4B", + "developer": "qingy2024", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 4.168 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2158 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4269 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0514 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.461 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": 
"TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2525 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2024/Qwen2.5-Coder-Draft-1.5B-Instruct/3219d563-3bfb-4618-8cb3-e9b198d5b11f.json b/data/hfopenllm_v2/qingy2024/Qwen2.5-Coder-Draft-1.5B-Instruct/3219d563-3bfb-4618-8cb3-e9b198d5b11f.json new file mode 100644 index 000000000..56cfd6f64 --- /dev/null +++ b/data/hfopenllm_v2/qingy2024/Qwen2.5-Coder-Draft-1.5B-Instruct/3219d563-3bfb-4618-8cb3-e9b198d5b11f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/qingy2024_Qwen2.5-Coder-Draft-1.5B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-Coder-Draft-1.5B-Instruct", + "id": "qingy2024/Qwen2.5-Coder-Draft-1.5B-Instruct", + "developer": "qingy2024", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4125 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3837 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1579 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.358 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2244 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2024/Qwen2.5-Coder-Draft-1.5B-Instruct/40662202-f976-4dc0-acf2-f4794bb5d744.json b/data/hfopenllm_v2/qingy2024/Qwen2.5-Coder-Draft-1.5B-Instruct/40662202-f976-4dc0-acf2-f4794bb5d744.json deleted file mode 100644 index f33165f16..000000000 --- a/data/hfopenllm_v2/qingy2024/Qwen2.5-Coder-Draft-1.5B-Instruct/40662202-f976-4dc0-acf2-f4794bb5d744.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/qingy2024_Qwen2.5-Coder-Draft-1.5B-Instruct/1762652580.487137", - "retrieved_timestamp": "1762652580.487138", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "qingy2024/Qwen2.5-Coder-Draft-1.5B-Instruct", - "developer": "qingy2024", - "inference_platform": "unknown", - "id": "qingy2024/Qwen2.5-Coder-Draft-1.5B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4125110262991086 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3836795503038973 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1578549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35800000000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22440159574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/qingy2024/Qwen2.5-Math-14B-Instruct-Alpha/011f32a0-458f-4bea-8192-b18a19ddd0c7.json b/data/hfopenllm_v2/qingy2024/Qwen2.5-Math-14B-Instruct-Alpha/011f32a0-458f-4bea-8192-b18a19ddd0c7.json deleted file mode 100644 index 7f21aa8bc..000000000 --- a/data/hfopenllm_v2/qingy2024/Qwen2.5-Math-14B-Instruct-Alpha/011f32a0-458f-4bea-8192-b18a19ddd0c7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/qingy2024_Qwen2.5-Math-14B-Instruct-Alpha/1762652580.48737", - "retrieved_timestamp": "1762652580.487371", - 
"source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "qingy2024/Qwen2.5-Math-14B-Instruct-Alpha", - "developer": "qingy2024", - "inference_platform": "unknown", - "id": "qingy2024/Qwen2.5-Math-14B-Instruct-Alpha", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7704402097545624 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.646486159387426 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42900302114803623 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.348993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40209374999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49659242021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/qingy2024/Qwen2.5-Math-14B-Instruct-Alpha/233fd27c-561e-4c9e-a917-cbc5b08c055a.json b/data/hfopenllm_v2/qingy2024/Qwen2.5-Math-14B-Instruct-Alpha/233fd27c-561e-4c9e-a917-cbc5b08c055a.json new file mode 100644 index 000000000..140c278d3 --- /dev/null +++ b/data/hfopenllm_v2/qingy2024/Qwen2.5-Math-14B-Instruct-Alpha/233fd27c-561e-4c9e-a917-cbc5b08c055a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/qingy2024_Qwen2.5-Math-14B-Instruct-Alpha/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-Math-14B-Instruct-Alpha", + "id": "qingy2024/Qwen2.5-Math-14B-Instruct-Alpha", + "developer": "qingy2024", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7704 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6465 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.429 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.349 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4021 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4966 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2024/Qwen2.5-Math-14B-Instruct-Preview/a875e8f7-a4e6-4c17-abbc-b8d4b73b7501.json b/data/hfopenllm_v2/qingy2024/Qwen2.5-Math-14B-Instruct-Preview/a875e8f7-a4e6-4c17-abbc-b8d4b73b7501.json new file mode 100644 index 000000000..f9376abc8 --- /dev/null +++ b/data/hfopenllm_v2/qingy2024/Qwen2.5-Math-14B-Instruct-Preview/a875e8f7-a4e6-4c17-abbc-b8d4b73b7501.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/qingy2024_Qwen2.5-Math-14B-Instruct-Preview/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-Math-14B-Instruct-Preview", + "id": "qingy2024/Qwen2.5-Math-14B-Instruct-Preview", + "developer": "qingy2024", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7826 + } + }, + { + 
"evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6294 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4758 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3406 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4115 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4993 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2024/Qwen2.5-Math-14B-Instruct-Preview/aab84d55-c491-402c-9ed0-59347573fea9.json b/data/hfopenllm_v2/qingy2024/Qwen2.5-Math-14B-Instruct-Preview/aab84d55-c491-402c-9ed0-59347573fea9.json deleted file mode 100644 index 58be28289..000000000 --- a/data/hfopenllm_v2/qingy2024/Qwen2.5-Math-14B-Instruct-Preview/aab84d55-c491-402c-9ed0-59347573fea9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/qingy2024_Qwen2.5-Math-14B-Instruct-Preview/1762652580.487701", - "retrieved_timestamp": "1762652580.4877021", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "qingy2024/Qwen2.5-Math-14B-Instruct-Preview", - "developer": "qingy2024", - "inference_platform": "unknown", - "id": "qingy2024/Qwen2.5-Math-14B-Instruct-Preview", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7825802204816554 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6293942245934432 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47583081570996977 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34060402684563756 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4114583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49933510638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/qingy2024/Qwen2.6-14B-Instruct/4b68ba49-6681-4add-9197-2cd711701e15.json b/data/hfopenllm_v2/qingy2024/Qwen2.6-14B-Instruct/4b68ba49-6681-4add-9197-2cd711701e15.json new file mode 100644 index 000000000..c2c1c3606 --- /dev/null +++ b/data/hfopenllm_v2/qingy2024/Qwen2.6-14B-Instruct/4b68ba49-6681-4add-9197-2cd711701e15.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/qingy2024_Qwen2.6-14B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.6-14B-Instruct", + "id": "qingy2024/Qwen2.6-14B-Instruct", + "developer": "qingy2024", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5811 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6394 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3051 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3792 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4569 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5285 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2024/Qwen2.6-14B-Instruct/c27064c4-93d1-41a1-a61f-cde7a991b047.json b/data/hfopenllm_v2/qingy2024/Qwen2.6-14B-Instruct/c27064c4-93d1-41a1-a61f-cde7a991b047.json deleted file mode 100644 index 7e72e940d..000000000 --- a/data/hfopenllm_v2/qingy2024/Qwen2.6-14B-Instruct/c27064c4-93d1-41a1-a61f-cde7a991b047.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/qingy2024_Qwen2.6-14B-Instruct/1762652580.48806", - "retrieved_timestamp": "1762652580.488061", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "qingy2024/Qwen2.6-14B-Instruct", - "developer": "qingy2024", - "inference_platform": "unknown", - "id": "qingy2024/Qwen2.6-14B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5810970447302047 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6394142844483001 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30513595166163143 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37919463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4569375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, 
- "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5285073138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/qingy2024/Qwen2.6-Math-14B-Instruct/37822fb0-4ada-4413-aa77-6938678994d9.json b/data/hfopenllm_v2/qingy2024/Qwen2.6-Math-14B-Instruct/37822fb0-4ada-4413-aa77-6938678994d9.json deleted file mode 100644 index 4a40f2330..000000000 --- a/data/hfopenllm_v2/qingy2024/Qwen2.6-Math-14B-Instruct/37822fb0-4ada-4413-aa77-6938678994d9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/qingy2024_Qwen2.6-Math-14B-Instruct/1762652580.488592", - "retrieved_timestamp": "1762652580.4885938", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "qingy2024/Qwen2.6-Math-14B-Instruct", - "developer": "qingy2024", - "inference_platform": "unknown", - "id": "qingy2024/Qwen2.6-Math-14B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38623186478543603 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6324437508110833 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42900302114803623 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3699664429530201 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4758541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5241023936170213 - } - } - ] -} diff --git a/data/hfopenllm_v2/qingy2024/Qwen2.6-Math-14B-Instruct/5679ca73-3d5f-4bc7-bea2-5e9e713db0cc.json b/data/hfopenllm_v2/qingy2024/Qwen2.6-Math-14B-Instruct/5679ca73-3d5f-4bc7-bea2-5e9e713db0cc.json new file mode 100644 index 000000000..ee424cd53 --- /dev/null +++ b/data/hfopenllm_v2/qingy2024/Qwen2.6-Math-14B-Instruct/5679ca73-3d5f-4bc7-bea2-5e9e713db0cc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/qingy2024_Qwen2.6-Math-14B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": 
"documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.6-Math-14B-Instruct", + "id": "qingy2024/Qwen2.6-Math-14B-Instruct", + "developer": "qingy2024", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3862 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6324 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.429 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.37 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4759 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5241 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qq8933/OpenLongCoT-Base-Gemma2-2B/a6c631f6-890c-4199-abee-18b012bc48df.json b/data/hfopenllm_v2/qq8933/OpenLongCoT-Base-Gemma2-2B/a6c631f6-890c-4199-abee-18b012bc48df.json new file mode 100644 index 000000000..62c5fe7c9 --- /dev/null +++ b/data/hfopenllm_v2/qq8933/OpenLongCoT-Base-Gemma2-2B/a6c631f6-890c-4199-abee-18b012bc48df.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/qq8933_OpenLongCoT-Base-Gemma2-2B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenLongCoT-Base-Gemma2-2B", + "id": "qq8933/OpenLongCoT-Base-Gemma2-2B", + "developer": 
"qq8933", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 3.204 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1965 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3106 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0234 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2626 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3222 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1316 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/raphgg/test-2.5-72B/133866e4-6e3a-4d88-95f3-d7e1bd414988.json b/data/hfopenllm_v2/raphgg/test-2.5-72B/133866e4-6e3a-4d88-95f3-d7e1bd414988.json deleted file mode 100644 index 8438754b9..000000000 --- a/data/hfopenllm_v2/raphgg/test-2.5-72B/133866e4-6e3a-4d88-95f3-d7e1bd414988.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/raphgg_test-2.5-72B/1762652580.489263", - "retrieved_timestamp": "1762652580.489265", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "raphgg/test-2.5-72B", - "developer": "raphgg", - "inference_platform": "unknown", - "id": "raphgg/test-2.5-72B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8437047035199936 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7266099425567868 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4108761329305136 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38926174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48118750000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5836934840425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/raphgg/test-2.5-72B/1edc3610-40fc-467d-8410-26d4b6adebce.json b/data/hfopenllm_v2/raphgg/test-2.5-72B/1edc3610-40fc-467d-8410-26d4b6adebce.json new file mode 100644 index 000000000..f9c3617d5 --- /dev/null +++ b/data/hfopenllm_v2/raphgg/test-2.5-72B/1edc3610-40fc-467d-8410-26d4b6adebce.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/raphgg_test-2.5-72B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "test-2.5-72B", + "id": "raphgg/test-2.5-72B", + "developer": "raphgg", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8437 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7266 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact 
Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4109 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3893 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4812 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5837 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rasyosef/Mistral-NeMo-Minitron-8B-Chat/42c773ba-8fb4-4b3c-8ac7-0688519bb55c.json b/data/hfopenllm_v2/rasyosef/Mistral-NeMo-Minitron-8B-Chat/42c773ba-8fb4-4b3c-8ac7-0688519bb55c.json new file mode 100644 index 000000000..33c5db79a --- /dev/null +++ b/data/hfopenllm_v2/rasyosef/Mistral-NeMo-Minitron-8B-Chat/42c773ba-8fb4-4b3c-8ac7-0688519bb55c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/rasyosef_Mistral-NeMo-Minitron-8B-Chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-NeMo-Minitron-8B-Chat", + "id": "rasyosef/Mistral-NeMo-Minitron-8B-Chat", + "developer": "rasyosef", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 8.414 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4452 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4759 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0272 + } + }, + { + 
"evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4304 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2404 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rasyosef/Mistral-NeMo-Minitron-8B-Chat/cb8d28e5-d423-4a62-8b73-7542fb990d8e.json b/data/hfopenllm_v2/rasyosef/Mistral-NeMo-Minitron-8B-Chat/cb8d28e5-d423-4a62-8b73-7542fb990d8e.json deleted file mode 100644 index 3ab954b17..000000000 --- a/data/hfopenllm_v2/rasyosef/Mistral-NeMo-Minitron-8B-Chat/cb8d28e5-d423-4a62-8b73-7542fb990d8e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/rasyosef_Mistral-NeMo-Minitron-8B-Chat/1762652580.4896698", - "retrieved_timestamp": "1762652580.489672", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "rasyosef/Mistral-NeMo-Minitron-8B-Chat", - "developer": "rasyosef", - "inference_platform": "unknown", - "id": "rasyosef/Mistral-NeMo-Minitron-8B-Chat", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 8.414 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4451843331249973 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47594353379058535 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027190332326283987 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4304270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2403590425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/rasyosef/Phi-1_5-Instruct-v0.1/1a371df5-447f-4fd8-8fe8-dbf9a1dc079a.json b/data/hfopenllm_v2/rasyosef/Phi-1_5-Instruct-v0.1/1a371df5-447f-4fd8-8fe8-dbf9a1dc079a.json new file mode 100644 index 000000000..a55a5da1a --- /dev/null +++ b/data/hfopenllm_v2/rasyosef/Phi-1_5-Instruct-v0.1/1a371df5-447f-4fd8-8fe8-dbf9a1dc079a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/rasyosef_Phi-1_5-Instruct-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-1_5-Instruct-v0.1", + "id": "rasyosef/Phi-1_5-Instruct-v0.1", + "developer": "rasyosef", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "PhiForCausalLM", + "params_billions": 1.415 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2402 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3118 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0136 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3422 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1562 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rasyosef/Phi-1_5-Instruct-v0.1/e4d90e2b-f510-4941-8e10-be027693c3d4.json b/data/hfopenllm_v2/rasyosef/Phi-1_5-Instruct-v0.1/e4d90e2b-f510-4941-8e10-be027693c3d4.json deleted file mode 100644 index 37fbdb8b7..000000000 --- a/data/hfopenllm_v2/rasyosef/Phi-1_5-Instruct-v0.1/e4d90e2b-f510-4941-8e10-be027693c3d4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/rasyosef_Phi-1_5-Instruct-v0.1/1762652580.4902148", - "retrieved_timestamp": "1762652580.490216", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "rasyosef/Phi-1_5-Instruct-v0.1", - "developer": "rasyosef", - "inference_platform": "unknown", - "id": "rasyosef/Phi-1_5-Instruct-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "PhiForCausalLM", - "params_billions": 1.415 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24022815019703275 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3117898107092894 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34215625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15616688829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/rasyosef/phi-2-instruct-apo/821a21a0-6fd7-438a-933d-5e31b2dd2adc.json b/data/hfopenllm_v2/rasyosef/phi-2-instruct-apo/821a21a0-6fd7-438a-933d-5e31b2dd2adc.json new file mode 100644 index 000000000..02df295de --- /dev/null +++ b/data/hfopenllm_v2/rasyosef/phi-2-instruct-apo/821a21a0-6fd7-438a-933d-5e31b2dd2adc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/rasyosef_phi-2-instruct-apo/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + 
"evaluator_relationship": "third_party" + }, + "model_info": { + "name": "phi-2-instruct-apo", + "id": "rasyosef/phi-2-instruct-apo", + "developer": "rasyosef", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.775 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3146 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4445 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0302 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3342 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2155 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rasyosef/phi-2-instruct-apo/f56f3dda-a774-45d7-b949-b5e04174a413.json b/data/hfopenllm_v2/rasyosef/phi-2-instruct-apo/f56f3dda-a774-45d7-b949-b5e04174a413.json deleted file mode 100644 index 65065147d..000000000 --- a/data/hfopenllm_v2/rasyosef/phi-2-instruct-apo/f56f3dda-a774-45d7-b949-b5e04174a413.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/rasyosef_phi-2-instruct-apo/1762652580.490494", - "retrieved_timestamp": "1762652580.490495", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "rasyosef/phi-2-instruct-apo", - "developer": "rasyosef", - 
"inference_platform": "unknown", - "id": "rasyosef/phi-2-instruct-apo", - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.775 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31459194936102874 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44450964630048634 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030211480362537766 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33421875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21550864361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/rasyosef/phi-2-instruct-v0.1/556eef3e-7c58-446d-acc5-26af0413d2bc.json b/data/hfopenllm_v2/rasyosef/phi-2-instruct-v0.1/556eef3e-7c58-446d-acc5-26af0413d2bc.json deleted file mode 100644 index a9ed43758..000000000 --- a/data/hfopenllm_v2/rasyosef/phi-2-instruct-v0.1/556eef3e-7c58-446d-acc5-26af0413d2bc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/rasyosef_phi-2-instruct-v0.1/1762652580.490772", - "retrieved_timestamp": "1762652580.490773", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "rasyosef/phi-2-instruct-v0.1", - "developer": "rasyosef", - "inference_platform": "unknown", - "id": "rasyosef/phi-2-instruct-v0.1", - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.775 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3681476260765879 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47261184292654473 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3523541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22465093085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/rasyosef/phi-2-instruct-v0.1/781a4cc6-a69d-4106-81aa-06e114f7c897.json b/data/hfopenllm_v2/rasyosef/phi-2-instruct-v0.1/781a4cc6-a69d-4106-81aa-06e114f7c897.json new file mode 100644 index 000000000..16d481683 --- /dev/null +++ b/data/hfopenllm_v2/rasyosef/phi-2-instruct-v0.1/781a4cc6-a69d-4106-81aa-06e114f7c897.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/rasyosef_phi-2-instruct-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "phi-2-instruct-v0.1", + "id": "rasyosef/phi-2-instruct-v0.1", + "developer": "rasyosef", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.775 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3681 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4726 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + 
"source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3524 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2247 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/realtreetune/rho-1b-sft-MATH/86234365-2d3e-4d49-96e8-8f034990c902.json b/data/hfopenllm_v2/realtreetune/rho-1b-sft-MATH/86234365-2d3e-4d49-96e8-8f034990c902.json deleted file mode 100644 index c7026ff70..000000000 --- a/data/hfopenllm_v2/realtreetune/rho-1b-sft-MATH/86234365-2d3e-4d49-96e8-8f034990c902.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/realtreetune_rho-1b-sft-MATH/1762652580.4910588", - "retrieved_timestamp": "1762652580.49106", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "realtreetune/rho-1b-sft-MATH", - "developer": "realtreetune", - "inference_platform": "unknown", - "id": "realtreetune/rho-1b-sft-MATH", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.1 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.212101668018635 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3144153389594046 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34584375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11170212765957446 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/realtreetune/rho-1b-sft-MATH/e49c98b4-46f4-406e-9eeb-7072bf72b9a3.json b/data/hfopenllm_v2/realtreetune/rho-1b-sft-MATH/e49c98b4-46f4-406e-9eeb-7072bf72b9a3.json new file mode 100644 index 000000000..56ba23460 --- /dev/null +++ b/data/hfopenllm_v2/realtreetune/rho-1b-sft-MATH/e49c98b4-46f4-406e-9eeb-7072bf72b9a3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/realtreetune_rho-1b-sft-MATH/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "rho-1b-sft-MATH", + "id": "realtreetune/rho-1b-sft-MATH", + "developer": "realtreetune", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.1 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2121 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3144 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0347 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2525 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3458 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1117 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp/3b7524a8-d17b-4788-93f2-11076df464a7.json b/data/hfopenllm_v2/recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp/3b7524a8-d17b-4788-93f2-11076df464a7.json new file mode 100644 index 
000000000..b8063cb4c --- /dev/null +++ b/data/hfopenllm_v2/recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp/3b7524a8-d17b-4788-93f2-11076df464a7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/recoilme_Gemma-2-Ataraxy-Gemmasutra-9B-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma-2-Ataraxy-Gemmasutra-9B-slerp", + "id": "recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp", + "developer": "recoilme", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2854 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5984 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1005 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4607 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4162 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp/6188a57f-4bc3-42a5-ad18-c59774e40407.json b/data/hfopenllm_v2/recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp/6188a57f-4bc3-42a5-ad18-c59774e40407.json new file mode 100644 index 000000000..eda35f4dd --- /dev/null +++ b/data/hfopenllm_v2/recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp/6188a57f-4bc3-42a5-ad18-c59774e40407.json @@ -0,0 
+1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/recoilme_Gemma-2-Ataraxy-Gemmasutra-9B-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma-2-Ataraxy-Gemmasutra-9B-slerp", + "id": "recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp", + "developer": "recoilme", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7649 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5974 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0174 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3305 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4245 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4207 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/recoilme/recoilme-gemma-2-9B-v0.1/28689805-7c4c-438e-8431-f4a6aceb5e94.json b/data/hfopenllm_v2/recoilme/recoilme-gemma-2-9B-v0.1/28689805-7c4c-438e-8431-f4a6aceb5e94.json new file mode 100644 index 000000000..fb6c7b792 --- /dev/null +++ b/data/hfopenllm_v2/recoilme/recoilme-gemma-2-9B-v0.1/28689805-7c4c-438e-8431-f4a6aceb5e94.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/recoilme_recoilme-gemma-2-9B-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + 
"source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "recoilme-gemma-2-9B-v0.1", + "id": "recoilme/recoilme-gemma-2-9B-v0.1", + "developer": "recoilme", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7515 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5995 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2039 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3389 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4191 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4159 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/recoilme/recoilme-gemma-2-9B-v0.2/7c156689-9668-4ded-bacc-c88a03ad1526.json b/data/hfopenllm_v2/recoilme/recoilme-gemma-2-9B-v0.2/7c156689-9668-4ded-bacc-c88a03ad1526.json new file mode 100644 index 000000000..8be1d1216 --- /dev/null +++ b/data/hfopenllm_v2/recoilme/recoilme-gemma-2-9B-v0.2/7c156689-9668-4ded-bacc-c88a03ad1526.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/recoilme_recoilme-gemma-2-9B-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"recoilme-gemma-2-9B-v0.2", + "id": "recoilme/recoilme-gemma-2-9B-v0.2", + "developer": "recoilme", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7592 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6026 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0529 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3289 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4099 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4163 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/recoilme/recoilme-gemma-2-9B-v0.2/7e43f187-1959-4dfe-802f-094ba88f3b0d.json b/data/hfopenllm_v2/recoilme/recoilme-gemma-2-9B-v0.2/7e43f187-1959-4dfe-802f-094ba88f3b0d.json new file mode 100644 index 000000000..2194c56b4 --- /dev/null +++ b/data/hfopenllm_v2/recoilme/recoilme-gemma-2-9B-v0.2/7e43f187-1959-4dfe-802f-094ba88f3b0d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/recoilme_recoilme-gemma-2-9B-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "recoilme-gemma-2-9B-v0.2", + "id": "recoilme/recoilme-gemma-2-9B-v0.2", + "developer": "recoilme", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", 
+ "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2747 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6031 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0831 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3305 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4686 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4122 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/recoilme/recoilme-gemma-2-9B-v0.3/a6170173-ef17-4cfa-a76e-8e51cb8cb970.json b/data/hfopenllm_v2/recoilme/recoilme-gemma-2-9B-v0.3/a6170173-ef17-4cfa-a76e-8e51cb8cb970.json new file mode 100644 index 000000000..b07a872e1 --- /dev/null +++ b/data/hfopenllm_v2/recoilme/recoilme-gemma-2-9B-v0.3/a6170173-ef17-4cfa-a76e-8e51cb8cb970.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/recoilme_recoilme-gemma-2-9B-v0.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "recoilme-gemma-2-9B-v0.3", + "id": "recoilme/recoilme-gemma-2-9B-v0.3", + "developer": "recoilme", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7439 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5993 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0876 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3238 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4204 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4072 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/recoilme/recoilme-gemma-2-9B-v0.3/e998d52b-dd94-4ef2-9cfc-5034ded0105a.json b/data/hfopenllm_v2/recoilme/recoilme-gemma-2-9B-v0.3/e998d52b-dd94-4ef2-9cfc-5034ded0105a.json new file mode 100644 index 000000000..f0948a794 --- /dev/null +++ b/data/hfopenllm_v2/recoilme/recoilme-gemma-2-9B-v0.3/e998d52b-dd94-4ef2-9cfc-5034ded0105a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/recoilme_recoilme-gemma-2-9B-v0.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "recoilme-gemma-2-9B-v0.3", + "id": "recoilme/recoilme-gemma-2-9B-v0.3", + "developer": "recoilme", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5761 + } + }, + { + 
"evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.602 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1888 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3372 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4632 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4039 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/recoilme/recoilme-gemma-2-9B-v0.4/a3ac60bd-8fb3-47d9-b378-1f0c4d74fed2.json b/data/hfopenllm_v2/recoilme/recoilme-gemma-2-9B-v0.4/a3ac60bd-8fb3-47d9-b378-1f0c4d74fed2.json new file mode 100644 index 000000000..4927c66a1 --- /dev/null +++ b/data/hfopenllm_v2/recoilme/recoilme-gemma-2-9B-v0.4/a3ac60bd-8fb3-47d9-b378-1f0c4d74fed2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/recoilme_recoilme-gemma-2-9B-v0.4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "recoilme-gemma-2-9B-v0.4", + "id": "recoilme/recoilme-gemma-2-9B-v0.4", + "developer": "recoilme", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2562 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5967 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0846 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3406 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4727 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4406 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/recoilme/recoilme-gemma-2-9B-v0.5/0f69217c-74ed-4398-8d1b-53d1a43be890.json b/data/hfopenllm_v2/recoilme/recoilme-gemma-2-9B-v0.5/0f69217c-74ed-4398-8d1b-53d1a43be890.json new file mode 100644 index 000000000..31bf944b2 --- /dev/null +++ b/data/hfopenllm_v2/recoilme/recoilme-gemma-2-9B-v0.5/0f69217c-74ed-4398-8d1b-53d1a43be890.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/recoilme_recoilme-gemma-2-9B-v0.5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "recoilme-gemma-2-9B-v0.5", + "id": "recoilme/recoilme-gemma-2-9B-v0.5", + "developer": "recoilme", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7664 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5981 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + 
"source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2115 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3364 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4232 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.42 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/redrix/AngelSlayer-12B-Unslop-Mell-RPMax-DARKNESS/60e8f886-62fa-444a-8193-273905cbd4e8.json b/data/hfopenllm_v2/redrix/AngelSlayer-12B-Unslop-Mell-RPMax-DARKNESS/60e8f886-62fa-444a-8193-273905cbd4e8.json deleted file mode 100644 index ffa1028b6..000000000 --- a/data/hfopenllm_v2/redrix/AngelSlayer-12B-Unslop-Mell-RPMax-DARKNESS/60e8f886-62fa-444a-8193-273905cbd4e8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/redrix_AngelSlayer-12B-Unslop-Mell-RPMax-DARKNESS/1762652580.493407", - "retrieved_timestamp": "1762652580.493408", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "redrix/AngelSlayer-12B-Unslop-Mell-RPMax-DARKNESS", - "developer": "redrix", - "inference_platform": "unknown", - "id": "redrix/AngelSlayer-12B-Unslop-Mell-RPMax-DARKNESS", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5359590331431713 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5128840998052852 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11329305135951662 - } - }, - { - "evaluation_name": 
"GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38178124999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179853723404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/redrix/AngelSlayer-12B-Unslop-Mell-RPMax-DARKNESS/b973adcc-769c-4009-87c5-5f5af02a5d3a.json b/data/hfopenllm_v2/redrix/AngelSlayer-12B-Unslop-Mell-RPMax-DARKNESS/b973adcc-769c-4009-87c5-5f5af02a5d3a.json new file mode 100644 index 000000000..f90aef9ac --- /dev/null +++ b/data/hfopenllm_v2/redrix/AngelSlayer-12B-Unslop-Mell-RPMax-DARKNESS/b973adcc-769c-4009-87c5-5f5af02a5d3a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/redrix_AngelSlayer-12B-Unslop-Mell-RPMax-DARKNESS/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AngelSlayer-12B-Unslop-Mell-RPMax-DARKNESS", + "id": "redrix/AngelSlayer-12B-Unslop-Mell-RPMax-DARKNESS", + "developer": "redrix", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.536 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5129 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1133 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3154 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": 
"TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3818 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.318 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/redrix/patricide-12B-Unslop-Mell/16052a72-b235-47df-ac4c-fe54e49b9131.json b/data/hfopenllm_v2/redrix/patricide-12B-Unslop-Mell/16052a72-b235-47df-ac4c-fe54e49b9131.json deleted file mode 100644 index 827f9cfec..000000000 --- a/data/hfopenllm_v2/redrix/patricide-12B-Unslop-Mell/16052a72-b235-47df-ac4c-fe54e49b9131.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/redrix_patricide-12B-Unslop-Mell/1762652580.4937751", - "retrieved_timestamp": "1762652580.4937768", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "redrix/patricide-12B-Unslop-Mell", - "developer": "redrix", - "inference_platform": "unknown", - "id": "redrix/patricide-12B-Unslop-Mell", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40739016919551235 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5398666865853622 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13141993957703926 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4025833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3570478723404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/redrix/patricide-12B-Unslop-Mell/4b30f11e-a2b9-40e9-b080-9d7484a5d048.json 
b/data/hfopenllm_v2/redrix/patricide-12B-Unslop-Mell/4b30f11e-a2b9-40e9-b080-9d7484a5d048.json new file mode 100644 index 000000000..98a7c58db --- /dev/null +++ b/data/hfopenllm_v2/redrix/patricide-12B-Unslop-Mell/4b30f11e-a2b9-40e9-b080-9d7484a5d048.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/redrix_patricide-12B-Unslop-Mell/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "patricide-12B-Unslop-Mell", + "id": "redrix/patricide-12B-Unslop-Mell", + "developer": "redrix", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4074 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5399 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1314 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3238 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4026 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.357 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/refuelai/Llama-3-Refueled/befdae09-4caa-4996-a3ac-fe36310aaf01.json b/data/hfopenllm_v2/refuelai/Llama-3-Refueled/befdae09-4caa-4996-a3ac-fe36310aaf01.json new file mode 100644 index 000000000..d68f26a1b --- /dev/null +++ 
b/data/hfopenllm_v2/refuelai/Llama-3-Refueled/befdae09-4caa-4996-a3ac-fe36310aaf01.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/refuelai_Llama-3-Refueled/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Refueled", + "id": "refuelai/Llama-3-Refueled", + "developer": "refuelai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.462 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5871 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0665 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4454 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3095 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rhplus0831/maid-yuzu-v7/65e47b2d-982b-4fa8-b5bf-e002cf3cc293.json b/data/hfopenllm_v2/rhplus0831/maid-yuzu-v7/65e47b2d-982b-4fa8-b5bf-e002cf3cc293.json deleted file mode 100644 index 97ac7b61c..000000000 --- a/data/hfopenllm_v2/rhplus0831/maid-yuzu-v7/65e47b2d-982b-4fa8-b5bf-e002cf3cc293.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/rhplus0831_maid-yuzu-v7/1762652580.494505", - "retrieved_timestamp": "1762652580.494506", - "source_data": [ 
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "rhplus0831/maid-yuzu-v7", - "developer": "rhplus0831", - "inference_platform": "unknown", - "id": "rhplus0831/maid-yuzu-v7", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 46.703 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6462430794735745 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.480491692312673 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10196374622356495 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41362499999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35397273936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/rhplus0831/maid-yuzu-v7/8cd7fc1b-2873-4154-9de7-c0b8e5f4f5e9.json b/data/hfopenllm_v2/rhplus0831/maid-yuzu-v7/8cd7fc1b-2873-4154-9de7-c0b8e5f4f5e9.json new file mode 100644 index 000000000..92de2faa1 --- /dev/null +++ b/data/hfopenllm_v2/rhplus0831/maid-yuzu-v7/8cd7fc1b-2873-4154-9de7-c0b8e5f4f5e9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/rhplus0831_maid-yuzu-v7/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "maid-yuzu-v7", + "id": "rhplus0831/maid-yuzu-v7", + "developer": "rhplus0831", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 46.703 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6462 + } + }, + { + 
"evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4805 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.102 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4136 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.354 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rhymes-ai/Aria/611c449e-3d86-4dea-94a8-a2b7719fa1ae.json b/data/hfopenllm_v2/rhymes-ai/Aria/611c449e-3d86-4dea-94a8-a2b7719fa1ae.json deleted file mode 100644 index e4cf4e522..000000000 --- a/data/hfopenllm_v2/rhymes-ai/Aria/611c449e-3d86-4dea-94a8-a2b7719fa1ae.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/rhymes-ai_Aria/1762652580.4949272", - "retrieved_timestamp": "1762652580.494928", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "rhymes-ai/Aria", - "developer": "rhymes-ai", - "inference_platform": "unknown", - "id": "rhymes-ai/Aria", - "additional_details": { - "precision": "bfloat16", - "architecture": "AriaForConditionalGeneration", - "params_billions": 25.307 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4773079872516035 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5695312446413633 - } - }, - { - "evaluation_name": "MATH Level 5", - 
"metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1933534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3624161073825503 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44049202127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/rhymes-ai/Aria/7f6e5858-f5d4-41cf-9bb7-c3c82a55c392.json b/data/hfopenllm_v2/rhymes-ai/Aria/7f6e5858-f5d4-41cf-9bb7-c3c82a55c392.json new file mode 100644 index 000000000..f9084404d --- /dev/null +++ b/data/hfopenllm_v2/rhymes-ai/Aria/7f6e5858-f5d4-41cf-9bb7-c3c82a55c392.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/rhymes-ai_Aria/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Aria", + "id": "rhymes-ai/Aria", + "developer": "rhymes-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "AriaForConditionalGeneration", + "params_billions": 25.307 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4773 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5695 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1934 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3624 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + 
"source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4338 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4405 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rhysjones/phi-2-orange-v2/7b8bf84f-4101-41a1-b6ff-9cadbb5f84a3.json b/data/hfopenllm_v2/rhysjones/phi-2-orange-v2/7b8bf84f-4101-41a1-b6ff-9cadbb5f84a3.json new file mode 100644 index 000000000..c708415c7 --- /dev/null +++ b/data/hfopenllm_v2/rhysjones/phi-2-orange-v2/7b8bf84f-4101-41a1-b6ff-9cadbb5f84a3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/rhysjones_phi-2-orange-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "phi-2-orange-v2", + "id": "rhysjones/phi-2-orange-v2", + "developer": "rhysjones", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.367 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.477 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0408 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.363 + } + }, 
+ { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2532 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/riaz/FineLlama-3.1-8B/1f3a733d-a6d3-453b-9763-61992cd514b0.json b/data/hfopenllm_v2/riaz/FineLlama-3.1-8B/1f3a733d-a6d3-453b-9763-61992cd514b0.json new file mode 100644 index 000000000..8ee7040c9 --- /dev/null +++ b/data/hfopenllm_v2/riaz/FineLlama-3.1-8B/1f3a733d-a6d3-453b-9763-61992cd514b0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/riaz_FineLlama-3.1-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FineLlama-3.1-8B", + "id": "riaz/FineLlama-3.1-8B", + "developer": "riaz", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4373 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4586 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0514 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2752 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3763 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2964 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/riaz/FineLlama-3.1-8B/d0eed3c1-2226-48c5-a314-e429f66c5053.json b/data/hfopenllm_v2/riaz/FineLlama-3.1-8B/d0eed3c1-2226-48c5-a314-e429f66c5053.json new file mode 100644 index 000000000..d57a902b6 --- /dev/null +++ b/data/hfopenllm_v2/riaz/FineLlama-3.1-8B/d0eed3c1-2226-48c5-a314-e429f66c5053.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/riaz_FineLlama-3.1-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FineLlama-3.1-8B", + "id": "riaz/FineLlama-3.1-8B", + "developer": "riaz", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4137 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4565 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0453 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3776 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rmdhirr/Gluon-8B/957f02f1-45c7-4cce-b5aa-86bb5e485ad3.json b/data/hfopenllm_v2/rmdhirr/Gluon-8B/957f02f1-45c7-4cce-b5aa-86bb5e485ad3.json new file 
mode 100644 index 000000000..37d3c1fbe --- /dev/null +++ b/data/hfopenllm_v2/rmdhirr/Gluon-8B/957f02f1-45c7-4cce-b5aa-86bb5e485ad3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/rmdhirr_Gluon-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gluon-8B", + "id": "rmdhirr/Gluon-8B", + "developer": "rmdhirr", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5053 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5153 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1443 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4039 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3808 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rmdhirr/Gluon-8B/a1f5e06b-17f7-41d1-ab9d-c0e4b22d10cf.json b/data/hfopenllm_v2/rmdhirr/Gluon-8B/a1f5e06b-17f7-41d1-ab9d-c0e4b22d10cf.json deleted file mode 100644 index 80a28bb1d..000000000 --- a/data/hfopenllm_v2/rmdhirr/Gluon-8B/a1f5e06b-17f7-41d1-ab9d-c0e4b22d10cf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/rmdhirr_Gluon-8B/1762652580.496151", - "retrieved_timestamp": "1762652580.4961522", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "rmdhirr/Gluon-8B", - "developer": "rmdhirr", - "inference_platform": "unknown", - "id": "rmdhirr/Gluon-8B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5052848663767692 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5153305292144984 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14425981873111782 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4038854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38081781914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/rombodawg/Rombos-Coder-V2.5-Qwen-14b/55a01e8e-318a-4609-a862-bab4d62b3e7a.json b/data/hfopenllm_v2/rombodawg/Rombos-Coder-V2.5-Qwen-14b/55a01e8e-318a-4609-a862-bab4d62b3e7a.json new file mode 100644 index 000000000..c74361cf2 --- /dev/null +++ b/data/hfopenllm_v2/rombodawg/Rombos-Coder-V2.5-Qwen-14b/55a01e8e-318a-4609-a862-bab4d62b3e7a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-Coder-V2.5-Qwen-14b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Rombos-Coder-V2.5-Qwen-14b", + "id": "rombodawg/Rombos-Coder-V2.5-Qwen-14b", + "developer": "rombodawg", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.7047 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6165 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3301 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3915 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3939 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rombodawg/Rombos-Coder-V2.5-Qwen-7b/cbdcd76f-be8f-42fe-89ed-d1d09d9d785f.json b/data/hfopenllm_v2/rombodawg/Rombos-Coder-V2.5-Qwen-7b/cbdcd76f-be8f-42fe-89ed-d1d09d9d785f.json new file mode 100644 index 000000000..e153878a8 --- /dev/null +++ b/data/hfopenllm_v2/rombodawg/Rombos-Coder-V2.5-Qwen-7b/cbdcd76f-be8f-42fe-89ed-d1d09d9d785f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-Coder-V2.5-Qwen-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Rombos-Coder-V2.5-Qwen-7b", + "id": "rombodawg/Rombos-Coder-V2.5-Qwen-7b", + "developer": "rombodawg", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.621 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5077 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3338 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3979 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3398 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.5-Qwen-0.5b/c7b6515e-6f96-468b-8bc0-15212c31e790.json b/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.5-Qwen-0.5b/c7b6515e-6f96-468b-8bc0-15212c31e790.json new file mode 100644 index 000000000..65683c672 --- /dev/null +++ b/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.5-Qwen-0.5b/c7b6515e-6f96-468b-8bc0-15212c31e790.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5-Qwen-0.5b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Rombos-LLM-V2.5-Qwen-0.5b", + "id": "rombodawg/Rombos-LLM-V2.5-Qwen-0.5b", + "developer": "rombodawg", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2847 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3294 + } + }, + { + 
"evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.068 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3236 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1866 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.5-Qwen-1.5b/f27f3a1d-c19a-42b2-8b49-64ecfe5d3405.json b/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.5-Qwen-1.5b/f27f3a1d-c19a-42b2-8b49-64ecfe5d3405.json new file mode 100644 index 000000000..c6d4a9338 --- /dev/null +++ b/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.5-Qwen-1.5b/f27f3a1d-c19a-42b2-8b49-64ecfe5d3405.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5-Qwen-1.5b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Rombos-LLM-V2.5-Qwen-1.5b", + "id": "rombodawg/Rombos-LLM-V2.5-Qwen-1.5b", + "developer": "rombodawg", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3402 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4257 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0853 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2886 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4186 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2922 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.5-Qwen-14b/994aa481-627a-4bed-8719-9e874373cbc6.json b/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.5-Qwen-14b/994aa481-627a-4bed-8719-9e874373cbc6.json new file mode 100644 index 000000000..85be3f982 --- /dev/null +++ b/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.5-Qwen-14b/994aa481-627a-4bed-8719-9e874373cbc6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5-Qwen-14b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Rombos-LLM-V2.5-Qwen-14b", + "id": "rombodawg/Rombos-LLM-V2.5-Qwen-14b", + "developer": "rombodawg", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.584 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6481 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4554 + } + }, + { + 
"evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3716 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4717 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5376 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.5-Qwen-32b/9f5cd849-20b1-4e8d-9deb-f286dcfd9d6e.json b/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.5-Qwen-32b/9f5cd849-20b1-4e8d-9deb-f286dcfd9d6e.json new file mode 100644 index 000000000..85c6ec04a --- /dev/null +++ b/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.5-Qwen-32b/9f5cd849-20b1-4e8d-9deb-f286dcfd9d6e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5-Qwen-32b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Rombos-LLM-V2.5-Qwen-32b", + "id": "rombodawg/Rombos-LLM-V2.5-Qwen-32b", + "developer": "rombodawg", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6827 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7046 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4955 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3968 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5034 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5916 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.5-Qwen-3b/c4dd34f2-7acc-4a94-a9aa-3c6aeeae8a8c.json b/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.5-Qwen-3b/c4dd34f2-7acc-4a94-a9aa-3c6aeeae8a8c.json new file mode 100644 index 000000000..272c24433 --- /dev/null +++ b/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.5-Qwen-3b/c4dd34f2-7acc-4a94-a9aa-3c6aeeae8a8c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5-Qwen-3b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Rombos-LLM-V2.5-Qwen-3b", + "id": "rombodawg/Rombos-LLM-V2.5-Qwen-3b", + "developer": "rombodawg", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5342 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4809 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2795 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + 
"source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4042 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3761 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.5-Qwen-72b/e908b473-a015-4156-8e88-d67153479cb9.json b/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.5-Qwen-72b/e908b473-a015-4156-8e88-d67153479cb9.json new file mode 100644 index 000000000..04db7b85b --- /dev/null +++ b/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.5-Qwen-72b/e908b473-a015-4156-8e88-d67153479cb9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5-Qwen-72b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Rombos-LLM-V2.5-Qwen-72b", + "id": "rombodawg/Rombos-LLM-V2.5-Qwen-72b", + "developer": "rombodawg", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7155 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.723 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5423 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3985 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.4599 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5935 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.5-Qwen-7b/173af77d-7a51-4d5a-8fd3-366aaa5d78a0.json b/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.5-Qwen-7b/173af77d-7a51-4d5a-8fd3-366aaa5d78a0.json new file mode 100644 index 000000000..5fba641ae --- /dev/null +++ b/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.5-Qwen-7b/173af77d-7a51-4d5a-8fd3-366aaa5d78a0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5-Qwen-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Rombos-LLM-V2.5-Qwen-7b", + "id": "rombodawg/Rombos-LLM-V2.5-Qwen-7b", + "developer": "rombodawg", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6237 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5544 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3814 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.318 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4291 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4469 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.5.1-Qwen-3b/0bb65f09-323d-485f-886e-5a35c8bcd342.json b/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.5.1-Qwen-3b/0bb65f09-323d-485f-886e-5a35c8bcd342.json new file mode 100644 index 000000000..18b32860b --- /dev/null +++ b/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.5.1-Qwen-3b/0bb65f09-323d-485f-886e-5a35c8bcd342.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5.1-Qwen-3b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Rombos-LLM-V2.5.1-Qwen-3b", + "id": "rombodawg/Rombos-LLM-V2.5.1-Qwen-3b", + "developer": "rombodawg", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2566 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.39 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1208 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2626 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3991 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2741 + } + 
} + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.5.1-Qwen-3b/86b4c877-ef2d-4563-93a2-92d7e77eab5c.json b/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.5.1-Qwen-3b/86b4c877-ef2d-4563-93a2-92d7e77eab5c.json new file mode 100644 index 000000000..df1c02b1d --- /dev/null +++ b/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.5.1-Qwen-3b/86b4c877-ef2d-4563-93a2-92d7e77eab5c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5.1-Qwen-3b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Rombos-LLM-V2.5.1-Qwen-3b", + "id": "rombodawg/Rombos-LLM-V2.5.1-Qwen-3b", + "developer": "rombodawg", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2595 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3884 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0914 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3991 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2719 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.6-Nemotron-70b/be2ee3f6-37ee-4895-821a-3d3c7eb04eac.json 
b/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.6-Nemotron-70b/be2ee3f6-37ee-4895-821a-3d3c7eb04eac.json new file mode 100644 index 000000000..945721456 --- /dev/null +++ b/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.6-Nemotron-70b/be2ee3f6-37ee-4895-821a-3d3c7eb04eac.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.6-Nemotron-70b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Rombos-LLM-V2.6-Nemotron-70b", + "id": "rombodawg/Rombos-LLM-V2.6-Nemotron-70b", + "developer": "rombodawg", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7527 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6938 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3331 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.406 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4669 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5329 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.6-Nemotron-70b/caf5de06-ab13-45e4-ac51-d4e40796952e.json b/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.6-Nemotron-70b/caf5de06-ab13-45e4-ac51-d4e40796952e.json deleted file mode 100644 index 4e74d60db..000000000 --- 
a/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.6-Nemotron-70b/caf5de06-ab13-45e4-ac51-d4e40796952e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.6-Nemotron-70b/1762652580.499233", - "retrieved_timestamp": "1762652580.499234", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "rombodawg/Rombos-LLM-V2.6-Nemotron-70b", - "developer": "rombodawg", - "inference_platform": "unknown", - "id": "rombodawg/Rombos-LLM-V2.6-Nemotron-70b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7526551771521784 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6937699482580332 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3330815709969788 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40604026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46686458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5329122340425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.6-Qwen-14b/e574af17-dd3b-4c09-8689-ea598d44e562.json b/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.6-Qwen-14b/e574af17-dd3b-4c09-8689-ea598d44e562.json new file mode 100644 index 000000000..a382429d5 --- /dev/null +++ b/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.6-Qwen-14b/e574af17-dd3b-4c09-8689-ea598d44e562.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.6-Qwen-14b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Rombos-LLM-V2.6-Qwen-14b", + "id": "rombodawg/Rombos-LLM-V2.6-Qwen-14b", + "developer": "rombodawg", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 
+ } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8432 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6442 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5211 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3339 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4221 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4961 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rombodawg/rombos_Replete-Coder-Instruct-8b-Merged/83958185-047a-4356-918d-2f45f273c08a.json b/data/hfopenllm_v2/rombodawg/rombos_Replete-Coder-Instruct-8b-Merged/83958185-047a-4356-918d-2f45f273c08a.json new file mode 100644 index 000000000..3c606dff2 --- /dev/null +++ b/data/hfopenllm_v2/rombodawg/rombos_Replete-Coder-Instruct-8b-Merged/83958185-047a-4356-918d-2f45f273c08a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/rombodawg_rombos_Replete-Coder-Instruct-8b-Merged/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "rombos_Replete-Coder-Instruct-8b-Merged", + "id": "rombodawg/rombos_Replete-Coder-Instruct-8b-Merged", + "developer": "rombodawg", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + 
"source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5388 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4462 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0778 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2693 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.366 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1809 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rombodawg/rombos_Replete-Coder-Instruct-8b-Merged/929abd2b-3f19-4df3-81ab-406751d52919.json b/data/hfopenllm_v2/rombodawg/rombos_Replete-Coder-Instruct-8b-Merged/929abd2b-3f19-4df3-81ab-406751d52919.json deleted file mode 100644 index 75c6acd19..000000000 --- a/data/hfopenllm_v2/rombodawg/rombos_Replete-Coder-Instruct-8b-Merged/929abd2b-3f19-4df3-81ab-406751d52919.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/rombodawg_rombos_Replete-Coder-Instruct-8b-Merged/1762652580.499815", - "retrieved_timestamp": "1762652580.499816", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "rombodawg/rombos_Replete-Coder-Instruct-8b-Merged", - "developer": "rombodawg", - "inference_platform": "unknown", - "id": "rombodawg/rombos_Replete-Coder-Instruct-8b-Merged", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": 
"Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5387571643239937 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4461693860075828 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07779456193353475 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36603125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18085106382978725 - } - } - ] -} diff --git a/data/hfopenllm_v2/rombodawg/rombos_Replete-Coder-Llama3-8B/d04c6e84-0b63-4de1-9278-aa37c9d2c8e3.json b/data/hfopenllm_v2/rombodawg/rombos_Replete-Coder-Llama3-8B/d04c6e84-0b63-4de1-9278-aa37c9d2c8e3.json new file mode 100644 index 000000000..c4f4fe486 --- /dev/null +++ b/data/hfopenllm_v2/rombodawg/rombos_Replete-Coder-Llama3-8B/d04c6e84-0b63-4de1-9278-aa37c9d2c8e3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/rombodawg_rombos_Replete-Coder-Llama3-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "rombos_Replete-Coder-Llama3-8B", + "id": "rombodawg/rombos_Replete-Coder-Llama3-8B", + "developer": "rombodawg", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4714 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3276 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": 
"Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0393 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3966 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1335 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rootxhacker/Apollo-70B/14421b7b-6f4d-4b4f-91e1-27a9c0919498.json b/data/hfopenllm_v2/rootxhacker/Apollo-70B/14421b7b-6f4d-4b4f-91e1-27a9c0919498.json deleted file mode 100644 index 4c7bf262f..000000000 --- a/data/hfopenllm_v2/rootxhacker/Apollo-70B/14421b7b-6f4d-4b4f-91e1-27a9c0919498.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/rootxhacker_Apollo-70B/1762652580.500333", - "retrieved_timestamp": "1762652580.500333", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "rootxhacker/Apollo-70B", - "developer": "rootxhacker", - "inference_platform": "unknown", - "id": "rootxhacker/Apollo-70B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5098560707810831 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6804215148524603 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5611782477341389 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45721476510067116 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4947708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5279255319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/rootxhacker/Apollo-70B/a218e260-7f56-4676-af58-254bd84d0327.json b/data/hfopenllm_v2/rootxhacker/Apollo-70B/a218e260-7f56-4676-af58-254bd84d0327.json new file mode 100644 index 000000000..c04a8ad40 --- /dev/null +++ b/data/hfopenllm_v2/rootxhacker/Apollo-70B/a218e260-7f56-4676-af58-254bd84d0327.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/rootxhacker_Apollo-70B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Apollo-70B", + "id": "rootxhacker/Apollo-70B", + "developer": "rootxhacker", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5099 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6804 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5612 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4572 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4948 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", 
+ "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5279 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rootxhacker/Apollo_v2-32B/2a3e824e-8fb2-41ac-b548-30ea18ecdceb.json b/data/hfopenllm_v2/rootxhacker/Apollo_v2-32B/2a3e824e-8fb2-41ac-b548-30ea18ecdceb.json deleted file mode 100644 index 4f0c116a8..000000000 --- a/data/hfopenllm_v2/rootxhacker/Apollo_v2-32B/2a3e824e-8fb2-41ac-b548-30ea18ecdceb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/rootxhacker_Apollo_v2-32B/1762652580.500606", - "retrieved_timestamp": "1762652580.500606", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "rootxhacker/Apollo_v2-32B", - "developer": "rootxhacker", - "inference_platform": "unknown", - "id": "rootxhacker/Apollo_v2-32B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4280486885907171 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7072274795963693 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42749244712990936 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3783557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4993854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5869348404255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/rootxhacker/Apollo_v2-32B/f21fb2c8-4abe-40de-ab2c-9d23e95ee281.json b/data/hfopenllm_v2/rootxhacker/Apollo_v2-32B/f21fb2c8-4abe-40de-ab2c-9d23e95ee281.json new file mode 100644 index 000000000..c4804ceaf --- /dev/null +++ b/data/hfopenllm_v2/rootxhacker/Apollo_v2-32B/f21fb2c8-4abe-40de-ab2c-9d23e95ee281.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/rootxhacker_Apollo_v2-32B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": 
"Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Apollo_v2-32B", + "id": "rootxhacker/Apollo_v2-32B", + "developer": "rootxhacker", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.428 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7072 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4275 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3784 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5869 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rootxhacker/apollo-7B/ce364468-f5ef-4a29-8026-89e455fa4350.json b/data/hfopenllm_v2/rootxhacker/apollo-7B/ce364468-f5ef-4a29-8026-89e455fa4350.json deleted file mode 100644 index b9548dce8..000000000 --- a/data/hfopenllm_v2/rootxhacker/apollo-7B/ce364468-f5ef-4a29-8026-89e455fa4350.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/rootxhacker_apollo-7B/1762652580.500841", - "retrieved_timestamp": "1762652580.500842", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "rootxhacker/apollo-7B", - "developer": "rootxhacker", - "inference_platform": 
"unknown", - "id": "rootxhacker/apollo-7B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29533304964161755 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3636262699883149 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0256797583081571 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41312499999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17478390957446807 - } - } - ] -} diff --git a/data/hfopenllm_v2/rootxhacker/apollo-7B/da5774b2-8a6f-4f2d-8267-beb25490b06a.json b/data/hfopenllm_v2/rootxhacker/apollo-7B/da5774b2-8a6f-4f2d-8267-beb25490b06a.json new file mode 100644 index 000000000..eac1bfc10 --- /dev/null +++ b/data/hfopenllm_v2/rootxhacker/apollo-7B/da5774b2-8a6f-4f2d-8267-beb25490b06a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/rootxhacker_apollo-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "apollo-7B", + "id": "rootxhacker/apollo-7B", + "developer": "rootxhacker", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3636 + } + }, + { + "evaluation_name": "MATH Level 5", + 
"source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0257 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2785 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4131 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1748 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rsh345/mistral-ft-optimized-1218-NeuralHermes-2.5-Mistral-7B/18284816-2f69-41c5-8cf3-5209ed77cb7d.json b/data/hfopenllm_v2/rsh345/mistral-ft-optimized-1218-NeuralHermes-2.5-Mistral-7B/18284816-2f69-41c5-8cf3-5209ed77cb7d.json deleted file mode 100644 index 0f73c3302..000000000 --- a/data/hfopenllm_v2/rsh345/mistral-ft-optimized-1218-NeuralHermes-2.5-Mistral-7B/18284816-2f69-41c5-8cf3-5209ed77cb7d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/rsh345_mistral-ft-optimized-1218-NeuralHermes-2.5-Mistral-7B/1762652580.501065", - "retrieved_timestamp": "1762652580.501066", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "rsh345/mistral-ft-optimized-1218-NeuralHermes-2.5-Mistral-7B", - "developer": "rsh345", - "inference_platform": "unknown", - "id": "rsh345/mistral-ft-optimized-1218-NeuralHermes-2.5-Mistral-7B", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3891807071902552 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5188437309746964 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07326283987915408 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46719791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30535239361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/rsh345/mistral-ft-optimized-1218-NeuralHermes-2.5-Mistral-7B/274705bd-8eb6-4863-8998-f5d67c4ac827.json b/data/hfopenllm_v2/rsh345/mistral-ft-optimized-1218-NeuralHermes-2.5-Mistral-7B/274705bd-8eb6-4863-8998-f5d67c4ac827.json new file mode 100644 index 000000000..cf9929e17 --- /dev/null +++ b/data/hfopenllm_v2/rsh345/mistral-ft-optimized-1218-NeuralHermes-2.5-Mistral-7B/274705bd-8eb6-4863-8998-f5d67c4ac827.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/rsh345_mistral-ft-optimized-1218-NeuralHermes-2.5-Mistral-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistral-ft-optimized-1218-NeuralHermes-2.5-Mistral-7B", + "id": "rsh345/mistral-ft-optimized-1218-NeuralHermes-2.5-Mistral-7B", + "developer": "rsh345", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3892 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5188 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0733 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 
1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4672 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rubenroy/Geneva-12B-GCv2-5m/5b95cc2f-3378-45e7-9f56-6bb7e1ce4826.json b/data/hfopenllm_v2/rubenroy/Geneva-12B-GCv2-5m/5b95cc2f-3378-45e7-9f56-6bb7e1ce4826.json new file mode 100644 index 000000000..ca4143a6b --- /dev/null +++ b/data/hfopenllm_v2/rubenroy/Geneva-12B-GCv2-5m/5b95cc2f-3378-45e7-9f56-6bb7e1ce4826.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/rubenroy_Geneva-12B-GCv2-5m/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Geneva-12B-GCv2-5m", + "id": "rubenroy/Geneva-12B-GCv2-5m", + "developer": "rubenroy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2586 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5278 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0801 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2878 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy 
on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3525 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.325 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rubenroy/Geneva-12B-GCv2-5m/e6649e50-54ba-4788-a3b4-5aa3d6e8aed8.json b/data/hfopenllm_v2/rubenroy/Geneva-12B-GCv2-5m/e6649e50-54ba-4788-a3b4-5aa3d6e8aed8.json deleted file mode 100644 index 5546e08ba..000000000 --- a/data/hfopenllm_v2/rubenroy/Geneva-12B-GCv2-5m/e6649e50-54ba-4788-a3b4-5aa3d6e8aed8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/rubenroy_Geneva-12B-GCv2-5m/1762652580.501345", - "retrieved_timestamp": "1762652580.501346", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "rubenroy/Geneva-12B-GCv2-5m", - "developer": "rubenroy", - "inference_platform": "unknown", - "id": "rubenroy/Geneva-12B-GCv2-5m", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2586381911106974 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5278373390214104 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08006042296072508 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3524791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3249667553191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/rubenroy/Gilgamesh-72B/6918d1a3-e547-46b7-9062-274057c1f513.json b/data/hfopenllm_v2/rubenroy/Gilgamesh-72B/6918d1a3-e547-46b7-9062-274057c1f513.json new file mode 100644 index 000000000..5a7ac28c6 --- /dev/null 
+++ b/data/hfopenllm_v2/rubenroy/Gilgamesh-72B/6918d1a3-e547-46b7-9062-274057c1f513.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/rubenroy_Gilgamesh-72B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gilgamesh-72B", + "id": "rubenroy/Gilgamesh-72B", + "developer": "rubenroy", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8486 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7253 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4381 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3943 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4626 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5802 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rubenroy/Gilgamesh-72B/b577bd26-a9f9-4a50-bd2b-f47bc5222748.json b/data/hfopenllm_v2/rubenroy/Gilgamesh-72B/b577bd26-a9f9-4a50-bd2b-f47bc5222748.json deleted file mode 100644 index bed93c1c8..000000000 --- a/data/hfopenllm_v2/rubenroy/Gilgamesh-72B/b577bd26-a9f9-4a50-bd2b-f47bc5222748.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/rubenroy_Gilgamesh-72B/1762652580.5016088", - "retrieved_timestamp": "1762652580.5016088", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "rubenroy/Gilgamesh-72B", - "developer": "rubenroy", - "inference_platform": "unknown", - "id": "rubenroy/Gilgamesh-72B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8486006019583594 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7253327589560739 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39429530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46264583333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5802027925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/rubenroy/Zurich-14B-GCv2-5m/599deb3c-49f9-4c0b-af8d-78f9e166820b.json b/data/hfopenllm_v2/rubenroy/Zurich-14B-GCv2-5m/599deb3c-49f9-4c0b-af8d-78f9e166820b.json new file mode 100644 index 000000000..fc4afde1a --- /dev/null +++ b/data/hfopenllm_v2/rubenroy/Zurich-14B-GCv2-5m/599deb3c-49f9-4c0b-af8d-78f9e166820b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/rubenroy_Zurich-14B-GCv2-5m/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Zurich-14B-GCv2-5m", + "id": "rubenroy/Zurich-14B-GCv2-5m", + "developer": "rubenroy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6164 + } + 
}, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6308 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3074 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3616 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4874 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5233 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rubenroy/Zurich-14B-GCv2-5m/f9dca394-e108-48f3-a45d-a282f7b39098.json b/data/hfopenllm_v2/rubenroy/Zurich-14B-GCv2-5m/f9dca394-e108-48f3-a45d-a282f7b39098.json deleted file mode 100644 index 6afe6ef09..000000000 --- a/data/hfopenllm_v2/rubenroy/Zurich-14B-GCv2-5m/f9dca394-e108-48f3-a45d-a282f7b39098.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/rubenroy_Zurich-14B-GCv2-5m/1762652580.5018299", - "retrieved_timestamp": "1762652580.5018299", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "rubenroy/Zurich-14B-GCv2-5m", - "developer": "rubenroy", - "inference_platform": "unknown", - "id": "rubenroy/Zurich-14B-GCv2-5m", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6163679038285084 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.6308359017750411 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3074018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3615771812080537 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4874479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5232712765957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/ruizhe1217/sft-s1-qwen-0.5b/b4ea3f14-3787-434b-8f26-20ff640c0146.json b/data/hfopenllm_v2/ruizhe1217/sft-s1-qwen-0.5b/b4ea3f14-3787-434b-8f26-20ff640c0146.json new file mode 100644 index 000000000..739248056 --- /dev/null +++ b/data/hfopenllm_v2/ruizhe1217/sft-s1-qwen-0.5b/b4ea3f14-3787-434b-8f26-20ff640c0146.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ruizhe1217_sft-s1-qwen-0.5b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sft-s1-qwen-0.5b", + "id": "ruizhe1217/sft-s1-qwen-0.5b", + "developer": "ruizhe1217", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2749 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3301 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0619 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3196 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1892 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ruizhe1217/sft-s1-qwen-0.5b/fd0e4ea3-ed10-487d-85d7-df5669bc8edc.json b/data/hfopenllm_v2/ruizhe1217/sft-s1-qwen-0.5b/fd0e4ea3-ed10-487d-85d7-df5669bc8edc.json deleted file mode 100644 index 59f76d592..000000000 --- a/data/hfopenllm_v2/ruizhe1217/sft-s1-qwen-0.5b/fd0e4ea3-ed10-487d-85d7-df5669bc8edc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ruizhe1217_sft-s1-qwen-0.5b/1762652580.502058", - "retrieved_timestamp": "1762652580.502059", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ruizhe1217/sft-s1-qwen-0.5b", - "developer": "ruizhe1217", - "inference_platform": "unknown", - "id": "ruizhe1217/sft-s1-qwen-0.5b", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27487510915482033 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33005365550588683 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.061933534743202415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27097315436241615 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31958333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.1891622340425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/rwitz/go-bruins-v2/2f6a8cce-672f-4634-99ed-9d42df9cd26c.json b/data/hfopenllm_v2/rwitz/go-bruins-v2/2f6a8cce-672f-4634-99ed-9d42df9cd26c.json deleted file mode 100644 index 0d6068f12..000000000 --- a/data/hfopenllm_v2/rwitz/go-bruins-v2/2f6a8cce-672f-4634-99ed-9d42df9cd26c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/rwitz_go-bruins-v2/1762652580.5023239", - "retrieved_timestamp": "1762652580.502325", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "rwitz/go-bruins-v2", - "developer": "rwitz", - "inference_platform": "unknown", - "id": "rwitz/go-bruins-v2", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40958877999264176 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37988446841089685 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06722054380664652 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2760970744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/rwitz/go-bruins-v2/6952c527-ca23-494a-910c-1c027e4a5a29.json b/data/hfopenllm_v2/rwitz/go-bruins-v2/6952c527-ca23-494a-910c-1c027e4a5a29.json new file mode 100644 index 000000000..9ab1d084b --- /dev/null +++ b/data/hfopenllm_v2/rwitz/go-bruins-v2/6952c527-ca23-494a-910c-1c027e4a5a29.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/rwitz_go-bruins-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "go-bruins-v2", + "id": "rwitz/go-bruins-v2", + "developer": "rwitz", + "inference_platform": "unknown", + "additional_details": { + "precision": 
"float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4096 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3799 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0672 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2626 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4138 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2761 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sabersaleh/Llama2-7B-CPO/3f12e79c-dd1b-428d-9094-10a047205e3e.json b/data/hfopenllm_v2/sabersaleh/Llama2-7B-CPO/3f12e79c-dd1b-428d-9094-10a047205e3e.json new file mode 100644 index 000000000..63c7d223e --- /dev/null +++ b/data/hfopenllm_v2/sabersaleh/Llama2-7B-CPO/3f12e79c-dd1b-428d-9094-10a047205e3e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sabersaleh_Llama2-7B-CPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama2-7B-CPO", + "id": "sabersaleh/Llama2-7B-CPO", + "developer": "sabersaleh", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1545 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3458 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0136 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4048 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1606 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sabersaleh/Llama2-7B-DPO/c2ffce0d-069d-48bb-989c-6fb18bdd9059.json b/data/hfopenllm_v2/sabersaleh/Llama2-7B-DPO/c2ffce0d-069d-48bb-989c-6fb18bdd9059.json deleted file mode 100644 index 24dbb643d..000000000 --- a/data/hfopenllm_v2/sabersaleh/Llama2-7B-DPO/c2ffce0d-069d-48bb-989c-6fb18bdd9059.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sabersaleh_Llama2-7B-DPO/1762652580.50325", - "retrieved_timestamp": "1762652580.503252", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sabersaleh/Llama2-7B-DPO", - "developer": "sabersaleh", - "inference_platform": "unknown", - "id": "sabersaleh/Llama2-7B-DPO", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14533105493424114 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3512218731420535 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4113645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16256648936170212 - } - } - ] -} diff --git a/data/hfopenllm_v2/sabersaleh/Llama2-7B-DPO/d508da29-0288-4a0a-b727-fc5355515c5e.json b/data/hfopenllm_v2/sabersaleh/Llama2-7B-DPO/d508da29-0288-4a0a-b727-fc5355515c5e.json new file mode 100644 index 000000000..86e3966a1 --- /dev/null +++ b/data/hfopenllm_v2/sabersaleh/Llama2-7B-DPO/d508da29-0288-4a0a-b727-fc5355515c5e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sabersaleh_Llama2-7B-DPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama2-7B-DPO", + "id": "sabersaleh/Llama2-7B-DPO", + "developer": "sabersaleh", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1453 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3512 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0159 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, 
+ "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4114 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1626 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sabersaleh/Llama2-7B-IPO/48cf5a8a-70c6-4c55-8959-32d773d6dbcf.json b/data/hfopenllm_v2/sabersaleh/Llama2-7B-IPO/48cf5a8a-70c6-4c55-8959-32d773d6dbcf.json new file mode 100644 index 000000000..2dd58f3e1 --- /dev/null +++ b/data/hfopenllm_v2/sabersaleh/Llama2-7B-IPO/48cf5a8a-70c6-4c55-8959-32d773d6dbcf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sabersaleh_Llama2-7B-IPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama2-7B-IPO", + "id": "sabersaleh/Llama2-7B-IPO", + "developer": "sabersaleh", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1769 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3475 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0159 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + 
"dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4048 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1617 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sabersaleh/Llama2-7B-KTO/4bb7d331-f305-4c08-a073-87ba7b2cbde2.json b/data/hfopenllm_v2/sabersaleh/Llama2-7B-KTO/4bb7d331-f305-4c08-a073-87ba7b2cbde2.json new file mode 100644 index 000000000..1848858c0 --- /dev/null +++ b/data/hfopenllm_v2/sabersaleh/Llama2-7B-KTO/4bb7d331-f305-4c08-a073-87ba7b2cbde2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sabersaleh_Llama2-7B-KTO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama2-7B-KTO", + "id": "sabersaleh/Llama2-7B-KTO", + "developer": "sabersaleh", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1528 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3501 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0189 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.4167 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1636 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sabersaleh/Llama2-7B-SPO/94639454-c525-4e6f-af27-d92d45a9ac40.json b/data/hfopenllm_v2/sabersaleh/Llama2-7B-SPO/94639454-c525-4e6f-af27-d92d45a9ac40.json new file mode 100644 index 000000000..108215f39 --- /dev/null +++ b/data/hfopenllm_v2/sabersaleh/Llama2-7B-SPO/94639454-c525-4e6f-af27-d92d45a9ac40.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sabersaleh_Llama2-7B-SPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama2-7B-SPO", + "id": "sabersaleh/Llama2-7B-SPO", + "developer": "sabersaleh", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1567 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3383 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0196 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2768 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3874 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1757 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sabersaleh/Llama2-7B-SimPO/9fa81bb7-7abc-4764-9465-d61217590da5.json b/data/hfopenllm_v2/sabersaleh/Llama2-7B-SimPO/9fa81bb7-7abc-4764-9465-d61217590da5.json new file mode 100644 index 000000000..7ee98a8ff --- /dev/null +++ b/data/hfopenllm_v2/sabersaleh/Llama2-7B-SimPO/9fa81bb7-7abc-4764-9465-d61217590da5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sabersaleh_Llama2-7B-SimPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama2-7B-SimPO", + "id": "sabersaleh/Llama2-7B-SimPO", + "developer": "sabersaleh", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1659 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3489 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0159 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4007 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1641 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sabersaleh/Llama3/9a683492-4057-4de4-a30a-aa66becffb13.json 
b/data/hfopenllm_v2/sabersaleh/Llama3/9a683492-4057-4de4-a30a-aa66becffb13.json new file mode 100644 index 000000000..8c46cb2b7 --- /dev/null +++ b/data/hfopenllm_v2/sabersaleh/Llama3/9a683492-4057-4de4-a30a-aa66becffb13.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sabersaleh_Llama3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3", + "id": "sabersaleh/Llama3", + "developer": "sabersaleh", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3321 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4782 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0566 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3933 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3162 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sabersalehk/Llama3-001-300/b917df45-62f2-4c3b-943a-ad6c98ef8bc1.json b/data/hfopenllm_v2/sabersalehk/Llama3-001-300/b917df45-62f2-4c3b-943a-ad6c98ef8bc1.json new file mode 100644 index 000000000..aa57d205c --- /dev/null +++ b/data/hfopenllm_v2/sabersalehk/Llama3-001-300/b917df45-62f2-4c3b-943a-ad6c98ef8bc1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/sabersalehk_Llama3-001-300/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3-001-300", + "id": "sabersalehk/Llama3-001-300", + "developer": "sabersalehk", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3179 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4745 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0529 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4064 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3158 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sabersalehk/Llama3-SimPO/ba658bc7-b89d-4fb7-a794-f48bd3715a49.json b/data/hfopenllm_v2/sabersalehk/Llama3-SimPO/ba658bc7-b89d-4fb7-a794-f48bd3715a49.json new file mode 100644 index 000000000..ec3a89b1c --- /dev/null +++ b/data/hfopenllm_v2/sabersalehk/Llama3-SimPO/ba658bc7-b89d-4fb7-a794-f48bd3715a49.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sabersalehk_Llama3-SimPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": 
"third_party" + }, + "model_info": { + "name": "Llama3-SimPO", + "id": "sabersalehk/Llama3-SimPO", + "developer": "sabersalehk", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3642 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4874 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0574 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4046 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3157 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sabersalehk/Llama3_001_200/93f79cdc-ffd7-4299-9876-c0c7bed55ae5.json b/data/hfopenllm_v2/sabersalehk/Llama3_001_200/93f79cdc-ffd7-4299-9876-c0c7bed55ae5.json new file mode 100644 index 000000000..71b6274f4 --- /dev/null +++ b/data/hfopenllm_v2/sabersalehk/Llama3_001_200/93f79cdc-ffd7-4299-9876-c0c7bed55ae5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sabersalehk_Llama3_001_200/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3_001_200", + "id": "sabersalehk/Llama3_001_200", + "developer": "sabersalehk", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + 
"params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3218 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4728 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0514 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4037 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3183 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sabersalehk/Llama3_01_300/5a91b0bf-b043-41d2-960d-5f0e78abc400.json b/data/hfopenllm_v2/sabersalehk/Llama3_01_300/5a91b0bf-b043-41d2-960d-5f0e78abc400.json new file mode 100644 index 000000000..5ecbad865 --- /dev/null +++ b/data/hfopenllm_v2/sabersalehk/Llama3_01_300/5a91b0bf-b043-41d2-960d-5f0e78abc400.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sabersalehk_Llama3_01_300/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3_01_300", + "id": "sabersalehk/Llama3_01_300", + "developer": "sabersalehk", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", 
+ "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2959 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4691 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0498 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4065 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3124 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/saishf/Fimbulvetr-Kuro-Lotus-10.7B/263f56e5-b578-475a-9bc4-b5ffc142f9e2.json b/data/hfopenllm_v2/saishf/Fimbulvetr-Kuro-Lotus-10.7B/263f56e5-b578-475a-9bc4-b5ffc142f9e2.json new file mode 100644 index 000000000..603e3b142 --- /dev/null +++ b/data/hfopenllm_v2/saishf/Fimbulvetr-Kuro-Lotus-10.7B/263f56e5-b578-475a-9bc4-b5ffc142f9e2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/saishf_Fimbulvetr-Kuro-Lotus-10.7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fimbulvetr-Kuro-Lotus-10.7B", + "id": "saishf/Fimbulvetr-Kuro-Lotus-10.7B", + "developer": "saishf", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 10.732 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4939 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": 
"BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4342 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0536 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4445 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3389 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/saishf/Fimbulvetr-Kuro-Lotus-10.7B/941a914d-0ca4-4896-9dfb-929c08c8651b.json b/data/hfopenllm_v2/saishf/Fimbulvetr-Kuro-Lotus-10.7B/941a914d-0ca4-4896-9dfb-929c08c8651b.json deleted file mode 100644 index c04a0e359..000000000 --- a/data/hfopenllm_v2/saishf/Fimbulvetr-Kuro-Lotus-10.7B/941a914d-0ca4-4896-9dfb-929c08c8651b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/saishf_Fimbulvetr-Kuro-Lotus-10.7B/1762652580.5057359", - "retrieved_timestamp": "1762652580.5057359", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "saishf/Fimbulvetr-Kuro-Lotus-10.7B", - "developer": "saishf", - "inference_platform": "unknown", - "id": "saishf/Fimbulvetr-Kuro-Lotus-10.7B", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 10.732 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49394384677101205 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4342316286386943 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4445104166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33892952127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/saishf/Neural-SOVLish-Devil-8B-L3/9219ff66-73ba-45d8-99a0-23d23b3555ba.json b/data/hfopenllm_v2/saishf/Neural-SOVLish-Devil-8B-L3/9219ff66-73ba-45d8-99a0-23d23b3555ba.json new file mode 100644 index 000000000..11c0ba6c8 --- /dev/null +++ b/data/hfopenllm_v2/saishf/Neural-SOVLish-Devil-8B-L3/9219ff66-73ba-45d8-99a0-23d23b3555ba.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/saishf_Neural-SOVLish-Devil-8B-L3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Neural-SOVLish-Devil-8B-L3", + "id": "saishf/Neural-SOVLish-Devil-8B-L3", + "developer": "saishf", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4199 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5142 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0891 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.411 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3807 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/saishf/Neural-SOVLish-Devil-8B-L3/d12855a1-81cb-4fab-b36e-dbee6c6d69a9.json b/data/hfopenllm_v2/saishf/Neural-SOVLish-Devil-8B-L3/d12855a1-81cb-4fab-b36e-dbee6c6d69a9.json deleted file mode 100644 index 5b86a63e6..000000000 --- a/data/hfopenllm_v2/saishf/Neural-SOVLish-Devil-8B-L3/d12855a1-81cb-4fab-b36e-dbee6c6d69a9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/saishf_Neural-SOVLish-Devil-8B-L3/1762652580.506007", - "retrieved_timestamp": "1762652580.506007", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "saishf/Neural-SOVLish-Devil-8B-L3", - "developer": "saishf", - "inference_platform": "unknown", - "id": "saishf/Neural-SOVLish-Devil-8B-L3", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41988036188424493 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5141802159065874 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4109583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3807347074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/saishshinde15/TethysAI_Base_Reasoning/74cb7205-e6c9-4faf-a84e-c15daa2ba62b.json b/data/hfopenllm_v2/saishshinde15/TethysAI_Base_Reasoning/74cb7205-e6c9-4faf-a84e-c15daa2ba62b.json deleted file mode 100644 index e5b5d1074..000000000 --- a/data/hfopenllm_v2/saishshinde15/TethysAI_Base_Reasoning/74cb7205-e6c9-4faf-a84e-c15daa2ba62b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/saishshinde15_TethysAI_Base_Reasoning/1762652580.5062242", - "retrieved_timestamp": "1762652580.5062249", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "saishshinde15/TethysAI_Base_Reasoning", - "developer": "saishshinde15", - "inference_platform": "unknown", - "id": "saishshinde15/TethysAI_Base_Reasoning", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6368757119997164 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4518558867290183 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31419939577039274 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4074583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3236369680851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/saishshinde15/TethysAI_Base_Reasoning/b2328396-e9b2-464d-94e4-f03db19144ea.json b/data/hfopenllm_v2/saishshinde15/TethysAI_Base_Reasoning/b2328396-e9b2-464d-94e4-f03db19144ea.json new file mode 100644 index 000000000..75ae575a6 --- /dev/null +++ b/data/hfopenllm_v2/saishshinde15/TethysAI_Base_Reasoning/b2328396-e9b2-464d-94e4-f03db19144ea.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/saishshinde15_TethysAI_Base_Reasoning/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": 
"Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TethysAI_Base_Reasoning", + "id": "saishshinde15/TethysAI_Base_Reasoning", + "developer": "saishshinde15", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6369 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4519 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3142 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2861 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4075 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3236 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/saishshinde15/TethysAI_Vortex/3f895edf-8f54-48ff-a731-666144af0fda.json b/data/hfopenllm_v2/saishshinde15/TethysAI_Vortex/3f895edf-8f54-48ff-a731-666144af0fda.json new file mode 100644 index 000000000..ce5a5eb6e --- /dev/null +++ b/data/hfopenllm_v2/saishshinde15/TethysAI_Vortex/3f895edf-8f54-48ff-a731-666144af0fda.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/saishshinde15_TethysAI_Vortex/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TethysAI_Vortex", + "id": "saishshinde15/TethysAI_Vortex", + "developer": "saishshinde15", + "inference_platform": "unknown", + 
"additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4298 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4749 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.315 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4458 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3241 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/saishshinde15/TethysAI_Vortex/6e20bb3a-728d-40ef-b6ca-91b0dde02da4.json b/data/hfopenllm_v2/saishshinde15/TethysAI_Vortex/6e20bb3a-728d-40ef-b6ca-91b0dde02da4.json deleted file mode 100644 index 22534d13a..000000000 --- a/data/hfopenllm_v2/saishshinde15/TethysAI_Vortex/6e20bb3a-728d-40ef-b6ca-91b0dde02da4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/saishshinde15_TethysAI_Vortex/1762652580.5066721", - "retrieved_timestamp": "1762652580.5066729", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "saishshinde15/TethysAI_Vortex", - "developer": "saishshinde15", - "inference_platform": "unknown", - "id": "saishshinde15/TethysAI_Vortex", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - 
}, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4297718941297978 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4749261293502527 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3149546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44578125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3240525265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/saishshinde15/TethysAI_Vortex_Reasoning/79022531-2599-4c19-93e0-ecdbde7bf736.json b/data/hfopenllm_v2/saishshinde15/TethysAI_Vortex_Reasoning/79022531-2599-4c19-93e0-ecdbde7bf736.json deleted file mode 100644 index f9683860e..000000000 --- a/data/hfopenllm_v2/saishshinde15/TethysAI_Vortex_Reasoning/79022531-2599-4c19-93e0-ecdbde7bf736.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/saishshinde15_TethysAI_Vortex_Reasoning/1762652580.506901", - "retrieved_timestamp": "1762652580.506902", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "saishshinde15/TethysAI_Vortex_Reasoning", - "developer": "saishshinde15", - "inference_platform": "unknown", - "id": "saishshinde15/TethysAI_Vortex_Reasoning", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40211970903868405 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4693805860486275 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.21450151057401812 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40844791666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3380984042553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/saishshinde15/TethysAI_Vortex_Reasoning/b48b8e16-a555-466b-8b1c-246137223311.json b/data/hfopenllm_v2/saishshinde15/TethysAI_Vortex_Reasoning/b48b8e16-a555-466b-8b1c-246137223311.json new file mode 100644 index 000000000..5425463aa --- /dev/null +++ b/data/hfopenllm_v2/saishshinde15/TethysAI_Vortex_Reasoning/b48b8e16-a555-466b-8b1c-246137223311.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/saishshinde15_TethysAI_Vortex_Reasoning/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TethysAI_Vortex_Reasoning", + "id": "saishshinde15/TethysAI_Vortex_Reasoning", + "developer": "saishshinde15", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4021 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4694 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2145 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + 
"source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4084 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3381 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sakaltcommunity/novablast-preview/588d2387-29de-41bc-8233-674081948787.json b/data/hfopenllm_v2/sakaltcommunity/novablast-preview/588d2387-29de-41bc-8233-674081948787.json deleted file mode 100644 index ae9f37aec..000000000 --- a/data/hfopenllm_v2/sakaltcommunity/novablast-preview/588d2387-29de-41bc-8233-674081948787.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sakaltcommunity_novablast-preview/1762652580.507118", - "retrieved_timestamp": "1762652580.5071192", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sakaltcommunity/novablast-preview", - "developer": "sakaltcommunity", - "inference_platform": "unknown", - "id": "sakaltcommunity/novablast-preview", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4530279657974175 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7042765234852668 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48942598187311176 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38171140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5021145833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5915059840425532 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/sakaltcommunity/novablast-preview/5fdcb98f-4c50-4cdb-bd99-dd32efc6d6f3.json b/data/hfopenllm_v2/sakaltcommunity/novablast-preview/5fdcb98f-4c50-4cdb-bd99-dd32efc6d6f3.json new file mode 100644 index 000000000..899a6a5a1 --- /dev/null +++ b/data/hfopenllm_v2/sakaltcommunity/novablast-preview/5fdcb98f-4c50-4cdb-bd99-dd32efc6d6f3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sakaltcommunity_novablast-preview/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "novablast-preview", + "id": "sakaltcommunity/novablast-preview", + "developer": "sakaltcommunity", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.453 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7043 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4894 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3817 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5021 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5915 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sakaltcommunity/sakaltum-7b/5fdd75fd-6e57-4ba4-8b6a-58998ff88bd9.json b/data/hfopenllm_v2/sakaltcommunity/sakaltum-7b/5fdd75fd-6e57-4ba4-8b6a-58998ff88bd9.json deleted file mode 100644 index 
addb9cf44..000000000 --- a/data/hfopenllm_v2/sakaltcommunity/sakaltum-7b/5fdd75fd-6e57-4ba4-8b6a-58998ff88bd9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sakaltcommunity_sakaltum-7b/1762652580.5073972", - "retrieved_timestamp": "1762652580.507398", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sakaltcommunity/sakaltum-7b", - "developer": "sakaltcommunity", - "inference_platform": "unknown", - "id": "sakaltcommunity/sakaltum-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2603868845773658 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4575213514148995 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3775 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2769281914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/sakaltcommunity/sakaltum-7b/d49c5e72-0dd0-4663-a310-9cd9bf1f5150.json b/data/hfopenllm_v2/sakaltcommunity/sakaltum-7b/d49c5e72-0dd0-4663-a310-9cd9bf1f5150.json new file mode 100644 index 000000000..3cafb3d05 --- /dev/null +++ b/data/hfopenllm_v2/sakaltcommunity/sakaltum-7b/d49c5e72-0dd0-4663-a310-9cd9bf1f5150.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sakaltcommunity_sakaltum-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sakaltum-7b", + "id": "sakaltcommunity/sakaltum-7b", + "developer": "sakaltcommunity", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": 
"IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2604 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4575 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0295 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3775 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2769 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sakhan10/quantized_open_llama_3b_v2/0176903f-e6ca-4f21-b98a-00bc443bf244.json b/data/hfopenllm_v2/sakhan10/quantized_open_llama_3b_v2/0176903f-e6ca-4f21-b98a-00bc443bf244.json new file mode 100644 index 000000000..19cf5433b --- /dev/null +++ b/data/hfopenllm_v2/sakhan10/quantized_open_llama_3b_v2/0176903f-e6ca-4f21-b98a-00bc443bf244.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sakhan10_quantized_open_llama_3b_v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "quantized_open_llama_3b_v2", + "id": "sakhan10/quantized_open_llama_3b_v2", + "developer": "sakhan10", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1872 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2768 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3682 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1095 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/saltlux/luxia-21.4b-alignment-v1.0/11f32afc-95c1-4531-ae45-5a0974d36b3a.json b/data/hfopenllm_v2/saltlux/luxia-21.4b-alignment-v1.0/11f32afc-95c1-4531-ae45-5a0974d36b3a.json new file mode 100644 index 000000000..30efb3c9e --- /dev/null +++ b/data/hfopenllm_v2/saltlux/luxia-21.4b-alignment-v1.0/11f32afc-95c1-4531-ae45-5a0974d36b3a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/saltlux_luxia-21.4b-alignment-v1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "luxia-21.4b-alignment-v1.0", + "id": "saltlux/luxia-21.4b-alignment-v1.0", + "developer": "saltlux", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 21.421 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3693 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": 
"hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6373 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0974 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4328 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3403 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/saltlux/luxia-21.4b-alignment-v1.0/fe959cc1-17bd-4e87-b9b7-84d3adddbedb.json b/data/hfopenllm_v2/saltlux/luxia-21.4b-alignment-v1.0/fe959cc1-17bd-4e87-b9b7-84d3adddbedb.json deleted file mode 100644 index 994505910..000000000 --- a/data/hfopenllm_v2/saltlux/luxia-21.4b-alignment-v1.0/fe959cc1-17bd-4e87-b9b7-84d3adddbedb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/saltlux_luxia-21.4b-alignment-v1.0/1762652580.507964", - "retrieved_timestamp": "1762652580.5079648", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "saltlux/luxia-21.4b-alignment-v1.0", - "developer": "saltlux", - "inference_platform": "unknown", - "id": "saltlux/luxia-21.4b-alignment-v1.0", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 21.421 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36929679915956326 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6373342606775594 - } - }, - { - "evaluation_name": "MATH 
Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09743202416918428 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43284374999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34034242021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/saltlux/luxia-21.4b-alignment-v1.2/70657dd7-63cf-40f4-92a0-1097fc1ce9ae.json b/data/hfopenllm_v2/saltlux/luxia-21.4b-alignment-v1.2/70657dd7-63cf-40f4-92a0-1097fc1ce9ae.json new file mode 100644 index 000000000..1f660f4e7 --- /dev/null +++ b/data/hfopenllm_v2/saltlux/luxia-21.4b-alignment-v1.2/70657dd7-63cf-40f4-92a0-1097fc1ce9ae.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/saltlux_luxia-21.4b-alignment-v1.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "luxia-21.4b-alignment-v1.2", + "id": "saltlux/luxia-21.4b-alignment-v1.2", + "developer": "saltlux", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 21.421 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4115 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6371 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0846 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + 
}, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4459 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3473 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/saltlux/luxia-21.4b-alignment-v1.2/b89b30bb-fbaa-4ac6-8535-9f31cf87eb55.json b/data/hfopenllm_v2/saltlux/luxia-21.4b-alignment-v1.2/b89b30bb-fbaa-4ac6-8535-9f31cf87eb55.json deleted file mode 100644 index 862b853f6..000000000 --- a/data/hfopenllm_v2/saltlux/luxia-21.4b-alignment-v1.2/b89b30bb-fbaa-4ac6-8535-9f31cf87eb55.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/saltlux_luxia-21.4b-alignment-v1.2/1762652580.508301", - "retrieved_timestamp": "1762652580.5083032", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "saltlux/luxia-21.4b-alignment-v1.2", - "developer": "saltlux", - "inference_platform": "unknown", - "id": "saltlux/luxia-21.4b-alignment-v1.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 21.421 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41153694419695297 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6371180708112368 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4458958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.34732380319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/sam-paech/Darkest-muse-v1/53cf325b-6f32-4791-8f95-8b982ea03b23.json b/data/hfopenllm_v2/sam-paech/Darkest-muse-v1/53cf325b-6f32-4791-8f95-8b982ea03b23.json new file mode 100644 index 000000000..3a8e6c998 --- /dev/null +++ b/data/hfopenllm_v2/sam-paech/Darkest-muse-v1/53cf325b-6f32-4791-8f95-8b982ea03b23.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sam-paech_Darkest-muse-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Darkest-muse-v1", + "id": "sam-paech/Darkest-muse-v1", + "developer": "sam-paech", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7344 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5968 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2145 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.344 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4502 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4184 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sam-paech/Darkest-muse-v1/dae1ceb0-97b1-4285-b9db-912d7b4b01c7.json b/data/hfopenllm_v2/sam-paech/Darkest-muse-v1/dae1ceb0-97b1-4285-b9db-912d7b4b01c7.json deleted file mode 
100644 index b99904bfa..000000000 --- a/data/hfopenllm_v2/sam-paech/Darkest-muse-v1/dae1ceb0-97b1-4285-b9db-912d7b4b01c7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sam-paech_Darkest-muse-v1/1762652580.508588", - "retrieved_timestamp": "1762652580.508589", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sam-paech/Darkest-muse-v1", - "developer": "sam-paech", - "inference_platform": "unknown", - "id": "sam-paech/Darkest-muse-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7344202272193336 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5968439530708949 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21450151057401812 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34395973154362414 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4502083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4183843085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/sam-paech/Delirium-v1/78dd5568-0d0d-4cc5-ad1a-bfba857c827e.json b/data/hfopenllm_v2/sam-paech/Delirium-v1/78dd5568-0d0d-4cc5-ad1a-bfba857c827e.json deleted file mode 100644 index 63619707f..000000000 --- a/data/hfopenllm_v2/sam-paech/Delirium-v1/78dd5568-0d0d-4cc5-ad1a-bfba857c827e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sam-paech_Delirium-v1/1762652580.508875", - "retrieved_timestamp": "1762652580.508876", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sam-paech/Delirium-v1", - "developer": "sam-paech", - "inference_platform": "unknown", - "id": "sam-paech/Delirium-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": 
"Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7207564816908026 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5962113834521733 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2107250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34312080536912754 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45144791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4189660904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/sam-paech/Delirium-v1/8c50491b-6ed4-4f38-9d3f-d5168600cf4f.json b/data/hfopenllm_v2/sam-paech/Delirium-v1/8c50491b-6ed4-4f38-9d3f-d5168600cf4f.json new file mode 100644 index 000000000..2fdabfe78 --- /dev/null +++ b/data/hfopenllm_v2/sam-paech/Delirium-v1/8c50491b-6ed4-4f38-9d3f-d5168600cf4f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sam-paech_Delirium-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Delirium-v1", + "id": "sam-paech/Delirium-v1", + "developer": "sam-paech", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7208 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5962 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2107 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3431 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4514 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.419 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sam-paech/Quill-v1/248541b3-aeae-429d-93ae-06cc3bc82cd8.json b/data/hfopenllm_v2/sam-paech/Quill-v1/248541b3-aeae-429d-93ae-06cc3bc82cd8.json deleted file mode 100644 index 0fc96e4c8..000000000 --- a/data/hfopenllm_v2/sam-paech/Quill-v1/248541b3-aeae-429d-93ae-06cc3bc82cd8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sam-paech_Quill-v1/1762652580.5091672", - "retrieved_timestamp": "1762652580.5091681", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sam-paech/Quill-v1", - "developer": "sam-paech", - "inference_platform": "unknown", - "id": "sam-paech/Quill-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.712213593265868 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5969226347989487 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2122356495468278 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33976510067114096 - } 
- }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45547916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4171376329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/sam-paech/Quill-v1/7adf79de-a51d-4b87-989a-c218ec6d99e3.json b/data/hfopenllm_v2/sam-paech/Quill-v1/7adf79de-a51d-4b87-989a-c218ec6d99e3.json new file mode 100644 index 000000000..cd6582f9b --- /dev/null +++ b/data/hfopenllm_v2/sam-paech/Quill-v1/7adf79de-a51d-4b87-989a-c218ec6d99e3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sam-paech_Quill-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quill-v1", + "id": "sam-paech/Quill-v1", + "developer": "sam-paech", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7122 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5969 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2122 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3398 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4555 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4171 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sarvamai/OpenHathi-7B-Hi-v0.1-Base/92358e5a-5e73-4747-9e92-e5ac003b97f7.json b/data/hfopenllm_v2/sarvamai/OpenHathi-7B-Hi-v0.1-Base/92358e5a-5e73-4747-9e92-e5ac003b97f7.json new file mode 100644 index 000000000..10e232e86 --- /dev/null +++ b/data/hfopenllm_v2/sarvamai/OpenHathi-7B-Hi-v0.1-Base/92358e5a-5e73-4747-9e92-e5ac003b97f7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sarvamai_OpenHathi-7B-Hi-v0.1-Base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenHathi-7B-Hi-v0.1-Base", + "id": "sarvamai/OpenHathi-7B-Hi-v0.1-Base", + "developer": "sarvamai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.87 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1804 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3354 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0083 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2534 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3658 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1543 + } + } + ] +} \ No newline at 
end of file diff --git a/data/hfopenllm_v2/sarvamai/OpenHathi-7B-Hi-v0.1-Base/e0c03300-a08f-409e-9f39-f00d5e9e126f.json b/data/hfopenllm_v2/sarvamai/OpenHathi-7B-Hi-v0.1-Base/e0c03300-a08f-409e-9f39-f00d5e9e126f.json deleted file mode 100644 index bdbbd54f8..000000000 --- a/data/hfopenllm_v2/sarvamai/OpenHathi-7B-Hi-v0.1-Base/e0c03300-a08f-409e-9f39-f00d5e9e126f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sarvamai_OpenHathi-7B-Hi-v0.1-Base/1762652580.509491", - "retrieved_timestamp": "1762652580.5094929", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sarvamai/OpenHathi-7B-Hi-v0.1-Base", - "developer": "sarvamai", - "inference_platform": "unknown", - "id": "sarvamai/OpenHathi-7B-Hi-v0.1-Base", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.87 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18040244329490196 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33540458231510667 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36584375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15433843085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/schnapss/testmerge-7b/f1636512-b98f-4fe4-adf3-abd556dd0ab9.json b/data/hfopenllm_v2/schnapss/testmerge-7b/f1636512-b98f-4fe4-adf3-abd556dd0ab9.json new file mode 100644 index 000000000..5594f41d4 --- /dev/null +++ b/data/hfopenllm_v2/schnapss/testmerge-7b/f1636512-b98f-4fe4-adf3-abd556dd0ab9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/schnapss_testmerge-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "testmerge-7b", + "id": "schnapss/testmerge-7b", + 
"developer": "schnapss", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3922 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5187 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0687 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4686 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.306 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/schnapss/testmerge-7b/faa7be96-1419-48be-9b95-e97689296de0.json b/data/hfopenllm_v2/schnapss/testmerge-7b/faa7be96-1419-48be-9b95-e97689296de0.json deleted file mode 100644 index 81850f341..000000000 --- a/data/hfopenllm_v2/schnapss/testmerge-7b/faa7be96-1419-48be-9b95-e97689296de0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/schnapss_testmerge-7b/1762652580.509877", - "retrieved_timestamp": "1762652580.509878", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "schnapss/testmerge-7b", - "developer": "schnapss", - "inference_platform": "unknown", - "id": "schnapss/testmerge-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - 
"params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39222817679313116 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5187478405637375 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06873111782477341 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4685625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30601728723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/sci-m-wang/Mistral-7B-Instruct-sa-v0.1/8125700c-d9e7-4d6e-9b78-049331dd571b.json b/data/hfopenllm_v2/sci-m-wang/Mistral-7B-Instruct-sa-v0.1/8125700c-d9e7-4d6e-9b78-049331dd571b.json deleted file mode 100644 index d620c9147..000000000 --- a/data/hfopenllm_v2/sci-m-wang/Mistral-7B-Instruct-sa-v0.1/8125700c-d9e7-4d6e-9b78-049331dd571b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sci-m-wang_Mistral-7B-Instruct-sa-v0.1/1762652580.510147", - "retrieved_timestamp": "1762652580.510148", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sci-m-wang/Mistral-7B-Instruct-sa-v0.1", - "developer": "sci-m-wang", - "inference_platform": "unknown", - "id": "sci-m-wang/Mistral-7B-Instruct-sa-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 14.483 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4335186194851882 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32727821561411724 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38999999999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2362034574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/sci-m-wang/Mistral-7B-Instruct-sa-v0.1/9333afdd-4866-412b-b11b-dfb118a06db9.json b/data/hfopenllm_v2/sci-m-wang/Mistral-7B-Instruct-sa-v0.1/9333afdd-4866-412b-b11b-dfb118a06db9.json new file mode 100644 index 000000000..792d4db80 --- /dev/null +++ b/data/hfopenllm_v2/sci-m-wang/Mistral-7B-Instruct-sa-v0.1/9333afdd-4866-412b-b11b-dfb118a06db9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sci-m-wang_Mistral-7B-Instruct-sa-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-7B-Instruct-sa-v0.1", + "id": "sci-m-wang/Mistral-7B-Instruct-sa-v0.1", + "developer": "sci-m-wang", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 14.483 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4335 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3273 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0144 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + 
"source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.39 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2362 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sci-m-wang/Phi-3-mini-4k-instruct-sa-v0.1/319484e0-12aa-4212-b55f-d19efdd2f719.json b/data/hfopenllm_v2/sci-m-wang/Phi-3-mini-4k-instruct-sa-v0.1/319484e0-12aa-4212-b55f-d19efdd2f719.json deleted file mode 100644 index b6d280f58..000000000 --- a/data/hfopenllm_v2/sci-m-wang/Phi-3-mini-4k-instruct-sa-v0.1/319484e0-12aa-4212-b55f-d19efdd2f719.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sci-m-wang_Phi-3-mini-4k-instruct-sa-v0.1/1762652580.510415", - "retrieved_timestamp": "1762652580.510418", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sci-m-wang/Phi-3-mini-4k-instruct-sa-v0.1", - "developer": "sci-m-wang", - "inference_platform": "unknown", - "id": "sci-m-wang/Phi-3-mini-4k-instruct-sa-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 7.642 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5020623057930734 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5502038722383045 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14803625377643503 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40730208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39852061170212766 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/sci-m-wang/Phi-3-mini-4k-instruct-sa-v0.1/840c0e19-6d75-47a2-b64b-f9c51cb1dcff.json b/data/hfopenllm_v2/sci-m-wang/Phi-3-mini-4k-instruct-sa-v0.1/840c0e19-6d75-47a2-b64b-f9c51cb1dcff.json new file mode 100644 index 000000000..4019e2d95 --- /dev/null +++ b/data/hfopenllm_v2/sci-m-wang/Phi-3-mini-4k-instruct-sa-v0.1/840c0e19-6d75-47a2-b64b-f9c51cb1dcff.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sci-m-wang_Phi-3-mini-4k-instruct-sa-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-3-mini-4k-instruct-sa-v0.1", + "id": "sci-m-wang/Phi-3-mini-4k-instruct-sa-v0.1", + "developer": "sci-m-wang", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 7.642 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5021 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5502 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.148 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3289 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4073 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3985 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sci-m-wang/deepseek-llm-7b-chat-sa-v0.1/071b49f2-8e23-47b1-9858-78d676d9905e.json 
b/data/hfopenllm_v2/sci-m-wang/deepseek-llm-7b-chat-sa-v0.1/071b49f2-8e23-47b1-9858-78d676d9905e.json new file mode 100644 index 000000000..ea4d21a1e --- /dev/null +++ b/data/hfopenllm_v2/sci-m-wang/deepseek-llm-7b-chat-sa-v0.1/071b49f2-8e23-47b1-9858-78d676d9905e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sci-m-wang_deepseek-llm-7b-chat-sa-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "deepseek-llm-7b-chat-sa-v0.1", + "id": "sci-m-wang/deepseek-llm-7b-chat-sa-v0.1", + "developer": "sci-m-wang", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4036 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3718 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0264 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2567 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4173 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2209 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sci-m-wang/deepseek-llm-7b-chat-sa-v0.1/182d68d5-9b03-41bc-850c-1f571c36e630.json b/data/hfopenllm_v2/sci-m-wang/deepseek-llm-7b-chat-sa-v0.1/182d68d5-9b03-41bc-850c-1f571c36e630.json deleted file mode 100644 index 163fd03c9..000000000 --- 
a/data/hfopenllm_v2/sci-m-wang/deepseek-llm-7b-chat-sa-v0.1/182d68d5-9b03-41bc-850c-1f571c36e630.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sci-m-wang_deepseek-llm-7b-chat-sa-v0.1/1762652580.5106509", - "retrieved_timestamp": "1762652580.5106518", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sci-m-wang/deepseek-llm-7b-chat-sa-v0.1", - "developer": "sci-m-wang", - "inference_platform": "unknown", - "id": "sci-m-wang/deepseek-llm-7b-chat-sa-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4035935761557113 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37177200995276305 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4173125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22091090425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/securin/Securin-LLM-V2.5-Qwen-1.5B/d3821f53-87aa-470a-a403-c8e3cd100ae1.json b/data/hfopenllm_v2/securin/Securin-LLM-V2.5-Qwen-1.5B/d3821f53-87aa-470a-a403-c8e3cd100ae1.json new file mode 100644 index 000000000..4a45da063 --- /dev/null +++ b/data/hfopenllm_v2/securin/Securin-LLM-V2.5-Qwen-1.5B/d3821f53-87aa-470a-a403-c8e3cd100ae1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/securin_Securin-LLM-V2.5-Qwen-1.5B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Securin-LLM-V2.5-Qwen-1.5B", + "id": "securin/Securin-LLM-V2.5-Qwen-1.5B", + "developer": "securin", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.543 + } + }, + 
"evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1492 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3158 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0249 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3606 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1615 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/senseable/WestLake-7B-v2/389dbaba-c9cd-4e6b-afb3-f2ee3951faa0.json b/data/hfopenllm_v2/senseable/WestLake-7B-v2/389dbaba-c9cd-4e6b-afb3-f2ee3951faa0.json new file mode 100644 index 000000000..60607b3dd --- /dev/null +++ b/data/hfopenllm_v2/senseable/WestLake-7B-v2/389dbaba-c9cd-4e6b-afb3-f2ee3951faa0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/senseable_WestLake-7B-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "WestLake-7B-v2", + "id": "senseable/WestLake-7B-v2", + "developer": "senseable", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4419 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4073 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0483 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2768 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3937 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2764 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/senseable/WestLake-7B-v2/6ef15d50-74b7-4e09-856c-05343841e24b.json b/data/hfopenllm_v2/senseable/WestLake-7B-v2/6ef15d50-74b7-4e09-856c-05343841e24b.json deleted file mode 100644 index a83727d77..000000000 --- a/data/hfopenllm_v2/senseable/WestLake-7B-v2/6ef15d50-74b7-4e09-856c-05343841e24b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/senseable_WestLake-7B-v2/1762652580.511263", - "retrieved_timestamp": "1762652580.511264", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "senseable/WestLake-7B-v2", - "developer": "senseable", - "inference_platform": "unknown", - "id": "senseable/WestLake-7B-v2", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4418620371724801 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4073276290688943 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04833836858006042 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39371874999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27642952127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/sequelbox/Llama3.1-70B-PlumChat/5f78f39a-42cc-4cf6-bb27-e2160765bf24.json b/data/hfopenllm_v2/sequelbox/Llama3.1-70B-PlumChat/5f78f39a-42cc-4cf6-bb27-e2160765bf24.json new file mode 100644 index 000000000..e8f20a830 --- /dev/null +++ b/data/hfopenllm_v2/sequelbox/Llama3.1-70B-PlumChat/5f78f39a-42cc-4cf6-bb27-e2160765bf24.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sequelbox_Llama3.1-70B-PlumChat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.1-70B-PlumChat", + "id": "sequelbox/Llama3.1-70B-PlumChat", + "developer": "sequelbox", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5616 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6753 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3909 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4774 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5164 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sequelbox/Llama3.1-8B-MOTH/b6e3d811-bf9d-474e-b82d-358a44e0dfc9.json b/data/hfopenllm_v2/sequelbox/Llama3.1-8B-MOTH/b6e3d811-bf9d-474e-b82d-358a44e0dfc9.json new file mode 100644 index 000000000..fa67f2014 --- /dev/null +++ b/data/hfopenllm_v2/sequelbox/Llama3.1-8B-MOTH/b6e3d811-bf9d-474e-b82d-358a44e0dfc9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sequelbox_Llama3.1-8B-MOTH/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.1-8B-MOTH", + "id": "sequelbox/Llama3.1-8B-MOTH", + "developer": "sequelbox", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5245 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4902 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1216 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": 
"MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3689 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3339 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sequelbox/Llama3.1-8B-PlumChat/bef1cbad-4f75-4dde-b467-6145f72a87f4.json b/data/hfopenllm_v2/sequelbox/Llama3.1-8B-PlumChat/bef1cbad-4f75-4dde-b467-6145f72a87f4.json new file mode 100644 index 000000000..7e037a4de --- /dev/null +++ b/data/hfopenllm_v2/sequelbox/Llama3.1-8B-PlumChat/bef1cbad-4f75-4dde-b467-6145f72a87f4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sequelbox_Llama3.1-8B-PlumChat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.1-8B-PlumChat", + "id": "sequelbox/Llama3.1-8B-PlumChat", + "developer": "sequelbox", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4243 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3873 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0363 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.3755 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2127 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sequelbox/Llama3.1-8B-PlumCode/654bebe0-b461-427e-a4cf-06386e9272d8.json b/data/hfopenllm_v2/sequelbox/Llama3.1-8B-PlumCode/654bebe0-b461-427e-a4cf-06386e9272d8.json new file mode 100644 index 000000000..cef3dea6d --- /dev/null +++ b/data/hfopenllm_v2/sequelbox/Llama3.1-8B-PlumCode/654bebe0-b461-427e-a4cf-06386e9272d8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sequelbox_Llama3.1-8B-PlumCode/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.1-8B-PlumCode", + "id": "sequelbox/Llama3.1-8B-PlumCode", + "developer": "sequelbox", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2045 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3368 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0272 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3773 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2335 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sequelbox/Llama3.1-8B-PlumMath/37ef4e34-58f8-463a-950f-48b3a6833d54.json b/data/hfopenllm_v2/sequelbox/Llama3.1-8B-PlumMath/37ef4e34-58f8-463a-950f-48b3a6833d54.json new file mode 100644 index 000000000..553cddc4a --- /dev/null +++ b/data/hfopenllm_v2/sequelbox/Llama3.1-8B-PlumMath/37ef4e34-58f8-463a-950f-48b3a6833d54.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sequelbox_Llama3.1-8B-PlumMath/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.1-8B-PlumMath", + "id": "sequelbox/Llama3.1-8B-PlumMath", + "developer": "sequelbox", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2242 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4032 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0476 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.318 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3919 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2975 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/sequelbox/gemma-2-9B-MOTH/20687086-8aab-40f1-aec6-03917f4f9bf5.json b/data/hfopenllm_v2/sequelbox/gemma-2-9B-MOTH/20687086-8aab-40f1-aec6-03917f4f9bf5.json new file mode 100644 index 000000000..1c49b7861 --- /dev/null +++ b/data/hfopenllm_v2/sequelbox/gemma-2-9B-MOTH/20687086-8aab-40f1-aec6-03917f4f9bf5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sequelbox_gemma-2-9B-MOTH/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-9B-MOTH", + "id": "sequelbox/gemma-2-9B-MOTH", + "developer": "sequelbox", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2059 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.308 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0106 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3409 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.114 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sethuiyer/Llama-3.1-8B-Experimental-1206-Instruct/49334550-08eb-49a2-9cea-f90f22533ab1.json b/data/hfopenllm_v2/sethuiyer/Llama-3.1-8B-Experimental-1206-Instruct/49334550-08eb-49a2-9cea-f90f22533ab1.json deleted file mode 100644 index 471194651..000000000 
--- a/data/hfopenllm_v2/sethuiyer/Llama-3.1-8B-Experimental-1206-Instruct/49334550-08eb-49a2-9cea-f90f22533ab1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sethuiyer_Llama-3.1-8B-Experimental-1206-Instruct/1762652580.512954", - "retrieved_timestamp": "1762652580.512954", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sethuiyer/Llama-3.1-8B-Experimental-1206-Instruct", - "developer": "sethuiyer", - "inference_platform": "unknown", - "id": "sethuiyer/Llama-3.1-8B-Experimental-1206-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6967014189018471 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.510381184158217 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11178247734138973 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39657291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35289228723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/sethuiyer/Llama-3.1-8B-Experimental-1206-Instruct/53a0a998-a0a6-4800-80bf-bfd83123f2f6.json b/data/hfopenllm_v2/sethuiyer/Llama-3.1-8B-Experimental-1206-Instruct/53a0a998-a0a6-4800-80bf-bfd83123f2f6.json new file mode 100644 index 000000000..4beac0c7e --- /dev/null +++ b/data/hfopenllm_v2/sethuiyer/Llama-3.1-8B-Experimental-1206-Instruct/53a0a998-a0a6-4800-80bf-bfd83123f2f6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sethuiyer_Llama-3.1-8B-Experimental-1206-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8B-Experimental-1206-Instruct", + "id": "sethuiyer/Llama-3.1-8B-Experimental-1206-Instruct", + "developer": "sethuiyer", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6967 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5104 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1118 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3966 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3529 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sethuiyer/Llama-3.1-8B-Experimental-1208-Instruct/4ee8df1c-e8ff-4a56-816c-0c2258a226e7.json b/data/hfopenllm_v2/sethuiyer/Llama-3.1-8B-Experimental-1208-Instruct/4ee8df1c-e8ff-4a56-816c-0c2258a226e7.json new file mode 100644 index 000000000..a0e1dafd6 --- /dev/null +++ b/data/hfopenllm_v2/sethuiyer/Llama-3.1-8B-Experimental-1208-Instruct/4ee8df1c-e8ff-4a56-816c-0c2258a226e7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sethuiyer_Llama-3.1-8B-Experimental-1208-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8B-Experimental-1208-Instruct", + "id": "sethuiyer/Llama-3.1-8B-Experimental-1208-Instruct", + "developer": "sethuiyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + 
"params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.61 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4964 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0891 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.379 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3511 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sethuiyer/Llama-3.1-8B-Experimental-1208-Instruct/d4b778ea-ae70-437f-a295-772abc659027.json b/data/hfopenllm_v2/sethuiyer/Llama-3.1-8B-Experimental-1208-Instruct/d4b778ea-ae70-437f-a295-772abc659027.json deleted file mode 100644 index b70879dff..000000000 --- a/data/hfopenllm_v2/sethuiyer/Llama-3.1-8B-Experimental-1208-Instruct/d4b778ea-ae70-437f-a295-772abc659027.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sethuiyer_Llama-3.1-8B-Experimental-1208-Instruct/1762652580.513202", - "retrieved_timestamp": "1762652580.513203", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sethuiyer/Llama-3.1-8B-Experimental-1208-Instruct", - "developer": "sethuiyer", - "inference_platform": "unknown", - "id": "sethuiyer/Llama-3.1-8B-Experimental-1208-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - 
"params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6099981382731153 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49642264289263355 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3789895833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35106382978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/sethuiyer/LlamaZero-3.1-8B-Experimental-1208/42c8d84d-c8b8-42c6-8f49-4e971df173d7.json b/data/hfopenllm_v2/sethuiyer/LlamaZero-3.1-8B-Experimental-1208/42c8d84d-c8b8-42c6-8f49-4e971df173d7.json new file mode 100644 index 000000000..27df7dbc2 --- /dev/null +++ b/data/hfopenllm_v2/sethuiyer/LlamaZero-3.1-8B-Experimental-1208/42c8d84d-c8b8-42c6-8f49-4e971df173d7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sethuiyer_LlamaZero-3.1-8B-Experimental-1208/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LlamaZero-3.1-8B-Experimental-1208", + "id": "sethuiyer/LlamaZero-3.1-8B-Experimental-1208", + "developer": "sethuiyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6051 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4981 + } + }, + { + "evaluation_name": "MATH Level 5", + 
"source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.108 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.382 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sethuiyer/Llamaverse-3.1-8B-Instruct/77b57dea-22e1-48a6-b8ae-9e474f08ad5f.json b/data/hfopenllm_v2/sethuiyer/Llamaverse-3.1-8B-Instruct/77b57dea-22e1-48a6-b8ae-9e474f08ad5f.json new file mode 100644 index 000000000..38d270dc1 --- /dev/null +++ b/data/hfopenllm_v2/sethuiyer/Llamaverse-3.1-8B-Instruct/77b57dea-22e1-48a6-b8ae-9e474f08ad5f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sethuiyer_Llamaverse-3.1-8B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llamaverse-3.1-8B-Instruct", + "id": "sethuiyer/Llamaverse-3.1-8B-Instruct", + "developer": "sethuiyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6185 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5414 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 
5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1858 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3762 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3523 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sethuiyer/Llamaverse-3.1-8B-Instruct/f0a224c2-037a-4229-bb00-5d76d3974078.json b/data/hfopenllm_v2/sethuiyer/Llamaverse-3.1-8B-Instruct/f0a224c2-037a-4229-bb00-5d76d3974078.json deleted file mode 100644 index 79da9926a..000000000 --- a/data/hfopenllm_v2/sethuiyer/Llamaverse-3.1-8B-Instruct/f0a224c2-037a-4229-bb00-5d76d3974078.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sethuiyer_Llamaverse-3.1-8B-Instruct/1762652580.513652", - "retrieved_timestamp": "1762652580.513653", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sethuiyer/Llamaverse-3.1-8B-Instruct", - "developer": "sethuiyer", - "inference_platform": "unknown", - "id": "sethuiyer/Llamaverse-3.1-8B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6185410266980501 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5414159562743479 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18580060422960726 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3761666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3523105053191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/sethuiyer/Llamazing-3.1-8B-Instruct/9065a7df-dab7-4e3b-bbc5-01f2908c37b3.json b/data/hfopenllm_v2/sethuiyer/Llamazing-3.1-8B-Instruct/9065a7df-dab7-4e3b-bbc5-01f2908c37b3.json deleted file mode 100644 index 5dc427403..000000000 --- a/data/hfopenllm_v2/sethuiyer/Llamazing-3.1-8B-Instruct/9065a7df-dab7-4e3b-bbc5-01f2908c37b3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sethuiyer_Llamazing-3.1-8B-Instruct/1762652580.513854", - "retrieved_timestamp": "1762652580.513855", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sethuiyer/Llamazing-3.1-8B-Instruct", - "developer": "sethuiyer", - "inference_platform": "unknown", - "id": "sethuiyer/Llamazing-3.1-8B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5711301568726534 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.529106967510303 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39759374999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3606216755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/sethuiyer/Llamazing-3.1-8B-Instruct/a9ed5d04-57d2-4566-91df-b798be939fdb.json b/data/hfopenllm_v2/sethuiyer/Llamazing-3.1-8B-Instruct/a9ed5d04-57d2-4566-91df-b798be939fdb.json new file mode 100644 index 
000000000..f6ecfe05e --- /dev/null +++ b/data/hfopenllm_v2/sethuiyer/Llamazing-3.1-8B-Instruct/a9ed5d04-57d2-4566-91df-b798be939fdb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sethuiyer_Llamazing-3.1-8B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llamazing-3.1-8B-Instruct", + "id": "sethuiyer/Llamazing-3.1-8B-Instruct", + "developer": "sethuiyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5711 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5291 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0544 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3976 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3606 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sethuiyer/Qwen2.5-7B-Anvita/bad4ec47-fe84-4518-b072-6955938f0c86.json b/data/hfopenllm_v2/sethuiyer/Qwen2.5-7B-Anvita/bad4ec47-fe84-4518-b072-6955938f0c86.json new file mode 100644 index 000000000..de9d6f292 --- /dev/null +++ b/data/hfopenllm_v2/sethuiyer/Qwen2.5-7B-Anvita/bad4ec47-fe84-4518-b072-6955938f0c86.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/sethuiyer_Qwen2.5-7B-Anvita/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-Anvita", + "id": "sethuiyer/Qwen2.5-7B-Anvita", + "developer": "sethuiyer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.648 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5466 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2017 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3272 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4166 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/shadowml/BeagSake-7B/2a71923c-8697-4b62-94fa-4c16874df7a7.json b/data/hfopenllm_v2/shadowml/BeagSake-7B/2a71923c-8697-4b62-94fa-4c16874df7a7.json deleted file mode 100644 index 4309f9dfe..000000000 --- a/data/hfopenllm_v2/shadowml/BeagSake-7B/2a71923c-8697-4b62-94fa-4c16874df7a7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/shadowml_BeagSake-7B/1762652580.514317", - "retrieved_timestamp": "1762652580.514318", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging 
Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "shadowml/BeagSake-7B", - "developer": "shadowml", - "inference_platform": "unknown", - "id": "shadowml/BeagSake-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5215960318621258 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47110342371098474 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41235416666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25847739361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/shadowml/BeagSake-7B/497e585c-059a-4e18-9a8f-bdaa066f59ea.json b/data/hfopenllm_v2/shadowml/BeagSake-7B/497e585c-059a-4e18-9a8f-bdaa066f59ea.json new file mode 100644 index 000000000..3c7a54074 --- /dev/null +++ b/data/hfopenllm_v2/shadowml/BeagSake-7B/497e585c-059a-4e18-9a8f-bdaa066f59ea.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/shadowml_BeagSake-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BeagSake-7B", + "id": "shadowml/BeagSake-7B", + "developer": "shadowml", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5216 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4711 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0506 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4124 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2585 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/shadowml/Mixolar-4x7b/65a2c055-9bb5-458d-8a65-89b363b47a3a.json b/data/hfopenllm_v2/shadowml/Mixolar-4x7b/65a2c055-9bb5-458d-8a65-89b363b47a3a.json deleted file mode 100644 index 8eb1b7c37..000000000 --- a/data/hfopenllm_v2/shadowml/Mixolar-4x7b/65a2c055-9bb5-458d-8a65-89b363b47a3a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/shadowml_Mixolar-4x7b/1762652580.5145578", - "retrieved_timestamp": "1762652580.514559", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "shadowml/Mixolar-4x7b", - "developer": "shadowml", - "inference_platform": "unknown", - "id": "shadowml/Mixolar-4x7b", - "additional_details": { - "precision": "float16", - "architecture": "MixtralForCausalLM", - "params_billions": 36.099 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3893303102434873 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5215949876221495 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0581570996978852 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42575 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33053523936170215 - } - } - ] -} diff --git a/data/hfopenllm_v2/shadowml/Mixolar-4x7b/e24b2a4e-83e4-4a79-bc41-03a54af00595.json b/data/hfopenllm_v2/shadowml/Mixolar-4x7b/e24b2a4e-83e4-4a79-bc41-03a54af00595.json new file mode 100644 index 000000000..193aa7115 --- /dev/null +++ b/data/hfopenllm_v2/shadowml/Mixolar-4x7b/e24b2a4e-83e4-4a79-bc41-03a54af00595.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/shadowml_Mixolar-4x7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mixolar-4x7b", + "id": "shadowml/Mixolar-4x7b", + "developer": "shadowml", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MixtralForCausalLM", + "params_billions": 36.099 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3893 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5216 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0582 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4258 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3305 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/shastraai/Shastra-LLAMA2-Math-Commonsense-SFT/15e39361-585b-4870-b91a-64dce4fb37ec.json b/data/hfopenllm_v2/shastraai/Shastra-LLAMA2-Math-Commonsense-SFT/15e39361-585b-4870-b91a-64dce4fb37ec.json new file mode 100644 index 000000000..015b23d62 --- /dev/null +++ b/data/hfopenllm_v2/shastraai/Shastra-LLAMA2-Math-Commonsense-SFT/15e39361-585b-4870-b91a-64dce4fb37ec.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/shastraai_Shastra-LLAMA2-Math-Commonsense-SFT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Shastra-LLAMA2-Math-Commonsense-SFT", + "id": "shastraai/Shastra-LLAMA2-Math-Commonsense-SFT", + "developer": "shastraai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.738 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3042 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3843 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0174 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.3604 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1997 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/shastraai/Shastra-LLAMA2-Math-Commonsense-SFT/563e2894-10bf-43e1-af67-5cd97d52f033.json b/data/hfopenllm_v2/shastraai/Shastra-LLAMA2-Math-Commonsense-SFT/563e2894-10bf-43e1-af67-5cd97d52f033.json deleted file mode 100644 index b8d51ea3b..000000000 --- a/data/hfopenllm_v2/shastraai/Shastra-LLAMA2-Math-Commonsense-SFT/563e2894-10bf-43e1-af67-5cd97d52f033.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/shastraai_Shastra-LLAMA2-Math-Commonsense-SFT/1762652580.5147672", - "retrieved_timestamp": "1762652580.5147672", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "shastraai/Shastra-LLAMA2-Math-Commonsense-SFT", - "developer": "shastraai", - "inference_platform": "unknown", - "id": "shastraai/Shastra-LLAMA2-Math-Commonsense-SFT", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.738 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3041507644161935 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.384316753625765 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.017371601208459216 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3604479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19971742021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/shivam9980/NEPALI-LLM/234f5f98-a5fc-417a-8463-186bf600993a.json b/data/hfopenllm_v2/shivam9980/NEPALI-LLM/234f5f98-a5fc-417a-8463-186bf600993a.json deleted file mode 100644 index 83505e538..000000000 --- 
a/data/hfopenllm_v2/shivam9980/NEPALI-LLM/234f5f98-a5fc-417a-8463-186bf600993a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/shivam9980_NEPALI-LLM/1762652580.51522", - "retrieved_timestamp": "1762652580.5152209", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "shivam9980/NEPALI-LLM", - "developer": "shivam9980", - "inference_platform": "unknown", - "id": "shivam9980/NEPALI-LLM", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.273 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.041666112581284324 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3828457133787513 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41219791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2064494680851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/shivam9980/NEPALI-LLM/96efd11b-e9f2-4bf1-90f9-561714137edf.json b/data/hfopenllm_v2/shivam9980/NEPALI-LLM/96efd11b-e9f2-4bf1-90f9-561714137edf.json new file mode 100644 index 000000000..60b5094a5 --- /dev/null +++ b/data/hfopenllm_v2/shivam9980/NEPALI-LLM/96efd11b-e9f2-4bf1-90f9-561714137edf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/shivam9980_NEPALI-LLM/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NEPALI-LLM", + "id": "shivam9980/NEPALI-LLM", + "developer": "shivam9980", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.273 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": 
"hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0417 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3828 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0091 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4122 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2064 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/shivam9980/mistral-7b-news-cnn-merged/98e9936d-d376-4c72-80a6-0a28cf722ac4.json b/data/hfopenllm_v2/shivam9980/mistral-7b-news-cnn-merged/98e9936d-d376-4c72-80a6-0a28cf722ac4.json new file mode 100644 index 000000000..aa2c40613 --- /dev/null +++ b/data/hfopenllm_v2/shivam9980/mistral-7b-news-cnn-merged/98e9936d-d376-4c72-80a6-0a28cf722ac4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/shivam9980_mistral-7b-news-cnn-merged/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistral-7b-news-cnn-merged", + "id": "shivam9980/mistral-7b-news-cnn-merged", + "developer": "shivam9980", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 7.723 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + 
}, + "score_details": { + "score": 0.4634 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3635 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0189 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4523 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/shivank21/mistral_dpo_self/7ada9c83-7851-4da2-b9d1-d744b174b777.json b/data/hfopenllm_v2/shivank21/mistral_dpo_self/7ada9c83-7851-4da2-b9d1-d744b174b777.json new file mode 100644 index 000000000..ab32e42b6 --- /dev/null +++ b/data/hfopenllm_v2/shivank21/mistral_dpo_self/7ada9c83-7851-4da2-b9d1-d744b174b777.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/shivank21_mistral_dpo_self/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistral_dpo_self", + "id": "shivank21/mistral_dpo_self", + "developer": "shivank21", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "", + "params_billions": 7.913 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3403 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3216 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0219 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2408 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3247 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2214 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/shuttleai/shuttle-3/a6ed72b7-14f1-464c-a7f5-590791982696.json b/data/hfopenllm_v2/shuttleai/shuttle-3/a6ed72b7-14f1-464c-a7f5-590791982696.json new file mode 100644 index 000000000..b5153a72b --- /dev/null +++ b/data/hfopenllm_v2/shuttleai/shuttle-3/a6ed72b7-14f1-464c-a7f5-590791982696.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/shuttleai_shuttle-3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "shuttle-3", + "id": "shuttleai/shuttle-3", + "developer": "shuttleai", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8154 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.742 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + 
"metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.46 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4119 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4377 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5716 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/shuttleai/shuttle-3/bc357a38-215b-4885-9e0e-6f2b6f0bf1cc.json b/data/hfopenllm_v2/shuttleai/shuttle-3/bc357a38-215b-4885-9e0e-6f2b6f0bf1cc.json deleted file mode 100644 index 0c95dc884..000000000 --- a/data/hfopenllm_v2/shuttleai/shuttle-3/bc357a38-215b-4885-9e0e-6f2b6f0bf1cc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/shuttleai_shuttle-3/1762652580.5160902", - "retrieved_timestamp": "1762652580.5160909", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "shuttleai/shuttle-3", - "developer": "shuttleai", - "inference_platform": "unknown", - "id": "shuttleai/shuttle-3", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.815403130360776 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7420334281529087 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45996978851963743 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41191275167785235 - } - }, - { - "evaluation_name": "MUSR", 
- "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4376875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5716422872340425 - } - } - ] -} diff --git a/data/hfopenllm_v2/shyamieee/Padma-v7.0/79e3f38d-ae2b-44a7-be0d-024adad6bcd6.json b/data/hfopenllm_v2/shyamieee/Padma-v7.0/79e3f38d-ae2b-44a7-be0d-024adad6bcd6.json new file mode 100644 index 000000000..17b7fc494 --- /dev/null +++ b/data/hfopenllm_v2/shyamieee/Padma-v7.0/79e3f38d-ae2b-44a7-be0d-024adad6bcd6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/shyamieee_Padma-v7.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Padma-v7.0", + "id": "shyamieee/Padma-v7.0", + "developer": "shyamieee", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3841 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5119 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0702 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2861 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4386 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on 
MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/shyamieee/Padma-v7.0/81546997-4dda-45ea-81fb-23db1b3b5cd7.json b/data/hfopenllm_v2/shyamieee/Padma-v7.0/81546997-4dda-45ea-81fb-23db1b3b5cd7.json deleted file mode 100644 index 8dee10570..000000000 --- a/data/hfopenllm_v2/shyamieee/Padma-v7.0/81546997-4dda-45ea-81fb-23db1b3b5cd7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/shyamieee_Padma-v7.0/1762652580.51635", - "retrieved_timestamp": "1762652580.51635", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "shyamieee/Padma-v7.0", - "developer": "shyamieee", - "inference_platform": "unknown", - "id": "shyamieee/Padma-v7.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3841097177710696 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5118785631761485 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0702416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43855208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3029421542553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/silma-ai/SILMA-9B-Instruct-v1.0/de11a0bf-47ea-444f-bf89-45e9208cfd1a.json b/data/hfopenllm_v2/silma-ai/SILMA-9B-Instruct-v1.0/de11a0bf-47ea-444f-bf89-45e9208cfd1a.json deleted file mode 100644 index 91f38e983..000000000 --- a/data/hfopenllm_v2/silma-ai/SILMA-9B-Instruct-v1.0/de11a0bf-47ea-444f-bf89-45e9208cfd1a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/silma-ai_SILMA-9B-Instruct-v1.0/1762652580.516612", - "retrieved_timestamp": "1762652580.516613", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "silma-ai/SILMA-9B-Instruct-v1.0", - "developer": "silma-ai", - "inference_platform": "unknown", - "id": "silma-ai/SILMA-9B-Instruct-v1.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5841943820174914 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5219015032853501 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1163141993957704 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46369791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39195478723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/silma-ai/SILMA-9B-Instruct-v1.0/ef13bdea-cf73-4ead-b6d7-73a155fa9a79.json b/data/hfopenllm_v2/silma-ai/SILMA-9B-Instruct-v1.0/ef13bdea-cf73-4ead-b6d7-73a155fa9a79.json new file mode 100644 index 000000000..d7f107192 --- /dev/null +++ b/data/hfopenllm_v2/silma-ai/SILMA-9B-Instruct-v1.0/ef13bdea-cf73-4ead-b6d7-73a155fa9a79.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/silma-ai_SILMA-9B-Instruct-v1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SILMA-9B-Instruct-v1.0", + "id": "silma-ai/SILMA-9B-Instruct-v1.0", + "developer": "silma-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5842 + } + }, + { + "evaluation_name": "BBH", + 
"source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5219 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1163 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4637 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.392 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/silma-ai/SILMA-Kashif-2B-Instruct-v1.0/2663884f-941c-4e16-8029-b38e3a543733.json b/data/hfopenllm_v2/silma-ai/SILMA-Kashif-2B-Instruct-v1.0/2663884f-941c-4e16-8029-b38e3a543733.json new file mode 100644 index 000000000..31c0700a3 --- /dev/null +++ b/data/hfopenllm_v2/silma-ai/SILMA-Kashif-2B-Instruct-v1.0/2663884f-941c-4e16-8029-b38e3a543733.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/silma-ai_SILMA-Kashif-2B-Instruct-v1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SILMA-Kashif-2B-Instruct-v1.0", + "id": "silma-ai/SILMA-Kashif-2B-Instruct-v1.0", + "developer": "silma-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1181 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3793 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0113 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4043 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2258 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/silma-ai/SILMA-Kashif-2B-Instruct-v1.0/e6926be5-561b-453b-8d5f-e64f380c4a51.json b/data/hfopenllm_v2/silma-ai/SILMA-Kashif-2B-Instruct-v1.0/e6926be5-561b-453b-8d5f-e64f380c4a51.json deleted file mode 100644 index 5020c2453..000000000 --- a/data/hfopenllm_v2/silma-ai/SILMA-Kashif-2B-Instruct-v1.0/e6926be5-561b-453b-8d5f-e64f380c4a51.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/silma-ai_SILMA-Kashif-2B-Instruct-v1.0/1762652580.516862", - "retrieved_timestamp": "1762652580.5168629", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "silma-ai/SILMA-Kashif-2B-Instruct-v1.0", - "developer": "silma-ai", - "inference_platform": "unknown", - "id": "silma-ai/SILMA-Kashif-2B-Instruct-v1.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11807781131841291 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37932201246317715 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.011329305135951661 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4042604166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22581449468085107 - } - } - ] -} diff --git a/data/hfopenllm_v2/siqi00/Mistral-7B-DFT/ca7af645-4796-4b31-ae7d-2cbebe5a369b.json b/data/hfopenllm_v2/siqi00/Mistral-7B-DFT/ca7af645-4796-4b31-ae7d-2cbebe5a369b.json new file mode 100644 index 000000000..6c0b09631 --- /dev/null +++ b/data/hfopenllm_v2/siqi00/Mistral-7B-DFT/ca7af645-4796-4b31-ae7d-2cbebe5a369b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/siqi00_Mistral-7B-DFT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-7B-DFT", + "id": "siqi00/Mistral-7B-DFT", + "developer": "siqi00", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5569 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4665 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0378 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" 
+ }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4191 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2963 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/siqi00/Mistral-7B-DFT2/f95e098c-d320-4db1-887d-8c3252bbaf77.json b/data/hfopenllm_v2/siqi00/Mistral-7B-DFT2/f95e098c-d320-4db1-887d-8c3252bbaf77.json new file mode 100644 index 000000000..e31ef75fe --- /dev/null +++ b/data/hfopenllm_v2/siqi00/Mistral-7B-DFT2/f95e098c-d320-4db1-887d-8c3252bbaf77.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/siqi00_Mistral-7B-DFT2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-7B-DFT2", + "id": "siqi00/Mistral-7B-DFT2", + "developer": "siqi00", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5804 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3968 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0453 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4401 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + 
"dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/skumar9/Llama-medx_v2/2bbf6dc9-8dd5-4dee-908e-d4a8fc03bc84.json b/data/hfopenllm_v2/skumar9/Llama-medx_v2/2bbf6dc9-8dd5-4dee-908e-d4a8fc03bc84.json new file mode 100644 index 000000000..675a3eeee --- /dev/null +++ b/data/hfopenllm_v2/skumar9/Llama-medx_v2/2bbf6dc9-8dd5-4dee-908e-d4a8fc03bc84.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/skumar9_Llama-medx_v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-medx_v2", + "id": "skumar9/Llama-medx_v2", + "developer": "skumar9", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4462 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4909 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0914 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3661 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.3463 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/skymizer/Llama2-7b-sft-chat-custom-template-dpo/24473e8a-2631-44b5-9cc2-81f0669d8032.json b/data/hfopenllm_v2/skymizer/Llama2-7b-sft-chat-custom-template-dpo/24473e8a-2631-44b5-9cc2-81f0669d8032.json deleted file mode 100644 index f9000c0d0..000000000 --- a/data/hfopenllm_v2/skymizer/Llama2-7b-sft-chat-custom-template-dpo/24473e8a-2631-44b5-9cc2-81f0669d8032.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/skymizer_Llama2-7b-sft-chat-custom-template-dpo/1762652580.517826", - "retrieved_timestamp": "1762652580.517826", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "skymizer/Llama2-7b-sft-chat-custom-template-dpo", - "developer": "skymizer", - "inference_platform": "unknown", - "id": "skymizer/Llama2-7b-sft-chat-custom-template-dpo", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.738 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2352823840742563 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36884662302661564 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23909395973154363 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44286458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19464760638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/skymizer/Llama2-7b-sft-chat-custom-template-dpo/5f4edfdb-a62c-4410-83a3-1ceb15d2e7b0.json b/data/hfopenllm_v2/skymizer/Llama2-7b-sft-chat-custom-template-dpo/5f4edfdb-a62c-4410-83a3-1ceb15d2e7b0.json new file mode 100644 index 000000000..69c7e0621 --- /dev/null +++ b/data/hfopenllm_v2/skymizer/Llama2-7b-sft-chat-custom-template-dpo/5f4edfdb-a62c-4410-83a3-1ceb15d2e7b0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/skymizer_Llama2-7b-sft-chat-custom-template-dpo/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + 
"source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama2-7b-sft-chat-custom-template-dpo", + "id": "skymizer/Llama2-7b-sft-chat-custom-template-dpo", + "developer": "skymizer", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.738 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2353 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3688 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0144 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2391 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4429 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1946 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/someon98/qwen-CoMa-0.5b/aadfae06-73b6-4306-b056-0a733b9bd8f4.json b/data/hfopenllm_v2/someon98/qwen-CoMa-0.5b/aadfae06-73b6-4306-b056-0a733b9bd8f4.json new file mode 100644 index 000000000..5b36f35c5 --- /dev/null +++ b/data/hfopenllm_v2/someon98/qwen-CoMa-0.5b/aadfae06-73b6-4306-b056-0a733b9bd8f4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/someon98_qwen-CoMa-0.5b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwen-CoMa-0.5b", + "id": 
"someon98/qwen-CoMa-0.5b", + "developer": "someon98", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2277 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2953 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0045 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2399 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4046 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1099 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/ChocoTrio-14B-v1/c2034822-689f-4e8b-9575-b63081584aec.json b/data/hfopenllm_v2/sometimesanotion/ChocoTrio-14B-v1/c2034822-689f-4e8b-9575-b63081584aec.json deleted file mode 100644 index df18a5cd0..000000000 --- a/data/hfopenllm_v2/sometimesanotion/ChocoTrio-14B-v1/c2034822-689f-4e8b-9575-b63081584aec.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_ChocoTrio-14B-v1/1762652580.518315", - "retrieved_timestamp": "1762652580.518315", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/ChocoTrio-14B-v1", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/ChocoTrio-14B-v1", - 
"additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7088912973133508 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6505840125855428 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3972809667673716 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3850671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4820520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5369847074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/sometimesanotion/ChocoTrio-14B-v1/cfecbfbc-46c3-4dd3-8bd9-afe4cd386973.json b/data/hfopenllm_v2/sometimesanotion/ChocoTrio-14B-v1/cfecbfbc-46c3-4dd3-8bd9-afe4cd386973.json new file mode 100644 index 000000000..9b58091a6 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/ChocoTrio-14B-v1/cfecbfbc-46c3-4dd3-8bd9-afe4cd386973.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_ChocoTrio-14B-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ChocoTrio-14B-v1", + "id": "sometimesanotion/ChocoTrio-14B-v1", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7089 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6506 + } + }, + { + "evaluation_name": 
"MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3973 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3851 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4821 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.537 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/IF-reasoning-experiment-40/162b8329-ad84-463b-bda7-7383edda04d8.json b/data/hfopenllm_v2/sometimesanotion/IF-reasoning-experiment-40/162b8329-ad84-463b-bda7-7383edda04d8.json deleted file mode 100644 index d65ac6adf..000000000 --- a/data/hfopenllm_v2/sometimesanotion/IF-reasoning-experiment-40/162b8329-ad84-463b-bda7-7383edda04d8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_IF-reasoning-experiment-40/1762652580.518558", - "retrieved_timestamp": "1762652580.518559", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/IF-reasoning-experiment-40", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/IF-reasoning-experiment-40", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6329793835910938 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6111859401994667 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3716012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3800335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5194166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5024933510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/sometimesanotion/IF-reasoning-experiment-40/97640dd1-d415-4b56-818c-cdcede3c52fd.json b/data/hfopenllm_v2/sometimesanotion/IF-reasoning-experiment-40/97640dd1-d415-4b56-818c-cdcede3c52fd.json new file mode 100644 index 000000000..3e9ffdabd --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/IF-reasoning-experiment-40/97640dd1-d415-4b56-818c-cdcede3c52fd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_IF-reasoning-experiment-40/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "IF-reasoning-experiment-40", + "id": "sometimesanotion/IF-reasoning-experiment-40", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.633 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6112 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3716 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.38 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": 
"TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5194 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5025 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/IF-reasoning-experiment-80/b1097c42-10fe-4892-8e85-60385ecf35bf.json b/data/hfopenllm_v2/sometimesanotion/IF-reasoning-experiment-80/b1097c42-10fe-4892-8e85-60385ecf35bf.json deleted file mode 100644 index 36826867d..000000000 --- a/data/hfopenllm_v2/sometimesanotion/IF-reasoning-experiment-80/b1097c42-10fe-4892-8e85-60385ecf35bf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_IF-reasoning-experiment-80/1762652580.5187662", - "retrieved_timestamp": "1762652580.518767", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/IF-reasoning-experiment-80", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/IF-reasoning-experiment-80", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.383 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5462761029623622 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42103836132239286 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09894259818731117 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5024583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3367686170212766 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/sometimesanotion/IF-reasoning-experiment-80/b750c460-ef70-4abf-b77d-118a82039598.json b/data/hfopenllm_v2/sometimesanotion/IF-reasoning-experiment-80/b750c460-ef70-4abf-b77d-118a82039598.json new file mode 100644 index 000000000..f511af28e --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/IF-reasoning-experiment-80/b750c460-ef70-4abf-b77d-118a82039598.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_IF-reasoning-experiment-80/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "IF-reasoning-experiment-80", + "id": "sometimesanotion/IF-reasoning-experiment-80", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.383 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5463 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.421 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0989 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2844 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5025 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3368 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/KytheraMix-7B-v0.2/c50f0ef7-18e4-4f03-8262-ee1519c59b7f.json 
b/data/hfopenllm_v2/sometimesanotion/KytheraMix-7B-v0.2/c50f0ef7-18e4-4f03-8262-ee1519c59b7f.json deleted file mode 100644 index 8bca25881..000000000 --- a/data/hfopenllm_v2/sometimesanotion/KytheraMix-7B-v0.2/c50f0ef7-18e4-4f03-8262-ee1519c59b7f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_KytheraMix-7B-v0.2/1762652580.5189881", - "retrieved_timestamp": "1762652580.5189881", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/KytheraMix-7B-v0.2", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/KytheraMix-7B-v0.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6128705168951715 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5635202746804572 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29229607250755285 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33557046979865773 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45941666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45054853723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/sometimesanotion/KytheraMix-7B-v0.2/f4c20519-9e33-4698-a17a-07e5fe7d2707.json b/data/hfopenllm_v2/sometimesanotion/KytheraMix-7B-v0.2/f4c20519-9e33-4698-a17a-07e5fe7d2707.json new file mode 100644 index 000000000..99248458d --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/KytheraMix-7B-v0.2/f4c20519-9e33-4698-a17a-07e5fe7d2707.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_KytheraMix-7B-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "KytheraMix-7B-v0.2", + "id": "sometimesanotion/KytheraMix-7B-v0.2", + "developer": 
"sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6129 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5635 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2923 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3356 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4594 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4505 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.1-experimental/0f204733-55b4-4c06-bd12-dbc2e2593abd.json b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.1-experimental/0f204733-55b4-4c06-bd12-dbc2e2593abd.json new file mode 100644 index 000000000..f40410446 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.1-experimental/0f204733-55b4-4c06-bd12-dbc2e2593abd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Lamarck-14B-v0.1-experimental/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lamarck-14B-v0.1-experimental", + "id": "sometimesanotion/Lamarck-14B-v0.1-experimental", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + 
"params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5354 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6583 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.358 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3817 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4728 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5408 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.1-experimental/aa2b9fb3-77ca-4a48-b3dd-77879220a6b8.json b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.1-experimental/aa2b9fb3-77ca-4a48-b3dd-77879220a6b8.json deleted file mode 100644 index f9b660c73..000000000 --- a/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.1-experimental/aa2b9fb3-77ca-4a48-b3dd-77879220a6b8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Lamarck-14B-v0.1-experimental/1762652580.519198", - "retrieved_timestamp": "1762652580.519199", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Lamarck-14B-v0.1-experimental", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/Lamarck-14B-v0.1-experimental", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - 
"params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5353850006870658 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6582539239967329 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3580060422960725 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38171140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47284375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5408078457446809 - } - } - ] -} diff --git a/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.3/0bb226ed-fe88-4678-9b50-f77883ceb708.json b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.3/0bb226ed-fe88-4678-9b50-f77883ceb708.json new file mode 100644 index 000000000..27f7815dc --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.3/0bb226ed-fe88-4678-9b50-f77883ceb708.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Lamarck-14B-v0.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lamarck-14B-v0.3", + "id": "sometimesanotion/Lamarck-14B-v0.3", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5032 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6611 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + 
"hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3406 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3884 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4688 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5411 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.3/6103d107-0eb8-4b0e-8947-d5c7e7cb62f6.json b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.3/6103d107-0eb8-4b0e-8947-d5c7e7cb62f6.json deleted file mode 100644 index 24ff91a22..000000000 --- a/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.3/6103d107-0eb8-4b0e-8947-d5c7e7cb62f6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Lamarck-14B-v0.3/1762652580.519407", - "retrieved_timestamp": "1762652580.5194082", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Lamarck-14B-v0.3", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/Lamarck-14B-v0.3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5031616111916382 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6611400465373158 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3406344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3884228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4688125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5410571808510638 - } - } - ] -} diff --git a/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.4-Qwenvergence/fb297e45-9e14-4853-8384-75c187b28a9b.json b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.4-Qwenvergence/fb297e45-9e14-4853-8384-75c187b28a9b.json new file mode 100644 index 000000000..96418c26f --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.4-Qwenvergence/fb297e45-9e14-4853-8384-75c187b28a9b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Lamarck-14B-v0.4-Qwenvergence/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lamarck-14B-v0.4-Qwenvergence", + "id": "sometimesanotion/Lamarck-14B-v0.4-Qwenvergence", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4906 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6535 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3399 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3784 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 
0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4847 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5406 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.6-002-model_stock/4f6eba27-2ab4-4b33-9568-814d15fbd6b9.json b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.6-002-model_stock/4f6eba27-2ab4-4b33-9568-814d15fbd6b9.json new file mode 100644 index 000000000..d6ddf3a3c --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.6-002-model_stock/4f6eba27-2ab4-4b33-9568-814d15fbd6b9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Lamarck-14B-v0.6-002-model_stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lamarck-14B-v0.6-002-model_stock", + "id": "sometimesanotion/Lamarck-14B-v0.6-002-model_stock", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6692 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6143 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3776 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3742 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.518 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { 
+ "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5054 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.6-002-model_stock/bd904778-1ad9-48fe-a12e-4b62ce46bd0b.json b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.6-002-model_stock/bd904778-1ad9-48fe-a12e-4b62ce46bd0b.json deleted file mode 100644 index a5da960dd..000000000 --- a/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.6-002-model_stock/bd904778-1ad9-48fe-a12e-4b62ce46bd0b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Lamarck-14B-v0.6-002-model_stock/1762652580.520087", - "retrieved_timestamp": "1762652580.520087", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Lamarck-14B-v0.6-002-model_stock", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/Lamarck-14B-v0.6-002-model_stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.669224324791553 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6143349188724702 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3776435045317221 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37416107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5180208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5054022606382979 - } - } - ] -} diff --git a/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.6-model_stock/92d4d9ca-d19f-45c5-b506-5b1039100c92.json b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.6-model_stock/92d4d9ca-d19f-45c5-b506-5b1039100c92.json deleted file mode 100644 index ec174ee62..000000000 --- 
a/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.6-model_stock/92d4d9ca-d19f-45c5-b506-5b1039100c92.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Lamarck-14B-v0.6-model_stock/1762652580.520298", - "retrieved_timestamp": "1762652580.520299", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Lamarck-14B-v0.6-model_stock", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/Lamarck-14B-v0.6-model_stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6789662539838739 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6269436532753222 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4244712990936556 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38422818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.50065625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.519780585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.6-model_stock/c3bc3d69-a987-4dd0-b6a5-e0ecc50034fb.json b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.6-model_stock/c3bc3d69-a987-4dd0-b6a5-e0ecc50034fb.json new file mode 100644 index 000000000..509a05a43 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.6-model_stock/c3bc3d69-a987-4dd0-b6a5-e0ecc50034fb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Lamarck-14B-v0.6-model_stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lamarck-14B-v0.6-model_stock", + "id": "sometimesanotion/Lamarck-14B-v0.6-model_stock", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + 
"precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.679 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6269 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4245 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3842 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5007 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5198 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.6/5d02ba78-cf8b-44ee-a1b3-e51ecf437d89.json b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.6/5d02ba78-cf8b-44ee-a1b3-e51ecf437d89.json new file mode 100644 index 000000000..0f6ebd239 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.6/5d02ba78-cf8b-44ee-a1b3-e51ecf437d89.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Lamarck-14B-v0.6/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lamarck-14B-v0.6", + "id": "sometimesanotion/Lamarck-14B-v0.6", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": 
"hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6973 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.646 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4041 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3893 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4847 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.54 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.6/dd7005a5-281d-42e9-9916-663b1641718f.json b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.6/dd7005a5-281d-42e9-9916-663b1641718f.json deleted file mode 100644 index 07dc77eb5..000000000 --- a/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.6/dd7005a5-281d-42e9-9916-663b1641718f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Lamarck-14B-v0.6/1762652580.519876", - "retrieved_timestamp": "1762652580.519876", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Lamarck-14B-v0.6", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/Lamarck-14B-v0.6", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.6972510716011294 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6460312233782931 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4040785498489426 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38926174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4846875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5399767287234043 - } - } - ] -} diff --git a/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.7-Fusion/480b1187-5f66-4414-84b1-4c6ce1ebf137.json b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.7-Fusion/480b1187-5f66-4414-84b1-4c6ce1ebf137.json deleted file mode 100644 index a7ec7a290..000000000 --- a/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.7-Fusion/480b1187-5f66-4414-84b1-4c6ce1ebf137.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Lamarck-14B-v0.7-Fusion/1762652580.52051", - "retrieved_timestamp": "1762652580.520511", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Lamarck-14B-v0.7-Fusion", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/Lamarck-14B-v0.7-Fusion", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6821134589555713 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6543636625652262 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4040785498489426 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.401006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49913541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5390625 - } - } - ] -} diff --git a/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.7-Fusion/4a43fa67-2438-4c2a-b17b-9d2f221e5a86.json b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.7-Fusion/4a43fa67-2438-4c2a-b17b-9d2f221e5a86.json new file mode 100644 index 000000000..1e00d79b8 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.7-Fusion/4a43fa67-2438-4c2a-b17b-9d2f221e5a86.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Lamarck-14B-v0.7-Fusion/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lamarck-14B-v0.7-Fusion", + "id": "sometimesanotion/Lamarck-14B-v0.7-Fusion", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6821 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6544 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4041 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.401 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.4991 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5391 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.7-rc1/2c044767-1169-48c6-9e37-e9d1e35f4cfe.json b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.7-rc1/2c044767-1169-48c6-9e37-e9d1e35f4cfe.json new file mode 100644 index 000000000..0e64b71d7 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.7-rc1/2c044767-1169-48c6-9e37-e9d1e35f4cfe.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Lamarck-14B-v0.7-rc1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lamarck-14B-v0.7-rc1", + "id": "sometimesanotion/Lamarck-14B-v0.7-rc1", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7305 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6486 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3852 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3893 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4715 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": 
"TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5416 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.7-rc1/5919f71f-8d7b-4cce-a7ce-01680c08acf2.json b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.7-rc1/5919f71f-8d7b-4cce-a7ce-01680c08acf2.json deleted file mode 100644 index 423595cc7..000000000 --- a/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.7-rc1/5919f71f-8d7b-4cce-a7ce-01680c08acf2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Lamarck-14B-v0.7-rc1/1762652580.520714", - "retrieved_timestamp": "1762652580.520715", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Lamarck-14B-v0.7-rc1", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/Lamarck-14B-v0.7-rc1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7305482785675341 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6486027992626241 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38926174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47147916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5415558510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.7-rc4/b3b9b1a5-4495-4649-9943-58986d94fcb1.json b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.7-rc4/b3b9b1a5-4495-4649-9943-58986d94fcb1.json deleted file mode 100644 index 3a6fbb5b0..000000000 --- a/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.7-rc4/b3b9b1a5-4495-4649-9943-58986d94fcb1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/sometimesanotion_Lamarck-14B-v0.7-rc4/1762652580.520921", - "retrieved_timestamp": "1762652580.5209222", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/Lamarck-14B-v0.7-rc4", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/Lamarck-14B-v0.7-rc4", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7210811757248545 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6509652911243554 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4025679758308157 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38926174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4911979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5399767287234043 - } - } - ] -} diff --git a/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.7-rc4/bad67b35-d9ef-417a-955b-9c33e87cb927.json b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.7-rc4/bad67b35-d9ef-417a-955b-9c33e87cb927.json new file mode 100644 index 000000000..ebc21eef2 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.7-rc4/bad67b35-d9ef-417a-955b-9c33e87cb927.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Lamarck-14B-v0.7-rc4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lamarck-14B-v0.7-rc4", + "id": "sometimesanotion/Lamarck-14B-v0.7-rc4", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": 
"google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7211 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.651 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4026 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3893 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4912 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.54 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v1/60eaa315-f489-405d-a67d-7f1312e90cab.json b/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v1/60eaa315-f489-405d-a67d-7f1312e90cab.json new file mode 100644 index 000000000..1c1c6efe8 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v1/60eaa315-f489-405d-a67d-7f1312e90cab.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_LamarckInfusion-14B-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LamarckInfusion-14B-v1", + "id": "sometimesanotion/LamarckInfusion-14B-v1", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, 
+ "score_details": { + "score": 0.7198 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6539 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4169 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3909 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4899 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5376 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v1/e7577048-db59-4629-aeb0-f50b72cbb827.json b/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v1/e7577048-db59-4629-aeb0-f50b72cbb827.json deleted file mode 100644 index e66b41679..000000000 --- a/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v1/e7577048-db59-4629-aeb0-f50b72cbb827.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_LamarckInfusion-14B-v1/1762652580.521131", - "retrieved_timestamp": "1762652580.521132", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/LamarckInfusion-14B-v1", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/LamarckInfusion-14B-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7198322672730577 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6539252513912222 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4169184290030212 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39093959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48989583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5376496010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v2-hi/50de312a-293d-41a4-8bee-4feb0c148b90.json b/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v2-hi/50de312a-293d-41a4-8bee-4feb0c148b90.json new file mode 100644 index 000000000..a6c8039e6 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v2-hi/50de312a-293d-41a4-8bee-4feb0c148b90.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_LamarckInfusion-14B-v2-hi/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LamarckInfusion-14B-v2-hi", + "id": "sometimesanotion/LamarckInfusion-14B-v2-hi", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6855 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6555 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.423 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + 
"source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3884 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4847 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5405 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v2-hi/e4b943ea-3e97-490b-af6d-ad7dc0fdf012.json b/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v2-hi/e4b943ea-3e97-490b-af6d-ad7dc0fdf012.json deleted file mode 100644 index 161224be7..000000000 --- a/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v2-hi/e4b943ea-3e97-490b-af6d-ad7dc0fdf012.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_LamarckInfusion-14B-v2-hi/1762652580.521555", - "retrieved_timestamp": "1762652580.521556", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/LamarckInfusion-14B-v2-hi", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/LamarckInfusion-14B-v2-hi", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.685485622592499 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6555026541798943 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4229607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3884228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.48471875000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5404753989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v2-lo/56f24cac-394c-4439-8f2e-8270e7519bda.json b/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v2-lo/56f24cac-394c-4439-8f2e-8270e7519bda.json new file mode 100644 index 000000000..b52d3f3f1 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v2-lo/56f24cac-394c-4439-8f2e-8270e7519bda.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_LamarckInfusion-14B-v2-lo/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LamarckInfusion-14B-v2-lo", + "id": "sometimesanotion/LamarckInfusion-14B-v2-lo", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6788 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6528 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4237 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3859 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4991 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5397 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v2-lo/57084771-cc66-485c-99ca-470556e14c1b.json b/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v2-lo/57084771-cc66-485c-99ca-470556e14c1b.json deleted file mode 100644 index 9675ebf9c..000000000 --- a/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v2-lo/57084771-cc66-485c-99ca-470556e14c1b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_LamarckInfusion-14B-v2-lo/1762652580.52177", - "retrieved_timestamp": "1762652580.521771", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/LamarckInfusion-14B-v2-lo", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/LamarckInfusion-14B-v2-lo", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6787911630030541 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6528441920403686 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42371601208459214 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3859060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4991041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5397273936170213 - } - } - ] -} diff --git a/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v2/8efa1423-0a39-4674-a94d-3d92448010d6.json b/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v2/8efa1423-0a39-4674-a94d-3d92448010d6.json new file mode 100644 index 000000000..c108f183f --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v2/8efa1423-0a39-4674-a94d-3d92448010d6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_LamarckInfusion-14B-v2/1770682486.623709", + 
"retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LamarckInfusion-14B-v2", + "id": "sometimesanotion/LamarckInfusion-14B-v2", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6812 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6564 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4388 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3876 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4993 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5416 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v2/95f82b68-6135-4d7d-a2f8-b589d4041776.json b/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v2/95f82b68-6135-4d7d-a2f8-b589d4041776.json deleted file mode 100644 index 237a9d662..000000000 --- a/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v2/95f82b68-6135-4d7d-a2f8-b589d4041776.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_LamarckInfusion-14B-v2/1762652580.521342", - "retrieved_timestamp": "1762652580.521342", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/LamarckInfusion-14B-v2", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/LamarckInfusion-14B-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6811892445378263 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6564434429766982 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.438821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3875838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4992604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5416389627659575 - } - } - ] -} diff --git a/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v3/350b3491-cba8-46b4-a07f-3d1277270530.json b/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v3/350b3491-cba8-46b4-a07f-3d1277270530.json new file mode 100644 index 000000000..54ff84883 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v3/350b3491-cba8-46b4-a07f-3d1277270530.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_LamarckInfusion-14B-v3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LamarckInfusion-14B-v3", + "id": "sometimesanotion/LamarckInfusion-14B-v3", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7131 + } + }, 
+ { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6518 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4124 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3867 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.482 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5407 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v3/8fe84e89-c582-44d0-b961-d6ed4d889193.json b/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v3/8fe84e89-c582-44d0-b961-d6ed4d889193.json deleted file mode 100644 index 1439c7f71..000000000 --- a/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v3/8fe84e89-c582-44d0-b961-d6ed4d889193.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_LamarckInfusion-14B-v3/1762652580.5219798", - "retrieved_timestamp": "1762652580.5219798", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/LamarckInfusion-14B-v3", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/LamarckInfusion-14B-v3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7131378076836128 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6517667892516962 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4123867069486405 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38674496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48202083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5407247340425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/sometimesanotion/Qwen-14B-ProseStock-v4/0741ead7-24f3-49b0-9967-f726df84f78a.json b/data/hfopenllm_v2/sometimesanotion/Qwen-14B-ProseStock-v4/0741ead7-24f3-49b0-9967-f726df84f78a.json new file mode 100644 index 000000000..37d385cfe --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwen-14B-ProseStock-v4/0741ead7-24f3-49b0-9967-f726df84f78a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen-14B-ProseStock-v4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen-14B-ProseStock-v4", + "id": "sometimesanotion/Qwen-14B-ProseStock-v4", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4942 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6498 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.364 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3884 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4938 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5386 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Qwen-2.5-14B-Virmarckeoso/1ea4d10e-e099-4967-8c43-e84acaeb40be.json b/data/hfopenllm_v2/sometimesanotion/Qwen-2.5-14B-Virmarckeoso/1ea4d10e-e099-4967-8c43-e84acaeb40be.json new file mode 100644 index 000000000..fb27ac194 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwen-2.5-14B-Virmarckeoso/1ea4d10e-e099-4967-8c43-e84acaeb40be.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen-2.5-14B-Virmarckeoso/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen-2.5-14B-Virmarckeoso", + "id": "sometimesanotion/Qwen-2.5-14B-Virmarckeoso", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4813 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.657 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3565 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.3792 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4794 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5377 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Qwen2.5-14B-Vimarckoso-v2/6c78d9f7-a61e-4f65-ac57-61597f735541.json b/data/hfopenllm_v2/sometimesanotion/Qwen2.5-14B-Vimarckoso-v2/6c78d9f7-a61e-4f65-ac57-61597f735541.json new file mode 100644 index 000000000..916ea88f9 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwen2.5-14B-Vimarckoso-v2/6c78d9f7-a61e-4f65-ac57-61597f735541.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-14B-Vimarckoso-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-Vimarckoso-v2", + "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v2", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4505 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.655 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.358 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3826 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": 
"TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4819 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.538 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-IF-Variant/e9bcfb1f-c688-4e7a-918a-e697adaf7aa5.json b/data/hfopenllm_v2/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-IF-Variant/e9bcfb1f-c688-4e7a-918a-e697adaf7aa5.json new file mode 100644 index 000000000..5757a1b7a --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-IF-Variant/e9bcfb1f-c688-4e7a-918a-e697adaf7aa5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3-IF-Variant/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-Vimarckoso-v3-IF-Variant", + "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-IF-Variant", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6413 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5521 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2545 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3473 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5319 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4589 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01/153cfe7f-c27a-40b8-b8d2-54351f26f583.json b/data/hfopenllm_v2/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01/153cfe7f-c27a-40b8-b8d2-54351f26f583.json new file mode 100644 index 000000000..874b6da06 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01/153cfe7f-c27a-40b8-b8d2-54351f26f583.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3-Prose01/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-Vimarckoso-v3-Prose01", + "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6872 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6359 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3995 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3867 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.4807 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5275 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock/b58372cd-5d55-4f42-a5da-2970e55b44b0.json b/data/hfopenllm_v2/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock/b58372cd-5d55-4f42-a5da-2970e55b44b0.json new file mode 100644 index 000000000..b0e9a12fb --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock/b58372cd-5d55-4f42-a5da-2970e55b44b0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3-model_stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-Vimarckoso-v3-model_stock", + "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7162 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6421 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4245 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.38 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4781 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": 
"MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5316 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3/34a028ac-2002-480c-a1af-5b945ffe872e.json b/data/hfopenllm_v2/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3/34a028ac-2002-480c-a1af-5b945ffe872e.json new file mode 100644 index 000000000..f888babcd --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3/34a028ac-2002-480c-a1af-5b945ffe872e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-Vimarckoso-v3", + "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7257 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6415 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4003 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.38 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4807 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5343 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Qwen2.5-14B-Vimarckoso/065ffc51-154c-4a93-a342-0dd476fda473.json b/data/hfopenllm_v2/sometimesanotion/Qwen2.5-14B-Vimarckoso/065ffc51-154c-4a93-a342-0dd476fda473.json new file mode 100644 index 000000000..c071dc2d8 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwen2.5-14B-Vimarckoso/065ffc51-154c-4a93-a342-0dd476fda473.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-14B-Vimarckoso/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-Vimarckoso", + "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4574 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6446 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3384 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3926 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4859 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5329 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Prose/ebc74f4f-157d-4ee4-8b99-9fb5b685afd5.json b/data/hfopenllm_v2/sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Prose/ebc74f4f-157d-4ee4-8b99-9fb5b685afd5.json new file mode 100644 index 000000000..580c363ac --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Prose/ebc74f4f-157d-4ee4-8b99-9fb5b685afd5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-7B-Gordion-v0.1-Prose/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-Gordion-v0.1-Prose", + "id": "sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Prose", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5347 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5599 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2893 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3205 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4502 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4525 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Reason/91004d26-7b8b-4c0a-bd8c-8880654dc93a.json 
b/data/hfopenllm_v2/sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Reason/91004d26-7b8b-4c0a-bd8c-8880654dc93a.json new file mode 100644 index 000000000..8e9f53078 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Reason/91004d26-7b8b-4c0a-bd8c-8880654dc93a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-7B-Gordion-v0.1-Reason/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-Gordion-v0.1-Reason", + "id": "sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Reason", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4917 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5498 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2621 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3406 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4434 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4307 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Qwen2.5-7B-Gordion-v0.1/5eb1aa92-a031-40d4-ad64-552075dae68a.json b/data/hfopenllm_v2/sometimesanotion/Qwen2.5-7B-Gordion-v0.1/5eb1aa92-a031-40d4-ad64-552075dae68a.json new file mode 100644 index 000000000..806d212aa 
--- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwen2.5-7B-Gordion-v0.1/5eb1aa92-a031-40d4-ad64-552075dae68a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-7B-Gordion-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-Gordion-v0.1", + "id": "sometimesanotion/Qwen2.5-7B-Gordion-v0.1", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7482 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5524 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2915 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4016 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.43 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Qwentessential-14B-v1/3ebc147d-58f2-4605-a011-a71c591fac0e.json b/data/hfopenllm_v2/sometimesanotion/Qwentessential-14B-v1/3ebc147d-58f2-4605-a011-a71c591fac0e.json new file mode 100644 index 000000000..22b1d0f94 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwentessential-14B-v1/3ebc147d-58f2-4605-a011-a71c591fac0e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/sometimesanotion_Qwentessential-14B-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwentessential-14B-v1", + "id": "sometimesanotion/Qwentessential-14B-v1", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6279 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6545 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4071 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3876 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4873 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5381 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v013/01795776-e909-46d3-8b6c-0989334e3d0e.json b/data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v013/01795776-e909-46d3-8b6c-0989334e3d0e.json new file mode 100644 index 000000000..29ab8f101 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v013/01795776-e909-46d3-8b6c-0989334e3d0e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v013/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": 
"documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwentinuum-14B-v013", + "id": "sometimesanotion/Qwentinuum-14B-v013", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6711 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6087 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3708 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3574 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5154 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4991 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v1/00dffa94-31f9-4b5c-b032-03dd20fc2e8d.json b/data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v1/00dffa94-31f9-4b5c-b032-03dd20fc2e8d.json new file mode 100644 index 000000000..a544e7948 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v1/00dffa94-31f9-4b5c-b032-03dd20fc2e8d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwentinuum-14B-v1", + "id": "sometimesanotion/Qwentinuum-14B-v1", + 
"developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5032 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6573 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3603 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3826 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4781 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.541 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v2/736249d0-cea9-46c6-9677-ecae4b410af4.json b/data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v2/736249d0-cea9-46c6-9677-ecae4b410af4.json new file mode 100644 index 000000000..f73c3f45f --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v2/736249d0-cea9-46c6-9677-ecae4b410af4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwentinuum-14B-v2", + "id": "sometimesanotion/Qwentinuum-14B-v2", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } + }, + "evaluation_results": [ + { 
+ "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5378 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6555 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3754 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3884 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4714 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5409 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v3/ef602cfe-3453-4189-b583-292cf05421d1.json b/data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v3/ef602cfe-3453-4189-b583-292cf05421d1.json new file mode 100644 index 000000000..fadcebc67 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v3/ef602cfe-3453-4189-b583-292cf05421d1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwentinuum-14B-v3", + "id": "sometimesanotion/Qwentinuum-14B-v3", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6158 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6539 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3535 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3876 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.486 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5413 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v5/559af2c1-deca-4c35-b83a-004c22ac958a.json b/data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v5/559af2c1-deca-4c35-b83a-004c22ac958a.json new file mode 100644 index 000000000..a63f22447 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v5/559af2c1-deca-4c35-b83a-004c22ac958a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwentinuum-14B-v5", + "id": "sometimesanotion/Qwentinuum-14B-v5", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6286 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", 
+ "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.655 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3444 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3876 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4874 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5418 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v6-Prose/8d66d895-626a-477f-91b6-2195f35aacb3.json b/data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v6-Prose/8d66d895-626a-477f-91b6-2195f35aacb3.json new file mode 100644 index 000000000..6b40b100b --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v6-Prose/8d66d895-626a-477f-91b6-2195f35aacb3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v6-Prose/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwentinuum-14B-v6-Prose", + "id": "sometimesanotion/Qwentinuum-14B-v6-Prose", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5643 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6545 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3701 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3884 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4913 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5392 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v6/004df803-70da-4e59-b3ad-f210c790f29e.json b/data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v6/004df803-70da-4e59-b3ad-f210c790f29e.json new file mode 100644 index 000000000..ff2133b12 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v6/004df803-70da-4e59-b3ad-f210c790f29e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v6/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwentinuum-14B-v6", + "id": "sometimesanotion/Qwentinuum-14B-v6", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6304 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6545 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": 
"hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3603 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3867 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.49 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.54 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v7/bb2972ca-e673-4be5-bc7e-2689adeac3a9.json b/data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v7/bb2972ca-e673-4be5-bc7e-2689adeac3a9.json new file mode 100644 index 000000000..86db2ec7b --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v7/bb2972ca-e673-4be5-bc7e-2689adeac3a9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v7/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwentinuum-14B-v7", + "id": "sometimesanotion/Qwentinuum-14B-v7", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6109 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6551 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, 
+ "max_score": 1.0 + }, + "score_details": { + "score": 0.3573 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3909 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.482 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.541 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v8/eacf2411-a0ea-41fd-8363-e565fce0f26f.json b/data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v8/eacf2411-a0ea-41fd-8363-e565fce0f26f.json new file mode 100644 index 000000000..14b1b0dfd --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v8/eacf2411-a0ea-41fd-8363-e565fce0f26f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v8/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwentinuum-14B-v8", + "id": "sometimesanotion/Qwentinuum-14B-v8", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5412 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6534 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3912 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3834 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4873 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5412 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v9/4eefe3cd-ff42-4d4c-89c6-c3e48d8c85e9.json b/data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v9/4eefe3cd-ff42-4d4c-89c6-c3e48d8c85e9.json new file mode 100644 index 000000000..470b2bb89 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwentinuum-14B-v9/4eefe3cd-ff42-4d4c-89c6-c3e48d8c85e9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v9/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwentinuum-14B-v9", + "id": "sometimesanotion/Qwentinuum-14B-v9", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5107 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.658 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3482 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3859 + } + }, + { + 
"evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4781 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5421 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-qv256/f19dab38-48ed-438e-8a62-86e4d111f6c8.json b/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-qv256/f19dab38-48ed-438e-8a62-86e4d111f6c8.json new file mode 100644 index 000000000..17591ed25 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-qv256/f19dab38-48ed-438e-8a62-86e4d111f6c8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-qv256/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwenvergence-14B-qv256", + "id": "sometimesanotion/Qwenvergence-14B-qv256", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7006 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6312 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3897 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3784 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4926 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5178 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v0.6-004-model_stock/ff4b6d28-62e2-4671-8df9-690ce7f13f0b.json b/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v0.6-004-model_stock/ff4b6d28-62e2-4671-8df9-690ce7f13f0b.json new file mode 100644 index 000000000..10044a8d2 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v0.6-004-model_stock/ff4b6d28-62e2-4671-8df9-690ce7f13f0b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v0.6-004-model_stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwenvergence-14B-v0.6-004-model_stock", + "id": "sometimesanotion/Qwenvergence-14B-v0.6-004-model_stock", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.686 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6249 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4094 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3834 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.5033 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5193 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v10/9c05a7e4-f495-41d0-a7f0-1959e7434ba2.json b/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v10/9c05a7e4-f495-41d0-a7f0-1959e7434ba2.json new file mode 100644 index 000000000..1827b725f --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v10/9c05a7e4-f495-41d0-a7f0-1959e7434ba2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v10/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwenvergence-14B-v10", + "id": "sometimesanotion/Qwenvergence-14B-v10", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6757 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6316 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4789 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3792 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4991 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": 
"TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5239 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v11/404e3d61-26d3-4f95-9847-064f0c7c6970.json b/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v11/404e3d61-26d3-4f95-9847-064f0c7c6970.json new file mode 100644 index 000000000..df64804c6 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v11/404e3d61-26d3-4f95-9847-064f0c7c6970.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v11/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwenvergence-14B-v11", + "id": "sometimesanotion/Qwenvergence-14B-v11", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7192 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6368 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4645 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3725 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4754 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.5327 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v12-Prose-DS/0b4574f2-1b71-427f-9923-17db449be191.json b/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v12-Prose-DS/0b4574f2-1b71-427f-9923-17db449be191.json new file mode 100644 index 000000000..12b28bc82 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v12-Prose-DS/0b4574f2-1b71-427f-9923-17db449be191.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v12-Prose-DS/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwenvergence-14B-v12-Prose-DS", + "id": "sometimesanotion/Qwenvergence-14B-v12-Prose-DS", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6173 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6507 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4305 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3943 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5151 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5369 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v12-Prose/775b88cd-98e8-4d93-acca-e294f68f2da2.json b/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v12-Prose/775b88cd-98e8-4d93-acca-e294f68f2da2.json new file mode 100644 index 000000000..c5c86aec1 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v12-Prose/775b88cd-98e8-4d93-acca-e294f68f2da2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v12-Prose/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwenvergence-14B-v12-Prose", + "id": "sometimesanotion/Qwenvergence-14B-v12-Prose", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5412 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6504 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3535 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3867 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4991 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5381 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v13-Prose-DS/89464568-47cb-4659-af37-8b061d3f0c8c.json 
b/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v13-Prose-DS/89464568-47cb-4659-af37-8b061d3f0c8c.json new file mode 100644 index 000000000..84c78b460 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v13-Prose-DS/89464568-47cb-4659-af37-8b061d3f0c8c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v13-Prose-DS/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwenvergence-14B-v13-Prose-DS", + "id": "sometimesanotion/Qwenvergence-14B-v13-Prose-DS", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7178 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6405 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.386 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3834 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4927 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5349 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v15-Prose-MS/9fad9d73-acbf-4ffc-886c-551c1fe1ed45.json b/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v15-Prose-MS/9fad9d73-acbf-4ffc-886c-551c1fe1ed45.json new file mode 100644 index 
000000000..5ec413f82 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v15-Prose-MS/9fad9d73-acbf-4ffc-886c-551c1fe1ed45.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v15-Prose-MS/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwenvergence-14B-v15-Prose-MS", + "id": "sometimesanotion/Qwenvergence-14B-v15-Prose-MS", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5032 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.655 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3633 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3951 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4913 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5393 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v2-Prose/c1882335-0df5-4df2-bfa1-c16126c328fb.json b/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v2-Prose/c1882335-0df5-4df2-bfa1-c16126c328fb.json new file mode 100644 index 000000000..a6c3cb27c --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v2-Prose/c1882335-0df5-4df2-bfa1-c16126c328fb.json @@ -0,0 
+1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v2-Prose/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwenvergence-14B-v2-Prose", + "id": "sometimesanotion/Qwenvergence-14B-v2-Prose", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4705 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6519 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3557 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3935 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4926 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5372 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v3-Prose/291471ed-3b7c-4bd4-91bb-c27cd74ec460.json b/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v3-Prose/291471ed-3b7c-4bd4-91bb-c27cd74ec460.json new file mode 100644 index 000000000..556f2a8cf --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v3-Prose/291471ed-3b7c-4bd4-91bb-c27cd74ec460.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v3-Prose/1770682486.623709", + "retrieved_timestamp": 
"1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwenvergence-14B-v3-Prose", + "id": "sometimesanotion/Qwenvergence-14B-v3-Prose", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4918 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6513 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3648 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3951 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4939 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.537 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v3-Reason/53565fe4-0368-477b-9916-ac9a4b8a9c7b.json b/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v3-Reason/53565fe4-0368-477b-9916-ac9a4b8a9c7b.json new file mode 100644 index 000000000..0e8401765 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v3-Reason/53565fe4-0368-477b-9916-ac9a4b8a9c7b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v3-Reason/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + 
"evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwenvergence-14B-v3-Reason", + "id": "sometimesanotion/Qwenvergence-14B-v3-Reason", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5278 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6557 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3119 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3842 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4754 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5396 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v3-Reason/f6cb5e9d-c4c9-44a2-9adf-7fa5639d84d9.json b/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v3-Reason/f6cb5e9d-c4c9-44a2-9adf-7fa5639d84d9.json new file mode 100644 index 000000000..49b6488af --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v3-Reason/f6cb5e9d-c4c9-44a2-9adf-7fa5639d84d9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v3-Reason/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwenvergence-14B-v3-Reason", + "id": "sometimesanotion/Qwenvergence-14B-v3-Reason", + 
"developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5367 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6561 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.358 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3867 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.474 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5395 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v3/e51fee25-7648-49d9-a8da-b8dbc68a722b.json b/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v3/e51fee25-7648-49d9-a8da-b8dbc68a722b.json new file mode 100644 index 000000000..6b7f693f2 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v3/e51fee25-7648-49d9-a8da-b8dbc68a722b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwenvergence-14B-v3", + "id": "sometimesanotion/Qwenvergence-14B-v3", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } + }, + 
"evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5044 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6548 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3693 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3842 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4886 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5386 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v6-Prose-model_stock/6acdc96b-cfde-439f-b6b3-a66257b3fcde.json b/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v6-Prose-model_stock/6acdc96b-cfde-439f-b6b3-a66257b3fcde.json new file mode 100644 index 000000000..1fad180ca --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v6-Prose-model_stock/6acdc96b-cfde-439f-b6b3-a66257b3fcde.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v6-Prose-model_stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwenvergence-14B-v6-Prose-model_stock", + "id": "sometimesanotion/Qwenvergence-14B-v6-Prose-model_stock", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": 
"IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4811 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.653 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3603 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3935 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4899 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5387 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v6-Prose/850da8de-ca13-4f15-bb9f-68b910355cfd.json b/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v6-Prose/850da8de-ca13-4f15-bb9f-68b910355cfd.json new file mode 100644 index 000000000..a338fb9a8 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v6-Prose/850da8de-ca13-4f15-bb9f-68b910355cfd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v6-Prose/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwenvergence-14B-v6-Prose", + "id": "sometimesanotion/Qwenvergence-14B-v6-Prose", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.599 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6544 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3565 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3884 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4887 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5371 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v8/542fbb7a-d4eb-4cbf-b63a-4305cb108361.json b/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v8/542fbb7a-d4eb-4cbf-b63a-4305cb108361.json new file mode 100644 index 000000000..fd2e91afc --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v8/542fbb7a-d4eb-4cbf-b63a-4305cb108361.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v8/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwenvergence-14B-v8", + "id": "sometimesanotion/Qwenvergence-14B-v8", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5913 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + 
"source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6522 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4048 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3809 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4768 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5435 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v9/1dbb8206-6a86-4e2c-8ee0-d80fed014a69.json b/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v9/1dbb8206-6a86-4e2c-8ee0-d80fed014a69.json new file mode 100644 index 000000000..e4304c3e3 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Qwenvergence-14B-v9/1dbb8206-6a86-4e2c-8ee0-d80fed014a69.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v9/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwenvergence-14B-v9", + "id": "sometimesanotion/Qwenvergence-14B-v9", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6598 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 
0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6166 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4139 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3683 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5141 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5111 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/lamarck-14b-prose-model_stock/3191b3a3-761a-42b4-bd31-b8dc22a4c722.json b/data/hfopenllm_v2/sometimesanotion/lamarck-14b-prose-model_stock/3191b3a3-761a-42b4-bd31-b8dc22a4c722.json deleted file mode 100644 index 046bce113..000000000 --- a/data/hfopenllm_v2/sometimesanotion/lamarck-14b-prose-model_stock/3191b3a3-761a-42b4-bd31-b8dc22a4c722.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_lamarck-14b-prose-model_stock/1762652580.5312169", - "retrieved_timestamp": "1762652580.5312169", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/lamarck-14b-prose-model_stock", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/lamarck-14b-prose-model_stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4276486389446668 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6487621585665343 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3934563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48459375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.535405585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/sometimesanotion/lamarck-14b-prose-model_stock/6341de3c-8d4c-4af8-8f0d-c81e948bacd6.json b/data/hfopenllm_v2/sometimesanotion/lamarck-14b-prose-model_stock/6341de3c-8d4c-4af8-8f0d-c81e948bacd6.json new file mode 100644 index 000000000..404e5cee8 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/lamarck-14b-prose-model_stock/6341de3c-8d4c-4af8-8f0d-c81e948bacd6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_lamarck-14b-prose-model_stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lamarck-14b-prose-model_stock", + "id": "sometimesanotion/lamarck-14b-prose-model_stock", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4276 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6488 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3414 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3935 + } + }, + 
{ + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4846 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5354 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/lamarck-14b-reason-model_stock/e6cb6a87-6db8-4aee-bede-ce8a60dc8f4a.json b/data/hfopenllm_v2/sometimesanotion/lamarck-14b-reason-model_stock/e6cb6a87-6db8-4aee-bede-ce8a60dc8f4a.json new file mode 100644 index 000000000..92a2a8d29 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/lamarck-14b-reason-model_stock/e6cb6a87-6db8-4aee-bede-ce8a60dc8f4a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sometimesanotion_lamarck-14b-reason-model_stock/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lamarck-14b-reason-model_stock", + "id": "sometimesanotion/lamarck-14b-reason-model_stock", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4965 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6569 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.358 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3842 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4741 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5402 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/lamarck-14b-reason-model_stock/ee7d14c9-aa49-49df-99fc-057e7dae251f.json b/data/hfopenllm_v2/sometimesanotion/lamarck-14b-reason-model_stock/ee7d14c9-aa49-49df-99fc-057e7dae251f.json deleted file mode 100644 index b58783abd..000000000 --- a/data/hfopenllm_v2/sometimesanotion/lamarck-14b-reason-model_stock/ee7d14c9-aa49-49df-99fc-057e7dae251f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sometimesanotion_lamarck-14b-reason-model_stock/1762652580.531434", - "retrieved_timestamp": "1762652580.531434", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sometimesanotion/lamarck-14b-reason-model_stock", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/lamarck-14b-reason-model_stock", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49646715160219335 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6568898541408251 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3580060422960725 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38422818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47408333333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5402260638297872 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-161415/5113439d-1394-46f2-a38e-34b54e94a9e6.json b/data/hfopenllm_v2/sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-161415/5113439d-1394-46f2-a38e-34b54e94a9e6.json new file mode 100644 index 000000000..d02f279fc --- /dev/null +++ b/data/hfopenllm_v2/sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-161415/5113439d-1394-46f2-a38e-34b54e94a9e6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sonthenguyen_ft-unsloth-zephyr-sft-bnb-4bit-20241014-161415/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ft-unsloth-zephyr-sft-bnb-4bit-20241014-161415", + "id": "sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-161415", + "developer": "sonthenguyen", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 7.723 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2893 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3804 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0113 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2466 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3861 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1401 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-161415/7aa22e01-efb1-46f3-aad6-cc1fcb2c3783.json b/data/hfopenllm_v2/sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-161415/7aa22e01-efb1-46f3-aad6-cc1fcb2c3783.json deleted file mode 100644 index 92ade5c52..000000000 --- a/data/hfopenllm_v2/sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-161415/7aa22e01-efb1-46f3-aad6-cc1fcb2c3783.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sonthenguyen_ft-unsloth-zephyr-sft-bnb-4bit-20241014-161415/1762652580.531641", - "retrieved_timestamp": "1762652580.5316422", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-161415", - "developer": "sonthenguyen", - "inference_platform": "unknown", - "id": "sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-161415", - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 7.723 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28933784580468713 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38041816886828617 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.011329305135951661 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24664429530201343 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3860625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14012632978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-164205/a03d88aa-7ccd-4f8a-9a1e-c9469d3ae559.json b/data/hfopenllm_v2/sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-164205/a03d88aa-7ccd-4f8a-9a1e-c9469d3ae559.json new file mode 100644 index 000000000..374cc5708 --- /dev/null +++ b/data/hfopenllm_v2/sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-164205/a03d88aa-7ccd-4f8a-9a1e-c9469d3ae559.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sonthenguyen_ft-unsloth-zephyr-sft-bnb-4bit-20241014-164205/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + 
"source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ft-unsloth-zephyr-sft-bnb-4bit-20241014-164205", + "id": "sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-164205", + "developer": "sonthenguyen", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 7.723 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3199 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3959 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0083 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4272 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2124 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-164205/c9e9de59-9ec8-4ca9-8869-f77cac14f3ed.json b/data/hfopenllm_v2/sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-164205/c9e9de59-9ec8-4ca9-8869-f77cac14f3ed.json deleted file mode 100644 index d3f333518..000000000 --- a/data/hfopenllm_v2/sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-164205/c9e9de59-9ec8-4ca9-8869-f77cac14f3ed.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sonthenguyen_ft-unsloth-zephyr-sft-bnb-4bit-20241014-164205/1762652580.531905", - "retrieved_timestamp": "1762652580.5319061", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-164205", - "developer": "sonthenguyen", - "inference_platform": "unknown", - "id": "sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-164205", - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 7.723 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3199377651298555 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39586243698929185 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4271770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21243351063829788 - } - } - ] -} diff --git a/data/hfopenllm_v2/sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-170522/1cfb40a7-7373-417c-aa1c-f6ab63ecb3b8.json b/data/hfopenllm_v2/sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-170522/1cfb40a7-7373-417c-aa1c-f6ab63ecb3b8.json new file mode 100644 index 000000000..37b361fb2 --- /dev/null +++ b/data/hfopenllm_v2/sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-170522/1cfb40a7-7373-417c-aa1c-f6ab63ecb3b8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sonthenguyen_ft-unsloth-zephyr-sft-bnb-4bit-20241014-170522/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ft-unsloth-zephyr-sft-bnb-4bit-20241014-170522", + "id": "sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-170522", + "developer": "sonthenguyen", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 7.723 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": 
"google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3764 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3828 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0091 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4404 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2055 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-170522/1e66ee5b-d3e7-4e2e-8a6f-d098938d4afd.json b/data/hfopenllm_v2/sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-170522/1e66ee5b-d3e7-4e2e-8a6f-d098938d4afd.json deleted file mode 100644 index 6fa4631f1..000000000 --- a/data/hfopenllm_v2/sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-170522/1e66ee5b-d3e7-4e2e-8a6f-d098938d4afd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sonthenguyen_ft-unsloth-zephyr-sft-bnb-4bit-20241014-170522/1762652580.532109", - "retrieved_timestamp": "1762652580.53211", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-170522", - "developer": "sonthenguyen", - "inference_platform": "unknown", - "id": "sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-170522", - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 7.723 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": 
"Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37644117607946914 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3828367247244511 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4404166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20553523936170212 - } - } - ] -} diff --git a/data/hfopenllm_v2/sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbc-213steps/446ac93f-d47c-4207-bf32-0cd94e88a931.json b/data/hfopenllm_v2/sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbc-213steps/446ac93f-d47c-4207-bf32-0cd94e88a931.json new file mode 100644 index 000000000..a216eeafc --- /dev/null +++ b/data/hfopenllm_v2/sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbc-213steps/446ac93f-d47c-4207-bf32-0cd94e88a931.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sonthenguyen_zephyr-sft-bnb-4bit-DPO-mtbc-213steps/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zephyr-sft-bnb-4bit-DPO-mtbc-213steps", + "id": "sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbc-213steps", + "developer": "sonthenguyen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4275 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4197 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0257 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4086 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2709 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbc-213steps/aabf8b57-c3fd-494b-b8e3-7ff1bdb0a15b.json b/data/hfopenllm_v2/sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbc-213steps/aabf8b57-c3fd-494b-b8e3-7ff1bdb0a15b.json deleted file mode 100644 index 6ee712307..000000000 --- a/data/hfopenllm_v2/sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbc-213steps/aabf8b57-c3fd-494b-b8e3-7ff1bdb0a15b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sonthenguyen_zephyr-sft-bnb-4bit-DPO-mtbc-213steps/1762652580.532313", - "retrieved_timestamp": "1762652580.532314", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbc-213steps", - "developer": "sonthenguyen", - "inference_platform": "unknown", - "id": "sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbc-213steps", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4275489035758454 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4197290890050172 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0256797583081571 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40863541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27086103723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbo-180steps/7e4ba4f8-2768-4e7b-a11d-75ad22a47c45.json b/data/hfopenllm_v2/sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbo-180steps/7e4ba4f8-2768-4e7b-a11d-75ad22a47c45.json new file mode 100644 index 000000000..51e507e7a --- /dev/null +++ b/data/hfopenllm_v2/sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbo-180steps/7e4ba4f8-2768-4e7b-a11d-75ad22a47c45.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sonthenguyen_zephyr-sft-bnb-4bit-DPO-mtbo-180steps/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zephyr-sft-bnb-4bit-DPO-mtbo-180steps", + "id": "sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbo-180steps", + "developer": "sonthenguyen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4087 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4323 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0234 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3885 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2748 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbo-180steps/dd216882-a64e-4a0e-8fdc-ff5f99639566.json b/data/hfopenllm_v2/sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbo-180steps/dd216882-a64e-4a0e-8fdc-ff5f99639566.json deleted file mode 100644 index 88d8768ad..000000000 --- a/data/hfopenllm_v2/sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbo-180steps/dd216882-a64e-4a0e-8fdc-ff5f99639566.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sonthenguyen_zephyr-sft-bnb-4bit-DPO-mtbo-180steps/1762652580.532533", - "retrieved_timestamp": "1762652580.5325341", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbo-180steps", - "developer": "sonthenguyen", - "inference_platform": "unknown", - "id": "sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbo-180steps", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40871443325930756 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4322585223071556 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.023413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38851041666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27476728723404253 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbr-180steps/9fa1bbeb-ec5c-4d53-b2f3-eefa660bee5e.json b/data/hfopenllm_v2/sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbr-180steps/9fa1bbeb-ec5c-4d53-b2f3-eefa660bee5e.json deleted file mode 100644 index 5bcfb9a64..000000000 --- a/data/hfopenllm_v2/sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbr-180steps/9fa1bbeb-ec5c-4d53-b2f3-eefa660bee5e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sonthenguyen_zephyr-sft-bnb-4bit-DPO-mtbr-180steps/1762652580.5327501", - "retrieved_timestamp": "1762652580.532751", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbr-180steps", - "developer": "sonthenguyen", - "inference_platform": "unknown", - "id": "sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbr-180steps", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4032190144372487 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43053552565190517 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42575 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2711103723404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbr-180steps/ca77f821-4722-45b1-b731-7d774232acb4.json b/data/hfopenllm_v2/sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbr-180steps/ca77f821-4722-45b1-b731-7d774232acb4.json new file mode 100644 index 000000000..c5738810a --- /dev/null +++ b/data/hfopenllm_v2/sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbr-180steps/ca77f821-4722-45b1-b731-7d774232acb4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sonthenguyen_zephyr-sft-bnb-4bit-DPO-mtbr-180steps/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": 
"documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zephyr-sft-bnb-4bit-DPO-mtbr-180steps", + "id": "sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbr-180steps", + "developer": "sonthenguyen", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4032 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4305 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0249 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4258 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2711 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sophosympatheia/Midnight-Miqu-70B-v1.5/3498b101-b86e-4968-abca-a3d3d42a4e5b.json b/data/hfopenllm_v2/sophosympatheia/Midnight-Miqu-70B-v1.5/3498b101-b86e-4968-abca-a3d3d42a4e5b.json deleted file mode 100644 index 4d5017a6b..000000000 --- a/data/hfopenllm_v2/sophosympatheia/Midnight-Miqu-70B-v1.5/3498b101-b86e-4968-abca-a3d3d42a4e5b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sophosympatheia_Midnight-Miqu-70B-v1.5/1762652580.532959", - "retrieved_timestamp": "1762652580.53296", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": 
"HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sophosympatheia/Midnight-Miqu-70B-v1.5", - "developer": "sophosympatheia", - "inference_platform": "unknown", - "id": "sophosympatheia/Midnight-Miqu-70B-v1.5", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 68.977 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6118465671086051 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5606228371685053 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0702416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42441666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38248005319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/sophosympatheia/Midnight-Miqu-70B-v1.5/f32d2a11-edd3-4662-aed7-88c6820b2c2e.json b/data/hfopenllm_v2/sophosympatheia/Midnight-Miqu-70B-v1.5/f32d2a11-edd3-4662-aed7-88c6820b2c2e.json new file mode 100644 index 000000000..fddda10e0 --- /dev/null +++ b/data/hfopenllm_v2/sophosympatheia/Midnight-Miqu-70B-v1.5/f32d2a11-edd3-4662-aed7-88c6820b2c2e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sophosympatheia_Midnight-Miqu-70B-v1.5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Midnight-Miqu-70B-v1.5", + "id": "sophosympatheia/Midnight-Miqu-70B-v1.5", + "developer": "sophosympatheia", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 68.977 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6118 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + 
"hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5606 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0702 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4244 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3825 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/speakleash/Bielik-11B-v2.0-Instruct/4aaff24b-0364-4cc9-9680-5f5c6d04128b.json b/data/hfopenllm_v2/speakleash/Bielik-11B-v2.0-Instruct/4aaff24b-0364-4cc9-9680-5f5c6d04128b.json deleted file mode 100644 index cc1aa45a7..000000000 --- a/data/hfopenllm_v2/speakleash/Bielik-11B-v2.0-Instruct/4aaff24b-0364-4cc9-9680-5f5c6d04128b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/speakleash_Bielik-11B-v2.0-Instruct/1762652580.533494", - "retrieved_timestamp": "1762652580.533494", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "speakleash/Bielik-11B-v2.0-Instruct", - "developer": "speakleash", - "inference_platform": "unknown", - "id": "speakleash/Bielik-11B-v2.0-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 11.169 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5252430218486948 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5361579931173499 - } - }, - { - "evaluation_name": "MATH Level 
5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11858006042296072 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4467083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3351063829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/speakleash/Bielik-11B-v2.0-Instruct/71c56883-dd14-4f16-b839-5ce607a4aadb.json b/data/hfopenllm_v2/speakleash/Bielik-11B-v2.0-Instruct/71c56883-dd14-4f16-b839-5ce607a4aadb.json new file mode 100644 index 000000000..12049af61 --- /dev/null +++ b/data/hfopenllm_v2/speakleash/Bielik-11B-v2.0-Instruct/71c56883-dd14-4f16-b839-5ce607a4aadb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/speakleash_Bielik-11B-v2.0-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Bielik-11B-v2.0-Instruct", + "id": "speakleash/Bielik-11B-v2.0-Instruct", + "developer": "speakleash", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 11.169 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5252 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5362 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1186 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + 
}, + "score_details": { + "score": 0.3171 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4467 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3351 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/speakleash/Bielik-11B-v2.1-Instruct/639004c2-81a5-410d-bd61-e3e263f55335.json b/data/hfopenllm_v2/speakleash/Bielik-11B-v2.1-Instruct/639004c2-81a5-410d-bd61-e3e263f55335.json new file mode 100644 index 000000000..e5d5e6b4f --- /dev/null +++ b/data/hfopenllm_v2/speakleash/Bielik-11B-v2.1-Instruct/639004c2-81a5-410d-bd61-e3e263f55335.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/speakleash_Bielik-11B-v2.1-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Bielik-11B-v2.1-Instruct", + "id": "speakleash/Bielik-11B-v2.1-Instruct", + "developer": "speakleash", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 11.169 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.509 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.553 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2666 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3372 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { 
+ "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4185 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3447 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/speakleash/Bielik-11B-v2.1-Instruct/834e5703-00f3-47d6-817f-cf039c53d915.json b/data/hfopenllm_v2/speakleash/Bielik-11B-v2.1-Instruct/834e5703-00f3-47d6-817f-cf039c53d915.json deleted file mode 100644 index 6d767453d..000000000 --- a/data/hfopenllm_v2/speakleash/Bielik-11B-v2.1-Instruct/834e5703-00f3-47d6-817f-cf039c53d915.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/speakleash_Bielik-11B-v2.1-Instruct/1762652580.533698", - "retrieved_timestamp": "1762652580.533698", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "speakleash/Bielik-11B-v2.1-Instruct", - "developer": "speakleash", - "inference_platform": "unknown", - "id": "speakleash/Bielik-11B-v2.1-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 11.169 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5089817240477489 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5530119844151298 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26661631419939574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.337248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4185208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34466422872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/speakleash/Bielik-11B-v2.2-Instruct/5f232a99-07c9-4df7-9d3b-837966ea6de5.json 
b/data/hfopenllm_v2/speakleash/Bielik-11B-v2.2-Instruct/5f232a99-07c9-4df7-9d3b-837966ea6de5.json new file mode 100644 index 000000000..29b63348b --- /dev/null +++ b/data/hfopenllm_v2/speakleash/Bielik-11B-v2.2-Instruct/5f232a99-07c9-4df7-9d3b-837966ea6de5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/speakleash_Bielik-11B-v2.2-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Bielik-11B-v2.2-Instruct", + "id": "speakleash/Bielik-11B-v2.2-Instruct", + "developer": "speakleash", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 11.169 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5552 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5597 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2681 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3314 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4171 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3487 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/speakleash/Bielik-11B-v2.2-Instruct/70c377ab-41b4-4c30-ade6-65cc52ab916a.json b/data/hfopenllm_v2/speakleash/Bielik-11B-v2.2-Instruct/70c377ab-41b4-4c30-ade6-65cc52ab916a.json deleted file mode 100644 index d2e45b50f..000000000 --- 
a/data/hfopenllm_v2/speakleash/Bielik-11B-v2.2-Instruct/70c377ab-41b4-4c30-ade6-65cc52ab916a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/speakleash_Bielik-11B-v2.2-Instruct/1762652580.533901", - "retrieved_timestamp": "1762652580.5339022", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "speakleash/Bielik-11B-v2.2-Instruct", - "developer": "speakleash", - "inference_platform": "unknown", - "id": "speakleash/Bielik-11B-v2.2-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 11.169 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5551935531057595 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5596561190863629 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2681268882175227 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41712499999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3486535904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/speakleash/Bielik-11B-v2.3-Instruct/482e34ee-8974-46c6-b3f4-4cc9872ef562.json b/data/hfopenllm_v2/speakleash/Bielik-11B-v2.3-Instruct/482e34ee-8974-46c6-b3f4-4cc9872ef562.json new file mode 100644 index 000000000..d3532936e --- /dev/null +++ b/data/hfopenllm_v2/speakleash/Bielik-11B-v2.3-Instruct/482e34ee-8974-46c6-b3f4-4cc9872ef562.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/speakleash_Bielik-11B-v2.3-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Bielik-11B-v2.3-Instruct", + "id": "speakleash/Bielik-11B-v2.3-Instruct", + "developer": "speakleash", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 11.169 
+ } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5583 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5663 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2085 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3406 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4518 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3444 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/speakleash/Bielik-11B-v2.3-Instruct/822b7413-b84e-4df0-8aca-cc0e95283a86.json b/data/hfopenllm_v2/speakleash/Bielik-11B-v2.3-Instruct/822b7413-b84e-4df0-8aca-cc0e95283a86.json deleted file mode 100644 index 32f9496c4..000000000 --- a/data/hfopenllm_v2/speakleash/Bielik-11B-v2.3-Instruct/822b7413-b84e-4df0-8aca-cc0e95283a86.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/speakleash_Bielik-11B-v2.3-Instruct/1762652580.534104", - "retrieved_timestamp": "1762652580.534104", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "speakleash/Bielik-11B-v2.3-Instruct", - "developer": "speakleash", - "inference_platform": "unknown", - "id": "speakleash/Bielik-11B-v2.3-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 11.169 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.558290890393046 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5662699020280031 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34060402684563756 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4518229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34441489361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/speakleash/Bielik-11B-v2/13743252-3ba3-406d-8e95-5a4cd3ac3772.json b/data/hfopenllm_v2/speakleash/Bielik-11B-v2/13743252-3ba3-406d-8e95-5a4cd3ac3772.json new file mode 100644 index 000000000..0c4687f31 --- /dev/null +++ b/data/hfopenllm_v2/speakleash/Bielik-11B-v2/13743252-3ba3-406d-8e95-5a4cd3ac3772.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/speakleash_Bielik-11B-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Bielik-11B-v2", + "id": "speakleash/Bielik-11B-v2", + "developer": "speakleash", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 11.169 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2381 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4931 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0785 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2886 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3924 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3137 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/speakleash/Bielik-11B-v2/680f5fa0-fb15-4687-a40b-7807af2e0fe5.json b/data/hfopenllm_v2/speakleash/Bielik-11B-v2/680f5fa0-fb15-4687-a40b-7807af2e0fe5.json deleted file mode 100644 index 089c120c0..000000000 --- a/data/hfopenllm_v2/speakleash/Bielik-11B-v2/680f5fa0-fb15-4687-a40b-7807af2e0fe5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/speakleash_Bielik-11B-v2/1762652580.533211", - "retrieved_timestamp": "1762652580.533211", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "speakleash/Bielik-11B-v2", - "developer": "speakleash", - "inference_platform": "unknown", - "id": "speakleash/Bielik-11B-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 11.169 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23810489501190177 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49308409091594996 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07854984894259819 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39244791666666673 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3137466755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/spmurrayzzz/Mistral-Syndicate-7B/ff25cb66-ed6f-421a-a038-1feb24666645.json b/data/hfopenllm_v2/spmurrayzzz/Mistral-Syndicate-7B/ff25cb66-ed6f-421a-a038-1feb24666645.json new file mode 100644 index 000000000..7a760f4b4 --- /dev/null +++ b/data/hfopenllm_v2/spmurrayzzz/Mistral-Syndicate-7B/ff25cb66-ed6f-421a-a038-1feb24666645.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/spmurrayzzz_Mistral-Syndicate-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-Syndicate-7B", + "id": "spmurrayzzz/Mistral-Syndicate-7B", + "developer": "spmurrayzzz", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2496 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4245 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.034 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4386 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2631 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/spow12/ChatWaifu_12B_v2.0/843f0d9a-04e8-4cea-bb18-94651a814d1f.json b/data/hfopenllm_v2/spow12/ChatWaifu_12B_v2.0/843f0d9a-04e8-4cea-bb18-94651a814d1f.json new file mode 100644 index 000000000..4c523e210 --- /dev/null +++ b/data/hfopenllm_v2/spow12/ChatWaifu_12B_v2.0/843f0d9a-04e8-4cea-bb18-94651a814d1f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/spow12_ChatWaifu_12B_v2.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ChatWaifu_12B_v2.0", + "id": "spow12/ChatWaifu_12B_v2.0", + "developer": "spow12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4768 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5208 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.071 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2768 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4432 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3388 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/spow12/ChatWaifu_12B_v2.0/f9798139-bc7d-49e7-bc42-bcd0ee808c68.json b/data/hfopenllm_v2/spow12/ChatWaifu_12B_v2.0/f9798139-bc7d-49e7-bc42-bcd0ee808c68.json deleted file mode 100644 index 6964b681f..000000000 --- a/data/hfopenllm_v2/spow12/ChatWaifu_12B_v2.0/f9798139-bc7d-49e7-bc42-bcd0ee808c68.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/spow12_ChatWaifu_12B_v2.0/1762652580.534569", - "retrieved_timestamp": "1762652580.53457", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "spow12/ChatWaifu_12B_v2.0", - "developer": "spow12", - "inference_platform": "unknown", - "id": "spow12/ChatWaifu_12B_v2.0", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47675833455232114 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5207681738205238 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07099697885196375 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44317708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33876329787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/spow12/ChatWaifu_22B_v2.0_preview/d0e259de-1261-4d31-a1d4-4689112deca0.json b/data/hfopenllm_v2/spow12/ChatWaifu_22B_v2.0_preview/d0e259de-1261-4d31-a1d4-4689112deca0.json deleted file mode 100644 index 8d935dd08..000000000 --- a/data/hfopenllm_v2/spow12/ChatWaifu_22B_v2.0_preview/d0e259de-1261-4d31-a1d4-4689112deca0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/spow12_ChatWaifu_22B_v2.0_preview/1762652580.534824", - "retrieved_timestamp": "1762652580.5348248", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": 
"documentation" - }, - "model_info": { - "name": "spow12/ChatWaifu_22B_v2.0_preview", - "developer": "spow12", - "inference_platform": "unknown", - "id": "spow12/ChatWaifu_22B_v2.0_preview", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 22.247 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6744947849483814 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6170153091362338 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18882175226586104 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3685416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39876994680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/spow12/ChatWaifu_22B_v2.0_preview/fa3ccf4a-9b26-4a76-a974-3a776adec7c2.json b/data/hfopenllm_v2/spow12/ChatWaifu_22B_v2.0_preview/fa3ccf4a-9b26-4a76-a974-3a776adec7c2.json new file mode 100644 index 000000000..f32e11888 --- /dev/null +++ b/data/hfopenllm_v2/spow12/ChatWaifu_22B_v2.0_preview/fa3ccf4a-9b26-4a76-a974-3a776adec7c2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/spow12_ChatWaifu_22B_v2.0_preview/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ChatWaifu_22B_v2.0_preview", + "id": "spow12/ChatWaifu_22B_v2.0_preview", + "developer": "spow12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6745 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.617 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1888 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3154 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3685 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3988 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/spow12/ChatWaifu_v1.4/ac56cc08-585f-4930-959d-7cbad08c34b0.json b/data/hfopenllm_v2/spow12/ChatWaifu_v1.4/ac56cc08-585f-4930-959d-7cbad08c34b0.json deleted file mode 100644 index 3befb1816..000000000 --- a/data/hfopenllm_v2/spow12/ChatWaifu_v1.4/ac56cc08-585f-4930-959d-7cbad08c34b0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/spow12_ChatWaifu_v1.4/1762652580.535029", - "retrieved_timestamp": "1762652580.5350301", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "spow12/ChatWaifu_v1.4", - "developer": "spow12", - "inference_platform": "unknown", - "id": "spow12/ChatWaifu_v1.4", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5690567693719332 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5176247229970669 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10574018126888217 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47433333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3474900265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/spow12/ChatWaifu_v1.4/ef4ac8ab-4ff5-4fce-94b6-443b1ef7964f.json b/data/hfopenllm_v2/spow12/ChatWaifu_v1.4/ef4ac8ab-4ff5-4fce-94b6-443b1ef7964f.json new file mode 100644 index 000000000..4403618f1 --- /dev/null +++ b/data/hfopenllm_v2/spow12/ChatWaifu_v1.4/ef4ac8ab-4ff5-4fce-94b6-443b1ef7964f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/spow12_ChatWaifu_v1.4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ChatWaifu_v1.4", + "id": "spow12/ChatWaifu_v1.4", + "developer": "spow12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5691 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5176 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1057 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4743 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3475 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/spow12/ChatWaifu_v2.0_22B/468bbea7-6dee-4a1a-84b3-e44b0f3ab95a.json b/data/hfopenllm_v2/spow12/ChatWaifu_v2.0_22B/468bbea7-6dee-4a1a-84b3-e44b0f3ab95a.json new file mode 100644 index 000000000..83ea96c1c --- /dev/null +++ b/data/hfopenllm_v2/spow12/ChatWaifu_v2.0_22B/468bbea7-6dee-4a1a-84b3-e44b0f3ab95a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/spow12_ChatWaifu_v2.0_22B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ChatWaifu_v2.0_22B", + "id": "spow12/ChatWaifu_v2.0_22B", + "developer": "spow12", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6511 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5926 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1858 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3247 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3842 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": 
"MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3836 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/spow12/ChatWaifu_v2.0_22B/7698fd4d-b2d8-4ba9-98be-d96f9c666b2f.json b/data/hfopenllm_v2/spow12/ChatWaifu_v2.0_22B/7698fd4d-b2d8-4ba9-98be-d96f9c666b2f.json deleted file mode 100644 index d57d17cca..000000000 --- a/data/hfopenllm_v2/spow12/ChatWaifu_v2.0_22B/7698fd4d-b2d8-4ba9-98be-d96f9c666b2f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/spow12_ChatWaifu_v2.0_22B/1762652580.535436", - "retrieved_timestamp": "1762652580.535437", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "spow12/ChatWaifu_v2.0_22B", - "developer": "spow12", - "inference_platform": "unknown", - "id": "spow12/ChatWaifu_v2.0_22B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 22.247 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6517384982956334 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5908050619550995 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20317220543806647 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3841979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3812333776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/spow12/ChatWaifu_v2.0_22B/bd8fdfa5-bda1-402b-9010-94bf78b0127b.json b/data/hfopenllm_v2/spow12/ChatWaifu_v2.0_22B/bd8fdfa5-bda1-402b-9010-94bf78b0127b.json new file mode 100644 index 000000000..699016bfe --- /dev/null +++ b/data/hfopenllm_v2/spow12/ChatWaifu_v2.0_22B/bd8fdfa5-bda1-402b-9010-94bf78b0127b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/spow12_ChatWaifu_v2.0_22B/1770682486.623709", + 
"retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ChatWaifu_v2.0_22B", + "id": "spow12/ChatWaifu_v2.0_22B", + "developer": "spow12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6517 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5908 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2032 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3238 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3842 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3812 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/spow12/ChatWaifu_v2.0_22B/cccb45b5-c5cb-43c0-be27-bacbb4db5c5b.json b/data/hfopenllm_v2/spow12/ChatWaifu_v2.0_22B/cccb45b5-c5cb-43c0-be27-bacbb4db5c5b.json deleted file mode 100644 index ac622fc40..000000000 --- a/data/hfopenllm_v2/spow12/ChatWaifu_v2.0_22B/cccb45b5-c5cb-43c0-be27-bacbb4db5c5b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/spow12_ChatWaifu_v2.0_22B/1762652580.5352252", - "retrieved_timestamp": "1762652580.535226", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": 
"third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "spow12/ChatWaifu_v2.0_22B", - "developer": "spow12", - "inference_platform": "unknown", - "id": "spow12/ChatWaifu_v2.0_22B", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 22.247 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6510891102275296 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.592630190761292 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18580060422960726 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3841979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3835605053191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/ssmits/Qwen2.5-95B-Instruct/1c441afa-b8ac-4ff9-b881-e75f8765dd8e.json b/data/hfopenllm_v2/ssmits/Qwen2.5-95B-Instruct/1c441afa-b8ac-4ff9-b881-e75f8765dd8e.json deleted file mode 100644 index 70af5dc1c..000000000 --- a/data/hfopenllm_v2/ssmits/Qwen2.5-95B-Instruct/1c441afa-b8ac-4ff9-b881-e75f8765dd8e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ssmits_Qwen2.5-95B-Instruct/1762652580.535626", - "retrieved_timestamp": "1762652580.5356271", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ssmits/Qwen2.5-95B-Instruct", - "developer": "ssmits", - "inference_platform": "unknown", - "id": "ssmits/Qwen2.5-95B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 94.648 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8431051831363006 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", 
- "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7037799697488242 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5302114803625377 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3640939597315436 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4283854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5216921542553191 - } - } - ] -} diff --git a/data/hfopenllm_v2/ssmits/Qwen2.5-95B-Instruct/a0b34b40-3e68-463f-a7fa-3c58c15aa16d.json b/data/hfopenllm_v2/ssmits/Qwen2.5-95B-Instruct/a0b34b40-3e68-463f-a7fa-3c58c15aa16d.json new file mode 100644 index 000000000..21deb3a14 --- /dev/null +++ b/data/hfopenllm_v2/ssmits/Qwen2.5-95B-Instruct/a0b34b40-3e68-463f-a7fa-3c58c15aa16d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ssmits_Qwen2.5-95B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-95B-Instruct", + "id": "ssmits/Qwen2.5-95B-Instruct", + "developer": "ssmits", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 94.648 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8431 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7038 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5302 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3641 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4284 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5217 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/stabilityai/StableBeluga2/ca7ae45f-833a-4ce2-9fb7-27601e9434c8.json b/data/hfopenllm_v2/stabilityai/StableBeluga2/ca7ae45f-833a-4ce2-9fb7-27601e9434c8.json deleted file mode 100644 index 65b60c18d..000000000 --- a/data/hfopenllm_v2/stabilityai/StableBeluga2/ca7ae45f-833a-4ce2-9fb7-27601e9434c8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/stabilityai_StableBeluga2/1762652580.535889", - "retrieved_timestamp": "1762652580.5358899", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "stabilityai/StableBeluga2", - "developer": "stabilityai", - "inference_platform": "unknown", - "id": "stabilityai/StableBeluga2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 68.977 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37871403431783224 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5824128134553807 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47296875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.3326130319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/stabilityai/StableBeluga2/dbf4fbac-cd99-426d-b725-600e60af00d2.json b/data/hfopenllm_v2/stabilityai/StableBeluga2/dbf4fbac-cd99-426d-b725-600e60af00d2.json new file mode 100644 index 000000000..e8054b7c1 --- /dev/null +++ b/data/hfopenllm_v2/stabilityai/StableBeluga2/dbf4fbac-cd99-426d-b725-600e60af00d2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/stabilityai_StableBeluga2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "StableBeluga2", + "id": "stabilityai/StableBeluga2", + "developer": "stabilityai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 68.977 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3787 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5824 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0438 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3163 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.473 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3326 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/stabilityai/stablelm-2-12b-chat/22aad948-bcc7-4f8f-bb42-a839e3d1be96.json 
b/data/hfopenllm_v2/stabilityai/stablelm-2-12b-chat/22aad948-bcc7-4f8f-bb42-a839e3d1be96.json deleted file mode 100644 index 5cca8b148..000000000 --- a/data/hfopenllm_v2/stabilityai/stablelm-2-12b-chat/22aad948-bcc7-4f8f-bb42-a839e3d1be96.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/stabilityai_stablelm-2-12b-chat/1762652580.536706", - "retrieved_timestamp": "1762652580.5367072", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "stabilityai/stablelm-2-12b-chat", - "developer": "stabilityai", - "inference_platform": "unknown", - "id": "stabilityai/stablelm-2-12b-chat", - "additional_details": { - "precision": "bfloat16", - "architecture": "StableLmForCausalLM", - "params_billions": 12.143 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4081647805600252 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4672024731282805 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3914270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2734375 - } - } - ] -} diff --git a/data/hfopenllm_v2/stabilityai/stablelm-2-12b-chat/f793c471-1638-476a-a050-455a32368e29.json b/data/hfopenllm_v2/stabilityai/stablelm-2-12b-chat/f793c471-1638-476a-a050-455a32368e29.json new file mode 100644 index 000000000..53354e46f --- /dev/null +++ b/data/hfopenllm_v2/stabilityai/stablelm-2-12b-chat/f793c471-1638-476a-a050-455a32368e29.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/stabilityai_stablelm-2-12b-chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "stablelm-2-12b-chat", + "id": "stabilityai/stablelm-2-12b-chat", + "developer": "stabilityai", + "inference_platform": "unknown", + 
"additional_details": { + "precision": "bfloat16", + "architecture": "StableLmForCausalLM", + "params_billions": 12.143 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4082 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4672 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0536 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3914 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2734 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/stabilityai/stablelm-2-12b/1d9c1beb-f84b-4eb7-9c1e-ce5a70afabfb.json b/data/hfopenllm_v2/stabilityai/stablelm-2-12b/1d9c1beb-f84b-4eb7-9c1e-ce5a70afabfb.json new file mode 100644 index 000000000..864befec9 --- /dev/null +++ b/data/hfopenllm_v2/stabilityai/stablelm-2-12b/1d9c1beb-f84b-4eb7-9c1e-ce5a70afabfb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/stabilityai_stablelm-2-12b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "stablelm-2-12b", + "id": "stabilityai/stablelm-2-12b", + "developer": "stabilityai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "StableLmForCausalLM", + "params_billions": 12.143 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": 
"hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1569 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4509 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0431 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2785 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4479 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3072 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/stabilityai/stablelm-2-12b/21f9d0a5-3ed3-40de-a233-a45f68d669e0.json b/data/hfopenllm_v2/stabilityai/stablelm-2-12b/21f9d0a5-3ed3-40de-a233-a45f68d669e0.json deleted file mode 100644 index 146ed9ea5..000000000 --- a/data/hfopenllm_v2/stabilityai/stablelm-2-12b/21f9d0a5-3ed3-40de-a233-a45f68d669e0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/stabilityai_stablelm-2-12b/1762652580.536407", - "retrieved_timestamp": "1762652580.536408", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "stabilityai/stablelm-2-12b", - "developer": "stabilityai", - "inference_platform": "unknown", - "id": "stabilityai/stablelm-2-12b", - "additional_details": { - "precision": "bfloat16", - "architecture": "StableLmForCausalLM", - "params_billions": 12.143 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.1569214129620518 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4508654171114765 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44788541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3071808510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/stabilityai/stablelm-2-1_6b-chat/552dc523-3082-4980-a533-ad5d48f1260a.json b/data/hfopenllm_v2/stabilityai/stablelm-2-1_6b-chat/552dc523-3082-4980-a533-ad5d48f1260a.json deleted file mode 100644 index 92109e517..000000000 --- a/data/hfopenllm_v2/stabilityai/stablelm-2-1_6b-chat/552dc523-3082-4980-a533-ad5d48f1260a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/stabilityai_stablelm-2-1_6b-chat/1762652580.5372329", - "retrieved_timestamp": "1762652580.5372338", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "stabilityai/stablelm-2-1_6b-chat", - "developer": "stabilityai", - "inference_platform": "unknown", - "id": "stabilityai/stablelm-2-1_6b-chat", - "additional_details": { - "precision": "bfloat16", - "architecture": "StableLmForCausalLM", - "params_billions": 1.645 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30599919325168334 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3390172395486522 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.24748322147651006 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35796875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16215093085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/stabilityai/stablelm-2-1_6b-chat/99396d97-d875-4cd9-a8a1-a9aec5c43bfc.json b/data/hfopenllm_v2/stabilityai/stablelm-2-1_6b-chat/99396d97-d875-4cd9-a8a1-a9aec5c43bfc.json new file mode 100644 index 000000000..28f4bd994 --- /dev/null +++ b/data/hfopenllm_v2/stabilityai/stablelm-2-1_6b-chat/99396d97-d875-4cd9-a8a1-a9aec5c43bfc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/stabilityai_stablelm-2-1_6b-chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "stablelm-2-1_6b-chat", + "id": "stabilityai/stablelm-2-1_6b-chat", + "developer": "stabilityai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "StableLmForCausalLM", + "params_billions": 1.645 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.306 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.339 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0249 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2475 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.358 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + 
"dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1622 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/stabilityai/stablelm-2-1_6b/78db2373-3fcf-468b-8c87-21db03b2fdda.json b/data/hfopenllm_v2/stabilityai/stablelm-2-1_6b/78db2373-3fcf-468b-8c87-21db03b2fdda.json deleted file mode 100644 index 785524409..000000000 --- a/data/hfopenllm_v2/stabilityai/stablelm-2-1_6b/78db2373-3fcf-468b-8c87-21db03b2fdda.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/stabilityai_stablelm-2-1_6b/1762652580.5369868", - "retrieved_timestamp": "1762652580.536989", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "stabilityai/stablelm-2-1_6b", - "developer": "stabilityai", - "inference_platform": "unknown", - "id": "stabilityai/stablelm-2-1_6b", - "additional_details": { - "precision": "float16", - "architecture": "StableLmForCausalLM", - "params_billions": 1.645 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11570521771122844 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.338457720511071 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0075528700906344415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2483221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38819791666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1463597074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/stabilityai/stablelm-2-1_6b/82a44b46-156f-4232-92e4-6a08d7a4f197.json b/data/hfopenllm_v2/stabilityai/stablelm-2-1_6b/82a44b46-156f-4232-92e4-6a08d7a4f197.json new file mode 100644 index 000000000..c8c4b25df --- /dev/null +++ b/data/hfopenllm_v2/stabilityai/stablelm-2-1_6b/82a44b46-156f-4232-92e4-6a08d7a4f197.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/stabilityai_stablelm-2-1_6b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "stablelm-2-1_6b", + "id": "stabilityai/stablelm-2-1_6b", + "developer": "stabilityai", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "StableLmForCausalLM", + "params_billions": 1.645 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1157 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3385 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0076 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2483 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3882 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1464 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/stabilityai/stablelm-2-zephyr-1_6b/3b40defd-5a2e-4d6e-838f-dbbbf12236fb.json b/data/hfopenllm_v2/stabilityai/stablelm-2-zephyr-1_6b/3b40defd-5a2e-4d6e-838f-dbbbf12236fb.json new file mode 100644 index 000000000..c7a4b2631 --- /dev/null +++ b/data/hfopenllm_v2/stabilityai/stablelm-2-zephyr-1_6b/3b40defd-5a2e-4d6e-838f-dbbbf12236fb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/stabilityai_stablelm-2-zephyr-1_6b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": 
"Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "stablelm-2-zephyr-1_6b", + "id": "stabilityai/stablelm-2-zephyr-1_6b", + "developer": "stabilityai", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "StableLmForCausalLM", + "params_billions": 1.645 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3279 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3352 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0332 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2433 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3511 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1714 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/stabilityai/stablelm-2-zephyr-1_6b/96179bdf-3e1a-47ee-9fc2-ac0b23307556.json b/data/hfopenllm_v2/stabilityai/stablelm-2-zephyr-1_6b/96179bdf-3e1a-47ee-9fc2-ac0b23307556.json deleted file mode 100644 index aa8889f1d..000000000 --- a/data/hfopenllm_v2/stabilityai/stablelm-2-zephyr-1_6b/96179bdf-3e1a-47ee-9fc2-ac0b23307556.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/stabilityai_stablelm-2-zephyr-1_6b/1762652580.537471", - "retrieved_timestamp": "1762652580.537472", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"stabilityai/stablelm-2-zephyr-1_6b", - "developer": "stabilityai", - "inference_platform": "unknown", - "id": "stabilityai/stablelm-2-zephyr-1_6b", - "additional_details": { - "precision": "float16", - "architecture": "StableLmForCausalLM", - "params_billions": 1.645 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32793100085550786 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3351608706280727 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03323262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24328859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3511458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17137632978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/stabilityai/stablelm-3b-4e1t/3280f4cf-dbb7-46ad-a64c-d4e3c4a58e50.json b/data/hfopenllm_v2/stabilityai/stablelm-3b-4e1t/3280f4cf-dbb7-46ad-a64c-d4e3c4a58e50.json deleted file mode 100644 index 8c2442f2e..000000000 --- a/data/hfopenllm_v2/stabilityai/stablelm-3b-4e1t/3280f4cf-dbb7-46ad-a64c-d4e3c4a58e50.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/stabilityai_stablelm-3b-4e1t/1762652580.5377111", - "retrieved_timestamp": "1762652580.537712", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "stabilityai/stablelm-3b-4e1t", - "developer": "stabilityai", - "inference_platform": "unknown", - "id": "stabilityai/stablelm-3b-4e1t", - "additional_details": { - "precision": "bfloat16", - "architecture": "StableLmForCausalLM", - "params_billions": 2.795 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22031986240951784 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3504211415826912 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23741610738255034 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37778124999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1668882978723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/stabilityai/stablelm-3b-4e1t/dde41cd5-e6d1-43a9-9593-1a5751bc5f44.json b/data/hfopenllm_v2/stabilityai/stablelm-3b-4e1t/dde41cd5-e6d1-43a9-9593-1a5751bc5f44.json new file mode 100644 index 000000000..5c466e1f8 --- /dev/null +++ b/data/hfopenllm_v2/stabilityai/stablelm-3b-4e1t/dde41cd5-e6d1-43a9-9593-1a5751bc5f44.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/stabilityai_stablelm-3b-4e1t/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "stablelm-3b-4e1t", + "id": "stabilityai/stablelm-3b-4e1t", + "developer": "stabilityai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "StableLmForCausalLM", + "params_billions": 2.795 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2203 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3504 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0106 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 
0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2374 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3778 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1669 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/stabilityai/stablelm-zephyr-3b/1cffcbeb-ef81-4efe-b883-0a8540a799e7.json b/data/hfopenllm_v2/stabilityai/stablelm-zephyr-3b/1cffcbeb-ef81-4efe-b883-0a8540a799e7.json new file mode 100644 index 000000000..736b9c1ac --- /dev/null +++ b/data/hfopenllm_v2/stabilityai/stablelm-zephyr-3b/1cffcbeb-ef81-4efe-b883-0a8540a799e7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/stabilityai_stablelm-zephyr-3b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "stablelm-zephyr-3b", + "id": "stabilityai/stablelm-zephyr-3b", + "developer": "stabilityai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "StableLmForCausalLM", + "params_billions": 2.795 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3683 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3866 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0431 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2391 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4183 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1768 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/stabilityai/stablelm-zephyr-3b/94960f86-3898-4add-8590-8abeff66a987.json b/data/hfopenllm_v2/stabilityai/stablelm-zephyr-3b/94960f86-3898-4add-8590-8abeff66a987.json deleted file mode 100644 index 97b97d007..000000000 --- a/data/hfopenllm_v2/stabilityai/stablelm-zephyr-3b/94960f86-3898-4add-8590-8abeff66a987.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/stabilityai_stablelm-zephyr-3b/1762652580.537945", - "retrieved_timestamp": "1762652580.5379462", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "stabilityai/stablelm-zephyr-3b", - "developer": "stabilityai", - "inference_platform": "unknown", - "id": "stabilityai/stablelm-zephyr-3b", - "additional_details": { - "precision": "bfloat16", - "architecture": "StableLmForCausalLM", - "params_billions": 2.795 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36832271705740766 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3866361442837871 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23909395973154363 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4183020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17677859042553193 - } - } - ] -} diff --git a/data/hfopenllm_v2/sthenno-com/miscii-14b-0130/033ef96e-3d2d-49a4-bbff-8bc815a1b40e.json 
b/data/hfopenllm_v2/sthenno-com/miscii-14b-0130/033ef96e-3d2d-49a4-bbff-8bc815a1b40e.json new file mode 100644 index 000000000..436ce33be --- /dev/null +++ b/data/hfopenllm_v2/sthenno-com/miscii-14b-0130/033ef96e-3d2d-49a4-bbff-8bc815a1b40e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sthenno-com_miscii-14b-0130/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "miscii-14b-0130", + "id": "sthenno-com/miscii-14b-0130", + "developer": "sthenno-com", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6647 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6505 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.432 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3817 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4912 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5363 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sthenno-com/miscii-14b-0130/40a09314-bb43-41ff-a36a-b39064c37add.json b/data/hfopenllm_v2/sthenno-com/miscii-14b-0130/40a09314-bb43-41ff-a36a-b39064c37add.json deleted file mode 100644 index 0762d1acd..000000000 --- a/data/hfopenllm_v2/sthenno-com/miscii-14b-0130/40a09314-bb43-41ff-a36a-b39064c37add.json +++ /dev/null @@ -1,105 
+0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sthenno-com_miscii-14b-0130/1762652580.540879", - "retrieved_timestamp": "1762652580.54088", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sthenno-com/miscii-14b-0130", - "developer": "sthenno-com", - "inference_platform": "unknown", - "id": "sthenno-com/miscii-14b-0130", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6647029880716498 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6505409113818335 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43202416918429004 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38171140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4911666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5363198138297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/sthenno-com/miscii-14b-0218/bfe654b8-cb79-4845-bf14-85012207ce90.json b/data/hfopenllm_v2/sthenno-com/miscii-14b-0218/bfe654b8-cb79-4845-bf14-85012207ce90.json new file mode 100644 index 000000000..19981a9c3 --- /dev/null +++ b/data/hfopenllm_v2/sthenno-com/miscii-14b-0218/bfe654b8-cb79-4845-bf14-85012207ce90.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sthenno-com_miscii-14b-0218/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "miscii-14b-0218", + "id": "sthenno-com/miscii-14b-0218", + "developer": "sthenno-com", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7656 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6559 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5144 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3834 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4273 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5298 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sthenno-com/miscii-14b-0218/f73b09b4-020d-49fd-8ede-6a690088be94.json b/data/hfopenllm_v2/sthenno-com/miscii-14b-0218/f73b09b4-020d-49fd-8ede-6a690088be94.json deleted file mode 100644 index b1dec202a..000000000 --- a/data/hfopenllm_v2/sthenno-com/miscii-14b-0218/f73b09b4-020d-49fd-8ede-6a690088be94.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sthenno-com_miscii-14b-0218/1762652580.541173", - "retrieved_timestamp": "1762652580.541174", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sthenno-com/miscii-14b-0218", - "developer": "sthenno-com", - "inference_platform": "unknown", - "id": "sthenno-com/miscii-14b-0218", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7655941790006073 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6558708629267258 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5143504531722054 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38338926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4272708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5297539893617021 - } - } - ] -} diff --git a/data/hfopenllm_v2/sthenno-com/miscii-14b-1028/3f2549af-9bc5-4ad1-a429-79bbb91c929f.json b/data/hfopenllm_v2/sthenno-com/miscii-14b-1028/3f2549af-9bc5-4ad1-a429-79bbb91c929f.json deleted file mode 100644 index a197b5d62..000000000 --- a/data/hfopenllm_v2/sthenno-com/miscii-14b-1028/3f2549af-9bc5-4ad1-a429-79bbb91c929f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sthenno-com_miscii-14b-1028/1762652580.541399", - "retrieved_timestamp": "1762652580.5414", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sthenno-com/miscii-14b-1028", - "developer": "sthenno-com", - "inference_platform": "unknown", - "id": "sthenno-com/miscii-14b-1028", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8236711924360696 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.64483340535341 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5030211480362538 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3565436241610738 - } - }, - { - "evaluation_name": 
"MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41815625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5152925531914894 - } - } - ] -} diff --git a/data/hfopenllm_v2/sthenno-com/miscii-14b-1028/5c4efc23-9591-447b-aecc-4c82797d7d01.json b/data/hfopenllm_v2/sthenno-com/miscii-14b-1028/5c4efc23-9591-447b-aecc-4c82797d7d01.json new file mode 100644 index 000000000..49b055e2c --- /dev/null +++ b/data/hfopenllm_v2/sthenno-com/miscii-14b-1028/5c4efc23-9591-447b-aecc-4c82797d7d01.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sthenno-com_miscii-14b-1028/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "miscii-14b-1028", + "id": "sthenno-com/miscii-14b-1028", + "developer": "sthenno-com", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8237 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6448 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.503 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3565 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4182 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { 
+ "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5153 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sthenno-com/miscii-14b-1225/a5fe3fab-95d9-41ac-a95f-66205e489dae.json b/data/hfopenllm_v2/sthenno-com/miscii-14b-1225/a5fe3fab-95d9-41ac-a95f-66205e489dae.json new file mode 100644 index 000000000..4635e6e86 --- /dev/null +++ b/data/hfopenllm_v2/sthenno-com/miscii-14b-1225/a5fe3fab-95d9-41ac-a95f-66205e489dae.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sthenno-com_miscii-14b-1225/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "miscii-14b-1225", + "id": "sthenno-com/miscii-14b-1225", + "developer": "sthenno-com", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7878 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6572 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4517 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3775 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4366 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5272 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/sthenno-com/miscii-14b-1225/ab816ab5-9edb-49d1-8f89-c3dc36a8a0de.json b/data/hfopenllm_v2/sthenno-com/miscii-14b-1225/ab816ab5-9edb-49d1-8f89-c3dc36a8a0de.json deleted file mode 100644 index c25214c05..000000000 --- a/data/hfopenllm_v2/sthenno-com/miscii-14b-1225/ab816ab5-9edb-49d1-8f89-c3dc36a8a0de.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sthenno-com_miscii-14b-1225/1762652580.541638", - "retrieved_timestamp": "1762652580.5416389", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sthenno-com/miscii-14b-1225", - "developer": "sthenno-com", - "inference_platform": "unknown", - "id": "sthenno-com/miscii-14b-1225", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.787800812954073 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6571708988407374 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4516616314199396 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3775167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4365729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5271775265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/sthenno/tempesthenno-0120/9285700f-106e-481d-88bc-5d59b5d57377.json b/data/hfopenllm_v2/sthenno/tempesthenno-0120/9285700f-106e-481d-88bc-5d59b5d57377.json deleted file mode 100644 index 3b88d6a25..000000000 --- a/data/hfopenllm_v2/sthenno/tempesthenno-0120/9285700f-106e-481d-88bc-5d59b5d57377.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sthenno_tempesthenno-0120/1762652580.538178", - "retrieved_timestamp": "1762652580.5381792", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - 
"model_info": { - "name": "sthenno/tempesthenno-0120", - "developer": "sthenno", - "inference_platform": "unknown", - "id": "sthenno/tempesthenno-0120", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5390319906736348 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6373174111347703 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33534743202416917 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39429530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46332291666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5290059840425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/sthenno/tempesthenno-0120/c0bf8ffb-444a-43a3-9514-76aa92c5f5b7.json b/data/hfopenllm_v2/sthenno/tempesthenno-0120/c0bf8ffb-444a-43a3-9514-76aa92c5f5b7.json new file mode 100644 index 000000000..c30b2e881 --- /dev/null +++ b/data/hfopenllm_v2/sthenno/tempesthenno-0120/c0bf8ffb-444a-43a3-9514-76aa92c5f5b7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sthenno_tempesthenno-0120/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tempesthenno-0120", + "id": "sthenno/tempesthenno-0120", + "developer": "sthenno", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.539 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6373 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3353 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3943 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4633 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.529 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sthenno/tempesthenno-fusion-0309/3d556d9f-036b-4368-bb4a-18ad6b444bdf.json b/data/hfopenllm_v2/sthenno/tempesthenno-fusion-0309/3d556d9f-036b-4368-bb4a-18ad6b444bdf.json new file mode 100644 index 000000000..78afda82b --- /dev/null +++ b/data/hfopenllm_v2/sthenno/tempesthenno-fusion-0309/3d556d9f-036b-4368-bb4a-18ad6b444bdf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sthenno_tempesthenno-fusion-0309/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tempesthenno-fusion-0309", + "id": "sthenno/tempesthenno-fusion-0309", + "developer": "sthenno", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7692 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6581 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4766 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.37 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4325 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5258 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sthenno/tempesthenno-fusion-0309/97793808-7d23-4ec7-b1dd-0c7b1dea1c3c.json b/data/hfopenllm_v2/sthenno/tempesthenno-fusion-0309/97793808-7d23-4ec7-b1dd-0c7b1dea1c3c.json deleted file mode 100644 index d97efa8e8..000000000 --- a/data/hfopenllm_v2/sthenno/tempesthenno-fusion-0309/97793808-7d23-4ec7-b1dd-0c7b1dea1c3c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sthenno_tempesthenno-fusion-0309/1762652580.538481", - "retrieved_timestamp": "1762652580.538483", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sthenno/tempesthenno-fusion-0309", - "developer": "sthenno", - "inference_platform": "unknown", - "id": "sthenno/tempesthenno-fusion-0309", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7691913013027656 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6580880569586895 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47658610271903323 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.3699664429530201 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4325104166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5258477393617021 - } - } - ] -} diff --git a/data/hfopenllm_v2/sthenno/tempesthenno-kto-0205-ckpt80/689a346d-191e-4ec1-93b5-6f64c1a293ff.json b/data/hfopenllm_v2/sthenno/tempesthenno-kto-0205-ckpt80/689a346d-191e-4ec1-93b5-6f64c1a293ff.json deleted file mode 100644 index b0b774f49..000000000 --- a/data/hfopenllm_v2/sthenno/tempesthenno-kto-0205-ckpt80/689a346d-191e-4ec1-93b5-6f64c1a293ff.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sthenno_tempesthenno-kto-0205-ckpt80/1762652580.5387661", - "retrieved_timestamp": "1762652580.538767", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sthenno/tempesthenno-kto-0205-ckpt80", - "developer": "sthenno", - "inference_platform": "unknown", - "id": "sthenno/tempesthenno-kto-0205-ckpt80", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8054362425032248 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.654273895095419 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34815436241610737 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4247604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5285904255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/sthenno/tempesthenno-kto-0205-ckpt80/92905e27-1033-4423-b87d-23236f9be964.json 
b/data/hfopenllm_v2/sthenno/tempesthenno-kto-0205-ckpt80/92905e27-1033-4423-b87d-23236f9be964.json new file mode 100644 index 000000000..588277559 --- /dev/null +++ b/data/hfopenllm_v2/sthenno/tempesthenno-kto-0205-ckpt80/92905e27-1033-4423-b87d-23236f9be964.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sthenno_tempesthenno-kto-0205-ckpt80/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tempesthenno-kto-0205-ckpt80", + "id": "sthenno/tempesthenno-kto-0205-ckpt80", + "developer": "sthenno", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8054 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6543 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4592 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3482 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4248 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5286 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sthenno/tempesthenno-nuslerp-001/17326bb0-42c2-469a-ac19-6a4b75d9e6e2.json b/data/hfopenllm_v2/sthenno/tempesthenno-nuslerp-001/17326bb0-42c2-469a-ac19-6a4b75d9e6e2.json new file mode 100644 index 000000000..752a4f6d2 --- /dev/null +++ 
b/data/hfopenllm_v2/sthenno/tempesthenno-nuslerp-001/17326bb0-42c2-469a-ac19-6a4b75d9e6e2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sthenno_tempesthenno-nuslerp-001/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tempesthenno-nuslerp-001", + "id": "sthenno/tempesthenno-nuslerp-001", + "developer": "sthenno", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7926 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6578 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4758 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3733 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.43 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5257 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sthenno/tempesthenno-nuslerp-001/1d12c40a-a9b5-483b-aaac-07e323de73a9.json b/data/hfopenllm_v2/sthenno/tempesthenno-nuslerp-001/1d12c40a-a9b5-483b-aaac-07e323de73a9.json deleted file mode 100644 index b78a266ac..000000000 --- a/data/hfopenllm_v2/sthenno/tempesthenno-nuslerp-001/1d12c40a-a9b5-483b-aaac-07e323de73a9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/sthenno_tempesthenno-nuslerp-001/1762652580.5390232", - "retrieved_timestamp": "1762652580.5390239", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sthenno/tempesthenno-nuslerp-001", - "developer": "sthenno", - "inference_platform": "unknown", - "id": "sthenno/tempesthenno-nuslerp-001", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7926468437080281 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6577675676172494 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47583081570996977 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3733221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5256815159574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/sthenno/tempesthenno-nuslerp-0124/11574f56-6c34-48e4-8fb5-c58d42f07330.json b/data/hfopenllm_v2/sthenno/tempesthenno-nuslerp-0124/11574f56-6c34-48e4-8fb5-c58d42f07330.json new file mode 100644 index 000000000..26b47b3b3 --- /dev/null +++ b/data/hfopenllm_v2/sthenno/tempesthenno-nuslerp-0124/11574f56-6c34-48e4-8fb5-c58d42f07330.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sthenno_tempesthenno-nuslerp-0124/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tempesthenno-nuslerp-0124", + "id": "sthenno/tempesthenno-nuslerp-0124", + "developer": "sthenno", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7004 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6469 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4116 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3901 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4859 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5352 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sthenno/tempesthenno-nuslerp-0124/b814d738-b9f3-42df-8774-0708d456c2ea.json b/data/hfopenllm_v2/sthenno/tempesthenno-nuslerp-0124/b814d738-b9f3-42df-8774-0708d456c2ea.json deleted file mode 100644 index 3c7cb7d90..000000000 --- a/data/hfopenllm_v2/sthenno/tempesthenno-nuslerp-0124/b814d738-b9f3-42df-8774-0708d456c2ea.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sthenno_tempesthenno-nuslerp-0124/1762652580.539254", - "retrieved_timestamp": "1762652580.5392551", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sthenno/tempesthenno-nuslerp-0124", - "developer": "sthenno", - "inference_platform": "unknown", - "id": "sthenno/tempesthenno-nuslerp-0124", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7003982765728267 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6468547741903091 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.411631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3901006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48592708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5352393617021277 - } - } - ] -} diff --git a/data/hfopenllm_v2/sthenno/tempesthenno-ppo-ckpt40/7c2e9776-92e4-457b-ae08-32c3e351b8e1.json b/data/hfopenllm_v2/sthenno/tempesthenno-ppo-ckpt40/7c2e9776-92e4-457b-ae08-32c3e351b8e1.json deleted file mode 100644 index a7db1cf26..000000000 --- a/data/hfopenllm_v2/sthenno/tempesthenno-ppo-ckpt40/7c2e9776-92e4-457b-ae08-32c3e351b8e1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sthenno_tempesthenno-ppo-ckpt40/1762652580.539634", - "retrieved_timestamp": "1762652580.539635", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sthenno/tempesthenno-ppo-ckpt40", - "developer": "sthenno", - "inference_platform": "unknown", - "id": "sthenno/tempesthenno-ppo-ckpt40", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7923221496739761 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6549600322869433 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4735649546827795 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3775167785234899 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4351770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5291722074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/sthenno/tempesthenno-ppo-ckpt40/8f728c51-15f9-422d-bbdb-4d976961ab9d.json b/data/hfopenllm_v2/sthenno/tempesthenno-ppo-ckpt40/8f728c51-15f9-422d-bbdb-4d976961ab9d.json new file mode 100644 index 000000000..e24916e51 --- /dev/null +++ b/data/hfopenllm_v2/sthenno/tempesthenno-ppo-ckpt40/8f728c51-15f9-422d-bbdb-4d976961ab9d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sthenno_tempesthenno-ppo-ckpt40/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tempesthenno-ppo-ckpt40", + "id": "sthenno/tempesthenno-ppo-ckpt40", + "developer": "sthenno", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7923 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.655 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4736 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3775 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4352 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + 
"hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5292 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sthenno/tempesthenno-sft-0309-ckpt10/65f19ffe-7428-41e5-a52d-02fad8e595c0.json b/data/hfopenllm_v2/sthenno/tempesthenno-sft-0309-ckpt10/65f19ffe-7428-41e5-a52d-02fad8e595c0.json deleted file mode 100644 index 3a4401da9..000000000 --- a/data/hfopenllm_v2/sthenno/tempesthenno-sft-0309-ckpt10/65f19ffe-7428-41e5-a52d-02fad8e595c0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sthenno_tempesthenno-sft-0309-ckpt10/1762652580.539892", - "retrieved_timestamp": "1762652580.539893", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sthenno/tempesthenno-sft-0309-ckpt10", - "developer": "sthenno", - "inference_platform": "unknown", - "id": "sthenno/tempesthenno-sft-0309-ckpt10", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7743620260907724 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6551647758995857 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47205438066465255 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3716442953020134 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4364166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5257646276595744 - } - } - ] -} diff --git a/data/hfopenllm_v2/sthenno/tempesthenno-sft-0309-ckpt10/8d6e4b5e-ad17-4390-bc6b-ab6581a62442.json b/data/hfopenllm_v2/sthenno/tempesthenno-sft-0309-ckpt10/8d6e4b5e-ad17-4390-bc6b-ab6581a62442.json new file mode 100644 index 000000000..884df38c1 --- /dev/null +++ b/data/hfopenllm_v2/sthenno/tempesthenno-sft-0309-ckpt10/8d6e4b5e-ad17-4390-bc6b-ab6581a62442.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/sthenno_tempesthenno-sft-0309-ckpt10/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tempesthenno-sft-0309-ckpt10", + "id": "sthenno/tempesthenno-sft-0309-ckpt10", + "developer": "sthenno", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7744 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6552 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4721 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3716 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4364 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5258 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sthenno/tempesthenno-sft-0314-stage1-ckpt50/07d2cbaf-fa54-4d0b-bdb7-4179b5f3bebe.json b/data/hfopenllm_v2/sthenno/tempesthenno-sft-0314-stage1-ckpt50/07d2cbaf-fa54-4d0b-bdb7-4179b5f3bebe.json deleted file mode 100644 index b47230f13..000000000 --- a/data/hfopenllm_v2/sthenno/tempesthenno-sft-0314-stage1-ckpt50/07d2cbaf-fa54-4d0b-bdb7-4179b5f3bebe.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sthenno_tempesthenno-sft-0314-stage1-ckpt50/1762652580.540305", - "retrieved_timestamp": "1762652580.540307", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sthenno/tempesthenno-sft-0314-stage1-ckpt50", - "developer": "sthenno", - "inference_platform": "unknown", - "id": "sthenno/tempesthenno-sft-0314-stage1-ckpt50", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7393659933421101 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6601015847983588 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46827794561933533 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3733221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44286458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5301695478723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/sthenno/tempesthenno-sft-0314-stage1-ckpt50/5e33bf05-6c67-4ecc-982d-7590e9953145.json b/data/hfopenllm_v2/sthenno/tempesthenno-sft-0314-stage1-ckpt50/5e33bf05-6c67-4ecc-982d-7590e9953145.json new file mode 100644 index 000000000..bdf205c44 --- /dev/null +++ b/data/hfopenllm_v2/sthenno/tempesthenno-sft-0314-stage1-ckpt50/5e33bf05-6c67-4ecc-982d-7590e9953145.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sthenno_tempesthenno-sft-0314-stage1-ckpt50/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tempesthenno-sft-0314-stage1-ckpt50", + "id": "sthenno/tempesthenno-sft-0314-stage1-ckpt50", + "developer": "sthenno", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7394 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6601 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4683 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3733 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4429 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5302 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sthenno/tempestissimo-14b-0309/eab26e25-e8bd-4c19-8f14-a933506372c6.json b/data/hfopenllm_v2/sthenno/tempestissimo-14b-0309/eab26e25-e8bd-4c19-8f14-a933506372c6.json deleted file mode 100644 index 97ad8a5d9..000000000 --- a/data/hfopenllm_v2/sthenno/tempestissimo-14b-0309/eab26e25-e8bd-4c19-8f14-a933506372c6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sthenno_tempestissimo-14b-0309/1762652580.540641", - "retrieved_timestamp": "1762652580.540643", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sthenno/tempestissimo-14b-0309", - "developer": "sthenno", - "inference_platform": "unknown", - "id": "sthenno/tempestissimo-14b-0309", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7548781677061308 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6587329699954757 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.479607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36661073825503354 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43123958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.528091755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/sthenno/tempestissimo-14b-0309/f55ae879-bd95-409c-a8a3-9a57cd615a31.json b/data/hfopenllm_v2/sthenno/tempestissimo-14b-0309/f55ae879-bd95-409c-a8a3-9a57cd615a31.json new file mode 100644 index 000000000..63af9669e --- /dev/null +++ b/data/hfopenllm_v2/sthenno/tempestissimo-14b-0309/f55ae879-bd95-409c-a8a3-9a57cd615a31.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sthenno_tempestissimo-14b-0309/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tempestissimo-14b-0309", + "id": "sthenno/tempestissimo-14b-0309", + "developer": "sthenno", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7549 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6587 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4796 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": 
"hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3666 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4312 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5281 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/streamerbtw1002/Nexuim-R1-7B-Instruct/3e78ef29-f546-41b0-af2b-f3ae4154e396.json b/data/hfopenllm_v2/streamerbtw1002/Nexuim-R1-7B-Instruct/3e78ef29-f546-41b0-af2b-f3ae4154e396.json deleted file mode 100644 index bff4064ec..000000000 --- a/data/hfopenllm_v2/streamerbtw1002/Nexuim-R1-7B-Instruct/3e78ef29-f546-41b0-af2b-f3ae4154e396.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/streamerbtw1002_Nexuim-R1-7B-Instruct/1762652580.541884", - "retrieved_timestamp": "1762652580.541885", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "streamerbtw1002/Nexuim-R1-7B-Instruct", - "developer": "streamerbtw1002", - "inference_platform": "unknown", - "id": "streamerbtw1002/Nexuim-R1-7B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6934289906337407 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5175174748142363 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44561933534743203 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.33555208333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.413813164893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/streamerbtw1002/Nexuim-R1-7B-Instruct/b8426ac9-14f1-4e07-9c7e-b50cb2c7a1e3.json b/data/hfopenllm_v2/streamerbtw1002/Nexuim-R1-7B-Instruct/b8426ac9-14f1-4e07-9c7e-b50cb2c7a1e3.json new file mode 100644 index 000000000..c89456ac8 --- /dev/null +++ b/data/hfopenllm_v2/streamerbtw1002/Nexuim-R1-7B-Instruct/b8426ac9-14f1-4e07-9c7e-b50cb2c7a1e3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/streamerbtw1002_Nexuim-R1-7B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexuim-R1-7B-Instruct", + "id": "streamerbtw1002/Nexuim-R1-7B-Instruct", + "developer": "streamerbtw1002", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6934 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5175 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4456 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3356 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.4138 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/stupidity-ai/Llama-3-8B-Instruct-MultiMoose/2f177d4b-50fb-4a87-a157-84d1094d3971.json b/data/hfopenllm_v2/stupidity-ai/Llama-3-8B-Instruct-MultiMoose/2f177d4b-50fb-4a87-a157-84d1094d3971.json deleted file mode 100644 index a1414e45b..000000000 --- a/data/hfopenllm_v2/stupidity-ai/Llama-3-8B-Instruct-MultiMoose/2f177d4b-50fb-4a87-a157-84d1094d3971.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/stupidity-ai_Llama-3-8B-Instruct-MultiMoose/1762652580.5421681", - "retrieved_timestamp": "1762652580.542169", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "stupidity-ai/Llama-3-8B-Instruct-MultiMoose", - "developer": "stupidity-ai", - "inference_platform": "unknown", - "id": "stupidity-ai/Llama-3-8B-Instruct-MultiMoose", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23181048506850713 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2822965317600308 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3485416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.109375 - } - } - ] -} diff --git a/data/hfopenllm_v2/stupidity-ai/Llama-3-8B-Instruct-MultiMoose/51fd90b0-0d5a-4199-ba5b-ff29eeeab06b.json b/data/hfopenllm_v2/stupidity-ai/Llama-3-8B-Instruct-MultiMoose/51fd90b0-0d5a-4199-ba5b-ff29eeeab06b.json new file mode 100644 index 000000000..09f45ca03 --- /dev/null +++ b/data/hfopenllm_v2/stupidity-ai/Llama-3-8B-Instruct-MultiMoose/51fd90b0-0d5a-4199-ba5b-ff29eeeab06b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/stupidity-ai_Llama-3-8B-Instruct-MultiMoose/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM 
v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct-MultiMoose", + "id": "stupidity-ai/Llama-3-8B-Instruct-MultiMoose", + "developer": "stupidity-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2318 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2823 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2534 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3485 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1094 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/suayptalha/Clarus-7B-v0.1/b1070a2a-7694-472d-84a4-f20f4cfe1b88.json b/data/hfopenllm_v2/suayptalha/Clarus-7B-v0.1/b1070a2a-7694-472d-84a4-f20f4cfe1b88.json deleted file mode 100644 index 96468cbf8..000000000 --- a/data/hfopenllm_v2/suayptalha/Clarus-7B-v0.1/b1070a2a-7694-472d-84a4-f20f4cfe1b88.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/suayptalha_Clarus-7B-v0.1/1762652580.542475", - "retrieved_timestamp": "1762652580.5424771", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": 
"documentation" - }, - "model_info": { - "name": "suayptalha/Clarus-7B-v0.1", - "developer": "suayptalha", - "inference_platform": "unknown", - "id": "suayptalha/Clarus-7B-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7454110648634512 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5496611433440965 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49244712990936557 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44295833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4387466755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/suayptalha/Clarus-7B-v0.1/c46e4fa1-afae-4b68-a13e-034b5cd2b779.json b/data/hfopenllm_v2/suayptalha/Clarus-7B-v0.1/c46e4fa1-afae-4b68-a13e-034b5cd2b779.json new file mode 100644 index 000000000..516714298 --- /dev/null +++ b/data/hfopenllm_v2/suayptalha/Clarus-7B-v0.1/c46e4fa1-afae-4b68-a13e-034b5cd2b779.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/suayptalha_Clarus-7B-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Clarus-7B-v0.1", + "id": "suayptalha/Clarus-7B-v0.1", + "developer": "suayptalha", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7454 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5497 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4924 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.443 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4387 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/suayptalha/Clarus-7B-v0.2/42cc06ed-20fc-4e84-836f-3d7243ec336d.json b/data/hfopenllm_v2/suayptalha/Clarus-7B-v0.2/42cc06ed-20fc-4e84-836f-3d7243ec336d.json new file mode 100644 index 000000000..c5b09072a --- /dev/null +++ b/data/hfopenllm_v2/suayptalha/Clarus-7B-v0.2/42cc06ed-20fc-4e84-836f-3d7243ec336d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/suayptalha_Clarus-7B-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Clarus-7B-v0.2", + "id": "suayptalha/Clarus-7B-v0.2", + "developer": "suayptalha", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7679 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.549 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4856 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4417 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.44 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/suayptalha/Clarus-7B-v0.2/c85bdaec-43e5-4507-a615-89549901e392.json b/data/hfopenllm_v2/suayptalha/Clarus-7B-v0.2/c85bdaec-43e5-4507-a615-89549901e392.json deleted file mode 100644 index 48380c062..000000000 --- a/data/hfopenllm_v2/suayptalha/Clarus-7B-v0.2/c85bdaec-43e5-4507-a615-89549901e392.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/suayptalha_Clarus-7B-v0.2/1762652580.542793", - "retrieved_timestamp": "1762652580.542794", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "suayptalha/Clarus-7B-v0.2", - "developer": "suayptalha", - "inference_platform": "unknown", - "id": "suayptalha/Clarus-7B-v0.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7679423928509688 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5490057426751466 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48564954682779454 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44165625000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4399933510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/suayptalha/Clarus-7B-v0.3/21d1f676-4a7d-4305-b248-4a72d7ce0121.json b/data/hfopenllm_v2/suayptalha/Clarus-7B-v0.3/21d1f676-4a7d-4305-b248-4a72d7ce0121.json deleted file mode 100644 index 4d3842dd2..000000000 --- a/data/hfopenllm_v2/suayptalha/Clarus-7B-v0.3/21d1f676-4a7d-4305-b248-4a72d7ce0121.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/suayptalha_Clarus-7B-v0.3/1762652580.543006", - "retrieved_timestamp": "1762652580.543007", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "suayptalha/Clarus-7B-v0.3", - "developer": "suayptalha", - "inference_platform": "unknown", - "id": "suayptalha/Clarus-7B-v0.3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7509064836855099 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5525985716155296 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4879154078549849 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44022916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4384973404255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/suayptalha/Clarus-7B-v0.3/aaa53387-af33-4454-95f0-3af85f4778c0.json b/data/hfopenllm_v2/suayptalha/Clarus-7B-v0.3/aaa53387-af33-4454-95f0-3af85f4778c0.json new file mode 100644 index 000000000..d58a5c307 
--- /dev/null +++ b/data/hfopenllm_v2/suayptalha/Clarus-7B-v0.3/aaa53387-af33-4454-95f0-3af85f4778c0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/suayptalha_Clarus-7B-v0.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Clarus-7B-v0.3", + "id": "suayptalha/Clarus-7B-v0.3", + "developer": "suayptalha", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7509 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5526 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4879 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4402 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4385 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/suayptalha/DeepSeek-R1-Distill-Llama-3B/465bca6d-b32a-4d34-9916-fc8b3166faa0.json b/data/hfopenllm_v2/suayptalha/DeepSeek-R1-Distill-Llama-3B/465bca6d-b32a-4d34-9916-fc8b3166faa0.json new file mode 100644 index 000000000..c2621e7be --- /dev/null +++ b/data/hfopenllm_v2/suayptalha/DeepSeek-R1-Distill-Llama-3B/465bca6d-b32a-4d34-9916-fc8b3166faa0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/suayptalha_DeepSeek-R1-Distill-Llama-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepSeek-R1-Distill-Llama-3B", + "id": "suayptalha/DeepSeek-R1-Distill-Llama-3B", + "developer": "suayptalha", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7093 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4452 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2092 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3396 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/suayptalha/Falcon3-Jessi-v0.4-7B-Slerp/9a9cb5f7-e95a-46c5-90ed-42152fc0a617.json b/data/hfopenllm_v2/suayptalha/Falcon3-Jessi-v0.4-7B-Slerp/9a9cb5f7-e95a-46c5-90ed-42152fc0a617.json deleted file mode 100644 index 9cecbf940..000000000 --- a/data/hfopenllm_v2/suayptalha/Falcon3-Jessi-v0.4-7B-Slerp/9a9cb5f7-e95a-46c5-90ed-42152fc0a617.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/suayptalha_Falcon3-Jessi-v0.4-7B-Slerp/1762652580.543463", - "retrieved_timestamp": "1762652580.543463", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "suayptalha/Falcon3-Jessi-v0.4-7B-Slerp", - "developer": "suayptalha", - "inference_platform": "unknown", - "id": "suayptalha/Falcon3-Jessi-v0.4-7B-Slerp", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7676176988169169 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5590927389495824 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39652567975830816 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48121875000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.406000664893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/suayptalha/Falcon3-Jessi-v0.4-7B-Slerp/bf138f3d-09d9-4dea-aa43-5efc804bc775.json b/data/hfopenllm_v2/suayptalha/Falcon3-Jessi-v0.4-7B-Slerp/bf138f3d-09d9-4dea-aa43-5efc804bc775.json new file mode 100644 index 000000000..21408cebb --- /dev/null +++ b/data/hfopenllm_v2/suayptalha/Falcon3-Jessi-v0.4-7B-Slerp/bf138f3d-09d9-4dea-aa43-5efc804bc775.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/suayptalha_Falcon3-Jessi-v0.4-7B-Slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Falcon3-Jessi-v0.4-7B-Slerp", + "id": "suayptalha/Falcon3-Jessi-v0.4-7B-Slerp", + "developer": "suayptalha", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7676 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5591 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3965 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4812 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.406 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/suayptalha/HomerCreativeAnvita-Mix-Qw7B/7bb9a15a-ece4-4fb7-b0ae-dc8cf69efb6b.json b/data/hfopenllm_v2/suayptalha/HomerCreativeAnvita-Mix-Qw7B/7bb9a15a-ece4-4fb7-b0ae-dc8cf69efb6b.json deleted file mode 100644 index abf964a5c..000000000 --- a/data/hfopenllm_v2/suayptalha/HomerCreativeAnvita-Mix-Qw7B/7bb9a15a-ece4-4fb7-b0ae-dc8cf69efb6b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/suayptalha_HomerCreativeAnvita-Mix-Qw7B/1762652580.543669", - "retrieved_timestamp": "1762652580.54367", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "suayptalha/HomerCreativeAnvita-Mix-Qw7B", - "developer": "suayptalha", - "inference_platform": "unknown", - "id": "suayptalha/HomerCreativeAnvita-Mix-Qw7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7807816593305763 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5564653181490319 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3610271903323263 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44159375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4444813829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/suayptalha/HomerCreativeAnvita-Mix-Qw7B/cb4e944c-66f6-49f2-b1e0-d90454e34315.json b/data/hfopenllm_v2/suayptalha/HomerCreativeAnvita-Mix-Qw7B/cb4e944c-66f6-49f2-b1e0-d90454e34315.json new file mode 100644 index 000000000..52dfc2c70 --- /dev/null +++ b/data/hfopenllm_v2/suayptalha/HomerCreativeAnvita-Mix-Qw7B/cb4e944c-66f6-49f2-b1e0-d90454e34315.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/suayptalha_HomerCreativeAnvita-Mix-Qw7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HomerCreativeAnvita-Mix-Qw7B", + "id": "suayptalha/HomerCreativeAnvita-Mix-Qw7B", + "developer": "suayptalha", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7808 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5565 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.361 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + 
"dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3146 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4416 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4445 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/suayptalha/Komodo-Llama-3.2-3B-v2-fp16/b2b6bc49-bda1-4a3e-a071-ec0a0bdc1313.json b/data/hfopenllm_v2/suayptalha/Komodo-Llama-3.2-3B-v2-fp16/b2b6bc49-bda1-4a3e-a071-ec0a0bdc1313.json new file mode 100644 index 000000000..6760ae662 --- /dev/null +++ b/data/hfopenllm_v2/suayptalha/Komodo-Llama-3.2-3B-v2-fp16/b2b6bc49-bda1-4a3e-a071-ec0a0bdc1313.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/suayptalha_Komodo-Llama-3.2-3B-v2-fp16/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Komodo-Llama-3.2-3B-v2-fp16", + "id": "suayptalha/Komodo-Llama-3.2-3B-v2-fp16", + "developer": "suayptalha", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6341 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4355 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1065 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3406 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/suayptalha/Lamarckvergence-14B/2c918f65-3565-41f6-a9c2-d042608bc592.json b/data/hfopenllm_v2/suayptalha/Lamarckvergence-14B/2c918f65-3565-41f6-a9c2-d042608bc592.json deleted file mode 100644 index 335fe98ae..000000000 --- a/data/hfopenllm_v2/suayptalha/Lamarckvergence-14B/2c918f65-3565-41f6-a9c2-d042608bc592.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/suayptalha_Lamarckvergence-14B/1762652580.544092", - "retrieved_timestamp": "1762652580.544093", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "suayptalha/Lamarckvergence-14B", - "developer": "suayptalha", - "inference_platform": "unknown", - "id": "suayptalha/Lamarckvergence-14B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7655941790006073 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.651698573892736 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5400302114803626 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36325503355704697 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44215625000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5283410904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/suayptalha/Lamarckvergence-14B/933f3d40-8726-418f-be2f-1f9686e9ab02.json b/data/hfopenllm_v2/suayptalha/Lamarckvergence-14B/933f3d40-8726-418f-be2f-1f9686e9ab02.json new file mode 100644 index 000000000..e8329fe1f --- /dev/null +++ b/data/hfopenllm_v2/suayptalha/Lamarckvergence-14B/933f3d40-8726-418f-be2f-1f9686e9ab02.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/suayptalha_Lamarckvergence-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lamarckvergence-14B", + "id": "suayptalha/Lamarckvergence-14B", + "developer": "suayptalha", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7656 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6517 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.54 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3633 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4422 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5283 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/suayptalha/Lix-14B-v0.1/af1bf15c-7c5f-46fa-ba3a-821b521e86f4.json 
b/data/hfopenllm_v2/suayptalha/Lix-14B-v0.1/af1bf15c-7c5f-46fa-ba3a-821b521e86f4.json new file mode 100644 index 000000000..14484ddbd --- /dev/null +++ b/data/hfopenllm_v2/suayptalha/Lix-14B-v0.1/af1bf15c-7c5f-46fa-ba3a-821b521e86f4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/suayptalha_Lix-14B-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lix-14B-v0.1", + "id": "suayptalha/Lix-14B-v0.1", + "developer": "suayptalha", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7813 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6608 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5295 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.37 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4338 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5314 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/suayptalha/Lix-14B-v0.1/f4866eb3-28b0-416b-92c7-764d38905686.json b/data/hfopenllm_v2/suayptalha/Lix-14B-v0.1/f4866eb3-28b0-416b-92c7-764d38905686.json deleted file mode 100644 index d1e98c693..000000000 --- a/data/hfopenllm_v2/suayptalha/Lix-14B-v0.1/f4866eb3-28b0-416b-92c7-764d38905686.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": 
"0.1.0", - "evaluation_id": "hfopenllm_v2/suayptalha_Lix-14B-v0.1/1762652580.5443048", - "retrieved_timestamp": "1762652580.5443058", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "suayptalha/Lix-14B-v0.1", - "developer": "suayptalha", - "inference_platform": "unknown", - "id": "suayptalha/Lix-14B-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7813313120298586 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6607910825152539 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5294561933534743 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3699664429530201 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43378125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5314162234042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/suayptalha/Luminis-phi-4/43df4336-1eb8-4df7-8309-1199aafc07b1.json b/data/hfopenllm_v2/suayptalha/Luminis-phi-4/43df4336-1eb8-4df7-8309-1199aafc07b1.json new file mode 100644 index 000000000..1b20a9640 --- /dev/null +++ b/data/hfopenllm_v2/suayptalha/Luminis-phi-4/43df4336-1eb8-4df7-8309-1199aafc07b1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/suayptalha_Luminis-phi-4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Luminis-phi-4", + "id": "suayptalha/Luminis-phi-4", + "developer": "suayptalha", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.69 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.692 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4637 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3515 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4572 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5424 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/suayptalha/Maestro-10B/44ae222d-407c-4c8b-9b67-75440631f848.json b/data/hfopenllm_v2/suayptalha/Maestro-10B/44ae222d-407c-4c8b-9b67-75440631f848.json new file mode 100644 index 000000000..352d09f29 --- /dev/null +++ b/data/hfopenllm_v2/suayptalha/Maestro-10B/44ae222d-407c-4c8b-9b67-75440631f848.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/suayptalha_Maestro-10B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Maestro-10B", + "id": "suayptalha/Maestro-10B", + "developer": "suayptalha", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7768 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5746 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1911 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3331 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4397 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4218 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/suayptalha/Maestro-10B/b302d40a-64bd-4cdd-b5fb-3a9c1dbf1406.json b/data/hfopenllm_v2/suayptalha/Maestro-10B/b302d40a-64bd-4cdd-b5fb-3a9c1dbf1406.json deleted file mode 100644 index bb92ea017..000000000 --- a/data/hfopenllm_v2/suayptalha/Maestro-10B/b302d40a-64bd-4cdd-b5fb-3a9c1dbf1406.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/suayptalha_Maestro-10B/1762652580.5447612", - "retrieved_timestamp": "1762652580.5447621", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "suayptalha/Maestro-10B", - "developer": "suayptalha", - "inference_platform": "unknown", - "id": "suayptalha/Maestro-10B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7767601076255447 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5746090622656775 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19108761329305135 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33305369127516776 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43972916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42179188829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/suayptalha/Rombos-2.5-T.E-8.1/a87db0fe-3727-4ff1-875f-9edd3109f3a2.json b/data/hfopenllm_v2/suayptalha/Rombos-2.5-T.E-8.1/a87db0fe-3727-4ff1-875f-9edd3109f3a2.json new file mode 100644 index 000000000..e6ce27e84 --- /dev/null +++ b/data/hfopenllm_v2/suayptalha/Rombos-2.5-T.E-8.1/a87db0fe-3727-4ff1-875f-9edd3109f3a2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/suayptalha_Rombos-2.5-T.E-8.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Rombos-2.5-T.E-8.1", + "id": "suayptalha/Rombos-2.5-T.E-8.1", + "developer": "suayptalha", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6925 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5515 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4924 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3112 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": 
"hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4166 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4446 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/suayptalha/Rombos-2.5-T.E-8.1/fa7a31f9-9c10-4f5f-a06f-e628363a726a.json b/data/hfopenllm_v2/suayptalha/Rombos-2.5-T.E-8.1/fa7a31f9-9c10-4f5f-a06f-e628363a726a.json deleted file mode 100644 index 2412a9d56..000000000 --- a/data/hfopenllm_v2/suayptalha/Rombos-2.5-T.E-8.1/fa7a31f9-9c10-4f5f-a06f-e628363a726a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/suayptalha_Rombos-2.5-T.E-8.1/1762652580.544959", - "retrieved_timestamp": "1762652580.544959", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "suayptalha/Rombos-2.5-T.E-8.1", - "developer": "suayptalha", - "inference_platform": "unknown", - "id": "suayptalha/Rombos-2.5-T.E-8.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6925047762159957 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5514641249478369 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49244712990936557 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41663541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4445644946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/sumink/Qmerft/0c73e33a-7f6f-4925-970b-db289069d5ca.json 
b/data/hfopenllm_v2/sumink/Qmerft/0c73e33a-7f6f-4925-970b-db289069d5ca.json new file mode 100644 index 000000000..bcee9059c --- /dev/null +++ b/data/hfopenllm_v2/sumink/Qmerft/0c73e33a-7f6f-4925-970b-db289069d5ca.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sumink_Qmerft/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qmerft", + "id": "sumink/Qmerft", + "developer": "sumink", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1564 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2939 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0023 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2525 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3688 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1157 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sumink/Qmerft/11243917-73a3-484e-ac8b-40065c65ea8c.json b/data/hfopenllm_v2/sumink/Qmerft/11243917-73a3-484e-ac8b-40065c65ea8c.json deleted file mode 100644 index 3025c05a5..000000000 --- a/data/hfopenllm_v2/sumink/Qmerft/11243917-73a3-484e-ac8b-40065c65ea8c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sumink_Qmerft/1762652580.5451572", - 
"retrieved_timestamp": "1762652580.5451572", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sumink/Qmerft", - "developer": "sumink", - "inference_platform": "unknown", - "id": "sumink/Qmerft", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15639724819035714 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29390930175643937 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0022658610271903325 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36876041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11569148936170212 - } - } - ] -} diff --git a/data/hfopenllm_v2/sumink/Qwenftmodel/02bc7f5c-dc2f-4d8c-adcb-a89a34ff5549.json b/data/hfopenllm_v2/sumink/Qwenftmodel/02bc7f5c-dc2f-4d8c-adcb-a89a34ff5549.json new file mode 100644 index 000000000..4e22ca5b7 --- /dev/null +++ b/data/hfopenllm_v2/sumink/Qwenftmodel/02bc7f5c-dc2f-4d8c-adcb-a89a34ff5549.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sumink_Qwenftmodel/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwenftmodel", + "id": "sumink/Qwenftmodel", + "developer": "sumink", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1729 + } + }, 
+ { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3823 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0891 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2567 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3617 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2339 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sumink/Qwenmplus/590c031c-2aa6-48e6-9b3f-68b1a585dd39.json b/data/hfopenllm_v2/sumink/Qwenmplus/590c031c-2aa6-48e6-9b3f-68b1a585dd39.json new file mode 100644 index 000000000..9502b1af4 --- /dev/null +++ b/data/hfopenllm_v2/sumink/Qwenmplus/590c031c-2aa6-48e6-9b3f-68b1a585dd39.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sumink_Qwenmplus/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwenmplus", + "id": "sumink/Qwenmplus", + "developer": "sumink", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.543 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.204 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": 
{ + "score": 0.3676 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0249 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3828 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1992 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sumink/Qwensci/970c9fb8-c217-444b-a025-f4d9acdd679d.json b/data/hfopenllm_v2/sumink/Qwensci/970c9fb8-c217-444b-a025-f4d9acdd679d.json new file mode 100644 index 000000000..0a21c2f79 --- /dev/null +++ b/data/hfopenllm_v2/sumink/Qwensci/970c9fb8-c217-444b-a025-f4d9acdd679d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sumink_Qwensci/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwensci", + "id": "sumink/Qwensci", + "developer": "sumink", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.543 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.174 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3282 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0204 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3609 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.126 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sumink/bbhqwen/07a08dd7-822b-49ac-859b-d2fc75b9c88d.json b/data/hfopenllm_v2/sumink/bbhqwen/07a08dd7-822b-49ac-859b-d2fc75b9c88d.json new file mode 100644 index 000000000..c216d7b5c --- /dev/null +++ b/data/hfopenllm_v2/sumink/bbhqwen/07a08dd7-822b-49ac-859b-d2fc75b9c88d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sumink_bbhqwen/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bbhqwen", + "id": "sumink/bbhqwen", + "developer": "sumink", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1809 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3388 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0106 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4352 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1617 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sumink/bbhqwen2/0c0e9250-b75a-4549-9fb2-2b5c9ac2ef49.json b/data/hfopenllm_v2/sumink/bbhqwen2/0c0e9250-b75a-4549-9fb2-2b5c9ac2ef49.json new file mode 100644 index 000000000..1dc18132d --- /dev/null +++ b/data/hfopenllm_v2/sumink/bbhqwen2/0c0e9250-b75a-4549-9fb2-2b5c9ac2ef49.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sumink_bbhqwen2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bbhqwen2", + "id": "sumink/bbhqwen2", + "developer": "sumink", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1533 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3066 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.006 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2626 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on 
MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4431 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1149 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sumink/bbhqwen3/2ae306b1-5409-4418-b5e4-50feff9dafe7.json b/data/hfopenllm_v2/sumink/bbhqwen3/2ae306b1-5409-4418-b5e4-50feff9dafe7.json new file mode 100644 index 000000000..d43a52330 --- /dev/null +++ b/data/hfopenllm_v2/sumink/bbhqwen3/2ae306b1-5409-4418-b5e4-50feff9dafe7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sumink_bbhqwen3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bbhqwen3", + "id": "sumink/bbhqwen3", + "developer": "sumink", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1943 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2951 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3796 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1166 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sumink/bbhqwen4/44bf5d75-afb2-48fa-a0fa-96d283b0ae94.json b/data/hfopenllm_v2/sumink/bbhqwen4/44bf5d75-afb2-48fa-a0fa-96d283b0ae94.json new file mode 100644 index 000000000..7b63e0384 --- /dev/null +++ b/data/hfopenllm_v2/sumink/bbhqwen4/44bf5d75-afb2-48fa-a0fa-96d283b0ae94.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sumink_bbhqwen4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bbhqwen4", + "id": "sumink/bbhqwen4", + "developer": "sumink", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1449 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3199 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.006 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2441 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4029 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1509 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sumink/bbhqwen5/e3860bb2-b2e4-4fdf-91cb-3343ad6440d7.json 
b/data/hfopenllm_v2/sumink/bbhqwen5/e3860bb2-b2e4-4fdf-91cb-3343ad6440d7.json new file mode 100644 index 000000000..d06e6e52f --- /dev/null +++ b/data/hfopenllm_v2/sumink/bbhqwen5/e3860bb2-b2e4-4fdf-91cb-3343ad6440d7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sumink_bbhqwen5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bbhqwen5", + "id": "sumink/bbhqwen5", + "developer": "sumink", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1522 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2913 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0023 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4019 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1131 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sumink/bbhqwen6/6369fceb-148f-4491-9488-420182a9838f.json b/data/hfopenllm_v2/sumink/bbhqwen6/6369fceb-148f-4491-9488-420182a9838f.json new file mode 100644 index 000000000..80f188511 --- /dev/null +++ b/data/hfopenllm_v2/sumink/bbhqwen6/6369fceb-148f-4491-9488-420182a9838f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/sumink_bbhqwen6/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bbhqwen6", + "id": "sumink/bbhqwen6", + "developer": "sumink", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1893 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2782 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0008 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.358 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1153 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sumink/flflmillama/045c814e-a30f-4b6b-b4f4-382dee4063b7.json b/data/hfopenllm_v2/sumink/flflmillama/045c814e-a30f-4b6b-b4f4-382dee4063b7.json new file mode 100644 index 000000000..05e8e4568 --- /dev/null +++ b/data/hfopenllm_v2/sumink/flflmillama/045c814e-a30f-4b6b-b4f4-382dee4063b7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sumink_flflmillama/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "flflmillama", + 
"id": "sumink/flflmillama", + "developer": "sumink", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1676 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3851 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0196 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3591 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2096 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sumink/ftgpt/59d2b375-5696-47d0-9c96-1a826c08bea0.json b/data/hfopenllm_v2/sumink/ftgpt/59d2b375-5696-47d0-9c96-1a826c08bea0.json new file mode 100644 index 000000000..697ef4b76 --- /dev/null +++ b/data/hfopenllm_v2/sumink/ftgpt/59d2b375-5696-47d0-9c96-1a826c08bea0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sumink_ftgpt/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ftgpt", + "id": "sumink/ftgpt", + "developer": "sumink", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "GPT2LMHeadModel", + "params_billions": 0.124 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", 
+ "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0787 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4138 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1172 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sumink/llamaft/ff601b4f-24a1-4376-8c5e-5bda2ea88f65.json b/data/hfopenllm_v2/sumink/llamaft/ff601b4f-24a1-4376-8c5e-5bda2ea88f65.json new file mode 100644 index 000000000..69e1ab691 --- /dev/null +++ b/data/hfopenllm_v2/sumink/llamaft/ff601b4f-24a1-4376-8c5e-5bda2ea88f65.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sumink_llamaft/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llamaft", + "id": "sumink/llamaft", + "developer": "sumink", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1609 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + 
"source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3763 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0166 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3498 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2114 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sumink/llamamerge/8c043ba8-f7dd-4cc8-a3b1-7201042b8dc8.json b/data/hfopenllm_v2/sumink/llamamerge/8c043ba8-f7dd-4cc8-a3b1-7201042b8dc8.json new file mode 100644 index 000000000..f90932f9e --- /dev/null +++ b/data/hfopenllm_v2/sumink/llamamerge/8c043ba8-f7dd-4cc8-a3b1-7201042b8dc8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sumink_llamamerge/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llamamerge", + "id": "sumink/llamamerge", + "developer": "sumink", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.016 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2672 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4632 + } + }, + { + "evaluation_name": "MATH Level 5", 
+ "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0151 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.424 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.259 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sumink/llftfl7/ce27dff4-9ca7-47cb-bc18-b5dd167c72a2.json b/data/hfopenllm_v2/sumink/llftfl7/ce27dff4-9ca7-47cb-bc18-b5dd167c72a2.json new file mode 100644 index 000000000..454849f8b --- /dev/null +++ b/data/hfopenllm_v2/sumink/llftfl7/ce27dff4-9ca7-47cb-bc18-b5dd167c72a2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sumink_llftfl7/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llftfl7", + "id": "sumink/llftfl7", + "developer": "sumink", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1714 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3786 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.0106 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3632 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1743 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sumink/llftfl7/ed7c36f0-5b1a-45ef-be66-f9880cad099d.json b/data/hfopenllm_v2/sumink/llftfl7/ed7c36f0-5b1a-45ef-be66-f9880cad099d.json deleted file mode 100644 index ddca913eb..000000000 --- a/data/hfopenllm_v2/sumink/llftfl7/ed7c36f0-5b1a-45ef-be66-f9880cad099d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sumink_llftfl7/1762652580.548197", - "retrieved_timestamp": "1762652580.548198", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sumink/llftfl7", - "developer": "sumink", - "inference_platform": "unknown", - "id": "sumink/llftfl7", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17143512546709397 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37864273336631166 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36320833333333336 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17428523936170212 - } - } - ] -} diff --git a/data/hfopenllm_v2/sumink/llmer/8f2bad2c-5c31-433a-bbf0-f1a8f0a80c3a.json b/data/hfopenllm_v2/sumink/llmer/8f2bad2c-5c31-433a-bbf0-f1a8f0a80c3a.json deleted file mode 100644 index 917dc6f2c..000000000 --- a/data/hfopenllm_v2/sumink/llmer/8f2bad2c-5c31-433a-bbf0-f1a8f0a80c3a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sumink_llmer/1762652580.548394", - "retrieved_timestamp": "1762652580.548395", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sumink/llmer", - "developer": "sumink", - "inference_platform": "unknown", - "id": "sumink/llmer", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3191132860809319 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4884590875207178 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4039166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35289228723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/sumink/llmer/d69ecbfa-5036-48b8-8fed-f9162e2857f5.json b/data/hfopenllm_v2/sumink/llmer/d69ecbfa-5036-48b8-8fed-f9162e2857f5.json new file mode 100644 index 000000000..2b5e1209f --- /dev/null +++ b/data/hfopenllm_v2/sumink/llmer/d69ecbfa-5036-48b8-8fed-f9162e2857f5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sumink_llmer/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" 
+ }, + "model_info": { + "name": "llmer", + "id": "sumink/llmer", + "developer": "sumink", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3191 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4885 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.065 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4039 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3529 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sumink/qwft/6cdf831f-3ccd-4d78-a94f-269ace42fc1c.json b/data/hfopenllm_v2/sumink/qwft/6cdf831f-3ccd-4d78-a94f-269ace42fc1c.json deleted file mode 100644 index a0395c147..000000000 --- a/data/hfopenllm_v2/sumink/qwft/6cdf831f-3ccd-4d78-a94f-269ace42fc1c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sumink_qwft/1762652580.548597", - "retrieved_timestamp": "1762652580.548597", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sumink/qwft", - "developer": "sumink", - "inference_platform": "unknown", - "id": "sumink/qwft", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 
7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11965252197502627 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30021752093452153 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3580625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11294880319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/sumink/qwft/b5924329-c182-482a-bee8-22fcb348281d.json b/data/hfopenllm_v2/sumink/qwft/b5924329-c182-482a-bee8-22fcb348281d.json new file mode 100644 index 000000000..734732a61 --- /dev/null +++ b/data/hfopenllm_v2/sumink/qwft/b5924329-c182-482a-bee8-22fcb348281d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sumink_qwft/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwft", + "id": "sumink/qwft", + "developer": "sumink", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1197 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3002 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2525 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3581 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1129 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sumink/qwmer/2cd4d3ec-2800-4223-ab50-6f9f4a1e1a57.json b/data/hfopenllm_v2/sumink/qwmer/2cd4d3ec-2800-4223-ab50-6f9f4a1e1a57.json deleted file mode 100644 index 8f8d099ca..000000000 --- a/data/hfopenllm_v2/sumink/qwmer/2cd4d3ec-2800-4223-ab50-6f9f4a1e1a57.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sumink_qwmer/1762652580.54879", - "retrieved_timestamp": "1762652580.548791", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sumink/qwmer", - "developer": "sumink", - "inference_platform": "unknown", - "id": "sumink/qwmer", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22124407682726277 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4298800979582788 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.4031770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22149268617021275 - } - } - ] -} diff --git a/data/hfopenllm_v2/sumink/qwmer/a6a6b6f2-ac28-4c4a-806e-8abe8c7f9190.json b/data/hfopenllm_v2/sumink/qwmer/a6a6b6f2-ac28-4c4a-806e-8abe8c7f9190.json new file mode 100644 index 000000000..88f04b8bd --- /dev/null +++ b/data/hfopenllm_v2/sumink/qwmer/a6a6b6f2-ac28-4c4a-806e-8abe8c7f9190.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sumink_qwmer/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwmer", + "id": "sumink/qwmer", + "developer": "sumink", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2212 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4299 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0008 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4032 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2215 + } + } + ] +} \ No newline at end of file 
diff --git a/data/hfopenllm_v2/sumink/solarmer3/59ebeb48-88c4-4c63-92bb-888752ea9dad.json b/data/hfopenllm_v2/sumink/solarmer3/59ebeb48-88c4-4c63-92bb-888752ea9dad.json deleted file mode 100644 index aa0887316..000000000 --- a/data/hfopenllm_v2/sumink/solarmer3/59ebeb48-88c4-4c63-92bb-888752ea9dad.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sumink_solarmer3/1762652580.5489879", - "retrieved_timestamp": "1762652580.5489888", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sumink/solarmer3", - "developer": "sumink", - "inference_platform": "unknown", - "id": "sumink/solarmer3", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3741428299135183 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5265990319952963 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0581570996978852 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44013541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.332280585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/sumink/solarmer3/b904301c-d0c0-41a4-b92e-92b2d7c9c13a.json b/data/hfopenllm_v2/sumink/solarmer3/b904301c-d0c0-41a4-b92e-92b2d7c9c13a.json new file mode 100644 index 000000000..ac7e81ef1 --- /dev/null +++ b/data/hfopenllm_v2/sumink/solarmer3/b904301c-d0c0-41a4-b92e-92b2d7c9c13a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sumink_solarmer3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "solarmer3", + "id": "sumink/solarmer3", + "developer": "sumink", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", 
+ "params_billions": 10.732 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3741 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5266 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0582 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4401 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3323 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sumink/somer/282fa475-0ac8-4230-8020-9dbb7fda03da.json b/data/hfopenllm_v2/sumink/somer/282fa475-0ac8-4230-8020-9dbb7fda03da.json deleted file mode 100644 index 332c3be3d..000000000 --- a/data/hfopenllm_v2/sumink/somer/282fa475-0ac8-4230-8020-9dbb7fda03da.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sumink_somer/1762652580.549191", - "retrieved_timestamp": "1762652580.549192", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sumink/somer", - "developer": "sumink", - "inference_platform": "unknown", - "id": "sumink/somer", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29902990731259727 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.519370328606347 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04154078549848943 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.465 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3447473404255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/sumink/somer/b5de0218-91dc-487a-be90-70f8bcb64803.json b/data/hfopenllm_v2/sumink/somer/b5de0218-91dc-487a-be90-70f8bcb64803.json new file mode 100644 index 000000000..3dadaebd8 --- /dev/null +++ b/data/hfopenllm_v2/sumink/somer/b5de0218-91dc-487a-be90-70f8bcb64803.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sumink_somer/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "somer", + "id": "sumink/somer", + "developer": "sumink", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.299 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5194 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0415 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": 
"GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.465 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3447 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sumink/somer2/3870f65b-3429-45c2-846f-6af30155a78b.json b/data/hfopenllm_v2/sumink/somer2/3870f65b-3429-45c2-846f-6af30155a78b.json new file mode 100644 index 000000000..f13d9325b --- /dev/null +++ b/data/hfopenllm_v2/sumink/somer2/3870f65b-3429-45c2-846f-6af30155a78b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sumink_somer2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "somer2", + "id": "sumink/somer2", + "developer": "sumink", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3132 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5167 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0468 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + 
"dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3433 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sumink/somer2/fee6fbc3-c115-4668-8b5b-35b307c15fe8.json b/data/hfopenllm_v2/sumink/somer2/fee6fbc3-c115-4668-8b5b-35b307c15fe8.json deleted file mode 100644 index 59a2b9d96..000000000 --- a/data/hfopenllm_v2/sumink/somer2/fee6fbc3-c115-4668-8b5b-35b307c15fe8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sumink_somer2/1762652580.549396", - "retrieved_timestamp": "1762652580.549397", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sumink/somer2", - "developer": "sumink", - "inference_platform": "unknown", - "id": "sumink/somer2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3132433055404106 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5166793474130525 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46630208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34325132978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/sumink/somerft/cb6879a2-41b6-40b6-bb20-723aa0b213e1.json b/data/hfopenllm_v2/sumink/somerft/cb6879a2-41b6-40b6-bb20-723aa0b213e1.json deleted file 
mode 100644 index f1d5456d6..000000000 --- a/data/hfopenllm_v2/sumink/somerft/cb6879a2-41b6-40b6-bb20-723aa0b213e1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sumink_somerft/1762652580.5496058", - "retrieved_timestamp": "1762652580.5496068", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sumink/somerft", - "developer": "sumink", - "inference_platform": "unknown", - "id": "sumink/somerft", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.543 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14305819669587805 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3093455213252133 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2483221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40447916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11170212765957446 - } - } - ] -} diff --git a/data/hfopenllm_v2/sumink/somerft/d6c33a51-be09-4cb5-9942-4348668d3e5e.json b/data/hfopenllm_v2/sumink/somerft/d6c33a51-be09-4cb5-9942-4348668d3e5e.json new file mode 100644 index 000000000..fd584a2ba --- /dev/null +++ b/data/hfopenllm_v2/sumink/somerft/d6c33a51-be09-4cb5-9942-4348668d3e5e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sumink_somerft/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "somerft", + "id": "sumink/somerft", + "developer": "sumink", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.543 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": 
"google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1431 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3093 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0144 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2483 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4045 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1117 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sunbaby/BrainCog-8B-0.1-Instruct/1ccd36ee-445a-4861-8835-d602973148fc.json b/data/hfopenllm_v2/sunbaby/BrainCog-8B-0.1-Instruct/1ccd36ee-445a-4861-8835-d602973148fc.json new file mode 100644 index 000000000..af3751a29 --- /dev/null +++ b/data/hfopenllm_v2/sunbaby/BrainCog-8B-0.1-Instruct/1ccd36ee-445a-4861-8835-d602973148fc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/sunbaby_BrainCog-8B-0.1-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BrainCog-8B-0.1-Instruct", + "id": "sunbaby/BrainCog-8B-0.1-Instruct", + "developer": "sunbaby", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4253 + 
} + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4618 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0967 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3656 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2858 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sunbaby/BrainCog-8B-0.1-Instruct/96412e92-8a74-429b-8014-30a526521356.json b/data/hfopenllm_v2/sunbaby/BrainCog-8B-0.1-Instruct/96412e92-8a74-429b-8014-30a526521356.json deleted file mode 100644 index 2f3e46ef7..000000000 --- a/data/hfopenllm_v2/sunbaby/BrainCog-8B-0.1-Instruct/96412e92-8a74-429b-8014-30a526521356.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/sunbaby_BrainCog-8B-0.1-Instruct/1762652580.549814", - "retrieved_timestamp": "1762652580.549815", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "sunbaby/BrainCog-8B-0.1-Instruct", - "developer": "sunbaby", - "inference_platform": "unknown", - "id": "sunbaby/BrainCog-8B-0.1-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4253004250943053 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.46182179983247446 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09667673716012085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36559375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28582114361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA/4c7ef4ee-3a7e-4f15-8a4a-c5853b1c6a47.json b/data/hfopenllm_v2/swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA/4c7ef4ee-3a7e-4f15-8a4a-c5853b1c6a47.json new file mode 100644 index 000000000..8d0457f9c --- /dev/null +++ b/data/hfopenllm_v2/swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA/4c7ef4ee-3a7e-4f15-8a4a-c5853b1c6a47.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/swap-uniba_LLaMAntino-3-ANITA-8B-Inst-DPO-ITA/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LLaMAntino-3-ANITA-8B-Inst-DPO-ITA", + "id": "swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA", + "developer": "swap-uniba", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4815 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4936 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0483 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4387 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3723 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA/f2475574-fc9d-4cd1-94fb-ddd8bb89fa95.json b/data/hfopenllm_v2/swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA/f2475574-fc9d-4cd1-94fb-ddd8bb89fa95.json deleted file mode 100644 index 9b20634d8..000000000 --- a/data/hfopenllm_v2/swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA/f2475574-fc9d-4cd1-94fb-ddd8bb89fa95.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/swap-uniba_LLaMAntino-3-ANITA-8B-Inst-DPO-ITA/1762652580.550269", - "retrieved_timestamp": "1762652580.5502698", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA", - "developer": "swap-uniba", - "inference_platform": "unknown", - "id": "swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4815046299374548 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4935698792285044 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04833836858006042 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43873958333333335 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3723404255319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/synergetic/FrankenQwen2.5-14B/6a69202c-1c68-43e4-bd45-bbc2ff2db743.json b/data/hfopenllm_v2/synergetic/FrankenQwen2.5-14B/6a69202c-1c68-43e4-bd45-bbc2ff2db743.json new file mode 100644 index 000000000..5de286878 --- /dev/null +++ b/data/hfopenllm_v2/synergetic/FrankenQwen2.5-14B/6a69202c-1c68-43e4-bd45-bbc2ff2db743.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/synergetic_FrankenQwen2.5-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FrankenQwen2.5-14B", + "id": "synergetic/FrankenQwen2.5-14B", + "developer": "synergetic", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 16.972 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1869 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6048 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3843 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4382 + } + } + ] +} \ No newline 
at end of file diff --git a/data/hfopenllm_v2/talha2001/Beast-Soul-new/01f536ff-7613-4b09-b793-1f51bf32f705.json b/data/hfopenllm_v2/talha2001/Beast-Soul-new/01f536ff-7613-4b09-b793-1f51bf32f705.json deleted file mode 100644 index 640bd9e98..000000000 --- a/data/hfopenllm_v2/talha2001/Beast-Soul-new/01f536ff-7613-4b09-b793-1f51bf32f705.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/talha2001_Beast-Soul-new/1762652580.5509062", - "retrieved_timestamp": "1762652580.5509079", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "talha2001/Beast-Soul-new", - "developer": "talha2001", - "inference_platform": "unknown", - "id": "talha2001/Beast-Soul-new", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4853510906616666 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5227143628884523 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07401812688821752 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4459270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3101728723404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/talha2001/Beast-Soul-new/a053d6a3-05d4-4d0b-a9b8-7865cf7ac612.json b/data/hfopenllm_v2/talha2001/Beast-Soul-new/a053d6a3-05d4-4d0b-a9b8-7865cf7ac612.json new file mode 100644 index 000000000..3ca4e52f3 --- /dev/null +++ b/data/hfopenllm_v2/talha2001/Beast-Soul-new/a053d6a3-05d4-4d0b-a9b8-7865cf7ac612.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/talha2001_Beast-Soul-new/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Beast-Soul-new", + "id": "talha2001/Beast-Soul-new", + "developer": "talha2001", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4854 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5227 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.074 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2819 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4459 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3102 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tangledgroup/tangled-llama-pints-1.5b-v0.1-instruct/727047f6-974d-4980-a8cd-672728885485.json b/data/hfopenllm_v2/tangledgroup/tangled-llama-pints-1.5b-v0.1-instruct/727047f6-974d-4980-a8cd-672728885485.json deleted file mode 100644 index 48915a2a7..000000000 --- a/data/hfopenllm_v2/tangledgroup/tangled-llama-pints-1.5b-v0.1-instruct/727047f6-974d-4980-a8cd-672728885485.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tangledgroup_tangled-llama-pints-1.5b-v0.1-instruct/1762652580.5513222", - "retrieved_timestamp": "1762652580.5513222", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tangledgroup/tangled-llama-pints-1.5b-v0.1-instruct", - "developer": "tangledgroup", - "inference_platform": "unknown", - "id": 
"tangledgroup/tangled-llama-pints-1.5b-v0.1-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.5 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15090182936829835 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31434444692284963 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23993288590604026 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37613541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11087101063829788 - } - } - ] -} diff --git a/data/hfopenllm_v2/tangledgroup/tangled-llama-pints-1.5b-v0.1-instruct/f76d3d30-4fce-48a9-a26b-7d714fff1d29.json b/data/hfopenllm_v2/tangledgroup/tangled-llama-pints-1.5b-v0.1-instruct/f76d3d30-4fce-48a9-a26b-7d714fff1d29.json new file mode 100644 index 000000000..2cab635d2 --- /dev/null +++ b/data/hfopenllm_v2/tangledgroup/tangled-llama-pints-1.5b-v0.1-instruct/f76d3d30-4fce-48a9-a26b-7d714fff1d29.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tangledgroup_tangled-llama-pints-1.5b-v0.1-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tangled-llama-pints-1.5b-v0.1-instruct", + "id": "tangledgroup/tangled-llama-pints-1.5b-v0.1-instruct", + "developer": "tangledgroup", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.5 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1509 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3143 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0121 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2399 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3761 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1109 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tangledgroup/tangled-llama-pints-1.5b-v0.2-instruct/3964e579-bb1f-46be-8740-ba8097d8f7ef.json b/data/hfopenllm_v2/tangledgroup/tangled-llama-pints-1.5b-v0.2-instruct/3964e579-bb1f-46be-8740-ba8097d8f7ef.json deleted file mode 100644 index fd4008840..000000000 --- a/data/hfopenllm_v2/tangledgroup/tangled-llama-pints-1.5b-v0.2-instruct/3964e579-bb1f-46be-8740-ba8097d8f7ef.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tangledgroup_tangled-llama-pints-1.5b-v0.2-instruct/1762652580.551594", - "retrieved_timestamp": "1762652580.551595", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tangledgroup/tangled-llama-pints-1.5b-v0.2-instruct", - "developer": "tangledgroup", - "inference_platform": "unknown", - "id": "tangledgroup/tangled-llama-pints-1.5b-v0.2-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.5 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1724092075692496 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3158349391752727 - } - }, - { - "evaluation_name": "MATH Level 5", - 
"metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24161073825503357 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3642916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11170212765957446 - } - } - ] -} diff --git a/data/hfopenllm_v2/tangledgroup/tangled-llama-pints-1.5b-v0.2-instruct/eb38a092-1b56-4348-8188-baa2243f7046.json b/data/hfopenllm_v2/tangledgroup/tangled-llama-pints-1.5b-v0.2-instruct/eb38a092-1b56-4348-8188-baa2243f7046.json new file mode 100644 index 000000000..306584ab5 --- /dev/null +++ b/data/hfopenllm_v2/tangledgroup/tangled-llama-pints-1.5b-v0.2-instruct/eb38a092-1b56-4348-8188-baa2243f7046.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tangledgroup_tangled-llama-pints-1.5b-v0.2-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tangled-llama-pints-1.5b-v0.2-instruct", + "id": "tangledgroup/tangled-llama-pints-1.5b-v0.2-instruct", + "developer": "tangledgroup", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.5 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1724 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3158 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0128 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2416 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3643 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1117 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tanliboy/lambda-gemma-2-9b-dpo/1c4cfb94-fc66-4fe2-9879-78683abe654f.json b/data/hfopenllm_v2/tanliboy/lambda-gemma-2-9b-dpo/1c4cfb94-fc66-4fe2-9879-78683abe654f.json new file mode 100644 index 000000000..c4123e916 --- /dev/null +++ b/data/hfopenllm_v2/tanliboy/lambda-gemma-2-9b-dpo/1c4cfb94-fc66-4fe2-9879-78683abe654f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tanliboy_lambda-gemma-2-9b-dpo/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lambda-gemma-2-9b-dpo", + "id": "tanliboy/lambda-gemma-2-9b-dpo", + "developer": "tanliboy", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4501 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5472 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0944 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3138 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": 
"hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4017 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3792 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tanliboy/lambda-gemma-2-9b-dpo/2deef730-c37b-46ca-82b7-de38ae724fd4.json b/data/hfopenllm_v2/tanliboy/lambda-gemma-2-9b-dpo/2deef730-c37b-46ca-82b7-de38ae724fd4.json new file mode 100644 index 000000000..a0e743c57 --- /dev/null +++ b/data/hfopenllm_v2/tanliboy/lambda-gemma-2-9b-dpo/2deef730-c37b-46ca-82b7-de38ae724fd4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tanliboy_lambda-gemma-2-9b-dpo/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lambda-gemma-2-9b-dpo", + "id": "tanliboy/lambda-gemma-2-9b-dpo", + "developer": "tanliboy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1829 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5488 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.4056 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3805 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tanliboy/lambda-gemma-2-9b-dpo/6dd14f37-6493-4f9d-a5a8-6ad62aa4ca04.json b/data/hfopenllm_v2/tanliboy/lambda-gemma-2-9b-dpo/6dd14f37-6493-4f9d-a5a8-6ad62aa4ca04.json deleted file mode 100644 index 304253761..000000000 --- a/data/hfopenllm_v2/tanliboy/lambda-gemma-2-9b-dpo/6dd14f37-6493-4f9d-a5a8-6ad62aa4ca04.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tanliboy_lambda-gemma-2-9b-dpo/1762652580.551808", - "retrieved_timestamp": "1762652580.551809", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tanliboy/lambda-gemma-2-9b-dpo", - "developer": "tanliboy", - "inference_platform": "unknown", - "id": "tanliboy/lambda-gemma-2-9b-dpo", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45008023156336296 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.547172399190412 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09441087613293052 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40165625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.379155585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/tanliboy/lambda-gemma-2-9b-dpo/fe623f86-5397-4818-aa3f-75c2f6632bec.json b/data/hfopenllm_v2/tanliboy/lambda-gemma-2-9b-dpo/fe623f86-5397-4818-aa3f-75c2f6632bec.json deleted file mode 100644 index 0ab90ab86..000000000 --- a/data/hfopenllm_v2/tanliboy/lambda-gemma-2-9b-dpo/fe623f86-5397-4818-aa3f-75c2f6632bec.json +++ /dev/null @@ -1,105 
+0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tanliboy_lambda-gemma-2-9b-dpo/1762652580.5520582", - "retrieved_timestamp": "1762652580.5520582", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tanliboy/lambda-gemma-2-9b-dpo", - "developer": "tanliboy", - "inference_platform": "unknown", - "id": "tanliboy/lambda-gemma-2-9b-dpo", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18292463995531855 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5487911206515993 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40562499999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3804853723404255 - } - } - ] -} diff --git a/data/hfopenllm_v2/tanliboy/lambda-qwen2.5-14b-dpo-test/04686df9-9ef7-4df9-bb1e-a4c113a6e32e.json b/data/hfopenllm_v2/tanliboy/lambda-qwen2.5-14b-dpo-test/04686df9-9ef7-4df9-bb1e-a4c113a6e32e.json deleted file mode 100644 index bb0992436..000000000 --- a/data/hfopenllm_v2/tanliboy/lambda-qwen2.5-14b-dpo-test/04686df9-9ef7-4df9-bb1e-a4c113a6e32e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tanliboy_lambda-qwen2.5-14b-dpo-test/1762652580.5523891", - "retrieved_timestamp": "1762652580.5523908", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tanliboy/lambda-qwen2.5-14b-dpo-test", - "developer": "tanliboy", - "inference_platform": "unknown", - "id": "tanliboy/lambda-qwen2.5-14b-dpo-test", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8231215397367873 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6393505282981286 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5460725075528701 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3624161073825503 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42603125000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4847905585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/tanliboy/lambda-qwen2.5-14b-dpo-test/13a92beb-a8a4-4853-b2f5-1b09d3e2a64a.json b/data/hfopenllm_v2/tanliboy/lambda-qwen2.5-14b-dpo-test/13a92beb-a8a4-4853-b2f5-1b09d3e2a64a.json new file mode 100644 index 000000000..0ef9f7333 --- /dev/null +++ b/data/hfopenllm_v2/tanliboy/lambda-qwen2.5-14b-dpo-test/13a92beb-a8a4-4853-b2f5-1b09d3e2a64a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tanliboy_lambda-qwen2.5-14b-dpo-test/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lambda-qwen2.5-14b-dpo-test", + "id": "tanliboy/lambda-qwen2.5-14b-dpo-test", + "developer": "tanliboy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8231 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6394 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5461 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3624 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.426 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4848 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tanliboy/lambda-qwen2.5-32b-dpo-test/36cf5b59-5369-4baf-80c1-3a47678eb5cb.json b/data/hfopenllm_v2/tanliboy/lambda-qwen2.5-32b-dpo-test/36cf5b59-5369-4baf-80c1-3a47678eb5cb.json new file mode 100644 index 000000000..ca6c94a3a --- /dev/null +++ b/data/hfopenllm_v2/tanliboy/lambda-qwen2.5-32b-dpo-test/36cf5b59-5369-4baf-80c1-3a47678eb5cb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tanliboy_lambda-qwen2.5-32b-dpo-test/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lambda-qwen2.5-32b-dpo-test", + "id": "tanliboy/lambda-qwen2.5-32b-dpo-test", + "developer": "tanliboy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8084 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6764 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.6103 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3565 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4274 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5657 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tanliboy/lambda-qwen2.5-32b-dpo-test/87569202-e422-423b-a2a6-96f94dbaf99c.json b/data/hfopenllm_v2/tanliboy/lambda-qwen2.5-32b-dpo-test/87569202-e422-423b-a2a6-96f94dbaf99c.json deleted file mode 100644 index 4336fe150..000000000 --- a/data/hfopenllm_v2/tanliboy/lambda-qwen2.5-32b-dpo-test/87569202-e422-423b-a2a6-96f94dbaf99c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tanliboy_lambda-qwen2.5-32b-dpo-test/1762652580.552684", - "retrieved_timestamp": "1762652580.552685", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tanliboy/lambda-qwen2.5-32b-dpo-test", - "developer": "tanliboy", - "inference_platform": "unknown", - "id": "tanliboy/lambda-qwen2.5-32b-dpo-test", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8083839767372794 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6763904009446838 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6102719033232629 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3565436241610738 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": 
"Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42742708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.565658244680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/tannedbum/Ellaria-9B/ca946b2a-4345-42b9-aefd-0907b91759d7.json b/data/hfopenllm_v2/tannedbum/Ellaria-9B/ca946b2a-4345-42b9-aefd-0907b91759d7.json deleted file mode 100644 index 9350ebb06..000000000 --- a/data/hfopenllm_v2/tannedbum/Ellaria-9B/ca946b2a-4345-42b9-aefd-0907b91759d7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tannedbum_Ellaria-9B/1762652580.5529752", - "retrieved_timestamp": "1762652580.552976", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tannedbum/Ellaria-9B", - "developer": "tannedbum", - "inference_platform": "unknown", - "id": "tannedbum/Ellaria-9B", - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7825802204816554 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5942102115140485 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20770392749244712 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33305369127516776 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4151458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42054521276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/tannedbum/Ellaria-9B/fced3ef1-fb69-47fe-bf68-3efe72db3142.json b/data/hfopenllm_v2/tannedbum/Ellaria-9B/fced3ef1-fb69-47fe-bf68-3efe72db3142.json new file mode 100644 index 000000000..769518cca --- /dev/null +++ b/data/hfopenllm_v2/tannedbum/Ellaria-9B/fced3ef1-fb69-47fe-bf68-3efe72db3142.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/tannedbum_Ellaria-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ellaria-9B", + "id": "tannedbum/Ellaria-9B", + "developer": "tannedbum", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7826 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5942 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2077 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3331 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4151 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4205 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tannedbum/L3-Nymeria-Maid-8B/3b1941a4-b8ca-49f4-9c09-18beb1b470e4.json b/data/hfopenllm_v2/tannedbum/L3-Nymeria-Maid-8B/3b1941a4-b8ca-49f4-9c09-18beb1b470e4.json deleted file mode 100644 index af1c51ff6..000000000 --- a/data/hfopenllm_v2/tannedbum/L3-Nymeria-Maid-8B/3b1941a4-b8ca-49f4-9c09-18beb1b470e4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tannedbum_L3-Nymeria-Maid-8B/1762652580.553287", - "retrieved_timestamp": "1762652580.553288", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": 
"Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tannedbum/L3-Nymeria-Maid-8B", - "developer": "tannedbum", - "inference_platform": "unknown", - "id": "tannedbum/L3-Nymeria-Maid-8B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7250029920610646 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5146055785516804 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09365558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37505208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37466755319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/tannedbum/L3-Nymeria-Maid-8B/7a83d75a-332e-476a-b0f7-986b2ec9cc5d.json b/data/hfopenllm_v2/tannedbum/L3-Nymeria-Maid-8B/7a83d75a-332e-476a-b0f7-986b2ec9cc5d.json new file mode 100644 index 000000000..f95541ce4 --- /dev/null +++ b/data/hfopenllm_v2/tannedbum/L3-Nymeria-Maid-8B/7a83d75a-332e-476a-b0f7-986b2ec9cc5d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tannedbum_L3-Nymeria-Maid-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3-Nymeria-Maid-8B", + "id": "tannedbum/L3-Nymeria-Maid-8B", + "developer": "tannedbum", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.725 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": 
"SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5146 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0937 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3751 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3747 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tannedbum/L3-Nymeria-v2-8B/61d5c969-6aff-49b7-8fa3-bcf0ff0b661d.json b/data/hfopenllm_v2/tannedbum/L3-Nymeria-v2-8B/61d5c969-6aff-49b7-8fa3-bcf0ff0b661d.json deleted file mode 100644 index 8620228c0..000000000 --- a/data/hfopenllm_v2/tannedbum/L3-Nymeria-v2-8B/61d5c969-6aff-49b7-8fa3-bcf0ff0b661d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tannedbum_L3-Nymeria-v2-8B/1762652580.553518", - "retrieved_timestamp": "1762652580.553519", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tannedbum/L3-Nymeria-v2-8B", - "developer": "tannedbum", - "inference_platform": "unknown", - "id": "tannedbum/L3-Nymeria-v2-8B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7168346653545925 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5224198261531375 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH 
Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09214501510574018 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.369875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37533244680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/tannedbum/L3-Nymeria-v2-8B/6f413d72-cd9f-435c-b13e-9cec14edeb5c.json b/data/hfopenllm_v2/tannedbum/L3-Nymeria-v2-8B/6f413d72-cd9f-435c-b13e-9cec14edeb5c.json new file mode 100644 index 000000000..c7b09040b --- /dev/null +++ b/data/hfopenllm_v2/tannedbum/L3-Nymeria-v2-8B/6f413d72-cd9f-435c-b13e-9cec14edeb5c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tannedbum_L3-Nymeria-v2-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3-Nymeria-v2-8B", + "id": "tannedbum/L3-Nymeria-v2-8B", + "developer": "tannedbum", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7168 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5224 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0921 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + 
"source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3699 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3753 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tannedbum/L3-Rhaenys-8B/a7822bbf-bc23-437d-8e5b-32fb06d3a9ec.json b/data/hfopenllm_v2/tannedbum/L3-Rhaenys-8B/a7822bbf-bc23-437d-8e5b-32fb06d3a9ec.json new file mode 100644 index 000000000..c71fb151f --- /dev/null +++ b/data/hfopenllm_v2/tannedbum/L3-Rhaenys-8B/a7822bbf-bc23-437d-8e5b-32fb06d3a9ec.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tannedbum_L3-Rhaenys-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3-Rhaenys-8B", + "id": "tannedbum/L3-Rhaenys-8B", + "developer": "tannedbum", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7363 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5299 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0876 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2978 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3725 + } + }, + { + 
"evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3799 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tannedbum/L3-Rhaenys-8B/c44ac25e-9139-477d-abcd-442b3a0dc2cf.json b/data/hfopenllm_v2/tannedbum/L3-Rhaenys-8B/c44ac25e-9139-477d-abcd-442b3a0dc2cf.json deleted file mode 100644 index 87d0ce651..000000000 --- a/data/hfopenllm_v2/tannedbum/L3-Rhaenys-8B/c44ac25e-9139-477d-abcd-442b3a0dc2cf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tannedbum_L3-Rhaenys-8B/1762652580.553731", - "retrieved_timestamp": "1762652580.5537322", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tannedbum/L3-Rhaenys-8B", - "developer": "tannedbum", - "inference_platform": "unknown", - "id": "tannedbum/L3-Rhaenys-8B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7362686560548235 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5299209893116719 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08761329305135952 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3724791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3799035904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/teknium/CollectiveCognition-v1.1-Mistral-7B/0b19508c-4996-4fb7-b0e0-9fa952854fa3.json b/data/hfopenllm_v2/teknium/CollectiveCognition-v1.1-Mistral-7B/0b19508c-4996-4fb7-b0e0-9fa952854fa3.json new file mode 100644 index 000000000..df3839754 --- /dev/null +++ b/data/hfopenllm_v2/teknium/CollectiveCognition-v1.1-Mistral-7B/0b19508c-4996-4fb7-b0e0-9fa952854fa3.json @@ -0,0 +1,132 @@ +{ + "schema_version": 
"0.2.0", + "evaluation_id": "hfopenllm_v2/teknium_CollectiveCognition-v1.1-Mistral-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CollectiveCognition-v1.1-Mistral-7B", + "id": "teknium/CollectiveCognition-v1.1-Mistral-7B", + "developer": "teknium", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.279 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4493 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.031 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3869 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2837 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/teknium/OpenHermes-13B/447c22c1-8929-420f-b59b-01ab32a22281.json b/data/hfopenllm_v2/teknium/OpenHermes-13B/447c22c1-8929-420f-b59b-01ab32a22281.json new file mode 100644 index 000000000..217a4b6f0 --- /dev/null +++ b/data/hfopenllm_v2/teknium/OpenHermes-13B/447c22c1-8929-420f-b59b-01ab32a22281.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/teknium_OpenHermes-13B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenHermes-13B", + "id": "teknium/OpenHermes-13B", + "developer": "teknium", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4206 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0121 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4043 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2389 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/teknium/OpenHermes-13B/55d876b7-159e-4c76-848b-1480b4c2f4a2.json b/data/hfopenllm_v2/teknium/OpenHermes-13B/55d876b7-159e-4c76-848b-1480b4c2f4a2.json deleted file mode 100644 index 963669f6e..000000000 --- a/data/hfopenllm_v2/teknium/OpenHermes-13B/55d876b7-159e-4c76-848b-1480b4c2f4a2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/teknium_OpenHermes-13B/1762652580.5542011", - "retrieved_timestamp": "1762652580.554202", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "teknium/OpenHermes-13B", - "developer": "teknium", - 
"inference_platform": "unknown", - "id": "teknium/OpenHermes-13B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2668065178171696 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42064384521911524 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4042604166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23894614361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/teknium/OpenHermes-2-Mistral-7B/ab3dbe43-658e-4c8a-a399-b3d070d467ba.json b/data/hfopenllm_v2/teknium/OpenHermes-2-Mistral-7B/ab3dbe43-658e-4c8a-a399-b3d070d467ba.json new file mode 100644 index 000000000..75e18465b --- /dev/null +++ b/data/hfopenllm_v2/teknium/OpenHermes-2-Mistral-7B/ab3dbe43-658e-4c8a-a399-b3d070d467ba.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/teknium_OpenHermes-2-Mistral-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenHermes-2-Mistral-7B", + "id": "teknium/OpenHermes-2-Mistral-7B", + "developer": "teknium", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5286 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": 
{ + "score": 0.4948 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0453 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.452 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2931 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/teknium/OpenHermes-2.5-Mistral-7B/ee5c87a4-aa06-4728-a9bf-2fc35284b987.json b/data/hfopenllm_v2/teknium/OpenHermes-2.5-Mistral-7B/ee5c87a4-aa06-4728-a9bf-2fc35284b987.json new file mode 100644 index 000000000..1bb9bc4d1 --- /dev/null +++ b/data/hfopenllm_v2/teknium/OpenHermes-2.5-Mistral-7B/ee5c87a4-aa06-4728-a9bf-2fc35284b987.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/teknium_OpenHermes-2.5-Mistral-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenHermes-2.5-Mistral-7B", + "id": "teknium/OpenHermes-2.5-Mistral-7B", + "developer": "teknium", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5571 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.487 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": 
{ + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0506 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4242 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/teknium/OpenHermes-7B/089f10dc-8be6-4595-a0b3-7d5bb4fc13fa.json b/data/hfopenllm_v2/teknium/OpenHermes-7B/089f10dc-8be6-4595-a0b3-7d5bb4fc13fa.json deleted file mode 100644 index 272b41a90..000000000 --- a/data/hfopenllm_v2/teknium/OpenHermes-7B/089f10dc-8be6-4595-a0b3-7d5bb4fc13fa.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/teknium_OpenHermes-7B/1762652580.5548952", - "retrieved_timestamp": "1762652580.5548952", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "teknium/OpenHermes-7B", - "developer": "teknium", - "inference_platform": "unknown", - "id": "teknium/OpenHermes-7B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1812513021006485 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.362033648602934 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4323854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19331781914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/teknium/OpenHermes-7B/6a1a58f6-e399-4ac3-a516-f02a37b6ff68.json b/data/hfopenllm_v2/teknium/OpenHermes-7B/6a1a58f6-e399-4ac3-a516-f02a37b6ff68.json new file mode 100644 index 000000000..58a599dbe --- /dev/null +++ b/data/hfopenllm_v2/teknium/OpenHermes-7B/6a1a58f6-e399-4ac3-a516-f02a37b6ff68.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/teknium_OpenHermes-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenHermes-7B", + "id": "teknium/OpenHermes-7B", + "developer": "teknium", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1813 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.362 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0159 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2693 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4324 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy 
on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1933 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tensopolis/falcon3-10b-tensopolis-v1/9e2bfd77-b73e-436f-ad50-ccfd379cd3f2.json b/data/hfopenllm_v2/tensopolis/falcon3-10b-tensopolis-v1/9e2bfd77-b73e-436f-ad50-ccfd379cd3f2.json new file mode 100644 index 000000000..052c9b9d5 --- /dev/null +++ b/data/hfopenllm_v2/tensopolis/falcon3-10b-tensopolis-v1/9e2bfd77-b73e-436f-ad50-ccfd379cd3f2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tensopolis_falcon3-10b-tensopolis-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "falcon3-10b-tensopolis-v1", + "id": "tensopolis/falcon3-10b-tensopolis-v1", + "developer": "tensopolis", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7817 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6182 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2749 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.442 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/tensopolis/falcon3-10b-tensopolis-v1/d59c7d7c-99a9-4de5-9a69-60b934eafa1b.json b/data/hfopenllm_v2/tensopolis/falcon3-10b-tensopolis-v1/d59c7d7c-99a9-4de5-9a69-60b934eafa1b.json deleted file mode 100644 index a407a8406..000000000 --- a/data/hfopenllm_v2/tensopolis/falcon3-10b-tensopolis-v1/d59c7d7c-99a9-4de5-9a69-60b934eafa1b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tensopolis_falcon3-10b-tensopolis-v1/1762652580.555104", - "retrieved_timestamp": "1762652580.555105", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tensopolis/falcon3-10b-tensopolis-v1", - "developer": "tensopolis", - "inference_platform": "unknown", - "id": "tensopolis/falcon3-10b-tensopolis-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7816560060639104 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.618226655000786 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27492447129909364 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3296979865771812 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43753125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4419880319148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/tensopolis/falcon3-10b-tensopolis-v2/100cf60a-c43c-4b3a-a667-a45cffdd562a.json b/data/hfopenllm_v2/tensopolis/falcon3-10b-tensopolis-v2/100cf60a-c43c-4b3a-a667-a45cffdd562a.json new file mode 100644 index 000000000..67f93fc67 --- /dev/null +++ b/data/hfopenllm_v2/tensopolis/falcon3-10b-tensopolis-v2/100cf60a-c43c-4b3a-a667-a45cffdd562a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tensopolis_falcon3-10b-tensopolis-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"falcon3-10b-tensopolis-v2", + "id": "tensopolis/falcon3-10b-tensopolis-v2", + "developer": "tensopolis", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7792 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6182 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2666 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3272 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4297 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4424 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tensopolis/falcon3-10b-tensopolis-v2/ce5dfe15-432b-42ac-9ef1-569ab4e640a6.json b/data/hfopenllm_v2/tensopolis/falcon3-10b-tensopolis-v2/ce5dfe15-432b-42ac-9ef1-569ab4e640a6.json deleted file mode 100644 index 1760edaa2..000000000 --- a/data/hfopenllm_v2/tensopolis/falcon3-10b-tensopolis-v2/ce5dfe15-432b-42ac-9ef1-569ab4e640a6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tensopolis_falcon3-10b-tensopolis-v2/1762652580.555352", - "retrieved_timestamp": "1762652580.5553532", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tensopolis/falcon3-10b-tensopolis-v2", - "developer": "tensopolis", - 
"inference_platform": "unknown", - "id": "tensopolis/falcon3-10b-tensopolis-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7792080568447275 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.618226655000786 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26661631419939574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271812080536913 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4296875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4424035904255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/tensopolis/lamarckvergence-14b-tensopolis-v1/2088fca7-11d7-47de-808d-d47da0caad0f.json b/data/hfopenllm_v2/tensopolis/lamarckvergence-14b-tensopolis-v1/2088fca7-11d7-47de-808d-d47da0caad0f.json new file mode 100644 index 000000000..4284ac283 --- /dev/null +++ b/data/hfopenllm_v2/tensopolis/lamarckvergence-14b-tensopolis-v1/2088fca7-11d7-47de-808d-d47da0caad0f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tensopolis_lamarckvergence-14b-tensopolis-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lamarckvergence-14b-tensopolis-v1", + "id": "tensopolis/lamarckvergence-14b-tensopolis-v1", + "developer": "tensopolis", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7604 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6561 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5166 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3607 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4475 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.525 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tensopolis/lamarckvergence-14b-tensopolis-v1/da94039c-b214-4ad0-a312-a38cea28498b.json b/data/hfopenllm_v2/tensopolis/lamarckvergence-14b-tensopolis-v1/da94039c-b214-4ad0-a312-a38cea28498b.json deleted file mode 100644 index ea906de3f..000000000 --- a/data/hfopenllm_v2/tensopolis/lamarckvergence-14b-tensopolis-v1/da94039c-b214-4ad0-a312-a38cea28498b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tensopolis_lamarckvergence-14b-tensopolis-v1/1762652580.555553", - "retrieved_timestamp": "1762652580.5555542", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tensopolis/lamarckvergence-14b-tensopolis-v1", - "developer": "tensopolis", - "inference_platform": "unknown", - "id": "tensopolis/lamarckvergence-14b-tensopolis-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7603735865281896 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6561154329558933 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 
5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5166163141993958 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36073825503355705 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44745833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5250166223404256 - } - } - ] -} diff --git a/data/hfopenllm_v2/tensopolis/mistral-small-2501-tensopolis-v1/bf0b3560-9d38-406a-ad30-5fd157f0fe43.json b/data/hfopenllm_v2/tensopolis/mistral-small-2501-tensopolis-v1/bf0b3560-9d38-406a-ad30-5fd157f0fe43.json new file mode 100644 index 000000000..26e8accb5 --- /dev/null +++ b/data/hfopenllm_v2/tensopolis/mistral-small-2501-tensopolis-v1/bf0b3560-9d38-406a-ad30-5fd157f0fe43.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tensopolis_mistral-small-2501-tensopolis-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistral-small-2501-tensopolis-v1", + "id": "tensopolis/mistral-small-2501-tensopolis-v1", + "developer": "tensopolis", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7762 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6475 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4441 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.3574 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.428 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4465 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tensopolis/mistral-small-r1-tensopolis/9ce12fbc-00f7-4cc8-bd9d-67ead83a0801.json b/data/hfopenllm_v2/tensopolis/mistral-small-r1-tensopolis/9ce12fbc-00f7-4cc8-bd9d-67ead83a0801.json new file mode 100644 index 000000000..1b98fd82a --- /dev/null +++ b/data/hfopenllm_v2/tensopolis/mistral-small-r1-tensopolis/9ce12fbc-00f7-4cc8-bd9d-67ead83a0801.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tensopolis_mistral-small-r1-tensopolis/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistral-small-r1-tensopolis", + "id": "tensopolis/mistral-small-r1-tensopolis", + "developer": "tensopolis", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4622 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5436 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2908 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2819 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3738 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4035 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tensopolis/phi-4-tensopolis-v1/14501de3-dac0-44af-8c17-7abcd9bbba8b.json b/data/hfopenllm_v2/tensopolis/phi-4-tensopolis-v1/14501de3-dac0-44af-8c17-7abcd9bbba8b.json new file mode 100644 index 000000000..498b1170f --- /dev/null +++ b/data/hfopenllm_v2/tensopolis/phi-4-tensopolis-v1/14501de3-dac0-44af-8c17-7abcd9bbba8b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tensopolis_phi-4-tensopolis-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "phi-4-tensopolis-v1", + "id": "tensopolis/phi-4-tensopolis-v1", + "developer": "tensopolis", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6767 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6872 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.494 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3347 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4141 + } + }, + { + "evaluation_name": "MMLU-PRO", + 
"source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5384 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tensopolis/qwen2.5-14b-tensopolis-v1/c9db8ce4-6f0d-4c13-8484-6fca9e9c3798.json b/data/hfopenllm_v2/tensopolis/qwen2.5-14b-tensopolis-v1/c9db8ce4-6f0d-4c13-8484-6fca9e9c3798.json new file mode 100644 index 000000000..32b5e1f0e --- /dev/null +++ b/data/hfopenllm_v2/tensopolis/qwen2.5-14b-tensopolis-v1/c9db8ce4-6f0d-4c13-8484-6fca9e9c3798.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tensopolis_qwen2.5-14b-tensopolis-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwen2.5-14b-tensopolis-v1", + "id": "tensopolis/qwen2.5-14b-tensopolis-v1", + "developer": "tensopolis", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.799 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6364 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5295 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3347 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4193 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4911 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tensopolis/qwen2.5-3b-or1-tensopolis/8c6c06be-bbc6-4307-ba5b-336dc2bb466f.json b/data/hfopenllm_v2/tensopolis/qwen2.5-3b-or1-tensopolis/8c6c06be-bbc6-4307-ba5b-336dc2bb466f.json new file mode 100644 index 000000000..a7e6952b8 --- /dev/null +++ b/data/hfopenllm_v2/tensopolis/qwen2.5-3b-or1-tensopolis/8c6c06be-bbc6-4307-ba5b-336dc2bb466f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tensopolis_qwen2.5-3b-or1-tensopolis/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwen2.5-3b-or1-tensopolis", + "id": "tensopolis/qwen2.5-3b-or1-tensopolis", + "developer": "tensopolis", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.354 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4421 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.173 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3749 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3197 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/tensopolis/qwen2.5-7b-tensopolis-v1/1326ff61-d0b4-46eb-9bcf-f978166e622b.json b/data/hfopenllm_v2/tensopolis/qwen2.5-7b-tensopolis-v1/1326ff61-d0b4-46eb-9bcf-f978166e622b.json new file mode 100644 index 000000000..2517471e4 --- /dev/null +++ b/data/hfopenllm_v2/tensopolis/qwen2.5-7b-tensopolis-v1/1326ff61-d0b4-46eb-9bcf-f978166e622b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tensopolis_qwen2.5-7b-tensopolis-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwen2.5-7b-tensopolis-v1", + "id": "tensopolis/qwen2.5-7b-tensopolis-v1", + "developer": "tensopolis", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7661 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5379 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4562 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4339 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4269 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tensopolis/qwen2.5-7b-tensopolis-v2/4c9e829f-7a99-4d61-8730-7457215a4fd6.json b/data/hfopenllm_v2/tensopolis/qwen2.5-7b-tensopolis-v2/4c9e829f-7a99-4d61-8730-7457215a4fd6.json new file mode 
100644 index 000000000..eb25ba8f2 --- /dev/null +++ b/data/hfopenllm_v2/tensopolis/qwen2.5-7b-tensopolis-v2/4c9e829f-7a99-4d61-8730-7457215a4fd6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tensopolis_qwen2.5-7b-tensopolis-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwen2.5-7b-tensopolis-v2", + "id": "tensopolis/qwen2.5-7b-tensopolis-v2", + "developer": "tensopolis", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7521 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5415 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4819 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2903 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4246 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4243 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tensopolis/virtuoso-lite-tensopolis-v1/574e1e63-46f3-44a4-8d04-ad1709a7e1dd.json b/data/hfopenllm_v2/tensopolis/virtuoso-lite-tensopolis-v1/574e1e63-46f3-44a4-8d04-ad1709a7e1dd.json deleted file mode 100644 index 2182f85af..000000000 --- a/data/hfopenllm_v2/tensopolis/virtuoso-lite-tensopolis-v1/574e1e63-46f3-44a4-8d04-ad1709a7e1dd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - 
"evaluation_id": "hfopenllm_v2/tensopolis_virtuoso-lite-tensopolis-v1/1762652580.557624", - "retrieved_timestamp": "1762652580.557625", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tensopolis/virtuoso-lite-tensopolis-v1", - "developer": "tensopolis", - "inference_platform": "unknown", - "id": "tensopolis/virtuoso-lite-tensopolis-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.806910109620252 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.610185430846048 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2545317220543807 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3447986577181208 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4582395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4434840425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/tensopolis/virtuoso-lite-tensopolis-v1/afc24d42-6d25-4036-8f22-fcf944b481b7.json b/data/hfopenllm_v2/tensopolis/virtuoso-lite-tensopolis-v1/afc24d42-6d25-4036-8f22-fcf944b481b7.json new file mode 100644 index 000000000..03ab187f1 --- /dev/null +++ b/data/hfopenllm_v2/tensopolis/virtuoso-lite-tensopolis-v1/afc24d42-6d25-4036-8f22-fcf944b481b7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tensopolis_virtuoso-lite-tensopolis-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "virtuoso-lite-tensopolis-v1", + "id": "tensopolis/virtuoso-lite-tensopolis-v1", + "developer": "tensopolis", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": 
"hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8069 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6102 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2545 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3448 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4582 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4435 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tensopolis/virtuoso-lite-tensopolis-v2/6f6db681-991e-408b-8d4e-71fff9e1c974.json b/data/hfopenllm_v2/tensopolis/virtuoso-lite-tensopolis-v2/6f6db681-991e-408b-8d4e-71fff9e1c974.json new file mode 100644 index 000000000..21561cd85 --- /dev/null +++ b/data/hfopenllm_v2/tensopolis/virtuoso-lite-tensopolis-v2/6f6db681-991e-408b-8d4e-71fff9e1c974.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tensopolis_virtuoso-lite-tensopolis-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "virtuoso-lite-tensopolis-v2", + "id": "tensopolis/virtuoso-lite-tensopolis-v2", + "developer": "tensopolis", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, 
+ "max_score": 1.0 + }, + "score_details": { + "score": 0.8029 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.61 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3431 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4595 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.444 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tensopolis/virtuoso-lite-tensopolis-v2/9024dcc9-fbd0-4ab0-9142-cbf741e7ae54.json b/data/hfopenllm_v2/tensopolis/virtuoso-lite-tensopolis-v2/9024dcc9-fbd0-4ab0-9142-cbf741e7ae54.json deleted file mode 100644 index fac775b39..000000000 --- a/data/hfopenllm_v2/tensopolis/virtuoso-lite-tensopolis-v2/9024dcc9-fbd0-4ab0-9142-cbf741e7ae54.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tensopolis_virtuoso-lite-tensopolis-v2/1762652580.5578399", - "retrieved_timestamp": "1762652580.5578408", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tensopolis/virtuoso-lite-tensopolis-v2", - "developer": "tensopolis", - "inference_platform": "unknown", - "id": "tensopolis/virtuoso-lite-tensopolis-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8029384255996312 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6100187641793813 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34312080536912754 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4595416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44398271276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/tensopolis/virtuoso-small-tensopolis-v1/2228ade6-6243-423f-857e-66f5584a1511.json b/data/hfopenllm_v2/tensopolis/virtuoso-small-tensopolis-v1/2228ade6-6243-423f-857e-66f5584a1511.json deleted file mode 100644 index 53aef1b7a..000000000 --- a/data/hfopenllm_v2/tensopolis/virtuoso-small-tensopolis-v1/2228ade6-6243-423f-857e-66f5584a1511.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tensopolis_virtuoso-small-tensopolis-v1/1762652580.5582058", - "retrieved_timestamp": "1762652580.558207", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tensopolis/virtuoso-small-tensopolis-v1", - "developer": "tensopolis", - "inference_platform": "unknown", - "id": "tensopolis/virtuoso-small-tensopolis-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7856276900845313 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6415395136436205 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3527190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43263541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4968417553191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/tensopolis/virtuoso-small-tensopolis-v1/f3fa76bf-f11c-4dee-9b9f-00f1ec793dac.json b/data/hfopenllm_v2/tensopolis/virtuoso-small-tensopolis-v1/f3fa76bf-f11c-4dee-9b9f-00f1ec793dac.json new file mode 100644 index 000000000..636b8d846 --- /dev/null +++ b/data/hfopenllm_v2/tensopolis/virtuoso-small-tensopolis-v1/f3fa76bf-f11c-4dee-9b9f-00f1ec793dac.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tensopolis_virtuoso-small-tensopolis-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "virtuoso-small-tensopolis-v1", + "id": "tensopolis/virtuoso-small-tensopolis-v1", + "developer": "tensopolis", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7856 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6415 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3527 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.328 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4326 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + 
"hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4968 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tensopolis/virtuoso-small-tensopolis-v2/77b457d9-4957-4f0d-a8d3-e005ae382239.json b/data/hfopenllm_v2/tensopolis/virtuoso-small-tensopolis-v2/77b457d9-4957-4f0d-a8d3-e005ae382239.json new file mode 100644 index 000000000..243e5f94d --- /dev/null +++ b/data/hfopenllm_v2/tensopolis/virtuoso-small-tensopolis-v2/77b457d9-4957-4f0d-a8d3-e005ae382239.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tensopolis_virtuoso-small-tensopolis-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "virtuoso-small-tensopolis-v2", + "id": "tensopolis/virtuoso-small-tensopolis-v2", + "developer": "tensopolis", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.802 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6516 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3875 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3289 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4352 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.5154 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tensopolis/virtuoso-small-tensopolis-v2/c5c34d42-c043-4d60-80bf-5cb522e9d915.json b/data/hfopenllm_v2/tensopolis/virtuoso-small-tensopolis-v2/c5c34d42-c043-4d60-80bf-5cb522e9d915.json deleted file mode 100644 index 062d990aa..000000000 --- a/data/hfopenllm_v2/tensopolis/virtuoso-small-tensopolis-v2/c5c34d42-c043-4d60-80bf-5cb522e9d915.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tensopolis_virtuoso-small-tensopolis-v2/1762652580.5584881", - "retrieved_timestamp": "1762652580.558489", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tensopolis/virtuoso-small-tensopolis-v2", - "developer": "tensopolis", - "inference_platform": "unknown", - "id": "tensopolis/virtuoso-small-tensopolis-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8020142111818863 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6515835977499008 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38746223564954685 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43523958333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.515375664893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/tensopolis/virtuoso-small-v2-tensopolis-v1/11474a7a-73a6-4a3f-8bcb-bef783e12a2b.json b/data/hfopenllm_v2/tensopolis/virtuoso-small-v2-tensopolis-v1/11474a7a-73a6-4a3f-8bcb-bef783e12a2b.json new file mode 100644 index 000000000..24a089280 --- /dev/null +++ b/data/hfopenllm_v2/tensopolis/virtuoso-small-v2-tensopolis-v1/11474a7a-73a6-4a3f-8bcb-bef783e12a2b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tensopolis_virtuoso-small-v2-tensopolis-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM 
v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "virtuoso-small-v2-tensopolis-v1", + "id": "tensopolis/virtuoso-small-v2-tensopolis-v1", + "developer": "tensopolis", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8419 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6545 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4524 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3465 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4509 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5175 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tensopolis/virtuoso-small-v2-tensopolis-v1/727869c4-3498-482a-a04e-c6a779c0e558.json b/data/hfopenllm_v2/tensopolis/virtuoso-small-v2-tensopolis-v1/727869c4-3498-482a-a04e-c6a779c0e558.json deleted file mode 100644 index e5ffaba05..000000000 --- a/data/hfopenllm_v2/tensopolis/virtuoso-small-v2-tensopolis-v1/727869c4-3498-482a-a04e-c6a779c0e558.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tensopolis_virtuoso-small-v2-tensopolis-v1/1762652580.558718", - "retrieved_timestamp": "1762652580.558719", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": 
"third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tensopolis/virtuoso-small-v2-tensopolis-v1", - "developer": "tensopolis", - "inference_platform": "unknown", - "id": "tensopolis/virtuoso-small-v2-tensopolis-v1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8419061423689145 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6544753426578069 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.452416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3464765100671141 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45092708333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5175365691489362 - } - } - ] -} diff --git a/data/hfopenllm_v2/tensoropera/Fox-1-1.6B/23cc1e7f-0994-43a5-8403-5361a2976285.json b/data/hfopenllm_v2/tensoropera/Fox-1-1.6B/23cc1e7f-0994-43a5-8403-5361a2976285.json new file mode 100644 index 000000000..fa1deaf6f --- /dev/null +++ b/data/hfopenllm_v2/tensoropera/Fox-1-1.6B/23cc1e7f-0994-43a5-8403-5361a2976285.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tensoropera_Fox-1-1.6B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Fox-1-1.6B", + "id": "tensoropera/Fox-1-1.6B", + "developer": "tensoropera", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.665 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2766 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3307 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0174 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.355 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1371 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tensoropera/Fox-1-1.6B/998d2bbc-2722-4fb8-9a6a-230c146e2e37.json b/data/hfopenllm_v2/tensoropera/Fox-1-1.6B/998d2bbc-2722-4fb8-9a6a-230c146e2e37.json deleted file mode 100644 index fbd711568..000000000 --- a/data/hfopenllm_v2/tensoropera/Fox-1-1.6B/998d2bbc-2722-4fb8-9a6a-230c146e2e37.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tensoropera_Fox-1-1.6B/1762652580.558935", - "retrieved_timestamp": "1762652580.558936", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tensoropera/Fox-1-1.6B", - "developer": "tensoropera", - "inference_platform": "unknown", - "id": "tensoropera/Fox-1-1.6B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.665 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27659831469390106 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3307369914593792 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.017371601208459216 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35498958333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1371343085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/tenyx/Llama3-TenyxChat-70B/88c257d3-d5c1-4e1f-bbc8-9fc6bd65e15e.json b/data/hfopenllm_v2/tenyx/Llama3-TenyxChat-70B/88c257d3-d5c1-4e1f-bbc8-9fc6bd65e15e.json new file mode 100644 index 000000000..f42d9a03a --- /dev/null +++ b/data/hfopenllm_v2/tenyx/Llama3-TenyxChat-70B/88c257d3-d5c1-4e1f-bbc8-9fc6bd65e15e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tenyx_Llama3-TenyxChat-70B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3-TenyxChat-70B", + "id": "tenyx/Llama3-TenyxChat-70B", + "developer": "tenyx", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8087 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6511 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2356 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": 
"TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.426 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.521 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/theo77186/Qwen2.5-Coder-7B-Instruct-20241106/b8198c8b-533a-4f7c-9025-1ccd7a4aba76.json b/data/hfopenllm_v2/theo77186/Qwen2.5-Coder-7B-Instruct-20241106/b8198c8b-533a-4f7c-9025-1ccd7a4aba76.json deleted file mode 100644 index 96bfa693b..000000000 --- a/data/hfopenllm_v2/theo77186/Qwen2.5-Coder-7B-Instruct-20241106/b8198c8b-533a-4f7c-9025-1ccd7a4aba76.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/theo77186_Qwen2.5-Coder-7B-Instruct-20241106/1762652580.559671", - "retrieved_timestamp": "1762652580.559671", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "theo77186/Qwen2.5-Coder-7B-Instruct-20241106", - "developer": "theo77186", - "inference_platform": "unknown", - "id": "theo77186/Qwen2.5-Coder-7B-Instruct-20241106", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6101477413263474 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5007976986224548 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38821752265861026 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4072708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33527260638297873 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/theo77186/Qwen2.5-Coder-7B-Instruct-20241106/ec4c2032-8fc0-448a-a7c4-ee9b35b642db.json b/data/hfopenllm_v2/theo77186/Qwen2.5-Coder-7B-Instruct-20241106/ec4c2032-8fc0-448a-a7c4-ee9b35b642db.json new file mode 100644 index 000000000..21cc408b5 --- /dev/null +++ b/data/hfopenllm_v2/theo77186/Qwen2.5-Coder-7B-Instruct-20241106/ec4c2032-8fc0-448a-a7c4-ee9b35b642db.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/theo77186_Qwen2.5-Coder-7B-Instruct-20241106/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-Coder-7B-Instruct-20241106", + "id": "theo77186/Qwen2.5-Coder-7B-Instruct-20241106", + "developer": "theo77186", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6101 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5008 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3882 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2919 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4073 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3353 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/theprint/Boptruth-Agatha-7B/0d1c0e64-8a5a-4797-9234-91a4f1726171.json 
b/data/hfopenllm_v2/theprint/Boptruth-Agatha-7B/0d1c0e64-8a5a-4797-9234-91a4f1726171.json deleted file mode 100644 index 99e1ea4cc..000000000 --- a/data/hfopenllm_v2/theprint/Boptruth-Agatha-7B/0d1c0e64-8a5a-4797-9234-91a4f1726171.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/theprint_Boptruth-Agatha-7B/1762652580.559956", - "retrieved_timestamp": "1762652580.559957", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "theprint/Boptruth-Agatha-7B", - "developer": "theprint", - "inference_platform": "unknown", - "id": "theprint/Boptruth-Agatha-7B", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.312418826491487 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4983936045348778 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42766666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28607047872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/theprint/Boptruth-Agatha-7B/3c7ac4de-1456-4afb-b7ac-07beb6cb4d39.json b/data/hfopenllm_v2/theprint/Boptruth-Agatha-7B/3c7ac4de-1456-4afb-b7ac-07beb6cb4d39.json new file mode 100644 index 000000000..096a756a8 --- /dev/null +++ b/data/hfopenllm_v2/theprint/Boptruth-Agatha-7B/3c7ac4de-1456-4afb-b7ac-07beb6cb4d39.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/theprint_Boptruth-Agatha-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Boptruth-Agatha-7B", + "id": "theprint/Boptruth-Agatha-7B", + "developer": "theprint", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + 
"architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3124 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4984 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0551 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4277 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2861 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/theprint/CleverBoi-7B-v2/0ef8de5e-4e2f-4d74-9267-e953375dbdf4.json b/data/hfopenllm_v2/theprint/CleverBoi-7B-v2/0ef8de5e-4e2f-4d74-9267-e953375dbdf4.json deleted file mode 100644 index 21be70798..000000000 --- a/data/hfopenllm_v2/theprint/CleverBoi-7B-v2/0ef8de5e-4e2f-4d74-9267-e953375dbdf4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/theprint_CleverBoi-7B-v2/1762652580.56022", - "retrieved_timestamp": "1762652580.560221", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "theprint/CleverBoi-7B-v2", - "developer": "theprint", - "inference_platform": "unknown", - "id": "theprint/CleverBoi-7B-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 7.736 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21699756645700075 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45317253321634526 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46953125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27086103723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/theprint/CleverBoi-7B-v2/a06ad94f-13ee-466c-b25f-87cd87012678.json b/data/hfopenllm_v2/theprint/CleverBoi-7B-v2/a06ad94f-13ee-466c-b25f-87cd87012678.json new file mode 100644 index 000000000..1d1af6ab3 --- /dev/null +++ b/data/hfopenllm_v2/theprint/CleverBoi-7B-v2/a06ad94f-13ee-466c-b25f-87cd87012678.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/theprint_CleverBoi-7B-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CleverBoi-7B-v2", + "id": "theprint/CleverBoi-7B-v2", + "developer": "theprint", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 7.736 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.217 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4532 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0264 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2886 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4695 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2709 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/theprint/CleverBoi-7B-v3/4634b7d7-110e-422c-af60-80cd9df06dac.json b/data/hfopenllm_v2/theprint/CleverBoi-7B-v3/4634b7d7-110e-422c-af60-80cd9df06dac.json deleted file mode 100644 index 1a202be75..000000000 --- a/data/hfopenllm_v2/theprint/CleverBoi-7B-v3/4634b7d7-110e-422c-af60-80cd9df06dac.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/theprint_CleverBoi-7B-v3/1762652580.560437", - "retrieved_timestamp": "1762652580.560438", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "theprint/CleverBoi-7B-v3", - "developer": "theprint", - "inference_platform": "unknown", - "id": "theprint/CleverBoi-7B-v3", - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 7.736 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23823011830831084 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4414430902840938 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4071770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28681848404255317 - } - } - ] -} diff --git a/data/hfopenllm_v2/theprint/CleverBoi-7B-v3/9e1ca6d0-d2b2-48c5-acc2-ad299ce02e1f.json b/data/hfopenllm_v2/theprint/CleverBoi-7B-v3/9e1ca6d0-d2b2-48c5-acc2-ad299ce02e1f.json new file mode 100644 index 000000000..15f7a9b45 --- /dev/null +++ b/data/hfopenllm_v2/theprint/CleverBoi-7B-v3/9e1ca6d0-d2b2-48c5-acc2-ad299ce02e1f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/theprint_CleverBoi-7B-v3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CleverBoi-7B-v3", + "id": "theprint/CleverBoi-7B-v3", + "developer": "theprint", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 7.736 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2382 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4414 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.04 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2659 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4072 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.2868 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/theprint/CleverBoi-Llama-3.1-8B-Instruct/7dcd6e37-3685-4b08-b983-b2a711aeaf73.json b/data/hfopenllm_v2/theprint/CleverBoi-Llama-3.1-8B-Instruct/7dcd6e37-3685-4b08-b983-b2a711aeaf73.json new file mode 100644 index 000000000..a63c67af4 --- /dev/null +++ b/data/hfopenllm_v2/theprint/CleverBoi-Llama-3.1-8B-Instruct/7dcd6e37-3685-4b08-b983-b2a711aeaf73.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/theprint_CleverBoi-Llama-3.1-8B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CleverBoi-Llama-3.1-8B-Instruct", + "id": "theprint/CleverBoi-Llama-3.1-8B-Instruct", + "developer": "theprint", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 16.061 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1682 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.456 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0491 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4014 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3075 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/theprint/CleverBoi-Llama-3.1-8B-Instruct/86d3bb20-09a5-4ec0-a473-14a3e3c5a402.json b/data/hfopenllm_v2/theprint/CleverBoi-Llama-3.1-8B-Instruct/86d3bb20-09a5-4ec0-a473-14a3e3c5a402.json deleted file mode 100644 index dff2d460b..000000000 --- a/data/hfopenllm_v2/theprint/CleverBoi-Llama-3.1-8B-Instruct/86d3bb20-09a5-4ec0-a473-14a3e3c5a402.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/theprint_CleverBoi-Llama-3.1-8B-Instruct/1762652580.5606558", - "retrieved_timestamp": "1762652580.5606568", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "theprint/CleverBoi-Llama-3.1-8B-Instruct", - "developer": "theprint", - "inference_platform": "unknown", - "id": "theprint/CleverBoi-Llama-3.1-8B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 16.061 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16816269719898758 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4559618469185147 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04909365558912387 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40143750000000006 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30751329787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/theprint/CleverBoi-Llama-3.1-8B-v2/b1ae6801-0139-41d3-85dc-102ad5cc4c6a.json b/data/hfopenllm_v2/theprint/CleverBoi-Llama-3.1-8B-v2/b1ae6801-0139-41d3-85dc-102ad5cc4c6a.json new file mode 100644 index 000000000..f1cf9de8b --- /dev/null +++ b/data/hfopenllm_v2/theprint/CleverBoi-Llama-3.1-8B-v2/b1ae6801-0139-41d3-85dc-102ad5cc4c6a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/theprint_CleverBoi-Llama-3.1-8B-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + 
"name": "CleverBoi-Llama-3.1-8B-v2", + "id": "theprint/CleverBoi-Llama-3.1-8B-v2", + "developer": "theprint", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 9.3 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1961 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4668 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0529 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2861 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3735 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3188 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/theprint/CleverBoi-Nemo-12B-v2/3ac95acf-830a-48ca-a144-42b610558062.json b/data/hfopenllm_v2/theprint/CleverBoi-Nemo-12B-v2/3ac95acf-830a-48ca-a144-42b610558062.json deleted file mode 100644 index 9fe60f311..000000000 --- a/data/hfopenllm_v2/theprint/CleverBoi-Nemo-12B-v2/3ac95acf-830a-48ca-a144-42b610558062.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/theprint_CleverBoi-Nemo-12B-v2/1762652580.561142", - "retrieved_timestamp": "1762652580.561143", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "theprint/CleverBoi-Nemo-12B-v2", - "developer": "theprint", - "inference_platform": "unknown", - "id": 
"theprint/CleverBoi-Nemo-12B-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 13.933 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2045827293802666 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5241085887165254 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10347432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4186770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3228058510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/theprint/CleverBoi-Nemo-12B-v2/4cc037a2-d952-4566-a575-015f8e3a5925.json b/data/hfopenllm_v2/theprint/CleverBoi-Nemo-12B-v2/4cc037a2-d952-4566-a575-015f8e3a5925.json new file mode 100644 index 000000000..76b2de2d9 --- /dev/null +++ b/data/hfopenllm_v2/theprint/CleverBoi-Nemo-12B-v2/4cc037a2-d952-4566-a575-015f8e3a5925.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/theprint_CleverBoi-Nemo-12B-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CleverBoi-Nemo-12B-v2", + "id": "theprint/CleverBoi-Nemo-12B-v2", + "developer": "theprint", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 13.933 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2046 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5241 + } + }, + { + "evaluation_name": "MATH Level 5", 
+ "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1035 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3138 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4187 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3228 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/theprint/Code-Llama-Bagel-8B/a1eaadae-8601-4c18-ab0c-4f6d80d3307b.json b/data/hfopenllm_v2/theprint/Code-Llama-Bagel-8B/a1eaadae-8601-4c18-ab0c-4f6d80d3307b.json new file mode 100644 index 000000000..b1815116f --- /dev/null +++ b/data/hfopenllm_v2/theprint/Code-Llama-Bagel-8B/a1eaadae-8601-4c18-ab0c-4f6d80d3307b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/theprint_Code-Llama-Bagel-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Code-Llama-Bagel-8B", + "id": "theprint/Code-Llama-Bagel-8B", + "developer": "theprint", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.253 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4697 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0612 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.368 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2822 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/theprint/Conversely-Mistral-7B/40e452df-8f0a-4473-a3d1-41f9c288c12f.json b/data/hfopenllm_v2/theprint/Conversely-Mistral-7B/40e452df-8f0a-4473-a3d1-41f9c288c12f.json new file mode 100644 index 000000000..71ebe1082 --- /dev/null +++ b/data/hfopenllm_v2/theprint/Conversely-Mistral-7B/40e452df-8f0a-4473-a3d1-41f9c288c12f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/theprint_Conversely-Mistral-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Conversely-Mistral-7B", + "id": "theprint/Conversely-Mistral-7B", + "developer": "theprint", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 14.496 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2608 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4672 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0279 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + 
}, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4189 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2826 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/theprint/Llama-3.2-3B-VanRossum/216020ac-276b-436e-815b-d6968eb83770.json b/data/hfopenllm_v2/theprint/Llama-3.2-3B-VanRossum/216020ac-276b-436e-815b-d6968eb83770.json new file mode 100644 index 000000000..aa1889b1c --- /dev/null +++ b/data/hfopenllm_v2/theprint/Llama-3.2-3B-VanRossum/216020ac-276b-436e-815b-d6968eb83770.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/theprint_Llama-3.2-3B-VanRossum/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-3B-VanRossum", + "id": "theprint/Llama-3.2-3B-VanRossum", + "developer": "theprint", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 3.696 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4783 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4279 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0974 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", 
+ "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3442 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.277 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/theprint/ReWiz-7B/1bb4aeac-a5e1-4fd7-9e70-64fdcfc600cd.json b/data/hfopenllm_v2/theprint/ReWiz-7B/1bb4aeac-a5e1-4fd7-9e70-64fdcfc600cd.json new file mode 100644 index 000000000..28eca97a3 --- /dev/null +++ b/data/hfopenllm_v2/theprint/ReWiz-7B/1bb4aeac-a5e1-4fd7-9e70-64fdcfc600cd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/theprint_ReWiz-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ReWiz-7B", + "id": "theprint/ReWiz-7B", + "developer": "theprint", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 7.736 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4048 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4564 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0408 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2752 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4612 + } + }, + { + 
"evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.267 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/theprint/ReWiz-7B/b6f50cef-72b3-414c-a33a-a2c8b2af18c0.json b/data/hfopenllm_v2/theprint/ReWiz-7B/b6f50cef-72b3-414c-a33a-a2c8b2af18c0.json deleted file mode 100644 index 05ff71fb1..000000000 --- a/data/hfopenllm_v2/theprint/ReWiz-7B/b6f50cef-72b3-414c-a33a-a2c8b2af18c0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/theprint_ReWiz-7B/1762652580.562494", - "retrieved_timestamp": "1762652580.562496", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "theprint/ReWiz-7B", - "developer": "theprint", - "inference_platform": "unknown", - "id": "theprint/ReWiz-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 7.736 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40479261692309737 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4564215411912313 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46115625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2670378989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/theprint/ReWiz-Llama-3.1-8B-v2/25739611-f690-41b4-87de-9f4ea8b3d815.json b/data/hfopenllm_v2/theprint/ReWiz-Llama-3.1-8B-v2/25739611-f690-41b4-87de-9f4ea8b3d815.json new file mode 100644 index 000000000..f45735570 --- /dev/null +++ b/data/hfopenllm_v2/theprint/ReWiz-Llama-3.1-8B-v2/25739611-f690-41b4-87de-9f4ea8b3d815.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/theprint_ReWiz-Llama-3.1-8B-v2/1770682486.623709", + 
"retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ReWiz-Llama-3.1-8B-v2", + "id": "theprint/ReWiz-Llama-3.1-8B-v2", + "developer": "theprint", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 9.3 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2379 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4632 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0574 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3029 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3814 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.331 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/theprint/ReWiz-Llama-3.2-3B/b8c27fdd-5b35-41ab-8a35-b5a48f27cceb.json b/data/hfopenllm_v2/theprint/ReWiz-Llama-3.2-3B/b8c27fdd-5b35-41ab-8a35-b5a48f27cceb.json new file mode 100644 index 000000000..e0c5c3d8e --- /dev/null +++ b/data/hfopenllm_v2/theprint/ReWiz-Llama-3.2-3B/b8c27fdd-5b35-41ab-8a35-b5a48f27cceb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/theprint_ReWiz-Llama-3.2-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": 
"ReWiz-Llama-3.2-3B", + "id": "theprint/ReWiz-Llama-3.2-3B", + "developer": "theprint", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4649 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4343 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1095 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3614 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2887 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/theprint/ReWiz-Nemo-12B-Instruct/92999dc0-7075-44ee-be68-1ec32ab5645d.json b/data/hfopenllm_v2/theprint/ReWiz-Nemo-12B-Instruct/92999dc0-7075-44ee-be68-1ec32ab5645d.json deleted file mode 100644 index 9e1bde3ed..000000000 --- a/data/hfopenllm_v2/theprint/ReWiz-Nemo-12B-Instruct/92999dc0-7075-44ee-be68-1ec32ab5645d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/theprint_ReWiz-Nemo-12B-Instruct/1762652580.563264", - "retrieved_timestamp": "1762652580.563264", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "theprint/ReWiz-Nemo-12B-Instruct", - "developer": "theprint", - "inference_platform": "unknown", - "id": 
"theprint/ReWiz-Nemo-12B-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10623811486854878 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5092407647626753 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4095625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33394281914893614 - } - } - ] -} diff --git a/data/hfopenllm_v2/theprint/ReWiz-Nemo-12B-Instruct/fa237949-c3ac-482a-8a54-5a2019f24016.json b/data/hfopenllm_v2/theprint/ReWiz-Nemo-12B-Instruct/fa237949-c3ac-482a-8a54-5a2019f24016.json new file mode 100644 index 000000000..121036620 --- /dev/null +++ b/data/hfopenllm_v2/theprint/ReWiz-Nemo-12B-Instruct/fa237949-c3ac-482a-8a54-5a2019f24016.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/theprint_ReWiz-Nemo-12B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ReWiz-Nemo-12B-Instruct", + "id": "theprint/ReWiz-Nemo-12B-Instruct", + "developer": "theprint", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1062 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5092 + } + }, + 
{ + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1042 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3238 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4096 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3339 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/theprint/ReWiz-Qwen-2.5-14B/b60dd828-a3e7-46a8-b4c2-322aeca42faf.json b/data/hfopenllm_v2/theprint/ReWiz-Qwen-2.5-14B/b60dd828-a3e7-46a8-b4c2-322aeca42faf.json new file mode 100644 index 000000000..c794f72d9 --- /dev/null +++ b/data/hfopenllm_v2/theprint/ReWiz-Qwen-2.5-14B/b60dd828-a3e7-46a8-b4c2-322aeca42faf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/theprint_ReWiz-Qwen-2.5-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ReWiz-Qwen-2.5-14B", + "id": "theprint/ReWiz-Qwen-2.5-14B", + "developer": "theprint", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 16.743 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2785 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6179 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2923 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.38 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4539 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5092 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/theprint/ReWiz-Worldbuilder-7B/5de9f914-333f-4181-a93f-79257a3daf54.json b/data/hfopenllm_v2/theprint/ReWiz-Worldbuilder-7B/5de9f914-333f-4181-a93f-79257a3daf54.json new file mode 100644 index 000000000..499adb749 --- /dev/null +++ b/data/hfopenllm_v2/theprint/ReWiz-Worldbuilder-7B/5de9f914-333f-4181-a93f-79257a3daf54.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/theprint_ReWiz-Worldbuilder-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ReWiz-Worldbuilder-7B", + "id": "theprint/ReWiz-Worldbuilder-7B", + "developer": "theprint", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.248 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.251 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4636 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.037 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": 
"hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2693 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4572 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2971 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/theprint/ReWiz-Worldbuilder-7B/cf71c265-ef73-4410-a2bc-ce9702cfbcee.json b/data/hfopenllm_v2/theprint/ReWiz-Worldbuilder-7B/cf71c265-ef73-4410-a2bc-ce9702cfbcee.json deleted file mode 100644 index 2e47d2f11..000000000 --- a/data/hfopenllm_v2/theprint/ReWiz-Worldbuilder-7B/cf71c265-ef73-4410-a2bc-ce9702cfbcee.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/theprint_ReWiz-Worldbuilder-7B/1762652580.563769", - "retrieved_timestamp": "1762652580.56377", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "theprint/ReWiz-Worldbuilder-7B", - "developer": "theprint", - "inference_platform": "unknown", - "id": "theprint/ReWiz-Worldbuilder-7B", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.248 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25101951710350756 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46361558385510165 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03700906344410876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45725 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.297124335106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/theprint/RuDolph-Hermes-7B/22bab713-09d7-471a-b077-cb8c336ba151.json b/data/hfopenllm_v2/theprint/RuDolph-Hermes-7B/22bab713-09d7-471a-b077-cb8c336ba151.json deleted file mode 100644 index 15523fcc7..000000000 --- a/data/hfopenllm_v2/theprint/RuDolph-Hermes-7B/22bab713-09d7-471a-b077-cb8c336ba151.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/theprint_RuDolph-Hermes-7B/1762652580.564037", - "retrieved_timestamp": "1762652580.5640378", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "theprint/RuDolph-Hermes-7B", - "developer": "theprint", - "inference_platform": "unknown", - "id": "theprint/RuDolph-Hermes-7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3604292167005767 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5052928613425586 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4226145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30726396276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/theprint/RuDolph-Hermes-7B/e2d23da4-226a-4a02-8390-e8edaea4b65b.json b/data/hfopenllm_v2/theprint/RuDolph-Hermes-7B/e2d23da4-226a-4a02-8390-e8edaea4b65b.json new file mode 100644 index 000000000..b8a758ee2 --- /dev/null +++ b/data/hfopenllm_v2/theprint/RuDolph-Hermes-7B/e2d23da4-226a-4a02-8390-e8edaea4b65b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/theprint_RuDolph-Hermes-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + 
"source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RuDolph-Hermes-7B", + "id": "theprint/RuDolph-Hermes-7B", + "developer": "theprint", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3604 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5053 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0514 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3121 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4226 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3073 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/theprint/WorldBuilder-12B/c64c7470-dcf9-46f8-b789-cab7e902739d.json b/data/hfopenllm_v2/theprint/WorldBuilder-12B/c64c7470-dcf9-46f8-b789-cab7e902739d.json new file mode 100644 index 000000000..70af1d5c7 --- /dev/null +++ b/data/hfopenllm_v2/theprint/WorldBuilder-12B/c64c7470-dcf9-46f8-b789-cab7e902739d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/theprint_WorldBuilder-12B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "WorldBuilder-12B", + "id": "theprint/WorldBuilder-12B", + "developer": "theprint", + "inference_platform": "unknown", 
+ "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 13.933 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1374 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.501 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0446 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4066 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3192 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/theprint/WorldBuilder-12B/f1107803-5a3b-4fcc-b948-ff622b5f26da.json b/data/hfopenllm_v2/theprint/WorldBuilder-12B/f1107803-5a3b-4fcc-b948-ff622b5f26da.json deleted file mode 100644 index 306b2c16d..000000000 --- a/data/hfopenllm_v2/theprint/WorldBuilder-12B/f1107803-5a3b-4fcc-b948-ff622b5f26da.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/theprint_WorldBuilder-12B/1762652580.564255", - "retrieved_timestamp": "1762652580.564256", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "theprint/WorldBuilder-12B", - "developer": "theprint", - "inference_platform": "unknown", - "id": "theprint/WorldBuilder-12B", - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 13.933 - } - }, - "evaluation_results": [ - { - "evaluation_name": 
"IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13743755457741016 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5010100641541125 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0445619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4066458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31923204787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/theprint/phi-3-mini-4k-python/f6d727a3-19dc-4173-a88f-2c47449896aa.json b/data/hfopenllm_v2/theprint/phi-3-mini-4k-python/f6d727a3-19dc-4173-a88f-2c47449896aa.json new file mode 100644 index 000000000..e5805f869 --- /dev/null +++ b/data/hfopenllm_v2/theprint/phi-3-mini-4k-python/f6d727a3-19dc-4173-a88f-2c47449896aa.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/theprint_phi-3-mini-4k-python/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "phi-3-mini-4k-python", + "id": "theprint/phi-3-mini-4k-python", + "developer": "theprint", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 4.132 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2409 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4938 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match 
on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.105 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3922 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3577 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/thinkcoder/llama3-8b-instruct-lora-8-sft/490d14c8-2cb0-4328-9f41-6074b28d6fdc.json b/data/hfopenllm_v2/thinkcoder/llama3-8b-instruct-lora-8-sft/490d14c8-2cb0-4328-9f41-6074b28d6fdc.json new file mode 100644 index 000000000..440a5ef62 --- /dev/null +++ b/data/hfopenllm_v2/thinkcoder/llama3-8b-instruct-lora-8-sft/490d14c8-2cb0-4328-9f41-6074b28d6fdc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/thinkcoder_llama3-8b-instruct-lora-8-sft/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama3-8b-instruct-lora-8-sft", + "id": "thinkcoder/llama3-8b-instruct-lora-8-sft", + "developer": "thinkcoder", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.648 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4865 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.102 + } + }, + { + 
"evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3235 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3476 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/thinkcoder/llama3-8b-instruct-lora-8-sft/51caac64-fee1-4c7f-b474-1b1e0f71212c.json b/data/hfopenllm_v2/thinkcoder/llama3-8b-instruct-lora-8-sft/51caac64-fee1-4c7f-b474-1b1e0f71212c.json deleted file mode 100644 index ac952c250..000000000 --- a/data/hfopenllm_v2/thinkcoder/llama3-8b-instruct-lora-8-sft/51caac64-fee1-4c7f-b474-1b1e0f71212c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/thinkcoder_llama3-8b-instruct-lora-8-sft/1762652580.564969", - "retrieved_timestamp": "1762652580.56497", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "thinkcoder/llama3-8b-instruct-lora-8-sft", - "developer": "thinkcoder", - "inference_platform": "unknown", - "id": "thinkcoder/llama3-8b-instruct-lora-8-sft", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6480416406246536 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4865011845587858 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10196374622356495 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32345833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34757313829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/thirdeyeai/elevate360m/013a9bf9-7b9e-4084-b7a2-bb77ad0c18e1.json b/data/hfopenllm_v2/thirdeyeai/elevate360m/013a9bf9-7b9e-4084-b7a2-bb77ad0c18e1.json deleted file mode 100644 index 33d50d537..000000000 --- a/data/hfopenllm_v2/thirdeyeai/elevate360m/013a9bf9-7b9e-4084-b7a2-bb77ad0c18e1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/thirdeyeai_elevate360m/1762652580.565248", - "retrieved_timestamp": "1762652580.565249", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "thirdeyeai/elevate360m", - "developer": "thirdeyeai", - "inference_platform": "unknown", - "id": "thirdeyeai/elevate360m", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.362 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04448862351892978 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2962583602962783 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2407718120805369 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34621875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1077127659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/thirdeyeai/elevate360m/9351b079-7ef5-42ec-bb83-f0d8ec7de479.json b/data/hfopenllm_v2/thirdeyeai/elevate360m/9351b079-7ef5-42ec-bb83-f0d8ec7de479.json new file mode 100644 index 000000000..2f88fa8a2 --- /dev/null +++ b/data/hfopenllm_v2/thirdeyeai/elevate360m/9351b079-7ef5-42ec-bb83-f0d8ec7de479.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/thirdeyeai_elevate360m/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "elevate360m", + "id": "thirdeyeai/elevate360m", + "developer": "thirdeyeai", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.362 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0445 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2963 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0159 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2408 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3462 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1077 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/thomas-yanxin/XinYuan-Qwen2-1_5B/852d5adb-f422-4102-8114-082ab0b3c07d.json b/data/hfopenllm_v2/thomas-yanxin/XinYuan-Qwen2-1_5B/852d5adb-f422-4102-8114-082ab0b3c07d.json new file mode 100644 index 000000000..d16c6d65e --- /dev/null +++ b/data/hfopenllm_v2/thomas-yanxin/XinYuan-Qwen2-1_5B/852d5adb-f422-4102-8114-082ab0b3c07d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/thomas-yanxin_XinYuan-Qwen2-1_5B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + 
"evaluator_relationship": "third_party" + }, + "model_info": { + "name": "XinYuan-Qwen2-1_5B", + "id": "thomas-yanxin/XinYuan-Qwen2-1_5B", + "developer": "thomas-yanxin", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2986 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3635 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0672 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3634 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2357 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/thomas-yanxin/XinYuan-Qwen2-7B-0917/c64e98cd-c022-4834-a3e0-3949416d1fb1.json b/data/hfopenllm_v2/thomas-yanxin/XinYuan-Qwen2-7B-0917/c64e98cd-c022-4834-a3e0-3949416d1fb1.json new file mode 100644 index 000000000..075ddb870 --- /dev/null +++ b/data/hfopenllm_v2/thomas-yanxin/XinYuan-Qwen2-7B-0917/c64e98cd-c022-4834-a3e0-3949416d1fb1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/thomas-yanxin_XinYuan-Qwen2-7B-0917/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "XinYuan-Qwen2-7B-0917", + "id": "thomas-yanxin/XinYuan-Qwen2-7B-0917", + "developer": "thomas-yanxin", + "inference_platform": "unknown", + 
"additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3719 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5169 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1979 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4401 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4245 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/thomas-yanxin/XinYuan-Qwen2-7B/f101bd15-ac61-49d4-beac-c89bc889b34b.json b/data/hfopenllm_v2/thomas-yanxin/XinYuan-Qwen2-7B/f101bd15-ac61-49d4-beac-c89bc889b34b.json new file mode 100644 index 000000000..eee3e1e34 --- /dev/null +++ b/data/hfopenllm_v2/thomas-yanxin/XinYuan-Qwen2-7B/f101bd15-ac61-49d4-beac-c89bc889b34b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/thomas-yanxin_XinYuan-Qwen2-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "XinYuan-Qwen2-7B", + "id": "thomas-yanxin/XinYuan-Qwen2-7B", + "developer": "thomas-yanxin", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + 
"source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4438 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4937 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1458 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4058 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3925 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/thomas-yanxin/XinYuan-Qwen2.5-7B-0917/11caf1c1-e2a0-4abb-bb0e-d06853a06e4d.json b/data/hfopenllm_v2/thomas-yanxin/XinYuan-Qwen2.5-7B-0917/11caf1c1-e2a0-4abb-bb0e-d06853a06e4d.json new file mode 100644 index 000000000..ff36d37cb --- /dev/null +++ b/data/hfopenllm_v2/thomas-yanxin/XinYuan-Qwen2.5-7B-0917/11caf1c1-e2a0-4abb-bb0e-d06853a06e4d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/thomas-yanxin_XinYuan-Qwen2.5-7B-0917/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "XinYuan-Qwen2.5-7B-0917", + "id": "thomas-yanxin/XinYuan-Qwen2.5-7B-0917", + "developer": "thomas-yanxin", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3577 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5184 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1934 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3676 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3882 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tianyil1/MistralForCausalLM_Cal_DPO/f0b57a60-8402-4430-93f3-b846a94113f2.json b/data/hfopenllm_v2/tianyil1/MistralForCausalLM_Cal_DPO/f0b57a60-8402-4430-93f3-b846a94113f2.json new file mode 100644 index 000000000..06e058942 --- /dev/null +++ b/data/hfopenllm_v2/tianyil1/MistralForCausalLM_Cal_DPO/f0b57a60-8402-4430-93f3-b846a94113f2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tianyil1_MistralForCausalLM_Cal_DPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MistralForCausalLM_Cal_DPO", + "id": "tianyil1/MistralForCausalLM_Cal_DPO", + "developer": "tianyil1", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5328 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": 
"SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4381 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0287 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3977 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2763 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tiiuae/Falcon3-10B-Base/4e1ce0d3-f454-480b-a4f7-7aa827eaaf1a.json b/data/hfopenllm_v2/tiiuae/Falcon3-10B-Base/4e1ce0d3-f454-480b-a4f7-7aa827eaaf1a.json deleted file mode 100644 index 1c8e9c328..000000000 --- a/data/hfopenllm_v2/tiiuae/Falcon3-10B-Base/4e1ce0d3-f454-480b-a4f7-7aa827eaaf1a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-10B-Base/1762652580.566659", - "retrieved_timestamp": "1762652580.566659", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tiiuae/Falcon3-10B-Base", - "developer": "tiiuae", - "inference_platform": "unknown", - "id": "tiiuae/Falcon3-10B-Base", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3647754624396601 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.595004253437141 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34563758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43979166666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4240359042553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/tiiuae/Falcon3-10B-Base/50aa8077-4493-47a9-9cec-014c56343ecf.json b/data/hfopenllm_v2/tiiuae/Falcon3-10B-Base/50aa8077-4493-47a9-9cec-014c56343ecf.json new file mode 100644 index 000000000..35ab332bf --- /dev/null +++ b/data/hfopenllm_v2/tiiuae/Falcon3-10B-Base/50aa8077-4493-47a9-9cec-014c56343ecf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-10B-Base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Falcon3-10B-Base", + "id": "tiiuae/Falcon3-10B-Base", + "developer": "tiiuae", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3648 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.595 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2492 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3456 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", 
+ "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4398 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.424 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tiiuae/Falcon3-10B-Instruct/5e70d00b-c822-4ad6-afe8-3756a7038c57.json b/data/hfopenllm_v2/tiiuae/Falcon3-10B-Instruct/5e70d00b-c822-4ad6-afe8-3756a7038c57.json new file mode 100644 index 000000000..968a3c904 --- /dev/null +++ b/data/hfopenllm_v2/tiiuae/Falcon3-10B-Instruct/5e70d00b-c822-4ad6-afe8-3756a7038c57.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-10B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Falcon3-10B-Instruct", + "id": "tiiuae/Falcon3-10B-Instruct", + "developer": "tiiuae", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7817 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.617 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2764 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3289 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4323 + } + }, + { + 
"evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4429 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tiiuae/Falcon3-10B-Instruct/741838df-e2a3-4c54-84d3-fe491444071b.json b/data/hfopenllm_v2/tiiuae/Falcon3-10B-Instruct/741838df-e2a3-4c54-84d3-fe491444071b.json deleted file mode 100644 index e5439c6ee..000000000 --- a/data/hfopenllm_v2/tiiuae/Falcon3-10B-Instruct/741838df-e2a3-4c54-84d3-fe491444071b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-10B-Instruct/1762652580.566902", - "retrieved_timestamp": "1762652580.566903", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tiiuae/Falcon3-10B-Instruct", - "developer": "tiiuae", - "inference_platform": "unknown", - "id": "tiiuae/Falcon3-10B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7816560060639104 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6170469398052084 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2764350453172205 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43232291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44290226063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/tiiuae/Falcon3-1B-Base/1e11a625-87e1-49d0-94a6-8f9ec1f75fc3.json b/data/hfopenllm_v2/tiiuae/Falcon3-1B-Base/1e11a625-87e1-49d0-94a6-8f9ec1f75fc3.json deleted file mode 100644 index e539cfc4f..000000000 --- a/data/hfopenllm_v2/tiiuae/Falcon3-1B-Base/1e11a625-87e1-49d0-94a6-8f9ec1f75fc3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/tiiuae_Falcon3-1B-Base/1762652580.567122", - "retrieved_timestamp": "1762652580.567122", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tiiuae/Falcon3-1B-Base", - "developer": "tiiuae", - "inference_platform": "unknown", - "id": "tiiuae/Falcon3-1B-Base", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.669 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24280132271262472 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3571153918015637 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03323262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41473958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16082114361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/tiiuae/Falcon3-1B-Base/8162ba41-e630-470f-a297-72fb9f2110fd.json b/data/hfopenllm_v2/tiiuae/Falcon3-1B-Base/8162ba41-e630-470f-a297-72fb9f2110fd.json new file mode 100644 index 000000000..e1cc14037 --- /dev/null +++ b/data/hfopenllm_v2/tiiuae/Falcon3-1B-Base/8162ba41-e630-470f-a297-72fb9f2110fd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-1B-Base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Falcon3-1B-Base", + "id": "tiiuae/Falcon3-1B-Base", + "developer": "tiiuae", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.669 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2428 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3571 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0332 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4147 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1608 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tiiuae/Falcon3-1B-Instruct/60dd9d02-476f-459d-a41c-f89f82116dc3.json b/data/hfopenllm_v2/tiiuae/Falcon3-1B-Instruct/60dd9d02-476f-459d-a41c-f89f82116dc3.json new file mode 100644 index 000000000..3a40efcbb --- /dev/null +++ b/data/hfopenllm_v2/tiiuae/Falcon3-1B-Instruct/60dd9d02-476f-459d-a41c-f89f82116dc3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-1B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Falcon3-1B-Instruct", + "id": "tiiuae/Falcon3-1B-Instruct", + "developer": "tiiuae", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.669 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5557 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { 
+ "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3745 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0634 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4189 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1838 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tiiuae/Falcon3-1B-Instruct/a060e2b0-d1ae-48b7-b8f9-c51fadc3e152.json b/data/hfopenllm_v2/tiiuae/Falcon3-1B-Instruct/a060e2b0-d1ae-48b7-b8f9-c51fadc3e152.json deleted file mode 100644 index 803bb69da..000000000 --- a/data/hfopenllm_v2/tiiuae/Falcon3-1B-Instruct/a060e2b0-d1ae-48b7-b8f9-c51fadc3e152.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-1B-Instruct/1762652580.567335", - "retrieved_timestamp": "1762652580.567335", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tiiuae/Falcon3-1B-Instruct", - "developer": "tiiuae", - "inference_platform": "unknown", - "id": "tiiuae/Falcon3-1B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.669 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5556678501930433 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3744535691366672 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4188958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18384308510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/tiiuae/Falcon3-3B-Base/1b0d1ae7-322b-46d2-bc33-160f578499b1.json b/data/hfopenllm_v2/tiiuae/Falcon3-3B-Base/1b0d1ae7-322b-46d2-bc33-160f578499b1.json deleted file mode 100644 index c90e8a42d..000000000 --- a/data/hfopenllm_v2/tiiuae/Falcon3-3B-Base/1b0d1ae7-322b-46d2-bc33-160f578499b1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-3B-Base/1762652580.5675461", - "retrieved_timestamp": "1762652580.5675468", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tiiuae/Falcon3-3B-Base", - "developer": "tiiuae", - "inference_platform": "unknown", - "id": "tiiuae/Falcon3-3B-Base", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.228 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2764985793250797 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4421367825874385 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11782477341389729 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3749895833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.2878989361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/tiiuae/Falcon3-3B-Base/73e89f21-5799-4835-a0e0-a6664c0483da.json b/data/hfopenllm_v2/tiiuae/Falcon3-3B-Base/73e89f21-5799-4835-a0e0-a6664c0483da.json new file mode 100644 index 000000000..81b06da6b --- /dev/null +++ b/data/hfopenllm_v2/tiiuae/Falcon3-3B-Base/73e89f21-5799-4835-a0e0-a6664c0483da.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-3B-Base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Falcon3-3B-Base", + "id": "tiiuae/Falcon3-3B-Base", + "developer": "tiiuae", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.228 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2765 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4421 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1178 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2879 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tiiuae/Falcon3-3B-Instruct/7aa3aa0e-3b5e-4c0c-a697-2e87859c44f2.json b/data/hfopenllm_v2/tiiuae/Falcon3-3B-Instruct/7aa3aa0e-3b5e-4c0c-a697-2e87859c44f2.json deleted file mode 
100644 index 170166df2..000000000 --- a/data/hfopenllm_v2/tiiuae/Falcon3-3B-Instruct/7aa3aa0e-3b5e-4c0c-a697-2e87859c44f2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-3B-Instruct/1762652580.567748", - "retrieved_timestamp": "1762652580.567749", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tiiuae/Falcon3-3B-Instruct", - "developer": "tiiuae", - "inference_platform": "unknown", - "id": "tiiuae/Falcon3-3B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.228 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6976755010040027 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4754430332167569 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41359375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.300531914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/tiiuae/Falcon3-3B-Instruct/7f355ad4-9156-486d-8cf4-723117da3bb8.json b/data/hfopenllm_v2/tiiuae/Falcon3-3B-Instruct/7f355ad4-9156-486d-8cf4-723117da3bb8.json new file mode 100644 index 000000000..2d79d95ab --- /dev/null +++ b/data/hfopenllm_v2/tiiuae/Falcon3-3B-Instruct/7f355ad4-9156-486d-8cf4-723117da3bb8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-3B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Falcon3-3B-Instruct", + "id": "tiiuae/Falcon3-3B-Instruct", + "developer": "tiiuae", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.228 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + 
"source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6977 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4754 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2886 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4136 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3005 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tiiuae/Falcon3-7B-Base/2420519c-81f1-43b3-9b76-af141d2574f4.json b/data/hfopenllm_v2/tiiuae/Falcon3-7B-Base/2420519c-81f1-43b3-9b76-af141d2574f4.json deleted file mode 100644 index 48800f2ca..000000000 --- a/data/hfopenllm_v2/tiiuae/Falcon3-7B-Base/2420519c-81f1-43b3-9b76-af141d2574f4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-7B-Base/1762652580.56796", - "retrieved_timestamp": "1762652580.567961", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tiiuae/Falcon3-7B-Base", - "developer": "tiiuae", - "inference_platform": "unknown", - "id": "tiiuae/Falcon3-7B-Base", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.34159474638403875 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5098880466426711 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19410876132930513 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3464765100671141 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47020833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3910405585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/tiiuae/Falcon3-7B-Base/4ccc6026-b639-488d-867f-d98ea49cf1b6.json b/data/hfopenllm_v2/tiiuae/Falcon3-7B-Base/4ccc6026-b639-488d-867f-d98ea49cf1b6.json new file mode 100644 index 000000000..e683aaba0 --- /dev/null +++ b/data/hfopenllm_v2/tiiuae/Falcon3-7B-Base/4ccc6026-b639-488d-867f-d98ea49cf1b6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-7B-Base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Falcon3-7B-Base", + "id": "tiiuae/Falcon3-7B-Base", + "developer": "tiiuae", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3416 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5099 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1941 + } + }, + { + "evaluation_name": "GPQA", + 
"source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3465 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4702 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.391 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tiiuae/Falcon3-7B-Instruct/3cf2e68e-4de0-436e-935e-86935e11f72f.json b/data/hfopenllm_v2/tiiuae/Falcon3-7B-Instruct/3cf2e68e-4de0-436e-935e-86935e11f72f.json new file mode 100644 index 000000000..b188ad079 --- /dev/null +++ b/data/hfopenllm_v2/tiiuae/Falcon3-7B-Instruct/3cf2e68e-4de0-436e-935e-86935e11f72f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-7B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Falcon3-7B-Instruct", + "id": "tiiuae/Falcon3-7B-Instruct", + "developer": "tiiuae", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7612 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5632 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4086 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4827 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4087 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tiiuae/Falcon3-7B-Instruct/ed988bd0-76b0-4ab6-9c9e-5a5e0aefb936.json b/data/hfopenllm_v2/tiiuae/Falcon3-7B-Instruct/ed988bd0-76b0-4ab6-9c9e-5a5e0aefb936.json deleted file mode 100644 index ccbce5989..000000000 --- a/data/hfopenllm_v2/tiiuae/Falcon3-7B-Instruct/ed988bd0-76b0-4ab6-9c9e-5a5e0aefb936.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-7B-Instruct/1762652580.568164", - "retrieved_timestamp": "1762652580.568164", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tiiuae/Falcon3-7B-Instruct", - "developer": "tiiuae", - "inference_platform": "unknown", - "id": "tiiuae/Falcon3-7B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7612479332615238 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.563244278519333 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4086102719033233 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48267708333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.4087433510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/tiiuae/Falcon3-Mamba-7B-Base/766e6e63-5779-49cd-9e8c-2bc475c1356a.json b/data/hfopenllm_v2/tiiuae/Falcon3-Mamba-7B-Base/766e6e63-5779-49cd-9e8c-2bc475c1356a.json deleted file mode 100644 index 2106881c0..000000000 --- a/data/hfopenllm_v2/tiiuae/Falcon3-Mamba-7B-Base/766e6e63-5779-49cd-9e8c-2bc475c1356a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-Mamba-7B-Base/1762652580.568367", - "retrieved_timestamp": "1762652580.5683682", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tiiuae/Falcon3-Mamba-7B-Base", - "developer": "tiiuae", - "inference_platform": "unknown", - "id": "tiiuae/Falcon3-Mamba-7B-Base", - "additional_details": { - "precision": "bfloat16", - "architecture": "FalconMambaForCausalLM", - "params_billions": 7.273 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28911288713945665 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4699280188827039 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19410876132930513 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3431458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30377327127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/tiiuae/Falcon3-Mamba-7B-Base/e9e4ae5d-0dd1-463c-9f15-47cb21efb409.json b/data/hfopenllm_v2/tiiuae/Falcon3-Mamba-7B-Base/e9e4ae5d-0dd1-463c-9f15-47cb21efb409.json new file mode 100644 index 000000000..98e4d109f --- /dev/null +++ b/data/hfopenllm_v2/tiiuae/Falcon3-Mamba-7B-Base/e9e4ae5d-0dd1-463c-9f15-47cb21efb409.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-Mamba-7B-Base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Falcon3-Mamba-7B-Base", + 
"id": "tiiuae/Falcon3-Mamba-7B-Base", + "developer": "tiiuae", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "FalconMambaForCausalLM", + "params_billions": 7.273 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2891 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4699 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1941 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3431 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3038 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tiiuae/Falcon3-Mamba-7B-Instruct/69491efc-0287-4288-bdf0-bcc57c53b94e.json b/data/hfopenllm_v2/tiiuae/Falcon3-Mamba-7B-Instruct/69491efc-0287-4288-bdf0-bcc57c53b94e.json deleted file mode 100644 index a2d57a432..000000000 --- a/data/hfopenllm_v2/tiiuae/Falcon3-Mamba-7B-Instruct/69491efc-0287-4288-bdf0-bcc57c53b94e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-Mamba-7B-Instruct/1762652580.5685718", - "retrieved_timestamp": "1762652580.5685718", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tiiuae/Falcon3-Mamba-7B-Instruct", - "developer": "tiiuae", - "inference_platform": "unknown", - "id": "tiiuae/Falcon3-Mamba-7B-Instruct", - 
"additional_details": { - "precision": "bfloat16", - "architecture": "FalconMambaForCausalLM", - "params_billions": 7.273 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7165099713205406 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4678957688410694 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30060422960725075 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38686458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3369348404255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/tiiuae/Falcon3-Mamba-7B-Instruct/c57eb23a-5998-4ab9-9a98-39b1338f5ba6.json b/data/hfopenllm_v2/tiiuae/Falcon3-Mamba-7B-Instruct/c57eb23a-5998-4ab9-9a98-39b1338f5ba6.json new file mode 100644 index 000000000..cfbd00438 --- /dev/null +++ b/data/hfopenllm_v2/tiiuae/Falcon3-Mamba-7B-Instruct/c57eb23a-5998-4ab9-9a98-39b1338f5ba6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-Mamba-7B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Falcon3-Mamba-7B-Instruct", + "id": "tiiuae/Falcon3-Mamba-7B-Instruct", + "developer": "tiiuae", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "FalconMambaForCausalLM", + "params_billions": 7.273 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7165 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4679 + } + }, + { + 
"evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3006 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3869 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3369 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tiiuae/falcon-11B/705a1ff4-2e40-4827-af54-099870fac588.json b/data/hfopenllm_v2/tiiuae/falcon-11B/705a1ff4-2e40-4827-af54-099870fac588.json deleted file mode 100644 index 98ed412c9..000000000 --- a/data/hfopenllm_v2/tiiuae/falcon-11B/705a1ff4-2e40-4827-af54-099870fac588.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tiiuae_falcon-11B/1762652580.568774", - "retrieved_timestamp": "1762652580.568774", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tiiuae/falcon-11B", - "developer": "tiiuae", - "inference_platform": "unknown", - "id": "tiiuae/falcon-11B", - "additional_details": { - "precision": "bfloat16", - "architecture": "FalconForCausalLM", - "params_billions": 11.103 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3261324397044287 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43916370355493844 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39864583333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23894614361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/tiiuae/falcon-11B/94fb625d-f58c-4f2e-8268-1dc4472c1cce.json b/data/hfopenllm_v2/tiiuae/falcon-11B/94fb625d-f58c-4f2e-8268-1dc4472c1cce.json new file mode 100644 index 000000000..50405922d --- /dev/null +++ b/data/hfopenllm_v2/tiiuae/falcon-11B/94fb625d-f58c-4f2e-8268-1dc4472c1cce.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tiiuae_falcon-11B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "falcon-11B", + "id": "tiiuae/falcon-11B", + "developer": "tiiuae", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "FalconForCausalLM", + "params_billions": 11.103 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3261 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4392 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0279 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.271 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3986 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + 
"dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2389 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tiiuae/falcon-40b-instruct/1d6f8802-e9aa-471c-8fbc-1cd807357ab5.json b/data/hfopenllm_v2/tiiuae/falcon-40b-instruct/1d6f8802-e9aa-471c-8fbc-1cd807357ab5.json deleted file mode 100644 index 6e0937a1e..000000000 --- a/data/hfopenllm_v2/tiiuae/falcon-40b-instruct/1d6f8802-e9aa-471c-8fbc-1cd807357ab5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tiiuae_falcon-40b-instruct/1762652580.569173", - "retrieved_timestamp": "1762652580.569173", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tiiuae/falcon-40b-instruct", - "developer": "tiiuae", - "inference_platform": "unknown", - "id": "tiiuae/falcon-40b-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "FalconForCausalLM", - "params_billions": 40.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24544874266945038 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40538675151591974 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37622916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2261469414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/tiiuae/falcon-40b-instruct/4481ddef-2bef-4284-b56d-21054f5a9a97.json b/data/hfopenllm_v2/tiiuae/falcon-40b-instruct/4481ddef-2bef-4284-b56d-21054f5a9a97.json new file mode 100644 index 000000000..fcd44b235 --- /dev/null +++ b/data/hfopenllm_v2/tiiuae/falcon-40b-instruct/4481ddef-2bef-4284-b56d-21054f5a9a97.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tiiuae_falcon-40b-instruct/1770682486.623709", + 
"retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "falcon-40b-instruct", + "id": "tiiuae/falcon-40b-instruct", + "developer": "tiiuae", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "FalconForCausalLM", + "params_billions": 40.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2454 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4054 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0196 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3762 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2261 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tiiuae/falcon-40b/80048c4b-e97b-45c7-aa04-70ce69481a97.json b/data/hfopenllm_v2/tiiuae/falcon-40b/80048c4b-e97b-45c7-aa04-70ce69481a97.json new file mode 100644 index 000000000..a3ef17532 --- /dev/null +++ b/data/hfopenllm_v2/tiiuae/falcon-40b/80048c4b-e97b-45c7-aa04-70ce69481a97.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tiiuae_falcon-40b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "falcon-40b", + "id": "tiiuae/falcon-40b", + 
"developer": "tiiuae", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "FalconForCausalLM", + "params_billions": 40.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2496 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4019 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0181 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2735 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3631 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2505 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tiiuae/falcon-40b/cfdece82-631e-48b7-8232-91a8d9ccf65c.json b/data/hfopenllm_v2/tiiuae/falcon-40b/cfdece82-631e-48b7-8232-91a8d9ccf65c.json deleted file mode 100644 index 08fbc75b9..000000000 --- a/data/hfopenllm_v2/tiiuae/falcon-40b/cfdece82-631e-48b7-8232-91a8d9ccf65c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tiiuae_falcon-40b/1762652580.568969", - "retrieved_timestamp": "1762652580.56897", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tiiuae/falcon-40b", - "developer": "tiiuae", - "inference_platform": "unknown", - "id": "tiiuae/falcon-40b", - "additional_details": { - "precision": "bfloat16", - "architecture": "FalconForCausalLM", - "params_billions": 40.0 - } - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24964538535530173 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4018532495595801 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01812688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36314583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25049867021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/tiiuae/falcon-7b-instruct/2b84722f-58fc-421d-ae1a-9e21ac0b4080.json b/data/hfopenllm_v2/tiiuae/falcon-7b-instruct/2b84722f-58fc-421d-ae1a-9e21ac0b4080.json deleted file mode 100644 index 9b9f8ae64..000000000 --- a/data/hfopenllm_v2/tiiuae/falcon-7b-instruct/2b84722f-58fc-421d-ae1a-9e21ac0b4080.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tiiuae_falcon-7b-instruct/1762652580.5696268", - "retrieved_timestamp": "1762652580.5696268", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tiiuae/falcon-7b-instruct", - "developer": "tiiuae", - "inference_platform": "unknown", - "id": "tiiuae/falcon-7b-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "FalconForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19688869976107837 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32034221512355765 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, 
- { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24748322147651006 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3633645833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1155252659574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/tiiuae/falcon-7b-instruct/d21a2557-2348-4087-b2a6-6e1c0101bccc.json b/data/hfopenllm_v2/tiiuae/falcon-7b-instruct/d21a2557-2348-4087-b2a6-6e1c0101bccc.json new file mode 100644 index 000000000..556a2f3e8 --- /dev/null +++ b/data/hfopenllm_v2/tiiuae/falcon-7b-instruct/d21a2557-2348-4087-b2a6-6e1c0101bccc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tiiuae_falcon-7b-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "falcon-7b-instruct", + "id": "tiiuae/falcon-7b-instruct", + "developer": "tiiuae", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "FalconForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1969 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3203 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0121 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2475 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3634 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1155 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tiiuae/falcon-7b/0e9837cb-4dda-4058-a89e-4127b5980eed.json b/data/hfopenllm_v2/tiiuae/falcon-7b/0e9837cb-4dda-4058-a89e-4127b5980eed.json deleted file mode 100644 index 379d7aa27..000000000 --- a/data/hfopenllm_v2/tiiuae/falcon-7b/0e9837cb-4dda-4058-a89e-4127b5980eed.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tiiuae_falcon-7b/1762652580.5693781", - "retrieved_timestamp": "1762652580.569379", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tiiuae/falcon-7b", - "developer": "tiiuae", - "inference_platform": "unknown", - "id": "tiiuae/falcon-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "FalconForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.182051401392749 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32852446117322215 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24496644295302014 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37784375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11253324468085106 - } - } - ] -} diff --git a/data/hfopenllm_v2/tiiuae/falcon-7b/76290d4b-5526-400b-8ca4-24d220f7c02d.json b/data/hfopenllm_v2/tiiuae/falcon-7b/76290d4b-5526-400b-8ca4-24d220f7c02d.json new file mode 100644 index 000000000..24fa62ffd --- /dev/null +++ b/data/hfopenllm_v2/tiiuae/falcon-7b/76290d4b-5526-400b-8ca4-24d220f7c02d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + 
"evaluation_id": "hfopenllm_v2/tiiuae_falcon-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "falcon-7b", + "id": "tiiuae/falcon-7b", + "developer": "tiiuae", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "FalconForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1821 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3285 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0098 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.245 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3778 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1125 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tiiuae/falcon-mamba-7b/3a146535-09b3-4246-8bd8-0e984e0905b1.json b/data/hfopenllm_v2/tiiuae/falcon-mamba-7b/3a146535-09b3-4246-8bd8-0e984e0905b1.json new file mode 100644 index 000000000..78cc692cd --- /dev/null +++ b/data/hfopenllm_v2/tiiuae/falcon-mamba-7b/3a146535-09b3-4246-8bd8-0e984e0905b1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tiiuae_falcon-mamba-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + 
"model_info": { + "name": "falcon-mamba-7b", + "id": "tiiuae/falcon-mamba-7b", + "developer": "tiiuae", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "FalconMambaForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3336 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4285 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0446 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3104 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.421 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2302 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tiiuae/falcon-mamba-7b/9878c419-fff8-402a-a315-70864e5ae60c.json b/data/hfopenllm_v2/tiiuae/falcon-mamba-7b/9878c419-fff8-402a-a315-70864e5ae60c.json deleted file mode 100644 index 64638e51e..000000000 --- a/data/hfopenllm_v2/tiiuae/falcon-mamba-7b/9878c419-fff8-402a-a315-70864e5ae60c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tiiuae_falcon-mamba-7b/1762652580.569833", - "retrieved_timestamp": "1762652580.569834", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tiiuae/falcon-mamba-7b", - "developer": "tiiuae", - "inference_platform": "unknown", - "id": "tiiuae/falcon-mamba-7b", - "additional_details": { - 
"precision": "bfloat16", - "architecture": "FalconMambaForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3335760227307987 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4284854988604366 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0445619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42103124999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23021941489361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/tinycompany/BiBo-v0.3/6683f95c-f97f-4117-b3c5-c1ed9587289e.json b/data/hfopenllm_v2/tinycompany/BiBo-v0.3/6683f95c-f97f-4117-b3c5-c1ed9587289e.json new file mode 100644 index 000000000..731470475 --- /dev/null +++ b/data/hfopenllm_v2/tinycompany/BiBo-v0.3/6683f95c-f97f-4117-b3c5-c1ed9587289e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tinycompany_BiBo-v0.3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BiBo-v0.3", + "id": "tinycompany/BiBo-v0.3", + "developer": "tinycompany", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 2.943 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5184 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4642 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": 
"hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0876 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.395 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tinycompany/BiBo-v0.3/d0907791-99ed-4c01-8df4-80ab6ecc906f.json b/data/hfopenllm_v2/tinycompany/BiBo-v0.3/d0907791-99ed-4c01-8df4-80ab6ecc906f.json deleted file mode 100644 index 23825b0d6..000000000 --- a/data/hfopenllm_v2/tinycompany/BiBo-v0.3/d0907791-99ed-4c01-8df4-80ab6ecc906f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tinycompany_BiBo-v0.3/1762652580.570036", - "retrieved_timestamp": "1762652580.570036", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tinycompany/BiBo-v0.3", - "developer": "tinycompany", - "inference_platform": "unknown", - "id": "tinycompany/BiBo-v0.3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 2.943 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5183989592060179 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4641611514377814 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08761329305135952 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3949895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29945146276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/tinycompany/BiBo-v0.7/8f186e60-a090-4b9e-9910-23054617fe57.json b/data/hfopenllm_v2/tinycompany/BiBo-v0.7/8f186e60-a090-4b9e-9910-23054617fe57.json deleted file mode 100644 index 3f56aa20e..000000000 --- a/data/hfopenllm_v2/tinycompany/BiBo-v0.7/8f186e60-a090-4b9e-9910-23054617fe57.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tinycompany_BiBo-v0.7/1762652580.570291", - "retrieved_timestamp": "1762652580.570291", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tinycompany/BiBo-v0.7", - "developer": "tinycompany", - "inference_platform": "unknown", - "id": "tinycompany/BiBo-v0.7", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 2.943 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3738181358794665 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43108167584271034 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0823262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40441666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2650432180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/tinycompany/BiBo-v0.7/bbe74b2b-9e13-4c13-92c8-618078667248.json b/data/hfopenllm_v2/tinycompany/BiBo-v0.7/bbe74b2b-9e13-4c13-92c8-618078667248.json new file mode 100644 index 000000000..9034a4865 --- /dev/null +++ 
b/data/hfopenllm_v2/tinycompany/BiBo-v0.7/bbe74b2b-9e13-4c13-92c8-618078667248.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tinycompany_BiBo-v0.7/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BiBo-v0.7", + "id": "tinycompany/BiBo-v0.7", + "developer": "tinycompany", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 2.943 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3738 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4311 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0823 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2768 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4044 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.265 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tinycompany/ShawtyIsBad-bgem3/61876ce3-acc4-4619-b0c2-78ac4dff48ea.json b/data/hfopenllm_v2/tinycompany/ShawtyIsBad-bgem3/61876ce3-acc4-4619-b0c2-78ac4dff48ea.json new file mode 100644 index 000000000..1ef8a0185 --- /dev/null +++ b/data/hfopenllm_v2/tinycompany/ShawtyIsBad-bgem3/61876ce3-acc4-4619-b0c2-78ac4dff48ea.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tinycompany_ShawtyIsBad-bgem3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + 
"source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ShawtyIsBad-bgem3", + "id": "tinycompany/ShawtyIsBad-bgem3", + "developer": "tinycompany", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.436 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2608 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3853 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0483 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3054 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3695 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2583 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tinycompany/ShawtyIsBad-bgem3/ebf9067a-9836-4152-aa62-3ecbbc2459dc.json b/data/hfopenllm_v2/tinycompany/ShawtyIsBad-bgem3/ebf9067a-9836-4152-aa62-3ecbbc2459dc.json deleted file mode 100644 index 8b338a6ea..000000000 --- a/data/hfopenllm_v2/tinycompany/ShawtyIsBad-bgem3/ebf9067a-9836-4152-aa62-3ecbbc2459dc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tinycompany_ShawtyIsBad-bgem3/1762652580.570496", - "retrieved_timestamp": "1762652580.570497", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF 
Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tinycompany/ShawtyIsBad-bgem3", - "developer": "tinycompany", - "inference_platform": "unknown", - "id": "tinycompany/ShawtyIsBad-bgem3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.436 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2608113139802391 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38529707856388956 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04833836858006042 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36946875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25831117021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/tinycompany/ShawtyIsBad-e5-large/b304baee-c9de-4982-801d-2b9e7f1a7334.json b/data/hfopenllm_v2/tinycompany/ShawtyIsBad-e5-large/b304baee-c9de-4982-801d-2b9e7f1a7334.json new file mode 100644 index 000000000..6ba06459f --- /dev/null +++ b/data/hfopenllm_v2/tinycompany/ShawtyIsBad-e5-large/b304baee-c9de-4982-801d-2b9e7f1a7334.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tinycompany_ShawtyIsBad-e5-large/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ShawtyIsBad-e5-large", + "id": "tinycompany/ShawtyIsBad-e5-large", + "developer": "tinycompany", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.436 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2468 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3873 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0453 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.372 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2569 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tinycompany/ShawtyIsBad-e5-large/e8fe4b10-f6f3-4036-a3d9-77b8d28822ae.json b/data/hfopenllm_v2/tinycompany/ShawtyIsBad-e5-large/e8fe4b10-f6f3-4036-a3d9-77b8d28822ae.json deleted file mode 100644 index 3f4115bae..000000000 --- a/data/hfopenllm_v2/tinycompany/ShawtyIsBad-e5-large/e8fe4b10-f6f3-4036-a3d9-77b8d28822ae.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tinycompany_ShawtyIsBad-e5-large/1762652580.5709078", - "retrieved_timestamp": "1762652580.570912", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tinycompany/ShawtyIsBad-e5-large", - "developer": "tinycompany", - "inference_platform": "unknown", - "id": "tinycompany/ShawtyIsBad-e5-large", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.436 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24682287441765627 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3873483842947396 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 
5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37204166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25689827127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/tinycompany/ShawtyIsBad-ib/6f27e746-1bdd-4cec-a955-c27f2f9900ef.json b/data/hfopenllm_v2/tinycompany/ShawtyIsBad-ib/6f27e746-1bdd-4cec-a955-c27f2f9900ef.json new file mode 100644 index 000000000..44928ae48 --- /dev/null +++ b/data/hfopenllm_v2/tinycompany/ShawtyIsBad-ib/6f27e746-1bdd-4cec-a955-c27f2f9900ef.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tinycompany_ShawtyIsBad-ib/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ShawtyIsBad-ib", + "id": "tinycompany/ShawtyIsBad-ib", + "developer": "tinycompany", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.436 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2565 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.388 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0491 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + 
"source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3641 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2581 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tinycompany/ShawtyIsBad-ib/e2514850-3847-4fe7-abd8-240762ba507a.json b/data/hfopenllm_v2/tinycompany/ShawtyIsBad-ib/e2514850-3847-4fe7-abd8-240762ba507a.json deleted file mode 100644 index a2ef27670..000000000 --- a/data/hfopenllm_v2/tinycompany/ShawtyIsBad-ib/e2514850-3847-4fe7-abd8-240762ba507a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tinycompany_ShawtyIsBad-ib/1762652580.571291", - "retrieved_timestamp": "1762652580.571292", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tinycompany/ShawtyIsBad-ib", - "developer": "tinycompany", - "inference_platform": "unknown", - "id": "tinycompany/ShawtyIsBad-ib", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.436 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2565149359255664 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3880457874839807 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04909365558912387 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3641041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.258061835106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/tinycompany/ShawtyIsBad-nomic-moe/30637c5d-1bc0-49dc-8afd-335a9a66f196.json 
b/data/hfopenllm_v2/tinycompany/ShawtyIsBad-nomic-moe/30637c5d-1bc0-49dc-8afd-335a9a66f196.json new file mode 100644 index 000000000..27d6d6ef9 --- /dev/null +++ b/data/hfopenllm_v2/tinycompany/ShawtyIsBad-nomic-moe/30637c5d-1bc0-49dc-8afd-335a9a66f196.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tinycompany_ShawtyIsBad-nomic-moe/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ShawtyIsBad-nomic-moe", + "id": "tinycompany/ShawtyIsBad-nomic-moe", + "developer": "tinycompany", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.436 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2608 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3878 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0431 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.307 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3747 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2572 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tinycompany/ShawtyIsBad-nomic-moe/7896d77a-e4c3-431b-9490-26d88664385b.json b/data/hfopenllm_v2/tinycompany/ShawtyIsBad-nomic-moe/7896d77a-e4c3-431b-9490-26d88664385b.json deleted file mode 100644 index 665bfd376..000000000 --- 
a/data/hfopenllm_v2/tinycompany/ShawtyIsBad-nomic-moe/7896d77a-e4c3-431b-9490-26d88664385b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tinycompany_ShawtyIsBad-nomic-moe/1762652580.571543", - "retrieved_timestamp": "1762652580.5715442", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tinycompany/ShawtyIsBad-nomic-moe", - "developer": "tinycompany", - "inference_platform": "unknown", - "id": "tinycompany/ShawtyIsBad-nomic-moe", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.436 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2607614462958284 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3878019225656597 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37470833333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2572307180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/tinycompany/ShawtyIsBad-nomic1.5/169e29b6-50d8-456d-aa20-3fe2f3b19a1e.json b/data/hfopenllm_v2/tinycompany/ShawtyIsBad-nomic1.5/169e29b6-50d8-456d-aa20-3fe2f3b19a1e.json new file mode 100644 index 000000000..354caa8ce --- /dev/null +++ b/data/hfopenllm_v2/tinycompany/ShawtyIsBad-nomic1.5/169e29b6-50d8-456d-aa20-3fe2f3b19a1e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tinycompany_ShawtyIsBad-nomic1.5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ShawtyIsBad-nomic1.5", + "id": "tinycompany/ShawtyIsBad-nomic1.5", + "developer": "tinycompany", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.436 + } + }, + 
"evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2544 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3874 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0431 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3112 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3628 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2567 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tinycompany/ShawtyIsBad-nomic1.5/cbda0920-b298-4db2-806d-65b7d6550b30.json b/data/hfopenllm_v2/tinycompany/ShawtyIsBad-nomic1.5/cbda0920-b298-4db2-806d-65b7d6550b30.json deleted file mode 100644 index 6ea86dedb..000000000 --- a/data/hfopenllm_v2/tinycompany/ShawtyIsBad-nomic1.5/cbda0920-b298-4db2-806d-65b7d6550b30.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tinycompany_ShawtyIsBad-nomic1.5/1762652580.571785", - "retrieved_timestamp": "1762652580.571787", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tinycompany/ShawtyIsBad-nomic1.5", - "developer": "tinycompany", - "inference_platform": "unknown", - "id": "tinycompany/ShawtyIsBad-nomic1.5", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.436 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2543916807404354 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3873599493472512 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36283333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25673204787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/tinycompany/SigmaBoi-base/427d32f7-190b-4005-b02c-6a8ce089dbbf.json b/data/hfopenllm_v2/tinycompany/SigmaBoi-base/427d32f7-190b-4005-b02c-6a8ce089dbbf.json new file mode 100644 index 000000000..13015d742 --- /dev/null +++ b/data/hfopenllm_v2/tinycompany/SigmaBoi-base/427d32f7-190b-4005-b02c-6a8ce089dbbf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tinycompany_SigmaBoi-base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SigmaBoi-base", + "id": "tinycompany/SigmaBoi-base", + "developer": "tinycompany", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 2.943 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2447 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4314 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0778 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4343 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2817 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tinycompany/SigmaBoi-base/e523d43e-a198-4db5-9d91-c4959b136953.json b/data/hfopenllm_v2/tinycompany/SigmaBoi-base/e523d43e-a198-4db5-9d91-c4959b136953.json deleted file mode 100644 index 8a389a9b7..000000000 --- a/data/hfopenllm_v2/tinycompany/SigmaBoi-base/e523d43e-a198-4db5-9d91-c4959b136953.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tinycompany_SigmaBoi-base/1762652580.5720189", - "retrieved_timestamp": "1762652580.57202", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tinycompany/SigmaBoi-base", - "developer": "tinycompany", - "inference_platform": "unknown", - "id": "tinycompany/SigmaBoi-base", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 2.943 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24469961923252526 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4314363391906919 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07779456193353475 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy 
on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43427083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2816655585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/tinycompany/SigmaBoi-bge-m3/383b2f80-774b-4f76-998a-9d3d20a265db.json b/data/hfopenllm_v2/tinycompany/SigmaBoi-bge-m3/383b2f80-774b-4f76-998a-9d3d20a265db.json deleted file mode 100644 index deb75fa32..000000000 --- a/data/hfopenllm_v2/tinycompany/SigmaBoi-bge-m3/383b2f80-774b-4f76-998a-9d3d20a265db.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tinycompany_SigmaBoi-bge-m3/1762652580.572246", - "retrieved_timestamp": "1762652580.572247", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tinycompany/SigmaBoi-bge-m3", - "developer": "tinycompany", - "inference_platform": "unknown", - "id": "tinycompany/SigmaBoi-bge-m3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 2.943 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24502431326657714 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43509173985964184 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07628398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4383020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28191489361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/tinycompany/SigmaBoi-bge-m3/de7551a8-63b1-4de3-899f-9d98cb985005.json b/data/hfopenllm_v2/tinycompany/SigmaBoi-bge-m3/de7551a8-63b1-4de3-899f-9d98cb985005.json new file mode 100644 index 000000000..e2bc5d9e6 --- /dev/null +++ b/data/hfopenllm_v2/tinycompany/SigmaBoi-bge-m3/de7551a8-63b1-4de3-899f-9d98cb985005.json @@ -0,0 +1,132 @@ +{ + 
"schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tinycompany_SigmaBoi-bge-m3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SigmaBoi-bge-m3", + "id": "tinycompany/SigmaBoi-bge-m3", + "developer": "tinycompany", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 2.943 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.245 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4351 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0763 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4383 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2819 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tinycompany/SigmaBoi-bgem3/2b84e1be-81f6-474e-be5b-c5f4e60167fe.json b/data/hfopenllm_v2/tinycompany/SigmaBoi-bgem3/2b84e1be-81f6-474e-be5b-c5f4e60167fe.json deleted file mode 100644 index 19d2b4cb3..000000000 --- a/data/hfopenllm_v2/tinycompany/SigmaBoi-bgem3/2b84e1be-81f6-474e-be5b-c5f4e60167fe.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tinycompany_SigmaBoi-bgem3/1762652580.572469", - "retrieved_timestamp": "1762652580.57247", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - 
], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tinycompany/SigmaBoi-bgem3", - "developer": "tinycompany", - "inference_platform": "unknown", - "id": "tinycompany/SigmaBoi-bgem3", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 2.943 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24502431326657714 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43509173985964184 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07628398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4383020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28191489361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/tinycompany/SigmaBoi-bgem3/eff6f456-906d-4320-8e6f-667fbbf0574a.json b/data/hfopenllm_v2/tinycompany/SigmaBoi-bgem3/eff6f456-906d-4320-8e6f-667fbbf0574a.json new file mode 100644 index 000000000..942fd8495 --- /dev/null +++ b/data/hfopenllm_v2/tinycompany/SigmaBoi-bgem3/eff6f456-906d-4320-8e6f-667fbbf0574a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tinycompany_SigmaBoi-bgem3/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SigmaBoi-bgem3", + "id": "tinycompany/SigmaBoi-bgem3", + "developer": "tinycompany", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 2.943 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.245 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + 
"source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4351 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0763 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4383 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2819 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tinycompany/SigmaBoi-ib/55c0df8c-8dba-4508-8fe3-6ee726fa8a44.json b/data/hfopenllm_v2/tinycompany/SigmaBoi-ib/55c0df8c-8dba-4508-8fe3-6ee726fa8a44.json deleted file mode 100644 index e2d09f7ac..000000000 --- a/data/hfopenllm_v2/tinycompany/SigmaBoi-ib/55c0df8c-8dba-4508-8fe3-6ee726fa8a44.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tinycompany_SigmaBoi-ib/1762652580.572692", - "retrieved_timestamp": "1762652580.572693", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tinycompany/SigmaBoi-ib", - "developer": "tinycompany", - "inference_platform": "unknown", - "id": "tinycompany/SigmaBoi-ib", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 2.943 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24774708883540117 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4343622024096135 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07401812688821752 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42896874999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2824135638297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/tinycompany/SigmaBoi-ib/6cbd9a3a-7e06-4eee-af9e-6db4ff35c36a.json b/data/hfopenllm_v2/tinycompany/SigmaBoi-ib/6cbd9a3a-7e06-4eee-af9e-6db4ff35c36a.json new file mode 100644 index 000000000..4fc792af1 --- /dev/null +++ b/data/hfopenllm_v2/tinycompany/SigmaBoi-ib/6cbd9a3a-7e06-4eee-af9e-6db4ff35c36a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tinycompany_SigmaBoi-ib/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SigmaBoi-ib", + "id": "tinycompany/SigmaBoi-ib", + "developer": "tinycompany", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 2.943 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2477 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4344 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.074 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2878 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + 
"dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.429 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2824 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tinycompany/SigmaBoi-nomic-moe/2dff318a-f64f-407b-acd3-2b1020d3f5cd.json b/data/hfopenllm_v2/tinycompany/SigmaBoi-nomic-moe/2dff318a-f64f-407b-acd3-2b1020d3f5cd.json deleted file mode 100644 index ff36fdcfc..000000000 --- a/data/hfopenllm_v2/tinycompany/SigmaBoi-nomic-moe/2dff318a-f64f-407b-acd3-2b1020d3f5cd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tinycompany_SigmaBoi-nomic-moe/1762652580.57291", - "retrieved_timestamp": "1762652580.572911", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tinycompany/SigmaBoi-nomic-moe", - "developer": "tinycompany", - "inference_platform": "unknown", - "id": "tinycompany/SigmaBoi-nomic-moe", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 2.943 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2474223948013493 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43341835214223373 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43163541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28366023936170215 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/tinycompany/SigmaBoi-nomic-moe/7e3d3803-c8d4-4025-8d12-c4c29c49c059.json b/data/hfopenllm_v2/tinycompany/SigmaBoi-nomic-moe/7e3d3803-c8d4-4025-8d12-c4c29c49c059.json new file mode 100644 index 000000000..954ab673e --- /dev/null +++ b/data/hfopenllm_v2/tinycompany/SigmaBoi-nomic-moe/7e3d3803-c8d4-4025-8d12-c4c29c49c059.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tinycompany_SigmaBoi-nomic-moe/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SigmaBoi-nomic-moe", + "id": "tinycompany/SigmaBoi-nomic-moe", + "developer": "tinycompany", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 2.943 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2474 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4334 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0718 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2928 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4316 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2837 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tinycompany/SigmaBoi-nomic1.5-fp32/39b85f29-d449-40d6-bb0e-cb4790a47cc7.json b/data/hfopenllm_v2/tinycompany/SigmaBoi-nomic1.5-fp32/39b85f29-d449-40d6-bb0e-cb4790a47cc7.json deleted file mode 100644 index b2955700a..000000000 
--- a/data/hfopenllm_v2/tinycompany/SigmaBoi-nomic1.5-fp32/39b85f29-d449-40d6-bb0e-cb4790a47cc7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tinycompany_SigmaBoi-nomic1.5-fp32/1762652580.573416", - "retrieved_timestamp": "1762652580.573416", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tinycompany/SigmaBoi-nomic1.5-fp32", - "developer": "tinycompany", - "inference_platform": "unknown", - "id": "tinycompany/SigmaBoi-nomic1.5-fp32", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 2.943 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24622335403396323 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43705348265770266 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4316041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28407579787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/tinycompany/SigmaBoi-nomic1.5-fp32/a43a6ca9-3543-44bc-8511-ee5c45552070.json b/data/hfopenllm_v2/tinycompany/SigmaBoi-nomic1.5-fp32/a43a6ca9-3543-44bc-8511-ee5c45552070.json new file mode 100644 index 000000000..5ad5da701 --- /dev/null +++ b/data/hfopenllm_v2/tinycompany/SigmaBoi-nomic1.5-fp32/a43a6ca9-3543-44bc-8511-ee5c45552070.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tinycompany_SigmaBoi-nomic1.5-fp32/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SigmaBoi-nomic1.5-fp32", + "id": "tinycompany/SigmaBoi-nomic1.5-fp32", + "developer": "tinycompany", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 2.943 + } + }, 
+ "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2462 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4371 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0831 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4316 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2841 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tinycompany/SigmaBoi-nomic1.5/83f6fdec-9592-45a1-acdf-0ebbb400c8a4.json b/data/hfopenllm_v2/tinycompany/SigmaBoi-nomic1.5/83f6fdec-9592-45a1-acdf-0ebbb400c8a4.json new file mode 100644 index 000000000..323eb5e1d --- /dev/null +++ b/data/hfopenllm_v2/tinycompany/SigmaBoi-nomic1.5/83f6fdec-9592-45a1-acdf-0ebbb400c8a4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tinycompany_SigmaBoi-nomic1.5/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SigmaBoi-nomic1.5", + "id": "tinycompany/SigmaBoi-nomic1.5", + "developer": "tinycompany", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 2.943 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2447 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4371 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0831 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4316 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2841 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tinycompany/SigmaBoi-nomic1.5/9ff57503-4fc4-4d21-8899-d691c912bff9.json b/data/hfopenllm_v2/tinycompany/SigmaBoi-nomic1.5/9ff57503-4fc4-4d21-8899-d691c912bff9.json deleted file mode 100644 index 3fa4af4eb..000000000 --- a/data/hfopenllm_v2/tinycompany/SigmaBoi-nomic1.5/9ff57503-4fc4-4d21-8899-d691c912bff9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tinycompany_SigmaBoi-nomic1.5/1762652580.5731819", - "retrieved_timestamp": "1762652580.5731819", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tinycompany/SigmaBoi-nomic1.5", - "developer": "tinycompany", - "inference_platform": "unknown", - "id": "tinycompany/SigmaBoi-nomic1.5", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 2.943 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24469961923252526 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43705348265770266 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4316041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28407579787234044 - } - } - ] -} diff --git a/data/hfopenllm_v2/tinycompany/Tamed-Shawty/6d2370ea-55ab-4ae7-a11a-c1556e988349.json b/data/hfopenllm_v2/tinycompany/Tamed-Shawty/6d2370ea-55ab-4ae7-a11a-c1556e988349.json deleted file mode 100644 index f14accfa0..000000000 --- a/data/hfopenllm_v2/tinycompany/Tamed-Shawty/6d2370ea-55ab-4ae7-a11a-c1556e988349.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tinycompany_Tamed-Shawty/1762652580.573629", - "retrieved_timestamp": "1762652580.573629", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tinycompany/Tamed-Shawty", - "developer": "tinycompany", - "inference_platform": "unknown", - "id": "tinycompany/Tamed-Shawty", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.562 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38308576798450333 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3837059588999942 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": 
"Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35009375000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2601396276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/tinycompany/Tamed-Shawty/6e2d4174-303f-437b-9abb-26667b1dd04c.json b/data/hfopenllm_v2/tinycompany/Tamed-Shawty/6e2d4174-303f-437b-9abb-26667b1dd04c.json new file mode 100644 index 000000000..7acd05352 --- /dev/null +++ b/data/hfopenllm_v2/tinycompany/Tamed-Shawty/6e2d4174-303f-437b-9abb-26667b1dd04c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tinycompany_Tamed-Shawty/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Tamed-Shawty", + "id": "tinycompany/Tamed-Shawty", + "developer": "tinycompany", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.562 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3831 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3837 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0718 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2626 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3501 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tklohj/WindyFloLLM/53f0c477-6f06-427a-be34-5b0131cbf9e1.json b/data/hfopenllm_v2/tklohj/WindyFloLLM/53f0c477-6f06-427a-be34-5b0131cbf9e1.json deleted file mode 100644 index 825835fe7..000000000 --- a/data/hfopenllm_v2/tklohj/WindyFloLLM/53f0c477-6f06-427a-be34-5b0131cbf9e1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tklohj_WindyFloLLM/1762652580.573854", - "retrieved_timestamp": "1762652580.573855", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tklohj/WindyFloLLM", - "developer": "tklohj", - "inference_platform": "unknown", - "id": "tklohj/WindyFloLLM", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.016 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26685638550158025 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4636616007058791 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4253125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25814494680851063 - } - } - ] -} diff --git a/data/hfopenllm_v2/tklohj/WindyFloLLM/955e93d0-bec1-483c-b3f0-258e13d5cb16.json b/data/hfopenllm_v2/tklohj/WindyFloLLM/955e93d0-bec1-483c-b3f0-258e13d5cb16.json new file mode 100644 index 000000000..7110c6cac --- /dev/null +++ b/data/hfopenllm_v2/tklohj/WindyFloLLM/955e93d0-bec1-483c-b3f0-258e13d5cb16.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tklohj_WindyFloLLM/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + 
"name": "WindyFloLLM", + "id": "tklohj/WindyFloLLM", + "developer": "tklohj", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.016 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2669 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4637 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0159 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2752 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4253 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2581 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/togethercomputer/GPT-JT-6B-v1/3065ca79-c5e9-4875-9f81-4231e971d818.json b/data/hfopenllm_v2/togethercomputer/GPT-JT-6B-v1/3065ca79-c5e9-4875-9f81-4231e971d818.json new file mode 100644 index 000000000..3b3a90f5a --- /dev/null +++ b/data/hfopenllm_v2/togethercomputer/GPT-JT-6B-v1/3065ca79-c5e9-4875-9f81-4231e971d818.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/togethercomputer_GPT-JT-6B-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "GPT-JT-6B-v1", + "id": "togethercomputer/GPT-JT-6B-v1", + "developer": "togethercomputer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GPTJForCausalLM", + "params_billions": 6.0 + } + }, + 
"evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2061 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3303 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0106 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3737 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1626 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/togethercomputer/GPT-NeoXT-Chat-Base-20B/3b5ca740-a1e5-4043-ad56-c772bbdd1b38.json b/data/hfopenllm_v2/togethercomputer/GPT-NeoXT-Chat-Base-20B/3b5ca740-a1e5-4043-ad56-c772bbdd1b38.json deleted file mode 100644 index 4c9f20580..000000000 --- a/data/hfopenllm_v2/togethercomputer/GPT-NeoXT-Chat-Base-20B/3b5ca740-a1e5-4043-ad56-c772bbdd1b38.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/togethercomputer_GPT-NeoXT-Chat-Base-20B/1762652580.574344", - "retrieved_timestamp": "1762652580.5743449", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "togethercomputer/GPT-NeoXT-Chat-Base-20B", - "developer": "togethercomputer", - "inference_platform": "unknown", - "id": "togethercomputer/GPT-NeoXT-Chat-Base-20B", - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 20.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": 
"IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18297561581049393 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33209702572173033 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.023413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3460625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11452792553191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/togethercomputer/GPT-NeoXT-Chat-Base-20B/fc7e485f-a416-420b-b43c-e45e502c4a8f.json b/data/hfopenllm_v2/togethercomputer/GPT-NeoXT-Chat-Base-20B/fc7e485f-a416-420b-b43c-e45e502c4a8f.json new file mode 100644 index 000000000..c6293a07a --- /dev/null +++ b/data/hfopenllm_v2/togethercomputer/GPT-NeoXT-Chat-Base-20B/fc7e485f-a416-420b-b43c-e45e502c4a8f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/togethercomputer_GPT-NeoXT-Chat-Base-20B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "GPT-NeoXT-Chat-Base-20B", + "id": "togethercomputer/GPT-NeoXT-Chat-Base-20B", + "developer": "togethercomputer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 20.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.183 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3321 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + 
"metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0234 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3461 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1145 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/togethercomputer/LLaMA-2-7B-32K/53e882c6-6eb5-4202-a8d0-3a313556c9f4.json b/data/hfopenllm_v2/togethercomputer/LLaMA-2-7B-32K/53e882c6-6eb5-4202-a8d0-3a313556c9f4.json new file mode 100644 index 000000000..f637076fd --- /dev/null +++ b/data/hfopenllm_v2/togethercomputer/LLaMA-2-7B-32K/53e882c6-6eb5-4202-a8d0-3a313556c9f4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/togethercomputer_LLaMA-2-7B-32K/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LLaMA-2-7B-32K", + "id": "togethercomputer/LLaMA-2-7B-32K", + "developer": "togethercomputer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1865 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.34 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0144 + } + }, + { + 
"evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3754 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1768 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/togethercomputer/Llama-2-7B-32K-Instruct/a1609dba-826b-4246-9230-35bd68268fe4.json b/data/hfopenllm_v2/togethercomputer/Llama-2-7B-32K-Instruct/a1609dba-826b-4246-9230-35bd68268fe4.json deleted file mode 100644 index 8c4002da9..000000000 --- a/data/hfopenllm_v2/togethercomputer/Llama-2-7B-32K-Instruct/a1609dba-826b-4246-9230-35bd68268fe4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/togethercomputer_Llama-2-7B-32K-Instruct/1762652580.574983", - "retrieved_timestamp": "1762652580.5749838", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "togethercomputer/Llama-2-7B-32K-Instruct", - "developer": "togethercomputer", - "inference_platform": "unknown", - "id": "togethercomputer/Llama-2-7B-32K-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2130003945087922 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34434724239927544 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40559375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17810837765957446 - } - } - ] -} diff --git a/data/hfopenllm_v2/togethercomputer/Llama-2-7B-32K-Instruct/ba715669-c0ed-471f-80a6-b67453fb4930.json b/data/hfopenllm_v2/togethercomputer/Llama-2-7B-32K-Instruct/ba715669-c0ed-471f-80a6-b67453fb4930.json new file mode 100644 index 000000000..37237a2f5 --- /dev/null +++ b/data/hfopenllm_v2/togethercomputer/Llama-2-7B-32K-Instruct/ba715669-c0ed-471f-80a6-b67453fb4930.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/togethercomputer_Llama-2-7B-32K-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-2-7B-32K-Instruct", + "id": "togethercomputer/Llama-2-7B-32K-Instruct", + "developer": "togethercomputer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.213 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3443 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0159 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2517 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4056 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1781 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-7B-Base/316cab27-5cac-4d26-90ae-05d1fc3bd14a.json b/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-7B-Base/316cab27-5cac-4d26-90ae-05d1fc3bd14a.json new file mode 100644 index 000000000..26801a949 --- /dev/null +++ b/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-7B-Base/316cab27-5cac-4d26-90ae-05d1fc3bd14a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/togethercomputer_RedPajama-INCITE-7B-Base/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RedPajama-INCITE-7B-Base", + "id": "togethercomputer/RedPajama-INCITE-7B-Base", + "developer": "togethercomputer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2082 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3195 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0159 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.255 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.362 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1197 + } + } + ] +} \ 
No newline at end of file diff --git a/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-7B-Base/8d69f711-74c9-4c1e-87dc-9b46f70674bb.json b/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-7B-Base/8d69f711-74c9-4c1e-87dc-9b46f70674bb.json deleted file mode 100644 index 797ea3be2..000000000 --- a/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-7B-Base/8d69f711-74c9-4c1e-87dc-9b46f70674bb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/togethercomputer_RedPajama-INCITE-7B-Base/1762652580.5751948", - "retrieved_timestamp": "1762652580.5751958", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "togethercomputer/RedPajama-INCITE-7B-Base", - "developer": "togethercomputer", - "inference_platform": "unknown", - "id": "togethercomputer/RedPajama-INCITE-7B-Base", - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20822971936683554 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31948898765013445 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2550335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36199999999999993 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1196808510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-7B-Chat/c3b6efec-5428-499f-8e6b-e3b2b87a0d15.json b/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-7B-Chat/c3b6efec-5428-499f-8e6b-e3b2b87a0d15.json deleted file mode 100644 index dceab47a3..000000000 --- a/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-7B-Chat/c3b6efec-5428-499f-8e6b-e3b2b87a0d15.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/togethercomputer_RedPajama-INCITE-7B-Chat/1762652580.57541", - "retrieved_timestamp": "1762652580.5754108", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], 
- "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "togethercomputer/RedPajama-INCITE-7B-Chat", - "developer": "togethercomputer", - "inference_platform": "unknown", - "id": "togethercomputer/RedPajama-INCITE-7B-Chat", - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1557977278066641 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3175449328457368 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3447604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11211768617021277 - } - } - ] -} diff --git a/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-7B-Chat/d2b0a35a-ea72-42f4-9f71-fffa1480bc22.json b/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-7B-Chat/d2b0a35a-ea72-42f4-9f71-fffa1480bc22.json new file mode 100644 index 000000000..56a01fe9b --- /dev/null +++ b/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-7B-Chat/d2b0a35a-ea72-42f4-9f71-fffa1480bc22.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/togethercomputer_RedPajama-INCITE-7B-Chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RedPajama-INCITE-7B-Chat", + "id": "togethercomputer/RedPajama-INCITE-7B-Chat", + "developer": "togethercomputer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.1558 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3175 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0068 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2525 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3448 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1121 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-7B-Instruct/bf3eabff-fbf7-421c-9e04-548accc7678c.json b/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-7B-Instruct/bf3eabff-fbf7-421c-9e04-548accc7678c.json new file mode 100644 index 000000000..ac9fb012b --- /dev/null +++ b/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-7B-Instruct/bf3eabff-fbf7-421c-9e04-548accc7678c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/togethercomputer_RedPajama-INCITE-7B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RedPajama-INCITE-7B-Instruct", + "id": "togethercomputer/RedPajama-INCITE-7B-Instruct", + "developer": "togethercomputer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2055 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + 
"hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3377 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0211 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2508 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3685 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1272 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-7B-Instruct/d8cef007-51ab-4793-9a74-d9f29d6c0f27.json b/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-7B-Instruct/d8cef007-51ab-4793-9a74-d9f29d6c0f27.json deleted file mode 100644 index 745ec09b8..000000000 --- a/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-7B-Instruct/d8cef007-51ab-4793-9a74-d9f29d6c0f27.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/togethercomputer_RedPajama-INCITE-7B-Instruct/1762652580.57568", - "retrieved_timestamp": "1762652580.575681", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "togethercomputer/RedPajama-INCITE-7B-Instruct", - "developer": "togethercomputer", - "inference_platform": "unknown", - "id": "togethercomputer/RedPajama-INCITE-7B-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 7.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2055069437980115 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.337743947089799 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.021148036253776436 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25083892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3685104166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1272440159574468 - } - } - ] -} diff --git a/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-Base-3B-v1/b7eeedd8-33ef-46b3-a3fb-6ac87247bc4e.json b/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-Base-3B-v1/b7eeedd8-33ef-46b3-a3fb-6ac87247bc4e.json new file mode 100644 index 000000000..dff22f907 --- /dev/null +++ b/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-Base-3B-v1/b7eeedd8-33ef-46b3-a3fb-6ac87247bc4e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/togethercomputer_RedPajama-INCITE-Base-3B-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RedPajama-INCITE-Base-3B-v1", + "id": "togethercomputer/RedPajama-INCITE-Base-3B-v1", + "developer": "togethercomputer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 3.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2294 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.306 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0144 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2433 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3739 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1111 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-Base-3B-v1/ba5c73b3-4785-44ef-8bfb-cfbbbdc16a91.json b/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-Base-3B-v1/ba5c73b3-4785-44ef-8bfb-cfbbbdc16a91.json deleted file mode 100644 index 05f94ec0d..000000000 --- a/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-Base-3B-v1/ba5c73b3-4785-44ef-8bfb-cfbbbdc16a91.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/togethercomputer_RedPajama-INCITE-Base-3B-v1/1762652580.575899", - "retrieved_timestamp": "1762652580.5758998", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "togethercomputer/RedPajama-INCITE-Base-3B-v1", - "developer": "togethercomputer", - "inference_platform": "unknown", - "id": "togethercomputer/RedPajama-INCITE-Base-3B-v1", - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 3.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22936253584932426 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3060403878987615 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24328859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37387499999999996 - } - }, - { - "evaluation_name": 
"MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11112034574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-Chat-3B-v1/9a0e6d99-4f86-4ce8-9b5a-f7b6c0fbd710.json b/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-Chat-3B-v1/9a0e6d99-4f86-4ce8-9b5a-f7b6c0fbd710.json deleted file mode 100644 index f37c18522..000000000 --- a/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-Chat-3B-v1/9a0e6d99-4f86-4ce8-9b5a-f7b6c0fbd710.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/togethercomputer_RedPajama-INCITE-Chat-3B-v1/1762652580.5763452", - "retrieved_timestamp": "1762652580.5763478", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "togethercomputer/RedPajama-INCITE-Chat-3B-v1", - "developer": "togethercomputer", - "inference_platform": "unknown", - "id": "togethercomputer/RedPajama-INCITE-Chat-3B-v1", - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 3.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16521496296493304 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32166937119202416 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24412751677852348 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3684479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11269946808510638 - } - } - ] -} diff --git a/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-Chat-3B-v1/b1c41abe-e7f6-4229-b776-8ed0b5f91bd4.json b/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-Chat-3B-v1/b1c41abe-e7f6-4229-b776-8ed0b5f91bd4.json new file mode 100644 index 000000000..28081c01d --- /dev/null +++ b/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-Chat-3B-v1/b1c41abe-e7f6-4229-b776-8ed0b5f91bd4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + 
"evaluation_id": "hfopenllm_v2/togethercomputer_RedPajama-INCITE-Chat-3B-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RedPajama-INCITE-Chat-3B-v1", + "id": "togethercomputer/RedPajama-INCITE-Chat-3B-v1", + "developer": "togethercomputer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 3.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1652 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3217 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0091 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2441 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3684 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1127 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-Instruct-3B-v1/5b769770-3b63-4863-a723-95212e2be40e.json b/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-Instruct-3B-v1/5b769770-3b63-4863-a723-95212e2be40e.json new file mode 100644 index 000000000..503ea0a24 --- /dev/null +++ b/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-Instruct-3B-v1/5b769770-3b63-4863-a723-95212e2be40e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/togethercomputer_RedPajama-INCITE-Instruct-3B-v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + 
"source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RedPajama-INCITE-Instruct-3B-v1", + "id": "togethercomputer/RedPajama-INCITE-Instruct-3B-v1", + "developer": "togethercomputer", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 3.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2124 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3146 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0128 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2475 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3886 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.111 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-Instruct-3B-v1/e78a3888-33c7-4264-a01e-b0661504322f.json b/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-Instruct-3B-v1/e78a3888-33c7-4264-a01e-b0661504322f.json deleted file mode 100644 index 1c87b6d1d..000000000 --- a/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-Instruct-3B-v1/e78a3888-33c7-4264-a01e-b0661504322f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/togethercomputer_RedPajama-INCITE-Instruct-3B-v1/1762652580.576687", - "retrieved_timestamp": "1762652580.576688", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "togethercomputer/RedPajama-INCITE-Instruct-3B-v1", - "developer": "togethercomputer", - "inference_platform": "unknown", - "id": "togethercomputer/RedPajama-INCITE-Instruct-3B-v1", - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 3.0 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2124263620526869 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3146017752057237 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24748322147651006 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38860416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11095412234042554 - } - } - ] -} diff --git a/data/hfopenllm_v2/tokyotech-llm/Llama-3-Swallow-8B-Instruct-v0.1/f2264b41-efa5-4278-91fd-2f454aa91c61.json b/data/hfopenllm_v2/tokyotech-llm/Llama-3-Swallow-8B-Instruct-v0.1/f2264b41-efa5-4278-91fd-2f454aa91c61.json new file mode 100644 index 000000000..d7ea093eb --- /dev/null +++ b/data/hfopenllm_v2/tokyotech-llm/Llama-3-Swallow-8B-Instruct-v0.1/f2264b41-efa5-4278-91fd-2f454aa91c61.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tokyotech-llm_Llama-3-Swallow-8B-Instruct-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-Swallow-8B-Instruct-v0.1", + "id": "tokyotech-llm/Llama-3-Swallow-8B-Instruct-v0.1", + "developer": "tokyotech-llm", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.5508 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5009 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0748 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4357 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3088 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tokyotech-llm/Llama-3-Swallow-8B-Instruct-v0.1/f6729e0a-559f-4087-af75-37634bf0af62.json b/data/hfopenllm_v2/tokyotech-llm/Llama-3-Swallow-8B-Instruct-v0.1/f6729e0a-559f-4087-af75-37634bf0af62.json deleted file mode 100644 index 7df4e6b1d..000000000 --- a/data/hfopenllm_v2/tokyotech-llm/Llama-3-Swallow-8B-Instruct-v0.1/f6729e0a-559f-4087-af75-37634bf0af62.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tokyotech-llm_Llama-3-Swallow-8B-Instruct-v0.1/1762652580.5769222", - "retrieved_timestamp": "1762652580.576923", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tokyotech-llm/Llama-3-Swallow-8B-Instruct-v0.1", - "developer": "tokyotech-llm", - "inference_platform": "unknown", - "id": "tokyotech-llm/Llama-3-Swallow-8B-Instruct-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5507719517546776 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { 
- "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5009389976232003 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07477341389728097 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43569791666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087599734042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/tomasmcm/sky-t1-coder-32b-flash/1229310f-22aa-4ef9-b354-71fa249569f7.json b/data/hfopenllm_v2/tomasmcm/sky-t1-coder-32b-flash/1229310f-22aa-4ef9-b354-71fa249569f7.json deleted file mode 100644 index 9e453d37c..000000000 --- a/data/hfopenllm_v2/tomasmcm/sky-t1-coder-32b-flash/1229310f-22aa-4ef9-b354-71fa249569f7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tomasmcm_sky-t1-coder-32b-flash/1762652580.577295", - "retrieved_timestamp": "1762652580.5772958", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tomasmcm/sky-t1-coder-32b-flash", - "developer": "tomasmcm", - "inference_platform": "unknown", - "id": "tomasmcm/sky-t1-coder-32b-flash", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7780090160773414 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6822440044314982 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5422960725075529 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36828859060402686 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4232708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5782081117021277 - } - } - ] -} diff --git a/data/hfopenllm_v2/tomasmcm/sky-t1-coder-32b-flash/5c3484b4-6faa-47fd-a1a2-881898450f79.json b/data/hfopenllm_v2/tomasmcm/sky-t1-coder-32b-flash/5c3484b4-6faa-47fd-a1a2-881898450f79.json new file mode 100644 index 000000000..49195f496 --- /dev/null +++ b/data/hfopenllm_v2/tomasmcm/sky-t1-coder-32b-flash/5c3484b4-6faa-47fd-a1a2-881898450f79.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tomasmcm_sky-t1-coder-32b-flash/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sky-t1-coder-32b-flash", + "id": "tomasmcm/sky-t1-coder-32b-flash", + "developer": "tomasmcm", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.778 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6822 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5423 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3683 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4233 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + 
}, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5782 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/trthminh1112/autotrain-llama32-1b-finetune/326b95f8-9eae-4064-a261-077a957e233c.json b/data/hfopenllm_v2/trthminh1112/autotrain-llama32-1b-finetune/326b95f8-9eae-4064-a261-077a957e233c.json new file mode 100644 index 000000000..2a5e33192 --- /dev/null +++ b/data/hfopenllm_v2/trthminh1112/autotrain-llama32-1b-finetune/326b95f8-9eae-4064-a261-077a957e233c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/trthminh1112_autotrain-llama32-1b-finetune/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "autotrain-llama32-1b-finetune", + "id": "trthminh1112/autotrain-llama32-1b-finetune", + "developer": "trthminh1112", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.1 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1769 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2996 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0151 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2567 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3513 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + 
}, + "score_details": { + "score": 0.1099 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tugstugi/Qwen2.5-7B-Instruct-QwQ-v0.1/1cfb7d70-b903-48ae-bdb2-31c838bdabc8.json b/data/hfopenllm_v2/tugstugi/Qwen2.5-7B-Instruct-QwQ-v0.1/1cfb7d70-b903-48ae-bdb2-31c838bdabc8.json deleted file mode 100644 index fdefdf254..000000000 --- a/data/hfopenllm_v2/tugstugi/Qwen2.5-7B-Instruct-QwQ-v0.1/1cfb7d70-b903-48ae-bdb2-31c838bdabc8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/tugstugi_Qwen2.5-7B-Instruct-QwQ-v0.1/1762652580.577852", - "retrieved_timestamp": "1762652580.577852", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "tugstugi/Qwen2.5-7B-Instruct-QwQ-v0.1", - "developer": "tugstugi", - "inference_platform": "unknown", - "id": "tugstugi/Qwen2.5-7B-Instruct-QwQ-v0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6017300761978217 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5101062293388118 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3814199395770393 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3794270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4080784574468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/tugstugi/Qwen2.5-7B-Instruct-QwQ-v0.1/c1c7336e-b8bf-4a69-a586-c1a224ba8a65.json b/data/hfopenllm_v2/tugstugi/Qwen2.5-7B-Instruct-QwQ-v0.1/c1c7336e-b8bf-4a69-a586-c1a224ba8a65.json new file mode 100644 index 000000000..7f871c221 --- /dev/null +++ b/data/hfopenllm_v2/tugstugi/Qwen2.5-7B-Instruct-QwQ-v0.1/c1c7336e-b8bf-4a69-a586-c1a224ba8a65.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/tugstugi_Qwen2.5-7B-Instruct-QwQ-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-7B-Instruct-QwQ-v0.1", + "id": "tugstugi/Qwen2.5-7B-Instruct-QwQ-v0.1", + "developer": "tugstugi", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6017 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5101 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3814 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2685 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3794 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4081 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/universalml/NepaliGPT-2.0/89e55482-b762-4f5d-a021-211048719bdc.json b/data/hfopenllm_v2/universalml/NepaliGPT-2.0/89e55482-b762-4f5d-a021-211048719bdc.json new file mode 100644 index 000000000..ae1d50c1b --- /dev/null +++ b/data/hfopenllm_v2/universalml/NepaliGPT-2.0/89e55482-b762-4f5d-a021-211048719bdc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/universalml_NepaliGPT-2.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NepaliGPT-2.0", + "id": "universalml/NepaliGPT-2.0", + "developer": "universalml", + "inference_platform": "unknown", + 
"additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0365 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.466 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0045 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4657 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.33 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/unsloth/Llama-3.2-1B-Instruct-no-system-message/81018e12-63f8-4ad8-87c4-181a13202497.json b/data/hfopenllm_v2/unsloth/Llama-3.2-1B-Instruct-no-system-message/81018e12-63f8-4ad8-87c4-181a13202497.json new file mode 100644 index 000000000..71f6ae6c9 --- /dev/null +++ b/data/hfopenllm_v2/unsloth/Llama-3.2-1B-Instruct-no-system-message/81018e12-63f8-4ad8-87c4-181a13202497.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/unsloth_Llama-3.2-1B-Instruct-no-system-message/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-1B-Instruct-no-system-message", + "id": "unsloth/Llama-3.2-1B-Instruct-no-system-message", + "developer": "unsloth", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": 
[ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.565 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3544 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0755 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2727 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3341 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1669 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/unsloth/Llama-3.2-1B-Instruct-no-system-message/d8d52ed0-2eb6-4be3-9e4e-346a6b19ceca.json b/data/hfopenllm_v2/unsloth/Llama-3.2-1B-Instruct-no-system-message/d8d52ed0-2eb6-4be3-9e4e-346a6b19ceca.json deleted file mode 100644 index 032a0e11b..000000000 --- a/data/hfopenllm_v2/unsloth/Llama-3.2-1B-Instruct-no-system-message/d8d52ed0-2eb6-4be3-9e4e-346a6b19ceca.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/unsloth_Llama-3.2-1B-Instruct-no-system-message/1762652580.578731", - "retrieved_timestamp": "1762652580.578733", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "unsloth/Llama-3.2-1B-Instruct-no-system-message", - "developer": "unsloth", - "inference_platform": "unknown", - "id": "unsloth/Llama-3.2-1B-Instruct-no-system-message", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5649853499824908 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3543744783345775 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0755287009063444 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3340625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1668882978723404 - } - } - ] -} diff --git a/data/hfopenllm_v2/unsloth/Llama-3.2-1B-Instruct/25ec2dbd-465f-40a9-80f0-e4001e621303.json b/data/hfopenllm_v2/unsloth/Llama-3.2-1B-Instruct/25ec2dbd-465f-40a9-80f0-e4001e621303.json deleted file mode 100644 index 7aec79127..000000000 --- a/data/hfopenllm_v2/unsloth/Llama-3.2-1B-Instruct/25ec2dbd-465f-40a9-80f0-e4001e621303.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/unsloth_Llama-3.2-1B-Instruct/1762652580.578335", - "retrieved_timestamp": "1762652580.578335", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "unsloth/Llama-3.2-1B-Instruct", - "developer": "unsloth", - "inference_platform": "unknown", - "id": "unsloth/Llama-3.2-1B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5809973093613834 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34847036874553655 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0823262839879154 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17420212765957446 - } - } - ] -} diff --git a/data/hfopenllm_v2/unsloth/Llama-3.2-1B-Instruct/5b09e8cb-aaf1-48fd-a2f4-11a8d4bc9a4d.json b/data/hfopenllm_v2/unsloth/Llama-3.2-1B-Instruct/5b09e8cb-aaf1-48fd-a2f4-11a8d4bc9a4d.json new file mode 100644 index 000000000..1bdf24114 --- /dev/null +++ b/data/hfopenllm_v2/unsloth/Llama-3.2-1B-Instruct/5b09e8cb-aaf1-48fd-a2f4-11a8d4bc9a4d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/unsloth_Llama-3.2-1B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-1B-Instruct", + "id": "unsloth/Llama-3.2-1B-Instruct", + "developer": "unsloth", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.581 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3485 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0823 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3196 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1742 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/unsloth/Phi-3-mini-4k-instruct/36d52065-1de2-4661-bf23-85276a8ede2f.json b/data/hfopenllm_v2/unsloth/Phi-3-mini-4k-instruct/36d52065-1de2-4661-bf23-85276a8ede2f.json deleted file mode 100644 index 0aa5c4366..000000000 --- a/data/hfopenllm_v2/unsloth/Phi-3-mini-4k-instruct/36d52065-1de2-4661-bf23-85276a8ede2f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/unsloth_Phi-3-mini-4k-instruct/1762652580.579097", - "retrieved_timestamp": "1762652580.5790982", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "unsloth/Phi-3-mini-4k-instruct", - "developer": "unsloth", - "inference_platform": "unknown", - "id": "unsloth/Phi-3-mini-4k-instruct", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.544027624480822 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5500239467441027 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16389728096676737 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42841666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4030917553191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/unsloth/Phi-3-mini-4k-instruct/8b344f21-9038-4b15-aba8-308aa62e4b39.json b/data/hfopenllm_v2/unsloth/Phi-3-mini-4k-instruct/8b344f21-9038-4b15-aba8-308aa62e4b39.json new file mode 100644 index 000000000..0e0bd0c1a --- 
/dev/null +++ b/data/hfopenllm_v2/unsloth/Phi-3-mini-4k-instruct/8b344f21-9038-4b15-aba8-308aa62e4b39.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/unsloth_Phi-3-mini-4k-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-3-mini-4k-instruct", + "id": "unsloth/Phi-3-mini-4k-instruct", + "developer": "unsloth", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.544 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.55 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1639 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.323 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4284 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4031 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/unsloth/phi-4-bnb-4bit/68ca8f7c-88c2-4ede-bcb7-d4ae23429d8f.json b/data/hfopenllm_v2/unsloth/phi-4-bnb-4bit/68ca8f7c-88c2-4ede-bcb7-d4ae23429d8f.json new file mode 100644 index 000000000..04ac53e78 --- /dev/null +++ b/data/hfopenllm_v2/unsloth/phi-4-bnb-4bit/68ca8f7c-88c2-4ede-bcb7-d4ae23429d8f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/unsloth_phi-4-bnb-4bit/1770682486.623709", + "retrieved_timestamp": 
"1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "phi-4-bnb-4bit", + "id": "unsloth/phi-4-bnb-4bit", + "developer": "unsloth", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.058 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.673 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.677 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4607 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3381 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4007 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5256 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/unsloth/phi-4-unsloth-bnb-4bit/df557f25-5505-49dd-a0cb-88fff601c6e2.json b/data/hfopenllm_v2/unsloth/phi-4-unsloth-bnb-4bit/df557f25-5505-49dd-a0cb-88fff601c6e2.json new file mode 100644 index 000000000..5a64d5d96 --- /dev/null +++ b/data/hfopenllm_v2/unsloth/phi-4-unsloth-bnb-4bit/df557f25-5505-49dd-a0cb-88fff601c6e2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/unsloth_phi-4-unsloth-bnb-4bit/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "phi-4-unsloth-bnb-4bit", + 
"id": "unsloth/phi-4-unsloth-bnb-4bit", + "developer": "unsloth", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.483 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6794 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6791 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4562 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3364 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4034 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5286 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/unsloth/phi-4/a50bf387-bf34-490f-979a-b6217a85a1bd.json b/data/hfopenllm_v2/unsloth/phi-4/a50bf387-bf34-490f-979a-b6217a85a1bd.json new file mode 100644 index 000000000..cbdc7e8b4 --- /dev/null +++ b/data/hfopenllm_v2/unsloth/phi-4/a50bf387-bf34-490f-979a-b6217a85a1bd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/unsloth_phi-4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "phi-4", + "id": "unsloth/phi-4", + "developer": "unsloth", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + 
"source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6882 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6886 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3364 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4114 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5378 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/upstage/SOLAR-10.7B-Instruct-v1.0/89264aa0-3bed-41d3-b171-2a5434cc990f.json b/data/hfopenllm_v2/upstage/SOLAR-10.7B-Instruct-v1.0/89264aa0-3bed-41d3-b171-2a5434cc990f.json new file mode 100644 index 000000000..efc06b49f --- /dev/null +++ b/data/hfopenllm_v2/upstage/SOLAR-10.7B-Instruct-v1.0/89264aa0-3bed-41d3-b171-2a5434cc990f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/upstage_SOLAR-10.7B-Instruct-v1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SOLAR-10.7B-Instruct-v1.0", + "id": "upstage/SOLAR-10.7B-Instruct-v1.0", + "developer": "upstage", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 
+ }, + "score_details": { + "score": 0.4737 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5162 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0566 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3899 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3138 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/upstage/SOLAR-10.7B-Instruct-v1.0/9d750c83-0b27-437b-ae33-dd21a3313a04.json b/data/hfopenllm_v2/upstage/SOLAR-10.7B-Instruct-v1.0/9d750c83-0b27-437b-ae33-dd21a3313a04.json deleted file mode 100644 index a1d095c63..000000000 --- a/data/hfopenllm_v2/upstage/SOLAR-10.7B-Instruct-v1.0/9d750c83-0b27-437b-ae33-dd21a3313a04.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/upstage_SOLAR-10.7B-Instruct-v1.0/1762652580.580213", - "retrieved_timestamp": "1762652580.58022", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "upstage/SOLAR-10.7B-Instruct-v1.0", - "developer": "upstage", - "inference_platform": "unknown", - "id": "upstage/SOLAR-10.7B-Instruct-v1.0", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4736609972650345 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5162494941446991 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3899375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31382978723404253 - } - } - ] -} diff --git a/data/hfopenllm_v2/upstage/SOLAR-10.7B-v1.0/a3272caf-a292-4dc7-8932-636a4099ca6b.json b/data/hfopenllm_v2/upstage/SOLAR-10.7B-v1.0/a3272caf-a292-4dc7-8932-636a4099ca6b.json new file mode 100644 index 000000000..711e8ad03 --- /dev/null +++ b/data/hfopenllm_v2/upstage/SOLAR-10.7B-v1.0/a3272caf-a292-4dc7-8932-636a4099ca6b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/upstage_SOLAR-10.7B-v1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SOLAR-10.7B-v1.0", + "id": "upstage/SOLAR-10.7B-v1.0", + "developer": "upstage", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2421 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5094 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0264 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4372 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.34 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/upstage/SOLAR-10.7B-v1.0/b29dbad1-7c1c-4ed2-8f44-45d54fed4880.json b/data/hfopenllm_v2/upstage/SOLAR-10.7B-v1.0/b29dbad1-7c1c-4ed2-8f44-45d54fed4880.json deleted file mode 100644 index 1bc55d1cb..000000000 --- a/data/hfopenllm_v2/upstage/SOLAR-10.7B-v1.0/b29dbad1-7c1c-4ed2-8f44-45d54fed4880.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/upstage_SOLAR-10.7B-v1.0/1762652580.5805068", - "retrieved_timestamp": "1762652580.580508", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "upstage/SOLAR-10.7B-v1.0", - "developer": "upstage", - "inference_platform": "unknown", - "id": "upstage/SOLAR-10.7B-v1.0", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24212644671693329 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5093873084711799 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43715624999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.3400099734042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/upstage/solar-pro-preview-instruct/00398bb3-0c84-4b3b-bcf1-61e84313b3e3.json b/data/hfopenllm_v2/upstage/solar-pro-preview-instruct/00398bb3-0c84-4b3b-bcf1-61e84313b3e3.json deleted file mode 100644 index 84146711d..000000000 --- a/data/hfopenllm_v2/upstage/solar-pro-preview-instruct/00398bb3-0c84-4b3b-bcf1-61e84313b3e3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/upstage_solar-pro-preview-instruct/1762652580.5807302", - "retrieved_timestamp": "1762652580.580731", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "upstage/solar-pro-preview-instruct", - "developer": "upstage", - "inference_platform": "unknown", - "id": "upstage/solar-pro-preview-instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "SolarForCausalLM", - "params_billions": 22.14 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8415814483348626 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6816843051379534 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37080536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44165625000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.52734375 - } - } - ] -} diff --git a/data/hfopenllm_v2/upstage/solar-pro-preview-instruct/c4ade77e-628f-457d-bbe1-3e5a0cb19d04.json b/data/hfopenllm_v2/upstage/solar-pro-preview-instruct/c4ade77e-628f-457d-bbe1-3e5a0cb19d04.json new file mode 100644 index 000000000..86a94ac3f --- /dev/null +++ b/data/hfopenllm_v2/upstage/solar-pro-preview-instruct/c4ade77e-628f-457d-bbe1-3e5a0cb19d04.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/upstage_solar-pro-preview-instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + 
"evaluator_relationship": "third_party" + }, + "model_info": { + "name": "solar-pro-preview-instruct", + "id": "upstage/solar-pro-preview-instruct", + "developer": "upstage", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "SolarForCausalLM", + "params_billions": 22.14 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8416 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6817 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2205 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3708 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4417 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5273 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/utkmst/chimera-beta-test2-lora-merged/00620da3-d3ee-442a-a319-248906d959c0.json b/data/hfopenllm_v2/utkmst/chimera-beta-test2-lora-merged/00620da3-d3ee-442a-a319-248906d959c0.json deleted file mode 100644 index e9c579df3..000000000 --- a/data/hfopenllm_v2/utkmst/chimera-beta-test2-lora-merged/00620da3-d3ee-442a-a319-248906d959c0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/utkmst_chimera-beta-test2-lora-merged/1762652580.581129", - "retrieved_timestamp": "1762652580.581131", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"utkmst/chimera-beta-test2-lora-merged", - "developer": "utkmst", - "inference_platform": "unknown", - "id": "utkmst/chimera-beta-test2-lora-merged", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6054269338688014 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47957156724192185 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09516616314199396 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4117916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2992021276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/utkmst/chimera-beta-test2-lora-merged/b030646c-5f5c-43ab-bbc4-405f82992265.json b/data/hfopenllm_v2/utkmst/chimera-beta-test2-lora-merged/b030646c-5f5c-43ab-bbc4-405f82992265.json new file mode 100644 index 000000000..794273d51 --- /dev/null +++ b/data/hfopenllm_v2/utkmst/chimera-beta-test2-lora-merged/b030646c-5f5c-43ab-bbc4-405f82992265.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/utkmst_chimera-beta-test2-lora-merged/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "chimera-beta-test2-lora-merged", + "id": "utkmst/chimera-beta-test2-lora-merged", + "developer": "utkmst", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6054 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4796 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0952 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3037 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4118 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2992 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/uukuguy/speechless-code-mistral-7b-v1.0/399e516c-d8c8-4511-a746-76c81f72b36a.json b/data/hfopenllm_v2/uukuguy/speechless-code-mistral-7b-v1.0/399e516c-d8c8-4511-a746-76c81f72b36a.json new file mode 100644 index 000000000..8a74f1d3d --- /dev/null +++ b/data/hfopenllm_v2/uukuguy/speechless-code-mistral-7b-v1.0/399e516c-d8c8-4511-a746-76c81f72b36a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/uukuguy_speechless-code-mistral-7b-v1.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "speechless-code-mistral-7b-v1.0", + "id": "uukuguy/speechless-code-mistral-7b-v1.0", + "developer": "uukuguy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3665 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4572 + } + }, + { + "evaluation_name": "MATH Level 5", + 
"source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0521 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2844 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4502 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3146 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/uukuguy/speechless-codellama-34b-v2.0/bd8e4424-7903-43e7-8105-269de734582e.json b/data/hfopenllm_v2/uukuguy/speechless-codellama-34b-v2.0/bd8e4424-7903-43e7-8105-269de734582e.json new file mode 100644 index 000000000..eea43c67b --- /dev/null +++ b/data/hfopenllm_v2/uukuguy/speechless-codellama-34b-v2.0/bd8e4424-7903-43e7-8105-269de734582e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/uukuguy_speechless-codellama-34b-v2.0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "speechless-codellama-34b-v2.0", + "id": "uukuguy/speechless-codellama-34b-v2.0", + "developer": "uukuguy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.0 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4604 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4813 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on 
MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0431 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2693 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3787 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2542 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/uukuguy/speechless-coder-ds-6.7b/9126e939-3a87-4774-9606-084c5b56e933.json b/data/hfopenllm_v2/uukuguy/speechless-coder-ds-6.7b/9126e939-3a87-4774-9606-084c5b56e933.json new file mode 100644 index 000000000..9ccbd840a --- /dev/null +++ b/data/hfopenllm_v2/uukuguy/speechless-coder-ds-6.7b/9126e939-3a87-4774-9606-084c5b56e933.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/uukuguy_speechless-coder-ds-6.7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "speechless-coder-ds-6.7b", + "id": "uukuguy/speechless-coder-ds-6.7b", + "developer": "uukuguy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.7 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2505 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4036 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0211 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": 
"GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3819 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1719 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/uukuguy/speechless-coder-ds-6.7b/a3ba5a65-b137-42ad-868b-9aa5c24afd07.json b/data/hfopenllm_v2/uukuguy/speechless-coder-ds-6.7b/a3ba5a65-b137-42ad-868b-9aa5c24afd07.json deleted file mode 100644 index 7fcfa4c58..000000000 --- a/data/hfopenllm_v2/uukuguy/speechless-coder-ds-6.7b/a3ba5a65-b137-42ad-868b-9aa5c24afd07.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/uukuguy_speechless-coder-ds-6.7b/1762652580.582827", - "retrieved_timestamp": "1762652580.582828", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "uukuguy/speechless-coder-ds-6.7b", - "developer": "uukuguy", - "inference_platform": "unknown", - "id": "uukuguy/speechless-coder-ds-6.7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.7 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25046986440422525 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4036373344669979 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.021148036253776436 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3819375 - } - }, - { 
- "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.171875 - } - } - ] -} diff --git a/data/hfopenllm_v2/uukuguy/speechless-instruct-mistral-7b-v0.2/be2ef197-738e-422d-9a88-cafd124584b7.json b/data/hfopenllm_v2/uukuguy/speechless-instruct-mistral-7b-v0.2/be2ef197-738e-422d-9a88-cafd124584b7.json new file mode 100644 index 000000000..06f2df151 --- /dev/null +++ b/data/hfopenllm_v2/uukuguy/speechless-instruct-mistral-7b-v0.2/be2ef197-738e-422d-9a88-cafd124584b7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/uukuguy_speechless-instruct-mistral-7b-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "speechless-instruct-mistral-7b-v0.2", + "id": "uukuguy/speechless-instruct-mistral-7b-v0.2", + "developer": "uukuguy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3261 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4607 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0491 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2819 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4902 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.2902 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/uukuguy/speechless-instruct-mistral-7b-v0.2/e115938d-d343-4c03-8f3b-4d86768b2e49.json b/data/hfopenllm_v2/uukuguy/speechless-instruct-mistral-7b-v0.2/e115938d-d343-4c03-8f3b-4d86768b2e49.json deleted file mode 100644 index fab302799..000000000 --- a/data/hfopenllm_v2/uukuguy/speechless-instruct-mistral-7b-v0.2/e115938d-d343-4c03-8f3b-4d86768b2e49.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/uukuguy_speechless-instruct-mistral-7b-v0.2/1762652580.5831082", - "retrieved_timestamp": "1762652580.5831091", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "uukuguy/speechless-instruct-mistral-7b-v0.2", - "developer": "uukuguy", - "inference_platform": "unknown", - "id": "uukuguy/speechless-instruct-mistral-7b-v0.2", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3261324397044287 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4606667950681749 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04909365558912387 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4901770833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902260638297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/uukuguy/speechless-llama2-hermes-orca-platypus-wizardlm-13b/ee22e6c5-8529-4987-86d0-4abf3b525f90.json b/data/hfopenllm_v2/uukuguy/speechless-llama2-hermes-orca-platypus-wizardlm-13b/ee22e6c5-8529-4987-86d0-4abf3b525f90.json new file mode 100644 index 000000000..776e410d9 --- /dev/null +++ b/data/hfopenllm_v2/uukuguy/speechless-llama2-hermes-orca-platypus-wizardlm-13b/ee22e6c5-8529-4987-86d0-4abf3b525f90.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/uukuguy_speechless-llama2-hermes-orca-platypus-wizardlm-13b/1770682486.623709", + 
"retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "speechless-llama2-hermes-orca-platypus-wizardlm-13b", + "id": "uukuguy/speechless-llama2-hermes-orca-platypus-wizardlm-13b", + "developer": "uukuguy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.016 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4562 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4846 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0204 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2701 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4655 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2559 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/uukuguy/speechless-mistral-dolphin-orca-platypus-samantha-7b/50f0ddc2-fccd-447c-ab50-a086ccb4cd3a.json b/data/hfopenllm_v2/uukuguy/speechless-mistral-dolphin-orca-platypus-samantha-7b/50f0ddc2-fccd-447c-ab50-a086ccb4cd3a.json new file mode 100644 index 000000000..c79782429 --- /dev/null +++ b/data/hfopenllm_v2/uukuguy/speechless-mistral-dolphin-orca-platypus-samantha-7b/50f0ddc2-fccd-447c-ab50-a086ccb4cd3a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/uukuguy_speechless-mistral-dolphin-orca-platypus-samantha-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { 
+ "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "speechless-mistral-dolphin-orca-platypus-samantha-7b", + "id": "uukuguy/speechless-mistral-dolphin-orca-platypus-samantha-7b", + "developer": "uukuguy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.37 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4983 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0295 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4361 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.299 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/uukuguy/speechless-zephyr-code-functionary-7b/82346a60-f31e-45ba-9fae-bd738321f390.json b/data/hfopenllm_v2/uukuguy/speechless-zephyr-code-functionary-7b/82346a60-f31e-45ba-9fae-bd738321f390.json deleted file mode 100644 index d47e8898e..000000000 --- a/data/hfopenllm_v2/uukuguy/speechless-zephyr-code-functionary-7b/82346a60-f31e-45ba-9fae-bd738321f390.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/uukuguy_speechless-zephyr-code-functionary-7b/1762652580.583915", - "retrieved_timestamp": "1762652580.583916", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "uukuguy/speechless-zephyr-code-functionary-7b", - "developer": "uukuguy", - "inference_platform": "unknown", - "id": "uukuguy/speechless-zephyr-code-functionary-7b", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2695791610704043 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46642753957194555 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04229607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4267708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3094248670212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/uukuguy/speechless-zephyr-code-functionary-7b/83294141-a70f-40da-b3f8-21b367098cce.json b/data/hfopenllm_v2/uukuguy/speechless-zephyr-code-functionary-7b/83294141-a70f-40da-b3f8-21b367098cce.json new file mode 100644 index 000000000..947584201 --- /dev/null +++ b/data/hfopenllm_v2/uukuguy/speechless-zephyr-code-functionary-7b/83294141-a70f-40da-b3f8-21b367098cce.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/uukuguy_speechless-zephyr-code-functionary-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "speechless-zephyr-code-functionary-7b", + "id": "uukuguy/speechless-zephyr-code-functionary-7b", + "developer": "uukuguy", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.2696 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4664 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0423 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3003 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4268 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3094 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/v000000/L3-8B-Stheno-v3.2-abliterated/303ae3d2-fdf5-404d-83ca-8e6071e13e6b.json b/data/hfopenllm_v2/v000000/L3-8B-Stheno-v3.2-abliterated/303ae3d2-fdf5-404d-83ca-8e6071e13e6b.json new file mode 100644 index 000000000..ef4193f8a --- /dev/null +++ b/data/hfopenllm_v2/v000000/L3-8B-Stheno-v3.2-abliterated/303ae3d2-fdf5-404d-83ca-8e6071e13e6b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/v000000_L3-8B-Stheno-v3.2-abliterated/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3-8B-Stheno-v3.2-abliterated", + "id": "v000000/L3-8B-Stheno-v3.2-abliterated", + "developer": "v000000", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6718 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5141 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0695 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3096 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.362 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3604 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/v000000/L3-8B-Stheno-v3.2-abliterated/33146dbb-8233-4f3d-9fd9-68cbacc3f293.json b/data/hfopenllm_v2/v000000/L3-8B-Stheno-v3.2-abliterated/33146dbb-8233-4f3d-9fd9-68cbacc3f293.json deleted file mode 100644 index 6551961e8..000000000 --- a/data/hfopenllm_v2/v000000/L3-8B-Stheno-v3.2-abliterated/33146dbb-8233-4f3d-9fd9-68cbacc3f293.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/v000000_L3-8B-Stheno-v3.2-abliterated/1762652580.584157", - "retrieved_timestamp": "1762652580.584158", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "v000000/L3-8B-Stheno-v3.2-abliterated", - "developer": "v000000", - "inference_platform": "unknown", - "id": "v000000/L3-8B-Stheno-v3.2-abliterated", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6717720093795574 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5141439214918061 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06948640483383686 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36196875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3603723404255319 - } - } - ] -} diff --git a/data/hfopenllm_v2/v000000/L3.1-Niitorm-8B-DPO-t0.0001/1b13d76d-259f-41f2-baba-ce96ef0cb937.json b/data/hfopenllm_v2/v000000/L3.1-Niitorm-8B-DPO-t0.0001/1b13d76d-259f-41f2-baba-ce96ef0cb937.json new file mode 100644 index 000000000..4b7d05df3 --- /dev/null +++ b/data/hfopenllm_v2/v000000/L3.1-Niitorm-8B-DPO-t0.0001/1b13d76d-259f-41f2-baba-ce96ef0cb937.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/v000000_L3.1-Niitorm-8B-DPO-t0.0001/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3.1-Niitorm-8B-DPO-t0.0001", + "id": "v000000/L3.1-Niitorm-8B-DPO-t0.0001", + "developer": "v000000", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7689 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5134 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1624 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.388 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3866 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/v000000/L3.1-Niitorm-8B-DPO-t0.0001/d90cef97-1e73-4068-bcb5-260a3f2586fe.json b/data/hfopenllm_v2/v000000/L3.1-Niitorm-8B-DPO-t0.0001/d90cef97-1e73-4068-bcb5-260a3f2586fe.json deleted file mode 100644 index f4b99b578..000000000 --- a/data/hfopenllm_v2/v000000/L3.1-Niitorm-8B-DPO-t0.0001/d90cef97-1e73-4068-bcb5-260a3f2586fe.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/v000000_L3.1-Niitorm-8B-DPO-t0.0001/1762652580.5844421", - "retrieved_timestamp": "1762652580.5844429", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "v000000/L3.1-Niitorm-8B-DPO-t0.0001", - "developer": "v000000", - "inference_platform": "unknown", - "id": "v000000/L3.1-Niitorm-8B-DPO-t0.0001", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7688666072687137 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5134234526726582 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1623867069486405 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3879791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.38663563829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/v000000/L3.1-Storniitova-8B/761f0cc0-c202-490d-93b4-447244f1e40a.json b/data/hfopenllm_v2/v000000/L3.1-Storniitova-8B/761f0cc0-c202-490d-93b4-447244f1e40a.json deleted file mode 100644 index 66cb4d362..000000000 --- a/data/hfopenllm_v2/v000000/L3.1-Storniitova-8B/761f0cc0-c202-490d-93b4-447244f1e40a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/v000000_L3.1-Storniitova-8B/1762652580.584696", - "retrieved_timestamp": "1762652580.584697", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "v000000/L3.1-Storniitova-8B", - "developer": "v000000", - "inference_platform": "unknown", - "id": "v000000/L3.1-Storniitova-8B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7816560060639104 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5151452004311876 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14652567975830816 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4028958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37757646276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/v000000/L3.1-Storniitova-8B/b644a420-0a70-4b3d-9a5a-ff91911c857b.json b/data/hfopenllm_v2/v000000/L3.1-Storniitova-8B/b644a420-0a70-4b3d-9a5a-ff91911c857b.json new file mode 100644 index 000000000..45240285c --- /dev/null +++ b/data/hfopenllm_v2/v000000/L3.1-Storniitova-8B/b644a420-0a70-4b3d-9a5a-ff91911c857b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/v000000_L3.1-Storniitova-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3.1-Storniitova-8B", + "id": 
"v000000/L3.1-Storniitova-8B", + "developer": "v000000", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7817 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5151 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1465 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4029 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3776 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/v000000/Qwen2.5-14B-Gutenberg-1e-Delta/33aaa60f-eb69-4d36-917c-6862121a223e.json b/data/hfopenllm_v2/v000000/Qwen2.5-14B-Gutenberg-1e-Delta/33aaa60f-eb69-4d36-917c-6862121a223e.json new file mode 100644 index 000000000..c7a731adf --- /dev/null +++ b/data/hfopenllm_v2/v000000/Qwen2.5-14B-Gutenberg-1e-Delta/33aaa60f-eb69-4d36-917c-6862121a223e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/v000000_Qwen2.5-14B-Gutenberg-1e-Delta/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-Gutenberg-1e-Delta", + "id": "v000000/Qwen2.5-14B-Gutenberg-1e-Delta", + "developer": "v000000", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + 
"params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8045 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6398 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5264 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3289 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4073 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.493 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/v000000/Qwen2.5-14B-Gutenberg-Instruct-Slerpeno/1f1da15c-3a82-4dfb-9b73-4381c70eb1ef.json b/data/hfopenllm_v2/v000000/Qwen2.5-14B-Gutenberg-Instruct-Slerpeno/1f1da15c-3a82-4dfb-9b73-4381c70eb1ef.json deleted file mode 100644 index 3330f13b7..000000000 --- a/data/hfopenllm_v2/v000000/Qwen2.5-14B-Gutenberg-Instruct-Slerpeno/1f1da15c-3a82-4dfb-9b73-4381c70eb1ef.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/v000000_Qwen2.5-14B-Gutenberg-Instruct-Slerpeno/1762652580.585153", - "retrieved_timestamp": "1762652580.585153", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "v000000/Qwen2.5-14B-Gutenberg-Instruct-Slerpeno", - "developer": "v000000", - "inference_platform": "unknown", - "id": "v000000/Qwen2.5-14B-Gutenberg-Instruct-Slerpeno", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 
14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8197493760998595 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.639010174859259 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5324773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4113645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4923537234042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/v000000/Qwen2.5-14B-Gutenberg-Instruct-Slerpeno/a1d2e571-6de0-4bd7-bdcf-8b3921b450f6.json b/data/hfopenllm_v2/v000000/Qwen2.5-14B-Gutenberg-Instruct-Slerpeno/a1d2e571-6de0-4bd7-bdcf-8b3921b450f6.json new file mode 100644 index 000000000..98637a74d --- /dev/null +++ b/data/hfopenllm_v2/v000000/Qwen2.5-14B-Gutenberg-Instruct-Slerpeno/a1d2e571-6de0-4bd7-bdcf-8b3921b450f6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/v000000_Qwen2.5-14B-Gutenberg-Instruct-Slerpeno/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-14B-Gutenberg-Instruct-Slerpeno", + "id": "v000000/Qwen2.5-14B-Gutenberg-Instruct-Slerpeno", + "developer": "v000000", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8197 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.639 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": 
{ + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5325 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3314 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4114 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4924 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/v000000/Qwen2.5-Lumen-14B/ad93274e-3ca0-40cb-9f65-e6e6c66a8008.json b/data/hfopenllm_v2/v000000/Qwen2.5-Lumen-14B/ad93274e-3ca0-40cb-9f65-e6e6c66a8008.json new file mode 100644 index 000000000..13c590e0a --- /dev/null +++ b/data/hfopenllm_v2/v000000/Qwen2.5-Lumen-14B/ad93274e-3ca0-40cb-9f65-e6e6c66a8008.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/v000000_Qwen2.5-Lumen-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-Lumen-14B", + "id": "v000000/Qwen2.5-Lumen-14B", + "developer": "v000000", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8064 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6391 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 
0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5363 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.328 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4114 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4903 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vhab10/Llama-3.1-8B-Base-Instruct-SLERP/982455a4-fb4f-4eed-96a0-c46d9eb11937.json b/data/hfopenllm_v2/vhab10/Llama-3.1-8B-Base-Instruct-SLERP/982455a4-fb4f-4eed-96a0-c46d9eb11937.json deleted file mode 100644 index ee54edf7c..000000000 --- a/data/hfopenllm_v2/vhab10/Llama-3.1-8B-Base-Instruct-SLERP/982455a4-fb4f-4eed-96a0-c46d9eb11937.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/vhab10_Llama-3.1-8B-Base-Instruct-SLERP/1762652580.585581", - "retrieved_timestamp": "1762652580.585582", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "vhab10/Llama-3.1-8B-Base-Instruct-SLERP", - "developer": "vhab10", - "inference_platform": "unknown", - "id": "vhab10/Llama-3.1-8B-Base-Instruct-SLERP", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.290711977552893 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5057443268070797 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40106250000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3621176861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/vhab10/Llama-3.1-8B-Base-Instruct-SLERP/b8043d04-c3ab-4d6a-97eb-44b195a52710.json b/data/hfopenllm_v2/vhab10/Llama-3.1-8B-Base-Instruct-SLERP/b8043d04-c3ab-4d6a-97eb-44b195a52710.json new file mode 100644 index 000000000..84ee49bc2 --- /dev/null +++ b/data/hfopenllm_v2/vhab10/Llama-3.1-8B-Base-Instruct-SLERP/b8043d04-c3ab-4d6a-97eb-44b195a52710.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/vhab10_Llama-3.1-8B-Base-Instruct-SLERP/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8B-Base-Instruct-SLERP", + "id": "vhab10/Llama-3.1-8B-Base-Instruct-SLERP", + "developer": "vhab10", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2907 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5057 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1201 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2961 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4011 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + 
"hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3621 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vhab10/Llama-3.2-Instruct-3B-TIES/22f8bb3f-4794-46b1-828e-75711a1233bd.json b/data/hfopenllm_v2/vhab10/Llama-3.2-Instruct-3B-TIES/22f8bb3f-4794-46b1-828e-75711a1233bd.json deleted file mode 100644 index 8fa7af8d9..000000000 --- a/data/hfopenllm_v2/vhab10/Llama-3.2-Instruct-3B-TIES/22f8bb3f-4794-46b1-828e-75711a1233bd.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/vhab10_Llama-3.2-Instruct-3B-TIES/1762652580.585841", - "retrieved_timestamp": "1762652580.585842", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "vhab10/Llama-3.2-Instruct-3B-TIES", - "developer": "vhab10", - "inference_platform": "unknown", - "id": "vhab10/Llama-3.2-Instruct-3B-TIES", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.848 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4727367828472896 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43323649966514094 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09818731117824774 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34965625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2915558510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/vhab10/Llama-3.2-Instruct-3B-TIES/c6bff6da-382f-4423-ba3a-d987839132e0.json b/data/hfopenllm_v2/vhab10/Llama-3.2-Instruct-3B-TIES/c6bff6da-382f-4423-ba3a-d987839132e0.json new file mode 100644 index 000000000..44b89e714 --- /dev/null +++ b/data/hfopenllm_v2/vhab10/Llama-3.2-Instruct-3B-TIES/c6bff6da-382f-4423-ba3a-d987839132e0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/vhab10_Llama-3.2-Instruct-3B-TIES/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-Instruct-3B-TIES", + "id": "vhab10/Llama-3.2-Instruct-3B-TIES", + "developer": "vhab10", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.848 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4727 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4332 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0982 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2693 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3497 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2916 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vhab10/llama-3-8b-merged-linear/f3574ad1-a6d7-47fb-86e7-69c256452dea.json b/data/hfopenllm_v2/vhab10/llama-3-8b-merged-linear/f3574ad1-a6d7-47fb-86e7-69c256452dea.json new file mode 100644 index 000000000..a70dfa80f --- /dev/null +++ b/data/hfopenllm_v2/vhab10/llama-3-8b-merged-linear/f3574ad1-a6d7-47fb-86e7-69c256452dea.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/vhab10_llama-3-8b-merged-linear/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": 
"Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-3-8b-merged-linear", + "id": "vhab10/llama-3-8b-merged-linear", + "developer": "vhab10", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.65 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5917 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4937 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0816 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4191 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3704 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vicgalle/CarbonBeagle-11B-truthy/d67aa278-fcc9-4404-a87a-4be9e1bdaa1a.json b/data/hfopenllm_v2/vicgalle/CarbonBeagle-11B-truthy/d67aa278-fcc9-4404-a87a-4be9e1bdaa1a.json deleted file mode 100644 index 9d2373a6c..000000000 --- a/data/hfopenllm_v2/vicgalle/CarbonBeagle-11B-truthy/d67aa278-fcc9-4404-a87a-4be9e1bdaa1a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/vicgalle_CarbonBeagle-11B-truthy/1762652580.586528", - "retrieved_timestamp": "1762652580.586528", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"vicgalle/CarbonBeagle-11B-truthy", - "developer": "vicgalle", - "inference_platform": "unknown", - "id": "vicgalle/CarbonBeagle-11B-truthy", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 10.732 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5212214701436633 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5348420085288232 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04909365558912387 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37396874999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.335688164893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/vicgalle/CarbonBeagle-11B-truthy/f2e47267-6c40-4d70-8420-295c95b318f3.json b/data/hfopenllm_v2/vicgalle/CarbonBeagle-11B-truthy/f2e47267-6c40-4d70-8420-295c95b318f3.json new file mode 100644 index 000000000..702223e17 --- /dev/null +++ b/data/hfopenllm_v2/vicgalle/CarbonBeagle-11B-truthy/f2e47267-6c40-4d70-8420-295c95b318f3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/vicgalle_CarbonBeagle-11B-truthy/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CarbonBeagle-11B-truthy", + "id": "vicgalle/CarbonBeagle-11B-truthy", + "developer": "vicgalle", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 10.732 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5212 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5348 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0491 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2995 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.374 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3357 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vicgalle/CarbonBeagle-11B/395f246e-34c6-40e6-bfeb-b047aa12cf90.json b/data/hfopenllm_v2/vicgalle/CarbonBeagle-11B/395f246e-34c6-40e6-bfeb-b047aa12cf90.json new file mode 100644 index 000000000..d8ca9e059 --- /dev/null +++ b/data/hfopenllm_v2/vicgalle/CarbonBeagle-11B/395f246e-34c6-40e6-bfeb-b047aa12cf90.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/vicgalle_CarbonBeagle-11B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CarbonBeagle-11B", + "id": "vicgalle/CarbonBeagle-11B", + "developer": "vicgalle", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 10.732 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5415 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5294 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0619 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.402 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3276 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vicgalle/CarbonBeagle-11B/b906411a-6663-4c9f-9fe6-4d60e99e4e41.json b/data/hfopenllm_v2/vicgalle/CarbonBeagle-11B/b906411a-6663-4c9f-9fe6-4d60e99e4e41.json deleted file mode 100644 index 57b8329d0..000000000 --- a/data/hfopenllm_v2/vicgalle/CarbonBeagle-11B/b906411a-6663-4c9f-9fe6-4d60e99e4e41.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/vicgalle_CarbonBeagle-11B/1762652580.5862951", - "retrieved_timestamp": "1762652580.5862951", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "vicgalle/CarbonBeagle-11B", - "developer": "vicgalle", - "inference_platform": "unknown", - "id": "vicgalle/CarbonBeagle-11B", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 10.732 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5415298075772285 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5293652486530874 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.061933534743202415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40203125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32762632978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/vicgalle/Configurable-Hermes-2-Pro-Llama-3-8B/3a91f8bb-c132-45b3-b8b4-d2ecc9f03f3a.json b/data/hfopenllm_v2/vicgalle/Configurable-Hermes-2-Pro-Llama-3-8B/3a91f8bb-c132-45b3-b8b4-d2ecc9f03f3a.json new file mode 100644 index 000000000..ea5259667 --- /dev/null +++ b/data/hfopenllm_v2/vicgalle/Configurable-Hermes-2-Pro-Llama-3-8B/3a91f8bb-c132-45b3-b8b4-d2ecc9f03f3a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/vicgalle_Configurable-Hermes-2-Pro-Llama-3-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Configurable-Hermes-2-Pro-Llama-3-8B", + "id": "vicgalle/Configurable-Hermes-2-Pro-Llama-3-8B", + "developer": "vicgalle", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.031 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5763 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5055 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0763 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.297 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.4184 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3098 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vicgalle/Configurable-Llama-3.1-8B-Instruct/82a3253a-7a6e-4d75-8ea2-114b4dee6d16.json b/data/hfopenllm_v2/vicgalle/Configurable-Llama-3.1-8B-Instruct/82a3253a-7a6e-4d75-8ea2-114b4dee6d16.json deleted file mode 100644 index ab826d396..000000000 --- a/data/hfopenllm_v2/vicgalle/Configurable-Llama-3.1-8B-Instruct/82a3253a-7a6e-4d75-8ea2-114b4dee6d16.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/vicgalle_Configurable-Llama-3.1-8B-Instruct/1762652580.586963", - "retrieved_timestamp": "1762652580.586964", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "vicgalle/Configurable-Llama-3.1-8B-Instruct", - "developer": "vicgalle", - "inference_platform": "unknown", - "id": "vicgalle/Configurable-Llama-3.1-8B-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8312399987588488 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5044756225072481 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1729607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3845416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3592087765957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/vicgalle/Configurable-Llama-3.1-8B-Instruct/97c92043-9bed-460a-8d7b-70ab3584c75b.json b/data/hfopenllm_v2/vicgalle/Configurable-Llama-3.1-8B-Instruct/97c92043-9bed-460a-8d7b-70ab3584c75b.json new file mode 100644 index 000000000..86af2304f --- /dev/null +++ 
b/data/hfopenllm_v2/vicgalle/Configurable-Llama-3.1-8B-Instruct/97c92043-9bed-460a-8d7b-70ab3584c75b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/vicgalle_Configurable-Llama-3.1-8B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Configurable-Llama-3.1-8B-Instruct", + "id": "vicgalle/Configurable-Llama-3.1-8B-Instruct", + "developer": "vicgalle", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8312 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5045 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.173 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3845 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3592 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vicgalle/Configurable-Yi-1.5-9B-Chat/0a933130-dca9-435c-a529-16065b540aab.json b/data/hfopenllm_v2/vicgalle/Configurable-Yi-1.5-9B-Chat/0a933130-dca9-435c-a529-16065b540aab.json deleted file mode 100644 index 7b26c705a..000000000 --- a/data/hfopenllm_v2/vicgalle/Configurable-Yi-1.5-9B-Chat/0a933130-dca9-435c-a529-16065b540aab.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/vicgalle_Configurable-Yi-1.5-9B-Chat/1762652580.587164", - "retrieved_timestamp": "1762652580.5871649", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "vicgalle/Configurable-Yi-1.5-9B-Chat", - "developer": "vicgalle", - "inference_platform": "unknown", - "id": "vicgalle/Configurable-Yi-1.5-9B-Chat", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.829 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43234506664538974 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5452196737175008 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20468277945619334 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34312080536912754 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42711458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4015126329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/vicgalle/Configurable-Yi-1.5-9B-Chat/ab2ce171-bfcf-49ea-a341-2a52b2bd803a.json b/data/hfopenllm_v2/vicgalle/Configurable-Yi-1.5-9B-Chat/ab2ce171-bfcf-49ea-a341-2a52b2bd803a.json new file mode 100644 index 000000000..263e7fc5d --- /dev/null +++ b/data/hfopenllm_v2/vicgalle/Configurable-Yi-1.5-9B-Chat/ab2ce171-bfcf-49ea-a341-2a52b2bd803a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/vicgalle_Configurable-Yi-1.5-9B-Chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Configurable-Yi-1.5-9B-Chat", + "id": "vicgalle/Configurable-Yi-1.5-9B-Chat", + "developer": "vicgalle", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.829 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + 
}, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4323 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5452 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2047 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3431 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4271 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4015 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vicgalle/ConfigurableBeagle-11B/3fd95536-ec61-4470-9082-14a116d20e80.json b/data/hfopenllm_v2/vicgalle/ConfigurableBeagle-11B/3fd95536-ec61-4470-9082-14a116d20e80.json deleted file mode 100644 index d8b504872..000000000 --- a/data/hfopenllm_v2/vicgalle/ConfigurableBeagle-11B/3fd95536-ec61-4470-9082-14a116d20e80.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/vicgalle_ConfigurableBeagle-11B/1762652580.587369", - "retrieved_timestamp": "1762652580.58737", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "vicgalle/ConfigurableBeagle-11B", - "developer": "vicgalle", - "inference_platform": "unknown", - "id": "vicgalle/ConfigurableBeagle-11B", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 10.732 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5834452585805663 - 
} - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5286592318626696 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39530208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33743351063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/vicgalle/ConfigurableBeagle-11B/f9bbd9cc-dc6a-466f-b777-eaea4a15b874.json b/data/hfopenllm_v2/vicgalle/ConfigurableBeagle-11B/f9bbd9cc-dc6a-466f-b777-eaea4a15b874.json new file mode 100644 index 000000000..562396548 --- /dev/null +++ b/data/hfopenllm_v2/vicgalle/ConfigurableBeagle-11B/f9bbd9cc-dc6a-466f-b777-eaea4a15b874.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/vicgalle_ConfigurableBeagle-11B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ConfigurableBeagle-11B", + "id": "vicgalle/ConfigurableBeagle-11B", + "developer": "vicgalle", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 10.732 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5834 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5287 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0431 + } + }, + { + "evaluation_name": "GPQA", + 
"source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3953 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3374 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vicgalle/ConfigurableHermes-7B/176727e5-31dc-462a-8210-4735543c32f2.json b/data/hfopenllm_v2/vicgalle/ConfigurableHermes-7B/176727e5-31dc-462a-8210-4735543c32f2.json deleted file mode 100644 index 5d4e80ecc..000000000 --- a/data/hfopenllm_v2/vicgalle/ConfigurableHermes-7B/176727e5-31dc-462a-8210-4735543c32f2.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/vicgalle_ConfigurableHermes-7B/1762652580.5875661", - "retrieved_timestamp": "1762652580.587567", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "vicgalle/ConfigurableHermes-7B", - "developer": "vicgalle", - "inference_platform": "unknown", - "id": "vicgalle/ConfigurableHermes-7B", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5410798902467675 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4572969627830424 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.4056875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3025265957446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/vicgalle/ConfigurableHermes-7B/cd0aefa3-b0c9-4683-872f-f9f9d285e6c3.json b/data/hfopenllm_v2/vicgalle/ConfigurableHermes-7B/cd0aefa3-b0c9-4683-872f-f9f9d285e6c3.json new file mode 100644 index 000000000..4374da041 --- /dev/null +++ b/data/hfopenllm_v2/vicgalle/ConfigurableHermes-7B/cd0aefa3-b0c9-4683-872f-f9f9d285e6c3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/vicgalle_ConfigurableHermes-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ConfigurableHermes-7B", + "id": "vicgalle/ConfigurableHermes-7B", + "developer": "vicgalle", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5411 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4573 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0476 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2768 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4057 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.3025 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vicgalle/ConfigurableSOLAR-10.7B/2dec3c49-01f0-4940-aa45-e7a6b2648e8f.json b/data/hfopenllm_v2/vicgalle/ConfigurableSOLAR-10.7B/2dec3c49-01f0-4940-aa45-e7a6b2648e8f.json deleted file mode 100644 index 475cf04db..000000000 --- a/data/hfopenllm_v2/vicgalle/ConfigurableSOLAR-10.7B/2dec3c49-01f0-4940-aa45-e7a6b2648e8f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/vicgalle_ConfigurableSOLAR-10.7B/1762652580.587757", - "retrieved_timestamp": "1762652580.587758", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "vicgalle/ConfigurableSOLAR-10.7B", - "developer": "vicgalle", - "inference_platform": "unknown", - "id": "vicgalle/ConfigurableSOLAR-10.7B", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5099558061499045 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48668100977360457 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38047916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31732047872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/vicgalle/ConfigurableSOLAR-10.7B/c42db2ab-dbc4-48e4-9c16-7b8a5f8492c3.json b/data/hfopenllm_v2/vicgalle/ConfigurableSOLAR-10.7B/c42db2ab-dbc4-48e4-9c16-7b8a5f8492c3.json new file mode 100644 index 000000000..3fecdf2c2 --- /dev/null +++ b/data/hfopenllm_v2/vicgalle/ConfigurableSOLAR-10.7B/c42db2ab-dbc4-48e4-9c16-7b8a5f8492c3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/vicgalle_ConfigurableSOLAR-10.7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + 
}, + "model_info": { + "name": "ConfigurableSOLAR-10.7B", + "id": "vicgalle/ConfigurableSOLAR-10.7B", + "developer": "vicgalle", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.51 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4867 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0665 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3805 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3173 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vicgalle/Humanish-RP-Llama-3.1-8B/1b32c387-97a7-42ff-892c-d3bacebbf050.json b/data/hfopenllm_v2/vicgalle/Humanish-RP-Llama-3.1-8B/1b32c387-97a7-42ff-892c-d3bacebbf050.json new file mode 100644 index 000000000..99c2ecb52 --- /dev/null +++ b/data/hfopenllm_v2/vicgalle/Humanish-RP-Llama-3.1-8B/1b32c387-97a7-42ff-892c-d3bacebbf050.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/vicgalle_Humanish-RP-Llama-3.1-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Humanish-RP-Llama-3.1-8B", + "id": "vicgalle/Humanish-RP-Llama-3.1-8B", + "developer": "vicgalle", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + 
"architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6669 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.51 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1518 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2869 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3952 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3477 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vicgalle/Merge-Mistral-Prometheus-7B/cbea057c-b0f9-48ac-a075-eb28ebbaf358.json b/data/hfopenllm_v2/vicgalle/Merge-Mistral-Prometheus-7B/cbea057c-b0f9-48ac-a075-eb28ebbaf358.json new file mode 100644 index 000000000..f60131bd5 --- /dev/null +++ b/data/hfopenllm_v2/vicgalle/Merge-Mistral-Prometheus-7B/cbea057c-b0f9-48ac-a075-eb28ebbaf358.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/vicgalle_Merge-Mistral-Prometheus-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Merge-Mistral-Prometheus-7B", + "id": "vicgalle/Merge-Mistral-Prometheus-7B", + "developer": "vicgalle", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", 
+ "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4848 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4201 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0181 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.41 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2717 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vicgalle/Merge-Mixtral-Prometheus-8x7B/0b1bb876-9dc7-47d5-855a-f028fb7f2df6.json b/data/hfopenllm_v2/vicgalle/Merge-Mixtral-Prometheus-8x7B/0b1bb876-9dc7-47d5-855a-f028fb7f2df6.json new file mode 100644 index 000000000..956f520a3 --- /dev/null +++ b/data/hfopenllm_v2/vicgalle/Merge-Mixtral-Prometheus-8x7B/0b1bb876-9dc7-47d5-855a-f028fb7f2df6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/vicgalle_Merge-Mixtral-Prometheus-8x7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Merge-Mixtral-Prometheus-8x7B", + "id": "vicgalle/Merge-Mixtral-Prometheus-8x7B", + "developer": "vicgalle", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 46.703 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.5744 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5351 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0929 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3087 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4098 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3684 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vicgalle/Merge-Mixtral-Prometheus-8x7B/e6a0cf8f-323d-40c0-90c2-0e2071321df0.json b/data/hfopenllm_v2/vicgalle/Merge-Mixtral-Prometheus-8x7B/e6a0cf8f-323d-40c0-90c2-0e2071321df0.json deleted file mode 100644 index 0daff9855..000000000 --- a/data/hfopenllm_v2/vicgalle/Merge-Mixtral-Prometheus-8x7B/e6a0cf8f-323d-40c0-90c2-0e2071321df0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/vicgalle_Merge-Mixtral-Prometheus-8x7B/1762652580.588394", - "retrieved_timestamp": "1762652580.588395", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "vicgalle/Merge-Mixtral-Prometheus-8x7B", - "developer": "vicgalle", - "inference_platform": "unknown", - "id": "vicgalle/Merge-Mixtral-Prometheus-8x7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 46.703 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5744025851407598 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5351498071096573 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09290030211480363 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40975 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3683510638297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/vicgalle/Roleplay-Llama-3-8B/a86678ad-344c-430f-80c7-02d634b0cd5b.json b/data/hfopenllm_v2/vicgalle/Roleplay-Llama-3-8B/a86678ad-344c-430f-80c7-02d634b0cd5b.json new file mode 100644 index 000000000..3b3b5675b --- /dev/null +++ b/data/hfopenllm_v2/vicgalle/Roleplay-Llama-3-8B/a86678ad-344c-430f-80c7-02d634b0cd5b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/vicgalle_Roleplay-Llama-3-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Roleplay-Llama-3-8B", + "id": "vicgalle/Roleplay-Llama-3-8B", + "developer": "vicgalle", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.732 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5012 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0914 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3529 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3708 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/viettelsecurity-ai/security-llama3.2-3b/827f3236-74fa-432b-8177-8785ac25ad76.json b/data/hfopenllm_v2/viettelsecurity-ai/security-llama3.2-3b/827f3236-74fa-432b-8177-8785ac25ad76.json new file mode 100644 index 000000000..088a8c8aa --- /dev/null +++ b/data/hfopenllm_v2/viettelsecurity-ai/security-llama3.2-3b/827f3236-74fa-432b-8177-8785ac25ad76.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/viettelsecurity-ai_security-llama3.2-3b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "security-llama3.2-3b", + "id": "viettelsecurity-ai/security-llama3.2-3b", + "developer": "viettelsecurity-ai", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5909 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4401 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1261 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + 
}, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3379 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2837 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vihangd/smart-dan-sft-v0.1/00de0fac-e1a7-449a-969d-624cbe9adab1.json b/data/hfopenllm_v2/vihangd/smart-dan-sft-v0.1/00de0fac-e1a7-449a-969d-624cbe9adab1.json deleted file mode 100644 index 6c89c4e01..000000000 --- a/data/hfopenllm_v2/vihangd/smart-dan-sft-v0.1/00de0fac-e1a7-449a-969d-624cbe9adab1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/vihangd_smart-dan-sft-v0.1/1762652580.589078", - "retrieved_timestamp": "1762652580.5890791", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "vihangd/smart-dan-sft-v0.1", - "developer": "vihangd", - "inference_platform": "unknown", - "id": "vihangd/smart-dan-sft-v0.1", - "additional_details": { - "precision": "4bit", - "architecture": "LlamaForCausalLM", - "params_billions": 0.379 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15764615664215392 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30617689187138886 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2550335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35018750000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11419547872340426 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/vihangd/smart-dan-sft-v0.1/7f694687-77e5-41d2-923b-f2d5f231729b.json b/data/hfopenllm_v2/vihangd/smart-dan-sft-v0.1/7f694687-77e5-41d2-923b-f2d5f231729b.json new file mode 100644 index 000000000..ee3f7a212 --- /dev/null +++ b/data/hfopenllm_v2/vihangd/smart-dan-sft-v0.1/7f694687-77e5-41d2-923b-f2d5f231729b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/vihangd_smart-dan-sft-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "smart-dan-sft-v0.1", + "id": "vihangd/smart-dan-sft-v0.1", + "developer": "vihangd", + "inference_platform": "unknown", + "additional_details": { + "precision": "4bit", + "architecture": "LlamaForCausalLM", + "params_billions": 0.379 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1576 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3062 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0098 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.255 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3502 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1142 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/voidful/smol-360m-ft/b93d3a57-2535-4150-a2db-71a50569e6ae.json b/data/hfopenllm_v2/voidful/smol-360m-ft/b93d3a57-2535-4150-a2db-71a50569e6ae.json deleted file mode 100644 index df8dfdb16..000000000 --- 
a/data/hfopenllm_v2/voidful/smol-360m-ft/b93d3a57-2535-4150-a2db-71a50569e6ae.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/voidful_smol-360m-ft/1762652580.589319", - "retrieved_timestamp": "1762652580.58932", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "voidful/smol-360m-ft", - "developer": "voidful", - "inference_platform": "unknown", - "id": "voidful/smol-360m-ft", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.362 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2013103011121602 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011946898842932 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24580536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3713645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10871010638297872 - } - } - ] -} diff --git a/data/hfopenllm_v2/voidful/smol-360m-ft/daa9d03e-63b0-4c08-ae72-e11041200ac7.json b/data/hfopenllm_v2/voidful/smol-360m-ft/daa9d03e-63b0-4c08-ae72-e11041200ac7.json new file mode 100644 index 000000000..b88c7efbe --- /dev/null +++ b/data/hfopenllm_v2/voidful/smol-360m-ft/daa9d03e-63b0-4c08-ae72-e11041200ac7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/voidful_smol-360m-ft/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "smol-360m-ft", + "id": "voidful/smol-360m-ft", + "developer": "voidful", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.362 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + 
"hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2013 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0083 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2458 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3714 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1087 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vonjack/MobileLLM-125M-HF/1539822f-acc4-4dae-9e61-133da97ebcbe.json b/data/hfopenllm_v2/vonjack/MobileLLM-125M-HF/1539822f-acc4-4dae-9e61-133da97ebcbe.json new file mode 100644 index 000000000..3d05087f3 --- /dev/null +++ b/data/hfopenllm_v2/vonjack/MobileLLM-125M-HF/1539822f-acc4-4dae-9e61-133da97ebcbe.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/vonjack_MobileLLM-125M-HF/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MobileLLM-125M-HF", + "id": "vonjack/MobileLLM-125M-HF", + "developer": "vonjack", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.125 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2107 + } + }, + { + 
"evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3027 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0091 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3782 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1164 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vonjack/MobileLLM-125M-HF/2e06f258-9d91-4734-aacc-f417fddad77c.json b/data/hfopenllm_v2/vonjack/MobileLLM-125M-HF/2e06f258-9d91-4734-aacc-f417fddad77c.json deleted file mode 100644 index aeff79914..000000000 --- a/data/hfopenllm_v2/vonjack/MobileLLM-125M-HF/2e06f258-9d91-4734-aacc-f417fddad77c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/vonjack_MobileLLM-125M-HF/1762652580.589566", - "retrieved_timestamp": "1762652580.589567", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "vonjack/MobileLLM-125M-HF", - "developer": "vonjack", - "inference_platform": "unknown", - "id": "vonjack/MobileLLM-125M-HF", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.125 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21072753627042912 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30272988561565645 - } - }, - 
{ - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37818749999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1163563829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/vonjack/Phi-3-mini-4k-instruct-LLaMAfied/be3635bb-83de-4cbf-8e0f-3a84ee78bd67.json b/data/hfopenllm_v2/vonjack/Phi-3-mini-4k-instruct-LLaMAfied/be3635bb-83de-4cbf-8e0f-3a84ee78bd67.json deleted file mode 100644 index 13eb42c41..000000000 --- a/data/hfopenllm_v2/vonjack/Phi-3-mini-4k-instruct-LLaMAfied/be3635bb-83de-4cbf-8e0f-3a84ee78bd67.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/vonjack_Phi-3-mini-4k-instruct-LLaMAfied/1762652580.589802", - "retrieved_timestamp": "1762652580.589803", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "vonjack/Phi-3-mini-4k-instruct-LLaMAfied", - "developer": "vonjack", - "inference_platform": "unknown", - "id": "vonjack/Phi-3-mini-4k-instruct-LLaMAfied", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.821 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5787488308798432 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5740684031598843 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13821752265861026 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3923541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3885472074468085 - } - } - ] -} diff --git a/data/hfopenllm_v2/vonjack/Phi-3-mini-4k-instruct-LLaMAfied/eec80fda-ce2f-4ef4-94d3-9e7b90f7f2e5.json b/data/hfopenllm_v2/vonjack/Phi-3-mini-4k-instruct-LLaMAfied/eec80fda-ce2f-4ef4-94d3-9e7b90f7f2e5.json new file mode 100644 index 000000000..adf71cb9a --- /dev/null +++ b/data/hfopenllm_v2/vonjack/Phi-3-mini-4k-instruct-LLaMAfied/eec80fda-ce2f-4ef4-94d3-9e7b90f7f2e5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/vonjack_Phi-3-mini-4k-instruct-LLaMAfied/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-3-mini-4k-instruct-LLaMAfied", + "id": "vonjack/Phi-3-mini-4k-instruct-LLaMAfied", + "developer": "vonjack", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.821 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5787 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5741 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1382 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3305 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3924 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3885 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vonjack/Phi-3.5-mini-instruct-hermes-fc-json/19cd2513-03e8-4d78-b222-566fe3928d2b.json b/data/hfopenllm_v2/vonjack/Phi-3.5-mini-instruct-hermes-fc-json/19cd2513-03e8-4d78-b222-566fe3928d2b.json deleted file mode 100644 index 977b23397..000000000 --- a/data/hfopenllm_v2/vonjack/Phi-3.5-mini-instruct-hermes-fc-json/19cd2513-03e8-4d78-b222-566fe3928d2b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/vonjack_Phi-3.5-mini-instruct-hermes-fc-json/1762652580.5900009", - "retrieved_timestamp": "1762652580.5900018", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "vonjack/Phi-3.5-mini-instruct-hermes-fc-json", - "developer": "vonjack", - "inference_platform": "unknown", - "id": "vonjack/Phi-3.5-mini-instruct-hermes-fc-json", - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 4.132 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14158432957885078 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29747555432824196 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0075528700906344415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40413541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11386303191489362 - } - } - ] -} diff --git a/data/hfopenllm_v2/vonjack/Phi-3.5-mini-instruct-hermes-fc-json/448cac5f-a7d3-41fb-9b49-666758037eb4.json b/data/hfopenllm_v2/vonjack/Phi-3.5-mini-instruct-hermes-fc-json/448cac5f-a7d3-41fb-9b49-666758037eb4.json new file mode 100644 index 000000000..745062dbb --- /dev/null +++ b/data/hfopenllm_v2/vonjack/Phi-3.5-mini-instruct-hermes-fc-json/448cac5f-a7d3-41fb-9b49-666758037eb4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/vonjack_Phi-3.5-mini-instruct-hermes-fc-json/1770682486.623709", + "retrieved_timestamp": 
"1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Phi-3.5-mini-instruct-hermes-fc-json", + "id": "vonjack/Phi-3.5-mini-instruct-hermes-fc-json", + "developer": "vonjack", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 4.132 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1416 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2975 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0076 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2542 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4041 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1139 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vonjack/Qwen2.5-Coder-0.5B-Merged/5d7c5ac1-84c3-4fd1-ac51-4c00ed8c59c7.json b/data/hfopenllm_v2/vonjack/Qwen2.5-Coder-0.5B-Merged/5d7c5ac1-84c3-4fd1-ac51-4c00ed8c59c7.json new file mode 100644 index 000000000..7da1c30b9 --- /dev/null +++ b/data/hfopenllm_v2/vonjack/Qwen2.5-Coder-0.5B-Merged/5d7c5ac1-84c3-4fd1-ac51-4c00ed8c59c7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/vonjack_Qwen2.5-Coder-0.5B-Merged/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": 
{ + "name": "Qwen2.5-Coder-0.5B-Merged", + "id": "vonjack/Qwen2.5-Coder-0.5B-Merged", + "developer": "vonjack", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.31 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3076 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0378 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2534 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3303 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1202 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vonjack/SmolLM2-1.7B-Merged/7e1741cc-f9ea-4940-9b6b-d7a515cfce31.json b/data/hfopenllm_v2/vonjack/SmolLM2-1.7B-Merged/7e1741cc-f9ea-4940-9b6b-d7a515cfce31.json new file mode 100644 index 000000000..a43b36871 --- /dev/null +++ b/data/hfopenllm_v2/vonjack/SmolLM2-1.7B-Merged/7e1741cc-f9ea-4940-9b6b-d7a515cfce31.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/vonjack_SmolLM2-1.7B-Merged/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SmolLM2-1.7B-Merged", + "id": "vonjack/SmolLM2-1.7B-Merged", + "developer": "vonjack", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.711 + 
} + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3698 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3587 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0627 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3408 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2048 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vonjack/SmolLM2-1.7B-Merged/97bab408-a5f5-4363-b530-dc4a6966c859.json b/data/hfopenllm_v2/vonjack/SmolLM2-1.7B-Merged/97bab408-a5f5-4363-b530-dc4a6966c859.json deleted file mode 100644 index 2cb064ca3..000000000 --- a/data/hfopenllm_v2/vonjack/SmolLM2-1.7B-Merged/97bab408-a5f5-4363-b530-dc4a6966c859.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/vonjack_SmolLM2-1.7B-Merged/1762652580.5904331", - "retrieved_timestamp": "1762652580.590434", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "vonjack/SmolLM2-1.7B-Merged", - "developer": "vonjack", - "inference_platform": "unknown", - "id": "vonjack/SmolLM2-1.7B-Merged", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.711 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on 
IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36979658417443495 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3586553457965105 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06268882175226587 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34079166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2047872340425532 - } - } - ] -} diff --git a/data/hfopenllm_v2/vonjack/SmolLM2-135M-Merged/2c1cab05-b63f-49ca-a709-b5a4e859ba82.json b/data/hfopenllm_v2/vonjack/SmolLM2-135M-Merged/2c1cab05-b63f-49ca-a709-b5a4e859ba82.json deleted file mode 100644 index 0631717dc..000000000 --- a/data/hfopenllm_v2/vonjack/SmolLM2-135M-Merged/2c1cab05-b63f-49ca-a709-b5a4e859ba82.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/vonjack_SmolLM2-135M-Merged/1762652580.590627", - "retrieved_timestamp": "1762652580.590627", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "vonjack/SmolLM2-135M-Merged", - "developer": "vonjack", - "inference_platform": "unknown", - "id": "vonjack/SmolLM2-135M-Merged", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24829674153468353 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3099931265410582 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.011329305135951661 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23825503355704697 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36618749999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11120345744680851 - } - } - ] -} diff --git a/data/hfopenllm_v2/vonjack/SmolLM2-135M-Merged/ec4d21be-b1a6-47a9-84a4-1a25249c1768.json b/data/hfopenllm_v2/vonjack/SmolLM2-135M-Merged/ec4d21be-b1a6-47a9-84a4-1a25249c1768.json new file mode 100644 index 000000000..409a63ac3 --- /dev/null +++ b/data/hfopenllm_v2/vonjack/SmolLM2-135M-Merged/ec4d21be-b1a6-47a9-84a4-1a25249c1768.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/vonjack_SmolLM2-135M-Merged/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SmolLM2-135M-Merged", + "id": "vonjack/SmolLM2-135M-Merged", + "developer": "vonjack", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2483 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.31 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0113 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2383 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 
0.3662 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1112 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vonjack/SmolLM2-360M-Merged/c6b03539-04b3-4ef2-909d-8036a7ea2ae1.json b/data/hfopenllm_v2/vonjack/SmolLM2-360M-Merged/c6b03539-04b3-4ef2-909d-8036a7ea2ae1.json new file mode 100644 index 000000000..0d45953b7 --- /dev/null +++ b/data/hfopenllm_v2/vonjack/SmolLM2-360M-Merged/c6b03539-04b3-4ef2-909d-8036a7ea2ae1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/vonjack_SmolLM2-360M-Merged/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SmolLM2-360M-Merged", + "id": "vonjack/SmolLM2-360M-Merged", + "developer": "vonjack", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.362 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3206 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3155 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0174 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2559 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3527 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1098 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vonjack/SmolLM2-360M-Merged/f1980c69-8c24-4fcd-ace1-797195026c7b.json b/data/hfopenllm_v2/vonjack/SmolLM2-360M-Merged/f1980c69-8c24-4fcd-ace1-797195026c7b.json deleted file mode 100644 index bc1b358b7..000000000 --- a/data/hfopenllm_v2/vonjack/SmolLM2-360M-Merged/f1980c69-8c24-4fcd-ace1-797195026c7b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/vonjack_SmolLM2-360M-Merged/1762652580.590822", - "retrieved_timestamp": "1762652580.590823", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "vonjack/SmolLM2-360M-Merged", - "developer": "vonjack", - "inference_platform": "unknown", - "id": "vonjack/SmolLM2-360M-Merged", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.362 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32058715319795916 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31548533684955926 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.017371601208459216 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3527291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10979055851063829 - } - } - ] -} diff --git a/data/hfopenllm_v2/w4r10ck/SOLAR-10.7B-Instruct-v1.0-uncensored/9add85f6-b577-449e-8a2f-ae77a2588bc7.json b/data/hfopenllm_v2/w4r10ck/SOLAR-10.7B-Instruct-v1.0-uncensored/9add85f6-b577-449e-8a2f-ae77a2588bc7.json deleted file mode 100644 index 583b4ed4b..000000000 --- a/data/hfopenllm_v2/w4r10ck/SOLAR-10.7B-Instruct-v1.0-uncensored/9add85f6-b577-449e-8a2f-ae77a2588bc7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/w4r10ck_SOLAR-10.7B-Instruct-v1.0-uncensored/1762652580.5912771", - "retrieved_timestamp": "1762652580.591278", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "w4r10ck/SOLAR-10.7B-Instruct-v1.0-uncensored", - "developer": "w4r10ck", - "inference_platform": "unknown", - "id": "w4r10ck/SOLAR-10.7B-Instruct-v1.0-uncensored", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38840609582574237 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5301525050503222 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06570996978851963 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4639479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3343583776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/w4r10ck/SOLAR-10.7B-Instruct-v1.0-uncensored/f156ac38-056e-4ef1-bdbe-e83c299a683b.json b/data/hfopenllm_v2/w4r10ck/SOLAR-10.7B-Instruct-v1.0-uncensored/f156ac38-056e-4ef1-bdbe-e83c299a683b.json new file mode 100644 index 000000000..1c4c50f83 --- /dev/null +++ b/data/hfopenllm_v2/w4r10ck/SOLAR-10.7B-Instruct-v1.0-uncensored/f156ac38-056e-4ef1-bdbe-e83c299a683b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/w4r10ck_SOLAR-10.7B-Instruct-v1.0-uncensored/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SOLAR-10.7B-Instruct-v1.0-uncensored", + "id": "w4r10ck/SOLAR-10.7B-Instruct-v1.0-uncensored", + "developer": "w4r10ck", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", 
+ "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3884 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5302 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0657 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4639 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3344 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/wanlige/li-14b-v0.4-slerp/11d3c8db-300c-4e02-b729-7adba6844ad2.json b/data/hfopenllm_v2/wanlige/li-14b-v0.4-slerp/11d3c8db-300c-4e02-b729-7adba6844ad2.json new file mode 100644 index 000000000..11b77c784 --- /dev/null +++ b/data/hfopenllm_v2/wanlige/li-14b-v0.4-slerp/11d3c8db-300c-4e02-b729-7adba6844ad2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/wanlige_li-14b-v0.4-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "li-14b-v0.4-slerp", + "id": "wanlige/li-14b-v0.4-slerp", + "developer": "wanlige", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4606 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": 
"SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6587 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4192 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4002 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4768 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5372 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/wanlige/li-14b-v0.4-slerp/d2451e41-e4b0-4945-9ace-1b046b11528b.json b/data/hfopenllm_v2/wanlige/li-14b-v0.4-slerp/d2451e41-e4b0-4945-9ace-1b046b11528b.json deleted file mode 100644 index 71b244834..000000000 --- a/data/hfopenllm_v2/wanlige/li-14b-v0.4-slerp/d2451e41-e4b0-4945-9ace-1b046b11528b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/wanlige_li-14b-v0.4-slerp/1762652580.591778", - "retrieved_timestamp": "1762652580.591778", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "wanlige/li-14b-v0.4-slerp", - "developer": "wanlige", - "inference_platform": "unknown", - "id": "wanlige/li-14b-v0.4-slerp", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4605967721201967 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6587180444175935 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 
5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41918429003021146 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4001677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47675 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5372340425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/wanlige/li-14b-v0.4-slerp0.1/54a93ff0-bff3-4252-ba4a-e99f06b46896.json b/data/hfopenllm_v2/wanlige/li-14b-v0.4-slerp0.1/54a93ff0-bff3-4252-ba4a-e99f06b46896.json deleted file mode 100644 index e50e0f029..000000000 --- a/data/hfopenllm_v2/wanlige/li-14b-v0.4-slerp0.1/54a93ff0-bff3-4252-ba4a-e99f06b46896.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/wanlige_li-14b-v0.4-slerp0.1/1762652580.5919738", - "retrieved_timestamp": "1762652580.591975", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "wanlige/li-14b-v0.4-slerp0.1", - "developer": "wanlige", - "inference_platform": "unknown", - "id": "wanlige/li-14b-v0.4-slerp0.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7922722819895655 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6571741435852609 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5332326283987915 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35906040268456374 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4206666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5294215425531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/wanlige/li-14b-v0.4-slerp0.1/fc75a820-fc0b-4e50-9304-61f0e93795c0.json b/data/hfopenllm_v2/wanlige/li-14b-v0.4-slerp0.1/fc75a820-fc0b-4e50-9304-61f0e93795c0.json new file mode 100644 index 000000000..c132d7290 --- /dev/null +++ b/data/hfopenllm_v2/wanlige/li-14b-v0.4-slerp0.1/fc75a820-fc0b-4e50-9304-61f0e93795c0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/wanlige_li-14b-v0.4-slerp0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "li-14b-v0.4-slerp0.1", + "id": "wanlige/li-14b-v0.4-slerp0.1", + "developer": "wanlige", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7923 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6572 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5332 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3591 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4207 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5294 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/wanlige/li-14b-v0.4/8965f266-28f1-43f2-b03c-acc4a9478b7c.json 
b/data/hfopenllm_v2/wanlige/li-14b-v0.4/8965f266-28f1-43f2-b03c-acc4a9478b7c.json deleted file mode 100644 index fa8d2da85..000000000 --- a/data/hfopenllm_v2/wanlige/li-14b-v0.4/8965f266-28f1-43f2-b03c-acc4a9478b7c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/wanlige_li-14b-v0.4/1762652580.591545", - "retrieved_timestamp": "1762652580.591546", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "wanlige/li-14b-v0.4", - "developer": "wanlige", - "inference_platform": "unknown", - "id": "wanlige/li-14b-v0.4", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.813279875175645 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6544457993364277 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5574018126888217 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3389261744966443 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.446 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5167054521276596 - } - } - ] -} diff --git a/data/hfopenllm_v2/wanlige/li-14b-v0.4/bb66896f-799c-4e17-8b54-af5e795699fa.json b/data/hfopenllm_v2/wanlige/li-14b-v0.4/bb66896f-799c-4e17-8b54-af5e795699fa.json new file mode 100644 index 000000000..3c975e261 --- /dev/null +++ b/data/hfopenllm_v2/wanlige/li-14b-v0.4/bb66896f-799c-4e17-8b54-af5e795699fa.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/wanlige_li-14b-v0.4/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "li-14b-v0.4", + "id": "wanlige/li-14b-v0.4", + "developer": "wanlige", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + 
"evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8133 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6544 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5574 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3389 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.446 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5167 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/wannaphong/KhanomTanLLM-Instruct/30a1a786-7478-401f-85ae-57037ada3d32.json b/data/hfopenllm_v2/wannaphong/KhanomTanLLM-Instruct/30a1a786-7478-401f-85ae-57037ada3d32.json new file mode 100644 index 000000000..2276ead59 --- /dev/null +++ b/data/hfopenllm_v2/wannaphong/KhanomTanLLM-Instruct/30a1a786-7478-401f-85ae-57037ada3d32.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/wannaphong_KhanomTanLLM-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "KhanomTanLLM-Instruct", + "id": "wannaphong/KhanomTanLLM-Instruct", + "developer": "wannaphong", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.447 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1621 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3093 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0136 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2634 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3701 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1119 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/wannaphong/KhanomTanLLM-Instruct/681b02e4-7b57-42b7-9550-59c664511b01.json b/data/hfopenllm_v2/wannaphong/KhanomTanLLM-Instruct/681b02e4-7b57-42b7-9550-59c664511b01.json deleted file mode 100644 index 9dbe5bed8..000000000 --- a/data/hfopenllm_v2/wannaphong/KhanomTanLLM-Instruct/681b02e4-7b57-42b7-9550-59c664511b01.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/wannaphong_KhanomTanLLM-Instruct/1762652580.59218", - "retrieved_timestamp": "1762652580.59218", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "wannaphong/KhanomTanLLM-Instruct", - "developer": "wannaphong", - "inference_platform": "unknown", - "id": "wannaphong/KhanomTanLLM-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.447 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16211762567764643 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": 
"Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30931233392513263 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37006249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1118683510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/waqasali1707/Beast-Soul-new/05430b16-07b6-41a1-ade9-6211cdf8ccf1.json b/data/hfopenllm_v2/waqasali1707/Beast-Soul-new/05430b16-07b6-41a1-ade9-6211cdf8ccf1.json new file mode 100644 index 000000000..7fbd3aeba --- /dev/null +++ b/data/hfopenllm_v2/waqasali1707/Beast-Soul-new/05430b16-07b6-41a1-ade9-6211cdf8ccf1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/waqasali1707_Beast-Soul-new/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Beast-Soul-new", + "id": "waqasali1707/Beast-Soul-new", + "developer": "waqasali1707", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.503 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5225 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0702 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4486 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3108 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/waqasali1707/Beast-Soul-new/c04bef75-d3cc-463e-ac24-a2b18d3611af.json b/data/hfopenllm_v2/waqasali1707/Beast-Soul-new/c04bef75-d3cc-463e-ac24-a2b18d3611af.json deleted file mode 100644 index d55a4c7ad..000000000 --- a/data/hfopenllm_v2/waqasali1707/Beast-Soul-new/c04bef75-d3cc-463e-ac24-a2b18d3611af.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/waqasali1707_Beast-Soul-new/1762652580.592428", - "retrieved_timestamp": "1762652580.592428", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "waqasali1707/Beast-Soul-new", - "developer": "waqasali1707", - "inference_platform": "unknown", - "id": "waqasali1707/Beast-Soul-new", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5029865202108184 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.522494907014536 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0702416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4485625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on 
MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3107546542553192 - } - } - ] -} diff --git a/data/hfopenllm_v2/wave-on-discord/qwent-7b/09bc4d5a-f104-4a36-999c-11e2532eef1e.json b/data/hfopenllm_v2/wave-on-discord/qwent-7b/09bc4d5a-f104-4a36-999c-11e2532eef1e.json new file mode 100644 index 000000000..3152ab264 --- /dev/null +++ b/data/hfopenllm_v2/wave-on-discord/qwent-7b/09bc4d5a-f104-4a36-999c-11e2532eef1e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/wave-on-discord_qwent-7b/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwent-7b", + "id": "wave-on-discord/qwent-7b", + "developer": "wave-on-discord", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2015 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4228 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0038 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2651 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3817 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1603 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/weathermanj/Menda-3B-500/468d60fa-5c01-41bd-a791-e0e86c2d02bf.json 
b/data/hfopenllm_v2/weathermanj/Menda-3B-500/468d60fa-5c01-41bd-a791-e0e86c2d02bf.json deleted file mode 100644 index 7b9e7219c..000000000 --- a/data/hfopenllm_v2/weathermanj/Menda-3B-500/468d60fa-5c01-41bd-a791-e0e86c2d02bf.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/weathermanj_Menda-3B-500/1762652580.593058", - "retrieved_timestamp": "1762652580.593059", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "weathermanj/Menda-3B-500", - "developer": "weathermanj", - "inference_platform": "unknown", - "id": "weathermanj/Menda-3B-500", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6353021095138676 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4766312519942703 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3723564954682779 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39679166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3474900265957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/weathermanj/Menda-3B-500/a92cfff6-6caf-4bf1-913a-9d7dd2d8d449.json b/data/hfopenllm_v2/weathermanj/Menda-3B-500/a92cfff6-6caf-4bf1-913a-9d7dd2d8d449.json new file mode 100644 index 000000000..cc66f0e58 --- /dev/null +++ b/data/hfopenllm_v2/weathermanj/Menda-3B-500/a92cfff6-6caf-4bf1-913a-9d7dd2d8d449.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/weathermanj_Menda-3B-500/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Menda-3B-500", + "id": "weathermanj/Menda-3B-500", + "developer": "weathermanj", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + 
"params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6353 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4766 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3724 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2878 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3968 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3475 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/weathermanj/Menda-3b-750/8972e92c-ebbe-4dc4-8a8c-6f7a42ab5c11.json b/data/hfopenllm_v2/weathermanj/Menda-3b-750/8972e92c-ebbe-4dc4-8a8c-6f7a42ab5c11.json new file mode 100644 index 000000000..ed1d22611 --- /dev/null +++ b/data/hfopenllm_v2/weathermanj/Menda-3b-750/8972e92c-ebbe-4dc4-8a8c-6f7a42ab5c11.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/weathermanj_Menda-3b-750/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Menda-3b-750", + "id": "weathermanj/Menda-3b-750", + "developer": "weathermanj", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6335 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4737 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3716 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2878 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3942 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3506 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/weathermanj/Menda-3b-750/9f1f8a2e-3a63-4b8e-85e9-141477fddcc3.json b/data/hfopenllm_v2/weathermanj/Menda-3b-750/9f1f8a2e-3a63-4b8e-85e9-141477fddcc3.json deleted file mode 100644 index 87b072542..000000000 --- a/data/hfopenllm_v2/weathermanj/Menda-3b-750/9f1f8a2e-3a63-4b8e-85e9-141477fddcc3.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/weathermanj_Menda-3b-750/1762652580.593308", - "retrieved_timestamp": "1762652580.593309", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "weathermanj/Menda-3b-750", - "developer": "weathermanj", - "inference_platform": "unknown", - "id": "weathermanj/Menda-3b-750", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6335035483627884 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4736825577251204 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3716012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39418749999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3505651595744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/weathermanj/Menda-3b-Optim-100/e33fb04e-ac99-423f-ac8c-5268e527bf34.json b/data/hfopenllm_v2/weathermanj/Menda-3b-Optim-100/e33fb04e-ac99-423f-ac8c-5268e527bf34.json deleted file mode 100644 index 048311d8e..000000000 --- a/data/hfopenllm_v2/weathermanj/Menda-3b-Optim-100/e33fb04e-ac99-423f-ac8c-5268e527bf34.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/weathermanj_Menda-3b-Optim-100/1762652580.5935092", - "retrieved_timestamp": "1762652580.59351", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "weathermanj/Menda-3b-Optim-100", - "developer": "weathermanj", - "inference_platform": "unknown", - "id": "weathermanj/Menda-3b-Optim-100", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6398234462337709 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47348022177793836 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3716012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39930208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3460771276595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/weathermanj/Menda-3b-Optim-100/e4f39815-9704-4d0a-8d9b-39359367adcc.json b/data/hfopenllm_v2/weathermanj/Menda-3b-Optim-100/e4f39815-9704-4d0a-8d9b-39359367adcc.json new file mode 100644 index 000000000..d3fdfc367 --- /dev/null +++ b/data/hfopenllm_v2/weathermanj/Menda-3b-Optim-100/e4f39815-9704-4d0a-8d9b-39359367adcc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/weathermanj_Menda-3b-Optim-100/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Menda-3b-Optim-100", + "id": "weathermanj/Menda-3b-Optim-100", + "developer": "weathermanj", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6398 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4735 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3716 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3993 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", 
+ "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3461 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/weathermanj/Menda-3b-Optim-200/b8b84752-c112-47be-8a86-35ca0e578301.json b/data/hfopenllm_v2/weathermanj/Menda-3b-Optim-200/b8b84752-c112-47be-8a86-35ca0e578301.json deleted file mode 100644 index bc7ef7b65..000000000 --- a/data/hfopenllm_v2/weathermanj/Menda-3b-Optim-200/b8b84752-c112-47be-8a86-35ca0e578301.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/weathermanj_Menda-3b-Optim-200/1762652580.5937102", - "retrieved_timestamp": "1762652580.5937111", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "weathermanj/Menda-3b-Optim-200", - "developer": "weathermanj", - "inference_platform": "unknown", - "id": "weathermanj/Menda-3b-Optim-200", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6374752323834094 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47460604908284837 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3731117824773414 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40330208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3484042553191489 - } - } - ] -} diff --git a/data/hfopenllm_v2/weathermanj/Menda-3b-Optim-200/f40df456-eb9a-46f8-8fb0-b6ad2748f3c2.json b/data/hfopenllm_v2/weathermanj/Menda-3b-Optim-200/f40df456-eb9a-46f8-8fb0-b6ad2748f3c2.json new file mode 100644 index 000000000..dc2afd5e8 --- /dev/null +++ b/data/hfopenllm_v2/weathermanj/Menda-3b-Optim-200/f40df456-eb9a-46f8-8fb0-b6ad2748f3c2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/weathermanj_Menda-3b-Optim-200/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": 
"documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Menda-3b-Optim-200", + "id": "weathermanj/Menda-3b-Optim-200", + "developer": "weathermanj", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6375 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4746 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3731 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2827 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4033 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3484 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/win10/ArliAI-RPMax-v1.3-merge-13.3B/16777b0f-3063-45eb-be07-294d13f975ac.json b/data/hfopenllm_v2/win10/ArliAI-RPMax-v1.3-merge-13.3B/16777b0f-3063-45eb-be07-294d13f975ac.json deleted file mode 100644 index 7ccfd02ef..000000000 --- a/data/hfopenllm_v2/win10/ArliAI-RPMax-v1.3-merge-13.3B/16777b0f-3063-45eb-be07-294d13f975ac.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/win10_ArliAI-RPMax-v1.3-merge-13.3B/1762652580.593927", - "retrieved_timestamp": "1762652580.5939279", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - 
}, - "model_info": { - "name": "win10/ArliAI-RPMax-v1.3-merge-13.3B", - "developer": "win10", - "inference_platform": "unknown", - "id": "win10/ArliAI-RPMax-v1.3-merge-13.3B", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.265 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3038260703821416 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4581388671914119 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4325104166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31998005319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/win10/ArliAI-RPMax-v1.3-merge-13.3B/398996d9-299b-4120-a757-e2fe14e779ee.json b/data/hfopenllm_v2/win10/ArliAI-RPMax-v1.3-merge-13.3B/398996d9-299b-4120-a757-e2fe14e779ee.json new file mode 100644 index 000000000..8283795f0 --- /dev/null +++ b/data/hfopenllm_v2/win10/ArliAI-RPMax-v1.3-merge-13.3B/398996d9-299b-4120-a757-e2fe14e779ee.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/win10_ArliAI-RPMax-v1.3-merge-13.3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ArliAI-RPMax-v1.3-merge-13.3B", + "id": "win10/ArliAI-RPMax-v1.3-merge-13.3B", + "developer": "win10", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.265 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3038 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4581 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0393 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4325 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.32 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/win10/Breeze-13B-32k-Instruct-v1_0/4398633e-77b0-4b61-ae85-29b0e5aad38b.json b/data/hfopenllm_v2/win10/Breeze-13B-32k-Instruct-v1_0/4398633e-77b0-4b61-ae85-29b0e5aad38b.json new file mode 100644 index 000000000..4e29ec855 --- /dev/null +++ b/data/hfopenllm_v2/win10/Breeze-13B-32k-Instruct-v1_0/4398633e-77b0-4b61-ae85-29b0e5aad38b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/win10_Breeze-13B-32k-Instruct-v1_0/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Breeze-13B-32k-Instruct-v1_0", + "id": "win10/Breeze-13B-32k-Instruct-v1_0", + "developer": "win10", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.726 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3584 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4611 + } + }, + { + "evaluation_name": "MATH Level 5", + 
"source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0128 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2643 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4202 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2568 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/win10/Breeze-13B-32k-Instruct-v1_0/bc990db1-c6d9-4113-9946-466bfd5cf9cc.json b/data/hfopenllm_v2/win10/Breeze-13B-32k-Instruct-v1_0/bc990db1-c6d9-4113-9946-466bfd5cf9cc.json deleted file mode 100644 index c4ca07999..000000000 --- a/data/hfopenllm_v2/win10/Breeze-13B-32k-Instruct-v1_0/bc990db1-c6d9-4113-9946-466bfd5cf9cc.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/win10_Breeze-13B-32k-Instruct-v1_0/1762652580.5941818", - "retrieved_timestamp": "1762652580.594183", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "win10/Breeze-13B-32k-Instruct-v1_0", - "developer": "win10", - "inference_platform": "unknown", - "id": "win10/Breeze-13B-32k-Instruct-v1_0", - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.726 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35843118481185476 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46112304746712934 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42019791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2568151595744681 - } - } - ] -} diff --git a/data/hfopenllm_v2/win10/EVA-Norns-Qwen2.5-v0.1/1bc60148-512f-4830-b541-f30535cf74bf.json b/data/hfopenllm_v2/win10/EVA-Norns-Qwen2.5-v0.1/1bc60148-512f-4830-b541-f30535cf74bf.json new file mode 100644 index 000000000..2e96decff --- /dev/null +++ b/data/hfopenllm_v2/win10/EVA-Norns-Qwen2.5-v0.1/1bc60148-512f-4830-b541-f30535cf74bf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/win10_EVA-Norns-Qwen2.5-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EVA-Norns-Qwen2.5-v0.1", + "id": "win10/EVA-Norns-Qwen2.5-v0.1", + "developer": "win10", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.622 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5072 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2613 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.4045 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3425 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/win10/Llama-3.2-3B-Instruct-24-9-29/a9dfb20a-13e0-4419-a747-7c001b2e9435.json b/data/hfopenllm_v2/win10/Llama-3.2-3B-Instruct-24-9-29/a9dfb20a-13e0-4419-a747-7c001b2e9435.json new file mode 100644 index 000000000..e291ffe11 --- /dev/null +++ b/data/hfopenllm_v2/win10/Llama-3.2-3B-Instruct-24-9-29/a9dfb20a-13e0-4419-a747-7c001b2e9435.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/win10_Llama-3.2-3B-Instruct-24-9-29/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.2-3B-Instruct-24-9-29", + "id": "win10/Llama-3.2-3B-Instruct-24-9-29", + "developer": "win10", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7332 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4614 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1707 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3555 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": 
"TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3228 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/win10/Llama-3.2-3B-Instruct-24-9-29/bf253a63-4685-4e51-8a0d-5209306926c8.json b/data/hfopenllm_v2/win10/Llama-3.2-3B-Instruct-24-9-29/bf253a63-4685-4e51-8a0d-5209306926c8.json deleted file mode 100644 index 6fc6cefc9..000000000 --- a/data/hfopenllm_v2/win10/Llama-3.2-3B-Instruct-24-9-29/bf253a63-4685-4e51-8a0d-5209306926c8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/win10_Llama-3.2-3B-Instruct-24-9-29/1762652580.594629", - "retrieved_timestamp": "1762652580.59463", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "win10/Llama-3.2-3B-Instruct-24-9-29", - "developer": "win10", - "inference_platform": "unknown", - "id": "win10/Llama-3.2-3B-Instruct-24-9-29", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7332211864519476 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4614234982167829 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17069486404833836 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35552083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3228058510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/win10/Norns-Qwen2.5-12B/388e3559-a3b6-4738-9843-9bdd048bae09.json b/data/hfopenllm_v2/win10/Norns-Qwen2.5-12B/388e3559-a3b6-4738-9843-9bdd048bae09.json new file mode 100644 index 000000000..f5e98a734 --- /dev/null +++ b/data/hfopenllm_v2/win10/Norns-Qwen2.5-12B/388e3559-a3b6-4738-9843-9bdd048bae09.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/win10_Norns-Qwen2.5-12B/1770682486.623709", + "retrieved_timestamp": 
"1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Norns-Qwen2.5-12B", + "id": "win10/Norns-Qwen2.5-12B", + "developer": "win10", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 12.277 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4897 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4619 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0838 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3555 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.266 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/win10/Norns-Qwen2.5-7B/994a6930-42d5-463a-9e7c-0a3070144211.json b/data/hfopenllm_v2/win10/Norns-Qwen2.5-7B/994a6930-42d5-463a-9e7c-0a3070144211.json new file mode 100644 index 000000000..a0a55bfe7 --- /dev/null +++ b/data/hfopenllm_v2/win10/Norns-Qwen2.5-7B/994a6930-42d5-463a-9e7c-0a3070144211.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/win10_Norns-Qwen2.5-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Norns-Qwen2.5-7B", + "id": "win10/Norns-Qwen2.5-7B", + 
"developer": "win10", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6122 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5073 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2628 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2844 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4085 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3413 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/win10/Qwen2.5-2B-Instruct/143dc973-1063-45d6-9747-9f24a9ae5657.json b/data/hfopenllm_v2/win10/Qwen2.5-2B-Instruct/143dc973-1063-45d6-9747-9f24a9ae5657.json deleted file mode 100644 index 19e8be69c..000000000 --- a/data/hfopenllm_v2/win10/Qwen2.5-2B-Instruct/143dc973-1063-45d6-9747-9f24a9ae5657.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/win10_Qwen2.5-2B-Instruct/1762652580.5952861", - "retrieved_timestamp": "1762652580.595287", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "win10/Qwen2.5-2B-Instruct", - "developer": "win10", - "inference_platform": "unknown", - "id": "win10/Qwen2.5-2B-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - 
"params_billions": 2.9 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22728914834860392 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3705905854806977 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.022658610271903322 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43784375000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19340093085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/win10/Qwen2.5-2B-Instruct/cce46320-9794-443a-831a-92e2a21515b0.json b/data/hfopenllm_v2/win10/Qwen2.5-2B-Instruct/cce46320-9794-443a-831a-92e2a21515b0.json new file mode 100644 index 000000000..0e10fdee0 --- /dev/null +++ b/data/hfopenllm_v2/win10/Qwen2.5-2B-Instruct/cce46320-9794-443a-831a-92e2a21515b0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/win10_Qwen2.5-2B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-2B-Instruct", + "id": "win10/Qwen2.5-2B-Instruct", + "developer": "win10", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 2.9 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2273 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3706 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": 
"DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0227 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2676 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4378 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1934 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/win10/llama3-13.45b-Instruct/3c9eb291-6171-4d40-aa5f-58d39738fdcb.json b/data/hfopenllm_v2/win10/llama3-13.45b-Instruct/3c9eb291-6171-4d40-aa5f-58d39738fdcb.json deleted file mode 100644 index d60fd3caa..000000000 --- a/data/hfopenllm_v2/win10/llama3-13.45b-Instruct/3c9eb291-6171-4d40-aa5f-58d39738fdcb.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/win10_llama3-13.45b-Instruct/1762652580.595499", - "retrieved_timestamp": "1762652580.5955", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "win10/llama3-13.45b-Instruct", - "developer": "win10", - "inference_platform": "unknown", - "id": "win10/llama3-13.45b-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.265 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4144348107465968 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.486541523346714 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02416918429003021 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38476041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3345246010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/win10/llama3-13.45b-Instruct/988f4cc0-ebfb-43a9-8a7f-3dd1f1c1e342.json b/data/hfopenllm_v2/win10/llama3-13.45b-Instruct/988f4cc0-ebfb-43a9-8a7f-3dd1f1c1e342.json new file mode 100644 index 000000000..4c991522b --- /dev/null +++ b/data/hfopenllm_v2/win10/llama3-13.45b-Instruct/988f4cc0-ebfb-43a9-8a7f-3dd1f1c1e342.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/win10_llama3-13.45b-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama3-13.45b-Instruct", + "id": "win10/llama3-13.45b-Instruct", + "developer": "win10", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.265 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4144 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4865 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0242 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3848 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": 
"MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3345 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/win10/miscii-14b-1M-0128/3c675148-5d09-4778-baad-9295ef8cfc79.json b/data/hfopenllm_v2/win10/miscii-14b-1M-0128/3c675148-5d09-4778-baad-9295ef8cfc79.json new file mode 100644 index 000000000..be52a6670 --- /dev/null +++ b/data/hfopenllm_v2/win10/miscii-14b-1M-0128/3c675148-5d09-4778-baad-9295ef8cfc79.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/win10_miscii-14b-1M-0128/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "miscii-14b-1M-0128", + "id": "win10/miscii-14b-1M-0128", + "developer": "win10", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4181 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5742 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4773 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3826 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5431 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.4491 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/win10/miscii-14b-1M-0128/c19f2ddd-7710-4844-9f1f-c0cd1c7e3e41.json b/data/hfopenllm_v2/win10/miscii-14b-1M-0128/c19f2ddd-7710-4844-9f1f-c0cd1c7e3e41.json deleted file mode 100644 index 142295669..000000000 --- a/data/hfopenllm_v2/win10/miscii-14b-1M-0128/c19f2ddd-7710-4844-9f1f-c0cd1c7e3e41.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/win10_miscii-14b-1M-0128/1762652580.5956988", - "retrieved_timestamp": "1762652580.5957", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "win10/miscii-14b-1M-0128", - "developer": "win10", - "inference_platform": "unknown", - "id": "win10/miscii-14b-1M-0128", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4180818007331658 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5741994518517665 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4773413897280967 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3825503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5431041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44913563829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/winglian/Llama-3-8b-64k-PoSE/620b80ba-81ab-4504-9f42-4965014f3cd1.json b/data/hfopenllm_v2/winglian/Llama-3-8b-64k-PoSE/620b80ba-81ab-4504-9f42-4965014f3cd1.json new file mode 100644 index 000000000..6c33d53ee --- /dev/null +++ b/data/hfopenllm_v2/winglian/Llama-3-8b-64k-PoSE/620b80ba-81ab-4504-9f42-4965014f3cd1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/winglian_Llama-3-8b-64k-PoSE/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8b-64k-PoSE", + "id": 
"winglian/Llama-3-8b-64k-PoSE", + "developer": "winglian", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2857 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3702 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0415 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3396 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2467 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/winglian/llama-3-8b-256k-PoSE/b6c68fc1-c2c1-4cdf-91ef-2007becd7ade.json b/data/hfopenllm_v2/winglian/llama-3-8b-256k-PoSE/b6c68fc1-c2c1-4cdf-91ef-2007becd7ade.json new file mode 100644 index 000000000..e3d0f5b20 --- /dev/null +++ b/data/hfopenllm_v2/winglian/llama-3-8b-256k-PoSE/b6c68fc1-c2c1-4cdf-91ef-2007becd7ade.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/winglian_llama-3-8b-256k-PoSE/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-3-8b-256k-PoSE", + "id": "winglian/llama-3-8b-256k-PoSE", + "developer": "winglian", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + 
"evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2909 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3157 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0196 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2576 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3316 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1116 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/wzhouad/gemma-2-9b-it-WPO-HB/19279c18-c2f7-4f75-a9c5-a121b2d4bcff.json b/data/hfopenllm_v2/wzhouad/gemma-2-9b-it-WPO-HB/19279c18-c2f7-4f75-a9c5-a121b2d4bcff.json new file mode 100644 index 000000000..7af55e4ae --- /dev/null +++ b/data/hfopenllm_v2/wzhouad/gemma-2-9b-it-WPO-HB/19279c18-c2f7-4f75-a9c5-a121b2d4bcff.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/wzhouad_gemma-2-9b-it-WPO-HB/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-9b-it-WPO-HB", + "id": "wzhouad/gemma-2-9b-it-WPO-HB", + "developer": "wzhouad", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5437 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5629 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1533 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3498 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3675 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.336 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/x0000001/Deepseek-Lumen-R1-Qwen2.5-14B/7966789d-8ace-4b39-9093-96bbb8e641d8.json b/data/hfopenllm_v2/x0000001/Deepseek-Lumen-R1-Qwen2.5-14B/7966789d-8ace-4b39-9093-96bbb8e641d8.json new file mode 100644 index 000000000..1d76659b4 --- /dev/null +++ b/data/hfopenllm_v2/x0000001/Deepseek-Lumen-R1-Qwen2.5-14B/7966789d-8ace-4b39-9093-96bbb8e641d8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/x0000001_Deepseek-Lumen-R1-Qwen2.5-14B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Deepseek-Lumen-R1-Qwen2.5-14B", + "id": "x0000001/Deepseek-Lumen-R1-Qwen2.5-14B", + "developer": "x0000001", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4436 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": 
"hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4569 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2779 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2852 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.474 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4379 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xMaulana/FinMatcha-3B-Instruct/105021c8-c214-4a6a-ac3b-747c4c48886e.json b/data/hfopenllm_v2/xMaulana/FinMatcha-3B-Instruct/105021c8-c214-4a6a-ac3b-747c4c48886e.json deleted file mode 100644 index f68994089..000000000 --- a/data/hfopenllm_v2/xMaulana/FinMatcha-3B-Instruct/105021c8-c214-4a6a-ac3b-747c4c48886e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/xMaulana_FinMatcha-3B-Instruct/1762652580.5969138", - "retrieved_timestamp": "1762652580.5969138", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "xMaulana/FinMatcha-3B-Instruct", - "developer": "xMaulana", - "inference_platform": "unknown", - "id": "xMaulana/FinMatcha-3B-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7548283000217202 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.453555265188897 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": 
{ - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36333333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3181515957446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/xMaulana/FinMatcha-3B-Instruct/5e1d849d-0342-4de9-a7d8-dd5cd5960fac.json b/data/hfopenllm_v2/xMaulana/FinMatcha-3B-Instruct/5e1d849d-0342-4de9-a7d8-dd5cd5960fac.json new file mode 100644 index 000000000..ef5a60545 --- /dev/null +++ b/data/hfopenllm_v2/xMaulana/FinMatcha-3B-Instruct/5e1d849d-0342-4de9-a7d8-dd5cd5960fac.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/xMaulana_FinMatcha-3B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FinMatcha-3B-Instruct", + "id": "xMaulana/FinMatcha-3B-Instruct", + "developer": "xMaulana", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7548 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4536 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1435 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2693 + } + }, + { + 
"evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3633 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3182 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xinchen9/Llama3.1_8B_Instruct_CoT/a17563e3-0369-4042-8006-2ec781653f63.json b/data/hfopenllm_v2/xinchen9/Llama3.1_8B_Instruct_CoT/a17563e3-0369-4042-8006-2ec781653f63.json new file mode 100644 index 000000000..6fe27572a --- /dev/null +++ b/data/hfopenllm_v2/xinchen9/Llama3.1_8B_Instruct_CoT/a17563e3-0369-4042-8006-2ec781653f63.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/xinchen9_Llama3.1_8B_Instruct_CoT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.1_8B_Instruct_CoT", + "id": "xinchen9/Llama3.1_8B_Instruct_CoT", + "developer": "xinchen9", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2974 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4398 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0604 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": 
false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4371 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2879 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xinchen9/Llama3.1_CoT/68369110-e371-4112-ae0a-14f7fe9fc40f.json b/data/hfopenllm_v2/xinchen9/Llama3.1_CoT/68369110-e371-4112-ae0a-14f7fe9fc40f.json new file mode 100644 index 000000000..9bdeb1364 --- /dev/null +++ b/data/hfopenllm_v2/xinchen9/Llama3.1_CoT/68369110-e371-4112-ae0a-14f7fe9fc40f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/xinchen9_Llama3.1_CoT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.1_CoT", + "id": "xinchen9/Llama3.1_CoT", + "developer": "xinchen9", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2246 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4341 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0385 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2886 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4305 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2739 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xinchen9/Llama3.1_CoT_V1/2a6925d3-992f-4c4f-a57b-3eb41062743b.json b/data/hfopenllm_v2/xinchen9/Llama3.1_CoT_V1/2a6925d3-992f-4c4f-a57b-3eb41062743b.json new file mode 100644 index 000000000..cd826b22d --- /dev/null +++ b/data/hfopenllm_v2/xinchen9/Llama3.1_CoT_V1/2a6925d3-992f-4c4f-a57b-3eb41062743b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/xinchen9_Llama3.1_CoT_V1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3.1_CoT_V1", + "id": "xinchen9/Llama3.1_CoT_V1", + "developer": "xinchen9", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2453 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4376 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0332 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4572 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2805 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/xinchen9/Mistral-7B-CoT/28290ea9-9ce5-4605-ac5b-aa2d606994d8.json b/data/hfopenllm_v2/xinchen9/Mistral-7B-CoT/28290ea9-9ce5-4605-ac5b-aa2d606994d8.json new file mode 100644 index 000000000..678ee6b2d --- /dev/null +++ b/data/hfopenllm_v2/xinchen9/Mistral-7B-CoT/28290ea9-9ce5-4605-ac5b-aa2d606994d8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/xinchen9_Mistral-7B-CoT/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mistral-7B-CoT", + "id": "xinchen9/Mistral-7B-CoT", + "developer": "xinchen9", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2783 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3873 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0249 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2492 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2284 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xinchen9/llama3-b8-ft-dis/5ea3a084-bc30-4390-97a2-1933c5422790.json b/data/hfopenllm_v2/xinchen9/llama3-b8-ft-dis/5ea3a084-bc30-4390-97a2-1933c5422790.json deleted file mode 100644 index b50eb96de..000000000 --- 
a/data/hfopenllm_v2/xinchen9/llama3-b8-ft-dis/5ea3a084-bc30-4390-97a2-1933c5422790.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/xinchen9_llama3-b8-ft-dis/1762652580.598142", - "retrieved_timestamp": "1762652580.598142", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "xinchen9/llama3-b8-ft-dis", - "developer": "xinchen9", - "inference_platform": "unknown", - "id": "xinchen9/llama3-b8-ft-dis", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.154598687039278 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4625789691224553 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.365375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3243849734042553 - } - } - ] -} diff --git a/data/hfopenllm_v2/xinchen9/llama3-b8-ft-dis/eb2ed6eb-4789-400d-aea5-841547a20cd7.json b/data/hfopenllm_v2/xinchen9/llama3-b8-ft-dis/eb2ed6eb-4789-400d-aea5-841547a20cd7.json new file mode 100644 index 000000000..bb558ab87 --- /dev/null +++ b/data/hfopenllm_v2/xinchen9/llama3-b8-ft-dis/eb2ed6eb-4789-400d-aea5-841547a20cd7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/xinchen9_llama3-b8-ft-dis/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama3-b8-ft-dis", + "id": "xinchen9/llama3-b8-ft-dis", + "developer": "xinchen9", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + 
"source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1546 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4626 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0393 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3129 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3654 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3244 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_2b-table/873218a0-7ddb-4287-88ce-8c8214e85c85.json b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_2b-table/873218a0-7ddb-4287-88ce-8c8214e85c85.json new file mode 100644 index 000000000..9463f18e7 --- /dev/null +++ b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_2b-table/873218a0-7ddb-4287-88ce-8c8214e85c85.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/xkp24_Llama-3-8B-Instruct-SPPO-Iter2_bt_2b-table/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct-SPPO-Iter2_bt_2b-table", + "id": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_2b-table", + "developer": "xkp24", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6375 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4912 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0921 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.382 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3686 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_2b-table/a9888e61-bd14-4769-b620-cda908c8ba3e.json b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_2b-table/a9888e61-bd14-4769-b620-cda908c8ba3e.json deleted file mode 100644 index a2451465b..000000000 --- a/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_2b-table/a9888e61-bd14-4769-b620-cda908c8ba3e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/xkp24_Llama-3-8B-Instruct-SPPO-Iter2_bt_2b-table/1762652580.598392", - "retrieved_timestamp": "1762652580.5983932", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_2b-table", - "developer": "xkp24", - "inference_platform": "unknown", - "id": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_2b-table", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.6374752323834094 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4912273915261041 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09214501510574018 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38199999999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3686003989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_8b-table/99d6ac02-a8f8-409f-ad9d-ce5fd7ed6fe0.json b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_8b-table/99d6ac02-a8f8-409f-ad9d-ce5fd7ed6fe0.json deleted file mode 100644 index 1483163b9..000000000 --- a/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_8b-table/99d6ac02-a8f8-409f-ad9d-ce5fd7ed6fe0.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/xkp24_Llama-3-8B-Instruct-SPPO-Iter2_bt_8b-table/1762652580.598656", - "retrieved_timestamp": "1762652580.598656", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_8b-table", - "developer": "xkp24", - "inference_platform": "unknown", - "id": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_8b-table", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7274509412802475 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5056858683165713 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, 
- "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38190624999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3696808510638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_8b-table/e4c32b92-46b4-431a-83f2-11499f587534.json b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_8b-table/e4c32b92-46b4-431a-83f2-11499f587534.json new file mode 100644 index 000000000..ea401b3b0 --- /dev/null +++ b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_8b-table/e4c32b92-46b4-431a-83f2-11499f587534.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/xkp24_Llama-3-8B-Instruct-SPPO-Iter2_bt_8b-table/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct-SPPO-Iter2_bt_8b-table", + "id": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_8b-table", + "developer": "xkp24", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7275 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5057 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0846 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + 
"score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3819 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3697 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_2b-table/71a54215-e97a-4ee6-928c-344bd690b020.json b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_2b-table/71a54215-e97a-4ee6-928c-344bd690b020.json deleted file mode 100644 index 6934fb0ea..000000000 --- a/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_2b-table/71a54215-e97a-4ee6-928c-344bd690b020.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/xkp24_Llama-3-8B-Instruct-SPPO-Iter2_gp_2b-table/1762652580.598878", - "retrieved_timestamp": "1762652580.5988789", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_2b-table", - "developer": "xkp24", - "inference_platform": "unknown", - "id": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_2b-table", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6568593553992297 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49518319163897667 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35939583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37017952127659576 - } - } - ] -} diff --git a/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_2b-table/a05681a0-07e4-4206-ae89-dee4e9706467.json 
b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_2b-table/a05681a0-07e4-4206-ae89-dee4e9706467.json new file mode 100644 index 000000000..a2cc2910a --- /dev/null +++ b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_2b-table/a05681a0-07e4-4206-ae89-dee4e9706467.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/xkp24_Llama-3-8B-Instruct-SPPO-Iter2_gp_2b-table/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct-SPPO-Iter2_gp_2b-table", + "id": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_2b-table", + "developer": "xkp24", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6569 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4952 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0891 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3594 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3702 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_8b-table/2fe15418-16bc-4f60-bad2-7329a3670507.json b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_8b-table/2fe15418-16bc-4f60-bad2-7329a3670507.json deleted file mode 100644 
index a8a02dbb1..000000000 --- a/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_8b-table/2fe15418-16bc-4f60-bad2-7329a3670507.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/xkp24_Llama-3-8B-Instruct-SPPO-Iter2_gp_8b-table/1762652580.599085", - "retrieved_timestamp": "1762652580.599086", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_8b-table", - "developer": "xkp24", - "inference_platform": "unknown", - "id": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_8b-table", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6620799478716473 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.500449109241973 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08610271903323263 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3805416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3599567819148936 - } - } - ] -} diff --git a/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_8b-table/b078f823-d603-4030-81a2-a3ca1a1117f9.json b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_8b-table/b078f823-d603-4030-81a2-a3ca1a1117f9.json new file mode 100644 index 000000000..0aa1684a6 --- /dev/null +++ b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_8b-table/b078f823-d603-4030-81a2-a3ca1a1117f9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/xkp24_Llama-3-8B-Instruct-SPPO-Iter2_gp_8b-table/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct-SPPO-Iter2_gp_8b-table", + "id": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_8b-table", + "developer": "xkp24", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6621 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5004 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0861 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3805 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.36 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_2b-table-0.001/26625158-6720-47c7-8c28-46ca7b4b947e.json b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_2b-table-0.001/26625158-6720-47c7-8c28-46ca7b4b947e.json new file mode 100644 index 000000000..296a0d099 --- /dev/null +++ b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_2b-table-0.001/26625158-6720-47c7-8c28-46ca7b4b947e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/xkp24_Llama-3-8B-Instruct-SPPO-score-Iter2_bt_2b-table-0.001/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct-SPPO-score-Iter2_bt_2b-table-0.001", + "id": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_2b-table-0.001", + "developer": "xkp24", + "inference_platform": "unknown", + "additional_details": { + 
"precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6042 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4936 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0997 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3793 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3708 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_2b-table-0.001/f6bcff0a-559b-44c1-9c70-259446b3ebe5.json b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_2b-table-0.001/f6bcff0a-559b-44c1-9c70-259446b3ebe5.json deleted file mode 100644 index ca7af7645..000000000 --- a/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_2b-table-0.001/f6bcff0a-559b-44c1-9c70-259446b3ebe5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/xkp24_Llama-3-8B-Instruct-SPPO-score-Iter2_bt_2b-table-0.001/1762652580.599285", - "retrieved_timestamp": "1762652580.599286", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_2b-table-0.001", - "developer": "xkp24", - "inference_platform": "unknown", - "id": 
"xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_2b-table-0.001", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6042278931014153 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4936062924421171 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09969788519637462 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3793333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.370844414893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_8b-table-0.002/4deeeff7-f62d-4c42-b32a-98bdd773a758.json b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_8b-table-0.002/4deeeff7-f62d-4c42-b32a-98bdd773a758.json deleted file mode 100644 index 6617099e6..000000000 --- a/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_8b-table-0.002/4deeeff7-f62d-4c42-b32a-98bdd773a758.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/xkp24_Llama-3-8B-Instruct-SPPO-score-Iter2_bt_8b-table-0.002/1762652580.599496", - "retrieved_timestamp": "1762652580.5994968", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_8b-table-0.002", - "developer": "xkp24", - "inference_platform": "unknown", - "id": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_8b-table-0.002", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7131876753680235 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4996376240562969 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08534743202416918 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3872083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3664394946808511 - } - } - ] -} diff --git a/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_8b-table-0.002/5e3e8dec-f14b-4b7a-ace1-1e1728395e84.json b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_8b-table-0.002/5e3e8dec-f14b-4b7a-ace1-1e1728395e84.json new file mode 100644 index 000000000..ca115a61a --- /dev/null +++ b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_8b-table-0.002/5e3e8dec-f14b-4b7a-ace1-1e1728395e84.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/xkp24_Llama-3-8B-Instruct-SPPO-score-Iter2_bt_8b-table-0.002/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct-SPPO-score-Iter2_bt_8b-table-0.002", + "id": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_8b-table-0.002", + "developer": "xkp24", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7132 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4996 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0853 + } + }, + { + 
"evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3872 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3664 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_2b-table-0.001/35b4378e-52cd-4ae1-985b-c8e2c00dc61a.json b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_2b-table-0.001/35b4378e-52cd-4ae1-985b-c8e2c00dc61a.json new file mode 100644 index 000000000..fbbc12e22 --- /dev/null +++ b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_2b-table-0.001/35b4378e-52cd-4ae1-985b-c8e2c00dc61a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/xkp24_Llama-3-8B-Instruct-SPPO-score-Iter2_gp_2b-table-0.001/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct-SPPO-score-Iter2_gp_2b-table-0.001", + "id": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_2b-table-0.001", + "developer": "xkp24", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5947 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4899 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1073 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": 
"GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3581 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3704 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_2b-table-0.001/8ec55b3f-e425-4ee9-98d5-dac775977514.json b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_2b-table-0.001/8ec55b3f-e425-4ee9-98d5-dac775977514.json deleted file mode 100644 index 016455e42..000000000 --- a/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_2b-table-0.001/8ec55b3f-e425-4ee9-98d5-dac775977514.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/xkp24_Llama-3-8B-Instruct-SPPO-score-Iter2_gp_2b-table-0.001/1762652580.599715", - "retrieved_timestamp": "1762652580.599715", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_2b-table-0.001", - "developer": "xkp24", - "inference_platform": "unknown", - "id": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_2b-table-0.001", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.594710922574325 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48992211803775065 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10725075528700906 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": 
"Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35809374999999993 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37042885638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_8b-table-0.002/4d99a55e-39c0-41c7-9ef0-494f739ceaec.json b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_8b-table-0.002/4d99a55e-39c0-41c7-9ef0-494f739ceaec.json new file mode 100644 index 000000000..60810c2c1 --- /dev/null +++ b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_8b-table-0.002/4d99a55e-39c0-41c7-9ef0-494f739ceaec.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/xkp24_Llama-3-8B-Instruct-SPPO-score-Iter2_gp_8b-table-0.002/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct-SPPO-score-Iter2_gp_8b-table-0.002", + "id": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_8b-table-0.002", + "developer": "xkp24", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6453 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4951 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0937 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3939 + } + }, + { + "evaluation_name": 
"MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.353 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_8b-table-0.002/c583cff2-2944-4afb-b32e-c0f49bc0d3b7.json b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_8b-table-0.002/c583cff2-2944-4afb-b32e-c0f49bc0d3b7.json deleted file mode 100644 index 5a726b5a8..000000000 --- a/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_8b-table-0.002/c583cff2-2944-4afb-b32e-c0f49bc0d3b7.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/xkp24_Llama-3-8B-Instruct-SPPO-score-Iter2_gp_8b-table-0.002/1762652580.599936", - "retrieved_timestamp": "1762652580.599936", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_8b-table-0.002", - "developer": "xkp24", - "inference_platform": "unknown", - "id": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_8b-table-0.002", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6453188650558297 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4951075713814987 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09365558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.393875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3529753989361702 - } - } - ] -} diff --git a/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_2b-table/a6996896-1464-4b55-a784-28deb06150c8.json b/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_2b-table/a6996896-1464-4b55-a784-28deb06150c8.json deleted file 
mode 100644 index 579285f72..000000000 --- a/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_2b-table/a6996896-1464-4b55-a784-28deb06150c8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/xukp20_Llama-3-8B-Instruct-SPPO-Iter3_bt_2b-table/1762652580.600162", - "retrieved_timestamp": "1762652580.600162", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_2b-table", - "developer": "xukp20", - "inference_platform": "unknown", - "id": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_2b-table", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.575601625908146 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4901206199104098 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09969788519637462 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36596874999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36585771276595747 - } - } - ] -} diff --git a/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_2b-table/f3c7bacd-e231-45fd-b503-ee4d34caf4e8.json b/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_2b-table/f3c7bacd-e231-45fd-b503-ee4d34caf4e8.json new file mode 100644 index 000000000..9670d371e --- /dev/null +++ b/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_2b-table/f3c7bacd-e231-45fd-b503-ee4d34caf4e8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/xukp20_Llama-3-8B-Instruct-SPPO-Iter3_bt_2b-table/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct-SPPO-Iter3_bt_2b-table", + "id": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_2b-table", + 
"developer": "xukp20", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5756 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4901 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0997 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.366 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3659 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_8b-table/1bb87d8f-2d66-42b2-a744-1a7cbc2c17dc.json b/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_8b-table/1bb87d8f-2d66-42b2-a744-1a7cbc2c17dc.json new file mode 100644 index 000000000..527102943 --- /dev/null +++ b/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_8b-table/1bb87d8f-2d66-42b2-a744-1a7cbc2c17dc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/xukp20_Llama-3-8B-Instruct-SPPO-Iter3_bt_8b-table/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct-SPPO-Iter3_bt_8b-table", + "id": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_8b-table", + "developer": "xukp20", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": 
"LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7034 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5092 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0967 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3739 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3693 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_8b-table/406f36fc-1243-4342-80c6-95b96fcc003f.json b/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_8b-table/406f36fc-1243-4342-80c6-95b96fcc003f.json deleted file mode 100644 index b08b8cccf..000000000 --- a/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_8b-table/406f36fc-1243-4342-80c6-95b96fcc003f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/xukp20_Llama-3-8B-Instruct-SPPO-Iter3_bt_8b-table/1762652580.600485", - "retrieved_timestamp": "1762652580.6004858", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_8b-table", - "developer": "xukp20", - "inference_platform": "unknown", - "id": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_8b-table", - "additional_details": { - "precision": "bfloat16", - "architecture": 
"LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7034457461757027 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5091868512191421 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09667673716012085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37390624999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3692652925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_2b-table/87bcbd57-2d0e-4d77-9f1e-3ec0199c8452.json b/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_2b-table/87bcbd57-2d0e-4d77-9f1e-3ec0199c8452.json deleted file mode 100644 index 3efe87a48..000000000 --- a/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_2b-table/87bcbd57-2d0e-4d77-9f1e-3ec0199c8452.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/xukp20_Llama-3-8B-Instruct-SPPO-Iter3_gp_2b-table/1762652580.6007009", - "retrieved_timestamp": "1762652580.6007009", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_2b-table", - "developer": "xukp20", - "inference_platform": "unknown", - "id": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_2b-table", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6023794642659255 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49695315361511977 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36736458333333327 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3657746010638298 - } - } - ] -} diff --git a/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_2b-table/ae10fd26-e648-4fa0-ae24-dfaaf4ff510d.json b/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_2b-table/ae10fd26-e648-4fa0-ae24-dfaaf4ff510d.json new file mode 100644 index 000000000..dd8c0822b --- /dev/null +++ b/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_2b-table/ae10fd26-e648-4fa0-ae24-dfaaf4ff510d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/xukp20_Llama-3-8B-Instruct-SPPO-Iter3_gp_2b-table/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct-SPPO-Iter3_gp_2b-table", + "id": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_2b-table", + "developer": "xukp20", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6024 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.497 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1042 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3674 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3658 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_8b-table/0af58746-0492-4ba7-8a17-c0a5c43d0700.json b/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_8b-table/0af58746-0492-4ba7-8a17-c0a5c43d0700.json new file mode 100644 index 000000000..27ec692c2 --- /dev/null +++ b/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_8b-table/0af58746-0492-4ba7-8a17-c0a5c43d0700.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/xukp20_Llama-3-8B-Instruct-SPPO-Iter3_gp_8b-table/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct-SPPO-Iter3_gp_8b-table", + "id": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_8b-table", + "developer": "xukp20", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.662 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0937 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": 
"MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3818 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3615 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_8b-table/d7125235-7b17-4a90-9125-c993646cd7c8.json b/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_8b-table/d7125235-7b17-4a90-9125-c993646cd7c8.json deleted file mode 100644 index 5d59dacb0..000000000 --- a/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_8b-table/d7125235-7b17-4a90-9125-c993646cd7c8.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/xukp20_Llama-3-8B-Instruct-SPPO-Iter3_gp_8b-table/1762652580.600907", - "retrieved_timestamp": "1762652580.600908", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_8b-table", - "developer": "xukp20", - "inference_platform": "unknown", - "id": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_8b-table", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6620300801872365 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49999369392208165 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09365558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38181249999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3614527925531915 - } - } - ] -} diff --git a/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_2b-table-0.001/88fff9f5-7aa7-463a-87e0-5fd2f5bacf09.json b/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_2b-table-0.001/88fff9f5-7aa7-463a-87e0-5fd2f5bacf09.json new file mode 100644 index 000000000..0f5aeda25 --- /dev/null +++ b/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_2b-table-0.001/88fff9f5-7aa7-463a-87e0-5fd2f5bacf09.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/xukp20_Llama-3-8B-Instruct-SPPO-score-Iter3_bt_2b-table-0.001/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct-SPPO-score-Iter3_bt_2b-table-0.001", + "id": "xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_2b-table-0.001", + "developer": "xukp20", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5336 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4915 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0982 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.378 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3625 + } + } + ] +} \ No newline at end of 
file diff --git a/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_2b-table-0.001/d758e9a9-c316-4de5-bdb7-d0ec7401fa12.json b/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_2b-table-0.001/d758e9a9-c316-4de5-bdb7-d0ec7401fa12.json deleted file mode 100644 index 97fa74ebd..000000000 --- a/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_2b-table-0.001/d758e9a9-c316-4de5-bdb7-d0ec7401fa12.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/xukp20_Llama-3-8B-Instruct-SPPO-score-Iter3_bt_2b-table-0.001/1762652580.601125", - "retrieved_timestamp": "1762652580.601126", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_2b-table-0.001", - "developer": "xukp20", - "inference_platform": "unknown", - "id": "xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_2b-table-0.001", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5336363072203975 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49148727192613517 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09818731117824774 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37796874999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3624501329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_8b-table-0.002/bc79527d-ae58-4b17-afd8-df931562dbf3.json b/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_8b-table-0.002/bc79527d-ae58-4b17-afd8-df931562dbf3.json new file mode 100644 index 000000000..8d52c8dfa --- /dev/null +++ b/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_8b-table-0.002/bc79527d-ae58-4b17-afd8-df931562dbf3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/xukp20_Llama-3-8B-Instruct-SPPO-score-Iter3_bt_8b-table-0.002/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct-SPPO-score-Iter3_bt_8b-table-0.002", + "id": "xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_8b-table-0.002", + "developer": "xukp20", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6852 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5075 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0718 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2584 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3832 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3621 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_8b-table-0.002/d1445003-91ea-4b2b-ab38-a47a6392620e.json b/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_8b-table-0.002/d1445003-91ea-4b2b-ab38-a47a6392620e.json deleted file mode 100644 index e7f365c19..000000000 --- a/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_8b-table-0.002/d1445003-91ea-4b2b-ab38-a47a6392620e.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/xukp20_Llama-3-8B-Instruct-SPPO-score-Iter3_bt_8b-table-0.002/1762652580.601484", - "retrieved_timestamp": "1762652580.6014872", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_8b-table-0.002", - "developer": "xukp20", - "inference_platform": "unknown", - "id": "xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_8b-table-0.002", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6851609285584471 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.507516320435292 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3831770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3621176861702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_2b-table-0.001/3e7423d5-ad7e-48e2-bd25-a4946d443c24.json b/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_2b-table-0.001/3e7423d5-ad7e-48e2-bd25-a4946d443c24.json new file mode 100644 index 000000000..922c5078c --- /dev/null +++ b/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_2b-table-0.001/3e7423d5-ad7e-48e2-bd25-a4946d443c24.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/xukp20_Llama-3-8B-Instruct-SPPO-score-Iter3_gp_2b-table-0.001/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct-SPPO-score-Iter3_gp_2b-table-0.001", + "id": "xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_2b-table-0.001", + "developer": "xukp20", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": 
"LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5482 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4887 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0891 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3633 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3671 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_2b-table-0.001/4d9c2e04-caef-43f5-9ce1-40517341ff40.json b/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_2b-table-0.001/4d9c2e04-caef-43f5-9ce1-40517341ff40.json deleted file mode 100644 index 734f96349..000000000 --- a/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_2b-table-0.001/4d9c2e04-caef-43f5-9ce1-40517341ff40.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/xukp20_Llama-3-8B-Instruct-SPPO-score-Iter3_gp_2b-table-0.001/1762652580.601857", - "retrieved_timestamp": "1762652580.6018581", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_2b-table-0.001", - "developer": "xukp20", - "inference_platform": "unknown", - "id": "xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_2b-table-0.001", - 
"additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5482242671666733 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48871746894288526 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3632708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36710438829787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/xukp20/llama-3-8b-instruct-sppo-iter1-gp-2b-tau01-table/5d53b35f-6bff-493c-805d-b45517ae0e2b.json b/data/hfopenllm_v2/xukp20/llama-3-8b-instruct-sppo-iter1-gp-2b-tau01-table/5d53b35f-6bff-493c-805d-b45517ae0e2b.json deleted file mode 100644 index 215abfe7b..000000000 --- a/data/hfopenllm_v2/xukp20/llama-3-8b-instruct-sppo-iter1-gp-2b-tau01-table/5d53b35f-6bff-493c-805d-b45517ae0e2b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/xukp20_llama-3-8b-instruct-sppo-iter1-gp-2b-tau01-table/1762652580.602122", - "retrieved_timestamp": "1762652580.602124", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "xukp20/llama-3-8b-instruct-sppo-iter1-gp-2b-tau01-table", - "developer": "xukp20", - "inference_platform": "unknown", - "id": "xukp20/llama-3-8b-instruct-sppo-iter1-gp-2b-tau01-table", - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6900069593124022 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.4978456981516493 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10498489425981873 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3673333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37159242021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/xukp20/llama-3-8b-instruct-sppo-iter1-gp-2b-tau01-table/7979fd6a-a886-41cc-987b-356b7c452bff.json b/data/hfopenllm_v2/xukp20/llama-3-8b-instruct-sppo-iter1-gp-2b-tau01-table/7979fd6a-a886-41cc-987b-356b7c452bff.json new file mode 100644 index 000000000..4feb8bd7f --- /dev/null +++ b/data/hfopenllm_v2/xukp20/llama-3-8b-instruct-sppo-iter1-gp-2b-tau01-table/7979fd6a-a886-41cc-987b-356b7c452bff.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/xukp20_llama-3-8b-instruct-sppo-iter1-gp-2b-tau01-table/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llama-3-8b-instruct-sppo-iter1-gp-2b-tau01-table", + "id": "xukp20/llama-3-8b-instruct-sppo-iter1-gp-2b-tau01-table", + "developer": "xukp20", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.69 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4978 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.105 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": 
"Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3673 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3716 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xwen-team/Xwen-7B-Chat/2be6bc34-1e61-426f-b963-6e096b5418fb.json b/data/hfopenllm_v2/xwen-team/Xwen-7B-Chat/2be6bc34-1e61-426f-b963-6e096b5418fb.json new file mode 100644 index 000000000..fb936d3e3 --- /dev/null +++ b/data/hfopenllm_v2/xwen-team/Xwen-7B-Chat/2be6bc34-1e61-426f-b963-6e096b5418fb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/xwen-team_Xwen-7B-Chat/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Xwen-7B-Chat", + "id": "xwen-team/Xwen-7B-Chat", + "developer": "xwen-team", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6864 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5068 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4509 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2609 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { 
+ "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3914 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.429 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xwen-team/Xwen-7B-Chat/a099778d-4c47-472e-872d-8fffcdf2764f.json b/data/hfopenllm_v2/xwen-team/Xwen-7B-Chat/a099778d-4c47-472e-872d-8fffcdf2764f.json deleted file mode 100644 index 7f79524c4..000000000 --- a/data/hfopenllm_v2/xwen-team/Xwen-7B-Chat/a099778d-4c47-472e-872d-8fffcdf2764f.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/xwen-team_Xwen-7B-Chat/1762652580.602432", - "retrieved_timestamp": "1762652580.602433", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "xwen-team/Xwen-7B-Chat", - "developer": "xwen-team", - "inference_platform": "unknown", - "id": "xwen-team/Xwen-7B-Chat", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6864098370102439 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.506762793166296 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4509063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3914270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42902260638297873 - } - } - ] -} diff --git a/data/hfopenllm_v2/xxx777xxxASD/L3.1-ClaudeMaid-4x8B/c4f69339-be6b-4bb4-8faf-a1f40e73d4b0.json 
b/data/hfopenllm_v2/xxx777xxxASD/L3.1-ClaudeMaid-4x8B/c4f69339-be6b-4bb4-8faf-a1f40e73d4b0.json new file mode 100644 index 000000000..e7afb7649 --- /dev/null +++ b/data/hfopenllm_v2/xxx777xxxASD/L3.1-ClaudeMaid-4x8B/c4f69339-be6b-4bb4-8faf-a1f40e73d4b0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/xxx777xxxASD_L3.1-ClaudeMaid-4x8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L3.1-ClaudeMaid-4x8B", + "id": "xxx777xxxASD/L3.1-ClaudeMaid-4x8B", + "developer": "xxx777xxxASD", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.942 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6696 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5071 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1412 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2911 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4289 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.358 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/yam-peleg/Hebrew-Gemma-11B-Instruct/5d25872d-eacd-4e5c-b9cc-9ee014147730.json b/data/hfopenllm_v2/yam-peleg/Hebrew-Gemma-11B-Instruct/5d25872d-eacd-4e5c-b9cc-9ee014147730.json deleted file mode 100644 index a0342ffd5..000000000 --- 
a/data/hfopenllm_v2/yam-peleg/Hebrew-Gemma-11B-Instruct/5d25872d-eacd-4e5c-b9cc-9ee014147730.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/yam-peleg_Hebrew-Gemma-11B-Instruct/1762652580.603103", - "retrieved_timestamp": "1762652580.603105", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "yam-peleg/Hebrew-Gemma-11B-Instruct", - "developer": "yam-peleg", - "inference_platform": "unknown", - "id": "yam-peleg/Hebrew-Gemma-11B-Instruct", - "additional_details": { - "precision": "float16", - "architecture": "GemmaForCausalLM", - "params_billions": 10.475 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30207737691547315 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40357843109818686 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06570996978851963 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4088541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25540226063829785 - } - } - ] -} diff --git a/data/hfopenllm_v2/yam-peleg/Hebrew-Gemma-11B-Instruct/c845eb10-a028-4cc2-8f64-25d75480c0d5.json b/data/hfopenllm_v2/yam-peleg/Hebrew-Gemma-11B-Instruct/c845eb10-a028-4cc2-8f64-25d75480c0d5.json new file mode 100644 index 000000000..cb76ed21b --- /dev/null +++ b/data/hfopenllm_v2/yam-peleg/Hebrew-Gemma-11B-Instruct/c845eb10-a028-4cc2-8f64-25d75480c0d5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/yam-peleg_Hebrew-Gemma-11B-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Hebrew-Gemma-11B-Instruct", + "id": "yam-peleg/Hebrew-Gemma-11B-Instruct", + "developer": "yam-peleg", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "GemmaForCausalLM", + "params_billions": 10.475 + } + 
}, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3021 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4036 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0657 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4089 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2554 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/yam-peleg/Hebrew-Mistral-7B-200K/377e7223-4876-49b6-8057-b1831d7f129b.json b/data/hfopenllm_v2/yam-peleg/Hebrew-Mistral-7B-200K/377e7223-4876-49b6-8057-b1831d7f129b.json new file mode 100644 index 000000000..7e2ef6384 --- /dev/null +++ b/data/hfopenllm_v2/yam-peleg/Hebrew-Mistral-7B-200K/377e7223-4876-49b6-8057-b1831d7f129b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/yam-peleg_Hebrew-Mistral-7B-200K/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Hebrew-Mistral-7B-200K", + "id": "yam-peleg/Hebrew-Mistral-7B-200K", + "developer": "yam-peleg", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.504 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1856 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4149 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0234 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.276 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3765 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2573 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/yam-peleg/Hebrew-Mistral-7B-200K/4ddb9ed6-0599-482e-b12e-bcb01975cc85.json b/data/hfopenllm_v2/yam-peleg/Hebrew-Mistral-7B-200K/4ddb9ed6-0599-482e-b12e-bcb01975cc85.json new file mode 100644 index 000000000..d83bce8f2 --- /dev/null +++ b/data/hfopenllm_v2/yam-peleg/Hebrew-Mistral-7B-200K/4ddb9ed6-0599-482e-b12e-bcb01975cc85.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/yam-peleg_Hebrew-Mistral-7B-200K/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Hebrew-Mistral-7B-200K", + "id": "yam-peleg/Hebrew-Mistral-7B-200K", + "developer": "yam-peleg", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.504 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.177 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + 
"dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3411 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.031 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2534 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.374 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2529 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/yam-peleg/Hebrew-Mistral-7B/9d5af106-be69-4b62-99c1-fcfb6091d080.json b/data/hfopenllm_v2/yam-peleg/Hebrew-Mistral-7B/9d5af106-be69-4b62-99c1-fcfb6091d080.json new file mode 100644 index 000000000..93e4556ea --- /dev/null +++ b/data/hfopenllm_v2/yam-peleg/Hebrew-Mistral-7B/9d5af106-be69-4b62-99c1-fcfb6091d080.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/yam-peleg_Hebrew-Mistral-7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Hebrew-Mistral-7B", + "id": "yam-peleg/Hebrew-Mistral-7B", + "developer": "yam-peleg", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.504 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2328 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.4334 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0498 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2794 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3977 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.278 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/yanng1242/Marcoro14-7B-slerp/2f2d7a55-2838-446d-9487-a6cfa0c03356.json b/data/hfopenllm_v2/yanng1242/Marcoro14-7B-slerp/2f2d7a55-2838-446d-9487-a6cfa0c03356.json new file mode 100644 index 000000000..348a5fe37 --- /dev/null +++ b/data/hfopenllm_v2/yanng1242/Marcoro14-7B-slerp/2f2d7a55-2838-446d-9487-a6cfa0c03356.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/yanng1242_Marcoro14-7B-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Marcoro14-7B-slerp", + "id": "yanng1242/Marcoro14-7B-slerp", + "developer": "yanng1242", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.406 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5252 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0748 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3146 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4686 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3168 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/yanng1242/Marcoro14-7B-slerp/f5005cc2-cec4-4a1c-be09-a670d996d15b.json b/data/hfopenllm_v2/yanng1242/Marcoro14-7B-slerp/f5005cc2-cec4-4a1c-be09-a670d996d15b.json deleted file mode 100644 index 1bd28b53f..000000000 --- a/data/hfopenllm_v2/yanng1242/Marcoro14-7B-slerp/f5005cc2-cec4-4a1c-be09-a670d996d15b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/yanng1242_Marcoro14-7B-slerp/1762652580.604092", - "retrieved_timestamp": "1762652580.604092", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "yanng1242/Marcoro14-7B-slerp", - "developer": "yanng1242", - "inference_platform": "unknown", - "id": "yanng1242/Marcoro14-7B-slerp", - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4059916576904835 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5251655292981787 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07477341389728097 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, 
- { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.468625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3168218085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/yasserrmd/Coder-GRPO-3B/425372c0-e096-4bdf-8f6c-eb2d5b36bb07.json b/data/hfopenllm_v2/yasserrmd/Coder-GRPO-3B/425372c0-e096-4bdf-8f6c-eb2d5b36bb07.json deleted file mode 100644 index d0aa90cdf..000000000 --- a/data/hfopenllm_v2/yasserrmd/Coder-GRPO-3B/425372c0-e096-4bdf-8f6c-eb2d5b36bb07.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/yasserrmd_Coder-GRPO-3B/1762652580.6044621", - "retrieved_timestamp": "1762652580.604463", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "yasserrmd/Coder-GRPO-3B", - "developer": "yasserrmd", - "inference_platform": "unknown", - "id": "yasserrmd/Coder-GRPO-3B", - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6207640172520024 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4469120364616385 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3202416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4114583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3197307180851064 - } - } - ] -} diff --git a/data/hfopenllm_v2/yasserrmd/Coder-GRPO-3B/65d20d45-f63b-4b09-b66d-5f53297c0c20.json b/data/hfopenllm_v2/yasserrmd/Coder-GRPO-3B/65d20d45-f63b-4b09-b66d-5f53297c0c20.json new file mode 100644 index 000000000..f3af930c7 --- /dev/null +++ 
b/data/hfopenllm_v2/yasserrmd/Coder-GRPO-3B/65d20d45-f63b-4b09-b66d-5f53297c0c20.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/yasserrmd_Coder-GRPO-3B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Coder-GRPO-3B", + "id": "yasserrmd/Coder-GRPO-3B", + "developer": "yasserrmd", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6208 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4469 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3202 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4115 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3197 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/yasserrmd/Text2SQL-1.5B/42a767cf-7d29-486d-b83e-fcfa51f048c1.json b/data/hfopenllm_v2/yasserrmd/Text2SQL-1.5B/42a767cf-7d29-486d-b83e-fcfa51f048c1.json deleted file mode 100644 index e93bacff9..000000000 --- a/data/hfopenllm_v2/yasserrmd/Text2SQL-1.5B/42a767cf-7d29-486d-b83e-fcfa51f048c1.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/yasserrmd_Text2SQL-1.5B/1762652580.604796", - "retrieved_timestamp": "1762652580.6047978", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "yasserrmd/Text2SQL-1.5B", - "developer": "yasserrmd", - "inference_platform": "unknown", - "id": "yasserrmd/Text2SQL-1.5B", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2857407235025289 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38577157961565695 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39423958333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23628656914893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/yasserrmd/Text2SQL-1.5B/4712953f-0777-4b97-8f13-f7309f19f0dc.json b/data/hfopenllm_v2/yasserrmd/Text2SQL-1.5B/4712953f-0777-4b97-8f13-f7309f19f0dc.json new file mode 100644 index 000000000..eca8489e8 --- /dev/null +++ b/data/hfopenllm_v2/yasserrmd/Text2SQL-1.5B/4712953f-0777-4b97-8f13-f7309f19f0dc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/yasserrmd_Text2SQL-1.5B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Text2SQL-1.5B", + "id": "yasserrmd/Text2SQL-1.5B", + "developer": "yasserrmd", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2857 + } + }, + { + 
"evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3858 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.068 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2878 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3942 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2363 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ycros/BagelMIsteryTour-v2-8x7B/2419f2a3-03df-4521-9baa-346e3cb53181.json b/data/hfopenllm_v2/ycros/BagelMIsteryTour-v2-8x7B/2419f2a3-03df-4521-9baa-346e3cb53181.json deleted file mode 100644 index 0800aab16..000000000 --- a/data/hfopenllm_v2/ycros/BagelMIsteryTour-v2-8x7B/2419f2a3-03df-4521-9baa-346e3cb53181.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ycros_BagelMIsteryTour-v2-8x7B/1762652580.605103", - "retrieved_timestamp": "1762652580.6051042", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ycros/BagelMIsteryTour-v2-8x7B", - "developer": "ycros", - "inference_platform": "unknown", - "id": "ycros/BagelMIsteryTour-v2-8x7B", - "additional_details": { - "precision": "float16", - "architecture": "MixtralForCausalLM", - "params_billions": 46.703 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.599431730031871 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.515923595752544 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07854984894259819 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4202916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34732380319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/ycros/BagelMIsteryTour-v2-8x7B/84382308-04b5-439f-b486-b26d20da605a.json b/data/hfopenllm_v2/ycros/BagelMIsteryTour-v2-8x7B/84382308-04b5-439f-b486-b26d20da605a.json new file mode 100644 index 000000000..e45b7fe88 --- /dev/null +++ b/data/hfopenllm_v2/ycros/BagelMIsteryTour-v2-8x7B/84382308-04b5-439f-b486-b26d20da605a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ycros_BagelMIsteryTour-v2-8x7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BagelMIsteryTour-v2-8x7B", + "id": "ycros/BagelMIsteryTour-v2-8x7B", + "developer": "ycros", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 46.703 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6262 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5142 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0937 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3079 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4138 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3481 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ycros/BagelMIsteryTour-v2-8x7B/a88e7110-2a58-4f47-801f-2a49037eaed6.json b/data/hfopenllm_v2/ycros/BagelMIsteryTour-v2-8x7B/a88e7110-2a58-4f47-801f-2a49037eaed6.json deleted file mode 100644 index c7520b20e..000000000 --- a/data/hfopenllm_v2/ycros/BagelMIsteryTour-v2-8x7B/a88e7110-2a58-4f47-801f-2a49037eaed6.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ycros_BagelMIsteryTour-v2-8x7B/1762652580.605396", - "retrieved_timestamp": "1762652580.605397", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ycros/BagelMIsteryTour-v2-8x7B", - "developer": "ycros", - "inference_platform": "unknown", - "id": "ycros/BagelMIsteryTour-v2-8x7B", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 46.703 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6262095683896506 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5141943573573103 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09365558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3480718085106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/ycros/BagelMIsteryTour-v2-8x7B/e82be06f-14ed-45e8-a273-d28c50f5212b.json b/data/hfopenllm_v2/ycros/BagelMIsteryTour-v2-8x7B/e82be06f-14ed-45e8-a273-d28c50f5212b.json new file mode 100644 index 000000000..7ac95c1dd --- /dev/null +++ b/data/hfopenllm_v2/ycros/BagelMIsteryTour-v2-8x7B/e82be06f-14ed-45e8-a273-d28c50f5212b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ycros_BagelMIsteryTour-v2-8x7B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BagelMIsteryTour-v2-8x7B", + "id": "ycros/BagelMIsteryTour-v2-8x7B", + "developer": "ycros", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "MixtralForCausalLM", + "params_billions": 46.703 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5994 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5159 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0785 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3045 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4203 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3473 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_2b-table/5815ba55-40fc-4f8e-ae0b-b329c42fd503.json 
b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_2b-table/5815ba55-40fc-4f8e-ae0b-b329c42fd503.json new file mode 100644 index 000000000..fd37fa6d3 --- /dev/null +++ b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_2b-table/5815ba55-40fc-4f8e-ae0b-b329c42fd503.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/yfzp_Llama-3-8B-Instruct-SPPO-Iter1_bt_2b-table/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct-SPPO-Iter1_bt_2b-table", + "id": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_2b-table", + "developer": "yfzp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6709 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4987 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1118 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3727 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3716 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_2b-table/cd2f94a5-595a-469e-b34e-a5f9abb82e6b.json b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_2b-table/cd2f94a5-595a-469e-b34e-a5f9abb82e6b.json deleted file mode 100644 index 
d18369086..000000000 --- a/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_2b-table/cd2f94a5-595a-469e-b34e-a5f9abb82e6b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/yfzp_Llama-3-8B-Instruct-SPPO-Iter1_bt_2b-table/1762652580.605642", - "retrieved_timestamp": "1762652580.605643", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_2b-table", - "developer": "yfzp", - "inference_platform": "unknown", - "id": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_2b-table", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6708976626462231 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49866134349131935 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11178247734138973 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37269791666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37159242021276595 - } - } - ] -} diff --git a/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_8b-table/c19ed336-aadf-4af3-a0e5-1c1946a17ce4.json b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_8b-table/c19ed336-aadf-4af3-a0e5-1c1946a17ce4.json deleted file mode 100644 index 11ed294db..000000000 --- a/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_8b-table/c19ed336-aadf-4af3-a0e5-1c1946a17ce4.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/yfzp_Llama-3-8B-Instruct-SPPO-Iter1_bt_8b-table/1762652580.605978", - "retrieved_timestamp": "1762652580.605979", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": 
"yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_8b-table", - "developer": "yfzp", - "inference_platform": "unknown", - "id": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_8b-table", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7332710541363582 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5080359954971677 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10347432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38060416666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3748337765957447 - } - } - ] -} diff --git a/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_8b-table/e58eceb3-b501-4924-9d0d-98d7da3c16c5.json b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_8b-table/e58eceb3-b501-4924-9d0d-98d7da3c16c5.json new file mode 100644 index 000000000..3a674e52d --- /dev/null +++ b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_8b-table/e58eceb3-b501-4924-9d0d-98d7da3c16c5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/yfzp_Llama-3-8B-Instruct-SPPO-Iter1_bt_8b-table/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct-SPPO-Iter1_bt_8b-table", + "id": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_8b-table", + "developer": "yfzp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7333 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" 
+ }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.508 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1035 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3806 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3748 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_2b-table/5a88455c-7699-4c49-8a12-76cda15d878c.json b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_2b-table/5a88455c-7699-4c49-8a12-76cda15d878c.json new file mode 100644 index 000000000..6b39085c6 --- /dev/null +++ b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_2b-table/5a88455c-7699-4c49-8a12-76cda15d878c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/yfzp_Llama-3-8B-Instruct-SPPO-Iter1_gp_2b-table/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct-SPPO-Iter1_gp_2b-table", + "id": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_2b-table", + "developer": "yfzp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6785 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 
0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4941 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1125 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3647 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3718 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_2b-table/d6cadac8-17a9-430f-94b3-6eb0c7ecc146.json b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_2b-table/d6cadac8-17a9-430f-94b3-6eb0c7ecc146.json deleted file mode 100644 index 2b19e58bd..000000000 --- a/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_2b-table/d6cadac8-17a9-430f-94b3-6eb0c7ecc146.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/yfzp_Llama-3-8B-Instruct-SPPO-Iter1_gp_2b-table/1762652580.60626", - "retrieved_timestamp": "1762652580.606261", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_2b-table", - "developer": "yfzp", - "inference_platform": "unknown", - "id": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_2b-table", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6784664689690023 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49412091896520455 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11253776435045318 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3646666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37175864361702127 - } - } - ] -} diff --git a/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_8b-table/0bdeac20-0505-459e-b417-ea4cb2f95cec.json b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_8b-table/0bdeac20-0505-459e-b417-ea4cb2f95cec.json deleted file mode 100644 index a0a5c55f2..000000000 --- a/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_8b-table/0bdeac20-0505-459e-b417-ea4cb2f95cec.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/yfzp_Llama-3-8B-Instruct-SPPO-Iter1_gp_8b-table/1762652580.6064892", - "retrieved_timestamp": "1762652580.6064901", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_8b-table", - "developer": "yfzp", - "inference_platform": "unknown", - "id": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_8b-table", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7131876753680235 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5025359954971677 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09894259818731117 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3713333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36826795212765956 - } - } - ] -} diff --git a/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_8b-table/122b4c1e-6e6c-4db5-8991-b091361c3ecf.json b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_8b-table/122b4c1e-6e6c-4db5-8991-b091361c3ecf.json new file mode 100644 index 000000000..f983fdb6a --- /dev/null +++ b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_8b-table/122b4c1e-6e6c-4db5-8991-b091361c3ecf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/yfzp_Llama-3-8B-Instruct-SPPO-Iter1_gp_8b-table/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct-SPPO-Iter1_gp_8b-table", + "id": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_8b-table", + "developer": "yfzp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7132 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5025 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0989 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3713 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 
1.0 + }, + "score_details": { + "score": 0.3683 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_2b-table-0.001/6abeb0e4-32ee-4dbb-9902-b19cc96a2aa7.json b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_2b-table-0.001/6abeb0e4-32ee-4dbb-9902-b19cc96a2aa7.json new file mode 100644 index 000000000..269e54be1 --- /dev/null +++ b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_2b-table-0.001/6abeb0e4-32ee-4dbb-9902-b19cc96a2aa7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/yfzp_Llama-3-8B-Instruct-SPPO-score-Iter1_bt_2b-table-0.001/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct-SPPO-score-Iter1_bt_2b-table-0.001", + "id": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_2b-table-0.001", + "developer": "yfzp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6496 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4979 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1012 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.378 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.372 + } + } + ] +} \ No 
newline at end of file diff --git a/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_2b-table-0.001/b1ad6a57-8cad-4cca-8dd6-00ebd35089ab.json b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_2b-table-0.001/b1ad6a57-8cad-4cca-8dd6-00ebd35089ab.json deleted file mode 100644 index 904cceae4..000000000 --- a/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_2b-table-0.001/b1ad6a57-8cad-4cca-8dd6-00ebd35089ab.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/yfzp_Llama-3-8B-Instruct-SPPO-score-Iter1_bt_2b-table-0.001/1762652580.606723", - "retrieved_timestamp": "1762652580.606724", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_2b-table-0.001", - "developer": "yfzp", - "inference_platform": "unknown", - "id": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_2b-table-0.001", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6495653754260917 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4979459532536201 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10120845921450151 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37796874999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37200797872340424 - } - } - ] -} diff --git a/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_8b-table-0.002/249af8cd-717b-4ee9-8ac7-740f16708675.json b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_8b-table-0.002/249af8cd-717b-4ee9-8ac7-740f16708675.json deleted file mode 100644 index 6c15bfc96..000000000 --- a/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_8b-table-0.002/249af8cd-717b-4ee9-8ac7-740f16708675.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": 
"hfopenllm_v2/yfzp_Llama-3-8B-Instruct-SPPO-score-Iter1_bt_8b-table-0.002/1762652580.6069329", - "retrieved_timestamp": "1762652580.606934", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_8b-table-0.002", - "developer": "yfzp", - "inference_platform": "unknown", - "id": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_8b-table-0.002", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7196073086078272 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5045147424411157 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08761329305135952 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3831458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3734208776595745 - } - } - ] -} diff --git a/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_8b-table-0.002/679f214f-e03f-47a9-8a11-91adbf1c4880.json b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_8b-table-0.002/679f214f-e03f-47a9-8a11-91adbf1c4880.json new file mode 100644 index 000000000..7cceda8ce --- /dev/null +++ b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_8b-table-0.002/679f214f-e03f-47a9-8a11-91adbf1c4880.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/yfzp_Llama-3-8B-Instruct-SPPO-score-Iter1_bt_8b-table-0.002/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct-SPPO-score-Iter1_bt_8b-table-0.002", + "id": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_8b-table-0.002", + "developer": "yfzp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + 
"params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7196 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5045 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0876 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2601 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3831 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3734 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_2b-table-0.001/338737c7-29cf-44d8-be92-6749167b7c03.json b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_2b-table-0.001/338737c7-29cf-44d8-be92-6749167b7c03.json deleted file mode 100644 index f1ff1a16e..000000000 --- a/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_2b-table-0.001/338737c7-29cf-44d8-be92-6749167b7c03.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/yfzp_Llama-3-8B-Instruct-SPPO-score-Iter1_gp_2b-table-0.001/1762652580.6072068", - "retrieved_timestamp": "1762652580.6072068", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_2b-table-0.001", - "developer": "yfzp", - "inference_platform": "unknown", - "id": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_2b-table-0.001", - "additional_details": { - "precision": 
"bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6504397221594258 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49578758563187125 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09365558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36603125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3702626329787234 - } - } - ] -} diff --git a/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_2b-table-0.001/680e77b8-9c64-4c52-aa83-55236039cef1.json b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_2b-table-0.001/680e77b8-9c64-4c52-aa83-55236039cef1.json new file mode 100644 index 000000000..78466338e --- /dev/null +++ b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_2b-table-0.001/680e77b8-9c64-4c52-aa83-55236039cef1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/yfzp_Llama-3-8B-Instruct-SPPO-score-Iter1_gp_2b-table-0.001/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct-SPPO-score-Iter1_gp_2b-table-0.001", + "id": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_2b-table-0.001", + "developer": "yfzp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6504 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4958 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0937 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.366 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3703 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_8b-table-0.002/aa12336f-556c-4222-a10c-529eb74a793b.json b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_8b-table-0.002/aa12336f-556c-4222-a10c-529eb74a793b.json deleted file mode 100644 index 34fd9a91e..000000000 --- a/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_8b-table-0.002/aa12336f-556c-4222-a10c-529eb74a793b.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/yfzp_Llama-3-8B-Instruct-SPPO-score-Iter1_gp_8b-table-0.002/1762652580.607418", - "retrieved_timestamp": "1762652580.6074188", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_8b-table-0.002", - "developer": "yfzp", - "inference_platform": "unknown", - "id": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_8b-table-0.002", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7015973173402128 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4991547169583548 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08685800604229607 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37790624999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.366938164893617 - } - } - ] -} diff --git a/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_8b-table-0.002/c24c471c-14b3-462e-8b81-6548b27e5ffc.json b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_8b-table-0.002/c24c471c-14b3-462e-8b81-6548b27e5ffc.json new file mode 100644 index 000000000..3d58afca0 --- /dev/null +++ b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_8b-table-0.002/c24c471c-14b3-462e-8b81-6548b27e5ffc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/yfzp_Llama-3-8B-Instruct-SPPO-score-Iter1_gp_8b-table-0.002/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct-SPPO-score-Iter1_gp_8b-table-0.002", + "id": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_8b-table-0.002", + "developer": "yfzp", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7016 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4992 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0869 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": 
"Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2592 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3779 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3669 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/yifAI/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_8b-table-0.002/79fad1b7-c458-4f89-9d7a-d58f70ba6c90.json b/data/hfopenllm_v2/yifAI/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_8b-table-0.002/79fad1b7-c458-4f89-9d7a-d58f70ba6c90.json deleted file mode 100644 index bc69d24f8..000000000 --- a/data/hfopenllm_v2/yifAI/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_8b-table-0.002/79fad1b7-c458-4f89-9d7a-d58f70ba6c90.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/yifAI_Llama-3-8B-Instruct-SPPO-score-Iter3_gp_8b-table-0.002/1762652580.6077929", - "retrieved_timestamp": "1762652580.607796", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "yifAI/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_8b-table-0.002", - "developer": "yifAI", - "inference_platform": "unknown", - "id": "yifAI/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_8b-table-0.002", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6489658550423987 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49145217071254876 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0755287009063444 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.38987499999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3519780585106383 - } - } - ] -} diff --git a/data/hfopenllm_v2/yifAI/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_8b-table-0.002/efa7fa62-2e8b-403c-b345-eef876b48dbd.json b/data/hfopenllm_v2/yifAI/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_8b-table-0.002/efa7fa62-2e8b-403c-b345-eef876b48dbd.json new file mode 100644 index 000000000..183ccc754 --- /dev/null +++ b/data/hfopenllm_v2/yifAI/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_8b-table-0.002/efa7fa62-2e8b-403c-b345-eef876b48dbd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/yifAI_Llama-3-8B-Instruct-SPPO-score-Iter3_gp_8b-table-0.002/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3-8B-Instruct-SPPO-score-Iter3_gp_8b-table-0.002", + "id": "yifAI/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_8b-table-0.002", + "developer": "yifAI", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.649 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4915 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0755 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3899 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": 
"TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.352 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ylalain/ECE-PRYMMAL-YL-1B-SLERP-V8/40bae762-65bd-4b4c-b422-ffd0fd3790a9.json b/data/hfopenllm_v2/ylalain/ECE-PRYMMAL-YL-1B-SLERP-V8/40bae762-65bd-4b4c-b422-ffd0fd3790a9.json new file mode 100644 index 000000000..1a855481e --- /dev/null +++ b/data/hfopenllm_v2/ylalain/ECE-PRYMMAL-YL-1B-SLERP-V8/40bae762-65bd-4b4c-b422-ffd0fd3790a9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ylalain_ECE-PRYMMAL-YL-1B-SLERP-V8/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-PRYMMAL-YL-1B-SLERP-V8", + "id": "ylalain/ECE-PRYMMAL-YL-1B-SLERP-V8", + "developer": "ylalain", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.357 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1505 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3976 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0045 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2894 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { 
+ "score": 0.2384 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ylalain/ECE-PRYMMAL-YL-1B-SLERP-V8/5e4e3c08-71cd-4241-bfe9-bc242f0cc32a.json b/data/hfopenllm_v2/ylalain/ECE-PRYMMAL-YL-1B-SLERP-V8/5e4e3c08-71cd-4241-bfe9-bc242f0cc32a.json deleted file mode 100644 index bf6e57575..000000000 --- a/data/hfopenllm_v2/ylalain/ECE-PRYMMAL-YL-1B-SLERP-V8/5e4e3c08-71cd-4241-bfe9-bc242f0cc32a.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ylalain_ECE-PRYMMAL-YL-1B-SLERP-V8/1762652580.608171", - "retrieved_timestamp": "1762652580.608172", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ylalain/ECE-PRYMMAL-YL-1B-SLERP-V8", - "developer": "ylalain", - "inference_platform": "unknown", - "id": "ylalain/ECE-PRYMMAL-YL-1B-SLERP-V8", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.357 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15052726764983576 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3975573100103517 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.004531722054380665 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3874583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23836436170212766 - } - } - ] -} diff --git a/data/hfopenllm_v2/ymcki/Llama-3.1-8B-GRPO-Instruct/596957cc-719c-44c7-8284-06a9ba0d1a30.json b/data/hfopenllm_v2/ymcki/Llama-3.1-8B-GRPO-Instruct/596957cc-719c-44c7-8284-06a9ba0d1a30.json new file mode 100644 index 000000000..a68709211 --- /dev/null +++ b/data/hfopenllm_v2/ymcki/Llama-3.1-8B-GRPO-Instruct/596957cc-719c-44c7-8284-06a9ba0d1a30.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ymcki_Llama-3.1-8B-GRPO-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": 
"third_party" + }, + "model_info": { + "name": "Llama-3.1-8B-GRPO-Instruct", + "id": "ymcki/Llama-3.1-8B-GRPO-Instruct", + "developer": "ymcki", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7445 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5132 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2024 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2945 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3817 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3738 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ymcki/Llama-3.1-8B-GRPO-Instruct/cb38b3bb-6188-430f-b863-9bf86cc877f9.json b/data/hfopenllm_v2/ymcki/Llama-3.1-8B-GRPO-Instruct/cb38b3bb-6188-430f-b863-9bf86cc877f9.json deleted file mode 100644 index a354425ac..000000000 --- a/data/hfopenllm_v2/ymcki/Llama-3.1-8B-GRPO-Instruct/cb38b3bb-6188-430f-b863-9bf86cc877f9.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ymcki_Llama-3.1-8B-GRPO-Instruct/1762652580.608475", - "retrieved_timestamp": "1762652580.608476", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ymcki/Llama-3.1-8B-GRPO-Instruct", - "developer": "ymcki", - 
"inference_platform": "unknown", - "id": "ymcki/Llama-3.1-8B-GRPO-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.744536718130117 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5131586337530801 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20241691842900303 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38165625000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3738364361702128 - } - } - ] -} diff --git a/data/hfopenllm_v2/ymcki/Llama-3.1-8B-SFT-GRPO-Instruct/706bbc09-f867-4327-bc4d-b5ede41ebd93.json b/data/hfopenllm_v2/ymcki/Llama-3.1-8B-SFT-GRPO-Instruct/706bbc09-f867-4327-bc4d-b5ede41ebd93.json new file mode 100644 index 000000000..3032b3ce7 --- /dev/null +++ b/data/hfopenllm_v2/ymcki/Llama-3.1-8B-SFT-GRPO-Instruct/706bbc09-f867-4327-bc4d-b5ede41ebd93.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ymcki_Llama-3.1-8B-SFT-GRPO-Instruct/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama-3.1-8B-SFT-GRPO-Instruct", + "id": "ymcki/Llama-3.1-8B-SFT-GRPO-Instruct", + "developer": "ymcki", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3354 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.3126 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.04 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2534 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3526 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1098 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ymcki/Llama-3.1-8B-SFT-GRPO-Instruct/938af657-ca9b-4400-84e1-002065f92f84.json b/data/hfopenllm_v2/ymcki/Llama-3.1-8B-SFT-GRPO-Instruct/938af657-ca9b-4400-84e1-002065f92f84.json deleted file mode 100644 index 32c986a71..000000000 --- a/data/hfopenllm_v2/ymcki/Llama-3.1-8B-SFT-GRPO-Instruct/938af657-ca9b-4400-84e1-002065f92f84.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/ymcki_Llama-3.1-8B-SFT-GRPO-Instruct/1762652580.608792", - "retrieved_timestamp": "1762652580.608793", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "ymcki/Llama-3.1-8B-SFT-GRPO-Instruct", - "developer": "ymcki", - "inference_platform": "unknown", - "id": "ymcki/Llama-3.1-8B-SFT-GRPO-Instruct", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33540007180946557 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3126261967336083 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.04003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35260416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10979055851063829 - } - } - ] -} diff --git a/data/hfopenllm_v2/ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18-merge/8962e9be-75bf-4f57-8ce2-b29523740851.json b/data/hfopenllm_v2/ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18-merge/8962e9be-75bf-4f57-8ce2-b29523740851.json new file mode 100644 index 000000000..58ab0bf05 --- /dev/null +++ b/data/hfopenllm_v2/ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18-merge/8962e9be-75bf-4f57-8ce2-b29523740851.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-ORPO-jpn-it-abliterated-18-merge/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-2b-ORPO-jpn-it-abliterated-18-merge", + "id": "ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18-merge", + "developer": "ymcki", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5218 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4147 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0544 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2836 + } + }, + { + "evaluation_name": "MUSR", + "source_data": 
{ + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3514 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2461 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18/014f4838-22ff-4802-a887-4d2de01a9256.json b/data/hfopenllm_v2/ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18/014f4838-22ff-4802-a887-4d2de01a9256.json new file mode 100644 index 000000000..ff7346956 --- /dev/null +++ b/data/hfopenllm_v2/ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18/014f4838-22ff-4802-a887-4d2de01a9256.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-ORPO-jpn-it-abliterated-18/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-2b-ORPO-jpn-it-abliterated-18", + "id": "ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18", + "developer": "ymcki", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4631 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4053 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0431 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2886 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3754 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2345 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ymcki/gemma-2-2b-jpn-it-abliterated-17-18-24/5c6eac9c-0ec6-4364-a86b-dcd894d69f0b.json b/data/hfopenllm_v2/ymcki/gemma-2-2b-jpn-it-abliterated-17-18-24/5c6eac9c-0ec6-4364-a86b-dcd894d69f0b.json new file mode 100644 index 000000000..758147784 --- /dev/null +++ b/data/hfopenllm_v2/ymcki/gemma-2-2b-jpn-it-abliterated-17-18-24/5c6eac9c-0ec6-4364-a86b-dcd894d69f0b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-jpn-it-abliterated-17-18-24/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-2b-jpn-it-abliterated-17-18-24", + "id": "ymcki/gemma-2-2b-jpn-it-abliterated-17-18-24", + "developer": "ymcki", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5055 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3812 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0257 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.281 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3502 + } + }, + { + 
"evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2282 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO-alpaca/09b81cf2-3b79-448c-ab8e-87e378c804bb.json b/data/hfopenllm_v2/ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO-alpaca/09b81cf2-3b79-448c-ab8e-87e378c804bb.json new file mode 100644 index 000000000..d9039ff8d --- /dev/null +++ b/data/hfopenllm_v2/ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO-alpaca/09b81cf2-3b79-448c-ab8e-87e378c804bb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-jpn-it-abliterated-17-ORPO-alpaca/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-2b-jpn-it-abliterated-17-ORPO-alpaca", + "id": "ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO-alpaca", + "developer": "ymcki", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3065 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4072 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0325 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2693 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3969 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": 
"TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2249 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO/28b9977a-db3d-4f38-b1f7-bd0cdcab5504.json b/data/hfopenllm_v2/ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO/28b9977a-db3d-4f38-b1f7-bd0cdcab5504.json new file mode 100644 index 000000000..89ebbecdf --- /dev/null +++ b/data/hfopenllm_v2/ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO/28b9977a-db3d-4f38-b1f7-bd0cdcab5504.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-jpn-it-abliterated-17-ORPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-2b-jpn-it-abliterated-17-ORPO", + "id": "ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO", + "developer": "ymcki", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4748 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3898 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0619 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2743 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3768 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2191 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ymcki/gemma-2-2b-jpn-it-abliterated-17/845ea162-cfa1-47f4-8914-d81d9bf1bb7d.json b/data/hfopenllm_v2/ymcki/gemma-2-2b-jpn-it-abliterated-17/845ea162-cfa1-47f4-8914-d81d9bf1bb7d.json new file mode 100644 index 000000000..b0dc22bac --- /dev/null +++ b/data/hfopenllm_v2/ymcki/gemma-2-2b-jpn-it-abliterated-17/845ea162-cfa1-47f4-8914-d81d9bf1bb7d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-jpn-it-abliterated-17/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-2b-jpn-it-abliterated-17", + "id": "ymcki/gemma-2-2b-jpn-it-abliterated-17", + "developer": "ymcki", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5082 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4076 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0385 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2718 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3701 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2455 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/ymcki/gemma-2-2b-jpn-it-abliterated-18-ORPO/706737c7-cd1a-4958-9ffc-2655f0b50178.json b/data/hfopenllm_v2/ymcki/gemma-2-2b-jpn-it-abliterated-18-ORPO/706737c7-cd1a-4958-9ffc-2655f0b50178.json new file mode 100644 index 000000000..1d4acd4a2 --- /dev/null +++ b/data/hfopenllm_v2/ymcki/gemma-2-2b-jpn-it-abliterated-18-ORPO/706737c7-cd1a-4958-9ffc-2655f0b50178.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-jpn-it-abliterated-18-ORPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-2b-jpn-it-abliterated-18-ORPO", + "id": "ymcki/gemma-2-2b-jpn-it-abliterated-18-ORPO", + "developer": "ymcki", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4742 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4039 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0468 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2617 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3953 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2185 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ymcki/gemma-2-2b-jpn-it-abliterated-18/5acd58cd-8dfb-4fb7-8832-6bc151e0b1a1.json 
b/data/hfopenllm_v2/ymcki/gemma-2-2b-jpn-it-abliterated-18/5acd58cd-8dfb-4fb7-8832-6bc151e0b1a1.json new file mode 100644 index 000000000..902673fd5 --- /dev/null +++ b/data/hfopenllm_v2/ymcki/gemma-2-2b-jpn-it-abliterated-18/5acd58cd-8dfb-4fb7-8832-6bc151e0b1a1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-jpn-it-abliterated-18/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-2b-jpn-it-abliterated-18", + "id": "ymcki/gemma-2-2b-jpn-it-abliterated-18", + "developer": "ymcki", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5175 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4132 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0446 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2735 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3742 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2505 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ymcki/gemma-2-2b-jpn-it-abliterated-24/d374a68d-b985-47c2-b087-500bffa93c80.json b/data/hfopenllm_v2/ymcki/gemma-2-2b-jpn-it-abliterated-24/d374a68d-b985-47c2-b087-500bffa93c80.json new file mode 100644 index 000000000..7ae15b60f --- /dev/null +++ 
b/data/hfopenllm_v2/ymcki/gemma-2-2b-jpn-it-abliterated-24/d374a68d-b985-47c2-b087-500bffa93c80.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-jpn-it-abliterated-24/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-2b-jpn-it-abliterated-24", + "id": "ymcki/gemma-2-2b-jpn-it-abliterated-24", + "developer": "ymcki", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4979 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.411 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0438 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2777 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3915 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2473 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/yuchenxie/ArlowGPT-3B-Multilingual/23fbceb0-b646-4945-b17f-66dde24a0e43.json b/data/hfopenllm_v2/yuchenxie/ArlowGPT-3B-Multilingual/23fbceb0-b646-4945-b17f-66dde24a0e43.json new file mode 100644 index 000000000..266d9bf50 --- /dev/null +++ b/data/hfopenllm_v2/yuchenxie/ArlowGPT-3B-Multilingual/23fbceb0-b646-4945-b17f-66dde24a0e43.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/yuchenxie_ArlowGPT-3B-Multilingual/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ArlowGPT-3B-Multilingual", + "id": "yuchenxie/ArlowGPT-3B-Multilingual", + "developer": "yuchenxie", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6395 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4301 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1125 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2802 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3727 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2817 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/yuchenxie/ArlowGPT-8B/73d9e204-e829-4159-b340-6d9581c6f0e1.json b/data/hfopenllm_v2/yuchenxie/ArlowGPT-8B/73d9e204-e829-4159-b340-6d9581c6f0e1.json new file mode 100644 index 000000000..4cc6ee856 --- /dev/null +++ b/data/hfopenllm_v2/yuchenxie/ArlowGPT-8B/73d9e204-e829-4159-b340-6d9581c6f0e1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/yuchenxie_ArlowGPT-8B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + 
"evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ArlowGPT-8B", + "id": "yuchenxie/ArlowGPT-8B", + "developer": "yuchenxie", + "inference_platform": "unknown", + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7847 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.508 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2039 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2936 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3882 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3787 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/yuvraj17/Llama3-8B-SuperNova-Spectrum-Hermes-DPO/a6979dda-fba6-4104-b153-3b0a89de8585.json b/data/hfopenllm_v2/yuvraj17/Llama3-8B-SuperNova-Spectrum-Hermes-DPO/a6979dda-fba6-4104-b153-3b0a89de8585.json new file mode 100644 index 000000000..9cefceabb --- /dev/null +++ b/data/hfopenllm_v2/yuvraj17/Llama3-8B-SuperNova-Spectrum-Hermes-DPO/a6979dda-fba6-4104-b153-3b0a89de8585.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/yuvraj17_Llama3-8B-SuperNova-Spectrum-Hermes-DPO/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3-8B-SuperNova-Spectrum-Hermes-DPO", + "id": "yuvraj17/Llama3-8B-SuperNova-Spectrum-Hermes-DPO", + "developer": 
"yuvraj17", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4691 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.44 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0566 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.302 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4012 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2635 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/yuvraj17/Llama3-8B-SuperNova-Spectrum-Hermes-DPO/d22c83a1-9c1c-43df-b033-c6cb75cb389d.json b/data/hfopenllm_v2/yuvraj17/Llama3-8B-SuperNova-Spectrum-Hermes-DPO/d22c83a1-9c1c-43df-b033-c6cb75cb389d.json deleted file mode 100644 index c1e91fecd..000000000 --- a/data/hfopenllm_v2/yuvraj17/Llama3-8B-SuperNova-Spectrum-Hermes-DPO/d22c83a1-9c1c-43df-b033-c6cb75cb389d.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/yuvraj17_Llama3-8B-SuperNova-Spectrum-Hermes-DPO/1762652580.611586", - "retrieved_timestamp": "1762652580.611586", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "yuvraj17/Llama3-8B-SuperNova-Spectrum-Hermes-DPO", - "developer": "yuvraj17", - "inference_platform": "unknown", - "id": 
"yuvraj17/Llama3-8B-SuperNova-Spectrum-Hermes-DPO", - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4690897928607206 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4399870586095269 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40121875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634640957446808 - } - } - ] -} diff --git a/data/hfopenllm_v2/yuvraj17/Llama3-8B-SuperNova-Spectrum-dare_ties/62e04968-0c5c-4aad-a434-d9d24bccbdb8.json b/data/hfopenllm_v2/yuvraj17/Llama3-8B-SuperNova-Spectrum-dare_ties/62e04968-0c5c-4aad-a434-d9d24bccbdb8.json new file mode 100644 index 000000000..76030c29d --- /dev/null +++ b/data/hfopenllm_v2/yuvraj17/Llama3-8B-SuperNova-Spectrum-dare_ties/62e04968-0c5c-4aad-a434-d9d24bccbdb8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/yuvraj17_Llama3-8B-SuperNova-Spectrum-dare_ties/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3-8B-SuperNova-Spectrum-dare_ties", + "id": "yuvraj17/Llama3-8B-SuperNova-Spectrum-dare_ties", + "developer": "yuvraj17", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4013 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4616 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0846 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2752 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4211 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3574 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/yuvraj17/Llama3-8B-abliterated-Spectrum-slerp/bae4064e-b10f-4082-876d-e4168ca1a8cc.json b/data/hfopenllm_v2/yuvraj17/Llama3-8B-abliterated-Spectrum-slerp/bae4064e-b10f-4082-876d-e4168ca1a8cc.json new file mode 100644 index 000000000..95dd6c5ed --- /dev/null +++ b/data/hfopenllm_v2/yuvraj17/Llama3-8B-abliterated-Spectrum-slerp/bae4064e-b10f-4082-876d-e4168ca1a8cc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/yuvraj17_Llama3-8B-abliterated-Spectrum-slerp/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Llama3-8B-abliterated-Spectrum-slerp", + "id": "yuvraj17/Llama3-8B-abliterated-Spectrum-slerp", + "developer": "yuvraj17", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2885 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4978 + } + }, + { + "evaluation_name": "MATH Level 5", + 
"source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0604 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3012 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3998 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3257 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zake7749/gemma-2-2b-it-chinese-kyara-dpo/0040b48c-0f54-4c9b-97ee-1ca833c68e36.json b/data/hfopenllm_v2/zake7749/gemma-2-2b-it-chinese-kyara-dpo/0040b48c-0f54-4c9b-97ee-1ca833c68e36.json new file mode 100644 index 000000000..c07d9eb08 --- /dev/null +++ b/data/hfopenllm_v2/zake7749/gemma-2-2b-it-chinese-kyara-dpo/0040b48c-0f54-4c9b-97ee-1ca833c68e36.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zake7749_gemma-2-2b-it-chinese-kyara-dpo/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-2b-it-chinese-kyara-dpo", + "id": "zake7749/gemma-2-2b-it-chinese-kyara-dpo", + "developer": "zake7749", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5382 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4257 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0838 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2668 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4576 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2573 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zake7749/gemma-2-2b-it-chinese-kyara-dpo/4fbaf39a-86a1-4b79-aeeb-e14c2de64666.json b/data/hfopenllm_v2/zake7749/gemma-2-2b-it-chinese-kyara-dpo/4fbaf39a-86a1-4b79-aeeb-e14c2de64666.json deleted file mode 100644 index 3f8c4bbdf..000000000 --- a/data/hfopenllm_v2/zake7749/gemma-2-2b-it-chinese-kyara-dpo/4fbaf39a-86a1-4b79-aeeb-e14c2de64666.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zake7749_gemma-2-2b-it-chinese-kyara-dpo/1762652580.612313", - "retrieved_timestamp": "1762652580.6123142", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zake7749/gemma-2-2b-it-chinese-kyara-dpo", - "developer": "zake7749", - "inference_platform": "unknown", - "id": "zake7749/gemma-2-2b-it-chinese-kyara-dpo", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5382075116247114 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4257464897414603 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08383685800604229 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45756250000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25731382978723405 - } - } - ] -} diff --git a/data/hfopenllm_v2/zake7749/gemma-2-9b-it-chinese-kyara/6050e969-bcde-4594-8e53-05fa74c7287d.json b/data/hfopenllm_v2/zake7749/gemma-2-9b-it-chinese-kyara/6050e969-bcde-4594-8e53-05fa74c7287d.json new file mode 100644 index 000000000..0bf944e0e --- /dev/null +++ b/data/hfopenllm_v2/zake7749/gemma-2-9b-it-chinese-kyara/6050e969-bcde-4594-8e53-05fa74c7287d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zake7749_gemma-2-9b-it-chinese-kyara/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-9b-it-chinese-kyara", + "id": "zake7749/gemma-2-9b-it-chinese-kyara", + "developer": "zake7749", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1764 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5954 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.105 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3381 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4242 + } + }, + { + 
"evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4179 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/Gemma-2-TM-9B/3aaee358-bf3e-4d91-91bf-bd42e0a7c61e.json b/data/hfopenllm_v2/zelk12/Gemma-2-TM-9B/3aaee358-bf3e-4d91-91bf-bd42e0a7c61e.json new file mode 100644 index 000000000..c972fae87 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/Gemma-2-TM-9B/3aaee358-bf3e-4d91-91bf-bd42e0a7c61e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_Gemma-2-TM-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gemma-2-TM-9B", + "id": "zelk12/Gemma-2-TM-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8045 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5987 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2024 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3465 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4152 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.4088 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT-Gen1-gemma-2-9B/ef5f4fb2-f409-49dc-b3f0-f3e19585cd8a.json b/data/hfopenllm_v2/zelk12/MT-Gen1-gemma-2-9B/ef5f4fb2-f409-49dc-b3f0-f3e19585cd8a.json new file mode 100644 index 000000000..14766f899 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT-Gen1-gemma-2-9B/ef5f4fb2-f409-49dc-b3f0-f3e19585cd8a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen1-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT-Gen1-gemma-2-9B", + "id": "zelk12/MT-Gen1-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7886 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.61 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2221 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3465 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4217 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4381 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT-Gen2-GI-gemma-2-9B/4048fa60-7427-4f7e-9939-e270aa5e8b51.json 
b/data/hfopenllm_v2/zelk12/MT-Gen2-GI-gemma-2-9B/4048fa60-7427-4f7e-9939-e270aa5e8b51.json new file mode 100644 index 000000000..aab0e6abb --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT-Gen2-GI-gemma-2-9B/4048fa60-7427-4f7e-9939-e270aa5e8b51.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen2-GI-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT-Gen2-GI-gemma-2-9B", + "id": "zelk12/MT-Gen2-GI-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7914 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6096 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2205 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3507 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4283 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4356 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT-Gen2-gemma-2-9B/f5c9baea-f2cf-414a-937a-6a43f55a1c1d.json b/data/hfopenllm_v2/zelk12/MT-Gen2-gemma-2-9B/f5c9baea-f2cf-414a-937a-6a43f55a1c1d.json new file mode 100644 index 000000000..1b3fd4238 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT-Gen2-gemma-2-9B/f5c9baea-f2cf-414a-937a-6a43f55a1c1d.json @@ -0,0 +1,132 
@@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen2-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT-Gen2-gemma-2-9B", + "id": "zelk12/MT-Gen2-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7907 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.61 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.219 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3465 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4323 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4387 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT-Gen3-gemma-2-9B/1da70796-d40b-4f2a-8ce3-b304f414a6d5.json b/data/hfopenllm_v2/zelk12/MT-Gen3-gemma-2-9B/1da70796-d40b-4f2a-8ce3-b304f414a6d5.json new file mode 100644 index 000000000..9a41654cb --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT-Gen3-gemma-2-9B/1da70796-d40b-4f2a-8ce3-b304f414a6d5.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen3-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT-Gen3-gemma-2-9B", + "id": "zelk12/MT-Gen3-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.802 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6097 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2296 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.349 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4217 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4356 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT-Gen4-gemma-2-9B/de476f79-2539-4f9e-a1d2-901c6c4342d4.json b/data/hfopenllm_v2/zelk12/MT-Gen4-gemma-2-9B/de476f79-2539-4f9e-a1d2-901c6c4342d4.json new file mode 100644 index 000000000..6c3bf327c --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT-Gen4-gemma-2-9B/de476f79-2539-4f9e-a1d2-901c6c4342d4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen4-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT-Gen4-gemma-2-9B", + "id": "zelk12/MT-Gen4-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + 
"precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7883 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.611 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2236 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3549 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4228 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4387 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT-Gen5-gemma-2-9B/80aee542-c894-46b6-a6ed-9f3400aefa9e.json b/data/hfopenllm_v2/zelk12/MT-Gen5-gemma-2-9B/80aee542-c894-46b6-a6ed-9f3400aefa9e.json new file mode 100644 index 000000000..758fecb7b --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT-Gen5-gemma-2-9B/80aee542-c894-46b6-a6ed-9f3400aefa9e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen5-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT-Gen5-gemma-2-9B", + "id": "zelk12/MT-Gen5-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, 
+ "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7923 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6133 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2153 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3515 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4202 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4402 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT-Gen6-gemma-2-9B/5c9d4eaf-0985-4f9e-8007-08b4081bb19d.json b/data/hfopenllm_v2/zelk12/MT-Gen6-gemma-2-9B/5c9d4eaf-0985-4f9e-8007-08b4081bb19d.json new file mode 100644 index 000000000..d42787489 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT-Gen6-gemma-2-9B/5c9d4eaf-0985-4f9e-8007-08b4081bb19d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen6-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT-Gen6-gemma-2-9B", + "id": "zelk12/MT-Gen6-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1616 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + 
"dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5845 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0823 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3331 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4069 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4166 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT-Gen6fix-gemma-2-9B/4b019824-8454-4ce8-aa49-d122a2491f9c.json b/data/hfopenllm_v2/zelk12/MT-Gen6fix-gemma-2-9B/4b019824-8454-4ce8-aa49-d122a2491f9c.json new file mode 100644 index 000000000..ea018b3ba --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT-Gen6fix-gemma-2-9B/4b019824-8454-4ce8-aa49-d122a2491f9c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen6fix-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT-Gen6fix-gemma-2-9B", + "id": "zelk12/MT-Gen6fix-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1576 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 
+ }, + "score_details": { + "score": 0.5917 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0816 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3372 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4084 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.412 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT-Gen7-gemma-2-9B/0dfcd13c-f057-4aec-82ad-b5cf2b266502.json b/data/hfopenllm_v2/zelk12/MT-Gen7-gemma-2-9B/0dfcd13c-f057-4aec-82ad-b5cf2b266502.json new file mode 100644 index 000000000..dd5c80820 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT-Gen7-gemma-2-9B/0dfcd13c-f057-4aec-82ad-b5cf2b266502.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen7-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT-Gen7-gemma-2-9B", + "id": "zelk12/MT-Gen7-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1664 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5935 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0891 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3356 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4098 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4122 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT-Max-Merge_02012025163610-gemma-2-9B/927589bf-f6a0-4155-a24b-120231bbf029.json b/data/hfopenllm_v2/zelk12/MT-Max-Merge_02012025163610-gemma-2-9B/927589bf-f6a0-4155-a24b-120231bbf029.json new file mode 100644 index 000000000..57eea20ea --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT-Max-Merge_02012025163610-gemma-2-9B/927589bf-f6a0-4155-a24b-120231bbf029.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT-Max-Merge_02012025163610-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT-Max-Merge_02012025163610-gemma-2-9B", + "id": "zelk12/MT-Max-Merge_02012025163610-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7907 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6142 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 
+ }, + "score_details": { + "score": 0.2213 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3515 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4228 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4396 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT-Merge-gemma-2-9B/1a2740cb-c541-434e-89a1-7a9fd2c4cabd.json b/data/hfopenllm_v2/zelk12/MT-Merge-gemma-2-9B/1a2740cb-c541-434e-89a1-7a9fd2c4cabd.json new file mode 100644 index 000000000..3dbd1e532 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT-Merge-gemma-2-9B/1a2740cb-c541-434e-89a1-7a9fd2c4cabd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT-Merge-gemma-2-9B", + "id": "zelk12/MT-Merge-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8035 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6118 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2205 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", 
+ "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3482 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4256 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4362 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT-Merge1-gemma-2-9B/0110d1c9-755e-4f09-888b-0c9c1a263639.json b/data/hfopenllm_v2/zelk12/MT-Merge1-gemma-2-9B/0110d1c9-755e-4f09-888b-0c9c1a263639.json new file mode 100644 index 000000000..d496666d2 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT-Merge1-gemma-2-9B/0110d1c9-755e-4f09-888b-0c9c1a263639.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge1-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT-Merge1-gemma-2-9B", + "id": "zelk12/MT-Merge1-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7901 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.61 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2289 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3515 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + 
"hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4244 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4374 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT-Merge2-MU-gemma-2-MTg2MT1g2-9B/cda65781-494c-45bd-8c32-7b1fe987f31c.json b/data/hfopenllm_v2/zelk12/MT-Merge2-MU-gemma-2-MTg2MT1g2-9B/cda65781-494c-45bd-8c32-7b1fe987f31c.json new file mode 100644 index 000000000..66adad61e --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT-Merge2-MU-gemma-2-MTg2MT1g2-9B/cda65781-494c-45bd-8c32-7b1fe987f31c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge2-MU-gemma-2-MTg2MT1g2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT-Merge2-MU-gemma-2-MTg2MT1g2-9B", + "id": "zelk12/MT-Merge2-MU-gemma-2-MTg2MT1g2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7956 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6084 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2183 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3507 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.4322 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4373 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT-Merge2-gemma-2-9B/2fd7de02-f8d9-45c1-9bb5-db5134bd4862.json b/data/hfopenllm_v2/zelk12/MT-Merge2-gemma-2-9B/2fd7de02-f8d9-45c1-9bb5-db5134bd4862.json new file mode 100644 index 000000000..6d090a24c --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT-Merge2-gemma-2-9B/2fd7de02-f8d9-45c1-9bb5-db5134bd4862.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge2-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT-Merge2-gemma-2-9B", + "id": "zelk12/MT-Merge2-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7877 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6107 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2349 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3507 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4217 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4382 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT-Merge3-gemma-2-9B/acf07f51-5acd-4375-bafa-7a1a244db3c6.json b/data/hfopenllm_v2/zelk12/MT-Merge3-gemma-2-9B/acf07f51-5acd-4375-bafa-7a1a244db3c6.json new file mode 100644 index 000000000..78c6f3258 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT-Merge3-gemma-2-9B/acf07f51-5acd-4375-bafa-7a1a244db3c6.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge3-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT-Merge3-gemma-2-9B", + "id": "zelk12/MT-Merge3-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7859 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6102 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2205 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.349 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4258 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4373 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/zelk12/MT-Merge4-gemma-2-9B/ff985193-ba26-45d3-97be-b7d3b17ab4d7.json b/data/hfopenllm_v2/zelk12/MT-Merge4-gemma-2-9B/ff985193-ba26-45d3-97be-b7d3b17ab4d7.json new file mode 100644 index 000000000..f73843da3 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT-Merge4-gemma-2-9B/ff985193-ba26-45d3-97be-b7d3b17ab4d7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge4-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT-Merge4-gemma-2-9B", + "id": "zelk12/MT-Merge4-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7807 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6118 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2168 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3523 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4294 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.439 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT-Merge5-gemma-2-9B/21dbea2c-5cb1-431c-a496-af9b932b3440.json b/data/hfopenllm_v2/zelk12/MT-Merge5-gemma-2-9B/21dbea2c-5cb1-431c-a496-af9b932b3440.json new file mode 100644 index 000000000..9e45d9643 --- /dev/null +++ 
b/data/hfopenllm_v2/zelk12/MT-Merge5-gemma-2-9B/21dbea2c-5cb1-431c-a496-af9b932b3440.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge5-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT-Merge5-gemma-2-9B", + "id": "zelk12/MT-Merge5-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7844 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6123 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2183 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3532 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4281 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4387 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT-Merge6-gemma-2-9B/1143955c-c32c-4b41-8484-2c77e72f4946.json b/data/hfopenllm_v2/zelk12/MT-Merge6-gemma-2-9B/1143955c-c32c-4b41-8484-2c77e72f4946.json new file mode 100644 index 000000000..348e60c56 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT-Merge6-gemma-2-9B/1143955c-c32c-4b41-8484-2c77e72f4946.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge6-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": 
"1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT-Merge6-gemma-2-9B", + "id": "zelk12/MT-Merge6-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1695 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5949 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0801 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3289 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4098 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4115 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT-gemma-2-9B/94824ceb-08c3-415c-8003-b70a0d9af09d.json b/data/hfopenllm_v2/zelk12/MT-gemma-2-9B/94824ceb-08c3-415c-8003-b70a0d9af09d.json new file mode 100644 index 000000000..2c62b725d --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT-gemma-2-9B/94824ceb-08c3-415c-8003-b70a0d9af09d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT-gemma-2-9B", + "id": "zelk12/MT-gemma-2-9B", + 
"developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7968 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6064 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2054 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3456 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4071 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4224 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT1-Gen1-gemma-2-9B/bf2903cb-b954-4870-98c3-116a96aa49fb.json b/data/hfopenllm_v2/zelk12/MT1-Gen1-gemma-2-9B/bf2903cb-b954-4870-98c3-116a96aa49fb.json new file mode 100644 index 000000000..8e7b2f927 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT1-Gen1-gemma-2-9B/bf2903cb-b954-4870-98c3-116a96aa49fb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen1-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT1-Gen1-gemma-2-9B", + "id": "zelk12/MT1-Gen1-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { 
+ "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7974 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6118 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2243 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.344 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.431 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4376 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT1-Gen2-gemma-2-9B/b089c439-a38c-438d-bdad-1c68a1265d95.json b/data/hfopenllm_v2/zelk12/MT1-Gen2-gemma-2-9B/b089c439-a38c-438d-bdad-1c68a1265d95.json new file mode 100644 index 000000000..714f19ac3 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT1-Gen2-gemma-2-9B/b089c439-a38c-438d-bdad-1c68a1265d95.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen2-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT1-Gen2-gemma-2-9B", + "id": "zelk12/MT1-Gen2-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + 
"score_details": { + "score": 0.7984 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6096 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2251 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3523 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4284 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4355 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT1-Gen3-gemma-2-9B/c988815b-50e5-47e4-a418-bbbcdf1eb4a0.json b/data/hfopenllm_v2/zelk12/MT1-Gen3-gemma-2-9B/c988815b-50e5-47e4-a418-bbbcdf1eb4a0.json new file mode 100644 index 000000000..e7e79fa7b --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT1-Gen3-gemma-2-9B/c988815b-50e5-47e4-a418-bbbcdf1eb4a0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen3-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT1-Gen3-gemma-2-9B", + "id": "zelk12/MT1-Gen3-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.796 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6102 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2243 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.349 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4243 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4349 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT1-Gen4-gemma-2-9B/fa11d66c-7ebc-4b81-83b7-d35a4ff23d3f.json b/data/hfopenllm_v2/zelk12/MT1-Gen4-gemma-2-9B/fa11d66c-7ebc-4b81-83b7-d35a4ff23d3f.json new file mode 100644 index 000000000..b6d5e1c4b --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT1-Gen4-gemma-2-9B/fa11d66c-7ebc-4b81-83b7-d35a4ff23d3f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen4-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT1-Gen4-gemma-2-9B", + "id": "zelk12/MT1-Gen4-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7941 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6058 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": 
"hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.216 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3473 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4231 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4286 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT1-Gen5-IF-gemma-2-S2DMv1-9B/1c81787b-594e-4bb6-aee1-7f193a628b16.json b/data/hfopenllm_v2/zelk12/MT1-Gen5-IF-gemma-2-S2DMv1-9B/1c81787b-594e-4bb6-aee1-7f193a628b16.json new file mode 100644 index 000000000..c2e0e4354 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT1-Gen5-IF-gemma-2-S2DMv1-9B/1c81787b-594e-4bb6-aee1-7f193a628b16.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen5-IF-gemma-2-S2DMv1-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT1-Gen5-IF-gemma-2-S2DMv1-9B", + "id": "zelk12/MT1-Gen5-IF-gemma-2-S2DMv1-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7929 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + 
"min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2032 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.344 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4245 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4218 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT1-Gen5-gemma-2-9B/fd9ce37e-d43d-4ec2-94ec-0eb42e3cc685.json b/data/hfopenllm_v2/zelk12/MT1-Gen5-gemma-2-9B/fd9ce37e-d43d-4ec2-94ec-0eb42e3cc685.json new file mode 100644 index 000000000..c299e1f9b --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT1-Gen5-gemma-2-9B/fd9ce37e-d43d-4ec2-94ec-0eb42e3cc685.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen5-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT1-Gen5-gemma-2-9B", + "id": "zelk12/MT1-Gen5-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7795 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6017 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2077 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + 
"evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3465 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4191 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4222 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT1-Gen6-gemma-2-9B/0625f09a-3e02-410b-963b-49b83dfc5c8f.json b/data/hfopenllm_v2/zelk12/MT1-Gen6-gemma-2-9B/0625f09a-3e02-410b-963b-49b83dfc5c8f.json new file mode 100644 index 000000000..550cda232 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT1-Gen6-gemma-2-9B/0625f09a-3e02-410b-963b-49b83dfc5c8f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen6-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT1-Gen6-gemma-2-9B", + "id": "zelk12/MT1-Gen6-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1634 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5944 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0808 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.328 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": 
"MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4044 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4133 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT1-Gen7-gemma-2-9B/50c1399e-b409-4dff-b4d6-9be01dbb02c7.json b/data/hfopenllm_v2/zelk12/MT1-Gen7-gemma-2-9B/50c1399e-b409-4dff-b4d6-9be01dbb02c7.json new file mode 100644 index 000000000..2e79a59e4 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT1-Gen7-gemma-2-9B/50c1399e-b409-4dff-b4d6-9be01dbb02c7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen7-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT1-Gen7-gemma-2-9B", + "id": "zelk12/MT1-Gen7-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1634 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5938 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0831 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.328 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + 
"score": 0.4111 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4145 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT1-Max-Merge_02012025163610-gemma-2-9B/402bdb4a-b258-40a4-ac9f-de74026c02f3.json b/data/hfopenllm_v2/zelk12/MT1-Max-Merge_02012025163610-gemma-2-9B/402bdb4a-b258-40a4-ac9f-de74026c02f3.json new file mode 100644 index 000000000..09acf9468 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT1-Max-Merge_02012025163610-gemma-2-9B/402bdb4a-b258-40a4-ac9f-de74026c02f3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT1-Max-Merge_02012025163610-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT1-Max-Merge_02012025163610-gemma-2-9B", + "id": "zelk12/MT1-Max-Merge_02012025163610-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7929 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6123 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2228 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3549 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4255 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + 
"hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4382 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT1-gemma-2-9B/65dcf458-db0f-45cd-a8a4-e16108e51161.json b/data/hfopenllm_v2/zelk12/MT1-gemma-2-9B/65dcf458-db0f-45cd-a8a4-e16108e51161.json new file mode 100644 index 000000000..f7d6bc79e --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT1-gemma-2-9B/65dcf458-db0f-45cd-a8a4-e16108e51161.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT1-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT1-gemma-2-9B", + "id": "zelk12/MT1-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7947 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6109 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2236 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3456 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4322 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4358 + } + } + ] +} \ No newline at end of file 
diff --git a/data/hfopenllm_v2/zelk12/MT2-Gen1-gemma-2-9B/f1346b1a-0e66-4d80-bfad-ccbe0a8e2abf.json b/data/hfopenllm_v2/zelk12/MT2-Gen1-gemma-2-9B/f1346b1a-0e66-4d80-bfad-ccbe0a8e2abf.json new file mode 100644 index 000000000..44374c9d5 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT2-Gen1-gemma-2-9B/f1346b1a-0e66-4d80-bfad-ccbe0a8e2abf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT2-Gen1-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT2-Gen1-gemma-2-9B", + "id": "zelk12/MT2-Gen1-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7856 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6101 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2213 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3431 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4243 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4377 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT2-Gen2-gemma-2-9B/11e7b55a-d872-474a-98a6-fc82ce5a863e.json b/data/hfopenllm_v2/zelk12/MT2-Gen2-gemma-2-9B/11e7b55a-d872-474a-98a6-fc82ce5a863e.json new file mode 100644 index 000000000..3faf801d3 --- /dev/null +++ 
b/data/hfopenllm_v2/zelk12/MT2-Gen2-gemma-2-9B/11e7b55a-d872-474a-98a6-fc82ce5a863e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT2-Gen2-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT2-Gen2-gemma-2-9B", + "id": "zelk12/MT2-Gen2-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7889 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6093 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2183 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3465 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.427 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4388 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT2-Gen3-gemma-2-9B/19688633-fa6c-412a-8dbc-c16fc49b3276.json b/data/hfopenllm_v2/zelk12/MT2-Gen3-gemma-2-9B/19688633-fa6c-412a-8dbc-c16fc49b3276.json new file mode 100644 index 000000000..b70f067a8 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT2-Gen3-gemma-2-9B/19688633-fa6c-412a-8dbc-c16fc49b3276.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT2-Gen3-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + 
"source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT2-Gen3-gemma-2-9B", + "id": "zelk12/MT2-Gen3-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.781 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6105 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2107 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3465 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4231 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4374 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT2-Gen4-gemma-2-9B/7d67eb9c-a4d8-4b86-8c24-928ebbe58de7.json b/data/hfopenllm_v2/zelk12/MT2-Gen4-gemma-2-9B/7d67eb9c-a4d8-4b86-8c24-928ebbe58de7.json new file mode 100644 index 000000000..70dd1549f --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT2-Gen4-gemma-2-9B/7d67eb9c-a4d8-4b86-8c24-928ebbe58de7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT2-Gen4-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT2-Gen4-gemma-2-9B", + "id": 
"zelk12/MT2-Gen4-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7896 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6097 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2236 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3456 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4321 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT2-Gen5-gemma-2-9B/447f880c-643f-4041-8cdb-87697d798085.json b/data/hfopenllm_v2/zelk12/MT2-Gen5-gemma-2-9B/447f880c-643f-4041-8cdb-87697d798085.json new file mode 100644 index 000000000..2ef223d58 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT2-Gen5-gemma-2-9B/447f880c-643f-4041-8cdb-87697d798085.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT2-Gen5-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT2-Gen5-gemma-2-9B", + "id": "zelk12/MT2-Gen5-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + 
"evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7749 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6064 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2107 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3515 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4244 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4302 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT2-Gen6-gemma-2-9B/653d459e-f8b7-48bc-a9db-779e515532cf.json b/data/hfopenllm_v2/zelk12/MT2-Gen6-gemma-2-9B/653d459e-f8b7-48bc-a9db-779e515532cf.json new file mode 100644 index 000000000..828f27f8f --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT2-Gen6-gemma-2-9B/653d459e-f8b7-48bc-a9db-779e515532cf.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT2-Gen6-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT2-Gen6-gemma-2-9B", + "id": "zelk12/MT2-Gen6-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1664 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.596 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0846 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3381 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4137 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.421 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT2-Gen7-gemma-2-9B/4e56faf6-dbde-4059-b502-32c76bdbed2d.json b/data/hfopenllm_v2/zelk12/MT2-Gen7-gemma-2-9B/4e56faf6-dbde-4059-b502-32c76bdbed2d.json new file mode 100644 index 000000000..64fc4a08f --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT2-Gen7-gemma-2-9B/4e56faf6-dbde-4059-b502-32c76bdbed2d.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT2-Gen7-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT2-Gen7-gemma-2-9B", + "id": "zelk12/MT2-Gen7-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1762 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { 
+ "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6079 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.102 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3549 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4203 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4311 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT2-Max-Merge_02012025163610-gemma-2-9B/f161df97-3cc6-48d3-bfc5-d3f01108ecbb.json b/data/hfopenllm_v2/zelk12/MT2-Max-Merge_02012025163610-gemma-2-9B/f161df97-3cc6-48d3-bfc5-d3f01108ecbb.json new file mode 100644 index 000000000..565a40eb8 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT2-Max-Merge_02012025163610-gemma-2-9B/f161df97-3cc6-48d3-bfc5-d3f01108ecbb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT2-Max-Merge_02012025163610-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT2-Max-Merge_02012025163610-gemma-2-9B", + "id": "zelk12/MT2-Max-Merge_02012025163610-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7901 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, 
+ "score_details": { + "score": 0.6108 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2243 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3515 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4228 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4391 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT2-gemma-2-9B/7d08412d-e987-497f-a6ec-ce0affe0f80f.json b/data/hfopenllm_v2/zelk12/MT2-gemma-2-9B/7d08412d-e987-497f-a6ec-ce0affe0f80f.json new file mode 100644 index 000000000..adfbad27f --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT2-gemma-2-9B/7d08412d-e987-497f-a6ec-ce0affe0f80f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT2-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT2-gemma-2-9B", + "id": "zelk12/MT2-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7886 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6115 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH 
Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2213 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3473 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4217 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4368 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT3-Gen1-gemma-2-9B/f042f897-cfe8-4d8c-b75b-bbfca44505ea.json b/data/hfopenllm_v2/zelk12/MT3-Gen1-gemma-2-9B/f042f897-cfe8-4d8c-b75b-bbfca44505ea.json new file mode 100644 index 000000000..4eb9a7a77 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT3-Gen1-gemma-2-9B/f042f897-cfe8-4d8c-b75b-bbfca44505ea.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT3-Gen1-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT3-Gen1-gemma-2-9B", + "id": "zelk12/MT3-Gen1-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7838 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6107 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2145 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": 
"hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3465 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4151 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4327 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT3-Gen2-gemma-2-9B/f24ab334-c022-4e34-a930-3fed6ee18793.json b/data/hfopenllm_v2/zelk12/MT3-Gen2-gemma-2-9B/f24ab334-c022-4e34-a930-3fed6ee18793.json new file mode 100644 index 000000000..84a45fd26 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT3-Gen2-gemma-2-9B/f24ab334-c022-4e34-a930-3fed6ee18793.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT3-Gen2-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT3-Gen2-gemma-2-9B", + "id": "zelk12/MT3-Gen2-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7843 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6091 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2236 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3574 + } + 
}, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4111 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4333 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT3-Gen3-gemma-2-9B/2bd3c620-780f-452d-92d7-d01a04539939.json b/data/hfopenllm_v2/zelk12/MT3-Gen3-gemma-2-9B/2bd3c620-780f-452d-92d7-d01a04539939.json new file mode 100644 index 000000000..03053bdd1 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT3-Gen3-gemma-2-9B/2bd3c620-780f-452d-92d7-d01a04539939.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT3-Gen3-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT3-Gen3-gemma-2-9B", + "id": "zelk12/MT3-Gen3-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7856 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6089 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2153 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3515 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4258 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4303 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT3-Gen4-gemma-2-9B/234042bd-237f-4cc5-8c5d-1eacd2e8bfaa.json b/data/hfopenllm_v2/zelk12/MT3-Gen4-gemma-2-9B/234042bd-237f-4cc5-8c5d-1eacd2e8bfaa.json new file mode 100644 index 000000000..283c8ee00 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT3-Gen4-gemma-2-9B/234042bd-237f-4cc5-8c5d-1eacd2e8bfaa.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT3-Gen4-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT3-Gen4-gemma-2-9B", + "id": "zelk12/MT3-Gen4-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7737 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6101 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2062 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3473 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4476 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + 
"metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4387 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT3-Gen5-gemma-2-9B/d8e0a32e-f307-4056-b450-47a12a0a7b15.json b/data/hfopenllm_v2/zelk12/MT3-Gen5-gemma-2-9B/d8e0a32e-f307-4056-b450-47a12a0a7b15.json new file mode 100644 index 000000000..e636eb2e9 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT3-Gen5-gemma-2-9B/d8e0a32e-f307-4056-b450-47a12a0a7b15.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT3-Gen5-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT3-Gen5-gemma-2-9B", + "id": "zelk12/MT3-Gen5-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.799 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6099 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2266 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3532 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4191 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4317 + } + } + ] +} \ No newline at end of file diff --git 
a/data/hfopenllm_v2/zelk12/MT3-Gen5-gemma-2-9B_v1/9dc3c4f5-8974-4496-8a6e-daa4fe3e3c2a.json b/data/hfopenllm_v2/zelk12/MT3-Gen5-gemma-2-9B_v1/9dc3c4f5-8974-4496-8a6e-daa4fe3e3c2a.json new file mode 100644 index 000000000..90b69beb0 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT3-Gen5-gemma-2-9B_v1/9dc3c4f5-8974-4496-8a6e-daa4fe3e3c2a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT3-Gen5-gemma-2-9B_v1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT3-Gen5-gemma-2-9B_v1", + "id": "zelk12/MT3-Gen5-gemma-2-9B_v1", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7996 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6113 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2228 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.349 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4204 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4359 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT3-Gen6-gemma-2-9B/037787fb-9c61-4c56-a7fc-704c04b519f7.json b/data/hfopenllm_v2/zelk12/MT3-Gen6-gemma-2-9B/037787fb-9c61-4c56-a7fc-704c04b519f7.json new file mode 100644 index 000000000..bff72f6ad --- /dev/null +++ 
b/data/hfopenllm_v2/zelk12/MT3-Gen6-gemma-2-9B/037787fb-9c61-4c56-a7fc-704c04b519f7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT3-Gen6-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT3-Gen6-gemma-2-9B", + "id": "zelk12/MT3-Gen6-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1762 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.602 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0884 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3431 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4126 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4102 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT3-Max-Merge_02012025163610-gemma-2-9B/5df3dd8f-4921-4916-8163-8651b796e478.json b/data/hfopenllm_v2/zelk12/MT3-Max-Merge_02012025163610-gemma-2-9B/5df3dd8f-4921-4916-8163-8651b796e478.json new file mode 100644 index 000000000..71138b3be --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT3-Max-Merge_02012025163610-gemma-2-9B/5df3dd8f-4921-4916-8163-8651b796e478.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/zelk12_MT3-Max-Merge_02012025163610-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT3-Max-Merge_02012025163610-gemma-2-9B", + "id": "zelk12/MT3-Max-Merge_02012025163610-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1762 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6123 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1012 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3507 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4255 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4389 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT3-gemma-2-9B/50463593-3a53-4b3f-9621-d05670309b7e.json b/data/hfopenllm_v2/zelk12/MT3-gemma-2-9B/50463593-3a53-4b3f-9621-d05670309b7e.json new file mode 100644 index 000000000..16582b121 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT3-gemma-2-9B/50463593-3a53-4b3f-9621-d05670309b7e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT3-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + 
"source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT3-gemma-2-9B", + "id": "zelk12/MT3-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7786 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6131 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2168 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3448 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4243 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4327 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT4-Gen1-gemma-2-9B/d7fef356-36c7-488f-8f49-997682a2c01a.json b/data/hfopenllm_v2/zelk12/MT4-Gen1-gemma-2-9B/d7fef356-36c7-488f-8f49-997682a2c01a.json new file mode 100644 index 000000000..7bdf9dc8f --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT4-Gen1-gemma-2-9B/d7fef356-36c7-488f-8f49-997682a2c01a.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT4-Gen1-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT4-Gen1-gemma-2-9B", + "id": "zelk12/MT4-Gen1-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + 
"precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7895 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6094 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2198 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.344 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4322 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4389 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT4-Gen2-gemma-2-9B/42e7abc6-eaa2-4971-90ee-e4d9dbb97ddb.json b/data/hfopenllm_v2/zelk12/MT4-Gen2-gemma-2-9B/42e7abc6-eaa2-4971-90ee-e4d9dbb97ddb.json new file mode 100644 index 000000000..3b4ba860e --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT4-Gen2-gemma-2-9B/42e7abc6-eaa2-4971-90ee-e4d9dbb97ddb.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT4-Gen2-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT4-Gen2-gemma-2-9B", + "id": "zelk12/MT4-Gen2-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" 
+ }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8051 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6108 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2326 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3456 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4257 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4368 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT4-Gen3-gemma-2-9B/b1cf06a6-d270-41ae-bb9b-443bdc5446f3.json b/data/hfopenllm_v2/zelk12/MT4-Gen3-gemma-2-9B/b1cf06a6-d270-41ae-bb9b-443bdc5446f3.json new file mode 100644 index 000000000..5a8bcf436 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT4-Gen3-gemma-2-9B/b1cf06a6-d270-41ae-bb9b-443bdc5446f3.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT4-Gen3-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT4-Gen3-gemma-2-9B", + "id": "zelk12/MT4-Gen3-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7841 + } + }, + { + "evaluation_name": "BBH", + 
"source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6087 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.219 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.344 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4243 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4381 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT4-Gen4-gemma-2-9B/e40ea476-bcc5-4d3b-bf8e-e5048d9cbe42.json b/data/hfopenllm_v2/zelk12/MT4-Gen4-gemma-2-9B/e40ea476-bcc5-4d3b-bf8e-e5048d9cbe42.json new file mode 100644 index 000000000..fd69e6724 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT4-Gen4-gemma-2-9B/e40ea476-bcc5-4d3b-bf8e-e5048d9cbe42.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT4-Gen4-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT4-Gen4-gemma-2-9B", + "id": "zelk12/MT4-Gen4-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7874 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 
1.0 + }, + "score_details": { + "score": 0.6076 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2145 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3523 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4244 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4323 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT4-Gen5-gemma-2-9B/731a5f85-a59e-40af-870c-00e519ca0e7e.json b/data/hfopenllm_v2/zelk12/MT4-Gen5-gemma-2-9B/731a5f85-a59e-40af-870c-00e519ca0e7e.json new file mode 100644 index 000000000..bc85cdb46 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT4-Gen5-gemma-2-9B/731a5f85-a59e-40af-870c-00e519ca0e7e.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT4-Gen5-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT4-Gen5-gemma-2-9B", + "id": "zelk12/MT4-Gen5-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7789 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6107 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + 
"evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2266 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3565 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4268 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4384 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT4-Max-Merge_02012025163610-gemma-2-9B/38d93ae8-90ec-473c-8570-33d52c46770b.json b/data/hfopenllm_v2/zelk12/MT4-Max-Merge_02012025163610-gemma-2-9B/38d93ae8-90ec-473c-8570-33d52c46770b.json new file mode 100644 index 000000000..197b1a326 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT4-Max-Merge_02012025163610-gemma-2-9B/38d93ae8-90ec-473c-8570-33d52c46770b.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT4-Max-Merge_02012025163610-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT4-Max-Merge_02012025163610-gemma-2-9B", + "id": "zelk12/MT4-Max-Merge_02012025163610-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1771 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.612 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + 
"max_score": 1.0 + }, + "score_details": { + "score": 0.0952 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3515 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4228 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4391 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT4-gemma-2-9B/9072fd28-040b-44df-bd58-6e3f59398189.json b/data/hfopenllm_v2/zelk12/MT4-gemma-2-9B/9072fd28-040b-44df-bd58-6e3f59398189.json new file mode 100644 index 000000000..7380725c6 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT4-gemma-2-9B/9072fd28-040b-44df-bd58-6e3f59398189.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT4-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT4-gemma-2-9B", + "id": "zelk12/MT4-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7762 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6073 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2085 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3381 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4309 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4366 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT5-Gen1-gemma-2-9B/14827e00-09c5-4ebd-93cb-8e026ac73d20.json b/data/hfopenllm_v2/zelk12/MT5-Gen1-gemma-2-9B/14827e00-09c5-4ebd-93cb-8e026ac73d20.json new file mode 100644 index 000000000..045a5d93e --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT5-Gen1-gemma-2-9B/14827e00-09c5-4ebd-93cb-8e026ac73d20.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT5-Gen1-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT5-Gen1-gemma-2-9B", + "id": "zelk12/MT5-Gen1-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7831 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.611 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2213 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3473 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + 
"hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4204 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4368 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT5-Gen2-gemma-2-9B/11e76d74-b8e0-408f-b429-566faa5d60a2.json b/data/hfopenllm_v2/zelk12/MT5-Gen2-gemma-2-9B/11e76d74-b8e0-408f-b429-566faa5d60a2.json new file mode 100644 index 000000000..0398ea5dd --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT5-Gen2-gemma-2-9B/11e76d74-b8e0-408f-b429-566faa5d60a2.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT5-Gen2-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT5-Gen2-gemma-2-9B", + "id": "zelk12/MT5-Gen2-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7962 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6105 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2205 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3515 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4163 + } + }, + { + 
"evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4379 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT5-Gen3-gemma-2-9B/944c84d8-231d-47ef-85f4-23c0286a4a02.json b/data/hfopenllm_v2/zelk12/MT5-Gen3-gemma-2-9B/944c84d8-231d-47ef-85f4-23c0286a4a02.json new file mode 100644 index 000000000..1735bd1eb --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT5-Gen3-gemma-2-9B/944c84d8-231d-47ef-85f4-23c0286a4a02.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT5-Gen3-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT5-Gen3-gemma-2-9B", + "id": "zelk12/MT5-Gen3-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7825 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.609 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2168 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3515 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4231 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": 
"continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4375 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT5-Gen4-gemma-2-9B/47c8da1d-8ce3-4d19-b8b8-6b5e68e2e8ab.json b/data/hfopenllm_v2/zelk12/MT5-Gen4-gemma-2-9B/47c8da1d-8ce3-4d19-b8b8-6b5e68e2e8ab.json new file mode 100644 index 000000000..4db3a4fe8 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT5-Gen4-gemma-2-9B/47c8da1d-8ce3-4d19-b8b8-6b5e68e2e8ab.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT5-Gen4-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT5-Gen4-gemma-2-9B", + "id": "zelk12/MT5-Gen4-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7835 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6131 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2243 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3532 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4228 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4397 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT5-Gen5-gemma-2-9B/ca54a8d4-153b-4169-b6ee-133461a9bedd.json 
b/data/hfopenllm_v2/zelk12/MT5-Gen5-gemma-2-9B/ca54a8d4-153b-4169-b6ee-133461a9bedd.json new file mode 100644 index 000000000..08cfd2a70 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT5-Gen5-gemma-2-9B/ca54a8d4-153b-4169-b6ee-133461a9bedd.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT5-Gen5-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT5-Gen5-gemma-2-9B", + "id": "zelk12/MT5-Gen5-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7947 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6112 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2258 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3482 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4191 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4329 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT5-Max-Merge_02012025163610-gemma-2-9B/652359ec-14f2-4f94-a694-b7dc98819bfc.json b/data/hfopenllm_v2/zelk12/MT5-Max-Merge_02012025163610-gemma-2-9B/652359ec-14f2-4f94-a694-b7dc98819bfc.json new file mode 100644 index 000000000..5dfae6c49 --- /dev/null +++ 
b/data/hfopenllm_v2/zelk12/MT5-Max-Merge_02012025163610-gemma-2-9B/652359ec-14f2-4f94-a694-b7dc98819bfc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MT5-Max-Merge_02012025163610-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT5-Max-Merge_02012025163610-gemma-2-9B", + "id": "zelk12/MT5-Max-Merge_02012025163610-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1762 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6127 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0982 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3515 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4228 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.439 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MT5-gemma-2-9B/b34f3335-c7a3-431f-b2c8-6f0731a81378.json b/data/hfopenllm_v2/zelk12/MT5-gemma-2-9B/b34f3335-c7a3-431f-b2c8-6f0731a81378.json new file mode 100644 index 000000000..f6eb0dcef --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MT5-gemma-2-9B/b34f3335-c7a3-431f-b2c8-6f0731a81378.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/zelk12_MT5-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MT5-gemma-2-9B", + "id": "zelk12/MT5-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8048 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6112 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2258 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3431 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4204 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4367 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MTM-Merge-gemma-2-9B/077306f9-5d40-40dc-9df4-b5ca559af5c7.json b/data/hfopenllm_v2/zelk12/MTM-Merge-gemma-2-9B/077306f9-5d40-40dc-9df4-b5ca559af5c7.json new file mode 100644 index 000000000..5b5954b79 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MTM-Merge-gemma-2-9B/077306f9-5d40-40dc-9df4-b5ca559af5c7.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MTM-Merge-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": 
"third_party" + }, + "model_info": { + "name": "MTM-Merge-gemma-2-9B", + "id": "zelk12/MTM-Merge-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7798 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6133 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2175 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3549 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4268 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4388 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/MTMaMe-Merge_02012025163610-gemma-2-9B/e0f0fe87-8ed3-4398-8683-65aa042d01d9.json b/data/hfopenllm_v2/zelk12/MTMaMe-Merge_02012025163610-gemma-2-9B/e0f0fe87-8ed3-4398-8683-65aa042d01d9.json new file mode 100644 index 000000000..15b286068 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/MTMaMe-Merge_02012025163610-gemma-2-9B/e0f0fe87-8ed3-4398-8683-65aa042d01d9.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_MTMaMe-Merge_02012025163610-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MTMaMe-Merge_02012025163610-gemma-2-9B", + "id": "zelk12/MTMaMe-Merge_02012025163610-gemma-2-9B", + "developer": "zelk12", + 
"inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1786 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6117 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0959 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3523 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4241 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4382 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/Rv0.4DMv1t0.25-gemma-2-9B/2d968d3e-a3df-4bdf-86a4-034087c0d7fc.json b/data/hfopenllm_v2/zelk12/Rv0.4DMv1t0.25-gemma-2-9B/2d968d3e-a3df-4bdf-86a4-034087c0d7fc.json new file mode 100644 index 000000000..eeccbef87 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/Rv0.4DMv1t0.25-gemma-2-9B/2d968d3e-a3df-4bdf-86a4-034087c0d7fc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_Rv0.4DMv1t0.25-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Rv0.4DMv1t0.25-gemma-2-9B", + "id": "zelk12/Rv0.4DMv1t0.25-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + 
"source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7497 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.607 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2258 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3456 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4309 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4401 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/Rv0.4DMv1t0.25Tt0.25-gemma-2-9B/db476911-87fb-433f-b164-4435718dab46.json b/data/hfopenllm_v2/zelk12/Rv0.4DMv1t0.25Tt0.25-gemma-2-9B/db476911-87fb-433f-b164-4435718dab46.json new file mode 100644 index 000000000..48c1ee534 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/Rv0.4DMv1t0.25Tt0.25-gemma-2-9B/db476911-87fb-433f-b164-4435718dab46.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_Rv0.4DMv1t0.25Tt0.25-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Rv0.4DMv1t0.25Tt0.25-gemma-2-9B", + "id": "zelk12/Rv0.4DMv1t0.25Tt0.25-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + 
"lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7646 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6098 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2069 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3423 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4283 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4347 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/Rv0.4MT4g2-gemma-2-9B/75a967f6-a8ab-435f-999b-4889e8217dce.json b/data/hfopenllm_v2/zelk12/Rv0.4MT4g2-gemma-2-9B/75a967f6-a8ab-435f-999b-4889e8217dce.json new file mode 100644 index 000000000..eb1701501 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/Rv0.4MT4g2-gemma-2-9B/75a967f6-a8ab-435f-999b-4889e8217dce.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_Rv0.4MT4g2-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Rv0.4MT4g2-gemma-2-9B", + "id": "zelk12/Rv0.4MT4g2-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.732 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + 
"hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6041 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1949 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3532 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4231 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4417 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/T31122024203920-gemma-2-9B/e072997b-2f79-4d25-b8dc-ebf15ac311e1.json b/data/hfopenllm_v2/zelk12/T31122024203920-gemma-2-9B/e072997b-2f79-4d25-b8dc-ebf15ac311e1.json new file mode 100644 index 000000000..2fd27ce58 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/T31122024203920-gemma-2-9B/e072997b-2f79-4d25-b8dc-ebf15ac311e1.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_T31122024203920-gemma-2-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T31122024203920-gemma-2-9B", + "id": "zelk12/T31122024203920-gemma-2-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7676 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { 
+ "score": 0.6096 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2054 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3507 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4322 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4373 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/Test01012025155054/6d681a29-0d1a-4054-8250-5246993509f8.json b/data/hfopenllm_v2/zelk12/Test01012025155054/6d681a29-0d1a-4054-8250-5246993509f8.json new file mode 100644 index 000000000..5d2d4db3f --- /dev/null +++ b/data/hfopenllm_v2/zelk12/Test01012025155054/6d681a29-0d1a-4054-8250-5246993509f8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_Test01012025155054/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Test01012025155054", + "id": "zelk12/Test01012025155054", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 3.817 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1555 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.283 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on 
MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2416 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.367 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.109 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/Test01012025155054/e25f6fa3-238e-4bc3-b6ce-cdc2bc728d9c.json b/data/hfopenllm_v2/zelk12/Test01012025155054/e25f6fa3-238e-4bc3-b6ce-cdc2bc728d9c.json deleted file mode 100644 index 5695d48aa..000000000 --- a/data/hfopenllm_v2/zelk12/Test01012025155054/e25f6fa3-238e-4bc3-b6ce-cdc2bc728d9c.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zelk12_Test01012025155054/1762652580.6282592", - "retrieved_timestamp": "1762652580.6282601", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zelk12/Test01012025155054", - "developer": "zelk12", - "inference_platform": "unknown", - "id": "zelk12/Test01012025155054", - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 3.817 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1555229014570229 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28295044895258115 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24161073825503357 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36702083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10904255319148937 - } - } - ] -} diff --git a/data/hfopenllm_v2/zelk12/Test01012025155054t0.5_gemma-2/2a6af4ce-e45c-4721-a23c-03071a5e774f.json b/data/hfopenllm_v2/zelk12/Test01012025155054t0.5_gemma-2/2a6af4ce-e45c-4721-a23c-03071a5e774f.json new file mode 100644 index 000000000..e5cfc1c87 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/Test01012025155054t0.5_gemma-2/2a6af4ce-e45c-4721-a23c-03071a5e774f.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_Test01012025155054t0.5_gemma-2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Test01012025155054t0.5_gemma-2", + "id": "zelk12/Test01012025155054t0.5_gemma-2", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 3.817 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1555 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.283 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2416 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.367 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": 
"TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.109 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/gemma-2-S2MTM-9B/5ae5ddff-714d-4a20-b1d3-3eeb95fd858c.json b/data/hfopenllm_v2/zelk12/gemma-2-S2MTM-9B/5ae5ddff-714d-4a20-b1d3-3eeb95fd858c.json new file mode 100644 index 000000000..18e9a3529 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/gemma-2-S2MTM-9B/5ae5ddff-714d-4a20-b1d3-3eeb95fd858c.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_gemma-2-S2MTM-9B/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gemma-2-S2MTM-9B", + "id": "zelk12/gemma-2-S2MTM-9B", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7823 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6061 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2047 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3456 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4218 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4297 + } + } + ] +} \ No newline at end of file 
diff --git a/data/hfopenllm_v2/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.25/60052d34-f6a7-4204-baea-532f5ba29880.json b/data/hfopenllm_v2/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.25/60052d34-f6a7-4204-baea-532f5ba29880.json new file mode 100644 index 000000000..2e6a12b87 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.25/60052d34-f6a7-4204-baea-532f5ba29880.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.1-t0.25/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "recoilme-gemma-2-Ataraxy-9B-v0.1-t0.25", + "id": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.25", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7707 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6075 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2145 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3431 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4323 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.44 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.75/e1ddd882-f8a1-48d0-bb2a-878f43095895.json 
b/data/hfopenllm_v2/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.75/e1ddd882-f8a1-48d0-bb2a-878f43095895.json new file mode 100644 index 000000000..c498d42d2 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.75/e1ddd882-f8a1-48d0-bb2a-878f43095895.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.1-t0.75/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "recoilme-gemma-2-Ataraxy-9B-v0.1-t0.75", + "id": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.75", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7208 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5995 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2017 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3498 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3951 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4141 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1/d2c3edec-38d8-48e3-9f6d-e26a63442af8.json b/data/hfopenllm_v2/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1/d2c3edec-38d8-48e3-9f6d-e26a63442af8.json new file mode 100644 index 000000000..eeaa69af5 --- 
/dev/null +++ b/data/hfopenllm_v2/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1/d2c3edec-38d8-48e3-9f6d-e26a63442af8.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "recoilme-gemma-2-Ataraxy-9B-v0.1", + "id": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7649 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6075 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2281 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3498 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4136 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4321 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.2/dcfafe94-dacb-4e7a-9365-8bb39ecb79ec.json b/data/hfopenllm_v2/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.2/dcfafe94-dacb-4e7a-9365-8bb39ecb79ec.json new file mode 100644 index 000000000..d481e65cf --- /dev/null +++ b/data/hfopenllm_v2/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.2/dcfafe94-dacb-4e7a-9365-8bb39ecb79ec.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": 
"hfopenllm_v2/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "recoilme-gemma-2-Ataraxy-9B-v0.2", + "id": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.2", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.76 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6066 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2228 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3482 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.411 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4323 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/recoilme-gemma-2-Gutenberg-Doppel-9B-v0.1/8ca0e602-bf6b-4d15-95c2-a0d47e78ded0.json b/data/hfopenllm_v2/zelk12/recoilme-gemma-2-Gutenberg-Doppel-9B-v0.1/8ca0e602-bf6b-4d15-95c2-a0d47e78ded0.json new file mode 100644 index 000000000..93f6e8d52 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/recoilme-gemma-2-Gutenberg-Doppel-9B-v0.1/8ca0e602-bf6b-4d15-95c2-a0d47e78ded0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_recoilme-gemma-2-Gutenberg-Doppel-9B-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + 
"source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "recoilme-gemma-2-Gutenberg-Doppel-9B-v0.1", + "id": "zelk12/recoilme-gemma-2-Gutenberg-Doppel-9B-v0.1", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7615 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6099 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.21 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3414 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.431 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4315 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/recoilme-gemma-2-Ifable-9B-v0.1/fc262523-dcde-4b45-80ba-2922e66d42c4.json b/data/hfopenllm_v2/zelk12/recoilme-gemma-2-Ifable-9B-v0.1/fc262523-dcde-4b45-80ba-2922e66d42c4.json new file mode 100644 index 000000000..5aa27b1eb --- /dev/null +++ b/data/hfopenllm_v2/zelk12/recoilme-gemma-2-Ifable-9B-v0.1/fc262523-dcde-4b45-80ba-2922e66d42c4.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_recoilme-gemma-2-Ifable-9B-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { 
+ "name": "recoilme-gemma-2-Ifable-9B-v0.1", + "id": "zelk12/recoilme-gemma-2-Ifable-9B-v0.1", + "developer": "zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7944 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6064 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2205 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3515 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4202 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4323 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/recoilme-gemma-2-psy10k-mental_healt-9B-v0.1/f8d745da-9867-4348-bace-d8052c3b4025.json b/data/hfopenllm_v2/zelk12/recoilme-gemma-2-psy10k-mental_healt-9B-v0.1/f8d745da-9867-4348-bace-d8052c3b4025.json new file mode 100644 index 000000000..283796808 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/recoilme-gemma-2-psy10k-mental_healt-9B-v0.1/f8d745da-9867-4348-bace-d8052c3b4025.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zelk12_recoilme-gemma-2-psy10k-mental_healt-9B-v0.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "recoilme-gemma-2-psy10k-mental_healt-9B-v0.1", + "id": "zelk12/recoilme-gemma-2-psy10k-mental_healt-9B-v0.1", + "developer": 
"zelk12", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7445 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5978 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1888 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.344 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4295 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4181 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zetasepic/Qwen2.5-32B-Instruct-abliterated-v2/3d410f0f-6b24-4e86-a353-6142c51b1ecc.json b/data/hfopenllm_v2/zetasepic/Qwen2.5-32B-Instruct-abliterated-v2/3d410f0f-6b24-4e86-a353-6142c51b1ecc.json new file mode 100644 index 000000000..165cc965d --- /dev/null +++ b/data/hfopenllm_v2/zetasepic/Qwen2.5-32B-Instruct-abliterated-v2/3d410f0f-6b24-4e86-a353-6142c51b1ecc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zetasepic_Qwen2.5-32B-Instruct-abliterated-v2/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-32B-Instruct-abliterated-v2", + "id": "zetasepic/Qwen2.5-32B-Instruct-abliterated-v2", + "developer": "zetasepic", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + 
"params_billions": 32.764 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8334 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6934 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5952 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3674 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4354 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5622 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zetasepic/Qwen2.5-32B-Instruct-abliterated-v2/a5490bf2-6d11-4474-b6e5-07a79d30f431.json b/data/hfopenllm_v2/zetasepic/Qwen2.5-32B-Instruct-abliterated-v2/a5490bf2-6d11-4474-b6e5-07a79d30f431.json deleted file mode 100644 index 48a3a7761..000000000 --- a/data/hfopenllm_v2/zetasepic/Qwen2.5-32B-Instruct-abliterated-v2/a5490bf2-6d11-4474-b6e5-07a79d30f431.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zetasepic_Qwen2.5-32B-Instruct-abliterated-v2/1762652580.6318998", - "retrieved_timestamp": "1762652580.631902", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zetasepic/Qwen2.5-32B-Instruct-abliterated-v2", - "developer": "zetasepic", - "inference_platform": "unknown", - "id": "zetasepic/Qwen2.5-32B-Instruct-abliterated-v2", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 
- } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8334131216283904 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6934020817780425 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3674496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43542708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5621675531914894 - } - } - ] -} diff --git a/data/hfopenllm_v2/zetasepic/Qwen2.5-72B-Instruct-abliterated/46329fc3-974f-4d04-be9e-ba85b3816efc.json b/data/hfopenllm_v2/zetasepic/Qwen2.5-72B-Instruct-abliterated/46329fc3-974f-4d04-be9e-ba85b3816efc.json new file mode 100644 index 000000000..5b3165605 --- /dev/null +++ b/data/hfopenllm_v2/zetasepic/Qwen2.5-72B-Instruct-abliterated/46329fc3-974f-4d04-be9e-ba85b3816efc.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zetasepic_Qwen2.5-72B-Instruct-abliterated/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen2.5-72B-Instruct-abliterated", + "id": "zetasepic/Qwen2.5-72B-Instruct-abliterated", + "developer": "zetasepic", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7153 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.7152 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 
5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5242 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4069 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4719 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5872 + } + } + ] +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zetasepic/Qwen2.5-72B-Instruct-abliterated/78799fe1-5fbd-4023-9462-8d826dac41d5.json b/data/hfopenllm_v2/zetasepic/Qwen2.5-72B-Instruct-abliterated/78799fe1-5fbd-4023-9462-8d826dac41d5.json deleted file mode 100644 index d6ed73c37..000000000 --- a/data/hfopenllm_v2/zetasepic/Qwen2.5-72B-Instruct-abliterated/78799fe1-5fbd-4023-9462-8d826dac41d5.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zetasepic_Qwen2.5-72B-Instruct-abliterated/1762652580.632342", - "retrieved_timestamp": "1762652580.632343", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zetasepic/Qwen2.5-72B-Instruct-abliterated", - "developer": "zetasepic", - "inference_platform": "unknown", - "id": "zetasepic/Qwen2.5-72B-Instruct-abliterated", - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7152610628687439 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7152257183282452 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5241691842900302 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40687919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4719166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5871841755319149 - } - } - ] -} diff --git a/data/hfopenllm_v2/zhengr/MixTAO-7Bx2-MoE-v8.1/35068575-06a3-4541-bdf3-120bd6db2867.json b/data/hfopenllm_v2/zhengr/MixTAO-7Bx2-MoE-v8.1/35068575-06a3-4541-bdf3-120bd6db2867.json deleted file mode 100644 index 225520301..000000000 --- a/data/hfopenllm_v2/zhengr/MixTAO-7Bx2-MoE-v8.1/35068575-06a3-4541-bdf3-120bd6db2867.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "hfopenllm_v2/zhengr_MixTAO-7Bx2-MoE-v8.1/1762652580.6327481", - "retrieved_timestamp": "1762652580.632749", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party", - "source_name": "HF Open LLM v2", - "source_type": "documentation" - }, - "model_info": { - "name": "zhengr/MixTAO-7Bx2-MoE-v8.1", - "developer": "zhengr", - "inference_platform": "unknown", - "id": "zhengr/MixTAO-7Bx2-MoE-v8.1", - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4187810564856802 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42019437560239653 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39762499999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28465757978723405 - } - } - ] -} diff --git 
a/data/hfopenllm_v2/zhengr/MixTAO-7Bx2-MoE-v8.1/b964d0a4-7c44-4ea2-894e-3e1ca30321e0.json b/data/hfopenllm_v2/zhengr/MixTAO-7Bx2-MoE-v8.1/b964d0a4-7c44-4ea2-894e-3e1ca30321e0.json new file mode 100644 index 000000000..b0791265c --- /dev/null +++ b/data/hfopenllm_v2/zhengr/MixTAO-7Bx2-MoE-v8.1/b964d0a4-7c44-4ea2-894e-3e1ca30321e0.json @@ -0,0 +1,132 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "hfopenllm_v2/zhengr_MixTAO-7Bx2-MoE-v8.1/1770682486.623709", + "retrieved_timestamp": "1770682486.623709", + "source_metadata": { + "source_name": "HF Open LLM v2", + "source_type": "documentation", + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MixTAO-7Bx2-MoE-v8.1", + "id": "zhengr/MixTAO-7Bx2-MoE-v8.1", + "developer": "zhengr", + "inference_platform": "unknown", + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 12.879 + } + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "source_data": { + "dataset_name": "IFEval", + "source_type": "hf_dataset", + "hf_repo": "google/IFEval" + }, + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4188 + } + }, + { + "evaluation_name": "BBH", + "source_data": { + "dataset_name": "BBH", + "source_type": "hf_dataset", + "hf_repo": "SaylorTwift/bbh" + }, + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4202 + } + }, + { + "evaluation_name": "MATH Level 5", + "source_data": { + "dataset_name": "MATH Level 5", + "source_type": "hf_dataset", + "hf_repo": "DigitalLearningGmbH/MATH-lighteval" + }, + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0604 + } + }, + { + "evaluation_name": "GPQA", + "source_data": { + "dataset_name": "GPQA", + "source_type": "hf_dataset", + "hf_repo": "Idavidrein/gpqa" + }, + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2987 + } + }, + { + "evaluation_name": "MUSR", + "source_data": { + "dataset_name": "MUSR", + "source_type": "hf_dataset", + "hf_repo": "TAUR-Lab/MuSR" + }, + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.3976 + } + }, + { + "evaluation_name": "MMLU-PRO", + "source_data": { + "dataset_name": "MMLU-PRO", + "source_type": "hf_dataset", + "hf_repo": "TIGER-Lab/MMLU-Pro" + }, + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2847 + } + } + ] +} \ No newline at end of file diff --git a/data/mmlu/openai/gpt2/7b2767f8-9266-486e-8e49-6177930bc258.json b/data/mmlu/openai/gpt2/7b2767f8-9266-486e-8e49-6177930bc258.json deleted file mode 100644 index 872c1b33c..000000000 --- a/data/mmlu/openai/gpt2/7b2767f8-9266-486e-8e49-6177930bc258.json +++ 
/dev/null @@ -1,2377 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "helm/openai/gpt2/mmlu/philosophy/1762354922", - "retrieved_timestamp": "1762354922", - "source_data": { - "dataset_name": "mmlu", - "samples_number": 10, - "sample_ids": [ - "id147", - "id11", - "id65", - "id344", - "id59", - "id291", - "id131", - "id222", - "id259", - "id105" - ], - "additional_details": { - "scenario_name": "helm.benchmark.scenarios.mmlu_scenario.MMLUScenario", - "scenario_args": { - "subject": "philosophy" - } - } - }, - "source_metadata": { - "source_name": "helm", - "source_type": "evaluation_run", - "source_organization_name": "Unknown", - "evaluator_relationship": "other" - }, - "model_info": { - "name": "openai/gpt2", - "id": "openai/gpt2", - "developer": "openai", - "inference_platform": "huggingface" - }, - "evaluation_results": [ - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.1111111111111111, - "details": { - "count": 1, - "sum": 0.1111111111111111, - "sum_squared": 0.012345679012345678, - "min": 0.1111111111111111, - "max": 0.1111111111111111, - "mean": 0.1111111111111111, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": null - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.1111111111111111, - "details": { - "count": 1, - "sum": 0.1111111111111111, - "sum_squared": 0.012345679012345678, - "min": 0.1111111111111111, - "max": 0.1111111111111111, - "mean": 0.1111111111111111, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": null - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, - "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": null - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - 
"evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, - "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": null - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.1111111111111111, - "details": { - "count": 1, - "sum": 0.1111111111111111, - "sum_squared": 0.012345679012345678, - "min": 0.1111111111111111, - "max": 0.1111111111111111, - "mean": 0.1111111111111111, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": { - "name": "robustness", - "robustness": true, - "fairness": false, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.1111111111111111, - "details": { - "count": 1, - "sum": 0.1111111111111111, - "sum_squared": 0.012345679012345678, - "min": 0.1111111111111111, - "max": 0.1111111111111111, - "mean": 0.1111111111111111, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": { - "name": "fairness", - "robustness": false, - "fairness": true, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.1111111111111111, - "details": { - "count": 1, - "sum": 0.1111111111111111, - "sum_squared": 0.012345679012345678, - "min": 0.1111111111111111, - "max": 0.1111111111111111, - "mean": 0.1111111111111111, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": { - "name": "robustness", - "robustness": true, - "fairness": false, - "computed_on": 
"worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.1111111111111111, - "details": { - "count": 1, - "sum": 0.1111111111111111, - "sum_squared": 0.012345679012345678, - "min": 0.1111111111111111, - "max": 0.1111111111111111, - "mean": 0.1111111111111111, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": { - "name": "fairness", - "robustness": false, - "fairness": true, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, - "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "robustness", - "robustness": true, - "fairness": false, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, - "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "fairness", - "robustness": false, - "fairness": true, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 
1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, - "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "robustness", - "robustness": true, - "fairness": false, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, - "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "fairness", - "robustness": false, - "fairness": true, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "quasi_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.1111111111111111, - "details": { - "count": 1, - "sum": 0.1111111111111111, - "sum_squared": 0.012345679012345678, - "min": 0.1111111111111111, - "max": 0.1111111111111111, - "mean": 0.1111111111111111, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": null - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "quasi_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.1111111111111111, - "details": { - "count": 1, - "sum": 0.1111111111111111, - "sum_squared": 0.012345679012345678, - "min": 0.1111111111111111, - "max": 0.1111111111111111, - "mean": 0.1111111111111111, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": null - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": 
"multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "quasi_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, - "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": null - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "quasi_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, - "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": null - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "quasi_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.1111111111111111, - "details": { - "count": 1, - "sum": 0.1111111111111111, - "sum_squared": 0.012345679012345678, - "min": 0.1111111111111111, - "max": 0.1111111111111111, - "mean": 0.1111111111111111, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": { - "name": "robustness", - "robustness": true, - "fairness": false, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "quasi_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.1111111111111111, - "details": { - "count": 1, - "sum": 0.1111111111111111, - "sum_squared": 0.012345679012345678, - "min": 0.1111111111111111, - "max": 0.1111111111111111, - "mean": 0.1111111111111111, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": { - "name": "fairness", - "robustness": false, - "fairness": true, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice 
questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "quasi_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.1111111111111111, - "details": { - "count": 1, - "sum": 0.1111111111111111, - "sum_squared": 0.012345679012345678, - "min": 0.1111111111111111, - "max": 0.1111111111111111, - "mean": 0.1111111111111111, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": { - "name": "robustness", - "robustness": true, - "fairness": false, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "quasi_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.1111111111111111, - "details": { - "count": 1, - "sum": 0.1111111111111111, - "sum_squared": 0.012345679012345678, - "min": 0.1111111111111111, - "max": 0.1111111111111111, - "mean": 0.1111111111111111, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": { - "name": "fairness", - "robustness": false, - "fairness": true, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "quasi_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, - "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "robustness", - "robustness": true, - "fairness": false, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "quasi_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, 
- "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "fairness", - "robustness": false, - "fairness": true, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "quasi_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, - "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "robustness", - "robustness": true, - "fairness": false, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "quasi_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, - "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "fairness", - "robustness": false, - "fairness": true, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "prefix_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.1111111111111111, - "details": { - "count": 1, - "sum": 0.1111111111111111, - "sum_squared": 0.012345679012345678, - "min": 0.1111111111111111, - "max": 0.1111111111111111, - "mean": 0.1111111111111111, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": null - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - 
"metric_config": { - "evaluation_description": "prefix_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.1111111111111111, - "details": { - "count": 1, - "sum": 0.1111111111111111, - "sum_squared": 0.012345679012345678, - "min": 0.1111111111111111, - "max": 0.1111111111111111, - "mean": 0.1111111111111111, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": null - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "prefix_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, - "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": null - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "prefix_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, - "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": null - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "prefix_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.1111111111111111, - "details": { - "count": 1, - "sum": 0.1111111111111111, - "sum_squared": 0.012345679012345678, - "min": 0.1111111111111111, - "max": 0.1111111111111111, - "mean": 0.1111111111111111, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": { - "name": "robustness", - "robustness": true, - "fairness": false, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - 
"instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "prefix_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.1111111111111111, - "details": { - "count": 1, - "sum": 0.1111111111111111, - "sum_squared": 0.012345679012345678, - "min": 0.1111111111111111, - "max": 0.1111111111111111, - "mean": 0.1111111111111111, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": { - "name": "fairness", - "robustness": false, - "fairness": true, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "prefix_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.1111111111111111, - "details": { - "count": 1, - "sum": 0.1111111111111111, - "sum_squared": 0.012345679012345678, - "min": 0.1111111111111111, - "max": 0.1111111111111111, - "mean": 0.1111111111111111, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": { - "name": "robustness", - "robustness": true, - "fairness": false, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "prefix_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.1111111111111111, - "details": { - "count": 1, - "sum": 0.1111111111111111, - "sum_squared": 0.012345679012345678, - "min": 0.1111111111111111, - "max": 0.1111111111111111, - "mean": 0.1111111111111111, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": { - "name": "fairness", - "robustness": false, - "fairness": true, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "prefix_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, - "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - 
"mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "robustness", - "robustness": true, - "fairness": false, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "prefix_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, - "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "fairness", - "robustness": false, - "fairness": true, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "prefix_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, - "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "robustness", - "robustness": true, - "fairness": false, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "prefix_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, - "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "fairness", - "robustness": false, - "fairness": true, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": 
"quasi_prefix_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.1111111111111111, - "details": { - "count": 1, - "sum": 0.1111111111111111, - "sum_squared": 0.012345679012345678, - "min": 0.1111111111111111, - "max": 0.1111111111111111, - "mean": 0.1111111111111111, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": null - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "quasi_prefix_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.1111111111111111, - "details": { - "count": 1, - "sum": 0.1111111111111111, - "sum_squared": 0.012345679012345678, - "min": 0.1111111111111111, - "max": 0.1111111111111111, - "mean": 0.1111111111111111, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": null - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "quasi_prefix_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, - "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": null - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "quasi_prefix_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, - "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": null - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - 
"evaluation_description": "quasi_prefix_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.1111111111111111, - "details": { - "count": 1, - "sum": 0.1111111111111111, - "sum_squared": 0.012345679012345678, - "min": 0.1111111111111111, - "max": 0.1111111111111111, - "mean": 0.1111111111111111, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": { - "name": "robustness", - "robustness": true, - "fairness": false, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "quasi_prefix_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.1111111111111111, - "details": { - "count": 1, - "sum": 0.1111111111111111, - "sum_squared": 0.012345679012345678, - "min": 0.1111111111111111, - "max": 0.1111111111111111, - "mean": 0.1111111111111111, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": { - "name": "fairness", - "robustness": false, - "fairness": true, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "quasi_prefix_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.1111111111111111, - "details": { - "count": 1, - "sum": 0.1111111111111111, - "sum_squared": 0.012345679012345678, - "min": 0.1111111111111111, - "max": 0.1111111111111111, - "mean": 0.1111111111111111, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": { - "name": "robustness", - "robustness": true, - "fairness": false, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "quasi_prefix_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.1111111111111111, - "details": { - "count": 1, - "sum": 0.1111111111111111, - "sum_squared": 0.012345679012345678, - "min": 0.1111111111111111, - "max": 0.1111111111111111, - "mean": 0.1111111111111111, - "variance": 
0.0, - "stddev": 0.0, - "split": "test", - "perturbation": { - "name": "fairness", - "robustness": false, - "fairness": true, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "quasi_prefix_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, - "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "robustness", - "robustness": true, - "fairness": false, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "quasi_prefix_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, - "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "fairness", - "robustness": false, - "fairness": true, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": "quasi_prefix_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, - "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "robustness", - "robustness": true, - "fairness": false, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "multiple_choice_joint", - "evaluation_timestamp": "1762354922", - "metric_config": { - "evaluation_description": 
"quasi_prefix_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, - "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "fairness", - "robustness": false, - "fairness": true, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 1, - "stop_sequences": [ - "\n" - ], - "instructions": "The following are multiple choice questions (with answers) about philosophy.\n", - "input_prefix": "Question: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - } - ], - "detailed_evaluation_results_per_samples": [ - { - "sample_id": "id147", - "input": "What does the notion of “meaning in life” refer to?", - "prompt": "The following are multiple choice questions (with answers) about philosophy.\n\nQuestion: The study of reality in the broadest sense, an inquiry into the elemental nature of the universe and the things in it, is known as _____.\nA. metaphysics\nB. epistemology\nC. quantum physics\nD. axiology\nAnswer: A\n\nQuestion: According to Moore’s “ideal utilitarianism,” the right action is the one that brings about the greatest amount of:\nA. pleasure.\nB. happiness.\nC. good.\nD. virtue.\nAnswer: C\n\nQuestion: Psychological egoism is:\nA. an ethical theory about how we ought to behave.\nB. a generalization concerning the way people tend to behave.\nC. a claim about human nature and the ways people are capable of behaving.\nD. none of the above.\nAnswer: C\n\nQuestion: Before Tolstoy's Christian conversion, what was his perspective on the meaning of life?\nA. optimist\nB. satisfied\nC. nominally religious\nD. pessimist\nAnswer: D\n\nQuestion: According to d'Holbach, people always act according to _____.\nA. free choices\nB. dictates of the soul\nC. necessary natural laws\nD. undetermined will\nAnswer: C\n\nQuestion: What does the notion of “meaning in life” refer to?\nA. external meaning\nB. god's plan\nC. internalmeaning\nD. meaning in an afterlife\nAnswer:", - "ground_truth": [ - "C" - ], - "response": "D", - "choices": [ - [ - "A", - "external meaning" - ], - [ - "B", - "god's plan" - ], - [ - "C", - "internalmeaning" - ], - [ - "D", - "meaning in an afterlife" - ] - ] - }, - { - "sample_id": "id11", - "input": "In Plato's idealism, the unchanging Ideas or \"Forms\" in terms of which sensible objects both exist and are known must transcend (that is, exist beyond) the changing realm of appearances; because if Forms changed, then:", - "prompt": "The following are multiple choice questions (with answers) about philosophy.\n\nQuestion: The study of reality in the broadest sense, an inquiry into the elemental nature of the universe and the things in it, is known as _____.\nA. metaphysics\nB. epistemology\nC. quantum physics\nD. axiology\nAnswer: A\n\nQuestion: According to Moore’s “ideal utilitarianism,” the right action is the one that brings about the greatest amount of:\nA. pleasure.\nB. happiness.\nC. good.\nD. virtue.\nAnswer: C\n\nQuestion: Psychological egoism is:\nA. an ethical theory about how we ought to behave.\nB. a generalization concerning the way people tend to behave.\nC. a claim about human nature and the ways people are capable of behaving.\nD. 
none of the above.\nAnswer: C\n\nQuestion: Before Tolstoy's Christian conversion, what was his perspective on the meaning of life?\nA. optimist\nB. satisfied\nC. nominally religious\nD. pessimist\nAnswer: D\n\nQuestion: According to d'Holbach, people always act according to _____.\nA. free choices\nB. dictates of the soul\nC. necessary natural laws\nD. undetermined will\nAnswer: C\n\nQuestion: In Plato's idealism, the unchanging Ideas or \"Forms\" in terms of which sensible objects both exist and are known must transcend (that is, exist beyond) the changing realm of appearances; because if Forms changed, then:\nA. the only things in the sensible world that we could ever experience would be concepts.\nB. the sensible realm (in contrast to the intelligible realm) would consist only of copies of real things.\nC. nothing in the experienced world could be or be identified as one determinate thing or another.\nD. the sensible world would consist of unchanging Forms.\nAnswer:", - "ground_truth": [ - "C" - ], - "response": "D", - "choices": [ - [ - "A", - "the only things in the sensible world that we could ever experience would be concepts." - ], - [ - "B", - "the sensible realm (in contrast to the intelligible realm) would consist only of copies of real things." - ], - [ - "C", - "nothing in the experienced world could be or be identified as one determinate thing or another." - ], - [ - "D", - "the sensible world would consist of unchanging Forms." - ] - ] - }, - { - "sample_id": "id65", - "input": "Aristotle says that what makes things be what they are--their essence--does not exist apart from individ-uals that exist in the world. So if all the members of a species were destroyed, then their essence or form:", - "prompt": "The following are multiple choice questions (with answers) about philosophy.\n\nQuestion: The study of reality in the broadest sense, an inquiry into the elemental nature of the universe and the things in it, is known as _____.\nA. metaphysics\nB. epistemology\nC. quantum physics\nD. axiology\nAnswer: A\n\nQuestion: According to Moore’s “ideal utilitarianism,” the right action is the one that brings about the greatest amount of:\nA. pleasure.\nB. happiness.\nC. good.\nD. virtue.\nAnswer: C\n\nQuestion: Psychological egoism is:\nA. an ethical theory about how we ought to behave.\nB. a generalization concerning the way people tend to behave.\nC. a claim about human nature and the ways people are capable of behaving.\nD. none of the above.\nAnswer: C\n\nQuestion: Before Tolstoy's Christian conversion, what was his perspective on the meaning of life?\nA. optimist\nB. satisfied\nC. nominally religious\nD. pessimist\nAnswer: D\n\nQuestion: According to d'Holbach, people always act according to _____.\nA. free choices\nB. dictates of the soul\nC. necessary natural laws\nD. undetermined will\nAnswer: C\n\nQuestion: Aristotle says that what makes things be what they are--their essence--does not exist apart from individ-uals that exist in the world. So if all the members of a species were destroyed, then their essence or form:\nA. would likewise be destroyed.\nB. would be destroyed only if there were no one around to remember the species.\nC. would continue existing (as with Plato's Forms) in some other realm of being.\nD. 
would not be destroyed because there was no essence or form originally to be destroyed; there are only individuals, not universal essences or natures of things.\nAnswer:", - "ground_truth": [ - "A" - ], - "response": "D", - "choices": [ - [ - "A", - "would likewise be destroyed." - ], - [ - "B", - "would be destroyed only if there were no one around to remember the species." - ], - [ - "C", - "would continue existing (as with Plato's Forms) in some other realm of being." - ], - [ - "D", - "would not be destroyed because there was no essence or form originally to be destroyed; there are only individuals, not universal essences or natures of things." - ] - ] - }, - { - "sample_id": "id344", - "input": "Aesthetic values have to do with _______", - "prompt": "The following are multiple choice questions (with answers) about philosophy.\n\nQuestion: The study of reality in the broadest sense, an inquiry into the elemental nature of the universe and the things in it, is known as _____.\nA. metaphysics\nB. epistemology\nC. quantum physics\nD. axiology\nAnswer: A\n\nQuestion: According to Moore’s “ideal utilitarianism,” the right action is the one that brings about the greatest amount of:\nA. pleasure.\nB. happiness.\nC. good.\nD. virtue.\nAnswer: C\n\nQuestion: Psychological egoism is:\nA. an ethical theory about how we ought to behave.\nB. a generalization concerning the way people tend to behave.\nC. a claim about human nature and the ways people are capable of behaving.\nD. none of the above.\nAnswer: C\n\nQuestion: Before Tolstoy's Christian conversion, what was his perspective on the meaning of life?\nA. optimist\nB. satisfied\nC. nominally religious\nD. pessimist\nAnswer: D\n\nQuestion: According to d'Holbach, people always act according to _____.\nA. free choices\nB. dictates of the soul\nC. necessary natural laws\nD. undetermined will\nAnswer: C\n\nQuestion: Aesthetic values have to do with _______\nA. the moral value of works of art and other objects that could be judged beautiful\nB. good and bad works of art and other objects that could be judged beautiful\nC. the moral values of artists and critics\nD. pragmatic decisions regarding the display of art\nAnswer:", - "ground_truth": [ - "B" - ], - "response": "D", - "choices": [ - [ - "A", - "the moral value of works of art and other objects that could be judged beautiful" - ], - [ - "B", - "good and bad works of art and other objects that could be judged beautiful" - ], - [ - "C", - "the moral values of artists and critics" - ], - [ - "D", - "pragmatic decisions regarding the display of art" - ] - ] - }, - { - "sample_id": "id59", - "input": "According to Moore, we are thinking about good whenever we think about:", - "prompt": "The following are multiple choice questions (with answers) about philosophy.\n\nQuestion: The study of reality in the broadest sense, an inquiry into the elemental nature of the universe and the things in it, is known as _____.\nA. metaphysics\nB. epistemology\nC. quantum physics\nD. axiology\nAnswer: A\n\nQuestion: According to Moore’s “ideal utilitarianism,” the right action is the one that brings about the greatest amount of:\nA. pleasure.\nB. happiness.\nC. good.\nD. virtue.\nAnswer: C\n\nQuestion: Psychological egoism is:\nA. an ethical theory about how we ought to behave.\nB. a generalization concerning the way people tend to behave.\nC. a claim about human nature and the ways people are capable of behaving.\nD. 
none of the above.\nAnswer: C\n\nQuestion: Before Tolstoy's Christian conversion, what was his perspective on the meaning of life?\nA. optimist\nB. satisfied\nC. nominally religious\nD. pessimist\nAnswer: D\n\nQuestion: According to d'Holbach, people always act according to _____.\nA. free choices\nB. dictates of the soul\nC. necessary natural laws\nD. undetermined will\nAnswer: C\n\nQuestion: According to Moore, we are thinking about good whenever we think about:\nA. pleasure.\nB. things that we desire.\nC. intrinsic value.\nD. none of the above.\nAnswer:", - "ground_truth": [ - "C" - ], - "response": "D", - "choices": [ - [ - "A", - "pleasure." - ], - [ - "B", - "things that we desire." - ], - [ - "C", - "intrinsic value." - ], - [ - "D", - "none of the above." - ] - ] - }, - { - "sample_id": "id291", - "input": "Craig argues that the series of events in time cannot be actually infinite, so we know that _____.", - "prompt": "The following are multiple choice questions (with answers) about philosophy.\n\nQuestion: The study of reality in the broadest sense, an inquiry into the elemental nature of the universe and the things in it, is known as _____.\nA. metaphysics\nB. epistemology\nC. quantum physics\nD. axiology\nAnswer: A\n\nQuestion: According to Moore’s “ideal utilitarianism,” the right action is the one that brings about the greatest amount of:\nA. pleasure.\nB. happiness.\nC. good.\nD. virtue.\nAnswer: C\n\nQuestion: Psychological egoism is:\nA. an ethical theory about how we ought to behave.\nB. a generalization concerning the way people tend to behave.\nC. a claim about human nature and the ways people are capable of behaving.\nD. none of the above.\nAnswer: C\n\nQuestion: Before Tolstoy's Christian conversion, what was his perspective on the meaning of life?\nA. optimist\nB. satisfied\nC. nominally religious\nD. pessimist\nAnswer: D\n\nQuestion: According to d'Holbach, people always act according to _____.\nA. free choices\nB. dictates of the soul\nC. necessary natural laws\nD. undetermined will\nAnswer: C\n\nQuestion: Craig argues that the series of events in time cannot be actually infinite, so we know that _____.\nA. the universe is finite in the past and began to exist\nB. the universe is infinite in the past\nC. the universe never is uncaused\nD. the universe exists only in the mind\nAnswer:", - "ground_truth": [ - "A" - ], - "response": "D", - "choices": [ - [ - "A", - "the universe is finite in the past and began to exist" - ], - [ - "B", - "the universe is infinite in the past" - ], - [ - "C", - "the universe never is uncaused" - ], - [ - "D", - "the universe exists only in the mind" - ] - ] - }, - { - "sample_id": "id131", - "input": "Baier claims that ethical skepticism is often due to:", - "prompt": "The following are multiple choice questions (with answers) about philosophy.\n\nQuestion: The study of reality in the broadest sense, an inquiry into the elemental nature of the universe and the things in it, is known as _____.\nA. metaphysics\nB. epistemology\nC. quantum physics\nD. axiology\nAnswer: A\n\nQuestion: According to Moore’s “ideal utilitarianism,” the right action is the one that brings about the greatest amount of:\nA. pleasure.\nB. happiness.\nC. good.\nD. virtue.\nAnswer: C\n\nQuestion: Psychological egoism is:\nA. an ethical theory about how we ought to behave.\nB. a generalization concerning the way people tend to behave.\nC. a claim about human nature and the ways people are capable of behaving.\nD. 
none of the above.\nAnswer: C\n\nQuestion: Before Tolstoy's Christian conversion, what was his perspective on the meaning of life?\nA. optimist\nB. satisfied\nC. nominally religious\nD. pessimist\nAnswer: D\n\nQuestion: According to d'Holbach, people always act according to _____.\nA. free choices\nB. dictates of the soul\nC. necessary natural laws\nD. undetermined will\nAnswer: C\n\nQuestion: Baier claims that ethical skepticism is often due to:\nA. confused metaphysical theories.\nB. confused epistemological theories.\nC. confused religious views.\nD. confused scientific theories.\nAnswer:", - "ground_truth": [ - "B" - ], - "response": "D", - "choices": [ - [ - "A", - "confused metaphysical theories." - ], - [ - "B", - "confused epistemological theories." - ], - [ - "C", - "confused religious views." - ], - [ - "D", - "confused scientific theories." - ] - ] - }, - { - "sample_id": "id222", - "input": "Mill says that the ultimate end of utilitarianism is an existence as free of pain as possible and as rich as possible in _____.", - "prompt": "The following are multiple choice questions (with answers) about philosophy.\n\nQuestion: The study of reality in the broadest sense, an inquiry into the elemental nature of the universe and the things in it, is known as _____.\nA. metaphysics\nB. epistemology\nC. quantum physics\nD. axiology\nAnswer: A\n\nQuestion: According to Moore’s “ideal utilitarianism,” the right action is the one that brings about the greatest amount of:\nA. pleasure.\nB. happiness.\nC. good.\nD. virtue.\nAnswer: C\n\nQuestion: Psychological egoism is:\nA. an ethical theory about how we ought to behave.\nB. a generalization concerning the way people tend to behave.\nC. a claim about human nature and the ways people are capable of behaving.\nD. none of the above.\nAnswer: C\n\nQuestion: Before Tolstoy's Christian conversion, what was his perspective on the meaning of life?\nA. optimist\nB. satisfied\nC. nominally religious\nD. pessimist\nAnswer: D\n\nQuestion: According to d'Holbach, people always act according to _____.\nA. free choices\nB. dictates of the soul\nC. necessary natural laws\nD. undetermined will\nAnswer: C\n\nQuestion: Mill says that the ultimate end of utilitarianism is an existence as free of pain as possible and as rich as possible in _____.\nA. lower pleasures\nB. spiritual attainment\nC. social achievement\nD. enjoyments\nAnswer:", - "ground_truth": [ - "D" - ], - "response": "D", - "choices": [ - [ - "A", - "lower pleasures" - ], - [ - "B", - "spiritual attainment" - ], - [ - "C", - "social achievement" - ], - [ - "D", - "enjoyments" - ] - ] - }, - { - "sample_id": "id259", - "input": "Philo says the analogy that Cleanthes uses to make his case is _____.", - "prompt": "The following are multiple choice questions (with answers) about philosophy.\n\nQuestion: The study of reality in the broadest sense, an inquiry into the elemental nature of the universe and the things in it, is known as _____.\nA. metaphysics\nB. epistemology\nC. quantum physics\nD. axiology\nAnswer: A\n\nQuestion: According to Moore’s “ideal utilitarianism,” the right action is the one that brings about the greatest amount of:\nA. pleasure.\nB. happiness.\nC. good.\nD. virtue.\nAnswer: C\n\nQuestion: Psychological egoism is:\nA. an ethical theory about how we ought to behave.\nB. a generalization concerning the way people tend to behave.\nC. a claim about human nature and the ways people are capable of behaving.\nD. 
none of the above.\nAnswer: C\n\nQuestion: Before Tolstoy's Christian conversion, what was his perspective on the meaning of life?\nA. optimist\nB. satisfied\nC. nominally religious\nD. pessimist\nAnswer: D\n\nQuestion: According to d'Holbach, people always act according to _____.\nA. free choices\nB. dictates of the soul\nC. necessary natural laws\nD. undetermined will\nAnswer: C\n\nQuestion: Philo says the analogy that Cleanthes uses to make his case is _____.\nA. too complicated\nB. weak\nC. strong\nD. not based on a legitimate method of reasoning\nAnswer:", - "ground_truth": [ - "B" - ], - "response": "D", - "choices": [ - [ - "A", - "too complicated" - ], - [ - "B", - "weak" - ], - [ - "C", - "strong" - ], - [ - "D", - "not based on a legitimate method of reasoning" - ] - ] - }, - { - "sample_id": "id105", - "input": "Baggini accuses pessimists of mixing up the two senses of _____.", - "prompt": "The following are multiple choice questions (with answers) about philosophy.\n\nQuestion: The study of reality in the broadest sense, an inquiry into the elemental nature of the universe and the things in it, is known as _____.\nA. metaphysics\nB. epistemology\nC. quantum physics\nD. axiology\nAnswer: A\n\nQuestion: According to Moore’s “ideal utilitarianism,” the right action is the one that brings about the greatest amount of:\nA. pleasure.\nB. happiness.\nC. good.\nD. virtue.\nAnswer: C\n\nQuestion: Psychological egoism is:\nA. an ethical theory about how we ought to behave.\nB. a generalization concerning the way people tend to behave.\nC. a claim about human nature and the ways people are capable of behaving.\nD. none of the above.\nAnswer: C\n\nQuestion: Before Tolstoy's Christian conversion, what was his perspective on the meaning of life?\nA. optimist\nB. satisfied\nC. nominally religious\nD. pessimist\nAnswer: D\n\nQuestion: According to d'Holbach, people always act according to _____.\nA. free choices\nB. dictates of the soul\nC. necessary natural laws\nD. undetermined will\nAnswer: C\n\nQuestion: Baggini accuses pessimists of mixing up the two senses of _____.\nA. philosophy\nB. religion\nC. meaning\nD. 
time\nAnswer:", - "ground_truth": [ - "C" - ], - "response": "D", - "choices": [ - [ - "A", - "philosophy" - ], - [ - "B", - "religion" - ], - [ - "C", - "meaning" - ], - [ - "D", - "time" - ] - ] - } - ] -} \ No newline at end of file diff --git a/data/narrativeqa/openai/gpt2/96a7ea61-8869-4dd0-9164-756519a26ac0.json b/data/narrativeqa/openai/gpt2/96a7ea61-8869-4dd0-9164-756519a26ac0.json deleted file mode 100644 index 1e6877a70..000000000 --- a/data/narrativeqa/openai/gpt2/96a7ea61-8869-4dd0-9164-756519a26ac0.json +++ /dev/null @@ -1,1634 +0,0 @@ -{ - "schema_version": "0.1.0", - "evaluation_id": "helm/openai/gpt2/narrativeqa/1763479296", - "retrieved_timestamp": "1763479296", - "source_data": { - "dataset_name": "narrativeqa", - "samples_number": 5, - "sample_ids": [ - "id1413", - "id1332", - "id1123", - "id1514", - "id1340" - ], - "additional_details": { - "scenario_name": "helm.benchmark.scenarios.narrativeqa_scenario.NarrativeQAScenario", - "scenario_args": {} - } - }, - "source_metadata": { - "source_name": "helm", - "source_type": "evaluation_run", - "source_organization_name": "Unknown", - "evaluator_relationship": "other" - }, - "model_info": { - "name": "openai/gpt2", - "id": "openai/gpt2", - "developer": "openai", - "inference_platform": "huggingface" - }, - "evaluation_results": [ - { - "evaluation_name": "generation", - "evaluation_timestamp": "1763479296", - "metric_config": { - "evaluation_description": "exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, - "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": null - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 100, - "stop_sequences": [ - "\n" - ], - "instructions": "", - "input_prefix": "Passage: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "generation", - "evaluation_timestamp": "1763479296", - "metric_config": { - "evaluation_description": "exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, - "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": null - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 100, - "stop_sequences": [ - "\n" - ], - "instructions": "", - "input_prefix": "Passage: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "generation", - "evaluation_timestamp": "1763479296", - "metric_config": { - "evaluation_description": "exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, - "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": { - "name": "robustness", - "robustness": true, - "fairness": false, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 100, - "stop_sequences": [ - "\n" - ], - "instructions": "", - "input_prefix": "Passage: ", - 
"input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "generation", - "evaluation_timestamp": "1763479296", - "metric_config": { - "evaluation_description": "exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, - "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": { - "name": "fairness", - "robustness": false, - "fairness": true, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 100, - "stop_sequences": [ - "\n" - ], - "instructions": "", - "input_prefix": "Passage: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "generation", - "evaluation_timestamp": "1763479296", - "metric_config": { - "evaluation_description": "exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, - "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "robustness", - "robustness": true, - "fairness": false, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 100, - "stop_sequences": [ - "\n" - ], - "instructions": "", - "input_prefix": "Passage: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "generation", - "evaluation_timestamp": "1763479296", - "metric_config": { - "evaluation_description": "exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, - "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "fairness", - "robustness": false, - "fairness": true, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 100, - "stop_sequences": [ - "\n" - ], - "instructions": "", - "input_prefix": "Passage: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "generation", - "evaluation_timestamp": "1763479296", - "metric_config": { - "evaluation_description": "quasi_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, - "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": null - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 100, - "stop_sequences": [ - "\n" - ], - "instructions": "", - "input_prefix": "Passage: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "generation", - "evaluation_timestamp": "1763479296", - "metric_config": { - 
"evaluation_description": "quasi_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, - "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": null - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 100, - "stop_sequences": [ - "\n" - ], - "instructions": "", - "input_prefix": "Passage: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "generation", - "evaluation_timestamp": "1763479296", - "metric_config": { - "evaluation_description": "quasi_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, - "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": { - "name": "robustness", - "robustness": true, - "fairness": false, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 100, - "stop_sequences": [ - "\n" - ], - "instructions": "", - "input_prefix": "Passage: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "generation", - "evaluation_timestamp": "1763479296", - "metric_config": { - "evaluation_description": "quasi_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, - "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": { - "name": "fairness", - "robustness": false, - "fairness": true, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 100, - "stop_sequences": [ - "\n" - ], - "instructions": "", - "input_prefix": "Passage: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "generation", - "evaluation_timestamp": "1763479296", - "metric_config": { - "evaluation_description": "quasi_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, - "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "robustness", - "robustness": true, - "fairness": false, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 100, - "stop_sequences": [ - "\n" - ], - "instructions": "", - "input_prefix": "Passage: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "generation", - "evaluation_timestamp": "1763479296", - "metric_config": { - "evaluation_description": "quasi_exact_match", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, 
- "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "fairness", - "robustness": false, - "fairness": true, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 100, - "stop_sequences": [ - "\n" - ], - "instructions": "", - "input_prefix": "Passage: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "generation", - "evaluation_timestamp": "1763479296", - "metric_config": { - "evaluation_description": "f1_score", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.17424242424242425, - "details": { - "count": 1, - "sum": 0.17424242424242425, - "sum_squared": 0.030360422405876955, - "min": 0.17424242424242425, - "max": 0.17424242424242425, - "mean": 0.17424242424242425, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": null - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 100, - "stop_sequences": [ - "\n" - ], - "instructions": "", - "input_prefix": "Passage: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "generation", - "evaluation_timestamp": "1763479296", - "metric_config": { - "evaluation_description": "f1_score", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, - "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": null - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 100, - "stop_sequences": [ - "\n" - ], - "instructions": "", - "input_prefix": "Passage: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "generation", - "evaluation_timestamp": "1763479296", - "metric_config": { - "evaluation_description": "f1_score", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.17424242424242425, - "details": { - "count": 1, - "sum": 0.17424242424242425, - "sum_squared": 0.030360422405876955, - "min": 0.17424242424242425, - "max": 0.17424242424242425, - "mean": 0.17424242424242425, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": { - "name": "robustness", - "robustness": true, - "fairness": false, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 100, - "stop_sequences": [ - "\n" - ], - "instructions": "", - "input_prefix": "Passage: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "generation", - "evaluation_timestamp": "1763479296", - "metric_config": { - "evaluation_description": "f1_score", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.17424242424242425, - "details": { - "count": 1, - "sum": 0.17424242424242425, - "sum_squared": 0.030360422405876955, - "min": 0.17424242424242425, - "max": 0.17424242424242425, - "mean": 
0.17424242424242425, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": { - "name": "fairness", - "robustness": false, - "fairness": true, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 100, - "stop_sequences": [ - "\n" - ], - "instructions": "", - "input_prefix": "Passage: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "generation", - "evaluation_timestamp": "1763479296", - "metric_config": { - "evaluation_description": "f1_score", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, - "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "robustness", - "robustness": true, - "fairness": false, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 100, - "stop_sequences": [ - "\n" - ], - "instructions": "", - "input_prefix": "Passage: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "generation", - "evaluation_timestamp": "1763479296", - "metric_config": { - "evaluation_description": "f1_score", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.0, - "details": { - "count": 1, - "sum": 0.0, - "sum_squared": 0.0, - "min": 0.0, - "max": 0.0, - "mean": 0.0, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "fairness", - "robustness": false, - "fairness": true, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 100, - "stop_sequences": [ - "\n" - ], - "instructions": "", - "input_prefix": "Passage: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "generation", - "evaluation_timestamp": "1763479296", - "metric_config": { - "evaluation_description": "rouge_l", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.05365510777881912, - "details": { - "count": 1, - "sum": 0.05365510777881912, - "sum_squared": 0.002878870590756696, - "min": 0.05365510777881912, - "max": 0.05365510777881912, - "mean": 0.05365510777881912, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": null - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 100, - "stop_sequences": [ - "\n" - ], - "instructions": "", - "input_prefix": "Passage: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "generation", - "evaluation_timestamp": "1763479296", - "metric_config": { - "evaluation_description": "rouge_l", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.03636363636363636, - "details": { - "count": 1, - "sum": 0.03636363636363636, - "sum_squared": 0.0013223140495867767, - "min": 0.03636363636363636, - "max": 0.03636363636363636, - "mean": 0.03636363636363636, - "variance": 0.0, - "stddev": 0.0, - 
"split": "valid", - "perturbation": null - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 100, - "stop_sequences": [ - "\n" - ], - "instructions": "", - "input_prefix": "Passage: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "generation", - "evaluation_timestamp": "1763479296", - "metric_config": { - "evaluation_description": "rouge_l", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.05365510777881912, - "details": { - "count": 1, - "sum": 0.05365510777881912, - "sum_squared": 0.002878870590756696, - "min": 0.05365510777881912, - "max": 0.05365510777881912, - "mean": 0.05365510777881912, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": { - "name": "robustness", - "robustness": true, - "fairness": false, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 100, - "stop_sequences": [ - "\n" - ], - "instructions": "", - "input_prefix": "Passage: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "generation", - "evaluation_timestamp": "1763479296", - "metric_config": { - "evaluation_description": "rouge_l", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.05365510777881912, - "details": { - "count": 1, - "sum": 0.05365510777881912, - "sum_squared": 0.002878870590756696, - "min": 0.05365510777881912, - "max": 0.05365510777881912, - "mean": 0.05365510777881912, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": { - "name": "fairness", - "robustness": false, - "fairness": true, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 100, - "stop_sequences": [ - "\n" - ], - "instructions": "", - "input_prefix": "Passage: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "generation", - "evaluation_timestamp": "1763479296", - "metric_config": { - "evaluation_description": "rouge_l", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.03636363636363636, - "details": { - "count": 1, - "sum": 0.03636363636363636, - "sum_squared": 0.0013223140495867767, - "min": 0.03636363636363636, - "max": 0.03636363636363636, - "mean": 0.03636363636363636, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "robustness", - "robustness": true, - "fairness": false, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 100, - "stop_sequences": [ - "\n" - ], - "instructions": "", - "input_prefix": "Passage: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "generation", - "evaluation_timestamp": "1763479296", - "metric_config": { - "evaluation_description": "rouge_l", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.03636363636363636, - "details": { - "count": 1, - "sum": 0.03636363636363636, - "sum_squared": 
0.0013223140495867767, - "min": 0.03636363636363636, - "max": 0.03636363636363636, - "mean": 0.03636363636363636, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "fairness", - "robustness": false, - "fairness": true, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 100, - "stop_sequences": [ - "\n" - ], - "instructions": "", - "input_prefix": "Passage: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "generation", - "evaluation_timestamp": "1763479296", - "metric_config": { - "evaluation_description": "bleu_1", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.03568840579710145, - "details": { - "count": 1, - "sum": 0.03568840579710145, - "sum_squared": 0.001273662308338584, - "min": 0.03568840579710145, - "max": 0.03568840579710145, - "mean": 0.03568840579710145, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": null - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 100, - "stop_sequences": [ - "\n" - ], - "instructions": "", - "input_prefix": "Passage: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "generation", - "evaluation_timestamp": "1763479296", - "metric_config": { - "evaluation_description": "bleu_1", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.03508771929824561, - "details": { - "count": 1, - "sum": 0.03508771929824561, - "sum_squared": 0.0012311480455524776, - "min": 0.03508771929824561, - "max": 0.03508771929824561, - "mean": 0.03508771929824561, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": null - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 100, - "stop_sequences": [ - "\n" - ], - "instructions": "", - "input_prefix": "Passage: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "generation", - "evaluation_timestamp": "1763479296", - "metric_config": { - "evaluation_description": "bleu_1", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.03568840579710145, - "details": { - "count": 1, - "sum": 0.03568840579710145, - "sum_squared": 0.001273662308338584, - "min": 0.03568840579710145, - "max": 0.03568840579710145, - "mean": 0.03568840579710145, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": { - "name": "robustness", - "robustness": true, - "fairness": false, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 100, - "stop_sequences": [ - "\n" - ], - "instructions": "", - "input_prefix": "Passage: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "generation", - "evaluation_timestamp": "1763479296", - "metric_config": { - "evaluation_description": "bleu_1", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.03568840579710145, - "details": { - "count": 1, - "sum": 
0.03568840579710145, - "sum_squared": 0.001273662308338584, - "min": 0.03568840579710145, - "max": 0.03568840579710145, - "mean": 0.03568840579710145, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": { - "name": "fairness", - "robustness": false, - "fairness": true, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 100, - "stop_sequences": [ - "\n" - ], - "instructions": "", - "input_prefix": "Passage: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "generation", - "evaluation_timestamp": "1763479296", - "metric_config": { - "evaluation_description": "bleu_1", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.03508771929824561, - "details": { - "count": 1, - "sum": 0.03508771929824561, - "sum_squared": 0.0012311480455524776, - "min": 0.03508771929824561, - "max": 0.03508771929824561, - "mean": 0.03508771929824561, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "robustness", - "robustness": true, - "fairness": false, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 100, - "stop_sequences": [ - "\n" - ], - "instructions": "", - "input_prefix": "Passage: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "generation", - "evaluation_timestamp": "1763479296", - "metric_config": { - "evaluation_description": "bleu_1", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 0.03508771929824561, - "details": { - "count": 1, - "sum": 0.03508771929824561, - "sum_squared": 0.0012311480455524776, - "min": 0.03508771929824561, - "max": 0.03508771929824561, - "mean": 0.03508771929824561, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "fairness", - "robustness": false, - "fairness": true, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 100, - "stop_sequences": [ - "\n" - ], - "instructions": "", - "input_prefix": "Passage: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "generation", - "evaluation_timestamp": "1763479296", - "metric_config": { - "evaluation_description": "bleu_4", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 1.1125369292536313e-308, - "details": { - "count": 1, - "sum": 1.1125369292536313e-308, - "sum_squared": 0.0, - "min": 1.1125369292536313e-308, - "max": 1.1125369292536313e-308, - "mean": 1.1125369292536313e-308, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": null - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 100, - "stop_sequences": [ - "\n" - ], - "instructions": "", - "input_prefix": "Passage: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "generation", - "evaluation_timestamp": "1763479296", - "metric_config": { - "evaluation_description": "bleu_4", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 2.2250738585072626e-308, - "details": { - "count": 1, - "sum": 2.2250738585072626e-308, - "sum_squared": 0.0, - "min": 2.2250738585072626e-308, - "max": 2.2250738585072626e-308, - "mean": 2.2250738585072626e-308, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": null - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 100, - "stop_sequences": [ - "\n" - ], - "instructions": "", - "input_prefix": "Passage: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "generation", - "evaluation_timestamp": "1763479296", - "metric_config": { - "evaluation_description": "bleu_4", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 1.1125369292536313e-308, - "details": { - "count": 1, - "sum": 1.1125369292536313e-308, - "sum_squared": 0.0, - "min": 1.1125369292536313e-308, - "max": 1.1125369292536313e-308, - "mean": 1.1125369292536313e-308, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": { - "name": "robustness", - "robustness": true, - "fairness": false, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 100, - "stop_sequences": [ - "\n" - ], - "instructions": "", - "input_prefix": "Passage: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "generation", - "evaluation_timestamp": "1763479296", - "metric_config": { - "evaluation_description": "bleu_4", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 1.1125369292536313e-308, - "details": { - "count": 1, - "sum": 1.1125369292536313e-308, - "sum_squared": 0.0, - "min": 1.1125369292536313e-308, - "max": 1.1125369292536313e-308, - "mean": 1.1125369292536313e-308, - "variance": 0.0, - "stddev": 0.0, - "split": "test", - "perturbation": { - "name": "fairness", - "robustness": false, - "fairness": true, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 100, - "stop_sequences": [ - "\n" - ], - "instructions": "", - "input_prefix": "Passage: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "generation", - "evaluation_timestamp": "1763479296", - "metric_config": { - "evaluation_description": "bleu_4", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 2.2250738585072626e-308, - "details": { - "count": 1, - "sum": 2.2250738585072626e-308, - "sum_squared": 0.0, - "min": 2.2250738585072626e-308, - "max": 2.2250738585072626e-308, - "mean": 2.2250738585072626e-308, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "robustness", - "robustness": true, - "fairness": false, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 100, - "stop_sequences": [ - "\n" - ], - "instructions": "", - "input_prefix": "Passage: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - }, - { - "evaluation_name": "generation", - 
"evaluation_timestamp": "1763479296", - "metric_config": { - "evaluation_description": "bleu_4", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0.0, - "max_score": 1.0 - }, - "score_details": { - "score": 2.2250738585072626e-308, - "details": { - "count": 1, - "sum": 2.2250738585072626e-308, - "sum_squared": 0.0, - "min": 2.2250738585072626e-308, - "max": 2.2250738585072626e-308, - "mean": 2.2250738585072626e-308, - "variance": 0.0, - "stddev": 0.0, - "split": "valid", - "perturbation": { - "name": "fairness", - "robustness": false, - "fairness": true, - "computed_on": "worst", - "seed": null - } - } - }, - "generation_config": { - "temperature": 0.0, - "max_tokens": 100, - "stop_sequences": [ - "\n" - ], - "instructions": "", - "input_prefix": "Passage: ", - "input_suffix": "\n", - "output_prefix": "Answer: ", - "output_suffix": "\n", - "instance_prefix": "\n" - } - } - ], - "detailed_evaluation_results_per_samples": [ - { - "sample_id": "id1413", - "input": "Olive Penderghast, a 17-year-old girl living in Ojai, California lies to her best friend Rhiannon Abernathy about going on a date in order to get out of camping with Rhiannon's hippie parents. Instead, she hangs around the house all weekend listening to Natasha Bedingfield's \"Pocketful of Sunshine\", which is played by a greeting card she was sent. The following Monday, pressed by Rhiannon, Olive lies about losing her virginity to a college guy. Marianne Bryant, a prissy and strictly religious Christian at their school, overhears her telling the lie and soon it spreads like wildfire. The school's conservative church group run by Marianne decides Olive will be their next project. Olive confides the truth to her friend Brandon, and he explains how others bully him because of his homosexuality. He later asks Olive to pretend to sleep with him so that he will be accepted by everyone as a 'straight stud'.\nBrandon convinces Olive to help him and they pretend to have sex at a party. After having a fight with Rhiannon over Olive's new identity as a \"dirty skank\", Olive decides to counteract the harassment by embracing her new image as the school tramp. She begins to wear more provocative clothing and stitches a red \"A\" to everything she wears. Boys who usually have had no luck with girls in the past beg Olive to say they have had sex with her in order to increase their own popularity, in exchange for gift cards to various stores, in turn increasing her reputation. Things get worse when Micah, Marianne's 22-year-old boyfriend, contracts chlamydia from sleeping with Mrs. Griffith, the school guidance counsellor, and blames it all on Olive. Olive agrees to lie to cover up the affair so that the marriage of her favorite teacher, Mr. Griffith, would be spared.\nMarianne's religious clique, which now includes Rhiannon, begins harassing Olive in order to get her to leave school. After an ill-fated date with Anson, a boy who wants to pay her to actually sleep with him and not just pretend she did, Olive reconnects with Todd, her old crush, who is also the school's mascot. Todd then tells her that he does not believe the rumors because he remembers when she lied for him when he was not ready for his first kiss years ago. Olive then begins to ask everyone she lied for to help her out by telling the truth, but Brandon and Micah have abruptly left town and everyone else is enjoying their newfound popularity and do not want the truth to get out. Mrs. 
Griffith also refuses to tell the truth and when Olive threatens to expose her, Mrs. Griffith rebuffs her, saying no one would believe her.\nOlive, out of spite, then immediately tells Mr. Griffith, who believes her and separates from Mrs. Griffith. After a friendly talk with her eccentric, open-minded mother Rosemary, Olive comes up with a plan to get everything finally out in the open. She then does a song and dance number at a school pep rally to get people's attention to watch her via web cam, where she confesses what she has done (the web cam is the framing device of the film). The various boys whose reputations Olive helped improve are also shown watching. Later, Olive texts Rhiannon, apologizing for lying to her. When she is finishing up her web cast, Todd comes by riding a lawnmower and tells her to come outside. She signs off by saying she may lose her virginity to Todd, and proudly declares it's nobody's business (much to Marianne's disgrace). She goes outside to meet him, they kiss and the two are shown riding off on the lawnmower.\nQuestion: Who is Todd besides Olive's old crush?", - "prompt": "Passage: Olive Penderghast, a 17-year-old girl living in Ojai, California lies to her best friend Rhiannon Abernathy about going on a date in order to get out of camping with Rhiannon's hippie parents. Instead, she hangs around the house all weekend listening to Natasha Bedingfield's \"Pocketful of Sunshine\", which is played by a greeting card she was sent. The following Monday, pressed by Rhiannon, Olive lies about losing her virginity to a college guy. Marianne Bryant, a prissy and strictly religious Christian at their school, overhears her telling the lie and soon it spreads like wildfire. The school's conservative church group run by Marianne decides Olive will be their next project. Olive confides the truth to her friend Brandon, and he explains how others bully him because of his homosexuality. He later asks Olive to pretend to sleep with him so that he will be accepted by everyone as a 'straight stud'.\nBrandon convinces Olive to help him and they pretend to have sex at a party. After having a fight with Rhiannon over Olive's new identity as a \"dirty skank\", Olive decides to counteract the harassment by embracing her new image as the school tramp. She begins to wear more provocative clothing and stitches a red \"A\" to everything she wears. Boys who usually have had no luck with girls in the past beg Olive to say they have had sex with her in order to increase their own popularity, in exchange for gift cards to various stores, in turn increasing her reputation. Things get worse when Micah, Marianne's 22-year-old boyfriend, contracts chlamydia from sleeping with Mrs. Griffith, the school guidance counsellor, and blames it all on Olive. Olive agrees to lie to cover up the affair so that the marriage of her favorite teacher, Mr. Griffith, would be spared.\nMarianne's religious clique, which now includes Rhiannon, begins harassing Olive in order to get her to leave school. After an ill-fated date with Anson, a boy who wants to pay her to actually sleep with him and not just pretend she did, Olive reconnects with Todd, her old crush, who is also the school's mascot. Todd then tells her that he does not believe the rumors because he remembers when she lied for him when he was not ready for his first kiss years ago. 
Olive then begins to ask everyone she lied for to help her out by telling the truth, but Brandon and Micah have abruptly left town and everyone else is enjoying their newfound popularity and do not want the truth to get out. Mrs. Griffith also refuses to tell the truth and when Olive threatens to expose her, Mrs. Griffith rebuffs her, saying no one would believe her.\nOlive, out of spite, then immediately tells Mr. Griffith, who believes her and separates from Mrs. Griffith. After a friendly talk with her eccentric, open-minded mother Rosemary, Olive comes up with a plan to get everything finally out in the open. She then does a song and dance number at a school pep rally to get people's attention to watch her via web cam, where she confesses what she has done (the web cam is the framing device of the film). The various boys whose reputations Olive helped improve are also shown watching. Later, Olive texts Rhiannon, apologizing for lying to her. When she is finishing up her web cast, Todd comes by riding a lawnmower and tells her to come outside. She signs off by saying she may lose her virginity to Todd, and proudly declares it's nobody's business (much to Marianne's disgrace). She goes outside to meet him, they kiss and the two are shown riding off on the lawnmower.\nQuestion: Who is Todd besides Olive's old crush?\nAnswer:", - "ground_truth": [ - "The school Mascot", - "the schools mascot" - ], - "response": "Olive." - }, - { - "sample_id": "id1332", - "input": "Indefer Jones is the aged squire, between seventy and eighty years of age, of a large manor, Llanfeare, in Carmarthen, Wales. His niece, Isabel Brodrick, has lived with him for years after the remarriage of her father, and endeared herself to everyone. However, according to his strong traditional beliefs, the estate should be bequeathed to a male heir.\nHis sole male blood relative is his nephew Henry Jones, a London clerk. Henry has, in the past, incurred debts that the squire had paid off, been \"sent away from Oxford\", and generally made a poor impression on his occasional visits to Llanfeare. Nevertheless, Henry is told of his uncle's intention to make him the heir to the estate and is invited to pay a visit. Isabel rejects her uncle's suggestion that she solve his dilemma by marrying Henry, as she cannot stand her cousin. Indefer Jones finds his nephew to be just as detestable as ever. As a result, he overcomes his prejudice and changes his will one final time, in Isabel's favour. Unfortunately, he dies before he can tell anyone.\nFinding the document hidden in a book of sermons by accident, Henry vacillates between keeping silent and revealing its location. He is neither good enough to give up the estate nor evil enough to burn the document, fearing disgrace, a long jail sentence and, not least, eternal damnation. Instead, he comforts himself by reasoning that doing nothing cannot be a crime.\nIndefer Jones had had his last will witnessed by two of his tenants, but since the will cannot be found despite a thorough search of the house, Henry inherits the estate. However, already extant suspicions are only strengthened by his guilty manner. He endures abuse from everyone; his own servants either quit or treat him with disrespect. He takes to spending hours in the library, where the will is hidden.\nThe local newspaper begins to publish accounts of the affair that are insulting and seemingly libelous to Henry. It accuses him of destroying the will and usurping the estate from Isabel, whom everybody knows and respects. 
The old squire's lawyer, Mr Apjohn, himself suspecting that Henry knows more than he lets on, approaches the new squire about the articles, pressuring the unwilling young man into taking legal action against the editor. Henry finds that this only makes things worse. The prospect of being cross examined in the witness box fills him with dread. He realises the truth would be dragged out of him in court.\nMr Apjohn, by clever questioning, gets a good idea about where the will is. Henry knows that time is running out, but once again procrastinates. Mr Apjohn and Mr Brodrick, Isabel's father, visit Henry at home and find the document, despite Henry's ineffectual efforts to stop them. Because he did not destroy the will, Henry is permitted to return to his job in London with his reputation intact and ÂŁ4000, the amount Isabel was bequeathed in the other will.\nQuestion: How is Isabel Brodrick related to Indefer Jones?", - "prompt": "Passage: Indefer Jones is the aged squire, between seventy and eighty years of age, of a large manor, Llanfeare, in Carmarthen, Wales. His niece, Isabel Brodrick, has lived with him for years after the remarriage of her father, and endeared herself to everyone. However, according to his strong traditional beliefs, the estate should be bequeathed to a male heir.\nHis sole male blood relative is his nephew Henry Jones, a London clerk. Henry has, in the past, incurred debts that the squire had paid off, been \"sent away from Oxford\", and generally made a poor impression on his occasional visits to Llanfeare. Nevertheless, Henry is told of his uncle's intention to make him the heir to the estate and is invited to pay a visit. Isabel rejects her uncle's suggestion that she solve his dilemma by marrying Henry, as she cannot stand her cousin. Indefer Jones finds his nephew to be just as detestable as ever. As a result, he overcomes his prejudice and changes his will one final time, in Isabel's favour. Unfortunately, he dies before he can tell anyone.\nFinding the document hidden in a book of sermons by accident, Henry vacillates between keeping silent and revealing its location. He is neither good enough to give up the estate nor evil enough to burn the document, fearing disgrace, a long jail sentence and, not least, eternal damnation. Instead, he comforts himself by reasoning that doing nothing cannot be a crime.\nIndefer Jones had had his last will witnessed by two of his tenants, but since the will cannot be found despite a thorough search of the house, Henry inherits the estate. However, already extant suspicions are only strengthened by his guilty manner. He endures abuse from everyone; his own servants either quit or treat him with disrespect. He takes to spending hours in the library, where the will is hidden.\nThe local newspaper begins to publish accounts of the affair that are insulting and seemingly libelous to Henry. It accuses him of destroying the will and usurping the estate from Isabel, whom everybody knows and respects. The old squire's lawyer, Mr Apjohn, himself suspecting that Henry knows more than he lets on, approaches the new squire about the articles, pressuring the unwilling young man into taking legal action against the editor. Henry finds that this only makes things worse. The prospect of being cross examined in the witness box fills him with dread. He realises the truth would be dragged out of him in court.\nMr Apjohn, by clever questioning, gets a good idea about where the will is. 
Henry knows that time is running out, but once again procrastinates. Mr Apjohn and Mr Brodrick, Isabel's father, visit Henry at home and find the document, despite Henry's ineffectual efforts to stop them. Because he did not destroy the will, Henry is permitted to return to his job in London with his reputation intact and ÂŁ4000, the amount Isabel was bequeathed in the other will.\nQuestion: How is Isabel Brodrick related to Indefer Jones?\nAnswer:", - "ground_truth": [ - "She is his neice.", - "His niece." - ], - "response": "Isabel is a very good and intelligent woman. She is a very good and intelligent woman. She is a very good and intelligent woman. She is a very good and intelligent woman. She is a very good and intelligent woman." - }, - { - "sample_id": "id1123", - "input": "The subject of Cratylus is the correctness of names (περὶ ὀνομάτων ὀρθότητος), in other words, it is a critique on the subject of naming (Baxter).\nWhen discussing a ὄνομα (onoma ) and how it would relate to its subject, Socrates compares the original creation of a word to the work of an artist. An artist uses color to express the essence of his subject in a painting. In much the same way, the creator of words uses letters containing certain sounds to express the essence of a word's subject. There is a letter that is best for soft things, one for liquid things, and so on. He comments;\nthe best possible way to speak consists in using names all (or most) of which are like the things they name (that is, are appropriate to them), while the worst is to use the opposite kind of names.\nOne countering position, held by Hermogenes, is that names have come about due to custom and convention. They do not express the essence of their subject, so they can be swapped with something unrelated by the individuals or communities who use them.\nThe line between the two perspectives is often blurred. During more than half of the dialogue, Socrates makes guesses at Hermogenes' request as to where names and words have come from. These include the names of the Olympian gods, personified deities, and many words that describe abstract concepts. He examines whether, for example, giving names of \"streams\" to Cronus and Rhea (Ροή – flow or space) are purely accidental.\nDon't you think he who gave to the ancestors of the other gods the names “Rhea” and “Cronus” had the same thought as Heracleitus? Do you think he gave both of them the names of streams (ῥευμάτων ὀνόματα) merely by chance?\nThe Greek term \"ῥεῦμα\" may refer to the flow of any medium and is not restricted to the flow of water or liquids. Many of the words which Socrates uses as examples may have come from an idea originally linked to the name, but have changed over time. Those of which he cannot find a link, he often assumes have come from foreign origins or have changed so much as to lose all resemblance to the original word. He states, \"names have been so twisted in all manner of ways, that I should not be surprised if the old language when compared with that now in use would appear to us to be a barbarous tongue.\"\nThe final theory of relations between name and object named is posited by Cratylus, a disciple of Heraclitus, who believes that names arrive from divine origins, making them necessarily correct. Socrates rebukes this theory by reminding Cratylus of the imperfection of certain names in capturing the objects they seek to signify. 
From this point, Socrates ultimately rejects the study of language, believing it to be philosophically inferior to a study of things themselves.\nQuestion: What does the old language sound compared with the new language?", - "prompt": "Passage: The subject of Cratylus is the correctness of names (περὶ ὀνομάτων ὀρθότητος), in other words, it is a critique on the subject of naming (Baxter).\nWhen discussing a ὄνομα (onoma ) and how it would relate to its subject, Socrates compares the original creation of a word to the work of an artist. An artist uses color to express the essence of his subject in a painting. In much the same way, the creator of words uses letters containing certain sounds to express the essence of a word's subject. There is a letter that is best for soft things, one for liquid things, and so on. He comments;\nthe best possible way to speak consists in using names all (or most) of which are like the things they name (that is, are appropriate to them), while the worst is to use the opposite kind of names.\nOne countering position, held by Hermogenes, is that names have come about due to custom and convention. They do not express the essence of their subject, so they can be swapped with something unrelated by the individuals or communities who use them.\nThe line between the two perspectives is often blurred. During more than half of the dialogue, Socrates makes guesses at Hermogenes' request as to where names and words have come from. These include the names of the Olympian gods, personified deities, and many words that describe abstract concepts. He examines whether, for example, giving names of \"streams\" to Cronus and Rhea (Ροή – flow or space) are purely accidental.\nDon't you think he who gave to the ancestors of the other gods the names “Rhea” and “Cronus” had the same thought as Heracleitus? Do you think he gave both of them the names of streams (ῥευμάτων ὀνόματα) merely by chance?\nThe Greek term \"ῥεῦμα\" may refer to the flow of any medium and is not restricted to the flow of water or liquids. Many of the words which Socrates uses as examples may have come from an idea originally linked to the name, but have changed over time. Those of which he cannot find a link, he often assumes have come from foreign origins or have changed so much as to lose all resemblance to the original word. He states, \"names have been so twisted in all manner of ways, that I should not be surprised if the old language when compared with that now in use would appear to us to be a barbarous tongue.\"\nThe final theory of relations between name and object named is posited by Cratylus, a disciple of Heraclitus, who believes that names arrive from divine origins, making them necessarily correct. Socrates rebukes this theory by reminding Cratylus of the imperfection of certain names in capturing the objects they seek to signify. From this point, Socrates ultimately rejects the study of language, believing it to be philosophically inferior to a study of things themselves.\nQuestion: What does the old language sound compared with the new language?\nAnswer:", - "ground_truth": [ - "like a barbaric tongue.", - "barbarous tongue" - ], - "response": "The old language is a dialect of the Greek language, which is the language of the Greeks. The old language is a dialect of the Greek language, which is the language of the Greeks. The old language is a dialect of the Greek language, which is the language of the Greeks." 
- }, - { - "sample_id": "id1514", - "input": "The novel begins in Manchester, where we are introduced to the Bartons and the Wilsons, two working-class families. John Barton is a questioner of the distribution of wealth and the relations between rich and poor. Soon his wife dies—he blames it on her grief over the disappearance of her sister Esther. Having already lost his son Tom at a young age, Barton is left to raise his daughter, Mary, alone and now falls into depression and begins to involve himself in the Chartist, trade-union movement.\nChapter 1 takes place in countryside where Moss Side is now.\nMary takes up work at a dressmaker's (her father having objected to her working in a factory) and becomes subject to the affections of hard-working Jem Wilson and Harry Carson, son of a wealthy mill owner. She fondly hopes, by marrying Carson, to secure a comfortable life for herself and her father, but immediately after refusing Jem's offer of marriage she realises that she truly loves him. She therefore decides to evade Carson, planning to show her feelings to Jem in the course of time. Jem believes her decision to be final, though this does not change his feelings for her.\nMeanwhile, Esther, a \"street-walker,\" returns to warn John Barton that he must save Mary from becoming like her. He simply pushes her away, however, and she's sent to jail for a month on the charge of vagrancy. Upon her release she talks to Jem with the same purpose. He promises that he will protect Mary and confronts Carson, eventually entering into a fight with him, which is witnessed by a policeman passing by.\nNot long afterwards, Carson is shot dead, and Jem is arrested for the crime, his gun having been found at the scene. Esther decides to investigate the matter further and discovers that the wadding for the gun was a piece of paper on which is written Mary's name.\nShe visits her niece to warn her to save the one she loves, and after she leaves Mary realises that the murderer is not Jem but her father. She is now faced with having to save her lover without giving away her father. With the help of Job Legh (the intelligent grandfather of her blind friend Margaret), Mary travels to Liverpool to find the only person who could provide an alibi for Jem – Will Wilson, Jem's cousin and a sailor, who was with him on the night of the murder. Unfortunately, Will's ship is already departing, so that, after Mary chases after the ship in a small boat, the only thing Will can do is promise to return in the pilot ship and testify the next day.\nDuring the trial, Jem learns of Mary's great love for him. Will arrives in court to testify, and Jem is found \"not guilty\". Mary has fallen ill during the trial and is nursed by Mr Sturgis, an old sailor, and his wife. When she finally returns to Manchester she has to face her father, who is crushed by his remorse. He summons John Carson, Harry's father, to confess to him that he is the murderer. Carson is still set on justice, but after turning to the Bible he forgives Barton, who dies soon afterwards in Carson's arms. Not long after this Esther comes back to Mary's home, where she, too, soon dies.\nJem decides to leave England, where, his reputation damaged, it would be difficult for him to find a new job. The novel ends with the wedded Mary and Jem, their little child, and Mrs Wilson living happily in Canada. 
News comes that Margaret has regained her sight and that she and Will, soon to be married, will visit.\nQuestion: Who actually killed Harry Carson?", - "prompt": "Passage: The novel begins in Manchester, where we are introduced to the Bartons and the Wilsons, two working-class families. John Barton is a questioner of the distribution of wealth and the relations between rich and poor. Soon his wife dies—he blames it on her grief over the disappearance of her sister Esther. Having already lost his son Tom at a young age, Barton is left to raise his daughter, Mary, alone and now falls into depression and begins to involve himself in the Chartist, trade-union movement.\nChapter 1 takes place in countryside where Moss Side is now.\nMary takes up work at a dressmaker's (her father having objected to her working in a factory) and becomes subject to the affections of hard-working Jem Wilson and Harry Carson, son of a wealthy mill owner. She fondly hopes, by marrying Carson, to secure a comfortable life for herself and her father, but immediately after refusing Jem's offer of marriage she realises that she truly loves him. She therefore decides to evade Carson, planning to show her feelings to Jem in the course of time. Jem believes her decision to be final, though this does not change his feelings for her.\nMeanwhile, Esther, a \"street-walker,\" returns to warn John Barton that he must save Mary from becoming like her. He simply pushes her away, however, and she's sent to jail for a month on the charge of vagrancy. Upon her release she talks to Jem with the same purpose. He promises that he will protect Mary and confronts Carson, eventually entering into a fight with him, which is witnessed by a policeman passing by.\nNot long afterwards, Carson is shot dead, and Jem is arrested for the crime, his gun having been found at the scene. Esther decides to investigate the matter further and discovers that the wadding for the gun was a piece of paper on which is written Mary's name.\nShe visits her niece to warn her to save the one she loves, and after she leaves Mary realises that the murderer is not Jem but her father. She is now faced with having to save her lover without giving away her father. With the help of Job Legh (the intelligent grandfather of her blind friend Margaret), Mary travels to Liverpool to find the only person who could provide an alibi for Jem – Will Wilson, Jem's cousin and a sailor, who was with him on the night of the murder. Unfortunately, Will's ship is already departing, so that, after Mary chases after the ship in a small boat, the only thing Will can do is promise to return in the pilot ship and testify the next day.\nDuring the trial, Jem learns of Mary's great love for him. Will arrives in court to testify, and Jem is found \"not guilty\". Mary has fallen ill during the trial and is nursed by Mr Sturgis, an old sailor, and his wife. When she finally returns to Manchester she has to face her father, who is crushed by his remorse. He summons John Carson, Harry's father, to confess to him that he is the murderer. Carson is still set on justice, but after turning to the Bible he forgives Barton, who dies soon afterwards in Carson's arms. Not long after this Esther comes back to Mary's home, where she, too, soon dies.\nJem decides to leave England, where, his reputation damaged, it would be difficult for him to find a new job. The novel ends with the wedded Mary and Jem, their little child, and Mrs Wilson living happily in Canada. 
News comes that Margaret has regained her sight and that she and Will, soon to be married, will visit.\nQuestion: Who actually killed Harry Carson?\nAnswer:", - "ground_truth": [ - "John Barton", - "John Barton" - ], - "response": "The Wilsons." - }, - { - "sample_id": "id1340", - "input": "Tarzan returns to Opar, the source of the gold where a lost colony of fabled Atlantis is located, in order to make good on some financial reverses he has recently suffered. While Atlantis itself sank beneath the waves thousands of years ago, the workers of Opar continued to mine all of the gold, which means there is a rather huge stockpile but which is now lost to the memory of the Oparians and only Tarzan knows its secret location.\nA greedy, outlawed Belgian army officer, Albert Werper, in the employ of a criminal Arab, secretly follows Tarzan to Opar. There, Tarzan loses his memory after being struck on the head by a falling rock in the treasure room during an earthquake. On encountering La, the high priestess who is the servant of the Flaming God of Opar, and who is also very beautiful, Tarzan once again rejects her love which enrages her and she tries to have him killed; she had fallen in love with the apeman during their first encounter and La and her high priests are not going to allow Tarzan to escape their sacrificial knives this time.\nIn the meanwhile, Jane has been kidnapped by the Arab and wonders what is keeping her husband from once again coming to her rescue. A now amnesiac Tarzan and the Werper escape from Opar, bearing away the sacrificial knife of Opar which La and some retainers set out to recover. There is intrigue and counter intrigue the rest of the way.\nQuestion: Who is La?", - "prompt": "Passage: The Little White Bird is a series of short episodes, including both accounts of the narrator's day-to-day activities in contemporary London and fanciful tales set in Kensington Gardens and elsewhere.The story is set in several locations; the earlier chapters are set in the town of London, contemporaneous to the time of Barrie's writing, and involving some time travel of a few years, and other fantasy elements, while remaining within the London setting. The middle chapters that later became Peter Pan in Kensington Gardens are set in London's famous Kensington Gardens, introduced by the statement that \"All perambulators lead to Kensington Gardens\". The Kensington Gardens chapters include detailed descriptions of the features of the Gardens, along with fantasy names given to the locations by the story's characters, especially after \"Lock-Out Time\", described by Barrie as the time at the end of the day when the park gates are closed to the public, and the fairies and other magical inhabitants of the park can move about more freely than during the daylight, when they must hide from ordinary people. The third section of the book, following the Kensington Gardens chapters, are again set generally in London, though there are some short returns to the Gardens that are not part of the Peter Pan stories. In a two-page diversion in chapter 24, Barrie brings the story to Patagonia, and a journey by ship returning to England at the \"white cliffs of Albion\".\nQuestion: Where was the ship in Pentagonia returned to?\nAnswer: England\n\nPassage: Tarzan returns to Opar, the source of the gold where a lost colony of fabled Atlantis is located, in order to make good on some financial reverses he has recently suffered. 
While Atlantis itself sank beneath the waves thousands of years ago, the workers of Opar continued to mine all of the gold, which means there is a rather huge stockpile but which is now lost to the memory of the Oparians and only Tarzan knows its secret location.\nA greedy, outlawed Belgian army officer, Albert Werper, in the employ of a criminal Arab, secretly follows Tarzan to Opar. There, Tarzan loses his memory after being struck on the head by a falling rock in the treasure room during an earthquake. On encountering La, the high priestess who is the servant of the Flaming God of Opar, and who is also very beautiful, Tarzan once again rejects her love which enrages her and she tries to have him killed; she had fallen in love with the apeman during their first encounter and La and her high priests are not going to allow Tarzan to escape their sacrificial knives this time.\nIn the meanwhile, Jane has been kidnapped by the Arab and wonders what is keeping her husband from once again coming to her rescue. A now amnesiac Tarzan and the Werper escape from Opar, bearing away the sacrificial knife of Opar which La and some retainers set out to recover. There is intrigue and counter intrigue the rest of the way.\nQuestion: Who is La?\nAnswer:",
-      "ground_truth": [
-        "La is the high priestess that serves the Flaming God of Opar.",
-        "A high priestess."
-      ],
-      "response": "La is a young woman who is a member of the royal family of Opar. She is a member of the royal family of Opar, and is a member of the royal family of Opar. She is a member of the royal family of Opar, and is a member of the royal family of Opar. She is a member of the royal family of Opar, and is a member of the royal family of Opar. She is a member of the royal family of Opar"
-    }
-  ]
-}
\ No newline at end of file
diff --git a/scripts/global-mmlu-lite/__init__.py b/scripts/global-mmlu-lite/__init__.py
new file mode 100644
index 000000000..cfdf9af33
--- /dev/null
+++ b/scripts/global-mmlu-lite/__init__.py
@@ -0,0 +1 @@
+"""Global MMLU Lite adapter for EvalEval schema."""
diff --git a/scripts/global-mmlu-lite/adapter.py b/scripts/global-mmlu-lite/adapter.py
new file mode 100644
index 000000000..2e121f40d
--- /dev/null
+++ b/scripts/global-mmlu-lite/adapter.py
@@ -0,0 +1,211 @@
+"""
+Script to fetch Global MMLU Lite leaderboard results from Kaggle API
+and convert them to the EvalEval schema format.
+
+Data source:
+- Global MMLU Lite: Kaggle Benchmarks API (cohere-labs/global-mmlu-lite)
+
+Usage:
+    uv run python -m scripts.global-mmlu-lite.adapter
+"""
+
+import time
+from typing import List
+
+from eval_types import (
+    AdditionalPropertiesObject,
+    EvaluationLog,
+    EvaluationResult,
+    EvaluatorRelationship,
+    MetricConfig,
+    ScoreDetails,
+    ScoreType,
+    SourceDataUrl,
+)
+
+import sys
+from pathlib import Path
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from utils import (
+    fetch_json,
+    get_developer,
+    make_source_metadata,
+    make_model_info,
+    save_evaluation_log,
+)
+
+
+# Data source URL
+KAGGLE_API_URL = "https://www.kaggle.com/api/v1/benchmarks/cohere-labs/global-mmlu-lite/leaderboard"
+
+OUTPUT_DIR = "data/global-mmlu-lite"
+
+# Hardcoded source data for global-mmlu-lite
+SOURCE_DATA = SourceDataUrl(
+    dataset_name="global-mmlu-lite",
+    source_type="url",
+    url=["https://www.kaggle.com/datasets/cohere-labs/global-mmlu-lite"],
+)
+
+
+def parse_score(value) -> float:
+    """Parse a score value, ensuring it's a float."""
+    if value is None:
+        return -1.0
+    try:
+        return float(value)
+    except (ValueError, TypeError):
+        return -1.0
+
+
+def make_eval_result(
+    name: str,
+    score: float,
+    description: str,
+    confidence_interval: float | None = None,
+    stddev: float | None = None,
+) -> EvaluationResult:
+    """Create an EvaluationResult with hardcoded source_data for global-mmlu-lite."""
+    details = None
+    if confidence_interval is not None or stddev is not None:
+        details = AdditionalPropertiesObject(
+            **{k: v for k, v in [("confidence_interval", confidence_interval), ("stddev", stddev)] if v is not None}
+        )
+    return EvaluationResult(
+        evaluation_name=name,
+        source_data=SOURCE_DATA,
+        metric_config=MetricConfig(
+            evaluation_description=description,
+            lower_is_better=False,
+            score_type=ScoreType.continuous,
+            min_score=0.0,
+            max_score=1.0,
+        ),
+        score_details=ScoreDetails(
+            score=round(score, 4) if score is not None else -1,
+            details=details,
+        ),
+    )
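+
+# Illustrative call (hypothetical task name and scores, mirroring how the
+# fetch loop below uses this helper):
+#
+#     make_eval_result(
+#         name="accuracy_en",
+#         score=0.8234,
+#         description="Global MMLU Lite - accuracy_en",
+#         confidence_interval=0.03,
+#     )
+#
+# yields an EvaluationResult pinned to SOURCE_DATA, with the score rounded to
+# four decimals and the confidence interval carried in score_details.details.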
eval_results.append( + make_eval_result( + name=task_name, + score=score, + description=f"Global MMLU Lite - {task_name}", + confidence_interval=confidence_interval, + ) + ) + + if not eval_results: + continue + + # Build model info + model_info = make_model_info( + model_name=model_name, + developer=developer, + additional_details={"display_name": model_display_name} if model_display_name and model_display_name != model_name else None, + ) + + # Build evaluation log + evaluation_id = f"global-mmlu-lite/{model_info.id.replace('/', '_')}/{retrieved_timestamp}" + eval_log = EvaluationLog( + schema_version="0.2.0", + evaluation_id=evaluation_id, + retrieved_timestamp=retrieved_timestamp, + source_metadata=make_source_metadata( + source_name="Global MMLU Lite", + organization_name="Cohere Labs", + organization_url="https://cohere.com", + evaluator_relationship=EvaluatorRelationship.third_party, + ), + model_info=model_info, + evaluation_results=eval_results, + ) + + # Parse model path for saving: use slug for folder name (no spaces, like hfopenllm_v2) + if "/" in model_info.id: + dev, _ = model_info.id.split("/", 1) + else: + dev, _ = "unknown", model_info.id + model_for_path = model_slug if model_slug else model_info.id.split("/")[-1] + + filepath = save_evaluation_log(eval_log, OUTPUT_DIR, dev, model_for_path) + print(f"Saved: {filepath}") + count += 1 + + return count + + +def main(): + """Main function to fetch and process Global MMLU Lite results.""" + retrieved_timestamp = str(time.time()) + + print("=" * 60) + print("Fetching Global MMLU Lite results...") + print("=" * 60) + + try: + count = fetch_global_mmlu_lite(retrieved_timestamp) + print(f"\nProcessed {count} models from Global MMLU Lite") + except Exception as e: + print(f"Error processing Global MMLU Lite: {e}") + import traceback + traceback.print_exc() + + print("\n" + "=" * 60) + print("Done!") + print("=" * 60) + + +if __name__ == "__main__": + main() diff --git a/scripts/hfopenllm_v2/adapter.py b/scripts/hfopenllm_v2/adapter.py index 8656768a6..98ccf56db 100644 --- a/scripts/hfopenllm_v2/adapter.py +++ b/scripts/hfopenllm_v2/adapter.py @@ -8,19 +8,26 @@ uv run python -m scripts.hfopenllm_v2.adapter """ -import json import time from pathlib import Path from typing import Any, Dict, List -from eval_types import EvaluationLog, EvaluationResult, EvaluatorRelationship +from eval_types import ( + EvaluationLog, + EvaluationResult, + EvaluatorRelationship, + MetricConfig, + ScoreDetails, + ScoreType, + SourceDataHf, +) import sys sys.path.insert(0, str(Path(__file__).parent.parent)) from utils import ( + fetch_json, get_developer, - make_evaluation_result, make_model_info, make_source_metadata, save_evaluation_log, @@ -41,6 +48,7 @@ "mmlu_pro": "MMLU-PRO", } + # Evaluation descriptions EVALUATION_DESCRIPTIONS = { "IFEval": "Accuracy on IFEval", @@ -51,23 +59,69 @@ "MMLU-PRO": "Accuracy on MMLU-PRO", } +# Source data mapping: eval_key -> SourceDataHf +SOURCE_DATA_MAPPING = { + "ifeval": SourceDataHf( + dataset_name="IFEval", + source_type="hf_dataset", + hf_repo="google/IFEval", + ), + "bbh": SourceDataHf( + dataset_name="BBH", + source_type="hf_dataset", + hf_repo="SaylorTwift/bbh", + ), + "math": SourceDataHf( + dataset_name="MATH Level 5", + source_type="hf_dataset", + hf_repo="DigitalLearningGmbH/MATH-lighteval", + ), + "gpqa": SourceDataHf( + dataset_name="GPQA", + source_type="hf_dataset", + hf_repo="Idavidrein/gpqa", + ), + "musr": SourceDataHf( + dataset_name="MUSR", + source_type="hf_dataset", + hf_repo="TAUR-Lab/MuSR", + 
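+        # MuSR as published under the TAUR-Lab org on the Hugging Face Hub.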
), + "mmlu_pro": SourceDataHf( + dataset_name="MMLU-PRO", + source_type="hf_dataset", + hf_repo="TIGER-Lab/MMLU-Pro", + ), +} + def convert_model(model_data: Dict[str, Any], retrieved_timestamp: str) -> EvaluationLog: """Convert a single model's data to EvaluationLog format.""" - model_name = model_data["model"]["name"] - developer = get_developer(model_name) + model_id = model_data["model"]["name"] + if "/" not in model_id: + raise ValueError(f"Expected 'org/model' format, got: {model_id}") + developer, model_name = model_id.split("/", 1) # Build evaluation results eval_results: List[EvaluationResult] = [] for eval_key, eval_data in model_data.get("evaluations", {}).items(): display_name = eval_data.get("name", EVALUATION_MAPPING.get(eval_key, eval_key)) description = EVALUATION_DESCRIPTIONS.get(display_name, f"Accuracy on {display_name}") + source_data = SOURCE_DATA_MAPPING.get(eval_key) eval_results.append( - make_evaluation_result( - name=display_name, - score=eval_data.get("value", 0.0), - description=description, + EvaluationResult( + evaluation_name=display_name, + source_data=source_data, + metric_config=MetricConfig( + evaluation_description=description, + lower_is_better=False, + score_type=ScoreType.continuous, + min_score=0.0, + max_score=1.0, + ), + score_details=ScoreDetails( + score=round(eval_data.get("value", 0.0), 4), + ), ) ) @@ -89,13 +143,12 @@ def convert_model(model_data: Dict[str, Any], retrieved_timestamp: str) -> Evalu ) # Build evaluation ID - evaluation_id = f"hfopenllm_v2/{model_name.replace('/', '_')}/{retrieved_timestamp}" + evaluation_id = f"hfopenllm_v2/{developer}_{model_name}/{retrieved_timestamp}" return EvaluationLog( - schema_version="0.1.0", + schema_version="0.2.0", evaluation_id=evaluation_id, retrieved_timestamp=retrieved_timestamp, - source_data=[SOURCE_URL], source_metadata=make_source_metadata( source_name="HF Open LLM v2", organization_name="Hugging Face", @@ -113,14 +166,10 @@ def process_models(models_data: List[Dict[str, Any]], output_dir: str = OUTPUT_D for model_data in models_data: try: - model_name = model_data["model"]["name"] - developer = get_developer(model_name) - - # Parse model name for directory structure - if "/" in model_name: - _, model = model_name.split("/", 1) - else: - model = model_name + model_id = model_data["model"]["name"] + if "/" not in model_id: + raise ValueError(f"Expected 'org/model' format, got: {model_id}") + developer, model = model_id.split("/", 1) # Convert to EvaluationLog eval_log = convert_model(model_data, retrieved_timestamp) @@ -138,18 +187,8 @@ def process_models(models_data: List[Dict[str, Any]], output_dir: str = OUTPUT_D if __name__ == "__main__": - # Load data from local file (downloaded separately) - data_path = Path(__file__).parent / "formatted.json" - - if not data_path.exists(): - print(f"Error: {data_path} not found.") - print(f"Please download the data from: {SOURCE_URL}") - print(f"Save it as: {data_path}") - exit(1) - - print(f"Loading data from {data_path}") - with open(data_path, "r") as f: - all_models = json.load(f) + print(f"Fetching data from {SOURCE_URL}...") + all_models = fetch_json(SOURCE_URL) print(f"Processing {len(all_models)} models...") count = process_models(all_models) diff --git a/uv.lock b/uv.lock index 548ffe78b..77be2f2dc 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,4 @@ version = 1 -revision = 1 requires-python = ">=3.12" [[package]] @@ -413,15 +412,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/ae/3a/dbeec9d1ee0844c679f6bb5d6ad4e9f198b1224f4e7a32825f47f6192b0c/cffi-2.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:0a1527a803f0a659de1af2e1fd700213caba79377e27e4693648c2923da066f9", size = 184195 }, ] -[[package]] -name = "cfgv" -version = "3.5.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/4e/b5/721b8799b04bf9afe054a3899c6cf4e880fcf8563cc71c15610242490a0c/cfgv-3.5.0.tar.gz", hash = "sha256:d5b1034354820651caa73ede66a6294d6e95c1b00acc5e9b098e917404669132", size = 7334 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/db/3c/33bac158f8ab7f89b2e59426d5fe2e4f63f7ed25df84c036890172b412b5/cfgv-3.5.0-py2.py3-none-any.whl", hash = "sha256:a8dc6b26ad22ff227d2634a65cb388215ce6cc96bbcc5cfde7641ae87e8dacc0", size = 7445 }, -] - [[package]] name = "charset-normalizer" version = "3.4.4" @@ -484,7 +474,7 @@ name = "click" version = "8.2.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "colorama", marker = "platform_system == 'Windows'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/60/6c/8ca2efa64cf75a977a0d7fac081354553ebe483345c734fb6b6515d96bbc/click-8.2.1.tar.gz", hash = "sha256:27c491cc05d968d271d5a1db13e3b5a184636d9d930f148c50b038f0d0646202", size = 286342 } wheels = [ @@ -578,10 +568,15 @@ dependencies = [ { name = "cuda-pathfinder" }, ] wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/c2/65bfd79292b8ff18be4dd7f7442cea37bcbc1a228c1886f1dea515c45b67/cuda_bindings-12.9.4-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:694ba35023846625ef471257e6b5a4bc8af690f961d197d77d34b1d1db393f56", size = 11760260 }, { url = "https://files.pythonhosted.org/packages/a9/c1/dabe88f52c3e3760d861401bb994df08f672ec893b8f7592dc91626adcf3/cuda_bindings-12.9.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fda147a344e8eaeca0c6ff113d2851ffca8f7dfc0a6c932374ee5c47caa649c8", size = 12151019 }, + { url = "https://files.pythonhosted.org/packages/05/8b/b4b2d1c7775fa403b64333e720cfcfccef8dcb9cdeb99947061ca5a77628/cuda_bindings-12.9.4-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cf8bfaedc238f3b115d957d1fd6562b7e8435ba57f6d0e2f87d0e7149ccb2da5", size = 11570071 }, { url = "https://files.pythonhosted.org/packages/63/56/e465c31dc9111be3441a9ba7df1941fe98f4aa6e71e8788a3fb4534ce24d/cuda_bindings-12.9.4-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:32bdc5a76906be4c61eb98f546a6786c5773a881f3b166486449b5d141e4a39f", size = 11906628 }, + { url = "https://files.pythonhosted.org/packages/ec/07/6aff13bc1e977e35aaa6b22f52b172e2890c608c6db22438cf7ed2bf43a6/cuda_bindings-12.9.4-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3adf4958dcf68ae7801a59b73fb00a8b37f8d0595060d66ceae111b1002de38d", size = 11566797 }, { url = "https://files.pythonhosted.org/packages/a3/84/1e6be415e37478070aeeee5884c2022713c1ecc735e6d82d744de0252eee/cuda_bindings-12.9.4-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:56e0043c457a99ac473ddc926fe0dc4046694d99caef633e92601ab52cbe17eb", size = 11925991 }, + { url = "https://files.pythonhosted.org/packages/1e/b5/96a6696e20c4ffd2b327f54c7d0fde2259bdb998d045c25d5dedbbe30290/cuda_bindings-12.9.4-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:1f53a7f453d4b2643d8663d036bafe29b5ba89eb904c133180f295df6dc151e5", size = 11624530 }, { url = "https://files.pythonhosted.org/packages/d1/af/6dfd8f2ed90b1d4719bc053ff8940e494640fe4212dc3dd72f383e4992da/cuda_bindings-12.9.4-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8b72ee72a9cc1b531db31eebaaee5c69a8ec3500e32c6933f2d3b15297b53686", size = 11922703 }, + { url = "https://files.pythonhosted.org/packages/39/73/d2fc40c043bac699c3880bf88d3cebe9d88410cd043795382826c93a89f0/cuda_bindings-12.9.4-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:20f2699d61d724de3eb3f3369d57e2b245f93085cab44fd37c3bea036cea1a6f", size = 11565056 }, { url = "https://files.pythonhosted.org/packages/6c/19/90ac264acc00f6df8a49378eedec9fd2db3061bf9263bf9f39fd3d8377c3/cuda_bindings-12.9.4-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d80bffc357df9988dca279734bc9674c3934a654cab10cadeed27ce17d8635ee", size = 11924658 }, ] @@ -724,15 +719,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c9/7a/cef76fd8438a42f96db64ddaa85280485a9c395e7df3db8158cfec1eee34/dill-0.3.8-py3-none-any.whl", hash = "sha256:c36ca9ffb54365bdd2f8eb3eff7d2a21237f8452b57ace88b1ac615b7e815bd7", size = 116252 }, ] -[[package]] -name = "distlib" -version = "0.4.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/96/8e/709914eb2b5749865801041647dc7f4e6d00b549cfe88b65ca192995f07c/distlib-0.4.0.tar.gz", hash = "sha256:feec40075be03a04501a973d81f633735b4b69f98b05450592310c0f401a4e0d", size = 614605 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047 }, -] - [[package]] name = "docstring-parser" version = "0.17.0" @@ -756,14 +742,6 @@ dependencies = [ { name = "requests" }, ] -[package.dev-dependencies] -dev = [ - { name = "datamodel-code-generator" }, - { name = "pre-commit" }, - { name = "pytest" }, - { name = "ruff" }, -] - [package.metadata] requires-dist = [ { name = "crfm-helm", specifier = ">=0.5.12" }, @@ -775,14 +753,6 @@ requires-dist = [ { name = "requests", specifier = ">=2.32.5,<3.0.0" }, ] -[package.metadata.requires-dev] -dev = [ - { name = "datamodel-code-generator", specifier = ">=0.43.1" }, - { name = "pre-commit", specifier = ">=4.3.0" }, - { name = "pytest", specifier = ">=9.0.2" }, - { name = "ruff", specifier = ">=0.12.2" }, -] - [[package]] name = "filelock" version = "3.20.3" @@ -998,15 +968,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cb/bd/1a875e0d592d447cbc02805fd3fe0f497714d6a2583f59d14fa9ebad96eb/huggingface_hub-0.36.0-py3-none-any.whl", hash = "sha256:7bcc9ad17d5b3f07b57c78e79d527102d08313caa278a641993acddcb894548d", size = 566094 }, ] -[[package]] -name = "identify" -version = "2.6.15" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ff/e7/685de97986c916a6d93b3876139e00eef26ad5bbbd61925d670ae8013449/identify-2.6.15.tar.gz", hash = "sha256:e4f4864b96c6557ef2a1e1c951771838f4edc9df3a72ec7118b338801b11c7bf", size = 99311 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0f/1c/e5fd8f973d4f375adb21565739498e2e9a1e54c858a97b9a8ccfdc81da9b/identify-2.6.15-py2.py3-none-any.whl", hash = "sha256:1181ef7608e00704db228516541eb83a88a9f94433a8c80bb9b5bd54b1d81757", size = 
99183 }, -] - [[package]] name = "idna" version = "3.11" @@ -1092,15 +1053,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8a/eb/427ed2b20a38a4ee29f24dbe4ae2dafab198674fe9a85e3d6adf9e5f5f41/inflect-7.5.0-py3-none-any.whl", hash = "sha256:2aea70e5e70c35d8350b8097396ec155ffd68def678c7ff97f51aa69c1d92344", size = 35197 }, ] -[[package]] -name = "iniconfig" -version = "2.3.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484 }, -] - [[package]] name = "inspect-ai" version = "0.3.160" @@ -1696,15 +1648,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/60/90/81ac364ef94209c100e12579629dc92bf7a709a84af32f8c551b02c07e94/nltk-3.9.2-py3-none-any.whl", hash = "sha256:1e209d2b3009110635ed9709a67a1a3e33a10f799490fa71cf4bec218c11c88a", size = 1513404 }, ] -[[package]] -name = "nodeenv" -version = "1.10.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/24/bf/d1bda4f6168e0b2e9e5958945e01910052158313224ada5ce1fb2e1113b8/nodeenv-1.10.0.tar.gz", hash = "sha256:996c191ad80897d076bdfba80a41994c2b47c68e224c542b48feba42ba00f8bb", size = 55611 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/88/b2/d0896bdcdc8d28a7fc5717c305f1a861c26e18c05047949fb371034d98bd/nodeenv-1.10.0-py2.py3-none-any.whl", hash = "sha256:5bb13e3eed2923615535339b3c620e76779af4cb4c6a90deccc9e36b274d3827", size = 23438 }, -] - [[package]] name = "numpy" version = "2.4.1" @@ -1771,6 +1714,7 @@ name = "nvidia-cublas-cu12" version = "12.8.4.1" source = { registry = "https://pypi.org/simple" } wheels = [ + { url = "https://files.pythonhosted.org/packages/29/99/db44d685f0e257ff0e213ade1964fc459b4a690a73293220e98feb3307cf/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:b86f6dd8935884615a0683b663891d43781b819ac4f2ba2b0c9604676af346d0", size = 590537124 }, { url = "https://files.pythonhosted.org/packages/dc/61/e24b560ab2e2eaeb3c839129175fb330dfcfc29e5203196e5541a4c44682/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142", size = 594346921 }, ] @@ -1779,6 +1723,7 @@ name = "nvidia-cuda-cupti-cu12" version = "12.8.90" source = { registry = "https://pypi.org/simple" } wheels = [ + { url = "https://files.pythonhosted.org/packages/d5/1f/b3bd73445e5cb342727fd24fe1f7b748f690b460acadc27ea22f904502c8/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4412396548808ddfed3f17a467b104ba7751e6b58678a4b840675c56d21cf7ed", size = 9533318 }, { url = "https://files.pythonhosted.org/packages/f8/02/2adcaa145158bf1a8295d83591d22e4103dbfd821bcaf6f3f53151ca4ffa/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182", size = 10248621 }, ] @@ -1788,6 +1733,7 @@ version = "12.8.93" source = { registry = "https://pypi.org/simple" } wheels = [ { url = 
"https://files.pythonhosted.org/packages/05/6b/32f747947df2da6994e999492ab306a903659555dddc0fbdeb9d71f75e52/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994", size = 88040029 }, + { url = "https://files.pythonhosted.org/packages/eb/d1/e50d0acaab360482034b84b6e27ee83c6738f7d32182b987f9c7a4e32962/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fc1fec1e1637854b4c0a65fb9a8346b51dd9ee69e61ebaccc82058441f15bce8", size = 43106076 }, ] [[package]] @@ -1795,6 +1741,7 @@ name = "nvidia-cuda-runtime-cu12" version = "12.8.90" source = { registry = "https://pypi.org/simple" } wheels = [ + { url = "https://files.pythonhosted.org/packages/7c/75/f865a3b236e4647605ea34cc450900854ba123834a5f1598e160b9530c3a/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:52bf7bbee900262ffefe5e9d5a2a69a30d97e2bc5bb6cc866688caa976966e3d", size = 965265 }, { url = "https://files.pythonhosted.org/packages/0d/9b/a997b638fcd068ad6e4d53b8551a7d30fe8b404d6f1804abf1df69838932/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90", size = 954765 }, ] @@ -1806,6 +1753,7 @@ dependencies = [ { name = "nvidia-cublas-cu12" }, ] wheels = [ + { url = "https://files.pythonhosted.org/packages/fa/41/e79269ce215c857c935fd86bcfe91a451a584dfc27f1e068f568b9ad1ab7/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:c9132cc3f8958447b4910a1720036d9eff5928cc3179b0a51fb6d167c6cc87d8", size = 705026878 }, { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467 }, ] @@ -1817,6 +1765,7 @@ dependencies = [ { name = "nvidia-nvjitlink-cu12" }, ] wheels = [ + { url = "https://files.pythonhosted.org/packages/60/bc/7771846d3a0272026c416fbb7e5f4c1f146d6d80704534d0b187dd6f4800/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:848ef7224d6305cdb2a4df928759dca7b1201874787083b6e7550dd6765ce69a", size = 193109211 }, { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695 }, ] @@ -1826,6 +1775,7 @@ version = "1.13.1.3" source = { registry = "https://pypi.org/simple" } wheels = [ { url = "https://files.pythonhosted.org/packages/bb/fe/1bcba1dfbfb8d01be8d93f07bfc502c93fa23afa6fd5ab3fc7c1df71038a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc", size = 1197834 }, + { url = "https://files.pythonhosted.org/packages/1e/f5/5607710447a6fe9fd9b3283956fceeee8a06cda1d2f56ce31371f595db2a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:4beb6d4cce47c1a0f1013d72e02b0994730359e17801d395bdcbf20cfb3bb00a", size = 1120705 }, ] [[package]] @@ -1833,6 +1783,7 @@ name = "nvidia-curand-cu12" version = "10.3.9.90" source = { registry = 
"https://pypi.org/simple" } wheels = [ + { url = "https://files.pythonhosted.org/packages/45/5e/92aa15eca622a388b80fbf8375d4760738df6285b1e92c43d37390a33a9a/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:dfab99248034673b779bc6decafdc3404a8a6f502462201f2f31f11354204acd", size = 63625754 }, { url = "https://files.pythonhosted.org/packages/fb/aa/6584b56dc84ebe9cf93226a5cde4d99080c8e90ab40f0c27bda7a0f29aa1/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9", size = 63619976 }, ] @@ -1846,6 +1797,7 @@ dependencies = [ { name = "nvidia-nvjitlink-cu12" }, ] wheels = [ + { url = "https://files.pythonhosted.org/packages/c8/32/f7cd6ce8a7690544d084ea21c26e910a97e077c9b7f07bf5de623ee19981/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:db9ed69dbef9715071232caa9b69c52ac7de3a95773c2db65bdba85916e4e5c0", size = 267229841 }, { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905 }, ] @@ -1857,6 +1809,7 @@ dependencies = [ { name = "nvidia-nvjitlink-cu12" }, ] wheels = [ + { url = "https://files.pythonhosted.org/packages/bc/f7/cd777c4109681367721b00a106f491e0d0d15cfa1fd59672ce580ce42a97/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9b6c161cb130be1a07a27ea6923df8141f3c295852f4b260c65f18f3e0a091dc", size = 288117129 }, { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466 }, ] @@ -1865,6 +1818,7 @@ name = "nvidia-cusparselt-cu12" version = "0.7.1" source = { registry = "https://pypi.org/simple" } wheels = [ + { url = "https://files.pythonhosted.org/packages/73/b9/598f6ff36faaece4b3c50d26f50e38661499ff34346f00e057760b35cc9d/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8878dce784d0fac90131b6817b607e803c36e629ba34dc5b433471382196b6a5", size = 283835557 }, { url = "https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691 }, ] @@ -1873,6 +1827,7 @@ name = "nvidia-nccl-cu12" version = "2.27.5" source = { registry = "https://pypi.org/simple" } wheels = [ + { url = "https://files.pythonhosted.org/packages/bb/1c/857979db0ef194ca5e21478a0612bcdbbe59458d7694361882279947b349/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:31432ad4d1fb1004eb0c56203dc9bc2178a1ba69d1d9e02d64a6938ab5e40e7a", size = 322400625 }, { url = "https://files.pythonhosted.org/packages/6e/89/f7a07dc961b60645dbbf42e80f2bc85ade7feb9a491b11a1e973aa00071f/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457", size = 322348229 }, ] @@ -1882,6 +1837,7 @@ version = "12.8.93" source = { registry = "https://pypi.org/simple" } wheels = [ { url = 
"https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836 }, + { url = "https://files.pythonhosted.org/packages/2a/a2/8cee5da30d13430e87bf99bb33455d2724d0a4a9cb5d7926d80ccb96d008/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:adccd7161ace7261e01bb91e44e88da350895c270d23f744f0820c818b7229e7", size = 38386204 }, ] [[package]] @@ -1889,6 +1845,7 @@ name = "nvidia-nvshmem-cu12" version = "3.4.5" source = { registry = "https://pypi.org/simple" } wheels = [ + { url = "https://files.pythonhosted.org/packages/1d/6a/03aa43cc9bd3ad91553a88b5f6fb25ed6a3752ae86ce2180221962bc2aa5/nvidia_nvshmem_cu12-3.4.5-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0b48363fc6964dede448029434c6abed6c5e37f823cb43c3bcde7ecfc0457e15", size = 138936938 }, { url = "https://files.pythonhosted.org/packages/b5/09/6ea3ea725f82e1e76684f0708bbedd871fc96da89945adeba65c3835a64c/nvidia_nvshmem_cu12-3.4.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:042f2500f24c021db8a06c5eec2539027d57460e1c1a762055a6554f72c369bd", size = 139103095 }, ] @@ -1897,6 +1854,7 @@ name = "nvidia-nvtx-cu12" version = "12.8.90" source = { registry = "https://pypi.org/simple" } wheels = [ + { url = "https://files.pythonhosted.org/packages/10/c0/1b303feea90d296f6176f32a2a70b5ef230f9bdeb3a72bddb0dc922dc137/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d7ad891da111ebafbf7e015d34879f7112832fc239ff0d7d776b6cb685274615", size = 91161 }, { url = "https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954 }, ] @@ -2061,15 +2019,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cb/28/3bfe2fa5a7b9c46fe7e13c97bda14c895fb10fa2ebf1d0abb90e0cea7ee1/platformdirs-4.5.1-py3-none-any.whl", hash = "sha256:d03afa3963c806a9bed9d5125c8f4cb2fdaf74a55ab60e5d59b3fde758104d31", size = 18731 }, ] -[[package]] -name = "pluggy" -version = "1.6.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538 }, -] - [[package]] name = "ply" version = "3.11" @@ -2079,22 +2028,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a3/58/35da89ee790598a0700ea49b2a66594140f44dec458c07e8e3d4979137fc/ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce", size = 49567 }, ] -[[package]] -name = "pre-commit" -version = "4.5.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "cfgv" }, - { name = "identify" }, - { name = "nodeenv" }, - { name = "pyyaml" }, - { name = "virtualenv" }, -] 
-sdist = { url = "https://files.pythonhosted.org/packages/40/f1/6d86a29246dfd2e9b6237f0b5823717f60cad94d47ddc26afa916d21f525/pre_commit-4.5.1.tar.gz", hash = "sha256:eb545fcff725875197837263e977ea257a402056661f09dae08e4b149b030a61", size = 198232 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/5d/19/fd3ef348460c80af7bb4669ea7926651d1f95c23ff2df18b9d24bab4f3fa/pre_commit-4.5.1-py2.py3-none-any.whl", hash = "sha256:3b3afd891e97337708c1674210f8eba659b52a38ea5f822ff142d10786221f77", size = 226437 }, -] - [[package]] name = "preshed" version = "3.0.12" @@ -2392,6 +2325,10 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5c/96/5fb7d8c3c17bc8c62fdb031c47d77a1af698f1d7a406b0f79aaa1338f9ad/pydantic_core-2.41.5-cp314-cp314t-win32.whl", hash = "sha256:b4ececa40ac28afa90871c2cc2b9ffd2ff0bf749380fbdf57d165fd23da353aa", size = 1988906 }, { url = "https://files.pythonhosted.org/packages/22/ed/182129d83032702912c2e2d8bbe33c036f342cc735737064668585dac28f/pydantic_core-2.41.5-cp314-cp314t-win_amd64.whl", hash = "sha256:80aa89cad80b32a912a65332f64a4450ed00966111b6615ca6816153d3585a8c", size = 1981607 }, { url = "https://files.pythonhosted.org/packages/9f/ed/068e41660b832bb0b1aa5b58011dea2a3fe0ba7861ff38c4d4904c1c1a99/pydantic_core-2.41.5-cp314-cp314t-win_arm64.whl", hash = "sha256:35b44f37a3199f771c3eaa53051bc8a70cd7b54f333531c59e29fd4db5d15008", size = 1974769 }, + { url = "https://files.pythonhosted.org/packages/11/72/90fda5ee3b97e51c494938a4a44c3a35a9c96c19bba12372fb9c634d6f57/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:b96d5f26b05d03cc60f11a7761a5ded1741da411e7fe0909e27a5e6a0cb7b034", size = 2115441 }, + { url = "https://files.pythonhosted.org/packages/1f/53/8942f884fa33f50794f119012dc6a1a02ac43a56407adaac20463df8e98f/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:634e8609e89ceecea15e2d61bc9ac3718caaaa71963717bf3c8f38bfde64242c", size = 1930291 }, + { url = "https://files.pythonhosted.org/packages/79/c8/ecb9ed9cd942bce09fc888ee960b52654fbdbede4ba6c2d6e0d3b1d8b49c/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:93e8740d7503eb008aa2df04d3b9735f845d43ae845e6dcd2be0b55a2da43cd2", size = 1948632 }, + { url = "https://files.pythonhosted.org/packages/2e/1b/687711069de7efa6af934e74f601e2a4307365e8fdc404703afc453eab26/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f15489ba13d61f670dcc96772e733aad1a6f9c429cc27574c6cdaed82d0146ad", size = 2138905 }, { url = "https://files.pythonhosted.org/packages/09/32/59b0c7e63e277fa7911c2fc70ccfb45ce4b98991e7ef37110663437005af/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:7da7087d756b19037bc2c06edc6c170eeef3c3bafcb8f532ff17d64dc427adfd", size = 2110495 }, { url = "https://files.pythonhosted.org/packages/aa/81/05e400037eaf55ad400bcd318c05bb345b57e708887f07ddb2d20e3f0e98/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:aabf5777b5c8ca26f7824cb4a120a740c9588ed58df9b2d196ce92fba42ff8dc", size = 1915388 }, { url = "https://files.pythonhosted.org/packages/6e/0d/e3549b2399f71d56476b77dbf3cf8937cec5cd70536bdc0e374a421d0599/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c007fe8a43d43b3969e8469004e9845944f1a80e6acd47c150856bb87f230c56", size = 1942879 }, @@ 
-2427,22 +2364,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/10/bd/c038d7cc38edc1aa5bf91ab8068b63d4308c66c4c8bb3cbba7dfbc049f9c/pyparsing-3.3.2-py3-none-any.whl", hash = "sha256:850ba148bd908d7e2411587e247a1e4f0327839c40e2e5e6d05a007ecc69911d", size = 122781 }, ] -[[package]] -name = "pytest" -version = "9.0.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, - { name = "iniconfig" }, - { name = "packaging" }, - { name = "pluggy" }, - { name = "pygments" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801 }, -] - [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -2750,32 +2671,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d0/02/fa464cdfbe6b26e0600b62c528b72d8608f5cc49f96b8d6e38c95d60c676/rpds_py-0.30.0-cp314-cp314t-win_amd64.whl", hash = "sha256:27f4b0e92de5bfbc6f86e43959e6edd1425c33b5e69aab0984a72047f2bcf1e3", size = 226532 }, ] -[[package]] -name = "ruff" -version = "0.14.11" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d4/77/9a7fe084d268f8855d493e5031ea03fa0af8cc05887f638bf1c4e3363eb8/ruff-0.14.11.tar.gz", hash = "sha256:f6dc463bfa5c07a59b1ff2c3b9767373e541346ea105503b4c0369c520a66958", size = 5993417 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f0/a6/a4c40a5aaa7e331f245d2dc1ac8ece306681f52b636b40ef87c88b9f7afd/ruff-0.14.11-py3-none-linux_armv6l.whl", hash = "sha256:f6ff2d95cbd335841a7217bdfd9c1d2e44eac2c584197ab1385579d55ff8830e", size = 12951208 }, - { url = "https://files.pythonhosted.org/packages/5c/5c/360a35cb7204b328b685d3129c08aca24765ff92b5a7efedbdd6c150d555/ruff-0.14.11-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6f6eb5c1c8033680f4172ea9c8d3706c156223010b8b97b05e82c59bdc774ee6", size = 13330075 }, - { url = "https://files.pythonhosted.org/packages/1b/9e/0cc2f1be7a7d33cae541824cf3f95b4ff40d03557b575912b5b70273c9ec/ruff-0.14.11-py3-none-macosx_11_0_arm64.whl", hash = "sha256:f2fc34cc896f90080fca01259f96c566f74069a04b25b6205d55379d12a6855e", size = 12257809 }, - { url = "https://files.pythonhosted.org/packages/a7/e5/5faab97c15bb75228d9f74637e775d26ac703cc2b4898564c01ab3637c02/ruff-0.14.11-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:53386375001773ae812b43205d6064dae49ff0968774e6befe16a994fc233caa", size = 12678447 }, - { url = "https://files.pythonhosted.org/packages/1b/33/e9767f60a2bef779fb5855cab0af76c488e0ce90f7bb7b8a45c8a2ba4178/ruff-0.14.11-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a697737dce1ca97a0a55b5ff0434ee7205943d4874d638fe3ae66166ff46edbe", size = 12758560 }, - { url = "https://files.pythonhosted.org/packages/eb/84/4c6cf627a21462bb5102f7be2a320b084228ff26e105510cd2255ea868e5/ruff-0.14.11-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6845ca1da8ab81ab1dce755a32ad13f1db72e7fba27c486d5d90d65e04d17b8f", size = 13599296 }, - { url = 
"https://files.pythonhosted.org/packages/88/e1/92b5ed7ea66d849f6157e695dc23d5d6d982bd6aa8d077895652c38a7cae/ruff-0.14.11-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:e36ce2fd31b54065ec6f76cb08d60159e1b32bdf08507862e32f47e6dde8bcbf", size = 15048981 }, - { url = "https://files.pythonhosted.org/packages/61/df/c1bd30992615ac17c2fb64b8a7376ca22c04a70555b5d05b8f717163cf9f/ruff-0.14.11-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:590bcc0e2097ecf74e62a5c10a6b71f008ad82eb97b0a0079e85defe19fe74d9", size = 14633183 }, - { url = "https://files.pythonhosted.org/packages/04/e9/fe552902f25013dd28a5428a42347d9ad20c4b534834a325a28305747d64/ruff-0.14.11-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:53fe71125fc158210d57fe4da26e622c9c294022988d08d9347ec1cf782adafe", size = 14050453 }, - { url = "https://files.pythonhosted.org/packages/ae/93/f36d89fa021543187f98991609ce6e47e24f35f008dfe1af01379d248a41/ruff-0.14.11-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a35c9da08562f1598ded8470fcfef2afb5cf881996e6c0a502ceb61f4bc9c8a3", size = 13757889 }, - { url = "https://files.pythonhosted.org/packages/b7/9f/c7fb6ecf554f28709a6a1f2a7f74750d400979e8cd47ed29feeaa1bd4db8/ruff-0.14.11-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:0f3727189a52179393ecf92ec7057c2210203e6af2676f08d92140d3e1ee72c1", size = 13955832 }, - { url = "https://files.pythonhosted.org/packages/db/a0/153315310f250f76900a98278cf878c64dfb6d044e184491dd3289796734/ruff-0.14.11-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:eb09f849bd37147a789b85995ff734a6c4a095bed5fd1608c4f56afc3634cde2", size = 12586522 }, - { url = "https://files.pythonhosted.org/packages/2f/2b/a73a2b6e6d2df1d74bf2b78098be1572191e54bec0e59e29382d13c3adc5/ruff-0.14.11-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:c61782543c1231bf71041461c1f28c64b961d457d0f238ac388e2ab173d7ecb7", size = 12724637 }, - { url = "https://files.pythonhosted.org/packages/f0/41/09100590320394401cd3c48fc718a8ba71c7ddb1ffd07e0ad6576b3a3df2/ruff-0.14.11-py3-none-musllinux_1_2_i686.whl", hash = "sha256:82ff352ea68fb6766140381748e1f67f83c39860b6446966cff48a315c3e2491", size = 13145837 }, - { url = "https://files.pythonhosted.org/packages/3b/d8/e035db859d1d3edf909381eb8ff3e89a672d6572e9454093538fe6f164b0/ruff-0.14.11-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:728e56879df4ca5b62a9dde2dd0eb0edda2a55160c0ea28c4025f18c03f86984", size = 13850469 }, - { url = "https://files.pythonhosted.org/packages/4e/02/bb3ff8b6e6d02ce9e3740f4c17dfbbfb55f34c789c139e9cd91985f356c7/ruff-0.14.11-py3-none-win32.whl", hash = "sha256:337c5dd11f16ee52ae217757d9b82a26400be7efac883e9e852646f1557ed841", size = 12851094 }, - { url = "https://files.pythonhosted.org/packages/58/f1/90ddc533918d3a2ad628bc3044cdfc094949e6d4b929220c3f0eb8a1c998/ruff-0.14.11-py3-none-win_amd64.whl", hash = "sha256:f981cea63d08456b2c070e64b79cb62f951aa1305282974d4d5216e6e0178ae6", size = 14001379 }, - { url = "https://files.pythonhosted.org/packages/c4/1c/1dbe51782c0e1e9cfce1d1004752672d2d4629ea46945d19d731ad772b3b/ruff-0.14.11-py3-none-win_arm64.whl", hash = "sha256:649fb6c9edd7f751db276ef42df1f3df41c38d67d199570ae2a7bd6cbc3590f0", size = 12938644 }, -] - [[package]] name = "s3fs" version = "2025.3.0" @@ -3281,29 +3176,29 @@ name = "torch" version = "2.10.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "cuda-bindings", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = 
"cuda-bindings", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, { name = "filelock" }, { name = "fsspec" }, { name = "jinja2" }, { name = "networkx" }, - { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cufile-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusparselt-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nvshmem-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-cufile-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-cusparselt-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-nvshmem-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, { name = "setuptools" }, { name = "sympy" }, - { name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "triton", marker = 
"platform_machine == 'x86_64' and platform_system == 'Linux'" }, { name = "typing-extensions" }, ] wheels = [ @@ -3368,7 +3263,7 @@ name = "tqdm" version = "4.67.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "colorama", marker = "platform_system == 'Windows'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737 } wheels = [ @@ -3401,10 +3296,15 @@ name = "triton" version = "3.6.0" source = { registry = "https://pypi.org/simple" } wheels = [ + { url = "https://files.pythonhosted.org/packages/17/5d/08201db32823bdf77a0e2b9039540080b2e5c23a20706ddba942924ebcd6/triton-3.6.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:374f52c11a711fd062b4bfbb201fd9ac0a5febd28a96fb41b4a0f51dde3157f4", size = 176128243 }, { url = "https://files.pythonhosted.org/packages/ab/a8/cdf8b3e4c98132f965f88c2313a4b493266832ad47fb52f23d14d4f86bb5/triton-3.6.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:74caf5e34b66d9f3a429af689c1c7128daba1d8208df60e81106b115c00d6fca", size = 188266850 }, + { url = "https://files.pythonhosted.org/packages/3c/12/34d71b350e89a204c2c7777a9bba0dcf2f19a5bfdd70b57c4dbc5ffd7154/triton-3.6.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:448e02fe6dc898e9e5aa89cf0ee5c371e99df5aa5e8ad976a80b93334f3494fd", size = 176133521 }, { url = "https://files.pythonhosted.org/packages/f9/0b/37d991d8c130ce81a8728ae3c25b6e60935838e9be1b58791f5997b24a54/triton-3.6.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:10c7f76c6e72d2ef08df639e3d0d30729112f47a56b0c81672edc05ee5116ac9", size = 188289450 }, + { url = "https://files.pythonhosted.org/packages/ce/4e/41b0c8033b503fd3cfcd12392cdd256945026a91ff02452bef40ec34bee7/triton-3.6.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1722e172d34e32abc3eb7711d0025bb69d7959ebea84e3b7f7a341cd7ed694d6", size = 176276087 }, { url = "https://files.pythonhosted.org/packages/35/f8/9c66bfc55361ec6d0e4040a0337fb5924ceb23de4648b8a81ae9d33b2b38/triton-3.6.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d002e07d7180fd65e622134fbd980c9a3d4211fb85224b56a0a0efbd422ab72f", size = 188400296 }, + { url = "https://files.pythonhosted.org/packages/49/55/5ecf0dcaa0f2fbbd4420f7ef227ee3cb172e91e5fede9d0ecaddc43363b4/triton-3.6.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ef5523241e7d1abca00f1d240949eebdd7c673b005edbbce0aca95b8191f1d43", size = 176138577 }, { url = "https://files.pythonhosted.org/packages/df/3d/9e7eee57b37c80cec63322c0231bb6da3cfe535a91d7a4d64896fcb89357/triton-3.6.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a17a5d5985f0ac494ed8a8e54568f092f7057ef60e1b0fa09d3fd1512064e803", size = 188273063 }, + { url = "https://files.pythonhosted.org/packages/48/db/56ee649cab5eaff4757541325aca81f52d02d4a7cd3506776cad2451e060/triton-3.6.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0b3a97e8ed304dfa9bd23bb41ca04cdf6b2e617d5e782a8653d616037a5d537d", size = 176274804 }, { url = 
"https://files.pythonhosted.org/packages/f6/56/6113c23ff46c00aae423333eb58b3e60bdfe9179d542781955a5e1514cb3/triton-3.6.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:46bd1c1af4b6704e554cad2eeb3b0a6513a980d470ccfa63189737340c7746a7", size = 188397994 }, ] @@ -3514,20 +3414,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584 }, ] -[[package]] -name = "virtualenv" -version = "20.36.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "distlib" }, - { name = "filelock" }, - { name = "platformdirs" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/aa/a3/4d310fa5f00863544e1d0f4de93bddec248499ccf97d4791bc3122c9d4f3/virtualenv-20.36.1.tar.gz", hash = "sha256:8befb5c81842c641f8ee658481e42641c68b5eab3521d8e092d18320902466ba", size = 6032239 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/6a/2a/dc2228b2888f51192c7dc766106cd475f1b768c10caaf9727659726f7391/virtualenv-20.36.1-py3-none-any.whl", hash = "sha256:575a8d6b124ef88f6f51d56d656132389f961062a9177016a50e4f507bbcc19f", size = 6008258 }, -] - [[package]] name = "wasabi" version = "1.1.3" From 9e6d46157c035196c6d12c35a19e297339ec78ed Mon Sep 17 00:00:00 2001 From: SreeHarshaNelaturu Date: Tue, 10 Feb 2026 17:48:03 +0100 Subject: [PATCH 2/4] delete duplicates in livecodebenchpro, latest schema changes --- .../126326f3-6521-45d1-aa14-5c51335c1929.json | 28 +- .../b3f5937a-1489-417b-8162-6c62dea0703d.json | 28 +- .../f06d6c4c-b2c4-4c48-9702-f0bf08af62c4.json | 28 +- .../809a1503-a161-4532-afd3-fdbd6551eb63.json | 28 +- .../808ca8e4-9b14-48ba-bb39-e3b6a5672c80.json | 79 + .../be076445-eb88-49b0-a855-2e0cb1551bab.json | 28 +- .../69210faf-04a8-46d4-b92b-94f2ca521c09.json | 28 +- .../ed293aa1-f64e-429d-bddf-91a35a4203d1.json | 79 + .../2bddd388-5e9a-423e-8767-37d6f9f69032.json | 79 + .../bfd991ca-13e9-4716-b389-11e0d2afe286.json | 28 +- .../b29b7c8e-759e-45fe-a9d3-1054f19af617.json | 28 +- .../801d2dc6-17e7-47f1-a54f-87b94a59b508.json | 28 +- .../def0b2e3-cf5f-4dfd-8f1c-827f98d1626a.json | 28 +- .../157dd68b-fcc2-416f-a2c0-c9781020e6af.json | 28 +- .../174f0e23-84f1-43d0-bcdf-11b83c37025a.json | 28 +- .../bef7254b-549f-4e6b-b5c8-31b84dc6acda.json | 28 +- .../aa236b03-b81f-431b-b049-7101cea165f2.json | 28 +- .../abc37028-a362-4e02-8499-1bb7497e0293.json | 28 +- .../ba46ef91-d157-4984-b3df-ce33d8d97f8e.json | 28 +- .../e70acf51-30ef-4c20-b7cc-51704d114d70.json | 28 +- .../0e57aa1f-48c6-42b7-9aee-43a29d21b83f.json | 28 +- .../de66cc70-b456-4165-a827-5193dd77e84d.json | 79 + .../e9139c52-ada0-4d1c-ae82-7852aacdb6ea.json | 79 + .../1dd8c827-72af-4c8f-9ead-989de7105590.json | 28 +- .../ead39f61-b408-42b2-808f-8421a3200c89.json | 28 +- .../f96bdb35-4d61-4fde-8d91-edf55f13dc03.json | 28 +- .../5516f77c-932a-4eaa-ac31-dda9260ce82d.json | 79 + .../8992cef5-df7e-40a1-b099-331532c3deb0.json | 28 +- .../a77c08d6-a782-440c-b545-c60b6169712d.json | 28 +- scripts/global-mmlu-lite/adapter.py | 10 +- scripts/livecodebenchpro/adapter.py | 88 + uv.lock | 4215 +++++++++-------- 32 files changed, 3237 insertions(+), 2194 deletions(-) create mode 100644 data/livecodebenchpro/aliyun/qwen3-next-80b-a3b-thinking/808ca8e4-9b14-48ba-bb39-e3b6a5672c80.json create mode 100644 data/livecodebenchpro/anthropic/claude-sonnet-4-5-20250929/ed293aa1-f64e-429d-bddf-91a35a4203d1.json create mode 
100644 data/livecodebenchpro/ark/ep-20250603132404-cgpjm/2bddd388-5e9a-423e-8767-37d6f9f69032.json create mode 100644 data/livecodebenchpro/openai/gpt-5-2025-08-07/de66cc70-b456-4165-a827-5193dd77e84d.json create mode 100644 data/livecodebenchpro/openai/gpt-5.2-2025-12-11/e9139c52-ada0-4d1c-ae82-7852aacdb6ea.json create mode 100644 data/livecodebenchpro/openai/o4-mini-2025-04-16/5516f77c-932a-4eaa-ac31-dda9260ce82d.json create mode 100644 scripts/livecodebenchpro/adapter.py diff --git a/data/livecodebenchpro/alibaba/qwen3-235b-a22b-thinking-2507/126326f3-6521-45d1-aa14-5c51335c1929.json b/data/livecodebenchpro/alibaba/qwen3-235b-a22b-thinking-2507/126326f3-6521-45d1-aa14-5c51335c1929.json index 5a0ceeec2..f3d8ed859 100644 --- a/data/livecodebenchpro/alibaba/qwen3-235b-a22b-thinking-2507/126326f3-6521-45d1-aa14-5c51335c1929.json +++ b/data/livecodebenchpro/alibaba/qwen3-235b-a22b-thinking-2507/126326f3-6521-45d1-aa14-5c51335c1929.json @@ -1,5 +1,5 @@ { - "schema_version": "0.1.0", + "schema_version": "0.2.0", "evaluation_id": "livecodebenchpro/qwen3-235b-a22b-thinking-2507/1760492095.8105888", "retrieved_timestamp": "1760492095.8105888", "source_metadata": { @@ -26,6 +26,13 @@ }, "score_details": { "score": 0.0 + }, + "source_data": { + "dataset_name": "Hard Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live" + ] } }, { @@ -39,6 +46,13 @@ }, "score_details": { "score": 0.1267605633802817 + }, + "source_data": { + "dataset_name": "Medium Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live" + ] } }, { @@ -52,12 +66,14 @@ }, "score_details": { "score": 0.7605633802816901 + }, + "source_data": { + "dataset_name": "Easy Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" + ] } } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" ] } diff --git a/data/livecodebenchpro/alibaba/qwen3-30b-a3b/b3f5937a-1489-417b-8162-6c62dea0703d.json b/data/livecodebenchpro/alibaba/qwen3-30b-a3b/b3f5937a-1489-417b-8162-6c62dea0703d.json index 3c599d83c..86221fbb2 100644 --- a/data/livecodebenchpro/alibaba/qwen3-30b-a3b/b3f5937a-1489-417b-8162-6c62dea0703d.json +++ b/data/livecodebenchpro/alibaba/qwen3-30b-a3b/b3f5937a-1489-417b-8162-6c62dea0703d.json @@ -1,5 +1,5 @@ { - "schema_version": "0.1.0", + "schema_version": "0.2.0", "evaluation_id": "livecodebenchpro/qwen3-30b-a3b/1760492095.8105888", "retrieved_timestamp": "1760492095.8105888", "source_metadata": { @@ -26,6 +26,13 @@ }, "score_details": { "score": 0.0 + }, + "source_data": { + "dataset_name": "Hard Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live" + ] } }, { @@ -39,6 +46,13 @@ }, "score_details": { "score": 0.028169014084507043 + }, + "source_data": { + "dataset_name": "Medium Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live" + ] } }, { @@ -52,12 +66,14 @@ }, "score_details": { "score": 
0.5774647887323944 + }, + "source_data": { + "dataset_name": "Easy Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" + ] } } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" ] } diff --git a/data/livecodebenchpro/alibaba/qwen3-max/f06d6c4c-b2c4-4c48-9702-f0bf08af62c4.json b/data/livecodebenchpro/alibaba/qwen3-max/f06d6c4c-b2c4-4c48-9702-f0bf08af62c4.json index 2b1425f7a..f517719a6 100644 --- a/data/livecodebenchpro/alibaba/qwen3-max/f06d6c4c-b2c4-4c48-9702-f0bf08af62c4.json +++ b/data/livecodebenchpro/alibaba/qwen3-max/f06d6c4c-b2c4-4c48-9702-f0bf08af62c4.json @@ -1,5 +1,5 @@ { - "schema_version": "0.1.0", + "schema_version": "0.2.0", "evaluation_id": "livecodebenchpro/alibaba/qwen3-max/1760492095.8105888", "retrieved_timestamp": "1760492095.8105888", "source_metadata": { @@ -26,6 +26,13 @@ }, "score_details": { "score": 0.0 + }, + "source_data": { + "dataset_name": "Hard Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live" + ] } }, { @@ -39,6 +46,13 @@ }, "score_details": { "score": 0.04225352112676056 + }, + "source_data": { + "dataset_name": "Medium Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live" + ] } }, { @@ -52,12 +66,14 @@ }, "score_details": { "score": 0.36619718309859156 + }, + "source_data": { + "dataset_name": "Easy Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" + ] } } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" ] } diff --git a/data/livecodebenchpro/alibaba/qwen3-next-80b-a3b-thinking/809a1503-a161-4532-afd3-fdbd6551eb63.json b/data/livecodebenchpro/alibaba/qwen3-next-80b-a3b-thinking/809a1503-a161-4532-afd3-fdbd6551eb63.json index 7966b382b..3255f5bca 100644 --- a/data/livecodebenchpro/alibaba/qwen3-next-80b-a3b-thinking/809a1503-a161-4532-afd3-fdbd6551eb63.json +++ b/data/livecodebenchpro/alibaba/qwen3-next-80b-a3b-thinking/809a1503-a161-4532-afd3-fdbd6551eb63.json @@ -1,5 +1,5 @@ { - "schema_version": "0.1.0", + "schema_version": "0.2.0", "evaluation_id": "livecodebenchpro/qwen3-next-80b-a3b-thinking/1760492095.8105888", "retrieved_timestamp": "1760492095.8105888", "source_metadata": { @@ -26,6 +26,13 @@ }, "score_details": { "score": 0.0 + }, + "source_data": { + "dataset_name": "Hard Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live" + ] } }, { @@ -39,6 +46,13 @@ }, "score_details": { "score": 0.14084507042253522 + }, + "source_data": { + "dataset_name": "Medium Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live" + ] } }, { @@ 
-52,12 +66,14 @@ }, "score_details": { "score": 0.7464788732394366 + }, + "source_data": { + "dataset_name": "Easy Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" + ] } } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" ] } diff --git a/data/livecodebenchpro/aliyun/qwen3-next-80b-a3b-thinking/808ca8e4-9b14-48ba-bb39-e3b6a5672c80.json b/data/livecodebenchpro/aliyun/qwen3-next-80b-a3b-thinking/808ca8e4-9b14-48ba-bb39-e3b6a5672c80.json new file mode 100644 index 000000000..78b03c308 --- /dev/null +++ b/data/livecodebenchpro/aliyun/qwen3-next-80b-a3b-thinking/808ca8e4-9b14-48ba-bb39-e3b6a5672c80.json @@ -0,0 +1,79 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "livecodebenchpro/qwen3-next-80b-a3b-thinking/1770683238.099205", + "retrieved_timestamp": "1770683238.099205", + "source_metadata": { + "source_name": "Live Code Bench Pro", + "source_type": "documentation", + "source_organization_name": "New York University, Princeton University, University of California San Diego, University of Washington and Canyon Crest Academy", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qwen3-next-80b-a3b-thinking", + "id": "aliyun/qwen3-next-80b-a3b-thinking", + "developer": "aliyun", + "inference_platform": "aliyun" + }, + "evaluation_results": [ + { + "evaluation_name": "Hard Problems", + "source_data": { + "dataset_name": "Hard Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live" + ] + }, + "metric_config": { + "evaluation_description": "Pass@1 on Hard Problems", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "Medium Problems", + "source_data": { + "dataset_name": "Medium Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live" + ] + }, + "metric_config": { + "evaluation_description": "Pass@1 on Medium Problems", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0704 + } + }, + { + "evaluation_name": "Easy Problems", + "source_data": { + "dataset_name": "Easy Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" + ] + }, + "metric_config": { + "evaluation_description": "Pass@1 on Easy Problems", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.6901 + } + } + ] +} \ No newline at end of file diff --git a/data/livecodebenchpro/anthropic/claude-3-7-sonnet-20250219/be076445-eb88-49b0-a855-2e0cb1551bab.json b/data/livecodebenchpro/anthropic/claude-3-7-sonnet-20250219/be076445-eb88-49b0-a855-2e0cb1551bab.json index 4f2674b8c..6816347c8 100644 --- a/data/livecodebenchpro/anthropic/claude-3-7-sonnet-20250219/be076445-eb88-49b0-a855-2e0cb1551bab.json +++ 
b/data/livecodebenchpro/anthropic/claude-3-7-sonnet-20250219/be076445-eb88-49b0-a855-2e0cb1551bab.json @@ -1,5 +1,5 @@ { - "schema_version": "0.1.0", + "schema_version": "0.2.0", "evaluation_id": "livecodebenchpro/claude-3-7-sonnet-20250219/1760492095.8105888", "retrieved_timestamp": "1760492095.8105888", "source_metadata": { @@ -26,6 +26,13 @@ }, "score_details": { "score": 0.0 + }, + "source_data": { + "dataset_name": "Hard Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live" + ] } }, { @@ -39,6 +46,13 @@ }, "score_details": { "score": 0.0 + }, + "source_data": { + "dataset_name": "Medium Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live" + ] } }, { @@ -52,12 +66,14 @@ }, "score_details": { "score": 0.28169014084507044 + }, + "source_data": { + "dataset_name": "Easy Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" + ] } } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" ] } diff --git a/data/livecodebenchpro/anthropic/claude-3.7-sonnet/69210faf-04a8-46d4-b92b-94f2ca521c09.json b/data/livecodebenchpro/anthropic/claude-3.7-sonnet/69210faf-04a8-46d4-b92b-94f2ca521c09.json index 7a2761278..586366674 100644 --- a/data/livecodebenchpro/anthropic/claude-3.7-sonnet/69210faf-04a8-46d4-b92b-94f2ca521c09.json +++ b/data/livecodebenchpro/anthropic/claude-3.7-sonnet/69210faf-04a8-46d4-b92b-94f2ca521c09.json @@ -1,5 +1,5 @@ { - "schema_version": "0.1.0", + "schema_version": "0.2.0", "evaluation_id": "livecodebenchpro/anthropic/claude-3.7-sonnet/1760492095.8105888", "retrieved_timestamp": "1760492095.8105888", "source_metadata": { @@ -26,6 +26,13 @@ }, "score_details": { "score": 0.0 + }, + "source_data": { + "dataset_name": "Hard Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live" + ] } }, { @@ -39,6 +46,13 @@ }, "score_details": { "score": 0.014084507042253521 + }, + "source_data": { + "dataset_name": "Medium Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live" + ] } }, { @@ -52,12 +66,14 @@ }, "score_details": { "score": 0.15492957746478872 + }, + "source_data": { + "dataset_name": "Easy Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" + ] } } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" ] } diff --git a/data/livecodebenchpro/anthropic/claude-sonnet-4-5-20250929/ed293aa1-f64e-429d-bddf-91a35a4203d1.json b/data/livecodebenchpro/anthropic/claude-sonnet-4-5-20250929/ed293aa1-f64e-429d-bddf-91a35a4203d1.json new file mode 100644 index 000000000..304dcadde --- 
/dev/null +++ b/data/livecodebenchpro/anthropic/claude-sonnet-4-5-20250929/ed293aa1-f64e-429d-bddf-91a35a4203d1.json @@ -0,0 +1,79 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "livecodebenchpro/claude-sonnet-4-5-20250929/1770683238.099205", + "retrieved_timestamp": "1770683238.099205", + "source_metadata": { + "source_name": "Live Code Bench Pro", + "source_type": "documentation", + "source_organization_name": "New York University, Princeton University, University of California San Diego, University of Washington and Canyon Crest Academy", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "claude-sonnet-4-5-20250929", + "id": "anthropic/claude-sonnet-4-5-20250929", + "developer": "anthropic", + "inference_platform": "anthropic" + }, + "evaluation_results": [ + { + "evaluation_name": "Hard Problems", + "source_data": { + "dataset_name": "Hard Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live" + ] + }, + "metric_config": { + "evaluation_description": "Pass@1 on Hard Problems", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "Medium Problems", + "source_data": { + "dataset_name": "Medium Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live" + ] + }, + "metric_config": { + "evaluation_description": "Pass@1 on Medium Problems", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "Easy Problems", + "source_data": { + "dataset_name": "Easy Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" + ] + }, + "metric_config": { + "evaluation_description": "Pass@1 on Easy Problems", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5352 + } + } + ] +} \ No newline at end of file diff --git a/data/livecodebenchpro/ark/ep-20250603132404-cgpjm/2bddd388-5e9a-423e-8767-37d6f9f69032.json b/data/livecodebenchpro/ark/ep-20250603132404-cgpjm/2bddd388-5e9a-423e-8767-37d6f9f69032.json new file mode 100644 index 000000000..5c18e44af --- /dev/null +++ b/data/livecodebenchpro/ark/ep-20250603132404-cgpjm/2bddd388-5e9a-423e-8767-37d6f9f69032.json @@ -0,0 +1,79 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "livecodebenchpro/ep-20250603132404-cgpjm/1770683238.099205", + "retrieved_timestamp": "1770683238.099205", + "source_metadata": { + "source_name": "Live Code Bench Pro", + "source_type": "documentation", + "source_organization_name": "New York University, Princeton University, University of California San Diego, University of Washington and Canyon Crest Academy", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ep-20250603132404-cgpjm", + "id": "ark/ep-20250603132404-cgpjm", + "developer": "ark", + "inference_platform": "ark" + }, + "evaluation_results": [ + { + "evaluation_name": "Hard Problems", + "source_data": { + "dataset_name": "Hard Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live" + ] + }, + "metric_config": { + "evaluation_description": 
"Pass@1 on Hard Problems", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "Medium Problems", + "source_data": { + "dataset_name": "Medium Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live" + ] + }, + "metric_config": { + "evaluation_description": "Pass@1 on Medium Problems", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0141 + } + }, + { + "evaluation_name": "Easy Problems", + "source_data": { + "dataset_name": "Easy Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" + ] + }, + "metric_config": { + "evaluation_description": "Pass@1 on Easy Problems", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.507 + } + } + ] +} \ No newline at end of file diff --git a/data/livecodebenchpro/bytedance/doubao-seed-1-6-thinking-250615/bfd991ca-13e9-4716-b389-11e0d2afe286.json b/data/livecodebenchpro/bytedance/doubao-seed-1-6-thinking-250615/bfd991ca-13e9-4716-b389-11e0d2afe286.json index d6f18e74a..14a043adb 100644 --- a/data/livecodebenchpro/bytedance/doubao-seed-1-6-thinking-250615/bfd991ca-13e9-4716-b389-11e0d2afe286.json +++ b/data/livecodebenchpro/bytedance/doubao-seed-1-6-thinking-250615/bfd991ca-13e9-4716-b389-11e0d2afe286.json @@ -1,5 +1,5 @@ { - "schema_version": "0.1.0", + "schema_version": "0.2.0", "evaluation_id": "livecodebenchpro/doubao-seed-1-6-thinking-250615/1760492095.8105888", "retrieved_timestamp": "1760492095.8105888", "source_metadata": { @@ -26,6 +26,13 @@ }, "score_details": { "score": 0.0 + }, + "source_data": { + "dataset_name": "Hard Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live" + ] } }, { @@ -39,6 +46,13 @@ }, "score_details": { "score": 0.07042253521126761 + }, + "source_data": { + "dataset_name": "Medium Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live" + ] } }, { @@ -52,12 +66,14 @@ }, "score_details": { "score": 0.5774647887323944 + }, + "source_data": { + "dataset_name": "Easy Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" + ] } } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" ] } diff --git a/data/livecodebenchpro/deepseek/chat-v3-0324/b29b7c8e-759e-45fe-a9d3-1054f19af617.json b/data/livecodebenchpro/deepseek/chat-v3-0324/b29b7c8e-759e-45fe-a9d3-1054f19af617.json index db300d0f6..88ca0a5d0 100644 --- a/data/livecodebenchpro/deepseek/chat-v3-0324/b29b7c8e-759e-45fe-a9d3-1054f19af617.json +++ b/data/livecodebenchpro/deepseek/chat-v3-0324/b29b7c8e-759e-45fe-a9d3-1054f19af617.json @@ -1,5 +1,5 @@ { - "schema_version": "0.1.0", + "schema_version": "0.2.0", "evaluation_id": 
"livecodebenchpro/deepseek/chat-v3-0324/1760492095.8105888", "retrieved_timestamp": "1760492095.8105888", "source_metadata": { @@ -26,6 +26,13 @@ }, "score_details": { "score": 0.0 + }, + "source_data": { + "dataset_name": "Hard Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live" + ] } }, { @@ -39,6 +46,13 @@ }, "score_details": { "score": 0.0 + }, + "source_data": { + "dataset_name": "Medium Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live" + ] } }, { @@ -52,12 +66,14 @@ }, "score_details": { "score": 0.19718309859154928 + }, + "source_data": { + "dataset_name": "Easy Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" + ] } } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" ] } diff --git a/data/livecodebenchpro/deepseek/ep-20250214004308-p7n89/801d2dc6-17e7-47f1-a54f-87b94a59b508.json b/data/livecodebenchpro/deepseek/ep-20250214004308-p7n89/801d2dc6-17e7-47f1-a54f-87b94a59b508.json index 5c21d1375..078e0a459 100644 --- a/data/livecodebenchpro/deepseek/ep-20250214004308-p7n89/801d2dc6-17e7-47f1-a54f-87b94a59b508.json +++ b/data/livecodebenchpro/deepseek/ep-20250214004308-p7n89/801d2dc6-17e7-47f1-a54f-87b94a59b508.json @@ -1,5 +1,5 @@ { - "schema_version": "0.1.0", + "schema_version": "0.2.0", "evaluation_id": "livecodebenchpro/ep-20250214004308-p7n89/1760492095.8105888", "retrieved_timestamp": "1760492095.8105888", "source_metadata": { @@ -26,6 +26,13 @@ }, "score_details": { "score": 0.0 + }, + "source_data": { + "dataset_name": "Hard Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live" + ] } }, { @@ -39,6 +46,13 @@ }, "score_details": { "score": 0.014084507042253521 + }, + "source_data": { + "dataset_name": "Medium Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live" + ] } }, { @@ -52,12 +66,14 @@ }, "score_details": { "score": 0.4225352112676056 + }, + "source_data": { + "dataset_name": "Easy Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" + ] } } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" ] } diff --git a/data/livecodebenchpro/deepseek/ep-20250228232227-z44x5/def0b2e3-cf5f-4dfd-8f1c-827f98d1626a.json b/data/livecodebenchpro/deepseek/ep-20250228232227-z44x5/def0b2e3-cf5f-4dfd-8f1c-827f98d1626a.json index 085690655..4ea07e2df 100644 --- a/data/livecodebenchpro/deepseek/ep-20250228232227-z44x5/def0b2e3-cf5f-4dfd-8f1c-827f98d1626a.json +++ b/data/livecodebenchpro/deepseek/ep-20250228232227-z44x5/def0b2e3-cf5f-4dfd-8f1c-827f98d1626a.json @@ -1,5 +1,5 
@@ { - "schema_version": "0.1.0", + "schema_version": "0.2.0", "evaluation_id": "livecodebenchpro/ep-20250228232227-z44x5/1760492095.8105888", "retrieved_timestamp": "1760492095.8105888", "source_metadata": { @@ -26,6 +26,13 @@ }, "score_details": { "score": 0.0 + }, + "source_data": { + "dataset_name": "Hard Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live" + ] } }, { @@ -39,6 +46,13 @@ }, "score_details": { "score": 0.0 + }, + "source_data": { + "dataset_name": "Medium Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live" + ] } }, { @@ -52,12 +66,14 @@ }, "score_details": { "score": 0.1267605633802817 + }, + "source_data": { + "dataset_name": "Easy Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" + ] } } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" ] } diff --git a/data/livecodebenchpro/deepseek/ep-20250603132404-cgpjm/157dd68b-fcc2-416f-a2c0-c9781020e6af.json b/data/livecodebenchpro/deepseek/ep-20250603132404-cgpjm/157dd68b-fcc2-416f-a2c0-c9781020e6af.json index 49a311ec8..114e45638 100644 --- a/data/livecodebenchpro/deepseek/ep-20250603132404-cgpjm/157dd68b-fcc2-416f-a2c0-c9781020e6af.json +++ b/data/livecodebenchpro/deepseek/ep-20250603132404-cgpjm/157dd68b-fcc2-416f-a2c0-c9781020e6af.json @@ -1,5 +1,5 @@ { - "schema_version": "0.1.0", + "schema_version": "0.2.0", "evaluation_id": "livecodebenchpro/ep-20250603132404-cgpjm/1760492095.8105888", "retrieved_timestamp": "1760492095.8105888", "source_metadata": { @@ -26,6 +26,13 @@ }, "score_details": { "score": 0.0 + }, + "source_data": { + "dataset_name": "Hard Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live" + ] } }, { @@ -39,6 +46,13 @@ }, "score_details": { "score": 0.08450704225352113 + }, + "source_data": { + "dataset_name": "Medium Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live" + ] } }, { @@ -52,12 +66,14 @@ }, "score_details": { "score": 0.5774647887323944 + }, + "source_data": { + "dataset_name": "Easy Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" + ] } } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" ] } diff --git a/data/livecodebenchpro/google/gemini-2.5-flash/174f0e23-84f1-43d0-bcdf-11b83c37025a.json b/data/livecodebenchpro/google/gemini-2.5-flash/174f0e23-84f1-43d0-bcdf-11b83c37025a.json index fb3e0da1e..57f7f41bd 100644 --- a/data/livecodebenchpro/google/gemini-2.5-flash/174f0e23-84f1-43d0-bcdf-11b83c37025a.json +++ 
b/data/livecodebenchpro/google/gemini-2.5-flash/174f0e23-84f1-43d0-bcdf-11b83c37025a.json @@ -1,5 +1,5 @@ { - "schema_version": "0.1.0", + "schema_version": "0.2.0", "evaluation_id": "livecodebenchpro/google/gemini-2.5-flash/1760492095.8105888", "retrieved_timestamp": "1760492095.8105888", "source_metadata": { @@ -26,6 +26,13 @@ }, "score_details": { "score": 0.0 + }, + "source_data": { + "dataset_name": "Hard Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live" + ] } }, { @@ -39,6 +46,13 @@ }, "score_details": { "score": 0.028169014084507043 + }, + "source_data": { + "dataset_name": "Medium Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live" + ] } }, { @@ -52,12 +66,14 @@ }, "score_details": { "score": 0.38028169014084506 + }, + "source_data": { + "dataset_name": "Easy Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" + ] } } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" ] } diff --git a/data/livecodebenchpro/google/gemini-2.5-pro/bef7254b-549f-4e6b-b5c8-31b84dc6acda.json b/data/livecodebenchpro/google/gemini-2.5-pro/bef7254b-549f-4e6b-b5c8-31b84dc6acda.json index 8a9e9168b..a5be78bce 100644 --- a/data/livecodebenchpro/google/gemini-2.5-pro/bef7254b-549f-4e6b-b5c8-31b84dc6acda.json +++ b/data/livecodebenchpro/google/gemini-2.5-pro/bef7254b-549f-4e6b-b5c8-31b84dc6acda.json @@ -1,5 +1,5 @@ { - "schema_version": "0.1.0", + "schema_version": "0.2.0", "evaluation_id": "livecodebenchpro/gemini-2.5-pro/1760492095.8105888", "retrieved_timestamp": "1760492095.8105888", "source_metadata": { @@ -26,6 +26,13 @@ }, "score_details": { "score": 0.014084507042253521 + }, + "source_data": { + "dataset_name": "Hard Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live" + ] } }, { @@ -39,6 +46,13 @@ }, "score_details": { "score": 0.2112676056338028 + }, + "source_data": { + "dataset_name": "Medium Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live" + ] } }, { @@ -52,12 +66,14 @@ }, "score_details": { "score": 0.7183098591549296 + }, + "source_data": { + "dataset_name": "Easy Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" + ] } } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" ] } diff --git a/data/livecodebenchpro/kuaishou/kwaipilot-40b-0604/aa236b03-b81f-431b-b049-7101cea165f2.json b/data/livecodebenchpro/kuaishou/kwaipilot-40b-0604/aa236b03-b81f-431b-b049-7101cea165f2.json index f38c1ba83..2cbd5d730 100644 --- 
a/data/livecodebenchpro/kuaishou/kwaipilot-40b-0604/aa236b03-b81f-431b-b049-7101cea165f2.json +++ b/data/livecodebenchpro/kuaishou/kwaipilot-40b-0604/aa236b03-b81f-431b-b049-7101cea165f2.json @@ -1,5 +1,5 @@ { - "schema_version": "0.1.0", + "schema_version": "0.2.0", "evaluation_id": "livecodebenchpro/kwaipilot-40b-0604/1760492095.8105888", "retrieved_timestamp": "1760492095.8105888", "source_metadata": { @@ -26,6 +26,13 @@ }, "score_details": { "score": 0.0 + }, + "source_data": { + "dataset_name": "Hard Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live" + ] } }, { @@ -39,6 +46,13 @@ }, "score_details": { "score": 0.07042253521126761 + }, + "source_data": { + "dataset_name": "Medium Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live" + ] } }, { @@ -52,12 +66,14 @@ }, "score_details": { "score": 0.056338028169014086 + }, + "source_data": { + "dataset_name": "Easy Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" + ] } } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" ] } diff --git a/data/livecodebenchpro/meta/llama-4-maverick/abc37028-a362-4e02-8499-1bb7497e0293.json b/data/livecodebenchpro/meta/llama-4-maverick/abc37028-a362-4e02-8499-1bb7497e0293.json index 2c6ba72aa..949352df3 100644 --- a/data/livecodebenchpro/meta/llama-4-maverick/abc37028-a362-4e02-8499-1bb7497e0293.json +++ b/data/livecodebenchpro/meta/llama-4-maverick/abc37028-a362-4e02-8499-1bb7497e0293.json @@ -1,5 +1,5 @@ { - "schema_version": "0.1.0", + "schema_version": "0.2.0", "evaluation_id": "livecodebenchpro/meta/llama-4-maverick/1760492095.8105888", "retrieved_timestamp": "1760492095.8105888", "source_metadata": { @@ -26,6 +26,13 @@ }, "score_details": { "score": 0.0 + }, + "source_data": { + "dataset_name": "Hard Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live" + ] } }, { @@ -39,6 +46,13 @@ }, "score_details": { "score": 0.0 + }, + "source_data": { + "dataset_name": "Medium Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live" + ] } }, { @@ -52,12 +66,14 @@ }, "score_details": { "score": 0.09859154929577464 + }, + "source_data": { + "dataset_name": "Easy Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" + ] } } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" ] } diff --git a/data/livecodebenchpro/openai/gpt-4.1/ba46ef91-d157-4984-b3df-ce33d8d97f8e.json b/data/livecodebenchpro/openai/gpt-4.1/ba46ef91-d157-4984-b3df-ce33d8d97f8e.json index 3c41a7ab4..28d6a0f6c 100644 --- 
a/data/livecodebenchpro/openai/gpt-4.1/ba46ef91-d157-4984-b3df-ce33d8d97f8e.json +++ b/data/livecodebenchpro/openai/gpt-4.1/ba46ef91-d157-4984-b3df-ce33d8d97f8e.json @@ -1,5 +1,5 @@ { - "schema_version": "0.1.0", + "schema_version": "0.2.0", "evaluation_id": "livecodebenchpro/openai/gpt-4.1/1760492095.8105888", "retrieved_timestamp": "1760492095.8105888", "source_metadata": { @@ -26,6 +26,13 @@ }, "score_details": { "score": 0.0 + }, + "source_data": { + "dataset_name": "Hard Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live" + ] } }, { @@ -39,6 +46,13 @@ }, "score_details": { "score": 0.0 + }, + "source_data": { + "dataset_name": "Medium Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live" + ] } }, { @@ -52,12 +66,14 @@ }, "score_details": { "score": 0.19718309859154928 + }, + "source_data": { + "dataset_name": "Easy Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" + ] } } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" ] } diff --git a/data/livecodebenchpro/openai/gpt-4o-2024-11-20/e70acf51-30ef-4c20-b7cc-51704d114d70.json b/data/livecodebenchpro/openai/gpt-4o-2024-11-20/e70acf51-30ef-4c20-b7cc-51704d114d70.json index 512ca6a35..e67250be3 100644 --- a/data/livecodebenchpro/openai/gpt-4o-2024-11-20/e70acf51-30ef-4c20-b7cc-51704d114d70.json +++ b/data/livecodebenchpro/openai/gpt-4o-2024-11-20/e70acf51-30ef-4c20-b7cc-51704d114d70.json @@ -1,5 +1,5 @@ { - "schema_version": "0.1.0", + "schema_version": "0.2.0", "evaluation_id": "livecodebenchpro/openai/gpt-4o-2024-11-20/1760492095.8105888", "retrieved_timestamp": "1760492095.8105888", "source_metadata": { @@ -26,6 +26,13 @@ }, "score_details": { "score": 0.0 + }, + "source_data": { + "dataset_name": "Hard Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live" + ] } }, { @@ -39,6 +46,13 @@ }, "score_details": { "score": 0.0 + }, + "source_data": { + "dataset_name": "Medium Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live" + ] } }, { @@ -52,12 +66,14 @@ }, "score_details": { "score": 0.07042253521126761 + }, + "source_data": { + "dataset_name": "Easy Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" + ] } } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" ] } diff --git a/data/livecodebenchpro/openai/gpt-5-2025-08-07/0e57aa1f-48c6-42b7-9aee-43a29d21b83f.json b/data/livecodebenchpro/openai/gpt-5-2025-08-07/0e57aa1f-48c6-42b7-9aee-43a29d21b83f.json index 8d72bdab5..cf3bb7a63 100644 --- 
a/data/livecodebenchpro/openai/gpt-5-2025-08-07/0e57aa1f-48c6-42b7-9aee-43a29d21b83f.json +++ b/data/livecodebenchpro/openai/gpt-5-2025-08-07/0e57aa1f-48c6-42b7-9aee-43a29d21b83f.json @@ -1,5 +1,5 @@ { - "schema_version": "0.1.0", + "schema_version": "0.2.0", "evaluation_id": "livecodebenchpro/gpt-5-2025-08-07/1760492095.8105888", "retrieved_timestamp": "1760492095.8105888", "source_metadata": { @@ -26,6 +26,13 @@ }, "score_details": { "score": 0.04225352112676056 + }, + "source_data": { + "dataset_name": "Hard Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live" + ] } }, { @@ -39,6 +46,13 @@ }, "score_details": { "score": 0.4084507042253521 + }, + "source_data": { + "dataset_name": "Medium Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live" + ] } }, { @@ -52,12 +66,14 @@ }, "score_details": { "score": 0.8873239436619719 + }, + "source_data": { + "dataset_name": "Easy Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" + ] } } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" ] } diff --git a/data/livecodebenchpro/openai/gpt-5-2025-08-07/de66cc70-b456-4165-a827-5193dd77e84d.json b/data/livecodebenchpro/openai/gpt-5-2025-08-07/de66cc70-b456-4165-a827-5193dd77e84d.json new file mode 100644 index 000000000..348bade22 --- /dev/null +++ b/data/livecodebenchpro/openai/gpt-5-2025-08-07/de66cc70-b456-4165-a827-5193dd77e84d.json @@ -0,0 +1,79 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "livecodebenchpro/gpt-5-2025-08-07/1770683238.099205", + "retrieved_timestamp": "1770683238.099205", + "source_metadata": { + "source_name": "Live Code Bench Pro", + "source_type": "documentation", + "source_organization_name": "New York University, Princeton University, University of California San Diego, University of Washington and Canyon Crest Academy", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gpt-5-2025-08-07", + "id": "openai/gpt-5-2025-08-07", + "developer": "openai", + "inference_platform": "openai" + }, + "evaluation_results": [ + { + "evaluation_name": "Hard Problems", + "source_data": { + "dataset_name": "Hard Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live" + ] + }, + "metric_config": { + "evaluation_description": "Pass@1 on Hard Problems", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0423 + } + }, + { + "evaluation_name": "Medium Problems", + "source_data": { + "dataset_name": "Medium Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live" + ] + }, + "metric_config": { + "evaluation_description": "Pass@1 on Medium Problems", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.4085 + } + }, + { + "evaluation_name": "Easy Problems", + 
"source_data": { + "dataset_name": "Easy Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" + ] + }, + "metric_config": { + "evaluation_description": "Pass@1 on Easy Problems", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9014 + } + } + ] +} \ No newline at end of file diff --git a/data/livecodebenchpro/openai/gpt-5.2-2025-12-11/e9139c52-ada0-4d1c-ae82-7852aacdb6ea.json b/data/livecodebenchpro/openai/gpt-5.2-2025-12-11/e9139c52-ada0-4d1c-ae82-7852aacdb6ea.json new file mode 100644 index 000000000..8996fcf9e --- /dev/null +++ b/data/livecodebenchpro/openai/gpt-5.2-2025-12-11/e9139c52-ada0-4d1c-ae82-7852aacdb6ea.json @@ -0,0 +1,79 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "livecodebenchpro/gpt-5.2-2025-12-11/1770683238.099205", + "retrieved_timestamp": "1770683238.099205", + "source_metadata": { + "source_name": "Live Code Bench Pro", + "source_type": "documentation", + "source_organization_name": "New York University, Princeton University, University of California San Diego, University of Washington and Canyon Crest Academy", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gpt-5.2-2025-12-11", + "id": "openai/gpt-5.2-2025-12-11", + "developer": "openai", + "inference_platform": "openai" + }, + "evaluation_results": [ + { + "evaluation_name": "Hard Problems", + "source_data": { + "dataset_name": "Hard Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live" + ] + }, + "metric_config": { + "evaluation_description": "Pass@1 on Hard Problems", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.1594 + } + }, + { + "evaluation_name": "Medium Problems", + "source_data": { + "dataset_name": "Medium Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live" + ] + }, + "metric_config": { + "evaluation_description": "Pass@1 on Medium Problems", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.5211 + } + }, + { + "evaluation_name": "Easy Problems", + "source_data": { + "dataset_name": "Easy Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" + ] + }, + "metric_config": { + "evaluation_description": "Pass@1 on Easy Problems", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.9014 + } + } + ] +} \ No newline at end of file diff --git a/data/livecodebenchpro/openai/gpt-oss-120b/1dd8c827-72af-4c8f-9ead-989de7105590.json b/data/livecodebenchpro/openai/gpt-oss-120b/1dd8c827-72af-4c8f-9ead-989de7105590.json index dc3a79cb3..d9a8cbc70 100644 --- a/data/livecodebenchpro/openai/gpt-oss-120b/1dd8c827-72af-4c8f-9ead-989de7105590.json +++ b/data/livecodebenchpro/openai/gpt-oss-120b/1dd8c827-72af-4c8f-9ead-989de7105590.json @@ -1,5 +1,5 @@ { - "schema_version": "0.1.0", + "schema_version": "0.2.0", "evaluation_id": "livecodebenchpro/openai/gpt-oss-120b/1760492095.8105888", "retrieved_timestamp": "1760492095.8105888", "source_metadata": { @@ -26,6 +26,13 @@ }, 
"score_details": { "score": 0.0 + }, + "source_data": { + "dataset_name": "Hard Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live" + ] } }, { @@ -39,6 +46,13 @@ }, "score_details": { "score": 0.11267605633802817 + }, + "source_data": { + "dataset_name": "Medium Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live" + ] } }, { @@ -52,12 +66,14 @@ }, "score_details": { "score": 0.6619718309859155 + }, + "source_data": { + "dataset_name": "Easy Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" + ] } } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" ] } diff --git a/data/livecodebenchpro/openai/gpt-oss-20b/ead39f61-b408-42b2-808f-8421a3200c89.json b/data/livecodebenchpro/openai/gpt-oss-20b/ead39f61-b408-42b2-808f-8421a3200c89.json index 0b0caa4ef..fd7123119 100644 --- a/data/livecodebenchpro/openai/gpt-oss-20b/ead39f61-b408-42b2-808f-8421a3200c89.json +++ b/data/livecodebenchpro/openai/gpt-oss-20b/ead39f61-b408-42b2-808f-8421a3200c89.json @@ -1,5 +1,5 @@ { - "schema_version": "0.1.0", + "schema_version": "0.2.0", "evaluation_id": "livecodebenchpro/openai/gpt-oss-20b/1760492095.8105888", "retrieved_timestamp": "1760492095.8105888", "source_metadata": { @@ -26,6 +26,13 @@ }, "score_details": { "score": 0.0 + }, + "source_data": { + "dataset_name": "Hard Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live" + ] } }, { @@ -39,6 +46,13 @@ }, "score_details": { "score": 0.056338028169014086 + }, + "source_data": { + "dataset_name": "Medium Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live" + ] } }, { @@ -52,12 +66,14 @@ }, "score_details": { "score": 0.5070422535211268 + }, + "source_data": { + "dataset_name": "Easy Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" + ] } } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" ] } diff --git a/data/livecodebenchpro/openai/o3-2025-04-16/f96bdb35-4d61-4fde-8d91-edf55f13dc03.json b/data/livecodebenchpro/openai/o3-2025-04-16/f96bdb35-4d61-4fde-8d91-edf55f13dc03.json index 6a43d838a..5fc307953 100644 --- a/data/livecodebenchpro/openai/o3-2025-04-16/f96bdb35-4d61-4fde-8d91-edf55f13dc03.json +++ b/data/livecodebenchpro/openai/o3-2025-04-16/f96bdb35-4d61-4fde-8d91-edf55f13dc03.json @@ -1,5 +1,5 @@ { - "schema_version": "0.1.0", + "schema_version": "0.2.0", "evaluation_id": "livecodebenchpro/o3-2025-04-16/1760492095.8105888", "retrieved_timestamp": "1760492095.8105888", "source_metadata": { @@ -26,6 +26,13 @@ }, "score_details": { 
"score": 0.0 + }, + "source_data": { + "dataset_name": "Hard Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live" + ] } }, { @@ -39,6 +46,13 @@ }, "score_details": { "score": 0.22535211267605634 + }, + "source_data": { + "dataset_name": "Medium Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live" + ] } }, { @@ -52,12 +66,14 @@ }, "score_details": { "score": 0.7183098591549296 + }, + "source_data": { + "dataset_name": "Easy Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" + ] } } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" ] } diff --git a/data/livecodebenchpro/openai/o4-mini-2025-04-16/5516f77c-932a-4eaa-ac31-dda9260ce82d.json b/data/livecodebenchpro/openai/o4-mini-2025-04-16/5516f77c-932a-4eaa-ac31-dda9260ce82d.json new file mode 100644 index 000000000..21df96195 --- /dev/null +++ b/data/livecodebenchpro/openai/o4-mini-2025-04-16/5516f77c-932a-4eaa-ac31-dda9260ce82d.json @@ -0,0 +1,79 @@ +{ + "schema_version": "0.2.0", + "evaluation_id": "livecodebenchpro/o4-mini-2025-04-16/1770683238.099205", + "retrieved_timestamp": "1770683238.099205", + "source_metadata": { + "source_name": "Live Code Bench Pro", + "source_type": "documentation", + "source_organization_name": "New York University, Princeton University, University of California San Diego, University of Washington and Canyon Crest Academy", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "o4-mini-2025-04-16", + "id": "openai/o4-mini-2025-04-16", + "developer": "openai", + "inference_platform": "openai" + }, + "evaluation_results": [ + { + "evaluation_name": "Hard Problems", + "source_data": { + "dataset_name": "Hard Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live" + ] + }, + "metric_config": { + "evaluation_description": "Pass@1 on Hard Problems", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.0143 + } + }, + { + "evaluation_name": "Medium Problems", + "source_data": { + "dataset_name": "Medium Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live" + ] + }, + "metric_config": { + "evaluation_description": "Pass@1 on Medium Problems", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.2923 + } + }, + { + "evaluation_name": "Easy Problems", + "source_data": { + "dataset_name": "Easy Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" + ] + }, + "metric_config": { + "evaluation_description": "Pass@1 on Easy Problems", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0.0, + "max_score": 1.0 + }, + "score_details": { + "score": 0.8571 + } + } + ] +} \ No newline at 
end of file diff --git a/data/livecodebenchpro/openai/o4-mini-2025-04-16/8992cef5-df7e-40a1-b099-331532c3deb0.json b/data/livecodebenchpro/openai/o4-mini-2025-04-16/8992cef5-df7e-40a1-b099-331532c3deb0.json index 23e79d50b..824e5dc57 100644 --- a/data/livecodebenchpro/openai/o4-mini-2025-04-16/8992cef5-df7e-40a1-b099-331532c3deb0.json +++ b/data/livecodebenchpro/openai/o4-mini-2025-04-16/8992cef5-df7e-40a1-b099-331532c3deb0.json @@ -1,5 +1,5 @@ { - "schema_version": "0.1.0", + "schema_version": "0.2.0", "evaluation_id": "livecodebenchpro/o4-mini-2025-04-16/1760492095.8105888", "retrieved_timestamp": "1760492095.8105888", "source_metadata": { @@ -26,6 +26,13 @@ }, "score_details": { "score": 0.014084507042253521 + }, + "source_data": { + "dataset_name": "Hard Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live" + ] } }, { @@ -39,6 +46,13 @@ }, "score_details": { "score": 0.30985915492957744 + }, + "source_data": { + "dataset_name": "Medium Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live" + ] } }, { @@ -52,12 +66,14 @@ }, "score_details": { "score": 0.8873239436619719 + }, + "source_data": { + "dataset_name": "Easy Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" + ] } } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" ] } diff --git a/data/livecodebenchpro/z-ai/glm-4.5/a77c08d6-a782-440c-b545-c60b6169712d.json b/data/livecodebenchpro/z-ai/glm-4.5/a77c08d6-a782-440c-b545-c60b6169712d.json index 810237523..013991ae1 100644 --- a/data/livecodebenchpro/z-ai/glm-4.5/a77c08d6-a782-440c-b545-c60b6169712d.json +++ b/data/livecodebenchpro/z-ai/glm-4.5/a77c08d6-a782-440c-b545-c60b6169712d.json @@ -1,5 +1,5 @@ { - "schema_version": "0.1.0", + "schema_version": "0.2.0", "evaluation_id": "livecodebenchpro/z-ai/glm-4.5/1760492095.8105888", "retrieved_timestamp": "1760492095.8105888", "source_metadata": { @@ -26,6 +26,13 @@ }, "score_details": { "score": 0.0 + }, + "source_data": { + "dataset_name": "Hard Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live" + ] } }, { @@ -39,6 +46,13 @@ }, "score_details": { "score": 0.028169014084507043 + }, + "source_data": { + "dataset_name": "Medium Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live" + ] } }, { @@ -52,12 +66,14 @@ }, "score_details": { "score": 0.1267605633802817 + }, + "source_data": { + "dataset_name": "Easy Problems", + "source_type": "url", + "url": [ + "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" + ] } } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" ] 
} diff --git a/scripts/global-mmlu-lite/adapter.py b/scripts/global-mmlu-lite/adapter.py index 2e121f40d..73cd86d69 100644 --- a/scripts/global-mmlu-lite/adapter.py +++ b/scripts/global-mmlu-lite/adapter.py @@ -13,7 +13,6 @@ from typing import List from eval_types import ( - AdditionalPropertiesObject, EvaluationLog, EvaluationResult, EvaluatorRelationship, @@ -21,6 +20,7 @@ ScoreDetails, ScoreType, SourceDataUrl, + Uncertainty, ) import sys @@ -67,10 +67,10 @@ def make_eval_result( stddev: float | None = None, ) -> EvaluationResult: """Create an EvaluationResult with hardcoded source_data for global-mmlu-lite.""" - details = None + uncertainty = None if confidence_interval is not None or stddev is not None: - details = AdditionalPropertiesObject( - **{k: v for k, v in [("confidence_interval", confidence_interval), ("stddev", stddev)] if v is not None} + uncertainty = Uncertainty( + standard_deviation=stddev, ) return EvaluationResult( evaluation_name=name, @@ -84,7 +84,7 @@ def make_eval_result( ), score_details=ScoreDetails( score=round(score, 4) if score is not None else -1, - details=details, + uncertainty=uncertainty, ), ) diff --git a/scripts/livecodebenchpro/adapter.py b/scripts/livecodebenchpro/adapter.py new file mode 100644 index 000000000..94532357b --- /dev/null +++ b/scripts/livecodebenchpro/adapter.py @@ -0,0 +1,88 @@ +""" +Script to migrate existing Live Code Bench Pro data from schema 0.1.0 to 0.2.0. + +Moves top-level source_data URLs into per-evaluation_result source_data fields +using SourceDataUrl, matching each URL to its evaluation by difficulty. + +Usage: + uv run python scripts/livecodebenchpro/adapter.py +""" + +import json +from pathlib import Path + +BASE_URL = "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty" +DATA_DIR = Path(__file__).parent.parent.parent / "data" / "livecodebenchpro" + +# Map evaluation_name -> difficulty for URL matching +DIFFICULTY_FOR_EVAL = { + "Hard Problems": "hard", + "Medium Problems": "medium", + "Easy Problems": "easy", +} + + +def make_source_data(difficulty: str) -> dict: + """Build a SourceDataUrl dict for a given difficulty.""" + return { + "dataset_name": f"{difficulty.capitalize()} Problems", + "source_type": "url", + "url": [f"{BASE_URL}?difficulty={difficulty}&benchmark_mode=live"], + } + + +def migrate_file(filepath: Path) -> None: + """Migrate a single JSON file from 0.1.0 to 0.2.0.""" + with open(filepath, "r") as f: + data = json.load(f) + + if data.get("schema_version") == "0.2.0": + print(f"Skipping (already 0.2.0): {filepath}") + return + + if data.get("schema_version") != "0.1.0": + raise ValueError(f"{filepath}: expected schema_version 0.1.0, got {data.get('schema_version')}") + + # Remove top-level source_data + if "source_data" not in data: + raise ValueError(f"{filepath}: missing top-level source_data") + del data["source_data"] + + # Add source_data to each evaluation_result + for result in data["evaluation_results"]: + eval_name = result.get("evaluation_name") + if not eval_name: + raise ValueError(f"{filepath}: evaluation_result missing evaluation_name") + + difficulty = DIFFICULTY_FOR_EVAL.get(eval_name) + if not difficulty: + raise ValueError(f"{filepath}: unknown evaluation_name '{eval_name}'") + + result["source_data"] = make_source_data(difficulty) + + data["schema_version"] = "0.2.0" + + with open(filepath, "w") as f: + json.dump(data, f, indent=2) + f.write("\n") + + +def main(): + if not DATA_DIR.exists(): + raise FileNotFoundError(f"Data directory not found: {DATA_DIR}") + + files = 
list(DATA_DIR.rglob("*.json")) + if not files: + raise FileNotFoundError(f"No JSON files found in {DATA_DIR}") + + print(f"Migrating {len(files)} files in {DATA_DIR}...") + + for filepath in files: + migrate_file(filepath) + print(f"Migrated: {filepath}") + + print(f"\nDone! Migrated {len(files)} files to schema 0.2.0.") + + +if __name__ == "__main__": + main() diff --git a/uv.lock b/uv.lock index 77be2f2dc..85c3b2986 100644 --- a/uv.lock +++ b/uv.lock @@ -1,13 +1,14 @@ version = 1 +revision = 2 requires-python = ">=3.12" [[package]] name = "absl-py" version = "2.4.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/64/c7/8de93764ad66968d19329a7e0c147a2bb3c7054c554d4a119111b8f9440f/absl_py-2.4.0.tar.gz", hash = "sha256:8c6af82722b35cf71e0f4d1d47dcaebfff286e27110a99fc359349b247dfb5d4", size = 116543 } +sdist = { url = "https://files.pythonhosted.org/packages/64/c7/8de93764ad66968d19329a7e0c147a2bb3c7054c554d4a119111b8f9440f/absl_py-2.4.0.tar.gz", hash = "sha256:8c6af82722b35cf71e0f4d1d47dcaebfff286e27110a99fc359349b247dfb5d4", size = 116543, upload_time = "2026-01-28T10:17:05.322Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/18/a6/907a406bb7d359e6a63f99c313846d9eec4f7e6f7437809e03aa00fa3074/absl_py-2.4.0-py3-none-any.whl", hash = "sha256:88476fd881ca8aab94ffa78b7b6c632a782ab3ba1cd19c9bd423abc4fb4cd28d", size = 135750 }, + { url = "https://files.pythonhosted.org/packages/18/a6/907a406bb7d359e6a63f99c313846d9eec4f7e6f7437809e03aa00fa3074/absl_py-2.4.0-py3-none-any.whl", hash = "sha256:88476fd881ca8aab94ffa78b7b6c632a782ab3ba1cd19c9bd423abc4fb4cd28d", size = 135750, upload_time = "2026-01-28T10:17:04.19Z" }, ] [[package]] @@ -18,9 +19,9 @@ dependencies = [ { name = "aiobotocore", extra = ["boto3"] }, { name = "aiofiles" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a2/01/92e9ab00f36e2899315f49eefcd5b4685fbb19016c7f19a9edf06da80bb0/aioboto3-15.5.0.tar.gz", hash = "sha256:ea8d8787d315594842fbfcf2c4dce3bac2ad61be275bc8584b2ce9a3402a6979", size = 255069 } +sdist = { url = "https://files.pythonhosted.org/packages/a2/01/92e9ab00f36e2899315f49eefcd5b4685fbb19016c7f19a9edf06da80bb0/aioboto3-15.5.0.tar.gz", hash = "sha256:ea8d8787d315594842fbfcf2c4dce3bac2ad61be275bc8584b2ce9a3402a6979", size = 255069, upload_time = "2025-10-30T13:37:16.122Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e5/3e/e8f5b665bca646d43b916763c901e00a07e40f7746c9128bdc912a089424/aioboto3-15.5.0-py3-none-any.whl", hash = "sha256:cc880c4d6a8481dd7e05da89f41c384dbd841454fc1998ae25ca9c39201437a6", size = 35913 }, + { url = "https://files.pythonhosted.org/packages/e5/3e/e8f5b665bca646d43b916763c901e00a07e40f7746c9128bdc912a089424/aioboto3-15.5.0-py3-none-any.whl", hash = "sha256:cc880c4d6a8481dd7e05da89f41c384dbd841454fc1998ae25ca9c39201437a6", size = 35913, upload_time = "2025-10-30T13:37:14.549Z" }, ] [[package]] @@ -36,9 +37,9 @@ dependencies = [ { name = "python-dateutil" }, { name = "wrapt" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/62/94/2e4ec48cf1abb89971cb2612d86f979a6240520f0a659b53a43116d344dc/aiobotocore-2.25.1.tar.gz", hash = "sha256:ea9be739bfd7ece8864f072ec99bb9ed5c7e78ebb2b0b15f29781fbe02daedbc", size = 120560 } +sdist = { url = "https://files.pythonhosted.org/packages/62/94/2e4ec48cf1abb89971cb2612d86f979a6240520f0a659b53a43116d344dc/aiobotocore-2.25.1.tar.gz", hash = "sha256:ea9be739bfd7ece8864f072ec99bb9ed5c7e78ebb2b0b15f29781fbe02daedbc", size = 120560, upload_time = 
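Following the migration script above, here is a minimal spot-check sketch (not part of the patch) for verifying the 0.2.0 shape after the adapter has run. The file name check_migration.py is an assumption for illustration, as is the choice of sample file — any migrated file under data/livecodebenchpro would do — and it presumes execution from the repository root.

# check_migration.py -- a minimal sketch (assumed name), spot-checking one
# migrated file for the 0.2.0 shape; run from the repository root.
import json
from pathlib import Path

# Sample path taken from the files touched by this patch; any other
# migrated file under data/livecodebenchpro works the same way.
sample = Path("data/livecodebenchpro/openai/gpt-4.1/ba46ef91-d157-4984-b3df-ce33d8d97f8e.json")
data = json.loads(sample.read_text())

# 0.2.0 bumps the version and drops the top-level source_data list.
assert data["schema_version"] == "0.2.0"
assert "source_data" not in data

# Each evaluation_result now carries its own SourceDataUrl, and the URL's
# difficulty parameter should match the evaluation_name
# ("Hard Problems" -> difficulty=hard, and so on).
for result in data["evaluation_results"]:
    source = result["source_data"]
    assert source["source_type"] == "url"
    difficulty = result["evaluation_name"].split()[0].lower()
    assert f"difficulty={difficulty}" in source["url"][0]

print(f"{sample}: OK")

The difficulty-from-name assumption mirrors DIFFICULTY_FOR_EVAL in the adapter, so a failure here would point at an unexpected evaluation_name rather than a botched URL rewrite.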
"2025-10-28T22:33:21.787Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/95/2a/d275ec4ce5cd0096665043995a7d76f5d0524853c76a3d04656de49f8808/aiobotocore-2.25.1-py3-none-any.whl", hash = "sha256:eb6daebe3cbef5b39a0bb2a97cffbe9c7cb46b2fcc399ad141f369f3c2134b1f", size = 86039 }, + { url = "https://files.pythonhosted.org/packages/95/2a/d275ec4ce5cd0096665043995a7d76f5d0524853c76a3d04656de49f8808/aiobotocore-2.25.1-py3-none-any.whl", hash = "sha256:eb6daebe3cbef5b39a0bb2a97cffbe9c7cb46b2fcc399ad141f369f3c2134b1f", size = 86039, upload_time = "2025-10-28T22:33:19.949Z" }, ] [package.optional-dependencies] @@ -50,18 +51,18 @@ boto3 = [ name = "aiofiles" version = "25.1.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/41/c3/534eac40372d8ee36ef40df62ec129bee4fdb5ad9706e58a29be53b2c970/aiofiles-25.1.0.tar.gz", hash = "sha256:a8d728f0a29de45dc521f18f07297428d56992a742f0cd2701ba86e44d23d5b2", size = 46354 } +sdist = { url = "https://files.pythonhosted.org/packages/41/c3/534eac40372d8ee36ef40df62ec129bee4fdb5ad9706e58a29be53b2c970/aiofiles-25.1.0.tar.gz", hash = "sha256:a8d728f0a29de45dc521f18f07297428d56992a742f0cd2701ba86e44d23d5b2", size = 46354, upload_time = "2025-10-09T20:51:04.358Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bc/8a/340a1555ae33d7354dbca4faa54948d76d89a27ceef032c8c3bc661d003e/aiofiles-25.1.0-py3-none-any.whl", hash = "sha256:abe311e527c862958650f9438e859c1fa7568a141b22abcd015e120e86a85695", size = 14668 }, + { url = "https://files.pythonhosted.org/packages/bc/8a/340a1555ae33d7354dbca4faa54948d76d89a27ceef032c8c3bc661d003e/aiofiles-25.1.0-py3-none-any.whl", hash = "sha256:abe311e527c862958650f9438e859c1fa7568a141b22abcd015e120e86a85695", size = 14668, upload_time = "2025-10-09T20:51:03.174Z" }, ] [[package]] name = "aiohappyeyeballs" version = "2.6.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/26/30/f84a107a9c4331c14b2b586036f40965c128aa4fee4dda5d3d51cb14ad54/aiohappyeyeballs-2.6.1.tar.gz", hash = "sha256:c3f9d0113123803ccadfdf3f0faa505bc78e6a72d1cc4806cbd719826e943558", size = 22760 } +sdist = { url = "https://files.pythonhosted.org/packages/26/30/f84a107a9c4331c14b2b586036f40965c128aa4fee4dda5d3d51cb14ad54/aiohappyeyeballs-2.6.1.tar.gz", hash = "sha256:c3f9d0113123803ccadfdf3f0faa505bc78e6a72d1cc4806cbd719826e943558", size = 22760, upload_time = "2025-03-12T01:42:48.764Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0f/15/5bf3b99495fb160b63f95972b81750f18f7f4e02ad051373b669d17d44f2/aiohappyeyeballs-2.6.1-py3-none-any.whl", hash = "sha256:f349ba8f4b75cb25c99c5c2d84e997e485204d2902a9597802b0371f09331fb8", size = 15265 }, + { url = "https://files.pythonhosted.org/packages/0f/15/5bf3b99495fb160b63f95972b81750f18f7f4e02ad051373b669d17d44f2/aiohappyeyeballs-2.6.1-py3-none-any.whl", hash = "sha256:f349ba8f4b75cb25c99c5c2d84e997e485204d2902a9597802b0371f09331fb8", size = 15265, upload_time = "2025-03-12T01:42:47.083Z" }, ] [[package]] @@ -77,85 +78,85 @@ dependencies = [ { name = "propcache" }, { name = "yarl" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/50/42/32cf8e7704ceb4481406eb87161349abb46a57fee3f008ba9cb610968646/aiohttp-3.13.3.tar.gz", hash = "sha256:a949eee43d3782f2daae4f4a2819b2cb9b0c5d3b7f7a927067cc84dafdbb9f88", size = 7844556 } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/a0/be/4fc11f202955a69e0db803a12a062b8379c970c7c84f4882b6da17337cc1/aiohttp-3.13.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:b903a4dfee7d347e2d87697d0713be59e0b87925be030c9178c5faa58ea58d5c", size = 739732 }, - { url = "https://files.pythonhosted.org/packages/97/2c/621d5b851f94fa0bb7430d6089b3aa970a9d9b75196bc93bb624b0db237a/aiohttp-3.13.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a45530014d7a1e09f4a55f4f43097ba0fd155089372e105e4bff4ca76cb1b168", size = 494293 }, - { url = "https://files.pythonhosted.org/packages/5d/43/4be01406b78e1be8320bb8316dc9c42dbab553d281c40364e0f862d5661c/aiohttp-3.13.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:27234ef6d85c914f9efeb77ff616dbf4ad2380be0cda40b4db086ffc7ddd1b7d", size = 493533 }, - { url = "https://files.pythonhosted.org/packages/8d/a8/5a35dc56a06a2c90d4742cbf35294396907027f80eea696637945a106f25/aiohttp-3.13.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d32764c6c9aafb7fb55366a224756387cd50bfa720f32b88e0e6fa45b27dcf29", size = 1737839 }, - { url = "https://files.pythonhosted.org/packages/bf/62/4b9eeb331da56530bf2e198a297e5303e1c1ebdceeb00fe9b568a65c5a0c/aiohttp-3.13.3-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:b1a6102b4d3ebc07dad44fbf07b45bb600300f15b552ddf1851b5390202ea2e3", size = 1703932 }, - { url = "https://files.pythonhosted.org/packages/7c/f6/af16887b5d419e6a367095994c0b1332d154f647e7dc2bd50e61876e8e3d/aiohttp-3.13.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c014c7ea7fb775dd015b2d3137378b7be0249a448a1612268b5a90c2d81de04d", size = 1771906 }, - { url = "https://files.pythonhosted.org/packages/ce/83/397c634b1bcc24292fa1e0c7822800f9f6569e32934bdeef09dae7992dfb/aiohttp-3.13.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2b8d8ddba8f95ba17582226f80e2de99c7a7948e66490ef8d947e272a93e9463", size = 1871020 }, - { url = "https://files.pythonhosted.org/packages/86/f6/a62cbbf13f0ac80a70f71b1672feba90fdb21fd7abd8dbf25c0105fb6fa3/aiohttp-3.13.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9ae8dd55c8e6c4257eae3a20fd2c8f41edaea5992ed67156642493b8daf3cecc", size = 1755181 }, - { url = "https://files.pythonhosted.org/packages/0a/87/20a35ad487efdd3fba93d5843efdfaa62d2f1479eaafa7453398a44faf13/aiohttp-3.13.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:01ad2529d4b5035578f5081606a465f3b814c542882804e2e8cda61adf5c71bf", size = 1561794 }, - { url = "https://files.pythonhosted.org/packages/de/95/8fd69a66682012f6716e1bc09ef8a1a2a91922c5725cb904689f112309c4/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:bb4f7475e359992b580559e008c598091c45b5088f28614e855e42d39c2f1033", size = 1697900 }, - { url = "https://files.pythonhosted.org/packages/e5/66/7b94b3b5ba70e955ff597672dad1691333080e37f50280178967aff68657/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:c19b90316ad3b24c69cd78d5c9b4f3aa4497643685901185b65166293d36a00f", size = 1728239 }, - { url = "https://files.pythonhosted.org/packages/47/71/6f72f77f9f7d74719692ab65a2a0252584bf8d5f301e2ecb4c0da734530a/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:96d604498a7c782cb15a51c406acaea70d8c027ee6b90c569baa6e7b93073679", size = 1740527 }, - { url = 
"https://files.pythonhosted.org/packages/fa/b4/75ec16cbbd5c01bdaf4a05b19e103e78d7ce1ef7c80867eb0ace42ff4488/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:084911a532763e9d3dd95adf78a78f4096cd5f58cdc18e6fdbc1b58417a45423", size = 1554489 }, - { url = "https://files.pythonhosted.org/packages/52/8f/bc518c0eea29f8406dcf7ed1f96c9b48e3bc3995a96159b3fc11f9e08321/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:7a4a94eb787e606d0a09404b9c38c113d3b099d508021faa615d70a0131907ce", size = 1767852 }, - { url = "https://files.pythonhosted.org/packages/9d/f2/a07a75173124f31f11ea6f863dc44e6f09afe2bca45dd4e64979490deab1/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:87797e645d9d8e222e04160ee32aa06bc5c163e8499f24db719e7852ec23093a", size = 1722379 }, - { url = "https://files.pythonhosted.org/packages/3c/4a/1a3fee7c21350cac78e5c5cef711bac1b94feca07399f3d406972e2d8fcd/aiohttp-3.13.3-cp312-cp312-win32.whl", hash = "sha256:b04be762396457bef43f3597c991e192ee7da460a4953d7e647ee4b1c28e7046", size = 428253 }, - { url = "https://files.pythonhosted.org/packages/d9/b7/76175c7cb4eb73d91ad63c34e29fc4f77c9386bba4a65b53ba8e05ee3c39/aiohttp-3.13.3-cp312-cp312-win_amd64.whl", hash = "sha256:e3531d63d3bdfa7e3ac5e9b27b2dd7ec9df3206a98e0b3445fa906f233264c57", size = 455407 }, - { url = "https://files.pythonhosted.org/packages/97/8a/12ca489246ca1faaf5432844adbfce7ff2cc4997733e0af120869345643a/aiohttp-3.13.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:5dff64413671b0d3e7d5918ea490bdccb97a4ad29b3f311ed423200b2203e01c", size = 734190 }, - { url = "https://files.pythonhosted.org/packages/32/08/de43984c74ed1fca5c014808963cc83cb00d7bb06af228f132d33862ca76/aiohttp-3.13.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:87b9aab6d6ed88235aa2970294f496ff1a1f9adcd724d800e9b952395a80ffd9", size = 491783 }, - { url = "https://files.pythonhosted.org/packages/17/f8/8dd2cf6112a5a76f81f81a5130c57ca829d101ad583ce57f889179accdda/aiohttp-3.13.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:425c126c0dc43861e22cb1c14ba4c8e45d09516d0a3ae0a3f7494b79f5f233a3", size = 490704 }, - { url = "https://files.pythonhosted.org/packages/6d/40/a46b03ca03936f832bc7eaa47cfbb1ad012ba1be4790122ee4f4f8cba074/aiohttp-3.13.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7f9120f7093c2a32d9647abcaf21e6ad275b4fbec5b55969f978b1a97c7c86bf", size = 1720652 }, - { url = "https://files.pythonhosted.org/packages/f7/7e/917fe18e3607af92657e4285498f500dca797ff8c918bd7d90b05abf6c2a/aiohttp-3.13.3-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:697753042d57f4bf7122cab985bf15d0cef23c770864580f5af4f52023a56bd6", size = 1692014 }, - { url = "https://files.pythonhosted.org/packages/71/b6/cefa4cbc00d315d68973b671cf105b21a609c12b82d52e5d0c9ae61d2a09/aiohttp-3.13.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6de499a1a44e7de70735d0b39f67c8f25eb3d91eb3103be99ca0fa882cdd987d", size = 1759777 }, - { url = "https://files.pythonhosted.org/packages/fb/e3/e06ee07b45e59e6d81498b591fc589629be1553abb2a82ce33efe2a7b068/aiohttp-3.13.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:37239e9f9a7ea9ac5bf6b92b0260b01f8a22281996da609206a84df860bc1261", size = 1861276 }, - { url = 
"https://files.pythonhosted.org/packages/7c/24/75d274228acf35ceeb2850b8ce04de9dd7355ff7a0b49d607ee60c29c518/aiohttp-3.13.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f76c1e3fe7d7c8afad7ed193f89a292e1999608170dcc9751a7462a87dfd5bc0", size = 1743131 }, - { url = "https://files.pythonhosted.org/packages/04/98/3d21dde21889b17ca2eea54fdcff21b27b93f45b7bb94ca029c31ab59dc3/aiohttp-3.13.3-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fc290605db2a917f6e81b0e1e0796469871f5af381ce15c604a3c5c7e51cb730", size = 1556863 }, - { url = "https://files.pythonhosted.org/packages/9e/84/da0c3ab1192eaf64782b03971ab4055b475d0db07b17eff925e8c93b3aa5/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4021b51936308aeea0367b8f006dc999ca02bc118a0cc78c303f50a2ff6afb91", size = 1682793 }, - { url = "https://files.pythonhosted.org/packages/ff/0f/5802ada182f575afa02cbd0ec5180d7e13a402afb7c2c03a9aa5e5d49060/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:49a03727c1bba9a97d3e93c9f93ca03a57300f484b6e935463099841261195d3", size = 1716676 }, - { url = "https://files.pythonhosted.org/packages/3f/8c/714d53bd8b5a4560667f7bbbb06b20c2382f9c7847d198370ec6526af39c/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:3d9908a48eb7416dc1f4524e69f1d32e5d90e3981e4e37eb0aa1cd18f9cfa2a4", size = 1733217 }, - { url = "https://files.pythonhosted.org/packages/7d/79/e2176f46d2e963facea939f5be2d26368ce543622be6f00a12844d3c991f/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:2712039939ec963c237286113c68dbad80a82a4281543f3abf766d9d73228998", size = 1552303 }, - { url = "https://files.pythonhosted.org/packages/ab/6a/28ed4dea1759916090587d1fe57087b03e6c784a642b85ef48217b0277ae/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:7bfdc049127717581866fa4708791220970ce291c23e28ccf3922c700740fdc0", size = 1763673 }, - { url = "https://files.pythonhosted.org/packages/e8/35/4a3daeb8b9fab49240d21c04d50732313295e4bd813a465d840236dd0ce1/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8057c98e0c8472d8846b9c79f56766bcc57e3e8ac7bfd510482332366c56c591", size = 1721120 }, - { url = "https://files.pythonhosted.org/packages/bc/9f/d643bb3c5fb99547323e635e251c609fbbc660d983144cfebec529e09264/aiohttp-3.13.3-cp313-cp313-win32.whl", hash = "sha256:1449ceddcdbcf2e0446957863af03ebaaa03f94c090f945411b61269e2cb5daf", size = 427383 }, - { url = "https://files.pythonhosted.org/packages/4e/f1/ab0395f8a79933577cdd996dd2f9aa6014af9535f65dddcf88204682fe62/aiohttp-3.13.3-cp313-cp313-win_amd64.whl", hash = "sha256:693781c45a4033d31d4187d2436f5ac701e7bbfe5df40d917736108c1cc7436e", size = 453899 }, - { url = "https://files.pythonhosted.org/packages/99/36/5b6514a9f5d66f4e2597e40dea2e3db271e023eb7a5d22defe96ba560996/aiohttp-3.13.3-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:ea37047c6b367fd4bd632bff8077449b8fa034b69e812a18e0132a00fae6e808", size = 737238 }, - { url = "https://files.pythonhosted.org/packages/f7/49/459327f0d5bcd8c6c9ca69e60fdeebc3622861e696490d8674a6d0cb90a6/aiohttp-3.13.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:6fc0e2337d1a4c3e6acafda6a78a39d4c14caea625124817420abceed36e2415", size = 492292 }, - { url = "https://files.pythonhosted.org/packages/e8/0b/b97660c5fd05d3495b4eb27f2d0ef18dc1dc4eff7511a9bf371397ff0264/aiohttp-3.13.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c685f2d80bb67ca8c3837823ad76196b3694b0159d232206d1e461d3d434666f", size = 
493021 }, - { url = "https://files.pythonhosted.org/packages/54/d4/438efabdf74e30aeceb890c3290bbaa449780583b1270b00661126b8aae4/aiohttp-3.13.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:48e377758516d262bde50c2584fc6c578af272559c409eecbdd2bae1601184d6", size = 1717263 }, - { url = "https://files.pythonhosted.org/packages/71/f2/7bddc7fd612367d1459c5bcf598a9e8f7092d6580d98de0e057eb42697ad/aiohttp-3.13.3-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:34749271508078b261c4abb1767d42b8d0c0cc9449c73a4df494777dc55f0687", size = 1669107 }, - { url = "https://files.pythonhosted.org/packages/00/5a/1aeaecca40e22560f97610a329e0e5efef5e0b5afdf9f857f0d93839ab2e/aiohttp-3.13.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:82611aeec80eb144416956ec85b6ca45a64d76429c1ed46ae1b5f86c6e0c9a26", size = 1760196 }, - { url = "https://files.pythonhosted.org/packages/f8/f8/0ff6992bea7bd560fc510ea1c815f87eedd745fe035589c71ce05612a19a/aiohttp-3.13.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2fff83cfc93f18f215896e3a190e8e5cb413ce01553901aca925176e7568963a", size = 1843591 }, - { url = "https://files.pythonhosted.org/packages/e3/d1/e30e537a15f53485b61f5be525f2157da719819e8377298502aebac45536/aiohttp-3.13.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bbe7d4cecacb439e2e2a8a1a7b935c25b812af7a5fd26503a66dadf428e79ec1", size = 1720277 }, - { url = "https://files.pythonhosted.org/packages/84/45/23f4c451d8192f553d38d838831ebbc156907ea6e05557f39563101b7717/aiohttp-3.13.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b928f30fe49574253644b1ca44b1b8adbd903aa0da4b9054a6c20fc7f4092a25", size = 1548575 }, - { url = "https://files.pythonhosted.org/packages/6a/ed/0a42b127a43712eda7807e7892c083eadfaf8429ca8fb619662a530a3aab/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7b5e8fe4de30df199155baaf64f2fcd604f4c678ed20910db8e2c66dc4b11603", size = 1679455 }, - { url = "https://files.pythonhosted.org/packages/2e/b5/c05f0c2b4b4fe2c9d55e73b6d3ed4fd6c9dc2684b1d81cbdf77e7fad9adb/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:8542f41a62bcc58fc7f11cf7c90e0ec324ce44950003feb70640fc2a9092c32a", size = 1687417 }, - { url = "https://files.pythonhosted.org/packages/c9/6b/915bc5dad66aef602b9e459b5a973529304d4e89ca86999d9d75d80cbd0b/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:5e1d8c8b8f1d91cd08d8f4a3c2b067bfca6ec043d3ff36de0f3a715feeedf926", size = 1729968 }, - { url = "https://files.pythonhosted.org/packages/11/3b/e84581290a9520024a08640b63d07673057aec5ca548177a82026187ba73/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:90455115e5da1c3c51ab619ac57f877da8fd6d73c05aacd125c5ae9819582aba", size = 1545690 }, - { url = "https://files.pythonhosted.org/packages/f5/04/0c3655a566c43fd647c81b895dfe361b9f9ad6d58c19309d45cff52d6c3b/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:042e9e0bcb5fba81886c8b4fbb9a09d6b8a00245fd8d88e4d989c1f96c74164c", size = 1746390 }, - { url = "https://files.pythonhosted.org/packages/1f/53/71165b26978f719c3419381514c9690bd5980e764a09440a10bb816ea4ab/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2eb752b102b12a76ca02dff751a801f028b4ffbbc478840b473597fc91a9ed43", size = 1702188 }, - { url = 
"https://files.pythonhosted.org/packages/29/a7/cbe6c9e8e136314fa1980da388a59d2f35f35395948a08b6747baebb6aa6/aiohttp-3.13.3-cp314-cp314-win32.whl", hash = "sha256:b556c85915d8efaed322bf1bdae9486aa0f3f764195a0fb6ee962e5c71ef5ce1", size = 433126 }, - { url = "https://files.pythonhosted.org/packages/de/56/982704adea7d3b16614fc5936014e9af85c0e34b58f9046655817f04306e/aiohttp-3.13.3-cp314-cp314-win_amd64.whl", hash = "sha256:9bf9f7a65e7aa20dd764151fb3d616c81088f91f8df39c3893a536e279b4b984", size = 459128 }, - { url = "https://files.pythonhosted.org/packages/6c/2a/3c79b638a9c3d4658d345339d22070241ea341ed4e07b5ac60fb0f418003/aiohttp-3.13.3-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:05861afbbec40650d8a07ea324367cb93e9e8cc7762e04dd4405df99fa65159c", size = 769512 }, - { url = "https://files.pythonhosted.org/packages/29/b9/3e5014d46c0ab0db8707e0ac2711ed28c4da0218c358a4e7c17bae0d8722/aiohttp-3.13.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2fc82186fadc4a8316768d61f3722c230e2c1dcab4200d52d2ebdf2482e47592", size = 506444 }, - { url = "https://files.pythonhosted.org/packages/90/03/c1d4ef9a054e151cd7839cdc497f2638f00b93cbe8043983986630d7a80c/aiohttp-3.13.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0add0900ff220d1d5c5ebbf99ed88b0c1bbf87aa7e4262300ed1376a6b13414f", size = 510798 }, - { url = "https://files.pythonhosted.org/packages/ea/76/8c1e5abbfe8e127c893fe7ead569148a4d5a799f7cf958d8c09f3eedf097/aiohttp-3.13.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:568f416a4072fbfae453dcf9a99194bbb8bdeab718e08ee13dfa2ba0e4bebf29", size = 1868835 }, - { url = "https://files.pythonhosted.org/packages/8e/ac/984c5a6f74c363b01ff97adc96a3976d9c98940b8969a1881575b279ac5d/aiohttp-3.13.3-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:add1da70de90a2569c5e15249ff76a631ccacfe198375eead4aadf3b8dc849dc", size = 1720486 }, - { url = "https://files.pythonhosted.org/packages/b2/9a/b7039c5f099c4eb632138728828b33428585031a1e658d693d41d07d89d1/aiohttp-3.13.3-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:10b47b7ba335d2e9b1239fa571131a87e2d8ec96b333e68b2a305e7a98b0bae2", size = 1847951 }, - { url = "https://files.pythonhosted.org/packages/3c/02/3bec2b9a1ba3c19ff89a43a19324202b8eb187ca1e928d8bdac9bbdddebd/aiohttp-3.13.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3dd4dce1c718e38081c8f35f323209d4c1df7d4db4bab1b5c88a6b4d12b74587", size = 1941001 }, - { url = "https://files.pythonhosted.org/packages/37/df/d879401cedeef27ac4717f6426c8c36c3091c6e9f08a9178cc87549c537f/aiohttp-3.13.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:34bac00a67a812570d4a460447e1e9e06fae622946955f939051e7cc895cfab8", size = 1797246 }, - { url = "https://files.pythonhosted.org/packages/8d/15/be122de1f67e6953add23335c8ece6d314ab67c8bebb3f181063010795a7/aiohttp-3.13.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a19884d2ee70b06d9204b2727a7b9f983d0c684c650254679e716b0b77920632", size = 1627131 }, - { url = "https://files.pythonhosted.org/packages/12/12/70eedcac9134cfa3219ab7af31ea56bc877395b1ac30d65b1bc4b27d0438/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5f8ca7f2bb6ba8348a3614c7918cc4bb73268c5ac2a207576b7afea19d3d9f64", size = 1795196 }, - { url = 
"https://files.pythonhosted.org/packages/32/11/b30e1b1cd1f3054af86ebe60df96989c6a414dd87e27ad16950eee420bea/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:b0d95340658b9d2f11d9697f59b3814a9d3bb4b7a7c20b131df4bcef464037c0", size = 1782841 }, - { url = "https://files.pythonhosted.org/packages/88/0d/d98a9367b38912384a17e287850f5695c528cff0f14f791ce8ee2e4f7796/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:a1e53262fd202e4b40b70c3aff944a8155059beedc8a89bba9dc1f9ef06a1b56", size = 1795193 }, - { url = "https://files.pythonhosted.org/packages/43/a5/a2dfd1f5ff5581632c7f6a30e1744deda03808974f94f6534241ef60c751/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:d60ac9663f44168038586cab2157e122e46bdef09e9368b37f2d82d354c23f72", size = 1621979 }, - { url = "https://files.pythonhosted.org/packages/fa/f0/12973c382ae7c1cccbc4417e129c5bf54c374dfb85af70893646e1f0e749/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:90751b8eed69435bac9ff4e3d2f6b3af1f57e37ecb0fbeee59c0174c9e2d41df", size = 1822193 }, - { url = "https://files.pythonhosted.org/packages/3c/5f/24155e30ba7f8c96918af1350eb0663e2430aad9e001c0489d89cd708ab1/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:fc353029f176fd2b3ec6cfc71be166aba1936fe5d73dd1992ce289ca6647a9aa", size = 1769801 }, - { url = "https://files.pythonhosted.org/packages/eb/f8/7314031ff5c10e6ece114da79b338ec17eeff3a079e53151f7e9f43c4723/aiohttp-3.13.3-cp314-cp314t-win32.whl", hash = "sha256:2e41b18a58da1e474a057b3d35248d8320029f61d70a37629535b16a0c8f3767", size = 466523 }, - { url = "https://files.pythonhosted.org/packages/b4/63/278a98c715ae467624eafe375542d8ba9b4383a016df8fdefe0ae28382a7/aiohttp-3.13.3-cp314-cp314t-win_amd64.whl", hash = "sha256:44531a36aa2264a1860089ffd4dce7baf875ee5a6079d5fb42e261c704ef7344", size = 499694 }, +sdist = { url = "https://files.pythonhosted.org/packages/50/42/32cf8e7704ceb4481406eb87161349abb46a57fee3f008ba9cb610968646/aiohttp-3.13.3.tar.gz", hash = "sha256:a949eee43d3782f2daae4f4a2819b2cb9b0c5d3b7f7a927067cc84dafdbb9f88", size = 7844556, upload_time = "2026-01-03T17:33:05.204Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a0/be/4fc11f202955a69e0db803a12a062b8379c970c7c84f4882b6da17337cc1/aiohttp-3.13.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:b903a4dfee7d347e2d87697d0713be59e0b87925be030c9178c5faa58ea58d5c", size = 739732, upload_time = "2026-01-03T17:30:14.23Z" }, + { url = "https://files.pythonhosted.org/packages/97/2c/621d5b851f94fa0bb7430d6089b3aa970a9d9b75196bc93bb624b0db237a/aiohttp-3.13.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a45530014d7a1e09f4a55f4f43097ba0fd155089372e105e4bff4ca76cb1b168", size = 494293, upload_time = "2026-01-03T17:30:15.96Z" }, + { url = "https://files.pythonhosted.org/packages/5d/43/4be01406b78e1be8320bb8316dc9c42dbab553d281c40364e0f862d5661c/aiohttp-3.13.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:27234ef6d85c914f9efeb77ff616dbf4ad2380be0cda40b4db086ffc7ddd1b7d", size = 493533, upload_time = "2026-01-03T17:30:17.431Z" }, + { url = "https://files.pythonhosted.org/packages/8d/a8/5a35dc56a06a2c90d4742cbf35294396907027f80eea696637945a106f25/aiohttp-3.13.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d32764c6c9aafb7fb55366a224756387cd50bfa720f32b88e0e6fa45b27dcf29", size = 1737839, upload_time = "2026-01-03T17:30:19.422Z" }, + { url = 
"https://files.pythonhosted.org/packages/bf/62/4b9eeb331da56530bf2e198a297e5303e1c1ebdceeb00fe9b568a65c5a0c/aiohttp-3.13.3-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:b1a6102b4d3ebc07dad44fbf07b45bb600300f15b552ddf1851b5390202ea2e3", size = 1703932, upload_time = "2026-01-03T17:30:21.756Z" }, + { url = "https://files.pythonhosted.org/packages/7c/f6/af16887b5d419e6a367095994c0b1332d154f647e7dc2bd50e61876e8e3d/aiohttp-3.13.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c014c7ea7fb775dd015b2d3137378b7be0249a448a1612268b5a90c2d81de04d", size = 1771906, upload_time = "2026-01-03T17:30:23.932Z" }, + { url = "https://files.pythonhosted.org/packages/ce/83/397c634b1bcc24292fa1e0c7822800f9f6569e32934bdeef09dae7992dfb/aiohttp-3.13.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2b8d8ddba8f95ba17582226f80e2de99c7a7948e66490ef8d947e272a93e9463", size = 1871020, upload_time = "2026-01-03T17:30:26Z" }, + { url = "https://files.pythonhosted.org/packages/86/f6/a62cbbf13f0ac80a70f71b1672feba90fdb21fd7abd8dbf25c0105fb6fa3/aiohttp-3.13.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9ae8dd55c8e6c4257eae3a20fd2c8f41edaea5992ed67156642493b8daf3cecc", size = 1755181, upload_time = "2026-01-03T17:30:27.554Z" }, + { url = "https://files.pythonhosted.org/packages/0a/87/20a35ad487efdd3fba93d5843efdfaa62d2f1479eaafa7453398a44faf13/aiohttp-3.13.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:01ad2529d4b5035578f5081606a465f3b814c542882804e2e8cda61adf5c71bf", size = 1561794, upload_time = "2026-01-03T17:30:29.254Z" }, + { url = "https://files.pythonhosted.org/packages/de/95/8fd69a66682012f6716e1bc09ef8a1a2a91922c5725cb904689f112309c4/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:bb4f7475e359992b580559e008c598091c45b5088f28614e855e42d39c2f1033", size = 1697900, upload_time = "2026-01-03T17:30:31.033Z" }, + { url = "https://files.pythonhosted.org/packages/e5/66/7b94b3b5ba70e955ff597672dad1691333080e37f50280178967aff68657/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:c19b90316ad3b24c69cd78d5c9b4f3aa4497643685901185b65166293d36a00f", size = 1728239, upload_time = "2026-01-03T17:30:32.703Z" }, + { url = "https://files.pythonhosted.org/packages/47/71/6f72f77f9f7d74719692ab65a2a0252584bf8d5f301e2ecb4c0da734530a/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:96d604498a7c782cb15a51c406acaea70d8c027ee6b90c569baa6e7b93073679", size = 1740527, upload_time = "2026-01-03T17:30:34.695Z" }, + { url = "https://files.pythonhosted.org/packages/fa/b4/75ec16cbbd5c01bdaf4a05b19e103e78d7ce1ef7c80867eb0ace42ff4488/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:084911a532763e9d3dd95adf78a78f4096cd5f58cdc18e6fdbc1b58417a45423", size = 1554489, upload_time = "2026-01-03T17:30:36.864Z" }, + { url = "https://files.pythonhosted.org/packages/52/8f/bc518c0eea29f8406dcf7ed1f96c9b48e3bc3995a96159b3fc11f9e08321/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:7a4a94eb787e606d0a09404b9c38c113d3b099d508021faa615d70a0131907ce", size = 1767852, upload_time = "2026-01-03T17:30:39.433Z" }, + { url = "https://files.pythonhosted.org/packages/9d/f2/a07a75173124f31f11ea6f863dc44e6f09afe2bca45dd4e64979490deab1/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:87797e645d9d8e222e04160ee32aa06bc5c163e8499f24db719e7852ec23093a", size = 1722379, upload_time = "2026-01-03T17:30:41.081Z" }, + { url = "https://files.pythonhosted.org/packages/3c/4a/1a3fee7c21350cac78e5c5cef711bac1b94feca07399f3d406972e2d8fcd/aiohttp-3.13.3-cp312-cp312-win32.whl", hash = "sha256:b04be762396457bef43f3597c991e192ee7da460a4953d7e647ee4b1c28e7046", size = 428253, upload_time = "2026-01-03T17:30:42.644Z" }, + { url = "https://files.pythonhosted.org/packages/d9/b7/76175c7cb4eb73d91ad63c34e29fc4f77c9386bba4a65b53ba8e05ee3c39/aiohttp-3.13.3-cp312-cp312-win_amd64.whl", hash = "sha256:e3531d63d3bdfa7e3ac5e9b27b2dd7ec9df3206a98e0b3445fa906f233264c57", size = 455407, upload_time = "2026-01-03T17:30:44.195Z" }, + { url = "https://files.pythonhosted.org/packages/97/8a/12ca489246ca1faaf5432844adbfce7ff2cc4997733e0af120869345643a/aiohttp-3.13.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:5dff64413671b0d3e7d5918ea490bdccb97a4ad29b3f311ed423200b2203e01c", size = 734190, upload_time = "2026-01-03T17:30:45.832Z" }, + { url = "https://files.pythonhosted.org/packages/32/08/de43984c74ed1fca5c014808963cc83cb00d7bb06af228f132d33862ca76/aiohttp-3.13.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:87b9aab6d6ed88235aa2970294f496ff1a1f9adcd724d800e9b952395a80ffd9", size = 491783, upload_time = "2026-01-03T17:30:47.466Z" }, + { url = "https://files.pythonhosted.org/packages/17/f8/8dd2cf6112a5a76f81f81a5130c57ca829d101ad583ce57f889179accdda/aiohttp-3.13.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:425c126c0dc43861e22cb1c14ba4c8e45d09516d0a3ae0a3f7494b79f5f233a3", size = 490704, upload_time = "2026-01-03T17:30:49.373Z" }, + { url = "https://files.pythonhosted.org/packages/6d/40/a46b03ca03936f832bc7eaa47cfbb1ad012ba1be4790122ee4f4f8cba074/aiohttp-3.13.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7f9120f7093c2a32d9647abcaf21e6ad275b4fbec5b55969f978b1a97c7c86bf", size = 1720652, upload_time = "2026-01-03T17:30:50.974Z" }, + { url = "https://files.pythonhosted.org/packages/f7/7e/917fe18e3607af92657e4285498f500dca797ff8c918bd7d90b05abf6c2a/aiohttp-3.13.3-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:697753042d57f4bf7122cab985bf15d0cef23c770864580f5af4f52023a56bd6", size = 1692014, upload_time = "2026-01-03T17:30:52.729Z" }, + { url = "https://files.pythonhosted.org/packages/71/b6/cefa4cbc00d315d68973b671cf105b21a609c12b82d52e5d0c9ae61d2a09/aiohttp-3.13.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6de499a1a44e7de70735d0b39f67c8f25eb3d91eb3103be99ca0fa882cdd987d", size = 1759777, upload_time = "2026-01-03T17:30:54.537Z" }, + { url = "https://files.pythonhosted.org/packages/fb/e3/e06ee07b45e59e6d81498b591fc589629be1553abb2a82ce33efe2a7b068/aiohttp-3.13.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:37239e9f9a7ea9ac5bf6b92b0260b01f8a22281996da609206a84df860bc1261", size = 1861276, upload_time = "2026-01-03T17:30:56.512Z" }, + { url = "https://files.pythonhosted.org/packages/7c/24/75d274228acf35ceeb2850b8ce04de9dd7355ff7a0b49d607ee60c29c518/aiohttp-3.13.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f76c1e3fe7d7c8afad7ed193f89a292e1999608170dcc9751a7462a87dfd5bc0", size = 1743131, upload_time = "2026-01-03T17:30:58.256Z" }, + { url = 
"https://files.pythonhosted.org/packages/04/98/3d21dde21889b17ca2eea54fdcff21b27b93f45b7bb94ca029c31ab59dc3/aiohttp-3.13.3-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fc290605db2a917f6e81b0e1e0796469871f5af381ce15c604a3c5c7e51cb730", size = 1556863, upload_time = "2026-01-03T17:31:00.445Z" }, + { url = "https://files.pythonhosted.org/packages/9e/84/da0c3ab1192eaf64782b03971ab4055b475d0db07b17eff925e8c93b3aa5/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4021b51936308aeea0367b8f006dc999ca02bc118a0cc78c303f50a2ff6afb91", size = 1682793, upload_time = "2026-01-03T17:31:03.024Z" }, + { url = "https://files.pythonhosted.org/packages/ff/0f/5802ada182f575afa02cbd0ec5180d7e13a402afb7c2c03a9aa5e5d49060/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:49a03727c1bba9a97d3e93c9f93ca03a57300f484b6e935463099841261195d3", size = 1716676, upload_time = "2026-01-03T17:31:04.842Z" }, + { url = "https://files.pythonhosted.org/packages/3f/8c/714d53bd8b5a4560667f7bbbb06b20c2382f9c7847d198370ec6526af39c/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:3d9908a48eb7416dc1f4524e69f1d32e5d90e3981e4e37eb0aa1cd18f9cfa2a4", size = 1733217, upload_time = "2026-01-03T17:31:06.868Z" }, + { url = "https://files.pythonhosted.org/packages/7d/79/e2176f46d2e963facea939f5be2d26368ce543622be6f00a12844d3c991f/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:2712039939ec963c237286113c68dbad80a82a4281543f3abf766d9d73228998", size = 1552303, upload_time = "2026-01-03T17:31:08.958Z" }, + { url = "https://files.pythonhosted.org/packages/ab/6a/28ed4dea1759916090587d1fe57087b03e6c784a642b85ef48217b0277ae/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:7bfdc049127717581866fa4708791220970ce291c23e28ccf3922c700740fdc0", size = 1763673, upload_time = "2026-01-03T17:31:10.676Z" }, + { url = "https://files.pythonhosted.org/packages/e8/35/4a3daeb8b9fab49240d21c04d50732313295e4bd813a465d840236dd0ce1/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8057c98e0c8472d8846b9c79f56766bcc57e3e8ac7bfd510482332366c56c591", size = 1721120, upload_time = "2026-01-03T17:31:12.575Z" }, + { url = "https://files.pythonhosted.org/packages/bc/9f/d643bb3c5fb99547323e635e251c609fbbc660d983144cfebec529e09264/aiohttp-3.13.3-cp313-cp313-win32.whl", hash = "sha256:1449ceddcdbcf2e0446957863af03ebaaa03f94c090f945411b61269e2cb5daf", size = 427383, upload_time = "2026-01-03T17:31:14.382Z" }, + { url = "https://files.pythonhosted.org/packages/4e/f1/ab0395f8a79933577cdd996dd2f9aa6014af9535f65dddcf88204682fe62/aiohttp-3.13.3-cp313-cp313-win_amd64.whl", hash = "sha256:693781c45a4033d31d4187d2436f5ac701e7bbfe5df40d917736108c1cc7436e", size = 453899, upload_time = "2026-01-03T17:31:15.958Z" }, + { url = "https://files.pythonhosted.org/packages/99/36/5b6514a9f5d66f4e2597e40dea2e3db271e023eb7a5d22defe96ba560996/aiohttp-3.13.3-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:ea37047c6b367fd4bd632bff8077449b8fa034b69e812a18e0132a00fae6e808", size = 737238, upload_time = "2026-01-03T17:31:17.909Z" }, + { url = "https://files.pythonhosted.org/packages/f7/49/459327f0d5bcd8c6c9ca69e60fdeebc3622861e696490d8674a6d0cb90a6/aiohttp-3.13.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:6fc0e2337d1a4c3e6acafda6a78a39d4c14caea625124817420abceed36e2415", size = 492292, upload_time = "2026-01-03T17:31:19.919Z" }, + { url = 
"https://files.pythonhosted.org/packages/e8/0b/b97660c5fd05d3495b4eb27f2d0ef18dc1dc4eff7511a9bf371397ff0264/aiohttp-3.13.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c685f2d80bb67ca8c3837823ad76196b3694b0159d232206d1e461d3d434666f", size = 493021, upload_time = "2026-01-03T17:31:21.636Z" }, + { url = "https://files.pythonhosted.org/packages/54/d4/438efabdf74e30aeceb890c3290bbaa449780583b1270b00661126b8aae4/aiohttp-3.13.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:48e377758516d262bde50c2584fc6c578af272559c409eecbdd2bae1601184d6", size = 1717263, upload_time = "2026-01-03T17:31:23.296Z" }, + { url = "https://files.pythonhosted.org/packages/71/f2/7bddc7fd612367d1459c5bcf598a9e8f7092d6580d98de0e057eb42697ad/aiohttp-3.13.3-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:34749271508078b261c4abb1767d42b8d0c0cc9449c73a4df494777dc55f0687", size = 1669107, upload_time = "2026-01-03T17:31:25.334Z" }, + { url = "https://files.pythonhosted.org/packages/00/5a/1aeaecca40e22560f97610a329e0e5efef5e0b5afdf9f857f0d93839ab2e/aiohttp-3.13.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:82611aeec80eb144416956ec85b6ca45a64d76429c1ed46ae1b5f86c6e0c9a26", size = 1760196, upload_time = "2026-01-03T17:31:27.394Z" }, + { url = "https://files.pythonhosted.org/packages/f8/f8/0ff6992bea7bd560fc510ea1c815f87eedd745fe035589c71ce05612a19a/aiohttp-3.13.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2fff83cfc93f18f215896e3a190e8e5cb413ce01553901aca925176e7568963a", size = 1843591, upload_time = "2026-01-03T17:31:29.238Z" }, + { url = "https://files.pythonhosted.org/packages/e3/d1/e30e537a15f53485b61f5be525f2157da719819e8377298502aebac45536/aiohttp-3.13.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bbe7d4cecacb439e2e2a8a1a7b935c25b812af7a5fd26503a66dadf428e79ec1", size = 1720277, upload_time = "2026-01-03T17:31:31.053Z" }, + { url = "https://files.pythonhosted.org/packages/84/45/23f4c451d8192f553d38d838831ebbc156907ea6e05557f39563101b7717/aiohttp-3.13.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b928f30fe49574253644b1ca44b1b8adbd903aa0da4b9054a6c20fc7f4092a25", size = 1548575, upload_time = "2026-01-03T17:31:32.87Z" }, + { url = "https://files.pythonhosted.org/packages/6a/ed/0a42b127a43712eda7807e7892c083eadfaf8429ca8fb619662a530a3aab/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7b5e8fe4de30df199155baaf64f2fcd604f4c678ed20910db8e2c66dc4b11603", size = 1679455, upload_time = "2026-01-03T17:31:34.76Z" }, + { url = "https://files.pythonhosted.org/packages/2e/b5/c05f0c2b4b4fe2c9d55e73b6d3ed4fd6c9dc2684b1d81cbdf77e7fad9adb/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:8542f41a62bcc58fc7f11cf7c90e0ec324ce44950003feb70640fc2a9092c32a", size = 1687417, upload_time = "2026-01-03T17:31:36.699Z" }, + { url = "https://files.pythonhosted.org/packages/c9/6b/915bc5dad66aef602b9e459b5a973529304d4e89ca86999d9d75d80cbd0b/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:5e1d8c8b8f1d91cd08d8f4a3c2b067bfca6ec043d3ff36de0f3a715feeedf926", size = 1729968, upload_time = "2026-01-03T17:31:38.622Z" }, + { url = "https://files.pythonhosted.org/packages/11/3b/e84581290a9520024a08640b63d07673057aec5ca548177a82026187ba73/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = 
"sha256:90455115e5da1c3c51ab619ac57f877da8fd6d73c05aacd125c5ae9819582aba", size = 1545690, upload_time = "2026-01-03T17:31:40.57Z" }, + { url = "https://files.pythonhosted.org/packages/f5/04/0c3655a566c43fd647c81b895dfe361b9f9ad6d58c19309d45cff52d6c3b/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:042e9e0bcb5fba81886c8b4fbb9a09d6b8a00245fd8d88e4d989c1f96c74164c", size = 1746390, upload_time = "2026-01-03T17:31:42.857Z" }, + { url = "https://files.pythonhosted.org/packages/1f/53/71165b26978f719c3419381514c9690bd5980e764a09440a10bb816ea4ab/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2eb752b102b12a76ca02dff751a801f028b4ffbbc478840b473597fc91a9ed43", size = 1702188, upload_time = "2026-01-03T17:31:44.984Z" }, + { url = "https://files.pythonhosted.org/packages/29/a7/cbe6c9e8e136314fa1980da388a59d2f35f35395948a08b6747baebb6aa6/aiohttp-3.13.3-cp314-cp314-win32.whl", hash = "sha256:b556c85915d8efaed322bf1bdae9486aa0f3f764195a0fb6ee962e5c71ef5ce1", size = 433126, upload_time = "2026-01-03T17:31:47.463Z" }, + { url = "https://files.pythonhosted.org/packages/de/56/982704adea7d3b16614fc5936014e9af85c0e34b58f9046655817f04306e/aiohttp-3.13.3-cp314-cp314-win_amd64.whl", hash = "sha256:9bf9f7a65e7aa20dd764151fb3d616c81088f91f8df39c3893a536e279b4b984", size = 459128, upload_time = "2026-01-03T17:31:49.2Z" }, + { url = "https://files.pythonhosted.org/packages/6c/2a/3c79b638a9c3d4658d345339d22070241ea341ed4e07b5ac60fb0f418003/aiohttp-3.13.3-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:05861afbbec40650d8a07ea324367cb93e9e8cc7762e04dd4405df99fa65159c", size = 769512, upload_time = "2026-01-03T17:31:51.134Z" }, + { url = "https://files.pythonhosted.org/packages/29/b9/3e5014d46c0ab0db8707e0ac2711ed28c4da0218c358a4e7c17bae0d8722/aiohttp-3.13.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2fc82186fadc4a8316768d61f3722c230e2c1dcab4200d52d2ebdf2482e47592", size = 506444, upload_time = "2026-01-03T17:31:52.85Z" }, + { url = "https://files.pythonhosted.org/packages/90/03/c1d4ef9a054e151cd7839cdc497f2638f00b93cbe8043983986630d7a80c/aiohttp-3.13.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0add0900ff220d1d5c5ebbf99ed88b0c1bbf87aa7e4262300ed1376a6b13414f", size = 510798, upload_time = "2026-01-03T17:31:54.91Z" }, + { url = "https://files.pythonhosted.org/packages/ea/76/8c1e5abbfe8e127c893fe7ead569148a4d5a799f7cf958d8c09f3eedf097/aiohttp-3.13.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:568f416a4072fbfae453dcf9a99194bbb8bdeab718e08ee13dfa2ba0e4bebf29", size = 1868835, upload_time = "2026-01-03T17:31:56.733Z" }, + { url = "https://files.pythonhosted.org/packages/8e/ac/984c5a6f74c363b01ff97adc96a3976d9c98940b8969a1881575b279ac5d/aiohttp-3.13.3-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:add1da70de90a2569c5e15249ff76a631ccacfe198375eead4aadf3b8dc849dc", size = 1720486, upload_time = "2026-01-03T17:31:58.65Z" }, + { url = "https://files.pythonhosted.org/packages/b2/9a/b7039c5f099c4eb632138728828b33428585031a1e658d693d41d07d89d1/aiohttp-3.13.3-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:10b47b7ba335d2e9b1239fa571131a87e2d8ec96b333e68b2a305e7a98b0bae2", size = 1847951, upload_time = "2026-01-03T17:32:00.989Z" }, + { url = 
"https://files.pythonhosted.org/packages/3c/02/3bec2b9a1ba3c19ff89a43a19324202b8eb187ca1e928d8bdac9bbdddebd/aiohttp-3.13.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3dd4dce1c718e38081c8f35f323209d4c1df7d4db4bab1b5c88a6b4d12b74587", size = 1941001, upload_time = "2026-01-03T17:32:03.122Z" }, + { url = "https://files.pythonhosted.org/packages/37/df/d879401cedeef27ac4717f6426c8c36c3091c6e9f08a9178cc87549c537f/aiohttp-3.13.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:34bac00a67a812570d4a460447e1e9e06fae622946955f939051e7cc895cfab8", size = 1797246, upload_time = "2026-01-03T17:32:05.255Z" }, + { url = "https://files.pythonhosted.org/packages/8d/15/be122de1f67e6953add23335c8ece6d314ab67c8bebb3f181063010795a7/aiohttp-3.13.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a19884d2ee70b06d9204b2727a7b9f983d0c684c650254679e716b0b77920632", size = 1627131, upload_time = "2026-01-03T17:32:07.607Z" }, + { url = "https://files.pythonhosted.org/packages/12/12/70eedcac9134cfa3219ab7af31ea56bc877395b1ac30d65b1bc4b27d0438/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5f8ca7f2bb6ba8348a3614c7918cc4bb73268c5ac2a207576b7afea19d3d9f64", size = 1795196, upload_time = "2026-01-03T17:32:09.59Z" }, + { url = "https://files.pythonhosted.org/packages/32/11/b30e1b1cd1f3054af86ebe60df96989c6a414dd87e27ad16950eee420bea/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:b0d95340658b9d2f11d9697f59b3814a9d3bb4b7a7c20b131df4bcef464037c0", size = 1782841, upload_time = "2026-01-03T17:32:11.445Z" }, + { url = "https://files.pythonhosted.org/packages/88/0d/d98a9367b38912384a17e287850f5695c528cff0f14f791ce8ee2e4f7796/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:a1e53262fd202e4b40b70c3aff944a8155059beedc8a89bba9dc1f9ef06a1b56", size = 1795193, upload_time = "2026-01-03T17:32:13.705Z" }, + { url = "https://files.pythonhosted.org/packages/43/a5/a2dfd1f5ff5581632c7f6a30e1744deda03808974f94f6534241ef60c751/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:d60ac9663f44168038586cab2157e122e46bdef09e9368b37f2d82d354c23f72", size = 1621979, upload_time = "2026-01-03T17:32:15.965Z" }, + { url = "https://files.pythonhosted.org/packages/fa/f0/12973c382ae7c1cccbc4417e129c5bf54c374dfb85af70893646e1f0e749/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:90751b8eed69435bac9ff4e3d2f6b3af1f57e37ecb0fbeee59c0174c9e2d41df", size = 1822193, upload_time = "2026-01-03T17:32:18.219Z" }, + { url = "https://files.pythonhosted.org/packages/3c/5f/24155e30ba7f8c96918af1350eb0663e2430aad9e001c0489d89cd708ab1/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:fc353029f176fd2b3ec6cfc71be166aba1936fe5d73dd1992ce289ca6647a9aa", size = 1769801, upload_time = "2026-01-03T17:32:20.25Z" }, + { url = "https://files.pythonhosted.org/packages/eb/f8/7314031ff5c10e6ece114da79b338ec17eeff3a079e53151f7e9f43c4723/aiohttp-3.13.3-cp314-cp314t-win32.whl", hash = "sha256:2e41b18a58da1e474a057b3d35248d8320029f61d70a37629535b16a0c8f3767", size = 466523, upload_time = "2026-01-03T17:32:22.215Z" }, + { url = "https://files.pythonhosted.org/packages/b4/63/278a98c715ae467624eafe375542d8ba9b4383a016df8fdefe0ae28382a7/aiohttp-3.13.3-cp314-cp314t-win_amd64.whl", hash = "sha256:44531a36aa2264a1860089ffd4dce7baf875ee5a6079d5fb42e261c704ef7344", size = 499694, upload_time = "2026-01-03T17:32:24.546Z" }, ] [[package]] 
name = "aioitertools" version = "0.13.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/fd/3c/53c4a17a05fb9ea2313ee1777ff53f5e001aefd5cc85aa2f4c2d982e1e38/aioitertools-0.13.0.tar.gz", hash = "sha256:620bd241acc0bbb9ec819f1ab215866871b4bbd1f73836a55f799200ee86950c", size = 19322 } +sdist = { url = "https://files.pythonhosted.org/packages/fd/3c/53c4a17a05fb9ea2313ee1777ff53f5e001aefd5cc85aa2f4c2d982e1e38/aioitertools-0.13.0.tar.gz", hash = "sha256:620bd241acc0bbb9ec819f1ab215866871b4bbd1f73836a55f799200ee86950c", size = 19322, upload_time = "2025-11-06T22:17:07.609Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/10/a1/510b0a7fadc6f43a6ce50152e69dbd86415240835868bb0bd9b5b88b1e06/aioitertools-0.13.0-py3-none-any.whl", hash = "sha256:0be0292b856f08dfac90e31f4739432f4cb6d7520ab9eb73e143f4f2fa5259be", size = 24182 }, + { url = "https://files.pythonhosted.org/packages/10/a1/510b0a7fadc6f43a6ce50152e69dbd86415240835868bb0bd9b5b88b1e06/aioitertools-0.13.0-py3-none-any.whl", hash = "sha256:0be0292b856f08dfac90e31f4739432f4cb6d7520ab9eb73e143f4f2fa5259be", size = 24182, upload_time = "2025-11-06T22:17:06.502Z" }, ] [[package]] @@ -166,18 +167,18 @@ dependencies = [ { name = "frozenlist" }, { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/61/62/06741b579156360248d1ec624842ad0edf697050bbaf7c3e46394e106ad1/aiosignal-1.4.0.tar.gz", hash = "sha256:f47eecd9468083c2029cc99945502cb7708b082c232f9aca65da147157b251c7", size = 25007 } +sdist = { url = "https://files.pythonhosted.org/packages/61/62/06741b579156360248d1ec624842ad0edf697050bbaf7c3e46394e106ad1/aiosignal-1.4.0.tar.gz", hash = "sha256:f47eecd9468083c2029cc99945502cb7708b082c232f9aca65da147157b251c7", size = 25007, upload_time = "2025-07-03T22:54:43.528Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/fb/76/641ae371508676492379f16e2fa48f4e2c11741bd63c48be4b12a6b09cba/aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e", size = 7490 }, + { url = "https://files.pythonhosted.org/packages/fb/76/641ae371508676492379f16e2fa48f4e2c11741bd63c48be4b12a6b09cba/aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e", size = 7490, upload_time = "2025-07-03T22:54:42.156Z" }, ] [[package]] name = "annotated-types" version = "0.7.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081 } +sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081, upload_time = "2024-05-20T21:33:25.928Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643 }, + { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = 
"sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload_time = "2024-05-20T21:33:24.1Z" }, ] [[package]] @@ -188,27 +189,27 @@ dependencies = [ { name = "idna" }, { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/96/f0/5eb65b2bb0d09ac6776f2eb54adee6abe8228ea05b20a5ad0e4945de8aac/anyio-4.12.1.tar.gz", hash = "sha256:41cfcc3a4c85d3f05c932da7c26d0201ac36f72abd4435ba90d0464a3ffed703", size = 228685 } +sdist = { url = "https://files.pythonhosted.org/packages/96/f0/5eb65b2bb0d09ac6776f2eb54adee6abe8228ea05b20a5ad0e4945de8aac/anyio-4.12.1.tar.gz", hash = "sha256:41cfcc3a4c85d3f05c932da7c26d0201ac36f72abd4435ba90d0464a3ffed703", size = 228685, upload_time = "2026-01-06T11:45:21.246Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592 }, + { url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload_time = "2026-01-06T11:45:19.497Z" }, ] [[package]] name = "argcomplete" version = "3.6.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/38/61/0b9ae6399dd4a58d8c1b1dc5a27d6f2808023d0b5dd3104bb99f45a33ff6/argcomplete-3.6.3.tar.gz", hash = "sha256:62e8ed4fd6a45864acc8235409461b72c9a28ee785a2011cc5eb78318786c89c", size = 73754 } +sdist = { url = "https://files.pythonhosted.org/packages/38/61/0b9ae6399dd4a58d8c1b1dc5a27d6f2808023d0b5dd3104bb99f45a33ff6/argcomplete-3.6.3.tar.gz", hash = "sha256:62e8ed4fd6a45864acc8235409461b72c9a28ee785a2011cc5eb78318786c89c", size = 73754, upload_time = "2025-10-20T03:33:34.741Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/74/f5/9373290775639cb67a2fce7f629a1c240dce9f12fe927bc32b2736e16dfc/argcomplete-3.6.3-py3-none-any.whl", hash = "sha256:f5007b3a600ccac5d25bbce33089211dfd49eab4a7718da3f10e3082525a92ce", size = 43846 }, + { url = "https://files.pythonhosted.org/packages/74/f5/9373290775639cb67a2fce7f629a1c240dce9f12fe927bc32b2736e16dfc/argcomplete-3.6.3-py3-none-any.whl", hash = "sha256:f5007b3a600ccac5d25bbce33089211dfd49eab4a7718da3f10e3082525a92ce", size = 43846, upload_time = "2025-10-20T03:33:33.021Z" }, ] [[package]] name = "attrs" version = "25.4.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6b/5c/685e6633917e101e5dcb62b9dd76946cbb57c26e133bae9e0cd36033c0a9/attrs-25.4.0.tar.gz", hash = "sha256:16d5969b87f0859ef33a48b35d55ac1be6e42ae49d5e853b597db70c35c57e11", size = 934251 } +sdist = { url = "https://files.pythonhosted.org/packages/6b/5c/685e6633917e101e5dcb62b9dd76946cbb57c26e133bae9e0cd36033c0a9/attrs-25.4.0.tar.gz", hash = "sha256:16d5969b87f0859ef33a48b35d55ac1be6e42ae49d5e853b597db70c35c57e11", size = 934251, upload_time = "2025-10-06T13:54:44.725Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3a/2a/7cc015f5b9f5db42b7d48157e23356022889fc354a2813c15934b7cb5c0e/attrs-25.4.0-py3-none-any.whl", hash = "sha256:adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373", size = 67615 }, + { url = 
"https://files.pythonhosted.org/packages/3a/2a/7cc015f5b9f5db42b7d48157e23356022889fc354a2813c15934b7cb5c0e/attrs-25.4.0-py3-none-any.whl", hash = "sha256:adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373", size = 67615, upload_time = "2025-10-06T13:54:43.17Z" }, ] [[package]] @@ -219,9 +220,9 @@ dependencies = [ { name = "soupsieve" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c3/b0/1c6a16426d389813b48d95e26898aff79abbde42ad353958ad95cc8c9b21/beautifulsoup4-4.14.3.tar.gz", hash = "sha256:6292b1c5186d356bba669ef9f7f051757099565ad9ada5dd630bd9de5fa7fb86", size = 627737 } +sdist = { url = "https://files.pythonhosted.org/packages/c3/b0/1c6a16426d389813b48d95e26898aff79abbde42ad353958ad95cc8c9b21/beautifulsoup4-4.14.3.tar.gz", hash = "sha256:6292b1c5186d356bba669ef9f7f051757099565ad9ada5dd630bd9de5fa7fb86", size = 627737, upload_time = "2025-11-30T15:08:26.084Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1a/39/47f9197bdd44df24d67ac8893641e16f386c984a0619ef2ee4c51fbbc019/beautifulsoup4-4.14.3-py3-none-any.whl", hash = "sha256:0918bfe44902e6ad8d57732ba310582e98da931428d231a5ecb9e7c703a735bb", size = 107721 }, + { url = "https://files.pythonhosted.org/packages/1a/39/47f9197bdd44df24d67ac8893641e16f386c984a0619ef2ee4c51fbbc019/beautifulsoup4-4.14.3-py3-none-any.whl", hash = "sha256:0918bfe44902e6ad8d57732ba310582e98da931428d231a5ecb9e7c703a735bb", size = 107721, upload_time = "2025-11-30T15:08:24.087Z" }, ] [[package]] @@ -236,24 +237,24 @@ dependencies = [ { name = "platformdirs" }, { name = "pytokens" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c4/d9/07b458a3f1c525ac392b5edc6b191ff140b596f9d77092429417a54e249d/black-25.12.0.tar.gz", hash = "sha256:8d3dd9cea14bff7ddc0eb243c811cdb1a011ebb4800a5f0335a01a68654796a7", size = 659264 } +sdist = { url = "https://files.pythonhosted.org/packages/c4/d9/07b458a3f1c525ac392b5edc6b191ff140b596f9d77092429417a54e249d/black-25.12.0.tar.gz", hash = "sha256:8d3dd9cea14bff7ddc0eb243c811cdb1a011ebb4800a5f0335a01a68654796a7", size = 659264, upload_time = "2025-12-08T01:40:52.501Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d1/bd/26083f805115db17fda9877b3c7321d08c647df39d0df4c4ca8f8450593e/black-25.12.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:31f96b7c98c1ddaeb07dc0f56c652e25bdedaac76d5b68a059d998b57c55594a", size = 1924178 }, - { url = "https://files.pythonhosted.org/packages/89/6b/ea00d6651561e2bdd9231c4177f4f2ae19cc13a0b0574f47602a7519b6ca/black-25.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:05dd459a19e218078a1f98178c13f861fe6a9a5f88fc969ca4d9b49eb1809783", size = 1742643 }, - { url = "https://files.pythonhosted.org/packages/6d/f3/360fa4182e36e9875fabcf3a9717db9d27a8d11870f21cff97725c54f35b/black-25.12.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c1f68c5eff61f226934be6b5b80296cf6939e5d2f0c2f7d543ea08b204bfaf59", size = 1800158 }, - { url = "https://files.pythonhosted.org/packages/f8/08/2c64830cb6616278067e040acca21d4f79727b23077633953081c9445d61/black-25.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:274f940c147ddab4442d316b27f9e332ca586d39c85ecf59ebdea82cc9ee8892", size = 1426197 }, - { url = "https://files.pythonhosted.org/packages/d4/60/a93f55fd9b9816b7432cf6842f0e3000fdd5b7869492a04b9011a133ee37/black-25.12.0-cp312-cp312-win_arm64.whl", hash = "sha256:169506ba91ef21e2e0591563deda7f00030cb466e747c4b09cb0a9dae5db2f43", size = 1237266 }, - { url = 
"https://files.pythonhosted.org/packages/c8/52/c551e36bc95495d2aa1a37d50566267aa47608c81a53f91daa809e03293f/black-25.12.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a05ddeb656534c3e27a05a29196c962877c83fa5503db89e68857d1161ad08a5", size = 1923809 }, - { url = "https://files.pythonhosted.org/packages/a0/f7/aac9b014140ee56d247e707af8db0aae2e9efc28d4a8aba92d0abd7ae9d1/black-25.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9ec77439ef3e34896995503865a85732c94396edcc739f302c5673a2315e1e7f", size = 1742384 }, - { url = "https://files.pythonhosted.org/packages/74/98/38aaa018b2ab06a863974c12b14a6266badc192b20603a81b738c47e902e/black-25.12.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0e509c858adf63aa61d908061b52e580c40eae0dfa72415fa47ac01b12e29baf", size = 1798761 }, - { url = "https://files.pythonhosted.org/packages/16/3a/a8ac542125f61574a3f015b521ca83b47321ed19bb63fe6d7560f348bfe1/black-25.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:252678f07f5bac4ff0d0e9b261fbb029fa530cfa206d0a636a34ab445ef8ca9d", size = 1429180 }, - { url = "https://files.pythonhosted.org/packages/e6/2d/bdc466a3db9145e946762d52cd55b1385509d9f9004fec1c97bdc8debbfb/black-25.12.0-cp313-cp313-win_arm64.whl", hash = "sha256:bc5b1c09fe3c931ddd20ee548511c64ebf964ada7e6f0763d443947fd1c603ce", size = 1239350 }, - { url = "https://files.pythonhosted.org/packages/35/46/1d8f2542210c502e2ae1060b2e09e47af6a5e5963cb78e22ec1a11170b28/black-25.12.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:0a0953b134f9335c2434864a643c842c44fba562155c738a2a37a4d61f00cad5", size = 1917015 }, - { url = "https://files.pythonhosted.org/packages/41/37/68accadf977672beb8e2c64e080f568c74159c1aaa6414b4cd2aef2d7906/black-25.12.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:2355bbb6c3b76062870942d8cc450d4f8ac71f9c93c40122762c8784df49543f", size = 1741830 }, - { url = "https://files.pythonhosted.org/packages/ac/76/03608a9d8f0faad47a3af3a3c8c53af3367f6c0dd2d23a84710456c7ac56/black-25.12.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9678bd991cc793e81d19aeeae57966ee02909877cb65838ccffef24c3ebac08f", size = 1791450 }, - { url = "https://files.pythonhosted.org/packages/06/99/b2a4bd7dfaea7964974f947e1c76d6886d65fe5d24f687df2d85406b2609/black-25.12.0-cp314-cp314-win_amd64.whl", hash = "sha256:97596189949a8aad13ad12fcbb4ae89330039b96ad6742e6f6b45e75ad5cfd83", size = 1452042 }, - { url = "https://files.pythonhosted.org/packages/b2/7c/d9825de75ae5dd7795d007681b752275ea85a1c5d83269b4b9c754c2aaab/black-25.12.0-cp314-cp314-win_arm64.whl", hash = "sha256:778285d9ea197f34704e3791ea9404cd6d07595745907dd2ce3da7a13627b29b", size = 1267446 }, - { url = "https://files.pythonhosted.org/packages/68/11/21331aed19145a952ad28fca2756a1433ee9308079bd03bd898e903a2e53/black-25.12.0-py3-none-any.whl", hash = "sha256:48ceb36c16dbc84062740049eef990bb2ce07598272e673c17d1a7720c71c828", size = 206191 }, + { url = "https://files.pythonhosted.org/packages/d1/bd/26083f805115db17fda9877b3c7321d08c647df39d0df4c4ca8f8450593e/black-25.12.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:31f96b7c98c1ddaeb07dc0f56c652e25bdedaac76d5b68a059d998b57c55594a", size = 1924178, upload_time = "2025-12-08T01:49:51.048Z" }, + { url = "https://files.pythonhosted.org/packages/89/6b/ea00d6651561e2bdd9231c4177f4f2ae19cc13a0b0574f47602a7519b6ca/black-25.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:05dd459a19e218078a1f98178c13f861fe6a9a5f88fc969ca4d9b49eb1809783", size = 
1742643, upload_time = "2025-12-08T01:49:59.09Z" }, + { url = "https://files.pythonhosted.org/packages/6d/f3/360fa4182e36e9875fabcf3a9717db9d27a8d11870f21cff97725c54f35b/black-25.12.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c1f68c5eff61f226934be6b5b80296cf6939e5d2f0c2f7d543ea08b204bfaf59", size = 1800158, upload_time = "2025-12-08T01:44:27.301Z" }, + { url = "https://files.pythonhosted.org/packages/f8/08/2c64830cb6616278067e040acca21d4f79727b23077633953081c9445d61/black-25.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:274f940c147ddab4442d316b27f9e332ca586d39c85ecf59ebdea82cc9ee8892", size = 1426197, upload_time = "2025-12-08T01:45:51.198Z" }, + { url = "https://files.pythonhosted.org/packages/d4/60/a93f55fd9b9816b7432cf6842f0e3000fdd5b7869492a04b9011a133ee37/black-25.12.0-cp312-cp312-win_arm64.whl", hash = "sha256:169506ba91ef21e2e0591563deda7f00030cb466e747c4b09cb0a9dae5db2f43", size = 1237266, upload_time = "2025-12-08T01:45:10.556Z" }, + { url = "https://files.pythonhosted.org/packages/c8/52/c551e36bc95495d2aa1a37d50566267aa47608c81a53f91daa809e03293f/black-25.12.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a05ddeb656534c3e27a05a29196c962877c83fa5503db89e68857d1161ad08a5", size = 1923809, upload_time = "2025-12-08T01:46:55.126Z" }, + { url = "https://files.pythonhosted.org/packages/a0/f7/aac9b014140ee56d247e707af8db0aae2e9efc28d4a8aba92d0abd7ae9d1/black-25.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9ec77439ef3e34896995503865a85732c94396edcc739f302c5673a2315e1e7f", size = 1742384, upload_time = "2025-12-08T01:49:37.022Z" }, + { url = "https://files.pythonhosted.org/packages/74/98/38aaa018b2ab06a863974c12b14a6266badc192b20603a81b738c47e902e/black-25.12.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0e509c858adf63aa61d908061b52e580c40eae0dfa72415fa47ac01b12e29baf", size = 1798761, upload_time = "2025-12-08T01:46:05.386Z" }, + { url = "https://files.pythonhosted.org/packages/16/3a/a8ac542125f61574a3f015b521ca83b47321ed19bb63fe6d7560f348bfe1/black-25.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:252678f07f5bac4ff0d0e9b261fbb029fa530cfa206d0a636a34ab445ef8ca9d", size = 1429180, upload_time = "2025-12-08T01:45:34.903Z" }, + { url = "https://files.pythonhosted.org/packages/e6/2d/bdc466a3db9145e946762d52cd55b1385509d9f9004fec1c97bdc8debbfb/black-25.12.0-cp313-cp313-win_arm64.whl", hash = "sha256:bc5b1c09fe3c931ddd20ee548511c64ebf964ada7e6f0763d443947fd1c603ce", size = 1239350, upload_time = "2025-12-08T01:46:09.458Z" }, + { url = "https://files.pythonhosted.org/packages/35/46/1d8f2542210c502e2ae1060b2e09e47af6a5e5963cb78e22ec1a11170b28/black-25.12.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:0a0953b134f9335c2434864a643c842c44fba562155c738a2a37a4d61f00cad5", size = 1917015, upload_time = "2025-12-08T01:53:27.987Z" }, + { url = "https://files.pythonhosted.org/packages/41/37/68accadf977672beb8e2c64e080f568c74159c1aaa6414b4cd2aef2d7906/black-25.12.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:2355bbb6c3b76062870942d8cc450d4f8ac71f9c93c40122762c8784df49543f", size = 1741830, upload_time = "2025-12-08T01:54:36.861Z" }, + { url = "https://files.pythonhosted.org/packages/ac/76/03608a9d8f0faad47a3af3a3c8c53af3367f6c0dd2d23a84710456c7ac56/black-25.12.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9678bd991cc793e81d19aeeae57966ee02909877cb65838ccffef24c3ebac08f", size = 1791450, upload_time = 
"2025-12-08T01:44:52.581Z" }, + { url = "https://files.pythonhosted.org/packages/06/99/b2a4bd7dfaea7964974f947e1c76d6886d65fe5d24f687df2d85406b2609/black-25.12.0-cp314-cp314-win_amd64.whl", hash = "sha256:97596189949a8aad13ad12fcbb4ae89330039b96ad6742e6f6b45e75ad5cfd83", size = 1452042, upload_time = "2025-12-08T01:46:13.188Z" }, + { url = "https://files.pythonhosted.org/packages/b2/7c/d9825de75ae5dd7795d007681b752275ea85a1c5d83269b4b9c754c2aaab/black-25.12.0-cp314-cp314-win_arm64.whl", hash = "sha256:778285d9ea197f34704e3791ea9404cd6d07595745907dd2ce3da7a13627b29b", size = 1267446, upload_time = "2025-12-08T01:46:14.497Z" }, + { url = "https://files.pythonhosted.org/packages/68/11/21331aed19145a952ad28fca2756a1433ee9308079bd03bd898e903a2e53/black-25.12.0-py3-none-any.whl", hash = "sha256:48ceb36c16dbc84062740049eef990bb2ce07598272e673c17d1a7720c71c828", size = 206191, upload_time = "2025-12-08T01:40:50.963Z" }, ] [[package]] @@ -263,29 +264,29 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d0/d0/d8cc8c9a4488a787e7fa430f6055e5bd1ddb22c340a751d9e901b82e2efe/blis-1.3.3.tar.gz", hash = "sha256:034d4560ff3cc43e8aa37e188451b0440e3261d989bb8a42ceee865607715ecd", size = 2644873 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/16/d1/429cf0cf693d4c7dc2efed969bd474e315aab636e4a95f66c4ed7264912d/blis-1.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2a1c74e100665f8e918ebdbae2794576adf1f691680b5cdb8b29578432f623ef", size = 6929663 }, - { url = "https://files.pythonhosted.org/packages/11/69/363c8df8d98b3cc97be19aad6aabb2c9c53f372490d79316bdee92d476e7/blis-1.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3f6c595185176ce021316263e1a1d636a3425b6c48366c1fd712d08d0b71849a", size = 1230939 }, - { url = "https://files.pythonhosted.org/packages/96/2a/fbf65d906d823d839076c5150a6f8eb5ecbc5f9135e0b6510609bda1e6b7/blis-1.3.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d734b19fba0be7944f272dfa7b443b37c61f9476d9ab054a9ac53555ceadd2e0", size = 2818835 }, - { url = "https://files.pythonhosted.org/packages/d5/ad/58deaa3ad856dd3cc96493e40ffd2ed043d18d4d304f85a65cde1ccbf644/blis-1.3.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ef6d6e2b599a3a2788eb6d9b443533961265aa4ec49d574ed4bb846e548dcdb", size = 11366550 }, - { url = "https://files.pythonhosted.org/packages/78/82/816a7adfe1f7acc8151f01ec86ef64467a3c833932d8f19f8e06613b8a4e/blis-1.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8c888438ae99c500422d50698e3028b65caa8ebb44e24204d87fda2df64058f7", size = 3023686 }, - { url = "https://files.pythonhosted.org/packages/1e/e2/0e93b865f648b5519360846669a35f28ee8f4e1d93d054f6850d8afbabde/blis-1.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8177879fd3590b5eecdd377f9deafb5dc8af6d684f065bd01553302fb3fcf9a7", size = 14250939 }, - { url = "https://files.pythonhosted.org/packages/20/07/fb43edc2ff0a6a367e4a94fc39eb3b85aa1e55e24cc857af2db145ce9f0d/blis-1.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:f20f7ad69aaffd1ce14fe77de557b6df9b61e0c9e582f75a843715d836b5c8af", size = 6192759 }, - { url = "https://files.pythonhosted.org/packages/e6/f7/d26e62d9be3d70473a63e0a5d30bae49c2fe138bebac224adddcdef8a7ce/blis-1.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1e647341f958421a86b028a2efe16ce19c67dba2a05f79e8f7e80b1ff45328aa", size = 6928322 }, - { url = 
"https://files.pythonhosted.org/packages/4a/78/750d12da388f714958eb2f2fd177652323bbe7ec528365c37129edd6eb84/blis-1.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d563160f874abb78a57e346f07312c5323f7ad67b6370052b6b17087ef234a8e", size = 1229635 }, - { url = "https://files.pythonhosted.org/packages/e8/36/eac4199c5b200a5f3e93cad197da8d26d909f218eb444c4f552647c95240/blis-1.3.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:30b8a5b90cb6cb81d1ada9ae05aa55fb8e70d9a0ae9db40d2401bb9c1c8f14c4", size = 2815650 }, - { url = "https://files.pythonhosted.org/packages/bf/51/472e7b36a6bedb5242a9757e7486f702c3619eff76e256735d0c8b1679c6/blis-1.3.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e9f5c53b277f6ac5b3ca30bc12ebab7ea16c8f8c36b14428abb56924213dc127", size = 11359008 }, - { url = "https://files.pythonhosted.org/packages/84/da/d0dfb6d6e6321ae44df0321384c32c322bd07b15740d7422727a1a49fc5d/blis-1.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6297e7616c158b305c9a8a4e47ca5fc9b0785194dd96c903b1a1591a7ca21ddf", size = 3011959 }, - { url = "https://files.pythonhosted.org/packages/20/c5/2b0b5e556fa0364ed671051ea078a6d6d7b979b1cfef78d64ad3ca5f0c7f/blis-1.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:3f966ca74f89f8a33e568b9a1d71992fc9a0d29a423e047f0a212643e21b5458", size = 14232456 }, - { url = "https://files.pythonhosted.org/packages/31/07/4cdc81a47bf862c0b06d91f1bc6782064e8b69ac9b5d4ff51d97e4ff03da/blis-1.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:7a0fc4b237a3a453bdc3c7ab48d91439fcd2d013b665c46948d9eaf9c3e45a97", size = 6192624 }, - { url = "https://files.pythonhosted.org/packages/5f/8a/80f7c68fbc24a76fc9c18522c46d6d69329c320abb18e26a707a5d874083/blis-1.3.3-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:c3e33cfbf22a418373766816343fcfcd0556012aa3ffdf562c29cddec448a415", size = 6934081 }, - { url = "https://files.pythonhosted.org/packages/e5/52/d1aa3a51a7fc299b0c89dcaa971922714f50b1202769eebbdaadd1b5cff7/blis-1.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:6f165930e8d3a85c606d2003211497e28d528c7416fbfeafb6b15600963f7c9b", size = 1231486 }, - { url = "https://files.pythonhosted.org/packages/99/4f/badc7bd7f74861b26c10123bba7b9d16f99cd9535ad0128780360713820f/blis-1.3.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:878d4d96d8f2c7a2459024f013f2e4e5f46d708b23437dae970d998e7bff14a0", size = 2814944 }, - { url = "https://files.pythonhosted.org/packages/72/a6/f62a3bd814ca19ec7e29ac889fd354adea1217df3183e10217de51e2eb8b/blis-1.3.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f36c0ca84a05ee5d3dbaa38056c4423c1fc29948b17a7923dd2fed8967375d74", size = 11345825 }, - { url = "https://files.pythonhosted.org/packages/d4/6c/671af79ee42bc4c968cae35c091ac89e8721c795bfa4639100670dc59139/blis-1.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:e5a662c48cd4aad5dae1a950345df23957524f071315837a4c6feb7d3b288990", size = 3008771 }, - { url = "https://files.pythonhosted.org/packages/be/92/7cd7f8490da7c98ee01557f2105885cc597217b0e7fd2eeb9e22cdd4ef23/blis-1.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:9de26fbd72bac900c273b76d46f0b45b77a28eace2e01f6ac6c2239531a413bb", size = 14219213 }, - { url = "https://files.pythonhosted.org/packages/0a/de/acae8e9f9a1f4bb393d41c8265898b0f29772e38eac14e9f69d191e2c006/blis-1.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:9e5fdf4211b1972400f8ff6dafe87cb689c5d84f046b4a76b207c0bd2270faaf", size = 6324695 }, +sdist = { 
url = "https://files.pythonhosted.org/packages/d0/d0/d8cc8c9a4488a787e7fa430f6055e5bd1ddb22c340a751d9e901b82e2efe/blis-1.3.3.tar.gz", hash = "sha256:034d4560ff3cc43e8aa37e188451b0440e3261d989bb8a42ceee865607715ecd", size = 2644873, upload_time = "2025-11-17T12:28:30.511Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/16/d1/429cf0cf693d4c7dc2efed969bd474e315aab636e4a95f66c4ed7264912d/blis-1.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2a1c74e100665f8e918ebdbae2794576adf1f691680b5cdb8b29578432f623ef", size = 6929663, upload_time = "2025-11-17T12:27:44.482Z" }, + { url = "https://files.pythonhosted.org/packages/11/69/363c8df8d98b3cc97be19aad6aabb2c9c53f372490d79316bdee92d476e7/blis-1.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3f6c595185176ce021316263e1a1d636a3425b6c48366c1fd712d08d0b71849a", size = 1230939, upload_time = "2025-11-17T12:27:46.19Z" }, + { url = "https://files.pythonhosted.org/packages/96/2a/fbf65d906d823d839076c5150a6f8eb5ecbc5f9135e0b6510609bda1e6b7/blis-1.3.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d734b19fba0be7944f272dfa7b443b37c61f9476d9ab054a9ac53555ceadd2e0", size = 2818835, upload_time = "2025-11-17T12:27:48.167Z" }, + { url = "https://files.pythonhosted.org/packages/d5/ad/58deaa3ad856dd3cc96493e40ffd2ed043d18d4d304f85a65cde1ccbf644/blis-1.3.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ef6d6e2b599a3a2788eb6d9b443533961265aa4ec49d574ed4bb846e548dcdb", size = 11366550, upload_time = "2025-11-17T12:27:49.958Z" }, + { url = "https://files.pythonhosted.org/packages/78/82/816a7adfe1f7acc8151f01ec86ef64467a3c833932d8f19f8e06613b8a4e/blis-1.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8c888438ae99c500422d50698e3028b65caa8ebb44e24204d87fda2df64058f7", size = 3023686, upload_time = "2025-11-17T12:27:52.062Z" }, + { url = "https://files.pythonhosted.org/packages/1e/e2/0e93b865f648b5519360846669a35f28ee8f4e1d93d054f6850d8afbabde/blis-1.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8177879fd3590b5eecdd377f9deafb5dc8af6d684f065bd01553302fb3fcf9a7", size = 14250939, upload_time = "2025-11-17T12:27:53.847Z" }, + { url = "https://files.pythonhosted.org/packages/20/07/fb43edc2ff0a6a367e4a94fc39eb3b85aa1e55e24cc857af2db145ce9f0d/blis-1.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:f20f7ad69aaffd1ce14fe77de557b6df9b61e0c9e582f75a843715d836b5c8af", size = 6192759, upload_time = "2025-11-17T12:27:56.176Z" }, + { url = "https://files.pythonhosted.org/packages/e6/f7/d26e62d9be3d70473a63e0a5d30bae49c2fe138bebac224adddcdef8a7ce/blis-1.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1e647341f958421a86b028a2efe16ce19c67dba2a05f79e8f7e80b1ff45328aa", size = 6928322, upload_time = "2025-11-17T12:27:57.965Z" }, + { url = "https://files.pythonhosted.org/packages/4a/78/750d12da388f714958eb2f2fd177652323bbe7ec528365c37129edd6eb84/blis-1.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d563160f874abb78a57e346f07312c5323f7ad67b6370052b6b17087ef234a8e", size = 1229635, upload_time = "2025-11-17T12:28:00.118Z" }, + { url = "https://files.pythonhosted.org/packages/e8/36/eac4199c5b200a5f3e93cad197da8d26d909f218eb444c4f552647c95240/blis-1.3.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:30b8a5b90cb6cb81d1ada9ae05aa55fb8e70d9a0ae9db40d2401bb9c1c8f14c4", size = 2815650, upload_time = "2025-11-17T12:28:02.544Z" }, + { url = 
"https://files.pythonhosted.org/packages/bf/51/472e7b36a6bedb5242a9757e7486f702c3619eff76e256735d0c8b1679c6/blis-1.3.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e9f5c53b277f6ac5b3ca30bc12ebab7ea16c8f8c36b14428abb56924213dc127", size = 11359008, upload_time = "2025-11-17T12:28:04.589Z" }, + { url = "https://files.pythonhosted.org/packages/84/da/d0dfb6d6e6321ae44df0321384c32c322bd07b15740d7422727a1a49fc5d/blis-1.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6297e7616c158b305c9a8a4e47ca5fc9b0785194dd96c903b1a1591a7ca21ddf", size = 3011959, upload_time = "2025-11-17T12:28:06.862Z" }, + { url = "https://files.pythonhosted.org/packages/20/c5/2b0b5e556fa0364ed671051ea078a6d6d7b979b1cfef78d64ad3ca5f0c7f/blis-1.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:3f966ca74f89f8a33e568b9a1d71992fc9a0d29a423e047f0a212643e21b5458", size = 14232456, upload_time = "2025-11-17T12:28:08.779Z" }, + { url = "https://files.pythonhosted.org/packages/31/07/4cdc81a47bf862c0b06d91f1bc6782064e8b69ac9b5d4ff51d97e4ff03da/blis-1.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:7a0fc4b237a3a453bdc3c7ab48d91439fcd2d013b665c46948d9eaf9c3e45a97", size = 6192624, upload_time = "2025-11-17T12:28:14.197Z" }, + { url = "https://files.pythonhosted.org/packages/5f/8a/80f7c68fbc24a76fc9c18522c46d6d69329c320abb18e26a707a5d874083/blis-1.3.3-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:c3e33cfbf22a418373766816343fcfcd0556012aa3ffdf562c29cddec448a415", size = 6934081, upload_time = "2025-11-17T12:28:16.436Z" }, + { url = "https://files.pythonhosted.org/packages/e5/52/d1aa3a51a7fc299b0c89dcaa971922714f50b1202769eebbdaadd1b5cff7/blis-1.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:6f165930e8d3a85c606d2003211497e28d528c7416fbfeafb6b15600963f7c9b", size = 1231486, upload_time = "2025-11-17T12:28:18.008Z" }, + { url = "https://files.pythonhosted.org/packages/99/4f/badc7bd7f74861b26c10123bba7b9d16f99cd9535ad0128780360713820f/blis-1.3.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:878d4d96d8f2c7a2459024f013f2e4e5f46d708b23437dae970d998e7bff14a0", size = 2814944, upload_time = "2025-11-17T12:28:19.654Z" }, + { url = "https://files.pythonhosted.org/packages/72/a6/f62a3bd814ca19ec7e29ac889fd354adea1217df3183e10217de51e2eb8b/blis-1.3.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f36c0ca84a05ee5d3dbaa38056c4423c1fc29948b17a7923dd2fed8967375d74", size = 11345825, upload_time = "2025-11-17T12:28:21.354Z" }, + { url = "https://files.pythonhosted.org/packages/d4/6c/671af79ee42bc4c968cae35c091ac89e8721c795bfa4639100670dc59139/blis-1.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:e5a662c48cd4aad5dae1a950345df23957524f071315837a4c6feb7d3b288990", size = 3008771, upload_time = "2025-11-17T12:28:23.637Z" }, + { url = "https://files.pythonhosted.org/packages/be/92/7cd7f8490da7c98ee01557f2105885cc597217b0e7fd2eeb9e22cdd4ef23/blis-1.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:9de26fbd72bac900c273b76d46f0b45b77a28eace2e01f6ac6c2239531a413bb", size = 14219213, upload_time = "2025-11-17T12:28:26.143Z" }, + { url = "https://files.pythonhosted.org/packages/0a/de/acae8e9f9a1f4bb393d41c8265898b0f29772e38eac14e9f69d191e2c006/blis-1.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:9e5fdf4211b1972400f8ff6dafe87cb689c5d84f046b4a76b207c0bd2270faaf", size = 6324695, upload_time = "2025-11-17T12:28:28.401Z" }, ] [[package]] @@ -297,9 +298,9 @@ dependencies = [ { name = "jmespath" }, { name = "s3transfer" }, ] 
-sdist = { url = "https://files.pythonhosted.org/packages/ed/f9/6ef8feb52c3cce5ec3967a535a6114b57ac7949fd166b0f3090c2b06e4e5/boto3-1.40.61.tar.gz", hash = "sha256:d6c56277251adf6c2bdd25249feae625abe4966831676689ff23b4694dea5b12", size = 111535 } +sdist = { url = "https://files.pythonhosted.org/packages/ed/f9/6ef8feb52c3cce5ec3967a535a6114b57ac7949fd166b0f3090c2b06e4e5/boto3-1.40.61.tar.gz", hash = "sha256:d6c56277251adf6c2bdd25249feae625abe4966831676689ff23b4694dea5b12", size = 111535, upload_time = "2025-10-28T19:26:57.247Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/61/24/3bf865b07d15fea85b63504856e137029b6acbc73762496064219cdb265d/boto3-1.40.61-py3-none-any.whl", hash = "sha256:6b9c57b2a922b5d8c17766e29ed792586a818098efe84def27c8f582b33f898c", size = 139321 }, + { url = "https://files.pythonhosted.org/packages/61/24/3bf865b07d15fea85b63504856e137029b6acbc73762496064219cdb265d/boto3-1.40.61-py3-none-any.whl", hash = "sha256:6b9c57b2a922b5d8c17766e29ed792586a818098efe84def27c8f582b33f898c", size = 139321, upload_time = "2025-10-28T19:26:55.007Z" }, ] [[package]] @@ -311,27 +312,27 @@ dependencies = [ { name = "python-dateutil" }, { name = "urllib3" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/28/a3/81d3a47c2dbfd76f185d3b894f2ad01a75096c006a2dd91f237dca182188/botocore-1.40.61.tar.gz", hash = "sha256:a2487ad69b090f9cccd64cf07c7021cd80ee9c0655ad974f87045b02f3ef52cd", size = 14393956 } +sdist = { url = "https://files.pythonhosted.org/packages/28/a3/81d3a47c2dbfd76f185d3b894f2ad01a75096c006a2dd91f237dca182188/botocore-1.40.61.tar.gz", hash = "sha256:a2487ad69b090f9cccd64cf07c7021cd80ee9c0655ad974f87045b02f3ef52cd", size = 14393956, upload_time = "2025-10-28T19:26:46.108Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/38/c5/f6ce561004db45f0b847c2cd9b19c67c6bf348a82018a48cb718be6b58b0/botocore-1.40.61-py3-none-any.whl", hash = "sha256:17ebae412692fd4824f99cde0f08d50126dc97954008e5ba2b522eb049238aa7", size = 14055973 }, + { url = "https://files.pythonhosted.org/packages/38/c5/f6ce561004db45f0b847c2cd9b19c67c6bf348a82018a48cb718be6b58b0/botocore-1.40.61-py3-none-any.whl", hash = "sha256:17ebae412692fd4824f99cde0f08d50126dc97954008e5ba2b522eb049238aa7", size = 14055973, upload_time = "2025-10-28T19:26:42.15Z" }, ] [[package]] name = "bottle" version = "0.12.25" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/fd/04/1c09ab851a52fe6bc063fd0df758504edede5cc741bd2e807bf434a09215/bottle-0.12.25.tar.gz", hash = "sha256:e1a9c94970ae6d710b3fb4526294dfeb86f2cb4a81eff3a4b98dc40fb0e5e021", size = 74231 } +sdist = { url = "https://files.pythonhosted.org/packages/fd/04/1c09ab851a52fe6bc063fd0df758504edede5cc741bd2e807bf434a09215/bottle-0.12.25.tar.gz", hash = "sha256:e1a9c94970ae6d710b3fb4526294dfeb86f2cb4a81eff3a4b98dc40fb0e5e021", size = 74231, upload_time = "2023-03-04T15:34:18.083Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bb/1f/5977ea88c6a3df6199db97d320e5da816d415d1eb75a987a1f6823d5cc9d/bottle-0.12.25-py3-none-any.whl", hash = "sha256:d6f15f9d422670b7c073d63bd8d287b135388da187a0f3e3c19293626ce034ea", size = 90181 }, + { url = "https://files.pythonhosted.org/packages/bb/1f/5977ea88c6a3df6199db97d320e5da816d415d1eb75a987a1f6823d5cc9d/bottle-0.12.25-py3-none-any.whl", hash = "sha256:d6f15f9d422670b7c073d63bd8d287b135388da187a0f3e3c19293626ce034ea", size = 90181, upload_time = "2023-03-04T15:34:16.243Z" }, ] [[package]] name = "catalogue" version = "2.0.10" source = { 
registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/38/b4/244d58127e1cdf04cf2dc7d9566f0d24ef01d5ce21811bab088ecc62b5ea/catalogue-2.0.10.tar.gz", hash = "sha256:4f56daa940913d3f09d589c191c74e5a6d51762b3a9e37dd53b7437afd6cda15", size = 19561 } +sdist = { url = "https://files.pythonhosted.org/packages/38/b4/244d58127e1cdf04cf2dc7d9566f0d24ef01d5ce21811bab088ecc62b5ea/catalogue-2.0.10.tar.gz", hash = "sha256:4f56daa940913d3f09d589c191c74e5a6d51762b3a9e37dd53b7437afd6cda15", size = 19561, upload_time = "2023-09-25T06:29:24.962Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/9e/96/d32b941a501ab566a16358d68b6eb4e4acc373fab3c3c4d7d9e649f7b4bb/catalogue-2.0.10-py3-none-any.whl", hash = "sha256:58c2de0020aa90f4a2da7dfad161bf7b3b054c86a5f09fcedc0b2b740c109a9f", size = 17325 }, + { url = "https://files.pythonhosted.org/packages/9e/96/d32b941a501ab566a16358d68b6eb4e4acc373fab3c3c4d7d9e649f7b4bb/catalogue-2.0.10-py3-none-any.whl", hash = "sha256:58c2de0020aa90f4a2da7dfad161bf7b3b054c86a5f09fcedc0b2b740c109a9f", size = 17325, upload_time = "2023-09-25T06:29:23.337Z" }, ] [[package]] @@ -341,18 +342,18 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "attrs" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/fc/da/ff3239eb4241cbc6f8b69f53d4ca27a178d51f9e5a954f1a3588c8227dc5/cattrs-22.2.0.tar.gz", hash = "sha256:f0eed5642399423cf656e7b66ce92cdc5b963ecafd041d1b24d136fdde7acf6d", size = 30050 } +sdist = { url = "https://files.pythonhosted.org/packages/fc/da/ff3239eb4241cbc6f8b69f53d4ca27a178d51f9e5a954f1a3588c8227dc5/cattrs-22.2.0.tar.gz", hash = "sha256:f0eed5642399423cf656e7b66ce92cdc5b963ecafd041d1b24d136fdde7acf6d", size = 30050, upload_time = "2022-10-03T11:00:37.889Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/43/3b/1d34fc4449174dfd2bc5ad7047a23edb6558b2e4b5a41b25a8ad6655c6c7/cattrs-22.2.0-py3-none-any.whl", hash = "sha256:bc12b1f0d000b9f9bee83335887d532a1d3e99a833d1bf0882151c97d3e68c21", size = 35673 }, + { url = "https://files.pythonhosted.org/packages/43/3b/1d34fc4449174dfd2bc5ad7047a23edb6558b2e4b5a41b25a8ad6655c6c7/cattrs-22.2.0-py3-none-any.whl", hash = "sha256:bc12b1f0d000b9f9bee83335887d532a1d3e99a833d1bf0882151c97d3e68c21", size = 35673, upload_time = "2022-10-03T11:00:36.109Z" }, ] [[package]] name = "certifi" version = "2026.1.4" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e0/2d/a891ca51311197f6ad14a7ef42e2399f36cf2f9bd44752b3dc4eab60fdc5/certifi-2026.1.4.tar.gz", hash = "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120", size = 154268 } +sdist = { url = "https://files.pythonhosted.org/packages/e0/2d/a891ca51311197f6ad14a7ef42e2399f36cf2f9bd44752b3dc4eab60fdc5/certifi-2026.1.4.tar.gz", hash = "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120", size = 154268, upload_time = "2026-01-04T02:42:41.825Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900 }, + { url = "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900, upload_time = "2026-01-04T02:42:40.15Z" }, ] 
[[package]] @@ -362,111 +363,120 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pycparser", marker = "implementation_name != 'PyPy'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ea/47/4f61023ea636104d4f16ab488e268b93008c3d0bb76893b1b31db1f96802/cffi-2.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d02d6655b0e54f54c4ef0b94eb6be0607b70853c45ce98bd278dc7de718be5d", size = 185271 }, - { url = "https://files.pythonhosted.org/packages/df/a2/781b623f57358e360d62cdd7a8c681f074a71d445418a776eef0aadb4ab4/cffi-2.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8eca2a813c1cb7ad4fb74d368c2ffbbb4789d377ee5bb8df98373c2cc0dee76c", size = 181048 }, - { url = "https://files.pythonhosted.org/packages/ff/df/a4f0fbd47331ceeba3d37c2e51e9dfc9722498becbeec2bd8bc856c9538a/cffi-2.0.0-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:21d1152871b019407d8ac3985f6775c079416c282e431a4da6afe7aefd2bccbe", size = 212529 }, - { url = "https://files.pythonhosted.org/packages/d5/72/12b5f8d3865bf0f87cf1404d8c374e7487dcf097a1c91c436e72e6badd83/cffi-2.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b21e08af67b8a103c71a250401c78d5e0893beff75e28c53c98f4de42f774062", size = 220097 }, - { url = "https://files.pythonhosted.org/packages/c2/95/7a135d52a50dfa7c882ab0ac17e8dc11cec9d55d2c18dda414c051c5e69e/cffi-2.0.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:1e3a615586f05fc4065a8b22b8152f0c1b00cdbc60596d187c2a74f9e3036e4e", size = 207983 }, - { url = "https://files.pythonhosted.org/packages/3a/c8/15cb9ada8895957ea171c62dc78ff3e99159ee7adb13c0123c001a2546c1/cffi-2.0.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:81afed14892743bbe14dacb9e36d9e0e504cd204e0b165062c488942b9718037", size = 206519 }, - { url = "https://files.pythonhosted.org/packages/78/2d/7fa73dfa841b5ac06c7b8855cfc18622132e365f5b81d02230333ff26e9e/cffi-2.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3e17ed538242334bf70832644a32a7aae3d83b57567f9fd60a26257e992b79ba", size = 219572 }, - { url = "https://files.pythonhosted.org/packages/07/e0/267e57e387b4ca276b90f0434ff88b2c2241ad72b16d31836adddfd6031b/cffi-2.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3925dd22fa2b7699ed2617149842d2e6adde22b262fcbfada50e3d195e4b3a94", size = 222963 }, - { url = "https://files.pythonhosted.org/packages/b6/75/1f2747525e06f53efbd878f4d03bac5b859cbc11c633d0fb81432d98a795/cffi-2.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2c8f814d84194c9ea681642fd164267891702542f028a15fc97d4674b6206187", size = 221361 }, - { url = "https://files.pythonhosted.org/packages/7b/2b/2b6435f76bfeb6bbf055596976da087377ede68df465419d192acf00c437/cffi-2.0.0-cp312-cp312-win32.whl", hash = "sha256:da902562c3e9c550df360bfa53c035b2f241fed6d9aef119048073680ace4a18", size = 172932 }, - { url = "https://files.pythonhosted.org/packages/f8/ed/13bd4418627013bec4ed6e54283b1959cf6db888048c7cf4b4c3b5b36002/cffi-2.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:da68248800ad6320861f129cd9c1bf96ca849a2771a59e0344e88681905916f5", size = 183557 }, - { url = 
"https://files.pythonhosted.org/packages/95/31/9f7f93ad2f8eff1dbc1c3656d7ca5bfd8fb52c9d786b4dcf19b2d02217fa/cffi-2.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:4671d9dd5ec934cb9a73e7ee9676f9362aba54f7f34910956b84d727b0d73fb6", size = 177762 }, - { url = "https://files.pythonhosted.org/packages/4b/8d/a0a47a0c9e413a658623d014e91e74a50cdd2c423f7ccfd44086ef767f90/cffi-2.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb", size = 185230 }, - { url = "https://files.pythonhosted.org/packages/4a/d2/a6c0296814556c68ee32009d9c2ad4f85f2707cdecfd7727951ec228005d/cffi-2.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca", size = 181043 }, - { url = "https://files.pythonhosted.org/packages/b0/1e/d22cc63332bd59b06481ceaac49d6c507598642e2230f201649058a7e704/cffi-2.0.0-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:07b271772c100085dd28b74fa0cd81c8fb1a3ba18b21e03d7c27f3436a10606b", size = 212446 }, - { url = "https://files.pythonhosted.org/packages/a9/f5/a2c23eb03b61a0b8747f211eb716446c826ad66818ddc7810cc2cc19b3f2/cffi-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b", size = 220101 }, - { url = "https://files.pythonhosted.org/packages/f2/7f/e6647792fc5850d634695bc0e6ab4111ae88e89981d35ac269956605feba/cffi-2.0.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2", size = 207948 }, - { url = "https://files.pythonhosted.org/packages/cb/1e/a5a1bd6f1fb30f22573f76533de12a00bf274abcdc55c8edab639078abb6/cffi-2.0.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3", size = 206422 }, - { url = "https://files.pythonhosted.org/packages/98/df/0a1755e750013a2081e863e7cd37e0cdd02664372c754e5560099eb7aa44/cffi-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26", size = 219499 }, - { url = "https://files.pythonhosted.org/packages/50/e1/a969e687fcf9ea58e6e2a928ad5e2dd88cc12f6f0ab477e9971f2309b57c/cffi-2.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c", size = 222928 }, - { url = "https://files.pythonhosted.org/packages/36/54/0362578dd2c9e557a28ac77698ed67323ed5b9775ca9d3fe73fe191bb5d8/cffi-2.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b", size = 221302 }, - { url = "https://files.pythonhosted.org/packages/eb/6d/bf9bda840d5f1dfdbf0feca87fbdb64a918a69bca42cfa0ba7b137c48cb8/cffi-2.0.0-cp313-cp313-win32.whl", hash = "sha256:74a03b9698e198d47562765773b4a8309919089150a0bb17d829ad7b44b60d27", size = 172909 }, - { url = "https://files.pythonhosted.org/packages/37/18/6519e1ee6f5a1e579e04b9ddb6f1676c17368a7aba48299c3759bbc3c8b3/cffi-2.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:19f705ada2530c1167abacb171925dd886168931e0a7b78f5bffcae5c6b5be75", size = 183402 }, - { url = "https://files.pythonhosted.org/packages/cb/0e/02ceeec9a7d6ee63bb596121c2c8e9b3a9e150936f4fbef6ca1943e6137c/cffi-2.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:256f80b80ca3853f90c21b23ee78cd008713787b1b1e93eae9f3d6a7134abd91", size = 177780 }, - 
{ url = "https://files.pythonhosted.org/packages/92/c4/3ce07396253a83250ee98564f8d7e9789fab8e58858f35d07a9a2c78de9f/cffi-2.0.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fc33c5141b55ed366cfaad382df24fe7dcbc686de5be719b207bb248e3053dc5", size = 185320 }, - { url = "https://files.pythonhosted.org/packages/59/dd/27e9fa567a23931c838c6b02d0764611c62290062a6d4e8ff7863daf9730/cffi-2.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c654de545946e0db659b3400168c9ad31b5d29593291482c43e3564effbcee13", size = 181487 }, - { url = "https://files.pythonhosted.org/packages/d6/43/0e822876f87ea8a4ef95442c3d766a06a51fc5298823f884ef87aaad168c/cffi-2.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:24b6f81f1983e6df8db3adc38562c83f7d4a0c36162885ec7f7b77c7dcbec97b", size = 220049 }, - { url = "https://files.pythonhosted.org/packages/b4/89/76799151d9c2d2d1ead63c2429da9ea9d7aac304603de0c6e8764e6e8e70/cffi-2.0.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:12873ca6cb9b0f0d3a0da705d6086fe911591737a59f28b7936bdfed27c0d47c", size = 207793 }, - { url = "https://files.pythonhosted.org/packages/bb/dd/3465b14bb9e24ee24cb88c9e3730f6de63111fffe513492bf8c808a3547e/cffi-2.0.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:d9b97165e8aed9272a6bb17c01e3cc5871a594a446ebedc996e2397a1c1ea8ef", size = 206300 }, - { url = "https://files.pythonhosted.org/packages/47/d9/d83e293854571c877a92da46fdec39158f8d7e68da75bf73581225d28e90/cffi-2.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:afb8db5439b81cf9c9d0c80404b60c3cc9c3add93e114dcae767f1477cb53775", size = 219244 }, - { url = "https://files.pythonhosted.org/packages/2b/0f/1f177e3683aead2bb00f7679a16451d302c436b5cbf2505f0ea8146ef59e/cffi-2.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:737fe7d37e1a1bffe70bd5754ea763a62a066dc5913ca57e957824b72a85e205", size = 222828 }, - { url = "https://files.pythonhosted.org/packages/c6/0f/cafacebd4b040e3119dcb32fed8bdef8dfe94da653155f9d0b9dc660166e/cffi-2.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:38100abb9d1b1435bc4cc340bb4489635dc2f0da7456590877030c9b3d40b0c1", size = 220926 }, - { url = "https://files.pythonhosted.org/packages/3e/aa/df335faa45b395396fcbc03de2dfcab242cd61a9900e914fe682a59170b1/cffi-2.0.0-cp314-cp314-win32.whl", hash = "sha256:087067fa8953339c723661eda6b54bc98c5625757ea62e95eb4898ad5e776e9f", size = 175328 }, - { url = "https://files.pythonhosted.org/packages/bb/92/882c2d30831744296ce713f0feb4c1cd30f346ef747b530b5318715cc367/cffi-2.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:203a48d1fb583fc7d78a4c6655692963b860a417c0528492a6bc21f1aaefab25", size = 185650 }, - { url = "https://files.pythonhosted.org/packages/9f/2c/98ece204b9d35a7366b5b2c6539c350313ca13932143e79dc133ba757104/cffi-2.0.0-cp314-cp314-win_arm64.whl", hash = "sha256:dbd5c7a25a7cb98f5ca55d258b103a2054f859a46ae11aaf23134f9cc0d356ad", size = 180687 }, - { url = "https://files.pythonhosted.org/packages/3e/61/c768e4d548bfa607abcda77423448df8c471f25dbe64fb2ef6d555eae006/cffi-2.0.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:9a67fc9e8eb39039280526379fb3a70023d77caec1852002b4da7e8b270c4dd9", size = 188773 }, - { url = "https://files.pythonhosted.org/packages/2c/ea/5f76bce7cf6fcd0ab1a1058b5af899bfbef198bea4d5686da88471ea0336/cffi-2.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7a66c7204d8869299919db4d5069a82f1561581af12b11b3c9f48c584eb8743d", size = 185013 }, - { url = 
"https://files.pythonhosted.org/packages/be/b4/c56878d0d1755cf9caa54ba71e5d049479c52f9e4afc230f06822162ab2f/cffi-2.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7cc09976e8b56f8cebd752f7113ad07752461f48a58cbba644139015ac24954c", size = 221593 }, - { url = "https://files.pythonhosted.org/packages/e0/0d/eb704606dfe8033e7128df5e90fee946bbcb64a04fcdaa97321309004000/cffi-2.0.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:92b68146a71df78564e4ef48af17551a5ddd142e5190cdf2c5624d0c3ff5b2e8", size = 209354 }, - { url = "https://files.pythonhosted.org/packages/d8/19/3c435d727b368ca475fb8742ab97c9cb13a0de600ce86f62eab7fa3eea60/cffi-2.0.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b1e74d11748e7e98e2f426ab176d4ed720a64412b6a15054378afdb71e0f37dc", size = 208480 }, - { url = "https://files.pythonhosted.org/packages/d0/44/681604464ed9541673e486521497406fadcc15b5217c3e326b061696899a/cffi-2.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:28a3a209b96630bca57cce802da70c266eb08c6e97e5afd61a75611ee6c64592", size = 221584 }, - { url = "https://files.pythonhosted.org/packages/25/8e/342a504ff018a2825d395d44d63a767dd8ebc927ebda557fecdaca3ac33a/cffi-2.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7553fb2090d71822f02c629afe6042c299edf91ba1bf94951165613553984512", size = 224443 }, - { url = "https://files.pythonhosted.org/packages/e1/5e/b666bacbbc60fbf415ba9988324a132c9a7a0448a9a8f125074671c0f2c3/cffi-2.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6c6c373cfc5c83a975506110d17457138c8c63016b563cc9ed6e056a82f13ce4", size = 223437 }, - { url = "https://files.pythonhosted.org/packages/a0/1d/ec1a60bd1a10daa292d3cd6bb0b359a81607154fb8165f3ec95fe003b85c/cffi-2.0.0-cp314-cp314t-win32.whl", hash = "sha256:1fc9ea04857caf665289b7a75923f2c6ed559b8298a1b8c49e59f7dd95c8481e", size = 180487 }, - { url = "https://files.pythonhosted.org/packages/bf/41/4c1168c74fac325c0c8156f04b6749c8b6a8f405bbf91413ba088359f60d/cffi-2.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:d68b6cef7827e8641e8ef16f4494edda8b36104d79773a334beaa1e3521430f6", size = 191726 }, - { url = "https://files.pythonhosted.org/packages/ae/3a/dbeec9d1ee0844c679f6bb5d6ad4e9f198b1224f4e7a32825f47f6192b0c/cffi-2.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:0a1527a803f0a659de1af2e1fd700213caba79377e27e4693648c2923da066f9", size = 184195 }, +sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588, upload_time = "2025-09-08T23:24:04.541Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ea/47/4f61023ea636104d4f16ab488e268b93008c3d0bb76893b1b31db1f96802/cffi-2.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d02d6655b0e54f54c4ef0b94eb6be0607b70853c45ce98bd278dc7de718be5d", size = 185271, upload_time = "2025-09-08T23:22:44.795Z" }, + { url = "https://files.pythonhosted.org/packages/df/a2/781b623f57358e360d62cdd7a8c681f074a71d445418a776eef0aadb4ab4/cffi-2.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8eca2a813c1cb7ad4fb74d368c2ffbbb4789d377ee5bb8df98373c2cc0dee76c", size = 181048, upload_time = "2025-09-08T23:22:45.938Z" }, + { url = 
"https://files.pythonhosted.org/packages/ff/df/a4f0fbd47331ceeba3d37c2e51e9dfc9722498becbeec2bd8bc856c9538a/cffi-2.0.0-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:21d1152871b019407d8ac3985f6775c079416c282e431a4da6afe7aefd2bccbe", size = 212529, upload_time = "2025-09-08T23:22:47.349Z" }, + { url = "https://files.pythonhosted.org/packages/d5/72/12b5f8d3865bf0f87cf1404d8c374e7487dcf097a1c91c436e72e6badd83/cffi-2.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b21e08af67b8a103c71a250401c78d5e0893beff75e28c53c98f4de42f774062", size = 220097, upload_time = "2025-09-08T23:22:48.677Z" }, + { url = "https://files.pythonhosted.org/packages/c2/95/7a135d52a50dfa7c882ab0ac17e8dc11cec9d55d2c18dda414c051c5e69e/cffi-2.0.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:1e3a615586f05fc4065a8b22b8152f0c1b00cdbc60596d187c2a74f9e3036e4e", size = 207983, upload_time = "2025-09-08T23:22:50.06Z" }, + { url = "https://files.pythonhosted.org/packages/3a/c8/15cb9ada8895957ea171c62dc78ff3e99159ee7adb13c0123c001a2546c1/cffi-2.0.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:81afed14892743bbe14dacb9e36d9e0e504cd204e0b165062c488942b9718037", size = 206519, upload_time = "2025-09-08T23:22:51.364Z" }, + { url = "https://files.pythonhosted.org/packages/78/2d/7fa73dfa841b5ac06c7b8855cfc18622132e365f5b81d02230333ff26e9e/cffi-2.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3e17ed538242334bf70832644a32a7aae3d83b57567f9fd60a26257e992b79ba", size = 219572, upload_time = "2025-09-08T23:22:52.902Z" }, + { url = "https://files.pythonhosted.org/packages/07/e0/267e57e387b4ca276b90f0434ff88b2c2241ad72b16d31836adddfd6031b/cffi-2.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3925dd22fa2b7699ed2617149842d2e6adde22b262fcbfada50e3d195e4b3a94", size = 222963, upload_time = "2025-09-08T23:22:54.518Z" }, + { url = "https://files.pythonhosted.org/packages/b6/75/1f2747525e06f53efbd878f4d03bac5b859cbc11c633d0fb81432d98a795/cffi-2.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2c8f814d84194c9ea681642fd164267891702542f028a15fc97d4674b6206187", size = 221361, upload_time = "2025-09-08T23:22:55.867Z" }, + { url = "https://files.pythonhosted.org/packages/7b/2b/2b6435f76bfeb6bbf055596976da087377ede68df465419d192acf00c437/cffi-2.0.0-cp312-cp312-win32.whl", hash = "sha256:da902562c3e9c550df360bfa53c035b2f241fed6d9aef119048073680ace4a18", size = 172932, upload_time = "2025-09-08T23:22:57.188Z" }, + { url = "https://files.pythonhosted.org/packages/f8/ed/13bd4418627013bec4ed6e54283b1959cf6db888048c7cf4b4c3b5b36002/cffi-2.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:da68248800ad6320861f129cd9c1bf96ca849a2771a59e0344e88681905916f5", size = 183557, upload_time = "2025-09-08T23:22:58.351Z" }, + { url = "https://files.pythonhosted.org/packages/95/31/9f7f93ad2f8eff1dbc1c3656d7ca5bfd8fb52c9d786b4dcf19b2d02217fa/cffi-2.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:4671d9dd5ec934cb9a73e7ee9676f9362aba54f7f34910956b84d727b0d73fb6", size = 177762, upload_time = "2025-09-08T23:22:59.668Z" }, + { url = "https://files.pythonhosted.org/packages/4b/8d/a0a47a0c9e413a658623d014e91e74a50cdd2c423f7ccfd44086ef767f90/cffi-2.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb", size = 185230, upload_time = "2025-09-08T23:23:00.879Z" }, + { url = 
"https://files.pythonhosted.org/packages/4a/d2/a6c0296814556c68ee32009d9c2ad4f85f2707cdecfd7727951ec228005d/cffi-2.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca", size = 181043, upload_time = "2025-09-08T23:23:02.231Z" }, + { url = "https://files.pythonhosted.org/packages/b0/1e/d22cc63332bd59b06481ceaac49d6c507598642e2230f201649058a7e704/cffi-2.0.0-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:07b271772c100085dd28b74fa0cd81c8fb1a3ba18b21e03d7c27f3436a10606b", size = 212446, upload_time = "2025-09-08T23:23:03.472Z" }, + { url = "https://files.pythonhosted.org/packages/a9/f5/a2c23eb03b61a0b8747f211eb716446c826ad66818ddc7810cc2cc19b3f2/cffi-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b", size = 220101, upload_time = "2025-09-08T23:23:04.792Z" }, + { url = "https://files.pythonhosted.org/packages/f2/7f/e6647792fc5850d634695bc0e6ab4111ae88e89981d35ac269956605feba/cffi-2.0.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2", size = 207948, upload_time = "2025-09-08T23:23:06.127Z" }, + { url = "https://files.pythonhosted.org/packages/cb/1e/a5a1bd6f1fb30f22573f76533de12a00bf274abcdc55c8edab639078abb6/cffi-2.0.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3", size = 206422, upload_time = "2025-09-08T23:23:07.753Z" }, + { url = "https://files.pythonhosted.org/packages/98/df/0a1755e750013a2081e863e7cd37e0cdd02664372c754e5560099eb7aa44/cffi-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26", size = 219499, upload_time = "2025-09-08T23:23:09.648Z" }, + { url = "https://files.pythonhosted.org/packages/50/e1/a969e687fcf9ea58e6e2a928ad5e2dd88cc12f6f0ab477e9971f2309b57c/cffi-2.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c", size = 222928, upload_time = "2025-09-08T23:23:10.928Z" }, + { url = "https://files.pythonhosted.org/packages/36/54/0362578dd2c9e557a28ac77698ed67323ed5b9775ca9d3fe73fe191bb5d8/cffi-2.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b", size = 221302, upload_time = "2025-09-08T23:23:12.42Z" }, + { url = "https://files.pythonhosted.org/packages/eb/6d/bf9bda840d5f1dfdbf0feca87fbdb64a918a69bca42cfa0ba7b137c48cb8/cffi-2.0.0-cp313-cp313-win32.whl", hash = "sha256:74a03b9698e198d47562765773b4a8309919089150a0bb17d829ad7b44b60d27", size = 172909, upload_time = "2025-09-08T23:23:14.32Z" }, + { url = "https://files.pythonhosted.org/packages/37/18/6519e1ee6f5a1e579e04b9ddb6f1676c17368a7aba48299c3759bbc3c8b3/cffi-2.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:19f705ada2530c1167abacb171925dd886168931e0a7b78f5bffcae5c6b5be75", size = 183402, upload_time = "2025-09-08T23:23:15.535Z" }, + { url = "https://files.pythonhosted.org/packages/cb/0e/02ceeec9a7d6ee63bb596121c2c8e9b3a9e150936f4fbef6ca1943e6137c/cffi-2.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:256f80b80ca3853f90c21b23ee78cd008713787b1b1e93eae9f3d6a7134abd91", size = 177780, upload_time = "2025-09-08T23:23:16.761Z" }, + { url = 
"https://files.pythonhosted.org/packages/92/c4/3ce07396253a83250ee98564f8d7e9789fab8e58858f35d07a9a2c78de9f/cffi-2.0.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fc33c5141b55ed366cfaad382df24fe7dcbc686de5be719b207bb248e3053dc5", size = 185320, upload_time = "2025-09-08T23:23:18.087Z" }, + { url = "https://files.pythonhosted.org/packages/59/dd/27e9fa567a23931c838c6b02d0764611c62290062a6d4e8ff7863daf9730/cffi-2.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c654de545946e0db659b3400168c9ad31b5d29593291482c43e3564effbcee13", size = 181487, upload_time = "2025-09-08T23:23:19.622Z" }, + { url = "https://files.pythonhosted.org/packages/d6/43/0e822876f87ea8a4ef95442c3d766a06a51fc5298823f884ef87aaad168c/cffi-2.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:24b6f81f1983e6df8db3adc38562c83f7d4a0c36162885ec7f7b77c7dcbec97b", size = 220049, upload_time = "2025-09-08T23:23:20.853Z" }, + { url = "https://files.pythonhosted.org/packages/b4/89/76799151d9c2d2d1ead63c2429da9ea9d7aac304603de0c6e8764e6e8e70/cffi-2.0.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:12873ca6cb9b0f0d3a0da705d6086fe911591737a59f28b7936bdfed27c0d47c", size = 207793, upload_time = "2025-09-08T23:23:22.08Z" }, + { url = "https://files.pythonhosted.org/packages/bb/dd/3465b14bb9e24ee24cb88c9e3730f6de63111fffe513492bf8c808a3547e/cffi-2.0.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:d9b97165e8aed9272a6bb17c01e3cc5871a594a446ebedc996e2397a1c1ea8ef", size = 206300, upload_time = "2025-09-08T23:23:23.314Z" }, + { url = "https://files.pythonhosted.org/packages/47/d9/d83e293854571c877a92da46fdec39158f8d7e68da75bf73581225d28e90/cffi-2.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:afb8db5439b81cf9c9d0c80404b60c3cc9c3add93e114dcae767f1477cb53775", size = 219244, upload_time = "2025-09-08T23:23:24.541Z" }, + { url = "https://files.pythonhosted.org/packages/2b/0f/1f177e3683aead2bb00f7679a16451d302c436b5cbf2505f0ea8146ef59e/cffi-2.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:737fe7d37e1a1bffe70bd5754ea763a62a066dc5913ca57e957824b72a85e205", size = 222828, upload_time = "2025-09-08T23:23:26.143Z" }, + { url = "https://files.pythonhosted.org/packages/c6/0f/cafacebd4b040e3119dcb32fed8bdef8dfe94da653155f9d0b9dc660166e/cffi-2.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:38100abb9d1b1435bc4cc340bb4489635dc2f0da7456590877030c9b3d40b0c1", size = 220926, upload_time = "2025-09-08T23:23:27.873Z" }, + { url = "https://files.pythonhosted.org/packages/3e/aa/df335faa45b395396fcbc03de2dfcab242cd61a9900e914fe682a59170b1/cffi-2.0.0-cp314-cp314-win32.whl", hash = "sha256:087067fa8953339c723661eda6b54bc98c5625757ea62e95eb4898ad5e776e9f", size = 175328, upload_time = "2025-09-08T23:23:44.61Z" }, + { url = "https://files.pythonhosted.org/packages/bb/92/882c2d30831744296ce713f0feb4c1cd30f346ef747b530b5318715cc367/cffi-2.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:203a48d1fb583fc7d78a4c6655692963b860a417c0528492a6bc21f1aaefab25", size = 185650, upload_time = "2025-09-08T23:23:45.848Z" }, + { url = "https://files.pythonhosted.org/packages/9f/2c/98ece204b9d35a7366b5b2c6539c350313ca13932143e79dc133ba757104/cffi-2.0.0-cp314-cp314-win_arm64.whl", hash = "sha256:dbd5c7a25a7cb98f5ca55d258b103a2054f859a46ae11aaf23134f9cc0d356ad", size = 180687, upload_time = "2025-09-08T23:23:47.105Z" }, + { url = 
"https://files.pythonhosted.org/packages/3e/61/c768e4d548bfa607abcda77423448df8c471f25dbe64fb2ef6d555eae006/cffi-2.0.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:9a67fc9e8eb39039280526379fb3a70023d77caec1852002b4da7e8b270c4dd9", size = 188773, upload_time = "2025-09-08T23:23:29.347Z" }, + { url = "https://files.pythonhosted.org/packages/2c/ea/5f76bce7cf6fcd0ab1a1058b5af899bfbef198bea4d5686da88471ea0336/cffi-2.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7a66c7204d8869299919db4d5069a82f1561581af12b11b3c9f48c584eb8743d", size = 185013, upload_time = "2025-09-08T23:23:30.63Z" }, + { url = "https://files.pythonhosted.org/packages/be/b4/c56878d0d1755cf9caa54ba71e5d049479c52f9e4afc230f06822162ab2f/cffi-2.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7cc09976e8b56f8cebd752f7113ad07752461f48a58cbba644139015ac24954c", size = 221593, upload_time = "2025-09-08T23:23:31.91Z" }, + { url = "https://files.pythonhosted.org/packages/e0/0d/eb704606dfe8033e7128df5e90fee946bbcb64a04fcdaa97321309004000/cffi-2.0.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:92b68146a71df78564e4ef48af17551a5ddd142e5190cdf2c5624d0c3ff5b2e8", size = 209354, upload_time = "2025-09-08T23:23:33.214Z" }, + { url = "https://files.pythonhosted.org/packages/d8/19/3c435d727b368ca475fb8742ab97c9cb13a0de600ce86f62eab7fa3eea60/cffi-2.0.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b1e74d11748e7e98e2f426ab176d4ed720a64412b6a15054378afdb71e0f37dc", size = 208480, upload_time = "2025-09-08T23:23:34.495Z" }, + { url = "https://files.pythonhosted.org/packages/d0/44/681604464ed9541673e486521497406fadcc15b5217c3e326b061696899a/cffi-2.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:28a3a209b96630bca57cce802da70c266eb08c6e97e5afd61a75611ee6c64592", size = 221584, upload_time = "2025-09-08T23:23:36.096Z" }, + { url = "https://files.pythonhosted.org/packages/25/8e/342a504ff018a2825d395d44d63a767dd8ebc927ebda557fecdaca3ac33a/cffi-2.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7553fb2090d71822f02c629afe6042c299edf91ba1bf94951165613553984512", size = 224443, upload_time = "2025-09-08T23:23:37.328Z" }, + { url = "https://files.pythonhosted.org/packages/e1/5e/b666bacbbc60fbf415ba9988324a132c9a7a0448a9a8f125074671c0f2c3/cffi-2.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6c6c373cfc5c83a975506110d17457138c8c63016b563cc9ed6e056a82f13ce4", size = 223437, upload_time = "2025-09-08T23:23:38.945Z" }, + { url = "https://files.pythonhosted.org/packages/a0/1d/ec1a60bd1a10daa292d3cd6bb0b359a81607154fb8165f3ec95fe003b85c/cffi-2.0.0-cp314-cp314t-win32.whl", hash = "sha256:1fc9ea04857caf665289b7a75923f2c6ed559b8298a1b8c49e59f7dd95c8481e", size = 180487, upload_time = "2025-09-08T23:23:40.423Z" }, + { url = "https://files.pythonhosted.org/packages/bf/41/4c1168c74fac325c0c8156f04b6749c8b6a8f405bbf91413ba088359f60d/cffi-2.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:d68b6cef7827e8641e8ef16f4494edda8b36104d79773a334beaa1e3521430f6", size = 191726, upload_time = "2025-09-08T23:23:41.742Z" }, + { url = "https://files.pythonhosted.org/packages/ae/3a/dbeec9d1ee0844c679f6bb5d6ad4e9f198b1224f4e7a32825f47f6192b0c/cffi-2.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:0a1527a803f0a659de1af2e1fd700213caba79377e27e4693648c2923da066f9", size = 184195, upload_time = "2025-09-08T23:23:43.004Z" }, +] + +[[package]] +name = "cfgv" +version = "3.5.0" +source = { registry = "https://pypi.org/simple" } 
+sdist = { url = "https://files.pythonhosted.org/packages/4e/b5/721b8799b04bf9afe054a3899c6cf4e880fcf8563cc71c15610242490a0c/cfgv-3.5.0.tar.gz", hash = "sha256:d5b1034354820651caa73ede66a6294d6e95c1b00acc5e9b098e917404669132", size = 7334, upload_time = "2025-11-19T20:55:51.612Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/db/3c/33bac158f8ab7f89b2e59426d5fe2e4f63f7ed25df84c036890172b412b5/cfgv-3.5.0-py2.py3-none-any.whl", hash = "sha256:a8dc6b26ad22ff227d2634a65cb388215ce6cc96bbcc5cfde7641ae87e8dacc0", size = 7445, upload_time = "2025-11-19T20:55:50.744Z" }, ] [[package]] name = "charset-normalizer" version = "3.4.4" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/13/69/33ddede1939fdd074bce5434295f38fae7136463422fe4fd3e0e89b98062/charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a", size = 129418 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f3/85/1637cd4af66fa687396e757dec650f28025f2a2f5a5531a3208dc0ec43f2/charset_normalizer-3.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394", size = 208425 }, - { url = "https://files.pythonhosted.org/packages/9d/6a/04130023fef2a0d9c62d0bae2649b69f7b7d8d24ea5536feef50551029df/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25", size = 148162 }, - { url = "https://files.pythonhosted.org/packages/78/29/62328d79aa60da22c9e0b9a66539feae06ca0f5a4171ac4f7dc285b83688/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef", size = 144558 }, - { url = "https://files.pythonhosted.org/packages/86/bb/b32194a4bf15b88403537c2e120b817c61cd4ecffa9b6876e941c3ee38fe/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d", size = 161497 }, - { url = "https://files.pythonhosted.org/packages/19/89/a54c82b253d5b9b111dc74aca196ba5ccfcca8242d0fb64146d4d3183ff1/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8", size = 159240 }, - { url = "https://files.pythonhosted.org/packages/c0/10/d20b513afe03acc89ec33948320a5544d31f21b05368436d580dec4e234d/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86", size = 153471 }, - { url = "https://files.pythonhosted.org/packages/61/fa/fbf177b55bdd727010f9c0a3c49eefa1d10f960e5f09d1d887bf93c2e698/charset_normalizer-3.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a", size = 150864 }, - { url = "https://files.pythonhosted.org/packages/05/12/9fbc6a4d39c0198adeebbde20b619790e9236557ca59fc40e0e3cebe6f40/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f", size = 150647 }, - { url = 
"https://files.pythonhosted.org/packages/ad/1f/6a9a593d52e3e8c5d2b167daf8c6b968808efb57ef4c210acb907c365bc4/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc", size = 145110 }, - { url = "https://files.pythonhosted.org/packages/30/42/9a52c609e72471b0fc54386dc63c3781a387bb4fe61c20231a4ebcd58bdd/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf", size = 162839 }, - { url = "https://files.pythonhosted.org/packages/c4/5b/c0682bbf9f11597073052628ddd38344a3d673fda35a36773f7d19344b23/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15", size = 150667 }, - { url = "https://files.pythonhosted.org/packages/e4/24/a41afeab6f990cf2daf6cb8c67419b63b48cf518e4f56022230840c9bfb2/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9", size = 160535 }, - { url = "https://files.pythonhosted.org/packages/2a/e5/6a4ce77ed243c4a50a1fecca6aaaab419628c818a49434be428fe24c9957/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0", size = 154816 }, - { url = "https://files.pythonhosted.org/packages/a8/ef/89297262b8092b312d29cdb2517cb1237e51db8ecef2e9af5edbe7b683b1/charset_normalizer-3.4.4-cp312-cp312-win32.whl", hash = "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26", size = 99694 }, - { url = "https://files.pythonhosted.org/packages/3d/2d/1e5ed9dd3b3803994c155cd9aacb60c82c331bad84daf75bcb9c91b3295e/charset_normalizer-3.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525", size = 107131 }, - { url = "https://files.pythonhosted.org/packages/d0/d9/0ed4c7098a861482a7b6a95603edce4c0d9db2311af23da1fb2b75ec26fc/charset_normalizer-3.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3", size = 100390 }, - { url = "https://files.pythonhosted.org/packages/97/45/4b3a1239bbacd321068ea6e7ac28875b03ab8bc0aa0966452db17cd36714/charset_normalizer-3.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794", size = 208091 }, - { url = "https://files.pythonhosted.org/packages/7d/62/73a6d7450829655a35bb88a88fca7d736f9882a27eacdca2c6d505b57e2e/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed", size = 147936 }, - { url = "https://files.pythonhosted.org/packages/89/c5/adb8c8b3d6625bef6d88b251bbb0d95f8205831b987631ab0c8bb5d937c2/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72", size = 144180 }, - { url = "https://files.pythonhosted.org/packages/91/ed/9706e4070682d1cc219050b6048bfd293ccf67b3d4f5a4f39207453d4b99/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328", size = 161346 }, - { url = 
"https://files.pythonhosted.org/packages/d5/0d/031f0d95e4972901a2f6f09ef055751805ff541511dc1252ba3ca1f80cf5/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede", size = 158874 }, - { url = "https://files.pythonhosted.org/packages/f5/83/6ab5883f57c9c801ce5e5677242328aa45592be8a00644310a008d04f922/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894", size = 153076 }, - { url = "https://files.pythonhosted.org/packages/75/1e/5ff781ddf5260e387d6419959ee89ef13878229732732ee73cdae01800f2/charset_normalizer-3.4.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1", size = 150601 }, - { url = "https://files.pythonhosted.org/packages/d7/57/71be810965493d3510a6ca79b90c19e48696fb1ff964da319334b12677f0/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490", size = 150376 }, - { url = "https://files.pythonhosted.org/packages/e5/d5/c3d057a78c181d007014feb7e9f2e65905a6c4ef182c0ddf0de2924edd65/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44", size = 144825 }, - { url = "https://files.pythonhosted.org/packages/e6/8c/d0406294828d4976f275ffbe66f00266c4b3136b7506941d87c00cab5272/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133", size = 162583 }, - { url = "https://files.pythonhosted.org/packages/d7/24/e2aa1f18c8f15c4c0e932d9287b8609dd30ad56dbe41d926bd846e22fb8d/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3", size = 150366 }, - { url = "https://files.pythonhosted.org/packages/e4/5b/1e6160c7739aad1e2df054300cc618b06bf784a7a164b0f238360721ab86/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e", size = 160300 }, - { url = "https://files.pythonhosted.org/packages/7a/10/f882167cd207fbdd743e55534d5d9620e095089d176d55cb22d5322f2afd/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc", size = 154465 }, - { url = "https://files.pythonhosted.org/packages/89/66/c7a9e1b7429be72123441bfdbaf2bc13faab3f90b933f664db506dea5915/charset_normalizer-3.4.4-cp313-cp313-win32.whl", hash = "sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac", size = 99404 }, - { url = "https://files.pythonhosted.org/packages/c4/26/b9924fa27db384bdcd97ab83b4f0a8058d96ad9626ead570674d5e737d90/charset_normalizer-3.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14", size = 107092 }, - { url = "https://files.pythonhosted.org/packages/af/8f/3ed4bfa0c0c72a7ca17f0380cd9e4dd842b09f664e780c13cff1dcf2ef1b/charset_normalizer-3.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2", size = 100408 }, - { url = 
"https://files.pythonhosted.org/packages/2a/35/7051599bd493e62411d6ede36fd5af83a38f37c4767b92884df7301db25d/charset_normalizer-3.4.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd", size = 207746 }, - { url = "https://files.pythonhosted.org/packages/10/9a/97c8d48ef10d6cd4fcead2415523221624bf58bcf68a802721a6bc807c8f/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb", size = 147889 }, - { url = "https://files.pythonhosted.org/packages/10/bf/979224a919a1b606c82bd2c5fa49b5c6d5727aa47b4312bb27b1734f53cd/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e", size = 143641 }, - { url = "https://files.pythonhosted.org/packages/ba/33/0ad65587441fc730dc7bd90e9716b30b4702dc7b617e6ba4997dc8651495/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14", size = 160779 }, - { url = "https://files.pythonhosted.org/packages/67/ed/331d6b249259ee71ddea93f6f2f0a56cfebd46938bde6fcc6f7b9a3d0e09/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191", size = 159035 }, - { url = "https://files.pythonhosted.org/packages/67/ff/f6b948ca32e4f2a4576aa129d8bed61f2e0543bf9f5f2b7fc3758ed005c9/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838", size = 152542 }, - { url = "https://files.pythonhosted.org/packages/16/85/276033dcbcc369eb176594de22728541a925b2632f9716428c851b149e83/charset_normalizer-3.4.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6", size = 149524 }, - { url = "https://files.pythonhosted.org/packages/9e/f2/6a2a1f722b6aba37050e626530a46a68f74e63683947a8acff92569f979a/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e", size = 150395 }, - { url = "https://files.pythonhosted.org/packages/60/bb/2186cb2f2bbaea6338cad15ce23a67f9b0672929744381e28b0592676824/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c", size = 143680 }, - { url = "https://files.pythonhosted.org/packages/7d/a5/bf6f13b772fbb2a90360eb620d52ed8f796f3c5caee8398c3b2eb7b1c60d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090", size = 162045 }, - { url = "https://files.pythonhosted.org/packages/df/c5/d1be898bf0dc3ef9030c3825e5d3b83f2c528d207d246cbabe245966808d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152", size = 149687 }, - { url = "https://files.pythonhosted.org/packages/a5/42/90c1f7b9341eef50c8a1cb3f098ac43b0508413f33affd762855f67a410e/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = 
"sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828", size = 160014 }, - { url = "https://files.pythonhosted.org/packages/76/be/4d3ee471e8145d12795ab655ece37baed0929462a86e72372fd25859047c/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec", size = 154044 }, - { url = "https://files.pythonhosted.org/packages/b0/6f/8f7af07237c34a1defe7defc565a9bc1807762f672c0fde711a4b22bf9c0/charset_normalizer-3.4.4-cp314-cp314-win32.whl", hash = "sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9", size = 99940 }, - { url = "https://files.pythonhosted.org/packages/4b/51/8ade005e5ca5b0d80fb4aff72a3775b325bdc3d27408c8113811a7cbe640/charset_normalizer-3.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c", size = 107104 }, - { url = "https://files.pythonhosted.org/packages/da/5f/6b8f83a55bb8278772c5ae54a577f3099025f9ade59d0136ac24a0df4bde/charset_normalizer-3.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2", size = 100743 }, - { url = "https://files.pythonhosted.org/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402 }, +sdist = { url = "https://files.pythonhosted.org/packages/13/69/33ddede1939fdd074bce5434295f38fae7136463422fe4fd3e0e89b98062/charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a", size = 129418, upload_time = "2025-10-14T04:42:32.879Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f3/85/1637cd4af66fa687396e757dec650f28025f2a2f5a5531a3208dc0ec43f2/charset_normalizer-3.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394", size = 208425, upload_time = "2025-10-14T04:40:53.353Z" }, + { url = "https://files.pythonhosted.org/packages/9d/6a/04130023fef2a0d9c62d0bae2649b69f7b7d8d24ea5536feef50551029df/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25", size = 148162, upload_time = "2025-10-14T04:40:54.558Z" }, + { url = "https://files.pythonhosted.org/packages/78/29/62328d79aa60da22c9e0b9a66539feae06ca0f5a4171ac4f7dc285b83688/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef", size = 144558, upload_time = "2025-10-14T04:40:55.677Z" }, + { url = "https://files.pythonhosted.org/packages/86/bb/b32194a4bf15b88403537c2e120b817c61cd4ecffa9b6876e941c3ee38fe/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d", size = 161497, upload_time = "2025-10-14T04:40:57.217Z" }, + { url = "https://files.pythonhosted.org/packages/19/89/a54c82b253d5b9b111dc74aca196ba5ccfcca8242d0fb64146d4d3183ff1/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8", size = 159240, upload_time = "2025-10-14T04:40:58.358Z" }, + { url 
= "https://files.pythonhosted.org/packages/c0/10/d20b513afe03acc89ec33948320a5544d31f21b05368436d580dec4e234d/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86", size = 153471, upload_time = "2025-10-14T04:40:59.468Z" }, + { url = "https://files.pythonhosted.org/packages/61/fa/fbf177b55bdd727010f9c0a3c49eefa1d10f960e5f09d1d887bf93c2e698/charset_normalizer-3.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a", size = 150864, upload_time = "2025-10-14T04:41:00.623Z" }, + { url = "https://files.pythonhosted.org/packages/05/12/9fbc6a4d39c0198adeebbde20b619790e9236557ca59fc40e0e3cebe6f40/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f", size = 150647, upload_time = "2025-10-14T04:41:01.754Z" }, + { url = "https://files.pythonhosted.org/packages/ad/1f/6a9a593d52e3e8c5d2b167daf8c6b968808efb57ef4c210acb907c365bc4/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc", size = 145110, upload_time = "2025-10-14T04:41:03.231Z" }, + { url = "https://files.pythonhosted.org/packages/30/42/9a52c609e72471b0fc54386dc63c3781a387bb4fe61c20231a4ebcd58bdd/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf", size = 162839, upload_time = "2025-10-14T04:41:04.715Z" }, + { url = "https://files.pythonhosted.org/packages/c4/5b/c0682bbf9f11597073052628ddd38344a3d673fda35a36773f7d19344b23/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15", size = 150667, upload_time = "2025-10-14T04:41:05.827Z" }, + { url = "https://files.pythonhosted.org/packages/e4/24/a41afeab6f990cf2daf6cb8c67419b63b48cf518e4f56022230840c9bfb2/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9", size = 160535, upload_time = "2025-10-14T04:41:06.938Z" }, + { url = "https://files.pythonhosted.org/packages/2a/e5/6a4ce77ed243c4a50a1fecca6aaaab419628c818a49434be428fe24c9957/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0", size = 154816, upload_time = "2025-10-14T04:41:08.101Z" }, + { url = "https://files.pythonhosted.org/packages/a8/ef/89297262b8092b312d29cdb2517cb1237e51db8ecef2e9af5edbe7b683b1/charset_normalizer-3.4.4-cp312-cp312-win32.whl", hash = "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26", size = 99694, upload_time = "2025-10-14T04:41:09.23Z" }, + { url = "https://files.pythonhosted.org/packages/3d/2d/1e5ed9dd3b3803994c155cd9aacb60c82c331bad84daf75bcb9c91b3295e/charset_normalizer-3.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525", size = 107131, upload_time = "2025-10-14T04:41:10.467Z" }, + { url = "https://files.pythonhosted.org/packages/d0/d9/0ed4c7098a861482a7b6a95603edce4c0d9db2311af23da1fb2b75ec26fc/charset_normalizer-3.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3", size = 100390, upload_time = 
"2025-10-14T04:41:11.915Z" }, + { url = "https://files.pythonhosted.org/packages/97/45/4b3a1239bbacd321068ea6e7ac28875b03ab8bc0aa0966452db17cd36714/charset_normalizer-3.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794", size = 208091, upload_time = "2025-10-14T04:41:13.346Z" }, + { url = "https://files.pythonhosted.org/packages/7d/62/73a6d7450829655a35bb88a88fca7d736f9882a27eacdca2c6d505b57e2e/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed", size = 147936, upload_time = "2025-10-14T04:41:14.461Z" }, + { url = "https://files.pythonhosted.org/packages/89/c5/adb8c8b3d6625bef6d88b251bbb0d95f8205831b987631ab0c8bb5d937c2/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72", size = 144180, upload_time = "2025-10-14T04:41:15.588Z" }, + { url = "https://files.pythonhosted.org/packages/91/ed/9706e4070682d1cc219050b6048bfd293ccf67b3d4f5a4f39207453d4b99/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328", size = 161346, upload_time = "2025-10-14T04:41:16.738Z" }, + { url = "https://files.pythonhosted.org/packages/d5/0d/031f0d95e4972901a2f6f09ef055751805ff541511dc1252ba3ca1f80cf5/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede", size = 158874, upload_time = "2025-10-14T04:41:17.923Z" }, + { url = "https://files.pythonhosted.org/packages/f5/83/6ab5883f57c9c801ce5e5677242328aa45592be8a00644310a008d04f922/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894", size = 153076, upload_time = "2025-10-14T04:41:19.106Z" }, + { url = "https://files.pythonhosted.org/packages/75/1e/5ff781ddf5260e387d6419959ee89ef13878229732732ee73cdae01800f2/charset_normalizer-3.4.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1", size = 150601, upload_time = "2025-10-14T04:41:20.245Z" }, + { url = "https://files.pythonhosted.org/packages/d7/57/71be810965493d3510a6ca79b90c19e48696fb1ff964da319334b12677f0/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490", size = 150376, upload_time = "2025-10-14T04:41:21.398Z" }, + { url = "https://files.pythonhosted.org/packages/e5/d5/c3d057a78c181d007014feb7e9f2e65905a6c4ef182c0ddf0de2924edd65/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44", size = 144825, upload_time = "2025-10-14T04:41:22.583Z" }, + { url = "https://files.pythonhosted.org/packages/e6/8c/d0406294828d4976f275ffbe66f00266c4b3136b7506941d87c00cab5272/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133", size = 162583, upload_time = "2025-10-14T04:41:23.754Z" }, + { url = 
"https://files.pythonhosted.org/packages/d7/24/e2aa1f18c8f15c4c0e932d9287b8609dd30ad56dbe41d926bd846e22fb8d/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3", size = 150366, upload_time = "2025-10-14T04:41:25.27Z" }, + { url = "https://files.pythonhosted.org/packages/e4/5b/1e6160c7739aad1e2df054300cc618b06bf784a7a164b0f238360721ab86/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e", size = 160300, upload_time = "2025-10-14T04:41:26.725Z" }, + { url = "https://files.pythonhosted.org/packages/7a/10/f882167cd207fbdd743e55534d5d9620e095089d176d55cb22d5322f2afd/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc", size = 154465, upload_time = "2025-10-14T04:41:28.322Z" }, + { url = "https://files.pythonhosted.org/packages/89/66/c7a9e1b7429be72123441bfdbaf2bc13faab3f90b933f664db506dea5915/charset_normalizer-3.4.4-cp313-cp313-win32.whl", hash = "sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac", size = 99404, upload_time = "2025-10-14T04:41:29.95Z" }, + { url = "https://files.pythonhosted.org/packages/c4/26/b9924fa27db384bdcd97ab83b4f0a8058d96ad9626ead570674d5e737d90/charset_normalizer-3.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14", size = 107092, upload_time = "2025-10-14T04:41:31.188Z" }, + { url = "https://files.pythonhosted.org/packages/af/8f/3ed4bfa0c0c72a7ca17f0380cd9e4dd842b09f664e780c13cff1dcf2ef1b/charset_normalizer-3.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2", size = 100408, upload_time = "2025-10-14T04:41:32.624Z" }, + { url = "https://files.pythonhosted.org/packages/2a/35/7051599bd493e62411d6ede36fd5af83a38f37c4767b92884df7301db25d/charset_normalizer-3.4.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd", size = 207746, upload_time = "2025-10-14T04:41:33.773Z" }, + { url = "https://files.pythonhosted.org/packages/10/9a/97c8d48ef10d6cd4fcead2415523221624bf58bcf68a802721a6bc807c8f/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb", size = 147889, upload_time = "2025-10-14T04:41:34.897Z" }, + { url = "https://files.pythonhosted.org/packages/10/bf/979224a919a1b606c82bd2c5fa49b5c6d5727aa47b4312bb27b1734f53cd/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e", size = 143641, upload_time = "2025-10-14T04:41:36.116Z" }, + { url = "https://files.pythonhosted.org/packages/ba/33/0ad65587441fc730dc7bd90e9716b30b4702dc7b617e6ba4997dc8651495/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14", size = 160779, upload_time = "2025-10-14T04:41:37.229Z" }, + { url = "https://files.pythonhosted.org/packages/67/ed/331d6b249259ee71ddea93f6f2f0a56cfebd46938bde6fcc6f7b9a3d0e09/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = 
"sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191", size = 159035, upload_time = "2025-10-14T04:41:38.368Z" }, + { url = "https://files.pythonhosted.org/packages/67/ff/f6b948ca32e4f2a4576aa129d8bed61f2e0543bf9f5f2b7fc3758ed005c9/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838", size = 152542, upload_time = "2025-10-14T04:41:39.862Z" }, + { url = "https://files.pythonhosted.org/packages/16/85/276033dcbcc369eb176594de22728541a925b2632f9716428c851b149e83/charset_normalizer-3.4.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6", size = 149524, upload_time = "2025-10-14T04:41:41.319Z" }, + { url = "https://files.pythonhosted.org/packages/9e/f2/6a2a1f722b6aba37050e626530a46a68f74e63683947a8acff92569f979a/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e", size = 150395, upload_time = "2025-10-14T04:41:42.539Z" }, + { url = "https://files.pythonhosted.org/packages/60/bb/2186cb2f2bbaea6338cad15ce23a67f9b0672929744381e28b0592676824/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c", size = 143680, upload_time = "2025-10-14T04:41:43.661Z" }, + { url = "https://files.pythonhosted.org/packages/7d/a5/bf6f13b772fbb2a90360eb620d52ed8f796f3c5caee8398c3b2eb7b1c60d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090", size = 162045, upload_time = "2025-10-14T04:41:44.821Z" }, + { url = "https://files.pythonhosted.org/packages/df/c5/d1be898bf0dc3ef9030c3825e5d3b83f2c528d207d246cbabe245966808d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152", size = 149687, upload_time = "2025-10-14T04:41:46.442Z" }, + { url = "https://files.pythonhosted.org/packages/a5/42/90c1f7b9341eef50c8a1cb3f098ac43b0508413f33affd762855f67a410e/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828", size = 160014, upload_time = "2025-10-14T04:41:47.631Z" }, + { url = "https://files.pythonhosted.org/packages/76/be/4d3ee471e8145d12795ab655ece37baed0929462a86e72372fd25859047c/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec", size = 154044, upload_time = "2025-10-14T04:41:48.81Z" }, + { url = "https://files.pythonhosted.org/packages/b0/6f/8f7af07237c34a1defe7defc565a9bc1807762f672c0fde711a4b22bf9c0/charset_normalizer-3.4.4-cp314-cp314-win32.whl", hash = "sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9", size = 99940, upload_time = "2025-10-14T04:41:49.946Z" }, + { url = "https://files.pythonhosted.org/packages/4b/51/8ade005e5ca5b0d80fb4aff72a3775b325bdc3d27408c8113811a7cbe640/charset_normalizer-3.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c", size = 107104, upload_time = "2025-10-14T04:41:51.051Z" }, + { url = 
"https://files.pythonhosted.org/packages/da/5f/6b8f83a55bb8278772c5ae54a577f3099025f9ade59d0136ac24a0df4bde/charset_normalizer-3.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2", size = 100743, upload_time = "2025-10-14T04:41:52.122Z" }, + { url = "https://files.pythonhosted.org/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402, upload_time = "2025-10-14T04:42:31.76Z" }, ] [[package]] @@ -474,29 +484,29 @@ name = "click" version = "8.2.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "platform_system == 'Windows'" }, + { name = "colorama", marker = "sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/60/6c/8ca2efa64cf75a977a0d7fac081354553ebe483345c734fb6b6515d96bbc/click-8.2.1.tar.gz", hash = "sha256:27c491cc05d968d271d5a1db13e3b5a184636d9d930f148c50b038f0d0646202", size = 286342 } +sdist = { url = "https://files.pythonhosted.org/packages/60/6c/8ca2efa64cf75a977a0d7fac081354553ebe483345c734fb6b6515d96bbc/click-8.2.1.tar.gz", hash = "sha256:27c491cc05d968d271d5a1db13e3b5a184636d9d930f148c50b038f0d0646202", size = 286342, upload_time = "2025-05-20T23:19:49.832Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/85/32/10bb5764d90a8eee674e9dc6f4db6a0ab47c8c4d0d83c27f7c39ac415a4d/click-8.2.1-py3-none-any.whl", hash = "sha256:61a3265b914e850b85317d0b3109c7f8cd35a670f963866005d6ef1d5175a12b", size = 102215 }, + { url = "https://files.pythonhosted.org/packages/85/32/10bb5764d90a8eee674e9dc6f4db6a0ab47c8c4d0d83c27f7c39ac415a4d/click-8.2.1-py3-none-any.whl", hash = "sha256:61a3265b914e850b85317d0b3109c7f8cd35a670f963866005d6ef1d5175a12b", size = 102215, upload_time = "2025-05-20T23:19:47.796Z" }, ] [[package]] name = "cloudpathlib" version = "0.23.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f4/18/2ac35d6b3015a0c74e923d94fc69baf8307f7c3233de015d69f99e17afa8/cloudpathlib-0.23.0.tar.gz", hash = "sha256:eb38a34c6b8a048ecfd2b2f60917f7cbad4a105b7c979196450c2f541f4d6b4b", size = 53126 } +sdist = { url = "https://files.pythonhosted.org/packages/f4/18/2ac35d6b3015a0c74e923d94fc69baf8307f7c3233de015d69f99e17afa8/cloudpathlib-0.23.0.tar.gz", hash = "sha256:eb38a34c6b8a048ecfd2b2f60917f7cbad4a105b7c979196450c2f541f4d6b4b", size = 53126, upload_time = "2025-10-07T22:47:56.278Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ae/8a/c4bb04426d608be4a3171efa2e233d2c59a5c8937850c10d098e126df18e/cloudpathlib-0.23.0-py3-none-any.whl", hash = "sha256:8520b3b01468fee77de37ab5d50b1b524ea6b4a8731c35d1b7407ac0cd716002", size = 62755 }, + { url = "https://files.pythonhosted.org/packages/ae/8a/c4bb04426d608be4a3171efa2e233d2c59a5c8937850c10d098e126df18e/cloudpathlib-0.23.0-py3-none-any.whl", hash = "sha256:8520b3b01468fee77de37ab5d50b1b524ea6b4a8731c35d1b7407ac0cd716002", size = 62755, upload_time = "2025-10-07T22:47:54.905Z" }, ] [[package]] name = "colorama" version = "0.4.6" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697 } +sdist = { url = 
"https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload_time = "2022-10-25T02:36:22.414Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload_time = "2022-10-25T02:36:20.889Z" }, ] [[package]] @@ -506,9 +516,9 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a2/61/f083b5ac52e505dfc1c624eafbf8c7589a0d7f32daa398d2e7590efa5fda/colorlog-6.10.1.tar.gz", hash = "sha256:eb4ae5cb65fe7fec7773c2306061a8e63e02efc2c72eba9d27b0fa23c94f1321", size = 17162 } +sdist = { url = "https://files.pythonhosted.org/packages/a2/61/f083b5ac52e505dfc1c624eafbf8c7589a0d7f32daa398d2e7590efa5fda/colorlog-6.10.1.tar.gz", hash = "sha256:eb4ae5cb65fe7fec7773c2306061a8e63e02efc2c72eba9d27b0fa23c94f1321", size = 17162, upload_time = "2025-10-16T16:14:11.978Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6d/c1/e419ef3723a074172b68aaa89c9f3de486ed4c2399e2dbd8113a4fdcaf9e/colorlog-6.10.1-py3-none-any.whl", hash = "sha256:2d7e8348291948af66122cff006c9f8da6255d224e7cf8e37d8de2df3bad8c9c", size = 11743 }, + { url = "https://files.pythonhosted.org/packages/6d/c1/e419ef3723a074172b68aaa89c9f3de486ed4c2399e2dbd8113a4fdcaf9e/colorlog-6.10.1-py3-none-any.whl", hash = "sha256:2d7e8348291948af66122cff006c9f8da6255d224e7cf8e37d8de2df3bad8c9c", size = 11743, upload_time = "2025-10-16T16:14:10.512Z" }, ] [[package]] @@ -519,9 +529,9 @@ dependencies = [ { name = "pydantic" }, { name = "srsly" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/51/d3/57c6631159a1b48d273b40865c315cf51f89df7a9d1101094ef12e3a37c2/confection-0.1.5.tar.gz", hash = "sha256:8e72dd3ca6bd4f48913cd220f10b8275978e740411654b6e8ca6d7008c590f0e", size = 38924 } +sdist = { url = "https://files.pythonhosted.org/packages/51/d3/57c6631159a1b48d273b40865c315cf51f89df7a9d1101094ef12e3a37c2/confection-0.1.5.tar.gz", hash = "sha256:8e72dd3ca6bd4f48913cd220f10b8275978e740411654b6e8ca6d7008c590f0e", size = 38924, upload_time = "2024-05-31T16:17:01.559Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0c/00/3106b1854b45bd0474ced037dfe6b73b90fe68a68968cef47c23de3d43d2/confection-0.1.5-py3-none-any.whl", hash = "sha256:e29d3c3f8eac06b3f77eb9dfb4bf2fc6bcc9622a98ca00a698e3d019c6430b14", size = 35451 }, + { url = "https://files.pythonhosted.org/packages/0c/00/3106b1854b45bd0474ced037dfe6b73b90fe68a68968cef47c23de3d43d2/confection-0.1.5-py3-none-any.whl", hash = "sha256:e29d3c3f8eac06b3f77eb9dfb4bf2fc6bcc9622a98ca00a698e3d019c6430b14", size = 35451, upload_time = "2024-05-31T16:16:59.075Z" }, ] [[package]] @@ -555,9 +565,9 @@ dependencies = [ { name = "uncertainty-calibration" }, { name = "zstandard" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/9e/57/8f0ffd8973d48f0430a9bfa9ed22f6f1f11fed6c547df7d03b468f8c9833/crfm_helm-0.5.12.tar.gz", hash = 
"sha256:7ac444123f8f1f25d71e3727bbd119a5dd53653236b1442a8cad9fc6bb5d6418", size = 8263909 } +sdist = { url = "https://files.pythonhosted.org/packages/9e/57/8f0ffd8973d48f0430a9bfa9ed22f6f1f11fed6c547df7d03b468f8c9833/crfm_helm-0.5.12.tar.gz", hash = "sha256:7ac444123f8f1f25d71e3727bbd119a5dd53653236b1442a8cad9fc6bb5d6418", size = 8263909, upload_time = "2026-01-28T00:37:29.974Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/fb/50/7cf798d9f0f6c18456be3bf281fee90e8932485234e8b1bf475e2fcee307/crfm_helm-0.5.12-py3-none-any.whl", hash = "sha256:01e83d17f7140cccf65443f0117a1375ccd3cec04c5d939587f9e16fcd77cba1", size = 8843831 }, + { url = "https://files.pythonhosted.org/packages/fb/50/7cf798d9f0f6c18456be3bf281fee90e8932485234e8b1bf475e2fcee307/crfm_helm-0.5.12-py3-none-any.whl", hash = "sha256:01e83d17f7140cccf65443f0117a1375ccd3cec04c5d939587f9e16fcd77cba1", size = 8843831, upload_time = "2026-01-28T00:37:26.771Z" }, ] [[package]] @@ -568,16 +578,11 @@ dependencies = [ { name = "cuda-pathfinder" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/0c/c2/65bfd79292b8ff18be4dd7f7442cea37bcbc1a228c1886f1dea515c45b67/cuda_bindings-12.9.4-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:694ba35023846625ef471257e6b5a4bc8af690f961d197d77d34b1d1db393f56", size = 11760260 }, - { url = "https://files.pythonhosted.org/packages/a9/c1/dabe88f52c3e3760d861401bb994df08f672ec893b8f7592dc91626adcf3/cuda_bindings-12.9.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fda147a344e8eaeca0c6ff113d2851ffca8f7dfc0a6c932374ee5c47caa649c8", size = 12151019 }, - { url = "https://files.pythonhosted.org/packages/05/8b/b4b2d1c7775fa403b64333e720cfcfccef8dcb9cdeb99947061ca5a77628/cuda_bindings-12.9.4-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cf8bfaedc238f3b115d957d1fd6562b7e8435ba57f6d0e2f87d0e7149ccb2da5", size = 11570071 }, - { url = "https://files.pythonhosted.org/packages/63/56/e465c31dc9111be3441a9ba7df1941fe98f4aa6e71e8788a3fb4534ce24d/cuda_bindings-12.9.4-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:32bdc5a76906be4c61eb98f546a6786c5773a881f3b166486449b5d141e4a39f", size = 11906628 }, - { url = "https://files.pythonhosted.org/packages/ec/07/6aff13bc1e977e35aaa6b22f52b172e2890c608c6db22438cf7ed2bf43a6/cuda_bindings-12.9.4-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3adf4958dcf68ae7801a59b73fb00a8b37f8d0595060d66ceae111b1002de38d", size = 11566797 }, - { url = "https://files.pythonhosted.org/packages/a3/84/1e6be415e37478070aeeee5884c2022713c1ecc735e6d82d744de0252eee/cuda_bindings-12.9.4-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:56e0043c457a99ac473ddc926fe0dc4046694d99caef633e92601ab52cbe17eb", size = 11925991 }, - { url = "https://files.pythonhosted.org/packages/1e/b5/96a6696e20c4ffd2b327f54c7d0fde2259bdb998d045c25d5dedbbe30290/cuda_bindings-12.9.4-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1f53a7f453d4b2643d8663d036bafe29b5ba89eb904c133180f295df6dc151e5", size = 11624530 }, - { url = "https://files.pythonhosted.org/packages/d1/af/6dfd8f2ed90b1d4719bc053ff8940e494640fe4212dc3dd72f383e4992da/cuda_bindings-12.9.4-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8b72ee72a9cc1b531db31eebaaee5c69a8ec3500e32c6933f2d3b15297b53686", size = 11922703 }, - { url = 
"https://files.pythonhosted.org/packages/39/73/d2fc40c043bac699c3880bf88d3cebe9d88410cd043795382826c93a89f0/cuda_bindings-12.9.4-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:20f2699d61d724de3eb3f3369d57e2b245f93085cab44fd37c3bea036cea1a6f", size = 11565056 }, - { url = "https://files.pythonhosted.org/packages/6c/19/90ac264acc00f6df8a49378eedec9fd2db3061bf9263bf9f39fd3d8377c3/cuda_bindings-12.9.4-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d80bffc357df9988dca279734bc9674c3934a654cab10cadeed27ce17d8635ee", size = 11924658 }, + { url = "https://files.pythonhosted.org/packages/a9/c1/dabe88f52c3e3760d861401bb994df08f672ec893b8f7592dc91626adcf3/cuda_bindings-12.9.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fda147a344e8eaeca0c6ff113d2851ffca8f7dfc0a6c932374ee5c47caa649c8", size = 12151019, upload_time = "2025-10-21T14:51:43.167Z" }, + { url = "https://files.pythonhosted.org/packages/63/56/e465c31dc9111be3441a9ba7df1941fe98f4aa6e71e8788a3fb4534ce24d/cuda_bindings-12.9.4-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:32bdc5a76906be4c61eb98f546a6786c5773a881f3b166486449b5d141e4a39f", size = 11906628, upload_time = "2025-10-21T14:51:49.905Z" }, + { url = "https://files.pythonhosted.org/packages/a3/84/1e6be415e37478070aeeee5884c2022713c1ecc735e6d82d744de0252eee/cuda_bindings-12.9.4-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:56e0043c457a99ac473ddc926fe0dc4046694d99caef633e92601ab52cbe17eb", size = 11925991, upload_time = "2025-10-21T14:51:56.535Z" }, + { url = "https://files.pythonhosted.org/packages/d1/af/6dfd8f2ed90b1d4719bc053ff8940e494640fe4212dc3dd72f383e4992da/cuda_bindings-12.9.4-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8b72ee72a9cc1b531db31eebaaee5c69a8ec3500e32c6933f2d3b15297b53686", size = 11922703, upload_time = "2025-10-21T14:52:03.585Z" }, + { url = "https://files.pythonhosted.org/packages/6c/19/90ac264acc00f6df8a49378eedec9fd2db3061bf9263bf9f39fd3d8377c3/cuda_bindings-12.9.4-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d80bffc357df9988dca279734bc9674c3934a654cab10cadeed27ce17d8635ee", size = 11924658, upload_time = "2025-10-21T14:52:10.411Z" }, ] [[package]] @@ -585,64 +590,64 @@ name = "cuda-pathfinder" version = "1.3.3" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0b/02/4dbe7568a42e46582248942f54dc64ad094769532adbe21e525e4edf7bc4/cuda_pathfinder-1.3.3-py3-none-any.whl", hash = "sha256:9984b664e404f7c134954a771be8775dfd6180ea1e1aef4a5a37d4be05d9bbb1", size = 27154 }, + { url = "https://files.pythonhosted.org/packages/0b/02/4dbe7568a42e46582248942f54dc64ad094769532adbe21e525e4edf7bc4/cuda_pathfinder-1.3.3-py3-none-any.whl", hash = "sha256:9984b664e404f7c134954a771be8775dfd6180ea1e1aef4a5a37d4be05d9bbb1", size = 27154, upload_time = "2025-12-04T22:35:08.996Z" }, ] [[package]] name = "cymem" version = "2.0.13" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c0/8f/2f0fbb32535c3731b7c2974c569fb9325e0a38ed5565a08e1139a3b71e82/cymem-2.0.13.tar.gz", hash = "sha256:1c91a92ae8c7104275ac26bd4d29b08ccd3e7faff5893d3858cb6fadf1bc1588", size = 12320 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c9/52/478a2911ab5028cb710b4900d64aceba6f4f882fcb13fd8d40a456a1b6dc/cymem-2.0.13-cp312-cp312-macosx_10_13_x86_64.whl", hash = 
"sha256:e8afbc5162a0fe14b6463e1c4e45248a1b2fe2cbcecc8a5b9e511117080da0eb", size = 43745 }, - { url = "https://files.pythonhosted.org/packages/f9/71/f0f8adee945524774b16af326bd314a14a478ed369a728a22834e6785a18/cymem-2.0.13-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c9251d889348fe79a75e9b3e4d1b5fa651fca8a64500820685d73a3acc21b6a8", size = 42927 }, - { url = "https://files.pythonhosted.org/packages/62/6d/159780fe162ff715d62b809246e5fc20901cef87ca28b67d255a8d741861/cymem-2.0.13-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:742fc19764467a49ed22e56a4d2134c262d73a6c635409584ae3bf9afa092c33", size = 258346 }, - { url = "https://files.pythonhosted.org/packages/eb/12/678d16f7aa1996f947bf17b8cfb917ea9c9674ef5e2bd3690c04123d5680/cymem-2.0.13-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f190a92fe46197ee64d32560eb121c2809bb843341733227f51538ce77b3410d", size = 260843 }, - { url = "https://files.pythonhosted.org/packages/31/5d/0dd8c167c08cd85e70d274b7235cfe1e31b3cebc99221178eaf4bbb95c6f/cymem-2.0.13-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d670329ee8dbbbf241b7c08069fe3f1d3a1a3e2d69c7d05ea008a7010d826298", size = 254607 }, - { url = "https://files.pythonhosted.org/packages/b7/c9/d6514a412a1160aa65db539836b3d47f9b59f6675f294ec34ae32f867c82/cymem-2.0.13-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a84ba3178d9128b9ffb52ce81ebab456e9fe959125b51109f5b73ebdfc6b60d6", size = 262421 }, - { url = "https://files.pythonhosted.org/packages/dd/fe/3ee37d02ca4040f2fb22d34eb415198f955862b5dd47eee01df4c8f5454c/cymem-2.0.13-cp312-cp312-win_amd64.whl", hash = "sha256:2ff1c41fd59b789579fdace78aa587c5fc091991fa59458c382b116fc36e30dc", size = 40176 }, - { url = "https://files.pythonhosted.org/packages/94/fb/1b681635bfd5f2274d0caa8f934b58435db6c091b97f5593738065ddb786/cymem-2.0.13-cp312-cp312-win_arm64.whl", hash = "sha256:6bbd701338df7bf408648191dff52472a9b334f71bcd31a21a41d83821050f67", size = 35959 }, - { url = "https://files.pythonhosted.org/packages/ce/0f/95a4d1e3bebfdfa7829252369357cf9a764f67569328cd9221f21e2c952e/cymem-2.0.13-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:891fd9030293a8b652dc7fb9fdc79a910a6c76fc679cd775e6741b819ffea476", size = 43478 }, - { url = "https://files.pythonhosted.org/packages/bf/a0/8fc929cc29ae466b7b4efc23ece99cbd3ea34992ccff319089c624d667fd/cymem-2.0.13-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:89c4889bd16513ce1644ccfe1e7c473ba7ca150f0621e66feac3a571bde09e7e", size = 42695 }, - { url = "https://files.pythonhosted.org/packages/4a/b3/deeb01354ebaf384438083ffe0310209ef903db3e7ba5a8f584b06d28387/cymem-2.0.13-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:45dcaba0f48bef9cc3d8b0b92058640244a95a9f12542210b51318da97c2cf28", size = 250573 }, - { url = "https://files.pythonhosted.org/packages/36/36/bc980b9a14409f3356309c45a8d88d58797d02002a9d794dd6c84e809d3a/cymem-2.0.13-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e96848faaafccc0abd631f1c5fb194eac0caee4f5a8777fdbb3e349d3a21741c", size = 254572 }, - { url = "https://files.pythonhosted.org/packages/fd/dd/a12522952624685bd0f8968e26d2ed6d059c967413ce6eb52292f538f1b0/cymem-2.0.13-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e02d3e2c3bfeb21185d5a4a70790d9df40629a87d8d7617dc22b4e864f665fa3", size = 248060 }, - { url = "https://files.pythonhosted.org/packages/08/11/5dc933ddfeb2dfea747a0b935cb965b9a7580b324d96fc5f5a1b5ff8df29/cymem-2.0.13-cp313-cp313-musllinux_1_2_x86_64.whl", hash 
= "sha256:fece5229fd5ecdcd7a0738affb8c59890e13073ae5626544e13825f26c019d3c", size = 254601 }, - { url = "https://files.pythonhosted.org/packages/70/66/d23b06166864fa94e13a98e5922986ce774832936473578febce64448d75/cymem-2.0.13-cp313-cp313-win_amd64.whl", hash = "sha256:38aefeb269597c1a0c2ddf1567dd8605489b661fa0369c6406c1acd433b4c7ba", size = 40103 }, - { url = "https://files.pythonhosted.org/packages/2f/9e/c7b21271ab88a21760f3afdec84d2bc09ffa9e6c8d774ad9d4f1afab0416/cymem-2.0.13-cp313-cp313-win_arm64.whl", hash = "sha256:717270dcfd8c8096b479c42708b151002ff98e434a7b6f1f916387a6c791e2ad", size = 36016 }, - { url = "https://files.pythonhosted.org/packages/7f/28/d3b03427edc04ae04910edf1c24b993881c3ba93a9729a42bcbb816a1808/cymem-2.0.13-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:7e1a863a7f144ffb345397813701509cfc74fc9ed360a4d92799805b4b865dd1", size = 46429 }, - { url = "https://files.pythonhosted.org/packages/35/a9/7ed53e481f47ebfb922b0b42e980cec83e98ccb2137dc597ea156642440c/cymem-2.0.13-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:c16cb80efc017b054f78998c6b4b013cef509c7b3d802707ce1f85a1d68361bf", size = 46205 }, - { url = "https://files.pythonhosted.org/packages/61/39/a3d6ad073cf7f0fbbb8bbf09698c3c8fac11be3f791d710239a4e8dd3438/cymem-2.0.13-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0d78a27c88b26c89bd1ece247d1d5939dba05a1dae6305aad8fd8056b17ddb51", size = 296083 }, - { url = "https://files.pythonhosted.org/packages/36/0c/20697c8bc19f624a595833e566f37d7bcb9167b0ce69de896eba7cfc9c2d/cymem-2.0.13-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6d36710760f817194dacb09d9fc45cb6a5062ed75e85f0ef7ad7aeeb13d80cc3", size = 286159 }, - { url = "https://files.pythonhosted.org/packages/82/d4/9326e3422d1c2d2b4a8fb859bdcce80138f6ab721ddafa4cba328a505c71/cymem-2.0.13-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c8f30971cadd5dcf73bcfbbc5849b1f1e1f40db8cd846c4aa7d3b5e035c7b583", size = 288186 }, - { url = "https://files.pythonhosted.org/packages/ed/bc/68da7dd749b72884dc22e898562f335002d70306069d496376e5ff3b6153/cymem-2.0.13-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:9d441d0e45798ec1fd330373bf7ffa6b795f229275f64016b6a193e6e2a51522", size = 290353 }, - { url = "https://files.pythonhosted.org/packages/50/23/dbf2ad6ecd19b99b3aab6203b1a06608bbd04a09c522d836b854f2f30f73/cymem-2.0.13-cp313-cp313t-win_amd64.whl", hash = "sha256:d1c950eebb9f0f15e3ef3591313482a5a611d16fc12d545e2018cd607f40f472", size = 44764 }, - { url = "https://files.pythonhosted.org/packages/54/3f/35701c13e1fc7b0895198c8b20068c569a841e0daf8e0b14d1dc0816b28f/cymem-2.0.13-cp313-cp313t-win_arm64.whl", hash = "sha256:042e8611ef862c34a97b13241f5d0da86d58aca3cecc45c533496678e75c5a1f", size = 38964 }, - { url = "https://files.pythonhosted.org/packages/a7/2e/f0e1596010a9a57fa9ebd124a678c07c5b2092283781ae51e79edcf5cb98/cymem-2.0.13-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:d2a4bf67db76c7b6afc33de44fb1c318207c3224a30da02c70901936b5aafdf1", size = 43812 }, - { url = "https://files.pythonhosted.org/packages/bc/45/8ccc21df08fcbfa6aa3efeb7efc11a1c81c90e7476e255768bb9c29ba02a/cymem-2.0.13-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:92a2ce50afa5625fb5ce7c9302cee61e23a57ccac52cd0410b4858e572f8614b", size = 42951 }, - { url = "https://files.pythonhosted.org/packages/01/8c/fe16531631f051d3d1226fa42e2d76fd2c8d5cfa893ec93baee90c7a9d90/cymem-2.0.13-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:bc116a70cc3a5dc3d1684db5268eff9399a0be8603980005e5b889564f1ea42f", size = 249878 }, - { url = "https://files.pythonhosted.org/packages/47/4b/39d67b80ffb260457c05fcc545de37d82e9e2dbafc93dd6b64f17e09b933/cymem-2.0.13-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:68489bf0035c4c280614067ab6a82815b01dc9fcd486742a5306fe9f68deb7ef", size = 252571 }, - { url = "https://files.pythonhosted.org/packages/53/0e/76f6531f74dfdfe7107899cce93ab063bb7ee086ccd3910522b31f623c08/cymem-2.0.13-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:03cb7bdb55718d5eb6ef0340b1d2430ba1386db30d33e9134d01ba9d6d34d705", size = 248555 }, - { url = "https://files.pythonhosted.org/packages/c7/7c/eee56757db81f0aefc2615267677ae145aff74228f529838425057003c0d/cymem-2.0.13-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:1710390e7fb2510a8091a1991024d8ae838fd06b02cdfdcd35f006192e3c6b0e", size = 254177 }, - { url = "https://files.pythonhosted.org/packages/77/e0/a4b58ec9e53c836dce07ef39837a64a599f4a21a134fc7ca57a3a8f9a4b5/cymem-2.0.13-cp314-cp314-win_amd64.whl", hash = "sha256:ac699c8ec72a3a9de8109bd78821ab22f60b14cf2abccd970b5ff310e14158ed", size = 40853 }, - { url = "https://files.pythonhosted.org/packages/61/81/9931d1f83e5aeba175440af0b28f0c2e6f71274a5a7b688bc3e907669388/cymem-2.0.13-cp314-cp314-win_arm64.whl", hash = "sha256:90c2d0c04bcda12cd5cebe9be93ce3af6742ad8da96e1b1907e3f8e00291def1", size = 36970 }, - { url = "https://files.pythonhosted.org/packages/b7/ef/af447c2184dec6dec973be14614df8ccb4d16d1c74e0784ab4f02538433c/cymem-2.0.13-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:ff036bbc1464993552fd1251b0a83fe102af334b301e3896d7aa05a4999ad042", size = 46804 }, - { url = "https://files.pythonhosted.org/packages/8c/95/e10f33a8d4fc17f9b933d451038218437f9326c2abb15a3e7f58ce2a06ec/cymem-2.0.13-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:fb8291691ba7ff4e6e000224cc97a744a8d9588418535c9454fd8436911df612", size = 46254 }, - { url = "https://files.pythonhosted.org/packages/e7/7a/5efeb2d2ea6ebad2745301ad33a4fa9a8f9a33b66623ee4d9185683007a6/cymem-2.0.13-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d8d06ea59006b1251ad5794bcc00121e148434826090ead0073c7b7fedebe431", size = 296061 }, - { url = "https://files.pythonhosted.org/packages/0b/28/2a3f65842cc8443c2c0650cf23d525be06c8761ab212e0a095a88627be1b/cymem-2.0.13-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c0046a619ecc845ccb4528b37b63426a0cbcb4f14d7940add3391f59f13701e6", size = 285784 }, - { url = "https://files.pythonhosted.org/packages/98/73/dd5f9729398f0108c2e71d942253d0d484d299d08b02e474d7cfc43ed0b0/cymem-2.0.13-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:18ad5b116a82fa3674bc8838bd3792891b428971e2123ae8c0fd3ca472157c5e", size = 288062 }, - { url = "https://files.pythonhosted.org/packages/5a/01/ffe51729a8f961a437920560659073e47f575d4627445216c1177ecd4a41/cymem-2.0.13-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:666ce6146bc61b9318aa70d91ce33f126b6344a25cf0b925621baed0c161e9cc", size = 290465 }, - { url = "https://files.pythonhosted.org/packages/fd/ac/c9e7d68607f71ef978c81e334ab2898b426944c71950212b1467186f69f9/cymem-2.0.13-cp314-cp314t-win_amd64.whl", hash = "sha256:84c1168c563d9d1e04546cb65e3e54fde2bf814f7c7faf11fc06436598e386d1", size = 46665 }, - { url = "https://files.pythonhosted.org/packages/66/66/150e406a2db5535533aa3c946de58f0371f2e412e23f050c704588023e6e/cymem-2.0.13-cp314-cp314t-win_arm64.whl", hash = 
"sha256:e9027764dc5f1999fb4b4cabee1d0322c59e330c0a6485b436a68275f614277f", size = 39715 }, +sdist = { url = "https://files.pythonhosted.org/packages/c0/8f/2f0fbb32535c3731b7c2974c569fb9325e0a38ed5565a08e1139a3b71e82/cymem-2.0.13.tar.gz", hash = "sha256:1c91a92ae8c7104275ac26bd4d29b08ccd3e7faff5893d3858cb6fadf1bc1588", size = 12320, upload_time = "2025-11-14T14:58:36.902Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c9/52/478a2911ab5028cb710b4900d64aceba6f4f882fcb13fd8d40a456a1b6dc/cymem-2.0.13-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e8afbc5162a0fe14b6463e1c4e45248a1b2fe2cbcecc8a5b9e511117080da0eb", size = 43745, upload_time = "2025-11-14T14:57:32.52Z" }, + { url = "https://files.pythonhosted.org/packages/f9/71/f0f8adee945524774b16af326bd314a14a478ed369a728a22834e6785a18/cymem-2.0.13-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c9251d889348fe79a75e9b3e4d1b5fa651fca8a64500820685d73a3acc21b6a8", size = 42927, upload_time = "2025-11-14T14:57:33.827Z" }, + { url = "https://files.pythonhosted.org/packages/62/6d/159780fe162ff715d62b809246e5fc20901cef87ca28b67d255a8d741861/cymem-2.0.13-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:742fc19764467a49ed22e56a4d2134c262d73a6c635409584ae3bf9afa092c33", size = 258346, upload_time = "2025-11-14T14:57:34.917Z" }, + { url = "https://files.pythonhosted.org/packages/eb/12/678d16f7aa1996f947bf17b8cfb917ea9c9674ef5e2bd3690c04123d5680/cymem-2.0.13-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f190a92fe46197ee64d32560eb121c2809bb843341733227f51538ce77b3410d", size = 260843, upload_time = "2025-11-14T14:57:36.503Z" }, + { url = "https://files.pythonhosted.org/packages/31/5d/0dd8c167c08cd85e70d274b7235cfe1e31b3cebc99221178eaf4bbb95c6f/cymem-2.0.13-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d670329ee8dbbbf241b7c08069fe3f1d3a1a3e2d69c7d05ea008a7010d826298", size = 254607, upload_time = "2025-11-14T14:57:38.036Z" }, + { url = "https://files.pythonhosted.org/packages/b7/c9/d6514a412a1160aa65db539836b3d47f9b59f6675f294ec34ae32f867c82/cymem-2.0.13-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a84ba3178d9128b9ffb52ce81ebab456e9fe959125b51109f5b73ebdfc6b60d6", size = 262421, upload_time = "2025-11-14T14:57:39.265Z" }, + { url = "https://files.pythonhosted.org/packages/dd/fe/3ee37d02ca4040f2fb22d34eb415198f955862b5dd47eee01df4c8f5454c/cymem-2.0.13-cp312-cp312-win_amd64.whl", hash = "sha256:2ff1c41fd59b789579fdace78aa587c5fc091991fa59458c382b116fc36e30dc", size = 40176, upload_time = "2025-11-14T14:57:40.706Z" }, + { url = "https://files.pythonhosted.org/packages/94/fb/1b681635bfd5f2274d0caa8f934b58435db6c091b97f5593738065ddb786/cymem-2.0.13-cp312-cp312-win_arm64.whl", hash = "sha256:6bbd701338df7bf408648191dff52472a9b334f71bcd31a21a41d83821050f67", size = 35959, upload_time = "2025-11-14T14:57:41.682Z" }, + { url = "https://files.pythonhosted.org/packages/ce/0f/95a4d1e3bebfdfa7829252369357cf9a764f67569328cd9221f21e2c952e/cymem-2.0.13-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:891fd9030293a8b652dc7fb9fdc79a910a6c76fc679cd775e6741b819ffea476", size = 43478, upload_time = "2025-11-14T14:57:42.682Z" }, + { url = "https://files.pythonhosted.org/packages/bf/a0/8fc929cc29ae466b7b4efc23ece99cbd3ea34992ccff319089c624d667fd/cymem-2.0.13-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:89c4889bd16513ce1644ccfe1e7c473ba7ca150f0621e66feac3a571bde09e7e", size = 42695, upload_time = "2025-11-14T14:57:43.741Z" }, + { url = 
"https://files.pythonhosted.org/packages/4a/b3/deeb01354ebaf384438083ffe0310209ef903db3e7ba5a8f584b06d28387/cymem-2.0.13-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:45dcaba0f48bef9cc3d8b0b92058640244a95a9f12542210b51318da97c2cf28", size = 250573, upload_time = "2025-11-14T14:57:44.81Z" }, + { url = "https://files.pythonhosted.org/packages/36/36/bc980b9a14409f3356309c45a8d88d58797d02002a9d794dd6c84e809d3a/cymem-2.0.13-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e96848faaafccc0abd631f1c5fb194eac0caee4f5a8777fdbb3e349d3a21741c", size = 254572, upload_time = "2025-11-14T14:57:46.023Z" }, + { url = "https://files.pythonhosted.org/packages/fd/dd/a12522952624685bd0f8968e26d2ed6d059c967413ce6eb52292f538f1b0/cymem-2.0.13-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e02d3e2c3bfeb21185d5a4a70790d9df40629a87d8d7617dc22b4e864f665fa3", size = 248060, upload_time = "2025-11-14T14:57:47.605Z" }, + { url = "https://files.pythonhosted.org/packages/08/11/5dc933ddfeb2dfea747a0b935cb965b9a7580b324d96fc5f5a1b5ff8df29/cymem-2.0.13-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fece5229fd5ecdcd7a0738affb8c59890e13073ae5626544e13825f26c019d3c", size = 254601, upload_time = "2025-11-14T14:57:48.861Z" }, + { url = "https://files.pythonhosted.org/packages/70/66/d23b06166864fa94e13a98e5922986ce774832936473578febce64448d75/cymem-2.0.13-cp313-cp313-win_amd64.whl", hash = "sha256:38aefeb269597c1a0c2ddf1567dd8605489b661fa0369c6406c1acd433b4c7ba", size = 40103, upload_time = "2025-11-14T14:57:50.396Z" }, + { url = "https://files.pythonhosted.org/packages/2f/9e/c7b21271ab88a21760f3afdec84d2bc09ffa9e6c8d774ad9d4f1afab0416/cymem-2.0.13-cp313-cp313-win_arm64.whl", hash = "sha256:717270dcfd8c8096b479c42708b151002ff98e434a7b6f1f916387a6c791e2ad", size = 36016, upload_time = "2025-11-14T14:57:51.611Z" }, + { url = "https://files.pythonhosted.org/packages/7f/28/d3b03427edc04ae04910edf1c24b993881c3ba93a9729a42bcbb816a1808/cymem-2.0.13-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:7e1a863a7f144ffb345397813701509cfc74fc9ed360a4d92799805b4b865dd1", size = 46429, upload_time = "2025-11-14T14:57:52.582Z" }, + { url = "https://files.pythonhosted.org/packages/35/a9/7ed53e481f47ebfb922b0b42e980cec83e98ccb2137dc597ea156642440c/cymem-2.0.13-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:c16cb80efc017b054f78998c6b4b013cef509c7b3d802707ce1f85a1d68361bf", size = 46205, upload_time = "2025-11-14T14:57:53.64Z" }, + { url = "https://files.pythonhosted.org/packages/61/39/a3d6ad073cf7f0fbbb8bbf09698c3c8fac11be3f791d710239a4e8dd3438/cymem-2.0.13-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0d78a27c88b26c89bd1ece247d1d5939dba05a1dae6305aad8fd8056b17ddb51", size = 296083, upload_time = "2025-11-14T14:57:55.922Z" }, + { url = "https://files.pythonhosted.org/packages/36/0c/20697c8bc19f624a595833e566f37d7bcb9167b0ce69de896eba7cfc9c2d/cymem-2.0.13-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6d36710760f817194dacb09d9fc45cb6a5062ed75e85f0ef7ad7aeeb13d80cc3", size = 286159, upload_time = "2025-11-14T14:57:57.106Z" }, + { url = "https://files.pythonhosted.org/packages/82/d4/9326e3422d1c2d2b4a8fb859bdcce80138f6ab721ddafa4cba328a505c71/cymem-2.0.13-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c8f30971cadd5dcf73bcfbbc5849b1f1e1f40db8cd846c4aa7d3b5e035c7b583", size = 288186, upload_time = "2025-11-14T14:57:58.334Z" }, + { url = 
"https://files.pythonhosted.org/packages/ed/bc/68da7dd749b72884dc22e898562f335002d70306069d496376e5ff3b6153/cymem-2.0.13-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:9d441d0e45798ec1fd330373bf7ffa6b795f229275f64016b6a193e6e2a51522", size = 290353, upload_time = "2025-11-14T14:58:00.562Z" }, + { url = "https://files.pythonhosted.org/packages/50/23/dbf2ad6ecd19b99b3aab6203b1a06608bbd04a09c522d836b854f2f30f73/cymem-2.0.13-cp313-cp313t-win_amd64.whl", hash = "sha256:d1c950eebb9f0f15e3ef3591313482a5a611d16fc12d545e2018cd607f40f472", size = 44764, upload_time = "2025-11-14T14:58:01.793Z" }, + { url = "https://files.pythonhosted.org/packages/54/3f/35701c13e1fc7b0895198c8b20068c569a841e0daf8e0b14d1dc0816b28f/cymem-2.0.13-cp313-cp313t-win_arm64.whl", hash = "sha256:042e8611ef862c34a97b13241f5d0da86d58aca3cecc45c533496678e75c5a1f", size = 38964, upload_time = "2025-11-14T14:58:02.87Z" }, + { url = "https://files.pythonhosted.org/packages/a7/2e/f0e1596010a9a57fa9ebd124a678c07c5b2092283781ae51e79edcf5cb98/cymem-2.0.13-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:d2a4bf67db76c7b6afc33de44fb1c318207c3224a30da02c70901936b5aafdf1", size = 43812, upload_time = "2025-11-14T14:58:04.227Z" }, + { url = "https://files.pythonhosted.org/packages/bc/45/8ccc21df08fcbfa6aa3efeb7efc11a1c81c90e7476e255768bb9c29ba02a/cymem-2.0.13-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:92a2ce50afa5625fb5ce7c9302cee61e23a57ccac52cd0410b4858e572f8614b", size = 42951, upload_time = "2025-11-14T14:58:05.424Z" }, + { url = "https://files.pythonhosted.org/packages/01/8c/fe16531631f051d3d1226fa42e2d76fd2c8d5cfa893ec93baee90c7a9d90/cymem-2.0.13-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bc116a70cc3a5dc3d1684db5268eff9399a0be8603980005e5b889564f1ea42f", size = 249878, upload_time = "2025-11-14T14:58:06.95Z" }, + { url = "https://files.pythonhosted.org/packages/47/4b/39d67b80ffb260457c05fcc545de37d82e9e2dbafc93dd6b64f17e09b933/cymem-2.0.13-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:68489bf0035c4c280614067ab6a82815b01dc9fcd486742a5306fe9f68deb7ef", size = 252571, upload_time = "2025-11-14T14:58:08.232Z" }, + { url = "https://files.pythonhosted.org/packages/53/0e/76f6531f74dfdfe7107899cce93ab063bb7ee086ccd3910522b31f623c08/cymem-2.0.13-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:03cb7bdb55718d5eb6ef0340b1d2430ba1386db30d33e9134d01ba9d6d34d705", size = 248555, upload_time = "2025-11-14T14:58:09.429Z" }, + { url = "https://files.pythonhosted.org/packages/c7/7c/eee56757db81f0aefc2615267677ae145aff74228f529838425057003c0d/cymem-2.0.13-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:1710390e7fb2510a8091a1991024d8ae838fd06b02cdfdcd35f006192e3c6b0e", size = 254177, upload_time = "2025-11-14T14:58:10.594Z" }, + { url = "https://files.pythonhosted.org/packages/77/e0/a4b58ec9e53c836dce07ef39837a64a599f4a21a134fc7ca57a3a8f9a4b5/cymem-2.0.13-cp314-cp314-win_amd64.whl", hash = "sha256:ac699c8ec72a3a9de8109bd78821ab22f60b14cf2abccd970b5ff310e14158ed", size = 40853, upload_time = "2025-11-14T14:58:12.116Z" }, + { url = "https://files.pythonhosted.org/packages/61/81/9931d1f83e5aeba175440af0b28f0c2e6f71274a5a7b688bc3e907669388/cymem-2.0.13-cp314-cp314-win_arm64.whl", hash = "sha256:90c2d0c04bcda12cd5cebe9be93ce3af6742ad8da96e1b1907e3f8e00291def1", size = 36970, upload_time = "2025-11-14T14:58:13.114Z" }, + { url = 
"https://files.pythonhosted.org/packages/b7/ef/af447c2184dec6dec973be14614df8ccb4d16d1c74e0784ab4f02538433c/cymem-2.0.13-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:ff036bbc1464993552fd1251b0a83fe102af334b301e3896d7aa05a4999ad042", size = 46804, upload_time = "2025-11-14T14:58:14.113Z" }, + { url = "https://files.pythonhosted.org/packages/8c/95/e10f33a8d4fc17f9b933d451038218437f9326c2abb15a3e7f58ce2a06ec/cymem-2.0.13-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:fb8291691ba7ff4e6e000224cc97a744a8d9588418535c9454fd8436911df612", size = 46254, upload_time = "2025-11-14T14:58:15.156Z" }, + { url = "https://files.pythonhosted.org/packages/e7/7a/5efeb2d2ea6ebad2745301ad33a4fa9a8f9a33b66623ee4d9185683007a6/cymem-2.0.13-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d8d06ea59006b1251ad5794bcc00121e148434826090ead0073c7b7fedebe431", size = 296061, upload_time = "2025-11-14T14:58:16.254Z" }, + { url = "https://files.pythonhosted.org/packages/0b/28/2a3f65842cc8443c2c0650cf23d525be06c8761ab212e0a095a88627be1b/cymem-2.0.13-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c0046a619ecc845ccb4528b37b63426a0cbcb4f14d7940add3391f59f13701e6", size = 285784, upload_time = "2025-11-14T14:58:17.412Z" }, + { url = "https://files.pythonhosted.org/packages/98/73/dd5f9729398f0108c2e71d942253d0d484d299d08b02e474d7cfc43ed0b0/cymem-2.0.13-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:18ad5b116a82fa3674bc8838bd3792891b428971e2123ae8c0fd3ca472157c5e", size = 288062, upload_time = "2025-11-14T14:58:20.225Z" }, + { url = "https://files.pythonhosted.org/packages/5a/01/ffe51729a8f961a437920560659073e47f575d4627445216c1177ecd4a41/cymem-2.0.13-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:666ce6146bc61b9318aa70d91ce33f126b6344a25cf0b925621baed0c161e9cc", size = 290465, upload_time = "2025-11-14T14:58:21.815Z" }, + { url = "https://files.pythonhosted.org/packages/fd/ac/c9e7d68607f71ef978c81e334ab2898b426944c71950212b1467186f69f9/cymem-2.0.13-cp314-cp314t-win_amd64.whl", hash = "sha256:84c1168c563d9d1e04546cb65e3e54fde2bf814f7c7faf11fc06436598e386d1", size = 46665, upload_time = "2025-11-14T14:58:23.512Z" }, + { url = "https://files.pythonhosted.org/packages/66/66/150e406a2db5535533aa3c946de58f0371f2e412e23f050c704588023e6e/cymem-2.0.13-cp314-cp314t-win_arm64.whl", hash = "sha256:e9027764dc5f1999fb4b4cabee1d0322c59e330c0a6485b436a68275f614277f", size = 39715, upload_time = "2025-11-14T14:58:24.773Z" }, ] [[package]] name = "dacite" version = "1.9.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/55/a0/7ca79796e799a3e782045d29bf052b5cde7439a2bbb17f15ff44f7aacc63/dacite-1.9.2.tar.gz", hash = "sha256:6ccc3b299727c7aa17582f0021f6ae14d5de47c7227932c47fec4cdfefd26f09", size = 22420 } +sdist = { url = "https://files.pythonhosted.org/packages/55/a0/7ca79796e799a3e782045d29bf052b5cde7439a2bbb17f15ff44f7aacc63/dacite-1.9.2.tar.gz", hash = "sha256:6ccc3b299727c7aa17582f0021f6ae14d5de47c7227932c47fec4cdfefd26f09", size = 22420, upload_time = "2025-02-05T09:27:29.757Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/94/35/386550fd60316d1e37eccdda609b074113298f23cef5bddb2049823fe666/dacite-1.9.2-py3-none-any.whl", hash = "sha256:053f7c3f5128ca2e9aceb66892b1a3c8936d02c686e707bee96e19deef4bc4a0", size = 16600 }, + { url = "https://files.pythonhosted.org/packages/94/35/386550fd60316d1e37eccdda609b074113298f23cef5bddb2049823fe666/dacite-1.9.2-py3-none-any.whl", hash = 
"sha256:053f7c3f5128ca2e9aceb66892b1a3c8936d02c686e707bee96e19deef4bc4a0", size = 16600, upload_time = "2025-02-05T09:27:24.345Z" }, ] [[package]] @@ -660,9 +665,9 @@ dependencies = [ { name = "pydantic" }, { name = "pyyaml" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/72/8f/7594b293c5da78c7b7685facfc07b96d03af9f833f5f5742a028b6a1d564/datamodel_code_generator-0.52.2.tar.gz", hash = "sha256:54b25789980deafed50c455c25a5d391c177a9f74f0e14b0edeb6db69f1185a9", size = 789610 } +sdist = { url = "https://files.pythonhosted.org/packages/72/8f/7594b293c5da78c7b7685facfc07b96d03af9f833f5f5742a028b6a1d564/datamodel_code_generator-0.52.2.tar.gz", hash = "sha256:54b25789980deafed50c455c25a5d391c177a9f74f0e14b0edeb6db69f1185a9", size = 789610, upload_time = "2026-01-05T17:25:50.211Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/9e/8d/72a75327e05a26d5fb7fa511a7d78af18198dc2595bbb179da0c9e4b6fbc/datamodel_code_generator-0.52.2-py3-none-any.whl", hash = "sha256:3ee19ec38190b76bb4119aa67ddce25b3447dd4e5c6bfd47b03a9937c50f5aac", size = 249759 }, + { url = "https://files.pythonhosted.org/packages/9e/8d/72a75327e05a26d5fb7fa511a7d78af18198dc2595bbb179da0c9e4b6fbc/datamodel_code_generator-0.52.2-py3-none-any.whl", hash = "sha256:3ee19ec38190b76bb4119aa67ddce25b3447dd4e5c6bfd47b03a9937c50f5aac", size = 249759, upload_time = "2026-01-05T17:25:48.845Z" }, ] [[package]] @@ -684,48 +689,57 @@ dependencies = [ { name = "tqdm" }, { name = "xxhash" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/1a/89/d3d6fef58a488f8569c82fd293ab7cbd4250244d67f425dcae64c63800ea/datasets-3.6.0.tar.gz", hash = "sha256:1b2bf43b19776e2787e181cfd329cb0ca1a358ea014780c3581e0f276375e041", size = 569336 } +sdist = { url = "https://files.pythonhosted.org/packages/1a/89/d3d6fef58a488f8569c82fd293ab7cbd4250244d67f425dcae64c63800ea/datasets-3.6.0.tar.gz", hash = "sha256:1b2bf43b19776e2787e181cfd329cb0ca1a358ea014780c3581e0f276375e041", size = 569336, upload_time = "2025-05-07T15:15:02.659Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/20/34/a08b0ee99715eaba118cbe19a71f7b5e2425c2718ef96007c325944a1152/datasets-3.6.0-py3-none-any.whl", hash = "sha256:25000c4a2c0873a710df127d08a202a06eab7bf42441a6bc278b499c2f72cd1b", size = 491546 }, + { url = "https://files.pythonhosted.org/packages/20/34/a08b0ee99715eaba118cbe19a71f7b5e2425c2718ef96007c325944a1152/datasets-3.6.0-py3-none-any.whl", hash = "sha256:25000c4a2c0873a710df127d08a202a06eab7bf42441a6bc278b499c2f72cd1b", size = 491546, upload_time = "2025-05-07T15:14:59.742Z" }, ] [[package]] name = "debugpy" version = "1.8.19" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/73/75/9e12d4d42349b817cd545b89247696c67917aab907012ae5b64bbfea3199/debugpy-1.8.19.tar.gz", hash = "sha256:eea7e5987445ab0b5ed258093722d5ecb8bb72217c5c9b1e21f64efe23ddebdb", size = 1644590 } +sdist = { url = "https://files.pythonhosted.org/packages/73/75/9e12d4d42349b817cd545b89247696c67917aab907012ae5b64bbfea3199/debugpy-1.8.19.tar.gz", hash = "sha256:eea7e5987445ab0b5ed258093722d5ecb8bb72217c5c9b1e21f64efe23ddebdb", size = 1644590, upload_time = "2025-12-15T21:53:28.044Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4a/15/d762e5263d9e25b763b78be72dc084c7a32113a0bac119e2f7acae7700ed/debugpy-1.8.19-cp312-cp312-macosx_15_0_universal2.whl", hash = "sha256:bccb1540a49cde77edc7ce7d9d075c1dbeb2414751bc0048c7a11e1b597a4c2e", size = 2549995 }, - { url = 
"https://files.pythonhosted.org/packages/a7/88/f7d25c68b18873b7c53d7c156ca7a7ffd8e77073aa0eac170a9b679cf786/debugpy-1.8.19-cp312-cp312-manylinux_2_34_x86_64.whl", hash = "sha256:e9c68d9a382ec754dc05ed1d1b4ed5bd824b9f7c1a8cd1083adb84b3c93501de", size = 4309891 }, - { url = "https://files.pythonhosted.org/packages/c5/4f/a65e973aba3865794da65f71971dca01ae66666132c7b2647182d5be0c5f/debugpy-1.8.19-cp312-cp312-win32.whl", hash = "sha256:6599cab8a783d1496ae9984c52cb13b7c4a3bd06a8e6c33446832a5d97ce0bee", size = 5286355 }, - { url = "https://files.pythonhosted.org/packages/d8/3a/d3d8b48fec96e3d824e404bf428276fb8419dfa766f78f10b08da1cb2986/debugpy-1.8.19-cp312-cp312-win_amd64.whl", hash = "sha256:66e3d2fd8f2035a8f111eb127fa508469dfa40928a89b460b41fd988684dc83d", size = 5328239 }, - { url = "https://files.pythonhosted.org/packages/71/3d/388035a31a59c26f1ecc8d86af607d0c42e20ef80074147cd07b180c4349/debugpy-1.8.19-cp313-cp313-macosx_15_0_universal2.whl", hash = "sha256:91e35db2672a0abaf325f4868fcac9c1674a0d9ad9bb8a8c849c03a5ebba3e6d", size = 2538859 }, - { url = "https://files.pythonhosted.org/packages/4a/19/c93a0772d0962294f083dbdb113af1a7427bb632d36e5314297068f55db7/debugpy-1.8.19-cp313-cp313-manylinux_2_34_x86_64.whl", hash = "sha256:85016a73ab84dea1c1f1dcd88ec692993bcbe4532d1b49ecb5f3c688ae50c606", size = 4292575 }, - { url = "https://files.pythonhosted.org/packages/5c/56/09e48ab796b0a77e3d7dc250f95251832b8bf6838c9632f6100c98bdf426/debugpy-1.8.19-cp313-cp313-win32.whl", hash = "sha256:b605f17e89ba0ecee994391194285fada89cee111cfcd29d6f2ee11cbdc40976", size = 5286209 }, - { url = "https://files.pythonhosted.org/packages/fb/4e/931480b9552c7d0feebe40c73725dd7703dcc578ba9efc14fe0e6d31cfd1/debugpy-1.8.19-cp313-cp313-win_amd64.whl", hash = "sha256:c30639998a9f9cd9699b4b621942c0179a6527f083c72351f95c6ab1728d5b73", size = 5328206 }, - { url = "https://files.pythonhosted.org/packages/f6/b9/cbec520c3a00508327476c7fce26fbafef98f412707e511eb9d19a2ef467/debugpy-1.8.19-cp314-cp314-macosx_15_0_universal2.whl", hash = "sha256:1e8c4d1bd230067bf1bbcdbd6032e5a57068638eb28b9153d008ecde288152af", size = 2537372 }, - { url = "https://files.pythonhosted.org/packages/88/5e/cf4e4dc712a141e10d58405c58c8268554aec3c35c09cdcda7535ff13f76/debugpy-1.8.19-cp314-cp314-manylinux_2_34_x86_64.whl", hash = "sha256:d40c016c1f538dbf1762936e3aeb43a89b965069d9f60f9e39d35d9d25e6b809", size = 4268729 }, - { url = "https://files.pythonhosted.org/packages/82/a3/c91a087ab21f1047db328c1d3eb5d1ff0e52de9e74f9f6f6fa14cdd93d58/debugpy-1.8.19-cp314-cp314-win32.whl", hash = "sha256:0601708223fe1cd0e27c6cce67a899d92c7d68e73690211e6788a4b0e1903f5b", size = 5286388 }, - { url = "https://files.pythonhosted.org/packages/17/b8/bfdc30b6e94f1eff09f2dc9cc1f9cd1c6cde3d996bcbd36ce2d9a4956e99/debugpy-1.8.19-cp314-cp314-win_amd64.whl", hash = "sha256:8e19a725f5d486f20e53a1dde2ab8bb2c9607c40c00a42ab646def962b41125f", size = 5327741 }, - { url = "https://files.pythonhosted.org/packages/25/3e/e27078370414ef35fafad2c06d182110073daaeb5d3bf734b0b1eeefe452/debugpy-1.8.19-py2.py3-none-any.whl", hash = "sha256:360ffd231a780abbc414ba0f005dad409e71c78637efe8f2bd75837132a41d38", size = 5292321 }, + { url = "https://files.pythonhosted.org/packages/4a/15/d762e5263d9e25b763b78be72dc084c7a32113a0bac119e2f7acae7700ed/debugpy-1.8.19-cp312-cp312-macosx_15_0_universal2.whl", hash = "sha256:bccb1540a49cde77edc7ce7d9d075c1dbeb2414751bc0048c7a11e1b597a4c2e", size = 2549995, upload_time = "2025-12-15T21:53:43.773Z" }, + { url = 
"https://files.pythonhosted.org/packages/a7/88/f7d25c68b18873b7c53d7c156ca7a7ffd8e77073aa0eac170a9b679cf786/debugpy-1.8.19-cp312-cp312-manylinux_2_34_x86_64.whl", hash = "sha256:e9c68d9a382ec754dc05ed1d1b4ed5bd824b9f7c1a8cd1083adb84b3c93501de", size = 4309891, upload_time = "2025-12-15T21:53:45.26Z" }, + { url = "https://files.pythonhosted.org/packages/c5/4f/a65e973aba3865794da65f71971dca01ae66666132c7b2647182d5be0c5f/debugpy-1.8.19-cp312-cp312-win32.whl", hash = "sha256:6599cab8a783d1496ae9984c52cb13b7c4a3bd06a8e6c33446832a5d97ce0bee", size = 5286355, upload_time = "2025-12-15T21:53:46.763Z" }, + { url = "https://files.pythonhosted.org/packages/d8/3a/d3d8b48fec96e3d824e404bf428276fb8419dfa766f78f10b08da1cb2986/debugpy-1.8.19-cp312-cp312-win_amd64.whl", hash = "sha256:66e3d2fd8f2035a8f111eb127fa508469dfa40928a89b460b41fd988684dc83d", size = 5328239, upload_time = "2025-12-15T21:53:48.868Z" }, + { url = "https://files.pythonhosted.org/packages/71/3d/388035a31a59c26f1ecc8d86af607d0c42e20ef80074147cd07b180c4349/debugpy-1.8.19-cp313-cp313-macosx_15_0_universal2.whl", hash = "sha256:91e35db2672a0abaf325f4868fcac9c1674a0d9ad9bb8a8c849c03a5ebba3e6d", size = 2538859, upload_time = "2025-12-15T21:53:50.478Z" }, + { url = "https://files.pythonhosted.org/packages/4a/19/c93a0772d0962294f083dbdb113af1a7427bb632d36e5314297068f55db7/debugpy-1.8.19-cp313-cp313-manylinux_2_34_x86_64.whl", hash = "sha256:85016a73ab84dea1c1f1dcd88ec692993bcbe4532d1b49ecb5f3c688ae50c606", size = 4292575, upload_time = "2025-12-15T21:53:51.821Z" }, + { url = "https://files.pythonhosted.org/packages/5c/56/09e48ab796b0a77e3d7dc250f95251832b8bf6838c9632f6100c98bdf426/debugpy-1.8.19-cp313-cp313-win32.whl", hash = "sha256:b605f17e89ba0ecee994391194285fada89cee111cfcd29d6f2ee11cbdc40976", size = 5286209, upload_time = "2025-12-15T21:53:53.602Z" }, + { url = "https://files.pythonhosted.org/packages/fb/4e/931480b9552c7d0feebe40c73725dd7703dcc578ba9efc14fe0e6d31cfd1/debugpy-1.8.19-cp313-cp313-win_amd64.whl", hash = "sha256:c30639998a9f9cd9699b4b621942c0179a6527f083c72351f95c6ab1728d5b73", size = 5328206, upload_time = "2025-12-15T21:53:55.433Z" }, + { url = "https://files.pythonhosted.org/packages/f6/b9/cbec520c3a00508327476c7fce26fbafef98f412707e511eb9d19a2ef467/debugpy-1.8.19-cp314-cp314-macosx_15_0_universal2.whl", hash = "sha256:1e8c4d1bd230067bf1bbcdbd6032e5a57068638eb28b9153d008ecde288152af", size = 2537372, upload_time = "2025-12-15T21:53:57.318Z" }, + { url = "https://files.pythonhosted.org/packages/88/5e/cf4e4dc712a141e10d58405c58c8268554aec3c35c09cdcda7535ff13f76/debugpy-1.8.19-cp314-cp314-manylinux_2_34_x86_64.whl", hash = "sha256:d40c016c1f538dbf1762936e3aeb43a89b965069d9f60f9e39d35d9d25e6b809", size = 4268729, upload_time = "2025-12-15T21:53:58.712Z" }, + { url = "https://files.pythonhosted.org/packages/82/a3/c91a087ab21f1047db328c1d3eb5d1ff0e52de9e74f9f6f6fa14cdd93d58/debugpy-1.8.19-cp314-cp314-win32.whl", hash = "sha256:0601708223fe1cd0e27c6cce67a899d92c7d68e73690211e6788a4b0e1903f5b", size = 5286388, upload_time = "2025-12-15T21:54:00.687Z" }, + { url = "https://files.pythonhosted.org/packages/17/b8/bfdc30b6e94f1eff09f2dc9cc1f9cd1c6cde3d996bcbd36ce2d9a4956e99/debugpy-1.8.19-cp314-cp314-win_amd64.whl", hash = "sha256:8e19a725f5d486f20e53a1dde2ab8bb2c9607c40c00a42ab646def962b41125f", size = 5327741, upload_time = "2025-12-15T21:54:02.148Z" }, + { url = "https://files.pythonhosted.org/packages/25/3e/e27078370414ef35fafad2c06d182110073daaeb5d3bf734b0b1eeefe452/debugpy-1.8.19-py2.py3-none-any.whl", hash = 
"sha256:360ffd231a780abbc414ba0f005dad409e71c78637efe8f2bd75837132a41d38", size = 5292321, upload_time = "2025-12-15T21:54:16.024Z" }, ] [[package]] name = "dill" version = "0.3.8" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/17/4d/ac7ffa80c69ea1df30a8aa11b3578692a5118e7cd1aa157e3ef73b092d15/dill-0.3.8.tar.gz", hash = "sha256:3ebe3c479ad625c4553aca177444d89b486b1d84982eeacded644afc0cf797ca", size = 184847 } +sdist = { url = "https://files.pythonhosted.org/packages/17/4d/ac7ffa80c69ea1df30a8aa11b3578692a5118e7cd1aa157e3ef73b092d15/dill-0.3.8.tar.gz", hash = "sha256:3ebe3c479ad625c4553aca177444d89b486b1d84982eeacded644afc0cf797ca", size = 184847, upload_time = "2024-01-27T23:42:16.145Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c9/7a/cef76fd8438a42f96db64ddaa85280485a9c395e7df3db8158cfec1eee34/dill-0.3.8-py3-none-any.whl", hash = "sha256:c36ca9ffb54365bdd2f8eb3eff7d2a21237f8452b57ace88b1ac615b7e815bd7", size = 116252, upload_time = "2024-01-27T23:42:14.239Z" }, +] + +[[package]] +name = "distlib" +version = "0.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/96/8e/709914eb2b5749865801041647dc7f4e6d00b549cfe88b65ca192995f07c/distlib-0.4.0.tar.gz", hash = "sha256:feec40075be03a04501a973d81f633735b4b69f98b05450592310c0f401a4e0d", size = 614605, upload_time = "2025-07-17T16:52:00.465Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c9/7a/cef76fd8438a42f96db64ddaa85280485a9c395e7df3db8158cfec1eee34/dill-0.3.8-py3-none-any.whl", hash = "sha256:c36ca9ffb54365bdd2f8eb3eff7d2a21237f8452b57ace88b1ac615b7e815bd7", size = 116252 }, + { url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047, upload_time = "2025-07-17T16:51:58.613Z" }, ] [[package]] name = "docstring-parser" version = "0.17.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b2/9d/c3b43da9515bd270df0f80548d9944e389870713cc1fe2b8fb35fe2bcefd/docstring_parser-0.17.0.tar.gz", hash = "sha256:583de4a309722b3315439bb31d64ba3eebada841f2e2cee23b99df001434c912", size = 27442 } +sdist = { url = "https://files.pythonhosted.org/packages/b2/9d/c3b43da9515bd270df0f80548d9944e389870713cc1fe2b8fb35fe2bcefd/docstring_parser-0.17.0.tar.gz", hash = "sha256:583de4a309722b3315439bb31d64ba3eebada841f2e2cee23b99df001434c912", size = 27442, upload_time = "2025-07-21T07:35:01.868Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/55/e2/2537ebcff11c1ee1ff17d8d0b6f4db75873e3b0fb32c2d4a2ee31ecb310a/docstring_parser-0.17.0-py3-none-any.whl", hash = "sha256:cf2569abd23dce8099b300f9b4fa8191e9582dda731fd533daf54c4551658708", size = 36896 }, + { url = "https://files.pythonhosted.org/packages/55/e2/2537ebcff11c1ee1ff17d8d0b6f4db75873e3b0fb32c2d4a2ee31ecb310a/docstring_parser-0.17.0-py3-none-any.whl", hash = "sha256:cf2569abd23dce8099b300f9b4fa8191e9582dda731fd533daf54c4551658708", size = 36896, upload_time = "2025-07-21T07:35:00.684Z" }, ] [[package]] @@ -742,6 +756,14 @@ dependencies = [ { name = "requests" }, ] +[package.dev-dependencies] +dev = [ + { name = "datamodel-code-generator" }, + { name = "pre-commit" }, + { name = "pytest" }, + { name = "ruff" }, +] + [package.metadata] requires-dist = [ { name = "crfm-helm", specifier = ">=0.5.12" 
}, @@ -753,120 +775,128 @@ requires-dist = [ { name = "requests", specifier = ">=2.32.5,<3.0.0" }, ] +[package.metadata.requires-dev] +dev = [ + { name = "datamodel-code-generator", specifier = ">=0.43.1" }, + { name = "pre-commit", specifier = ">=4.3.0" }, + { name = "pytest", specifier = ">=9.0.2" }, + { name = "ruff", specifier = ">=0.12.2" }, +] + [[package]] name = "filelock" version = "3.20.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/1d/65/ce7f1b70157833bf3cb851b556a37d4547ceafc158aa9b34b36782f23696/filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1", size = 19485 } +sdist = { url = "https://files.pythonhosted.org/packages/1d/65/ce7f1b70157833bf3cb851b556a37d4547ceafc158aa9b34b36782f23696/filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1", size = 19485, upload_time = "2026-01-09T17:55:05.421Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b5/36/7fb70f04bf00bc646cd5bb45aa9eddb15e19437a28b8fb2b4a5249fac770/filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1", size = 16701 }, + { url = "https://files.pythonhosted.org/packages/b5/36/7fb70f04bf00bc646cd5bb45aa9eddb15e19437a28b8fb2b4a5249fac770/filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1", size = 16701, upload_time = "2026-01-09T17:55:04.334Z" }, ] [[package]] name = "frozendict" version = "2.4.7" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/90/b2/2a3d1374b7780999d3184e171e25439a8358c47b481f68be883c14086b4c/frozendict-2.4.7.tar.gz", hash = "sha256:e478fb2a1391a56c8a6e10cc97c4a9002b410ecd1ac28c18d780661762e271bd", size = 317082 } +sdist = { url = "https://files.pythonhosted.org/packages/90/b2/2a3d1374b7780999d3184e171e25439a8358c47b481f68be883c14086b4c/frozendict-2.4.7.tar.gz", hash = "sha256:e478fb2a1391a56c8a6e10cc97c4a9002b410ecd1ac28c18d780661762e271bd", size = 317082, upload_time = "2025-11-11T22:40:14.251Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/38/74/f94141b38a51a553efef7f510fc213894161ae49b88bffd037f8d2a7cb2f/frozendict-2.4.7-py3-none-any.whl", hash = "sha256:972af65924ea25cf5b4d9326d549e69a9a4918d8a76a9d3a7cd174d98b237550", size = 16264 }, + { url = "https://files.pythonhosted.org/packages/38/74/f94141b38a51a553efef7f510fc213894161ae49b88bffd037f8d2a7cb2f/frozendict-2.4.7-py3-none-any.whl", hash = "sha256:972af65924ea25cf5b4d9326d549e69a9a4918d8a76a9d3a7cd174d98b237550", size = 16264, upload_time = "2025-11-11T22:40:12.836Z" }, ] [[package]] name = "frozenlist" version = "1.8.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/2d/f5/c831fac6cc817d26fd54c7eaccd04ef7e0288806943f7cc5bbf69f3ac1f0/frozenlist-1.8.0.tar.gz", hash = "sha256:3ede829ed8d842f6cd48fc7081d7a41001a56f1f38603f9d49bf3020d59a31ad", size = 45875 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/69/29/948b9aa87e75820a38650af445d2ef2b6b8a6fab1a23b6bb9e4ef0be2d59/frozenlist-1.8.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:78f7b9e5d6f2fdb88cdde9440dc147259b62b9d3b019924def9f6478be254ac1", size = 87782 }, - { url = "https://files.pythonhosted.org/packages/64/80/4f6e318ee2a7c0750ed724fa33a4bdf1eacdc5a39a7a24e818a773cd91af/frozenlist-1.8.0-cp312-cp312-macosx_10_13_x86_64.whl", 
hash = "sha256:229bf37d2e4acdaf808fd3f06e854a4a7a3661e871b10dc1f8f1896a3b05f18b", size = 50594 }, - { url = "https://files.pythonhosted.org/packages/2b/94/5c8a2b50a496b11dd519f4a24cb5496cf125681dd99e94c604ccdea9419a/frozenlist-1.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f833670942247a14eafbb675458b4e61c82e002a148f49e68257b79296e865c4", size = 50448 }, - { url = "https://files.pythonhosted.org/packages/6a/bd/d91c5e39f490a49df14320f4e8c80161cfcce09f1e2cde1edd16a551abb3/frozenlist-1.8.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:494a5952b1c597ba44e0e78113a7266e656b9794eec897b19ead706bd7074383", size = 242411 }, - { url = "https://files.pythonhosted.org/packages/8f/83/f61505a05109ef3293dfb1ff594d13d64a2324ac3482be2cedc2be818256/frozenlist-1.8.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:96f423a119f4777a4a056b66ce11527366a8bb92f54e541ade21f2374433f6d4", size = 243014 }, - { url = "https://files.pythonhosted.org/packages/d8/cb/cb6c7b0f7d4023ddda30cf56b8b17494eb3a79e3fda666bf735f63118b35/frozenlist-1.8.0-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3462dd9475af2025c31cc61be6652dfa25cbfb56cbbf52f4ccfe029f38decaf8", size = 234909 }, - { url = "https://files.pythonhosted.org/packages/31/c5/cd7a1f3b8b34af009fb17d4123c5a778b44ae2804e3ad6b86204255f9ec5/frozenlist-1.8.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c4c800524c9cd9bac5166cd6f55285957fcfc907db323e193f2afcd4d9abd69b", size = 250049 }, - { url = "https://files.pythonhosted.org/packages/c0/01/2f95d3b416c584a1e7f0e1d6d31998c4a795f7544069ee2e0962a4b60740/frozenlist-1.8.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d6a5df73acd3399d893dafc71663ad22534b5aa4f94e8a2fabfe856c3c1b6a52", size = 256485 }, - { url = "https://files.pythonhosted.org/packages/ce/03/024bf7720b3abaebcff6d0793d73c154237b85bdf67b7ed55e5e9596dc9a/frozenlist-1.8.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:405e8fe955c2280ce66428b3ca55e12b3c4e9c336fb2103a4937e891c69a4a29", size = 237619 }, - { url = "https://files.pythonhosted.org/packages/69/fa/f8abdfe7d76b731f5d8bd217827cf6764d4f1d9763407e42717b4bed50a0/frozenlist-1.8.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:908bd3f6439f2fef9e85031b59fd4f1297af54415fb60e4254a95f75b3cab3f3", size = 250320 }, - { url = "https://files.pythonhosted.org/packages/f5/3c/b051329f718b463b22613e269ad72138cc256c540f78a6de89452803a47d/frozenlist-1.8.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:294e487f9ec720bd8ffcebc99d575f7eff3568a08a253d1ee1a0378754b74143", size = 246820 }, - { url = "https://files.pythonhosted.org/packages/0f/ae/58282e8f98e444b3f4dd42448ff36fa38bef29e40d40f330b22e7108f565/frozenlist-1.8.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:74c51543498289c0c43656701be6b077f4b265868fa7f8a8859c197006efb608", size = 250518 }, - { url = "https://files.pythonhosted.org/packages/8f/96/007e5944694d66123183845a106547a15944fbbb7154788cbf7272789536/frozenlist-1.8.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:776f352e8329135506a1d6bf16ac3f87bc25b28e765949282dcc627af36123aa", size = 239096 }, - { url = "https://files.pythonhosted.org/packages/66/bb/852b9d6db2fa40be96f29c0d1205c306288f0684df8fd26ca1951d461a56/frozenlist-1.8.0-cp312-cp312-win32.whl", hash = 
"sha256:433403ae80709741ce34038da08511d4a77062aa924baf411ef73d1146e74faf", size = 39985 }, - { url = "https://files.pythonhosted.org/packages/b8/af/38e51a553dd66eb064cdf193841f16f077585d4d28394c2fa6235cb41765/frozenlist-1.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:34187385b08f866104f0c0617404c8eb08165ab1272e884abc89c112e9c00746", size = 44591 }, - { url = "https://files.pythonhosted.org/packages/a7/06/1dc65480ab147339fecc70797e9c2f69d9cea9cf38934ce08df070fdb9cb/frozenlist-1.8.0-cp312-cp312-win_arm64.whl", hash = "sha256:fe3c58d2f5db5fbd18c2987cba06d51b0529f52bc3a6cdc33d3f4eab725104bd", size = 40102 }, - { url = "https://files.pythonhosted.org/packages/2d/40/0832c31a37d60f60ed79e9dfb5a92e1e2af4f40a16a29abcc7992af9edff/frozenlist-1.8.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8d92f1a84bb12d9e56f818b3a746f3efba93c1b63c8387a73dde655e1e42282a", size = 85717 }, - { url = "https://files.pythonhosted.org/packages/30/ba/b0b3de23f40bc55a7057bd38434e25c34fa48e17f20ee273bbde5e0650f3/frozenlist-1.8.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:96153e77a591c8adc2ee805756c61f59fef4cf4073a9275ee86fe8cba41241f7", size = 49651 }, - { url = "https://files.pythonhosted.org/packages/0c/ab/6e5080ee374f875296c4243c381bbdef97a9ac39c6e3ce1d5f7d42cb78d6/frozenlist-1.8.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f21f00a91358803399890ab167098c131ec2ddd5f8f5fd5fe9c9f2c6fcd91e40", size = 49417 }, - { url = "https://files.pythonhosted.org/packages/d5/4e/e4691508f9477ce67da2015d8c00acd751e6287739123113a9fca6f1604e/frozenlist-1.8.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:fb30f9626572a76dfe4293c7194a09fb1fe93ba94c7d4f720dfae3b646b45027", size = 234391 }, - { url = "https://files.pythonhosted.org/packages/40/76/c202df58e3acdf12969a7895fd6f3bc016c642e6726aa63bd3025e0fc71c/frozenlist-1.8.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eaa352d7047a31d87dafcacbabe89df0aa506abb5b1b85a2fb91bc3faa02d822", size = 233048 }, - { url = "https://files.pythonhosted.org/packages/f9/c0/8746afb90f17b73ca5979c7a3958116e105ff796e718575175319b5bb4ce/frozenlist-1.8.0-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:03ae967b4e297f58f8c774c7eabcce57fe3c2434817d4385c50661845a058121", size = 226549 }, - { url = "https://files.pythonhosted.org/packages/7e/eb/4c7eefc718ff72f9b6c4893291abaae5fbc0c82226a32dcd8ef4f7a5dbef/frozenlist-1.8.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f6292f1de555ffcc675941d65fffffb0a5bcd992905015f85d0592201793e0e5", size = 239833 }, - { url = "https://files.pythonhosted.org/packages/c2/4e/e5c02187cf704224f8b21bee886f3d713ca379535f16893233b9d672ea71/frozenlist-1.8.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:29548f9b5b5e3460ce7378144c3010363d8035cea44bc0bf02d57f5a685e084e", size = 245363 }, - { url = "https://files.pythonhosted.org/packages/1f/96/cb85ec608464472e82ad37a17f844889c36100eed57bea094518bf270692/frozenlist-1.8.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ec3cc8c5d4084591b4237c0a272cc4f50a5b03396a47d9caaf76f5d7b38a4f11", size = 229314 }, - { url = "https://files.pythonhosted.org/packages/5d/6f/4ae69c550e4cee66b57887daeebe006fe985917c01d0fff9caab9883f6d0/frozenlist-1.8.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:517279f58009d0b1f2e7c1b130b377a349405da3f7621ed6bfae50b10adf20c1", size = 
243365 }, - { url = "https://files.pythonhosted.org/packages/7a/58/afd56de246cf11780a40a2c28dc7cbabbf06337cc8ddb1c780a2d97e88d8/frozenlist-1.8.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:db1e72ede2d0d7ccb213f218df6a078a9c09a7de257c2fe8fcef16d5925230b1", size = 237763 }, - { url = "https://files.pythonhosted.org/packages/cb/36/cdfaf6ed42e2644740d4a10452d8e97fa1c062e2a8006e4b09f1b5fd7d63/frozenlist-1.8.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:b4dec9482a65c54a5044486847b8a66bf10c9cb4926d42927ec4e8fd5db7fed8", size = 240110 }, - { url = "https://files.pythonhosted.org/packages/03/a8/9ea226fbefad669f11b52e864c55f0bd57d3c8d7eb07e9f2e9a0b39502e1/frozenlist-1.8.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:21900c48ae04d13d416f0e1e0c4d81f7931f73a9dfa0b7a8746fb2fe7dd970ed", size = 233717 }, - { url = "https://files.pythonhosted.org/packages/1e/0b/1b5531611e83ba7d13ccc9988967ea1b51186af64c42b7a7af465dcc9568/frozenlist-1.8.0-cp313-cp313-win32.whl", hash = "sha256:8b7b94a067d1c504ee0b16def57ad5738701e4ba10cec90529f13fa03c833496", size = 39628 }, - { url = "https://files.pythonhosted.org/packages/d8/cf/174c91dbc9cc49bc7b7aab74d8b734e974d1faa8f191c74af9b7e80848e6/frozenlist-1.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:878be833caa6a3821caf85eb39c5ba92d28e85df26d57afb06b35b2efd937231", size = 43882 }, - { url = "https://files.pythonhosted.org/packages/c1/17/502cd212cbfa96eb1388614fe39a3fc9ab87dbbe042b66f97acb57474834/frozenlist-1.8.0-cp313-cp313-win_arm64.whl", hash = "sha256:44389d135b3ff43ba8cc89ff7f51f5a0bb6b63d829c8300f79a2fe4fe61bcc62", size = 39676 }, - { url = "https://files.pythonhosted.org/packages/d2/5c/3bbfaa920dfab09e76946a5d2833a7cbdf7b9b4a91c714666ac4855b88b4/frozenlist-1.8.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:e25ac20a2ef37e91c1b39938b591457666a0fa835c7783c3a8f33ea42870db94", size = 89235 }, - { url = "https://files.pythonhosted.org/packages/d2/d6/f03961ef72166cec1687e84e8925838442b615bd0b8854b54923ce5b7b8a/frozenlist-1.8.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:07cdca25a91a4386d2e76ad992916a85038a9b97561bf7a3fd12d5d9ce31870c", size = 50742 }, - { url = "https://files.pythonhosted.org/packages/1e/bb/a6d12b7ba4c3337667d0e421f7181c82dda448ce4e7ad7ecd249a16fa806/frozenlist-1.8.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:4e0c11f2cc6717e0a741f84a527c52616140741cd812a50422f83dc31749fb52", size = 51725 }, - { url = "https://files.pythonhosted.org/packages/bc/71/d1fed0ffe2c2ccd70b43714c6cab0f4188f09f8a67a7914a6b46ee30f274/frozenlist-1.8.0-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b3210649ee28062ea6099cfda39e147fa1bc039583c8ee4481cb7811e2448c51", size = 284533 }, - { url = "https://files.pythonhosted.org/packages/c9/1f/fb1685a7b009d89f9bf78a42d94461bc06581f6e718c39344754a5d9bada/frozenlist-1.8.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:581ef5194c48035a7de2aefc72ac6539823bb71508189e5de01d60c9dcd5fa65", size = 292506 }, - { url = "https://files.pythonhosted.org/packages/e6/3b/b991fe1612703f7e0d05c0cf734c1b77aaf7c7d321df4572e8d36e7048c8/frozenlist-1.8.0-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3ef2d026f16a2b1866e1d86fc4e1291e1ed8a387b2c333809419a2f8b3a77b82", size = 274161 }, - { url = 
"https://files.pythonhosted.org/packages/ca/ec/c5c618767bcdf66e88945ec0157d7f6c4a1322f1473392319b7a2501ded7/frozenlist-1.8.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:5500ef82073f599ac84d888e3a8c1f77ac831183244bfd7f11eaa0289fb30714", size = 294676 }, - { url = "https://files.pythonhosted.org/packages/7c/ce/3934758637d8f8a88d11f0585d6495ef54b2044ed6ec84492a91fa3b27aa/frozenlist-1.8.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:50066c3997d0091c411a66e710f4e11752251e6d2d73d70d8d5d4c76442a199d", size = 300638 }, - { url = "https://files.pythonhosted.org/packages/fc/4f/a7e4d0d467298f42de4b41cbc7ddaf19d3cfeabaf9ff97c20c6c7ee409f9/frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:5c1c8e78426e59b3f8005e9b19f6ff46e5845895adbde20ece9218319eca6506", size = 283067 }, - { url = "https://files.pythonhosted.org/packages/dc/48/c7b163063d55a83772b268e6d1affb960771b0e203b632cfe09522d67ea5/frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:eefdba20de0d938cec6a89bd4d70f346a03108a19b9df4248d3cf0d88f1b0f51", size = 292101 }, - { url = "https://files.pythonhosted.org/packages/9f/d0/2366d3c4ecdc2fd391e0afa6e11500bfba0ea772764d631bbf82f0136c9d/frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:cf253e0e1c3ceb4aaff6df637ce033ff6535fb8c70a764a8f46aafd3d6ab798e", size = 289901 }, - { url = "https://files.pythonhosted.org/packages/b8/94/daff920e82c1b70e3618a2ac39fbc01ae3e2ff6124e80739ce5d71c9b920/frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:032efa2674356903cd0261c4317a561a6850f3ac864a63fc1583147fb05a79b0", size = 289395 }, - { url = "https://files.pythonhosted.org/packages/e3/20/bba307ab4235a09fdcd3cc5508dbabd17c4634a1af4b96e0f69bfe551ebd/frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6da155091429aeba16851ecb10a9104a108bcd32f6c1642867eadaee401c1c41", size = 283659 }, - { url = "https://files.pythonhosted.org/packages/fd/00/04ca1c3a7a124b6de4f8a9a17cc2fcad138b4608e7a3fc5877804b8715d7/frozenlist-1.8.0-cp313-cp313t-win32.whl", hash = "sha256:0f96534f8bfebc1a394209427d0f8a63d343c9779cda6fc25e8e121b5fd8555b", size = 43492 }, - { url = "https://files.pythonhosted.org/packages/59/5e/c69f733a86a94ab10f68e496dc6b7e8bc078ebb415281d5698313e3af3a1/frozenlist-1.8.0-cp313-cp313t-win_amd64.whl", hash = "sha256:5d63a068f978fc69421fb0e6eb91a9603187527c86b7cd3f534a5b77a592b888", size = 48034 }, - { url = "https://files.pythonhosted.org/packages/16/6c/be9d79775d8abe79b05fa6d23da99ad6e7763a1d080fbae7290b286093fd/frozenlist-1.8.0-cp313-cp313t-win_arm64.whl", hash = "sha256:bf0a7e10b077bf5fb9380ad3ae8ce20ef919a6ad93b4552896419ac7e1d8e042", size = 41749 }, - { url = "https://files.pythonhosted.org/packages/f1/c8/85da824b7e7b9b6e7f7705b2ecaf9591ba6f79c1177f324c2735e41d36a2/frozenlist-1.8.0-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:cee686f1f4cadeb2136007ddedd0aaf928ab95216e7691c63e50a8ec066336d0", size = 86127 }, - { url = "https://files.pythonhosted.org/packages/8e/e8/a1185e236ec66c20afd72399522f142c3724c785789255202d27ae992818/frozenlist-1.8.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:119fb2a1bd47307e899c2fac7f28e85b9a543864df47aa7ec9d3c1b4545f096f", size = 49698 }, - { url = "https://files.pythonhosted.org/packages/a1/93/72b1736d68f03fda5fdf0f2180fb6caaae3894f1b854d006ac61ecc727ee/frozenlist-1.8.0-cp314-cp314-macosx_11_0_arm64.whl", hash = 
"sha256:4970ece02dbc8c3a92fcc5228e36a3e933a01a999f7094ff7c23fbd2beeaa67c", size = 49749 }, - { url = "https://files.pythonhosted.org/packages/a7/b2/fabede9fafd976b991e9f1b9c8c873ed86f202889b864756f240ce6dd855/frozenlist-1.8.0-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:cba69cb73723c3f329622e34bdbf5ce1f80c21c290ff04256cff1cd3c2036ed2", size = 231298 }, - { url = "https://files.pythonhosted.org/packages/3a/3b/d9b1e0b0eed36e70477ffb8360c49c85c8ca8ef9700a4e6711f39a6e8b45/frozenlist-1.8.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:778a11b15673f6f1df23d9586f83c4846c471a8af693a22e066508b77d201ec8", size = 232015 }, - { url = "https://files.pythonhosted.org/packages/dc/94/be719d2766c1138148564a3960fc2c06eb688da592bdc25adcf856101be7/frozenlist-1.8.0-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:0325024fe97f94c41c08872db482cf8ac4800d80e79222c6b0b7b162d5b13686", size = 225038 }, - { url = "https://files.pythonhosted.org/packages/e4/09/6712b6c5465f083f52f50cf74167b92d4ea2f50e46a9eea0523d658454ae/frozenlist-1.8.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:97260ff46b207a82a7567b581ab4190bd4dfa09f4db8a8b49d1a958f6aa4940e", size = 240130 }, - { url = "https://files.pythonhosted.org/packages/f8/d4/cd065cdcf21550b54f3ce6a22e143ac9e4836ca42a0de1022da8498eac89/frozenlist-1.8.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:54b2077180eb7f83dd52c40b2750d0a9f175e06a42e3213ce047219de902717a", size = 242845 }, - { url = "https://files.pythonhosted.org/packages/62/c3/f57a5c8c70cd1ead3d5d5f776f89d33110b1addae0ab010ad774d9a44fb9/frozenlist-1.8.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:2f05983daecab868a31e1da44462873306d3cbfd76d1f0b5b69c473d21dbb128", size = 229131 }, - { url = "https://files.pythonhosted.org/packages/6c/52/232476fe9cb64f0742f3fde2b7d26c1dac18b6d62071c74d4ded55e0ef94/frozenlist-1.8.0-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:33f48f51a446114bc5d251fb2954ab0164d5be02ad3382abcbfe07e2531d650f", size = 240542 }, - { url = "https://files.pythonhosted.org/packages/5f/85/07bf3f5d0fb5414aee5f47d33c6f5c77bfe49aac680bfece33d4fdf6a246/frozenlist-1.8.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:154e55ec0655291b5dd1b8731c637ecdb50975a2ae70c606d100750a540082f7", size = 237308 }, - { url = "https://files.pythonhosted.org/packages/11/99/ae3a33d5befd41ac0ca2cc7fd3aa707c9c324de2e89db0e0f45db9a64c26/frozenlist-1.8.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:4314debad13beb564b708b4a496020e5306c7333fa9a3ab90374169a20ffab30", size = 238210 }, - { url = "https://files.pythonhosted.org/packages/b2/60/b1d2da22f4970e7a155f0adde9b1435712ece01b3cd45ba63702aea33938/frozenlist-1.8.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:073f8bf8becba60aa931eb3bc420b217bb7d5b8f4750e6f8b3be7f3da85d38b7", size = 231972 }, - { url = "https://files.pythonhosted.org/packages/3f/ab/945b2f32de889993b9c9133216c068b7fcf257d8595a0ac420ac8677cab0/frozenlist-1.8.0-cp314-cp314-win32.whl", hash = "sha256:bac9c42ba2ac65ddc115d930c78d24ab8d4f465fd3fc473cdedfccadb9429806", size = 40536 }, - { url = "https://files.pythonhosted.org/packages/59/ad/9caa9b9c836d9ad6f067157a531ac48b7d36499f5036d4141ce78c230b1b/frozenlist-1.8.0-cp314-cp314-win_amd64.whl", hash = "sha256:3e0761f4d1a44f1d1a47996511752cf3dcec5bbdd9cc2b4fe595caf97754b7a0", size = 
44330 }, - { url = "https://files.pythonhosted.org/packages/82/13/e6950121764f2676f43534c555249f57030150260aee9dcf7d64efda11dd/frozenlist-1.8.0-cp314-cp314-win_arm64.whl", hash = "sha256:d1eaff1d00c7751b7c6662e9c5ba6eb2c17a2306ba5e2a37f24ddf3cc953402b", size = 40627 }, - { url = "https://files.pythonhosted.org/packages/c0/c7/43200656ecc4e02d3f8bc248df68256cd9572b3f0017f0a0c4e93440ae23/frozenlist-1.8.0-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:d3bb933317c52d7ea5004a1c442eef86f426886fba134ef8cf4226ea6ee1821d", size = 89238 }, - { url = "https://files.pythonhosted.org/packages/d1/29/55c5f0689b9c0fb765055629f472c0de484dcaf0acee2f7707266ae3583c/frozenlist-1.8.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:8009897cdef112072f93a0efdce29cd819e717fd2f649ee3016efd3cd885a7ed", size = 50738 }, - { url = "https://files.pythonhosted.org/packages/ba/7d/b7282a445956506fa11da8c2db7d276adcbf2b17d8bb8407a47685263f90/frozenlist-1.8.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2c5dcbbc55383e5883246d11fd179782a9d07a986c40f49abe89ddf865913930", size = 51739 }, - { url = "https://files.pythonhosted.org/packages/62/1c/3d8622e60d0b767a5510d1d3cf21065b9db874696a51ea6d7a43180a259c/frozenlist-1.8.0-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:39ecbc32f1390387d2aa4f5a995e465e9e2f79ba3adcac92d68e3e0afae6657c", size = 284186 }, - { url = "https://files.pythonhosted.org/packages/2d/14/aa36d5f85a89679a85a1d44cd7a6657e0b1c75f61e7cad987b203d2daca8/frozenlist-1.8.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:92db2bf818d5cc8d9c1f1fc56b897662e24ea5adb36ad1f1d82875bd64e03c24", size = 292196 }, - { url = "https://files.pythonhosted.org/packages/05/23/6bde59eb55abd407d34f77d39a5126fb7b4f109a3f611d3929f14b700c66/frozenlist-1.8.0-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:2dc43a022e555de94c3b68a4ef0b11c4f747d12c024a520c7101709a2144fb37", size = 273830 }, - { url = "https://files.pythonhosted.org/packages/d2/3f/22cff331bfad7a8afa616289000ba793347fcd7bc275f3b28ecea2a27909/frozenlist-1.8.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:cb89a7f2de3602cfed448095bab3f178399646ab7c61454315089787df07733a", size = 294289 }, - { url = "https://files.pythonhosted.org/packages/a4/89/5b057c799de4838b6c69aa82b79705f2027615e01be996d2486a69ca99c4/frozenlist-1.8.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:33139dc858c580ea50e7e60a1b0ea003efa1fd42e6ec7fdbad78fff65fad2fd2", size = 300318 }, - { url = "https://files.pythonhosted.org/packages/30/de/2c22ab3eb2a8af6d69dc799e48455813bab3690c760de58e1bf43b36da3e/frozenlist-1.8.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:168c0969a329b416119507ba30b9ea13688fafffac1b7822802537569a1cb0ef", size = 282814 }, - { url = "https://files.pythonhosted.org/packages/59/f7/970141a6a8dbd7f556d94977858cfb36fa9b66e0892c6dd780d2219d8cd8/frozenlist-1.8.0-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:28bd570e8e189d7f7b001966435f9dac6718324b5be2990ac496cf1ea9ddb7fe", size = 291762 }, - { url = "https://files.pythonhosted.org/packages/c1/15/ca1adae83a719f82df9116d66f5bb28bb95557b3951903d39135620ef157/frozenlist-1.8.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:b2a095d45c5d46e5e79ba1e5b9cb787f541a8dee0433836cea4b96a2c439dcd8", size = 289470 }, - { url = 
"https://files.pythonhosted.org/packages/ac/83/dca6dc53bf657d371fbc88ddeb21b79891e747189c5de990b9dfff2ccba1/frozenlist-1.8.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:eab8145831a0d56ec9c4139b6c3e594c7a83c2c8be25d5bcf2d86136a532287a", size = 289042 }, - { url = "https://files.pythonhosted.org/packages/96/52/abddd34ca99be142f354398700536c5bd315880ed0a213812bc491cff5e4/frozenlist-1.8.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:974b28cf63cc99dfb2188d8d222bc6843656188164848c4f679e63dae4b0708e", size = 283148 }, - { url = "https://files.pythonhosted.org/packages/af/d3/76bd4ed4317e7119c2b7f57c3f6934aba26d277acc6309f873341640e21f/frozenlist-1.8.0-cp314-cp314t-win32.whl", hash = "sha256:342c97bf697ac5480c0a7ec73cd700ecfa5a8a40ac923bd035484616efecc2df", size = 44676 }, - { url = "https://files.pythonhosted.org/packages/89/76/c615883b7b521ead2944bb3480398cbb07e12b7b4e4d073d3752eb721558/frozenlist-1.8.0-cp314-cp314t-win_amd64.whl", hash = "sha256:06be8f67f39c8b1dc671f5d83aaefd3358ae5cdcf8314552c57e7ed3e6475bdd", size = 49451 }, - { url = "https://files.pythonhosted.org/packages/e0/a3/5982da14e113d07b325230f95060e2169f5311b1017ea8af2a29b374c289/frozenlist-1.8.0-cp314-cp314t-win_arm64.whl", hash = "sha256:102e6314ca4da683dca92e3b1355490fed5f313b768500084fbe6371fddfdb79", size = 42507 }, - { url = "https://files.pythonhosted.org/packages/9a/9a/e35b4a917281c0b8419d4207f4334c8e8c5dbf4f3f5f9ada73958d937dcc/frozenlist-1.8.0-py3-none-any.whl", hash = "sha256:0c18a16eab41e82c295618a77502e17b195883241c563b00f0aa5106fc4eaa0d", size = 13409 }, +sdist = { url = "https://files.pythonhosted.org/packages/2d/f5/c831fac6cc817d26fd54c7eaccd04ef7e0288806943f7cc5bbf69f3ac1f0/frozenlist-1.8.0.tar.gz", hash = "sha256:3ede829ed8d842f6cd48fc7081d7a41001a56f1f38603f9d49bf3020d59a31ad", size = 45875, upload_time = "2025-10-06T05:38:17.865Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/29/948b9aa87e75820a38650af445d2ef2b6b8a6fab1a23b6bb9e4ef0be2d59/frozenlist-1.8.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:78f7b9e5d6f2fdb88cdde9440dc147259b62b9d3b019924def9f6478be254ac1", size = 87782, upload_time = "2025-10-06T05:36:06.649Z" }, + { url = "https://files.pythonhosted.org/packages/64/80/4f6e318ee2a7c0750ed724fa33a4bdf1eacdc5a39a7a24e818a773cd91af/frozenlist-1.8.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:229bf37d2e4acdaf808fd3f06e854a4a7a3661e871b10dc1f8f1896a3b05f18b", size = 50594, upload_time = "2025-10-06T05:36:07.69Z" }, + { url = "https://files.pythonhosted.org/packages/2b/94/5c8a2b50a496b11dd519f4a24cb5496cf125681dd99e94c604ccdea9419a/frozenlist-1.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f833670942247a14eafbb675458b4e61c82e002a148f49e68257b79296e865c4", size = 50448, upload_time = "2025-10-06T05:36:08.78Z" }, + { url = "https://files.pythonhosted.org/packages/6a/bd/d91c5e39f490a49df14320f4e8c80161cfcce09f1e2cde1edd16a551abb3/frozenlist-1.8.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:494a5952b1c597ba44e0e78113a7266e656b9794eec897b19ead706bd7074383", size = 242411, upload_time = "2025-10-06T05:36:09.801Z" }, + { url = "https://files.pythonhosted.org/packages/8f/83/f61505a05109ef3293dfb1ff594d13d64a2324ac3482be2cedc2be818256/frozenlist-1.8.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:96f423a119f4777a4a056b66ce11527366a8bb92f54e541ade21f2374433f6d4", size = 243014, upload_time = "2025-10-06T05:36:11.394Z" }, + { url = 
"https://files.pythonhosted.org/packages/d8/cb/cb6c7b0f7d4023ddda30cf56b8b17494eb3a79e3fda666bf735f63118b35/frozenlist-1.8.0-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3462dd9475af2025c31cc61be6652dfa25cbfb56cbbf52f4ccfe029f38decaf8", size = 234909, upload_time = "2025-10-06T05:36:12.598Z" }, + { url = "https://files.pythonhosted.org/packages/31/c5/cd7a1f3b8b34af009fb17d4123c5a778b44ae2804e3ad6b86204255f9ec5/frozenlist-1.8.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c4c800524c9cd9bac5166cd6f55285957fcfc907db323e193f2afcd4d9abd69b", size = 250049, upload_time = "2025-10-06T05:36:14.065Z" }, + { url = "https://files.pythonhosted.org/packages/c0/01/2f95d3b416c584a1e7f0e1d6d31998c4a795f7544069ee2e0962a4b60740/frozenlist-1.8.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d6a5df73acd3399d893dafc71663ad22534b5aa4f94e8a2fabfe856c3c1b6a52", size = 256485, upload_time = "2025-10-06T05:36:15.39Z" }, + { url = "https://files.pythonhosted.org/packages/ce/03/024bf7720b3abaebcff6d0793d73c154237b85bdf67b7ed55e5e9596dc9a/frozenlist-1.8.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:405e8fe955c2280ce66428b3ca55e12b3c4e9c336fb2103a4937e891c69a4a29", size = 237619, upload_time = "2025-10-06T05:36:16.558Z" }, + { url = "https://files.pythonhosted.org/packages/69/fa/f8abdfe7d76b731f5d8bd217827cf6764d4f1d9763407e42717b4bed50a0/frozenlist-1.8.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:908bd3f6439f2fef9e85031b59fd4f1297af54415fb60e4254a95f75b3cab3f3", size = 250320, upload_time = "2025-10-06T05:36:17.821Z" }, + { url = "https://files.pythonhosted.org/packages/f5/3c/b051329f718b463b22613e269ad72138cc256c540f78a6de89452803a47d/frozenlist-1.8.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:294e487f9ec720bd8ffcebc99d575f7eff3568a08a253d1ee1a0378754b74143", size = 246820, upload_time = "2025-10-06T05:36:19.046Z" }, + { url = "https://files.pythonhosted.org/packages/0f/ae/58282e8f98e444b3f4dd42448ff36fa38bef29e40d40f330b22e7108f565/frozenlist-1.8.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:74c51543498289c0c43656701be6b077f4b265868fa7f8a8859c197006efb608", size = 250518, upload_time = "2025-10-06T05:36:20.763Z" }, + { url = "https://files.pythonhosted.org/packages/8f/96/007e5944694d66123183845a106547a15944fbbb7154788cbf7272789536/frozenlist-1.8.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:776f352e8329135506a1d6bf16ac3f87bc25b28e765949282dcc627af36123aa", size = 239096, upload_time = "2025-10-06T05:36:22.129Z" }, + { url = "https://files.pythonhosted.org/packages/66/bb/852b9d6db2fa40be96f29c0d1205c306288f0684df8fd26ca1951d461a56/frozenlist-1.8.0-cp312-cp312-win32.whl", hash = "sha256:433403ae80709741ce34038da08511d4a77062aa924baf411ef73d1146e74faf", size = 39985, upload_time = "2025-10-06T05:36:23.661Z" }, + { url = "https://files.pythonhosted.org/packages/b8/af/38e51a553dd66eb064cdf193841f16f077585d4d28394c2fa6235cb41765/frozenlist-1.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:34187385b08f866104f0c0617404c8eb08165ab1272e884abc89c112e9c00746", size = 44591, upload_time = "2025-10-06T05:36:24.958Z" }, + { url = "https://files.pythonhosted.org/packages/a7/06/1dc65480ab147339fecc70797e9c2f69d9cea9cf38934ce08df070fdb9cb/frozenlist-1.8.0-cp312-cp312-win_arm64.whl", hash = "sha256:fe3c58d2f5db5fbd18c2987cba06d51b0529f52bc3a6cdc33d3f4eab725104bd", size = 40102, upload_time = "2025-10-06T05:36:26.333Z" }, + { url 
= "https://files.pythonhosted.org/packages/2d/40/0832c31a37d60f60ed79e9dfb5a92e1e2af4f40a16a29abcc7992af9edff/frozenlist-1.8.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8d92f1a84bb12d9e56f818b3a746f3efba93c1b63c8387a73dde655e1e42282a", size = 85717, upload_time = "2025-10-06T05:36:27.341Z" }, + { url = "https://files.pythonhosted.org/packages/30/ba/b0b3de23f40bc55a7057bd38434e25c34fa48e17f20ee273bbde5e0650f3/frozenlist-1.8.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:96153e77a591c8adc2ee805756c61f59fef4cf4073a9275ee86fe8cba41241f7", size = 49651, upload_time = "2025-10-06T05:36:28.855Z" }, + { url = "https://files.pythonhosted.org/packages/0c/ab/6e5080ee374f875296c4243c381bbdef97a9ac39c6e3ce1d5f7d42cb78d6/frozenlist-1.8.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f21f00a91358803399890ab167098c131ec2ddd5f8f5fd5fe9c9f2c6fcd91e40", size = 49417, upload_time = "2025-10-06T05:36:29.877Z" }, + { url = "https://files.pythonhosted.org/packages/d5/4e/e4691508f9477ce67da2015d8c00acd751e6287739123113a9fca6f1604e/frozenlist-1.8.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:fb30f9626572a76dfe4293c7194a09fb1fe93ba94c7d4f720dfae3b646b45027", size = 234391, upload_time = "2025-10-06T05:36:31.301Z" }, + { url = "https://files.pythonhosted.org/packages/40/76/c202df58e3acdf12969a7895fd6f3bc016c642e6726aa63bd3025e0fc71c/frozenlist-1.8.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eaa352d7047a31d87dafcacbabe89df0aa506abb5b1b85a2fb91bc3faa02d822", size = 233048, upload_time = "2025-10-06T05:36:32.531Z" }, + { url = "https://files.pythonhosted.org/packages/f9/c0/8746afb90f17b73ca5979c7a3958116e105ff796e718575175319b5bb4ce/frozenlist-1.8.0-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:03ae967b4e297f58f8c774c7eabcce57fe3c2434817d4385c50661845a058121", size = 226549, upload_time = "2025-10-06T05:36:33.706Z" }, + { url = "https://files.pythonhosted.org/packages/7e/eb/4c7eefc718ff72f9b6c4893291abaae5fbc0c82226a32dcd8ef4f7a5dbef/frozenlist-1.8.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f6292f1de555ffcc675941d65fffffb0a5bcd992905015f85d0592201793e0e5", size = 239833, upload_time = "2025-10-06T05:36:34.947Z" }, + { url = "https://files.pythonhosted.org/packages/c2/4e/e5c02187cf704224f8b21bee886f3d713ca379535f16893233b9d672ea71/frozenlist-1.8.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:29548f9b5b5e3460ce7378144c3010363d8035cea44bc0bf02d57f5a685e084e", size = 245363, upload_time = "2025-10-06T05:36:36.534Z" }, + { url = "https://files.pythonhosted.org/packages/1f/96/cb85ec608464472e82ad37a17f844889c36100eed57bea094518bf270692/frozenlist-1.8.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ec3cc8c5d4084591b4237c0a272cc4f50a5b03396a47d9caaf76f5d7b38a4f11", size = 229314, upload_time = "2025-10-06T05:36:38.582Z" }, + { url = "https://files.pythonhosted.org/packages/5d/6f/4ae69c550e4cee66b57887daeebe006fe985917c01d0fff9caab9883f6d0/frozenlist-1.8.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:517279f58009d0b1f2e7c1b130b377a349405da3f7621ed6bfae50b10adf20c1", size = 243365, upload_time = "2025-10-06T05:36:40.152Z" }, + { url = "https://files.pythonhosted.org/packages/7a/58/afd56de246cf11780a40a2c28dc7cbabbf06337cc8ddb1c780a2d97e88d8/frozenlist-1.8.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = 
"sha256:db1e72ede2d0d7ccb213f218df6a078a9c09a7de257c2fe8fcef16d5925230b1", size = 237763, upload_time = "2025-10-06T05:36:41.355Z" }, + { url = "https://files.pythonhosted.org/packages/cb/36/cdfaf6ed42e2644740d4a10452d8e97fa1c062e2a8006e4b09f1b5fd7d63/frozenlist-1.8.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:b4dec9482a65c54a5044486847b8a66bf10c9cb4926d42927ec4e8fd5db7fed8", size = 240110, upload_time = "2025-10-06T05:36:42.716Z" }, + { url = "https://files.pythonhosted.org/packages/03/a8/9ea226fbefad669f11b52e864c55f0bd57d3c8d7eb07e9f2e9a0b39502e1/frozenlist-1.8.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:21900c48ae04d13d416f0e1e0c4d81f7931f73a9dfa0b7a8746fb2fe7dd970ed", size = 233717, upload_time = "2025-10-06T05:36:44.251Z" }, + { url = "https://files.pythonhosted.org/packages/1e/0b/1b5531611e83ba7d13ccc9988967ea1b51186af64c42b7a7af465dcc9568/frozenlist-1.8.0-cp313-cp313-win32.whl", hash = "sha256:8b7b94a067d1c504ee0b16def57ad5738701e4ba10cec90529f13fa03c833496", size = 39628, upload_time = "2025-10-06T05:36:45.423Z" }, + { url = "https://files.pythonhosted.org/packages/d8/cf/174c91dbc9cc49bc7b7aab74d8b734e974d1faa8f191c74af9b7e80848e6/frozenlist-1.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:878be833caa6a3821caf85eb39c5ba92d28e85df26d57afb06b35b2efd937231", size = 43882, upload_time = "2025-10-06T05:36:46.796Z" }, + { url = "https://files.pythonhosted.org/packages/c1/17/502cd212cbfa96eb1388614fe39a3fc9ab87dbbe042b66f97acb57474834/frozenlist-1.8.0-cp313-cp313-win_arm64.whl", hash = "sha256:44389d135b3ff43ba8cc89ff7f51f5a0bb6b63d829c8300f79a2fe4fe61bcc62", size = 39676, upload_time = "2025-10-06T05:36:47.8Z" }, + { url = "https://files.pythonhosted.org/packages/d2/5c/3bbfaa920dfab09e76946a5d2833a7cbdf7b9b4a91c714666ac4855b88b4/frozenlist-1.8.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:e25ac20a2ef37e91c1b39938b591457666a0fa835c7783c3a8f33ea42870db94", size = 89235, upload_time = "2025-10-06T05:36:48.78Z" }, + { url = "https://files.pythonhosted.org/packages/d2/d6/f03961ef72166cec1687e84e8925838442b615bd0b8854b54923ce5b7b8a/frozenlist-1.8.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:07cdca25a91a4386d2e76ad992916a85038a9b97561bf7a3fd12d5d9ce31870c", size = 50742, upload_time = "2025-10-06T05:36:49.837Z" }, + { url = "https://files.pythonhosted.org/packages/1e/bb/a6d12b7ba4c3337667d0e421f7181c82dda448ce4e7ad7ecd249a16fa806/frozenlist-1.8.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:4e0c11f2cc6717e0a741f84a527c52616140741cd812a50422f83dc31749fb52", size = 51725, upload_time = "2025-10-06T05:36:50.851Z" }, + { url = "https://files.pythonhosted.org/packages/bc/71/d1fed0ffe2c2ccd70b43714c6cab0f4188f09f8a67a7914a6b46ee30f274/frozenlist-1.8.0-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b3210649ee28062ea6099cfda39e147fa1bc039583c8ee4481cb7811e2448c51", size = 284533, upload_time = "2025-10-06T05:36:51.898Z" }, + { url = "https://files.pythonhosted.org/packages/c9/1f/fb1685a7b009d89f9bf78a42d94461bc06581f6e718c39344754a5d9bada/frozenlist-1.8.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:581ef5194c48035a7de2aefc72ac6539823bb71508189e5de01d60c9dcd5fa65", size = 292506, upload_time = "2025-10-06T05:36:53.101Z" }, + { url = "https://files.pythonhosted.org/packages/e6/3b/b991fe1612703f7e0d05c0cf734c1b77aaf7c7d321df4572e8d36e7048c8/frozenlist-1.8.0-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = 
"sha256:3ef2d026f16a2b1866e1d86fc4e1291e1ed8a387b2c333809419a2f8b3a77b82", size = 274161, upload_time = "2025-10-06T05:36:54.309Z" }, + { url = "https://files.pythonhosted.org/packages/ca/ec/c5c618767bcdf66e88945ec0157d7f6c4a1322f1473392319b7a2501ded7/frozenlist-1.8.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:5500ef82073f599ac84d888e3a8c1f77ac831183244bfd7f11eaa0289fb30714", size = 294676, upload_time = "2025-10-06T05:36:55.566Z" }, + { url = "https://files.pythonhosted.org/packages/7c/ce/3934758637d8f8a88d11f0585d6495ef54b2044ed6ec84492a91fa3b27aa/frozenlist-1.8.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:50066c3997d0091c411a66e710f4e11752251e6d2d73d70d8d5d4c76442a199d", size = 300638, upload_time = "2025-10-06T05:36:56.758Z" }, + { url = "https://files.pythonhosted.org/packages/fc/4f/a7e4d0d467298f42de4b41cbc7ddaf19d3cfeabaf9ff97c20c6c7ee409f9/frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:5c1c8e78426e59b3f8005e9b19f6ff46e5845895adbde20ece9218319eca6506", size = 283067, upload_time = "2025-10-06T05:36:57.965Z" }, + { url = "https://files.pythonhosted.org/packages/dc/48/c7b163063d55a83772b268e6d1affb960771b0e203b632cfe09522d67ea5/frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:eefdba20de0d938cec6a89bd4d70f346a03108a19b9df4248d3cf0d88f1b0f51", size = 292101, upload_time = "2025-10-06T05:36:59.237Z" }, + { url = "https://files.pythonhosted.org/packages/9f/d0/2366d3c4ecdc2fd391e0afa6e11500bfba0ea772764d631bbf82f0136c9d/frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:cf253e0e1c3ceb4aaff6df637ce033ff6535fb8c70a764a8f46aafd3d6ab798e", size = 289901, upload_time = "2025-10-06T05:37:00.811Z" }, + { url = "https://files.pythonhosted.org/packages/b8/94/daff920e82c1b70e3618a2ac39fbc01ae3e2ff6124e80739ce5d71c9b920/frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:032efa2674356903cd0261c4317a561a6850f3ac864a63fc1583147fb05a79b0", size = 289395, upload_time = "2025-10-06T05:37:02.115Z" }, + { url = "https://files.pythonhosted.org/packages/e3/20/bba307ab4235a09fdcd3cc5508dbabd17c4634a1af4b96e0f69bfe551ebd/frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6da155091429aeba16851ecb10a9104a108bcd32f6c1642867eadaee401c1c41", size = 283659, upload_time = "2025-10-06T05:37:03.711Z" }, + { url = "https://files.pythonhosted.org/packages/fd/00/04ca1c3a7a124b6de4f8a9a17cc2fcad138b4608e7a3fc5877804b8715d7/frozenlist-1.8.0-cp313-cp313t-win32.whl", hash = "sha256:0f96534f8bfebc1a394209427d0f8a63d343c9779cda6fc25e8e121b5fd8555b", size = 43492, upload_time = "2025-10-06T05:37:04.915Z" }, + { url = "https://files.pythonhosted.org/packages/59/5e/c69f733a86a94ab10f68e496dc6b7e8bc078ebb415281d5698313e3af3a1/frozenlist-1.8.0-cp313-cp313t-win_amd64.whl", hash = "sha256:5d63a068f978fc69421fb0e6eb91a9603187527c86b7cd3f534a5b77a592b888", size = 48034, upload_time = "2025-10-06T05:37:06.343Z" }, + { url = "https://files.pythonhosted.org/packages/16/6c/be9d79775d8abe79b05fa6d23da99ad6e7763a1d080fbae7290b286093fd/frozenlist-1.8.0-cp313-cp313t-win_arm64.whl", hash = "sha256:bf0a7e10b077bf5fb9380ad3ae8ce20ef919a6ad93b4552896419ac7e1d8e042", size = 41749, upload_time = "2025-10-06T05:37:07.431Z" }, + { url = "https://files.pythonhosted.org/packages/f1/c8/85da824b7e7b9b6e7f7705b2ecaf9591ba6f79c1177f324c2735e41d36a2/frozenlist-1.8.0-cp314-cp314-macosx_10_13_universal2.whl", hash = 
"sha256:cee686f1f4cadeb2136007ddedd0aaf928ab95216e7691c63e50a8ec066336d0", size = 86127, upload_time = "2025-10-06T05:37:08.438Z" }, + { url = "https://files.pythonhosted.org/packages/8e/e8/a1185e236ec66c20afd72399522f142c3724c785789255202d27ae992818/frozenlist-1.8.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:119fb2a1bd47307e899c2fac7f28e85b9a543864df47aa7ec9d3c1b4545f096f", size = 49698, upload_time = "2025-10-06T05:37:09.48Z" }, + { url = "https://files.pythonhosted.org/packages/a1/93/72b1736d68f03fda5fdf0f2180fb6caaae3894f1b854d006ac61ecc727ee/frozenlist-1.8.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:4970ece02dbc8c3a92fcc5228e36a3e933a01a999f7094ff7c23fbd2beeaa67c", size = 49749, upload_time = "2025-10-06T05:37:10.569Z" }, + { url = "https://files.pythonhosted.org/packages/a7/b2/fabede9fafd976b991e9f1b9c8c873ed86f202889b864756f240ce6dd855/frozenlist-1.8.0-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:cba69cb73723c3f329622e34bdbf5ce1f80c21c290ff04256cff1cd3c2036ed2", size = 231298, upload_time = "2025-10-06T05:37:11.993Z" }, + { url = "https://files.pythonhosted.org/packages/3a/3b/d9b1e0b0eed36e70477ffb8360c49c85c8ca8ef9700a4e6711f39a6e8b45/frozenlist-1.8.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:778a11b15673f6f1df23d9586f83c4846c471a8af693a22e066508b77d201ec8", size = 232015, upload_time = "2025-10-06T05:37:13.194Z" }, + { url = "https://files.pythonhosted.org/packages/dc/94/be719d2766c1138148564a3960fc2c06eb688da592bdc25adcf856101be7/frozenlist-1.8.0-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:0325024fe97f94c41c08872db482cf8ac4800d80e79222c6b0b7b162d5b13686", size = 225038, upload_time = "2025-10-06T05:37:14.577Z" }, + { url = "https://files.pythonhosted.org/packages/e4/09/6712b6c5465f083f52f50cf74167b92d4ea2f50e46a9eea0523d658454ae/frozenlist-1.8.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:97260ff46b207a82a7567b581ab4190bd4dfa09f4db8a8b49d1a958f6aa4940e", size = 240130, upload_time = "2025-10-06T05:37:15.781Z" }, + { url = "https://files.pythonhosted.org/packages/f8/d4/cd065cdcf21550b54f3ce6a22e143ac9e4836ca42a0de1022da8498eac89/frozenlist-1.8.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:54b2077180eb7f83dd52c40b2750d0a9f175e06a42e3213ce047219de902717a", size = 242845, upload_time = "2025-10-06T05:37:17.037Z" }, + { url = "https://files.pythonhosted.org/packages/62/c3/f57a5c8c70cd1ead3d5d5f776f89d33110b1addae0ab010ad774d9a44fb9/frozenlist-1.8.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:2f05983daecab868a31e1da44462873306d3cbfd76d1f0b5b69c473d21dbb128", size = 229131, upload_time = "2025-10-06T05:37:18.221Z" }, + { url = "https://files.pythonhosted.org/packages/6c/52/232476fe9cb64f0742f3fde2b7d26c1dac18b6d62071c74d4ded55e0ef94/frozenlist-1.8.0-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:33f48f51a446114bc5d251fb2954ab0164d5be02ad3382abcbfe07e2531d650f", size = 240542, upload_time = "2025-10-06T05:37:19.771Z" }, + { url = "https://files.pythonhosted.org/packages/5f/85/07bf3f5d0fb5414aee5f47d33c6f5c77bfe49aac680bfece33d4fdf6a246/frozenlist-1.8.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:154e55ec0655291b5dd1b8731c637ecdb50975a2ae70c606d100750a540082f7", size = 237308, upload_time = "2025-10-06T05:37:20.969Z" }, + { url = 
"https://files.pythonhosted.org/packages/11/99/ae3a33d5befd41ac0ca2cc7fd3aa707c9c324de2e89db0e0f45db9a64c26/frozenlist-1.8.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:4314debad13beb564b708b4a496020e5306c7333fa9a3ab90374169a20ffab30", size = 238210, upload_time = "2025-10-06T05:37:22.252Z" }, + { url = "https://files.pythonhosted.org/packages/b2/60/b1d2da22f4970e7a155f0adde9b1435712ece01b3cd45ba63702aea33938/frozenlist-1.8.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:073f8bf8becba60aa931eb3bc420b217bb7d5b8f4750e6f8b3be7f3da85d38b7", size = 231972, upload_time = "2025-10-06T05:37:23.5Z" }, + { url = "https://files.pythonhosted.org/packages/3f/ab/945b2f32de889993b9c9133216c068b7fcf257d8595a0ac420ac8677cab0/frozenlist-1.8.0-cp314-cp314-win32.whl", hash = "sha256:bac9c42ba2ac65ddc115d930c78d24ab8d4f465fd3fc473cdedfccadb9429806", size = 40536, upload_time = "2025-10-06T05:37:25.581Z" }, + { url = "https://files.pythonhosted.org/packages/59/ad/9caa9b9c836d9ad6f067157a531ac48b7d36499f5036d4141ce78c230b1b/frozenlist-1.8.0-cp314-cp314-win_amd64.whl", hash = "sha256:3e0761f4d1a44f1d1a47996511752cf3dcec5bbdd9cc2b4fe595caf97754b7a0", size = 44330, upload_time = "2025-10-06T05:37:26.928Z" }, + { url = "https://files.pythonhosted.org/packages/82/13/e6950121764f2676f43534c555249f57030150260aee9dcf7d64efda11dd/frozenlist-1.8.0-cp314-cp314-win_arm64.whl", hash = "sha256:d1eaff1d00c7751b7c6662e9c5ba6eb2c17a2306ba5e2a37f24ddf3cc953402b", size = 40627, upload_time = "2025-10-06T05:37:28.075Z" }, + { url = "https://files.pythonhosted.org/packages/c0/c7/43200656ecc4e02d3f8bc248df68256cd9572b3f0017f0a0c4e93440ae23/frozenlist-1.8.0-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:d3bb933317c52d7ea5004a1c442eef86f426886fba134ef8cf4226ea6ee1821d", size = 89238, upload_time = "2025-10-06T05:37:29.373Z" }, + { url = "https://files.pythonhosted.org/packages/d1/29/55c5f0689b9c0fb765055629f472c0de484dcaf0acee2f7707266ae3583c/frozenlist-1.8.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:8009897cdef112072f93a0efdce29cd819e717fd2f649ee3016efd3cd885a7ed", size = 50738, upload_time = "2025-10-06T05:37:30.792Z" }, + { url = "https://files.pythonhosted.org/packages/ba/7d/b7282a445956506fa11da8c2db7d276adcbf2b17d8bb8407a47685263f90/frozenlist-1.8.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2c5dcbbc55383e5883246d11fd179782a9d07a986c40f49abe89ddf865913930", size = 51739, upload_time = "2025-10-06T05:37:32.127Z" }, + { url = "https://files.pythonhosted.org/packages/62/1c/3d8622e60d0b767a5510d1d3cf21065b9db874696a51ea6d7a43180a259c/frozenlist-1.8.0-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:39ecbc32f1390387d2aa4f5a995e465e9e2f79ba3adcac92d68e3e0afae6657c", size = 284186, upload_time = "2025-10-06T05:37:33.21Z" }, + { url = "https://files.pythonhosted.org/packages/2d/14/aa36d5f85a89679a85a1d44cd7a6657e0b1c75f61e7cad987b203d2daca8/frozenlist-1.8.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:92db2bf818d5cc8d9c1f1fc56b897662e24ea5adb36ad1f1d82875bd64e03c24", size = 292196, upload_time = "2025-10-06T05:37:36.107Z" }, + { url = "https://files.pythonhosted.org/packages/05/23/6bde59eb55abd407d34f77d39a5126fb7b4f109a3f611d3929f14b700c66/frozenlist-1.8.0-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:2dc43a022e555de94c3b68a4ef0b11c4f747d12c024a520c7101709a2144fb37", size = 273830, upload_time = "2025-10-06T05:37:37.663Z" }, + { url = 
"https://files.pythonhosted.org/packages/d2/3f/22cff331bfad7a8afa616289000ba793347fcd7bc275f3b28ecea2a27909/frozenlist-1.8.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:cb89a7f2de3602cfed448095bab3f178399646ab7c61454315089787df07733a", size = 294289, upload_time = "2025-10-06T05:37:39.261Z" }, + { url = "https://files.pythonhosted.org/packages/a4/89/5b057c799de4838b6c69aa82b79705f2027615e01be996d2486a69ca99c4/frozenlist-1.8.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:33139dc858c580ea50e7e60a1b0ea003efa1fd42e6ec7fdbad78fff65fad2fd2", size = 300318, upload_time = "2025-10-06T05:37:43.213Z" }, + { url = "https://files.pythonhosted.org/packages/30/de/2c22ab3eb2a8af6d69dc799e48455813bab3690c760de58e1bf43b36da3e/frozenlist-1.8.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:168c0969a329b416119507ba30b9ea13688fafffac1b7822802537569a1cb0ef", size = 282814, upload_time = "2025-10-06T05:37:45.337Z" }, + { url = "https://files.pythonhosted.org/packages/59/f7/970141a6a8dbd7f556d94977858cfb36fa9b66e0892c6dd780d2219d8cd8/frozenlist-1.8.0-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:28bd570e8e189d7f7b001966435f9dac6718324b5be2990ac496cf1ea9ddb7fe", size = 291762, upload_time = "2025-10-06T05:37:46.657Z" }, + { url = "https://files.pythonhosted.org/packages/c1/15/ca1adae83a719f82df9116d66f5bb28bb95557b3951903d39135620ef157/frozenlist-1.8.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:b2a095d45c5d46e5e79ba1e5b9cb787f541a8dee0433836cea4b96a2c439dcd8", size = 289470, upload_time = "2025-10-06T05:37:47.946Z" }, + { url = "https://files.pythonhosted.org/packages/ac/83/dca6dc53bf657d371fbc88ddeb21b79891e747189c5de990b9dfff2ccba1/frozenlist-1.8.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:eab8145831a0d56ec9c4139b6c3e594c7a83c2c8be25d5bcf2d86136a532287a", size = 289042, upload_time = "2025-10-06T05:37:49.499Z" }, + { url = "https://files.pythonhosted.org/packages/96/52/abddd34ca99be142f354398700536c5bd315880ed0a213812bc491cff5e4/frozenlist-1.8.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:974b28cf63cc99dfb2188d8d222bc6843656188164848c4f679e63dae4b0708e", size = 283148, upload_time = "2025-10-06T05:37:50.745Z" }, + { url = "https://files.pythonhosted.org/packages/af/d3/76bd4ed4317e7119c2b7f57c3f6934aba26d277acc6309f873341640e21f/frozenlist-1.8.0-cp314-cp314t-win32.whl", hash = "sha256:342c97bf697ac5480c0a7ec73cd700ecfa5a8a40ac923bd035484616efecc2df", size = 44676, upload_time = "2025-10-06T05:37:52.222Z" }, + { url = "https://files.pythonhosted.org/packages/89/76/c615883b7b521ead2944bb3480398cbb07e12b7b4e4d073d3752eb721558/frozenlist-1.8.0-cp314-cp314t-win_amd64.whl", hash = "sha256:06be8f67f39c8b1dc671f5d83aaefd3358ae5cdcf8314552c57e7ed3e6475bdd", size = 49451, upload_time = "2025-10-06T05:37:53.425Z" }, + { url = "https://files.pythonhosted.org/packages/e0/a3/5982da14e113d07b325230f95060e2169f5311b1017ea8af2a29b374c289/frozenlist-1.8.0-cp314-cp314t-win_arm64.whl", hash = "sha256:102e6314ca4da683dca92e3b1355490fed5f313b768500084fbe6371fddfdb79", size = 42507, upload_time = "2025-10-06T05:37:54.513Z" }, + { url = "https://files.pythonhosted.org/packages/9a/9a/e35b4a917281c0b8419d4207f4334c8e8c5dbf4f3f5f9ada73958d937dcc/frozenlist-1.8.0-py3-none-any.whl", hash = "sha256:0c18a16eab41e82c295618a77502e17b195883241c563b00f0aa5106fc4eaa0d", size = 13409, upload_time = "2025-10-06T05:38:16.721Z" }, ] [[package]] name = "fsspec" version = "2025.3.0" source = { 
registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/34/f4/5721faf47b8c499e776bc34c6a8fc17efdf7fdef0b00f398128bc5dcb4ac/fsspec-2025.3.0.tar.gz", hash = "sha256:a935fd1ea872591f2b5148907d103488fc523295e6c64b835cfad8c3eca44972", size = 298491 } +sdist = { url = "https://files.pythonhosted.org/packages/34/f4/5721faf47b8c499e776bc34c6a8fc17efdf7fdef0b00f398128bc5dcb4ac/fsspec-2025.3.0.tar.gz", hash = "sha256:a935fd1ea872591f2b5148907d103488fc523295e6c64b835cfad8c3eca44972", size = 298491, upload_time = "2025-03-07T21:47:56.461Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/56/53/eb690efa8513166adef3e0669afd31e95ffde69fb3c52ec2ac7223ed6018/fsspec-2025.3.0-py3-none-any.whl", hash = "sha256:efb87af3efa9103f94ca91a7f8cb7a4df91af9f74fc106c9c7ea0efd7277c1b3", size = 193615 }, + { url = "https://files.pythonhosted.org/packages/56/53/eb690efa8513166adef3e0669afd31e95ffde69fb3c52ec2ac7223ed6018/fsspec-2025.3.0-py3-none-any.whl", hash = "sha256:efb87af3efa9103f94ca91a7f8cb7a4df91af9f74fc106c9c7ea0efd7277c1b3", size = 193615, upload_time = "2025-03-07T21:47:54.809Z" }, ] [package.optional-dependencies] @@ -878,47 +908,47 @@ http = [ name = "genson" version = "1.3.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c5/cf/2303c8ad276dcf5ee2ad6cf69c4338fd86ef0f471a5207b069adf7a393cf/genson-1.3.0.tar.gz", hash = "sha256:e02db9ac2e3fd29e65b5286f7135762e2cd8a986537c075b06fc5f1517308e37", size = 34919 } +sdist = { url = "https://files.pythonhosted.org/packages/c5/cf/2303c8ad276dcf5ee2ad6cf69c4338fd86ef0f471a5207b069adf7a393cf/genson-1.3.0.tar.gz", hash = "sha256:e02db9ac2e3fd29e65b5286f7135762e2cd8a986537c075b06fc5f1517308e37", size = 34919, upload_time = "2024-05-15T22:08:49.123Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f8/5c/e226de133afd8bb267ec27eead9ae3d784b95b39a287ed404caab39a5f50/genson-1.3.0-py3-none-any.whl", hash = "sha256:468feccd00274cc7e4c09e84b08704270ba8d95232aa280f65b986139cec67f7", size = 21470 }, + { url = "https://files.pythonhosted.org/packages/f8/5c/e226de133afd8bb267ec27eead9ae3d784b95b39a287ed404caab39a5f50/genson-1.3.0-py3-none-any.whl", hash = "sha256:468feccd00274cc7e4c09e84b08704270ba8d95232aa280f65b986139cec67f7", size = 21470, upload_time = "2024-05-15T22:08:47.056Z" }, ] [[package]] name = "h11" version = "0.16.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250 } +sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250, upload_time = "2025-04-24T03:35:25.427Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515 }, + { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload_time = "2025-04-24T03:35:24.344Z" }, ] [[package]] name = 
"hf-xet" version = "1.2.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/5e/6e/0f11bacf08a67f7fb5ee09740f2ca54163863b07b70d579356e9222ce5d8/hf_xet-1.2.0.tar.gz", hash = "sha256:a8c27070ca547293b6890c4bf389f713f80e8c478631432962bb7f4bc0bd7d7f", size = 506020 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/9e/a5/85ef910a0aa034a2abcfadc360ab5ac6f6bc4e9112349bd40ca97551cff0/hf_xet-1.2.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:ceeefcd1b7aed4956ae8499e2199607765fbd1c60510752003b6cc0b8413b649", size = 2861870 }, - { url = "https://files.pythonhosted.org/packages/ea/40/e2e0a7eb9a51fe8828ba2d47fe22a7e74914ea8a0db68a18c3aa7449c767/hf_xet-1.2.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b70218dd548e9840224df5638fdc94bd033552963cfa97f9170829381179c813", size = 2717584 }, - { url = "https://files.pythonhosted.org/packages/a5/7d/daf7f8bc4594fdd59a8a596f9e3886133fdc68e675292218a5e4c1b7e834/hf_xet-1.2.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d40b18769bb9a8bc82a9ede575ce1a44c75eb80e7375a01d76259089529b5dc", size = 3315004 }, - { url = "https://files.pythonhosted.org/packages/b1/ba/45ea2f605fbf6d81c8b21e4d970b168b18a53515923010c312c06cd83164/hf_xet-1.2.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:cd3a6027d59cfb60177c12d6424e31f4b5ff13d8e3a1247b3a584bf8977e6df5", size = 3222636 }, - { url = "https://files.pythonhosted.org/packages/4a/1d/04513e3cab8f29ab8c109d309ddd21a2705afab9d52f2ba1151e0c14f086/hf_xet-1.2.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6de1fc44f58f6dd937956c8d304d8c2dea264c80680bcfa61ca4a15e7b76780f", size = 3408448 }, - { url = "https://files.pythonhosted.org/packages/f0/7c/60a2756d7feec7387db3a1176c632357632fbe7849fce576c5559d4520c7/hf_xet-1.2.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f182f264ed2acd566c514e45da9f2119110e48a87a327ca271027904c70c5832", size = 3503401 }, - { url = "https://files.pythonhosted.org/packages/4e/64/48fffbd67fb418ab07451e4ce641a70de1c40c10a13e25325e24858ebe5a/hf_xet-1.2.0-cp313-cp313t-win_amd64.whl", hash = "sha256:293a7a3787e5c95d7be1857358a9130694a9c6021de3f27fa233f37267174382", size = 2900866 }, - { url = "https://files.pythonhosted.org/packages/e2/51/f7e2caae42f80af886db414d4e9885fac959330509089f97cccb339c6b87/hf_xet-1.2.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:10bfab528b968c70e062607f663e21e34e2bba349e8038db546646875495179e", size = 2861861 }, - { url = "https://files.pythonhosted.org/packages/6e/1d/a641a88b69994f9371bd347f1dd35e5d1e2e2460a2e350c8d5165fc62005/hf_xet-1.2.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2a212e842647b02eb6a911187dc878e79c4aa0aa397e88dd3b26761676e8c1f8", size = 2717699 }, - { url = "https://files.pythonhosted.org/packages/df/e0/e5e9bba7d15f0318955f7ec3f4af13f92e773fbb368c0b8008a5acbcb12f/hf_xet-1.2.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:30e06daccb3a7d4c065f34fc26c14c74f4653069bb2b194e7f18f17cbe9939c0", size = 3314885 }, - { url = "https://files.pythonhosted.org/packages/21/90/b7fe5ff6f2b7b8cbdf1bd56145f863c90a5807d9758a549bf3d916aa4dec/hf_xet-1.2.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:29c8fc913a529ec0a91867ce3d119ac1aac966e098cf49501800c870328cc090", size = 3221550 }, - { url = "https://files.pythonhosted.org/packages/6f/cb/73f276f0a7ce46cc6a6ec7d6c7d61cbfe5f2e107123d9bbd0193c355f106/hf_xet-1.2.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = 
"sha256:66e159cbfcfbb29f920db2c09ed8b660eb894640d284f102ada929b6e3dc410a", size = 3408010 }, - { url = "https://files.pythonhosted.org/packages/b8/1e/d642a12caa78171f4be64f7cd9c40e3ca5279d055d0873188a58c0f5fbb9/hf_xet-1.2.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9c91d5ae931510107f148874e9e2de8a16052b6f1b3ca3c1b12f15ccb491390f", size = 3503264 }, - { url = "https://files.pythonhosted.org/packages/17/b5/33764714923fa1ff922770f7ed18c2daae034d21ae6e10dbf4347c854154/hf_xet-1.2.0-cp314-cp314t-win_amd64.whl", hash = "sha256:210d577732b519ac6ede149d2f2f34049d44e8622bf14eb3d63bbcd2d4b332dc", size = 2901071 }, - { url = "https://files.pythonhosted.org/packages/96/2d/22338486473df5923a9ab7107d375dbef9173c338ebef5098ef593d2b560/hf_xet-1.2.0-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:46740d4ac024a7ca9b22bebf77460ff43332868b661186a8e46c227fdae01848", size = 2866099 }, - { url = "https://files.pythonhosted.org/packages/7f/8c/c5becfa53234299bc2210ba314eaaae36c2875e0045809b82e40a9544f0c/hf_xet-1.2.0-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:27df617a076420d8845bea087f59303da8be17ed7ec0cd7ee3b9b9f579dff0e4", size = 2722178 }, - { url = "https://files.pythonhosted.org/packages/9a/92/cf3ab0b652b082e66876d08da57fcc6fa2f0e6c70dfbbafbd470bb73eb47/hf_xet-1.2.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3651fd5bfe0281951b988c0facbe726aa5e347b103a675f49a3fa8144c7968fd", size = 3320214 }, - { url = "https://files.pythonhosted.org/packages/46/92/3f7ec4a1b6a65bf45b059b6d4a5d38988f63e193056de2f420137e3c3244/hf_xet-1.2.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:d06fa97c8562fb3ee7a378dd9b51e343bc5bc8190254202c9771029152f5e08c", size = 3229054 }, - { url = "https://files.pythonhosted.org/packages/0b/dd/7ac658d54b9fb7999a0ccb07ad863b413cbaf5cf172f48ebcd9497ec7263/hf_xet-1.2.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:4c1428c9ae73ec0939410ec73023c4f842927f39db09b063b9482dac5a3bb737", size = 3413812 }, - { url = "https://files.pythonhosted.org/packages/92/68/89ac4e5b12a9ff6286a12174c8538a5930e2ed662091dd2572bbe0a18c8a/hf_xet-1.2.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a55558084c16b09b5ed32ab9ed38421e2d87cf3f1f89815764d1177081b99865", size = 3508920 }, - { url = "https://files.pythonhosted.org/packages/cb/44/870d44b30e1dcfb6a65932e3e1506c103a8a5aea9103c337e7a53180322c/hf_xet-1.2.0-cp37-abi3-win_amd64.whl", hash = "sha256:e6584a52253f72c9f52f9e549d5895ca7a471608495c4ecaa6cc73dba2b24d69", size = 2905735 }, +sdist = { url = "https://files.pythonhosted.org/packages/5e/6e/0f11bacf08a67f7fb5ee09740f2ca54163863b07b70d579356e9222ce5d8/hf_xet-1.2.0.tar.gz", hash = "sha256:a8c27070ca547293b6890c4bf389f713f80e8c478631432962bb7f4bc0bd7d7f", size = 506020, upload_time = "2025-10-24T19:04:32.129Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9e/a5/85ef910a0aa034a2abcfadc360ab5ac6f6bc4e9112349bd40ca97551cff0/hf_xet-1.2.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:ceeefcd1b7aed4956ae8499e2199607765fbd1c60510752003b6cc0b8413b649", size = 2861870, upload_time = "2025-10-24T19:04:11.422Z" }, + { url = "https://files.pythonhosted.org/packages/ea/40/e2e0a7eb9a51fe8828ba2d47fe22a7e74914ea8a0db68a18c3aa7449c767/hf_xet-1.2.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b70218dd548e9840224df5638fdc94bd033552963cfa97f9170829381179c813", size = 2717584, upload_time = "2025-10-24T19:04:09.586Z" }, + { url = 
"https://files.pythonhosted.org/packages/a5/7d/daf7f8bc4594fdd59a8a596f9e3886133fdc68e675292218a5e4c1b7e834/hf_xet-1.2.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d40b18769bb9a8bc82a9ede575ce1a44c75eb80e7375a01d76259089529b5dc", size = 3315004, upload_time = "2025-10-24T19:04:00.314Z" }, + { url = "https://files.pythonhosted.org/packages/b1/ba/45ea2f605fbf6d81c8b21e4d970b168b18a53515923010c312c06cd83164/hf_xet-1.2.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:cd3a6027d59cfb60177c12d6424e31f4b5ff13d8e3a1247b3a584bf8977e6df5", size = 3222636, upload_time = "2025-10-24T19:03:58.111Z" }, + { url = "https://files.pythonhosted.org/packages/4a/1d/04513e3cab8f29ab8c109d309ddd21a2705afab9d52f2ba1151e0c14f086/hf_xet-1.2.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6de1fc44f58f6dd937956c8d304d8c2dea264c80680bcfa61ca4a15e7b76780f", size = 3408448, upload_time = "2025-10-24T19:04:20.951Z" }, + { url = "https://files.pythonhosted.org/packages/f0/7c/60a2756d7feec7387db3a1176c632357632fbe7849fce576c5559d4520c7/hf_xet-1.2.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f182f264ed2acd566c514e45da9f2119110e48a87a327ca271027904c70c5832", size = 3503401, upload_time = "2025-10-24T19:04:22.549Z" }, + { url = "https://files.pythonhosted.org/packages/4e/64/48fffbd67fb418ab07451e4ce641a70de1c40c10a13e25325e24858ebe5a/hf_xet-1.2.0-cp313-cp313t-win_amd64.whl", hash = "sha256:293a7a3787e5c95d7be1857358a9130694a9c6021de3f27fa233f37267174382", size = 2900866, upload_time = "2025-10-24T19:04:33.461Z" }, + { url = "https://files.pythonhosted.org/packages/e2/51/f7e2caae42f80af886db414d4e9885fac959330509089f97cccb339c6b87/hf_xet-1.2.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:10bfab528b968c70e062607f663e21e34e2bba349e8038db546646875495179e", size = 2861861, upload_time = "2025-10-24T19:04:19.01Z" }, + { url = "https://files.pythonhosted.org/packages/6e/1d/a641a88b69994f9371bd347f1dd35e5d1e2e2460a2e350c8d5165fc62005/hf_xet-1.2.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2a212e842647b02eb6a911187dc878e79c4aa0aa397e88dd3b26761676e8c1f8", size = 2717699, upload_time = "2025-10-24T19:04:17.306Z" }, + { url = "https://files.pythonhosted.org/packages/df/e0/e5e9bba7d15f0318955f7ec3f4af13f92e773fbb368c0b8008a5acbcb12f/hf_xet-1.2.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:30e06daccb3a7d4c065f34fc26c14c74f4653069bb2b194e7f18f17cbe9939c0", size = 3314885, upload_time = "2025-10-24T19:04:07.642Z" }, + { url = "https://files.pythonhosted.org/packages/21/90/b7fe5ff6f2b7b8cbdf1bd56145f863c90a5807d9758a549bf3d916aa4dec/hf_xet-1.2.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:29c8fc913a529ec0a91867ce3d119ac1aac966e098cf49501800c870328cc090", size = 3221550, upload_time = "2025-10-24T19:04:05.55Z" }, + { url = "https://files.pythonhosted.org/packages/6f/cb/73f276f0a7ce46cc6a6ec7d6c7d61cbfe5f2e107123d9bbd0193c355f106/hf_xet-1.2.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e159cbfcfbb29f920db2c09ed8b660eb894640d284f102ada929b6e3dc410a", size = 3408010, upload_time = "2025-10-24T19:04:28.598Z" }, + { url = "https://files.pythonhosted.org/packages/b8/1e/d642a12caa78171f4be64f7cd9c40e3ca5279d055d0873188a58c0f5fbb9/hf_xet-1.2.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9c91d5ae931510107f148874e9e2de8a16052b6f1b3ca3c1b12f15ccb491390f", size = 3503264, upload_time = "2025-10-24T19:04:30.397Z" }, + { url = 
"https://files.pythonhosted.org/packages/17/b5/33764714923fa1ff922770f7ed18c2daae034d21ae6e10dbf4347c854154/hf_xet-1.2.0-cp314-cp314t-win_amd64.whl", hash = "sha256:210d577732b519ac6ede149d2f2f34049d44e8622bf14eb3d63bbcd2d4b332dc", size = 2901071, upload_time = "2025-10-24T19:04:37.463Z" }, + { url = "https://files.pythonhosted.org/packages/96/2d/22338486473df5923a9ab7107d375dbef9173c338ebef5098ef593d2b560/hf_xet-1.2.0-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:46740d4ac024a7ca9b22bebf77460ff43332868b661186a8e46c227fdae01848", size = 2866099, upload_time = "2025-10-24T19:04:15.366Z" }, + { url = "https://files.pythonhosted.org/packages/7f/8c/c5becfa53234299bc2210ba314eaaae36c2875e0045809b82e40a9544f0c/hf_xet-1.2.0-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:27df617a076420d8845bea087f59303da8be17ed7ec0cd7ee3b9b9f579dff0e4", size = 2722178, upload_time = "2025-10-24T19:04:13.695Z" }, + { url = "https://files.pythonhosted.org/packages/9a/92/cf3ab0b652b082e66876d08da57fcc6fa2f0e6c70dfbbafbd470bb73eb47/hf_xet-1.2.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3651fd5bfe0281951b988c0facbe726aa5e347b103a675f49a3fa8144c7968fd", size = 3320214, upload_time = "2025-10-24T19:04:03.596Z" }, + { url = "https://files.pythonhosted.org/packages/46/92/3f7ec4a1b6a65bf45b059b6d4a5d38988f63e193056de2f420137e3c3244/hf_xet-1.2.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:d06fa97c8562fb3ee7a378dd9b51e343bc5bc8190254202c9771029152f5e08c", size = 3229054, upload_time = "2025-10-24T19:04:01.949Z" }, + { url = "https://files.pythonhosted.org/packages/0b/dd/7ac658d54b9fb7999a0ccb07ad863b413cbaf5cf172f48ebcd9497ec7263/hf_xet-1.2.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:4c1428c9ae73ec0939410ec73023c4f842927f39db09b063b9482dac5a3bb737", size = 3413812, upload_time = "2025-10-24T19:04:24.585Z" }, + { url = "https://files.pythonhosted.org/packages/92/68/89ac4e5b12a9ff6286a12174c8538a5930e2ed662091dd2572bbe0a18c8a/hf_xet-1.2.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a55558084c16b09b5ed32ab9ed38421e2d87cf3f1f89815764d1177081b99865", size = 3508920, upload_time = "2025-10-24T19:04:26.927Z" }, + { url = "https://files.pythonhosted.org/packages/cb/44/870d44b30e1dcfb6a65932e3e1506c103a8a5aea9103c337e7a53180322c/hf_xet-1.2.0-cp37-abi3-win_amd64.whl", hash = "sha256:e6584a52253f72c9f52f9e549d5895ca7a471608495c4ecaa6cc73dba2b24d69", size = 2905735, upload_time = "2025-10-24T19:04:35.928Z" }, ] [[package]] @@ -929,9 +959,9 @@ dependencies = [ { name = "certifi" }, { name = "h11" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484 } +sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload_time = "2025-04-24T22:06:22.219Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784 }, + { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = 
"sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload_time = "2025-04-24T22:06:20.566Z" }, ] [[package]] @@ -944,9 +974,9 @@ dependencies = [ { name = "httpcore" }, { name = "idna" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406 } +sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload_time = "2024-12-06T15:37:23.222Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517 }, + { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload_time = "2024-12-06T15:37:21.509Z" }, ] [[package]] @@ -963,81 +993,90 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/98/63/4910c5fa9128fdadf6a9c5ac138e8b1b6cee4ca44bf7915bbfbce4e355ee/huggingface_hub-0.36.0.tar.gz", hash = "sha256:47b3f0e2539c39bf5cde015d63b72ec49baff67b6931c3d97f3f84532e2b8d25", size = 463358 } +sdist = { url = "https://files.pythonhosted.org/packages/98/63/4910c5fa9128fdadf6a9c5ac138e8b1b6cee4ca44bf7915bbfbce4e355ee/huggingface_hub-0.36.0.tar.gz", hash = "sha256:47b3f0e2539c39bf5cde015d63b72ec49baff67b6931c3d97f3f84532e2b8d25", size = 463358, upload_time = "2025-10-23T12:12:01.413Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/bd/1a875e0d592d447cbc02805fd3fe0f497714d6a2583f59d14fa9ebad96eb/huggingface_hub-0.36.0-py3-none-any.whl", hash = "sha256:7bcc9ad17d5b3f07b57c78e79d527102d08313caa278a641993acddcb894548d", size = 566094, upload_time = "2025-10-23T12:11:59.557Z" }, +] + +[[package]] +name = "identify" +version = "2.6.16" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5b/8d/e8b97e6bd3fb6fb271346f7981362f1e04d6a7463abd0de79e1fda17c067/identify-2.6.16.tar.gz", hash = "sha256:846857203b5511bbe94d5a352a48ef2359532bc8f6727b5544077a0dcfb24980", size = 99360, upload_time = "2026-01-12T18:58:58.201Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/cb/bd/1a875e0d592d447cbc02805fd3fe0f497714d6a2583f59d14fa9ebad96eb/huggingface_hub-0.36.0-py3-none-any.whl", hash = "sha256:7bcc9ad17d5b3f07b57c78e79d527102d08313caa278a641993acddcb894548d", size = 566094 }, + { url = "https://files.pythonhosted.org/packages/b8/58/40fbbcefeda82364720eba5cf2270f98496bdfa19ea75b4cccae79c698e6/identify-2.6.16-py2.py3-none-any.whl", hash = "sha256:391ee4d77741d994189522896270b787aed8670389bfd60f326d677d64a6dfb0", size = 99202, upload_time = "2026-01-12T18:58:56.627Z" }, ] [[package]] name = "idna" version = "3.11" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = 
"sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582 } +sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582, upload_time = "2025-10-12T14:55:20.501Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008 }, + { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload_time = "2025-10-12T14:55:18.883Z" }, ] [[package]] name = "ijson" version = "3.4.0.post0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/2d/30/7ab4b9e88e7946f6beef419f74edcc541df3ea562c7882257b4eaa82417d/ijson-3.4.0.post0.tar.gz", hash = "sha256:9aa02dc70bb245670a6ca7fba737b992aeeb4895360980622f7e568dbf23e41e", size = 67216 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7d/fe/3b6af0025288e769dbfa30485dae1b3bd3f33f00390f3ee532cbb1c33e9b/ijson-3.4.0.post0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:b607a500fca26101be47d2baf7cddb457b819ab60a75ce51ed1092a40da8b2f9", size = 87847 }, - { url = "https://files.pythonhosted.org/packages/6e/a5/95ee2ca82f3b1a57892452f6e5087607d56c620beb8ce625475194568698/ijson-3.4.0.post0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4827d9874a6a81625412c59f7ca979a84d01f7f6bfb3c6d4dc4c46d0382b14e0", size = 59815 }, - { url = "https://files.pythonhosted.org/packages/51/8d/5a704ab3c17c55c21c86423458db8610626ca99cc9086a74dfeb7ee9054c/ijson-3.4.0.post0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d4d4afec780881edb2a0d2dd40b1cdbe246e630022d5192f266172a0307986a7", size = 59648 }, - { url = "https://files.pythonhosted.org/packages/25/56/ca5d6ca145d007f30b44e747f3c163bc08710ce004af0deaad4a2301339b/ijson-3.4.0.post0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:432fb60ffb952926f9438e0539011e2dfcd108f8426ee826ccc6173308c3ff2c", size = 138279 }, - { url = "https://files.pythonhosted.org/packages/c3/d3/22e3cc806fcdda7ad4c8482ed74db7a017d4a1d49b4300c7bc07052fb561/ijson-3.4.0.post0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:54a0e3e05d9a0c95ecba73d9579f146cf6d5c5874116c849dba2d39a5f30380e", size = 149110 }, - { url = "https://files.pythonhosted.org/packages/3e/04/efb30f413648b9267f5a33920ac124d7ebef3bc4063af8f6ffc8ca11ddcb/ijson-3.4.0.post0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:05807edc0bcbd222dc6ea32a2b897f0c81dc7f12c8580148bc82f6d7f5e7ec7b", size = 149026 }, - { url = "https://files.pythonhosted.org/packages/2d/cf/481165f7046ade32488719300a3994a437020bc41cfbb54334356348f513/ijson-3.4.0.post0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a5269af16f715855d9864937f9dd5c348ca1ac49cee6a2c7a1b7091c159e874f", size = 150012 }, - { url = "https://files.pythonhosted.org/packages/0f/24/642e3289917ecf860386e26dfde775f9962d26ab7f6c2e364ed3ca3c25d8/ijson-3.4.0.post0-cp312-cp312-musllinux_1_2_i686.whl", hash = 
"sha256:b200df83c901f5bfa416d069ac71077aa1608f854a4c50df1b84ced560e9c9ec", size = 142193 }, - { url = "https://files.pythonhosted.org/packages/0f/f5/fd2f038abe95e553e1c3ee207cda19db9196eb416e63c7c89699a8cf0db7/ijson-3.4.0.post0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6458bd8e679cdff459a0a5e555b107c3bbacb1f382da3fe0f40e392871eb518d", size = 150904 }, - { url = "https://files.pythonhosted.org/packages/49/35/24259d22519987928164e6cb8fe3486e1df0899b2999ada4b0498639b463/ijson-3.4.0.post0-cp312-cp312-win32.whl", hash = "sha256:55f7f656b5986326c978cbb3a9eea9e33f3ef6ecc4535b38f1d452c731da39ab", size = 52358 }, - { url = "https://files.pythonhosted.org/packages/a1/2b/6f7ade27a8ff5758fc41006dadd2de01730def84fe3e60553b329c59e0d4/ijson-3.4.0.post0-cp312-cp312-win_amd64.whl", hash = "sha256:e15833dcf6f6d188fdc624a31cd0520c3ba21b6855dc304bc7c1a8aeca02d4ac", size = 54789 }, - { url = "https://files.pythonhosted.org/packages/1b/20/aaec6977f9d538bbadd760c7fa0f6a0937742abdcc920ec6478a8576e55f/ijson-3.4.0.post0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:114ed248166ac06377e87a245a158d6b98019d2bdd3bb93995718e0bd996154f", size = 87863 }, - { url = "https://files.pythonhosted.org/packages/5b/29/06bf56a866e2fe21453a1ad8f3a5d7bca3c723f73d96329656dfee969783/ijson-3.4.0.post0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ffb21203736b08fe27cb30df6a4f802fafb9ef7646c5ff7ef79569b63ea76c57", size = 59806 }, - { url = "https://files.pythonhosted.org/packages/ba/ae/e1d0fda91ba7a444b75f0d60cb845fdb1f55d3111351529dcbf4b1c276fe/ijson-3.4.0.post0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:07f20ecd748602ac7f18c617637e53bd73ded7f3b22260bba3abe401a7fc284e", size = 59643 }, - { url = "https://files.pythonhosted.org/packages/4d/24/5a24533be2726396cc1724dc237bada09b19715b5bfb0e7b9400db0901ad/ijson-3.4.0.post0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:27aa193d47ffc6bc4e45453896ad98fb089a367e8283b973f1fe5c0198b60b4e", size = 138082 }, - { url = "https://files.pythonhosted.org/packages/05/60/026c3efcec23c329657e878cbc0a9a25b42e7eb3971e8c2377cb3284e2b7/ijson-3.4.0.post0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ccddb2894eb7af162ba43b9475ac5825d15d568832f82eb8783036e5d2aebd42", size = 149145 }, - { url = "https://files.pythonhosted.org/packages/ed/c2/036499909b7a1bc0bcd85305e4348ad171aeb9df57581287533bdb3497e9/ijson-3.4.0.post0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:61ab0b8c5bf707201dc67e02c116f4b6545c4afd7feb2264b989d242d9c4348a", size = 149046 }, - { url = "https://files.pythonhosted.org/packages/ba/75/e7736073ad96867c129f9e799e3e65086badd89dbf3911f76d9b3bf8a115/ijson-3.4.0.post0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:254cfb8c124af68327a0e7a49b50bbdacafd87c4690a3d62c96eb01020a685ef", size = 150356 }, - { url = "https://files.pythonhosted.org/packages/9d/1b/1c1575d2cda136985561fcf774fe6c54412cd0fa08005342015af0403193/ijson-3.4.0.post0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:04ac9ca54db20f82aeda6379b5f4f6112fdb150d09ebce04affeab98a17b4ed3", size = 142322 }, - { url = "https://files.pythonhosted.org/packages/28/4d/aba9871feb624df8494435d1a9ddc7b6a4f782c6044bfc0d770a4b59f145/ijson-3.4.0.post0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a603d7474bf35e7b3a8e49c8dabfc4751841931301adff3f3318171c4e407f32", size = 151386 }, - { url = 
"https://files.pythonhosted.org/packages/3f/9a/791baa83895fb6e492bce2c7a0ea6427b6a41fe854349e62a37d0c9deaf0/ijson-3.4.0.post0-cp313-cp313-win32.whl", hash = "sha256:ec5bb1520cb212ebead7dba048bb9b70552c3440584f83b01b0abc96862e2a09", size = 52352 }, - { url = "https://files.pythonhosted.org/packages/a9/0c/061f51493e1da21116d74ee8f6a6b9ae06ca5fa2eb53c3b38b64f9a9a5ae/ijson-3.4.0.post0-cp313-cp313-win_amd64.whl", hash = "sha256:3505dff18bdeb8b171eb28af6df34857e2be80dc01e2e3b624e77215ad58897f", size = 54783 }, - { url = "https://files.pythonhosted.org/packages/c7/89/4344e176f2c5f5ef3251c9bfa4ddd5b4cf3f9601fd6ec3f677a3ba0b9c71/ijson-3.4.0.post0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:45a0b1c833ed2620eaf8da958f06ac8351c59e5e470e078400d23814670ed708", size = 92342 }, - { url = "https://files.pythonhosted.org/packages/d4/b1/85012c586a6645f9fb8bfa3ef62ed2f303c8d73fc7c2f705111582925980/ijson-3.4.0.post0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:7809ec8c8f40228edaaa089f33e811dff4c5b8509702652870d3f286c9682e27", size = 62028 }, - { url = "https://files.pythonhosted.org/packages/65/ea/7b7e2815c101d78b33e74d64ddb70cccc377afccd5dda76e566ed3fcb56f/ijson-3.4.0.post0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:cf4a34c2cfe852aee75c89c05b0a4531c49dc0be27eeed221afd6fbf9c3e149c", size = 61773 }, - { url = "https://files.pythonhosted.org/packages/59/7d/2175e599cb77a64f528629bad3ce95dfdf2aa6171d313c1fc00bbfaf0d22/ijson-3.4.0.post0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:a39d5d36067604b26b78de70b8951c90e9272450642661fe531a8f7a6936a7fa", size = 198562 }, - { url = "https://files.pythonhosted.org/packages/13/97/82247c501c92405bb2fc44ab5efb497335bcb9cf0f5d3a0b04a800737bd8/ijson-3.4.0.post0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:83fc738d81c9ea686b452996110b8a6678296c481e0546857db24785bff8da92", size = 216212 }, - { url = "https://files.pythonhosted.org/packages/95/ca/b956f507bb02e05ce109fd11ab6a2c054f8b686cc5affe41afe50630984d/ijson-3.4.0.post0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b2a81aee91633868f5b40280e2523f7c5392e920a5082f47c5e991e516b483f6", size = 206618 }, - { url = "https://files.pythonhosted.org/packages/3e/12/e827840ab81d86a9882e499097934df53294f05155f1acfcb9a211ac1142/ijson-3.4.0.post0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:56169e298c5a2e7196aaa55da78ddc2415876a74fe6304f81b1eb0d3273346f7", size = 210689 }, - { url = "https://files.pythonhosted.org/packages/1b/3b/59238d9422c31a4aefa22ebeb8e599e706158a0ab03669ef623be77a499a/ijson-3.4.0.post0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:eeb9540f0b1a575cbb5968166706946458f98c16e7accc6f2fe71efa29864241", size = 199927 }, - { url = "https://files.pythonhosted.org/packages/b6/0f/ec01c36c128c37edb8a5ae8f3de3256009f886338d459210dfe121ee4ba9/ijson-3.4.0.post0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ba3478ff0bb49d7ba88783f491a99b6e3fa929c930ab062d2bb7837e6a38fe88", size = 204455 }, - { url = "https://files.pythonhosted.org/packages/c8/cf/5560e1db96c6d10a5313be76bf5a1754266cbfb5cc13ff64d107829e07b1/ijson-3.4.0.post0-cp313-cp313t-win32.whl", hash = "sha256:b005ce84e82f28b00bf777a464833465dfe3efa43a0a26c77b5ac40723e1a728", size = 54566 }, - { url = "https://files.pythonhosted.org/packages/22/5a/cbb69144c3b25dd56f5421ff7dc0cf3051355579062024772518e4f4b3c5/ijson-3.4.0.post0-cp313-cp313t-win_amd64.whl", hash = 
"sha256:fe9c84c9b1c8798afa407be1cea1603401d99bfc7c34497e19f4f5e5ddc9b441", size = 57298 }, - { url = "https://files.pythonhosted.org/packages/af/0b/a4ce8524fd850302bbf5d9f38d07c0fa981fdbe44951d2fcd036935b67dd/ijson-3.4.0.post0-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:da6a21b88cbf5ecbc53371283988d22c9643aa71ae2873bbeaefd2dea3b6160b", size = 88361 }, - { url = "https://files.pythonhosted.org/packages/be/90/a5e5f33e46f28174a9c8142d12dcb3d26ce358d9a2230b9b15f5c987b3a5/ijson-3.4.0.post0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:cf24a48a1c3ca9d44a04feb59ccefeb9aa52bb49b9cb70ad30518c25cce74bb7", size = 59960 }, - { url = "https://files.pythonhosted.org/packages/83/e2/551dd7037dda759aa0ce53f0d3d7be03b03c6b05c0b0a5d5ab7a47e6b4b1/ijson-3.4.0.post0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:d14427d366f95f21adcb97d0ed1f6d30f6fdc04d0aa1e4de839152c50c2b8d65", size = 59957 }, - { url = "https://files.pythonhosted.org/packages/ac/b9/3006384f85cc26cf83dbbd542d362cc336f1e1ddd491e32147cfa46ea8ae/ijson-3.4.0.post0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:339d49f6c5d24051c85d9226be96d2d56e633cb8b7d09dd8099de8d8b51a97e2", size = 139967 }, - { url = "https://files.pythonhosted.org/packages/77/3b/b5234add8115cbfe8635b6c152fb527327f45e4c0f0bf2e93844b36b5217/ijson-3.4.0.post0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7206afcb396aaef66c2b066997b4e9d9042c4b7d777f4d994e9cec6d322c2fe6", size = 149196 }, - { url = "https://files.pythonhosted.org/packages/a2/d2/c4ae543e37d7a9fba09740c221976a63705dbad23a9cda9022fc9fa0f3de/ijson-3.4.0.post0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c8dd327da225887194fe8b93f2b3c9c256353e14a6b9eefc940ed17fde38f5b8", size = 148516 }, - { url = "https://files.pythonhosted.org/packages/0d/a1/914b5fb1c26af2474cd04841626e0e95576499a4ca940661fb105ee12dd2/ijson-3.4.0.post0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:4810546e66128af51fd4a0c9a640e84e8508e9c15c4f247d8a3e3253b20e1465", size = 149770 }, - { url = "https://files.pythonhosted.org/packages/7a/c1/51c3584102d0d85d4aa10cc88dbbe431ecb9fe98160a9e2fad62a4456aed/ijson-3.4.0.post0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:103a0838061297d063bca81d724b0958b616f372bd893bbc278320152252c652", size = 143688 }, - { url = "https://files.pythonhosted.org/packages/47/3d/a54f13d766332620bded8ee76bcdd274509ecc53cf99573450f95b3ad910/ijson-3.4.0.post0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:40007c977e230e04118b27322f25a72ae342a3d61464b2057fcd9b21eeb7427a", size = 150688 }, - { url = "https://files.pythonhosted.org/packages/72/49/43d97cccf3266da7c044bd42e5083340ad1fd97fbb16d1bcd6791fd8918f/ijson-3.4.0.post0-cp314-cp314-win32.whl", hash = "sha256:f932969fc1fd4449ca141cf5f47ff357656a154a361f28d9ebca0badc5b02297", size = 52882 }, - { url = "https://files.pythonhosted.org/packages/e9/f0/008f1ed4e0fc6f6dc7a5a82ecf08a59bb212514e158954374d440d700e6c/ijson-3.4.0.post0-cp314-cp314-win_amd64.whl", hash = "sha256:3ed19b1e4349240773a8ce4a4bfa450892d4a57949c02c515cd6be5a46b7696a", size = 55568 }, - { url = "https://files.pythonhosted.org/packages/69/1c/8a199fded709e762aced89bb7086973c837e432dd714bbad78a6ac789c23/ijson-3.4.0.post0-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:226447e40ca9340a39ed07d68ea02ee14b52cb4fe649425b256c1f0073531c83", size = 92345 }, - { url = 
"https://files.pythonhosted.org/packages/be/60/04e97f6a403203bd2eb8849570bdce5719d696b5fb96aa2a62566fe7a1d9/ijson-3.4.0.post0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2c88f0669d45d4b1aa017c9b68d378e7cd15d188dfb6f0209adc78b7f45590a7", size = 62029 }, - { url = "https://files.pythonhosted.org/packages/2a/97/e88295f9456ba939d90d4603af28fcabda3b443ef55e709e9381df3daa58/ijson-3.4.0.post0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:56b3089dc28c12492d92cc4896d2be585a89ecae34e25d08c1df88f21815cb50", size = 61776 }, - { url = "https://files.pythonhosted.org/packages/1b/9f/0e9c236e720c2de887ab0d7cad8a15d2aa55fb449f792437fc99899957a9/ijson-3.4.0.post0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:c117321cfa7b749cc1213f9b4c80dc958f0a206df98ec038ae4bcbbdb8463a15", size = 199808 }, - { url = "https://files.pythonhosted.org/packages/0e/70/c21de30e7013e074924cd82057acfc5760e7b2cc41180f80770621b0ad36/ijson-3.4.0.post0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8311f48db6a33116db5c81682f08b6e2405501a4b4e460193ae69fec3cd1f87a", size = 217152 }, - { url = "https://files.pythonhosted.org/packages/64/78/63a0bcc0707037df4e22bb836451279d850592258c859685a402c27f5d6d/ijson-3.4.0.post0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:91c61a3e63e04da648737e6b4abd537df1b46fb8cdf3219b072e790bb3c1a46b", size = 207663 }, - { url = "https://files.pythonhosted.org/packages/7d/85/834e9838d69893cb7567e1210be044444213c78f7414aaf1cd241df16078/ijson-3.4.0.post0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1709171023ce82651b2f132575c2e6282e47f64ad67bd3260da476418d0e7895", size = 211157 }, - { url = "https://files.pythonhosted.org/packages/2e/9b/9fda503799ebc30397710552e5dedc1d98d9ea6a694e5717415892623a94/ijson-3.4.0.post0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:5f0a72b1e3c0f78551670c12b2fdc1bf05f2796254d9c2055ba319bec2216020", size = 200231 }, - { url = "https://files.pythonhosted.org/packages/15/f3/6419d1d5795a16591233d3aa3747b084e82c0c1d7184bdad9be638174560/ijson-3.4.0.post0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:b982a3597b0439ce9c8f4cfc929d86c6ed43907908be1e8463a34dc35fe5b258", size = 204825 }, - { url = "https://files.pythonhosted.org/packages/1f/8d/a520e6902129c55fa94428ea0a22e8547540d5e7ca30f18b39594a5feea2/ijson-3.4.0.post0-cp314-cp314t-win32.whl", hash = "sha256:4e39bfdc36b0b460ef15a06550a6a385c64c81f7ac205ccff39bd45147918912", size = 55559 }, - { url = "https://files.pythonhosted.org/packages/20/67/0ac6dd0045957ba1270b7b1860864f7d8cea4062e70b1083134c587e5768/ijson-3.4.0.post0-cp314-cp314t-win_amd64.whl", hash = "sha256:17e45262a5ddef39894013fb1548ee7094e444c8389eb1a97f86708b19bea03e", size = 58238 }, +sdist = { url = "https://files.pythonhosted.org/packages/2d/30/7ab4b9e88e7946f6beef419f74edcc541df3ea562c7882257b4eaa82417d/ijson-3.4.0.post0.tar.gz", hash = "sha256:9aa02dc70bb245670a6ca7fba737b992aeeb4895360980622f7e568dbf23e41e", size = 67216, upload_time = "2025-10-10T05:29:25.62Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7d/fe/3b6af0025288e769dbfa30485dae1b3bd3f33f00390f3ee532cbb1c33e9b/ijson-3.4.0.post0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:b607a500fca26101be47d2baf7cddb457b819ab60a75ce51ed1092a40da8b2f9", size = 87847, upload_time = "2025-10-10T05:28:07.229Z" }, + { url = 
"https://files.pythonhosted.org/packages/6e/a5/95ee2ca82f3b1a57892452f6e5087607d56c620beb8ce625475194568698/ijson-3.4.0.post0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4827d9874a6a81625412c59f7ca979a84d01f7f6bfb3c6d4dc4c46d0382b14e0", size = 59815, upload_time = "2025-10-10T05:28:08.448Z" }, + { url = "https://files.pythonhosted.org/packages/51/8d/5a704ab3c17c55c21c86423458db8610626ca99cc9086a74dfeb7ee9054c/ijson-3.4.0.post0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d4d4afec780881edb2a0d2dd40b1cdbe246e630022d5192f266172a0307986a7", size = 59648, upload_time = "2025-10-10T05:28:09.307Z" }, + { url = "https://files.pythonhosted.org/packages/25/56/ca5d6ca145d007f30b44e747f3c163bc08710ce004af0deaad4a2301339b/ijson-3.4.0.post0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:432fb60ffb952926f9438e0539011e2dfcd108f8426ee826ccc6173308c3ff2c", size = 138279, upload_time = "2025-10-10T05:28:10.489Z" }, + { url = "https://files.pythonhosted.org/packages/c3/d3/22e3cc806fcdda7ad4c8482ed74db7a017d4a1d49b4300c7bc07052fb561/ijson-3.4.0.post0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:54a0e3e05d9a0c95ecba73d9579f146cf6d5c5874116c849dba2d39a5f30380e", size = 149110, upload_time = "2025-10-10T05:28:12.263Z" }, + { url = "https://files.pythonhosted.org/packages/3e/04/efb30f413648b9267f5a33920ac124d7ebef3bc4063af8f6ffc8ca11ddcb/ijson-3.4.0.post0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:05807edc0bcbd222dc6ea32a2b897f0c81dc7f12c8580148bc82f6d7f5e7ec7b", size = 149026, upload_time = "2025-10-10T05:28:13.557Z" }, + { url = "https://files.pythonhosted.org/packages/2d/cf/481165f7046ade32488719300a3994a437020bc41cfbb54334356348f513/ijson-3.4.0.post0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a5269af16f715855d9864937f9dd5c348ca1ac49cee6a2c7a1b7091c159e874f", size = 150012, upload_time = "2025-10-10T05:28:14.859Z" }, + { url = "https://files.pythonhosted.org/packages/0f/24/642e3289917ecf860386e26dfde775f9962d26ab7f6c2e364ed3ca3c25d8/ijson-3.4.0.post0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:b200df83c901f5bfa416d069ac71077aa1608f854a4c50df1b84ced560e9c9ec", size = 142193, upload_time = "2025-10-10T05:28:16.131Z" }, + { url = "https://files.pythonhosted.org/packages/0f/f5/fd2f038abe95e553e1c3ee207cda19db9196eb416e63c7c89699a8cf0db7/ijson-3.4.0.post0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6458bd8e679cdff459a0a5e555b107c3bbacb1f382da3fe0f40e392871eb518d", size = 150904, upload_time = "2025-10-10T05:28:17.401Z" }, + { url = "https://files.pythonhosted.org/packages/49/35/24259d22519987928164e6cb8fe3486e1df0899b2999ada4b0498639b463/ijson-3.4.0.post0-cp312-cp312-win32.whl", hash = "sha256:55f7f656b5986326c978cbb3a9eea9e33f3ef6ecc4535b38f1d452c731da39ab", size = 52358, upload_time = "2025-10-10T05:28:18.315Z" }, + { url = "https://files.pythonhosted.org/packages/a1/2b/6f7ade27a8ff5758fc41006dadd2de01730def84fe3e60553b329c59e0d4/ijson-3.4.0.post0-cp312-cp312-win_amd64.whl", hash = "sha256:e15833dcf6f6d188fdc624a31cd0520c3ba21b6855dc304bc7c1a8aeca02d4ac", size = 54789, upload_time = "2025-10-10T05:28:19.552Z" }, + { url = "https://files.pythonhosted.org/packages/1b/20/aaec6977f9d538bbadd760c7fa0f6a0937742abdcc920ec6478a8576e55f/ijson-3.4.0.post0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:114ed248166ac06377e87a245a158d6b98019d2bdd3bb93995718e0bd996154f", size = 87863, upload_time = "2025-10-10T05:28:20.786Z" 
}, + { url = "https://files.pythonhosted.org/packages/5b/29/06bf56a866e2fe21453a1ad8f3a5d7bca3c723f73d96329656dfee969783/ijson-3.4.0.post0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ffb21203736b08fe27cb30df6a4f802fafb9ef7646c5ff7ef79569b63ea76c57", size = 59806, upload_time = "2025-10-10T05:28:21.596Z" }, + { url = "https://files.pythonhosted.org/packages/ba/ae/e1d0fda91ba7a444b75f0d60cb845fdb1f55d3111351529dcbf4b1c276fe/ijson-3.4.0.post0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:07f20ecd748602ac7f18c617637e53bd73ded7f3b22260bba3abe401a7fc284e", size = 59643, upload_time = "2025-10-10T05:28:22.45Z" }, + { url = "https://files.pythonhosted.org/packages/4d/24/5a24533be2726396cc1724dc237bada09b19715b5bfb0e7b9400db0901ad/ijson-3.4.0.post0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:27aa193d47ffc6bc4e45453896ad98fb089a367e8283b973f1fe5c0198b60b4e", size = 138082, upload_time = "2025-10-10T05:28:23.319Z" }, + { url = "https://files.pythonhosted.org/packages/05/60/026c3efcec23c329657e878cbc0a9a25b42e7eb3971e8c2377cb3284e2b7/ijson-3.4.0.post0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ccddb2894eb7af162ba43b9475ac5825d15d568832f82eb8783036e5d2aebd42", size = 149145, upload_time = "2025-10-10T05:28:24.279Z" }, + { url = "https://files.pythonhosted.org/packages/ed/c2/036499909b7a1bc0bcd85305e4348ad171aeb9df57581287533bdb3497e9/ijson-3.4.0.post0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:61ab0b8c5bf707201dc67e02c116f4b6545c4afd7feb2264b989d242d9c4348a", size = 149046, upload_time = "2025-10-10T05:28:25.186Z" }, + { url = "https://files.pythonhosted.org/packages/ba/75/e7736073ad96867c129f9e799e3e65086badd89dbf3911f76d9b3bf8a115/ijson-3.4.0.post0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:254cfb8c124af68327a0e7a49b50bbdacafd87c4690a3d62c96eb01020a685ef", size = 150356, upload_time = "2025-10-10T05:28:26.135Z" }, + { url = "https://files.pythonhosted.org/packages/9d/1b/1c1575d2cda136985561fcf774fe6c54412cd0fa08005342015af0403193/ijson-3.4.0.post0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:04ac9ca54db20f82aeda6379b5f4f6112fdb150d09ebce04affeab98a17b4ed3", size = 142322, upload_time = "2025-10-10T05:28:27.125Z" }, + { url = "https://files.pythonhosted.org/packages/28/4d/aba9871feb624df8494435d1a9ddc7b6a4f782c6044bfc0d770a4b59f145/ijson-3.4.0.post0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a603d7474bf35e7b3a8e49c8dabfc4751841931301adff3f3318171c4e407f32", size = 151386, upload_time = "2025-10-10T05:28:28.274Z" }, + { url = "https://files.pythonhosted.org/packages/3f/9a/791baa83895fb6e492bce2c7a0ea6427b6a41fe854349e62a37d0c9deaf0/ijson-3.4.0.post0-cp313-cp313-win32.whl", hash = "sha256:ec5bb1520cb212ebead7dba048bb9b70552c3440584f83b01b0abc96862e2a09", size = 52352, upload_time = "2025-10-10T05:28:29.191Z" }, + { url = "https://files.pythonhosted.org/packages/a9/0c/061f51493e1da21116d74ee8f6a6b9ae06ca5fa2eb53c3b38b64f9a9a5ae/ijson-3.4.0.post0-cp313-cp313-win_amd64.whl", hash = "sha256:3505dff18bdeb8b171eb28af6df34857e2be80dc01e2e3b624e77215ad58897f", size = 54783, upload_time = "2025-10-10T05:28:30.048Z" }, + { url = "https://files.pythonhosted.org/packages/c7/89/4344e176f2c5f5ef3251c9bfa4ddd5b4cf3f9601fd6ec3f677a3ba0b9c71/ijson-3.4.0.post0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:45a0b1c833ed2620eaf8da958f06ac8351c59e5e470e078400d23814670ed708", size = 92342, upload_time = 
"2025-10-10T05:28:31.389Z" }, + { url = "https://files.pythonhosted.org/packages/d4/b1/85012c586a6645f9fb8bfa3ef62ed2f303c8d73fc7c2f705111582925980/ijson-3.4.0.post0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:7809ec8c8f40228edaaa089f33e811dff4c5b8509702652870d3f286c9682e27", size = 62028, upload_time = "2025-10-10T05:28:32.849Z" }, + { url = "https://files.pythonhosted.org/packages/65/ea/7b7e2815c101d78b33e74d64ddb70cccc377afccd5dda76e566ed3fcb56f/ijson-3.4.0.post0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:cf4a34c2cfe852aee75c89c05b0a4531c49dc0be27eeed221afd6fbf9c3e149c", size = 61773, upload_time = "2025-10-10T05:28:34.016Z" }, + { url = "https://files.pythonhosted.org/packages/59/7d/2175e599cb77a64f528629bad3ce95dfdf2aa6171d313c1fc00bbfaf0d22/ijson-3.4.0.post0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:a39d5d36067604b26b78de70b8951c90e9272450642661fe531a8f7a6936a7fa", size = 198562, upload_time = "2025-10-10T05:28:34.878Z" }, + { url = "https://files.pythonhosted.org/packages/13/97/82247c501c92405bb2fc44ab5efb497335bcb9cf0f5d3a0b04a800737bd8/ijson-3.4.0.post0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:83fc738d81c9ea686b452996110b8a6678296c481e0546857db24785bff8da92", size = 216212, upload_time = "2025-10-10T05:28:36.208Z" }, + { url = "https://files.pythonhosted.org/packages/95/ca/b956f507bb02e05ce109fd11ab6a2c054f8b686cc5affe41afe50630984d/ijson-3.4.0.post0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b2a81aee91633868f5b40280e2523f7c5392e920a5082f47c5e991e516b483f6", size = 206618, upload_time = "2025-10-10T05:28:37.243Z" }, + { url = "https://files.pythonhosted.org/packages/3e/12/e827840ab81d86a9882e499097934df53294f05155f1acfcb9a211ac1142/ijson-3.4.0.post0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:56169e298c5a2e7196aaa55da78ddc2415876a74fe6304f81b1eb0d3273346f7", size = 210689, upload_time = "2025-10-10T05:28:38.252Z" }, + { url = "https://files.pythonhosted.org/packages/1b/3b/59238d9422c31a4aefa22ebeb8e599e706158a0ab03669ef623be77a499a/ijson-3.4.0.post0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:eeb9540f0b1a575cbb5968166706946458f98c16e7accc6f2fe71efa29864241", size = 199927, upload_time = "2025-10-10T05:28:39.233Z" }, + { url = "https://files.pythonhosted.org/packages/b6/0f/ec01c36c128c37edb8a5ae8f3de3256009f886338d459210dfe121ee4ba9/ijson-3.4.0.post0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ba3478ff0bb49d7ba88783f491a99b6e3fa929c930ab062d2bb7837e6a38fe88", size = 204455, upload_time = "2025-10-10T05:28:40.644Z" }, + { url = "https://files.pythonhosted.org/packages/c8/cf/5560e1db96c6d10a5313be76bf5a1754266cbfb5cc13ff64d107829e07b1/ijson-3.4.0.post0-cp313-cp313t-win32.whl", hash = "sha256:b005ce84e82f28b00bf777a464833465dfe3efa43a0a26c77b5ac40723e1a728", size = 54566, upload_time = "2025-10-10T05:28:41.663Z" }, + { url = "https://files.pythonhosted.org/packages/22/5a/cbb69144c3b25dd56f5421ff7dc0cf3051355579062024772518e4f4b3c5/ijson-3.4.0.post0-cp313-cp313t-win_amd64.whl", hash = "sha256:fe9c84c9b1c8798afa407be1cea1603401d99bfc7c34497e19f4f5e5ddc9b441", size = 57298, upload_time = "2025-10-10T05:28:42.881Z" }, + { url = "https://files.pythonhosted.org/packages/af/0b/a4ce8524fd850302bbf5d9f38d07c0fa981fdbe44951d2fcd036935b67dd/ijson-3.4.0.post0-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:da6a21b88cbf5ecbc53371283988d22c9643aa71ae2873bbeaefd2dea3b6160b", size 
= 88361, upload_time = "2025-10-10T05:28:43.73Z" },
+ { url = "https://files.pythonhosted.org/packages/be/90/a5e5f33e46f28174a9c8142d12dcb3d26ce358d9a2230b9b15f5c987b3a5/ijson-3.4.0.post0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:cf24a48a1c3ca9d44a04feb59ccefeb9aa52bb49b9cb70ad30518c25cce74bb7", size = 59960, upload_time = "2025-10-10T05:28:44.585Z" },
+ { url = "https://files.pythonhosted.org/packages/83/e2/551dd7037dda759aa0ce53f0d3d7be03b03c6b05c0b0a5d5ab7a47e6b4b1/ijson-3.4.0.post0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:d14427d366f95f21adcb97d0ed1f6d30f6fdc04d0aa1e4de839152c50c2b8d65", size = 59957, upload_time = "2025-10-10T05:28:45.748Z" },
+ { url = "https://files.pythonhosted.org/packages/ac/b9/3006384f85cc26cf83dbbd542d362cc336f1e1ddd491e32147cfa46ea8ae/ijson-3.4.0.post0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:339d49f6c5d24051c85d9226be96d2d56e633cb8b7d09dd8099de8d8b51a97e2", size = 139967, upload_time = "2025-10-10T05:28:47.229Z" },
+ { url = "https://files.pythonhosted.org/packages/77/3b/b5234add8115cbfe8635b6c152fb527327f45e4c0f0bf2e93844b36b5217/ijson-3.4.0.post0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7206afcb396aaef66c2b066997b4e9d9042c4b7d777f4d994e9cec6d322c2fe6", size = 149196, upload_time = "2025-10-10T05:28:48.226Z" },
+ { url = "https://files.pythonhosted.org/packages/a2/d2/c4ae543e37d7a9fba09740c221976a63705dbad23a9cda9022fc9fa0f3de/ijson-3.4.0.post0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c8dd327da225887194fe8b93f2b3c9c256353e14a6b9eefc940ed17fde38f5b8", size = 148516, upload_time = "2025-10-10T05:28:49.237Z" },
+ { url = "https://files.pythonhosted.org/packages/0d/a1/914b5fb1c26af2474cd04841626e0e95576499a4ca940661fb105ee12dd2/ijson-3.4.0.post0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:4810546e66128af51fd4a0c9a640e84e8508e9c15c4f247d8a3e3253b20e1465", size = 149770, upload_time = "2025-10-10T05:28:50.501Z" },
+ { url = "https://files.pythonhosted.org/packages/7a/c1/51c3584102d0d85d4aa10cc88dbbe431ecb9fe98160a9e2fad62a4456aed/ijson-3.4.0.post0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:103a0838061297d063bca81d724b0958b616f372bd893bbc278320152252c652", size = 143688, upload_time = "2025-10-10T05:28:51.823Z" },
+ { url = "https://files.pythonhosted.org/packages/47/3d/a54f13d766332620bded8ee76bcdd274509ecc53cf99573450f95b3ad910/ijson-3.4.0.post0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:40007c977e230e04118b27322f25a72ae342a3d61464b2057fcd9b21eeb7427a", size = 150688, upload_time = "2025-10-10T05:28:52.757Z" },
+ { url = "https://files.pythonhosted.org/packages/72/49/43d97cccf3266da7c044bd42e5083340ad1fd97fbb16d1bcd6791fd8918f/ijson-3.4.0.post0-cp314-cp314-win32.whl", hash = "sha256:f932969fc1fd4449ca141cf5f47ff357656a154a361f28d9ebca0badc5b02297", size = 52882, upload_time = "2025-10-10T05:28:53.708Z" },
+ { url = "https://files.pythonhosted.org/packages/e9/f0/008f1ed4e0fc6f6dc7a5a82ecf08a59bb212514e158954374d440d700e6c/ijson-3.4.0.post0-cp314-cp314-win_amd64.whl", hash = "sha256:3ed19b1e4349240773a8ce4a4bfa450892d4a57949c02c515cd6be5a46b7696a", size = 55568, upload_time = "2025-10-10T05:28:54.79Z" },
+ { url = "https://files.pythonhosted.org/packages/69/1c/8a199fded709e762aced89bb7086973c837e432dd714bbad78a6ac789c23/ijson-3.4.0.post0-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:226447e40ca9340a39ed07d68ea02ee14b52cb4fe649425b256c1f0073531c83", size = 92345, upload_time = "2025-10-10T05:28:55.657Z" },
+ { url = "https://files.pythonhosted.org/packages/be/60/04e97f6a403203bd2eb8849570bdce5719d696b5fb96aa2a62566fe7a1d9/ijson-3.4.0.post0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2c88f0669d45d4b1aa017c9b68d378e7cd15d188dfb6f0209adc78b7f45590a7", size = 62029, upload_time = "2025-10-10T05:28:56.561Z" },
+ { url = "https://files.pythonhosted.org/packages/2a/97/e88295f9456ba939d90d4603af28fcabda3b443ef55e709e9381df3daa58/ijson-3.4.0.post0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:56b3089dc28c12492d92cc4896d2be585a89ecae34e25d08c1df88f21815cb50", size = 61776, upload_time = "2025-10-10T05:28:57.401Z" },
+ { url = "https://files.pythonhosted.org/packages/1b/9f/0e9c236e720c2de887ab0d7cad8a15d2aa55fb449f792437fc99899957a9/ijson-3.4.0.post0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:c117321cfa7b749cc1213f9b4c80dc958f0a206df98ec038ae4bcbbdb8463a15", size = 199808, upload_time = "2025-10-10T05:28:58.62Z" },
+ { url = "https://files.pythonhosted.org/packages/0e/70/c21de30e7013e074924cd82057acfc5760e7b2cc41180f80770621b0ad36/ijson-3.4.0.post0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8311f48db6a33116db5c81682f08b6e2405501a4b4e460193ae69fec3cd1f87a", size = 217152, upload_time = "2025-10-10T05:28:59.656Z" },
+ { url = "https://files.pythonhosted.org/packages/64/78/63a0bcc0707037df4e22bb836451279d850592258c859685a402c27f5d6d/ijson-3.4.0.post0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:91c61a3e63e04da648737e6b4abd537df1b46fb8cdf3219b072e790bb3c1a46b", size = 207663, upload_time = "2025-10-10T05:29:00.73Z" },
+ { url = "https://files.pythonhosted.org/packages/7d/85/834e9838d69893cb7567e1210be044444213c78f7414aaf1cd241df16078/ijson-3.4.0.post0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1709171023ce82651b2f132575c2e6282e47f64ad67bd3260da476418d0e7895", size = 211157, upload_time = "2025-10-10T05:29:01.87Z" },
+ { url = "https://files.pythonhosted.org/packages/2e/9b/9fda503799ebc30397710552e5dedc1d98d9ea6a694e5717415892623a94/ijson-3.4.0.post0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:5f0a72b1e3c0f78551670c12b2fdc1bf05f2796254d9c2055ba319bec2216020", size = 200231, upload_time = "2025-10-10T05:29:02.883Z" },
+ { url = "https://files.pythonhosted.org/packages/15/f3/6419d1d5795a16591233d3aa3747b084e82c0c1d7184bdad9be638174560/ijson-3.4.0.post0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:b982a3597b0439ce9c8f4cfc929d86c6ed43907908be1e8463a34dc35fe5b258", size = 204825, upload_time = "2025-10-10T05:29:04.242Z" },
+ { url = "https://files.pythonhosted.org/packages/1f/8d/a520e6902129c55fa94428ea0a22e8547540d5e7ca30f18b39594a5feea2/ijson-3.4.0.post0-cp314-cp314t-win32.whl", hash = "sha256:4e39bfdc36b0b460ef15a06550a6a385c64c81f7ac205ccff39bd45147918912", size = 55559, upload_time = "2025-10-10T05:29:05.681Z" },
+ { url = "https://files.pythonhosted.org/packages/20/67/0ac6dd0045957ba1270b7b1860864f7d8cea4062e70b1083134c587e5768/ijson-3.4.0.post0-cp314-cp314t-win_amd64.whl", hash = "sha256:17e45262a5ddef39894013fb1548ee7094e444c8389eb1a97f86708b19bea03e", size = 58238, upload_time = "2025-10-10T05:29:06.656Z" },
]

[[package]]
@@ -1048,9 +1087,18 @@ dependencies = [
{ name = "more-itertools" },
{ name = "typeguard" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/78/c6/943357d44a21fd995723d07ccaddd78023eace03c1846049a2645d4324a3/inflect-7.5.0.tar.gz", hash = "sha256:faf19801c3742ed5a05a8ce388e0d8fe1a07f8d095c82201eb904f5d27ad571f", size = 73751 }
+sdist = { url = "https://files.pythonhosted.org/packages/78/c6/943357d44a21fd995723d07ccaddd78023eace03c1846049a2645d4324a3/inflect-7.5.0.tar.gz", hash = "sha256:faf19801c3742ed5a05a8ce388e0d8fe1a07f8d095c82201eb904f5d27ad571f", size = 73751, upload_time = "2024-12-28T17:11:18.897Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/8a/eb/427ed2b20a38a4ee29f24dbe4ae2dafab198674fe9a85e3d6adf9e5f5f41/inflect-7.5.0-py3-none-any.whl", hash = "sha256:2aea70e5e70c35d8350b8097396ec155ffd68def678c7ff97f51aa69c1d92344", size = 35197, upload_time = "2024-12-28T17:11:15.931Z" },
+]
+
+[[package]]
+name = "iniconfig"
+version = "2.3.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload_time = "2025-10-18T21:55:43.219Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/8a/eb/427ed2b20a38a4ee29f24dbe4ae2dafab198674fe9a85e3d6adf9e5f5f41/inflect-7.5.0-py3-none-any.whl", hash = "sha256:2aea70e5e70c35d8350b8097396ec155ffd68def678c7ff97f51aa69c1d92344", size = 35197 },
+ { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload_time = "2025-10-18T21:55:41.639Z" },
]

[[package]]
@@ -1095,18 +1143,18 @@ dependencies = [
{ name = "universal-pathlib" },
{ name = "zipp" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/3f/1a/c4a35fe9cd49983f511c1cd6b832547fb464b40176feba3a9df3be512a5e/inspect_ai-0.3.160.tar.gz", hash = "sha256:8aadaa65fb23730430388ee6ba1be4a5da697e78dab77075d7d2522a2ebd0cb6", size = 43312939 }
+sdist = { url = "https://files.pythonhosted.org/packages/3f/1a/c4a35fe9cd49983f511c1cd6b832547fb464b40176feba3a9df3be512a5e/inspect_ai-0.3.160.tar.gz", hash = "sha256:8aadaa65fb23730430388ee6ba1be4a5da697e78dab77075d7d2522a2ebd0cb6", size = 43312939, upload_time = "2026-01-09T14:38:30.833Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/6f/40/29d4cdcb6a292877cb5283d9641f454866e74fc2ae83010f42eb19de3779/inspect_ai-0.3.160-py3-none-any.whl", hash = "sha256:46238194abc910f101963bb0938967c919ac71681edfab0ebee5403dd28c11e2", size = 34520873 },
+ { url = "https://files.pythonhosted.org/packages/6f/40/29d4cdcb6a292877cb5283d9641f454866e74fc2ae83010f42eb19de3779/inspect_ai-0.3.160-py3-none-any.whl", hash = "sha256:46238194abc910f101963bb0938967c919ac71681edfab0ebee5403dd28c11e2", size = 34520873, upload_time = "2026-01-09T14:38:24.698Z" },
]

[[package]]
name = "isort"
version = "7.0.0"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/63/53/4f3c058e3bace40282876f9b553343376ee687f3c35a525dc79dbd450f88/isort-7.0.0.tar.gz", hash = "sha256:5513527951aadb3ac4292a41a16cbc50dd1642432f5e8c20057d414bdafb4187", size = 805049 }
+sdist = { url = "https://files.pythonhosted.org/packages/63/53/4f3c058e3bace40282876f9b553343376ee687f3c35a525dc79dbd450f88/isort-7.0.0.tar.gz", hash = "sha256:5513527951aadb3ac4292a41a16cbc50dd1642432f5e8c20057d414bdafb4187", size = 805049, upload_time = "2025-10-11T13:30:59.107Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/7f/ed/e3705d6d02b4f7aea715a353c8ce193efd0b5db13e204df895d38734c244/isort-7.0.0-py3-none-any.whl", hash = "sha256:1bcabac8bc3c36c7fb7b98a76c8abb18e0f841a3ba81decac7691008592499c1", size = 94672 },
+ { url = "https://files.pythonhosted.org/packages/7f/ed/e3705d6d02b4f7aea715a353c8ce193efd0b5db13e204df895d38734c244/isort-7.0.0-py3-none-any.whl", hash = "sha256:1bcabac8bc3c36c7fb7b98a76c8abb18e0f841a3ba81decac7691008592499c1", size = 94672, upload_time = "2025-10-11T13:30:57.665Z" },
]

[[package]]
@@ -1116,27 +1164,27 @@ source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "markupsafe" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115 }
+sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115, upload_time = "2025-03-05T20:05:02.478Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899 },
+ { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload_time = "2025-03-05T20:05:00.369Z" },
]

[[package]]
name = "jmespath"
version = "1.0.1"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/00/2a/e867e8531cf3e36b41201936b7fa7ba7b5702dbef42922193f05c8976cd6/jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe", size = 25843 }
+sdist = { url = "https://files.pythonhosted.org/packages/00/2a/e867e8531cf3e36b41201936b7fa7ba7b5702dbef42922193f05c8976cd6/jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe", size = 25843, upload_time = "2022-06-17T18:00:12.224Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/31/b4/b9b800c45527aadd64d5b442f9b932b00648617eb5d63d2c7a6587b7cafc/jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980", size = 20256 },
+ { url = "https://files.pythonhosted.org/packages/31/b4/b9b800c45527aadd64d5b442f9b932b00648617eb5d63d2c7a6587b7cafc/jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980", size = 20256, upload_time = "2022-06-17T18:00:10.251Z" },
]

[[package]]
name = "joblib"
version = "1.5.3"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/41/f2/d34e8b3a08a9cc79a50b2208a93dce981fe615b64d5a4d4abee421d898df/joblib-1.5.3.tar.gz", hash = "sha256:8561a3269e6801106863fd0d6d84bb737be9e7631e33aaed3fb9ce5953688da3", size = 331603 }
+sdist = { url = "https://files.pythonhosted.org/packages/41/f2/d34e8b3a08a9cc79a50b2208a93dce981fe615b64d5a4d4abee421d898df/joblib-1.5.3.tar.gz", hash = "sha256:8561a3269e6801106863fd0d6d84bb737be9e7631e33aaed3fb9ce5953688da3", size = 331603, upload_time = "2025-12-15T08:41:46.427Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/7b/91/984aca2ec129e2757d1e4e3c81c3fcda9d0f85b74670a094cc443d9ee949/joblib-1.5.3-py3-none-any.whl", hash = "sha256:5fc3c5039fc5ca8c0276333a188bbd59d6b7ab37fe6632daa76bc7f9ec18e713", size = 309071 },
+ { url = "https://files.pythonhosted.org/packages/7b/91/984aca2ec129e2757d1e4e3c81c3fcda9d0f85b74670a094cc443d9ee949/joblib-1.5.3-py3-none-any.whl", hash = "sha256:5fc3c5039fc5ca8c0276333a188bbd59d6b7ab37fe6632daa76bc7f9ec18e713", size = 309071, upload_time = "2025-12-15T08:41:44.973Z" },
]

[[package]]
@@ -1146,9 +1194,9 @@ source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "attrs" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/35/87/bcda8e46c88d0e34cad2f09ee2d0c7f5957bccdb9791b0b934ec84d84be4/jsonlines-4.0.0.tar.gz", hash = "sha256:0c6d2c09117550c089995247f605ae4cf77dd1533041d366351f6f298822ea74", size = 11359 }
+sdist = { url = "https://files.pythonhosted.org/packages/35/87/bcda8e46c88d0e34cad2f09ee2d0c7f5957bccdb9791b0b934ec84d84be4/jsonlines-4.0.0.tar.gz", hash = "sha256:0c6d2c09117550c089995247f605ae4cf77dd1533041d366351f6f298822ea74", size = 11359, upload_time = "2023-09-01T12:34:44.187Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/f8/62/d9ba6323b9202dd2fe166beab8a86d29465c41a0288cbe229fac60c1ab8d/jsonlines-4.0.0-py3-none-any.whl", hash = "sha256:185b334ff2ca5a91362993f42e83588a360cf95ce4b71a73548502bda52a7c55", size = 8701 },
+ { url = "https://files.pythonhosted.org/packages/f8/62/d9ba6323b9202dd2fe166beab8a86d29465c41a0288cbe229fac60c1ab8d/jsonlines-4.0.0-py3-none-any.whl", hash = "sha256:185b334ff2ca5a91362993f42e83588a360cf95ce4b71a73548502bda52a7c55", size = 8701, upload_time = "2023-09-01T12:34:42.563Z" },
]

[[package]]
@@ -1158,9 +1206,9 @@ source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "jsonpointer" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/42/78/18813351fe5d63acad16aec57f94ec2b70a09e53ca98145589e185423873/jsonpatch-1.33.tar.gz", hash = "sha256:9fcd4009c41e6d12348b4a0ff2563ba56a2923a7dfee731d004e212e1ee5030c", size = 21699 }
+sdist = { url = "https://files.pythonhosted.org/packages/42/78/18813351fe5d63acad16aec57f94ec2b70a09e53ca98145589e185423873/jsonpatch-1.33.tar.gz", hash = "sha256:9fcd4009c41e6d12348b4a0ff2563ba56a2923a7dfee731d004e212e1ee5030c", size = 21699, upload_time = "2023-06-26T12:07:29.144Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/73/07/02e16ed01e04a374e644b575638ec7987ae846d25ad97bcc9945a3ee4b0e/jsonpatch-1.33-py2.py3-none-any.whl", hash = "sha256:0ae28c0cd062bbd8b8ecc26d7d164fbbea9652a1a3693f3b956c1eae5145dade", size = 12898 },
+ { url = "https://files.pythonhosted.org/packages/73/07/02e16ed01e04a374e644b575638ec7987ae846d25ad97bcc9945a3ee4b0e/jsonpatch-1.33-py2.py3-none-any.whl", hash = "sha256:0ae28c0cd062bbd8b8ecc26d7d164fbbea9652a1a3693f3b956c1eae5145dade", size = 12898, upload_time = "2023-06-16T21:01:28.466Z" },
]

[[package]]
@@ -1170,27 +1218,27 @@ source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "ply" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/6d/86/08646239a313f895186ff0a4573452038eed8c86f54380b3ebac34d32fb2/jsonpath-ng-1.7.0.tar.gz", hash = "sha256:f6f5f7fd4e5ff79c785f1573b394043b39849fb2bb47bcead935d12b00beab3c", size = 37838 }
+sdist = { url = "https://files.pythonhosted.org/packages/6d/86/08646239a313f895186ff0a4573452038eed8c86f54380b3ebac34d32fb2/jsonpath-ng-1.7.0.tar.gz", hash = "sha256:f6f5f7fd4e5ff79c785f1573b394043b39849fb2bb47bcead935d12b00beab3c", size = 37838, upload_time = "2024-10-11T15:41:42.404Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/35/5a/73ecb3d82f8615f32ccdadeb9356726d6cae3a4bbc840b437ceb95708063/jsonpath_ng-1.7.0-py3-none-any.whl", hash = "sha256:f3d7f9e848cba1b6da28c55b1c26ff915dc9e0b1ba7e752a53d6da8d5cbd00b6", size = 30105 },
+ { url = "https://files.pythonhosted.org/packages/35/5a/73ecb3d82f8615f32ccdadeb9356726d6cae3a4bbc840b437ceb95708063/jsonpath_ng-1.7.0-py3-none-any.whl", hash = "sha256:f3d7f9e848cba1b6da28c55b1c26ff915dc9e0b1ba7e752a53d6da8d5cbd00b6", size = 30105, upload_time = "2024-11-20T17:58:30.418Z" },
]

[[package]]
name = "jsonpointer"
version = "3.0.0"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/6a/0a/eebeb1fa92507ea94016a2a790b93c2ae41a7e18778f85471dc54475ed25/jsonpointer-3.0.0.tar.gz", hash = "sha256:2b2d729f2091522d61c3b31f82e11870f60b68f43fbc705cb76bf4b832af59ef", size = 9114 }
+sdist = { url = "https://files.pythonhosted.org/packages/6a/0a/eebeb1fa92507ea94016a2a790b93c2ae41a7e18778f85471dc54475ed25/jsonpointer-3.0.0.tar.gz", hash = "sha256:2b2d729f2091522d61c3b31f82e11870f60b68f43fbc705cb76bf4b832af59ef", size = 9114, upload_time = "2024-06-10T19:24:42.462Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/71/92/5e77f98553e9e75130c78900d000368476aed74276eb8ae8796f65f00918/jsonpointer-3.0.0-py2.py3-none-any.whl", hash = "sha256:13e088adc14fca8b6aa8177c044e12701e6ad4b28ff10e65f2267a90109c9942", size = 7595 },
+ { url = "https://files.pythonhosted.org/packages/71/92/5e77f98553e9e75130c78900d000368476aed74276eb8ae8796f65f00918/jsonpointer-3.0.0-py2.py3-none-any.whl", hash = "sha256:13e088adc14fca8b6aa8177c044e12701e6ad4b28ff10e65f2267a90109c9942", size = 7595, upload_time = "2024-06-10T19:24:40.698Z" },
]

[[package]]
name = "jsonref"
version = "1.1.0"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/aa/0d/c1f3277e90ccdb50d33ed5ba1ec5b3f0a242ed8c1b1a85d3afeb68464dca/jsonref-1.1.0.tar.gz", hash = "sha256:32fe8e1d85af0fdefbebce950af85590b22b60f9e95443176adbde4e1ecea552", size = 8814 }
+sdist = { url = "https://files.pythonhosted.org/packages/aa/0d/c1f3277e90ccdb50d33ed5ba1ec5b3f0a242ed8c1b1a85d3afeb68464dca/jsonref-1.1.0.tar.gz", hash = "sha256:32fe8e1d85af0fdefbebce950af85590b22b60f9e95443176adbde4e1ecea552", size = 8814, upload_time = "2023-01-16T16:10:04.455Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/0c/ec/e1db9922bceb168197a558a2b8c03a7963f1afe93517ddd3cf99f202f996/jsonref-1.1.0-py3-none-any.whl", hash = "sha256:590dc7773df6c21cbf948b5dac07a72a251db28b0238ceecce0a2abfa8ec30a9", size = 9425 },
+ { url = "https://files.pythonhosted.org/packages/0c/ec/e1db9922bceb168197a558a2b8c03a7963f1afe93517ddd3cf99f202f996/jsonref-1.1.0-py3-none-any.whl", hash = "sha256:590dc7773df6c21cbf948b5dac07a72a251db28b0238ceecce0a2abfa8ec30a9", size = 9425, upload_time = "2023-01-16T16:10:02.255Z" },
]

[[package]]
@@ -1203,9 +1251,9 @@ dependencies = [
{ name = "referencing" },
{ name = "rpds-py" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/b3/fc/e067678238fa451312d4c62bf6e6cf5ec56375422aee02f9cb5f909b3047/jsonschema-4.26.0.tar.gz", hash = "sha256:0c26707e2efad8aa1bfc5b7ce170f3fccc2e4918ff85989ba9ffa9facb2be326", size = 366583 }
+sdist = { url = "https://files.pythonhosted.org/packages/b3/fc/e067678238fa451312d4c62bf6e6cf5ec56375422aee02f9cb5f909b3047/jsonschema-4.26.0.tar.gz", hash = "sha256:0c26707e2efad8aa1bfc5b7ce170f3fccc2e4918ff85989ba9ffa9facb2be326", size = 366583, upload_time = "2026-01-07T13:41:07.246Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/69/90/f63fb5873511e014207a475e2bb4e8b2e570d655b00ac19a9a0ca0a385ee/jsonschema-4.26.0-py3-none-any.whl", hash = "sha256:d489f15263b8d200f8387e64b4c3a75f06629559fb73deb8fdfb525f2dab50ce", size = 90630 },
+ { url = "https://files.pythonhosted.org/packages/69/90/f63fb5873511e014207a475e2bb4e8b2e570d655b00ac19a9a0ca0a385ee/jsonschema-4.26.0-py3-none-any.whl", hash = "sha256:d489f15263b8d200f8387e64b4c3a75f06629559fb73deb8fdfb525f2dab50ce", size = 90630, upload_time = "2026-01-07T13:41:05.306Z" },
]

[[package]]
@@ -1215,9 +1263,9 @@ source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "referencing" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/19/74/a633ee74eb36c44aa6d1095e7cc5569bebf04342ee146178e2d36600708b/jsonschema_specifications-2025.9.1.tar.gz", hash = "sha256:b540987f239e745613c7a9176f3edb72b832a4ac465cf02712288397832b5e8d", size = 32855 }
+sdist = { url = "https://files.pythonhosted.org/packages/19/74/a633ee74eb36c44aa6d1095e7cc5569bebf04342ee146178e2d36600708b/jsonschema_specifications-2025.9.1.tar.gz", hash = "sha256:b540987f239e745613c7a9176f3edb72b832a4ac465cf02712288397832b5e8d", size = 32855, upload_time = "2025-09-08T01:34:59.186Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437 },
+ { url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload_time = "2025-09-08T01:34:57.871Z" },
]

[[package]]
@@ -1227,9 +1275,9 @@ source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "uc-micro-py" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/2a/ae/bb56c6828e4797ba5a4821eec7c43b8bf40f69cda4d4f5f8c8a2810ec96a/linkify-it-py-2.0.3.tar.gz", hash = "sha256:68cda27e162e9215c17d786649d1da0021a451bdc436ef9e0fa0ba5234b9b048", size = 27946 }
+sdist = { url = "https://files.pythonhosted.org/packages/2a/ae/bb56c6828e4797ba5a4821eec7c43b8bf40f69cda4d4f5f8c8a2810ec96a/linkify-it-py-2.0.3.tar.gz", hash = "sha256:68cda27e162e9215c17d786649d1da0021a451bdc436ef9e0fa0ba5234b9b048", size = 27946, upload_time = "2024-02-04T14:48:04.179Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/04/1e/b832de447dee8b582cac175871d2f6c3d5077cc56d5575cadba1fd1cccfa/linkify_it_py-2.0.3-py3-none-any.whl", hash = "sha256:6bcbc417b0ac14323382aef5c5192c0075bf8a9d6b41820a2b66371eac6b6d79", size = 19820 },
+ { url = "https://files.pythonhosted.org/packages/04/1e/b832de447dee8b582cac175871d2f6c3d5077cc56d5575cadba1fd1cccfa/linkify_it_py-2.0.3-py3-none-any.whl", hash = "sha256:6bcbc417b0ac14323382aef5c5192c0075bf8a9d6b41820a2b66371eac6b6d79", size = 19820, upload_time = "2024-02-04T14:48:02.496Z" },
]

[[package]]
@@ -1239,9 +1287,9 @@ source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "markupsafe" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/9e/38/bd5b78a920a64d708fe6bc8e0a2c075e1389d53bef8413725c63ba041535/mako-1.3.10.tar.gz", hash = "sha256:99579a6f39583fa7e5630a28c3c1f440e4e97a414b80372649c0ce338da2ea28", size = 392474 }
+sdist = { url = "https://files.pythonhosted.org/packages/9e/38/bd5b78a920a64d708fe6bc8e0a2c075e1389d53bef8413725c63ba041535/mako-1.3.10.tar.gz", hash = "sha256:99579a6f39583fa7e5630a28c3c1f440e4e97a414b80372649c0ce338da2ea28", size = 392474, upload_time = "2025-04-10T12:44:31.16Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/87/fb/99f81ac72ae23375f22b7afdb7642aba97c00a713c217124420147681a2f/mako-1.3.10-py3-none-any.whl", hash = "sha256:baef24a52fc4fc514a0887ac600f9f1cff3d82c61d4d700a1fa84d597b88db59", size = 78509 },
+ { url = "https://files.pythonhosted.org/packages/87/fb/99f81ac72ae23375f22b7afdb7642aba97c00a713c217124420147681a2f/mako-1.3.10-py3-none-any.whl", hash = "sha256:baef24a52fc4fc514a0887ac600f9f1cff3d82c61d4d700a1fa84d597b88db59", size = 78509, upload_time = "2025-04-10T12:50:53.297Z" },
]

[[package]]
@@ -1251,9 +1299,9 @@ source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "mdurl" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070 }
+sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload_time = "2025-08-11T12:57:52.854Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321 },
+ { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload_time = "2025-08-11T12:57:51.923Z" },
]

[package.optional-dependencies]
@@ -1265,63 +1313,63 @@ linkify = [
name = "markupsafe"
version = "3.0.3"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/7e/99/7690b6d4034fffd95959cbe0c02de8deb3098cc577c67bb6a24fe5d7caa7/markupsafe-3.0.3.tar.gz", hash = "sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698", size = 80313 }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/5a/72/147da192e38635ada20e0a2e1a51cf8823d2119ce8883f7053879c2199b5/markupsafe-3.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d53197da72cc091b024dd97249dfc7794d6a56530370992a5e1a08983ad9230e", size = 11615 },
- { url = "https://files.pythonhosted.org/packages/9a/81/7e4e08678a1f98521201c3079f77db69fb552acd56067661f8c2f534a718/markupsafe-3.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1872df69a4de6aead3491198eaf13810b565bdbeec3ae2dc8780f14458ec73ce", size = 12020 },
- { url = "https://files.pythonhosted.org/packages/1e/2c/799f4742efc39633a1b54a92eec4082e4f815314869865d876824c257c1e/markupsafe-3.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a7e8ae81ae39e62a41ec302f972ba6ae23a5c5396c8e60113e9066ef893da0d", size = 24332 },
- { url = "https://files.pythonhosted.org/packages/3c/2e/8d0c2ab90a8c1d9a24f0399058ab8519a3279d1bd4289511d74e909f060e/markupsafe-3.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d6dd0be5b5b189d31db7cda48b91d7e0a9795f31430b7f271219ab30f1d3ac9d", size = 22947 },
- { url = "https://files.pythonhosted.org/packages/2c/54/887f3092a85238093a0b2154bd629c89444f395618842e8b0c41783898ea/markupsafe-3.0.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:94c6f0bb423f739146aec64595853541634bde58b2135f27f61c1ffd1cd4d16a", size = 21962 },
- { url = "https://files.pythonhosted.org/packages/c9/2f/336b8c7b6f4a4d95e91119dc8521402461b74a485558d8f238a68312f11c/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:be8813b57049a7dc738189df53d69395eba14fb99345e0a5994914a3864c8a4b", size = 23760 },
- { url = "https://files.pythonhosted.org/packages/32/43/67935f2b7e4982ffb50a4d169b724d74b62a3964bc1a9a527f5ac4f1ee2b/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:83891d0e9fb81a825d9a6d61e3f07550ca70a076484292a70fde82c4b807286f", size = 21529 },
- { url = "https://files.pythonhosted.org/packages/89/e0/4486f11e51bbba8b0c041098859e869e304d1c261e59244baa3d295d47b7/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:77f0643abe7495da77fb436f50f8dab76dbc6e5fd25d39589a0f1fe6548bfa2b", size = 23015 },
- { url = "https://files.pythonhosted.org/packages/2f/e1/78ee7a023dac597a5825441ebd17170785a9dab23de95d2c7508ade94e0e/markupsafe-3.0.3-cp312-cp312-win32.whl", hash = "sha256:d88b440e37a16e651bda4c7c2b930eb586fd15ca7406cb39e211fcff3bf3017d", size = 14540 },
- { url = "https://files.pythonhosted.org/packages/aa/5b/bec5aa9bbbb2c946ca2733ef9c4ca91c91b6a24580193e891b5f7dbe8e1e/markupsafe-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:26a5784ded40c9e318cfc2bdb30fe164bdb8665ded9cd64d500a34fb42067b1c", size = 15105 },
- { url = "https://files.pythonhosted.org/packages/e5/f1/216fc1bbfd74011693a4fd837e7026152e89c4bcf3e77b6692fba9923123/markupsafe-3.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:35add3b638a5d900e807944a078b51922212fb3dedb01633a8defc4b01a3c85f", size = 13906 },
- { url = "https://files.pythonhosted.org/packages/38/2f/907b9c7bbba283e68f20259574b13d005c121a0fa4c175f9bed27c4597ff/markupsafe-3.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e1cf1972137e83c5d4c136c43ced9ac51d0e124706ee1c8aa8532c1287fa8795", size = 11622 },
- { url = "https://files.pythonhosted.org/packages/9c/d9/5f7756922cdd676869eca1c4e3c0cd0df60ed30199ffd775e319089cb3ed/markupsafe-3.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:116bb52f642a37c115f517494ea5feb03889e04df47eeff5b130b1808ce7c219", size = 12029 },
- { url = "https://files.pythonhosted.org/packages/00/07/575a68c754943058c78f30db02ee03a64b3c638586fba6a6dd56830b30a3/markupsafe-3.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:133a43e73a802c5562be9bbcd03d090aa5a1fe899db609c29e8c8d815c5f6de6", size = 24374 },
- { url = "https://files.pythonhosted.org/packages/a9/21/9b05698b46f218fc0e118e1f8168395c65c8a2c750ae2bab54fc4bd4e0e8/markupsafe-3.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ccfcd093f13f0f0b7fdd0f198b90053bf7b2f02a3927a30e63f3ccc9df56b676", size = 22980 },
- { url = "https://files.pythonhosted.org/packages/7f/71/544260864f893f18b6827315b988c146b559391e6e7e8f7252839b1b846a/markupsafe-3.0.3-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:509fa21c6deb7a7a273d629cf5ec029bc209d1a51178615ddf718f5918992ab9", size = 21990 },
- { url = "https://files.pythonhosted.org/packages/c2/28/b50fc2f74d1ad761af2f5dcce7492648b983d00a65b8c0e0cb457c82ebbe/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4afe79fb3de0b7097d81da19090f4df4f8d3a2b3adaa8764138aac2e44f3af1", size = 23784 },
- { url = "https://files.pythonhosted.org/packages/ed/76/104b2aa106a208da8b17a2fb72e033a5a9d7073c68f7e508b94916ed47a9/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:795e7751525cae078558e679d646ae45574b47ed6e7771863fcc079a6171a0fc", size = 21588 },
- { url = "https://files.pythonhosted.org/packages/b5/99/16a5eb2d140087ebd97180d95249b00a03aa87e29cc224056274f2e45fd6/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8485f406a96febb5140bfeca44a73e3ce5116b2501ac54fe953e488fb1d03b12", size = 23041 },
- { url = "https://files.pythonhosted.org/packages/19/bc/e7140ed90c5d61d77cea142eed9f9c303f4c4806f60a1044c13e3f1471d0/markupsafe-3.0.3-cp313-cp313-win32.whl", hash = "sha256:bdd37121970bfd8be76c5fb069c7751683bdf373db1ed6c010162b2a130248ed", size = 14543 },
- { url = "https://files.pythonhosted.org/packages/05/73/c4abe620b841b6b791f2edc248f556900667a5a1cf023a6646967ae98335/markupsafe-3.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:9a1abfdc021a164803f4d485104931fb8f8c1efd55bc6b748d2f5774e78b62c5", size = 15113 },
- { url = "https://files.pythonhosted.org/packages/f0/3a/fa34a0f7cfef23cf9500d68cb7c32dd64ffd58a12b09225fb03dd37d5b80/markupsafe-3.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:7e68f88e5b8799aa49c85cd116c932a1ac15caaa3f5db09087854d218359e485", size = 13911 },
- { url = "https://files.pythonhosted.org/packages/e4/d7/e05cd7efe43a88a17a37b3ae96e79a19e846f3f456fe79c57ca61356ef01/markupsafe-3.0.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:218551f6df4868a8d527e3062d0fb968682fe92054e89978594c28e642c43a73", size = 11658 },
- { url = "https://files.pythonhosted.org/packages/99/9e/e412117548182ce2148bdeacdda3bb494260c0b0184360fe0d56389b523b/markupsafe-3.0.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3524b778fe5cfb3452a09d31e7b5adefeea8c5be1d43c4f810ba09f2ceb29d37", size = 12066 },
- { url = "https://files.pythonhosted.org/packages/bc/e6/fa0ffcda717ef64a5108eaa7b4f5ed28d56122c9a6d70ab8b72f9f715c80/markupsafe-3.0.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4e885a3d1efa2eadc93c894a21770e4bc67899e3543680313b09f139e149ab19", size = 25639 },
- { url = "https://files.pythonhosted.org/packages/96/ec/2102e881fe9d25fc16cb4b25d5f5cde50970967ffa5dddafdb771237062d/markupsafe-3.0.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8709b08f4a89aa7586de0aadc8da56180242ee0ada3999749b183aa23df95025", size = 23569 },
- { url = "https://files.pythonhosted.org/packages/4b/30/6f2fce1f1f205fc9323255b216ca8a235b15860c34b6798f810f05828e32/markupsafe-3.0.3-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b8512a91625c9b3da6f127803b166b629725e68af71f8184ae7e7d54686a56d6", size = 23284 },
- { url = "https://files.pythonhosted.org/packages/58/47/4a0ccea4ab9f5dcb6f79c0236d954acb382202721e704223a8aafa38b5c8/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9b79b7a16f7fedff2495d684f2b59b0457c3b493778c9eed31111be64d58279f", size = 24801 },
- { url = "https://files.pythonhosted.org/packages/6a/70/3780e9b72180b6fecb83a4814d84c3bf4b4ae4bf0b19c27196104149734c/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:12c63dfb4a98206f045aa9563db46507995f7ef6d83b2f68eda65c307c6829eb", size = 22769 },
- { url = "https://files.pythonhosted.org/packages/98/c5/c03c7f4125180fc215220c035beac6b9cb684bc7a067c84fc69414d315f5/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8f71bc33915be5186016f675cd83a1e08523649b0e33efdb898db577ef5bb009", size = 23642 },
- { url = "https://files.pythonhosted.org/packages/80/d6/2d1b89f6ca4bff1036499b1e29a1d02d282259f3681540e16563f27ebc23/markupsafe-3.0.3-cp313-cp313t-win32.whl", hash = "sha256:69c0b73548bc525c8cb9a251cddf1931d1db4d2258e9599c28c07ef3580ef354", size = 14612 },
- { url = "https://files.pythonhosted.org/packages/2b/98/e48a4bfba0a0ffcf9925fe2d69240bfaa19c6f7507b8cd09c70684a53c1e/markupsafe-3.0.3-cp313-cp313t-win_amd64.whl", hash = "sha256:1b4b79e8ebf6b55351f0d91fe80f893b4743f104bff22e90697db1590e47a218", size = 15200 },
- { url = "https://files.pythonhosted.org/packages/0e/72/e3cc540f351f316e9ed0f092757459afbc595824ca724cbc5a5d4263713f/markupsafe-3.0.3-cp313-cp313t-win_arm64.whl", hash = "sha256:ad2cf8aa28b8c020ab2fc8287b0f823d0a7d8630784c31e9ee5edea20f406287", size = 13973 },
- { url = "https://files.pythonhosted.org/packages/33/8a/8e42d4838cd89b7dde187011e97fe6c3af66d8c044997d2183fbd6d31352/markupsafe-3.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:eaa9599de571d72e2daf60164784109f19978b327a3910d3e9de8c97b5b70cfe", size = 11619 },
- { url = "https://files.pythonhosted.org/packages/b5/64/7660f8a4a8e53c924d0fa05dc3a55c9cee10bbd82b11c5afb27d44b096ce/markupsafe-3.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c47a551199eb8eb2121d4f0f15ae0f923d31350ab9280078d1e5f12b249e0026", size = 12029 },
- { url = "https://files.pythonhosted.org/packages/da/ef/e648bfd021127bef5fa12e1720ffed0c6cbb8310c8d9bea7266337ff06de/markupsafe-3.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f34c41761022dd093b4b6896d4810782ffbabe30f2d443ff5f083e0cbbb8c737", size = 24408 },
- { url = "https://files.pythonhosted.org/packages/41/3c/a36c2450754618e62008bf7435ccb0f88053e07592e6028a34776213d877/markupsafe-3.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:457a69a9577064c05a97c41f4e65148652db078a3a509039e64d3467b9e7ef97", size = 23005 },
- { url = "https://files.pythonhosted.org/packages/bc/20/b7fdf89a8456b099837cd1dc21974632a02a999ec9bf7ca3e490aacd98e7/markupsafe-3.0.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e8afc3f2ccfa24215f8cb28dcf43f0113ac3c37c2f0f0806d8c70e4228c5cf4d", size = 22048 },
- { url = "https://files.pythonhosted.org/packages/9a/a7/591f592afdc734f47db08a75793a55d7fbcc6902a723ae4cfbab61010cc5/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ec15a59cf5af7be74194f7ab02d0f59a62bdcf1a537677ce67a2537c9b87fcda", size = 23821 },
- { url = "https://files.pythonhosted.org/packages/7d/33/45b24e4f44195b26521bc6f1a82197118f74df348556594bd2262bda1038/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:0eb9ff8191e8498cca014656ae6b8d61f39da5f95b488805da4bb029cccbfbaf", size = 21606 },
- { url = "https://files.pythonhosted.org/packages/ff/0e/53dfaca23a69fbfbbf17a4b64072090e70717344c52eaaaa9c5ddff1e5f0/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2713baf880df847f2bece4230d4d094280f4e67b1e813eec43b4c0e144a34ffe", size = 23043 },
- { url = "https://files.pythonhosted.org/packages/46/11/f333a06fc16236d5238bfe74daccbca41459dcd8d1fa952e8fbd5dccfb70/markupsafe-3.0.3-cp314-cp314-win32.whl", hash = "sha256:729586769a26dbceff69f7a7dbbf59ab6572b99d94576a5592625d5b411576b9", size = 14747 },
- { url = "https://files.pythonhosted.org/packages/28/52/182836104b33b444e400b14f797212f720cbc9ed6ba34c800639d154e821/markupsafe-3.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:bdc919ead48f234740ad807933cdf545180bfbe9342c2bb451556db2ed958581", size = 15341 },
- { url = "https://files.pythonhosted.org/packages/6f/18/acf23e91bd94fd7b3031558b1f013adfa21a8e407a3fdb32745538730382/markupsafe-3.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:5a7d5dc5140555cf21a6fefbdbf8723f06fcd2f63ef108f2854de715e4422cb4", size = 14073 },
- { url = "https://files.pythonhosted.org/packages/3c/f0/57689aa4076e1b43b15fdfa646b04653969d50cf30c32a102762be2485da/markupsafe-3.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:1353ef0c1b138e1907ae78e2f6c63ff67501122006b0f9abad68fda5f4ffc6ab", size = 11661 },
- { url = "https://files.pythonhosted.org/packages/89/c3/2e67a7ca217c6912985ec766c6393b636fb0c2344443ff9d91404dc4c79f/markupsafe-3.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1085e7fbddd3be5f89cc898938f42c0b3c711fdcb37d75221de2666af647c175", size = 12069 },
- { url = "https://files.pythonhosted.org/packages/f0/00/be561dce4e6ca66b15276e184ce4b8aec61fe83662cce2f7d72bd3249d28/markupsafe-3.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b52b4fb9df4eb9ae465f8d0c228a00624de2334f216f178a995ccdcf82c4634", size = 25670 },
- { url = "https://files.pythonhosted.org/packages/50/09/c419f6f5a92e5fadde27efd190eca90f05e1261b10dbd8cbcb39cd8ea1dc/markupsafe-3.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fed51ac40f757d41b7c48425901843666a6677e3e8eb0abcff09e4ba6e664f50", size = 23598 },
- { url = "https://files.pythonhosted.org/packages/22/44/a0681611106e0b2921b3033fc19bc53323e0b50bc70cffdd19f7d679bb66/markupsafe-3.0.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f190daf01f13c72eac4efd5c430a8de82489d9cff23c364c3ea822545032993e", size = 23261 },
- { url = "https://files.pythonhosted.org/packages/5f/57/1b0b3f100259dc9fffe780cfb60d4be71375510e435efec3d116b6436d43/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e56b7d45a839a697b5eb268c82a71bd8c7f6c94d6fd50c3d577fa39a9f1409f5", size = 24835 },
- { url = "https://files.pythonhosted.org/packages/26/6a/4bf6d0c97c4920f1597cc14dd720705eca0bf7c787aebc6bb4d1bead5388/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:f3e98bb3798ead92273dc0e5fd0f31ade220f59a266ffd8a4f6065e0a3ce0523", size = 22733 },
- { url = "https://files.pythonhosted.org/packages/14/c7/ca723101509b518797fedc2fdf79ba57f886b4aca8a7d31857ba3ee8281f/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5678211cb9333a6468fb8d8be0305520aa073f50d17f089b5b4b477ea6e67fdc", size = 23672 },
- { url = "https://files.pythonhosted.org/packages/fb/df/5bd7a48c256faecd1d36edc13133e51397e41b73bb77e1a69deab746ebac/markupsafe-3.0.3-cp314-cp314t-win32.whl", hash = "sha256:915c04ba3851909ce68ccc2b8e2cd691618c4dc4c4232fb7982bca3f41fd8c3d", size = 14819 },
- { url = "https://files.pythonhosted.org/packages/1a/8a/0402ba61a2f16038b48b39bccca271134be00c5c9f0f623208399333c448/markupsafe-3.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4faffd047e07c38848ce017e8725090413cd80cbc23d86e55c587bf979e579c9", size = 15426 },
- { url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146 },
+sdist = { url = "https://files.pythonhosted.org/packages/7e/99/7690b6d4034fffd95959cbe0c02de8deb3098cc577c67bb6a24fe5d7caa7/markupsafe-3.0.3.tar.gz", hash = "sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698", size = 80313, upload_time = "2025-09-27T18:37:40.426Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/5a/72/147da192e38635ada20e0a2e1a51cf8823d2119ce8883f7053879c2199b5/markupsafe-3.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d53197da72cc091b024dd97249dfc7794d6a56530370992a5e1a08983ad9230e", size = 11615, upload_time = "2025-09-27T18:36:30.854Z" },
+ { url = "https://files.pythonhosted.org/packages/9a/81/7e4e08678a1f98521201c3079f77db69fb552acd56067661f8c2f534a718/markupsafe-3.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1872df69a4de6aead3491198eaf13810b565bdbeec3ae2dc8780f14458ec73ce", size = 12020, upload_time = "2025-09-27T18:36:31.971Z" },
+ { url = "https://files.pythonhosted.org/packages/1e/2c/799f4742efc39633a1b54a92eec4082e4f815314869865d876824c257c1e/markupsafe-3.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a7e8ae81ae39e62a41ec302f972ba6ae23a5c5396c8e60113e9066ef893da0d", size = 24332, upload_time = "2025-09-27T18:36:32.813Z" },
+ { url = "https://files.pythonhosted.org/packages/3c/2e/8d0c2ab90a8c1d9a24f0399058ab8519a3279d1bd4289511d74e909f060e/markupsafe-3.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d6dd0be5b5b189d31db7cda48b91d7e0a9795f31430b7f271219ab30f1d3ac9d", size = 22947, upload_time = "2025-09-27T18:36:33.86Z" },
+ { url = "https://files.pythonhosted.org/packages/2c/54/887f3092a85238093a0b2154bd629c89444f395618842e8b0c41783898ea/markupsafe-3.0.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:94c6f0bb423f739146aec64595853541634bde58b2135f27f61c1ffd1cd4d16a", size = 21962, upload_time = "2025-09-27T18:36:35.099Z" },
+ { url = "https://files.pythonhosted.org/packages/c9/2f/336b8c7b6f4a4d95e91119dc8521402461b74a485558d8f238a68312f11c/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:be8813b57049a7dc738189df53d69395eba14fb99345e0a5994914a3864c8a4b", size = 23760, upload_time = "2025-09-27T18:36:36.001Z" },
+ { url = "https://files.pythonhosted.org/packages/32/43/67935f2b7e4982ffb50a4d169b724d74b62a3964bc1a9a527f5ac4f1ee2b/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:83891d0e9fb81a825d9a6d61e3f07550ca70a076484292a70fde82c4b807286f", size = 21529, upload_time = "2025-09-27T18:36:36.906Z" },
+ { url = "https://files.pythonhosted.org/packages/89/e0/4486f11e51bbba8b0c041098859e869e304d1c261e59244baa3d295d47b7/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:77f0643abe7495da77fb436f50f8dab76dbc6e5fd25d39589a0f1fe6548bfa2b", size = 23015, upload_time = "2025-09-27T18:36:37.868Z" },
+ { url = "https://files.pythonhosted.org/packages/2f/e1/78ee7a023dac597a5825441ebd17170785a9dab23de95d2c7508ade94e0e/markupsafe-3.0.3-cp312-cp312-win32.whl", hash = "sha256:d88b440e37a16e651bda4c7c2b930eb586fd15ca7406cb39e211fcff3bf3017d", size = 14540, upload_time = "2025-09-27T18:36:38.761Z" },
+ { url = "https://files.pythonhosted.org/packages/aa/5b/bec5aa9bbbb2c946ca2733ef9c4ca91c91b6a24580193e891b5f7dbe8e1e/markupsafe-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:26a5784ded40c9e318cfc2bdb30fe164bdb8665ded9cd64d500a34fb42067b1c", size = 15105, upload_time = "2025-09-27T18:36:39.701Z" },
+ { url = "https://files.pythonhosted.org/packages/e5/f1/216fc1bbfd74011693a4fd837e7026152e89c4bcf3e77b6692fba9923123/markupsafe-3.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:35add3b638a5d900e807944a078b51922212fb3dedb01633a8defc4b01a3c85f", size = 13906, upload_time = "2025-09-27T18:36:40.689Z" },
+ { url = "https://files.pythonhosted.org/packages/38/2f/907b9c7bbba283e68f20259574b13d005c121a0fa4c175f9bed27c4597ff/markupsafe-3.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e1cf1972137e83c5d4c136c43ced9ac51d0e124706ee1c8aa8532c1287fa8795", size = 11622, upload_time = "2025-09-27T18:36:41.777Z" },
+ { url = "https://files.pythonhosted.org/packages/9c/d9/5f7756922cdd676869eca1c4e3c0cd0df60ed30199ffd775e319089cb3ed/markupsafe-3.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:116bb52f642a37c115f517494ea5feb03889e04df47eeff5b130b1808ce7c219", size = 12029, upload_time = "2025-09-27T18:36:43.257Z" },
+ { url = "https://files.pythonhosted.org/packages/00/07/575a68c754943058c78f30db02ee03a64b3c638586fba6a6dd56830b30a3/markupsafe-3.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:133a43e73a802c5562be9bbcd03d090aa5a1fe899db609c29e8c8d815c5f6de6", size = 24374, upload_time = "2025-09-27T18:36:44.508Z" },
+ { url = "https://files.pythonhosted.org/packages/a9/21/9b05698b46f218fc0e118e1f8168395c65c8a2c750ae2bab54fc4bd4e0e8/markupsafe-3.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ccfcd093f13f0f0b7fdd0f198b90053bf7b2f02a3927a30e63f3ccc9df56b676", size = 22980, upload_time = "2025-09-27T18:36:45.385Z" },
+ { url = "https://files.pythonhosted.org/packages/7f/71/544260864f893f18b6827315b988c146b559391e6e7e8f7252839b1b846a/markupsafe-3.0.3-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:509fa21c6deb7a7a273d629cf5ec029bc209d1a51178615ddf718f5918992ab9", size = 21990, upload_time = "2025-09-27T18:36:46.916Z" },
+ { url = "https://files.pythonhosted.org/packages/c2/28/b50fc2f74d1ad761af2f5dcce7492648b983d00a65b8c0e0cb457c82ebbe/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4afe79fb3de0b7097d81da19090f4df4f8d3a2b3adaa8764138aac2e44f3af1", size = 23784, upload_time = "2025-09-27T18:36:47.884Z" },
+ { url = "https://files.pythonhosted.org/packages/ed/76/104b2aa106a208da8b17a2fb72e033a5a9d7073c68f7e508b94916ed47a9/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:795e7751525cae078558e679d646ae45574b47ed6e7771863fcc079a6171a0fc", size = 21588, upload_time = "2025-09-27T18:36:48.82Z" },
+ { url = "https://files.pythonhosted.org/packages/b5/99/16a5eb2d140087ebd97180d95249b00a03aa87e29cc224056274f2e45fd6/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8485f406a96febb5140bfeca44a73e3ce5116b2501ac54fe953e488fb1d03b12", size = 23041, upload_time = "2025-09-27T18:36:49.797Z" },
+ { url = "https://files.pythonhosted.org/packages/19/bc/e7140ed90c5d61d77cea142eed9f9c303f4c4806f60a1044c13e3f1471d0/markupsafe-3.0.3-cp313-cp313-win32.whl", hash = "sha256:bdd37121970bfd8be76c5fb069c7751683bdf373db1ed6c010162b2a130248ed", size = 14543, upload_time = "2025-09-27T18:36:51.584Z" },
+ { url = "https://files.pythonhosted.org/packages/05/73/c4abe620b841b6b791f2edc248f556900667a5a1cf023a6646967ae98335/markupsafe-3.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:9a1abfdc021a164803f4d485104931fb8f8c1efd55bc6b748d2f5774e78b62c5", size = 15113, upload_time = "2025-09-27T18:36:52.537Z" },
+ { url = "https://files.pythonhosted.org/packages/f0/3a/fa34a0f7cfef23cf9500d68cb7c32dd64ffd58a12b09225fb03dd37d5b80/markupsafe-3.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:7e68f88e5b8799aa49c85cd116c932a1ac15caaa3f5db09087854d218359e485", size = 13911, upload_time = "2025-09-27T18:36:53.513Z" },
+ { url = "https://files.pythonhosted.org/packages/e4/d7/e05cd7efe43a88a17a37b3ae96e79a19e846f3f456fe79c57ca61356ef01/markupsafe-3.0.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:218551f6df4868a8d527e3062d0fb968682fe92054e89978594c28e642c43a73", size = 11658, upload_time = "2025-09-27T18:36:54.819Z" },
+ { url = "https://files.pythonhosted.org/packages/99/9e/e412117548182ce2148bdeacdda3bb494260c0b0184360fe0d56389b523b/markupsafe-3.0.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3524b778fe5cfb3452a09d31e7b5adefeea8c5be1d43c4f810ba09f2ceb29d37", size = 12066, upload_time = "2025-09-27T18:36:55.714Z" },
+ { url = "https://files.pythonhosted.org/packages/bc/e6/fa0ffcda717ef64a5108eaa7b4f5ed28d56122c9a6d70ab8b72f9f715c80/markupsafe-3.0.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4e885a3d1efa2eadc93c894a21770e4bc67899e3543680313b09f139e149ab19", size = 25639, upload_time = "2025-09-27T18:36:56.908Z" },
+ { url = "https://files.pythonhosted.org/packages/96/ec/2102e881fe9d25fc16cb4b25d5f5cde50970967ffa5dddafdb771237062d/markupsafe-3.0.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8709b08f4a89aa7586de0aadc8da56180242ee0ada3999749b183aa23df95025", size = 23569, upload_time = "2025-09-27T18:36:57.913Z" },
+ { url = "https://files.pythonhosted.org/packages/4b/30/6f2fce1f1f205fc9323255b216ca8a235b15860c34b6798f810f05828e32/markupsafe-3.0.3-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b8512a91625c9b3da6f127803b166b629725e68af71f8184ae7e7d54686a56d6", size = 23284, upload_time = "2025-09-27T18:36:58.833Z" },
+ { url = "https://files.pythonhosted.org/packages/58/47/4a0ccea4ab9f5dcb6f79c0236d954acb382202721e704223a8aafa38b5c8/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9b79b7a16f7fedff2495d684f2b59b0457c3b493778c9eed31111be64d58279f", size = 24801, upload_time = "2025-09-27T18:36:59.739Z" },
+ { url = "https://files.pythonhosted.org/packages/6a/70/3780e9b72180b6fecb83a4814d84c3bf4b4ae4bf0b19c27196104149734c/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:12c63dfb4a98206f045aa9563db46507995f7ef6d83b2f68eda65c307c6829eb", size = 22769, upload_time = "2025-09-27T18:37:00.719Z" },
+ { url = "https://files.pythonhosted.org/packages/98/c5/c03c7f4125180fc215220c035beac6b9cb684bc7a067c84fc69414d315f5/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8f71bc33915be5186016f675cd83a1e08523649b0e33efdb898db577ef5bb009", size = 23642, upload_time = "2025-09-27T18:37:01.673Z" },
+ { url = "https://files.pythonhosted.org/packages/80/d6/2d1b89f6ca4bff1036499b1e29a1d02d282259f3681540e16563f27ebc23/markupsafe-3.0.3-cp313-cp313t-win32.whl", hash = "sha256:69c0b73548bc525c8cb9a251cddf1931d1db4d2258e9599c28c07ef3580ef354", size = 14612, upload_time = "2025-09-27T18:37:02.639Z" },
+ { url = "https://files.pythonhosted.org/packages/2b/98/e48a4bfba0a0ffcf9925fe2d69240bfaa19c6f7507b8cd09c70684a53c1e/markupsafe-3.0.3-cp313-cp313t-win_amd64.whl", hash = "sha256:1b4b79e8ebf6b55351f0d91fe80f893b4743f104bff22e90697db1590e47a218", size = 15200, upload_time = "2025-09-27T18:37:03.582Z" },
+ { url = "https://files.pythonhosted.org/packages/0e/72/e3cc540f351f316e9ed0f092757459afbc595824ca724cbc5a5d4263713f/markupsafe-3.0.3-cp313-cp313t-win_arm64.whl", hash = "sha256:ad2cf8aa28b8c020ab2fc8287b0f823d0a7d8630784c31e9ee5edea20f406287", size = 13973, upload_time = "2025-09-27T18:37:04.929Z" },
+ { url = "https://files.pythonhosted.org/packages/33/8a/8e42d4838cd89b7dde187011e97fe6c3af66d8c044997d2183fbd6d31352/markupsafe-3.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:eaa9599de571d72e2daf60164784109f19978b327a3910d3e9de8c97b5b70cfe", size = 11619, upload_time = "2025-09-27T18:37:06.342Z" },
+ { url = "https://files.pythonhosted.org/packages/b5/64/7660f8a4a8e53c924d0fa05dc3a55c9cee10bbd82b11c5afb27d44b096ce/markupsafe-3.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c47a551199eb8eb2121d4f0f15ae0f923d31350ab9280078d1e5f12b249e0026", size = 12029, upload_time = "2025-09-27T18:37:07.213Z" },
+ { url = "https://files.pythonhosted.org/packages/da/ef/e648bfd021127bef5fa12e1720ffed0c6cbb8310c8d9bea7266337ff06de/markupsafe-3.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f34c41761022dd093b4b6896d4810782ffbabe30f2d443ff5f083e0cbbb8c737", size = 24408, upload_time = "2025-09-27T18:37:09.572Z" },
+ { url = "https://files.pythonhosted.org/packages/41/3c/a36c2450754618e62008bf7435ccb0f88053e07592e6028a34776213d877/markupsafe-3.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:457a69a9577064c05a97c41f4e65148652db078a3a509039e64d3467b9e7ef97", size = 23005, upload_time = "2025-09-27T18:37:10.58Z" },
+ { url = "https://files.pythonhosted.org/packages/bc/20/b7fdf89a8456b099837cd1dc21974632a02a999ec9bf7ca3e490aacd98e7/markupsafe-3.0.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e8afc3f2ccfa24215f8cb28dcf43f0113ac3c37c2f0f0806d8c70e4228c5cf4d", size = 22048, upload_time = "2025-09-27T18:37:11.547Z" },
+ { url = "https://files.pythonhosted.org/packages/9a/a7/591f592afdc734f47db08a75793a55d7fbcc6902a723ae4cfbab61010cc5/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ec15a59cf5af7be74194f7ab02d0f59a62bdcf1a537677ce67a2537c9b87fcda", size = 23821, upload_time = "2025-09-27T18:37:12.48Z" },
+ { url = "https://files.pythonhosted.org/packages/7d/33/45b24e4f44195b26521bc6f1a82197118f74df348556594bd2262bda1038/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:0eb9ff8191e8498cca014656ae6b8d61f39da5f95b488805da4bb029cccbfbaf", size = 21606, upload_time = "2025-09-27T18:37:13.485Z" },
+ { url = "https://files.pythonhosted.org/packages/ff/0e/53dfaca23a69fbfbbf17a4b64072090e70717344c52eaaaa9c5ddff1e5f0/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2713baf880df847f2bece4230d4d094280f4e67b1e813eec43b4c0e144a34ffe", size = 23043, upload_time = "2025-09-27T18:37:14.408Z" },
+ { url = "https://files.pythonhosted.org/packages/46/11/f333a06fc16236d5238bfe74daccbca41459dcd8d1fa952e8fbd5dccfb70/markupsafe-3.0.3-cp314-cp314-win32.whl", hash = "sha256:729586769a26dbceff69f7a7dbbf59ab6572b99d94576a5592625d5b411576b9", size = 14747, upload_time = "2025-09-27T18:37:15.36Z" },
+ { url = "https://files.pythonhosted.org/packages/28/52/182836104b33b444e400b14f797212f720cbc9ed6ba34c800639d154e821/markupsafe-3.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:bdc919ead48f234740ad807933cdf545180bfbe9342c2bb451556db2ed958581", size = 15341, upload_time = "2025-09-27T18:37:16.496Z" },
+ { url = "https://files.pythonhosted.org/packages/6f/18/acf23e91bd94fd7b3031558b1f013adfa21a8e407a3fdb32745538730382/markupsafe-3.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:5a7d5dc5140555cf21a6fefbdbf8723f06fcd2f63ef108f2854de715e4422cb4", size = 14073, upload_time = "2025-09-27T18:37:17.476Z" },
+ { url = "https://files.pythonhosted.org/packages/3c/f0/57689aa4076e1b43b15fdfa646b04653969d50cf30c32a102762be2485da/markupsafe-3.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:1353ef0c1b138e1907ae78e2f6c63ff67501122006b0f9abad68fda5f4ffc6ab", size = 11661, upload_time = "2025-09-27T18:37:18.453Z" },
+ { url = "https://files.pythonhosted.org/packages/89/c3/2e67a7ca217c6912985ec766c6393b636fb0c2344443ff9d91404dc4c79f/markupsafe-3.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1085e7fbddd3be5f89cc898938f42c0b3c711fdcb37d75221de2666af647c175", size = 12069, upload_time = "2025-09-27T18:37:19.332Z" },
+ { url = "https://files.pythonhosted.org/packages/f0/00/be561dce4e6ca66b15276e184ce4b8aec61fe83662cce2f7d72bd3249d28/markupsafe-3.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b52b4fb9df4eb9ae465f8d0c228a00624de2334f216f178a995ccdcf82c4634", size = 25670, upload_time = "2025-09-27T18:37:20.245Z" },
+ { url = "https://files.pythonhosted.org/packages/50/09/c419f6f5a92e5fadde27efd190eca90f05e1261b10dbd8cbcb39cd8ea1dc/markupsafe-3.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fed51ac40f757d41b7c48425901843666a6677e3e8eb0abcff09e4ba6e664f50", size = 23598, upload_time = "2025-09-27T18:37:21.177Z" },
+ { url = "https://files.pythonhosted.org/packages/22/44/a0681611106e0b2921b3033fc19bc53323e0b50bc70cffdd19f7d679bb66/markupsafe-3.0.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f190daf01f13c72eac4efd5c430a8de82489d9cff23c364c3ea822545032993e", size = 23261, upload_time = "2025-09-27T18:37:22.167Z" },
+ { url = "https://files.pythonhosted.org/packages/5f/57/1b0b3f100259dc9fffe780cfb60d4be71375510e435efec3d116b6436d43/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e56b7d45a839a697b5eb268c82a71bd8c7f6c94d6fd50c3d577fa39a9f1409f5", size = 24835, upload_time = "2025-09-27T18:37:23.296Z" },
+ { url = "https://files.pythonhosted.org/packages/26/6a/4bf6d0c97c4920f1597cc14dd720705eca0bf7c787aebc6bb4d1bead5388/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:f3e98bb3798ead92273dc0e5fd0f31ade220f59a266ffd8a4f6065e0a3ce0523", size = 22733, upload_time = "2025-09-27T18:37:24.237Z" },
+ { url = "https://files.pythonhosted.org/packages/14/c7/ca723101509b518797fedc2fdf79ba57f886b4aca8a7d31857ba3ee8281f/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5678211cb9333a6468fb8d8be0305520aa073f50d17f089b5b4b477ea6e67fdc", size = 23672, upload_time = "2025-09-27T18:37:25.271Z" },
+ { url = "https://files.pythonhosted.org/packages/fb/df/5bd7a48c256faecd1d36edc13133e51397e41b73bb77e1a69deab746ebac/markupsafe-3.0.3-cp314-cp314t-win32.whl", hash = "sha256:915c04ba3851909ce68ccc2b8e2cd691618c4dc4c4232fb7982bca3f41fd8c3d", size = 14819, upload_time = "2025-09-27T18:37:26.285Z" },
+ { url = "https://files.pythonhosted.org/packages/1a/8a/0402ba61a2f16038b48b39bccca271134be00c5c9f0f623208399333c448/markupsafe-3.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4faffd047e07c38848ce017e8725090413cd80cbc23d86e55c587bf979e579c9", size = 15426, upload_time = "2025-09-27T18:37:27.316Z" },
+ { url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload_time = "2025-09-27T18:37:28.327Z" },
]

[[package]]
@@ -1331,215 +1379,215 @@ source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "markdown-it-py" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/b2/fd/a756d36c0bfba5f6e39a1cdbdbfdd448dc02692467d83816dff4592a1ebc/mdit_py_plugins-0.5.0.tar.gz", hash = "sha256:f4918cb50119f50446560513a8e311d574ff6aaed72606ddae6d35716fe809c6", size = 44655 }
+sdist = { url = "https://files.pythonhosted.org/packages/b2/fd/a756d36c0bfba5f6e39a1cdbdbfdd448dc02692467d83816dff4592a1ebc/mdit_py_plugins-0.5.0.tar.gz", hash = "sha256:f4918cb50119f50446560513a8e311d574ff6aaed72606ddae6d35716fe809c6", size = 44655, upload_time = "2025-08-11T07:25:49.083Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/fb/86/dd6e5db36df29e76c7a7699123569a4a18c1623ce68d826ed96c62643cae/mdit_py_plugins-0.5.0-py3-none-any.whl", hash = "sha256:07a08422fc1936a5d26d146759e9155ea466e842f5ab2f7d2266dd084c8dab1f", size = 57205 },
+ { url = "https://files.pythonhosted.org/packages/fb/86/dd6e5db36df29e76c7a7699123569a4a18c1623ce68d826ed96c62643cae/mdit_py_plugins-0.5.0-py3-none-any.whl", hash = "sha256:07a08422fc1936a5d26d146759e9155ea466e842f5ab2f7d2266dd084c8dab1f", size = 57205, upload_time = "2025-08-11T07:25:47.597Z" },
]

[[package]]
name = "mdurl"
version = "0.1.2"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729 }
+sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload_time = "2022-08-14T12:40:10.846Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979 },
+ { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload_time = "2022-08-14T12:40:09.779Z" },
]

[[package]]
name = "mmh3"
version = "5.2.0"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/a7/af/f28c2c2f51f31abb4725f9a64bc7863d5f491f6539bd26aee2a1d21a649e/mmh3-5.2.0.tar.gz", hash = "sha256:1efc8fec8478e9243a78bb993422cf79f8ff85cb4cf6b79647480a31e0d950a8", size = 33582 }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/bf/6a/d5aa7edb5c08e0bd24286c7d08341a0446f9a2fbbb97d96a8a6dd81935ee/mmh3-5.2.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:384eda9361a7bf83a85e09447e1feafe081034af9dd428893701b959230d84be", size = 56141 },
- { url = "https://files.pythonhosted.org/packages/08/49/131d0fae6447bc4a7299ebdb1a6fb9d08c9f8dcf97d75ea93e8152ddf7ab/mmh3-5.2.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2c9da0d568569cc87315cb063486d761e38458b8ad513fedd3dc9263e1b81bcd", size = 40681 },
- { url = "https://files.pythonhosted.org/packages/8f/6f/9221445a6bcc962b7f5ff3ba18ad55bba624bacdc7aa3fc0a518db7da8ec/mmh3-5.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:86d1be5d63232e6eb93c50881aea55ff06eb86d8e08f9b5417c8c9b10db9db96", size = 40062 },
- { url = "https://files.pythonhosted.org/packages/1e/d4/6bb2d0fef81401e0bb4c297d1eb568b767de4ce6fc00890bc14d7b51ecc4/mmh3-5.2.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:bf7bee43e17e81671c447e9c83499f53d99bf440bc6d9dc26a841e21acfbe094", size = 97333 },
- { url = "https://files.pythonhosted.org/packages/44/e0/ccf0daff8134efbb4fbc10a945ab53302e358c4b016ada9bf97a6bdd50c1/mmh3-5.2.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7aa18cdb58983ee660c9c400b46272e14fa253c675ed963d3812487f8ca42037", size = 103310 },
- { url = "https://files.pythonhosted.org/packages/02/63/1965cb08a46533faca0e420e06aff8bbaf9690a6f0ac6ae6e5b2e4544687/mmh3-5.2.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ae9d032488fcec32d22be6542d1a836f00247f40f320844dbb361393b5b22773", size = 106178 },
- { url = "https://files.pythonhosted.org/packages/c2/41/c883ad8e2c234013f27f92061200afc11554ea55edd1bcf5e1accd803a85/mmh3-5.2.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e1861fb6b1d0453ed7293200139c0a9011eeb1376632e048e3766945b13313c5", size = 113035 },
- { url = "https://files.pythonhosted.org/packages/df/b5/1ccade8b1fa625d634a18bab7bf08a87457e09d5ec8cf83ca07cbea9d400/mmh3-5.2.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:99bb6a4d809aa4e528ddfe2c85dd5239b78b9dd14be62cca0329db78505e7b50", size = 120784 },
- { url = "https://files.pythonhosted.org/packages/77/1c/919d9171fcbdcdab242e06394464ccf546f7d0f3b31e0d1e3a630398782e/mmh3-5.2.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1f8d8b627799f4e2fcc7c034fed8f5f24dc7724ff52f69838a3d6d15f1ad4765", size = 99137 },
- { url = "https://files.pythonhosted.org/packages/66/8a/1eebef5bd6633d36281d9fc83cf2e9ba1ba0e1a77dff92aacab83001cee4/mmh3-5.2.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:b5995088dd7023d2d9f310a0c67de5a2b2e06a570ecfd00f9ff4ab94a67cde43", size = 98664 },
- { url = "https://files.pythonhosted.org/packages/13/41/a5d981563e2ee682b21fb65e29cc0f517a6734a02b581359edd67f9d0360/mmh3-5.2.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:1a5f4d2e59d6bba8ef01b013c472741835ad961e7c28f50c82b27c57748744a4", size = 106459 },
- { url = "https://files.pythonhosted.org/packages/24/31/342494cd6ab792d81e083680875a2c50fa0c5df475ebf0b67784f13e4647/mmh3-5.2.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:fd6e6c3d90660d085f7e73710eab6f5545d4854b81b0135a3526e797009dbda3", size = 110038 },
- { url = "https://files.pythonhosted.org/packages/28/44/efda282170a46bb4f19c3e2b90536513b1d821c414c28469a227ca5a1789/mmh3-5.2.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c4a2f3d83879e3de2eb8cbf562e71563a8ed15ee9b9c2e77ca5d9f73072ac15c", size = 97545 },
- { url = "https://files.pythonhosted.org/packages/68/8f/534ae319c6e05d714f437e7206f78c17e66daca88164dff70286b0e8ea0c/mmh3-5.2.0-cp312-cp312-win32.whl", hash = "sha256:2421b9d665a0b1ad724ec7332fb5a98d075f50bc51a6ff854f3a1882bd650d49", size = 40805 },
- { url = "https://files.pythonhosted.org/packages/b8/f6/f6abdcfefcedab3c964868048cfe472764ed358c2bf6819a70dd4ed4ed3a/mmh3-5.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:72d80005b7634a3a2220f81fbeb94775ebd12794623bb2e1451701ea732b4aa3", size = 41597 },
- { url = "https://files.pythonhosted.org/packages/15/fd/f7420e8cbce45c259c770cac5718badf907b302d3a99ec587ba5ce030237/mmh3-5.2.0-cp312-cp312-win_arm64.whl", hash = "sha256:3d6bfd9662a20c054bc216f861fa330c2dac7c81e7fb8307b5e32ab5b9b4d2e0", size = 39350 },
- { url = "https://files.pythonhosted.org/packages/d8/fa/27f6ab93995ef6ad9f940e96593c5dd24744d61a7389532b0fec03745607/mmh3-5.2.0-cp313-cp313-android_21_arm64_v8a.whl", hash = "sha256:e79c00eba78f7258e5b354eccd4d7907d60317ced924ea4a5f2e9d83f5453065", size = 40874 },
- { url = "https://files.pythonhosted.org/packages/11/9c/03d13bcb6a03438bc8cac3d2e50f80908d159b31a4367c2e1a7a077ded32/mmh3-5.2.0-cp313-cp313-android_21_x86_64.whl", hash = "sha256:956127e663d05edbeec54df38885d943dfa27406594c411139690485128525de", size = 42012 },
- { url = "https://files.pythonhosted.org/packages/4e/78/0865d9765408a7d504f1789944e678f74e0888b96a766d578cb80b040999/mmh3-5.2.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:c3dca4cb5b946ee91b3d6bb700d137b1cd85c20827f89fdf9c16258253489044", size = 39197 },
- { url = "https://files.pythonhosted.org/packages/3e/12/76c3207bd186f98b908b6706c2317abb73756d23a4e68ea2bc94825b9015/mmh3-5.2.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:e651e17bfde5840e9e4174b01e9e080ce49277b70d424308b36a7969d0d1af73", size = 39840 },
- { url = "https://files.pythonhosted.org/packages/5d/0d/574b6cce5555c9f2b31ea189ad44986755eb14e8862db28c8b834b8b64dc/mmh3-5.2.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:9f64bf06f4bf623325fda3a6d02d36cd69199b9ace99b04bb2d7fd9f89688504", size = 40644 },
- { url = "https://files.pythonhosted.org/packages/52/82/3731f8640b79c46707f53ed72034a58baad400be908c87b0088f1f89f986/mmh3-5.2.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ddc63328889bcaee77b743309e5c7d2d52cee0d7d577837c91b6e7cc9e755e0b", size = 56153 },
- { url = "https://files.pythonhosted.org/packages/4f/34/e02dca1d4727fd9fdeaff9e2ad6983e1552804ce1d92cc796e5b052159bb/mmh3-5.2.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:bb0fdc451fb6d86d81ab8f23d881b8d6e37fc373a2deae1c02d27002d2ad7a05", size = 40684 },
- { url = "https://files.pythonhosted.org/packages/8f/36/3dee40767356e104967e6ed6d102ba47b0b1ce2a89432239b95a94de1b89/mmh3-5.2.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:b29044e1ffdb84fe164d0a7ea05c7316afea93c00f8ed9449cf357c36fc4f814", size = 40057 },
- { url = "https://files.pythonhosted.org/packages/31/58/228c402fccf76eb39a0a01b8fc470fecf21965584e66453b477050ee0e99/mmh3-5.2.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:58981d6ea9646dbbf9e59a30890cbf9f610df0e4a57dbfe09215116fd90b0093", size = 97344 },
- { url = "https://files.pythonhosted.org/packages/34/82/fc5ce89006389a6426ef28e326fc065b0fbaaed230373b62d14c889f47ea/mmh3-5.2.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7e5634565367b6d98dc4aa2983703526ef556b3688ba3065edb4b9b90ede1c54", size = 103325 },
- { url = "https://files.pythonhosted.org/packages/09/8c/261e85777c6aee1ebd53f2f17e210e7481d5b0846cd0b4a5c45f1e3761b8/mmh3-5.2.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b0271ac12415afd3171ab9a3c7cbfc71dee2c68760a7dc9d05bf8ed6ddfa3a7a", size = 106240 },
- { url = "https://files.pythonhosted.org/packages/70/73/2f76b3ad8a3d431824e9934403df36c0ddacc7831acf82114bce3c4309c8/mmh3-5.2.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:45b590e31bc552c6f8e2150ff1ad0c28dd151e9f87589e7eaf508fbdd8e8e908", size = 113060 },
- { url = "https://files.pythonhosted.org/packages/9f/b9/7ea61a34e90e50a79a9d87aa1c0b8139a7eaf4125782b34b7d7383472633/mmh3-5.2.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:bdde97310d59604f2a9119322f61b31546748499a21b44f6715e8ced9308a6c5", size = 120781 },
- { url = "https://files.pythonhosted.org/packages/0f/5b/ae1a717db98c7894a37aeedbd94b3f99e6472a836488f36b6849d003485b/mmh3-5.2.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:fc9c5f280438cf1c1a8f9abb87dc8ce9630a964120cfb5dd50d1e7ce79690c7a", size = 99174 },
- { url = "https://files.pythonhosted.org/packages/e3/de/000cce1d799fceebb6d4487ae29175dd8e81b48e314cba7b4da90bcf55d7/mmh3-5.2.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:c903e71fd8debb35ad2a4184c1316b3cb22f64ce517b4e6747f25b0a34e41266", size = 98734 },
- { url = "https://files.pythonhosted.org/packages/79/19/0dc364391a792b72fbb22becfdeacc5add85cc043cd16986e82152141883/mmh3-5.2.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:eed4bba7ff8a0d37106ba931ab03bdd3915fbb025bcf4e1f0aa02bc8114960c5", size = 106493 },
- { url = "https://files.pythonhosted.org/packages/3c/b1/bc8c28e4d6e807bbb051fefe78e1156d7f104b89948742ad310612ce240d/mmh3-5.2.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:1fdb36b940e9261aff0b5177c5b74a36936b902f473180f6c15bde26143681a9", size = 110089 },
- { url = "https://files.pythonhosted.org/packages/3b/a2/d20f3f5c95e9c511806686c70d0a15479cc3941c5f322061697af1c1ff70/mmh3-5.2.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7303aab41e97adcf010a09efd8f1403e719e59b7705d5e3cfed3dd7571589290", size = 97571 },
- { url = "https://files.pythonhosted.org/packages/7b/23/665296fce4f33488deec39a750ffd245cfc07aafb0e3ef37835f91775d14/mmh3-5.2.0-cp313-cp313-win32.whl", hash = "sha256:03e08c6ebaf666ec1e3d6ea657a2d363bb01effd1a9acfe41f9197decaef0051", size = 40806 },
- { url = "https://files.pythonhosted.org/packages/59/b0/92e7103f3b20646e255b699e2d0327ce53a3f250e44367a99dc8be0b7c7a/mmh3-5.2.0-cp313-cp313-win_amd64.whl", hash =
"sha256:7fddccd4113e7b736706e17a239a696332360cbaddf25ae75b57ba1acce65081", size = 41600 }, - { url = "https://files.pythonhosted.org/packages/99/22/0b2bd679a84574647de538c5b07ccaa435dbccc37815067fe15b90fe8dad/mmh3-5.2.0-cp313-cp313-win_arm64.whl", hash = "sha256:fa0c966ee727aad5406d516375593c5f058c766b21236ab8985693934bb5085b", size = 39349 }, - { url = "https://files.pythonhosted.org/packages/f7/ca/a20db059a8a47048aaf550da14a145b56e9c7386fb8280d3ce2962dcebf7/mmh3-5.2.0-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:e5015f0bb6eb50008bed2d4b1ce0f2a294698a926111e4bb202c0987b4f89078", size = 39209 }, - { url = "https://files.pythonhosted.org/packages/98/dd/e5094799d55c7482d814b979a0fd608027d0af1b274bfb4c3ea3e950bfd5/mmh3-5.2.0-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:e0f3ed828d709f5b82d8bfe14f8856120718ec4bd44a5b26102c3030a1e12501", size = 39843 }, - { url = "https://files.pythonhosted.org/packages/f4/6b/7844d7f832c85400e7cc89a1348e4e1fdd38c5a38415bb5726bbb8fcdb6c/mmh3-5.2.0-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:f35727c5118aba95f0397e18a1a5b8405425581bfe53e821f0fb444cbdc2bc9b", size = 40648 }, - { url = "https://files.pythonhosted.org/packages/1f/bf/71f791f48a21ff3190ba5225807cbe4f7223360e96862c376e6e3fb7efa7/mmh3-5.2.0-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:3bc244802ccab5220008cb712ca1508cb6a12f0eb64ad62997156410579a1770", size = 56164 }, - { url = "https://files.pythonhosted.org/packages/70/1f/f87e3d34d83032b4f3f0f528c6d95a98290fcacf019da61343a49dccfd51/mmh3-5.2.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:ff3d50dc3fe8a98059f99b445dfb62792b5d006c5e0b8f03c6de2813b8376110", size = 40692 }, - { url = "https://files.pythonhosted.org/packages/a6/e2/db849eaed07117086f3452feca8c839d30d38b830ac59fe1ce65af8be5ad/mmh3-5.2.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:37a358cc881fe796e099c1db6ce07ff757f088827b4e8467ac52b7a7ffdca647", size = 40068 }, - { url = "https://files.pythonhosted.org/packages/df/6b/209af927207af77425b044e32f77f49105a0b05d82ff88af6971d8da4e19/mmh3-5.2.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:b9a87025121d1c448f24f27ff53a5fe7b6ef980574b4a4f11acaabe702420d63", size = 97367 }, - { url = "https://files.pythonhosted.org/packages/ca/e0/78adf4104c425606a9ce33fb351f790c76a6c2314969c4a517d1ffc92196/mmh3-5.2.0-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:1ba55d6ca32eeef8b2625e1e4bfc3b3db52bc63014bd7e5df8cc11bf2b036b12", size = 103306 }, - { url = "https://files.pythonhosted.org/packages/a3/79/c2b89f91b962658b890104745b1b6c9ce38d50a889f000b469b91eeb1b9e/mmh3-5.2.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c9ff37ba9f15637e424c2ab57a1a590c52897c845b768e4e0a4958084ec87f22", size = 106312 }, - { url = "https://files.pythonhosted.org/packages/4b/14/659d4095528b1a209be90934778c5ffe312177d51e365ddcbca2cac2ec7c/mmh3-5.2.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a094319ec0db52a04af9fdc391b4d39a1bc72bc8424b47c4411afb05413a44b5", size = 113135 }, - { url = "https://files.pythonhosted.org/packages/8d/6f/cd7734a779389a8a467b5c89a48ff476d6f2576e78216a37551a97e9e42a/mmh3-5.2.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c5584061fd3da584659b13587f26c6cad25a096246a481636d64375d0c1f6c07", size = 120775 }, - { url = 
"https://files.pythonhosted.org/packages/1d/ca/8256e3b96944408940de3f9291d7e38a283b5761fe9614d4808fcf27bd62/mmh3-5.2.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ecbfc0437ddfdced5e7822d1ce4855c9c64f46819d0fdc4482c53f56c707b935", size = 99178 }, - { url = "https://files.pythonhosted.org/packages/8a/32/39e2b3cf06b6e2eb042c984dab8680841ac2a0d3ca6e0bea30db1f27b565/mmh3-5.2.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:7b986d506a8e8ea345791897ba5d8ba0d9d8820cd4fc3e52dbe6de19388de2e7", size = 98738 }, - { url = "https://files.pythonhosted.org/packages/61/d3/7bbc8e0e8cf65ebbe1b893ffa0467b7ecd1bd07c3bbf6c9db4308ada22ec/mmh3-5.2.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:38d899a156549da8ef6a9f1d6f7ef231228d29f8f69bce2ee12f5fba6d6fd7c5", size = 106510 }, - { url = "https://files.pythonhosted.org/packages/10/99/b97e53724b52374e2f3859046f0eb2425192da356cb19784d64bc17bb1cf/mmh3-5.2.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:d86651fa45799530885ba4dab3d21144486ed15285e8784181a0ab37a4552384", size = 110053 }, - { url = "https://files.pythonhosted.org/packages/ac/62/3688c7d975ed195155671df68788c83fed6f7909b6ec4951724c6860cb97/mmh3-5.2.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:c463d7c1c4cfc9d751efeaadd936bbba07b5b0ed81a012b3a9f5a12f0872bd6e", size = 97546 }, - { url = "https://files.pythonhosted.org/packages/ca/3b/c6153250f03f71a8b7634cded82939546cdfba02e32f124ff51d52c6f991/mmh3-5.2.0-cp314-cp314-win32.whl", hash = "sha256:bb4fe46bdc6104fbc28db7a6bacb115ee6368ff993366bbd8a2a7f0076e6f0c0", size = 41422 }, - { url = "https://files.pythonhosted.org/packages/74/01/a27d98bab083a435c4c07e9d1d720d4c8a578bf4c270bae373760b1022be/mmh3-5.2.0-cp314-cp314-win_amd64.whl", hash = "sha256:7c7f0b342fd06044bedd0b6e72177ddc0076f54fd89ee239447f8b271d919d9b", size = 42135 }, - { url = "https://files.pythonhosted.org/packages/cb/c9/dbba5507e95429b8b380e2ba091eff5c20a70a59560934dff0ad8392b8c8/mmh3-5.2.0-cp314-cp314-win_arm64.whl", hash = "sha256:3193752fc05ea72366c2b63ff24b9a190f422e32d75fdeae71087c08fff26115", size = 39879 }, - { url = "https://files.pythonhosted.org/packages/b5/d1/c8c0ef839c17258b9de41b84f663574fabcf8ac2007b7416575e0f65ff6e/mmh3-5.2.0-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:69fc339d7202bea69ef9bd7c39bfdf9fdabc8e6822a01eba62fb43233c1b3932", size = 57696 }, - { url = "https://files.pythonhosted.org/packages/2f/55/95e2b9ff201e89f9fe37036037ab61a6c941942b25cdb7b6a9df9b931993/mmh3-5.2.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:12da42c0a55c9d86ab566395324213c319c73ecb0c239fad4726324212b9441c", size = 41421 }, - { url = "https://files.pythonhosted.org/packages/77/79/9be23ad0b7001a4b22752e7693be232428ecc0a35068a4ff5c2f14ef8b20/mmh3-5.2.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f7f9034c7cf05ddfaac8d7a2e63a3c97a840d4615d0a0e65ba8bdf6f8576e3be", size = 40853 }, - { url = "https://files.pythonhosted.org/packages/ac/1b/96b32058eda1c1dee8264900c37c359a7325c1f11f5ff14fd2be8e24eff9/mmh3-5.2.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:11730eeb16dfcf9674fdea9bb6b8e6dd9b40813b7eb839bc35113649eef38aeb", size = 109694 }, - { url = "https://files.pythonhosted.org/packages/8d/6f/a2ae44cd7dad697b6dea48390cbc977b1e5ca58fda09628cbcb2275af064/mmh3-5.2.0-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:932a6eec1d2e2c3c9e630d10f7128d80e70e2d47fe6b8c7ea5e1afbd98733e65", size = 117438 }, - { url = 
"https://files.pythonhosted.org/packages/a0/08/bfb75451c83f05224a28afeaf3950c7b793c0b71440d571f8e819cfb149a/mmh3-5.2.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3ca975c51c5028947bbcfc24966517aac06a01d6c921e30f7c5383c195f87991", size = 120409 }, - { url = "https://files.pythonhosted.org/packages/9f/ea/8b118b69b2ff8df568f742387d1a159bc654a0f78741b31437dd047ea28e/mmh3-5.2.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:5b0b58215befe0f0e120b828f7645e97719bbba9f23b69e268ed0ac7adde8645", size = 125909 }, - { url = "https://files.pythonhosted.org/packages/3e/11/168cc0b6a30650032e351a3b89b8a47382da541993a03af91e1ba2501234/mmh3-5.2.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:29c2b9ce61886809d0492a274a5a53047742dea0f703f9c4d5d223c3ea6377d3", size = 135331 }, - { url = "https://files.pythonhosted.org/packages/31/05/e3a9849b1c18a7934c64e831492c99e67daebe84a8c2f2c39a7096a830e3/mmh3-5.2.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:a367d4741ac0103f8198c82f429bccb9359f543ca542b06a51f4f0332e8de279", size = 110085 }, - { url = "https://files.pythonhosted.org/packages/d9/d5/a96bcc306e3404601418b2a9a370baec92af84204528ba659fdfe34c242f/mmh3-5.2.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:5a5dba98e514fb26241868f6eb90a7f7ca0e039aed779342965ce24ea32ba513", size = 111195 }, - { url = "https://files.pythonhosted.org/packages/af/29/0fd49801fec5bff37198684e0849b58e0dab3a2a68382a357cfffb0fafc3/mmh3-5.2.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:941603bfd75a46023807511c1ac2f1b0f39cccc393c15039969806063b27e6db", size = 116919 }, - { url = "https://files.pythonhosted.org/packages/2d/04/4f3c32b0a2ed762edca45d8b46568fc3668e34f00fb1e0a3b5451ec1281c/mmh3-5.2.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:132dd943451a7c7546978863d2f5a64977928410782e1a87d583cb60eb89e667", size = 123160 }, - { url = "https://files.pythonhosted.org/packages/91/76/3d29eaa38821730633d6a240d36fa8ad2807e9dfd432c12e1a472ed211eb/mmh3-5.2.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:f698733a8a494466432d611a8f0d1e026f5286dee051beea4b3c3146817e35d5", size = 110206 }, - { url = "https://files.pythonhosted.org/packages/44/1c/ccf35892684d3a408202e296e56843743e0b4fb1629e59432ea88cdb3909/mmh3-5.2.0-cp314-cp314t-win32.whl", hash = "sha256:6d541038b3fc360ec538fc116de87462627944765a6750308118f8b509a8eec7", size = 41970 }, - { url = "https://files.pythonhosted.org/packages/75/b2/b9e4f1e5adb5e21eb104588fcee2cd1eaa8308255173481427d5ecc4284e/mmh3-5.2.0-cp314-cp314t-win_amd64.whl", hash = "sha256:e912b19cf2378f2967d0c08e86ff4c6c360129887f678e27e4dde970d21b3f4d", size = 43063 }, - { url = "https://files.pythonhosted.org/packages/6a/fc/0e61d9a4e29c8679356795a40e48f647b4aad58d71bfc969f0f8f56fb912/mmh3-5.2.0-cp314-cp314t-win_arm64.whl", hash = "sha256:e7884931fe5e788163e7b3c511614130c2c59feffdc21112290a194487efb2e9", size = 40455 }, +sdist = { url = "https://files.pythonhosted.org/packages/a7/af/f28c2c2f51f31abb4725f9a64bc7863d5f491f6539bd26aee2a1d21a649e/mmh3-5.2.0.tar.gz", hash = "sha256:1efc8fec8478e9243a78bb993422cf79f8ff85cb4cf6b79647480a31e0d950a8", size = 33582, upload_time = "2025-07-29T07:43:48.49Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bf/6a/d5aa7edb5c08e0bd24286c7d08341a0446f9a2fbbb97d96a8a6dd81935ee/mmh3-5.2.0-cp312-cp312-macosx_10_13_universal2.whl", hash = 
"sha256:384eda9361a7bf83a85e09447e1feafe081034af9dd428893701b959230d84be", size = 56141, upload_time = "2025-07-29T07:42:13.456Z" }, + { url = "https://files.pythonhosted.org/packages/08/49/131d0fae6447bc4a7299ebdb1a6fb9d08c9f8dcf97d75ea93e8152ddf7ab/mmh3-5.2.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2c9da0d568569cc87315cb063486d761e38458b8ad513fedd3dc9263e1b81bcd", size = 40681, upload_time = "2025-07-29T07:42:14.306Z" }, + { url = "https://files.pythonhosted.org/packages/8f/6f/9221445a6bcc962b7f5ff3ba18ad55bba624bacdc7aa3fc0a518db7da8ec/mmh3-5.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:86d1be5d63232e6eb93c50881aea55ff06eb86d8e08f9b5417c8c9b10db9db96", size = 40062, upload_time = "2025-07-29T07:42:15.08Z" }, + { url = "https://files.pythonhosted.org/packages/1e/d4/6bb2d0fef81401e0bb4c297d1eb568b767de4ce6fc00890bc14d7b51ecc4/mmh3-5.2.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:bf7bee43e17e81671c447e9c83499f53d99bf440bc6d9dc26a841e21acfbe094", size = 97333, upload_time = "2025-07-29T07:42:16.436Z" }, + { url = "https://files.pythonhosted.org/packages/44/e0/ccf0daff8134efbb4fbc10a945ab53302e358c4b016ada9bf97a6bdd50c1/mmh3-5.2.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7aa18cdb58983ee660c9c400b46272e14fa253c675ed963d3812487f8ca42037", size = 103310, upload_time = "2025-07-29T07:42:17.796Z" }, + { url = "https://files.pythonhosted.org/packages/02/63/1965cb08a46533faca0e420e06aff8bbaf9690a6f0ac6ae6e5b2e4544687/mmh3-5.2.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ae9d032488fcec32d22be6542d1a836f00247f40f320844dbb361393b5b22773", size = 106178, upload_time = "2025-07-29T07:42:19.281Z" }, + { url = "https://files.pythonhosted.org/packages/c2/41/c883ad8e2c234013f27f92061200afc11554ea55edd1bcf5e1accd803a85/mmh3-5.2.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e1861fb6b1d0453ed7293200139c0a9011eeb1376632e048e3766945b13313c5", size = 113035, upload_time = "2025-07-29T07:42:20.356Z" }, + { url = "https://files.pythonhosted.org/packages/df/b5/1ccade8b1fa625d634a18bab7bf08a87457e09d5ec8cf83ca07cbea9d400/mmh3-5.2.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:99bb6a4d809aa4e528ddfe2c85dd5239b78b9dd14be62cca0329db78505e7b50", size = 120784, upload_time = "2025-07-29T07:42:21.377Z" }, + { url = "https://files.pythonhosted.org/packages/77/1c/919d9171fcbdcdab242e06394464ccf546f7d0f3b31e0d1e3a630398782e/mmh3-5.2.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1f8d8b627799f4e2fcc7c034fed8f5f24dc7724ff52f69838a3d6d15f1ad4765", size = 99137, upload_time = "2025-07-29T07:42:22.344Z" }, + { url = "https://files.pythonhosted.org/packages/66/8a/1eebef5bd6633d36281d9fc83cf2e9ba1ba0e1a77dff92aacab83001cee4/mmh3-5.2.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:b5995088dd7023d2d9f310a0c67de5a2b2e06a570ecfd00f9ff4ab94a67cde43", size = 98664, upload_time = "2025-07-29T07:42:23.269Z" }, + { url = "https://files.pythonhosted.org/packages/13/41/a5d981563e2ee682b21fb65e29cc0f517a6734a02b581359edd67f9d0360/mmh3-5.2.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:1a5f4d2e59d6bba8ef01b013c472741835ad961e7c28f50c82b27c57748744a4", size = 106459, upload_time = "2025-07-29T07:42:24.238Z" }, + { url = 
"https://files.pythonhosted.org/packages/24/31/342494cd6ab792d81e083680875a2c50fa0c5df475ebf0b67784f13e4647/mmh3-5.2.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:fd6e6c3d90660d085f7e73710eab6f5545d4854b81b0135a3526e797009dbda3", size = 110038, upload_time = "2025-07-29T07:42:25.629Z" }, + { url = "https://files.pythonhosted.org/packages/28/44/efda282170a46bb4f19c3e2b90536513b1d821c414c28469a227ca5a1789/mmh3-5.2.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c4a2f3d83879e3de2eb8cbf562e71563a8ed15ee9b9c2e77ca5d9f73072ac15c", size = 97545, upload_time = "2025-07-29T07:42:27.04Z" }, + { url = "https://files.pythonhosted.org/packages/68/8f/534ae319c6e05d714f437e7206f78c17e66daca88164dff70286b0e8ea0c/mmh3-5.2.0-cp312-cp312-win32.whl", hash = "sha256:2421b9d665a0b1ad724ec7332fb5a98d075f50bc51a6ff854f3a1882bd650d49", size = 40805, upload_time = "2025-07-29T07:42:28.032Z" }, + { url = "https://files.pythonhosted.org/packages/b8/f6/f6abdcfefcedab3c964868048cfe472764ed358c2bf6819a70dd4ed4ed3a/mmh3-5.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:72d80005b7634a3a2220f81fbeb94775ebd12794623bb2e1451701ea732b4aa3", size = 41597, upload_time = "2025-07-29T07:42:28.894Z" }, + { url = "https://files.pythonhosted.org/packages/15/fd/f7420e8cbce45c259c770cac5718badf907b302d3a99ec587ba5ce030237/mmh3-5.2.0-cp312-cp312-win_arm64.whl", hash = "sha256:3d6bfd9662a20c054bc216f861fa330c2dac7c81e7fb8307b5e32ab5b9b4d2e0", size = 39350, upload_time = "2025-07-29T07:42:29.794Z" }, + { url = "https://files.pythonhosted.org/packages/d8/fa/27f6ab93995ef6ad9f940e96593c5dd24744d61a7389532b0fec03745607/mmh3-5.2.0-cp313-cp313-android_21_arm64_v8a.whl", hash = "sha256:e79c00eba78f7258e5b354eccd4d7907d60317ced924ea4a5f2e9d83f5453065", size = 40874, upload_time = "2025-07-29T07:42:30.662Z" }, + { url = "https://files.pythonhosted.org/packages/11/9c/03d13bcb6a03438bc8cac3d2e50f80908d159b31a4367c2e1a7a077ded32/mmh3-5.2.0-cp313-cp313-android_21_x86_64.whl", hash = "sha256:956127e663d05edbeec54df38885d943dfa27406594c411139690485128525de", size = 42012, upload_time = "2025-07-29T07:42:31.539Z" }, + { url = "https://files.pythonhosted.org/packages/4e/78/0865d9765408a7d504f1789944e678f74e0888b96a766d578cb80b040999/mmh3-5.2.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:c3dca4cb5b946ee91b3d6bb700d137b1cd85c20827f89fdf9c16258253489044", size = 39197, upload_time = "2025-07-29T07:42:32.374Z" }, + { url = "https://files.pythonhosted.org/packages/3e/12/76c3207bd186f98b908b6706c2317abb73756d23a4e68ea2bc94825b9015/mmh3-5.2.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:e651e17bfde5840e9e4174b01e9e080ce49277b70d424308b36a7969d0d1af73", size = 39840, upload_time = "2025-07-29T07:42:33.227Z" }, + { url = "https://files.pythonhosted.org/packages/5d/0d/574b6cce5555c9f2b31ea189ad44986755eb14e8862db28c8b834b8b64dc/mmh3-5.2.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:9f64bf06f4bf623325fda3a6d02d36cd69199b9ace99b04bb2d7fd9f89688504", size = 40644, upload_time = "2025-07-29T07:42:34.099Z" }, + { url = "https://files.pythonhosted.org/packages/52/82/3731f8640b79c46707f53ed72034a58baad400be908c87b0088f1f89f986/mmh3-5.2.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ddc63328889bcaee77b743309e5c7d2d52cee0d7d577837c91b6e7cc9e755e0b", size = 56153, upload_time = "2025-07-29T07:42:35.031Z" }, + { url = "https://files.pythonhosted.org/packages/4f/34/e02dca1d4727fd9fdeaff9e2ad6983e1552804ce1d92cc796e5b052159bb/mmh3-5.2.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = 
"sha256:bb0fdc451fb6d86d81ab8f23d881b8d6e37fc373a2deae1c02d27002d2ad7a05", size = 40684, upload_time = "2025-07-29T07:42:35.914Z" }, + { url = "https://files.pythonhosted.org/packages/8f/36/3dee40767356e104967e6ed6d102ba47b0b1ce2a89432239b95a94de1b89/mmh3-5.2.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:b29044e1ffdb84fe164d0a7ea05c7316afea93c00f8ed9449cf357c36fc4f814", size = 40057, upload_time = "2025-07-29T07:42:36.755Z" }, + { url = "https://files.pythonhosted.org/packages/31/58/228c402fccf76eb39a0a01b8fc470fecf21965584e66453b477050ee0e99/mmh3-5.2.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:58981d6ea9646dbbf9e59a30890cbf9f610df0e4a57dbfe09215116fd90b0093", size = 97344, upload_time = "2025-07-29T07:42:37.675Z" }, + { url = "https://files.pythonhosted.org/packages/34/82/fc5ce89006389a6426ef28e326fc065b0fbaaed230373b62d14c889f47ea/mmh3-5.2.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7e5634565367b6d98dc4aa2983703526ef556b3688ba3065edb4b9b90ede1c54", size = 103325, upload_time = "2025-07-29T07:42:38.591Z" }, + { url = "https://files.pythonhosted.org/packages/09/8c/261e85777c6aee1ebd53f2f17e210e7481d5b0846cd0b4a5c45f1e3761b8/mmh3-5.2.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b0271ac12415afd3171ab9a3c7cbfc71dee2c68760a7dc9d05bf8ed6ddfa3a7a", size = 106240, upload_time = "2025-07-29T07:42:39.563Z" }, + { url = "https://files.pythonhosted.org/packages/70/73/2f76b3ad8a3d431824e9934403df36c0ddacc7831acf82114bce3c4309c8/mmh3-5.2.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:45b590e31bc552c6f8e2150ff1ad0c28dd151e9f87589e7eaf508fbdd8e8e908", size = 113060, upload_time = "2025-07-29T07:42:40.585Z" }, + { url = "https://files.pythonhosted.org/packages/9f/b9/7ea61a34e90e50a79a9d87aa1c0b8139a7eaf4125782b34b7d7383472633/mmh3-5.2.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:bdde97310d59604f2a9119322f61b31546748499a21b44f6715e8ced9308a6c5", size = 120781, upload_time = "2025-07-29T07:42:41.618Z" }, + { url = "https://files.pythonhosted.org/packages/0f/5b/ae1a717db98c7894a37aeedbd94b3f99e6472a836488f36b6849d003485b/mmh3-5.2.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:fc9c5f280438cf1c1a8f9abb87dc8ce9630a964120cfb5dd50d1e7ce79690c7a", size = 99174, upload_time = "2025-07-29T07:42:42.587Z" }, + { url = "https://files.pythonhosted.org/packages/e3/de/000cce1d799fceebb6d4487ae29175dd8e81b48e314cba7b4da90bcf55d7/mmh3-5.2.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:c903e71fd8debb35ad2a4184c1316b3cb22f64ce517b4e6747f25b0a34e41266", size = 98734, upload_time = "2025-07-29T07:42:43.996Z" }, + { url = "https://files.pythonhosted.org/packages/79/19/0dc364391a792b72fbb22becfdeacc5add85cc043cd16986e82152141883/mmh3-5.2.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:eed4bba7ff8a0d37106ba931ab03bdd3915fbb025bcf4e1f0aa02bc8114960c5", size = 106493, upload_time = "2025-07-29T07:42:45.07Z" }, + { url = "https://files.pythonhosted.org/packages/3c/b1/bc8c28e4d6e807bbb051fefe78e1156d7f104b89948742ad310612ce240d/mmh3-5.2.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:1fdb36b940e9261aff0b5177c5b74a36936b902f473180f6c15bde26143681a9", size = 110089, upload_time = "2025-07-29T07:42:46.122Z" }, + { url = 
"https://files.pythonhosted.org/packages/3b/a2/d20f3f5c95e9c511806686c70d0a15479cc3941c5f322061697af1c1ff70/mmh3-5.2.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7303aab41e97adcf010a09efd8f1403e719e59b7705d5e3cfed3dd7571589290", size = 97571, upload_time = "2025-07-29T07:42:47.18Z" }, + { url = "https://files.pythonhosted.org/packages/7b/23/665296fce4f33488deec39a750ffd245cfc07aafb0e3ef37835f91775d14/mmh3-5.2.0-cp313-cp313-win32.whl", hash = "sha256:03e08c6ebaf666ec1e3d6ea657a2d363bb01effd1a9acfe41f9197decaef0051", size = 40806, upload_time = "2025-07-29T07:42:48.166Z" }, + { url = "https://files.pythonhosted.org/packages/59/b0/92e7103f3b20646e255b699e2d0327ce53a3f250e44367a99dc8be0b7c7a/mmh3-5.2.0-cp313-cp313-win_amd64.whl", hash = "sha256:7fddccd4113e7b736706e17a239a696332360cbaddf25ae75b57ba1acce65081", size = 41600, upload_time = "2025-07-29T07:42:49.371Z" }, + { url = "https://files.pythonhosted.org/packages/99/22/0b2bd679a84574647de538c5b07ccaa435dbccc37815067fe15b90fe8dad/mmh3-5.2.0-cp313-cp313-win_arm64.whl", hash = "sha256:fa0c966ee727aad5406d516375593c5f058c766b21236ab8985693934bb5085b", size = 39349, upload_time = "2025-07-29T07:42:50.268Z" }, + { url = "https://files.pythonhosted.org/packages/f7/ca/a20db059a8a47048aaf550da14a145b56e9c7386fb8280d3ce2962dcebf7/mmh3-5.2.0-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:e5015f0bb6eb50008bed2d4b1ce0f2a294698a926111e4bb202c0987b4f89078", size = 39209, upload_time = "2025-07-29T07:42:51.559Z" }, + { url = "https://files.pythonhosted.org/packages/98/dd/e5094799d55c7482d814b979a0fd608027d0af1b274bfb4c3ea3e950bfd5/mmh3-5.2.0-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:e0f3ed828d709f5b82d8bfe14f8856120718ec4bd44a5b26102c3030a1e12501", size = 39843, upload_time = "2025-07-29T07:42:52.536Z" }, + { url = "https://files.pythonhosted.org/packages/f4/6b/7844d7f832c85400e7cc89a1348e4e1fdd38c5a38415bb5726bbb8fcdb6c/mmh3-5.2.0-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:f35727c5118aba95f0397e18a1a5b8405425581bfe53e821f0fb444cbdc2bc9b", size = 40648, upload_time = "2025-07-29T07:42:53.392Z" }, + { url = "https://files.pythonhosted.org/packages/1f/bf/71f791f48a21ff3190ba5225807cbe4f7223360e96862c376e6e3fb7efa7/mmh3-5.2.0-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:3bc244802ccab5220008cb712ca1508cb6a12f0eb64ad62997156410579a1770", size = 56164, upload_time = "2025-07-29T07:42:54.267Z" }, + { url = "https://files.pythonhosted.org/packages/70/1f/f87e3d34d83032b4f3f0f528c6d95a98290fcacf019da61343a49dccfd51/mmh3-5.2.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:ff3d50dc3fe8a98059f99b445dfb62792b5d006c5e0b8f03c6de2813b8376110", size = 40692, upload_time = "2025-07-29T07:42:55.234Z" }, + { url = "https://files.pythonhosted.org/packages/a6/e2/db849eaed07117086f3452feca8c839d30d38b830ac59fe1ce65af8be5ad/mmh3-5.2.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:37a358cc881fe796e099c1db6ce07ff757f088827b4e8467ac52b7a7ffdca647", size = 40068, upload_time = "2025-07-29T07:42:56.158Z" }, + { url = "https://files.pythonhosted.org/packages/df/6b/209af927207af77425b044e32f77f49105a0b05d82ff88af6971d8da4e19/mmh3-5.2.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:b9a87025121d1c448f24f27ff53a5fe7b6ef980574b4a4f11acaabe702420d63", size = 97367, upload_time = "2025-07-29T07:42:57.037Z" }, + { url = 
"https://files.pythonhosted.org/packages/ca/e0/78adf4104c425606a9ce33fb351f790c76a6c2314969c4a517d1ffc92196/mmh3-5.2.0-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:1ba55d6ca32eeef8b2625e1e4bfc3b3db52bc63014bd7e5df8cc11bf2b036b12", size = 103306, upload_time = "2025-07-29T07:42:58.522Z" }, + { url = "https://files.pythonhosted.org/packages/a3/79/c2b89f91b962658b890104745b1b6c9ce38d50a889f000b469b91eeb1b9e/mmh3-5.2.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c9ff37ba9f15637e424c2ab57a1a590c52897c845b768e4e0a4958084ec87f22", size = 106312, upload_time = "2025-07-29T07:42:59.552Z" }, + { url = "https://files.pythonhosted.org/packages/4b/14/659d4095528b1a209be90934778c5ffe312177d51e365ddcbca2cac2ec7c/mmh3-5.2.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a094319ec0db52a04af9fdc391b4d39a1bc72bc8424b47c4411afb05413a44b5", size = 113135, upload_time = "2025-07-29T07:43:00.745Z" }, + { url = "https://files.pythonhosted.org/packages/8d/6f/cd7734a779389a8a467b5c89a48ff476d6f2576e78216a37551a97e9e42a/mmh3-5.2.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c5584061fd3da584659b13587f26c6cad25a096246a481636d64375d0c1f6c07", size = 120775, upload_time = "2025-07-29T07:43:02.124Z" }, + { url = "https://files.pythonhosted.org/packages/1d/ca/8256e3b96944408940de3f9291d7e38a283b5761fe9614d4808fcf27bd62/mmh3-5.2.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ecbfc0437ddfdced5e7822d1ce4855c9c64f46819d0fdc4482c53f56c707b935", size = 99178, upload_time = "2025-07-29T07:43:03.182Z" }, + { url = "https://files.pythonhosted.org/packages/8a/32/39e2b3cf06b6e2eb042c984dab8680841ac2a0d3ca6e0bea30db1f27b565/mmh3-5.2.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:7b986d506a8e8ea345791897ba5d8ba0d9d8820cd4fc3e52dbe6de19388de2e7", size = 98738, upload_time = "2025-07-29T07:43:04.207Z" }, + { url = "https://files.pythonhosted.org/packages/61/d3/7bbc8e0e8cf65ebbe1b893ffa0467b7ecd1bd07c3bbf6c9db4308ada22ec/mmh3-5.2.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:38d899a156549da8ef6a9f1d6f7ef231228d29f8f69bce2ee12f5fba6d6fd7c5", size = 106510, upload_time = "2025-07-29T07:43:05.656Z" }, + { url = "https://files.pythonhosted.org/packages/10/99/b97e53724b52374e2f3859046f0eb2425192da356cb19784d64bc17bb1cf/mmh3-5.2.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:d86651fa45799530885ba4dab3d21144486ed15285e8784181a0ab37a4552384", size = 110053, upload_time = "2025-07-29T07:43:07.204Z" }, + { url = "https://files.pythonhosted.org/packages/ac/62/3688c7d975ed195155671df68788c83fed6f7909b6ec4951724c6860cb97/mmh3-5.2.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:c463d7c1c4cfc9d751efeaadd936bbba07b5b0ed81a012b3a9f5a12f0872bd6e", size = 97546, upload_time = "2025-07-29T07:43:08.226Z" }, + { url = "https://files.pythonhosted.org/packages/ca/3b/c6153250f03f71a8b7634cded82939546cdfba02e32f124ff51d52c6f991/mmh3-5.2.0-cp314-cp314-win32.whl", hash = "sha256:bb4fe46bdc6104fbc28db7a6bacb115ee6368ff993366bbd8a2a7f0076e6f0c0", size = 41422, upload_time = "2025-07-29T07:43:09.216Z" }, + { url = "https://files.pythonhosted.org/packages/74/01/a27d98bab083a435c4c07e9d1d720d4c8a578bf4c270bae373760b1022be/mmh3-5.2.0-cp314-cp314-win_amd64.whl", hash = "sha256:7c7f0b342fd06044bedd0b6e72177ddc0076f54fd89ee239447f8b271d919d9b", size = 42135, upload_time = "2025-07-29T07:43:10.183Z" }, + { url = 
"https://files.pythonhosted.org/packages/cb/c9/dbba5507e95429b8b380e2ba091eff5c20a70a59560934dff0ad8392b8c8/mmh3-5.2.0-cp314-cp314-win_arm64.whl", hash = "sha256:3193752fc05ea72366c2b63ff24b9a190f422e32d75fdeae71087c08fff26115", size = 39879, upload_time = "2025-07-29T07:43:11.106Z" }, + { url = "https://files.pythonhosted.org/packages/b5/d1/c8c0ef839c17258b9de41b84f663574fabcf8ac2007b7416575e0f65ff6e/mmh3-5.2.0-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:69fc339d7202bea69ef9bd7c39bfdf9fdabc8e6822a01eba62fb43233c1b3932", size = 57696, upload_time = "2025-07-29T07:43:11.989Z" }, + { url = "https://files.pythonhosted.org/packages/2f/55/95e2b9ff201e89f9fe37036037ab61a6c941942b25cdb7b6a9df9b931993/mmh3-5.2.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:12da42c0a55c9d86ab566395324213c319c73ecb0c239fad4726324212b9441c", size = 41421, upload_time = "2025-07-29T07:43:13.269Z" }, + { url = "https://files.pythonhosted.org/packages/77/79/9be23ad0b7001a4b22752e7693be232428ecc0a35068a4ff5c2f14ef8b20/mmh3-5.2.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f7f9034c7cf05ddfaac8d7a2e63a3c97a840d4615d0a0e65ba8bdf6f8576e3be", size = 40853, upload_time = "2025-07-29T07:43:14.888Z" }, + { url = "https://files.pythonhosted.org/packages/ac/1b/96b32058eda1c1dee8264900c37c359a7325c1f11f5ff14fd2be8e24eff9/mmh3-5.2.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:11730eeb16dfcf9674fdea9bb6b8e6dd9b40813b7eb839bc35113649eef38aeb", size = 109694, upload_time = "2025-07-29T07:43:15.816Z" }, + { url = "https://files.pythonhosted.org/packages/8d/6f/a2ae44cd7dad697b6dea48390cbc977b1e5ca58fda09628cbcb2275af064/mmh3-5.2.0-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:932a6eec1d2e2c3c9e630d10f7128d80e70e2d47fe6b8c7ea5e1afbd98733e65", size = 117438, upload_time = "2025-07-29T07:43:16.865Z" }, + { url = "https://files.pythonhosted.org/packages/a0/08/bfb75451c83f05224a28afeaf3950c7b793c0b71440d571f8e819cfb149a/mmh3-5.2.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3ca975c51c5028947bbcfc24966517aac06a01d6c921e30f7c5383c195f87991", size = 120409, upload_time = "2025-07-29T07:43:18.207Z" }, + { url = "https://files.pythonhosted.org/packages/9f/ea/8b118b69b2ff8df568f742387d1a159bc654a0f78741b31437dd047ea28e/mmh3-5.2.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:5b0b58215befe0f0e120b828f7645e97719bbba9f23b69e268ed0ac7adde8645", size = 125909, upload_time = "2025-07-29T07:43:19.39Z" }, + { url = "https://files.pythonhosted.org/packages/3e/11/168cc0b6a30650032e351a3b89b8a47382da541993a03af91e1ba2501234/mmh3-5.2.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:29c2b9ce61886809d0492a274a5a53047742dea0f703f9c4d5d223c3ea6377d3", size = 135331, upload_time = "2025-07-29T07:43:20.435Z" }, + { url = "https://files.pythonhosted.org/packages/31/05/e3a9849b1c18a7934c64e831492c99e67daebe84a8c2f2c39a7096a830e3/mmh3-5.2.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:a367d4741ac0103f8198c82f429bccb9359f543ca542b06a51f4f0332e8de279", size = 110085, upload_time = "2025-07-29T07:43:21.92Z" }, + { url = "https://files.pythonhosted.org/packages/d9/d5/a96bcc306e3404601418b2a9a370baec92af84204528ba659fdfe34c242f/mmh3-5.2.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:5a5dba98e514fb26241868f6eb90a7f7ca0e039aed779342965ce24ea32ba513", size = 111195, 
upload_time = "2025-07-29T07:43:23.066Z" }, + { url = "https://files.pythonhosted.org/packages/af/29/0fd49801fec5bff37198684e0849b58e0dab3a2a68382a357cfffb0fafc3/mmh3-5.2.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:941603bfd75a46023807511c1ac2f1b0f39cccc393c15039969806063b27e6db", size = 116919, upload_time = "2025-07-29T07:43:24.178Z" }, + { url = "https://files.pythonhosted.org/packages/2d/04/4f3c32b0a2ed762edca45d8b46568fc3668e34f00fb1e0a3b5451ec1281c/mmh3-5.2.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:132dd943451a7c7546978863d2f5a64977928410782e1a87d583cb60eb89e667", size = 123160, upload_time = "2025-07-29T07:43:25.26Z" }, + { url = "https://files.pythonhosted.org/packages/91/76/3d29eaa38821730633d6a240d36fa8ad2807e9dfd432c12e1a472ed211eb/mmh3-5.2.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:f698733a8a494466432d611a8f0d1e026f5286dee051beea4b3c3146817e35d5", size = 110206, upload_time = "2025-07-29T07:43:26.699Z" }, + { url = "https://files.pythonhosted.org/packages/44/1c/ccf35892684d3a408202e296e56843743e0b4fb1629e59432ea88cdb3909/mmh3-5.2.0-cp314-cp314t-win32.whl", hash = "sha256:6d541038b3fc360ec538fc116de87462627944765a6750308118f8b509a8eec7", size = 41970, upload_time = "2025-07-29T07:43:27.666Z" }, + { url = "https://files.pythonhosted.org/packages/75/b2/b9e4f1e5adb5e21eb104588fcee2cd1eaa8308255173481427d5ecc4284e/mmh3-5.2.0-cp314-cp314t-win_amd64.whl", hash = "sha256:e912b19cf2378f2967d0c08e86ff4c6c360129887f678e27e4dde970d21b3f4d", size = 43063, upload_time = "2025-07-29T07:43:28.582Z" }, + { url = "https://files.pythonhosted.org/packages/6a/fc/0e61d9a4e29c8679356795a40e48f647b4aad58d71bfc969f0f8f56fb912/mmh3-5.2.0-cp314-cp314t-win_arm64.whl", hash = "sha256:e7884931fe5e788163e7b3c511614130c2c59feffdc21112290a194487efb2e9", size = 40455, upload_time = "2025-07-29T07:43:29.563Z" }, ] [[package]] name = "more-itertools" version = "10.8.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ea/5d/38b681d3fce7a266dd9ab73c66959406d565b3e85f21d5e66e1181d93721/more_itertools-10.8.0.tar.gz", hash = "sha256:f638ddf8a1a0d134181275fb5d58b086ead7c6a72429ad725c67503f13ba30bd", size = 137431 } +sdist = { url = "https://files.pythonhosted.org/packages/ea/5d/38b681d3fce7a266dd9ab73c66959406d565b3e85f21d5e66e1181d93721/more_itertools-10.8.0.tar.gz", hash = "sha256:f638ddf8a1a0d134181275fb5d58b086ead7c6a72429ad725c67503f13ba30bd", size = 137431, upload_time = "2025-09-02T15:23:11.018Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a4/8e/469e5a4a2f5855992e425f3cb33804cc07bf18d48f2db061aec61ce50270/more_itertools-10.8.0-py3-none-any.whl", hash = "sha256:52d4362373dcf7c52546bc4af9a86ee7c4579df9a8dc268be0a2f949d376cc9b", size = 69667 }, + { url = "https://files.pythonhosted.org/packages/a4/8e/469e5a4a2f5855992e425f3cb33804cc07bf18d48f2db061aec61ce50270/more_itertools-10.8.0-py3-none-any.whl", hash = "sha256:52d4362373dcf7c52546bc4af9a86ee7c4579df9a8dc268be0a2f949d376cc9b", size = 69667, upload_time = "2025-09-02T15:23:09.635Z" }, ] [[package]] name = "mpmath" version = "1.3.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e0/47/dd32fa426cc72114383ac549964eecb20ecfd886d1e5ccf5340b55b02f57/mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f", size = 508106 } +sdist = { url = 
"https://files.pythonhosted.org/packages/e0/47/dd32fa426cc72114383ac549964eecb20ecfd886d1e5ccf5340b55b02f57/mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f", size = 508106, upload_time = "2023-03-07T16:47:11.061Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198 }, + { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload_time = "2023-03-07T16:47:09.197Z" }, ] [[package]] name = "multidict" version = "6.7.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/80/1e/5492c365f222f907de1039b91f922b93fa4f764c713ee858d235495d8f50/multidict-6.7.0.tar.gz", hash = "sha256:c6e99d9a65ca282e578dfea819cfa9c0a62b2499d8677392e09feaf305e9e6f5", size = 101834 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c2/9e/9f61ac18d9c8b475889f32ccfa91c9f59363480613fc807b6e3023d6f60b/multidict-6.7.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:8a3862568a36d26e650a19bb5cbbba14b71789032aebc0423f8cc5f150730184", size = 76877 }, - { url = "https://files.pythonhosted.org/packages/38/6f/614f09a04e6184f8824268fce4bc925e9849edfa654ddd59f0b64508c595/multidict-6.7.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:960c60b5849b9b4f9dcc9bea6e3626143c252c74113df2c1540aebce70209b45", size = 45467 }, - { url = "https://files.pythonhosted.org/packages/b3/93/c4f67a436dd026f2e780c433277fff72be79152894d9fc36f44569cab1a6/multidict-6.7.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2049be98fb57a31b4ccf870bf377af2504d4ae35646a19037ec271e4c07998aa", size = 43834 }, - { url = "https://files.pythonhosted.org/packages/7f/f5/013798161ca665e4a422afbc5e2d9e4070142a9ff8905e482139cd09e4d0/multidict-6.7.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:0934f3843a1860dd465d38895c17fce1f1cb37295149ab05cd1b9a03afacb2a7", size = 250545 }, - { url = "https://files.pythonhosted.org/packages/71/2f/91dbac13e0ba94669ea5119ba267c9a832f0cb65419aca75549fcf09a3dc/multidict-6.7.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b3e34f3a1b8131ba06f1a73adab24f30934d148afcd5f5de9a73565a4404384e", size = 258305 }, - { url = "https://files.pythonhosted.org/packages/ef/b0/754038b26f6e04488b48ac621f779c341338d78503fb45403755af2df477/multidict-6.7.0-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:efbb54e98446892590dc2458c19c10344ee9a883a79b5cec4bc34d6656e8d546", size = 242363 }, - { url = "https://files.pythonhosted.org/packages/87/15/9da40b9336a7c9fa606c4cf2ed80a649dffeb42b905d4f63a1d7eb17d746/multidict-6.7.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a35c5fc61d4f51eb045061e7967cfe3123d622cd500e8868e7c0c592a09fedc4", size = 268375 }, - { url = "https://files.pythonhosted.org/packages/82/72/c53fcade0cc94dfaad583105fd92b3a783af2091eddcb41a6d5a52474000/multidict-6.7.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:29fe6740ebccba4175af1b9b87bf553e9c15cd5868ee967e010efcf94e4fd0f1", size 
= 269346 }, - { url = "https://files.pythonhosted.org/packages/0d/e2/9baffdae21a76f77ef8447f1a05a96ec4bc0a24dae08767abc0a2fe680b8/multidict-6.7.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:123e2a72e20537add2f33a79e605f6191fba2afda4cbb876e35c1a7074298a7d", size = 256107 }, - { url = "https://files.pythonhosted.org/packages/3c/06/3f06f611087dc60d65ef775f1fb5aca7c6d61c6db4990e7cda0cef9b1651/multidict-6.7.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b284e319754366c1aee2267a2036248b24eeb17ecd5dc16022095e747f2f4304", size = 253592 }, - { url = "https://files.pythonhosted.org/packages/20/24/54e804ec7945b6023b340c412ce9c3f81e91b3bf5fa5ce65558740141bee/multidict-6.7.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:803d685de7be4303b5a657b76e2f6d1240e7e0a8aa2968ad5811fa2285553a12", size = 251024 }, - { url = "https://files.pythonhosted.org/packages/14/48/011cba467ea0b17ceb938315d219391d3e421dfd35928e5dbdc3f4ae76ef/multidict-6.7.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:c04a328260dfd5db8c39538f999f02779012268f54614902d0afc775d44e0a62", size = 251484 }, - { url = "https://files.pythonhosted.org/packages/0d/2f/919258b43bb35b99fa127435cfb2d91798eb3a943396631ef43e3720dcf4/multidict-6.7.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:8a19cdb57cd3df4cd865849d93ee14920fb97224300c88501f16ecfa2604b4e0", size = 263579 }, - { url = "https://files.pythonhosted.org/packages/31/22/a0e884d86b5242b5a74cf08e876bdf299e413016b66e55511f7a804a366e/multidict-6.7.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:9b2fd74c52accced7e75de26023b7dccee62511a600e62311b918ec5c168fc2a", size = 259654 }, - { url = "https://files.pythonhosted.org/packages/b2/e5/17e10e1b5c5f5a40f2fcbb45953c9b215f8a4098003915e46a93f5fcaa8f/multidict-6.7.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3e8bfdd0e487acf992407a140d2589fe598238eaeffa3da8448d63a63cd363f8", size = 251511 }, - { url = "https://files.pythonhosted.org/packages/e3/9a/201bb1e17e7af53139597069c375e7b0dcbd47594604f65c2d5359508566/multidict-6.7.0-cp312-cp312-win32.whl", hash = "sha256:dd32a49400a2c3d52088e120ee00c1e3576cbff7e10b98467962c74fdb762ed4", size = 41895 }, - { url = "https://files.pythonhosted.org/packages/46/e2/348cd32faad84eaf1d20cce80e2bb0ef8d312c55bca1f7fa9865e7770aaf/multidict-6.7.0-cp312-cp312-win_amd64.whl", hash = "sha256:92abb658ef2d7ef22ac9f8bb88e8b6c3e571671534e029359b6d9e845923eb1b", size = 46073 }, - { url = "https://files.pythonhosted.org/packages/25/ec/aad2613c1910dce907480e0c3aa306905830f25df2e54ccc9dea450cb5aa/multidict-6.7.0-cp312-cp312-win_arm64.whl", hash = "sha256:490dab541a6a642ce1a9d61a4781656b346a55c13038f0b1244653828e3a83ec", size = 43226 }, - { url = "https://files.pythonhosted.org/packages/d2/86/33272a544eeb36d66e4d9a920602d1a2f57d4ebea4ef3cdfe5a912574c95/multidict-6.7.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:bee7c0588aa0076ce77c0ea5d19a68d76ad81fcd9fe8501003b9a24f9d4000f6", size = 76135 }, - { url = "https://files.pythonhosted.org/packages/91/1c/eb97db117a1ebe46d457a3d235a7b9d2e6dcab174f42d1b67663dd9e5371/multidict-6.7.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7ef6b61cad77091056ce0e7ce69814ef72afacb150b7ac6a3e9470def2198159", size = 45117 }, - { url = "https://files.pythonhosted.org/packages/f1/d8/6c3442322e41fb1dd4de8bd67bfd11cd72352ac131f6368315617de752f1/multidict-6.7.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9c0359b1ec12b1d6849c59f9d319610b7f20ef990a6d454ab151aa0e3b9f78ca", size = 43472 }, - { url 
= "https://files.pythonhosted.org/packages/75/3f/e2639e80325af0b6c6febdf8e57cc07043ff15f57fa1ef808f4ccb5ac4cd/multidict-6.7.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:cd240939f71c64bd658f186330603aac1a9a81bf6273f523fca63673cb7378a8", size = 249342 }, - { url = "https://files.pythonhosted.org/packages/5d/cc/84e0585f805cbeaa9cbdaa95f9a3d6aed745b9d25700623ac89a6ecff400/multidict-6.7.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a60a4d75718a5efa473ebd5ab685786ba0c67b8381f781d1be14da49f1a2dc60", size = 257082 }, - { url = "https://files.pythonhosted.org/packages/b0/9c/ac851c107c92289acbbf5cfb485694084690c1b17e555f44952c26ddc5bd/multidict-6.7.0-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:53a42d364f323275126aff81fb67c5ca1b7a04fda0546245730a55c8c5f24bc4", size = 240704 }, - { url = "https://files.pythonhosted.org/packages/50/cc/5f93e99427248c09da95b62d64b25748a5f5c98c7c2ab09825a1d6af0e15/multidict-6.7.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3b29b980d0ddbecb736735ee5bef69bb2ddca56eff603c86f3f29a1128299b4f", size = 266355 }, - { url = "https://files.pythonhosted.org/packages/ec/0c/2ec1d883ceb79c6f7f6d7ad90c919c898f5d1c6ea96d322751420211e072/multidict-6.7.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f8a93b1c0ed2d04b97a5e9336fd2d33371b9a6e29ab7dd6503d63407c20ffbaf", size = 267259 }, - { url = "https://files.pythonhosted.org/packages/c6/2d/f0b184fa88d6630aa267680bdb8623fb69cb0d024b8c6f0d23f9a0f406d3/multidict-6.7.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9ff96e8815eecacc6645da76c413eb3b3d34cfca256c70b16b286a687d013c32", size = 254903 }, - { url = "https://files.pythonhosted.org/packages/06/c9/11ea263ad0df7dfabcad404feb3c0dd40b131bc7f232d5537f2fb1356951/multidict-6.7.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7516c579652f6a6be0e266aec0acd0db80829ca305c3d771ed898538804c2036", size = 252365 }, - { url = "https://files.pythonhosted.org/packages/41/88/d714b86ee2c17d6e09850c70c9d310abac3d808ab49dfa16b43aba9d53fd/multidict-6.7.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:040f393368e63fb0f3330e70c26bfd336656bed925e5cbe17c9da839a6ab13ec", size = 250062 }, - { url = "https://files.pythonhosted.org/packages/15/fe/ad407bb9e818c2b31383f6131ca19ea7e35ce93cf1310fce69f12e89de75/multidict-6.7.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b3bc26a951007b1057a1c543af845f1c7e3e71cc240ed1ace7bf4484aa99196e", size = 249683 }, - { url = "https://files.pythonhosted.org/packages/8c/a4/a89abdb0229e533fb925e7c6e5c40201c2873efebc9abaf14046a4536ee6/multidict-6.7.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:7b022717c748dd1992a83e219587aabe45980d88969f01b316e78683e6285f64", size = 261254 }, - { url = "https://files.pythonhosted.org/packages/8d/aa/0e2b27bd88b40a4fb8dc53dd74eecac70edaa4c1dd0707eb2164da3675b3/multidict-6.7.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:9600082733859f00d79dee64effc7aef1beb26adb297416a4ad2116fd61374bd", size = 257967 }, - { url = "https://files.pythonhosted.org/packages/d0/8e/0c67b7120d5d5f6d874ed85a085f9dc770a7f9d8813e80f44a9fec820bb7/multidict-6.7.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:94218fcec4d72bc61df51c198d098ce2b378e0ccbac41ddbed5ef44092913288", size = 250085 }, - { url = 
"https://files.pythonhosted.org/packages/ba/55/b73e1d624ea4b8fd4dd07a3bb70f6e4c7c6c5d9d640a41c6ffe5cdbd2a55/multidict-6.7.0-cp313-cp313-win32.whl", hash = "sha256:a37bd74c3fa9d00be2d7b8eca074dc56bd8077ddd2917a839bd989612671ed17", size = 41713 }, - { url = "https://files.pythonhosted.org/packages/32/31/75c59e7d3b4205075b4c183fa4ca398a2daf2303ddf616b04ae6ef55cffe/multidict-6.7.0-cp313-cp313-win_amd64.whl", hash = "sha256:30d193c6cc6d559db42b6bcec8a5d395d34d60c9877a0b71ecd7c204fcf15390", size = 45915 }, - { url = "https://files.pythonhosted.org/packages/31/2a/8987831e811f1184c22bc2e45844934385363ee61c0a2dcfa8f71b87e608/multidict-6.7.0-cp313-cp313-win_arm64.whl", hash = "sha256:ea3334cabe4d41b7ccd01e4d349828678794edbc2d3ae97fc162a3312095092e", size = 43077 }, - { url = "https://files.pythonhosted.org/packages/e8/68/7b3a5170a382a340147337b300b9eb25a9ddb573bcdfff19c0fa3f31ffba/multidict-6.7.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:ad9ce259f50abd98a1ca0aa6e490b58c316a0fce0617f609723e40804add2c00", size = 83114 }, - { url = "https://files.pythonhosted.org/packages/55/5c/3fa2d07c84df4e302060f555bbf539310980362236ad49f50eeb0a1c1eb9/multidict-6.7.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:07f5594ac6d084cbb5de2df218d78baf55ef150b91f0ff8a21cc7a2e3a5a58eb", size = 48442 }, - { url = "https://files.pythonhosted.org/packages/fc/56/67212d33239797f9bd91962bb899d72bb0f4c35a8652dcdb8ed049bef878/multidict-6.7.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:0591b48acf279821a579282444814a2d8d0af624ae0bc600aa4d1b920b6e924b", size = 46885 }, - { url = "https://files.pythonhosted.org/packages/46/d1/908f896224290350721597a61a69cd19b89ad8ee0ae1f38b3f5cd12ea2ac/multidict-6.7.0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:749a72584761531d2b9467cfbdfd29487ee21124c304c4b6cb760d8777b27f9c", size = 242588 }, - { url = "https://files.pythonhosted.org/packages/ab/67/8604288bbd68680eee0ab568fdcb56171d8b23a01bcd5cb0c8fedf6e5d99/multidict-6.7.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b4c3d199f953acd5b446bf7c0de1fe25d94e09e79086f8dc2f48a11a129cdf1", size = 249966 }, - { url = "https://files.pythonhosted.org/packages/20/33/9228d76339f1ba51e3efef7da3ebd91964d3006217aae13211653193c3ff/multidict-6.7.0-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:9fb0211dfc3b51efea2f349ec92c114d7754dd62c01f81c3e32b765b70c45c9b", size = 228618 }, - { url = "https://files.pythonhosted.org/packages/f8/2d/25d9b566d10cab1c42b3b9e5b11ef79c9111eaf4463b8c257a3bd89e0ead/multidict-6.7.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a027ec240fe73a8d6281872690b988eed307cd7d91b23998ff35ff577ca688b5", size = 257539 }, - { url = "https://files.pythonhosted.org/packages/b6/b1/8d1a965e6637fc33de3c0d8f414485c2b7e4af00f42cab3d84e7b955c222/multidict-6.7.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1d964afecdf3a8288789df2f5751dc0a8261138c3768d9af117ed384e538fad", size = 256345 }, - { url = "https://files.pythonhosted.org/packages/ba/0c/06b5a8adbdeedada6f4fb8d8f193d44a347223b11939b42953eeb6530b6b/multidict-6.7.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:caf53b15b1b7df9fbd0709aa01409000a2b4dd03a5f6f5cc548183c7c8f8b63c", size = 247934 }, - { url = 
"https://files.pythonhosted.org/packages/8f/31/b2491b5fe167ca044c6eb4b8f2c9f3b8a00b24c432c365358eadac5d7625/multidict-6.7.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:654030da3197d927f05a536a66186070e98765aa5142794c9904555d3a9d8fb5", size = 245243 }, - { url = "https://files.pythonhosted.org/packages/61/1a/982913957cb90406c8c94f53001abd9eafc271cb3e70ff6371590bec478e/multidict-6.7.0-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:2090d3718829d1e484706a2f525e50c892237b2bf9b17a79b059cb98cddc2f10", size = 235878 }, - { url = "https://files.pythonhosted.org/packages/be/c0/21435d804c1a1cf7a2608593f4d19bca5bcbd7a81a70b253fdd1c12af9c0/multidict-6.7.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:2d2cfeec3f6f45651b3d408c4acec0ebf3daa9bc8a112a084206f5db5d05b754", size = 243452 }, - { url = "https://files.pythonhosted.org/packages/54/0a/4349d540d4a883863191be6eb9a928846d4ec0ea007d3dcd36323bb058ac/multidict-6.7.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:4ef089f985b8c194d341eb2c24ae6e7408c9a0e2e5658699c92f497437d88c3c", size = 252312 }, - { url = "https://files.pythonhosted.org/packages/26/64/d5416038dbda1488daf16b676e4dbfd9674dde10a0cc8f4fc2b502d8125d/multidict-6.7.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:e93a0617cd16998784bf4414c7e40f17a35d2350e5c6f0bd900d3a8e02bd3762", size = 246935 }, - { url = "https://files.pythonhosted.org/packages/9f/8c/8290c50d14e49f35e0bd4abc25e1bc7711149ca9588ab7d04f886cdf03d9/multidict-6.7.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f0feece2ef8ebc42ed9e2e8c78fc4aa3cf455733b507c09ef7406364c94376c6", size = 243385 }, - { url = "https://files.pythonhosted.org/packages/ef/a0/f83ae75e42d694b3fbad3e047670e511c138be747bc713cf1b10d5096416/multidict-6.7.0-cp313-cp313t-win32.whl", hash = "sha256:19a1d55338ec1be74ef62440ca9e04a2f001a04d0cc49a4983dc320ff0f3212d", size = 47777 }, - { url = "https://files.pythonhosted.org/packages/dc/80/9b174a92814a3830b7357307a792300f42c9e94664b01dee8e457551fa66/multidict-6.7.0-cp313-cp313t-win_amd64.whl", hash = "sha256:3da4fb467498df97e986af166b12d01f05d2e04f978a9c1c680ea1988e0bc4b6", size = 53104 }, - { url = "https://files.pythonhosted.org/packages/cc/28/04baeaf0428d95bb7a7bea0e691ba2f31394338ba424fb0679a9ed0f4c09/multidict-6.7.0-cp313-cp313t-win_arm64.whl", hash = "sha256:b4121773c49a0776461f4a904cdf6264c88e42218aaa8407e803ca8025872792", size = 45503 }, - { url = "https://files.pythonhosted.org/packages/e2/b1/3da6934455dd4b261d4c72f897e3a5728eba81db59959f3a639245891baa/multidict-6.7.0-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:3bab1e4aff7adaa34410f93b1f8e57c4b36b9af0426a76003f441ee1d3c7e842", size = 75128 }, - { url = "https://files.pythonhosted.org/packages/14/2c/f069cab5b51d175a1a2cb4ccdf7a2c2dabd58aa5bd933fa036a8d15e2404/multidict-6.7.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:b8512bac933afc3e45fb2b18da8e59b78d4f408399a960339598374d4ae3b56b", size = 44410 }, - { url = "https://files.pythonhosted.org/packages/42/e2/64bb41266427af6642b6b128e8774ed84c11b80a90702c13ac0a86bb10cc/multidict-6.7.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:79dcf9e477bc65414ebfea98ffd013cb39552b5ecd62908752e0e413d6d06e38", size = 43205 }, - { url = "https://files.pythonhosted.org/packages/02/68/6b086fef8a3f1a8541b9236c594f0c9245617c29841f2e0395d979485cde/multidict-6.7.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:31bae522710064b5cbeddaf2e9f32b1abab70ac6ac91d42572502299e9953128", size = 245084 }, - { url = 
"https://files.pythonhosted.org/packages/15/ee/f524093232007cd7a75c1d132df70f235cfd590a7c9eaccd7ff422ef4ae8/multidict-6.7.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4a0df7ff02397bb63e2fd22af2c87dfa39e8c7f12947bc524dbdc528282c7e34", size = 252667 }, - { url = "https://files.pythonhosted.org/packages/02/a5/eeb3f43ab45878f1895118c3ef157a480db58ede3f248e29b5354139c2c9/multidict-6.7.0-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:7a0222514e8e4c514660e182d5156a415c13ef0aabbd71682fc714e327b95e99", size = 233590 }, - { url = "https://files.pythonhosted.org/packages/6a/1e/76d02f8270b97269d7e3dbd45644b1785bda457b474315f8cf999525a193/multidict-6.7.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2397ab4daaf2698eb51a76721e98db21ce4f52339e535725de03ea962b5a3202", size = 264112 }, - { url = "https://files.pythonhosted.org/packages/76/0b/c28a70ecb58963847c2a8efe334904cd254812b10e535aefb3bcce513918/multidict-6.7.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8891681594162635948a636c9fe0ff21746aeb3dd5463f6e25d9bea3a8a39ca1", size = 261194 }, - { url = "https://files.pythonhosted.org/packages/b4/63/2ab26e4209773223159b83aa32721b4021ffb08102f8ac7d689c943fded1/multidict-6.7.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:18706cc31dbf402a7945916dd5cddf160251b6dab8a2c5f3d6d5a55949f676b3", size = 248510 }, - { url = "https://files.pythonhosted.org/packages/93/cd/06c1fa8282af1d1c46fd55c10a7930af652afdce43999501d4d68664170c/multidict-6.7.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f844a1bbf1d207dd311a56f383f7eda2d0e134921d45751842d8235e7778965d", size = 248395 }, - { url = "https://files.pythonhosted.org/packages/99/ac/82cb419dd6b04ccf9e7e61befc00c77614fc8134362488b553402ecd55ce/multidict-6.7.0-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:d4393e3581e84e5645506923816b9cc81f5609a778c7e7534054091acc64d1c6", size = 239520 }, - { url = "https://files.pythonhosted.org/packages/fa/f3/a0f9bf09493421bd8716a362e0cd1d244f5a6550f5beffdd6b47e885b331/multidict-6.7.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:fbd18dc82d7bf274b37aa48d664534330af744e03bccf696d6f4c6042e7d19e7", size = 245479 }, - { url = "https://files.pythonhosted.org/packages/8d/01/476d38fc73a212843f43c852b0eee266b6971f0e28329c2184a8df90c376/multidict-6.7.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:b6234e14f9314731ec45c42fc4554b88133ad53a09092cc48a88e771c125dadb", size = 258903 }, - { url = "https://files.pythonhosted.org/packages/49/6d/23faeb0868adba613b817d0e69c5f15531b24d462af8012c4f6de4fa8dc3/multidict-6.7.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:08d4379f9744d8f78d98c8673c06e202ffa88296f009c71bbafe8a6bf847d01f", size = 252333 }, - { url = "https://files.pythonhosted.org/packages/1e/cc/48d02ac22b30fa247f7dad82866e4b1015431092f4ba6ebc7e77596e0b18/multidict-6.7.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:9fe04da3f79387f450fd0061d4dd2e45a72749d31bf634aecc9e27f24fdc4b3f", size = 243411 }, - { url = "https://files.pythonhosted.org/packages/4a/03/29a8bf5a18abf1fe34535c88adbdfa88c9fb869b5a3b120692c64abe8284/multidict-6.7.0-cp314-cp314-win32.whl", hash = "sha256:fbafe31d191dfa7c4c51f7a6149c9fb7e914dcf9ffead27dcfd9f1ae382b3885", size = 40940 }, - { url = 
"https://files.pythonhosted.org/packages/82/16/7ed27b680791b939de138f906d5cf2b4657b0d45ca6f5dd6236fdddafb1a/multidict-6.7.0-cp314-cp314-win_amd64.whl", hash = "sha256:2f67396ec0310764b9222a1728ced1ab638f61aadc6226f17a71dd9324f9a99c", size = 45087 }, - { url = "https://files.pythonhosted.org/packages/cd/3c/e3e62eb35a1950292fe39315d3c89941e30a9d07d5d2df42965ab041da43/multidict-6.7.0-cp314-cp314-win_arm64.whl", hash = "sha256:ba672b26069957ee369cfa7fc180dde1fc6f176eaf1e6beaf61fbebbd3d9c000", size = 42368 }, - { url = "https://files.pythonhosted.org/packages/8b/40/cd499bd0dbc5f1136726db3153042a735fffd0d77268e2ee20d5f33c010f/multidict-6.7.0-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:c1dcc7524066fa918c6a27d61444d4ee7900ec635779058571f70d042d86ed63", size = 82326 }, - { url = "https://files.pythonhosted.org/packages/13/8a/18e031eca251c8df76daf0288e6790561806e439f5ce99a170b4af30676b/multidict-6.7.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:27e0b36c2d388dc7b6ced3406671b401e84ad7eb0656b8f3a2f46ed0ce483718", size = 48065 }, - { url = "https://files.pythonhosted.org/packages/40/71/5e6701277470a87d234e433fb0a3a7deaf3bcd92566e421e7ae9776319de/multidict-6.7.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2a7baa46a22e77f0988e3b23d4ede5513ebec1929e34ee9495be535662c0dfe2", size = 46475 }, - { url = "https://files.pythonhosted.org/packages/fe/6a/bab00cbab6d9cfb57afe1663318f72ec28289ea03fd4e8236bb78429893a/multidict-6.7.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7bf77f54997a9166a2f5675d1201520586439424c2511723a7312bdb4bcc034e", size = 239324 }, - { url = "https://files.pythonhosted.org/packages/2a/5f/8de95f629fc22a7769ade8b41028e3e5a822c1f8904f618d175945a81ad3/multidict-6.7.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e011555abada53f1578d63389610ac8a5400fc70ce71156b0aa30d326f1a5064", size = 246877 }, - { url = "https://files.pythonhosted.org/packages/23/b4/38881a960458f25b89e9f4a4fdcb02ac101cfa710190db6e5528841e67de/multidict-6.7.0-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:28b37063541b897fd6a318007373930a75ca6d6ac7c940dbe14731ffdd8d498e", size = 225824 }, - { url = "https://files.pythonhosted.org/packages/1e/39/6566210c83f8a261575f18e7144736059f0c460b362e96e9cf797a24b8e7/multidict-6.7.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:05047ada7a2fde2631a0ed706f1fd68b169a681dfe5e4cf0f8e4cb6618bbc2cd", size = 253558 }, - { url = "https://files.pythonhosted.org/packages/00/a3/67f18315100f64c269f46e6c0319fa87ba68f0f64f2b8e7fd7c72b913a0b/multidict-6.7.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:716133f7d1d946a4e1b91b1756b23c088881e70ff180c24e864c26192ad7534a", size = 252339 }, - { url = "https://files.pythonhosted.org/packages/c8/2a/1cb77266afee2458d82f50da41beba02159b1d6b1f7973afc9a1cad1499b/multidict-6.7.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d1bed1b467ef657f2a0ae62844a607909ef1c6889562de5e1d505f74457d0b96", size = 244895 }, - { url = "https://files.pythonhosted.org/packages/dd/72/09fa7dd487f119b2eb9524946ddd36e2067c08510576d43ff68469563b3b/multidict-6.7.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:ca43bdfa5d37bd6aee89d85e1d0831fb86e25541be7e9d376ead1b28974f8e5e", size = 241862 }, - { url = 
"https://files.pythonhosted.org/packages/65/92/bc1f8bd0853d8669300f732c801974dfc3702c3eeadae2f60cef54dc69d7/multidict-6.7.0-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:44b546bd3eb645fd26fb949e43c02a25a2e632e2ca21a35e2e132c8105dc8599", size = 232376 }, - { url = "https://files.pythonhosted.org/packages/09/86/ac39399e5cb9d0c2ac8ef6e10a768e4d3bc933ac808d49c41f9dc23337eb/multidict-6.7.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:a6ef16328011d3f468e7ebc326f24c1445f001ca1dec335b2f8e66bed3006394", size = 240272 }, - { url = "https://files.pythonhosted.org/packages/3d/b6/fed5ac6b8563ec72df6cb1ea8dac6d17f0a4a1f65045f66b6d3bf1497c02/multidict-6.7.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:5aa873cbc8e593d361ae65c68f85faadd755c3295ea2c12040ee146802f23b38", size = 248774 }, - { url = "https://files.pythonhosted.org/packages/6b/8d/b954d8c0dc132b68f760aefd45870978deec6818897389dace00fcde32ff/multidict-6.7.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:3d7b6ccce016e29df4b7ca819659f516f0bc7a4b3efa3bb2012ba06431b044f9", size = 242731 }, - { url = "https://files.pythonhosted.org/packages/16/9d/a2dac7009125d3540c2f54e194829ea18ac53716c61b655d8ed300120b0f/multidict-6.7.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:171b73bd4ee683d307599b66793ac80981b06f069b62eea1c9e29c9241aa66b0", size = 240193 }, - { url = "https://files.pythonhosted.org/packages/39/ca/c05f144128ea232ae2178b008d5011d4e2cea86e4ee8c85c2631b1b94802/multidict-6.7.0-cp314-cp314t-win32.whl", hash = "sha256:b2d7f80c4e1fd010b07cb26820aae86b7e73b681ee4889684fb8d2d4537aab13", size = 48023 }, - { url = "https://files.pythonhosted.org/packages/ba/8f/0a60e501584145588be1af5cc829265701ba3c35a64aec8e07cbb71d39bb/multidict-6.7.0-cp314-cp314t-win_amd64.whl", hash = "sha256:09929cab6fcb68122776d575e03c6cc64ee0b8fca48d17e135474b042ce515cd", size = 53507 }, - { url = "https://files.pythonhosted.org/packages/7f/ae/3148b988a9c6239903e786eac19c889fab607c31d6efa7fb2147e5680f23/multidict-6.7.0-cp314-cp314t-win_arm64.whl", hash = "sha256:cc41db090ed742f32bd2d2c721861725e6109681eddf835d0a82bd3a5c382827", size = 44804 }, - { url = "https://files.pythonhosted.org/packages/b7/da/7d22601b625e241d4f23ef1ebff8acfc60da633c9e7e7922e24d10f592b3/multidict-6.7.0-py3-none-any.whl", hash = "sha256:394fc5c42a333c9ffc3e421a4c85e08580d990e08b99f6bf35b4132114c5dcb3", size = 12317 }, +sdist = { url = "https://files.pythonhosted.org/packages/80/1e/5492c365f222f907de1039b91f922b93fa4f764c713ee858d235495d8f50/multidict-6.7.0.tar.gz", hash = "sha256:c6e99d9a65ca282e578dfea819cfa9c0a62b2499d8677392e09feaf305e9e6f5", size = 101834, upload_time = "2025-10-06T14:52:30.657Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c2/9e/9f61ac18d9c8b475889f32ccfa91c9f59363480613fc807b6e3023d6f60b/multidict-6.7.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:8a3862568a36d26e650a19bb5cbbba14b71789032aebc0423f8cc5f150730184", size = 76877, upload_time = "2025-10-06T14:49:20.884Z" }, + { url = "https://files.pythonhosted.org/packages/38/6f/614f09a04e6184f8824268fce4bc925e9849edfa654ddd59f0b64508c595/multidict-6.7.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:960c60b5849b9b4f9dcc9bea6e3626143c252c74113df2c1540aebce70209b45", size = 45467, upload_time = "2025-10-06T14:49:22.054Z" }, + { url = "https://files.pythonhosted.org/packages/b3/93/c4f67a436dd026f2e780c433277fff72be79152894d9fc36f44569cab1a6/multidict-6.7.0-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:2049be98fb57a31b4ccf870bf377af2504d4ae35646a19037ec271e4c07998aa", size = 43834, upload_time = "2025-10-06T14:49:23.566Z" }, + { url = "https://files.pythonhosted.org/packages/7f/f5/013798161ca665e4a422afbc5e2d9e4070142a9ff8905e482139cd09e4d0/multidict-6.7.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:0934f3843a1860dd465d38895c17fce1f1cb37295149ab05cd1b9a03afacb2a7", size = 250545, upload_time = "2025-10-06T14:49:24.882Z" }, + { url = "https://files.pythonhosted.org/packages/71/2f/91dbac13e0ba94669ea5119ba267c9a832f0cb65419aca75549fcf09a3dc/multidict-6.7.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b3e34f3a1b8131ba06f1a73adab24f30934d148afcd5f5de9a73565a4404384e", size = 258305, upload_time = "2025-10-06T14:49:26.778Z" }, + { url = "https://files.pythonhosted.org/packages/ef/b0/754038b26f6e04488b48ac621f779c341338d78503fb45403755af2df477/multidict-6.7.0-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:efbb54e98446892590dc2458c19c10344ee9a883a79b5cec4bc34d6656e8d546", size = 242363, upload_time = "2025-10-06T14:49:28.562Z" }, + { url = "https://files.pythonhosted.org/packages/87/15/9da40b9336a7c9fa606c4cf2ed80a649dffeb42b905d4f63a1d7eb17d746/multidict-6.7.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a35c5fc61d4f51eb045061e7967cfe3123d622cd500e8868e7c0c592a09fedc4", size = 268375, upload_time = "2025-10-06T14:49:29.96Z" }, + { url = "https://files.pythonhosted.org/packages/82/72/c53fcade0cc94dfaad583105fd92b3a783af2091eddcb41a6d5a52474000/multidict-6.7.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:29fe6740ebccba4175af1b9b87bf553e9c15cd5868ee967e010efcf94e4fd0f1", size = 269346, upload_time = "2025-10-06T14:49:31.404Z" }, + { url = "https://files.pythonhosted.org/packages/0d/e2/9baffdae21a76f77ef8447f1a05a96ec4bc0a24dae08767abc0a2fe680b8/multidict-6.7.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:123e2a72e20537add2f33a79e605f6191fba2afda4cbb876e35c1a7074298a7d", size = 256107, upload_time = "2025-10-06T14:49:32.974Z" }, + { url = "https://files.pythonhosted.org/packages/3c/06/3f06f611087dc60d65ef775f1fb5aca7c6d61c6db4990e7cda0cef9b1651/multidict-6.7.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b284e319754366c1aee2267a2036248b24eeb17ecd5dc16022095e747f2f4304", size = 253592, upload_time = "2025-10-06T14:49:34.52Z" }, + { url = "https://files.pythonhosted.org/packages/20/24/54e804ec7945b6023b340c412ce9c3f81e91b3bf5fa5ce65558740141bee/multidict-6.7.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:803d685de7be4303b5a657b76e2f6d1240e7e0a8aa2968ad5811fa2285553a12", size = 251024, upload_time = "2025-10-06T14:49:35.956Z" }, + { url = "https://files.pythonhosted.org/packages/14/48/011cba467ea0b17ceb938315d219391d3e421dfd35928e5dbdc3f4ae76ef/multidict-6.7.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:c04a328260dfd5db8c39538f999f02779012268f54614902d0afc775d44e0a62", size = 251484, upload_time = "2025-10-06T14:49:37.631Z" }, + { url = "https://files.pythonhosted.org/packages/0d/2f/919258b43bb35b99fa127435cfb2d91798eb3a943396631ef43e3720dcf4/multidict-6.7.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:8a19cdb57cd3df4cd865849d93ee14920fb97224300c88501f16ecfa2604b4e0", size = 263579, upload_time = "2025-10-06T14:49:39.502Z" }, + { url = 
"https://files.pythonhosted.org/packages/31/22/a0e884d86b5242b5a74cf08e876bdf299e413016b66e55511f7a804a366e/multidict-6.7.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:9b2fd74c52accced7e75de26023b7dccee62511a600e62311b918ec5c168fc2a", size = 259654, upload_time = "2025-10-06T14:49:41.32Z" }, + { url = "https://files.pythonhosted.org/packages/b2/e5/17e10e1b5c5f5a40f2fcbb45953c9b215f8a4098003915e46a93f5fcaa8f/multidict-6.7.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3e8bfdd0e487acf992407a140d2589fe598238eaeffa3da8448d63a63cd363f8", size = 251511, upload_time = "2025-10-06T14:49:46.021Z" }, + { url = "https://files.pythonhosted.org/packages/e3/9a/201bb1e17e7af53139597069c375e7b0dcbd47594604f65c2d5359508566/multidict-6.7.0-cp312-cp312-win32.whl", hash = "sha256:dd32a49400a2c3d52088e120ee00c1e3576cbff7e10b98467962c74fdb762ed4", size = 41895, upload_time = "2025-10-06T14:49:48.718Z" }, + { url = "https://files.pythonhosted.org/packages/46/e2/348cd32faad84eaf1d20cce80e2bb0ef8d312c55bca1f7fa9865e7770aaf/multidict-6.7.0-cp312-cp312-win_amd64.whl", hash = "sha256:92abb658ef2d7ef22ac9f8bb88e8b6c3e571671534e029359b6d9e845923eb1b", size = 46073, upload_time = "2025-10-06T14:49:50.28Z" }, + { url = "https://files.pythonhosted.org/packages/25/ec/aad2613c1910dce907480e0c3aa306905830f25df2e54ccc9dea450cb5aa/multidict-6.7.0-cp312-cp312-win_arm64.whl", hash = "sha256:490dab541a6a642ce1a9d61a4781656b346a55c13038f0b1244653828e3a83ec", size = 43226, upload_time = "2025-10-06T14:49:52.304Z" }, + { url = "https://files.pythonhosted.org/packages/d2/86/33272a544eeb36d66e4d9a920602d1a2f57d4ebea4ef3cdfe5a912574c95/multidict-6.7.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:bee7c0588aa0076ce77c0ea5d19a68d76ad81fcd9fe8501003b9a24f9d4000f6", size = 76135, upload_time = "2025-10-06T14:49:54.26Z" }, + { url = "https://files.pythonhosted.org/packages/91/1c/eb97db117a1ebe46d457a3d235a7b9d2e6dcab174f42d1b67663dd9e5371/multidict-6.7.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7ef6b61cad77091056ce0e7ce69814ef72afacb150b7ac6a3e9470def2198159", size = 45117, upload_time = "2025-10-06T14:49:55.82Z" }, + { url = "https://files.pythonhosted.org/packages/f1/d8/6c3442322e41fb1dd4de8bd67bfd11cd72352ac131f6368315617de752f1/multidict-6.7.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9c0359b1ec12b1d6849c59f9d319610b7f20ef990a6d454ab151aa0e3b9f78ca", size = 43472, upload_time = "2025-10-06T14:49:57.048Z" }, + { url = "https://files.pythonhosted.org/packages/75/3f/e2639e80325af0b6c6febdf8e57cc07043ff15f57fa1ef808f4ccb5ac4cd/multidict-6.7.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:cd240939f71c64bd658f186330603aac1a9a81bf6273f523fca63673cb7378a8", size = 249342, upload_time = "2025-10-06T14:49:58.368Z" }, + { url = "https://files.pythonhosted.org/packages/5d/cc/84e0585f805cbeaa9cbdaa95f9a3d6aed745b9d25700623ac89a6ecff400/multidict-6.7.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a60a4d75718a5efa473ebd5ab685786ba0c67b8381f781d1be14da49f1a2dc60", size = 257082, upload_time = "2025-10-06T14:49:59.89Z" }, + { url = "https://files.pythonhosted.org/packages/b0/9c/ac851c107c92289acbbf5cfb485694084690c1b17e555f44952c26ddc5bd/multidict-6.7.0-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:53a42d364f323275126aff81fb67c5ca1b7a04fda0546245730a55c8c5f24bc4", size = 240704, upload_time = "2025-10-06T14:50:01.485Z" }, + { url = 
"https://files.pythonhosted.org/packages/50/cc/5f93e99427248c09da95b62d64b25748a5f5c98c7c2ab09825a1d6af0e15/multidict-6.7.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3b29b980d0ddbecb736735ee5bef69bb2ddca56eff603c86f3f29a1128299b4f", size = 266355, upload_time = "2025-10-06T14:50:02.955Z" }, + { url = "https://files.pythonhosted.org/packages/ec/0c/2ec1d883ceb79c6f7f6d7ad90c919c898f5d1c6ea96d322751420211e072/multidict-6.7.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f8a93b1c0ed2d04b97a5e9336fd2d33371b9a6e29ab7dd6503d63407c20ffbaf", size = 267259, upload_time = "2025-10-06T14:50:04.446Z" }, + { url = "https://files.pythonhosted.org/packages/c6/2d/f0b184fa88d6630aa267680bdb8623fb69cb0d024b8c6f0d23f9a0f406d3/multidict-6.7.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9ff96e8815eecacc6645da76c413eb3b3d34cfca256c70b16b286a687d013c32", size = 254903, upload_time = "2025-10-06T14:50:05.98Z" }, + { url = "https://files.pythonhosted.org/packages/06/c9/11ea263ad0df7dfabcad404feb3c0dd40b131bc7f232d5537f2fb1356951/multidict-6.7.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7516c579652f6a6be0e266aec0acd0db80829ca305c3d771ed898538804c2036", size = 252365, upload_time = "2025-10-06T14:50:07.511Z" }, + { url = "https://files.pythonhosted.org/packages/41/88/d714b86ee2c17d6e09850c70c9d310abac3d808ab49dfa16b43aba9d53fd/multidict-6.7.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:040f393368e63fb0f3330e70c26bfd336656bed925e5cbe17c9da839a6ab13ec", size = 250062, upload_time = "2025-10-06T14:50:09.074Z" }, + { url = "https://files.pythonhosted.org/packages/15/fe/ad407bb9e818c2b31383f6131ca19ea7e35ce93cf1310fce69f12e89de75/multidict-6.7.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b3bc26a951007b1057a1c543af845f1c7e3e71cc240ed1ace7bf4484aa99196e", size = 249683, upload_time = "2025-10-06T14:50:10.714Z" }, + { url = "https://files.pythonhosted.org/packages/8c/a4/a89abdb0229e533fb925e7c6e5c40201c2873efebc9abaf14046a4536ee6/multidict-6.7.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:7b022717c748dd1992a83e219587aabe45980d88969f01b316e78683e6285f64", size = 261254, upload_time = "2025-10-06T14:50:12.28Z" }, + { url = "https://files.pythonhosted.org/packages/8d/aa/0e2b27bd88b40a4fb8dc53dd74eecac70edaa4c1dd0707eb2164da3675b3/multidict-6.7.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:9600082733859f00d79dee64effc7aef1beb26adb297416a4ad2116fd61374bd", size = 257967, upload_time = "2025-10-06T14:50:14.16Z" }, + { url = "https://files.pythonhosted.org/packages/d0/8e/0c67b7120d5d5f6d874ed85a085f9dc770a7f9d8813e80f44a9fec820bb7/multidict-6.7.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:94218fcec4d72bc61df51c198d098ce2b378e0ccbac41ddbed5ef44092913288", size = 250085, upload_time = "2025-10-06T14:50:15.639Z" }, + { url = "https://files.pythonhosted.org/packages/ba/55/b73e1d624ea4b8fd4dd07a3bb70f6e4c7c6c5d9d640a41c6ffe5cdbd2a55/multidict-6.7.0-cp313-cp313-win32.whl", hash = "sha256:a37bd74c3fa9d00be2d7b8eca074dc56bd8077ddd2917a839bd989612671ed17", size = 41713, upload_time = "2025-10-06T14:50:17.066Z" }, + { url = "https://files.pythonhosted.org/packages/32/31/75c59e7d3b4205075b4c183fa4ca398a2daf2303ddf616b04ae6ef55cffe/multidict-6.7.0-cp313-cp313-win_amd64.whl", hash = "sha256:30d193c6cc6d559db42b6bcec8a5d395d34d60c9877a0b71ecd7c204fcf15390", size = 45915, upload_time = "2025-10-06T14:50:18.264Z" }, + { url = 
"https://files.pythonhosted.org/packages/31/2a/8987831e811f1184c22bc2e45844934385363ee61c0a2dcfa8f71b87e608/multidict-6.7.0-cp313-cp313-win_arm64.whl", hash = "sha256:ea3334cabe4d41b7ccd01e4d349828678794edbc2d3ae97fc162a3312095092e", size = 43077, upload_time = "2025-10-06T14:50:19.853Z" }, + { url = "https://files.pythonhosted.org/packages/e8/68/7b3a5170a382a340147337b300b9eb25a9ddb573bcdfff19c0fa3f31ffba/multidict-6.7.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:ad9ce259f50abd98a1ca0aa6e490b58c316a0fce0617f609723e40804add2c00", size = 83114, upload_time = "2025-10-06T14:50:21.223Z" }, + { url = "https://files.pythonhosted.org/packages/55/5c/3fa2d07c84df4e302060f555bbf539310980362236ad49f50eeb0a1c1eb9/multidict-6.7.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:07f5594ac6d084cbb5de2df218d78baf55ef150b91f0ff8a21cc7a2e3a5a58eb", size = 48442, upload_time = "2025-10-06T14:50:22.871Z" }, + { url = "https://files.pythonhosted.org/packages/fc/56/67212d33239797f9bd91962bb899d72bb0f4c35a8652dcdb8ed049bef878/multidict-6.7.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:0591b48acf279821a579282444814a2d8d0af624ae0bc600aa4d1b920b6e924b", size = 46885, upload_time = "2025-10-06T14:50:24.258Z" }, + { url = "https://files.pythonhosted.org/packages/46/d1/908f896224290350721597a61a69cd19b89ad8ee0ae1f38b3f5cd12ea2ac/multidict-6.7.0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:749a72584761531d2b9467cfbdfd29487ee21124c304c4b6cb760d8777b27f9c", size = 242588, upload_time = "2025-10-06T14:50:25.716Z" }, + { url = "https://files.pythonhosted.org/packages/ab/67/8604288bbd68680eee0ab568fdcb56171d8b23a01bcd5cb0c8fedf6e5d99/multidict-6.7.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b4c3d199f953acd5b446bf7c0de1fe25d94e09e79086f8dc2f48a11a129cdf1", size = 249966, upload_time = "2025-10-06T14:50:28.192Z" }, + { url = "https://files.pythonhosted.org/packages/20/33/9228d76339f1ba51e3efef7da3ebd91964d3006217aae13211653193c3ff/multidict-6.7.0-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:9fb0211dfc3b51efea2f349ec92c114d7754dd62c01f81c3e32b765b70c45c9b", size = 228618, upload_time = "2025-10-06T14:50:29.82Z" }, + { url = "https://files.pythonhosted.org/packages/f8/2d/25d9b566d10cab1c42b3b9e5b11ef79c9111eaf4463b8c257a3bd89e0ead/multidict-6.7.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a027ec240fe73a8d6281872690b988eed307cd7d91b23998ff35ff577ca688b5", size = 257539, upload_time = "2025-10-06T14:50:31.731Z" }, + { url = "https://files.pythonhosted.org/packages/b6/b1/8d1a965e6637fc33de3c0d8f414485c2b7e4af00f42cab3d84e7b955c222/multidict-6.7.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1d964afecdf3a8288789df2f5751dc0a8261138c3768d9af117ed384e538fad", size = 256345, upload_time = "2025-10-06T14:50:33.26Z" }, + { url = "https://files.pythonhosted.org/packages/ba/0c/06b5a8adbdeedada6f4fb8d8f193d44a347223b11939b42953eeb6530b6b/multidict-6.7.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:caf53b15b1b7df9fbd0709aa01409000a2b4dd03a5f6f5cc548183c7c8f8b63c", size = 247934, upload_time = "2025-10-06T14:50:34.808Z" }, + { url = "https://files.pythonhosted.org/packages/8f/31/b2491b5fe167ca044c6eb4b8f2c9f3b8a00b24c432c365358eadac5d7625/multidict-6.7.0-cp313-cp313t-musllinux_1_2_aarch64.whl", 
hash = "sha256:654030da3197d927f05a536a66186070e98765aa5142794c9904555d3a9d8fb5", size = 245243, upload_time = "2025-10-06T14:50:36.436Z" }, + { url = "https://files.pythonhosted.org/packages/61/1a/982913957cb90406c8c94f53001abd9eafc271cb3e70ff6371590bec478e/multidict-6.7.0-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:2090d3718829d1e484706a2f525e50c892237b2bf9b17a79b059cb98cddc2f10", size = 235878, upload_time = "2025-10-06T14:50:37.953Z" }, + { url = "https://files.pythonhosted.org/packages/be/c0/21435d804c1a1cf7a2608593f4d19bca5bcbd7a81a70b253fdd1c12af9c0/multidict-6.7.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:2d2cfeec3f6f45651b3d408c4acec0ebf3daa9bc8a112a084206f5db5d05b754", size = 243452, upload_time = "2025-10-06T14:50:39.574Z" }, + { url = "https://files.pythonhosted.org/packages/54/0a/4349d540d4a883863191be6eb9a928846d4ec0ea007d3dcd36323bb058ac/multidict-6.7.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:4ef089f985b8c194d341eb2c24ae6e7408c9a0e2e5658699c92f497437d88c3c", size = 252312, upload_time = "2025-10-06T14:50:41.612Z" }, + { url = "https://files.pythonhosted.org/packages/26/64/d5416038dbda1488daf16b676e4dbfd9674dde10a0cc8f4fc2b502d8125d/multidict-6.7.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:e93a0617cd16998784bf4414c7e40f17a35d2350e5c6f0bd900d3a8e02bd3762", size = 246935, upload_time = "2025-10-06T14:50:43.972Z" }, + { url = "https://files.pythonhosted.org/packages/9f/8c/8290c50d14e49f35e0bd4abc25e1bc7711149ca9588ab7d04f886cdf03d9/multidict-6.7.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f0feece2ef8ebc42ed9e2e8c78fc4aa3cf455733b507c09ef7406364c94376c6", size = 243385, upload_time = "2025-10-06T14:50:45.648Z" }, + { url = "https://files.pythonhosted.org/packages/ef/a0/f83ae75e42d694b3fbad3e047670e511c138be747bc713cf1b10d5096416/multidict-6.7.0-cp313-cp313t-win32.whl", hash = "sha256:19a1d55338ec1be74ef62440ca9e04a2f001a04d0cc49a4983dc320ff0f3212d", size = 47777, upload_time = "2025-10-06T14:50:47.154Z" }, + { url = "https://files.pythonhosted.org/packages/dc/80/9b174a92814a3830b7357307a792300f42c9e94664b01dee8e457551fa66/multidict-6.7.0-cp313-cp313t-win_amd64.whl", hash = "sha256:3da4fb467498df97e986af166b12d01f05d2e04f978a9c1c680ea1988e0bc4b6", size = 53104, upload_time = "2025-10-06T14:50:48.851Z" }, + { url = "https://files.pythonhosted.org/packages/cc/28/04baeaf0428d95bb7a7bea0e691ba2f31394338ba424fb0679a9ed0f4c09/multidict-6.7.0-cp313-cp313t-win_arm64.whl", hash = "sha256:b4121773c49a0776461f4a904cdf6264c88e42218aaa8407e803ca8025872792", size = 45503, upload_time = "2025-10-06T14:50:50.16Z" }, + { url = "https://files.pythonhosted.org/packages/e2/b1/3da6934455dd4b261d4c72f897e3a5728eba81db59959f3a639245891baa/multidict-6.7.0-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:3bab1e4aff7adaa34410f93b1f8e57c4b36b9af0426a76003f441ee1d3c7e842", size = 75128, upload_time = "2025-10-06T14:50:51.92Z" }, + { url = "https://files.pythonhosted.org/packages/14/2c/f069cab5b51d175a1a2cb4ccdf7a2c2dabd58aa5bd933fa036a8d15e2404/multidict-6.7.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:b8512bac933afc3e45fb2b18da8e59b78d4f408399a960339598374d4ae3b56b", size = 44410, upload_time = "2025-10-06T14:50:53.275Z" }, + { url = "https://files.pythonhosted.org/packages/42/e2/64bb41266427af6642b6b128e8774ed84c11b80a90702c13ac0a86bb10cc/multidict-6.7.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:79dcf9e477bc65414ebfea98ffd013cb39552b5ecd62908752e0e413d6d06e38", size = 43205, upload_time = "2025-10-06T14:50:54.911Z" }, 
+ { url = "https://files.pythonhosted.org/packages/02/68/6b086fef8a3f1a8541b9236c594f0c9245617c29841f2e0395d979485cde/multidict-6.7.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:31bae522710064b5cbeddaf2e9f32b1abab70ac6ac91d42572502299e9953128", size = 245084, upload_time = "2025-10-06T14:50:56.369Z" }, + { url = "https://files.pythonhosted.org/packages/15/ee/f524093232007cd7a75c1d132df70f235cfd590a7c9eaccd7ff422ef4ae8/multidict-6.7.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4a0df7ff02397bb63e2fd22af2c87dfa39e8c7f12947bc524dbdc528282c7e34", size = 252667, upload_time = "2025-10-06T14:50:57.991Z" }, + { url = "https://files.pythonhosted.org/packages/02/a5/eeb3f43ab45878f1895118c3ef157a480db58ede3f248e29b5354139c2c9/multidict-6.7.0-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:7a0222514e8e4c514660e182d5156a415c13ef0aabbd71682fc714e327b95e99", size = 233590, upload_time = "2025-10-06T14:50:59.589Z" }, + { url = "https://files.pythonhosted.org/packages/6a/1e/76d02f8270b97269d7e3dbd45644b1785bda457b474315f8cf999525a193/multidict-6.7.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2397ab4daaf2698eb51a76721e98db21ce4f52339e535725de03ea962b5a3202", size = 264112, upload_time = "2025-10-06T14:51:01.183Z" }, + { url = "https://files.pythonhosted.org/packages/76/0b/c28a70ecb58963847c2a8efe334904cd254812b10e535aefb3bcce513918/multidict-6.7.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8891681594162635948a636c9fe0ff21746aeb3dd5463f6e25d9bea3a8a39ca1", size = 261194, upload_time = "2025-10-06T14:51:02.794Z" }, + { url = "https://files.pythonhosted.org/packages/b4/63/2ab26e4209773223159b83aa32721b4021ffb08102f8ac7d689c943fded1/multidict-6.7.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:18706cc31dbf402a7945916dd5cddf160251b6dab8a2c5f3d6d5a55949f676b3", size = 248510, upload_time = "2025-10-06T14:51:04.724Z" }, + { url = "https://files.pythonhosted.org/packages/93/cd/06c1fa8282af1d1c46fd55c10a7930af652afdce43999501d4d68664170c/multidict-6.7.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f844a1bbf1d207dd311a56f383f7eda2d0e134921d45751842d8235e7778965d", size = 248395, upload_time = "2025-10-06T14:51:06.306Z" }, + { url = "https://files.pythonhosted.org/packages/99/ac/82cb419dd6b04ccf9e7e61befc00c77614fc8134362488b553402ecd55ce/multidict-6.7.0-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:d4393e3581e84e5645506923816b9cc81f5609a778c7e7534054091acc64d1c6", size = 239520, upload_time = "2025-10-06T14:51:08.091Z" }, + { url = "https://files.pythonhosted.org/packages/fa/f3/a0f9bf09493421bd8716a362e0cd1d244f5a6550f5beffdd6b47e885b331/multidict-6.7.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:fbd18dc82d7bf274b37aa48d664534330af744e03bccf696d6f4c6042e7d19e7", size = 245479, upload_time = "2025-10-06T14:51:10.365Z" }, + { url = "https://files.pythonhosted.org/packages/8d/01/476d38fc73a212843f43c852b0eee266b6971f0e28329c2184a8df90c376/multidict-6.7.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:b6234e14f9314731ec45c42fc4554b88133ad53a09092cc48a88e771c125dadb", size = 258903, upload_time = "2025-10-06T14:51:12.466Z" }, + { url = 
"https://files.pythonhosted.org/packages/49/6d/23faeb0868adba613b817d0e69c5f15531b24d462af8012c4f6de4fa8dc3/multidict-6.7.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:08d4379f9744d8f78d98c8673c06e202ffa88296f009c71bbafe8a6bf847d01f", size = 252333, upload_time = "2025-10-06T14:51:14.48Z" }, + { url = "https://files.pythonhosted.org/packages/1e/cc/48d02ac22b30fa247f7dad82866e4b1015431092f4ba6ebc7e77596e0b18/multidict-6.7.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:9fe04da3f79387f450fd0061d4dd2e45a72749d31bf634aecc9e27f24fdc4b3f", size = 243411, upload_time = "2025-10-06T14:51:16.072Z" }, + { url = "https://files.pythonhosted.org/packages/4a/03/29a8bf5a18abf1fe34535c88adbdfa88c9fb869b5a3b120692c64abe8284/multidict-6.7.0-cp314-cp314-win32.whl", hash = "sha256:fbafe31d191dfa7c4c51f7a6149c9fb7e914dcf9ffead27dcfd9f1ae382b3885", size = 40940, upload_time = "2025-10-06T14:51:17.544Z" }, + { url = "https://files.pythonhosted.org/packages/82/16/7ed27b680791b939de138f906d5cf2b4657b0d45ca6f5dd6236fdddafb1a/multidict-6.7.0-cp314-cp314-win_amd64.whl", hash = "sha256:2f67396ec0310764b9222a1728ced1ab638f61aadc6226f17a71dd9324f9a99c", size = 45087, upload_time = "2025-10-06T14:51:18.875Z" }, + { url = "https://files.pythonhosted.org/packages/cd/3c/e3e62eb35a1950292fe39315d3c89941e30a9d07d5d2df42965ab041da43/multidict-6.7.0-cp314-cp314-win_arm64.whl", hash = "sha256:ba672b26069957ee369cfa7fc180dde1fc6f176eaf1e6beaf61fbebbd3d9c000", size = 42368, upload_time = "2025-10-06T14:51:20.225Z" }, + { url = "https://files.pythonhosted.org/packages/8b/40/cd499bd0dbc5f1136726db3153042a735fffd0d77268e2ee20d5f33c010f/multidict-6.7.0-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:c1dcc7524066fa918c6a27d61444d4ee7900ec635779058571f70d042d86ed63", size = 82326, upload_time = "2025-10-06T14:51:21.588Z" }, + { url = "https://files.pythonhosted.org/packages/13/8a/18e031eca251c8df76daf0288e6790561806e439f5ce99a170b4af30676b/multidict-6.7.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:27e0b36c2d388dc7b6ced3406671b401e84ad7eb0656b8f3a2f46ed0ce483718", size = 48065, upload_time = "2025-10-06T14:51:22.93Z" }, + { url = "https://files.pythonhosted.org/packages/40/71/5e6701277470a87d234e433fb0a3a7deaf3bcd92566e421e7ae9776319de/multidict-6.7.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2a7baa46a22e77f0988e3b23d4ede5513ebec1929e34ee9495be535662c0dfe2", size = 46475, upload_time = "2025-10-06T14:51:24.352Z" }, + { url = "https://files.pythonhosted.org/packages/fe/6a/bab00cbab6d9cfb57afe1663318f72ec28289ea03fd4e8236bb78429893a/multidict-6.7.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7bf77f54997a9166a2f5675d1201520586439424c2511723a7312bdb4bcc034e", size = 239324, upload_time = "2025-10-06T14:51:25.822Z" }, + { url = "https://files.pythonhosted.org/packages/2a/5f/8de95f629fc22a7769ade8b41028e3e5a822c1f8904f618d175945a81ad3/multidict-6.7.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e011555abada53f1578d63389610ac8a5400fc70ce71156b0aa30d326f1a5064", size = 246877, upload_time = "2025-10-06T14:51:27.604Z" }, + { url = "https://files.pythonhosted.org/packages/23/b4/38881a960458f25b89e9f4a4fdcb02ac101cfa710190db6e5528841e67de/multidict-6.7.0-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:28b37063541b897fd6a318007373930a75ca6d6ac7c940dbe14731ffdd8d498e", size = 225824, upload_time = "2025-10-06T14:51:29.664Z" }, + { url = 
"https://files.pythonhosted.org/packages/1e/39/6566210c83f8a261575f18e7144736059f0c460b362e96e9cf797a24b8e7/multidict-6.7.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:05047ada7a2fde2631a0ed706f1fd68b169a681dfe5e4cf0f8e4cb6618bbc2cd", size = 253558, upload_time = "2025-10-06T14:51:31.684Z" }, + { url = "https://files.pythonhosted.org/packages/00/a3/67f18315100f64c269f46e6c0319fa87ba68f0f64f2b8e7fd7c72b913a0b/multidict-6.7.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:716133f7d1d946a4e1b91b1756b23c088881e70ff180c24e864c26192ad7534a", size = 252339, upload_time = "2025-10-06T14:51:33.699Z" }, + { url = "https://files.pythonhosted.org/packages/c8/2a/1cb77266afee2458d82f50da41beba02159b1d6b1f7973afc9a1cad1499b/multidict-6.7.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d1bed1b467ef657f2a0ae62844a607909ef1c6889562de5e1d505f74457d0b96", size = 244895, upload_time = "2025-10-06T14:51:36.189Z" }, + { url = "https://files.pythonhosted.org/packages/dd/72/09fa7dd487f119b2eb9524946ddd36e2067c08510576d43ff68469563b3b/multidict-6.7.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:ca43bdfa5d37bd6aee89d85e1d0831fb86e25541be7e9d376ead1b28974f8e5e", size = 241862, upload_time = "2025-10-06T14:51:41.291Z" }, + { url = "https://files.pythonhosted.org/packages/65/92/bc1f8bd0853d8669300f732c801974dfc3702c3eeadae2f60cef54dc69d7/multidict-6.7.0-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:44b546bd3eb645fd26fb949e43c02a25a2e632e2ca21a35e2e132c8105dc8599", size = 232376, upload_time = "2025-10-06T14:51:43.55Z" }, + { url = "https://files.pythonhosted.org/packages/09/86/ac39399e5cb9d0c2ac8ef6e10a768e4d3bc933ac808d49c41f9dc23337eb/multidict-6.7.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:a6ef16328011d3f468e7ebc326f24c1445f001ca1dec335b2f8e66bed3006394", size = 240272, upload_time = "2025-10-06T14:51:45.265Z" }, + { url = "https://files.pythonhosted.org/packages/3d/b6/fed5ac6b8563ec72df6cb1ea8dac6d17f0a4a1f65045f66b6d3bf1497c02/multidict-6.7.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:5aa873cbc8e593d361ae65c68f85faadd755c3295ea2c12040ee146802f23b38", size = 248774, upload_time = "2025-10-06T14:51:46.836Z" }, + { url = "https://files.pythonhosted.org/packages/6b/8d/b954d8c0dc132b68f760aefd45870978deec6818897389dace00fcde32ff/multidict-6.7.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:3d7b6ccce016e29df4b7ca819659f516f0bc7a4b3efa3bb2012ba06431b044f9", size = 242731, upload_time = "2025-10-06T14:51:48.541Z" }, + { url = "https://files.pythonhosted.org/packages/16/9d/a2dac7009125d3540c2f54e194829ea18ac53716c61b655d8ed300120b0f/multidict-6.7.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:171b73bd4ee683d307599b66793ac80981b06f069b62eea1c9e29c9241aa66b0", size = 240193, upload_time = "2025-10-06T14:51:50.355Z" }, + { url = "https://files.pythonhosted.org/packages/39/ca/c05f144128ea232ae2178b008d5011d4e2cea86e4ee8c85c2631b1b94802/multidict-6.7.0-cp314-cp314t-win32.whl", hash = "sha256:b2d7f80c4e1fd010b07cb26820aae86b7e73b681ee4889684fb8d2d4537aab13", size = 48023, upload_time = "2025-10-06T14:51:51.883Z" }, + { url = "https://files.pythonhosted.org/packages/ba/8f/0a60e501584145588be1af5cc829265701ba3c35a64aec8e07cbb71d39bb/multidict-6.7.0-cp314-cp314t-win_amd64.whl", hash = "sha256:09929cab6fcb68122776d575e03c6cc64ee0b8fca48d17e135474b042ce515cd", size = 53507, upload_time = "2025-10-06T14:51:53.672Z" 
},
+ { url = "https://files.pythonhosted.org/packages/7f/ae/3148b988a9c6239903e786eac19c889fab607c31d6efa7fb2147e5680f23/multidict-6.7.0-cp314-cp314t-win_arm64.whl", hash = "sha256:cc41db090ed742f32bd2d2c721861725e6109681eddf835d0a82bd3a5c382827", size = 44804, upload_time = "2025-10-06T14:51:55.415Z" },
+ { url = "https://files.pythonhosted.org/packages/b7/da/7d22601b625e241d4f23ef1ebff8acfc60da633c9e7e7922e24d10f592b3/multidict-6.7.0-py3-none-any.whl", hash = "sha256:394fc5c42a333c9ffc3e421a4c85e08580d990e08b99f6bf35b4132114c5dcb3", size = 12317, upload_time = "2025-10-06T14:52:29.272Z" },
]

[[package]]
@@ -1549,88 +1597,88 @@ source = { registry = "https://pypi.org/simple" }
dependencies = [
    { name = "dill" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/b5/ae/04f39c5d0d0def03247c2893d6f2b83c136bf3320a2154d7b8858f2ba72d/multiprocess-0.70.16.tar.gz", hash = "sha256:161af703d4652a0e1410be6abccecde4a7ddffd19341be0a7011b94aeb171ac1", size = 1772603 }
+sdist = { url = "https://files.pythonhosted.org/packages/b5/ae/04f39c5d0d0def03247c2893d6f2b83c136bf3320a2154d7b8858f2ba72d/multiprocess-0.70.16.tar.gz", hash = "sha256:161af703d4652a0e1410be6abccecde4a7ddffd19341be0a7011b94aeb171ac1", size = 1772603, upload_time = "2024-01-28T18:52:34.85Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/bc/f7/7ec7fddc92e50714ea3745631f79bd9c96424cb2702632521028e57d3a36/multiprocess-0.70.16-py310-none-any.whl", hash = "sha256:c4a9944c67bd49f823687463660a2d6daae94c289adff97e0f9d696ba6371d02", size = 134824 },
- { url = "https://files.pythonhosted.org/packages/50/15/b56e50e8debaf439f44befec5b2af11db85f6e0f344c3113ae0be0593a91/multiprocess-0.70.16-py311-none-any.whl", hash = "sha256:af4cabb0dac72abfb1e794fa7855c325fd2b55a10a44628a3c1ad3311c04127a", size = 143519 },
- { url = "https://files.pythonhosted.org/packages/0a/7d/a988f258104dcd2ccf1ed40fdc97e26c4ac351eeaf81d76e266c52d84e2f/multiprocess-0.70.16-py312-none-any.whl", hash = "sha256:fc0544c531920dde3b00c29863377f87e1632601092ea2daca74e4beb40faa2e", size = 146741 },
- { url = "https://files.pythonhosted.org/packages/ea/89/38df130f2c799090c978b366cfdf5b96d08de5b29a4a293df7f7429fa50b/multiprocess-0.70.16-py38-none-any.whl", hash = "sha256:a71d82033454891091a226dfc319d0cfa8019a4e888ef9ca910372a446de4435", size = 132628 },
- { url = "https://files.pythonhosted.org/packages/da/d9/f7f9379981e39b8c2511c9e0326d212accacb82f12fbfdc1aa2ce2a7b2b6/multiprocess-0.70.16-py39-none-any.whl", hash = "sha256:a0bafd3ae1b732eac64be2e72038231c1ba97724b60b09400d68f229fcc2fbf3", size = 133351 },
+ { url = "https://files.pythonhosted.org/packages/bc/f7/7ec7fddc92e50714ea3745631f79bd9c96424cb2702632521028e57d3a36/multiprocess-0.70.16-py310-none-any.whl", hash = "sha256:c4a9944c67bd49f823687463660a2d6daae94c289adff97e0f9d696ba6371d02", size = 134824, upload_time = "2024-01-28T18:52:26.062Z" },
+ { url = "https://files.pythonhosted.org/packages/50/15/b56e50e8debaf439f44befec5b2af11db85f6e0f344c3113ae0be0593a91/multiprocess-0.70.16-py311-none-any.whl", hash = "sha256:af4cabb0dac72abfb1e794fa7855c325fd2b55a10a44628a3c1ad3311c04127a", size = 143519, upload_time = "2024-01-28T18:52:28.115Z" },
+ { url = "https://files.pythonhosted.org/packages/0a/7d/a988f258104dcd2ccf1ed40fdc97e26c4ac351eeaf81d76e266c52d84e2f/multiprocess-0.70.16-py312-none-any.whl", hash = "sha256:fc0544c531920dde3b00c29863377f87e1632601092ea2daca74e4beb40faa2e", size = 146741, upload_time = "2024-01-28T18:52:29.395Z" },
+ { url = "https://files.pythonhosted.org/packages/ea/89/38df130f2c799090c978b366cfdf5b96d08de5b29a4a293df7f7429fa50b/multiprocess-0.70.16-py38-none-any.whl", hash = "sha256:a71d82033454891091a226dfc319d0cfa8019a4e888ef9ca910372a446de4435", size = 132628, upload_time = "2024-01-28T18:52:30.853Z" },
+ { url = "https://files.pythonhosted.org/packages/da/d9/f7f9379981e39b8c2511c9e0326d212accacb82f12fbfdc1aa2ce2a7b2b6/multiprocess-0.70.16-py39-none-any.whl", hash = "sha256:a0bafd3ae1b732eac64be2e72038231c1ba97724b60b09400d68f229fcc2fbf3", size = 133351, upload_time = "2024-01-28T18:52:31.981Z" },
]

[[package]]
name = "murmurhash"
version = "1.0.15"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/23/2e/88c147931ea9725d634840d538622e94122bceaf346233349b7b5c62964b/murmurhash-1.0.15.tar.gz", hash = "sha256:58e2b27b7847f9e2a6edf10b47a8c8dd70a4705f45dccb7bf76aeadacf56ba01", size = 13291 }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/b6/46/be8522d3456fdccf1b8b049c6d82e7a3c1114c4fc2cfe14b04cba4b3e701/murmurhash-1.0.15-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d37e3ae44746bca80b1a917c2ea625cf216913564ed43f69d2888e5df97db0cb", size = 27884 },
- { url = "https://files.pythonhosted.org/packages/ed/cc/630449bf4f6178d7daf948ce46ad00b25d279065fc30abd8d706be3d87e0/murmurhash-1.0.15-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0861cb11039409eaf46878456b7d985ef17b6b484103a6fc367b2ecec846891d", size = 27855 },
- { url = "https://files.pythonhosted.org/packages/ff/30/ea8f601a9bf44db99468696efd59eb9cff1157cd55cb586d67116697583f/murmurhash-1.0.15-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:5a301decfaccfec70fe55cb01dde2a012c3014a874542eaa7cc73477bb749616", size = 134088 },
- { url = "https://files.pythonhosted.org/packages/c9/de/c40ce8c0877d406691e735b8d6e9c815f36a82b499d358313db5dbe219d7/murmurhash-1.0.15-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:32c6fde7bd7e9407003370a07b5f4addacabe1556ad3dc2cac246b7a2bba3400", size = 133978 },
- { url = "https://files.pythonhosted.org/packages/47/84/bd49963ecd84ebab2fe66595e2d1ed41d5e8b5153af5dc930f0bd827007c/murmurhash-1.0.15-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5d8b43a7011540dc3c7ce66f2134df9732e2bc3bbb4a35f6458bc755e48bde26", size = 132956 },
- { url = "https://files.pythonhosted.org/packages/4f/7c/2530769c545074417c862583f05f4245644599f1e9ff619b3dfe2969aafc/murmurhash-1.0.15-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:43bf4541892ecd95963fcd307bf1c575fc0fee1682f41c93007adee71ca2bb40", size = 134184 },
- { url = "https://files.pythonhosted.org/packages/84/a4/b249b042f5afe34d14ada2dc4afc777e883c15863296756179652e081c44/murmurhash-1.0.15-cp312-cp312-win_amd64.whl", hash = "sha256:f4ac15a2089dc42e6eb0966622d42d2521590a12c92480aafecf34c085302cca", size = 25647 },
- { url = "https://files.pythonhosted.org/packages/13/bf/028179259aebc18fd4ba5cae2601d1d47517427a537ab44336446431a215/murmurhash-1.0.15-cp312-cp312-win_arm64.whl", hash = "sha256:4a70ca4ae19e600d9be3da64d00710e79dde388a4d162f22078d64844d0ebdda", size = 23338 },
- { url = "https://files.pythonhosted.org/packages/29/2f/ba300b5f04dae0409202d6285668b8a9d3ade43a846abee3ef611cb388d5/murmurhash-1.0.15-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:fe50dc70e52786759358fd1471e309b94dddfffb9320d9dfea233c7684c894ba", size = 27861 },
- { url =
"https://files.pythonhosted.org/packages/34/02/29c19d268e6f4ea1ed2a462c901eed1ed35b454e2cbc57da592fad663ac6/murmurhash-1.0.15-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1349a7c23f6092e7998ddc5bd28546cc31a595afc61e9fdb3afc423feec3d7ad", size = 27840 }, - { url = "https://files.pythonhosted.org/packages/e2/63/58e2de2b5232cd294c64092688c422196e74f9fa8b3958bdf02d33df24b9/murmurhash-1.0.15-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b3ba6d05de2613535b5a9227d4ad8ef40a540465f64660d4a8800634ae10e04f", size = 133080 }, - { url = "https://files.pythonhosted.org/packages/aa/9a/d13e2e9f8ba1ced06840921a50f7cece0a475453284158a3018b72679761/murmurhash-1.0.15-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fa1b70b3cc2801ab44179c65827bbd12009c68b34e9d9ce7125b6a0bd35af63c", size = 132648 }, - { url = "https://files.pythonhosted.org/packages/b2/e1/47994f1813fa205c84977b0ff51ae6709f8539af052c7491a5f863d82bdc/murmurhash-1.0.15-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:213d710fb6f4ef3bc11abbfad0fa94a75ffb675b7dc158c123471e5de869f9af", size = 131502 }, - { url = "https://files.pythonhosted.org/packages/b9/ea/90c1fd00b4aeb704fb5e84cd666b33ffd7f245155048071ffbb51d2bb57d/murmurhash-1.0.15-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b65a5c4e7f5d71f7ccac2d2b60bdf7092d7976270878cfec59d5a66a533db823", size = 132736 }, - { url = "https://files.pythonhosted.org/packages/00/db/da73462dbfa77f6433b128d2120ba7ba300f8c06dc4f4e022c38d240a5f5/murmurhash-1.0.15-cp313-cp313-win_amd64.whl", hash = "sha256:9aba94c5d841e1904cd110e94ceb7f49cfb60a874bbfb27e0373622998fb7c7c", size = 25682 }, - { url = "https://files.pythonhosted.org/packages/bb/83/032729ef14971b938fbef41ee125fc8800020ee229bd35178b6ede8ee934/murmurhash-1.0.15-cp313-cp313-win_arm64.whl", hash = "sha256:263807eca40d08c7b702413e45cca75ecb5883aa337237dc5addb660f1483378", size = 23370 }, - { url = "https://files.pythonhosted.org/packages/10/83/7547d9205e9bd2f8e5dfd0b682cc9277594f98909f228eb359489baec1df/murmurhash-1.0.15-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:694fd42a74b7ce257169d14c24aa616aa6cd4ccf8abe50eca0557e08da99d055", size = 29955 }, - { url = "https://files.pythonhosted.org/packages/b7/c7/3afd5de7a5b3ae07fe2d3a3271b327ee1489c58ba2b2f2159bd31a25edb9/murmurhash-1.0.15-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a2ea4546ba426390beff3cd10db8f0152fdc9072c4f2583ec7d8aa9f3e4ac070", size = 30108 }, - { url = "https://files.pythonhosted.org/packages/02/69/d6637ee67d78ebb2538c00411f28ea5c154886bbe1db16c49435a8a4ab16/murmurhash-1.0.15-cp313-cp313t-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:34e5a91139c40b10f98d0b297907f5d5267b4b1b2e5dd2eb74a021824f751b98", size = 164054 }, - { url = "https://files.pythonhosted.org/packages/ab/4c/89e590165b4c7da6bf941441212a721a270195332d3aacfdfdf527d466ca/murmurhash-1.0.15-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:dc35606868a5961cf42e79314ca0bddf5a400ce377b14d83192057928d6252ec", size = 168153 }, - { url = "https://files.pythonhosted.org/packages/07/7a/95c42df0c21d2e413b9fcd17317a7587351daeb264dc29c6aec1fdbd26f8/murmurhash-1.0.15-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:43cc6ac3b91ca0f7a5ae9c063ba4d6c26972c97fd7c25280ecc666413e4c5535", size = 164345 }, - { url = 
"https://files.pythonhosted.org/packages/d0/22/9d02c880a88b83bb3ce7d6a38fb727373ab78d82e5f3d8d9fc5612219f90/murmurhash-1.0.15-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:847d712136cb462f0e4bd6229ee2d9eb996d8854eb8312dff3d20c8f5181fda5", size = 161990 }, - { url = "https://files.pythonhosted.org/packages/9a/e3/750232524e0dc262e8dcede6536dafc766faadd9a52f1d23746b02948ad8/murmurhash-1.0.15-cp313-cp313t-win_amd64.whl", hash = "sha256:2680851af6901dbe66cc4aa7ef8e263de47e6e1b425ae324caa571bdf18f8d58", size = 28812 }, - { url = "https://files.pythonhosted.org/packages/ff/89/4ad9d215ef6ade89f27a72dc4e86b98ef1a43534cc3e6a6900a362a0bf0a/murmurhash-1.0.15-cp313-cp313t-win_arm64.whl", hash = "sha256:189a8de4d657b5da9efd66601b0636330b08262b3a55431f2379097c986995d0", size = 25398 }, - { url = "https://files.pythonhosted.org/packages/1c/69/726df275edf07688146966e15eaaa23168100b933a2e1a29b37eb56c6db8/murmurhash-1.0.15-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:7c4280136b738e85ff76b4bdc4341d0b867ee753e73fd8b6994288080c040d0b", size = 28029 }, - { url = "https://files.pythonhosted.org/packages/59/8f/24ecf9061bc2b20933df8aba47c73e904274ea8811c8300cab92f6f82372/murmurhash-1.0.15-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:d4d681f474830489e2ec1d912095cfff027fbaf2baa5414c7e9d25b89f0fab68", size = 27912 }, - { url = "https://files.pythonhosted.org/packages/ba/26/fff3caba25aa3c0622114e03c69fb66c839b22335b04d7cce91a3a126d44/murmurhash-1.0.15-cp314-cp314-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:d7e47c5746785db6a43b65fac47b9e63dd71dfbd89a8c92693425b9715e68c6e", size = 131847 }, - { url = "https://files.pythonhosted.org/packages/df/e4/0f2b9fc533467a27afb4e906c33f32d5f637477de87dd94690e0c44335a6/murmurhash-1.0.15-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e8e674f02a99828c8a671ba99cd03299381b2f0744e6f25c29cadfc6151dc724", size = 132267 }, - { url = "https://files.pythonhosted.org/packages/da/bf/9d1c107989728ec46e25773d503aa54070b32822a18cfa7f9d5f41bc17a5/murmurhash-1.0.15-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:26fd7c7855ac4850ad8737991d7b0e3e501df93ebaf0cf45aa5954303085fdba", size = 131894 }, - { url = "https://files.pythonhosted.org/packages/0d/81/dcf27c71445c0e993b10e33169a098ca60ee702c5c58fcbde205fa6332a6/murmurhash-1.0.15-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:cb8ebafae60d5f892acff533cc599a359954d8c016a829514cb3f6e9ee10f322", size = 132054 }, - { url = "https://files.pythonhosted.org/packages/bc/32/e874a14b2d2246bd2d16f80f49fad393a3865d4ee7d66d2cae939a67a29a/murmurhash-1.0.15-cp314-cp314-win_amd64.whl", hash = "sha256:898a629bf111f1aeba4437e533b5b836c0a9d2dd12d6880a9c75f6ca13e30e22", size = 26579 }, - { url = "https://files.pythonhosted.org/packages/af/8e/4fca051ed8ae4d23a15aaf0a82b18cb368e8cf84f1e3b474d5749ec46069/murmurhash-1.0.15-cp314-cp314-win_arm64.whl", hash = "sha256:88dc1dd53b7b37c0df1b8b6bce190c12763014492f0269ff7620dc6027f470f4", size = 24341 }, - { url = "https://files.pythonhosted.org/packages/38/9c/c72c2a4edd86aac829337ab9f83cf04cdb15e5d503e4c9a3a243f30a261c/murmurhash-1.0.15-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:6cb4e962ec4f928b30c271b2d84e6707eff6d942552765b663743cfa618b294b", size = 30146 }, - { url = "https://files.pythonhosted.org/packages/ac/d7/72b47ebc86436cd0aa1fd4c6e8779521ec389397ac11389990278d0f7a47/murmurhash-1.0.15-cp314-cp314t-macosx_11_0_arm64.whl", hash = 
"sha256:5678a3ea4fbf0cbaaca2bed9b445f556f294d5f799c67185d05ffcb221a77faf", size = 30141 }, - { url = "https://files.pythonhosted.org/packages/64/bb/6d2f09135079c34dc2d26e961c52742d558b320c61503f273eab6ba743d9/murmurhash-1.0.15-cp314-cp314t-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ef19f38c6b858eef83caf710773db98c8f7eb2193b4c324650c74f3d8ba299e0", size = 163898 }, - { url = "https://files.pythonhosted.org/packages/b9/e2/9c1b462e33f9cb2d632056f07c90b502fc20bd7da50a15d0557343bd2fed/murmurhash-1.0.15-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:22aa3ceaedd2e57078b491ed08852d512b84ff4ff9bb2ff3f9bf0eec7f214c9e", size = 168040 }, - { url = "https://files.pythonhosted.org/packages/e8/73/8694db1408fcdfa73589f7df6c445437ea146986fa1e393ec60d26d6e30c/murmurhash-1.0.15-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:bba0e0262c0d08682b028cb963ac477bd9839029486fa1333fc5c01fb6072749", size = 164239 }, - { url = "https://files.pythonhosted.org/packages/2d/f9/8e360bdfc3c44e267e7e046f0e0b9922766da92da26959a6963f597e6bb5/murmurhash-1.0.15-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:4fd8189ee293a09f30f4931408f40c28ccd42d9de4f66595f8814879339378bc", size = 161811 }, - { url = "https://files.pythonhosted.org/packages/f9/31/97649680595b1096803d877ababb9a67c07f4378f177ec885eea28b9db6d/murmurhash-1.0.15-cp314-cp314t-win_amd64.whl", hash = "sha256:66395b1388f7daa5103db92debe06842ae3be4c0749ef6db68b444518666cdcc", size = 29817 }, - { url = "https://files.pythonhosted.org/packages/76/66/4fce8755f25d77324401886c00017c556be7ca3039575b94037aff905385/murmurhash-1.0.15-cp314-cp314t-win_arm64.whl", hash = "sha256:c22e56c6a0b70598a66e456de5272f76088bc623688da84ef403148a6d41851d", size = 26219 }, +sdist = { url = "https://files.pythonhosted.org/packages/23/2e/88c147931ea9725d634840d538622e94122bceaf346233349b7b5c62964b/murmurhash-1.0.15.tar.gz", hash = "sha256:58e2b27b7847f9e2a6edf10b47a8c8dd70a4705f45dccb7bf76aeadacf56ba01", size = 13291, upload_time = "2025-11-14T09:51:15.272Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b6/46/be8522d3456fdccf1b8b049c6d82e7a3c1114c4fc2cfe14b04cba4b3e701/murmurhash-1.0.15-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d37e3ae44746bca80b1a917c2ea625cf216913564ed43f69d2888e5df97db0cb", size = 27884, upload_time = "2025-11-14T09:50:13.133Z" }, + { url = "https://files.pythonhosted.org/packages/ed/cc/630449bf4f6178d7daf948ce46ad00b25d279065fc30abd8d706be3d87e0/murmurhash-1.0.15-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0861cb11039409eaf46878456b7d985ef17b6b484103a6fc367b2ecec846891d", size = 27855, upload_time = "2025-11-14T09:50:14.859Z" }, + { url = "https://files.pythonhosted.org/packages/ff/30/ea8f601a9bf44db99468696efd59eb9cff1157cd55cb586d67116697583f/murmurhash-1.0.15-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:5a301decfaccfec70fe55cb01dde2a012c3014a874542eaa7cc73477bb749616", size = 134088, upload_time = "2025-11-14T09:50:15.958Z" }, + { url = "https://files.pythonhosted.org/packages/c9/de/c40ce8c0877d406691e735b8d6e9c815f36a82b499d358313db5dbe219d7/murmurhash-1.0.15-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:32c6fde7bd7e9407003370a07b5f4addacabe1556ad3dc2cac246b7a2bba3400", size = 133978, upload_time = "2025-11-14T09:50:17.572Z" }, + { url = 
"https://files.pythonhosted.org/packages/47/84/bd49963ecd84ebab2fe66595e2d1ed41d5e8b5153af5dc930f0bd827007c/murmurhash-1.0.15-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5d8b43a7011540dc3c7ce66f2134df9732e2bc3bbb4a35f6458bc755e48bde26", size = 132956, upload_time = "2025-11-14T09:50:18.742Z" }, + { url = "https://files.pythonhosted.org/packages/4f/7c/2530769c545074417c862583f05f4245644599f1e9ff619b3dfe2969aafc/murmurhash-1.0.15-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:43bf4541892ecd95963fcd307bf1c575fc0fee1682f41c93007adee71ca2bb40", size = 134184, upload_time = "2025-11-14T09:50:19.941Z" }, + { url = "https://files.pythonhosted.org/packages/84/a4/b249b042f5afe34d14ada2dc4afc777e883c15863296756179652e081c44/murmurhash-1.0.15-cp312-cp312-win_amd64.whl", hash = "sha256:f4ac15a2089dc42e6eb0966622d42d2521590a12c92480aafecf34c085302cca", size = 25647, upload_time = "2025-11-14T09:50:21.049Z" }, + { url = "https://files.pythonhosted.org/packages/13/bf/028179259aebc18fd4ba5cae2601d1d47517427a537ab44336446431a215/murmurhash-1.0.15-cp312-cp312-win_arm64.whl", hash = "sha256:4a70ca4ae19e600d9be3da64d00710e79dde388a4d162f22078d64844d0ebdda", size = 23338, upload_time = "2025-11-14T09:50:22.359Z" }, + { url = "https://files.pythonhosted.org/packages/29/2f/ba300b5f04dae0409202d6285668b8a9d3ade43a846abee3ef611cb388d5/murmurhash-1.0.15-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:fe50dc70e52786759358fd1471e309b94dddfffb9320d9dfea233c7684c894ba", size = 27861, upload_time = "2025-11-14T09:50:23.804Z" }, + { url = "https://files.pythonhosted.org/packages/34/02/29c19d268e6f4ea1ed2a462c901eed1ed35b454e2cbc57da592fad663ac6/murmurhash-1.0.15-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1349a7c23f6092e7998ddc5bd28546cc31a595afc61e9fdb3afc423feec3d7ad", size = 27840, upload_time = "2025-11-14T09:50:25.146Z" }, + { url = "https://files.pythonhosted.org/packages/e2/63/58e2de2b5232cd294c64092688c422196e74f9fa8b3958bdf02d33df24b9/murmurhash-1.0.15-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b3ba6d05de2613535b5a9227d4ad8ef40a540465f64660d4a8800634ae10e04f", size = 133080, upload_time = "2025-11-14T09:50:26.566Z" }, + { url = "https://files.pythonhosted.org/packages/aa/9a/d13e2e9f8ba1ced06840921a50f7cece0a475453284158a3018b72679761/murmurhash-1.0.15-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fa1b70b3cc2801ab44179c65827bbd12009c68b34e9d9ce7125b6a0bd35af63c", size = 132648, upload_time = "2025-11-14T09:50:27.788Z" }, + { url = "https://files.pythonhosted.org/packages/b2/e1/47994f1813fa205c84977b0ff51ae6709f8539af052c7491a5f863d82bdc/murmurhash-1.0.15-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:213d710fb6f4ef3bc11abbfad0fa94a75ffb675b7dc158c123471e5de869f9af", size = 131502, upload_time = "2025-11-14T09:50:29.339Z" }, + { url = "https://files.pythonhosted.org/packages/b9/ea/90c1fd00b4aeb704fb5e84cd666b33ffd7f245155048071ffbb51d2bb57d/murmurhash-1.0.15-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b65a5c4e7f5d71f7ccac2d2b60bdf7092d7976270878cfec59d5a66a533db823", size = 132736, upload_time = "2025-11-14T09:50:30.545Z" }, + { url = "https://files.pythonhosted.org/packages/00/db/da73462dbfa77f6433b128d2120ba7ba300f8c06dc4f4e022c38d240a5f5/murmurhash-1.0.15-cp313-cp313-win_amd64.whl", hash = "sha256:9aba94c5d841e1904cd110e94ceb7f49cfb60a874bbfb27e0373622998fb7c7c", size = 25682, upload_time = "2025-11-14T09:50:31.624Z" }, + { url = 
"https://files.pythonhosted.org/packages/bb/83/032729ef14971b938fbef41ee125fc8800020ee229bd35178b6ede8ee934/murmurhash-1.0.15-cp313-cp313-win_arm64.whl", hash = "sha256:263807eca40d08c7b702413e45cca75ecb5883aa337237dc5addb660f1483378", size = 23370, upload_time = "2025-11-14T09:50:33.264Z" }, + { url = "https://files.pythonhosted.org/packages/10/83/7547d9205e9bd2f8e5dfd0b682cc9277594f98909f228eb359489baec1df/murmurhash-1.0.15-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:694fd42a74b7ce257169d14c24aa616aa6cd4ccf8abe50eca0557e08da99d055", size = 29955, upload_time = "2025-11-14T09:50:34.488Z" }, + { url = "https://files.pythonhosted.org/packages/b7/c7/3afd5de7a5b3ae07fe2d3a3271b327ee1489c58ba2b2f2159bd31a25edb9/murmurhash-1.0.15-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a2ea4546ba426390beff3cd10db8f0152fdc9072c4f2583ec7d8aa9f3e4ac070", size = 30108, upload_time = "2025-11-14T09:50:35.53Z" }, + { url = "https://files.pythonhosted.org/packages/02/69/d6637ee67d78ebb2538c00411f28ea5c154886bbe1db16c49435a8a4ab16/murmurhash-1.0.15-cp313-cp313t-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:34e5a91139c40b10f98d0b297907f5d5267b4b1b2e5dd2eb74a021824f751b98", size = 164054, upload_time = "2025-11-14T09:50:36.591Z" }, + { url = "https://files.pythonhosted.org/packages/ab/4c/89e590165b4c7da6bf941441212a721a270195332d3aacfdfdf527d466ca/murmurhash-1.0.15-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:dc35606868a5961cf42e79314ca0bddf5a400ce377b14d83192057928d6252ec", size = 168153, upload_time = "2025-11-14T09:50:37.856Z" }, + { url = "https://files.pythonhosted.org/packages/07/7a/95c42df0c21d2e413b9fcd17317a7587351daeb264dc29c6aec1fdbd26f8/murmurhash-1.0.15-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:43cc6ac3b91ca0f7a5ae9c063ba4d6c26972c97fd7c25280ecc666413e4c5535", size = 164345, upload_time = "2025-11-14T09:50:39.346Z" }, + { url = "https://files.pythonhosted.org/packages/d0/22/9d02c880a88b83bb3ce7d6a38fb727373ab78d82e5f3d8d9fc5612219f90/murmurhash-1.0.15-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:847d712136cb462f0e4bd6229ee2d9eb996d8854eb8312dff3d20c8f5181fda5", size = 161990, upload_time = "2025-11-14T09:50:40.689Z" }, + { url = "https://files.pythonhosted.org/packages/9a/e3/750232524e0dc262e8dcede6536dafc766faadd9a52f1d23746b02948ad8/murmurhash-1.0.15-cp313-cp313t-win_amd64.whl", hash = "sha256:2680851af6901dbe66cc4aa7ef8e263de47e6e1b425ae324caa571bdf18f8d58", size = 28812, upload_time = "2025-11-14T09:50:41.971Z" }, + { url = "https://files.pythonhosted.org/packages/ff/89/4ad9d215ef6ade89f27a72dc4e86b98ef1a43534cc3e6a6900a362a0bf0a/murmurhash-1.0.15-cp313-cp313t-win_arm64.whl", hash = "sha256:189a8de4d657b5da9efd66601b0636330b08262b3a55431f2379097c986995d0", size = 25398, upload_time = "2025-11-14T09:50:43.023Z" }, + { url = "https://files.pythonhosted.org/packages/1c/69/726df275edf07688146966e15eaaa23168100b933a2e1a29b37eb56c6db8/murmurhash-1.0.15-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:7c4280136b738e85ff76b4bdc4341d0b867ee753e73fd8b6994288080c040d0b", size = 28029, upload_time = "2025-11-14T09:50:44.124Z" }, + { url = "https://files.pythonhosted.org/packages/59/8f/24ecf9061bc2b20933df8aba47c73e904274ea8811c8300cab92f6f82372/murmurhash-1.0.15-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:d4d681f474830489e2ec1d912095cfff027fbaf2baa5414c7e9d25b89f0fab68", size = 27912, upload_time = "2025-11-14T09:50:45.266Z" }, + { url = 
"https://files.pythonhosted.org/packages/ba/26/fff3caba25aa3c0622114e03c69fb66c839b22335b04d7cce91a3a126d44/murmurhash-1.0.15-cp314-cp314-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:d7e47c5746785db6a43b65fac47b9e63dd71dfbd89a8c92693425b9715e68c6e", size = 131847, upload_time = "2025-11-14T09:50:46.819Z" }, + { url = "https://files.pythonhosted.org/packages/df/e4/0f2b9fc533467a27afb4e906c33f32d5f637477de87dd94690e0c44335a6/murmurhash-1.0.15-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e8e674f02a99828c8a671ba99cd03299381b2f0744e6f25c29cadfc6151dc724", size = 132267, upload_time = "2025-11-14T09:50:48.298Z" }, + { url = "https://files.pythonhosted.org/packages/da/bf/9d1c107989728ec46e25773d503aa54070b32822a18cfa7f9d5f41bc17a5/murmurhash-1.0.15-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:26fd7c7855ac4850ad8737991d7b0e3e501df93ebaf0cf45aa5954303085fdba", size = 131894, upload_time = "2025-11-14T09:50:49.485Z" }, + { url = "https://files.pythonhosted.org/packages/0d/81/dcf27c71445c0e993b10e33169a098ca60ee702c5c58fcbde205fa6332a6/murmurhash-1.0.15-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:cb8ebafae60d5f892acff533cc599a359954d8c016a829514cb3f6e9ee10f322", size = 132054, upload_time = "2025-11-14T09:50:50.747Z" }, + { url = "https://files.pythonhosted.org/packages/bc/32/e874a14b2d2246bd2d16f80f49fad393a3865d4ee7d66d2cae939a67a29a/murmurhash-1.0.15-cp314-cp314-win_amd64.whl", hash = "sha256:898a629bf111f1aeba4437e533b5b836c0a9d2dd12d6880a9c75f6ca13e30e22", size = 26579, upload_time = "2025-11-14T09:50:52.278Z" }, + { url = "https://files.pythonhosted.org/packages/af/8e/4fca051ed8ae4d23a15aaf0a82b18cb368e8cf84f1e3b474d5749ec46069/murmurhash-1.0.15-cp314-cp314-win_arm64.whl", hash = "sha256:88dc1dd53b7b37c0df1b8b6bce190c12763014492f0269ff7620dc6027f470f4", size = 24341, upload_time = "2025-11-14T09:50:53.295Z" }, + { url = "https://files.pythonhosted.org/packages/38/9c/c72c2a4edd86aac829337ab9f83cf04cdb15e5d503e4c9a3a243f30a261c/murmurhash-1.0.15-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:6cb4e962ec4f928b30c271b2d84e6707eff6d942552765b663743cfa618b294b", size = 30146, upload_time = "2025-11-14T09:50:54.705Z" }, + { url = "https://files.pythonhosted.org/packages/ac/d7/72b47ebc86436cd0aa1fd4c6e8779521ec389397ac11389990278d0f7a47/murmurhash-1.0.15-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:5678a3ea4fbf0cbaaca2bed9b445f556f294d5f799c67185d05ffcb221a77faf", size = 30141, upload_time = "2025-11-14T09:50:55.829Z" }, + { url = "https://files.pythonhosted.org/packages/64/bb/6d2f09135079c34dc2d26e961c52742d558b320c61503f273eab6ba743d9/murmurhash-1.0.15-cp314-cp314t-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ef19f38c6b858eef83caf710773db98c8f7eb2193b4c324650c74f3d8ba299e0", size = 163898, upload_time = "2025-11-14T09:50:56.946Z" }, + { url = "https://files.pythonhosted.org/packages/b9/e2/9c1b462e33f9cb2d632056f07c90b502fc20bd7da50a15d0557343bd2fed/murmurhash-1.0.15-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:22aa3ceaedd2e57078b491ed08852d512b84ff4ff9bb2ff3f9bf0eec7f214c9e", size = 168040, upload_time = "2025-11-14T09:50:58.234Z" }, + { url = "https://files.pythonhosted.org/packages/e8/73/8694db1408fcdfa73589f7df6c445437ea146986fa1e393ec60d26d6e30c/murmurhash-1.0.15-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:bba0e0262c0d08682b028cb963ac477bd9839029486fa1333fc5c01fb6072749", 
size = 164239, upload_time = "2025-11-14T09:50:59.95Z" }, + { url = "https://files.pythonhosted.org/packages/2d/f9/8e360bdfc3c44e267e7e046f0e0b9922766da92da26959a6963f597e6bb5/murmurhash-1.0.15-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:4fd8189ee293a09f30f4931408f40c28ccd42d9de4f66595f8814879339378bc", size = 161811, upload_time = "2025-11-14T09:51:01.289Z" }, + { url = "https://files.pythonhosted.org/packages/f9/31/97649680595b1096803d877ababb9a67c07f4378f177ec885eea28b9db6d/murmurhash-1.0.15-cp314-cp314t-win_amd64.whl", hash = "sha256:66395b1388f7daa5103db92debe06842ae3be4c0749ef6db68b444518666cdcc", size = 29817, upload_time = "2025-11-14T09:51:02.493Z" }, + { url = "https://files.pythonhosted.org/packages/76/66/4fce8755f25d77324401886c00017c556be7ca3039575b94037aff905385/murmurhash-1.0.15-cp314-cp314t-win_arm64.whl", hash = "sha256:c22e56c6a0b70598a66e456de5272f76088bc623688da84ef403148a6d41851d", size = 26219, upload_time = "2025-11-14T09:51:03.563Z" }, ] [[package]] name = "mypy-extensions" version = "1.1.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343 } +sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload_time = "2025-04-22T14:54:24.164Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963 }, + { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload_time = "2025-04-22T14:54:22.983Z" }, ] [[package]] name = "nest-asyncio2" version = "1.7.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/2d/eb/ecf8bbf9d22a4e8f7be1628336fe0202da7660790053aa28abeb6c15eb14/nest_asyncio2-1.7.1.tar.gz", hash = "sha256:a1fe5bbbd20894dcceb1842322d74992c5834d5ab692af2c4f59a9a4fcf75fe8", size = 13797 } +sdist = { url = "https://files.pythonhosted.org/packages/2d/eb/ecf8bbf9d22a4e8f7be1628336fe0202da7660790053aa28abeb6c15eb14/nest_asyncio2-1.7.1.tar.gz", hash = "sha256:a1fe5bbbd20894dcceb1842322d74992c5834d5ab692af2c4f59a9a4fcf75fe8", size = 13797, upload_time = "2025-11-20T20:46:07.085Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/8c/48/c1f1ddcfd04bba60470235c2f83733ecff43ebe068dc7715aab60bc92ad8/nest_asyncio2-1.7.1-py3-none-any.whl", hash = "sha256:f83bc1744c3cfa7d47fd29431e5e168db6cb76eda1bb20108955c32f60d7eddf", size = 7504 }, + { url = "https://files.pythonhosted.org/packages/8c/48/c1f1ddcfd04bba60470235c2f83733ecff43ebe068dc7715aab60bc92ad8/nest_asyncio2-1.7.1-py3-none-any.whl", hash = "sha256:f83bc1744c3cfa7d47fd29431e5e168db6cb76eda1bb20108955c32f60d7eddf", size = 7504, upload_time = "2025-11-20T20:46:05.704Z" }, ] [[package]] name = "networkx" version = "3.6.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/6a/51/63fe664f3908c97be9d2e4f1158eb633317598cfa6e1fc14af5383f17512/networkx-3.6.1.tar.gz", hash = "sha256:26b7c357accc0c8cde558ad486283728b65b6a95d85ee1cd66bafab4c8168509", size = 2517025 } +sdist = { url = "https://files.pythonhosted.org/packages/6a/51/63fe664f3908c97be9d2e4f1158eb633317598cfa6e1fc14af5383f17512/networkx-3.6.1.tar.gz", hash = "sha256:26b7c357accc0c8cde558ad486283728b65b6a95d85ee1cd66bafab4c8168509", size = 2517025, upload_time = "2025-12-08T17:02:39.908Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/9e/c9/b2622292ea83fbb4ec318f5b9ab867d0a28ab43c5717bb85b0a5f6b3b0a4/networkx-3.6.1-py3-none-any.whl", hash = "sha256:d47fbf302e7d9cbbb9e2555a0d267983d2aa476bac30e90dfbe5669bd57f3762", size = 2068504 }, + { url = "https://files.pythonhosted.org/packages/9e/c9/b2622292ea83fbb4ec318f5b9ab867d0a28ab43c5717bb85b0a5f6b3b0a4/networkx-3.6.1-py3-none-any.whl", hash = "sha256:d47fbf302e7d9cbbb9e2555a0d267983d2aa476bac30e90dfbe5669bd57f3762", size = 2068504, upload_time = "2025-12-08T17:02:38.159Z" }, ] [[package]] @@ -1643,70 +1691,79 @@ dependencies = [ { name = "regex" }, { name = "tqdm" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f9/76/3a5e4312c19a028770f86fd7c058cf9f4ec4321c6cf7526bab998a5b683c/nltk-3.9.2.tar.gz", hash = "sha256:0f409e9b069ca4177c1903c3e843eef90c7e92992fa4931ae607da6de49e1419", size = 2887629 } +sdist = { url = "https://files.pythonhosted.org/packages/f9/76/3a5e4312c19a028770f86fd7c058cf9f4ec4321c6cf7526bab998a5b683c/nltk-3.9.2.tar.gz", hash = "sha256:0f409e9b069ca4177c1903c3e843eef90c7e92992fa4931ae607da6de49e1419", size = 2887629, upload_time = "2025-10-01T07:19:23.764Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/60/90/81ac364ef94209c100e12579629dc92bf7a709a84af32f8c551b02c07e94/nltk-3.9.2-py3-none-any.whl", hash = "sha256:1e209d2b3009110635ed9709a67a1a3e33a10f799490fa71cf4bec218c11c88a", size = 1513404 }, + { url = "https://files.pythonhosted.org/packages/60/90/81ac364ef94209c100e12579629dc92bf7a709a84af32f8c551b02c07e94/nltk-3.9.2-py3-none-any.whl", hash = "sha256:1e209d2b3009110635ed9709a67a1a3e33a10f799490fa71cf4bec218c11c88a", size = 1513404, upload_time = "2025-10-01T07:19:21.648Z" }, +] + +[[package]] +name = "nodeenv" +version = "1.10.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/24/bf/d1bda4f6168e0b2e9e5958945e01910052158313224ada5ce1fb2e1113b8/nodeenv-1.10.0.tar.gz", hash = "sha256:996c191ad80897d076bdfba80a41994c2b47c68e224c542b48feba42ba00f8bb", size = 55611, upload_time = "2025-12-20T14:08:54.006Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/b2/d0896bdcdc8d28a7fc5717c305f1a861c26e18c05047949fb371034d98bd/nodeenv-1.10.0-py2.py3-none-any.whl", hash = "sha256:5bb13e3eed2923615535339b3c620e76779af4cb4c6a90deccc9e36b274d3827", size = 23438, upload_time = "2025-12-20T14:08:52.782Z" }, ] [[package]] name = "numpy" version = "2.4.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/24/62/ae72ff66c0f1fd959925b4c11f8c2dea61f47f6acaea75a08512cdfe3fed/numpy-2.4.1.tar.gz", hash = "sha256:a1ceafc5042451a858231588a104093474c6a5c57dcc724841f5c888d237d690", size = 20721320 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/78/7f/ec53e32bf10c813604edf07a3682616bd931d026fcde7b6d13195dfb684a/numpy-2.4.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = 
"sha256:d3703409aac693fa82c0aee023a1ae06a6e9d065dba10f5e8e80f642f1e9d0a2", size = 16656888 }, - { url = "https://files.pythonhosted.org/packages/b8/e0/1f9585d7dae8f14864e948fd7fa86c6cb72dee2676ca2748e63b1c5acfe0/numpy-2.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7211b95ca365519d3596a1d8688a95874cc94219d417504d9ecb2df99fa7bfa8", size = 12373956 }, - { url = "https://files.pythonhosted.org/packages/8e/43/9762e88909ff2326f5e7536fa8cb3c49fb03a7d92705f23e6e7f553d9cb3/numpy-2.4.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:5adf01965456a664fc727ed69cc71848f28d063217c63e1a0e200a118d5eec9a", size = 5202567 }, - { url = "https://files.pythonhosted.org/packages/4b/ee/34b7930eb61e79feb4478800a4b95b46566969d837546aa7c034c742ef98/numpy-2.4.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:26f0bcd9c79a00e339565b303badc74d3ea2bd6d52191eeca5f95936cad107d0", size = 6549459 }, - { url = "https://files.pythonhosted.org/packages/79/e3/5f115fae982565771be994867c89bcd8d7208dbfe9469185497d70de5ddf/numpy-2.4.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0093e85df2960d7e4049664b26afc58b03236e967fb942354deef3208857a04c", size = 14404859 }, - { url = "https://files.pythonhosted.org/packages/d9/7d/9c8a781c88933725445a859cac5d01b5871588a15969ee6aeb618ba99eee/numpy-2.4.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7ad270f438cbdd402c364980317fb6b117d9ec5e226fff5b4148dd9aa9fc6e02", size = 16371419 }, - { url = "https://files.pythonhosted.org/packages/a6/d2/8aa084818554543f17cf4162c42f162acbd3bb42688aefdba6628a859f77/numpy-2.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:297c72b1b98100c2e8f873d5d35fb551fce7040ade83d67dd51d38c8d42a2162", size = 16182131 }, - { url = "https://files.pythonhosted.org/packages/60/db/0425216684297c58a8df35f3284ef56ec4a043e6d283f8a59c53562caf1b/numpy-2.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:cf6470d91d34bf669f61d515499859fa7a4c2f7c36434afb70e82df7217933f9", size = 18295342 }, - { url = "https://files.pythonhosted.org/packages/31/4c/14cb9d86240bd8c386c881bafbe43f001284b7cce3bc01623ac9475da163/numpy-2.4.1-cp312-cp312-win32.whl", hash = "sha256:b6bcf39112e956594b3331316d90c90c90fb961e39696bda97b89462f5f3943f", size = 5959015 }, - { url = "https://files.pythonhosted.org/packages/51/cf/52a703dbeb0c65807540d29699fef5fda073434ff61846a564d5c296420f/numpy-2.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:e1a27bb1b2dee45a2a53f5ca6ff2d1a7f135287883a1689e930d44d1ff296c87", size = 12310730 }, - { url = "https://files.pythonhosted.org/packages/69/80/a828b2d0ade5e74a9fe0f4e0a17c30fdc26232ad2bc8c9f8b3197cf7cf18/numpy-2.4.1-cp312-cp312-win_arm64.whl", hash = "sha256:0e6e8f9d9ecf95399982019c01223dc130542960a12edfa8edd1122dfa66a8a8", size = 10312166 }, - { url = "https://files.pythonhosted.org/packages/04/68/732d4b7811c00775f3bd522a21e8dd5a23f77eb11acdeb663e4a4ebf0ef4/numpy-2.4.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d797454e37570cfd61143b73b8debd623c3c0952959adb817dd310a483d58a1b", size = 16652495 }, - { url = "https://files.pythonhosted.org/packages/20/ca/857722353421a27f1465652b2c66813eeeccea9d76d5f7b74b99f298e60e/numpy-2.4.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:82c55962006156aeef1629b953fd359064aa47e4d82cfc8e67f0918f7da3344f", size = 12368657 }, - { url = "https://files.pythonhosted.org/packages/81/0d/2377c917513449cc6240031a79d30eb9a163d32a91e79e0da47c43f2c0c8/numpy-2.4.1-cp313-cp313-macosx_14_0_arm64.whl", hash = 
"sha256:71abbea030f2cfc3092a0ff9f8c8fdefdc5e0bf7d9d9c99663538bb0ecdac0b9", size = 5197256 }, - { url = "https://files.pythonhosted.org/packages/17/39/569452228de3f5de9064ac75137082c6214be1f5c532016549a7923ab4b5/numpy-2.4.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:5b55aa56165b17aaf15520beb9cbd33c9039810e0d9643dd4379e44294c7303e", size = 6545212 }, - { url = "https://files.pythonhosted.org/packages/8c/a4/77333f4d1e4dac4395385482557aeecf4826e6ff517e32ca48e1dafbe42a/numpy-2.4.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c0faba4a331195bfa96f93dd9dfaa10b2c7aa8cda3a02b7fd635e588fe821bf5", size = 14402871 }, - { url = "https://files.pythonhosted.org/packages/ba/87/d341e519956273b39d8d47969dd1eaa1af740615394fe67d06f1efa68773/numpy-2.4.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d3e3087f53e2b4428766b54932644d148613c5a595150533ae7f00dab2f319a8", size = 16359305 }, - { url = "https://files.pythonhosted.org/packages/32/91/789132c6666288eaa20ae8066bb99eba1939362e8f1a534949a215246e97/numpy-2.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:49e792ec351315e16da54b543db06ca8a86985ab682602d90c60ef4ff4db2a9c", size = 16181909 }, - { url = "https://files.pythonhosted.org/packages/cf/b8/090b8bd27b82a844bb22ff8fdf7935cb1980b48d6e439ae116f53cdc2143/numpy-2.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:79e9e06c4c2379db47f3f6fc7a8652e7498251789bf8ff5bd43bf478ef314ca2", size = 18284380 }, - { url = "https://files.pythonhosted.org/packages/67/78/722b62bd31842ff029412271556a1a27a98f45359dea78b1548a3a9996aa/numpy-2.4.1-cp313-cp313-win32.whl", hash = "sha256:3d1a100e48cb266090a031397863ff8a30050ceefd798f686ff92c67a486753d", size = 5957089 }, - { url = "https://files.pythonhosted.org/packages/da/a6/cf32198b0b6e18d4fbfa9a21a992a7fca535b9bb2b0cdd217d4a3445b5ca/numpy-2.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:92a0e65272fd60bfa0d9278e0484c2f52fe03b97aedc02b357f33fe752c52ffb", size = 12307230 }, - { url = "https://files.pythonhosted.org/packages/44/6c/534d692bfb7d0afe30611320c5fb713659dcb5104d7cc182aff2aea092f5/numpy-2.4.1-cp313-cp313-win_arm64.whl", hash = "sha256:20d4649c773f66cc2fc36f663e091f57c3b7655f936a4c681b4250855d1da8f5", size = 10313125 }, - { url = "https://files.pythonhosted.org/packages/da/a1/354583ac5c4caa566de6ddfbc42744409b515039e085fab6e0ff942e0df5/numpy-2.4.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f93bc6892fe7b0663e5ffa83b61aab510aacffd58c16e012bb9352d489d90cb7", size = 12496156 }, - { url = "https://files.pythonhosted.org/packages/51/b0/42807c6e8cce58c00127b1dc24d365305189991f2a7917aa694a109c8d7d/numpy-2.4.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:178de8f87948163d98a4c9ab5bee4ce6519ca918926ec8df195af582de28544d", size = 5324663 }, - { url = "https://files.pythonhosted.org/packages/fe/55/7a621694010d92375ed82f312b2f28017694ed784775269115323e37f5e2/numpy-2.4.1-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:98b35775e03ab7f868908b524fc0a84d38932d8daf7b7e1c3c3a1b6c7a2c9f15", size = 6645224 }, - { url = "https://files.pythonhosted.org/packages/50/96/9fa8635ed9d7c847d87e30c834f7109fac5e88549d79ef3324ab5c20919f/numpy-2.4.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:941c2a93313d030f219f3a71fd3d91a728b82979a5e8034eb2e60d394a2b83f9", size = 14462352 }, - { url = "https://files.pythonhosted.org/packages/03/d1/8cf62d8bb2062da4fb82dd5d49e47c923f9c0738032f054e0a75342faba7/numpy-2.4.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", 
hash = "sha256:529050522e983e00a6c1c6b67411083630de8b57f65e853d7b03d9281b8694d2", size = 16407279 }, - { url = "https://files.pythonhosted.org/packages/86/1c/95c86e17c6b0b31ce6ef219da00f71113b220bcb14938c8d9a05cee0ff53/numpy-2.4.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2302dc0224c1cbc49bb94f7064f3f923a971bfae45c33870dcbff63a2a550505", size = 16248316 }, - { url = "https://files.pythonhosted.org/packages/30/b4/e7f5ff8697274c9d0fa82398b6a372a27e5cef069b37df6355ccb1f1db1a/numpy-2.4.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:9171a42fcad32dcf3fa86f0a4faa5e9f8facefdb276f54b8b390d90447cff4e2", size = 18329884 }, - { url = "https://files.pythonhosted.org/packages/37/a4/b073f3e9d77f9aec8debe8ca7f9f6a09e888ad1ba7488f0c3b36a94c03ac/numpy-2.4.1-cp313-cp313t-win32.whl", hash = "sha256:382ad67d99ef49024f11d1ce5dcb5ad8432446e4246a4b014418ba3a1175a1f4", size = 6081138 }, - { url = "https://files.pythonhosted.org/packages/16/16/af42337b53844e67752a092481ab869c0523bc95c4e5c98e4dac4e9581ac/numpy-2.4.1-cp313-cp313t-win_amd64.whl", hash = "sha256:62fea415f83ad8fdb6c20840578e5fbaf5ddd65e0ec6c3c47eda0f69da172510", size = 12447478 }, - { url = "https://files.pythonhosted.org/packages/6c/f8/fa85b2eac68ec631d0b631abc448552cb17d39afd17ec53dcbcc3537681a/numpy-2.4.1-cp313-cp313t-win_arm64.whl", hash = "sha256:a7870e8c5fc11aef57d6fea4b4085e537a3a60ad2cdd14322ed531fdca68d261", size = 10382981 }, - { url = "https://files.pythonhosted.org/packages/1b/a7/ef08d25698e0e4b4efbad8d55251d20fe2a15f6d9aa7c9b30cd03c165e6f/numpy-2.4.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:3869ea1ee1a1edc16c29bbe3a2f2a4e515cc3a44d43903ad41e0cacdbaf733dc", size = 16652046 }, - { url = "https://files.pythonhosted.org/packages/8f/39/e378b3e3ca13477e5ac70293ec027c438d1927f18637e396fe90b1addd72/numpy-2.4.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:e867df947d427cdd7a60e3e271729090b0f0df80f5f10ab7dd436f40811699c3", size = 12378858 }, - { url = "https://files.pythonhosted.org/packages/c3/74/7ec6154f0006910ed1fdbb7591cf4432307033102b8a22041599935f8969/numpy-2.4.1-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:e3bd2cb07841166420d2fa7146c96ce00cb3410664cbc1a6be028e456c4ee220", size = 5207417 }, - { url = "https://files.pythonhosted.org/packages/f7/b7/053ac11820d84e42f8feea5cb81cc4fcd1091499b45b1ed8c7415b1bf831/numpy-2.4.1-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:f0a90aba7d521e6954670550e561a4cb925713bd944445dbe9e729b71f6cabee", size = 6542643 }, - { url = "https://files.pythonhosted.org/packages/c0/c4/2e7908915c0e32ca636b92e4e4a3bdec4cb1e7eb0f8aedf1ed3c68a0d8cd/numpy-2.4.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d558123217a83b2d1ba316b986e9248a1ed1971ad495963d555ccd75dcb1556", size = 14418963 }, - { url = "https://files.pythonhosted.org/packages/eb/c0/3ed5083d94e7ffd7c404e54619c088e11f2e1939a9544f5397f4adb1b8ba/numpy-2.4.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2f44de05659b67d20499cbc96d49f2650769afcb398b79b324bb6e297bfe3844", size = 16363811 }, - { url = "https://files.pythonhosted.org/packages/0e/68/42b66f1852bf525050a67315a4fb94586ab7e9eaa541b1bef530fab0c5dd/numpy-2.4.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:69e7419c9012c4aaf695109564e3387f1259f001b4326dfa55907b098af082d3", size = 16197643 }, - { url = "https://files.pythonhosted.org/packages/d2/40/e8714fc933d85f82c6bfc7b998a0649ad9769a32f3494ba86598aaf18a48/numpy-2.4.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = 
"sha256:2ffd257026eb1b34352e749d7cc1678b5eeec3e329ad8c9965a797e08ccba205", size = 18289601 }, - { url = "https://files.pythonhosted.org/packages/80/9a/0d44b468cad50315127e884802351723daca7cf1c98d102929468c81d439/numpy-2.4.1-cp314-cp314-win32.whl", hash = "sha256:727c6c3275ddefa0dc078524a85e064c057b4f4e71ca5ca29a19163c607be745", size = 6005722 }, - { url = "https://files.pythonhosted.org/packages/7e/bb/c6513edcce5a831810e2dddc0d3452ce84d208af92405a0c2e58fd8e7881/numpy-2.4.1-cp314-cp314-win_amd64.whl", hash = "sha256:7d5d7999df434a038d75a748275cd6c0094b0ecdb0837342b332a82defc4dc4d", size = 12438590 }, - { url = "https://files.pythonhosted.org/packages/e9/da/a598d5cb260780cf4d255102deba35c1d072dc028c4547832f45dd3323a8/numpy-2.4.1-cp314-cp314-win_arm64.whl", hash = "sha256:ce9ce141a505053b3c7bce3216071f3bf5c182b8b28930f14cd24d43932cd2df", size = 10596180 }, - { url = "https://files.pythonhosted.org/packages/de/bc/ea3f2c96fcb382311827231f911723aeff596364eb6e1b6d1d91128aa29b/numpy-2.4.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:4e53170557d37ae404bf8d542ca5b7c629d6efa1117dac6a83e394142ea0a43f", size = 12498774 }, - { url = "https://files.pythonhosted.org/packages/aa/ab/ef9d939fe4a812648c7a712610b2ca6140b0853c5efea361301006c02ae5/numpy-2.4.1-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:a73044b752f5d34d4232f25f18160a1cc418ea4507f5f11e299d8ac36875f8a0", size = 5327274 }, - { url = "https://files.pythonhosted.org/packages/bd/31/d381368e2a95c3b08b8cf7faac6004849e960f4a042d920337f71cef0cae/numpy-2.4.1-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:fb1461c99de4d040666ca0444057b06541e5642f800b71c56e6ea92d6a853a0c", size = 6648306 }, - { url = "https://files.pythonhosted.org/packages/c8/e5/0989b44ade47430be6323d05c23207636d67d7362a1796ccbccac6773dd2/numpy-2.4.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:423797bdab2eeefbe608d7c1ec7b2b4fd3c58d51460f1ee26c7500a1d9c9ee93", size = 14464653 }, - { url = "https://files.pythonhosted.org/packages/10/a7/cfbe475c35371cae1358e61f20c5f075badc18c4797ab4354140e1d283cf/numpy-2.4.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:52b5f61bdb323b566b528899cc7db2ba5d1015bda7ea811a8bcf3c89c331fa42", size = 16405144 }, - { url = "https://files.pythonhosted.org/packages/f8/a3/0c63fe66b534888fa5177cc7cef061541064dbe2b4b60dcc60ffaf0d2157/numpy-2.4.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:42d7dd5fa36d16d52a84f821eb96031836fd405ee6955dd732f2023724d0aa01", size = 16247425 }, - { url = "https://files.pythonhosted.org/packages/6b/2b/55d980cfa2c93bd40ff4c290bf824d792bd41d2fe3487b07707559071760/numpy-2.4.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:e7b6b5e28bbd47b7532698e5db2fe1db693d84b58c254e4389d99a27bb9b8f6b", size = 18330053 }, - { url = "https://files.pythonhosted.org/packages/23/12/8b5fc6b9c487a09a7957188e0943c9ff08432c65e34567cabc1623b03a51/numpy-2.4.1-cp314-cp314t-win32.whl", hash = "sha256:5de60946f14ebe15e713a6f22850c2372fa72f4ff9a432ab44aa90edcadaa65a", size = 6152482 }, - { url = "https://files.pythonhosted.org/packages/00/a5/9f8ca5856b8940492fc24fbe13c1bc34d65ddf4079097cf9e53164d094e1/numpy-2.4.1-cp314-cp314t-win_amd64.whl", hash = "sha256:8f085da926c0d491ffff3096f91078cc97ea67e7e6b65e490bc8dcda65663be2", size = 12627117 }, - { url = "https://files.pythonhosted.org/packages/ad/0d/eca3d962f9eef265f01a8e0d20085c6dd1f443cbffc11b6dede81fd82356/numpy-2.4.1-cp314-cp314t-win_arm64.whl", hash = 
"sha256:6436cffb4f2bf26c974344439439c95e152c9a527013f26b3577be6c2ca64295", size = 10667121 }, +sdist = { url = "https://files.pythonhosted.org/packages/24/62/ae72ff66c0f1fd959925b4c11f8c2dea61f47f6acaea75a08512cdfe3fed/numpy-2.4.1.tar.gz", hash = "sha256:a1ceafc5042451a858231588a104093474c6a5c57dcc724841f5c888d237d690", size = 20721320, upload_time = "2026-01-10T06:44:59.619Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/7f/ec53e32bf10c813604edf07a3682616bd931d026fcde7b6d13195dfb684a/numpy-2.4.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d3703409aac693fa82c0aee023a1ae06a6e9d065dba10f5e8e80f642f1e9d0a2", size = 16656888, upload_time = "2026-01-10T06:42:40.913Z" }, + { url = "https://files.pythonhosted.org/packages/b8/e0/1f9585d7dae8f14864e948fd7fa86c6cb72dee2676ca2748e63b1c5acfe0/numpy-2.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7211b95ca365519d3596a1d8688a95874cc94219d417504d9ecb2df99fa7bfa8", size = 12373956, upload_time = "2026-01-10T06:42:43.091Z" }, + { url = "https://files.pythonhosted.org/packages/8e/43/9762e88909ff2326f5e7536fa8cb3c49fb03a7d92705f23e6e7f553d9cb3/numpy-2.4.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:5adf01965456a664fc727ed69cc71848f28d063217c63e1a0e200a118d5eec9a", size = 5202567, upload_time = "2026-01-10T06:42:45.107Z" }, + { url = "https://files.pythonhosted.org/packages/4b/ee/34b7930eb61e79feb4478800a4b95b46566969d837546aa7c034c742ef98/numpy-2.4.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:26f0bcd9c79a00e339565b303badc74d3ea2bd6d52191eeca5f95936cad107d0", size = 6549459, upload_time = "2026-01-10T06:42:48.152Z" }, + { url = "https://files.pythonhosted.org/packages/79/e3/5f115fae982565771be994867c89bcd8d7208dbfe9469185497d70de5ddf/numpy-2.4.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0093e85df2960d7e4049664b26afc58b03236e967fb942354deef3208857a04c", size = 14404859, upload_time = "2026-01-10T06:42:49.947Z" }, + { url = "https://files.pythonhosted.org/packages/d9/7d/9c8a781c88933725445a859cac5d01b5871588a15969ee6aeb618ba99eee/numpy-2.4.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7ad270f438cbdd402c364980317fb6b117d9ec5e226fff5b4148dd9aa9fc6e02", size = 16371419, upload_time = "2026-01-10T06:42:52.409Z" }, + { url = "https://files.pythonhosted.org/packages/a6/d2/8aa084818554543f17cf4162c42f162acbd3bb42688aefdba6628a859f77/numpy-2.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:297c72b1b98100c2e8f873d5d35fb551fce7040ade83d67dd51d38c8d42a2162", size = 16182131, upload_time = "2026-01-10T06:42:54.694Z" }, + { url = "https://files.pythonhosted.org/packages/60/db/0425216684297c58a8df35f3284ef56ec4a043e6d283f8a59c53562caf1b/numpy-2.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:cf6470d91d34bf669f61d515499859fa7a4c2f7c36434afb70e82df7217933f9", size = 18295342, upload_time = "2026-01-10T06:42:56.991Z" }, + { url = "https://files.pythonhosted.org/packages/31/4c/14cb9d86240bd8c386c881bafbe43f001284b7cce3bc01623ac9475da163/numpy-2.4.1-cp312-cp312-win32.whl", hash = "sha256:b6bcf39112e956594b3331316d90c90c90fb961e39696bda97b89462f5f3943f", size = 5959015, upload_time = "2026-01-10T06:42:59.631Z" }, + { url = "https://files.pythonhosted.org/packages/51/cf/52a703dbeb0c65807540d29699fef5fda073434ff61846a564d5c296420f/numpy-2.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:e1a27bb1b2dee45a2a53f5ca6ff2d1a7f135287883a1689e930d44d1ff296c87", size = 12310730, upload_time = "2026-01-10T06:43:01.627Z" }, + { url = 
"https://files.pythonhosted.org/packages/69/80/a828b2d0ade5e74a9fe0f4e0a17c30fdc26232ad2bc8c9f8b3197cf7cf18/numpy-2.4.1-cp312-cp312-win_arm64.whl", hash = "sha256:0e6e8f9d9ecf95399982019c01223dc130542960a12edfa8edd1122dfa66a8a8", size = 10312166, upload_time = "2026-01-10T06:43:03.673Z" }, + { url = "https://files.pythonhosted.org/packages/04/68/732d4b7811c00775f3bd522a21e8dd5a23f77eb11acdeb663e4a4ebf0ef4/numpy-2.4.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d797454e37570cfd61143b73b8debd623c3c0952959adb817dd310a483d58a1b", size = 16652495, upload_time = "2026-01-10T06:43:06.283Z" }, + { url = "https://files.pythonhosted.org/packages/20/ca/857722353421a27f1465652b2c66813eeeccea9d76d5f7b74b99f298e60e/numpy-2.4.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:82c55962006156aeef1629b953fd359064aa47e4d82cfc8e67f0918f7da3344f", size = 12368657, upload_time = "2026-01-10T06:43:09.094Z" }, + { url = "https://files.pythonhosted.org/packages/81/0d/2377c917513449cc6240031a79d30eb9a163d32a91e79e0da47c43f2c0c8/numpy-2.4.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:71abbea030f2cfc3092a0ff9f8c8fdefdc5e0bf7d9d9c99663538bb0ecdac0b9", size = 5197256, upload_time = "2026-01-10T06:43:13.634Z" }, + { url = "https://files.pythonhosted.org/packages/17/39/569452228de3f5de9064ac75137082c6214be1f5c532016549a7923ab4b5/numpy-2.4.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:5b55aa56165b17aaf15520beb9cbd33c9039810e0d9643dd4379e44294c7303e", size = 6545212, upload_time = "2026-01-10T06:43:15.661Z" }, + { url = "https://files.pythonhosted.org/packages/8c/a4/77333f4d1e4dac4395385482557aeecf4826e6ff517e32ca48e1dafbe42a/numpy-2.4.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c0faba4a331195bfa96f93dd9dfaa10b2c7aa8cda3a02b7fd635e588fe821bf5", size = 14402871, upload_time = "2026-01-10T06:43:17.324Z" }, + { url = "https://files.pythonhosted.org/packages/ba/87/d341e519956273b39d8d47969dd1eaa1af740615394fe67d06f1efa68773/numpy-2.4.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d3e3087f53e2b4428766b54932644d148613c5a595150533ae7f00dab2f319a8", size = 16359305, upload_time = "2026-01-10T06:43:19.376Z" }, + { url = "https://files.pythonhosted.org/packages/32/91/789132c6666288eaa20ae8066bb99eba1939362e8f1a534949a215246e97/numpy-2.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:49e792ec351315e16da54b543db06ca8a86985ab682602d90c60ef4ff4db2a9c", size = 16181909, upload_time = "2026-01-10T06:43:21.808Z" }, + { url = "https://files.pythonhosted.org/packages/cf/b8/090b8bd27b82a844bb22ff8fdf7935cb1980b48d6e439ae116f53cdc2143/numpy-2.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:79e9e06c4c2379db47f3f6fc7a8652e7498251789bf8ff5bd43bf478ef314ca2", size = 18284380, upload_time = "2026-01-10T06:43:23.957Z" }, + { url = "https://files.pythonhosted.org/packages/67/78/722b62bd31842ff029412271556a1a27a98f45359dea78b1548a3a9996aa/numpy-2.4.1-cp313-cp313-win32.whl", hash = "sha256:3d1a100e48cb266090a031397863ff8a30050ceefd798f686ff92c67a486753d", size = 5957089, upload_time = "2026-01-10T06:43:27.535Z" }, + { url = "https://files.pythonhosted.org/packages/da/a6/cf32198b0b6e18d4fbfa9a21a992a7fca535b9bb2b0cdd217d4a3445b5ca/numpy-2.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:92a0e65272fd60bfa0d9278e0484c2f52fe03b97aedc02b357f33fe752c52ffb", size = 12307230, upload_time = "2026-01-10T06:43:29.298Z" }, + { url = 
"https://files.pythonhosted.org/packages/44/6c/534d692bfb7d0afe30611320c5fb713659dcb5104d7cc182aff2aea092f5/numpy-2.4.1-cp313-cp313-win_arm64.whl", hash = "sha256:20d4649c773f66cc2fc36f663e091f57c3b7655f936a4c681b4250855d1da8f5", size = 10313125, upload_time = "2026-01-10T06:43:31.782Z" }, + { url = "https://files.pythonhosted.org/packages/da/a1/354583ac5c4caa566de6ddfbc42744409b515039e085fab6e0ff942e0df5/numpy-2.4.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f93bc6892fe7b0663e5ffa83b61aab510aacffd58c16e012bb9352d489d90cb7", size = 12496156, upload_time = "2026-01-10T06:43:34.237Z" }, + { url = "https://files.pythonhosted.org/packages/51/b0/42807c6e8cce58c00127b1dc24d365305189991f2a7917aa694a109c8d7d/numpy-2.4.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:178de8f87948163d98a4c9ab5bee4ce6519ca918926ec8df195af582de28544d", size = 5324663, upload_time = "2026-01-10T06:43:36.211Z" }, + { url = "https://files.pythonhosted.org/packages/fe/55/7a621694010d92375ed82f312b2f28017694ed784775269115323e37f5e2/numpy-2.4.1-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:98b35775e03ab7f868908b524fc0a84d38932d8daf7b7e1c3c3a1b6c7a2c9f15", size = 6645224, upload_time = "2026-01-10T06:43:37.884Z" }, + { url = "https://files.pythonhosted.org/packages/50/96/9fa8635ed9d7c847d87e30c834f7109fac5e88549d79ef3324ab5c20919f/numpy-2.4.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:941c2a93313d030f219f3a71fd3d91a728b82979a5e8034eb2e60d394a2b83f9", size = 14462352, upload_time = "2026-01-10T06:43:39.479Z" }, + { url = "https://files.pythonhosted.org/packages/03/d1/8cf62d8bb2062da4fb82dd5d49e47c923f9c0738032f054e0a75342faba7/numpy-2.4.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:529050522e983e00a6c1c6b67411083630de8b57f65e853d7b03d9281b8694d2", size = 16407279, upload_time = "2026-01-10T06:43:41.93Z" }, + { url = "https://files.pythonhosted.org/packages/86/1c/95c86e17c6b0b31ce6ef219da00f71113b220bcb14938c8d9a05cee0ff53/numpy-2.4.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2302dc0224c1cbc49bb94f7064f3f923a971bfae45c33870dcbff63a2a550505", size = 16248316, upload_time = "2026-01-10T06:43:44.121Z" }, + { url = "https://files.pythonhosted.org/packages/30/b4/e7f5ff8697274c9d0fa82398b6a372a27e5cef069b37df6355ccb1f1db1a/numpy-2.4.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:9171a42fcad32dcf3fa86f0a4faa5e9f8facefdb276f54b8b390d90447cff4e2", size = 18329884, upload_time = "2026-01-10T06:43:46.613Z" }, + { url = "https://files.pythonhosted.org/packages/37/a4/b073f3e9d77f9aec8debe8ca7f9f6a09e888ad1ba7488f0c3b36a94c03ac/numpy-2.4.1-cp313-cp313t-win32.whl", hash = "sha256:382ad67d99ef49024f11d1ce5dcb5ad8432446e4246a4b014418ba3a1175a1f4", size = 6081138, upload_time = "2026-01-10T06:43:48.854Z" }, + { url = "https://files.pythonhosted.org/packages/16/16/af42337b53844e67752a092481ab869c0523bc95c4e5c98e4dac4e9581ac/numpy-2.4.1-cp313-cp313t-win_amd64.whl", hash = "sha256:62fea415f83ad8fdb6c20840578e5fbaf5ddd65e0ec6c3c47eda0f69da172510", size = 12447478, upload_time = "2026-01-10T06:43:50.476Z" }, + { url = "https://files.pythonhosted.org/packages/6c/f8/fa85b2eac68ec631d0b631abc448552cb17d39afd17ec53dcbcc3537681a/numpy-2.4.1-cp313-cp313t-win_arm64.whl", hash = "sha256:a7870e8c5fc11aef57d6fea4b4085e537a3a60ad2cdd14322ed531fdca68d261", size = 10382981, upload_time = "2026-01-10T06:43:52.575Z" }, + { url = 
"https://files.pythonhosted.org/packages/1b/a7/ef08d25698e0e4b4efbad8d55251d20fe2a15f6d9aa7c9b30cd03c165e6f/numpy-2.4.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:3869ea1ee1a1edc16c29bbe3a2f2a4e515cc3a44d43903ad41e0cacdbaf733dc", size = 16652046, upload_time = "2026-01-10T06:43:54.797Z" }, + { url = "https://files.pythonhosted.org/packages/8f/39/e378b3e3ca13477e5ac70293ec027c438d1927f18637e396fe90b1addd72/numpy-2.4.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:e867df947d427cdd7a60e3e271729090b0f0df80f5f10ab7dd436f40811699c3", size = 12378858, upload_time = "2026-01-10T06:43:57.099Z" }, + { url = "https://files.pythonhosted.org/packages/c3/74/7ec6154f0006910ed1fdbb7591cf4432307033102b8a22041599935f8969/numpy-2.4.1-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:e3bd2cb07841166420d2fa7146c96ce00cb3410664cbc1a6be028e456c4ee220", size = 5207417, upload_time = "2026-01-10T06:43:59.037Z" }, + { url = "https://files.pythonhosted.org/packages/f7/b7/053ac11820d84e42f8feea5cb81cc4fcd1091499b45b1ed8c7415b1bf831/numpy-2.4.1-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:f0a90aba7d521e6954670550e561a4cb925713bd944445dbe9e729b71f6cabee", size = 6542643, upload_time = "2026-01-10T06:44:01.852Z" }, + { url = "https://files.pythonhosted.org/packages/c0/c4/2e7908915c0e32ca636b92e4e4a3bdec4cb1e7eb0f8aedf1ed3c68a0d8cd/numpy-2.4.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d558123217a83b2d1ba316b986e9248a1ed1971ad495963d555ccd75dcb1556", size = 14418963, upload_time = "2026-01-10T06:44:04.047Z" }, + { url = "https://files.pythonhosted.org/packages/eb/c0/3ed5083d94e7ffd7c404e54619c088e11f2e1939a9544f5397f4adb1b8ba/numpy-2.4.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2f44de05659b67d20499cbc96d49f2650769afcb398b79b324bb6e297bfe3844", size = 16363811, upload_time = "2026-01-10T06:44:06.207Z" }, + { url = "https://files.pythonhosted.org/packages/0e/68/42b66f1852bf525050a67315a4fb94586ab7e9eaa541b1bef530fab0c5dd/numpy-2.4.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:69e7419c9012c4aaf695109564e3387f1259f001b4326dfa55907b098af082d3", size = 16197643, upload_time = "2026-01-10T06:44:08.33Z" }, + { url = "https://files.pythonhosted.org/packages/d2/40/e8714fc933d85f82c6bfc7b998a0649ad9769a32f3494ba86598aaf18a48/numpy-2.4.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2ffd257026eb1b34352e749d7cc1678b5eeec3e329ad8c9965a797e08ccba205", size = 18289601, upload_time = "2026-01-10T06:44:10.841Z" }, + { url = "https://files.pythonhosted.org/packages/80/9a/0d44b468cad50315127e884802351723daca7cf1c98d102929468c81d439/numpy-2.4.1-cp314-cp314-win32.whl", hash = "sha256:727c6c3275ddefa0dc078524a85e064c057b4f4e71ca5ca29a19163c607be745", size = 6005722, upload_time = "2026-01-10T06:44:13.332Z" }, + { url = "https://files.pythonhosted.org/packages/7e/bb/c6513edcce5a831810e2dddc0d3452ce84d208af92405a0c2e58fd8e7881/numpy-2.4.1-cp314-cp314-win_amd64.whl", hash = "sha256:7d5d7999df434a038d75a748275cd6c0094b0ecdb0837342b332a82defc4dc4d", size = 12438590, upload_time = "2026-01-10T06:44:15.006Z" }, + { url = "https://files.pythonhosted.org/packages/e9/da/a598d5cb260780cf4d255102deba35c1d072dc028c4547832f45dd3323a8/numpy-2.4.1-cp314-cp314-win_arm64.whl", hash = "sha256:ce9ce141a505053b3c7bce3216071f3bf5c182b8b28930f14cd24d43932cd2df", size = 10596180, upload_time = "2026-01-10T06:44:17.386Z" }, + { url = 
"https://files.pythonhosted.org/packages/de/bc/ea3f2c96fcb382311827231f911723aeff596364eb6e1b6d1d91128aa29b/numpy-2.4.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:4e53170557d37ae404bf8d542ca5b7c629d6efa1117dac6a83e394142ea0a43f", size = 12498774, upload_time = "2026-01-10T06:44:19.467Z" }, + { url = "https://files.pythonhosted.org/packages/aa/ab/ef9d939fe4a812648c7a712610b2ca6140b0853c5efea361301006c02ae5/numpy-2.4.1-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:a73044b752f5d34d4232f25f18160a1cc418ea4507f5f11e299d8ac36875f8a0", size = 5327274, upload_time = "2026-01-10T06:44:23.189Z" }, + { url = "https://files.pythonhosted.org/packages/bd/31/d381368e2a95c3b08b8cf7faac6004849e960f4a042d920337f71cef0cae/numpy-2.4.1-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:fb1461c99de4d040666ca0444057b06541e5642f800b71c56e6ea92d6a853a0c", size = 6648306, upload_time = "2026-01-10T06:44:25.012Z" }, + { url = "https://files.pythonhosted.org/packages/c8/e5/0989b44ade47430be6323d05c23207636d67d7362a1796ccbccac6773dd2/numpy-2.4.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:423797bdab2eeefbe608d7c1ec7b2b4fd3c58d51460f1ee26c7500a1d9c9ee93", size = 14464653, upload_time = "2026-01-10T06:44:26.706Z" }, + { url = "https://files.pythonhosted.org/packages/10/a7/cfbe475c35371cae1358e61f20c5f075badc18c4797ab4354140e1d283cf/numpy-2.4.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:52b5f61bdb323b566b528899cc7db2ba5d1015bda7ea811a8bcf3c89c331fa42", size = 16405144, upload_time = "2026-01-10T06:44:29.378Z" }, + { url = "https://files.pythonhosted.org/packages/f8/a3/0c63fe66b534888fa5177cc7cef061541064dbe2b4b60dcc60ffaf0d2157/numpy-2.4.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:42d7dd5fa36d16d52a84f821eb96031836fd405ee6955dd732f2023724d0aa01", size = 16247425, upload_time = "2026-01-10T06:44:31.721Z" }, + { url = "https://files.pythonhosted.org/packages/6b/2b/55d980cfa2c93bd40ff4c290bf824d792bd41d2fe3487b07707559071760/numpy-2.4.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:e7b6b5e28bbd47b7532698e5db2fe1db693d84b58c254e4389d99a27bb9b8f6b", size = 18330053, upload_time = "2026-01-10T06:44:34.617Z" }, + { url = "https://files.pythonhosted.org/packages/23/12/8b5fc6b9c487a09a7957188e0943c9ff08432c65e34567cabc1623b03a51/numpy-2.4.1-cp314-cp314t-win32.whl", hash = "sha256:5de60946f14ebe15e713a6f22850c2372fa72f4ff9a432ab44aa90edcadaa65a", size = 6152482, upload_time = "2026-01-10T06:44:36.798Z" }, + { url = "https://files.pythonhosted.org/packages/00/a5/9f8ca5856b8940492fc24fbe13c1bc34d65ddf4079097cf9e53164d094e1/numpy-2.4.1-cp314-cp314t-win_amd64.whl", hash = "sha256:8f085da926c0d491ffff3096f91078cc97ea67e7e6b65e490bc8dcda65663be2", size = 12627117, upload_time = "2026-01-10T06:44:38.828Z" }, + { url = "https://files.pythonhosted.org/packages/ad/0d/eca3d962f9eef265f01a8e0d20085c6dd1f443cbffc11b6dede81fd82356/numpy-2.4.1-cp314-cp314t-win_arm64.whl", hash = "sha256:6436cffb4f2bf26c974344439439c95e152c9a527013f26b3577be6c2ca64295", size = 10667121, upload_time = "2026-01-10T06:44:41.644Z" }, ] [[package]] @@ -1714,8 +1771,7 @@ name = "nvidia-cublas-cu12" version = "12.8.4.1" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/29/99/db44d685f0e257ff0e213ade1964fc459b4a690a73293220e98feb3307cf/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:b86f6dd8935884615a0683b663891d43781b819ac4f2ba2b0c9604676af346d0", size = 590537124 }, - 
{ url = "https://files.pythonhosted.org/packages/dc/61/e24b560ab2e2eaeb3c839129175fb330dfcfc29e5203196e5541a4c44682/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142", size = 594346921 }, + { url = "https://files.pythonhosted.org/packages/dc/61/e24b560ab2e2eaeb3c839129175fb330dfcfc29e5203196e5541a4c44682/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142", size = 594346921, upload_time = "2025-03-07T01:44:31.254Z" }, ] [[package]] @@ -1723,8 +1779,7 @@ name = "nvidia-cuda-cupti-cu12" version = "12.8.90" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d5/1f/b3bd73445e5cb342727fd24fe1f7b748f690b460acadc27ea22f904502c8/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4412396548808ddfed3f17a467b104ba7751e6b58678a4b840675c56d21cf7ed", size = 9533318 }, - { url = "https://files.pythonhosted.org/packages/f8/02/2adcaa145158bf1a8295d83591d22e4103dbfd821bcaf6f3f53151ca4ffa/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182", size = 10248621 }, + { url = "https://files.pythonhosted.org/packages/f8/02/2adcaa145158bf1a8295d83591d22e4103dbfd821bcaf6f3f53151ca4ffa/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182", size = 10248621, upload_time = "2025-03-07T01:40:21.213Z" }, ] [[package]] @@ -1732,8 +1787,7 @@ name = "nvidia-cuda-nvrtc-cu12" version = "12.8.93" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/05/6b/32f747947df2da6994e999492ab306a903659555dddc0fbdeb9d71f75e52/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994", size = 88040029 }, - { url = "https://files.pythonhosted.org/packages/eb/d1/e50d0acaab360482034b84b6e27ee83c6738f7d32182b987f9c7a4e32962/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fc1fec1e1637854b4c0a65fb9a8346b51dd9ee69e61ebaccc82058441f15bce8", size = 43106076 }, + { url = "https://files.pythonhosted.org/packages/05/6b/32f747947df2da6994e999492ab306a903659555dddc0fbdeb9d71f75e52/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994", size = 88040029, upload_time = "2025-03-07T01:42:13.562Z" }, ] [[package]] @@ -1741,8 +1795,7 @@ name = "nvidia-cuda-runtime-cu12" version = "12.8.90" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7c/75/f865a3b236e4647605ea34cc450900854ba123834a5f1598e160b9530c3a/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:52bf7bbee900262ffefe5e9d5a2a69a30d97e2bc5bb6cc866688caa976966e3d", size = 965265 }, - { url = "https://files.pythonhosted.org/packages/0d/9b/a997b638fcd068ad6e4d53b8551a7d30fe8b404d6f1804abf1df69838932/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90", size = 954765 }, + { url = "https://files.pythonhosted.org/packages/0d/9b/a997b638fcd068ad6e4d53b8551a7d30fe8b404d6f1804abf1df69838932/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90", size = 954765, upload_time = "2025-03-07T01:40:01.615Z" }, ] [[package]] @@ -1753,8 +1806,7 @@ dependencies = [ { name = "nvidia-cublas-cu12" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/fa/41/e79269ce215c857c935fd86bcfe91a451a584dfc27f1e068f568b9ad1ab7/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:c9132cc3f8958447b4910a1720036d9eff5928cc3179b0a51fb6d167c6cc87d8", size = 705026878 }, - { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467 }, + { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload_time = "2025-06-06T21:54:08.597Z" }, ] [[package]] @@ -1765,8 +1817,7 @@ dependencies = [ { name = "nvidia-nvjitlink-cu12" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/60/bc/7771846d3a0272026c416fbb7e5f4c1f146d6d80704534d0b187dd6f4800/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:848ef7224d6305cdb2a4df928759dca7b1201874787083b6e7550dd6765ce69a", size = 193109211 }, - { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695 }, + { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload_time = "2025-03-07T01:45:27.821Z" }, ] [[package]] @@ -1774,8 +1825,7 @@ name = "nvidia-cufile-cu12" version = "1.13.1.3" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bb/fe/1bcba1dfbfb8d01be8d93f07bfc502c93fa23afa6fd5ab3fc7c1df71038a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc", size = 1197834 }, - { url = "https://files.pythonhosted.org/packages/1e/f5/5607710447a6fe9fd9b3283956fceeee8a06cda1d2f56ce31371f595db2a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:4beb6d4cce47c1a0f1013d72e02b0994730359e17801d395bdcbf20cfb3bb00a", size = 1120705 }, + { url = "https://files.pythonhosted.org/packages/bb/fe/1bcba1dfbfb8d01be8d93f07bfc502c93fa23afa6fd5ab3fc7c1df71038a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc", size = 1197834, upload_time = 
"2025-03-07T01:45:50.723Z" }, ] [[package]] @@ -1783,8 +1833,7 @@ name = "nvidia-curand-cu12" version = "10.3.9.90" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/45/5e/92aa15eca622a388b80fbf8375d4760738df6285b1e92c43d37390a33a9a/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:dfab99248034673b779bc6decafdc3404a8a6f502462201f2f31f11354204acd", size = 63625754 }, - { url = "https://files.pythonhosted.org/packages/fb/aa/6584b56dc84ebe9cf93226a5cde4d99080c8e90ab40f0c27bda7a0f29aa1/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9", size = 63619976 }, + { url = "https://files.pythonhosted.org/packages/fb/aa/6584b56dc84ebe9cf93226a5cde4d99080c8e90ab40f0c27bda7a0f29aa1/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9", size = 63619976, upload_time = "2025-03-07T01:46:23.323Z" }, ] [[package]] @@ -1797,8 +1846,7 @@ dependencies = [ { name = "nvidia-nvjitlink-cu12" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/c8/32/f7cd6ce8a7690544d084ea21c26e910a97e077c9b7f07bf5de623ee19981/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:db9ed69dbef9715071232caa9b69c52ac7de3a95773c2db65bdba85916e4e5c0", size = 267229841 }, - { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905 }, + { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload_time = "2025-03-07T01:47:16.273Z" }, ] [[package]] @@ -1809,8 +1857,7 @@ dependencies = [ { name = "nvidia-nvjitlink-cu12" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/bc/f7/cd777c4109681367721b00a106f491e0d0d15cfa1fd59672ce580ce42a97/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9b6c161cb130be1a07a27ea6923df8141f3c295852f4b260c65f18f3e0a091dc", size = 288117129 }, - { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466 }, + { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload_time = "2025-03-07T01:48:13.779Z" }, ] [[package]] @@ -1818,8 +1865,7 @@ name = "nvidia-cusparselt-cu12" version = "0.7.1" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/73/b9/598f6ff36faaece4b3c50d26f50e38661499ff34346f00e057760b35cc9d/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_aarch64.whl", hash = 
"sha256:8878dce784d0fac90131b6817b607e803c36e629ba34dc5b433471382196b6a5", size = 283835557 }, - { url = "https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691 }, + { url = "https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691, upload_time = "2025-02-26T00:15:44.104Z" }, ] [[package]] @@ -1827,8 +1873,7 @@ name = "nvidia-nccl-cu12" version = "2.27.5" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bb/1c/857979db0ef194ca5e21478a0612bcdbbe59458d7694361882279947b349/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:31432ad4d1fb1004eb0c56203dc9bc2178a1ba69d1d9e02d64a6938ab5e40e7a", size = 322400625 }, - { url = "https://files.pythonhosted.org/packages/6e/89/f7a07dc961b60645dbbf42e80f2bc85ade7feb9a491b11a1e973aa00071f/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457", size = 322348229 }, + { url = "https://files.pythonhosted.org/packages/6e/89/f7a07dc961b60645dbbf42e80f2bc85ade7feb9a491b11a1e973aa00071f/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457", size = 322348229, upload_time = "2025-06-26T04:11:28.385Z" }, ] [[package]] @@ -1836,8 +1881,7 @@ name = "nvidia-nvjitlink-cu12" version = "12.8.93" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836 }, - { url = "https://files.pythonhosted.org/packages/2a/a2/8cee5da30d13430e87bf99bb33455d2724d0a4a9cb5d7926d80ccb96d008/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:adccd7161ace7261e01bb91e44e88da350895c270d23f744f0820c818b7229e7", size = 38386204 }, + { url = "https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836, upload_time = "2025-03-07T01:49:55.661Z" }, ] [[package]] @@ -1845,8 +1889,7 @@ name = "nvidia-nvshmem-cu12" version = "3.4.5" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1d/6a/03aa43cc9bd3ad91553a88b5f6fb25ed6a3752ae86ce2180221962bc2aa5/nvidia_nvshmem_cu12-3.4.5-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0b48363fc6964dede448029434c6abed6c5e37f823cb43c3bcde7ecfc0457e15", size = 138936938 }, - { url = 
"https://files.pythonhosted.org/packages/b5/09/6ea3ea725f82e1e76684f0708bbedd871fc96da89945adeba65c3835a64c/nvidia_nvshmem_cu12-3.4.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:042f2500f24c021db8a06c5eec2539027d57460e1c1a762055a6554f72c369bd", size = 139103095 }, + { url = "https://files.pythonhosted.org/packages/b5/09/6ea3ea725f82e1e76684f0708bbedd871fc96da89945adeba65c3835a64c/nvidia_nvshmem_cu12-3.4.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:042f2500f24c021db8a06c5eec2539027d57460e1c1a762055a6554f72c369bd", size = 139103095, upload_time = "2025-09-06T00:32:31.266Z" }, ] [[package]] @@ -1854,17 +1897,16 @@ name = "nvidia-nvtx-cu12" version = "12.8.90" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/10/c0/1b303feea90d296f6176f32a2a70b5ef230f9bdeb3a72bddb0dc922dc137/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d7ad891da111ebafbf7e015d34879f7112832fc239ff0d7d776b6cb685274615", size = 91161 }, - { url = "https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954 }, + { url = "https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954, upload_time = "2025-03-07T01:42:44.131Z" }, ] [[package]] name = "packaging" version = "25.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727 } +sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727, upload_time = "2025-04-19T11:48:59.673Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469 }, + { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload_time = "2025-04-19T11:48:57.875Z" }, ] [[package]] @@ -1877,155 +1919,180 @@ dependencies = [ { name = "pytz" }, { name = "tzdata" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/33/01/d40b85317f86cf08d853a4f495195c73815fdf205eef3993821720274518/pandas-2.3.3.tar.gz", hash = "sha256:e05e1af93b977f7eafa636d043f9f94c7ee3ac81af99c13508215942e64c993b", size = 4495223 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/9c/fb/231d89e8637c808b997d172b18e9d4a4bc7bf31296196c260526055d1ea0/pandas-2.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d21f6d74eb1725c2efaa71a2bfc661a0689579b58e9c0ca58a739ff0b002b53", size = 11597846 }, - { url = 
"https://files.pythonhosted.org/packages/5c/bd/bf8064d9cfa214294356c2d6702b716d3cf3bb24be59287a6a21e24cae6b/pandas-2.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3fd2f887589c7aa868e02632612ba39acb0b8948faf5cc58f0850e165bd46f35", size = 10729618 }, - { url = "https://files.pythonhosted.org/packages/57/56/cf2dbe1a3f5271370669475ead12ce77c61726ffd19a35546e31aa8edf4e/pandas-2.3.3-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ecaf1e12bdc03c86ad4a7ea848d66c685cb6851d807a26aa245ca3d2017a1908", size = 11737212 }, - { url = "https://files.pythonhosted.org/packages/e5/63/cd7d615331b328e287d8233ba9fdf191a9c2d11b6af0c7a59cfcec23de68/pandas-2.3.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b3d11d2fda7eb164ef27ffc14b4fcab16a80e1ce67e9f57e19ec0afaf715ba89", size = 12362693 }, - { url = "https://files.pythonhosted.org/packages/a6/de/8b1895b107277d52f2b42d3a6806e69cfef0d5cf1d0ba343470b9d8e0a04/pandas-2.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a68e15f780eddf2b07d242e17a04aa187a7ee12b40b930bfdd78070556550e98", size = 12771002 }, - { url = "https://files.pythonhosted.org/packages/87/21/84072af3187a677c5893b170ba2c8fbe450a6ff911234916da889b698220/pandas-2.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:371a4ab48e950033bcf52b6527eccb564f52dc826c02afd9a1bc0ab731bba084", size = 13450971 }, - { url = "https://files.pythonhosted.org/packages/86/41/585a168330ff063014880a80d744219dbf1dd7a1c706e75ab3425a987384/pandas-2.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:a16dcec078a01eeef8ee61bf64074b4e524a2a3f4b3be9326420cabe59c4778b", size = 10992722 }, - { url = "https://files.pythonhosted.org/packages/cd/4b/18b035ee18f97c1040d94debd8f2e737000ad70ccc8f5513f4eefad75f4b/pandas-2.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:56851a737e3470de7fa88e6131f41281ed440d29a9268dcbf0002da5ac366713", size = 11544671 }, - { url = "https://files.pythonhosted.org/packages/31/94/72fac03573102779920099bcac1c3b05975c2cb5f01eac609faf34bed1ca/pandas-2.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bdcd9d1167f4885211e401b3036c0c8d9e274eee67ea8d0758a256d60704cfe8", size = 10680807 }, - { url = "https://files.pythonhosted.org/packages/16/87/9472cf4a487d848476865321de18cc8c920b8cab98453ab79dbbc98db63a/pandas-2.3.3-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e32e7cc9af0f1cc15548288a51a3b681cc2a219faa838e995f7dc53dbab1062d", size = 11709872 }, - { url = "https://files.pythonhosted.org/packages/15/07/284f757f63f8a8d69ed4472bfd85122bd086e637bf4ed09de572d575a693/pandas-2.3.3-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:318d77e0e42a628c04dc56bcef4b40de67918f7041c2b061af1da41dcff670ac", size = 12306371 }, - { url = "https://files.pythonhosted.org/packages/33/81/a3afc88fca4aa925804a27d2676d22dcd2031c2ebe08aabd0ae55b9ff282/pandas-2.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4e0a175408804d566144e170d0476b15d78458795bb18f1304fb94160cabf40c", size = 12765333 }, - { url = "https://files.pythonhosted.org/packages/8d/0f/b4d4ae743a83742f1153464cf1a8ecfafc3ac59722a0b5c8602310cb7158/pandas-2.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:93c2d9ab0fc11822b5eece72ec9587e172f63cff87c00b062f6e37448ced4493", size = 13418120 }, - { url = "https://files.pythonhosted.org/packages/4f/c7/e54682c96a895d0c808453269e0b5928a07a127a15704fedb643e9b0a4c8/pandas-2.3.3-cp313-cp313-win_amd64.whl", hash = 
"sha256:f8bfc0e12dc78f777f323f55c58649591b2cd0c43534e8355c51d3fede5f4dee", size = 10993991 }, - { url = "https://files.pythonhosted.org/packages/f9/ca/3f8d4f49740799189e1395812f3bf23b5e8fc7c190827d55a610da72ce55/pandas-2.3.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:75ea25f9529fdec2d2e93a42c523962261e567d250b0013b16210e1d40d7c2e5", size = 12048227 }, - { url = "https://files.pythonhosted.org/packages/0e/5a/f43efec3e8c0cc92c4663ccad372dbdff72b60bdb56b2749f04aa1d07d7e/pandas-2.3.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:74ecdf1d301e812db96a465a525952f4dde225fdb6d8e5a521d47e1f42041e21", size = 11411056 }, - { url = "https://files.pythonhosted.org/packages/46/b1/85331edfc591208c9d1a63a06baa67b21d332e63b7a591a5ba42a10bb507/pandas-2.3.3-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6435cb949cb34ec11cc9860246ccb2fdc9ecd742c12d3304989017d53f039a78", size = 11645189 }, - { url = "https://files.pythonhosted.org/packages/44/23/78d645adc35d94d1ac4f2a3c4112ab6f5b8999f4898b8cdf01252f8df4a9/pandas-2.3.3-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:900f47d8f20860de523a1ac881c4c36d65efcb2eb850e6948140fa781736e110", size = 12121912 }, - { url = "https://files.pythonhosted.org/packages/53/da/d10013df5e6aaef6b425aa0c32e1fc1f3e431e4bcabd420517dceadce354/pandas-2.3.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a45c765238e2ed7d7c608fc5bc4a6f88b642f2f01e70c0c23d2224dd21829d86", size = 12712160 }, - { url = "https://files.pythonhosted.org/packages/bd/17/e756653095a083d8a37cbd816cb87148debcfcd920129b25f99dd8d04271/pandas-2.3.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c4fc4c21971a1a9f4bdb4c73978c7f7256caa3e62b323f70d6cb80db583350bc", size = 13199233 }, - { url = "https://files.pythonhosted.org/packages/04/fd/74903979833db8390b73b3a8a7d30d146d710bd32703724dd9083950386f/pandas-2.3.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:ee15f284898e7b246df8087fc82b87b01686f98ee67d85a17b7ab44143a3a9a0", size = 11540635 }, - { url = "https://files.pythonhosted.org/packages/21/00/266d6b357ad5e6d3ad55093a7e8efc7dd245f5a842b584db9f30b0f0a287/pandas-2.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1611aedd912e1ff81ff41c745822980c49ce4a7907537be8692c8dbc31924593", size = 10759079 }, - { url = "https://files.pythonhosted.org/packages/ca/05/d01ef80a7a3a12b2f8bbf16daba1e17c98a2f039cbc8e2f77a2c5a63d382/pandas-2.3.3-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6d2cefc361461662ac48810cb14365a365ce864afe85ef1f447ff5a1e99ea81c", size = 11814049 }, - { url = "https://files.pythonhosted.org/packages/15/b2/0e62f78c0c5ba7e3d2c5945a82456f4fac76c480940f805e0b97fcbc2f65/pandas-2.3.3-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ee67acbbf05014ea6c763beb097e03cd629961c8a632075eeb34247120abcb4b", size = 12332638 }, - { url = "https://files.pythonhosted.org/packages/c5/33/dd70400631b62b9b29c3c93d2feee1d0964dc2bae2e5ad7a6c73a7f25325/pandas-2.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c46467899aaa4da076d5abc11084634e2d197e9460643dd455ac3db5856b24d6", size = 12886834 }, - { url = "https://files.pythonhosted.org/packages/d3/18/b5d48f55821228d0d2692b34fd5034bb185e854bdb592e9c640f6290e012/pandas-2.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:6253c72c6a1d990a410bc7de641d34053364ef8bcd3126f7e7450125887dffe3", size = 13409925 }, - { url = 
"https://files.pythonhosted.org/packages/a6/3d/124ac75fcd0ecc09b8fdccb0246ef65e35b012030defb0e0eba2cbbbe948/pandas-2.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:1b07204a219b3b7350abaae088f451860223a52cfb8a6c53358e7948735158e5", size = 11109071 }, - { url = "https://files.pythonhosted.org/packages/89/9c/0e21c895c38a157e0faa1fb64587a9226d6dd46452cac4532d80c3c4a244/pandas-2.3.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2462b1a365b6109d275250baaae7b760fd25c726aaca0054649286bcfbb3e8ec", size = 12048504 }, - { url = "https://files.pythonhosted.org/packages/d7/82/b69a1c95df796858777b68fbe6a81d37443a33319761d7c652ce77797475/pandas-2.3.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0242fe9a49aa8b4d78a4fa03acb397a58833ef6199e9aa40a95f027bb3a1b6e7", size = 11410702 }, - { url = "https://files.pythonhosted.org/packages/f9/88/702bde3ba0a94b8c73a0181e05144b10f13f29ebfc2150c3a79062a8195d/pandas-2.3.3-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a21d830e78df0a515db2b3d2f5570610f5e6bd2e27749770e8bb7b524b89b450", size = 11634535 }, - { url = "https://files.pythonhosted.org/packages/a4/1e/1bac1a839d12e6a82ec6cb40cda2edde64a2013a66963293696bbf31fbbb/pandas-2.3.3-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2e3ebdb170b5ef78f19bfb71b0dc5dc58775032361fa188e814959b74d726dd5", size = 12121582 }, - { url = "https://files.pythonhosted.org/packages/44/91/483de934193e12a3b1d6ae7c8645d083ff88dec75f46e827562f1e4b4da6/pandas-2.3.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d051c0e065b94b7a3cea50eb1ec32e912cd96dba41647eb24104b6c6c14c5788", size = 12699963 }, - { url = "https://files.pythonhosted.org/packages/70/44/5191d2e4026f86a2a109053e194d3ba7a31a2d10a9c2348368c63ed4e85a/pandas-2.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3869faf4bd07b3b66a9f462417d0ca3a9df29a9f6abd5d0d0dbab15dac7abe87", size = 13202175 }, +sdist = { url = "https://files.pythonhosted.org/packages/33/01/d40b85317f86cf08d853a4f495195c73815fdf205eef3993821720274518/pandas-2.3.3.tar.gz", hash = "sha256:e05e1af93b977f7eafa636d043f9f94c7ee3ac81af99c13508215942e64c993b", size = 4495223, upload_time = "2025-09-29T23:34:51.853Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9c/fb/231d89e8637c808b997d172b18e9d4a4bc7bf31296196c260526055d1ea0/pandas-2.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d21f6d74eb1725c2efaa71a2bfc661a0689579b58e9c0ca58a739ff0b002b53", size = 11597846, upload_time = "2025-09-29T23:19:48.856Z" }, + { url = "https://files.pythonhosted.org/packages/5c/bd/bf8064d9cfa214294356c2d6702b716d3cf3bb24be59287a6a21e24cae6b/pandas-2.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3fd2f887589c7aa868e02632612ba39acb0b8948faf5cc58f0850e165bd46f35", size = 10729618, upload_time = "2025-09-29T23:39:08.659Z" }, + { url = "https://files.pythonhosted.org/packages/57/56/cf2dbe1a3f5271370669475ead12ce77c61726ffd19a35546e31aa8edf4e/pandas-2.3.3-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ecaf1e12bdc03c86ad4a7ea848d66c685cb6851d807a26aa245ca3d2017a1908", size = 11737212, upload_time = "2025-09-29T23:19:59.765Z" }, + { url = "https://files.pythonhosted.org/packages/e5/63/cd7d615331b328e287d8233ba9fdf191a9c2d11b6af0c7a59cfcec23de68/pandas-2.3.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b3d11d2fda7eb164ef27ffc14b4fcab16a80e1ce67e9f57e19ec0afaf715ba89", size = 12362693, upload_time = "2025-09-29T23:20:14.098Z" }, + { url = 
"https://files.pythonhosted.org/packages/a6/de/8b1895b107277d52f2b42d3a6806e69cfef0d5cf1d0ba343470b9d8e0a04/pandas-2.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a68e15f780eddf2b07d242e17a04aa187a7ee12b40b930bfdd78070556550e98", size = 12771002, upload_time = "2025-09-29T23:20:26.76Z" }, + { url = "https://files.pythonhosted.org/packages/87/21/84072af3187a677c5893b170ba2c8fbe450a6ff911234916da889b698220/pandas-2.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:371a4ab48e950033bcf52b6527eccb564f52dc826c02afd9a1bc0ab731bba084", size = 13450971, upload_time = "2025-09-29T23:20:41.344Z" }, + { url = "https://files.pythonhosted.org/packages/86/41/585a168330ff063014880a80d744219dbf1dd7a1c706e75ab3425a987384/pandas-2.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:a16dcec078a01eeef8ee61bf64074b4e524a2a3f4b3be9326420cabe59c4778b", size = 10992722, upload_time = "2025-09-29T23:20:54.139Z" }, + { url = "https://files.pythonhosted.org/packages/cd/4b/18b035ee18f97c1040d94debd8f2e737000ad70ccc8f5513f4eefad75f4b/pandas-2.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:56851a737e3470de7fa88e6131f41281ed440d29a9268dcbf0002da5ac366713", size = 11544671, upload_time = "2025-09-29T23:21:05.024Z" }, + { url = "https://files.pythonhosted.org/packages/31/94/72fac03573102779920099bcac1c3b05975c2cb5f01eac609faf34bed1ca/pandas-2.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bdcd9d1167f4885211e401b3036c0c8d9e274eee67ea8d0758a256d60704cfe8", size = 10680807, upload_time = "2025-09-29T23:21:15.979Z" }, + { url = "https://files.pythonhosted.org/packages/16/87/9472cf4a487d848476865321de18cc8c920b8cab98453ab79dbbc98db63a/pandas-2.3.3-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e32e7cc9af0f1cc15548288a51a3b681cc2a219faa838e995f7dc53dbab1062d", size = 11709872, upload_time = "2025-09-29T23:21:27.165Z" }, + { url = "https://files.pythonhosted.org/packages/15/07/284f757f63f8a8d69ed4472bfd85122bd086e637bf4ed09de572d575a693/pandas-2.3.3-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:318d77e0e42a628c04dc56bcef4b40de67918f7041c2b061af1da41dcff670ac", size = 12306371, upload_time = "2025-09-29T23:21:40.532Z" }, + { url = "https://files.pythonhosted.org/packages/33/81/a3afc88fca4aa925804a27d2676d22dcd2031c2ebe08aabd0ae55b9ff282/pandas-2.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4e0a175408804d566144e170d0476b15d78458795bb18f1304fb94160cabf40c", size = 12765333, upload_time = "2025-09-29T23:21:55.77Z" }, + { url = "https://files.pythonhosted.org/packages/8d/0f/b4d4ae743a83742f1153464cf1a8ecfafc3ac59722a0b5c8602310cb7158/pandas-2.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:93c2d9ab0fc11822b5eece72ec9587e172f63cff87c00b062f6e37448ced4493", size = 13418120, upload_time = "2025-09-29T23:22:10.109Z" }, + { url = "https://files.pythonhosted.org/packages/4f/c7/e54682c96a895d0c808453269e0b5928a07a127a15704fedb643e9b0a4c8/pandas-2.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:f8bfc0e12dc78f777f323f55c58649591b2cd0c43534e8355c51d3fede5f4dee", size = 10993991, upload_time = "2025-09-29T23:25:04.889Z" }, + { url = "https://files.pythonhosted.org/packages/f9/ca/3f8d4f49740799189e1395812f3bf23b5e8fc7c190827d55a610da72ce55/pandas-2.3.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:75ea25f9529fdec2d2e93a42c523962261e567d250b0013b16210e1d40d7c2e5", size = 12048227, upload_time = "2025-09-29T23:22:24.343Z" }, + { url = 
"https://files.pythonhosted.org/packages/0e/5a/f43efec3e8c0cc92c4663ccad372dbdff72b60bdb56b2749f04aa1d07d7e/pandas-2.3.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:74ecdf1d301e812db96a465a525952f4dde225fdb6d8e5a521d47e1f42041e21", size = 11411056, upload_time = "2025-09-29T23:22:37.762Z" }, + { url = "https://files.pythonhosted.org/packages/46/b1/85331edfc591208c9d1a63a06baa67b21d332e63b7a591a5ba42a10bb507/pandas-2.3.3-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6435cb949cb34ec11cc9860246ccb2fdc9ecd742c12d3304989017d53f039a78", size = 11645189, upload_time = "2025-09-29T23:22:51.688Z" }, + { url = "https://files.pythonhosted.org/packages/44/23/78d645adc35d94d1ac4f2a3c4112ab6f5b8999f4898b8cdf01252f8df4a9/pandas-2.3.3-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:900f47d8f20860de523a1ac881c4c36d65efcb2eb850e6948140fa781736e110", size = 12121912, upload_time = "2025-09-29T23:23:05.042Z" }, + { url = "https://files.pythonhosted.org/packages/53/da/d10013df5e6aaef6b425aa0c32e1fc1f3e431e4bcabd420517dceadce354/pandas-2.3.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a45c765238e2ed7d7c608fc5bc4a6f88b642f2f01e70c0c23d2224dd21829d86", size = 12712160, upload_time = "2025-09-29T23:23:28.57Z" }, + { url = "https://files.pythonhosted.org/packages/bd/17/e756653095a083d8a37cbd816cb87148debcfcd920129b25f99dd8d04271/pandas-2.3.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c4fc4c21971a1a9f4bdb4c73978c7f7256caa3e62b323f70d6cb80db583350bc", size = 13199233, upload_time = "2025-09-29T23:24:24.876Z" }, + { url = "https://files.pythonhosted.org/packages/04/fd/74903979833db8390b73b3a8a7d30d146d710bd32703724dd9083950386f/pandas-2.3.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:ee15f284898e7b246df8087fc82b87b01686f98ee67d85a17b7ab44143a3a9a0", size = 11540635, upload_time = "2025-09-29T23:25:52.486Z" }, + { url = "https://files.pythonhosted.org/packages/21/00/266d6b357ad5e6d3ad55093a7e8efc7dd245f5a842b584db9f30b0f0a287/pandas-2.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1611aedd912e1ff81ff41c745822980c49ce4a7907537be8692c8dbc31924593", size = 10759079, upload_time = "2025-09-29T23:26:33.204Z" }, + { url = "https://files.pythonhosted.org/packages/ca/05/d01ef80a7a3a12b2f8bbf16daba1e17c98a2f039cbc8e2f77a2c5a63d382/pandas-2.3.3-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6d2cefc361461662ac48810cb14365a365ce864afe85ef1f447ff5a1e99ea81c", size = 11814049, upload_time = "2025-09-29T23:27:15.384Z" }, + { url = "https://files.pythonhosted.org/packages/15/b2/0e62f78c0c5ba7e3d2c5945a82456f4fac76c480940f805e0b97fcbc2f65/pandas-2.3.3-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ee67acbbf05014ea6c763beb097e03cd629961c8a632075eeb34247120abcb4b", size = 12332638, upload_time = "2025-09-29T23:27:51.625Z" }, + { url = "https://files.pythonhosted.org/packages/c5/33/dd70400631b62b9b29c3c93d2feee1d0964dc2bae2e5ad7a6c73a7f25325/pandas-2.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c46467899aaa4da076d5abc11084634e2d197e9460643dd455ac3db5856b24d6", size = 12886834, upload_time = "2025-09-29T23:28:21.289Z" }, + { url = "https://files.pythonhosted.org/packages/d3/18/b5d48f55821228d0d2692b34fd5034bb185e854bdb592e9c640f6290e012/pandas-2.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:6253c72c6a1d990a410bc7de641d34053364ef8bcd3126f7e7450125887dffe3", size = 13409925, upload_time = "2025-09-29T23:28:58.261Z" }, + { url = 
"https://files.pythonhosted.org/packages/a6/3d/124ac75fcd0ecc09b8fdccb0246ef65e35b012030defb0e0eba2cbbbe948/pandas-2.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:1b07204a219b3b7350abaae088f451860223a52cfb8a6c53358e7948735158e5", size = 11109071, upload_time = "2025-09-29T23:32:27.484Z" }, + { url = "https://files.pythonhosted.org/packages/89/9c/0e21c895c38a157e0faa1fb64587a9226d6dd46452cac4532d80c3c4a244/pandas-2.3.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2462b1a365b6109d275250baaae7b760fd25c726aaca0054649286bcfbb3e8ec", size = 12048504, upload_time = "2025-09-29T23:29:31.47Z" }, + { url = "https://files.pythonhosted.org/packages/d7/82/b69a1c95df796858777b68fbe6a81d37443a33319761d7c652ce77797475/pandas-2.3.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0242fe9a49aa8b4d78a4fa03acb397a58833ef6199e9aa40a95f027bb3a1b6e7", size = 11410702, upload_time = "2025-09-29T23:29:54.591Z" }, + { url = "https://files.pythonhosted.org/packages/f9/88/702bde3ba0a94b8c73a0181e05144b10f13f29ebfc2150c3a79062a8195d/pandas-2.3.3-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a21d830e78df0a515db2b3d2f5570610f5e6bd2e27749770e8bb7b524b89b450", size = 11634535, upload_time = "2025-09-29T23:30:21.003Z" }, + { url = "https://files.pythonhosted.org/packages/a4/1e/1bac1a839d12e6a82ec6cb40cda2edde64a2013a66963293696bbf31fbbb/pandas-2.3.3-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2e3ebdb170b5ef78f19bfb71b0dc5dc58775032361fa188e814959b74d726dd5", size = 12121582, upload_time = "2025-09-29T23:30:43.391Z" }, + { url = "https://files.pythonhosted.org/packages/44/91/483de934193e12a3b1d6ae7c8645d083ff88dec75f46e827562f1e4b4da6/pandas-2.3.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d051c0e065b94b7a3cea50eb1ec32e912cd96dba41647eb24104b6c6c14c5788", size = 12699963, upload_time = "2025-09-29T23:31:10.009Z" }, + { url = "https://files.pythonhosted.org/packages/70/44/5191d2e4026f86a2a109053e194d3ba7a31a2d10a9c2348368c63ed4e85a/pandas-2.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3869faf4bd07b3b66a9f462417d0ca3a9df29a9f6abd5d0d0dbab15dac7abe87", size = 13202175, upload_time = "2025-09-29T23:31:59.173Z" }, ] [[package]] name = "parameterized" version = "0.9.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ea/49/00c0c0cc24ff4266025a53e41336b79adaa5a4ebfad214f433d623f9865e/parameterized-0.9.0.tar.gz", hash = "sha256:7fc905272cefa4f364c1a3429cbbe9c0f98b793988efb5bf90aac80f08db09b1", size = 24351 } +sdist = { url = "https://files.pythonhosted.org/packages/ea/49/00c0c0cc24ff4266025a53e41336b79adaa5a4ebfad214f433d623f9865e/parameterized-0.9.0.tar.gz", hash = "sha256:7fc905272cefa4f364c1a3429cbbe9c0f98b793988efb5bf90aac80f08db09b1", size = 24351, upload_time = "2023-03-27T02:01:11.592Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/00/2f/804f58f0b856ab3bf21617cccf5b39206e6c4c94c2cd227bde125ea6105f/parameterized-0.9.0-py2.py3-none-any.whl", hash = "sha256:4e0758e3d41bea3bbd05ec14fc2c24736723f243b28d702081aef438c9372b1b", size = 20475 }, + { url = "https://files.pythonhosted.org/packages/00/2f/804f58f0b856ab3bf21617cccf5b39206e6c4c94c2cd227bde125ea6105f/parameterized-0.9.0-py2.py3-none-any.whl", hash = "sha256:4e0758e3d41bea3bbd05ec14fc2c24736723f243b28d702081aef438c9372b1b", size = 20475, upload_time = "2023-03-27T02:01:09.31Z" }, ] [[package]] name = "pathlib-abc" version = "0.5.2" source = { registry = "https://pypi.org/simple" } -sdist = { 
url = "https://files.pythonhosted.org/packages/d6/cb/448649d7f25d228bf0be3a04590ab7afa77f15e056f8fa976ed05ec9a78f/pathlib_abc-0.5.2.tar.gz", hash = "sha256:fcd56f147234645e2c59c7ae22808b34c364bb231f685ddd9f96885aed78a94c", size = 33342 } +sdist = { url = "https://files.pythonhosted.org/packages/d6/cb/448649d7f25d228bf0be3a04590ab7afa77f15e056f8fa976ed05ec9a78f/pathlib_abc-0.5.2.tar.gz", hash = "sha256:fcd56f147234645e2c59c7ae22808b34c364bb231f685ddd9f96885aed78a94c", size = 33342, upload_time = "2025-10-10T18:37:20.524Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b1/29/c028a0731e202035f0e2e0bfbf1a3e46ad6c628cbb17f6f1cc9eea5d9ff1/pathlib_abc-0.5.2-py3-none-any.whl", hash = "sha256:4c9d94cf1b23af417ce7c0417b43333b06a106c01000b286c99de230d95eefbb", size = 19070 }, + { url = "https://files.pythonhosted.org/packages/b1/29/c028a0731e202035f0e2e0bfbf1a3e46ad6c628cbb17f6f1cc9eea5d9ff1/pathlib_abc-0.5.2-py3-none-any.whl", hash = "sha256:4c9d94cf1b23af417ce7c0417b43333b06a106c01000b286c99de230d95eefbb", size = 19070, upload_time = "2025-10-10T18:37:19.437Z" }, ] [[package]] name = "pathspec" version = "1.0.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/4c/b2/bb8e495d5262bfec41ab5cb18f522f1012933347fb5d9e62452d446baca2/pathspec-1.0.3.tar.gz", hash = "sha256:bac5cf97ae2c2876e2d25ebb15078eb04d76e4b98921ee31c6f85ade8b59444d", size = 130841 } +sdist = { url = "https://files.pythonhosted.org/packages/4c/b2/bb8e495d5262bfec41ab5cb18f522f1012933347fb5d9e62452d446baca2/pathspec-1.0.3.tar.gz", hash = "sha256:bac5cf97ae2c2876e2d25ebb15078eb04d76e4b98921ee31c6f85ade8b59444d", size = 130841, upload_time = "2026-01-09T15:46:46.009Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/32/2b/121e912bd60eebd623f873fd090de0e84f322972ab25a7f9044c056804ed/pathspec-1.0.3-py3-none-any.whl", hash = "sha256:e80767021c1cc524aa3fb14bedda9c34406591343cc42797b386ce7b9354fb6c", size = 55021 }, + { url = "https://files.pythonhosted.org/packages/32/2b/121e912bd60eebd623f873fd090de0e84f322972ab25a7f9044c056804ed/pathspec-1.0.3-py3-none-any.whl", hash = "sha256:e80767021c1cc524aa3fb14bedda9c34406591343cc42797b386ce7b9354fb6c", size = 55021, upload_time = "2026-01-09T15:46:44.652Z" }, ] [[package]] name = "pillow" version = "12.1.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d0/02/d52c733a2452ef1ffcc123b68e6606d07276b0e358db70eabad7e40042b7/pillow-12.1.0.tar.gz", hash = "sha256:5c5ae0a06e9ea030ab786b0251b32c7e4ce10e58d983c0d5c56029455180b5b9", size = 46977283 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/20/31/dc53fe21a2f2996e1b7d92bf671cdb157079385183ef7c1ae08b485db510/pillow-12.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a332ac4ccb84b6dde65dbace8431f3af08874bf9770719d32a635c4ef411b18b", size = 5262642 }, - { url = "https://files.pythonhosted.org/packages/ab/c1/10e45ac9cc79419cedf5121b42dcca5a50ad2b601fa080f58c22fb27626e/pillow-12.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:907bfa8a9cb790748a9aa4513e37c88c59660da3bcfffbd24a7d9e6abf224551", size = 4657464 }, - { url = "https://files.pythonhosted.org/packages/ad/26/7b82c0ab7ef40ebede7a97c72d473bda5950f609f8e0c77b04af574a0ddb/pillow-12.1.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:efdc140e7b63b8f739d09a99033aa430accce485ff78e6d311973a67b6bf3208", size = 6234878 }, - { url = 
"https://files.pythonhosted.org/packages/76/25/27abc9792615b5e886ca9411ba6637b675f1b77af3104710ac7353fe5605/pillow-12.1.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bef9768cab184e7ae6e559c032e95ba8d07b3023c289f79a2bd36e8bf85605a5", size = 8044868 }, - { url = "https://files.pythonhosted.org/packages/0a/ea/f200a4c36d836100e7bc738fc48cd963d3ba6372ebc8298a889e0cfc3359/pillow-12.1.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:742aea052cf5ab5034a53c3846165bc3ce88d7c38e954120db0ab867ca242661", size = 6349468 }, - { url = "https://files.pythonhosted.org/packages/11/8f/48d0b77ab2200374c66d344459b8958c86693be99526450e7aee714e03e4/pillow-12.1.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a6dfc2af5b082b635af6e08e0d1f9f1c4e04d17d4e2ca0ef96131e85eda6eb17", size = 7041518 }, - { url = "https://files.pythonhosted.org/packages/1d/23/c281182eb986b5d31f0a76d2a2c8cd41722d6fb8ed07521e802f9bba52de/pillow-12.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:609e89d9f90b581c8d16358c9087df76024cf058fa693dd3e1e1620823f39670", size = 6462829 }, - { url = "https://files.pythonhosted.org/packages/25/ef/7018273e0faac099d7b00982abdcc39142ae6f3bd9ceb06de09779c4a9d6/pillow-12.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:43b4899cfd091a9693a1278c4982f3e50f7fb7cff5153b05174b4afc9593b616", size = 7166756 }, - { url = "https://files.pythonhosted.org/packages/8f/c8/993d4b7ab2e341fe02ceef9576afcf5830cdec640be2ac5bee1820d693d4/pillow-12.1.0-cp312-cp312-win32.whl", hash = "sha256:aa0c9cc0b82b14766a99fbe6084409972266e82f459821cd26997a488a7261a7", size = 6328770 }, - { url = "https://files.pythonhosted.org/packages/a7/87/90b358775a3f02765d87655237229ba64a997b87efa8ccaca7dd3e36e7a7/pillow-12.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:d70534cea9e7966169ad29a903b99fc507e932069a881d0965a1a84bb57f6c6d", size = 7033406 }, - { url = "https://files.pythonhosted.org/packages/5d/cf/881b457eccacac9e5b2ddd97d5071fb6d668307c57cbf4e3b5278e06e536/pillow-12.1.0-cp312-cp312-win_arm64.whl", hash = "sha256:65b80c1ee7e14a87d6a068dd3b0aea268ffcabfe0498d38661b00c5b4b22e74c", size = 2452612 }, - { url = "https://files.pythonhosted.org/packages/dd/c7/2530a4aa28248623e9d7f27316b42e27c32ec410f695929696f2e0e4a778/pillow-12.1.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:7b5dd7cbae20285cdb597b10eb5a2c13aa9de6cde9bb64a3c1317427b1db1ae1", size = 4062543 }, - { url = "https://files.pythonhosted.org/packages/8f/1f/40b8eae823dc1519b87d53c30ed9ef085506b05281d313031755c1705f73/pillow-12.1.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:29a4cef9cb672363926f0470afc516dbf7305a14d8c54f7abbb5c199cd8f8179", size = 4138373 }, - { url = "https://files.pythonhosted.org/packages/d4/77/6fa60634cf06e52139fd0e89e5bbf055e8166c691c42fb162818b7fda31d/pillow-12.1.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:681088909d7e8fa9e31b9799aaa59ba5234c58e5e4f1951b4c4d1082a2e980e0", size = 3601241 }, - { url = "https://files.pythonhosted.org/packages/4f/bf/28ab865de622e14b747f0cd7877510848252d950e43002e224fb1c9ababf/pillow-12.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:983976c2ab753166dc66d36af6e8ec15bb511e4a25856e2227e5f7e00a160587", size = 5262410 }, - { url = "https://files.pythonhosted.org/packages/1c/34/583420a1b55e715937a85bd48c5c0991598247a1fd2eb5423188e765ea02/pillow-12.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:db44d5c160a90df2d24a24760bbd37607d53da0b34fb546c4c232af7192298ac", size 
= 4657312 }, - { url = "https://files.pythonhosted.org/packages/1d/fd/f5a0896839762885b3376ff04878f86ab2b097c2f9a9cdccf4eda8ba8dc0/pillow-12.1.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6b7a9d1db5dad90e2991645874f708e87d9a3c370c243c2d7684d28f7e133e6b", size = 6232605 }, - { url = "https://files.pythonhosted.org/packages/98/aa/938a09d127ac1e70e6ed467bd03834350b33ef646b31edb7452d5de43792/pillow-12.1.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6258f3260986990ba2fa8a874f8b6e808cf5abb51a94015ca3dc3c68aa4f30ea", size = 8041617 }, - { url = "https://files.pythonhosted.org/packages/17/e8/538b24cb426ac0186e03f80f78bc8dc7246c667f58b540bdd57c71c9f79d/pillow-12.1.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e115c15e3bc727b1ca3e641a909f77f8ca72a64fff150f666fcc85e57701c26c", size = 6346509 }, - { url = "https://files.pythonhosted.org/packages/01/9a/632e58ec89a32738cabfd9ec418f0e9898a2b4719afc581f07c04a05e3c9/pillow-12.1.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6741e6f3074a35e47c77b23a4e4f2d90db3ed905cb1c5e6e0d49bff2045632bc", size = 7038117 }, - { url = "https://files.pythonhosted.org/packages/c7/a2/d40308cf86eada842ca1f3ffa45d0ca0df7e4ab33c83f81e73f5eaed136d/pillow-12.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:935b9d1aed48fcfb3f838caac506f38e29621b44ccc4f8a64d575cb1b2a88644", size = 6460151 }, - { url = "https://files.pythonhosted.org/packages/f1/88/f5b058ad6453a085c5266660a1417bdad590199da1b32fb4efcff9d33b05/pillow-12.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5fee4c04aad8932da9f8f710af2c1a15a83582cfb884152a9caa79d4efcdbf9c", size = 7164534 }, - { url = "https://files.pythonhosted.org/packages/19/ce/c17334caea1db789163b5d855a5735e47995b0b5dc8745e9a3605d5f24c0/pillow-12.1.0-cp313-cp313-win32.whl", hash = "sha256:a786bf667724d84aa29b5db1c61b7bfdde380202aaca12c3461afd6b71743171", size = 6332551 }, - { url = "https://files.pythonhosted.org/packages/e5/07/74a9d941fa45c90a0d9465098fe1ec85de3e2afbdc15cc4766622d516056/pillow-12.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:461f9dfdafa394c59cd6d818bdfdbab4028b83b02caadaff0ffd433faf4c9a7a", size = 7040087 }, - { url = "https://files.pythonhosted.org/packages/88/09/c99950c075a0e9053d8e880595926302575bc742b1b47fe1bbcc8d388d50/pillow-12.1.0-cp313-cp313-win_arm64.whl", hash = "sha256:9212d6b86917a2300669511ed094a9406888362e085f2431a7da985a6b124f45", size = 2452470 }, - { url = "https://files.pythonhosted.org/packages/b5/ba/970b7d85ba01f348dee4d65412476321d40ee04dcb51cd3735b9dc94eb58/pillow-12.1.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:00162e9ca6d22b7c3ee8e61faa3c3253cd19b6a37f126cad04f2f88b306f557d", size = 5264816 }, - { url = "https://files.pythonhosted.org/packages/10/60/650f2fb55fdba7a510d836202aa52f0baac633e50ab1cf18415d332188fb/pillow-12.1.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:7d6daa89a00b58c37cb1747ec9fb7ac3bc5ffd5949f5888657dfddde6d1312e0", size = 4660472 }, - { url = "https://files.pythonhosted.org/packages/2b/c0/5273a99478956a099d533c4f46cbaa19fd69d606624f4334b85e50987a08/pillow-12.1.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e2479c7f02f9d505682dc47df8c0ea1fc5e264c4d1629a5d63fe3e2334b89554", size = 6268974 }, - { url = "https://files.pythonhosted.org/packages/b4/26/0bf714bc2e73d5267887d47931d53c4ceeceea6978148ed2ab2a4e6463c4/pillow-12.1.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:f188d580bd870cda1e15183790d1cc2fa78f666e76077d103edf048eed9c356e", size = 8073070 }, - { url = "https://files.pythonhosted.org/packages/43/cf/1ea826200de111a9d65724c54f927f3111dc5ae297f294b370a670c17786/pillow-12.1.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0fde7ec5538ab5095cc02df38ee99b0443ff0e1c847a045554cf5f9af1f4aa82", size = 6380176 }, - { url = "https://files.pythonhosted.org/packages/03/e0/7938dd2b2013373fd85d96e0f38d62b7a5a262af21ac274250c7ca7847c9/pillow-12.1.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0ed07dca4a8464bada6139ab38f5382f83e5f111698caf3191cb8dbf27d908b4", size = 7067061 }, - { url = "https://files.pythonhosted.org/packages/86/ad/a2aa97d37272a929a98437a8c0ac37b3cf012f4f8721e1bd5154699b2518/pillow-12.1.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:f45bd71d1fa5e5749587613037b172e0b3b23159d1c00ef2fc920da6f470e6f0", size = 6491824 }, - { url = "https://files.pythonhosted.org/packages/a4/44/80e46611b288d51b115826f136fb3465653c28f491068a72d3da49b54cd4/pillow-12.1.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:277518bf4fe74aa91489e1b20577473b19ee70fb97c374aa50830b279f25841b", size = 7190911 }, - { url = "https://files.pythonhosted.org/packages/86/77/eacc62356b4cf81abe99ff9dbc7402750044aed02cfd6a503f7c6fc11f3e/pillow-12.1.0-cp313-cp313t-win32.whl", hash = "sha256:7315f9137087c4e0ee73a761b163fc9aa3b19f5f606a7fc08d83fd3e4379af65", size = 6336445 }, - { url = "https://files.pythonhosted.org/packages/e7/3c/57d81d0b74d218706dafccb87a87ea44262c43eef98eb3b164fd000e0491/pillow-12.1.0-cp313-cp313t-win_amd64.whl", hash = "sha256:0ddedfaa8b5f0b4ffbc2fa87b556dc59f6bb4ecb14a53b33f9189713ae8053c0", size = 7045354 }, - { url = "https://files.pythonhosted.org/packages/ac/82/8b9b97bba2e3576a340f93b044a3a3a09841170ab4c1eb0d5c93469fd32f/pillow-12.1.0-cp313-cp313t-win_arm64.whl", hash = "sha256:80941e6d573197a0c28f394753de529bb436b1ca990ed6e765cf42426abc39f8", size = 2454547 }, - { url = "https://files.pythonhosted.org/packages/8c/87/bdf971d8bbcf80a348cc3bacfcb239f5882100fe80534b0ce67a784181d8/pillow-12.1.0-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:5cb7bc1966d031aec37ddb9dcf15c2da5b2e9f7cc3ca7c54473a20a927e1eb91", size = 4062533 }, - { url = "https://files.pythonhosted.org/packages/ff/4f/5eb37a681c68d605eb7034c004875c81f86ec9ef51f5be4a63eadd58859a/pillow-12.1.0-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:97e9993d5ed946aba26baf9c1e8cf18adbab584b99f452ee72f7ee8acb882796", size = 4138546 }, - { url = "https://files.pythonhosted.org/packages/11/6d/19a95acb2edbace40dcd582d077b991646b7083c41b98da4ed7555b59733/pillow-12.1.0-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:414b9a78e14ffeb98128863314e62c3f24b8a86081066625700b7985b3f529bd", size = 3601163 }, - { url = "https://files.pythonhosted.org/packages/fc/36/2b8138e51cb42e4cc39c3297713455548be855a50558c3ac2beebdc251dd/pillow-12.1.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:e6bdb408f7c9dd2a5ff2b14a3b0bb6d4deb29fb9961e6eb3ae2031ae9a5cec13", size = 5266086 }, - { url = "https://files.pythonhosted.org/packages/53/4b/649056e4d22e1caa90816bf99cef0884aed607ed38075bd75f091a607a38/pillow-12.1.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:3413c2ae377550f5487991d444428f1a8ae92784aac79caa8b1e3b89b175f77e", size = 4657344 }, - { url = 
"https://files.pythonhosted.org/packages/6c/6b/c5742cea0f1ade0cd61485dc3d81f05261fc2276f537fbdc00802de56779/pillow-12.1.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e5dcbe95016e88437ecf33544ba5db21ef1b8dd6e1b434a2cb2a3d605299e643", size = 6232114 }, - { url = "https://files.pythonhosted.org/packages/bf/8f/9f521268ce22d63991601aafd3d48d5ff7280a246a1ef62d626d67b44064/pillow-12.1.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d0a7735df32ccbcc98b98a1ac785cc4b19b580be1bdf0aeb5c03223220ea09d5", size = 8042708 }, - { url = "https://files.pythonhosted.org/packages/1a/eb/257f38542893f021502a1bbe0c2e883c90b5cff26cc33b1584a841a06d30/pillow-12.1.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0c27407a2d1b96774cbc4a7594129cc027339fd800cd081e44497722ea1179de", size = 6347762 }, - { url = "https://files.pythonhosted.org/packages/c4/5a/8ba375025701c09b309e8d5163c5a4ce0102fa86bbf8800eb0d7ac87bc51/pillow-12.1.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:15c794d74303828eaa957ff8070846d0efe8c630901a1c753fdc63850e19ecd9", size = 7039265 }, - { url = "https://files.pythonhosted.org/packages/cf/dc/cf5e4cdb3db533f539e88a7bbf9f190c64ab8a08a9bc7a4ccf55067872e4/pillow-12.1.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c990547452ee2800d8506c4150280757f88532f3de2a58e3022e9b179107862a", size = 6462341 }, - { url = "https://files.pythonhosted.org/packages/d0/47/0291a25ac9550677e22eda48510cfc4fa4b2ef0396448b7fbdc0a6946309/pillow-12.1.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b63e13dd27da389ed9475b3d28510f0f954bca0041e8e551b2a4eb1eab56a39a", size = 7165395 }, - { url = "https://files.pythonhosted.org/packages/4f/4c/e005a59393ec4d9416be06e6b45820403bb946a778e39ecec62f5b2b991e/pillow-12.1.0-cp314-cp314-win32.whl", hash = "sha256:1a949604f73eb07a8adab38c4fe50791f9919344398bdc8ac6b307f755fc7030", size = 6431413 }, - { url = "https://files.pythonhosted.org/packages/1c/af/f23697f587ac5f9095d67e31b81c95c0249cd461a9798a061ed6709b09b5/pillow-12.1.0-cp314-cp314-win_amd64.whl", hash = "sha256:4f9f6a650743f0ddee5593ac9e954ba1bdbc5e150bc066586d4f26127853ab94", size = 7176779 }, - { url = "https://files.pythonhosted.org/packages/b3/36/6a51abf8599232f3e9afbd16d52829376a68909fe14efe29084445db4b73/pillow-12.1.0-cp314-cp314-win_arm64.whl", hash = "sha256:808b99604f7873c800c4840f55ff389936ef1948e4e87645eaf3fccbc8477ac4", size = 2543105 }, - { url = "https://files.pythonhosted.org/packages/82/54/2e1dd20c8749ff225080d6ba465a0cab4387f5db0d1c5fb1439e2d99923f/pillow-12.1.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:bc11908616c8a283cf7d664f77411a5ed2a02009b0097ff8abbba5e79128ccf2", size = 5268571 }, - { url = "https://files.pythonhosted.org/packages/57/61/571163a5ef86ec0cf30d265ac2a70ae6fc9e28413d1dc94fa37fae6bda89/pillow-12.1.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:896866d2d436563fa2a43a9d72f417874f16b5545955c54a64941e87c1376c61", size = 4660426 }, - { url = "https://files.pythonhosted.org/packages/5e/e1/53ee5163f794aef1bf84243f755ee6897a92c708505350dd1923f4afec48/pillow-12.1.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8e178e3e99d3c0ea8fc64b88447f7cac8ccf058af422a6cedc690d0eadd98c51", size = 6269908 }, - { url = "https://files.pythonhosted.org/packages/bc/0b/b4b4106ff0ee1afa1dc599fde6ab230417f800279745124f6c50bcffed8e/pillow-12.1.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:079af2fb0c599c2ec144ba2c02766d1b55498e373b3ac64687e43849fbbef5bc", size = 8074733 }, - { url = "https://files.pythonhosted.org/packages/19/9f/80b411cbac4a732439e629a26ad3ef11907a8c7fc5377b7602f04f6fe4e7/pillow-12.1.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bdec5e43377761c5dbca620efb69a77f6855c5a379e32ac5b158f54c84212b14", size = 6381431 }, - { url = "https://files.pythonhosted.org/packages/8f/b7/d65c45db463b66ecb6abc17c6ba6917a911202a07662247e1355ce1789e7/pillow-12.1.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:565c986f4b45c020f5421a4cea13ef294dde9509a8577f29b2fc5edc7587fff8", size = 7068529 }, - { url = "https://files.pythonhosted.org/packages/50/96/dfd4cd726b4a45ae6e3c669fc9e49deb2241312605d33aba50499e9d9bd1/pillow-12.1.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:43aca0a55ce1eefc0aefa6253661cb54571857b1a7b2964bd8a1e3ef4b729924", size = 6492981 }, - { url = "https://files.pythonhosted.org/packages/4d/1c/b5dc52cf713ae46033359c5ca920444f18a6359ce1020dd3e9c553ea5bc6/pillow-12.1.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:0deedf2ea233722476b3a81e8cdfbad786f7adbed5d848469fa59fe52396e4ef", size = 7191878 }, - { url = "https://files.pythonhosted.org/packages/53/26/c4188248bd5edaf543864fe4834aebe9c9cb4968b6f573ce014cc42d0720/pillow-12.1.0-cp314-cp314t-win32.whl", hash = "sha256:b17fbdbe01c196e7e159aacb889e091f28e61020a8abeac07b68079b6e626988", size = 6438703 }, - { url = "https://files.pythonhosted.org/packages/b8/0e/69ed296de8ea05cb03ee139cee600f424ca166e632567b2d66727f08c7ed/pillow-12.1.0-cp314-cp314t-win_amd64.whl", hash = "sha256:27b9baecb428899db6c0de572d6d305cfaf38ca1596b5c0542a5182e3e74e8c6", size = 7182927 }, - { url = "https://files.pythonhosted.org/packages/fc/f5/68334c015eed9b5cff77814258717dec591ded209ab5b6fb70e2ae873d1d/pillow-12.1.0-cp314-cp314t-win_arm64.whl", hash = "sha256:f61333d817698bdcdd0f9d7793e365ac3d2a21c1f1eb02b32ad6aefb8d8ea831", size = 2545104 }, +sdist = { url = "https://files.pythonhosted.org/packages/d0/02/d52c733a2452ef1ffcc123b68e6606d07276b0e358db70eabad7e40042b7/pillow-12.1.0.tar.gz", hash = "sha256:5c5ae0a06e9ea030ab786b0251b32c7e4ce10e58d983c0d5c56029455180b5b9", size = 46977283, upload_time = "2026-01-02T09:13:29.892Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/20/31/dc53fe21a2f2996e1b7d92bf671cdb157079385183ef7c1ae08b485db510/pillow-12.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a332ac4ccb84b6dde65dbace8431f3af08874bf9770719d32a635c4ef411b18b", size = 5262642, upload_time = "2026-01-02T09:11:10.138Z" }, + { url = "https://files.pythonhosted.org/packages/ab/c1/10e45ac9cc79419cedf5121b42dcca5a50ad2b601fa080f58c22fb27626e/pillow-12.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:907bfa8a9cb790748a9aa4513e37c88c59660da3bcfffbd24a7d9e6abf224551", size = 4657464, upload_time = "2026-01-02T09:11:12.319Z" }, + { url = "https://files.pythonhosted.org/packages/ad/26/7b82c0ab7ef40ebede7a97c72d473bda5950f609f8e0c77b04af574a0ddb/pillow-12.1.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:efdc140e7b63b8f739d09a99033aa430accce485ff78e6d311973a67b6bf3208", size = 6234878, upload_time = "2026-01-02T09:11:14.096Z" }, + { url = "https://files.pythonhosted.org/packages/76/25/27abc9792615b5e886ca9411ba6637b675f1b77af3104710ac7353fe5605/pillow-12.1.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bef9768cab184e7ae6e559c032e95ba8d07b3023c289f79a2bd36e8bf85605a5", size = 
8044868, upload_time = "2026-01-02T09:11:15.903Z" }, + { url = "https://files.pythonhosted.org/packages/0a/ea/f200a4c36d836100e7bc738fc48cd963d3ba6372ebc8298a889e0cfc3359/pillow-12.1.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:742aea052cf5ab5034a53c3846165bc3ce88d7c38e954120db0ab867ca242661", size = 6349468, upload_time = "2026-01-02T09:11:17.631Z" }, + { url = "https://files.pythonhosted.org/packages/11/8f/48d0b77ab2200374c66d344459b8958c86693be99526450e7aee714e03e4/pillow-12.1.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a6dfc2af5b082b635af6e08e0d1f9f1c4e04d17d4e2ca0ef96131e85eda6eb17", size = 7041518, upload_time = "2026-01-02T09:11:19.389Z" }, + { url = "https://files.pythonhosted.org/packages/1d/23/c281182eb986b5d31f0a76d2a2c8cd41722d6fb8ed07521e802f9bba52de/pillow-12.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:609e89d9f90b581c8d16358c9087df76024cf058fa693dd3e1e1620823f39670", size = 6462829, upload_time = "2026-01-02T09:11:21.28Z" }, + { url = "https://files.pythonhosted.org/packages/25/ef/7018273e0faac099d7b00982abdcc39142ae6f3bd9ceb06de09779c4a9d6/pillow-12.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:43b4899cfd091a9693a1278c4982f3e50f7fb7cff5153b05174b4afc9593b616", size = 7166756, upload_time = "2026-01-02T09:11:23.559Z" }, + { url = "https://files.pythonhosted.org/packages/8f/c8/993d4b7ab2e341fe02ceef9576afcf5830cdec640be2ac5bee1820d693d4/pillow-12.1.0-cp312-cp312-win32.whl", hash = "sha256:aa0c9cc0b82b14766a99fbe6084409972266e82f459821cd26997a488a7261a7", size = 6328770, upload_time = "2026-01-02T09:11:25.661Z" }, + { url = "https://files.pythonhosted.org/packages/a7/87/90b358775a3f02765d87655237229ba64a997b87efa8ccaca7dd3e36e7a7/pillow-12.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:d70534cea9e7966169ad29a903b99fc507e932069a881d0965a1a84bb57f6c6d", size = 7033406, upload_time = "2026-01-02T09:11:27.474Z" }, + { url = "https://files.pythonhosted.org/packages/5d/cf/881b457eccacac9e5b2ddd97d5071fb6d668307c57cbf4e3b5278e06e536/pillow-12.1.0-cp312-cp312-win_arm64.whl", hash = "sha256:65b80c1ee7e14a87d6a068dd3b0aea268ffcabfe0498d38661b00c5b4b22e74c", size = 2452612, upload_time = "2026-01-02T09:11:29.309Z" }, + { url = "https://files.pythonhosted.org/packages/dd/c7/2530a4aa28248623e9d7f27316b42e27c32ec410f695929696f2e0e4a778/pillow-12.1.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:7b5dd7cbae20285cdb597b10eb5a2c13aa9de6cde9bb64a3c1317427b1db1ae1", size = 4062543, upload_time = "2026-01-02T09:11:31.566Z" }, + { url = "https://files.pythonhosted.org/packages/8f/1f/40b8eae823dc1519b87d53c30ed9ef085506b05281d313031755c1705f73/pillow-12.1.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:29a4cef9cb672363926f0470afc516dbf7305a14d8c54f7abbb5c199cd8f8179", size = 4138373, upload_time = "2026-01-02T09:11:33.367Z" }, + { url = "https://files.pythonhosted.org/packages/d4/77/6fa60634cf06e52139fd0e89e5bbf055e8166c691c42fb162818b7fda31d/pillow-12.1.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:681088909d7e8fa9e31b9799aaa59ba5234c58e5e4f1951b4c4d1082a2e980e0", size = 3601241, upload_time = "2026-01-02T09:11:35.011Z" }, + { url = "https://files.pythonhosted.org/packages/4f/bf/28ab865de622e14b747f0cd7877510848252d950e43002e224fb1c9ababf/pillow-12.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:983976c2ab753166dc66d36af6e8ec15bb511e4a25856e2227e5f7e00a160587", size = 5262410, upload_time = "2026-01-02T09:11:36.682Z" }, + { url = 
"https://files.pythonhosted.org/packages/1c/34/583420a1b55e715937a85bd48c5c0991598247a1fd2eb5423188e765ea02/pillow-12.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:db44d5c160a90df2d24a24760bbd37607d53da0b34fb546c4c232af7192298ac", size = 4657312, upload_time = "2026-01-02T09:11:38.535Z" }, + { url = "https://files.pythonhosted.org/packages/1d/fd/f5a0896839762885b3376ff04878f86ab2b097c2f9a9cdccf4eda8ba8dc0/pillow-12.1.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6b7a9d1db5dad90e2991645874f708e87d9a3c370c243c2d7684d28f7e133e6b", size = 6232605, upload_time = "2026-01-02T09:11:40.602Z" }, + { url = "https://files.pythonhosted.org/packages/98/aa/938a09d127ac1e70e6ed467bd03834350b33ef646b31edb7452d5de43792/pillow-12.1.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6258f3260986990ba2fa8a874f8b6e808cf5abb51a94015ca3dc3c68aa4f30ea", size = 8041617, upload_time = "2026-01-02T09:11:42.721Z" }, + { url = "https://files.pythonhosted.org/packages/17/e8/538b24cb426ac0186e03f80f78bc8dc7246c667f58b540bdd57c71c9f79d/pillow-12.1.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e115c15e3bc727b1ca3e641a909f77f8ca72a64fff150f666fcc85e57701c26c", size = 6346509, upload_time = "2026-01-02T09:11:44.955Z" }, + { url = "https://files.pythonhosted.org/packages/01/9a/632e58ec89a32738cabfd9ec418f0e9898a2b4719afc581f07c04a05e3c9/pillow-12.1.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6741e6f3074a35e47c77b23a4e4f2d90db3ed905cb1c5e6e0d49bff2045632bc", size = 7038117, upload_time = "2026-01-02T09:11:46.736Z" }, + { url = "https://files.pythonhosted.org/packages/c7/a2/d40308cf86eada842ca1f3ffa45d0ca0df7e4ab33c83f81e73f5eaed136d/pillow-12.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:935b9d1aed48fcfb3f838caac506f38e29621b44ccc4f8a64d575cb1b2a88644", size = 6460151, upload_time = "2026-01-02T09:11:48.625Z" }, + { url = "https://files.pythonhosted.org/packages/f1/88/f5b058ad6453a085c5266660a1417bdad590199da1b32fb4efcff9d33b05/pillow-12.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5fee4c04aad8932da9f8f710af2c1a15a83582cfb884152a9caa79d4efcdbf9c", size = 7164534, upload_time = "2026-01-02T09:11:50.445Z" }, + { url = "https://files.pythonhosted.org/packages/19/ce/c17334caea1db789163b5d855a5735e47995b0b5dc8745e9a3605d5f24c0/pillow-12.1.0-cp313-cp313-win32.whl", hash = "sha256:a786bf667724d84aa29b5db1c61b7bfdde380202aaca12c3461afd6b71743171", size = 6332551, upload_time = "2026-01-02T09:11:52.234Z" }, + { url = "https://files.pythonhosted.org/packages/e5/07/74a9d941fa45c90a0d9465098fe1ec85de3e2afbdc15cc4766622d516056/pillow-12.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:461f9dfdafa394c59cd6d818bdfdbab4028b83b02caadaff0ffd433faf4c9a7a", size = 7040087, upload_time = "2026-01-02T09:11:54.822Z" }, + { url = "https://files.pythonhosted.org/packages/88/09/c99950c075a0e9053d8e880595926302575bc742b1b47fe1bbcc8d388d50/pillow-12.1.0-cp313-cp313-win_arm64.whl", hash = "sha256:9212d6b86917a2300669511ed094a9406888362e085f2431a7da985a6b124f45", size = 2452470, upload_time = "2026-01-02T09:11:56.522Z" }, + { url = "https://files.pythonhosted.org/packages/b5/ba/970b7d85ba01f348dee4d65412476321d40ee04dcb51cd3735b9dc94eb58/pillow-12.1.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:00162e9ca6d22b7c3ee8e61faa3c3253cd19b6a37f126cad04f2f88b306f557d", size = 5264816, upload_time = "2026-01-02T09:11:58.227Z" }, + { url = 
"https://files.pythonhosted.org/packages/10/60/650f2fb55fdba7a510d836202aa52f0baac633e50ab1cf18415d332188fb/pillow-12.1.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:7d6daa89a00b58c37cb1747ec9fb7ac3bc5ffd5949f5888657dfddde6d1312e0", size = 4660472, upload_time = "2026-01-02T09:12:00.798Z" }, + { url = "https://files.pythonhosted.org/packages/2b/c0/5273a99478956a099d533c4f46cbaa19fd69d606624f4334b85e50987a08/pillow-12.1.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e2479c7f02f9d505682dc47df8c0ea1fc5e264c4d1629a5d63fe3e2334b89554", size = 6268974, upload_time = "2026-01-02T09:12:02.572Z" }, + { url = "https://files.pythonhosted.org/packages/b4/26/0bf714bc2e73d5267887d47931d53c4ceeceea6978148ed2ab2a4e6463c4/pillow-12.1.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f188d580bd870cda1e15183790d1cc2fa78f666e76077d103edf048eed9c356e", size = 8073070, upload_time = "2026-01-02T09:12:04.75Z" }, + { url = "https://files.pythonhosted.org/packages/43/cf/1ea826200de111a9d65724c54f927f3111dc5ae297f294b370a670c17786/pillow-12.1.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0fde7ec5538ab5095cc02df38ee99b0443ff0e1c847a045554cf5f9af1f4aa82", size = 6380176, upload_time = "2026-01-02T09:12:06.626Z" }, + { url = "https://files.pythonhosted.org/packages/03/e0/7938dd2b2013373fd85d96e0f38d62b7a5a262af21ac274250c7ca7847c9/pillow-12.1.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0ed07dca4a8464bada6139ab38f5382f83e5f111698caf3191cb8dbf27d908b4", size = 7067061, upload_time = "2026-01-02T09:12:08.624Z" }, + { url = "https://files.pythonhosted.org/packages/86/ad/a2aa97d37272a929a98437a8c0ac37b3cf012f4f8721e1bd5154699b2518/pillow-12.1.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:f45bd71d1fa5e5749587613037b172e0b3b23159d1c00ef2fc920da6f470e6f0", size = 6491824, upload_time = "2026-01-02T09:12:10.488Z" }, + { url = "https://files.pythonhosted.org/packages/a4/44/80e46611b288d51b115826f136fb3465653c28f491068a72d3da49b54cd4/pillow-12.1.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:277518bf4fe74aa91489e1b20577473b19ee70fb97c374aa50830b279f25841b", size = 7190911, upload_time = "2026-01-02T09:12:12.772Z" }, + { url = "https://files.pythonhosted.org/packages/86/77/eacc62356b4cf81abe99ff9dbc7402750044aed02cfd6a503f7c6fc11f3e/pillow-12.1.0-cp313-cp313t-win32.whl", hash = "sha256:7315f9137087c4e0ee73a761b163fc9aa3b19f5f606a7fc08d83fd3e4379af65", size = 6336445, upload_time = "2026-01-02T09:12:14.775Z" }, + { url = "https://files.pythonhosted.org/packages/e7/3c/57d81d0b74d218706dafccb87a87ea44262c43eef98eb3b164fd000e0491/pillow-12.1.0-cp313-cp313t-win_amd64.whl", hash = "sha256:0ddedfaa8b5f0b4ffbc2fa87b556dc59f6bb4ecb14a53b33f9189713ae8053c0", size = 7045354, upload_time = "2026-01-02T09:12:16.599Z" }, + { url = "https://files.pythonhosted.org/packages/ac/82/8b9b97bba2e3576a340f93b044a3a3a09841170ab4c1eb0d5c93469fd32f/pillow-12.1.0-cp313-cp313t-win_arm64.whl", hash = "sha256:80941e6d573197a0c28f394753de529bb436b1ca990ed6e765cf42426abc39f8", size = 2454547, upload_time = "2026-01-02T09:12:18.704Z" }, + { url = "https://files.pythonhosted.org/packages/8c/87/bdf971d8bbcf80a348cc3bacfcb239f5882100fe80534b0ce67a784181d8/pillow-12.1.0-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:5cb7bc1966d031aec37ddb9dcf15c2da5b2e9f7cc3ca7c54473a20a927e1eb91", size = 4062533, upload_time = "2026-01-02T09:12:20.791Z" }, + { url = 
"https://files.pythonhosted.org/packages/ff/4f/5eb37a681c68d605eb7034c004875c81f86ec9ef51f5be4a63eadd58859a/pillow-12.1.0-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:97e9993d5ed946aba26baf9c1e8cf18adbab584b99f452ee72f7ee8acb882796", size = 4138546, upload_time = "2026-01-02T09:12:23.664Z" }, + { url = "https://files.pythonhosted.org/packages/11/6d/19a95acb2edbace40dcd582d077b991646b7083c41b98da4ed7555b59733/pillow-12.1.0-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:414b9a78e14ffeb98128863314e62c3f24b8a86081066625700b7985b3f529bd", size = 3601163, upload_time = "2026-01-02T09:12:26.338Z" }, + { url = "https://files.pythonhosted.org/packages/fc/36/2b8138e51cb42e4cc39c3297713455548be855a50558c3ac2beebdc251dd/pillow-12.1.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:e6bdb408f7c9dd2a5ff2b14a3b0bb6d4deb29fb9961e6eb3ae2031ae9a5cec13", size = 5266086, upload_time = "2026-01-02T09:12:28.782Z" }, + { url = "https://files.pythonhosted.org/packages/53/4b/649056e4d22e1caa90816bf99cef0884aed607ed38075bd75f091a607a38/pillow-12.1.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:3413c2ae377550f5487991d444428f1a8ae92784aac79caa8b1e3b89b175f77e", size = 4657344, upload_time = "2026-01-02T09:12:31.117Z" }, + { url = "https://files.pythonhosted.org/packages/6c/6b/c5742cea0f1ade0cd61485dc3d81f05261fc2276f537fbdc00802de56779/pillow-12.1.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e5dcbe95016e88437ecf33544ba5db21ef1b8dd6e1b434a2cb2a3d605299e643", size = 6232114, upload_time = "2026-01-02T09:12:32.936Z" }, + { url = "https://files.pythonhosted.org/packages/bf/8f/9f521268ce22d63991601aafd3d48d5ff7280a246a1ef62d626d67b44064/pillow-12.1.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d0a7735df32ccbcc98b98a1ac785cc4b19b580be1bdf0aeb5c03223220ea09d5", size = 8042708, upload_time = "2026-01-02T09:12:34.78Z" }, + { url = "https://files.pythonhosted.org/packages/1a/eb/257f38542893f021502a1bbe0c2e883c90b5cff26cc33b1584a841a06d30/pillow-12.1.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0c27407a2d1b96774cbc4a7594129cc027339fd800cd081e44497722ea1179de", size = 6347762, upload_time = "2026-01-02T09:12:36.748Z" }, + { url = "https://files.pythonhosted.org/packages/c4/5a/8ba375025701c09b309e8d5163c5a4ce0102fa86bbf8800eb0d7ac87bc51/pillow-12.1.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:15c794d74303828eaa957ff8070846d0efe8c630901a1c753fdc63850e19ecd9", size = 7039265, upload_time = "2026-01-02T09:12:39.082Z" }, + { url = "https://files.pythonhosted.org/packages/cf/dc/cf5e4cdb3db533f539e88a7bbf9f190c64ab8a08a9bc7a4ccf55067872e4/pillow-12.1.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c990547452ee2800d8506c4150280757f88532f3de2a58e3022e9b179107862a", size = 6462341, upload_time = "2026-01-02T09:12:40.946Z" }, + { url = "https://files.pythonhosted.org/packages/d0/47/0291a25ac9550677e22eda48510cfc4fa4b2ef0396448b7fbdc0a6946309/pillow-12.1.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b63e13dd27da389ed9475b3d28510f0f954bca0041e8e551b2a4eb1eab56a39a", size = 7165395, upload_time = "2026-01-02T09:12:42.706Z" }, + { url = "https://files.pythonhosted.org/packages/4f/4c/e005a59393ec4d9416be06e6b45820403bb946a778e39ecec62f5b2b991e/pillow-12.1.0-cp314-cp314-win32.whl", hash = "sha256:1a949604f73eb07a8adab38c4fe50791f9919344398bdc8ac6b307f755fc7030", size = 6431413, upload_time = "2026-01-02T09:12:44.944Z" }, + { url = 
"https://files.pythonhosted.org/packages/1c/af/f23697f587ac5f9095d67e31b81c95c0249cd461a9798a061ed6709b09b5/pillow-12.1.0-cp314-cp314-win_amd64.whl", hash = "sha256:4f9f6a650743f0ddee5593ac9e954ba1bdbc5e150bc066586d4f26127853ab94", size = 7176779, upload_time = "2026-01-02T09:12:46.727Z" }, + { url = "https://files.pythonhosted.org/packages/b3/36/6a51abf8599232f3e9afbd16d52829376a68909fe14efe29084445db4b73/pillow-12.1.0-cp314-cp314-win_arm64.whl", hash = "sha256:808b99604f7873c800c4840f55ff389936ef1948e4e87645eaf3fccbc8477ac4", size = 2543105, upload_time = "2026-01-02T09:12:49.243Z" }, + { url = "https://files.pythonhosted.org/packages/82/54/2e1dd20c8749ff225080d6ba465a0cab4387f5db0d1c5fb1439e2d99923f/pillow-12.1.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:bc11908616c8a283cf7d664f77411a5ed2a02009b0097ff8abbba5e79128ccf2", size = 5268571, upload_time = "2026-01-02T09:12:51.11Z" }, + { url = "https://files.pythonhosted.org/packages/57/61/571163a5ef86ec0cf30d265ac2a70ae6fc9e28413d1dc94fa37fae6bda89/pillow-12.1.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:896866d2d436563fa2a43a9d72f417874f16b5545955c54a64941e87c1376c61", size = 4660426, upload_time = "2026-01-02T09:12:52.865Z" }, + { url = "https://files.pythonhosted.org/packages/5e/e1/53ee5163f794aef1bf84243f755ee6897a92c708505350dd1923f4afec48/pillow-12.1.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8e178e3e99d3c0ea8fc64b88447f7cac8ccf058af422a6cedc690d0eadd98c51", size = 6269908, upload_time = "2026-01-02T09:12:54.884Z" }, + { url = "https://files.pythonhosted.org/packages/bc/0b/b4b4106ff0ee1afa1dc599fde6ab230417f800279745124f6c50bcffed8e/pillow-12.1.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:079af2fb0c599c2ec144ba2c02766d1b55498e373b3ac64687e43849fbbef5bc", size = 8074733, upload_time = "2026-01-02T09:12:56.802Z" }, + { url = "https://files.pythonhosted.org/packages/19/9f/80b411cbac4a732439e629a26ad3ef11907a8c7fc5377b7602f04f6fe4e7/pillow-12.1.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bdec5e43377761c5dbca620efb69a77f6855c5a379e32ac5b158f54c84212b14", size = 6381431, upload_time = "2026-01-02T09:12:58.823Z" }, + { url = "https://files.pythonhosted.org/packages/8f/b7/d65c45db463b66ecb6abc17c6ba6917a911202a07662247e1355ce1789e7/pillow-12.1.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:565c986f4b45c020f5421a4cea13ef294dde9509a8577f29b2fc5edc7587fff8", size = 7068529, upload_time = "2026-01-02T09:13:00.885Z" }, + { url = "https://files.pythonhosted.org/packages/50/96/dfd4cd726b4a45ae6e3c669fc9e49deb2241312605d33aba50499e9d9bd1/pillow-12.1.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:43aca0a55ce1eefc0aefa6253661cb54571857b1a7b2964bd8a1e3ef4b729924", size = 6492981, upload_time = "2026-01-02T09:13:03.314Z" }, + { url = "https://files.pythonhosted.org/packages/4d/1c/b5dc52cf713ae46033359c5ca920444f18a6359ce1020dd3e9c553ea5bc6/pillow-12.1.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:0deedf2ea233722476b3a81e8cdfbad786f7adbed5d848469fa59fe52396e4ef", size = 7191878, upload_time = "2026-01-02T09:13:05.276Z" }, + { url = "https://files.pythonhosted.org/packages/53/26/c4188248bd5edaf543864fe4834aebe9c9cb4968b6f573ce014cc42d0720/pillow-12.1.0-cp314-cp314t-win32.whl", hash = "sha256:b17fbdbe01c196e7e159aacb889e091f28e61020a8abeac07b68079b6e626988", size = 6438703, upload_time = "2026-01-02T09:13:07.491Z" }, + { url = 
"https://files.pythonhosted.org/packages/b8/0e/69ed296de8ea05cb03ee139cee600f424ca166e632567b2d66727f08c7ed/pillow-12.1.0-cp314-cp314t-win_amd64.whl", hash = "sha256:27b9baecb428899db6c0de572d6d305cfaf38ca1596b5c0542a5182e3e74e8c6", size = 7182927, upload_time = "2026-01-02T09:13:09.841Z" }, + { url = "https://files.pythonhosted.org/packages/fc/f5/68334c015eed9b5cff77814258717dec591ded209ab5b6fb70e2ae873d1d/pillow-12.1.0-cp314-cp314t-win_arm64.whl", hash = "sha256:f61333d817698bdcdd0f9d7793e365ac3d2a21c1f1eb02b32ad6aefb8d8ea831", size = 2545104, upload_time = "2026-01-02T09:13:12.068Z" }, ] [[package]] name = "platformdirs" version = "4.5.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/cf/86/0248f086a84f01b37aaec0fa567b397df1a119f73c16f6c7a9aac73ea309/platformdirs-4.5.1.tar.gz", hash = "sha256:61d5cdcc6065745cdd94f0f878977f8de9437be93de97c1c12f853c9c0cdcbda", size = 21715 } +sdist = { url = "https://files.pythonhosted.org/packages/cf/86/0248f086a84f01b37aaec0fa567b397df1a119f73c16f6c7a9aac73ea309/platformdirs-4.5.1.tar.gz", hash = "sha256:61d5cdcc6065745cdd94f0f878977f8de9437be93de97c1c12f853c9c0cdcbda", size = 21715, upload_time = "2025-12-05T13:52:58.638Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/28/3bfe2fa5a7b9c46fe7e13c97bda14c895fb10fa2ebf1d0abb90e0cea7ee1/platformdirs-4.5.1-py3-none-any.whl", hash = "sha256:d03afa3963c806a9bed9d5125c8f4cb2fdaf74a55ab60e5d59b3fde758104d31", size = 18731, upload_time = "2025-12-05T13:52:56.823Z" }, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload_time = "2025-05-15T12:30:07.975Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/cb/28/3bfe2fa5a7b9c46fe7e13c97bda14c895fb10fa2ebf1d0abb90e0cea7ee1/platformdirs-4.5.1-py3-none-any.whl", hash = "sha256:d03afa3963c806a9bed9d5125c8f4cb2fdaf74a55ab60e5d59b3fde758104d31", size = 18731 }, + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload_time = "2025-05-15T12:30:06.134Z" }, ] [[package]] name = "ply" version = "3.11" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e5/69/882ee5c9d017149285cab114ebeab373308ef0f874fcdac9beb90e0ac4da/ply-3.11.tar.gz", hash = "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3", size = 159130 } +sdist = { url = "https://files.pythonhosted.org/packages/e5/69/882ee5c9d017149285cab114ebeab373308ef0f874fcdac9beb90e0ac4da/ply-3.11.tar.gz", hash = "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3", size = 159130, upload_time = "2018-02-15T19:01:31.097Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a3/58/35da89ee790598a0700ea49b2a66594140f44dec458c07e8e3d4979137fc/ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce", size = 49567 }, + { url = "https://files.pythonhosted.org/packages/a3/58/35da89ee790598a0700ea49b2a66594140f44dec458c07e8e3d4979137fc/ply-3.11-py2.py3-none-any.whl", hash = 
"sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce", size = 49567, upload_time = "2018-02-15T19:01:27.172Z" }, +] + +[[package]] +name = "pre-commit" +version = "4.5.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cfgv" }, + { name = "identify" }, + { name = "nodeenv" }, + { name = "pyyaml" }, + { name = "virtualenv" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/40/f1/6d86a29246dfd2e9b6237f0b5823717f60cad94d47ddc26afa916d21f525/pre_commit-4.5.1.tar.gz", hash = "sha256:eb545fcff725875197837263e977ea257a402056661f09dae08e4b149b030a61", size = 198232, upload_time = "2025-12-16T21:14:33.552Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5d/19/fd3ef348460c80af7bb4669ea7926651d1f95c23ff2df18b9d24bab4f3fa/pre_commit-4.5.1-py2.py3-none-any.whl", hash = "sha256:3b3afd891e97337708c1674210f8eba659b52a38ea5f822ff142d10786221f77", size = 226437, upload_time = "2025-12-16T21:14:32.409Z" }, ] [[package]] @@ -2036,213 +2103,213 @@ dependencies = [ { name = "cymem" }, { name = "murmurhash" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/bf/34/eb4f5f0f678e152a96e826da867d2f41c4b18a2d589e40e1dd3347219e91/preshed-3.0.12.tar.gz", hash = "sha256:b73f9a8b54ee1d44529cc6018356896cff93d48f755f29c134734d9371c0d685", size = 15027 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/4b/f7/ff3aca937eeaee19c52c45ddf92979546e52ed0686e58be4bc09c47e7d88/preshed-3.0.12-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2779861f5d69480493519ed123a622a13012d1182126779036b99d9d989bf7e9", size = 129958 }, - { url = "https://files.pythonhosted.org/packages/80/24/fd654a9c0f5f3ed1a9b1d8a392f063ae9ca29ad0b462f0732ae0147f7cee/preshed-3.0.12-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ffe1fd7d92f51ed34383e20d8b734780c814ca869cfdb7e07f2d31651f90cdf4", size = 124550 }, - { url = "https://files.pythonhosted.org/packages/71/49/8271c7f680696f4b0880f44357d2a903d649cb9f6e60a1efc97a203104df/preshed-3.0.12-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:91893404858502cc4e856d338fef3d2a4a552135f79a1041c24eb919817c19db", size = 874987 }, - { url = "https://files.pythonhosted.org/packages/a3/a5/ca200187ca1632f1e2c458b72f1bd100fa8b55deecd5d72e1e4ebf09e98c/preshed-3.0.12-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9e06e8f2ba52f183eb9817a616cdebe84a211bb859a2ffbc23f3295d0b189638", size = 866499 }, - { url = "https://files.pythonhosted.org/packages/87/a1/943b61f850c44899910c21996cb542d0ef5931744c6d492fdfdd8457e693/preshed-3.0.12-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:bbe8b8a2d4f9af14e8a39ecca524b9de6defc91d8abcc95eb28f42da1c23272c", size = 878064 }, - { url = "https://files.pythonhosted.org/packages/3e/75/d7fff7f1fa3763619aa85d6ba70493a5d9c6e6ea7958a6e8c9d3e6e88bbe/preshed-3.0.12-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5d0aaac9c5862f5471fddd0c931dc64d3af2efc5fe3eb48b50765adb571243b9", size = 900540 }, - { url = "https://files.pythonhosted.org/packages/e4/12/a2285b78bd097a1e53fb90a1743bc8ce0d35e5b65b6853f3b3c47da398ca/preshed-3.0.12-cp312-cp312-win_amd64.whl", hash = "sha256:0eb8d411afcb1e3b12a0602fb6a0e33140342a732a795251a0ce452aba401dc0", size = 118298 }, - { url = "https://files.pythonhosted.org/packages/0b/34/4e8443fe99206a2fcfc63659969a8f8c8ab184836533594a519f3899b1ad/preshed-3.0.12-cp312-cp312-win_arm64.whl", hash = 
"sha256:dcd3d12903c9f720a39a5c5f1339f7f46e3ab71279fb7a39776768fb840b6077", size = 104746 }, - { url = "https://files.pythonhosted.org/packages/1e/36/1d3df6f9f37efc34be4ee3013b3bb698b06f1e372f80959851b54d8efdb2/preshed-3.0.12-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3deb3ab93d50c785eaa7694a8e169eb12d00263a99c91d56511fe943bcbacfb6", size = 128023 }, - { url = "https://files.pythonhosted.org/packages/fb/d4/3ca81f42978da1b81aa57b3e9b5193d8093e187787a3b2511d16b30b7c62/preshed-3.0.12-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a604350001238dab63dc14774ee30c257b5d71c7be976dbecd1f1ed37529f60f", size = 122851 }, - { url = "https://files.pythonhosted.org/packages/17/73/f388398f8d789f69b510272d144a9186d658423f6d3ecc484c0fe392acec/preshed-3.0.12-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:04fb860a8aab18d2201f06159337eda5568dc5eed218570d960fad79e783c7d0", size = 835926 }, - { url = "https://files.pythonhosted.org/packages/35/c6/b7170933451cbc27eaefd57b36f61a5e7e7c8da50ae24f819172e0ca8a4d/preshed-3.0.12-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d0c8fcd44996031c46a0aa6773c7b7aa5ee58c3ee87bc05236dacd5599d35063", size = 827294 }, - { url = "https://files.pythonhosted.org/packages/7d/ec/6504730d811c0a375721db2107d31684ec17ee5b7bb3796ecfa41e704d41/preshed-3.0.12-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b07efc3abd3714ce01cf67db0a2dada6e829ab7def74039d446e49ddb32538c5", size = 838809 }, - { url = "https://files.pythonhosted.org/packages/7e/1a/09d13240c1fbadcc0603e2fe029623045a36c88b4b50b02e7fdc89e3b88e/preshed-3.0.12-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f184ef184b76e0e4707bce2395008779e4dfa638456b13b18469c2c1a42903a6", size = 861448 }, - { url = "https://files.pythonhosted.org/packages/0d/35/9523160153037ee8337672249449be416ee92236f32602e7dd643767814f/preshed-3.0.12-cp313-cp313-win_amd64.whl", hash = "sha256:ebb3da2dc62ab09e5dc5a00ec38e7f5cdf8741c175714ab4a80773d8ee31b495", size = 117413 }, - { url = "https://files.pythonhosted.org/packages/79/eb/4263e6e896753b8e2ffa93035458165850a5ea81d27e8888afdbfd8fa9c4/preshed-3.0.12-cp313-cp313-win_arm64.whl", hash = "sha256:b36a2cf57a5ca6e78e69b569c92ef3bdbfb00e3a14859e201eec6ab3bdc27085", size = 104041 }, - { url = "https://files.pythonhosted.org/packages/77/39/7b33910b7ba3db9ce1515c39eb4657232913fb171fe701f792ef50726e60/preshed-3.0.12-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:0d8b458dfbd6cc5007d045fa5638231328e3d6f214fd24ab999cc10f8b9097e5", size = 129211 }, - { url = "https://files.pythonhosted.org/packages/32/67/97dceebe0b2b4dd94333e4ec283d38614f92996de615859a952da082890d/preshed-3.0.12-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:8e9196e2ea704243a69df203e0c9185eb7c5c58c3632ba1c1e2e2e0aa3aae3b4", size = 123311 }, - { url = "https://files.pythonhosted.org/packages/4b/6f/f3772f6eaad1eae787f82ffb65a81a4a1993277eacf5a78a29da34608323/preshed-3.0.12-cp314-cp314-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ffa644e1730012ed435fb9d0c3031ea19a06b11136eff5e9b96b2aa25ec7a5f5", size = 831683 }, - { url = "https://files.pythonhosted.org/packages/1a/93/997d39ca61202486dd06c669b4707a5b8e5d0c2c922db9f7744fd6a12096/preshed-3.0.12-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:39e83a16ce53e4a3c41c091fe4fe1c3d28604e63928040da09ba0c5d5a7ca41e", size = 830035 }, - { url = 
"https://files.pythonhosted.org/packages/0a/f2/51bf44e3fdbef08d40a832181842cd9b21b11c3f930989f4ff17e9201e12/preshed-3.0.12-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:2ec9bc0baee426303a644c7bf531333d4e7fd06fedf07f62ee09969c208d578d", size = 841728 }, - { url = "https://files.pythonhosted.org/packages/d3/b1/2d0e3d23d9f885f7647654d770227eb13e4d892deb9b0ed50b993d63fb18/preshed-3.0.12-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7db058f1b4a3d4d51c4c05b379c6cc9c36fcad00160923cb20ca1c7030581ea4", size = 858860 }, - { url = "https://files.pythonhosted.org/packages/e7/57/7c28c7f6f9bfce02796b54f1f6acd2cebb3fa3f14a2dce6fb3c686e3c3a8/preshed-3.0.12-cp314-cp314-win_amd64.whl", hash = "sha256:c87a54a55a2ba98d0c3fd7886295f2825397aff5a7157dcfb89124f6aa2dca41", size = 120325 }, - { url = "https://files.pythonhosted.org/packages/33/c3/df235ca679a08e09103983ec17c668f96abe897eadbe18d635972b43d8a9/preshed-3.0.12-cp314-cp314-win_arm64.whl", hash = "sha256:d9c5f10b4b971d71d163c2416b91b7136eae54ef3183b1742bb5993269af1b18", size = 107393 }, - { url = "https://files.pythonhosted.org/packages/7e/f1/51a2a72381c8aa3aeb8305d88e720c745048527107e649c01b8d49d6b5bf/preshed-3.0.12-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:2739a9c57efcfa16466fa6e0257d67f0075a9979dc729585fbadaed7383ab449", size = 137703 }, - { url = "https://files.pythonhosted.org/packages/3f/ab/f3c3d50647f3af6ce6441c596a4f6fb0216d549432ef51f61c0c1744c9b9/preshed-3.0.12-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:364249656bfbf98b4008fac707f35835580ec56207f7cbecdafef6ebb6a595a6", size = 134889 }, - { url = "https://files.pythonhosted.org/packages/54/9a/012dbae28a0b88cd98eae99f87701ffbe3a7d2ea3de345cb8a6a6e1b16cd/preshed-3.0.12-cp314-cp314t-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7f933d509ee762a90f62573aaf189eba94dfee478fca13ea2183b2f8a1bb8f7e", size = 911078 }, - { url = "https://files.pythonhosted.org/packages/88/c1/0cd0f8cdb91f63c298320cf946c4b97adfb8e8d3a5d454267410c90fcfaa/preshed-3.0.12-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f73f4e29bf90e58034e6f5fa55e6029f3f2d7c042a7151ed487b49898b0ce887", size = 930506 }, - { url = "https://files.pythonhosted.org/packages/20/1a/cab79b3181b2150eeeb0e2541c2bd4e0830e1e068b8836b24ea23610cec3/preshed-3.0.12-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:a61ede0c3d18f1ae128113f785a396351a46f4634beccfdf617b0a86008b154d", size = 900009 }, - { url = "https://files.pythonhosted.org/packages/31/9a/5ea9d6d95d5c07ba70166330a43bff7f0a074d0134eb7984eca6551e8c70/preshed-3.0.12-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:eafc08a86f77be78e722d96aa8a3a0aef0e3c7ac2f2ada22186a138e63d4033c", size = 910826 }, - { url = "https://files.pythonhosted.org/packages/92/71/39024f9873ff317eac724b2759e94d013703800d970d51de77ccc6afff7e/preshed-3.0.12-cp314-cp314t-win_amd64.whl", hash = "sha256:fadaad54973b8697d5ef008735e150bd729a127b6497fd2cb068842074a6f3a7", size = 141358 }, - { url = "https://files.pythonhosted.org/packages/9d/0d/431bb85252119f5d2260417fa7d164619b31eed8f1725b364dc0ade43a8e/preshed-3.0.12-cp314-cp314t-win_arm64.whl", hash = "sha256:c0c0d3b66b4c1e40aa6042721492f7b07fc9679ab6c361bc121aa54a1c3ef63f", size = 114839 }, +sdist = { url = "https://files.pythonhosted.org/packages/bf/34/eb4f5f0f678e152a96e826da867d2f41c4b18a2d589e40e1dd3347219e91/preshed-3.0.12.tar.gz", hash = "sha256:b73f9a8b54ee1d44529cc6018356896cff93d48f755f29c134734d9371c0d685", size = 15027, upload_time = 
"2025-11-17T13:00:33.621Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4b/f7/ff3aca937eeaee19c52c45ddf92979546e52ed0686e58be4bc09c47e7d88/preshed-3.0.12-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2779861f5d69480493519ed123a622a13012d1182126779036b99d9d989bf7e9", size = 129958, upload_time = "2025-11-17T12:59:33.391Z" }, + { url = "https://files.pythonhosted.org/packages/80/24/fd654a9c0f5f3ed1a9b1d8a392f063ae9ca29ad0b462f0732ae0147f7cee/preshed-3.0.12-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ffe1fd7d92f51ed34383e20d8b734780c814ca869cfdb7e07f2d31651f90cdf4", size = 124550, upload_time = "2025-11-17T12:59:34.688Z" }, + { url = "https://files.pythonhosted.org/packages/71/49/8271c7f680696f4b0880f44357d2a903d649cb9f6e60a1efc97a203104df/preshed-3.0.12-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:91893404858502cc4e856d338fef3d2a4a552135f79a1041c24eb919817c19db", size = 874987, upload_time = "2025-11-17T12:59:36.062Z" }, + { url = "https://files.pythonhosted.org/packages/a3/a5/ca200187ca1632f1e2c458b72f1bd100fa8b55deecd5d72e1e4ebf09e98c/preshed-3.0.12-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9e06e8f2ba52f183eb9817a616cdebe84a211bb859a2ffbc23f3295d0b189638", size = 866499, upload_time = "2025-11-17T12:59:37.586Z" }, + { url = "https://files.pythonhosted.org/packages/87/a1/943b61f850c44899910c21996cb542d0ef5931744c6d492fdfdd8457e693/preshed-3.0.12-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:bbe8b8a2d4f9af14e8a39ecca524b9de6defc91d8abcc95eb28f42da1c23272c", size = 878064, upload_time = "2025-11-17T12:59:39.651Z" }, + { url = "https://files.pythonhosted.org/packages/3e/75/d7fff7f1fa3763619aa85d6ba70493a5d9c6e6ea7958a6e8c9d3e6e88bbe/preshed-3.0.12-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5d0aaac9c5862f5471fddd0c931dc64d3af2efc5fe3eb48b50765adb571243b9", size = 900540, upload_time = "2025-11-17T12:59:41.384Z" }, + { url = "https://files.pythonhosted.org/packages/e4/12/a2285b78bd097a1e53fb90a1743bc8ce0d35e5b65b6853f3b3c47da398ca/preshed-3.0.12-cp312-cp312-win_amd64.whl", hash = "sha256:0eb8d411afcb1e3b12a0602fb6a0e33140342a732a795251a0ce452aba401dc0", size = 118298, upload_time = "2025-11-17T12:59:42.65Z" }, + { url = "https://files.pythonhosted.org/packages/0b/34/4e8443fe99206a2fcfc63659969a8f8c8ab184836533594a519f3899b1ad/preshed-3.0.12-cp312-cp312-win_arm64.whl", hash = "sha256:dcd3d12903c9f720a39a5c5f1339f7f46e3ab71279fb7a39776768fb840b6077", size = 104746, upload_time = "2025-11-17T12:59:43.934Z" }, + { url = "https://files.pythonhosted.org/packages/1e/36/1d3df6f9f37efc34be4ee3013b3bb698b06f1e372f80959851b54d8efdb2/preshed-3.0.12-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3deb3ab93d50c785eaa7694a8e169eb12d00263a99c91d56511fe943bcbacfb6", size = 128023, upload_time = "2025-11-17T12:59:45.157Z" }, + { url = "https://files.pythonhosted.org/packages/fb/d4/3ca81f42978da1b81aa57b3e9b5193d8093e187787a3b2511d16b30b7c62/preshed-3.0.12-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a604350001238dab63dc14774ee30c257b5d71c7be976dbecd1f1ed37529f60f", size = 122851, upload_time = "2025-11-17T12:59:46.439Z" }, + { url = "https://files.pythonhosted.org/packages/17/73/f388398f8d789f69b510272d144a9186d658423f6d3ecc484c0fe392acec/preshed-3.0.12-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:04fb860a8aab18d2201f06159337eda5568dc5eed218570d960fad79e783c7d0", size = 
835926, upload_time = "2025-11-17T12:59:47.882Z" }, + { url = "https://files.pythonhosted.org/packages/35/c6/b7170933451cbc27eaefd57b36f61a5e7e7c8da50ae24f819172e0ca8a4d/preshed-3.0.12-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d0c8fcd44996031c46a0aa6773c7b7aa5ee58c3ee87bc05236dacd5599d35063", size = 827294, upload_time = "2025-11-17T12:59:49.365Z" }, + { url = "https://files.pythonhosted.org/packages/7d/ec/6504730d811c0a375721db2107d31684ec17ee5b7bb3796ecfa41e704d41/preshed-3.0.12-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b07efc3abd3714ce01cf67db0a2dada6e829ab7def74039d446e49ddb32538c5", size = 838809, upload_time = "2025-11-17T12:59:51.234Z" }, + { url = "https://files.pythonhosted.org/packages/7e/1a/09d13240c1fbadcc0603e2fe029623045a36c88b4b50b02e7fdc89e3b88e/preshed-3.0.12-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f184ef184b76e0e4707bce2395008779e4dfa638456b13b18469c2c1a42903a6", size = 861448, upload_time = "2025-11-17T12:59:52.702Z" }, + { url = "https://files.pythonhosted.org/packages/0d/35/9523160153037ee8337672249449be416ee92236f32602e7dd643767814f/preshed-3.0.12-cp313-cp313-win_amd64.whl", hash = "sha256:ebb3da2dc62ab09e5dc5a00ec38e7f5cdf8741c175714ab4a80773d8ee31b495", size = 117413, upload_time = "2025-11-17T12:59:54.4Z" }, + { url = "https://files.pythonhosted.org/packages/79/eb/4263e6e896753b8e2ffa93035458165850a5ea81d27e8888afdbfd8fa9c4/preshed-3.0.12-cp313-cp313-win_arm64.whl", hash = "sha256:b36a2cf57a5ca6e78e69b569c92ef3bdbfb00e3a14859e201eec6ab3bdc27085", size = 104041, upload_time = "2025-11-17T12:59:55.596Z" }, + { url = "https://files.pythonhosted.org/packages/77/39/7b33910b7ba3db9ce1515c39eb4657232913fb171fe701f792ef50726e60/preshed-3.0.12-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:0d8b458dfbd6cc5007d045fa5638231328e3d6f214fd24ab999cc10f8b9097e5", size = 129211, upload_time = "2025-11-17T12:59:57.182Z" }, + { url = "https://files.pythonhosted.org/packages/32/67/97dceebe0b2b4dd94333e4ec283d38614f92996de615859a952da082890d/preshed-3.0.12-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:8e9196e2ea704243a69df203e0c9185eb7c5c58c3632ba1c1e2e2e0aa3aae3b4", size = 123311, upload_time = "2025-11-17T12:59:58.449Z" }, + { url = "https://files.pythonhosted.org/packages/4b/6f/f3772f6eaad1eae787f82ffb65a81a4a1993277eacf5a78a29da34608323/preshed-3.0.12-cp314-cp314-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ffa644e1730012ed435fb9d0c3031ea19a06b11136eff5e9b96b2aa25ec7a5f5", size = 831683, upload_time = "2025-11-17T13:00:00.229Z" }, + { url = "https://files.pythonhosted.org/packages/1a/93/997d39ca61202486dd06c669b4707a5b8e5d0c2c922db9f7744fd6a12096/preshed-3.0.12-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:39e83a16ce53e4a3c41c091fe4fe1c3d28604e63928040da09ba0c5d5a7ca41e", size = 830035, upload_time = "2025-11-17T13:00:02.191Z" }, + { url = "https://files.pythonhosted.org/packages/0a/f2/51bf44e3fdbef08d40a832181842cd9b21b11c3f930989f4ff17e9201e12/preshed-3.0.12-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:2ec9bc0baee426303a644c7bf531333d4e7fd06fedf07f62ee09969c208d578d", size = 841728, upload_time = "2025-11-17T13:00:03.643Z" }, + { url = "https://files.pythonhosted.org/packages/d3/b1/2d0e3d23d9f885f7647654d770227eb13e4d892deb9b0ed50b993d63fb18/preshed-3.0.12-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7db058f1b4a3d4d51c4c05b379c6cc9c36fcad00160923cb20ca1c7030581ea4", size = 858860, upload_time = 
"2025-11-17T13:00:05.185Z" }, + { url = "https://files.pythonhosted.org/packages/e7/57/7c28c7f6f9bfce02796b54f1f6acd2cebb3fa3f14a2dce6fb3c686e3c3a8/preshed-3.0.12-cp314-cp314-win_amd64.whl", hash = "sha256:c87a54a55a2ba98d0c3fd7886295f2825397aff5a7157dcfb89124f6aa2dca41", size = 120325, upload_time = "2025-11-17T13:00:06.428Z" }, + { url = "https://files.pythonhosted.org/packages/33/c3/df235ca679a08e09103983ec17c668f96abe897eadbe18d635972b43d8a9/preshed-3.0.12-cp314-cp314-win_arm64.whl", hash = "sha256:d9c5f10b4b971d71d163c2416b91b7136eae54ef3183b1742bb5993269af1b18", size = 107393, upload_time = "2025-11-17T13:00:07.718Z" }, + { url = "https://files.pythonhosted.org/packages/7e/f1/51a2a72381c8aa3aeb8305d88e720c745048527107e649c01b8d49d6b5bf/preshed-3.0.12-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:2739a9c57efcfa16466fa6e0257d67f0075a9979dc729585fbadaed7383ab449", size = 137703, upload_time = "2025-11-17T13:00:09.001Z" }, + { url = "https://files.pythonhosted.org/packages/3f/ab/f3c3d50647f3af6ce6441c596a4f6fb0216d549432ef51f61c0c1744c9b9/preshed-3.0.12-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:364249656bfbf98b4008fac707f35835580ec56207f7cbecdafef6ebb6a595a6", size = 134889, upload_time = "2025-11-17T13:00:10.29Z" }, + { url = "https://files.pythonhosted.org/packages/54/9a/012dbae28a0b88cd98eae99f87701ffbe3a7d2ea3de345cb8a6a6e1b16cd/preshed-3.0.12-cp314-cp314t-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7f933d509ee762a90f62573aaf189eba94dfee478fca13ea2183b2f8a1bb8f7e", size = 911078, upload_time = "2025-11-17T13:00:11.911Z" }, + { url = "https://files.pythonhosted.org/packages/88/c1/0cd0f8cdb91f63c298320cf946c4b97adfb8e8d3a5d454267410c90fcfaa/preshed-3.0.12-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f73f4e29bf90e58034e6f5fa55e6029f3f2d7c042a7151ed487b49898b0ce887", size = 930506, upload_time = "2025-11-17T13:00:13.375Z" }, + { url = "https://files.pythonhosted.org/packages/20/1a/cab79b3181b2150eeeb0e2541c2bd4e0830e1e068b8836b24ea23610cec3/preshed-3.0.12-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:a61ede0c3d18f1ae128113f785a396351a46f4634beccfdf617b0a86008b154d", size = 900009, upload_time = "2025-11-17T13:00:14.781Z" }, + { url = "https://files.pythonhosted.org/packages/31/9a/5ea9d6d95d5c07ba70166330a43bff7f0a074d0134eb7984eca6551e8c70/preshed-3.0.12-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:eafc08a86f77be78e722d96aa8a3a0aef0e3c7ac2f2ada22186a138e63d4033c", size = 910826, upload_time = "2025-11-17T13:00:16.861Z" }, + { url = "https://files.pythonhosted.org/packages/92/71/39024f9873ff317eac724b2759e94d013703800d970d51de77ccc6afff7e/preshed-3.0.12-cp314-cp314t-win_amd64.whl", hash = "sha256:fadaad54973b8697d5ef008735e150bd729a127b6497fd2cb068842074a6f3a7", size = 141358, upload_time = "2025-11-17T13:00:18.167Z" }, + { url = "https://files.pythonhosted.org/packages/9d/0d/431bb85252119f5d2260417fa7d164619b31eed8f1725b364dc0ade43a8e/preshed-3.0.12-cp314-cp314t-win_arm64.whl", hash = "sha256:c0c0d3b66b4c1e40aa6042721492f7b07fc9679ab6c361bc121aa54a1c3ef63f", size = 114839, upload_time = "2025-11-17T13:00:19.513Z" }, ] [[package]] name = "propcache" version = "0.4.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/9e/da/e9fc233cf63743258bff22b3dfa7ea5baef7b5bc324af47a0ad89b8ffc6f/propcache-0.4.1.tar.gz", hash = "sha256:f48107a8c637e80362555f37ecf49abe20370e557cc4ab374f04ec4423c97c3d", size = 46442 } 
-wheels = [ - { url = "https://files.pythonhosted.org/packages/a2/0f/f17b1b2b221d5ca28b4b876e8bb046ac40466513960646bda8e1853cdfa2/propcache-0.4.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e153e9cd40cc8945138822807139367f256f89c6810c2634a4f6902b52d3b4e2", size = 80061 }, - { url = "https://files.pythonhosted.org/packages/76/47/8ccf75935f51448ba9a16a71b783eb7ef6b9ee60f5d14c7f8a8a79fbeed7/propcache-0.4.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:cd547953428f7abb73c5ad82cbb32109566204260d98e41e5dfdc682eb7f8403", size = 46037 }, - { url = "https://files.pythonhosted.org/packages/0a/b6/5c9a0e42df4d00bfb4a3cbbe5cf9f54260300c88a0e9af1f47ca5ce17ac0/propcache-0.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f048da1b4f243fc44f205dfd320933a951b8d89e0afd4c7cacc762a8b9165207", size = 47324 }, - { url = "https://files.pythonhosted.org/packages/9e/d3/6c7ee328b39a81ee877c962469f1e795f9db87f925251efeb0545e0020d0/propcache-0.4.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ec17c65562a827bba85e3872ead335f95405ea1674860d96483a02f5c698fa72", size = 225505 }, - { url = "https://files.pythonhosted.org/packages/01/5d/1c53f4563490b1d06a684742cc6076ef944bc6457df6051b7d1a877c057b/propcache-0.4.1-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:405aac25c6394ef275dee4c709be43745d36674b223ba4eb7144bf4d691b7367", size = 230242 }, - { url = "https://files.pythonhosted.org/packages/20/e1/ce4620633b0e2422207c3cb774a0ee61cac13abc6217763a7b9e2e3f4a12/propcache-0.4.1-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0013cb6f8dde4b2a2f66903b8ba740bdfe378c943c4377a200551ceb27f379e4", size = 238474 }, - { url = "https://files.pythonhosted.org/packages/46/4b/3aae6835b8e5f44ea6a68348ad90f78134047b503765087be2f9912140ea/propcache-0.4.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:15932ab57837c3368b024473a525e25d316d8353016e7cc0e5ba9eb343fbb1cf", size = 221575 }, - { url = "https://files.pythonhosted.org/packages/6e/a5/8a5e8678bcc9d3a1a15b9a29165640d64762d424a16af543f00629c87338/propcache-0.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:031dce78b9dc099f4c29785d9cf5577a3faf9ebf74ecbd3c856a7b92768c3df3", size = 216736 }, - { url = "https://files.pythonhosted.org/packages/f1/63/b7b215eddeac83ca1c6b934f89d09a625aa9ee4ba158338854c87210cc36/propcache-0.4.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:ab08df6c9a035bee56e31af99be621526bd237bea9f32def431c656b29e41778", size = 213019 }, - { url = "https://files.pythonhosted.org/packages/57/74/f580099a58c8af587cac7ba19ee7cb418506342fbbe2d4a4401661cca886/propcache-0.4.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:4d7af63f9f93fe593afbf104c21b3b15868efb2c21d07d8732c0c4287e66b6a6", size = 220376 }, - { url = "https://files.pythonhosted.org/packages/c4/ee/542f1313aff7eaf19c2bb758c5d0560d2683dac001a1c96d0774af799843/propcache-0.4.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:cfc27c945f422e8b5071b6e93169679e4eb5bf73bbcbf1ba3ae3a83d2f78ebd9", size = 226988 }, - { url = "https://files.pythonhosted.org/packages/8f/18/9c6b015dd9c6930f6ce2229e1f02fb35298b847f2087ea2b436a5bfa7287/propcache-0.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:35c3277624a080cc6ec6f847cbbbb5b49affa3598c4535a0a4682a697aaa5c75", size = 215615 }, - { url = 
"https://files.pythonhosted.org/packages/80/9e/e7b85720b98c45a45e1fca6a177024934dc9bc5f4d5dd04207f216fc33ed/propcache-0.4.1-cp312-cp312-win32.whl", hash = "sha256:671538c2262dadb5ba6395e26c1731e1d52534bfe9ae56d0b5573ce539266aa8", size = 38066 }, - { url = "https://files.pythonhosted.org/packages/54/09/d19cff2a5aaac632ec8fc03737b223597b1e347416934c1b3a7df079784c/propcache-0.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:cb2d222e72399fcf5890d1d5cc1060857b9b236adff2792ff48ca2dfd46c81db", size = 41655 }, - { url = "https://files.pythonhosted.org/packages/68/ab/6b5c191bb5de08036a8c697b265d4ca76148efb10fa162f14af14fb5f076/propcache-0.4.1-cp312-cp312-win_arm64.whl", hash = "sha256:204483131fb222bdaaeeea9f9e6c6ed0cac32731f75dfc1d4a567fc1926477c1", size = 37789 }, - { url = "https://files.pythonhosted.org/packages/bf/df/6d9c1b6ac12b003837dde8a10231a7344512186e87b36e855bef32241942/propcache-0.4.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:43eedf29202c08550aac1d14e0ee619b0430aaef78f85864c1a892294fbc28cf", size = 77750 }, - { url = "https://files.pythonhosted.org/packages/8b/e8/677a0025e8a2acf07d3418a2e7ba529c9c33caf09d3c1f25513023c1db56/propcache-0.4.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d62cdfcfd89ccb8de04e0eda998535c406bf5e060ffd56be6c586cbcc05b3311", size = 44780 }, - { url = "https://files.pythonhosted.org/packages/89/a4/92380f7ca60f99ebae761936bc48a72a639e8a47b29050615eef757cb2a7/propcache-0.4.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cae65ad55793da34db5f54e4029b89d3b9b9490d8abe1b4c7ab5d4b8ec7ebf74", size = 46308 }, - { url = "https://files.pythonhosted.org/packages/2d/48/c5ac64dee5262044348d1d78a5f85dd1a57464a60d30daee946699963eb3/propcache-0.4.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:333ddb9031d2704a301ee3e506dc46b1fe5f294ec198ed6435ad5b6a085facfe", size = 208182 }, - { url = "https://files.pythonhosted.org/packages/c6/0c/cd762dd011a9287389a6a3eb43aa30207bde253610cca06824aeabfe9653/propcache-0.4.1-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:fd0858c20f078a32cf55f7e81473d96dcf3b93fd2ccdb3d40fdf54b8573df3af", size = 211215 }, - { url = "https://files.pythonhosted.org/packages/30/3e/49861e90233ba36890ae0ca4c660e95df565b2cd15d4a68556ab5865974e/propcache-0.4.1-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:678ae89ebc632c5c204c794f8dab2837c5f159aeb59e6ed0539500400577298c", size = 218112 }, - { url = "https://files.pythonhosted.org/packages/f1/8b/544bc867e24e1bd48f3118cecd3b05c694e160a168478fa28770f22fd094/propcache-0.4.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d472aeb4fbf9865e0c6d622d7f4d54a4e101a89715d8904282bb5f9a2f476c3f", size = 204442 }, - { url = "https://files.pythonhosted.org/packages/50/a6/4282772fd016a76d3e5c0df58380a5ea64900afd836cec2c2f662d1b9bb3/propcache-0.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4d3df5fa7e36b3225954fba85589da77a0fe6a53e3976de39caf04a0db4c36f1", size = 199398 }, - { url = "https://files.pythonhosted.org/packages/3e/ec/d8a7cd406ee1ddb705db2139f8a10a8a427100347bd698e7014351c7af09/propcache-0.4.1-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:ee17f18d2498f2673e432faaa71698032b0127ebf23ae5974eeaf806c279df24", size = 196920 }, - { url = 
"https://files.pythonhosted.org/packages/f6/6c/f38ab64af3764f431e359f8baf9e0a21013e24329e8b85d2da32e8ed07ca/propcache-0.4.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:580e97762b950f993ae618e167e7be9256b8353c2dcd8b99ec100eb50f5286aa", size = 203748 }, - { url = "https://files.pythonhosted.org/packages/d6/e3/fa846bd70f6534d647886621388f0a265254d30e3ce47e5c8e6e27dbf153/propcache-0.4.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:501d20b891688eb8e7aa903021f0b72d5a55db40ffaab27edefd1027caaafa61", size = 205877 }, - { url = "https://files.pythonhosted.org/packages/e2/39/8163fc6f3133fea7b5f2827e8eba2029a0277ab2c5beee6c1db7b10fc23d/propcache-0.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a0bd56e5b100aef69bd8562b74b46254e7c8812918d3baa700c8a8009b0af66", size = 199437 }, - { url = "https://files.pythonhosted.org/packages/93/89/caa9089970ca49c7c01662bd0eeedfe85494e863e8043565aeb6472ce8fe/propcache-0.4.1-cp313-cp313-win32.whl", hash = "sha256:bcc9aaa5d80322bc2fb24bb7accb4a30f81e90ab8d6ba187aec0744bc302ad81", size = 37586 }, - { url = "https://files.pythonhosted.org/packages/f5/ab/f76ec3c3627c883215b5c8080debb4394ef5a7a29be811f786415fc1e6fd/propcache-0.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:381914df18634f5494334d201e98245c0596067504b9372d8cf93f4bb23e025e", size = 40790 }, - { url = "https://files.pythonhosted.org/packages/59/1b/e71ae98235f8e2ba5004d8cb19765a74877abf189bc53fc0c80d799e56c3/propcache-0.4.1-cp313-cp313-win_arm64.whl", hash = "sha256:8873eb4460fd55333ea49b7d189749ecf6e55bf85080f11b1c4530ed3034cba1", size = 37158 }, - { url = "https://files.pythonhosted.org/packages/83/ce/a31bbdfc24ee0dcbba458c8175ed26089cf109a55bbe7b7640ed2470cfe9/propcache-0.4.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:92d1935ee1f8d7442da9c0c4fa7ac20d07e94064184811b685f5c4fada64553b", size = 81451 }, - { url = "https://files.pythonhosted.org/packages/25/9c/442a45a470a68456e710d96cacd3573ef26a1d0a60067e6a7d5e655621ed/propcache-0.4.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:473c61b39e1460d386479b9b2f337da492042447c9b685f28be4f74d3529e566", size = 46374 }, - { url = "https://files.pythonhosted.org/packages/f4/bf/b1d5e21dbc3b2e889ea4327044fb16312a736d97640fb8b6aa3f9c7b3b65/propcache-0.4.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:c0ef0aaafc66fbd87842a3fe3902fd889825646bc21149eafe47be6072725835", size = 48396 }, - { url = "https://files.pythonhosted.org/packages/f4/04/5b4c54a103d480e978d3c8a76073502b18db0c4bc17ab91b3cb5092ad949/propcache-0.4.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f95393b4d66bfae908c3ca8d169d5f79cd65636ae15b5e7a4f6e67af675adb0e", size = 275950 }, - { url = "https://files.pythonhosted.org/packages/b4/c1/86f846827fb969c4b78b0af79bba1d1ea2156492e1b83dea8b8a6ae27395/propcache-0.4.1-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c07fda85708bc48578467e85099645167a955ba093be0a2dcba962195676e859", size = 273856 }, - { url = "https://files.pythonhosted.org/packages/36/1d/fc272a63c8d3bbad6878c336c7a7dea15e8f2d23a544bda43205dfa83ada/propcache-0.4.1-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:af223b406d6d000830c6f65f1e6431783fc3f713ba3e6cc8c024d5ee96170a4b", size = 280420 }, - { url = 
"https://files.pythonhosted.org/packages/07/0c/01f2219d39f7e53d52e5173bcb09c976609ba30209912a0680adfb8c593a/propcache-0.4.1-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a78372c932c90ee474559c5ddfffd718238e8673c340dc21fe45c5b8b54559a0", size = 263254 }, - { url = "https://files.pythonhosted.org/packages/2d/18/cd28081658ce597898f0c4d174d4d0f3c5b6d4dc27ffafeef835c95eb359/propcache-0.4.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:564d9f0d4d9509e1a870c920a89b2fec951b44bf5ba7d537a9e7c1ccec2c18af", size = 261205 }, - { url = "https://files.pythonhosted.org/packages/7a/71/1f9e22eb8b8316701c2a19fa1f388c8a3185082607da8e406a803c9b954e/propcache-0.4.1-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:17612831fda0138059cc5546f4d12a2aacfb9e47068c06af35c400ba58ba7393", size = 247873 }, - { url = "https://files.pythonhosted.org/packages/4a/65/3d4b61f36af2b4eddba9def857959f1016a51066b4f1ce348e0cf7881f58/propcache-0.4.1-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:41a89040cb10bd345b3c1a873b2bf36413d48da1def52f268a055f7398514874", size = 262739 }, - { url = "https://files.pythonhosted.org/packages/2a/42/26746ab087faa77c1c68079b228810436ccd9a5ce9ac85e2b7307195fd06/propcache-0.4.1-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:e35b88984e7fa64aacecea39236cee32dd9bd8c55f57ba8a75cf2399553f9bd7", size = 263514 }, - { url = "https://files.pythonhosted.org/packages/94/13/630690fe201f5502d2403dd3cfd451ed8858fe3c738ee88d095ad2ff407b/propcache-0.4.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6f8b465489f927b0df505cbe26ffbeed4d6d8a2bbc61ce90eb074ff129ef0ab1", size = 257781 }, - { url = "https://files.pythonhosted.org/packages/92/f7/1d4ec5841505f423469efbfc381d64b7b467438cd5a4bbcbb063f3b73d27/propcache-0.4.1-cp313-cp313t-win32.whl", hash = "sha256:2ad890caa1d928c7c2965b48f3a3815c853180831d0e5503d35cf00c472f4717", size = 41396 }, - { url = "https://files.pythonhosted.org/packages/48/f0/615c30622316496d2cbbc29f5985f7777d3ada70f23370608c1d3e081c1f/propcache-0.4.1-cp313-cp313t-win_amd64.whl", hash = "sha256:f7ee0e597f495cf415bcbd3da3caa3bd7e816b74d0d52b8145954c5e6fd3ff37", size = 44897 }, - { url = "https://files.pythonhosted.org/packages/fd/ca/6002e46eccbe0e33dcd4069ef32f7f1c9e243736e07adca37ae8c4830ec3/propcache-0.4.1-cp313-cp313t-win_arm64.whl", hash = "sha256:929d7cbe1f01bb7baffb33dc14eb5691c95831450a26354cd210a8155170c93a", size = 39789 }, - { url = "https://files.pythonhosted.org/packages/8e/5c/bca52d654a896f831b8256683457ceddd490ec18d9ec50e97dfd8fc726a8/propcache-0.4.1-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:3f7124c9d820ba5548d431afb4632301acf965db49e666aa21c305cbe8c6de12", size = 78152 }, - { url = "https://files.pythonhosted.org/packages/65/9b/03b04e7d82a5f54fb16113d839f5ea1ede58a61e90edf515f6577c66fa8f/propcache-0.4.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:c0d4b719b7da33599dfe3b22d3db1ef789210a0597bc650b7cee9c77c2be8c5c", size = 44869 }, - { url = "https://files.pythonhosted.org/packages/b2/fa/89a8ef0468d5833a23fff277b143d0573897cf75bd56670a6d28126c7d68/propcache-0.4.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:9f302f4783709a78240ebc311b793f123328716a60911d667e0c036bc5dcbded", size = 46596 }, - { url = "https://files.pythonhosted.org/packages/86/bd/47816020d337f4a746edc42fe8d53669965138f39ee117414c7d7a340cfe/propcache-0.4.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:c80ee5802e3fb9ea37938e7eecc307fb984837091d5fd262bb37238b1ae97641", size = 206981 }, - { url = "https://files.pythonhosted.org/packages/df/f6/c5fa1357cc9748510ee55f37173eb31bfde6d94e98ccd9e6f033f2fc06e1/propcache-0.4.1-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ed5a841e8bb29a55fb8159ed526b26adc5bdd7e8bd7bf793ce647cb08656cdf4", size = 211490 }, - { url = "https://files.pythonhosted.org/packages/80/1e/e5889652a7c4a3846683401a48f0f2e5083ce0ec1a8a5221d8058fbd1adf/propcache-0.4.1-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:55c72fd6ea2da4c318e74ffdf93c4fe4e926051133657459131a95c846d16d44", size = 215371 }, - { url = "https://files.pythonhosted.org/packages/b2/f2/889ad4b2408f72fe1a4f6a19491177b30ea7bf1a0fd5f17050ca08cfc882/propcache-0.4.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8326e144341460402713f91df60ade3c999d601e7eb5ff8f6f7862d54de0610d", size = 201424 }, - { url = "https://files.pythonhosted.org/packages/27/73/033d63069b57b0812c8bd19f311faebeceb6ba31b8f32b73432d12a0b826/propcache-0.4.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:060b16ae65bc098da7f6d25bf359f1f31f688384858204fe5d652979e0015e5b", size = 197566 }, - { url = "https://files.pythonhosted.org/packages/dc/89/ce24f3dc182630b4e07aa6d15f0ff4b14ed4b9955fae95a0b54c58d66c05/propcache-0.4.1-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:89eb3fa9524f7bec9de6e83cf3faed9d79bffa560672c118a96a171a6f55831e", size = 193130 }, - { url = "https://files.pythonhosted.org/packages/a9/24/ef0d5fd1a811fb5c609278d0209c9f10c35f20581fcc16f818da959fc5b4/propcache-0.4.1-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:dee69d7015dc235f526fe80a9c90d65eb0039103fe565776250881731f06349f", size = 202625 }, - { url = "https://files.pythonhosted.org/packages/f5/02/98ec20ff5546f68d673df2f7a69e8c0d076b5abd05ca882dc7ee3a83653d/propcache-0.4.1-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:5558992a00dfd54ccbc64a32726a3357ec93825a418a401f5cc67df0ac5d9e49", size = 204209 }, - { url = "https://files.pythonhosted.org/packages/a0/87/492694f76759b15f0467a2a93ab68d32859672b646aa8a04ce4864e7932d/propcache-0.4.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:c9b822a577f560fbd9554812526831712c1436d2c046cedee4c3796d3543b144", size = 197797 }, - { url = "https://files.pythonhosted.org/packages/ee/36/66367de3575db1d2d3f3d177432bd14ee577a39d3f5d1b3d5df8afe3b6e2/propcache-0.4.1-cp314-cp314-win32.whl", hash = "sha256:ab4c29b49d560fe48b696cdcb127dd36e0bc2472548f3bf56cc5cb3da2b2984f", size = 38140 }, - { url = "https://files.pythonhosted.org/packages/0c/2a/a758b47de253636e1b8aef181c0b4f4f204bf0dd964914fb2af90a95b49b/propcache-0.4.1-cp314-cp314-win_amd64.whl", hash = "sha256:5a103c3eb905fcea0ab98be99c3a9a5ab2de60228aa5aceedc614c0281cf6153", size = 41257 }, - { url = "https://files.pythonhosted.org/packages/34/5e/63bd5896c3fec12edcbd6f12508d4890d23c265df28c74b175e1ef9f4f3b/propcache-0.4.1-cp314-cp314-win_arm64.whl", hash = "sha256:74c1fb26515153e482e00177a1ad654721bf9207da8a494a0c05e797ad27b992", size = 38097 }, - { url = "https://files.pythonhosted.org/packages/99/85/9ff785d787ccf9bbb3f3106f79884a130951436f58392000231b4c737c80/propcache-0.4.1-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:824e908bce90fb2743bd6b59db36eb4f45cd350a39637c9f73b1c1ea66f5b75f", size = 81455 }, - { url = 
"https://files.pythonhosted.org/packages/90/85/2431c10c8e7ddb1445c1f7c4b54d886e8ad20e3c6307e7218f05922cad67/propcache-0.4.1-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:c2b5e7db5328427c57c8e8831abda175421b709672f6cfc3d630c3b7e2146393", size = 46372 }, - { url = "https://files.pythonhosted.org/packages/01/20/b0972d902472da9bcb683fa595099911f4d2e86e5683bcc45de60dd05dc3/propcache-0.4.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:6f6ff873ed40292cd4969ef5310179afd5db59fdf055897e282485043fc80ad0", size = 48411 }, - { url = "https://files.pythonhosted.org/packages/e2/e3/7dc89f4f21e8f99bad3d5ddb3a3389afcf9da4ac69e3deb2dcdc96e74169/propcache-0.4.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:49a2dc67c154db2c1463013594c458881a069fcf98940e61a0569016a583020a", size = 275712 }, - { url = "https://files.pythonhosted.org/packages/20/67/89800c8352489b21a8047c773067644e3897f02ecbbd610f4d46b7f08612/propcache-0.4.1-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:005f08e6a0529984491e37d8dbc3dd86f84bd78a8ceb5fa9a021f4c48d4984be", size = 273557 }, - { url = "https://files.pythonhosted.org/packages/e2/a1/b52b055c766a54ce6d9c16d9aca0cad8059acd9637cdf8aa0222f4a026ef/propcache-0.4.1-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5c3310452e0d31390da9035c348633b43d7e7feb2e37be252be6da45abd1abcc", size = 280015 }, - { url = "https://files.pythonhosted.org/packages/48/c8/33cee30bd890672c63743049f3c9e4be087e6780906bfc3ec58528be59c1/propcache-0.4.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4c3c70630930447f9ef1caac7728c8ad1c56bc5015338b20fed0d08ea2480b3a", size = 262880 }, - { url = "https://files.pythonhosted.org/packages/0c/b1/8f08a143b204b418285c88b83d00edbd61afbc2c6415ffafc8905da7038b/propcache-0.4.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:8e57061305815dfc910a3634dcf584f08168a8836e6999983569f51a8544cd89", size = 260938 }, - { url = "https://files.pythonhosted.org/packages/cf/12/96e4664c82ca2f31e1c8dff86afb867348979eb78d3cb8546a680287a1e9/propcache-0.4.1-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:521a463429ef54143092c11a77e04056dd00636f72e8c45b70aaa3140d639726", size = 247641 }, - { url = "https://files.pythonhosted.org/packages/18/ed/e7a9cfca28133386ba52278136d42209d3125db08d0a6395f0cba0c0285c/propcache-0.4.1-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:120c964da3fdc75e3731aa392527136d4ad35868cc556fd09bb6d09172d9a367", size = 262510 }, - { url = "https://files.pythonhosted.org/packages/f5/76/16d8bf65e8845dd62b4e2b57444ab81f07f40caa5652b8969b87ddcf2ef6/propcache-0.4.1-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:d8f353eb14ee3441ee844ade4277d560cdd68288838673273b978e3d6d2c8f36", size = 263161 }, - { url = "https://files.pythonhosted.org/packages/e7/70/c99e9edb5d91d5ad8a49fa3c1e8285ba64f1476782fed10ab251ff413ba1/propcache-0.4.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:ab2943be7c652f09638800905ee1bab2c544e537edb57d527997a24c13dc1455", size = 257393 }, - { url = "https://files.pythonhosted.org/packages/08/02/87b25304249a35c0915d236575bc3574a323f60b47939a2262b77632a3ee/propcache-0.4.1-cp314-cp314t-win32.whl", hash = "sha256:05674a162469f31358c30bcaa8883cb7829fa3110bf9c0991fe27d7896c42d85", size = 42546 }, - { url = 
"https://files.pythonhosted.org/packages/cb/ef/3c6ecf8b317aa982f309835e8f96987466123c6e596646d4e6a1dfcd080f/propcache-0.4.1-cp314-cp314t-win_amd64.whl", hash = "sha256:990f6b3e2a27d683cb7602ed6c86f15ee6b43b1194736f9baaeb93d0016633b1", size = 46259 }, - { url = "https://files.pythonhosted.org/packages/c4/2d/346e946d4951f37eca1e4f55be0f0174c52cd70720f84029b02f296f4a38/propcache-0.4.1-cp314-cp314t-win_arm64.whl", hash = "sha256:ecef2343af4cc68e05131e45024ba34f6095821988a9d0a02aa7c73fcc448aa9", size = 40428 }, - { url = "https://files.pythonhosted.org/packages/5b/5a/bc7b4a4ef808fa59a816c17b20c4bef6884daebbdf627ff2a161da67da19/propcache-0.4.1-py3-none-any.whl", hash = "sha256:af2a6052aeb6cf17d3e46ee169099044fd8224cbaf75c76a2ef596e8163e2237", size = 13305 }, +sdist = { url = "https://files.pythonhosted.org/packages/9e/da/e9fc233cf63743258bff22b3dfa7ea5baef7b5bc324af47a0ad89b8ffc6f/propcache-0.4.1.tar.gz", hash = "sha256:f48107a8c637e80362555f37ecf49abe20370e557cc4ab374f04ec4423c97c3d", size = 46442, upload_time = "2025-10-08T19:49:02.291Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a2/0f/f17b1b2b221d5ca28b4b876e8bb046ac40466513960646bda8e1853cdfa2/propcache-0.4.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e153e9cd40cc8945138822807139367f256f89c6810c2634a4f6902b52d3b4e2", size = 80061, upload_time = "2025-10-08T19:46:46.075Z" }, + { url = "https://files.pythonhosted.org/packages/76/47/8ccf75935f51448ba9a16a71b783eb7ef6b9ee60f5d14c7f8a8a79fbeed7/propcache-0.4.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:cd547953428f7abb73c5ad82cbb32109566204260d98e41e5dfdc682eb7f8403", size = 46037, upload_time = "2025-10-08T19:46:47.23Z" }, + { url = "https://files.pythonhosted.org/packages/0a/b6/5c9a0e42df4d00bfb4a3cbbe5cf9f54260300c88a0e9af1f47ca5ce17ac0/propcache-0.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f048da1b4f243fc44f205dfd320933a951b8d89e0afd4c7cacc762a8b9165207", size = 47324, upload_time = "2025-10-08T19:46:48.384Z" }, + { url = "https://files.pythonhosted.org/packages/9e/d3/6c7ee328b39a81ee877c962469f1e795f9db87f925251efeb0545e0020d0/propcache-0.4.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ec17c65562a827bba85e3872ead335f95405ea1674860d96483a02f5c698fa72", size = 225505, upload_time = "2025-10-08T19:46:50.055Z" }, + { url = "https://files.pythonhosted.org/packages/01/5d/1c53f4563490b1d06a684742cc6076ef944bc6457df6051b7d1a877c057b/propcache-0.4.1-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:405aac25c6394ef275dee4c709be43745d36674b223ba4eb7144bf4d691b7367", size = 230242, upload_time = "2025-10-08T19:46:51.815Z" }, + { url = "https://files.pythonhosted.org/packages/20/e1/ce4620633b0e2422207c3cb774a0ee61cac13abc6217763a7b9e2e3f4a12/propcache-0.4.1-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0013cb6f8dde4b2a2f66903b8ba740bdfe378c943c4377a200551ceb27f379e4", size = 238474, upload_time = "2025-10-08T19:46:53.208Z" }, + { url = "https://files.pythonhosted.org/packages/46/4b/3aae6835b8e5f44ea6a68348ad90f78134047b503765087be2f9912140ea/propcache-0.4.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:15932ab57837c3368b024473a525e25d316d8353016e7cc0e5ba9eb343fbb1cf", size = 221575, upload_time = "2025-10-08T19:46:54.511Z" }, + { url = 
"https://files.pythonhosted.org/packages/6e/a5/8a5e8678bcc9d3a1a15b9a29165640d64762d424a16af543f00629c87338/propcache-0.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:031dce78b9dc099f4c29785d9cf5577a3faf9ebf74ecbd3c856a7b92768c3df3", size = 216736, upload_time = "2025-10-08T19:46:56.212Z" }, + { url = "https://files.pythonhosted.org/packages/f1/63/b7b215eddeac83ca1c6b934f89d09a625aa9ee4ba158338854c87210cc36/propcache-0.4.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:ab08df6c9a035bee56e31af99be621526bd237bea9f32def431c656b29e41778", size = 213019, upload_time = "2025-10-08T19:46:57.595Z" }, + { url = "https://files.pythonhosted.org/packages/57/74/f580099a58c8af587cac7ba19ee7cb418506342fbbe2d4a4401661cca886/propcache-0.4.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:4d7af63f9f93fe593afbf104c21b3b15868efb2c21d07d8732c0c4287e66b6a6", size = 220376, upload_time = "2025-10-08T19:46:59.067Z" }, + { url = "https://files.pythonhosted.org/packages/c4/ee/542f1313aff7eaf19c2bb758c5d0560d2683dac001a1c96d0774af799843/propcache-0.4.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:cfc27c945f422e8b5071b6e93169679e4eb5bf73bbcbf1ba3ae3a83d2f78ebd9", size = 226988, upload_time = "2025-10-08T19:47:00.544Z" }, + { url = "https://files.pythonhosted.org/packages/8f/18/9c6b015dd9c6930f6ce2229e1f02fb35298b847f2087ea2b436a5bfa7287/propcache-0.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:35c3277624a080cc6ec6f847cbbbb5b49affa3598c4535a0a4682a697aaa5c75", size = 215615, upload_time = "2025-10-08T19:47:01.968Z" }, + { url = "https://files.pythonhosted.org/packages/80/9e/e7b85720b98c45a45e1fca6a177024934dc9bc5f4d5dd04207f216fc33ed/propcache-0.4.1-cp312-cp312-win32.whl", hash = "sha256:671538c2262dadb5ba6395e26c1731e1d52534bfe9ae56d0b5573ce539266aa8", size = 38066, upload_time = "2025-10-08T19:47:03.503Z" }, + { url = "https://files.pythonhosted.org/packages/54/09/d19cff2a5aaac632ec8fc03737b223597b1e347416934c1b3a7df079784c/propcache-0.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:cb2d222e72399fcf5890d1d5cc1060857b9b236adff2792ff48ca2dfd46c81db", size = 41655, upload_time = "2025-10-08T19:47:04.973Z" }, + { url = "https://files.pythonhosted.org/packages/68/ab/6b5c191bb5de08036a8c697b265d4ca76148efb10fa162f14af14fb5f076/propcache-0.4.1-cp312-cp312-win_arm64.whl", hash = "sha256:204483131fb222bdaaeeea9f9e6c6ed0cac32731f75dfc1d4a567fc1926477c1", size = 37789, upload_time = "2025-10-08T19:47:06.077Z" }, + { url = "https://files.pythonhosted.org/packages/bf/df/6d9c1b6ac12b003837dde8a10231a7344512186e87b36e855bef32241942/propcache-0.4.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:43eedf29202c08550aac1d14e0ee619b0430aaef78f85864c1a892294fbc28cf", size = 77750, upload_time = "2025-10-08T19:47:07.648Z" }, + { url = "https://files.pythonhosted.org/packages/8b/e8/677a0025e8a2acf07d3418a2e7ba529c9c33caf09d3c1f25513023c1db56/propcache-0.4.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d62cdfcfd89ccb8de04e0eda998535c406bf5e060ffd56be6c586cbcc05b3311", size = 44780, upload_time = "2025-10-08T19:47:08.851Z" }, + { url = "https://files.pythonhosted.org/packages/89/a4/92380f7ca60f99ebae761936bc48a72a639e8a47b29050615eef757cb2a7/propcache-0.4.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cae65ad55793da34db5f54e4029b89d3b9b9490d8abe1b4c7ab5d4b8ec7ebf74", size = 46308, upload_time = "2025-10-08T19:47:09.982Z" }, + { url = 
"https://files.pythonhosted.org/packages/2d/48/c5ac64dee5262044348d1d78a5f85dd1a57464a60d30daee946699963eb3/propcache-0.4.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:333ddb9031d2704a301ee3e506dc46b1fe5f294ec198ed6435ad5b6a085facfe", size = 208182, upload_time = "2025-10-08T19:47:11.319Z" }, + { url = "https://files.pythonhosted.org/packages/c6/0c/cd762dd011a9287389a6a3eb43aa30207bde253610cca06824aeabfe9653/propcache-0.4.1-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:fd0858c20f078a32cf55f7e81473d96dcf3b93fd2ccdb3d40fdf54b8573df3af", size = 211215, upload_time = "2025-10-08T19:47:13.146Z" }, + { url = "https://files.pythonhosted.org/packages/30/3e/49861e90233ba36890ae0ca4c660e95df565b2cd15d4a68556ab5865974e/propcache-0.4.1-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:678ae89ebc632c5c204c794f8dab2837c5f159aeb59e6ed0539500400577298c", size = 218112, upload_time = "2025-10-08T19:47:14.913Z" }, + { url = "https://files.pythonhosted.org/packages/f1/8b/544bc867e24e1bd48f3118cecd3b05c694e160a168478fa28770f22fd094/propcache-0.4.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d472aeb4fbf9865e0c6d622d7f4d54a4e101a89715d8904282bb5f9a2f476c3f", size = 204442, upload_time = "2025-10-08T19:47:16.277Z" }, + { url = "https://files.pythonhosted.org/packages/50/a6/4282772fd016a76d3e5c0df58380a5ea64900afd836cec2c2f662d1b9bb3/propcache-0.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4d3df5fa7e36b3225954fba85589da77a0fe6a53e3976de39caf04a0db4c36f1", size = 199398, upload_time = "2025-10-08T19:47:17.962Z" }, + { url = "https://files.pythonhosted.org/packages/3e/ec/d8a7cd406ee1ddb705db2139f8a10a8a427100347bd698e7014351c7af09/propcache-0.4.1-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:ee17f18d2498f2673e432faaa71698032b0127ebf23ae5974eeaf806c279df24", size = 196920, upload_time = "2025-10-08T19:47:19.355Z" }, + { url = "https://files.pythonhosted.org/packages/f6/6c/f38ab64af3764f431e359f8baf9e0a21013e24329e8b85d2da32e8ed07ca/propcache-0.4.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:580e97762b950f993ae618e167e7be9256b8353c2dcd8b99ec100eb50f5286aa", size = 203748, upload_time = "2025-10-08T19:47:21.338Z" }, + { url = "https://files.pythonhosted.org/packages/d6/e3/fa846bd70f6534d647886621388f0a265254d30e3ce47e5c8e6e27dbf153/propcache-0.4.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:501d20b891688eb8e7aa903021f0b72d5a55db40ffaab27edefd1027caaafa61", size = 205877, upload_time = "2025-10-08T19:47:23.059Z" }, + { url = "https://files.pythonhosted.org/packages/e2/39/8163fc6f3133fea7b5f2827e8eba2029a0277ab2c5beee6c1db7b10fc23d/propcache-0.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a0bd56e5b100aef69bd8562b74b46254e7c8812918d3baa700c8a8009b0af66", size = 199437, upload_time = "2025-10-08T19:47:24.445Z" }, + { url = "https://files.pythonhosted.org/packages/93/89/caa9089970ca49c7c01662bd0eeedfe85494e863e8043565aeb6472ce8fe/propcache-0.4.1-cp313-cp313-win32.whl", hash = "sha256:bcc9aaa5d80322bc2fb24bb7accb4a30f81e90ab8d6ba187aec0744bc302ad81", size = 37586, upload_time = "2025-10-08T19:47:25.736Z" }, + { url = "https://files.pythonhosted.org/packages/f5/ab/f76ec3c3627c883215b5c8080debb4394ef5a7a29be811f786415fc1e6fd/propcache-0.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:381914df18634f5494334d201e98245c0596067504b9372d8cf93f4bb23e025e", size = 40790, 
upload_time = "2025-10-08T19:47:26.847Z" }, + { url = "https://files.pythonhosted.org/packages/59/1b/e71ae98235f8e2ba5004d8cb19765a74877abf189bc53fc0c80d799e56c3/propcache-0.4.1-cp313-cp313-win_arm64.whl", hash = "sha256:8873eb4460fd55333ea49b7d189749ecf6e55bf85080f11b1c4530ed3034cba1", size = 37158, upload_time = "2025-10-08T19:47:27.961Z" }, + { url = "https://files.pythonhosted.org/packages/83/ce/a31bbdfc24ee0dcbba458c8175ed26089cf109a55bbe7b7640ed2470cfe9/propcache-0.4.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:92d1935ee1f8d7442da9c0c4fa7ac20d07e94064184811b685f5c4fada64553b", size = 81451, upload_time = "2025-10-08T19:47:29.445Z" }, + { url = "https://files.pythonhosted.org/packages/25/9c/442a45a470a68456e710d96cacd3573ef26a1d0a60067e6a7d5e655621ed/propcache-0.4.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:473c61b39e1460d386479b9b2f337da492042447c9b685f28be4f74d3529e566", size = 46374, upload_time = "2025-10-08T19:47:30.579Z" }, + { url = "https://files.pythonhosted.org/packages/f4/bf/b1d5e21dbc3b2e889ea4327044fb16312a736d97640fb8b6aa3f9c7b3b65/propcache-0.4.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:c0ef0aaafc66fbd87842a3fe3902fd889825646bc21149eafe47be6072725835", size = 48396, upload_time = "2025-10-08T19:47:31.79Z" }, + { url = "https://files.pythonhosted.org/packages/f4/04/5b4c54a103d480e978d3c8a76073502b18db0c4bc17ab91b3cb5092ad949/propcache-0.4.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f95393b4d66bfae908c3ca8d169d5f79cd65636ae15b5e7a4f6e67af675adb0e", size = 275950, upload_time = "2025-10-08T19:47:33.481Z" }, + { url = "https://files.pythonhosted.org/packages/b4/c1/86f846827fb969c4b78b0af79bba1d1ea2156492e1b83dea8b8a6ae27395/propcache-0.4.1-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c07fda85708bc48578467e85099645167a955ba093be0a2dcba962195676e859", size = 273856, upload_time = "2025-10-08T19:47:34.906Z" }, + { url = "https://files.pythonhosted.org/packages/36/1d/fc272a63c8d3bbad6878c336c7a7dea15e8f2d23a544bda43205dfa83ada/propcache-0.4.1-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:af223b406d6d000830c6f65f1e6431783fc3f713ba3e6cc8c024d5ee96170a4b", size = 280420, upload_time = "2025-10-08T19:47:36.338Z" }, + { url = "https://files.pythonhosted.org/packages/07/0c/01f2219d39f7e53d52e5173bcb09c976609ba30209912a0680adfb8c593a/propcache-0.4.1-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a78372c932c90ee474559c5ddfffd718238e8673c340dc21fe45c5b8b54559a0", size = 263254, upload_time = "2025-10-08T19:47:37.692Z" }, + { url = "https://files.pythonhosted.org/packages/2d/18/cd28081658ce597898f0c4d174d4d0f3c5b6d4dc27ffafeef835c95eb359/propcache-0.4.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:564d9f0d4d9509e1a870c920a89b2fec951b44bf5ba7d537a9e7c1ccec2c18af", size = 261205, upload_time = "2025-10-08T19:47:39.659Z" }, + { url = "https://files.pythonhosted.org/packages/7a/71/1f9e22eb8b8316701c2a19fa1f388c8a3185082607da8e406a803c9b954e/propcache-0.4.1-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:17612831fda0138059cc5546f4d12a2aacfb9e47068c06af35c400ba58ba7393", size = 247873, upload_time = "2025-10-08T19:47:41.084Z" }, + { url = "https://files.pythonhosted.org/packages/4a/65/3d4b61f36af2b4eddba9def857959f1016a51066b4f1ce348e0cf7881f58/propcache-0.4.1-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = 
"sha256:41a89040cb10bd345b3c1a873b2bf36413d48da1def52f268a055f7398514874", size = 262739, upload_time = "2025-10-08T19:47:42.51Z" }, + { url = "https://files.pythonhosted.org/packages/2a/42/26746ab087faa77c1c68079b228810436ccd9a5ce9ac85e2b7307195fd06/propcache-0.4.1-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:e35b88984e7fa64aacecea39236cee32dd9bd8c55f57ba8a75cf2399553f9bd7", size = 263514, upload_time = "2025-10-08T19:47:43.927Z" }, + { url = "https://files.pythonhosted.org/packages/94/13/630690fe201f5502d2403dd3cfd451ed8858fe3c738ee88d095ad2ff407b/propcache-0.4.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6f8b465489f927b0df505cbe26ffbeed4d6d8a2bbc61ce90eb074ff129ef0ab1", size = 257781, upload_time = "2025-10-08T19:47:45.448Z" }, + { url = "https://files.pythonhosted.org/packages/92/f7/1d4ec5841505f423469efbfc381d64b7b467438cd5a4bbcbb063f3b73d27/propcache-0.4.1-cp313-cp313t-win32.whl", hash = "sha256:2ad890caa1d928c7c2965b48f3a3815c853180831d0e5503d35cf00c472f4717", size = 41396, upload_time = "2025-10-08T19:47:47.202Z" }, + { url = "https://files.pythonhosted.org/packages/48/f0/615c30622316496d2cbbc29f5985f7777d3ada70f23370608c1d3e081c1f/propcache-0.4.1-cp313-cp313t-win_amd64.whl", hash = "sha256:f7ee0e597f495cf415bcbd3da3caa3bd7e816b74d0d52b8145954c5e6fd3ff37", size = 44897, upload_time = "2025-10-08T19:47:48.336Z" }, + { url = "https://files.pythonhosted.org/packages/fd/ca/6002e46eccbe0e33dcd4069ef32f7f1c9e243736e07adca37ae8c4830ec3/propcache-0.4.1-cp313-cp313t-win_arm64.whl", hash = "sha256:929d7cbe1f01bb7baffb33dc14eb5691c95831450a26354cd210a8155170c93a", size = 39789, upload_time = "2025-10-08T19:47:49.876Z" }, + { url = "https://files.pythonhosted.org/packages/8e/5c/bca52d654a896f831b8256683457ceddd490ec18d9ec50e97dfd8fc726a8/propcache-0.4.1-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:3f7124c9d820ba5548d431afb4632301acf965db49e666aa21c305cbe8c6de12", size = 78152, upload_time = "2025-10-08T19:47:51.051Z" }, + { url = "https://files.pythonhosted.org/packages/65/9b/03b04e7d82a5f54fb16113d839f5ea1ede58a61e90edf515f6577c66fa8f/propcache-0.4.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:c0d4b719b7da33599dfe3b22d3db1ef789210a0597bc650b7cee9c77c2be8c5c", size = 44869, upload_time = "2025-10-08T19:47:52.594Z" }, + { url = "https://files.pythonhosted.org/packages/b2/fa/89a8ef0468d5833a23fff277b143d0573897cf75bd56670a6d28126c7d68/propcache-0.4.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:9f302f4783709a78240ebc311b793f123328716a60911d667e0c036bc5dcbded", size = 46596, upload_time = "2025-10-08T19:47:54.073Z" }, + { url = "https://files.pythonhosted.org/packages/86/bd/47816020d337f4a746edc42fe8d53669965138f39ee117414c7d7a340cfe/propcache-0.4.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c80ee5802e3fb9ea37938e7eecc307fb984837091d5fd262bb37238b1ae97641", size = 206981, upload_time = "2025-10-08T19:47:55.715Z" }, + { url = "https://files.pythonhosted.org/packages/df/f6/c5fa1357cc9748510ee55f37173eb31bfde6d94e98ccd9e6f033f2fc06e1/propcache-0.4.1-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ed5a841e8bb29a55fb8159ed526b26adc5bdd7e8bd7bf793ce647cb08656cdf4", size = 211490, upload_time = "2025-10-08T19:47:57.499Z" }, + { url = "https://files.pythonhosted.org/packages/80/1e/e5889652a7c4a3846683401a48f0f2e5083ce0ec1a8a5221d8058fbd1adf/propcache-0.4.1-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = 
"sha256:55c72fd6ea2da4c318e74ffdf93c4fe4e926051133657459131a95c846d16d44", size = 215371, upload_time = "2025-10-08T19:47:59.317Z" }, + { url = "https://files.pythonhosted.org/packages/b2/f2/889ad4b2408f72fe1a4f6a19491177b30ea7bf1a0fd5f17050ca08cfc882/propcache-0.4.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8326e144341460402713f91df60ade3c999d601e7eb5ff8f6f7862d54de0610d", size = 201424, upload_time = "2025-10-08T19:48:00.67Z" }, + { url = "https://files.pythonhosted.org/packages/27/73/033d63069b57b0812c8bd19f311faebeceb6ba31b8f32b73432d12a0b826/propcache-0.4.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:060b16ae65bc098da7f6d25bf359f1f31f688384858204fe5d652979e0015e5b", size = 197566, upload_time = "2025-10-08T19:48:02.604Z" }, + { url = "https://files.pythonhosted.org/packages/dc/89/ce24f3dc182630b4e07aa6d15f0ff4b14ed4b9955fae95a0b54c58d66c05/propcache-0.4.1-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:89eb3fa9524f7bec9de6e83cf3faed9d79bffa560672c118a96a171a6f55831e", size = 193130, upload_time = "2025-10-08T19:48:04.499Z" }, + { url = "https://files.pythonhosted.org/packages/a9/24/ef0d5fd1a811fb5c609278d0209c9f10c35f20581fcc16f818da959fc5b4/propcache-0.4.1-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:dee69d7015dc235f526fe80a9c90d65eb0039103fe565776250881731f06349f", size = 202625, upload_time = "2025-10-08T19:48:06.213Z" }, + { url = "https://files.pythonhosted.org/packages/f5/02/98ec20ff5546f68d673df2f7a69e8c0d076b5abd05ca882dc7ee3a83653d/propcache-0.4.1-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:5558992a00dfd54ccbc64a32726a3357ec93825a418a401f5cc67df0ac5d9e49", size = 204209, upload_time = "2025-10-08T19:48:08.432Z" }, + { url = "https://files.pythonhosted.org/packages/a0/87/492694f76759b15f0467a2a93ab68d32859672b646aa8a04ce4864e7932d/propcache-0.4.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:c9b822a577f560fbd9554812526831712c1436d2c046cedee4c3796d3543b144", size = 197797, upload_time = "2025-10-08T19:48:09.968Z" }, + { url = "https://files.pythonhosted.org/packages/ee/36/66367de3575db1d2d3f3d177432bd14ee577a39d3f5d1b3d5df8afe3b6e2/propcache-0.4.1-cp314-cp314-win32.whl", hash = "sha256:ab4c29b49d560fe48b696cdcb127dd36e0bc2472548f3bf56cc5cb3da2b2984f", size = 38140, upload_time = "2025-10-08T19:48:11.232Z" }, + { url = "https://files.pythonhosted.org/packages/0c/2a/a758b47de253636e1b8aef181c0b4f4f204bf0dd964914fb2af90a95b49b/propcache-0.4.1-cp314-cp314-win_amd64.whl", hash = "sha256:5a103c3eb905fcea0ab98be99c3a9a5ab2de60228aa5aceedc614c0281cf6153", size = 41257, upload_time = "2025-10-08T19:48:12.707Z" }, + { url = "https://files.pythonhosted.org/packages/34/5e/63bd5896c3fec12edcbd6f12508d4890d23c265df28c74b175e1ef9f4f3b/propcache-0.4.1-cp314-cp314-win_arm64.whl", hash = "sha256:74c1fb26515153e482e00177a1ad654721bf9207da8a494a0c05e797ad27b992", size = 38097, upload_time = "2025-10-08T19:48:13.923Z" }, + { url = "https://files.pythonhosted.org/packages/99/85/9ff785d787ccf9bbb3f3106f79884a130951436f58392000231b4c737c80/propcache-0.4.1-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:824e908bce90fb2743bd6b59db36eb4f45cd350a39637c9f73b1c1ea66f5b75f", size = 81455, upload_time = "2025-10-08T19:48:15.16Z" }, + { url = "https://files.pythonhosted.org/packages/90/85/2431c10c8e7ddb1445c1f7c4b54d886e8ad20e3c6307e7218f05922cad67/propcache-0.4.1-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:c2b5e7db5328427c57c8e8831abda175421b709672f6cfc3d630c3b7e2146393", size = 46372, 
upload_time = "2025-10-08T19:48:16.424Z" }, + { url = "https://files.pythonhosted.org/packages/01/20/b0972d902472da9bcb683fa595099911f4d2e86e5683bcc45de60dd05dc3/propcache-0.4.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:6f6ff873ed40292cd4969ef5310179afd5db59fdf055897e282485043fc80ad0", size = 48411, upload_time = "2025-10-08T19:48:17.577Z" }, + { url = "https://files.pythonhosted.org/packages/e2/e3/7dc89f4f21e8f99bad3d5ddb3a3389afcf9da4ac69e3deb2dcdc96e74169/propcache-0.4.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:49a2dc67c154db2c1463013594c458881a069fcf98940e61a0569016a583020a", size = 275712, upload_time = "2025-10-08T19:48:18.901Z" }, + { url = "https://files.pythonhosted.org/packages/20/67/89800c8352489b21a8047c773067644e3897f02ecbbd610f4d46b7f08612/propcache-0.4.1-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:005f08e6a0529984491e37d8dbc3dd86f84bd78a8ceb5fa9a021f4c48d4984be", size = 273557, upload_time = "2025-10-08T19:48:20.762Z" }, + { url = "https://files.pythonhosted.org/packages/e2/a1/b52b055c766a54ce6d9c16d9aca0cad8059acd9637cdf8aa0222f4a026ef/propcache-0.4.1-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5c3310452e0d31390da9035c348633b43d7e7feb2e37be252be6da45abd1abcc", size = 280015, upload_time = "2025-10-08T19:48:22.592Z" }, + { url = "https://files.pythonhosted.org/packages/48/c8/33cee30bd890672c63743049f3c9e4be087e6780906bfc3ec58528be59c1/propcache-0.4.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4c3c70630930447f9ef1caac7728c8ad1c56bc5015338b20fed0d08ea2480b3a", size = 262880, upload_time = "2025-10-08T19:48:23.947Z" }, + { url = "https://files.pythonhosted.org/packages/0c/b1/8f08a143b204b418285c88b83d00edbd61afbc2c6415ffafc8905da7038b/propcache-0.4.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:8e57061305815dfc910a3634dcf584f08168a8836e6999983569f51a8544cd89", size = 260938, upload_time = "2025-10-08T19:48:25.656Z" }, + { url = "https://files.pythonhosted.org/packages/cf/12/96e4664c82ca2f31e1c8dff86afb867348979eb78d3cb8546a680287a1e9/propcache-0.4.1-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:521a463429ef54143092c11a77e04056dd00636f72e8c45b70aaa3140d639726", size = 247641, upload_time = "2025-10-08T19:48:27.207Z" }, + { url = "https://files.pythonhosted.org/packages/18/ed/e7a9cfca28133386ba52278136d42209d3125db08d0a6395f0cba0c0285c/propcache-0.4.1-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:120c964da3fdc75e3731aa392527136d4ad35868cc556fd09bb6d09172d9a367", size = 262510, upload_time = "2025-10-08T19:48:28.65Z" }, + { url = "https://files.pythonhosted.org/packages/f5/76/16d8bf65e8845dd62b4e2b57444ab81f07f40caa5652b8969b87ddcf2ef6/propcache-0.4.1-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:d8f353eb14ee3441ee844ade4277d560cdd68288838673273b978e3d6d2c8f36", size = 263161, upload_time = "2025-10-08T19:48:30.133Z" }, + { url = "https://files.pythonhosted.org/packages/e7/70/c99e9edb5d91d5ad8a49fa3c1e8285ba64f1476782fed10ab251ff413ba1/propcache-0.4.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:ab2943be7c652f09638800905ee1bab2c544e537edb57d527997a24c13dc1455", size = 257393, upload_time = "2025-10-08T19:48:31.567Z" }, + { url = "https://files.pythonhosted.org/packages/08/02/87b25304249a35c0915d236575bc3574a323f60b47939a2262b77632a3ee/propcache-0.4.1-cp314-cp314t-win32.whl", hash = 
"sha256:05674a162469f31358c30bcaa8883cb7829fa3110bf9c0991fe27d7896c42d85", size = 42546, upload_time = "2025-10-08T19:48:32.872Z" }, + { url = "https://files.pythonhosted.org/packages/cb/ef/3c6ecf8b317aa982f309835e8f96987466123c6e596646d4e6a1dfcd080f/propcache-0.4.1-cp314-cp314t-win_amd64.whl", hash = "sha256:990f6b3e2a27d683cb7602ed6c86f15ee6b43b1194736f9baaeb93d0016633b1", size = 46259, upload_time = "2025-10-08T19:48:34.226Z" }, + { url = "https://files.pythonhosted.org/packages/c4/2d/346e946d4951f37eca1e4f55be0f0174c52cd70720f84029b02f296f4a38/propcache-0.4.1-cp314-cp314t-win_arm64.whl", hash = "sha256:ecef2343af4cc68e05131e45024ba34f6095821988a9d0a02aa7c73fcc448aa9", size = 40428, upload_time = "2025-10-08T19:48:35.441Z" }, + { url = "https://files.pythonhosted.org/packages/5b/5a/bc7b4a4ef808fa59a816c17b20c4bef6884daebbdf627ff2a161da67da19/propcache-0.4.1-py3-none-any.whl", hash = "sha256:af2a6052aeb6cf17d3e46ee169099044fd8224cbaf75c76a2ef596e8163e2237", size = 13305, upload_time = "2025-10-08T19:49:00.792Z" }, ] [[package]] name = "psutil" version = "7.2.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/73/cb/09e5184fb5fc0358d110fc3ca7f6b1d033800734d34cac10f4136cfac10e/psutil-7.2.1.tar.gz", hash = "sha256:f7583aec590485b43ca601dd9cea0dcd65bd7bb21d30ef4ddbf4ea6b5ed1bdd3", size = 490253 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/77/8e/f0c242053a368c2aa89584ecd1b054a18683f13d6e5a318fc9ec36582c94/psutil-7.2.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:ba9f33bb525b14c3ea563b2fd521a84d2fa214ec59e3e6a2858f78d0844dd60d", size = 129624 }, - { url = "https://files.pythonhosted.org/packages/26/97/a58a4968f8990617decee234258a2b4fc7cd9e35668387646c1963e69f26/psutil-7.2.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:81442dac7abfc2f4f4385ea9e12ddf5a796721c0f6133260687fec5c3780fa49", size = 130132 }, - { url = "https://files.pythonhosted.org/packages/db/6d/ed44901e830739af5f72a85fa7ec5ff1edea7f81bfbf4875e409007149bd/psutil-7.2.1-cp313-cp313t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ea46c0d060491051d39f0d2cff4f98d5c72b288289f57a21556cc7d504db37fc", size = 180612 }, - { url = "https://files.pythonhosted.org/packages/c7/65/b628f8459bca4efbfae50d4bf3feaab803de9a160b9d5f3bd9295a33f0c2/psutil-7.2.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:35630d5af80d5d0d49cfc4d64c1c13838baf6717a13effb35869a5919b854cdf", size = 183201 }, - { url = "https://files.pythonhosted.org/packages/fb/23/851cadc9764edcc18f0effe7d0bf69f727d4cf2442deb4a9f78d4e4f30f2/psutil-7.2.1-cp313-cp313t-win_amd64.whl", hash = "sha256:923f8653416604e356073e6e0bccbe7c09990acef442def2f5640dd0faa9689f", size = 139081 }, - { url = "https://files.pythonhosted.org/packages/59/82/d63e8494ec5758029f31c6cb06d7d161175d8281e91d011a4a441c8a43b5/psutil-7.2.1-cp313-cp313t-win_arm64.whl", hash = "sha256:cfbe6b40ca48019a51827f20d830887b3107a74a79b01ceb8cc8de4ccb17b672", size = 134767 }, - { url = "https://files.pythonhosted.org/packages/05/c2/5fb764bd61e40e1fe756a44bd4c21827228394c17414ade348e28f83cd79/psutil-7.2.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:494c513ccc53225ae23eec7fe6e1482f1b8a44674241b54561f755a898650679", size = 129716 }, - { url = "https://files.pythonhosted.org/packages/c9/d2/935039c20e06f615d9ca6ca0ab756cf8408a19d298ffaa08666bc18dc805/psutil-7.2.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = 
"sha256:3fce5f92c22b00cdefd1645aa58ab4877a01679e901555067b1bd77039aa589f", size = 130133 }, - { url = "https://files.pythonhosted.org/packages/77/69/19f1eb0e01d24c2b3eacbc2f78d3b5add8a89bf0bb69465bc8d563cc33de/psutil-7.2.1-cp314-cp314t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:93f3f7b0bb07711b49626e7940d6fe52aa9940ad86e8f7e74842e73189712129", size = 181518 }, - { url = "https://files.pythonhosted.org/packages/e1/6d/7e18b1b4fa13ad370787626c95887b027656ad4829c156bb6569d02f3262/psutil-7.2.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d34d2ca888208eea2b5c68186841336a7f5e0b990edec929be909353a202768a", size = 184348 }, - { url = "https://files.pythonhosted.org/packages/98/60/1672114392dd879586d60dd97896325df47d9a130ac7401318005aab28ec/psutil-7.2.1-cp314-cp314t-win_amd64.whl", hash = "sha256:2ceae842a78d1603753561132d5ad1b2f8a7979cb0c283f5b52fb4e6e14b1a79", size = 140400 }, - { url = "https://files.pythonhosted.org/packages/fb/7b/d0e9d4513c46e46897b46bcfc410d51fc65735837ea57a25170f298326e6/psutil-7.2.1-cp314-cp314t-win_arm64.whl", hash = "sha256:08a2f175e48a898c8eb8eace45ce01777f4785bc744c90aa2cc7f2fa5462a266", size = 135430 }, - { url = "https://files.pythonhosted.org/packages/c5/cf/5180eb8c8bdf6a503c6919f1da28328bd1e6b3b1b5b9d5b01ae64f019616/psutil-7.2.1-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:b2e953fcfaedcfbc952b44744f22d16575d3aa78eb4f51ae74165b4e96e55f42", size = 128137 }, - { url = "https://files.pythonhosted.org/packages/c5/2c/78e4a789306a92ade5000da4f5de3255202c534acdadc3aac7b5458fadef/psutil-7.2.1-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:05cc68dbb8c174828624062e73078e7e35406f4ca2d0866c272c2410d8ef06d1", size = 128947 }, - { url = "https://files.pythonhosted.org/packages/29/f8/40e01c350ad9a2b3cb4e6adbcc8a83b17ee50dd5792102b6142385937db5/psutil-7.2.1-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5e38404ca2bb30ed7267a46c02f06ff842e92da3bb8c5bfdadbd35a5722314d8", size = 154694 }, - { url = "https://files.pythonhosted.org/packages/06/e4/b751cdf839c011a9714a783f120e6a86b7494eb70044d7d81a25a5cd295f/psutil-7.2.1-cp36-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ab2b98c9fc19f13f59628d94df5cc4cc4844bc572467d113a8b517d634e362c6", size = 156136 }, - { url = "https://files.pythonhosted.org/packages/44/ad/bbf6595a8134ee1e94a4487af3f132cef7fce43aef4a93b49912a48c3af7/psutil-7.2.1-cp36-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:f78baafb38436d5a128f837fab2d92c276dfb48af01a240b861ae02b2413ada8", size = 148108 }, - { url = "https://files.pythonhosted.org/packages/1c/15/dd6fd869753ce82ff64dcbc18356093471a5a5adf4f77ed1f805d473d859/psutil-7.2.1-cp36-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:99a4cd17a5fdd1f3d014396502daa70b5ec21bf4ffe38393e152f8e449757d67", size = 147402 }, - { url = "https://files.pythonhosted.org/packages/34/68/d9317542e3f2b180c4306e3f45d3c922d7e86d8ce39f941bb9e2e9d8599e/psutil-7.2.1-cp37-abi3-win_amd64.whl", hash = "sha256:b1b0671619343aa71c20ff9767eced0483e4fc9e1f489d50923738caf6a03c17", size = 136938 }, - { url = "https://files.pythonhosted.org/packages/3e/73/2ce007f4198c80fcf2cb24c169884f833fe93fbc03d55d302627b094ee91/psutil-7.2.1-cp37-abi3-win_arm64.whl", hash = "sha256:0d67c1822c355aa6f7314d92018fb4268a76668a536f133599b91edd48759442", size = 133836 }, +sdist = { url = 
"https://files.pythonhosted.org/packages/73/cb/09e5184fb5fc0358d110fc3ca7f6b1d033800734d34cac10f4136cfac10e/psutil-7.2.1.tar.gz", hash = "sha256:f7583aec590485b43ca601dd9cea0dcd65bd7bb21d30ef4ddbf4ea6b5ed1bdd3", size = 490253, upload_time = "2025-12-29T08:26:00.169Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/77/8e/f0c242053a368c2aa89584ecd1b054a18683f13d6e5a318fc9ec36582c94/psutil-7.2.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:ba9f33bb525b14c3ea563b2fd521a84d2fa214ec59e3e6a2858f78d0844dd60d", size = 129624, upload_time = "2025-12-29T08:26:04.255Z" }, + { url = "https://files.pythonhosted.org/packages/26/97/a58a4968f8990617decee234258a2b4fc7cd9e35668387646c1963e69f26/psutil-7.2.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:81442dac7abfc2f4f4385ea9e12ddf5a796721c0f6133260687fec5c3780fa49", size = 130132, upload_time = "2025-12-29T08:26:06.228Z" }, + { url = "https://files.pythonhosted.org/packages/db/6d/ed44901e830739af5f72a85fa7ec5ff1edea7f81bfbf4875e409007149bd/psutil-7.2.1-cp313-cp313t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ea46c0d060491051d39f0d2cff4f98d5c72b288289f57a21556cc7d504db37fc", size = 180612, upload_time = "2025-12-29T08:26:08.276Z" }, + { url = "https://files.pythonhosted.org/packages/c7/65/b628f8459bca4efbfae50d4bf3feaab803de9a160b9d5f3bd9295a33f0c2/psutil-7.2.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:35630d5af80d5d0d49cfc4d64c1c13838baf6717a13effb35869a5919b854cdf", size = 183201, upload_time = "2025-12-29T08:26:10.622Z" }, + { url = "https://files.pythonhosted.org/packages/fb/23/851cadc9764edcc18f0effe7d0bf69f727d4cf2442deb4a9f78d4e4f30f2/psutil-7.2.1-cp313-cp313t-win_amd64.whl", hash = "sha256:923f8653416604e356073e6e0bccbe7c09990acef442def2f5640dd0faa9689f", size = 139081, upload_time = "2025-12-29T08:26:12.483Z" }, + { url = "https://files.pythonhosted.org/packages/59/82/d63e8494ec5758029f31c6cb06d7d161175d8281e91d011a4a441c8a43b5/psutil-7.2.1-cp313-cp313t-win_arm64.whl", hash = "sha256:cfbe6b40ca48019a51827f20d830887b3107a74a79b01ceb8cc8de4ccb17b672", size = 134767, upload_time = "2025-12-29T08:26:14.528Z" }, + { url = "https://files.pythonhosted.org/packages/05/c2/5fb764bd61e40e1fe756a44bd4c21827228394c17414ade348e28f83cd79/psutil-7.2.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:494c513ccc53225ae23eec7fe6e1482f1b8a44674241b54561f755a898650679", size = 129716, upload_time = "2025-12-29T08:26:16.017Z" }, + { url = "https://files.pythonhosted.org/packages/c9/d2/935039c20e06f615d9ca6ca0ab756cf8408a19d298ffaa08666bc18dc805/psutil-7.2.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:3fce5f92c22b00cdefd1645aa58ab4877a01679e901555067b1bd77039aa589f", size = 130133, upload_time = "2025-12-29T08:26:18.009Z" }, + { url = "https://files.pythonhosted.org/packages/77/69/19f1eb0e01d24c2b3eacbc2f78d3b5add8a89bf0bb69465bc8d563cc33de/psutil-7.2.1-cp314-cp314t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:93f3f7b0bb07711b49626e7940d6fe52aa9940ad86e8f7e74842e73189712129", size = 181518, upload_time = "2025-12-29T08:26:20.241Z" }, + { url = "https://files.pythonhosted.org/packages/e1/6d/7e18b1b4fa13ad370787626c95887b027656ad4829c156bb6569d02f3262/psutil-7.2.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d34d2ca888208eea2b5c68186841336a7f5e0b990edec929be909353a202768a", size = 184348, upload_time = "2025-12-29T08:26:22.215Z" }, 
+ { url = "https://files.pythonhosted.org/packages/98/60/1672114392dd879586d60dd97896325df47d9a130ac7401318005aab28ec/psutil-7.2.1-cp314-cp314t-win_amd64.whl", hash = "sha256:2ceae842a78d1603753561132d5ad1b2f8a7979cb0c283f5b52fb4e6e14b1a79", size = 140400, upload_time = "2025-12-29T08:26:23.993Z" }, + { url = "https://files.pythonhosted.org/packages/fb/7b/d0e9d4513c46e46897b46bcfc410d51fc65735837ea57a25170f298326e6/psutil-7.2.1-cp314-cp314t-win_arm64.whl", hash = "sha256:08a2f175e48a898c8eb8eace45ce01777f4785bc744c90aa2cc7f2fa5462a266", size = 135430, upload_time = "2025-12-29T08:26:25.999Z" }, + { url = "https://files.pythonhosted.org/packages/c5/cf/5180eb8c8bdf6a503c6919f1da28328bd1e6b3b1b5b9d5b01ae64f019616/psutil-7.2.1-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:b2e953fcfaedcfbc952b44744f22d16575d3aa78eb4f51ae74165b4e96e55f42", size = 128137, upload_time = "2025-12-29T08:26:27.759Z" }, + { url = "https://files.pythonhosted.org/packages/c5/2c/78e4a789306a92ade5000da4f5de3255202c534acdadc3aac7b5458fadef/psutil-7.2.1-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:05cc68dbb8c174828624062e73078e7e35406f4ca2d0866c272c2410d8ef06d1", size = 128947, upload_time = "2025-12-29T08:26:29.548Z" }, + { url = "https://files.pythonhosted.org/packages/29/f8/40e01c350ad9a2b3cb4e6adbcc8a83b17ee50dd5792102b6142385937db5/psutil-7.2.1-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5e38404ca2bb30ed7267a46c02f06ff842e92da3bb8c5bfdadbd35a5722314d8", size = 154694, upload_time = "2025-12-29T08:26:32.147Z" }, + { url = "https://files.pythonhosted.org/packages/06/e4/b751cdf839c011a9714a783f120e6a86b7494eb70044d7d81a25a5cd295f/psutil-7.2.1-cp36-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ab2b98c9fc19f13f59628d94df5cc4cc4844bc572467d113a8b517d634e362c6", size = 156136, upload_time = "2025-12-29T08:26:34.079Z" }, + { url = "https://files.pythonhosted.org/packages/44/ad/bbf6595a8134ee1e94a4487af3f132cef7fce43aef4a93b49912a48c3af7/psutil-7.2.1-cp36-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:f78baafb38436d5a128f837fab2d92c276dfb48af01a240b861ae02b2413ada8", size = 148108, upload_time = "2025-12-29T08:26:36.225Z" }, + { url = "https://files.pythonhosted.org/packages/1c/15/dd6fd869753ce82ff64dcbc18356093471a5a5adf4f77ed1f805d473d859/psutil-7.2.1-cp36-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:99a4cd17a5fdd1f3d014396502daa70b5ec21bf4ffe38393e152f8e449757d67", size = 147402, upload_time = "2025-12-29T08:26:39.21Z" }, + { url = "https://files.pythonhosted.org/packages/34/68/d9317542e3f2b180c4306e3f45d3c922d7e86d8ce39f941bb9e2e9d8599e/psutil-7.2.1-cp37-abi3-win_amd64.whl", hash = "sha256:b1b0671619343aa71c20ff9767eced0483e4fc9e1f489d50923738caf6a03c17", size = 136938, upload_time = "2025-12-29T08:26:41.036Z" }, + { url = "https://files.pythonhosted.org/packages/3e/73/2ce007f4198c80fcf2cb24c169884f833fe93fbc03d55d302627b094ee91/psutil-7.2.1-cp37-abi3-win_arm64.whl", hash = "sha256:0d67c1822c355aa6f7314d92018fb4268a76668a536f133599b91edd48759442", size = 133836, upload_time = "2025-12-29T08:26:43.086Z" }, ] [[package]] name = "pyarrow" version = "23.0.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/01/33/ffd9c3eb087fa41dd79c3cf20c4c0ae3cdb877c4f8e1107a446006344924/pyarrow-23.0.0.tar.gz", hash = "sha256:180e3150e7edfcd182d3d9afba72f7cf19839a497cc76555a8dce998a8f67615", size = 1167185 } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/3d/bd/c861d020831ee57609b73ea721a617985ece817684dc82415b0bc3e03ac3/pyarrow-23.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:5961a9f646c232697c24f54d3419e69b4261ba8a8b66b0ac54a1851faffcbab8", size = 34189116 }, - { url = "https://files.pythonhosted.org/packages/8c/23/7725ad6cdcbaf6346221391e7b3eecd113684c805b0a95f32014e6fa0736/pyarrow-23.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:632b3e7c3d232f41d64e1a4a043fb82d44f8a349f339a1188c6a0dd9d2d47d8a", size = 35803831 }, - { url = "https://files.pythonhosted.org/packages/57/06/684a421543455cdc2944d6a0c2cc3425b028a4c6b90e34b35580c4899743/pyarrow-23.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:76242c846db1411f1d6c2cc3823be6b86b40567ee24493344f8226ba34a81333", size = 44436452 }, - { url = "https://files.pythonhosted.org/packages/c6/6f/8f9eb40c2328d66e8b097777ddcf38494115ff9f1b5bc9754ba46991191e/pyarrow-23.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b73519f8b52ae28127000986bf228fda781e81d3095cd2d3ece76eb5cf760e1b", size = 47557396 }, - { url = "https://files.pythonhosted.org/packages/10/6e/f08075f1472e5159553501fde2cc7bc6700944bdabe49a03f8a035ee6ccd/pyarrow-23.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:068701f6823449b1b6469120f399a1239766b117d211c5d2519d4ed5861f75de", size = 48147129 }, - { url = "https://files.pythonhosted.org/packages/7d/82/d5a680cd507deed62d141cc7f07f7944a6766fc51019f7f118e4d8ad0fb8/pyarrow-23.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1801ba947015d10e23bca9dd6ef5d0e9064a81569a89b6e9a63b59224fd060df", size = 50596642 }, - { url = "https://files.pythonhosted.org/packages/a9/26/4f29c61b3dce9fa7780303b86895ec6a0917c9af927101daaaf118fbe462/pyarrow-23.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:52265266201ec25b6839bf6bd4ea918ca6d50f31d13e1cf200b4261cd11dc25c", size = 27660628 }, - { url = "https://files.pythonhosted.org/packages/66/34/564db447d083ec7ff93e0a883a597d2f214e552823bfc178a2d0b1f2c257/pyarrow-23.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:ad96a597547af7827342ffb3c503c8316e5043bb09b47a84885ce39394c96e00", size = 34184630 }, - { url = "https://files.pythonhosted.org/packages/aa/3a/3999daebcb5e6119690c92a621c4d78eef2ffba7a0a1b56386d2875fcd77/pyarrow-23.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:b9edf990df77c2901e79608f08c13fbde60202334a4fcadb15c1f57bf7afee43", size = 35796820 }, - { url = "https://files.pythonhosted.org/packages/ec/ee/39195233056c6a8d0976d7d1ac1cd4fe21fb0ec534eca76bc23ef3f60e11/pyarrow-23.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:36d1b5bc6ddcaff0083ceec7e2561ed61a51f49cce8be079ee8ed406acb6fdef", size = 44438735 }, - { url = "https://files.pythonhosted.org/packages/2c/41/6a7328ee493527e7afc0c88d105ecca69a3580e29f2faaeac29308369fd7/pyarrow-23.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:4292b889cd224f403304ddda8b63a36e60f92911f89927ec8d98021845ea21be", size = 47557263 }, - { url = "https://files.pythonhosted.org/packages/c6/ee/34e95b21ee84db494eae60083ddb4383477b31fb1fd19fd866d794881696/pyarrow-23.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:dfd9e133e60eaa847fd80530a1b89a052f09f695d0b9c34c235ea6b2e0924cf7", size = 48153529 }, - { url = "https://files.pythonhosted.org/packages/52/88/8a8d83cea30f4563efa1b7bf51d241331ee5cd1b185a7e063f5634eca415/pyarrow-23.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:832141cc09fac6aab1cd3719951d23301396968de87080c57c9a7634e0ecd068", size = 50598851 }, - { url = 
"https://files.pythonhosted.org/packages/c6/4c/2929c4be88723ba025e7b3453047dc67e491c9422965c141d24bab6b5962/pyarrow-23.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:7a7d067c9a88faca655c71bcc30ee2782038d59c802d57950826a07f60d83c4c", size = 27577747 }, - { url = "https://files.pythonhosted.org/packages/64/52/564a61b0b82d72bd68ec3aef1adda1e3eba776f89134b9ebcb5af4b13cb6/pyarrow-23.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:ce9486e0535a843cf85d990e2ec5820a47918235183a5c7b8b97ed7e92c2d47d", size = 34446038 }, - { url = "https://files.pythonhosted.org/packages/cc/c9/232d4f9855fd1de0067c8a7808a363230d223c83aeee75e0fe6eab851ba9/pyarrow-23.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:075c29aeaa685fd1182992a9ed2499c66f084ee54eea47da3eb76e125e06064c", size = 35921142 }, - { url = "https://files.pythonhosted.org/packages/96/f2/60af606a3748367b906bb82d41f0032e059f075444445d47e32a7ff1df62/pyarrow-23.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:799965a5379589510d888be3094c2296efd186a17ca1cef5b77703d4d5121f53", size = 44490374 }, - { url = "https://files.pythonhosted.org/packages/ff/2d/7731543050a678ea3a413955a2d5d80d2a642f270aa57a3cb7d5a86e3f46/pyarrow-23.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:ef7cac8fe6fccd8b9e7617bfac785b0371a7fe26af59463074e4882747145d40", size = 47527896 }, - { url = "https://files.pythonhosted.org/packages/5a/90/f3342553b7ac9879413aed46500f1637296f3c8222107523a43a1c08b42a/pyarrow-23.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:15a414f710dc927132dd67c361f78c194447479555af57317066ee5116b90e9e", size = 48210401 }, - { url = "https://files.pythonhosted.org/packages/f3/da/9862ade205ecc46c172b6ce5038a74b5151c7401e36255f15975a45878b2/pyarrow-23.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:3e0d2e6915eca7d786be6a77bf227fbc06d825a75b5b5fe9bcbef121dec32685", size = 50579677 }, - { url = "https://files.pythonhosted.org/packages/c2/4c/f11f371f5d4740a5dafc2e11c76bcf42d03dfdb2d68696da97de420b6963/pyarrow-23.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:4b317ea6e800b5704e5e5929acb6e2dc13e9276b708ea97a39eb8b345aa2658b", size = 27631889 }, - { url = "https://files.pythonhosted.org/packages/97/bb/15aec78bcf43a0c004067bd33eb5352836a29a49db8581fc56f2b6ca88b7/pyarrow-23.0.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:20b187ed9550d233a872074159f765f52f9d92973191cd4b93f293a19efbe377", size = 34213265 }, - { url = "https://files.pythonhosted.org/packages/f6/6c/deb2c594bbba41c37c5d9aa82f510376998352aa69dfcb886cb4b18ad80f/pyarrow-23.0.0-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:18ec84e839b493c3886b9b5e06861962ab4adfaeb79b81c76afbd8d84c7d5fda", size = 35819211 }, - { url = "https://files.pythonhosted.org/packages/e0/e5/ee82af693cb7b5b2b74f6524cdfede0e6ace779d7720ebca24d68b57c36b/pyarrow-23.0.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:e438dd3f33894e34fd02b26bd12a32d30d006f5852315f611aa4add6c7fab4bc", size = 44502313 }, - { url = "https://files.pythonhosted.org/packages/9c/86/95c61ad82236495f3c31987e85135926ba3ec7f3819296b70a68d8066b49/pyarrow-23.0.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:a244279f240c81f135631be91146d7fa0e9e840e1dfed2aba8483eba25cd98e6", size = 47585886 }, - { url = "https://files.pythonhosted.org/packages/bb/6e/a72d901f305201802f016d015de1e05def7706fff68a1dedefef5dc7eff7/pyarrow-23.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c4692e83e42438dba512a570c6eaa42be2f8b6c0f492aea27dec54bdc495103a", size = 48207055 }, - { url = 
"https://files.pythonhosted.org/packages/f9/e5/5de029c537630ca18828db45c30e2a78da03675a70ac6c3528203c416fe3/pyarrow-23.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ae7f30f898dfe44ea69654a35c93e8da4cef6606dc4c72394068fd95f8e9f54a", size = 50619812 }, - { url = "https://files.pythonhosted.org/packages/59/8d/2af846cd2412e67a087f5bda4a8e23dfd4ebd570f777db2e8686615dafc1/pyarrow-23.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:5b86bb649e4112fb0614294b7d0a175c7513738876b89655605ebb87c804f861", size = 28263851 }, - { url = "https://files.pythonhosted.org/packages/7b/7f/caab863e587041156f6786c52e64151b7386742c8c27140f637176e9230e/pyarrow-23.0.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:ebc017d765d71d80a3f8584ca0566b53e40464586585ac64176115baa0ada7d3", size = 34463240 }, - { url = "https://files.pythonhosted.org/packages/c9/fa/3a5b8c86c958e83622b40865e11af0857c48ec763c11d472c87cd518283d/pyarrow-23.0.0-cp314-cp314t-macosx_12_0_x86_64.whl", hash = "sha256:0800cc58a6d17d159df823f87ad66cefebf105b982493d4bad03ee7fab84b993", size = 35935712 }, - { url = "https://files.pythonhosted.org/packages/c5/08/17a62078fc1a53decb34a9aa79cf9009efc74d63d2422e5ade9fed2f99e3/pyarrow-23.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:3a7c68c722da9bb5b0f8c10e3eae71d9825a4b429b40b32709df5d1fa55beb3d", size = 44503523 }, - { url = "https://files.pythonhosted.org/packages/cc/70/84d45c74341e798aae0323d33b7c39194e23b1abc439ceaf60a68a7a969a/pyarrow-23.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:bd5556c24622df90551063ea41f559b714aa63ca953db884cfb958559087a14e", size = 47542490 }, - { url = "https://files.pythonhosted.org/packages/61/d9/d1274b0e6f19e235de17441e53224f4716574b2ca837022d55702f24d71d/pyarrow-23.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:54810f6e6afc4ffee7c2e0051b61722fbea9a4961b46192dcfae8ea12fa09059", size = 48233605 }, - { url = "https://files.pythonhosted.org/packages/39/07/e4e2d568cb57543d84482f61e510732820cddb0f47c4bb7df629abfed852/pyarrow-23.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:14de7d48052cf4b0ed174533eafa3cfe0711b8076ad70bede32cf59f744f0d7c", size = 50603979 }, - { url = "https://files.pythonhosted.org/packages/72/9c/47693463894b610f8439b2e970b82ef81e9599c757bf2049365e40ff963c/pyarrow-23.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:427deac1f535830a744a4f04a6ac183a64fcac4341b3f618e693c41b7b98d2b0", size = 28338905 }, +sdist = { url = "https://files.pythonhosted.org/packages/01/33/ffd9c3eb087fa41dd79c3cf20c4c0ae3cdb877c4f8e1107a446006344924/pyarrow-23.0.0.tar.gz", hash = "sha256:180e3150e7edfcd182d3d9afba72f7cf19839a497cc76555a8dce998a8f67615", size = 1167185, upload_time = "2026-01-18T16:19:42.218Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3d/bd/c861d020831ee57609b73ea721a617985ece817684dc82415b0bc3e03ac3/pyarrow-23.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:5961a9f646c232697c24f54d3419e69b4261ba8a8b66b0ac54a1851faffcbab8", size = 34189116, upload_time = "2026-01-18T16:15:28.054Z" }, + { url = "https://files.pythonhosted.org/packages/8c/23/7725ad6cdcbaf6346221391e7b3eecd113684c805b0a95f32014e6fa0736/pyarrow-23.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:632b3e7c3d232f41d64e1a4a043fb82d44f8a349f339a1188c6a0dd9d2d47d8a", size = 35803831, upload_time = "2026-01-18T16:15:33.798Z" }, + { url = "https://files.pythonhosted.org/packages/57/06/684a421543455cdc2944d6a0c2cc3425b028a4c6b90e34b35580c4899743/pyarrow-23.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = 
"sha256:76242c846db1411f1d6c2cc3823be6b86b40567ee24493344f8226ba34a81333", size = 44436452, upload_time = "2026-01-18T16:15:41.598Z" }, + { url = "https://files.pythonhosted.org/packages/c6/6f/8f9eb40c2328d66e8b097777ddcf38494115ff9f1b5bc9754ba46991191e/pyarrow-23.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b73519f8b52ae28127000986bf228fda781e81d3095cd2d3ece76eb5cf760e1b", size = 47557396, upload_time = "2026-01-18T16:15:51.252Z" }, + { url = "https://files.pythonhosted.org/packages/10/6e/f08075f1472e5159553501fde2cc7bc6700944bdabe49a03f8a035ee6ccd/pyarrow-23.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:068701f6823449b1b6469120f399a1239766b117d211c5d2519d4ed5861f75de", size = 48147129, upload_time = "2026-01-18T16:16:00.299Z" }, + { url = "https://files.pythonhosted.org/packages/7d/82/d5a680cd507deed62d141cc7f07f7944a6766fc51019f7f118e4d8ad0fb8/pyarrow-23.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1801ba947015d10e23bca9dd6ef5d0e9064a81569a89b6e9a63b59224fd060df", size = 50596642, upload_time = "2026-01-18T16:16:08.502Z" }, + { url = "https://files.pythonhosted.org/packages/a9/26/4f29c61b3dce9fa7780303b86895ec6a0917c9af927101daaaf118fbe462/pyarrow-23.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:52265266201ec25b6839bf6bd4ea918ca6d50f31d13e1cf200b4261cd11dc25c", size = 27660628, upload_time = "2026-01-18T16:16:15.28Z" }, + { url = "https://files.pythonhosted.org/packages/66/34/564db447d083ec7ff93e0a883a597d2f214e552823bfc178a2d0b1f2c257/pyarrow-23.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:ad96a597547af7827342ffb3c503c8316e5043bb09b47a84885ce39394c96e00", size = 34184630, upload_time = "2026-01-18T16:16:22.141Z" }, + { url = "https://files.pythonhosted.org/packages/aa/3a/3999daebcb5e6119690c92a621c4d78eef2ffba7a0a1b56386d2875fcd77/pyarrow-23.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:b9edf990df77c2901e79608f08c13fbde60202334a4fcadb15c1f57bf7afee43", size = 35796820, upload_time = "2026-01-18T16:16:29.441Z" }, + { url = "https://files.pythonhosted.org/packages/ec/ee/39195233056c6a8d0976d7d1ac1cd4fe21fb0ec534eca76bc23ef3f60e11/pyarrow-23.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:36d1b5bc6ddcaff0083ceec7e2561ed61a51f49cce8be079ee8ed406acb6fdef", size = 44438735, upload_time = "2026-01-18T16:16:38.79Z" }, + { url = "https://files.pythonhosted.org/packages/2c/41/6a7328ee493527e7afc0c88d105ecca69a3580e29f2faaeac29308369fd7/pyarrow-23.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:4292b889cd224f403304ddda8b63a36e60f92911f89927ec8d98021845ea21be", size = 47557263, upload_time = "2026-01-18T16:16:46.248Z" }, + { url = "https://files.pythonhosted.org/packages/c6/ee/34e95b21ee84db494eae60083ddb4383477b31fb1fd19fd866d794881696/pyarrow-23.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:dfd9e133e60eaa847fd80530a1b89a052f09f695d0b9c34c235ea6b2e0924cf7", size = 48153529, upload_time = "2026-01-18T16:16:53.412Z" }, + { url = "https://files.pythonhosted.org/packages/52/88/8a8d83cea30f4563efa1b7bf51d241331ee5cd1b185a7e063f5634eca415/pyarrow-23.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:832141cc09fac6aab1cd3719951d23301396968de87080c57c9a7634e0ecd068", size = 50598851, upload_time = "2026-01-18T16:17:01.133Z" }, + { url = "https://files.pythonhosted.org/packages/c6/4c/2929c4be88723ba025e7b3453047dc67e491c9422965c141d24bab6b5962/pyarrow-23.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:7a7d067c9a88faca655c71bcc30ee2782038d59c802d57950826a07f60d83c4c", size = 27577747, upload_time = 
"2026-01-18T16:18:02.413Z" }, + { url = "https://files.pythonhosted.org/packages/64/52/564a61b0b82d72bd68ec3aef1adda1e3eba776f89134b9ebcb5af4b13cb6/pyarrow-23.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:ce9486e0535a843cf85d990e2ec5820a47918235183a5c7b8b97ed7e92c2d47d", size = 34446038, upload_time = "2026-01-18T16:17:07.861Z" }, + { url = "https://files.pythonhosted.org/packages/cc/c9/232d4f9855fd1de0067c8a7808a363230d223c83aeee75e0fe6eab851ba9/pyarrow-23.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:075c29aeaa685fd1182992a9ed2499c66f084ee54eea47da3eb76e125e06064c", size = 35921142, upload_time = "2026-01-18T16:17:15.401Z" }, + { url = "https://files.pythonhosted.org/packages/96/f2/60af606a3748367b906bb82d41f0032e059f075444445d47e32a7ff1df62/pyarrow-23.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:799965a5379589510d888be3094c2296efd186a17ca1cef5b77703d4d5121f53", size = 44490374, upload_time = "2026-01-18T16:17:23.93Z" }, + { url = "https://files.pythonhosted.org/packages/ff/2d/7731543050a678ea3a413955a2d5d80d2a642f270aa57a3cb7d5a86e3f46/pyarrow-23.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:ef7cac8fe6fccd8b9e7617bfac785b0371a7fe26af59463074e4882747145d40", size = 47527896, upload_time = "2026-01-18T16:17:33.393Z" }, + { url = "https://files.pythonhosted.org/packages/5a/90/f3342553b7ac9879413aed46500f1637296f3c8222107523a43a1c08b42a/pyarrow-23.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:15a414f710dc927132dd67c361f78c194447479555af57317066ee5116b90e9e", size = 48210401, upload_time = "2026-01-18T16:17:42.012Z" }, + { url = "https://files.pythonhosted.org/packages/f3/da/9862ade205ecc46c172b6ce5038a74b5151c7401e36255f15975a45878b2/pyarrow-23.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:3e0d2e6915eca7d786be6a77bf227fbc06d825a75b5b5fe9bcbef121dec32685", size = 50579677, upload_time = "2026-01-18T16:17:50.241Z" }, + { url = "https://files.pythonhosted.org/packages/c2/4c/f11f371f5d4740a5dafc2e11c76bcf42d03dfdb2d68696da97de420b6963/pyarrow-23.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:4b317ea6e800b5704e5e5929acb6e2dc13e9276b708ea97a39eb8b345aa2658b", size = 27631889, upload_time = "2026-01-18T16:17:56.55Z" }, + { url = "https://files.pythonhosted.org/packages/97/bb/15aec78bcf43a0c004067bd33eb5352836a29a49db8581fc56f2b6ca88b7/pyarrow-23.0.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:20b187ed9550d233a872074159f765f52f9d92973191cd4b93f293a19efbe377", size = 34213265, upload_time = "2026-01-18T16:18:07.904Z" }, + { url = "https://files.pythonhosted.org/packages/f6/6c/deb2c594bbba41c37c5d9aa82f510376998352aa69dfcb886cb4b18ad80f/pyarrow-23.0.0-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:18ec84e839b493c3886b9b5e06861962ab4adfaeb79b81c76afbd8d84c7d5fda", size = 35819211, upload_time = "2026-01-18T16:18:13.94Z" }, + { url = "https://files.pythonhosted.org/packages/e0/e5/ee82af693cb7b5b2b74f6524cdfede0e6ace779d7720ebca24d68b57c36b/pyarrow-23.0.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:e438dd3f33894e34fd02b26bd12a32d30d006f5852315f611aa4add6c7fab4bc", size = 44502313, upload_time = "2026-01-18T16:18:20.367Z" }, + { url = "https://files.pythonhosted.org/packages/9c/86/95c61ad82236495f3c31987e85135926ba3ec7f3819296b70a68d8066b49/pyarrow-23.0.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:a244279f240c81f135631be91146d7fa0e9e840e1dfed2aba8483eba25cd98e6", size = 47585886, upload_time = "2026-01-18T16:18:27.544Z" }, + { url = 
"https://files.pythonhosted.org/packages/bb/6e/a72d901f305201802f016d015de1e05def7706fff68a1dedefef5dc7eff7/pyarrow-23.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c4692e83e42438dba512a570c6eaa42be2f8b6c0f492aea27dec54bdc495103a", size = 48207055, upload_time = "2026-01-18T16:18:35.425Z" }, + { url = "https://files.pythonhosted.org/packages/f9/e5/5de029c537630ca18828db45c30e2a78da03675a70ac6c3528203c416fe3/pyarrow-23.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ae7f30f898dfe44ea69654a35c93e8da4cef6606dc4c72394068fd95f8e9f54a", size = 50619812, upload_time = "2026-01-18T16:18:43.553Z" }, + { url = "https://files.pythonhosted.org/packages/59/8d/2af846cd2412e67a087f5bda4a8e23dfd4ebd570f777db2e8686615dafc1/pyarrow-23.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:5b86bb649e4112fb0614294b7d0a175c7513738876b89655605ebb87c804f861", size = 28263851, upload_time = "2026-01-18T16:19:38.567Z" }, + { url = "https://files.pythonhosted.org/packages/7b/7f/caab863e587041156f6786c52e64151b7386742c8c27140f637176e9230e/pyarrow-23.0.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:ebc017d765d71d80a3f8584ca0566b53e40464586585ac64176115baa0ada7d3", size = 34463240, upload_time = "2026-01-18T16:18:49.755Z" }, + { url = "https://files.pythonhosted.org/packages/c9/fa/3a5b8c86c958e83622b40865e11af0857c48ec763c11d472c87cd518283d/pyarrow-23.0.0-cp314-cp314t-macosx_12_0_x86_64.whl", hash = "sha256:0800cc58a6d17d159df823f87ad66cefebf105b982493d4bad03ee7fab84b993", size = 35935712, upload_time = "2026-01-18T16:18:55.626Z" }, + { url = "https://files.pythonhosted.org/packages/c5/08/17a62078fc1a53decb34a9aa79cf9009efc74d63d2422e5ade9fed2f99e3/pyarrow-23.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:3a7c68c722da9bb5b0f8c10e3eae71d9825a4b429b40b32709df5d1fa55beb3d", size = 44503523, upload_time = "2026-01-18T16:19:03.958Z" }, + { url = "https://files.pythonhosted.org/packages/cc/70/84d45c74341e798aae0323d33b7c39194e23b1abc439ceaf60a68a7a969a/pyarrow-23.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:bd5556c24622df90551063ea41f559b714aa63ca953db884cfb958559087a14e", size = 47542490, upload_time = "2026-01-18T16:19:11.208Z" }, + { url = "https://files.pythonhosted.org/packages/61/d9/d1274b0e6f19e235de17441e53224f4716574b2ca837022d55702f24d71d/pyarrow-23.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:54810f6e6afc4ffee7c2e0051b61722fbea9a4961b46192dcfae8ea12fa09059", size = 48233605, upload_time = "2026-01-18T16:19:19.544Z" }, + { url = "https://files.pythonhosted.org/packages/39/07/e4e2d568cb57543d84482f61e510732820cddb0f47c4bb7df629abfed852/pyarrow-23.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:14de7d48052cf4b0ed174533eafa3cfe0711b8076ad70bede32cf59f744f0d7c", size = 50603979, upload_time = "2026-01-18T16:19:26.717Z" }, + { url = "https://files.pythonhosted.org/packages/72/9c/47693463894b610f8439b2e970b82ef81e9599c757bf2049365e40ff963c/pyarrow-23.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:427deac1f535830a744a4f04a6ac183a64fcac4341b3f618e693c41b7b98d2b0", size = 28338905, upload_time = "2026-01-18T16:19:32.93Z" }, ] [[package]] name = "pyarrow-hotfix" version = "0.7" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d2/ed/c3e8677f7abf3981838c2af7b5ac03e3589b3ef94fcb31d575426abae904/pyarrow_hotfix-0.7.tar.gz", hash = "sha256:59399cd58bdd978b2e42816a4183a55c6472d4e33d183351b6069f11ed42661d", size = 9910 } +sdist = { url = 
"https://files.pythonhosted.org/packages/d2/ed/c3e8677f7abf3981838c2af7b5ac03e3589b3ef94fcb31d575426abae904/pyarrow_hotfix-0.7.tar.gz", hash = "sha256:59399cd58bdd978b2e42816a4183a55c6472d4e33d183351b6069f11ed42661d", size = 9910, upload_time = "2025-04-25T10:17:06.247Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/2e/c3/94ade4906a2f88bc935772f59c934013b4205e773bcb4239db114a6da136/pyarrow_hotfix-0.7-py3-none-any.whl", hash = "sha256:3236f3b5f1260f0e2ac070a55c1a7b339c4bb7267839bd2015e283234e758100", size = 7923 }, + { url = "https://files.pythonhosted.org/packages/2e/c3/94ade4906a2f88bc935772f59c934013b4205e773bcb4239db114a6da136/pyarrow_hotfix-0.7-py3-none-any.whl", hash = "sha256:3236f3b5f1260f0e2ac070a55c1a7b339c4bb7267839bd2015e283234e758100", size = 7923, upload_time = "2025-04-25T10:17:05.224Z" }, ] [[package]] name = "pycparser" version = "3.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/1b/7d/92392ff7815c21062bea51aa7b87d45576f649f16458d78b7cf94b9ab2e6/pycparser-3.0.tar.gz", hash = "sha256:600f49d217304a5902ac3c37e1281c9fe94e4d0489de643a9504c5cdfdfc6b29", size = 103492 } +sdist = { url = "https://files.pythonhosted.org/packages/1b/7d/92392ff7815c21062bea51aa7b87d45576f649f16458d78b7cf94b9ab2e6/pycparser-3.0.tar.gz", hash = "sha256:600f49d217304a5902ac3c37e1281c9fe94e4d0489de643a9504c5cdfdfc6b29", size = 103492, upload_time = "2026-01-21T14:26:51.89Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0c/c3/44f3fbbfa403ea2a7c779186dc20772604442dde72947e7d01069cbe98e3/pycparser-3.0-py3-none-any.whl", hash = "sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992", size = 48172 }, + { url = "https://files.pythonhosted.org/packages/0c/c3/44f3fbbfa403ea2a7c779186dc20772604442dde72947e7d01069cbe98e3/pycparser-3.0-py3-none-any.whl", hash = "sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992", size = 48172, upload_time = "2026-01-21T14:26:50.693Z" }, ] [[package]] @@ -2255,9 +2322,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "typing-inspection" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/69/44/36f1a6e523abc58ae5f928898e4aca2e0ea509b5aa6f6f392a5d882be928/pydantic-2.12.5.tar.gz", hash = "sha256:4d351024c75c0f085a9febbb665ce8c0c6ec5d30e903bdb6394b7ede26aebb49", size = 821591 } +sdist = { url = "https://files.pythonhosted.org/packages/69/44/36f1a6e523abc58ae5f928898e4aca2e0ea509b5aa6f6f392a5d882be928/pydantic-2.12.5.tar.gz", hash = "sha256:4d351024c75c0f085a9febbb665ce8c0c6ec5d30e903bdb6394b7ede26aebb49", size = 821591, upload_time = "2025-11-26T15:11:46.471Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5a/87/b70ad306ebb6f9b585f114d0ac2137d792b48be34d732d60e597c2f8465a/pydantic-2.12.5-py3-none-any.whl", hash = "sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d", size = 463580 }, + { url = "https://files.pythonhosted.org/packages/5a/87/b70ad306ebb6f9b585f114d0ac2137d792b48be34d732d60e597c2f8465a/pydantic-2.12.5-py3-none-any.whl", hash = "sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d", size = 463580, upload_time = "2025-11-26T15:11:44.605Z" }, ] [[package]] @@ -2267,81 +2334,77 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/71/70/23b021c950c2addd24ec408e9ab05d59b035b39d97cdc1130e1bce647bb6/pydantic_core-2.41.5.tar.gz", hash = 
"sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e", size = 460952 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/5f/5d/5f6c63eebb5afee93bcaae4ce9a898f3373ca23df3ccaef086d0233a35a7/pydantic_core-2.41.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f41a7489d32336dbf2199c8c0a215390a751c5b014c2c1c5366e817202e9cdf7", size = 2110990 }, - { url = "https://files.pythonhosted.org/packages/aa/32/9c2e8ccb57c01111e0fd091f236c7b371c1bccea0fa85247ac55b1e2b6b6/pydantic_core-2.41.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:070259a8818988b9a84a449a2a7337c7f430a22acc0859c6b110aa7212a6d9c0", size = 1896003 }, - { url = "https://files.pythonhosted.org/packages/68/b8/a01b53cb0e59139fbc9e4fda3e9724ede8de279097179be4ff31f1abb65a/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e96cea19e34778f8d59fe40775a7a574d95816eb150850a85a7a4c8f4b94ac69", size = 1919200 }, - { url = "https://files.pythonhosted.org/packages/38/de/8c36b5198a29bdaade07b5985e80a233a5ac27137846f3bc2d3b40a47360/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed2e99c456e3fadd05c991f8f437ef902e00eedf34320ba2b0842bd1c3ca3a75", size = 2052578 }, - { url = "https://files.pythonhosted.org/packages/00/b5/0e8e4b5b081eac6cb3dbb7e60a65907549a1ce035a724368c330112adfdd/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65840751b72fbfd82c3c640cff9284545342a4f1eb1586ad0636955b261b0b05", size = 2208504 }, - { url = "https://files.pythonhosted.org/packages/77/56/87a61aad59c7c5b9dc8caad5a41a5545cba3810c3e828708b3d7404f6cef/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e536c98a7626a98feb2d3eaf75944ef6f3dbee447e1f841eae16f2f0a72d8ddc", size = 2335816 }, - { url = "https://files.pythonhosted.org/packages/0d/76/941cc9f73529988688a665a5c0ecff1112b3d95ab48f81db5f7606f522d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eceb81a8d74f9267ef4081e246ffd6d129da5d87e37a77c9bde550cb04870c1c", size = 2075366 }, - { url = "https://files.pythonhosted.org/packages/d3/43/ebef01f69baa07a482844faaa0a591bad1ef129253ffd0cdaa9d8a7f72d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d38548150c39b74aeeb0ce8ee1d8e82696f4a4e16ddc6de7b1d8823f7de4b9b5", size = 2171698 }, - { url = "https://files.pythonhosted.org/packages/b1/87/41f3202e4193e3bacfc2c065fab7706ebe81af46a83d3e27605029c1f5a6/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c23e27686783f60290e36827f9c626e63154b82b116d7fe9adba1fda36da706c", size = 2132603 }, - { url = "https://files.pythonhosted.org/packages/49/7d/4c00df99cb12070b6bccdef4a195255e6020a550d572768d92cc54dba91a/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:482c982f814460eabe1d3bb0adfdc583387bd4691ef00b90575ca0d2b6fe2294", size = 2329591 }, - { url = "https://files.pythonhosted.org/packages/cc/6a/ebf4b1d65d458f3cda6a7335d141305dfa19bdc61140a884d165a8a1bbc7/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bfea2a5f0b4d8d43adf9d7b8bf019fb46fdd10a2e5cde477fbcb9d1fa08c68e1", size = 2319068 }, - { url = "https://files.pythonhosted.org/packages/49/3b/774f2b5cd4192d5ab75870ce4381fd89cf218af999515baf07e7206753f0/pydantic_core-2.41.5-cp312-cp312-win32.whl", hash = "sha256:b74557b16e390ec12dca509bce9264c3bbd128f8a2c376eaa68003d7f327276d", size = 1985908 }, - { url 
= "https://files.pythonhosted.org/packages/86/45/00173a033c801cacf67c190fef088789394feaf88a98a7035b0e40d53dc9/pydantic_core-2.41.5-cp312-cp312-win_amd64.whl", hash = "sha256:1962293292865bca8e54702b08a4f26da73adc83dd1fcf26fbc875b35d81c815", size = 2020145 }, - { url = "https://files.pythonhosted.org/packages/f9/22/91fbc821fa6d261b376a3f73809f907cec5ca6025642c463d3488aad22fb/pydantic_core-2.41.5-cp312-cp312-win_arm64.whl", hash = "sha256:1746d4a3d9a794cacae06a5eaaccb4b8643a131d45fbc9af23e353dc0a5ba5c3", size = 1976179 }, - { url = "https://files.pythonhosted.org/packages/87/06/8806241ff1f70d9939f9af039c6c35f2360cf16e93c2ca76f184e76b1564/pydantic_core-2.41.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:941103c9be18ac8daf7b7adca8228f8ed6bb7a1849020f643b3a14d15b1924d9", size = 2120403 }, - { url = "https://files.pythonhosted.org/packages/94/02/abfa0e0bda67faa65fef1c84971c7e45928e108fe24333c81f3bfe35d5f5/pydantic_core-2.41.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:112e305c3314f40c93998e567879e887a3160bb8689ef3d2c04b6cc62c33ac34", size = 1896206 }, - { url = "https://files.pythonhosted.org/packages/15/df/a4c740c0943e93e6500f9eb23f4ca7ec9bf71b19e608ae5b579678c8d02f/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbaad15cb0c90aa221d43c00e77bb33c93e8d36e0bf74760cd00e732d10a6a0", size = 1919307 }, - { url = "https://files.pythonhosted.org/packages/9a/e3/6324802931ae1d123528988e0e86587c2072ac2e5394b4bc2bc34b61ff6e/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:03ca43e12fab6023fc79d28ca6b39b05f794ad08ec2feccc59a339b02f2b3d33", size = 2063258 }, - { url = "https://files.pythonhosted.org/packages/c9/d4/2230d7151d4957dd79c3044ea26346c148c98fbf0ee6ebd41056f2d62ab5/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc799088c08fa04e43144b164feb0c13f9a0bc40503f8df3e9fde58a3c0c101e", size = 2214917 }, - { url = "https://files.pythonhosted.org/packages/e6/9f/eaac5df17a3672fef0081b6c1bb0b82b33ee89aa5cec0d7b05f52fd4a1fa/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97aeba56665b4c3235a0e52b2c2f5ae9cd071b8a8310ad27bddb3f7fb30e9aa2", size = 2332186 }, - { url = "https://files.pythonhosted.org/packages/cf/4e/35a80cae583a37cf15604b44240e45c05e04e86f9cfd766623149297e971/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:406bf18d345822d6c21366031003612b9c77b3e29ffdb0f612367352aab7d586", size = 2073164 }, - { url = "https://files.pythonhosted.org/packages/bf/e3/f6e262673c6140dd3305d144d032f7bd5f7497d3871c1428521f19f9efa2/pydantic_core-2.41.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b93590ae81f7010dbe380cdeab6f515902ebcbefe0b9327cc4804d74e93ae69d", size = 2179146 }, - { url = "https://files.pythonhosted.org/packages/75/c7/20bd7fc05f0c6ea2056a4565c6f36f8968c0924f19b7d97bbfea55780e73/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:01a3d0ab748ee531f4ea6c3e48ad9dac84ddba4b0d82291f87248f2f9de8d740", size = 2137788 }, - { url = "https://files.pythonhosted.org/packages/3a/8d/34318ef985c45196e004bc46c6eab2eda437e744c124ef0dbe1ff2c9d06b/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:6561e94ba9dacc9c61bce40e2d6bdc3bfaa0259d3ff36ace3b1e6901936d2e3e", size = 2340133 }, - { url = 
"https://files.pythonhosted.org/packages/9c/59/013626bf8c78a5a5d9350d12e7697d3d4de951a75565496abd40ccd46bee/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:915c3d10f81bec3a74fbd4faebe8391013ba61e5a1a8d48c4455b923bdda7858", size = 2324852 }, - { url = "https://files.pythonhosted.org/packages/1a/d9/c248c103856f807ef70c18a4f986693a46a8ffe1602e5d361485da502d20/pydantic_core-2.41.5-cp313-cp313-win32.whl", hash = "sha256:650ae77860b45cfa6e2cdafc42618ceafab3a2d9a3811fcfbd3bbf8ac3c40d36", size = 1994679 }, - { url = "https://files.pythonhosted.org/packages/9e/8b/341991b158ddab181cff136acd2552c9f35bd30380422a639c0671e99a91/pydantic_core-2.41.5-cp313-cp313-win_amd64.whl", hash = "sha256:79ec52ec461e99e13791ec6508c722742ad745571f234ea6255bed38c6480f11", size = 2019766 }, - { url = "https://files.pythonhosted.org/packages/73/7d/f2f9db34af103bea3e09735bb40b021788a5e834c81eedb541991badf8f5/pydantic_core-2.41.5-cp313-cp313-win_arm64.whl", hash = "sha256:3f84d5c1b4ab906093bdc1ff10484838aca54ef08de4afa9de0f5f14d69639cd", size = 1981005 }, - { url = "https://files.pythonhosted.org/packages/ea/28/46b7c5c9635ae96ea0fbb779e271a38129df2550f763937659ee6c5dbc65/pydantic_core-2.41.5-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:3f37a19d7ebcdd20b96485056ba9e8b304e27d9904d233d7b1015db320e51f0a", size = 2119622 }, - { url = "https://files.pythonhosted.org/packages/74/1a/145646e5687e8d9a1e8d09acb278c8535ebe9e972e1f162ed338a622f193/pydantic_core-2.41.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1d1d9764366c73f996edd17abb6d9d7649a7eb690006ab6adbda117717099b14", size = 1891725 }, - { url = "https://files.pythonhosted.org/packages/23/04/e89c29e267b8060b40dca97bfc64a19b2a3cf99018167ea1677d96368273/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e1c2af0fce638d5f1988b686f3b3ea8cd7de5f244ca147c777769e798a9cd1", size = 1915040 }, - { url = "https://files.pythonhosted.org/packages/84/a3/15a82ac7bd97992a82257f777b3583d3e84bdb06ba6858f745daa2ec8a85/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:506d766a8727beef16b7adaeb8ee6217c64fc813646b424d0804d67c16eddb66", size = 2063691 }, - { url = "https://files.pythonhosted.org/packages/74/9b/0046701313c6ef08c0c1cf0e028c67c770a4e1275ca73131563c5f2a310a/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4819fa52133c9aa3c387b3328f25c1facc356491e6135b459f1de698ff64d869", size = 2213897 }, - { url = "https://files.pythonhosted.org/packages/8a/cd/6bac76ecd1b27e75a95ca3a9a559c643b3afcd2dd62086d4b7a32a18b169/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b761d210c9ea91feda40d25b4efe82a1707da2ef62901466a42492c028553a2", size = 2333302 }, - { url = "https://files.pythonhosted.org/packages/4c/d2/ef2074dc020dd6e109611a8be4449b98cd25e1b9b8a303c2f0fca2f2bcf7/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22f0fb8c1c583a3b6f24df2470833b40207e907b90c928cc8d3594b76f874375", size = 2064877 }, - { url = "https://files.pythonhosted.org/packages/18/66/e9db17a9a763d72f03de903883c057b2592c09509ccfe468187f2a2eef29/pydantic_core-2.41.5-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2782c870e99878c634505236d81e5443092fba820f0373997ff75f90f68cd553", size = 2180680 }, - { url = 
"https://files.pythonhosted.org/packages/d3/9e/3ce66cebb929f3ced22be85d4c2399b8e85b622db77dad36b73c5387f8f8/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:0177272f88ab8312479336e1d777f6b124537d47f2123f89cb37e0accea97f90", size = 2138960 }, - { url = "https://files.pythonhosted.org/packages/a6/62/205a998f4327d2079326b01abee48e502ea739d174f0a89295c481a2272e/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:63510af5e38f8955b8ee5687740d6ebf7c2a0886d15a6d65c32814613681bc07", size = 2339102 }, - { url = "https://files.pythonhosted.org/packages/3c/0d/f05e79471e889d74d3d88f5bd20d0ed189ad94c2423d81ff8d0000aab4ff/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:e56ba91f47764cc14f1daacd723e3e82d1a89d783f0f5afe9c364b8bb491ccdb", size = 2326039 }, - { url = "https://files.pythonhosted.org/packages/ec/e1/e08a6208bb100da7e0c4b288eed624a703f4d129bde2da475721a80cab32/pydantic_core-2.41.5-cp314-cp314-win32.whl", hash = "sha256:aec5cf2fd867b4ff45b9959f8b20ea3993fc93e63c7363fe6851424c8a7e7c23", size = 1995126 }, - { url = "https://files.pythonhosted.org/packages/48/5d/56ba7b24e9557f99c9237e29f5c09913c81eeb2f3217e40e922353668092/pydantic_core-2.41.5-cp314-cp314-win_amd64.whl", hash = "sha256:8e7c86f27c585ef37c35e56a96363ab8de4e549a95512445b85c96d3e2f7c1bf", size = 2015489 }, - { url = "https://files.pythonhosted.org/packages/4e/bb/f7a190991ec9e3e0ba22e4993d8755bbc4a32925c0b5b42775c03e8148f9/pydantic_core-2.41.5-cp314-cp314-win_arm64.whl", hash = "sha256:e672ba74fbc2dc8eea59fb6d4aed6845e6905fc2a8afe93175d94a83ba2a01a0", size = 1977288 }, - { url = "https://files.pythonhosted.org/packages/92/ed/77542d0c51538e32e15afe7899d79efce4b81eee631d99850edc2f5e9349/pydantic_core-2.41.5-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:8566def80554c3faa0e65ac30ab0932b9e3a5cd7f8323764303d468e5c37595a", size = 2120255 }, - { url = "https://files.pythonhosted.org/packages/bb/3d/6913dde84d5be21e284439676168b28d8bbba5600d838b9dca99de0fad71/pydantic_core-2.41.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b80aa5095cd3109962a298ce14110ae16b8c1aece8b72f9dafe81cf597ad80b3", size = 1863760 }, - { url = "https://files.pythonhosted.org/packages/5a/f0/e5e6b99d4191da102f2b0eb9687aaa7f5bea5d9964071a84effc3e40f997/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3006c3dd9ba34b0c094c544c6006cc79e87d8612999f1a5d43b769b89181f23c", size = 1878092 }, - { url = "https://files.pythonhosted.org/packages/71/48/36fb760642d568925953bcc8116455513d6e34c4beaa37544118c36aba6d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:72f6c8b11857a856bcfa48c86f5368439f74453563f951e473514579d44aa612", size = 2053385 }, - { url = "https://files.pythonhosted.org/packages/20/25/92dc684dd8eb75a234bc1c764b4210cf2646479d54b47bf46061657292a8/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cb1b2f9742240e4bb26b652a5aeb840aa4b417c7748b6f8387927bc6e45e40d", size = 2218832 }, - { url = "https://files.pythonhosted.org/packages/e2/09/f53e0b05023d3e30357d82eb35835d0f6340ca344720a4599cd663dca599/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd3d54f38609ff308209bd43acea66061494157703364ae40c951f83ba99a1a9", size = 2327585 }, - { url = 
"https://files.pythonhosted.org/packages/aa/4e/2ae1aa85d6af35a39b236b1b1641de73f5a6ac4d5a7509f77b814885760c/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ff4321e56e879ee8d2a879501c8e469414d948f4aba74a2d4593184eb326660", size = 2041078 }, - { url = "https://files.pythonhosted.org/packages/cd/13/2e215f17f0ef326fc72afe94776edb77525142c693767fc347ed6288728d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0d2568a8c11bf8225044aa94409e21da0cb09dcdafe9ecd10250b2baad531a9", size = 2173914 }, - { url = "https://files.pythonhosted.org/packages/02/7a/f999a6dcbcd0e5660bc348a3991c8915ce6599f4f2c6ac22f01d7a10816c/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:a39455728aabd58ceabb03c90e12f71fd30fa69615760a075b9fec596456ccc3", size = 2129560 }, - { url = "https://files.pythonhosted.org/packages/3a/b1/6c990ac65e3b4c079a4fb9f5b05f5b013afa0f4ed6780a3dd236d2cbdc64/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:239edca560d05757817c13dc17c50766136d21f7cd0fac50295499ae24f90fdf", size = 2329244 }, - { url = "https://files.pythonhosted.org/packages/d9/02/3c562f3a51afd4d88fff8dffb1771b30cfdfd79befd9883ee094f5b6c0d8/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:2a5e06546e19f24c6a96a129142a75cee553cc018ffee48a460059b1185f4470", size = 2331955 }, - { url = "https://files.pythonhosted.org/packages/5c/96/5fb7d8c3c17bc8c62fdb031c47d77a1af698f1d7a406b0f79aaa1338f9ad/pydantic_core-2.41.5-cp314-cp314t-win32.whl", hash = "sha256:b4ececa40ac28afa90871c2cc2b9ffd2ff0bf749380fbdf57d165fd23da353aa", size = 1988906 }, - { url = "https://files.pythonhosted.org/packages/22/ed/182129d83032702912c2e2d8bbe33c036f342cc735737064668585dac28f/pydantic_core-2.41.5-cp314-cp314t-win_amd64.whl", hash = "sha256:80aa89cad80b32a912a65332f64a4450ed00966111b6615ca6816153d3585a8c", size = 1981607 }, - { url = "https://files.pythonhosted.org/packages/9f/ed/068e41660b832bb0b1aa5b58011dea2a3fe0ba7861ff38c4d4904c1c1a99/pydantic_core-2.41.5-cp314-cp314t-win_arm64.whl", hash = "sha256:35b44f37a3199f771c3eaa53051bc8a70cd7b54f333531c59e29fd4db5d15008", size = 1974769 }, - { url = "https://files.pythonhosted.org/packages/11/72/90fda5ee3b97e51c494938a4a44c3a35a9c96c19bba12372fb9c634d6f57/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:b96d5f26b05d03cc60f11a7761a5ded1741da411e7fe0909e27a5e6a0cb7b034", size = 2115441 }, - { url = "https://files.pythonhosted.org/packages/1f/53/8942f884fa33f50794f119012dc6a1a02ac43a56407adaac20463df8e98f/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:634e8609e89ceecea15e2d61bc9ac3718caaaa71963717bf3c8f38bfde64242c", size = 1930291 }, - { url = "https://files.pythonhosted.org/packages/79/c8/ecb9ed9cd942bce09fc888ee960b52654fbdbede4ba6c2d6e0d3b1d8b49c/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:93e8740d7503eb008aa2df04d3b9735f845d43ae845e6dcd2be0b55a2da43cd2", size = 1948632 }, - { url = "https://files.pythonhosted.org/packages/2e/1b/687711069de7efa6af934e74f601e2a4307365e8fdc404703afc453eab26/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f15489ba13d61f670dcc96772e733aad1a6f9c429cc27574c6cdaed82d0146ad", size = 2138905 }, - { url = 
"https://files.pythonhosted.org/packages/09/32/59b0c7e63e277fa7911c2fc70ccfb45ce4b98991e7ef37110663437005af/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:7da7087d756b19037bc2c06edc6c170eeef3c3bafcb8f532ff17d64dc427adfd", size = 2110495 }, - { url = "https://files.pythonhosted.org/packages/aa/81/05e400037eaf55ad400bcd318c05bb345b57e708887f07ddb2d20e3f0e98/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:aabf5777b5c8ca26f7824cb4a120a740c9588ed58df9b2d196ce92fba42ff8dc", size = 1915388 }, - { url = "https://files.pythonhosted.org/packages/6e/0d/e3549b2399f71d56476b77dbf3cf8937cec5cd70536bdc0e374a421d0599/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c007fe8a43d43b3969e8469004e9845944f1a80e6acd47c150856bb87f230c56", size = 1942879 }, - { url = "https://files.pythonhosted.org/packages/f7/07/34573da085946b6a313d7c42f82f16e8920bfd730665de2d11c0c37a74b5/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b", size = 2139017 }, +sdist = { url = "https://files.pythonhosted.org/packages/71/70/23b021c950c2addd24ec408e9ab05d59b035b39d97cdc1130e1bce647bb6/pydantic_core-2.41.5.tar.gz", hash = "sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e", size = 460952, upload_time = "2025-11-04T13:43:49.098Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5f/5d/5f6c63eebb5afee93bcaae4ce9a898f3373ca23df3ccaef086d0233a35a7/pydantic_core-2.41.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f41a7489d32336dbf2199c8c0a215390a751c5b014c2c1c5366e817202e9cdf7", size = 2110990, upload_time = "2025-11-04T13:39:58.079Z" }, + { url = "https://files.pythonhosted.org/packages/aa/32/9c2e8ccb57c01111e0fd091f236c7b371c1bccea0fa85247ac55b1e2b6b6/pydantic_core-2.41.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:070259a8818988b9a84a449a2a7337c7f430a22acc0859c6b110aa7212a6d9c0", size = 1896003, upload_time = "2025-11-04T13:39:59.956Z" }, + { url = "https://files.pythonhosted.org/packages/68/b8/a01b53cb0e59139fbc9e4fda3e9724ede8de279097179be4ff31f1abb65a/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e96cea19e34778f8d59fe40775a7a574d95816eb150850a85a7a4c8f4b94ac69", size = 1919200, upload_time = "2025-11-04T13:40:02.241Z" }, + { url = "https://files.pythonhosted.org/packages/38/de/8c36b5198a29bdaade07b5985e80a233a5ac27137846f3bc2d3b40a47360/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed2e99c456e3fadd05c991f8f437ef902e00eedf34320ba2b0842bd1c3ca3a75", size = 2052578, upload_time = "2025-11-04T13:40:04.401Z" }, + { url = "https://files.pythonhosted.org/packages/00/b5/0e8e4b5b081eac6cb3dbb7e60a65907549a1ce035a724368c330112adfdd/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65840751b72fbfd82c3c640cff9284545342a4f1eb1586ad0636955b261b0b05", size = 2208504, upload_time = "2025-11-04T13:40:06.072Z" }, + { url = "https://files.pythonhosted.org/packages/77/56/87a61aad59c7c5b9dc8caad5a41a5545cba3810c3e828708b3d7404f6cef/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e536c98a7626a98feb2d3eaf75944ef6f3dbee447e1f841eae16f2f0a72d8ddc", size = 2335816, upload_time = "2025-11-04T13:40:07.835Z" }, + { url = 
"https://files.pythonhosted.org/packages/0d/76/941cc9f73529988688a665a5c0ecff1112b3d95ab48f81db5f7606f522d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eceb81a8d74f9267ef4081e246ffd6d129da5d87e37a77c9bde550cb04870c1c", size = 2075366, upload_time = "2025-11-04T13:40:09.804Z" }, + { url = "https://files.pythonhosted.org/packages/d3/43/ebef01f69baa07a482844faaa0a591bad1ef129253ffd0cdaa9d8a7f72d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d38548150c39b74aeeb0ce8ee1d8e82696f4a4e16ddc6de7b1d8823f7de4b9b5", size = 2171698, upload_time = "2025-11-04T13:40:12.004Z" }, + { url = "https://files.pythonhosted.org/packages/b1/87/41f3202e4193e3bacfc2c065fab7706ebe81af46a83d3e27605029c1f5a6/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c23e27686783f60290e36827f9c626e63154b82b116d7fe9adba1fda36da706c", size = 2132603, upload_time = "2025-11-04T13:40:13.868Z" }, + { url = "https://files.pythonhosted.org/packages/49/7d/4c00df99cb12070b6bccdef4a195255e6020a550d572768d92cc54dba91a/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:482c982f814460eabe1d3bb0adfdc583387bd4691ef00b90575ca0d2b6fe2294", size = 2329591, upload_time = "2025-11-04T13:40:15.672Z" }, + { url = "https://files.pythonhosted.org/packages/cc/6a/ebf4b1d65d458f3cda6a7335d141305dfa19bdc61140a884d165a8a1bbc7/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bfea2a5f0b4d8d43adf9d7b8bf019fb46fdd10a2e5cde477fbcb9d1fa08c68e1", size = 2319068, upload_time = "2025-11-04T13:40:17.532Z" }, + { url = "https://files.pythonhosted.org/packages/49/3b/774f2b5cd4192d5ab75870ce4381fd89cf218af999515baf07e7206753f0/pydantic_core-2.41.5-cp312-cp312-win32.whl", hash = "sha256:b74557b16e390ec12dca509bce9264c3bbd128f8a2c376eaa68003d7f327276d", size = 1985908, upload_time = "2025-11-04T13:40:19.309Z" }, + { url = "https://files.pythonhosted.org/packages/86/45/00173a033c801cacf67c190fef088789394feaf88a98a7035b0e40d53dc9/pydantic_core-2.41.5-cp312-cp312-win_amd64.whl", hash = "sha256:1962293292865bca8e54702b08a4f26da73adc83dd1fcf26fbc875b35d81c815", size = 2020145, upload_time = "2025-11-04T13:40:21.548Z" }, + { url = "https://files.pythonhosted.org/packages/f9/22/91fbc821fa6d261b376a3f73809f907cec5ca6025642c463d3488aad22fb/pydantic_core-2.41.5-cp312-cp312-win_arm64.whl", hash = "sha256:1746d4a3d9a794cacae06a5eaaccb4b8643a131d45fbc9af23e353dc0a5ba5c3", size = 1976179, upload_time = "2025-11-04T13:40:23.393Z" }, + { url = "https://files.pythonhosted.org/packages/87/06/8806241ff1f70d9939f9af039c6c35f2360cf16e93c2ca76f184e76b1564/pydantic_core-2.41.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:941103c9be18ac8daf7b7adca8228f8ed6bb7a1849020f643b3a14d15b1924d9", size = 2120403, upload_time = "2025-11-04T13:40:25.248Z" }, + { url = "https://files.pythonhosted.org/packages/94/02/abfa0e0bda67faa65fef1c84971c7e45928e108fe24333c81f3bfe35d5f5/pydantic_core-2.41.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:112e305c3314f40c93998e567879e887a3160bb8689ef3d2c04b6cc62c33ac34", size = 1896206, upload_time = "2025-11-04T13:40:27.099Z" }, + { url = "https://files.pythonhosted.org/packages/15/df/a4c740c0943e93e6500f9eb23f4ca7ec9bf71b19e608ae5b579678c8d02f/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbaad15cb0c90aa221d43c00e77bb33c93e8d36e0bf74760cd00e732d10a6a0", size = 1919307, upload_time = "2025-11-04T13:40:29.806Z" }, + { url = 
"https://files.pythonhosted.org/packages/9a/e3/6324802931ae1d123528988e0e86587c2072ac2e5394b4bc2bc34b61ff6e/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:03ca43e12fab6023fc79d28ca6b39b05f794ad08ec2feccc59a339b02f2b3d33", size = 2063258, upload_time = "2025-11-04T13:40:33.544Z" }, + { url = "https://files.pythonhosted.org/packages/c9/d4/2230d7151d4957dd79c3044ea26346c148c98fbf0ee6ebd41056f2d62ab5/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc799088c08fa04e43144b164feb0c13f9a0bc40503f8df3e9fde58a3c0c101e", size = 2214917, upload_time = "2025-11-04T13:40:35.479Z" }, + { url = "https://files.pythonhosted.org/packages/e6/9f/eaac5df17a3672fef0081b6c1bb0b82b33ee89aa5cec0d7b05f52fd4a1fa/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97aeba56665b4c3235a0e52b2c2f5ae9cd071b8a8310ad27bddb3f7fb30e9aa2", size = 2332186, upload_time = "2025-11-04T13:40:37.436Z" }, + { url = "https://files.pythonhosted.org/packages/cf/4e/35a80cae583a37cf15604b44240e45c05e04e86f9cfd766623149297e971/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:406bf18d345822d6c21366031003612b9c77b3e29ffdb0f612367352aab7d586", size = 2073164, upload_time = "2025-11-04T13:40:40.289Z" }, + { url = "https://files.pythonhosted.org/packages/bf/e3/f6e262673c6140dd3305d144d032f7bd5f7497d3871c1428521f19f9efa2/pydantic_core-2.41.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b93590ae81f7010dbe380cdeab6f515902ebcbefe0b9327cc4804d74e93ae69d", size = 2179146, upload_time = "2025-11-04T13:40:42.809Z" }, + { url = "https://files.pythonhosted.org/packages/75/c7/20bd7fc05f0c6ea2056a4565c6f36f8968c0924f19b7d97bbfea55780e73/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:01a3d0ab748ee531f4ea6c3e48ad9dac84ddba4b0d82291f87248f2f9de8d740", size = 2137788, upload_time = "2025-11-04T13:40:44.752Z" }, + { url = "https://files.pythonhosted.org/packages/3a/8d/34318ef985c45196e004bc46c6eab2eda437e744c124ef0dbe1ff2c9d06b/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:6561e94ba9dacc9c61bce40e2d6bdc3bfaa0259d3ff36ace3b1e6901936d2e3e", size = 2340133, upload_time = "2025-11-04T13:40:46.66Z" }, + { url = "https://files.pythonhosted.org/packages/9c/59/013626bf8c78a5a5d9350d12e7697d3d4de951a75565496abd40ccd46bee/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:915c3d10f81bec3a74fbd4faebe8391013ba61e5a1a8d48c4455b923bdda7858", size = 2324852, upload_time = "2025-11-04T13:40:48.575Z" }, + { url = "https://files.pythonhosted.org/packages/1a/d9/c248c103856f807ef70c18a4f986693a46a8ffe1602e5d361485da502d20/pydantic_core-2.41.5-cp313-cp313-win32.whl", hash = "sha256:650ae77860b45cfa6e2cdafc42618ceafab3a2d9a3811fcfbd3bbf8ac3c40d36", size = 1994679, upload_time = "2025-11-04T13:40:50.619Z" }, + { url = "https://files.pythonhosted.org/packages/9e/8b/341991b158ddab181cff136acd2552c9f35bd30380422a639c0671e99a91/pydantic_core-2.41.5-cp313-cp313-win_amd64.whl", hash = "sha256:79ec52ec461e99e13791ec6508c722742ad745571f234ea6255bed38c6480f11", size = 2019766, upload_time = "2025-11-04T13:40:52.631Z" }, + { url = "https://files.pythonhosted.org/packages/73/7d/f2f9db34af103bea3e09735bb40b021788a5e834c81eedb541991badf8f5/pydantic_core-2.41.5-cp313-cp313-win_arm64.whl", hash = "sha256:3f84d5c1b4ab906093bdc1ff10484838aca54ef08de4afa9de0f5f14d69639cd", size = 1981005, upload_time = 
"2025-11-04T13:40:54.734Z" }, + { url = "https://files.pythonhosted.org/packages/ea/28/46b7c5c9635ae96ea0fbb779e271a38129df2550f763937659ee6c5dbc65/pydantic_core-2.41.5-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:3f37a19d7ebcdd20b96485056ba9e8b304e27d9904d233d7b1015db320e51f0a", size = 2119622, upload_time = "2025-11-04T13:40:56.68Z" }, + { url = "https://files.pythonhosted.org/packages/74/1a/145646e5687e8d9a1e8d09acb278c8535ebe9e972e1f162ed338a622f193/pydantic_core-2.41.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1d1d9764366c73f996edd17abb6d9d7649a7eb690006ab6adbda117717099b14", size = 1891725, upload_time = "2025-11-04T13:40:58.807Z" }, + { url = "https://files.pythonhosted.org/packages/23/04/e89c29e267b8060b40dca97bfc64a19b2a3cf99018167ea1677d96368273/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e1c2af0fce638d5f1988b686f3b3ea8cd7de5f244ca147c777769e798a9cd1", size = 1915040, upload_time = "2025-11-04T13:41:00.853Z" }, + { url = "https://files.pythonhosted.org/packages/84/a3/15a82ac7bd97992a82257f777b3583d3e84bdb06ba6858f745daa2ec8a85/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:506d766a8727beef16b7adaeb8ee6217c64fc813646b424d0804d67c16eddb66", size = 2063691, upload_time = "2025-11-04T13:41:03.504Z" }, + { url = "https://files.pythonhosted.org/packages/74/9b/0046701313c6ef08c0c1cf0e028c67c770a4e1275ca73131563c5f2a310a/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4819fa52133c9aa3c387b3328f25c1facc356491e6135b459f1de698ff64d869", size = 2213897, upload_time = "2025-11-04T13:41:05.804Z" }, + { url = "https://files.pythonhosted.org/packages/8a/cd/6bac76ecd1b27e75a95ca3a9a559c643b3afcd2dd62086d4b7a32a18b169/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b761d210c9ea91feda40d25b4efe82a1707da2ef62901466a42492c028553a2", size = 2333302, upload_time = "2025-11-04T13:41:07.809Z" }, + { url = "https://files.pythonhosted.org/packages/4c/d2/ef2074dc020dd6e109611a8be4449b98cd25e1b9b8a303c2f0fca2f2bcf7/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22f0fb8c1c583a3b6f24df2470833b40207e907b90c928cc8d3594b76f874375", size = 2064877, upload_time = "2025-11-04T13:41:09.827Z" }, + { url = "https://files.pythonhosted.org/packages/18/66/e9db17a9a763d72f03de903883c057b2592c09509ccfe468187f2a2eef29/pydantic_core-2.41.5-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2782c870e99878c634505236d81e5443092fba820f0373997ff75f90f68cd553", size = 2180680, upload_time = "2025-11-04T13:41:12.379Z" }, + { url = "https://files.pythonhosted.org/packages/d3/9e/3ce66cebb929f3ced22be85d4c2399b8e85b622db77dad36b73c5387f8f8/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:0177272f88ab8312479336e1d777f6b124537d47f2123f89cb37e0accea97f90", size = 2138960, upload_time = "2025-11-04T13:41:14.627Z" }, + { url = "https://files.pythonhosted.org/packages/a6/62/205a998f4327d2079326b01abee48e502ea739d174f0a89295c481a2272e/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:63510af5e38f8955b8ee5687740d6ebf7c2a0886d15a6d65c32814613681bc07", size = 2339102, upload_time = "2025-11-04T13:41:16.868Z" }, + { url = "https://files.pythonhosted.org/packages/3c/0d/f05e79471e889d74d3d88f5bd20d0ed189ad94c2423d81ff8d0000aab4ff/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_x86_64.whl", hash = 
"sha256:e56ba91f47764cc14f1daacd723e3e82d1a89d783f0f5afe9c364b8bb491ccdb", size = 2326039, upload_time = "2025-11-04T13:41:18.934Z" }, + { url = "https://files.pythonhosted.org/packages/ec/e1/e08a6208bb100da7e0c4b288eed624a703f4d129bde2da475721a80cab32/pydantic_core-2.41.5-cp314-cp314-win32.whl", hash = "sha256:aec5cf2fd867b4ff45b9959f8b20ea3993fc93e63c7363fe6851424c8a7e7c23", size = 1995126, upload_time = "2025-11-04T13:41:21.418Z" }, + { url = "https://files.pythonhosted.org/packages/48/5d/56ba7b24e9557f99c9237e29f5c09913c81eeb2f3217e40e922353668092/pydantic_core-2.41.5-cp314-cp314-win_amd64.whl", hash = "sha256:8e7c86f27c585ef37c35e56a96363ab8de4e549a95512445b85c96d3e2f7c1bf", size = 2015489, upload_time = "2025-11-04T13:41:24.076Z" }, + { url = "https://files.pythonhosted.org/packages/4e/bb/f7a190991ec9e3e0ba22e4993d8755bbc4a32925c0b5b42775c03e8148f9/pydantic_core-2.41.5-cp314-cp314-win_arm64.whl", hash = "sha256:e672ba74fbc2dc8eea59fb6d4aed6845e6905fc2a8afe93175d94a83ba2a01a0", size = 1977288, upload_time = "2025-11-04T13:41:26.33Z" }, + { url = "https://files.pythonhosted.org/packages/92/ed/77542d0c51538e32e15afe7899d79efce4b81eee631d99850edc2f5e9349/pydantic_core-2.41.5-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:8566def80554c3faa0e65ac30ab0932b9e3a5cd7f8323764303d468e5c37595a", size = 2120255, upload_time = "2025-11-04T13:41:28.569Z" }, + { url = "https://files.pythonhosted.org/packages/bb/3d/6913dde84d5be21e284439676168b28d8bbba5600d838b9dca99de0fad71/pydantic_core-2.41.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b80aa5095cd3109962a298ce14110ae16b8c1aece8b72f9dafe81cf597ad80b3", size = 1863760, upload_time = "2025-11-04T13:41:31.055Z" }, + { url = "https://files.pythonhosted.org/packages/5a/f0/e5e6b99d4191da102f2b0eb9687aaa7f5bea5d9964071a84effc3e40f997/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3006c3dd9ba34b0c094c544c6006cc79e87d8612999f1a5d43b769b89181f23c", size = 1878092, upload_time = "2025-11-04T13:41:33.21Z" }, + { url = "https://files.pythonhosted.org/packages/71/48/36fb760642d568925953bcc8116455513d6e34c4beaa37544118c36aba6d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:72f6c8b11857a856bcfa48c86f5368439f74453563f951e473514579d44aa612", size = 2053385, upload_time = "2025-11-04T13:41:35.508Z" }, + { url = "https://files.pythonhosted.org/packages/20/25/92dc684dd8eb75a234bc1c764b4210cf2646479d54b47bf46061657292a8/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cb1b2f9742240e4bb26b652a5aeb840aa4b417c7748b6f8387927bc6e45e40d", size = 2218832, upload_time = "2025-11-04T13:41:37.732Z" }, + { url = "https://files.pythonhosted.org/packages/e2/09/f53e0b05023d3e30357d82eb35835d0f6340ca344720a4599cd663dca599/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd3d54f38609ff308209bd43acea66061494157703364ae40c951f83ba99a1a9", size = 2327585, upload_time = "2025-11-04T13:41:40Z" }, + { url = "https://files.pythonhosted.org/packages/aa/4e/2ae1aa85d6af35a39b236b1b1641de73f5a6ac4d5a7509f77b814885760c/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ff4321e56e879ee8d2a879501c8e469414d948f4aba74a2d4593184eb326660", size = 2041078, upload_time = "2025-11-04T13:41:42.323Z" }, + { url = 
"https://files.pythonhosted.org/packages/cd/13/2e215f17f0ef326fc72afe94776edb77525142c693767fc347ed6288728d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0d2568a8c11bf8225044aa94409e21da0cb09dcdafe9ecd10250b2baad531a9", size = 2173914, upload_time = "2025-11-04T13:41:45.221Z" }, + { url = "https://files.pythonhosted.org/packages/02/7a/f999a6dcbcd0e5660bc348a3991c8915ce6599f4f2c6ac22f01d7a10816c/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:a39455728aabd58ceabb03c90e12f71fd30fa69615760a075b9fec596456ccc3", size = 2129560, upload_time = "2025-11-04T13:41:47.474Z" }, + { url = "https://files.pythonhosted.org/packages/3a/b1/6c990ac65e3b4c079a4fb9f5b05f5b013afa0f4ed6780a3dd236d2cbdc64/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:239edca560d05757817c13dc17c50766136d21f7cd0fac50295499ae24f90fdf", size = 2329244, upload_time = "2025-11-04T13:41:49.992Z" }, + { url = "https://files.pythonhosted.org/packages/d9/02/3c562f3a51afd4d88fff8dffb1771b30cfdfd79befd9883ee094f5b6c0d8/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:2a5e06546e19f24c6a96a129142a75cee553cc018ffee48a460059b1185f4470", size = 2331955, upload_time = "2025-11-04T13:41:54.079Z" }, + { url = "https://files.pythonhosted.org/packages/5c/96/5fb7d8c3c17bc8c62fdb031c47d77a1af698f1d7a406b0f79aaa1338f9ad/pydantic_core-2.41.5-cp314-cp314t-win32.whl", hash = "sha256:b4ececa40ac28afa90871c2cc2b9ffd2ff0bf749380fbdf57d165fd23da353aa", size = 1988906, upload_time = "2025-11-04T13:41:56.606Z" }, + { url = "https://files.pythonhosted.org/packages/22/ed/182129d83032702912c2e2d8bbe33c036f342cc735737064668585dac28f/pydantic_core-2.41.5-cp314-cp314t-win_amd64.whl", hash = "sha256:80aa89cad80b32a912a65332f64a4450ed00966111b6615ca6816153d3585a8c", size = 1981607, upload_time = "2025-11-04T13:41:58.889Z" }, + { url = "https://files.pythonhosted.org/packages/9f/ed/068e41660b832bb0b1aa5b58011dea2a3fe0ba7861ff38c4d4904c1c1a99/pydantic_core-2.41.5-cp314-cp314t-win_arm64.whl", hash = "sha256:35b44f37a3199f771c3eaa53051bc8a70cd7b54f333531c59e29fd4db5d15008", size = 1974769, upload_time = "2025-11-04T13:42:01.186Z" }, + { url = "https://files.pythonhosted.org/packages/09/32/59b0c7e63e277fa7911c2fc70ccfb45ce4b98991e7ef37110663437005af/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:7da7087d756b19037bc2c06edc6c170eeef3c3bafcb8f532ff17d64dc427adfd", size = 2110495, upload_time = "2025-11-04T13:42:49.689Z" }, + { url = "https://files.pythonhosted.org/packages/aa/81/05e400037eaf55ad400bcd318c05bb345b57e708887f07ddb2d20e3f0e98/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:aabf5777b5c8ca26f7824cb4a120a740c9588ed58df9b2d196ce92fba42ff8dc", size = 1915388, upload_time = "2025-11-04T13:42:52.215Z" }, + { url = "https://files.pythonhosted.org/packages/6e/0d/e3549b2399f71d56476b77dbf3cf8937cec5cd70536bdc0e374a421d0599/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c007fe8a43d43b3969e8469004e9845944f1a80e6acd47c150856bb87f230c56", size = 1942879, upload_time = "2025-11-04T13:42:56.483Z" }, + { url = "https://files.pythonhosted.org/packages/f7/07/34573da085946b6a313d7c42f82f16e8920bfd730665de2d11c0c37a74b5/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b", size = 2139017, upload_time = "2025-11-04T13:42:59.471Z" }, ] [[package]] name = "pygments" version = "2.19.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631 } +sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload_time = "2025-06-21T13:39:12.283Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217 }, + { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload_time = "2025-06-21T13:39:07.939Z" }, ] [[package]] @@ -2352,16 +2415,32 @@ dependencies = [ { name = "pyparsing" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/39/2d/cd65dc4fa8c901e6d02b4074771ced04828d71af18b97da24ed1e55507d7/pyhocon-0.3.61-py3-none-any.whl", hash = "sha256:73d0f064af9a7d454949c5557284ce1d716cfd8e1383ecc90095fc575d278df0", size = 25049 }, + { url = "https://files.pythonhosted.org/packages/39/2d/cd65dc4fa8c901e6d02b4074771ced04828d71af18b97da24ed1e55507d7/pyhocon-0.3.61-py3-none-any.whl", hash = "sha256:73d0f064af9a7d454949c5557284ce1d716cfd8e1383ecc90095fc575d278df0", size = 25049, upload_time = "2024-05-29T15:09:23.683Z" }, ] [[package]] name = "pyparsing" version = "3.3.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f3/91/9c6ee907786a473bf81c5f53cf703ba0957b23ab84c264080fb5a450416f/pyparsing-3.3.2.tar.gz", hash = "sha256:c777f4d763f140633dcb6d8a3eda953bf7a214dc4eff598413c070bcdc117cbc", size = 6851574 } +sdist = { url = "https://files.pythonhosted.org/packages/f3/91/9c6ee907786a473bf81c5f53cf703ba0957b23ab84c264080fb5a450416f/pyparsing-3.3.2.tar.gz", hash = "sha256:c777f4d763f140633dcb6d8a3eda953bf7a214dc4eff598413c070bcdc117cbc", size = 6851574, upload_time = "2026-01-21T03:57:59.36Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/10/bd/c038d7cc38edc1aa5bf91ab8068b63d4308c66c4c8bb3cbba7dfbc049f9c/pyparsing-3.3.2-py3-none-any.whl", hash = "sha256:850ba148bd908d7e2411587e247a1e4f0327839c40e2e5e6d05a007ecc69911d", size = 122781, upload_time = "2026-01-21T03:57:55.912Z" }, +] + +[[package]] +name = "pytest" +version = "9.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload_time = "2025-12-06T21:30:51.014Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/10/bd/c038d7cc38edc1aa5bf91ab8068b63d4308c66c4c8bb3cbba7dfbc049f9c/pyparsing-3.3.2-py3-none-any.whl", hash = "sha256:850ba148bd908d7e2411587e247a1e4f0327839c40e2e5e6d05a007ecc69911d", size = 122781 }, + { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload_time = "2025-12-06T21:30:49.154Z" }, ] [[package]] @@ -2371,82 +2450,82 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "six" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432 } +sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432, upload_time = "2024-03-01T18:36:20.211Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892 }, + { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload_time = "2024-03-01T18:36:18.57Z" }, ] [[package]] name = "python-dotenv" version = "1.2.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f0/26/19cadc79a718c5edbec86fd4919a6b6d3f681039a2f6d66d14be94e75fb9/python_dotenv-1.2.1.tar.gz", hash = "sha256:42667e897e16ab0d66954af0e60a9caa94f0fd4ecf3aaf6d2d260eec1aa36ad6", size = 44221 } +sdist = { url = "https://files.pythonhosted.org/packages/f0/26/19cadc79a718c5edbec86fd4919a6b6d3f681039a2f6d66d14be94e75fb9/python_dotenv-1.2.1.tar.gz", hash = "sha256:42667e897e16ab0d66954af0e60a9caa94f0fd4ecf3aaf6d2d260eec1aa36ad6", size = 44221, upload_time = "2025-10-26T15:12:10.434Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/14/1b/a298b06749107c305e1fe0f814c6c74aea7b2f1e10989cb30f544a1b3253/python_dotenv-1.2.1-py3-none-any.whl", hash = "sha256:b81ee9561e9ca4004139c6cbba3a238c32b03e4894671e181b671e8cb8425d61", size = 21230 }, + { url = "https://files.pythonhosted.org/packages/14/1b/a298b06749107c305e1fe0f814c6c74aea7b2f1e10989cb30f544a1b3253/python_dotenv-1.2.1-py3-none-any.whl", hash = "sha256:b81ee9561e9ca4004139c6cbba3a238c32b03e4894671e181b671e8cb8425d61", size = 21230, upload_time = "2025-10-26T15:12:09.109Z" }, ] [[package]] name = "pytokens" version = "0.3.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/4e/8d/a762be14dae1c3bf280202ba3172020b2b0b4c537f94427435f19c413b72/pytokens-0.3.0.tar.gz", hash = "sha256:2f932b14ed08de5fcf0b391ace2642f858f1394c0857202959000b68ed7a458a", size = 17644 } +sdist = { url = "https://files.pythonhosted.org/packages/4e/8d/a762be14dae1c3bf280202ba3172020b2b0b4c537f94427435f19c413b72/pytokens-0.3.0.tar.gz", hash = 
"sha256:2f932b14ed08de5fcf0b391ace2642f858f1394c0857202959000b68ed7a458a", size = 17644, upload_time = "2025-11-05T13:36:35.34Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/84/25/d9db8be44e205a124f6c98bc0324b2bb149b7431c53877fc6d1038dddaf5/pytokens-0.3.0-py3-none-any.whl", hash = "sha256:95b2b5eaf832e469d141a378872480ede3f251a5a5041b8ec6e581d3ac71bbf3", size = 12195 }, + { url = "https://files.pythonhosted.org/packages/84/25/d9db8be44e205a124f6c98bc0324b2bb149b7431c53877fc6d1038dddaf5/pytokens-0.3.0-py3-none-any.whl", hash = "sha256:95b2b5eaf832e469d141a378872480ede3f251a5a5041b8ec6e581d3ac71bbf3", size = 12195, upload_time = "2025-11-05T13:36:33.183Z" }, ] [[package]] name = "pytz" version = "2025.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f8/bf/abbd3cdfb8fbc7fb3d4d38d320f2441b1e7cbe29be4f23797b4a2b5d8aac/pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3", size = 320884 } +sdist = { url = "https://files.pythonhosted.org/packages/f8/bf/abbd3cdfb8fbc7fb3d4d38d320f2441b1e7cbe29be4f23797b4a2b5d8aac/pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3", size = 320884, upload_time = "2025-03-25T02:25:00.538Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225 }, + { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225, upload_time = "2025-03-25T02:24:58.468Z" }, ] [[package]] name = "pyyaml" version = "6.0.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d1/33/422b98d2195232ca1826284a76852ad5a86fe23e31b009c9886b2d0fb8b2/pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196", size = 182063 }, - { url = "https://files.pythonhosted.org/packages/89/a0/6cf41a19a1f2f3feab0e9c0b74134aa2ce6849093d5517a0c550fe37a648/pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0", size = 173973 }, - { url = "https://files.pythonhosted.org/packages/ed/23/7a778b6bd0b9a8039df8b1b1d80e2e2ad78aa04171592c8a5c43a56a6af4/pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28", size = 775116 }, - { url = "https://files.pythonhosted.org/packages/65/30/d7353c338e12baef4ecc1b09e877c1970bd3382789c159b4f89d6a70dc09/pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c", size = 844011 }, - { url = 
"https://files.pythonhosted.org/packages/8b/9d/b3589d3877982d4f2329302ef98a8026e7f4443c765c46cfecc8858c6b4b/pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc", size = 807870 }, - { url = "https://files.pythonhosted.org/packages/05/c0/b3be26a015601b822b97d9149ff8cb5ead58c66f981e04fedf4e762f4bd4/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e", size = 761089 }, - { url = "https://files.pythonhosted.org/packages/be/8e/98435a21d1d4b46590d5459a22d88128103f8da4c2d4cb8f14f2a96504e1/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea", size = 790181 }, - { url = "https://files.pythonhosted.org/packages/74/93/7baea19427dcfbe1e5a372d81473250b379f04b1bd3c4c5ff825e2327202/pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5", size = 137658 }, - { url = "https://files.pythonhosted.org/packages/86/bf/899e81e4cce32febab4fb42bb97dcdf66bc135272882d1987881a4b519e9/pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b", size = 154003 }, - { url = "https://files.pythonhosted.org/packages/1a/08/67bd04656199bbb51dbed1439b7f27601dfb576fb864099c7ef0c3e55531/pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd", size = 140344 }, - { url = "https://files.pythonhosted.org/packages/d1/11/0fd08f8192109f7169db964b5707a2f1e8b745d4e239b784a5a1dd80d1db/pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8", size = 181669 }, - { url = "https://files.pythonhosted.org/packages/b1/16/95309993f1d3748cd644e02e38b75d50cbc0d9561d21f390a76242ce073f/pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1", size = 173252 }, - { url = "https://files.pythonhosted.org/packages/50/31/b20f376d3f810b9b2371e72ef5adb33879b25edb7a6d072cb7ca0c486398/pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c", size = 767081 }, - { url = "https://files.pythonhosted.org/packages/49/1e/a55ca81e949270d5d4432fbbd19dfea5321eda7c41a849d443dc92fd1ff7/pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5", size = 841159 }, - { url = "https://files.pythonhosted.org/packages/74/27/e5b8f34d02d9995b80abcef563ea1f8b56d20134d8f4e5e81733b1feceb2/pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6", size = 801626 }, - { url = "https://files.pythonhosted.org/packages/f9/11/ba845c23988798f40e52ba45f34849aa8a1f2d4af4b798588010792ebad6/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6", size = 753613 }, - { url = "https://files.pythonhosted.org/packages/3d/e0/7966e1a7bfc0a45bf0a7fb6b98ea03fc9b8d84fa7f2229e9659680b69ee3/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be", size = 794115 }, - { url = "https://files.pythonhosted.org/packages/de/94/980b50a6531b3019e45ddeada0626d45fa85cbe22300844a7983285bed3b/pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26", size = 137427 }, - { url = "https://files.pythonhosted.org/packages/97/c9/39d5b874e8b28845e4ec2202b5da735d0199dbe5b8fb85f91398814a9a46/pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c", size = 154090 }, - { url = "https://files.pythonhosted.org/packages/73/e8/2bdf3ca2090f68bb3d75b44da7bbc71843b19c9f2b9cb9b0f4ab7a5a4329/pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb", size = 140246 }, - { url = "https://files.pythonhosted.org/packages/9d/8c/f4bd7f6465179953d3ac9bc44ac1a8a3e6122cf8ada906b4f96c60172d43/pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac", size = 181814 }, - { url = "https://files.pythonhosted.org/packages/bd/9c/4d95bb87eb2063d20db7b60faa3840c1b18025517ae857371c4dd55a6b3a/pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310", size = 173809 }, - { url = "https://files.pythonhosted.org/packages/92/b5/47e807c2623074914e29dabd16cbbdd4bf5e9b2db9f8090fa64411fc5382/pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7", size = 766454 }, - { url = "https://files.pythonhosted.org/packages/02/9e/e5e9b168be58564121efb3de6859c452fccde0ab093d8438905899a3a483/pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788", size = 836355 }, - { url = "https://files.pythonhosted.org/packages/88/f9/16491d7ed2a919954993e48aa941b200f38040928474c9e85ea9e64222c3/pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5", size = 794175 }, - { url = "https://files.pythonhosted.org/packages/dd/3f/5989debef34dc6397317802b527dbbafb2b4760878a53d4166579111411e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764", size = 755228 }, - { url = "https://files.pythonhosted.org/packages/d7/ce/af88a49043cd2e265be63d083fc75b27b6ed062f5f9fd6cdc223ad62f03e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35", size = 789194 }, - { url = "https://files.pythonhosted.org/packages/23/20/bb6982b26a40bb43951265ba29d4c246ef0ff59c9fdcdf0ed04e0687de4d/pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac", size = 156429 }, - { url = "https://files.pythonhosted.org/packages/f4/f4/a4541072bb9422c8a883ab55255f918fa378ecf083f5b85e87fc2b4eda1b/pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3", size = 143912 }, - { url = 
"https://files.pythonhosted.org/packages/7c/f9/07dd09ae774e4616edf6cda684ee78f97777bdd15847253637a6f052a62f/pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3", size = 189108 }, - { url = "https://files.pythonhosted.org/packages/4e/78/8d08c9fb7ce09ad8c38ad533c1191cf27f7ae1effe5bb9400a46d9437fcf/pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba", size = 183641 }, - { url = "https://files.pythonhosted.org/packages/7b/5b/3babb19104a46945cf816d047db2788bcaf8c94527a805610b0289a01c6b/pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c", size = 831901 }, - { url = "https://files.pythonhosted.org/packages/8b/cc/dff0684d8dc44da4d22a13f35f073d558c268780ce3c6ba1b87055bb0b87/pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702", size = 861132 }, - { url = "https://files.pythonhosted.org/packages/b1/5e/f77dc6b9036943e285ba76b49e118d9ea929885becb0a29ba8a7c75e29fe/pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c", size = 839261 }, - { url = "https://files.pythonhosted.org/packages/ce/88/a9db1376aa2a228197c58b37302f284b5617f56a5d959fd1763fb1675ce6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065", size = 805272 }, - { url = "https://files.pythonhosted.org/packages/da/92/1446574745d74df0c92e6aa4a7b0b3130706a4142b2d1a5869f2eaa423c6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65", size = 829923 }, - { url = "https://files.pythonhosted.org/packages/f0/7a/1c7270340330e575b92f397352af856a8c06f230aa3e76f86b39d01b416a/pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9", size = 174062 }, - { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341 }, +sdist = { url = "https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960, upload_time = "2025-09-25T21:33:16.546Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/33/422b98d2195232ca1826284a76852ad5a86fe23e31b009c9886b2d0fb8b2/pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196", size = 182063, upload_time = "2025-09-25T21:32:11.445Z" }, + { url = "https://files.pythonhosted.org/packages/89/a0/6cf41a19a1f2f3feab0e9c0b74134aa2ce6849093d5517a0c550fe37a648/pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0", size = 173973, upload_time = "2025-09-25T21:32:12.492Z" }, + { url = 
"https://files.pythonhosted.org/packages/ed/23/7a778b6bd0b9a8039df8b1b1d80e2e2ad78aa04171592c8a5c43a56a6af4/pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28", size = 775116, upload_time = "2025-09-25T21:32:13.652Z" }, + { url = "https://files.pythonhosted.org/packages/65/30/d7353c338e12baef4ecc1b09e877c1970bd3382789c159b4f89d6a70dc09/pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c", size = 844011, upload_time = "2025-09-25T21:32:15.21Z" }, + { url = "https://files.pythonhosted.org/packages/8b/9d/b3589d3877982d4f2329302ef98a8026e7f4443c765c46cfecc8858c6b4b/pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc", size = 807870, upload_time = "2025-09-25T21:32:16.431Z" }, + { url = "https://files.pythonhosted.org/packages/05/c0/b3be26a015601b822b97d9149ff8cb5ead58c66f981e04fedf4e762f4bd4/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e", size = 761089, upload_time = "2025-09-25T21:32:17.56Z" }, + { url = "https://files.pythonhosted.org/packages/be/8e/98435a21d1d4b46590d5459a22d88128103f8da4c2d4cb8f14f2a96504e1/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea", size = 790181, upload_time = "2025-09-25T21:32:18.834Z" }, + { url = "https://files.pythonhosted.org/packages/74/93/7baea19427dcfbe1e5a372d81473250b379f04b1bd3c4c5ff825e2327202/pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5", size = 137658, upload_time = "2025-09-25T21:32:20.209Z" }, + { url = "https://files.pythonhosted.org/packages/86/bf/899e81e4cce32febab4fb42bb97dcdf66bc135272882d1987881a4b519e9/pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b", size = 154003, upload_time = "2025-09-25T21:32:21.167Z" }, + { url = "https://files.pythonhosted.org/packages/1a/08/67bd04656199bbb51dbed1439b7f27601dfb576fb864099c7ef0c3e55531/pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd", size = 140344, upload_time = "2025-09-25T21:32:22.617Z" }, + { url = "https://files.pythonhosted.org/packages/d1/11/0fd08f8192109f7169db964b5707a2f1e8b745d4e239b784a5a1dd80d1db/pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8", size = 181669, upload_time = "2025-09-25T21:32:23.673Z" }, + { url = "https://files.pythonhosted.org/packages/b1/16/95309993f1d3748cd644e02e38b75d50cbc0d9561d21f390a76242ce073f/pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1", size = 173252, upload_time = "2025-09-25T21:32:25.149Z" }, + { url = "https://files.pythonhosted.org/packages/50/31/b20f376d3f810b9b2371e72ef5adb33879b25edb7a6d072cb7ca0c486398/pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c", size = 767081, upload_time = "2025-09-25T21:32:26.575Z" }, + { 
url = "https://files.pythonhosted.org/packages/49/1e/a55ca81e949270d5d4432fbbd19dfea5321eda7c41a849d443dc92fd1ff7/pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5", size = 841159, upload_time = "2025-09-25T21:32:27.727Z" }, + { url = "https://files.pythonhosted.org/packages/74/27/e5b8f34d02d9995b80abcef563ea1f8b56d20134d8f4e5e81733b1feceb2/pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6", size = 801626, upload_time = "2025-09-25T21:32:28.878Z" }, + { url = "https://files.pythonhosted.org/packages/f9/11/ba845c23988798f40e52ba45f34849aa8a1f2d4af4b798588010792ebad6/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6", size = 753613, upload_time = "2025-09-25T21:32:30.178Z" }, + { url = "https://files.pythonhosted.org/packages/3d/e0/7966e1a7bfc0a45bf0a7fb6b98ea03fc9b8d84fa7f2229e9659680b69ee3/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be", size = 794115, upload_time = "2025-09-25T21:32:31.353Z" }, + { url = "https://files.pythonhosted.org/packages/de/94/980b50a6531b3019e45ddeada0626d45fa85cbe22300844a7983285bed3b/pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26", size = 137427, upload_time = "2025-09-25T21:32:32.58Z" }, + { url = "https://files.pythonhosted.org/packages/97/c9/39d5b874e8b28845e4ec2202b5da735d0199dbe5b8fb85f91398814a9a46/pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c", size = 154090, upload_time = "2025-09-25T21:32:33.659Z" }, + { url = "https://files.pythonhosted.org/packages/73/e8/2bdf3ca2090f68bb3d75b44da7bbc71843b19c9f2b9cb9b0f4ab7a5a4329/pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb", size = 140246, upload_time = "2025-09-25T21:32:34.663Z" }, + { url = "https://files.pythonhosted.org/packages/9d/8c/f4bd7f6465179953d3ac9bc44ac1a8a3e6122cf8ada906b4f96c60172d43/pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac", size = 181814, upload_time = "2025-09-25T21:32:35.712Z" }, + { url = "https://files.pythonhosted.org/packages/bd/9c/4d95bb87eb2063d20db7b60faa3840c1b18025517ae857371c4dd55a6b3a/pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310", size = 173809, upload_time = "2025-09-25T21:32:36.789Z" }, + { url = "https://files.pythonhosted.org/packages/92/b5/47e807c2623074914e29dabd16cbbdd4bf5e9b2db9f8090fa64411fc5382/pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7", size = 766454, upload_time = "2025-09-25T21:32:37.966Z" }, + { url = "https://files.pythonhosted.org/packages/02/9e/e5e9b168be58564121efb3de6859c452fccde0ab093d8438905899a3a483/pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788", size = 836355, upload_time = "2025-09-25T21:32:39.178Z" }, + { 
url = "https://files.pythonhosted.org/packages/88/f9/16491d7ed2a919954993e48aa941b200f38040928474c9e85ea9e64222c3/pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5", size = 794175, upload_time = "2025-09-25T21:32:40.865Z" }, + { url = "https://files.pythonhosted.org/packages/dd/3f/5989debef34dc6397317802b527dbbafb2b4760878a53d4166579111411e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764", size = 755228, upload_time = "2025-09-25T21:32:42.084Z" }, + { url = "https://files.pythonhosted.org/packages/d7/ce/af88a49043cd2e265be63d083fc75b27b6ed062f5f9fd6cdc223ad62f03e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35", size = 789194, upload_time = "2025-09-25T21:32:43.362Z" }, + { url = "https://files.pythonhosted.org/packages/23/20/bb6982b26a40bb43951265ba29d4c246ef0ff59c9fdcdf0ed04e0687de4d/pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac", size = 156429, upload_time = "2025-09-25T21:32:57.844Z" }, + { url = "https://files.pythonhosted.org/packages/f4/f4/a4541072bb9422c8a883ab55255f918fa378ecf083f5b85e87fc2b4eda1b/pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3", size = 143912, upload_time = "2025-09-25T21:32:59.247Z" }, + { url = "https://files.pythonhosted.org/packages/7c/f9/07dd09ae774e4616edf6cda684ee78f97777bdd15847253637a6f052a62f/pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3", size = 189108, upload_time = "2025-09-25T21:32:44.377Z" }, + { url = "https://files.pythonhosted.org/packages/4e/78/8d08c9fb7ce09ad8c38ad533c1191cf27f7ae1effe5bb9400a46d9437fcf/pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba", size = 183641, upload_time = "2025-09-25T21:32:45.407Z" }, + { url = "https://files.pythonhosted.org/packages/7b/5b/3babb19104a46945cf816d047db2788bcaf8c94527a805610b0289a01c6b/pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c", size = 831901, upload_time = "2025-09-25T21:32:48.83Z" }, + { url = "https://files.pythonhosted.org/packages/8b/cc/dff0684d8dc44da4d22a13f35f073d558c268780ce3c6ba1b87055bb0b87/pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702", size = 861132, upload_time = "2025-09-25T21:32:50.149Z" }, + { url = "https://files.pythonhosted.org/packages/b1/5e/f77dc6b9036943e285ba76b49e118d9ea929885becb0a29ba8a7c75e29fe/pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c", size = 839261, upload_time = "2025-09-25T21:32:51.808Z" }, + { url = "https://files.pythonhosted.org/packages/ce/88/a9db1376aa2a228197c58b37302f284b5617f56a5d959fd1763fb1675ce6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065", size = 805272, upload_time = 
"2025-09-25T21:32:52.941Z" }, + { url = "https://files.pythonhosted.org/packages/da/92/1446574745d74df0c92e6aa4a7b0b3130706a4142b2d1a5869f2eaa423c6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65", size = 829923, upload_time = "2025-09-25T21:32:54.537Z" }, + { url = "https://files.pythonhosted.org/packages/f0/7a/1c7270340330e575b92f397352af856a8c06f230aa3e76f86b39d01b416a/pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9", size = 174062, upload_time = "2025-09-25T21:32:55.767Z" }, + { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload_time = "2025-09-25T21:32:56.828Z" }, ] [[package]] @@ -2458,87 +2537,87 @@ dependencies = [ { name = "rpds-py" }, { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/22/f5/df4e9027acead3ecc63e50fe1e36aca1523e1719559c499951bb4b53188f/referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8", size = 78036 } +sdist = { url = "https://files.pythonhosted.org/packages/22/f5/df4e9027acead3ecc63e50fe1e36aca1523e1719559c499951bb4b53188f/referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8", size = 78036, upload_time = "2025-10-13T15:30:48.871Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/2c/58/ca301544e1fa93ed4f80d724bf5b194f6e4b945841c5bfd555878eea9fcb/referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231", size = 26766 }, + { url = "https://files.pythonhosted.org/packages/2c/58/ca301544e1fa93ed4f80d724bf5b194f6e4b945841c5bfd555878eea9fcb/referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231", size = 26766, upload_time = "2025-10-13T15:30:47.625Z" }, ] [[package]] name = "regex" version = "2025.11.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/cc/a9/546676f25e573a4cf00fe8e119b78a37b6a8fe2dc95cda877b30889c9c45/regex-2025.11.3.tar.gz", hash = "sha256:1fedc720f9bb2494ce31a58a1631f9c82df6a09b49c19517ea5cc280b4541e01", size = 414669 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e8/74/18f04cb53e58e3fb107439699bd8375cf5a835eec81084e0bddbd122e4c2/regex-2025.11.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:bc8ab71e2e31b16e40868a40a69007bc305e1109bd4658eb6cad007e0bf67c41", size = 489312 }, - { url = "https://files.pythonhosted.org/packages/78/3f/37fcdd0d2b1e78909108a876580485ea37c91e1acf66d3bb8e736348f441/regex-2025.11.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:22b29dda7e1f7062a52359fca6e58e548e28c6686f205e780b02ad8ef710de36", size = 291256 }, - { url = "https://files.pythonhosted.org/packages/bf/26/0a575f58eb23b7ebd67a45fccbc02ac030b737b896b7e7a909ffe43ffd6a/regex-2025.11.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3a91e4a29938bc1a082cc28fdea44be420bf2bebe2665343029723892eb073e1", size = 288921 }, - { url = 
"https://files.pythonhosted.org/packages/ea/98/6a8dff667d1af907150432cf5abc05a17ccd32c72a3615410d5365ac167a/regex-2025.11.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:08b884f4226602ad40c5d55f52bf91a9df30f513864e0054bad40c0e9cf1afb7", size = 798568 }, - { url = "https://files.pythonhosted.org/packages/64/15/92c1db4fa4e12733dd5a526c2dd2b6edcbfe13257e135fc0f6c57f34c173/regex-2025.11.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3e0b11b2b2433d1c39c7c7a30e3f3d0aeeea44c2a8d0bae28f6b95f639927a69", size = 864165 }, - { url = "https://files.pythonhosted.org/packages/f9/e7/3ad7da8cdee1ce66c7cd37ab5ab05c463a86ffeb52b1a25fe7bd9293b36c/regex-2025.11.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:87eb52a81ef58c7ba4d45c3ca74e12aa4b4e77816f72ca25258a85b3ea96cb48", size = 912182 }, - { url = "https://files.pythonhosted.org/packages/84/bd/9ce9f629fcb714ffc2c3faf62b6766ecb7a585e1e885eb699bcf130a5209/regex-2025.11.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a12ab1f5c29b4e93db518f5e3872116b7e9b1646c9f9f426f777b50d44a09e8c", size = 803501 }, - { url = "https://files.pythonhosted.org/packages/7c/0f/8dc2e4349d8e877283e6edd6c12bdcebc20f03744e86f197ab6e4492bf08/regex-2025.11.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7521684c8c7c4f6e88e35ec89680ee1aa8358d3f09d27dfbdf62c446f5d4c695", size = 787842 }, - { url = "https://files.pythonhosted.org/packages/f9/73/cff02702960bc185164d5619c0c62a2f598a6abff6695d391b096237d4ab/regex-2025.11.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:7fe6e5440584e94cc4b3f5f4d98a25e29ca12dccf8873679a635638349831b98", size = 858519 }, - { url = "https://files.pythonhosted.org/packages/61/83/0e8d1ae71e15bc1dc36231c90b46ee35f9d52fab2e226b0e039e7ea9c10a/regex-2025.11.3-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:8e026094aa12b43f4fd74576714e987803a315c76edb6b098b9809db5de58f74", size = 850611 }, - { url = "https://files.pythonhosted.org/packages/c8/f5/70a5cdd781dcfaa12556f2955bf170cd603cb1c96a1827479f8faea2df97/regex-2025.11.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:435bbad13e57eb5606a68443af62bed3556de2f46deb9f7d4237bc2f1c9fb3a0", size = 789759 }, - { url = "https://files.pythonhosted.org/packages/59/9b/7c29be7903c318488983e7d97abcf8ebd3830e4c956c4c540005fcfb0462/regex-2025.11.3-cp312-cp312-win32.whl", hash = "sha256:3839967cf4dc4b985e1570fd8d91078f0c519f30491c60f9ac42a8db039be204", size = 266194 }, - { url = "https://files.pythonhosted.org/packages/1a/67/3b92df89f179d7c367be654ab5626ae311cb28f7d5c237b6bb976cd5fbbb/regex-2025.11.3-cp312-cp312-win_amd64.whl", hash = "sha256:e721d1b46e25c481dc5ded6f4b3f66c897c58d2e8cfdf77bbced84339108b0b9", size = 277069 }, - { url = "https://files.pythonhosted.org/packages/d7/55/85ba4c066fe5094d35b249c3ce8df0ba623cfd35afb22d6764f23a52a1c5/regex-2025.11.3-cp312-cp312-win_arm64.whl", hash = "sha256:64350685ff08b1d3a6fff33f45a9ca183dc1d58bbfe4981604e70ec9801bbc26", size = 270330 }, - { url = "https://files.pythonhosted.org/packages/e1/a7/dda24ebd49da46a197436ad96378f17df30ceb40e52e859fc42cac45b850/regex-2025.11.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:c1e448051717a334891f2b9a620fe36776ebf3dd8ec46a0b877c8ae69575feb4", size = 489081 }, - { url = 
"https://files.pythonhosted.org/packages/19/22/af2dc751aacf88089836aa088a1a11c4f21a04707eb1b0478e8e8fb32847/regex-2025.11.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:9b5aca4d5dfd7fbfbfbdaf44850fcc7709a01146a797536a8f84952e940cca76", size = 291123 }, - { url = "https://files.pythonhosted.org/packages/a3/88/1a3ea5672f4b0a84802ee9891b86743438e7c04eb0b8f8c4e16a42375327/regex-2025.11.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:04d2765516395cf7dda331a244a3282c0f5ae96075f728629287dfa6f76ba70a", size = 288814 }, - { url = "https://files.pythonhosted.org/packages/fb/8c/f5987895bf42b8ddeea1b315c9fedcfe07cadee28b9c98cf50d00adcb14d/regex-2025.11.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d9903ca42bfeec4cebedba8022a7c97ad2aab22e09573ce9976ba01b65e4361", size = 798592 }, - { url = "https://files.pythonhosted.org/packages/99/2a/6591ebeede78203fa77ee46a1c36649e02df9eaa77a033d1ccdf2fcd5d4e/regex-2025.11.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:639431bdc89d6429f6721625e8129413980ccd62e9d3f496be618a41d205f160", size = 864122 }, - { url = "https://files.pythonhosted.org/packages/94/d6/be32a87cf28cf8ed064ff281cfbd49aefd90242a83e4b08b5a86b38e8eb4/regex-2025.11.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f117efad42068f9715677c8523ed2be1518116d1c49b1dd17987716695181efe", size = 912272 }, - { url = "https://files.pythonhosted.org/packages/62/11/9bcef2d1445665b180ac7f230406ad80671f0fc2a6ffb93493b5dd8cd64c/regex-2025.11.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4aecb6f461316adf9f1f0f6a4a1a3d79e045f9b71ec76055a791affa3b285850", size = 803497 }, - { url = "https://files.pythonhosted.org/packages/e5/a7/da0dc273d57f560399aa16d8a68ae7f9b57679476fc7ace46501d455fe84/regex-2025.11.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:3b3a5f320136873cc5561098dfab677eea139521cb9a9e8db98b7e64aef44cbc", size = 787892 }, - { url = "https://files.pythonhosted.org/packages/da/4b/732a0c5a9736a0b8d6d720d4945a2f1e6f38f87f48f3173559f53e8d5d82/regex-2025.11.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:75fa6f0056e7efb1f42a1c34e58be24072cb9e61a601340cc1196ae92326a4f9", size = 858462 }, - { url = "https://files.pythonhosted.org/packages/0c/f5/a2a03df27dc4c2d0c769220f5110ba8c4084b0bfa9ab0f9b4fcfa3d2b0fc/regex-2025.11.3-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:dbe6095001465294f13f1adcd3311e50dd84e5a71525f20a10bd16689c61ce0b", size = 850528 }, - { url = "https://files.pythonhosted.org/packages/d6/09/e1cd5bee3841c7f6eb37d95ca91cdee7100b8f88b81e41c2ef426910891a/regex-2025.11.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:454d9b4ae7881afbc25015b8627c16d88a597479b9dea82b8c6e7e2e07240dc7", size = 789866 }, - { url = "https://files.pythonhosted.org/packages/eb/51/702f5ea74e2a9c13d855a6a85b7f80c30f9e72a95493260193c07f3f8d74/regex-2025.11.3-cp313-cp313-win32.whl", hash = "sha256:28ba4d69171fc6e9896337d4fc63a43660002b7da53fc15ac992abcf3410917c", size = 266189 }, - { url = "https://files.pythonhosted.org/packages/8b/00/6e29bb314e271a743170e53649db0fdb8e8ff0b64b4f425f5602f4eb9014/regex-2025.11.3-cp313-cp313-win_amd64.whl", hash = "sha256:bac4200befe50c670c405dc33af26dad5a3b6b255dd6c000d92fe4629f9ed6a5", size = 277054 }, - { url = 
"https://files.pythonhosted.org/packages/25/f1/b156ff9f2ec9ac441710764dda95e4edaf5f36aca48246d1eea3f1fd96ec/regex-2025.11.3-cp313-cp313-win_arm64.whl", hash = "sha256:2292cd5a90dab247f9abe892ac584cb24f0f54680c73fcb4a7493c66c2bf2467", size = 270325 }, - { url = "https://files.pythonhosted.org/packages/20/28/fd0c63357caefe5680b8ea052131acbd7f456893b69cc2a90cc3e0dc90d4/regex-2025.11.3-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:1eb1ebf6822b756c723e09f5186473d93236c06c579d2cc0671a722d2ab14281", size = 491984 }, - { url = "https://files.pythonhosted.org/packages/df/ec/7014c15626ab46b902b3bcc4b28a7bae46d8f281fc7ea9c95e22fcaaa917/regex-2025.11.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:1e00ec2970aab10dc5db34af535f21fcf32b4a31d99e34963419636e2f85ae39", size = 292673 }, - { url = "https://files.pythonhosted.org/packages/23/ab/3b952ff7239f20d05f1f99e9e20188513905f218c81d52fb5e78d2bf7634/regex-2025.11.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a4cb042b615245d5ff9b3794f56be4138b5adc35a4166014d31d1814744148c7", size = 291029 }, - { url = "https://files.pythonhosted.org/packages/21/7e/3dc2749fc684f455f162dcafb8a187b559e2614f3826877d3844a131f37b/regex-2025.11.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:44f264d4bf02f3176467d90b294d59bf1db9fe53c141ff772f27a8b456b2a9ed", size = 807437 }, - { url = "https://files.pythonhosted.org/packages/1b/0b/d529a85ab349c6a25d1ca783235b6e3eedf187247eab536797021f7126c6/regex-2025.11.3-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7be0277469bf3bd7a34a9c57c1b6a724532a0d235cd0dc4e7f4316f982c28b19", size = 873368 }, - { url = "https://files.pythonhosted.org/packages/7d/18/2d868155f8c9e3e9d8f9e10c64e9a9f496bb8f7e037a88a8bed26b435af6/regex-2025.11.3-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0d31e08426ff4b5b650f68839f5af51a92a5b51abd8554a60c2fbc7c71f25d0b", size = 914921 }, - { url = "https://files.pythonhosted.org/packages/2d/71/9d72ff0f354fa783fe2ba913c8734c3b433b86406117a8db4ea2bf1c7a2f/regex-2025.11.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e43586ce5bd28f9f285a6e729466841368c4a0353f6fd08d4ce4630843d3648a", size = 812708 }, - { url = "https://files.pythonhosted.org/packages/e7/19/ce4bf7f5575c97f82b6e804ffb5c4e940c62609ab2a0d9538d47a7fdf7d4/regex-2025.11.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:0f9397d561a4c16829d4e6ff75202c1c08b68a3bdbfe29dbfcdb31c9830907c6", size = 795472 }, - { url = "https://files.pythonhosted.org/packages/03/86/fd1063a176ffb7b2315f9a1b08d17b18118b28d9df163132615b835a26ee/regex-2025.11.3-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:dd16e78eb18ffdb25ee33a0682d17912e8cc8a770e885aeee95020046128f1ce", size = 868341 }, - { url = "https://files.pythonhosted.org/packages/12/43/103fb2e9811205e7386366501bc866a164a0430c79dd59eac886a2822950/regex-2025.11.3-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:ffcca5b9efe948ba0661e9df0fa50d2bc4b097c70b9810212d6b62f05d83b2dd", size = 854666 }, - { url = "https://files.pythonhosted.org/packages/7d/22/e392e53f3869b75804762c7c848bd2dd2abf2b70fb0e526f58724638bd35/regex-2025.11.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c56b4d162ca2b43318ac671c65bd4d563e841a694ac70e1a976ac38fcf4ca1d2", size = 799473 }, - { url = 
"https://files.pythonhosted.org/packages/4f/f9/8bd6b656592f925b6845fcbb4d57603a3ac2fb2373344ffa1ed70aa6820a/regex-2025.11.3-cp313-cp313t-win32.whl", hash = "sha256:9ddc42e68114e161e51e272f667d640f97e84a2b9ef14b7477c53aac20c2d59a", size = 268792 }, - { url = "https://files.pythonhosted.org/packages/e5/87/0e7d603467775ff65cd2aeabf1b5b50cc1c3708556a8b849a2fa4dd1542b/regex-2025.11.3-cp313-cp313t-win_amd64.whl", hash = "sha256:7a7c7fdf755032ffdd72c77e3d8096bdcb0eb92e89e17571a196f03d88b11b3c", size = 280214 }, - { url = "https://files.pythonhosted.org/packages/8d/d0/2afc6f8e94e2b64bfb738a7c2b6387ac1699f09f032d363ed9447fd2bb57/regex-2025.11.3-cp313-cp313t-win_arm64.whl", hash = "sha256:df9eb838c44f570283712e7cff14c16329a9f0fb19ca492d21d4b7528ee6821e", size = 271469 }, - { url = "https://files.pythonhosted.org/packages/31/e9/f6e13de7e0983837f7b6d238ad9458800a874bf37c264f7923e63409944c/regex-2025.11.3-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:9697a52e57576c83139d7c6f213d64485d3df5bf84807c35fa409e6c970801c6", size = 489089 }, - { url = "https://files.pythonhosted.org/packages/a3/5c/261f4a262f1fa65141c1b74b255988bd2fa020cc599e53b080667d591cfc/regex-2025.11.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:e18bc3f73bd41243c9b38a6d9f2366cd0e0137a9aebe2d8ff76c5b67d4c0a3f4", size = 291059 }, - { url = "https://files.pythonhosted.org/packages/8e/57/f14eeb7f072b0e9a5a090d1712741fd8f214ec193dba773cf5410108bb7d/regex-2025.11.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:61a08bcb0ec14ff4e0ed2044aad948d0659604f824cbd50b55e30b0ec6f09c73", size = 288900 }, - { url = "https://files.pythonhosted.org/packages/3c/6b/1d650c45e99a9b327586739d926a1cd4e94666b1bd4af90428b36af66dc7/regex-2025.11.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c9c30003b9347c24bcc210958c5d167b9e4f9be786cb380a7d32f14f9b84674f", size = 799010 }, - { url = "https://files.pythonhosted.org/packages/99/ee/d66dcbc6b628ce4e3f7f0cbbb84603aa2fc0ffc878babc857726b8aab2e9/regex-2025.11.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4e1e592789704459900728d88d41a46fe3969b82ab62945560a31732ffc19a6d", size = 864893 }, - { url = "https://files.pythonhosted.org/packages/bf/2d/f238229f1caba7ac87a6c4153d79947fb0261415827ae0f77c304260c7d3/regex-2025.11.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:6538241f45eb5a25aa575dbba1069ad786f68a4f2773a29a2bd3dd1f9de787be", size = 911522 }, - { url = "https://files.pythonhosted.org/packages/bd/3d/22a4eaba214a917c80e04f6025d26143690f0419511e0116508e24b11c9b/regex-2025.11.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bce22519c989bb72a7e6b36a199384c53db7722fe669ba891da75907fe3587db", size = 803272 }, - { url = "https://files.pythonhosted.org/packages/84/b1/03188f634a409353a84b5ef49754b97dbcc0c0f6fd6c8ede505a8960a0a4/regex-2025.11.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:66d559b21d3640203ab9075797a55165d79017520685fb407b9234d72ab63c62", size = 787958 }, - { url = "https://files.pythonhosted.org/packages/99/6a/27d072f7fbf6fadd59c64d210305e1ff865cc3b78b526fd147db768c553b/regex-2025.11.3-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:669dcfb2e38f9e8c69507bace46f4889e3abbfd9b0c29719202883c0a603598f", size = 859289 }, - { url = 
"https://files.pythonhosted.org/packages/9a/70/1b3878f648e0b6abe023172dacb02157e685564853cc363d9961bcccde4e/regex-2025.11.3-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:32f74f35ff0f25a5021373ac61442edcb150731fbaa28286bbc8bb1582c89d02", size = 850026 }, - { url = "https://files.pythonhosted.org/packages/dd/d5/68e25559b526b8baab8e66839304ede68ff6727237a47727d240006bd0ff/regex-2025.11.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e6c7a21dffba883234baefe91bc3388e629779582038f75d2a5be918e250f0ed", size = 789499 }, - { url = "https://files.pythonhosted.org/packages/fc/df/43971264857140a350910d4e33df725e8c94dd9dee8d2e4729fa0d63d49e/regex-2025.11.3-cp314-cp314-win32.whl", hash = "sha256:795ea137b1d809eb6836b43748b12634291c0ed55ad50a7d72d21edf1cd565c4", size = 271604 }, - { url = "https://files.pythonhosted.org/packages/01/6f/9711b57dc6894a55faf80a4c1b5aa4f8649805cb9c7aef46f7d27e2b9206/regex-2025.11.3-cp314-cp314-win_amd64.whl", hash = "sha256:9f95fbaa0ee1610ec0fc6b26668e9917a582ba80c52cc6d9ada15e30aa9ab9ad", size = 280320 }, - { url = "https://files.pythonhosted.org/packages/f1/7e/f6eaa207d4377481f5e1775cdeb5a443b5a59b392d0065f3417d31d80f87/regex-2025.11.3-cp314-cp314-win_arm64.whl", hash = "sha256:dfec44d532be4c07088c3de2876130ff0fbeeacaa89a137decbbb5f665855a0f", size = 273372 }, - { url = "https://files.pythonhosted.org/packages/c3/06/49b198550ee0f5e4184271cee87ba4dfd9692c91ec55289e6282f0f86ccf/regex-2025.11.3-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:ba0d8a5d7f04f73ee7d01d974d47c5834f8a1b0224390e4fe7c12a3a92a78ecc", size = 491985 }, - { url = "https://files.pythonhosted.org/packages/ce/bf/abdafade008f0b1c9da10d934034cb670432d6cf6cbe38bbb53a1cfd6cf8/regex-2025.11.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:442d86cf1cfe4faabf97db7d901ef58347efd004934da045c745e7b5bd57ac49", size = 292669 }, - { url = "https://files.pythonhosted.org/packages/f9/ef/0c357bb8edbd2ad8e273fcb9e1761bc37b8acbc6e1be050bebd6475f19c1/regex-2025.11.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:fd0a5e563c756de210bb964789b5abe4f114dacae9104a47e1a649b910361536", size = 291030 }, - { url = "https://files.pythonhosted.org/packages/79/06/edbb67257596649b8fb088d6aeacbcb248ac195714b18a65e018bf4c0b50/regex-2025.11.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bf3490bcbb985a1ae97b2ce9ad1c0f06a852d5b19dde9b07bdf25bf224248c95", size = 807674 }, - { url = "https://files.pythonhosted.org/packages/f4/d9/ad4deccfce0ea336296bd087f1a191543bb99ee1c53093dcd4c64d951d00/regex-2025.11.3-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3809988f0a8b8c9dcc0f92478d6501fac7200b9ec56aecf0ec21f4a2ec4b6009", size = 873451 }, - { url = "https://files.pythonhosted.org/packages/13/75/a55a4724c56ef13e3e04acaab29df26582f6978c000ac9cd6810ad1f341f/regex-2025.11.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f4ff94e58e84aedb9c9fce66d4ef9f27a190285b451420f297c9a09f2b9abee9", size = 914980 }, - { url = "https://files.pythonhosted.org/packages/67/1e/a1657ee15bd9116f70d4a530c736983eed997b361e20ecd8f5ca3759d5c5/regex-2025.11.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7eb542fd347ce61e1321b0a6b945d5701528dca0cd9759c2e3bb8bd57e47964d", size = 812852 }, - { url = 
"https://files.pythonhosted.org/packages/b8/6f/f7516dde5506a588a561d296b2d0044839de06035bb486b326065b4c101e/regex-2025.11.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d6c2d5919075a1f2e413c00b056ea0c2f065b3f5fe83c3d07d325ab92dce51d6", size = 795566 }, - { url = "https://files.pythonhosted.org/packages/d9/dd/3d10b9e170cc16fb34cb2cef91513cf3df65f440b3366030631b2984a264/regex-2025.11.3-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:3f8bf11a4827cc7ce5a53d4ef6cddd5ad25595d3c1435ef08f76825851343154", size = 868463 }, - { url = "https://files.pythonhosted.org/packages/f5/8e/935e6beff1695aa9085ff83195daccd72acc82c81793df480f34569330de/regex-2025.11.3-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:22c12d837298651e5550ac1d964e4ff57c3f56965fc1812c90c9fb2028eaf267", size = 854694 }, - { url = "https://files.pythonhosted.org/packages/92/12/10650181a040978b2f5720a6a74d44f841371a3d984c2083fc1752e4acf6/regex-2025.11.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:62ba394a3dda9ad41c7c780f60f6e4a70988741415ae96f6d1bf6c239cf01379", size = 799691 }, - { url = "https://files.pythonhosted.org/packages/67/90/8f37138181c9a7690e7e4cb388debbd389342db3c7381d636d2875940752/regex-2025.11.3-cp314-cp314t-win32.whl", hash = "sha256:4bf146dca15cdd53224a1bf46d628bd7590e4a07fbb69e720d561aea43a32b38", size = 274583 }, - { url = "https://files.pythonhosted.org/packages/8f/cd/867f5ec442d56beb56f5f854f40abcfc75e11d10b11fdb1869dd39c63aaf/regex-2025.11.3-cp314-cp314t-win_amd64.whl", hash = "sha256:adad1a1bcf1c9e76346e091d22d23ac54ef28e1365117d99521631078dfec9de", size = 284286 }, - { url = "https://files.pythonhosted.org/packages/20/31/32c0c4610cbc070362bf1d2e4ea86d1ea29014d400a6d6c2486fcfd57766/regex-2025.11.3-cp314-cp314t-win_arm64.whl", hash = "sha256:c54f768482cef41e219720013cd05933b6f971d9562544d691c68699bf2b6801", size = 274741 }, +sdist = { url = "https://files.pythonhosted.org/packages/cc/a9/546676f25e573a4cf00fe8e119b78a37b6a8fe2dc95cda877b30889c9c45/regex-2025.11.3.tar.gz", hash = "sha256:1fedc720f9bb2494ce31a58a1631f9c82df6a09b49c19517ea5cc280b4541e01", size = 414669, upload_time = "2025-11-03T21:34:22.089Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e8/74/18f04cb53e58e3fb107439699bd8375cf5a835eec81084e0bddbd122e4c2/regex-2025.11.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:bc8ab71e2e31b16e40868a40a69007bc305e1109bd4658eb6cad007e0bf67c41", size = 489312, upload_time = "2025-11-03T21:31:34.343Z" }, + { url = "https://files.pythonhosted.org/packages/78/3f/37fcdd0d2b1e78909108a876580485ea37c91e1acf66d3bb8e736348f441/regex-2025.11.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:22b29dda7e1f7062a52359fca6e58e548e28c6686f205e780b02ad8ef710de36", size = 291256, upload_time = "2025-11-03T21:31:35.675Z" }, + { url = "https://files.pythonhosted.org/packages/bf/26/0a575f58eb23b7ebd67a45fccbc02ac030b737b896b7e7a909ffe43ffd6a/regex-2025.11.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3a91e4a29938bc1a082cc28fdea44be420bf2bebe2665343029723892eb073e1", size = 288921, upload_time = "2025-11-03T21:31:37.07Z" }, + { url = "https://files.pythonhosted.org/packages/ea/98/6a8dff667d1af907150432cf5abc05a17ccd32c72a3615410d5365ac167a/regex-2025.11.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:08b884f4226602ad40c5d55f52bf91a9df30f513864e0054bad40c0e9cf1afb7", size = 798568, upload_time = "2025-11-03T21:31:38.784Z" }, + { url = 
"https://files.pythonhosted.org/packages/64/15/92c1db4fa4e12733dd5a526c2dd2b6edcbfe13257e135fc0f6c57f34c173/regex-2025.11.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3e0b11b2b2433d1c39c7c7a30e3f3d0aeeea44c2a8d0bae28f6b95f639927a69", size = 864165, upload_time = "2025-11-03T21:31:40.559Z" }, + { url = "https://files.pythonhosted.org/packages/f9/e7/3ad7da8cdee1ce66c7cd37ab5ab05c463a86ffeb52b1a25fe7bd9293b36c/regex-2025.11.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:87eb52a81ef58c7ba4d45c3ca74e12aa4b4e77816f72ca25258a85b3ea96cb48", size = 912182, upload_time = "2025-11-03T21:31:42.002Z" }, + { url = "https://files.pythonhosted.org/packages/84/bd/9ce9f629fcb714ffc2c3faf62b6766ecb7a585e1e885eb699bcf130a5209/regex-2025.11.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a12ab1f5c29b4e93db518f5e3872116b7e9b1646c9f9f426f777b50d44a09e8c", size = 803501, upload_time = "2025-11-03T21:31:43.815Z" }, + { url = "https://files.pythonhosted.org/packages/7c/0f/8dc2e4349d8e877283e6edd6c12bdcebc20f03744e86f197ab6e4492bf08/regex-2025.11.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7521684c8c7c4f6e88e35ec89680ee1aa8358d3f09d27dfbdf62c446f5d4c695", size = 787842, upload_time = "2025-11-03T21:31:45.353Z" }, + { url = "https://files.pythonhosted.org/packages/f9/73/cff02702960bc185164d5619c0c62a2f598a6abff6695d391b096237d4ab/regex-2025.11.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:7fe6e5440584e94cc4b3f5f4d98a25e29ca12dccf8873679a635638349831b98", size = 858519, upload_time = "2025-11-03T21:31:46.814Z" }, + { url = "https://files.pythonhosted.org/packages/61/83/0e8d1ae71e15bc1dc36231c90b46ee35f9d52fab2e226b0e039e7ea9c10a/regex-2025.11.3-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:8e026094aa12b43f4fd74576714e987803a315c76edb6b098b9809db5de58f74", size = 850611, upload_time = "2025-11-03T21:31:48.289Z" }, + { url = "https://files.pythonhosted.org/packages/c8/f5/70a5cdd781dcfaa12556f2955bf170cd603cb1c96a1827479f8faea2df97/regex-2025.11.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:435bbad13e57eb5606a68443af62bed3556de2f46deb9f7d4237bc2f1c9fb3a0", size = 789759, upload_time = "2025-11-03T21:31:49.759Z" }, + { url = "https://files.pythonhosted.org/packages/59/9b/7c29be7903c318488983e7d97abcf8ebd3830e4c956c4c540005fcfb0462/regex-2025.11.3-cp312-cp312-win32.whl", hash = "sha256:3839967cf4dc4b985e1570fd8d91078f0c519f30491c60f9ac42a8db039be204", size = 266194, upload_time = "2025-11-03T21:31:51.53Z" }, + { url = "https://files.pythonhosted.org/packages/1a/67/3b92df89f179d7c367be654ab5626ae311cb28f7d5c237b6bb976cd5fbbb/regex-2025.11.3-cp312-cp312-win_amd64.whl", hash = "sha256:e721d1b46e25c481dc5ded6f4b3f66c897c58d2e8cfdf77bbced84339108b0b9", size = 277069, upload_time = "2025-11-03T21:31:53.151Z" }, + { url = "https://files.pythonhosted.org/packages/d7/55/85ba4c066fe5094d35b249c3ce8df0ba623cfd35afb22d6764f23a52a1c5/regex-2025.11.3-cp312-cp312-win_arm64.whl", hash = "sha256:64350685ff08b1d3a6fff33f45a9ca183dc1d58bbfe4981604e70ec9801bbc26", size = 270330, upload_time = "2025-11-03T21:31:54.514Z" }, + { url = "https://files.pythonhosted.org/packages/e1/a7/dda24ebd49da46a197436ad96378f17df30ceb40e52e859fc42cac45b850/regex-2025.11.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:c1e448051717a334891f2b9a620fe36776ebf3dd8ec46a0b877c8ae69575feb4", size = 489081, upload_time = "2025-11-03T21:31:55.9Z" }, + { url = 
"https://files.pythonhosted.org/packages/19/22/af2dc751aacf88089836aa088a1a11c4f21a04707eb1b0478e8e8fb32847/regex-2025.11.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:9b5aca4d5dfd7fbfbfbdaf44850fcc7709a01146a797536a8f84952e940cca76", size = 291123, upload_time = "2025-11-03T21:31:57.758Z" }, + { url = "https://files.pythonhosted.org/packages/a3/88/1a3ea5672f4b0a84802ee9891b86743438e7c04eb0b8f8c4e16a42375327/regex-2025.11.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:04d2765516395cf7dda331a244a3282c0f5ae96075f728629287dfa6f76ba70a", size = 288814, upload_time = "2025-11-03T21:32:01.12Z" }, + { url = "https://files.pythonhosted.org/packages/fb/8c/f5987895bf42b8ddeea1b315c9fedcfe07cadee28b9c98cf50d00adcb14d/regex-2025.11.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d9903ca42bfeec4cebedba8022a7c97ad2aab22e09573ce9976ba01b65e4361", size = 798592, upload_time = "2025-11-03T21:32:03.006Z" }, + { url = "https://files.pythonhosted.org/packages/99/2a/6591ebeede78203fa77ee46a1c36649e02df9eaa77a033d1ccdf2fcd5d4e/regex-2025.11.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:639431bdc89d6429f6721625e8129413980ccd62e9d3f496be618a41d205f160", size = 864122, upload_time = "2025-11-03T21:32:04.553Z" }, + { url = "https://files.pythonhosted.org/packages/94/d6/be32a87cf28cf8ed064ff281cfbd49aefd90242a83e4b08b5a86b38e8eb4/regex-2025.11.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f117efad42068f9715677c8523ed2be1518116d1c49b1dd17987716695181efe", size = 912272, upload_time = "2025-11-03T21:32:06.148Z" }, + { url = "https://files.pythonhosted.org/packages/62/11/9bcef2d1445665b180ac7f230406ad80671f0fc2a6ffb93493b5dd8cd64c/regex-2025.11.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4aecb6f461316adf9f1f0f6a4a1a3d79e045f9b71ec76055a791affa3b285850", size = 803497, upload_time = "2025-11-03T21:32:08.162Z" }, + { url = "https://files.pythonhosted.org/packages/e5/a7/da0dc273d57f560399aa16d8a68ae7f9b57679476fc7ace46501d455fe84/regex-2025.11.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:3b3a5f320136873cc5561098dfab677eea139521cb9a9e8db98b7e64aef44cbc", size = 787892, upload_time = "2025-11-03T21:32:09.769Z" }, + { url = "https://files.pythonhosted.org/packages/da/4b/732a0c5a9736a0b8d6d720d4945a2f1e6f38f87f48f3173559f53e8d5d82/regex-2025.11.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:75fa6f0056e7efb1f42a1c34e58be24072cb9e61a601340cc1196ae92326a4f9", size = 858462, upload_time = "2025-11-03T21:32:11.769Z" }, + { url = "https://files.pythonhosted.org/packages/0c/f5/a2a03df27dc4c2d0c769220f5110ba8c4084b0bfa9ab0f9b4fcfa3d2b0fc/regex-2025.11.3-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:dbe6095001465294f13f1adcd3311e50dd84e5a71525f20a10bd16689c61ce0b", size = 850528, upload_time = "2025-11-03T21:32:13.906Z" }, + { url = "https://files.pythonhosted.org/packages/d6/09/e1cd5bee3841c7f6eb37d95ca91cdee7100b8f88b81e41c2ef426910891a/regex-2025.11.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:454d9b4ae7881afbc25015b8627c16d88a597479b9dea82b8c6e7e2e07240dc7", size = 789866, upload_time = "2025-11-03T21:32:15.748Z" }, + { url = "https://files.pythonhosted.org/packages/eb/51/702f5ea74e2a9c13d855a6a85b7f80c30f9e72a95493260193c07f3f8d74/regex-2025.11.3-cp313-cp313-win32.whl", hash = "sha256:28ba4d69171fc6e9896337d4fc63a43660002b7da53fc15ac992abcf3410917c", size = 
266189, upload_time = "2025-11-03T21:32:17.493Z" }, + { url = "https://files.pythonhosted.org/packages/8b/00/6e29bb314e271a743170e53649db0fdb8e8ff0b64b4f425f5602f4eb9014/regex-2025.11.3-cp313-cp313-win_amd64.whl", hash = "sha256:bac4200befe50c670c405dc33af26dad5a3b6b255dd6c000d92fe4629f9ed6a5", size = 277054, upload_time = "2025-11-03T21:32:19.042Z" }, + { url = "https://files.pythonhosted.org/packages/25/f1/b156ff9f2ec9ac441710764dda95e4edaf5f36aca48246d1eea3f1fd96ec/regex-2025.11.3-cp313-cp313-win_arm64.whl", hash = "sha256:2292cd5a90dab247f9abe892ac584cb24f0f54680c73fcb4a7493c66c2bf2467", size = 270325, upload_time = "2025-11-03T21:32:21.338Z" }, + { url = "https://files.pythonhosted.org/packages/20/28/fd0c63357caefe5680b8ea052131acbd7f456893b69cc2a90cc3e0dc90d4/regex-2025.11.3-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:1eb1ebf6822b756c723e09f5186473d93236c06c579d2cc0671a722d2ab14281", size = 491984, upload_time = "2025-11-03T21:32:23.466Z" }, + { url = "https://files.pythonhosted.org/packages/df/ec/7014c15626ab46b902b3bcc4b28a7bae46d8f281fc7ea9c95e22fcaaa917/regex-2025.11.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:1e00ec2970aab10dc5db34af535f21fcf32b4a31d99e34963419636e2f85ae39", size = 292673, upload_time = "2025-11-03T21:32:25.034Z" }, + { url = "https://files.pythonhosted.org/packages/23/ab/3b952ff7239f20d05f1f99e9e20188513905f218c81d52fb5e78d2bf7634/regex-2025.11.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a4cb042b615245d5ff9b3794f56be4138b5adc35a4166014d31d1814744148c7", size = 291029, upload_time = "2025-11-03T21:32:26.528Z" }, + { url = "https://files.pythonhosted.org/packages/21/7e/3dc2749fc684f455f162dcafb8a187b559e2614f3826877d3844a131f37b/regex-2025.11.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:44f264d4bf02f3176467d90b294d59bf1db9fe53c141ff772f27a8b456b2a9ed", size = 807437, upload_time = "2025-11-03T21:32:28.363Z" }, + { url = "https://files.pythonhosted.org/packages/1b/0b/d529a85ab349c6a25d1ca783235b6e3eedf187247eab536797021f7126c6/regex-2025.11.3-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7be0277469bf3bd7a34a9c57c1b6a724532a0d235cd0dc4e7f4316f982c28b19", size = 873368, upload_time = "2025-11-03T21:32:30.4Z" }, + { url = "https://files.pythonhosted.org/packages/7d/18/2d868155f8c9e3e9d8f9e10c64e9a9f496bb8f7e037a88a8bed26b435af6/regex-2025.11.3-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0d31e08426ff4b5b650f68839f5af51a92a5b51abd8554a60c2fbc7c71f25d0b", size = 914921, upload_time = "2025-11-03T21:32:32.123Z" }, + { url = "https://files.pythonhosted.org/packages/2d/71/9d72ff0f354fa783fe2ba913c8734c3b433b86406117a8db4ea2bf1c7a2f/regex-2025.11.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e43586ce5bd28f9f285a6e729466841368c4a0353f6fd08d4ce4630843d3648a", size = 812708, upload_time = "2025-11-03T21:32:34.305Z" }, + { url = "https://files.pythonhosted.org/packages/e7/19/ce4bf7f5575c97f82b6e804ffb5c4e940c62609ab2a0d9538d47a7fdf7d4/regex-2025.11.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:0f9397d561a4c16829d4e6ff75202c1c08b68a3bdbfe29dbfcdb31c9830907c6", size = 795472, upload_time = "2025-11-03T21:32:36.364Z" }, + { url = "https://files.pythonhosted.org/packages/03/86/fd1063a176ffb7b2315f9a1b08d17b18118b28d9df163132615b835a26ee/regex-2025.11.3-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = 
"sha256:dd16e78eb18ffdb25ee33a0682d17912e8cc8a770e885aeee95020046128f1ce", size = 868341, upload_time = "2025-11-03T21:32:38.042Z" }, + { url = "https://files.pythonhosted.org/packages/12/43/103fb2e9811205e7386366501bc866a164a0430c79dd59eac886a2822950/regex-2025.11.3-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:ffcca5b9efe948ba0661e9df0fa50d2bc4b097c70b9810212d6b62f05d83b2dd", size = 854666, upload_time = "2025-11-03T21:32:40.079Z" }, + { url = "https://files.pythonhosted.org/packages/7d/22/e392e53f3869b75804762c7c848bd2dd2abf2b70fb0e526f58724638bd35/regex-2025.11.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c56b4d162ca2b43318ac671c65bd4d563e841a694ac70e1a976ac38fcf4ca1d2", size = 799473, upload_time = "2025-11-03T21:32:42.148Z" }, + { url = "https://files.pythonhosted.org/packages/4f/f9/8bd6b656592f925b6845fcbb4d57603a3ac2fb2373344ffa1ed70aa6820a/regex-2025.11.3-cp313-cp313t-win32.whl", hash = "sha256:9ddc42e68114e161e51e272f667d640f97e84a2b9ef14b7477c53aac20c2d59a", size = 268792, upload_time = "2025-11-03T21:32:44.13Z" }, + { url = "https://files.pythonhosted.org/packages/e5/87/0e7d603467775ff65cd2aeabf1b5b50cc1c3708556a8b849a2fa4dd1542b/regex-2025.11.3-cp313-cp313t-win_amd64.whl", hash = "sha256:7a7c7fdf755032ffdd72c77e3d8096bdcb0eb92e89e17571a196f03d88b11b3c", size = 280214, upload_time = "2025-11-03T21:32:45.853Z" }, + { url = "https://files.pythonhosted.org/packages/8d/d0/2afc6f8e94e2b64bfb738a7c2b6387ac1699f09f032d363ed9447fd2bb57/regex-2025.11.3-cp313-cp313t-win_arm64.whl", hash = "sha256:df9eb838c44f570283712e7cff14c16329a9f0fb19ca492d21d4b7528ee6821e", size = 271469, upload_time = "2025-11-03T21:32:48.026Z" }, + { url = "https://files.pythonhosted.org/packages/31/e9/f6e13de7e0983837f7b6d238ad9458800a874bf37c264f7923e63409944c/regex-2025.11.3-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:9697a52e57576c83139d7c6f213d64485d3df5bf84807c35fa409e6c970801c6", size = 489089, upload_time = "2025-11-03T21:32:50.027Z" }, + { url = "https://files.pythonhosted.org/packages/a3/5c/261f4a262f1fa65141c1b74b255988bd2fa020cc599e53b080667d591cfc/regex-2025.11.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:e18bc3f73bd41243c9b38a6d9f2366cd0e0137a9aebe2d8ff76c5b67d4c0a3f4", size = 291059, upload_time = "2025-11-03T21:32:51.682Z" }, + { url = "https://files.pythonhosted.org/packages/8e/57/f14eeb7f072b0e9a5a090d1712741fd8f214ec193dba773cf5410108bb7d/regex-2025.11.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:61a08bcb0ec14ff4e0ed2044aad948d0659604f824cbd50b55e30b0ec6f09c73", size = 288900, upload_time = "2025-11-03T21:32:53.569Z" }, + { url = "https://files.pythonhosted.org/packages/3c/6b/1d650c45e99a9b327586739d926a1cd4e94666b1bd4af90428b36af66dc7/regex-2025.11.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c9c30003b9347c24bcc210958c5d167b9e4f9be786cb380a7d32f14f9b84674f", size = 799010, upload_time = "2025-11-03T21:32:55.222Z" }, + { url = "https://files.pythonhosted.org/packages/99/ee/d66dcbc6b628ce4e3f7f0cbbb84603aa2fc0ffc878babc857726b8aab2e9/regex-2025.11.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4e1e592789704459900728d88d41a46fe3969b82ab62945560a31732ffc19a6d", size = 864893, upload_time = "2025-11-03T21:32:57.239Z" }, + { url = "https://files.pythonhosted.org/packages/bf/2d/f238229f1caba7ac87a6c4153d79947fb0261415827ae0f77c304260c7d3/regex-2025.11.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = 
"sha256:6538241f45eb5a25aa575dbba1069ad786f68a4f2773a29a2bd3dd1f9de787be", size = 911522, upload_time = "2025-11-03T21:32:59.274Z" }, + { url = "https://files.pythonhosted.org/packages/bd/3d/22a4eaba214a917c80e04f6025d26143690f0419511e0116508e24b11c9b/regex-2025.11.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bce22519c989bb72a7e6b36a199384c53db7722fe669ba891da75907fe3587db", size = 803272, upload_time = "2025-11-03T21:33:01.393Z" }, + { url = "https://files.pythonhosted.org/packages/84/b1/03188f634a409353a84b5ef49754b97dbcc0c0f6fd6c8ede505a8960a0a4/regex-2025.11.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:66d559b21d3640203ab9075797a55165d79017520685fb407b9234d72ab63c62", size = 787958, upload_time = "2025-11-03T21:33:03.379Z" }, + { url = "https://files.pythonhosted.org/packages/99/6a/27d072f7fbf6fadd59c64d210305e1ff865cc3b78b526fd147db768c553b/regex-2025.11.3-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:669dcfb2e38f9e8c69507bace46f4889e3abbfd9b0c29719202883c0a603598f", size = 859289, upload_time = "2025-11-03T21:33:05.374Z" }, + { url = "https://files.pythonhosted.org/packages/9a/70/1b3878f648e0b6abe023172dacb02157e685564853cc363d9961bcccde4e/regex-2025.11.3-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:32f74f35ff0f25a5021373ac61442edcb150731fbaa28286bbc8bb1582c89d02", size = 850026, upload_time = "2025-11-03T21:33:07.131Z" }, + { url = "https://files.pythonhosted.org/packages/dd/d5/68e25559b526b8baab8e66839304ede68ff6727237a47727d240006bd0ff/regex-2025.11.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e6c7a21dffba883234baefe91bc3388e629779582038f75d2a5be918e250f0ed", size = 789499, upload_time = "2025-11-03T21:33:09.141Z" }, + { url = "https://files.pythonhosted.org/packages/fc/df/43971264857140a350910d4e33df725e8c94dd9dee8d2e4729fa0d63d49e/regex-2025.11.3-cp314-cp314-win32.whl", hash = "sha256:795ea137b1d809eb6836b43748b12634291c0ed55ad50a7d72d21edf1cd565c4", size = 271604, upload_time = "2025-11-03T21:33:10.9Z" }, + { url = "https://files.pythonhosted.org/packages/01/6f/9711b57dc6894a55faf80a4c1b5aa4f8649805cb9c7aef46f7d27e2b9206/regex-2025.11.3-cp314-cp314-win_amd64.whl", hash = "sha256:9f95fbaa0ee1610ec0fc6b26668e9917a582ba80c52cc6d9ada15e30aa9ab9ad", size = 280320, upload_time = "2025-11-03T21:33:12.572Z" }, + { url = "https://files.pythonhosted.org/packages/f1/7e/f6eaa207d4377481f5e1775cdeb5a443b5a59b392d0065f3417d31d80f87/regex-2025.11.3-cp314-cp314-win_arm64.whl", hash = "sha256:dfec44d532be4c07088c3de2876130ff0fbeeacaa89a137decbbb5f665855a0f", size = 273372, upload_time = "2025-11-03T21:33:14.219Z" }, + { url = "https://files.pythonhosted.org/packages/c3/06/49b198550ee0f5e4184271cee87ba4dfd9692c91ec55289e6282f0f86ccf/regex-2025.11.3-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:ba0d8a5d7f04f73ee7d01d974d47c5834f8a1b0224390e4fe7c12a3a92a78ecc", size = 491985, upload_time = "2025-11-03T21:33:16.555Z" }, + { url = "https://files.pythonhosted.org/packages/ce/bf/abdafade008f0b1c9da10d934034cb670432d6cf6cbe38bbb53a1cfd6cf8/regex-2025.11.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:442d86cf1cfe4faabf97db7d901ef58347efd004934da045c745e7b5bd57ac49", size = 292669, upload_time = "2025-11-03T21:33:18.32Z" }, + { url = "https://files.pythonhosted.org/packages/f9/ef/0c357bb8edbd2ad8e273fcb9e1761bc37b8acbc6e1be050bebd6475f19c1/regex-2025.11.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:fd0a5e563c756de210bb964789b5abe4f114dacae9104a47e1a649b910361536", size = 291030, 
upload_time = "2025-11-03T21:33:20.048Z" }, + { url = "https://files.pythonhosted.org/packages/79/06/edbb67257596649b8fb088d6aeacbcb248ac195714b18a65e018bf4c0b50/regex-2025.11.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bf3490bcbb985a1ae97b2ce9ad1c0f06a852d5b19dde9b07bdf25bf224248c95", size = 807674, upload_time = "2025-11-03T21:33:21.797Z" }, + { url = "https://files.pythonhosted.org/packages/f4/d9/ad4deccfce0ea336296bd087f1a191543bb99ee1c53093dcd4c64d951d00/regex-2025.11.3-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3809988f0a8b8c9dcc0f92478d6501fac7200b9ec56aecf0ec21f4a2ec4b6009", size = 873451, upload_time = "2025-11-03T21:33:23.741Z" }, + { url = "https://files.pythonhosted.org/packages/13/75/a55a4724c56ef13e3e04acaab29df26582f6978c000ac9cd6810ad1f341f/regex-2025.11.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f4ff94e58e84aedb9c9fce66d4ef9f27a190285b451420f297c9a09f2b9abee9", size = 914980, upload_time = "2025-11-03T21:33:25.999Z" }, + { url = "https://files.pythonhosted.org/packages/67/1e/a1657ee15bd9116f70d4a530c736983eed997b361e20ecd8f5ca3759d5c5/regex-2025.11.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7eb542fd347ce61e1321b0a6b945d5701528dca0cd9759c2e3bb8bd57e47964d", size = 812852, upload_time = "2025-11-03T21:33:27.852Z" }, + { url = "https://files.pythonhosted.org/packages/b8/6f/f7516dde5506a588a561d296b2d0044839de06035bb486b326065b4c101e/regex-2025.11.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d6c2d5919075a1f2e413c00b056ea0c2f065b3f5fe83c3d07d325ab92dce51d6", size = 795566, upload_time = "2025-11-03T21:33:32.364Z" }, + { url = "https://files.pythonhosted.org/packages/d9/dd/3d10b9e170cc16fb34cb2cef91513cf3df65f440b3366030631b2984a264/regex-2025.11.3-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:3f8bf11a4827cc7ce5a53d4ef6cddd5ad25595d3c1435ef08f76825851343154", size = 868463, upload_time = "2025-11-03T21:33:34.459Z" }, + { url = "https://files.pythonhosted.org/packages/f5/8e/935e6beff1695aa9085ff83195daccd72acc82c81793df480f34569330de/regex-2025.11.3-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:22c12d837298651e5550ac1d964e4ff57c3f56965fc1812c90c9fb2028eaf267", size = 854694, upload_time = "2025-11-03T21:33:36.793Z" }, + { url = "https://files.pythonhosted.org/packages/92/12/10650181a040978b2f5720a6a74d44f841371a3d984c2083fc1752e4acf6/regex-2025.11.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:62ba394a3dda9ad41c7c780f60f6e4a70988741415ae96f6d1bf6c239cf01379", size = 799691, upload_time = "2025-11-03T21:33:39.079Z" }, + { url = "https://files.pythonhosted.org/packages/67/90/8f37138181c9a7690e7e4cb388debbd389342db3c7381d636d2875940752/regex-2025.11.3-cp314-cp314t-win32.whl", hash = "sha256:4bf146dca15cdd53224a1bf46d628bd7590e4a07fbb69e720d561aea43a32b38", size = 274583, upload_time = "2025-11-03T21:33:41.302Z" }, + { url = "https://files.pythonhosted.org/packages/8f/cd/867f5ec442d56beb56f5f854f40abcfc75e11d10b11fdb1869dd39c63aaf/regex-2025.11.3-cp314-cp314t-win_amd64.whl", hash = "sha256:adad1a1bcf1c9e76346e091d22d23ac54ef28e1365117d99521631078dfec9de", size = 284286, upload_time = "2025-11-03T21:33:43.324Z" }, + { url = "https://files.pythonhosted.org/packages/20/31/32c0c4610cbc070362bf1d2e4ea86d1ea29014d400a6d6c2486fcfd57766/regex-2025.11.3-cp314-cp314t-win_arm64.whl", hash = 
"sha256:c54f768482cef41e219720013cd05933b6f971d9562544d691c68699bf2b6801", size = 274741, upload_time = "2025-11-03T21:33:45.557Z" }, ] [[package]] @@ -2551,18 +2630,18 @@ dependencies = [ { name = "idna" }, { name = "urllib3" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517 } +sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload_time = "2025-08-18T20:46:02.573Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738 }, + { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload_time = "2025-08-18T20:46:00.542Z" }, ] [[package]] name = "retrying" version = "1.4.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c8/5a/b17e1e257d3e6f2e7758930e1256832c9ddd576f8631781e6a072914befa/retrying-1.4.2.tar.gz", hash = "sha256:d102e75d53d8d30b88562d45361d6c6c934da06fab31bd81c0420acb97a8ba39", size = 11411 } +sdist = { url = "https://files.pythonhosted.org/packages/c8/5a/b17e1e257d3e6f2e7758930e1256832c9ddd576f8631781e6a072914befa/retrying-1.4.2.tar.gz", hash = "sha256:d102e75d53d8d30b88562d45361d6c6c934da06fab31bd81c0420acb97a8ba39", size = 11411, upload_time = "2025-08-03T03:35:25.189Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/67/f3/6cd296376653270ac1b423bb30bd70942d9916b6978c6f40472d6ac038e7/retrying-1.4.2-py3-none-any.whl", hash = "sha256:bbc004aeb542a74f3569aeddf42a2516efefcdaff90df0eb38fbfbf19f179f59", size = 10859 }, + { url = "https://files.pythonhosted.org/packages/67/f3/6cd296376653270ac1b423bb30bd70942d9916b6978c6f40472d6ac038e7/retrying-1.4.2-py3-none-any.whl", hash = "sha256:bbc004aeb542a74f3569aeddf42a2516efefcdaff90df0eb38fbfbf19f179f59", size = 10859, upload_time = "2025-08-03T03:35:23.829Z" }, ] [[package]] @@ -2573,9 +2652,9 @@ dependencies = [ { name = "markdown-it-py" }, { name = "pygments" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/fb/d2/8920e102050a0de7bfabeb4c4614a49248cf8d5d7a8d01885fbb24dc767a/rich-14.2.0.tar.gz", hash = "sha256:73ff50c7c0c1c77c8243079283f4edb376f0f6442433aecb8ce7e6d0b92d1fe4", size = 219990 } +sdist = { url = "https://files.pythonhosted.org/packages/fb/d2/8920e102050a0de7bfabeb4c4614a49248cf8d5d7a8d01885fbb24dc767a/rich-14.2.0.tar.gz", hash = "sha256:73ff50c7c0c1c77c8243079283f4edb376f0f6442433aecb8ce7e6d0b92d1fe4", size = 219990, upload_time = "2025-10-09T14:16:53.064Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/25/7a/b0178788f8dc6cafce37a212c99565fa1fe7872c70c6c9c1e1a372d9d88f/rich-14.2.0-py3-none-any.whl", hash = "sha256:76bc51fe2e57d2b1be1f96c524b890b816e334ab4c1e45888799bfaab0021edd", size = 243393 }, + { url = 
"https://files.pythonhosted.org/packages/25/7a/b0178788f8dc6cafce37a212c99565fa1fe7872c70c6c9c1e1a372d9d88f/rich-14.2.0-py3-none-any.whl", hash = "sha256:76bc51fe2e57d2b1be1f96c524b890b816e334ab4c1e45888799bfaab0021edd", size = 243393, upload_time = "2025-10-09T14:16:51.245Z" }, ] [[package]] @@ -2588,87 +2667,112 @@ dependencies = [ { name = "numpy" }, { name = "six" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/e2/c5/9136736c37022a6ad27fea38f3111eb8f02fe75d067f9a985cc358653102/rouge_score-0.1.2.tar.gz", hash = "sha256:c7d4da2683e68c9abf0135ef915d63a46643666f848e558a1b9f7ead17ff0f04", size = 17400 } +sdist = { url = "https://files.pythonhosted.org/packages/e2/c5/9136736c37022a6ad27fea38f3111eb8f02fe75d067f9a985cc358653102/rouge_score-0.1.2.tar.gz", hash = "sha256:c7d4da2683e68c9abf0135ef915d63a46643666f848e558a1b9f7ead17ff0f04", size = 17400, upload_time = "2022-07-22T22:46:22.909Z" } [[package]] name = "rpds-py" version = "0.30.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/20/af/3f2f423103f1113b36230496629986e0ef7e199d2aa8392452b484b38ced/rpds_py-0.30.0.tar.gz", hash = "sha256:dd8ff7cf90014af0c0f787eea34794ebf6415242ee1d6fa91eaba725cc441e84", size = 69469 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/03/e7/98a2f4ac921d82f33e03f3835f5bf3a4a40aa1bfdc57975e74a97b2b4bdd/rpds_py-0.30.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a161f20d9a43006833cd7068375a94d035714d73a172b681d8881820600abfad", size = 375086 }, - { url = "https://files.pythonhosted.org/packages/4d/a1/bca7fd3d452b272e13335db8d6b0b3ecde0f90ad6f16f3328c6fb150c889/rpds_py-0.30.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6abc8880d9d036ecaafe709079969f56e876fcf107f7a8e9920ba6d5a3878d05", size = 359053 }, - { url = "https://files.pythonhosted.org/packages/65/1c/ae157e83a6357eceff62ba7e52113e3ec4834a84cfe07fa4b0757a7d105f/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca28829ae5f5d569bb62a79512c842a03a12576375d5ece7d2cadf8abe96ec28", size = 390763 }, - { url = "https://files.pythonhosted.org/packages/d4/36/eb2eb8515e2ad24c0bd43c3ee9cd74c33f7ca6430755ccdb240fd3144c44/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a1010ed9524c73b94d15919ca4d41d8780980e1765babf85f9a2f90d247153dd", size = 408951 }, - { url = "https://files.pythonhosted.org/packages/d6/65/ad8dc1784a331fabbd740ef6f71ce2198c7ed0890dab595adb9ea2d775a1/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f8d1736cfb49381ba528cd5baa46f82fdc65c06e843dab24dd70b63d09121b3f", size = 514622 }, - { url = "https://files.pythonhosted.org/packages/63/8e/0cfa7ae158e15e143fe03993b5bcd743a59f541f5952e1546b1ac1b5fd45/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d948b135c4693daff7bc2dcfc4ec57237a29bd37e60c2fabf5aff2bbacf3e2f1", size = 414492 }, - { url = "https://files.pythonhosted.org/packages/60/1b/6f8f29f3f995c7ffdde46a626ddccd7c63aefc0efae881dc13b6e5d5bb16/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47f236970bccb2233267d89173d3ad2703cd36a0e2a6e92d0560d333871a3d23", size = 394080 }, - { url = "https://files.pythonhosted.org/packages/6d/d5/a266341051a7a3ca2f4b750a3aa4abc986378431fc2da508c5034d081b70/rpds_py-0.30.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:2e6ecb5a5bcacf59c3f912155044479af1d0b6681280048b338b28e364aca1f6", size = 408680 }, - { 
url = "https://files.pythonhosted.org/packages/10/3b/71b725851df9ab7a7a4e33cf36d241933da66040d195a84781f49c50490c/rpds_py-0.30.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a8fa71a2e078c527c3e9dc9fc5a98c9db40bcc8a92b4e8858e36d329f8684b51", size = 423589 }, - { url = "https://files.pythonhosted.org/packages/00/2b/e59e58c544dc9bd8bd8384ecdb8ea91f6727f0e37a7131baeff8d6f51661/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:73c67f2db7bc334e518d097c6d1e6fed021bbc9b7d678d6cc433478365d1d5f5", size = 573289 }, - { url = "https://files.pythonhosted.org/packages/da/3e/a18e6f5b460893172a7d6a680e86d3b6bc87a54c1f0b03446a3c8c7b588f/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5ba103fb455be00f3b1c2076c9d4264bfcb037c976167a6047ed82f23153f02e", size = 599737 }, - { url = "https://files.pythonhosted.org/packages/5c/e2/714694e4b87b85a18e2c243614974413c60aa107fd815b8cbc42b873d1d7/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7cee9c752c0364588353e627da8a7e808a66873672bcb5f52890c33fd965b394", size = 563120 }, - { url = "https://files.pythonhosted.org/packages/6f/ab/d5d5e3bcedb0a77f4f613706b750e50a5a3ba1c15ccd3665ecc636c968fd/rpds_py-0.30.0-cp312-cp312-win32.whl", hash = "sha256:1ab5b83dbcf55acc8b08fc62b796ef672c457b17dbd7820a11d6c52c06839bdf", size = 223782 }, - { url = "https://files.pythonhosted.org/packages/39/3b/f786af9957306fdc38a74cef405b7b93180f481fb48453a114bb6465744a/rpds_py-0.30.0-cp312-cp312-win_amd64.whl", hash = "sha256:a090322ca841abd453d43456ac34db46e8b05fd9b3b4ac0c78bcde8b089f959b", size = 240463 }, - { url = "https://files.pythonhosted.org/packages/f3/d2/b91dc748126c1559042cfe41990deb92c4ee3e2b415f6b5234969ffaf0cc/rpds_py-0.30.0-cp312-cp312-win_arm64.whl", hash = "sha256:669b1805bd639dd2989b281be2cfd951c6121b65e729d9b843e9639ef1fd555e", size = 230868 }, - { url = "https://files.pythonhosted.org/packages/ed/dc/d61221eb88ff410de3c49143407f6f3147acf2538c86f2ab7ce65ae7d5f9/rpds_py-0.30.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:f83424d738204d9770830d35290ff3273fbb02b41f919870479fab14b9d303b2", size = 374887 }, - { url = "https://files.pythonhosted.org/packages/fd/32/55fb50ae104061dbc564ef15cc43c013dc4a9f4527a1f4d99baddf56fe5f/rpds_py-0.30.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e7536cd91353c5273434b4e003cbda89034d67e7710eab8761fd918ec6c69cf8", size = 358904 }, - { url = "https://files.pythonhosted.org/packages/58/70/faed8186300e3b9bdd138d0273109784eea2396c68458ed580f885dfe7ad/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2771c6c15973347f50fece41fc447c054b7ac2ae0502388ce3b6738cd366e3d4", size = 389945 }, - { url = "https://files.pythonhosted.org/packages/bd/a8/073cac3ed2c6387df38f71296d002ab43496a96b92c823e76f46b8af0543/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0a59119fc6e3f460315fe9d08149f8102aa322299deaa5cab5b40092345c2136", size = 407783 }, - { url = "https://files.pythonhosted.org/packages/77/57/5999eb8c58671f1c11eba084115e77a8899d6e694d2a18f69f0ba471ec8b/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:76fec018282b4ead0364022e3c54b60bf368b9d926877957a8624b58419169b7", size = 515021 }, - { url = "https://files.pythonhosted.org/packages/e0/af/5ab4833eadc36c0a8ed2bc5c0de0493c04f6c06de223170bd0798ff98ced/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:692bef75a5525db97318e8cd061542b5a79812d711ea03dbc1f6f8dbb0c5f0d2", size = 414589 }, - { url = "https://files.pythonhosted.org/packages/b7/de/f7192e12b21b9e9a68a6d0f249b4af3fdcdff8418be0767a627564afa1f1/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9027da1ce107104c50c81383cae773ef5c24d296dd11c99e2629dbd7967a20c6", size = 394025 }, - { url = "https://files.pythonhosted.org/packages/91/c4/fc70cd0249496493500e7cc2de87504f5aa6509de1e88623431fec76d4b6/rpds_py-0.30.0-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:9cf69cdda1f5968a30a359aba2f7f9aa648a9ce4b580d6826437f2b291cfc86e", size = 408895 }, - { url = "https://files.pythonhosted.org/packages/58/95/d9275b05ab96556fefff73a385813eb66032e4c99f411d0795372d9abcea/rpds_py-0.30.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a4796a717bf12b9da9d3ad002519a86063dcac8988b030e405704ef7d74d2d9d", size = 422799 }, - { url = "https://files.pythonhosted.org/packages/06/c1/3088fc04b6624eb12a57eb814f0d4997a44b0d208d6cace713033ff1a6ba/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5d4c2aa7c50ad4728a094ebd5eb46c452e9cb7edbfdb18f9e1221f597a73e1e7", size = 572731 }, - { url = "https://files.pythonhosted.org/packages/d8/42/c612a833183b39774e8ac8fecae81263a68b9583ee343db33ab571a7ce55/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ba81a9203d07805435eb06f536d95a266c21e5b2dfbf6517748ca40c98d19e31", size = 599027 }, - { url = "https://files.pythonhosted.org/packages/5f/60/525a50f45b01d70005403ae0e25f43c0384369ad24ffe46e8d9068b50086/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:945dccface01af02675628334f7cf49c2af4c1c904748efc5cf7bbdf0b579f95", size = 563020 }, - { url = "https://files.pythonhosted.org/packages/0b/5d/47c4655e9bcd5ca907148535c10e7d489044243cc9941c16ed7cd53be91d/rpds_py-0.30.0-cp313-cp313-win32.whl", hash = "sha256:b40fb160a2db369a194cb27943582b38f79fc4887291417685f3ad693c5a1d5d", size = 223139 }, - { url = "https://files.pythonhosted.org/packages/f2/e1/485132437d20aa4d3e1d8b3fb5a5e65aa8139f1e097080c2a8443201742c/rpds_py-0.30.0-cp313-cp313-win_amd64.whl", hash = "sha256:806f36b1b605e2d6a72716f321f20036b9489d29c51c91f4dd29a3e3afb73b15", size = 240224 }, - { url = "https://files.pythonhosted.org/packages/24/95/ffd128ed1146a153d928617b0ef673960130be0009c77d8fbf0abe306713/rpds_py-0.30.0-cp313-cp313-win_arm64.whl", hash = "sha256:d96c2086587c7c30d44f31f42eae4eac89b60dabbac18c7669be3700f13c3ce1", size = 230645 }, - { url = "https://files.pythonhosted.org/packages/ff/1b/b10de890a0def2a319a2626334a7f0ae388215eb60914dbac8a3bae54435/rpds_py-0.30.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:eb0b93f2e5c2189ee831ee43f156ed34e2a89a78a66b98cadad955972548be5a", size = 364443 }, - { url = "https://files.pythonhosted.org/packages/0d/bf/27e39f5971dc4f305a4fb9c672ca06f290f7c4e261c568f3dea16a410d47/rpds_py-0.30.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:922e10f31f303c7c920da8981051ff6d8c1a56207dbdf330d9047f6d30b70e5e", size = 353375 }, - { url = "https://files.pythonhosted.org/packages/40/58/442ada3bba6e8e6615fc00483135c14a7538d2ffac30e2d933ccf6852232/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cdc62c8286ba9bf7f47befdcea13ea0e26bf294bda99758fd90535cbaf408000", size = 383850 }, - { url = 
"https://files.pythonhosted.org/packages/14/14/f59b0127409a33c6ef6f5c1ebd5ad8e32d7861c9c7adfa9a624fc3889f6c/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:47f9a91efc418b54fb8190a6b4aa7813a23fb79c51f4bb84e418f5476c38b8db", size = 392812 }, - { url = "https://files.pythonhosted.org/packages/b3/66/e0be3e162ac299b3a22527e8913767d869e6cc75c46bd844aa43fb81ab62/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1f3587eb9b17f3789ad50824084fa6f81921bbf9a795826570bda82cb3ed91f2", size = 517841 }, - { url = "https://files.pythonhosted.org/packages/3d/55/fa3b9cf31d0c963ecf1ba777f7cf4b2a2c976795ac430d24a1f43d25a6ba/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:39c02563fc592411c2c61d26b6c5fe1e51eaa44a75aa2c8735ca88b0d9599daa", size = 408149 }, - { url = "https://files.pythonhosted.org/packages/60/ca/780cf3b1a32b18c0f05c441958d3758f02544f1d613abf9488cd78876378/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51a1234d8febafdfd33a42d97da7a43f5dcb120c1060e352a3fbc0c6d36e2083", size = 383843 }, - { url = "https://files.pythonhosted.org/packages/82/86/d5f2e04f2aa6247c613da0c1dd87fcd08fa17107e858193566048a1e2f0a/rpds_py-0.30.0-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:eb2c4071ab598733724c08221091e8d80e89064cd472819285a9ab0f24bcedb9", size = 396507 }, - { url = "https://files.pythonhosted.org/packages/4b/9a/453255d2f769fe44e07ea9785c8347edaf867f7026872e76c1ad9f7bed92/rpds_py-0.30.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6bdfdb946967d816e6adf9a3d8201bfad269c67efe6cefd7093ef959683c8de0", size = 414949 }, - { url = "https://files.pythonhosted.org/packages/a3/31/622a86cdc0c45d6df0e9ccb6becdba5074735e7033c20e401a6d9d0e2ca0/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c77afbd5f5250bf27bf516c7c4a016813eb2d3e116139aed0096940c5982da94", size = 565790 }, - { url = "https://files.pythonhosted.org/packages/1c/5d/15bbf0fb4a3f58a3b1c67855ec1efcc4ceaef4e86644665fff03e1b66d8d/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:61046904275472a76c8c90c9ccee9013d70a6d0f73eecefd38c1ae7c39045a08", size = 590217 }, - { url = "https://files.pythonhosted.org/packages/6d/61/21b8c41f68e60c8cc3b2e25644f0e3681926020f11d06ab0b78e3c6bbff1/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c5f36a861bc4b7da6516dbdf302c55313afa09b81931e8280361a4f6c9a2d27", size = 555806 }, - { url = "https://files.pythonhosted.org/packages/f9/39/7e067bb06c31de48de3eb200f9fc7c58982a4d3db44b07e73963e10d3be9/rpds_py-0.30.0-cp313-cp313t-win32.whl", hash = "sha256:3d4a69de7a3e50ffc214ae16d79d8fbb0922972da0356dcf4d0fdca2878559c6", size = 211341 }, - { url = "https://files.pythonhosted.org/packages/0a/4d/222ef0b46443cf4cf46764d9c630f3fe4abaa7245be9417e56e9f52b8f65/rpds_py-0.30.0-cp313-cp313t-win_amd64.whl", hash = "sha256:f14fc5df50a716f7ece6a80b6c78bb35ea2ca47c499e422aa4463455dd96d56d", size = 225768 }, - { url = "https://files.pythonhosted.org/packages/86/81/dad16382ebbd3d0e0328776d8fd7ca94220e4fa0798d1dc5e7da48cb3201/rpds_py-0.30.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:68f19c879420aa08f61203801423f6cd5ac5f0ac4ac82a2368a9fcd6a9a075e0", size = 362099 }, - { url = "https://files.pythonhosted.org/packages/2b/60/19f7884db5d5603edf3c6bce35408f45ad3e97e10007df0e17dd57af18f8/rpds_py-0.30.0-cp314-cp314-macosx_11_0_arm64.whl", hash = 
"sha256:ec7c4490c672c1a0389d319b3a9cfcd098dcdc4783991553c332a15acf7249be", size = 353192 }, - { url = "https://files.pythonhosted.org/packages/bf/c4/76eb0e1e72d1a9c4703c69607cec123c29028bff28ce41588792417098ac/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f251c812357a3fed308d684a5079ddfb9d933860fc6de89f2b7ab00da481e65f", size = 384080 }, - { url = "https://files.pythonhosted.org/packages/72/87/87ea665e92f3298d1b26d78814721dc39ed8d2c74b86e83348d6b48a6f31/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ac98b175585ecf4c0348fd7b29c3864bda53b805c773cbf7bfdaffc8070c976f", size = 394841 }, - { url = "https://files.pythonhosted.org/packages/77/ad/7783a89ca0587c15dcbf139b4a8364a872a25f861bdb88ed99f9b0dec985/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3e62880792319dbeb7eb866547f2e35973289e7d5696c6e295476448f5b63c87", size = 516670 }, - { url = "https://files.pythonhosted.org/packages/5b/3c/2882bdac942bd2172f3da574eab16f309ae10a3925644e969536553cb4ee/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4e7fc54e0900ab35d041b0601431b0a0eb495f0851a0639b6ef90f7741b39a18", size = 408005 }, - { url = "https://files.pythonhosted.org/packages/ce/81/9a91c0111ce1758c92516a3e44776920b579d9a7c09b2b06b642d4de3f0f/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47e77dc9822d3ad616c3d5759ea5631a75e5809d5a28707744ef79d7a1bcfcad", size = 382112 }, - { url = "https://files.pythonhosted.org/packages/cf/8e/1da49d4a107027e5fbc64daeab96a0706361a2918da10cb41769244b805d/rpds_py-0.30.0-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:b4dc1a6ff022ff85ecafef7979a2c6eb423430e05f1165d6688234e62ba99a07", size = 399049 }, - { url = "https://files.pythonhosted.org/packages/df/5a/7ee239b1aa48a127570ec03becbb29c9d5a9eb092febbd1699d567cae859/rpds_py-0.30.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4559c972db3a360808309e06a74628b95eaccbf961c335c8fe0d590cf587456f", size = 415661 }, - { url = "https://files.pythonhosted.org/packages/70/ea/caa143cf6b772f823bc7929a45da1fa83569ee49b11d18d0ada7f5ee6fd6/rpds_py-0.30.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:0ed177ed9bded28f8deb6ab40c183cd1192aa0de40c12f38be4d59cd33cb5c65", size = 565606 }, - { url = "https://files.pythonhosted.org/packages/64/91/ac20ba2d69303f961ad8cf55bf7dbdb4763f627291ba3d0d7d67333cced9/rpds_py-0.30.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:ad1fa8db769b76ea911cb4e10f049d80bf518c104f15b3edb2371cc65375c46f", size = 591126 }, - { url = "https://files.pythonhosted.org/packages/21/20/7ff5f3c8b00c8a95f75985128c26ba44503fb35b8e0259d812766ea966c7/rpds_py-0.30.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:46e83c697b1f1c72b50e5ee5adb4353eef7406fb3f2043d64c33f20ad1c2fc53", size = 553371 }, - { url = "https://files.pythonhosted.org/packages/72/c7/81dadd7b27c8ee391c132a6b192111ca58d866577ce2d9b0ca157552cce0/rpds_py-0.30.0-cp314-cp314-win32.whl", hash = "sha256:ee454b2a007d57363c2dfd5b6ca4a5d7e2c518938f8ed3b706e37e5d470801ed", size = 215298 }, - { url = "https://files.pythonhosted.org/packages/3e/d2/1aaac33287e8cfb07aab2e6b8ac1deca62f6f65411344f1433c55e6f3eb8/rpds_py-0.30.0-cp314-cp314-win_amd64.whl", hash = "sha256:95f0802447ac2d10bcc69f6dc28fe95fdf17940367b21d34e34c737870758950", size = 228604 }, - { url = 
"https://files.pythonhosted.org/packages/e8/95/ab005315818cc519ad074cb7784dae60d939163108bd2b394e60dc7b5461/rpds_py-0.30.0-cp314-cp314-win_arm64.whl", hash = "sha256:613aa4771c99f03346e54c3f038e4cc574ac09a3ddfb0e8878487335e96dead6", size = 222391 }, - { url = "https://files.pythonhosted.org/packages/9e/68/154fe0194d83b973cdedcdcc88947a2752411165930182ae41d983dcefa6/rpds_py-0.30.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:7e6ecfcb62edfd632e56983964e6884851786443739dbfe3582947e87274f7cb", size = 364868 }, - { url = "https://files.pythonhosted.org/packages/83/69/8bbc8b07ec854d92a8b75668c24d2abcb1719ebf890f5604c61c9369a16f/rpds_py-0.30.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:a1d0bc22a7cdc173fedebb73ef81e07faef93692b8c1ad3733b67e31e1b6e1b8", size = 353747 }, - { url = "https://files.pythonhosted.org/packages/ab/00/ba2e50183dbd9abcce9497fa5149c62b4ff3e22d338a30d690f9af970561/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d08f00679177226c4cb8c5265012eea897c8ca3b93f429e546600c971bcbae7", size = 383795 }, - { url = "https://files.pythonhosted.org/packages/05/6f/86f0272b84926bcb0e4c972262f54223e8ecc556b3224d281e6598fc9268/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5965af57d5848192c13534f90f9dd16464f3c37aaf166cc1da1cae1fd5a34898", size = 393330 }, - { url = "https://files.pythonhosted.org/packages/cb/e9/0e02bb2e6dc63d212641da45df2b0bf29699d01715913e0d0f017ee29438/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9a4e86e34e9ab6b667c27f3211ca48f73dba7cd3d90f8d5b11be56e5dbc3fb4e", size = 518194 }, - { url = "https://files.pythonhosted.org/packages/ee/ca/be7bca14cf21513bdf9c0606aba17d1f389ea2b6987035eb4f62bd923f25/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5d3e6b26f2c785d65cc25ef1e5267ccbe1b069c5c21b8cc724efee290554419", size = 408340 }, - { url = "https://files.pythonhosted.org/packages/c2/c7/736e00ebf39ed81d75544c0da6ef7b0998f8201b369acf842f9a90dc8fce/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:626a7433c34566535b6e56a1b39a7b17ba961e97ce3b80ec62e6f1312c025551", size = 383765 }, - { url = "https://files.pythonhosted.org/packages/4a/3f/da50dfde9956aaf365c4adc9533b100008ed31aea635f2b8d7b627e25b49/rpds_py-0.30.0-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:acd7eb3f4471577b9b5a41baf02a978e8bdeb08b4b355273994f8b87032000a8", size = 396834 }, - { url = "https://files.pythonhosted.org/packages/4e/00/34bcc2565b6020eab2623349efbdec810676ad571995911f1abdae62a3a0/rpds_py-0.30.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fe5fa731a1fa8a0a56b0977413f8cacac1768dad38d16b3a296712709476fbd5", size = 415470 }, - { url = "https://files.pythonhosted.org/packages/8c/28/882e72b5b3e6f718d5453bd4d0d9cf8df36fddeb4ddbbab17869d5868616/rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:74a3243a411126362712ee1524dfc90c650a503502f135d54d1b352bd01f2404", size = 565630 }, - { url = "https://files.pythonhosted.org/packages/3b/97/04a65539c17692de5b85c6e293520fd01317fd878ea1995f0367d4532fb1/rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:3e8eeb0544f2eb0d2581774be4c3410356eba189529a6b3e36bbbf9696175856", size = 591148 }, - { url = "https://files.pythonhosted.org/packages/85/70/92482ccffb96f5441aab93e26c4d66489eb599efdcf96fad90c14bbfb976/rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = 
"sha256:dbd936cde57abfee19ab3213cf9c26be06d60750e60a8e4dd85d1ab12c8b1f40", size = 556030 }, - { url = "https://files.pythonhosted.org/packages/20/53/7c7e784abfa500a2b6b583b147ee4bb5a2b3747a9166bab52fec4b5b5e7d/rpds_py-0.30.0-cp314-cp314t-win32.whl", hash = "sha256:dc824125c72246d924f7f796b4f63c1e9dc810c7d9e2355864b3c3a73d59ade0", size = 211570 }, - { url = "https://files.pythonhosted.org/packages/d0/02/fa464cdfbe6b26e0600b62c528b72d8608f5cc49f96b8d6e38c95d60c676/rpds_py-0.30.0-cp314-cp314t-win_amd64.whl", hash = "sha256:27f4b0e92de5bfbc6f86e43959e6edd1425c33b5e69aab0984a72047f2bcf1e3", size = 226532 }, +sdist = { url = "https://files.pythonhosted.org/packages/20/af/3f2f423103f1113b36230496629986e0ef7e199d2aa8392452b484b38ced/rpds_py-0.30.0.tar.gz", hash = "sha256:dd8ff7cf90014af0c0f787eea34794ebf6415242ee1d6fa91eaba725cc441e84", size = 69469, upload_time = "2025-11-30T20:24:38.837Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/03/e7/98a2f4ac921d82f33e03f3835f5bf3a4a40aa1bfdc57975e74a97b2b4bdd/rpds_py-0.30.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a161f20d9a43006833cd7068375a94d035714d73a172b681d8881820600abfad", size = 375086, upload_time = "2025-11-30T20:22:17.93Z" }, + { url = "https://files.pythonhosted.org/packages/4d/a1/bca7fd3d452b272e13335db8d6b0b3ecde0f90ad6f16f3328c6fb150c889/rpds_py-0.30.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6abc8880d9d036ecaafe709079969f56e876fcf107f7a8e9920ba6d5a3878d05", size = 359053, upload_time = "2025-11-30T20:22:19.297Z" }, + { url = "https://files.pythonhosted.org/packages/65/1c/ae157e83a6357eceff62ba7e52113e3ec4834a84cfe07fa4b0757a7d105f/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca28829ae5f5d569bb62a79512c842a03a12576375d5ece7d2cadf8abe96ec28", size = 390763, upload_time = "2025-11-30T20:22:21.661Z" }, + { url = "https://files.pythonhosted.org/packages/d4/36/eb2eb8515e2ad24c0bd43c3ee9cd74c33f7ca6430755ccdb240fd3144c44/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a1010ed9524c73b94d15919ca4d41d8780980e1765babf85f9a2f90d247153dd", size = 408951, upload_time = "2025-11-30T20:22:23.408Z" }, + { url = "https://files.pythonhosted.org/packages/d6/65/ad8dc1784a331fabbd740ef6f71ce2198c7ed0890dab595adb9ea2d775a1/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f8d1736cfb49381ba528cd5baa46f82fdc65c06e843dab24dd70b63d09121b3f", size = 514622, upload_time = "2025-11-30T20:22:25.16Z" }, + { url = "https://files.pythonhosted.org/packages/63/8e/0cfa7ae158e15e143fe03993b5bcd743a59f541f5952e1546b1ac1b5fd45/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d948b135c4693daff7bc2dcfc4ec57237a29bd37e60c2fabf5aff2bbacf3e2f1", size = 414492, upload_time = "2025-11-30T20:22:26.505Z" }, + { url = "https://files.pythonhosted.org/packages/60/1b/6f8f29f3f995c7ffdde46a626ddccd7c63aefc0efae881dc13b6e5d5bb16/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47f236970bccb2233267d89173d3ad2703cd36a0e2a6e92d0560d333871a3d23", size = 394080, upload_time = "2025-11-30T20:22:27.934Z" }, + { url = "https://files.pythonhosted.org/packages/6d/d5/a266341051a7a3ca2f4b750a3aa4abc986378431fc2da508c5034d081b70/rpds_py-0.30.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:2e6ecb5a5bcacf59c3f912155044479af1d0b6681280048b338b28e364aca1f6", size = 408680, upload_time = "2025-11-30T20:22:29.341Z" }, + { url = 
"https://files.pythonhosted.org/packages/10/3b/71b725851df9ab7a7a4e33cf36d241933da66040d195a84781f49c50490c/rpds_py-0.30.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a8fa71a2e078c527c3e9dc9fc5a98c9db40bcc8a92b4e8858e36d329f8684b51", size = 423589, upload_time = "2025-11-30T20:22:31.469Z" }, + { url = "https://files.pythonhosted.org/packages/00/2b/e59e58c544dc9bd8bd8384ecdb8ea91f6727f0e37a7131baeff8d6f51661/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:73c67f2db7bc334e518d097c6d1e6fed021bbc9b7d678d6cc433478365d1d5f5", size = 573289, upload_time = "2025-11-30T20:22:32.997Z" }, + { url = "https://files.pythonhosted.org/packages/da/3e/a18e6f5b460893172a7d6a680e86d3b6bc87a54c1f0b03446a3c8c7b588f/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5ba103fb455be00f3b1c2076c9d4264bfcb037c976167a6047ed82f23153f02e", size = 599737, upload_time = "2025-11-30T20:22:34.419Z" }, + { url = "https://files.pythonhosted.org/packages/5c/e2/714694e4b87b85a18e2c243614974413c60aa107fd815b8cbc42b873d1d7/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7cee9c752c0364588353e627da8a7e808a66873672bcb5f52890c33fd965b394", size = 563120, upload_time = "2025-11-30T20:22:35.903Z" }, + { url = "https://files.pythonhosted.org/packages/6f/ab/d5d5e3bcedb0a77f4f613706b750e50a5a3ba1c15ccd3665ecc636c968fd/rpds_py-0.30.0-cp312-cp312-win32.whl", hash = "sha256:1ab5b83dbcf55acc8b08fc62b796ef672c457b17dbd7820a11d6c52c06839bdf", size = 223782, upload_time = "2025-11-30T20:22:37.271Z" }, + { url = "https://files.pythonhosted.org/packages/39/3b/f786af9957306fdc38a74cef405b7b93180f481fb48453a114bb6465744a/rpds_py-0.30.0-cp312-cp312-win_amd64.whl", hash = "sha256:a090322ca841abd453d43456ac34db46e8b05fd9b3b4ac0c78bcde8b089f959b", size = 240463, upload_time = "2025-11-30T20:22:39.021Z" }, + { url = "https://files.pythonhosted.org/packages/f3/d2/b91dc748126c1559042cfe41990deb92c4ee3e2b415f6b5234969ffaf0cc/rpds_py-0.30.0-cp312-cp312-win_arm64.whl", hash = "sha256:669b1805bd639dd2989b281be2cfd951c6121b65e729d9b843e9639ef1fd555e", size = 230868, upload_time = "2025-11-30T20:22:40.493Z" }, + { url = "https://files.pythonhosted.org/packages/ed/dc/d61221eb88ff410de3c49143407f6f3147acf2538c86f2ab7ce65ae7d5f9/rpds_py-0.30.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:f83424d738204d9770830d35290ff3273fbb02b41f919870479fab14b9d303b2", size = 374887, upload_time = "2025-11-30T20:22:41.812Z" }, + { url = "https://files.pythonhosted.org/packages/fd/32/55fb50ae104061dbc564ef15cc43c013dc4a9f4527a1f4d99baddf56fe5f/rpds_py-0.30.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e7536cd91353c5273434b4e003cbda89034d67e7710eab8761fd918ec6c69cf8", size = 358904, upload_time = "2025-11-30T20:22:43.479Z" }, + { url = "https://files.pythonhosted.org/packages/58/70/faed8186300e3b9bdd138d0273109784eea2396c68458ed580f885dfe7ad/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2771c6c15973347f50fece41fc447c054b7ac2ae0502388ce3b6738cd366e3d4", size = 389945, upload_time = "2025-11-30T20:22:44.819Z" }, + { url = "https://files.pythonhosted.org/packages/bd/a8/073cac3ed2c6387df38f71296d002ab43496a96b92c823e76f46b8af0543/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0a59119fc6e3f460315fe9d08149f8102aa322299deaa5cab5b40092345c2136", size = 407783, upload_time = "2025-11-30T20:22:46.103Z" }, + { url = 
"https://files.pythonhosted.org/packages/77/57/5999eb8c58671f1c11eba084115e77a8899d6e694d2a18f69f0ba471ec8b/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:76fec018282b4ead0364022e3c54b60bf368b9d926877957a8624b58419169b7", size = 515021, upload_time = "2025-11-30T20:22:47.458Z" }, + { url = "https://files.pythonhosted.org/packages/e0/af/5ab4833eadc36c0a8ed2bc5c0de0493c04f6c06de223170bd0798ff98ced/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:692bef75a5525db97318e8cd061542b5a79812d711ea03dbc1f6f8dbb0c5f0d2", size = 414589, upload_time = "2025-11-30T20:22:48.872Z" }, + { url = "https://files.pythonhosted.org/packages/b7/de/f7192e12b21b9e9a68a6d0f249b4af3fdcdff8418be0767a627564afa1f1/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9027da1ce107104c50c81383cae773ef5c24d296dd11c99e2629dbd7967a20c6", size = 394025, upload_time = "2025-11-30T20:22:50.196Z" }, + { url = "https://files.pythonhosted.org/packages/91/c4/fc70cd0249496493500e7cc2de87504f5aa6509de1e88623431fec76d4b6/rpds_py-0.30.0-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:9cf69cdda1f5968a30a359aba2f7f9aa648a9ce4b580d6826437f2b291cfc86e", size = 408895, upload_time = "2025-11-30T20:22:51.87Z" }, + { url = "https://files.pythonhosted.org/packages/58/95/d9275b05ab96556fefff73a385813eb66032e4c99f411d0795372d9abcea/rpds_py-0.30.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a4796a717bf12b9da9d3ad002519a86063dcac8988b030e405704ef7d74d2d9d", size = 422799, upload_time = "2025-11-30T20:22:53.341Z" }, + { url = "https://files.pythonhosted.org/packages/06/c1/3088fc04b6624eb12a57eb814f0d4997a44b0d208d6cace713033ff1a6ba/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5d4c2aa7c50ad4728a094ebd5eb46c452e9cb7edbfdb18f9e1221f597a73e1e7", size = 572731, upload_time = "2025-11-30T20:22:54.778Z" }, + { url = "https://files.pythonhosted.org/packages/d8/42/c612a833183b39774e8ac8fecae81263a68b9583ee343db33ab571a7ce55/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ba81a9203d07805435eb06f536d95a266c21e5b2dfbf6517748ca40c98d19e31", size = 599027, upload_time = "2025-11-30T20:22:56.212Z" }, + { url = "https://files.pythonhosted.org/packages/5f/60/525a50f45b01d70005403ae0e25f43c0384369ad24ffe46e8d9068b50086/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:945dccface01af02675628334f7cf49c2af4c1c904748efc5cf7bbdf0b579f95", size = 563020, upload_time = "2025-11-30T20:22:58.2Z" }, + { url = "https://files.pythonhosted.org/packages/0b/5d/47c4655e9bcd5ca907148535c10e7d489044243cc9941c16ed7cd53be91d/rpds_py-0.30.0-cp313-cp313-win32.whl", hash = "sha256:b40fb160a2db369a194cb27943582b38f79fc4887291417685f3ad693c5a1d5d", size = 223139, upload_time = "2025-11-30T20:23:00.209Z" }, + { url = "https://files.pythonhosted.org/packages/f2/e1/485132437d20aa4d3e1d8b3fb5a5e65aa8139f1e097080c2a8443201742c/rpds_py-0.30.0-cp313-cp313-win_amd64.whl", hash = "sha256:806f36b1b605e2d6a72716f321f20036b9489d29c51c91f4dd29a3e3afb73b15", size = 240224, upload_time = "2025-11-30T20:23:02.008Z" }, + { url = "https://files.pythonhosted.org/packages/24/95/ffd128ed1146a153d928617b0ef673960130be0009c77d8fbf0abe306713/rpds_py-0.30.0-cp313-cp313-win_arm64.whl", hash = "sha256:d96c2086587c7c30d44f31f42eae4eac89b60dabbac18c7669be3700f13c3ce1", size = 230645, upload_time = "2025-11-30T20:23:03.43Z" }, + { url = 
"https://files.pythonhosted.org/packages/ff/1b/b10de890a0def2a319a2626334a7f0ae388215eb60914dbac8a3bae54435/rpds_py-0.30.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:eb0b93f2e5c2189ee831ee43f156ed34e2a89a78a66b98cadad955972548be5a", size = 364443, upload_time = "2025-11-30T20:23:04.878Z" }, + { url = "https://files.pythonhosted.org/packages/0d/bf/27e39f5971dc4f305a4fb9c672ca06f290f7c4e261c568f3dea16a410d47/rpds_py-0.30.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:922e10f31f303c7c920da8981051ff6d8c1a56207dbdf330d9047f6d30b70e5e", size = 353375, upload_time = "2025-11-30T20:23:06.342Z" }, + { url = "https://files.pythonhosted.org/packages/40/58/442ada3bba6e8e6615fc00483135c14a7538d2ffac30e2d933ccf6852232/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cdc62c8286ba9bf7f47befdcea13ea0e26bf294bda99758fd90535cbaf408000", size = 383850, upload_time = "2025-11-30T20:23:07.825Z" }, + { url = "https://files.pythonhosted.org/packages/14/14/f59b0127409a33c6ef6f5c1ebd5ad8e32d7861c9c7adfa9a624fc3889f6c/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:47f9a91efc418b54fb8190a6b4aa7813a23fb79c51f4bb84e418f5476c38b8db", size = 392812, upload_time = "2025-11-30T20:23:09.228Z" }, + { url = "https://files.pythonhosted.org/packages/b3/66/e0be3e162ac299b3a22527e8913767d869e6cc75c46bd844aa43fb81ab62/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1f3587eb9b17f3789ad50824084fa6f81921bbf9a795826570bda82cb3ed91f2", size = 517841, upload_time = "2025-11-30T20:23:11.186Z" }, + { url = "https://files.pythonhosted.org/packages/3d/55/fa3b9cf31d0c963ecf1ba777f7cf4b2a2c976795ac430d24a1f43d25a6ba/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:39c02563fc592411c2c61d26b6c5fe1e51eaa44a75aa2c8735ca88b0d9599daa", size = 408149, upload_time = "2025-11-30T20:23:12.864Z" }, + { url = "https://files.pythonhosted.org/packages/60/ca/780cf3b1a32b18c0f05c441958d3758f02544f1d613abf9488cd78876378/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51a1234d8febafdfd33a42d97da7a43f5dcb120c1060e352a3fbc0c6d36e2083", size = 383843, upload_time = "2025-11-30T20:23:14.638Z" }, + { url = "https://files.pythonhosted.org/packages/82/86/d5f2e04f2aa6247c613da0c1dd87fcd08fa17107e858193566048a1e2f0a/rpds_py-0.30.0-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:eb2c4071ab598733724c08221091e8d80e89064cd472819285a9ab0f24bcedb9", size = 396507, upload_time = "2025-11-30T20:23:16.105Z" }, + { url = "https://files.pythonhosted.org/packages/4b/9a/453255d2f769fe44e07ea9785c8347edaf867f7026872e76c1ad9f7bed92/rpds_py-0.30.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6bdfdb946967d816e6adf9a3d8201bfad269c67efe6cefd7093ef959683c8de0", size = 414949, upload_time = "2025-11-30T20:23:17.539Z" }, + { url = "https://files.pythonhosted.org/packages/a3/31/622a86cdc0c45d6df0e9ccb6becdba5074735e7033c20e401a6d9d0e2ca0/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c77afbd5f5250bf27bf516c7c4a016813eb2d3e116139aed0096940c5982da94", size = 565790, upload_time = "2025-11-30T20:23:19.029Z" }, + { url = "https://files.pythonhosted.org/packages/1c/5d/15bbf0fb4a3f58a3b1c67855ec1efcc4ceaef4e86644665fff03e1b66d8d/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:61046904275472a76c8c90c9ccee9013d70a6d0f73eecefd38c1ae7c39045a08", size = 590217, upload_time = 
"2025-11-30T20:23:20.885Z" }, + { url = "https://files.pythonhosted.org/packages/6d/61/21b8c41f68e60c8cc3b2e25644f0e3681926020f11d06ab0b78e3c6bbff1/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c5f36a861bc4b7da6516dbdf302c55313afa09b81931e8280361a4f6c9a2d27", size = 555806, upload_time = "2025-11-30T20:23:22.488Z" }, + { url = "https://files.pythonhosted.org/packages/f9/39/7e067bb06c31de48de3eb200f9fc7c58982a4d3db44b07e73963e10d3be9/rpds_py-0.30.0-cp313-cp313t-win32.whl", hash = "sha256:3d4a69de7a3e50ffc214ae16d79d8fbb0922972da0356dcf4d0fdca2878559c6", size = 211341, upload_time = "2025-11-30T20:23:24.449Z" }, + { url = "https://files.pythonhosted.org/packages/0a/4d/222ef0b46443cf4cf46764d9c630f3fe4abaa7245be9417e56e9f52b8f65/rpds_py-0.30.0-cp313-cp313t-win_amd64.whl", hash = "sha256:f14fc5df50a716f7ece6a80b6c78bb35ea2ca47c499e422aa4463455dd96d56d", size = 225768, upload_time = "2025-11-30T20:23:25.908Z" }, + { url = "https://files.pythonhosted.org/packages/86/81/dad16382ebbd3d0e0328776d8fd7ca94220e4fa0798d1dc5e7da48cb3201/rpds_py-0.30.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:68f19c879420aa08f61203801423f6cd5ac5f0ac4ac82a2368a9fcd6a9a075e0", size = 362099, upload_time = "2025-11-30T20:23:27.316Z" }, + { url = "https://files.pythonhosted.org/packages/2b/60/19f7884db5d5603edf3c6bce35408f45ad3e97e10007df0e17dd57af18f8/rpds_py-0.30.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ec7c4490c672c1a0389d319b3a9cfcd098dcdc4783991553c332a15acf7249be", size = 353192, upload_time = "2025-11-30T20:23:29.151Z" }, + { url = "https://files.pythonhosted.org/packages/bf/c4/76eb0e1e72d1a9c4703c69607cec123c29028bff28ce41588792417098ac/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f251c812357a3fed308d684a5079ddfb9d933860fc6de89f2b7ab00da481e65f", size = 384080, upload_time = "2025-11-30T20:23:30.785Z" }, + { url = "https://files.pythonhosted.org/packages/72/87/87ea665e92f3298d1b26d78814721dc39ed8d2c74b86e83348d6b48a6f31/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ac98b175585ecf4c0348fd7b29c3864bda53b805c773cbf7bfdaffc8070c976f", size = 394841, upload_time = "2025-11-30T20:23:32.209Z" }, + { url = "https://files.pythonhosted.org/packages/77/ad/7783a89ca0587c15dcbf139b4a8364a872a25f861bdb88ed99f9b0dec985/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3e62880792319dbeb7eb866547f2e35973289e7d5696c6e295476448f5b63c87", size = 516670, upload_time = "2025-11-30T20:23:33.742Z" }, + { url = "https://files.pythonhosted.org/packages/5b/3c/2882bdac942bd2172f3da574eab16f309ae10a3925644e969536553cb4ee/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4e7fc54e0900ab35d041b0601431b0a0eb495f0851a0639b6ef90f7741b39a18", size = 408005, upload_time = "2025-11-30T20:23:35.253Z" }, + { url = "https://files.pythonhosted.org/packages/ce/81/9a91c0111ce1758c92516a3e44776920b579d9a7c09b2b06b642d4de3f0f/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47e77dc9822d3ad616c3d5759ea5631a75e5809d5a28707744ef79d7a1bcfcad", size = 382112, upload_time = "2025-11-30T20:23:36.842Z" }, + { url = "https://files.pythonhosted.org/packages/cf/8e/1da49d4a107027e5fbc64daeab96a0706361a2918da10cb41769244b805d/rpds_py-0.30.0-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:b4dc1a6ff022ff85ecafef7979a2c6eb423430e05f1165d6688234e62ba99a07", size = 399049, upload_time = 
"2025-11-30T20:23:38.343Z" }, + { url = "https://files.pythonhosted.org/packages/df/5a/7ee239b1aa48a127570ec03becbb29c9d5a9eb092febbd1699d567cae859/rpds_py-0.30.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4559c972db3a360808309e06a74628b95eaccbf961c335c8fe0d590cf587456f", size = 415661, upload_time = "2025-11-30T20:23:40.263Z" }, + { url = "https://files.pythonhosted.org/packages/70/ea/caa143cf6b772f823bc7929a45da1fa83569ee49b11d18d0ada7f5ee6fd6/rpds_py-0.30.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:0ed177ed9bded28f8deb6ab40c183cd1192aa0de40c12f38be4d59cd33cb5c65", size = 565606, upload_time = "2025-11-30T20:23:42.186Z" }, + { url = "https://files.pythonhosted.org/packages/64/91/ac20ba2d69303f961ad8cf55bf7dbdb4763f627291ba3d0d7d67333cced9/rpds_py-0.30.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:ad1fa8db769b76ea911cb4e10f049d80bf518c104f15b3edb2371cc65375c46f", size = 591126, upload_time = "2025-11-30T20:23:44.086Z" }, + { url = "https://files.pythonhosted.org/packages/21/20/7ff5f3c8b00c8a95f75985128c26ba44503fb35b8e0259d812766ea966c7/rpds_py-0.30.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:46e83c697b1f1c72b50e5ee5adb4353eef7406fb3f2043d64c33f20ad1c2fc53", size = 553371, upload_time = "2025-11-30T20:23:46.004Z" }, + { url = "https://files.pythonhosted.org/packages/72/c7/81dadd7b27c8ee391c132a6b192111ca58d866577ce2d9b0ca157552cce0/rpds_py-0.30.0-cp314-cp314-win32.whl", hash = "sha256:ee454b2a007d57363c2dfd5b6ca4a5d7e2c518938f8ed3b706e37e5d470801ed", size = 215298, upload_time = "2025-11-30T20:23:47.696Z" }, + { url = "https://files.pythonhosted.org/packages/3e/d2/1aaac33287e8cfb07aab2e6b8ac1deca62f6f65411344f1433c55e6f3eb8/rpds_py-0.30.0-cp314-cp314-win_amd64.whl", hash = "sha256:95f0802447ac2d10bcc69f6dc28fe95fdf17940367b21d34e34c737870758950", size = 228604, upload_time = "2025-11-30T20:23:49.501Z" }, + { url = "https://files.pythonhosted.org/packages/e8/95/ab005315818cc519ad074cb7784dae60d939163108bd2b394e60dc7b5461/rpds_py-0.30.0-cp314-cp314-win_arm64.whl", hash = "sha256:613aa4771c99f03346e54c3f038e4cc574ac09a3ddfb0e8878487335e96dead6", size = 222391, upload_time = "2025-11-30T20:23:50.96Z" }, + { url = "https://files.pythonhosted.org/packages/9e/68/154fe0194d83b973cdedcdcc88947a2752411165930182ae41d983dcefa6/rpds_py-0.30.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:7e6ecfcb62edfd632e56983964e6884851786443739dbfe3582947e87274f7cb", size = 364868, upload_time = "2025-11-30T20:23:52.494Z" }, + { url = "https://files.pythonhosted.org/packages/83/69/8bbc8b07ec854d92a8b75668c24d2abcb1719ebf890f5604c61c9369a16f/rpds_py-0.30.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:a1d0bc22a7cdc173fedebb73ef81e07faef93692b8c1ad3733b67e31e1b6e1b8", size = 353747, upload_time = "2025-11-30T20:23:54.036Z" }, + { url = "https://files.pythonhosted.org/packages/ab/00/ba2e50183dbd9abcce9497fa5149c62b4ff3e22d338a30d690f9af970561/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d08f00679177226c4cb8c5265012eea897c8ca3b93f429e546600c971bcbae7", size = 383795, upload_time = "2025-11-30T20:23:55.556Z" }, + { url = "https://files.pythonhosted.org/packages/05/6f/86f0272b84926bcb0e4c972262f54223e8ecc556b3224d281e6598fc9268/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5965af57d5848192c13534f90f9dd16464f3c37aaf166cc1da1cae1fd5a34898", size = 393330, upload_time = "2025-11-30T20:23:57.033Z" }, + { url = 
"https://files.pythonhosted.org/packages/cb/e9/0e02bb2e6dc63d212641da45df2b0bf29699d01715913e0d0f017ee29438/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9a4e86e34e9ab6b667c27f3211ca48f73dba7cd3d90f8d5b11be56e5dbc3fb4e", size = 518194, upload_time = "2025-11-30T20:23:58.637Z" }, + { url = "https://files.pythonhosted.org/packages/ee/ca/be7bca14cf21513bdf9c0606aba17d1f389ea2b6987035eb4f62bd923f25/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5d3e6b26f2c785d65cc25ef1e5267ccbe1b069c5c21b8cc724efee290554419", size = 408340, upload_time = "2025-11-30T20:24:00.2Z" }, + { url = "https://files.pythonhosted.org/packages/c2/c7/736e00ebf39ed81d75544c0da6ef7b0998f8201b369acf842f9a90dc8fce/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:626a7433c34566535b6e56a1b39a7b17ba961e97ce3b80ec62e6f1312c025551", size = 383765, upload_time = "2025-11-30T20:24:01.759Z" }, + { url = "https://files.pythonhosted.org/packages/4a/3f/da50dfde9956aaf365c4adc9533b100008ed31aea635f2b8d7b627e25b49/rpds_py-0.30.0-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:acd7eb3f4471577b9b5a41baf02a978e8bdeb08b4b355273994f8b87032000a8", size = 396834, upload_time = "2025-11-30T20:24:03.687Z" }, + { url = "https://files.pythonhosted.org/packages/4e/00/34bcc2565b6020eab2623349efbdec810676ad571995911f1abdae62a3a0/rpds_py-0.30.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fe5fa731a1fa8a0a56b0977413f8cacac1768dad38d16b3a296712709476fbd5", size = 415470, upload_time = "2025-11-30T20:24:05.232Z" }, + { url = "https://files.pythonhosted.org/packages/8c/28/882e72b5b3e6f718d5453bd4d0d9cf8df36fddeb4ddbbab17869d5868616/rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:74a3243a411126362712ee1524dfc90c650a503502f135d54d1b352bd01f2404", size = 565630, upload_time = "2025-11-30T20:24:06.878Z" }, + { url = "https://files.pythonhosted.org/packages/3b/97/04a65539c17692de5b85c6e293520fd01317fd878ea1995f0367d4532fb1/rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:3e8eeb0544f2eb0d2581774be4c3410356eba189529a6b3e36bbbf9696175856", size = 591148, upload_time = "2025-11-30T20:24:08.445Z" }, + { url = "https://files.pythonhosted.org/packages/85/70/92482ccffb96f5441aab93e26c4d66489eb599efdcf96fad90c14bbfb976/rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:dbd936cde57abfee19ab3213cf9c26be06d60750e60a8e4dd85d1ab12c8b1f40", size = 556030, upload_time = "2025-11-30T20:24:10.956Z" }, + { url = "https://files.pythonhosted.org/packages/20/53/7c7e784abfa500a2b6b583b147ee4bb5a2b3747a9166bab52fec4b5b5e7d/rpds_py-0.30.0-cp314-cp314t-win32.whl", hash = "sha256:dc824125c72246d924f7f796b4f63c1e9dc810c7d9e2355864b3c3a73d59ade0", size = 211570, upload_time = "2025-11-30T20:24:12.735Z" }, + { url = "https://files.pythonhosted.org/packages/d0/02/fa464cdfbe6b26e0600b62c528b72d8608f5cc49f96b8d6e38c95d60c676/rpds_py-0.30.0-cp314-cp314t-win_amd64.whl", hash = "sha256:27f4b0e92de5bfbc6f86e43959e6edd1425c33b5e69aab0984a72047f2bcf1e3", size = 226532, upload_time = "2025-11-30T20:24:14.634Z" }, +] + +[[package]] +name = "ruff" +version = "0.15.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c8/39/5cee96809fbca590abea6b46c6d1c586b49663d1d2830a751cc8fc42c666/ruff-0.15.0.tar.gz", hash = "sha256:6bdea47cdbea30d40f8f8d7d69c0854ba7c15420ec75a26f463290949d7f7e9a", size = 4524893, upload_time = 
"2026-02-03T17:53:35.357Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bc/88/3fd1b0aa4b6330d6aaa63a285bc96c9f71970351579152d231ed90914586/ruff-0.15.0-py3-none-linux_armv6l.whl", hash = "sha256:aac4ebaa612a82b23d45964586f24ae9bc23ca101919f5590bdb368d74ad5455", size = 10354332, upload_time = "2026-02-03T17:52:54.892Z" }, + { url = "https://files.pythonhosted.org/packages/72/f6/62e173fbb7eb75cc29fe2576a1e20f0a46f671a2587b5f604bfb0eaf5f6f/ruff-0.15.0-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:dcd4be7cc75cfbbca24a98d04d0b9b36a270d0833241f776b788d59f4142b14d", size = 10767189, upload_time = "2026-02-03T17:53:19.778Z" }, + { url = "https://files.pythonhosted.org/packages/99/e4/968ae17b676d1d2ff101d56dc69cf333e3a4c985e1ec23803df84fc7bf9e/ruff-0.15.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:d747e3319b2bce179c7c1eaad3d884dc0a199b5f4d5187620530adf9105268ce", size = 10075384, upload_time = "2026-02-03T17:53:29.241Z" }, + { url = "https://files.pythonhosted.org/packages/a2/bf/9843c6044ab9e20af879c751487e61333ca79a2c8c3058b15722386b8cae/ruff-0.15.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:650bd9c56ae03102c51a5e4b554d74d825ff3abe4db22b90fd32d816c2e90621", size = 10481363, upload_time = "2026-02-03T17:52:43.332Z" }, + { url = "https://files.pythonhosted.org/packages/55/d9/4ada5ccf4cd1f532db1c8d44b6f664f2208d3d93acbeec18f82315e15193/ruff-0.15.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a6664b7eac559e3048223a2da77769c2f92b43a6dfd4720cef42654299a599c9", size = 10187736, upload_time = "2026-02-03T17:53:00.522Z" }, + { url = "https://files.pythonhosted.org/packages/86/e2/f25eaecd446af7bb132af0a1d5b135a62971a41f5366ff41d06d25e77a91/ruff-0.15.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6f811f97b0f092b35320d1556f3353bf238763420ade5d9e62ebd2b73f2ff179", size = 10968415, upload_time = "2026-02-03T17:53:15.705Z" }, + { url = "https://files.pythonhosted.org/packages/e7/dc/f06a8558d06333bf79b497d29a50c3a673d9251214e0d7ec78f90b30aa79/ruff-0.15.0-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:761ec0a66680fab6454236635a39abaf14198818c8cdf691e036f4bc0f406b2d", size = 11809643, upload_time = "2026-02-03T17:53:23.031Z" }, + { url = "https://files.pythonhosted.org/packages/dd/45/0ece8db2c474ad7df13af3a6d50f76e22a09d078af63078f005057ca59eb/ruff-0.15.0-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:940f11c2604d317e797b289f4f9f3fa5555ffe4fb574b55ed006c3d9b6f0eb78", size = 11234787, upload_time = "2026-02-03T17:52:46.432Z" }, + { url = "https://files.pythonhosted.org/packages/8a/d9/0e3a81467a120fd265658d127db648e4d3acfe3e4f6f5d4ea79fac47e587/ruff-0.15.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bcbca3d40558789126da91d7ef9a7c87772ee107033db7191edefa34e2c7f1b4", size = 11112797, upload_time = "2026-02-03T17:52:49.274Z" }, + { url = "https://files.pythonhosted.org/packages/b2/cb/8c0b3b0c692683f8ff31351dfb6241047fa873a4481a76df4335a8bff716/ruff-0.15.0-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:9a121a96db1d75fa3eb39c4539e607f628920dd72ff1f7c5ee4f1b768ac62d6e", size = 11033133, upload_time = "2026-02-03T17:53:33.105Z" }, + { url = "https://files.pythonhosted.org/packages/f8/5e/23b87370cf0f9081a8c89a753e69a4e8778805b8802ccfe175cc410e50b9/ruff-0.15.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:5298d518e493061f2eabd4abd067c7e4fb89e2f63291c94332e35631c07c3662", size = 10442646, upload_time = 
"2026-02-03T17:53:06.278Z" }, + { url = "https://files.pythonhosted.org/packages/e1/9a/3c94de5ce642830167e6d00b5c75aacd73e6347b4c7fc6828699b150a5ee/ruff-0.15.0-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:afb6e603d6375ff0d6b0cee563fa21ab570fd15e65c852cb24922cef25050cf1", size = 10195750, upload_time = "2026-02-03T17:53:26.084Z" }, + { url = "https://files.pythonhosted.org/packages/30/15/e396325080d600b436acc970848d69df9c13977942fb62bb8722d729bee8/ruff-0.15.0-py3-none-musllinux_1_2_i686.whl", hash = "sha256:77e515f6b15f828b94dc17d2b4ace334c9ddb7d9468c54b2f9ed2b9c1593ef16", size = 10676120, upload_time = "2026-02-03T17:53:09.363Z" }, + { url = "https://files.pythonhosted.org/packages/8d/c9/229a23d52a2983de1ad0fb0ee37d36e0257e6f28bfd6b498ee2c76361874/ruff-0.15.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:6f6e80850a01eb13b3e42ee0ebdf6e4497151b48c35051aab51c101266d187a3", size = 11201636, upload_time = "2026-02-03T17:52:57.281Z" }, + { url = "https://files.pythonhosted.org/packages/6f/b0/69adf22f4e24f3677208adb715c578266842e6e6a3cc77483f48dd999ede/ruff-0.15.0-py3-none-win32.whl", hash = "sha256:238a717ef803e501b6d51e0bdd0d2c6e8513fe9eec14002445134d3907cd46c3", size = 10465945, upload_time = "2026-02-03T17:53:12.591Z" }, + { url = "https://files.pythonhosted.org/packages/51/ad/f813b6e2c97e9b4598be25e94a9147b9af7e60523b0cb5d94d307c15229d/ruff-0.15.0-py3-none-win_amd64.whl", hash = "sha256:dd5e4d3301dc01de614da3cdffc33d4b1b96fb89e45721f1598e5532ccf78b18", size = 11564657, upload_time = "2026-02-03T17:52:51.893Z" }, + { url = "https://files.pythonhosted.org/packages/f6/b0/2d823f6e77ebe560f4e397d078487e8d52c1516b331e3521bc75db4272ca/ruff-0.15.0-py3-none-win_arm64.whl", hash = "sha256:c480d632cc0ca3f0727acac8b7d053542d9e114a462a145d0b00e7cd658c515a", size = 10865753, upload_time = "2026-02-03T17:53:03.014Z" }, ] [[package]] @@ -2680,9 +2784,9 @@ dependencies = [ { name = "aiohttp" }, { name = "fsspec" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/e4/cd/5dde2fed1699ff48120336249d9857a574e39feb8afaff694568ab1499b3/s3fs-2025.3.0.tar.gz", hash = "sha256:446dd539eb0d0678209723cb7ad1bedbb172185b0d34675b09be1ad81843a644", size = 77153 } +sdist = { url = "https://files.pythonhosted.org/packages/e4/cd/5dde2fed1699ff48120336249d9857a574e39feb8afaff694568ab1499b3/s3fs-2025.3.0.tar.gz", hash = "sha256:446dd539eb0d0678209723cb7ad1bedbb172185b0d34675b09be1ad81843a644", size = 77153, upload_time = "2025-03-07T21:58:32.114Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3a/3f/35f4041a82a68df89fe4af97c8bb44aa492dad924799cbb02078e9e303e6/s3fs-2025.3.0-py3-none-any.whl", hash = "sha256:88d803615baa04945156ca0e1498009b7acd3132c07198bd81b3e874846e0aa2", size = 30454 }, + { url = "https://files.pythonhosted.org/packages/3a/3f/35f4041a82a68df89fe4af97c8bb44aa492dad924799cbb02078e9e303e6/s3fs-2025.3.0-py3-none-any.whl", hash = "sha256:88d803615baa04945156ca0e1498009b7acd3132c07198bd81b3e874846e0aa2", size = 30454, upload_time = "2025-03-07T21:58:30.998Z" }, ] [[package]] @@ -2692,31 +2796,31 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "botocore" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/62/74/8d69dcb7a9efe8baa2046891735e5dfe433ad558ae23d9e3c14c633d1d58/s3transfer-0.14.0.tar.gz", hash = "sha256:eff12264e7c8b4985074ccce27a3b38a485bb7f7422cc8046fee9be4983e4125", size = 151547 } +sdist = { url = 
"https://files.pythonhosted.org/packages/62/74/8d69dcb7a9efe8baa2046891735e5dfe433ad558ae23d9e3c14c633d1d58/s3transfer-0.14.0.tar.gz", hash = "sha256:eff12264e7c8b4985074ccce27a3b38a485bb7f7422cc8046fee9be4983e4125", size = 151547, upload_time = "2025-09-09T19:23:31.089Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/48/f0/ae7ca09223a81a1d890b2557186ea015f6e0502e9b8cb8e1813f1d8cfa4e/s3transfer-0.14.0-py3-none-any.whl", hash = "sha256:ea3b790c7077558ed1f02a3072fb3cb992bbbd253392f4b6e9e8976941c7d456", size = 85712 }, + { url = "https://files.pythonhosted.org/packages/48/f0/ae7ca09223a81a1d890b2557186ea015f6e0502e9b8cb8e1813f1d8cfa4e/s3transfer-0.14.0-py3-none-any.whl", hash = "sha256:ea3b790c7077558ed1f02a3072fb3cb992bbbd253392f4b6e9e8976941c7d456", size = 85712, upload_time = "2025-09-09T19:23:30.041Z" }, ] [[package]] name = "safetensors" version = "0.7.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/29/9c/6e74567782559a63bd040a236edca26fd71bc7ba88de2ef35d75df3bca5e/safetensors-0.7.0.tar.gz", hash = "sha256:07663963b67e8bd9f0b8ad15bb9163606cd27cc5a1b96235a50d8369803b96b0", size = 200878 } +sdist = { url = "https://files.pythonhosted.org/packages/29/9c/6e74567782559a63bd040a236edca26fd71bc7ba88de2ef35d75df3bca5e/safetensors-0.7.0.tar.gz", hash = "sha256:07663963b67e8bd9f0b8ad15bb9163606cd27cc5a1b96235a50d8369803b96b0", size = 200878, upload_time = "2025-11-19T15:18:43.199Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/fa/47/aef6c06649039accf914afef490268e1067ed82be62bcfa5b7e886ad15e8/safetensors-0.7.0-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:c82f4d474cf725255d9e6acf17252991c3c8aac038d6ef363a4bf8be2f6db517", size = 467781 }, - { url = "https://files.pythonhosted.org/packages/e8/00/374c0c068e30cd31f1e1b46b4b5738168ec79e7689ca82ee93ddfea05109/safetensors-0.7.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:94fd4858284736bb67a897a41608b5b0c2496c9bdb3bf2af1fa3409127f20d57", size = 447058 }, - { url = "https://files.pythonhosted.org/packages/f1/06/578ffed52c2296f93d7fd2d844cabfa92be51a587c38c8afbb8ae449ca89/safetensors-0.7.0-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e07d91d0c92a31200f25351f4acb2bc6aff7f48094e13ebb1d0fb995b54b6542", size = 491748 }, - { url = "https://files.pythonhosted.org/packages/ae/33/1debbbb70e4791dde185edb9413d1fe01619255abb64b300157d7f15dddd/safetensors-0.7.0-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8469155f4cb518bafb4acf4865e8bb9d6804110d2d9bdcaa78564b9fd841e104", size = 503881 }, - { url = "https://files.pythonhosted.org/packages/8e/1c/40c2ca924d60792c3be509833df711b553c60effbd91da6f5284a83f7122/safetensors-0.7.0-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:54bef08bf00a2bff599982f6b08e8770e09cc012d7bba00783fc7ea38f1fb37d", size = 623463 }, - { url = "https://files.pythonhosted.org/packages/9b/3a/13784a9364bd43b0d61eef4bea2845039bc2030458b16594a1bd787ae26e/safetensors-0.7.0-cp38-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:42cb091236206bb2016d245c377ed383aa7f78691748f3bb6ee1bfa51ae2ce6a", size = 532855 }, - { url = "https://files.pythonhosted.org/packages/a0/60/429e9b1cb3fc651937727befe258ea24122d9663e4d5709a48c9cbfceecb/safetensors-0.7.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dac7252938f0696ddea46f5e855dd3138444e82236e3be475f54929f0c510d48", size = 507152 }, - { url = 
"https://files.pythonhosted.org/packages/3c/a8/4b45e4e059270d17af60359713ffd83f97900d45a6afa73aaa0d737d48b6/safetensors-0.7.0-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1d060c70284127fa805085d8f10fbd0962792aed71879d00864acda69dbab981", size = 541856 }, - { url = "https://files.pythonhosted.org/packages/06/87/d26d8407c44175d8ae164a95b5a62707fcc445f3c0c56108e37d98070a3d/safetensors-0.7.0-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:cdab83a366799fa730f90a4ebb563e494f28e9e92c4819e556152ad55e43591b", size = 674060 }, - { url = "https://files.pythonhosted.org/packages/11/f5/57644a2ff08dc6325816ba7217e5095f17269dada2554b658442c66aed51/safetensors-0.7.0-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:672132907fcad9f2aedcb705b2d7b3b93354a2aec1b2f706c4db852abe338f85", size = 771715 }, - { url = "https://files.pythonhosted.org/packages/86/31/17883e13a814bd278ae6e266b13282a01049b0c81341da7fd0e3e71a80a3/safetensors-0.7.0-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:5d72abdb8a4d56d4020713724ba81dac065fedb7f3667151c4a637f1d3fb26c0", size = 714377 }, - { url = "https://files.pythonhosted.org/packages/4a/d8/0c8a7dc9b41dcac53c4cbf9df2b9c83e0e0097203de8b37a712b345c0be5/safetensors-0.7.0-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b0f6d66c1c538d5a94a73aa9ddca8ccc4227e6c9ff555322ea40bdd142391dd4", size = 677368 }, - { url = "https://files.pythonhosted.org/packages/05/e5/cb4b713c8a93469e3c5be7c3f8d77d307e65fe89673e731f5c2bfd0a9237/safetensors-0.7.0-cp38-abi3-win32.whl", hash = "sha256:c74af94bf3ac15ac4d0f2a7c7b4663a15f8c2ab15ed0fc7531ca61d0835eccba", size = 326423 }, - { url = "https://files.pythonhosted.org/packages/5d/e6/ec8471c8072382cb91233ba7267fd931219753bb43814cbc71757bfd4dab/safetensors-0.7.0-cp38-abi3-win_amd64.whl", hash = "sha256:d1239932053f56f3456f32eb9625590cc7582e905021f94636202a864d470755", size = 341380 }, + { url = "https://files.pythonhosted.org/packages/fa/47/aef6c06649039accf914afef490268e1067ed82be62bcfa5b7e886ad15e8/safetensors-0.7.0-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:c82f4d474cf725255d9e6acf17252991c3c8aac038d6ef363a4bf8be2f6db517", size = 467781, upload_time = "2025-11-19T15:18:35.84Z" }, + { url = "https://files.pythonhosted.org/packages/e8/00/374c0c068e30cd31f1e1b46b4b5738168ec79e7689ca82ee93ddfea05109/safetensors-0.7.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:94fd4858284736bb67a897a41608b5b0c2496c9bdb3bf2af1fa3409127f20d57", size = 447058, upload_time = "2025-11-19T15:18:34.416Z" }, + { url = "https://files.pythonhosted.org/packages/f1/06/578ffed52c2296f93d7fd2d844cabfa92be51a587c38c8afbb8ae449ca89/safetensors-0.7.0-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e07d91d0c92a31200f25351f4acb2bc6aff7f48094e13ebb1d0fb995b54b6542", size = 491748, upload_time = "2025-11-19T15:18:09.79Z" }, + { url = "https://files.pythonhosted.org/packages/ae/33/1debbbb70e4791dde185edb9413d1fe01619255abb64b300157d7f15dddd/safetensors-0.7.0-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8469155f4cb518bafb4acf4865e8bb9d6804110d2d9bdcaa78564b9fd841e104", size = 503881, upload_time = "2025-11-19T15:18:16.145Z" }, + { url = "https://files.pythonhosted.org/packages/8e/1c/40c2ca924d60792c3be509833df711b553c60effbd91da6f5284a83f7122/safetensors-0.7.0-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:54bef08bf00a2bff599982f6b08e8770e09cc012d7bba00783fc7ea38f1fb37d", size = 623463, upload_time = "2025-11-19T15:18:21.11Z" }, + { url = 
"https://files.pythonhosted.org/packages/9b/3a/13784a9364bd43b0d61eef4bea2845039bc2030458b16594a1bd787ae26e/safetensors-0.7.0-cp38-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:42cb091236206bb2016d245c377ed383aa7f78691748f3bb6ee1bfa51ae2ce6a", size = 532855, upload_time = "2025-11-19T15:18:25.719Z" }, + { url = "https://files.pythonhosted.org/packages/a0/60/429e9b1cb3fc651937727befe258ea24122d9663e4d5709a48c9cbfceecb/safetensors-0.7.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dac7252938f0696ddea46f5e855dd3138444e82236e3be475f54929f0c510d48", size = 507152, upload_time = "2025-11-19T15:18:33.023Z" }, + { url = "https://files.pythonhosted.org/packages/3c/a8/4b45e4e059270d17af60359713ffd83f97900d45a6afa73aaa0d737d48b6/safetensors-0.7.0-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1d060c70284127fa805085d8f10fbd0962792aed71879d00864acda69dbab981", size = 541856, upload_time = "2025-11-19T15:18:31.075Z" }, + { url = "https://files.pythonhosted.org/packages/06/87/d26d8407c44175d8ae164a95b5a62707fcc445f3c0c56108e37d98070a3d/safetensors-0.7.0-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:cdab83a366799fa730f90a4ebb563e494f28e9e92c4819e556152ad55e43591b", size = 674060, upload_time = "2025-11-19T15:18:37.211Z" }, + { url = "https://files.pythonhosted.org/packages/11/f5/57644a2ff08dc6325816ba7217e5095f17269dada2554b658442c66aed51/safetensors-0.7.0-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:672132907fcad9f2aedcb705b2d7b3b93354a2aec1b2f706c4db852abe338f85", size = 771715, upload_time = "2025-11-19T15:18:38.689Z" }, + { url = "https://files.pythonhosted.org/packages/86/31/17883e13a814bd278ae6e266b13282a01049b0c81341da7fd0e3e71a80a3/safetensors-0.7.0-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:5d72abdb8a4d56d4020713724ba81dac065fedb7f3667151c4a637f1d3fb26c0", size = 714377, upload_time = "2025-11-19T15:18:40.162Z" }, + { url = "https://files.pythonhosted.org/packages/4a/d8/0c8a7dc9b41dcac53c4cbf9df2b9c83e0e0097203de8b37a712b345c0be5/safetensors-0.7.0-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b0f6d66c1c538d5a94a73aa9ddca8ccc4227e6c9ff555322ea40bdd142391dd4", size = 677368, upload_time = "2025-11-19T15:18:41.627Z" }, + { url = "https://files.pythonhosted.org/packages/05/e5/cb4b713c8a93469e3c5be7c3f8d77d307e65fe89673e731f5c2bfd0a9237/safetensors-0.7.0-cp38-abi3-win32.whl", hash = "sha256:c74af94bf3ac15ac4d0f2a7c7b4663a15f8c2ab15ed0fc7531ca61d0835eccba", size = 326423, upload_time = "2025-11-19T15:18:45.74Z" }, + { url = "https://files.pythonhosted.org/packages/5d/e6/ec8471c8072382cb91233ba7267fd931219753bb43814cbc71757bfd4dab/safetensors-0.7.0-cp38-abi3-win_amd64.whl", hash = "sha256:d1239932053f56f3456f32eb9625590cc7582e905021f94636202a864d470755", size = 341380, upload_time = "2025-11-19T15:18:44.427Z" }, ] [[package]] @@ -2729,38 +2833,38 @@ dependencies = [ { name = "scipy" }, { name = "threadpoolctl" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/0e/d4/40988bf3b8e34feec1d0e6a051446b1f66225f8529b9309becaeef62b6c4/scikit_learn-1.8.0.tar.gz", hash = "sha256:9bccbb3b40e3de10351f8f5068e105d0f4083b1a65fa07b6634fbc401a6287fd", size = 7335585 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/90/74/e6a7cc4b820e95cc38cf36cd74d5aa2b42e8ffc2d21fe5a9a9c45c1c7630/scikit_learn-1.8.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:5fb63362b5a7ddab88e52b6dbb47dac3fd7dafeee740dc6c8d8a446ddedade8e", size = 8548242 }, - { url = 
"https://files.pythonhosted.org/packages/49/d8/9be608c6024d021041c7f0b3928d4749a706f4e2c3832bbede4fb4f58c95/scikit_learn-1.8.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:5025ce924beccb28298246e589c691fe1b8c1c96507e6d27d12c5fadd85bfd76", size = 8079075 }, - { url = "https://files.pythonhosted.org/packages/dd/47/f187b4636ff80cc63f21cd40b7b2d177134acaa10f6bb73746130ee8c2e5/scikit_learn-1.8.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4496bb2cf7a43ce1a2d7524a79e40bc5da45cf598dbf9545b7e8316ccba47bb4", size = 8660492 }, - { url = "https://files.pythonhosted.org/packages/97/74/b7a304feb2b49df9fafa9382d4d09061a96ee9a9449a7cbea7988dda0828/scikit_learn-1.8.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a0bcfe4d0d14aec44921545fd2af2338c7471de9cb701f1da4c9d85906ab847a", size = 8931904 }, - { url = "https://files.pythonhosted.org/packages/9f/c4/0ab22726a04ede56f689476b760f98f8f46607caecff993017ac1b64aa5d/scikit_learn-1.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:35c007dedb2ffe38fe3ee7d201ebac4a2deccd2408e8621d53067733e3c74809", size = 8019359 }, - { url = "https://files.pythonhosted.org/packages/24/90/344a67811cfd561d7335c1b96ca21455e7e472d281c3c279c4d3f2300236/scikit_learn-1.8.0-cp312-cp312-win_arm64.whl", hash = "sha256:8c497fff237d7b4e07e9ef1a640887fa4fb765647f86fbe00f969ff6280ce2bb", size = 7641898 }, - { url = "https://files.pythonhosted.org/packages/03/aa/e22e0768512ce9255eba34775be2e85c2048da73da1193e841707f8f039c/scikit_learn-1.8.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0d6ae97234d5d7079dc0040990a6f7aeb97cb7fa7e8945f1999a429b23569e0a", size = 8513770 }, - { url = "https://files.pythonhosted.org/packages/58/37/31b83b2594105f61a381fc74ca19e8780ee923be2d496fcd8d2e1147bd99/scikit_learn-1.8.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:edec98c5e7c128328124a029bceb09eda2d526997780fef8d65e9a69eead963e", size = 8044458 }, - { url = "https://files.pythonhosted.org/packages/2d/5a/3f1caed8765f33eabb723596666da4ebbf43d11e96550fb18bdec42b467b/scikit_learn-1.8.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:74b66d8689d52ed04c271e1329f0c61635bcaf5b926db9b12d58914cdc01fe57", size = 8610341 }, - { url = "https://files.pythonhosted.org/packages/38/cf/06896db3f71c75902a8e9943b444a56e727418f6b4b4a90c98c934f51ed4/scikit_learn-1.8.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8fdf95767f989b0cfedb85f7ed8ca215d4be728031f56ff5a519ee1e3276dc2e", size = 8900022 }, - { url = "https://files.pythonhosted.org/packages/1c/f9/9b7563caf3ec8873e17a31401858efab6b39a882daf6c1bfa88879c0aa11/scikit_learn-1.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:2de443b9373b3b615aec1bb57f9baa6bb3a9bd093f1269ba95c17d870422b271", size = 7989409 }, - { url = "https://files.pythonhosted.org/packages/49/bd/1f4001503650e72c4f6009ac0c4413cb17d2d601cef6f71c0453da2732fc/scikit_learn-1.8.0-cp313-cp313-win_arm64.whl", hash = "sha256:eddde82a035681427cbedded4e6eff5e57fa59216c2e3e90b10b19ab1d0a65c3", size = 7619760 }, - { url = "https://files.pythonhosted.org/packages/d2/7d/a630359fc9dcc95496588c8d8e3245cc8fd81980251079bc09c70d41d951/scikit_learn-1.8.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:7cc267b6108f0a1499a734167282c00c4ebf61328566b55ef262d48e9849c735", size = 8826045 }, - { url = "https://files.pythonhosted.org/packages/cc/56/a0c86f6930cfcd1c7054a2bc417e26960bb88d32444fe7f71d5c2cfae891/scikit_learn-1.8.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = 
"sha256:fe1c011a640a9f0791146011dfd3c7d9669785f9fed2b2a5f9e207536cf5c2fd", size = 8420324 }, - { url = "https://files.pythonhosted.org/packages/46/1e/05962ea1cebc1cf3876667ecb14c283ef755bf409993c5946ade3b77e303/scikit_learn-1.8.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:72358cce49465d140cc4e7792015bb1f0296a9742d5622c67e31399b75468b9e", size = 8680651 }, - { url = "https://files.pythonhosted.org/packages/fe/56/a85473cd75f200c9759e3a5f0bcab2d116c92a8a02ee08ccd73b870f8bb4/scikit_learn-1.8.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:80832434a6cc114f5219211eec13dcbc16c2bac0e31ef64c6d346cde3cf054cb", size = 8925045 }, - { url = "https://files.pythonhosted.org/packages/cc/b7/64d8cfa896c64435ae57f4917a548d7ac7a44762ff9802f75a79b77cb633/scikit_learn-1.8.0-cp313-cp313t-win_amd64.whl", hash = "sha256:ee787491dbfe082d9c3013f01f5991658b0f38aa8177e4cd4bf434c58f551702", size = 8507994 }, - { url = "https://files.pythonhosted.org/packages/5e/37/e192ea709551799379958b4c4771ec507347027bb7c942662c7fbeba31cb/scikit_learn-1.8.0-cp313-cp313t-win_arm64.whl", hash = "sha256:bf97c10a3f5a7543f9b88cbf488d33d175e9146115a451ae34568597ba33dcde", size = 7869518 }, - { url = "https://files.pythonhosted.org/packages/24/05/1af2c186174cc92dcab2233f327336058c077d38f6fe2aceb08e6ab4d509/scikit_learn-1.8.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:c22a2da7a198c28dd1a6e1136f19c830beab7fdca5b3e5c8bba8394f8a5c45b3", size = 8528667 }, - { url = "https://files.pythonhosted.org/packages/a8/25/01c0af38fe969473fb292bba9dc2b8f9b451f3112ff242c647fee3d0dfe7/scikit_learn-1.8.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:6b595b07a03069a2b1740dc08c2299993850ea81cce4fe19b2421e0c970de6b7", size = 8066524 }, - { url = "https://files.pythonhosted.org/packages/be/ce/a0623350aa0b68647333940ee46fe45086c6060ec604874e38e9ab7d8e6c/scikit_learn-1.8.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:29ffc74089f3d5e87dfca4c2c8450f88bdc61b0fc6ed5d267f3988f19a1309f6", size = 8657133 }, - { url = "https://files.pythonhosted.org/packages/b8/cb/861b41341d6f1245e6ca80b1c1a8c4dfce43255b03df034429089ca2a2c5/scikit_learn-1.8.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fb65db5d7531bccf3a4f6bec3462223bea71384e2cda41da0f10b7c292b9e7c4", size = 8923223 }, - { url = "https://files.pythonhosted.org/packages/76/18/a8def8f91b18cd1ba6e05dbe02540168cb24d47e8dcf69e8d00b7da42a08/scikit_learn-1.8.0-cp314-cp314-win_amd64.whl", hash = "sha256:56079a99c20d230e873ea40753102102734c5953366972a71d5cb39a32bc40c6", size = 8096518 }, - { url = "https://files.pythonhosted.org/packages/d1/77/482076a678458307f0deb44e29891d6022617b2a64c840c725495bee343f/scikit_learn-1.8.0-cp314-cp314-win_arm64.whl", hash = "sha256:3bad7565bc9cf37ce19a7c0d107742b320c1285df7aab1a6e2d28780df167242", size = 7754546 }, - { url = "https://files.pythonhosted.org/packages/2d/d1/ef294ca754826daa043b2a104e59960abfab4cf653891037d19dd5b6f3cf/scikit_learn-1.8.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:4511be56637e46c25721e83d1a9cea9614e7badc7040c4d573d75fbe257d6fd7", size = 8848305 }, - { url = "https://files.pythonhosted.org/packages/5b/e2/b1f8b05138ee813b8e1a4149f2f0d289547e60851fd1bb268886915adbda/scikit_learn-1.8.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:a69525355a641bf8ef136a7fa447672fb54fe8d60cab5538d9eb7c6438543fb9", size = 8432257 }, - { url = 
"https://files.pythonhosted.org/packages/26/11/c32b2138a85dcb0c99f6afd13a70a951bfdff8a6ab42d8160522542fb647/scikit_learn-1.8.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c2656924ec73e5939c76ac4c8b026fc203b83d8900362eb2599d8aee80e4880f", size = 8678673 }, - { url = "https://files.pythonhosted.org/packages/c7/57/51f2384575bdec454f4fe4e7a919d696c9ebce914590abf3e52d47607ab8/scikit_learn-1.8.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:15fc3b5d19cc2be65404786857f2e13c70c83dd4782676dd6814e3b89dc8f5b9", size = 8922467 }, - { url = "https://files.pythonhosted.org/packages/35/4d/748c9e2872637a57981a04adc038dacaa16ba8ca887b23e34953f0b3f742/scikit_learn-1.8.0-cp314-cp314t-win_amd64.whl", hash = "sha256:00d6f1d66fbcf4eba6e356e1420d33cc06c70a45bb1363cd6f6a8e4ebbbdece2", size = 8774395 }, - { url = "https://files.pythonhosted.org/packages/60/22/d7b2ebe4704a5e50790ba089d5c2ae308ab6bb852719e6c3bd4f04c3a363/scikit_learn-1.8.0-cp314-cp314t-win_arm64.whl", hash = "sha256:f28dd15c6bb0b66ba09728cf09fd8736c304be29409bd8445a080c1280619e8c", size = 8002647 }, +sdist = { url = "https://files.pythonhosted.org/packages/0e/d4/40988bf3b8e34feec1d0e6a051446b1f66225f8529b9309becaeef62b6c4/scikit_learn-1.8.0.tar.gz", hash = "sha256:9bccbb3b40e3de10351f8f5068e105d0f4083b1a65fa07b6634fbc401a6287fd", size = 7335585, upload_time = "2025-12-10T07:08:53.618Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/90/74/e6a7cc4b820e95cc38cf36cd74d5aa2b42e8ffc2d21fe5a9a9c45c1c7630/scikit_learn-1.8.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:5fb63362b5a7ddab88e52b6dbb47dac3fd7dafeee740dc6c8d8a446ddedade8e", size = 8548242, upload_time = "2025-12-10T07:07:51.568Z" }, + { url = "https://files.pythonhosted.org/packages/49/d8/9be608c6024d021041c7f0b3928d4749a706f4e2c3832bbede4fb4f58c95/scikit_learn-1.8.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:5025ce924beccb28298246e589c691fe1b8c1c96507e6d27d12c5fadd85bfd76", size = 8079075, upload_time = "2025-12-10T07:07:53.697Z" }, + { url = "https://files.pythonhosted.org/packages/dd/47/f187b4636ff80cc63f21cd40b7b2d177134acaa10f6bb73746130ee8c2e5/scikit_learn-1.8.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4496bb2cf7a43ce1a2d7524a79e40bc5da45cf598dbf9545b7e8316ccba47bb4", size = 8660492, upload_time = "2025-12-10T07:07:55.574Z" }, + { url = "https://files.pythonhosted.org/packages/97/74/b7a304feb2b49df9fafa9382d4d09061a96ee9a9449a7cbea7988dda0828/scikit_learn-1.8.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a0bcfe4d0d14aec44921545fd2af2338c7471de9cb701f1da4c9d85906ab847a", size = 8931904, upload_time = "2025-12-10T07:07:57.666Z" }, + { url = "https://files.pythonhosted.org/packages/9f/c4/0ab22726a04ede56f689476b760f98f8f46607caecff993017ac1b64aa5d/scikit_learn-1.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:35c007dedb2ffe38fe3ee7d201ebac4a2deccd2408e8621d53067733e3c74809", size = 8019359, upload_time = "2025-12-10T07:07:59.838Z" }, + { url = "https://files.pythonhosted.org/packages/24/90/344a67811cfd561d7335c1b96ca21455e7e472d281c3c279c4d3f2300236/scikit_learn-1.8.0-cp312-cp312-win_arm64.whl", hash = "sha256:8c497fff237d7b4e07e9ef1a640887fa4fb765647f86fbe00f969ff6280ce2bb", size = 7641898, upload_time = "2025-12-10T07:08:01.36Z" }, + { url = "https://files.pythonhosted.org/packages/03/aa/e22e0768512ce9255eba34775be2e85c2048da73da1193e841707f8f039c/scikit_learn-1.8.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = 
"sha256:0d6ae97234d5d7079dc0040990a6f7aeb97cb7fa7e8945f1999a429b23569e0a", size = 8513770, upload_time = "2025-12-10T07:08:03.251Z" }, + { url = "https://files.pythonhosted.org/packages/58/37/31b83b2594105f61a381fc74ca19e8780ee923be2d496fcd8d2e1147bd99/scikit_learn-1.8.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:edec98c5e7c128328124a029bceb09eda2d526997780fef8d65e9a69eead963e", size = 8044458, upload_time = "2025-12-10T07:08:05.336Z" }, + { url = "https://files.pythonhosted.org/packages/2d/5a/3f1caed8765f33eabb723596666da4ebbf43d11e96550fb18bdec42b467b/scikit_learn-1.8.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:74b66d8689d52ed04c271e1329f0c61635bcaf5b926db9b12d58914cdc01fe57", size = 8610341, upload_time = "2025-12-10T07:08:07.732Z" }, + { url = "https://files.pythonhosted.org/packages/38/cf/06896db3f71c75902a8e9943b444a56e727418f6b4b4a90c98c934f51ed4/scikit_learn-1.8.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8fdf95767f989b0cfedb85f7ed8ca215d4be728031f56ff5a519ee1e3276dc2e", size = 8900022, upload_time = "2025-12-10T07:08:09.862Z" }, + { url = "https://files.pythonhosted.org/packages/1c/f9/9b7563caf3ec8873e17a31401858efab6b39a882daf6c1bfa88879c0aa11/scikit_learn-1.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:2de443b9373b3b615aec1bb57f9baa6bb3a9bd093f1269ba95c17d870422b271", size = 7989409, upload_time = "2025-12-10T07:08:12.028Z" }, + { url = "https://files.pythonhosted.org/packages/49/bd/1f4001503650e72c4f6009ac0c4413cb17d2d601cef6f71c0453da2732fc/scikit_learn-1.8.0-cp313-cp313-win_arm64.whl", hash = "sha256:eddde82a035681427cbedded4e6eff5e57fa59216c2e3e90b10b19ab1d0a65c3", size = 7619760, upload_time = "2025-12-10T07:08:13.688Z" }, + { url = "https://files.pythonhosted.org/packages/d2/7d/a630359fc9dcc95496588c8d8e3245cc8fd81980251079bc09c70d41d951/scikit_learn-1.8.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:7cc267b6108f0a1499a734167282c00c4ebf61328566b55ef262d48e9849c735", size = 8826045, upload_time = "2025-12-10T07:08:15.215Z" }, + { url = "https://files.pythonhosted.org/packages/cc/56/a0c86f6930cfcd1c7054a2bc417e26960bb88d32444fe7f71d5c2cfae891/scikit_learn-1.8.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:fe1c011a640a9f0791146011dfd3c7d9669785f9fed2b2a5f9e207536cf5c2fd", size = 8420324, upload_time = "2025-12-10T07:08:17.561Z" }, + { url = "https://files.pythonhosted.org/packages/46/1e/05962ea1cebc1cf3876667ecb14c283ef755bf409993c5946ade3b77e303/scikit_learn-1.8.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:72358cce49465d140cc4e7792015bb1f0296a9742d5622c67e31399b75468b9e", size = 8680651, upload_time = "2025-12-10T07:08:19.952Z" }, + { url = "https://files.pythonhosted.org/packages/fe/56/a85473cd75f200c9759e3a5f0bcab2d116c92a8a02ee08ccd73b870f8bb4/scikit_learn-1.8.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:80832434a6cc114f5219211eec13dcbc16c2bac0e31ef64c6d346cde3cf054cb", size = 8925045, upload_time = "2025-12-10T07:08:22.11Z" }, + { url = "https://files.pythonhosted.org/packages/cc/b7/64d8cfa896c64435ae57f4917a548d7ac7a44762ff9802f75a79b77cb633/scikit_learn-1.8.0-cp313-cp313t-win_amd64.whl", hash = "sha256:ee787491dbfe082d9c3013f01f5991658b0f38aa8177e4cd4bf434c58f551702", size = 8507994, upload_time = "2025-12-10T07:08:23.943Z" }, + { url = "https://files.pythonhosted.org/packages/5e/37/e192ea709551799379958b4c4771ec507347027bb7c942662c7fbeba31cb/scikit_learn-1.8.0-cp313-cp313t-win_arm64.whl", hash = 
"sha256:bf97c10a3f5a7543f9b88cbf488d33d175e9146115a451ae34568597ba33dcde", size = 7869518, upload_time = "2025-12-10T07:08:25.71Z" }, + { url = "https://files.pythonhosted.org/packages/24/05/1af2c186174cc92dcab2233f327336058c077d38f6fe2aceb08e6ab4d509/scikit_learn-1.8.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:c22a2da7a198c28dd1a6e1136f19c830beab7fdca5b3e5c8bba8394f8a5c45b3", size = 8528667, upload_time = "2025-12-10T07:08:27.541Z" }, + { url = "https://files.pythonhosted.org/packages/a8/25/01c0af38fe969473fb292bba9dc2b8f9b451f3112ff242c647fee3d0dfe7/scikit_learn-1.8.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:6b595b07a03069a2b1740dc08c2299993850ea81cce4fe19b2421e0c970de6b7", size = 8066524, upload_time = "2025-12-10T07:08:29.822Z" }, + { url = "https://files.pythonhosted.org/packages/be/ce/a0623350aa0b68647333940ee46fe45086c6060ec604874e38e9ab7d8e6c/scikit_learn-1.8.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:29ffc74089f3d5e87dfca4c2c8450f88bdc61b0fc6ed5d267f3988f19a1309f6", size = 8657133, upload_time = "2025-12-10T07:08:31.865Z" }, + { url = "https://files.pythonhosted.org/packages/b8/cb/861b41341d6f1245e6ca80b1c1a8c4dfce43255b03df034429089ca2a2c5/scikit_learn-1.8.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fb65db5d7531bccf3a4f6bec3462223bea71384e2cda41da0f10b7c292b9e7c4", size = 8923223, upload_time = "2025-12-10T07:08:34.166Z" }, + { url = "https://files.pythonhosted.org/packages/76/18/a8def8f91b18cd1ba6e05dbe02540168cb24d47e8dcf69e8d00b7da42a08/scikit_learn-1.8.0-cp314-cp314-win_amd64.whl", hash = "sha256:56079a99c20d230e873ea40753102102734c5953366972a71d5cb39a32bc40c6", size = 8096518, upload_time = "2025-12-10T07:08:36.339Z" }, + { url = "https://files.pythonhosted.org/packages/d1/77/482076a678458307f0deb44e29891d6022617b2a64c840c725495bee343f/scikit_learn-1.8.0-cp314-cp314-win_arm64.whl", hash = "sha256:3bad7565bc9cf37ce19a7c0d107742b320c1285df7aab1a6e2d28780df167242", size = 7754546, upload_time = "2025-12-10T07:08:38.128Z" }, + { url = "https://files.pythonhosted.org/packages/2d/d1/ef294ca754826daa043b2a104e59960abfab4cf653891037d19dd5b6f3cf/scikit_learn-1.8.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:4511be56637e46c25721e83d1a9cea9614e7badc7040c4d573d75fbe257d6fd7", size = 8848305, upload_time = "2025-12-10T07:08:41.013Z" }, + { url = "https://files.pythonhosted.org/packages/5b/e2/b1f8b05138ee813b8e1a4149f2f0d289547e60851fd1bb268886915adbda/scikit_learn-1.8.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:a69525355a641bf8ef136a7fa447672fb54fe8d60cab5538d9eb7c6438543fb9", size = 8432257, upload_time = "2025-12-10T07:08:42.873Z" }, + { url = "https://files.pythonhosted.org/packages/26/11/c32b2138a85dcb0c99f6afd13a70a951bfdff8a6ab42d8160522542fb647/scikit_learn-1.8.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c2656924ec73e5939c76ac4c8b026fc203b83d8900362eb2599d8aee80e4880f", size = 8678673, upload_time = "2025-12-10T07:08:45.362Z" }, + { url = "https://files.pythonhosted.org/packages/c7/57/51f2384575bdec454f4fe4e7a919d696c9ebce914590abf3e52d47607ab8/scikit_learn-1.8.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:15fc3b5d19cc2be65404786857f2e13c70c83dd4782676dd6814e3b89dc8f5b9", size = 8922467, upload_time = "2025-12-10T07:08:47.408Z" }, + { url = "https://files.pythonhosted.org/packages/35/4d/748c9e2872637a57981a04adc038dacaa16ba8ca887b23e34953f0b3f742/scikit_learn-1.8.0-cp314-cp314t-win_amd64.whl", hash 
= "sha256:00d6f1d66fbcf4eba6e356e1420d33cc06c70a45bb1363cd6f6a8e4ebbbdece2", size = 8774395, upload_time = "2025-12-10T07:08:49.337Z" }, + { url = "https://files.pythonhosted.org/packages/60/22/d7b2ebe4704a5e50790ba089d5c2ae308ab6bb852719e6c3bd4f04c3a363/scikit_learn-1.8.0-cp314-cp314t-win_arm64.whl", hash = "sha256:f28dd15c6bb0b66ba09728cf09fd8736c304be29409bd8445a080c1280619e8c", size = 8002647, upload_time = "2025-12-10T07:08:51.601Z" }, ] [[package]] @@ -2770,94 +2874,94 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/56/3e/9cca699f3486ce6bc12ff46dc2031f1ec8eb9ccc9a320fdaf925f1417426/scipy-1.17.0.tar.gz", hash = "sha256:2591060c8e648d8b96439e111ac41fd8342fdeff1876be2e19dea3fe8930454e", size = 30396830 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0b/11/7241a63e73ba5a516f1930ac8d5b44cbbfabd35ac73a2d08ca206df007c4/scipy-1.17.0-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:0d5018a57c24cb1dd828bcf51d7b10e65986d549f52ef5adb6b4d1ded3e32a57", size = 31364580 }, - { url = "https://files.pythonhosted.org/packages/ed/1d/5057f812d4f6adc91a20a2d6f2ebcdb517fdbc87ae3acc5633c9b97c8ba5/scipy-1.17.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:88c22af9e5d5a4f9e027e26772cc7b5922fab8bcc839edb3ae33de404feebd9e", size = 27969012 }, - { url = "https://files.pythonhosted.org/packages/e3/21/f6ec556c1e3b6ec4e088da667d9987bb77cc3ab3026511f427dc8451187d/scipy-1.17.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:f3cd947f20fe17013d401b64e857c6b2da83cae567adbb75b9dcba865abc66d8", size = 20140691 }, - { url = "https://files.pythonhosted.org/packages/7a/fe/5e5ad04784964ba964a96f16c8d4676aa1b51357199014dce58ab7ec5670/scipy-1.17.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:e8c0b331c2c1f531eb51f1b4fc9ba709521a712cce58f1aa627bc007421a5306", size = 22463015 }, - { url = "https://files.pythonhosted.org/packages/4a/69/7c347e857224fcaf32a34a05183b9d8a7aca25f8f2d10b8a698b8388561a/scipy-1.17.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5194c445d0a1c7a6c1a4a4681b6b7c71baad98ff66d96b949097e7513c9d6742", size = 32724197 }, - { url = "https://files.pythonhosted.org/packages/d1/fe/66d73b76d378ba8cc2fe605920c0c75092e3a65ae746e1e767d9d020a75a/scipy-1.17.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9eeb9b5f5997f75507814ed9d298ab23f62cf79f5a3ef90031b1ee2506abdb5b", size = 35009148 }, - { url = "https://files.pythonhosted.org/packages/af/07/07dec27d9dc41c18d8c43c69e9e413431d20c53a0339c388bcf72f353c4b/scipy-1.17.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:40052543f7bbe921df4408f46003d6f01c6af109b9e2c8a66dd1cf6cf57f7d5d", size = 34798766 }, - { url = "https://files.pythonhosted.org/packages/81/61/0470810c8a093cdacd4ba7504b8a218fd49ca070d79eca23a615f5d9a0b0/scipy-1.17.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0cf46c8013fec9d3694dc572f0b54100c28405d55d3e2cb15e2895b25057996e", size = 37405953 }, - { url = "https://files.pythonhosted.org/packages/92/ce/672ed546f96d5d41ae78c4b9b02006cedd0b3d6f2bf5bb76ea455c320c28/scipy-1.17.0-cp312-cp312-win_amd64.whl", hash = "sha256:0937a0b0d8d593a198cededd4c439a0ea216a3f36653901ea1f3e4be949056f8", size = 36328121 }, - { url = "https://files.pythonhosted.org/packages/9d/21/38165845392cae67b61843a52c6455d47d0cc2a40dd495c89f4362944654/scipy-1.17.0-cp312-cp312-win_arm64.whl", hash = "sha256:f603d8a5518c7426414d1d8f82e253e454471de682ce5e39c29adb0df1efb86b", size = 
24314368 }, - { url = "https://files.pythonhosted.org/packages/0c/51/3468fdfd49387ddefee1636f5cf6d03ce603b75205bf439bbf0e62069bfd/scipy-1.17.0-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:65ec32f3d32dfc48c72df4291345dae4f048749bc8d5203ee0a3f347f96c5ce6", size = 31344101 }, - { url = "https://files.pythonhosted.org/packages/b2/9a/9406aec58268d437636069419e6977af953d1e246df941d42d3720b7277b/scipy-1.17.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:1f9586a58039d7229ce77b52f8472c972448cded5736eaf102d5658bbac4c269", size = 27950385 }, - { url = "https://files.pythonhosted.org/packages/4f/98/e7342709e17afdfd1b26b56ae499ef4939b45a23a00e471dfb5375eea205/scipy-1.17.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:9fad7d3578c877d606b1150135c2639e9de9cecd3705caa37b66862977cc3e72", size = 20122115 }, - { url = "https://files.pythonhosted.org/packages/fd/0e/9eeeb5357a64fd157cbe0302c213517c541cc16b8486d82de251f3c68ede/scipy-1.17.0-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:423ca1f6584fc03936972b5f7c06961670dbba9f234e71676a7c7ccf938a0d61", size = 22442402 }, - { url = "https://files.pythonhosted.org/packages/c9/10/be13397a0e434f98e0c79552b2b584ae5bb1c8b2be95db421533bbca5369/scipy-1.17.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fe508b5690e9eaaa9467fc047f833af58f1152ae51a0d0aed67aa5801f4dd7d6", size = 32696338 }, - { url = "https://files.pythonhosted.org/packages/63/1e/12fbf2a3bb240161651c94bb5cdd0eae5d4e8cc6eaeceb74ab07b12a753d/scipy-1.17.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6680f2dfd4f6182e7d6db161344537da644d1cf85cf293f015c60a17ecf08752", size = 34977201 }, - { url = "https://files.pythonhosted.org/packages/19/5b/1a63923e23ccd20bd32156d7dd708af5bbde410daa993aa2500c847ab2d2/scipy-1.17.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:eec3842ec9ac9de5917899b277428886042a93db0b227ebbe3a333b64ec7643d", size = 34777384 }, - { url = "https://files.pythonhosted.org/packages/39/22/b5da95d74edcf81e540e467202a988c50fef41bd2011f46e05f72ba07df6/scipy-1.17.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d7425fcafbc09a03731e1bc05581f5fad988e48c6a861f441b7ab729a49a55ea", size = 37379586 }, - { url = "https://files.pythonhosted.org/packages/b9/b6/8ac583d6da79e7b9e520579f03007cb006f063642afd6b2eeb16b890bf93/scipy-1.17.0-cp313-cp313-win_amd64.whl", hash = "sha256:87b411e42b425b84777718cc41516b8a7e0795abfa8e8e1d573bf0ef014f0812", size = 36287211 }, - { url = "https://files.pythonhosted.org/packages/55/fb/7db19e0b3e52f882b420417644ec81dd57eeef1bd1705b6f689d8ff93541/scipy-1.17.0-cp313-cp313-win_arm64.whl", hash = "sha256:357ca001c6e37601066092e7c89cca2f1ce74e2a520ca78d063a6d2201101df2", size = 24312646 }, - { url = "https://files.pythonhosted.org/packages/20/b6/7feaa252c21cc7aff335c6c55e1b90ab3e3306da3f048109b8b639b94648/scipy-1.17.0-cp313-cp313t-macosx_10_14_x86_64.whl", hash = "sha256:ec0827aa4d36cb79ff1b81de898e948a51ac0b9b1c43e4a372c0508c38c0f9a3", size = 31693194 }, - { url = "https://files.pythonhosted.org/packages/76/bb/bbb392005abce039fb7e672cb78ac7d158700e826b0515cab6b5b60c26fb/scipy-1.17.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:819fc26862b4b3c73a60d486dbb919202f3d6d98c87cf20c223511429f2d1a97", size = 28365415 }, - { url = "https://files.pythonhosted.org/packages/37/da/9d33196ecc99fba16a409c691ed464a3a283ac454a34a13a3a57c0d66f3a/scipy-1.17.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:363ad4ae2853d88ebcde3ae6ec46ccca903ea9835ee8ba543f12f575e7b07e4e", size = 20537232 }, - { 
url = "https://files.pythonhosted.org/packages/56/9d/f4b184f6ddb28e9a5caea36a6f98e8ecd2a524f9127354087ce780885d83/scipy-1.17.0-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:979c3a0ff8e5ba254d45d59ebd38cde48fce4f10b5125c680c7a4bfe177aab07", size = 22791051 }, - { url = "https://files.pythonhosted.org/packages/9b/9d/025cccdd738a72140efc582b1641d0dd4caf2e86c3fb127568dc80444e6e/scipy-1.17.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:130d12926ae34399d157de777472bf82e9061c60cc081372b3118edacafe1d00", size = 32815098 }, - { url = "https://files.pythonhosted.org/packages/48/5f/09b879619f8bca15ce392bfc1894bd9c54377e01d1b3f2f3b595a1b4d945/scipy-1.17.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6e886000eb4919eae3a44f035e63f0fd8b651234117e8f6f29bad1cd26e7bc45", size = 35031342 }, - { url = "https://files.pythonhosted.org/packages/f2/9a/f0f0a9f0aa079d2f106555b984ff0fbb11a837df280f04f71f056ea9c6e4/scipy-1.17.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:13c4096ac6bc31d706018f06a49abe0485f96499deb82066b94d19b02f664209", size = 34893199 }, - { url = "https://files.pythonhosted.org/packages/90/b8/4f0f5cf0c5ea4d7548424e6533e6b17d164f34a6e2fb2e43ffebb6697b06/scipy-1.17.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:cacbaddd91fcffde703934897c5cd2c7cb0371fac195d383f4e1f1c5d3f3bd04", size = 37438061 }, - { url = "https://files.pythonhosted.org/packages/f9/cc/2bd59140ed3b2fa2882fb15da0a9cb1b5a6443d67cfd0d98d4cec83a57ec/scipy-1.17.0-cp313-cp313t-win_amd64.whl", hash = "sha256:edce1a1cf66298cccdc48a1bdf8fb10a3bf58e8b58d6c3883dd1530e103f87c0", size = 36328593 }, - { url = "https://files.pythonhosted.org/packages/13/1b/c87cc44a0d2c7aaf0f003aef2904c3d097b422a96c7e7c07f5efd9073c1b/scipy-1.17.0-cp313-cp313t-win_arm64.whl", hash = "sha256:30509da9dbec1c2ed8f168b8d8aa853bc6723fede1dbc23c7d43a56f5ab72a67", size = 24625083 }, - { url = "https://files.pythonhosted.org/packages/1a/2d/51006cd369b8e7879e1c630999a19d1fbf6f8b5ed3e33374f29dc87e53b3/scipy-1.17.0-cp314-cp314-macosx_10_14_x86_64.whl", hash = "sha256:c17514d11b78be8f7e6331b983a65a7f5ca1fd037b95e27b280921fe5606286a", size = 31346803 }, - { url = "https://files.pythonhosted.org/packages/d6/2e/2349458c3ce445f53a6c93d4386b1c4c5c0c540917304c01222ff95ff317/scipy-1.17.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:4e00562e519c09da34c31685f6acc3aa384d4d50604db0f245c14e1b4488bfa2", size = 27967182 }, - { url = "https://files.pythonhosted.org/packages/5e/7c/df525fbfa77b878d1cfe625249529514dc02f4fd5f45f0f6295676a76528/scipy-1.17.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:f7df7941d71314e60a481e02d5ebcb3f0185b8d799c70d03d8258f6c80f3d467", size = 20139125 }, - { url = "https://files.pythonhosted.org/packages/33/11/fcf9d43a7ed1234d31765ec643b0515a85a30b58eddccc5d5a4d12b5f194/scipy-1.17.0-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:aabf057c632798832f071a8dde013c2e26284043934f53b00489f1773b33527e", size = 22443554 }, - { url = "https://files.pythonhosted.org/packages/80/5c/ea5d239cda2dd3d31399424967a24d556cf409fbea7b5b21412b0fd0a44f/scipy-1.17.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a38c3337e00be6fd8a95b4ed66b5d988bac4ec888fd922c2ea9fe5fb1603dd67", size = 32757834 }, - { url = "https://files.pythonhosted.org/packages/b8/7e/8c917cc573310e5dc91cbeead76f1b600d3fb17cf0969db02c9cf92e3cfa/scipy-1.17.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:00fb5f8ec8398ad90215008d8b6009c9db9fa924fd4c7d6be307c6f945f9cd73", size = 34995775 }, - { url = "https://files.pythonhosted.org/packages/c5/43/176c0c3c07b3f7df324e7cdd933d3e2c4898ca202b090bd5ba122f9fe270/scipy-1.17.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f2a4942b0f5f7c23c7cd641a0ca1955e2ae83dedcff537e3a0259096635e186b", size = 34841240 }, - { url = "https://files.pythonhosted.org/packages/44/8c/d1f5f4b491160592e7f084d997de53a8e896a3ac01cd07e59f43ca222744/scipy-1.17.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:dbf133ced83889583156566d2bdf7a07ff89228fe0c0cb727f777de92092ec6b", size = 37394463 }, - { url = "https://files.pythonhosted.org/packages/9f/ec/42a6657f8d2d087e750e9a5dde0b481fd135657f09eaf1cf5688bb23c338/scipy-1.17.0-cp314-cp314-win_amd64.whl", hash = "sha256:3625c631a7acd7cfd929e4e31d2582cf00f42fcf06011f59281271746d77e061", size = 37053015 }, - { url = "https://files.pythonhosted.org/packages/27/58/6b89a6afd132787d89a362d443a7bddd511b8f41336a1ae47f9e4f000dc4/scipy-1.17.0-cp314-cp314-win_arm64.whl", hash = "sha256:9244608d27eafe02b20558523ba57f15c689357c85bdcfe920b1828750aa26eb", size = 24951312 }, - { url = "https://files.pythonhosted.org/packages/e9/01/f58916b9d9ae0112b86d7c3b10b9e685625ce6e8248df139d0fcb17f7397/scipy-1.17.0-cp314-cp314t-macosx_10_14_x86_64.whl", hash = "sha256:2b531f57e09c946f56ad0b4a3b2abee778789097871fc541e267d2eca081cff1", size = 31706502 }, - { url = "https://files.pythonhosted.org/packages/59/8e/2912a87f94a7d1f8b38aabc0faf74b82d3b6c9e22be991c49979f0eceed8/scipy-1.17.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:13e861634a2c480bd237deb69333ac79ea1941b94568d4b0efa5db5e263d4fd1", size = 28380854 }, - { url = "https://files.pythonhosted.org/packages/bd/1c/874137a52dddab7d5d595c1887089a2125d27d0601fce8c0026a24a92a0b/scipy-1.17.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:eb2651271135154aa24f6481cbae5cc8af1f0dd46e6533fb7b56aa9727b6a232", size = 20552752 }, - { url = "https://files.pythonhosted.org/packages/3f/f0/7518d171cb735f6400f4576cf70f756d5b419a07fe1867da34e2c2c9c11b/scipy-1.17.0-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:c5e8647f60679790c2f5c76be17e2e9247dc6b98ad0d3b065861e082c56e078d", size = 22803972 }, - { url = "https://files.pythonhosted.org/packages/7c/74/3498563a2c619e8a3ebb4d75457486c249b19b5b04a30600dfd9af06bea5/scipy-1.17.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5fb10d17e649e1446410895639f3385fd2bf4c3c7dfc9bea937bddcbc3d7b9ba", size = 32829770 }, - { url = "https://files.pythonhosted.org/packages/48/d1/7b50cedd8c6c9d6f706b4b36fa8544d829c712a75e370f763b318e9638c1/scipy-1.17.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8547e7c57f932e7354a2319fab613981cde910631979f74c9b542bb167a8b9db", size = 35051093 }, - { url = "https://files.pythonhosted.org/packages/e2/82/a2d684dfddb87ba1b3ea325df7c3293496ee9accb3a19abe9429bce94755/scipy-1.17.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:33af70d040e8af9d5e7a38b5ed3b772adddd281e3062ff23fec49e49681c38cf", size = 34909905 }, - { url = "https://files.pythonhosted.org/packages/ef/5e/e565bd73991d42023eb82bb99e51c5b3d9e2c588ca9d4b3e2cc1d3ca62a6/scipy-1.17.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:f9eb55bb97d00f8b7ab95cb64f873eb0bf54d9446264d9f3609130381233483f", size = 37457743 }, - { url = "https://files.pythonhosted.org/packages/58/a8/a66a75c3d8f1fb2b83f66007d6455a06a6f6cf5618c3dc35bc9b69dd096e/scipy-1.17.0-cp314-cp314t-win_amd64.whl", hash = 
"sha256:1ff269abf702f6c7e67a4b7aad981d42871a11b9dd83c58d2d2ea624efbd1088", size = 37098574 }, - { url = "https://files.pythonhosted.org/packages/56/a5/df8f46ef7da168f1bc52cd86e09a9de5c6f19cc1da04454d51b7d4f43408/scipy-1.17.0-cp314-cp314t-win_arm64.whl", hash = "sha256:031121914e295d9791319a1875444d55079885bbae5bdc9c5e0f2ee5f09d34ff", size = 25246266 }, +sdist = { url = "https://files.pythonhosted.org/packages/56/3e/9cca699f3486ce6bc12ff46dc2031f1ec8eb9ccc9a320fdaf925f1417426/scipy-1.17.0.tar.gz", hash = "sha256:2591060c8e648d8b96439e111ac41fd8342fdeff1876be2e19dea3fe8930454e", size = 30396830, upload_time = "2026-01-10T21:34:23.009Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0b/11/7241a63e73ba5a516f1930ac8d5b44cbbfabd35ac73a2d08ca206df007c4/scipy-1.17.0-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:0d5018a57c24cb1dd828bcf51d7b10e65986d549f52ef5adb6b4d1ded3e32a57", size = 31364580, upload_time = "2026-01-10T21:25:25.717Z" }, + { url = "https://files.pythonhosted.org/packages/ed/1d/5057f812d4f6adc91a20a2d6f2ebcdb517fdbc87ae3acc5633c9b97c8ba5/scipy-1.17.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:88c22af9e5d5a4f9e027e26772cc7b5922fab8bcc839edb3ae33de404feebd9e", size = 27969012, upload_time = "2026-01-10T21:25:30.921Z" }, + { url = "https://files.pythonhosted.org/packages/e3/21/f6ec556c1e3b6ec4e088da667d9987bb77cc3ab3026511f427dc8451187d/scipy-1.17.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:f3cd947f20fe17013d401b64e857c6b2da83cae567adbb75b9dcba865abc66d8", size = 20140691, upload_time = "2026-01-10T21:25:34.802Z" }, + { url = "https://files.pythonhosted.org/packages/7a/fe/5e5ad04784964ba964a96f16c8d4676aa1b51357199014dce58ab7ec5670/scipy-1.17.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:e8c0b331c2c1f531eb51f1b4fc9ba709521a712cce58f1aa627bc007421a5306", size = 22463015, upload_time = "2026-01-10T21:25:39.277Z" }, + { url = "https://files.pythonhosted.org/packages/4a/69/7c347e857224fcaf32a34a05183b9d8a7aca25f8f2d10b8a698b8388561a/scipy-1.17.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5194c445d0a1c7a6c1a4a4681b6b7c71baad98ff66d96b949097e7513c9d6742", size = 32724197, upload_time = "2026-01-10T21:25:44.084Z" }, + { url = "https://files.pythonhosted.org/packages/d1/fe/66d73b76d378ba8cc2fe605920c0c75092e3a65ae746e1e767d9d020a75a/scipy-1.17.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9eeb9b5f5997f75507814ed9d298ab23f62cf79f5a3ef90031b1ee2506abdb5b", size = 35009148, upload_time = "2026-01-10T21:25:50.591Z" }, + { url = "https://files.pythonhosted.org/packages/af/07/07dec27d9dc41c18d8c43c69e9e413431d20c53a0339c388bcf72f353c4b/scipy-1.17.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:40052543f7bbe921df4408f46003d6f01c6af109b9e2c8a66dd1cf6cf57f7d5d", size = 34798766, upload_time = "2026-01-10T21:25:59.41Z" }, + { url = "https://files.pythonhosted.org/packages/81/61/0470810c8a093cdacd4ba7504b8a218fd49ca070d79eca23a615f5d9a0b0/scipy-1.17.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0cf46c8013fec9d3694dc572f0b54100c28405d55d3e2cb15e2895b25057996e", size = 37405953, upload_time = "2026-01-10T21:26:07.75Z" }, + { url = "https://files.pythonhosted.org/packages/92/ce/672ed546f96d5d41ae78c4b9b02006cedd0b3d6f2bf5bb76ea455c320c28/scipy-1.17.0-cp312-cp312-win_amd64.whl", hash = "sha256:0937a0b0d8d593a198cededd4c439a0ea216a3f36653901ea1f3e4be949056f8", size = 36328121, upload_time = "2026-01-10T21:26:16.509Z" }, + { url = 
"https://files.pythonhosted.org/packages/9d/21/38165845392cae67b61843a52c6455d47d0cc2a40dd495c89f4362944654/scipy-1.17.0-cp312-cp312-win_arm64.whl", hash = "sha256:f603d8a5518c7426414d1d8f82e253e454471de682ce5e39c29adb0df1efb86b", size = 24314368, upload_time = "2026-01-10T21:26:23.087Z" }, + { url = "https://files.pythonhosted.org/packages/0c/51/3468fdfd49387ddefee1636f5cf6d03ce603b75205bf439bbf0e62069bfd/scipy-1.17.0-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:65ec32f3d32dfc48c72df4291345dae4f048749bc8d5203ee0a3f347f96c5ce6", size = 31344101, upload_time = "2026-01-10T21:26:30.25Z" }, + { url = "https://files.pythonhosted.org/packages/b2/9a/9406aec58268d437636069419e6977af953d1e246df941d42d3720b7277b/scipy-1.17.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:1f9586a58039d7229ce77b52f8472c972448cded5736eaf102d5658bbac4c269", size = 27950385, upload_time = "2026-01-10T21:26:36.801Z" }, + { url = "https://files.pythonhosted.org/packages/4f/98/e7342709e17afdfd1b26b56ae499ef4939b45a23a00e471dfb5375eea205/scipy-1.17.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:9fad7d3578c877d606b1150135c2639e9de9cecd3705caa37b66862977cc3e72", size = 20122115, upload_time = "2026-01-10T21:26:42.107Z" }, + { url = "https://files.pythonhosted.org/packages/fd/0e/9eeeb5357a64fd157cbe0302c213517c541cc16b8486d82de251f3c68ede/scipy-1.17.0-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:423ca1f6584fc03936972b5f7c06961670dbba9f234e71676a7c7ccf938a0d61", size = 22442402, upload_time = "2026-01-10T21:26:48.029Z" }, + { url = "https://files.pythonhosted.org/packages/c9/10/be13397a0e434f98e0c79552b2b584ae5bb1c8b2be95db421533bbca5369/scipy-1.17.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fe508b5690e9eaaa9467fc047f833af58f1152ae51a0d0aed67aa5801f4dd7d6", size = 32696338, upload_time = "2026-01-10T21:26:55.521Z" }, + { url = "https://files.pythonhosted.org/packages/63/1e/12fbf2a3bb240161651c94bb5cdd0eae5d4e8cc6eaeceb74ab07b12a753d/scipy-1.17.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6680f2dfd4f6182e7d6db161344537da644d1cf85cf293f015c60a17ecf08752", size = 34977201, upload_time = "2026-01-10T21:27:03.501Z" }, + { url = "https://files.pythonhosted.org/packages/19/5b/1a63923e23ccd20bd32156d7dd708af5bbde410daa993aa2500c847ab2d2/scipy-1.17.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:eec3842ec9ac9de5917899b277428886042a93db0b227ebbe3a333b64ec7643d", size = 34777384, upload_time = "2026-01-10T21:27:11.423Z" }, + { url = "https://files.pythonhosted.org/packages/39/22/b5da95d74edcf81e540e467202a988c50fef41bd2011f46e05f72ba07df6/scipy-1.17.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d7425fcafbc09a03731e1bc05581f5fad988e48c6a861f441b7ab729a49a55ea", size = 37379586, upload_time = "2026-01-10T21:27:20.171Z" }, + { url = "https://files.pythonhosted.org/packages/b9/b6/8ac583d6da79e7b9e520579f03007cb006f063642afd6b2eeb16b890bf93/scipy-1.17.0-cp313-cp313-win_amd64.whl", hash = "sha256:87b411e42b425b84777718cc41516b8a7e0795abfa8e8e1d573bf0ef014f0812", size = 36287211, upload_time = "2026-01-10T21:28:43.122Z" }, + { url = "https://files.pythonhosted.org/packages/55/fb/7db19e0b3e52f882b420417644ec81dd57eeef1bd1705b6f689d8ff93541/scipy-1.17.0-cp313-cp313-win_arm64.whl", hash = "sha256:357ca001c6e37601066092e7c89cca2f1ce74e2a520ca78d063a6d2201101df2", size = 24312646, upload_time = "2026-01-10T21:28:49.893Z" }, + { url = 
"https://files.pythonhosted.org/packages/20/b6/7feaa252c21cc7aff335c6c55e1b90ab3e3306da3f048109b8b639b94648/scipy-1.17.0-cp313-cp313t-macosx_10_14_x86_64.whl", hash = "sha256:ec0827aa4d36cb79ff1b81de898e948a51ac0b9b1c43e4a372c0508c38c0f9a3", size = 31693194, upload_time = "2026-01-10T21:27:27.454Z" }, + { url = "https://files.pythonhosted.org/packages/76/bb/bbb392005abce039fb7e672cb78ac7d158700e826b0515cab6b5b60c26fb/scipy-1.17.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:819fc26862b4b3c73a60d486dbb919202f3d6d98c87cf20c223511429f2d1a97", size = 28365415, upload_time = "2026-01-10T21:27:34.26Z" }, + { url = "https://files.pythonhosted.org/packages/37/da/9d33196ecc99fba16a409c691ed464a3a283ac454a34a13a3a57c0d66f3a/scipy-1.17.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:363ad4ae2853d88ebcde3ae6ec46ccca903ea9835ee8ba543f12f575e7b07e4e", size = 20537232, upload_time = "2026-01-10T21:27:40.306Z" }, + { url = "https://files.pythonhosted.org/packages/56/9d/f4b184f6ddb28e9a5caea36a6f98e8ecd2a524f9127354087ce780885d83/scipy-1.17.0-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:979c3a0ff8e5ba254d45d59ebd38cde48fce4f10b5125c680c7a4bfe177aab07", size = 22791051, upload_time = "2026-01-10T21:27:46.539Z" }, + { url = "https://files.pythonhosted.org/packages/9b/9d/025cccdd738a72140efc582b1641d0dd4caf2e86c3fb127568dc80444e6e/scipy-1.17.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:130d12926ae34399d157de777472bf82e9061c60cc081372b3118edacafe1d00", size = 32815098, upload_time = "2026-01-10T21:27:54.389Z" }, + { url = "https://files.pythonhosted.org/packages/48/5f/09b879619f8bca15ce392bfc1894bd9c54377e01d1b3f2f3b595a1b4d945/scipy-1.17.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6e886000eb4919eae3a44f035e63f0fd8b651234117e8f6f29bad1cd26e7bc45", size = 35031342, upload_time = "2026-01-10T21:28:03.012Z" }, + { url = "https://files.pythonhosted.org/packages/f2/9a/f0f0a9f0aa079d2f106555b984ff0fbb11a837df280f04f71f056ea9c6e4/scipy-1.17.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:13c4096ac6bc31d706018f06a49abe0485f96499deb82066b94d19b02f664209", size = 34893199, upload_time = "2026-01-10T21:28:10.832Z" }, + { url = "https://files.pythonhosted.org/packages/90/b8/4f0f5cf0c5ea4d7548424e6533e6b17d164f34a6e2fb2e43ffebb6697b06/scipy-1.17.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:cacbaddd91fcffde703934897c5cd2c7cb0371fac195d383f4e1f1c5d3f3bd04", size = 37438061, upload_time = "2026-01-10T21:28:19.684Z" }, + { url = "https://files.pythonhosted.org/packages/f9/cc/2bd59140ed3b2fa2882fb15da0a9cb1b5a6443d67cfd0d98d4cec83a57ec/scipy-1.17.0-cp313-cp313t-win_amd64.whl", hash = "sha256:edce1a1cf66298cccdc48a1bdf8fb10a3bf58e8b58d6c3883dd1530e103f87c0", size = 36328593, upload_time = "2026-01-10T21:28:28.007Z" }, + { url = "https://files.pythonhosted.org/packages/13/1b/c87cc44a0d2c7aaf0f003aef2904c3d097b422a96c7e7c07f5efd9073c1b/scipy-1.17.0-cp313-cp313t-win_arm64.whl", hash = "sha256:30509da9dbec1c2ed8f168b8d8aa853bc6723fede1dbc23c7d43a56f5ab72a67", size = 24625083, upload_time = "2026-01-10T21:28:35.188Z" }, + { url = "https://files.pythonhosted.org/packages/1a/2d/51006cd369b8e7879e1c630999a19d1fbf6f8b5ed3e33374f29dc87e53b3/scipy-1.17.0-cp314-cp314-macosx_10_14_x86_64.whl", hash = "sha256:c17514d11b78be8f7e6331b983a65a7f5ca1fd037b95e27b280921fe5606286a", size = 31346803, upload_time = "2026-01-10T21:28:57.24Z" }, + { url = 
"https://files.pythonhosted.org/packages/d6/2e/2349458c3ce445f53a6c93d4386b1c4c5c0c540917304c01222ff95ff317/scipy-1.17.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:4e00562e519c09da34c31685f6acc3aa384d4d50604db0f245c14e1b4488bfa2", size = 27967182, upload_time = "2026-01-10T21:29:04.107Z" }, + { url = "https://files.pythonhosted.org/packages/5e/7c/df525fbfa77b878d1cfe625249529514dc02f4fd5f45f0f6295676a76528/scipy-1.17.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:f7df7941d71314e60a481e02d5ebcb3f0185b8d799c70d03d8258f6c80f3d467", size = 20139125, upload_time = "2026-01-10T21:29:10.179Z" }, + { url = "https://files.pythonhosted.org/packages/33/11/fcf9d43a7ed1234d31765ec643b0515a85a30b58eddccc5d5a4d12b5f194/scipy-1.17.0-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:aabf057c632798832f071a8dde013c2e26284043934f53b00489f1773b33527e", size = 22443554, upload_time = "2026-01-10T21:29:15.888Z" }, + { url = "https://files.pythonhosted.org/packages/80/5c/ea5d239cda2dd3d31399424967a24d556cf409fbea7b5b21412b0fd0a44f/scipy-1.17.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a38c3337e00be6fd8a95b4ed66b5d988bac4ec888fd922c2ea9fe5fb1603dd67", size = 32757834, upload_time = "2026-01-10T21:29:23.406Z" }, + { url = "https://files.pythonhosted.org/packages/b8/7e/8c917cc573310e5dc91cbeead76f1b600d3fb17cf0969db02c9cf92e3cfa/scipy-1.17.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00fb5f8ec8398ad90215008d8b6009c9db9fa924fd4c7d6be307c6f945f9cd73", size = 34995775, upload_time = "2026-01-10T21:29:31.915Z" }, + { url = "https://files.pythonhosted.org/packages/c5/43/176c0c3c07b3f7df324e7cdd933d3e2c4898ca202b090bd5ba122f9fe270/scipy-1.17.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f2a4942b0f5f7c23c7cd641a0ca1955e2ae83dedcff537e3a0259096635e186b", size = 34841240, upload_time = "2026-01-10T21:29:39.995Z" }, + { url = "https://files.pythonhosted.org/packages/44/8c/d1f5f4b491160592e7f084d997de53a8e896a3ac01cd07e59f43ca222744/scipy-1.17.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:dbf133ced83889583156566d2bdf7a07ff89228fe0c0cb727f777de92092ec6b", size = 37394463, upload_time = "2026-01-10T21:29:48.723Z" }, + { url = "https://files.pythonhosted.org/packages/9f/ec/42a6657f8d2d087e750e9a5dde0b481fd135657f09eaf1cf5688bb23c338/scipy-1.17.0-cp314-cp314-win_amd64.whl", hash = "sha256:3625c631a7acd7cfd929e4e31d2582cf00f42fcf06011f59281271746d77e061", size = 37053015, upload_time = "2026-01-10T21:30:51.418Z" }, + { url = "https://files.pythonhosted.org/packages/27/58/6b89a6afd132787d89a362d443a7bddd511b8f41336a1ae47f9e4f000dc4/scipy-1.17.0-cp314-cp314-win_arm64.whl", hash = "sha256:9244608d27eafe02b20558523ba57f15c689357c85bdcfe920b1828750aa26eb", size = 24951312, upload_time = "2026-01-10T21:30:56.771Z" }, + { url = "https://files.pythonhosted.org/packages/e9/01/f58916b9d9ae0112b86d7c3b10b9e685625ce6e8248df139d0fcb17f7397/scipy-1.17.0-cp314-cp314t-macosx_10_14_x86_64.whl", hash = "sha256:2b531f57e09c946f56ad0b4a3b2abee778789097871fc541e267d2eca081cff1", size = 31706502, upload_time = "2026-01-10T21:29:56.326Z" }, + { url = "https://files.pythonhosted.org/packages/59/8e/2912a87f94a7d1f8b38aabc0faf74b82d3b6c9e22be991c49979f0eceed8/scipy-1.17.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:13e861634a2c480bd237deb69333ac79ea1941b94568d4b0efa5db5e263d4fd1", size = 28380854, upload_time = "2026-01-10T21:30:01.554Z" }, + { url = 
"https://files.pythonhosted.org/packages/bd/1c/874137a52dddab7d5d595c1887089a2125d27d0601fce8c0026a24a92a0b/scipy-1.17.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:eb2651271135154aa24f6481cbae5cc8af1f0dd46e6533fb7b56aa9727b6a232", size = 20552752, upload_time = "2026-01-10T21:30:05.93Z" }, + { url = "https://files.pythonhosted.org/packages/3f/f0/7518d171cb735f6400f4576cf70f756d5b419a07fe1867da34e2c2c9c11b/scipy-1.17.0-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:c5e8647f60679790c2f5c76be17e2e9247dc6b98ad0d3b065861e082c56e078d", size = 22803972, upload_time = "2026-01-10T21:30:10.651Z" }, + { url = "https://files.pythonhosted.org/packages/7c/74/3498563a2c619e8a3ebb4d75457486c249b19b5b04a30600dfd9af06bea5/scipy-1.17.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5fb10d17e649e1446410895639f3385fd2bf4c3c7dfc9bea937bddcbc3d7b9ba", size = 32829770, upload_time = "2026-01-10T21:30:16.359Z" }, + { url = "https://files.pythonhosted.org/packages/48/d1/7b50cedd8c6c9d6f706b4b36fa8544d829c712a75e370f763b318e9638c1/scipy-1.17.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8547e7c57f932e7354a2319fab613981cde910631979f74c9b542bb167a8b9db", size = 35051093, upload_time = "2026-01-10T21:30:22.987Z" }, + { url = "https://files.pythonhosted.org/packages/e2/82/a2d684dfddb87ba1b3ea325df7c3293496ee9accb3a19abe9429bce94755/scipy-1.17.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:33af70d040e8af9d5e7a38b5ed3b772adddd281e3062ff23fec49e49681c38cf", size = 34909905, upload_time = "2026-01-10T21:30:28.704Z" }, + { url = "https://files.pythonhosted.org/packages/ef/5e/e565bd73991d42023eb82bb99e51c5b3d9e2c588ca9d4b3e2cc1d3ca62a6/scipy-1.17.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:f9eb55bb97d00f8b7ab95cb64f873eb0bf54d9446264d9f3609130381233483f", size = 37457743, upload_time = "2026-01-10T21:30:34.819Z" }, + { url = "https://files.pythonhosted.org/packages/58/a8/a66a75c3d8f1fb2b83f66007d6455a06a6f6cf5618c3dc35bc9b69dd096e/scipy-1.17.0-cp314-cp314t-win_amd64.whl", hash = "sha256:1ff269abf702f6c7e67a4b7aad981d42871a11b9dd83c58d2d2ea624efbd1088", size = 37098574, upload_time = "2026-01-10T21:30:40.782Z" }, + { url = "https://files.pythonhosted.org/packages/56/a5/df8f46ef7da168f1bc52cd86e09a9de5c6f19cc1da04454d51b7d4f43408/scipy-1.17.0-cp314-cp314t-win_arm64.whl", hash = "sha256:031121914e295d9791319a1875444d55079885bbae5bdc9c5e0f2ee5f09d34ff", size = 25246266, upload_time = "2026-01-10T21:30:45.923Z" }, ] [[package]] name = "semver" version = "3.0.4" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/72/d1/d3159231aec234a59dd7d601e9dd9fe96f3afff15efd33c1070019b26132/semver-3.0.4.tar.gz", hash = "sha256:afc7d8c584a5ed0a11033af086e8af226a9c0b206f313e0301f8dd7b6b589602", size = 269730 } +sdist = { url = "https://files.pythonhosted.org/packages/72/d1/d3159231aec234a59dd7d601e9dd9fe96f3afff15efd33c1070019b26132/semver-3.0.4.tar.gz", hash = "sha256:afc7d8c584a5ed0a11033af086e8af226a9c0b206f313e0301f8dd7b6b589602", size = 269730, upload_time = "2025-01-24T13:19:27.617Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a6/24/4d91e05817e92e3a61c8a21e08fd0f390f5301f1c448b137c57c4bc6e543/semver-3.0.4-py3-none-any.whl", hash = "sha256:9c824d87ba7f7ab4a1890799cec8596f15c1241cb473404ea1cb0c55e4b04746", size = 17912 }, + { url = 
"https://files.pythonhosted.org/packages/a6/24/4d91e05817e92e3a61c8a21e08fd0f390f5301f1c448b137c57c4bc6e543/semver-3.0.4-py3-none-any.whl", hash = "sha256:9c824d87ba7f7ab4a1890799cec8596f15c1241cb473404ea1cb0c55e4b04746", size = 17912, upload_time = "2025-01-24T13:19:24.949Z" }, ] [[package]] name = "setuptools" version = "80.10.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/76/95/faf61eb8363f26aa7e1d762267a8d602a1b26d4f3a1e758e92cb3cb8b054/setuptools-80.10.2.tar.gz", hash = "sha256:8b0e9d10c784bf7d262c4e5ec5d4ec94127ce206e8738f29a437945fbc219b70", size = 1200343 } +sdist = { url = "https://files.pythonhosted.org/packages/76/95/faf61eb8363f26aa7e1d762267a8d602a1b26d4f3a1e758e92cb3cb8b054/setuptools-80.10.2.tar.gz", hash = "sha256:8b0e9d10c784bf7d262c4e5ec5d4ec94127ce206e8738f29a437945fbc219b70", size = 1200343, upload_time = "2026-01-25T22:38:17.252Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/94/b8/f1f62a5e3c0ad2ff1d189590bfa4c46b4f3b6e49cef6f26c6ee4e575394d/setuptools-80.10.2-py3-none-any.whl", hash = "sha256:95b30ddfb717250edb492926c92b5221f7ef3fbcc2b07579bcd4a27da21d0173", size = 1064234 }, + { url = "https://files.pythonhosted.org/packages/94/b8/f1f62a5e3c0ad2ff1d189590bfa4c46b4f3b6e49cef6f26c6ee4e575394d/setuptools-80.10.2-py3-none-any.whl", hash = "sha256:95b30ddfb717250edb492926c92b5221f7ef3fbcc2b07579bcd4a27da21d0173", size = 1064234, upload_time = "2026-01-25T22:38:15.216Z" }, ] [[package]] name = "shortuuid" version = "1.0.13" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/8c/e2/bcf761f3bff95856203f9559baf3741c416071dd200c0fc19fad7f078f86/shortuuid-1.0.13.tar.gz", hash = "sha256:3bb9cf07f606260584b1df46399c0b87dd84773e7b25912b7e391e30797c5e72", size = 9662 } +sdist = { url = "https://files.pythonhosted.org/packages/8c/e2/bcf761f3bff95856203f9559baf3741c416071dd200c0fc19fad7f078f86/shortuuid-1.0.13.tar.gz", hash = "sha256:3bb9cf07f606260584b1df46399c0b87dd84773e7b25912b7e391e30797c5e72", size = 9662, upload_time = "2024-03-11T20:11:06.879Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c0/44/21d6bf170bf40b41396480d8d49ad640bca3f2b02139cd52aa1e272830a5/shortuuid-1.0.13-py3-none-any.whl", hash = "sha256:a482a497300b49b4953e15108a7913244e1bb0d41f9d332f5e9925dba33a3c5a", size = 10529 }, + { url = "https://files.pythonhosted.org/packages/c0/44/21d6bf170bf40b41396480d8d49ad640bca3f2b02139cd52aa1e272830a5/shortuuid-1.0.13-py3-none-any.whl", hash = "sha256:a482a497300b49b4953e15108a7913244e1bb0d41f9d332f5e9925dba33a3c5a", size = 10529, upload_time = "2024-03-11T20:11:04.807Z" }, ] [[package]] name = "six" version = "1.17.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031 } +sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031, upload_time = "2024-12-04T17:35:28.174Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = 
"sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050 }, + { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload_time = "2024-12-04T17:35:26.475Z" }, ] [[package]] @@ -2867,27 +2971,27 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "wrapt" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/67/9a/0a7acb748b86e2922982366d780ca4b16c33f7246fa5860d26005c97e4f3/smart_open-7.5.0.tar.gz", hash = "sha256:f394b143851d8091011832ac8113ea4aba6b92e6c35f6e677ddaaccb169d7cb9", size = 53920 } +sdist = { url = "https://files.pythonhosted.org/packages/67/9a/0a7acb748b86e2922982366d780ca4b16c33f7246fa5860d26005c97e4f3/smart_open-7.5.0.tar.gz", hash = "sha256:f394b143851d8091011832ac8113ea4aba6b92e6c35f6e677ddaaccb169d7cb9", size = 53920, upload_time = "2025-11-08T21:38:40.698Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ad/95/bc978be7ea0babf2fb48a414b6afaad414c6a9e8b1eafc5b8a53c030381a/smart_open-7.5.0-py3-none-any.whl", hash = "sha256:87e695c5148bbb988f15cec00971602765874163be85acb1c9fb8abc012e6599", size = 63940 }, + { url = "https://files.pythonhosted.org/packages/ad/95/bc978be7ea0babf2fb48a414b6afaad414c6a9e8b1eafc5b8a53c030381a/smart_open-7.5.0-py3-none-any.whl", hash = "sha256:87e695c5148bbb988f15cec00971602765874163be85acb1c9fb8abc012e6599", size = 63940, upload_time = "2025-11-08T21:38:39.024Z" }, ] [[package]] name = "sniffio" version = "1.3.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372 } +sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372, upload_time = "2024-02-25T23:20:04.057Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235 }, + { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload_time = "2024-02-25T23:20:01.196Z" }, ] [[package]] name = "soupsieve" version = "2.8.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/89/23/adf3796d740536d63a6fbda113d07e60c734b6ed5d3058d1e47fc0495e47/soupsieve-2.8.1.tar.gz", hash = "sha256:4cf733bc50fa805f5df4b8ef4740fc0e0fa6218cf3006269afd3f9d6d80fd350", size = 117856 } +sdist = { url = "https://files.pythonhosted.org/packages/89/23/adf3796d740536d63a6fbda113d07e60c734b6ed5d3058d1e47fc0495e47/soupsieve-2.8.1.tar.gz", hash = "sha256:4cf733bc50fa805f5df4b8ef4740fc0e0fa6218cf3006269afd3f9d6d80fd350", size = 117856, upload_time = "2025-12-18T13:50:34.655Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/48/f3/b67d6ea49ca9154453b6d70b34ea22f3996b9fa55da105a79d8732227adc/soupsieve-2.8.1-py3-none-any.whl", hash = "sha256:a11fe2a6f3d76ab3cf2de04eb339c1be5b506a8a47f2ceb6d139803177f85434", size = 36710 }, + { url = "https://files.pythonhosted.org/packages/48/f3/b67d6ea49ca9154453b6d70b34ea22f3996b9fa55da105a79d8732227adc/soupsieve-2.8.1-py3-none-any.whl", hash = "sha256:a11fe2a6f3d76ab3cf2de04eb339c1be5b506a8a47f2ceb6d139803177f85434", size = 36710, upload_time = "2025-12-18T13:50:33.267Z" }, ] [[package]] @@ -2914,57 +3018,57 @@ dependencies = [ { name = "wasabi" }, { name = "weasel" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/59/9f/424244b0e2656afc9ff82fb7a96931a47397bfce5ba382213827b198312a/spacy-3.8.11.tar.gz", hash = "sha256:54e1e87b74a2f9ea807ffd606166bf29ac45e2bd81ff7f608eadc7b05787d90d", size = 1326804 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/51/fb/01eadf4ba70606b3054702dc41fc2ccf7d70fb14514b3cd57f0ff78ebea8/spacy-3.8.11-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:aa1ee8362074c30098feaaf2dd888c829a1a79c4311eec1b117a0a61f16fa6dd", size = 6073726 }, - { url = "https://files.pythonhosted.org/packages/3a/f8/07b03a2997fc2621aaeafae00af50f55522304a7da6926b07027bb6d0709/spacy-3.8.11-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:75a036d04c2cf11d6cb566c0a689860cc5a7a75b439e8fea1b3a6b673dabf25d", size = 5724702 }, - { url = "https://files.pythonhosted.org/packages/13/0c/c4fa0f379dbe3258c305d2e2df3760604a9fcd71b34f8f65c23e43f4cf55/spacy-3.8.11-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7cb599d2747d4a59a5f90e8a453c149b13db382a8297925cf126333141dbc4f7", size = 32727774 }, - { url = "https://files.pythonhosted.org/packages/ce/8e/6a4ba82bed480211ebdf5341b0f89e7271b454307525ac91b5e447825914/spacy-3.8.11-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:94632e302ad2fb79dc285bf1e9e4d4a178904d5c67049e0e02b7fb4a77af85c4", size = 33215053 }, - { url = "https://files.pythonhosted.org/packages/a6/bc/44d863d248e9d7358c76a0aa8b3f196b8698df520650ed8de162e18fbffb/spacy-3.8.11-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:aeca6cf34009d48cda9fb1bbfb532469e3d643817241a73e367b34ab99a5806f", size = 32074195 }, - { url = "https://files.pythonhosted.org/packages/6f/7d/0b115f3f16e1dd2d3f99b0f89497867fc11c41aed94f4b7a4367b4b54136/spacy-3.8.11-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:368a79b8df925b15d89dccb5e502039446fb2ce93cf3020e092d5b962c3349b9", size = 32996143 }, - { url = "https://files.pythonhosted.org/packages/7d/48/7e9581b476df76aaf9ee182888d15322e77c38b0bbbd5e80160ba0bddd4c/spacy-3.8.11-cp312-cp312-win_amd64.whl", hash = "sha256:88d65941a87f58d75afca1785bd64d01183a92f7269dcbcf28bd9d6f6a77d1a7", size = 14217511 }, - { url = "https://files.pythonhosted.org/packages/7b/1f/307a16f32f90aa5ee7ad8d29ff8620a57132b80a4c8c536963d46d192e1a/spacy-3.8.11-cp312-cp312-win_arm64.whl", hash = "sha256:97b865d6d3658e2ab103a67d6c8a2d678e193e84a07f40d9938565b669ceee39", size = 13614446 }, - { url = "https://files.pythonhosted.org/packages/ed/5c/3f07cff8bc478fcf48a915ca9fe8637486a1ec676587ed3e6fd775423301/spacy-3.8.11-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ea4adeb399636059925be085c5bb852c1f3a2ebe1c2060332cbad6257d223bbc", size = 6051355 }, - { url = "https://files.pythonhosted.org/packages/6d/44/4671e8098b62befec69c7848538a0824086559f74065284bbd57a5747781/spacy-3.8.11-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:dd785e6bd85a58fa037da0c18fcd7250e2daecdfc30464d3882912529d1ad588", size = 5700468 }, - { url = "https://files.pythonhosted.org/packages/0c/98/5708bdfb39f94af0655568e14d953886117e18bd04c3aa3ab5ff1a60ea89/spacy-3.8.11-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:598c177054eb6196deed03cac6fb7a3229f4789719ad0c9f7483f9491e375749", size = 32521877 }, - { url = "https://files.pythonhosted.org/packages/c6/1f/731beb48f2c7415a71e2f655876fea8a0b3a6798be3d4d51b794f939623d/spacy-3.8.11-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a5a449ed3f2d03399481870b776f3ec61f2b831812d63dc1acedf6da70e5ab03", size = 32848355 }, - { url = "https://files.pythonhosted.org/packages/47/6b/f3d131d3f9bb1c7de4f355a12adcd0a5fa77f9f624711ddd0f19c517e88b/spacy-3.8.11-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a6c35c2cb93bade9b7360d1f9db608a066246a41301bb579309efb50764ba55b", size = 31764944 }, - { url = "https://files.pythonhosted.org/packages/72/bf/37ea8134667a4f2787b5f0e0146f2e8df1fb36ab67d598ad06eb5ed2e7db/spacy-3.8.11-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:0156ae575b20290021573faa1fed8a82b11314e9a1c28f034713359a5240a325", size = 32718517 }, - { url = "https://files.pythonhosted.org/packages/79/fe/436435dfa93cc355ed511f21cf3cda5302b7aa29716457317eb07f1cf2da/spacy-3.8.11-cp313-cp313-win_amd64.whl", hash = "sha256:6f39cf36f86bd6a8882076f86ca80f246c73aa41d7ebc8679fbbe41b6f8ec045", size = 14211913 }, - { url = "https://files.pythonhosted.org/packages/c8/23/f89cfa51f54aa5e9c6c7a37f8bf4952d678f0902a5e1d81dfda33a94bfb2/spacy-3.8.11-cp313-cp313-win_arm64.whl", hash = "sha256:9a7151eee0814a5ced36642b42b1ecc8f98ac7225f3e378fb9f862ffbe84b8bf", size = 13605169 }, - { url = "https://files.pythonhosted.org/packages/d7/78/ddeb09116b593f3cccc7eb489a713433076b11cf8cdfb98aec641b73a2c2/spacy-3.8.11-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:43c24d19a3f85bde0872935294a31fd9b3a6db3f92bb2b75074177cd3acec03f", size = 6067734 }, - { url = "https://files.pythonhosted.org/packages/65/bb/1bb630250dc70e00fa3821879c6e2cb65c19425aba38840d3484061285c1/spacy-3.8.11-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:b6158c21da57b8373d2d1afb2b73977c4bc4235d2563e7788d44367fc384939a", size = 5732963 }, - { url = "https://files.pythonhosted.org/packages/7a/56/c58071b3db23932ab2b934af3462a958e7edf472da9668e4869fe2a2199e/spacy-3.8.11-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1c0bd1bde1d91f1d7a44774ca4ca3fcf064946b72599a8eb34c25e014362ace1", size = 32447290 }, - { url = "https://files.pythonhosted.org/packages/34/eb/d3947efa2b46848372e89ced8371671d77219612a3eebef15db5690aa4d2/spacy-3.8.11-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:99b767c41a772e544cf2d48e0808764f42f17eb2fd6188db4a729922ff7f0c1e", size = 32488011 }, - { url = "https://files.pythonhosted.org/packages/04/9e/8c6c01558b62388557247e553e48874f52637a5648b957ed01fbd628391d/spacy-3.8.11-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:a3c500f04c164e4366a1163a61bf39fd50f0c63abdb1fc17991281ec52a54ab4", size = 31731340 }, - { url = "https://files.pythonhosted.org/packages/23/1f/21812ec34b187ef6ba223389760dfea09bbe27d2b84b553c5205576b4ac2/spacy-3.8.11-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:a2bfe45c0c1530eaabc68f5434c52b1be8df10d5c195c54d4dc2e70cea97dc65", size = 32478557 }, - { url = 
"https://files.pythonhosted.org/packages/f3/16/a0c9174a232dfe7b48281c05364957e2c6d0f80ef26b67ce8d28a49c2d91/spacy-3.8.11-cp314-cp314-win_amd64.whl", hash = "sha256:45d0bbc8442d18dcea9257be0d1ab26e884067e038b1fa133405bf2f20c74edf", size = 14396041 }, - { url = "https://files.pythonhosted.org/packages/aa/d0/a6aad5b73d523e4686474b0cfcf46f37f3d7a18765be5c1f56c1dcee4c18/spacy-3.8.11-cp314-cp314-win_arm64.whl", hash = "sha256:90a12961ecc44e0195fd42db9f0ce4aade17e6fe03f8ab98d4549911d9e6f992", size = 13823760 }, +sdist = { url = "https://files.pythonhosted.org/packages/59/9f/424244b0e2656afc9ff82fb7a96931a47397bfce5ba382213827b198312a/spacy-3.8.11.tar.gz", hash = "sha256:54e1e87b74a2f9ea807ffd606166bf29ac45e2bd81ff7f608eadc7b05787d90d", size = 1326804, upload_time = "2025-11-17T20:40:03.079Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/51/fb/01eadf4ba70606b3054702dc41fc2ccf7d70fb14514b3cd57f0ff78ebea8/spacy-3.8.11-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:aa1ee8362074c30098feaaf2dd888c829a1a79c4311eec1b117a0a61f16fa6dd", size = 6073726, upload_time = "2025-11-17T20:39:01.679Z" }, + { url = "https://files.pythonhosted.org/packages/3a/f8/07b03a2997fc2621aaeafae00af50f55522304a7da6926b07027bb6d0709/spacy-3.8.11-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:75a036d04c2cf11d6cb566c0a689860cc5a7a75b439e8fea1b3a6b673dabf25d", size = 5724702, upload_time = "2025-11-17T20:39:03.486Z" }, + { url = "https://files.pythonhosted.org/packages/13/0c/c4fa0f379dbe3258c305d2e2df3760604a9fcd71b34f8f65c23e43f4cf55/spacy-3.8.11-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7cb599d2747d4a59a5f90e8a453c149b13db382a8297925cf126333141dbc4f7", size = 32727774, upload_time = "2025-11-17T20:39:05.894Z" }, + { url = "https://files.pythonhosted.org/packages/ce/8e/6a4ba82bed480211ebdf5341b0f89e7271b454307525ac91b5e447825914/spacy-3.8.11-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:94632e302ad2fb79dc285bf1e9e4d4a178904d5c67049e0e02b7fb4a77af85c4", size = 33215053, upload_time = "2025-11-17T20:39:08.588Z" }, + { url = "https://files.pythonhosted.org/packages/a6/bc/44d863d248e9d7358c76a0aa8b3f196b8698df520650ed8de162e18fbffb/spacy-3.8.11-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:aeca6cf34009d48cda9fb1bbfb532469e3d643817241a73e367b34ab99a5806f", size = 32074195, upload_time = "2025-11-17T20:39:11.601Z" }, + { url = "https://files.pythonhosted.org/packages/6f/7d/0b115f3f16e1dd2d3f99b0f89497867fc11c41aed94f4b7a4367b4b54136/spacy-3.8.11-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:368a79b8df925b15d89dccb5e502039446fb2ce93cf3020e092d5b962c3349b9", size = 32996143, upload_time = "2025-11-17T20:39:14.705Z" }, + { url = "https://files.pythonhosted.org/packages/7d/48/7e9581b476df76aaf9ee182888d15322e77c38b0bbbd5e80160ba0bddd4c/spacy-3.8.11-cp312-cp312-win_amd64.whl", hash = "sha256:88d65941a87f58d75afca1785bd64d01183a92f7269dcbcf28bd9d6f6a77d1a7", size = 14217511, upload_time = "2025-11-17T20:39:17.316Z" }, + { url = "https://files.pythonhosted.org/packages/7b/1f/307a16f32f90aa5ee7ad8d29ff8620a57132b80a4c8c536963d46d192e1a/spacy-3.8.11-cp312-cp312-win_arm64.whl", hash = "sha256:97b865d6d3658e2ab103a67d6c8a2d678e193e84a07f40d9938565b669ceee39", size = 13614446, upload_time = "2025-11-17T20:39:19.748Z" }, + { url = "https://files.pythonhosted.org/packages/ed/5c/3f07cff8bc478fcf48a915ca9fe8637486a1ec676587ed3e6fd775423301/spacy-3.8.11-cp313-cp313-macosx_10_13_x86_64.whl", hash = 
"sha256:ea4adeb399636059925be085c5bb852c1f3a2ebe1c2060332cbad6257d223bbc", size = 6051355, upload_time = "2025-11-17T20:39:22.243Z" }, + { url = "https://files.pythonhosted.org/packages/6d/44/4671e8098b62befec69c7848538a0824086559f74065284bbd57a5747781/spacy-3.8.11-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:dd785e6bd85a58fa037da0c18fcd7250e2daecdfc30464d3882912529d1ad588", size = 5700468, upload_time = "2025-11-17T20:39:23.87Z" }, + { url = "https://files.pythonhosted.org/packages/0c/98/5708bdfb39f94af0655568e14d953886117e18bd04c3aa3ab5ff1a60ea89/spacy-3.8.11-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:598c177054eb6196deed03cac6fb7a3229f4789719ad0c9f7483f9491e375749", size = 32521877, upload_time = "2025-11-17T20:39:26.291Z" }, + { url = "https://files.pythonhosted.org/packages/c6/1f/731beb48f2c7415a71e2f655876fea8a0b3a6798be3d4d51b794f939623d/spacy-3.8.11-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a5a449ed3f2d03399481870b776f3ec61f2b831812d63dc1acedf6da70e5ab03", size = 32848355, upload_time = "2025-11-17T20:39:28.971Z" }, + { url = "https://files.pythonhosted.org/packages/47/6b/f3d131d3f9bb1c7de4f355a12adcd0a5fa77f9f624711ddd0f19c517e88b/spacy-3.8.11-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a6c35c2cb93bade9b7360d1f9db608a066246a41301bb579309efb50764ba55b", size = 31764944, upload_time = "2025-11-17T20:39:31.788Z" }, + { url = "https://files.pythonhosted.org/packages/72/bf/37ea8134667a4f2787b5f0e0146f2e8df1fb36ab67d598ad06eb5ed2e7db/spacy-3.8.11-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:0156ae575b20290021573faa1fed8a82b11314e9a1c28f034713359a5240a325", size = 32718517, upload_time = "2025-11-17T20:39:35.286Z" }, + { url = "https://files.pythonhosted.org/packages/79/fe/436435dfa93cc355ed511f21cf3cda5302b7aa29716457317eb07f1cf2da/spacy-3.8.11-cp313-cp313-win_amd64.whl", hash = "sha256:6f39cf36f86bd6a8882076f86ca80f246c73aa41d7ebc8679fbbe41b6f8ec045", size = 14211913, upload_time = "2025-11-17T20:39:37.906Z" }, + { url = "https://files.pythonhosted.org/packages/c8/23/f89cfa51f54aa5e9c6c7a37f8bf4952d678f0902a5e1d81dfda33a94bfb2/spacy-3.8.11-cp313-cp313-win_arm64.whl", hash = "sha256:9a7151eee0814a5ced36642b42b1ecc8f98ac7225f3e378fb9f862ffbe84b8bf", size = 13605169, upload_time = "2025-11-17T20:39:40.455Z" }, + { url = "https://files.pythonhosted.org/packages/d7/78/ddeb09116b593f3cccc7eb489a713433076b11cf8cdfb98aec641b73a2c2/spacy-3.8.11-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:43c24d19a3f85bde0872935294a31fd9b3a6db3f92bb2b75074177cd3acec03f", size = 6067734, upload_time = "2025-11-17T20:39:42.629Z" }, + { url = "https://files.pythonhosted.org/packages/65/bb/1bb630250dc70e00fa3821879c6e2cb65c19425aba38840d3484061285c1/spacy-3.8.11-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:b6158c21da57b8373d2d1afb2b73977c4bc4235d2563e7788d44367fc384939a", size = 5732963, upload_time = "2025-11-17T20:39:44.872Z" }, + { url = "https://files.pythonhosted.org/packages/7a/56/c58071b3db23932ab2b934af3462a958e7edf472da9668e4869fe2a2199e/spacy-3.8.11-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1c0bd1bde1d91f1d7a44774ca4ca3fcf064946b72599a8eb34c25e014362ace1", size = 32447290, upload_time = "2025-11-17T20:39:47.392Z" }, + { url = "https://files.pythonhosted.org/packages/34/eb/d3947efa2b46848372e89ced8371671d77219612a3eebef15db5690aa4d2/spacy-3.8.11-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:99b767c41a772e544cf2d48e0808764f42f17eb2fd6188db4a729922ff7f0c1e", size = 32488011, upload_time = "2025-11-17T20:39:50.408Z" }, + { url = "https://files.pythonhosted.org/packages/04/9e/8c6c01558b62388557247e553e48874f52637a5648b957ed01fbd628391d/spacy-3.8.11-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:a3c500f04c164e4366a1163a61bf39fd50f0c63abdb1fc17991281ec52a54ab4", size = 31731340, upload_time = "2025-11-17T20:39:53.221Z" }, + { url = "https://files.pythonhosted.org/packages/23/1f/21812ec34b187ef6ba223389760dfea09bbe27d2b84b553c5205576b4ac2/spacy-3.8.11-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:a2bfe45c0c1530eaabc68f5434c52b1be8df10d5c195c54d4dc2e70cea97dc65", size = 32478557, upload_time = "2025-11-17T20:39:55.826Z" }, + { url = "https://files.pythonhosted.org/packages/f3/16/a0c9174a232dfe7b48281c05364957e2c6d0f80ef26b67ce8d28a49c2d91/spacy-3.8.11-cp314-cp314-win_amd64.whl", hash = "sha256:45d0bbc8442d18dcea9257be0d1ab26e884067e038b1fa133405bf2f20c74edf", size = 14396041, upload_time = "2025-11-17T20:39:58.557Z" }, + { url = "https://files.pythonhosted.org/packages/aa/d0/a6aad5b73d523e4686474b0cfcf46f37f3d7a18765be5c1f56c1dcee4c18/spacy-3.8.11-cp314-cp314-win_arm64.whl", hash = "sha256:90a12961ecc44e0195fd42db9f0ce4aade17e6fe03f8ab98d4549911d9e6f992", size = 13823760, upload_time = "2025-11-17T20:40:00.831Z" }, ] [[package]] name = "spacy-legacy" version = "3.0.12" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d9/79/91f9d7cc8db5642acad830dcc4b49ba65a7790152832c4eceb305e46d681/spacy-legacy-3.0.12.tar.gz", hash = "sha256:b37d6e0c9b6e1d7ca1cf5bc7152ab64a4c4671f59c85adaf7a3fcb870357a774", size = 23806 } +sdist = { url = "https://files.pythonhosted.org/packages/d9/79/91f9d7cc8db5642acad830dcc4b49ba65a7790152832c4eceb305e46d681/spacy-legacy-3.0.12.tar.gz", hash = "sha256:b37d6e0c9b6e1d7ca1cf5bc7152ab64a4c4671f59c85adaf7a3fcb870357a774", size = 23806, upload_time = "2023-01-23T09:04:15.104Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c3/55/12e842c70ff8828e34e543a2c7176dac4da006ca6901c9e8b43efab8bc6b/spacy_legacy-3.0.12-py2.py3-none-any.whl", hash = "sha256:476e3bd0d05f8c339ed60f40986c07387c0a71479245d6d0f4298dbd52cda55f", size = 29971 }, + { url = "https://files.pythonhosted.org/packages/c3/55/12e842c70ff8828e34e543a2c7176dac4da006ca6901c9e8b43efab8bc6b/spacy_legacy-3.0.12-py2.py3-none-any.whl", hash = "sha256:476e3bd0d05f8c339ed60f40986c07387c0a71479245d6d0f4298dbd52cda55f", size = 29971, upload_time = "2023-01-23T09:04:13.45Z" }, ] [[package]] name = "spacy-loggers" version = "1.0.5" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/67/3d/926db774c9c98acf66cb4ed7faf6c377746f3e00b84b700d0868b95d0712/spacy-loggers-1.0.5.tar.gz", hash = "sha256:d60b0bdbf915a60e516cc2e653baeff946f0cfc461b452d11a4d5458c6fe5f24", size = 20811 } +sdist = { url = "https://files.pythonhosted.org/packages/67/3d/926db774c9c98acf66cb4ed7faf6c377746f3e00b84b700d0868b95d0712/spacy-loggers-1.0.5.tar.gz", hash = "sha256:d60b0bdbf915a60e516cc2e653baeff946f0cfc461b452d11a4d5458c6fe5f24", size = 20811, upload_time = "2023-09-11T12:26:52.323Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/33/78/d1a1a026ef3af911159398c939b1509d5c36fe524c7b644f34a5146c4e16/spacy_loggers-1.0.5-py3-none-any.whl", hash = "sha256:196284c9c446cc0cdb944005384270d775fdeaf4f494d8e269466cfa497ef645", size = 22343 }, + { url = 
"https://files.pythonhosted.org/packages/33/78/d1a1a026ef3af911159398c939b1509d5c36fe524c7b644f34a5146c4e16/spacy_loggers-1.0.5-py3-none-any.whl", hash = "sha256:196284c9c446cc0cdb944005384270d775fdeaf4f494d8e269466cfa497ef645", size = 22343, upload_time = "2023-09-11T12:26:50.586Z" }, ] [[package]] name = "sqlitedict" version = "2.1.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/12/9a/7620d1e9dcb02839ed6d4b14064e609cdd7a8ae1e47289aa0456796dd9ca/sqlitedict-2.1.0.tar.gz", hash = "sha256:03d9cfb96d602996f1d4c2db2856f1224b96a9c431bdd16e78032a72940f9e8c", size = 21846 } +sdist = { url = "https://files.pythonhosted.org/packages/12/9a/7620d1e9dcb02839ed6d4b14064e609cdd7a8ae1e47289aa0456796dd9ca/sqlitedict-2.1.0.tar.gz", hash = "sha256:03d9cfb96d602996f1d4c2db2856f1224b96a9c431bdd16e78032a72940f9e8c", size = 21846, upload_time = "2022-12-03T13:39:13.102Z" } [[package]] name = "srsly" @@ -2973,36 +3077,36 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "catalogue" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/cf/77/5633c4ba65e3421b72b5b4bd93aa328360b351b3a1e5bf3c90eb224668e5/srsly-2.5.2.tar.gz", hash = "sha256:4092bc843c71b7595c6c90a0302a197858c5b9fe43067f62ae6a45bc3baa1c19", size = 492055 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8f/1c/21f658d98d602a559491b7886c7ca30245c2cd8987ff1b7709437c0f74b1/srsly-2.5.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6f92b4f883e6be4ca77f15980b45d394d310f24903e25e1b2c46df783c7edcce", size = 656161 }, - { url = "https://files.pythonhosted.org/packages/2f/a2/bc6fd484ed703857043ae9abd6c9aea9152f9480a6961186ee6c1e0c49e8/srsly-2.5.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ac4790a54b00203f1af5495b6b8ac214131139427f30fcf05cf971dde81930eb", size = 653237 }, - { url = "https://files.pythonhosted.org/packages/ab/ea/e3895da29a15c8d325e050ad68a0d1238eece1d2648305796adf98dcba66/srsly-2.5.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ce5c6b016050857a7dd365c9dcdd00d96e7ac26317cfcb175db387e403de05bf", size = 1174418 }, - { url = "https://files.pythonhosted.org/packages/a6/a5/21996231f53ee97191d0746c3a672ba33a4d86a19ffad85a1c0096c91c5f/srsly-2.5.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:539c6d0016e91277b5e9be31ebed03f03c32580d49c960e4a92c9003baecf69e", size = 1183089 }, - { url = "https://files.pythonhosted.org/packages/7b/df/eb17aa8e4a828e8df7aa7dc471295529d9126e6b710f1833ebe0d8568a8e/srsly-2.5.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9f24b2c4f4c29da04083f09158543eb3f8893ba0ac39818693b3b259ee8044f0", size = 1122594 }, - { url = "https://files.pythonhosted.org/packages/80/74/1654a80e6c8ec3ee32370ea08a78d3651e0ba1c4d6e6be31c9efdb9a2d10/srsly-2.5.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d34675047460a3f6999e43478f40d9b43917ea1e93a75c41d05bf7648f3e872d", size = 1139594 }, - { url = "https://files.pythonhosted.org/packages/73/aa/8393344ca7f0e81965febba07afc5cad68335ed0426408d480b861ab915b/srsly-2.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:81fd133ba3c66c07f0e3a889d2b4c852984d71ea833a665238a9d47d8e051ba5", size = 654750 }, - { url = "https://files.pythonhosted.org/packages/c2/c5/dc29e65419692444253ea549106be156c5911041f16791f3b62fb90c14f2/srsly-2.5.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d976d6ae8e66006797b919e3d58533dce64cd48a5447a8ff7277f9b0505b0185", size = 654723 }, - { url = 
"https://files.pythonhosted.org/packages/80/8c/8111e7e8c766b47b5a5f9864f27f532cf6bb92837a3e277eb297170bd6af/srsly-2.5.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:24f52ecd27409ea24ba116ee9f07a2bb1c4b9ba11284b32a0bf2ca364499d1c1", size = 651651 }, - { url = "https://files.pythonhosted.org/packages/45/de/3f99d4e44af427ee09004df6586d0746640536b382c948f456be027c599b/srsly-2.5.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:24b0667ce1effb32a57522db10705db7c78d144547fcacc8a06df62c4bb7f96e", size = 1158012 }, - { url = "https://files.pythonhosted.org/packages/c3/2f/66044ef5a10a487652913c1a7f32396cb0e9e32ecfc3fdc0a0bc0382e703/srsly-2.5.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:60782f6f79c340cdaf1ba7cbaa1d354a0f7c8f86b285f1e14e75edb51452895a", size = 1163258 }, - { url = "https://files.pythonhosted.org/packages/74/6b/698834048672b52937e8cf09b554adb81b106c0492f9bc62e41e3b46a69b/srsly-2.5.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:eec51abb1b58e1e6c689714104aeeba6290c40c0bfad0243b9b594df89f05881", size = 1112214 }, - { url = "https://files.pythonhosted.org/packages/85/17/1efc70426be93d32a3c6c5c12d795eb266a9255d8b537fcb924a3de57fcb/srsly-2.5.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:76464e45f73afd20c2c34d2ef145bf788afc32e7d45f36f6393ed92a85189ed3", size = 1130687 }, - { url = "https://files.pythonhosted.org/packages/e2/25/07f8c8a778bc0447ee15e37089b08af81b24fcc1d4a2c09eff4c3a79b241/srsly-2.5.2-cp313-cp313-win_amd64.whl", hash = "sha256:009424a96d763951e4872b36ba38823f973bef094a1adbc11102e23e8d1ef429", size = 653128 }, - { url = "https://files.pythonhosted.org/packages/39/03/3d248f538abc141d9c7ed1aa10e61506c0f95515a61066ee90e888f0cd8f/srsly-2.5.2-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:a0911dcf1026f982bd8c5f73e1c43f1bc868416408fcbc1f3d99eb59475420c5", size = 659866 }, - { url = "https://files.pythonhosted.org/packages/43/22/0fcff4c977ddfb32a6b10f33d904868b16ce655323756281f973c5a3449e/srsly-2.5.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:f0ff3ac2942aee44235ca3c7712fcbd6e0d1a092e10ee16e07cef459ed6d7f65", size = 655868 }, - { url = "https://files.pythonhosted.org/packages/1b/c1/e158f26a5597ac31b0f306d2584411ec1f984058e8171d76c678bf439e96/srsly-2.5.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:78385fb75e1bf7b81ffde97555aee094d270a5e0ea66f8280f6e95f5bb508b3e", size = 1156753 }, - { url = "https://files.pythonhosted.org/packages/d9/bc/2001cd27fd6ecdae79050cf6b655ca646dedc0b69a756e6a87993cc47314/srsly-2.5.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2e9943b70bd7655b9eefca77aab838c3b7acea00c9dd244fd218a43dc61c518b", size = 1157916 }, - { url = "https://files.pythonhosted.org/packages/5c/dd/56f563c2d0cd76c8fd22fb9f1589f18af50b54d31dd3323ceb05fe7999b8/srsly-2.5.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7d235a2bb08f5240e47c6aba4d9688b228d830fbf4c858388d9c151a10039e6d", size = 1114582 }, - { url = "https://files.pythonhosted.org/packages/2e/e6/e155facc965a119e6f5d32b7e95082cadfb62cc5d97087d53db93f3a5a98/srsly-2.5.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ad94ee18b3042a6cdfdc022556e2ed9a7b52b876de86fe334c4d8ec58d59ecbc", size = 1129875 }, - { url = "https://files.pythonhosted.org/packages/b6/3a/c12a4d556349c9f491b0a9d27968483f22934d2a02dfb14fb1d3a7d9b837/srsly-2.5.2-cp314-cp314-win_amd64.whl", hash = "sha256:6658467165d8fa4aec0f5f6e2da8fe977e087eaff13322b0ff20450f0d762cee", size = 658858 }, - { url 
= "https://files.pythonhosted.org/packages/70/db/52510cbf478ab3ae8cb6c95aff3a499f2ded69df6d84df8a293630e9f10a/srsly-2.5.2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:517e907792acf574979752ce33e7b15985c95d4ed7d8e38ee47f36063dc985ac", size = 666843 }, - { url = "https://files.pythonhosted.org/packages/3d/da/4257b1d4c3eb005ecd135414398c033c13c4d3dffb715f63c3acd63d8d1a/srsly-2.5.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:e5602797e6f87bf030b11ad356828142367c5c81e923303b5ff2a88dfb12d1e4", size = 663981 }, - { url = "https://files.pythonhosted.org/packages/c6/f8/1ec5edd7299d8599def20fc3440372964f7c750022db8063e321747d1cf8/srsly-2.5.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:3452306118f8604daaaac6d770ee8f910fca449e8f066dcc96a869b43ece5340", size = 1267808 }, - { url = "https://files.pythonhosted.org/packages/3e/5c/4ef9782c9a3f331ef80e1ea8fc6fab50fc3d32ae61a494625d2c5f30cc4c/srsly-2.5.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e2d59f1ce00d73397a7f5b9fc33e76d17816ce051abe4eb920cec879d2a9d4f4", size = 1252838 }, - { url = "https://files.pythonhosted.org/packages/39/da/d13cfc662d71eec3ccd4072433bf435bd2e11e1c5340150b4cc43fad46f4/srsly-2.5.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:ebda3736651d33d92b17e26c525ba8d0b94d0ee379c9f92e8d937ba89dca8978", size = 1244558 }, - { url = "https://files.pythonhosted.org/packages/26/50/92bf62dfb19532b823ef52251bb7003149e1d4a89f50a63332c8ff5f894b/srsly-2.5.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:74a9338fcc044f4bdc7113b2d9db2db8e0a263c69f1cba965acf12c845d8b365", size = 1244935 }, - { url = "https://files.pythonhosted.org/packages/95/81/6ea10ef6228ce4438a240c803639f7ccf5eae3469fbc015f33bd84aa8df1/srsly-2.5.2-cp314-cp314t-win_amd64.whl", hash = "sha256:8e2b9058623c44b07441eb0d711dfdf6302f917f0634d0a294cae37578dcf899", size = 676105 }, +sdist = { url = "https://files.pythonhosted.org/packages/cf/77/5633c4ba65e3421b72b5b4bd93aa328360b351b3a1e5bf3c90eb224668e5/srsly-2.5.2.tar.gz", hash = "sha256:4092bc843c71b7595c6c90a0302a197858c5b9fe43067f62ae6a45bc3baa1c19", size = 492055, upload_time = "2025-11-17T14:11:02.543Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8f/1c/21f658d98d602a559491b7886c7ca30245c2cd8987ff1b7709437c0f74b1/srsly-2.5.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6f92b4f883e6be4ca77f15980b45d394d310f24903e25e1b2c46df783c7edcce", size = 656161, upload_time = "2025-11-17T14:10:03.181Z" }, + { url = "https://files.pythonhosted.org/packages/2f/a2/bc6fd484ed703857043ae9abd6c9aea9152f9480a6961186ee6c1e0c49e8/srsly-2.5.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ac4790a54b00203f1af5495b6b8ac214131139427f30fcf05cf971dde81930eb", size = 653237, upload_time = "2025-11-17T14:10:04.636Z" }, + { url = "https://files.pythonhosted.org/packages/ab/ea/e3895da29a15c8d325e050ad68a0d1238eece1d2648305796adf98dcba66/srsly-2.5.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ce5c6b016050857a7dd365c9dcdd00d96e7ac26317cfcb175db387e403de05bf", size = 1174418, upload_time = "2025-11-17T14:10:05.945Z" }, + { url = "https://files.pythonhosted.org/packages/a6/a5/21996231f53ee97191d0746c3a672ba33a4d86a19ffad85a1c0096c91c5f/srsly-2.5.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:539c6d0016e91277b5e9be31ebed03f03c32580d49c960e4a92c9003baecf69e", size = 1183089, upload_time = "2025-11-17T14:10:07.335Z" }, + { url = 
"https://files.pythonhosted.org/packages/7b/df/eb17aa8e4a828e8df7aa7dc471295529d9126e6b710f1833ebe0d8568a8e/srsly-2.5.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9f24b2c4f4c29da04083f09158543eb3f8893ba0ac39818693b3b259ee8044f0", size = 1122594, upload_time = "2025-11-17T14:10:08.899Z" }, + { url = "https://files.pythonhosted.org/packages/80/74/1654a80e6c8ec3ee32370ea08a78d3651e0ba1c4d6e6be31c9efdb9a2d10/srsly-2.5.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d34675047460a3f6999e43478f40d9b43917ea1e93a75c41d05bf7648f3e872d", size = 1139594, upload_time = "2025-11-17T14:10:10.286Z" }, + { url = "https://files.pythonhosted.org/packages/73/aa/8393344ca7f0e81965febba07afc5cad68335ed0426408d480b861ab915b/srsly-2.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:81fd133ba3c66c07f0e3a889d2b4c852984d71ea833a665238a9d47d8e051ba5", size = 654750, upload_time = "2025-11-17T14:10:11.637Z" }, + { url = "https://files.pythonhosted.org/packages/c2/c5/dc29e65419692444253ea549106be156c5911041f16791f3b62fb90c14f2/srsly-2.5.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d976d6ae8e66006797b919e3d58533dce64cd48a5447a8ff7277f9b0505b0185", size = 654723, upload_time = "2025-11-17T14:10:13.305Z" }, + { url = "https://files.pythonhosted.org/packages/80/8c/8111e7e8c766b47b5a5f9864f27f532cf6bb92837a3e277eb297170bd6af/srsly-2.5.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:24f52ecd27409ea24ba116ee9f07a2bb1c4b9ba11284b32a0bf2ca364499d1c1", size = 651651, upload_time = "2025-11-17T14:10:14.907Z" }, + { url = "https://files.pythonhosted.org/packages/45/de/3f99d4e44af427ee09004df6586d0746640536b382c948f456be027c599b/srsly-2.5.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:24b0667ce1effb32a57522db10705db7c78d144547fcacc8a06df62c4bb7f96e", size = 1158012, upload_time = "2025-11-17T14:10:16.176Z" }, + { url = "https://files.pythonhosted.org/packages/c3/2f/66044ef5a10a487652913c1a7f32396cb0e9e32ecfc3fdc0a0bc0382e703/srsly-2.5.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:60782f6f79c340cdaf1ba7cbaa1d354a0f7c8f86b285f1e14e75edb51452895a", size = 1163258, upload_time = "2025-11-17T14:10:17.471Z" }, + { url = "https://files.pythonhosted.org/packages/74/6b/698834048672b52937e8cf09b554adb81b106c0492f9bc62e41e3b46a69b/srsly-2.5.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:eec51abb1b58e1e6c689714104aeeba6290c40c0bfad0243b9b594df89f05881", size = 1112214, upload_time = "2025-11-17T14:10:18.679Z" }, + { url = "https://files.pythonhosted.org/packages/85/17/1efc70426be93d32a3c6c5c12d795eb266a9255d8b537fcb924a3de57fcb/srsly-2.5.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:76464e45f73afd20c2c34d2ef145bf788afc32e7d45f36f6393ed92a85189ed3", size = 1130687, upload_time = "2025-11-17T14:10:20.346Z" }, + { url = "https://files.pythonhosted.org/packages/e2/25/07f8c8a778bc0447ee15e37089b08af81b24fcc1d4a2c09eff4c3a79b241/srsly-2.5.2-cp313-cp313-win_amd64.whl", hash = "sha256:009424a96d763951e4872b36ba38823f973bef094a1adbc11102e23e8d1ef429", size = 653128, upload_time = "2025-11-17T14:10:21.552Z" }, + { url = "https://files.pythonhosted.org/packages/39/03/3d248f538abc141d9c7ed1aa10e61506c0f95515a61066ee90e888f0cd8f/srsly-2.5.2-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:a0911dcf1026f982bd8c5f73e1c43f1bc868416408fcbc1f3d99eb59475420c5", size = 659866, upload_time = "2025-11-17T14:10:22.811Z" }, + { url = 
"https://files.pythonhosted.org/packages/43/22/0fcff4c977ddfb32a6b10f33d904868b16ce655323756281f973c5a3449e/srsly-2.5.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:f0ff3ac2942aee44235ca3c7712fcbd6e0d1a092e10ee16e07cef459ed6d7f65", size = 655868, upload_time = "2025-11-17T14:10:24.036Z" }, + { url = "https://files.pythonhosted.org/packages/1b/c1/e158f26a5597ac31b0f306d2584411ec1f984058e8171d76c678bf439e96/srsly-2.5.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:78385fb75e1bf7b81ffde97555aee094d270a5e0ea66f8280f6e95f5bb508b3e", size = 1156753, upload_time = "2025-11-17T14:10:25.366Z" }, + { url = "https://files.pythonhosted.org/packages/d9/bc/2001cd27fd6ecdae79050cf6b655ca646dedc0b69a756e6a87993cc47314/srsly-2.5.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2e9943b70bd7655b9eefca77aab838c3b7acea00c9dd244fd218a43dc61c518b", size = 1157916, upload_time = "2025-11-17T14:10:26.705Z" }, + { url = "https://files.pythonhosted.org/packages/5c/dd/56f563c2d0cd76c8fd22fb9f1589f18af50b54d31dd3323ceb05fe7999b8/srsly-2.5.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7d235a2bb08f5240e47c6aba4d9688b228d830fbf4c858388d9c151a10039e6d", size = 1114582, upload_time = "2025-11-17T14:10:27.997Z" }, + { url = "https://files.pythonhosted.org/packages/2e/e6/e155facc965a119e6f5d32b7e95082cadfb62cc5d97087d53db93f3a5a98/srsly-2.5.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ad94ee18b3042a6cdfdc022556e2ed9a7b52b876de86fe334c4d8ec58d59ecbc", size = 1129875, upload_time = "2025-11-17T14:10:29.295Z" }, + { url = "https://files.pythonhosted.org/packages/b6/3a/c12a4d556349c9f491b0a9d27968483f22934d2a02dfb14fb1d3a7d9b837/srsly-2.5.2-cp314-cp314-win_amd64.whl", hash = "sha256:6658467165d8fa4aec0f5f6e2da8fe977e087eaff13322b0ff20450f0d762cee", size = 658858, upload_time = "2025-11-17T14:10:30.612Z" }, + { url = "https://files.pythonhosted.org/packages/70/db/52510cbf478ab3ae8cb6c95aff3a499f2ded69df6d84df8a293630e9f10a/srsly-2.5.2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:517e907792acf574979752ce33e7b15985c95d4ed7d8e38ee47f36063dc985ac", size = 666843, upload_time = "2025-11-17T14:10:32.082Z" }, + { url = "https://files.pythonhosted.org/packages/3d/da/4257b1d4c3eb005ecd135414398c033c13c4d3dffb715f63c3acd63d8d1a/srsly-2.5.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:e5602797e6f87bf030b11ad356828142367c5c81e923303b5ff2a88dfb12d1e4", size = 663981, upload_time = "2025-11-17T14:10:33.542Z" }, + { url = "https://files.pythonhosted.org/packages/c6/f8/1ec5edd7299d8599def20fc3440372964f7c750022db8063e321747d1cf8/srsly-2.5.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:3452306118f8604daaaac6d770ee8f910fca449e8f066dcc96a869b43ece5340", size = 1267808, upload_time = "2025-11-17T14:10:35.285Z" }, + { url = "https://files.pythonhosted.org/packages/3e/5c/4ef9782c9a3f331ef80e1ea8fc6fab50fc3d32ae61a494625d2c5f30cc4c/srsly-2.5.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e2d59f1ce00d73397a7f5b9fc33e76d17816ce051abe4eb920cec879d2a9d4f4", size = 1252838, upload_time = "2025-11-17T14:10:37.024Z" }, + { url = "https://files.pythonhosted.org/packages/39/da/d13cfc662d71eec3ccd4072433bf435bd2e11e1c5340150b4cc43fad46f4/srsly-2.5.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:ebda3736651d33d92b17e26c525ba8d0b94d0ee379c9f92e8d937ba89dca8978", size = 1244558, upload_time = "2025-11-17T14:10:38.73Z" }, + { url = 
"https://files.pythonhosted.org/packages/26/50/92bf62dfb19532b823ef52251bb7003149e1d4a89f50a63332c8ff5f894b/srsly-2.5.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:74a9338fcc044f4bdc7113b2d9db2db8e0a263c69f1cba965acf12c845d8b365", size = 1244935, upload_time = "2025-11-17T14:10:42.324Z" }, + { url = "https://files.pythonhosted.org/packages/95/81/6ea10ef6228ce4438a240c803639f7ccf5eae3469fbc015f33bd84aa8df1/srsly-2.5.2-cp314-cp314t-win_amd64.whl", hash = "sha256:8e2b9058623c44b07441eb0d711dfdf6302f917f0634d0a294cae37578dcf899", size = 676105, upload_time = "2025-11-17T14:10:43.633Z" }, ] [[package]] @@ -3012,18 +3116,18 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "mpmath" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/83/d3/803453b36afefb7c2bb238361cd4ae6125a569b4db67cd9e79846ba2d68c/sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517", size = 7793921 } +sdist = { url = "https://files.pythonhosted.org/packages/83/d3/803453b36afefb7c2bb238361cd4ae6125a569b4db67cd9e79846ba2d68c/sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517", size = 7793921, upload_time = "2025-04-27T18:05:01.611Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353 }, + { url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353, upload_time = "2025-04-27T18:04:59.103Z" }, ] [[package]] name = "tenacity" version = "9.1.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/0a/d4/2b0cd0fe285e14b36db076e78c93766ff1d529d70408bd1d2a5a84f1d929/tenacity-9.1.2.tar.gz", hash = "sha256:1169d376c297e7de388d18b4481760d478b0e99a777cad3a9c86e556f4b697cb", size = 48036 } +sdist = { url = "https://files.pythonhosted.org/packages/0a/d4/2b0cd0fe285e14b36db076e78c93766ff1d529d70408bd1d2a5a84f1d929/tenacity-9.1.2.tar.gz", hash = "sha256:1169d376c297e7de388d18b4481760d478b0e99a777cad3a9c86e556f4b697cb", size = 48036, upload_time = "2025-04-02T08:25:09.966Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e5/30/643397144bfbfec6f6ef821f36f33e57d35946c44a2352d3c9f0ae847619/tenacity-9.1.2-py3-none-any.whl", hash = "sha256:f77bf36710d8b73a50b2dd155c97b870017ad21afe6ab300326b0371b3b05138", size = 28248 }, + { url = "https://files.pythonhosted.org/packages/e5/30/643397144bfbfec6f6ef821f36f33e57d35946c44a2352d3c9f0ae847619/tenacity-9.1.2-py3-none-any.whl", hash = "sha256:f77bf36710d8b73a50b2dd155c97b870017ad21afe6ab300326b0371b3b05138", size = 28248, upload_time = "2025-04-02T08:25:07.678Z" }, ] [[package]] @@ -3038,9 +3142,9 @@ dependencies = [ { name = "rich" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/15/4b/24ae02d857ec0fa6661d27a989994e65c6a3b3d56c7177b2d8e022d29ccc/textual-7.2.0.tar.gz", hash = "sha256:5355f2dc16fbdc452a714dee2e440125e33b82373b3032cb53bea96e7019fa0b", size = 1582530 } +sdist = { url = "https://files.pythonhosted.org/packages/15/4b/24ae02d857ec0fa6661d27a989994e65c6a3b3d56c7177b2d8e022d29ccc/textual-7.2.0.tar.gz", hash = 
"sha256:5355f2dc16fbdc452a714dee2e440125e33b82373b3032cb53bea96e7019fa0b", size = 1582530, upload_time = "2026-01-11T17:40:50.75Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/48/b8/cc8ed2548ff1bf0fd719dd399bb56869d82a57143c2772cfd57f68efc1d3/textual-7.2.0-py3-none-any.whl", hash = "sha256:2624077f02dbd504beea9a24a943770f954f500a5f29a0bfa83465c52fa3ea1c", size = 715809 }, + { url = "https://files.pythonhosted.org/packages/48/b8/cc8ed2548ff1bf0fd719dd399bb56869d82a57143c2772cfd57f68efc1d3/textual-7.2.0-py3-none-any.whl", hash = "sha256:2624077f02dbd504beea9a24a943770f954f500a5f29a0bfa83465c52fa3ea1c", size = 715809, upload_time = "2026-01-11T17:40:48.679Z" }, ] [[package]] @@ -3061,41 +3165,41 @@ dependencies = [ { name = "srsly" }, { name = "wasabi" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/2f/3a/2d0f0be132b9faaa6d56f04565ae122684273e4bf4eab8dee5f48dc00f68/thinc-8.3.10.tar.gz", hash = "sha256:5a75109f4ee1c968fc055ce651a17cb44b23b000d9e95f04a4d047ab3cb3e34e", size = 194196 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d3/34/ba3b386d92edf50784b60ee34318d47c7f49c198268746ef7851c5bbe8cf/thinc-8.3.10-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:51bc6ef735bdbcab75ab2916731b8f61f94c66add6f9db213d900d3c6a244f95", size = 794509 }, - { url = "https://files.pythonhosted.org/packages/07/f3/9f52d18115cd9d8d7b2590d226cb2752d2a5ffec61576b19462b48410184/thinc-8.3.10-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4f48b4d346915f98e9722c0c50ef911cc16c6790a2b7afebc6e1a2c96a6ce6c6", size = 741084 }, - { url = "https://files.pythonhosted.org/packages/ad/9c/129c2b740c4e3d3624b6fb3dec1577ef27cb804bc1647f9bc3e1801ea20c/thinc-8.3.10-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5003f4db2db22cc8d686db8db83509acc3c50f4c55ebdcb2bbfcc1095096f7d2", size = 3846337 }, - { url = "https://files.pythonhosted.org/packages/22/d2/738cf188dea8240c2be081c83ea47270fea585eba446171757d2cdb9b675/thinc-8.3.10-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b12484c3ed0632331fada2c334680dd6bc35972d0717343432dfc701f04a9b4c", size = 3901216 }, - { url = "https://files.pythonhosted.org/packages/22/92/32f66eb9b1a29b797bf378a0874615d810d79eefca1d6c736c5ca3f8b918/thinc-8.3.10-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8677c446d3f9b97a465472c58683b785b25dfcf26c683e3f4e8f8c7c188e4362", size = 4827286 }, - { url = "https://files.pythonhosted.org/packages/c4/5f/7ceae1e1f2029efd67ed88e23cd6dc13a5ee647cdc2b35113101b2a62c10/thinc-8.3.10-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:759c385ac08dcf950238b60b96a28f9c04618861141766928dff4a51b1679b25", size = 5024421 }, - { url = "https://files.pythonhosted.org/packages/0b/66/30f9d8d41049b78bc614213d492792fbcfeb1b28642adf661c42110a7ebd/thinc-8.3.10-cp312-cp312-win_amd64.whl", hash = "sha256:bf3f188c3fa1fdcefd547d1f90a1245c29025d6d0e3f71d7fdf21dad210b990c", size = 1718631 }, - { url = "https://files.pythonhosted.org/packages/f8/44/32e2a5018a1165a304d25eb9b1c74e5310da19a533a35331e8d824dc6a88/thinc-8.3.10-cp312-cp312-win_arm64.whl", hash = "sha256:234b7e57a6ef4e0260d99f4e8fdc328ed12d0ba9bbd98fdaa567294a17700d1c", size = 1642224 }, - { url = "https://files.pythonhosted.org/packages/53/fc/17a2818d1f460b8c4f33b8bd3f21b19d263a647bfd23b572768d175e6b64/thinc-8.3.10-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c7c3a50ddd423d1c49419899acef4ac80d800af3b423593acb9e40578384b543", size = 789771 }, - { url = 
"https://files.pythonhosted.org/packages/8d/24/649f54774b1fbe791a1c2efd7d7f0a95cfd9244902553ca7dcf19daab1dd/thinc-8.3.10-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1a1cb110398f51fc2b9a07a2a4daec6f91e166533a9c9f1c565225330f46569a", size = 737051 }, - { url = "https://files.pythonhosted.org/packages/b2/8c/5840c6c504c1fa9718e1c74d6e04d77a474f594888867dbba53f9317285f/thinc-8.3.10-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:42318746a67403d04be57d862fe0c0015b58b6fb9bbbf7b6db01f3f103b73a99", size = 3839221 }, - { url = "https://files.pythonhosted.org/packages/45/ef/e7fca88074cb0aa1c1a23195470b4549492c2797fe7dc9ff79a85500153a/thinc-8.3.10-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6b0e41e79973f8828adead770f885db8d0f199bfbaa9591d1d896c385842e993", size = 3885024 }, - { url = "https://files.pythonhosted.org/packages/9a/eb/805e277aa019896009028d727460f071c6cf83843d70f6a69e58994d2203/thinc-8.3.10-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9ed982daa1eddbad813bfd079546483b849a68b98c01ad4a7e4efd125ddc5d7b", size = 4815939 }, - { url = "https://files.pythonhosted.org/packages/4f/f5/6425f12a60e3782091c9ec16394b9239f0c18c52c70218f3c8c047ff985c/thinc-8.3.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d22bd381410749dec5f629b3162b7d1f1e2d9b7364fd49a7ea555b61c93772b9", size = 5020260 }, - { url = "https://files.pythonhosted.org/packages/85/a2/ae98feffe0b161400e87b7bfc8859e6fa1e6023fa7bcfa0a8cacd83b39a1/thinc-8.3.10-cp313-cp313-win_amd64.whl", hash = "sha256:9c32830446a57da13b6856cacb0225bc2f2104f279d9928d40500081c13aa9ec", size = 1717562 }, - { url = "https://files.pythonhosted.org/packages/b8/e0/faa1d04a6890ea33b9541727d2a3ca88bad794a89f73b9111af6f9aefe10/thinc-8.3.10-cp313-cp313-win_arm64.whl", hash = "sha256:aa43f9af76781d32f5f9fe29299204c8841d71e64cbb56e0e4f3d1e0387c2783", size = 1641536 }, - { url = "https://files.pythonhosted.org/packages/b8/32/7a96e1f2cac159d778c6b0ab4ddd8a139bb57c602cef793b7606cd32428d/thinc-8.3.10-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:44d7038a5d28572105332b44ec9c4c3b6f7953b41d224588ad0473c9b79ccf9e", size = 793037 }, - { url = "https://files.pythonhosted.org/packages/12/d8/81e8495e8ef412767c09d1f9d0d86dc60cd22e6ed75e61b49fbf1dcfcd65/thinc-8.3.10-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:639f20952af722cb0ab4c3d8a00e661686b60c04f82ef48d12064ceda3b8cd0c", size = 740768 }, - { url = "https://files.pythonhosted.org/packages/c2/6d/716488a301d65c5463e92cb0eddae3672ca84f1d70937808cea9760f759c/thinc-8.3.10-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9306e62c7e7066c63b0c0ba1d164ae0c23bf38edf5a7df2e09cce69a2c290500", size = 3834983 }, - { url = "https://files.pythonhosted.org/packages/9c/a1/d28b21cab9b79e9c803671bebd14489e14c5226136fad6a1c44f96f8e4ef/thinc-8.3.10-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2982604c21096de1a87b04a781a645863eece71ec6ee9f139ac01b998fb5622d", size = 3845215 }, - { url = "https://files.pythonhosted.org/packages/93/9d/ff64ead5f1c2298d9e6a9ccc1c676b2347ac06162ad3c5e5d895c32a719e/thinc-8.3.10-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c6b82698e27846004d4eafc38317ace482eced888d4445f7fb9c548fd36777af", size = 4826596 }, - { url = "https://files.pythonhosted.org/packages/4a/44/b80c863608d0fd31641a2d50658560c22d4841f1e445529201e22b3e1d0f/thinc-8.3.10-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2950acab8ae77427a86d11655ed0a161bc83a1edf9d31ba5c43deca6cd27ed4f", size = 
4988146 }, - { url = "https://files.pythonhosted.org/packages/93/6d/1bdd9344b2e7299faa55129dda624d50c334eed16a3761eb8b1dacd8bfcd/thinc-8.3.10-cp314-cp314-win_amd64.whl", hash = "sha256:c253139a5c873edf75a3b17ec9d8b6caebee072fdb489594bc64e35115df7625", size = 1738054 }, - { url = "https://files.pythonhosted.org/packages/45/c4/44e3163d48e398efb3748481656963ac6265c14288012871c921dc81d004/thinc-8.3.10-cp314-cp314-win_arm64.whl", hash = "sha256:ad6da67f534995d6ec257f16665377d7ad95bef5c1b1c89618fd4528657a6f24", size = 1665001 }, +sdist = { url = "https://files.pythonhosted.org/packages/2f/3a/2d0f0be132b9faaa6d56f04565ae122684273e4bf4eab8dee5f48dc00f68/thinc-8.3.10.tar.gz", hash = "sha256:5a75109f4ee1c968fc055ce651a17cb44b23b000d9e95f04a4d047ab3cb3e34e", size = 194196, upload_time = "2025-11-17T17:21:46.435Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d3/34/ba3b386d92edf50784b60ee34318d47c7f49c198268746ef7851c5bbe8cf/thinc-8.3.10-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:51bc6ef735bdbcab75ab2916731b8f61f94c66add6f9db213d900d3c6a244f95", size = 794509, upload_time = "2025-11-17T17:21:03.21Z" }, + { url = "https://files.pythonhosted.org/packages/07/f3/9f52d18115cd9d8d7b2590d226cb2752d2a5ffec61576b19462b48410184/thinc-8.3.10-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4f48b4d346915f98e9722c0c50ef911cc16c6790a2b7afebc6e1a2c96a6ce6c6", size = 741084, upload_time = "2025-11-17T17:21:04.568Z" }, + { url = "https://files.pythonhosted.org/packages/ad/9c/129c2b740c4e3d3624b6fb3dec1577ef27cb804bc1647f9bc3e1801ea20c/thinc-8.3.10-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5003f4db2db22cc8d686db8db83509acc3c50f4c55ebdcb2bbfcc1095096f7d2", size = 3846337, upload_time = "2025-11-17T17:21:06.079Z" }, + { url = "https://files.pythonhosted.org/packages/22/d2/738cf188dea8240c2be081c83ea47270fea585eba446171757d2cdb9b675/thinc-8.3.10-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b12484c3ed0632331fada2c334680dd6bc35972d0717343432dfc701f04a9b4c", size = 3901216, upload_time = "2025-11-17T17:21:07.842Z" }, + { url = "https://files.pythonhosted.org/packages/22/92/32f66eb9b1a29b797bf378a0874615d810d79eefca1d6c736c5ca3f8b918/thinc-8.3.10-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8677c446d3f9b97a465472c58683b785b25dfcf26c683e3f4e8f8c7c188e4362", size = 4827286, upload_time = "2025-11-17T17:21:09.62Z" }, + { url = "https://files.pythonhosted.org/packages/c4/5f/7ceae1e1f2029efd67ed88e23cd6dc13a5ee647cdc2b35113101b2a62c10/thinc-8.3.10-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:759c385ac08dcf950238b60b96a28f9c04618861141766928dff4a51b1679b25", size = 5024421, upload_time = "2025-11-17T17:21:11.199Z" }, + { url = "https://files.pythonhosted.org/packages/0b/66/30f9d8d41049b78bc614213d492792fbcfeb1b28642adf661c42110a7ebd/thinc-8.3.10-cp312-cp312-win_amd64.whl", hash = "sha256:bf3f188c3fa1fdcefd547d1f90a1245c29025d6d0e3f71d7fdf21dad210b990c", size = 1718631, upload_time = "2025-11-17T17:21:12.965Z" }, + { url = "https://files.pythonhosted.org/packages/f8/44/32e2a5018a1165a304d25eb9b1c74e5310da19a533a35331e8d824dc6a88/thinc-8.3.10-cp312-cp312-win_arm64.whl", hash = "sha256:234b7e57a6ef4e0260d99f4e8fdc328ed12d0ba9bbd98fdaa567294a17700d1c", size = 1642224, upload_time = "2025-11-17T17:21:14.371Z" }, + { url = "https://files.pythonhosted.org/packages/53/fc/17a2818d1f460b8c4f33b8bd3f21b19d263a647bfd23b572768d175e6b64/thinc-8.3.10-cp313-cp313-macosx_10_13_x86_64.whl", hash = 
"sha256:c7c3a50ddd423d1c49419899acef4ac80d800af3b423593acb9e40578384b543", size = 789771, upload_time = "2025-11-17T17:21:15.784Z" }, + { url = "https://files.pythonhosted.org/packages/8d/24/649f54774b1fbe791a1c2efd7d7f0a95cfd9244902553ca7dcf19daab1dd/thinc-8.3.10-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1a1cb110398f51fc2b9a07a2a4daec6f91e166533a9c9f1c565225330f46569a", size = 737051, upload_time = "2025-11-17T17:21:17.933Z" }, + { url = "https://files.pythonhosted.org/packages/b2/8c/5840c6c504c1fa9718e1c74d6e04d77a474f594888867dbba53f9317285f/thinc-8.3.10-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:42318746a67403d04be57d862fe0c0015b58b6fb9bbbf7b6db01f3f103b73a99", size = 3839221, upload_time = "2025-11-17T17:21:20.003Z" }, + { url = "https://files.pythonhosted.org/packages/45/ef/e7fca88074cb0aa1c1a23195470b4549492c2797fe7dc9ff79a85500153a/thinc-8.3.10-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6b0e41e79973f8828adead770f885db8d0f199bfbaa9591d1d896c385842e993", size = 3885024, upload_time = "2025-11-17T17:21:21.735Z" }, + { url = "https://files.pythonhosted.org/packages/9a/eb/805e277aa019896009028d727460f071c6cf83843d70f6a69e58994d2203/thinc-8.3.10-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9ed982daa1eddbad813bfd079546483b849a68b98c01ad4a7e4efd125ddc5d7b", size = 4815939, upload_time = "2025-11-17T17:21:23.942Z" }, + { url = "https://files.pythonhosted.org/packages/4f/f5/6425f12a60e3782091c9ec16394b9239f0c18c52c70218f3c8c047ff985c/thinc-8.3.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d22bd381410749dec5f629b3162b7d1f1e2d9b7364fd49a7ea555b61c93772b9", size = 5020260, upload_time = "2025-11-17T17:21:25.507Z" }, + { url = "https://files.pythonhosted.org/packages/85/a2/ae98feffe0b161400e87b7bfc8859e6fa1e6023fa7bcfa0a8cacd83b39a1/thinc-8.3.10-cp313-cp313-win_amd64.whl", hash = "sha256:9c32830446a57da13b6856cacb0225bc2f2104f279d9928d40500081c13aa9ec", size = 1717562, upload_time = "2025-11-17T17:21:27.468Z" }, + { url = "https://files.pythonhosted.org/packages/b8/e0/faa1d04a6890ea33b9541727d2a3ca88bad794a89f73b9111af6f9aefe10/thinc-8.3.10-cp313-cp313-win_arm64.whl", hash = "sha256:aa43f9af76781d32f5f9fe29299204c8841d71e64cbb56e0e4f3d1e0387c2783", size = 1641536, upload_time = "2025-11-17T17:21:30.129Z" }, + { url = "https://files.pythonhosted.org/packages/b8/32/7a96e1f2cac159d778c6b0ab4ddd8a139bb57c602cef793b7606cd32428d/thinc-8.3.10-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:44d7038a5d28572105332b44ec9c4c3b6f7953b41d224588ad0473c9b79ccf9e", size = 793037, upload_time = "2025-11-17T17:21:32.538Z" }, + { url = "https://files.pythonhosted.org/packages/12/d8/81e8495e8ef412767c09d1f9d0d86dc60cd22e6ed75e61b49fbf1dcfcd65/thinc-8.3.10-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:639f20952af722cb0ab4c3d8a00e661686b60c04f82ef48d12064ceda3b8cd0c", size = 740768, upload_time = "2025-11-17T17:21:34.852Z" }, + { url = "https://files.pythonhosted.org/packages/c2/6d/716488a301d65c5463e92cb0eddae3672ca84f1d70937808cea9760f759c/thinc-8.3.10-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9306e62c7e7066c63b0c0ba1d164ae0c23bf38edf5a7df2e09cce69a2c290500", size = 3834983, upload_time = "2025-11-17T17:21:36.81Z" }, + { url = "https://files.pythonhosted.org/packages/9c/a1/d28b21cab9b79e9c803671bebd14489e14c5226136fad6a1c44f96f8e4ef/thinc-8.3.10-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:2982604c21096de1a87b04a781a645863eece71ec6ee9f139ac01b998fb5622d", size = 3845215, upload_time = "2025-11-17T17:21:38.362Z" }, + { url = "https://files.pythonhosted.org/packages/93/9d/ff64ead5f1c2298d9e6a9ccc1c676b2347ac06162ad3c5e5d895c32a719e/thinc-8.3.10-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c6b82698e27846004d4eafc38317ace482eced888d4445f7fb9c548fd36777af", size = 4826596, upload_time = "2025-11-17T17:21:40.027Z" }, + { url = "https://files.pythonhosted.org/packages/4a/44/b80c863608d0fd31641a2d50658560c22d4841f1e445529201e22b3e1d0f/thinc-8.3.10-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2950acab8ae77427a86d11655ed0a161bc83a1edf9d31ba5c43deca6cd27ed4f", size = 4988146, upload_time = "2025-11-17T17:21:41.73Z" }, + { url = "https://files.pythonhosted.org/packages/93/6d/1bdd9344b2e7299faa55129dda624d50c334eed16a3761eb8b1dacd8bfcd/thinc-8.3.10-cp314-cp314-win_amd64.whl", hash = "sha256:c253139a5c873edf75a3b17ec9d8b6caebee072fdb489594bc64e35115df7625", size = 1738054, upload_time = "2025-11-17T17:21:43.328Z" }, + { url = "https://files.pythonhosted.org/packages/45/c4/44e3163d48e398efb3748481656963ac6265c14288012871c921dc81d004/thinc-8.3.10-cp314-cp314-win_arm64.whl", hash = "sha256:ad6da67f534995d6ec257f16665377d7ad95bef5c1b1c89618fd4528657a6f24", size = 1665001, upload_time = "2025-11-17T17:21:45.019Z" }, ] [[package]] name = "threadpoolctl" version = "3.6.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b7/4d/08c89e34946fce2aec4fbb45c9016efd5f4d7f24af8e5d93296e935631d8/threadpoolctl-3.6.0.tar.gz", hash = "sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e", size = 21274 } +sdist = { url = "https://files.pythonhosted.org/packages/b7/4d/08c89e34946fce2aec4fbb45c9016efd5f4d7f24af8e5d93296e935631d8/threadpoolctl-3.6.0.tar.gz", hash = "sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e", size = 21274, upload_time = "2025-03-13T13:49:23.031Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb", size = 18638 }, + { url = "https://files.pythonhosted.org/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb", size = 18638, upload_time = "2025-03-13T13:49:21.846Z" }, ] [[package]] @@ -3106,43 +3210,43 @@ dependencies = [ { name = "regex" }, { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/7d/ab/4d017d0f76ec3171d469d80fc03dfbb4e48a4bcaddaa831b31d526f05edc/tiktoken-0.12.0.tar.gz", hash = "sha256:b18ba7ee2b093863978fcb14f74b3707cdc8d4d4d3836853ce7ec60772139931", size = 37806 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a4/85/be65d39d6b647c79800fd9d29241d081d4eeb06271f383bb87200d74cf76/tiktoken-0.12.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b97f74aca0d78a1ff21b8cd9e9925714c15a9236d6ceacf5c7327c117e6e21e8", size = 1050728 }, - { url = "https://files.pythonhosted.org/packages/4a/42/6573e9129bc55c9bf7300b3a35bef2c6b9117018acca0dc760ac2d93dffe/tiktoken-0.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2b90f5ad190a4bb7c3eb30c5fa32e1e182ca1ca79f05e49b448438c3e225a49b", size = 994049 }, - { url = 
"https://files.pythonhosted.org/packages/66/c5/ed88504d2f4a5fd6856990b230b56d85a777feab84e6129af0822f5d0f70/tiktoken-0.12.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:65b26c7a780e2139e73acc193e5c63ac754021f160df919add909c1492c0fb37", size = 1129008 }, - { url = "https://files.pythonhosted.org/packages/f4/90/3dae6cc5436137ebd38944d396b5849e167896fc2073da643a49f372dc4f/tiktoken-0.12.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:edde1ec917dfd21c1f2f8046b86348b0f54a2c0547f68149d8600859598769ad", size = 1152665 }, - { url = "https://files.pythonhosted.org/packages/a3/fe/26df24ce53ffde419a42f5f53d755b995c9318908288c17ec3f3448313a3/tiktoken-0.12.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:35a2f8ddd3824608b3d650a000c1ef71f730d0c56486845705a8248da00f9fe5", size = 1194230 }, - { url = "https://files.pythonhosted.org/packages/20/cc/b064cae1a0e9fac84b0d2c46b89f4e57051a5f41324e385d10225a984c24/tiktoken-0.12.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:83d16643edb7fa2c99eff2ab7733508aae1eebb03d5dfc46f5565862810f24e3", size = 1254688 }, - { url = "https://files.pythonhosted.org/packages/81/10/b8523105c590c5b8349f2587e2fdfe51a69544bd5a76295fc20f2374f470/tiktoken-0.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:ffc5288f34a8bc02e1ea7047b8d041104791d2ddbf42d1e5fa07822cbffe16bd", size = 878694 }, - { url = "https://files.pythonhosted.org/packages/00/61/441588ee21e6b5cdf59d6870f86beb9789e532ee9718c251b391b70c68d6/tiktoken-0.12.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:775c2c55de2310cc1bc9a3ad8826761cbdc87770e586fd7b6da7d4589e13dab3", size = 1050802 }, - { url = "https://files.pythonhosted.org/packages/1f/05/dcf94486d5c5c8d34496abe271ac76c5b785507c8eae71b3708f1ad9b45a/tiktoken-0.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a01b12f69052fbe4b080a2cfb867c4de12c704b56178edf1d1d7b273561db160", size = 993995 }, - { url = "https://files.pythonhosted.org/packages/a0/70/5163fe5359b943f8db9946b62f19be2305de8c3d78a16f629d4165e2f40e/tiktoken-0.12.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:01d99484dc93b129cd0964f9d34eee953f2737301f18b3c7257bf368d7615baa", size = 1128948 }, - { url = "https://files.pythonhosted.org/packages/0c/da/c028aa0babf77315e1cef357d4d768800c5f8a6de04d0eac0f377cb619fa/tiktoken-0.12.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:4a1a4fcd021f022bfc81904a911d3df0f6543b9e7627b51411da75ff2fe7a1be", size = 1151986 }, - { url = "https://files.pythonhosted.org/packages/a0/5a/886b108b766aa53e295f7216b509be95eb7d60b166049ce2c58416b25f2a/tiktoken-0.12.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:981a81e39812d57031efdc9ec59fa32b2a5a5524d20d4776574c4b4bd2e9014a", size = 1194222 }, - { url = "https://files.pythonhosted.org/packages/f4/f8/4db272048397636ac7a078d22773dd2795b1becee7bc4922fe6207288d57/tiktoken-0.12.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9baf52f84a3f42eef3ff4e754a0db79a13a27921b457ca9832cf944c6be4f8f3", size = 1255097 }, - { url = "https://files.pythonhosted.org/packages/8e/32/45d02e2e0ea2be3a9ed22afc47d93741247e75018aac967b713b2941f8ea/tiktoken-0.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:b8a0cd0c789a61f31bf44851defbd609e8dd1e2c8589c614cc1060940ef1f697", size = 879117 }, - { url = "https://files.pythonhosted.org/packages/ce/76/994fc868f88e016e6d05b0da5ac24582a14c47893f4474c3e9744283f1d5/tiktoken-0.12.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:d5f89ea5680066b68bcb797ae85219c72916c922ef0fcdd3480c7d2315ffff16", size = 1050309 }, - { url = 
"https://files.pythonhosted.org/packages/f6/b8/57ef1456504c43a849821920d582a738a461b76a047f352f18c0b26c6516/tiktoken-0.12.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b4e7ed1c6a7a8a60a3230965bdedba8cc58f68926b835e519341413370e0399a", size = 993712 }, - { url = "https://files.pythonhosted.org/packages/72/90/13da56f664286ffbae9dbcfadcc625439142675845baa62715e49b87b68b/tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:fc530a28591a2d74bce821d10b418b26a094bf33839e69042a6e86ddb7a7fb27", size = 1128725 }, - { url = "https://files.pythonhosted.org/packages/05/df/4f80030d44682235bdaecd7346c90f67ae87ec8f3df4a3442cb53834f7e4/tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:06a9f4f49884139013b138920a4c393aa6556b2f8f536345f11819389c703ebb", size = 1151875 }, - { url = "https://files.pythonhosted.org/packages/22/1f/ae535223a8c4ef4c0c1192e3f9b82da660be9eb66b9279e95c99288e9dab/tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:04f0e6a985d95913cabc96a741c5ffec525a2c72e9df086ff17ebe35985c800e", size = 1194451 }, - { url = "https://files.pythonhosted.org/packages/78/a7/f8ead382fce0243cb625c4f266e66c27f65ae65ee9e77f59ea1653b6d730/tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:0ee8f9ae00c41770b5f9b0bb1235474768884ae157de3beb5439ca0fd70f3e25", size = 1253794 }, - { url = "https://files.pythonhosted.org/packages/93/e0/6cc82a562bc6365785a3ff0af27a2a092d57c47d7a81d9e2295d8c36f011/tiktoken-0.12.0-cp313-cp313t-win_amd64.whl", hash = "sha256:dc2dd125a62cb2b3d858484d6c614d136b5b848976794edfb63688d539b8b93f", size = 878777 }, - { url = "https://files.pythonhosted.org/packages/72/05/3abc1db5d2c9aadc4d2c76fa5640134e475e58d9fbb82b5c535dc0de9b01/tiktoken-0.12.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:a90388128df3b3abeb2bfd1895b0681412a8d7dc644142519e6f0a97c2111646", size = 1050188 }, - { url = "https://files.pythonhosted.org/packages/e3/7b/50c2f060412202d6c95f32b20755c7a6273543b125c0985d6fa9465105af/tiktoken-0.12.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:da900aa0ad52247d8794e307d6446bd3cdea8e192769b56276695d34d2c9aa88", size = 993978 }, - { url = "https://files.pythonhosted.org/packages/14/27/bf795595a2b897e271771cd31cb847d479073497344c637966bdf2853da1/tiktoken-0.12.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:285ba9d73ea0d6171e7f9407039a290ca77efcdb026be7769dccc01d2c8d7fff", size = 1129271 }, - { url = "https://files.pythonhosted.org/packages/f5/de/9341a6d7a8f1b448573bbf3425fa57669ac58258a667eb48a25dfe916d70/tiktoken-0.12.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:d186a5c60c6a0213f04a7a802264083dea1bbde92a2d4c7069e1a56630aef830", size = 1151216 }, - { url = "https://files.pythonhosted.org/packages/75/0d/881866647b8d1be4d67cb24e50d0c26f9f807f994aa1510cb9ba2fe5f612/tiktoken-0.12.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:604831189bd05480f2b885ecd2d1986dc7686f609de48208ebbbddeea071fc0b", size = 1194860 }, - { url = "https://files.pythonhosted.org/packages/b3/1e/b651ec3059474dab649b8d5b69f5c65cd8fcd8918568c1935bd4136c9392/tiktoken-0.12.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:8f317e8530bb3a222547b85a58583238c8f74fd7a7408305f9f63246d1a0958b", size = 1254567 }, - { url = "https://files.pythonhosted.org/packages/80/57/ce64fd16ac390fafde001268c364d559447ba09b509181b2808622420eec/tiktoken-0.12.0-cp314-cp314-win_amd64.whl", hash = "sha256:399c3dd672a6406719d84442299a490420b458c44d3ae65516302a99675888f3", size = 921067 }, - { url = 
"https://files.pythonhosted.org/packages/ac/a4/72eed53e8976a099539cdd5eb36f241987212c29629d0a52c305173e0a68/tiktoken-0.12.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:c2c714c72bc00a38ca969dae79e8266ddec999c7ceccd603cc4f0d04ccd76365", size = 1050473 }, - { url = "https://files.pythonhosted.org/packages/e6/d7/0110b8f54c008466b19672c615f2168896b83706a6611ba6e47313dbc6e9/tiktoken-0.12.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cbb9a3ba275165a2cb0f9a83f5d7025afe6b9d0ab01a22b50f0e74fee2ad253e", size = 993855 }, - { url = "https://files.pythonhosted.org/packages/5f/77/4f268c41a3957c418b084dd576ea2fad2e95da0d8e1ab705372892c2ca22/tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:dfdfaa5ffff8993a3af94d1125870b1d27aed7cb97aa7eb8c1cefdbc87dbee63", size = 1129022 }, - { url = "https://files.pythonhosted.org/packages/4e/2b/fc46c90fe5028bd094cd6ee25a7db321cb91d45dc87531e2bdbb26b4867a/tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:584c3ad3d0c74f5269906eb8a659c8bfc6144a52895d9261cdaf90a0ae5f4de0", size = 1150736 }, - { url = "https://files.pythonhosted.org/packages/28/c0/3c7a39ff68022ddfd7d93f3337ad90389a342f761c4d71de99a3ccc57857/tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:54c891b416a0e36b8e2045b12b33dd66fb34a4fe7965565f1b482da50da3e86a", size = 1194908 }, - { url = "https://files.pythonhosted.org/packages/ab/0d/c1ad6f4016a3968c048545f5d9b8ffebf577774b2ede3e2e352553b685fe/tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5edb8743b88d5be814b1a8a8854494719080c28faaa1ccbef02e87354fe71ef0", size = 1253706 }, - { url = "https://files.pythonhosted.org/packages/af/df/c7891ef9d2712ad774777271d39fdef63941ffba0a9d59b7ad1fd2765e57/tiktoken-0.12.0-cp314-cp314t-win_amd64.whl", hash = "sha256:f61c0aea5565ac82e2ec50a05e02a6c44734e91b51c10510b084ea1b8e633a71", size = 920667 }, +sdist = { url = "https://files.pythonhosted.org/packages/7d/ab/4d017d0f76ec3171d469d80fc03dfbb4e48a4bcaddaa831b31d526f05edc/tiktoken-0.12.0.tar.gz", hash = "sha256:b18ba7ee2b093863978fcb14f74b3707cdc8d4d4d3836853ce7ec60772139931", size = 37806, upload_time = "2025-10-06T20:22:45.419Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a4/85/be65d39d6b647c79800fd9d29241d081d4eeb06271f383bb87200d74cf76/tiktoken-0.12.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b97f74aca0d78a1ff21b8cd9e9925714c15a9236d6ceacf5c7327c117e6e21e8", size = 1050728, upload_time = "2025-10-06T20:21:52.756Z" }, + { url = "https://files.pythonhosted.org/packages/4a/42/6573e9129bc55c9bf7300b3a35bef2c6b9117018acca0dc760ac2d93dffe/tiktoken-0.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2b90f5ad190a4bb7c3eb30c5fa32e1e182ca1ca79f05e49b448438c3e225a49b", size = 994049, upload_time = "2025-10-06T20:21:53.782Z" }, + { url = "https://files.pythonhosted.org/packages/66/c5/ed88504d2f4a5fd6856990b230b56d85a777feab84e6129af0822f5d0f70/tiktoken-0.12.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:65b26c7a780e2139e73acc193e5c63ac754021f160df919add909c1492c0fb37", size = 1129008, upload_time = "2025-10-06T20:21:54.832Z" }, + { url = "https://files.pythonhosted.org/packages/f4/90/3dae6cc5436137ebd38944d396b5849e167896fc2073da643a49f372dc4f/tiktoken-0.12.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:edde1ec917dfd21c1f2f8046b86348b0f54a2c0547f68149d8600859598769ad", size = 1152665, upload_time = "2025-10-06T20:21:56.129Z" }, + { url = 
"https://files.pythonhosted.org/packages/a3/fe/26df24ce53ffde419a42f5f53d755b995c9318908288c17ec3f3448313a3/tiktoken-0.12.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:35a2f8ddd3824608b3d650a000c1ef71f730d0c56486845705a8248da00f9fe5", size = 1194230, upload_time = "2025-10-06T20:21:57.546Z" }, + { url = "https://files.pythonhosted.org/packages/20/cc/b064cae1a0e9fac84b0d2c46b89f4e57051a5f41324e385d10225a984c24/tiktoken-0.12.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:83d16643edb7fa2c99eff2ab7733508aae1eebb03d5dfc46f5565862810f24e3", size = 1254688, upload_time = "2025-10-06T20:21:58.619Z" }, + { url = "https://files.pythonhosted.org/packages/81/10/b8523105c590c5b8349f2587e2fdfe51a69544bd5a76295fc20f2374f470/tiktoken-0.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:ffc5288f34a8bc02e1ea7047b8d041104791d2ddbf42d1e5fa07822cbffe16bd", size = 878694, upload_time = "2025-10-06T20:21:59.876Z" }, + { url = "https://files.pythonhosted.org/packages/00/61/441588ee21e6b5cdf59d6870f86beb9789e532ee9718c251b391b70c68d6/tiktoken-0.12.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:775c2c55de2310cc1bc9a3ad8826761cbdc87770e586fd7b6da7d4589e13dab3", size = 1050802, upload_time = "2025-10-06T20:22:00.96Z" }, + { url = "https://files.pythonhosted.org/packages/1f/05/dcf94486d5c5c8d34496abe271ac76c5b785507c8eae71b3708f1ad9b45a/tiktoken-0.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a01b12f69052fbe4b080a2cfb867c4de12c704b56178edf1d1d7b273561db160", size = 993995, upload_time = "2025-10-06T20:22:02.788Z" }, + { url = "https://files.pythonhosted.org/packages/a0/70/5163fe5359b943f8db9946b62f19be2305de8c3d78a16f629d4165e2f40e/tiktoken-0.12.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:01d99484dc93b129cd0964f9d34eee953f2737301f18b3c7257bf368d7615baa", size = 1128948, upload_time = "2025-10-06T20:22:03.814Z" }, + { url = "https://files.pythonhosted.org/packages/0c/da/c028aa0babf77315e1cef357d4d768800c5f8a6de04d0eac0f377cb619fa/tiktoken-0.12.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:4a1a4fcd021f022bfc81904a911d3df0f6543b9e7627b51411da75ff2fe7a1be", size = 1151986, upload_time = "2025-10-06T20:22:05.173Z" }, + { url = "https://files.pythonhosted.org/packages/a0/5a/886b108b766aa53e295f7216b509be95eb7d60b166049ce2c58416b25f2a/tiktoken-0.12.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:981a81e39812d57031efdc9ec59fa32b2a5a5524d20d4776574c4b4bd2e9014a", size = 1194222, upload_time = "2025-10-06T20:22:06.265Z" }, + { url = "https://files.pythonhosted.org/packages/f4/f8/4db272048397636ac7a078d22773dd2795b1becee7bc4922fe6207288d57/tiktoken-0.12.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9baf52f84a3f42eef3ff4e754a0db79a13a27921b457ca9832cf944c6be4f8f3", size = 1255097, upload_time = "2025-10-06T20:22:07.403Z" }, + { url = "https://files.pythonhosted.org/packages/8e/32/45d02e2e0ea2be3a9ed22afc47d93741247e75018aac967b713b2941f8ea/tiktoken-0.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:b8a0cd0c789a61f31bf44851defbd609e8dd1e2c8589c614cc1060940ef1f697", size = 879117, upload_time = "2025-10-06T20:22:08.418Z" }, + { url = "https://files.pythonhosted.org/packages/ce/76/994fc868f88e016e6d05b0da5ac24582a14c47893f4474c3e9744283f1d5/tiktoken-0.12.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:d5f89ea5680066b68bcb797ae85219c72916c922ef0fcdd3480c7d2315ffff16", size = 1050309, upload_time = "2025-10-06T20:22:10.939Z" }, + { url = 
"https://files.pythonhosted.org/packages/f6/b8/57ef1456504c43a849821920d582a738a461b76a047f352f18c0b26c6516/tiktoken-0.12.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b4e7ed1c6a7a8a60a3230965bdedba8cc58f68926b835e519341413370e0399a", size = 993712, upload_time = "2025-10-06T20:22:12.115Z" }, + { url = "https://files.pythonhosted.org/packages/72/90/13da56f664286ffbae9dbcfadcc625439142675845baa62715e49b87b68b/tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:fc530a28591a2d74bce821d10b418b26a094bf33839e69042a6e86ddb7a7fb27", size = 1128725, upload_time = "2025-10-06T20:22:13.541Z" }, + { url = "https://files.pythonhosted.org/packages/05/df/4f80030d44682235bdaecd7346c90f67ae87ec8f3df4a3442cb53834f7e4/tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:06a9f4f49884139013b138920a4c393aa6556b2f8f536345f11819389c703ebb", size = 1151875, upload_time = "2025-10-06T20:22:14.559Z" }, + { url = "https://files.pythonhosted.org/packages/22/1f/ae535223a8c4ef4c0c1192e3f9b82da660be9eb66b9279e95c99288e9dab/tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:04f0e6a985d95913cabc96a741c5ffec525a2c72e9df086ff17ebe35985c800e", size = 1194451, upload_time = "2025-10-06T20:22:15.545Z" }, + { url = "https://files.pythonhosted.org/packages/78/a7/f8ead382fce0243cb625c4f266e66c27f65ae65ee9e77f59ea1653b6d730/tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:0ee8f9ae00c41770b5f9b0bb1235474768884ae157de3beb5439ca0fd70f3e25", size = 1253794, upload_time = "2025-10-06T20:22:16.624Z" }, + { url = "https://files.pythonhosted.org/packages/93/e0/6cc82a562bc6365785a3ff0af27a2a092d57c47d7a81d9e2295d8c36f011/tiktoken-0.12.0-cp313-cp313t-win_amd64.whl", hash = "sha256:dc2dd125a62cb2b3d858484d6c614d136b5b848976794edfb63688d539b8b93f", size = 878777, upload_time = "2025-10-06T20:22:18.036Z" }, + { url = "https://files.pythonhosted.org/packages/72/05/3abc1db5d2c9aadc4d2c76fa5640134e475e58d9fbb82b5c535dc0de9b01/tiktoken-0.12.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:a90388128df3b3abeb2bfd1895b0681412a8d7dc644142519e6f0a97c2111646", size = 1050188, upload_time = "2025-10-06T20:22:19.563Z" }, + { url = "https://files.pythonhosted.org/packages/e3/7b/50c2f060412202d6c95f32b20755c7a6273543b125c0985d6fa9465105af/tiktoken-0.12.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:da900aa0ad52247d8794e307d6446bd3cdea8e192769b56276695d34d2c9aa88", size = 993978, upload_time = "2025-10-06T20:22:20.702Z" }, + { url = "https://files.pythonhosted.org/packages/14/27/bf795595a2b897e271771cd31cb847d479073497344c637966bdf2853da1/tiktoken-0.12.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:285ba9d73ea0d6171e7f9407039a290ca77efcdb026be7769dccc01d2c8d7fff", size = 1129271, upload_time = "2025-10-06T20:22:22.06Z" }, + { url = "https://files.pythonhosted.org/packages/f5/de/9341a6d7a8f1b448573bbf3425fa57669ac58258a667eb48a25dfe916d70/tiktoken-0.12.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:d186a5c60c6a0213f04a7a802264083dea1bbde92a2d4c7069e1a56630aef830", size = 1151216, upload_time = "2025-10-06T20:22:23.085Z" }, + { url = "https://files.pythonhosted.org/packages/75/0d/881866647b8d1be4d67cb24e50d0c26f9f807f994aa1510cb9ba2fe5f612/tiktoken-0.12.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:604831189bd05480f2b885ecd2d1986dc7686f609de48208ebbbddeea071fc0b", size = 1194860, upload_time = "2025-10-06T20:22:24.602Z" }, + { url = 
"https://files.pythonhosted.org/packages/b3/1e/b651ec3059474dab649b8d5b69f5c65cd8fcd8918568c1935bd4136c9392/tiktoken-0.12.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:8f317e8530bb3a222547b85a58583238c8f74fd7a7408305f9f63246d1a0958b", size = 1254567, upload_time = "2025-10-06T20:22:25.671Z" }, + { url = "https://files.pythonhosted.org/packages/80/57/ce64fd16ac390fafde001268c364d559447ba09b509181b2808622420eec/tiktoken-0.12.0-cp314-cp314-win_amd64.whl", hash = "sha256:399c3dd672a6406719d84442299a490420b458c44d3ae65516302a99675888f3", size = 921067, upload_time = "2025-10-06T20:22:26.753Z" }, + { url = "https://files.pythonhosted.org/packages/ac/a4/72eed53e8976a099539cdd5eb36f241987212c29629d0a52c305173e0a68/tiktoken-0.12.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:c2c714c72bc00a38ca969dae79e8266ddec999c7ceccd603cc4f0d04ccd76365", size = 1050473, upload_time = "2025-10-06T20:22:27.775Z" }, + { url = "https://files.pythonhosted.org/packages/e6/d7/0110b8f54c008466b19672c615f2168896b83706a6611ba6e47313dbc6e9/tiktoken-0.12.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cbb9a3ba275165a2cb0f9a83f5d7025afe6b9d0ab01a22b50f0e74fee2ad253e", size = 993855, upload_time = "2025-10-06T20:22:28.799Z" }, + { url = "https://files.pythonhosted.org/packages/5f/77/4f268c41a3957c418b084dd576ea2fad2e95da0d8e1ab705372892c2ca22/tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:dfdfaa5ffff8993a3af94d1125870b1d27aed7cb97aa7eb8c1cefdbc87dbee63", size = 1129022, upload_time = "2025-10-06T20:22:29.981Z" }, + { url = "https://files.pythonhosted.org/packages/4e/2b/fc46c90fe5028bd094cd6ee25a7db321cb91d45dc87531e2bdbb26b4867a/tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:584c3ad3d0c74f5269906eb8a659c8bfc6144a52895d9261cdaf90a0ae5f4de0", size = 1150736, upload_time = "2025-10-06T20:22:30.996Z" }, + { url = "https://files.pythonhosted.org/packages/28/c0/3c7a39ff68022ddfd7d93f3337ad90389a342f761c4d71de99a3ccc57857/tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:54c891b416a0e36b8e2045b12b33dd66fb34a4fe7965565f1b482da50da3e86a", size = 1194908, upload_time = "2025-10-06T20:22:32.073Z" }, + { url = "https://files.pythonhosted.org/packages/ab/0d/c1ad6f4016a3968c048545f5d9b8ffebf577774b2ede3e2e352553b685fe/tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5edb8743b88d5be814b1a8a8854494719080c28faaa1ccbef02e87354fe71ef0", size = 1253706, upload_time = "2025-10-06T20:22:33.385Z" }, + { url = "https://files.pythonhosted.org/packages/af/df/c7891ef9d2712ad774777271d39fdef63941ffba0a9d59b7ad1fd2765e57/tiktoken-0.12.0-cp314-cp314t-win_amd64.whl", hash = "sha256:f61c0aea5565ac82e2ec50a05e02a6c44734e91b51c10510b084ea1b8e633a71", size = 920667, upload_time = "2025-10-06T20:22:34.444Z" }, ] [[package]] @@ -3152,23 +3256,23 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "huggingface-hub" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/73/6f/f80cfef4a312e1fb34baf7d85c72d4411afde10978d4657f8cdd811d3ccc/tokenizers-0.22.2.tar.gz", hash = "sha256:473b83b915e547aa366d1eee11806deaf419e17be16310ac0a14077f1e28f917", size = 372115 } +sdist = { url = "https://files.pythonhosted.org/packages/73/6f/f80cfef4a312e1fb34baf7d85c72d4411afde10978d4657f8cdd811d3ccc/tokenizers-0.22.2.tar.gz", hash = "sha256:473b83b915e547aa366d1eee11806deaf419e17be16310ac0a14077f1e28f917", size = 372115, upload_time = "2026-01-05T10:45:15.988Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/92/97/5dbfabf04c7e348e655e907ed27913e03db0923abb5dfdd120d7b25630e1/tokenizers-0.22.2-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:544dd704ae7238755d790de45ba8da072e9af3eea688f698b137915ae959281c", size = 3100275 }, - { url = "https://files.pythonhosted.org/packages/2e/47/174dca0502ef88b28f1c9e06b73ce33500eedfac7a7692108aec220464e7/tokenizers-0.22.2-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:1e418a55456beedca4621dbab65a318981467a2b188e982a23e117f115ce5001", size = 2981472 }, - { url = "https://files.pythonhosted.org/packages/d6/84/7990e799f1309a8b87af6b948f31edaa12a3ed22d11b352eaf4f4b2e5753/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2249487018adec45d6e3554c71d46eb39fa8ea67156c640f7513eb26f318cec7", size = 3290736 }, - { url = "https://files.pythonhosted.org/packages/78/59/09d0d9ba94dcd5f4f1368d4858d24546b4bdc0231c2354aa31d6199f0399/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:25b85325d0815e86e0bac263506dd114578953b7b53d7de09a6485e4a160a7dd", size = 3168835 }, - { url = "https://files.pythonhosted.org/packages/47/50/b3ebb4243e7160bda8d34b731e54dd8ab8b133e50775872e7a434e524c28/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bfb88f22a209ff7b40a576d5324bf8286b519d7358663db21d6246fb17eea2d5", size = 3521673 }, - { url = "https://files.pythonhosted.org/packages/e0/fa/89f4cb9e08df770b57adb96f8cbb7e22695a4cb6c2bd5f0c4f0ebcf33b66/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1c774b1276f71e1ef716e5486f21e76333464f47bece56bbd554485982a9e03e", size = 3724818 }, - { url = "https://files.pythonhosted.org/packages/64/04/ca2363f0bfbe3b3d36e95bf67e56a4c88c8e3362b658e616d1ac185d47f2/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:df6c4265b289083bf710dff49bc51ef252f9d5be33a45ee2bed151114a56207b", size = 3379195 }, - { url = "https://files.pythonhosted.org/packages/2e/76/932be4b50ef6ccedf9d3c6639b056a967a86258c6d9200643f01269211ca/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:369cc9fc8cc10cb24143873a0d95438bb8ee257bb80c71989e3ee290e8d72c67", size = 3274982 }, - { url = "https://files.pythonhosted.org/packages/1d/28/5f9f5a4cc211b69e89420980e483831bcc29dade307955cc9dc858a40f01/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:29c30b83d8dcd061078b05ae0cb94d3c710555fbb44861139f9f83dcca3dc3e4", size = 9478245 }, - { url = "https://files.pythonhosted.org/packages/6c/fb/66e2da4704d6aadebf8cb39f1d6d1957df667ab24cff2326b77cda0dcb85/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:37ae80a28c1d3265bb1f22464c856bd23c02a05bb211e56d0c5301a435be6c1a", size = 9560069 }, - { url = "https://files.pythonhosted.org/packages/16/04/fed398b05caa87ce9b1a1bb5166645e38196081b225059a6edaff6440fac/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:791135ee325f2336f498590eb2f11dc5c295232f288e75c99a36c5dbce63088a", size = 9899263 }, - { url = "https://files.pythonhosted.org/packages/05/a1/d62dfe7376beaaf1394917e0f8e93ee5f67fea8fcf4107501db35996586b/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:38337540fbbddff8e999d59970f3c6f35a82de10053206a7562f1ea02d046fa5", size = 10033429 }, - { url = "https://files.pythonhosted.org/packages/fd/18/a545c4ea42af3df6effd7d13d250ba77a0a86fb20393143bbb9a92e434d4/tokenizers-0.22.2-cp39-abi3-win32.whl", hash 
= "sha256:a6bf3f88c554a2b653af81f3204491c818ae2ac6fbc09e76ef4773351292bc92", size = 2502363 }, - { url = "https://files.pythonhosted.org/packages/65/71/0670843133a43d43070abeb1949abfdef12a86d490bea9cd9e18e37c5ff7/tokenizers-0.22.2-cp39-abi3-win_amd64.whl", hash = "sha256:c9ea31edff2968b44a88f97d784c2f16dc0729b8b143ed004699ebca91f05c48", size = 2747786 }, - { url = "https://files.pythonhosted.org/packages/72/f4/0de46cfa12cdcbcd464cc59fde36912af405696f687e53a091fb432f694c/tokenizers-0.22.2-cp39-abi3-win_arm64.whl", hash = "sha256:9ce725d22864a1e965217204946f830c37876eee3b2ba6fc6255e8e903d5fcbc", size = 2612133 }, + { url = "https://files.pythonhosted.org/packages/92/97/5dbfabf04c7e348e655e907ed27913e03db0923abb5dfdd120d7b25630e1/tokenizers-0.22.2-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:544dd704ae7238755d790de45ba8da072e9af3eea688f698b137915ae959281c", size = 3100275, upload_time = "2026-01-05T10:41:02.158Z" }, + { url = "https://files.pythonhosted.org/packages/2e/47/174dca0502ef88b28f1c9e06b73ce33500eedfac7a7692108aec220464e7/tokenizers-0.22.2-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:1e418a55456beedca4621dbab65a318981467a2b188e982a23e117f115ce5001", size = 2981472, upload_time = "2026-01-05T10:41:00.276Z" }, + { url = "https://files.pythonhosted.org/packages/d6/84/7990e799f1309a8b87af6b948f31edaa12a3ed22d11b352eaf4f4b2e5753/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2249487018adec45d6e3554c71d46eb39fa8ea67156c640f7513eb26f318cec7", size = 3290736, upload_time = "2026-01-05T10:40:32.165Z" }, + { url = "https://files.pythonhosted.org/packages/78/59/09d0d9ba94dcd5f4f1368d4858d24546b4bdc0231c2354aa31d6199f0399/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:25b85325d0815e86e0bac263506dd114578953b7b53d7de09a6485e4a160a7dd", size = 3168835, upload_time = "2026-01-05T10:40:38.847Z" }, + { url = "https://files.pythonhosted.org/packages/47/50/b3ebb4243e7160bda8d34b731e54dd8ab8b133e50775872e7a434e524c28/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bfb88f22a209ff7b40a576d5324bf8286b519d7358663db21d6246fb17eea2d5", size = 3521673, upload_time = "2026-01-05T10:40:56.614Z" }, + { url = "https://files.pythonhosted.org/packages/e0/fa/89f4cb9e08df770b57adb96f8cbb7e22695a4cb6c2bd5f0c4f0ebcf33b66/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1c774b1276f71e1ef716e5486f21e76333464f47bece56bbd554485982a9e03e", size = 3724818, upload_time = "2026-01-05T10:40:44.507Z" }, + { url = "https://files.pythonhosted.org/packages/64/04/ca2363f0bfbe3b3d36e95bf67e56a4c88c8e3362b658e616d1ac185d47f2/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:df6c4265b289083bf710dff49bc51ef252f9d5be33a45ee2bed151114a56207b", size = 3379195, upload_time = "2026-01-05T10:40:51.139Z" }, + { url = "https://files.pythonhosted.org/packages/2e/76/932be4b50ef6ccedf9d3c6639b056a967a86258c6d9200643f01269211ca/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:369cc9fc8cc10cb24143873a0d95438bb8ee257bb80c71989e3ee290e8d72c67", size = 3274982, upload_time = "2026-01-05T10:40:58.331Z" }, + { url = "https://files.pythonhosted.org/packages/1d/28/5f9f5a4cc211b69e89420980e483831bcc29dade307955cc9dc858a40f01/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:29c30b83d8dcd061078b05ae0cb94d3c710555fbb44861139f9f83dcca3dc3e4", size = 9478245, upload_time = 
"2026-01-05T10:41:04.053Z" }, + { url = "https://files.pythonhosted.org/packages/6c/fb/66e2da4704d6aadebf8cb39f1d6d1957df667ab24cff2326b77cda0dcb85/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:37ae80a28c1d3265bb1f22464c856bd23c02a05bb211e56d0c5301a435be6c1a", size = 9560069, upload_time = "2026-01-05T10:45:10.673Z" }, + { url = "https://files.pythonhosted.org/packages/16/04/fed398b05caa87ce9b1a1bb5166645e38196081b225059a6edaff6440fac/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:791135ee325f2336f498590eb2f11dc5c295232f288e75c99a36c5dbce63088a", size = 9899263, upload_time = "2026-01-05T10:45:12.559Z" }, + { url = "https://files.pythonhosted.org/packages/05/a1/d62dfe7376beaaf1394917e0f8e93ee5f67fea8fcf4107501db35996586b/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:38337540fbbddff8e999d59970f3c6f35a82de10053206a7562f1ea02d046fa5", size = 10033429, upload_time = "2026-01-05T10:45:14.333Z" }, + { url = "https://files.pythonhosted.org/packages/fd/18/a545c4ea42af3df6effd7d13d250ba77a0a86fb20393143bbb9a92e434d4/tokenizers-0.22.2-cp39-abi3-win32.whl", hash = "sha256:a6bf3f88c554a2b653af81f3204491c818ae2ac6fbc09e76ef4773351292bc92", size = 2502363, upload_time = "2026-01-05T10:45:20.593Z" }, + { url = "https://files.pythonhosted.org/packages/65/71/0670843133a43d43070abeb1949abfdef12a86d490bea9cd9e18e37c5ff7/tokenizers-0.22.2-cp39-abi3-win_amd64.whl", hash = "sha256:c9ea31edff2968b44a88f97d784c2f16dc0729b8b143ed004699ebca91f05c48", size = 2747786, upload_time = "2026-01-05T10:45:18.411Z" }, + { url = "https://files.pythonhosted.org/packages/72/f4/0de46cfa12cdcbcd464cc59fde36912af405696f687e53a091fb432f694c/tokenizers-0.22.2-cp39-abi3-win_arm64.whl", hash = "sha256:9ce725d22864a1e965217204946f830c37876eee3b2ba6fc6255e8e903d5fcbc", size = 2612133, upload_time = "2026-01-05T10:45:17.232Z" }, ] [[package]] @@ -3176,54 +3280,54 @@ name = "torch" version = "2.10.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "cuda-bindings", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "cuda-bindings", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "filelock" }, { name = "fsspec" }, { name = "jinja2" }, { name = "networkx" }, - { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, - { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, - { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, - { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, - { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, - { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, - { name = "nvidia-cufile-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, - { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, - { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, - { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, - { name = "nvidia-cusparselt-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, - { name = "nvidia-nccl-cu12", marker = 
"platform_machine == 'x86_64' and platform_system == 'Linux'" }, - { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, - { name = "nvidia-nvshmem-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, - { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cufile-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cusparselt-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-nvshmem-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "setuptools" }, { name = "sympy" }, - { name = "triton", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "typing-extensions" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/c9/2f/0b295dd8d199ef71e6f176f576473d645d41357b7b8aa978cc6b042575df/torch-2.10.0-1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:6abb224c2b6e9e27b592a1c0015c33a504b00a0e0938f1499f7f514e9b7bfb5c", size = 79498197 }, - { url = "https://files.pythonhosted.org/packages/a4/1b/af5fccb50c341bd69dc016769503cb0857c1423fbe9343410dfeb65240f2/torch-2.10.0-1-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:7350f6652dfd761f11f9ecb590bfe95b573e2961f7a242eccb3c8e78348d26fe", size = 79498248 }, - { url = "https://files.pythonhosted.org/packages/cc/af/758e242e9102e9988969b5e621d41f36b8f258bb4a099109b7a4b4b50ea4/torch-2.10.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:5fd4117d89ffd47e3dcc71e71a22efac24828ad781c7e46aaaf56bf7f2796acf", size = 145996088 }, - { url = "https://files.pythonhosted.org/packages/23/8e/3c74db5e53bff7ed9e34c8123e6a8bfef718b2450c35eefab85bb4a7e270/torch-2.10.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:787124e7db3b379d4f1ed54dd12ae7c741c16a4d29b49c0226a89bea50923ffb", size = 915711952 }, - { url = "https://files.pythonhosted.org/packages/6e/01/624c4324ca01f66ae4c7cd1b74eb16fb52596dce66dbe51eff95ef9e7a4c/torch-2.10.0-cp312-cp312-win_amd64.whl", hash = 
"sha256:2c66c61f44c5f903046cc696d088e21062644cbe541c7f1c4eaae88b2ad23547", size = 113757972 }, - { url = "https://files.pythonhosted.org/packages/c9/5c/dee910b87c4d5c0fcb41b50839ae04df87c1cfc663cf1b5fca7ea565eeaa/torch-2.10.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:6d3707a61863d1c4d6ebba7be4ca320f42b869ee657e9b2c21c736bf17000294", size = 79498198 }, - { url = "https://files.pythonhosted.org/packages/c9/6f/f2e91e34e3fcba2e3fc8d8f74e7d6c22e74e480bbd1db7bc8900fdf3e95c/torch-2.10.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:5c4d217b14741e40776dd7074d9006fd28b8a97ef5654db959d8635b2fe5f29b", size = 146004247 }, - { url = "https://files.pythonhosted.org/packages/98/fb/5160261aeb5e1ee12ee95fe599d0541f7c976c3701d607d8fc29e623229f/torch-2.10.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:6b71486353fce0f9714ca0c9ef1c850a2ae766b409808acd58e9678a3edb7738", size = 915716445 }, - { url = "https://files.pythonhosted.org/packages/6a/16/502fb1b41e6d868e8deb5b0e3ae926bbb36dab8ceb0d1b769b266ad7b0c3/torch-2.10.0-cp313-cp313-win_amd64.whl", hash = "sha256:c2ee399c644dc92ef7bc0d4f7e74b5360c37cdbe7c5ba11318dda49ffac2bc57", size = 113757050 }, - { url = "https://files.pythonhosted.org/packages/1a/0b/39929b148f4824bc3ad6f9f72a29d4ad865bcf7ebfc2fa67584773e083d2/torch-2.10.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:3202429f58309b9fa96a614885eace4b7995729f44beb54d3e4a47773649d382", size = 79851305 }, - { url = "https://files.pythonhosted.org/packages/d8/14/21fbce63bc452381ba5f74a2c0a959fdf5ad5803ccc0c654e752e0dbe91a/torch-2.10.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:aae1b29cd68e50a9397f5ee897b9c24742e9e306f88a807a27d617f07adb3bd8", size = 146005472 }, - { url = "https://files.pythonhosted.org/packages/54/fd/b207d1c525cb570ef47f3e9f836b154685011fce11a2f444ba8a4084d042/torch-2.10.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:6021db85958db2f07ec94e1bc77212721ba4920c12a18dc552d2ae36a3eb163f", size = 915612644 }, - { url = "https://files.pythonhosted.org/packages/36/53/0197f868c75f1050b199fe58f9bf3bf3aecac9b4e85cc9c964383d745403/torch-2.10.0-cp313-cp313t-win_amd64.whl", hash = "sha256:ff43db38af76fda183156153983c9a096fc4c78d0cd1e07b14a2314c7f01c2c8", size = 113997015 }, - { url = "https://files.pythonhosted.org/packages/0e/13/e76b4d9c160e89fff48bf16b449ea324bda84745d2ab30294c37c2434c0d/torch-2.10.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:cdf2a523d699b70d613243211ecaac14fe9c5df8a0b0a9c02add60fb2a413e0f", size = 79498248 }, - { url = "https://files.pythonhosted.org/packages/4f/93/716b5ac0155f1be70ed81bacc21269c3ece8dba0c249b9994094110bfc51/torch-2.10.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:bf0d9ff448b0218e0433aeb198805192346c4fd659c852370d5cc245f602a06a", size = 79464992 }, - { url = "https://files.pythonhosted.org/packages/69/2b/51e663ff190c9d16d4a8271203b71bc73a16aa7619b9f271a69b9d4a936b/torch-2.10.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:233aed0659a2503b831d8a67e9da66a62c996204c0bba4f4c442ccc0c68a3f60", size = 146018567 }, - { url = "https://files.pythonhosted.org/packages/5e/cd/4b95ef7f293b927c283db0b136c42be91c8ec6845c44de0238c8c23bdc80/torch-2.10.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:682497e16bdfa6efeec8cde66531bc8d1fbbbb4d8788ec6173c089ed3cc2bfe5", size = 915721646 }, - { url = "https://files.pythonhosted.org/packages/56/97/078a007208f8056d88ae43198833469e61a0a355abc0b070edd2c085eb9a/torch-2.10.0-cp314-cp314-win_amd64.whl", hash = 
"sha256:6528f13d2a8593a1a412ea07a99812495bec07e9224c28b2a25c0a30c7da025c", size = 113752373 }, - { url = "https://files.pythonhosted.org/packages/d8/94/71994e7d0d5238393df9732fdab607e37e2b56d26a746cb59fdb415f8966/torch-2.10.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:f5ab4ba32383061be0fb74bda772d470140a12c1c3b58a0cfbf3dae94d164c28", size = 79850324 }, - { url = "https://files.pythonhosted.org/packages/e2/65/1a05346b418ea8ccd10360eef4b3e0ce688fba544e76edec26913a8d0ee0/torch-2.10.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:716b01a176c2a5659c98f6b01bf868244abdd896526f1c692712ab36dbaf9b63", size = 146006482 }, - { url = "https://files.pythonhosted.org/packages/1d/b9/5f6f9d9e859fc3235f60578fa64f52c9c6e9b4327f0fe0defb6de5c0de31/torch-2.10.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:d8f5912ba938233f86361e891789595ff35ca4b4e2ac8fe3670895e5976731d6", size = 915613050 }, - { url = "https://files.pythonhosted.org/packages/66/4d/35352043ee0eaffdeff154fad67cd4a31dbed7ff8e3be1cc4549717d6d51/torch-2.10.0-cp314-cp314t-win_amd64.whl", hash = "sha256:71283a373f0ee2c89e0f0d5f446039bdabe8dbc3c9ccf35f0f784908b0acd185", size = 113995816 }, + { url = "https://files.pythonhosted.org/packages/c9/2f/0b295dd8d199ef71e6f176f576473d645d41357b7b8aa978cc6b042575df/torch-2.10.0-1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:6abb224c2b6e9e27b592a1c0015c33a504b00a0e0938f1499f7f514e9b7bfb5c", size = 79498197, upload_time = "2026-02-06T17:37:27.627Z" }, + { url = "https://files.pythonhosted.org/packages/a4/1b/af5fccb50c341bd69dc016769503cb0857c1423fbe9343410dfeb65240f2/torch-2.10.0-1-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:7350f6652dfd761f11f9ecb590bfe95b573e2961f7a242eccb3c8e78348d26fe", size = 79498248, upload_time = "2026-02-06T17:37:31.982Z" }, + { url = "https://files.pythonhosted.org/packages/cc/af/758e242e9102e9988969b5e621d41f36b8f258bb4a099109b7a4b4b50ea4/torch-2.10.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:5fd4117d89ffd47e3dcc71e71a22efac24828ad781c7e46aaaf56bf7f2796acf", size = 145996088, upload_time = "2026-01-21T16:24:44.171Z" }, + { url = "https://files.pythonhosted.org/packages/23/8e/3c74db5e53bff7ed9e34c8123e6a8bfef718b2450c35eefab85bb4a7e270/torch-2.10.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:787124e7db3b379d4f1ed54dd12ae7c741c16a4d29b49c0226a89bea50923ffb", size = 915711952, upload_time = "2026-01-21T16:23:53.503Z" }, + { url = "https://files.pythonhosted.org/packages/6e/01/624c4324ca01f66ae4c7cd1b74eb16fb52596dce66dbe51eff95ef9e7a4c/torch-2.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:2c66c61f44c5f903046cc696d088e21062644cbe541c7f1c4eaae88b2ad23547", size = 113757972, upload_time = "2026-01-21T16:24:39.516Z" }, + { url = "https://files.pythonhosted.org/packages/c9/5c/dee910b87c4d5c0fcb41b50839ae04df87c1cfc663cf1b5fca7ea565eeaa/torch-2.10.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:6d3707a61863d1c4d6ebba7be4ca320f42b869ee657e9b2c21c736bf17000294", size = 79498198, upload_time = "2026-01-21T16:24:34.704Z" }, + { url = "https://files.pythonhosted.org/packages/c9/6f/f2e91e34e3fcba2e3fc8d8f74e7d6c22e74e480bbd1db7bc8900fdf3e95c/torch-2.10.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:5c4d217b14741e40776dd7074d9006fd28b8a97ef5654db959d8635b2fe5f29b", size = 146004247, upload_time = "2026-01-21T16:24:29.335Z" }, + { url = "https://files.pythonhosted.org/packages/98/fb/5160261aeb5e1ee12ee95fe599d0541f7c976c3701d607d8fc29e623229f/torch-2.10.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = 
"sha256:6b71486353fce0f9714ca0c9ef1c850a2ae766b409808acd58e9678a3edb7738", size = 915716445, upload_time = "2026-01-21T16:22:45.353Z" }, + { url = "https://files.pythonhosted.org/packages/6a/16/502fb1b41e6d868e8deb5b0e3ae926bbb36dab8ceb0d1b769b266ad7b0c3/torch-2.10.0-cp313-cp313-win_amd64.whl", hash = "sha256:c2ee399c644dc92ef7bc0d4f7e74b5360c37cdbe7c5ba11318dda49ffac2bc57", size = 113757050, upload_time = "2026-01-21T16:24:19.204Z" }, + { url = "https://files.pythonhosted.org/packages/1a/0b/39929b148f4824bc3ad6f9f72a29d4ad865bcf7ebfc2fa67584773e083d2/torch-2.10.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:3202429f58309b9fa96a614885eace4b7995729f44beb54d3e4a47773649d382", size = 79851305, upload_time = "2026-01-21T16:24:09.209Z" }, + { url = "https://files.pythonhosted.org/packages/d8/14/21fbce63bc452381ba5f74a2c0a959fdf5ad5803ccc0c654e752e0dbe91a/torch-2.10.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:aae1b29cd68e50a9397f5ee897b9c24742e9e306f88a807a27d617f07adb3bd8", size = 146005472, upload_time = "2026-01-21T16:22:29.022Z" }, + { url = "https://files.pythonhosted.org/packages/54/fd/b207d1c525cb570ef47f3e9f836b154685011fce11a2f444ba8a4084d042/torch-2.10.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:6021db85958db2f07ec94e1bc77212721ba4920c12a18dc552d2ae36a3eb163f", size = 915612644, upload_time = "2026-01-21T16:21:47.019Z" }, + { url = "https://files.pythonhosted.org/packages/36/53/0197f868c75f1050b199fe58f9bf3bf3aecac9b4e85cc9c964383d745403/torch-2.10.0-cp313-cp313t-win_amd64.whl", hash = "sha256:ff43db38af76fda183156153983c9a096fc4c78d0cd1e07b14a2314c7f01c2c8", size = 113997015, upload_time = "2026-01-21T16:23:00.767Z" }, + { url = "https://files.pythonhosted.org/packages/0e/13/e76b4d9c160e89fff48bf16b449ea324bda84745d2ab30294c37c2434c0d/torch-2.10.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:cdf2a523d699b70d613243211ecaac14fe9c5df8a0b0a9c02add60fb2a413e0f", size = 79498248, upload_time = "2026-01-21T16:23:09.315Z" }, + { url = "https://files.pythonhosted.org/packages/4f/93/716b5ac0155f1be70ed81bacc21269c3ece8dba0c249b9994094110bfc51/torch-2.10.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:bf0d9ff448b0218e0433aeb198805192346c4fd659c852370d5cc245f602a06a", size = 79464992, upload_time = "2026-01-21T16:23:05.162Z" }, + { url = "https://files.pythonhosted.org/packages/69/2b/51e663ff190c9d16d4a8271203b71bc73a16aa7619b9f271a69b9d4a936b/torch-2.10.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:233aed0659a2503b831d8a67e9da66a62c996204c0bba4f4c442ccc0c68a3f60", size = 146018567, upload_time = "2026-01-21T16:22:23.393Z" }, + { url = "https://files.pythonhosted.org/packages/5e/cd/4b95ef7f293b927c283db0b136c42be91c8ec6845c44de0238c8c23bdc80/torch-2.10.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:682497e16bdfa6efeec8cde66531bc8d1fbbbb4d8788ec6173c089ed3cc2bfe5", size = 915721646, upload_time = "2026-01-21T16:21:16.983Z" }, + { url = "https://files.pythonhosted.org/packages/56/97/078a007208f8056d88ae43198833469e61a0a355abc0b070edd2c085eb9a/torch-2.10.0-cp314-cp314-win_amd64.whl", hash = "sha256:6528f13d2a8593a1a412ea07a99812495bec07e9224c28b2a25c0a30c7da025c", size = 113752373, upload_time = "2026-01-21T16:22:13.471Z" }, + { url = "https://files.pythonhosted.org/packages/d8/94/71994e7d0d5238393df9732fdab607e37e2b56d26a746cb59fdb415f8966/torch-2.10.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:f5ab4ba32383061be0fb74bda772d470140a12c1c3b58a0cfbf3dae94d164c28", size = 79850324, upload_time = "2026-01-21T16:22:09.494Z" }, + 
{ url = "https://files.pythonhosted.org/packages/e2/65/1a05346b418ea8ccd10360eef4b3e0ce688fba544e76edec26913a8d0ee0/torch-2.10.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:716b01a176c2a5659c98f6b01bf868244abdd896526f1c692712ab36dbaf9b63", size = 146006482, upload_time = "2026-01-21T16:22:18.42Z" }, + { url = "https://files.pythonhosted.org/packages/1d/b9/5f6f9d9e859fc3235f60578fa64f52c9c6e9b4327f0fe0defb6de5c0de31/torch-2.10.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:d8f5912ba938233f86361e891789595ff35ca4b4e2ac8fe3670895e5976731d6", size = 915613050, upload_time = "2026-01-21T16:20:49.035Z" }, + { url = "https://files.pythonhosted.org/packages/66/4d/35352043ee0eaffdeff154fad67cd4a31dbed7ff8e3be1cc4549717d6d51/torch-2.10.0-cp314-cp314t-win_amd64.whl", hash = "sha256:71283a373f0ee2c89e0f0d5f446039bdabe8dbc3c9ccf35f0f784908b0acd185", size = 113995816, upload_time = "2026-01-21T16:22:05.312Z" }, ] [[package]] @@ -3236,26 +3340,26 @@ dependencies = [ { name = "torch" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/56/3a/6ea0d73f49a9bef38a1b3a92e8dd455cea58470985d25635beab93841748/torchvision-0.25.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c2abe430c90b1d5e552680037d68da4eb80a5852ebb1c811b2b89d299b10573b", size = 1874920 }, - { url = "https://files.pythonhosted.org/packages/51/f8/c0e1ef27c66e15406fece94930e7d6feee4cb6374bbc02d945a630d6426e/torchvision-0.25.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:b75deafa2dfea3e2c2a525559b04783515e3463f6e830cb71de0fb7ea36fe233", size = 2344556 }, - { url = "https://files.pythonhosted.org/packages/68/2f/f24b039169db474e8688f649377de082a965fbf85daf4e46c44412f1d15a/torchvision-0.25.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:f25aa9e380865b11ea6e9d99d84df86b9cc959f1a007cd966fc6f1ab2ed0e248", size = 8072351 }, - { url = "https://files.pythonhosted.org/packages/ad/16/8f650c2e288977cf0f8f85184b90ee56ed170a4919347fc74ee99286ed6f/torchvision-0.25.0-cp312-cp312-win_amd64.whl", hash = "sha256:f9c55ae8d673ab493325d1267cbd285bb94d56f99626c00ac4644de32a59ede3", size = 4303059 }, - { url = "https://files.pythonhosted.org/packages/f5/5b/1562a04a6a5a4cf8cf40016a0cdeda91ede75d6962cff7f809a85ae966a5/torchvision-0.25.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:24e11199e4d84ba9c5ee7825ebdf1cd37ce8deec225117f10243cae984ced3ec", size = 1874918 }, - { url = "https://files.pythonhosted.org/packages/36/b1/3d6c42f62c272ce34fcce609bb8939bdf873dab5f1b798fd4e880255f129/torchvision-0.25.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:5f271136d2d2c0b7a24c5671795c6e4fd8da4e0ea98aeb1041f62bc04c4370ef", size = 2309106 }, - { url = "https://files.pythonhosted.org/packages/c7/60/59bb9c8b67cce356daeed4cb96a717caa4f69c9822f72e223a0eae7a9bd9/torchvision-0.25.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:855c0dc6d37f462482da7531c6788518baedca1e0847f3df42a911713acdfe52", size = 8071522 }, - { url = "https://files.pythonhosted.org/packages/32/a5/9a9b1de0720f884ea50dbf9acb22cbe5312e51d7b8c4ac6ba9b51efd9bba/torchvision-0.25.0-cp313-cp313-win_amd64.whl", hash = "sha256:cef0196be31be421f6f462d1e9da1101be7332d91984caa6f8022e6c78a5877f", size = 4321911 }, - { url = "https://files.pythonhosted.org/packages/52/99/dca81ed21ebaeff2b67cc9f815a20fdaa418b69f5f9ea4c6ed71721470db/torchvision-0.25.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a8f8061284395ce31bcd460f2169013382ccf411148ceb2ee38e718e9860f5a7", size = 1896209 }, - { url = 
"https://files.pythonhosted.org/packages/28/cc/2103149761fdb4eaed58a53e8437b2d716d48f05174fab1d9fcf1e2a2244/torchvision-0.25.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:146d02c9876858420adf41f3189fe90e3d6a409cbfa65454c09f25fb33bf7266", size = 2310735 }, - { url = "https://files.pythonhosted.org/packages/76/ad/f4c985ad52ddd3b22711c588501be1b330adaeaf6850317f66751711b78c/torchvision-0.25.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:c4d395cb2c4a2712f6eb93a34476cdf7aae74bb6ea2ea1917f858e96344b00aa", size = 8089557 }, - { url = "https://files.pythonhosted.org/packages/63/cc/0ea68b5802e5e3c31f44b307e74947bad5a38cc655231d845534ed50ddb8/torchvision-0.25.0-cp313-cp313t-win_amd64.whl", hash = "sha256:5e6b449e9fa7d642142c0e27c41e5a43b508d57ed8e79b7c0a0c28652da8678c", size = 4344260 }, - { url = "https://files.pythonhosted.org/packages/9e/1f/fa839532660e2602b7e704d65010787c5bb296258b44fa8b9c1cd6175e7d/torchvision-0.25.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:620a236288d594dcec7634c754484542dc0a5c1b0e0b83a34bda5e91e9b7c3a1", size = 1896193 }, - { url = "https://files.pythonhosted.org/packages/80/ed/d51889da7ceaf5ff7a0574fb28f9b6b223df19667265395891f81b364ab3/torchvision-0.25.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:0b5e7f50002a8145a98c5694a018e738c50e2972608310c7e88e1bd4c058f6ce", size = 2309331 }, - { url = "https://files.pythonhosted.org/packages/90/a5/f93fcffaddd8f12f9e812256830ec9c9ca65abbf1bc369379f9c364d1ff4/torchvision-0.25.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:632db02300e83793812eee4f61ae6a2686dab10b4cfd628b620dc47747aa9d03", size = 8088713 }, - { url = "https://files.pythonhosted.org/packages/1f/eb/d0096eed5690d962853213f2ee00d91478dfcb586b62dbbb449fb8abc3a6/torchvision-0.25.0-cp314-cp314-win_amd64.whl", hash = "sha256:d1abd5ed030c708f5dbf4812ad5f6fbe9384b63c40d6bd79f8df41a4a759a917", size = 4325058 }, - { url = "https://files.pythonhosted.org/packages/97/36/96374a4c7ab50dea9787ce987815614ccfe988a42e10ac1a2e3e5b60319a/torchvision-0.25.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:ad9a8a5877782944d99186e4502a614770fe906626d76e9cd32446a0ac3075f2", size = 1896207 }, - { url = "https://files.pythonhosted.org/packages/b5/e2/7abb10a867db79b226b41da419b63b69c0bd5b82438c4a4ed50e084c552f/torchvision-0.25.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:40a122c3cf4d14b651f095e0f672b688dde78632783fc5cd3d4d5e4f6a828563", size = 2310741 }, - { url = "https://files.pythonhosted.org/packages/08/e6/0927784e6ffc340b6676befde1c60260bd51641c9c574b9298d791a9cda4/torchvision-0.25.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:846890161b825b38aa85fc37fb3ba5eea74e7091ff28bab378287111483b6443", size = 8089772 }, - { url = "https://files.pythonhosted.org/packages/b6/37/e7ca4ec820d434c0f23f824eb29f0676a0c3e7a118f1514f5b949c3356da/torchvision-0.25.0-cp314-cp314t-win_amd64.whl", hash = "sha256:f07f01d27375ad89d72aa2b3f2180f07da95dd9d2e4c758e015c0acb2da72977", size = 4425879 }, + { url = "https://files.pythonhosted.org/packages/56/3a/6ea0d73f49a9bef38a1b3a92e8dd455cea58470985d25635beab93841748/torchvision-0.25.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c2abe430c90b1d5e552680037d68da4eb80a5852ebb1c811b2b89d299b10573b", size = 1874920, upload_time = "2026-01-21T16:27:45.348Z" }, + { url = "https://files.pythonhosted.org/packages/51/f8/c0e1ef27c66e15406fece94930e7d6feee4cb6374bbc02d945a630d6426e/torchvision-0.25.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = 
"sha256:b75deafa2dfea3e2c2a525559b04783515e3463f6e830cb71de0fb7ea36fe233", size = 2344556, upload_time = "2026-01-21T16:27:40.125Z" }, + { url = "https://files.pythonhosted.org/packages/68/2f/f24b039169db474e8688f649377de082a965fbf85daf4e46c44412f1d15a/torchvision-0.25.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:f25aa9e380865b11ea6e9d99d84df86b9cc959f1a007cd966fc6f1ab2ed0e248", size = 8072351, upload_time = "2026-01-21T16:27:21.074Z" }, + { url = "https://files.pythonhosted.org/packages/ad/16/8f650c2e288977cf0f8f85184b90ee56ed170a4919347fc74ee99286ed6f/torchvision-0.25.0-cp312-cp312-win_amd64.whl", hash = "sha256:f9c55ae8d673ab493325d1267cbd285bb94d56f99626c00ac4644de32a59ede3", size = 4303059, upload_time = "2026-01-21T16:27:11.08Z" }, + { url = "https://files.pythonhosted.org/packages/f5/5b/1562a04a6a5a4cf8cf40016a0cdeda91ede75d6962cff7f809a85ae966a5/torchvision-0.25.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:24e11199e4d84ba9c5ee7825ebdf1cd37ce8deec225117f10243cae984ced3ec", size = 1874918, upload_time = "2026-01-21T16:27:39.02Z" }, + { url = "https://files.pythonhosted.org/packages/36/b1/3d6c42f62c272ce34fcce609bb8939bdf873dab5f1b798fd4e880255f129/torchvision-0.25.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:5f271136d2d2c0b7a24c5671795c6e4fd8da4e0ea98aeb1041f62bc04c4370ef", size = 2309106, upload_time = "2026-01-21T16:27:30.624Z" }, + { url = "https://files.pythonhosted.org/packages/c7/60/59bb9c8b67cce356daeed4cb96a717caa4f69c9822f72e223a0eae7a9bd9/torchvision-0.25.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:855c0dc6d37f462482da7531c6788518baedca1e0847f3df42a911713acdfe52", size = 8071522, upload_time = "2026-01-21T16:27:29.392Z" }, + { url = "https://files.pythonhosted.org/packages/32/a5/9a9b1de0720f884ea50dbf9acb22cbe5312e51d7b8c4ac6ba9b51efd9bba/torchvision-0.25.0-cp313-cp313-win_amd64.whl", hash = "sha256:cef0196be31be421f6f462d1e9da1101be7332d91984caa6f8022e6c78a5877f", size = 4321911, upload_time = "2026-01-21T16:27:35.195Z" }, + { url = "https://files.pythonhosted.org/packages/52/99/dca81ed21ebaeff2b67cc9f815a20fdaa418b69f5f9ea4c6ed71721470db/torchvision-0.25.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a8f8061284395ce31bcd460f2169013382ccf411148ceb2ee38e718e9860f5a7", size = 1896209, upload_time = "2026-01-21T16:27:32.159Z" }, + { url = "https://files.pythonhosted.org/packages/28/cc/2103149761fdb4eaed58a53e8437b2d716d48f05174fab1d9fcf1e2a2244/torchvision-0.25.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:146d02c9876858420adf41f3189fe90e3d6a409cbfa65454c09f25fb33bf7266", size = 2310735, upload_time = "2026-01-21T16:27:22.327Z" }, + { url = "https://files.pythonhosted.org/packages/76/ad/f4c985ad52ddd3b22711c588501be1b330adaeaf6850317f66751711b78c/torchvision-0.25.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:c4d395cb2c4a2712f6eb93a34476cdf7aae74bb6ea2ea1917f858e96344b00aa", size = 8089557, upload_time = "2026-01-21T16:27:27.666Z" }, + { url = "https://files.pythonhosted.org/packages/63/cc/0ea68b5802e5e3c31f44b307e74947bad5a38cc655231d845534ed50ddb8/torchvision-0.25.0-cp313-cp313t-win_amd64.whl", hash = "sha256:5e6b449e9fa7d642142c0e27c41e5a43b508d57ed8e79b7c0a0c28652da8678c", size = 4344260, upload_time = "2026-01-21T16:27:17.018Z" }, + { url = "https://files.pythonhosted.org/packages/9e/1f/fa839532660e2602b7e704d65010787c5bb296258b44fa8b9c1cd6175e7d/torchvision-0.25.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:620a236288d594dcec7634c754484542dc0a5c1b0e0b83a34bda5e91e9b7c3a1", size = 
1896193, upload_time = "2026-01-21T16:27:24.785Z" }, + { url = "https://files.pythonhosted.org/packages/80/ed/d51889da7ceaf5ff7a0574fb28f9b6b223df19667265395891f81b364ab3/torchvision-0.25.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:0b5e7f50002a8145a98c5694a018e738c50e2972608310c7e88e1bd4c058f6ce", size = 2309331, upload_time = "2026-01-21T16:27:19.97Z" }, + { url = "https://files.pythonhosted.org/packages/90/a5/f93fcffaddd8f12f9e812256830ec9c9ca65abbf1bc369379f9c364d1ff4/torchvision-0.25.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:632db02300e83793812eee4f61ae6a2686dab10b4cfd628b620dc47747aa9d03", size = 8088713, upload_time = "2026-01-21T16:27:15.281Z" }, + { url = "https://files.pythonhosted.org/packages/1f/eb/d0096eed5690d962853213f2ee00d91478dfcb586b62dbbb449fb8abc3a6/torchvision-0.25.0-cp314-cp314-win_amd64.whl", hash = "sha256:d1abd5ed030c708f5dbf4812ad5f6fbe9384b63c40d6bd79f8df41a4a759a917", size = 4325058, upload_time = "2026-01-21T16:27:26.165Z" }, + { url = "https://files.pythonhosted.org/packages/97/36/96374a4c7ab50dea9787ce987815614ccfe988a42e10ac1a2e3e5b60319a/torchvision-0.25.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:ad9a8a5877782944d99186e4502a614770fe906626d76e9cd32446a0ac3075f2", size = 1896207, upload_time = "2026-01-21T16:27:23.383Z" }, + { url = "https://files.pythonhosted.org/packages/b5/e2/7abb10a867db79b226b41da419b63b69c0bd5b82438c4a4ed50e084c552f/torchvision-0.25.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:40a122c3cf4d14b651f095e0f672b688dde78632783fc5cd3d4d5e4f6a828563", size = 2310741, upload_time = "2026-01-21T16:27:18.712Z" }, + { url = "https://files.pythonhosted.org/packages/08/e6/0927784e6ffc340b6676befde1c60260bd51641c9c574b9298d791a9cda4/torchvision-0.25.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:846890161b825b38aa85fc37fb3ba5eea74e7091ff28bab378287111483b6443", size = 8089772, upload_time = "2026-01-21T16:27:14.048Z" }, + { url = "https://files.pythonhosted.org/packages/b6/37/e7ca4ec820d434c0f23f824eb29f0676a0c3e7a118f1514f5b949c3356da/torchvision-0.25.0-cp314-cp314t-win_amd64.whl", hash = "sha256:f07f01d27375ad89d72aa2b3f2180f07da95dd9d2e4c758e015c0acb2da72977", size = 4425879, upload_time = "2026-01-21T16:27:12.579Z" }, ] [[package]] @@ -3263,11 +3367,11 @@ name = "tqdm" version = "4.67.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "platform_system == 'Windows'" }, + { name = "colorama", marker = "sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737 } +sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737, upload_time = "2024-11-24T20:12:22.481Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540 }, + { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size 
= 78540, upload_time = "2024-11-24T20:12:19.698Z" }, ] [[package]] @@ -3286,9 +3390,9 @@ dependencies = [ { name = "tokenizers" }, { name = "tqdm" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c4/35/67252acc1b929dc88b6602e8c4a982e64f31e733b804c14bc24b47da35e6/transformers-4.57.6.tar.gz", hash = "sha256:55e44126ece9dc0a291521b7e5492b572e6ef2766338a610b9ab5afbb70689d3", size = 10134912 } +sdist = { url = "https://files.pythonhosted.org/packages/c4/35/67252acc1b929dc88b6602e8c4a982e64f31e733b804c14bc24b47da35e6/transformers-4.57.6.tar.gz", hash = "sha256:55e44126ece9dc0a291521b7e5492b572e6ef2766338a610b9ab5afbb70689d3", size = 10134912, upload_time = "2026-01-16T10:38:39.284Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/03/b8/e484ef633af3887baeeb4b6ad12743363af7cce68ae51e938e00aaa0529d/transformers-4.57.6-py3-none-any.whl", hash = "sha256:4c9e9de11333ddfe5114bc872c9f370509198acf0b87a832a0ab9458e2bd0550", size = 11993498 }, + { url = "https://files.pythonhosted.org/packages/03/b8/e484ef633af3887baeeb4b6ad12743363af7cce68ae51e938e00aaa0529d/transformers-4.57.6-py3-none-any.whl", hash = "sha256:4c9e9de11333ddfe5114bc872c9f370509198acf0b87a832a0ab9458e2bd0550", size = 11993498, upload_time = "2026-01-16T10:38:31.289Z" }, ] [[package]] @@ -3296,16 +3400,11 @@ name = "triton" version = "3.6.0" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/17/5d/08201db32823bdf77a0e2b9039540080b2e5c23a20706ddba942924ebcd6/triton-3.6.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:374f52c11a711fd062b4bfbb201fd9ac0a5febd28a96fb41b4a0f51dde3157f4", size = 176128243 }, - { url = "https://files.pythonhosted.org/packages/ab/a8/cdf8b3e4c98132f965f88c2313a4b493266832ad47fb52f23d14d4f86bb5/triton-3.6.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:74caf5e34b66d9f3a429af689c1c7128daba1d8208df60e81106b115c00d6fca", size = 188266850 }, - { url = "https://files.pythonhosted.org/packages/3c/12/34d71b350e89a204c2c7777a9bba0dcf2f19a5bfdd70b57c4dbc5ffd7154/triton-3.6.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:448e02fe6dc898e9e5aa89cf0ee5c371e99df5aa5e8ad976a80b93334f3494fd", size = 176133521 }, - { url = "https://files.pythonhosted.org/packages/f9/0b/37d991d8c130ce81a8728ae3c25b6e60935838e9be1b58791f5997b24a54/triton-3.6.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:10c7f76c6e72d2ef08df639e3d0d30729112f47a56b0c81672edc05ee5116ac9", size = 188289450 }, - { url = "https://files.pythonhosted.org/packages/ce/4e/41b0c8033b503fd3cfcd12392cdd256945026a91ff02452bef40ec34bee7/triton-3.6.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1722e172d34e32abc3eb7711d0025bb69d7959ebea84e3b7f7a341cd7ed694d6", size = 176276087 }, - { url = "https://files.pythonhosted.org/packages/35/f8/9c66bfc55361ec6d0e4040a0337fb5924ceb23de4648b8a81ae9d33b2b38/triton-3.6.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d002e07d7180fd65e622134fbd980c9a3d4211fb85224b56a0a0efbd422ab72f", size = 188400296 }, - { url = "https://files.pythonhosted.org/packages/49/55/5ecf0dcaa0f2fbbd4420f7ef227ee3cb172e91e5fede9d0ecaddc43363b4/triton-3.6.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ef5523241e7d1abca00f1d240949eebdd7c673b005edbbce0aca95b8191f1d43", size = 176138577 }, - { url = 
"https://files.pythonhosted.org/packages/df/3d/9e7eee57b37c80cec63322c0231bb6da3cfe535a91d7a4d64896fcb89357/triton-3.6.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a17a5d5985f0ac494ed8a8e54568f092f7057ef60e1b0fa09d3fd1512064e803", size = 188273063 }, - { url = "https://files.pythonhosted.org/packages/48/db/56ee649cab5eaff4757541325aca81f52d02d4a7cd3506776cad2451e060/triton-3.6.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0b3a97e8ed304dfa9bd23bb41ca04cdf6b2e617d5e782a8653d616037a5d537d", size = 176274804 }, - { url = "https://files.pythonhosted.org/packages/f6/56/6113c23ff46c00aae423333eb58b3e60bdfe9179d542781955a5e1514cb3/triton-3.6.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:46bd1c1af4b6704e554cad2eeb3b0a6513a980d470ccfa63189737340c7746a7", size = 188397994 }, + { url = "https://files.pythonhosted.org/packages/ab/a8/cdf8b3e4c98132f965f88c2313a4b493266832ad47fb52f23d14d4f86bb5/triton-3.6.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:74caf5e34b66d9f3a429af689c1c7128daba1d8208df60e81106b115c00d6fca", size = 188266850, upload_time = "2026-01-20T16:00:43.041Z" }, + { url = "https://files.pythonhosted.org/packages/f9/0b/37d991d8c130ce81a8728ae3c25b6e60935838e9be1b58791f5997b24a54/triton-3.6.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:10c7f76c6e72d2ef08df639e3d0d30729112f47a56b0c81672edc05ee5116ac9", size = 188289450, upload_time = "2026-01-20T16:00:49.136Z" }, + { url = "https://files.pythonhosted.org/packages/35/f8/9c66bfc55361ec6d0e4040a0337fb5924ceb23de4648b8a81ae9d33b2b38/triton-3.6.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d002e07d7180fd65e622134fbd980c9a3d4211fb85224b56a0a0efbd422ab72f", size = 188400296, upload_time = "2026-01-20T16:00:56.042Z" }, + { url = "https://files.pythonhosted.org/packages/df/3d/9e7eee57b37c80cec63322c0231bb6da3cfe535a91d7a4d64896fcb89357/triton-3.6.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a17a5d5985f0ac494ed8a8e54568f092f7057ef60e1b0fa09d3fd1512064e803", size = 188273063, upload_time = "2026-01-20T16:01:07.278Z" }, + { url = "https://files.pythonhosted.org/packages/f6/56/6113c23ff46c00aae423333eb58b3e60bdfe9179d542781955a5e1514cb3/triton-3.6.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:46bd1c1af4b6704e554cad2eeb3b0a6513a980d470ccfa63189737340c7746a7", size = 188397994, upload_time = "2026-01-20T16:01:14.236Z" }, ] [[package]] @@ -3315,9 +3414,9 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c7/68/71c1a15b5f65f40e91b65da23b8224dad41349894535a97f63a52e462196/typeguard-4.4.4.tar.gz", hash = "sha256:3a7fd2dffb705d4d0efaed4306a704c89b9dee850b688f060a8b1615a79e5f74", size = 75203 } +sdist = { url = "https://files.pythonhosted.org/packages/c7/68/71c1a15b5f65f40e91b65da23b8224dad41349894535a97f63a52e462196/typeguard-4.4.4.tar.gz", hash = "sha256:3a7fd2dffb705d4d0efaed4306a704c89b9dee850b688f060a8b1615a79e5f74", size = 75203, upload_time = "2025-06-18T09:56:07.624Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1b/a9/e3aee762739c1d7528da1c3e06d518503f8b6c439c35549b53735ba52ead/typeguard-4.4.4-py3-none-any.whl", hash = "sha256:b5f562281b6bfa1f5492470464730ef001646128b180769880468bd84b68b09e", size = 34874 }, + { url = 
"https://files.pythonhosted.org/packages/1b/a9/e3aee762739c1d7528da1c3e06d518503f8b6c439c35549b53735ba52ead/typeguard-4.4.4-py3-none-any.whl", hash = "sha256:b5f562281b6bfa1f5492470464730ef001646128b180769880468bd84b68b09e", size = 34874, upload_time = "2025-06-18T09:56:05.999Z" }, ] [[package]] @@ -3328,18 +3427,18 @@ dependencies = [ { name = "click" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/17/d4/064570dec6358aa9049d4708e4a10407d74c99258f8b2136bb8702303f1a/typer_slim-0.21.1.tar.gz", hash = "sha256:73495dd08c2d0940d611c5a8c04e91c2a0a98600cbd4ee19192255a233b6dbfd", size = 110478 } +sdist = { url = "https://files.pythonhosted.org/packages/17/d4/064570dec6358aa9049d4708e4a10407d74c99258f8b2136bb8702303f1a/typer_slim-0.21.1.tar.gz", hash = "sha256:73495dd08c2d0940d611c5a8c04e91c2a0a98600cbd4ee19192255a233b6dbfd", size = 110478, upload_time = "2026-01-06T11:21:11.176Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c8/0a/4aca634faf693e33004796b6cee0ae2e1dba375a800c16ab8d3eff4bb800/typer_slim-0.21.1-py3-none-any.whl", hash = "sha256:6e6c31047f171ac93cc5a973c9e617dbc5ab2bddc4d0a3135dc161b4e2020e0d", size = 47444 }, + { url = "https://files.pythonhosted.org/packages/c8/0a/4aca634faf693e33004796b6cee0ae2e1dba375a800c16ab8d3eff4bb800/typer_slim-0.21.1-py3-none-any.whl", hash = "sha256:6e6c31047f171ac93cc5a973c9e617dbc5ab2bddc4d0a3135dc161b4e2020e0d", size = 47444, upload_time = "2026-01-06T11:21:12.441Z" }, ] [[package]] name = "typing-extensions" version = "4.15.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391 } +sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload_time = "2025-08-25T13:49:26.313Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614 }, + { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload_time = "2025-08-25T13:49:24.86Z" }, ] [[package]] @@ -3349,36 +3448,36 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac53367ae42139cf4b1ca5f36bb3dc6c9d33acdb43655/typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464", size = 75949 } +sdist = { url = "https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac53367ae42139cf4b1ca5f36bb3dc6c9d33acdb43655/typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464", size = 75949, upload_time = "2025-10-01T02:14:41.687Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611 }, + { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload_time = "2025-10-01T02:14:40.154Z" }, ] [[package]] name = "tzdata" version = "2025.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/5e/a7/c202b344c5ca7daf398f3b8a477eeb205cf3b6f32e7ec3a6bac0629ca975/tzdata-2025.3.tar.gz", hash = "sha256:de39c2ca5dc7b0344f2eba86f49d614019d29f060fc4ebc8a417896a620b56a7", size = 196772 } +sdist = { url = "https://files.pythonhosted.org/packages/5e/a7/c202b344c5ca7daf398f3b8a477eeb205cf3b6f32e7ec3a6bac0629ca975/tzdata-2025.3.tar.gz", hash = "sha256:de39c2ca5dc7b0344f2eba86f49d614019d29f060fc4ebc8a417896a620b56a7", size = 196772, upload_time = "2025-12-13T17:45:35.667Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c7/b0/003792df09decd6849a5e39c28b513c06e84436a54440380862b5aeff25d/tzdata-2025.3-py2.py3-none-any.whl", hash = "sha256:06a47e5700f3081aab02b2e513160914ff0694bce9947d6b76ebd6bf57cfc5d1", size = 348521 }, + { url = "https://files.pythonhosted.org/packages/c7/b0/003792df09decd6849a5e39c28b513c06e84436a54440380862b5aeff25d/tzdata-2025.3-py2.py3-none-any.whl", hash = "sha256:06a47e5700f3081aab02b2e513160914ff0694bce9947d6b76ebd6bf57cfc5d1", size = 348521, upload_time = "2025-12-13T17:45:33.889Z" }, ] [[package]] name = "ubelt" version = "1.4.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/24/cec4c09e785f5fdf0b1c7f930ab30710090dabf746b9beb45296d5c55390/ubelt-1.4.0.tar.gz", hash = "sha256:84e146c1c3ba13a2425eea5d5748bf33ab3f9dbe7ce237eb11e130116ba17441", size = 296335 } +sdist = { url = "https://files.pythonhosted.org/packages/a2/24/cec4c09e785f5fdf0b1c7f930ab30710090dabf746b9beb45296d5c55390/ubelt-1.4.0.tar.gz", hash = "sha256:84e146c1c3ba13a2425eea5d5748bf33ab3f9dbe7ce237eb11e130116ba17441", size = 296335, upload_time = "2025-08-17T03:41:35.672Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/2d/dd/035fc8cd0a6311a48912b129cb93a19430d8d1e2bf2e8f03d8e1e5e8eb9b/ubelt-1.4.0-py3-none-any.whl", hash = "sha256:1490f377ba3cd1d65d839fae4c1a0354c5d486f3cb19e0cb434712709fcf49df", size = 233381 }, + { url = "https://files.pythonhosted.org/packages/2d/dd/035fc8cd0a6311a48912b129cb93a19430d8d1e2bf2e8f03d8e1e5e8eb9b/ubelt-1.4.0-py3-none-any.whl", hash = "sha256:1490f377ba3cd1d65d839fae4c1a0354c5d486f3cb19e0cb434712709fcf49df", size = 233381, upload_time = "2025-08-17T03:41:33.428Z" }, ] [[package]] name = "uc-micro-py" version = "1.0.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/91/7a/146a99696aee0609e3712f2b44c6274566bc368dfe8375191278045186b8/uc-micro-py-1.0.3.tar.gz", hash = "sha256:d321b92cff673ec58027c04015fcaa8bb1e005478643ff4a500882eaab88c48a", size = 6043 } +sdist = { url = "https://files.pythonhosted.org/packages/91/7a/146a99696aee0609e3712f2b44c6274566bc368dfe8375191278045186b8/uc-micro-py-1.0.3.tar.gz", hash = "sha256:d321b92cff673ec58027c04015fcaa8bb1e005478643ff4a500882eaab88c48a", size = 6043, upload_time = 
"2024-02-09T16:52:01.654Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/37/87/1f677586e8ac487e29672e4b17455758fce261de06a0d086167bb760361a/uc_micro_py-1.0.3-py3-none-any.whl", hash = "sha256:db1dffff340817673d7b466ec86114a9dc0e9d4d9b5ba229d9d60e5c12600cd5", size = 6229 }, + { url = "https://files.pythonhosted.org/packages/37/87/1f677586e8ac487e29672e4b17455758fce261de06a0d086167bb760361a/uc_micro_py-1.0.3-py3-none-any.whl", hash = "sha256:db1dffff340817673d7b466ec86114a9dc0e9d4d9b5ba229d9d60e5c12600cd5", size = 6229, upload_time = "2024-02-09T16:52:00.371Z" }, ] [[package]] @@ -3390,7 +3489,7 @@ dependencies = [ { name = "parameterized" }, { name = "scikit-learn" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/28/7a/a9b8f063fca2b3d339bd10d38363d58983e8c79095b81c4c87f3a124e211/uncertainty-calibration-0.1.4.tar.gz", hash = "sha256:e99baf2f2ced29b852eb47c25852e4bcc3fff183befef6c35cc239165c6e2634", size = 15949 } +sdist = { url = "https://files.pythonhosted.org/packages/28/7a/a9b8f063fca2b3d339bd10d38363d58983e8c79095b81c4c87f3a124e211/uncertainty-calibration-0.1.4.tar.gz", hash = "sha256:e99baf2f2ced29b852eb47c25852e4bcc3fff183befef6c35cc239165c6e2634", size = 15949, upload_time = "2022-11-11T20:52:04.295Z" } [[package]] name = "universal-pathlib" @@ -3400,18 +3499,32 @@ dependencies = [ { name = "fsspec" }, { name = "pathlib-abc" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/6e/ec/764b0d4593c6a8f5f66b347a19b5db9486dd0024b5e3339d468064a90c76/universal_pathlib-0.3.8.tar.gz", hash = "sha256:ead2b65bca3df6e11c3b7cb36fc9846340bc3c2db4ef57131550260422b0a3e8", size = 258837 } +sdist = { url = "https://files.pythonhosted.org/packages/6e/ec/764b0d4593c6a8f5f66b347a19b5db9486dd0024b5e3339d468064a90c76/universal_pathlib-0.3.8.tar.gz", hash = "sha256:ead2b65bca3df6e11c3b7cb36fc9846340bc3c2db4ef57131550260422b0a3e8", size = 258837, upload_time = "2026-01-11T22:13:53.328Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/86/2c/fc9416619a418e94576aef84ef263906a24f76a21a1c3e96ddae25c82df9/universal_pathlib-0.3.8-py3-none-any.whl", hash = "sha256:dac4fd9a3df918d85bb6da678e794b5dfa9ecdb5ff74675b497553dbe50134b8", size = 82608 }, + { url = "https://files.pythonhosted.org/packages/86/2c/fc9416619a418e94576aef84ef263906a24f76a21a1c3e96ddae25c82df9/universal_pathlib-0.3.8-py3-none-any.whl", hash = "sha256:dac4fd9a3df918d85bb6da678e794b5dfa9ecdb5ff74675b497553dbe50134b8", size = 82608, upload_time = "2026-01-11T22:13:51.313Z" }, ] [[package]] name = "urllib3" version = "2.6.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556 } +sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload_time = "2026-01-07T16:24:43.925Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload_time = "2026-01-07T16:24:42.685Z" }, +] + +[[package]] +name = "virtualenv" +version = "20.36.1" +source = { registry = 
"https://pypi.org/simple" } +dependencies = [ + { name = "distlib" }, + { name = "filelock" }, + { name = "platformdirs" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/aa/a3/4d310fa5f00863544e1d0f4de93bddec248499ccf97d4791bc3122c9d4f3/virtualenv-20.36.1.tar.gz", hash = "sha256:8befb5c81842c641f8ee658481e42641c68b5eab3521d8e092d18320902466ba", size = 6032239, upload_time = "2026-01-09T18:21:01.296Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584 }, + { url = "https://files.pythonhosted.org/packages/6a/2a/dc2228b2888f51192c7dc766106cd475f1b768c10caaf9727659726f7391/virtualenv-20.36.1-py3-none-any.whl", hash = "sha256:575a8d6b124ef88f6f51d56d656132389f961062a9177016a50e4f507bbcc19f", size = 6008258, upload_time = "2026-01-09T18:20:59.425Z" }, ] [[package]] @@ -3421,9 +3534,9 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ac/f9/054e6e2f1071e963b5e746b48d1e3727470b2a490834d18ad92364929db3/wasabi-1.1.3.tar.gz", hash = "sha256:4bb3008f003809db0c3e28b4daf20906ea871a2bb43f9914197d540f4f2e0878", size = 30391 } +sdist = { url = "https://files.pythonhosted.org/packages/ac/f9/054e6e2f1071e963b5e746b48d1e3727470b2a490834d18ad92364929db3/wasabi-1.1.3.tar.gz", hash = "sha256:4bb3008f003809db0c3e28b4daf20906ea871a2bb43f9914197d540f4f2e0878", size = 30391, upload_time = "2024-05-31T16:56:18.99Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/06/7c/34330a89da55610daa5f245ddce5aab81244321101614751e7537f125133/wasabi-1.1.3-py3-none-any.whl", hash = "sha256:f76e16e8f7e79f8c4c8be49b4024ac725713ab10cd7f19350ad18a8e3f71728c", size = 27880 }, + { url = "https://files.pythonhosted.org/packages/06/7c/34330a89da55610daa5f245ddce5aab81244321101614751e7537f125133/wasabi-1.1.3-py3-none-any.whl", hash = "sha256:f76e16e8f7e79f8c4c8be49b4024ac725713ab10cd7f19350ad18a8e3f71728c", size = 27880, upload_time = "2024-05-31T16:56:16.699Z" }, ] [[package]] @@ -3441,141 +3554,141 @@ dependencies = [ { name = "typer-slim" }, { name = "wasabi" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/09/d7/edd9c24e60cf8e5de130aa2e8af3b01521f4d0216c371d01212f580d0d8e/weasel-0.4.3.tar.gz", hash = "sha256:f293d6174398e8f478c78481e00c503ee4b82ea7a3e6d0d6a01e46a6b1396845", size = 38733 } +sdist = { url = "https://files.pythonhosted.org/packages/09/d7/edd9c24e60cf8e5de130aa2e8af3b01521f4d0216c371d01212f580d0d8e/weasel-0.4.3.tar.gz", hash = "sha256:f293d6174398e8f478c78481e00c503ee4b82ea7a3e6d0d6a01e46a6b1396845", size = 38733, upload_time = "2025-11-13T23:52:28.193Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a4/74/a148b41572656904a39dfcfed3f84dd1066014eed94e209223ae8e9d088d/weasel-0.4.3-py3-none-any.whl", hash = "sha256:08f65b5d0dbded4879e08a64882de9b9514753d9eaa4c4e2a576e33666ac12cf", size = 50757 }, + { url = "https://files.pythonhosted.org/packages/a4/74/a148b41572656904a39dfcfed3f84dd1066014eed94e209223ae8e9d088d/weasel-0.4.3-py3-none-any.whl", hash = "sha256:08f65b5d0dbded4879e08a64882de9b9514753d9eaa4c4e2a576e33666ac12cf", size = 50757, upload_time = "2025-11-13T23:52:26.982Z" }, ] [[package]] name = "wrapt" version = "1.17.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/95/8f/aeb76c5b46e273670962298c23e7ddde79916cb74db802131d49a85e4b7d/wrapt-1.17.3.tar.gz", hash = "sha256:f66eb08feaa410fe4eebd17f2a2c8e2e46d3476e9f8c783daa8e09e0faa666d0", size = 55547 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/9f/41/cad1aba93e752f1f9268c77270da3c469883d56e2798e7df6240dcb2287b/wrapt-1.17.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:ab232e7fdb44cdfbf55fc3afa31bcdb0d8980b9b95c38b6405df2acb672af0e0", size = 53998 }, - { url = "https://files.pythonhosted.org/packages/60/f8/096a7cc13097a1869fe44efe68dace40d2a16ecb853141394047f0780b96/wrapt-1.17.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9baa544e6acc91130e926e8c802a17f3b16fbea0fd441b5a60f5cf2cc5c3deba", size = 39020 }, - { url = "https://files.pythonhosted.org/packages/33/df/bdf864b8997aab4febb96a9ae5c124f700a5abd9b5e13d2a3214ec4be705/wrapt-1.17.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6b538e31eca1a7ea4605e44f81a48aa24c4632a277431a6ed3f328835901f4fd", size = 39098 }, - { url = "https://files.pythonhosted.org/packages/9f/81/5d931d78d0eb732b95dc3ddaeeb71c8bb572fb01356e9133916cd729ecdd/wrapt-1.17.3-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:042ec3bb8f319c147b1301f2393bc19dba6e176b7da446853406d041c36c7828", size = 88036 }, - { url = "https://files.pythonhosted.org/packages/ca/38/2e1785df03b3d72d34fc6252d91d9d12dc27a5c89caef3335a1bbb8908ca/wrapt-1.17.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3af60380ba0b7b5aeb329bc4e402acd25bd877e98b3727b0135cb5c2efdaefe9", size = 88156 }, - { url = "https://files.pythonhosted.org/packages/b3/8b/48cdb60fe0603e34e05cffda0b2a4adab81fd43718e11111a4b0100fd7c1/wrapt-1.17.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0b02e424deef65c9f7326d8c19220a2c9040c51dc165cddb732f16198c168396", size = 87102 }, - { url = "https://files.pythonhosted.org/packages/3c/51/d81abca783b58f40a154f1b2c56db1d2d9e0d04fa2d4224e357529f57a57/wrapt-1.17.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:74afa28374a3c3a11b3b5e5fca0ae03bef8450d6aa3ab3a1e2c30e3a75d023dc", size = 87732 }, - { url = "https://files.pythonhosted.org/packages/9e/b1/43b286ca1392a006d5336412d41663eeef1ad57485f3e52c767376ba7e5a/wrapt-1.17.3-cp312-cp312-win32.whl", hash = "sha256:4da9f45279fff3543c371d5ababc57a0384f70be244de7759c85a7f989cb4ebe", size = 36705 }, - { url = "https://files.pythonhosted.org/packages/28/de/49493f962bd3c586ab4b88066e967aa2e0703d6ef2c43aa28cb83bf7b507/wrapt-1.17.3-cp312-cp312-win_amd64.whl", hash = "sha256:e71d5c6ebac14875668a1e90baf2ea0ef5b7ac7918355850c0908ae82bcb297c", size = 38877 }, - { url = "https://files.pythonhosted.org/packages/f1/48/0f7102fe9cb1e8a5a77f80d4f0956d62d97034bbe88d33e94699f99d181d/wrapt-1.17.3-cp312-cp312-win_arm64.whl", hash = "sha256:604d076c55e2fdd4c1c03d06dc1a31b95130010517b5019db15365ec4a405fc6", size = 36885 }, - { url = "https://files.pythonhosted.org/packages/fc/f6/759ece88472157acb55fc195e5b116e06730f1b651b5b314c66291729193/wrapt-1.17.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a47681378a0439215912ef542c45a783484d4dd82bac412b71e59cf9c0e1cea0", size = 54003 }, - { url = "https://files.pythonhosted.org/packages/4f/a9/49940b9dc6d47027dc850c116d79b4155f15c08547d04db0f07121499347/wrapt-1.17.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:54a30837587c6ee3cd1a4d1c2ec5d24e77984d44e2f34547e2323ddb4e22eb77", size = 39025 }, - { url = 
"https://files.pythonhosted.org/packages/45/35/6a08de0f2c96dcdd7fe464d7420ddb9a7655a6561150e5fc4da9356aeaab/wrapt-1.17.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:16ecf15d6af39246fe33e507105d67e4b81d8f8d2c6598ff7e3ca1b8a37213f7", size = 39108 }, - { url = "https://files.pythonhosted.org/packages/0c/37/6faf15cfa41bf1f3dba80cd3f5ccc6622dfccb660ab26ed79f0178c7497f/wrapt-1.17.3-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6fd1ad24dc235e4ab88cda009e19bf347aabb975e44fd5c2fb22a3f6e4141277", size = 88072 }, - { url = "https://files.pythonhosted.org/packages/78/f2/efe19ada4a38e4e15b6dff39c3e3f3f73f5decf901f66e6f72fe79623a06/wrapt-1.17.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ed61b7c2d49cee3c027372df5809a59d60cf1b6c2f81ee980a091f3afed6a2d", size = 88214 }, - { url = "https://files.pythonhosted.org/packages/40/90/ca86701e9de1622b16e09689fc24b76f69b06bb0150990f6f4e8b0eeb576/wrapt-1.17.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:423ed5420ad5f5529db9ce89eac09c8a2f97da18eb1c870237e84c5a5c2d60aa", size = 87105 }, - { url = "https://files.pythonhosted.org/packages/fd/e0/d10bd257c9a3e15cbf5523025252cc14d77468e8ed644aafb2d6f54cb95d/wrapt-1.17.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e01375f275f010fcbf7f643b4279896d04e571889b8a5b3f848423d91bf07050", size = 87766 }, - { url = "https://files.pythonhosted.org/packages/e8/cf/7d848740203c7b4b27eb55dbfede11aca974a51c3d894f6cc4b865f42f58/wrapt-1.17.3-cp313-cp313-win32.whl", hash = "sha256:53e5e39ff71b3fc484df8a522c933ea2b7cdd0d5d15ae82e5b23fde87d44cbd8", size = 36711 }, - { url = "https://files.pythonhosted.org/packages/57/54/35a84d0a4d23ea675994104e667ceff49227ce473ba6a59ba2c84f250b74/wrapt-1.17.3-cp313-cp313-win_amd64.whl", hash = "sha256:1f0b2f40cf341ee8cc1a97d51ff50dddb9fcc73241b9143ec74b30fc4f44f6cb", size = 38885 }, - { url = "https://files.pythonhosted.org/packages/01/77/66e54407c59d7b02a3c4e0af3783168fff8e5d61def52cda8728439d86bc/wrapt-1.17.3-cp313-cp313-win_arm64.whl", hash = "sha256:7425ac3c54430f5fc5e7b6f41d41e704db073309acfc09305816bc6a0b26bb16", size = 36896 }, - { url = "https://files.pythonhosted.org/packages/02/a2/cd864b2a14f20d14f4c496fab97802001560f9f41554eef6df201cd7f76c/wrapt-1.17.3-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:cf30f6e3c077c8e6a9a7809c94551203c8843e74ba0c960f4a98cd80d4665d39", size = 54132 }, - { url = "https://files.pythonhosted.org/packages/d5/46/d011725b0c89e853dc44cceb738a307cde5d240d023d6d40a82d1b4e1182/wrapt-1.17.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:e228514a06843cae89621384cfe3a80418f3c04aadf8a3b14e46a7be704e4235", size = 39091 }, - { url = "https://files.pythonhosted.org/packages/2e/9e/3ad852d77c35aae7ddebdbc3b6d35ec8013af7d7dddad0ad911f3d891dae/wrapt-1.17.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:5ea5eb3c0c071862997d6f3e02af1d055f381b1d25b286b9d6644b79db77657c", size = 39172 }, - { url = "https://files.pythonhosted.org/packages/c3/f7/c983d2762bcce2326c317c26a6a1e7016f7eb039c27cdf5c4e30f4160f31/wrapt-1.17.3-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:281262213373b6d5e4bb4353bc36d1ba4084e6d6b5d242863721ef2bf2c2930b", size = 87163 }, - { url = "https://files.pythonhosted.org/packages/e4/0f/f673f75d489c7f22d17fe0193e84b41540d962f75fce579cf6873167c29b/wrapt-1.17.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:dc4a8d2b25efb6681ecacad42fca8859f88092d8732b170de6a5dddd80a1c8fa", size = 87963 }, - { url = "https://files.pythonhosted.org/packages/df/61/515ad6caca68995da2fac7a6af97faab8f78ebe3bf4f761e1b77efbc47b5/wrapt-1.17.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:373342dd05b1d07d752cecbec0c41817231f29f3a89aa8b8843f7b95992ed0c7", size = 86945 }, - { url = "https://files.pythonhosted.org/packages/d3/bd/4e70162ce398462a467bc09e768bee112f1412e563620adc353de9055d33/wrapt-1.17.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d40770d7c0fd5cbed9d84b2c3f2e156431a12c9a37dc6284060fb4bec0b7ffd4", size = 86857 }, - { url = "https://files.pythonhosted.org/packages/2b/b8/da8560695e9284810b8d3df8a19396a6e40e7518059584a1a394a2b35e0a/wrapt-1.17.3-cp314-cp314-win32.whl", hash = "sha256:fbd3c8319de8e1dc79d346929cd71d523622da527cca14e0c1d257e31c2b8b10", size = 37178 }, - { url = "https://files.pythonhosted.org/packages/db/c8/b71eeb192c440d67a5a0449aaee2310a1a1e8eca41676046f99ed2487e9f/wrapt-1.17.3-cp314-cp314-win_amd64.whl", hash = "sha256:e1a4120ae5705f673727d3253de3ed0e016f7cd78dc463db1b31e2463e1f3cf6", size = 39310 }, - { url = "https://files.pythonhosted.org/packages/45/20/2cda20fd4865fa40f86f6c46ed37a2a8356a7a2fde0773269311f2af56c7/wrapt-1.17.3-cp314-cp314-win_arm64.whl", hash = "sha256:507553480670cab08a800b9463bdb881b2edeed77dc677b0a5915e6106e91a58", size = 37266 }, - { url = "https://files.pythonhosted.org/packages/77/ed/dd5cf21aec36c80443c6f900449260b80e2a65cf963668eaef3b9accce36/wrapt-1.17.3-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:ed7c635ae45cfbc1a7371f708727bf74690daedc49b4dba310590ca0bd28aa8a", size = 56544 }, - { url = "https://files.pythonhosted.org/packages/8d/96/450c651cc753877ad100c7949ab4d2e2ecc4d97157e00fa8f45df682456a/wrapt-1.17.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:249f88ed15503f6492a71f01442abddd73856a0032ae860de6d75ca62eed8067", size = 40283 }, - { url = "https://files.pythonhosted.org/packages/d1/86/2fcad95994d9b572db57632acb6f900695a648c3e063f2cd344b3f5c5a37/wrapt-1.17.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:5a03a38adec8066d5a37bea22f2ba6bbf39fcdefbe2d91419ab864c3fb515454", size = 40366 }, - { url = "https://files.pythonhosted.org/packages/64/0e/f4472f2fdde2d4617975144311f8800ef73677a159be7fe61fa50997d6c0/wrapt-1.17.3-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:5d4478d72eb61c36e5b446e375bbc49ed002430d17cdec3cecb36993398e1a9e", size = 108571 }, - { url = "https://files.pythonhosted.org/packages/cc/01/9b85a99996b0a97c8a17484684f206cbb6ba73c1ce6890ac668bcf3838fb/wrapt-1.17.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:223db574bb38637e8230eb14b185565023ab624474df94d2af18f1cdb625216f", size = 113094 }, - { url = "https://files.pythonhosted.org/packages/25/02/78926c1efddcc7b3aa0bc3d6b33a822f7d898059f7cd9ace8c8318e559ef/wrapt-1.17.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e405adefb53a435f01efa7ccdec012c016b5a1d3f35459990afc39b6be4d5056", size = 110659 }, - { url = "https://files.pythonhosted.org/packages/dc/ee/c414501ad518ac3e6fe184753632fe5e5ecacdcf0effc23f31c1e4f7bfcf/wrapt-1.17.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:88547535b787a6c9ce4086917b6e1d291aa8ed914fdd3a838b3539dc95c12804", size = 106946 }, - { url = "https://files.pythonhosted.org/packages/be/44/a1bd64b723d13bb151d6cc91b986146a1952385e0392a78567e12149c7b4/wrapt-1.17.3-cp314-cp314t-win32.whl", hash = 
"sha256:41b1d2bc74c2cac6f9074df52b2efbef2b30bdfe5f40cb78f8ca22963bc62977", size = 38717 }, - { url = "https://files.pythonhosted.org/packages/79/d9/7cfd5a312760ac4dd8bf0184a6ee9e43c33e47f3dadc303032ce012b8fa3/wrapt-1.17.3-cp314-cp314t-win_amd64.whl", hash = "sha256:73d496de46cd2cdbdbcce4ae4bcdb4afb6a11234a1df9c085249d55166b95116", size = 41334 }, - { url = "https://files.pythonhosted.org/packages/46/78/10ad9781128ed2f99dbc474f43283b13fea8ba58723e98844367531c18e9/wrapt-1.17.3-cp314-cp314t-win_arm64.whl", hash = "sha256:f38e60678850c42461d4202739f9bf1e3a737c7ad283638251e79cc49effb6b6", size = 38471 }, - { url = "https://files.pythonhosted.org/packages/1f/f6/a933bd70f98e9cf3e08167fc5cd7aaaca49147e48411c0bd5ae701bb2194/wrapt-1.17.3-py3-none-any.whl", hash = "sha256:7171ae35d2c33d326ac19dd8facb1e82e5fd04ef8c6c0e394d7af55a55051c22", size = 23591 }, +sdist = { url = "https://files.pythonhosted.org/packages/95/8f/aeb76c5b46e273670962298c23e7ddde79916cb74db802131d49a85e4b7d/wrapt-1.17.3.tar.gz", hash = "sha256:f66eb08feaa410fe4eebd17f2a2c8e2e46d3476e9f8c783daa8e09e0faa666d0", size = 55547, upload_time = "2025-08-12T05:53:21.714Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9f/41/cad1aba93e752f1f9268c77270da3c469883d56e2798e7df6240dcb2287b/wrapt-1.17.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:ab232e7fdb44cdfbf55fc3afa31bcdb0d8980b9b95c38b6405df2acb672af0e0", size = 53998, upload_time = "2025-08-12T05:51:47.138Z" }, + { url = "https://files.pythonhosted.org/packages/60/f8/096a7cc13097a1869fe44efe68dace40d2a16ecb853141394047f0780b96/wrapt-1.17.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9baa544e6acc91130e926e8c802a17f3b16fbea0fd441b5a60f5cf2cc5c3deba", size = 39020, upload_time = "2025-08-12T05:51:35.906Z" }, + { url = "https://files.pythonhosted.org/packages/33/df/bdf864b8997aab4febb96a9ae5c124f700a5abd9b5e13d2a3214ec4be705/wrapt-1.17.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6b538e31eca1a7ea4605e44f81a48aa24c4632a277431a6ed3f328835901f4fd", size = 39098, upload_time = "2025-08-12T05:51:57.474Z" }, + { url = "https://files.pythonhosted.org/packages/9f/81/5d931d78d0eb732b95dc3ddaeeb71c8bb572fb01356e9133916cd729ecdd/wrapt-1.17.3-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:042ec3bb8f319c147b1301f2393bc19dba6e176b7da446853406d041c36c7828", size = 88036, upload_time = "2025-08-12T05:52:34.784Z" }, + { url = "https://files.pythonhosted.org/packages/ca/38/2e1785df03b3d72d34fc6252d91d9d12dc27a5c89caef3335a1bbb8908ca/wrapt-1.17.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3af60380ba0b7b5aeb329bc4e402acd25bd877e98b3727b0135cb5c2efdaefe9", size = 88156, upload_time = "2025-08-12T05:52:13.599Z" }, + { url = "https://files.pythonhosted.org/packages/b3/8b/48cdb60fe0603e34e05cffda0b2a4adab81fd43718e11111a4b0100fd7c1/wrapt-1.17.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0b02e424deef65c9f7326d8c19220a2c9040c51dc165cddb732f16198c168396", size = 87102, upload_time = "2025-08-12T05:52:14.56Z" }, + { url = "https://files.pythonhosted.org/packages/3c/51/d81abca783b58f40a154f1b2c56db1d2d9e0d04fa2d4224e357529f57a57/wrapt-1.17.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:74afa28374a3c3a11b3b5e5fca0ae03bef8450d6aa3ab3a1e2c30e3a75d023dc", size = 87732, upload_time = "2025-08-12T05:52:36.165Z" }, + { url = 
"https://files.pythonhosted.org/packages/9e/b1/43b286ca1392a006d5336412d41663eeef1ad57485f3e52c767376ba7e5a/wrapt-1.17.3-cp312-cp312-win32.whl", hash = "sha256:4da9f45279fff3543c371d5ababc57a0384f70be244de7759c85a7f989cb4ebe", size = 36705, upload_time = "2025-08-12T05:53:07.123Z" }, + { url = "https://files.pythonhosted.org/packages/28/de/49493f962bd3c586ab4b88066e967aa2e0703d6ef2c43aa28cb83bf7b507/wrapt-1.17.3-cp312-cp312-win_amd64.whl", hash = "sha256:e71d5c6ebac14875668a1e90baf2ea0ef5b7ac7918355850c0908ae82bcb297c", size = 38877, upload_time = "2025-08-12T05:53:05.436Z" }, + { url = "https://files.pythonhosted.org/packages/f1/48/0f7102fe9cb1e8a5a77f80d4f0956d62d97034bbe88d33e94699f99d181d/wrapt-1.17.3-cp312-cp312-win_arm64.whl", hash = "sha256:604d076c55e2fdd4c1c03d06dc1a31b95130010517b5019db15365ec4a405fc6", size = 36885, upload_time = "2025-08-12T05:52:54.367Z" }, + { url = "https://files.pythonhosted.org/packages/fc/f6/759ece88472157acb55fc195e5b116e06730f1b651b5b314c66291729193/wrapt-1.17.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a47681378a0439215912ef542c45a783484d4dd82bac412b71e59cf9c0e1cea0", size = 54003, upload_time = "2025-08-12T05:51:48.627Z" }, + { url = "https://files.pythonhosted.org/packages/4f/a9/49940b9dc6d47027dc850c116d79b4155f15c08547d04db0f07121499347/wrapt-1.17.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:54a30837587c6ee3cd1a4d1c2ec5d24e77984d44e2f34547e2323ddb4e22eb77", size = 39025, upload_time = "2025-08-12T05:51:37.156Z" }, + { url = "https://files.pythonhosted.org/packages/45/35/6a08de0f2c96dcdd7fe464d7420ddb9a7655a6561150e5fc4da9356aeaab/wrapt-1.17.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:16ecf15d6af39246fe33e507105d67e4b81d8f8d2c6598ff7e3ca1b8a37213f7", size = 39108, upload_time = "2025-08-12T05:51:58.425Z" }, + { url = "https://files.pythonhosted.org/packages/0c/37/6faf15cfa41bf1f3dba80cd3f5ccc6622dfccb660ab26ed79f0178c7497f/wrapt-1.17.3-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6fd1ad24dc235e4ab88cda009e19bf347aabb975e44fd5c2fb22a3f6e4141277", size = 88072, upload_time = "2025-08-12T05:52:37.53Z" }, + { url = "https://files.pythonhosted.org/packages/78/f2/efe19ada4a38e4e15b6dff39c3e3f3f73f5decf901f66e6f72fe79623a06/wrapt-1.17.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ed61b7c2d49cee3c027372df5809a59d60cf1b6c2f81ee980a091f3afed6a2d", size = 88214, upload_time = "2025-08-12T05:52:15.886Z" }, + { url = "https://files.pythonhosted.org/packages/40/90/ca86701e9de1622b16e09689fc24b76f69b06bb0150990f6f4e8b0eeb576/wrapt-1.17.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:423ed5420ad5f5529db9ce89eac09c8a2f97da18eb1c870237e84c5a5c2d60aa", size = 87105, upload_time = "2025-08-12T05:52:17.914Z" }, + { url = "https://files.pythonhosted.org/packages/fd/e0/d10bd257c9a3e15cbf5523025252cc14d77468e8ed644aafb2d6f54cb95d/wrapt-1.17.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e01375f275f010fcbf7f643b4279896d04e571889b8a5b3f848423d91bf07050", size = 87766, upload_time = "2025-08-12T05:52:39.243Z" }, + { url = "https://files.pythonhosted.org/packages/e8/cf/7d848740203c7b4b27eb55dbfede11aca974a51c3d894f6cc4b865f42f58/wrapt-1.17.3-cp313-cp313-win32.whl", hash = "sha256:53e5e39ff71b3fc484df8a522c933ea2b7cdd0d5d15ae82e5b23fde87d44cbd8", size = 36711, upload_time = "2025-08-12T05:53:10.074Z" }, + { url = 
"https://files.pythonhosted.org/packages/57/54/35a84d0a4d23ea675994104e667ceff49227ce473ba6a59ba2c84f250b74/wrapt-1.17.3-cp313-cp313-win_amd64.whl", hash = "sha256:1f0b2f40cf341ee8cc1a97d51ff50dddb9fcc73241b9143ec74b30fc4f44f6cb", size = 38885, upload_time = "2025-08-12T05:53:08.695Z" }, + { url = "https://files.pythonhosted.org/packages/01/77/66e54407c59d7b02a3c4e0af3783168fff8e5d61def52cda8728439d86bc/wrapt-1.17.3-cp313-cp313-win_arm64.whl", hash = "sha256:7425ac3c54430f5fc5e7b6f41d41e704db073309acfc09305816bc6a0b26bb16", size = 36896, upload_time = "2025-08-12T05:52:55.34Z" }, + { url = "https://files.pythonhosted.org/packages/02/a2/cd864b2a14f20d14f4c496fab97802001560f9f41554eef6df201cd7f76c/wrapt-1.17.3-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:cf30f6e3c077c8e6a9a7809c94551203c8843e74ba0c960f4a98cd80d4665d39", size = 54132, upload_time = "2025-08-12T05:51:49.864Z" }, + { url = "https://files.pythonhosted.org/packages/d5/46/d011725b0c89e853dc44cceb738a307cde5d240d023d6d40a82d1b4e1182/wrapt-1.17.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:e228514a06843cae89621384cfe3a80418f3c04aadf8a3b14e46a7be704e4235", size = 39091, upload_time = "2025-08-12T05:51:38.935Z" }, + { url = "https://files.pythonhosted.org/packages/2e/9e/3ad852d77c35aae7ddebdbc3b6d35ec8013af7d7dddad0ad911f3d891dae/wrapt-1.17.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:5ea5eb3c0c071862997d6f3e02af1d055f381b1d25b286b9d6644b79db77657c", size = 39172, upload_time = "2025-08-12T05:51:59.365Z" }, + { url = "https://files.pythonhosted.org/packages/c3/f7/c983d2762bcce2326c317c26a6a1e7016f7eb039c27cdf5c4e30f4160f31/wrapt-1.17.3-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:281262213373b6d5e4bb4353bc36d1ba4084e6d6b5d242863721ef2bf2c2930b", size = 87163, upload_time = "2025-08-12T05:52:40.965Z" }, + { url = "https://files.pythonhosted.org/packages/e4/0f/f673f75d489c7f22d17fe0193e84b41540d962f75fce579cf6873167c29b/wrapt-1.17.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dc4a8d2b25efb6681ecacad42fca8859f88092d8732b170de6a5dddd80a1c8fa", size = 87963, upload_time = "2025-08-12T05:52:20.326Z" }, + { url = "https://files.pythonhosted.org/packages/df/61/515ad6caca68995da2fac7a6af97faab8f78ebe3bf4f761e1b77efbc47b5/wrapt-1.17.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:373342dd05b1d07d752cecbec0c41817231f29f3a89aa8b8843f7b95992ed0c7", size = 86945, upload_time = "2025-08-12T05:52:21.581Z" }, + { url = "https://files.pythonhosted.org/packages/d3/bd/4e70162ce398462a467bc09e768bee112f1412e563620adc353de9055d33/wrapt-1.17.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d40770d7c0fd5cbed9d84b2c3f2e156431a12c9a37dc6284060fb4bec0b7ffd4", size = 86857, upload_time = "2025-08-12T05:52:43.043Z" }, + { url = "https://files.pythonhosted.org/packages/2b/b8/da8560695e9284810b8d3df8a19396a6e40e7518059584a1a394a2b35e0a/wrapt-1.17.3-cp314-cp314-win32.whl", hash = "sha256:fbd3c8319de8e1dc79d346929cd71d523622da527cca14e0c1d257e31c2b8b10", size = 37178, upload_time = "2025-08-12T05:53:12.605Z" }, + { url = "https://files.pythonhosted.org/packages/db/c8/b71eeb192c440d67a5a0449aaee2310a1a1e8eca41676046f99ed2487e9f/wrapt-1.17.3-cp314-cp314-win_amd64.whl", hash = "sha256:e1a4120ae5705f673727d3253de3ed0e016f7cd78dc463db1b31e2463e1f3cf6", size = 39310, upload_time = "2025-08-12T05:53:11.106Z" }, + { url = 
"https://files.pythonhosted.org/packages/45/20/2cda20fd4865fa40f86f6c46ed37a2a8356a7a2fde0773269311f2af56c7/wrapt-1.17.3-cp314-cp314-win_arm64.whl", hash = "sha256:507553480670cab08a800b9463bdb881b2edeed77dc677b0a5915e6106e91a58", size = 37266, upload_time = "2025-08-12T05:52:56.531Z" }, + { url = "https://files.pythonhosted.org/packages/77/ed/dd5cf21aec36c80443c6f900449260b80e2a65cf963668eaef3b9accce36/wrapt-1.17.3-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:ed7c635ae45cfbc1a7371f708727bf74690daedc49b4dba310590ca0bd28aa8a", size = 56544, upload_time = "2025-08-12T05:51:51.109Z" }, + { url = "https://files.pythonhosted.org/packages/8d/96/450c651cc753877ad100c7949ab4d2e2ecc4d97157e00fa8f45df682456a/wrapt-1.17.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:249f88ed15503f6492a71f01442abddd73856a0032ae860de6d75ca62eed8067", size = 40283, upload_time = "2025-08-12T05:51:39.912Z" }, + { url = "https://files.pythonhosted.org/packages/d1/86/2fcad95994d9b572db57632acb6f900695a648c3e063f2cd344b3f5c5a37/wrapt-1.17.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:5a03a38adec8066d5a37bea22f2ba6bbf39fcdefbe2d91419ab864c3fb515454", size = 40366, upload_time = "2025-08-12T05:52:00.693Z" }, + { url = "https://files.pythonhosted.org/packages/64/0e/f4472f2fdde2d4617975144311f8800ef73677a159be7fe61fa50997d6c0/wrapt-1.17.3-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:5d4478d72eb61c36e5b446e375bbc49ed002430d17cdec3cecb36993398e1a9e", size = 108571, upload_time = "2025-08-12T05:52:44.521Z" }, + { url = "https://files.pythonhosted.org/packages/cc/01/9b85a99996b0a97c8a17484684f206cbb6ba73c1ce6890ac668bcf3838fb/wrapt-1.17.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:223db574bb38637e8230eb14b185565023ab624474df94d2af18f1cdb625216f", size = 113094, upload_time = "2025-08-12T05:52:22.618Z" }, + { url = "https://files.pythonhosted.org/packages/25/02/78926c1efddcc7b3aa0bc3d6b33a822f7d898059f7cd9ace8c8318e559ef/wrapt-1.17.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e405adefb53a435f01efa7ccdec012c016b5a1d3f35459990afc39b6be4d5056", size = 110659, upload_time = "2025-08-12T05:52:24.057Z" }, + { url = "https://files.pythonhosted.org/packages/dc/ee/c414501ad518ac3e6fe184753632fe5e5ecacdcf0effc23f31c1e4f7bfcf/wrapt-1.17.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:88547535b787a6c9ce4086917b6e1d291aa8ed914fdd3a838b3539dc95c12804", size = 106946, upload_time = "2025-08-12T05:52:45.976Z" }, + { url = "https://files.pythonhosted.org/packages/be/44/a1bd64b723d13bb151d6cc91b986146a1952385e0392a78567e12149c7b4/wrapt-1.17.3-cp314-cp314t-win32.whl", hash = "sha256:41b1d2bc74c2cac6f9074df52b2efbef2b30bdfe5f40cb78f8ca22963bc62977", size = 38717, upload_time = "2025-08-12T05:53:15.214Z" }, + { url = "https://files.pythonhosted.org/packages/79/d9/7cfd5a312760ac4dd8bf0184a6ee9e43c33e47f3dadc303032ce012b8fa3/wrapt-1.17.3-cp314-cp314t-win_amd64.whl", hash = "sha256:73d496de46cd2cdbdbcce4ae4bcdb4afb6a11234a1df9c085249d55166b95116", size = 41334, upload_time = "2025-08-12T05:53:14.178Z" }, + { url = "https://files.pythonhosted.org/packages/46/78/10ad9781128ed2f99dbc474f43283b13fea8ba58723e98844367531c18e9/wrapt-1.17.3-cp314-cp314t-win_arm64.whl", hash = "sha256:f38e60678850c42461d4202739f9bf1e3a737c7ad283638251e79cc49effb6b6", size = 38471, upload_time = "2025-08-12T05:52:57.784Z" }, + { url = 
"https://files.pythonhosted.org/packages/1f/f6/a933bd70f98e9cf3e08167fc5cd7aaaca49147e48411c0bd5ae701bb2194/wrapt-1.17.3-py3-none-any.whl", hash = "sha256:7171ae35d2c33d326ac19dd8facb1e82e5fd04ef8c6c0e394d7af55a55051c22", size = 23591, upload_time = "2025-08-12T05:53:20.674Z" }, ] [[package]] name = "xxhash" version = "3.6.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/02/84/30869e01909fb37a6cc7e18688ee8bf1e42d57e7e0777636bd47524c43c7/xxhash-3.6.0.tar.gz", hash = "sha256:f0162a78b13a0d7617b2845b90c763339d1f1d82bb04a4b07f4ab535cc5e05d6", size = 85160 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/9a/07/d9412f3d7d462347e4511181dea65e47e0d0e16e26fbee2ea86a2aefb657/xxhash-3.6.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:01362c4331775398e7bb34e3ab403bc9ee9f7c497bc7dee6272114055277dd3c", size = 32744 }, - { url = "https://files.pythonhosted.org/packages/79/35/0429ee11d035fc33abe32dca1b2b69e8c18d236547b9a9b72c1929189b9a/xxhash-3.6.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b7b2df81a23f8cb99656378e72501b2cb41b1827c0f5a86f87d6b06b69f9f204", size = 30816 }, - { url = "https://files.pythonhosted.org/packages/b7/f2/57eb99aa0f7d98624c0932c5b9a170e1806406cdbcdb510546634a1359e0/xxhash-3.6.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:dc94790144e66b14f67b10ac8ed75b39ca47536bf8800eb7c24b50271ea0c490", size = 194035 }, - { url = "https://files.pythonhosted.org/packages/4c/ed/6224ba353690d73af7a3f1c7cdb1fc1b002e38f783cb991ae338e1eb3d79/xxhash-3.6.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:93f107c673bccf0d592cdba077dedaf52fe7f42dcd7676eba1f6d6f0c3efffd2", size = 212914 }, - { url = "https://files.pythonhosted.org/packages/38/86/fb6b6130d8dd6b8942cc17ab4d90e223653a89aa32ad2776f8af7064ed13/xxhash-3.6.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2aa5ee3444c25b69813663c9f8067dcfaa2e126dc55e8dddf40f4d1c25d7effa", size = 212163 }, - { url = "https://files.pythonhosted.org/packages/ee/dc/e84875682b0593e884ad73b2d40767b5790d417bde603cceb6878901d647/xxhash-3.6.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f7f99123f0e1194fa59cc69ad46dbae2e07becec5df50a0509a808f90a0f03f0", size = 445411 }, - { url = "https://files.pythonhosted.org/packages/11/4f/426f91b96701ec2f37bb2b8cec664eff4f658a11f3fa9d94f0a887ea6d2b/xxhash-3.6.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:49e03e6fe2cac4a1bc64952dd250cf0dbc5ef4ebb7b8d96bce82e2de163c82a2", size = 193883 }, - { url = "https://files.pythonhosted.org/packages/53/5a/ddbb83eee8e28b778eacfc5a85c969673e4023cdeedcfcef61f36731610b/xxhash-3.6.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:bd17fede52a17a4f9a7bc4472a5867cb0b160deeb431795c0e4abe158bc784e9", size = 210392 }, - { url = "https://files.pythonhosted.org/packages/1e/c2/ff69efd07c8c074ccdf0a4f36fcdd3d27363665bcdf4ba399abebe643465/xxhash-3.6.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:6fb5f5476bef678f69db04f2bd1efbed3030d2aba305b0fc1773645f187d6a4e", size = 197898 }, - { url = "https://files.pythonhosted.org/packages/58/ca/faa05ac19b3b622c7c9317ac3e23954187516298a091eb02c976d0d3dd45/xxhash-3.6.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:843b52f6d88071f87eba1631b684fcb4b2068cd2180a0224122fe4ef011a9374", size = 210655 }, - { url = 
"https://files.pythonhosted.org/packages/d4/7a/06aa7482345480cc0cb597f5c875b11a82c3953f534394f620b0be2f700c/xxhash-3.6.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:7d14a6cfaf03b1b6f5f9790f76880601ccc7896aff7ab9cd8978a939c1eb7e0d", size = 414001 }, - { url = "https://files.pythonhosted.org/packages/23/07/63ffb386cd47029aa2916b3d2f454e6cc5b9f5c5ada3790377d5430084e7/xxhash-3.6.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:418daf3db71e1413cfe211c2f9a528456936645c17f46b5204705581a45390ae", size = 191431 }, - { url = "https://files.pythonhosted.org/packages/0f/93/14fde614cadb4ddf5e7cebf8918b7e8fac5ae7861c1875964f17e678205c/xxhash-3.6.0-cp312-cp312-win32.whl", hash = "sha256:50fc255f39428a27299c20e280d6193d8b63b8ef8028995323bf834a026b4fbb", size = 30617 }, - { url = "https://files.pythonhosted.org/packages/13/5d/0d125536cbe7565a83d06e43783389ecae0c0f2ed037b48ede185de477c0/xxhash-3.6.0-cp312-cp312-win_amd64.whl", hash = "sha256:c0f2ab8c715630565ab8991b536ecded9416d615538be8ecddce43ccf26cbc7c", size = 31534 }, - { url = "https://files.pythonhosted.org/packages/54/85/6ec269b0952ec7e36ba019125982cf11d91256a778c7c3f98a4c5043d283/xxhash-3.6.0-cp312-cp312-win_arm64.whl", hash = "sha256:eae5c13f3bc455a3bbb68bdc513912dc7356de7e2280363ea235f71f54064829", size = 27876 }, - { url = "https://files.pythonhosted.org/packages/33/76/35d05267ac82f53ae9b0e554da7c5e281ee61f3cad44c743f0fcd354f211/xxhash-3.6.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:599e64ba7f67472481ceb6ee80fa3bd828fd61ba59fb11475572cc5ee52b89ec", size = 32738 }, - { url = "https://files.pythonhosted.org/packages/31/a8/3fbce1cd96534a95e35d5120637bf29b0d7f5d8fa2f6374e31b4156dd419/xxhash-3.6.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7d8b8aaa30fca4f16f0c84a5c8d7ddee0e25250ec2796c973775373257dde8f1", size = 30821 }, - { url = "https://files.pythonhosted.org/packages/0c/ea/d387530ca7ecfa183cb358027f1833297c6ac6098223fd14f9782cd0015c/xxhash-3.6.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:d597acf8506d6e7101a4a44a5e428977a51c0fadbbfd3c39650cca9253f6e5a6", size = 194127 }, - { url = "https://files.pythonhosted.org/packages/ba/0c/71435dcb99874b09a43b8d7c54071e600a7481e42b3e3ce1eb5226a5711a/xxhash-3.6.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:858dc935963a33bc33490128edc1c12b0c14d9c7ebaa4e387a7869ecc4f3e263", size = 212975 }, - { url = "https://files.pythonhosted.org/packages/84/7a/c2b3d071e4bb4a90b7057228a99b10d51744878f4a8a6dd643c8bd897620/xxhash-3.6.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ba284920194615cb8edf73bf52236ce2e1664ccd4a38fdb543506413529cc546", size = 212241 }, - { url = "https://files.pythonhosted.org/packages/81/5f/640b6eac0128e215f177df99eadcd0f1b7c42c274ab6a394a05059694c5a/xxhash-3.6.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4b54219177f6c6674d5378bd862c6aedf64725f70dd29c472eaae154df1a2e89", size = 445471 }, - { url = "https://files.pythonhosted.org/packages/5e/1e/3c3d3ef071b051cc3abbe3721ffb8365033a172613c04af2da89d5548a87/xxhash-3.6.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:42c36dd7dbad2f5238950c377fcbf6811b1cdb1c444fab447960030cea60504d", size = 193936 }, - { url = "https://files.pythonhosted.org/packages/2c/bd/4a5f68381939219abfe1c22a9e3a5854a4f6f6f3c4983a87d255f21f2e5d/xxhash-3.6.0-cp313-cp313-musllinux_1_2_aarch64.whl", 
hash = "sha256:f22927652cba98c44639ffdc7aaf35828dccf679b10b31c4ad72a5b530a18eb7", size = 210440 }, - { url = "https://files.pythonhosted.org/packages/eb/37/b80fe3d5cfb9faff01a02121a0f4d565eb7237e9e5fc66e73017e74dcd36/xxhash-3.6.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b45fad44d9c5c119e9c6fbf2e1c656a46dc68e280275007bbfd3d572b21426db", size = 197990 }, - { url = "https://files.pythonhosted.org/packages/d7/fd/2c0a00c97b9e18f72e1f240ad4e8f8a90fd9d408289ba9c7c495ed7dc05c/xxhash-3.6.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:6f2580ffab1a8b68ef2b901cde7e55fa8da5e4be0977c68f78fc80f3c143de42", size = 210689 }, - { url = "https://files.pythonhosted.org/packages/93/86/5dd8076a926b9a95db3206aba20d89a7fc14dd5aac16e5c4de4b56033140/xxhash-3.6.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:40c391dd3cd041ebc3ffe6f2c862f402e306eb571422e0aa918d8070ba31da11", size = 414068 }, - { url = "https://files.pythonhosted.org/packages/af/3c/0bb129170ee8f3650f08e993baee550a09593462a5cddd8e44d0011102b1/xxhash-3.6.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f205badabde7aafd1a31e8ca2a3e5a763107a71c397c4481d6a804eb5063d8bd", size = 191495 }, - { url = "https://files.pythonhosted.org/packages/e9/3a/6797e0114c21d1725e2577508e24006fd7ff1d8c0c502d3b52e45c1771d8/xxhash-3.6.0-cp313-cp313-win32.whl", hash = "sha256:2577b276e060b73b73a53042ea5bd5203d3e6347ce0d09f98500f418a9fcf799", size = 30620 }, - { url = "https://files.pythonhosted.org/packages/86/15/9bc32671e9a38b413a76d24722a2bf8784a132c043063a8f5152d390b0f9/xxhash-3.6.0-cp313-cp313-win_amd64.whl", hash = "sha256:757320d45d2fbcce8f30c42a6b2f47862967aea7bf458b9625b4bbe7ee390392", size = 31542 }, - { url = "https://files.pythonhosted.org/packages/39/c5/cc01e4f6188656e56112d6a8e0dfe298a16934b8c47a247236549a3f7695/xxhash-3.6.0-cp313-cp313-win_arm64.whl", hash = "sha256:457b8f85dec5825eed7b69c11ae86834a018b8e3df5e77783c999663da2f96d6", size = 27880 }, - { url = "https://files.pythonhosted.org/packages/f3/30/25e5321c8732759e930c555176d37e24ab84365482d257c3b16362235212/xxhash-3.6.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a42e633d75cdad6d625434e3468126c73f13f7584545a9cf34e883aa1710e702", size = 32956 }, - { url = "https://files.pythonhosted.org/packages/9f/3c/0573299560d7d9f8ab1838f1efc021a280b5ae5ae2e849034ef3dee18810/xxhash-3.6.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:568a6d743219e717b07b4e03b0a828ce593833e498c3b64752e0f5df6bfe84db", size = 31072 }, - { url = "https://files.pythonhosted.org/packages/7a/1c/52d83a06e417cd9d4137722693424885cc9878249beb3a7c829e74bf7ce9/xxhash-3.6.0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:bec91b562d8012dae276af8025a55811b875baace6af510412a5e58e3121bc54", size = 196409 }, - { url = "https://files.pythonhosted.org/packages/e3/8e/c6d158d12a79bbd0b878f8355432075fc82759e356ab5a111463422a239b/xxhash-3.6.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:78e7f2f4c521c30ad5e786fdd6bae89d47a32672a80195467b5de0480aa97b1f", size = 215736 }, - { url = "https://files.pythonhosted.org/packages/bc/68/c4c80614716345d55071a396cf03d06e34b5f4917a467faf43083c995155/xxhash-3.6.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3ed0df1b11a79856df5ffcab572cbd6b9627034c1c748c5566fa79df9048a7c5", size = 214833 }, - { url = 
"https://files.pythonhosted.org/packages/7e/e9/ae27c8ffec8b953efa84c7c4a6c6802c263d587b9fc0d6e7cea64e08c3af/xxhash-3.6.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0e4edbfc7d420925b0dd5e792478ed393d6e75ff8fc219a6546fb446b6a417b1", size = 448348 }, - { url = "https://files.pythonhosted.org/packages/d7/6b/33e21afb1b5b3f46b74b6bd1913639066af218d704cc0941404ca717fc57/xxhash-3.6.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fba27a198363a7ef87f8c0f6b171ec36b674fe9053742c58dd7e3201c1ab30ee", size = 196070 }, - { url = "https://files.pythonhosted.org/packages/96/b6/fcabd337bc5fa624e7203aa0fa7d0c49eed22f72e93229431752bddc83d9/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:794fe9145fe60191c6532fa95063765529770edcdd67b3d537793e8004cabbfd", size = 212907 }, - { url = "https://files.pythonhosted.org/packages/4b/d3/9ee6160e644d660fcf176c5825e61411c7f62648728f69c79ba237250143/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:6105ef7e62b5ac73a837778efc331a591d8442f8ef5c7e102376506cb4ae2729", size = 200839 }, - { url = "https://files.pythonhosted.org/packages/0d/98/e8de5baa5109394baf5118f5e72ab21a86387c4f89b0e77ef3e2f6b0327b/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:f01375c0e55395b814a679b3eea205db7919ac2af213f4a6682e01220e5fe292", size = 213304 }, - { url = "https://files.pythonhosted.org/packages/7b/1d/71056535dec5c3177eeb53e38e3d367dd1d16e024e63b1cee208d572a033/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:d706dca2d24d834a4661619dcacf51a75c16d65985718d6a7d73c1eeeb903ddf", size = 416930 }, - { url = "https://files.pythonhosted.org/packages/dc/6c/5cbde9de2cd967c322e651c65c543700b19e7ae3e0aae8ece3469bf9683d/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5f059d9faeacd49c0215d66f4056e1326c80503f51a1532ca336a385edadd033", size = 193787 }, - { url = "https://files.pythonhosted.org/packages/19/fa/0172e350361d61febcea941b0cc541d6e6c8d65d153e85f850a7b256ff8a/xxhash-3.6.0-cp313-cp313t-win32.whl", hash = "sha256:1244460adc3a9be84731d72b8e80625788e5815b68da3da8b83f78115a40a7ec", size = 30916 }, - { url = "https://files.pythonhosted.org/packages/ad/e6/e8cf858a2b19d6d45820f072eff1bea413910592ff17157cabc5f1227a16/xxhash-3.6.0-cp313-cp313t-win_amd64.whl", hash = "sha256:b1e420ef35c503869c4064f4a2f2b08ad6431ab7b229a05cce39d74268bca6b8", size = 31799 }, - { url = "https://files.pythonhosted.org/packages/56/15/064b197e855bfb7b343210e82490ae672f8bc7cdf3ddb02e92f64304ee8a/xxhash-3.6.0-cp313-cp313t-win_arm64.whl", hash = "sha256:ec44b73a4220623235f67a996c862049f375df3b1052d9899f40a6382c32d746", size = 28044 }, - { url = "https://files.pythonhosted.org/packages/7e/5e/0138bc4484ea9b897864d59fce9be9086030825bc778b76cb5a33a906d37/xxhash-3.6.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:a40a3d35b204b7cc7643cbcf8c9976d818cb47befcfac8bbefec8038ac363f3e", size = 32754 }, - { url = "https://files.pythonhosted.org/packages/18/d7/5dac2eb2ec75fd771957a13e5dda560efb2176d5203f39502a5fc571f899/xxhash-3.6.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:a54844be970d3fc22630b32d515e79a90d0a3ddb2644d8d7402e3c4c8da61405", size = 30846 }, - { url = "https://files.pythonhosted.org/packages/fe/71/8bc5be2bb00deb5682e92e8da955ebe5fa982da13a69da5a40a4c8db12fb/xxhash-3.6.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:016e9190af8f0a4e3741343777710e3d5717427f175adfdc3e72508f59e2a7f3", 
size = 194343 }, - { url = "https://files.pythonhosted.org/packages/e7/3b/52badfb2aecec2c377ddf1ae75f55db3ba2d321c5e164f14461c90837ef3/xxhash-3.6.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4f6f72232f849eb9d0141e2ebe2677ece15adfd0fa599bc058aad83c714bb2c6", size = 213074 }, - { url = "https://files.pythonhosted.org/packages/a2/2b/ae46b4e9b92e537fa30d03dbc19cdae57ed407e9c26d163895e968e3de85/xxhash-3.6.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:63275a8aba7865e44b1813d2177e0f5ea7eadad3dd063a21f7cf9afdc7054063", size = 212388 }, - { url = "https://files.pythonhosted.org/packages/f5/80/49f88d3afc724b4ac7fbd664c8452d6db51b49915be48c6982659e0e7942/xxhash-3.6.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3cd01fa2aa00d8b017c97eb46b9a794fbdca53fc14f845f5a328c71254b0abb7", size = 445614 }, - { url = "https://files.pythonhosted.org/packages/ed/ba/603ce3961e339413543d8cd44f21f2c80e2a7c5cfe692a7b1f2cccf58f3c/xxhash-3.6.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0226aa89035b62b6a86d3c68df4d7c1f47a342b8683da2b60cedcddb46c4d95b", size = 194024 }, - { url = "https://files.pythonhosted.org/packages/78/d1/8e225ff7113bf81545cfdcd79eef124a7b7064a0bba53605ff39590b95c2/xxhash-3.6.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c6e193e9f56e4ca4923c61238cdaced324f0feac782544eb4c6d55ad5cc99ddd", size = 210541 }, - { url = "https://files.pythonhosted.org/packages/6f/58/0f89d149f0bad89def1a8dd38feb50ccdeb643d9797ec84707091d4cb494/xxhash-3.6.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:9176dcaddf4ca963d4deb93866d739a343c01c969231dbe21680e13a5d1a5bf0", size = 198305 }, - { url = "https://files.pythonhosted.org/packages/11/38/5eab81580703c4df93feb5f32ff8fa7fe1e2c51c1f183ee4e48d4bb9d3d7/xxhash-3.6.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:c1ce4009c97a752e682b897aa99aef84191077a9433eb237774689f14f8ec152", size = 210848 }, - { url = "https://files.pythonhosted.org/packages/5e/6b/953dc4b05c3ce678abca756416e4c130d2382f877a9c30a20d08ee6a77c0/xxhash-3.6.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:8cb2f4f679b01513b7adbb9b1b2f0f9cdc31b70007eaf9d59d0878809f385b11", size = 414142 }, - { url = "https://files.pythonhosted.org/packages/08/a9/238ec0d4e81a10eb5026d4a6972677cbc898ba6c8b9dbaec12ae001b1b35/xxhash-3.6.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:653a91d7c2ab54a92c19ccf43508b6a555440b9be1bc8be553376778be7f20b5", size = 191547 }, - { url = "https://files.pythonhosted.org/packages/f1/ee/3cf8589e06c2164ac77c3bf0aa127012801128f1feebf2a079272da5737c/xxhash-3.6.0-cp314-cp314-win32.whl", hash = "sha256:a756fe893389483ee8c394d06b5ab765d96e68fbbfe6fde7aa17e11f5720559f", size = 31214 }, - { url = "https://files.pythonhosted.org/packages/02/5d/a19552fbc6ad4cb54ff953c3908bbc095f4a921bc569433d791f755186f1/xxhash-3.6.0-cp314-cp314-win_amd64.whl", hash = "sha256:39be8e4e142550ef69629c9cd71b88c90e9a5db703fecbcf265546d9536ca4ad", size = 32290 }, - { url = "https://files.pythonhosted.org/packages/b1/11/dafa0643bc30442c887b55baf8e73353a344ee89c1901b5a5c54a6c17d39/xxhash-3.6.0-cp314-cp314-win_arm64.whl", hash = "sha256:25915e6000338999236f1eb68a02a32c3275ac338628a7eaa5a269c401995679", size = 28795 }, - { url = "https://files.pythonhosted.org/packages/2c/db/0e99732ed7f64182aef4a6fb145e1a295558deec2a746265dcdec12d191e/xxhash-3.6.0-cp314-cp314t-macosx_10_13_x86_64.whl", 
hash = "sha256:c5294f596a9017ca5a3e3f8884c00b91ab2ad2933cf288f4923c3fd4346cf3d4", size = 32955 }, - { url = "https://files.pythonhosted.org/packages/55/f4/2a7c3c68e564a099becfa44bb3d398810cc0ff6749b0d3cb8ccb93f23c14/xxhash-3.6.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1cf9dcc4ab9cff01dfbba78544297a3a01dafd60f3bde4e2bfd016cf7e4ddc67", size = 31072 }, - { url = "https://files.pythonhosted.org/packages/c6/d9/72a29cddc7250e8a5819dad5d466facb5dc4c802ce120645630149127e73/xxhash-3.6.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:01262da8798422d0685f7cef03b2bd3f4f46511b02830861df548d7def4402ad", size = 196579 }, - { url = "https://files.pythonhosted.org/packages/63/93/b21590e1e381040e2ca305a884d89e1c345b347404f7780f07f2cdd47ef4/xxhash-3.6.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:51a73fb7cb3a3ead9f7a8b583ffd9b8038e277cdb8cb87cf890e88b3456afa0b", size = 215854 }, - { url = "https://files.pythonhosted.org/packages/ce/b8/edab8a7d4fa14e924b29be877d54155dcbd8b80be85ea00d2be3413a9ed4/xxhash-3.6.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b9c6df83594f7df8f7f708ce5ebeacfc69f72c9fbaaababf6cf4758eaada0c9b", size = 214965 }, - { url = "https://files.pythonhosted.org/packages/27/67/dfa980ac7f0d509d54ea0d5a486d2bb4b80c3f1bb22b66e6a05d3efaf6c0/xxhash-3.6.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:627f0af069b0ea56f312fd5189001c24578868643203bca1abbc2c52d3a6f3ca", size = 448484 }, - { url = "https://files.pythonhosted.org/packages/8c/63/8ffc2cc97e811c0ca5d00ab36604b3ea6f4254f20b7bc658ca825ce6c954/xxhash-3.6.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:aa912c62f842dfd013c5f21a642c9c10cd9f4c4e943e0af83618b4a404d9091a", size = 196162 }, - { url = "https://files.pythonhosted.org/packages/4b/77/07f0e7a3edd11a6097e990f6e5b815b6592459cb16dae990d967693e6ea9/xxhash-3.6.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:b465afd7909db30168ab62afe40b2fcf79eedc0b89a6c0ab3123515dc0df8b99", size = 213007 }, - { url = "https://files.pythonhosted.org/packages/ae/d8/bc5fa0d152837117eb0bef6f83f956c509332ce133c91c63ce07ee7c4873/xxhash-3.6.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:a881851cf38b0a70e7c4d3ce81fc7afd86fbc2a024f4cfb2a97cf49ce04b75d3", size = 200956 }, - { url = "https://files.pythonhosted.org/packages/26/a5/d749334130de9411783873e9b98ecc46688dad5db64ca6e04b02acc8b473/xxhash-3.6.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:9b3222c686a919a0f3253cfc12bb118b8b103506612253b5baeaac10d8027cf6", size = 213401 }, - { url = "https://files.pythonhosted.org/packages/89/72/abed959c956a4bfc72b58c0384bb7940663c678127538634d896b1195c10/xxhash-3.6.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:c5aa639bc113e9286137cec8fadc20e9cd732b2cc385c0b7fa673b84fc1f2a93", size = 417083 }, - { url = "https://files.pythonhosted.org/packages/0c/b3/62fd2b586283b7d7d665fb98e266decadf31f058f1cf6c478741f68af0cb/xxhash-3.6.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5c1343d49ac102799905e115aee590183c3921d475356cb24b4de29a4bc56518", size = 193913 }, - { url = "https://files.pythonhosted.org/packages/9a/9a/c19c42c5b3f5a4aad748a6d5b4f23df3bed7ee5445accc65a0fb3ff03953/xxhash-3.6.0-cp314-cp314t-win32.whl", hash = "sha256:5851f033c3030dd95c086b4a36a2683c2ff4a799b23af60977188b057e467119", size = 31586 }, - { url = 
"https://files.pythonhosted.org/packages/03/d6/4cc450345be9924fd5dc8c590ceda1db5b43a0a889587b0ae81a95511360/xxhash-3.6.0-cp314-cp314t-win_amd64.whl", hash = "sha256:0444e7967dac37569052d2409b00a8860c2135cff05502df4da80267d384849f", size = 32526 }, - { url = "https://files.pythonhosted.org/packages/0f/c9/7243eb3f9eaabd1a88a5a5acadf06df2d83b100c62684b7425c6a11bcaa8/xxhash-3.6.0-cp314-cp314t-win_arm64.whl", hash = "sha256:bb79b1e63f6fd84ec778a4b1916dfe0a7c3fdb986c06addd5db3a0d413819d95", size = 28898 }, +sdist = { url = "https://files.pythonhosted.org/packages/02/84/30869e01909fb37a6cc7e18688ee8bf1e42d57e7e0777636bd47524c43c7/xxhash-3.6.0.tar.gz", hash = "sha256:f0162a78b13a0d7617b2845b90c763339d1f1d82bb04a4b07f4ab535cc5e05d6", size = 85160, upload_time = "2025-10-02T14:37:08.097Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9a/07/d9412f3d7d462347e4511181dea65e47e0d0e16e26fbee2ea86a2aefb657/xxhash-3.6.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:01362c4331775398e7bb34e3ab403bc9ee9f7c497bc7dee6272114055277dd3c", size = 32744, upload_time = "2025-10-02T14:34:34.622Z" }, + { url = "https://files.pythonhosted.org/packages/79/35/0429ee11d035fc33abe32dca1b2b69e8c18d236547b9a9b72c1929189b9a/xxhash-3.6.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b7b2df81a23f8cb99656378e72501b2cb41b1827c0f5a86f87d6b06b69f9f204", size = 30816, upload_time = "2025-10-02T14:34:36.043Z" }, + { url = "https://files.pythonhosted.org/packages/b7/f2/57eb99aa0f7d98624c0932c5b9a170e1806406cdbcdb510546634a1359e0/xxhash-3.6.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:dc94790144e66b14f67b10ac8ed75b39ca47536bf8800eb7c24b50271ea0c490", size = 194035, upload_time = "2025-10-02T14:34:37.354Z" }, + { url = "https://files.pythonhosted.org/packages/4c/ed/6224ba353690d73af7a3f1c7cdb1fc1b002e38f783cb991ae338e1eb3d79/xxhash-3.6.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:93f107c673bccf0d592cdba077dedaf52fe7f42dcd7676eba1f6d6f0c3efffd2", size = 212914, upload_time = "2025-10-02T14:34:38.6Z" }, + { url = "https://files.pythonhosted.org/packages/38/86/fb6b6130d8dd6b8942cc17ab4d90e223653a89aa32ad2776f8af7064ed13/xxhash-3.6.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2aa5ee3444c25b69813663c9f8067dcfaa2e126dc55e8dddf40f4d1c25d7effa", size = 212163, upload_time = "2025-10-02T14:34:39.872Z" }, + { url = "https://files.pythonhosted.org/packages/ee/dc/e84875682b0593e884ad73b2d40767b5790d417bde603cceb6878901d647/xxhash-3.6.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f7f99123f0e1194fa59cc69ad46dbae2e07becec5df50a0509a808f90a0f03f0", size = 445411, upload_time = "2025-10-02T14:34:41.569Z" }, + { url = "https://files.pythonhosted.org/packages/11/4f/426f91b96701ec2f37bb2b8cec664eff4f658a11f3fa9d94f0a887ea6d2b/xxhash-3.6.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:49e03e6fe2cac4a1bc64952dd250cf0dbc5ef4ebb7b8d96bce82e2de163c82a2", size = 193883, upload_time = "2025-10-02T14:34:43.249Z" }, + { url = "https://files.pythonhosted.org/packages/53/5a/ddbb83eee8e28b778eacfc5a85c969673e4023cdeedcfcef61f36731610b/xxhash-3.6.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:bd17fede52a17a4f9a7bc4472a5867cb0b160deeb431795c0e4abe158bc784e9", size = 210392, upload_time = "2025-10-02T14:34:45.042Z" }, + { url = 
"https://files.pythonhosted.org/packages/1e/c2/ff69efd07c8c074ccdf0a4f36fcdd3d27363665bcdf4ba399abebe643465/xxhash-3.6.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:6fb5f5476bef678f69db04f2bd1efbed3030d2aba305b0fc1773645f187d6a4e", size = 197898, upload_time = "2025-10-02T14:34:46.302Z" }, + { url = "https://files.pythonhosted.org/packages/58/ca/faa05ac19b3b622c7c9317ac3e23954187516298a091eb02c976d0d3dd45/xxhash-3.6.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:843b52f6d88071f87eba1631b684fcb4b2068cd2180a0224122fe4ef011a9374", size = 210655, upload_time = "2025-10-02T14:34:47.571Z" }, + { url = "https://files.pythonhosted.org/packages/d4/7a/06aa7482345480cc0cb597f5c875b11a82c3953f534394f620b0be2f700c/xxhash-3.6.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:7d14a6cfaf03b1b6f5f9790f76880601ccc7896aff7ab9cd8978a939c1eb7e0d", size = 414001, upload_time = "2025-10-02T14:34:49.273Z" }, + { url = "https://files.pythonhosted.org/packages/23/07/63ffb386cd47029aa2916b3d2f454e6cc5b9f5c5ada3790377d5430084e7/xxhash-3.6.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:418daf3db71e1413cfe211c2f9a528456936645c17f46b5204705581a45390ae", size = 191431, upload_time = "2025-10-02T14:34:50.798Z" }, + { url = "https://files.pythonhosted.org/packages/0f/93/14fde614cadb4ddf5e7cebf8918b7e8fac5ae7861c1875964f17e678205c/xxhash-3.6.0-cp312-cp312-win32.whl", hash = "sha256:50fc255f39428a27299c20e280d6193d8b63b8ef8028995323bf834a026b4fbb", size = 30617, upload_time = "2025-10-02T14:34:51.954Z" }, + { url = "https://files.pythonhosted.org/packages/13/5d/0d125536cbe7565a83d06e43783389ecae0c0f2ed037b48ede185de477c0/xxhash-3.6.0-cp312-cp312-win_amd64.whl", hash = "sha256:c0f2ab8c715630565ab8991b536ecded9416d615538be8ecddce43ccf26cbc7c", size = 31534, upload_time = "2025-10-02T14:34:53.276Z" }, + { url = "https://files.pythonhosted.org/packages/54/85/6ec269b0952ec7e36ba019125982cf11d91256a778c7c3f98a4c5043d283/xxhash-3.6.0-cp312-cp312-win_arm64.whl", hash = "sha256:eae5c13f3bc455a3bbb68bdc513912dc7356de7e2280363ea235f71f54064829", size = 27876, upload_time = "2025-10-02T14:34:54.371Z" }, + { url = "https://files.pythonhosted.org/packages/33/76/35d05267ac82f53ae9b0e554da7c5e281ee61f3cad44c743f0fcd354f211/xxhash-3.6.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:599e64ba7f67472481ceb6ee80fa3bd828fd61ba59fb11475572cc5ee52b89ec", size = 32738, upload_time = "2025-10-02T14:34:55.839Z" }, + { url = "https://files.pythonhosted.org/packages/31/a8/3fbce1cd96534a95e35d5120637bf29b0d7f5d8fa2f6374e31b4156dd419/xxhash-3.6.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7d8b8aaa30fca4f16f0c84a5c8d7ddee0e25250ec2796c973775373257dde8f1", size = 30821, upload_time = "2025-10-02T14:34:57.219Z" }, + { url = "https://files.pythonhosted.org/packages/0c/ea/d387530ca7ecfa183cb358027f1833297c6ac6098223fd14f9782cd0015c/xxhash-3.6.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:d597acf8506d6e7101a4a44a5e428977a51c0fadbbfd3c39650cca9253f6e5a6", size = 194127, upload_time = "2025-10-02T14:34:59.21Z" }, + { url = "https://files.pythonhosted.org/packages/ba/0c/71435dcb99874b09a43b8d7c54071e600a7481e42b3e3ce1eb5226a5711a/xxhash-3.6.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:858dc935963a33bc33490128edc1c12b0c14d9c7ebaa4e387a7869ecc4f3e263", size = 212975, upload_time = "2025-10-02T14:35:00.816Z" }, + { url = 
"https://files.pythonhosted.org/packages/84/7a/c2b3d071e4bb4a90b7057228a99b10d51744878f4a8a6dd643c8bd897620/xxhash-3.6.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ba284920194615cb8edf73bf52236ce2e1664ccd4a38fdb543506413529cc546", size = 212241, upload_time = "2025-10-02T14:35:02.207Z" }, + { url = "https://files.pythonhosted.org/packages/81/5f/640b6eac0128e215f177df99eadcd0f1b7c42c274ab6a394a05059694c5a/xxhash-3.6.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4b54219177f6c6674d5378bd862c6aedf64725f70dd29c472eaae154df1a2e89", size = 445471, upload_time = "2025-10-02T14:35:03.61Z" }, + { url = "https://files.pythonhosted.org/packages/5e/1e/3c3d3ef071b051cc3abbe3721ffb8365033a172613c04af2da89d5548a87/xxhash-3.6.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:42c36dd7dbad2f5238950c377fcbf6811b1cdb1c444fab447960030cea60504d", size = 193936, upload_time = "2025-10-02T14:35:05.013Z" }, + { url = "https://files.pythonhosted.org/packages/2c/bd/4a5f68381939219abfe1c22a9e3a5854a4f6f6f3c4983a87d255f21f2e5d/xxhash-3.6.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f22927652cba98c44639ffdc7aaf35828dccf679b10b31c4ad72a5b530a18eb7", size = 210440, upload_time = "2025-10-02T14:35:06.239Z" }, + { url = "https://files.pythonhosted.org/packages/eb/37/b80fe3d5cfb9faff01a02121a0f4d565eb7237e9e5fc66e73017e74dcd36/xxhash-3.6.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b45fad44d9c5c119e9c6fbf2e1c656a46dc68e280275007bbfd3d572b21426db", size = 197990, upload_time = "2025-10-02T14:35:07.735Z" }, + { url = "https://files.pythonhosted.org/packages/d7/fd/2c0a00c97b9e18f72e1f240ad4e8f8a90fd9d408289ba9c7c495ed7dc05c/xxhash-3.6.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:6f2580ffab1a8b68ef2b901cde7e55fa8da5e4be0977c68f78fc80f3c143de42", size = 210689, upload_time = "2025-10-02T14:35:09.438Z" }, + { url = "https://files.pythonhosted.org/packages/93/86/5dd8076a926b9a95db3206aba20d89a7fc14dd5aac16e5c4de4b56033140/xxhash-3.6.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:40c391dd3cd041ebc3ffe6f2c862f402e306eb571422e0aa918d8070ba31da11", size = 414068, upload_time = "2025-10-02T14:35:11.162Z" }, + { url = "https://files.pythonhosted.org/packages/af/3c/0bb129170ee8f3650f08e993baee550a09593462a5cddd8e44d0011102b1/xxhash-3.6.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f205badabde7aafd1a31e8ca2a3e5a763107a71c397c4481d6a804eb5063d8bd", size = 191495, upload_time = "2025-10-02T14:35:12.971Z" }, + { url = "https://files.pythonhosted.org/packages/e9/3a/6797e0114c21d1725e2577508e24006fd7ff1d8c0c502d3b52e45c1771d8/xxhash-3.6.0-cp313-cp313-win32.whl", hash = "sha256:2577b276e060b73b73a53042ea5bd5203d3e6347ce0d09f98500f418a9fcf799", size = 30620, upload_time = "2025-10-02T14:35:14.129Z" }, + { url = "https://files.pythonhosted.org/packages/86/15/9bc32671e9a38b413a76d24722a2bf8784a132c043063a8f5152d390b0f9/xxhash-3.6.0-cp313-cp313-win_amd64.whl", hash = "sha256:757320d45d2fbcce8f30c42a6b2f47862967aea7bf458b9625b4bbe7ee390392", size = 31542, upload_time = "2025-10-02T14:35:15.21Z" }, + { url = "https://files.pythonhosted.org/packages/39/c5/cc01e4f6188656e56112d6a8e0dfe298a16934b8c47a247236549a3f7695/xxhash-3.6.0-cp313-cp313-win_arm64.whl", hash = "sha256:457b8f85dec5825eed7b69c11ae86834a018b8e3df5e77783c999663da2f96d6", size = 27880, upload_time = "2025-10-02T14:35:16.315Z" }, + { url = 
"https://files.pythonhosted.org/packages/f3/30/25e5321c8732759e930c555176d37e24ab84365482d257c3b16362235212/xxhash-3.6.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a42e633d75cdad6d625434e3468126c73f13f7584545a9cf34e883aa1710e702", size = 32956, upload_time = "2025-10-02T14:35:17.413Z" }, + { url = "https://files.pythonhosted.org/packages/9f/3c/0573299560d7d9f8ab1838f1efc021a280b5ae5ae2e849034ef3dee18810/xxhash-3.6.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:568a6d743219e717b07b4e03b0a828ce593833e498c3b64752e0f5df6bfe84db", size = 31072, upload_time = "2025-10-02T14:35:18.844Z" }, + { url = "https://files.pythonhosted.org/packages/7a/1c/52d83a06e417cd9d4137722693424885cc9878249beb3a7c829e74bf7ce9/xxhash-3.6.0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:bec91b562d8012dae276af8025a55811b875baace6af510412a5e58e3121bc54", size = 196409, upload_time = "2025-10-02T14:35:20.31Z" }, + { url = "https://files.pythonhosted.org/packages/e3/8e/c6d158d12a79bbd0b878f8355432075fc82759e356ab5a111463422a239b/xxhash-3.6.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:78e7f2f4c521c30ad5e786fdd6bae89d47a32672a80195467b5de0480aa97b1f", size = 215736, upload_time = "2025-10-02T14:35:21.616Z" }, + { url = "https://files.pythonhosted.org/packages/bc/68/c4c80614716345d55071a396cf03d06e34b5f4917a467faf43083c995155/xxhash-3.6.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3ed0df1b11a79856df5ffcab572cbd6b9627034c1c748c5566fa79df9048a7c5", size = 214833, upload_time = "2025-10-02T14:35:23.32Z" }, + { url = "https://files.pythonhosted.org/packages/7e/e9/ae27c8ffec8b953efa84c7c4a6c6802c263d587b9fc0d6e7cea64e08c3af/xxhash-3.6.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0e4edbfc7d420925b0dd5e792478ed393d6e75ff8fc219a6546fb446b6a417b1", size = 448348, upload_time = "2025-10-02T14:35:25.111Z" }, + { url = "https://files.pythonhosted.org/packages/d7/6b/33e21afb1b5b3f46b74b6bd1913639066af218d704cc0941404ca717fc57/xxhash-3.6.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fba27a198363a7ef87f8c0f6b171ec36b674fe9053742c58dd7e3201c1ab30ee", size = 196070, upload_time = "2025-10-02T14:35:26.586Z" }, + { url = "https://files.pythonhosted.org/packages/96/b6/fcabd337bc5fa624e7203aa0fa7d0c49eed22f72e93229431752bddc83d9/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:794fe9145fe60191c6532fa95063765529770edcdd67b3d537793e8004cabbfd", size = 212907, upload_time = "2025-10-02T14:35:28.087Z" }, + { url = "https://files.pythonhosted.org/packages/4b/d3/9ee6160e644d660fcf176c5825e61411c7f62648728f69c79ba237250143/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:6105ef7e62b5ac73a837778efc331a591d8442f8ef5c7e102376506cb4ae2729", size = 200839, upload_time = "2025-10-02T14:35:29.857Z" }, + { url = "https://files.pythonhosted.org/packages/0d/98/e8de5baa5109394baf5118f5e72ab21a86387c4f89b0e77ef3e2f6b0327b/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:f01375c0e55395b814a679b3eea205db7919ac2af213f4a6682e01220e5fe292", size = 213304, upload_time = "2025-10-02T14:35:31.222Z" }, + { url = "https://files.pythonhosted.org/packages/7b/1d/71056535dec5c3177eeb53e38e3d367dd1d16e024e63b1cee208d572a033/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = 
"sha256:d706dca2d24d834a4661619dcacf51a75c16d65985718d6a7d73c1eeeb903ddf", size = 416930, upload_time = "2025-10-02T14:35:32.517Z" }, + { url = "https://files.pythonhosted.org/packages/dc/6c/5cbde9de2cd967c322e651c65c543700b19e7ae3e0aae8ece3469bf9683d/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5f059d9faeacd49c0215d66f4056e1326c80503f51a1532ca336a385edadd033", size = 193787, upload_time = "2025-10-02T14:35:33.827Z" }, + { url = "https://files.pythonhosted.org/packages/19/fa/0172e350361d61febcea941b0cc541d6e6c8d65d153e85f850a7b256ff8a/xxhash-3.6.0-cp313-cp313t-win32.whl", hash = "sha256:1244460adc3a9be84731d72b8e80625788e5815b68da3da8b83f78115a40a7ec", size = 30916, upload_time = "2025-10-02T14:35:35.107Z" }, + { url = "https://files.pythonhosted.org/packages/ad/e6/e8cf858a2b19d6d45820f072eff1bea413910592ff17157cabc5f1227a16/xxhash-3.6.0-cp313-cp313t-win_amd64.whl", hash = "sha256:b1e420ef35c503869c4064f4a2f2b08ad6431ab7b229a05cce39d74268bca6b8", size = 31799, upload_time = "2025-10-02T14:35:36.165Z" }, + { url = "https://files.pythonhosted.org/packages/56/15/064b197e855bfb7b343210e82490ae672f8bc7cdf3ddb02e92f64304ee8a/xxhash-3.6.0-cp313-cp313t-win_arm64.whl", hash = "sha256:ec44b73a4220623235f67a996c862049f375df3b1052d9899f40a6382c32d746", size = 28044, upload_time = "2025-10-02T14:35:37.195Z" }, + { url = "https://files.pythonhosted.org/packages/7e/5e/0138bc4484ea9b897864d59fce9be9086030825bc778b76cb5a33a906d37/xxhash-3.6.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:a40a3d35b204b7cc7643cbcf8c9976d818cb47befcfac8bbefec8038ac363f3e", size = 32754, upload_time = "2025-10-02T14:35:38.245Z" }, + { url = "https://files.pythonhosted.org/packages/18/d7/5dac2eb2ec75fd771957a13e5dda560efb2176d5203f39502a5fc571f899/xxhash-3.6.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:a54844be970d3fc22630b32d515e79a90d0a3ddb2644d8d7402e3c4c8da61405", size = 30846, upload_time = "2025-10-02T14:35:39.6Z" }, + { url = "https://files.pythonhosted.org/packages/fe/71/8bc5be2bb00deb5682e92e8da955ebe5fa982da13a69da5a40a4c8db12fb/xxhash-3.6.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:016e9190af8f0a4e3741343777710e3d5717427f175adfdc3e72508f59e2a7f3", size = 194343, upload_time = "2025-10-02T14:35:40.69Z" }, + { url = "https://files.pythonhosted.org/packages/e7/3b/52badfb2aecec2c377ddf1ae75f55db3ba2d321c5e164f14461c90837ef3/xxhash-3.6.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4f6f72232f849eb9d0141e2ebe2677ece15adfd0fa599bc058aad83c714bb2c6", size = 213074, upload_time = "2025-10-02T14:35:42.29Z" }, + { url = "https://files.pythonhosted.org/packages/a2/2b/ae46b4e9b92e537fa30d03dbc19cdae57ed407e9c26d163895e968e3de85/xxhash-3.6.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:63275a8aba7865e44b1813d2177e0f5ea7eadad3dd063a21f7cf9afdc7054063", size = 212388, upload_time = "2025-10-02T14:35:43.929Z" }, + { url = "https://files.pythonhosted.org/packages/f5/80/49f88d3afc724b4ac7fbd664c8452d6db51b49915be48c6982659e0e7942/xxhash-3.6.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3cd01fa2aa00d8b017c97eb46b9a794fbdca53fc14f845f5a328c71254b0abb7", size = 445614, upload_time = "2025-10-02T14:35:45.216Z" }, + { url = 
"https://files.pythonhosted.org/packages/ed/ba/603ce3961e339413543d8cd44f21f2c80e2a7c5cfe692a7b1f2cccf58f3c/xxhash-3.6.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0226aa89035b62b6a86d3c68df4d7c1f47a342b8683da2b60cedcddb46c4d95b", size = 194024, upload_time = "2025-10-02T14:35:46.959Z" }, + { url = "https://files.pythonhosted.org/packages/78/d1/8e225ff7113bf81545cfdcd79eef124a7b7064a0bba53605ff39590b95c2/xxhash-3.6.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c6e193e9f56e4ca4923c61238cdaced324f0feac782544eb4c6d55ad5cc99ddd", size = 210541, upload_time = "2025-10-02T14:35:48.301Z" }, + { url = "https://files.pythonhosted.org/packages/6f/58/0f89d149f0bad89def1a8dd38feb50ccdeb643d9797ec84707091d4cb494/xxhash-3.6.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:9176dcaddf4ca963d4deb93866d739a343c01c969231dbe21680e13a5d1a5bf0", size = 198305, upload_time = "2025-10-02T14:35:49.584Z" }, + { url = "https://files.pythonhosted.org/packages/11/38/5eab81580703c4df93feb5f32ff8fa7fe1e2c51c1f183ee4e48d4bb9d3d7/xxhash-3.6.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:c1ce4009c97a752e682b897aa99aef84191077a9433eb237774689f14f8ec152", size = 210848, upload_time = "2025-10-02T14:35:50.877Z" }, + { url = "https://files.pythonhosted.org/packages/5e/6b/953dc4b05c3ce678abca756416e4c130d2382f877a9c30a20d08ee6a77c0/xxhash-3.6.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:8cb2f4f679b01513b7adbb9b1b2f0f9cdc31b70007eaf9d59d0878809f385b11", size = 414142, upload_time = "2025-10-02T14:35:52.15Z" }, + { url = "https://files.pythonhosted.org/packages/08/a9/238ec0d4e81a10eb5026d4a6972677cbc898ba6c8b9dbaec12ae001b1b35/xxhash-3.6.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:653a91d7c2ab54a92c19ccf43508b6a555440b9be1bc8be553376778be7f20b5", size = 191547, upload_time = "2025-10-02T14:35:53.547Z" }, + { url = "https://files.pythonhosted.org/packages/f1/ee/3cf8589e06c2164ac77c3bf0aa127012801128f1feebf2a079272da5737c/xxhash-3.6.0-cp314-cp314-win32.whl", hash = "sha256:a756fe893389483ee8c394d06b5ab765d96e68fbbfe6fde7aa17e11f5720559f", size = 31214, upload_time = "2025-10-02T14:35:54.746Z" }, + { url = "https://files.pythonhosted.org/packages/02/5d/a19552fbc6ad4cb54ff953c3908bbc095f4a921bc569433d791f755186f1/xxhash-3.6.0-cp314-cp314-win_amd64.whl", hash = "sha256:39be8e4e142550ef69629c9cd71b88c90e9a5db703fecbcf265546d9536ca4ad", size = 32290, upload_time = "2025-10-02T14:35:55.791Z" }, + { url = "https://files.pythonhosted.org/packages/b1/11/dafa0643bc30442c887b55baf8e73353a344ee89c1901b5a5c54a6c17d39/xxhash-3.6.0-cp314-cp314-win_arm64.whl", hash = "sha256:25915e6000338999236f1eb68a02a32c3275ac338628a7eaa5a269c401995679", size = 28795, upload_time = "2025-10-02T14:35:57.162Z" }, + { url = "https://files.pythonhosted.org/packages/2c/db/0e99732ed7f64182aef4a6fb145e1a295558deec2a746265dcdec12d191e/xxhash-3.6.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:c5294f596a9017ca5a3e3f8884c00b91ab2ad2933cf288f4923c3fd4346cf3d4", size = 32955, upload_time = "2025-10-02T14:35:58.267Z" }, + { url = "https://files.pythonhosted.org/packages/55/f4/2a7c3c68e564a099becfa44bb3d398810cc0ff6749b0d3cb8ccb93f23c14/xxhash-3.6.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1cf9dcc4ab9cff01dfbba78544297a3a01dafd60f3bde4e2bfd016cf7e4ddc67", size = 31072, upload_time = "2025-10-02T14:35:59.382Z" }, + { url = 
"https://files.pythonhosted.org/packages/c6/d9/72a29cddc7250e8a5819dad5d466facb5dc4c802ce120645630149127e73/xxhash-3.6.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:01262da8798422d0685f7cef03b2bd3f4f46511b02830861df548d7def4402ad", size = 196579, upload_time = "2025-10-02T14:36:00.838Z" }, + { url = "https://files.pythonhosted.org/packages/63/93/b21590e1e381040e2ca305a884d89e1c345b347404f7780f07f2cdd47ef4/xxhash-3.6.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:51a73fb7cb3a3ead9f7a8b583ffd9b8038e277cdb8cb87cf890e88b3456afa0b", size = 215854, upload_time = "2025-10-02T14:36:02.207Z" }, + { url = "https://files.pythonhosted.org/packages/ce/b8/edab8a7d4fa14e924b29be877d54155dcbd8b80be85ea00d2be3413a9ed4/xxhash-3.6.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b9c6df83594f7df8f7f708ce5ebeacfc69f72c9fbaaababf6cf4758eaada0c9b", size = 214965, upload_time = "2025-10-02T14:36:03.507Z" }, + { url = "https://files.pythonhosted.org/packages/27/67/dfa980ac7f0d509d54ea0d5a486d2bb4b80c3f1bb22b66e6a05d3efaf6c0/xxhash-3.6.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:627f0af069b0ea56f312fd5189001c24578868643203bca1abbc2c52d3a6f3ca", size = 448484, upload_time = "2025-10-02T14:36:04.828Z" }, + { url = "https://files.pythonhosted.org/packages/8c/63/8ffc2cc97e811c0ca5d00ab36604b3ea6f4254f20b7bc658ca825ce6c954/xxhash-3.6.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:aa912c62f842dfd013c5f21a642c9c10cd9f4c4e943e0af83618b4a404d9091a", size = 196162, upload_time = "2025-10-02T14:36:06.182Z" }, + { url = "https://files.pythonhosted.org/packages/4b/77/07f0e7a3edd11a6097e990f6e5b815b6592459cb16dae990d967693e6ea9/xxhash-3.6.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:b465afd7909db30168ab62afe40b2fcf79eedc0b89a6c0ab3123515dc0df8b99", size = 213007, upload_time = "2025-10-02T14:36:07.733Z" }, + { url = "https://files.pythonhosted.org/packages/ae/d8/bc5fa0d152837117eb0bef6f83f956c509332ce133c91c63ce07ee7c4873/xxhash-3.6.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:a881851cf38b0a70e7c4d3ce81fc7afd86fbc2a024f4cfb2a97cf49ce04b75d3", size = 200956, upload_time = "2025-10-02T14:36:09.106Z" }, + { url = "https://files.pythonhosted.org/packages/26/a5/d749334130de9411783873e9b98ecc46688dad5db64ca6e04b02acc8b473/xxhash-3.6.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:9b3222c686a919a0f3253cfc12bb118b8b103506612253b5baeaac10d8027cf6", size = 213401, upload_time = "2025-10-02T14:36:10.585Z" }, + { url = "https://files.pythonhosted.org/packages/89/72/abed959c956a4bfc72b58c0384bb7940663c678127538634d896b1195c10/xxhash-3.6.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:c5aa639bc113e9286137cec8fadc20e9cd732b2cc385c0b7fa673b84fc1f2a93", size = 417083, upload_time = "2025-10-02T14:36:12.276Z" }, + { url = "https://files.pythonhosted.org/packages/0c/b3/62fd2b586283b7d7d665fb98e266decadf31f058f1cf6c478741f68af0cb/xxhash-3.6.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5c1343d49ac102799905e115aee590183c3921d475356cb24b4de29a4bc56518", size = 193913, upload_time = "2025-10-02T14:36:14.025Z" }, + { url = "https://files.pythonhosted.org/packages/9a/9a/c19c42c5b3f5a4aad748a6d5b4f23df3bed7ee5445accc65a0fb3ff03953/xxhash-3.6.0-cp314-cp314t-win32.whl", hash = 
"sha256:5851f033c3030dd95c086b4a36a2683c2ff4a799b23af60977188b057e467119", size = 31586, upload_time = "2025-10-02T14:36:15.603Z" }, + { url = "https://files.pythonhosted.org/packages/03/d6/4cc450345be9924fd5dc8c590ceda1db5b43a0a889587b0ae81a95511360/xxhash-3.6.0-cp314-cp314t-win_amd64.whl", hash = "sha256:0444e7967dac37569052d2409b00a8860c2135cff05502df4da80267d384849f", size = 32526, upload_time = "2025-10-02T14:36:16.708Z" }, + { url = "https://files.pythonhosted.org/packages/0f/c9/7243eb3f9eaabd1a88a5a5acadf06df2d83b100c62684b7425c6a11bcaa8/xxhash-3.6.0-cp314-cp314t-win_arm64.whl", hash = "sha256:bb79b1e63f6fd84ec778a4b1916dfe0a7c3fdb986c06addd5db3a0d413819d95", size = 28898, upload_time = "2025-10-02T14:36:17.843Z" }, ] [[package]] @@ -3587,98 +3700,98 @@ dependencies = [ { name = "multidict" }, { name = "propcache" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/57/63/0c6ebca57330cd313f6102b16dd57ffaf3ec4c83403dcb45dbd15c6f3ea1/yarl-1.22.0.tar.gz", hash = "sha256:bebf8557577d4401ba8bd9ff33906f1376c877aa78d1fe216ad01b4d6745af71", size = 187169 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/75/ff/46736024fee3429b80a165a732e38e5d5a238721e634ab41b040d49f8738/yarl-1.22.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e340382d1afa5d32b892b3ff062436d592ec3d692aeea3bef3a5cfe11bbf8c6f", size = 142000 }, - { url = "https://files.pythonhosted.org/packages/5a/9a/b312ed670df903145598914770eb12de1bac44599549b3360acc96878df8/yarl-1.22.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f1e09112a2c31ffe8d80be1b0988fa6a18c5d5cad92a9ffbb1c04c91bfe52ad2", size = 94338 }, - { url = "https://files.pythonhosted.org/packages/ba/f5/0601483296f09c3c65e303d60c070a5c19fcdbc72daa061e96170785bc7d/yarl-1.22.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:939fe60db294c786f6b7c2d2e121576628468f65453d86b0fe36cb52f987bd74", size = 94909 }, - { url = "https://files.pythonhosted.org/packages/60/41/9a1fe0b73dbcefce72e46cf149b0e0a67612d60bfc90fb59c2b2efdfbd86/yarl-1.22.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e1651bf8e0398574646744c1885a41198eba53dc8a9312b954073f845c90a8df", size = 372940 }, - { url = "https://files.pythonhosted.org/packages/17/7a/795cb6dfee561961c30b800f0ed616b923a2ec6258b5def2a00bf8231334/yarl-1.22.0-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:b8a0588521a26bf92a57a1705b77b8b59044cdceccac7151bd8d229e66b8dedb", size = 345825 }, - { url = "https://files.pythonhosted.org/packages/d7/93/a58f4d596d2be2ae7bab1a5846c4d270b894958845753b2c606d666744d3/yarl-1.22.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:42188e6a615c1a75bcaa6e150c3fe8f3e8680471a6b10150c5f7e83f47cc34d2", size = 386705 }, - { url = "https://files.pythonhosted.org/packages/61/92/682279d0e099d0e14d7fd2e176bd04f48de1484f56546a3e1313cd6c8e7c/yarl-1.22.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f6d2cb59377d99718913ad9a151030d6f83ef420a2b8f521d94609ecc106ee82", size = 396518 }, - { url = "https://files.pythonhosted.org/packages/db/0f/0d52c98b8a885aeda831224b78f3be7ec2e1aa4a62091f9f9188c3c65b56/yarl-1.22.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:50678a3b71c751d58d7908edc96d332af328839eea883bb554a43f539101277a", size = 377267 }, - { url = 
"https://files.pythonhosted.org/packages/22/42/d2685e35908cbeaa6532c1fc73e89e7f2efb5d8a7df3959ea8e37177c5a3/yarl-1.22.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1e8fbaa7cec507aa24ea27a01456e8dd4b6fab829059b69844bd348f2d467124", size = 365797 }, - { url = "https://files.pythonhosted.org/packages/a2/83/cf8c7bcc6355631762f7d8bdab920ad09b82efa6b722999dfb05afa6cfac/yarl-1.22.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:433885ab5431bc3d3d4f2f9bd15bfa1614c522b0f1405d62c4f926ccd69d04fa", size = 365535 }, - { url = "https://files.pythonhosted.org/packages/25/e1/5302ff9b28f0c59cac913b91fe3f16c59a033887e57ce9ca5d41a3a94737/yarl-1.22.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:b790b39c7e9a4192dc2e201a282109ed2985a1ddbd5ac08dc56d0e121400a8f7", size = 382324 }, - { url = "https://files.pythonhosted.org/packages/bf/cd/4617eb60f032f19ae3a688dc990d8f0d89ee0ea378b61cac81ede3e52fae/yarl-1.22.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:31f0b53913220599446872d757257be5898019c85e7971599065bc55065dc99d", size = 383803 }, - { url = "https://files.pythonhosted.org/packages/59/65/afc6e62bb506a319ea67b694551dab4a7e6fb7bf604e9bd9f3e11d575fec/yarl-1.22.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a49370e8f711daec68d09b821a34e1167792ee2d24d405cbc2387be4f158b520", size = 374220 }, - { url = "https://files.pythonhosted.org/packages/e7/3d/68bf18d50dc674b942daec86a9ba922d3113d8399b0e52b9897530442da2/yarl-1.22.0-cp312-cp312-win32.whl", hash = "sha256:70dfd4f241c04bd9239d53b17f11e6ab672b9f1420364af63e8531198e3f5fe8", size = 81589 }, - { url = "https://files.pythonhosted.org/packages/c8/9a/6ad1a9b37c2f72874f93e691b2e7ecb6137fb2b899983125db4204e47575/yarl-1.22.0-cp312-cp312-win_amd64.whl", hash = "sha256:8884d8b332a5e9b88e23f60bb166890009429391864c685e17bd73a9eda9105c", size = 87213 }, - { url = "https://files.pythonhosted.org/packages/44/c5/c21b562d1680a77634d748e30c653c3ca918beb35555cff24986fff54598/yarl-1.22.0-cp312-cp312-win_arm64.whl", hash = "sha256:ea70f61a47f3cc93bdf8b2f368ed359ef02a01ca6393916bc8ff877427181e74", size = 81330 }, - { url = "https://files.pythonhosted.org/packages/ea/f3/d67de7260456ee105dc1d162d43a019ecad6b91e2f51809d6cddaa56690e/yarl-1.22.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8dee9c25c74997f6a750cd317b8ca63545169c098faee42c84aa5e506c819b53", size = 139980 }, - { url = "https://files.pythonhosted.org/packages/01/88/04d98af0b47e0ef42597b9b28863b9060bb515524da0a65d5f4db160b2d5/yarl-1.22.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:01e73b85a5434f89fc4fe27dcda2aff08ddf35e4d47bbbea3bdcd25321af538a", size = 93424 }, - { url = "https://files.pythonhosted.org/packages/18/91/3274b215fd8442a03975ce6bee5fe6aa57a8326b29b9d3d56234a1dca244/yarl-1.22.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:22965c2af250d20c873cdbee8ff958fb809940aeb2e74ba5f20aaf6b7ac8c70c", size = 93821 }, - { url = "https://files.pythonhosted.org/packages/61/3a/caf4e25036db0f2da4ca22a353dfeb3c9d3c95d2761ebe9b14df8fc16eb0/yarl-1.22.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b4f15793aa49793ec8d1c708ab7f9eded1aa72edc5174cae703651555ed1b601", size = 373243 }, - { url = "https://files.pythonhosted.org/packages/6e/9e/51a77ac7516e8e7803b06e01f74e78649c24ee1021eca3d6a739cb6ea49c/yarl-1.22.0-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e5542339dcf2747135c5c85f68680353d5cb9ffd741c0f2e8d832d054d41f35a", size = 342361 }, - { url = 
"https://files.pythonhosted.org/packages/d4/f8/33b92454789dde8407f156c00303e9a891f1f51a0330b0fad7c909f87692/yarl-1.22.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:5c401e05ad47a75869c3ab3e35137f8468b846770587e70d71e11de797d113df", size = 387036 }, - { url = "https://files.pythonhosted.org/packages/d9/9a/c5db84ea024f76838220280f732970aa4ee154015d7f5c1bfb60a267af6f/yarl-1.22.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:243dda95d901c733f5b59214d28b0120893d91777cb8aa043e6ef059d3cddfe2", size = 397671 }, - { url = "https://files.pythonhosted.org/packages/11/c9/cd8538dc2e7727095e0c1d867bad1e40c98f37763e6d995c1939f5fdc7b1/yarl-1.22.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bec03d0d388060058f5d291a813f21c011041938a441c593374da6077fe21b1b", size = 377059 }, - { url = "https://files.pythonhosted.org/packages/a1/b9/ab437b261702ced75122ed78a876a6dec0a1b0f5e17a4ac7a9a2482d8abe/yarl-1.22.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b0748275abb8c1e1e09301ee3cf90c8a99678a4e92e4373705f2a2570d581273", size = 365356 }, - { url = "https://files.pythonhosted.org/packages/b2/9d/8e1ae6d1d008a9567877b08f0ce4077a29974c04c062dabdb923ed98e6fe/yarl-1.22.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:47fdb18187e2a4e18fda2c25c05d8251a9e4a521edaed757fef033e7d8498d9a", size = 361331 }, - { url = "https://files.pythonhosted.org/packages/ca/5a/09b7be3905962f145b73beb468cdd53db8aa171cf18c80400a54c5b82846/yarl-1.22.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c7044802eec4524fde550afc28edda0dd5784c4c45f0be151a2d3ba017daca7d", size = 382590 }, - { url = "https://files.pythonhosted.org/packages/aa/7f/59ec509abf90eda5048b0bc3e2d7b5099dffdb3e6b127019895ab9d5ef44/yarl-1.22.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:139718f35149ff544caba20fce6e8a2f71f1e39b92c700d8438a0b1d2a631a02", size = 385316 }, - { url = "https://files.pythonhosted.org/packages/e5/84/891158426bc8036bfdfd862fabd0e0fa25df4176ec793e447f4b85cf1be4/yarl-1.22.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e1b51bebd221006d3d2f95fbe124b22b247136647ae5dcc8c7acafba66e5ee67", size = 374431 }, - { url = "https://files.pythonhosted.org/packages/bb/49/03da1580665baa8bef5e8ed34c6df2c2aca0a2f28bf397ed238cc1bbc6f2/yarl-1.22.0-cp313-cp313-win32.whl", hash = "sha256:d3e32536234a95f513bd374e93d717cf6b2231a791758de6c509e3653f234c95", size = 81555 }, - { url = "https://files.pythonhosted.org/packages/9a/ee/450914ae11b419eadd067c6183ae08381cfdfcb9798b90b2b713bbebddda/yarl-1.22.0-cp313-cp313-win_amd64.whl", hash = "sha256:47743b82b76d89a1d20b83e60d5c20314cbd5ba2befc9cda8f28300c4a08ed4d", size = 86965 }, - { url = "https://files.pythonhosted.org/packages/98/4d/264a01eae03b6cf629ad69bae94e3b0e5344741e929073678e84bf7a3e3b/yarl-1.22.0-cp313-cp313-win_arm64.whl", hash = "sha256:5d0fcda9608875f7d052eff120c7a5da474a6796fe4d83e152e0e4d42f6d1a9b", size = 81205 }, - { url = "https://files.pythonhosted.org/packages/88/fc/6908f062a2f77b5f9f6d69cecb1747260831ff206adcbc5b510aff88df91/yarl-1.22.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:719ae08b6972befcba4310e49edb1161a88cdd331e3a694b84466bd938a6ab10", size = 146209 }, - { url = "https://files.pythonhosted.org/packages/65/47/76594ae8eab26210b4867be6f49129861ad33da1f1ebdf7051e98492bf62/yarl-1.22.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:47d8a5c446df1c4db9d21b49619ffdba90e77c89ec6e283f453856c74b50b9e3", 
size = 95966 }, - { url = "https://files.pythonhosted.org/packages/ab/ce/05e9828a49271ba6b5b038b15b3934e996980dd78abdfeb52a04cfb9467e/yarl-1.22.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:cfebc0ac8333520d2d0423cbbe43ae43c8838862ddb898f5ca68565e395516e9", size = 97312 }, - { url = "https://files.pythonhosted.org/packages/d1/c5/7dffad5e4f2265b29c9d7ec869c369e4223166e4f9206fc2243ee9eea727/yarl-1.22.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4398557cbf484207df000309235979c79c4356518fd5c99158c7d38203c4da4f", size = 361967 }, - { url = "https://files.pythonhosted.org/packages/50/b2/375b933c93a54bff7fc041e1a6ad2c0f6f733ffb0c6e642ce56ee3b39970/yarl-1.22.0-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:2ca6fd72a8cd803be290d42f2dec5cdcd5299eeb93c2d929bf060ad9efaf5de0", size = 323949 }, - { url = "https://files.pythonhosted.org/packages/66/50/bfc2a29a1d78644c5a7220ce2f304f38248dc94124a326794e677634b6cf/yarl-1.22.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ca1f59c4e1ab6e72f0a23c13fca5430f889634166be85dbf1013683e49e3278e", size = 361818 }, - { url = "https://files.pythonhosted.org/packages/46/96/f3941a46af7d5d0f0498f86d71275696800ddcdd20426298e572b19b91ff/yarl-1.22.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:6c5010a52015e7c70f86eb967db0f37f3c8bd503a695a49f8d45700144667708", size = 372626 }, - { url = "https://files.pythonhosted.org/packages/c1/42/8b27c83bb875cd89448e42cd627e0fb971fa1675c9ec546393d18826cb50/yarl-1.22.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d7672ecf7557476642c88497c2f8d8542f8e36596e928e9bcba0e42e1e7d71f", size = 341129 }, - { url = "https://files.pythonhosted.org/packages/49/36/99ca3122201b382a3cf7cc937b95235b0ac944f7e9f2d5331d50821ed352/yarl-1.22.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:3b7c88eeef021579d600e50363e0b6ee4f7f6f728cd3486b9d0f3ee7b946398d", size = 346776 }, - { url = "https://files.pythonhosted.org/packages/85/b4/47328bf996acd01a4c16ef9dcd2f59c969f495073616586f78cd5f2efb99/yarl-1.22.0-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:f4afb5c34f2c6fecdcc182dfcfc6af6cccf1aa923eed4d6a12e9d96904e1a0d8", size = 334879 }, - { url = "https://files.pythonhosted.org/packages/c2/ad/b77d7b3f14a4283bffb8e92c6026496f6de49751c2f97d4352242bba3990/yarl-1.22.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:59c189e3e99a59cf8d83cbb31d4db02d66cda5a1a4374e8a012b51255341abf5", size = 350996 }, - { url = "https://files.pythonhosted.org/packages/81/c8/06e1d69295792ba54d556f06686cbd6a7ce39c22307100e3fb4a2c0b0a1d/yarl-1.22.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:5a3bf7f62a289fa90f1990422dc8dff5a458469ea71d1624585ec3a4c8d6960f", size = 356047 }, - { url = "https://files.pythonhosted.org/packages/4b/b8/4c0e9e9f597074b208d18cef227d83aac36184bfbc6eab204ea55783dbc5/yarl-1.22.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:de6b9a04c606978fdfe72666fa216ffcf2d1a9f6a381058d4378f8d7b1e5de62", size = 342947 }, - { url = "https://files.pythonhosted.org/packages/e0/e5/11f140a58bf4c6ad7aca69a892bff0ee638c31bea4206748fc0df4ebcb3a/yarl-1.22.0-cp313-cp313t-win32.whl", hash = "sha256:1834bb90991cc2999f10f97f5f01317f99b143284766d197e43cd5b45eb18d03", size = 86943 }, - { url = 
"https://files.pythonhosted.org/packages/31/74/8b74bae38ed7fe6793d0c15a0c8207bbb819cf287788459e5ed230996cdd/yarl-1.22.0-cp313-cp313t-win_amd64.whl", hash = "sha256:ff86011bd159a9d2dfc89c34cfd8aff12875980e3bd6a39ff097887520e60249", size = 93715 }, - { url = "https://files.pythonhosted.org/packages/69/66/991858aa4b5892d57aef7ee1ba6b4d01ec3b7eb3060795d34090a3ca3278/yarl-1.22.0-cp313-cp313t-win_arm64.whl", hash = "sha256:7861058d0582b847bc4e3a4a4c46828a410bca738673f35a29ba3ca5db0b473b", size = 83857 }, - { url = "https://files.pythonhosted.org/packages/46/b3/e20ef504049f1a1c54a814b4b9bed96d1ac0e0610c3b4da178f87209db05/yarl-1.22.0-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:34b36c2c57124530884d89d50ed2c1478697ad7473efd59cfd479945c95650e4", size = 140520 }, - { url = "https://files.pythonhosted.org/packages/e4/04/3532d990fdbab02e5ede063676b5c4260e7f3abea2151099c2aa745acc4c/yarl-1.22.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:0dd9a702591ca2e543631c2a017e4a547e38a5c0f29eece37d9097e04a7ac683", size = 93504 }, - { url = "https://files.pythonhosted.org/packages/11/63/ff458113c5c2dac9a9719ac68ee7c947cb621432bcf28c9972b1c0e83938/yarl-1.22.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:594fcab1032e2d2cc3321bb2e51271e7cd2b516c7d9aee780ece81b07ff8244b", size = 94282 }, - { url = "https://files.pythonhosted.org/packages/a7/bc/315a56aca762d44a6aaaf7ad253f04d996cb6b27bad34410f82d76ea8038/yarl-1.22.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f3d7a87a78d46a2e3d5b72587ac14b4c16952dd0887dbb051451eceac774411e", size = 372080 }, - { url = "https://files.pythonhosted.org/packages/3f/3f/08e9b826ec2e099ea6e7c69a61272f4f6da62cb5b1b63590bb80ca2e4a40/yarl-1.22.0-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:852863707010316c973162e703bddabec35e8757e67fcb8ad58829de1ebc8590", size = 338696 }, - { url = "https://files.pythonhosted.org/packages/e3/9f/90360108e3b32bd76789088e99538febfea24a102380ae73827f62073543/yarl-1.22.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:131a085a53bfe839a477c0845acf21efc77457ba2bcf5899618136d64f3303a2", size = 387121 }, - { url = "https://files.pythonhosted.org/packages/98/92/ab8d4657bd5b46a38094cfaea498f18bb70ce6b63508fd7e909bd1f93066/yarl-1.22.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:078a8aefd263f4d4f923a9677b942b445a2be970ca24548a8102689a3a8ab8da", size = 394080 }, - { url = "https://files.pythonhosted.org/packages/f5/e7/d8c5a7752fef68205296201f8ec2bf718f5c805a7a7e9880576c67600658/yarl-1.22.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bca03b91c323036913993ff5c738d0842fc9c60c4648e5c8d98331526df89784", size = 372661 }, - { url = "https://files.pythonhosted.org/packages/b6/2e/f4d26183c8db0bb82d491b072f3127fb8c381a6206a3a56332714b79b751/yarl-1.22.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:68986a61557d37bb90d3051a45b91fa3d5c516d177dfc6dd6f2f436a07ff2b6b", size = 364645 }, - { url = "https://files.pythonhosted.org/packages/80/7c/428e5812e6b87cd00ee8e898328a62c95825bf37c7fa87f0b6bb2ad31304/yarl-1.22.0-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:4792b262d585ff0dff6bcb787f8492e40698443ec982a3568c2096433660c694", size = 355361 }, - { url = 
"https://files.pythonhosted.org/packages/ec/2a/249405fd26776f8b13c067378ef4d7dd49c9098d1b6457cdd152a99e96a9/yarl-1.22.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:ebd4549b108d732dba1d4ace67614b9545b21ece30937a63a65dd34efa19732d", size = 381451 }, - { url = "https://files.pythonhosted.org/packages/67/a8/fb6b1adbe98cf1e2dd9fad71003d3a63a1bc22459c6e15f5714eb9323b93/yarl-1.22.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:f87ac53513d22240c7d59203f25cc3beac1e574c6cd681bbfd321987b69f95fd", size = 383814 }, - { url = "https://files.pythonhosted.org/packages/d9/f9/3aa2c0e480fb73e872ae2814c43bc1e734740bb0d54e8cb2a95925f98131/yarl-1.22.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:22b029f2881599e2f1b06f8f1db2ee63bd309e2293ba2d566e008ba12778b8da", size = 370799 }, - { url = "https://files.pythonhosted.org/packages/50/3c/af9dba3b8b5eeb302f36f16f92791f3ea62e3f47763406abf6d5a4a3333b/yarl-1.22.0-cp314-cp314-win32.whl", hash = "sha256:6a635ea45ba4ea8238463b4f7d0e721bad669f80878b7bfd1f89266e2ae63da2", size = 82990 }, - { url = "https://files.pythonhosted.org/packages/ac/30/ac3a0c5bdc1d6efd1b41fa24d4897a4329b3b1e98de9449679dd327af4f0/yarl-1.22.0-cp314-cp314-win_amd64.whl", hash = "sha256:0d6e6885777af0f110b0e5d7e5dda8b704efed3894da26220b7f3d887b839a79", size = 88292 }, - { url = "https://files.pythonhosted.org/packages/df/0a/227ab4ff5b998a1b7410abc7b46c9b7a26b0ca9e86c34ba4b8d8bc7c63d5/yarl-1.22.0-cp314-cp314-win_arm64.whl", hash = "sha256:8218f4e98d3c10d683584cb40f0424f4b9fd6e95610232dd75e13743b070ee33", size = 82888 }, - { url = "https://files.pythonhosted.org/packages/06/5e/a15eb13db90abd87dfbefb9760c0f3f257ac42a5cac7e75dbc23bed97a9f/yarl-1.22.0-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:45c2842ff0e0d1b35a6bf1cd6c690939dacb617a70827f715232b2e0494d55d1", size = 146223 }, - { url = "https://files.pythonhosted.org/packages/18/82/9665c61910d4d84f41a5bf6837597c89e665fa88aa4941080704645932a9/yarl-1.22.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:d947071e6ebcf2e2bee8fce76e10faca8f7a14808ca36a910263acaacef08eca", size = 95981 }, - { url = "https://files.pythonhosted.org/packages/5d/9a/2f65743589809af4d0a6d3aa749343c4b5f4c380cc24a8e94a3c6625a808/yarl-1.22.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:334b8721303e61b00019474cc103bdac3d7b1f65e91f0bfedeec2d56dfe74b53", size = 97303 }, - { url = "https://files.pythonhosted.org/packages/b0/ab/5b13d3e157505c43c3b43b5a776cbf7b24a02bc4cccc40314771197e3508/yarl-1.22.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1e7ce67c34138a058fd092f67d07a72b8e31ff0c9236e751957465a24b28910c", size = 361820 }, - { url = "https://files.pythonhosted.org/packages/fb/76/242a5ef4677615cf95330cfc1b4610e78184400699bdda0acb897ef5e49a/yarl-1.22.0-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:d77e1b2c6d04711478cb1c4ab90db07f1609ccf06a287d5607fcd90dc9863acf", size = 323203 }, - { url = "https://files.pythonhosted.org/packages/8c/96/475509110d3f0153b43d06164cf4195c64d16999e0c7e2d8a099adcd6907/yarl-1.22.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c4647674b6150d2cae088fc07de2738a84b8bcedebef29802cf0b0a82ab6face", size = 363173 }, - { url = "https://files.pythonhosted.org/packages/c9/66/59db471aecfbd559a1fd48aedd954435558cd98c7d0da8b03cc6c140a32c/yarl-1.22.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = 
"sha256:efb07073be061c8f79d03d04139a80ba33cbd390ca8f0297aae9cce6411e4c6b", size = 373562 }, - { url = "https://files.pythonhosted.org/packages/03/1f/c5d94abc91557384719da10ff166b916107c1b45e4d0423a88457071dd88/yarl-1.22.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e51ac5435758ba97ad69617e13233da53908beccc6cfcd6c34bbed8dcbede486", size = 339828 }, - { url = "https://files.pythonhosted.org/packages/5f/97/aa6a143d3afba17b6465733681c70cf175af89f76ec8d9286e08437a7454/yarl-1.22.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:33e32a0dd0c8205efa8e83d04fc9f19313772b78522d1bdc7d9aed706bfd6138", size = 347551 }, - { url = "https://files.pythonhosted.org/packages/43/3c/45a2b6d80195959239a7b2a8810506d4eea5487dce61c2a3393e7fc3c52e/yarl-1.22.0-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:bf4a21e58b9cde0e401e683ebd00f6ed30a06d14e93f7c8fd059f8b6e8f87b6a", size = 334512 }, - { url = "https://files.pythonhosted.org/packages/86/a0/c2ab48d74599c7c84cb104ebd799c5813de252bea0f360ffc29d270c2caa/yarl-1.22.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:e4b582bab49ac33c8deb97e058cd67c2c50dac0dd134874106d9c774fd272529", size = 352400 }, - { url = "https://files.pythonhosted.org/packages/32/75/f8919b2eafc929567d3d8411f72bdb1a2109c01caaab4ebfa5f8ffadc15b/yarl-1.22.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:0b5bcc1a9c4839e7e30b7b30dd47fe5e7e44fb7054ec29b5bb8d526aa1041093", size = 357140 }, - { url = "https://files.pythonhosted.org/packages/cf/72/6a85bba382f22cf78add705d8c3731748397d986e197e53ecc7835e76de7/yarl-1.22.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:c0232bce2170103ec23c454e54a57008a9a72b5d1c3105dc2496750da8cfa47c", size = 341473 }, - { url = "https://files.pythonhosted.org/packages/35/18/55e6011f7c044dc80b98893060773cefcfdbf60dfefb8cb2f58b9bacbd83/yarl-1.22.0-cp314-cp314t-win32.whl", hash = "sha256:8009b3173bcd637be650922ac455946197d858b3630b6d8787aa9e5c4564533e", size = 89056 }, - { url = "https://files.pythonhosted.org/packages/f9/86/0f0dccb6e59a9e7f122c5afd43568b1d31b8ab7dda5f1b01fb5c7025c9a9/yarl-1.22.0-cp314-cp314t-win_amd64.whl", hash = "sha256:9fb17ea16e972c63d25d4a97f016d235c78dd2344820eb35bc034bc32012ee27", size = 96292 }, - { url = "https://files.pythonhosted.org/packages/48/b7/503c98092fb3b344a179579f55814b613c1fbb1c23b3ec14a7b008a66a6e/yarl-1.22.0-cp314-cp314t-win_arm64.whl", hash = "sha256:9f6d73c1436b934e3f01df1e1b21ff765cd1d28c77dfb9ace207f746d4610ee1", size = 85171 }, - { url = "https://files.pythonhosted.org/packages/73/ae/b48f95715333080afb75a4504487cbe142cae1268afc482d06692d605ae6/yarl-1.22.0-py3-none-any.whl", hash = "sha256:1380560bdba02b6b6c90de54133c81c9f2a453dee9912fe58c1dcced1edb7cff", size = 46814 }, +sdist = { url = "https://files.pythonhosted.org/packages/57/63/0c6ebca57330cd313f6102b16dd57ffaf3ec4c83403dcb45dbd15c6f3ea1/yarl-1.22.0.tar.gz", hash = "sha256:bebf8557577d4401ba8bd9ff33906f1376c877aa78d1fe216ad01b4d6745af71", size = 187169, upload_time = "2025-10-06T14:12:55.963Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/75/ff/46736024fee3429b80a165a732e38e5d5a238721e634ab41b040d49f8738/yarl-1.22.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e340382d1afa5d32b892b3ff062436d592ec3d692aeea3bef3a5cfe11bbf8c6f", size = 142000, upload_time = "2025-10-06T14:09:44.631Z" }, + { url = "https://files.pythonhosted.org/packages/5a/9a/b312ed670df903145598914770eb12de1bac44599549b3360acc96878df8/yarl-1.22.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = 
"sha256:f1e09112a2c31ffe8d80be1b0988fa6a18c5d5cad92a9ffbb1c04c91bfe52ad2", size = 94338, upload_time = "2025-10-06T14:09:46.372Z" }, + { url = "https://files.pythonhosted.org/packages/ba/f5/0601483296f09c3c65e303d60c070a5c19fcdbc72daa061e96170785bc7d/yarl-1.22.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:939fe60db294c786f6b7c2d2e121576628468f65453d86b0fe36cb52f987bd74", size = 94909, upload_time = "2025-10-06T14:09:48.648Z" }, + { url = "https://files.pythonhosted.org/packages/60/41/9a1fe0b73dbcefce72e46cf149b0e0a67612d60bfc90fb59c2b2efdfbd86/yarl-1.22.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e1651bf8e0398574646744c1885a41198eba53dc8a9312b954073f845c90a8df", size = 372940, upload_time = "2025-10-06T14:09:50.089Z" }, + { url = "https://files.pythonhosted.org/packages/17/7a/795cb6dfee561961c30b800f0ed616b923a2ec6258b5def2a00bf8231334/yarl-1.22.0-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:b8a0588521a26bf92a57a1705b77b8b59044cdceccac7151bd8d229e66b8dedb", size = 345825, upload_time = "2025-10-06T14:09:52.142Z" }, + { url = "https://files.pythonhosted.org/packages/d7/93/a58f4d596d2be2ae7bab1a5846c4d270b894958845753b2c606d666744d3/yarl-1.22.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:42188e6a615c1a75bcaa6e150c3fe8f3e8680471a6b10150c5f7e83f47cc34d2", size = 386705, upload_time = "2025-10-06T14:09:54.128Z" }, + { url = "https://files.pythonhosted.org/packages/61/92/682279d0e099d0e14d7fd2e176bd04f48de1484f56546a3e1313cd6c8e7c/yarl-1.22.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f6d2cb59377d99718913ad9a151030d6f83ef420a2b8f521d94609ecc106ee82", size = 396518, upload_time = "2025-10-06T14:09:55.762Z" }, + { url = "https://files.pythonhosted.org/packages/db/0f/0d52c98b8a885aeda831224b78f3be7ec2e1aa4a62091f9f9188c3c65b56/yarl-1.22.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:50678a3b71c751d58d7908edc96d332af328839eea883bb554a43f539101277a", size = 377267, upload_time = "2025-10-06T14:09:57.958Z" }, + { url = "https://files.pythonhosted.org/packages/22/42/d2685e35908cbeaa6532c1fc73e89e7f2efb5d8a7df3959ea8e37177c5a3/yarl-1.22.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1e8fbaa7cec507aa24ea27a01456e8dd4b6fab829059b69844bd348f2d467124", size = 365797, upload_time = "2025-10-06T14:09:59.527Z" }, + { url = "https://files.pythonhosted.org/packages/a2/83/cf8c7bcc6355631762f7d8bdab920ad09b82efa6b722999dfb05afa6cfac/yarl-1.22.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:433885ab5431bc3d3d4f2f9bd15bfa1614c522b0f1405d62c4f926ccd69d04fa", size = 365535, upload_time = "2025-10-06T14:10:01.139Z" }, + { url = "https://files.pythonhosted.org/packages/25/e1/5302ff9b28f0c59cac913b91fe3f16c59a033887e57ce9ca5d41a3a94737/yarl-1.22.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:b790b39c7e9a4192dc2e201a282109ed2985a1ddbd5ac08dc56d0e121400a8f7", size = 382324, upload_time = "2025-10-06T14:10:02.756Z" }, + { url = "https://files.pythonhosted.org/packages/bf/cd/4617eb60f032f19ae3a688dc990d8f0d89ee0ea378b61cac81ede3e52fae/yarl-1.22.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:31f0b53913220599446872d757257be5898019c85e7971599065bc55065dc99d", size = 383803, upload_time = "2025-10-06T14:10:04.552Z" }, + { url = 
"https://files.pythonhosted.org/packages/59/65/afc6e62bb506a319ea67b694551dab4a7e6fb7bf604e9bd9f3e11d575fec/yarl-1.22.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a49370e8f711daec68d09b821a34e1167792ee2d24d405cbc2387be4f158b520", size = 374220, upload_time = "2025-10-06T14:10:06.489Z" }, + { url = "https://files.pythonhosted.org/packages/e7/3d/68bf18d50dc674b942daec86a9ba922d3113d8399b0e52b9897530442da2/yarl-1.22.0-cp312-cp312-win32.whl", hash = "sha256:70dfd4f241c04bd9239d53b17f11e6ab672b9f1420364af63e8531198e3f5fe8", size = 81589, upload_time = "2025-10-06T14:10:09.254Z" }, + { url = "https://files.pythonhosted.org/packages/c8/9a/6ad1a9b37c2f72874f93e691b2e7ecb6137fb2b899983125db4204e47575/yarl-1.22.0-cp312-cp312-win_amd64.whl", hash = "sha256:8884d8b332a5e9b88e23f60bb166890009429391864c685e17bd73a9eda9105c", size = 87213, upload_time = "2025-10-06T14:10:11.369Z" }, + { url = "https://files.pythonhosted.org/packages/44/c5/c21b562d1680a77634d748e30c653c3ca918beb35555cff24986fff54598/yarl-1.22.0-cp312-cp312-win_arm64.whl", hash = "sha256:ea70f61a47f3cc93bdf8b2f368ed359ef02a01ca6393916bc8ff877427181e74", size = 81330, upload_time = "2025-10-06T14:10:13.112Z" }, + { url = "https://files.pythonhosted.org/packages/ea/f3/d67de7260456ee105dc1d162d43a019ecad6b91e2f51809d6cddaa56690e/yarl-1.22.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8dee9c25c74997f6a750cd317b8ca63545169c098faee42c84aa5e506c819b53", size = 139980, upload_time = "2025-10-06T14:10:14.601Z" }, + { url = "https://files.pythonhosted.org/packages/01/88/04d98af0b47e0ef42597b9b28863b9060bb515524da0a65d5f4db160b2d5/yarl-1.22.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:01e73b85a5434f89fc4fe27dcda2aff08ddf35e4d47bbbea3bdcd25321af538a", size = 93424, upload_time = "2025-10-06T14:10:16.115Z" }, + { url = "https://files.pythonhosted.org/packages/18/91/3274b215fd8442a03975ce6bee5fe6aa57a8326b29b9d3d56234a1dca244/yarl-1.22.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:22965c2af250d20c873cdbee8ff958fb809940aeb2e74ba5f20aaf6b7ac8c70c", size = 93821, upload_time = "2025-10-06T14:10:17.993Z" }, + { url = "https://files.pythonhosted.org/packages/61/3a/caf4e25036db0f2da4ca22a353dfeb3c9d3c95d2761ebe9b14df8fc16eb0/yarl-1.22.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b4f15793aa49793ec8d1c708ab7f9eded1aa72edc5174cae703651555ed1b601", size = 373243, upload_time = "2025-10-06T14:10:19.44Z" }, + { url = "https://files.pythonhosted.org/packages/6e/9e/51a77ac7516e8e7803b06e01f74e78649c24ee1021eca3d6a739cb6ea49c/yarl-1.22.0-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e5542339dcf2747135c5c85f68680353d5cb9ffd741c0f2e8d832d054d41f35a", size = 342361, upload_time = "2025-10-06T14:10:21.124Z" }, + { url = "https://files.pythonhosted.org/packages/d4/f8/33b92454789dde8407f156c00303e9a891f1f51a0330b0fad7c909f87692/yarl-1.22.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:5c401e05ad47a75869c3ab3e35137f8468b846770587e70d71e11de797d113df", size = 387036, upload_time = "2025-10-06T14:10:22.902Z" }, + { url = "https://files.pythonhosted.org/packages/d9/9a/c5db84ea024f76838220280f732970aa4ee154015d7f5c1bfb60a267af6f/yarl-1.22.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:243dda95d901c733f5b59214d28b0120893d91777cb8aa043e6ef059d3cddfe2", size = 397671, upload_time = "2025-10-06T14:10:24.523Z" }, + { url = 
"https://files.pythonhosted.org/packages/11/c9/cd8538dc2e7727095e0c1d867bad1e40c98f37763e6d995c1939f5fdc7b1/yarl-1.22.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bec03d0d388060058f5d291a813f21c011041938a441c593374da6077fe21b1b", size = 377059, upload_time = "2025-10-06T14:10:26.406Z" }, + { url = "https://files.pythonhosted.org/packages/a1/b9/ab437b261702ced75122ed78a876a6dec0a1b0f5e17a4ac7a9a2482d8abe/yarl-1.22.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b0748275abb8c1e1e09301ee3cf90c8a99678a4e92e4373705f2a2570d581273", size = 365356, upload_time = "2025-10-06T14:10:28.461Z" }, + { url = "https://files.pythonhosted.org/packages/b2/9d/8e1ae6d1d008a9567877b08f0ce4077a29974c04c062dabdb923ed98e6fe/yarl-1.22.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:47fdb18187e2a4e18fda2c25c05d8251a9e4a521edaed757fef033e7d8498d9a", size = 361331, upload_time = "2025-10-06T14:10:30.541Z" }, + { url = "https://files.pythonhosted.org/packages/ca/5a/09b7be3905962f145b73beb468cdd53db8aa171cf18c80400a54c5b82846/yarl-1.22.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c7044802eec4524fde550afc28edda0dd5784c4c45f0be151a2d3ba017daca7d", size = 382590, upload_time = "2025-10-06T14:10:33.352Z" }, + { url = "https://files.pythonhosted.org/packages/aa/7f/59ec509abf90eda5048b0bc3e2d7b5099dffdb3e6b127019895ab9d5ef44/yarl-1.22.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:139718f35149ff544caba20fce6e8a2f71f1e39b92c700d8438a0b1d2a631a02", size = 385316, upload_time = "2025-10-06T14:10:35.034Z" }, + { url = "https://files.pythonhosted.org/packages/e5/84/891158426bc8036bfdfd862fabd0e0fa25df4176ec793e447f4b85cf1be4/yarl-1.22.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e1b51bebd221006d3d2f95fbe124b22b247136647ae5dcc8c7acafba66e5ee67", size = 374431, upload_time = "2025-10-06T14:10:37.76Z" }, + { url = "https://files.pythonhosted.org/packages/bb/49/03da1580665baa8bef5e8ed34c6df2c2aca0a2f28bf397ed238cc1bbc6f2/yarl-1.22.0-cp313-cp313-win32.whl", hash = "sha256:d3e32536234a95f513bd374e93d717cf6b2231a791758de6c509e3653f234c95", size = 81555, upload_time = "2025-10-06T14:10:39.649Z" }, + { url = "https://files.pythonhosted.org/packages/9a/ee/450914ae11b419eadd067c6183ae08381cfdfcb9798b90b2b713bbebddda/yarl-1.22.0-cp313-cp313-win_amd64.whl", hash = "sha256:47743b82b76d89a1d20b83e60d5c20314cbd5ba2befc9cda8f28300c4a08ed4d", size = 86965, upload_time = "2025-10-06T14:10:41.313Z" }, + { url = "https://files.pythonhosted.org/packages/98/4d/264a01eae03b6cf629ad69bae94e3b0e5344741e929073678e84bf7a3e3b/yarl-1.22.0-cp313-cp313-win_arm64.whl", hash = "sha256:5d0fcda9608875f7d052eff120c7a5da474a6796fe4d83e152e0e4d42f6d1a9b", size = 81205, upload_time = "2025-10-06T14:10:43.167Z" }, + { url = "https://files.pythonhosted.org/packages/88/fc/6908f062a2f77b5f9f6d69cecb1747260831ff206adcbc5b510aff88df91/yarl-1.22.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:719ae08b6972befcba4310e49edb1161a88cdd331e3a694b84466bd938a6ab10", size = 146209, upload_time = "2025-10-06T14:10:44.643Z" }, + { url = "https://files.pythonhosted.org/packages/65/47/76594ae8eab26210b4867be6f49129861ad33da1f1ebdf7051e98492bf62/yarl-1.22.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:47d8a5c446df1c4db9d21b49619ffdba90e77c89ec6e283f453856c74b50b9e3", size = 95966, upload_time = "2025-10-06T14:10:46.554Z" }, + { url = 
"https://files.pythonhosted.org/packages/ab/ce/05e9828a49271ba6b5b038b15b3934e996980dd78abdfeb52a04cfb9467e/yarl-1.22.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:cfebc0ac8333520d2d0423cbbe43ae43c8838862ddb898f5ca68565e395516e9", size = 97312, upload_time = "2025-10-06T14:10:48.007Z" }, + { url = "https://files.pythonhosted.org/packages/d1/c5/7dffad5e4f2265b29c9d7ec869c369e4223166e4f9206fc2243ee9eea727/yarl-1.22.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4398557cbf484207df000309235979c79c4356518fd5c99158c7d38203c4da4f", size = 361967, upload_time = "2025-10-06T14:10:49.997Z" }, + { url = "https://files.pythonhosted.org/packages/50/b2/375b933c93a54bff7fc041e1a6ad2c0f6f733ffb0c6e642ce56ee3b39970/yarl-1.22.0-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:2ca6fd72a8cd803be290d42f2dec5cdcd5299eeb93c2d929bf060ad9efaf5de0", size = 323949, upload_time = "2025-10-06T14:10:52.004Z" }, + { url = "https://files.pythonhosted.org/packages/66/50/bfc2a29a1d78644c5a7220ce2f304f38248dc94124a326794e677634b6cf/yarl-1.22.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ca1f59c4e1ab6e72f0a23c13fca5430f889634166be85dbf1013683e49e3278e", size = 361818, upload_time = "2025-10-06T14:10:54.078Z" }, + { url = "https://files.pythonhosted.org/packages/46/96/f3941a46af7d5d0f0498f86d71275696800ddcdd20426298e572b19b91ff/yarl-1.22.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:6c5010a52015e7c70f86eb967db0f37f3c8bd503a695a49f8d45700144667708", size = 372626, upload_time = "2025-10-06T14:10:55.767Z" }, + { url = "https://files.pythonhosted.org/packages/c1/42/8b27c83bb875cd89448e42cd627e0fb971fa1675c9ec546393d18826cb50/yarl-1.22.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d7672ecf7557476642c88497c2f8d8542f8e36596e928e9bcba0e42e1e7d71f", size = 341129, upload_time = "2025-10-06T14:10:57.985Z" }, + { url = "https://files.pythonhosted.org/packages/49/36/99ca3122201b382a3cf7cc937b95235b0ac944f7e9f2d5331d50821ed352/yarl-1.22.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:3b7c88eeef021579d600e50363e0b6ee4f7f6f728cd3486b9d0f3ee7b946398d", size = 346776, upload_time = "2025-10-06T14:10:59.633Z" }, + { url = "https://files.pythonhosted.org/packages/85/b4/47328bf996acd01a4c16ef9dcd2f59c969f495073616586f78cd5f2efb99/yarl-1.22.0-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:f4afb5c34f2c6fecdcc182dfcfc6af6cccf1aa923eed4d6a12e9d96904e1a0d8", size = 334879, upload_time = "2025-10-06T14:11:01.454Z" }, + { url = "https://files.pythonhosted.org/packages/c2/ad/b77d7b3f14a4283bffb8e92c6026496f6de49751c2f97d4352242bba3990/yarl-1.22.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:59c189e3e99a59cf8d83cbb31d4db02d66cda5a1a4374e8a012b51255341abf5", size = 350996, upload_time = "2025-10-06T14:11:03.452Z" }, + { url = "https://files.pythonhosted.org/packages/81/c8/06e1d69295792ba54d556f06686cbd6a7ce39c22307100e3fb4a2c0b0a1d/yarl-1.22.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:5a3bf7f62a289fa90f1990422dc8dff5a458469ea71d1624585ec3a4c8d6960f", size = 356047, upload_time = "2025-10-06T14:11:05.115Z" }, + { url = "https://files.pythonhosted.org/packages/4b/b8/4c0e9e9f597074b208d18cef227d83aac36184bfbc6eab204ea55783dbc5/yarl-1.22.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = 
"sha256:de6b9a04c606978fdfe72666fa216ffcf2d1a9f6a381058d4378f8d7b1e5de62", size = 342947, upload_time = "2025-10-06T14:11:08.137Z" }, + { url = "https://files.pythonhosted.org/packages/e0/e5/11f140a58bf4c6ad7aca69a892bff0ee638c31bea4206748fc0df4ebcb3a/yarl-1.22.0-cp313-cp313t-win32.whl", hash = "sha256:1834bb90991cc2999f10f97f5f01317f99b143284766d197e43cd5b45eb18d03", size = 86943, upload_time = "2025-10-06T14:11:10.284Z" }, + { url = "https://files.pythonhosted.org/packages/31/74/8b74bae38ed7fe6793d0c15a0c8207bbb819cf287788459e5ed230996cdd/yarl-1.22.0-cp313-cp313t-win_amd64.whl", hash = "sha256:ff86011bd159a9d2dfc89c34cfd8aff12875980e3bd6a39ff097887520e60249", size = 93715, upload_time = "2025-10-06T14:11:11.739Z" }, + { url = "https://files.pythonhosted.org/packages/69/66/991858aa4b5892d57aef7ee1ba6b4d01ec3b7eb3060795d34090a3ca3278/yarl-1.22.0-cp313-cp313t-win_arm64.whl", hash = "sha256:7861058d0582b847bc4e3a4a4c46828a410bca738673f35a29ba3ca5db0b473b", size = 83857, upload_time = "2025-10-06T14:11:13.586Z" }, + { url = "https://files.pythonhosted.org/packages/46/b3/e20ef504049f1a1c54a814b4b9bed96d1ac0e0610c3b4da178f87209db05/yarl-1.22.0-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:34b36c2c57124530884d89d50ed2c1478697ad7473efd59cfd479945c95650e4", size = 140520, upload_time = "2025-10-06T14:11:15.465Z" }, + { url = "https://files.pythonhosted.org/packages/e4/04/3532d990fdbab02e5ede063676b5c4260e7f3abea2151099c2aa745acc4c/yarl-1.22.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:0dd9a702591ca2e543631c2a017e4a547e38a5c0f29eece37d9097e04a7ac683", size = 93504, upload_time = "2025-10-06T14:11:17.106Z" }, + { url = "https://files.pythonhosted.org/packages/11/63/ff458113c5c2dac9a9719ac68ee7c947cb621432bcf28c9972b1c0e83938/yarl-1.22.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:594fcab1032e2d2cc3321bb2e51271e7cd2b516c7d9aee780ece81b07ff8244b", size = 94282, upload_time = "2025-10-06T14:11:19.064Z" }, + { url = "https://files.pythonhosted.org/packages/a7/bc/315a56aca762d44a6aaaf7ad253f04d996cb6b27bad34410f82d76ea8038/yarl-1.22.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f3d7a87a78d46a2e3d5b72587ac14b4c16952dd0887dbb051451eceac774411e", size = 372080, upload_time = "2025-10-06T14:11:20.996Z" }, + { url = "https://files.pythonhosted.org/packages/3f/3f/08e9b826ec2e099ea6e7c69a61272f4f6da62cb5b1b63590bb80ca2e4a40/yarl-1.22.0-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:852863707010316c973162e703bddabec35e8757e67fcb8ad58829de1ebc8590", size = 338696, upload_time = "2025-10-06T14:11:22.847Z" }, + { url = "https://files.pythonhosted.org/packages/e3/9f/90360108e3b32bd76789088e99538febfea24a102380ae73827f62073543/yarl-1.22.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:131a085a53bfe839a477c0845acf21efc77457ba2bcf5899618136d64f3303a2", size = 387121, upload_time = "2025-10-06T14:11:24.889Z" }, + { url = "https://files.pythonhosted.org/packages/98/92/ab8d4657bd5b46a38094cfaea498f18bb70ce6b63508fd7e909bd1f93066/yarl-1.22.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:078a8aefd263f4d4f923a9677b942b445a2be970ca24548a8102689a3a8ab8da", size = 394080, upload_time = "2025-10-06T14:11:27.307Z" }, + { url = 
"https://files.pythonhosted.org/packages/f5/e7/d8c5a7752fef68205296201f8ec2bf718f5c805a7a7e9880576c67600658/yarl-1.22.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bca03b91c323036913993ff5c738d0842fc9c60c4648e5c8d98331526df89784", size = 372661, upload_time = "2025-10-06T14:11:29.387Z" }, + { url = "https://files.pythonhosted.org/packages/b6/2e/f4d26183c8db0bb82d491b072f3127fb8c381a6206a3a56332714b79b751/yarl-1.22.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:68986a61557d37bb90d3051a45b91fa3d5c516d177dfc6dd6f2f436a07ff2b6b", size = 364645, upload_time = "2025-10-06T14:11:31.423Z" }, + { url = "https://files.pythonhosted.org/packages/80/7c/428e5812e6b87cd00ee8e898328a62c95825bf37c7fa87f0b6bb2ad31304/yarl-1.22.0-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:4792b262d585ff0dff6bcb787f8492e40698443ec982a3568c2096433660c694", size = 355361, upload_time = "2025-10-06T14:11:33.055Z" }, + { url = "https://files.pythonhosted.org/packages/ec/2a/249405fd26776f8b13c067378ef4d7dd49c9098d1b6457cdd152a99e96a9/yarl-1.22.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:ebd4549b108d732dba1d4ace67614b9545b21ece30937a63a65dd34efa19732d", size = 381451, upload_time = "2025-10-06T14:11:35.136Z" }, + { url = "https://files.pythonhosted.org/packages/67/a8/fb6b1adbe98cf1e2dd9fad71003d3a63a1bc22459c6e15f5714eb9323b93/yarl-1.22.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:f87ac53513d22240c7d59203f25cc3beac1e574c6cd681bbfd321987b69f95fd", size = 383814, upload_time = "2025-10-06T14:11:37.094Z" }, + { url = "https://files.pythonhosted.org/packages/d9/f9/3aa2c0e480fb73e872ae2814c43bc1e734740bb0d54e8cb2a95925f98131/yarl-1.22.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:22b029f2881599e2f1b06f8f1db2ee63bd309e2293ba2d566e008ba12778b8da", size = 370799, upload_time = "2025-10-06T14:11:38.83Z" }, + { url = "https://files.pythonhosted.org/packages/50/3c/af9dba3b8b5eeb302f36f16f92791f3ea62e3f47763406abf6d5a4a3333b/yarl-1.22.0-cp314-cp314-win32.whl", hash = "sha256:6a635ea45ba4ea8238463b4f7d0e721bad669f80878b7bfd1f89266e2ae63da2", size = 82990, upload_time = "2025-10-06T14:11:40.624Z" }, + { url = "https://files.pythonhosted.org/packages/ac/30/ac3a0c5bdc1d6efd1b41fa24d4897a4329b3b1e98de9449679dd327af4f0/yarl-1.22.0-cp314-cp314-win_amd64.whl", hash = "sha256:0d6e6885777af0f110b0e5d7e5dda8b704efed3894da26220b7f3d887b839a79", size = 88292, upload_time = "2025-10-06T14:11:42.578Z" }, + { url = "https://files.pythonhosted.org/packages/df/0a/227ab4ff5b998a1b7410abc7b46c9b7a26b0ca9e86c34ba4b8d8bc7c63d5/yarl-1.22.0-cp314-cp314-win_arm64.whl", hash = "sha256:8218f4e98d3c10d683584cb40f0424f4b9fd6e95610232dd75e13743b070ee33", size = 82888, upload_time = "2025-10-06T14:11:44.863Z" }, + { url = "https://files.pythonhosted.org/packages/06/5e/a15eb13db90abd87dfbefb9760c0f3f257ac42a5cac7e75dbc23bed97a9f/yarl-1.22.0-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:45c2842ff0e0d1b35a6bf1cd6c690939dacb617a70827f715232b2e0494d55d1", size = 146223, upload_time = "2025-10-06T14:11:46.796Z" }, + { url = "https://files.pythonhosted.org/packages/18/82/9665c61910d4d84f41a5bf6837597c89e665fa88aa4941080704645932a9/yarl-1.22.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:d947071e6ebcf2e2bee8fce76e10faca8f7a14808ca36a910263acaacef08eca", size = 95981, upload_time = "2025-10-06T14:11:48.845Z" }, + { url = 
"https://files.pythonhosted.org/packages/5d/9a/2f65743589809af4d0a6d3aa749343c4b5f4c380cc24a8e94a3c6625a808/yarl-1.22.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:334b8721303e61b00019474cc103bdac3d7b1f65e91f0bfedeec2d56dfe74b53", size = 97303, upload_time = "2025-10-06T14:11:50.897Z" }, + { url = "https://files.pythonhosted.org/packages/b0/ab/5b13d3e157505c43c3b43b5a776cbf7b24a02bc4cccc40314771197e3508/yarl-1.22.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1e7ce67c34138a058fd092f67d07a72b8e31ff0c9236e751957465a24b28910c", size = 361820, upload_time = "2025-10-06T14:11:52.549Z" }, + { url = "https://files.pythonhosted.org/packages/fb/76/242a5ef4677615cf95330cfc1b4610e78184400699bdda0acb897ef5e49a/yarl-1.22.0-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:d77e1b2c6d04711478cb1c4ab90db07f1609ccf06a287d5607fcd90dc9863acf", size = 323203, upload_time = "2025-10-06T14:11:54.225Z" }, + { url = "https://files.pythonhosted.org/packages/8c/96/475509110d3f0153b43d06164cf4195c64d16999e0c7e2d8a099adcd6907/yarl-1.22.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c4647674b6150d2cae088fc07de2738a84b8bcedebef29802cf0b0a82ab6face", size = 363173, upload_time = "2025-10-06T14:11:56.069Z" }, + { url = "https://files.pythonhosted.org/packages/c9/66/59db471aecfbd559a1fd48aedd954435558cd98c7d0da8b03cc6c140a32c/yarl-1.22.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:efb07073be061c8f79d03d04139a80ba33cbd390ca8f0297aae9cce6411e4c6b", size = 373562, upload_time = "2025-10-06T14:11:58.783Z" }, + { url = "https://files.pythonhosted.org/packages/03/1f/c5d94abc91557384719da10ff166b916107c1b45e4d0423a88457071dd88/yarl-1.22.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e51ac5435758ba97ad69617e13233da53908beccc6cfcd6c34bbed8dcbede486", size = 339828, upload_time = "2025-10-06T14:12:00.686Z" }, + { url = "https://files.pythonhosted.org/packages/5f/97/aa6a143d3afba17b6465733681c70cf175af89f76ec8d9286e08437a7454/yarl-1.22.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:33e32a0dd0c8205efa8e83d04fc9f19313772b78522d1bdc7d9aed706bfd6138", size = 347551, upload_time = "2025-10-06T14:12:02.628Z" }, + { url = "https://files.pythonhosted.org/packages/43/3c/45a2b6d80195959239a7b2a8810506d4eea5487dce61c2a3393e7fc3c52e/yarl-1.22.0-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:bf4a21e58b9cde0e401e683ebd00f6ed30a06d14e93f7c8fd059f8b6e8f87b6a", size = 334512, upload_time = "2025-10-06T14:12:04.871Z" }, + { url = "https://files.pythonhosted.org/packages/86/a0/c2ab48d74599c7c84cb104ebd799c5813de252bea0f360ffc29d270c2caa/yarl-1.22.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:e4b582bab49ac33c8deb97e058cd67c2c50dac0dd134874106d9c774fd272529", size = 352400, upload_time = "2025-10-06T14:12:06.624Z" }, + { url = "https://files.pythonhosted.org/packages/32/75/f8919b2eafc929567d3d8411f72bdb1a2109c01caaab4ebfa5f8ffadc15b/yarl-1.22.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:0b5bcc1a9c4839e7e30b7b30dd47fe5e7e44fb7054ec29b5bb8d526aa1041093", size = 357140, upload_time = "2025-10-06T14:12:08.362Z" }, + { url = "https://files.pythonhosted.org/packages/cf/72/6a85bba382f22cf78add705d8c3731748397d986e197e53ecc7835e76de7/yarl-1.22.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = 
"sha256:c0232bce2170103ec23c454e54a57008a9a72b5d1c3105dc2496750da8cfa47c", size = 341473, upload_time = "2025-10-06T14:12:10.994Z" }, + { url = "https://files.pythonhosted.org/packages/35/18/55e6011f7c044dc80b98893060773cefcfdbf60dfefb8cb2f58b9bacbd83/yarl-1.22.0-cp314-cp314t-win32.whl", hash = "sha256:8009b3173bcd637be650922ac455946197d858b3630b6d8787aa9e5c4564533e", size = 89056, upload_time = "2025-10-06T14:12:13.317Z" }, + { url = "https://files.pythonhosted.org/packages/f9/86/0f0dccb6e59a9e7f122c5afd43568b1d31b8ab7dda5f1b01fb5c7025c9a9/yarl-1.22.0-cp314-cp314t-win_amd64.whl", hash = "sha256:9fb17ea16e972c63d25d4a97f016d235c78dd2344820eb35bc034bc32012ee27", size = 96292, upload_time = "2025-10-06T14:12:15.398Z" }, + { url = "https://files.pythonhosted.org/packages/48/b7/503c98092fb3b344a179579f55814b613c1fbb1c23b3ec14a7b008a66a6e/yarl-1.22.0-cp314-cp314t-win_arm64.whl", hash = "sha256:9f6d73c1436b934e3f01df1e1b21ff765cd1d28c77dfb9ace207f746d4610ee1", size = 85171, upload_time = "2025-10-06T14:12:16.935Z" }, + { url = "https://files.pythonhosted.org/packages/73/ae/b48f95715333080afb75a4504487cbe142cae1268afc482d06692d605ae6/yarl-1.22.0-py3-none-any.whl", hash = "sha256:1380560bdba02b6b6c90de54133c81c9f2a453dee9912fe58c1dcced1edb7cff", size = 46814, upload_time = "2025-10-06T14:12:53.872Z" }, ] [[package]] name = "zipp" version = "3.23.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e3/02/0f2892c661036d50ede074e376733dca2ae7c6eb617489437771209d4180/zipp-3.23.0.tar.gz", hash = "sha256:a07157588a12518c9d4034df3fbbee09c814741a33ff63c05fa29d26a2404166", size = 25547 } +sdist = { url = "https://files.pythonhosted.org/packages/e3/02/0f2892c661036d50ede074e376733dca2ae7c6eb617489437771209d4180/zipp-3.23.0.tar.gz", hash = "sha256:a07157588a12518c9d4034df3fbbee09c814741a33ff63c05fa29d26a2404166", size = 25547, upload_time = "2025-06-08T17:06:39.4Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/2e/54/647ade08bf0db230bfea292f893923872fd20be6ac6f53b2b936ba839d75/zipp-3.23.0-py3-none-any.whl", hash = "sha256:071652d6115ed432f5ce1d34c336c0adfd6a884660d1e9712a256d3d3bd4b14e", size = 10276 }, + { url = "https://files.pythonhosted.org/packages/2e/54/647ade08bf0db230bfea292f893923872fd20be6ac6f53b2b936ba839d75/zipp-3.23.0-py3-none-any.whl", hash = "sha256:071652d6115ed432f5ce1d34c336c0adfd6a884660d1e9712a256d3d3bd4b14e", size = 10276, upload_time = "2025-06-08T17:06:38.034Z" }, ] [[package]] @@ -3688,4 +3801,4 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cffi", marker = "platform_python_implementation == 'PyPy'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a7/b7/0fe8fb6390309f29a3a76c439dd08a73c05473bbaafa7117596ded319f84/zstandard-0.18.0.tar.gz", hash = "sha256:0ac0357a0d985b4ff31a854744040d7b5754385d1f98f7145c30e02c6865cb6f", size = 631544 } +sdist = { url = "https://files.pythonhosted.org/packages/a7/b7/0fe8fb6390309f29a3a76c439dd08a73c05473bbaafa7117596ded319f84/zstandard-0.18.0.tar.gz", hash = "sha256:0ac0357a0d985b4ff31a854744040d7b5754385d1f98f7145c30e02c6865cb6f", size = 631544, upload_time = "2022-06-21T17:23:58.489Z" } From e0ca88a7041cd8ebb089be27bda927b69ba615cd Mon Sep 17 00:00:00 2001 From: SreeHarshaNelaturu Date: Wed, 11 Feb 2026 16:08:03 +0100 Subject: [PATCH 3/4] update global-mmlu-lite, make confidence_level optional (for now) and other changes --- README.md | 8 +- ...7352e4d2-016d-404e-b443-62674ed3d75f.json} | 132 +++++++--- 
 README.md | 8 +-
 ...7352e4d2-016d-404e-b443-62674ed3d75f.json} | 132 +++++++---
 ...7e577a20-9de8-49d7-b598-11755237d256.json} | 132 +++++++---
 ...7be4f076-a537-4d34-b884-8860a2d23e57.json} | 132 +++++++---
 ...d74e3633-6eda-4261-a06a-1f5d6c7d78d6.json} | 132 +++++++---
 ...1c486108-80af-4752-96d8-02832e4420cb.json} | 132 +++++++---
 ...c6d487a0-8088-405c-b6c8-20136afbe339.json} | 132 +++++++---
 ...0ef90f90-b956-48ba-bad8-234d74c27c9c.json} | 132 +++++++---
 ...10fddd86-ef3c-4d77-a1ca-020d7b19b27d.json} | 132 +++++++---
 ...d491f587-b539-4918-9377-2a8204dbd8a4.json} | 132 +++++++---
 ...7ec048f5-7539-42fa-8c16-44d006928475.json} | 132 +++++++---
 ...c23a5254-8219-460b-b334-0746aded67db.json} | 132 +++++++---
 ...9f223ea5-a5a1-4acb-81c6-6fe71239225a.json} | 132 +++++++---
 ...f2b74519-42b2-4b41-b963-df24fd0e0690.json} | 132 +++++++---
 ...b455eee3-2917-4479-89b1-e1a470e62fbe.json} | 132 +++++++---
 ...25786cd1-4ffd-4265-9b61-426ab47e5b1d.json} | 132 +++++++---
 ...257c2515-2933-4f2e-b6e6-70d4a818f086.json} | 132 +++++++---
 ...e7a344c6-eeff-4b11-93aa-a77ce2063a81.json} | 132 +++++++---
 ...55a4f39e-36fc-40d9-8e4d-5dfb3bd8d707.json} | 132 +++++++---
 ...a70391be-f081-4ee6-8128-801b27072b53.json} | 132 +++++++---
 ...14712820-c343-4d94-8eff-633a12b830af.json} | 132 +++++++---
 ...7cc69baa-9db9-4a40-a03a-d4cd6bee16c5.json} | 132 +++++++---
 ...026339f8-2a68-47c2-afaf-69a3c0c5a8db.json} | 132 +++++++---
 ...2761218a-27ef-4dc3-af3a-41ba830f8b40.json} | 132 +++++++---
 ...8a2fda68-99d6-47a5-95ee-d38e695dda55.json} | 132 +++++++---
 eval.schema.json | 2 +-
 eval_types.py | 226 +++++++++---------
 instance_level_types.py | 116 +++++----
 pyproject.toml | 2 +-
 scripts/global-mmlu-lite/adapter.py | 10 +-
 uv.lock | 13 +-
 31 files changed, 2542 insertions(+), 1003 deletions(-)
 rename data/global-mmlu-lite/alibaba/qwen3-235b-a22b-instruct-2507/{2d0a09db-e97e-4ef7-9987-ef7c933ad721.json => 7352e4d2-016d-404e-b443-62674ed3d75f.json} (80%)
 rename data/global-mmlu-lite/anthropic/claude-3-5-haiku-20241022/{7af30210-b021-49d5-932c-75a9a42a2d08.json => 7e577a20-9de8-49d7-b598-11755237d256.json} (80%)
 rename data/global-mmlu-lite/anthropic/claude-3-7-sonnet-20250219/{4291c294-8155-4664-aec4-272445cc8862.json => 7be4f076-a537-4d34-b884-8860a2d23e57.json} (80%)
 rename data/global-mmlu-lite/anthropic/claude-opus-4-1-20250805/{911db593-5c95-41e9-9264-b130be6a9fb1.json => d74e3633-6eda-4261-a06a-1f5d6c7d78d6.json} (80%)
 rename data/global-mmlu-lite/anthropic/claude-sonnet-4-20250514/{51465d80-23e2-4328-8845-70b373408d65.json => 1c486108-80af-4752-96d8-02832e4420cb.json} (80%)
 rename data/global-mmlu-lite/cohere/command-a-03-2025/{12a16399-1aff-4173-9677-58d0d9e23ea2.json => c6d487a0-8088-405c-b6c8-20136afbe339.json} (80%)
 rename data/global-mmlu-lite/deepseek/deepseek-r1-0528/{aeaab8dd-70cd-484c-a550-18ce9f1dbad7.json => 0ef90f90-b956-48ba-bad8-234d74c27c9c.json} (80%)
 rename data/global-mmlu-lite/deepseek/deepseek-v3.1/{803ce795-80f9-49a1-92a5-8d81c8d0ff4b.json => 10fddd86-ef3c-4d77-a1ca-020d7b19b27d.json} (80%)
 rename data/global-mmlu-lite/google/gemini-2.5-flash-preview-05-20/{3796f2e5-ee3f-4598-911f-92e8efac92c3.json => d491f587-b539-4918-9377-2a8204dbd8a4.json} (80%)
 rename data/global-mmlu-lite/google/gemini-2.5-flash/{b225eef0-9698-4340-bc6d-cece877c8863.json => 7ec048f5-7539-42fa-8c16-44d006928475.json} (80%)
 rename data/global-mmlu-lite/google/gemini-2.5-pro/{531fe0ba-1f29-4409-abdb-daad56918fcc.json => c23a5254-8219-460b-b334-0746aded67db.json} (79%)
 rename data/global-mmlu-lite/google/gemini-3-pro-preview/{9193adbe-0c95-4b5e-a179-4c14e749a75c.json => 9f223ea5-a5a1-4acb-81c6-6fe71239225a.json} (80%)
 rename data/global-mmlu-lite/google/gemma-3-27b-it/{7a0bdc36-cff9-4a01-aa5c-750882aeccd4.json => f2b74519-42b2-4b41-b963-df24fd0e0690.json} (79%)
 rename data/global-mmlu-lite/google/gemma-3-4b-it/{548f8e83-3e97-4cb4-83ac-8da11d8f3e5f.json => b455eee3-2917-4479-89b1-e1a470e62fbe.json} (79%)
 rename data/global-mmlu-lite/mistralai/mistral-medium-3/{1e2e51d0-42e8-4564-a42c-31819f89f459.json => 25786cd1-4ffd-4265-9b61-426ab47e5b1d.json} (80%)
 rename data/global-mmlu-lite/mistralai/mistral-small-2503/{85822e81-7478-4f63-b7f3-89a78e75c6d9.json => 257c2515-2933-4f2e-b6e6-70d4a818f086.json} (80%)
 rename data/global-mmlu-lite/openai/gpt-4.1-2025-04-14/{c4b48f92-4f10-4831-86a9-3ede0512bf7b.json => e7a344c6-eeff-4b11-93aa-a77ce2063a81.json} (80%)
 rename data/global-mmlu-lite/openai/gpt-5-2025-08-07/{07d59b47-f0d1-48a4-87fb-b931bbb21ac7.json => 55a4f39e-36fc-40d9-8e4d-5dfb3bd8d707.json} (80%)
 rename data/global-mmlu-lite/openai/o3-mini-2025-01-31/{bfc75f7b-ebc2-4833-acb2-a9b48bd02d79.json => a70391be-f081-4ee6-8128-801b27072b53.json} (80%)
 rename data/global-mmlu-lite/unknown/aya-expanse-32b/{1b07b6c2-9eb1-4e90-9e29-91c2b8258b4d.json => 14712820-c343-4d94-8eff-633a12b830af.json} (80%)
 rename data/global-mmlu-lite/unknown/granite-4.0-h-small/{96569c98-0d02-4b32-b915-87b707102913.json => 7cc69baa-9db9-4a40-a03a-d4cd6bee16c5.json} (80%)
 rename data/global-mmlu-lite/unknown/o4-mini-2025-04-16/{f1955ca9-b8ef-4565-9c4c-2e7aaba6a9db.json => 026339f8-2a68-47c2-afaf-69a3c0c5a8db.json} (80%)
 rename data/global-mmlu-lite/xai/grok-3-mini/{143d53e6-b34e-4fa8-af3f-8019cef29abb.json => 2761218a-27ef-4dc3-af3a-41ba830f8b40.json} (79%)
 rename data/global-mmlu-lite/xai/grok-4-0709/{bcfa473c-1686-42af-8d07-4c8b92c3d864.json => 8a2fda68-99d6-47a5-95ee-d38e695dda55.json} (79%)

diff --git a/README.md b/README.md
index ff8532273..7760495ca 100644
--- a/README.md
+++ b/README.md
@@ -188,10 +188,8 @@ Each evaluation (e.g., `livecodebenchpro`, `hfopenllm_v2`) has its own directory
 
 Run the following bash commands to generate pydantic classes for `eval.schema.json` and `instance_level_eval.schema.json` (for easier use in data converter scripts):
 
+uv run datamodel-codegen --input eval.schema.json --output eval_types.py --class-name EvaluationLog --output-model-type pydantic_v2.BaseModel --input-file-type jsonschema --formatters ruff-format ruff-check
+uv run datamodel-codegen --input instance_level_eval.schema.json --output instance_level_types.py --class-name InstanceLevelEvaluationLog --output-model-type pydantic_v2.BaseModel --input-file-type jsonschema --formatters ruff-format ruff-check
+
 ```bash
-uv run datamodel-codegen --input eval.schema.json --output eval_types.py --class-name EvaluationLog --output-model-type pydantic_v2.BaseModel --input-file-type jsonschema
 ```
-and
-```bash
-uv run datamodel-codegen --input instance_level_eval.schema.json --output instance_level_types.py --class-name InstanceLevelEvaluationLog --output-model-type pydantic_v2.BaseModel --input-file-type jsonschema
-```
\ No newline at end of file
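Since the regenerated eval_types.py now carries the v0.2.0 `uncertainty` field, a quick way to sanity-check the rewritten data files is to round-trip one through the generated model. A usage sketch, assuming the generated `EvaluationLog` exposes the top-level schema fields and that pydantic v2 is installed; the file path is one of the renamed logs from this patch:

# Validate one rewritten data file against the regenerated schema types.
from pathlib import Path

from eval_types import EvaluationLog  # generated by the datamodel-codegen call above

raw = Path(
    "data/global-mmlu-lite/alibaba/qwen3-235b-a22b-instruct-2507/"
    "7352e4d2-016d-404e-b443-62674ed3d75f.json"
).read_text()

log = EvaluationLog.model_validate_json(raw)  # raises ValidationError on schema drift
print(log.schema_version)  # expected: "0.2.0"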
diff --git a/data/global-mmlu-lite/alibaba/qwen3-235b-a22b-instruct-2507/2d0a09db-e97e-4ef7-9987-ef7c933ad721.json b/data/global-mmlu-lite/alibaba/qwen3-235b-a22b-instruct-2507/7352e4d2-016d-404e-b443-62674ed3d75f.json
similarity index 80%
rename from data/global-mmlu-lite/alibaba/qwen3-235b-a22b-instruct-2507/2d0a09db-e97e-4ef7-9987-ef7c933ad721.json
rename to data/global-mmlu-lite/alibaba/qwen3-235b-a22b-instruct-2507/7352e4d2-016d-404e-b443-62674ed3d75f.json
index a4dc797fb..f983a3d6f 100644
--- a/data/global-mmlu-lite/alibaba/qwen3-235b-a22b-instruct-2507/2d0a09db-e97e-4ef7-9987-ef7c933ad721.json
+++ b/data/global-mmlu-lite/alibaba/qwen3-235b-a22b-instruct-2507/7352e4d2-016d-404e-b443-62674ed3d75f.json
@@ -1,7 +1,7 @@
 {
   "schema_version": "0.2.0",
-  "evaluation_id": "global-mmlu-lite/alibaba_qwen3-235b-a22b-instruct-2507/1770682039.8556428",
-  "retrieved_timestamp": "1770682039.8556428",
+  "evaluation_id": "global-mmlu-lite/alibaba_qwen3-235b-a22b-instruct-2507/1770822205.005286",
+  "retrieved_timestamp": "1770822205.005286",
   "source_metadata": {
     "source_name": "Global MMLU Lite",
     "source_type": "documentation",
@@ -97,8 +97,12 @@
       },
       "score_details": {
         "score": 0.88,
-        "details": {
-          "confidence_interval": 0.0318456453642134
+        "uncertainty": {
+          "confidence_interval": {
+            "lower": -0.0318,
+            "upper": 0.0318,
+            "method": "unknown"
+          }
         }
       }
     },
@@ -120,8 +124,12 @@
       },
       "score_details": {
         "score": 0.89,
-        "details": {
-          "confidence_interval": 0.0306626327370121
+        "uncertainty": {
+          "confidence_interval": {
+            "lower": -0.0307,
+            "upper": 0.0307,
+            "method": "unknown"
+          }
         }
       }
     },
@@ -143,8 +151,12 @@
       },
       "score_details": {
         "score": 0.8875,
-        "details": {
-          "confidence_interval": 0.0309655314070612
+        "uncertainty": {
+          "confidence_interval": {
+            "lower": -0.031,
+            "upper": 0.031,
+            "method": "unknown"
+          }
         }
       }
     },
@@ -166,8 +178,12 @@
       },
       "score_details": {
         "score": 0.885,
-        "details": {
-          "confidence_interval": 0.0312635759101603
+        "uncertainty": {
+          "confidence_interval": {
+            "lower": -0.0313,
+            "upper": 0.0313,
+            "method": "unknown"
+          }
         }
       }
     },
@@ -189,8 +205,12 @@
       },
       "score_details": {
         "score": 0.88,
-        "details": {
-          "confidence_interval": 0.0318456453642134
+        "uncertainty": {
+          "confidence_interval": {
+            "lower": -0.0318,
+            "upper": 0.0318,
+            "method": "unknown"
+          }
         }
       }
     },
@@ -212,8 +232,12 @@
       },
       "score_details": {
         "score": 0.8775,
-        "details": {
-          "confidence_interval": 0.0321299242960121
+        "uncertainty": {
+          "confidence_interval": {
+            "lower": -0.0321,
+            "upper": 0.0321,
+            "method": "unknown"
+          }
         }
       }
     },
@@ -235,8 +259,12 @@
       },
       "score_details": {
         "score": 0.88,
-        "details": {
-          "confidence_interval": 0.0318456453642134
+        "uncertainty": {
+          "confidence_interval": {
+            "lower": -0.0318,
+            "upper": 0.0318,
+            "method": "unknown"
+          }
         }
       }
     },
@@ -258,8 +286,12 @@
       },
       "score_details": {
         "score": 0.88,
-        "details": {
-          "confidence_interval": 0.0318456453642134
+        "uncertainty": {
+          "confidence_interval": {
+            "lower": -0.0318,
+            "upper": 0.0318,
+            "method": "unknown"
+          }
         }
       }
     },
@@ -281,8 +313,12 @@
       },
       "score_details": {
         "score": 0.88,
-        "details": {
-          "confidence_interval": 0.0318456453642134
+        "uncertainty": {
+          "confidence_interval": {
+            "lower": -0.0318,
+            "upper": 0.0318,
+            "method": "unknown"
+          }
         }
       }
     },
@@ -304,8 +340,12 @@
       },
       "score_details": {
         "score": 0.875,
-        "details": {
-          "confidence_interval": 0.0324098580108514
+        "uncertainty": {
+          "confidence_interval": {
+            "lower": -0.0324,
+            "upper": 0.0324,
+            "method": "unknown"
+          }
         }
       }
     },
@@ -327,8 +367,12 @@
       },
      "score_details": {
         "score": 0.8875,
-        "details": {
-          "confidence_interval": 0.0309655314070612
+        "uncertainty": {
+          "confidence_interval": {
+            "lower": -0.031,
+            "upper": 0.031,
+            "method": "unknown"
+          }
         }
       }
     },
@@ -350,8 +394,12 @@
       },
       "score_details": {
         "score": 0.875,
-        "details": {
-          "confidence_interval": 0.0324098580108514
+        "uncertainty": {
+          "confidence_interval": {
+            "lower": -0.0324,
+            "upper": 0.0324,
+            "method": "unknown"
+          }
         }
       }
     },
@@ -373,8 +421,12 @@
       },
       "score_details": {
         "score": 0.87,
-        "details": {
-          "confidence_interval":
0.0329571309666248 + "uncertainty": { + "confidence_interval": { + "lower": -0.033, + "upper": 0.033, + "method": "unknown" + } } } }, @@ -396,8 +448,12 @@ }, "score_details": { "score": 0.8725, - "details": { - "confidence_interval": 0.0326855581520567 + "uncertainty": { + "confidence_interval": { + "lower": -0.0327, + "upper": 0.0327, + "method": "unknown" + } } } }, @@ -419,8 +475,12 @@ }, "score_details": { "score": 0.8775, - "details": { - "confidence_interval": 0.0321299242960121 + "uncertainty": { + "confidence_interval": { + "lower": -0.0321, + "upper": 0.0321, + "method": "unknown" + } } } }, @@ -442,8 +502,12 @@ }, "score_details": { "score": 0.88, - "details": { - "confidence_interval": 0.0318456453642134 + "uncertainty": { + "confidence_interval": { + "lower": -0.0318, + "upper": 0.0318, + "method": "unknown" + } } } } diff --git a/data/global-mmlu-lite/anthropic/claude-3-5-haiku-20241022/7af30210-b021-49d5-932c-75a9a42a2d08.json b/data/global-mmlu-lite/anthropic/claude-3-5-haiku-20241022/7e577a20-9de8-49d7-b598-11755237d256.json similarity index 80% rename from data/global-mmlu-lite/anthropic/claude-3-5-haiku-20241022/7af30210-b021-49d5-932c-75a9a42a2d08.json rename to data/global-mmlu-lite/anthropic/claude-3-5-haiku-20241022/7e577a20-9de8-49d7-b598-11755237d256.json index 6c40a8d5c..f9cb47aa9 100644 --- a/data/global-mmlu-lite/anthropic/claude-3-5-haiku-20241022/7af30210-b021-49d5-932c-75a9a42a2d08.json +++ b/data/global-mmlu-lite/anthropic/claude-3-5-haiku-20241022/7e577a20-9de8-49d7-b598-11755237d256.json @@ -1,7 +1,7 @@ { "schema_version": "0.2.0", - "evaluation_id": "global-mmlu-lite/anthropic_claude-3-5-haiku-20241022/1770682039.8556428", - "retrieved_timestamp": "1770682039.8556428", + "evaluation_id": "global-mmlu-lite/anthropic_claude-3-5-haiku-20241022/1770822205.005286", + "retrieved_timestamp": "1770822205.005286", "source_metadata": { "source_name": "Global MMLU Lite", "source_type": "documentation", @@ -97,8 +97,12 @@ }, "score_details": { "score": 0.695, - "details": { - "confidence_interval": 0.045119098880536 + "uncertainty": { + "confidence_interval": { + "lower": -0.0451, + "upper": 0.0451, + "method": "unknown" + } } } }, @@ -120,8 +124,12 @@ }, "score_details": { "score": 0.485, - "details": { - "confidence_interval": 0.0489770450552826 + "uncertainty": { + "confidence_interval": { + "lower": -0.049, + "upper": 0.049, + "method": "unknown" + } } } }, @@ -143,8 +151,12 @@ }, "score_details": { "score": 0.675, - "details": { - "confidence_interval": 0.0458998918514459 + "uncertainty": { + "confidence_interval": { + "lower": -0.0459, + "upper": 0.0459, + "method": "unknown" + } } } }, @@ -166,8 +178,12 @@ }, "score_details": { "score": 0.565, - "details": { - "confidence_interval": 0.0485832929528273 + "uncertainty": { + "confidence_interval": { + "lower": -0.0486, + "upper": 0.0486, + "method": "unknown" + } } } }, @@ -189,8 +205,12 @@ }, "score_details": { "score": 0.61, - "details": { - "confidence_interval": 0.0477986153942541 + "uncertainty": { + "confidence_interval": { + "lower": -0.0478, + "upper": 0.0478, + "method": "unknown" + } } } }, @@ -212,8 +232,12 @@ }, "score_details": { "score": 0.6575, - "details": { - "confidence_interval": 0.0465046373306654 + "uncertainty": { + "confidence_interval": { + "lower": -0.0465, + "upper": 0.0465, + "method": "unknown" + } } } }, @@ -235,8 +259,12 @@ }, "score_details": { "score": 0.5475, - "details": { - "confidence_interval": 0.048777490036628 + "uncertainty": { + "confidence_interval": { + "lower": -0.0488, 
+ "upper": 0.0488, + "method": "unknown" + } } } }, @@ -258,8 +286,12 @@ }, "score_details": { "score": 0.48, - "details": { - "confidence_interval": 0.0489598846415423 + "uncertainty": { + "confidence_interval": { + "lower": -0.049, + "upper": 0.049, + "method": "unknown" + } } } }, @@ -281,8 +313,12 @@ }, "score_details": { "score": 0.655, - "details": { - "confidence_interval": 0.0465852352416072 + "uncertainty": { + "confidence_interval": { + "lower": -0.0466, + "upper": 0.0466, + "method": "unknown" + } } } }, @@ -304,8 +340,12 @@ }, "score_details": { "score": 0.6575, - "details": { - "confidence_interval": 0.0465046373306654 + "uncertainty": { + "confidence_interval": { + "lower": -0.0465, + "upper": 0.0465, + "method": "unknown" + } } } }, @@ -327,8 +367,12 @@ }, "score_details": { "score": 0.5225, - "details": { - "confidence_interval": 0.048949462883814 + "uncertainty": { + "confidence_interval": { + "lower": -0.0489, + "upper": 0.0489, + "method": "unknown" + } } } }, @@ -350,8 +394,12 @@ }, "score_details": { "score": 0.485, - "details": { - "confidence_interval": 0.0489770450552826 + "uncertainty": { + "confidence_interval": { + "lower": -0.049, + "upper": 0.049, + "method": "unknown" + } } } }, @@ -373,8 +421,12 @@ }, "score_details": { "score": 0.69, - "details": { - "confidence_interval": 0.0453235049876571 + "uncertainty": { + "confidence_interval": { + "lower": -0.0453, + "upper": 0.0453, + "method": "unknown" + } } } }, @@ -396,8 +448,12 @@ }, "score_details": { "score": 0.6675, - "details": { - "confidence_interval": 0.0461678398924898 + "uncertainty": { + "confidence_interval": { + "lower": -0.0462, + "upper": 0.0462, + "method": "unknown" + } } } }, @@ -419,8 +475,12 @@ }, "score_details": { "score": 0.69, - "details": { - "confidence_interval": 0.0453235049876571 + "uncertainty": { + "confidence_interval": { + "lower": -0.0453, + "upper": 0.0453, + "method": "unknown" + } } } }, @@ -442,8 +502,12 @@ }, "score_details": { "score": 0.7, - "details": { - "confidence_interval": 0.0449084165927102 + "uncertainty": { + "confidence_interval": { + "lower": -0.0449, + "upper": 0.0449, + "method": "unknown" + } } } } diff --git a/data/global-mmlu-lite/anthropic/claude-3-7-sonnet-20250219/4291c294-8155-4664-aec4-272445cc8862.json b/data/global-mmlu-lite/anthropic/claude-3-7-sonnet-20250219/7be4f076-a537-4d34-b884-8860a2d23e57.json similarity index 80% rename from data/global-mmlu-lite/anthropic/claude-3-7-sonnet-20250219/4291c294-8155-4664-aec4-272445cc8862.json rename to data/global-mmlu-lite/anthropic/claude-3-7-sonnet-20250219/7be4f076-a537-4d34-b884-8860a2d23e57.json index c33d9b0ec..03f1c7ab0 100644 --- a/data/global-mmlu-lite/anthropic/claude-3-7-sonnet-20250219/4291c294-8155-4664-aec4-272445cc8862.json +++ b/data/global-mmlu-lite/anthropic/claude-3-7-sonnet-20250219/7be4f076-a537-4d34-b884-8860a2d23e57.json @@ -1,7 +1,7 @@ { "schema_version": "0.2.0", - "evaluation_id": "global-mmlu-lite/anthropic_claude-3-7-sonnet-20250219/1770682039.8556428", - "retrieved_timestamp": "1770682039.8556428", + "evaluation_id": "global-mmlu-lite/anthropic_claude-3-7-sonnet-20250219/1770822205.005286", + "retrieved_timestamp": "1770822205.005286", "source_metadata": { "source_name": "Global MMLU Lite", "source_type": "documentation", @@ -97,8 +97,12 @@ }, "score_details": { "score": 0.7925, - "details": { - "confidence_interval": 0.039739901042451 + "uncertainty": { + "confidence_interval": { + "lower": -0.0397, + "upper": 0.0397, + "method": "unknown" + } } } }, @@ -120,8 +124,12 @@ }, 
"score_details": { "score": 0.7625, - "details": { - "confidence_interval": 0.0417032427788918 + "uncertainty": { + "confidence_interval": { + "lower": -0.0417, + "upper": 0.0417, + "method": "unknown" + } } } }, @@ -143,8 +151,12 @@ }, "score_details": { "score": 0.825, - "details": { - "confidence_interval": 0.0372360919417476 + "uncertainty": { + "confidence_interval": { + "lower": -0.0372, + "upper": 0.0372, + "method": "unknown" + } } } }, @@ -166,8 +178,12 @@ }, "score_details": { "score": 0.8125, - "details": { - "confidence_interval": 0.0382499098762049 + "uncertainty": { + "confidence_interval": { + "lower": -0.0382, + "upper": 0.0382, + "method": "unknown" + } } } }, @@ -189,8 +205,12 @@ }, "score_details": { "score": 0.7675, - "details": { - "confidence_interval": 0.0413969901513152 + "uncertainty": { + "confidence_interval": { + "lower": -0.0414, + "upper": 0.0414, + "method": "unknown" + } } } }, @@ -212,8 +232,12 @@ }, "score_details": { "score": 0.805, - "details": { - "confidence_interval": 0.0388269557903546 + "uncertainty": { + "confidence_interval": { + "lower": -0.0388, + "upper": 0.0388, + "method": "unknown" + } } } }, @@ -235,8 +259,12 @@ }, "score_details": { "score": 0.8175, - "details": { - "confidence_interval": 0.037852399096026 + "uncertainty": { + "confidence_interval": { + "lower": -0.0379, + "upper": 0.0379, + "method": "unknown" + } } } }, @@ -258,8 +286,12 @@ }, "score_details": { "score": 0.8225, - "details": { - "confidence_interval": 0.0374442578609762 + "uncertainty": { + "confidence_interval": { + "lower": -0.0374, + "upper": 0.0374, + "method": "unknown" + } } } }, @@ -281,8 +313,12 @@ }, "score_details": { "score": 0.8425, - "details": { - "confidence_interval": 0.0356979542967269 + "uncertainty": { + "confidence_interval": { + "lower": -0.0357, + "upper": 0.0357, + "method": "unknown" + } } } }, @@ -304,8 +340,12 @@ }, "score_details": { "score": 0.83, - "details": { - "confidence_interval": 0.036811337913744 + "uncertainty": { + "confidence_interval": { + "lower": -0.0368, + "upper": 0.0368, + "method": "unknown" + } } } }, @@ -327,8 +367,12 @@ }, "score_details": { "score": 0.77, - "details": { - "confidence_interval": 0.0412408279846843 + "uncertainty": { + "confidence_interval": { + "lower": -0.0412, + "upper": 0.0412, + "method": "unknown" + } } } }, @@ -350,8 +394,12 @@ }, "score_details": { "score": 0.8075, - "details": { - "confidence_interval": 0.0386371183112584 + "uncertainty": { + "confidence_interval": { + "lower": -0.0386, + "upper": 0.0386, + "method": "unknown" + } } } }, @@ -373,8 +421,12 @@ }, "score_details": { "score": 0.8125, - "details": { - "confidence_interval": 0.0382499098762049 + "uncertainty": { + "confidence_interval": { + "lower": -0.0382, + "upper": 0.0382, + "method": "unknown" + } } } }, @@ -396,8 +448,12 @@ }, "score_details": { "score": 0.81, - "details": { - "confidence_interval": 0.0384447822371523 + "uncertainty": { + "confidence_interval": { + "lower": -0.0384, + "upper": 0.0384, + "method": "unknown" + } } } }, @@ -419,8 +475,12 @@ }, "score_details": { "score": 0.835, - "details": { - "confidence_interval": 0.0363750253959063 + "uncertainty": { + "confidence_interval": { + "lower": -0.0364, + "upper": 0.0364, + "method": "unknown" + } } } }, @@ -442,8 +502,12 @@ }, "score_details": { "score": 0.8125, - "details": { - "confidence_interval": 0.0382499098762049 + "uncertainty": { + "confidence_interval": { + "lower": -0.0382, + "upper": 0.0382, + "method": "unknown" + } } } } diff --git 
a/data/global-mmlu-lite/anthropic/claude-opus-4-1-20250805/911db593-5c95-41e9-9264-b130be6a9fb1.json b/data/global-mmlu-lite/anthropic/claude-opus-4-1-20250805/d74e3633-6eda-4261-a06a-1f5d6c7d78d6.json similarity index 80% rename from data/global-mmlu-lite/anthropic/claude-opus-4-1-20250805/911db593-5c95-41e9-9264-b130be6a9fb1.json rename to data/global-mmlu-lite/anthropic/claude-opus-4-1-20250805/d74e3633-6eda-4261-a06a-1f5d6c7d78d6.json index 9cd771388..ce46cb3bf 100644 --- a/data/global-mmlu-lite/anthropic/claude-opus-4-1-20250805/911db593-5c95-41e9-9264-b130be6a9fb1.json +++ b/data/global-mmlu-lite/anthropic/claude-opus-4-1-20250805/d74e3633-6eda-4261-a06a-1f5d6c7d78d6.json @@ -1,7 +1,7 @@ { "schema_version": "0.2.0", - "evaluation_id": "global-mmlu-lite/anthropic_claude-opus-4-1-20250805/1770682039.8556428", - "retrieved_timestamp": "1770682039.8556428", + "evaluation_id": "global-mmlu-lite/anthropic_claude-opus-4-1-20250805/1770822205.005286", + "retrieved_timestamp": "1770822205.005286", "source_metadata": { "source_name": "Global MMLU Lite", "source_type": "documentation", @@ -97,8 +97,12 @@ }, "score_details": { "score": 0.945, - "details": { - "confidence_interval": 0.0223416551650486 + "uncertainty": { + "confidence_interval": { + "lower": -0.0223, + "upper": 0.0223, + "method": "unknown" + } } } }, @@ -120,8 +124,12 @@ }, "score_details": { "score": 0.9475, - "details": { - "confidence_interval": 0.0218568391591684 + "uncertainty": { + "confidence_interval": { + "lower": -0.0219, + "upper": 0.0219, + "method": "unknown" + } } } }, @@ -143,8 +151,12 @@ }, "score_details": { "score": 0.9425, - "details": { - "confidence_interval": 0.0228135408783901 + "uncertainty": { + "confidence_interval": { + "lower": -0.0228, + "upper": 0.0228, + "method": "unknown" + } } } }, @@ -166,8 +178,12 @@ }, "score_details": { "score": 0.94, - "details": { - "confidence_interval": 0.0232732828307025 + "uncertainty": { + "confidence_interval": { + "lower": -0.0233, + "upper": 0.0233, + "method": "unknown" + } } } }, @@ -189,8 +205,12 @@ }, "score_details": { "score": 0.945, - "details": { - "confidence_interval": 0.0223416551650486 + "uncertainty": { + "confidence_interval": { + "lower": -0.0223, + "upper": 0.0223, + "method": "unknown" + } } } }, @@ -212,8 +232,12 @@ }, "score_details": { "score": 0.9475, - "details": { - "confidence_interval": 0.0218568391591684 + "uncertainty": { + "confidence_interval": { + "lower": -0.0219, + "upper": 0.0219, + "method": "unknown" + } } } }, @@ -235,8 +259,12 @@ }, "score_details": { "score": 0.9425, - "details": { - "confidence_interval": 0.0228135408783901 + "uncertainty": { + "confidence_interval": { + "lower": -0.0228, + "upper": 0.0228, + "method": "unknown" + } } } }, @@ -258,8 +286,12 @@ }, "score_details": { "score": 0.94, - "details": { - "confidence_interval": 0.0232732828307025 + "uncertainty": { + "confidence_interval": { + "lower": -0.0233, + "upper": 0.0233, + "method": "unknown" + } } } }, @@ -281,8 +313,12 @@ }, "score_details": { "score": 0.94, - "details": { - "confidence_interval": 0.0232732828307025 + "uncertainty": { + "confidence_interval": { + "lower": -0.0233, + "upper": 0.0233, + "method": "unknown" + } } } }, @@ -304,8 +340,12 @@ }, "score_details": { "score": 0.95, - "details": { - "confidence_interval": 0.0213582123539735 + "uncertainty": { + "confidence_interval": { + "lower": -0.0214, + "upper": 0.0214, + "method": "unknown" + } } } }, @@ -327,8 +367,12 @@ }, "score_details": { "score": 0.945, - "details": { - "confidence_interval": 
0.0223416551650486 + "uncertainty": { + "confidence_interval": { + "lower": -0.0223, + "upper": 0.0223, + "method": "unknown" + } } } }, @@ -350,8 +394,12 @@ }, "score_details": { "score": 0.945, - "details": { - "confidence_interval": 0.0223416551650486 + "uncertainty": { + "confidence_interval": { + "lower": -0.0223, + "upper": 0.0223, + "method": "unknown" + } } } }, @@ -373,8 +421,12 @@ }, "score_details": { "score": 0.93, - "details": { - "confidence_interval": 0.0250039481496016 + "uncertainty": { + "confidence_interval": { + "lower": -0.025, + "upper": 0.025, + "method": "unknown" + } } } }, @@ -396,8 +448,12 @@ }, "score_details": { "score": 0.9375, - "details": { - "confidence_interval": 0.0237215870977811 + "uncertainty": { + "confidence_interval": { + "lower": -0.0237, + "upper": 0.0237, + "method": "unknown" + } } } }, @@ -419,8 +475,12 @@ }, "score_details": { "score": 0.945, - "details": { - "confidence_interval": 0.0223416551650486 + "uncertainty": { + "confidence_interval": { + "lower": -0.0223, + "upper": 0.0223, + "method": "unknown" + } } } }, @@ -442,8 +502,12 @@ }, "score_details": { "score": 0.945, - "details": { - "confidence_interval": 0.0223416551650486 + "uncertainty": { + "confidence_interval": { + "lower": -0.0223, + "upper": 0.0223, + "method": "unknown" + } } } } diff --git a/data/global-mmlu-lite/anthropic/claude-sonnet-4-20250514/51465d80-23e2-4328-8845-70b373408d65.json b/data/global-mmlu-lite/anthropic/claude-sonnet-4-20250514/1c486108-80af-4752-96d8-02832e4420cb.json similarity index 80% rename from data/global-mmlu-lite/anthropic/claude-sonnet-4-20250514/51465d80-23e2-4328-8845-70b373408d65.json rename to data/global-mmlu-lite/anthropic/claude-sonnet-4-20250514/1c486108-80af-4752-96d8-02832e4420cb.json index 6fd34931c..5f464fbed 100644 --- a/data/global-mmlu-lite/anthropic/claude-sonnet-4-20250514/51465d80-23e2-4328-8845-70b373408d65.json +++ b/data/global-mmlu-lite/anthropic/claude-sonnet-4-20250514/1c486108-80af-4752-96d8-02832e4420cb.json @@ -1,7 +1,7 @@ { "schema_version": "0.2.0", - "evaluation_id": "global-mmlu-lite/anthropic_claude-sonnet-4-20250514/1770682039.8556428", - "retrieved_timestamp": "1770682039.8556428", + "evaluation_id": "global-mmlu-lite/anthropic_claude-sonnet-4-20250514/1770822205.005286", + "retrieved_timestamp": "1770822205.005286", "source_metadata": { "source_name": "Global MMLU Lite", "source_type": "documentation", @@ -97,8 +97,12 @@ }, "score_details": { "score": 0.9125, - "details": { - "confidence_interval": 0.0276909948229923 + "uncertainty": { + "confidence_interval": { + "lower": -0.0277, + "upper": 0.0277, + "method": "unknown" + } } } }, @@ -120,8 +124,12 @@ }, "score_details": { "score": 0.905, - "details": { - "confidence_interval": 0.0287345359327925 + "uncertainty": { + "confidence_interval": { + "lower": -0.0287, + "upper": 0.0287, + "method": "unknown" + } } } }, @@ -143,8 +151,12 @@ }, "score_details": { "score": 0.9075, - "details": { - "confidence_interval": 0.0283930651251164 + "uncertainty": { + "confidence_interval": { + "lower": -0.0284, + "upper": 0.0284, + "method": "unknown" + } } } }, @@ -166,8 +178,12 @@ }, "score_details": { "score": 0.9125, - "details": { - "confidence_interval": 0.0276909948229923 + "uncertainty": { + "confidence_interval": { + "lower": -0.0277, + "upper": 0.0277, + "method": "unknown" + } } } }, @@ -189,8 +205,12 @@ }, "score_details": { "score": 0.91, - "details": { - "confidence_interval": 0.0280452971732717 + "uncertainty": { + "confidence_interval": { + "lower": -0.028, + 
"upper": 0.028, + "method": "unknown" + } } } }, @@ -212,8 +232,12 @@ }, "score_details": { "score": 0.9, - "details": { - "confidence_interval": 0.0293994597681008 + "uncertainty": { + "confidence_interval": { + "lower": -0.0294, + "upper": 0.0294, + "method": "unknown" + } } } }, @@ -235,8 +259,12 @@ }, "score_details": { "score": 0.9025, - "details": { - "confidence_interval": 0.0290699315059157 + "uncertainty": { + "confidence_interval": { + "lower": -0.0291, + "upper": 0.0291, + "method": "unknown" + } } } }, @@ -258,8 +286,12 @@ }, "score_details": { "score": 0.9075, - "details": { - "confidence_interval": 0.0283930651251164 + "uncertainty": { + "confidence_interval": { + "lower": -0.0284, + "upper": 0.0284, + "method": "unknown" + } } } }, @@ -281,8 +313,12 @@ }, "score_details": { "score": 0.9, - "details": { - "confidence_interval": 0.0293994597681008 + "uncertainty": { + "confidence_interval": { + "lower": -0.0294, + "upper": 0.0294, + "method": "unknown" + } } } }, @@ -304,8 +340,12 @@ }, "score_details": { "score": 0.9125, - "details": { - "confidence_interval": 0.0276909948229923 + "uncertainty": { + "confidence_interval": { + "lower": -0.0277, + "upper": 0.0277, + "method": "unknown" + } } } }, @@ -327,8 +367,12 @@ }, "score_details": { "score": 0.91, - "details": { - "confidence_interval": 0.0280452971732717 + "uncertainty": { + "confidence_interval": { + "lower": -0.028, + "upper": 0.028, + "method": "unknown" + } } } }, @@ -350,8 +394,12 @@ }, "score_details": { "score": 0.9075, - "details": { - "confidence_interval": 0.0283930651251164 + "uncertainty": { + "confidence_interval": { + "lower": -0.0284, + "upper": 0.0284, + "method": "unknown" + } } } }, @@ -373,8 +421,12 @@ }, "score_details": { "score": 0.8975, - "details": { - "confidence_interval": 0.0297233158642432 + "uncertainty": { + "confidence_interval": { + "lower": -0.0297, + "upper": 0.0297, + "method": "unknown" + } } } }, @@ -396,8 +448,12 @@ }, "score_details": { "score": 0.8975, - "details": { - "confidence_interval": 0.0297233158642432 + "uncertainty": { + "confidence_interval": { + "lower": -0.0297, + "upper": 0.0297, + "method": "unknown" + } } } }, @@ -419,8 +475,12 @@ }, "score_details": { "score": 0.9175, - "details": { - "confidence_interval": 0.0269617517795541 + "uncertainty": { + "confidence_interval": { + "lower": -0.027, + "upper": 0.027, + "method": "unknown" + } } } }, @@ -442,8 +502,12 @@ }, "score_details": { "score": 0.8925, - "details": { - "confidence_interval": 0.0303547345865505 + "uncertainty": { + "confidence_interval": { + "lower": -0.0304, + "upper": 0.0304, + "method": "unknown" + } } } } diff --git a/data/global-mmlu-lite/cohere/command-a-03-2025/12a16399-1aff-4173-9677-58d0d9e23ea2.json b/data/global-mmlu-lite/cohere/command-a-03-2025/c6d487a0-8088-405c-b6c8-20136afbe339.json similarity index 80% rename from data/global-mmlu-lite/cohere/command-a-03-2025/12a16399-1aff-4173-9677-58d0d9e23ea2.json rename to data/global-mmlu-lite/cohere/command-a-03-2025/c6d487a0-8088-405c-b6c8-20136afbe339.json index da92ce8b3..d40297fe9 100644 --- a/data/global-mmlu-lite/cohere/command-a-03-2025/12a16399-1aff-4173-9677-58d0d9e23ea2.json +++ b/data/global-mmlu-lite/cohere/command-a-03-2025/c6d487a0-8088-405c-b6c8-20136afbe339.json @@ -1,7 +1,7 @@ { "schema_version": "0.2.0", - "evaluation_id": "global-mmlu-lite/cohere_command-a-03-2025/1770682039.8556428", - "retrieved_timestamp": "1770682039.8556428", + "evaluation_id": "global-mmlu-lite/cohere_command-a-03-2025/1770822205.005286", + 
"retrieved_timestamp": "1770822205.005286", "source_metadata": { "source_name": "Global MMLU Lite", "source_type": "documentation", @@ -97,8 +97,12 @@ }, "score_details": { "score": 0.8425, - "details": { - "confidence_interval": 0.0356979542967269 + "uncertainty": { + "confidence_interval": { + "lower": -0.0357, + "upper": 0.0357, + "method": "unknown" + } } } }, @@ -120,8 +124,12 @@ }, "score_details": { "score": 0.855, - "details": { - "confidence_interval": 0.034505248053577 + "uncertainty": { + "confidence_interval": { + "lower": -0.0345, + "upper": 0.0345, + "method": "unknown" + } } } }, @@ -143,8 +151,12 @@ }, "score_details": { "score": 0.8225, - "details": { - "confidence_interval": 0.0374442578609762 + "uncertainty": { + "confidence_interval": { + "lower": -0.0374, + "upper": 0.0374, + "method": "unknown" + } } } }, @@ -166,8 +178,12 @@ }, "score_details": { "score": 0.8425, - "details": { - "confidence_interval": 0.0356979542967269 + "uncertainty": { + "confidence_interval": { + "lower": -0.0357, + "upper": 0.0357, + "method": "unknown" + } } } }, @@ -189,8 +205,12 @@ }, "score_details": { "score": 0.8375, - "details": { - "confidence_interval": 0.0361524043591446 + "uncertainty": { + "confidence_interval": { + "lower": -0.0362, + "upper": 0.0362, + "method": "unknown" + } } } }, @@ -212,8 +232,12 @@ }, "score_details": { "score": 0.8421, - "details": { - "confidence_interval": 0.0357790381242715 + "uncertainty": { + "confidence_interval": { + "lower": -0.0358, + "upper": 0.0358, + "method": "unknown" + } } } }, @@ -235,8 +259,12 @@ }, "score_details": { "score": 0.8546, - "details": { - "confidence_interval": 0.0345843751705089 + "uncertainty": { + "confidence_interval": { + "lower": -0.0346, + "upper": 0.0346, + "method": "unknown" + } } } }, @@ -258,8 +286,12 @@ }, "score_details": { "score": 0.8375, - "details": { - "confidence_interval": 0.0361524043591446 + "uncertainty": { + "confidence_interval": { + "lower": -0.0362, + "upper": 0.0362, + "method": "unknown" + } } } }, @@ -281,8 +313,12 @@ }, "score_details": { "score": 0.845, - "details": { - "confidence_interval": 0.0354660072830454 + "uncertainty": { + "confidence_interval": { + "lower": -0.0355, + "upper": 0.0355, + "method": "unknown" + } } } }, @@ -304,8 +340,12 @@ }, "score_details": { "score": 0.85, - "details": { - "confidence_interval": 0.0349923562952861 + "uncertainty": { + "confidence_interval": { + "lower": -0.035, + "upper": 0.035, + "method": "unknown" + } } } }, @@ -327,8 +367,12 @@ }, "score_details": { "score": 0.84, - "details": { - "confidence_interval": 0.0359267332741682 + "uncertainty": { + "confidence_interval": { + "lower": -0.0359, + "upper": 0.0359, + "method": "unknown" + } } } }, @@ -350,8 +394,12 @@ }, "score_details": { "score": 0.8525, - "details": { - "confidence_interval": 0.0347505193336969 + "uncertainty": { + "confidence_interval": { + "lower": -0.0348, + "upper": 0.0348, + "method": "unknown" + } } } }, @@ -373,8 +421,12 @@ }, "score_details": { "score": 0.8275, - "details": { - "confidence_interval": 0.0370251346228631 + "uncertainty": { + "confidence_interval": { + "lower": -0.037, + "upper": 0.037, + "method": "unknown" + } } } }, @@ -396,8 +448,12 @@ }, "score_details": { "score": 0.815, - "details": { - "confidence_interval": 0.0380524622623213 + "uncertainty": { + "confidence_interval": { + "lower": -0.0381, + "upper": 0.0381, + "method": "unknown" + } } } }, @@ -419,8 +475,12 @@ }, "score_details": { "score": 0.835, - "details": { - "confidence_interval": 0.0363750253959063 
+ "uncertainty": { + "confidence_interval": { + "lower": -0.0364, + "upper": 0.0364, + "method": "unknown" + } } } }, @@ -442,8 +502,12 @@ }, "score_details": { "score": 0.8175, - "details": { - "confidence_interval": 0.037852399096026 + "uncertainty": { + "confidence_interval": { + "lower": -0.0379, + "upper": 0.0379, + "method": "unknown" + } } } } diff --git a/data/global-mmlu-lite/deepseek/deepseek-r1-0528/aeaab8dd-70cd-484c-a550-18ce9f1dbad7.json b/data/global-mmlu-lite/deepseek/deepseek-r1-0528/0ef90f90-b956-48ba-bad8-234d74c27c9c.json similarity index 80% rename from data/global-mmlu-lite/deepseek/deepseek-r1-0528/aeaab8dd-70cd-484c-a550-18ce9f1dbad7.json rename to data/global-mmlu-lite/deepseek/deepseek-r1-0528/0ef90f90-b956-48ba-bad8-234d74c27c9c.json index 9c1f481e7..f1a17dbc6 100644 --- a/data/global-mmlu-lite/deepseek/deepseek-r1-0528/aeaab8dd-70cd-484c-a550-18ce9f1dbad7.json +++ b/data/global-mmlu-lite/deepseek/deepseek-r1-0528/0ef90f90-b956-48ba-bad8-234d74c27c9c.json @@ -1,7 +1,7 @@ { "schema_version": "0.2.0", - "evaluation_id": "global-mmlu-lite/deepseek_deepseek-r1-0528/1770682039.8556428", - "retrieved_timestamp": "1770682039.8556428", + "evaluation_id": "global-mmlu-lite/deepseek_deepseek-r1-0528/1770822205.005286", + "retrieved_timestamp": "1770822205.005286", "source_metadata": { "source_name": "Global MMLU Lite", "source_type": "documentation", @@ -97,8 +97,12 @@ }, "score_details": { "score": 0.6825, - "details": { - "confidence_interval": 0.0456185301529649 + "uncertainty": { + "confidence_interval": { + "lower": -0.0456, + "upper": 0.0456, + "method": "unknown" + } } } }, @@ -120,8 +124,12 @@ }, "score_details": { "score": 0.715, - "details": { - "confidence_interval": 0.0442378025897236 + "uncertainty": { + "confidence_interval": { + "lower": -0.0442, + "upper": 0.0442, + "method": "unknown" + } } } }, @@ -143,8 +151,12 @@ }, "score_details": { "score": 0.655, - "details": { - "confidence_interval": 0.0465852352416072 + "uncertainty": { + "confidence_interval": { + "lower": -0.0466, + "upper": 0.0466, + "method": "unknown" + } } } }, @@ -166,8 +178,12 @@ }, "score_details": { "score": 0.6375, - "details": { - "confidence_interval": 0.0471099014100216 + "uncertainty": { + "confidence_interval": { + "lower": -0.0471, + "upper": 0.0471, + "method": "unknown" + } } } }, @@ -189,8 +205,12 @@ }, "score_details": { "score": 0.6925, - "details": { - "confidence_interval": 0.0452220810763167 + "uncertainty": { + "confidence_interval": { + "lower": -0.0452, + "upper": 0.0452, + "method": "unknown" + } } } }, @@ -212,8 +232,12 @@ }, "score_details": { "score": 0.6475, - "details": { - "confidence_interval": 0.046818505067596 + "uncertainty": { + "confidence_interval": { + "lower": -0.0468, + "upper": 0.0468, + "method": "unknown" + } } } }, @@ -235,8 +259,12 @@ }, "score_details": { "score": 0.655, - "details": { - "confidence_interval": 0.0465852352416072 + "uncertainty": { + "confidence_interval": { + "lower": -0.0466, + "upper": 0.0466, + "method": "unknown" + } } } }, @@ -258,8 +286,12 @@ }, "score_details": { "score": 0.6775, - "details": { - "confidence_interval": 0.0458076069884696 + "uncertainty": { + "confidence_interval": { + "lower": -0.0458, + "upper": 0.0458, + "method": "unknown" + } } } }, @@ -281,8 +313,12 @@ }, "score_details": { "score": 0.7725, - "details": { - "confidence_interval": 0.0410826112430601 + "uncertainty": { + "confidence_interval": { + "lower": -0.0411, + "upper": 0.0411, + "method": "unknown" + } } } }, @@ -304,8 +340,12 @@ }, 
"score_details": { "score": 0.6575, - "details": { - "confidence_interval": 0.0465046373306654 + "uncertainty": { + "confidence_interval": { + "lower": -0.0465, + "upper": 0.0465, + "method": "unknown" + } } } }, @@ -327,8 +367,12 @@ }, "score_details": { "score": 0.635, - "details": { - "confidence_interval": 0.0471792888396587 + "uncertainty": { + "confidence_interval": { + "lower": -0.0472, + "upper": 0.0472, + "method": "unknown" + } } } }, @@ -350,8 +394,12 @@ }, "score_details": { "score": 0.7175, - "details": { - "confidence_interval": 0.0441202814428089 + "uncertainty": { + "confidence_interval": { + "lower": -0.0441, + "upper": 0.0441, + "method": "unknown" + } } } }, @@ -373,8 +421,12 @@ }, "score_details": { "score": 0.6775, - "details": { - "confidence_interval": 0.0458076069884696 + "uncertainty": { + "confidence_interval": { + "lower": -0.0458, + "upper": 0.0458, + "method": "unknown" + } } } }, @@ -396,8 +448,12 @@ }, "score_details": { "score": 0.77, - "details": { - "confidence_interval": 0.0412408279846843 + "uncertainty": { + "confidence_interval": { + "lower": -0.0412, + "upper": 0.0412, + "method": "unknown" + } } } }, @@ -419,8 +475,12 @@ }, "score_details": { "score": 0.5075, - "details": { - "confidence_interval": 0.0489935869046875 + "uncertainty": { + "confidence_interval": { + "lower": -0.049, + "upper": 0.049, + "method": "unknown" + } } } }, @@ -442,8 +502,12 @@ }, "score_details": { "score": 0.69, - "details": { - "confidence_interval": 0.0453235049876571 + "uncertainty": { + "confidence_interval": { + "lower": -0.0453, + "upper": 0.0453, + "method": "unknown" + } } } } diff --git a/data/global-mmlu-lite/deepseek/deepseek-v3.1/803ce795-80f9-49a1-92a5-8d81c8d0ff4b.json b/data/global-mmlu-lite/deepseek/deepseek-v3.1/10fddd86-ef3c-4d77-a1ca-020d7b19b27d.json similarity index 80% rename from data/global-mmlu-lite/deepseek/deepseek-v3.1/803ce795-80f9-49a1-92a5-8d81c8d0ff4b.json rename to data/global-mmlu-lite/deepseek/deepseek-v3.1/10fddd86-ef3c-4d77-a1ca-020d7b19b27d.json index 48717a932..af44b4305 100644 --- a/data/global-mmlu-lite/deepseek/deepseek-v3.1/803ce795-80f9-49a1-92a5-8d81c8d0ff4b.json +++ b/data/global-mmlu-lite/deepseek/deepseek-v3.1/10fddd86-ef3c-4d77-a1ca-020d7b19b27d.json @@ -1,7 +1,7 @@ { "schema_version": "0.2.0", - "evaluation_id": "global-mmlu-lite/deepseek_deepseek-v3.1/1770682039.8556428", - "retrieved_timestamp": "1770682039.8556428", + "evaluation_id": "global-mmlu-lite/deepseek_deepseek-v3.1/1770822205.005286", + "retrieved_timestamp": "1770822205.005286", "source_metadata": { "source_name": "Global MMLU Lite", "source_type": "documentation", @@ -94,8 +94,12 @@ }, "score_details": { "score": 0.805, - "details": { - "confidence_interval": 0.0388269557903546 + "uncertainty": { + "confidence_interval": { + "lower": -0.0388, + "upper": 0.0388, + "method": "unknown" + } } } }, @@ -117,8 +121,12 @@ }, "score_details": { "score": 0.825, - "details": { - "confidence_interval": 0.0372360919417476 + "uncertainty": { + "confidence_interval": { + "lower": -0.0372, + "upper": 0.0372, + "method": "unknown" + } } } }, @@ -140,8 +148,12 @@ }, "score_details": { "score": 0.8157, - "details": { - "confidence_interval": 0.0381916132135631 + "uncertainty": { + "confidence_interval": { + "lower": -0.0382, + "upper": 0.0382, + "method": "unknown" + } } } }, @@ -163,8 +175,12 @@ }, "score_details": { "score": 0.7925, - "details": { - "confidence_interval": 0.039739901042451 + "uncertainty": { + "confidence_interval": { + "lower": -0.0397, + "upper": 0.0397, + 
"method": "unknown" + } } } }, @@ -186,8 +202,12 @@ }, "score_details": { "score": 0.8175, - "details": { - "confidence_interval": 0.037852399096026 + "uncertainty": { + "confidence_interval": { + "lower": -0.0379, + "upper": 0.0379, + "method": "unknown" + } } } }, @@ -209,8 +229,12 @@ }, "score_details": { "score": 0.7569, - "details": { - "confidence_interval": 0.0420899186250792 + "uncertainty": { + "confidence_interval": { + "lower": -0.0421, + "upper": 0.0421, + "method": "unknown" + } } } }, @@ -232,8 +256,12 @@ }, "score_details": { "score": 0.7764, - "details": { - "confidence_interval": 0.0409352762868413 + "uncertainty": { + "confidence_interval": { + "lower": -0.0409, + "upper": 0.0409, + "method": "unknown" + } } } }, @@ -255,8 +283,12 @@ }, "score_details": { "score": 0.8075, - "details": { - "confidence_interval": 0.0386371183112584 + "uncertainty": { + "confidence_interval": { + "lower": -0.0386, + "upper": 0.0386, + "method": "unknown" + } } } }, @@ -278,8 +310,12 @@ }, "score_details": { "score": 0.8312, - "details": { - "confidence_interval": 0.0374186973347394 + "uncertainty": { + "confidence_interval": { + "lower": -0.0374, + "upper": 0.0374, + "method": "unknown" + } } } }, @@ -301,8 +337,12 @@ }, "score_details": { "score": 0.8125, - "details": { - "confidence_interval": 0.0382499098762049 + "uncertainty": { + "confidence_interval": { + "lower": -0.0382, + "upper": 0.0382, + "method": "unknown" + } } } }, @@ -324,8 +364,12 @@ }, "score_details": { "score": 0.8246, - "details": { - "confidence_interval": 0.0373194914033146 + "uncertainty": { + "confidence_interval": { + "lower": -0.0373, + "upper": 0.0373, + "method": "unknown" + } } } }, @@ -347,8 +391,12 @@ }, "score_details": { "score": 0.8125, - "details": { - "confidence_interval": 0.0382499098762049 + "uncertainty": { + "confidence_interval": { + "lower": -0.0382, + "upper": 0.0382, + "method": "unknown" + } } } }, @@ -370,8 +418,12 @@ }, "score_details": { "score": 0.801, - "details": { - "confidence_interval": 0.0392725803132057 + "uncertainty": { + "confidence_interval": { + "lower": -0.0393, + "upper": 0.0393, + "method": "unknown" + } } } }, @@ -393,8 +445,12 @@ }, "score_details": { "score": 0.7831, - "details": { - "confidence_interval": 0.0415492484871426 + "uncertainty": { + "confidence_interval": { + "lower": -0.0415, + "upper": 0.0415, + "method": "unknown" + } } } }, @@ -416,8 +472,12 @@ }, "score_details": { "score": 0.8161, - "details": { - "confidence_interval": 0.0381062547094242 + "uncertainty": { + "confidence_interval": { + "lower": -0.0381, + "upper": 0.0381, + "method": "unknown" + } } } }, @@ -439,8 +499,12 @@ }, "score_details": { "score": 0.7925, - "details": { - "confidence_interval": 0.039739901042451 + "uncertainty": { + "confidence_interval": { + "lower": -0.0397, + "upper": 0.0397, + "method": "unknown" + } } } } diff --git a/data/global-mmlu-lite/google/gemini-2.5-flash-preview-05-20/3796f2e5-ee3f-4598-911f-92e8efac92c3.json b/data/global-mmlu-lite/google/gemini-2.5-flash-preview-05-20/d491f587-b539-4918-9377-2a8204dbd8a4.json similarity index 80% rename from data/global-mmlu-lite/google/gemini-2.5-flash-preview-05-20/3796f2e5-ee3f-4598-911f-92e8efac92c3.json rename to data/global-mmlu-lite/google/gemini-2.5-flash-preview-05-20/d491f587-b539-4918-9377-2a8204dbd8a4.json index 7e5476dde..c80db486b 100644 --- a/data/global-mmlu-lite/google/gemini-2.5-flash-preview-05-20/3796f2e5-ee3f-4598-911f-92e8efac92c3.json +++ 
b/data/global-mmlu-lite/google/gemini-2.5-flash-preview-05-20/d491f587-b539-4918-9377-2a8204dbd8a4.json @@ -1,7 +1,7 @@ { "schema_version": "0.2.0", - "evaluation_id": "global-mmlu-lite/google_gemini-2.5-flash-preview-05-20/1770682039.8556428", - "retrieved_timestamp": "1770682039.8556428", + "evaluation_id": "global-mmlu-lite/google_gemini-2.5-flash-preview-05-20/1770822205.005286", + "retrieved_timestamp": "1770822205.005286", "source_metadata": { "source_name": "Global MMLU Lite", "source_type": "documentation", @@ -97,8 +97,12 @@ }, "score_details": { "score": 0.905, - "details": { - "confidence_interval": 0.0287345359327925 + "uncertainty": { + "confidence_interval": { + "lower": -0.0287, + "upper": 0.0287, + "method": "unknown" + } } } }, @@ -120,8 +124,12 @@ }, "score_details": { "score": 0.9225, - "details": { - "confidence_interval": 0.0262030674045044 + "uncertainty": { + "confidence_interval": { + "lower": -0.0262, + "upper": 0.0262, + "method": "unknown" + } } } }, @@ -143,8 +151,12 @@ }, "score_details": { "score": 0.91, - "details": { - "confidence_interval": 0.0280452971732717 + "uncertainty": { + "confidence_interval": { + "lower": -0.028, + "upper": 0.028, + "method": "unknown" + } } } }, @@ -166,8 +178,12 @@ }, "score_details": { "score": 0.905, - "details": { - "confidence_interval": 0.0287345359327925 + "uncertainty": { + "confidence_interval": { + "lower": -0.0287, + "upper": 0.0287, + "method": "unknown" + } } } }, @@ -189,8 +205,12 @@ }, "score_details": { "score": 0.925, - "details": { - "confidence_interval": 0.0258118773864695 + "uncertainty": { + "confidence_interval": { + "lower": -0.0258, + "upper": 0.0258, + "method": "unknown" + } } } }, @@ -212,8 +232,12 @@ }, "score_details": { "score": 0.9125, - "details": { - "confidence_interval": 0.0276909948229923 + "uncertainty": { + "confidence_interval": { + "lower": -0.0277, + "upper": 0.0277, + "method": "unknown" + } } } }, @@ -235,8 +259,12 @@ }, "score_details": { "score": 0.9075, - "details": { - "confidence_interval": 0.0283930651251164 + "uncertainty": { + "confidence_interval": { + "lower": -0.0284, + "upper": 0.0284, + "method": "unknown" + } } } }, @@ -258,8 +286,12 @@ }, "score_details": { "score": 0.89, - "details": { - "confidence_interval": 0.0306626327370121 + "uncertainty": { + "confidence_interval": { + "lower": -0.0307, + "upper": 0.0307, + "method": "unknown" + } } } }, @@ -281,8 +313,12 @@ }, "score_details": { "score": 0.9125, - "details": { - "confidence_interval": 0.0276909948229923 + "uncertainty": { + "confidence_interval": { + "lower": -0.0277, + "upper": 0.0277, + "method": "unknown" + } } } }, @@ -304,8 +340,12 @@ }, "score_details": { "score": 0.9075, - "details": { - "confidence_interval": 0.0283930651251164 + "uncertainty": { + "confidence_interval": { + "lower": -0.0284, + "upper": 0.0284, + "method": "unknown" + } } } }, @@ -327,8 +367,12 @@ }, "score_details": { "score": 0.915, - "details": { - "confidence_interval": 0.0273299039414468 + "uncertainty": { + "confidence_interval": { + "lower": -0.0273, + "upper": 0.0273, + "method": "unknown" + } } } }, @@ -350,8 +394,12 @@ }, "score_details": { "score": 0.915, - "details": { - "confidence_interval": 0.0273299039414468 + "uncertainty": { + "confidence_interval": { + "lower": -0.0273, + "upper": 0.0273, + "method": "unknown" + } } } }, @@ -373,8 +421,12 @@ }, "score_details": { "score": 0.905, - "details": { - "confidence_interval": 0.0287345359327925 + "uncertainty": { + "confidence_interval": { + "lower": -0.0287, + "upper": 0.0287, 
+ "method": "unknown" + } } } }, @@ -396,8 +448,12 @@ }, "score_details": { "score": 0.8825, - "details": { - "confidence_interval": 0.0315569037846059 + "uncertainty": { + "confidence_interval": { + "lower": -0.0316, + "upper": 0.0316, + "method": "unknown" + } } } }, @@ -419,8 +475,12 @@ }, "score_details": { "score": 0.93, - "details": { - "confidence_interval": 0.0250039481496016 + "uncertainty": { + "confidence_interval": { + "lower": -0.025, + "upper": 0.025, + "method": "unknown" + } } } }, @@ -442,8 +502,12 @@ }, "score_details": { "score": 0.9025, - "details": { - "confidence_interval": 0.0290699315059157 + "uncertainty": { + "confidence_interval": { + "lower": -0.0291, + "upper": 0.0291, + "method": "unknown" + } } } } diff --git a/data/global-mmlu-lite/google/gemini-2.5-flash/b225eef0-9698-4340-bc6d-cece877c8863.json b/data/global-mmlu-lite/google/gemini-2.5-flash/7ec048f5-7539-42fa-8c16-44d006928475.json similarity index 80% rename from data/global-mmlu-lite/google/gemini-2.5-flash/b225eef0-9698-4340-bc6d-cece877c8863.json rename to data/global-mmlu-lite/google/gemini-2.5-flash/7ec048f5-7539-42fa-8c16-44d006928475.json index 13616c5b7..06cf9f2c2 100644 --- a/data/global-mmlu-lite/google/gemini-2.5-flash/b225eef0-9698-4340-bc6d-cece877c8863.json +++ b/data/global-mmlu-lite/google/gemini-2.5-flash/7ec048f5-7539-42fa-8c16-44d006928475.json @@ -1,7 +1,7 @@ { "schema_version": "0.2.0", - "evaluation_id": "global-mmlu-lite/google_gemini-2.5-flash/1770682039.8556428", - "retrieved_timestamp": "1770682039.8556428", + "evaluation_id": "global-mmlu-lite/google_gemini-2.5-flash/1770822205.005286", + "retrieved_timestamp": "1770822205.005286", "source_metadata": { "source_name": "Global MMLU Lite", "source_type": "documentation", @@ -97,8 +97,12 @@ }, "score_details": { "score": 0.9125, - "details": { - "confidence_interval": 0.0276909948229923 + "uncertainty": { + "confidence_interval": { + "lower": -0.0277, + "upper": 0.0277, + "method": "unknown" + } } } }, @@ -120,8 +124,12 @@ }, "score_details": { "score": 0.9325, - "details": { - "confidence_interval": 0.0245863693763976 + "uncertainty": { + "confidence_interval": { + "lower": -0.0246, + "upper": 0.0246, + "method": "unknown" + } } } }, @@ -143,8 +151,12 @@ }, "score_details": { "score": 0.91, - "details": { - "confidence_interval": 0.0280452971732717 + "uncertainty": { + "confidence_interval": { + "lower": -0.028, + "upper": 0.028, + "method": "unknown" + } } } }, @@ -166,8 +178,12 @@ }, "score_details": { "score": 0.9025, - "details": { - "confidence_interval": 0.0290699315059157 + "uncertainty": { + "confidence_interval": { + "lower": -0.0291, + "upper": 0.0291, + "method": "unknown" + } } } }, @@ -189,8 +205,12 @@ }, "score_details": { "score": 0.91, - "details": { - "confidence_interval": 0.0280452971732717 + "uncertainty": { + "confidence_interval": { + "lower": -0.028, + "upper": 0.028, + "method": "unknown" + } } } }, @@ -212,8 +232,12 @@ }, "score_details": { "score": 0.925, - "details": { - "confidence_interval": 0.0258118773864695 + "uncertainty": { + "confidence_interval": { + "lower": -0.0258, + "upper": 0.0258, + "method": "unknown" + } } } }, @@ -235,8 +259,12 @@ }, "score_details": { "score": 0.9075, - "details": { - "confidence_interval": 0.0283930651251164 + "uncertainty": { + "confidence_interval": { + "lower": -0.0284, + "upper": 0.0284, + "method": "unknown" + } } } }, @@ -258,8 +286,12 @@ }, "score_details": { "score": 0.9225, - "details": { - "confidence_interval": 0.0262030674045044 + "uncertainty": { + 
"confidence_interval": { + "lower": -0.0262, + "upper": 0.0262, + "method": "unknown" + } } } }, @@ -281,8 +313,12 @@ }, "score_details": { "score": 0.9125, - "details": { - "confidence_interval": 0.0276909948229923 + "uncertainty": { + "confidence_interval": { + "lower": -0.0277, + "upper": 0.0277, + "method": "unknown" + } } } }, @@ -304,8 +340,12 @@ }, "score_details": { "score": 0.915, - "details": { - "confidence_interval": 0.0273299039414468 + "uncertainty": { + "confidence_interval": { + "lower": -0.0273, + "upper": 0.0273, + "method": "unknown" + } } } }, @@ -327,8 +367,12 @@ }, "score_details": { "score": 0.9125, - "details": { - "confidence_interval": 0.0276909948229923 + "uncertainty": { + "confidence_interval": { + "lower": -0.0277, + "upper": 0.0277, + "method": "unknown" + } } } }, @@ -350,8 +394,12 @@ }, "score_details": { "score": 0.9175, - "details": { - "confidence_interval": 0.0269617517795541 + "uncertainty": { + "confidence_interval": { + "lower": -0.027, + "upper": 0.027, + "method": "unknown" + } } } }, @@ -373,8 +421,12 @@ }, "score_details": { "score": 0.915, - "details": { - "confidence_interval": 0.0273299039414468 + "uncertainty": { + "confidence_interval": { + "lower": -0.0273, + "upper": 0.0273, + "method": "unknown" + } } } }, @@ -396,8 +448,12 @@ }, "score_details": { "score": 0.9075, - "details": { - "confidence_interval": 0.0283930651251164 + "uncertainty": { + "confidence_interval": { + "lower": -0.0284, + "upper": 0.0284, + "method": "unknown" + } } } }, @@ -419,8 +475,12 @@ }, "score_details": { "score": 0.915, - "details": { - "confidence_interval": 0.0273299039414468 + "uncertainty": { + "confidence_interval": { + "lower": -0.0273, + "upper": 0.0273, + "method": "unknown" + } } } }, @@ -442,8 +502,12 @@ }, "score_details": { "score": 0.915, - "details": { - "confidence_interval": 0.0273299039414468 + "uncertainty": { + "confidence_interval": { + "lower": -0.0273, + "upper": 0.0273, + "method": "unknown" + } } } } diff --git a/data/global-mmlu-lite/google/gemini-2.5-pro/531fe0ba-1f29-4409-abdb-daad56918fcc.json b/data/global-mmlu-lite/google/gemini-2.5-pro/c23a5254-8219-460b-b334-0746aded67db.json similarity index 79% rename from data/global-mmlu-lite/google/gemini-2.5-pro/531fe0ba-1f29-4409-abdb-daad56918fcc.json rename to data/global-mmlu-lite/google/gemini-2.5-pro/c23a5254-8219-460b-b334-0746aded67db.json index e386875a5..3b4524e12 100644 --- a/data/global-mmlu-lite/google/gemini-2.5-pro/531fe0ba-1f29-4409-abdb-daad56918fcc.json +++ b/data/global-mmlu-lite/google/gemini-2.5-pro/c23a5254-8219-460b-b334-0746aded67db.json @@ -1,7 +1,7 @@ { "schema_version": "0.2.0", - "evaluation_id": "global-mmlu-lite/google_gemini-2.5-pro/1770682039.8556428", - "retrieved_timestamp": "1770682039.8556428", + "evaluation_id": "global-mmlu-lite/google_gemini-2.5-pro/1770822205.005286", + "retrieved_timestamp": "1770822205.005286", "source_metadata": { "source_name": "Global MMLU Lite", "source_type": "documentation", @@ -97,8 +97,12 @@ }, "score_details": { "score": 0.9475, - "details": { - "confidence_interval": 0.0218568391591684 + "uncertainty": { + "confidence_interval": { + "lower": -0.0219, + "upper": 0.0219, + "method": "unknown" + } } } }, @@ -120,8 +124,12 @@ }, "score_details": { "score": 0.9275, - "details": { - "confidence_interval": 0.0254123049217328 + "uncertainty": { + "confidence_interval": { + "lower": -0.0254, + "upper": 0.0254, + "method": "unknown" + } } } }, @@ -143,8 +151,12 @@ }, "score_details": { "score": 0.9275, - "details": { - 
"confidence_interval": 0.0254123049217328 + "uncertainty": { + "confidence_interval": { + "lower": -0.0254, + "upper": 0.0254, + "method": "unknown" + } } } }, @@ -166,8 +178,12 @@ }, "score_details": { "score": 0.93, - "details": { - "confidence_interval": 0.0250039481496016 + "uncertainty": { + "confidence_interval": { + "lower": -0.025, + "upper": 0.025, + "method": "unknown" + } } } }, @@ -189,8 +205,12 @@ }, "score_details": { "score": 0.9425, - "details": { - "confidence_interval": 0.0228135408783901 + "uncertainty": { + "confidence_interval": { + "lower": -0.0228, + "upper": 0.0228, + "method": "unknown" + } } } }, @@ -212,8 +232,12 @@ }, "score_details": { "score": 0.9275, - "details": { - "confidence_interval": 0.0254123049217328 + "uncertainty": { + "confidence_interval": { + "lower": -0.0254, + "upper": 0.0254, + "method": "unknown" + } } } }, @@ -235,8 +259,12 @@ }, "score_details": { "score": 0.925, - "details": { - "confidence_interval": 0.0258118773864695 + "uncertainty": { + "confidence_interval": { + "lower": -0.0258, + "upper": 0.0258, + "method": "unknown" + } } } }, @@ -258,8 +286,12 @@ }, "score_details": { "score": 0.935, - "details": { - "confidence_interval": 0.0241590904127041 + "uncertainty": { + "confidence_interval": { + "lower": -0.0242, + "upper": 0.0242, + "method": "unknown" + } } } }, @@ -281,8 +313,12 @@ }, "score_details": { "score": 0.9375, - "details": { - "confidence_interval": 0.0237215870977811 + "uncertainty": { + "confidence_interval": { + "lower": -0.0237, + "upper": 0.0237, + "method": "unknown" + } } } }, @@ -304,8 +340,12 @@ }, "score_details": { "score": 0.9275, - "details": { - "confidence_interval": 0.0254123049217328 + "uncertainty": { + "confidence_interval": { + "lower": -0.0254, + "upper": 0.0254, + "method": "unknown" + } } } }, @@ -327,8 +367,12 @@ }, "score_details": { "score": 0.93, - "details": { - "confidence_interval": 0.0250039481496016 + "uncertainty": { + "confidence_interval": { + "lower": -0.025, + "upper": 0.025, + "method": "unknown" + } } } }, @@ -350,8 +394,12 @@ }, "score_details": { "score": 0.94, - "details": { - "confidence_interval": 0.0232732828307025 + "uncertainty": { + "confidence_interval": { + "lower": -0.0233, + "upper": 0.0233, + "method": "unknown" + } } } }, @@ -373,8 +421,12 @@ }, "score_details": { "score": 0.9375, - "details": { - "confidence_interval": 0.0237215870977811 + "uncertainty": { + "confidence_interval": { + "lower": -0.0237, + "upper": 0.0237, + "method": "unknown" + } } } }, @@ -396,8 +448,12 @@ }, "score_details": { "score": 0.925, - "details": { - "confidence_interval": 0.0258118773864695 + "uncertainty": { + "confidence_interval": { + "lower": -0.0258, + "upper": 0.0258, + "method": "unknown" + } } } }, @@ -419,8 +475,12 @@ }, "score_details": { "score": 0.9275, - "details": { - "confidence_interval": 0.0254123049217328 + "uncertainty": { + "confidence_interval": { + "lower": -0.0254, + "upper": 0.0254, + "method": "unknown" + } } } }, @@ -442,8 +502,12 @@ }, "score_details": { "score": 0.93, - "details": { - "confidence_interval": 0.0250039481496016 + "uncertainty": { + "confidence_interval": { + "lower": -0.025, + "upper": 0.025, + "method": "unknown" + } } } } diff --git a/data/global-mmlu-lite/google/gemini-3-pro-preview/9193adbe-0c95-4b5e-a179-4c14e749a75c.json b/data/global-mmlu-lite/google/gemini-3-pro-preview/9f223ea5-a5a1-4acb-81c6-6fe71239225a.json similarity index 80% rename from data/global-mmlu-lite/google/gemini-3-pro-preview/9193adbe-0c95-4b5e-a179-4c14e749a75c.json rename 
to data/global-mmlu-lite/google/gemini-3-pro-preview/9f223ea5-a5a1-4acb-81c6-6fe71239225a.json index 358a13ce5..97360f3df 100644 --- a/data/global-mmlu-lite/google/gemini-3-pro-preview/9193adbe-0c95-4b5e-a179-4c14e749a75c.json +++ b/data/global-mmlu-lite/google/gemini-3-pro-preview/9f223ea5-a5a1-4acb-81c6-6fe71239225a.json @@ -1,7 +1,7 @@ { "schema_version": "0.2.0", - "evaluation_id": "global-mmlu-lite/google_gemini-3-pro-preview/1770682039.8556428", - "retrieved_timestamp": "1770682039.8556428", + "evaluation_id": "global-mmlu-lite/google_gemini-3-pro-preview/1770822205.005286", + "retrieved_timestamp": "1770822205.005286", "source_metadata": { "source_name": "Global MMLU Lite", "source_type": "documentation", @@ -97,8 +97,12 @@ }, "score_details": { "score": 0.9475, - "details": { - "confidence_interval": 0.02185683916 + "uncertainty": { + "confidence_interval": { + "lower": -0.0219, + "upper": 0.0219, + "method": "unknown" + } } } }, @@ -120,8 +124,12 @@ }, "score_details": { "score": 0.9425, - "details": { - "confidence_interval": 0.02281354088 + "uncertainty": { + "confidence_interval": { + "lower": -0.0228, + "upper": 0.0228, + "method": "unknown" + } } } }, @@ -143,8 +151,12 @@ }, "score_details": { "score": 0.9425, - "details": { - "confidence_interval": 0.02281354088 + "uncertainty": { + "confidence_interval": { + "lower": -0.0228, + "upper": 0.0228, + "method": "unknown" + } } } }, @@ -166,8 +178,12 @@ }, "score_details": { "score": 0.94, - "details": { - "confidence_interval": 0.02327328283 + "uncertainty": { + "confidence_interval": { + "lower": -0.0233, + "upper": 0.0233, + "method": "unknown" + } } } }, @@ -189,8 +205,12 @@ }, "score_details": { "score": 0.9575, - "details": { - "confidence_interval": 0.01976887483 + "uncertainty": { + "confidence_interval": { + "lower": -0.0198, + "upper": 0.0198, + "method": "unknown" + } } } }, @@ -212,8 +232,12 @@ }, "score_details": { "score": 0.9425, - "details": { - "confidence_interval": 0.02281354088 + "uncertainty": { + "confidence_interval": { + "lower": -0.0228, + "upper": 0.0228, + "method": "unknown" + } } } }, @@ -235,8 +259,12 @@ }, "score_details": { "score": 0.955, - "details": { - "confidence_interval": 0.02031543089 + "uncertainty": { + "confidence_interval": { + "lower": -0.0203, + "upper": 0.0203, + "method": "unknown" + } } } }, @@ -258,8 +286,12 @@ }, "score_details": { "score": 0.955, - "details": { - "confidence_interval": 0.02031543089 + "uncertainty": { + "confidence_interval": { + "lower": -0.0203, + "upper": 0.0203, + "method": "unknown" + } } } }, @@ -281,8 +313,12 @@ }, "score_details": { "score": 0.94, - "details": { - "confidence_interval": 0.02327328283 + "uncertainty": { + "confidence_interval": { + "lower": -0.0233, + "upper": 0.0233, + "method": "unknown" + } } } }, @@ -304,8 +340,12 @@ }, "score_details": { "score": 0.94, - "details": { - "confidence_interval": 0.02327328283 + "uncertainty": { + "confidence_interval": { + "lower": -0.0233, + "upper": 0.0233, + "method": "unknown" + } } } }, @@ -327,8 +367,12 @@ }, "score_details": { "score": 0.9425, - "details": { - "confidence_interval": 0.02281354088 + "uncertainty": { + "confidence_interval": { + "lower": -0.0228, + "upper": 0.0228, + "method": "unknown" + } } } }, @@ -350,8 +394,12 @@ }, "score_details": { "score": 0.9475, - "details": { - "confidence_interval": 0.02185683916 + "uncertainty": { + "confidence_interval": { + "lower": -0.0219, + "upper": 0.0219, + "method": "unknown" + } } } }, @@ -373,8 +421,12 @@ }, "score_details": { "score": 0.94, 
- "details": { - "confidence_interval": 0.02327328283 + "uncertainty": { + "confidence_interval": { + "lower": -0.0233, + "upper": 0.0233, + "method": "unknown" + } } } }, @@ -396,8 +448,12 @@ }, "score_details": { "score": 0.9425, - "details": { - "confidence_interval": 0.02281354088 + "uncertainty": { + "confidence_interval": { + "lower": -0.0228, + "upper": 0.0228, + "method": "unknown" + } } } }, @@ -419,8 +475,12 @@ }, "score_details": { "score": 0.9475, - "details": { - "confidence_interval": 0.02185683916 + "uncertainty": { + "confidence_interval": { + "lower": -0.0219, + "upper": 0.0219, + "method": "unknown" + } } } }, @@ -442,8 +502,12 @@ }, "score_details": { "score": 0.9425, - "details": { - "confidence_interval": 0.02281354088 + "uncertainty": { + "confidence_interval": { + "lower": -0.0228, + "upper": 0.0228, + "method": "unknown" + } } } } diff --git a/data/global-mmlu-lite/google/gemma-3-27b-it/7a0bdc36-cff9-4a01-aa5c-750882aeccd4.json b/data/global-mmlu-lite/google/gemma-3-27b-it/f2b74519-42b2-4b41-b963-df24fd0e0690.json similarity index 79% rename from data/global-mmlu-lite/google/gemma-3-27b-it/7a0bdc36-cff9-4a01-aa5c-750882aeccd4.json rename to data/global-mmlu-lite/google/gemma-3-27b-it/f2b74519-42b2-4b41-b963-df24fd0e0690.json index 0fda04c23..0d14a421a 100644 --- a/data/global-mmlu-lite/google/gemma-3-27b-it/7a0bdc36-cff9-4a01-aa5c-750882aeccd4.json +++ b/data/global-mmlu-lite/google/gemma-3-27b-it/f2b74519-42b2-4b41-b963-df24fd0e0690.json @@ -1,7 +1,7 @@ { "schema_version": "0.2.0", - "evaluation_id": "global-mmlu-lite/google_gemma-3-27b-it/1770682039.8556428", - "retrieved_timestamp": "1770682039.8556428", + "evaluation_id": "global-mmlu-lite/google_gemma-3-27b-it/1770822205.005286", + "retrieved_timestamp": "1770822205.005286", "source_metadata": { "source_name": "Global MMLU Lite", "source_type": "documentation", @@ -97,8 +97,12 @@ }, "score_details": { "score": 0.78, - "details": { - "confidence_interval": 0.0405953917837699 + "uncertainty": { + "confidence_interval": { + "lower": -0.0406, + "upper": 0.0406, + "method": "unknown" + } } } }, @@ -120,8 +124,12 @@ }, "score_details": { "score": 0.7337, - "details": { - "confidence_interval": 0.0434278012181211 + "uncertainty": { + "confidence_interval": { + "lower": -0.0434, + "upper": 0.0434, + "method": "unknown" + } } } }, @@ -143,8 +151,12 @@ }, "score_details": { "score": 0.75, - "details": { - "confidence_interval": 0.0426482420232902 + "uncertainty": { + "confidence_interval": { + "lower": -0.0426, + "upper": 0.0426, + "method": "unknown" + } } } }, @@ -166,8 +178,12 @@ }, "score_details": { "score": 0.775, - "details": { - "confidence_interval": 0.0409223160958216 + "uncertainty": { + "confidence_interval": { + "lower": -0.0409, + "upper": 0.0409, + "method": "unknown" + } } } }, @@ -189,8 +205,12 @@ }, "score_details": { "score": 0.7481, - "details": { - "confidence_interval": 0.0429190922512267 + "uncertainty": { + "confidence_interval": { + "lower": -0.0429, + "upper": 0.0429, + "method": "unknown" + } } } }, @@ -212,8 +232,12 @@ }, "score_details": { "score": 0.7335, - "details": { - "confidence_interval": 0.0436563406109071 + "uncertainty": { + "confidence_interval": { + "lower": -0.0437, + "upper": 0.0437, + "method": "unknown" + } } } }, @@ -235,8 +259,12 @@ }, "score_details": { "score": 0.7563, - "details": { - "confidence_interval": 0.0421786424783666 + "uncertainty": { + "confidence_interval": { + "lower": -0.0422, + "upper": 0.0422, + "method": "unknown" + } } } }, @@ -258,8 +286,12 @@ }, 
"score_details": { "score": 0.75, - "details": { - "confidence_interval": 0.0424344650278564 + "uncertainty": { + "confidence_interval": { + "lower": -0.0424, + "upper": 0.0424, + "method": "unknown" + } } } }, @@ -281,8 +313,12 @@ }, "score_details": { "score": 0.7925, - "details": { - "confidence_interval": 0.039739901042451 + "uncertainty": { + "confidence_interval": { + "lower": -0.0397, + "upper": 0.0397, + "method": "unknown" + } } } }, @@ -304,8 +340,12 @@ }, "score_details": { "score": 0.798, - "details": { - "confidence_interval": 0.0395452064293286 + "uncertainty": { + "confidence_interval": { + "lower": -0.0395, + "upper": 0.0395, + "method": "unknown" + } } } }, @@ -327,8 +367,12 @@ }, "score_details": { "score": 0.7481, - "details": { - "confidence_interval": 0.0427012467707135 + "uncertainty": { + "confidence_interval": { + "lower": -0.0427, + "upper": 0.0427, + "method": "unknown" + } } } }, @@ -350,8 +394,12 @@ }, "score_details": { "score": 0.7494, - "details": { - "confidence_interval": 0.0425230435928108 + "uncertainty": { + "confidence_interval": { + "lower": -0.0425, + "upper": 0.0425, + "method": "unknown" + } } } }, @@ -373,8 +421,12 @@ }, "score_details": { "score": 0.785, - "details": { - "confidence_interval": 0.0402598501134396 + "uncertainty": { + "confidence_interval": { + "lower": -0.0403, + "upper": 0.0403, + "method": "unknown" + } } } }, @@ -396,8 +448,12 @@ }, "score_details": { "score": 0.7444, - "details": { - "confidence_interval": 0.0428022952090576 + "uncertainty": { + "confidence_interval": { + "lower": -0.0428, + "upper": 0.0428, + "method": "unknown" + } } } }, @@ -419,8 +475,12 @@ }, "score_details": { "score": 0.7925, - "details": { - "confidence_interval": 0.039739901042451 + "uncertainty": { + "confidence_interval": { + "lower": -0.0397, + "upper": 0.0397, + "method": "unknown" + } } } }, @@ -442,8 +502,12 @@ }, "score_details": { "score": 0.7719, - "details": { - "confidence_interval": 0.0411703730204029 + "uncertainty": { + "confidence_interval": { + "lower": -0.0412, + "upper": 0.0412, + "method": "unknown" + } } } } diff --git a/data/global-mmlu-lite/google/gemma-3-4b-it/548f8e83-3e97-4cb4-83ac-8da11d8f3e5f.json b/data/global-mmlu-lite/google/gemma-3-4b-it/b455eee3-2917-4479-89b1-e1a470e62fbe.json similarity index 79% rename from data/global-mmlu-lite/google/gemma-3-4b-it/548f8e83-3e97-4cb4-83ac-8da11d8f3e5f.json rename to data/global-mmlu-lite/google/gemma-3-4b-it/b455eee3-2917-4479-89b1-e1a470e62fbe.json index 291f3c560..6c6b3a3ce 100644 --- a/data/global-mmlu-lite/google/gemma-3-4b-it/548f8e83-3e97-4cb4-83ac-8da11d8f3e5f.json +++ b/data/global-mmlu-lite/google/gemma-3-4b-it/b455eee3-2917-4479-89b1-e1a470e62fbe.json @@ -1,7 +1,7 @@ { "schema_version": "0.2.0", - "evaluation_id": "global-mmlu-lite/google_gemma-3-4b-it/1770682039.8556428", - "retrieved_timestamp": "1770682039.8556428", + "evaluation_id": "global-mmlu-lite/google_gemma-3-4b-it/1770822205.005286", + "retrieved_timestamp": "1770822205.005286", "source_metadata": { "source_name": "Global MMLU Lite", "source_type": "documentation", @@ -97,8 +97,12 @@ }, "score_details": { "score": 0.6525, - "details": { - "confidence_interval": 0.0466644077020903 + "uncertainty": { + "confidence_interval": { + "lower": -0.0467, + "upper": 0.0467, + "method": "unknown" + } } } }, @@ -120,8 +124,12 @@ }, "score_details": { "score": 0.67, - "details": { - "confidence_interval": 0.046079999600029 + "uncertainty": { + "confidence_interval": { + "lower": -0.0461, + "upper": 0.0461, + "method": 
"unknown" + } } } }, @@ -143,8 +151,12 @@ }, "score_details": { "score": 0.68, - "details": { - "confidence_interval": 0.0457138228379294 + "uncertainty": { + "confidence_interval": { + "lower": -0.0457, + "upper": 0.0457, + "method": "unknown" + } } } }, @@ -166,8 +178,12 @@ }, "score_details": { "score": 0.6525, - "details": { - "confidence_interval": 0.0466644077020903 + "uncertainty": { + "confidence_interval": { + "lower": -0.0467, + "upper": 0.0467, + "method": "unknown" + } } } }, @@ -189,8 +205,12 @@ }, "score_details": { "score": 0.6575, - "details": { - "confidence_interval": 0.0465046373306654 + "uncertainty": { + "confidence_interval": { + "lower": -0.0465, + "upper": 0.0465, + "method": "unknown" + } } } }, @@ -212,8 +232,12 @@ }, "score_details": { "score": 0.6475, - "details": { - "confidence_interval": 0.046818505067596 + "uncertainty": { + "confidence_interval": { + "lower": -0.0468, + "upper": 0.0468, + "method": "unknown" + } } } }, @@ -235,8 +259,12 @@ }, "score_details": { "score": 0.6775, - "details": { - "confidence_interval": 0.0458076069884696 + "uncertainty": { + "confidence_interval": { + "lower": -0.0458, + "upper": 0.0458, + "method": "unknown" + } } } }, @@ -258,8 +286,12 @@ }, "score_details": { "score": 0.6675, - "details": { - "confidence_interval": 0.0461678398924898 + "uncertainty": { + "confidence_interval": { + "lower": -0.0462, + "upper": 0.0462, + "method": "unknown" + } } } }, @@ -281,8 +313,12 @@ }, "score_details": { "score": 0.6325, - "details": { - "confidence_interval": 0.0472473039906172 + "uncertainty": { + "confidence_interval": { + "lower": -0.0472, + "upper": 0.0472, + "method": "unknown" + } } } }, @@ -304,8 +340,12 @@ }, "score_details": { "score": 0.66, - "details": { - "confidence_interval": 0.0464226065447579 + "uncertainty": { + "confidence_interval": { + "lower": -0.0464, + "upper": 0.0464, + "method": "unknown" + } } } }, @@ -327,8 +367,12 @@ }, "score_details": { "score": 0.68, - "details": { - "confidence_interval": 0.0457138228379294 + "uncertainty": { + "confidence_interval": { + "lower": -0.0457, + "upper": 0.0457, + "method": "unknown" + } } } }, @@ -350,8 +394,12 @@ }, "score_details": { "score": 0.6725, - "details": { - "confidence_interval": 0.0459906864522658 + "uncertainty": { + "confidence_interval": { + "lower": -0.046, + "upper": 0.046, + "method": "unknown" + } } } }, @@ -373,8 +421,12 @@ }, "score_details": { "score": 0.6075, - "details": { - "confidence_interval": 0.0478532090532308 + "uncertainty": { + "confidence_interval": { + "lower": -0.0479, + "upper": 0.0479, + "method": "unknown" + } } } }, @@ -396,8 +448,12 @@ }, "score_details": { "score": 0.5825, - "details": { - "confidence_interval": 0.0483274967299978 + "uncertainty": { + "confidence_interval": { + "lower": -0.0483, + "upper": 0.0483, + "method": "unknown" + } } } }, @@ -419,8 +475,12 @@ }, "score_details": { "score": 0.6475, - "details": { - "confidence_interval": 0.046818505067596 + "uncertainty": { + "confidence_interval": { + "lower": -0.0468, + "upper": 0.0468, + "method": "unknown" + } } } }, @@ -442,8 +502,12 @@ }, "score_details": { "score": 0.63, - "details": { - "confidence_interval": 0.0473139527809662 + "uncertainty": { + "confidence_interval": { + "lower": -0.0473, + "upper": 0.0473, + "method": "unknown" + } } } } diff --git a/data/global-mmlu-lite/mistralai/mistral-medium-3/1e2e51d0-42e8-4564-a42c-31819f89f459.json b/data/global-mmlu-lite/mistralai/mistral-medium-3/25786cd1-4ffd-4265-9b61-426ab47e5b1d.json similarity index 80% rename 
from data/global-mmlu-lite/mistralai/mistral-medium-3/1e2e51d0-42e8-4564-a42c-31819f89f459.json rename to data/global-mmlu-lite/mistralai/mistral-medium-3/25786cd1-4ffd-4265-9b61-426ab47e5b1d.json index 8c936e6ff..7e4a31d27 100644 --- a/data/global-mmlu-lite/mistralai/mistral-medium-3/1e2e51d0-42e8-4564-a42c-31819f89f459.json +++ b/data/global-mmlu-lite/mistralai/mistral-medium-3/25786cd1-4ffd-4265-9b61-426ab47e5b1d.json @@ -1,7 +1,7 @@ { "schema_version": "0.2.0", - "evaluation_id": "global-mmlu-lite/mistralai_mistral-medium-3/1770682039.8556428", - "retrieved_timestamp": "1770682039.8556428", + "evaluation_id": "global-mmlu-lite/mistralai_mistral-medium-3/1770822205.005286", + "retrieved_timestamp": "1770822205.005286", "source_metadata": { "source_name": "Global MMLU Lite", "source_type": "documentation", @@ -97,8 +97,12 @@ }, "score_details": { "score": 0.455, - "details": { - "confidence_interval": 0.0488002497704065 + "uncertainty": { + "confidence_interval": { + "lower": -0.0488, + "upper": 0.0488, + "method": "unknown" + } } } }, @@ -120,8 +124,12 @@ }, "score_details": { "score": 0.38, - "details": { - "confidence_interval": 0.0475669974392838 + "uncertainty": { + "confidence_interval": { + "lower": -0.0476, + "upper": 0.0476, + "method": "unknown" + } } } }, @@ -143,8 +151,12 @@ }, "score_details": { "score": 0.5175, - "details": { - "confidence_interval": 0.0489690784681949 + "uncertainty": { + "confidence_interval": { + "lower": -0.049, + "upper": 0.049, + "method": "unknown" + } } } }, @@ -166,8 +178,12 @@ }, "score_details": { "score": 0.4775, - "details": { - "confidence_interval": 0.048949462883814 + "uncertainty": { + "confidence_interval": { + "lower": -0.0489, + "upper": 0.0489, + "method": "unknown" + } } } }, @@ -189,8 +205,12 @@ }, "score_details": { "score": 0.41, - "details": { - "confidence_interval": 0.0481987782191081 + "uncertainty": { + "confidence_interval": { + "lower": -0.0482, + "upper": 0.0482, + "method": "unknown" + } } } }, @@ -212,8 +232,12 @@ }, "score_details": { "score": 0.555, - "details": { - "confidence_interval": 0.0487017528493824 + "uncertainty": { + "confidence_interval": { + "lower": -0.0487, + "upper": 0.0487, + "method": "unknown" + } } } }, @@ -235,8 +259,12 @@ }, "score_details": { "score": 0.515, - "details": { - "confidence_interval": 0.0489770450552826 + "uncertainty": { + "confidence_interval": { + "lower": -0.049, + "upper": 0.049, + "method": "unknown" + } } } }, @@ -258,8 +286,12 @@ }, "score_details": { "score": 0.535, - "details": { - "confidence_interval": 0.0488789043994999 + "uncertainty": { + "confidence_interval": { + "lower": -0.0489, + "upper": 0.0489, + "method": "unknown" + } } } }, @@ -281,8 +313,12 @@ }, "score_details": { "score": 0.58, - "details": { - "confidence_interval": 0.0483678449158397 + "uncertainty": { + "confidence_interval": { + "lower": -0.0484, + "upper": 0.0484, + "method": "unknown" + } } } }, @@ -304,8 +340,12 @@ }, "score_details": { "score": 0.595, - "details": { - "confidence_interval": 0.0481065364404039 + "uncertainty": { + "confidence_interval": { + "lower": -0.0481, + "upper": 0.0481, + "method": "unknown" + } } } }, @@ -327,8 +367,12 @@ }, "score_details": { "score": 0.5175, - "details": { - "confidence_interval": 0.0489690784681949 + "uncertainty": { + "confidence_interval": { + "lower": -0.049, + "upper": 0.049, + "method": "unknown" + } } } }, @@ -350,8 +394,12 @@ }, "score_details": { "score": 0.5375, - "details": { - "confidence_interval": 0.0488610953035984 + "uncertainty": { + 
"confidence_interval": { + "lower": -0.0489, + "upper": 0.0489, + "method": "unknown" + } } } }, @@ -373,8 +421,12 @@ }, "score_details": { "score": 0.7075, - "details": { - "confidence_interval": 0.0445804299504003 + "uncertainty": { + "confidence_interval": { + "lower": -0.0446, + "upper": 0.0446, + "method": "unknown" + } } } }, @@ -396,8 +448,12 @@ }, "score_details": { "score": 0.7675, - "details": { - "confidence_interval": 0.0413969901513152 + "uncertainty": { + "confidence_interval": { + "lower": -0.0414, + "upper": 0.0414, + "method": "unknown" + } } } }, @@ -419,8 +475,12 @@ }, "score_details": { "score": 0.535, - "details": { - "confidence_interval": 0.0488789043994999 + "uncertainty": { + "confidence_interval": { + "lower": -0.0489, + "upper": 0.0489, + "method": "unknown" + } } } }, @@ -442,8 +502,12 @@ }, "score_details": { "score": 0.7325, - "details": { - "confidence_interval": 0.0433794261948387 + "uncertainty": { + "confidence_interval": { + "lower": -0.0434, + "upper": 0.0434, + "method": "unknown" + } } } } diff --git a/data/global-mmlu-lite/mistralai/mistral-small-2503/85822e81-7478-4f63-b7f3-89a78e75c6d9.json b/data/global-mmlu-lite/mistralai/mistral-small-2503/257c2515-2933-4f2e-b6e6-70d4a818f086.json similarity index 80% rename from data/global-mmlu-lite/mistralai/mistral-small-2503/85822e81-7478-4f63-b7f3-89a78e75c6d9.json rename to data/global-mmlu-lite/mistralai/mistral-small-2503/257c2515-2933-4f2e-b6e6-70d4a818f086.json index 31a088393..74ee2bb5b 100644 --- a/data/global-mmlu-lite/mistralai/mistral-small-2503/85822e81-7478-4f63-b7f3-89a78e75c6d9.json +++ b/data/global-mmlu-lite/mistralai/mistral-small-2503/257c2515-2933-4f2e-b6e6-70d4a818f086.json @@ -1,7 +1,7 @@ { "schema_version": "0.2.0", - "evaluation_id": "global-mmlu-lite/mistralai_mistral-small-2503/1770682039.8556428", - "retrieved_timestamp": "1770682039.8556428", + "evaluation_id": "global-mmlu-lite/mistralai_mistral-small-2503/1770822205.005286", + "retrieved_timestamp": "1770822205.005286", "source_metadata": { "source_name": "Global MMLU Lite", "source_type": "documentation", @@ -97,8 +97,12 @@ }, "score_details": { "score": 0.7875, - "details": { - "confidence_interval": 0.0400887803670033 + "uncertainty": { + "confidence_interval": { + "lower": -0.0401, + "upper": 0.0401, + "method": "unknown" + } } } }, @@ -120,8 +124,12 @@ }, "score_details": { "score": 0.8, - "details": { - "confidence_interval": 0.039199279690801 + "uncertainty": { + "confidence_interval": { + "lower": -0.0392, + "upper": 0.0392, + "method": "unknown" + } } } }, @@ -143,8 +151,12 @@ }, "score_details": { "score": 0.7725, - "details": { - "confidence_interval": 0.0410826112430601 + "uncertainty": { + "confidence_interval": { + "lower": -0.0411, + "upper": 0.0411, + "method": "unknown" + } } } }, @@ -166,8 +178,12 @@ }, "score_details": { "score": 0.7975, - "details": { - "confidence_interval": 0.0393818356106108 + "uncertainty": { + "confidence_interval": { + "lower": -0.0394, + "upper": 0.0394, + "method": "unknown" + } } } }, @@ -189,8 +205,12 @@ }, "score_details": { "score": 0.8, - "details": { - "confidence_interval": 0.039199279690801 + "uncertainty": { + "confidence_interval": { + "lower": -0.0392, + "upper": 0.0392, + "method": "unknown" + } } } }, @@ -212,8 +232,12 @@ }, "score_details": { "score": 0.795, - "details": { - "confidence_interval": 0.0395620320289107 + "uncertainty": { + "confidence_interval": { + "lower": -0.0396, + "upper": 0.0396, + "method": "unknown" + } } } }, @@ -235,8 +259,12 @@ }, 
"score_details": { "score": 0.785, - "details": { - "confidence_interval": 0.0402598501134396 + "uncertainty": { + "confidence_interval": { + "lower": -0.0403, + "upper": 0.0403, + "method": "unknown" + } } } }, @@ -258,8 +286,12 @@ }, "score_details": { "score": 0.805, - "details": { - "confidence_interval": 0.0388269557903546 + "uncertainty": { + "confidence_interval": { + "lower": -0.0388, + "upper": 0.0388, + "method": "unknown" + } } } }, @@ -281,8 +313,12 @@ }, "score_details": { "score": 0.77, - "details": { - "confidence_interval": 0.0412408279846843 + "uncertainty": { + "confidence_interval": { + "lower": -0.0412, + "upper": 0.0412, + "method": "unknown" + } } } }, @@ -304,8 +340,12 @@ }, "score_details": { "score": 0.79, - "details": { - "confidence_interval": 0.039915473764981 + "uncertainty": { + "confidence_interval": { + "lower": -0.0399, + "upper": 0.0399, + "method": "unknown" + } } } }, @@ -327,8 +367,12 @@ }, "score_details": { "score": 0.7925, - "details": { - "confidence_interval": 0.039739901042451 + "uncertainty": { + "confidence_interval": { + "lower": -0.0397, + "upper": 0.0397, + "method": "unknown" + } } } }, @@ -350,8 +394,12 @@ }, "score_details": { "score": 0.7825, - "details": { - "confidence_interval": 0.0404287113993418 + "uncertainty": { + "confidence_interval": { + "lower": -0.0404, + "upper": 0.0404, + "method": "unknown" + } } } }, @@ -373,8 +421,12 @@ }, "score_details": { "score": 0.775, - "details": { - "confidence_interval": 0.0409223160958216 + "uncertainty": { + "confidence_interval": { + "lower": -0.0409, + "upper": 0.0409, + "method": "unknown" + } } } }, @@ -396,8 +448,12 @@ }, "score_details": { "score": 0.735, - "details": { - "confidence_interval": 0.0432498595893876 + "uncertainty": { + "confidence_interval": { + "lower": -0.0432, + "upper": 0.0432, + "method": "unknown" + } } } }, @@ -419,8 +475,12 @@ }, "score_details": { "score": 0.7925, - "details": { - "confidence_interval": 0.039739901042451 + "uncertainty": { + "confidence_interval": { + "lower": -0.0397, + "upper": 0.0397, + "method": "unknown" + } } } }, @@ -442,8 +502,12 @@ }, "score_details": { "score": 0.7825, - "details": { - "confidence_interval": 0.0404287113993418 + "uncertainty": { + "confidence_interval": { + "lower": -0.0404, + "upper": 0.0404, + "method": "unknown" + } } } } diff --git a/data/global-mmlu-lite/openai/gpt-4.1-2025-04-14/c4b48f92-4f10-4831-86a9-3ede0512bf7b.json b/data/global-mmlu-lite/openai/gpt-4.1-2025-04-14/e7a344c6-eeff-4b11-93aa-a77ce2063a81.json similarity index 80% rename from data/global-mmlu-lite/openai/gpt-4.1-2025-04-14/c4b48f92-4f10-4831-86a9-3ede0512bf7b.json rename to data/global-mmlu-lite/openai/gpt-4.1-2025-04-14/e7a344c6-eeff-4b11-93aa-a77ce2063a81.json index 9e6900ce9..1b0ed18d8 100644 --- a/data/global-mmlu-lite/openai/gpt-4.1-2025-04-14/c4b48f92-4f10-4831-86a9-3ede0512bf7b.json +++ b/data/global-mmlu-lite/openai/gpt-4.1-2025-04-14/e7a344c6-eeff-4b11-93aa-a77ce2063a81.json @@ -1,7 +1,7 @@ { "schema_version": "0.2.0", - "evaluation_id": "global-mmlu-lite/openai_gpt-4.1-2025-04-14/1770682039.8556428", - "retrieved_timestamp": "1770682039.8556428", + "evaluation_id": "global-mmlu-lite/openai_gpt-4.1-2025-04-14/1770822205.005286", + "retrieved_timestamp": "1770822205.005286", "source_metadata": { "source_name": "Global MMLU Lite", "source_type": "documentation", @@ -97,8 +97,12 @@ }, "score_details": { "score": 0.88, - "details": { - "confidence_interval": 0.0318456453642134 + "uncertainty": { + "confidence_interval": { + "lower": -0.0318, + 
"upper": 0.0318, + "method": "unknown" + } } } }, @@ -120,8 +124,12 @@ }, "score_details": { "score": 0.8825, - "details": { - "confidence_interval": 0.0315569037846059 + "uncertainty": { + "confidence_interval": { + "lower": -0.0316, + "upper": 0.0316, + "method": "unknown" + } } } }, @@ -143,8 +151,12 @@ }, "score_details": { "score": 0.8625, - "details": { - "confidence_interval": 0.0337480742790123 + "uncertainty": { + "confidence_interval": { + "lower": -0.0337, + "upper": 0.0337, + "method": "unknown" + } } } }, @@ -166,8 +178,12 @@ }, "score_details": { "score": 0.875, - "details": { - "confidence_interval": 0.0324098580108514 + "uncertainty": { + "confidence_interval": { + "lower": -0.0324, + "upper": 0.0324, + "method": "unknown" + } } } }, @@ -189,8 +205,12 @@ }, "score_details": { "score": 0.8875, - "details": { - "confidence_interval": 0.0309655314070612 + "uncertainty": { + "confidence_interval": { + "lower": -0.031, + "upper": 0.031, + "method": "unknown" + } } } }, @@ -212,8 +232,12 @@ }, "score_details": { "score": 0.8775, - "details": { - "confidence_interval": 0.0321299242960121 + "uncertainty": { + "confidence_interval": { + "lower": -0.0321, + "upper": 0.0321, + "method": "unknown" + } } } }, @@ -235,8 +259,12 @@ }, "score_details": { "score": 0.885, - "details": { - "confidence_interval": 0.0312635759101603 + "uncertainty": { + "confidence_interval": { + "lower": -0.0313, + "upper": 0.0313, + "method": "unknown" + } } } }, @@ -258,8 +286,12 @@ }, "score_details": { "score": 0.88, - "details": { - "confidence_interval": 0.0318456453642134 + "uncertainty": { + "confidence_interval": { + "lower": -0.0318, + "upper": 0.0318, + "method": "unknown" + } } } }, @@ -281,8 +313,12 @@ }, "score_details": { "score": 0.8725, - "details": { - "confidence_interval": 0.0326855581520567 + "uncertainty": { + "confidence_interval": { + "lower": -0.0327, + "upper": 0.0327, + "method": "unknown" + } } } }, @@ -304,8 +340,12 @@ }, "score_details": { "score": 0.87, - "details": { - "confidence_interval": 0.0329571309666248 + "uncertainty": { + "confidence_interval": { + "lower": -0.033, + "upper": 0.033, + "method": "unknown" + } } } }, @@ -327,8 +367,12 @@ }, "score_details": { "score": 0.875, - "details": { - "confidence_interval": 0.0324098580108514 + "uncertainty": { + "confidence_interval": { + "lower": -0.0324, + "upper": 0.0324, + "method": "unknown" + } } } }, @@ -350,8 +394,12 @@ }, "score_details": { "score": 0.885, - "details": { - "confidence_interval": 0.0312635759101603 + "uncertainty": { + "confidence_interval": { + "lower": -0.0313, + "upper": 0.0313, + "method": "unknown" + } } } }, @@ -373,8 +421,12 @@ }, "score_details": { "score": 0.8725, - "details": { - "confidence_interval": 0.0326855581520567 + "uncertainty": { + "confidence_interval": { + "lower": -0.0327, + "upper": 0.0327, + "method": "unknown" + } } } }, @@ -396,8 +448,12 @@ }, "score_details": { "score": 0.875, - "details": { - "confidence_interval": 0.0324098580108514 + "uncertainty": { + "confidence_interval": { + "lower": -0.0324, + "upper": 0.0324, + "method": "unknown" + } } } }, @@ -419,8 +475,12 @@ }, "score_details": { "score": 0.87, - "details": { - "confidence_interval": 0.0329571309666248 + "uncertainty": { + "confidence_interval": { + "lower": -0.033, + "upper": 0.033, + "method": "unknown" + } } } }, @@ -442,8 +502,12 @@ }, "score_details": { "score": 0.8575, - "details": { - "confidence_interval": 0.0342564686873586 + "uncertainty": { + "confidence_interval": { + "lower": -0.0343, + "upper": 0.0343, 
+ "method": "unknown" + } } } } diff --git a/data/global-mmlu-lite/openai/gpt-5-2025-08-07/07d59b47-f0d1-48a4-87fb-b931bbb21ac7.json b/data/global-mmlu-lite/openai/gpt-5-2025-08-07/55a4f39e-36fc-40d9-8e4d-5dfb3bd8d707.json similarity index 80% rename from data/global-mmlu-lite/openai/gpt-5-2025-08-07/07d59b47-f0d1-48a4-87fb-b931bbb21ac7.json rename to data/global-mmlu-lite/openai/gpt-5-2025-08-07/55a4f39e-36fc-40d9-8e4d-5dfb3bd8d707.json index a1689b336..86dfbf015 100644 --- a/data/global-mmlu-lite/openai/gpt-5-2025-08-07/07d59b47-f0d1-48a4-87fb-b931bbb21ac7.json +++ b/data/global-mmlu-lite/openai/gpt-5-2025-08-07/55a4f39e-36fc-40d9-8e4d-5dfb3bd8d707.json @@ -1,7 +1,7 @@ { "schema_version": "0.2.0", - "evaluation_id": "global-mmlu-lite/openai_gpt-5-2025-08-07/1770682039.8556428", - "retrieved_timestamp": "1770682039.8556428", + "evaluation_id": "global-mmlu-lite/openai_gpt-5-2025-08-07/1770822205.005286", + "retrieved_timestamp": "1770822205.005286", "source_metadata": { "source_name": "Global MMLU Lite", "source_type": "documentation", @@ -97,8 +97,12 @@ }, "score_details": { "score": 0.8925, - "details": { - "confidence_interval": 0.0303547345865505 + "uncertainty": { + "confidence_interval": { + "lower": -0.0304, + "upper": 0.0304, + "method": "unknown" + } } } }, @@ -120,8 +124,12 @@ }, "score_details": { "score": 0.8725, - "details": { - "confidence_interval": 0.0326855581520567 + "uncertainty": { + "confidence_interval": { + "lower": -0.0327, + "upper": 0.0327, + "method": "unknown" + } } } }, @@ -143,8 +151,12 @@ }, "score_details": { "score": 0.9, - "details": { - "confidence_interval": 0.0293994597681008 + "uncertainty": { + "confidence_interval": { + "lower": -0.0294, + "upper": 0.0294, + "method": "unknown" + } } } }, @@ -166,8 +178,12 @@ }, "score_details": { "score": 0.91, - "details": { - "confidence_interval": 0.0280452971732717 + "uncertainty": { + "confidence_interval": { + "lower": -0.028, + "upper": 0.028, + "method": "unknown" + } } } }, @@ -189,8 +205,12 @@ }, "score_details": { "score": 0.9075, - "details": { - "confidence_interval": 0.0283930651251164 + "uncertainty": { + "confidence_interval": { + "lower": -0.0284, + "upper": 0.0284, + "method": "unknown" + } } } }, @@ -212,8 +232,12 @@ }, "score_details": { "score": 0.865, - "details": { - "confidence_interval": 0.0334882947381079 + "uncertainty": { + "confidence_interval": { + "lower": -0.0335, + "upper": 0.0335, + "method": "unknown" + } } } }, @@ -235,8 +259,12 @@ }, "score_details": { "score": 0.795, - "details": { - "confidence_interval": 0.0395620320289107 + "uncertainty": { + "confidence_interval": { + "lower": -0.0396, + "upper": 0.0396, + "method": "unknown" + } } } }, @@ -258,8 +286,12 @@ }, "score_details": { "score": 0.9075, - "details": { - "confidence_interval": 0.0283930651251164 + "uncertainty": { + "confidence_interval": { + "lower": -0.0284, + "upper": 0.0284, + "method": "unknown" + } } } }, @@ -281,8 +313,12 @@ }, "score_details": { "score": 0.8875, - "details": { - "confidence_interval": 0.0309655314070612 + "uncertainty": { + "confidence_interval": { + "lower": -0.031, + "upper": 0.031, + "method": "unknown" + } } } }, @@ -304,8 +340,12 @@ }, "score_details": { "score": 0.915, - "details": { - "confidence_interval": 0.0273299039414468 + "uncertainty": { + "confidence_interval": { + "lower": -0.0273, + "upper": 0.0273, + "method": "unknown" + } } } }, @@ -327,8 +367,12 @@ }, "score_details": { "score": 0.8875, - "details": { - "confidence_interval": 0.0309655314070612 + "uncertainty": { + 
"confidence_interval": { + "lower": -0.031, + "upper": 0.031, + "method": "unknown" + } } } }, @@ -350,8 +394,12 @@ }, "score_details": { "score": 0.905, - "details": { - "confidence_interval": 0.0287345359327925 + "uncertainty": { + "confidence_interval": { + "lower": -0.0287, + "upper": 0.0287, + "method": "unknown" + } } } }, @@ -373,8 +421,12 @@ }, "score_details": { "score": 0.865, - "details": { - "confidence_interval": 0.0334882947381079 + "uncertainty": { + "confidence_interval": { + "lower": -0.0335, + "upper": 0.0335, + "method": "unknown" + } } } }, @@ -396,8 +448,12 @@ }, "score_details": { "score": 0.9125, - "details": { - "confidence_interval": 0.0276909948229923 + "uncertainty": { + "confidence_interval": { + "lower": -0.0277, + "upper": 0.0277, + "method": "unknown" + } } } }, @@ -419,8 +475,12 @@ }, "score_details": { "score": 0.895, - "details": { - "confidence_interval": 0.0300416832365769 + "uncertainty": { + "confidence_interval": { + "lower": -0.03, + "upper": 0.03, + "method": "unknown" + } } } }, @@ -442,8 +502,12 @@ }, "score_details": { "score": 0.915, - "details": { - "confidence_interval": 0.0273299039414468 + "uncertainty": { + "confidence_interval": { + "lower": -0.0273, + "upper": 0.0273, + "method": "unknown" + } } } } diff --git a/data/global-mmlu-lite/openai/o3-mini-2025-01-31/bfc75f7b-ebc2-4833-acb2-a9b48bd02d79.json b/data/global-mmlu-lite/openai/o3-mini-2025-01-31/a70391be-f081-4ee6-8128-801b27072b53.json similarity index 80% rename from data/global-mmlu-lite/openai/o3-mini-2025-01-31/bfc75f7b-ebc2-4833-acb2-a9b48bd02d79.json rename to data/global-mmlu-lite/openai/o3-mini-2025-01-31/a70391be-f081-4ee6-8128-801b27072b53.json index f888f274b..e92aebf64 100644 --- a/data/global-mmlu-lite/openai/o3-mini-2025-01-31/bfc75f7b-ebc2-4833-acb2-a9b48bd02d79.json +++ b/data/global-mmlu-lite/openai/o3-mini-2025-01-31/a70391be-f081-4ee6-8128-801b27072b53.json @@ -1,7 +1,7 @@ { "schema_version": "0.2.0", - "evaluation_id": "global-mmlu-lite/openai_o3-mini-2025-01-31/1770682039.8556428", - "retrieved_timestamp": "1770682039.8556428", + "evaluation_id": "global-mmlu-lite/openai_o3-mini-2025-01-31/1770822205.005286", + "retrieved_timestamp": "1770822205.005286", "source_metadata": { "source_name": "Global MMLU Lite", "source_type": "documentation", @@ -97,8 +97,12 @@ }, "score_details": { "score": 0.7725, - "details": { - "confidence_interval": 0.0410826112430601 + "uncertainty": { + "confidence_interval": { + "lower": -0.0411, + "upper": 0.0411, + "method": "unknown" + } } } }, @@ -120,8 +124,12 @@ }, "score_details": { "score": 0.8025, - "details": { - "confidence_interval": 0.0390143311477458 + "uncertainty": { + "confidence_interval": { + "lower": -0.039, + "upper": 0.039, + "method": "unknown" + } } } }, @@ -143,8 +151,12 @@ }, "score_details": { "score": 0.77, - "details": { - "confidence_interval": 0.0412408279846843 + "uncertainty": { + "confidence_interval": { + "lower": -0.0412, + "upper": 0.0412, + "method": "unknown" + } } } }, @@ -166,8 +178,12 @@ }, "score_details": { "score": 0.7525, - "details": { - "confidence_interval": 0.0422920706585954 + "uncertainty": { + "confidence_interval": { + "lower": -0.0423, + "upper": 0.0423, + "method": "unknown" + } } } }, @@ -189,8 +205,12 @@ }, "score_details": { "score": 0.74, - "details": { - "confidence_interval": 0.0429853660302419 + "uncertainty": { + "confidence_interval": { + "lower": -0.043, + "upper": 0.043, + "method": "unknown" + } } } }, @@ -212,8 +232,12 @@ }, "score_details": { "score": 0.7525, - 
"details": { - "confidence_interval": 0.0422920706585954 + "uncertainty": { + "confidence_interval": { + "lower": -0.0423, + "upper": 0.0423, + "method": "unknown" + } } } }, @@ -235,8 +259,12 @@ }, "score_details": { "score": 0.7425, - "details": { - "confidence_interval": 0.042850405989882 + "uncertainty": { + "confidence_interval": { + "lower": -0.0429, + "upper": 0.0429, + "method": "unknown" + } } } }, @@ -258,8 +286,12 @@ }, "score_details": { "score": 0.8, - "details": { - "confidence_interval": 0.039199279690801 + "uncertainty": { + "confidence_interval": { + "lower": -0.0392, + "upper": 0.0392, + "method": "unknown" + } } } }, @@ -281,8 +313,12 @@ }, "score_details": { "score": 0.81, - "details": { - "confidence_interval": 0.0384447822371523 + "uncertainty": { + "confidence_interval": { + "lower": -0.0384, + "upper": 0.0384, + "method": "unknown" + } } } }, @@ -304,8 +340,12 @@ }, "score_details": { "score": 0.8075, - "details": { - "confidence_interval": 0.0386371183112584 + "uncertainty": { + "confidence_interval": { + "lower": -0.0386, + "upper": 0.0386, + "method": "unknown" + } } } }, @@ -327,8 +367,12 @@ }, "score_details": { "score": 0.7975, - "details": { - "confidence_interval": 0.0393818356106108 + "uncertainty": { + "confidence_interval": { + "lower": -0.0394, + "upper": 0.0394, + "method": "unknown" + } } } }, @@ -350,8 +394,12 @@ }, "score_details": { "score": 0.775, - "details": { - "confidence_interval": 0.0409223160958216 + "uncertainty": { + "confidence_interval": { + "lower": -0.0409, + "upper": 0.0409, + "method": "unknown" + } } } }, @@ -373,8 +421,12 @@ }, "score_details": { "score": 0.765, - "details": { - "confidence_interval": 0.0415511209081742 + "uncertainty": { + "confidence_interval": { + "lower": -0.0416, + "upper": 0.0416, + "method": "unknown" + } } } }, @@ -396,8 +448,12 @@ }, "score_details": { "score": 0.7725, - "details": { - "confidence_interval": 0.0410826112430601 + "uncertainty": { + "confidence_interval": { + "lower": -0.0411, + "upper": 0.0411, + "method": "unknown" + } } } }, @@ -419,8 +475,12 @@ }, "score_details": { "score": 0.8125, - "details": { - "confidence_interval": 0.0382499098762049 + "uncertainty": { + "confidence_interval": { + "lower": -0.0382, + "upper": 0.0382, + "method": "unknown" + } } } }, @@ -442,8 +502,12 @@ }, "score_details": { "score": 0.8075, - "details": { - "confidence_interval": 0.0386371183112584 + "uncertainty": { + "confidence_interval": { + "lower": -0.0386, + "upper": 0.0386, + "method": "unknown" + } } } } diff --git a/data/global-mmlu-lite/unknown/aya-expanse-32b/1b07b6c2-9eb1-4e90-9e29-91c2b8258b4d.json b/data/global-mmlu-lite/unknown/aya-expanse-32b/14712820-c343-4d94-8eff-633a12b830af.json similarity index 80% rename from data/global-mmlu-lite/unknown/aya-expanse-32b/1b07b6c2-9eb1-4e90-9e29-91c2b8258b4d.json rename to data/global-mmlu-lite/unknown/aya-expanse-32b/14712820-c343-4d94-8eff-633a12b830af.json index 438f36e4d..057524bd4 100644 --- a/data/global-mmlu-lite/unknown/aya-expanse-32b/1b07b6c2-9eb1-4e90-9e29-91c2b8258b4d.json +++ b/data/global-mmlu-lite/unknown/aya-expanse-32b/14712820-c343-4d94-8eff-633a12b830af.json @@ -1,7 +1,7 @@ { "schema_version": "0.2.0", - "evaluation_id": "global-mmlu-lite/unknown_aya-expanse-32b/1770682039.8556428", - "retrieved_timestamp": "1770682039.8556428", + "evaluation_id": "global-mmlu-lite/unknown_aya-expanse-32b/1770822205.005286", + "retrieved_timestamp": "1770822205.005286", "source_metadata": { "source_name": "Global MMLU Lite", "source_type": 
"documentation", @@ -97,8 +97,12 @@ }, "score_details": { "score": 0.7425, - "details": { - "confidence_interval": 0.042850405989882 + "uncertainty": { + "confidence_interval": { + "lower": -0.0429, + "upper": 0.0429, + "method": "unknown" + } } } }, @@ -120,8 +124,12 @@ }, "score_details": { "score": 0.7544, - "details": { - "confidence_interval": 0.0422362190048598 + "uncertainty": { + "confidence_interval": { + "lower": -0.0422, + "upper": 0.0422, + "method": "unknown" + } } } }, @@ -143,8 +151,12 @@ }, "score_details": { "score": 0.7343, - "details": { - "confidence_interval": 0.0433386611155747 + "uncertainty": { + "confidence_interval": { + "lower": -0.0433, + "upper": 0.0433, + "method": "unknown" + } } } }, @@ -166,8 +178,12 @@ }, "score_details": { "score": 0.7425, - "details": { - "confidence_interval": 0.042850405989882 + "uncertainty": { + "confidence_interval": { + "lower": -0.0429, + "upper": 0.0429, + "method": "unknown" + } } } }, @@ -189,8 +205,12 @@ }, "score_details": { "score": 0.7325, - "details": { - "confidence_interval": 0.0433794261948387 + "uncertainty": { + "confidence_interval": { + "lower": -0.0434, + "upper": 0.0434, + "method": "unknown" + } } } }, @@ -212,8 +232,12 @@ }, "score_details": { "score": 0.7375, - "details": { - "confidence_interval": 0.043118511644326 + "uncertainty": { + "confidence_interval": { + "lower": -0.0431, + "upper": 0.0431, + "method": "unknown" + } } } }, @@ -235,8 +259,12 @@ }, "score_details": { "score": 0.7594, - "details": { - "confidence_interval": 0.0419416660786681 + "uncertainty": { + "confidence_interval": { + "lower": -0.0419, + "upper": 0.0419, + "method": "unknown" + } } } }, @@ -258,8 +286,12 @@ }, "score_details": { "score": 0.7305, - "details": { - "confidence_interval": 0.0436468814160691 + "uncertainty": { + "confidence_interval": { + "lower": -0.0436, + "upper": 0.0436, + "method": "unknown" + } } } }, @@ -281,8 +313,12 @@ }, "score_details": { "score": 0.7419, - "details": { - "confidence_interval": 0.0429391274814829 + "uncertainty": { + "confidence_interval": { + "lower": -0.0429, + "upper": 0.0429, + "method": "unknown" + } } } }, @@ -304,8 +340,12 @@ }, "score_details": { "score": 0.7525, - "details": { - "confidence_interval": 0.0422920706585954 + "uncertainty": { + "confidence_interval": { + "lower": -0.0423, + "upper": 0.0423, + "method": "unknown" + } } } }, @@ -327,8 +367,12 @@ }, "score_details": { "score": 0.7544, - "details": { - "confidence_interval": 0.0422362190048598 + "uncertainty": { + "confidence_interval": { + "lower": -0.0422, + "upper": 0.0422, + "method": "unknown" + } } } }, @@ -350,8 +394,12 @@ }, "score_details": { "score": 0.7362, - "details": { - "confidence_interval": 0.0432964154917688 + "uncertainty": { + "confidence_interval": { + "lower": -0.0433, + "upper": 0.0433, + "method": "unknown" + } } } }, @@ -373,8 +421,12 @@ }, "score_details": { "score": 0.7071, - "details": { - "confidence_interval": 0.044824280008073 + "uncertainty": { + "confidence_interval": { + "lower": -0.0448, + "upper": 0.0448, + "method": "unknown" + } } } }, @@ -396,8 +448,12 @@ }, "score_details": { "score": 0.6942, - "details": { - "confidence_interval": 0.0452072976819525 + "uncertainty": { + "confidence_interval": { + "lower": -0.0452, + "upper": 0.0452, + "method": "unknown" + } } } }, @@ -419,8 +475,12 @@ }, "score_details": { "score": 0.743, - "details": { - "confidence_interval": 0.0432027486149424 + "uncertainty": { + "confidence_interval": { + "lower": -0.0432, + "upper": 0.0432, + "method": "unknown" 
+ } } } }, @@ -442,8 +502,12 @@ }, "score_details": { "score": 0.7025, - "details": { - "confidence_interval": 0.0448006943140238 + "uncertainty": { + "confidence_interval": { + "lower": -0.0448, + "upper": 0.0448, + "method": "unknown" + } } } } diff --git a/data/global-mmlu-lite/unknown/granite-4.0-h-small/96569c98-0d02-4b32-b915-87b707102913.json b/data/global-mmlu-lite/unknown/granite-4.0-h-small/7cc69baa-9db9-4a40-a03a-d4cd6bee16c5.json similarity index 80% rename from data/global-mmlu-lite/unknown/granite-4.0-h-small/96569c98-0d02-4b32-b915-87b707102913.json rename to data/global-mmlu-lite/unknown/granite-4.0-h-small/7cc69baa-9db9-4a40-a03a-d4cd6bee16c5.json index a8365f097..661c1261f 100644 --- a/data/global-mmlu-lite/unknown/granite-4.0-h-small/96569c98-0d02-4b32-b915-87b707102913.json +++ b/data/global-mmlu-lite/unknown/granite-4.0-h-small/7cc69baa-9db9-4a40-a03a-d4cd6bee16c5.json @@ -1,7 +1,7 @@ { "schema_version": "0.2.0", - "evaluation_id": "global-mmlu-lite/unknown_granite-4.0-h-small/1770682039.8556428", - "retrieved_timestamp": "1770682039.8556428", + "evaluation_id": "global-mmlu-lite/unknown_granite-4.0-h-small/1770822205.005286", + "retrieved_timestamp": "1770822205.005286", "source_metadata": { "source_name": "Global MMLU Lite", "source_type": "documentation", @@ -97,8 +97,12 @@ }, "score_details": { "score": 0.7613, - "details": { - "confidence_interval": 0.0418799929410023 + "uncertainty": { + "confidence_interval": { + "lower": -0.0419, + "upper": 0.0419, + "method": "unknown" + } } } }, @@ -120,8 +124,12 @@ }, "score_details": { "score": 0.77, - "details": { - "confidence_interval": 0.0412408279846843 + "uncertainty": { + "confidence_interval": { + "lower": -0.0412, + "upper": 0.0412, + "method": "unknown" + } } } }, @@ -143,8 +151,12 @@ }, "score_details": { "score": 0.7613, - "details": { - "confidence_interval": 0.0418799929410023 + "uncertainty": { + "confidence_interval": { + "lower": -0.0419, + "upper": 0.0419, + "method": "unknown" + } } } }, @@ -166,8 +178,12 @@ }, "score_details": { "score": 0.755, - "details": { - "confidence_interval": 0.0421477711557175 + "uncertainty": { + "confidence_interval": { + "lower": -0.0421, + "upper": 0.0421, + "method": "unknown" + } } } }, @@ -189,8 +205,12 @@ }, "score_details": { "score": 0.7594, - "details": { - "confidence_interval": 0.0419416660786681 + "uncertainty": { + "confidence_interval": { + "lower": -0.0419, + "upper": 0.0419, + "method": "unknown" + } } } }, @@ -212,8 +232,12 @@ }, "score_details": { "score": 0.7575, - "details": { - "confidence_interval": 0.0420015468835339 + "uncertainty": { + "confidence_interval": { + "lower": -0.042, + "upper": 0.042, + "method": "unknown" + } } } }, @@ -235,8 +259,12 @@ }, "score_details": { "score": 0.7614, - "details": { - "confidence_interval": 0.0420850950563913 + "uncertainty": { + "confidence_interval": { + "lower": -0.0421, + "upper": 0.0421, + "method": "unknown" + } } } }, @@ -258,8 +286,12 @@ }, "score_details": { "score": 0.7525, - "details": { - "confidence_interval": 0.0422920706585954 + "uncertainty": { + "confidence_interval": { + "lower": -0.0423, + "upper": 0.0423, + "method": "unknown" + } } } }, @@ -281,8 +313,12 @@ }, "score_details": { "score": 0.7406, - "details": { - "confidence_interval": 0.0431176028953377 + "uncertainty": { + "confidence_interval": { + "lower": -0.0431, + "upper": 0.0431, + "method": "unknown" + } } } }, @@ -304,8 +340,12 @@ }, "score_details": { "score": 0.7525, - "details": { - "confidence_interval": 0.0422920706585954 + 
"uncertainty": { + "confidence_interval": { + "lower": -0.0423, + "upper": 0.0423, + "method": "unknown" + } } } }, @@ -327,8 +367,12 @@ }, "score_details": { "score": 0.757, - "details": { - "confidence_interval": 0.0422983725746105 + "uncertainty": { + "confidence_interval": { + "lower": -0.0423, + "upper": 0.0423, + "method": "unknown" + } } } }, @@ -350,8 +394,12 @@ }, "score_details": { "score": 0.7638, - "details": { - "confidence_interval": 0.0417276763767606 + "uncertainty": { + "confidence_interval": { + "lower": -0.0417, + "upper": 0.0417, + "method": "unknown" + } } } }, @@ -373,8 +421,12 @@ }, "score_details": { "score": 0.7318, - "details": { - "confidence_interval": 0.0434682405808651 + "uncertainty": { + "confidence_interval": { + "lower": -0.0435, + "upper": 0.0435, + "method": "unknown" + } } } }, @@ -396,8 +448,12 @@ }, "score_details": { "score": 0.6921, - "details": { - "confidence_interval": 0.0456390297025301 + "uncertainty": { + "confidence_interval": { + "lower": -0.0456, + "upper": 0.0456, + "method": "unknown" + } } } }, @@ -419,8 +475,12 @@ }, "score_details": { "score": 0.7475, - "details": { - "confidence_interval": 0.0425749733789813 + "uncertainty": { + "confidence_interval": { + "lower": -0.0426, + "upper": 0.0426, + "method": "unknown" + } } } }, @@ -442,8 +502,12 @@ }, "score_details": { "score": 0.7419, - "details": { - "confidence_interval": 0.0429391274814829 + "uncertainty": { + "confidence_interval": { + "lower": -0.0429, + "upper": 0.0429, + "method": "unknown" + } } } } diff --git a/data/global-mmlu-lite/unknown/o4-mini-2025-04-16/f1955ca9-b8ef-4565-9c4c-2e7aaba6a9db.json b/data/global-mmlu-lite/unknown/o4-mini-2025-04-16/026339f8-2a68-47c2-afaf-69a3c0c5a8db.json similarity index 80% rename from data/global-mmlu-lite/unknown/o4-mini-2025-04-16/f1955ca9-b8ef-4565-9c4c-2e7aaba6a9db.json rename to data/global-mmlu-lite/unknown/o4-mini-2025-04-16/026339f8-2a68-47c2-afaf-69a3c0c5a8db.json index 8cd940025..6b3c13f5c 100644 --- a/data/global-mmlu-lite/unknown/o4-mini-2025-04-16/f1955ca9-b8ef-4565-9c4c-2e7aaba6a9db.json +++ b/data/global-mmlu-lite/unknown/o4-mini-2025-04-16/026339f8-2a68-47c2-afaf-69a3c0c5a8db.json @@ -1,7 +1,7 @@ { "schema_version": "0.2.0", - "evaluation_id": "global-mmlu-lite/unknown_o4-mini-2025-04-16/1770682039.8556428", - "retrieved_timestamp": "1770682039.8556428", + "evaluation_id": "global-mmlu-lite/unknown_o4-mini-2025-04-16/1770822205.005286", + "retrieved_timestamp": "1770822205.005286", "source_metadata": { "source_name": "Global MMLU Lite", "source_type": "documentation", @@ -97,8 +97,12 @@ }, "score_details": { "score": 0.865, - "details": { - "confidence_interval": 0.0334882947381079 + "uncertainty": { + "confidence_interval": { + "lower": -0.0335, + "upper": 0.0335, + "method": "unknown" + } } } }, @@ -120,8 +124,12 @@ }, "score_details": { "score": 0.8675, - "details": { - "confidence_interval": 0.0332246776628893 + "uncertainty": { + "confidence_interval": { + "lower": -0.0332, + "upper": 0.0332, + "method": "unknown" + } } } }, @@ -143,8 +151,12 @@ }, "score_details": { "score": 0.8875, - "details": { - "confidence_interval": 0.0309655314070612 + "uncertainty": { + "confidence_interval": { + "lower": -0.031, + "upper": 0.031, + "method": "unknown" + } } } }, @@ -166,8 +178,12 @@ }, "score_details": { "score": 0.8775, - "details": { - "confidence_interval": 0.0321299242960121 + "uncertainty": { + "confidence_interval": { + "lower": -0.0321, + "upper": 0.0321, + "method": "unknown" + } } } }, @@ -189,8 +205,12 @@ }, 
"score_details": { "score": 0.87, - "details": { - "confidence_interval": 0.0329571309666248 + "uncertainty": { + "confidence_interval": { + "lower": -0.033, + "upper": 0.033, + "method": "unknown" + } } } }, @@ -212,8 +232,12 @@ }, "score_details": { "score": 0.87, - "details": { - "confidence_interval": 0.0329571309666248 + "uncertainty": { + "confidence_interval": { + "lower": -0.033, + "upper": 0.033, + "method": "unknown" + } } } }, @@ -235,8 +259,12 @@ }, "score_details": { "score": 0.8675, - "details": { - "confidence_interval": 0.0332246776628893 + "uncertainty": { + "confidence_interval": { + "lower": -0.0332, + "upper": 0.0332, + "method": "unknown" + } } } }, @@ -258,8 +286,12 @@ }, "score_details": { "score": 0.855, - "details": { - "confidence_interval": 0.034505248053577 + "uncertainty": { + "confidence_interval": { + "lower": -0.0345, + "upper": 0.0345, + "method": "unknown" + } } } }, @@ -281,8 +313,12 @@ }, "score_details": { "score": 0.885, - "details": { - "confidence_interval": 0.0312635759101603 + "uncertainty": { + "confidence_interval": { + "lower": -0.0313, + "upper": 0.0313, + "method": "unknown" + } } } }, @@ -304,8 +340,12 @@ }, "score_details": { "score": 0.88, - "details": { - "confidence_interval": 0.0318456453642134 + "uncertainty": { + "confidence_interval": { + "lower": -0.0318, + "upper": 0.0318, + "method": "unknown" + } } } }, @@ -327,8 +367,12 @@ }, "score_details": { "score": 0.88, - "details": { - "confidence_interval": 0.0318456453642134 + "uncertainty": { + "confidence_interval": { + "lower": -0.0318, + "upper": 0.0318, + "method": "unknown" + } } } }, @@ -350,8 +394,12 @@ }, "score_details": { "score": 0.855, - "details": { - "confidence_interval": 0.034505248053577 + "uncertainty": { + "confidence_interval": { + "lower": -0.0345, + "upper": 0.0345, + "method": "unknown" + } } } }, @@ -373,8 +421,12 @@ }, "score_details": { "score": 0.8525, - "details": { - "confidence_interval": 0.0347505193336969 + "uncertainty": { + "confidence_interval": { + "lower": -0.0348, + "upper": 0.0348, + "method": "unknown" + } } } }, @@ -396,8 +448,12 @@ }, "score_details": { "score": 0.8525, - "details": { - "confidence_interval": 0.0347505193336969 + "uncertainty": { + "confidence_interval": { + "lower": -0.0348, + "upper": 0.0348, + "method": "unknown" + } } } }, @@ -419,8 +475,12 @@ }, "score_details": { "score": 0.89, - "details": { - "confidence_interval": 0.0306626327370121 + "uncertainty": { + "confidence_interval": { + "lower": -0.0307, + "upper": 0.0307, + "method": "unknown" + } } } }, @@ -442,8 +502,12 @@ }, "score_details": { "score": 0.8725, - "details": { - "confidence_interval": 0.0326855581520567 + "uncertainty": { + "confidence_interval": { + "lower": -0.0327, + "upper": 0.0327, + "method": "unknown" + } } } } diff --git a/data/global-mmlu-lite/xai/grok-3-mini/143d53e6-b34e-4fa8-af3f-8019cef29abb.json b/data/global-mmlu-lite/xai/grok-3-mini/2761218a-27ef-4dc3-af3a-41ba830f8b40.json similarity index 79% rename from data/global-mmlu-lite/xai/grok-3-mini/143d53e6-b34e-4fa8-af3f-8019cef29abb.json rename to data/global-mmlu-lite/xai/grok-3-mini/2761218a-27ef-4dc3-af3a-41ba830f8b40.json index 18e2509ab..682b7a3d5 100644 --- a/data/global-mmlu-lite/xai/grok-3-mini/143d53e6-b34e-4fa8-af3f-8019cef29abb.json +++ b/data/global-mmlu-lite/xai/grok-3-mini/2761218a-27ef-4dc3-af3a-41ba830f8b40.json @@ -1,7 +1,7 @@ { "schema_version": "0.2.0", - "evaluation_id": "global-mmlu-lite/xai_grok-3-mini/1770682039.8556428", - "retrieved_timestamp": "1770682039.8556428", + 
"evaluation_id": "global-mmlu-lite/xai_grok-3-mini/1770822205.005286", + "retrieved_timestamp": "1770822205.005286", "source_metadata": { "source_name": "Global MMLU Lite", "source_type": "documentation", @@ -97,8 +97,12 @@ }, "score_details": { "score": 0.755, - "details": { - "confidence_interval": 0.0421477711557175 + "uncertainty": { + "confidence_interval": { + "lower": -0.0421, + "upper": 0.0421, + "method": "unknown" + } } } }, @@ -120,8 +124,12 @@ }, "score_details": { "score": 0.5075, - "details": { - "confidence_interval": 0.0489935869046875 + "uncertainty": { + "confidence_interval": { + "lower": -0.049, + "upper": 0.049, + "method": "unknown" + } } } }, @@ -143,8 +151,12 @@ }, "score_details": { "score": 0.7355, - "details": { - "confidence_interval": 0.0433858795425096 + "uncertainty": { + "confidence_interval": { + "lower": -0.0434, + "upper": 0.0434, + "method": "unknown" + } } } }, @@ -166,8 +178,12 @@ }, "score_details": { "score": 0.6591, - "details": { - "confidence_interval": 0.0465089008517938 + "uncertainty": { + "confidence_interval": { + "lower": -0.0465, + "upper": 0.0465, + "method": "unknown" + } } } }, @@ -189,8 +205,12 @@ }, "score_details": { "score": 0.485, - "details": { - "confidence_interval": 0.0489770450552826 + "uncertainty": { + "confidence_interval": { + "lower": -0.049, + "upper": 0.049, + "method": "unknown" + } } } }, @@ -212,8 +232,12 @@ }, "score_details": { "score": 0.56, - "details": { - "confidence_interval": 0.0486450268120758 + "uncertainty": { + "confidence_interval": { + "lower": -0.0486, + "upper": 0.0486, + "method": "unknown" + } } } }, @@ -235,8 +259,12 @@ }, "score_details": { "score": 0.725, - "details": { - "confidence_interval": 0.0437575951229009 + "uncertainty": { + "confidence_interval": { + "lower": -0.0438, + "upper": 0.0438, + "method": "unknown" + } } } }, @@ -258,8 +286,12 @@ }, "score_details": { "score": 0.696, - "details": { - "confidence_interval": 0.0451914267324277 + "uncertainty": { + "confidence_interval": { + "lower": -0.0452, + "upper": 0.0452, + "method": "unknown" + } } } }, @@ -281,8 +313,12 @@ }, "score_details": { "score": 0.6575, - "details": { - "confidence_interval": 0.0465046373306654 + "uncertainty": { + "confidence_interval": { + "lower": -0.0465, + "upper": 0.0465, + "method": "unknown" + } } } }, @@ -304,8 +340,12 @@ }, "score_details": { "score": 0.7325, - "details": { - "confidence_interval": 0.0433794261948387 + "uncertainty": { + "confidence_interval": { + "lower": -0.0434, + "upper": 0.0434, + "method": "unknown" + } } } }, @@ -327,8 +367,12 @@ }, "score_details": { "score": 0.6275, - "details": { - "confidence_interval": 0.04737924097692 + "uncertainty": { + "confidence_interval": { + "lower": -0.0474, + "upper": 0.0474, + "method": "unknown" + } } } }, @@ -350,8 +394,12 @@ }, "score_details": { "score": 0.61, - "details": { - "confidence_interval": 0.0477986153942541 + "uncertainty": { + "confidence_interval": { + "lower": -0.0478, + "upper": 0.0478, + "method": "unknown" + } } } }, @@ -373,8 +421,12 @@ }, "score_details": { "score": 0.7625, - "details": { - "confidence_interval": 0.0417032427788918 + "uncertainty": { + "confidence_interval": { + "lower": -0.0417, + "upper": 0.0417, + "method": "unknown" + } } } }, @@ -396,8 +448,12 @@ }, "score_details": { "score": 0.8296, - "details": { - "confidence_interval": 0.0368941238003664 + "uncertainty": { + "confidence_interval": { + "lower": -0.0369, + "upper": 0.0369, + "method": "unknown" + } } } }, @@ -419,8 +475,12 @@ }, "score_details": { 
"score": 0.5564, - "details": { - "confidence_interval": 0.0487474461160897 + "uncertainty": { + "confidence_interval": { + "lower": -0.0487, + "upper": 0.0487, + "method": "unknown" + } } } }, @@ -442,8 +502,12 @@ }, "score_details": { "score": 0.8693, - "details": { - "confidence_interval": 0.0331103067375873 + "uncertainty": { + "confidence_interval": { + "lower": -0.0331, + "upper": 0.0331, + "method": "unknown" + } } } } diff --git a/data/global-mmlu-lite/xai/grok-4-0709/bcfa473c-1686-42af-8d07-4c8b92c3d864.json b/data/global-mmlu-lite/xai/grok-4-0709/8a2fda68-99d6-47a5-95ee-d38e695dda55.json similarity index 79% rename from data/global-mmlu-lite/xai/grok-4-0709/bcfa473c-1686-42af-8d07-4c8b92c3d864.json rename to data/global-mmlu-lite/xai/grok-4-0709/8a2fda68-99d6-47a5-95ee-d38e695dda55.json index 0a731d29e..b5cecaa08 100644 --- a/data/global-mmlu-lite/xai/grok-4-0709/bcfa473c-1686-42af-8d07-4c8b92c3d864.json +++ b/data/global-mmlu-lite/xai/grok-4-0709/8a2fda68-99d6-47a5-95ee-d38e695dda55.json @@ -1,7 +1,7 @@ { "schema_version": "0.2.0", - "evaluation_id": "global-mmlu-lite/xai_grok-4-0709/1770682039.8556428", - "retrieved_timestamp": "1770682039.8556428", + "evaluation_id": "global-mmlu-lite/xai_grok-4-0709/1770822205.005286", + "retrieved_timestamp": "1770822205.005286", "source_metadata": { "source_name": "Global MMLU Lite", "source_type": "documentation", @@ -97,8 +97,12 @@ }, "score_details": { "score": 0.885, - "details": { - "confidence_interval": 0.0312635759101603 + "uncertainty": { + "confidence_interval": { + "lower": -0.0313, + "upper": 0.0313, + "method": "unknown" + } } } }, @@ -120,8 +124,12 @@ }, "score_details": { "score": 0.905, - "details": { - "confidence_interval": 0.0287345359327925 + "uncertainty": { + "confidence_interval": { + "lower": -0.0287, + "upper": 0.0287, + "method": "unknown" + } } } }, @@ -143,8 +151,12 @@ }, "score_details": { "score": 0.8925, - "details": { - "confidence_interval": 0.0303547345865505 + "uncertainty": { + "confidence_interval": { + "lower": -0.0304, + "upper": 0.0304, + "method": "unknown" + } } } }, @@ -166,8 +178,12 @@ }, "score_details": { "score": 0.8725, - "details": { - "confidence_interval": 0.0326855581520567 + "uncertainty": { + "confidence_interval": { + "lower": -0.0327, + "upper": 0.0327, + "method": "unknown" + } } } }, @@ -189,8 +205,12 @@ }, "score_details": { "score": 0.875, - "details": { - "confidence_interval": 0.0324098580108514 + "uncertainty": { + "confidence_interval": { + "lower": -0.0324, + "upper": 0.0324, + "method": "unknown" + } } } }, @@ -212,8 +232,12 @@ }, "score_details": { "score": 0.8675, - "details": { - "confidence_interval": 0.0332246776628893 + "uncertainty": { + "confidence_interval": { + "lower": -0.0332, + "upper": 0.0332, + "method": "unknown" + } } } }, @@ -235,8 +259,12 @@ }, "score_details": { "score": 0.89, - "details": { - "confidence_interval": 0.0306626327370121 + "uncertainty": { + "confidence_interval": { + "lower": -0.0307, + "upper": 0.0307, + "method": "unknown" + } } } }, @@ -258,8 +286,12 @@ }, "score_details": { "score": 0.9025, - "details": { - "confidence_interval": 0.0290699315059157 + "uncertainty": { + "confidence_interval": { + "lower": -0.0291, + "upper": 0.0291, + "method": "unknown" + } } } }, @@ -281,8 +313,12 @@ }, "score_details": { "score": 0.87, - "details": { - "confidence_interval": 0.0329571309666248 + "uncertainty": { + "confidence_interval": { + "lower": -0.033, + "upper": 0.033, + "method": "unknown" + } } } }, @@ -304,8 +340,12 @@ }, "score_details": { 
"score": 0.895, - "details": { - "confidence_interval": 0.0300416832365769 + "uncertainty": { + "confidence_interval": { + "lower": -0.03, + "upper": 0.03, + "method": "unknown" + } } } }, @@ -327,8 +367,12 @@ }, "score_details": { "score": 0.8725, - "details": { - "confidence_interval": 0.0326855581520567 + "uncertainty": { + "confidence_interval": { + "lower": -0.0327, + "upper": 0.0327, + "method": "unknown" + } } } }, @@ -350,8 +394,12 @@ }, "score_details": { "score": 0.9075, - "details": { - "confidence_interval": 0.0283930651251164 + "uncertainty": { + "confidence_interval": { + "lower": -0.0284, + "upper": 0.0284, + "method": "unknown" + } } } }, @@ -373,8 +421,12 @@ }, "score_details": { "score": 0.91, - "details": { - "confidence_interval": 0.0280452971732717 + "uncertainty": { + "confidence_interval": { + "lower": -0.028, + "upper": 0.028, + "method": "unknown" + } } } }, @@ -396,8 +448,12 @@ }, "score_details": { "score": 0.905, - "details": { - "confidence_interval": 0.0287345359327925 + "uncertainty": { + "confidence_interval": { + "lower": -0.0287, + "upper": 0.0287, + "method": "unknown" + } } } }, @@ -419,8 +475,12 @@ }, "score_details": { "score": 0.8525, - "details": { - "confidence_interval": 0.0347505193336969 + "uncertainty": { + "confidence_interval": { + "lower": -0.0348, + "upper": 0.0348, + "method": "unknown" + } } } }, @@ -442,8 +502,12 @@ }, "score_details": { "score": 0.9075, - "details": { - "confidence_interval": 0.0283930651251164 + "uncertainty": { + "confidence_interval": { + "lower": -0.0284, + "upper": 0.0284, + "method": "unknown" + } } } } diff --git a/eval.schema.json b/eval.schema.json index c9917b4ad..9ca971c4c 100644 --- a/eval.schema.json +++ b/eval.schema.json @@ -287,7 +287,7 @@ "description": "How the confidence interval was computed" } }, - "required": ["lower", "upper", "confidence_level"] + "required": ["lower", "upper"] }, "standard_deviation": { "type": "number", diff --git a/eval_types.py b/eval_types.py index e4d8a9e44..b1b1e6584 100644 --- a/eval_types.py +++ b/eval_types.py @@ -1,65 +1,63 @@ # generated by datamodel-codegen: # filename: eval.schema.json -# timestamp: 2026-02-10T16:07:04+00:00 +# timestamp: 2026-02-11T15:00:05+00:00 from __future__ import annotations - from enum import Enum -from typing import Any, Literal - from pydantic import BaseModel, ConfigDict, Field, confloat, conint +from typing import Any, Literal class SourceType(Enum): - documentation = 'documentation' - evaluation_run = 'evaluation_run' + documentation = "documentation" + evaluation_run = "evaluation_run" class EvaluatorRelationship(Enum): - first_party = 'first_party' - third_party = 'third_party' - collaborative = 'collaborative' - other = 'other' + first_party = "first_party" + third_party = "third_party" + collaborative = "collaborative" + other = "other" class SourceMetadata(BaseModel): source_name: str | None = Field( None, - description='Name of the source (e.g. title of the source leaderboard or name of the platform used for the evaluation).', + description="Name of the source (e.g. 
title of the source leaderboard or name of the platform used for the evaluation).", ) source_type: SourceType = Field( ..., - description='Whether the data comes from a direct evaluation run or from documentation', + description="Whether the data comes from a direct evaluation run or from documentation", ) source_organization_name: str = Field( - ..., description='Name of the organization that provides the data' + ..., description="Name of the organization that provides the data" ) source_organization_url: str | None = Field( - None, description='URL for the organization that provides the data' + None, description="URL for the organization that provides the data" ) source_organization_logo_url: str | None = Field( - None, description='URL for the Logo for the organization that provides the data' + None, description="URL for the Logo for the organization that provides the data" ) evaluator_relationship: EvaluatorRelationship = Field( - ..., description='Relationship between the evaluator and the model' + ..., description="Relationship between the evaluator and the model" ) class ScoreType(Enum): - binary = 'binary' - continuous = 'continuous' - levels = 'levels' + binary = "binary" + continuous = "continuous" + levels = "levels" class AggregationMethod(Enum): - majority_vote = 'majority_vote' - average = 'average' - weighted_average = 'weighted_average' - median = 'median' + majority_vote = "majority_vote" + average = "average" + weighted_average = "weighted_average" + median = "median" class StandardError(BaseModel): - value: float = Field(..., description='The standard error value') + value: float = Field(..., description="The standard error value") method: str | None = Field( None, description="How the standard error was computed (e.g. 'analytic', 'bootstrap', 'jackknife')", @@ -67,216 +65,216 @@ class StandardError(BaseModel): class ConfidenceInterval(BaseModel): - lower: float = Field(..., description='Lower bound of the confidence interval') - upper: float = Field(..., description='Upper bound of the confidence interval') - confidence_level: confloat(ge=0.0, le=1.0) = Field( - ..., description='Confidence level (e.g. 0.95 for a 95% confidence interval)' + lower: float = Field(..., description="Lower bound of the confidence interval") + upper: float = Field(..., description="Upper bound of the confidence interval") + confidence_level: confloat(ge=0.0, le=1.0) | None = Field( + None, description="Confidence level (e.g. 
0.95 for a 95% confidence interval)" ) method: str | None = Field( - None, description='How the confidence interval was computed' + None, description="How the confidence interval was computed" ) class Uncertainty(BaseModel): standard_error: StandardError | None = Field( None, - description='Standard error of the score estimate (SE_mean = standard_deviation / sqrt(num_samples))', + description="Standard error of the score estimate (SE_mean = standard_deviation / sqrt(num_samples))", ) confidence_interval: ConfidenceInterval | None = Field( None, - description='Lower and upper bounds for the metric at a given confidence level.', + description="Lower and upper bounds for the metric at a given confidence level.", ) standard_deviation: float | None = Field( - None, description='Standard deviation of the per-sample scores' + None, description="Standard deviation of the per-sample scores" ) num_samples: int | None = Field( - None, description='Number of samples used to compute the uncertainty estimates' + None, description="Number of samples used to compute the uncertainty estimates" ) num_bootstrap_samples: int | None = Field( None, - description='Number of bootstrap resamples used, if bootstrap method was applied', + description="Number of bootstrap resamples used, if bootstrap method was applied", ) class EvalLimits(BaseModel): - time_limit: int | None = Field(None, description='Time limit for evaluation.') - message_limit: int | None = Field(None, description='Message limit for evaluation.') - token_limit: int | None = Field(None, description='Token limit for evaluation.') + time_limit: int | None = Field(None, description="Time limit for evaluation.") + message_limit: int | None = Field(None, description="Message limit for evaluation.") + token_limit: int | None = Field(None, description="Token limit for evaluation.") class Sandbox(BaseModel): - type: str | None = Field(None, description='Type of sandbox e.g. docker') + type: str | None = Field(None, description="Type of sandbox e.g. docker") config: str | None = Field( None, - description='Config file name/path e.g. compose.yaml. TODO or full config? Not sure based on the Inspect docs', + description="Config file name/path e.g. compose.yaml. TODO or full config? 
Not sure based on the Inspect docs", ) class Format(Enum): - jsonl = 'jsonl' - json = 'json' + jsonl = "jsonl" + json = "json" class HashAlgorithm(Enum): - sha256 = 'sha256' - md5 = 'md5' + sha256 = "sha256" + md5 = "md5" class DetailedEvaluationResults(BaseModel): format: Format | None = Field( - None, description='Format of the detailed evaluation results' + None, description="Format of the detailed evaluation results" ) file_path: str | None = Field( - None, description='Path to the detailed evaluation results file' + None, description="Path to the detailed evaluation results file" ) hash_algorithm: HashAlgorithm | None = Field( None, - description='Hash algorithm used for checksum and sample_hash in instance-level data', + description="Hash algorithm used for checksum and sample_hash in instance-level data", ) - checksum: str | None = Field(None, description='Checksum value of the file') + checksum: str | None = Field(None, description="Checksum value of the file") total_rows: int | None = Field( - None, description='Total number of rows in the detailed evaluation results file' + None, description="Total number of rows in the detailed evaluation results file" ) class AdditionalPropertiesObject(BaseModel): model_config = ConfigDict( - extra='allow', + extra="allow", ) class InferenceEngine(BaseModel): - name: str | None = Field(None, description='Name of the inference engine') - version: str | None = Field(None, description='Version of the inference engine') + name: str | None = Field(None, description="Name of the inference engine") + version: str | None = Field(None, description="Version of the inference engine") class ModelInfo(BaseModel): - name: str = Field(..., description='Model name provided by evaluation source') + name: str = Field(..., description="Model name provided by evaluation source") id: str = Field( ..., - description='Model name in HuggingFace format (e.g. meta-llama/Llama-3.1-8B-Instruct for models available on HuggingFace or openai/azure/gpt-4o-mini-2024-07-18 for closed API models)', + description="Model name in HuggingFace format (e.g. meta-llama/Llama-3.1-8B-Instruct for models available on HuggingFace or openai/azure/gpt-4o-mini-2024-07-18 for closed API models)", ) developer: str | None = Field( None, description="Name of organization that provides the model (e.g. 'OpenAI')" ) inference_platform: str | None = Field( None, - description='Name of inference platform which provides an access to models by API to run the evaluations or provides models weights to run them locally (e.g. HuggingFace, Bedrock, Together AI)', + description="Name of inference platform which provides an access to models by API to run the evaluations or provides models weights to run them locally (e.g. HuggingFace, Bedrock, Together AI)", ) inference_engine: InferenceEngine | None = Field( None, - description='Name of inference engine which provides an access to optimized models to use them for local evaluations (e.g. vLLM, Ollama).', + description="Name of inference engine which provides an access to optimized models to use them for local evaluations (e.g. 
vLLM, Ollama).", ) additional_details: AdditionalPropertiesObject | None = None class SourceDataUrl(BaseModel): model_config = ConfigDict( - extra='allow', + extra="allow", ) - dataset_name: str = Field(..., description='Name of the source dataset') - source_type: Literal['url'] + dataset_name: str = Field(..., description="Name of the source dataset") + source_type: Literal["url"] url: list[str] = Field( - ..., description='URL(s) for the source of the evaluation data', min_length=1 + ..., description="URL(s) for the source of the evaluation data", min_length=1 ) additional_details: AdditionalPropertiesObject | None = None class SourceDataHf(BaseModel): model_config = ConfigDict( - extra='allow', + extra="allow", ) - dataset_name: str = Field(..., description='Name of the source dataset') - source_type: Literal['hf_dataset'] - hf_repo: str | None = Field(None, description='HuggingFace repository identifier') - hf_split: str | None = Field(None, description='One of train, val or test.') + dataset_name: str = Field(..., description="Name of the source dataset") + source_type: Literal["hf_dataset"] + hf_repo: str | None = Field(None, description="HuggingFace repository identifier") + hf_split: str | None = Field(None, description="One of train, val or test.") samples_number: int | None = Field( - None, description='Number of samples in the dataset' + None, description="Number of samples in the dataset" ) sample_ids: list[int | str] | None = Field( - None, description='Array of sample ids used for evaluation' + None, description="Array of sample ids used for evaluation" ) additional_details: AdditionalPropertiesObject | None = None class SourceDataPrivate(BaseModel): - dataset_name: str = Field(..., description='Name of the source dataset') - source_type: Literal['other'] + dataset_name: str = Field(..., description="Name of the source dataset") + source_type: Literal["other"] additional_details: AdditionalPropertiesObject | None = None class ScoreDetails(BaseModel): - score: float = Field(..., description='The score for the evaluation') + score: float = Field(..., description="The score for the evaluation") details: AdditionalPropertiesObject | None = None uncertainty: Uncertainty | None = Field( - None, description='Quantification of uncertainty around the reported score' + None, description="Quantification of uncertainty around the reported score" ) class AvailableTool(BaseModel): - name: str | None = Field(None, description='e.g. bash, calculator, ...') + name: str | None = Field(None, description="e.g. 
bash, calculator, ...") description: str | None = None parameters: AdditionalPropertiesObject | None = None class AgenticEvalConfig(BaseModel): available_tools: list[AvailableTool] | None = Field( - None, description='List of all available tools with their configurations' + None, description="List of all available tools with their configurations" ) additional_details: AdditionalPropertiesObject | None = None class EvalPlan(BaseModel): name: str | None = None - steps: list[Any] | None = Field(None, description='Array of evaluation plan steps') + steps: list[Any] | None = Field(None, description="Array of evaluation plan steps") config: AdditionalPropertiesObject | None = None class GenerationArgs(BaseModel): model_config = ConfigDict( - extra='allow', + extra="allow", ) - temperature: float | None = Field(None, description='Sampling temperature') - top_p: float | None = Field(None, description='Nucleus sampling parameter') - top_k: float | None = Field(None, description='Top-k sampling parameter') + temperature: float | None = Field(None, description="Sampling temperature") + top_p: float | None = Field(None, description="Nucleus sampling parameter") + top_k: float | None = Field(None, description="Top-k sampling parameter") max_tokens: conint(ge=1) | None = Field( - None, description='Maximum number of tokens to generate' + None, description="Maximum number of tokens to generate" ) execution_command: str | None = Field( - None, description='Command used to run the model to generate results' + None, description="Command used to run the model to generate results" ) reasoning: bool | None = Field( None, - description='Whether reasoning orchain-of-thought was used to generate results', + description="Whether reasoning orchain-of-thought was used to generate results", ) prompt_template: str | None = Field( None, - description='Input prompt template for task (should contain agentic info if needed).', + description="Input prompt template for task (should contain agentic info if needed).", ) agentic_eval_config: AgenticEvalConfig | None = Field( - None, description='General configuration for agentic evaluations.' + None, description="General configuration for agentic evaluations." ) eval_plan: EvalPlan | None = Field( None, - description='Plan (solvers) used in evaluation. Solvers are crucial parts of Inspect evaluations which can serve a wide variety of purposes like providing system prompts, prompt engineering, model generation or multi-turn dialog.', + description="Plan (solvers) used in evaluation. Solvers are crucial parts of Inspect evaluations which can serve a wide variety of purposes like providing system prompts, prompt engineering, model generation or multi-turn dialog.", ) eval_limits: EvalLimits | None = Field( None, - description='Listed evaluation limits like time limit, message limit, token limit.', + description="Listed evaluation limits like time limit, message limit, token limit.", ) sandbox: Sandbox | None = None max_attempts: int | None = Field( - 1, description='Maximum number of submission attempts (default 1).' + 1, description="Maximum number of submission attempts (default 1)." ) incorrect_attempt_feedback: str | None = Field( - None, description='Feedback from the model after incorrect attempt.' + None, description="Feedback from the model after incorrect attempt." 
) class GenerationConfig(BaseModel): generation_args: GenerationArgs | None = Field( None, - description='Parameters used to generate results - properties may vary by model type', + description="Parameters used to generate results - properties may vary by model type", ) additional_details: AdditionalPropertiesObject | None = None @@ -291,90 +289,90 @@ class JudgeConfig(BaseModel): class LlmScoring(BaseModel): model_config = ConfigDict( - extra='allow', + extra="allow", ) judges: list[JudgeConfig] = Field( ..., - description='LLM judge(s) - single item for judge, multiple for jury', + description="LLM judge(s) - single item for judge, multiple for jury", min_length=1, ) - input_prompt: str = Field(..., description='Prompt template used for judging') + input_prompt: str = Field(..., description="Prompt template used for judging") aggregation_method: AggregationMethod | None = Field( - None, description='How to aggregate scores when multiple judges' + None, description="How to aggregate scores when multiple judges" ) expert_baseline: float | None = Field( - None, description='Expert/human baseline score for comparison' + None, description="Expert/human baseline score for comparison" ) additional_details: AdditionalPropertiesObject | None = None class MetricConfig(BaseModel): evaluation_description: str | None = Field( - None, description='Description of the evaluation' + None, description="Description of the evaluation" ) - lower_is_better: bool = Field(..., description='Whether a lower score is better') - score_type: ScoreType | None = Field(None, description='Type of score') - level_names: list[str] | None = Field(None, description='Names of the score levels') + lower_is_better: bool = Field(..., description="Whether a lower score is better") + score_type: ScoreType | None = Field(None, description="Type of score") + level_names: list[str] | None = Field(None, description="Names of the score levels") level_metadata: list[str] | None = Field( - None, description='Additional Description for each Score Level' + None, description="Additional Description for each Score Level" ) has_unknown_level: bool | None = Field( None, - description='Indicates whether there is an Unknown Level - if True, then a score of -1 will be treated as Unknown', + description="Indicates whether there is an Unknown Level - if True, then a score of -1 will be treated as Unknown", ) min_score: float | None = Field( - None, description='Minimum possible score for continuous metric' + None, description="Minimum possible score for continuous metric" ) max_score: float | None = Field( - None, description='Maximum possible score for continuous metric' + None, description="Maximum possible score for continuous metric" ) llm_scoring: LlmScoring | None = Field( - None, description='Configuration when LLM is used as scorer/judge' + None, description="Configuration when LLM is used as scorer/judge" ) class EvaluationResult(BaseModel): - evaluation_name: str = Field(..., description='Name of the evaluation') + evaluation_name: str = Field(..., description="Name of the evaluation") source_data: SourceDataUrl | SourceDataHf | SourceDataPrivate = Field( ..., - description='Source of dataset for this evaluation: URL, HuggingFace dataset, or private/custom dataset.', + description="Source of dataset for this evaluation: URL, HuggingFace dataset, or private/custom dataset.", ) evaluation_timestamp: str | None = Field( - None, description='Timestamp for when the evaluations were run' + None, description="Timestamp for when the evaluations 
were run" ) - metric_config: MetricConfig = Field(..., description='Details about the metric') + metric_config: MetricConfig = Field(..., description="Details about the metric") score_details: ScoreDetails = Field( - ..., description='The score for the evaluation and related details' + ..., description="The score for the evaluation and related details" ) generation_config: GenerationConfig | None = None class EvaluationLog(BaseModel): model_config = ConfigDict( - extra='forbid', + extra="forbid", ) schema_version: str = Field( - ..., description='Version of the schema used for this evaluation data' + ..., description="Version of the schema used for this evaluation data" ) evaluation_id: str = Field( ..., - description='Unique identifier for this specific evaluation run. Use eval_name/model_id/retrieved_timestamp format', + description="Unique identifier for this specific evaluation run. Use eval_name/model_id/retrieved_timestamp format", ) evaluation_timestamp: str | None = Field( - None, description='Timestamp for when the evaluation was run' + None, description="Timestamp for when the evaluation was run" ) retrieved_timestamp: str = Field( ..., - description='Timestamp for when this record was created - using Unix Epoch time format', + description="Timestamp for when this record was created - using Unix Epoch time format", ) source_metadata: SourceMetadata = Field( - ..., description='Metadata about the source of the leaderboard data' + ..., description="Metadata about the source of the leaderboard data" ) model_info: ModelInfo evaluation_results: list[EvaluationResult] = Field( - ..., description='Array of evaluation results' + ..., description="Array of evaluation results" ) detailed_evaluation_results: DetailedEvaluationResults | None = Field( None, - description='Reference to the evaluation results for all individual samples in the evaluation', + description="Reference to the evaluation results for all individual samples in the evaluation", ) diff --git a/instance_level_types.py b/instance_level_types.py index e273d854d..10b03c2b0 100644 --- a/instance_level_types.py +++ b/instance_level_types.py @@ -1,194 +1,188 @@ # generated by datamodel-codegen: # filename: instance_level_eval.schema.json -# timestamp: 2026-02-10T16:07:05+00:00 - +# timestamp: 2026-02-11T15:00:06+00:00 from __future__ import annotations - from enum import Enum -from typing import Any - from pydantic import BaseModel, ConfigDict, Field, confloat, conint +from typing import Any class InteractionType(Enum): - single_turn = 'single_turn' - multi_turn = 'multi_turn' - agentic = 'agentic' + single_turn = "single_turn" + multi_turn = "multi_turn" + agentic = "agentic" class Input(BaseModel): - raw: str = Field(..., description='The raw input as defined in the eval') + raw: str = Field(..., description="The raw input as defined in the eval") formatted: str | None = Field( None, - description='Includes chat template, CoT and all relevant modifications - basically the un-tokenized version of what the model sees', + description="Includes chat template, CoT and all relevant modifications - basically the un-tokenized version of what the model sees", ) reference: str = Field( - ..., description='Ground truth or reference answer for comparison/scoring' + ..., description="Ground truth or reference answer for comparison/scoring" ) choices: list[str] | None = Field( - None, description='Optional list of choices for multiple-choice questions' + None, description="Optional list of choices for multiple-choice questions" ) class 
Output(BaseModel): - raw: str = Field(..., description='Complete model response') + raw: str = Field(..., description="Complete model response") reasoning_trace: str | None = Field( None, - description='Reasoning trace of the model if applicable (e.g. chain-of-thought tokens)', + description="Reasoning trace of the model if applicable (e.g. chain-of-thought tokens)", ) class ToolCall(BaseModel): - id: str = Field(..., description='Unique identifier for the tool call') - name: str = Field(..., description='Name of tool/function') + id: str = Field(..., description="Unique identifier for the tool call") + name: str = Field(..., description="Name of tool/function") arguments: dict[str, Any] | None = Field( - None, description='Arguments used to call the tool' + None, description="Arguments used to call the tool" ) class Interaction(BaseModel): turn_idx: conint(ge=0) = Field( ..., - description='Index starting from 0 indicating the position in the conversation', + description="Index starting from 0 indicating the position in the conversation", ) role: str = Field( - ..., description='Role of the speaker (e.g. user, assistant, system, tool)' + ..., description="Role of the speaker (e.g. user, assistant, system, tool)" ) content: str | None = Field( None, - description='The actual raw text for that particular turn (can be null if empty)', + description="The actual raw text for that particular turn (can be null if empty)", ) reasoning_trace: str | None = Field( - None, description='Reasoning trace for that particular turn if applicable' + None, description="Reasoning trace for that particular turn if applicable" ) tool_calls: list[ToolCall] | None = Field( - None, description='List of tool invocations for this turn, if applicable' - ) - tool_call_id: str | list[str] | None = Field( - None, - description='Reference to the tool call ID this turn is responding to (for tool role responses)', + None, description="List of tool invocations for this turn, if applicable" ) + tool_call_id: str | list[str] | None = None class AnswerAttributionItem(BaseModel): turn_idx: conint(ge=0) = Field( - ..., description='Turn index in interactions. 0 for single_turn' + ..., description="Turn index in interactions. 0 for single_turn" ) source: str = Field( ..., description="Source of the extracted value (e.g. 'output.raw' or 'interactions[turn_idx].content')", ) - extracted_value: str = Field(..., description='Value that was extracted') + extracted_value: str = Field(..., description="Value that was extracted") extraction_method: str = Field( ..., - description='Method used to extract the value (e.g. regex, exact_match, llm_judge, custom)', + description="Method used to extract the value (e.g. 
regex, exact_match, llm_judge, custom)", ) is_terminal: bool = Field( ..., - description='Whether this is the final answer (false if intermediate outputs are used to build up to a final answer)', + description="Whether this is the final answer (false if intermediate outputs are used to build up to a final answer)", ) class Evaluation(BaseModel): - score: float | bool = Field(..., description='Instance-level score') - is_correct: bool = Field(..., description='Whether the final answer is correct') + score: float | bool = Field(..., description="Instance-level score") + is_correct: bool = Field(..., description="Whether the final answer is correct") num_turns: conint(ge=1) | None = Field( - None, description='Number of turns in the interaction' + None, description="Number of turns in the interaction" ) tool_calls_count: conint(ge=0) | None = Field( - None, description='Count of tool calls across all turns in interactions' + None, description="Count of tool calls across all turns in interactions" ) class TokenUsage(BaseModel): - input_tokens: conint(ge=0) = Field(..., description='Total input tokens used') - output_tokens: conint(ge=0) = Field(..., description='Total output tokens used') - total_tokens: conint(ge=0) = Field(..., description='Total tokens used') + input_tokens: conint(ge=0) = Field(..., description="Total input tokens used") + output_tokens: conint(ge=0) = Field(..., description="Total output tokens used") + total_tokens: conint(ge=0) = Field(..., description="Total tokens used") input_tokens_cache_write: conint(ge=0) | None = Field( - None, description='Number of tokens written to the cache' + None, description="Number of tokens written to the cache" ) input_tokens_cache_read: conint(ge=0) | None = Field( - None, description='Number of tokens retrieved from the cache' + None, description="Number of tokens retrieved from the cache" ) reasoning_tokens: conint(ge=0) | None = Field( - None, description='Number of tokens used for reasoning' + None, description="Number of tokens used for reasoning" ) class Performance(BaseModel): model_config = ConfigDict( - extra='allow', + extra="allow", ) latency_ms: confloat(ge=0.0) | None = Field( - None, description='Total latency in milliseconds' + None, description="Total latency in milliseconds" ) time_to_first_token_ms: confloat(ge=0.0) | None = Field( - None, description='Time to first token in milliseconds' + None, description="Time to first token in milliseconds" ) generation_time_ms: confloat(ge=0.0) | None = Field( - None, description='Time for generation in milliseconds' + None, description="Time for generation in milliseconds" ) class InstanceLevelEvaluationLog(BaseModel): model_config = ConfigDict( - extra='allow', + extra="allow", ) schema_version: str = Field( - ..., description='Version of the schema used for this instance data' + ..., description="Version of the schema used for this instance data" ) evaluation_id: str = Field( ..., - description='Foreign key linking to the aggregate evaluation JSON. Must match the evaluation_id in the aggregate file.', + description="Foreign key linking to the aggregate evaluation JSON. Must match the evaluation_id in the aggregate file.", ) model_id: str = Field( ..., - description='Identifier of the model in HuggingFace format (e.g. meta-llama/Llama-3.2-1B-Instruct)', + description="Identifier of the model in HuggingFace format (e.g. meta-llama/Llama-3.2-1B-Instruct)", ) evaluation_name: str = Field( ..., - description='The specific eval name, ideally unique (e.g. 
GSM8K, mmlu_physics)', + description="The specific eval name, ideally unique (e.g. GSM8K, mmlu_physics)", ) sample_id: int | str = Field( ..., - description='Question/sample identifier from the original dataset (e.g. gsm8k_0001)', + description="Question/sample identifier from the original dataset (e.g. gsm8k_0001)", ) sample_hash: str | None = Field( None, - description='Hash of (input.raw + input.reference) to ensure comparison is between the same sample across models, in case sample_id is not consistent', + description="Hash of (input.raw + input.reference) to ensure comparison is between the same sample across models, in case sample_id is not consistent", ) interaction_type: InteractionType = Field( ..., - description='Type of interaction: single_turn for simple Q&A, multi_turn for conversations, agentic for tool-using agents', + description="Type of interaction: single_turn for simple Q&A, multi_turn for conversations, agentic for tool-using agents", ) - input: Input = Field(..., description='Input data for the evaluation sample') + input: Input = Field(..., description="Input data for the evaluation sample") output: Output | None = Field( None, - description='Output data - only used for single_turn interactions, null for multi_turn/agentic', + description="Output data - only used for single_turn interactions, null for multi_turn/agentic", ) interactions: list[Interaction] | None = Field( None, - description='List of interactions - used for multi_turn and agentic, null for single_turn', + description="List of interactions - used for multi_turn and agentic, null for single_turn", ) answer_attribution: list[AnswerAttributionItem] = Field( ..., - description='Information about how the answer was extracted from the model output', + description="Information about how the answer was extracted from the model output", ) evaluation: Evaluation = Field( - ..., description='Evaluation results and scoring data' + ..., description="Evaluation results and scoring data" ) token_usage: TokenUsage | None = Field( - None, description='Token usage for the model completion' + None, description="Token usage for the model completion" ) performance: Performance | None = Field( - None, description='Performance and latency metrics' + None, description="Performance and latency metrics" ) error: str | None = Field( None, - description='Information about any error that occurred (e.g. timeout, refusal, API error)', + description="Information about any error that occurred (e.g. timeout, refusal, API error)", ) metadata: dict[str, Any] | None = Field( None, - description='Optional metadata about the sample (e.g. subject, difficulty, tags)', + description="Optional metadata about the sample (e.g. 
subject, difficulty, tags)", ) diff --git a/pyproject.toml b/pyproject.toml index 26ee103cb..d94fdf437 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ authors = [ requires-python = ">=3.12" dependencies = [ "crfm-helm>=0.5.12", - "datamodel-code-generator>=0.52.2", + "datamodel-code-generator[ruff]>=0.52.2", "huggingface-hub>=0.36.0,<1.0.0", "inspect-ai>=0.3.160,<0.4.0", "jsonschema>=4.26.0,<5.0.0", diff --git a/scripts/global-mmlu-lite/adapter.py b/scripts/global-mmlu-lite/adapter.py index 73cd86d69..00a3645ba 100644 --- a/scripts/global-mmlu-lite/adapter.py +++ b/scripts/global-mmlu-lite/adapter.py @@ -13,6 +13,7 @@ from typing import List from eval_types import ( + ConfidenceInterval, EvaluationLog, EvaluationResult, EvaluatorRelationship, @@ -69,7 +70,15 @@ def make_eval_result( """Create an EvaluationResult with hardcoded source_data for global-mmlu-lite.""" uncertainty = None if confidence_interval is not None or stddev is not None: + ci = None + if confidence_interval is not None and score is not None and score >= 0: + ci = ConfidenceInterval( + lower=round(-confidence_interval, 4), + upper=round(confidence_interval, 4), + method="unknown", + ) uncertainty = Uncertainty( + confidence_interval=ci, standard_deviation=stddev, ) return EvaluationResult( @@ -130,7 +139,6 @@ def fetch_global_mmlu_lite(retrieved_timestamp: str) -> int: numeric_result = result_data.get("numericResult") or result_data.get("numericResultNullable", {}) score_value = numeric_result.get("value") - # Extract confidence interval if available if numeric_result.get("hasConfidenceInterval"): confidence_interval = numeric_result.get("confidenceInterval") diff --git a/uv.lock b/uv.lock index 85c3b2986..d7fbdbbdb 100644 --- a/uv.lock +++ b/uv.lock @@ -670,6 +670,11 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9e/8d/72a75327e05a26d5fb7fa511a7d78af18198dc2595bbb179da0c9e4b6fbc/datamodel_code_generator-0.52.2-py3-none-any.whl", hash = "sha256:3ee19ec38190b76bb4119aa67ddce25b3447dd4e5c6bfd47b03a9937c50f5aac", size = 249759, upload_time = "2026-01-05T17:25:48.845Z" }, ] +[package.optional-dependencies] +ruff = [ + { name = "ruff" }, +] + [[package]] name = "datasets" version = "3.6.0" @@ -748,7 +753,7 @@ version = "0.1.0" source = { virtual = "." 
} dependencies = [ { name = "crfm-helm" }, - { name = "datamodel-code-generator" }, + { name = "datamodel-code-generator", extra = ["ruff"] }, { name = "huggingface-hub" }, { name = "inspect-ai" }, { name = "jsonschema" }, @@ -767,7 +772,7 @@ dev = [ [package.metadata] requires-dist = [ { name = "crfm-helm", specifier = ">=0.5.12" }, - { name = "datamodel-code-generator", specifier = ">=0.52.2" }, + { name = "datamodel-code-generator", extras = ["ruff"], specifier = ">=0.52.2" }, { name = "huggingface-hub", specifier = ">=0.36.0,<1.0.0" }, { name = "inspect-ai", specifier = ">=0.3.160,<0.4.0" }, { name = "jsonschema", specifier = ">=4.26.0,<5.0.0" }, @@ -3306,8 +3311,8 @@ dependencies = [ { name = "typing-extensions" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/c9/2f/0b295dd8d199ef71e6f176f576473d645d41357b7b8aa978cc6b042575df/torch-2.10.0-1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:6abb224c2b6e9e27b592a1c0015c33a504b00a0e0938f1499f7f514e9b7bfb5c", size = 79498197, upload_time = "2026-02-06T17:37:27.627Z" }, - { url = "https://files.pythonhosted.org/packages/a4/1b/af5fccb50c341bd69dc016769503cb0857c1423fbe9343410dfeb65240f2/torch-2.10.0-1-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:7350f6652dfd761f11f9ecb590bfe95b573e2961f7a242eccb3c8e78348d26fe", size = 79498248, upload_time = "2026-02-06T17:37:31.982Z" }, + { url = "https://files.pythonhosted.org/packages/d3/54/a2ba279afcca44bbd320d4e73675b282fcee3d81400ea1b53934efca6462/torch-2.10.0-2-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:13ec4add8c3faaed8d13e0574f5cd4a323c11655546f91fbe6afa77b57423574", size = 79498202, upload_time = "2026-02-10T21:44:52.603Z" }, + { url = "https://files.pythonhosted.org/packages/ec/23/2c9fe0c9c27f7f6cb865abcea8a4568f29f00acaeadfc6a37f6801f84cb4/torch-2.10.0-2-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:e521c9f030a3774ed770a9c011751fb47c4d12029a3d6522116e48431f2ff89e", size = 79498254, upload_time = "2026-02-10T21:44:44.095Z" }, { url = "https://files.pythonhosted.org/packages/cc/af/758e242e9102e9988969b5e621d41f36b8f258bb4a099109b7a4b4b50ea4/torch-2.10.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:5fd4117d89ffd47e3dcc71e71a22efac24828ad781c7e46aaaf56bf7f2796acf", size = 145996088, upload_time = "2026-01-21T16:24:44.171Z" }, { url = "https://files.pythonhosted.org/packages/23/8e/3c74db5e53bff7ed9e34c8123e6a8bfef718b2450c35eefab85bb4a7e270/torch-2.10.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:787124e7db3b379d4f1ed54dd12ae7c741c16a4d29b49c0226a89bea50923ffb", size = 915711952, upload_time = "2026-01-21T16:23:53.503Z" }, { url = "https://files.pythonhosted.org/packages/6e/01/624c4324ca01f66ae4c7cd1b74eb16fb52596dce66dbe51eff95ef9e7a4c/torch-2.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:2c66c61f44c5f903046cc696d088e21062644cbe541c7f1c4eaae88b2ad23547", size = 113757972, upload_time = "2026-01-21T16:24:39.516Z" }, From f36d4352586f30206580a25e71dd5f4265a002fc Mon Sep 17 00:00:00 2001 From: SreeHarshaNelaturu Date: Wed, 11 Feb 2026 16:13:43 +0100 Subject: [PATCH 4/4] chore: update source_metadata --- ....json => c8ab4e94-d8e8-417f-be18-fececf3c815c.json} | 10 +++++----- ....json => 402c8833-1827-46fc-a497-46b40a6794ff.json} | 10 +++++----- ....json => acd2082a-ce0c-418f-9383-f3c9f11735a2.json} | 10 +++++----- ....json => c65ed336-b283-46c2-8284-c4695cad588d.json} | 10 +++++----- ....json => 5ebb009d-b548-4f2b-b075-feb76ca295d2.json} | 10 +++++----- ....json => c7df2916-bde4-4987-9139-fcfd18a14ac1.json} | 10 +++++----- ....json => 
56ec8ab0-d76d-4c03-953b-a2a4a43af5f4.json} | 10 +++++-----
 ....json => ad3211a9-4390-4247-b64d-600191a88a75.json} | 10 +++++-----
 ....json => 1a34326a-f75e-434c-a027-9f8cf7fe8fb9.json} | 10 +++++-----
 ....json => 129c8b21-f97e-4284-9574-33d5932332f7.json} | 10 +++++-----
 ....json => 3644fd67-0f46-4de3-b542-edf219d0e0cd.json} | 10 +++++-----
 ....json => c0692e14-6484-4d02-8dac-55ce4373fb15.json} | 10 +++++-----
 ....json => ab4940d1-118c-479a-bd37-1ea2da6f02a3.json} | 10 +++++-----
 ....json => 85552093-435f-4d85-897d-4e74c3655533.json} | 10 +++++-----
 ....json => 4ddc0062-6577-4ab9-85f1-791fd2822776.json} | 10 +++++-----
 ....json => 50fc4840-933b-43ec-847e-1834b30f9f14.json} | 10 +++++-----
 ....json => 6cdc5384-2be5-47e0-a9b2-9cd6719c1760.json} | 10 +++++-----
 ....json => a668c931-34e4-4702-a84c-97d8c6f59ef4.json} | 10 +++++-----
 ....json => 3a7e2aa6-4e57-446f-a127-4a7e022fe3e1.json} | 10 +++++-----
 ....json => 938a35f1-195d-49c8-9a16-90fab96692bd.json} | 10 +++++-----
 ....json => ce756801-f75e-4250-9721-1d627a37f055.json} | 10 +++++-----
 ....json => b83b41d4-6c95-4c7d-a290-65d89bf776c2.json} | 10 +++++-----
 ....json => 31c3fe1b-be4b-42ef-8ec0-9da323b2ebb6.json} | 10 +++++-----
 ....json => a8e0fc0e-b3a4-4a0b-938f-aa11f1c64358.json} | 10 +++++-----
 scripts/global-mmlu-lite/adapter.py | 6 +++---
 25 files changed, 123 insertions(+), 123 deletions(-)
 rename data/global-mmlu-lite/alibaba/qwen3-235b-a22b-instruct-2507/{7352e4d2-016d-404e-b443-62674ed3d75f.json => c8ab4e94-d8e8-417f-be18-fececf3c815c.json} (98%)
 rename data/global-mmlu-lite/anthropic/claude-3-5-haiku-20241022/{7e577a20-9de8-49d7-b598-11755237d256.json => 402c8833-1827-46fc-a497-46b40a6794ff.json} (98%)
 rename data/global-mmlu-lite/anthropic/claude-3-7-sonnet-20250219/{7be4f076-a537-4d34-b884-8860a2d23e57.json => acd2082a-ce0c-418f-9383-f3c9f11735a2.json} (98%)
 rename data/global-mmlu-lite/anthropic/claude-opus-4-1-20250805/{d74e3633-6eda-4261-a06a-1f5d6c7d78d6.json => c65ed336-b283-46c2-8284-c4695cad588d.json} (98%)
 rename data/global-mmlu-lite/anthropic/claude-sonnet-4-20250514/{1c486108-80af-4752-96d8-02832e4420cb.json => 5ebb009d-b548-4f2b-b075-feb76ca295d2.json} (98%)
 rename data/global-mmlu-lite/cohere/command-a-03-2025/{c6d487a0-8088-405c-b6c8-20136afbe339.json => c7df2916-bde4-4987-9139-fcfd18a14ac1.json} (98%)
 rename data/global-mmlu-lite/deepseek/deepseek-r1-0528/{0ef90f90-b956-48ba-bad8-234d74c27c9c.json => 56ec8ab0-d76d-4c03-953b-a2a4a43af5f4.json} (98%)
 rename data/global-mmlu-lite/deepseek/deepseek-v3.1/{10fddd86-ef3c-4d77-a1ca-020d7b19b27d.json => ad3211a9-4390-4247-b64d-600191a88a75.json} (98%)
 rename data/global-mmlu-lite/google/gemini-2.5-flash-preview-05-20/{d491f587-b539-4918-9377-2a8204dbd8a4.json => 1a34326a-f75e-434c-a027-9f8cf7fe8fb9.json} (98%)
 rename data/global-mmlu-lite/google/gemini-2.5-flash/{7ec048f5-7539-42fa-8c16-44d006928475.json => 129c8b21-f97e-4284-9574-33d5932332f7.json} (98%)
 rename data/global-mmlu-lite/google/gemini-2.5-pro/{c23a5254-8219-460b-b334-0746aded67db.json => 3644fd67-0f46-4de3-b542-edf219d0e0cd.json} (98%)
 rename data/global-mmlu-lite/google/gemini-3-pro-preview/{9f223ea5-a5a1-4acb-81c6-6fe71239225a.json => c0692e14-6484-4d02-8dac-55ce4373fb15.json} (98%)
 rename data/global-mmlu-lite/google/gemma-3-27b-it/{f2b74519-42b2-4b41-b963-df24fd0e0690.json => ab4940d1-118c-479a-bd37-1ea2da6f02a3.json} (98%)
 rename data/global-mmlu-lite/google/gemma-3-4b-it/{b455eee3-2917-4479-89b1-e1a470e62fbe.json => 85552093-435f-4d85-897d-4e74c3655533.json} (98%)
 rename data/global-mmlu-lite/mistralai/mistral-medium-3/{25786cd1-4ffd-4265-9b61-426ab47e5b1d.json => 4ddc0062-6577-4ab9-85f1-791fd2822776.json} (98%)
 rename data/global-mmlu-lite/mistralai/mistral-small-2503/{257c2515-2933-4f2e-b6e6-70d4a818f086.json => 50fc4840-933b-43ec-847e-1834b30f9f14.json} (98%)
 rename data/global-mmlu-lite/openai/gpt-4.1-2025-04-14/{e7a344c6-eeff-4b11-93aa-a77ce2063a81.json => 6cdc5384-2be5-47e0-a9b2-9cd6719c1760.json} (98%)
 rename data/global-mmlu-lite/openai/gpt-5-2025-08-07/{55a4f39e-36fc-40d9-8e4d-5dfb3bd8d707.json => a668c931-34e4-4702-a84c-97d8c6f59ef4.json} (98%)
 rename data/global-mmlu-lite/openai/o3-mini-2025-01-31/{a70391be-f081-4ee6-8128-801b27072b53.json => 3a7e2aa6-4e57-446f-a127-4a7e022fe3e1.json} (98%)
 rename data/global-mmlu-lite/unknown/aya-expanse-32b/{14712820-c343-4d94-8eff-633a12b830af.json => 938a35f1-195d-49c8-9a16-90fab96692bd.json} (98%)
 rename data/global-mmlu-lite/unknown/granite-4.0-h-small/{7cc69baa-9db9-4a40-a03a-d4cd6bee16c5.json => ce756801-f75e-4250-9721-1d627a37f055.json} (98%)
 rename data/global-mmlu-lite/unknown/o4-mini-2025-04-16/{026339f8-2a68-47c2-afaf-69a3c0c5a8db.json => b83b41d4-6c95-4c7d-a290-65d89bf776c2.json} (98%)
 rename data/global-mmlu-lite/xai/grok-3-mini/{2761218a-27ef-4dc3-af3a-41ba830f8b40.json => 31c3fe1b-be4b-42ef-8ec0-9da323b2ebb6.json} (98%)
 rename data/global-mmlu-lite/xai/grok-4-0709/{8a2fda68-99d6-47a5-95ee-d38e695dda55.json => a8e0fc0e-b3a4-4a0b-938f-aa11f1c64358.json} (98%)

diff --git a/data/global-mmlu-lite/alibaba/qwen3-235b-a22b-instruct-2507/7352e4d2-016d-404e-b443-62674ed3d75f.json b/data/global-mmlu-lite/alibaba/qwen3-235b-a22b-instruct-2507/c8ab4e94-d8e8-417f-be18-fececf3c815c.json
similarity index 98%
rename from data/global-mmlu-lite/alibaba/qwen3-235b-a22b-instruct-2507/7352e4d2-016d-404e-b443-62674ed3d75f.json
rename to data/global-mmlu-lite/alibaba/qwen3-235b-a22b-instruct-2507/c8ab4e94-d8e8-417f-be18-fececf3c815c.json
index f983a3d6f..b3b764f48 100644
--- a/data/global-mmlu-lite/alibaba/qwen3-235b-a22b-instruct-2507/7352e4d2-016d-404e-b443-62674ed3d75f.json
+++ b/data/global-mmlu-lite/alibaba/qwen3-235b-a22b-instruct-2507/c8ab4e94-d8e8-417f-be18-fececf3c815c.json
@@ -1,12 +1,12 @@
 {
   "schema_version": "0.2.0",
-  "evaluation_id": "global-mmlu-lite/alibaba_qwen3-235b-a22b-instruct-2507/1770822205.005286",
-  "retrieved_timestamp": "1770822205.005286",
+  "evaluation_id": "global-mmlu-lite/alibaba_qwen3-235b-a22b-instruct-2507/1770822797.839372",
+  "retrieved_timestamp": "1770822797.839372",
   "source_metadata": {
-    "source_name": "Global MMLU Lite",
+    "source_name": "Global MMLU Lite Leaderboard",
     "source_type": "documentation",
-    "source_organization_name": "Cohere Labs",
-    "source_organization_url": "https://cohere.com",
+    "source_organization_name": "kaggle",
+    "source_organization_url": "www.kaggle.com",
     "evaluator_relationship": "third_party"
   },
   "model_info": {
diff --git a/data/global-mmlu-lite/anthropic/claude-3-5-haiku-20241022/7e577a20-9de8-49d7-b598-11755237d256.json b/data/global-mmlu-lite/anthropic/claude-3-5-haiku-20241022/402c8833-1827-46fc-a497-46b40a6794ff.json
similarity index 98%
rename from data/global-mmlu-lite/anthropic/claude-3-5-haiku-20241022/7e577a20-9de8-49d7-b598-11755237d256.json
rename to data/global-mmlu-lite/anthropic/claude-3-5-haiku-20241022/402c8833-1827-46fc-a497-46b40a6794ff.json
index f9cb47aa9..5bff70d19 100644
--- a/data/global-mmlu-lite/anthropic/claude-3-5-haiku-20241022/7e577a20-9de8-49d7-b598-11755237d256.json
+++ b/data/global-mmlu-lite/anthropic/claude-3-5-haiku-20241022/402c8833-1827-46fc-a497-46b40a6794ff.json
@@ -1,12 +1,12 @@
 {
   "schema_version": "0.2.0",
-  "evaluation_id": "global-mmlu-lite/anthropic_claude-3-5-haiku-20241022/1770822205.005286",
-  "retrieved_timestamp": "1770822205.005286",
+  "evaluation_id": "global-mmlu-lite/anthropic_claude-3-5-haiku-20241022/1770822797.839372",
+  "retrieved_timestamp": "1770822797.839372",
   "source_metadata": {
-    "source_name": "Global MMLU Lite",
+    "source_name": "Global MMLU Lite Leaderboard",
     "source_type": "documentation",
-    "source_organization_name": "Cohere Labs",
-    "source_organization_url": "https://cohere.com",
+    "source_organization_name": "kaggle",
+    "source_organization_url": "www.kaggle.com",
     "evaluator_relationship": "third_party"
   },
   "model_info": {
diff --git a/data/global-mmlu-lite/anthropic/claude-3-7-sonnet-20250219/7be4f076-a537-4d34-b884-8860a2d23e57.json b/data/global-mmlu-lite/anthropic/claude-3-7-sonnet-20250219/acd2082a-ce0c-418f-9383-f3c9f11735a2.json
similarity index 98%
rename from data/global-mmlu-lite/anthropic/claude-3-7-sonnet-20250219/7be4f076-a537-4d34-b884-8860a2d23e57.json
rename to data/global-mmlu-lite/anthropic/claude-3-7-sonnet-20250219/acd2082a-ce0c-418f-9383-f3c9f11735a2.json
index 03f1c7ab0..ec9276c60 100644
--- a/data/global-mmlu-lite/anthropic/claude-3-7-sonnet-20250219/7be4f076-a537-4d34-b884-8860a2d23e57.json
+++ b/data/global-mmlu-lite/anthropic/claude-3-7-sonnet-20250219/acd2082a-ce0c-418f-9383-f3c9f11735a2.json
@@ -1,12 +1,12 @@
 {
   "schema_version": "0.2.0",
-  "evaluation_id": "global-mmlu-lite/anthropic_claude-3-7-sonnet-20250219/1770822205.005286",
-  "retrieved_timestamp": "1770822205.005286",
+  "evaluation_id": "global-mmlu-lite/anthropic_claude-3-7-sonnet-20250219/1770822797.839372",
+  "retrieved_timestamp": "1770822797.839372",
   "source_metadata": {
-    "source_name": "Global MMLU Lite",
+    "source_name": "Global MMLU Lite Leaderboard",
     "source_type": "documentation",
-    "source_organization_name": "Cohere Labs",
-    "source_organization_url": "https://cohere.com",
+    "source_organization_name": "kaggle",
+    "source_organization_url": "www.kaggle.com",
     "evaluator_relationship": "third_party"
   },
   "model_info": {
diff --git a/data/global-mmlu-lite/anthropic/claude-opus-4-1-20250805/d74e3633-6eda-4261-a06a-1f5d6c7d78d6.json b/data/global-mmlu-lite/anthropic/claude-opus-4-1-20250805/c65ed336-b283-46c2-8284-c4695cad588d.json
similarity index 98%
rename from data/global-mmlu-lite/anthropic/claude-opus-4-1-20250805/d74e3633-6eda-4261-a06a-1f5d6c7d78d6.json
rename to data/global-mmlu-lite/anthropic/claude-opus-4-1-20250805/c65ed336-b283-46c2-8284-c4695cad588d.json
index ce46cb3bf..06dce92ac 100644
--- a/data/global-mmlu-lite/anthropic/claude-opus-4-1-20250805/d74e3633-6eda-4261-a06a-1f5d6c7d78d6.json
+++ b/data/global-mmlu-lite/anthropic/claude-opus-4-1-20250805/c65ed336-b283-46c2-8284-c4695cad588d.json
@@ -1,12 +1,12 @@
 {
   "schema_version": "0.2.0",
-  "evaluation_id": "global-mmlu-lite/anthropic_claude-opus-4-1-20250805/1770822205.005286",
-  "retrieved_timestamp": "1770822205.005286",
+  "evaluation_id": "global-mmlu-lite/anthropic_claude-opus-4-1-20250805/1770822797.839372",
+  "retrieved_timestamp": "1770822797.839372",
   "source_metadata": {
-    "source_name": "Global MMLU Lite",
+    "source_name": "Global MMLU Lite Leaderboard",
     "source_type": "documentation",
-    "source_organization_name": "Cohere Labs",
-    "source_organization_url": "https://cohere.com",
+    "source_organization_name": "kaggle",
+    "source_organization_url": "www.kaggle.com",
     "evaluator_relationship": "third_party"
   },
   "model_info": {
diff --git a/data/global-mmlu-lite/anthropic/claude-sonnet-4-20250514/1c486108-80af-4752-96d8-02832e4420cb.json b/data/global-mmlu-lite/anthropic/claude-sonnet-4-20250514/5ebb009d-b548-4f2b-b075-feb76ca295d2.json
similarity index 98%
rename from data/global-mmlu-lite/anthropic/claude-sonnet-4-20250514/1c486108-80af-4752-96d8-02832e4420cb.json
rename to data/global-mmlu-lite/anthropic/claude-sonnet-4-20250514/5ebb009d-b548-4f2b-b075-feb76ca295d2.json
index 5f464fbed..0251345d9 100644
--- a/data/global-mmlu-lite/anthropic/claude-sonnet-4-20250514/1c486108-80af-4752-96d8-02832e4420cb.json
+++ b/data/global-mmlu-lite/anthropic/claude-sonnet-4-20250514/5ebb009d-b548-4f2b-b075-feb76ca295d2.json
@@ -1,12 +1,12 @@
 {
   "schema_version": "0.2.0",
-  "evaluation_id": "global-mmlu-lite/anthropic_claude-sonnet-4-20250514/1770822205.005286",
-  "retrieved_timestamp": "1770822205.005286",
+  "evaluation_id": "global-mmlu-lite/anthropic_claude-sonnet-4-20250514/1770822797.839372",
+  "retrieved_timestamp": "1770822797.839372",
   "source_metadata": {
-    "source_name": "Global MMLU Lite",
+    "source_name": "Global MMLU Lite Leaderboard",
     "source_type": "documentation",
-    "source_organization_name": "Cohere Labs",
-    "source_organization_url": "https://cohere.com",
+    "source_organization_name": "kaggle",
+    "source_organization_url": "www.kaggle.com",
     "evaluator_relationship": "third_party"
   },
   "model_info": {
diff --git a/data/global-mmlu-lite/cohere/command-a-03-2025/c6d487a0-8088-405c-b6c8-20136afbe339.json b/data/global-mmlu-lite/cohere/command-a-03-2025/c7df2916-bde4-4987-9139-fcfd18a14ac1.json
similarity index 98%
rename from data/global-mmlu-lite/cohere/command-a-03-2025/c6d487a0-8088-405c-b6c8-20136afbe339.json
rename to data/global-mmlu-lite/cohere/command-a-03-2025/c7df2916-bde4-4987-9139-fcfd18a14ac1.json
index d40297fe9..8e9ed8546 100644
--- a/data/global-mmlu-lite/cohere/command-a-03-2025/c6d487a0-8088-405c-b6c8-20136afbe339.json
+++ b/data/global-mmlu-lite/cohere/command-a-03-2025/c7df2916-bde4-4987-9139-fcfd18a14ac1.json
@@ -1,12 +1,12 @@
 {
   "schema_version": "0.2.0",
-  "evaluation_id": "global-mmlu-lite/cohere_command-a-03-2025/1770822205.005286",
-  "retrieved_timestamp": "1770822205.005286",
+  "evaluation_id": "global-mmlu-lite/cohere_command-a-03-2025/1770822797.839372",
+  "retrieved_timestamp": "1770822797.839372",
   "source_metadata": {
-    "source_name": "Global MMLU Lite",
+    "source_name": "Global MMLU Lite Leaderboard",
     "source_type": "documentation",
-    "source_organization_name": "Cohere Labs",
-    "source_organization_url": "https://cohere.com",
+    "source_organization_name": "kaggle",
+    "source_organization_url": "www.kaggle.com",
     "evaluator_relationship": "third_party"
   },
   "model_info": {
diff --git a/data/global-mmlu-lite/deepseek/deepseek-r1-0528/0ef90f90-b956-48ba-bad8-234d74c27c9c.json b/data/global-mmlu-lite/deepseek/deepseek-r1-0528/56ec8ab0-d76d-4c03-953b-a2a4a43af5f4.json
similarity index 98%
rename from data/global-mmlu-lite/deepseek/deepseek-r1-0528/0ef90f90-b956-48ba-bad8-234d74c27c9c.json
rename to data/global-mmlu-lite/deepseek/deepseek-r1-0528/56ec8ab0-d76d-4c03-953b-a2a4a43af5f4.json
index f1a17dbc6..b6e9a89cf 100644
--- a/data/global-mmlu-lite/deepseek/deepseek-r1-0528/0ef90f90-b956-48ba-bad8-234d74c27c9c.json
+++ b/data/global-mmlu-lite/deepseek/deepseek-r1-0528/56ec8ab0-d76d-4c03-953b-a2a4a43af5f4.json
@@ -1,12 +1,12 @@
 {
   "schema_version": "0.2.0",
-  "evaluation_id": "global-mmlu-lite/deepseek_deepseek-r1-0528/1770822205.005286",
-  "retrieved_timestamp": "1770822205.005286",
+  "evaluation_id": "global-mmlu-lite/deepseek_deepseek-r1-0528/1770822797.839372",
+  "retrieved_timestamp": "1770822797.839372",
   "source_metadata": {
-    "source_name": "Global MMLU Lite",
+    "source_name": "Global MMLU Lite Leaderboard",
     "source_type": "documentation",
-    "source_organization_name": "Cohere Labs",
-    "source_organization_url": "https://cohere.com",
+    "source_organization_name": "kaggle",
+    "source_organization_url": "www.kaggle.com",
     "evaluator_relationship": "third_party"
   },
   "model_info": {
diff --git a/data/global-mmlu-lite/deepseek/deepseek-v3.1/10fddd86-ef3c-4d77-a1ca-020d7b19b27d.json b/data/global-mmlu-lite/deepseek/deepseek-v3.1/ad3211a9-4390-4247-b64d-600191a88a75.json
similarity index 98%
rename from data/global-mmlu-lite/deepseek/deepseek-v3.1/10fddd86-ef3c-4d77-a1ca-020d7b19b27d.json
rename to data/global-mmlu-lite/deepseek/deepseek-v3.1/ad3211a9-4390-4247-b64d-600191a88a75.json
index af44b4305..7e8deab0e 100644
--- a/data/global-mmlu-lite/deepseek/deepseek-v3.1/10fddd86-ef3c-4d77-a1ca-020d7b19b27d.json
+++ b/data/global-mmlu-lite/deepseek/deepseek-v3.1/ad3211a9-4390-4247-b64d-600191a88a75.json
@@ -1,12 +1,12 @@
 {
   "schema_version": "0.2.0",
-  "evaluation_id": "global-mmlu-lite/deepseek_deepseek-v3.1/1770822205.005286",
-  "retrieved_timestamp": "1770822205.005286",
+  "evaluation_id": "global-mmlu-lite/deepseek_deepseek-v3.1/1770822797.839372",
+  "retrieved_timestamp": "1770822797.839372",
   "source_metadata": {
-    "source_name": "Global MMLU Lite",
+    "source_name": "Global MMLU Lite Leaderboard",
     "source_type": "documentation",
-    "source_organization_name": "Cohere Labs",
-    "source_organization_url": "https://cohere.com",
+    "source_organization_name": "kaggle",
+    "source_organization_url": "www.kaggle.com",
     "evaluator_relationship": "third_party"
   },
   "model_info": {
diff --git a/data/global-mmlu-lite/google/gemini-2.5-flash-preview-05-20/d491f587-b539-4918-9377-2a8204dbd8a4.json b/data/global-mmlu-lite/google/gemini-2.5-flash-preview-05-20/1a34326a-f75e-434c-a027-9f8cf7fe8fb9.json
similarity index 98%
rename from data/global-mmlu-lite/google/gemini-2.5-flash-preview-05-20/d491f587-b539-4918-9377-2a8204dbd8a4.json
rename to data/global-mmlu-lite/google/gemini-2.5-flash-preview-05-20/1a34326a-f75e-434c-a027-9f8cf7fe8fb9.json
index c80db486b..7a051d563 100644
--- a/data/global-mmlu-lite/google/gemini-2.5-flash-preview-05-20/d491f587-b539-4918-9377-2a8204dbd8a4.json
+++ b/data/global-mmlu-lite/google/gemini-2.5-flash-preview-05-20/1a34326a-f75e-434c-a027-9f8cf7fe8fb9.json
@@ -1,12 +1,12 @@
 {
   "schema_version": "0.2.0",
-  "evaluation_id": "global-mmlu-lite/google_gemini-2.5-flash-preview-05-20/1770822205.005286",
-  "retrieved_timestamp": "1770822205.005286",
+  "evaluation_id": "global-mmlu-lite/google_gemini-2.5-flash-preview-05-20/1770822797.839372",
+  "retrieved_timestamp": "1770822797.839372",
   "source_metadata": {
-    "source_name": "Global MMLU Lite",
+    "source_name": "Global MMLU Lite Leaderboard",
     "source_type": "documentation",
-    "source_organization_name": "Cohere Labs",
-    "source_organization_url": "https://cohere.com",
+    "source_organization_name": "kaggle",
+    "source_organization_url": "www.kaggle.com",
     "evaluator_relationship": "third_party"
   },
   "model_info": {
diff --git a/data/global-mmlu-lite/google/gemini-2.5-flash/7ec048f5-7539-42fa-8c16-44d006928475.json b/data/global-mmlu-lite/google/gemini-2.5-flash/129c8b21-f97e-4284-9574-33d5932332f7.json
similarity index 98%
rename from data/global-mmlu-lite/google/gemini-2.5-flash/7ec048f5-7539-42fa-8c16-44d006928475.json
rename to data/global-mmlu-lite/google/gemini-2.5-flash/129c8b21-f97e-4284-9574-33d5932332f7.json
index 06cf9f2c2..ffe8e8eb2 100644
--- a/data/global-mmlu-lite/google/gemini-2.5-flash/7ec048f5-7539-42fa-8c16-44d006928475.json
+++ b/data/global-mmlu-lite/google/gemini-2.5-flash/129c8b21-f97e-4284-9574-33d5932332f7.json
@@ -1,12 +1,12 @@
 {
   "schema_version": "0.2.0",
-  "evaluation_id": "global-mmlu-lite/google_gemini-2.5-flash/1770822205.005286",
-  "retrieved_timestamp": "1770822205.005286",
+  "evaluation_id": "global-mmlu-lite/google_gemini-2.5-flash/1770822797.839372",
+  "retrieved_timestamp": "1770822797.839372",
   "source_metadata": {
-    "source_name": "Global MMLU Lite",
+    "source_name": "Global MMLU Lite Leaderboard",
     "source_type": "documentation",
-    "source_organization_name": "Cohere Labs",
-    "source_organization_url": "https://cohere.com",
+    "source_organization_name": "kaggle",
+    "source_organization_url": "www.kaggle.com",
     "evaluator_relationship": "third_party"
   },
   "model_info": {
diff --git a/data/global-mmlu-lite/google/gemini-2.5-pro/c23a5254-8219-460b-b334-0746aded67db.json b/data/global-mmlu-lite/google/gemini-2.5-pro/3644fd67-0f46-4de3-b542-edf219d0e0cd.json
similarity index 98%
rename from data/global-mmlu-lite/google/gemini-2.5-pro/c23a5254-8219-460b-b334-0746aded67db.json
rename to data/global-mmlu-lite/google/gemini-2.5-pro/3644fd67-0f46-4de3-b542-edf219d0e0cd.json
index 3b4524e12..6a19f6916 100644
--- a/data/global-mmlu-lite/google/gemini-2.5-pro/c23a5254-8219-460b-b334-0746aded67db.json
+++ b/data/global-mmlu-lite/google/gemini-2.5-pro/3644fd67-0f46-4de3-b542-edf219d0e0cd.json
@@ -1,12 +1,12 @@
 {
   "schema_version": "0.2.0",
-  "evaluation_id": "global-mmlu-lite/google_gemini-2.5-pro/1770822205.005286",
-  "retrieved_timestamp": "1770822205.005286",
+  "evaluation_id": "global-mmlu-lite/google_gemini-2.5-pro/1770822797.839372",
+  "retrieved_timestamp": "1770822797.839372",
   "source_metadata": {
-    "source_name": "Global MMLU Lite",
+    "source_name": "Global MMLU Lite Leaderboard",
     "source_type": "documentation",
-    "source_organization_name": "Cohere Labs",
-    "source_organization_url": "https://cohere.com",
+    "source_organization_name": "kaggle",
+    "source_organization_url": "www.kaggle.com",
     "evaluator_relationship": "third_party"
   },
   "model_info": {
diff --git a/data/global-mmlu-lite/google/gemini-3-pro-preview/9f223ea5-a5a1-4acb-81c6-6fe71239225a.json b/data/global-mmlu-lite/google/gemini-3-pro-preview/c0692e14-6484-4d02-8dac-55ce4373fb15.json
similarity index 98%
rename from data/global-mmlu-lite/google/gemini-3-pro-preview/9f223ea5-a5a1-4acb-81c6-6fe71239225a.json
rename to data/global-mmlu-lite/google/gemini-3-pro-preview/c0692e14-6484-4d02-8dac-55ce4373fb15.json
index 97360f3df..8538679be 100644
--- a/data/global-mmlu-lite/google/gemini-3-pro-preview/9f223ea5-a5a1-4acb-81c6-6fe71239225a.json
+++ b/data/global-mmlu-lite/google/gemini-3-pro-preview/c0692e14-6484-4d02-8dac-55ce4373fb15.json
@@ -1,12 +1,12 @@
 {
   "schema_version": "0.2.0",
-  "evaluation_id": "global-mmlu-lite/google_gemini-3-pro-preview/1770822205.005286",
-  "retrieved_timestamp": "1770822205.005286",
+  "evaluation_id": "global-mmlu-lite/google_gemini-3-pro-preview/1770822797.839372",
+  "retrieved_timestamp": "1770822797.839372",
   "source_metadata": {
-    "source_name": "Global MMLU Lite",
+    "source_name": "Global MMLU Lite Leaderboard",
     "source_type": "documentation",
-    "source_organization_name": "Cohere Labs",
-    "source_organization_url": "https://cohere.com",
+    "source_organization_name": "kaggle",
+    "source_organization_url": "www.kaggle.com",
     "evaluator_relationship": "third_party"
   },
   "model_info": {
diff --git a/data/global-mmlu-lite/google/gemma-3-27b-it/f2b74519-42b2-4b41-b963-df24fd0e0690.json b/data/global-mmlu-lite/google/gemma-3-27b-it/ab4940d1-118c-479a-bd37-1ea2da6f02a3.json
similarity index 98%
rename from data/global-mmlu-lite/google/gemma-3-27b-it/f2b74519-42b2-4b41-b963-df24fd0e0690.json
rename to data/global-mmlu-lite/google/gemma-3-27b-it/ab4940d1-118c-479a-bd37-1ea2da6f02a3.json
index 0d14a421a..211f9d6b8 100644
--- a/data/global-mmlu-lite/google/gemma-3-27b-it/f2b74519-42b2-4b41-b963-df24fd0e0690.json
+++ b/data/global-mmlu-lite/google/gemma-3-27b-it/ab4940d1-118c-479a-bd37-1ea2da6f02a3.json
@@ -1,12 +1,12 @@
 {
   "schema_version": "0.2.0",
-  "evaluation_id": "global-mmlu-lite/google_gemma-3-27b-it/1770822205.005286",
-  "retrieved_timestamp": "1770822205.005286",
+  "evaluation_id": "global-mmlu-lite/google_gemma-3-27b-it/1770822797.839372",
+  "retrieved_timestamp": "1770822797.839372",
   "source_metadata": {
-    "source_name": "Global MMLU Lite",
+    "source_name": "Global MMLU Lite Leaderboard",
     "source_type": "documentation",
-    "source_organization_name": "Cohere Labs",
-    "source_organization_url": "https://cohere.com",
+    "source_organization_name": "kaggle",
+    "source_organization_url": "www.kaggle.com",
     "evaluator_relationship": "third_party"
   },
   "model_info": {
diff --git a/data/global-mmlu-lite/google/gemma-3-4b-it/b455eee3-2917-4479-89b1-e1a470e62fbe.json b/data/global-mmlu-lite/google/gemma-3-4b-it/85552093-435f-4d85-897d-4e74c3655533.json
similarity index 98%
rename from data/global-mmlu-lite/google/gemma-3-4b-it/b455eee3-2917-4479-89b1-e1a470e62fbe.json
rename to data/global-mmlu-lite/google/gemma-3-4b-it/85552093-435f-4d85-897d-4e74c3655533.json
index 6c6b3a3ce..f5d7db0a6 100644
--- a/data/global-mmlu-lite/google/gemma-3-4b-it/b455eee3-2917-4479-89b1-e1a470e62fbe.json
+++ b/data/global-mmlu-lite/google/gemma-3-4b-it/85552093-435f-4d85-897d-4e74c3655533.json
@@ -1,12 +1,12 @@
 {
   "schema_version": "0.2.0",
-  "evaluation_id": "global-mmlu-lite/google_gemma-3-4b-it/1770822205.005286",
-  "retrieved_timestamp": "1770822205.005286",
+  "evaluation_id": "global-mmlu-lite/google_gemma-3-4b-it/1770822797.839372",
+  "retrieved_timestamp": "1770822797.839372",
   "source_metadata": {
-    "source_name": "Global MMLU Lite",
+    "source_name": "Global MMLU Lite Leaderboard",
     "source_type": "documentation",
-    "source_organization_name": "Cohere Labs",
-    "source_organization_url": "https://cohere.com",
+    "source_organization_name": "kaggle",
+    "source_organization_url": "www.kaggle.com",
     "evaluator_relationship": "third_party"
   },
   "model_info": {
diff --git a/data/global-mmlu-lite/mistralai/mistral-medium-3/25786cd1-4ffd-4265-9b61-426ab47e5b1d.json b/data/global-mmlu-lite/mistralai/mistral-medium-3/4ddc0062-6577-4ab9-85f1-791fd2822776.json
similarity index 98%
rename from data/global-mmlu-lite/mistralai/mistral-medium-3/25786cd1-4ffd-4265-9b61-426ab47e5b1d.json
rename to data/global-mmlu-lite/mistralai/mistral-medium-3/4ddc0062-6577-4ab9-85f1-791fd2822776.json
index 7e4a31d27..242b4f1b9 100644
--- a/data/global-mmlu-lite/mistralai/mistral-medium-3/25786cd1-4ffd-4265-9b61-426ab47e5b1d.json
+++ b/data/global-mmlu-lite/mistralai/mistral-medium-3/4ddc0062-6577-4ab9-85f1-791fd2822776.json
@@ -1,12 +1,12 @@
 {
   "schema_version": "0.2.0",
-  "evaluation_id": "global-mmlu-lite/mistralai_mistral-medium-3/1770822205.005286",
-  "retrieved_timestamp": "1770822205.005286",
+  "evaluation_id": "global-mmlu-lite/mistralai_mistral-medium-3/1770822797.839372",
+  "retrieved_timestamp": "1770822797.839372",
   "source_metadata": {
-    "source_name": "Global MMLU Lite",
+    "source_name": "Global MMLU Lite Leaderboard",
     "source_type": "documentation",
-    "source_organization_name": "Cohere Labs",
-    "source_organization_url": "https://cohere.com",
+    "source_organization_name": "kaggle",
+    "source_organization_url": "www.kaggle.com",
     "evaluator_relationship": "third_party"
   },
   "model_info": {
diff --git a/data/global-mmlu-lite/mistralai/mistral-small-2503/257c2515-2933-4f2e-b6e6-70d4a818f086.json b/data/global-mmlu-lite/mistralai/mistral-small-2503/50fc4840-933b-43ec-847e-1834b30f9f14.json
similarity index 98%
rename from data/global-mmlu-lite/mistralai/mistral-small-2503/257c2515-2933-4f2e-b6e6-70d4a818f086.json
rename to data/global-mmlu-lite/mistralai/mistral-small-2503/50fc4840-933b-43ec-847e-1834b30f9f14.json
index 74ee2bb5b..afd35d897 100644
--- a/data/global-mmlu-lite/mistralai/mistral-small-2503/257c2515-2933-4f2e-b6e6-70d4a818f086.json
+++ b/data/global-mmlu-lite/mistralai/mistral-small-2503/50fc4840-933b-43ec-847e-1834b30f9f14.json
@@ -1,12 +1,12 @@
 {
   "schema_version": "0.2.0",
-  "evaluation_id": "global-mmlu-lite/mistralai_mistral-small-2503/1770822205.005286",
-  "retrieved_timestamp": "1770822205.005286",
+  "evaluation_id": "global-mmlu-lite/mistralai_mistral-small-2503/1770822797.839372",
+  "retrieved_timestamp": "1770822797.839372",
   "source_metadata": {
-    "source_name": "Global MMLU Lite",
+    "source_name": "Global MMLU Lite Leaderboard",
     "source_type": "documentation",
-    "source_organization_name": "Cohere Labs",
-    "source_organization_url": "https://cohere.com",
+    "source_organization_name": "kaggle",
+    "source_organization_url": "www.kaggle.com",
     "evaluator_relationship": "third_party"
   },
   "model_info": {
diff --git a/data/global-mmlu-lite/openai/gpt-4.1-2025-04-14/e7a344c6-eeff-4b11-93aa-a77ce2063a81.json b/data/global-mmlu-lite/openai/gpt-4.1-2025-04-14/6cdc5384-2be5-47e0-a9b2-9cd6719c1760.json
similarity index 98%
rename from data/global-mmlu-lite/openai/gpt-4.1-2025-04-14/e7a344c6-eeff-4b11-93aa-a77ce2063a81.json
rename to data/global-mmlu-lite/openai/gpt-4.1-2025-04-14/6cdc5384-2be5-47e0-a9b2-9cd6719c1760.json
index 1b0ed18d8..4ace59a99 100644
--- a/data/global-mmlu-lite/openai/gpt-4.1-2025-04-14/e7a344c6-eeff-4b11-93aa-a77ce2063a81.json
+++ b/data/global-mmlu-lite/openai/gpt-4.1-2025-04-14/6cdc5384-2be5-47e0-a9b2-9cd6719c1760.json
@@ -1,12 +1,12 @@
 {
   "schema_version": "0.2.0",
-  "evaluation_id": "global-mmlu-lite/openai_gpt-4.1-2025-04-14/1770822205.005286",
-  "retrieved_timestamp": "1770822205.005286",
+  "evaluation_id": "global-mmlu-lite/openai_gpt-4.1-2025-04-14/1770822797.839372",
+  "retrieved_timestamp": "1770822797.839372",
   "source_metadata": {
-    "source_name": "Global MMLU Lite",
+    "source_name": "Global MMLU Lite Leaderboard",
     "source_type": "documentation",
-    "source_organization_name": "Cohere Labs",
-    "source_organization_url": "https://cohere.com",
+    "source_organization_name": "kaggle",
+    "source_organization_url": "www.kaggle.com",
     "evaluator_relationship": "third_party"
   },
   "model_info": {
diff --git a/data/global-mmlu-lite/openai/gpt-5-2025-08-07/55a4f39e-36fc-40d9-8e4d-5dfb3bd8d707.json b/data/global-mmlu-lite/openai/gpt-5-2025-08-07/a668c931-34e4-4702-a84c-97d8c6f59ef4.json
similarity index 98%
rename from data/global-mmlu-lite/openai/gpt-5-2025-08-07/55a4f39e-36fc-40d9-8e4d-5dfb3bd8d707.json
rename to data/global-mmlu-lite/openai/gpt-5-2025-08-07/a668c931-34e4-4702-a84c-97d8c6f59ef4.json
index 86dfbf015..7b0435821 100644
--- a/data/global-mmlu-lite/openai/gpt-5-2025-08-07/55a4f39e-36fc-40d9-8e4d-5dfb3bd8d707.json
+++ b/data/global-mmlu-lite/openai/gpt-5-2025-08-07/a668c931-34e4-4702-a84c-97d8c6f59ef4.json
@@ -1,12 +1,12 @@
 {
   "schema_version": "0.2.0",
-  "evaluation_id": "global-mmlu-lite/openai_gpt-5-2025-08-07/1770822205.005286",
-  "retrieved_timestamp": "1770822205.005286",
+  "evaluation_id": "global-mmlu-lite/openai_gpt-5-2025-08-07/1770822797.839372",
+  "retrieved_timestamp": "1770822797.839372",
   "source_metadata": {
-    "source_name": "Global MMLU Lite",
+    "source_name": "Global MMLU Lite Leaderboard",
     "source_type": "documentation",
-    "source_organization_name": "Cohere Labs",
-    "source_organization_url": "https://cohere.com",
+    "source_organization_name": "kaggle",
+    "source_organization_url": "www.kaggle.com",
     "evaluator_relationship": "third_party"
   },
   "model_info": {
diff --git a/data/global-mmlu-lite/openai/o3-mini-2025-01-31/a70391be-f081-4ee6-8128-801b27072b53.json b/data/global-mmlu-lite/openai/o3-mini-2025-01-31/3a7e2aa6-4e57-446f-a127-4a7e022fe3e1.json
similarity index 98%
rename from data/global-mmlu-lite/openai/o3-mini-2025-01-31/a70391be-f081-4ee6-8128-801b27072b53.json
rename to data/global-mmlu-lite/openai/o3-mini-2025-01-31/3a7e2aa6-4e57-446f-a127-4a7e022fe3e1.json
index e92aebf64..0d22ba810 100644
--- a/data/global-mmlu-lite/openai/o3-mini-2025-01-31/a70391be-f081-4ee6-8128-801b27072b53.json
+++ b/data/global-mmlu-lite/openai/o3-mini-2025-01-31/3a7e2aa6-4e57-446f-a127-4a7e022fe3e1.json
@@ -1,12 +1,12 @@
 {
   "schema_version": "0.2.0",
-  "evaluation_id": "global-mmlu-lite/openai_o3-mini-2025-01-31/1770822205.005286",
-  "retrieved_timestamp": "1770822205.005286",
+  "evaluation_id": "global-mmlu-lite/openai_o3-mini-2025-01-31/1770822797.839372",
+  "retrieved_timestamp": "1770822797.839372",
   "source_metadata": {
-    "source_name": "Global MMLU Lite",
+    "source_name": "Global MMLU Lite Leaderboard",
     "source_type": "documentation",
-    "source_organization_name": "Cohere Labs",
-    "source_organization_url": "https://cohere.com",
+    "source_organization_name": "kaggle",
+    "source_organization_url": "www.kaggle.com",
     "evaluator_relationship": "third_party"
   },
   "model_info": {
diff --git a/data/global-mmlu-lite/unknown/aya-expanse-32b/14712820-c343-4d94-8eff-633a12b830af.json b/data/global-mmlu-lite/unknown/aya-expanse-32b/938a35f1-195d-49c8-9a16-90fab96692bd.json
similarity index 98%
rename from data/global-mmlu-lite/unknown/aya-expanse-32b/14712820-c343-4d94-8eff-633a12b830af.json
rename to data/global-mmlu-lite/unknown/aya-expanse-32b/938a35f1-195d-49c8-9a16-90fab96692bd.json
index 057524bd4..4e5593fdf 100644
--- a/data/global-mmlu-lite/unknown/aya-expanse-32b/14712820-c343-4d94-8eff-633a12b830af.json
+++ b/data/global-mmlu-lite/unknown/aya-expanse-32b/938a35f1-195d-49c8-9a16-90fab96692bd.json
@@ -1,12 +1,12 @@
 {
   "schema_version": "0.2.0",
-  "evaluation_id": "global-mmlu-lite/unknown_aya-expanse-32b/1770822205.005286",
-  "retrieved_timestamp": "1770822205.005286",
+  "evaluation_id": "global-mmlu-lite/unknown_aya-expanse-32b/1770822797.839372",
+  "retrieved_timestamp": "1770822797.839372",
   "source_metadata": {
-    "source_name": "Global MMLU Lite",
+    "source_name": "Global MMLU Lite Leaderboard",
     "source_type": "documentation",
-    "source_organization_name": "Cohere Labs",
-    "source_organization_url": "https://cohere.com",
+    "source_organization_name": "kaggle",
+    "source_organization_url": "www.kaggle.com",
     "evaluator_relationship": "third_party"
   },
   "model_info": {
diff --git a/data/global-mmlu-lite/unknown/granite-4.0-h-small/7cc69baa-9db9-4a40-a03a-d4cd6bee16c5.json b/data/global-mmlu-lite/unknown/granite-4.0-h-small/ce756801-f75e-4250-9721-1d627a37f055.json
similarity index 98%
rename from data/global-mmlu-lite/unknown/granite-4.0-h-small/7cc69baa-9db9-4a40-a03a-d4cd6bee16c5.json
rename to data/global-mmlu-lite/unknown/granite-4.0-h-small/ce756801-f75e-4250-9721-1d627a37f055.json
index 661c1261f..fd8643d63 100644
--- a/data/global-mmlu-lite/unknown/granite-4.0-h-small/7cc69baa-9db9-4a40-a03a-d4cd6bee16c5.json
+++ b/data/global-mmlu-lite/unknown/granite-4.0-h-small/ce756801-f75e-4250-9721-1d627a37f055.json
@@ -1,12 +1,12 @@
 {
   "schema_version": "0.2.0",
-  "evaluation_id": "global-mmlu-lite/unknown_granite-4.0-h-small/1770822205.005286",
-  "retrieved_timestamp": "1770822205.005286",
+  "evaluation_id": "global-mmlu-lite/unknown_granite-4.0-h-small/1770822797.839372",
+  "retrieved_timestamp": "1770822797.839372",
   "source_metadata": {
-    "source_name": "Global MMLU Lite",
+    "source_name": "Global MMLU Lite Leaderboard",
     "source_type": "documentation",
-    "source_organization_name": "Cohere Labs",
-    "source_organization_url": "https://cohere.com",
+    "source_organization_name": "kaggle",
+    "source_organization_url": "www.kaggle.com",
     "evaluator_relationship": "third_party"
   },
   "model_info": {
diff --git a/data/global-mmlu-lite/unknown/o4-mini-2025-04-16/026339f8-2a68-47c2-afaf-69a3c0c5a8db.json b/data/global-mmlu-lite/unknown/o4-mini-2025-04-16/b83b41d4-6c95-4c7d-a290-65d89bf776c2.json
similarity index 98%
rename from data/global-mmlu-lite/unknown/o4-mini-2025-04-16/026339f8-2a68-47c2-afaf-69a3c0c5a8db.json
rename to data/global-mmlu-lite/unknown/o4-mini-2025-04-16/b83b41d4-6c95-4c7d-a290-65d89bf776c2.json
index 6b3c13f5c..95a579825 100644
--- a/data/global-mmlu-lite/unknown/o4-mini-2025-04-16/026339f8-2a68-47c2-afaf-69a3c0c5a8db.json
+++ b/data/global-mmlu-lite/unknown/o4-mini-2025-04-16/b83b41d4-6c95-4c7d-a290-65d89bf776c2.json
@@ -1,12 +1,12 @@
 {
   "schema_version": "0.2.0",
-  "evaluation_id": "global-mmlu-lite/unknown_o4-mini-2025-04-16/1770822205.005286",
-  "retrieved_timestamp": "1770822205.005286",
+  "evaluation_id": "global-mmlu-lite/unknown_o4-mini-2025-04-16/1770822797.839372",
+  "retrieved_timestamp": "1770822797.839372",
   "source_metadata": {
-    "source_name": "Global MMLU Lite",
+    "source_name": "Global MMLU Lite Leaderboard",
     "source_type": "documentation",
-    "source_organization_name": "Cohere Labs",
-    "source_organization_url": "https://cohere.com",
+    "source_organization_name": "kaggle",
+    "source_organization_url": "www.kaggle.com",
     "evaluator_relationship": "third_party"
   },
   "model_info": {
diff --git a/data/global-mmlu-lite/xai/grok-3-mini/2761218a-27ef-4dc3-af3a-41ba830f8b40.json b/data/global-mmlu-lite/xai/grok-3-mini/31c3fe1b-be4b-42ef-8ec0-9da323b2ebb6.json
similarity index 98%
rename from data/global-mmlu-lite/xai/grok-3-mini/2761218a-27ef-4dc3-af3a-41ba830f8b40.json
rename to data/global-mmlu-lite/xai/grok-3-mini/31c3fe1b-be4b-42ef-8ec0-9da323b2ebb6.json
index 682b7a3d5..f816ebb33 100644
--- a/data/global-mmlu-lite/xai/grok-3-mini/2761218a-27ef-4dc3-af3a-41ba830f8b40.json
+++ b/data/global-mmlu-lite/xai/grok-3-mini/31c3fe1b-be4b-42ef-8ec0-9da323b2ebb6.json
@@ -1,12 +1,12 @@
 {
   "schema_version": "0.2.0",
-  "evaluation_id": "global-mmlu-lite/xai_grok-3-mini/1770822205.005286",
-  "retrieved_timestamp": "1770822205.005286",
+  "evaluation_id": "global-mmlu-lite/xai_grok-3-mini/1770822797.839372",
+  "retrieved_timestamp": "1770822797.839372",
   "source_metadata": {
-    "source_name": "Global MMLU Lite",
+    "source_name": "Global MMLU Lite Leaderboard",
     "source_type": "documentation",
-    "source_organization_name": "Cohere Labs",
-    "source_organization_url": "https://cohere.com",
+    "source_organization_name": "kaggle",
+    "source_organization_url": "www.kaggle.com",
     "evaluator_relationship": "third_party"
   },
   "model_info": {
diff --git a/data/global-mmlu-lite/xai/grok-4-0709/8a2fda68-99d6-47a5-95ee-d38e695dda55.json b/data/global-mmlu-lite/xai/grok-4-0709/a8e0fc0e-b3a4-4a0b-938f-aa11f1c64358.json
similarity index 98%
rename from data/global-mmlu-lite/xai/grok-4-0709/8a2fda68-99d6-47a5-95ee-d38e695dda55.json
rename to data/global-mmlu-lite/xai/grok-4-0709/a8e0fc0e-b3a4-4a0b-938f-aa11f1c64358.json
index b5cecaa08..4e37c60a0 100644
--- a/data/global-mmlu-lite/xai/grok-4-0709/8a2fda68-99d6-47a5-95ee-d38e695dda55.json
+++ b/data/global-mmlu-lite/xai/grok-4-0709/a8e0fc0e-b3a4-4a0b-938f-aa11f1c64358.json
@@ -1,12 +1,12 @@
 {
   "schema_version": "0.2.0",
-  "evaluation_id": "global-mmlu-lite/xai_grok-4-0709/1770822205.005286",
-  "retrieved_timestamp": "1770822205.005286",
+  "evaluation_id": "global-mmlu-lite/xai_grok-4-0709/1770822797.839372",
+  "retrieved_timestamp": "1770822797.839372",
   "source_metadata": {
-    "source_name": "Global MMLU Lite",
+    "source_name": "Global MMLU Lite Leaderboard",
     "source_type": "documentation",
-    "source_organization_name": "Cohere Labs",
-    "source_organization_url": "https://cohere.com",
+    "source_organization_name": "kaggle",
+    "source_organization_url": "www.kaggle.com",
     "evaluator_relationship": "third_party"
   },
   "model_info": {
diff --git a/scripts/global-mmlu-lite/adapter.py b/scripts/global-mmlu-lite/adapter.py
index 00a3645ba..0a0cbe441 100644
--- a/scripts/global-mmlu-lite/adapter.py
+++ b/scripts/global-mmlu-lite/adapter.py
@@ -171,9 +171,9 @@ def fetch_global_mmlu_lite(retrieved_timestamp: str) -> int:
             evaluation_id=evaluation_id,
             retrieved_timestamp=retrieved_timestamp,
             source_metadata=make_source_metadata(
-                source_name="Global MMLU Lite",
-                organization_name="Cohere Labs",
-                organization_url="https://cohere.com",
+                source_name="Global MMLU Lite Leaderboard",
+                organization_name="kaggle",
+                organization_url="www.kaggle.com",
                 evaluator_relationship=EvaluatorRelationship.third_party,
             ),
             model_info=model_info,